{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 25558, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.912669222943892e-05, "grad_norm": 0.0, "learning_rate": 2.607561929595828e-08, "loss": 2.2258, "step": 1 }, { "epoch": 7.825338445887784e-05, "grad_norm": 0.0, "learning_rate": 5.215123859191656e-08, "loss": 2.2266, "step": 2 }, { "epoch": 0.00011738007668831677, "grad_norm": 0.0, "learning_rate": 7.822685788787485e-08, "loss": 2.1685, "step": 3 }, { "epoch": 0.0001565067689177557, "grad_norm": 0.0, "learning_rate": 1.0430247718383312e-07, "loss": 2.251, "step": 4 }, { "epoch": 0.0001956334611471946, "grad_norm": 0.0, "learning_rate": 1.3037809647979142e-07, "loss": 2.3447, "step": 5 }, { "epoch": 0.00023476015337663353, "grad_norm": 0.0, "learning_rate": 1.564537157757497e-07, "loss": 2.3401, "step": 6 }, { "epoch": 0.00027388684560607245, "grad_norm": 0.0, "learning_rate": 1.8252933507170796e-07, "loss": 1.9788, "step": 7 }, { "epoch": 0.0003130135378355114, "grad_norm": 0.0, "learning_rate": 2.0860495436766624e-07, "loss": 2.1409, "step": 8 }, { "epoch": 0.0003521402300649503, "grad_norm": 0.0, "learning_rate": 2.3468057366362453e-07, "loss": 2.2629, "step": 9 }, { "epoch": 0.0003912669222943892, "grad_norm": 0.0, "learning_rate": 2.6075619295958284e-07, "loss": 1.9866, "step": 10 }, { "epoch": 0.00043039361452382814, "grad_norm": 0.0, "learning_rate": 2.868318122555411e-07, "loss": 1.8477, "step": 11 }, { "epoch": 0.00046952030675326707, "grad_norm": 0.0, "learning_rate": 3.129074315514994e-07, "loss": 2.0034, "step": 12 }, { "epoch": 0.000508646998982706, "grad_norm": 0.0, "learning_rate": 3.3898305084745766e-07, "loss": 2.1292, "step": 13 }, { "epoch": 0.0005477736912121449, "grad_norm": 0.0, "learning_rate": 3.650586701434159e-07, "loss": 1.7556, "step": 14 }, { "epoch": 0.0005869003834415838, "grad_norm": 0.0, "learning_rate": 3.9113428943937423e-07, "loss": 2.0747, "step": 15 }, { "epoch": 0.0006260270756710228, "grad_norm": 0.0, "learning_rate": 4.172099087353325e-07, "loss": 1.7646, "step": 16 }, { "epoch": 0.0006651537679004617, "grad_norm": 0.0, "learning_rate": 4.432855280312908e-07, "loss": 1.9348, "step": 17 }, { "epoch": 0.0007042804601299006, "grad_norm": 0.0, "learning_rate": 4.6936114732724906e-07, "loss": 1.873, "step": 18 }, { "epoch": 0.0007434071523593395, "grad_norm": 0.0, "learning_rate": 4.954367666232074e-07, "loss": 1.8451, "step": 19 }, { "epoch": 0.0007825338445887784, "grad_norm": 0.0, "learning_rate": 5.215123859191657e-07, "loss": 1.8474, "step": 20 }, { "epoch": 0.0008216605368182174, "grad_norm": 0.0, "learning_rate": 5.475880052151239e-07, "loss": 1.6666, "step": 21 }, { "epoch": 0.0008607872290476563, "grad_norm": 0.0, "learning_rate": 5.736636245110822e-07, "loss": 1.5818, "step": 22 }, { "epoch": 0.0008999139212770952, "grad_norm": 0.0, "learning_rate": 5.997392438070405e-07, "loss": 1.8182, "step": 23 }, { "epoch": 0.0009390406135065341, "grad_norm": 0.0, "learning_rate": 6.258148631029988e-07, "loss": 1.7399, "step": 24 }, { "epoch": 0.000978167305735973, "grad_norm": 0.0, "learning_rate": 6.51890482398957e-07, "loss": 1.4778, "step": 25 }, { "epoch": 0.001017293997965412, "grad_norm": 0.0, "learning_rate": 6.779661016949153e-07, "loss": 1.5547, "step": 26 }, { "epoch": 0.001056420690194851, "grad_norm": 0.0, "learning_rate": 7.040417209908735e-07, "loss": 1.6879, "step": 27 }, { "epoch": 0.0010955473824242898, "grad_norm": 0.0, "learning_rate": 7.301173402868318e-07, "loss": 1.5538, "step": 28 }, { "epoch": 0.0011346740746537287, "grad_norm": 0.0, "learning_rate": 7.561929595827903e-07, "loss": 1.5094, "step": 29 }, { "epoch": 0.0011738007668831677, "grad_norm": 0.0, "learning_rate": 7.822685788787485e-07, "loss": 1.5953, "step": 30 }, { "epoch": 0.0012129274591126066, "grad_norm": 0.0, "learning_rate": 8.083441981747067e-07, "loss": 1.7544, "step": 31 }, { "epoch": 0.0012520541513420455, "grad_norm": 0.0, "learning_rate": 8.34419817470665e-07, "loss": 1.3999, "step": 32 }, { "epoch": 0.0012911808435714844, "grad_norm": 0.0, "learning_rate": 8.604954367666232e-07, "loss": 1.5848, "step": 33 }, { "epoch": 0.0013303075358009234, "grad_norm": 0.0, "learning_rate": 8.865710560625816e-07, "loss": 1.6837, "step": 34 }, { "epoch": 0.0013694342280303623, "grad_norm": 0.0, "learning_rate": 9.126466753585399e-07, "loss": 1.5407, "step": 35 }, { "epoch": 0.0014085609202598012, "grad_norm": 0.0, "learning_rate": 9.387222946544981e-07, "loss": 1.6915, "step": 36 }, { "epoch": 0.0014476876124892401, "grad_norm": 0.0, "learning_rate": 9.647979139504563e-07, "loss": 1.5996, "step": 37 }, { "epoch": 0.001486814304718679, "grad_norm": 0.0, "learning_rate": 9.908735332464147e-07, "loss": 1.6036, "step": 38 }, { "epoch": 0.001525940996948118, "grad_norm": 0.0, "learning_rate": 1.016949152542373e-06, "loss": 1.5721, "step": 39 }, { "epoch": 0.0015650676891775569, "grad_norm": 0.0, "learning_rate": 1.0430247718383314e-06, "loss": 1.4548, "step": 40 }, { "epoch": 0.0016041943814069958, "grad_norm": 0.0, "learning_rate": 1.0691003911342896e-06, "loss": 1.4706, "step": 41 }, { "epoch": 0.0016433210736364347, "grad_norm": 0.0, "learning_rate": 1.0951760104302478e-06, "loss": 1.6071, "step": 42 }, { "epoch": 0.0016824477658658737, "grad_norm": 0.0, "learning_rate": 1.121251629726206e-06, "loss": 1.4688, "step": 43 }, { "epoch": 0.0017215744580953126, "grad_norm": 0.0, "learning_rate": 1.1473272490221644e-06, "loss": 1.6227, "step": 44 }, { "epoch": 0.0017607011503247515, "grad_norm": 0.0, "learning_rate": 1.1734028683181228e-06, "loss": 1.4862, "step": 45 }, { "epoch": 0.0017998278425541904, "grad_norm": 0.0, "learning_rate": 1.199478487614081e-06, "loss": 1.6094, "step": 46 }, { "epoch": 0.0018389545347836293, "grad_norm": 0.0, "learning_rate": 1.2255541069100392e-06, "loss": 1.6226, "step": 47 }, { "epoch": 0.0018780812270130683, "grad_norm": 0.0, "learning_rate": 1.2516297262059976e-06, "loss": 1.5422, "step": 48 }, { "epoch": 0.0019172079192425072, "grad_norm": 0.0, "learning_rate": 1.2777053455019558e-06, "loss": 1.6066, "step": 49 }, { "epoch": 0.001956334611471946, "grad_norm": 0.0, "learning_rate": 1.303780964797914e-06, "loss": 1.4391, "step": 50 }, { "epoch": 0.0019954613037013852, "grad_norm": 0.0, "learning_rate": 1.3298565840938724e-06, "loss": 1.4432, "step": 51 }, { "epoch": 0.002034587995930824, "grad_norm": 0.0, "learning_rate": 1.3559322033898307e-06, "loss": 1.4398, "step": 52 }, { "epoch": 0.002073714688160263, "grad_norm": 0.0, "learning_rate": 1.3820078226857889e-06, "loss": 1.4083, "step": 53 }, { "epoch": 0.002112841380389702, "grad_norm": 0.0, "learning_rate": 1.408083441981747e-06, "loss": 1.3663, "step": 54 }, { "epoch": 0.002151968072619141, "grad_norm": 0.0, "learning_rate": 1.4341590612777055e-06, "loss": 1.5204, "step": 55 }, { "epoch": 0.0021910947648485796, "grad_norm": 0.0, "learning_rate": 1.4602346805736637e-06, "loss": 1.4727, "step": 56 }, { "epoch": 0.0022302214570780188, "grad_norm": 0.0, "learning_rate": 1.4863102998696219e-06, "loss": 1.6703, "step": 57 }, { "epoch": 0.0022693481493074575, "grad_norm": 0.0, "learning_rate": 1.5123859191655805e-06, "loss": 1.5342, "step": 58 }, { "epoch": 0.0023084748415368966, "grad_norm": 0.0, "learning_rate": 1.5384615384615387e-06, "loss": 1.3996, "step": 59 }, { "epoch": 0.0023476015337663353, "grad_norm": 0.0, "learning_rate": 1.564537157757497e-06, "loss": 1.4977, "step": 60 }, { "epoch": 0.0023867282259957745, "grad_norm": 0.0, "learning_rate": 1.5906127770534551e-06, "loss": 1.5876, "step": 61 }, { "epoch": 0.002425854918225213, "grad_norm": 0.0, "learning_rate": 1.6166883963494133e-06, "loss": 1.3766, "step": 62 }, { "epoch": 0.0024649816104546523, "grad_norm": 0.0, "learning_rate": 1.6427640156453717e-06, "loss": 1.4597, "step": 63 }, { "epoch": 0.002504108302684091, "grad_norm": 0.0, "learning_rate": 1.66883963494133e-06, "loss": 1.5759, "step": 64 }, { "epoch": 0.00254323499491353, "grad_norm": 0.0, "learning_rate": 1.6949152542372882e-06, "loss": 1.6177, "step": 65 }, { "epoch": 0.002582361687142969, "grad_norm": 0.0, "learning_rate": 1.7209908735332464e-06, "loss": 1.5114, "step": 66 }, { "epoch": 0.002621488379372408, "grad_norm": 0.0, "learning_rate": 1.7470664928292048e-06, "loss": 1.4045, "step": 67 }, { "epoch": 0.0026606150716018467, "grad_norm": 0.0, "learning_rate": 1.7731421121251632e-06, "loss": 1.3693, "step": 68 }, { "epoch": 0.002699741763831286, "grad_norm": 0.0, "learning_rate": 1.7992177314211214e-06, "loss": 1.4917, "step": 69 }, { "epoch": 0.0027388684560607245, "grad_norm": 0.0, "learning_rate": 1.8252933507170798e-06, "loss": 1.4686, "step": 70 }, { "epoch": 0.0027779951482901637, "grad_norm": 0.0, "learning_rate": 1.851368970013038e-06, "loss": 1.506, "step": 71 }, { "epoch": 0.0028171218405196024, "grad_norm": 0.0, "learning_rate": 1.8774445893089962e-06, "loss": 1.4941, "step": 72 }, { "epoch": 0.0028562485327490415, "grad_norm": 0.0, "learning_rate": 1.9035202086049544e-06, "loss": 1.5632, "step": 73 }, { "epoch": 0.0028953752249784802, "grad_norm": 0.0, "learning_rate": 1.9295958279009126e-06, "loss": 1.4847, "step": 74 }, { "epoch": 0.0029345019172079194, "grad_norm": 0.0, "learning_rate": 1.955671447196871e-06, "loss": 1.4893, "step": 75 }, { "epoch": 0.002973628609437358, "grad_norm": 0.0, "learning_rate": 1.9817470664928295e-06, "loss": 1.3707, "step": 76 }, { "epoch": 0.0030127553016667972, "grad_norm": 0.0, "learning_rate": 2.0078226857887877e-06, "loss": 1.484, "step": 77 }, { "epoch": 0.003051881993896236, "grad_norm": 0.0, "learning_rate": 2.033898305084746e-06, "loss": 1.4838, "step": 78 }, { "epoch": 0.003091008686125675, "grad_norm": 0.0, "learning_rate": 2.0599739243807045e-06, "loss": 1.4841, "step": 79 }, { "epoch": 0.0031301353783551138, "grad_norm": 0.0, "learning_rate": 2.0860495436766627e-06, "loss": 1.4669, "step": 80 }, { "epoch": 0.003169262070584553, "grad_norm": 0.0, "learning_rate": 2.112125162972621e-06, "loss": 1.385, "step": 81 }, { "epoch": 0.0032083887628139916, "grad_norm": 0.0, "learning_rate": 2.138200782268579e-06, "loss": 1.4819, "step": 82 }, { "epoch": 0.0032475154550434308, "grad_norm": 0.0, "learning_rate": 2.1642764015645373e-06, "loss": 1.4581, "step": 83 }, { "epoch": 0.0032866421472728695, "grad_norm": 0.0, "learning_rate": 2.1903520208604955e-06, "loss": 1.4097, "step": 84 }, { "epoch": 0.0033257688395023086, "grad_norm": 0.0, "learning_rate": 2.2164276401564537e-06, "loss": 1.5341, "step": 85 }, { "epoch": 0.0033648955317317473, "grad_norm": 0.0, "learning_rate": 2.242503259452412e-06, "loss": 1.2742, "step": 86 }, { "epoch": 0.0034040222239611864, "grad_norm": 0.0, "learning_rate": 2.2685788787483706e-06, "loss": 1.3947, "step": 87 }, { "epoch": 0.003443148916190625, "grad_norm": 0.0, "learning_rate": 2.2946544980443288e-06, "loss": 1.5566, "step": 88 }, { "epoch": 0.0034822756084200643, "grad_norm": 0.0, "learning_rate": 2.320730117340287e-06, "loss": 1.5649, "step": 89 }, { "epoch": 0.003521402300649503, "grad_norm": 0.0, "learning_rate": 2.3468057366362456e-06, "loss": 1.5057, "step": 90 }, { "epoch": 0.003560528992878942, "grad_norm": 0.0, "learning_rate": 2.372881355932204e-06, "loss": 1.3528, "step": 91 }, { "epoch": 0.003599655685108381, "grad_norm": 0.0, "learning_rate": 2.398956975228162e-06, "loss": 1.3575, "step": 92 }, { "epoch": 0.00363878237733782, "grad_norm": 0.0, "learning_rate": 2.42503259452412e-06, "loss": 1.4628, "step": 93 }, { "epoch": 0.0036779090695672587, "grad_norm": 0.0, "learning_rate": 2.4511082138200784e-06, "loss": 1.4552, "step": 94 }, { "epoch": 0.003717035761796698, "grad_norm": 0.0, "learning_rate": 2.4771838331160366e-06, "loss": 1.4139, "step": 95 }, { "epoch": 0.0037561624540261365, "grad_norm": 0.0, "learning_rate": 2.5032594524119952e-06, "loss": 1.3741, "step": 96 }, { "epoch": 0.0037952891462555757, "grad_norm": 0.0, "learning_rate": 2.5293350717079534e-06, "loss": 1.394, "step": 97 }, { "epoch": 0.0038344158384850144, "grad_norm": 0.0, "learning_rate": 2.5554106910039117e-06, "loss": 1.3538, "step": 98 }, { "epoch": 0.0038735425307144535, "grad_norm": 0.0, "learning_rate": 2.58148631029987e-06, "loss": 1.309, "step": 99 }, { "epoch": 0.003912669222943892, "grad_norm": 0.0, "learning_rate": 2.607561929595828e-06, "loss": 1.3005, "step": 100 }, { "epoch": 0.003951795915173331, "grad_norm": 0.0, "learning_rate": 2.6336375488917863e-06, "loss": 1.3753, "step": 101 }, { "epoch": 0.0039909226074027705, "grad_norm": 0.0, "learning_rate": 2.659713168187745e-06, "loss": 1.2949, "step": 102 }, { "epoch": 0.004030049299632209, "grad_norm": 0.0, "learning_rate": 2.685788787483703e-06, "loss": 1.3506, "step": 103 }, { "epoch": 0.004069175991861648, "grad_norm": 0.0, "learning_rate": 2.7118644067796613e-06, "loss": 1.3557, "step": 104 }, { "epoch": 0.004108302684091087, "grad_norm": 0.0, "learning_rate": 2.7379400260756195e-06, "loss": 1.2952, "step": 105 }, { "epoch": 0.004147429376320526, "grad_norm": 0.0, "learning_rate": 2.7640156453715777e-06, "loss": 1.4055, "step": 106 }, { "epoch": 0.0041865560685499644, "grad_norm": 0.0, "learning_rate": 2.790091264667536e-06, "loss": 1.3723, "step": 107 }, { "epoch": 0.004225682760779404, "grad_norm": 0.0, "learning_rate": 2.816166883963494e-06, "loss": 1.3752, "step": 108 }, { "epoch": 0.004264809453008843, "grad_norm": 0.0, "learning_rate": 2.8422425032594523e-06, "loss": 1.3835, "step": 109 }, { "epoch": 0.004303936145238282, "grad_norm": 0.0, "learning_rate": 2.868318122555411e-06, "loss": 1.3755, "step": 110 }, { "epoch": 0.00434306283746772, "grad_norm": 0.0, "learning_rate": 2.894393741851369e-06, "loss": 1.3472, "step": 111 }, { "epoch": 0.004382189529697159, "grad_norm": 0.0, "learning_rate": 2.9204693611473274e-06, "loss": 1.3887, "step": 112 }, { "epoch": 0.004421316221926598, "grad_norm": 0.0, "learning_rate": 2.9465449804432856e-06, "loss": 1.3813, "step": 113 }, { "epoch": 0.0044604429141560376, "grad_norm": 0.0, "learning_rate": 2.9726205997392438e-06, "loss": 1.3345, "step": 114 }, { "epoch": 0.004499569606385476, "grad_norm": 0.0, "learning_rate": 2.9986962190352024e-06, "loss": 1.394, "step": 115 }, { "epoch": 0.004538696298614915, "grad_norm": 0.0, "learning_rate": 3.024771838331161e-06, "loss": 1.3119, "step": 116 }, { "epoch": 0.004577822990844354, "grad_norm": 0.0, "learning_rate": 3.0508474576271192e-06, "loss": 1.3721, "step": 117 }, { "epoch": 0.004616949683073793, "grad_norm": 0.0, "learning_rate": 3.0769230769230774e-06, "loss": 1.505, "step": 118 }, { "epoch": 0.0046560763753032315, "grad_norm": 0.0, "learning_rate": 3.1029986962190356e-06, "loss": 1.3782, "step": 119 }, { "epoch": 0.004695203067532671, "grad_norm": 0.0, "learning_rate": 3.129074315514994e-06, "loss": 1.3685, "step": 120 }, { "epoch": 0.00473432975976211, "grad_norm": 0.0, "learning_rate": 3.155149934810952e-06, "loss": 1.391, "step": 121 }, { "epoch": 0.004773456451991549, "grad_norm": 0.0, "learning_rate": 3.1812255541069103e-06, "loss": 1.3251, "step": 122 }, { "epoch": 0.004812583144220987, "grad_norm": 0.0, "learning_rate": 3.2073011734028685e-06, "loss": 1.5154, "step": 123 }, { "epoch": 0.004851709836450426, "grad_norm": 0.0, "learning_rate": 3.2333767926988267e-06, "loss": 1.3533, "step": 124 }, { "epoch": 0.0048908365286798655, "grad_norm": 0.0, "learning_rate": 3.2594524119947853e-06, "loss": 1.4227, "step": 125 }, { "epoch": 0.004929963220909305, "grad_norm": 0.0, "learning_rate": 3.2855280312907435e-06, "loss": 1.4224, "step": 126 }, { "epoch": 0.004969089913138743, "grad_norm": 0.0, "learning_rate": 3.3116036505867017e-06, "loss": 1.3929, "step": 127 }, { "epoch": 0.005008216605368182, "grad_norm": 0.0, "learning_rate": 3.33767926988266e-06, "loss": 1.447, "step": 128 }, { "epoch": 0.005047343297597621, "grad_norm": 0.0, "learning_rate": 3.363754889178618e-06, "loss": 1.3491, "step": 129 }, { "epoch": 0.00508646998982706, "grad_norm": 0.0, "learning_rate": 3.3898305084745763e-06, "loss": 1.3488, "step": 130 }, { "epoch": 0.005125596682056499, "grad_norm": 0.0, "learning_rate": 3.4159061277705345e-06, "loss": 1.5355, "step": 131 }, { "epoch": 0.005164723374285938, "grad_norm": 0.0, "learning_rate": 3.4419817470664927e-06, "loss": 1.5056, "step": 132 }, { "epoch": 0.005203850066515377, "grad_norm": 0.0, "learning_rate": 3.4680573663624513e-06, "loss": 1.369, "step": 133 }, { "epoch": 0.005242976758744816, "grad_norm": 0.0, "learning_rate": 3.4941329856584096e-06, "loss": 1.2498, "step": 134 }, { "epoch": 0.005282103450974254, "grad_norm": 0.0, "learning_rate": 3.520208604954368e-06, "loss": 1.3, "step": 135 }, { "epoch": 0.005321230143203693, "grad_norm": 0.0, "learning_rate": 3.5462842242503264e-06, "loss": 1.4124, "step": 136 }, { "epoch": 0.0053603568354331325, "grad_norm": 0.0, "learning_rate": 3.5723598435462846e-06, "loss": 1.3636, "step": 137 }, { "epoch": 0.005399483527662572, "grad_norm": 0.0, "learning_rate": 3.598435462842243e-06, "loss": 1.4164, "step": 138 }, { "epoch": 0.00543861021989201, "grad_norm": 0.0, "learning_rate": 3.6245110821382014e-06, "loss": 1.2739, "step": 139 }, { "epoch": 0.005477736912121449, "grad_norm": 0.0, "learning_rate": 3.6505867014341596e-06, "loss": 1.3809, "step": 140 }, { "epoch": 0.005516863604350888, "grad_norm": 0.0, "learning_rate": 3.676662320730118e-06, "loss": 1.4399, "step": 141 }, { "epoch": 0.005555990296580327, "grad_norm": 0.0, "learning_rate": 3.702737940026076e-06, "loss": 1.4296, "step": 142 }, { "epoch": 0.005595116988809766, "grad_norm": 0.0, "learning_rate": 3.7288135593220342e-06, "loss": 1.3194, "step": 143 }, { "epoch": 0.005634243681039205, "grad_norm": 0.0, "learning_rate": 3.7548891786179924e-06, "loss": 1.3386, "step": 144 }, { "epoch": 0.005673370373268644, "grad_norm": 0.0, "learning_rate": 3.7809647979139506e-06, "loss": 1.5277, "step": 145 }, { "epoch": 0.005712497065498083, "grad_norm": 0.0, "learning_rate": 3.807040417209909e-06, "loss": 1.3021, "step": 146 }, { "epoch": 0.005751623757727521, "grad_norm": 0.0, "learning_rate": 3.8331160365058675e-06, "loss": 1.3967, "step": 147 }, { "epoch": 0.0057907504499569605, "grad_norm": 0.0, "learning_rate": 3.859191655801825e-06, "loss": 1.3381, "step": 148 }, { "epoch": 0.0058298771421864, "grad_norm": 0.0, "learning_rate": 3.885267275097784e-06, "loss": 1.338, "step": 149 }, { "epoch": 0.005869003834415839, "grad_norm": 0.0, "learning_rate": 3.911342894393742e-06, "loss": 1.4635, "step": 150 }, { "epoch": 0.005908130526645277, "grad_norm": 0.0, "learning_rate": 3.9374185136897e-06, "loss": 1.3329, "step": 151 }, { "epoch": 0.005947257218874716, "grad_norm": 0.0, "learning_rate": 3.963494132985659e-06, "loss": 1.3795, "step": 152 }, { "epoch": 0.005986383911104155, "grad_norm": 0.0, "learning_rate": 3.989569752281617e-06, "loss": 1.3138, "step": 153 }, { "epoch": 0.0060255106033335944, "grad_norm": 0.0, "learning_rate": 4.015645371577575e-06, "loss": 1.3652, "step": 154 }, { "epoch": 0.006064637295563033, "grad_norm": 0.0, "learning_rate": 4.041720990873533e-06, "loss": 1.3557, "step": 155 }, { "epoch": 0.006103763987792472, "grad_norm": 0.0, "learning_rate": 4.067796610169492e-06, "loss": 1.3422, "step": 156 }, { "epoch": 0.006142890680021911, "grad_norm": 0.0, "learning_rate": 4.09387222946545e-06, "loss": 1.2483, "step": 157 }, { "epoch": 0.00618201737225135, "grad_norm": 0.0, "learning_rate": 4.119947848761409e-06, "loss": 1.3649, "step": 158 }, { "epoch": 0.006221144064480788, "grad_norm": 0.0, "learning_rate": 4.146023468057367e-06, "loss": 1.3962, "step": 159 }, { "epoch": 0.0062602707567102275, "grad_norm": 0.0, "learning_rate": 4.172099087353325e-06, "loss": 1.3009, "step": 160 }, { "epoch": 0.006299397448939667, "grad_norm": 0.0, "learning_rate": 4.198174706649283e-06, "loss": 1.408, "step": 161 }, { "epoch": 0.006338524141169106, "grad_norm": 0.0, "learning_rate": 4.224250325945242e-06, "loss": 1.468, "step": 162 }, { "epoch": 0.006377650833398544, "grad_norm": 0.0, "learning_rate": 4.2503259452412e-06, "loss": 1.4591, "step": 163 }, { "epoch": 0.006416777525627983, "grad_norm": 0.0, "learning_rate": 4.276401564537158e-06, "loss": 1.4445, "step": 164 }, { "epoch": 0.006455904217857422, "grad_norm": 0.0, "learning_rate": 4.302477183833116e-06, "loss": 1.3512, "step": 165 }, { "epoch": 0.0064950309100868615, "grad_norm": 0.0, "learning_rate": 4.328552803129075e-06, "loss": 1.4269, "step": 166 }, { "epoch": 0.0065341576023163, "grad_norm": 0.0, "learning_rate": 4.354628422425033e-06, "loss": 1.3937, "step": 167 }, { "epoch": 0.006573284294545739, "grad_norm": 0.0, "learning_rate": 4.380704041720991e-06, "loss": 1.3075, "step": 168 }, { "epoch": 0.006612410986775178, "grad_norm": 0.0, "learning_rate": 4.40677966101695e-06, "loss": 1.2495, "step": 169 }, { "epoch": 0.006651537679004617, "grad_norm": 0.0, "learning_rate": 4.4328552803129075e-06, "loss": 1.5041, "step": 170 }, { "epoch": 0.0066906643712340555, "grad_norm": 0.0, "learning_rate": 4.458930899608866e-06, "loss": 1.301, "step": 171 }, { "epoch": 0.006729791063463495, "grad_norm": 0.0, "learning_rate": 4.485006518904824e-06, "loss": 1.4299, "step": 172 }, { "epoch": 0.006768917755692934, "grad_norm": 0.0, "learning_rate": 4.5110821382007825e-06, "loss": 1.1853, "step": 173 }, { "epoch": 0.006808044447922373, "grad_norm": 0.0, "learning_rate": 4.537157757496741e-06, "loss": 1.4109, "step": 174 }, { "epoch": 0.006847171140151811, "grad_norm": 0.0, "learning_rate": 4.563233376792699e-06, "loss": 1.3104, "step": 175 }, { "epoch": 0.00688629783238125, "grad_norm": 0.0, "learning_rate": 4.5893089960886575e-06, "loss": 1.3376, "step": 176 }, { "epoch": 0.006925424524610689, "grad_norm": 0.0, "learning_rate": 4.615384615384616e-06, "loss": 1.3854, "step": 177 }, { "epoch": 0.006964551216840129, "grad_norm": 0.0, "learning_rate": 4.641460234680574e-06, "loss": 1.2471, "step": 178 }, { "epoch": 0.007003677909069567, "grad_norm": 0.0, "learning_rate": 4.6675358539765326e-06, "loss": 1.403, "step": 179 }, { "epoch": 0.007042804601299006, "grad_norm": 0.0, "learning_rate": 4.693611473272491e-06, "loss": 1.4435, "step": 180 }, { "epoch": 0.007081931293528445, "grad_norm": 0.0, "learning_rate": 4.719687092568449e-06, "loss": 1.4517, "step": 181 }, { "epoch": 0.007121057985757884, "grad_norm": 0.0, "learning_rate": 4.745762711864408e-06, "loss": 1.3156, "step": 182 }, { "epoch": 0.0071601846779873225, "grad_norm": 0.0, "learning_rate": 4.771838331160365e-06, "loss": 1.3231, "step": 183 }, { "epoch": 0.007199311370216762, "grad_norm": 0.0, "learning_rate": 4.797913950456324e-06, "loss": 1.3873, "step": 184 }, { "epoch": 0.007238438062446201, "grad_norm": 0.0, "learning_rate": 4.823989569752282e-06, "loss": 1.334, "step": 185 }, { "epoch": 0.00727756475467564, "grad_norm": 0.0, "learning_rate": 4.85006518904824e-06, "loss": 1.2059, "step": 186 }, { "epoch": 0.007316691446905078, "grad_norm": 0.0, "learning_rate": 4.876140808344198e-06, "loss": 1.3802, "step": 187 }, { "epoch": 0.007355818139134517, "grad_norm": 0.0, "learning_rate": 4.902216427640157e-06, "loss": 1.251, "step": 188 }, { "epoch": 0.0073949448313639565, "grad_norm": 0.0, "learning_rate": 4.9282920469361155e-06, "loss": 1.2852, "step": 189 }, { "epoch": 0.007434071523593396, "grad_norm": 0.0, "learning_rate": 4.954367666232073e-06, "loss": 1.3348, "step": 190 }, { "epoch": 0.007473198215822835, "grad_norm": 0.0, "learning_rate": 4.980443285528032e-06, "loss": 1.3463, "step": 191 }, { "epoch": 0.007512324908052273, "grad_norm": 0.0, "learning_rate": 5.0065189048239905e-06, "loss": 1.2957, "step": 192 }, { "epoch": 0.007551451600281712, "grad_norm": 0.0, "learning_rate": 5.032594524119948e-06, "loss": 1.3495, "step": 193 }, { "epoch": 0.007590578292511151, "grad_norm": 0.0, "learning_rate": 5.058670143415907e-06, "loss": 1.4135, "step": 194 }, { "epoch": 0.0076297049847405905, "grad_norm": 0.0, "learning_rate": 5.084745762711865e-06, "loss": 1.3567, "step": 195 }, { "epoch": 0.007668831676970029, "grad_norm": 0.0, "learning_rate": 5.110821382007823e-06, "loss": 1.2578, "step": 196 }, { "epoch": 0.007707958369199468, "grad_norm": 0.0, "learning_rate": 5.136897001303781e-06, "loss": 1.2433, "step": 197 }, { "epoch": 0.007747085061428907, "grad_norm": 0.0, "learning_rate": 5.16297262059974e-06, "loss": 1.3201, "step": 198 }, { "epoch": 0.007786211753658346, "grad_norm": 0.0, "learning_rate": 5.1890482398956975e-06, "loss": 1.3608, "step": 199 }, { "epoch": 0.007825338445887784, "grad_norm": 0.0, "learning_rate": 5.215123859191656e-06, "loss": 1.4556, "step": 200 }, { "epoch": 0.007864465138117224, "grad_norm": 0.0, "learning_rate": 5.241199478487614e-06, "loss": 1.4248, "step": 201 }, { "epoch": 0.007903591830346663, "grad_norm": 0.0, "learning_rate": 5.2672750977835725e-06, "loss": 1.3947, "step": 202 }, { "epoch": 0.007942718522576102, "grad_norm": 0.0, "learning_rate": 5.29335071707953e-06, "loss": 1.4028, "step": 203 }, { "epoch": 0.007981845214805541, "grad_norm": 0.0, "learning_rate": 5.31942633637549e-06, "loss": 1.3906, "step": 204 }, { "epoch": 0.00802097190703498, "grad_norm": 0.0, "learning_rate": 5.345501955671447e-06, "loss": 1.3363, "step": 205 }, { "epoch": 0.008060098599264418, "grad_norm": 0.0, "learning_rate": 5.371577574967406e-06, "loss": 1.4054, "step": 206 }, { "epoch": 0.008099225291493857, "grad_norm": 0.0, "learning_rate": 5.397653194263364e-06, "loss": 1.3038, "step": 207 }, { "epoch": 0.008138351983723296, "grad_norm": 0.0, "learning_rate": 5.423728813559323e-06, "loss": 1.3452, "step": 208 }, { "epoch": 0.008177478675952735, "grad_norm": 0.0, "learning_rate": 5.449804432855281e-06, "loss": 1.2614, "step": 209 }, { "epoch": 0.008216605368182174, "grad_norm": 0.0, "learning_rate": 5.475880052151239e-06, "loss": 1.3498, "step": 210 }, { "epoch": 0.008255732060411613, "grad_norm": 0.0, "learning_rate": 5.501955671447198e-06, "loss": 1.2994, "step": 211 }, { "epoch": 0.008294858752641052, "grad_norm": 0.0, "learning_rate": 5.5280312907431554e-06, "loss": 1.4379, "step": 212 }, { "epoch": 0.008333985444870491, "grad_norm": 0.0, "learning_rate": 5.554106910039114e-06, "loss": 1.3136, "step": 213 }, { "epoch": 0.008373112137099929, "grad_norm": 0.0, "learning_rate": 5.580182529335072e-06, "loss": 1.2997, "step": 214 }, { "epoch": 0.008412238829329368, "grad_norm": 0.0, "learning_rate": 5.6062581486310305e-06, "loss": 1.3591, "step": 215 }, { "epoch": 0.008451365521558807, "grad_norm": 0.0, "learning_rate": 5.632333767926988e-06, "loss": 1.3214, "step": 216 }, { "epoch": 0.008490492213788246, "grad_norm": 0.0, "learning_rate": 5.658409387222948e-06, "loss": 1.3735, "step": 217 }, { "epoch": 0.008529618906017685, "grad_norm": 0.0, "learning_rate": 5.684485006518905e-06, "loss": 1.1454, "step": 218 }, { "epoch": 0.008568745598247125, "grad_norm": 0.0, "learning_rate": 5.710560625814864e-06, "loss": 1.3994, "step": 219 }, { "epoch": 0.008607872290476564, "grad_norm": 0.0, "learning_rate": 5.736636245110822e-06, "loss": 1.3139, "step": 220 }, { "epoch": 0.008646998982706003, "grad_norm": 0.0, "learning_rate": 5.7627118644067805e-06, "loss": 1.4988, "step": 221 }, { "epoch": 0.00868612567493544, "grad_norm": 0.0, "learning_rate": 5.788787483702738e-06, "loss": 1.4006, "step": 222 }, { "epoch": 0.00872525236716488, "grad_norm": 0.0, "learning_rate": 5.814863102998697e-06, "loss": 1.3512, "step": 223 }, { "epoch": 0.008764379059394319, "grad_norm": 0.0, "learning_rate": 5.840938722294655e-06, "loss": 1.1885, "step": 224 }, { "epoch": 0.008803505751623758, "grad_norm": 0.0, "learning_rate": 5.867014341590613e-06, "loss": 1.3357, "step": 225 }, { "epoch": 0.008842632443853197, "grad_norm": 0.0, "learning_rate": 5.893089960886571e-06, "loss": 1.3458, "step": 226 }, { "epoch": 0.008881759136082636, "grad_norm": 0.0, "learning_rate": 5.91916558018253e-06, "loss": 1.2893, "step": 227 }, { "epoch": 0.008920885828312075, "grad_norm": 0.0, "learning_rate": 5.9452411994784875e-06, "loss": 1.3563, "step": 228 }, { "epoch": 0.008960012520541514, "grad_norm": 0.0, "learning_rate": 5.971316818774446e-06, "loss": 1.1919, "step": 229 }, { "epoch": 0.008999139212770952, "grad_norm": 0.0, "learning_rate": 5.997392438070405e-06, "loss": 1.2916, "step": 230 }, { "epoch": 0.00903826590500039, "grad_norm": 0.0, "learning_rate": 6.023468057366363e-06, "loss": 1.3856, "step": 231 }, { "epoch": 0.00907739259722983, "grad_norm": 0.0, "learning_rate": 6.049543676662322e-06, "loss": 1.2431, "step": 232 }, { "epoch": 0.009116519289459269, "grad_norm": 0.0, "learning_rate": 6.075619295958279e-06, "loss": 1.4371, "step": 233 }, { "epoch": 0.009155645981688708, "grad_norm": 0.0, "learning_rate": 6.1016949152542385e-06, "loss": 1.2924, "step": 234 }, { "epoch": 0.009194772673918147, "grad_norm": 0.0, "learning_rate": 6.127770534550196e-06, "loss": 1.357, "step": 235 }, { "epoch": 0.009233899366147586, "grad_norm": 0.0, "learning_rate": 6.153846153846155e-06, "loss": 1.2363, "step": 236 }, { "epoch": 0.009273026058377026, "grad_norm": 0.0, "learning_rate": 6.179921773142113e-06, "loss": 1.3647, "step": 237 }, { "epoch": 0.009312152750606463, "grad_norm": 0.0, "learning_rate": 6.205997392438071e-06, "loss": 1.3705, "step": 238 }, { "epoch": 0.009351279442835902, "grad_norm": 0.0, "learning_rate": 6.232073011734029e-06, "loss": 1.2914, "step": 239 }, { "epoch": 0.009390406135065341, "grad_norm": 0.0, "learning_rate": 6.258148631029988e-06, "loss": 1.1712, "step": 240 }, { "epoch": 0.00942953282729478, "grad_norm": 0.0, "learning_rate": 6.2842242503259455e-06, "loss": 1.2874, "step": 241 }, { "epoch": 0.00946865951952422, "grad_norm": 0.0, "learning_rate": 6.310299869621904e-06, "loss": 1.5271, "step": 242 }, { "epoch": 0.009507786211753659, "grad_norm": 0.0, "learning_rate": 6.336375488917862e-06, "loss": 1.3885, "step": 243 }, { "epoch": 0.009546912903983098, "grad_norm": 0.0, "learning_rate": 6.3624511082138205e-06, "loss": 1.38, "step": 244 }, { "epoch": 0.009586039596212537, "grad_norm": 0.0, "learning_rate": 6.388526727509778e-06, "loss": 1.2515, "step": 245 }, { "epoch": 0.009625166288441974, "grad_norm": 0.0, "learning_rate": 6.414602346805737e-06, "loss": 1.2769, "step": 246 }, { "epoch": 0.009664292980671414, "grad_norm": 0.0, "learning_rate": 6.440677966101695e-06, "loss": 1.2955, "step": 247 }, { "epoch": 0.009703419672900853, "grad_norm": 0.0, "learning_rate": 6.466753585397653e-06, "loss": 1.2698, "step": 248 }, { "epoch": 0.009742546365130292, "grad_norm": 0.0, "learning_rate": 6.492829204693613e-06, "loss": 1.3268, "step": 249 }, { "epoch": 0.009781673057359731, "grad_norm": 0.0, "learning_rate": 6.518904823989571e-06, "loss": 1.3779, "step": 250 }, { "epoch": 0.00982079974958917, "grad_norm": 0.0, "learning_rate": 6.544980443285529e-06, "loss": 1.3955, "step": 251 }, { "epoch": 0.00985992644181861, "grad_norm": 0.0, "learning_rate": 6.571056062581487e-06, "loss": 1.2912, "step": 252 }, { "epoch": 0.009899053134048048, "grad_norm": 0.0, "learning_rate": 6.597131681877446e-06, "loss": 1.329, "step": 253 }, { "epoch": 0.009938179826277486, "grad_norm": 0.0, "learning_rate": 6.623207301173403e-06, "loss": 1.3978, "step": 254 }, { "epoch": 0.009977306518506925, "grad_norm": 0.0, "learning_rate": 6.649282920469362e-06, "loss": 1.3483, "step": 255 }, { "epoch": 0.010016433210736364, "grad_norm": 0.0, "learning_rate": 6.67535853976532e-06, "loss": 1.3959, "step": 256 }, { "epoch": 0.010055559902965803, "grad_norm": 0.0, "learning_rate": 6.7014341590612784e-06, "loss": 1.2706, "step": 257 }, { "epoch": 0.010094686595195242, "grad_norm": 0.0, "learning_rate": 6.727509778357236e-06, "loss": 1.2396, "step": 258 }, { "epoch": 0.010133813287424681, "grad_norm": 0.0, "learning_rate": 6.753585397653195e-06, "loss": 1.4016, "step": 259 }, { "epoch": 0.01017293997965412, "grad_norm": 0.0, "learning_rate": 6.779661016949153e-06, "loss": 1.2732, "step": 260 }, { "epoch": 0.01021206667188356, "grad_norm": 0.0, "learning_rate": 6.805736636245111e-06, "loss": 1.3431, "step": 261 }, { "epoch": 0.010251193364112997, "grad_norm": 0.0, "learning_rate": 6.831812255541069e-06, "loss": 1.3865, "step": 262 }, { "epoch": 0.010290320056342436, "grad_norm": 0.0, "learning_rate": 6.8578878748370285e-06, "loss": 1.2385, "step": 263 }, { "epoch": 0.010329446748571875, "grad_norm": 0.0, "learning_rate": 6.8839634941329854e-06, "loss": 1.3173, "step": 264 }, { "epoch": 0.010368573440801315, "grad_norm": 0.0, "learning_rate": 6.910039113428945e-06, "loss": 1.4006, "step": 265 }, { "epoch": 0.010407700133030754, "grad_norm": 0.0, "learning_rate": 6.936114732724903e-06, "loss": 1.3444, "step": 266 }, { "epoch": 0.010446826825260193, "grad_norm": 0.0, "learning_rate": 6.962190352020861e-06, "loss": 1.2354, "step": 267 }, { "epoch": 0.010485953517489632, "grad_norm": 0.0, "learning_rate": 6.988265971316819e-06, "loss": 1.3185, "step": 268 }, { "epoch": 0.010525080209719071, "grad_norm": 0.0, "learning_rate": 7.014341590612778e-06, "loss": 1.2294, "step": 269 }, { "epoch": 0.010564206901948509, "grad_norm": 0.0, "learning_rate": 7.040417209908736e-06, "loss": 1.2622, "step": 270 }, { "epoch": 0.010603333594177948, "grad_norm": 0.0, "learning_rate": 7.066492829204694e-06, "loss": 1.279, "step": 271 }, { "epoch": 0.010642460286407387, "grad_norm": 0.0, "learning_rate": 7.092568448500653e-06, "loss": 1.3441, "step": 272 }, { "epoch": 0.010681586978636826, "grad_norm": 0.0, "learning_rate": 7.1186440677966106e-06, "loss": 1.3218, "step": 273 }, { "epoch": 0.010720713670866265, "grad_norm": 0.0, "learning_rate": 7.144719687092569e-06, "loss": 1.3236, "step": 274 }, { "epoch": 0.010759840363095704, "grad_norm": 0.0, "learning_rate": 7.170795306388527e-06, "loss": 1.356, "step": 275 }, { "epoch": 0.010798967055325143, "grad_norm": 0.0, "learning_rate": 7.196870925684486e-06, "loss": 1.2808, "step": 276 }, { "epoch": 0.010838093747554583, "grad_norm": 0.0, "learning_rate": 7.222946544980443e-06, "loss": 1.201, "step": 277 }, { "epoch": 0.01087722043978402, "grad_norm": 0.0, "learning_rate": 7.249022164276403e-06, "loss": 1.3279, "step": 278 }, { "epoch": 0.010916347132013459, "grad_norm": 0.0, "learning_rate": 7.27509778357236e-06, "loss": 1.2931, "step": 279 }, { "epoch": 0.010955473824242898, "grad_norm": 0.0, "learning_rate": 7.301173402868319e-06, "loss": 1.3418, "step": 280 }, { "epoch": 0.010994600516472337, "grad_norm": 0.0, "learning_rate": 7.327249022164277e-06, "loss": 1.3231, "step": 281 }, { "epoch": 0.011033727208701776, "grad_norm": 0.0, "learning_rate": 7.353324641460236e-06, "loss": 1.2429, "step": 282 }, { "epoch": 0.011072853900931216, "grad_norm": 0.0, "learning_rate": 7.3794002607561934e-06, "loss": 1.3893, "step": 283 }, { "epoch": 0.011111980593160655, "grad_norm": 0.0, "learning_rate": 7.405475880052152e-06, "loss": 1.3075, "step": 284 }, { "epoch": 0.011151107285390094, "grad_norm": 0.0, "learning_rate": 7.43155149934811e-06, "loss": 1.2918, "step": 285 }, { "epoch": 0.011190233977619531, "grad_norm": 0.0, "learning_rate": 7.4576271186440685e-06, "loss": 1.3269, "step": 286 }, { "epoch": 0.01122936066984897, "grad_norm": 0.0, "learning_rate": 7.483702737940026e-06, "loss": 1.381, "step": 287 }, { "epoch": 0.01126848736207841, "grad_norm": 0.0, "learning_rate": 7.509778357235985e-06, "loss": 1.2844, "step": 288 }, { "epoch": 0.011307614054307849, "grad_norm": 0.0, "learning_rate": 7.535853976531943e-06, "loss": 1.2786, "step": 289 }, { "epoch": 0.011346740746537288, "grad_norm": 0.0, "learning_rate": 7.561929595827901e-06, "loss": 1.2607, "step": 290 }, { "epoch": 0.011385867438766727, "grad_norm": 0.0, "learning_rate": 7.58800521512386e-06, "loss": 1.2665, "step": 291 }, { "epoch": 0.011424994130996166, "grad_norm": 0.0, "learning_rate": 7.614080834419818e-06, "loss": 1.376, "step": 292 }, { "epoch": 0.011464120823225605, "grad_norm": 0.0, "learning_rate": 7.640156453715776e-06, "loss": 1.2978, "step": 293 }, { "epoch": 0.011503247515455043, "grad_norm": 0.0, "learning_rate": 7.666232073011735e-06, "loss": 1.4371, "step": 294 }, { "epoch": 0.011542374207684482, "grad_norm": 0.0, "learning_rate": 7.692307692307694e-06, "loss": 1.2955, "step": 295 }, { "epoch": 0.011581500899913921, "grad_norm": 0.0, "learning_rate": 7.71838331160365e-06, "loss": 1.3795, "step": 296 }, { "epoch": 0.01162062759214336, "grad_norm": 0.0, "learning_rate": 7.744458930899609e-06, "loss": 1.2765, "step": 297 }, { "epoch": 0.0116597542843728, "grad_norm": 0.0, "learning_rate": 7.770534550195568e-06, "loss": 1.3157, "step": 298 }, { "epoch": 0.011698880976602238, "grad_norm": 0.0, "learning_rate": 7.796610169491526e-06, "loss": 1.2561, "step": 299 }, { "epoch": 0.011738007668831677, "grad_norm": 0.0, "learning_rate": 7.822685788787483e-06, "loss": 1.3549, "step": 300 }, { "epoch": 0.011777134361061117, "grad_norm": 0.0, "learning_rate": 7.848761408083444e-06, "loss": 1.2984, "step": 301 }, { "epoch": 0.011816261053290554, "grad_norm": 0.0, "learning_rate": 7.8748370273794e-06, "loss": 1.384, "step": 302 }, { "epoch": 0.011855387745519993, "grad_norm": 0.0, "learning_rate": 7.90091264667536e-06, "loss": 1.3005, "step": 303 }, { "epoch": 0.011894514437749432, "grad_norm": 0.0, "learning_rate": 7.926988265971318e-06, "loss": 1.4104, "step": 304 }, { "epoch": 0.011933641129978871, "grad_norm": 0.0, "learning_rate": 7.953063885267276e-06, "loss": 1.2677, "step": 305 }, { "epoch": 0.01197276782220831, "grad_norm": 0.0, "learning_rate": 7.979139504563233e-06, "loss": 1.264, "step": 306 }, { "epoch": 0.01201189451443775, "grad_norm": 0.0, "learning_rate": 8.005215123859192e-06, "loss": 1.3384, "step": 307 }, { "epoch": 0.012051021206667189, "grad_norm": 0.0, "learning_rate": 8.03129074315515e-06, "loss": 1.3146, "step": 308 }, { "epoch": 0.012090147898896628, "grad_norm": 0.0, "learning_rate": 8.05736636245111e-06, "loss": 1.2887, "step": 309 }, { "epoch": 0.012129274591126065, "grad_norm": 0.0, "learning_rate": 8.083441981747066e-06, "loss": 1.3411, "step": 310 }, { "epoch": 0.012168401283355505, "grad_norm": 0.0, "learning_rate": 8.109517601043025e-06, "loss": 1.2863, "step": 311 }, { "epoch": 0.012207527975584944, "grad_norm": 0.0, "learning_rate": 8.135593220338983e-06, "loss": 1.4172, "step": 312 }, { "epoch": 0.012246654667814383, "grad_norm": 0.0, "learning_rate": 8.161668839634942e-06, "loss": 1.3495, "step": 313 }, { "epoch": 0.012285781360043822, "grad_norm": 0.0, "learning_rate": 8.1877444589309e-06, "loss": 1.3222, "step": 314 }, { "epoch": 0.012324908052273261, "grad_norm": 0.0, "learning_rate": 8.213820078226858e-06, "loss": 1.407, "step": 315 }, { "epoch": 0.0123640347445027, "grad_norm": 0.0, "learning_rate": 8.239895697522818e-06, "loss": 1.3616, "step": 316 }, { "epoch": 0.01240316143673214, "grad_norm": 0.0, "learning_rate": 8.265971316818775e-06, "loss": 1.3243, "step": 317 }, { "epoch": 0.012442288128961577, "grad_norm": 0.0, "learning_rate": 8.292046936114734e-06, "loss": 1.0859, "step": 318 }, { "epoch": 0.012481414821191016, "grad_norm": 0.0, "learning_rate": 8.318122555410692e-06, "loss": 1.3722, "step": 319 }, { "epoch": 0.012520541513420455, "grad_norm": 0.0, "learning_rate": 8.34419817470665e-06, "loss": 1.241, "step": 320 }, { "epoch": 0.012559668205649894, "grad_norm": 0.0, "learning_rate": 8.370273794002608e-06, "loss": 1.2573, "step": 321 }, { "epoch": 0.012598794897879333, "grad_norm": 0.0, "learning_rate": 8.396349413298566e-06, "loss": 1.275, "step": 322 }, { "epoch": 0.012637921590108772, "grad_norm": 0.0, "learning_rate": 8.422425032594525e-06, "loss": 1.3417, "step": 323 }, { "epoch": 0.012677048282338212, "grad_norm": 0.0, "learning_rate": 8.448500651890484e-06, "loss": 1.3077, "step": 324 }, { "epoch": 0.01271617497456765, "grad_norm": 0.0, "learning_rate": 8.47457627118644e-06, "loss": 1.2665, "step": 325 }, { "epoch": 0.012755301666797088, "grad_norm": 0.0, "learning_rate": 8.5006518904824e-06, "loss": 1.4011, "step": 326 }, { "epoch": 0.012794428359026527, "grad_norm": 0.0, "learning_rate": 8.526727509778358e-06, "loss": 1.2346, "step": 327 }, { "epoch": 0.012833555051255966, "grad_norm": 0.0, "learning_rate": 8.552803129074316e-06, "loss": 1.2982, "step": 328 }, { "epoch": 0.012872681743485406, "grad_norm": 0.0, "learning_rate": 8.578878748370273e-06, "loss": 1.3824, "step": 329 }, { "epoch": 0.012911808435714845, "grad_norm": 0.0, "learning_rate": 8.604954367666232e-06, "loss": 1.2346, "step": 330 }, { "epoch": 0.012950935127944284, "grad_norm": 0.0, "learning_rate": 8.63102998696219e-06, "loss": 1.3313, "step": 331 }, { "epoch": 0.012990061820173723, "grad_norm": 0.0, "learning_rate": 8.65710560625815e-06, "loss": 1.4097, "step": 332 }, { "epoch": 0.013029188512403162, "grad_norm": 0.0, "learning_rate": 8.683181225554108e-06, "loss": 1.2245, "step": 333 }, { "epoch": 0.0130683152046326, "grad_norm": 0.0, "learning_rate": 8.709256844850067e-06, "loss": 1.3786, "step": 334 }, { "epoch": 0.013107441896862039, "grad_norm": 0.0, "learning_rate": 8.735332464146025e-06, "loss": 1.3371, "step": 335 }, { "epoch": 0.013146568589091478, "grad_norm": 0.0, "learning_rate": 8.761408083441982e-06, "loss": 1.1729, "step": 336 }, { "epoch": 0.013185695281320917, "grad_norm": 0.0, "learning_rate": 8.78748370273794e-06, "loss": 1.2436, "step": 337 }, { "epoch": 0.013224821973550356, "grad_norm": 0.0, "learning_rate": 8.8135593220339e-06, "loss": 1.3173, "step": 338 }, { "epoch": 0.013263948665779795, "grad_norm": 0.0, "learning_rate": 8.839634941329858e-06, "loss": 1.3599, "step": 339 }, { "epoch": 0.013303075358009234, "grad_norm": 0.0, "learning_rate": 8.865710560625815e-06, "loss": 1.2366, "step": 340 }, { "epoch": 0.013342202050238674, "grad_norm": 0.0, "learning_rate": 8.891786179921774e-06, "loss": 1.2457, "step": 341 }, { "epoch": 0.013381328742468111, "grad_norm": 0.0, "learning_rate": 8.917861799217732e-06, "loss": 1.3181, "step": 342 }, { "epoch": 0.01342045543469755, "grad_norm": 0.0, "learning_rate": 8.94393741851369e-06, "loss": 1.3078, "step": 343 }, { "epoch": 0.01345958212692699, "grad_norm": 0.0, "learning_rate": 8.970013037809648e-06, "loss": 1.2429, "step": 344 }, { "epoch": 0.013498708819156428, "grad_norm": 0.0, "learning_rate": 8.996088657105606e-06, "loss": 1.3337, "step": 345 }, { "epoch": 0.013537835511385867, "grad_norm": 0.0, "learning_rate": 9.022164276401565e-06, "loss": 1.2596, "step": 346 }, { "epoch": 0.013576962203615307, "grad_norm": 0.0, "learning_rate": 9.048239895697524e-06, "loss": 1.2844, "step": 347 }, { "epoch": 0.013616088895844746, "grad_norm": 0.0, "learning_rate": 9.074315514993482e-06, "loss": 1.2958, "step": 348 }, { "epoch": 0.013655215588074185, "grad_norm": 0.0, "learning_rate": 9.100391134289441e-06, "loss": 1.361, "step": 349 }, { "epoch": 0.013694342280303622, "grad_norm": 0.0, "learning_rate": 9.126466753585398e-06, "loss": 1.2936, "step": 350 }, { "epoch": 0.013733468972533061, "grad_norm": 0.0, "learning_rate": 9.152542372881356e-06, "loss": 1.4554, "step": 351 }, { "epoch": 0.0137725956647625, "grad_norm": 0.0, "learning_rate": 9.178617992177315e-06, "loss": 1.3511, "step": 352 }, { "epoch": 0.01381172235699194, "grad_norm": 0.0, "learning_rate": 9.204693611473274e-06, "loss": 1.3425, "step": 353 }, { "epoch": 0.013850849049221379, "grad_norm": 0.0, "learning_rate": 9.230769230769232e-06, "loss": 1.2671, "step": 354 }, { "epoch": 0.013889975741450818, "grad_norm": 0.0, "learning_rate": 9.25684485006519e-06, "loss": 1.3187, "step": 355 }, { "epoch": 0.013929102433680257, "grad_norm": 0.0, "learning_rate": 9.282920469361148e-06, "loss": 1.4668, "step": 356 }, { "epoch": 0.013968229125909696, "grad_norm": 0.0, "learning_rate": 9.308996088657106e-06, "loss": 1.1431, "step": 357 }, { "epoch": 0.014007355818139134, "grad_norm": 0.0, "learning_rate": 9.335071707953065e-06, "loss": 1.3146, "step": 358 }, { "epoch": 0.014046482510368573, "grad_norm": 0.0, "learning_rate": 9.361147327249022e-06, "loss": 1.3269, "step": 359 }, { "epoch": 0.014085609202598012, "grad_norm": 0.0, "learning_rate": 9.387222946544982e-06, "loss": 1.3187, "step": 360 }, { "epoch": 0.014124735894827451, "grad_norm": 0.0, "learning_rate": 9.41329856584094e-06, "loss": 1.2446, "step": 361 }, { "epoch": 0.01416386258705689, "grad_norm": 0.0, "learning_rate": 9.439374185136898e-06, "loss": 1.2377, "step": 362 }, { "epoch": 0.01420298927928633, "grad_norm": 0.0, "learning_rate": 9.465449804432857e-06, "loss": 1.3555, "step": 363 }, { "epoch": 0.014242115971515769, "grad_norm": 0.0, "learning_rate": 9.491525423728815e-06, "loss": 1.3267, "step": 364 }, { "epoch": 0.014281242663745208, "grad_norm": 0.0, "learning_rate": 9.517601043024772e-06, "loss": 1.2662, "step": 365 }, { "epoch": 0.014320369355974645, "grad_norm": 0.0, "learning_rate": 9.54367666232073e-06, "loss": 1.4364, "step": 366 }, { "epoch": 0.014359496048204084, "grad_norm": 0.0, "learning_rate": 9.56975228161669e-06, "loss": 1.2874, "step": 367 }, { "epoch": 0.014398622740433523, "grad_norm": 0.0, "learning_rate": 9.595827900912648e-06, "loss": 1.3748, "step": 368 }, { "epoch": 0.014437749432662962, "grad_norm": 0.0, "learning_rate": 9.621903520208605e-06, "loss": 1.376, "step": 369 }, { "epoch": 0.014476876124892402, "grad_norm": 0.0, "learning_rate": 9.647979139504564e-06, "loss": 1.3081, "step": 370 }, { "epoch": 0.01451600281712184, "grad_norm": 0.0, "learning_rate": 9.674054758800522e-06, "loss": 1.3496, "step": 371 }, { "epoch": 0.01455512950935128, "grad_norm": 0.0, "learning_rate": 9.70013037809648e-06, "loss": 1.2365, "step": 372 }, { "epoch": 0.014594256201580719, "grad_norm": 0.0, "learning_rate": 9.726205997392438e-06, "loss": 1.3702, "step": 373 }, { "epoch": 0.014633382893810156, "grad_norm": 0.0, "learning_rate": 9.752281616688396e-06, "loss": 1.2757, "step": 374 }, { "epoch": 0.014672509586039596, "grad_norm": 0.0, "learning_rate": 9.778357235984357e-06, "loss": 1.3745, "step": 375 }, { "epoch": 0.014711636278269035, "grad_norm": 0.0, "learning_rate": 9.804432855280314e-06, "loss": 1.2296, "step": 376 }, { "epoch": 0.014750762970498474, "grad_norm": 0.0, "learning_rate": 9.830508474576272e-06, "loss": 1.2949, "step": 377 }, { "epoch": 0.014789889662727913, "grad_norm": 0.0, "learning_rate": 9.856584093872231e-06, "loss": 1.2956, "step": 378 }, { "epoch": 0.014829016354957352, "grad_norm": 0.0, "learning_rate": 9.88265971316819e-06, "loss": 1.369, "step": 379 }, { "epoch": 0.014868143047186791, "grad_norm": 0.0, "learning_rate": 9.908735332464146e-06, "loss": 1.4247, "step": 380 }, { "epoch": 0.01490726973941623, "grad_norm": 0.0, "learning_rate": 9.934810951760105e-06, "loss": 1.2836, "step": 381 }, { "epoch": 0.01494639643164567, "grad_norm": 0.0, "learning_rate": 9.960886571056064e-06, "loss": 1.1904, "step": 382 }, { "epoch": 0.014985523123875107, "grad_norm": 0.0, "learning_rate": 9.986962190352022e-06, "loss": 1.4243, "step": 383 }, { "epoch": 0.015024649816104546, "grad_norm": 0.0, "learning_rate": 1.0013037809647981e-05, "loss": 1.3124, "step": 384 }, { "epoch": 0.015063776508333985, "grad_norm": 0.0, "learning_rate": 1.0039113428943938e-05, "loss": 1.2766, "step": 385 }, { "epoch": 0.015102903200563424, "grad_norm": 0.0, "learning_rate": 1.0065189048239897e-05, "loss": 1.3047, "step": 386 }, { "epoch": 0.015142029892792864, "grad_norm": 0.0, "learning_rate": 1.0091264667535853e-05, "loss": 1.3464, "step": 387 }, { "epoch": 0.015181156585022303, "grad_norm": 0.0, "learning_rate": 1.0117340286831814e-05, "loss": 1.2581, "step": 388 }, { "epoch": 0.015220283277251742, "grad_norm": 0.0, "learning_rate": 1.014341590612777e-05, "loss": 1.3329, "step": 389 }, { "epoch": 0.015259409969481181, "grad_norm": 0.0, "learning_rate": 1.016949152542373e-05, "loss": 1.266, "step": 390 }, { "epoch": 0.015298536661710618, "grad_norm": 0.0, "learning_rate": 1.0195567144719686e-05, "loss": 1.3263, "step": 391 }, { "epoch": 0.015337663353940057, "grad_norm": 0.0, "learning_rate": 1.0221642764015647e-05, "loss": 1.2424, "step": 392 }, { "epoch": 0.015376790046169497, "grad_norm": 0.0, "learning_rate": 1.0247718383311605e-05, "loss": 1.3334, "step": 393 }, { "epoch": 0.015415916738398936, "grad_norm": 0.0, "learning_rate": 1.0273794002607562e-05, "loss": 1.3246, "step": 394 }, { "epoch": 0.015455043430628375, "grad_norm": 0.0, "learning_rate": 1.0299869621903522e-05, "loss": 1.22, "step": 395 }, { "epoch": 0.015494170122857814, "grad_norm": 0.0, "learning_rate": 1.032594524119948e-05, "loss": 1.3192, "step": 396 }, { "epoch": 0.015533296815087253, "grad_norm": 0.0, "learning_rate": 1.0352020860495438e-05, "loss": 1.1805, "step": 397 }, { "epoch": 0.015572423507316692, "grad_norm": 0.0, "learning_rate": 1.0378096479791395e-05, "loss": 1.3291, "step": 398 }, { "epoch": 0.01561155019954613, "grad_norm": 0.0, "learning_rate": 1.0404172099087355e-05, "loss": 1.3257, "step": 399 }, { "epoch": 0.01565067689177557, "grad_norm": 0.0, "learning_rate": 1.0430247718383312e-05, "loss": 1.1376, "step": 400 }, { "epoch": 0.01568980358400501, "grad_norm": 0.0, "learning_rate": 1.0456323337679271e-05, "loss": 1.3307, "step": 401 }, { "epoch": 0.015728930276234447, "grad_norm": 0.0, "learning_rate": 1.0482398956975228e-05, "loss": 1.2794, "step": 402 }, { "epoch": 0.015768056968463885, "grad_norm": 0.0, "learning_rate": 1.0508474576271188e-05, "loss": 1.197, "step": 403 }, { "epoch": 0.015807183660693325, "grad_norm": 0.0, "learning_rate": 1.0534550195567145e-05, "loss": 1.3406, "step": 404 }, { "epoch": 0.015846310352922763, "grad_norm": 0.0, "learning_rate": 1.0560625814863104e-05, "loss": 1.366, "step": 405 }, { "epoch": 0.015885437045152204, "grad_norm": 0.0, "learning_rate": 1.058670143415906e-05, "loss": 1.387, "step": 406 }, { "epoch": 0.01592456373738164, "grad_norm": 0.0, "learning_rate": 1.0612777053455021e-05, "loss": 1.2883, "step": 407 }, { "epoch": 0.015963690429611082, "grad_norm": 0.0, "learning_rate": 1.063885267275098e-05, "loss": 1.3931, "step": 408 }, { "epoch": 0.01600281712184052, "grad_norm": 0.0, "learning_rate": 1.0664928292046937e-05, "loss": 1.3173, "step": 409 }, { "epoch": 0.01604194381406996, "grad_norm": 0.0, "learning_rate": 1.0691003911342893e-05, "loss": 1.3502, "step": 410 }, { "epoch": 0.016081070506299398, "grad_norm": 0.0, "learning_rate": 1.0717079530638854e-05, "loss": 1.3649, "step": 411 }, { "epoch": 0.016120197198528835, "grad_norm": 0.0, "learning_rate": 1.0743155149934812e-05, "loss": 1.385, "step": 412 }, { "epoch": 0.016159323890758276, "grad_norm": 0.0, "learning_rate": 1.076923076923077e-05, "loss": 1.3809, "step": 413 }, { "epoch": 0.016198450582987713, "grad_norm": 0.0, "learning_rate": 1.0795306388526728e-05, "loss": 1.3506, "step": 414 }, { "epoch": 0.016237577275217154, "grad_norm": 0.0, "learning_rate": 1.0821382007822687e-05, "loss": 1.3571, "step": 415 }, { "epoch": 0.01627670396744659, "grad_norm": 0.0, "learning_rate": 1.0847457627118645e-05, "loss": 1.3202, "step": 416 }, { "epoch": 0.016315830659676032, "grad_norm": 0.0, "learning_rate": 1.0873533246414602e-05, "loss": 1.3489, "step": 417 }, { "epoch": 0.01635495735190547, "grad_norm": 0.0, "learning_rate": 1.0899608865710562e-05, "loss": 1.2774, "step": 418 }, { "epoch": 0.016394084044134907, "grad_norm": 0.0, "learning_rate": 1.092568448500652e-05, "loss": 1.1565, "step": 419 }, { "epoch": 0.016433210736364348, "grad_norm": 0.0, "learning_rate": 1.0951760104302478e-05, "loss": 1.3238, "step": 420 }, { "epoch": 0.016472337428593786, "grad_norm": 0.0, "learning_rate": 1.0977835723598435e-05, "loss": 1.4148, "step": 421 }, { "epoch": 0.016511464120823226, "grad_norm": 0.0, "learning_rate": 1.1003911342894395e-05, "loss": 1.2267, "step": 422 }, { "epoch": 0.016550590813052664, "grad_norm": 0.0, "learning_rate": 1.1029986962190354e-05, "loss": 1.343, "step": 423 }, { "epoch": 0.016589717505282105, "grad_norm": 0.0, "learning_rate": 1.1056062581486311e-05, "loss": 1.3377, "step": 424 }, { "epoch": 0.016628844197511542, "grad_norm": 0.0, "learning_rate": 1.108213820078227e-05, "loss": 1.2331, "step": 425 }, { "epoch": 0.016667970889740983, "grad_norm": 0.0, "learning_rate": 1.1108213820078228e-05, "loss": 1.2964, "step": 426 }, { "epoch": 0.01670709758197042, "grad_norm": 0.0, "learning_rate": 1.1134289439374187e-05, "loss": 1.2442, "step": 427 }, { "epoch": 0.016746224274199858, "grad_norm": 0.0, "learning_rate": 1.1160365058670144e-05, "loss": 1.3762, "step": 428 }, { "epoch": 0.0167853509664293, "grad_norm": 0.0, "learning_rate": 1.1186440677966102e-05, "loss": 1.3043, "step": 429 }, { "epoch": 0.016824477658658736, "grad_norm": 0.0, "learning_rate": 1.1212516297262061e-05, "loss": 1.3174, "step": 430 }, { "epoch": 0.016863604350888177, "grad_norm": 0.0, "learning_rate": 1.123859191655802e-05, "loss": 1.2753, "step": 431 }, { "epoch": 0.016902731043117614, "grad_norm": 0.0, "learning_rate": 1.1264667535853976e-05, "loss": 1.3466, "step": 432 }, { "epoch": 0.016941857735347055, "grad_norm": 0.0, "learning_rate": 1.1290743155149935e-05, "loss": 1.3131, "step": 433 }, { "epoch": 0.016980984427576493, "grad_norm": 0.0, "learning_rate": 1.1316818774445895e-05, "loss": 1.3953, "step": 434 }, { "epoch": 0.01702011111980593, "grad_norm": 0.0, "learning_rate": 1.1342894393741852e-05, "loss": 1.2693, "step": 435 }, { "epoch": 0.01705923781203537, "grad_norm": 0.0, "learning_rate": 1.136897001303781e-05, "loss": 1.3479, "step": 436 }, { "epoch": 0.01709836450426481, "grad_norm": 0.0, "learning_rate": 1.139504563233377e-05, "loss": 1.2905, "step": 437 }, { "epoch": 0.01713749119649425, "grad_norm": 0.0, "learning_rate": 1.1421121251629728e-05, "loss": 1.3465, "step": 438 }, { "epoch": 0.017176617888723687, "grad_norm": 0.0, "learning_rate": 1.1447196870925685e-05, "loss": 1.3466, "step": 439 }, { "epoch": 0.017215744580953127, "grad_norm": 0.0, "learning_rate": 1.1473272490221644e-05, "loss": 1.1919, "step": 440 }, { "epoch": 0.017254871273182565, "grad_norm": 0.0, "learning_rate": 1.1499348109517602e-05, "loss": 1.3463, "step": 441 }, { "epoch": 0.017293997965412006, "grad_norm": 0.0, "learning_rate": 1.1525423728813561e-05, "loss": 1.3576, "step": 442 }, { "epoch": 0.017333124657641443, "grad_norm": 0.0, "learning_rate": 1.1551499348109518e-05, "loss": 1.3021, "step": 443 }, { "epoch": 0.01737225134987088, "grad_norm": 0.0, "learning_rate": 1.1577574967405477e-05, "loss": 1.2849, "step": 444 }, { "epoch": 0.01741137804210032, "grad_norm": 0.0, "learning_rate": 1.1603650586701435e-05, "loss": 1.3307, "step": 445 }, { "epoch": 0.01745050473432976, "grad_norm": 0.0, "learning_rate": 1.1629726205997394e-05, "loss": 1.2638, "step": 446 }, { "epoch": 0.0174896314265592, "grad_norm": 0.0, "learning_rate": 1.165580182529335e-05, "loss": 1.3296, "step": 447 }, { "epoch": 0.017528758118788637, "grad_norm": 0.0, "learning_rate": 1.168187744458931e-05, "loss": 1.2498, "step": 448 }, { "epoch": 0.017567884811018078, "grad_norm": 0.0, "learning_rate": 1.170795306388527e-05, "loss": 1.4353, "step": 449 }, { "epoch": 0.017607011503247515, "grad_norm": 0.0, "learning_rate": 1.1734028683181227e-05, "loss": 1.2798, "step": 450 }, { "epoch": 0.017646138195476953, "grad_norm": 0.0, "learning_rate": 1.1760104302477184e-05, "loss": 1.3431, "step": 451 }, { "epoch": 0.017685264887706394, "grad_norm": 0.0, "learning_rate": 1.1786179921773142e-05, "loss": 1.1878, "step": 452 }, { "epoch": 0.01772439157993583, "grad_norm": 0.0, "learning_rate": 1.1812255541069103e-05, "loss": 1.3752, "step": 453 }, { "epoch": 0.017763518272165272, "grad_norm": 0.0, "learning_rate": 1.183833116036506e-05, "loss": 1.3649, "step": 454 }, { "epoch": 0.01780264496439471, "grad_norm": 0.0, "learning_rate": 1.1864406779661018e-05, "loss": 1.2357, "step": 455 }, { "epoch": 0.01784177165662415, "grad_norm": 0.0, "learning_rate": 1.1890482398956975e-05, "loss": 1.295, "step": 456 }, { "epoch": 0.017880898348853588, "grad_norm": 0.0, "learning_rate": 1.1916558018252935e-05, "loss": 1.2719, "step": 457 }, { "epoch": 0.01792002504108303, "grad_norm": 0.0, "learning_rate": 1.1942633637548892e-05, "loss": 1.2902, "step": 458 }, { "epoch": 0.017959151733312466, "grad_norm": 0.0, "learning_rate": 1.1968709256844851e-05, "loss": 1.2144, "step": 459 }, { "epoch": 0.017998278425541903, "grad_norm": 0.0, "learning_rate": 1.199478487614081e-05, "loss": 1.2572, "step": 460 }, { "epoch": 0.018037405117771344, "grad_norm": 0.0, "learning_rate": 1.2020860495436768e-05, "loss": 1.2002, "step": 461 }, { "epoch": 0.01807653181000078, "grad_norm": 0.0, "learning_rate": 1.2046936114732725e-05, "loss": 1.3511, "step": 462 }, { "epoch": 0.018115658502230222, "grad_norm": 0.0, "learning_rate": 1.2073011734028684e-05, "loss": 1.3073, "step": 463 }, { "epoch": 0.01815478519445966, "grad_norm": 0.0, "learning_rate": 1.2099087353324644e-05, "loss": 1.254, "step": 464 }, { "epoch": 0.0181939118866891, "grad_norm": 0.0, "learning_rate": 1.2125162972620601e-05, "loss": 1.2787, "step": 465 }, { "epoch": 0.018233038578918538, "grad_norm": 0.0, "learning_rate": 1.2151238591916558e-05, "loss": 1.2566, "step": 466 }, { "epoch": 0.018272165271147976, "grad_norm": 0.0, "learning_rate": 1.2177314211212517e-05, "loss": 1.3323, "step": 467 }, { "epoch": 0.018311291963377416, "grad_norm": 0.0, "learning_rate": 1.2203389830508477e-05, "loss": 1.0441, "step": 468 }, { "epoch": 0.018350418655606854, "grad_norm": 0.0, "learning_rate": 1.2229465449804434e-05, "loss": 1.2524, "step": 469 }, { "epoch": 0.018389545347836295, "grad_norm": 0.0, "learning_rate": 1.2255541069100392e-05, "loss": 1.3871, "step": 470 }, { "epoch": 0.018428672040065732, "grad_norm": 0.0, "learning_rate": 1.228161668839635e-05, "loss": 1.3555, "step": 471 }, { "epoch": 0.018467798732295173, "grad_norm": 0.0, "learning_rate": 1.230769230769231e-05, "loss": 1.401, "step": 472 }, { "epoch": 0.01850692542452461, "grad_norm": 0.0, "learning_rate": 1.2333767926988267e-05, "loss": 1.3975, "step": 473 }, { "epoch": 0.01854605211675405, "grad_norm": 0.0, "learning_rate": 1.2359843546284225e-05, "loss": 1.3734, "step": 474 }, { "epoch": 0.01858517880898349, "grad_norm": 0.0, "learning_rate": 1.2385919165580182e-05, "loss": 1.3063, "step": 475 }, { "epoch": 0.018624305501212926, "grad_norm": 0.0, "learning_rate": 1.2411994784876143e-05, "loss": 1.2352, "step": 476 }, { "epoch": 0.018663432193442367, "grad_norm": 0.0, "learning_rate": 1.24380704041721e-05, "loss": 1.3312, "step": 477 }, { "epoch": 0.018702558885671804, "grad_norm": 0.0, "learning_rate": 1.2464146023468058e-05, "loss": 1.2395, "step": 478 }, { "epoch": 0.018741685577901245, "grad_norm": 0.0, "learning_rate": 1.2490221642764018e-05, "loss": 1.3203, "step": 479 }, { "epoch": 0.018780812270130683, "grad_norm": 0.0, "learning_rate": 1.2516297262059975e-05, "loss": 1.2645, "step": 480 }, { "epoch": 0.018819938962360123, "grad_norm": 0.0, "learning_rate": 1.2542372881355932e-05, "loss": 1.2374, "step": 481 }, { "epoch": 0.01885906565458956, "grad_norm": 0.0, "learning_rate": 1.2568448500651891e-05, "loss": 1.3541, "step": 482 }, { "epoch": 0.018898192346819, "grad_norm": 0.0, "learning_rate": 1.2594524119947851e-05, "loss": 1.2851, "step": 483 }, { "epoch": 0.01893731903904844, "grad_norm": 0.0, "learning_rate": 1.2620599739243808e-05, "loss": 1.3577, "step": 484 }, { "epoch": 0.018976445731277877, "grad_norm": 0.0, "learning_rate": 1.2646675358539767e-05, "loss": 1.2777, "step": 485 }, { "epoch": 0.019015572423507317, "grad_norm": 0.0, "learning_rate": 1.2672750977835724e-05, "loss": 1.3284, "step": 486 }, { "epoch": 0.019054699115736755, "grad_norm": 0.0, "learning_rate": 1.2698826597131684e-05, "loss": 1.2481, "step": 487 }, { "epoch": 0.019093825807966196, "grad_norm": 0.0, "learning_rate": 1.2724902216427641e-05, "loss": 1.2972, "step": 488 }, { "epoch": 0.019132952500195633, "grad_norm": 0.0, "learning_rate": 1.27509778357236e-05, "loss": 1.2696, "step": 489 }, { "epoch": 0.019172079192425074, "grad_norm": 0.0, "learning_rate": 1.2777053455019557e-05, "loss": 1.3395, "step": 490 }, { "epoch": 0.01921120588465451, "grad_norm": 0.0, "learning_rate": 1.2803129074315517e-05, "loss": 1.3043, "step": 491 }, { "epoch": 0.01925033257688395, "grad_norm": 0.0, "learning_rate": 1.2829204693611474e-05, "loss": 1.3562, "step": 492 }, { "epoch": 0.01928945926911339, "grad_norm": 0.0, "learning_rate": 1.2855280312907432e-05, "loss": 1.2086, "step": 493 }, { "epoch": 0.019328585961342827, "grad_norm": 0.0, "learning_rate": 1.288135593220339e-05, "loss": 1.3158, "step": 494 }, { "epoch": 0.019367712653572268, "grad_norm": 0.0, "learning_rate": 1.290743155149935e-05, "loss": 1.1956, "step": 495 }, { "epoch": 0.019406839345801705, "grad_norm": 0.0, "learning_rate": 1.2933507170795307e-05, "loss": 1.2387, "step": 496 }, { "epoch": 0.019445966038031146, "grad_norm": 0.0, "learning_rate": 1.2959582790091265e-05, "loss": 1.2819, "step": 497 }, { "epoch": 0.019485092730260584, "grad_norm": 0.0, "learning_rate": 1.2985658409387226e-05, "loss": 1.358, "step": 498 }, { "epoch": 0.01952421942249002, "grad_norm": 0.0, "learning_rate": 1.3011734028683183e-05, "loss": 1.2592, "step": 499 }, { "epoch": 0.019563346114719462, "grad_norm": 0.0, "learning_rate": 1.3037809647979141e-05, "loss": 1.2681, "step": 500 }, { "epoch": 0.0196024728069489, "grad_norm": 0.0, "learning_rate": 1.3063885267275098e-05, "loss": 1.2669, "step": 501 }, { "epoch": 0.01964159949917834, "grad_norm": 0.0, "learning_rate": 1.3089960886571058e-05, "loss": 1.397, "step": 502 }, { "epoch": 0.019680726191407778, "grad_norm": 0.0, "learning_rate": 1.3116036505867015e-05, "loss": 1.1334, "step": 503 }, { "epoch": 0.01971985288363722, "grad_norm": 0.0, "learning_rate": 1.3142112125162974e-05, "loss": 1.3112, "step": 504 }, { "epoch": 0.019758979575866656, "grad_norm": 0.0, "learning_rate": 1.3168187744458931e-05, "loss": 1.4437, "step": 505 }, { "epoch": 0.019798106268096097, "grad_norm": 0.0, "learning_rate": 1.3194263363754891e-05, "loss": 1.3493, "step": 506 }, { "epoch": 0.019837232960325534, "grad_norm": 0.0, "learning_rate": 1.3220338983050848e-05, "loss": 1.394, "step": 507 }, { "epoch": 0.01987635965255497, "grad_norm": 0.0, "learning_rate": 1.3246414602346807e-05, "loss": 1.316, "step": 508 }, { "epoch": 0.019915486344784412, "grad_norm": 0.0, "learning_rate": 1.3272490221642764e-05, "loss": 1.4286, "step": 509 }, { "epoch": 0.01995461303701385, "grad_norm": 0.0, "learning_rate": 1.3298565840938724e-05, "loss": 1.3679, "step": 510 }, { "epoch": 0.01999373972924329, "grad_norm": 0.0, "learning_rate": 1.3324641460234683e-05, "loss": 1.3904, "step": 511 }, { "epoch": 0.020032866421472728, "grad_norm": 0.0, "learning_rate": 1.335071707953064e-05, "loss": 1.4126, "step": 512 }, { "epoch": 0.02007199311370217, "grad_norm": 0.0, "learning_rate": 1.3376792698826597e-05, "loss": 1.2758, "step": 513 }, { "epoch": 0.020111119805931606, "grad_norm": 0.0, "learning_rate": 1.3402868318122557e-05, "loss": 1.3433, "step": 514 }, { "epoch": 0.020150246498161044, "grad_norm": 0.0, "learning_rate": 1.3428943937418515e-05, "loss": 1.2764, "step": 515 }, { "epoch": 0.020189373190390485, "grad_norm": 0.0, "learning_rate": 1.3455019556714472e-05, "loss": 1.4464, "step": 516 }, { "epoch": 0.020228499882619922, "grad_norm": 0.0, "learning_rate": 1.3481095176010431e-05, "loss": 1.3225, "step": 517 }, { "epoch": 0.020267626574849363, "grad_norm": 0.0, "learning_rate": 1.350717079530639e-05, "loss": 1.2516, "step": 518 }, { "epoch": 0.0203067532670788, "grad_norm": 0.0, "learning_rate": 1.3533246414602348e-05, "loss": 1.2319, "step": 519 }, { "epoch": 0.02034587995930824, "grad_norm": 0.0, "learning_rate": 1.3559322033898305e-05, "loss": 1.3469, "step": 520 }, { "epoch": 0.02038500665153768, "grad_norm": 0.0, "learning_rate": 1.3585397653194266e-05, "loss": 1.3397, "step": 521 }, { "epoch": 0.02042413334376712, "grad_norm": 0.0, "learning_rate": 1.3611473272490223e-05, "loss": 1.2921, "step": 522 }, { "epoch": 0.020463260035996557, "grad_norm": 0.0, "learning_rate": 1.3637548891786181e-05, "loss": 1.2772, "step": 523 }, { "epoch": 0.020502386728225994, "grad_norm": 0.0, "learning_rate": 1.3663624511082138e-05, "loss": 1.2426, "step": 524 }, { "epoch": 0.020541513420455435, "grad_norm": 0.0, "learning_rate": 1.3689700130378098e-05, "loss": 1.255, "step": 525 }, { "epoch": 0.020580640112684873, "grad_norm": 0.0, "learning_rate": 1.3715775749674057e-05, "loss": 1.2694, "step": 526 }, { "epoch": 0.020619766804914313, "grad_norm": 0.0, "learning_rate": 1.3741851368970014e-05, "loss": 1.2217, "step": 527 }, { "epoch": 0.02065889349714375, "grad_norm": 0.0, "learning_rate": 1.3767926988265971e-05, "loss": 1.3327, "step": 528 }, { "epoch": 0.020698020189373192, "grad_norm": 0.0, "learning_rate": 1.3794002607561931e-05, "loss": 1.283, "step": 529 }, { "epoch": 0.02073714688160263, "grad_norm": 0.0, "learning_rate": 1.382007822685789e-05, "loss": 1.2919, "step": 530 }, { "epoch": 0.020776273573832067, "grad_norm": 0.0, "learning_rate": 1.3846153846153847e-05, "loss": 1.2773, "step": 531 }, { "epoch": 0.020815400266061507, "grad_norm": 0.0, "learning_rate": 1.3872229465449805e-05, "loss": 1.3373, "step": 532 }, { "epoch": 0.020854526958290945, "grad_norm": 0.0, "learning_rate": 1.3898305084745764e-05, "loss": 1.3696, "step": 533 }, { "epoch": 0.020893653650520386, "grad_norm": 0.0, "learning_rate": 1.3924380704041723e-05, "loss": 1.267, "step": 534 }, { "epoch": 0.020932780342749823, "grad_norm": 0.0, "learning_rate": 1.395045632333768e-05, "loss": 1.3962, "step": 535 }, { "epoch": 0.020971907034979264, "grad_norm": 0.0, "learning_rate": 1.3976531942633638e-05, "loss": 1.2551, "step": 536 }, { "epoch": 0.0210110337272087, "grad_norm": 0.0, "learning_rate": 1.4002607561929597e-05, "loss": 1.4036, "step": 537 }, { "epoch": 0.021050160419438142, "grad_norm": 0.0, "learning_rate": 1.4028683181225555e-05, "loss": 1.4045, "step": 538 }, { "epoch": 0.02108928711166758, "grad_norm": 0.0, "learning_rate": 1.4054758800521512e-05, "loss": 1.2567, "step": 539 }, { "epoch": 0.021128413803897017, "grad_norm": 0.0, "learning_rate": 1.4080834419817473e-05, "loss": 1.3707, "step": 540 }, { "epoch": 0.021167540496126458, "grad_norm": 0.0, "learning_rate": 1.4106910039113431e-05, "loss": 1.2898, "step": 541 }, { "epoch": 0.021206667188355895, "grad_norm": 0.0, "learning_rate": 1.4132985658409388e-05, "loss": 1.3692, "step": 542 }, { "epoch": 0.021245793880585336, "grad_norm": 0.0, "learning_rate": 1.4159061277705345e-05, "loss": 1.3638, "step": 543 }, { "epoch": 0.021284920572814774, "grad_norm": 0.0, "learning_rate": 1.4185136897001306e-05, "loss": 1.2377, "step": 544 }, { "epoch": 0.021324047265044214, "grad_norm": 0.0, "learning_rate": 1.4211212516297264e-05, "loss": 1.3616, "step": 545 }, { "epoch": 0.021363173957273652, "grad_norm": 0.0, "learning_rate": 1.4237288135593221e-05, "loss": 1.4467, "step": 546 }, { "epoch": 0.02140230064950309, "grad_norm": 0.0, "learning_rate": 1.426336375488918e-05, "loss": 1.3633, "step": 547 }, { "epoch": 0.02144142734173253, "grad_norm": 0.0, "learning_rate": 1.4289439374185138e-05, "loss": 1.2937, "step": 548 }, { "epoch": 0.021480554033961968, "grad_norm": 0.0, "learning_rate": 1.4315514993481097e-05, "loss": 1.3168, "step": 549 }, { "epoch": 0.02151968072619141, "grad_norm": 0.0, "learning_rate": 1.4341590612777054e-05, "loss": 1.2364, "step": 550 }, { "epoch": 0.021558807418420846, "grad_norm": 0.0, "learning_rate": 1.4367666232073013e-05, "loss": 1.2286, "step": 551 }, { "epoch": 0.021597934110650287, "grad_norm": 0.0, "learning_rate": 1.4393741851368971e-05, "loss": 1.3817, "step": 552 }, { "epoch": 0.021637060802879724, "grad_norm": 0.0, "learning_rate": 1.441981747066493e-05, "loss": 1.3618, "step": 553 }, { "epoch": 0.021676187495109165, "grad_norm": 0.0, "learning_rate": 1.4445893089960887e-05, "loss": 1.1876, "step": 554 }, { "epoch": 0.021715314187338602, "grad_norm": 0.0, "learning_rate": 1.4471968709256845e-05, "loss": 1.3948, "step": 555 }, { "epoch": 0.02175444087956804, "grad_norm": 0.0, "learning_rate": 1.4498044328552806e-05, "loss": 1.1573, "step": 556 }, { "epoch": 0.02179356757179748, "grad_norm": 0.0, "learning_rate": 1.4524119947848763e-05, "loss": 1.1481, "step": 557 }, { "epoch": 0.021832694264026918, "grad_norm": 0.0, "learning_rate": 1.455019556714472e-05, "loss": 1.2844, "step": 558 }, { "epoch": 0.02187182095625636, "grad_norm": 0.0, "learning_rate": 1.4576271186440678e-05, "loss": 1.3307, "step": 559 }, { "epoch": 0.021910947648485796, "grad_norm": 0.0, "learning_rate": 1.4602346805736639e-05, "loss": 1.392, "step": 560 }, { "epoch": 0.021950074340715237, "grad_norm": 0.0, "learning_rate": 1.4628422425032595e-05, "loss": 1.2487, "step": 561 }, { "epoch": 0.021989201032944675, "grad_norm": 0.0, "learning_rate": 1.4654498044328554e-05, "loss": 1.3196, "step": 562 }, { "epoch": 0.022028327725174112, "grad_norm": 0.0, "learning_rate": 1.4680573663624513e-05, "loss": 1.1874, "step": 563 }, { "epoch": 0.022067454417403553, "grad_norm": 0.0, "learning_rate": 1.4706649282920471e-05, "loss": 1.2336, "step": 564 }, { "epoch": 0.02210658110963299, "grad_norm": 0.0, "learning_rate": 1.4732724902216428e-05, "loss": 1.2797, "step": 565 }, { "epoch": 0.02214570780186243, "grad_norm": 0.0, "learning_rate": 1.4758800521512387e-05, "loss": 1.2125, "step": 566 }, { "epoch": 0.02218483449409187, "grad_norm": 0.0, "learning_rate": 1.4784876140808346e-05, "loss": 1.3219, "step": 567 }, { "epoch": 0.02222396118632131, "grad_norm": 0.0, "learning_rate": 1.4810951760104304e-05, "loss": 1.2401, "step": 568 }, { "epoch": 0.022263087878550747, "grad_norm": 0.0, "learning_rate": 1.4837027379400261e-05, "loss": 1.3568, "step": 569 }, { "epoch": 0.022302214570780188, "grad_norm": 0.0, "learning_rate": 1.486310299869622e-05, "loss": 1.3213, "step": 570 }, { "epoch": 0.022341341263009625, "grad_norm": 0.0, "learning_rate": 1.488917861799218e-05, "loss": 1.1525, "step": 571 }, { "epoch": 0.022380467955239063, "grad_norm": 0.0, "learning_rate": 1.4915254237288137e-05, "loss": 1.3313, "step": 572 }, { "epoch": 0.022419594647468503, "grad_norm": 0.0, "learning_rate": 1.4941329856584096e-05, "loss": 1.3093, "step": 573 }, { "epoch": 0.02245872133969794, "grad_norm": 0.0, "learning_rate": 1.4967405475880053e-05, "loss": 1.2823, "step": 574 }, { "epoch": 0.02249784803192738, "grad_norm": 0.0, "learning_rate": 1.4993481095176013e-05, "loss": 1.3871, "step": 575 }, { "epoch": 0.02253697472415682, "grad_norm": 0.0, "learning_rate": 1.501955671447197e-05, "loss": 1.316, "step": 576 }, { "epoch": 0.02257610141638626, "grad_norm": 0.0, "learning_rate": 1.5045632333767928e-05, "loss": 1.2987, "step": 577 }, { "epoch": 0.022615228108615697, "grad_norm": 0.0, "learning_rate": 1.5071707953063885e-05, "loss": 1.2885, "step": 578 }, { "epoch": 0.02265435480084514, "grad_norm": 0.0, "learning_rate": 1.5097783572359846e-05, "loss": 1.3373, "step": 579 }, { "epoch": 0.022693481493074576, "grad_norm": 0.0, "learning_rate": 1.5123859191655803e-05, "loss": 1.3535, "step": 580 }, { "epoch": 0.022732608185304013, "grad_norm": 0.0, "learning_rate": 1.5149934810951761e-05, "loss": 1.2526, "step": 581 }, { "epoch": 0.022771734877533454, "grad_norm": 0.0, "learning_rate": 1.517601043024772e-05, "loss": 1.3046, "step": 582 }, { "epoch": 0.02281086156976289, "grad_norm": 0.0, "learning_rate": 1.5202086049543678e-05, "loss": 1.2877, "step": 583 }, { "epoch": 0.022849988261992332, "grad_norm": 0.0, "learning_rate": 1.5228161668839635e-05, "loss": 1.2001, "step": 584 }, { "epoch": 0.02288911495422177, "grad_norm": 0.0, "learning_rate": 1.5254237288135594e-05, "loss": 1.3007, "step": 585 }, { "epoch": 0.02292824164645121, "grad_norm": 0.0, "learning_rate": 1.5280312907431553e-05, "loss": 1.2885, "step": 586 }, { "epoch": 0.022967368338680648, "grad_norm": 0.0, "learning_rate": 1.5306388526727513e-05, "loss": 1.2919, "step": 587 }, { "epoch": 0.023006495030910085, "grad_norm": 0.0, "learning_rate": 1.533246414602347e-05, "loss": 1.3212, "step": 588 }, { "epoch": 0.023045621723139526, "grad_norm": 0.0, "learning_rate": 1.5358539765319427e-05, "loss": 1.2966, "step": 589 }, { "epoch": 0.023084748415368964, "grad_norm": 0.0, "learning_rate": 1.5384615384615387e-05, "loss": 1.3199, "step": 590 }, { "epoch": 0.023123875107598404, "grad_norm": 0.0, "learning_rate": 1.5410691003911344e-05, "loss": 1.3271, "step": 591 }, { "epoch": 0.023163001799827842, "grad_norm": 0.0, "learning_rate": 1.54367666232073e-05, "loss": 1.2537, "step": 592 }, { "epoch": 0.023202128492057283, "grad_norm": 0.0, "learning_rate": 1.546284224250326e-05, "loss": 1.2363, "step": 593 }, { "epoch": 0.02324125518428672, "grad_norm": 0.0, "learning_rate": 1.5488917861799218e-05, "loss": 1.3585, "step": 594 }, { "epoch": 0.02328038187651616, "grad_norm": 0.0, "learning_rate": 1.551499348109518e-05, "loss": 1.356, "step": 595 }, { "epoch": 0.0233195085687456, "grad_norm": 0.0, "learning_rate": 1.5541069100391136e-05, "loss": 1.3203, "step": 596 }, { "epoch": 0.023358635260975036, "grad_norm": 0.0, "learning_rate": 1.5567144719687092e-05, "loss": 1.3473, "step": 597 }, { "epoch": 0.023397761953204477, "grad_norm": 0.0, "learning_rate": 1.5593220338983053e-05, "loss": 1.2452, "step": 598 }, { "epoch": 0.023436888645433914, "grad_norm": 0.0, "learning_rate": 1.561929595827901e-05, "loss": 1.2676, "step": 599 }, { "epoch": 0.023476015337663355, "grad_norm": 0.0, "learning_rate": 1.5645371577574967e-05, "loss": 1.2343, "step": 600 }, { "epoch": 0.023515142029892792, "grad_norm": 0.0, "learning_rate": 1.5671447196870927e-05, "loss": 1.2606, "step": 601 }, { "epoch": 0.023554268722122233, "grad_norm": 0.0, "learning_rate": 1.5697522816166887e-05, "loss": 1.3198, "step": 602 }, { "epoch": 0.02359339541435167, "grad_norm": 0.0, "learning_rate": 1.5723598435462844e-05, "loss": 1.2146, "step": 603 }, { "epoch": 0.023632522106581108, "grad_norm": 0.0, "learning_rate": 1.57496740547588e-05, "loss": 1.302, "step": 604 }, { "epoch": 0.02367164879881055, "grad_norm": 0.0, "learning_rate": 1.577574967405476e-05, "loss": 1.436, "step": 605 }, { "epoch": 0.023710775491039986, "grad_norm": 0.0, "learning_rate": 1.580182529335072e-05, "loss": 1.2823, "step": 606 }, { "epoch": 0.023749902183269427, "grad_norm": 0.0, "learning_rate": 1.5827900912646675e-05, "loss": 1.3143, "step": 607 }, { "epoch": 0.023789028875498865, "grad_norm": 0.0, "learning_rate": 1.5853976531942636e-05, "loss": 1.412, "step": 608 }, { "epoch": 0.023828155567728306, "grad_norm": 0.0, "learning_rate": 1.5880052151238593e-05, "loss": 1.3329, "step": 609 }, { "epoch": 0.023867282259957743, "grad_norm": 0.0, "learning_rate": 1.5906127770534553e-05, "loss": 1.2751, "step": 610 }, { "epoch": 0.023906408952187184, "grad_norm": 0.0, "learning_rate": 1.593220338983051e-05, "loss": 1.2751, "step": 611 }, { "epoch": 0.02394553564441662, "grad_norm": 0.0, "learning_rate": 1.5958279009126467e-05, "loss": 1.2504, "step": 612 }, { "epoch": 0.02398466233664606, "grad_norm": 0.0, "learning_rate": 1.5984354628422427e-05, "loss": 1.3025, "step": 613 }, { "epoch": 0.0240237890288755, "grad_norm": 0.0, "learning_rate": 1.6010430247718384e-05, "loss": 1.2187, "step": 614 }, { "epoch": 0.024062915721104937, "grad_norm": 0.0, "learning_rate": 1.603650586701434e-05, "loss": 1.2695, "step": 615 }, { "epoch": 0.024102042413334378, "grad_norm": 0.0, "learning_rate": 1.60625814863103e-05, "loss": 1.306, "step": 616 }, { "epoch": 0.024141169105563815, "grad_norm": 0.0, "learning_rate": 1.608865710560626e-05, "loss": 1.1747, "step": 617 }, { "epoch": 0.024180295797793256, "grad_norm": 0.0, "learning_rate": 1.611473272490222e-05, "loss": 1.327, "step": 618 }, { "epoch": 0.024219422490022693, "grad_norm": 0.0, "learning_rate": 1.6140808344198176e-05, "loss": 1.3613, "step": 619 }, { "epoch": 0.02425854918225213, "grad_norm": 0.0, "learning_rate": 1.6166883963494132e-05, "loss": 1.3497, "step": 620 }, { "epoch": 0.02429767587448157, "grad_norm": 0.0, "learning_rate": 1.6192959582790093e-05, "loss": 1.4113, "step": 621 }, { "epoch": 0.02433680256671101, "grad_norm": 0.0, "learning_rate": 1.621903520208605e-05, "loss": 1.1857, "step": 622 }, { "epoch": 0.02437592925894045, "grad_norm": 0.0, "learning_rate": 1.624511082138201e-05, "loss": 1.302, "step": 623 }, { "epoch": 0.024415055951169887, "grad_norm": 0.0, "learning_rate": 1.6271186440677967e-05, "loss": 1.2444, "step": 624 }, { "epoch": 0.024454182643399328, "grad_norm": 0.0, "learning_rate": 1.6297262059973927e-05, "loss": 1.3859, "step": 625 }, { "epoch": 0.024493309335628766, "grad_norm": 0.0, "learning_rate": 1.6323337679269884e-05, "loss": 1.3608, "step": 626 }, { "epoch": 0.024532436027858207, "grad_norm": 0.0, "learning_rate": 1.634941329856584e-05, "loss": 1.3057, "step": 627 }, { "epoch": 0.024571562720087644, "grad_norm": 0.0, "learning_rate": 1.63754889178618e-05, "loss": 1.3798, "step": 628 }, { "epoch": 0.02461068941231708, "grad_norm": 0.0, "learning_rate": 1.640156453715776e-05, "loss": 1.3898, "step": 629 }, { "epoch": 0.024649816104546522, "grad_norm": 0.0, "learning_rate": 1.6427640156453715e-05, "loss": 1.2563, "step": 630 }, { "epoch": 0.02468894279677596, "grad_norm": 0.0, "learning_rate": 1.6453715775749676e-05, "loss": 1.3121, "step": 631 }, { "epoch": 0.0247280694890054, "grad_norm": 0.0, "learning_rate": 1.6479791395045636e-05, "loss": 1.2454, "step": 632 }, { "epoch": 0.024767196181234838, "grad_norm": 0.0, "learning_rate": 1.6505867014341593e-05, "loss": 1.4177, "step": 633 }, { "epoch": 0.02480632287346428, "grad_norm": 0.0, "learning_rate": 1.653194263363755e-05, "loss": 1.3256, "step": 634 }, { "epoch": 0.024845449565693716, "grad_norm": 0.0, "learning_rate": 1.6558018252933507e-05, "loss": 1.3937, "step": 635 }, { "epoch": 0.024884576257923154, "grad_norm": 0.0, "learning_rate": 1.6584093872229467e-05, "loss": 1.3501, "step": 636 }, { "epoch": 0.024923702950152594, "grad_norm": 0.0, "learning_rate": 1.6610169491525424e-05, "loss": 1.2028, "step": 637 }, { "epoch": 0.024962829642382032, "grad_norm": 0.0, "learning_rate": 1.6636245110821384e-05, "loss": 1.3718, "step": 638 }, { "epoch": 0.025001956334611473, "grad_norm": 0.0, "learning_rate": 1.666232073011734e-05, "loss": 1.2925, "step": 639 }, { "epoch": 0.02504108302684091, "grad_norm": 0.0, "learning_rate": 1.66883963494133e-05, "loss": 1.3925, "step": 640 }, { "epoch": 0.02508020971907035, "grad_norm": 0.0, "learning_rate": 1.671447196870926e-05, "loss": 1.3677, "step": 641 }, { "epoch": 0.02511933641129979, "grad_norm": 0.0, "learning_rate": 1.6740547588005215e-05, "loss": 1.2722, "step": 642 }, { "epoch": 0.02515846310352923, "grad_norm": 0.0, "learning_rate": 1.6766623207301176e-05, "loss": 1.2402, "step": 643 }, { "epoch": 0.025197589795758667, "grad_norm": 0.0, "learning_rate": 1.6792698826597133e-05, "loss": 1.2477, "step": 644 }, { "epoch": 0.025236716487988104, "grad_norm": 0.0, "learning_rate": 1.681877444589309e-05, "loss": 1.1802, "step": 645 }, { "epoch": 0.025275843180217545, "grad_norm": 0.0, "learning_rate": 1.684485006518905e-05, "loss": 1.3247, "step": 646 }, { "epoch": 0.025314969872446982, "grad_norm": 0.0, "learning_rate": 1.687092568448501e-05, "loss": 1.374, "step": 647 }, { "epoch": 0.025354096564676423, "grad_norm": 0.0, "learning_rate": 1.6897001303780967e-05, "loss": 1.2789, "step": 648 }, { "epoch": 0.02539322325690586, "grad_norm": 0.0, "learning_rate": 1.6923076923076924e-05, "loss": 1.2743, "step": 649 }, { "epoch": 0.0254323499491353, "grad_norm": 0.0, "learning_rate": 1.694915254237288e-05, "loss": 1.3802, "step": 650 }, { "epoch": 0.02547147664136474, "grad_norm": 0.0, "learning_rate": 1.697522816166884e-05, "loss": 1.3582, "step": 651 }, { "epoch": 0.025510603333594176, "grad_norm": 0.0, "learning_rate": 1.70013037809648e-05, "loss": 1.3396, "step": 652 }, { "epoch": 0.025549730025823617, "grad_norm": 0.0, "learning_rate": 1.702737940026076e-05, "loss": 1.269, "step": 653 }, { "epoch": 0.025588856718053055, "grad_norm": 0.0, "learning_rate": 1.7053455019556716e-05, "loss": 1.3394, "step": 654 }, { "epoch": 0.025627983410282495, "grad_norm": 0.0, "learning_rate": 1.7079530638852676e-05, "loss": 1.3657, "step": 655 }, { "epoch": 0.025667110102511933, "grad_norm": 0.0, "learning_rate": 1.7105606258148633e-05, "loss": 1.3312, "step": 656 }, { "epoch": 0.025706236794741374, "grad_norm": 0.0, "learning_rate": 1.713168187744459e-05, "loss": 1.3413, "step": 657 }, { "epoch": 0.02574536348697081, "grad_norm": 0.0, "learning_rate": 1.7157757496740547e-05, "loss": 1.3212, "step": 658 }, { "epoch": 0.025784490179200252, "grad_norm": 0.0, "learning_rate": 1.7183833116036507e-05, "loss": 1.3818, "step": 659 }, { "epoch": 0.02582361687142969, "grad_norm": 0.0, "learning_rate": 1.7209908735332464e-05, "loss": 1.2554, "step": 660 }, { "epoch": 0.025862743563659127, "grad_norm": 0.0, "learning_rate": 1.7235984354628424e-05, "loss": 1.2344, "step": 661 }, { "epoch": 0.025901870255888568, "grad_norm": 0.0, "learning_rate": 1.726205997392438e-05, "loss": 1.3683, "step": 662 }, { "epoch": 0.025940996948118005, "grad_norm": 0.0, "learning_rate": 1.728813559322034e-05, "loss": 1.2631, "step": 663 }, { "epoch": 0.025980123640347446, "grad_norm": 0.0, "learning_rate": 1.73142112125163e-05, "loss": 1.312, "step": 664 }, { "epoch": 0.026019250332576883, "grad_norm": 0.0, "learning_rate": 1.7340286831812255e-05, "loss": 1.2645, "step": 665 }, { "epoch": 0.026058377024806324, "grad_norm": 0.0, "learning_rate": 1.7366362451108216e-05, "loss": 1.3475, "step": 666 }, { "epoch": 0.02609750371703576, "grad_norm": 0.0, "learning_rate": 1.7392438070404173e-05, "loss": 1.3962, "step": 667 }, { "epoch": 0.0261366304092652, "grad_norm": 0.0, "learning_rate": 1.7418513689700133e-05, "loss": 1.3545, "step": 668 }, { "epoch": 0.02617575710149464, "grad_norm": 0.0, "learning_rate": 1.744458930899609e-05, "loss": 1.261, "step": 669 }, { "epoch": 0.026214883793724077, "grad_norm": 0.0, "learning_rate": 1.747066492829205e-05, "loss": 1.2177, "step": 670 }, { "epoch": 0.026254010485953518, "grad_norm": 0.0, "learning_rate": 1.7496740547588007e-05, "loss": 1.4032, "step": 671 }, { "epoch": 0.026293137178182956, "grad_norm": 0.0, "learning_rate": 1.7522816166883964e-05, "loss": 1.3062, "step": 672 }, { "epoch": 0.026332263870412397, "grad_norm": 0.0, "learning_rate": 1.754889178617992e-05, "loss": 1.3318, "step": 673 }, { "epoch": 0.026371390562641834, "grad_norm": 0.0, "learning_rate": 1.757496740547588e-05, "loss": 1.3521, "step": 674 }, { "epoch": 0.026410517254871275, "grad_norm": 0.0, "learning_rate": 1.760104302477184e-05, "loss": 1.2144, "step": 675 }, { "epoch": 0.026449643947100712, "grad_norm": 0.0, "learning_rate": 1.76271186440678e-05, "loss": 1.3651, "step": 676 }, { "epoch": 0.02648877063933015, "grad_norm": 0.0, "learning_rate": 1.7653194263363756e-05, "loss": 1.3687, "step": 677 }, { "epoch": 0.02652789733155959, "grad_norm": 0.0, "learning_rate": 1.7679269882659716e-05, "loss": 1.3085, "step": 678 }, { "epoch": 0.026567024023789028, "grad_norm": 0.0, "learning_rate": 1.7705345501955673e-05, "loss": 1.3804, "step": 679 }, { "epoch": 0.02660615071601847, "grad_norm": 0.0, "learning_rate": 1.773142112125163e-05, "loss": 1.3939, "step": 680 }, { "epoch": 0.026645277408247906, "grad_norm": 0.0, "learning_rate": 1.7757496740547587e-05, "loss": 1.3356, "step": 681 }, { "epoch": 0.026684404100477347, "grad_norm": 0.0, "learning_rate": 1.7783572359843547e-05, "loss": 1.2709, "step": 682 }, { "epoch": 0.026723530792706784, "grad_norm": 0.0, "learning_rate": 1.7809647979139507e-05, "loss": 1.2949, "step": 683 }, { "epoch": 0.026762657484936222, "grad_norm": 0.0, "learning_rate": 1.7835723598435464e-05, "loss": 1.277, "step": 684 }, { "epoch": 0.026801784177165663, "grad_norm": 0.0, "learning_rate": 1.7861799217731425e-05, "loss": 1.3973, "step": 685 }, { "epoch": 0.0268409108693951, "grad_norm": 0.0, "learning_rate": 1.788787483702738e-05, "loss": 1.2893, "step": 686 }, { "epoch": 0.02688003756162454, "grad_norm": 0.0, "learning_rate": 1.791395045632334e-05, "loss": 1.3168, "step": 687 }, { "epoch": 0.02691916425385398, "grad_norm": 0.0, "learning_rate": 1.7940026075619295e-05, "loss": 1.3116, "step": 688 }, { "epoch": 0.02695829094608342, "grad_norm": 0.0, "learning_rate": 1.7966101694915256e-05, "loss": 1.2865, "step": 689 }, { "epoch": 0.026997417638312857, "grad_norm": 0.0, "learning_rate": 1.7992177314211213e-05, "loss": 1.3519, "step": 690 }, { "epoch": 0.027036544330542298, "grad_norm": 0.0, "learning_rate": 1.8018252933507173e-05, "loss": 1.2482, "step": 691 }, { "epoch": 0.027075671022771735, "grad_norm": 0.0, "learning_rate": 1.804432855280313e-05, "loss": 1.2324, "step": 692 }, { "epoch": 0.027114797715001172, "grad_norm": 0.0, "learning_rate": 1.807040417209909e-05, "loss": 1.296, "step": 693 }, { "epoch": 0.027153924407230613, "grad_norm": 0.0, "learning_rate": 1.8096479791395047e-05, "loss": 1.2523, "step": 694 }, { "epoch": 0.02719305109946005, "grad_norm": 0.0, "learning_rate": 1.8122555410691004e-05, "loss": 1.3862, "step": 695 }, { "epoch": 0.02723217779168949, "grad_norm": 0.0, "learning_rate": 1.8148631029986964e-05, "loss": 1.2624, "step": 696 }, { "epoch": 0.02727130448391893, "grad_norm": 0.0, "learning_rate": 1.817470664928292e-05, "loss": 1.2974, "step": 697 }, { "epoch": 0.02731043117614837, "grad_norm": 0.0, "learning_rate": 1.8200782268578882e-05, "loss": 1.3933, "step": 698 }, { "epoch": 0.027349557868377807, "grad_norm": 0.0, "learning_rate": 1.822685788787484e-05, "loss": 1.3102, "step": 699 }, { "epoch": 0.027388684560607245, "grad_norm": 0.0, "learning_rate": 1.8252933507170796e-05, "loss": 1.4967, "step": 700 }, { "epoch": 0.027427811252836685, "grad_norm": 0.0, "learning_rate": 1.8279009126466756e-05, "loss": 1.3389, "step": 701 }, { "epoch": 0.027466937945066123, "grad_norm": 0.0, "learning_rate": 1.8305084745762713e-05, "loss": 1.3268, "step": 702 }, { "epoch": 0.027506064637295564, "grad_norm": 0.0, "learning_rate": 1.833116036505867e-05, "loss": 1.3375, "step": 703 }, { "epoch": 0.027545191329525, "grad_norm": 0.0, "learning_rate": 1.835723598435463e-05, "loss": 1.2125, "step": 704 }, { "epoch": 0.027584318021754442, "grad_norm": 0.0, "learning_rate": 1.838331160365059e-05, "loss": 1.3193, "step": 705 }, { "epoch": 0.02762344471398388, "grad_norm": 0.0, "learning_rate": 1.8409387222946547e-05, "loss": 1.2108, "step": 706 }, { "epoch": 0.02766257140621332, "grad_norm": 0.0, "learning_rate": 1.8435462842242504e-05, "loss": 1.1917, "step": 707 }, { "epoch": 0.027701698098442758, "grad_norm": 0.0, "learning_rate": 1.8461538461538465e-05, "loss": 1.3711, "step": 708 }, { "epoch": 0.027740824790672195, "grad_norm": 0.0, "learning_rate": 1.848761408083442e-05, "loss": 1.3287, "step": 709 }, { "epoch": 0.027779951482901636, "grad_norm": 0.0, "learning_rate": 1.851368970013038e-05, "loss": 1.3901, "step": 710 }, { "epoch": 0.027819078175131073, "grad_norm": 0.0, "learning_rate": 1.853976531942634e-05, "loss": 1.2802, "step": 711 }, { "epoch": 0.027858204867360514, "grad_norm": 0.0, "learning_rate": 1.8565840938722296e-05, "loss": 1.2836, "step": 712 }, { "epoch": 0.02789733155958995, "grad_norm": 0.0, "learning_rate": 1.8591916558018256e-05, "loss": 1.2554, "step": 713 }, { "epoch": 0.027936458251819393, "grad_norm": 0.0, "learning_rate": 1.8617992177314213e-05, "loss": 1.2759, "step": 714 }, { "epoch": 0.02797558494404883, "grad_norm": 0.0, "learning_rate": 1.864406779661017e-05, "loss": 1.2703, "step": 715 }, { "epoch": 0.028014711636278267, "grad_norm": 0.0, "learning_rate": 1.867014341590613e-05, "loss": 1.4824, "step": 716 }, { "epoch": 0.028053838328507708, "grad_norm": 0.0, "learning_rate": 1.8696219035202087e-05, "loss": 1.2855, "step": 717 }, { "epoch": 0.028092965020737146, "grad_norm": 0.0, "learning_rate": 1.8722294654498044e-05, "loss": 1.3131, "step": 718 }, { "epoch": 0.028132091712966587, "grad_norm": 0.0, "learning_rate": 1.8748370273794004e-05, "loss": 1.2399, "step": 719 }, { "epoch": 0.028171218405196024, "grad_norm": 0.0, "learning_rate": 1.8774445893089965e-05, "loss": 1.2284, "step": 720 }, { "epoch": 0.028210345097425465, "grad_norm": 0.0, "learning_rate": 1.880052151238592e-05, "loss": 1.3964, "step": 721 }, { "epoch": 0.028249471789654902, "grad_norm": 0.0, "learning_rate": 1.882659713168188e-05, "loss": 1.3767, "step": 722 }, { "epoch": 0.028288598481884343, "grad_norm": 0.0, "learning_rate": 1.8852672750977836e-05, "loss": 1.2424, "step": 723 }, { "epoch": 0.02832772517411378, "grad_norm": 0.0, "learning_rate": 1.8878748370273796e-05, "loss": 1.3093, "step": 724 }, { "epoch": 0.028366851866343218, "grad_norm": 0.0, "learning_rate": 1.8904823989569753e-05, "loss": 1.4847, "step": 725 }, { "epoch": 0.02840597855857266, "grad_norm": 0.0, "learning_rate": 1.8930899608865713e-05, "loss": 1.2566, "step": 726 }, { "epoch": 0.028445105250802096, "grad_norm": 0.0, "learning_rate": 1.895697522816167e-05, "loss": 1.424, "step": 727 }, { "epoch": 0.028484231943031537, "grad_norm": 0.0, "learning_rate": 1.898305084745763e-05, "loss": 1.2705, "step": 728 }, { "epoch": 0.028523358635260974, "grad_norm": 0.0, "learning_rate": 1.9009126466753587e-05, "loss": 1.2917, "step": 729 }, { "epoch": 0.028562485327490415, "grad_norm": 0.0, "learning_rate": 1.9035202086049544e-05, "loss": 1.3721, "step": 730 }, { "epoch": 0.028601612019719853, "grad_norm": 0.0, "learning_rate": 1.9061277705345505e-05, "loss": 1.2763, "step": 731 }, { "epoch": 0.02864073871194929, "grad_norm": 0.0, "learning_rate": 1.908735332464146e-05, "loss": 1.3436, "step": 732 }, { "epoch": 0.02867986540417873, "grad_norm": 0.0, "learning_rate": 1.911342894393742e-05, "loss": 1.2905, "step": 733 }, { "epoch": 0.02871899209640817, "grad_norm": 0.0, "learning_rate": 1.913950456323338e-05, "loss": 1.2676, "step": 734 }, { "epoch": 0.02875811878863761, "grad_norm": 0.0, "learning_rate": 1.916558018252934e-05, "loss": 1.1979, "step": 735 }, { "epoch": 0.028797245480867047, "grad_norm": 0.0, "learning_rate": 1.9191655801825296e-05, "loss": 1.1898, "step": 736 }, { "epoch": 0.028836372173096488, "grad_norm": 0.0, "learning_rate": 1.9217731421121253e-05, "loss": 1.2773, "step": 737 }, { "epoch": 0.028875498865325925, "grad_norm": 0.0, "learning_rate": 1.924380704041721e-05, "loss": 1.3876, "step": 738 }, { "epoch": 0.028914625557555366, "grad_norm": 0.0, "learning_rate": 1.926988265971317e-05, "loss": 1.2874, "step": 739 }, { "epoch": 0.028953752249784803, "grad_norm": 0.0, "learning_rate": 1.9295958279009127e-05, "loss": 1.4272, "step": 740 }, { "epoch": 0.02899287894201424, "grad_norm": 0.0, "learning_rate": 1.9322033898305087e-05, "loss": 1.3996, "step": 741 }, { "epoch": 0.02903200563424368, "grad_norm": 0.0, "learning_rate": 1.9348109517601044e-05, "loss": 1.4507, "step": 742 }, { "epoch": 0.02907113232647312, "grad_norm": 0.0, "learning_rate": 1.9374185136897005e-05, "loss": 1.366, "step": 743 }, { "epoch": 0.02911025901870256, "grad_norm": 0.0, "learning_rate": 1.940026075619296e-05, "loss": 1.3633, "step": 744 }, { "epoch": 0.029149385710931997, "grad_norm": 0.0, "learning_rate": 1.942633637548892e-05, "loss": 1.3103, "step": 745 }, { "epoch": 0.029188512403161438, "grad_norm": 0.0, "learning_rate": 1.9452411994784876e-05, "loss": 1.3976, "step": 746 }, { "epoch": 0.029227639095390875, "grad_norm": 0.0, "learning_rate": 1.9478487614080836e-05, "loss": 1.4258, "step": 747 }, { "epoch": 0.029266765787620313, "grad_norm": 0.0, "learning_rate": 1.9504563233376793e-05, "loss": 1.2855, "step": 748 }, { "epoch": 0.029305892479849754, "grad_norm": 0.0, "learning_rate": 1.9530638852672753e-05, "loss": 1.3599, "step": 749 }, { "epoch": 0.02934501917207919, "grad_norm": 0.0, "learning_rate": 1.9556714471968713e-05, "loss": 1.299, "step": 750 }, { "epoch": 0.029384145864308632, "grad_norm": 0.0, "learning_rate": 1.958279009126467e-05, "loss": 1.2695, "step": 751 }, { "epoch": 0.02942327255653807, "grad_norm": 0.0, "learning_rate": 1.9608865710560627e-05, "loss": 1.3786, "step": 752 }, { "epoch": 0.02946239924876751, "grad_norm": 0.0, "learning_rate": 1.9634941329856584e-05, "loss": 1.3497, "step": 753 }, { "epoch": 0.029501525940996948, "grad_norm": 0.0, "learning_rate": 1.9661016949152545e-05, "loss": 1.1986, "step": 754 }, { "epoch": 0.02954065263322639, "grad_norm": 0.0, "learning_rate": 1.96870925684485e-05, "loss": 1.3461, "step": 755 }, { "epoch": 0.029579779325455826, "grad_norm": 0.0, "learning_rate": 1.9713168187744462e-05, "loss": 1.3925, "step": 756 }, { "epoch": 0.029618906017685263, "grad_norm": 0.0, "learning_rate": 1.973924380704042e-05, "loss": 1.325, "step": 757 }, { "epoch": 0.029658032709914704, "grad_norm": 0.0, "learning_rate": 1.976531942633638e-05, "loss": 1.4158, "step": 758 }, { "epoch": 0.02969715940214414, "grad_norm": 0.0, "learning_rate": 1.9791395045632336e-05, "loss": 1.417, "step": 759 }, { "epoch": 0.029736286094373583, "grad_norm": 0.0, "learning_rate": 1.9817470664928293e-05, "loss": 1.3062, "step": 760 }, { "epoch": 0.02977541278660302, "grad_norm": 0.0, "learning_rate": 1.984354628422425e-05, "loss": 1.4702, "step": 761 }, { "epoch": 0.02981453947883246, "grad_norm": 0.0, "learning_rate": 1.986962190352021e-05, "loss": 1.3258, "step": 762 }, { "epoch": 0.029853666171061898, "grad_norm": 0.0, "learning_rate": 1.9895697522816167e-05, "loss": 1.3329, "step": 763 }, { "epoch": 0.02989279286329134, "grad_norm": 0.0, "learning_rate": 1.9921773142112127e-05, "loss": 1.4747, "step": 764 }, { "epoch": 0.029931919555520776, "grad_norm": 0.0, "learning_rate": 1.9947848761408084e-05, "loss": 1.2937, "step": 765 }, { "epoch": 0.029971046247750214, "grad_norm": 0.0, "learning_rate": 1.9973924380704045e-05, "loss": 1.2008, "step": 766 }, { "epoch": 0.030010172939979655, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 1.2762, "step": 767 }, { "epoch": 0.030049299632209092, "grad_norm": 0.0, "learning_rate": 1.9999999919706266e-05, "loss": 1.254, "step": 768 }, { "epoch": 0.030088426324438533, "grad_norm": 0.0, "learning_rate": 1.9999999678825064e-05, "loss": 1.401, "step": 769 }, { "epoch": 0.03012755301666797, "grad_norm": 0.0, "learning_rate": 1.9999999277356397e-05, "loss": 1.299, "step": 770 }, { "epoch": 0.03016667970889741, "grad_norm": 0.0, "learning_rate": 1.9999998715300272e-05, "loss": 1.3342, "step": 771 }, { "epoch": 0.03020580640112685, "grad_norm": 0.0, "learning_rate": 1.99999979926567e-05, "loss": 1.3817, "step": 772 }, { "epoch": 0.030244933093356286, "grad_norm": 0.0, "learning_rate": 1.999999710942569e-05, "loss": 1.2819, "step": 773 }, { "epoch": 0.030284059785585727, "grad_norm": 0.0, "learning_rate": 1.9999996065607256e-05, "loss": 1.3702, "step": 774 }, { "epoch": 0.030323186477815164, "grad_norm": 0.0, "learning_rate": 1.999999486120142e-05, "loss": 1.2876, "step": 775 }, { "epoch": 0.030362313170044605, "grad_norm": 0.0, "learning_rate": 1.999999349620819e-05, "loss": 1.506, "step": 776 }, { "epoch": 0.030401439862274043, "grad_norm": 0.0, "learning_rate": 1.9999991970627597e-05, "loss": 1.3424, "step": 777 }, { "epoch": 0.030440566554503484, "grad_norm": 0.0, "learning_rate": 1.999999028445967e-05, "loss": 1.408, "step": 778 }, { "epoch": 0.03047969324673292, "grad_norm": 0.0, "learning_rate": 1.9999988437704427e-05, "loss": 1.2872, "step": 779 }, { "epoch": 0.030518819938962362, "grad_norm": 0.0, "learning_rate": 1.9999986430361896e-05, "loss": 1.3309, "step": 780 }, { "epoch": 0.0305579466311918, "grad_norm": 0.0, "learning_rate": 1.9999984262432116e-05, "loss": 1.4011, "step": 781 }, { "epoch": 0.030597073323421237, "grad_norm": 0.0, "learning_rate": 1.999998193391512e-05, "loss": 1.4145, "step": 782 }, { "epoch": 0.030636200015650678, "grad_norm": 0.0, "learning_rate": 1.9999979444810947e-05, "loss": 1.2733, "step": 783 }, { "epoch": 0.030675326707880115, "grad_norm": 0.0, "learning_rate": 1.9999976795119632e-05, "loss": 1.1775, "step": 784 }, { "epoch": 0.030714453400109556, "grad_norm": 0.0, "learning_rate": 1.9999973984841218e-05, "loss": 1.3878, "step": 785 }, { "epoch": 0.030753580092338993, "grad_norm": 0.0, "learning_rate": 1.9999971013975758e-05, "loss": 1.282, "step": 786 }, { "epoch": 0.030792706784568434, "grad_norm": 0.0, "learning_rate": 1.9999967882523294e-05, "loss": 1.2957, "step": 787 }, { "epoch": 0.03083183347679787, "grad_norm": 0.0, "learning_rate": 1.9999964590483872e-05, "loss": 1.3677, "step": 788 }, { "epoch": 0.03087096016902731, "grad_norm": 0.0, "learning_rate": 1.999996113785755e-05, "loss": 1.308, "step": 789 }, { "epoch": 0.03091008686125675, "grad_norm": 0.0, "learning_rate": 1.9999957524644385e-05, "loss": 1.2306, "step": 790 }, { "epoch": 0.030949213553486187, "grad_norm": 0.0, "learning_rate": 1.999995375084443e-05, "loss": 1.3874, "step": 791 }, { "epoch": 0.030988340245715628, "grad_norm": 0.0, "learning_rate": 1.999994981645775e-05, "loss": 1.4169, "step": 792 }, { "epoch": 0.031027466937945065, "grad_norm": 0.0, "learning_rate": 1.9999945721484407e-05, "loss": 1.274, "step": 793 }, { "epoch": 0.031066593630174506, "grad_norm": 0.0, "learning_rate": 1.999994146592447e-05, "loss": 1.3195, "step": 794 }, { "epoch": 0.031105720322403944, "grad_norm": 0.0, "learning_rate": 1.9999937049777998e-05, "loss": 1.3754, "step": 795 }, { "epoch": 0.031144847014633385, "grad_norm": 0.0, "learning_rate": 1.999993247304507e-05, "loss": 1.3586, "step": 796 }, { "epoch": 0.031183973706862822, "grad_norm": 0.0, "learning_rate": 1.9999927735725756e-05, "loss": 1.2333, "step": 797 }, { "epoch": 0.03122310039909226, "grad_norm": 0.0, "learning_rate": 1.9999922837820134e-05, "loss": 1.2209, "step": 798 }, { "epoch": 0.0312622270913217, "grad_norm": 0.0, "learning_rate": 1.999991777932828e-05, "loss": 1.2996, "step": 799 }, { "epoch": 0.03130135378355114, "grad_norm": 0.0, "learning_rate": 1.999991256025028e-05, "loss": 1.343, "step": 800 }, { "epoch": 0.03134048047578058, "grad_norm": 0.0, "learning_rate": 1.9999907180586212e-05, "loss": 1.3289, "step": 801 }, { "epoch": 0.03137960716801002, "grad_norm": 0.0, "learning_rate": 1.999990164033617e-05, "loss": 1.2967, "step": 802 }, { "epoch": 0.03141873386023945, "grad_norm": 0.0, "learning_rate": 1.9999895939500235e-05, "loss": 1.2498, "step": 803 }, { "epoch": 0.031457860552468894, "grad_norm": 0.0, "learning_rate": 1.99998900780785e-05, "loss": 1.4691, "step": 804 }, { "epoch": 0.031496987244698335, "grad_norm": 0.0, "learning_rate": 1.9999884056071065e-05, "loss": 1.3203, "step": 805 }, { "epoch": 0.03153611393692777, "grad_norm": 0.0, "learning_rate": 1.999987787347802e-05, "loss": 1.3314, "step": 806 }, { "epoch": 0.03157524062915721, "grad_norm": 0.0, "learning_rate": 1.9999871530299466e-05, "loss": 1.4283, "step": 807 }, { "epoch": 0.03161436732138665, "grad_norm": 0.0, "learning_rate": 1.999986502653551e-05, "loss": 1.2816, "step": 808 }, { "epoch": 0.03165349401361609, "grad_norm": 0.0, "learning_rate": 1.9999858362186247e-05, "loss": 1.3999, "step": 809 }, { "epoch": 0.031692620705845526, "grad_norm": 0.0, "learning_rate": 1.999985153725179e-05, "loss": 1.3618, "step": 810 }, { "epoch": 0.031731747398074966, "grad_norm": 0.0, "learning_rate": 1.999984455173225e-05, "loss": 1.193, "step": 811 }, { "epoch": 0.03177087409030441, "grad_norm": 0.0, "learning_rate": 1.9999837405627737e-05, "loss": 1.2946, "step": 812 }, { "epoch": 0.03181000078253384, "grad_norm": 0.0, "learning_rate": 1.9999830098938364e-05, "loss": 1.3202, "step": 813 }, { "epoch": 0.03184912747476328, "grad_norm": 0.0, "learning_rate": 1.999982263166425e-05, "loss": 1.3276, "step": 814 }, { "epoch": 0.03188825416699272, "grad_norm": 0.0, "learning_rate": 1.9999815003805518e-05, "loss": 1.2808, "step": 815 }, { "epoch": 0.031927380859222164, "grad_norm": 0.0, "learning_rate": 1.9999807215362284e-05, "loss": 1.2472, "step": 816 }, { "epoch": 0.0319665075514516, "grad_norm": 0.0, "learning_rate": 1.9999799266334682e-05, "loss": 1.2759, "step": 817 }, { "epoch": 0.03200563424368104, "grad_norm": 0.0, "learning_rate": 1.9999791156722827e-05, "loss": 1.329, "step": 818 }, { "epoch": 0.03204476093591048, "grad_norm": 0.0, "learning_rate": 1.9999782886526863e-05, "loss": 1.2081, "step": 819 }, { "epoch": 0.03208388762813992, "grad_norm": 0.0, "learning_rate": 1.999977445574691e-05, "loss": 1.2971, "step": 820 }, { "epoch": 0.032123014320369354, "grad_norm": 0.0, "learning_rate": 1.9999765864383115e-05, "loss": 1.2996, "step": 821 }, { "epoch": 0.032162141012598795, "grad_norm": 0.0, "learning_rate": 1.9999757112435608e-05, "loss": 1.433, "step": 822 }, { "epoch": 0.032201267704828236, "grad_norm": 0.0, "learning_rate": 1.9999748199904535e-05, "loss": 1.385, "step": 823 }, { "epoch": 0.03224039439705767, "grad_norm": 0.0, "learning_rate": 1.9999739126790032e-05, "loss": 1.1989, "step": 824 }, { "epoch": 0.03227952108928711, "grad_norm": 0.0, "learning_rate": 1.999972989309225e-05, "loss": 1.4124, "step": 825 }, { "epoch": 0.03231864778151655, "grad_norm": 0.0, "learning_rate": 1.9999720498811335e-05, "loss": 1.2709, "step": 826 }, { "epoch": 0.03235777447374599, "grad_norm": 0.0, "learning_rate": 1.9999710943947447e-05, "loss": 1.3668, "step": 827 }, { "epoch": 0.03239690116597543, "grad_norm": 0.0, "learning_rate": 1.9999701228500724e-05, "loss": 1.4264, "step": 828 }, { "epoch": 0.03243602785820487, "grad_norm": 0.0, "learning_rate": 1.999969135247133e-05, "loss": 1.3146, "step": 829 }, { "epoch": 0.03247515455043431, "grad_norm": 0.0, "learning_rate": 1.999968131585943e-05, "loss": 1.2606, "step": 830 }, { "epoch": 0.03251428124266374, "grad_norm": 0.0, "learning_rate": 1.999967111866517e-05, "loss": 1.327, "step": 831 }, { "epoch": 0.03255340793489318, "grad_norm": 0.0, "learning_rate": 1.9999660760888722e-05, "loss": 1.3347, "step": 832 }, { "epoch": 0.032592534627122624, "grad_norm": 0.0, "learning_rate": 1.9999650242530257e-05, "loss": 1.4439, "step": 833 }, { "epoch": 0.032631661319352065, "grad_norm": 0.0, "learning_rate": 1.999963956358994e-05, "loss": 1.1837, "step": 834 }, { "epoch": 0.0326707880115815, "grad_norm": 0.0, "learning_rate": 1.999962872406794e-05, "loss": 1.2983, "step": 835 }, { "epoch": 0.03270991470381094, "grad_norm": 0.0, "learning_rate": 1.9999617723964434e-05, "loss": 1.2805, "step": 836 }, { "epoch": 0.03274904139604038, "grad_norm": 0.0, "learning_rate": 1.9999606563279594e-05, "loss": 1.2205, "step": 837 }, { "epoch": 0.032788168088269815, "grad_norm": 0.0, "learning_rate": 1.9999595242013604e-05, "loss": 1.2717, "step": 838 }, { "epoch": 0.032827294780499255, "grad_norm": 0.0, "learning_rate": 1.9999583760166646e-05, "loss": 1.3795, "step": 839 }, { "epoch": 0.032866421472728696, "grad_norm": 0.0, "learning_rate": 1.9999572117738902e-05, "loss": 1.4551, "step": 840 }, { "epoch": 0.03290554816495814, "grad_norm": 0.0, "learning_rate": 1.9999560314730563e-05, "loss": 1.2886, "step": 841 }, { "epoch": 0.03294467485718757, "grad_norm": 0.0, "learning_rate": 1.9999548351141813e-05, "loss": 1.2686, "step": 842 }, { "epoch": 0.03298380154941701, "grad_norm": 0.0, "learning_rate": 1.9999536226972845e-05, "loss": 1.2619, "step": 843 }, { "epoch": 0.03302292824164645, "grad_norm": 0.0, "learning_rate": 1.999952394222386e-05, "loss": 1.26, "step": 844 }, { "epoch": 0.03306205493387589, "grad_norm": 0.0, "learning_rate": 1.9999511496895047e-05, "loss": 1.4089, "step": 845 }, { "epoch": 0.03310118162610533, "grad_norm": 0.0, "learning_rate": 1.999949889098661e-05, "loss": 1.2825, "step": 846 }, { "epoch": 0.03314030831833477, "grad_norm": 0.0, "learning_rate": 1.999948612449875e-05, "loss": 1.3613, "step": 847 }, { "epoch": 0.03317943501056421, "grad_norm": 0.0, "learning_rate": 1.9999473197431677e-05, "loss": 1.2984, "step": 848 }, { "epoch": 0.03321856170279364, "grad_norm": 0.0, "learning_rate": 1.999946010978559e-05, "loss": 1.3828, "step": 849 }, { "epoch": 0.033257688395023084, "grad_norm": 0.0, "learning_rate": 1.9999446861560704e-05, "loss": 1.3482, "step": 850 }, { "epoch": 0.033296815087252525, "grad_norm": 0.0, "learning_rate": 1.9999433452757234e-05, "loss": 1.3253, "step": 851 }, { "epoch": 0.033335941779481966, "grad_norm": 0.0, "learning_rate": 1.9999419883375393e-05, "loss": 1.2423, "step": 852 }, { "epoch": 0.0333750684717114, "grad_norm": 0.0, "learning_rate": 1.9999406153415397e-05, "loss": 1.4014, "step": 853 }, { "epoch": 0.03341419516394084, "grad_norm": 0.0, "learning_rate": 1.999939226287747e-05, "loss": 1.2924, "step": 854 }, { "epoch": 0.03345332185617028, "grad_norm": 0.0, "learning_rate": 1.999937821176183e-05, "loss": 1.4016, "step": 855 }, { "epoch": 0.033492448548399716, "grad_norm": 0.0, "learning_rate": 1.9999364000068703e-05, "loss": 1.2872, "step": 856 }, { "epoch": 0.033531575240629156, "grad_norm": 0.0, "learning_rate": 1.9999349627798324e-05, "loss": 1.2828, "step": 857 }, { "epoch": 0.0335707019328586, "grad_norm": 0.0, "learning_rate": 1.9999335094950922e-05, "loss": 1.3521, "step": 858 }, { "epoch": 0.03360982862508804, "grad_norm": 0.0, "learning_rate": 1.9999320401526727e-05, "loss": 1.4202, "step": 859 }, { "epoch": 0.03364895531731747, "grad_norm": 0.0, "learning_rate": 1.9999305547525977e-05, "loss": 1.3143, "step": 860 }, { "epoch": 0.03368808200954691, "grad_norm": 0.0, "learning_rate": 1.9999290532948908e-05, "loss": 1.2248, "step": 861 }, { "epoch": 0.033727208701776354, "grad_norm": 0.0, "learning_rate": 1.999927535779576e-05, "loss": 1.3083, "step": 862 }, { "epoch": 0.03376633539400579, "grad_norm": 0.0, "learning_rate": 1.9999260022066784e-05, "loss": 1.3148, "step": 863 }, { "epoch": 0.03380546208623523, "grad_norm": 0.0, "learning_rate": 1.999924452576222e-05, "loss": 1.2968, "step": 864 }, { "epoch": 0.03384458877846467, "grad_norm": 0.0, "learning_rate": 1.999922886888232e-05, "loss": 1.2639, "step": 865 }, { "epoch": 0.03388371547069411, "grad_norm": 0.0, "learning_rate": 1.9999213051427336e-05, "loss": 1.2764, "step": 866 }, { "epoch": 0.033922842162923544, "grad_norm": 0.0, "learning_rate": 1.9999197073397517e-05, "loss": 1.4142, "step": 867 }, { "epoch": 0.033961968855152985, "grad_norm": 0.0, "learning_rate": 1.999918093479312e-05, "loss": 1.272, "step": 868 }, { "epoch": 0.034001095547382426, "grad_norm": 0.0, "learning_rate": 1.9999164635614413e-05, "loss": 1.2815, "step": 869 }, { "epoch": 0.03404022223961186, "grad_norm": 0.0, "learning_rate": 1.9999148175861646e-05, "loss": 1.2863, "step": 870 }, { "epoch": 0.0340793489318413, "grad_norm": 0.0, "learning_rate": 1.999913155553509e-05, "loss": 1.3367, "step": 871 }, { "epoch": 0.03411847562407074, "grad_norm": 0.0, "learning_rate": 1.9999114774635013e-05, "loss": 1.3163, "step": 872 }, { "epoch": 0.03415760231630018, "grad_norm": 0.0, "learning_rate": 1.9999097833161683e-05, "loss": 1.3362, "step": 873 }, { "epoch": 0.03419672900852962, "grad_norm": 0.0, "learning_rate": 1.999908073111537e-05, "loss": 1.3036, "step": 874 }, { "epoch": 0.03423585570075906, "grad_norm": 0.0, "learning_rate": 1.999906346849635e-05, "loss": 1.3046, "step": 875 }, { "epoch": 0.0342749823929885, "grad_norm": 0.0, "learning_rate": 1.99990460453049e-05, "loss": 1.3578, "step": 876 }, { "epoch": 0.03431410908521793, "grad_norm": 0.0, "learning_rate": 1.99990284615413e-05, "loss": 1.4672, "step": 877 }, { "epoch": 0.03435323577744737, "grad_norm": 0.0, "learning_rate": 1.9999010717205832e-05, "loss": 1.2216, "step": 878 }, { "epoch": 0.034392362469676814, "grad_norm": 0.0, "learning_rate": 1.9998992812298783e-05, "loss": 1.2873, "step": 879 }, { "epoch": 0.034431489161906255, "grad_norm": 0.0, "learning_rate": 1.999897474682044e-05, "loss": 1.2637, "step": 880 }, { "epoch": 0.03447061585413569, "grad_norm": 0.0, "learning_rate": 1.9998956520771088e-05, "loss": 1.2993, "step": 881 }, { "epoch": 0.03450974254636513, "grad_norm": 0.0, "learning_rate": 1.9998938134151028e-05, "loss": 1.4233, "step": 882 }, { "epoch": 0.03454886923859457, "grad_norm": 0.0, "learning_rate": 1.9998919586960545e-05, "loss": 1.2816, "step": 883 }, { "epoch": 0.03458799593082401, "grad_norm": 0.0, "learning_rate": 1.9998900879199948e-05, "loss": 1.3059, "step": 884 }, { "epoch": 0.034627122623053445, "grad_norm": 0.0, "learning_rate": 1.999888201086953e-05, "loss": 1.2919, "step": 885 }, { "epoch": 0.034666249315282886, "grad_norm": 0.0, "learning_rate": 1.9998862981969597e-05, "loss": 1.2939, "step": 886 }, { "epoch": 0.03470537600751233, "grad_norm": 0.0, "learning_rate": 1.9998843792500454e-05, "loss": 1.3831, "step": 887 }, { "epoch": 0.03474450269974176, "grad_norm": 0.0, "learning_rate": 1.9998824442462407e-05, "loss": 1.3323, "step": 888 }, { "epoch": 0.0347836293919712, "grad_norm": 0.0, "learning_rate": 1.9998804931855772e-05, "loss": 1.4501, "step": 889 }, { "epoch": 0.03482275608420064, "grad_norm": 0.0, "learning_rate": 1.9998785260680855e-05, "loss": 1.238, "step": 890 }, { "epoch": 0.034861882776430084, "grad_norm": 0.0, "learning_rate": 1.999876542893798e-05, "loss": 1.3301, "step": 891 }, { "epoch": 0.03490100946865952, "grad_norm": 0.0, "learning_rate": 1.9998745436627458e-05, "loss": 1.3508, "step": 892 }, { "epoch": 0.03494013616088896, "grad_norm": 0.0, "learning_rate": 1.9998725283749617e-05, "loss": 1.2231, "step": 893 }, { "epoch": 0.0349792628531184, "grad_norm": 0.0, "learning_rate": 1.9998704970304772e-05, "loss": 1.4127, "step": 894 }, { "epoch": 0.03501838954534783, "grad_norm": 0.0, "learning_rate": 1.9998684496293258e-05, "loss": 1.3048, "step": 895 }, { "epoch": 0.035057516237577274, "grad_norm": 0.0, "learning_rate": 1.9998663861715397e-05, "loss": 1.2628, "step": 896 }, { "epoch": 0.035096642929806715, "grad_norm": 0.0, "learning_rate": 1.9998643066571527e-05, "loss": 1.2761, "step": 897 }, { "epoch": 0.035135769622036156, "grad_norm": 0.0, "learning_rate": 1.9998622110861978e-05, "loss": 1.3774, "step": 898 }, { "epoch": 0.03517489631426559, "grad_norm": 0.0, "learning_rate": 1.9998600994587085e-05, "loss": 1.3322, "step": 899 }, { "epoch": 0.03521402300649503, "grad_norm": 0.0, "learning_rate": 1.999857971774719e-05, "loss": 1.3008, "step": 900 }, { "epoch": 0.03525314969872447, "grad_norm": 0.0, "learning_rate": 1.9998558280342634e-05, "loss": 1.3599, "step": 901 }, { "epoch": 0.035292276390953906, "grad_norm": 0.0, "learning_rate": 1.999853668237376e-05, "loss": 1.2042, "step": 902 }, { "epoch": 0.035331403083183346, "grad_norm": 0.0, "learning_rate": 1.9998514923840916e-05, "loss": 1.4352, "step": 903 }, { "epoch": 0.03537052977541279, "grad_norm": 0.0, "learning_rate": 1.999849300474445e-05, "loss": 1.289, "step": 904 }, { "epoch": 0.03540965646764223, "grad_norm": 0.0, "learning_rate": 1.9998470925084715e-05, "loss": 1.3278, "step": 905 }, { "epoch": 0.03544878315987166, "grad_norm": 0.0, "learning_rate": 1.999844868486207e-05, "loss": 1.3103, "step": 906 }, { "epoch": 0.0354879098521011, "grad_norm": 0.0, "learning_rate": 1.9998426284076862e-05, "loss": 1.3226, "step": 907 }, { "epoch": 0.035527036544330544, "grad_norm": 0.0, "learning_rate": 1.999840372272946e-05, "loss": 1.3013, "step": 908 }, { "epoch": 0.03556616323655998, "grad_norm": 0.0, "learning_rate": 1.9998381000820226e-05, "loss": 1.2596, "step": 909 }, { "epoch": 0.03560528992878942, "grad_norm": 0.0, "learning_rate": 1.9998358118349513e-05, "loss": 1.3431, "step": 910 }, { "epoch": 0.03564441662101886, "grad_norm": 0.0, "learning_rate": 1.9998335075317706e-05, "loss": 1.4734, "step": 911 }, { "epoch": 0.0356835433132483, "grad_norm": 0.0, "learning_rate": 1.9998311871725162e-05, "loss": 1.3673, "step": 912 }, { "epoch": 0.035722670005477734, "grad_norm": 0.0, "learning_rate": 1.9998288507572258e-05, "loss": 1.187, "step": 913 }, { "epoch": 0.035761796697707175, "grad_norm": 0.0, "learning_rate": 1.999826498285937e-05, "loss": 1.2999, "step": 914 }, { "epoch": 0.035800923389936616, "grad_norm": 0.0, "learning_rate": 1.9998241297586876e-05, "loss": 1.3326, "step": 915 }, { "epoch": 0.03584005008216606, "grad_norm": 0.0, "learning_rate": 1.9998217451755154e-05, "loss": 1.2893, "step": 916 }, { "epoch": 0.03587917677439549, "grad_norm": 0.0, "learning_rate": 1.9998193445364586e-05, "loss": 1.3876, "step": 917 }, { "epoch": 0.03591830346662493, "grad_norm": 0.0, "learning_rate": 1.9998169278415562e-05, "loss": 1.295, "step": 918 }, { "epoch": 0.03595743015885437, "grad_norm": 0.0, "learning_rate": 1.9998144950908468e-05, "loss": 1.3507, "step": 919 }, { "epoch": 0.03599655685108381, "grad_norm": 0.0, "learning_rate": 1.9998120462843694e-05, "loss": 1.0994, "step": 920 }, { "epoch": 0.03603568354331325, "grad_norm": 0.0, "learning_rate": 1.9998095814221636e-05, "loss": 1.3184, "step": 921 }, { "epoch": 0.03607481023554269, "grad_norm": 0.0, "learning_rate": 1.9998071005042683e-05, "loss": 1.4352, "step": 922 }, { "epoch": 0.03611393692777213, "grad_norm": 0.0, "learning_rate": 1.999804603530724e-05, "loss": 1.2288, "step": 923 }, { "epoch": 0.03615306362000156, "grad_norm": 0.0, "learning_rate": 1.9998020905015705e-05, "loss": 1.2621, "step": 924 }, { "epoch": 0.036192190312231004, "grad_norm": 0.0, "learning_rate": 1.9997995614168486e-05, "loss": 1.3243, "step": 925 }, { "epoch": 0.036231317004460445, "grad_norm": 0.0, "learning_rate": 1.9997970162765985e-05, "loss": 1.3518, "step": 926 }, { "epoch": 0.03627044369668988, "grad_norm": 0.0, "learning_rate": 1.999794455080861e-05, "loss": 1.5003, "step": 927 }, { "epoch": 0.03630957038891932, "grad_norm": 0.0, "learning_rate": 1.9997918778296772e-05, "loss": 1.349, "step": 928 }, { "epoch": 0.03634869708114876, "grad_norm": 0.0, "learning_rate": 1.999789284523089e-05, "loss": 1.208, "step": 929 }, { "epoch": 0.0363878237733782, "grad_norm": 0.0, "learning_rate": 1.9997866751611373e-05, "loss": 1.3976, "step": 930 }, { "epoch": 0.036426950465607635, "grad_norm": 0.0, "learning_rate": 1.9997840497438648e-05, "loss": 1.3269, "step": 931 }, { "epoch": 0.036466077157837076, "grad_norm": 0.0, "learning_rate": 1.9997814082713128e-05, "loss": 1.2702, "step": 932 }, { "epoch": 0.03650520385006652, "grad_norm": 0.0, "learning_rate": 1.9997787507435244e-05, "loss": 1.2599, "step": 933 }, { "epoch": 0.03654433054229595, "grad_norm": 0.0, "learning_rate": 1.9997760771605423e-05, "loss": 1.2624, "step": 934 }, { "epoch": 0.03658345723452539, "grad_norm": 0.0, "learning_rate": 1.9997733875224088e-05, "loss": 1.4012, "step": 935 }, { "epoch": 0.03662258392675483, "grad_norm": 0.0, "learning_rate": 1.999770681829168e-05, "loss": 1.2995, "step": 936 }, { "epoch": 0.036661710618984274, "grad_norm": 0.0, "learning_rate": 1.999767960080862e-05, "loss": 1.3032, "step": 937 }, { "epoch": 0.03670083731121371, "grad_norm": 0.0, "learning_rate": 1.9997652222775363e-05, "loss": 1.4407, "step": 938 }, { "epoch": 0.03673996400344315, "grad_norm": 0.0, "learning_rate": 1.9997624684192332e-05, "loss": 1.2351, "step": 939 }, { "epoch": 0.03677909069567259, "grad_norm": 0.0, "learning_rate": 1.9997596985059977e-05, "loss": 1.3862, "step": 940 }, { "epoch": 0.03681821738790202, "grad_norm": 0.0, "learning_rate": 1.9997569125378743e-05, "loss": 1.2936, "step": 941 }, { "epoch": 0.036857344080131464, "grad_norm": 0.0, "learning_rate": 1.999754110514908e-05, "loss": 1.3401, "step": 942 }, { "epoch": 0.036896470772360905, "grad_norm": 0.0, "learning_rate": 1.9997512924371432e-05, "loss": 1.2708, "step": 943 }, { "epoch": 0.036935597464590346, "grad_norm": 0.0, "learning_rate": 1.9997484583046255e-05, "loss": 1.3372, "step": 944 }, { "epoch": 0.03697472415681978, "grad_norm": 0.0, "learning_rate": 1.9997456081174e-05, "loss": 1.3243, "step": 945 }, { "epoch": 0.03701385084904922, "grad_norm": 0.0, "learning_rate": 1.9997427418755132e-05, "loss": 1.2847, "step": 946 }, { "epoch": 0.03705297754127866, "grad_norm": 0.0, "learning_rate": 1.9997398595790104e-05, "loss": 1.2844, "step": 947 }, { "epoch": 0.0370921042335081, "grad_norm": 0.0, "learning_rate": 1.9997369612279383e-05, "loss": 1.2772, "step": 948 }, { "epoch": 0.037131230925737536, "grad_norm": 0.0, "learning_rate": 1.999734046822343e-05, "loss": 1.4015, "step": 949 }, { "epoch": 0.03717035761796698, "grad_norm": 0.0, "learning_rate": 1.9997311163622722e-05, "loss": 1.2936, "step": 950 }, { "epoch": 0.03720948431019642, "grad_norm": 0.0, "learning_rate": 1.999728169847772e-05, "loss": 1.2478, "step": 951 }, { "epoch": 0.03724861100242585, "grad_norm": 0.0, "learning_rate": 1.9997252072788903e-05, "loss": 1.3051, "step": 952 }, { "epoch": 0.03728773769465529, "grad_norm": 0.0, "learning_rate": 1.9997222286556747e-05, "loss": 1.3505, "step": 953 }, { "epoch": 0.037326864386884734, "grad_norm": 0.0, "learning_rate": 1.9997192339781724e-05, "loss": 1.2867, "step": 954 }, { "epoch": 0.037365991079114175, "grad_norm": 0.0, "learning_rate": 1.9997162232464325e-05, "loss": 1.2798, "step": 955 }, { "epoch": 0.03740511777134361, "grad_norm": 0.0, "learning_rate": 1.999713196460502e-05, "loss": 1.3127, "step": 956 }, { "epoch": 0.03744424446357305, "grad_norm": 0.0, "learning_rate": 1.999710153620431e-05, "loss": 1.3055, "step": 957 }, { "epoch": 0.03748337115580249, "grad_norm": 0.0, "learning_rate": 1.9997070947262674e-05, "loss": 1.3083, "step": 958 }, { "epoch": 0.037522497848031924, "grad_norm": 0.0, "learning_rate": 1.9997040197780605e-05, "loss": 1.2365, "step": 959 }, { "epoch": 0.037561624540261365, "grad_norm": 0.0, "learning_rate": 1.9997009287758596e-05, "loss": 1.3403, "step": 960 }, { "epoch": 0.037600751232490806, "grad_norm": 0.0, "learning_rate": 1.9996978217197145e-05, "loss": 1.2811, "step": 961 }, { "epoch": 0.03763987792472025, "grad_norm": 0.0, "learning_rate": 1.9996946986096754e-05, "loss": 1.2516, "step": 962 }, { "epoch": 0.03767900461694968, "grad_norm": 0.0, "learning_rate": 1.999691559445792e-05, "loss": 1.3697, "step": 963 }, { "epoch": 0.03771813130917912, "grad_norm": 0.0, "learning_rate": 1.9996884042281145e-05, "loss": 1.3818, "step": 964 }, { "epoch": 0.03775725800140856, "grad_norm": 0.0, "learning_rate": 1.9996852329566944e-05, "loss": 1.257, "step": 965 }, { "epoch": 0.037796384693638, "grad_norm": 0.0, "learning_rate": 1.9996820456315818e-05, "loss": 1.2632, "step": 966 }, { "epoch": 0.03783551138586744, "grad_norm": 0.0, "learning_rate": 1.9996788422528283e-05, "loss": 1.3012, "step": 967 }, { "epoch": 0.03787463807809688, "grad_norm": 0.0, "learning_rate": 1.9996756228204853e-05, "loss": 1.283, "step": 968 }, { "epoch": 0.03791376477032632, "grad_norm": 0.0, "learning_rate": 1.9996723873346043e-05, "loss": 1.2121, "step": 969 }, { "epoch": 0.03795289146255575, "grad_norm": 0.0, "learning_rate": 1.9996691357952376e-05, "loss": 1.2932, "step": 970 }, { "epoch": 0.037992018154785194, "grad_norm": 0.0, "learning_rate": 1.9996658682024373e-05, "loss": 1.4485, "step": 971 }, { "epoch": 0.038031144847014635, "grad_norm": 0.0, "learning_rate": 1.9996625845562555e-05, "loss": 1.3214, "step": 972 }, { "epoch": 0.038070271539244076, "grad_norm": 0.0, "learning_rate": 1.9996592848567455e-05, "loss": 1.3696, "step": 973 }, { "epoch": 0.03810939823147351, "grad_norm": 0.0, "learning_rate": 1.99965596910396e-05, "loss": 1.3929, "step": 974 }, { "epoch": 0.03814852492370295, "grad_norm": 0.0, "learning_rate": 1.9996526372979522e-05, "loss": 1.3751, "step": 975 }, { "epoch": 0.03818765161593239, "grad_norm": 0.0, "learning_rate": 1.999649289438776e-05, "loss": 1.3842, "step": 976 }, { "epoch": 0.038226778308161825, "grad_norm": 0.0, "learning_rate": 1.9996459255264843e-05, "loss": 1.2668, "step": 977 }, { "epoch": 0.038265905000391266, "grad_norm": 0.0, "learning_rate": 1.999642545561132e-05, "loss": 1.346, "step": 978 }, { "epoch": 0.03830503169262071, "grad_norm": 0.0, "learning_rate": 1.999639149542773e-05, "loss": 1.1881, "step": 979 }, { "epoch": 0.03834415838485015, "grad_norm": 0.0, "learning_rate": 1.999635737471462e-05, "loss": 1.2051, "step": 980 }, { "epoch": 0.03838328507707958, "grad_norm": 0.0, "learning_rate": 1.9996323093472535e-05, "loss": 1.3511, "step": 981 }, { "epoch": 0.03842241176930902, "grad_norm": 0.0, "learning_rate": 1.9996288651702028e-05, "loss": 1.3239, "step": 982 }, { "epoch": 0.038461538461538464, "grad_norm": 0.0, "learning_rate": 1.999625404940365e-05, "loss": 1.2927, "step": 983 }, { "epoch": 0.0385006651537679, "grad_norm": 0.0, "learning_rate": 1.9996219286577957e-05, "loss": 1.2457, "step": 984 }, { "epoch": 0.03853979184599734, "grad_norm": 0.0, "learning_rate": 1.9996184363225512e-05, "loss": 1.4034, "step": 985 }, { "epoch": 0.03857891853822678, "grad_norm": 0.0, "learning_rate": 1.999614927934687e-05, "loss": 1.2393, "step": 986 }, { "epoch": 0.03861804523045622, "grad_norm": 0.0, "learning_rate": 1.9996114034942594e-05, "loss": 1.236, "step": 987 }, { "epoch": 0.038657171922685654, "grad_norm": 0.0, "learning_rate": 1.9996078630013253e-05, "loss": 1.3185, "step": 988 }, { "epoch": 0.038696298614915095, "grad_norm": 0.0, "learning_rate": 1.999604306455942e-05, "loss": 1.1909, "step": 989 }, { "epoch": 0.038735425307144536, "grad_norm": 0.0, "learning_rate": 1.9996007338581656e-05, "loss": 1.42, "step": 990 }, { "epoch": 0.03877455199937397, "grad_norm": 0.0, "learning_rate": 1.9995971452080543e-05, "loss": 1.3624, "step": 991 }, { "epoch": 0.03881367869160341, "grad_norm": 0.0, "learning_rate": 1.9995935405056653e-05, "loss": 1.3718, "step": 992 }, { "epoch": 0.03885280538383285, "grad_norm": 0.0, "learning_rate": 1.9995899197510567e-05, "loss": 1.3787, "step": 993 }, { "epoch": 0.03889193207606229, "grad_norm": 0.0, "learning_rate": 1.9995862829442864e-05, "loss": 1.3403, "step": 994 }, { "epoch": 0.038931058768291726, "grad_norm": 0.0, "learning_rate": 1.999582630085413e-05, "loss": 1.4604, "step": 995 }, { "epoch": 0.03897018546052117, "grad_norm": 0.0, "learning_rate": 1.9995789611744947e-05, "loss": 1.2492, "step": 996 }, { "epoch": 0.03900931215275061, "grad_norm": 0.0, "learning_rate": 1.9995752762115917e-05, "loss": 1.3782, "step": 997 }, { "epoch": 0.03904843884498004, "grad_norm": 0.0, "learning_rate": 1.9995715751967614e-05, "loss": 1.3665, "step": 998 }, { "epoch": 0.03908756553720948, "grad_norm": 0.0, "learning_rate": 1.999567858130065e-05, "loss": 1.3026, "step": 999 }, { "epoch": 0.039126692229438924, "grad_norm": 0.0, "learning_rate": 1.9995641250115606e-05, "loss": 1.3451, "step": 1000 }, { "epoch": 0.039165818921668365, "grad_norm": 0.0, "learning_rate": 1.999560375841309e-05, "loss": 1.3078, "step": 1001 }, { "epoch": 0.0392049456138978, "grad_norm": 0.0, "learning_rate": 1.9995566106193706e-05, "loss": 1.1863, "step": 1002 }, { "epoch": 0.03924407230612724, "grad_norm": 0.0, "learning_rate": 1.9995528293458056e-05, "loss": 1.293, "step": 1003 }, { "epoch": 0.03928319899835668, "grad_norm": 0.0, "learning_rate": 1.9995490320206743e-05, "loss": 1.3221, "step": 1004 }, { "epoch": 0.03932232569058612, "grad_norm": 0.0, "learning_rate": 1.9995452186440382e-05, "loss": 1.3593, "step": 1005 }, { "epoch": 0.039361452382815555, "grad_norm": 0.0, "learning_rate": 1.9995413892159587e-05, "loss": 1.2421, "step": 1006 }, { "epoch": 0.039400579075044996, "grad_norm": 0.0, "learning_rate": 1.9995375437364964e-05, "loss": 1.4045, "step": 1007 }, { "epoch": 0.03943970576727444, "grad_norm": 0.0, "learning_rate": 1.9995336822057137e-05, "loss": 1.2922, "step": 1008 }, { "epoch": 0.03947883245950387, "grad_norm": 0.0, "learning_rate": 1.999529804623673e-05, "loss": 1.3707, "step": 1009 }, { "epoch": 0.03951795915173331, "grad_norm": 0.0, "learning_rate": 1.999525910990436e-05, "loss": 1.267, "step": 1010 }, { "epoch": 0.03955708584396275, "grad_norm": 0.0, "learning_rate": 1.999522001306065e-05, "loss": 1.3087, "step": 1011 }, { "epoch": 0.039596212536192193, "grad_norm": 0.0, "learning_rate": 1.9995180755706234e-05, "loss": 1.3307, "step": 1012 }, { "epoch": 0.03963533922842163, "grad_norm": 0.0, "learning_rate": 1.999514133784174e-05, "loss": 1.4219, "step": 1013 }, { "epoch": 0.03967446592065107, "grad_norm": 0.0, "learning_rate": 1.99951017594678e-05, "loss": 1.3727, "step": 1014 }, { "epoch": 0.03971359261288051, "grad_norm": 0.0, "learning_rate": 1.999506202058505e-05, "loss": 1.2426, "step": 1015 }, { "epoch": 0.03975271930510994, "grad_norm": 0.0, "learning_rate": 1.999502212119413e-05, "loss": 1.2742, "step": 1016 }, { "epoch": 0.039791845997339384, "grad_norm": 0.0, "learning_rate": 1.9994982061295676e-05, "loss": 1.268, "step": 1017 }, { "epoch": 0.039830972689568825, "grad_norm": 0.0, "learning_rate": 1.9994941840890338e-05, "loss": 1.4445, "step": 1018 }, { "epoch": 0.039870099381798266, "grad_norm": 0.0, "learning_rate": 1.999490145997876e-05, "loss": 1.3713, "step": 1019 }, { "epoch": 0.0399092260740277, "grad_norm": 0.0, "learning_rate": 1.9994860918561584e-05, "loss": 1.2727, "step": 1020 }, { "epoch": 0.03994835276625714, "grad_norm": 0.0, "learning_rate": 1.999482021663947e-05, "loss": 1.1573, "step": 1021 }, { "epoch": 0.03998747945848658, "grad_norm": 0.0, "learning_rate": 1.999477935421306e-05, "loss": 1.2461, "step": 1022 }, { "epoch": 0.040026606150716015, "grad_norm": 0.0, "learning_rate": 1.9994738331283026e-05, "loss": 1.3088, "step": 1023 }, { "epoch": 0.040065732842945456, "grad_norm": 0.0, "learning_rate": 1.9994697147850016e-05, "loss": 1.3212, "step": 1024 }, { "epoch": 0.0401048595351749, "grad_norm": 0.0, "learning_rate": 1.999465580391469e-05, "loss": 1.1978, "step": 1025 }, { "epoch": 0.04014398622740434, "grad_norm": 0.0, "learning_rate": 1.9994614299477723e-05, "loss": 1.38, "step": 1026 }, { "epoch": 0.04018311291963377, "grad_norm": 0.0, "learning_rate": 1.9994572634539767e-05, "loss": 1.3429, "step": 1027 }, { "epoch": 0.04022223961186321, "grad_norm": 0.0, "learning_rate": 1.9994530809101503e-05, "loss": 1.3749, "step": 1028 }, { "epoch": 0.040261366304092654, "grad_norm": 0.0, "learning_rate": 1.99944888231636e-05, "loss": 1.319, "step": 1029 }, { "epoch": 0.04030049299632209, "grad_norm": 0.0, "learning_rate": 1.9994446676726723e-05, "loss": 1.3461, "step": 1030 }, { "epoch": 0.04033961968855153, "grad_norm": 0.0, "learning_rate": 1.9994404369791563e-05, "loss": 1.2871, "step": 1031 }, { "epoch": 0.04037874638078097, "grad_norm": 0.0, "learning_rate": 1.999436190235879e-05, "loss": 1.434, "step": 1032 }, { "epoch": 0.04041787307301041, "grad_norm": 0.0, "learning_rate": 1.9994319274429088e-05, "loss": 1.3204, "step": 1033 }, { "epoch": 0.040456999765239844, "grad_norm": 0.0, "learning_rate": 1.999427648600314e-05, "loss": 1.3728, "step": 1034 }, { "epoch": 0.040496126457469285, "grad_norm": 0.0, "learning_rate": 1.999423353708164e-05, "loss": 1.3203, "step": 1035 }, { "epoch": 0.040535253149698726, "grad_norm": 0.0, "learning_rate": 1.999419042766527e-05, "loss": 1.2211, "step": 1036 }, { "epoch": 0.04057437984192817, "grad_norm": 0.0, "learning_rate": 1.9994147157754727e-05, "loss": 1.2312, "step": 1037 }, { "epoch": 0.0406135065341576, "grad_norm": 0.0, "learning_rate": 1.9994103727350702e-05, "loss": 1.2343, "step": 1038 }, { "epoch": 0.04065263322638704, "grad_norm": 0.0, "learning_rate": 1.9994060136453894e-05, "loss": 1.1444, "step": 1039 }, { "epoch": 0.04069175991861648, "grad_norm": 0.0, "learning_rate": 1.9994016385065005e-05, "loss": 1.3528, "step": 1040 }, { "epoch": 0.040730886610845916, "grad_norm": 0.0, "learning_rate": 1.9993972473184736e-05, "loss": 1.4061, "step": 1041 }, { "epoch": 0.04077001330307536, "grad_norm": 0.0, "learning_rate": 1.999392840081379e-05, "loss": 1.2121, "step": 1042 }, { "epoch": 0.0408091399953048, "grad_norm": 0.0, "learning_rate": 1.999388416795288e-05, "loss": 1.2636, "step": 1043 }, { "epoch": 0.04084826668753424, "grad_norm": 0.0, "learning_rate": 1.999383977460271e-05, "loss": 1.3624, "step": 1044 }, { "epoch": 0.04088739337976367, "grad_norm": 0.0, "learning_rate": 1.9993795220763997e-05, "loss": 1.2493, "step": 1045 }, { "epoch": 0.040926520071993114, "grad_norm": 0.0, "learning_rate": 1.999375050643746e-05, "loss": 1.3727, "step": 1046 }, { "epoch": 0.040965646764222555, "grad_norm": 0.0, "learning_rate": 1.9993705631623807e-05, "loss": 1.2955, "step": 1047 }, { "epoch": 0.04100477345645199, "grad_norm": 0.0, "learning_rate": 1.999366059632377e-05, "loss": 1.3702, "step": 1048 }, { "epoch": 0.04104390014868143, "grad_norm": 0.0, "learning_rate": 1.999361540053806e-05, "loss": 1.2275, "step": 1049 }, { "epoch": 0.04108302684091087, "grad_norm": 0.0, "learning_rate": 1.9993570044267415e-05, "loss": 1.3579, "step": 1050 }, { "epoch": 0.04112215353314031, "grad_norm": 0.0, "learning_rate": 1.9993524527512556e-05, "loss": 1.3281, "step": 1051 }, { "epoch": 0.041161280225369745, "grad_norm": 0.0, "learning_rate": 1.9993478850274214e-05, "loss": 1.3505, "step": 1052 }, { "epoch": 0.041200406917599186, "grad_norm": 0.0, "learning_rate": 1.9993433012553128e-05, "loss": 1.1343, "step": 1053 }, { "epoch": 0.04123953360982863, "grad_norm": 0.0, "learning_rate": 1.9993387014350027e-05, "loss": 1.2709, "step": 1054 }, { "epoch": 0.04127866030205806, "grad_norm": 0.0, "learning_rate": 1.9993340855665656e-05, "loss": 1.3373, "step": 1055 }, { "epoch": 0.0413177869942875, "grad_norm": 0.0, "learning_rate": 1.999329453650075e-05, "loss": 1.2472, "step": 1056 }, { "epoch": 0.04135691368651694, "grad_norm": 0.0, "learning_rate": 1.9993248056856055e-05, "loss": 1.2744, "step": 1057 }, { "epoch": 0.041396040378746383, "grad_norm": 0.0, "learning_rate": 1.9993201416732322e-05, "loss": 1.4343, "step": 1058 }, { "epoch": 0.04143516707097582, "grad_norm": 0.0, "learning_rate": 1.9993154616130293e-05, "loss": 1.3251, "step": 1059 }, { "epoch": 0.04147429376320526, "grad_norm": 0.0, "learning_rate": 1.9993107655050727e-05, "loss": 1.2797, "step": 1060 }, { "epoch": 0.0415134204554347, "grad_norm": 0.0, "learning_rate": 1.999306053349437e-05, "loss": 1.2936, "step": 1061 }, { "epoch": 0.04155254714766413, "grad_norm": 0.0, "learning_rate": 1.9993013251461987e-05, "loss": 1.2452, "step": 1062 }, { "epoch": 0.041591673839893574, "grad_norm": 0.0, "learning_rate": 1.999296580895433e-05, "loss": 1.2907, "step": 1063 }, { "epoch": 0.041630800532123015, "grad_norm": 0.0, "learning_rate": 1.9992918205972164e-05, "loss": 1.255, "step": 1064 }, { "epoch": 0.041669927224352456, "grad_norm": 0.0, "learning_rate": 1.9992870442516257e-05, "loss": 1.2142, "step": 1065 }, { "epoch": 0.04170905391658189, "grad_norm": 0.0, "learning_rate": 1.999282251858737e-05, "loss": 1.3812, "step": 1066 }, { "epoch": 0.04174818060881133, "grad_norm": 0.0, "learning_rate": 1.9992774434186275e-05, "loss": 1.363, "step": 1067 }, { "epoch": 0.04178730730104077, "grad_norm": 0.0, "learning_rate": 1.999272618931374e-05, "loss": 1.1848, "step": 1068 }, { "epoch": 0.04182643399327021, "grad_norm": 0.0, "learning_rate": 1.999267778397055e-05, "loss": 1.2239, "step": 1069 }, { "epoch": 0.041865560685499646, "grad_norm": 0.0, "learning_rate": 1.9992629218157478e-05, "loss": 1.3157, "step": 1070 }, { "epoch": 0.04190468737772909, "grad_norm": 0.0, "learning_rate": 1.9992580491875296e-05, "loss": 1.2838, "step": 1071 }, { "epoch": 0.04194381406995853, "grad_norm": 0.0, "learning_rate": 1.99925316051248e-05, "loss": 1.3483, "step": 1072 }, { "epoch": 0.04198294076218796, "grad_norm": 0.0, "learning_rate": 1.999248255790676e-05, "loss": 1.2932, "step": 1073 }, { "epoch": 0.0420220674544174, "grad_norm": 0.0, "learning_rate": 1.9992433350221976e-05, "loss": 1.3293, "step": 1074 }, { "epoch": 0.042061194146646844, "grad_norm": 0.0, "learning_rate": 1.999238398207123e-05, "loss": 1.3144, "step": 1075 }, { "epoch": 0.042100320838876285, "grad_norm": 0.0, "learning_rate": 1.9992334453455322e-05, "loss": 1.3781, "step": 1076 }, { "epoch": 0.04213944753110572, "grad_norm": 0.0, "learning_rate": 1.9992284764375042e-05, "loss": 1.3487, "step": 1077 }, { "epoch": 0.04217857422333516, "grad_norm": 0.0, "learning_rate": 1.999223491483119e-05, "loss": 1.2717, "step": 1078 }, { "epoch": 0.0422177009155646, "grad_norm": 0.0, "learning_rate": 1.9992184904824566e-05, "loss": 1.4091, "step": 1079 }, { "epoch": 0.042256827607794034, "grad_norm": 0.0, "learning_rate": 1.9992134734355974e-05, "loss": 1.0168, "step": 1080 }, { "epoch": 0.042295954300023475, "grad_norm": 0.0, "learning_rate": 1.999208440342622e-05, "loss": 1.1735, "step": 1081 }, { "epoch": 0.042335080992252916, "grad_norm": 0.0, "learning_rate": 1.999203391203611e-05, "loss": 1.256, "step": 1082 }, { "epoch": 0.04237420768448236, "grad_norm": 0.0, "learning_rate": 1.9991983260186452e-05, "loss": 1.3036, "step": 1083 }, { "epoch": 0.04241333437671179, "grad_norm": 0.0, "learning_rate": 1.999193244787807e-05, "loss": 1.2602, "step": 1084 }, { "epoch": 0.04245246106894123, "grad_norm": 0.0, "learning_rate": 1.9991881475111773e-05, "loss": 1.4188, "step": 1085 }, { "epoch": 0.04249158776117067, "grad_norm": 0.0, "learning_rate": 1.9991830341888375e-05, "loss": 1.3242, "step": 1086 }, { "epoch": 0.042530714453400106, "grad_norm": 0.0, "learning_rate": 1.9991779048208707e-05, "loss": 1.3475, "step": 1087 }, { "epoch": 0.04256984114562955, "grad_norm": 0.0, "learning_rate": 1.9991727594073585e-05, "loss": 1.2897, "step": 1088 }, { "epoch": 0.04260896783785899, "grad_norm": 0.0, "learning_rate": 1.9991675979483844e-05, "loss": 1.3208, "step": 1089 }, { "epoch": 0.04264809453008843, "grad_norm": 0.0, "learning_rate": 1.99916242044403e-05, "loss": 1.4097, "step": 1090 }, { "epoch": 0.04268722122231786, "grad_norm": 0.0, "learning_rate": 1.9991572268943793e-05, "loss": 1.3672, "step": 1091 }, { "epoch": 0.042726347914547304, "grad_norm": 0.0, "learning_rate": 1.9991520172995158e-05, "loss": 1.2902, "step": 1092 }, { "epoch": 0.042765474606776745, "grad_norm": 0.0, "learning_rate": 1.9991467916595226e-05, "loss": 1.1862, "step": 1093 }, { "epoch": 0.04280460129900618, "grad_norm": 0.0, "learning_rate": 1.9991415499744842e-05, "loss": 1.2642, "step": 1094 }, { "epoch": 0.04284372799123562, "grad_norm": 0.0, "learning_rate": 1.9991362922444842e-05, "loss": 1.2922, "step": 1095 }, { "epoch": 0.04288285468346506, "grad_norm": 0.0, "learning_rate": 1.9991310184696076e-05, "loss": 1.2566, "step": 1096 }, { "epoch": 0.0429219813756945, "grad_norm": 0.0, "learning_rate": 1.9991257286499386e-05, "loss": 1.1525, "step": 1097 }, { "epoch": 0.042961108067923935, "grad_norm": 0.0, "learning_rate": 1.9991204227855627e-05, "loss": 1.2256, "step": 1098 }, { "epoch": 0.043000234760153376, "grad_norm": 0.0, "learning_rate": 1.9991151008765644e-05, "loss": 1.4133, "step": 1099 }, { "epoch": 0.04303936145238282, "grad_norm": 0.0, "learning_rate": 1.9991097629230298e-05, "loss": 1.1948, "step": 1100 }, { "epoch": 0.04307848814461226, "grad_norm": 0.0, "learning_rate": 1.999104408925044e-05, "loss": 1.5092, "step": 1101 }, { "epoch": 0.04311761483684169, "grad_norm": 0.0, "learning_rate": 1.999099038882694e-05, "loss": 1.2379, "step": 1102 }, { "epoch": 0.04315674152907113, "grad_norm": 0.0, "learning_rate": 1.999093652796065e-05, "loss": 1.2388, "step": 1103 }, { "epoch": 0.04319586822130057, "grad_norm": 0.0, "learning_rate": 1.999088250665244e-05, "loss": 1.4178, "step": 1104 }, { "epoch": 0.04323499491353001, "grad_norm": 0.0, "learning_rate": 1.999082832490317e-05, "loss": 1.2202, "step": 1105 }, { "epoch": 0.04327412160575945, "grad_norm": 0.0, "learning_rate": 1.9990773982713725e-05, "loss": 1.1854, "step": 1106 }, { "epoch": 0.04331324829798889, "grad_norm": 0.0, "learning_rate": 1.9990719480084966e-05, "loss": 1.3325, "step": 1107 }, { "epoch": 0.04335237499021833, "grad_norm": 0.0, "learning_rate": 1.999066481701777e-05, "loss": 1.2999, "step": 1108 }, { "epoch": 0.043391501682447764, "grad_norm": 0.0, "learning_rate": 1.9990609993513014e-05, "loss": 1.3221, "step": 1109 }, { "epoch": 0.043430628374677205, "grad_norm": 0.0, "learning_rate": 1.9990555009571582e-05, "loss": 1.3391, "step": 1110 }, { "epoch": 0.043469755066906646, "grad_norm": 0.0, "learning_rate": 1.999049986519436e-05, "loss": 1.2958, "step": 1111 }, { "epoch": 0.04350888175913608, "grad_norm": 0.0, "learning_rate": 1.9990444560382224e-05, "loss": 1.2324, "step": 1112 }, { "epoch": 0.04354800845136552, "grad_norm": 0.0, "learning_rate": 1.9990389095136068e-05, "loss": 1.3754, "step": 1113 }, { "epoch": 0.04358713514359496, "grad_norm": 0.0, "learning_rate": 1.9990333469456784e-05, "loss": 1.3578, "step": 1114 }, { "epoch": 0.0436262618358244, "grad_norm": 0.0, "learning_rate": 1.999027768334526e-05, "loss": 1.272, "step": 1115 }, { "epoch": 0.043665388528053836, "grad_norm": 0.0, "learning_rate": 1.9990221736802398e-05, "loss": 1.1941, "step": 1116 }, { "epoch": 0.04370451522028328, "grad_norm": 0.0, "learning_rate": 1.999016562982909e-05, "loss": 1.4199, "step": 1117 }, { "epoch": 0.04374364191251272, "grad_norm": 0.0, "learning_rate": 1.9990109362426243e-05, "loss": 1.3228, "step": 1118 }, { "epoch": 0.04378276860474215, "grad_norm": 0.0, "learning_rate": 1.9990052934594753e-05, "loss": 1.247, "step": 1119 }, { "epoch": 0.04382189529697159, "grad_norm": 0.0, "learning_rate": 1.998999634633554e-05, "loss": 1.2927, "step": 1120 }, { "epoch": 0.043861021989201034, "grad_norm": 0.0, "learning_rate": 1.9989939597649497e-05, "loss": 1.4026, "step": 1121 }, { "epoch": 0.043900148681430474, "grad_norm": 0.0, "learning_rate": 1.9989882688537542e-05, "loss": 1.1652, "step": 1122 }, { "epoch": 0.04393927537365991, "grad_norm": 0.0, "learning_rate": 1.9989825619000593e-05, "loss": 1.3982, "step": 1123 }, { "epoch": 0.04397840206588935, "grad_norm": 0.0, "learning_rate": 1.998976838903956e-05, "loss": 1.2191, "step": 1124 }, { "epoch": 0.04401752875811879, "grad_norm": 0.0, "learning_rate": 1.9989710998655365e-05, "loss": 1.3917, "step": 1125 }, { "epoch": 0.044056655450348224, "grad_norm": 0.0, "learning_rate": 1.998965344784893e-05, "loss": 1.4061, "step": 1126 }, { "epoch": 0.044095782142577665, "grad_norm": 0.0, "learning_rate": 1.9989595736621178e-05, "loss": 1.2428, "step": 1127 }, { "epoch": 0.044134908834807106, "grad_norm": 0.0, "learning_rate": 1.9989537864973037e-05, "loss": 1.2302, "step": 1128 }, { "epoch": 0.04417403552703655, "grad_norm": 0.0, "learning_rate": 1.9989479832905432e-05, "loss": 1.1662, "step": 1129 }, { "epoch": 0.04421316221926598, "grad_norm": 0.0, "learning_rate": 1.99894216404193e-05, "loss": 1.3441, "step": 1130 }, { "epoch": 0.04425228891149542, "grad_norm": 0.0, "learning_rate": 1.9989363287515577e-05, "loss": 1.3217, "step": 1131 }, { "epoch": 0.04429141560372486, "grad_norm": 0.0, "learning_rate": 1.9989304774195194e-05, "loss": 1.1744, "step": 1132 }, { "epoch": 0.0443305422959543, "grad_norm": 0.0, "learning_rate": 1.998924610045909e-05, "loss": 1.2409, "step": 1133 }, { "epoch": 0.04436966898818374, "grad_norm": 0.0, "learning_rate": 1.998918726630822e-05, "loss": 1.3866, "step": 1134 }, { "epoch": 0.04440879568041318, "grad_norm": 0.0, "learning_rate": 1.9989128271743512e-05, "loss": 1.2281, "step": 1135 }, { "epoch": 0.04444792237264262, "grad_norm": 0.0, "learning_rate": 1.9989069116765924e-05, "loss": 1.3711, "step": 1136 }, { "epoch": 0.04448704906487205, "grad_norm": 0.0, "learning_rate": 1.9989009801376403e-05, "loss": 1.3267, "step": 1137 }, { "epoch": 0.044526175757101494, "grad_norm": 0.0, "learning_rate": 1.99889503255759e-05, "loss": 1.4145, "step": 1138 }, { "epoch": 0.044565302449330935, "grad_norm": 0.0, "learning_rate": 1.9988890689365374e-05, "loss": 1.3125, "step": 1139 }, { "epoch": 0.044604429141560376, "grad_norm": 0.0, "learning_rate": 1.998883089274578e-05, "loss": 1.3785, "step": 1140 }, { "epoch": 0.04464355583378981, "grad_norm": 0.0, "learning_rate": 1.9988770935718075e-05, "loss": 1.1849, "step": 1141 }, { "epoch": 0.04468268252601925, "grad_norm": 0.0, "learning_rate": 1.998871081828323e-05, "loss": 1.2139, "step": 1142 }, { "epoch": 0.04472180921824869, "grad_norm": 0.0, "learning_rate": 1.9988650540442207e-05, "loss": 1.2544, "step": 1143 }, { "epoch": 0.044760935910478125, "grad_norm": 0.0, "learning_rate": 1.9988590102195968e-05, "loss": 1.1888, "step": 1144 }, { "epoch": 0.044800062602707566, "grad_norm": 0.0, "learning_rate": 1.9988529503545488e-05, "loss": 1.267, "step": 1145 }, { "epoch": 0.04483918929493701, "grad_norm": 0.0, "learning_rate": 1.9988468744491744e-05, "loss": 1.1844, "step": 1146 }, { "epoch": 0.04487831598716645, "grad_norm": 0.0, "learning_rate": 1.9988407825035704e-05, "loss": 1.2526, "step": 1147 }, { "epoch": 0.04491744267939588, "grad_norm": 0.0, "learning_rate": 1.9988346745178356e-05, "loss": 1.2491, "step": 1148 }, { "epoch": 0.04495656937162532, "grad_norm": 0.0, "learning_rate": 1.9988285504920672e-05, "loss": 1.25, "step": 1149 }, { "epoch": 0.04499569606385476, "grad_norm": 0.0, "learning_rate": 1.9988224104263642e-05, "loss": 1.3207, "step": 1150 }, { "epoch": 0.0450348227560842, "grad_norm": 0.0, "learning_rate": 1.9988162543208245e-05, "loss": 1.172, "step": 1151 }, { "epoch": 0.04507394944831364, "grad_norm": 0.0, "learning_rate": 1.9988100821755474e-05, "loss": 1.3838, "step": 1152 }, { "epoch": 0.04511307614054308, "grad_norm": 0.0, "learning_rate": 1.998803893990632e-05, "loss": 1.2557, "step": 1153 }, { "epoch": 0.04515220283277252, "grad_norm": 0.0, "learning_rate": 1.9987976897661777e-05, "loss": 1.2975, "step": 1154 }, { "epoch": 0.045191329525001954, "grad_norm": 0.0, "learning_rate": 1.998791469502284e-05, "loss": 1.3393, "step": 1155 }, { "epoch": 0.045230456217231395, "grad_norm": 0.0, "learning_rate": 1.998785233199051e-05, "loss": 1.2532, "step": 1156 }, { "epoch": 0.045269582909460836, "grad_norm": 0.0, "learning_rate": 1.9987789808565785e-05, "loss": 1.3035, "step": 1157 }, { "epoch": 0.04530870960169028, "grad_norm": 0.0, "learning_rate": 1.9987727124749673e-05, "loss": 1.4033, "step": 1158 }, { "epoch": 0.04534783629391971, "grad_norm": 0.0, "learning_rate": 1.9987664280543177e-05, "loss": 1.2786, "step": 1159 }, { "epoch": 0.04538696298614915, "grad_norm": 0.0, "learning_rate": 1.998760127594731e-05, "loss": 1.312, "step": 1160 }, { "epoch": 0.04542608967837859, "grad_norm": 0.0, "learning_rate": 1.998753811096308e-05, "loss": 1.2175, "step": 1161 }, { "epoch": 0.045465216370608026, "grad_norm": 0.0, "learning_rate": 1.9987474785591502e-05, "loss": 1.4963, "step": 1162 }, { "epoch": 0.04550434306283747, "grad_norm": 0.0, "learning_rate": 1.9987411299833598e-05, "loss": 1.2417, "step": 1163 }, { "epoch": 0.04554346975506691, "grad_norm": 0.0, "learning_rate": 1.998734765369038e-05, "loss": 1.2422, "step": 1164 }, { "epoch": 0.04558259644729635, "grad_norm": 0.0, "learning_rate": 1.9987283847162873e-05, "loss": 1.2487, "step": 1165 }, { "epoch": 0.04562172313952578, "grad_norm": 0.0, "learning_rate": 1.99872198802521e-05, "loss": 1.2296, "step": 1166 }, { "epoch": 0.045660849831755224, "grad_norm": 0.0, "learning_rate": 1.9987155752959094e-05, "loss": 1.3336, "step": 1167 }, { "epoch": 0.045699976523984664, "grad_norm": 0.0, "learning_rate": 1.9987091465284884e-05, "loss": 1.3399, "step": 1168 }, { "epoch": 0.0457391032162141, "grad_norm": 0.0, "learning_rate": 1.9987027017230497e-05, "loss": 1.3513, "step": 1169 }, { "epoch": 0.04577822990844354, "grad_norm": 0.0, "learning_rate": 1.9986962408796972e-05, "loss": 1.3708, "step": 1170 }, { "epoch": 0.04581735660067298, "grad_norm": 0.0, "learning_rate": 1.998689763998534e-05, "loss": 1.3723, "step": 1171 }, { "epoch": 0.04585648329290242, "grad_norm": 0.0, "learning_rate": 1.998683271079665e-05, "loss": 1.2317, "step": 1172 }, { "epoch": 0.045895609985131855, "grad_norm": 0.0, "learning_rate": 1.998676762123194e-05, "loss": 1.4271, "step": 1173 }, { "epoch": 0.045934736677361296, "grad_norm": 0.0, "learning_rate": 1.9986702371292256e-05, "loss": 1.4222, "step": 1174 }, { "epoch": 0.04597386336959074, "grad_norm": 0.0, "learning_rate": 1.9986636960978646e-05, "loss": 1.163, "step": 1175 }, { "epoch": 0.04601299006182017, "grad_norm": 0.0, "learning_rate": 1.9986571390292162e-05, "loss": 1.425, "step": 1176 }, { "epoch": 0.04605211675404961, "grad_norm": 0.0, "learning_rate": 1.9986505659233853e-05, "loss": 1.3381, "step": 1177 }, { "epoch": 0.04609124344627905, "grad_norm": 0.0, "learning_rate": 1.998643976780478e-05, "loss": 1.3591, "step": 1178 }, { "epoch": 0.04613037013850849, "grad_norm": 0.0, "learning_rate": 1.9986373716005995e-05, "loss": 1.3948, "step": 1179 }, { "epoch": 0.04616949683073793, "grad_norm": 0.0, "learning_rate": 1.9986307503838563e-05, "loss": 1.3328, "step": 1180 }, { "epoch": 0.04620862352296737, "grad_norm": 0.0, "learning_rate": 1.9986241131303545e-05, "loss": 1.2915, "step": 1181 }, { "epoch": 0.04624775021519681, "grad_norm": 0.0, "learning_rate": 1.9986174598402012e-05, "loss": 1.2141, "step": 1182 }, { "epoch": 0.04628687690742624, "grad_norm": 0.0, "learning_rate": 1.998610790513502e-05, "loss": 1.319, "step": 1183 }, { "epoch": 0.046326003599655684, "grad_norm": 0.0, "learning_rate": 1.9986041051503656e-05, "loss": 1.2749, "step": 1184 }, { "epoch": 0.046365130291885125, "grad_norm": 0.0, "learning_rate": 1.9985974037508984e-05, "loss": 1.2271, "step": 1185 }, { "epoch": 0.046404256984114566, "grad_norm": 0.0, "learning_rate": 1.998590686315208e-05, "loss": 1.1765, "step": 1186 }, { "epoch": 0.046443383676344, "grad_norm": 0.0, "learning_rate": 1.9985839528434024e-05, "loss": 1.4022, "step": 1187 }, { "epoch": 0.04648251036857344, "grad_norm": 0.0, "learning_rate": 1.9985772033355904e-05, "loss": 1.2337, "step": 1188 }, { "epoch": 0.04652163706080288, "grad_norm": 0.0, "learning_rate": 1.9985704377918794e-05, "loss": 1.3912, "step": 1189 }, { "epoch": 0.04656076375303232, "grad_norm": 0.0, "learning_rate": 1.9985636562123782e-05, "loss": 1.2811, "step": 1190 }, { "epoch": 0.046599890445261756, "grad_norm": 0.0, "learning_rate": 1.9985568585971965e-05, "loss": 1.2588, "step": 1191 }, { "epoch": 0.0466390171374912, "grad_norm": 0.0, "learning_rate": 1.9985500449464427e-05, "loss": 1.3692, "step": 1192 }, { "epoch": 0.04667814382972064, "grad_norm": 0.0, "learning_rate": 1.9985432152602263e-05, "loss": 1.2274, "step": 1193 }, { "epoch": 0.04671727052195007, "grad_norm": 0.0, "learning_rate": 1.9985363695386574e-05, "loss": 1.3125, "step": 1194 }, { "epoch": 0.04675639721417951, "grad_norm": 0.0, "learning_rate": 1.998529507781845e-05, "loss": 1.3956, "step": 1195 }, { "epoch": 0.04679552390640895, "grad_norm": 0.0, "learning_rate": 1.9985226299899006e-05, "loss": 1.3129, "step": 1196 }, { "epoch": 0.046834650598638394, "grad_norm": 0.0, "learning_rate": 1.9985157361629338e-05, "loss": 1.3396, "step": 1197 }, { "epoch": 0.04687377729086783, "grad_norm": 0.0, "learning_rate": 1.9985088263010554e-05, "loss": 1.2857, "step": 1198 }, { "epoch": 0.04691290398309727, "grad_norm": 0.0, "learning_rate": 1.998501900404376e-05, "loss": 1.3507, "step": 1199 }, { "epoch": 0.04695203067532671, "grad_norm": 0.0, "learning_rate": 1.9984949584730082e-05, "loss": 1.3057, "step": 1200 }, { "epoch": 0.046991157367556144, "grad_norm": 0.0, "learning_rate": 1.998488000507062e-05, "loss": 1.3711, "step": 1201 }, { "epoch": 0.047030284059785585, "grad_norm": 0.0, "learning_rate": 1.99848102650665e-05, "loss": 1.2334, "step": 1202 }, { "epoch": 0.047069410752015026, "grad_norm": 0.0, "learning_rate": 1.9984740364718833e-05, "loss": 1.4353, "step": 1203 }, { "epoch": 0.04710853744424447, "grad_norm": 0.0, "learning_rate": 1.998467030402875e-05, "loss": 1.3066, "step": 1204 }, { "epoch": 0.0471476641364739, "grad_norm": 0.0, "learning_rate": 1.9984600082997376e-05, "loss": 1.2462, "step": 1205 }, { "epoch": 0.04718679082870334, "grad_norm": 0.0, "learning_rate": 1.9984529701625838e-05, "loss": 1.4004, "step": 1206 }, { "epoch": 0.04722591752093278, "grad_norm": 0.0, "learning_rate": 1.9984459159915256e-05, "loss": 1.2304, "step": 1207 }, { "epoch": 0.047265044213162216, "grad_norm": 0.0, "learning_rate": 1.998438845786678e-05, "loss": 1.2768, "step": 1208 }, { "epoch": 0.04730417090539166, "grad_norm": 0.0, "learning_rate": 1.998431759548153e-05, "loss": 1.1823, "step": 1209 }, { "epoch": 0.0473432975976211, "grad_norm": 0.0, "learning_rate": 1.9984246572760653e-05, "loss": 1.3138, "step": 1210 }, { "epoch": 0.04738242428985054, "grad_norm": 0.0, "learning_rate": 1.9984175389705286e-05, "loss": 1.226, "step": 1211 }, { "epoch": 0.04742155098207997, "grad_norm": 0.0, "learning_rate": 1.9984104046316576e-05, "loss": 1.2095, "step": 1212 }, { "epoch": 0.047460677674309414, "grad_norm": 0.0, "learning_rate": 1.9984032542595663e-05, "loss": 1.3292, "step": 1213 }, { "epoch": 0.047499804366538854, "grad_norm": 0.0, "learning_rate": 1.99839608785437e-05, "loss": 1.2545, "step": 1214 }, { "epoch": 0.04753893105876829, "grad_norm": 0.0, "learning_rate": 1.9983889054161835e-05, "loss": 1.3263, "step": 1215 }, { "epoch": 0.04757805775099773, "grad_norm": 0.0, "learning_rate": 1.9983817069451223e-05, "loss": 1.3216, "step": 1216 }, { "epoch": 0.04761718444322717, "grad_norm": 0.0, "learning_rate": 1.998374492441302e-05, "loss": 1.2236, "step": 1217 }, { "epoch": 0.04765631113545661, "grad_norm": 0.0, "learning_rate": 1.998367261904838e-05, "loss": 1.3915, "step": 1218 }, { "epoch": 0.047695437827686045, "grad_norm": 0.0, "learning_rate": 1.998360015335847e-05, "loss": 1.2211, "step": 1219 }, { "epoch": 0.047734564519915486, "grad_norm": 0.0, "learning_rate": 1.998352752734445e-05, "loss": 1.1931, "step": 1220 }, { "epoch": 0.04777369121214493, "grad_norm": 0.0, "learning_rate": 1.9983454741007492e-05, "loss": 1.2599, "step": 1221 }, { "epoch": 0.04781281790437437, "grad_norm": 0.0, "learning_rate": 1.998338179434876e-05, "loss": 1.2902, "step": 1222 }, { "epoch": 0.0478519445966038, "grad_norm": 0.0, "learning_rate": 1.9983308687369423e-05, "loss": 1.2902, "step": 1223 }, { "epoch": 0.04789107128883324, "grad_norm": 0.0, "learning_rate": 1.998323542007066e-05, "loss": 1.3695, "step": 1224 }, { "epoch": 0.04793019798106268, "grad_norm": 0.0, "learning_rate": 1.9983161992453646e-05, "loss": 1.274, "step": 1225 }, { "epoch": 0.04796932467329212, "grad_norm": 0.0, "learning_rate": 1.998308840451956e-05, "loss": 1.16, "step": 1226 }, { "epoch": 0.04800845136552156, "grad_norm": 0.0, "learning_rate": 1.9983014656269584e-05, "loss": 1.2758, "step": 1227 }, { "epoch": 0.048047578057751, "grad_norm": 0.0, "learning_rate": 1.99829407477049e-05, "loss": 1.2446, "step": 1228 }, { "epoch": 0.04808670474998044, "grad_norm": 0.0, "learning_rate": 1.99828666788267e-05, "loss": 1.3047, "step": 1229 }, { "epoch": 0.048125831442209874, "grad_norm": 0.0, "learning_rate": 1.9982792449636167e-05, "loss": 1.2758, "step": 1230 }, { "epoch": 0.048164958134439315, "grad_norm": 0.0, "learning_rate": 1.99827180601345e-05, "loss": 1.1571, "step": 1231 }, { "epoch": 0.048204084826668755, "grad_norm": 0.0, "learning_rate": 1.998264351032289e-05, "loss": 1.304, "step": 1232 }, { "epoch": 0.04824321151889819, "grad_norm": 0.0, "learning_rate": 1.9982568800202532e-05, "loss": 1.1532, "step": 1233 }, { "epoch": 0.04828233821112763, "grad_norm": 0.0, "learning_rate": 1.9982493929774627e-05, "loss": 1.2505, "step": 1234 }, { "epoch": 0.04832146490335707, "grad_norm": 0.0, "learning_rate": 1.998241889904038e-05, "loss": 1.3256, "step": 1235 }, { "epoch": 0.04836059159558651, "grad_norm": 0.0, "learning_rate": 1.9982343708000994e-05, "loss": 1.297, "step": 1236 }, { "epoch": 0.048399718287815946, "grad_norm": 0.0, "learning_rate": 1.9982268356657675e-05, "loss": 1.4338, "step": 1237 }, { "epoch": 0.04843884498004539, "grad_norm": 0.0, "learning_rate": 1.998219284501164e-05, "loss": 1.2986, "step": 1238 }, { "epoch": 0.04847797167227483, "grad_norm": 0.0, "learning_rate": 1.998211717306409e-05, "loss": 1.2132, "step": 1239 }, { "epoch": 0.04851709836450426, "grad_norm": 0.0, "learning_rate": 1.998204134081625e-05, "loss": 1.3682, "step": 1240 }, { "epoch": 0.0485562250567337, "grad_norm": 0.0, "learning_rate": 1.9981965348269333e-05, "loss": 1.2611, "step": 1241 }, { "epoch": 0.04859535174896314, "grad_norm": 0.0, "learning_rate": 1.998188919542456e-05, "loss": 1.3163, "step": 1242 }, { "epoch": 0.048634478441192584, "grad_norm": 0.0, "learning_rate": 1.9981812882283157e-05, "loss": 1.2839, "step": 1243 }, { "epoch": 0.04867360513342202, "grad_norm": 0.0, "learning_rate": 1.9981736408846345e-05, "loss": 1.3353, "step": 1244 }, { "epoch": 0.04871273182565146, "grad_norm": 0.0, "learning_rate": 1.9981659775115357e-05, "loss": 1.301, "step": 1245 }, { "epoch": 0.0487518585178809, "grad_norm": 0.0, "learning_rate": 1.9981582981091418e-05, "loss": 1.3343, "step": 1246 }, { "epoch": 0.048790985210110334, "grad_norm": 0.0, "learning_rate": 1.9981506026775763e-05, "loss": 1.2767, "step": 1247 }, { "epoch": 0.048830111902339775, "grad_norm": 0.0, "learning_rate": 1.998142891216963e-05, "loss": 1.2328, "step": 1248 }, { "epoch": 0.048869238594569216, "grad_norm": 0.0, "learning_rate": 1.9981351637274254e-05, "loss": 1.3132, "step": 1249 }, { "epoch": 0.048908365286798657, "grad_norm": 0.0, "learning_rate": 1.9981274202090884e-05, "loss": 1.2146, "step": 1250 }, { "epoch": 0.04894749197902809, "grad_norm": 0.0, "learning_rate": 1.9981196606620755e-05, "loss": 1.2127, "step": 1251 }, { "epoch": 0.04898661867125753, "grad_norm": 0.0, "learning_rate": 1.998111885086511e-05, "loss": 1.238, "step": 1252 }, { "epoch": 0.04902574536348697, "grad_norm": 0.0, "learning_rate": 1.9981040934825212e-05, "loss": 1.3955, "step": 1253 }, { "epoch": 0.04906487205571641, "grad_norm": 0.0, "learning_rate": 1.99809628585023e-05, "loss": 1.2358, "step": 1254 }, { "epoch": 0.04910399874794585, "grad_norm": 0.0, "learning_rate": 1.9980884621897627e-05, "loss": 1.3937, "step": 1255 }, { "epoch": 0.04914312544017529, "grad_norm": 0.0, "learning_rate": 1.9980806225012456e-05, "loss": 1.4086, "step": 1256 }, { "epoch": 0.04918225213240473, "grad_norm": 0.0, "learning_rate": 1.9980727667848047e-05, "loss": 1.3369, "step": 1257 }, { "epoch": 0.04922137882463416, "grad_norm": 0.0, "learning_rate": 1.9980648950405656e-05, "loss": 1.2438, "step": 1258 }, { "epoch": 0.049260505516863604, "grad_norm": 0.0, "learning_rate": 1.9980570072686548e-05, "loss": 1.3087, "step": 1259 }, { "epoch": 0.049299632209093044, "grad_norm": 0.0, "learning_rate": 1.9980491034691994e-05, "loss": 1.2571, "step": 1260 }, { "epoch": 0.049338758901322485, "grad_norm": 0.0, "learning_rate": 1.9980411836423256e-05, "loss": 1.3216, "step": 1261 }, { "epoch": 0.04937788559355192, "grad_norm": 0.0, "learning_rate": 1.9980332477881613e-05, "loss": 1.2319, "step": 1262 }, { "epoch": 0.04941701228578136, "grad_norm": 0.0, "learning_rate": 1.998025295906834e-05, "loss": 1.1716, "step": 1263 }, { "epoch": 0.0494561389780108, "grad_norm": 0.0, "learning_rate": 1.9980173279984706e-05, "loss": 1.3242, "step": 1264 }, { "epoch": 0.049495265670240235, "grad_norm": 0.0, "learning_rate": 1.9980093440631997e-05, "loss": 1.1636, "step": 1265 }, { "epoch": 0.049534392362469676, "grad_norm": 0.0, "learning_rate": 1.998001344101149e-05, "loss": 1.2932, "step": 1266 }, { "epoch": 0.04957351905469912, "grad_norm": 0.0, "learning_rate": 1.9979933281124474e-05, "loss": 1.3546, "step": 1267 }, { "epoch": 0.04961264574692856, "grad_norm": 0.0, "learning_rate": 1.9979852960972235e-05, "loss": 1.2999, "step": 1268 }, { "epoch": 0.04965177243915799, "grad_norm": 0.0, "learning_rate": 1.9979772480556063e-05, "loss": 1.3027, "step": 1269 }, { "epoch": 0.04969089913138743, "grad_norm": 0.0, "learning_rate": 1.997969183987725e-05, "loss": 1.3887, "step": 1270 }, { "epoch": 0.04973002582361687, "grad_norm": 0.0, "learning_rate": 1.9979611038937096e-05, "loss": 1.302, "step": 1271 }, { "epoch": 0.04976915251584631, "grad_norm": 0.0, "learning_rate": 1.997953007773689e-05, "loss": 1.2841, "step": 1272 }, { "epoch": 0.04980827920807575, "grad_norm": 0.0, "learning_rate": 1.9979448956277932e-05, "loss": 1.3502, "step": 1273 }, { "epoch": 0.04984740590030519, "grad_norm": 0.0, "learning_rate": 1.9979367674561535e-05, "loss": 1.2405, "step": 1274 }, { "epoch": 0.04988653259253463, "grad_norm": 0.0, "learning_rate": 1.9979286232588995e-05, "loss": 1.228, "step": 1275 }, { "epoch": 0.049925659284764064, "grad_norm": 0.0, "learning_rate": 1.9979204630361622e-05, "loss": 1.3185, "step": 1276 }, { "epoch": 0.049964785976993505, "grad_norm": 0.0, "learning_rate": 1.997912286788073e-05, "loss": 1.3049, "step": 1277 }, { "epoch": 0.050003912669222945, "grad_norm": 0.0, "learning_rate": 1.997904094514763e-05, "loss": 1.3333, "step": 1278 }, { "epoch": 0.05004303936145238, "grad_norm": 0.0, "learning_rate": 1.9978958862163634e-05, "loss": 1.3312, "step": 1279 }, { "epoch": 0.05008216605368182, "grad_norm": 0.0, "learning_rate": 1.997887661893006e-05, "loss": 1.2845, "step": 1280 }, { "epoch": 0.05012129274591126, "grad_norm": 0.0, "learning_rate": 1.9978794215448237e-05, "loss": 1.2443, "step": 1281 }, { "epoch": 0.0501604194381407, "grad_norm": 0.0, "learning_rate": 1.997871165171948e-05, "loss": 1.3696, "step": 1282 }, { "epoch": 0.050199546130370136, "grad_norm": 0.0, "learning_rate": 1.9978628927745123e-05, "loss": 1.2037, "step": 1283 }, { "epoch": 0.05023867282259958, "grad_norm": 0.0, "learning_rate": 1.9978546043526487e-05, "loss": 1.1984, "step": 1284 }, { "epoch": 0.05027779951482902, "grad_norm": 0.0, "learning_rate": 1.9978462999064903e-05, "loss": 1.3674, "step": 1285 }, { "epoch": 0.05031692620705846, "grad_norm": 0.0, "learning_rate": 1.997837979436171e-05, "loss": 1.3292, "step": 1286 }, { "epoch": 0.05035605289928789, "grad_norm": 0.0, "learning_rate": 1.9978296429418237e-05, "loss": 1.3183, "step": 1287 }, { "epoch": 0.05039517959151733, "grad_norm": 0.0, "learning_rate": 1.997821290423583e-05, "loss": 1.2584, "step": 1288 }, { "epoch": 0.050434306283746774, "grad_norm": 0.0, "learning_rate": 1.9978129218815824e-05, "loss": 1.3247, "step": 1289 }, { "epoch": 0.05047343297597621, "grad_norm": 0.0, "learning_rate": 1.9978045373159573e-05, "loss": 1.2837, "step": 1290 }, { "epoch": 0.05051255966820565, "grad_norm": 0.0, "learning_rate": 1.9977961367268408e-05, "loss": 1.1142, "step": 1291 }, { "epoch": 0.05055168636043509, "grad_norm": 0.0, "learning_rate": 1.9977877201143692e-05, "loss": 1.2507, "step": 1292 }, { "epoch": 0.05059081305266453, "grad_norm": 0.0, "learning_rate": 1.997779287478677e-05, "loss": 1.322, "step": 1293 }, { "epoch": 0.050629939744893965, "grad_norm": 0.0, "learning_rate": 1.9977708388198997e-05, "loss": 1.2856, "step": 1294 }, { "epoch": 0.050669066437123406, "grad_norm": 0.0, "learning_rate": 1.9977623741381728e-05, "loss": 1.2303, "step": 1295 }, { "epoch": 0.050708193129352847, "grad_norm": 0.0, "learning_rate": 1.997753893433633e-05, "loss": 1.2891, "step": 1296 }, { "epoch": 0.05074731982158228, "grad_norm": 0.0, "learning_rate": 1.9977453967064154e-05, "loss": 1.1869, "step": 1297 }, { "epoch": 0.05078644651381172, "grad_norm": 0.0, "learning_rate": 1.997736883956657e-05, "loss": 1.2087, "step": 1298 }, { "epoch": 0.05082557320604116, "grad_norm": 0.0, "learning_rate": 1.997728355184495e-05, "loss": 1.2941, "step": 1299 }, { "epoch": 0.0508646998982706, "grad_norm": 0.0, "learning_rate": 1.997719810390065e-05, "loss": 1.3341, "step": 1300 }, { "epoch": 0.05090382659050004, "grad_norm": 0.0, "learning_rate": 1.9977112495735057e-05, "loss": 1.2844, "step": 1301 }, { "epoch": 0.05094295328272948, "grad_norm": 0.0, "learning_rate": 1.9977026727349536e-05, "loss": 1.3175, "step": 1302 }, { "epoch": 0.05098207997495892, "grad_norm": 0.0, "learning_rate": 1.997694079874547e-05, "loss": 1.2095, "step": 1303 }, { "epoch": 0.05102120666718835, "grad_norm": 0.0, "learning_rate": 1.9976854709924235e-05, "loss": 1.2428, "step": 1304 }, { "epoch": 0.051060333359417794, "grad_norm": 0.0, "learning_rate": 1.9976768460887216e-05, "loss": 1.098, "step": 1305 }, { "epoch": 0.051099460051647234, "grad_norm": 0.0, "learning_rate": 1.9976682051635795e-05, "loss": 1.3397, "step": 1306 }, { "epoch": 0.051138586743876675, "grad_norm": 0.0, "learning_rate": 1.9976595482171365e-05, "loss": 1.1931, "step": 1307 }, { "epoch": 0.05117771343610611, "grad_norm": 0.0, "learning_rate": 1.997650875249531e-05, "loss": 1.2776, "step": 1308 }, { "epoch": 0.05121684012833555, "grad_norm": 0.0, "learning_rate": 1.9976421862609027e-05, "loss": 1.3566, "step": 1309 }, { "epoch": 0.05125596682056499, "grad_norm": 0.0, "learning_rate": 1.9976334812513912e-05, "loss": 1.225, "step": 1310 }, { "epoch": 0.051295093512794425, "grad_norm": 0.0, "learning_rate": 1.997624760221136e-05, "loss": 1.2328, "step": 1311 }, { "epoch": 0.051334220205023866, "grad_norm": 0.0, "learning_rate": 1.9976160231702774e-05, "loss": 1.1437, "step": 1312 }, { "epoch": 0.05137334689725331, "grad_norm": 0.0, "learning_rate": 1.997607270098955e-05, "loss": 1.2817, "step": 1313 }, { "epoch": 0.05141247358948275, "grad_norm": 0.0, "learning_rate": 1.99759850100731e-05, "loss": 1.1863, "step": 1314 }, { "epoch": 0.05145160028171218, "grad_norm": 0.0, "learning_rate": 1.9975897158954835e-05, "loss": 1.3195, "step": 1315 }, { "epoch": 0.05149072697394162, "grad_norm": 0.0, "learning_rate": 1.9975809147636163e-05, "loss": 1.2756, "step": 1316 }, { "epoch": 0.05152985366617106, "grad_norm": 0.0, "learning_rate": 1.9975720976118492e-05, "loss": 1.2133, "step": 1317 }, { "epoch": 0.051568980358400504, "grad_norm": 0.0, "learning_rate": 1.9975632644403245e-05, "loss": 1.222, "step": 1318 }, { "epoch": 0.05160810705062994, "grad_norm": 0.0, "learning_rate": 1.997554415249184e-05, "loss": 1.3347, "step": 1319 }, { "epoch": 0.05164723374285938, "grad_norm": 0.0, "learning_rate": 1.9975455500385692e-05, "loss": 1.2095, "step": 1320 }, { "epoch": 0.05168636043508882, "grad_norm": 0.0, "learning_rate": 1.997536668808623e-05, "loss": 1.3668, "step": 1321 }, { "epoch": 0.051725487127318254, "grad_norm": 0.0, "learning_rate": 1.997527771559488e-05, "loss": 1.3558, "step": 1322 }, { "epoch": 0.051764613819547695, "grad_norm": 0.0, "learning_rate": 1.9975188582913067e-05, "loss": 1.29, "step": 1323 }, { "epoch": 0.051803740511777135, "grad_norm": 0.0, "learning_rate": 1.9975099290042226e-05, "loss": 1.2385, "step": 1324 }, { "epoch": 0.051842867204006576, "grad_norm": 0.0, "learning_rate": 1.997500983698379e-05, "loss": 1.342, "step": 1325 }, { "epoch": 0.05188199389623601, "grad_norm": 0.0, "learning_rate": 1.9974920223739195e-05, "loss": 1.3779, "step": 1326 }, { "epoch": 0.05192112058846545, "grad_norm": 0.0, "learning_rate": 1.9974830450309883e-05, "loss": 1.2609, "step": 1327 }, { "epoch": 0.05196024728069489, "grad_norm": 0.0, "learning_rate": 1.997474051669729e-05, "loss": 1.2148, "step": 1328 }, { "epoch": 0.051999373972924326, "grad_norm": 0.0, "learning_rate": 1.9974650422902866e-05, "loss": 1.2375, "step": 1329 }, { "epoch": 0.05203850066515377, "grad_norm": 0.0, "learning_rate": 1.9974560168928054e-05, "loss": 1.3319, "step": 1330 }, { "epoch": 0.05207762735738321, "grad_norm": 0.0, "learning_rate": 1.9974469754774307e-05, "loss": 1.3246, "step": 1331 }, { "epoch": 0.05211675404961265, "grad_norm": 0.0, "learning_rate": 1.9974379180443072e-05, "loss": 1.3878, "step": 1332 }, { "epoch": 0.05215588074184208, "grad_norm": 0.0, "learning_rate": 1.9974288445935803e-05, "loss": 1.1826, "step": 1333 }, { "epoch": 0.05219500743407152, "grad_norm": 0.0, "learning_rate": 1.9974197551253963e-05, "loss": 1.2886, "step": 1334 }, { "epoch": 0.052234134126300964, "grad_norm": 0.0, "learning_rate": 1.997410649639901e-05, "loss": 1.2247, "step": 1335 }, { "epoch": 0.0522732608185304, "grad_norm": 0.0, "learning_rate": 1.9974015281372402e-05, "loss": 1.2919, "step": 1336 }, { "epoch": 0.05231238751075984, "grad_norm": 0.0, "learning_rate": 1.9973923906175608e-05, "loss": 1.377, "step": 1337 }, { "epoch": 0.05235151420298928, "grad_norm": 0.0, "learning_rate": 1.9973832370810095e-05, "loss": 1.3671, "step": 1338 }, { "epoch": 0.05239064089521872, "grad_norm": 0.0, "learning_rate": 1.997374067527733e-05, "loss": 1.2843, "step": 1339 }, { "epoch": 0.052429767587448155, "grad_norm": 0.0, "learning_rate": 1.997364881957879e-05, "loss": 1.2309, "step": 1340 }, { "epoch": 0.052468894279677596, "grad_norm": 0.0, "learning_rate": 1.9973556803715944e-05, "loss": 1.338, "step": 1341 }, { "epoch": 0.052508020971907036, "grad_norm": 0.0, "learning_rate": 1.997346462769028e-05, "loss": 1.4231, "step": 1342 }, { "epoch": 0.05254714766413648, "grad_norm": 0.0, "learning_rate": 1.9973372291503266e-05, "loss": 1.2411, "step": 1343 }, { "epoch": 0.05258627435636591, "grad_norm": 0.0, "learning_rate": 1.9973279795156394e-05, "loss": 1.3165, "step": 1344 }, { "epoch": 0.05262540104859535, "grad_norm": 0.0, "learning_rate": 1.9973187138651143e-05, "loss": 1.344, "step": 1345 }, { "epoch": 0.05266452774082479, "grad_norm": 0.0, "learning_rate": 1.9973094321989e-05, "loss": 1.3028, "step": 1346 }, { "epoch": 0.05270365443305423, "grad_norm": 0.0, "learning_rate": 1.9973001345171465e-05, "loss": 1.3562, "step": 1347 }, { "epoch": 0.05274278112528367, "grad_norm": 0.0, "learning_rate": 1.9972908208200023e-05, "loss": 1.265, "step": 1348 }, { "epoch": 0.05278190781751311, "grad_norm": 0.0, "learning_rate": 1.9972814911076175e-05, "loss": 1.1354, "step": 1349 }, { "epoch": 0.05282103450974255, "grad_norm": 0.0, "learning_rate": 1.9972721453801412e-05, "loss": 1.254, "step": 1350 }, { "epoch": 0.052860161201971984, "grad_norm": 0.0, "learning_rate": 1.9972627836377242e-05, "loss": 1.3676, "step": 1351 }, { "epoch": 0.052899287894201424, "grad_norm": 0.0, "learning_rate": 1.9972534058805163e-05, "loss": 1.422, "step": 1352 }, { "epoch": 0.052938414586430865, "grad_norm": 0.0, "learning_rate": 1.997244012108668e-05, "loss": 1.2822, "step": 1353 }, { "epoch": 0.0529775412786603, "grad_norm": 0.0, "learning_rate": 1.9972346023223313e-05, "loss": 1.2349, "step": 1354 }, { "epoch": 0.05301666797088974, "grad_norm": 0.0, "learning_rate": 1.997225176521656e-05, "loss": 1.2072, "step": 1355 }, { "epoch": 0.05305579466311918, "grad_norm": 0.0, "learning_rate": 1.997215734706794e-05, "loss": 1.2492, "step": 1356 }, { "epoch": 0.05309492135534862, "grad_norm": 0.0, "learning_rate": 1.997206276877897e-05, "loss": 1.2317, "step": 1357 }, { "epoch": 0.053134048047578056, "grad_norm": 0.0, "learning_rate": 1.9971968030351166e-05, "loss": 1.2672, "step": 1358 }, { "epoch": 0.0531731747398075, "grad_norm": 0.0, "learning_rate": 1.9971873131786052e-05, "loss": 1.2523, "step": 1359 }, { "epoch": 0.05321230143203694, "grad_norm": 0.0, "learning_rate": 1.997177807308515e-05, "loss": 1.3531, "step": 1360 }, { "epoch": 0.05325142812426637, "grad_norm": 0.0, "learning_rate": 1.997168285424999e-05, "loss": 1.21, "step": 1361 }, { "epoch": 0.05329055481649581, "grad_norm": 0.0, "learning_rate": 1.99715874752821e-05, "loss": 1.2273, "step": 1362 }, { "epoch": 0.05332968150872525, "grad_norm": 0.0, "learning_rate": 1.9971491936183005e-05, "loss": 1.2141, "step": 1363 }, { "epoch": 0.053368808200954694, "grad_norm": 0.0, "learning_rate": 1.9971396236954247e-05, "loss": 1.3196, "step": 1364 }, { "epoch": 0.05340793489318413, "grad_norm": 0.0, "learning_rate": 1.997130037759736e-05, "loss": 1.2297, "step": 1365 }, { "epoch": 0.05344706158541357, "grad_norm": 0.0, "learning_rate": 1.9971204358113882e-05, "loss": 1.2087, "step": 1366 }, { "epoch": 0.05348618827764301, "grad_norm": 0.0, "learning_rate": 1.997110817850536e-05, "loss": 1.3336, "step": 1367 }, { "epoch": 0.053525314969872444, "grad_norm": 0.0, "learning_rate": 1.997101183877333e-05, "loss": 1.2298, "step": 1368 }, { "epoch": 0.053564441662101885, "grad_norm": 0.0, "learning_rate": 1.997091533891935e-05, "loss": 1.3546, "step": 1369 }, { "epoch": 0.053603568354331325, "grad_norm": 0.0, "learning_rate": 1.9970818678944962e-05, "loss": 1.3578, "step": 1370 }, { "epoch": 0.053642695046560766, "grad_norm": 0.0, "learning_rate": 1.997072185885172e-05, "loss": 1.283, "step": 1371 }, { "epoch": 0.0536818217387902, "grad_norm": 0.0, "learning_rate": 1.9970624878641178e-05, "loss": 1.2152, "step": 1372 }, { "epoch": 0.05372094843101964, "grad_norm": 0.0, "learning_rate": 1.9970527738314898e-05, "loss": 1.2372, "step": 1373 }, { "epoch": 0.05376007512324908, "grad_norm": 0.0, "learning_rate": 1.9970430437874434e-05, "loss": 1.2915, "step": 1374 }, { "epoch": 0.05379920181547852, "grad_norm": 0.0, "learning_rate": 1.9970332977321348e-05, "loss": 1.2908, "step": 1375 }, { "epoch": 0.05383832850770796, "grad_norm": 0.0, "learning_rate": 1.9970235356657212e-05, "loss": 1.2699, "step": 1376 }, { "epoch": 0.0538774551999374, "grad_norm": 0.0, "learning_rate": 1.9970137575883584e-05, "loss": 1.2947, "step": 1377 }, { "epoch": 0.05391658189216684, "grad_norm": 0.0, "learning_rate": 1.9970039635002044e-05, "loss": 1.3246, "step": 1378 }, { "epoch": 0.05395570858439627, "grad_norm": 0.0, "learning_rate": 1.996994153401416e-05, "loss": 1.2508, "step": 1379 }, { "epoch": 0.05399483527662571, "grad_norm": 0.0, "learning_rate": 1.996984327292151e-05, "loss": 1.4706, "step": 1380 }, { "epoch": 0.054033961968855154, "grad_norm": 0.0, "learning_rate": 1.996974485172567e-05, "loss": 1.2924, "step": 1381 }, { "epoch": 0.054073088661084595, "grad_norm": 0.0, "learning_rate": 1.9969646270428216e-05, "loss": 1.2487, "step": 1382 }, { "epoch": 0.05411221535331403, "grad_norm": 0.0, "learning_rate": 1.996954752903074e-05, "loss": 1.2987, "step": 1383 }, { "epoch": 0.05415134204554347, "grad_norm": 0.0, "learning_rate": 1.996944862753482e-05, "loss": 1.2137, "step": 1384 }, { "epoch": 0.05419046873777291, "grad_norm": 0.0, "learning_rate": 1.9969349565942048e-05, "loss": 1.1837, "step": 1385 }, { "epoch": 0.054229595430002345, "grad_norm": 0.0, "learning_rate": 1.9969250344254016e-05, "loss": 1.2672, "step": 1386 }, { "epoch": 0.054268722122231786, "grad_norm": 0.0, "learning_rate": 1.9969150962472315e-05, "loss": 1.1474, "step": 1387 }, { "epoch": 0.054307848814461226, "grad_norm": 0.0, "learning_rate": 1.996905142059854e-05, "loss": 1.1169, "step": 1388 }, { "epoch": 0.05434697550669067, "grad_norm": 0.0, "learning_rate": 1.9968951718634293e-05, "loss": 1.1744, "step": 1389 }, { "epoch": 0.0543861021989201, "grad_norm": 0.0, "learning_rate": 1.9968851856581174e-05, "loss": 1.4155, "step": 1390 }, { "epoch": 0.05442522889114954, "grad_norm": 0.0, "learning_rate": 1.9968751834440783e-05, "loss": 1.345, "step": 1391 }, { "epoch": 0.05446435558337898, "grad_norm": 0.0, "learning_rate": 1.996865165221473e-05, "loss": 1.3215, "step": 1392 }, { "epoch": 0.05450348227560842, "grad_norm": 0.0, "learning_rate": 1.9968551309904622e-05, "loss": 1.2598, "step": 1393 }, { "epoch": 0.05454260896783786, "grad_norm": 0.0, "learning_rate": 1.9968450807512074e-05, "loss": 1.3619, "step": 1394 }, { "epoch": 0.0545817356600673, "grad_norm": 0.0, "learning_rate": 1.99683501450387e-05, "loss": 1.271, "step": 1395 }, { "epoch": 0.05462086235229674, "grad_norm": 0.0, "learning_rate": 1.9968249322486108e-05, "loss": 1.1515, "step": 1396 }, { "epoch": 0.054659989044526174, "grad_norm": 0.0, "learning_rate": 1.9968148339855925e-05, "loss": 1.2378, "step": 1397 }, { "epoch": 0.054699115736755614, "grad_norm": 0.0, "learning_rate": 1.9968047197149766e-05, "loss": 1.2681, "step": 1398 }, { "epoch": 0.054738242428985055, "grad_norm": 0.0, "learning_rate": 1.9967945894369264e-05, "loss": 1.2922, "step": 1399 }, { "epoch": 0.05477736912121449, "grad_norm": 0.0, "learning_rate": 1.996784443151604e-05, "loss": 1.0974, "step": 1400 }, { "epoch": 0.05481649581344393, "grad_norm": 0.0, "learning_rate": 1.996774280859173e-05, "loss": 1.4211, "step": 1401 }, { "epoch": 0.05485562250567337, "grad_norm": 0.0, "learning_rate": 1.9967641025597953e-05, "loss": 1.2762, "step": 1402 }, { "epoch": 0.05489474919790281, "grad_norm": 0.0, "learning_rate": 1.9967539082536356e-05, "loss": 1.2609, "step": 1403 }, { "epoch": 0.054933875890132246, "grad_norm": 0.0, "learning_rate": 1.996743697940857e-05, "loss": 1.2465, "step": 1404 }, { "epoch": 0.05497300258236169, "grad_norm": 0.0, "learning_rate": 1.9967334716216234e-05, "loss": 1.1572, "step": 1405 }, { "epoch": 0.05501212927459113, "grad_norm": 0.0, "learning_rate": 1.9967232292960997e-05, "loss": 1.3344, "step": 1406 }, { "epoch": 0.05505125596682057, "grad_norm": 0.0, "learning_rate": 1.9967129709644495e-05, "loss": 1.3733, "step": 1407 }, { "epoch": 0.05509038265905, "grad_norm": 0.0, "learning_rate": 1.996702696626838e-05, "loss": 1.3813, "step": 1408 }, { "epoch": 0.05512950935127944, "grad_norm": 0.0, "learning_rate": 1.9966924062834306e-05, "loss": 1.2879, "step": 1409 }, { "epoch": 0.055168636043508884, "grad_norm": 0.0, "learning_rate": 1.9966820999343913e-05, "loss": 1.2905, "step": 1410 }, { "epoch": 0.05520776273573832, "grad_norm": 0.0, "learning_rate": 1.996671777579887e-05, "loss": 1.2346, "step": 1411 }, { "epoch": 0.05524688942796776, "grad_norm": 0.0, "learning_rate": 1.9966614392200827e-05, "loss": 1.3134, "step": 1412 }, { "epoch": 0.0552860161201972, "grad_norm": 0.0, "learning_rate": 1.9966510848551444e-05, "loss": 1.2278, "step": 1413 }, { "epoch": 0.05532514281242664, "grad_norm": 0.0, "learning_rate": 1.996640714485239e-05, "loss": 1.3411, "step": 1414 }, { "epoch": 0.055364269504656075, "grad_norm": 0.0, "learning_rate": 1.996630328110532e-05, "loss": 1.3084, "step": 1415 }, { "epoch": 0.055403396196885515, "grad_norm": 0.0, "learning_rate": 1.996619925731191e-05, "loss": 1.3545, "step": 1416 }, { "epoch": 0.055442522889114956, "grad_norm": 0.0, "learning_rate": 1.9966095073473828e-05, "loss": 1.2815, "step": 1417 }, { "epoch": 0.05548164958134439, "grad_norm": 0.0, "learning_rate": 1.9965990729592748e-05, "loss": 1.1809, "step": 1418 }, { "epoch": 0.05552077627357383, "grad_norm": 0.0, "learning_rate": 1.9965886225670346e-05, "loss": 1.3041, "step": 1419 }, { "epoch": 0.05555990296580327, "grad_norm": 0.0, "learning_rate": 1.99657815617083e-05, "loss": 1.3282, "step": 1420 }, { "epoch": 0.05559902965803271, "grad_norm": 0.0, "learning_rate": 1.9965676737708284e-05, "loss": 1.2586, "step": 1421 }, { "epoch": 0.05563815635026215, "grad_norm": 0.0, "learning_rate": 1.996557175367199e-05, "loss": 1.3147, "step": 1422 }, { "epoch": 0.05567728304249159, "grad_norm": 0.0, "learning_rate": 1.9965466609601105e-05, "loss": 1.3737, "step": 1423 }, { "epoch": 0.05571640973472103, "grad_norm": 0.0, "learning_rate": 1.996536130549731e-05, "loss": 1.1664, "step": 1424 }, { "epoch": 0.05575553642695046, "grad_norm": 0.0, "learning_rate": 1.9965255841362303e-05, "loss": 1.2214, "step": 1425 }, { "epoch": 0.0557946631191799, "grad_norm": 0.0, "learning_rate": 1.996515021719777e-05, "loss": 1.1888, "step": 1426 }, { "epoch": 0.055833789811409344, "grad_norm": 0.0, "learning_rate": 1.9965044433005418e-05, "loss": 1.3079, "step": 1427 }, { "epoch": 0.055872916503638785, "grad_norm": 0.0, "learning_rate": 1.996493848878693e-05, "loss": 1.3465, "step": 1428 }, { "epoch": 0.05591204319586822, "grad_norm": 0.0, "learning_rate": 1.996483238454402e-05, "loss": 1.1924, "step": 1429 }, { "epoch": 0.05595116988809766, "grad_norm": 0.0, "learning_rate": 1.9964726120278394e-05, "loss": 1.2723, "step": 1430 }, { "epoch": 0.0559902965803271, "grad_norm": 0.0, "learning_rate": 1.996461969599175e-05, "loss": 1.2961, "step": 1431 }, { "epoch": 0.056029423272556535, "grad_norm": 0.0, "learning_rate": 1.99645131116858e-05, "loss": 1.3085, "step": 1432 }, { "epoch": 0.056068549964785976, "grad_norm": 0.0, "learning_rate": 1.9964406367362253e-05, "loss": 1.1934, "step": 1433 }, { "epoch": 0.056107676657015416, "grad_norm": 0.0, "learning_rate": 1.9964299463022827e-05, "loss": 1.3039, "step": 1434 }, { "epoch": 0.05614680334924486, "grad_norm": 0.0, "learning_rate": 1.996419239866924e-05, "loss": 1.199, "step": 1435 }, { "epoch": 0.05618593004147429, "grad_norm": 0.0, "learning_rate": 1.996408517430321e-05, "loss": 1.2791, "step": 1436 }, { "epoch": 0.05622505673370373, "grad_norm": 0.0, "learning_rate": 1.996397778992645e-05, "loss": 1.2206, "step": 1437 }, { "epoch": 0.05626418342593317, "grad_norm": 0.0, "learning_rate": 1.99638702455407e-05, "loss": 1.2244, "step": 1438 }, { "epoch": 0.056303310118162614, "grad_norm": 0.0, "learning_rate": 1.9963762541147676e-05, "loss": 1.271, "step": 1439 }, { "epoch": 0.05634243681039205, "grad_norm": 0.0, "learning_rate": 1.996365467674911e-05, "loss": 1.2829, "step": 1440 }, { "epoch": 0.05638156350262149, "grad_norm": 0.0, "learning_rate": 1.9963546652346736e-05, "loss": 1.1707, "step": 1441 }, { "epoch": 0.05642069019485093, "grad_norm": 0.0, "learning_rate": 1.9963438467942288e-05, "loss": 1.3314, "step": 1442 }, { "epoch": 0.056459816887080364, "grad_norm": 0.0, "learning_rate": 1.9963330123537507e-05, "loss": 1.1826, "step": 1443 }, { "epoch": 0.056498943579309804, "grad_norm": 0.0, "learning_rate": 1.9963221619134125e-05, "loss": 1.2538, "step": 1444 }, { "epoch": 0.056538070271539245, "grad_norm": 0.0, "learning_rate": 1.9963112954733886e-05, "loss": 1.2751, "step": 1445 }, { "epoch": 0.056577196963768686, "grad_norm": 0.0, "learning_rate": 1.9963004130338543e-05, "loss": 1.3487, "step": 1446 }, { "epoch": 0.05661632365599812, "grad_norm": 0.0, "learning_rate": 1.9962895145949833e-05, "loss": 1.1616, "step": 1447 }, { "epoch": 0.05665545034822756, "grad_norm": 0.0, "learning_rate": 1.9962786001569515e-05, "loss": 1.3129, "step": 1448 }, { "epoch": 0.056694577040457, "grad_norm": 0.0, "learning_rate": 1.9962676697199333e-05, "loss": 1.1373, "step": 1449 }, { "epoch": 0.056733703732686436, "grad_norm": 0.0, "learning_rate": 1.9962567232841054e-05, "loss": 1.3264, "step": 1450 }, { "epoch": 0.05677283042491588, "grad_norm": 0.0, "learning_rate": 1.9962457608496424e-05, "loss": 1.3156, "step": 1451 }, { "epoch": 0.05681195711714532, "grad_norm": 0.0, "learning_rate": 1.996234782416721e-05, "loss": 1.405, "step": 1452 }, { "epoch": 0.05685108380937476, "grad_norm": 0.0, "learning_rate": 1.9962237879855174e-05, "loss": 1.2362, "step": 1453 }, { "epoch": 0.05689021050160419, "grad_norm": 0.0, "learning_rate": 1.996212777556208e-05, "loss": 1.353, "step": 1454 }, { "epoch": 0.05692933719383363, "grad_norm": 0.0, "learning_rate": 1.9962017511289696e-05, "loss": 1.3002, "step": 1455 }, { "epoch": 0.056968463886063074, "grad_norm": 0.0, "learning_rate": 1.9961907087039796e-05, "loss": 1.267, "step": 1456 }, { "epoch": 0.05700759057829251, "grad_norm": 0.0, "learning_rate": 1.996179650281415e-05, "loss": 1.3147, "step": 1457 }, { "epoch": 0.05704671727052195, "grad_norm": 0.0, "learning_rate": 1.9961685758614537e-05, "loss": 1.2137, "step": 1458 }, { "epoch": 0.05708584396275139, "grad_norm": 0.0, "learning_rate": 1.996157485444273e-05, "loss": 1.2678, "step": 1459 }, { "epoch": 0.05712497065498083, "grad_norm": 0.0, "learning_rate": 1.9961463790300518e-05, "loss": 1.2242, "step": 1460 }, { "epoch": 0.057164097347210265, "grad_norm": 0.0, "learning_rate": 1.9961352566189677e-05, "loss": 1.2961, "step": 1461 }, { "epoch": 0.057203224039439705, "grad_norm": 0.0, "learning_rate": 1.9961241182111996e-05, "loss": 1.4189, "step": 1462 }, { "epoch": 0.057242350731669146, "grad_norm": 0.0, "learning_rate": 1.9961129638069266e-05, "loss": 1.342, "step": 1463 }, { "epoch": 0.05728147742389858, "grad_norm": 0.0, "learning_rate": 1.9961017934063273e-05, "loss": 1.2049, "step": 1464 }, { "epoch": 0.05732060411612802, "grad_norm": 0.0, "learning_rate": 1.9960906070095815e-05, "loss": 1.2917, "step": 1465 }, { "epoch": 0.05735973080835746, "grad_norm": 0.0, "learning_rate": 1.9960794046168687e-05, "loss": 1.3035, "step": 1466 }, { "epoch": 0.0573988575005869, "grad_norm": 0.0, "learning_rate": 1.996068186228369e-05, "loss": 1.2864, "step": 1467 }, { "epoch": 0.05743798419281634, "grad_norm": 0.0, "learning_rate": 1.996056951844262e-05, "loss": 1.3156, "step": 1468 }, { "epoch": 0.05747711088504578, "grad_norm": 0.0, "learning_rate": 1.996045701464729e-05, "loss": 1.283, "step": 1469 }, { "epoch": 0.05751623757727522, "grad_norm": 0.0, "learning_rate": 1.9960344350899495e-05, "loss": 1.3331, "step": 1470 }, { "epoch": 0.05755536426950466, "grad_norm": 0.0, "learning_rate": 1.9960231527201056e-05, "loss": 1.3807, "step": 1471 }, { "epoch": 0.05759449096173409, "grad_norm": 0.0, "learning_rate": 1.9960118543553776e-05, "loss": 1.2469, "step": 1472 }, { "epoch": 0.057633617653963534, "grad_norm": 0.0, "learning_rate": 1.9960005399959477e-05, "loss": 1.3196, "step": 1473 }, { "epoch": 0.057672744346192975, "grad_norm": 0.0, "learning_rate": 1.995989209641997e-05, "loss": 1.3495, "step": 1474 }, { "epoch": 0.05771187103842241, "grad_norm": 0.0, "learning_rate": 1.9959778632937074e-05, "loss": 1.0748, "step": 1475 }, { "epoch": 0.05775099773065185, "grad_norm": 0.0, "learning_rate": 1.9959665009512616e-05, "loss": 1.2234, "step": 1476 }, { "epoch": 0.05779012442288129, "grad_norm": 0.0, "learning_rate": 1.9959551226148416e-05, "loss": 1.4152, "step": 1477 }, { "epoch": 0.05782925111511073, "grad_norm": 0.0, "learning_rate": 1.9959437282846306e-05, "loss": 1.3237, "step": 1478 }, { "epoch": 0.057868377807340166, "grad_norm": 0.0, "learning_rate": 1.995932317960811e-05, "loss": 1.3206, "step": 1479 }, { "epoch": 0.057907504499569606, "grad_norm": 0.0, "learning_rate": 1.9959208916435665e-05, "loss": 1.3827, "step": 1480 }, { "epoch": 0.05794663119179905, "grad_norm": 0.0, "learning_rate": 1.9959094493330806e-05, "loss": 1.3763, "step": 1481 }, { "epoch": 0.05798575788402848, "grad_norm": 0.0, "learning_rate": 1.9958979910295367e-05, "loss": 1.3118, "step": 1482 }, { "epoch": 0.05802488457625792, "grad_norm": 0.0, "learning_rate": 1.9958865167331193e-05, "loss": 1.2704, "step": 1483 }, { "epoch": 0.05806401126848736, "grad_norm": 0.0, "learning_rate": 1.9958750264440118e-05, "loss": 1.2126, "step": 1484 }, { "epoch": 0.058103137960716804, "grad_norm": 0.0, "learning_rate": 1.9958635201624e-05, "loss": 1.3442, "step": 1485 }, { "epoch": 0.05814226465294624, "grad_norm": 0.0, "learning_rate": 1.995851997888467e-05, "loss": 1.2122, "step": 1486 }, { "epoch": 0.05818139134517568, "grad_norm": 0.0, "learning_rate": 1.9958404596223997e-05, "loss": 1.1628, "step": 1487 }, { "epoch": 0.05822051803740512, "grad_norm": 0.0, "learning_rate": 1.995828905364382e-05, "loss": 1.2178, "step": 1488 }, { "epoch": 0.058259644729634553, "grad_norm": 0.0, "learning_rate": 1.9958173351146e-05, "loss": 1.3267, "step": 1489 }, { "epoch": 0.058298771421863994, "grad_norm": 0.0, "learning_rate": 1.9958057488732393e-05, "loss": 1.1824, "step": 1490 }, { "epoch": 0.058337898114093435, "grad_norm": 0.0, "learning_rate": 1.9957941466404865e-05, "loss": 1.2064, "step": 1491 }, { "epoch": 0.058377024806322876, "grad_norm": 0.0, "learning_rate": 1.9957825284165272e-05, "loss": 1.2435, "step": 1492 }, { "epoch": 0.05841615149855231, "grad_norm": 0.0, "learning_rate": 1.9957708942015484e-05, "loss": 1.3102, "step": 1493 }, { "epoch": 0.05845527819078175, "grad_norm": 0.0, "learning_rate": 1.9957592439957368e-05, "loss": 1.3268, "step": 1494 }, { "epoch": 0.05849440488301119, "grad_norm": 0.0, "learning_rate": 1.9957475777992794e-05, "loss": 1.2578, "step": 1495 }, { "epoch": 0.058533531575240626, "grad_norm": 0.0, "learning_rate": 1.9957358956123637e-05, "loss": 1.2845, "step": 1496 }, { "epoch": 0.05857265826747007, "grad_norm": 0.0, "learning_rate": 1.9957241974351772e-05, "loss": 1.1483, "step": 1497 }, { "epoch": 0.05861178495969951, "grad_norm": 0.0, "learning_rate": 1.9957124832679078e-05, "loss": 1.29, "step": 1498 }, { "epoch": 0.05865091165192895, "grad_norm": 0.0, "learning_rate": 1.9957007531107437e-05, "loss": 1.2665, "step": 1499 }, { "epoch": 0.05869003834415838, "grad_norm": 0.0, "learning_rate": 1.995689006963873e-05, "loss": 1.3008, "step": 1500 }, { "epoch": 0.05872916503638782, "grad_norm": 0.0, "learning_rate": 1.995677244827485e-05, "loss": 1.2659, "step": 1501 }, { "epoch": 0.058768291728617264, "grad_norm": 0.0, "learning_rate": 1.9956654667017676e-05, "loss": 1.3276, "step": 1502 }, { "epoch": 0.058807418420846705, "grad_norm": 0.0, "learning_rate": 1.9956536725869105e-05, "loss": 1.3114, "step": 1503 }, { "epoch": 0.05884654511307614, "grad_norm": 0.0, "learning_rate": 1.995641862483103e-05, "loss": 1.32, "step": 1504 }, { "epoch": 0.05888567180530558, "grad_norm": 0.0, "learning_rate": 1.9956300363905348e-05, "loss": 1.1043, "step": 1505 }, { "epoch": 0.05892479849753502, "grad_norm": 0.0, "learning_rate": 1.9956181943093963e-05, "loss": 1.2742, "step": 1506 }, { "epoch": 0.058963925189764455, "grad_norm": 0.0, "learning_rate": 1.9956063362398766e-05, "loss": 1.1654, "step": 1507 }, { "epoch": 0.059003051881993895, "grad_norm": 0.0, "learning_rate": 1.995594462182167e-05, "loss": 1.3583, "step": 1508 }, { "epoch": 0.059042178574223336, "grad_norm": 0.0, "learning_rate": 1.9955825721364576e-05, "loss": 1.3568, "step": 1509 }, { "epoch": 0.05908130526645278, "grad_norm": 0.0, "learning_rate": 1.99557066610294e-05, "loss": 1.2193, "step": 1510 }, { "epoch": 0.05912043195868221, "grad_norm": 0.0, "learning_rate": 1.9955587440818047e-05, "loss": 1.2645, "step": 1511 }, { "epoch": 0.05915955865091165, "grad_norm": 0.0, "learning_rate": 1.995546806073244e-05, "loss": 1.2852, "step": 1512 }, { "epoch": 0.05919868534314109, "grad_norm": 0.0, "learning_rate": 1.9955348520774484e-05, "loss": 1.2707, "step": 1513 }, { "epoch": 0.05923781203537053, "grad_norm": 0.0, "learning_rate": 1.995522882094611e-05, "loss": 1.2805, "step": 1514 }, { "epoch": 0.05927693872759997, "grad_norm": 0.0, "learning_rate": 1.9955108961249235e-05, "loss": 1.3357, "step": 1515 }, { "epoch": 0.05931606541982941, "grad_norm": 0.0, "learning_rate": 1.9954988941685784e-05, "loss": 1.2684, "step": 1516 }, { "epoch": 0.05935519211205885, "grad_norm": 0.0, "learning_rate": 1.9954868762257685e-05, "loss": 1.2357, "step": 1517 }, { "epoch": 0.05939431880428828, "grad_norm": 0.0, "learning_rate": 1.9954748422966866e-05, "loss": 1.1926, "step": 1518 }, { "epoch": 0.059433445496517724, "grad_norm": 0.0, "learning_rate": 1.9954627923815265e-05, "loss": 1.3364, "step": 1519 }, { "epoch": 0.059472572188747165, "grad_norm": 0.0, "learning_rate": 1.995450726480481e-05, "loss": 1.2817, "step": 1520 }, { "epoch": 0.0595116988809766, "grad_norm": 0.0, "learning_rate": 1.9954386445937444e-05, "loss": 1.3055, "step": 1521 }, { "epoch": 0.05955082557320604, "grad_norm": 0.0, "learning_rate": 1.9954265467215104e-05, "loss": 1.2646, "step": 1522 }, { "epoch": 0.05958995226543548, "grad_norm": 0.0, "learning_rate": 1.9954144328639737e-05, "loss": 1.3732, "step": 1523 }, { "epoch": 0.05962907895766492, "grad_norm": 0.0, "learning_rate": 1.995402303021328e-05, "loss": 1.2338, "step": 1524 }, { "epoch": 0.059668205649894356, "grad_norm": 0.0, "learning_rate": 1.9953901571937688e-05, "loss": 1.1318, "step": 1525 }, { "epoch": 0.059707332342123796, "grad_norm": 0.0, "learning_rate": 1.9953779953814912e-05, "loss": 1.2487, "step": 1526 }, { "epoch": 0.05974645903435324, "grad_norm": 0.0, "learning_rate": 1.9953658175846903e-05, "loss": 1.2556, "step": 1527 }, { "epoch": 0.05978558572658268, "grad_norm": 0.0, "learning_rate": 1.9953536238035614e-05, "loss": 1.4261, "step": 1528 }, { "epoch": 0.05982471241881211, "grad_norm": 0.0, "learning_rate": 1.9953414140383005e-05, "loss": 1.1581, "step": 1529 }, { "epoch": 0.05986383911104155, "grad_norm": 0.0, "learning_rate": 1.995329188289104e-05, "loss": 1.2424, "step": 1530 }, { "epoch": 0.059902965803270994, "grad_norm": 0.0, "learning_rate": 1.9953169465561677e-05, "loss": 1.1438, "step": 1531 }, { "epoch": 0.05994209249550043, "grad_norm": 0.0, "learning_rate": 1.9953046888396886e-05, "loss": 1.3212, "step": 1532 }, { "epoch": 0.05998121918772987, "grad_norm": 0.0, "learning_rate": 1.9952924151398634e-05, "loss": 1.2181, "step": 1533 }, { "epoch": 0.06002034587995931, "grad_norm": 0.0, "learning_rate": 1.995280125456889e-05, "loss": 1.3993, "step": 1534 }, { "epoch": 0.06005947257218875, "grad_norm": 0.0, "learning_rate": 1.995267819790963e-05, "loss": 1.2994, "step": 1535 }, { "epoch": 0.060098599264418184, "grad_norm": 0.0, "learning_rate": 1.9952554981422832e-05, "loss": 1.3257, "step": 1536 }, { "epoch": 0.060137725956647625, "grad_norm": 0.0, "learning_rate": 1.995243160511047e-05, "loss": 1.1563, "step": 1537 }, { "epoch": 0.060176852648877066, "grad_norm": 0.0, "learning_rate": 1.9952308068974527e-05, "loss": 1.2894, "step": 1538 }, { "epoch": 0.0602159793411065, "grad_norm": 0.0, "learning_rate": 1.995218437301699e-05, "loss": 1.2744, "step": 1539 }, { "epoch": 0.06025510603333594, "grad_norm": 0.0, "learning_rate": 1.9952060517239838e-05, "loss": 1.2245, "step": 1540 }, { "epoch": 0.06029423272556538, "grad_norm": 0.0, "learning_rate": 1.995193650164507e-05, "loss": 1.2327, "step": 1541 }, { "epoch": 0.06033335941779482, "grad_norm": 0.0, "learning_rate": 1.995181232623467e-05, "loss": 1.3288, "step": 1542 }, { "epoch": 0.06037248611002426, "grad_norm": 0.0, "learning_rate": 1.9951687991010634e-05, "loss": 1.3696, "step": 1543 }, { "epoch": 0.0604116128022537, "grad_norm": 0.0, "learning_rate": 1.995156349597496e-05, "loss": 1.3776, "step": 1544 }, { "epoch": 0.06045073949448314, "grad_norm": 0.0, "learning_rate": 1.9951438841129647e-05, "loss": 1.302, "step": 1545 }, { "epoch": 0.06048986618671257, "grad_norm": 0.0, "learning_rate": 1.9951314026476693e-05, "loss": 1.2681, "step": 1546 }, { "epoch": 0.06052899287894201, "grad_norm": 0.0, "learning_rate": 1.995118905201811e-05, "loss": 1.3726, "step": 1547 }, { "epoch": 0.060568119571171454, "grad_norm": 0.0, "learning_rate": 1.9951063917755897e-05, "loss": 1.1849, "step": 1548 }, { "epoch": 0.060607246263400895, "grad_norm": 0.0, "learning_rate": 1.9950938623692066e-05, "loss": 1.1852, "step": 1549 }, { "epoch": 0.06064637295563033, "grad_norm": 0.0, "learning_rate": 1.995081316982863e-05, "loss": 1.3372, "step": 1550 }, { "epoch": 0.06068549964785977, "grad_norm": 0.0, "learning_rate": 1.9950687556167606e-05, "loss": 1.2148, "step": 1551 }, { "epoch": 0.06072462634008921, "grad_norm": 0.0, "learning_rate": 1.995056178271101e-05, "loss": 1.3265, "step": 1552 }, { "epoch": 0.060763753032318644, "grad_norm": 0.0, "learning_rate": 1.9950435849460856e-05, "loss": 1.3611, "step": 1553 }, { "epoch": 0.060802879724548085, "grad_norm": 0.0, "learning_rate": 1.9950309756419174e-05, "loss": 1.2398, "step": 1554 }, { "epoch": 0.060842006416777526, "grad_norm": 0.0, "learning_rate": 1.9950183503587987e-05, "loss": 1.4207, "step": 1555 }, { "epoch": 0.06088113310900697, "grad_norm": 0.0, "learning_rate": 1.9950057090969315e-05, "loss": 1.3572, "step": 1556 }, { "epoch": 0.0609202598012364, "grad_norm": 0.0, "learning_rate": 1.99499305185652e-05, "loss": 1.0651, "step": 1557 }, { "epoch": 0.06095938649346584, "grad_norm": 0.0, "learning_rate": 1.9949803786377665e-05, "loss": 1.2227, "step": 1558 }, { "epoch": 0.06099851318569528, "grad_norm": 0.0, "learning_rate": 1.994967689440875e-05, "loss": 1.1921, "step": 1559 }, { "epoch": 0.061037639877924724, "grad_norm": 0.0, "learning_rate": 1.9949549842660495e-05, "loss": 1.2289, "step": 1560 }, { "epoch": 0.06107676657015416, "grad_norm": 0.0, "learning_rate": 1.9949422631134934e-05, "loss": 1.3741, "step": 1561 }, { "epoch": 0.0611158932623836, "grad_norm": 0.0, "learning_rate": 1.9949295259834114e-05, "loss": 1.3209, "step": 1562 }, { "epoch": 0.06115501995461304, "grad_norm": 0.0, "learning_rate": 1.994916772876008e-05, "loss": 1.3702, "step": 1563 }, { "epoch": 0.06119414664684247, "grad_norm": 0.0, "learning_rate": 1.9949040037914876e-05, "loss": 1.232, "step": 1564 }, { "epoch": 0.061233273339071914, "grad_norm": 0.0, "learning_rate": 1.994891218730056e-05, "loss": 1.2184, "step": 1565 }, { "epoch": 0.061272400031301355, "grad_norm": 0.0, "learning_rate": 1.994878417691918e-05, "loss": 1.2621, "step": 1566 }, { "epoch": 0.061311526723530796, "grad_norm": 0.0, "learning_rate": 1.994865600677279e-05, "loss": 1.1561, "step": 1567 }, { "epoch": 0.06135065341576023, "grad_norm": 0.0, "learning_rate": 1.9948527676863453e-05, "loss": 1.2736, "step": 1568 }, { "epoch": 0.06138978010798967, "grad_norm": 0.0, "learning_rate": 1.994839918719323e-05, "loss": 1.2659, "step": 1569 }, { "epoch": 0.06142890680021911, "grad_norm": 0.0, "learning_rate": 1.9948270537764177e-05, "loss": 1.1981, "step": 1570 }, { "epoch": 0.061468033492448546, "grad_norm": 0.0, "learning_rate": 1.9948141728578366e-05, "loss": 1.2188, "step": 1571 }, { "epoch": 0.061507160184677986, "grad_norm": 0.0, "learning_rate": 1.9948012759637865e-05, "loss": 1.3298, "step": 1572 }, { "epoch": 0.06154628687690743, "grad_norm": 0.0, "learning_rate": 1.994788363094475e-05, "loss": 1.21, "step": 1573 }, { "epoch": 0.06158541356913687, "grad_norm": 0.0, "learning_rate": 1.9947754342501082e-05, "loss": 1.3038, "step": 1574 }, { "epoch": 0.0616245402613663, "grad_norm": 0.0, "learning_rate": 1.9947624894308947e-05, "loss": 1.2179, "step": 1575 }, { "epoch": 0.06166366695359574, "grad_norm": 0.0, "learning_rate": 1.9947495286370423e-05, "loss": 1.2944, "step": 1576 }, { "epoch": 0.061702793645825184, "grad_norm": 0.0, "learning_rate": 1.994736551868759e-05, "loss": 1.2939, "step": 1577 }, { "epoch": 0.06174192033805462, "grad_norm": 0.0, "learning_rate": 1.994723559126253e-05, "loss": 1.2991, "step": 1578 }, { "epoch": 0.06178104703028406, "grad_norm": 0.0, "learning_rate": 1.9947105504097328e-05, "loss": 1.3268, "step": 1579 }, { "epoch": 0.0618201737225135, "grad_norm": 0.0, "learning_rate": 1.994697525719408e-05, "loss": 1.1271, "step": 1580 }, { "epoch": 0.06185930041474294, "grad_norm": 0.0, "learning_rate": 1.994684485055487e-05, "loss": 1.1627, "step": 1581 }, { "epoch": 0.061898427106972374, "grad_norm": 0.0, "learning_rate": 1.9946714284181802e-05, "loss": 1.2455, "step": 1582 }, { "epoch": 0.061937553799201815, "grad_norm": 0.0, "learning_rate": 1.994658355807696e-05, "loss": 1.4002, "step": 1583 }, { "epoch": 0.061976680491431256, "grad_norm": 0.0, "learning_rate": 1.9946452672242452e-05, "loss": 1.2309, "step": 1584 }, { "epoch": 0.06201580718366069, "grad_norm": 0.0, "learning_rate": 1.9946321626680382e-05, "loss": 1.1601, "step": 1585 }, { "epoch": 0.06205493387589013, "grad_norm": 0.0, "learning_rate": 1.9946190421392845e-05, "loss": 1.2192, "step": 1586 }, { "epoch": 0.06209406056811957, "grad_norm": 0.0, "learning_rate": 1.9946059056381952e-05, "loss": 1.3606, "step": 1587 }, { "epoch": 0.06213318726034901, "grad_norm": 0.0, "learning_rate": 1.9945927531649816e-05, "loss": 1.3104, "step": 1588 }, { "epoch": 0.06217231395257845, "grad_norm": 0.0, "learning_rate": 1.9945795847198547e-05, "loss": 1.2811, "step": 1589 }, { "epoch": 0.06221144064480789, "grad_norm": 0.0, "learning_rate": 1.994566400303026e-05, "loss": 1.2063, "step": 1590 }, { "epoch": 0.06225056733703733, "grad_norm": 0.0, "learning_rate": 1.9945531999147073e-05, "loss": 1.2283, "step": 1591 }, { "epoch": 0.06228969402926677, "grad_norm": 0.0, "learning_rate": 1.99453998355511e-05, "loss": 1.1428, "step": 1592 }, { "epoch": 0.0623288207214962, "grad_norm": 0.0, "learning_rate": 1.9945267512244473e-05, "loss": 1.2547, "step": 1593 }, { "epoch": 0.062367947413725644, "grad_norm": 0.0, "learning_rate": 1.994513502922931e-05, "loss": 1.1899, "step": 1594 }, { "epoch": 0.062407074105955085, "grad_norm": 0.0, "learning_rate": 1.994500238650774e-05, "loss": 1.2372, "step": 1595 }, { "epoch": 0.06244620079818452, "grad_norm": 0.0, "learning_rate": 1.994486958408189e-05, "loss": 1.3384, "step": 1596 }, { "epoch": 0.06248532749041396, "grad_norm": 0.0, "learning_rate": 1.99447366219539e-05, "loss": 1.1882, "step": 1597 }, { "epoch": 0.0625244541826434, "grad_norm": 0.0, "learning_rate": 1.9944603500125905e-05, "loss": 1.3441, "step": 1598 }, { "epoch": 0.06256358087487283, "grad_norm": 0.0, "learning_rate": 1.994447021860003e-05, "loss": 1.2297, "step": 1599 }, { "epoch": 0.06260270756710228, "grad_norm": 0.0, "learning_rate": 1.994433677737843e-05, "loss": 1.3197, "step": 1600 }, { "epoch": 0.06264183425933172, "grad_norm": 0.0, "learning_rate": 1.9944203176463244e-05, "loss": 1.3176, "step": 1601 }, { "epoch": 0.06268096095156116, "grad_norm": 0.0, "learning_rate": 1.9944069415856612e-05, "loss": 1.29, "step": 1602 }, { "epoch": 0.0627200876437906, "grad_norm": 0.0, "learning_rate": 1.9943935495560688e-05, "loss": 1.0977, "step": 1603 }, { "epoch": 0.06275921433602004, "grad_norm": 0.0, "learning_rate": 1.9943801415577616e-05, "loss": 1.2789, "step": 1604 }, { "epoch": 0.06279834102824947, "grad_norm": 0.0, "learning_rate": 1.994366717590956e-05, "loss": 1.2147, "step": 1605 }, { "epoch": 0.0628374677204789, "grad_norm": 0.0, "learning_rate": 1.9943532776558665e-05, "loss": 1.3025, "step": 1606 }, { "epoch": 0.06287659441270835, "grad_norm": 0.0, "learning_rate": 1.9943398217527094e-05, "loss": 1.2639, "step": 1607 }, { "epoch": 0.06291572110493779, "grad_norm": 0.0, "learning_rate": 1.994326349881701e-05, "loss": 1.29, "step": 1608 }, { "epoch": 0.06295484779716723, "grad_norm": 0.0, "learning_rate": 1.994312862043057e-05, "loss": 1.3134, "step": 1609 }, { "epoch": 0.06299397448939667, "grad_norm": 0.0, "learning_rate": 1.9942993582369947e-05, "loss": 1.2051, "step": 1610 }, { "epoch": 0.06303310118162611, "grad_norm": 0.0, "learning_rate": 1.9942858384637306e-05, "loss": 1.326, "step": 1611 }, { "epoch": 0.06307222787385554, "grad_norm": 0.0, "learning_rate": 1.9942723027234817e-05, "loss": 1.1853, "step": 1612 }, { "epoch": 0.06311135456608498, "grad_norm": 0.0, "learning_rate": 1.9942587510164657e-05, "loss": 1.2507, "step": 1613 }, { "epoch": 0.06315048125831442, "grad_norm": 0.0, "learning_rate": 1.9942451833429e-05, "loss": 1.2822, "step": 1614 }, { "epoch": 0.06318960795054386, "grad_norm": 0.0, "learning_rate": 1.9942315997030022e-05, "loss": 1.2184, "step": 1615 }, { "epoch": 0.0632287346427733, "grad_norm": 0.0, "learning_rate": 1.994218000096991e-05, "loss": 1.1006, "step": 1616 }, { "epoch": 0.06326786133500274, "grad_norm": 0.0, "learning_rate": 1.9942043845250845e-05, "loss": 1.2996, "step": 1617 }, { "epoch": 0.06330698802723218, "grad_norm": 0.0, "learning_rate": 1.994190752987501e-05, "loss": 1.3077, "step": 1618 }, { "epoch": 0.06334611471946161, "grad_norm": 0.0, "learning_rate": 1.9941771054844604e-05, "loss": 1.2872, "step": 1619 }, { "epoch": 0.06338524141169105, "grad_norm": 0.0, "learning_rate": 1.9941634420161812e-05, "loss": 1.3132, "step": 1620 }, { "epoch": 0.06342436810392049, "grad_norm": 0.0, "learning_rate": 1.9941497625828827e-05, "loss": 1.2499, "step": 1621 }, { "epoch": 0.06346349479614993, "grad_norm": 0.0, "learning_rate": 1.9941360671847847e-05, "loss": 1.2687, "step": 1622 }, { "epoch": 0.06350262148837937, "grad_norm": 0.0, "learning_rate": 1.9941223558221073e-05, "loss": 1.4062, "step": 1623 }, { "epoch": 0.06354174818060881, "grad_norm": 0.0, "learning_rate": 1.9941086284950706e-05, "loss": 1.2335, "step": 1624 }, { "epoch": 0.06358087487283826, "grad_norm": 0.0, "learning_rate": 1.994094885203895e-05, "loss": 1.2642, "step": 1625 }, { "epoch": 0.06362000156506768, "grad_norm": 0.0, "learning_rate": 1.9940811259488012e-05, "loss": 1.3231, "step": 1626 }, { "epoch": 0.06365912825729712, "grad_norm": 0.0, "learning_rate": 1.99406735073001e-05, "loss": 1.2703, "step": 1627 }, { "epoch": 0.06369825494952656, "grad_norm": 0.0, "learning_rate": 1.994053559547743e-05, "loss": 1.2773, "step": 1628 }, { "epoch": 0.063737381641756, "grad_norm": 0.0, "learning_rate": 1.9940397524022213e-05, "loss": 1.1769, "step": 1629 }, { "epoch": 0.06377650833398545, "grad_norm": 0.0, "learning_rate": 1.994025929293667e-05, "loss": 1.2841, "step": 1630 }, { "epoch": 0.06381563502621489, "grad_norm": 0.0, "learning_rate": 1.994012090222302e-05, "loss": 1.257, "step": 1631 }, { "epoch": 0.06385476171844433, "grad_norm": 0.0, "learning_rate": 1.993998235188348e-05, "loss": 1.3123, "step": 1632 }, { "epoch": 0.06389388841067375, "grad_norm": 0.0, "learning_rate": 1.993984364192028e-05, "loss": 1.3221, "step": 1633 }, { "epoch": 0.0639330151029032, "grad_norm": 0.0, "learning_rate": 1.9939704772335645e-05, "loss": 1.3371, "step": 1634 }, { "epoch": 0.06397214179513264, "grad_norm": 0.0, "learning_rate": 1.993956574313181e-05, "loss": 1.1579, "step": 1635 }, { "epoch": 0.06401126848736208, "grad_norm": 0.0, "learning_rate": 1.9939426554311e-05, "loss": 1.3828, "step": 1636 }, { "epoch": 0.06405039517959152, "grad_norm": 0.0, "learning_rate": 1.9939287205875456e-05, "loss": 1.2478, "step": 1637 }, { "epoch": 0.06408952187182096, "grad_norm": 0.0, "learning_rate": 1.9939147697827415e-05, "loss": 1.1887, "step": 1638 }, { "epoch": 0.0641286485640504, "grad_norm": 0.0, "learning_rate": 1.9939008030169117e-05, "loss": 1.3044, "step": 1639 }, { "epoch": 0.06416777525627984, "grad_norm": 0.0, "learning_rate": 1.99388682029028e-05, "loss": 1.3647, "step": 1640 }, { "epoch": 0.06420690194850927, "grad_norm": 0.0, "learning_rate": 1.993872821603072e-05, "loss": 1.3687, "step": 1641 }, { "epoch": 0.06424602864073871, "grad_norm": 0.0, "learning_rate": 1.9938588069555116e-05, "loss": 1.2043, "step": 1642 }, { "epoch": 0.06428515533296815, "grad_norm": 0.0, "learning_rate": 1.993844776347824e-05, "loss": 1.3668, "step": 1643 }, { "epoch": 0.06432428202519759, "grad_norm": 0.0, "learning_rate": 1.993830729780235e-05, "loss": 1.3565, "step": 1644 }, { "epoch": 0.06436340871742703, "grad_norm": 0.0, "learning_rate": 1.9938166672529695e-05, "loss": 1.4568, "step": 1645 }, { "epoch": 0.06440253540965647, "grad_norm": 0.0, "learning_rate": 1.993802588766254e-05, "loss": 1.2877, "step": 1646 }, { "epoch": 0.06444166210188591, "grad_norm": 0.0, "learning_rate": 1.9937884943203137e-05, "loss": 1.1927, "step": 1647 }, { "epoch": 0.06448078879411534, "grad_norm": 0.0, "learning_rate": 1.9937743839153757e-05, "loss": 1.3301, "step": 1648 }, { "epoch": 0.06451991548634478, "grad_norm": 0.0, "learning_rate": 1.9937602575516664e-05, "loss": 1.1594, "step": 1649 }, { "epoch": 0.06455904217857422, "grad_norm": 0.0, "learning_rate": 1.9937461152294125e-05, "loss": 1.2841, "step": 1650 }, { "epoch": 0.06459816887080366, "grad_norm": 0.0, "learning_rate": 1.9937319569488414e-05, "loss": 1.2, "step": 1651 }, { "epoch": 0.0646372955630331, "grad_norm": 0.0, "learning_rate": 1.99371778271018e-05, "loss": 1.1874, "step": 1652 }, { "epoch": 0.06467642225526254, "grad_norm": 0.0, "learning_rate": 1.9937035925136566e-05, "loss": 1.2169, "step": 1653 }, { "epoch": 0.06471554894749199, "grad_norm": 0.0, "learning_rate": 1.9936893863594986e-05, "loss": 1.2186, "step": 1654 }, { "epoch": 0.06475467563972141, "grad_norm": 0.0, "learning_rate": 1.993675164247934e-05, "loss": 1.3478, "step": 1655 }, { "epoch": 0.06479380233195085, "grad_norm": 0.0, "learning_rate": 1.9936609261791917e-05, "loss": 1.4156, "step": 1656 }, { "epoch": 0.0648329290241803, "grad_norm": 0.0, "learning_rate": 1.9936466721534996e-05, "loss": 1.3256, "step": 1657 }, { "epoch": 0.06487205571640974, "grad_norm": 0.0, "learning_rate": 1.9936324021710874e-05, "loss": 1.275, "step": 1658 }, { "epoch": 0.06491118240863918, "grad_norm": 0.0, "learning_rate": 1.9936181162321842e-05, "loss": 1.3887, "step": 1659 }, { "epoch": 0.06495030910086862, "grad_norm": 0.0, "learning_rate": 1.9936038143370187e-05, "loss": 1.2706, "step": 1660 }, { "epoch": 0.06498943579309806, "grad_norm": 0.0, "learning_rate": 1.9935894964858212e-05, "loss": 1.1506, "step": 1661 }, { "epoch": 0.06502856248532748, "grad_norm": 0.0, "learning_rate": 1.9935751626788212e-05, "loss": 1.1631, "step": 1662 }, { "epoch": 0.06506768917755693, "grad_norm": 0.0, "learning_rate": 1.993560812916249e-05, "loss": 1.324, "step": 1663 }, { "epoch": 0.06510681586978637, "grad_norm": 0.0, "learning_rate": 1.9935464471983354e-05, "loss": 1.1899, "step": 1664 }, { "epoch": 0.06514594256201581, "grad_norm": 0.0, "learning_rate": 1.9935320655253107e-05, "loss": 1.1978, "step": 1665 }, { "epoch": 0.06518506925424525, "grad_norm": 0.0, "learning_rate": 1.993517667897406e-05, "loss": 1.2755, "step": 1666 }, { "epoch": 0.06522419594647469, "grad_norm": 0.0, "learning_rate": 1.993503254314853e-05, "loss": 1.2968, "step": 1667 }, { "epoch": 0.06526332263870413, "grad_norm": 0.0, "learning_rate": 1.9934888247778823e-05, "loss": 1.2979, "step": 1668 }, { "epoch": 0.06530244933093356, "grad_norm": 0.0, "learning_rate": 1.993474379286726e-05, "loss": 1.2623, "step": 1669 }, { "epoch": 0.065341576023163, "grad_norm": 0.0, "learning_rate": 1.9934599178416158e-05, "loss": 1.3064, "step": 1670 }, { "epoch": 0.06538070271539244, "grad_norm": 0.0, "learning_rate": 1.9934454404427845e-05, "loss": 1.3279, "step": 1671 }, { "epoch": 0.06541982940762188, "grad_norm": 0.0, "learning_rate": 1.993430947090464e-05, "loss": 1.2786, "step": 1672 }, { "epoch": 0.06545895609985132, "grad_norm": 0.0, "learning_rate": 1.9934164377848873e-05, "loss": 1.343, "step": 1673 }, { "epoch": 0.06549808279208076, "grad_norm": 0.0, "learning_rate": 1.993401912526288e-05, "loss": 1.3094, "step": 1674 }, { "epoch": 0.0655372094843102, "grad_norm": 0.0, "learning_rate": 1.9933873713148983e-05, "loss": 1.2059, "step": 1675 }, { "epoch": 0.06557633617653963, "grad_norm": 0.0, "learning_rate": 1.9933728141509524e-05, "loss": 1.297, "step": 1676 }, { "epoch": 0.06561546286876907, "grad_norm": 0.0, "learning_rate": 1.993358241034684e-05, "loss": 1.1859, "step": 1677 }, { "epoch": 0.06565458956099851, "grad_norm": 0.0, "learning_rate": 1.9933436519663265e-05, "loss": 1.2485, "step": 1678 }, { "epoch": 0.06569371625322795, "grad_norm": 0.0, "learning_rate": 1.993329046946115e-05, "loss": 1.2874, "step": 1679 }, { "epoch": 0.06573284294545739, "grad_norm": 0.0, "learning_rate": 1.9933144259742837e-05, "loss": 1.2261, "step": 1680 }, { "epoch": 0.06577196963768683, "grad_norm": 0.0, "learning_rate": 1.9932997890510676e-05, "loss": 1.2794, "step": 1681 }, { "epoch": 0.06581109632991627, "grad_norm": 0.0, "learning_rate": 1.9932851361767012e-05, "loss": 1.1898, "step": 1682 }, { "epoch": 0.0658502230221457, "grad_norm": 0.0, "learning_rate": 1.9932704673514203e-05, "loss": 1.2879, "step": 1683 }, { "epoch": 0.06588934971437514, "grad_norm": 0.0, "learning_rate": 1.9932557825754604e-05, "loss": 1.376, "step": 1684 }, { "epoch": 0.06592847640660458, "grad_norm": 0.0, "learning_rate": 1.9932410818490573e-05, "loss": 1.3007, "step": 1685 }, { "epoch": 0.06596760309883402, "grad_norm": 0.0, "learning_rate": 1.9932263651724467e-05, "loss": 1.2952, "step": 1686 }, { "epoch": 0.06600672979106346, "grad_norm": 0.0, "learning_rate": 1.9932116325458656e-05, "loss": 1.2074, "step": 1687 }, { "epoch": 0.0660458564832929, "grad_norm": 0.0, "learning_rate": 1.99319688396955e-05, "loss": 1.2127, "step": 1688 }, { "epoch": 0.06608498317552235, "grad_norm": 0.0, "learning_rate": 1.9931821194437374e-05, "loss": 1.1739, "step": 1689 }, { "epoch": 0.06612410986775177, "grad_norm": 0.0, "learning_rate": 1.9931673389686642e-05, "loss": 1.097, "step": 1690 }, { "epoch": 0.06616323655998121, "grad_norm": 0.0, "learning_rate": 1.9931525425445678e-05, "loss": 1.1993, "step": 1691 }, { "epoch": 0.06620236325221066, "grad_norm": 0.0, "learning_rate": 1.9931377301716867e-05, "loss": 1.3179, "step": 1692 }, { "epoch": 0.0662414899444401, "grad_norm": 0.0, "learning_rate": 1.9931229018502577e-05, "loss": 1.295, "step": 1693 }, { "epoch": 0.06628061663666954, "grad_norm": 0.0, "learning_rate": 1.9931080575805193e-05, "loss": 1.2687, "step": 1694 }, { "epoch": 0.06631974332889898, "grad_norm": 0.0, "learning_rate": 1.9930931973627097e-05, "loss": 1.399, "step": 1695 }, { "epoch": 0.06635887002112842, "grad_norm": 0.0, "learning_rate": 1.9930783211970682e-05, "loss": 1.3044, "step": 1696 }, { "epoch": 0.06639799671335785, "grad_norm": 0.0, "learning_rate": 1.9930634290838332e-05, "loss": 1.2271, "step": 1697 }, { "epoch": 0.06643712340558729, "grad_norm": 0.0, "learning_rate": 1.993048521023244e-05, "loss": 1.3456, "step": 1698 }, { "epoch": 0.06647625009781673, "grad_norm": 0.0, "learning_rate": 1.99303359701554e-05, "loss": 1.1822, "step": 1699 }, { "epoch": 0.06651537679004617, "grad_norm": 0.0, "learning_rate": 1.9930186570609602e-05, "loss": 1.1696, "step": 1700 }, { "epoch": 0.06655450348227561, "grad_norm": 0.0, "learning_rate": 1.9930037011597455e-05, "loss": 1.3401, "step": 1701 }, { "epoch": 0.06659363017450505, "grad_norm": 0.0, "learning_rate": 1.9929887293121357e-05, "loss": 1.2614, "step": 1702 }, { "epoch": 0.06663275686673449, "grad_norm": 0.0, "learning_rate": 1.992973741518371e-05, "loss": 1.0578, "step": 1703 }, { "epoch": 0.06667188355896393, "grad_norm": 0.0, "learning_rate": 1.9929587377786924e-05, "loss": 1.3089, "step": 1704 }, { "epoch": 0.06671101025119336, "grad_norm": 0.0, "learning_rate": 1.9929437180933407e-05, "loss": 1.3494, "step": 1705 }, { "epoch": 0.0667501369434228, "grad_norm": 0.0, "learning_rate": 1.992928682462557e-05, "loss": 1.1106, "step": 1706 }, { "epoch": 0.06678926363565224, "grad_norm": 0.0, "learning_rate": 1.9929136308865828e-05, "loss": 1.2758, "step": 1707 }, { "epoch": 0.06682839032788168, "grad_norm": 0.0, "learning_rate": 1.9928985633656604e-05, "loss": 1.266, "step": 1708 }, { "epoch": 0.06686751702011112, "grad_norm": 0.0, "learning_rate": 1.992883479900031e-05, "loss": 1.2996, "step": 1709 }, { "epoch": 0.06690664371234056, "grad_norm": 0.0, "learning_rate": 1.9928683804899368e-05, "loss": 1.3372, "step": 1710 }, { "epoch": 0.06694577040457, "grad_norm": 0.0, "learning_rate": 1.9928532651356205e-05, "loss": 1.3061, "step": 1711 }, { "epoch": 0.06698489709679943, "grad_norm": 0.0, "learning_rate": 1.9928381338373252e-05, "loss": 1.222, "step": 1712 }, { "epoch": 0.06702402378902887, "grad_norm": 0.0, "learning_rate": 1.9928229865952935e-05, "loss": 1.3434, "step": 1713 }, { "epoch": 0.06706315048125831, "grad_norm": 0.0, "learning_rate": 1.9928078234097687e-05, "loss": 1.3143, "step": 1714 }, { "epoch": 0.06710227717348775, "grad_norm": 0.0, "learning_rate": 1.9927926442809943e-05, "loss": 1.2975, "step": 1715 }, { "epoch": 0.0671414038657172, "grad_norm": 0.0, "learning_rate": 1.9927774492092137e-05, "loss": 1.3589, "step": 1716 }, { "epoch": 0.06718053055794664, "grad_norm": 0.0, "learning_rate": 1.9927622381946718e-05, "loss": 1.4301, "step": 1717 }, { "epoch": 0.06721965725017608, "grad_norm": 0.0, "learning_rate": 1.9927470112376122e-05, "loss": 1.0717, "step": 1718 }, { "epoch": 0.0672587839424055, "grad_norm": 0.0, "learning_rate": 1.9927317683382795e-05, "loss": 1.2332, "step": 1719 }, { "epoch": 0.06729791063463494, "grad_norm": 0.0, "learning_rate": 1.9927165094969187e-05, "loss": 1.3154, "step": 1720 }, { "epoch": 0.06733703732686439, "grad_norm": 0.0, "learning_rate": 1.9927012347137748e-05, "loss": 1.2991, "step": 1721 }, { "epoch": 0.06737616401909383, "grad_norm": 0.0, "learning_rate": 1.9926859439890927e-05, "loss": 1.2992, "step": 1722 }, { "epoch": 0.06741529071132327, "grad_norm": 0.0, "learning_rate": 1.9926706373231184e-05, "loss": 1.1696, "step": 1723 }, { "epoch": 0.06745441740355271, "grad_norm": 0.0, "learning_rate": 1.9926553147160975e-05, "loss": 1.3211, "step": 1724 }, { "epoch": 0.06749354409578215, "grad_norm": 0.0, "learning_rate": 1.992639976168276e-05, "loss": 1.2443, "step": 1725 }, { "epoch": 0.06753267078801158, "grad_norm": 0.0, "learning_rate": 1.9926246216799003e-05, "loss": 1.1652, "step": 1726 }, { "epoch": 0.06757179748024102, "grad_norm": 0.0, "learning_rate": 1.9926092512512172e-05, "loss": 1.1624, "step": 1727 }, { "epoch": 0.06761092417247046, "grad_norm": 0.0, "learning_rate": 1.9925938648824733e-05, "loss": 1.2501, "step": 1728 }, { "epoch": 0.0676500508646999, "grad_norm": 0.0, "learning_rate": 1.9925784625739157e-05, "loss": 1.2884, "step": 1729 }, { "epoch": 0.06768917755692934, "grad_norm": 0.0, "learning_rate": 1.9925630443257918e-05, "loss": 1.1824, "step": 1730 }, { "epoch": 0.06772830424915878, "grad_norm": 0.0, "learning_rate": 1.9925476101383493e-05, "loss": 1.2595, "step": 1731 }, { "epoch": 0.06776743094138822, "grad_norm": 0.0, "learning_rate": 1.992532160011836e-05, "loss": 1.3229, "step": 1732 }, { "epoch": 0.06780655763361765, "grad_norm": 0.0, "learning_rate": 1.992516693946499e-05, "loss": 1.1683, "step": 1733 }, { "epoch": 0.06784568432584709, "grad_norm": 0.0, "learning_rate": 1.9925012119425885e-05, "loss": 1.4284, "step": 1734 }, { "epoch": 0.06788481101807653, "grad_norm": 0.0, "learning_rate": 1.9924857140003523e-05, "loss": 1.2672, "step": 1735 }, { "epoch": 0.06792393771030597, "grad_norm": 0.0, "learning_rate": 1.9924702001200386e-05, "loss": 1.2678, "step": 1736 }, { "epoch": 0.06796306440253541, "grad_norm": 0.0, "learning_rate": 1.9924546703018974e-05, "loss": 1.3033, "step": 1737 }, { "epoch": 0.06800219109476485, "grad_norm": 0.0, "learning_rate": 1.9924391245461777e-05, "loss": 1.2269, "step": 1738 }, { "epoch": 0.0680413177869943, "grad_norm": 0.0, "learning_rate": 1.9924235628531292e-05, "loss": 1.2873, "step": 1739 }, { "epoch": 0.06808044447922372, "grad_norm": 0.0, "learning_rate": 1.992407985223002e-05, "loss": 1.3174, "step": 1740 }, { "epoch": 0.06811957117145316, "grad_norm": 0.0, "learning_rate": 1.992392391656046e-05, "loss": 1.3036, "step": 1741 }, { "epoch": 0.0681586978636826, "grad_norm": 0.0, "learning_rate": 1.992376782152512e-05, "loss": 1.192, "step": 1742 }, { "epoch": 0.06819782455591204, "grad_norm": 0.0, "learning_rate": 1.9923611567126505e-05, "loss": 1.2625, "step": 1743 }, { "epoch": 0.06823695124814148, "grad_norm": 0.0, "learning_rate": 1.992345515336712e-05, "loss": 1.2366, "step": 1744 }, { "epoch": 0.06827607794037092, "grad_norm": 0.0, "learning_rate": 1.992329858024948e-05, "loss": 1.4681, "step": 1745 }, { "epoch": 0.06831520463260037, "grad_norm": 0.0, "learning_rate": 1.9923141847776098e-05, "loss": 1.3658, "step": 1746 }, { "epoch": 0.06835433132482979, "grad_norm": 0.0, "learning_rate": 1.9922984955949497e-05, "loss": 1.1591, "step": 1747 }, { "epoch": 0.06839345801705923, "grad_norm": 0.0, "learning_rate": 1.9922827904772187e-05, "loss": 1.2719, "step": 1748 }, { "epoch": 0.06843258470928867, "grad_norm": 0.0, "learning_rate": 1.99226706942467e-05, "loss": 1.2632, "step": 1749 }, { "epoch": 0.06847171140151811, "grad_norm": 0.0, "learning_rate": 1.992251332437555e-05, "loss": 1.2985, "step": 1750 }, { "epoch": 0.06851083809374756, "grad_norm": 0.0, "learning_rate": 1.992235579516127e-05, "loss": 1.2123, "step": 1751 }, { "epoch": 0.068549964785977, "grad_norm": 0.0, "learning_rate": 1.9922198106606393e-05, "loss": 1.3221, "step": 1752 }, { "epoch": 0.06858909147820644, "grad_norm": 0.0, "learning_rate": 1.9922040258713447e-05, "loss": 1.2205, "step": 1753 }, { "epoch": 0.06862821817043586, "grad_norm": 0.0, "learning_rate": 1.9921882251484967e-05, "loss": 1.2539, "step": 1754 }, { "epoch": 0.0686673448626653, "grad_norm": 0.0, "learning_rate": 1.992172408492349e-05, "loss": 1.2136, "step": 1755 }, { "epoch": 0.06870647155489475, "grad_norm": 0.0, "learning_rate": 1.9921565759031557e-05, "loss": 1.1508, "step": 1756 }, { "epoch": 0.06874559824712419, "grad_norm": 0.0, "learning_rate": 1.992140727381171e-05, "loss": 1.2848, "step": 1757 }, { "epoch": 0.06878472493935363, "grad_norm": 0.0, "learning_rate": 1.9921248629266495e-05, "loss": 1.3842, "step": 1758 }, { "epoch": 0.06882385163158307, "grad_norm": 0.0, "learning_rate": 1.992108982539846e-05, "loss": 1.262, "step": 1759 }, { "epoch": 0.06886297832381251, "grad_norm": 0.0, "learning_rate": 1.992093086221015e-05, "loss": 1.278, "step": 1760 }, { "epoch": 0.06890210501604195, "grad_norm": 0.0, "learning_rate": 1.9920771739704127e-05, "loss": 1.3209, "step": 1761 }, { "epoch": 0.06894123170827138, "grad_norm": 0.0, "learning_rate": 1.992061245788294e-05, "loss": 1.173, "step": 1762 }, { "epoch": 0.06898035840050082, "grad_norm": 0.0, "learning_rate": 1.9920453016749146e-05, "loss": 1.2296, "step": 1763 }, { "epoch": 0.06901948509273026, "grad_norm": 0.0, "learning_rate": 1.992029341630531e-05, "loss": 1.319, "step": 1764 }, { "epoch": 0.0690586117849597, "grad_norm": 0.0, "learning_rate": 1.992013365655399e-05, "loss": 1.1454, "step": 1765 }, { "epoch": 0.06909773847718914, "grad_norm": 0.0, "learning_rate": 1.991997373749776e-05, "loss": 1.1742, "step": 1766 }, { "epoch": 0.06913686516941858, "grad_norm": 0.0, "learning_rate": 1.9919813659139177e-05, "loss": 1.1975, "step": 1767 }, { "epoch": 0.06917599186164802, "grad_norm": 0.0, "learning_rate": 1.9919653421480816e-05, "loss": 1.3209, "step": 1768 }, { "epoch": 0.06921511855387745, "grad_norm": 0.0, "learning_rate": 1.9919493024525255e-05, "loss": 1.2196, "step": 1769 }, { "epoch": 0.06925424524610689, "grad_norm": 0.0, "learning_rate": 1.9919332468275062e-05, "loss": 1.2317, "step": 1770 }, { "epoch": 0.06929337193833633, "grad_norm": 0.0, "learning_rate": 1.991917175273282e-05, "loss": 1.3445, "step": 1771 }, { "epoch": 0.06933249863056577, "grad_norm": 0.0, "learning_rate": 1.991901087790111e-05, "loss": 1.3221, "step": 1772 }, { "epoch": 0.06937162532279521, "grad_norm": 0.0, "learning_rate": 1.9918849843782513e-05, "loss": 1.2411, "step": 1773 }, { "epoch": 0.06941075201502465, "grad_norm": 0.0, "learning_rate": 1.9918688650379622e-05, "loss": 1.3253, "step": 1774 }, { "epoch": 0.0694498787072541, "grad_norm": 0.0, "learning_rate": 1.9918527297695014e-05, "loss": 1.2332, "step": 1775 }, { "epoch": 0.06948900539948352, "grad_norm": 0.0, "learning_rate": 1.991836578573129e-05, "loss": 1.1119, "step": 1776 }, { "epoch": 0.06952813209171296, "grad_norm": 0.0, "learning_rate": 1.9918204114491034e-05, "loss": 1.2925, "step": 1777 }, { "epoch": 0.0695672587839424, "grad_norm": 0.0, "learning_rate": 1.9918042283976855e-05, "loss": 1.3717, "step": 1778 }, { "epoch": 0.06960638547617184, "grad_norm": 0.0, "learning_rate": 1.991788029419134e-05, "loss": 1.2773, "step": 1779 }, { "epoch": 0.06964551216840129, "grad_norm": 0.0, "learning_rate": 1.99177181451371e-05, "loss": 1.2186, "step": 1780 }, { "epoch": 0.06968463886063073, "grad_norm": 0.0, "learning_rate": 1.991755583681673e-05, "loss": 1.2682, "step": 1781 }, { "epoch": 0.06972376555286017, "grad_norm": 0.0, "learning_rate": 1.9917393369232843e-05, "loss": 1.3552, "step": 1782 }, { "epoch": 0.0697628922450896, "grad_norm": 0.0, "learning_rate": 1.9917230742388046e-05, "loss": 1.2062, "step": 1783 }, { "epoch": 0.06980201893731904, "grad_norm": 0.0, "learning_rate": 1.9917067956284947e-05, "loss": 1.3907, "step": 1784 }, { "epoch": 0.06984114562954848, "grad_norm": 0.0, "learning_rate": 1.9916905010926165e-05, "loss": 1.292, "step": 1785 }, { "epoch": 0.06988027232177792, "grad_norm": 0.0, "learning_rate": 1.9916741906314317e-05, "loss": 1.1696, "step": 1786 }, { "epoch": 0.06991939901400736, "grad_norm": 0.0, "learning_rate": 1.991657864245202e-05, "loss": 1.2238, "step": 1787 }, { "epoch": 0.0699585257062368, "grad_norm": 0.0, "learning_rate": 1.9916415219341895e-05, "loss": 1.2587, "step": 1788 }, { "epoch": 0.06999765239846624, "grad_norm": 0.0, "learning_rate": 1.9916251636986568e-05, "loss": 1.146, "step": 1789 }, { "epoch": 0.07003677909069567, "grad_norm": 0.0, "learning_rate": 1.9916087895388664e-05, "loss": 1.2631, "step": 1790 }, { "epoch": 0.07007590578292511, "grad_norm": 0.0, "learning_rate": 1.9915923994550816e-05, "loss": 1.2489, "step": 1791 }, { "epoch": 0.07011503247515455, "grad_norm": 0.0, "learning_rate": 1.9915759934475653e-05, "loss": 1.4487, "step": 1792 }, { "epoch": 0.07015415916738399, "grad_norm": 0.0, "learning_rate": 1.991559571516581e-05, "loss": 1.2693, "step": 1793 }, { "epoch": 0.07019328585961343, "grad_norm": 0.0, "learning_rate": 1.9915431336623928e-05, "loss": 1.2261, "step": 1794 }, { "epoch": 0.07023241255184287, "grad_norm": 0.0, "learning_rate": 1.9915266798852642e-05, "loss": 1.3287, "step": 1795 }, { "epoch": 0.07027153924407231, "grad_norm": 0.0, "learning_rate": 1.9915102101854594e-05, "loss": 1.2385, "step": 1796 }, { "epoch": 0.07031066593630174, "grad_norm": 0.0, "learning_rate": 1.9914937245632432e-05, "loss": 1.3311, "step": 1797 }, { "epoch": 0.07034979262853118, "grad_norm": 0.0, "learning_rate": 1.9914772230188797e-05, "loss": 1.1758, "step": 1798 }, { "epoch": 0.07038891932076062, "grad_norm": 0.0, "learning_rate": 1.991460705552635e-05, "loss": 1.2943, "step": 1799 }, { "epoch": 0.07042804601299006, "grad_norm": 0.0, "learning_rate": 1.9914441721647737e-05, "loss": 1.2666, "step": 1800 }, { "epoch": 0.0704671727052195, "grad_norm": 0.0, "learning_rate": 1.9914276228555613e-05, "loss": 1.374, "step": 1801 }, { "epoch": 0.07050629939744894, "grad_norm": 0.0, "learning_rate": 1.991411057625263e-05, "loss": 1.2898, "step": 1802 }, { "epoch": 0.07054542608967838, "grad_norm": 0.0, "learning_rate": 1.9913944764741463e-05, "loss": 1.3965, "step": 1803 }, { "epoch": 0.07058455278190781, "grad_norm": 0.0, "learning_rate": 1.9913778794024764e-05, "loss": 1.2716, "step": 1804 }, { "epoch": 0.07062367947413725, "grad_norm": 0.0, "learning_rate": 1.9913612664105196e-05, "loss": 1.1607, "step": 1805 }, { "epoch": 0.07066280616636669, "grad_norm": 0.0, "learning_rate": 1.9913446374985434e-05, "loss": 1.1209, "step": 1806 }, { "epoch": 0.07070193285859613, "grad_norm": 0.0, "learning_rate": 1.9913279926668146e-05, "loss": 1.2715, "step": 1807 }, { "epoch": 0.07074105955082557, "grad_norm": 0.0, "learning_rate": 1.9913113319156e-05, "loss": 1.2874, "step": 1808 }, { "epoch": 0.07078018624305502, "grad_norm": 0.0, "learning_rate": 1.9912946552451683e-05, "loss": 1.276, "step": 1809 }, { "epoch": 0.07081931293528446, "grad_norm": 0.0, "learning_rate": 1.991277962655786e-05, "loss": 1.4103, "step": 1810 }, { "epoch": 0.07085843962751388, "grad_norm": 0.0, "learning_rate": 1.9912612541477222e-05, "loss": 1.3237, "step": 1811 }, { "epoch": 0.07089756631974332, "grad_norm": 0.0, "learning_rate": 1.9912445297212442e-05, "loss": 1.2756, "step": 1812 }, { "epoch": 0.07093669301197277, "grad_norm": 0.0, "learning_rate": 1.9912277893766218e-05, "loss": 1.2252, "step": 1813 }, { "epoch": 0.0709758197042022, "grad_norm": 0.0, "learning_rate": 1.991211033114123e-05, "loss": 1.4095, "step": 1814 }, { "epoch": 0.07101494639643165, "grad_norm": 0.0, "learning_rate": 1.991194260934017e-05, "loss": 1.2072, "step": 1815 }, { "epoch": 0.07105407308866109, "grad_norm": 0.0, "learning_rate": 1.9911774728365732e-05, "loss": 1.3199, "step": 1816 }, { "epoch": 0.07109319978089053, "grad_norm": 0.0, "learning_rate": 1.991160668822061e-05, "loss": 1.3018, "step": 1817 }, { "epoch": 0.07113232647311996, "grad_norm": 0.0, "learning_rate": 1.9911438488907506e-05, "loss": 1.1051, "step": 1818 }, { "epoch": 0.0711714531653494, "grad_norm": 0.0, "learning_rate": 1.991127013042912e-05, "loss": 1.4069, "step": 1819 }, { "epoch": 0.07121057985757884, "grad_norm": 0.0, "learning_rate": 1.9911101612788157e-05, "loss": 1.2693, "step": 1820 }, { "epoch": 0.07124970654980828, "grad_norm": 0.0, "learning_rate": 1.991093293598732e-05, "loss": 1.3079, "step": 1821 }, { "epoch": 0.07128883324203772, "grad_norm": 0.0, "learning_rate": 1.9910764100029316e-05, "loss": 1.2044, "step": 1822 }, { "epoch": 0.07132795993426716, "grad_norm": 0.0, "learning_rate": 1.9910595104916864e-05, "loss": 1.2864, "step": 1823 }, { "epoch": 0.0713670866264966, "grad_norm": 0.0, "learning_rate": 1.991042595065267e-05, "loss": 1.275, "step": 1824 }, { "epoch": 0.07140621331872604, "grad_norm": 0.0, "learning_rate": 1.9910256637239455e-05, "loss": 1.1634, "step": 1825 }, { "epoch": 0.07144534001095547, "grad_norm": 0.0, "learning_rate": 1.9910087164679938e-05, "loss": 1.2861, "step": 1826 }, { "epoch": 0.07148446670318491, "grad_norm": 0.0, "learning_rate": 1.9909917532976838e-05, "loss": 1.2358, "step": 1827 }, { "epoch": 0.07152359339541435, "grad_norm": 0.0, "learning_rate": 1.990974774213288e-05, "loss": 1.1472, "step": 1828 }, { "epoch": 0.07156272008764379, "grad_norm": 0.0, "learning_rate": 1.990957779215079e-05, "loss": 1.2343, "step": 1829 }, { "epoch": 0.07160184677987323, "grad_norm": 0.0, "learning_rate": 1.9909407683033296e-05, "loss": 1.3584, "step": 1830 }, { "epoch": 0.07164097347210267, "grad_norm": 0.0, "learning_rate": 1.9909237414783137e-05, "loss": 1.2531, "step": 1831 }, { "epoch": 0.07168010016433211, "grad_norm": 0.0, "learning_rate": 1.990906698740304e-05, "loss": 1.3658, "step": 1832 }, { "epoch": 0.07171922685656154, "grad_norm": 0.0, "learning_rate": 1.9908896400895745e-05, "loss": 1.3233, "step": 1833 }, { "epoch": 0.07175835354879098, "grad_norm": 0.0, "learning_rate": 1.9908725655263986e-05, "loss": 1.221, "step": 1834 }, { "epoch": 0.07179748024102042, "grad_norm": 0.0, "learning_rate": 1.9908554750510513e-05, "loss": 1.1685, "step": 1835 }, { "epoch": 0.07183660693324986, "grad_norm": 0.0, "learning_rate": 1.9908383686638068e-05, "loss": 1.3413, "step": 1836 }, { "epoch": 0.0718757336254793, "grad_norm": 0.0, "learning_rate": 1.9908212463649396e-05, "loss": 1.2809, "step": 1837 }, { "epoch": 0.07191486031770875, "grad_norm": 0.0, "learning_rate": 1.990804108154725e-05, "loss": 1.4349, "step": 1838 }, { "epoch": 0.07195398700993819, "grad_norm": 0.0, "learning_rate": 1.9907869540334374e-05, "loss": 1.2101, "step": 1839 }, { "epoch": 0.07199311370216761, "grad_norm": 0.0, "learning_rate": 1.9907697840013532e-05, "loss": 1.3693, "step": 1840 }, { "epoch": 0.07203224039439705, "grad_norm": 0.0, "learning_rate": 1.9907525980587475e-05, "loss": 1.2379, "step": 1841 }, { "epoch": 0.0720713670866265, "grad_norm": 0.0, "learning_rate": 1.9907353962058968e-05, "loss": 1.1862, "step": 1842 }, { "epoch": 0.07211049377885594, "grad_norm": 0.0, "learning_rate": 1.9907181784430768e-05, "loss": 1.2073, "step": 1843 }, { "epoch": 0.07214962047108538, "grad_norm": 0.0, "learning_rate": 1.9907009447705646e-05, "loss": 1.2107, "step": 1844 }, { "epoch": 0.07218874716331482, "grad_norm": 0.0, "learning_rate": 1.9906836951886365e-05, "loss": 1.1639, "step": 1845 }, { "epoch": 0.07222787385554426, "grad_norm": 0.0, "learning_rate": 1.9906664296975696e-05, "loss": 1.1622, "step": 1846 }, { "epoch": 0.07226700054777369, "grad_norm": 0.0, "learning_rate": 1.9906491482976413e-05, "loss": 1.4441, "step": 1847 }, { "epoch": 0.07230612724000313, "grad_norm": 0.0, "learning_rate": 1.9906318509891292e-05, "loss": 1.1623, "step": 1848 }, { "epoch": 0.07234525393223257, "grad_norm": 0.0, "learning_rate": 1.9906145377723107e-05, "loss": 1.2059, "step": 1849 }, { "epoch": 0.07238438062446201, "grad_norm": 0.0, "learning_rate": 1.990597208647464e-05, "loss": 1.1954, "step": 1850 }, { "epoch": 0.07242350731669145, "grad_norm": 0.0, "learning_rate": 1.9905798636148675e-05, "loss": 1.2042, "step": 1851 }, { "epoch": 0.07246263400892089, "grad_norm": 0.0, "learning_rate": 1.9905625026748e-05, "loss": 1.2872, "step": 1852 }, { "epoch": 0.07250176070115033, "grad_norm": 0.0, "learning_rate": 1.9905451258275396e-05, "loss": 1.2136, "step": 1853 }, { "epoch": 0.07254088739337976, "grad_norm": 0.0, "learning_rate": 1.9905277330733655e-05, "loss": 1.3086, "step": 1854 }, { "epoch": 0.0725800140856092, "grad_norm": 0.0, "learning_rate": 1.9905103244125573e-05, "loss": 1.2618, "step": 1855 }, { "epoch": 0.07261914077783864, "grad_norm": 0.0, "learning_rate": 1.9904928998453947e-05, "loss": 1.1924, "step": 1856 }, { "epoch": 0.07265826747006808, "grad_norm": 0.0, "learning_rate": 1.9904754593721575e-05, "loss": 1.2982, "step": 1857 }, { "epoch": 0.07269739416229752, "grad_norm": 0.0, "learning_rate": 1.990458002993125e-05, "loss": 1.2673, "step": 1858 }, { "epoch": 0.07273652085452696, "grad_norm": 0.0, "learning_rate": 1.990440530708578e-05, "loss": 1.2605, "step": 1859 }, { "epoch": 0.0727756475467564, "grad_norm": 0.0, "learning_rate": 1.9904230425187978e-05, "loss": 1.2355, "step": 1860 }, { "epoch": 0.07281477423898583, "grad_norm": 0.0, "learning_rate": 1.9904055384240642e-05, "loss": 1.3147, "step": 1861 }, { "epoch": 0.07285390093121527, "grad_norm": 0.0, "learning_rate": 1.990388018424659e-05, "loss": 1.2831, "step": 1862 }, { "epoch": 0.07289302762344471, "grad_norm": 0.0, "learning_rate": 1.990370482520863e-05, "loss": 1.215, "step": 1863 }, { "epoch": 0.07293215431567415, "grad_norm": 0.0, "learning_rate": 1.9903529307129582e-05, "loss": 1.2944, "step": 1864 }, { "epoch": 0.0729712810079036, "grad_norm": 0.0, "learning_rate": 1.9903353630012262e-05, "loss": 1.3025, "step": 1865 }, { "epoch": 0.07301040770013303, "grad_norm": 0.0, "learning_rate": 1.9903177793859488e-05, "loss": 1.3029, "step": 1866 }, { "epoch": 0.07304953439236248, "grad_norm": 0.0, "learning_rate": 1.9903001798674097e-05, "loss": 1.2471, "step": 1867 }, { "epoch": 0.0730886610845919, "grad_norm": 0.0, "learning_rate": 1.99028256444589e-05, "loss": 1.2047, "step": 1868 }, { "epoch": 0.07312778777682134, "grad_norm": 0.0, "learning_rate": 1.9902649331216732e-05, "loss": 1.1525, "step": 1869 }, { "epoch": 0.07316691446905078, "grad_norm": 0.0, "learning_rate": 1.9902472858950428e-05, "loss": 1.2337, "step": 1870 }, { "epoch": 0.07320604116128022, "grad_norm": 0.0, "learning_rate": 1.9902296227662815e-05, "loss": 1.1521, "step": 1871 }, { "epoch": 0.07324516785350967, "grad_norm": 0.0, "learning_rate": 1.9902119437356737e-05, "loss": 1.2357, "step": 1872 }, { "epoch": 0.0732842945457391, "grad_norm": 0.0, "learning_rate": 1.9901942488035026e-05, "loss": 1.2852, "step": 1873 }, { "epoch": 0.07332342123796855, "grad_norm": 0.0, "learning_rate": 1.9901765379700527e-05, "loss": 1.2617, "step": 1874 }, { "epoch": 0.07336254793019797, "grad_norm": 0.0, "learning_rate": 1.9901588112356084e-05, "loss": 1.2689, "step": 1875 }, { "epoch": 0.07340167462242742, "grad_norm": 0.0, "learning_rate": 1.990141068600454e-05, "loss": 1.2642, "step": 1876 }, { "epoch": 0.07344080131465686, "grad_norm": 0.0, "learning_rate": 1.990123310064875e-05, "loss": 1.2461, "step": 1877 }, { "epoch": 0.0734799280068863, "grad_norm": 0.0, "learning_rate": 1.9901055356291567e-05, "loss": 1.2837, "step": 1878 }, { "epoch": 0.07351905469911574, "grad_norm": 0.0, "learning_rate": 1.9900877452935837e-05, "loss": 1.3474, "step": 1879 }, { "epoch": 0.07355818139134518, "grad_norm": 0.0, "learning_rate": 1.9900699390584424e-05, "loss": 1.2607, "step": 1880 }, { "epoch": 0.07359730808357462, "grad_norm": 0.0, "learning_rate": 1.9900521169240182e-05, "loss": 1.1183, "step": 1881 }, { "epoch": 0.07363643477580405, "grad_norm": 0.0, "learning_rate": 1.990034278890598e-05, "loss": 1.1786, "step": 1882 }, { "epoch": 0.07367556146803349, "grad_norm": 0.0, "learning_rate": 1.9900164249584676e-05, "loss": 1.2324, "step": 1883 }, { "epoch": 0.07371468816026293, "grad_norm": 0.0, "learning_rate": 1.989998555127914e-05, "loss": 1.2347, "step": 1884 }, { "epoch": 0.07375381485249237, "grad_norm": 0.0, "learning_rate": 1.9899806693992242e-05, "loss": 1.1398, "step": 1885 }, { "epoch": 0.07379294154472181, "grad_norm": 0.0, "learning_rate": 1.9899627677726855e-05, "loss": 1.2064, "step": 1886 }, { "epoch": 0.07383206823695125, "grad_norm": 0.0, "learning_rate": 1.989944850248585e-05, "loss": 1.2109, "step": 1887 }, { "epoch": 0.07387119492918069, "grad_norm": 0.0, "learning_rate": 1.9899269168272107e-05, "loss": 1.1867, "step": 1888 }, { "epoch": 0.07391032162141013, "grad_norm": 0.0, "learning_rate": 1.9899089675088505e-05, "loss": 1.2373, "step": 1889 }, { "epoch": 0.07394944831363956, "grad_norm": 0.0, "learning_rate": 1.989891002293793e-05, "loss": 1.2576, "step": 1890 }, { "epoch": 0.073988575005869, "grad_norm": 0.0, "learning_rate": 1.989873021182326e-05, "loss": 1.1708, "step": 1891 }, { "epoch": 0.07402770169809844, "grad_norm": 0.0, "learning_rate": 1.989855024174739e-05, "loss": 1.278, "step": 1892 }, { "epoch": 0.07406682839032788, "grad_norm": 0.0, "learning_rate": 1.9898370112713204e-05, "loss": 1.3256, "step": 1893 }, { "epoch": 0.07410595508255732, "grad_norm": 0.0, "learning_rate": 1.9898189824723602e-05, "loss": 1.3955, "step": 1894 }, { "epoch": 0.07414508177478676, "grad_norm": 0.0, "learning_rate": 1.989800937778147e-05, "loss": 1.1417, "step": 1895 }, { "epoch": 0.0741842084670162, "grad_norm": 0.0, "learning_rate": 1.9897828771889715e-05, "loss": 1.2388, "step": 1896 }, { "epoch": 0.07422333515924563, "grad_norm": 0.0, "learning_rate": 1.989764800705123e-05, "loss": 1.3481, "step": 1897 }, { "epoch": 0.07426246185147507, "grad_norm": 0.0, "learning_rate": 1.989746708326892e-05, "loss": 1.2322, "step": 1898 }, { "epoch": 0.07430158854370451, "grad_norm": 0.0, "learning_rate": 1.9897286000545688e-05, "loss": 1.1375, "step": 1899 }, { "epoch": 0.07434071523593395, "grad_norm": 0.0, "learning_rate": 1.9897104758884448e-05, "loss": 1.3627, "step": 1900 }, { "epoch": 0.0743798419281634, "grad_norm": 0.0, "learning_rate": 1.989692335828811e-05, "loss": 1.2697, "step": 1901 }, { "epoch": 0.07441896862039284, "grad_norm": 0.0, "learning_rate": 1.9896741798759578e-05, "loss": 1.3601, "step": 1902 }, { "epoch": 0.07445809531262228, "grad_norm": 0.0, "learning_rate": 1.9896560080301775e-05, "loss": 1.2968, "step": 1903 }, { "epoch": 0.0744972220048517, "grad_norm": 0.0, "learning_rate": 1.9896378202917623e-05, "loss": 1.4075, "step": 1904 }, { "epoch": 0.07453634869708115, "grad_norm": 0.0, "learning_rate": 1.9896196166610036e-05, "loss": 1.3802, "step": 1905 }, { "epoch": 0.07457547538931059, "grad_norm": 0.0, "learning_rate": 1.989601397138194e-05, "loss": 1.3283, "step": 1906 }, { "epoch": 0.07461460208154003, "grad_norm": 0.0, "learning_rate": 1.9895831617236258e-05, "loss": 1.1471, "step": 1907 }, { "epoch": 0.07465372877376947, "grad_norm": 0.0, "learning_rate": 1.9895649104175922e-05, "loss": 1.2631, "step": 1908 }, { "epoch": 0.07469285546599891, "grad_norm": 0.0, "learning_rate": 1.9895466432203857e-05, "loss": 1.3337, "step": 1909 }, { "epoch": 0.07473198215822835, "grad_norm": 0.0, "learning_rate": 1.9895283601323007e-05, "loss": 1.2415, "step": 1910 }, { "epoch": 0.07477110885045778, "grad_norm": 0.0, "learning_rate": 1.98951006115363e-05, "loss": 1.3805, "step": 1911 }, { "epoch": 0.07481023554268722, "grad_norm": 0.0, "learning_rate": 1.989491746284667e-05, "loss": 1.1025, "step": 1912 }, { "epoch": 0.07484936223491666, "grad_norm": 0.0, "learning_rate": 1.9894734155257074e-05, "loss": 1.2964, "step": 1913 }, { "epoch": 0.0748884889271461, "grad_norm": 0.0, "learning_rate": 1.9894550688770442e-05, "loss": 1.3315, "step": 1914 }, { "epoch": 0.07492761561937554, "grad_norm": 0.0, "learning_rate": 1.9894367063389727e-05, "loss": 1.2904, "step": 1915 }, { "epoch": 0.07496674231160498, "grad_norm": 0.0, "learning_rate": 1.989418327911787e-05, "loss": 1.2506, "step": 1916 }, { "epoch": 0.07500586900383442, "grad_norm": 0.0, "learning_rate": 1.989399933595783e-05, "loss": 1.2877, "step": 1917 }, { "epoch": 0.07504499569606385, "grad_norm": 0.0, "learning_rate": 1.989381523391256e-05, "loss": 1.2717, "step": 1918 }, { "epoch": 0.07508412238829329, "grad_norm": 0.0, "learning_rate": 1.9893630972985016e-05, "loss": 1.277, "step": 1919 }, { "epoch": 0.07512324908052273, "grad_norm": 0.0, "learning_rate": 1.9893446553178154e-05, "loss": 1.2408, "step": 1920 }, { "epoch": 0.07516237577275217, "grad_norm": 0.0, "learning_rate": 1.989326197449494e-05, "loss": 1.2579, "step": 1921 }, { "epoch": 0.07520150246498161, "grad_norm": 0.0, "learning_rate": 1.9893077236938332e-05, "loss": 1.2526, "step": 1922 }, { "epoch": 0.07524062915721105, "grad_norm": 0.0, "learning_rate": 1.98928923405113e-05, "loss": 1.1964, "step": 1923 }, { "epoch": 0.0752797558494405, "grad_norm": 0.0, "learning_rate": 1.9892707285216816e-05, "loss": 1.2639, "step": 1924 }, { "epoch": 0.07531888254166992, "grad_norm": 0.0, "learning_rate": 1.9892522071057848e-05, "loss": 1.2607, "step": 1925 }, { "epoch": 0.07535800923389936, "grad_norm": 0.0, "learning_rate": 1.9892336698037373e-05, "loss": 1.1143, "step": 1926 }, { "epoch": 0.0753971359261288, "grad_norm": 0.0, "learning_rate": 1.9892151166158366e-05, "loss": 1.222, "step": 1927 }, { "epoch": 0.07543626261835824, "grad_norm": 0.0, "learning_rate": 1.9891965475423808e-05, "loss": 1.3527, "step": 1928 }, { "epoch": 0.07547538931058768, "grad_norm": 0.0, "learning_rate": 1.9891779625836677e-05, "loss": 1.2106, "step": 1929 }, { "epoch": 0.07551451600281713, "grad_norm": 0.0, "learning_rate": 1.9891593617399962e-05, "loss": 1.3357, "step": 1930 }, { "epoch": 0.07555364269504657, "grad_norm": 0.0, "learning_rate": 1.989140745011665e-05, "loss": 1.3646, "step": 1931 }, { "epoch": 0.075592769387276, "grad_norm": 0.0, "learning_rate": 1.9891221123989727e-05, "loss": 1.4304, "step": 1932 }, { "epoch": 0.07563189607950543, "grad_norm": 0.0, "learning_rate": 1.9891034639022184e-05, "loss": 1.2086, "step": 1933 }, { "epoch": 0.07567102277173487, "grad_norm": 0.0, "learning_rate": 1.9890847995217022e-05, "loss": 1.2603, "step": 1934 }, { "epoch": 0.07571014946396432, "grad_norm": 0.0, "learning_rate": 1.9890661192577236e-05, "loss": 1.2716, "step": 1935 }, { "epoch": 0.07574927615619376, "grad_norm": 0.0, "learning_rate": 1.9890474231105822e-05, "loss": 1.205, "step": 1936 }, { "epoch": 0.0757884028484232, "grad_norm": 0.0, "learning_rate": 1.9890287110805787e-05, "loss": 1.3789, "step": 1937 }, { "epoch": 0.07582752954065264, "grad_norm": 0.0, "learning_rate": 1.9890099831680135e-05, "loss": 1.3787, "step": 1938 }, { "epoch": 0.07586665623288207, "grad_norm": 0.0, "learning_rate": 1.988991239373187e-05, "loss": 1.3361, "step": 1939 }, { "epoch": 0.0759057829251115, "grad_norm": 0.0, "learning_rate": 1.988972479696401e-05, "loss": 1.2819, "step": 1940 }, { "epoch": 0.07594490961734095, "grad_norm": 0.0, "learning_rate": 1.988953704137956e-05, "loss": 1.1558, "step": 1941 }, { "epoch": 0.07598403630957039, "grad_norm": 0.0, "learning_rate": 1.9889349126981535e-05, "loss": 1.2296, "step": 1942 }, { "epoch": 0.07602316300179983, "grad_norm": 0.0, "learning_rate": 1.9889161053772958e-05, "loss": 1.2879, "step": 1943 }, { "epoch": 0.07606228969402927, "grad_norm": 0.0, "learning_rate": 1.9888972821756846e-05, "loss": 1.2853, "step": 1944 }, { "epoch": 0.07610141638625871, "grad_norm": 0.0, "learning_rate": 1.988878443093622e-05, "loss": 1.3057, "step": 1945 }, { "epoch": 0.07614054307848815, "grad_norm": 0.0, "learning_rate": 1.9888595881314106e-05, "loss": 1.2603, "step": 1946 }, { "epoch": 0.07617966977071758, "grad_norm": 0.0, "learning_rate": 1.9888407172893536e-05, "loss": 1.3647, "step": 1947 }, { "epoch": 0.07621879646294702, "grad_norm": 0.0, "learning_rate": 1.9888218305677534e-05, "loss": 1.1335, "step": 1948 }, { "epoch": 0.07625792315517646, "grad_norm": 0.0, "learning_rate": 1.9888029279669143e-05, "loss": 1.113, "step": 1949 }, { "epoch": 0.0762970498474059, "grad_norm": 0.0, "learning_rate": 1.988784009487139e-05, "loss": 1.3087, "step": 1950 }, { "epoch": 0.07633617653963534, "grad_norm": 0.0, "learning_rate": 1.9887650751287314e-05, "loss": 1.1516, "step": 1951 }, { "epoch": 0.07637530323186478, "grad_norm": 0.0, "learning_rate": 1.9887461248919956e-05, "loss": 1.1907, "step": 1952 }, { "epoch": 0.07641442992409422, "grad_norm": 0.0, "learning_rate": 1.9887271587772363e-05, "loss": 1.24, "step": 1953 }, { "epoch": 0.07645355661632365, "grad_norm": 0.0, "learning_rate": 1.9887081767847577e-05, "loss": 1.2995, "step": 1954 }, { "epoch": 0.07649268330855309, "grad_norm": 0.0, "learning_rate": 1.9886891789148643e-05, "loss": 1.1891, "step": 1955 }, { "epoch": 0.07653181000078253, "grad_norm": 0.0, "learning_rate": 1.9886701651678618e-05, "loss": 1.2789, "step": 1956 }, { "epoch": 0.07657093669301197, "grad_norm": 0.0, "learning_rate": 1.9886511355440558e-05, "loss": 1.2745, "step": 1957 }, { "epoch": 0.07661006338524141, "grad_norm": 0.0, "learning_rate": 1.9886320900437508e-05, "loss": 1.1973, "step": 1958 }, { "epoch": 0.07664919007747086, "grad_norm": 0.0, "learning_rate": 1.9886130286672532e-05, "loss": 1.1722, "step": 1959 }, { "epoch": 0.0766883167697003, "grad_norm": 0.0, "learning_rate": 1.9885939514148696e-05, "loss": 1.2322, "step": 1960 }, { "epoch": 0.07672744346192972, "grad_norm": 0.0, "learning_rate": 1.9885748582869056e-05, "loss": 1.307, "step": 1961 }, { "epoch": 0.07676657015415916, "grad_norm": 0.0, "learning_rate": 1.9885557492836685e-05, "loss": 1.309, "step": 1962 }, { "epoch": 0.0768056968463886, "grad_norm": 0.0, "learning_rate": 1.9885366244054646e-05, "loss": 1.2284, "step": 1963 }, { "epoch": 0.07684482353861805, "grad_norm": 0.0, "learning_rate": 1.988517483652601e-05, "loss": 1.3523, "step": 1964 }, { "epoch": 0.07688395023084749, "grad_norm": 0.0, "learning_rate": 1.9884983270253855e-05, "loss": 1.3691, "step": 1965 }, { "epoch": 0.07692307692307693, "grad_norm": 0.0, "learning_rate": 1.9884791545241256e-05, "loss": 1.2338, "step": 1966 }, { "epoch": 0.07696220361530637, "grad_norm": 0.0, "learning_rate": 1.988459966149129e-05, "loss": 1.2858, "step": 1967 }, { "epoch": 0.0770013303075358, "grad_norm": 0.0, "learning_rate": 1.988440761900704e-05, "loss": 1.1157, "step": 1968 }, { "epoch": 0.07704045699976524, "grad_norm": 0.0, "learning_rate": 1.9884215417791587e-05, "loss": 1.1881, "step": 1969 }, { "epoch": 0.07707958369199468, "grad_norm": 0.0, "learning_rate": 1.9884023057848025e-05, "loss": 1.2648, "step": 1970 }, { "epoch": 0.07711871038422412, "grad_norm": 0.0, "learning_rate": 1.9883830539179435e-05, "loss": 1.2596, "step": 1971 }, { "epoch": 0.07715783707645356, "grad_norm": 0.0, "learning_rate": 1.988363786178891e-05, "loss": 1.2316, "step": 1972 }, { "epoch": 0.077196963768683, "grad_norm": 0.0, "learning_rate": 1.9883445025679552e-05, "loss": 1.1535, "step": 1973 }, { "epoch": 0.07723609046091244, "grad_norm": 0.0, "learning_rate": 1.9883252030854444e-05, "loss": 1.2886, "step": 1974 }, { "epoch": 0.07727521715314187, "grad_norm": 0.0, "learning_rate": 1.9883058877316697e-05, "loss": 1.3214, "step": 1975 }, { "epoch": 0.07731434384537131, "grad_norm": 0.0, "learning_rate": 1.9882865565069408e-05, "loss": 1.2138, "step": 1976 }, { "epoch": 0.07735347053760075, "grad_norm": 0.0, "learning_rate": 1.9882672094115683e-05, "loss": 1.2271, "step": 1977 }, { "epoch": 0.07739259722983019, "grad_norm": 0.0, "learning_rate": 1.9882478464458622e-05, "loss": 1.1528, "step": 1978 }, { "epoch": 0.07743172392205963, "grad_norm": 0.0, "learning_rate": 1.9882284676101347e-05, "loss": 1.2858, "step": 1979 }, { "epoch": 0.07747085061428907, "grad_norm": 0.0, "learning_rate": 1.988209072904696e-05, "loss": 1.2916, "step": 1980 }, { "epoch": 0.07750997730651851, "grad_norm": 0.0, "learning_rate": 1.9881896623298582e-05, "loss": 1.1839, "step": 1981 }, { "epoch": 0.07754910399874794, "grad_norm": 0.0, "learning_rate": 1.9881702358859323e-05, "loss": 1.1323, "step": 1982 }, { "epoch": 0.07758823069097738, "grad_norm": 0.0, "learning_rate": 1.9881507935732308e-05, "loss": 1.2905, "step": 1983 }, { "epoch": 0.07762735738320682, "grad_norm": 0.0, "learning_rate": 1.988131335392066e-05, "loss": 1.1903, "step": 1984 }, { "epoch": 0.07766648407543626, "grad_norm": 0.0, "learning_rate": 1.9881118613427502e-05, "loss": 1.2212, "step": 1985 }, { "epoch": 0.0777056107676657, "grad_norm": 0.0, "learning_rate": 1.9880923714255956e-05, "loss": 1.1963, "step": 1986 }, { "epoch": 0.07774473745989514, "grad_norm": 0.0, "learning_rate": 1.988072865640916e-05, "loss": 1.2706, "step": 1987 }, { "epoch": 0.07778386415212458, "grad_norm": 0.0, "learning_rate": 1.9880533439890245e-05, "loss": 1.307, "step": 1988 }, { "epoch": 0.07782299084435401, "grad_norm": 0.0, "learning_rate": 1.9880338064702337e-05, "loss": 1.2079, "step": 1989 }, { "epoch": 0.07786211753658345, "grad_norm": 0.0, "learning_rate": 1.9880142530848587e-05, "loss": 1.3634, "step": 1990 }, { "epoch": 0.0779012442288129, "grad_norm": 0.0, "learning_rate": 1.9879946838332125e-05, "loss": 1.3547, "step": 1991 }, { "epoch": 0.07794037092104233, "grad_norm": 0.0, "learning_rate": 1.9879750987156095e-05, "loss": 1.2313, "step": 1992 }, { "epoch": 0.07797949761327178, "grad_norm": 0.0, "learning_rate": 1.9879554977323653e-05, "loss": 1.2162, "step": 1993 }, { "epoch": 0.07801862430550122, "grad_norm": 0.0, "learning_rate": 1.987935880883793e-05, "loss": 1.1378, "step": 1994 }, { "epoch": 0.07805775099773066, "grad_norm": 0.0, "learning_rate": 1.9879162481702084e-05, "loss": 1.1661, "step": 1995 }, { "epoch": 0.07809687768996008, "grad_norm": 0.0, "learning_rate": 1.987896599591927e-05, "loss": 1.191, "step": 1996 }, { "epoch": 0.07813600438218953, "grad_norm": 0.0, "learning_rate": 1.9878769351492644e-05, "loss": 1.325, "step": 1997 }, { "epoch": 0.07817513107441897, "grad_norm": 0.0, "learning_rate": 1.9878572548425356e-05, "loss": 1.2504, "step": 1998 }, { "epoch": 0.0782142577666484, "grad_norm": 0.0, "learning_rate": 1.9878375586720574e-05, "loss": 1.1767, "step": 1999 }, { "epoch": 0.07825338445887785, "grad_norm": 0.0, "learning_rate": 1.987817846638146e-05, "loss": 1.2444, "step": 2000 }, { "epoch": 0.07829251115110729, "grad_norm": 0.0, "learning_rate": 1.9877981187411176e-05, "loss": 1.3528, "step": 2001 }, { "epoch": 0.07833163784333673, "grad_norm": 0.0, "learning_rate": 1.9877783749812892e-05, "loss": 1.2441, "step": 2002 }, { "epoch": 0.07837076453556616, "grad_norm": 0.0, "learning_rate": 1.9877586153589776e-05, "loss": 1.332, "step": 2003 }, { "epoch": 0.0784098912277956, "grad_norm": 0.0, "learning_rate": 1.9877388398745006e-05, "loss": 1.3743, "step": 2004 }, { "epoch": 0.07844901792002504, "grad_norm": 0.0, "learning_rate": 1.9877190485281756e-05, "loss": 1.2224, "step": 2005 }, { "epoch": 0.07848814461225448, "grad_norm": 0.0, "learning_rate": 1.9876992413203205e-05, "loss": 1.2276, "step": 2006 }, { "epoch": 0.07852727130448392, "grad_norm": 0.0, "learning_rate": 1.987679418251253e-05, "loss": 1.3881, "step": 2007 }, { "epoch": 0.07856639799671336, "grad_norm": 0.0, "learning_rate": 1.9876595793212916e-05, "loss": 1.235, "step": 2008 }, { "epoch": 0.0786055246889428, "grad_norm": 0.0, "learning_rate": 1.9876397245307552e-05, "loss": 1.3197, "step": 2009 }, { "epoch": 0.07864465138117224, "grad_norm": 0.0, "learning_rate": 1.9876198538799622e-05, "loss": 1.2609, "step": 2010 }, { "epoch": 0.07868377807340167, "grad_norm": 0.0, "learning_rate": 1.9875999673692318e-05, "loss": 1.2108, "step": 2011 }, { "epoch": 0.07872290476563111, "grad_norm": 0.0, "learning_rate": 1.9875800649988835e-05, "loss": 1.1584, "step": 2012 }, { "epoch": 0.07876203145786055, "grad_norm": 0.0, "learning_rate": 1.9875601467692372e-05, "loss": 1.1816, "step": 2013 }, { "epoch": 0.07880115815008999, "grad_norm": 0.0, "learning_rate": 1.9875402126806126e-05, "loss": 1.1968, "step": 2014 }, { "epoch": 0.07884028484231943, "grad_norm": 0.0, "learning_rate": 1.987520262733329e-05, "loss": 1.2242, "step": 2015 }, { "epoch": 0.07887941153454887, "grad_norm": 0.0, "learning_rate": 1.987500296927708e-05, "loss": 1.2639, "step": 2016 }, { "epoch": 0.07891853822677831, "grad_norm": 0.0, "learning_rate": 1.987480315264069e-05, "loss": 1.1299, "step": 2017 }, { "epoch": 0.07895766491900774, "grad_norm": 0.0, "learning_rate": 1.9874603177427337e-05, "loss": 1.2459, "step": 2018 }, { "epoch": 0.07899679161123718, "grad_norm": 0.0, "learning_rate": 1.9874403043640234e-05, "loss": 1.3464, "step": 2019 }, { "epoch": 0.07903591830346662, "grad_norm": 0.0, "learning_rate": 1.9874202751282587e-05, "loss": 1.3138, "step": 2020 }, { "epoch": 0.07907504499569606, "grad_norm": 0.0, "learning_rate": 1.987400230035762e-05, "loss": 1.3686, "step": 2021 }, { "epoch": 0.0791141716879255, "grad_norm": 0.0, "learning_rate": 1.9873801690868548e-05, "loss": 1.1448, "step": 2022 }, { "epoch": 0.07915329838015495, "grad_norm": 0.0, "learning_rate": 1.9873600922818593e-05, "loss": 1.2674, "step": 2023 }, { "epoch": 0.07919242507238439, "grad_norm": 0.0, "learning_rate": 1.9873399996210983e-05, "loss": 1.3005, "step": 2024 }, { "epoch": 0.07923155176461381, "grad_norm": 0.0, "learning_rate": 1.987319891104894e-05, "loss": 1.4323, "step": 2025 }, { "epoch": 0.07927067845684325, "grad_norm": 0.0, "learning_rate": 1.987299766733569e-05, "loss": 1.3154, "step": 2026 }, { "epoch": 0.0793098051490727, "grad_norm": 0.0, "learning_rate": 1.987279626507447e-05, "loss": 1.2755, "step": 2027 }, { "epoch": 0.07934893184130214, "grad_norm": 0.0, "learning_rate": 1.9872594704268516e-05, "loss": 1.2774, "step": 2028 }, { "epoch": 0.07938805853353158, "grad_norm": 0.0, "learning_rate": 1.987239298492106e-05, "loss": 1.3884, "step": 2029 }, { "epoch": 0.07942718522576102, "grad_norm": 0.0, "learning_rate": 1.9872191107035347e-05, "loss": 1.2461, "step": 2030 }, { "epoch": 0.07946631191799046, "grad_norm": 0.0, "learning_rate": 1.987198907061461e-05, "loss": 1.2164, "step": 2031 }, { "epoch": 0.07950543861021989, "grad_norm": 0.0, "learning_rate": 1.9871786875662102e-05, "loss": 1.2709, "step": 2032 }, { "epoch": 0.07954456530244933, "grad_norm": 0.0, "learning_rate": 1.9871584522181068e-05, "loss": 1.1746, "step": 2033 }, { "epoch": 0.07958369199467877, "grad_norm": 0.0, "learning_rate": 1.9871382010174755e-05, "loss": 1.3473, "step": 2034 }, { "epoch": 0.07962281868690821, "grad_norm": 0.0, "learning_rate": 1.9871179339646415e-05, "loss": 1.2283, "step": 2035 }, { "epoch": 0.07966194537913765, "grad_norm": 0.0, "learning_rate": 1.9870976510599305e-05, "loss": 1.2024, "step": 2036 }, { "epoch": 0.07970107207136709, "grad_norm": 0.0, "learning_rate": 1.987077352303668e-05, "loss": 1.2036, "step": 2037 }, { "epoch": 0.07974019876359653, "grad_norm": 0.0, "learning_rate": 1.9870570376961805e-05, "loss": 1.3672, "step": 2038 }, { "epoch": 0.07977932545582596, "grad_norm": 0.0, "learning_rate": 1.9870367072377937e-05, "loss": 1.2617, "step": 2039 }, { "epoch": 0.0798184521480554, "grad_norm": 0.0, "learning_rate": 1.9870163609288336e-05, "loss": 1.3557, "step": 2040 }, { "epoch": 0.07985757884028484, "grad_norm": 0.0, "learning_rate": 1.9869959987696282e-05, "loss": 1.2239, "step": 2041 }, { "epoch": 0.07989670553251428, "grad_norm": 0.0, "learning_rate": 1.9869756207605038e-05, "loss": 1.1373, "step": 2042 }, { "epoch": 0.07993583222474372, "grad_norm": 0.0, "learning_rate": 1.9869552269017876e-05, "loss": 1.1147, "step": 2043 }, { "epoch": 0.07997495891697316, "grad_norm": 0.0, "learning_rate": 1.9869348171938074e-05, "loss": 1.2496, "step": 2044 }, { "epoch": 0.0800140856092026, "grad_norm": 0.0, "learning_rate": 1.9869143916368903e-05, "loss": 1.1543, "step": 2045 }, { "epoch": 0.08005321230143203, "grad_norm": 0.0, "learning_rate": 1.9868939502313647e-05, "loss": 1.2766, "step": 2046 }, { "epoch": 0.08009233899366147, "grad_norm": 0.0, "learning_rate": 1.986873492977559e-05, "loss": 1.356, "step": 2047 }, { "epoch": 0.08013146568589091, "grad_norm": 0.0, "learning_rate": 1.986853019875802e-05, "loss": 1.2643, "step": 2048 }, { "epoch": 0.08017059237812035, "grad_norm": 0.0, "learning_rate": 1.9868325309264217e-05, "loss": 1.3496, "step": 2049 }, { "epoch": 0.0802097190703498, "grad_norm": 0.0, "learning_rate": 1.9868120261297476e-05, "loss": 1.2707, "step": 2050 }, { "epoch": 0.08024884576257924, "grad_norm": 0.0, "learning_rate": 1.986791505486109e-05, "loss": 1.1767, "step": 2051 }, { "epoch": 0.08028797245480868, "grad_norm": 0.0, "learning_rate": 1.9867709689958352e-05, "loss": 1.4019, "step": 2052 }, { "epoch": 0.0803270991470381, "grad_norm": 0.0, "learning_rate": 1.9867504166592563e-05, "loss": 1.2551, "step": 2053 }, { "epoch": 0.08036622583926754, "grad_norm": 0.0, "learning_rate": 1.9867298484767022e-05, "loss": 1.244, "step": 2054 }, { "epoch": 0.08040535253149698, "grad_norm": 0.0, "learning_rate": 1.986709264448503e-05, "loss": 1.1669, "step": 2055 }, { "epoch": 0.08044447922372643, "grad_norm": 0.0, "learning_rate": 1.9866886645749895e-05, "loss": 1.3257, "step": 2056 }, { "epoch": 0.08048360591595587, "grad_norm": 0.0, "learning_rate": 1.9866680488564927e-05, "loss": 1.2372, "step": 2057 }, { "epoch": 0.08052273260818531, "grad_norm": 0.0, "learning_rate": 1.986647417293343e-05, "loss": 1.3104, "step": 2058 }, { "epoch": 0.08056185930041475, "grad_norm": 0.0, "learning_rate": 1.9866267698858722e-05, "loss": 1.4178, "step": 2059 }, { "epoch": 0.08060098599264418, "grad_norm": 0.0, "learning_rate": 1.986606106634412e-05, "loss": 1.203, "step": 2060 }, { "epoch": 0.08064011268487362, "grad_norm": 0.0, "learning_rate": 1.986585427539294e-05, "loss": 1.4532, "step": 2061 }, { "epoch": 0.08067923937710306, "grad_norm": 0.0, "learning_rate": 1.98656473260085e-05, "loss": 1.3433, "step": 2062 }, { "epoch": 0.0807183660693325, "grad_norm": 0.0, "learning_rate": 1.9865440218194126e-05, "loss": 1.2357, "step": 2063 }, { "epoch": 0.08075749276156194, "grad_norm": 0.0, "learning_rate": 1.9865232951953145e-05, "loss": 1.2809, "step": 2064 }, { "epoch": 0.08079661945379138, "grad_norm": 0.0, "learning_rate": 1.9865025527288888e-05, "loss": 1.1917, "step": 2065 }, { "epoch": 0.08083574614602082, "grad_norm": 0.0, "learning_rate": 1.9864817944204682e-05, "loss": 1.1599, "step": 2066 }, { "epoch": 0.08087487283825025, "grad_norm": 0.0, "learning_rate": 1.9864610202703858e-05, "loss": 1.2741, "step": 2067 }, { "epoch": 0.08091399953047969, "grad_norm": 0.0, "learning_rate": 1.9864402302789757e-05, "loss": 1.1345, "step": 2068 }, { "epoch": 0.08095312622270913, "grad_norm": 0.0, "learning_rate": 1.9864194244465715e-05, "loss": 1.2741, "step": 2069 }, { "epoch": 0.08099225291493857, "grad_norm": 0.0, "learning_rate": 1.9863986027735077e-05, "loss": 1.3484, "step": 2070 }, { "epoch": 0.08103137960716801, "grad_norm": 0.0, "learning_rate": 1.9863777652601178e-05, "loss": 1.2541, "step": 2071 }, { "epoch": 0.08107050629939745, "grad_norm": 0.0, "learning_rate": 1.9863569119067373e-05, "loss": 1.2376, "step": 2072 }, { "epoch": 0.08110963299162689, "grad_norm": 0.0, "learning_rate": 1.986336042713701e-05, "loss": 1.2126, "step": 2073 }, { "epoch": 0.08114875968385633, "grad_norm": 0.0, "learning_rate": 1.9863151576813434e-05, "loss": 1.3079, "step": 2074 }, { "epoch": 0.08118788637608576, "grad_norm": 0.0, "learning_rate": 1.9862942568100004e-05, "loss": 1.1422, "step": 2075 }, { "epoch": 0.0812270130683152, "grad_norm": 0.0, "learning_rate": 1.9862733401000077e-05, "loss": 1.2272, "step": 2076 }, { "epoch": 0.08126613976054464, "grad_norm": 0.0, "learning_rate": 1.986252407551701e-05, "loss": 1.2081, "step": 2077 }, { "epoch": 0.08130526645277408, "grad_norm": 0.0, "learning_rate": 1.9862314591654163e-05, "loss": 1.1804, "step": 2078 }, { "epoch": 0.08134439314500352, "grad_norm": 0.0, "learning_rate": 1.9862104949414902e-05, "loss": 1.1267, "step": 2079 }, { "epoch": 0.08138351983723296, "grad_norm": 0.0, "learning_rate": 1.9861895148802594e-05, "loss": 1.2889, "step": 2080 }, { "epoch": 0.0814226465294624, "grad_norm": 0.0, "learning_rate": 1.9861685189820608e-05, "loss": 1.3091, "step": 2081 }, { "epoch": 0.08146177322169183, "grad_norm": 0.0, "learning_rate": 1.9861475072472313e-05, "loss": 1.3091, "step": 2082 }, { "epoch": 0.08150089991392127, "grad_norm": 0.0, "learning_rate": 1.986126479676109e-05, "loss": 1.2316, "step": 2083 }, { "epoch": 0.08154002660615071, "grad_norm": 0.0, "learning_rate": 1.986105436269031e-05, "loss": 1.1272, "step": 2084 }, { "epoch": 0.08157915329838016, "grad_norm": 0.0, "learning_rate": 1.986084377026335e-05, "loss": 1.1979, "step": 2085 }, { "epoch": 0.0816182799906096, "grad_norm": 0.0, "learning_rate": 1.98606330194836e-05, "loss": 1.2325, "step": 2086 }, { "epoch": 0.08165740668283904, "grad_norm": 0.0, "learning_rate": 1.9860422110354435e-05, "loss": 1.3016, "step": 2087 }, { "epoch": 0.08169653337506848, "grad_norm": 0.0, "learning_rate": 1.986021104287925e-05, "loss": 1.2198, "step": 2088 }, { "epoch": 0.0817356600672979, "grad_norm": 0.0, "learning_rate": 1.985999981706143e-05, "loss": 1.1501, "step": 2089 }, { "epoch": 0.08177478675952735, "grad_norm": 0.0, "learning_rate": 1.985978843290437e-05, "loss": 1.1739, "step": 2090 }, { "epoch": 0.08181391345175679, "grad_norm": 0.0, "learning_rate": 1.985957689041146e-05, "loss": 1.2869, "step": 2091 }, { "epoch": 0.08185304014398623, "grad_norm": 0.0, "learning_rate": 1.9859365189586102e-05, "loss": 1.3025, "step": 2092 }, { "epoch": 0.08189216683621567, "grad_norm": 0.0, "learning_rate": 1.9859153330431692e-05, "loss": 1.3046, "step": 2093 }, { "epoch": 0.08193129352844511, "grad_norm": 0.0, "learning_rate": 1.9858941312951633e-05, "loss": 1.2119, "step": 2094 }, { "epoch": 0.08197042022067455, "grad_norm": 0.0, "learning_rate": 1.985872913714933e-05, "loss": 1.3358, "step": 2095 }, { "epoch": 0.08200954691290398, "grad_norm": 0.0, "learning_rate": 1.9858516803028193e-05, "loss": 1.2052, "step": 2096 }, { "epoch": 0.08204867360513342, "grad_norm": 0.0, "learning_rate": 1.985830431059163e-05, "loss": 1.1996, "step": 2097 }, { "epoch": 0.08208780029736286, "grad_norm": 0.0, "learning_rate": 1.9858091659843054e-05, "loss": 1.2026, "step": 2098 }, { "epoch": 0.0821269269895923, "grad_norm": 0.0, "learning_rate": 1.9857878850785877e-05, "loss": 1.284, "step": 2099 }, { "epoch": 0.08216605368182174, "grad_norm": 0.0, "learning_rate": 1.9857665883423518e-05, "loss": 1.2417, "step": 2100 }, { "epoch": 0.08220518037405118, "grad_norm": 0.0, "learning_rate": 1.9857452757759398e-05, "loss": 1.1869, "step": 2101 }, { "epoch": 0.08224430706628062, "grad_norm": 0.0, "learning_rate": 1.9857239473796936e-05, "loss": 1.2828, "step": 2102 }, { "epoch": 0.08228343375851005, "grad_norm": 0.0, "learning_rate": 1.985702603153956e-05, "loss": 1.342, "step": 2103 }, { "epoch": 0.08232256045073949, "grad_norm": 0.0, "learning_rate": 1.9856812430990705e-05, "loss": 1.1622, "step": 2104 }, { "epoch": 0.08236168714296893, "grad_norm": 0.0, "learning_rate": 1.9856598672153783e-05, "loss": 1.245, "step": 2105 }, { "epoch": 0.08240081383519837, "grad_norm": 0.0, "learning_rate": 1.9856384755032245e-05, "loss": 1.2698, "step": 2106 }, { "epoch": 0.08243994052742781, "grad_norm": 0.0, "learning_rate": 1.9856170679629516e-05, "loss": 1.15, "step": 2107 }, { "epoch": 0.08247906721965725, "grad_norm": 0.0, "learning_rate": 1.9855956445949037e-05, "loss": 1.1405, "step": 2108 }, { "epoch": 0.0825181939118867, "grad_norm": 0.0, "learning_rate": 1.9855742053994246e-05, "loss": 1.3607, "step": 2109 }, { "epoch": 0.08255732060411612, "grad_norm": 0.0, "learning_rate": 1.9855527503768587e-05, "loss": 1.4602, "step": 2110 }, { "epoch": 0.08259644729634556, "grad_norm": 0.0, "learning_rate": 1.985531279527551e-05, "loss": 1.2855, "step": 2111 }, { "epoch": 0.082635573988575, "grad_norm": 0.0, "learning_rate": 1.9855097928518456e-05, "loss": 1.3413, "step": 2112 }, { "epoch": 0.08267470068080444, "grad_norm": 0.0, "learning_rate": 1.985488290350088e-05, "loss": 1.3031, "step": 2113 }, { "epoch": 0.08271382737303389, "grad_norm": 0.0, "learning_rate": 1.985466772022623e-05, "loss": 1.228, "step": 2114 }, { "epoch": 0.08275295406526333, "grad_norm": 0.0, "learning_rate": 1.9854452378697968e-05, "loss": 1.2708, "step": 2115 }, { "epoch": 0.08279208075749277, "grad_norm": 0.0, "learning_rate": 1.985423687891955e-05, "loss": 1.3062, "step": 2116 }, { "epoch": 0.0828312074497222, "grad_norm": 0.0, "learning_rate": 1.9854021220894433e-05, "loss": 1.1624, "step": 2117 }, { "epoch": 0.08287033414195163, "grad_norm": 0.0, "learning_rate": 1.9853805404626084e-05, "loss": 1.3208, "step": 2118 }, { "epoch": 0.08290946083418108, "grad_norm": 0.0, "learning_rate": 1.985358943011797e-05, "loss": 1.2802, "step": 2119 }, { "epoch": 0.08294858752641052, "grad_norm": 0.0, "learning_rate": 1.9853373297373554e-05, "loss": 1.2278, "step": 2120 }, { "epoch": 0.08298771421863996, "grad_norm": 0.0, "learning_rate": 1.9853157006396312e-05, "loss": 1.2787, "step": 2121 }, { "epoch": 0.0830268409108694, "grad_norm": 0.0, "learning_rate": 1.985294055718971e-05, "loss": 1.3176, "step": 2122 }, { "epoch": 0.08306596760309884, "grad_norm": 0.0, "learning_rate": 1.9852723949757234e-05, "loss": 1.2279, "step": 2123 }, { "epoch": 0.08310509429532827, "grad_norm": 0.0, "learning_rate": 1.9852507184102356e-05, "loss": 1.2298, "step": 2124 }, { "epoch": 0.08314422098755771, "grad_norm": 0.0, "learning_rate": 1.9852290260228558e-05, "loss": 1.2598, "step": 2125 }, { "epoch": 0.08318334767978715, "grad_norm": 0.0, "learning_rate": 1.985207317813933e-05, "loss": 1.2824, "step": 2126 }, { "epoch": 0.08322247437201659, "grad_norm": 0.0, "learning_rate": 1.9851855937838144e-05, "loss": 1.3816, "step": 2127 }, { "epoch": 0.08326160106424603, "grad_norm": 0.0, "learning_rate": 1.98516385393285e-05, "loss": 1.2876, "step": 2128 }, { "epoch": 0.08330072775647547, "grad_norm": 0.0, "learning_rate": 1.9851420982613888e-05, "loss": 1.2319, "step": 2129 }, { "epoch": 0.08333985444870491, "grad_norm": 0.0, "learning_rate": 1.9851203267697796e-05, "loss": 1.3121, "step": 2130 }, { "epoch": 0.08337898114093435, "grad_norm": 0.0, "learning_rate": 1.9850985394583725e-05, "loss": 1.2464, "step": 2131 }, { "epoch": 0.08341810783316378, "grad_norm": 0.0, "learning_rate": 1.985076736327517e-05, "loss": 1.1261, "step": 2132 }, { "epoch": 0.08345723452539322, "grad_norm": 0.0, "learning_rate": 1.985054917377564e-05, "loss": 1.1036, "step": 2133 }, { "epoch": 0.08349636121762266, "grad_norm": 0.0, "learning_rate": 1.9850330826088632e-05, "loss": 1.2057, "step": 2134 }, { "epoch": 0.0835354879098521, "grad_norm": 0.0, "learning_rate": 1.9850112320217652e-05, "loss": 1.209, "step": 2135 }, { "epoch": 0.08357461460208154, "grad_norm": 0.0, "learning_rate": 1.984989365616621e-05, "loss": 1.227, "step": 2136 }, { "epoch": 0.08361374129431098, "grad_norm": 0.0, "learning_rate": 1.984967483393782e-05, "loss": 1.1523, "step": 2137 }, { "epoch": 0.08365286798654042, "grad_norm": 0.0, "learning_rate": 1.9849455853535996e-05, "loss": 1.3145, "step": 2138 }, { "epoch": 0.08369199467876985, "grad_norm": 0.0, "learning_rate": 1.9849236714964256e-05, "loss": 1.2721, "step": 2139 }, { "epoch": 0.08373112137099929, "grad_norm": 0.0, "learning_rate": 1.984901741822611e-05, "loss": 1.1634, "step": 2140 }, { "epoch": 0.08377024806322873, "grad_norm": 0.0, "learning_rate": 1.984879796332509e-05, "loss": 1.2434, "step": 2141 }, { "epoch": 0.08380937475545817, "grad_norm": 0.0, "learning_rate": 1.9848578350264713e-05, "loss": 1.188, "step": 2142 }, { "epoch": 0.08384850144768762, "grad_norm": 0.0, "learning_rate": 1.984835857904851e-05, "loss": 1.2159, "step": 2143 }, { "epoch": 0.08388762813991706, "grad_norm": 0.0, "learning_rate": 1.9848138649680008e-05, "loss": 1.2916, "step": 2144 }, { "epoch": 0.0839267548321465, "grad_norm": 0.0, "learning_rate": 1.984791856216274e-05, "loss": 1.2823, "step": 2145 }, { "epoch": 0.08396588152437592, "grad_norm": 0.0, "learning_rate": 1.984769831650024e-05, "loss": 1.1257, "step": 2146 }, { "epoch": 0.08400500821660536, "grad_norm": 0.0, "learning_rate": 1.9847477912696046e-05, "loss": 1.3354, "step": 2147 }, { "epoch": 0.0840441349088348, "grad_norm": 0.0, "learning_rate": 1.98472573507537e-05, "loss": 1.2372, "step": 2148 }, { "epoch": 0.08408326160106425, "grad_norm": 0.0, "learning_rate": 1.984703663067673e-05, "loss": 1.2838, "step": 2149 }, { "epoch": 0.08412238829329369, "grad_norm": 0.0, "learning_rate": 1.98468157524687e-05, "loss": 1.2218, "step": 2150 }, { "epoch": 0.08416151498552313, "grad_norm": 0.0, "learning_rate": 1.9846594716133144e-05, "loss": 1.1861, "step": 2151 }, { "epoch": 0.08420064167775257, "grad_norm": 0.0, "learning_rate": 1.9846373521673613e-05, "loss": 1.3203, "step": 2152 }, { "epoch": 0.084239768369982, "grad_norm": 0.0, "learning_rate": 1.9846152169093663e-05, "loss": 1.3984, "step": 2153 }, { "epoch": 0.08427889506221144, "grad_norm": 0.0, "learning_rate": 1.984593065839685e-05, "loss": 1.1664, "step": 2154 }, { "epoch": 0.08431802175444088, "grad_norm": 0.0, "learning_rate": 1.9845708989586726e-05, "loss": 1.1274, "step": 2155 }, { "epoch": 0.08435714844667032, "grad_norm": 0.0, "learning_rate": 1.9845487162666853e-05, "loss": 1.1508, "step": 2156 }, { "epoch": 0.08439627513889976, "grad_norm": 0.0, "learning_rate": 1.9845265177640795e-05, "loss": 1.1303, "step": 2157 }, { "epoch": 0.0844354018311292, "grad_norm": 0.0, "learning_rate": 1.984504303451211e-05, "loss": 1.2322, "step": 2158 }, { "epoch": 0.08447452852335864, "grad_norm": 0.0, "learning_rate": 1.984482073328438e-05, "loss": 1.2189, "step": 2159 }, { "epoch": 0.08451365521558807, "grad_norm": 0.0, "learning_rate": 1.9844598273961156e-05, "loss": 1.1429, "step": 2160 }, { "epoch": 0.08455278190781751, "grad_norm": 0.0, "learning_rate": 1.9844375656546023e-05, "loss": 1.132, "step": 2161 }, { "epoch": 0.08459190860004695, "grad_norm": 0.0, "learning_rate": 1.9844152881042554e-05, "loss": 1.3393, "step": 2162 }, { "epoch": 0.08463103529227639, "grad_norm": 0.0, "learning_rate": 1.9843929947454325e-05, "loss": 1.3237, "step": 2163 }, { "epoch": 0.08467016198450583, "grad_norm": 0.0, "learning_rate": 1.9843706855784916e-05, "loss": 1.1533, "step": 2164 }, { "epoch": 0.08470928867673527, "grad_norm": 0.0, "learning_rate": 1.984348360603791e-05, "loss": 1.2504, "step": 2165 }, { "epoch": 0.08474841536896471, "grad_norm": 0.0, "learning_rate": 1.9843260198216888e-05, "loss": 1.2516, "step": 2166 }, { "epoch": 0.08478754206119414, "grad_norm": 0.0, "learning_rate": 1.984303663232544e-05, "loss": 1.3307, "step": 2167 }, { "epoch": 0.08482666875342358, "grad_norm": 0.0, "learning_rate": 1.9842812908367167e-05, "loss": 1.3076, "step": 2168 }, { "epoch": 0.08486579544565302, "grad_norm": 0.0, "learning_rate": 1.9842589026345647e-05, "loss": 1.3237, "step": 2169 }, { "epoch": 0.08490492213788246, "grad_norm": 0.0, "learning_rate": 1.984236498626448e-05, "loss": 1.2139, "step": 2170 }, { "epoch": 0.0849440488301119, "grad_norm": 0.0, "learning_rate": 1.9842140788127264e-05, "loss": 1.1368, "step": 2171 }, { "epoch": 0.08498317552234134, "grad_norm": 0.0, "learning_rate": 1.98419164319376e-05, "loss": 1.267, "step": 2172 }, { "epoch": 0.08502230221457079, "grad_norm": 0.0, "learning_rate": 1.9841691917699096e-05, "loss": 1.2498, "step": 2173 }, { "epoch": 0.08506142890680021, "grad_norm": 0.0, "learning_rate": 1.9841467245415348e-05, "loss": 1.1814, "step": 2174 }, { "epoch": 0.08510055559902965, "grad_norm": 0.0, "learning_rate": 1.9841242415089967e-05, "loss": 1.1507, "step": 2175 }, { "epoch": 0.0851396822912591, "grad_norm": 0.0, "learning_rate": 1.9841017426726568e-05, "loss": 1.3197, "step": 2176 }, { "epoch": 0.08517880898348854, "grad_norm": 0.0, "learning_rate": 1.984079228032876e-05, "loss": 1.1353, "step": 2177 }, { "epoch": 0.08521793567571798, "grad_norm": 0.0, "learning_rate": 1.9840566975900155e-05, "loss": 1.3154, "step": 2178 }, { "epoch": 0.08525706236794742, "grad_norm": 0.0, "learning_rate": 1.984034151344438e-05, "loss": 1.1562, "step": 2179 }, { "epoch": 0.08529618906017686, "grad_norm": 0.0, "learning_rate": 1.9840115892965045e-05, "loss": 1.3832, "step": 2180 }, { "epoch": 0.08533531575240628, "grad_norm": 0.0, "learning_rate": 1.983989011446578e-05, "loss": 1.3523, "step": 2181 }, { "epoch": 0.08537444244463573, "grad_norm": 0.0, "learning_rate": 1.983966417795022e-05, "loss": 1.2655, "step": 2182 }, { "epoch": 0.08541356913686517, "grad_norm": 0.0, "learning_rate": 1.9839438083421974e-05, "loss": 1.1927, "step": 2183 }, { "epoch": 0.08545269582909461, "grad_norm": 0.0, "learning_rate": 1.9839211830884682e-05, "loss": 1.1671, "step": 2184 }, { "epoch": 0.08549182252132405, "grad_norm": 0.0, "learning_rate": 1.983898542034198e-05, "loss": 1.1765, "step": 2185 }, { "epoch": 0.08553094921355349, "grad_norm": 0.0, "learning_rate": 1.98387588517975e-05, "loss": 1.374, "step": 2186 }, { "epoch": 0.08557007590578293, "grad_norm": 0.0, "learning_rate": 1.9838532125254883e-05, "loss": 1.1452, "step": 2187 }, { "epoch": 0.08560920259801236, "grad_norm": 0.0, "learning_rate": 1.9838305240717765e-05, "loss": 1.1881, "step": 2188 }, { "epoch": 0.0856483292902418, "grad_norm": 0.0, "learning_rate": 1.9838078198189798e-05, "loss": 1.2064, "step": 2189 }, { "epoch": 0.08568745598247124, "grad_norm": 0.0, "learning_rate": 1.9837850997674618e-05, "loss": 1.1797, "step": 2190 }, { "epoch": 0.08572658267470068, "grad_norm": 0.0, "learning_rate": 1.983762363917588e-05, "loss": 1.2832, "step": 2191 }, { "epoch": 0.08576570936693012, "grad_norm": 0.0, "learning_rate": 1.9837396122697235e-05, "loss": 1.1912, "step": 2192 }, { "epoch": 0.08580483605915956, "grad_norm": 0.0, "learning_rate": 1.983716844824233e-05, "loss": 1.162, "step": 2193 }, { "epoch": 0.085843962751389, "grad_norm": 0.0, "learning_rate": 1.9836940615814836e-05, "loss": 1.2322, "step": 2194 }, { "epoch": 0.08588308944361844, "grad_norm": 0.0, "learning_rate": 1.9836712625418393e-05, "loss": 1.0967, "step": 2195 }, { "epoch": 0.08592221613584787, "grad_norm": 0.0, "learning_rate": 1.9836484477056676e-05, "loss": 1.2963, "step": 2196 }, { "epoch": 0.08596134282807731, "grad_norm": 0.0, "learning_rate": 1.9836256170733343e-05, "loss": 1.2928, "step": 2197 }, { "epoch": 0.08600046952030675, "grad_norm": 0.0, "learning_rate": 1.9836027706452063e-05, "loss": 1.1191, "step": 2198 }, { "epoch": 0.08603959621253619, "grad_norm": 0.0, "learning_rate": 1.98357990842165e-05, "loss": 1.1942, "step": 2199 }, { "epoch": 0.08607872290476563, "grad_norm": 0.0, "learning_rate": 1.983557030403033e-05, "loss": 1.3035, "step": 2200 }, { "epoch": 0.08611784959699507, "grad_norm": 0.0, "learning_rate": 1.983534136589723e-05, "loss": 1.1857, "step": 2201 }, { "epoch": 0.08615697628922452, "grad_norm": 0.0, "learning_rate": 1.9835112269820867e-05, "loss": 1.1791, "step": 2202 }, { "epoch": 0.08619610298145394, "grad_norm": 0.0, "learning_rate": 1.9834883015804926e-05, "loss": 1.1729, "step": 2203 }, { "epoch": 0.08623522967368338, "grad_norm": 0.0, "learning_rate": 1.9834653603853088e-05, "loss": 1.1847, "step": 2204 }, { "epoch": 0.08627435636591282, "grad_norm": 0.0, "learning_rate": 1.9834424033969033e-05, "loss": 1.1985, "step": 2205 }, { "epoch": 0.08631348305814227, "grad_norm": 0.0, "learning_rate": 1.9834194306156455e-05, "loss": 1.1304, "step": 2206 }, { "epoch": 0.0863526097503717, "grad_norm": 0.0, "learning_rate": 1.9833964420419042e-05, "loss": 1.3164, "step": 2207 }, { "epoch": 0.08639173644260115, "grad_norm": 0.0, "learning_rate": 1.9833734376760478e-05, "loss": 1.269, "step": 2208 }, { "epoch": 0.08643086313483059, "grad_norm": 0.0, "learning_rate": 1.9833504175184462e-05, "loss": 1.2407, "step": 2209 }, { "epoch": 0.08646998982706001, "grad_norm": 0.0, "learning_rate": 1.9833273815694695e-05, "loss": 1.2067, "step": 2210 }, { "epoch": 0.08650911651928946, "grad_norm": 0.0, "learning_rate": 1.983304329829487e-05, "loss": 1.2029, "step": 2211 }, { "epoch": 0.0865482432115189, "grad_norm": 0.0, "learning_rate": 1.9832812622988694e-05, "loss": 1.3021, "step": 2212 }, { "epoch": 0.08658736990374834, "grad_norm": 0.0, "learning_rate": 1.9832581789779864e-05, "loss": 1.3237, "step": 2213 }, { "epoch": 0.08662649659597778, "grad_norm": 0.0, "learning_rate": 1.9832350798672096e-05, "loss": 1.267, "step": 2214 }, { "epoch": 0.08666562328820722, "grad_norm": 0.0, "learning_rate": 1.983211964966909e-05, "loss": 1.3251, "step": 2215 }, { "epoch": 0.08670474998043666, "grad_norm": 0.0, "learning_rate": 1.9831888342774565e-05, "loss": 1.1714, "step": 2216 }, { "epoch": 0.08674387667266609, "grad_norm": 0.0, "learning_rate": 1.9831656877992233e-05, "loss": 1.3182, "step": 2217 }, { "epoch": 0.08678300336489553, "grad_norm": 0.0, "learning_rate": 1.983142525532581e-05, "loss": 1.2028, "step": 2218 }, { "epoch": 0.08682213005712497, "grad_norm": 0.0, "learning_rate": 1.983119347477902e-05, "loss": 1.1022, "step": 2219 }, { "epoch": 0.08686125674935441, "grad_norm": 0.0, "learning_rate": 1.983096153635558e-05, "loss": 1.0848, "step": 2220 }, { "epoch": 0.08690038344158385, "grad_norm": 0.0, "learning_rate": 1.983072944005922e-05, "loss": 1.1321, "step": 2221 }, { "epoch": 0.08693951013381329, "grad_norm": 0.0, "learning_rate": 1.9830497185893657e-05, "loss": 1.2548, "step": 2222 }, { "epoch": 0.08697863682604273, "grad_norm": 0.0, "learning_rate": 1.9830264773862633e-05, "loss": 1.2845, "step": 2223 }, { "epoch": 0.08701776351827216, "grad_norm": 0.0, "learning_rate": 1.9830032203969873e-05, "loss": 1.2539, "step": 2224 }, { "epoch": 0.0870568902105016, "grad_norm": 0.0, "learning_rate": 1.9829799476219113e-05, "loss": 1.3281, "step": 2225 }, { "epoch": 0.08709601690273104, "grad_norm": 0.0, "learning_rate": 1.9829566590614093e-05, "loss": 1.3756, "step": 2226 }, { "epoch": 0.08713514359496048, "grad_norm": 0.0, "learning_rate": 1.9829333547158547e-05, "loss": 1.2143, "step": 2227 }, { "epoch": 0.08717427028718992, "grad_norm": 0.0, "learning_rate": 1.9829100345856224e-05, "loss": 1.2003, "step": 2228 }, { "epoch": 0.08721339697941936, "grad_norm": 0.0, "learning_rate": 1.9828866986710865e-05, "loss": 1.2588, "step": 2229 }, { "epoch": 0.0872525236716488, "grad_norm": 0.0, "learning_rate": 1.982863346972622e-05, "loss": 1.2687, "step": 2230 }, { "epoch": 0.08729165036387823, "grad_norm": 0.0, "learning_rate": 1.9828399794906037e-05, "loss": 1.3709, "step": 2231 }, { "epoch": 0.08733077705610767, "grad_norm": 0.0, "learning_rate": 1.9828165962254065e-05, "loss": 1.3713, "step": 2232 }, { "epoch": 0.08736990374833711, "grad_norm": 0.0, "learning_rate": 1.9827931971774068e-05, "loss": 1.2647, "step": 2233 }, { "epoch": 0.08740903044056655, "grad_norm": 0.0, "learning_rate": 1.9827697823469797e-05, "loss": 1.2028, "step": 2234 }, { "epoch": 0.087448157132796, "grad_norm": 0.0, "learning_rate": 1.9827463517345015e-05, "loss": 1.2396, "step": 2235 }, { "epoch": 0.08748728382502544, "grad_norm": 0.0, "learning_rate": 1.982722905340348e-05, "loss": 1.3017, "step": 2236 }, { "epoch": 0.08752641051725488, "grad_norm": 0.0, "learning_rate": 1.9826994431648964e-05, "loss": 1.2489, "step": 2237 }, { "epoch": 0.0875655372094843, "grad_norm": 0.0, "learning_rate": 1.982675965208523e-05, "loss": 1.3699, "step": 2238 }, { "epoch": 0.08760466390171374, "grad_norm": 0.0, "learning_rate": 1.982652471471605e-05, "loss": 1.1547, "step": 2239 }, { "epoch": 0.08764379059394319, "grad_norm": 0.0, "learning_rate": 1.9826289619545194e-05, "loss": 1.3557, "step": 2240 }, { "epoch": 0.08768291728617263, "grad_norm": 0.0, "learning_rate": 1.9826054366576443e-05, "loss": 1.1804, "step": 2241 }, { "epoch": 0.08772204397840207, "grad_norm": 0.0, "learning_rate": 1.982581895581357e-05, "loss": 1.2526, "step": 2242 }, { "epoch": 0.08776117067063151, "grad_norm": 0.0, "learning_rate": 1.9825583387260355e-05, "loss": 1.3167, "step": 2243 }, { "epoch": 0.08780029736286095, "grad_norm": 0.0, "learning_rate": 1.9825347660920588e-05, "loss": 1.2136, "step": 2244 }, { "epoch": 0.08783942405509038, "grad_norm": 0.0, "learning_rate": 1.9825111776798044e-05, "loss": 1.1724, "step": 2245 }, { "epoch": 0.08787855074731982, "grad_norm": 0.0, "learning_rate": 1.9824875734896517e-05, "loss": 1.3711, "step": 2246 }, { "epoch": 0.08791767743954926, "grad_norm": 0.0, "learning_rate": 1.9824639535219804e-05, "loss": 1.1638, "step": 2247 }, { "epoch": 0.0879568041317787, "grad_norm": 0.0, "learning_rate": 1.982440317777168e-05, "loss": 1.2206, "step": 2248 }, { "epoch": 0.08799593082400814, "grad_norm": 0.0, "learning_rate": 1.982416666255596e-05, "loss": 1.1562, "step": 2249 }, { "epoch": 0.08803505751623758, "grad_norm": 0.0, "learning_rate": 1.9823929989576433e-05, "loss": 1.3079, "step": 2250 }, { "epoch": 0.08807418420846702, "grad_norm": 0.0, "learning_rate": 1.9823693158836898e-05, "loss": 1.3649, "step": 2251 }, { "epoch": 0.08811331090069645, "grad_norm": 0.0, "learning_rate": 1.9823456170341162e-05, "loss": 1.2612, "step": 2252 }, { "epoch": 0.08815243759292589, "grad_norm": 0.0, "learning_rate": 1.9823219024093028e-05, "loss": 1.2248, "step": 2253 }, { "epoch": 0.08819156428515533, "grad_norm": 0.0, "learning_rate": 1.982298172009631e-05, "loss": 1.2576, "step": 2254 }, { "epoch": 0.08823069097738477, "grad_norm": 0.0, "learning_rate": 1.9822744258354806e-05, "loss": 1.1277, "step": 2255 }, { "epoch": 0.08826981766961421, "grad_norm": 0.0, "learning_rate": 1.9822506638872346e-05, "loss": 1.1804, "step": 2256 }, { "epoch": 0.08830894436184365, "grad_norm": 0.0, "learning_rate": 1.9822268861652733e-05, "loss": 1.2752, "step": 2257 }, { "epoch": 0.0883480710540731, "grad_norm": 0.0, "learning_rate": 1.982203092669979e-05, "loss": 1.332, "step": 2258 }, { "epoch": 0.08838719774630253, "grad_norm": 0.0, "learning_rate": 1.9821792834017343e-05, "loss": 1.2223, "step": 2259 }, { "epoch": 0.08842632443853196, "grad_norm": 0.0, "learning_rate": 1.9821554583609205e-05, "loss": 1.2651, "step": 2260 }, { "epoch": 0.0884654511307614, "grad_norm": 0.0, "learning_rate": 1.982131617547921e-05, "loss": 1.293, "step": 2261 }, { "epoch": 0.08850457782299084, "grad_norm": 0.0, "learning_rate": 1.9821077609631184e-05, "loss": 1.2205, "step": 2262 }, { "epoch": 0.08854370451522028, "grad_norm": 0.0, "learning_rate": 1.9820838886068958e-05, "loss": 1.1507, "step": 2263 }, { "epoch": 0.08858283120744972, "grad_norm": 0.0, "learning_rate": 1.9820600004796363e-05, "loss": 1.2588, "step": 2264 }, { "epoch": 0.08862195789967917, "grad_norm": 0.0, "learning_rate": 1.982036096581724e-05, "loss": 1.189, "step": 2265 }, { "epoch": 0.0886610845919086, "grad_norm": 0.0, "learning_rate": 1.9820121769135428e-05, "loss": 1.299, "step": 2266 }, { "epoch": 0.08870021128413803, "grad_norm": 0.0, "learning_rate": 1.9819882414754762e-05, "loss": 1.317, "step": 2267 }, { "epoch": 0.08873933797636747, "grad_norm": 0.0, "learning_rate": 1.981964290267909e-05, "loss": 1.3607, "step": 2268 }, { "epoch": 0.08877846466859692, "grad_norm": 0.0, "learning_rate": 1.9819403232912258e-05, "loss": 1.2207, "step": 2269 }, { "epoch": 0.08881759136082636, "grad_norm": 0.0, "learning_rate": 1.9819163405458118e-05, "loss": 1.2434, "step": 2270 }, { "epoch": 0.0888567180530558, "grad_norm": 0.0, "learning_rate": 1.9818923420320514e-05, "loss": 1.3551, "step": 2271 }, { "epoch": 0.08889584474528524, "grad_norm": 0.0, "learning_rate": 1.9818683277503302e-05, "loss": 1.2858, "step": 2272 }, { "epoch": 0.08893497143751468, "grad_norm": 0.0, "learning_rate": 1.9818442977010344e-05, "loss": 1.2442, "step": 2273 }, { "epoch": 0.0889740981297441, "grad_norm": 0.0, "learning_rate": 1.9818202518845493e-05, "loss": 1.3613, "step": 2274 }, { "epoch": 0.08901322482197355, "grad_norm": 0.0, "learning_rate": 1.981796190301261e-05, "loss": 1.2528, "step": 2275 }, { "epoch": 0.08905235151420299, "grad_norm": 0.0, "learning_rate": 1.9817721129515565e-05, "loss": 1.2122, "step": 2276 }, { "epoch": 0.08909147820643243, "grad_norm": 0.0, "learning_rate": 1.981748019835822e-05, "loss": 1.239, "step": 2277 }, { "epoch": 0.08913060489866187, "grad_norm": 0.0, "learning_rate": 1.9817239109544447e-05, "loss": 1.2894, "step": 2278 }, { "epoch": 0.08916973159089131, "grad_norm": 0.0, "learning_rate": 1.9816997863078115e-05, "loss": 1.1482, "step": 2279 }, { "epoch": 0.08920885828312075, "grad_norm": 0.0, "learning_rate": 1.9816756458963094e-05, "loss": 1.1641, "step": 2280 }, { "epoch": 0.08924798497535018, "grad_norm": 0.0, "learning_rate": 1.9816514897203272e-05, "loss": 1.2695, "step": 2281 }, { "epoch": 0.08928711166757962, "grad_norm": 0.0, "learning_rate": 1.9816273177802517e-05, "loss": 1.2448, "step": 2282 }, { "epoch": 0.08932623835980906, "grad_norm": 0.0, "learning_rate": 1.981603130076472e-05, "loss": 1.1563, "step": 2283 }, { "epoch": 0.0893653650520385, "grad_norm": 0.0, "learning_rate": 1.9815789266093755e-05, "loss": 1.3999, "step": 2284 }, { "epoch": 0.08940449174426794, "grad_norm": 0.0, "learning_rate": 1.9815547073793516e-05, "loss": 1.0562, "step": 2285 }, { "epoch": 0.08944361843649738, "grad_norm": 0.0, "learning_rate": 1.9815304723867893e-05, "loss": 1.361, "step": 2286 }, { "epoch": 0.08948274512872682, "grad_norm": 0.0, "learning_rate": 1.9815062216320772e-05, "loss": 1.2057, "step": 2287 }, { "epoch": 0.08952187182095625, "grad_norm": 0.0, "learning_rate": 1.981481955115605e-05, "loss": 1.1443, "step": 2288 }, { "epoch": 0.08956099851318569, "grad_norm": 0.0, "learning_rate": 1.9814576728377627e-05, "loss": 1.2842, "step": 2289 }, { "epoch": 0.08960012520541513, "grad_norm": 0.0, "learning_rate": 1.98143337479894e-05, "loss": 1.3607, "step": 2290 }, { "epoch": 0.08963925189764457, "grad_norm": 0.0, "learning_rate": 1.9814090609995273e-05, "loss": 1.3335, "step": 2291 }, { "epoch": 0.08967837858987401, "grad_norm": 0.0, "learning_rate": 1.9813847314399147e-05, "loss": 1.1827, "step": 2292 }, { "epoch": 0.08971750528210345, "grad_norm": 0.0, "learning_rate": 1.9813603861204928e-05, "loss": 1.2151, "step": 2293 }, { "epoch": 0.0897566319743329, "grad_norm": 0.0, "learning_rate": 1.9813360250416532e-05, "loss": 1.0854, "step": 2294 }, { "epoch": 0.08979575866656232, "grad_norm": 0.0, "learning_rate": 1.9813116482037864e-05, "loss": 1.3364, "step": 2295 }, { "epoch": 0.08983488535879176, "grad_norm": 0.0, "learning_rate": 1.9812872556072845e-05, "loss": 1.195, "step": 2296 }, { "epoch": 0.0898740120510212, "grad_norm": 0.0, "learning_rate": 1.9812628472525387e-05, "loss": 1.2096, "step": 2297 }, { "epoch": 0.08991313874325065, "grad_norm": 0.0, "learning_rate": 1.981238423139941e-05, "loss": 1.288, "step": 2298 }, { "epoch": 0.08995226543548009, "grad_norm": 0.0, "learning_rate": 1.981213983269884e-05, "loss": 1.2783, "step": 2299 }, { "epoch": 0.08999139212770953, "grad_norm": 0.0, "learning_rate": 1.98118952764276e-05, "loss": 1.2295, "step": 2300 }, { "epoch": 0.09003051881993897, "grad_norm": 0.0, "learning_rate": 1.9811650562589616e-05, "loss": 1.2576, "step": 2301 }, { "epoch": 0.0900696455121684, "grad_norm": 0.0, "learning_rate": 1.9811405691188817e-05, "loss": 1.1266, "step": 2302 }, { "epoch": 0.09010877220439784, "grad_norm": 0.0, "learning_rate": 1.981116066222914e-05, "loss": 1.2207, "step": 2303 }, { "epoch": 0.09014789889662728, "grad_norm": 0.0, "learning_rate": 1.9810915475714514e-05, "loss": 1.0493, "step": 2304 }, { "epoch": 0.09018702558885672, "grad_norm": 0.0, "learning_rate": 1.9810670131648884e-05, "loss": 1.145, "step": 2305 }, { "epoch": 0.09022615228108616, "grad_norm": 0.0, "learning_rate": 1.981042463003618e-05, "loss": 1.224, "step": 2306 }, { "epoch": 0.0902652789733156, "grad_norm": 0.0, "learning_rate": 1.981017897088035e-05, "loss": 1.2671, "step": 2307 }, { "epoch": 0.09030440566554504, "grad_norm": 0.0, "learning_rate": 1.980993315418534e-05, "loss": 1.191, "step": 2308 }, { "epoch": 0.09034353235777447, "grad_norm": 0.0, "learning_rate": 1.9809687179955096e-05, "loss": 1.3811, "step": 2309 }, { "epoch": 0.09038265905000391, "grad_norm": 0.0, "learning_rate": 1.980944104819357e-05, "loss": 1.1893, "step": 2310 }, { "epoch": 0.09042178574223335, "grad_norm": 0.0, "learning_rate": 1.9809194758904712e-05, "loss": 1.3173, "step": 2311 }, { "epoch": 0.09046091243446279, "grad_norm": 0.0, "learning_rate": 1.9808948312092475e-05, "loss": 1.0584, "step": 2312 }, { "epoch": 0.09050003912669223, "grad_norm": 0.0, "learning_rate": 1.9808701707760824e-05, "loss": 1.1687, "step": 2313 }, { "epoch": 0.09053916581892167, "grad_norm": 0.0, "learning_rate": 1.980845494591371e-05, "loss": 1.3087, "step": 2314 }, { "epoch": 0.09057829251115111, "grad_norm": 0.0, "learning_rate": 1.9808208026555103e-05, "loss": 1.3307, "step": 2315 }, { "epoch": 0.09061741920338055, "grad_norm": 0.0, "learning_rate": 1.980796094968896e-05, "loss": 1.2764, "step": 2316 }, { "epoch": 0.09065654589560998, "grad_norm": 0.0, "learning_rate": 1.9807713715319262e-05, "loss": 1.327, "step": 2317 }, { "epoch": 0.09069567258783942, "grad_norm": 0.0, "learning_rate": 1.980746632344997e-05, "loss": 1.3403, "step": 2318 }, { "epoch": 0.09073479928006886, "grad_norm": 0.0, "learning_rate": 1.9807218774085055e-05, "loss": 1.1846, "step": 2319 }, { "epoch": 0.0907739259722983, "grad_norm": 0.0, "learning_rate": 1.9806971067228496e-05, "loss": 1.2686, "step": 2320 }, { "epoch": 0.09081305266452774, "grad_norm": 0.0, "learning_rate": 1.9806723202884273e-05, "loss": 1.2883, "step": 2321 }, { "epoch": 0.09085217935675718, "grad_norm": 0.0, "learning_rate": 1.980647518105636e-05, "loss": 1.329, "step": 2322 }, { "epoch": 0.09089130604898663, "grad_norm": 0.0, "learning_rate": 1.9806227001748748e-05, "loss": 1.226, "step": 2323 }, { "epoch": 0.09093043274121605, "grad_norm": 0.0, "learning_rate": 1.9805978664965417e-05, "loss": 1.3793, "step": 2324 }, { "epoch": 0.0909695594334455, "grad_norm": 0.0, "learning_rate": 1.9805730170710354e-05, "loss": 1.3254, "step": 2325 }, { "epoch": 0.09100868612567493, "grad_norm": 0.0, "learning_rate": 1.9805481518987553e-05, "loss": 1.2914, "step": 2326 }, { "epoch": 0.09104781281790437, "grad_norm": 0.0, "learning_rate": 1.9805232709801008e-05, "loss": 1.2378, "step": 2327 }, { "epoch": 0.09108693951013382, "grad_norm": 0.0, "learning_rate": 1.980498374315471e-05, "loss": 1.2058, "step": 2328 }, { "epoch": 0.09112606620236326, "grad_norm": 0.0, "learning_rate": 1.980473461905266e-05, "loss": 1.3414, "step": 2329 }, { "epoch": 0.0911651928945927, "grad_norm": 0.0, "learning_rate": 1.9804485337498857e-05, "loss": 1.1805, "step": 2330 }, { "epoch": 0.09120431958682212, "grad_norm": 0.0, "learning_rate": 1.9804235898497305e-05, "loss": 1.2378, "step": 2331 }, { "epoch": 0.09124344627905157, "grad_norm": 0.0, "learning_rate": 1.980398630205201e-05, "loss": 1.1586, "step": 2332 }, { "epoch": 0.091282572971281, "grad_norm": 0.0, "learning_rate": 1.9803736548166984e-05, "loss": 1.2111, "step": 2333 }, { "epoch": 0.09132169966351045, "grad_norm": 0.0, "learning_rate": 1.980348663684623e-05, "loss": 1.4274, "step": 2334 }, { "epoch": 0.09136082635573989, "grad_norm": 0.0, "learning_rate": 1.9803236568093765e-05, "loss": 1.1889, "step": 2335 }, { "epoch": 0.09139995304796933, "grad_norm": 0.0, "learning_rate": 1.980298634191361e-05, "loss": 1.1687, "step": 2336 }, { "epoch": 0.09143907974019877, "grad_norm": 0.0, "learning_rate": 1.980273595830977e-05, "loss": 1.1526, "step": 2337 }, { "epoch": 0.0914782064324282, "grad_norm": 0.0, "learning_rate": 1.980248541728628e-05, "loss": 1.1613, "step": 2338 }, { "epoch": 0.09151733312465764, "grad_norm": 0.0, "learning_rate": 1.9802234718847156e-05, "loss": 1.199, "step": 2339 }, { "epoch": 0.09155645981688708, "grad_norm": 0.0, "learning_rate": 1.9801983862996423e-05, "loss": 1.2476, "step": 2340 }, { "epoch": 0.09159558650911652, "grad_norm": 0.0, "learning_rate": 1.9801732849738114e-05, "loss": 1.2103, "step": 2341 }, { "epoch": 0.09163471320134596, "grad_norm": 0.0, "learning_rate": 1.9801481679076256e-05, "loss": 1.2821, "step": 2342 }, { "epoch": 0.0916738398935754, "grad_norm": 0.0, "learning_rate": 1.9801230351014887e-05, "loss": 1.2827, "step": 2343 }, { "epoch": 0.09171296658580484, "grad_norm": 0.0, "learning_rate": 1.9800978865558038e-05, "loss": 1.0976, "step": 2344 }, { "epoch": 0.09175209327803427, "grad_norm": 0.0, "learning_rate": 1.980072722270975e-05, "loss": 1.334, "step": 2345 }, { "epoch": 0.09179121997026371, "grad_norm": 0.0, "learning_rate": 1.9800475422474064e-05, "loss": 1.3531, "step": 2346 }, { "epoch": 0.09183034666249315, "grad_norm": 0.0, "learning_rate": 1.9800223464855022e-05, "loss": 1.1334, "step": 2347 }, { "epoch": 0.09186947335472259, "grad_norm": 0.0, "learning_rate": 1.9799971349856673e-05, "loss": 1.2913, "step": 2348 }, { "epoch": 0.09190860004695203, "grad_norm": 0.0, "learning_rate": 1.9799719077483065e-05, "loss": 1.3014, "step": 2349 }, { "epoch": 0.09194772673918147, "grad_norm": 0.0, "learning_rate": 1.9799466647738247e-05, "loss": 1.3028, "step": 2350 }, { "epoch": 0.09198685343141091, "grad_norm": 0.0, "learning_rate": 1.9799214060626275e-05, "loss": 1.2131, "step": 2351 }, { "epoch": 0.09202598012364034, "grad_norm": 0.0, "learning_rate": 1.9798961316151203e-05, "loss": 1.132, "step": 2352 }, { "epoch": 0.09206510681586978, "grad_norm": 0.0, "learning_rate": 1.9798708414317095e-05, "loss": 1.1347, "step": 2353 }, { "epoch": 0.09210423350809922, "grad_norm": 0.0, "learning_rate": 1.9798455355128003e-05, "loss": 1.1703, "step": 2354 }, { "epoch": 0.09214336020032866, "grad_norm": 0.0, "learning_rate": 1.9798202138588e-05, "loss": 1.3021, "step": 2355 }, { "epoch": 0.0921824868925581, "grad_norm": 0.0, "learning_rate": 1.9797948764701145e-05, "loss": 1.2634, "step": 2356 }, { "epoch": 0.09222161358478755, "grad_norm": 0.0, "learning_rate": 1.979769523347151e-05, "loss": 1.209, "step": 2357 }, { "epoch": 0.09226074027701699, "grad_norm": 0.0, "learning_rate": 1.9797441544903173e-05, "loss": 1.299, "step": 2358 }, { "epoch": 0.09229986696924641, "grad_norm": 0.0, "learning_rate": 1.9797187699000196e-05, "loss": 1.3671, "step": 2359 }, { "epoch": 0.09233899366147585, "grad_norm": 0.0, "learning_rate": 1.9796933695766663e-05, "loss": 1.0894, "step": 2360 }, { "epoch": 0.0923781203537053, "grad_norm": 0.0, "learning_rate": 1.979667953520665e-05, "loss": 1.251, "step": 2361 }, { "epoch": 0.09241724704593474, "grad_norm": 0.0, "learning_rate": 1.9796425217324244e-05, "loss": 1.2676, "step": 2362 }, { "epoch": 0.09245637373816418, "grad_norm": 0.0, "learning_rate": 1.979617074212352e-05, "loss": 1.2667, "step": 2363 }, { "epoch": 0.09249550043039362, "grad_norm": 0.0, "learning_rate": 1.979591610960857e-05, "loss": 1.3065, "step": 2364 }, { "epoch": 0.09253462712262306, "grad_norm": 0.0, "learning_rate": 1.979566131978348e-05, "loss": 1.3518, "step": 2365 }, { "epoch": 0.09257375381485249, "grad_norm": 0.0, "learning_rate": 1.9795406372652345e-05, "loss": 1.0041, "step": 2366 }, { "epoch": 0.09261288050708193, "grad_norm": 0.0, "learning_rate": 1.979515126821926e-05, "loss": 1.2136, "step": 2367 }, { "epoch": 0.09265200719931137, "grad_norm": 0.0, "learning_rate": 1.979489600648832e-05, "loss": 1.2261, "step": 2368 }, { "epoch": 0.09269113389154081, "grad_norm": 0.0, "learning_rate": 1.9794640587463622e-05, "loss": 1.2607, "step": 2369 }, { "epoch": 0.09273026058377025, "grad_norm": 0.0, "learning_rate": 1.979438501114927e-05, "loss": 1.2678, "step": 2370 }, { "epoch": 0.09276938727599969, "grad_norm": 0.0, "learning_rate": 1.979412927754937e-05, "loss": 1.3452, "step": 2371 }, { "epoch": 0.09280851396822913, "grad_norm": 0.0, "learning_rate": 1.9793873386668023e-05, "loss": 1.2584, "step": 2372 }, { "epoch": 0.09284764066045856, "grad_norm": 0.0, "learning_rate": 1.9793617338509344e-05, "loss": 1.1936, "step": 2373 }, { "epoch": 0.092886767352688, "grad_norm": 0.0, "learning_rate": 1.9793361133077444e-05, "loss": 1.229, "step": 2374 }, { "epoch": 0.09292589404491744, "grad_norm": 0.0, "learning_rate": 1.979310477037643e-05, "loss": 1.1553, "step": 2375 }, { "epoch": 0.09296502073714688, "grad_norm": 0.0, "learning_rate": 1.979284825041043e-05, "loss": 1.2519, "step": 2376 }, { "epoch": 0.09300414742937632, "grad_norm": 0.0, "learning_rate": 1.9792591573183556e-05, "loss": 1.4362, "step": 2377 }, { "epoch": 0.09304327412160576, "grad_norm": 0.0, "learning_rate": 1.9792334738699934e-05, "loss": 1.355, "step": 2378 }, { "epoch": 0.0930824008138352, "grad_norm": 0.0, "learning_rate": 1.9792077746963686e-05, "loss": 1.2569, "step": 2379 }, { "epoch": 0.09312152750606464, "grad_norm": 0.0, "learning_rate": 1.9791820597978942e-05, "loss": 1.2335, "step": 2380 }, { "epoch": 0.09316065419829407, "grad_norm": 0.0, "learning_rate": 1.9791563291749824e-05, "loss": 1.1928, "step": 2381 }, { "epoch": 0.09319978089052351, "grad_norm": 0.0, "learning_rate": 1.9791305828280473e-05, "loss": 1.2739, "step": 2382 }, { "epoch": 0.09323890758275295, "grad_norm": 0.0, "learning_rate": 1.9791048207575018e-05, "loss": 1.1231, "step": 2383 }, { "epoch": 0.0932780342749824, "grad_norm": 0.0, "learning_rate": 1.9790790429637597e-05, "loss": 1.2415, "step": 2384 }, { "epoch": 0.09331716096721183, "grad_norm": 0.0, "learning_rate": 1.979053249447235e-05, "loss": 1.2538, "step": 2385 }, { "epoch": 0.09335628765944128, "grad_norm": 0.0, "learning_rate": 1.979027440208342e-05, "loss": 1.2688, "step": 2386 }, { "epoch": 0.09339541435167072, "grad_norm": 0.0, "learning_rate": 1.9790016152474952e-05, "loss": 1.1998, "step": 2387 }, { "epoch": 0.09343454104390014, "grad_norm": 0.0, "learning_rate": 1.9789757745651093e-05, "loss": 1.2414, "step": 2388 }, { "epoch": 0.09347366773612958, "grad_norm": 0.0, "learning_rate": 1.978949918161599e-05, "loss": 1.253, "step": 2389 }, { "epoch": 0.09351279442835903, "grad_norm": 0.0, "learning_rate": 1.9789240460373794e-05, "loss": 1.0985, "step": 2390 }, { "epoch": 0.09355192112058847, "grad_norm": 0.0, "learning_rate": 1.978898158192867e-05, "loss": 1.2473, "step": 2391 }, { "epoch": 0.0935910478128179, "grad_norm": 0.0, "learning_rate": 1.978872254628476e-05, "loss": 1.0877, "step": 2392 }, { "epoch": 0.09363017450504735, "grad_norm": 0.0, "learning_rate": 1.978846335344624e-05, "loss": 1.1967, "step": 2393 }, { "epoch": 0.09366930119727679, "grad_norm": 0.0, "learning_rate": 1.978820400341726e-05, "loss": 1.236, "step": 2394 }, { "epoch": 0.09370842788950622, "grad_norm": 0.0, "learning_rate": 1.9787944496201988e-05, "loss": 1.2208, "step": 2395 }, { "epoch": 0.09374755458173566, "grad_norm": 0.0, "learning_rate": 1.9787684831804596e-05, "loss": 1.2623, "step": 2396 }, { "epoch": 0.0937866812739651, "grad_norm": 0.0, "learning_rate": 1.9787425010229246e-05, "loss": 1.2856, "step": 2397 }, { "epoch": 0.09382580796619454, "grad_norm": 0.0, "learning_rate": 1.978716503148012e-05, "loss": 1.1584, "step": 2398 }, { "epoch": 0.09386493465842398, "grad_norm": 0.0, "learning_rate": 1.9786904895561382e-05, "loss": 1.3575, "step": 2399 }, { "epoch": 0.09390406135065342, "grad_norm": 0.0, "learning_rate": 1.9786644602477217e-05, "loss": 1.1601, "step": 2400 }, { "epoch": 0.09394318804288286, "grad_norm": 0.0, "learning_rate": 1.9786384152231804e-05, "loss": 1.3351, "step": 2401 }, { "epoch": 0.09398231473511229, "grad_norm": 0.0, "learning_rate": 1.978612354482932e-05, "loss": 1.3103, "step": 2402 }, { "epoch": 0.09402144142734173, "grad_norm": 0.0, "learning_rate": 1.978586278027396e-05, "loss": 1.2827, "step": 2403 }, { "epoch": 0.09406056811957117, "grad_norm": 0.0, "learning_rate": 1.9785601858569907e-05, "loss": 1.162, "step": 2404 }, { "epoch": 0.09409969481180061, "grad_norm": 0.0, "learning_rate": 1.9785340779721348e-05, "loss": 1.1744, "step": 2405 }, { "epoch": 0.09413882150403005, "grad_norm": 0.0, "learning_rate": 1.9785079543732476e-05, "loss": 1.1522, "step": 2406 }, { "epoch": 0.09417794819625949, "grad_norm": 0.0, "learning_rate": 1.978481815060749e-05, "loss": 1.1475, "step": 2407 }, { "epoch": 0.09421707488848893, "grad_norm": 0.0, "learning_rate": 1.9784556600350583e-05, "loss": 1.2178, "step": 2408 }, { "epoch": 0.09425620158071836, "grad_norm": 0.0, "learning_rate": 1.978429489296596e-05, "loss": 1.2318, "step": 2409 }, { "epoch": 0.0942953282729478, "grad_norm": 0.0, "learning_rate": 1.978403302845782e-05, "loss": 1.3063, "step": 2410 }, { "epoch": 0.09433445496517724, "grad_norm": 0.0, "learning_rate": 1.978377100683037e-05, "loss": 1.3357, "step": 2411 }, { "epoch": 0.09437358165740668, "grad_norm": 0.0, "learning_rate": 1.9783508828087822e-05, "loss": 1.161, "step": 2412 }, { "epoch": 0.09441270834963612, "grad_norm": 0.0, "learning_rate": 1.9783246492234376e-05, "loss": 1.1031, "step": 2413 }, { "epoch": 0.09445183504186556, "grad_norm": 0.0, "learning_rate": 1.9782983999274252e-05, "loss": 1.2414, "step": 2414 }, { "epoch": 0.094490961734095, "grad_norm": 0.0, "learning_rate": 1.9782721349211664e-05, "loss": 1.0992, "step": 2415 }, { "epoch": 0.09453008842632443, "grad_norm": 0.0, "learning_rate": 1.978245854205083e-05, "loss": 1.2701, "step": 2416 }, { "epoch": 0.09456921511855387, "grad_norm": 0.0, "learning_rate": 1.978219557779597e-05, "loss": 1.1993, "step": 2417 }, { "epoch": 0.09460834181078331, "grad_norm": 0.0, "learning_rate": 1.978193245645131e-05, "loss": 1.2811, "step": 2418 }, { "epoch": 0.09464746850301275, "grad_norm": 0.0, "learning_rate": 1.9781669178021066e-05, "loss": 1.301, "step": 2419 }, { "epoch": 0.0946865951952422, "grad_norm": 0.0, "learning_rate": 1.9781405742509475e-05, "loss": 1.2021, "step": 2420 }, { "epoch": 0.09472572188747164, "grad_norm": 0.0, "learning_rate": 1.9781142149920763e-05, "loss": 1.2181, "step": 2421 }, { "epoch": 0.09476484857970108, "grad_norm": 0.0, "learning_rate": 1.9780878400259163e-05, "loss": 1.1689, "step": 2422 }, { "epoch": 0.0948039752719305, "grad_norm": 0.0, "learning_rate": 1.9780614493528917e-05, "loss": 1.2856, "step": 2423 }, { "epoch": 0.09484310196415995, "grad_norm": 0.0, "learning_rate": 1.9780350429734256e-05, "loss": 1.3085, "step": 2424 }, { "epoch": 0.09488222865638939, "grad_norm": 0.0, "learning_rate": 1.978008620887942e-05, "loss": 1.2037, "step": 2425 }, { "epoch": 0.09492135534861883, "grad_norm": 0.0, "learning_rate": 1.977982183096866e-05, "loss": 1.2634, "step": 2426 }, { "epoch": 0.09496048204084827, "grad_norm": 0.0, "learning_rate": 1.9779557296006213e-05, "loss": 1.1938, "step": 2427 }, { "epoch": 0.09499960873307771, "grad_norm": 0.0, "learning_rate": 1.977929260399633e-05, "loss": 1.2869, "step": 2428 }, { "epoch": 0.09503873542530715, "grad_norm": 0.0, "learning_rate": 1.977902775494326e-05, "loss": 1.3593, "step": 2429 }, { "epoch": 0.09507786211753658, "grad_norm": 0.0, "learning_rate": 1.977876274885126e-05, "loss": 1.2872, "step": 2430 }, { "epoch": 0.09511698880976602, "grad_norm": 0.0, "learning_rate": 1.9778497585724586e-05, "loss": 1.2473, "step": 2431 }, { "epoch": 0.09515611550199546, "grad_norm": 0.0, "learning_rate": 1.9778232265567493e-05, "loss": 1.0958, "step": 2432 }, { "epoch": 0.0951952421942249, "grad_norm": 0.0, "learning_rate": 1.9777966788384243e-05, "loss": 1.2195, "step": 2433 }, { "epoch": 0.09523436888645434, "grad_norm": 0.0, "learning_rate": 1.9777701154179097e-05, "loss": 1.2337, "step": 2434 }, { "epoch": 0.09527349557868378, "grad_norm": 0.0, "learning_rate": 1.9777435362956322e-05, "loss": 1.1921, "step": 2435 }, { "epoch": 0.09531262227091322, "grad_norm": 0.0, "learning_rate": 1.977716941472019e-05, "loss": 1.2926, "step": 2436 }, { "epoch": 0.09535174896314265, "grad_norm": 0.0, "learning_rate": 1.9776903309474965e-05, "loss": 1.1451, "step": 2437 }, { "epoch": 0.09539087565537209, "grad_norm": 0.0, "learning_rate": 1.9776637047224927e-05, "loss": 1.2238, "step": 2438 }, { "epoch": 0.09543000234760153, "grad_norm": 0.0, "learning_rate": 1.9776370627974347e-05, "loss": 1.1623, "step": 2439 }, { "epoch": 0.09546912903983097, "grad_norm": 0.0, "learning_rate": 1.9776104051727505e-05, "loss": 1.2244, "step": 2440 }, { "epoch": 0.09550825573206041, "grad_norm": 0.0, "learning_rate": 1.9775837318488683e-05, "loss": 1.2841, "step": 2441 }, { "epoch": 0.09554738242428985, "grad_norm": 0.0, "learning_rate": 1.9775570428262164e-05, "loss": 1.1956, "step": 2442 }, { "epoch": 0.0955865091165193, "grad_norm": 0.0, "learning_rate": 1.9775303381052234e-05, "loss": 1.2427, "step": 2443 }, { "epoch": 0.09562563580874874, "grad_norm": 0.0, "learning_rate": 1.9775036176863178e-05, "loss": 1.2264, "step": 2444 }, { "epoch": 0.09566476250097816, "grad_norm": 0.0, "learning_rate": 1.977476881569929e-05, "loss": 1.2567, "step": 2445 }, { "epoch": 0.0957038891932076, "grad_norm": 0.0, "learning_rate": 1.9774501297564864e-05, "loss": 1.2451, "step": 2446 }, { "epoch": 0.09574301588543704, "grad_norm": 0.0, "learning_rate": 1.9774233622464196e-05, "loss": 1.2675, "step": 2447 }, { "epoch": 0.09578214257766648, "grad_norm": 0.0, "learning_rate": 1.9773965790401583e-05, "loss": 1.1664, "step": 2448 }, { "epoch": 0.09582126926989593, "grad_norm": 0.0, "learning_rate": 1.977369780138133e-05, "loss": 1.2742, "step": 2449 }, { "epoch": 0.09586039596212537, "grad_norm": 0.0, "learning_rate": 1.9773429655407734e-05, "loss": 1.3243, "step": 2450 }, { "epoch": 0.09589952265435481, "grad_norm": 0.0, "learning_rate": 1.9773161352485106e-05, "loss": 1.1566, "step": 2451 }, { "epoch": 0.09593864934658423, "grad_norm": 0.0, "learning_rate": 1.9772892892617753e-05, "loss": 1.2048, "step": 2452 }, { "epoch": 0.09597777603881368, "grad_norm": 0.0, "learning_rate": 1.9772624275809984e-05, "loss": 1.2483, "step": 2453 }, { "epoch": 0.09601690273104312, "grad_norm": 0.0, "learning_rate": 1.977235550206612e-05, "loss": 1.232, "step": 2454 }, { "epoch": 0.09605602942327256, "grad_norm": 0.0, "learning_rate": 1.9772086571390467e-05, "loss": 1.1658, "step": 2455 }, { "epoch": 0.096095156115502, "grad_norm": 0.0, "learning_rate": 1.977181748378735e-05, "loss": 1.2995, "step": 2456 }, { "epoch": 0.09613428280773144, "grad_norm": 0.0, "learning_rate": 1.9771548239261088e-05, "loss": 1.3193, "step": 2457 }, { "epoch": 0.09617340949996088, "grad_norm": 0.0, "learning_rate": 1.977127883781601e-05, "loss": 1.2826, "step": 2458 }, { "epoch": 0.0962125361921903, "grad_norm": 0.0, "learning_rate": 1.9771009279456436e-05, "loss": 1.2234, "step": 2459 }, { "epoch": 0.09625166288441975, "grad_norm": 0.0, "learning_rate": 1.9770739564186695e-05, "loss": 1.1762, "step": 2460 }, { "epoch": 0.09629078957664919, "grad_norm": 0.0, "learning_rate": 1.977046969201112e-05, "loss": 1.2862, "step": 2461 }, { "epoch": 0.09632991626887863, "grad_norm": 0.0, "learning_rate": 1.977019966293405e-05, "loss": 1.079, "step": 2462 }, { "epoch": 0.09636904296110807, "grad_norm": 0.0, "learning_rate": 1.9769929476959812e-05, "loss": 1.2704, "step": 2463 }, { "epoch": 0.09640816965333751, "grad_norm": 0.0, "learning_rate": 1.976965913409275e-05, "loss": 1.1914, "step": 2464 }, { "epoch": 0.09644729634556695, "grad_norm": 0.0, "learning_rate": 1.9769388634337202e-05, "loss": 1.1525, "step": 2465 }, { "epoch": 0.09648642303779638, "grad_norm": 0.0, "learning_rate": 1.9769117977697513e-05, "loss": 1.1359, "step": 2466 }, { "epoch": 0.09652554973002582, "grad_norm": 0.0, "learning_rate": 1.9768847164178036e-05, "loss": 1.2455, "step": 2467 }, { "epoch": 0.09656467642225526, "grad_norm": 0.0, "learning_rate": 1.976857619378311e-05, "loss": 1.3289, "step": 2468 }, { "epoch": 0.0966038031144847, "grad_norm": 0.0, "learning_rate": 1.9768305066517093e-05, "loss": 1.2919, "step": 2469 }, { "epoch": 0.09664292980671414, "grad_norm": 0.0, "learning_rate": 1.9768033782384338e-05, "loss": 1.2004, "step": 2470 }, { "epoch": 0.09668205649894358, "grad_norm": 0.0, "learning_rate": 1.97677623413892e-05, "loss": 1.2002, "step": 2471 }, { "epoch": 0.09672118319117302, "grad_norm": 0.0, "learning_rate": 1.9767490743536037e-05, "loss": 1.1664, "step": 2472 }, { "epoch": 0.09676030988340245, "grad_norm": 0.0, "learning_rate": 1.9767218988829212e-05, "loss": 1.2495, "step": 2473 }, { "epoch": 0.09679943657563189, "grad_norm": 0.0, "learning_rate": 1.9766947077273092e-05, "loss": 1.3549, "step": 2474 }, { "epoch": 0.09683856326786133, "grad_norm": 0.0, "learning_rate": 1.976667500887204e-05, "loss": 1.1138, "step": 2475 }, { "epoch": 0.09687768996009077, "grad_norm": 0.0, "learning_rate": 1.9766402783630424e-05, "loss": 1.2346, "step": 2476 }, { "epoch": 0.09691681665232021, "grad_norm": 0.0, "learning_rate": 1.9766130401552617e-05, "loss": 1.2761, "step": 2477 }, { "epoch": 0.09695594334454966, "grad_norm": 0.0, "learning_rate": 1.976585786264299e-05, "loss": 1.2886, "step": 2478 }, { "epoch": 0.0969950700367791, "grad_norm": 0.0, "learning_rate": 1.976558516690593e-05, "loss": 1.158, "step": 2479 }, { "epoch": 0.09703419672900852, "grad_norm": 0.0, "learning_rate": 1.9765312314345807e-05, "loss": 1.2901, "step": 2480 }, { "epoch": 0.09707332342123796, "grad_norm": 0.0, "learning_rate": 1.9765039304967004e-05, "loss": 1.2234, "step": 2481 }, { "epoch": 0.0971124501134674, "grad_norm": 0.0, "learning_rate": 1.976476613877391e-05, "loss": 1.0668, "step": 2482 }, { "epoch": 0.09715157680569685, "grad_norm": 0.0, "learning_rate": 1.97644928157709e-05, "loss": 1.1866, "step": 2483 }, { "epoch": 0.09719070349792629, "grad_norm": 0.0, "learning_rate": 1.9764219335962376e-05, "loss": 1.2631, "step": 2484 }, { "epoch": 0.09722983019015573, "grad_norm": 0.0, "learning_rate": 1.976394569935272e-05, "loss": 1.2759, "step": 2485 }, { "epoch": 0.09726895688238517, "grad_norm": 0.0, "learning_rate": 1.9763671905946338e-05, "loss": 1.0764, "step": 2486 }, { "epoch": 0.0973080835746146, "grad_norm": 0.0, "learning_rate": 1.9763397955747617e-05, "loss": 1.1696, "step": 2487 }, { "epoch": 0.09734721026684404, "grad_norm": 0.0, "learning_rate": 1.9763123848760956e-05, "loss": 1.2881, "step": 2488 }, { "epoch": 0.09738633695907348, "grad_norm": 0.0, "learning_rate": 1.9762849584990763e-05, "loss": 1.2899, "step": 2489 }, { "epoch": 0.09742546365130292, "grad_norm": 0.0, "learning_rate": 1.976257516444144e-05, "loss": 1.3429, "step": 2490 }, { "epoch": 0.09746459034353236, "grad_norm": 0.0, "learning_rate": 1.976230058711739e-05, "loss": 1.1832, "step": 2491 }, { "epoch": 0.0975037170357618, "grad_norm": 0.0, "learning_rate": 1.9762025853023025e-05, "loss": 1.3469, "step": 2492 }, { "epoch": 0.09754284372799124, "grad_norm": 0.0, "learning_rate": 1.976175096216276e-05, "loss": 1.3391, "step": 2493 }, { "epoch": 0.09758197042022067, "grad_norm": 0.0, "learning_rate": 1.9761475914541008e-05, "loss": 1.3527, "step": 2494 }, { "epoch": 0.09762109711245011, "grad_norm": 0.0, "learning_rate": 1.9761200710162184e-05, "loss": 1.2901, "step": 2495 }, { "epoch": 0.09766022380467955, "grad_norm": 0.0, "learning_rate": 1.9760925349030704e-05, "loss": 1.2701, "step": 2496 }, { "epoch": 0.09769935049690899, "grad_norm": 0.0, "learning_rate": 1.9760649831150997e-05, "loss": 1.2017, "step": 2497 }, { "epoch": 0.09773847718913843, "grad_norm": 0.0, "learning_rate": 1.9760374156527484e-05, "loss": 1.2106, "step": 2498 }, { "epoch": 0.09777760388136787, "grad_norm": 0.0, "learning_rate": 1.9760098325164593e-05, "loss": 1.3197, "step": 2499 }, { "epoch": 0.09781673057359731, "grad_norm": 0.0, "learning_rate": 1.9759822337066753e-05, "loss": 1.2597, "step": 2500 }, { "epoch": 0.09785585726582675, "grad_norm": 0.0, "learning_rate": 1.97595461922384e-05, "loss": 1.1885, "step": 2501 }, { "epoch": 0.09789498395805618, "grad_norm": 0.0, "learning_rate": 1.9759269890683958e-05, "loss": 1.1138, "step": 2502 }, { "epoch": 0.09793411065028562, "grad_norm": 0.0, "learning_rate": 1.9758993432407873e-05, "loss": 1.3549, "step": 2503 }, { "epoch": 0.09797323734251506, "grad_norm": 0.0, "learning_rate": 1.975871681741458e-05, "loss": 1.2849, "step": 2504 }, { "epoch": 0.0980123640347445, "grad_norm": 0.0, "learning_rate": 1.9758440045708523e-05, "loss": 1.1179, "step": 2505 }, { "epoch": 0.09805149072697394, "grad_norm": 0.0, "learning_rate": 1.975816311729415e-05, "loss": 1.1721, "step": 2506 }, { "epoch": 0.09809061741920339, "grad_norm": 0.0, "learning_rate": 1.9757886032175903e-05, "loss": 1.2861, "step": 2507 }, { "epoch": 0.09812974411143283, "grad_norm": 0.0, "learning_rate": 1.9757608790358234e-05, "loss": 1.1195, "step": 2508 }, { "epoch": 0.09816887080366225, "grad_norm": 0.0, "learning_rate": 1.9757331391845596e-05, "loss": 1.2068, "step": 2509 }, { "epoch": 0.0982079974958917, "grad_norm": 0.0, "learning_rate": 1.9757053836642444e-05, "loss": 1.2511, "step": 2510 }, { "epoch": 0.09824712418812113, "grad_norm": 0.0, "learning_rate": 1.9756776124753233e-05, "loss": 1.2838, "step": 2511 }, { "epoch": 0.09828625088035058, "grad_norm": 0.0, "learning_rate": 1.9756498256182422e-05, "loss": 1.2736, "step": 2512 }, { "epoch": 0.09832537757258002, "grad_norm": 0.0, "learning_rate": 1.9756220230934474e-05, "loss": 1.1744, "step": 2513 }, { "epoch": 0.09836450426480946, "grad_norm": 0.0, "learning_rate": 1.9755942049013853e-05, "loss": 1.2357, "step": 2514 }, { "epoch": 0.0984036309570389, "grad_norm": 0.0, "learning_rate": 1.975566371042503e-05, "loss": 1.1631, "step": 2515 }, { "epoch": 0.09844275764926833, "grad_norm": 0.0, "learning_rate": 1.975538521517247e-05, "loss": 1.1289, "step": 2516 }, { "epoch": 0.09848188434149777, "grad_norm": 0.0, "learning_rate": 1.975510656326065e-05, "loss": 1.3187, "step": 2517 }, { "epoch": 0.09852101103372721, "grad_norm": 0.0, "learning_rate": 1.9754827754694043e-05, "loss": 1.289, "step": 2518 }, { "epoch": 0.09856013772595665, "grad_norm": 0.0, "learning_rate": 1.9754548789477126e-05, "loss": 1.0887, "step": 2519 }, { "epoch": 0.09859926441818609, "grad_norm": 0.0, "learning_rate": 1.9754269667614378e-05, "loss": 1.3399, "step": 2520 }, { "epoch": 0.09863839111041553, "grad_norm": 0.0, "learning_rate": 1.975399038911028e-05, "loss": 1.1662, "step": 2521 }, { "epoch": 0.09867751780264497, "grad_norm": 0.0, "learning_rate": 1.975371095396932e-05, "loss": 1.1399, "step": 2522 }, { "epoch": 0.0987166444948744, "grad_norm": 0.0, "learning_rate": 1.9753431362195985e-05, "loss": 1.2209, "step": 2523 }, { "epoch": 0.09875577118710384, "grad_norm": 0.0, "learning_rate": 1.9753151613794763e-05, "loss": 1.3686, "step": 2524 }, { "epoch": 0.09879489787933328, "grad_norm": 0.0, "learning_rate": 1.9752871708770146e-05, "loss": 1.2068, "step": 2525 }, { "epoch": 0.09883402457156272, "grad_norm": 0.0, "learning_rate": 1.9752591647126633e-05, "loss": 1.1475, "step": 2526 }, { "epoch": 0.09887315126379216, "grad_norm": 0.0, "learning_rate": 1.9752311428868716e-05, "loss": 1.2056, "step": 2527 }, { "epoch": 0.0989122779560216, "grad_norm": 0.0, "learning_rate": 1.9752031054000903e-05, "loss": 1.1562, "step": 2528 }, { "epoch": 0.09895140464825104, "grad_norm": 0.0, "learning_rate": 1.9751750522527686e-05, "loss": 1.2678, "step": 2529 }, { "epoch": 0.09899053134048047, "grad_norm": 0.0, "learning_rate": 1.9751469834453577e-05, "loss": 1.0852, "step": 2530 }, { "epoch": 0.09902965803270991, "grad_norm": 0.0, "learning_rate": 1.975118898978308e-05, "loss": 1.3014, "step": 2531 }, { "epoch": 0.09906878472493935, "grad_norm": 0.0, "learning_rate": 1.975090798852071e-05, "loss": 1.2078, "step": 2532 }, { "epoch": 0.09910791141716879, "grad_norm": 0.0, "learning_rate": 1.9750626830670976e-05, "loss": 1.2271, "step": 2533 }, { "epoch": 0.09914703810939823, "grad_norm": 0.0, "learning_rate": 1.975034551623839e-05, "loss": 1.245, "step": 2534 }, { "epoch": 0.09918616480162767, "grad_norm": 0.0, "learning_rate": 1.9750064045227474e-05, "loss": 1.1929, "step": 2535 }, { "epoch": 0.09922529149385712, "grad_norm": 0.0, "learning_rate": 1.974978241764275e-05, "loss": 1.3653, "step": 2536 }, { "epoch": 0.09926441818608654, "grad_norm": 0.0, "learning_rate": 1.9749500633488736e-05, "loss": 1.2261, "step": 2537 }, { "epoch": 0.09930354487831598, "grad_norm": 0.0, "learning_rate": 1.9749218692769958e-05, "loss": 1.2863, "step": 2538 }, { "epoch": 0.09934267157054542, "grad_norm": 0.0, "learning_rate": 1.9748936595490943e-05, "loss": 1.2886, "step": 2539 }, { "epoch": 0.09938179826277486, "grad_norm": 0.0, "learning_rate": 1.9748654341656225e-05, "loss": 1.2417, "step": 2540 }, { "epoch": 0.0994209249550043, "grad_norm": 0.0, "learning_rate": 1.9748371931270333e-05, "loss": 1.2285, "step": 2541 }, { "epoch": 0.09946005164723375, "grad_norm": 0.0, "learning_rate": 1.9748089364337803e-05, "loss": 1.3273, "step": 2542 }, { "epoch": 0.09949917833946319, "grad_norm": 0.0, "learning_rate": 1.9747806640863174e-05, "loss": 1.3798, "step": 2543 }, { "epoch": 0.09953830503169261, "grad_norm": 0.0, "learning_rate": 1.9747523760850984e-05, "loss": 1.2285, "step": 2544 }, { "epoch": 0.09957743172392206, "grad_norm": 0.0, "learning_rate": 1.9747240724305773e-05, "loss": 1.1813, "step": 2545 }, { "epoch": 0.0996165584161515, "grad_norm": 0.0, "learning_rate": 1.9746957531232097e-05, "loss": 1.2482, "step": 2546 }, { "epoch": 0.09965568510838094, "grad_norm": 0.0, "learning_rate": 1.974667418163449e-05, "loss": 1.3182, "step": 2547 }, { "epoch": 0.09969481180061038, "grad_norm": 0.0, "learning_rate": 1.9746390675517514e-05, "loss": 1.3567, "step": 2548 }, { "epoch": 0.09973393849283982, "grad_norm": 0.0, "learning_rate": 1.9746107012885715e-05, "loss": 1.1809, "step": 2549 }, { "epoch": 0.09977306518506926, "grad_norm": 0.0, "learning_rate": 1.9745823193743648e-05, "loss": 1.2922, "step": 2550 }, { "epoch": 0.09981219187729869, "grad_norm": 0.0, "learning_rate": 1.9745539218095876e-05, "loss": 1.11, "step": 2551 }, { "epoch": 0.09985131856952813, "grad_norm": 0.0, "learning_rate": 1.9745255085946955e-05, "loss": 1.1371, "step": 2552 }, { "epoch": 0.09989044526175757, "grad_norm": 0.0, "learning_rate": 1.9744970797301447e-05, "loss": 1.135, "step": 2553 }, { "epoch": 0.09992957195398701, "grad_norm": 0.0, "learning_rate": 1.974468635216392e-05, "loss": 1.4044, "step": 2554 }, { "epoch": 0.09996869864621645, "grad_norm": 0.0, "learning_rate": 1.9744401750538943e-05, "loss": 1.2329, "step": 2555 }, { "epoch": 0.10000782533844589, "grad_norm": 0.0, "learning_rate": 1.9744116992431082e-05, "loss": 1.2093, "step": 2556 }, { "epoch": 0.10004695203067533, "grad_norm": 0.0, "learning_rate": 1.974383207784491e-05, "loss": 1.296, "step": 2557 }, { "epoch": 0.10008607872290476, "grad_norm": 0.0, "learning_rate": 1.9743547006785012e-05, "loss": 1.3259, "step": 2558 }, { "epoch": 0.1001252054151342, "grad_norm": 0.0, "learning_rate": 1.9743261779255954e-05, "loss": 1.1331, "step": 2559 }, { "epoch": 0.10016433210736364, "grad_norm": 0.0, "learning_rate": 1.974297639526232e-05, "loss": 1.0946, "step": 2560 }, { "epoch": 0.10020345879959308, "grad_norm": 0.0, "learning_rate": 1.9742690854808692e-05, "loss": 1.3524, "step": 2561 }, { "epoch": 0.10024258549182252, "grad_norm": 0.0, "learning_rate": 1.974240515789966e-05, "loss": 1.2665, "step": 2562 }, { "epoch": 0.10028171218405196, "grad_norm": 0.0, "learning_rate": 1.9742119304539807e-05, "loss": 1.2438, "step": 2563 }, { "epoch": 0.1003208388762814, "grad_norm": 0.0, "learning_rate": 1.9741833294733728e-05, "loss": 1.088, "step": 2564 }, { "epoch": 0.10035996556851084, "grad_norm": 0.0, "learning_rate": 1.974154712848601e-05, "loss": 1.2794, "step": 2565 }, { "epoch": 0.10039909226074027, "grad_norm": 0.0, "learning_rate": 1.9741260805801258e-05, "loss": 1.3518, "step": 2566 }, { "epoch": 0.10043821895296971, "grad_norm": 0.0, "learning_rate": 1.9740974326684062e-05, "loss": 1.1483, "step": 2567 }, { "epoch": 0.10047734564519915, "grad_norm": 0.0, "learning_rate": 1.974068769113902e-05, "loss": 1.1661, "step": 2568 }, { "epoch": 0.1005164723374286, "grad_norm": 0.0, "learning_rate": 1.9740400899170744e-05, "loss": 1.2635, "step": 2569 }, { "epoch": 0.10055559902965804, "grad_norm": 0.0, "learning_rate": 1.974011395078383e-05, "loss": 1.113, "step": 2570 }, { "epoch": 0.10059472572188748, "grad_norm": 0.0, "learning_rate": 1.9739826845982896e-05, "loss": 1.2604, "step": 2571 }, { "epoch": 0.10063385241411692, "grad_norm": 0.0, "learning_rate": 1.9739539584772546e-05, "loss": 1.1924, "step": 2572 }, { "epoch": 0.10067297910634634, "grad_norm": 0.0, "learning_rate": 1.9739252167157393e-05, "loss": 1.2683, "step": 2573 }, { "epoch": 0.10071210579857579, "grad_norm": 0.0, "learning_rate": 1.973896459314206e-05, "loss": 1.2744, "step": 2574 }, { "epoch": 0.10075123249080523, "grad_norm": 0.0, "learning_rate": 1.9738676862731153e-05, "loss": 1.2283, "step": 2575 }, { "epoch": 0.10079035918303467, "grad_norm": 0.0, "learning_rate": 1.9738388975929303e-05, "loss": 1.2079, "step": 2576 }, { "epoch": 0.10082948587526411, "grad_norm": 0.0, "learning_rate": 1.9738100932741125e-05, "loss": 1.2112, "step": 2577 }, { "epoch": 0.10086861256749355, "grad_norm": 0.0, "learning_rate": 1.973781273317125e-05, "loss": 1.2003, "step": 2578 }, { "epoch": 0.10090773925972299, "grad_norm": 0.0, "learning_rate": 1.97375243772243e-05, "loss": 1.2418, "step": 2579 }, { "epoch": 0.10094686595195242, "grad_norm": 0.0, "learning_rate": 1.973723586490492e-05, "loss": 1.2783, "step": 2580 }, { "epoch": 0.10098599264418186, "grad_norm": 0.0, "learning_rate": 1.9736947196217726e-05, "loss": 1.3773, "step": 2581 }, { "epoch": 0.1010251193364113, "grad_norm": 0.0, "learning_rate": 1.9736658371167366e-05, "loss": 1.3026, "step": 2582 }, { "epoch": 0.10106424602864074, "grad_norm": 0.0, "learning_rate": 1.973636938975847e-05, "loss": 1.2067, "step": 2583 }, { "epoch": 0.10110337272087018, "grad_norm": 0.0, "learning_rate": 1.973608025199568e-05, "loss": 1.2513, "step": 2584 }, { "epoch": 0.10114249941309962, "grad_norm": 0.0, "learning_rate": 1.9735790957883645e-05, "loss": 1.2469, "step": 2585 }, { "epoch": 0.10118162610532906, "grad_norm": 0.0, "learning_rate": 1.9735501507427007e-05, "loss": 1.1902, "step": 2586 }, { "epoch": 0.10122075279755849, "grad_norm": 0.0, "learning_rate": 1.9735211900630414e-05, "loss": 1.202, "step": 2587 }, { "epoch": 0.10125987948978793, "grad_norm": 0.0, "learning_rate": 1.9734922137498516e-05, "loss": 1.2738, "step": 2588 }, { "epoch": 0.10129900618201737, "grad_norm": 0.0, "learning_rate": 1.9734632218035964e-05, "loss": 1.3317, "step": 2589 }, { "epoch": 0.10133813287424681, "grad_norm": 0.0, "learning_rate": 1.973434214224742e-05, "loss": 1.2226, "step": 2590 }, { "epoch": 0.10137725956647625, "grad_norm": 0.0, "learning_rate": 1.973405191013754e-05, "loss": 1.2065, "step": 2591 }, { "epoch": 0.10141638625870569, "grad_norm": 0.0, "learning_rate": 1.973376152171098e-05, "loss": 1.2964, "step": 2592 }, { "epoch": 0.10145551295093513, "grad_norm": 0.0, "learning_rate": 1.973347097697241e-05, "loss": 1.2242, "step": 2593 }, { "epoch": 0.10149463964316456, "grad_norm": 0.0, "learning_rate": 1.973318027592649e-05, "loss": 1.1012, "step": 2594 }, { "epoch": 0.101533766335394, "grad_norm": 0.0, "learning_rate": 1.9732889418577897e-05, "loss": 1.2178, "step": 2595 }, { "epoch": 0.10157289302762344, "grad_norm": 0.0, "learning_rate": 1.9732598404931293e-05, "loss": 1.2803, "step": 2596 }, { "epoch": 0.10161201971985288, "grad_norm": 0.0, "learning_rate": 1.973230723499135e-05, "loss": 1.1878, "step": 2597 }, { "epoch": 0.10165114641208232, "grad_norm": 0.0, "learning_rate": 1.973201590876275e-05, "loss": 1.1793, "step": 2598 }, { "epoch": 0.10169027310431177, "grad_norm": 0.0, "learning_rate": 1.9731724426250173e-05, "loss": 1.2279, "step": 2599 }, { "epoch": 0.1017293997965412, "grad_norm": 0.0, "learning_rate": 1.9731432787458294e-05, "loss": 1.2753, "step": 2600 }, { "epoch": 0.10176852648877063, "grad_norm": 0.0, "learning_rate": 1.9731140992391798e-05, "loss": 1.2893, "step": 2601 }, { "epoch": 0.10180765318100007, "grad_norm": 0.0, "learning_rate": 1.9730849041055373e-05, "loss": 1.2783, "step": 2602 }, { "epoch": 0.10184677987322951, "grad_norm": 0.0, "learning_rate": 1.9730556933453706e-05, "loss": 1.2683, "step": 2603 }, { "epoch": 0.10188590656545896, "grad_norm": 0.0, "learning_rate": 1.973026466959149e-05, "loss": 1.1532, "step": 2604 }, { "epoch": 0.1019250332576884, "grad_norm": 0.0, "learning_rate": 1.9729972249473408e-05, "loss": 1.2394, "step": 2605 }, { "epoch": 0.10196415994991784, "grad_norm": 0.0, "learning_rate": 1.972967967310417e-05, "loss": 1.2303, "step": 2606 }, { "epoch": 0.10200328664214728, "grad_norm": 0.0, "learning_rate": 1.9729386940488467e-05, "loss": 1.2133, "step": 2607 }, { "epoch": 0.1020424133343767, "grad_norm": 0.0, "learning_rate": 1.9729094051631003e-05, "loss": 1.173, "step": 2608 }, { "epoch": 0.10208154002660615, "grad_norm": 0.0, "learning_rate": 1.9728801006536478e-05, "loss": 1.2592, "step": 2609 }, { "epoch": 0.10212066671883559, "grad_norm": 0.0, "learning_rate": 1.97285078052096e-05, "loss": 1.2031, "step": 2610 }, { "epoch": 0.10215979341106503, "grad_norm": 0.0, "learning_rate": 1.9728214447655076e-05, "loss": 1.0997, "step": 2611 }, { "epoch": 0.10219892010329447, "grad_norm": 0.0, "learning_rate": 1.972792093387762e-05, "loss": 1.2413, "step": 2612 }, { "epoch": 0.10223804679552391, "grad_norm": 0.0, "learning_rate": 1.9727627263881942e-05, "loss": 1.211, "step": 2613 }, { "epoch": 0.10227717348775335, "grad_norm": 0.0, "learning_rate": 1.9727333437672763e-05, "loss": 1.2003, "step": 2614 }, { "epoch": 0.10231630017998278, "grad_norm": 0.0, "learning_rate": 1.9727039455254794e-05, "loss": 1.2532, "step": 2615 }, { "epoch": 0.10235542687221222, "grad_norm": 0.0, "learning_rate": 1.9726745316632762e-05, "loss": 1.2714, "step": 2616 }, { "epoch": 0.10239455356444166, "grad_norm": 0.0, "learning_rate": 1.9726451021811387e-05, "loss": 1.1354, "step": 2617 }, { "epoch": 0.1024336802566711, "grad_norm": 0.0, "learning_rate": 1.97261565707954e-05, "loss": 1.1111, "step": 2618 }, { "epoch": 0.10247280694890054, "grad_norm": 0.0, "learning_rate": 1.972586196358952e-05, "loss": 1.1769, "step": 2619 }, { "epoch": 0.10251193364112998, "grad_norm": 0.0, "learning_rate": 1.9725567200198486e-05, "loss": 1.2462, "step": 2620 }, { "epoch": 0.10255106033335942, "grad_norm": 0.0, "learning_rate": 1.9725272280627036e-05, "loss": 1.3253, "step": 2621 }, { "epoch": 0.10259018702558885, "grad_norm": 0.0, "learning_rate": 1.9724977204879894e-05, "loss": 1.3265, "step": 2622 }, { "epoch": 0.10262931371781829, "grad_norm": 0.0, "learning_rate": 1.9724681972961806e-05, "loss": 1.2364, "step": 2623 }, { "epoch": 0.10266844041004773, "grad_norm": 0.0, "learning_rate": 1.972438658487751e-05, "loss": 1.266, "step": 2624 }, { "epoch": 0.10270756710227717, "grad_norm": 0.0, "learning_rate": 1.972409104063175e-05, "loss": 1.2046, "step": 2625 }, { "epoch": 0.10274669379450661, "grad_norm": 0.0, "learning_rate": 1.9723795340229274e-05, "loss": 1.1349, "step": 2626 }, { "epoch": 0.10278582048673605, "grad_norm": 0.0, "learning_rate": 1.972349948367483e-05, "loss": 1.1608, "step": 2627 }, { "epoch": 0.1028249471789655, "grad_norm": 0.0, "learning_rate": 1.9723203470973168e-05, "loss": 1.2825, "step": 2628 }, { "epoch": 0.10286407387119494, "grad_norm": 0.0, "learning_rate": 1.9722907302129042e-05, "loss": 1.2864, "step": 2629 }, { "epoch": 0.10290320056342436, "grad_norm": 0.0, "learning_rate": 1.9722610977147203e-05, "loss": 1.3141, "step": 2630 }, { "epoch": 0.1029423272556538, "grad_norm": 0.0, "learning_rate": 1.9722314496032422e-05, "loss": 1.228, "step": 2631 }, { "epoch": 0.10298145394788324, "grad_norm": 0.0, "learning_rate": 1.972201785878945e-05, "loss": 1.2372, "step": 2632 }, { "epoch": 0.10302058064011269, "grad_norm": 0.0, "learning_rate": 1.9721721065423055e-05, "loss": 1.2178, "step": 2633 }, { "epoch": 0.10305970733234213, "grad_norm": 0.0, "learning_rate": 1.9721424115938004e-05, "loss": 1.0566, "step": 2634 }, { "epoch": 0.10309883402457157, "grad_norm": 0.0, "learning_rate": 1.9721127010339057e-05, "loss": 1.3369, "step": 2635 }, { "epoch": 0.10313796071680101, "grad_norm": 0.0, "learning_rate": 1.9720829748630997e-05, "loss": 1.1698, "step": 2636 }, { "epoch": 0.10317708740903044, "grad_norm": 0.0, "learning_rate": 1.972053233081859e-05, "loss": 1.3497, "step": 2637 }, { "epoch": 0.10321621410125988, "grad_norm": 0.0, "learning_rate": 1.9720234756906613e-05, "loss": 1.2905, "step": 2638 }, { "epoch": 0.10325534079348932, "grad_norm": 0.0, "learning_rate": 1.971993702689985e-05, "loss": 1.2036, "step": 2639 }, { "epoch": 0.10329446748571876, "grad_norm": 0.0, "learning_rate": 1.9719639140803073e-05, "loss": 1.3002, "step": 2640 }, { "epoch": 0.1033335941779482, "grad_norm": 0.0, "learning_rate": 1.9719341098621074e-05, "loss": 1.1849, "step": 2641 }, { "epoch": 0.10337272087017764, "grad_norm": 0.0, "learning_rate": 1.9719042900358635e-05, "loss": 1.298, "step": 2642 }, { "epoch": 0.10341184756240708, "grad_norm": 0.0, "learning_rate": 1.9718744546020547e-05, "loss": 1.2909, "step": 2643 }, { "epoch": 0.10345097425463651, "grad_norm": 0.0, "learning_rate": 1.97184460356116e-05, "loss": 1.1343, "step": 2644 }, { "epoch": 0.10349010094686595, "grad_norm": 0.0, "learning_rate": 1.9718147369136584e-05, "loss": 1.1638, "step": 2645 }, { "epoch": 0.10352922763909539, "grad_norm": 0.0, "learning_rate": 1.97178485466003e-05, "loss": 1.3221, "step": 2646 }, { "epoch": 0.10356835433132483, "grad_norm": 0.0, "learning_rate": 1.9717549568007544e-05, "loss": 1.1782, "step": 2647 }, { "epoch": 0.10360748102355427, "grad_norm": 0.0, "learning_rate": 1.9717250433363125e-05, "loss": 1.3007, "step": 2648 }, { "epoch": 0.10364660771578371, "grad_norm": 0.0, "learning_rate": 1.9716951142671835e-05, "loss": 1.2807, "step": 2649 }, { "epoch": 0.10368573440801315, "grad_norm": 0.0, "learning_rate": 1.9716651695938488e-05, "loss": 1.3334, "step": 2650 }, { "epoch": 0.10372486110024258, "grad_norm": 0.0, "learning_rate": 1.971635209316789e-05, "loss": 1.2803, "step": 2651 }, { "epoch": 0.10376398779247202, "grad_norm": 0.0, "learning_rate": 1.971605233436485e-05, "loss": 1.1603, "step": 2652 }, { "epoch": 0.10380311448470146, "grad_norm": 0.0, "learning_rate": 1.971575241953419e-05, "loss": 1.1904, "step": 2653 }, { "epoch": 0.1038422411769309, "grad_norm": 0.0, "learning_rate": 1.9715452348680716e-05, "loss": 1.1935, "step": 2654 }, { "epoch": 0.10388136786916034, "grad_norm": 0.0, "learning_rate": 1.9715152121809253e-05, "loss": 1.3083, "step": 2655 }, { "epoch": 0.10392049456138978, "grad_norm": 0.0, "learning_rate": 1.971485173892462e-05, "loss": 1.202, "step": 2656 }, { "epoch": 0.10395962125361922, "grad_norm": 0.0, "learning_rate": 1.9714551200031644e-05, "loss": 1.16, "step": 2657 }, { "epoch": 0.10399874794584865, "grad_norm": 0.0, "learning_rate": 1.9714250505135144e-05, "loss": 1.1212, "step": 2658 }, { "epoch": 0.10403787463807809, "grad_norm": 0.0, "learning_rate": 1.9713949654239956e-05, "loss": 1.1828, "step": 2659 }, { "epoch": 0.10407700133030753, "grad_norm": 0.0, "learning_rate": 1.9713648647350912e-05, "loss": 1.1162, "step": 2660 }, { "epoch": 0.10411612802253697, "grad_norm": 0.0, "learning_rate": 1.971334748447284e-05, "loss": 1.3428, "step": 2661 }, { "epoch": 0.10415525471476642, "grad_norm": 0.0, "learning_rate": 1.9713046165610576e-05, "loss": 1.2352, "step": 2662 }, { "epoch": 0.10419438140699586, "grad_norm": 0.0, "learning_rate": 1.9712744690768967e-05, "loss": 1.2942, "step": 2663 }, { "epoch": 0.1042335080992253, "grad_norm": 0.0, "learning_rate": 1.9712443059952845e-05, "loss": 1.1516, "step": 2664 }, { "epoch": 0.10427263479145472, "grad_norm": 0.0, "learning_rate": 1.9712141273167058e-05, "loss": 1.2319, "step": 2665 }, { "epoch": 0.10431176148368417, "grad_norm": 0.0, "learning_rate": 1.9711839330416453e-05, "loss": 1.2684, "step": 2666 }, { "epoch": 0.1043508881759136, "grad_norm": 0.0, "learning_rate": 1.971153723170588e-05, "loss": 1.3095, "step": 2667 }, { "epoch": 0.10439001486814305, "grad_norm": 0.0, "learning_rate": 1.9711234977040187e-05, "loss": 1.3777, "step": 2668 }, { "epoch": 0.10442914156037249, "grad_norm": 0.0, "learning_rate": 1.971093256642423e-05, "loss": 1.2677, "step": 2669 }, { "epoch": 0.10446826825260193, "grad_norm": 0.0, "learning_rate": 1.971062999986286e-05, "loss": 1.2097, "step": 2670 }, { "epoch": 0.10450739494483137, "grad_norm": 0.0, "learning_rate": 1.9710327277360942e-05, "loss": 1.2749, "step": 2671 }, { "epoch": 0.1045465216370608, "grad_norm": 0.0, "learning_rate": 1.9710024398923338e-05, "loss": 1.3203, "step": 2672 }, { "epoch": 0.10458564832929024, "grad_norm": 0.0, "learning_rate": 1.970972136455491e-05, "loss": 1.2589, "step": 2673 }, { "epoch": 0.10462477502151968, "grad_norm": 0.0, "learning_rate": 1.9709418174260523e-05, "loss": 1.0568, "step": 2674 }, { "epoch": 0.10466390171374912, "grad_norm": 0.0, "learning_rate": 1.9709114828045046e-05, "loss": 1.1962, "step": 2675 }, { "epoch": 0.10470302840597856, "grad_norm": 0.0, "learning_rate": 1.9708811325913352e-05, "loss": 1.1312, "step": 2676 }, { "epoch": 0.104742155098208, "grad_norm": 0.0, "learning_rate": 1.9708507667870312e-05, "loss": 1.3019, "step": 2677 }, { "epoch": 0.10478128179043744, "grad_norm": 0.0, "learning_rate": 1.9708203853920803e-05, "loss": 1.2592, "step": 2678 }, { "epoch": 0.10482040848266687, "grad_norm": 0.0, "learning_rate": 1.970789988406971e-05, "loss": 1.188, "step": 2679 }, { "epoch": 0.10485953517489631, "grad_norm": 0.0, "learning_rate": 1.9707595758321906e-05, "loss": 1.1658, "step": 2680 }, { "epoch": 0.10489866186712575, "grad_norm": 0.0, "learning_rate": 1.970729147668228e-05, "loss": 1.1459, "step": 2681 }, { "epoch": 0.10493778855935519, "grad_norm": 0.0, "learning_rate": 1.9706987039155715e-05, "loss": 1.1649, "step": 2682 }, { "epoch": 0.10497691525158463, "grad_norm": 0.0, "learning_rate": 1.9706682445747104e-05, "loss": 1.1313, "step": 2683 }, { "epoch": 0.10501604194381407, "grad_norm": 0.0, "learning_rate": 1.9706377696461337e-05, "loss": 1.2822, "step": 2684 }, { "epoch": 0.10505516863604351, "grad_norm": 0.0, "learning_rate": 1.9706072791303303e-05, "loss": 1.2679, "step": 2685 }, { "epoch": 0.10509429532827295, "grad_norm": 0.0, "learning_rate": 1.9705767730277905e-05, "loss": 1.2129, "step": 2686 }, { "epoch": 0.10513342202050238, "grad_norm": 0.0, "learning_rate": 1.970546251339004e-05, "loss": 1.3077, "step": 2687 }, { "epoch": 0.10517254871273182, "grad_norm": 0.0, "learning_rate": 1.9705157140644608e-05, "loss": 1.169, "step": 2688 }, { "epoch": 0.10521167540496126, "grad_norm": 0.0, "learning_rate": 1.970485161204651e-05, "loss": 1.1033, "step": 2689 }, { "epoch": 0.1052508020971907, "grad_norm": 0.0, "learning_rate": 1.970454592760066e-05, "loss": 1.1514, "step": 2690 }, { "epoch": 0.10528992878942015, "grad_norm": 0.0, "learning_rate": 1.9704240087311963e-05, "loss": 1.2596, "step": 2691 }, { "epoch": 0.10532905548164959, "grad_norm": 0.0, "learning_rate": 1.9703934091185325e-05, "loss": 1.3158, "step": 2692 }, { "epoch": 0.10536818217387903, "grad_norm": 0.0, "learning_rate": 1.9703627939225673e-05, "loss": 1.1985, "step": 2693 }, { "epoch": 0.10540730886610845, "grad_norm": 0.0, "learning_rate": 1.970332163143791e-05, "loss": 1.1464, "step": 2694 }, { "epoch": 0.1054464355583379, "grad_norm": 0.0, "learning_rate": 1.9703015167826963e-05, "loss": 1.2519, "step": 2695 }, { "epoch": 0.10548556225056734, "grad_norm": 0.0, "learning_rate": 1.970270854839775e-05, "loss": 1.1286, "step": 2696 }, { "epoch": 0.10552468894279678, "grad_norm": 0.0, "learning_rate": 1.9702401773155197e-05, "loss": 1.2816, "step": 2697 }, { "epoch": 0.10556381563502622, "grad_norm": 0.0, "learning_rate": 1.9702094842104228e-05, "loss": 1.0856, "step": 2698 }, { "epoch": 0.10560294232725566, "grad_norm": 0.0, "learning_rate": 1.9701787755249774e-05, "loss": 1.1471, "step": 2699 }, { "epoch": 0.1056420690194851, "grad_norm": 0.0, "learning_rate": 1.9701480512596767e-05, "loss": 1.3154, "step": 2700 }, { "epoch": 0.10568119571171453, "grad_norm": 0.0, "learning_rate": 1.9701173114150137e-05, "loss": 1.2383, "step": 2701 }, { "epoch": 0.10572032240394397, "grad_norm": 0.0, "learning_rate": 1.9700865559914823e-05, "loss": 1.2812, "step": 2702 }, { "epoch": 0.10575944909617341, "grad_norm": 0.0, "learning_rate": 1.9700557849895764e-05, "loss": 1.1826, "step": 2703 }, { "epoch": 0.10579857578840285, "grad_norm": 0.0, "learning_rate": 1.9700249984097907e-05, "loss": 1.147, "step": 2704 }, { "epoch": 0.10583770248063229, "grad_norm": 0.0, "learning_rate": 1.9699941962526184e-05, "loss": 1.3214, "step": 2705 }, { "epoch": 0.10587682917286173, "grad_norm": 0.0, "learning_rate": 1.9699633785185546e-05, "loss": 1.1892, "step": 2706 }, { "epoch": 0.10591595586509117, "grad_norm": 0.0, "learning_rate": 1.969932545208095e-05, "loss": 1.0544, "step": 2707 }, { "epoch": 0.1059550825573206, "grad_norm": 0.0, "learning_rate": 1.969901696321734e-05, "loss": 1.0943, "step": 2708 }, { "epoch": 0.10599420924955004, "grad_norm": 0.0, "learning_rate": 1.969870831859967e-05, "loss": 1.1971, "step": 2709 }, { "epoch": 0.10603333594177948, "grad_norm": 0.0, "learning_rate": 1.9698399518232895e-05, "loss": 1.2338, "step": 2710 }, { "epoch": 0.10607246263400892, "grad_norm": 0.0, "learning_rate": 1.969809056212198e-05, "loss": 1.219, "step": 2711 }, { "epoch": 0.10611158932623836, "grad_norm": 0.0, "learning_rate": 1.969778145027188e-05, "loss": 1.3264, "step": 2712 }, { "epoch": 0.1061507160184678, "grad_norm": 0.0, "learning_rate": 1.9697472182687564e-05, "loss": 1.1141, "step": 2713 }, { "epoch": 0.10618984271069724, "grad_norm": 0.0, "learning_rate": 1.9697162759373997e-05, "loss": 1.2715, "step": 2714 }, { "epoch": 0.10622896940292667, "grad_norm": 0.0, "learning_rate": 1.9696853180336146e-05, "loss": 1.2425, "step": 2715 }, { "epoch": 0.10626809609515611, "grad_norm": 0.0, "learning_rate": 1.9696543445578983e-05, "loss": 1.1389, "step": 2716 }, { "epoch": 0.10630722278738555, "grad_norm": 0.0, "learning_rate": 1.9696233555107484e-05, "loss": 1.2482, "step": 2717 }, { "epoch": 0.106346349479615, "grad_norm": 0.0, "learning_rate": 1.9695923508926626e-05, "loss": 1.0629, "step": 2718 }, { "epoch": 0.10638547617184443, "grad_norm": 0.0, "learning_rate": 1.969561330704138e-05, "loss": 1.3921, "step": 2719 }, { "epoch": 0.10642460286407388, "grad_norm": 0.0, "learning_rate": 1.969530294945674e-05, "loss": 1.2714, "step": 2720 }, { "epoch": 0.10646372955630332, "grad_norm": 0.0, "learning_rate": 1.9694992436177683e-05, "loss": 1.1523, "step": 2721 }, { "epoch": 0.10650285624853274, "grad_norm": 0.0, "learning_rate": 1.9694681767209194e-05, "loss": 1.147, "step": 2722 }, { "epoch": 0.10654198294076218, "grad_norm": 0.0, "learning_rate": 1.969437094255626e-05, "loss": 1.17, "step": 2723 }, { "epoch": 0.10658110963299162, "grad_norm": 0.0, "learning_rate": 1.9694059962223885e-05, "loss": 1.2913, "step": 2724 }, { "epoch": 0.10662023632522107, "grad_norm": 0.0, "learning_rate": 1.969374882621705e-05, "loss": 1.2441, "step": 2725 }, { "epoch": 0.1066593630174505, "grad_norm": 0.0, "learning_rate": 1.9693437534540753e-05, "loss": 1.2241, "step": 2726 }, { "epoch": 0.10669848970967995, "grad_norm": 0.0, "learning_rate": 1.96931260872e-05, "loss": 1.207, "step": 2727 }, { "epoch": 0.10673761640190939, "grad_norm": 0.0, "learning_rate": 1.9692814484199785e-05, "loss": 1.1948, "step": 2728 }, { "epoch": 0.10677674309413882, "grad_norm": 0.0, "learning_rate": 1.9692502725545116e-05, "loss": 1.2203, "step": 2729 }, { "epoch": 0.10681586978636826, "grad_norm": 0.0, "learning_rate": 1.9692190811241e-05, "loss": 1.3416, "step": 2730 }, { "epoch": 0.1068549964785977, "grad_norm": 0.0, "learning_rate": 1.9691878741292444e-05, "loss": 1.2997, "step": 2731 }, { "epoch": 0.10689412317082714, "grad_norm": 0.0, "learning_rate": 1.969156651570446e-05, "loss": 1.1766, "step": 2732 }, { "epoch": 0.10693324986305658, "grad_norm": 0.0, "learning_rate": 1.9691254134482062e-05, "loss": 1.2845, "step": 2733 }, { "epoch": 0.10697237655528602, "grad_norm": 0.0, "learning_rate": 1.9690941597630266e-05, "loss": 1.2617, "step": 2734 }, { "epoch": 0.10701150324751546, "grad_norm": 0.0, "learning_rate": 1.9690628905154092e-05, "loss": 1.1873, "step": 2735 }, { "epoch": 0.10705062993974489, "grad_norm": 0.0, "learning_rate": 1.969031605705856e-05, "loss": 1.1574, "step": 2736 }, { "epoch": 0.10708975663197433, "grad_norm": 0.0, "learning_rate": 1.9690003053348698e-05, "loss": 1.2865, "step": 2737 }, { "epoch": 0.10712888332420377, "grad_norm": 0.0, "learning_rate": 1.9689689894029526e-05, "loss": 1.181, "step": 2738 }, { "epoch": 0.10716801001643321, "grad_norm": 0.0, "learning_rate": 1.9689376579106075e-05, "loss": 1.2419, "step": 2739 }, { "epoch": 0.10720713670866265, "grad_norm": 0.0, "learning_rate": 1.968906310858338e-05, "loss": 1.1766, "step": 2740 }, { "epoch": 0.10724626340089209, "grad_norm": 0.0, "learning_rate": 1.968874948246647e-05, "loss": 1.2691, "step": 2741 }, { "epoch": 0.10728539009312153, "grad_norm": 0.0, "learning_rate": 1.9688435700760388e-05, "loss": 1.3247, "step": 2742 }, { "epoch": 0.10732451678535096, "grad_norm": 0.0, "learning_rate": 1.9688121763470165e-05, "loss": 1.2122, "step": 2743 }, { "epoch": 0.1073636434775804, "grad_norm": 0.0, "learning_rate": 1.9687807670600847e-05, "loss": 1.3059, "step": 2744 }, { "epoch": 0.10740277016980984, "grad_norm": 0.0, "learning_rate": 1.968749342215748e-05, "loss": 1.2368, "step": 2745 }, { "epoch": 0.10744189686203928, "grad_norm": 0.0, "learning_rate": 1.9687179018145105e-05, "loss": 1.1801, "step": 2746 }, { "epoch": 0.10748102355426872, "grad_norm": 0.0, "learning_rate": 1.968686445856878e-05, "loss": 1.2588, "step": 2747 }, { "epoch": 0.10752015024649816, "grad_norm": 0.0, "learning_rate": 1.968654974343354e-05, "loss": 1.2386, "step": 2748 }, { "epoch": 0.1075592769387276, "grad_norm": 0.0, "learning_rate": 1.9686234872744454e-05, "loss": 1.2441, "step": 2749 }, { "epoch": 0.10759840363095705, "grad_norm": 0.0, "learning_rate": 1.9685919846506577e-05, "loss": 1.2722, "step": 2750 }, { "epoch": 0.10763753032318647, "grad_norm": 0.0, "learning_rate": 1.968560466472496e-05, "loss": 1.2854, "step": 2751 }, { "epoch": 0.10767665701541591, "grad_norm": 0.0, "learning_rate": 1.9685289327404668e-05, "loss": 1.1225, "step": 2752 }, { "epoch": 0.10771578370764535, "grad_norm": 0.0, "learning_rate": 1.968497383455077e-05, "loss": 1.2634, "step": 2753 }, { "epoch": 0.1077549103998748, "grad_norm": 0.0, "learning_rate": 1.9684658186168324e-05, "loss": 1.2858, "step": 2754 }, { "epoch": 0.10779403709210424, "grad_norm": 0.0, "learning_rate": 1.9684342382262404e-05, "loss": 1.1946, "step": 2755 }, { "epoch": 0.10783316378433368, "grad_norm": 0.0, "learning_rate": 1.968402642283808e-05, "loss": 1.0994, "step": 2756 }, { "epoch": 0.10787229047656312, "grad_norm": 0.0, "learning_rate": 1.968371030790043e-05, "loss": 1.2589, "step": 2757 }, { "epoch": 0.10791141716879254, "grad_norm": 0.0, "learning_rate": 1.9683394037454522e-05, "loss": 1.2103, "step": 2758 }, { "epoch": 0.10795054386102199, "grad_norm": 0.0, "learning_rate": 1.9683077611505443e-05, "loss": 1.3177, "step": 2759 }, { "epoch": 0.10798967055325143, "grad_norm": 0.0, "learning_rate": 1.968276103005827e-05, "loss": 1.3246, "step": 2760 }, { "epoch": 0.10802879724548087, "grad_norm": 0.0, "learning_rate": 1.968244429311809e-05, "loss": 1.2635, "step": 2761 }, { "epoch": 0.10806792393771031, "grad_norm": 0.0, "learning_rate": 1.9682127400689986e-05, "loss": 1.1378, "step": 2762 }, { "epoch": 0.10810705062993975, "grad_norm": 0.0, "learning_rate": 1.9681810352779047e-05, "loss": 1.2765, "step": 2763 }, { "epoch": 0.10814617732216919, "grad_norm": 0.0, "learning_rate": 1.9681493149390366e-05, "loss": 1.1438, "step": 2764 }, { "epoch": 0.10818530401439862, "grad_norm": 0.0, "learning_rate": 1.968117579052904e-05, "loss": 1.1374, "step": 2765 }, { "epoch": 0.10822443070662806, "grad_norm": 0.0, "learning_rate": 1.9680858276200156e-05, "loss": 1.0966, "step": 2766 }, { "epoch": 0.1082635573988575, "grad_norm": 0.0, "learning_rate": 1.9680540606408826e-05, "loss": 1.2269, "step": 2767 }, { "epoch": 0.10830268409108694, "grad_norm": 0.0, "learning_rate": 1.968022278116014e-05, "loss": 1.1691, "step": 2768 }, { "epoch": 0.10834181078331638, "grad_norm": 0.0, "learning_rate": 1.9679904800459205e-05, "loss": 1.2585, "step": 2769 }, { "epoch": 0.10838093747554582, "grad_norm": 0.0, "learning_rate": 1.967958666431113e-05, "loss": 1.3185, "step": 2770 }, { "epoch": 0.10842006416777526, "grad_norm": 0.0, "learning_rate": 1.9679268372721025e-05, "loss": 1.0474, "step": 2771 }, { "epoch": 0.10845919086000469, "grad_norm": 0.0, "learning_rate": 1.9678949925693996e-05, "loss": 1.1672, "step": 2772 }, { "epoch": 0.10849831755223413, "grad_norm": 0.0, "learning_rate": 1.967863132323516e-05, "loss": 1.2904, "step": 2773 }, { "epoch": 0.10853744424446357, "grad_norm": 0.0, "learning_rate": 1.967831256534963e-05, "loss": 1.2274, "step": 2774 }, { "epoch": 0.10857657093669301, "grad_norm": 0.0, "learning_rate": 1.9677993652042532e-05, "loss": 1.1387, "step": 2775 }, { "epoch": 0.10861569762892245, "grad_norm": 0.0, "learning_rate": 1.9677674583318982e-05, "loss": 1.3404, "step": 2776 }, { "epoch": 0.1086548243211519, "grad_norm": 0.0, "learning_rate": 1.9677355359184104e-05, "loss": 1.2654, "step": 2777 }, { "epoch": 0.10869395101338133, "grad_norm": 0.0, "learning_rate": 1.9677035979643027e-05, "loss": 1.1057, "step": 2778 }, { "epoch": 0.10873307770561076, "grad_norm": 0.0, "learning_rate": 1.9676716444700877e-05, "loss": 1.2428, "step": 2779 }, { "epoch": 0.1087722043978402, "grad_norm": 0.0, "learning_rate": 1.967639675436279e-05, "loss": 1.1557, "step": 2780 }, { "epoch": 0.10881133109006964, "grad_norm": 0.0, "learning_rate": 1.967607690863389e-05, "loss": 1.2642, "step": 2781 }, { "epoch": 0.10885045778229908, "grad_norm": 0.0, "learning_rate": 1.9675756907519325e-05, "loss": 1.363, "step": 2782 }, { "epoch": 0.10888958447452853, "grad_norm": 0.0, "learning_rate": 1.9675436751024222e-05, "loss": 1.3279, "step": 2783 }, { "epoch": 0.10892871116675797, "grad_norm": 0.0, "learning_rate": 1.9675116439153736e-05, "loss": 1.0835, "step": 2784 }, { "epoch": 0.1089678378589874, "grad_norm": 0.0, "learning_rate": 1.9674795971913e-05, "loss": 1.2189, "step": 2785 }, { "epoch": 0.10900696455121683, "grad_norm": 0.0, "learning_rate": 1.9674475349307163e-05, "loss": 1.1454, "step": 2786 }, { "epoch": 0.10904609124344627, "grad_norm": 0.0, "learning_rate": 1.9674154571341378e-05, "loss": 1.2034, "step": 2787 }, { "epoch": 0.10908521793567572, "grad_norm": 0.0, "learning_rate": 1.9673833638020793e-05, "loss": 1.2103, "step": 2788 }, { "epoch": 0.10912434462790516, "grad_norm": 0.0, "learning_rate": 1.9673512549350557e-05, "loss": 1.1517, "step": 2789 }, { "epoch": 0.1091634713201346, "grad_norm": 0.0, "learning_rate": 1.9673191305335833e-05, "loss": 1.1562, "step": 2790 }, { "epoch": 0.10920259801236404, "grad_norm": 0.0, "learning_rate": 1.967286990598178e-05, "loss": 1.3098, "step": 2791 }, { "epoch": 0.10924172470459348, "grad_norm": 0.0, "learning_rate": 1.9672548351293555e-05, "loss": 1.1395, "step": 2792 }, { "epoch": 0.1092808513968229, "grad_norm": 0.0, "learning_rate": 1.9672226641276327e-05, "loss": 1.2114, "step": 2793 }, { "epoch": 0.10931997808905235, "grad_norm": 0.0, "learning_rate": 1.9671904775935256e-05, "loss": 1.2544, "step": 2794 }, { "epoch": 0.10935910478128179, "grad_norm": 0.0, "learning_rate": 1.9671582755275515e-05, "loss": 1.2516, "step": 2795 }, { "epoch": 0.10939823147351123, "grad_norm": 0.0, "learning_rate": 1.9671260579302275e-05, "loss": 1.2263, "step": 2796 }, { "epoch": 0.10943735816574067, "grad_norm": 0.0, "learning_rate": 1.9670938248020706e-05, "loss": 1.2156, "step": 2797 }, { "epoch": 0.10947648485797011, "grad_norm": 0.0, "learning_rate": 1.967061576143599e-05, "loss": 1.1986, "step": 2798 }, { "epoch": 0.10951561155019955, "grad_norm": 0.0, "learning_rate": 1.96702931195533e-05, "loss": 1.2402, "step": 2799 }, { "epoch": 0.10955473824242898, "grad_norm": 0.0, "learning_rate": 1.9669970322377824e-05, "loss": 1.0554, "step": 2800 }, { "epoch": 0.10959386493465842, "grad_norm": 0.0, "learning_rate": 1.966964736991474e-05, "loss": 1.1972, "step": 2801 }, { "epoch": 0.10963299162688786, "grad_norm": 0.0, "learning_rate": 1.9669324262169234e-05, "loss": 1.2529, "step": 2802 }, { "epoch": 0.1096721183191173, "grad_norm": 0.0, "learning_rate": 1.96690009991465e-05, "loss": 1.1158, "step": 2803 }, { "epoch": 0.10971124501134674, "grad_norm": 0.0, "learning_rate": 1.9668677580851723e-05, "loss": 1.1614, "step": 2804 }, { "epoch": 0.10975037170357618, "grad_norm": 0.0, "learning_rate": 1.96683540072901e-05, "loss": 1.305, "step": 2805 }, { "epoch": 0.10978949839580562, "grad_norm": 0.0, "learning_rate": 1.966803027846683e-05, "loss": 1.1697, "step": 2806 }, { "epoch": 0.10982862508803505, "grad_norm": 0.0, "learning_rate": 1.9667706394387107e-05, "loss": 1.1731, "step": 2807 }, { "epoch": 0.10986775178026449, "grad_norm": 0.0, "learning_rate": 1.9667382355056128e-05, "loss": 1.1976, "step": 2808 }, { "epoch": 0.10990687847249393, "grad_norm": 0.0, "learning_rate": 1.9667058160479108e-05, "loss": 1.3775, "step": 2809 }, { "epoch": 0.10994600516472337, "grad_norm": 0.0, "learning_rate": 1.9666733810661247e-05, "loss": 1.1266, "step": 2810 }, { "epoch": 0.10998513185695281, "grad_norm": 0.0, "learning_rate": 1.9666409305607753e-05, "loss": 1.2328, "step": 2811 }, { "epoch": 0.11002425854918226, "grad_norm": 0.0, "learning_rate": 1.966608464532384e-05, "loss": 1.215, "step": 2812 }, { "epoch": 0.1100633852414117, "grad_norm": 0.0, "learning_rate": 1.9665759829814717e-05, "loss": 1.2556, "step": 2813 }, { "epoch": 0.11010251193364114, "grad_norm": 0.0, "learning_rate": 1.9665434859085602e-05, "loss": 1.1801, "step": 2814 }, { "epoch": 0.11014163862587056, "grad_norm": 0.0, "learning_rate": 1.9665109733141718e-05, "loss": 1.2233, "step": 2815 }, { "epoch": 0.1101807653181, "grad_norm": 0.0, "learning_rate": 1.966478445198828e-05, "loss": 1.3419, "step": 2816 }, { "epoch": 0.11021989201032945, "grad_norm": 0.0, "learning_rate": 1.9664459015630518e-05, "loss": 1.2139, "step": 2817 }, { "epoch": 0.11025901870255889, "grad_norm": 0.0, "learning_rate": 1.9664133424073656e-05, "loss": 1.0851, "step": 2818 }, { "epoch": 0.11029814539478833, "grad_norm": 0.0, "learning_rate": 1.9663807677322916e-05, "loss": 1.1959, "step": 2819 }, { "epoch": 0.11033727208701777, "grad_norm": 0.0, "learning_rate": 1.9663481775383535e-05, "loss": 1.0554, "step": 2820 }, { "epoch": 0.11037639877924721, "grad_norm": 0.0, "learning_rate": 1.9663155718260746e-05, "loss": 1.2462, "step": 2821 }, { "epoch": 0.11041552547147664, "grad_norm": 0.0, "learning_rate": 1.9662829505959786e-05, "loss": 1.2999, "step": 2822 }, { "epoch": 0.11045465216370608, "grad_norm": 0.0, "learning_rate": 1.966250313848589e-05, "loss": 1.1733, "step": 2823 }, { "epoch": 0.11049377885593552, "grad_norm": 0.0, "learning_rate": 1.9662176615844304e-05, "loss": 1.098, "step": 2824 }, { "epoch": 0.11053290554816496, "grad_norm": 0.0, "learning_rate": 1.966184993804027e-05, "loss": 1.3754, "step": 2825 }, { "epoch": 0.1105720322403944, "grad_norm": 0.0, "learning_rate": 1.966152310507903e-05, "loss": 1.2242, "step": 2826 }, { "epoch": 0.11061115893262384, "grad_norm": 0.0, "learning_rate": 1.9661196116965838e-05, "loss": 1.1589, "step": 2827 }, { "epoch": 0.11065028562485328, "grad_norm": 0.0, "learning_rate": 1.9660868973705938e-05, "loss": 1.2359, "step": 2828 }, { "epoch": 0.11068941231708271, "grad_norm": 0.0, "learning_rate": 1.966054167530459e-05, "loss": 1.2999, "step": 2829 }, { "epoch": 0.11072853900931215, "grad_norm": 0.0, "learning_rate": 1.9660214221767053e-05, "loss": 1.2822, "step": 2830 }, { "epoch": 0.11076766570154159, "grad_norm": 0.0, "learning_rate": 1.9659886613098574e-05, "loss": 1.2144, "step": 2831 }, { "epoch": 0.11080679239377103, "grad_norm": 0.0, "learning_rate": 1.9659558849304424e-05, "loss": 1.1837, "step": 2832 }, { "epoch": 0.11084591908600047, "grad_norm": 0.0, "learning_rate": 1.965923093038986e-05, "loss": 1.1823, "step": 2833 }, { "epoch": 0.11088504577822991, "grad_norm": 0.0, "learning_rate": 1.9658902856360153e-05, "loss": 1.1815, "step": 2834 }, { "epoch": 0.11092417247045935, "grad_norm": 0.0, "learning_rate": 1.965857462722057e-05, "loss": 1.2321, "step": 2835 }, { "epoch": 0.11096329916268878, "grad_norm": 0.0, "learning_rate": 1.965824624297638e-05, "loss": 1.231, "step": 2836 }, { "epoch": 0.11100242585491822, "grad_norm": 0.0, "learning_rate": 1.965791770363286e-05, "loss": 1.2219, "step": 2837 }, { "epoch": 0.11104155254714766, "grad_norm": 0.0, "learning_rate": 1.965758900919528e-05, "loss": 1.1547, "step": 2838 }, { "epoch": 0.1110806792393771, "grad_norm": 0.0, "learning_rate": 1.965726015966893e-05, "loss": 1.2701, "step": 2839 }, { "epoch": 0.11111980593160654, "grad_norm": 0.0, "learning_rate": 1.9656931155059077e-05, "loss": 1.1033, "step": 2840 }, { "epoch": 0.11115893262383598, "grad_norm": 0.0, "learning_rate": 1.965660199537101e-05, "loss": 1.3561, "step": 2841 }, { "epoch": 0.11119805931606543, "grad_norm": 0.0, "learning_rate": 1.9656272680610015e-05, "loss": 1.2312, "step": 2842 }, { "epoch": 0.11123718600829485, "grad_norm": 0.0, "learning_rate": 1.9655943210781384e-05, "loss": 1.2881, "step": 2843 }, { "epoch": 0.1112763127005243, "grad_norm": 0.0, "learning_rate": 1.96556135858904e-05, "loss": 1.2299, "step": 2844 }, { "epoch": 0.11131543939275373, "grad_norm": 0.0, "learning_rate": 1.9655283805942364e-05, "loss": 1.2087, "step": 2845 }, { "epoch": 0.11135456608498318, "grad_norm": 0.0, "learning_rate": 1.965495387094257e-05, "loss": 1.2642, "step": 2846 }, { "epoch": 0.11139369277721262, "grad_norm": 0.0, "learning_rate": 1.9654623780896313e-05, "loss": 1.2253, "step": 2847 }, { "epoch": 0.11143281946944206, "grad_norm": 0.0, "learning_rate": 1.9654293535808895e-05, "loss": 1.2693, "step": 2848 }, { "epoch": 0.1114719461616715, "grad_norm": 0.0, "learning_rate": 1.9653963135685622e-05, "loss": 1.169, "step": 2849 }, { "epoch": 0.11151107285390092, "grad_norm": 0.0, "learning_rate": 1.96536325805318e-05, "loss": 1.0095, "step": 2850 }, { "epoch": 0.11155019954613037, "grad_norm": 0.0, "learning_rate": 1.9653301870352733e-05, "loss": 1.2183, "step": 2851 }, { "epoch": 0.1115893262383598, "grad_norm": 0.0, "learning_rate": 1.9652971005153735e-05, "loss": 1.2426, "step": 2852 }, { "epoch": 0.11162845293058925, "grad_norm": 0.0, "learning_rate": 1.965263998494012e-05, "loss": 1.2117, "step": 2853 }, { "epoch": 0.11166757962281869, "grad_norm": 0.0, "learning_rate": 1.96523088097172e-05, "loss": 1.1135, "step": 2854 }, { "epoch": 0.11170670631504813, "grad_norm": 0.0, "learning_rate": 1.9651977479490293e-05, "loss": 1.2654, "step": 2855 }, { "epoch": 0.11174583300727757, "grad_norm": 0.0, "learning_rate": 1.965164599426473e-05, "loss": 1.1665, "step": 2856 }, { "epoch": 0.111784959699507, "grad_norm": 0.0, "learning_rate": 1.9651314354045817e-05, "loss": 1.17, "step": 2857 }, { "epoch": 0.11182408639173644, "grad_norm": 0.0, "learning_rate": 1.9650982558838897e-05, "loss": 1.2178, "step": 2858 }, { "epoch": 0.11186321308396588, "grad_norm": 0.0, "learning_rate": 1.9650650608649285e-05, "loss": 1.3123, "step": 2859 }, { "epoch": 0.11190233977619532, "grad_norm": 0.0, "learning_rate": 1.9650318503482323e-05, "loss": 1.1476, "step": 2860 }, { "epoch": 0.11194146646842476, "grad_norm": 0.0, "learning_rate": 1.9649986243343335e-05, "loss": 1.2916, "step": 2861 }, { "epoch": 0.1119805931606542, "grad_norm": 0.0, "learning_rate": 1.964965382823766e-05, "loss": 1.2771, "step": 2862 }, { "epoch": 0.11201971985288364, "grad_norm": 0.0, "learning_rate": 1.9649321258170634e-05, "loss": 1.2462, "step": 2863 }, { "epoch": 0.11205884654511307, "grad_norm": 0.0, "learning_rate": 1.96489885331476e-05, "loss": 1.1943, "step": 2864 }, { "epoch": 0.11209797323734251, "grad_norm": 0.0, "learning_rate": 1.9648655653173906e-05, "loss": 1.2294, "step": 2865 }, { "epoch": 0.11213709992957195, "grad_norm": 0.0, "learning_rate": 1.9648322618254888e-05, "loss": 1.2223, "step": 2866 }, { "epoch": 0.11217622662180139, "grad_norm": 0.0, "learning_rate": 1.96479894283959e-05, "loss": 1.1136, "step": 2867 }, { "epoch": 0.11221535331403083, "grad_norm": 0.0, "learning_rate": 1.9647656083602292e-05, "loss": 1.37, "step": 2868 }, { "epoch": 0.11225448000626027, "grad_norm": 0.0, "learning_rate": 1.964732258387942e-05, "loss": 1.2302, "step": 2869 }, { "epoch": 0.11229360669848971, "grad_norm": 0.0, "learning_rate": 1.964698892923263e-05, "loss": 1.1993, "step": 2870 }, { "epoch": 0.11233273339071916, "grad_norm": 0.0, "learning_rate": 1.964665511966729e-05, "loss": 1.1821, "step": 2871 }, { "epoch": 0.11237186008294858, "grad_norm": 0.0, "learning_rate": 1.9646321155188755e-05, "loss": 1.2419, "step": 2872 }, { "epoch": 0.11241098677517802, "grad_norm": 0.0, "learning_rate": 1.964598703580239e-05, "loss": 1.1566, "step": 2873 }, { "epoch": 0.11245011346740746, "grad_norm": 0.0, "learning_rate": 1.964565276151356e-05, "loss": 1.2423, "step": 2874 }, { "epoch": 0.1124892401596369, "grad_norm": 0.0, "learning_rate": 1.9645318332327633e-05, "loss": 1.2894, "step": 2875 }, { "epoch": 0.11252836685186635, "grad_norm": 0.0, "learning_rate": 1.9644983748249982e-05, "loss": 1.1785, "step": 2876 }, { "epoch": 0.11256749354409579, "grad_norm": 0.0, "learning_rate": 1.9644649009285977e-05, "loss": 1.1757, "step": 2877 }, { "epoch": 0.11260662023632523, "grad_norm": 0.0, "learning_rate": 1.9644314115440995e-05, "loss": 1.3589, "step": 2878 }, { "epoch": 0.11264574692855465, "grad_norm": 0.0, "learning_rate": 1.9643979066720412e-05, "loss": 1.253, "step": 2879 }, { "epoch": 0.1126848736207841, "grad_norm": 0.0, "learning_rate": 1.964364386312961e-05, "loss": 1.2175, "step": 2880 }, { "epoch": 0.11272400031301354, "grad_norm": 0.0, "learning_rate": 1.964330850467397e-05, "loss": 1.2092, "step": 2881 }, { "epoch": 0.11276312700524298, "grad_norm": 0.0, "learning_rate": 1.9642972991358883e-05, "loss": 1.2203, "step": 2882 }, { "epoch": 0.11280225369747242, "grad_norm": 0.0, "learning_rate": 1.964263732318973e-05, "loss": 1.1689, "step": 2883 }, { "epoch": 0.11284138038970186, "grad_norm": 0.0, "learning_rate": 1.9642301500171904e-05, "loss": 1.257, "step": 2884 }, { "epoch": 0.1128805070819313, "grad_norm": 0.0, "learning_rate": 1.96419655223108e-05, "loss": 1.0759, "step": 2885 }, { "epoch": 0.11291963377416073, "grad_norm": 0.0, "learning_rate": 1.9641629389611813e-05, "loss": 1.3393, "step": 2886 }, { "epoch": 0.11295876046639017, "grad_norm": 0.0, "learning_rate": 1.964129310208034e-05, "loss": 1.2073, "step": 2887 }, { "epoch": 0.11299788715861961, "grad_norm": 0.0, "learning_rate": 1.9640956659721775e-05, "loss": 1.1423, "step": 2888 }, { "epoch": 0.11303701385084905, "grad_norm": 0.0, "learning_rate": 1.9640620062541532e-05, "loss": 1.2097, "step": 2889 }, { "epoch": 0.11307614054307849, "grad_norm": 0.0, "learning_rate": 1.9640283310545012e-05, "loss": 1.2206, "step": 2890 }, { "epoch": 0.11311526723530793, "grad_norm": 0.0, "learning_rate": 1.963994640373762e-05, "loss": 1.2523, "step": 2891 }, { "epoch": 0.11315439392753737, "grad_norm": 0.0, "learning_rate": 1.9639609342124768e-05, "loss": 1.2167, "step": 2892 }, { "epoch": 0.1131935206197668, "grad_norm": 0.0, "learning_rate": 1.963927212571187e-05, "loss": 1.324, "step": 2893 }, { "epoch": 0.11323264731199624, "grad_norm": 0.0, "learning_rate": 1.963893475450434e-05, "loss": 1.2341, "step": 2894 }, { "epoch": 0.11327177400422568, "grad_norm": 0.0, "learning_rate": 1.9638597228507596e-05, "loss": 1.2193, "step": 2895 }, { "epoch": 0.11331090069645512, "grad_norm": 0.0, "learning_rate": 1.9638259547727058e-05, "loss": 1.3411, "step": 2896 }, { "epoch": 0.11335002738868456, "grad_norm": 0.0, "learning_rate": 1.963792171216815e-05, "loss": 1.257, "step": 2897 }, { "epoch": 0.113389154080914, "grad_norm": 0.0, "learning_rate": 1.9637583721836294e-05, "loss": 1.1802, "step": 2898 }, { "epoch": 0.11342828077314344, "grad_norm": 0.0, "learning_rate": 1.9637245576736923e-05, "loss": 1.1543, "step": 2899 }, { "epoch": 0.11346740746537287, "grad_norm": 0.0, "learning_rate": 1.963690727687546e-05, "loss": 1.1737, "step": 2900 }, { "epoch": 0.11350653415760231, "grad_norm": 0.0, "learning_rate": 1.9636568822257345e-05, "loss": 1.2693, "step": 2901 }, { "epoch": 0.11354566084983175, "grad_norm": 0.0, "learning_rate": 1.963623021288801e-05, "loss": 1.222, "step": 2902 }, { "epoch": 0.1135847875420612, "grad_norm": 0.0, "learning_rate": 1.9635891448772894e-05, "loss": 1.2264, "step": 2903 }, { "epoch": 0.11362391423429063, "grad_norm": 0.0, "learning_rate": 1.9635552529917433e-05, "loss": 1.2833, "step": 2904 }, { "epoch": 0.11366304092652008, "grad_norm": 0.0, "learning_rate": 1.9635213456327074e-05, "loss": 1.2541, "step": 2905 }, { "epoch": 0.11370216761874952, "grad_norm": 0.0, "learning_rate": 1.9634874228007262e-05, "loss": 1.1522, "step": 2906 }, { "epoch": 0.11374129431097894, "grad_norm": 0.0, "learning_rate": 1.963453484496344e-05, "loss": 1.2626, "step": 2907 }, { "epoch": 0.11378042100320838, "grad_norm": 0.0, "learning_rate": 1.963419530720106e-05, "loss": 1.2087, "step": 2908 }, { "epoch": 0.11381954769543783, "grad_norm": 0.0, "learning_rate": 1.963385561472558e-05, "loss": 1.1992, "step": 2909 }, { "epoch": 0.11385867438766727, "grad_norm": 0.0, "learning_rate": 1.9633515767542448e-05, "loss": 1.2354, "step": 2910 }, { "epoch": 0.11389780107989671, "grad_norm": 0.0, "learning_rate": 1.9633175765657125e-05, "loss": 1.0726, "step": 2911 }, { "epoch": 0.11393692777212615, "grad_norm": 0.0, "learning_rate": 1.9632835609075072e-05, "loss": 1.2561, "step": 2912 }, { "epoch": 0.11397605446435559, "grad_norm": 0.0, "learning_rate": 1.963249529780175e-05, "loss": 1.1329, "step": 2913 }, { "epoch": 0.11401518115658502, "grad_norm": 0.0, "learning_rate": 1.963215483184262e-05, "loss": 1.2843, "step": 2914 }, { "epoch": 0.11405430784881446, "grad_norm": 0.0, "learning_rate": 1.963181421120315e-05, "loss": 1.2226, "step": 2915 }, { "epoch": 0.1140934345410439, "grad_norm": 0.0, "learning_rate": 1.9631473435888822e-05, "loss": 1.2336, "step": 2916 }, { "epoch": 0.11413256123327334, "grad_norm": 0.0, "learning_rate": 1.9631132505905095e-05, "loss": 1.1706, "step": 2917 }, { "epoch": 0.11417168792550278, "grad_norm": 0.0, "learning_rate": 1.9630791421257447e-05, "loss": 1.3123, "step": 2918 }, { "epoch": 0.11421081461773222, "grad_norm": 0.0, "learning_rate": 1.9630450181951362e-05, "loss": 1.2655, "step": 2919 }, { "epoch": 0.11424994130996166, "grad_norm": 0.0, "learning_rate": 1.963010878799231e-05, "loss": 1.1207, "step": 2920 }, { "epoch": 0.11428906800219109, "grad_norm": 0.0, "learning_rate": 1.962976723938578e-05, "loss": 1.1572, "step": 2921 }, { "epoch": 0.11432819469442053, "grad_norm": 0.0, "learning_rate": 1.9629425536137253e-05, "loss": 1.2174, "step": 2922 }, { "epoch": 0.11436732138664997, "grad_norm": 0.0, "learning_rate": 1.9629083678252222e-05, "loss": 1.2844, "step": 2923 }, { "epoch": 0.11440644807887941, "grad_norm": 0.0, "learning_rate": 1.962874166573617e-05, "loss": 1.2619, "step": 2924 }, { "epoch": 0.11444557477110885, "grad_norm": 0.0, "learning_rate": 1.962839949859459e-05, "loss": 1.2829, "step": 2925 }, { "epoch": 0.11448470146333829, "grad_norm": 0.0, "learning_rate": 1.9628057176832986e-05, "loss": 1.2003, "step": 2926 }, { "epoch": 0.11452382815556773, "grad_norm": 0.0, "learning_rate": 1.9627714700456844e-05, "loss": 1.238, "step": 2927 }, { "epoch": 0.11456295484779716, "grad_norm": 0.0, "learning_rate": 1.9627372069471668e-05, "loss": 1.2435, "step": 2928 }, { "epoch": 0.1146020815400266, "grad_norm": 0.0, "learning_rate": 1.962702928388296e-05, "loss": 1.2025, "step": 2929 }, { "epoch": 0.11464120823225604, "grad_norm": 0.0, "learning_rate": 1.9626686343696227e-05, "loss": 1.2721, "step": 2930 }, { "epoch": 0.11468033492448548, "grad_norm": 0.0, "learning_rate": 1.9626343248916972e-05, "loss": 1.25, "step": 2931 }, { "epoch": 0.11471946161671492, "grad_norm": 0.0, "learning_rate": 1.9625999999550708e-05, "loss": 1.2314, "step": 2932 }, { "epoch": 0.11475858830894436, "grad_norm": 0.0, "learning_rate": 1.9625656595602947e-05, "loss": 1.2749, "step": 2933 }, { "epoch": 0.1147977150011738, "grad_norm": 0.0, "learning_rate": 1.96253130370792e-05, "loss": 1.2615, "step": 2934 }, { "epoch": 0.11483684169340325, "grad_norm": 0.0, "learning_rate": 1.9624969323984994e-05, "loss": 1.2669, "step": 2935 }, { "epoch": 0.11487596838563267, "grad_norm": 0.0, "learning_rate": 1.962462545632583e-05, "loss": 1.0994, "step": 2936 }, { "epoch": 0.11491509507786211, "grad_norm": 0.0, "learning_rate": 1.962428143410725e-05, "loss": 1.2295, "step": 2937 }, { "epoch": 0.11495422177009156, "grad_norm": 0.0, "learning_rate": 1.9623937257334767e-05, "loss": 1.2246, "step": 2938 }, { "epoch": 0.114993348462321, "grad_norm": 0.0, "learning_rate": 1.9623592926013915e-05, "loss": 1.3218, "step": 2939 }, { "epoch": 0.11503247515455044, "grad_norm": 0.0, "learning_rate": 1.9623248440150212e-05, "loss": 1.222, "step": 2940 }, { "epoch": 0.11507160184677988, "grad_norm": 0.0, "learning_rate": 1.9622903799749203e-05, "loss": 1.3933, "step": 2941 }, { "epoch": 0.11511072853900932, "grad_norm": 0.0, "learning_rate": 1.9622559004816418e-05, "loss": 1.1322, "step": 2942 }, { "epoch": 0.11514985523123875, "grad_norm": 0.0, "learning_rate": 1.9622214055357393e-05, "loss": 1.1219, "step": 2943 }, { "epoch": 0.11518898192346819, "grad_norm": 0.0, "learning_rate": 1.9621868951377664e-05, "loss": 1.227, "step": 2944 }, { "epoch": 0.11522810861569763, "grad_norm": 0.0, "learning_rate": 1.962152369288278e-05, "loss": 1.1367, "step": 2945 }, { "epoch": 0.11526723530792707, "grad_norm": 0.0, "learning_rate": 1.9621178279878277e-05, "loss": 1.2072, "step": 2946 }, { "epoch": 0.11530636200015651, "grad_norm": 0.0, "learning_rate": 1.9620832712369712e-05, "loss": 1.2796, "step": 2947 }, { "epoch": 0.11534548869238595, "grad_norm": 0.0, "learning_rate": 1.962048699036263e-05, "loss": 1.1904, "step": 2948 }, { "epoch": 0.11538461538461539, "grad_norm": 0.0, "learning_rate": 1.9620141113862578e-05, "loss": 1.1425, "step": 2949 }, { "epoch": 0.11542374207684482, "grad_norm": 0.0, "learning_rate": 1.9619795082875118e-05, "loss": 1.2401, "step": 2950 }, { "epoch": 0.11546286876907426, "grad_norm": 0.0, "learning_rate": 1.96194488974058e-05, "loss": 1.2546, "step": 2951 }, { "epoch": 0.1155019954613037, "grad_norm": 0.0, "learning_rate": 1.9619102557460188e-05, "loss": 1.2434, "step": 2952 }, { "epoch": 0.11554112215353314, "grad_norm": 0.0, "learning_rate": 1.961875606304384e-05, "loss": 1.1864, "step": 2953 }, { "epoch": 0.11558024884576258, "grad_norm": 0.0, "learning_rate": 1.9618409414162326e-05, "loss": 1.3434, "step": 2954 }, { "epoch": 0.11561937553799202, "grad_norm": 0.0, "learning_rate": 1.961806261082121e-05, "loss": 1.2977, "step": 2955 }, { "epoch": 0.11565850223022146, "grad_norm": 0.0, "learning_rate": 1.9617715653026056e-05, "loss": 1.3497, "step": 2956 }, { "epoch": 0.11569762892245089, "grad_norm": 0.0, "learning_rate": 1.9617368540782444e-05, "loss": 1.0754, "step": 2957 }, { "epoch": 0.11573675561468033, "grad_norm": 0.0, "learning_rate": 1.9617021274095945e-05, "loss": 1.261, "step": 2958 }, { "epoch": 0.11577588230690977, "grad_norm": 0.0, "learning_rate": 1.9616673852972133e-05, "loss": 1.265, "step": 2959 }, { "epoch": 0.11581500899913921, "grad_norm": 0.0, "learning_rate": 1.9616326277416587e-05, "loss": 1.1463, "step": 2960 }, { "epoch": 0.11585413569136865, "grad_norm": 0.0, "learning_rate": 1.9615978547434896e-05, "loss": 1.1737, "step": 2961 }, { "epoch": 0.1158932623835981, "grad_norm": 0.0, "learning_rate": 1.9615630663032635e-05, "loss": 1.1252, "step": 2962 }, { "epoch": 0.11593238907582754, "grad_norm": 0.0, "learning_rate": 1.9615282624215397e-05, "loss": 1.2376, "step": 2963 }, { "epoch": 0.11597151576805696, "grad_norm": 0.0, "learning_rate": 1.961493443098877e-05, "loss": 1.1813, "step": 2964 }, { "epoch": 0.1160106424602864, "grad_norm": 0.0, "learning_rate": 1.961458608335834e-05, "loss": 1.2247, "step": 2965 }, { "epoch": 0.11604976915251584, "grad_norm": 0.0, "learning_rate": 1.9614237581329707e-05, "loss": 1.2148, "step": 2966 }, { "epoch": 0.11608889584474529, "grad_norm": 0.0, "learning_rate": 1.9613888924908468e-05, "loss": 1.2306, "step": 2967 }, { "epoch": 0.11612802253697473, "grad_norm": 0.0, "learning_rate": 1.9613540114100214e-05, "loss": 1.1149, "step": 2968 }, { "epoch": 0.11616714922920417, "grad_norm": 0.0, "learning_rate": 1.961319114891056e-05, "loss": 1.322, "step": 2969 }, { "epoch": 0.11620627592143361, "grad_norm": 0.0, "learning_rate": 1.96128420293451e-05, "loss": 1.2139, "step": 2970 }, { "epoch": 0.11624540261366303, "grad_norm": 0.0, "learning_rate": 1.961249275540944e-05, "loss": 1.2635, "step": 2971 }, { "epoch": 0.11628452930589248, "grad_norm": 0.0, "learning_rate": 1.961214332710919e-05, "loss": 1.2184, "step": 2972 }, { "epoch": 0.11632365599812192, "grad_norm": 0.0, "learning_rate": 1.9611793744449964e-05, "loss": 1.2052, "step": 2973 }, { "epoch": 0.11636278269035136, "grad_norm": 0.0, "learning_rate": 1.961144400743738e-05, "loss": 1.1665, "step": 2974 }, { "epoch": 0.1164019093825808, "grad_norm": 0.0, "learning_rate": 1.9611094116077042e-05, "loss": 1.2104, "step": 2975 }, { "epoch": 0.11644103607481024, "grad_norm": 0.0, "learning_rate": 1.9610744070374583e-05, "loss": 1.2936, "step": 2976 }, { "epoch": 0.11648016276703968, "grad_norm": 0.0, "learning_rate": 1.961039387033561e-05, "loss": 1.1256, "step": 2977 }, { "epoch": 0.11651928945926911, "grad_norm": 0.0, "learning_rate": 1.9610043515965757e-05, "loss": 1.3054, "step": 2978 }, { "epoch": 0.11655841615149855, "grad_norm": 0.0, "learning_rate": 1.9609693007270647e-05, "loss": 1.1966, "step": 2979 }, { "epoch": 0.11659754284372799, "grad_norm": 0.0, "learning_rate": 1.960934234425591e-05, "loss": 1.1532, "step": 2980 }, { "epoch": 0.11663666953595743, "grad_norm": 0.0, "learning_rate": 1.9608991526927176e-05, "loss": 1.1965, "step": 2981 }, { "epoch": 0.11667579622818687, "grad_norm": 0.0, "learning_rate": 1.9608640555290077e-05, "loss": 1.2074, "step": 2982 }, { "epoch": 0.11671492292041631, "grad_norm": 0.0, "learning_rate": 1.960828942935025e-05, "loss": 1.2655, "step": 2983 }, { "epoch": 0.11675404961264575, "grad_norm": 0.0, "learning_rate": 1.960793814911334e-05, "loss": 1.2532, "step": 2984 }, { "epoch": 0.11679317630487518, "grad_norm": 0.0, "learning_rate": 1.9607586714584977e-05, "loss": 1.174, "step": 2985 }, { "epoch": 0.11683230299710462, "grad_norm": 0.0, "learning_rate": 1.9607235125770816e-05, "loss": 1.3483, "step": 2986 }, { "epoch": 0.11687142968933406, "grad_norm": 0.0, "learning_rate": 1.9606883382676493e-05, "loss": 1.2077, "step": 2987 }, { "epoch": 0.1169105563815635, "grad_norm": 0.0, "learning_rate": 1.9606531485307664e-05, "loss": 1.2349, "step": 2988 }, { "epoch": 0.11694968307379294, "grad_norm": 0.0, "learning_rate": 1.960617943366997e-05, "loss": 1.2697, "step": 2989 }, { "epoch": 0.11698880976602238, "grad_norm": 0.0, "learning_rate": 1.960582722776908e-05, "loss": 1.1312, "step": 2990 }, { "epoch": 0.11702793645825182, "grad_norm": 0.0, "learning_rate": 1.9605474867610636e-05, "loss": 1.2018, "step": 2991 }, { "epoch": 0.11706706315048125, "grad_norm": 0.0, "learning_rate": 1.9605122353200308e-05, "loss": 1.2532, "step": 2992 }, { "epoch": 0.11710618984271069, "grad_norm": 0.0, "learning_rate": 1.9604769684543745e-05, "loss": 1.1836, "step": 2993 }, { "epoch": 0.11714531653494013, "grad_norm": 0.0, "learning_rate": 1.960441686164662e-05, "loss": 0.9823, "step": 2994 }, { "epoch": 0.11718444322716957, "grad_norm": 0.0, "learning_rate": 1.9604063884514592e-05, "loss": 1.1428, "step": 2995 }, { "epoch": 0.11722356991939901, "grad_norm": 0.0, "learning_rate": 1.9603710753153335e-05, "loss": 1.3118, "step": 2996 }, { "epoch": 0.11726269661162846, "grad_norm": 0.0, "learning_rate": 1.9603357467568514e-05, "loss": 1.2168, "step": 2997 }, { "epoch": 0.1173018233038579, "grad_norm": 0.0, "learning_rate": 1.960300402776581e-05, "loss": 1.1177, "step": 2998 }, { "epoch": 0.11734094999608734, "grad_norm": 0.0, "learning_rate": 1.9602650433750893e-05, "loss": 1.3507, "step": 2999 }, { "epoch": 0.11738007668831676, "grad_norm": 0.0, "learning_rate": 1.9602296685529442e-05, "loss": 1.3184, "step": 3000 }, { "epoch": 0.1174192033805462, "grad_norm": 0.0, "learning_rate": 1.9601942783107138e-05, "loss": 1.23, "step": 3001 }, { "epoch": 0.11745833007277565, "grad_norm": 0.0, "learning_rate": 1.960158872648967e-05, "loss": 1.2313, "step": 3002 }, { "epoch": 0.11749745676500509, "grad_norm": 0.0, "learning_rate": 1.9601234515682712e-05, "loss": 1.0298, "step": 3003 }, { "epoch": 0.11753658345723453, "grad_norm": 0.0, "learning_rate": 1.960088015069196e-05, "loss": 1.2634, "step": 3004 }, { "epoch": 0.11757571014946397, "grad_norm": 0.0, "learning_rate": 1.9600525631523108e-05, "loss": 1.2368, "step": 3005 }, { "epoch": 0.11761483684169341, "grad_norm": 0.0, "learning_rate": 1.9600170958181838e-05, "loss": 1.2236, "step": 3006 }, { "epoch": 0.11765396353392284, "grad_norm": 0.0, "learning_rate": 1.959981613067386e-05, "loss": 1.2992, "step": 3007 }, { "epoch": 0.11769309022615228, "grad_norm": 0.0, "learning_rate": 1.9599461149004857e-05, "loss": 1.2113, "step": 3008 }, { "epoch": 0.11773221691838172, "grad_norm": 0.0, "learning_rate": 1.959910601318054e-05, "loss": 1.1097, "step": 3009 }, { "epoch": 0.11777134361061116, "grad_norm": 0.0, "learning_rate": 1.9598750723206606e-05, "loss": 1.3312, "step": 3010 }, { "epoch": 0.1178104703028406, "grad_norm": 0.0, "learning_rate": 1.9598395279088765e-05, "loss": 1.1579, "step": 3011 }, { "epoch": 0.11784959699507004, "grad_norm": 0.0, "learning_rate": 1.9598039680832724e-05, "loss": 1.2227, "step": 3012 }, { "epoch": 0.11788872368729948, "grad_norm": 0.0, "learning_rate": 1.9597683928444195e-05, "loss": 1.1776, "step": 3013 }, { "epoch": 0.11792785037952891, "grad_norm": 0.0, "learning_rate": 1.9597328021928886e-05, "loss": 1.1709, "step": 3014 }, { "epoch": 0.11796697707175835, "grad_norm": 0.0, "learning_rate": 1.9596971961292514e-05, "loss": 1.1575, "step": 3015 }, { "epoch": 0.11800610376398779, "grad_norm": 0.0, "learning_rate": 1.9596615746540798e-05, "loss": 1.1876, "step": 3016 }, { "epoch": 0.11804523045621723, "grad_norm": 0.0, "learning_rate": 1.959625937767946e-05, "loss": 1.2363, "step": 3017 }, { "epoch": 0.11808435714844667, "grad_norm": 0.0, "learning_rate": 1.959590285471422e-05, "loss": 1.1892, "step": 3018 }, { "epoch": 0.11812348384067611, "grad_norm": 0.0, "learning_rate": 1.9595546177650807e-05, "loss": 1.2354, "step": 3019 }, { "epoch": 0.11816261053290555, "grad_norm": 0.0, "learning_rate": 1.9595189346494943e-05, "loss": 1.1965, "step": 3020 }, { "epoch": 0.11820173722513498, "grad_norm": 0.0, "learning_rate": 1.9594832361252364e-05, "loss": 1.1741, "step": 3021 }, { "epoch": 0.11824086391736442, "grad_norm": 0.0, "learning_rate": 1.9594475221928797e-05, "loss": 1.1715, "step": 3022 }, { "epoch": 0.11827999060959386, "grad_norm": 0.0, "learning_rate": 1.9594117928529984e-05, "loss": 1.2479, "step": 3023 }, { "epoch": 0.1183191173018233, "grad_norm": 0.0, "learning_rate": 1.9593760481061655e-05, "loss": 1.1636, "step": 3024 }, { "epoch": 0.11835824399405274, "grad_norm": 0.0, "learning_rate": 1.959340287952956e-05, "loss": 1.1867, "step": 3025 }, { "epoch": 0.11839737068628219, "grad_norm": 0.0, "learning_rate": 1.9593045123939433e-05, "loss": 1.205, "step": 3026 }, { "epoch": 0.11843649737851163, "grad_norm": 0.0, "learning_rate": 1.9592687214297022e-05, "loss": 1.222, "step": 3027 }, { "epoch": 0.11847562407074105, "grad_norm": 0.0, "learning_rate": 1.9592329150608074e-05, "loss": 1.2007, "step": 3028 }, { "epoch": 0.1185147507629705, "grad_norm": 0.0, "learning_rate": 1.959197093287834e-05, "loss": 1.2657, "step": 3029 }, { "epoch": 0.11855387745519994, "grad_norm": 0.0, "learning_rate": 1.9591612561113574e-05, "loss": 1.2141, "step": 3030 }, { "epoch": 0.11859300414742938, "grad_norm": 0.0, "learning_rate": 1.9591254035319526e-05, "loss": 1.2358, "step": 3031 }, { "epoch": 0.11863213083965882, "grad_norm": 0.0, "learning_rate": 1.959089535550196e-05, "loss": 1.1337, "step": 3032 }, { "epoch": 0.11867125753188826, "grad_norm": 0.0, "learning_rate": 1.9590536521666633e-05, "loss": 1.1425, "step": 3033 }, { "epoch": 0.1187103842241177, "grad_norm": 0.0, "learning_rate": 1.9590177533819304e-05, "loss": 1.1985, "step": 3034 }, { "epoch": 0.11874951091634713, "grad_norm": 0.0, "learning_rate": 1.9589818391965742e-05, "loss": 1.1699, "step": 3035 }, { "epoch": 0.11878863760857657, "grad_norm": 0.0, "learning_rate": 1.9589459096111714e-05, "loss": 1.1105, "step": 3036 }, { "epoch": 0.11882776430080601, "grad_norm": 0.0, "learning_rate": 1.958909964626299e-05, "loss": 1.1913, "step": 3037 }, { "epoch": 0.11886689099303545, "grad_norm": 0.0, "learning_rate": 1.958874004242534e-05, "loss": 1.1781, "step": 3038 }, { "epoch": 0.11890601768526489, "grad_norm": 0.0, "learning_rate": 1.9588380284604543e-05, "loss": 1.2075, "step": 3039 }, { "epoch": 0.11894514437749433, "grad_norm": 0.0, "learning_rate": 1.958802037280637e-05, "loss": 1.254, "step": 3040 }, { "epoch": 0.11898427106972377, "grad_norm": 0.0, "learning_rate": 1.9587660307036605e-05, "loss": 1.1544, "step": 3041 }, { "epoch": 0.1190233977619532, "grad_norm": 0.0, "learning_rate": 1.958730008730103e-05, "loss": 1.2151, "step": 3042 }, { "epoch": 0.11906252445418264, "grad_norm": 0.0, "learning_rate": 1.9586939713605428e-05, "loss": 1.1163, "step": 3043 }, { "epoch": 0.11910165114641208, "grad_norm": 0.0, "learning_rate": 1.958657918595559e-05, "loss": 1.3146, "step": 3044 }, { "epoch": 0.11914077783864152, "grad_norm": 0.0, "learning_rate": 1.95862185043573e-05, "loss": 1.1325, "step": 3045 }, { "epoch": 0.11917990453087096, "grad_norm": 0.0, "learning_rate": 1.9585857668816355e-05, "loss": 1.2328, "step": 3046 }, { "epoch": 0.1192190312231004, "grad_norm": 0.0, "learning_rate": 1.9585496679338547e-05, "loss": 1.2382, "step": 3047 }, { "epoch": 0.11925815791532984, "grad_norm": 0.0, "learning_rate": 1.9585135535929674e-05, "loss": 1.1025, "step": 3048 }, { "epoch": 0.11929728460755927, "grad_norm": 0.0, "learning_rate": 1.9584774238595535e-05, "loss": 1.0647, "step": 3049 }, { "epoch": 0.11933641129978871, "grad_norm": 0.0, "learning_rate": 1.958441278734193e-05, "loss": 1.143, "step": 3050 }, { "epoch": 0.11937553799201815, "grad_norm": 0.0, "learning_rate": 1.958405118217467e-05, "loss": 1.2031, "step": 3051 }, { "epoch": 0.11941466468424759, "grad_norm": 0.0, "learning_rate": 1.958368942309955e-05, "loss": 1.2022, "step": 3052 }, { "epoch": 0.11945379137647703, "grad_norm": 0.0, "learning_rate": 1.9583327510122397e-05, "loss": 1.2994, "step": 3053 }, { "epoch": 0.11949291806870647, "grad_norm": 0.0, "learning_rate": 1.9582965443249007e-05, "loss": 1.231, "step": 3054 }, { "epoch": 0.11953204476093592, "grad_norm": 0.0, "learning_rate": 1.95826032224852e-05, "loss": 1.2745, "step": 3055 }, { "epoch": 0.11957117145316536, "grad_norm": 0.0, "learning_rate": 1.9582240847836795e-05, "loss": 1.1747, "step": 3056 }, { "epoch": 0.11961029814539478, "grad_norm": 0.0, "learning_rate": 1.9581878319309608e-05, "loss": 1.1562, "step": 3057 }, { "epoch": 0.11964942483762422, "grad_norm": 0.0, "learning_rate": 1.958151563690946e-05, "loss": 1.1288, "step": 3058 }, { "epoch": 0.11968855152985367, "grad_norm": 0.0, "learning_rate": 1.958115280064218e-05, "loss": 1.1932, "step": 3059 }, { "epoch": 0.1197276782220831, "grad_norm": 0.0, "learning_rate": 1.958078981051359e-05, "loss": 1.2739, "step": 3060 }, { "epoch": 0.11976680491431255, "grad_norm": 0.0, "learning_rate": 1.9580426666529522e-05, "loss": 1.2226, "step": 3061 }, { "epoch": 0.11980593160654199, "grad_norm": 0.0, "learning_rate": 1.9580063368695808e-05, "loss": 1.2717, "step": 3062 }, { "epoch": 0.11984505829877143, "grad_norm": 0.0, "learning_rate": 1.9579699917018278e-05, "loss": 1.1193, "step": 3063 }, { "epoch": 0.11988418499100086, "grad_norm": 0.0, "learning_rate": 1.9579336311502772e-05, "loss": 1.1057, "step": 3064 }, { "epoch": 0.1199233116832303, "grad_norm": 0.0, "learning_rate": 1.957897255215513e-05, "loss": 1.1776, "step": 3065 }, { "epoch": 0.11996243837545974, "grad_norm": 0.0, "learning_rate": 1.9578608638981192e-05, "loss": 1.1598, "step": 3066 }, { "epoch": 0.12000156506768918, "grad_norm": 0.0, "learning_rate": 1.95782445719868e-05, "loss": 1.2907, "step": 3067 }, { "epoch": 0.12004069175991862, "grad_norm": 0.0, "learning_rate": 1.9577880351177803e-05, "loss": 1.2163, "step": 3068 }, { "epoch": 0.12007981845214806, "grad_norm": 0.0, "learning_rate": 1.957751597656005e-05, "loss": 1.1404, "step": 3069 }, { "epoch": 0.1201189451443775, "grad_norm": 0.0, "learning_rate": 1.957715144813939e-05, "loss": 1.1484, "step": 3070 }, { "epoch": 0.12015807183660693, "grad_norm": 0.0, "learning_rate": 1.9576786765921682e-05, "loss": 1.0851, "step": 3071 }, { "epoch": 0.12019719852883637, "grad_norm": 0.0, "learning_rate": 1.9576421929912775e-05, "loss": 1.3209, "step": 3072 }, { "epoch": 0.12023632522106581, "grad_norm": 0.0, "learning_rate": 1.9576056940118534e-05, "loss": 1.2961, "step": 3073 }, { "epoch": 0.12027545191329525, "grad_norm": 0.0, "learning_rate": 1.957569179654482e-05, "loss": 1.1642, "step": 3074 }, { "epoch": 0.12031457860552469, "grad_norm": 0.0, "learning_rate": 1.9575326499197492e-05, "loss": 1.2048, "step": 3075 }, { "epoch": 0.12035370529775413, "grad_norm": 0.0, "learning_rate": 1.957496104808242e-05, "loss": 1.3037, "step": 3076 }, { "epoch": 0.12039283198998357, "grad_norm": 0.0, "learning_rate": 1.957459544320547e-05, "loss": 1.0001, "step": 3077 }, { "epoch": 0.120431958682213, "grad_norm": 0.0, "learning_rate": 1.9574229684572518e-05, "loss": 1.1837, "step": 3078 }, { "epoch": 0.12047108537444244, "grad_norm": 0.0, "learning_rate": 1.9573863772189432e-05, "loss": 1.2416, "step": 3079 }, { "epoch": 0.12051021206667188, "grad_norm": 0.0, "learning_rate": 1.957349770606209e-05, "loss": 1.2726, "step": 3080 }, { "epoch": 0.12054933875890132, "grad_norm": 0.0, "learning_rate": 1.9573131486196372e-05, "loss": 1.1475, "step": 3081 }, { "epoch": 0.12058846545113076, "grad_norm": 0.0, "learning_rate": 1.9572765112598157e-05, "loss": 1.058, "step": 3082 }, { "epoch": 0.1206275921433602, "grad_norm": 0.0, "learning_rate": 1.9572398585273333e-05, "loss": 1.2417, "step": 3083 }, { "epoch": 0.12066671883558965, "grad_norm": 0.0, "learning_rate": 1.957203190422778e-05, "loss": 1.2473, "step": 3084 }, { "epoch": 0.12070584552781907, "grad_norm": 0.0, "learning_rate": 1.957166506946739e-05, "loss": 1.2734, "step": 3085 }, { "epoch": 0.12074497222004851, "grad_norm": 0.0, "learning_rate": 1.9571298080998052e-05, "loss": 1.1303, "step": 3086 }, { "epoch": 0.12078409891227795, "grad_norm": 0.0, "learning_rate": 1.9570930938825662e-05, "loss": 1.2042, "step": 3087 }, { "epoch": 0.1208232256045074, "grad_norm": 0.0, "learning_rate": 1.9570563642956114e-05, "loss": 1.2542, "step": 3088 }, { "epoch": 0.12086235229673684, "grad_norm": 0.0, "learning_rate": 1.9570196193395305e-05, "loss": 1.2094, "step": 3089 }, { "epoch": 0.12090147898896628, "grad_norm": 0.0, "learning_rate": 1.9569828590149135e-05, "loss": 1.2554, "step": 3090 }, { "epoch": 0.12094060568119572, "grad_norm": 0.0, "learning_rate": 1.9569460833223512e-05, "loss": 1.1374, "step": 3091 }, { "epoch": 0.12097973237342514, "grad_norm": 0.0, "learning_rate": 1.956909292262434e-05, "loss": 1.2595, "step": 3092 }, { "epoch": 0.12101885906565459, "grad_norm": 0.0, "learning_rate": 1.9568724858357527e-05, "loss": 1.2979, "step": 3093 }, { "epoch": 0.12105798575788403, "grad_norm": 0.0, "learning_rate": 1.956835664042898e-05, "loss": 1.3547, "step": 3094 }, { "epoch": 0.12109711245011347, "grad_norm": 0.0, "learning_rate": 1.956798826884462e-05, "loss": 1.2149, "step": 3095 }, { "epoch": 0.12113623914234291, "grad_norm": 0.0, "learning_rate": 1.9567619743610354e-05, "loss": 1.1578, "step": 3096 }, { "epoch": 0.12117536583457235, "grad_norm": 0.0, "learning_rate": 1.9567251064732105e-05, "loss": 1.3519, "step": 3097 }, { "epoch": 0.12121449252680179, "grad_norm": 0.0, "learning_rate": 1.9566882232215788e-05, "loss": 1.2251, "step": 3098 }, { "epoch": 0.12125361921903122, "grad_norm": 0.0, "learning_rate": 1.9566513246067335e-05, "loss": 1.0903, "step": 3099 }, { "epoch": 0.12129274591126066, "grad_norm": 0.0, "learning_rate": 1.956614410629267e-05, "loss": 1.2116, "step": 3100 }, { "epoch": 0.1213318726034901, "grad_norm": 0.0, "learning_rate": 1.956577481289771e-05, "loss": 1.2716, "step": 3101 }, { "epoch": 0.12137099929571954, "grad_norm": 0.0, "learning_rate": 1.95654053658884e-05, "loss": 1.2904, "step": 3102 }, { "epoch": 0.12141012598794898, "grad_norm": 0.0, "learning_rate": 1.956503576527066e-05, "loss": 1.1221, "step": 3103 }, { "epoch": 0.12144925268017842, "grad_norm": 0.0, "learning_rate": 1.9564666011050435e-05, "loss": 1.1664, "step": 3104 }, { "epoch": 0.12148837937240786, "grad_norm": 0.0, "learning_rate": 1.956429610323366e-05, "loss": 1.1993, "step": 3105 }, { "epoch": 0.12152750606463729, "grad_norm": 0.0, "learning_rate": 1.9563926041826272e-05, "loss": 1.2843, "step": 3106 }, { "epoch": 0.12156663275686673, "grad_norm": 0.0, "learning_rate": 1.9563555826834214e-05, "loss": 1.1272, "step": 3107 }, { "epoch": 0.12160575944909617, "grad_norm": 0.0, "learning_rate": 1.9563185458263437e-05, "loss": 1.2098, "step": 3108 }, { "epoch": 0.12164488614132561, "grad_norm": 0.0, "learning_rate": 1.9562814936119885e-05, "loss": 1.1389, "step": 3109 }, { "epoch": 0.12168401283355505, "grad_norm": 0.0, "learning_rate": 1.9562444260409507e-05, "loss": 1.1987, "step": 3110 }, { "epoch": 0.1217231395257845, "grad_norm": 0.0, "learning_rate": 1.9562073431138255e-05, "loss": 1.1802, "step": 3111 }, { "epoch": 0.12176226621801393, "grad_norm": 0.0, "learning_rate": 1.9561702448312084e-05, "loss": 1.1123, "step": 3112 }, { "epoch": 0.12180139291024336, "grad_norm": 0.0, "learning_rate": 1.956133131193696e-05, "loss": 1.1779, "step": 3113 }, { "epoch": 0.1218405196024728, "grad_norm": 0.0, "learning_rate": 1.956096002201883e-05, "loss": 1.3257, "step": 3114 }, { "epoch": 0.12187964629470224, "grad_norm": 0.0, "learning_rate": 1.9560588578563667e-05, "loss": 1.2579, "step": 3115 }, { "epoch": 0.12191877298693168, "grad_norm": 0.0, "learning_rate": 1.9560216981577426e-05, "loss": 1.1532, "step": 3116 }, { "epoch": 0.12195789967916112, "grad_norm": 0.0, "learning_rate": 1.9559845231066084e-05, "loss": 1.158, "step": 3117 }, { "epoch": 0.12199702637139057, "grad_norm": 0.0, "learning_rate": 1.9559473327035607e-05, "loss": 1.0392, "step": 3118 }, { "epoch": 0.12203615306362, "grad_norm": 0.0, "learning_rate": 1.9559101269491965e-05, "loss": 1.2292, "step": 3119 }, { "epoch": 0.12207527975584945, "grad_norm": 0.0, "learning_rate": 1.9558729058441135e-05, "loss": 1.1035, "step": 3120 }, { "epoch": 0.12211440644807887, "grad_norm": 0.0, "learning_rate": 1.9558356693889098e-05, "loss": 1.1942, "step": 3121 }, { "epoch": 0.12215353314030832, "grad_norm": 0.0, "learning_rate": 1.9557984175841825e-05, "loss": 1.1974, "step": 3122 }, { "epoch": 0.12219265983253776, "grad_norm": 0.0, "learning_rate": 1.9557611504305305e-05, "loss": 1.2513, "step": 3123 }, { "epoch": 0.1222317865247672, "grad_norm": 0.0, "learning_rate": 1.955723867928552e-05, "loss": 1.1013, "step": 3124 }, { "epoch": 0.12227091321699664, "grad_norm": 0.0, "learning_rate": 1.9556865700788457e-05, "loss": 1.2515, "step": 3125 }, { "epoch": 0.12231003990922608, "grad_norm": 0.0, "learning_rate": 1.9556492568820107e-05, "loss": 1.1331, "step": 3126 }, { "epoch": 0.12234916660145552, "grad_norm": 0.0, "learning_rate": 1.9556119283386463e-05, "loss": 1.1569, "step": 3127 }, { "epoch": 0.12238829329368495, "grad_norm": 0.0, "learning_rate": 1.955574584449352e-05, "loss": 1.1786, "step": 3128 }, { "epoch": 0.12242741998591439, "grad_norm": 0.0, "learning_rate": 1.9555372252147264e-05, "loss": 1.2644, "step": 3129 }, { "epoch": 0.12246654667814383, "grad_norm": 0.0, "learning_rate": 1.9554998506353707e-05, "loss": 1.1954, "step": 3130 }, { "epoch": 0.12250567337037327, "grad_norm": 0.0, "learning_rate": 1.955462460711885e-05, "loss": 1.2015, "step": 3131 }, { "epoch": 0.12254480006260271, "grad_norm": 0.0, "learning_rate": 1.9554250554448692e-05, "loss": 1.1463, "step": 3132 }, { "epoch": 0.12258392675483215, "grad_norm": 0.0, "learning_rate": 1.9553876348349242e-05, "loss": 1.1703, "step": 3133 }, { "epoch": 0.12262305344706159, "grad_norm": 0.0, "learning_rate": 1.9553501988826514e-05, "loss": 1.2977, "step": 3134 }, { "epoch": 0.12266218013929102, "grad_norm": 0.0, "learning_rate": 1.955312747588651e-05, "loss": 1.2676, "step": 3135 }, { "epoch": 0.12270130683152046, "grad_norm": 0.0, "learning_rate": 1.955275280953525e-05, "loss": 1.2216, "step": 3136 }, { "epoch": 0.1227404335237499, "grad_norm": 0.0, "learning_rate": 1.9552377989778754e-05, "loss": 1.2294, "step": 3137 }, { "epoch": 0.12277956021597934, "grad_norm": 0.0, "learning_rate": 1.9552003016623032e-05, "loss": 1.2809, "step": 3138 }, { "epoch": 0.12281868690820878, "grad_norm": 0.0, "learning_rate": 1.9551627890074115e-05, "loss": 1.4247, "step": 3139 }, { "epoch": 0.12285781360043822, "grad_norm": 0.0, "learning_rate": 1.955125261013802e-05, "loss": 1.2419, "step": 3140 }, { "epoch": 0.12289694029266766, "grad_norm": 0.0, "learning_rate": 1.955087717682078e-05, "loss": 1.1672, "step": 3141 }, { "epoch": 0.12293606698489709, "grad_norm": 0.0, "learning_rate": 1.9550501590128418e-05, "loss": 1.2898, "step": 3142 }, { "epoch": 0.12297519367712653, "grad_norm": 0.0, "learning_rate": 1.955012585006697e-05, "loss": 1.2273, "step": 3143 }, { "epoch": 0.12301432036935597, "grad_norm": 0.0, "learning_rate": 1.9549749956642464e-05, "loss": 1.2841, "step": 3144 }, { "epoch": 0.12305344706158541, "grad_norm": 0.0, "learning_rate": 1.9549373909860944e-05, "loss": 1.3269, "step": 3145 }, { "epoch": 0.12309257375381485, "grad_norm": 0.0, "learning_rate": 1.9548997709728443e-05, "loss": 1.283, "step": 3146 }, { "epoch": 0.1231317004460443, "grad_norm": 0.0, "learning_rate": 1.9548621356251004e-05, "loss": 1.2133, "step": 3147 }, { "epoch": 0.12317082713827374, "grad_norm": 0.0, "learning_rate": 1.9548244849434673e-05, "loss": 1.219, "step": 3148 }, { "epoch": 0.12320995383050316, "grad_norm": 0.0, "learning_rate": 1.9547868189285493e-05, "loss": 1.2233, "step": 3149 }, { "epoch": 0.1232490805227326, "grad_norm": 0.0, "learning_rate": 1.9547491375809512e-05, "loss": 1.2968, "step": 3150 }, { "epoch": 0.12328820721496205, "grad_norm": 0.0, "learning_rate": 1.954711440901279e-05, "loss": 1.3376, "step": 3151 }, { "epoch": 0.12332733390719149, "grad_norm": 0.0, "learning_rate": 1.9546737288901364e-05, "loss": 1.2108, "step": 3152 }, { "epoch": 0.12336646059942093, "grad_norm": 0.0, "learning_rate": 1.9546360015481306e-05, "loss": 1.167, "step": 3153 }, { "epoch": 0.12340558729165037, "grad_norm": 0.0, "learning_rate": 1.954598258875867e-05, "loss": 1.2242, "step": 3154 }, { "epoch": 0.12344471398387981, "grad_norm": 0.0, "learning_rate": 1.954560500873951e-05, "loss": 1.2247, "step": 3155 }, { "epoch": 0.12348384067610924, "grad_norm": 0.0, "learning_rate": 1.9545227275429898e-05, "loss": 1.3351, "step": 3156 }, { "epoch": 0.12352296736833868, "grad_norm": 0.0, "learning_rate": 1.95448493888359e-05, "loss": 1.1411, "step": 3157 }, { "epoch": 0.12356209406056812, "grad_norm": 0.0, "learning_rate": 1.9544471348963578e-05, "loss": 1.2416, "step": 3158 }, { "epoch": 0.12360122075279756, "grad_norm": 0.0, "learning_rate": 1.9544093155819004e-05, "loss": 1.2729, "step": 3159 }, { "epoch": 0.123640347445027, "grad_norm": 0.0, "learning_rate": 1.9543714809408258e-05, "loss": 1.1623, "step": 3160 }, { "epoch": 0.12367947413725644, "grad_norm": 0.0, "learning_rate": 1.9543336309737406e-05, "loss": 1.1922, "step": 3161 }, { "epoch": 0.12371860082948588, "grad_norm": 0.0, "learning_rate": 1.9542957656812534e-05, "loss": 1.194, "step": 3162 }, { "epoch": 0.12375772752171531, "grad_norm": 0.0, "learning_rate": 1.9542578850639717e-05, "loss": 1.1314, "step": 3163 }, { "epoch": 0.12379685421394475, "grad_norm": 0.0, "learning_rate": 1.9542199891225046e-05, "loss": 1.1385, "step": 3164 }, { "epoch": 0.12383598090617419, "grad_norm": 0.0, "learning_rate": 1.9541820778574597e-05, "loss": 1.2313, "step": 3165 }, { "epoch": 0.12387510759840363, "grad_norm": 0.0, "learning_rate": 1.954144151269447e-05, "loss": 1.121, "step": 3166 }, { "epoch": 0.12391423429063307, "grad_norm": 0.0, "learning_rate": 1.954106209359074e-05, "loss": 1.0292, "step": 3167 }, { "epoch": 0.12395336098286251, "grad_norm": 0.0, "learning_rate": 1.9540682521269515e-05, "loss": 1.2003, "step": 3168 }, { "epoch": 0.12399248767509195, "grad_norm": 0.0, "learning_rate": 1.9540302795736878e-05, "loss": 1.2007, "step": 3169 }, { "epoch": 0.12403161436732138, "grad_norm": 0.0, "learning_rate": 1.9539922916998935e-05, "loss": 1.239, "step": 3170 }, { "epoch": 0.12407074105955082, "grad_norm": 0.0, "learning_rate": 1.9539542885061785e-05, "loss": 1.1366, "step": 3171 }, { "epoch": 0.12410986775178026, "grad_norm": 0.0, "learning_rate": 1.9539162699931534e-05, "loss": 1.0818, "step": 3172 }, { "epoch": 0.1241489944440097, "grad_norm": 0.0, "learning_rate": 1.9538782361614277e-05, "loss": 1.2155, "step": 3173 }, { "epoch": 0.12418812113623914, "grad_norm": 0.0, "learning_rate": 1.9538401870116132e-05, "loss": 1.2468, "step": 3174 }, { "epoch": 0.12422724782846858, "grad_norm": 0.0, "learning_rate": 1.9538021225443202e-05, "loss": 1.1047, "step": 3175 }, { "epoch": 0.12426637452069803, "grad_norm": 0.0, "learning_rate": 1.9537640427601605e-05, "loss": 1.2318, "step": 3176 }, { "epoch": 0.12430550121292745, "grad_norm": 0.0, "learning_rate": 1.9537259476597455e-05, "loss": 1.2393, "step": 3177 }, { "epoch": 0.1243446279051569, "grad_norm": 0.0, "learning_rate": 1.9536878372436866e-05, "loss": 1.1534, "step": 3178 }, { "epoch": 0.12438375459738633, "grad_norm": 0.0, "learning_rate": 1.953649711512596e-05, "loss": 1.1396, "step": 3179 }, { "epoch": 0.12442288128961577, "grad_norm": 0.0, "learning_rate": 1.9536115704670865e-05, "loss": 1.1818, "step": 3180 }, { "epoch": 0.12446200798184522, "grad_norm": 0.0, "learning_rate": 1.9535734141077694e-05, "loss": 1.0723, "step": 3181 }, { "epoch": 0.12450113467407466, "grad_norm": 0.0, "learning_rate": 1.9535352424352588e-05, "loss": 1.1146, "step": 3182 }, { "epoch": 0.1245402613663041, "grad_norm": 0.0, "learning_rate": 1.953497055450167e-05, "loss": 1.079, "step": 3183 }, { "epoch": 0.12457938805853354, "grad_norm": 0.0, "learning_rate": 1.953458853153107e-05, "loss": 1.1658, "step": 3184 }, { "epoch": 0.12461851475076297, "grad_norm": 0.0, "learning_rate": 1.9534206355446927e-05, "loss": 1.2498, "step": 3185 }, { "epoch": 0.1246576414429924, "grad_norm": 0.0, "learning_rate": 1.953382402625538e-05, "loss": 1.2148, "step": 3186 }, { "epoch": 0.12469676813522185, "grad_norm": 0.0, "learning_rate": 1.953344154396256e-05, "loss": 1.2584, "step": 3187 }, { "epoch": 0.12473589482745129, "grad_norm": 0.0, "learning_rate": 1.9533058908574617e-05, "loss": 1.229, "step": 3188 }, { "epoch": 0.12477502151968073, "grad_norm": 0.0, "learning_rate": 1.9532676120097696e-05, "loss": 1.1368, "step": 3189 }, { "epoch": 0.12481414821191017, "grad_norm": 0.0, "learning_rate": 1.953229317853794e-05, "loss": 1.2589, "step": 3190 }, { "epoch": 0.12485327490413961, "grad_norm": 0.0, "learning_rate": 1.95319100839015e-05, "loss": 1.3189, "step": 3191 }, { "epoch": 0.12489240159636904, "grad_norm": 0.0, "learning_rate": 1.9531526836194526e-05, "loss": 1.1721, "step": 3192 }, { "epoch": 0.12493152828859848, "grad_norm": 0.0, "learning_rate": 1.9531143435423176e-05, "loss": 1.2721, "step": 3193 }, { "epoch": 0.12497065498082792, "grad_norm": 0.0, "learning_rate": 1.953075988159361e-05, "loss": 1.2012, "step": 3194 }, { "epoch": 0.12500978167305735, "grad_norm": 0.0, "learning_rate": 1.9530376174711977e-05, "loss": 1.298, "step": 3195 }, { "epoch": 0.1250489083652868, "grad_norm": 0.0, "learning_rate": 1.9529992314784446e-05, "loss": 1.1802, "step": 3196 }, { "epoch": 0.12508803505751623, "grad_norm": 0.0, "learning_rate": 1.952960830181718e-05, "loss": 1.0813, "step": 3197 }, { "epoch": 0.12512716174974567, "grad_norm": 0.0, "learning_rate": 1.9529224135816348e-05, "loss": 1.2982, "step": 3198 }, { "epoch": 0.1251662884419751, "grad_norm": 0.0, "learning_rate": 1.952883981678812e-05, "loss": 1.2296, "step": 3199 }, { "epoch": 0.12520541513420455, "grad_norm": 0.0, "learning_rate": 1.952845534473866e-05, "loss": 1.205, "step": 3200 }, { "epoch": 0.125244541826434, "grad_norm": 0.0, "learning_rate": 1.952807071967415e-05, "loss": 1.0558, "step": 3201 }, { "epoch": 0.12528366851866343, "grad_norm": 0.0, "learning_rate": 1.9527685941600762e-05, "loss": 1.0873, "step": 3202 }, { "epoch": 0.12532279521089287, "grad_norm": 0.0, "learning_rate": 1.9527301010524677e-05, "loss": 1.3458, "step": 3203 }, { "epoch": 0.12536192190312231, "grad_norm": 0.0, "learning_rate": 1.9526915926452073e-05, "loss": 1.2303, "step": 3204 }, { "epoch": 0.12540104859535176, "grad_norm": 0.0, "learning_rate": 1.952653068938914e-05, "loss": 1.1429, "step": 3205 }, { "epoch": 0.1254401752875812, "grad_norm": 0.0, "learning_rate": 1.9526145299342063e-05, "loss": 1.1616, "step": 3206 }, { "epoch": 0.12547930197981064, "grad_norm": 0.0, "learning_rate": 1.9525759756317026e-05, "loss": 1.2377, "step": 3207 }, { "epoch": 0.12551842867204008, "grad_norm": 0.0, "learning_rate": 1.9525374060320228e-05, "loss": 1.2079, "step": 3208 }, { "epoch": 0.1255575553642695, "grad_norm": 0.0, "learning_rate": 1.9524988211357855e-05, "loss": 1.2371, "step": 3209 }, { "epoch": 0.12559668205649893, "grad_norm": 0.0, "learning_rate": 1.952460220943611e-05, "loss": 1.1642, "step": 3210 }, { "epoch": 0.12563580874872837, "grad_norm": 0.0, "learning_rate": 1.9524216054561186e-05, "loss": 1.1035, "step": 3211 }, { "epoch": 0.1256749354409578, "grad_norm": 0.0, "learning_rate": 1.9523829746739286e-05, "loss": 1.1353, "step": 3212 }, { "epoch": 0.12571406213318725, "grad_norm": 0.0, "learning_rate": 1.9523443285976617e-05, "loss": 1.2573, "step": 3213 }, { "epoch": 0.1257531888254167, "grad_norm": 0.0, "learning_rate": 1.952305667227938e-05, "loss": 1.0728, "step": 3214 }, { "epoch": 0.12579231551764614, "grad_norm": 0.0, "learning_rate": 1.9522669905653787e-05, "loss": 1.3169, "step": 3215 }, { "epoch": 0.12583144220987558, "grad_norm": 0.0, "learning_rate": 1.9522282986106045e-05, "loss": 1.1599, "step": 3216 }, { "epoch": 0.12587056890210502, "grad_norm": 0.0, "learning_rate": 1.9521895913642375e-05, "loss": 1.261, "step": 3217 }, { "epoch": 0.12590969559433446, "grad_norm": 0.0, "learning_rate": 1.9521508688268986e-05, "loss": 1.2161, "step": 3218 }, { "epoch": 0.1259488222865639, "grad_norm": 0.0, "learning_rate": 1.95211213099921e-05, "loss": 1.2865, "step": 3219 }, { "epoch": 0.12598794897879334, "grad_norm": 0.0, "learning_rate": 1.9520733778817936e-05, "loss": 1.1806, "step": 3220 }, { "epoch": 0.12602707567102278, "grad_norm": 0.0, "learning_rate": 1.9520346094752716e-05, "loss": 1.1368, "step": 3221 }, { "epoch": 0.12606620236325222, "grad_norm": 0.0, "learning_rate": 1.9519958257802668e-05, "loss": 1.2318, "step": 3222 }, { "epoch": 0.12610532905548166, "grad_norm": 0.0, "learning_rate": 1.951957026797402e-05, "loss": 1.2843, "step": 3223 }, { "epoch": 0.12614445574771108, "grad_norm": 0.0, "learning_rate": 1.9519182125273e-05, "loss": 1.0461, "step": 3224 }, { "epoch": 0.12618358243994052, "grad_norm": 0.0, "learning_rate": 1.9518793829705846e-05, "loss": 1.2207, "step": 3225 }, { "epoch": 0.12622270913216996, "grad_norm": 0.0, "learning_rate": 1.9518405381278793e-05, "loss": 1.27, "step": 3226 }, { "epoch": 0.1262618358243994, "grad_norm": 0.0, "learning_rate": 1.951801677999807e-05, "loss": 1.2505, "step": 3227 }, { "epoch": 0.12630096251662884, "grad_norm": 0.0, "learning_rate": 1.951762802586993e-05, "loss": 1.1607, "step": 3228 }, { "epoch": 0.12634008920885828, "grad_norm": 0.0, "learning_rate": 1.9517239118900607e-05, "loss": 1.2053, "step": 3229 }, { "epoch": 0.12637921590108772, "grad_norm": 0.0, "learning_rate": 1.951685005909635e-05, "loss": 1.0711, "step": 3230 }, { "epoch": 0.12641834259331716, "grad_norm": 0.0, "learning_rate": 1.9516460846463408e-05, "loss": 1.2365, "step": 3231 }, { "epoch": 0.1264574692855466, "grad_norm": 0.0, "learning_rate": 1.951607148100803e-05, "loss": 1.2357, "step": 3232 }, { "epoch": 0.12649659597777604, "grad_norm": 0.0, "learning_rate": 1.9515681962736467e-05, "loss": 1.3944, "step": 3233 }, { "epoch": 0.12653572267000548, "grad_norm": 0.0, "learning_rate": 1.9515292291654976e-05, "loss": 1.0451, "step": 3234 }, { "epoch": 0.12657484936223493, "grad_norm": 0.0, "learning_rate": 1.9514902467769812e-05, "loss": 1.0244, "step": 3235 }, { "epoch": 0.12661397605446437, "grad_norm": 0.0, "learning_rate": 1.951451249108724e-05, "loss": 1.2073, "step": 3236 }, { "epoch": 0.1266531027466938, "grad_norm": 0.0, "learning_rate": 1.951412236161352e-05, "loss": 1.2225, "step": 3237 }, { "epoch": 0.12669222943892322, "grad_norm": 0.0, "learning_rate": 1.9513732079354912e-05, "loss": 1.1441, "step": 3238 }, { "epoch": 0.12673135613115266, "grad_norm": 0.0, "learning_rate": 1.9513341644317692e-05, "loss": 1.1394, "step": 3239 }, { "epoch": 0.1267704828233821, "grad_norm": 0.0, "learning_rate": 1.9512951056508126e-05, "loss": 1.1866, "step": 3240 }, { "epoch": 0.12680960951561154, "grad_norm": 0.0, "learning_rate": 1.9512560315932485e-05, "loss": 1.2719, "step": 3241 }, { "epoch": 0.12684873620784098, "grad_norm": 0.0, "learning_rate": 1.9512169422597048e-05, "loss": 1.2017, "step": 3242 }, { "epoch": 0.12688786290007043, "grad_norm": 0.0, "learning_rate": 1.9511778376508088e-05, "loss": 1.2266, "step": 3243 }, { "epoch": 0.12692698959229987, "grad_norm": 0.0, "learning_rate": 1.9511387177671885e-05, "loss": 1.181, "step": 3244 }, { "epoch": 0.1269661162845293, "grad_norm": 0.0, "learning_rate": 1.9510995826094723e-05, "loss": 1.2143, "step": 3245 }, { "epoch": 0.12700524297675875, "grad_norm": 0.0, "learning_rate": 1.9510604321782887e-05, "loss": 1.2482, "step": 3246 }, { "epoch": 0.1270443696689882, "grad_norm": 0.0, "learning_rate": 1.9510212664742663e-05, "loss": 1.1944, "step": 3247 }, { "epoch": 0.12708349636121763, "grad_norm": 0.0, "learning_rate": 1.9509820854980338e-05, "loss": 1.3411, "step": 3248 }, { "epoch": 0.12712262305344707, "grad_norm": 0.0, "learning_rate": 1.9509428892502208e-05, "loss": 1.1804, "step": 3249 }, { "epoch": 0.1271617497456765, "grad_norm": 0.0, "learning_rate": 1.9509036777314568e-05, "loss": 1.2894, "step": 3250 }, { "epoch": 0.12720087643790595, "grad_norm": 0.0, "learning_rate": 1.9508644509423712e-05, "loss": 1.2301, "step": 3251 }, { "epoch": 0.12724000313013537, "grad_norm": 0.0, "learning_rate": 1.9508252088835938e-05, "loss": 1.2495, "step": 3252 }, { "epoch": 0.1272791298223648, "grad_norm": 0.0, "learning_rate": 1.950785951555755e-05, "loss": 1.256, "step": 3253 }, { "epoch": 0.12731825651459425, "grad_norm": 0.0, "learning_rate": 1.9507466789594853e-05, "loss": 1.155, "step": 3254 }, { "epoch": 0.1273573832068237, "grad_norm": 0.0, "learning_rate": 1.9507073910954154e-05, "loss": 1.1413, "step": 3255 }, { "epoch": 0.12739650989905313, "grad_norm": 0.0, "learning_rate": 1.950668087964176e-05, "loss": 1.1444, "step": 3256 }, { "epoch": 0.12743563659128257, "grad_norm": 0.0, "learning_rate": 1.9506287695663986e-05, "loss": 1.181, "step": 3257 }, { "epoch": 0.127474763283512, "grad_norm": 0.0, "learning_rate": 1.950589435902714e-05, "loss": 1.0276, "step": 3258 }, { "epoch": 0.12751388997574145, "grad_norm": 0.0, "learning_rate": 1.9505500869737545e-05, "loss": 1.2935, "step": 3259 }, { "epoch": 0.1275530166679709, "grad_norm": 0.0, "learning_rate": 1.9505107227801515e-05, "loss": 1.1765, "step": 3260 }, { "epoch": 0.12759214336020033, "grad_norm": 0.0, "learning_rate": 1.9504713433225374e-05, "loss": 1.0912, "step": 3261 }, { "epoch": 0.12763127005242977, "grad_norm": 0.0, "learning_rate": 1.9504319486015448e-05, "loss": 1.143, "step": 3262 }, { "epoch": 0.12767039674465921, "grad_norm": 0.0, "learning_rate": 1.950392538617806e-05, "loss": 1.207, "step": 3263 }, { "epoch": 0.12770952343688866, "grad_norm": 0.0, "learning_rate": 1.9503531133719535e-05, "loss": 1.225, "step": 3264 }, { "epoch": 0.1277486501291181, "grad_norm": 0.0, "learning_rate": 1.9503136728646213e-05, "loss": 1.1548, "step": 3265 }, { "epoch": 0.1277877768213475, "grad_norm": 0.0, "learning_rate": 1.9502742170964422e-05, "loss": 1.1017, "step": 3266 }, { "epoch": 0.12782690351357695, "grad_norm": 0.0, "learning_rate": 1.9502347460680498e-05, "loss": 1.2088, "step": 3267 }, { "epoch": 0.1278660302058064, "grad_norm": 0.0, "learning_rate": 1.9501952597800783e-05, "loss": 1.2263, "step": 3268 }, { "epoch": 0.12790515689803583, "grad_norm": 0.0, "learning_rate": 1.9501557582331613e-05, "loss": 1.2111, "step": 3269 }, { "epoch": 0.12794428359026527, "grad_norm": 0.0, "learning_rate": 1.9501162414279337e-05, "loss": 1.096, "step": 3270 }, { "epoch": 0.12798341028249471, "grad_norm": 0.0, "learning_rate": 1.9500767093650298e-05, "loss": 1.2023, "step": 3271 }, { "epoch": 0.12802253697472415, "grad_norm": 0.0, "learning_rate": 1.9500371620450842e-05, "loss": 1.2285, "step": 3272 }, { "epoch": 0.1280616636669536, "grad_norm": 0.0, "learning_rate": 1.9499975994687322e-05, "loss": 1.2983, "step": 3273 }, { "epoch": 0.12810079035918304, "grad_norm": 0.0, "learning_rate": 1.9499580216366097e-05, "loss": 1.1591, "step": 3274 }, { "epoch": 0.12813991705141248, "grad_norm": 0.0, "learning_rate": 1.9499184285493516e-05, "loss": 1.2352, "step": 3275 }, { "epoch": 0.12817904374364192, "grad_norm": 0.0, "learning_rate": 1.9498788202075936e-05, "loss": 1.0651, "step": 3276 }, { "epoch": 0.12821817043587136, "grad_norm": 0.0, "learning_rate": 1.949839196611972e-05, "loss": 1.1392, "step": 3277 }, { "epoch": 0.1282572971281008, "grad_norm": 0.0, "learning_rate": 1.9497995577631233e-05, "loss": 1.2328, "step": 3278 }, { "epoch": 0.12829642382033024, "grad_norm": 0.0, "learning_rate": 1.9497599036616836e-05, "loss": 1.2552, "step": 3279 }, { "epoch": 0.12833555051255968, "grad_norm": 0.0, "learning_rate": 1.9497202343082905e-05, "loss": 1.2954, "step": 3280 }, { "epoch": 0.1283746772047891, "grad_norm": 0.0, "learning_rate": 1.94968054970358e-05, "loss": 1.2921, "step": 3281 }, { "epoch": 0.12841380389701854, "grad_norm": 0.0, "learning_rate": 1.94964084984819e-05, "loss": 1.2255, "step": 3282 }, { "epoch": 0.12845293058924798, "grad_norm": 0.0, "learning_rate": 1.949601134742758e-05, "loss": 1.2021, "step": 3283 }, { "epoch": 0.12849205728147742, "grad_norm": 0.0, "learning_rate": 1.9495614043879216e-05, "loss": 1.2324, "step": 3284 }, { "epoch": 0.12853118397370686, "grad_norm": 0.0, "learning_rate": 1.949521658784319e-05, "loss": 1.1784, "step": 3285 }, { "epoch": 0.1285703106659363, "grad_norm": 0.0, "learning_rate": 1.949481897932588e-05, "loss": 1.3462, "step": 3286 }, { "epoch": 0.12860943735816574, "grad_norm": 0.0, "learning_rate": 1.949442121833368e-05, "loss": 1.1408, "step": 3287 }, { "epoch": 0.12864856405039518, "grad_norm": 0.0, "learning_rate": 1.9494023304872975e-05, "loss": 1.2241, "step": 3288 }, { "epoch": 0.12868769074262462, "grad_norm": 0.0, "learning_rate": 1.9493625238950143e-05, "loss": 1.3329, "step": 3289 }, { "epoch": 0.12872681743485406, "grad_norm": 0.0, "learning_rate": 1.9493227020571593e-05, "loss": 1.2157, "step": 3290 }, { "epoch": 0.1287659441270835, "grad_norm": 0.0, "learning_rate": 1.949282864974371e-05, "loss": 1.227, "step": 3291 }, { "epoch": 0.12880507081931294, "grad_norm": 0.0, "learning_rate": 1.9492430126472897e-05, "loss": 1.0506, "step": 3292 }, { "epoch": 0.12884419751154239, "grad_norm": 0.0, "learning_rate": 1.9492031450765548e-05, "loss": 1.2, "step": 3293 }, { "epoch": 0.12888332420377183, "grad_norm": 0.0, "learning_rate": 1.9491632622628067e-05, "loss": 1.1318, "step": 3294 }, { "epoch": 0.12892245089600124, "grad_norm": 0.0, "learning_rate": 1.949123364206686e-05, "loss": 1.1996, "step": 3295 }, { "epoch": 0.12896157758823068, "grad_norm": 0.0, "learning_rate": 1.9490834509088336e-05, "loss": 1.2516, "step": 3296 }, { "epoch": 0.12900070428046012, "grad_norm": 0.0, "learning_rate": 1.9490435223698902e-05, "loss": 1.0964, "step": 3297 }, { "epoch": 0.12903983097268956, "grad_norm": 0.0, "learning_rate": 1.9490035785904972e-05, "loss": 1.1927, "step": 3298 }, { "epoch": 0.129078957664919, "grad_norm": 0.0, "learning_rate": 1.948963619571296e-05, "loss": 1.0611, "step": 3299 }, { "epoch": 0.12911808435714844, "grad_norm": 0.0, "learning_rate": 1.9489236453129276e-05, "loss": 1.2014, "step": 3300 }, { "epoch": 0.12915721104937788, "grad_norm": 0.0, "learning_rate": 1.948883655816035e-05, "loss": 1.2199, "step": 3301 }, { "epoch": 0.12919633774160733, "grad_norm": 0.0, "learning_rate": 1.9488436510812594e-05, "loss": 1.1394, "step": 3302 }, { "epoch": 0.12923546443383677, "grad_norm": 0.0, "learning_rate": 1.9488036311092442e-05, "loss": 1.091, "step": 3303 }, { "epoch": 0.1292745911260662, "grad_norm": 0.0, "learning_rate": 1.9487635959006314e-05, "loss": 1.252, "step": 3304 }, { "epoch": 0.12931371781829565, "grad_norm": 0.0, "learning_rate": 1.9487235454560642e-05, "loss": 1.1823, "step": 3305 }, { "epoch": 0.1293528445105251, "grad_norm": 0.0, "learning_rate": 1.9486834797761855e-05, "loss": 1.1141, "step": 3306 }, { "epoch": 0.12939197120275453, "grad_norm": 0.0, "learning_rate": 1.9486433988616392e-05, "loss": 1.1714, "step": 3307 }, { "epoch": 0.12943109789498397, "grad_norm": 0.0, "learning_rate": 1.9486033027130685e-05, "loss": 1.2712, "step": 3308 }, { "epoch": 0.12947022458721338, "grad_norm": 0.0, "learning_rate": 1.9485631913311175e-05, "loss": 1.1418, "step": 3309 }, { "epoch": 0.12950935127944282, "grad_norm": 0.0, "learning_rate": 1.9485230647164298e-05, "loss": 1.1676, "step": 3310 }, { "epoch": 0.12954847797167227, "grad_norm": 0.0, "learning_rate": 1.948482922869651e-05, "loss": 1.215, "step": 3311 }, { "epoch": 0.1295876046639017, "grad_norm": 0.0, "learning_rate": 1.9484427657914248e-05, "loss": 1.1418, "step": 3312 }, { "epoch": 0.12962673135613115, "grad_norm": 0.0, "learning_rate": 1.9484025934823955e-05, "loss": 1.3107, "step": 3313 }, { "epoch": 0.1296658580483606, "grad_norm": 0.0, "learning_rate": 1.9483624059432097e-05, "loss": 1.2852, "step": 3314 }, { "epoch": 0.12970498474059003, "grad_norm": 0.0, "learning_rate": 1.9483222031745118e-05, "loss": 1.3485, "step": 3315 }, { "epoch": 0.12974411143281947, "grad_norm": 0.0, "learning_rate": 1.9482819851769475e-05, "loss": 1.2416, "step": 3316 }, { "epoch": 0.1297832381250489, "grad_norm": 0.0, "learning_rate": 1.948241751951163e-05, "loss": 1.1521, "step": 3317 }, { "epoch": 0.12982236481727835, "grad_norm": 0.0, "learning_rate": 1.948201503497804e-05, "loss": 1.2405, "step": 3318 }, { "epoch": 0.1298614915095078, "grad_norm": 0.0, "learning_rate": 1.9481612398175175e-05, "loss": 1.233, "step": 3319 }, { "epoch": 0.12990061820173723, "grad_norm": 0.0, "learning_rate": 1.948120960910949e-05, "loss": 1.3065, "step": 3320 }, { "epoch": 0.12993974489396667, "grad_norm": 0.0, "learning_rate": 1.948080666778746e-05, "loss": 1.1542, "step": 3321 }, { "epoch": 0.12997887158619612, "grad_norm": 0.0, "learning_rate": 1.948040357421556e-05, "loss": 1.1672, "step": 3322 }, { "epoch": 0.13001799827842553, "grad_norm": 0.0, "learning_rate": 1.9480000328400254e-05, "loss": 1.1623, "step": 3323 }, { "epoch": 0.13005712497065497, "grad_norm": 0.0, "learning_rate": 1.9479596930348024e-05, "loss": 1.1709, "step": 3324 }, { "epoch": 0.1300962516628844, "grad_norm": 0.0, "learning_rate": 1.9479193380065343e-05, "loss": 1.1249, "step": 3325 }, { "epoch": 0.13013537835511385, "grad_norm": 0.0, "learning_rate": 1.9478789677558697e-05, "loss": 1.2272, "step": 3326 }, { "epoch": 0.1301745050473433, "grad_norm": 0.0, "learning_rate": 1.9478385822834563e-05, "loss": 1.2231, "step": 3327 }, { "epoch": 0.13021363173957273, "grad_norm": 0.0, "learning_rate": 1.9477981815899435e-05, "loss": 1.2506, "step": 3328 }, { "epoch": 0.13025275843180217, "grad_norm": 0.0, "learning_rate": 1.947757765675979e-05, "loss": 1.2028, "step": 3329 }, { "epoch": 0.13029188512403161, "grad_norm": 0.0, "learning_rate": 1.9477173345422126e-05, "loss": 1.1967, "step": 3330 }, { "epoch": 0.13033101181626106, "grad_norm": 0.0, "learning_rate": 1.947676888189294e-05, "loss": 1.3072, "step": 3331 }, { "epoch": 0.1303701385084905, "grad_norm": 0.0, "learning_rate": 1.947636426617871e-05, "loss": 1.1506, "step": 3332 }, { "epoch": 0.13040926520071994, "grad_norm": 0.0, "learning_rate": 1.947595949828595e-05, "loss": 1.169, "step": 3333 }, { "epoch": 0.13044839189294938, "grad_norm": 0.0, "learning_rate": 1.9475554578221154e-05, "loss": 1.2239, "step": 3334 }, { "epoch": 0.13048751858517882, "grad_norm": 0.0, "learning_rate": 1.9475149505990828e-05, "loss": 1.2318, "step": 3335 }, { "epoch": 0.13052664527740826, "grad_norm": 0.0, "learning_rate": 1.947474428160147e-05, "loss": 1.2253, "step": 3336 }, { "epoch": 0.1305657719696377, "grad_norm": 0.0, "learning_rate": 1.947433890505959e-05, "loss": 1.2246, "step": 3337 }, { "epoch": 0.1306048986618671, "grad_norm": 0.0, "learning_rate": 1.9473933376371704e-05, "loss": 1.1895, "step": 3338 }, { "epoch": 0.13064402535409655, "grad_norm": 0.0, "learning_rate": 1.9473527695544316e-05, "loss": 1.1238, "step": 3339 }, { "epoch": 0.130683152046326, "grad_norm": 0.0, "learning_rate": 1.9473121862583946e-05, "loss": 1.2136, "step": 3340 }, { "epoch": 0.13072227873855544, "grad_norm": 0.0, "learning_rate": 1.947271587749711e-05, "loss": 1.1461, "step": 3341 }, { "epoch": 0.13076140543078488, "grad_norm": 0.0, "learning_rate": 1.9472309740290324e-05, "loss": 1.2243, "step": 3342 }, { "epoch": 0.13080053212301432, "grad_norm": 0.0, "learning_rate": 1.9471903450970116e-05, "loss": 1.1277, "step": 3343 }, { "epoch": 0.13083965881524376, "grad_norm": 0.0, "learning_rate": 1.9471497009543005e-05, "loss": 1.1697, "step": 3344 }, { "epoch": 0.1308787855074732, "grad_norm": 0.0, "learning_rate": 1.9471090416015522e-05, "loss": 1.1996, "step": 3345 }, { "epoch": 0.13091791219970264, "grad_norm": 0.0, "learning_rate": 1.9470683670394194e-05, "loss": 1.1721, "step": 3346 }, { "epoch": 0.13095703889193208, "grad_norm": 0.0, "learning_rate": 1.9470276772685555e-05, "loss": 1.2899, "step": 3347 }, { "epoch": 0.13099616558416152, "grad_norm": 0.0, "learning_rate": 1.946986972289614e-05, "loss": 1.1893, "step": 3348 }, { "epoch": 0.13103529227639096, "grad_norm": 0.0, "learning_rate": 1.946946252103248e-05, "loss": 1.1683, "step": 3349 }, { "epoch": 0.1310744189686204, "grad_norm": 0.0, "learning_rate": 1.9469055167101115e-05, "loss": 1.1762, "step": 3350 }, { "epoch": 0.13111354566084985, "grad_norm": 0.0, "learning_rate": 1.9468647661108592e-05, "loss": 1.1758, "step": 3351 }, { "epoch": 0.13115267235307926, "grad_norm": 0.0, "learning_rate": 1.9468240003061455e-05, "loss": 1.2016, "step": 3352 }, { "epoch": 0.1311917990453087, "grad_norm": 0.0, "learning_rate": 1.9467832192966246e-05, "loss": 1.206, "step": 3353 }, { "epoch": 0.13123092573753814, "grad_norm": 0.0, "learning_rate": 1.9467424230829514e-05, "loss": 1.3585, "step": 3354 }, { "epoch": 0.13127005242976758, "grad_norm": 0.0, "learning_rate": 1.9467016116657818e-05, "loss": 1.2476, "step": 3355 }, { "epoch": 0.13130917912199702, "grad_norm": 0.0, "learning_rate": 1.94666078504577e-05, "loss": 1.2427, "step": 3356 }, { "epoch": 0.13134830581422646, "grad_norm": 0.0, "learning_rate": 1.9466199432235726e-05, "loss": 1.2397, "step": 3357 }, { "epoch": 0.1313874325064559, "grad_norm": 0.0, "learning_rate": 1.946579086199845e-05, "loss": 1.2164, "step": 3358 }, { "epoch": 0.13142655919868534, "grad_norm": 0.0, "learning_rate": 1.9465382139752433e-05, "loss": 1.3044, "step": 3359 }, { "epoch": 0.13146568589091479, "grad_norm": 0.0, "learning_rate": 1.9464973265504243e-05, "loss": 1.1435, "step": 3360 }, { "epoch": 0.13150481258314423, "grad_norm": 0.0, "learning_rate": 1.9464564239260436e-05, "loss": 1.2938, "step": 3361 }, { "epoch": 0.13154393927537367, "grad_norm": 0.0, "learning_rate": 1.946415506102759e-05, "loss": 1.2368, "step": 3362 }, { "epoch": 0.1315830659676031, "grad_norm": 0.0, "learning_rate": 1.9463745730812276e-05, "loss": 1.1584, "step": 3363 }, { "epoch": 0.13162219265983255, "grad_norm": 0.0, "learning_rate": 1.9463336248621062e-05, "loss": 1.1682, "step": 3364 }, { "epoch": 0.131661319352062, "grad_norm": 0.0, "learning_rate": 1.9462926614460527e-05, "loss": 1.1924, "step": 3365 }, { "epoch": 0.1317004460442914, "grad_norm": 0.0, "learning_rate": 1.9462516828337245e-05, "loss": 1.1755, "step": 3366 }, { "epoch": 0.13173957273652084, "grad_norm": 0.0, "learning_rate": 1.9462106890257805e-05, "loss": 1.4261, "step": 3367 }, { "epoch": 0.13177869942875028, "grad_norm": 0.0, "learning_rate": 1.9461696800228783e-05, "loss": 1.2324, "step": 3368 }, { "epoch": 0.13181782612097973, "grad_norm": 0.0, "learning_rate": 1.9461286558256764e-05, "loss": 1.1129, "step": 3369 }, { "epoch": 0.13185695281320917, "grad_norm": 0.0, "learning_rate": 1.9460876164348342e-05, "loss": 1.1345, "step": 3370 }, { "epoch": 0.1318960795054386, "grad_norm": 0.0, "learning_rate": 1.9460465618510104e-05, "loss": 1.3838, "step": 3371 }, { "epoch": 0.13193520619766805, "grad_norm": 0.0, "learning_rate": 1.946005492074864e-05, "loss": 1.2642, "step": 3372 }, { "epoch": 0.1319743328898975, "grad_norm": 0.0, "learning_rate": 1.945964407107055e-05, "loss": 1.1796, "step": 3373 }, { "epoch": 0.13201345958212693, "grad_norm": 0.0, "learning_rate": 1.945923306948243e-05, "loss": 1.1628, "step": 3374 }, { "epoch": 0.13205258627435637, "grad_norm": 0.0, "learning_rate": 1.9458821915990877e-05, "loss": 1.2267, "step": 3375 }, { "epoch": 0.1320917129665858, "grad_norm": 0.0, "learning_rate": 1.94584106106025e-05, "loss": 1.2283, "step": 3376 }, { "epoch": 0.13213083965881525, "grad_norm": 0.0, "learning_rate": 1.94579991533239e-05, "loss": 1.244, "step": 3377 }, { "epoch": 0.1321699663510447, "grad_norm": 0.0, "learning_rate": 1.9457587544161686e-05, "loss": 1.1121, "step": 3378 }, { "epoch": 0.13220909304327413, "grad_norm": 0.0, "learning_rate": 1.9457175783122464e-05, "loss": 1.2671, "step": 3379 }, { "epoch": 0.13224821973550355, "grad_norm": 0.0, "learning_rate": 1.9456763870212853e-05, "loss": 1.2242, "step": 3380 }, { "epoch": 0.132287346427733, "grad_norm": 0.0, "learning_rate": 1.945635180543946e-05, "loss": 1.1901, "step": 3381 }, { "epoch": 0.13232647311996243, "grad_norm": 0.0, "learning_rate": 1.945593958880891e-05, "loss": 1.2449, "step": 3382 }, { "epoch": 0.13236559981219187, "grad_norm": 0.0, "learning_rate": 1.9455527220327816e-05, "loss": 1.295, "step": 3383 }, { "epoch": 0.1324047265044213, "grad_norm": 0.0, "learning_rate": 1.9455114700002808e-05, "loss": 1.0955, "step": 3384 }, { "epoch": 0.13244385319665075, "grad_norm": 0.0, "learning_rate": 1.9454702027840503e-05, "loss": 1.2501, "step": 3385 }, { "epoch": 0.1324829798888802, "grad_norm": 0.0, "learning_rate": 1.945428920384753e-05, "loss": 1.2301, "step": 3386 }, { "epoch": 0.13252210658110963, "grad_norm": 0.0, "learning_rate": 1.945387622803052e-05, "loss": 1.1758, "step": 3387 }, { "epoch": 0.13256123327333907, "grad_norm": 0.0, "learning_rate": 1.9453463100396103e-05, "loss": 1.169, "step": 3388 }, { "epoch": 0.13260035996556852, "grad_norm": 0.0, "learning_rate": 1.9453049820950918e-05, "loss": 1.2303, "step": 3389 }, { "epoch": 0.13263948665779796, "grad_norm": 0.0, "learning_rate": 1.9452636389701593e-05, "loss": 1.1628, "step": 3390 }, { "epoch": 0.1326786133500274, "grad_norm": 0.0, "learning_rate": 1.9452222806654778e-05, "loss": 1.149, "step": 3391 }, { "epoch": 0.13271774004225684, "grad_norm": 0.0, "learning_rate": 1.9451809071817105e-05, "loss": 1.3072, "step": 3392 }, { "epoch": 0.13275686673448628, "grad_norm": 0.0, "learning_rate": 1.9451395185195224e-05, "loss": 1.1671, "step": 3393 }, { "epoch": 0.1327959934267157, "grad_norm": 0.0, "learning_rate": 1.945098114679578e-05, "loss": 1.3302, "step": 3394 }, { "epoch": 0.13283512011894513, "grad_norm": 0.0, "learning_rate": 1.945056695662542e-05, "loss": 1.1697, "step": 3395 }, { "epoch": 0.13287424681117457, "grad_norm": 0.0, "learning_rate": 1.9450152614690798e-05, "loss": 1.1204, "step": 3396 }, { "epoch": 0.13291337350340401, "grad_norm": 0.0, "learning_rate": 1.9449738120998563e-05, "loss": 1.2796, "step": 3397 }, { "epoch": 0.13295250019563346, "grad_norm": 0.0, "learning_rate": 1.9449323475555383e-05, "loss": 1.2104, "step": 3398 }, { "epoch": 0.1329916268878629, "grad_norm": 0.0, "learning_rate": 1.9448908678367903e-05, "loss": 1.1573, "step": 3399 }, { "epoch": 0.13303075358009234, "grad_norm": 0.0, "learning_rate": 1.944849372944279e-05, "loss": 1.2334, "step": 3400 }, { "epoch": 0.13306988027232178, "grad_norm": 0.0, "learning_rate": 1.944807862878671e-05, "loss": 1.2523, "step": 3401 }, { "epoch": 0.13310900696455122, "grad_norm": 0.0, "learning_rate": 1.9447663376406323e-05, "loss": 1.2692, "step": 3402 }, { "epoch": 0.13314813365678066, "grad_norm": 0.0, "learning_rate": 1.9447247972308305e-05, "loss": 1.2343, "step": 3403 }, { "epoch": 0.1331872603490101, "grad_norm": 0.0, "learning_rate": 1.9446832416499316e-05, "loss": 1.2901, "step": 3404 }, { "epoch": 0.13322638704123954, "grad_norm": 0.0, "learning_rate": 1.944641670898604e-05, "loss": 1.1995, "step": 3405 }, { "epoch": 0.13326551373346898, "grad_norm": 0.0, "learning_rate": 1.944600084977515e-05, "loss": 1.2014, "step": 3406 }, { "epoch": 0.13330464042569842, "grad_norm": 0.0, "learning_rate": 1.9445584838873318e-05, "loss": 1.0504, "step": 3407 }, { "epoch": 0.13334376711792786, "grad_norm": 0.0, "learning_rate": 1.9445168676287233e-05, "loss": 1.2958, "step": 3408 }, { "epoch": 0.13338289381015728, "grad_norm": 0.0, "learning_rate": 1.9444752362023575e-05, "loss": 1.0613, "step": 3409 }, { "epoch": 0.13342202050238672, "grad_norm": 0.0, "learning_rate": 1.944433589608903e-05, "loss": 1.2456, "step": 3410 }, { "epoch": 0.13346114719461616, "grad_norm": 0.0, "learning_rate": 1.9443919278490278e-05, "loss": 1.113, "step": 3411 }, { "epoch": 0.1335002738868456, "grad_norm": 0.0, "learning_rate": 1.9443502509234026e-05, "loss": 1.2078, "step": 3412 }, { "epoch": 0.13353940057907504, "grad_norm": 0.0, "learning_rate": 1.944308558832695e-05, "loss": 1.2049, "step": 3413 }, { "epoch": 0.13357852727130448, "grad_norm": 0.0, "learning_rate": 1.9442668515775755e-05, "loss": 1.188, "step": 3414 }, { "epoch": 0.13361765396353392, "grad_norm": 0.0, "learning_rate": 1.9442251291587136e-05, "loss": 1.3024, "step": 3415 }, { "epoch": 0.13365678065576336, "grad_norm": 0.0, "learning_rate": 1.9441833915767795e-05, "loss": 1.186, "step": 3416 }, { "epoch": 0.1336959073479928, "grad_norm": 0.0, "learning_rate": 1.9441416388324427e-05, "loss": 1.3642, "step": 3417 }, { "epoch": 0.13373503404022224, "grad_norm": 0.0, "learning_rate": 1.9440998709263747e-05, "loss": 1.222, "step": 3418 }, { "epoch": 0.13377416073245169, "grad_norm": 0.0, "learning_rate": 1.944058087859246e-05, "loss": 1.113, "step": 3419 }, { "epoch": 0.13381328742468113, "grad_norm": 0.0, "learning_rate": 1.9440162896317268e-05, "loss": 1.0491, "step": 3420 }, { "epoch": 0.13385241411691057, "grad_norm": 0.0, "learning_rate": 1.9439744762444893e-05, "loss": 1.2544, "step": 3421 }, { "epoch": 0.13389154080914, "grad_norm": 0.0, "learning_rate": 1.9439326476982044e-05, "loss": 1.321, "step": 3422 }, { "epoch": 0.13393066750136942, "grad_norm": 0.0, "learning_rate": 1.943890803993544e-05, "loss": 1.2798, "step": 3423 }, { "epoch": 0.13396979419359886, "grad_norm": 0.0, "learning_rate": 1.9438489451311802e-05, "loss": 1.164, "step": 3424 }, { "epoch": 0.1340089208858283, "grad_norm": 0.0, "learning_rate": 1.9438070711117848e-05, "loss": 1.1968, "step": 3425 }, { "epoch": 0.13404804757805774, "grad_norm": 0.0, "learning_rate": 1.9437651819360308e-05, "loss": 1.1601, "step": 3426 }, { "epoch": 0.13408717427028718, "grad_norm": 0.0, "learning_rate": 1.9437232776045903e-05, "loss": 1.0734, "step": 3427 }, { "epoch": 0.13412630096251663, "grad_norm": 0.0, "learning_rate": 1.9436813581181366e-05, "loss": 1.2102, "step": 3428 }, { "epoch": 0.13416542765474607, "grad_norm": 0.0, "learning_rate": 1.943639423477343e-05, "loss": 1.2148, "step": 3429 }, { "epoch": 0.1342045543469755, "grad_norm": 0.0, "learning_rate": 1.9435974736828825e-05, "loss": 1.2086, "step": 3430 }, { "epoch": 0.13424368103920495, "grad_norm": 0.0, "learning_rate": 1.943555508735429e-05, "loss": 1.2175, "step": 3431 }, { "epoch": 0.1342828077314344, "grad_norm": 0.0, "learning_rate": 1.9435135286356563e-05, "loss": 1.2202, "step": 3432 }, { "epoch": 0.13432193442366383, "grad_norm": 0.0, "learning_rate": 1.9434715333842383e-05, "loss": 1.3006, "step": 3433 }, { "epoch": 0.13436106111589327, "grad_norm": 0.0, "learning_rate": 1.9434295229818505e-05, "loss": 1.0957, "step": 3434 }, { "epoch": 0.1344001878081227, "grad_norm": 0.0, "learning_rate": 1.943387497429166e-05, "loss": 1.1854, "step": 3435 }, { "epoch": 0.13443931450035215, "grad_norm": 0.0, "learning_rate": 1.9433454567268607e-05, "loss": 1.226, "step": 3436 }, { "epoch": 0.13447844119258157, "grad_norm": 0.0, "learning_rate": 1.9433034008756096e-05, "loss": 1.3373, "step": 3437 }, { "epoch": 0.134517567884811, "grad_norm": 0.0, "learning_rate": 1.943261329876088e-05, "loss": 1.2419, "step": 3438 }, { "epoch": 0.13455669457704045, "grad_norm": 0.0, "learning_rate": 1.9432192437289712e-05, "loss": 1.1807, "step": 3439 }, { "epoch": 0.1345958212692699, "grad_norm": 0.0, "learning_rate": 1.9431771424349354e-05, "loss": 1.1665, "step": 3440 }, { "epoch": 0.13463494796149933, "grad_norm": 0.0, "learning_rate": 1.9431350259946563e-05, "loss": 1.1431, "step": 3441 }, { "epoch": 0.13467407465372877, "grad_norm": 0.0, "learning_rate": 1.9430928944088107e-05, "loss": 1.0773, "step": 3442 }, { "epoch": 0.1347132013459582, "grad_norm": 0.0, "learning_rate": 1.943050747678075e-05, "loss": 1.2581, "step": 3443 }, { "epoch": 0.13475232803818765, "grad_norm": 0.0, "learning_rate": 1.9430085858031258e-05, "loss": 1.3192, "step": 3444 }, { "epoch": 0.1347914547304171, "grad_norm": 0.0, "learning_rate": 1.9429664087846407e-05, "loss": 1.2042, "step": 3445 }, { "epoch": 0.13483058142264653, "grad_norm": 0.0, "learning_rate": 1.9429242166232966e-05, "loss": 1.0297, "step": 3446 }, { "epoch": 0.13486970811487597, "grad_norm": 0.0, "learning_rate": 1.9428820093197708e-05, "loss": 1.1585, "step": 3447 }, { "epoch": 0.13490883480710542, "grad_norm": 0.0, "learning_rate": 1.9428397868747416e-05, "loss": 1.2093, "step": 3448 }, { "epoch": 0.13494796149933486, "grad_norm": 0.0, "learning_rate": 1.9427975492888868e-05, "loss": 1.3597, "step": 3449 }, { "epoch": 0.1349870881915643, "grad_norm": 0.0, "learning_rate": 1.9427552965628848e-05, "loss": 1.1675, "step": 3450 }, { "epoch": 0.1350262148837937, "grad_norm": 0.0, "learning_rate": 1.9427130286974144e-05, "loss": 1.1821, "step": 3451 }, { "epoch": 0.13506534157602315, "grad_norm": 0.0, "learning_rate": 1.9426707456931534e-05, "loss": 1.2452, "step": 3452 }, { "epoch": 0.1351044682682526, "grad_norm": 0.0, "learning_rate": 1.942628447550782e-05, "loss": 1.2345, "step": 3453 }, { "epoch": 0.13514359496048203, "grad_norm": 0.0, "learning_rate": 1.9425861342709788e-05, "loss": 1.2383, "step": 3454 }, { "epoch": 0.13518272165271147, "grad_norm": 0.0, "learning_rate": 1.9425438058544233e-05, "loss": 1.1517, "step": 3455 }, { "epoch": 0.13522184834494091, "grad_norm": 0.0, "learning_rate": 1.9425014623017953e-05, "loss": 1.2261, "step": 3456 }, { "epoch": 0.13526097503717036, "grad_norm": 0.0, "learning_rate": 1.942459103613775e-05, "loss": 1.3705, "step": 3457 }, { "epoch": 0.1353001017293998, "grad_norm": 0.0, "learning_rate": 1.9424167297910425e-05, "loss": 1.2511, "step": 3458 }, { "epoch": 0.13533922842162924, "grad_norm": 0.0, "learning_rate": 1.942374340834278e-05, "loss": 1.2046, "step": 3459 }, { "epoch": 0.13537835511385868, "grad_norm": 0.0, "learning_rate": 1.9423319367441625e-05, "loss": 1.158, "step": 3460 }, { "epoch": 0.13541748180608812, "grad_norm": 0.0, "learning_rate": 1.9422895175213772e-05, "loss": 1.2061, "step": 3461 }, { "epoch": 0.13545660849831756, "grad_norm": 0.0, "learning_rate": 1.942247083166603e-05, "loss": 1.1968, "step": 3462 }, { "epoch": 0.135495735190547, "grad_norm": 0.0, "learning_rate": 1.9422046336805207e-05, "loss": 1.2328, "step": 3463 }, { "epoch": 0.13553486188277644, "grad_norm": 0.0, "learning_rate": 1.942162169063813e-05, "loss": 1.1639, "step": 3464 }, { "epoch": 0.13557398857500588, "grad_norm": 0.0, "learning_rate": 1.9421196893171617e-05, "loss": 1.1855, "step": 3465 }, { "epoch": 0.1356131152672353, "grad_norm": 0.0, "learning_rate": 1.9420771944412486e-05, "loss": 1.3157, "step": 3466 }, { "epoch": 0.13565224195946474, "grad_norm": 0.0, "learning_rate": 1.9420346844367562e-05, "loss": 1.2004, "step": 3467 }, { "epoch": 0.13569136865169418, "grad_norm": 0.0, "learning_rate": 1.941992159304367e-05, "loss": 1.187, "step": 3468 }, { "epoch": 0.13573049534392362, "grad_norm": 0.0, "learning_rate": 1.9419496190447645e-05, "loss": 1.174, "step": 3469 }, { "epoch": 0.13576962203615306, "grad_norm": 0.0, "learning_rate": 1.941907063658631e-05, "loss": 1.1549, "step": 3470 }, { "epoch": 0.1358087487283825, "grad_norm": 0.0, "learning_rate": 1.9418644931466507e-05, "loss": 1.1732, "step": 3471 }, { "epoch": 0.13584787542061194, "grad_norm": 0.0, "learning_rate": 1.941821907509507e-05, "loss": 1.1872, "step": 3472 }, { "epoch": 0.13588700211284138, "grad_norm": 0.0, "learning_rate": 1.9417793067478832e-05, "loss": 1.3188, "step": 3473 }, { "epoch": 0.13592612880507082, "grad_norm": 0.0, "learning_rate": 1.9417366908624638e-05, "loss": 1.1635, "step": 3474 }, { "epoch": 0.13596525549730026, "grad_norm": 0.0, "learning_rate": 1.9416940598539335e-05, "loss": 1.1415, "step": 3475 }, { "epoch": 0.1360043821895297, "grad_norm": 0.0, "learning_rate": 1.9416514137229767e-05, "loss": 1.0001, "step": 3476 }, { "epoch": 0.13604350888175915, "grad_norm": 0.0, "learning_rate": 1.941608752470278e-05, "loss": 1.195, "step": 3477 }, { "epoch": 0.1360826355739886, "grad_norm": 0.0, "learning_rate": 1.9415660760965223e-05, "loss": 1.2423, "step": 3478 }, { "epoch": 0.13612176226621803, "grad_norm": 0.0, "learning_rate": 1.941523384602396e-05, "loss": 1.2302, "step": 3479 }, { "epoch": 0.13616088895844744, "grad_norm": 0.0, "learning_rate": 1.9414806779885836e-05, "loss": 1.2225, "step": 3480 }, { "epoch": 0.13620001565067688, "grad_norm": 0.0, "learning_rate": 1.941437956255771e-05, "loss": 1.1946, "step": 3481 }, { "epoch": 0.13623914234290632, "grad_norm": 0.0, "learning_rate": 1.941395219404645e-05, "loss": 1.1821, "step": 3482 }, { "epoch": 0.13627826903513576, "grad_norm": 0.0, "learning_rate": 1.9413524674358907e-05, "loss": 1.1967, "step": 3483 }, { "epoch": 0.1363173957273652, "grad_norm": 0.0, "learning_rate": 1.941309700350196e-05, "loss": 1.132, "step": 3484 }, { "epoch": 0.13635652241959464, "grad_norm": 0.0, "learning_rate": 1.9412669181482467e-05, "loss": 1.1053, "step": 3485 }, { "epoch": 0.13639564911182409, "grad_norm": 0.0, "learning_rate": 1.94122412083073e-05, "loss": 1.2185, "step": 3486 }, { "epoch": 0.13643477580405353, "grad_norm": 0.0, "learning_rate": 1.941181308398334e-05, "loss": 1.1915, "step": 3487 }, { "epoch": 0.13647390249628297, "grad_norm": 0.0, "learning_rate": 1.941138480851745e-05, "loss": 1.1175, "step": 3488 }, { "epoch": 0.1365130291885124, "grad_norm": 0.0, "learning_rate": 1.9410956381916514e-05, "loss": 1.2513, "step": 3489 }, { "epoch": 0.13655215588074185, "grad_norm": 0.0, "learning_rate": 1.9410527804187412e-05, "loss": 1.2429, "step": 3490 }, { "epoch": 0.1365912825729713, "grad_norm": 0.0, "learning_rate": 1.9410099075337028e-05, "loss": 1.1631, "step": 3491 }, { "epoch": 0.13663040926520073, "grad_norm": 0.0, "learning_rate": 1.940967019537224e-05, "loss": 1.038, "step": 3492 }, { "epoch": 0.13666953595743017, "grad_norm": 0.0, "learning_rate": 1.9409241164299942e-05, "loss": 1.1463, "step": 3493 }, { "epoch": 0.13670866264965958, "grad_norm": 0.0, "learning_rate": 1.940881198212702e-05, "loss": 1.2415, "step": 3494 }, { "epoch": 0.13674778934188903, "grad_norm": 0.0, "learning_rate": 1.940838264886037e-05, "loss": 1.2462, "step": 3495 }, { "epoch": 0.13678691603411847, "grad_norm": 0.0, "learning_rate": 1.940795316450688e-05, "loss": 1.3203, "step": 3496 }, { "epoch": 0.1368260427263479, "grad_norm": 0.0, "learning_rate": 1.9407523529073455e-05, "loss": 1.2718, "step": 3497 }, { "epoch": 0.13686516941857735, "grad_norm": 0.0, "learning_rate": 1.9407093742566988e-05, "loss": 1.155, "step": 3498 }, { "epoch": 0.1369042961108068, "grad_norm": 0.0, "learning_rate": 1.9406663804994384e-05, "loss": 1.1572, "step": 3499 }, { "epoch": 0.13694342280303623, "grad_norm": 0.0, "learning_rate": 1.9406233716362544e-05, "loss": 1.225, "step": 3500 }, { "epoch": 0.13698254949526567, "grad_norm": 0.0, "learning_rate": 1.940580347667838e-05, "loss": 1.1915, "step": 3501 }, { "epoch": 0.1370216761874951, "grad_norm": 0.0, "learning_rate": 1.94053730859488e-05, "loss": 1.2568, "step": 3502 }, { "epoch": 0.13706080287972455, "grad_norm": 0.0, "learning_rate": 1.940494254418071e-05, "loss": 1.2125, "step": 3503 }, { "epoch": 0.137099929571954, "grad_norm": 0.0, "learning_rate": 1.9404511851381032e-05, "loss": 1.2465, "step": 3504 }, { "epoch": 0.13713905626418343, "grad_norm": 0.0, "learning_rate": 1.9404081007556673e-05, "loss": 1.1589, "step": 3505 }, { "epoch": 0.13717818295641288, "grad_norm": 0.0, "learning_rate": 1.9403650012714563e-05, "loss": 1.0957, "step": 3506 }, { "epoch": 0.13721730964864232, "grad_norm": 0.0, "learning_rate": 1.940321886686161e-05, "loss": 1.0853, "step": 3507 }, { "epoch": 0.13725643634087173, "grad_norm": 0.0, "learning_rate": 1.940278757000475e-05, "loss": 1.2385, "step": 3508 }, { "epoch": 0.13729556303310117, "grad_norm": 0.0, "learning_rate": 1.94023561221509e-05, "loss": 1.294, "step": 3509 }, { "epoch": 0.1373346897253306, "grad_norm": 0.0, "learning_rate": 1.9401924523306998e-05, "loss": 1.251, "step": 3510 }, { "epoch": 0.13737381641756005, "grad_norm": 0.0, "learning_rate": 1.9401492773479966e-05, "loss": 1.1728, "step": 3511 }, { "epoch": 0.1374129431097895, "grad_norm": 0.0, "learning_rate": 1.940106087267674e-05, "loss": 1.1326, "step": 3512 }, { "epoch": 0.13745206980201893, "grad_norm": 0.0, "learning_rate": 1.940062882090426e-05, "loss": 1.2356, "step": 3513 }, { "epoch": 0.13749119649424837, "grad_norm": 0.0, "learning_rate": 1.940019661816946e-05, "loss": 1.3602, "step": 3514 }, { "epoch": 0.13753032318647782, "grad_norm": 0.0, "learning_rate": 1.939976426447928e-05, "loss": 1.1753, "step": 3515 }, { "epoch": 0.13756944987870726, "grad_norm": 0.0, "learning_rate": 1.9399331759840664e-05, "loss": 1.3192, "step": 3516 }, { "epoch": 0.1376085765709367, "grad_norm": 0.0, "learning_rate": 1.939889910426056e-05, "loss": 1.2093, "step": 3517 }, { "epoch": 0.13764770326316614, "grad_norm": 0.0, "learning_rate": 1.939846629774591e-05, "loss": 1.0891, "step": 3518 }, { "epoch": 0.13768682995539558, "grad_norm": 0.0, "learning_rate": 1.939803334030367e-05, "loss": 1.2791, "step": 3519 }, { "epoch": 0.13772595664762502, "grad_norm": 0.0, "learning_rate": 1.9397600231940795e-05, "loss": 1.1771, "step": 3520 }, { "epoch": 0.13776508333985446, "grad_norm": 0.0, "learning_rate": 1.9397166972664232e-05, "loss": 1.1584, "step": 3521 }, { "epoch": 0.1378042100320839, "grad_norm": 0.0, "learning_rate": 1.9396733562480943e-05, "loss": 1.2996, "step": 3522 }, { "epoch": 0.13784333672431331, "grad_norm": 0.0, "learning_rate": 1.9396300001397888e-05, "loss": 1.2183, "step": 3523 }, { "epoch": 0.13788246341654276, "grad_norm": 0.0, "learning_rate": 1.939586628942203e-05, "loss": 1.1156, "step": 3524 }, { "epoch": 0.1379215901087722, "grad_norm": 0.0, "learning_rate": 1.9395432426560332e-05, "loss": 1.2031, "step": 3525 }, { "epoch": 0.13796071680100164, "grad_norm": 0.0, "learning_rate": 1.9394998412819763e-05, "loss": 1.2116, "step": 3526 }, { "epoch": 0.13799984349323108, "grad_norm": 0.0, "learning_rate": 1.939456424820729e-05, "loss": 1.1798, "step": 3527 }, { "epoch": 0.13803897018546052, "grad_norm": 0.0, "learning_rate": 1.9394129932729893e-05, "loss": 1.2037, "step": 3528 }, { "epoch": 0.13807809687768996, "grad_norm": 0.0, "learning_rate": 1.9393695466394535e-05, "loss": 1.1572, "step": 3529 }, { "epoch": 0.1381172235699194, "grad_norm": 0.0, "learning_rate": 1.9393260849208202e-05, "loss": 1.2089, "step": 3530 }, { "epoch": 0.13815635026214884, "grad_norm": 0.0, "learning_rate": 1.939282608117787e-05, "loss": 1.1366, "step": 3531 }, { "epoch": 0.13819547695437828, "grad_norm": 0.0, "learning_rate": 1.9392391162310516e-05, "loss": 1.1314, "step": 3532 }, { "epoch": 0.13823460364660772, "grad_norm": 0.0, "learning_rate": 1.9391956092613132e-05, "loss": 1.1837, "step": 3533 }, { "epoch": 0.13827373033883716, "grad_norm": 0.0, "learning_rate": 1.9391520872092705e-05, "loss": 1.2556, "step": 3534 }, { "epoch": 0.1383128570310666, "grad_norm": 0.0, "learning_rate": 1.9391085500756223e-05, "loss": 1.2723, "step": 3535 }, { "epoch": 0.13835198372329605, "grad_norm": 0.0, "learning_rate": 1.939064997861067e-05, "loss": 1.1822, "step": 3536 }, { "epoch": 0.13839111041552546, "grad_norm": 0.0, "learning_rate": 1.9390214305663048e-05, "loss": 1.1912, "step": 3537 }, { "epoch": 0.1384302371077549, "grad_norm": 0.0, "learning_rate": 1.938977848192035e-05, "loss": 1.1299, "step": 3538 }, { "epoch": 0.13846936379998434, "grad_norm": 0.0, "learning_rate": 1.9389342507389573e-05, "loss": 1.2942, "step": 3539 }, { "epoch": 0.13850849049221378, "grad_norm": 0.0, "learning_rate": 1.9388906382077724e-05, "loss": 1.1631, "step": 3540 }, { "epoch": 0.13854761718444322, "grad_norm": 0.0, "learning_rate": 1.9388470105991805e-05, "loss": 1.1323, "step": 3541 }, { "epoch": 0.13858674387667266, "grad_norm": 0.0, "learning_rate": 1.9388033679138818e-05, "loss": 1.1668, "step": 3542 }, { "epoch": 0.1386258705689021, "grad_norm": 0.0, "learning_rate": 1.9387597101525775e-05, "loss": 1.2866, "step": 3543 }, { "epoch": 0.13866499726113155, "grad_norm": 0.0, "learning_rate": 1.9387160373159684e-05, "loss": 1.2017, "step": 3544 }, { "epoch": 0.13870412395336099, "grad_norm": 0.0, "learning_rate": 1.938672349404756e-05, "loss": 1.1885, "step": 3545 }, { "epoch": 0.13874325064559043, "grad_norm": 0.0, "learning_rate": 1.938628646419642e-05, "loss": 1.1525, "step": 3546 }, { "epoch": 0.13878237733781987, "grad_norm": 0.0, "learning_rate": 1.9385849283613282e-05, "loss": 1.2365, "step": 3547 }, { "epoch": 0.1388215040300493, "grad_norm": 0.0, "learning_rate": 1.9385411952305166e-05, "loss": 1.2576, "step": 3548 }, { "epoch": 0.13886063072227875, "grad_norm": 0.0, "learning_rate": 1.9384974470279093e-05, "loss": 1.3671, "step": 3549 }, { "epoch": 0.1388997574145082, "grad_norm": 0.0, "learning_rate": 1.938453683754209e-05, "loss": 1.2363, "step": 3550 }, { "epoch": 0.1389388841067376, "grad_norm": 0.0, "learning_rate": 1.9384099054101187e-05, "loss": 1.1158, "step": 3551 }, { "epoch": 0.13897801079896704, "grad_norm": 0.0, "learning_rate": 1.938366111996341e-05, "loss": 1.2565, "step": 3552 }, { "epoch": 0.13901713749119649, "grad_norm": 0.0, "learning_rate": 1.9383223035135798e-05, "loss": 1.2094, "step": 3553 }, { "epoch": 0.13905626418342593, "grad_norm": 0.0, "learning_rate": 1.9382784799625377e-05, "loss": 1.1343, "step": 3554 }, { "epoch": 0.13909539087565537, "grad_norm": 0.0, "learning_rate": 1.938234641343919e-05, "loss": 1.1859, "step": 3555 }, { "epoch": 0.1391345175678848, "grad_norm": 0.0, "learning_rate": 1.9381907876584277e-05, "loss": 1.2136, "step": 3556 }, { "epoch": 0.13917364426011425, "grad_norm": 0.0, "learning_rate": 1.938146918906768e-05, "loss": 1.2056, "step": 3557 }, { "epoch": 0.1392127709523437, "grad_norm": 0.0, "learning_rate": 1.938103035089644e-05, "loss": 1.146, "step": 3558 }, { "epoch": 0.13925189764457313, "grad_norm": 0.0, "learning_rate": 1.9380591362077614e-05, "loss": 1.2881, "step": 3559 }, { "epoch": 0.13929102433680257, "grad_norm": 0.0, "learning_rate": 1.9380152222618243e-05, "loss": 1.3289, "step": 3560 }, { "epoch": 0.139330151029032, "grad_norm": 0.0, "learning_rate": 1.937971293252538e-05, "loss": 1.1838, "step": 3561 }, { "epoch": 0.13936927772126145, "grad_norm": 0.0, "learning_rate": 1.937927349180608e-05, "loss": 1.0883, "step": 3562 }, { "epoch": 0.1394084044134909, "grad_norm": 0.0, "learning_rate": 1.9378833900467403e-05, "loss": 1.2726, "step": 3563 }, { "epoch": 0.13944753110572033, "grad_norm": 0.0, "learning_rate": 1.9378394158516406e-05, "loss": 1.1296, "step": 3564 }, { "epoch": 0.13948665779794975, "grad_norm": 0.0, "learning_rate": 1.9377954265960147e-05, "loss": 1.2663, "step": 3565 }, { "epoch": 0.1395257844901792, "grad_norm": 0.0, "learning_rate": 1.9377514222805698e-05, "loss": 1.1847, "step": 3566 }, { "epoch": 0.13956491118240863, "grad_norm": 0.0, "learning_rate": 1.9377074029060118e-05, "loss": 1.0638, "step": 3567 }, { "epoch": 0.13960403787463807, "grad_norm": 0.0, "learning_rate": 1.937663368473048e-05, "loss": 1.179, "step": 3568 }, { "epoch": 0.1396431645668675, "grad_norm": 0.0, "learning_rate": 1.9376193189823858e-05, "loss": 1.325, "step": 3569 }, { "epoch": 0.13968229125909695, "grad_norm": 0.0, "learning_rate": 1.937575254434732e-05, "loss": 1.2375, "step": 3570 }, { "epoch": 0.1397214179513264, "grad_norm": 0.0, "learning_rate": 1.9375311748307944e-05, "loss": 1.1657, "step": 3571 }, { "epoch": 0.13976054464355583, "grad_norm": 0.0, "learning_rate": 1.937487080171281e-05, "loss": 1.1071, "step": 3572 }, { "epoch": 0.13979967133578527, "grad_norm": 0.0, "learning_rate": 1.9374429704568997e-05, "loss": 1.1248, "step": 3573 }, { "epoch": 0.13983879802801472, "grad_norm": 0.0, "learning_rate": 1.937398845688359e-05, "loss": 1.1899, "step": 3574 }, { "epoch": 0.13987792472024416, "grad_norm": 0.0, "learning_rate": 1.9373547058663674e-05, "loss": 1.2156, "step": 3575 }, { "epoch": 0.1399170514124736, "grad_norm": 0.0, "learning_rate": 1.9373105509916338e-05, "loss": 1.178, "step": 3576 }, { "epoch": 0.13995617810470304, "grad_norm": 0.0, "learning_rate": 1.9372663810648675e-05, "loss": 1.1644, "step": 3577 }, { "epoch": 0.13999530479693248, "grad_norm": 0.0, "learning_rate": 1.9372221960867773e-05, "loss": 1.2004, "step": 3578 }, { "epoch": 0.1400344314891619, "grad_norm": 0.0, "learning_rate": 1.9371779960580735e-05, "loss": 1.0425, "step": 3579 }, { "epoch": 0.14007355818139133, "grad_norm": 0.0, "learning_rate": 1.937133780979465e-05, "loss": 1.1696, "step": 3580 }, { "epoch": 0.14011268487362077, "grad_norm": 0.0, "learning_rate": 1.9370895508516624e-05, "loss": 1.1192, "step": 3581 }, { "epoch": 0.14015181156585022, "grad_norm": 0.0, "learning_rate": 1.9370453056753757e-05, "loss": 1.075, "step": 3582 }, { "epoch": 0.14019093825807966, "grad_norm": 0.0, "learning_rate": 1.9370010454513156e-05, "loss": 1.2256, "step": 3583 }, { "epoch": 0.1402300649503091, "grad_norm": 0.0, "learning_rate": 1.9369567701801933e-05, "loss": 1.2134, "step": 3584 }, { "epoch": 0.14026919164253854, "grad_norm": 0.0, "learning_rate": 1.936912479862719e-05, "loss": 1.1851, "step": 3585 }, { "epoch": 0.14030831833476798, "grad_norm": 0.0, "learning_rate": 1.9368681744996043e-05, "loss": 1.1244, "step": 3586 }, { "epoch": 0.14034744502699742, "grad_norm": 0.0, "learning_rate": 1.936823854091561e-05, "loss": 1.0472, "step": 3587 }, { "epoch": 0.14038657171922686, "grad_norm": 0.0, "learning_rate": 1.9367795186392996e-05, "loss": 1.2226, "step": 3588 }, { "epoch": 0.1404256984114563, "grad_norm": 0.0, "learning_rate": 1.936735168143534e-05, "loss": 1.186, "step": 3589 }, { "epoch": 0.14046482510368574, "grad_norm": 0.0, "learning_rate": 1.9366908026049747e-05, "loss": 1.2861, "step": 3590 }, { "epoch": 0.14050395179591518, "grad_norm": 0.0, "learning_rate": 1.9366464220243352e-05, "loss": 1.2133, "step": 3591 }, { "epoch": 0.14054307848814462, "grad_norm": 0.0, "learning_rate": 1.936602026402328e-05, "loss": 1.1085, "step": 3592 }, { "epoch": 0.14058220518037406, "grad_norm": 0.0, "learning_rate": 1.9365576157396652e-05, "loss": 1.1426, "step": 3593 }, { "epoch": 0.14062133187260348, "grad_norm": 0.0, "learning_rate": 1.9365131900370612e-05, "loss": 1.1984, "step": 3594 }, { "epoch": 0.14066045856483292, "grad_norm": 0.0, "learning_rate": 1.9364687492952286e-05, "loss": 1.1729, "step": 3595 }, { "epoch": 0.14069958525706236, "grad_norm": 0.0, "learning_rate": 1.9364242935148817e-05, "loss": 1.2335, "step": 3596 }, { "epoch": 0.1407387119492918, "grad_norm": 0.0, "learning_rate": 1.936379822696734e-05, "loss": 1.1885, "step": 3597 }, { "epoch": 0.14077783864152124, "grad_norm": 0.0, "learning_rate": 1.9363353368414995e-05, "loss": 1.2887, "step": 3598 }, { "epoch": 0.14081696533375068, "grad_norm": 0.0, "learning_rate": 1.9362908359498932e-05, "loss": 1.0678, "step": 3599 }, { "epoch": 0.14085609202598012, "grad_norm": 0.0, "learning_rate": 1.9362463200226288e-05, "loss": 1.1898, "step": 3600 }, { "epoch": 0.14089521871820956, "grad_norm": 0.0, "learning_rate": 1.9362017890604215e-05, "loss": 1.1555, "step": 3601 }, { "epoch": 0.140934345410439, "grad_norm": 0.0, "learning_rate": 1.9361572430639873e-05, "loss": 1.1439, "step": 3602 }, { "epoch": 0.14097347210266845, "grad_norm": 0.0, "learning_rate": 1.9361126820340406e-05, "loss": 1.2336, "step": 3603 }, { "epoch": 0.1410125987948979, "grad_norm": 0.0, "learning_rate": 1.936068105971297e-05, "loss": 1.304, "step": 3604 }, { "epoch": 0.14105172548712733, "grad_norm": 0.0, "learning_rate": 1.936023514876473e-05, "loss": 1.2192, "step": 3605 }, { "epoch": 0.14109085217935677, "grad_norm": 0.0, "learning_rate": 1.9359789087502837e-05, "loss": 1.1263, "step": 3606 }, { "epoch": 0.1411299788715862, "grad_norm": 0.0, "learning_rate": 1.935934287593446e-05, "loss": 1.1036, "step": 3607 }, { "epoch": 0.14116910556381562, "grad_norm": 0.0, "learning_rate": 1.935889651406677e-05, "loss": 1.2493, "step": 3608 }, { "epoch": 0.14120823225604506, "grad_norm": 0.0, "learning_rate": 1.9358450001906926e-05, "loss": 1.1955, "step": 3609 }, { "epoch": 0.1412473589482745, "grad_norm": 0.0, "learning_rate": 1.9358003339462103e-05, "loss": 1.219, "step": 3610 }, { "epoch": 0.14128648564050394, "grad_norm": 0.0, "learning_rate": 1.9357556526739468e-05, "loss": 1.2116, "step": 3611 }, { "epoch": 0.14132561233273339, "grad_norm": 0.0, "learning_rate": 1.935710956374621e-05, "loss": 1.2627, "step": 3612 }, { "epoch": 0.14136473902496283, "grad_norm": 0.0, "learning_rate": 1.935666245048949e-05, "loss": 1.2943, "step": 3613 }, { "epoch": 0.14140386571719227, "grad_norm": 0.0, "learning_rate": 1.9356215186976496e-05, "loss": 1.1857, "step": 3614 }, { "epoch": 0.1414429924094217, "grad_norm": 0.0, "learning_rate": 1.9355767773214414e-05, "loss": 1.2057, "step": 3615 }, { "epoch": 0.14148211910165115, "grad_norm": 0.0, "learning_rate": 1.935532020921042e-05, "loss": 1.2388, "step": 3616 }, { "epoch": 0.1415212457938806, "grad_norm": 0.0, "learning_rate": 1.935487249497171e-05, "loss": 1.229, "step": 3617 }, { "epoch": 0.14156037248611003, "grad_norm": 0.0, "learning_rate": 1.935442463050547e-05, "loss": 1.0476, "step": 3618 }, { "epoch": 0.14159949917833947, "grad_norm": 0.0, "learning_rate": 1.935397661581889e-05, "loss": 1.246, "step": 3619 }, { "epoch": 0.1416386258705689, "grad_norm": 0.0, "learning_rate": 1.935352845091917e-05, "loss": 1.2667, "step": 3620 }, { "epoch": 0.14167775256279835, "grad_norm": 0.0, "learning_rate": 1.93530801358135e-05, "loss": 1.1583, "step": 3621 }, { "epoch": 0.14171687925502777, "grad_norm": 0.0, "learning_rate": 1.9352631670509082e-05, "loss": 1.2857, "step": 3622 }, { "epoch": 0.1417560059472572, "grad_norm": 0.0, "learning_rate": 1.9352183055013124e-05, "loss": 1.2337, "step": 3623 }, { "epoch": 0.14179513263948665, "grad_norm": 0.0, "learning_rate": 1.935173428933282e-05, "loss": 1.1395, "step": 3624 }, { "epoch": 0.1418342593317161, "grad_norm": 0.0, "learning_rate": 1.9351285373475388e-05, "loss": 1.192, "step": 3625 }, { "epoch": 0.14187338602394553, "grad_norm": 0.0, "learning_rate": 1.9350836307448023e-05, "loss": 1.1307, "step": 3626 }, { "epoch": 0.14191251271617497, "grad_norm": 0.0, "learning_rate": 1.9350387091257952e-05, "loss": 1.251, "step": 3627 }, { "epoch": 0.1419516394084044, "grad_norm": 0.0, "learning_rate": 1.934993772491238e-05, "loss": 1.1837, "step": 3628 }, { "epoch": 0.14199076610063385, "grad_norm": 0.0, "learning_rate": 1.934948820841852e-05, "loss": 1.2009, "step": 3629 }, { "epoch": 0.1420298927928633, "grad_norm": 0.0, "learning_rate": 1.9349038541783592e-05, "loss": 1.2358, "step": 3630 }, { "epoch": 0.14206901948509273, "grad_norm": 0.0, "learning_rate": 1.9348588725014826e-05, "loss": 1.1697, "step": 3631 }, { "epoch": 0.14210814617732218, "grad_norm": 0.0, "learning_rate": 1.9348138758119437e-05, "loss": 1.2476, "step": 3632 }, { "epoch": 0.14214727286955162, "grad_norm": 0.0, "learning_rate": 1.9347688641104657e-05, "loss": 1.2087, "step": 3633 }, { "epoch": 0.14218639956178106, "grad_norm": 0.0, "learning_rate": 1.934723837397771e-05, "loss": 1.2316, "step": 3634 }, { "epoch": 0.1422255262540105, "grad_norm": 0.0, "learning_rate": 1.9346787956745822e-05, "loss": 1.1872, "step": 3635 }, { "epoch": 0.1422646529462399, "grad_norm": 0.0, "learning_rate": 1.9346337389416234e-05, "loss": 1.1292, "step": 3636 }, { "epoch": 0.14230377963846935, "grad_norm": 0.0, "learning_rate": 1.9345886671996183e-05, "loss": 1.3246, "step": 3637 }, { "epoch": 0.1423429063306988, "grad_norm": 0.0, "learning_rate": 1.9345435804492898e-05, "loss": 1.1966, "step": 3638 }, { "epoch": 0.14238203302292823, "grad_norm": 0.0, "learning_rate": 1.9344984786913627e-05, "loss": 1.3227, "step": 3639 }, { "epoch": 0.14242115971515767, "grad_norm": 0.0, "learning_rate": 1.934453361926561e-05, "loss": 1.2079, "step": 3640 }, { "epoch": 0.14246028640738712, "grad_norm": 0.0, "learning_rate": 1.9344082301556093e-05, "loss": 1.3097, "step": 3641 }, { "epoch": 0.14249941309961656, "grad_norm": 0.0, "learning_rate": 1.9343630833792322e-05, "loss": 1.0661, "step": 3642 }, { "epoch": 0.142538539791846, "grad_norm": 0.0, "learning_rate": 1.934317921598155e-05, "loss": 1.0558, "step": 3643 }, { "epoch": 0.14257766648407544, "grad_norm": 0.0, "learning_rate": 1.9342727448131025e-05, "loss": 1.2701, "step": 3644 }, { "epoch": 0.14261679317630488, "grad_norm": 0.0, "learning_rate": 1.9342275530248006e-05, "loss": 1.2429, "step": 3645 }, { "epoch": 0.14265591986853432, "grad_norm": 0.0, "learning_rate": 1.934182346233975e-05, "loss": 1.2597, "step": 3646 }, { "epoch": 0.14269504656076376, "grad_norm": 0.0, "learning_rate": 1.934137124441351e-05, "loss": 1.2032, "step": 3647 }, { "epoch": 0.1427341732529932, "grad_norm": 0.0, "learning_rate": 1.9340918876476557e-05, "loss": 1.2709, "step": 3648 }, { "epoch": 0.14277329994522264, "grad_norm": 0.0, "learning_rate": 1.934046635853615e-05, "loss": 1.2425, "step": 3649 }, { "epoch": 0.14281242663745208, "grad_norm": 0.0, "learning_rate": 1.934001369059956e-05, "loss": 1.2718, "step": 3650 }, { "epoch": 0.1428515533296815, "grad_norm": 0.0, "learning_rate": 1.9339560872674052e-05, "loss": 1.214, "step": 3651 }, { "epoch": 0.14289068002191094, "grad_norm": 0.0, "learning_rate": 1.93391079047669e-05, "loss": 1.2889, "step": 3652 }, { "epoch": 0.14292980671414038, "grad_norm": 0.0, "learning_rate": 1.9338654786885377e-05, "loss": 1.1192, "step": 3653 }, { "epoch": 0.14296893340636982, "grad_norm": 0.0, "learning_rate": 1.933820151903676e-05, "loss": 1.0825, "step": 3654 }, { "epoch": 0.14300806009859926, "grad_norm": 0.0, "learning_rate": 1.933774810122833e-05, "loss": 1.2767, "step": 3655 }, { "epoch": 0.1430471867908287, "grad_norm": 0.0, "learning_rate": 1.9337294533467363e-05, "loss": 1.2165, "step": 3656 }, { "epoch": 0.14308631348305814, "grad_norm": 0.0, "learning_rate": 1.933684081576115e-05, "loss": 1.277, "step": 3657 }, { "epoch": 0.14312544017528758, "grad_norm": 0.0, "learning_rate": 1.933638694811697e-05, "loss": 1.2098, "step": 3658 }, { "epoch": 0.14316456686751702, "grad_norm": 0.0, "learning_rate": 1.9335932930542115e-05, "loss": 1.1779, "step": 3659 }, { "epoch": 0.14320369355974646, "grad_norm": 0.0, "learning_rate": 1.933547876304388e-05, "loss": 1.1437, "step": 3660 }, { "epoch": 0.1432428202519759, "grad_norm": 0.0, "learning_rate": 1.933502444562955e-05, "loss": 1.1221, "step": 3661 }, { "epoch": 0.14328194694420535, "grad_norm": 0.0, "learning_rate": 1.9334569978306423e-05, "loss": 1.2159, "step": 3662 }, { "epoch": 0.1433210736364348, "grad_norm": 0.0, "learning_rate": 1.9334115361081803e-05, "loss": 1.3313, "step": 3663 }, { "epoch": 0.14336020032866423, "grad_norm": 0.0, "learning_rate": 1.9333660593962984e-05, "loss": 1.0428, "step": 3664 }, { "epoch": 0.14339932702089364, "grad_norm": 0.0, "learning_rate": 1.9333205676957273e-05, "loss": 1.0949, "step": 3665 }, { "epoch": 0.14343845371312308, "grad_norm": 0.0, "learning_rate": 1.9332750610071972e-05, "loss": 1.1753, "step": 3666 }, { "epoch": 0.14347758040535252, "grad_norm": 0.0, "learning_rate": 1.9332295393314397e-05, "loss": 1.2275, "step": 3667 }, { "epoch": 0.14351670709758196, "grad_norm": 0.0, "learning_rate": 1.9331840026691844e-05, "loss": 1.15, "step": 3668 }, { "epoch": 0.1435558337898114, "grad_norm": 0.0, "learning_rate": 1.9331384510211636e-05, "loss": 1.1489, "step": 3669 }, { "epoch": 0.14359496048204085, "grad_norm": 0.0, "learning_rate": 1.933092884388109e-05, "loss": 1.1802, "step": 3670 }, { "epoch": 0.1436340871742703, "grad_norm": 0.0, "learning_rate": 1.9330473027707515e-05, "loss": 1.2457, "step": 3671 }, { "epoch": 0.14367321386649973, "grad_norm": 0.0, "learning_rate": 1.9330017061698235e-05, "loss": 1.251, "step": 3672 }, { "epoch": 0.14371234055872917, "grad_norm": 0.0, "learning_rate": 1.9329560945860573e-05, "loss": 1.1429, "step": 3673 }, { "epoch": 0.1437514672509586, "grad_norm": 0.0, "learning_rate": 1.932910468020185e-05, "loss": 1.1658, "step": 3674 }, { "epoch": 0.14379059394318805, "grad_norm": 0.0, "learning_rate": 1.93286482647294e-05, "loss": 1.0803, "step": 3675 }, { "epoch": 0.1438297206354175, "grad_norm": 0.0, "learning_rate": 1.932819169945055e-05, "loss": 1.2377, "step": 3676 }, { "epoch": 0.14386884732764693, "grad_norm": 0.0, "learning_rate": 1.9327734984372622e-05, "loss": 1.2753, "step": 3677 }, { "epoch": 0.14390797401987637, "grad_norm": 0.0, "learning_rate": 1.932727811950297e-05, "loss": 1.2803, "step": 3678 }, { "epoch": 0.14394710071210579, "grad_norm": 0.0, "learning_rate": 1.932682110484891e-05, "loss": 1.1581, "step": 3679 }, { "epoch": 0.14398622740433523, "grad_norm": 0.0, "learning_rate": 1.932636394041779e-05, "loss": 1.1617, "step": 3680 }, { "epoch": 0.14402535409656467, "grad_norm": 0.0, "learning_rate": 1.9325906626216957e-05, "loss": 1.3583, "step": 3681 }, { "epoch": 0.1440644807887941, "grad_norm": 0.0, "learning_rate": 1.9325449162253746e-05, "loss": 1.1524, "step": 3682 }, { "epoch": 0.14410360748102355, "grad_norm": 0.0, "learning_rate": 1.932499154853551e-05, "loss": 1.2338, "step": 3683 }, { "epoch": 0.144142734173253, "grad_norm": 0.0, "learning_rate": 1.932453378506959e-05, "loss": 1.0989, "step": 3684 }, { "epoch": 0.14418186086548243, "grad_norm": 0.0, "learning_rate": 1.9324075871863347e-05, "loss": 1.1981, "step": 3685 }, { "epoch": 0.14422098755771187, "grad_norm": 0.0, "learning_rate": 1.9323617808924124e-05, "loss": 1.1583, "step": 3686 }, { "epoch": 0.1442601142499413, "grad_norm": 0.0, "learning_rate": 1.9323159596259288e-05, "loss": 1.2072, "step": 3687 }, { "epoch": 0.14429924094217075, "grad_norm": 0.0, "learning_rate": 1.9322701233876188e-05, "loss": 1.1793, "step": 3688 }, { "epoch": 0.1443383676344002, "grad_norm": 0.0, "learning_rate": 1.9322242721782186e-05, "loss": 1.2598, "step": 3689 }, { "epoch": 0.14437749432662964, "grad_norm": 0.0, "learning_rate": 1.9321784059984652e-05, "loss": 1.243, "step": 3690 }, { "epoch": 0.14441662101885908, "grad_norm": 0.0, "learning_rate": 1.9321325248490945e-05, "loss": 1.0851, "step": 3691 }, { "epoch": 0.14445574771108852, "grad_norm": 0.0, "learning_rate": 1.9320866287308433e-05, "loss": 1.3304, "step": 3692 }, { "epoch": 0.14449487440331793, "grad_norm": 0.0, "learning_rate": 1.932040717644449e-05, "loss": 1.1573, "step": 3693 }, { "epoch": 0.14453400109554737, "grad_norm": 0.0, "learning_rate": 1.9319947915906487e-05, "loss": 1.1022, "step": 3694 }, { "epoch": 0.1445731277877768, "grad_norm": 0.0, "learning_rate": 1.9319488505701794e-05, "loss": 1.2855, "step": 3695 }, { "epoch": 0.14461225448000625, "grad_norm": 0.0, "learning_rate": 1.93190289458378e-05, "loss": 1.3092, "step": 3696 }, { "epoch": 0.1446513811722357, "grad_norm": 0.0, "learning_rate": 1.9318569236321877e-05, "loss": 1.2092, "step": 3697 }, { "epoch": 0.14469050786446513, "grad_norm": 0.0, "learning_rate": 1.9318109377161408e-05, "loss": 1.1852, "step": 3698 }, { "epoch": 0.14472963455669458, "grad_norm": 0.0, "learning_rate": 1.931764936836378e-05, "loss": 1.1559, "step": 3699 }, { "epoch": 0.14476876124892402, "grad_norm": 0.0, "learning_rate": 1.931718920993638e-05, "loss": 1.2532, "step": 3700 }, { "epoch": 0.14480788794115346, "grad_norm": 0.0, "learning_rate": 1.931672890188659e-05, "loss": 1.319, "step": 3701 }, { "epoch": 0.1448470146333829, "grad_norm": 0.0, "learning_rate": 1.9316268444221814e-05, "loss": 1.2281, "step": 3702 }, { "epoch": 0.14488614132561234, "grad_norm": 0.0, "learning_rate": 1.931580783694944e-05, "loss": 1.3149, "step": 3703 }, { "epoch": 0.14492526801784178, "grad_norm": 0.0, "learning_rate": 1.9315347080076865e-05, "loss": 1.1848, "step": 3704 }, { "epoch": 0.14496439471007122, "grad_norm": 0.0, "learning_rate": 1.9314886173611487e-05, "loss": 1.2897, "step": 3705 }, { "epoch": 0.14500352140230066, "grad_norm": 0.0, "learning_rate": 1.931442511756071e-05, "loss": 1.247, "step": 3706 }, { "epoch": 0.1450426480945301, "grad_norm": 0.0, "learning_rate": 1.9313963911931938e-05, "loss": 1.1266, "step": 3707 }, { "epoch": 0.14508177478675952, "grad_norm": 0.0, "learning_rate": 1.9313502556732577e-05, "loss": 1.2671, "step": 3708 }, { "epoch": 0.14512090147898896, "grad_norm": 0.0, "learning_rate": 1.9313041051970037e-05, "loss": 1.0233, "step": 3709 }, { "epoch": 0.1451600281712184, "grad_norm": 0.0, "learning_rate": 1.9312579397651724e-05, "loss": 1.244, "step": 3710 }, { "epoch": 0.14519915486344784, "grad_norm": 0.0, "learning_rate": 1.931211759378506e-05, "loss": 1.1212, "step": 3711 }, { "epoch": 0.14523828155567728, "grad_norm": 0.0, "learning_rate": 1.931165564037745e-05, "loss": 1.2619, "step": 3712 }, { "epoch": 0.14527740824790672, "grad_norm": 0.0, "learning_rate": 1.931119353743632e-05, "loss": 1.2358, "step": 3713 }, { "epoch": 0.14531653494013616, "grad_norm": 0.0, "learning_rate": 1.9310731284969093e-05, "loss": 1.2182, "step": 3714 }, { "epoch": 0.1453556616323656, "grad_norm": 0.0, "learning_rate": 1.9310268882983187e-05, "loss": 1.2907, "step": 3715 }, { "epoch": 0.14539478832459504, "grad_norm": 0.0, "learning_rate": 1.9309806331486028e-05, "loss": 1.1672, "step": 3716 }, { "epoch": 0.14543391501682448, "grad_norm": 0.0, "learning_rate": 1.9309343630485043e-05, "loss": 1.0732, "step": 3717 }, { "epoch": 0.14547304170905392, "grad_norm": 0.0, "learning_rate": 1.930888077998767e-05, "loss": 1.1106, "step": 3718 }, { "epoch": 0.14551216840128336, "grad_norm": 0.0, "learning_rate": 1.9308417780001334e-05, "loss": 1.2704, "step": 3719 }, { "epoch": 0.1455512950935128, "grad_norm": 0.0, "learning_rate": 1.9307954630533474e-05, "loss": 1.1747, "step": 3720 }, { "epoch": 0.14559042178574225, "grad_norm": 0.0, "learning_rate": 1.930749133159152e-05, "loss": 1.0312, "step": 3721 }, { "epoch": 0.14562954847797166, "grad_norm": 0.0, "learning_rate": 1.930702788318292e-05, "loss": 1.2213, "step": 3722 }, { "epoch": 0.1456686751702011, "grad_norm": 0.0, "learning_rate": 1.930656428531512e-05, "loss": 1.2408, "step": 3723 }, { "epoch": 0.14570780186243054, "grad_norm": 0.0, "learning_rate": 1.930610053799556e-05, "loss": 1.1646, "step": 3724 }, { "epoch": 0.14574692855465998, "grad_norm": 0.0, "learning_rate": 1.930563664123168e-05, "loss": 1.2969, "step": 3725 }, { "epoch": 0.14578605524688942, "grad_norm": 0.0, "learning_rate": 1.9305172595030938e-05, "loss": 1.1553, "step": 3726 }, { "epoch": 0.14582518193911886, "grad_norm": 0.0, "learning_rate": 1.9304708399400785e-05, "loss": 1.2579, "step": 3727 }, { "epoch": 0.1458643086313483, "grad_norm": 0.0, "learning_rate": 1.9304244054348674e-05, "loss": 1.2567, "step": 3728 }, { "epoch": 0.14590343532357775, "grad_norm": 0.0, "learning_rate": 1.9303779559882065e-05, "loss": 1.1833, "step": 3729 }, { "epoch": 0.1459425620158072, "grad_norm": 0.0, "learning_rate": 1.9303314916008413e-05, "loss": 1.1748, "step": 3730 }, { "epoch": 0.14598168870803663, "grad_norm": 0.0, "learning_rate": 1.930285012273518e-05, "loss": 1.2799, "step": 3731 }, { "epoch": 0.14602081540026607, "grad_norm": 0.0, "learning_rate": 1.9302385180069832e-05, "loss": 1.1276, "step": 3732 }, { "epoch": 0.1460599420924955, "grad_norm": 0.0, "learning_rate": 1.9301920088019836e-05, "loss": 1.2288, "step": 3733 }, { "epoch": 0.14609906878472495, "grad_norm": 0.0, "learning_rate": 1.930145484659266e-05, "loss": 1.1057, "step": 3734 }, { "epoch": 0.1461381954769544, "grad_norm": 0.0, "learning_rate": 1.930098945579577e-05, "loss": 1.2768, "step": 3735 }, { "epoch": 0.1461773221691838, "grad_norm": 0.0, "learning_rate": 1.9300523915636645e-05, "loss": 1.2765, "step": 3736 }, { "epoch": 0.14621644886141325, "grad_norm": 0.0, "learning_rate": 1.9300058226122764e-05, "loss": 1.2313, "step": 3737 }, { "epoch": 0.14625557555364269, "grad_norm": 0.0, "learning_rate": 1.92995923872616e-05, "loss": 1.261, "step": 3738 }, { "epoch": 0.14629470224587213, "grad_norm": 0.0, "learning_rate": 1.9299126399060634e-05, "loss": 1.1788, "step": 3739 }, { "epoch": 0.14633382893810157, "grad_norm": 0.0, "learning_rate": 1.929866026152735e-05, "loss": 1.2869, "step": 3740 }, { "epoch": 0.146372955630331, "grad_norm": 0.0, "learning_rate": 1.9298193974669237e-05, "loss": 1.1149, "step": 3741 }, { "epoch": 0.14641208232256045, "grad_norm": 0.0, "learning_rate": 1.9297727538493777e-05, "loss": 1.0993, "step": 3742 }, { "epoch": 0.1464512090147899, "grad_norm": 0.0, "learning_rate": 1.9297260953008464e-05, "loss": 1.115, "step": 3743 }, { "epoch": 0.14649033570701933, "grad_norm": 0.0, "learning_rate": 1.929679421822079e-05, "loss": 1.2657, "step": 3744 }, { "epoch": 0.14652946239924877, "grad_norm": 0.0, "learning_rate": 1.9296327334138252e-05, "loss": 1.1631, "step": 3745 }, { "epoch": 0.1465685890914782, "grad_norm": 0.0, "learning_rate": 1.9295860300768345e-05, "loss": 1.1642, "step": 3746 }, { "epoch": 0.14660771578370765, "grad_norm": 0.0, "learning_rate": 1.929539311811857e-05, "loss": 1.0728, "step": 3747 }, { "epoch": 0.1466468424759371, "grad_norm": 0.0, "learning_rate": 1.929492578619643e-05, "loss": 1.1979, "step": 3748 }, { "epoch": 0.14668596916816654, "grad_norm": 0.0, "learning_rate": 1.929445830500943e-05, "loss": 1.177, "step": 3749 }, { "epoch": 0.14672509586039595, "grad_norm": 0.0, "learning_rate": 1.9293990674565074e-05, "loss": 1.3344, "step": 3750 }, { "epoch": 0.1467642225526254, "grad_norm": 0.0, "learning_rate": 1.9293522894870875e-05, "loss": 1.1491, "step": 3751 }, { "epoch": 0.14680334924485483, "grad_norm": 0.0, "learning_rate": 1.9293054965934344e-05, "loss": 1.0909, "step": 3752 }, { "epoch": 0.14684247593708427, "grad_norm": 0.0, "learning_rate": 1.929258688776299e-05, "loss": 1.1918, "step": 3753 }, { "epoch": 0.1468816026293137, "grad_norm": 0.0, "learning_rate": 1.9292118660364342e-05, "loss": 1.2003, "step": 3754 }, { "epoch": 0.14692072932154315, "grad_norm": 0.0, "learning_rate": 1.929165028374591e-05, "loss": 1.1227, "step": 3755 }, { "epoch": 0.1469598560137726, "grad_norm": 0.0, "learning_rate": 1.9291181757915216e-05, "loss": 1.0366, "step": 3756 }, { "epoch": 0.14699898270600203, "grad_norm": 0.0, "learning_rate": 1.9290713082879786e-05, "loss": 1.2307, "step": 3757 }, { "epoch": 0.14703810939823148, "grad_norm": 0.0, "learning_rate": 1.9290244258647146e-05, "loss": 1.2132, "step": 3758 }, { "epoch": 0.14707723609046092, "grad_norm": 0.0, "learning_rate": 1.9289775285224824e-05, "loss": 1.2889, "step": 3759 }, { "epoch": 0.14711636278269036, "grad_norm": 0.0, "learning_rate": 1.9289306162620353e-05, "loss": 1.1815, "step": 3760 }, { "epoch": 0.1471554894749198, "grad_norm": 0.0, "learning_rate": 1.9288836890841263e-05, "loss": 1.1805, "step": 3761 }, { "epoch": 0.14719461616714924, "grad_norm": 0.0, "learning_rate": 1.9288367469895095e-05, "loss": 1.2657, "step": 3762 }, { "epoch": 0.14723374285937868, "grad_norm": 0.0, "learning_rate": 1.9287897899789382e-05, "loss": 1.1929, "step": 3763 }, { "epoch": 0.1472728695516081, "grad_norm": 0.0, "learning_rate": 1.928742818053167e-05, "loss": 1.1764, "step": 3764 }, { "epoch": 0.14731199624383753, "grad_norm": 0.0, "learning_rate": 1.9286958312129495e-05, "loss": 1.3082, "step": 3765 }, { "epoch": 0.14735112293606697, "grad_norm": 0.0, "learning_rate": 1.928648829459041e-05, "loss": 1.0905, "step": 3766 }, { "epoch": 0.14739024962829642, "grad_norm": 0.0, "learning_rate": 1.928601812792196e-05, "loss": 1.2421, "step": 3767 }, { "epoch": 0.14742937632052586, "grad_norm": 0.0, "learning_rate": 1.9285547812131692e-05, "loss": 1.1182, "step": 3768 }, { "epoch": 0.1474685030127553, "grad_norm": 0.0, "learning_rate": 1.9285077347227164e-05, "loss": 1.238, "step": 3769 }, { "epoch": 0.14750762970498474, "grad_norm": 0.0, "learning_rate": 1.9284606733215925e-05, "loss": 1.2739, "step": 3770 }, { "epoch": 0.14754675639721418, "grad_norm": 0.0, "learning_rate": 1.928413597010554e-05, "loss": 1.1909, "step": 3771 }, { "epoch": 0.14758588308944362, "grad_norm": 0.0, "learning_rate": 1.9283665057903566e-05, "loss": 1.2244, "step": 3772 }, { "epoch": 0.14762500978167306, "grad_norm": 0.0, "learning_rate": 1.928319399661756e-05, "loss": 1.1545, "step": 3773 }, { "epoch": 0.1476641364739025, "grad_norm": 0.0, "learning_rate": 1.9282722786255092e-05, "loss": 1.1113, "step": 3774 }, { "epoch": 0.14770326316613194, "grad_norm": 0.0, "learning_rate": 1.928225142682373e-05, "loss": 1.1992, "step": 3775 }, { "epoch": 0.14774238985836138, "grad_norm": 0.0, "learning_rate": 1.928177991833104e-05, "loss": 1.2185, "step": 3776 }, { "epoch": 0.14778151655059082, "grad_norm": 0.0, "learning_rate": 1.9281308260784595e-05, "loss": 1.2766, "step": 3777 }, { "epoch": 0.14782064324282027, "grad_norm": 0.0, "learning_rate": 1.928083645419197e-05, "loss": 1.2495, "step": 3778 }, { "epoch": 0.14785976993504968, "grad_norm": 0.0, "learning_rate": 1.928036449856074e-05, "loss": 1.1443, "step": 3779 }, { "epoch": 0.14789889662727912, "grad_norm": 0.0, "learning_rate": 1.9279892393898486e-05, "loss": 1.2354, "step": 3780 }, { "epoch": 0.14793802331950856, "grad_norm": 0.0, "learning_rate": 1.927942014021279e-05, "loss": 1.1064, "step": 3781 }, { "epoch": 0.147977150011738, "grad_norm": 0.0, "learning_rate": 1.9278947737511233e-05, "loss": 1.2108, "step": 3782 }, { "epoch": 0.14801627670396744, "grad_norm": 0.0, "learning_rate": 1.9278475185801404e-05, "loss": 1.25, "step": 3783 }, { "epoch": 0.14805540339619688, "grad_norm": 0.0, "learning_rate": 1.9278002485090886e-05, "loss": 1.2753, "step": 3784 }, { "epoch": 0.14809453008842632, "grad_norm": 0.0, "learning_rate": 1.927752963538728e-05, "loss": 1.1716, "step": 3785 }, { "epoch": 0.14813365678065576, "grad_norm": 0.0, "learning_rate": 1.9277056636698168e-05, "loss": 1.1311, "step": 3786 }, { "epoch": 0.1481727834728852, "grad_norm": 0.0, "learning_rate": 1.9276583489031156e-05, "loss": 1.269, "step": 3787 }, { "epoch": 0.14821191016511465, "grad_norm": 0.0, "learning_rate": 1.9276110192393834e-05, "loss": 1.181, "step": 3788 }, { "epoch": 0.1482510368573441, "grad_norm": 0.0, "learning_rate": 1.9275636746793805e-05, "loss": 1.091, "step": 3789 }, { "epoch": 0.14829016354957353, "grad_norm": 0.0, "learning_rate": 1.9275163152238675e-05, "loss": 1.1265, "step": 3790 }, { "epoch": 0.14832929024180297, "grad_norm": 0.0, "learning_rate": 1.9274689408736047e-05, "loss": 1.2689, "step": 3791 }, { "epoch": 0.1483684169340324, "grad_norm": 0.0, "learning_rate": 1.9274215516293527e-05, "loss": 1.1711, "step": 3792 }, { "epoch": 0.14840754362626182, "grad_norm": 0.0, "learning_rate": 1.927374147491873e-05, "loss": 1.249, "step": 3793 }, { "epoch": 0.14844667031849126, "grad_norm": 0.0, "learning_rate": 1.9273267284619262e-05, "loss": 1.3002, "step": 3794 }, { "epoch": 0.1484857970107207, "grad_norm": 0.0, "learning_rate": 1.9272792945402743e-05, "loss": 1.1331, "step": 3795 }, { "epoch": 0.14852492370295015, "grad_norm": 0.0, "learning_rate": 1.9272318457276792e-05, "loss": 1.1917, "step": 3796 }, { "epoch": 0.1485640503951796, "grad_norm": 0.0, "learning_rate": 1.9271843820249022e-05, "loss": 1.0818, "step": 3797 }, { "epoch": 0.14860317708740903, "grad_norm": 0.0, "learning_rate": 1.9271369034327062e-05, "loss": 1.2186, "step": 3798 }, { "epoch": 0.14864230377963847, "grad_norm": 0.0, "learning_rate": 1.927089409951853e-05, "loss": 1.0901, "step": 3799 }, { "epoch": 0.1486814304718679, "grad_norm": 0.0, "learning_rate": 1.9270419015831056e-05, "loss": 1.0841, "step": 3800 }, { "epoch": 0.14872055716409735, "grad_norm": 0.0, "learning_rate": 1.926994378327227e-05, "loss": 1.2695, "step": 3801 }, { "epoch": 0.1487596838563268, "grad_norm": 0.0, "learning_rate": 1.92694684018498e-05, "loss": 1.1721, "step": 3802 }, { "epoch": 0.14879881054855623, "grad_norm": 0.0, "learning_rate": 1.9268992871571284e-05, "loss": 1.1616, "step": 3803 }, { "epoch": 0.14883793724078567, "grad_norm": 0.0, "learning_rate": 1.926851719244436e-05, "loss": 1.2693, "step": 3804 }, { "epoch": 0.1488770639330151, "grad_norm": 0.0, "learning_rate": 1.9268041364476662e-05, "loss": 1.1821, "step": 3805 }, { "epoch": 0.14891619062524455, "grad_norm": 0.0, "learning_rate": 1.9267565387675832e-05, "loss": 1.0999, "step": 3806 }, { "epoch": 0.14895531731747397, "grad_norm": 0.0, "learning_rate": 1.9267089262049517e-05, "loss": 1.2307, "step": 3807 }, { "epoch": 0.1489944440097034, "grad_norm": 0.0, "learning_rate": 1.926661298760536e-05, "loss": 1.1554, "step": 3808 }, { "epoch": 0.14903357070193285, "grad_norm": 0.0, "learning_rate": 1.926613656435101e-05, "loss": 1.2624, "step": 3809 }, { "epoch": 0.1490726973941623, "grad_norm": 0.0, "learning_rate": 1.926565999229412e-05, "loss": 1.1295, "step": 3810 }, { "epoch": 0.14911182408639173, "grad_norm": 0.0, "learning_rate": 1.9265183271442336e-05, "loss": 1.2524, "step": 3811 }, { "epoch": 0.14915095077862117, "grad_norm": 0.0, "learning_rate": 1.926470640180332e-05, "loss": 1.2985, "step": 3812 }, { "epoch": 0.1491900774708506, "grad_norm": 0.0, "learning_rate": 1.9264229383384734e-05, "loss": 1.0598, "step": 3813 }, { "epoch": 0.14922920416308005, "grad_norm": 0.0, "learning_rate": 1.926375221619423e-05, "loss": 1.2234, "step": 3814 }, { "epoch": 0.1492683308553095, "grad_norm": 0.0, "learning_rate": 1.926327490023947e-05, "loss": 1.2546, "step": 3815 }, { "epoch": 0.14930745754753894, "grad_norm": 0.0, "learning_rate": 1.9262797435528126e-05, "loss": 1.2014, "step": 3816 }, { "epoch": 0.14934658423976838, "grad_norm": 0.0, "learning_rate": 1.926231982206786e-05, "loss": 1.1225, "step": 3817 }, { "epoch": 0.14938571093199782, "grad_norm": 0.0, "learning_rate": 1.9261842059866345e-05, "loss": 1.1714, "step": 3818 }, { "epoch": 0.14942483762422726, "grad_norm": 0.0, "learning_rate": 1.926136414893125e-05, "loss": 1.2579, "step": 3819 }, { "epoch": 0.1494639643164567, "grad_norm": 0.0, "learning_rate": 1.9260886089270258e-05, "loss": 1.0762, "step": 3820 }, { "epoch": 0.1495030910086861, "grad_norm": 0.0, "learning_rate": 1.9260407880891035e-05, "loss": 1.2043, "step": 3821 }, { "epoch": 0.14954221770091555, "grad_norm": 0.0, "learning_rate": 1.9259929523801266e-05, "loss": 1.097, "step": 3822 }, { "epoch": 0.149581344393145, "grad_norm": 0.0, "learning_rate": 1.9259451018008632e-05, "loss": 1.1255, "step": 3823 }, { "epoch": 0.14962047108537443, "grad_norm": 0.0, "learning_rate": 1.925897236352082e-05, "loss": 1.1549, "step": 3824 }, { "epoch": 0.14965959777760388, "grad_norm": 0.0, "learning_rate": 1.925849356034551e-05, "loss": 1.2953, "step": 3825 }, { "epoch": 0.14969872446983332, "grad_norm": 0.0, "learning_rate": 1.92580146084904e-05, "loss": 1.2635, "step": 3826 }, { "epoch": 0.14973785116206276, "grad_norm": 0.0, "learning_rate": 1.925753550796317e-05, "loss": 1.2186, "step": 3827 }, { "epoch": 0.1497769778542922, "grad_norm": 0.0, "learning_rate": 1.9257056258771525e-05, "loss": 1.1989, "step": 3828 }, { "epoch": 0.14981610454652164, "grad_norm": 0.0, "learning_rate": 1.925657686092315e-05, "loss": 1.1406, "step": 3829 }, { "epoch": 0.14985523123875108, "grad_norm": 0.0, "learning_rate": 1.925609731442576e-05, "loss": 1.2759, "step": 3830 }, { "epoch": 0.14989435793098052, "grad_norm": 0.0, "learning_rate": 1.925561761928704e-05, "loss": 1.1598, "step": 3831 }, { "epoch": 0.14993348462320996, "grad_norm": 0.0, "learning_rate": 1.92551377755147e-05, "loss": 1.2648, "step": 3832 }, { "epoch": 0.1499726113154394, "grad_norm": 0.0, "learning_rate": 1.925465778311644e-05, "loss": 1.2142, "step": 3833 }, { "epoch": 0.15001173800766884, "grad_norm": 0.0, "learning_rate": 1.925417764209998e-05, "loss": 1.2515, "step": 3834 }, { "epoch": 0.15005086469989828, "grad_norm": 0.0, "learning_rate": 1.925369735247302e-05, "loss": 1.1108, "step": 3835 }, { "epoch": 0.1500899913921277, "grad_norm": 0.0, "learning_rate": 1.9253216914243275e-05, "loss": 1.1712, "step": 3836 }, { "epoch": 0.15012911808435714, "grad_norm": 0.0, "learning_rate": 1.925273632741846e-05, "loss": 1.2311, "step": 3837 }, { "epoch": 0.15016824477658658, "grad_norm": 0.0, "learning_rate": 1.9252255592006298e-05, "loss": 1.2065, "step": 3838 }, { "epoch": 0.15020737146881602, "grad_norm": 0.0, "learning_rate": 1.92517747080145e-05, "loss": 1.1738, "step": 3839 }, { "epoch": 0.15024649816104546, "grad_norm": 0.0, "learning_rate": 1.92512936754508e-05, "loss": 1.1854, "step": 3840 }, { "epoch": 0.1502856248532749, "grad_norm": 0.0, "learning_rate": 1.925081249432291e-05, "loss": 1.1276, "step": 3841 }, { "epoch": 0.15032475154550434, "grad_norm": 0.0, "learning_rate": 1.9250331164638566e-05, "loss": 1.1388, "step": 3842 }, { "epoch": 0.15036387823773378, "grad_norm": 0.0, "learning_rate": 1.9249849686405496e-05, "loss": 1.2111, "step": 3843 }, { "epoch": 0.15040300492996322, "grad_norm": 0.0, "learning_rate": 1.924936805963143e-05, "loss": 1.2115, "step": 3844 }, { "epoch": 0.15044213162219267, "grad_norm": 0.0, "learning_rate": 1.92488862843241e-05, "loss": 1.1249, "step": 3845 }, { "epoch": 0.1504812583144221, "grad_norm": 0.0, "learning_rate": 1.9248404360491247e-05, "loss": 1.2242, "step": 3846 }, { "epoch": 0.15052038500665155, "grad_norm": 0.0, "learning_rate": 1.924792228814061e-05, "loss": 1.1333, "step": 3847 }, { "epoch": 0.150559511698881, "grad_norm": 0.0, "learning_rate": 1.924744006727993e-05, "loss": 1.2502, "step": 3848 }, { "epoch": 0.15059863839111043, "grad_norm": 0.0, "learning_rate": 1.9246957697916947e-05, "loss": 1.1507, "step": 3849 }, { "epoch": 0.15063776508333984, "grad_norm": 0.0, "learning_rate": 1.9246475180059416e-05, "loss": 1.2394, "step": 3850 }, { "epoch": 0.15067689177556928, "grad_norm": 0.0, "learning_rate": 1.9245992513715074e-05, "loss": 1.034, "step": 3851 }, { "epoch": 0.15071601846779872, "grad_norm": 0.0, "learning_rate": 1.924550969889168e-05, "loss": 1.0717, "step": 3852 }, { "epoch": 0.15075514516002816, "grad_norm": 0.0, "learning_rate": 1.924502673559699e-05, "loss": 1.2034, "step": 3853 }, { "epoch": 0.1507942718522576, "grad_norm": 0.0, "learning_rate": 1.924454362383875e-05, "loss": 1.2368, "step": 3854 }, { "epoch": 0.15083339854448705, "grad_norm": 0.0, "learning_rate": 1.9244060363624727e-05, "loss": 1.186, "step": 3855 }, { "epoch": 0.1508725252367165, "grad_norm": 0.0, "learning_rate": 1.9243576954962676e-05, "loss": 1.3087, "step": 3856 }, { "epoch": 0.15091165192894593, "grad_norm": 0.0, "learning_rate": 1.924309339786036e-05, "loss": 1.1306, "step": 3857 }, { "epoch": 0.15095077862117537, "grad_norm": 0.0, "learning_rate": 1.924260969232555e-05, "loss": 1.2231, "step": 3858 }, { "epoch": 0.1509899053134048, "grad_norm": 0.0, "learning_rate": 1.924212583836601e-05, "loss": 1.2237, "step": 3859 }, { "epoch": 0.15102903200563425, "grad_norm": 0.0, "learning_rate": 1.9241641835989506e-05, "loss": 1.2693, "step": 3860 }, { "epoch": 0.1510681586978637, "grad_norm": 0.0, "learning_rate": 1.9241157685203817e-05, "loss": 1.2078, "step": 3861 }, { "epoch": 0.15110728539009313, "grad_norm": 0.0, "learning_rate": 1.924067338601672e-05, "loss": 1.2562, "step": 3862 }, { "epoch": 0.15114641208232257, "grad_norm": 0.0, "learning_rate": 1.924018893843598e-05, "loss": 1.2223, "step": 3863 }, { "epoch": 0.151185538774552, "grad_norm": 0.0, "learning_rate": 1.923970434246939e-05, "loss": 1.2868, "step": 3864 }, { "epoch": 0.15122466546678143, "grad_norm": 0.0, "learning_rate": 1.9239219598124722e-05, "loss": 1.2565, "step": 3865 }, { "epoch": 0.15126379215901087, "grad_norm": 0.0, "learning_rate": 1.9238734705409766e-05, "loss": 1.0752, "step": 3866 }, { "epoch": 0.1513029188512403, "grad_norm": 0.0, "learning_rate": 1.9238249664332307e-05, "loss": 1.2852, "step": 3867 }, { "epoch": 0.15134204554346975, "grad_norm": 0.0, "learning_rate": 1.9237764474900137e-05, "loss": 1.1938, "step": 3868 }, { "epoch": 0.1513811722356992, "grad_norm": 0.0, "learning_rate": 1.9237279137121043e-05, "loss": 1.2128, "step": 3869 }, { "epoch": 0.15142029892792863, "grad_norm": 0.0, "learning_rate": 1.923679365100282e-05, "loss": 1.0651, "step": 3870 }, { "epoch": 0.15145942562015807, "grad_norm": 0.0, "learning_rate": 1.9236308016553266e-05, "loss": 1.104, "step": 3871 }, { "epoch": 0.1514985523123875, "grad_norm": 0.0, "learning_rate": 1.9235822233780182e-05, "loss": 1.1729, "step": 3872 }, { "epoch": 0.15153767900461695, "grad_norm": 0.0, "learning_rate": 1.923533630269136e-05, "loss": 1.2643, "step": 3873 }, { "epoch": 0.1515768056968464, "grad_norm": 0.0, "learning_rate": 1.9234850223294613e-05, "loss": 1.1589, "step": 3874 }, { "epoch": 0.15161593238907584, "grad_norm": 0.0, "learning_rate": 1.9234363995597748e-05, "loss": 1.1191, "step": 3875 }, { "epoch": 0.15165505908130528, "grad_norm": 0.0, "learning_rate": 1.9233877619608564e-05, "loss": 1.0532, "step": 3876 }, { "epoch": 0.15169418577353472, "grad_norm": 0.0, "learning_rate": 1.9233391095334876e-05, "loss": 1.2513, "step": 3877 }, { "epoch": 0.15173331246576413, "grad_norm": 0.0, "learning_rate": 1.9232904422784496e-05, "loss": 1.2178, "step": 3878 }, { "epoch": 0.15177243915799357, "grad_norm": 0.0, "learning_rate": 1.923241760196524e-05, "loss": 1.2234, "step": 3879 }, { "epoch": 0.151811565850223, "grad_norm": 0.0, "learning_rate": 1.923193063288493e-05, "loss": 1.1938, "step": 3880 }, { "epoch": 0.15185069254245245, "grad_norm": 0.0, "learning_rate": 1.9231443515551382e-05, "loss": 1.1136, "step": 3881 }, { "epoch": 0.1518898192346819, "grad_norm": 0.0, "learning_rate": 1.9230956249972415e-05, "loss": 1.1674, "step": 3882 }, { "epoch": 0.15192894592691134, "grad_norm": 0.0, "learning_rate": 1.923046883615586e-05, "loss": 1.0928, "step": 3883 }, { "epoch": 0.15196807261914078, "grad_norm": 0.0, "learning_rate": 1.922998127410954e-05, "loss": 1.1082, "step": 3884 }, { "epoch": 0.15200719931137022, "grad_norm": 0.0, "learning_rate": 1.922949356384129e-05, "loss": 1.2875, "step": 3885 }, { "epoch": 0.15204632600359966, "grad_norm": 0.0, "learning_rate": 1.9229005705358937e-05, "loss": 1.1799, "step": 3886 }, { "epoch": 0.1520854526958291, "grad_norm": 0.0, "learning_rate": 1.9228517698670316e-05, "loss": 1.1807, "step": 3887 }, { "epoch": 0.15212457938805854, "grad_norm": 0.0, "learning_rate": 1.9228029543783265e-05, "loss": 1.1824, "step": 3888 }, { "epoch": 0.15216370608028798, "grad_norm": 0.0, "learning_rate": 1.9227541240705622e-05, "loss": 1.264, "step": 3889 }, { "epoch": 0.15220283277251742, "grad_norm": 0.0, "learning_rate": 1.9227052789445233e-05, "loss": 1.2039, "step": 3890 }, { "epoch": 0.15224195946474686, "grad_norm": 0.0, "learning_rate": 1.9226564190009935e-05, "loss": 1.3123, "step": 3891 }, { "epoch": 0.1522810861569763, "grad_norm": 0.0, "learning_rate": 1.9226075442407582e-05, "loss": 1.1701, "step": 3892 }, { "epoch": 0.15232021284920572, "grad_norm": 0.0, "learning_rate": 1.9225586546646014e-05, "loss": 1.2052, "step": 3893 }, { "epoch": 0.15235933954143516, "grad_norm": 0.0, "learning_rate": 1.9225097502733087e-05, "loss": 1.418, "step": 3894 }, { "epoch": 0.1523984662336646, "grad_norm": 0.0, "learning_rate": 1.9224608310676653e-05, "loss": 1.2289, "step": 3895 }, { "epoch": 0.15243759292589404, "grad_norm": 0.0, "learning_rate": 1.9224118970484568e-05, "loss": 1.2732, "step": 3896 }, { "epoch": 0.15247671961812348, "grad_norm": 0.0, "learning_rate": 1.9223629482164695e-05, "loss": 1.2563, "step": 3897 }, { "epoch": 0.15251584631035292, "grad_norm": 0.0, "learning_rate": 1.922313984572489e-05, "loss": 1.2404, "step": 3898 }, { "epoch": 0.15255497300258236, "grad_norm": 0.0, "learning_rate": 1.9222650061173012e-05, "loss": 1.1989, "step": 3899 }, { "epoch": 0.1525940996948118, "grad_norm": 0.0, "learning_rate": 1.9222160128516932e-05, "loss": 1.1565, "step": 3900 }, { "epoch": 0.15263322638704124, "grad_norm": 0.0, "learning_rate": 1.9221670047764517e-05, "loss": 1.2963, "step": 3901 }, { "epoch": 0.15267235307927068, "grad_norm": 0.0, "learning_rate": 1.9221179818923638e-05, "loss": 1.209, "step": 3902 }, { "epoch": 0.15271147977150012, "grad_norm": 0.0, "learning_rate": 1.9220689442002165e-05, "loss": 1.2054, "step": 3903 }, { "epoch": 0.15275060646372957, "grad_norm": 0.0, "learning_rate": 1.9220198917007976e-05, "loss": 1.1794, "step": 3904 }, { "epoch": 0.152789733155959, "grad_norm": 0.0, "learning_rate": 1.921970824394894e-05, "loss": 1.1905, "step": 3905 }, { "epoch": 0.15282885984818845, "grad_norm": 0.0, "learning_rate": 1.921921742283295e-05, "loss": 1.2788, "step": 3906 }, { "epoch": 0.15286798654041786, "grad_norm": 0.0, "learning_rate": 1.9218726453667872e-05, "loss": 1.2379, "step": 3907 }, { "epoch": 0.1529071132326473, "grad_norm": 0.0, "learning_rate": 1.9218235336461606e-05, "loss": 1.2142, "step": 3908 }, { "epoch": 0.15294623992487674, "grad_norm": 0.0, "learning_rate": 1.9217744071222028e-05, "loss": 1.0614, "step": 3909 }, { "epoch": 0.15298536661710618, "grad_norm": 0.0, "learning_rate": 1.921725265795703e-05, "loss": 1.1804, "step": 3910 }, { "epoch": 0.15302449330933562, "grad_norm": 0.0, "learning_rate": 1.9216761096674505e-05, "loss": 1.2074, "step": 3911 }, { "epoch": 0.15306362000156506, "grad_norm": 0.0, "learning_rate": 1.921626938738235e-05, "loss": 1.1329, "step": 3912 }, { "epoch": 0.1531027466937945, "grad_norm": 0.0, "learning_rate": 1.9215777530088452e-05, "loss": 1.1299, "step": 3913 }, { "epoch": 0.15314187338602395, "grad_norm": 0.0, "learning_rate": 1.9215285524800717e-05, "loss": 1.1452, "step": 3914 }, { "epoch": 0.1531810000782534, "grad_norm": 0.0, "learning_rate": 1.9214793371527043e-05, "loss": 1.1453, "step": 3915 }, { "epoch": 0.15322012677048283, "grad_norm": 0.0, "learning_rate": 1.9214301070275335e-05, "loss": 1.1331, "step": 3916 }, { "epoch": 0.15325925346271227, "grad_norm": 0.0, "learning_rate": 1.92138086210535e-05, "loss": 1.2079, "step": 3917 }, { "epoch": 0.1532983801549417, "grad_norm": 0.0, "learning_rate": 1.921331602386944e-05, "loss": 1.2784, "step": 3918 }, { "epoch": 0.15333750684717115, "grad_norm": 0.0, "learning_rate": 1.921282327873107e-05, "loss": 1.2344, "step": 3919 }, { "epoch": 0.1533766335394006, "grad_norm": 0.0, "learning_rate": 1.9212330385646306e-05, "loss": 1.1358, "step": 3920 }, { "epoch": 0.15341576023163, "grad_norm": 0.0, "learning_rate": 1.921183734462306e-05, "loss": 1.1962, "step": 3921 }, { "epoch": 0.15345488692385945, "grad_norm": 0.0, "learning_rate": 1.9211344155669244e-05, "loss": 1.2269, "step": 3922 }, { "epoch": 0.1534940136160889, "grad_norm": 0.0, "learning_rate": 1.9210850818792787e-05, "loss": 1.2086, "step": 3923 }, { "epoch": 0.15353314030831833, "grad_norm": 0.0, "learning_rate": 1.921035733400161e-05, "loss": 1.0941, "step": 3924 }, { "epoch": 0.15357226700054777, "grad_norm": 0.0, "learning_rate": 1.920986370130363e-05, "loss": 1.2756, "step": 3925 }, { "epoch": 0.1536113936927772, "grad_norm": 0.0, "learning_rate": 1.9209369920706783e-05, "loss": 1.2045, "step": 3926 }, { "epoch": 0.15365052038500665, "grad_norm": 0.0, "learning_rate": 1.9208875992218995e-05, "loss": 1.2863, "step": 3927 }, { "epoch": 0.1536896470772361, "grad_norm": 0.0, "learning_rate": 1.9208381915848196e-05, "loss": 1.1957, "step": 3928 }, { "epoch": 0.15372877376946553, "grad_norm": 0.0, "learning_rate": 1.9207887691602324e-05, "loss": 1.0756, "step": 3929 }, { "epoch": 0.15376790046169497, "grad_norm": 0.0, "learning_rate": 1.9207393319489314e-05, "loss": 1.1996, "step": 3930 }, { "epoch": 0.1538070271539244, "grad_norm": 0.0, "learning_rate": 1.9206898799517105e-05, "loss": 1.2125, "step": 3931 }, { "epoch": 0.15384615384615385, "grad_norm": 0.0, "learning_rate": 1.9206404131693634e-05, "loss": 1.167, "step": 3932 }, { "epoch": 0.1538852805383833, "grad_norm": 0.0, "learning_rate": 1.9205909316026854e-05, "loss": 1.2745, "step": 3933 }, { "epoch": 0.15392440723061274, "grad_norm": 0.0, "learning_rate": 1.9205414352524703e-05, "loss": 1.231, "step": 3934 }, { "epoch": 0.15396353392284215, "grad_norm": 0.0, "learning_rate": 1.9204919241195134e-05, "loss": 1.1247, "step": 3935 }, { "epoch": 0.1540026606150716, "grad_norm": 0.0, "learning_rate": 1.9204423982046097e-05, "loss": 1.2279, "step": 3936 }, { "epoch": 0.15404178730730103, "grad_norm": 0.0, "learning_rate": 1.9203928575085543e-05, "loss": 1.0684, "step": 3937 }, { "epoch": 0.15408091399953047, "grad_norm": 0.0, "learning_rate": 1.920343302032143e-05, "loss": 1.1102, "step": 3938 }, { "epoch": 0.1541200406917599, "grad_norm": 0.0, "learning_rate": 1.9202937317761713e-05, "loss": 1.22, "step": 3939 }, { "epoch": 0.15415916738398935, "grad_norm": 0.0, "learning_rate": 1.920244146741436e-05, "loss": 1.061, "step": 3940 }, { "epoch": 0.1541982940762188, "grad_norm": 0.0, "learning_rate": 1.9201945469287325e-05, "loss": 1.2156, "step": 3941 }, { "epoch": 0.15423742076844824, "grad_norm": 0.0, "learning_rate": 1.9201449323388573e-05, "loss": 1.3179, "step": 3942 }, { "epoch": 0.15427654746067768, "grad_norm": 0.0, "learning_rate": 1.9200953029726082e-05, "loss": 1.2757, "step": 3943 }, { "epoch": 0.15431567415290712, "grad_norm": 0.0, "learning_rate": 1.920045658830781e-05, "loss": 1.1016, "step": 3944 }, { "epoch": 0.15435480084513656, "grad_norm": 0.0, "learning_rate": 1.9199959999141735e-05, "loss": 1.2632, "step": 3945 }, { "epoch": 0.154393927537366, "grad_norm": 0.0, "learning_rate": 1.919946326223583e-05, "loss": 0.9958, "step": 3946 }, { "epoch": 0.15443305422959544, "grad_norm": 0.0, "learning_rate": 1.9198966377598073e-05, "loss": 1.1239, "step": 3947 }, { "epoch": 0.15447218092182488, "grad_norm": 0.0, "learning_rate": 1.9198469345236444e-05, "loss": 1.2368, "step": 3948 }, { "epoch": 0.1545113076140543, "grad_norm": 0.0, "learning_rate": 1.9197972165158924e-05, "loss": 1.2322, "step": 3949 }, { "epoch": 0.15455043430628373, "grad_norm": 0.0, "learning_rate": 1.9197474837373495e-05, "loss": 1.0769, "step": 3950 }, { "epoch": 0.15458956099851318, "grad_norm": 0.0, "learning_rate": 1.919697736188815e-05, "loss": 1.1706, "step": 3951 }, { "epoch": 0.15462868769074262, "grad_norm": 0.0, "learning_rate": 1.9196479738710865e-05, "loss": 1.123, "step": 3952 }, { "epoch": 0.15466781438297206, "grad_norm": 0.0, "learning_rate": 1.9195981967849643e-05, "loss": 1.1554, "step": 3953 }, { "epoch": 0.1547069410752015, "grad_norm": 0.0, "learning_rate": 1.9195484049312475e-05, "loss": 1.1396, "step": 3954 }, { "epoch": 0.15474606776743094, "grad_norm": 0.0, "learning_rate": 1.9194985983107354e-05, "loss": 1.2631, "step": 3955 }, { "epoch": 0.15478519445966038, "grad_norm": 0.0, "learning_rate": 1.9194487769242283e-05, "loss": 1.1077, "step": 3956 }, { "epoch": 0.15482432115188982, "grad_norm": 0.0, "learning_rate": 1.9193989407725255e-05, "loss": 1.1094, "step": 3957 }, { "epoch": 0.15486344784411926, "grad_norm": 0.0, "learning_rate": 1.919349089856428e-05, "loss": 1.176, "step": 3958 }, { "epoch": 0.1549025745363487, "grad_norm": 0.0, "learning_rate": 1.9192992241767363e-05, "loss": 1.2145, "step": 3959 }, { "epoch": 0.15494170122857814, "grad_norm": 0.0, "learning_rate": 1.9192493437342508e-05, "loss": 1.2012, "step": 3960 }, { "epoch": 0.15498082792080758, "grad_norm": 0.0, "learning_rate": 1.9191994485297727e-05, "loss": 1.1234, "step": 3961 }, { "epoch": 0.15501995461303703, "grad_norm": 0.0, "learning_rate": 1.9191495385641037e-05, "loss": 1.2312, "step": 3962 }, { "epoch": 0.15505908130526647, "grad_norm": 0.0, "learning_rate": 1.9190996138380446e-05, "loss": 1.0781, "step": 3963 }, { "epoch": 0.15509820799749588, "grad_norm": 0.0, "learning_rate": 1.919049674352397e-05, "loss": 1.1855, "step": 3964 }, { "epoch": 0.15513733468972532, "grad_norm": 0.0, "learning_rate": 1.9189997201079638e-05, "loss": 1.1898, "step": 3965 }, { "epoch": 0.15517646138195476, "grad_norm": 0.0, "learning_rate": 1.9189497511055464e-05, "loss": 1.145, "step": 3966 }, { "epoch": 0.1552155880741842, "grad_norm": 0.0, "learning_rate": 1.9188997673459475e-05, "loss": 1.1259, "step": 3967 }, { "epoch": 0.15525471476641364, "grad_norm": 0.0, "learning_rate": 1.9188497688299697e-05, "loss": 1.2398, "step": 3968 }, { "epoch": 0.15529384145864308, "grad_norm": 0.0, "learning_rate": 1.9187997555584165e-05, "loss": 1.1325, "step": 3969 }, { "epoch": 0.15533296815087252, "grad_norm": 0.0, "learning_rate": 1.91874972753209e-05, "loss": 1.1984, "step": 3970 }, { "epoch": 0.15537209484310197, "grad_norm": 0.0, "learning_rate": 1.9186996847517945e-05, "loss": 1.2748, "step": 3971 }, { "epoch": 0.1554112215353314, "grad_norm": 0.0, "learning_rate": 1.918649627218333e-05, "loss": 1.1505, "step": 3972 }, { "epoch": 0.15545034822756085, "grad_norm": 0.0, "learning_rate": 1.9185995549325097e-05, "loss": 1.2668, "step": 3973 }, { "epoch": 0.1554894749197903, "grad_norm": 0.0, "learning_rate": 1.9185494678951285e-05, "loss": 1.124, "step": 3974 }, { "epoch": 0.15552860161201973, "grad_norm": 0.0, "learning_rate": 1.918499366106994e-05, "loss": 1.3634, "step": 3975 }, { "epoch": 0.15556772830424917, "grad_norm": 0.0, "learning_rate": 1.918449249568911e-05, "loss": 1.2705, "step": 3976 }, { "epoch": 0.1556068549964786, "grad_norm": 0.0, "learning_rate": 1.9183991182816832e-05, "loss": 1.0793, "step": 3977 }, { "epoch": 0.15564598168870802, "grad_norm": 0.0, "learning_rate": 1.9183489722461167e-05, "loss": 1.0814, "step": 3978 }, { "epoch": 0.15568510838093746, "grad_norm": 0.0, "learning_rate": 1.9182988114630164e-05, "loss": 1.1106, "step": 3979 }, { "epoch": 0.1557242350731669, "grad_norm": 0.0, "learning_rate": 1.918248635933188e-05, "loss": 1.1316, "step": 3980 }, { "epoch": 0.15576336176539635, "grad_norm": 0.0, "learning_rate": 1.918198445657437e-05, "loss": 1.1895, "step": 3981 }, { "epoch": 0.1558024884576258, "grad_norm": 0.0, "learning_rate": 1.91814824063657e-05, "loss": 1.1743, "step": 3982 }, { "epoch": 0.15584161514985523, "grad_norm": 0.0, "learning_rate": 1.9180980208713923e-05, "loss": 1.2478, "step": 3983 }, { "epoch": 0.15588074184208467, "grad_norm": 0.0, "learning_rate": 1.918047786362711e-05, "loss": 1.2704, "step": 3984 }, { "epoch": 0.1559198685343141, "grad_norm": 0.0, "learning_rate": 1.9179975371113324e-05, "loss": 1.2409, "step": 3985 }, { "epoch": 0.15595899522654355, "grad_norm": 0.0, "learning_rate": 1.9179472731180638e-05, "loss": 1.2106, "step": 3986 }, { "epoch": 0.155998121918773, "grad_norm": 0.0, "learning_rate": 1.9178969943837123e-05, "loss": 1.078, "step": 3987 }, { "epoch": 0.15603724861100243, "grad_norm": 0.0, "learning_rate": 1.917846700909085e-05, "loss": 1.1657, "step": 3988 }, { "epoch": 0.15607637530323187, "grad_norm": 0.0, "learning_rate": 1.9177963926949903e-05, "loss": 1.1831, "step": 3989 }, { "epoch": 0.15611550199546131, "grad_norm": 0.0, "learning_rate": 1.9177460697422356e-05, "loss": 1.1744, "step": 3990 }, { "epoch": 0.15615462868769076, "grad_norm": 0.0, "learning_rate": 1.9176957320516287e-05, "loss": 1.1015, "step": 3991 }, { "epoch": 0.15619375537992017, "grad_norm": 0.0, "learning_rate": 1.917645379623978e-05, "loss": 1.1522, "step": 3992 }, { "epoch": 0.1562328820721496, "grad_norm": 0.0, "learning_rate": 1.9175950124600926e-05, "loss": 1.2601, "step": 3993 }, { "epoch": 0.15627200876437905, "grad_norm": 0.0, "learning_rate": 1.9175446305607816e-05, "loss": 1.2673, "step": 3994 }, { "epoch": 0.1563111354566085, "grad_norm": 0.0, "learning_rate": 1.917494233926853e-05, "loss": 1.1482, "step": 3995 }, { "epoch": 0.15635026214883793, "grad_norm": 0.0, "learning_rate": 1.917443822559117e-05, "loss": 1.2053, "step": 3996 }, { "epoch": 0.15638938884106737, "grad_norm": 0.0, "learning_rate": 1.9173933964583824e-05, "loss": 1.2182, "step": 3997 }, { "epoch": 0.1564285155332968, "grad_norm": 0.0, "learning_rate": 1.9173429556254598e-05, "loss": 1.0339, "step": 3998 }, { "epoch": 0.15646764222552625, "grad_norm": 0.0, "learning_rate": 1.9172925000611587e-05, "loss": 1.1821, "step": 3999 }, { "epoch": 0.1565067689177557, "grad_norm": 0.0, "learning_rate": 1.9172420297662896e-05, "loss": 1.0907, "step": 4000 }, { "epoch": 0.15654589560998514, "grad_norm": 0.0, "learning_rate": 1.917191544741663e-05, "loss": 1.1224, "step": 4001 }, { "epoch": 0.15658502230221458, "grad_norm": 0.0, "learning_rate": 1.917141044988089e-05, "loss": 1.1597, "step": 4002 }, { "epoch": 0.15662414899444402, "grad_norm": 0.0, "learning_rate": 1.9170905305063795e-05, "loss": 1.1562, "step": 4003 }, { "epoch": 0.15666327568667346, "grad_norm": 0.0, "learning_rate": 1.917040001297345e-05, "loss": 1.0906, "step": 4004 }, { "epoch": 0.1567024023789029, "grad_norm": 0.0, "learning_rate": 1.9169894573617974e-05, "loss": 1.1159, "step": 4005 }, { "epoch": 0.1567415290711323, "grad_norm": 0.0, "learning_rate": 1.916938898700548e-05, "loss": 1.0933, "step": 4006 }, { "epoch": 0.15678065576336175, "grad_norm": 0.0, "learning_rate": 1.916888325314409e-05, "loss": 1.3438, "step": 4007 }, { "epoch": 0.1568197824555912, "grad_norm": 0.0, "learning_rate": 1.9168377372041924e-05, "loss": 1.1796, "step": 4008 }, { "epoch": 0.15685890914782064, "grad_norm": 0.0, "learning_rate": 1.9167871343707105e-05, "loss": 1.1618, "step": 4009 }, { "epoch": 0.15689803584005008, "grad_norm": 0.0, "learning_rate": 1.9167365168147763e-05, "loss": 1.1956, "step": 4010 }, { "epoch": 0.15693716253227952, "grad_norm": 0.0, "learning_rate": 1.916685884537202e-05, "loss": 1.221, "step": 4011 }, { "epoch": 0.15697628922450896, "grad_norm": 0.0, "learning_rate": 1.9166352375388013e-05, "loss": 1.1706, "step": 4012 }, { "epoch": 0.1570154159167384, "grad_norm": 0.0, "learning_rate": 1.9165845758203872e-05, "loss": 1.1896, "step": 4013 }, { "epoch": 0.15705454260896784, "grad_norm": 0.0, "learning_rate": 1.9165338993827736e-05, "loss": 1.2356, "step": 4014 }, { "epoch": 0.15709366930119728, "grad_norm": 0.0, "learning_rate": 1.9164832082267737e-05, "loss": 1.191, "step": 4015 }, { "epoch": 0.15713279599342672, "grad_norm": 0.0, "learning_rate": 1.916432502353202e-05, "loss": 1.2, "step": 4016 }, { "epoch": 0.15717192268565616, "grad_norm": 0.0, "learning_rate": 1.9163817817628728e-05, "loss": 1.1403, "step": 4017 }, { "epoch": 0.1572110493778856, "grad_norm": 0.0, "learning_rate": 1.9163310464566e-05, "loss": 1.1756, "step": 4018 }, { "epoch": 0.15725017607011504, "grad_norm": 0.0, "learning_rate": 1.9162802964351995e-05, "loss": 1.2119, "step": 4019 }, { "epoch": 0.15728930276234449, "grad_norm": 0.0, "learning_rate": 1.9162295316994854e-05, "loss": 1.1672, "step": 4020 }, { "epoch": 0.1573284294545739, "grad_norm": 0.0, "learning_rate": 1.9161787522502727e-05, "loss": 1.199, "step": 4021 }, { "epoch": 0.15736755614680334, "grad_norm": 0.0, "learning_rate": 1.916127958088378e-05, "loss": 1.1951, "step": 4022 }, { "epoch": 0.15740668283903278, "grad_norm": 0.0, "learning_rate": 1.9160771492146158e-05, "loss": 1.1595, "step": 4023 }, { "epoch": 0.15744580953126222, "grad_norm": 0.0, "learning_rate": 1.9160263256298023e-05, "loss": 1.1675, "step": 4024 }, { "epoch": 0.15748493622349166, "grad_norm": 0.0, "learning_rate": 1.9159754873347544e-05, "loss": 1.2214, "step": 4025 }, { "epoch": 0.1575240629157211, "grad_norm": 0.0, "learning_rate": 1.9159246343302874e-05, "loss": 1.1345, "step": 4026 }, { "epoch": 0.15756318960795054, "grad_norm": 0.0, "learning_rate": 1.915873766617219e-05, "loss": 1.1758, "step": 4027 }, { "epoch": 0.15760231630017998, "grad_norm": 0.0, "learning_rate": 1.915822884196365e-05, "loss": 1.2819, "step": 4028 }, { "epoch": 0.15764144299240943, "grad_norm": 0.0, "learning_rate": 1.9157719870685438e-05, "loss": 1.2749, "step": 4029 }, { "epoch": 0.15768056968463887, "grad_norm": 0.0, "learning_rate": 1.9157210752345713e-05, "loss": 1.1726, "step": 4030 }, { "epoch": 0.1577196963768683, "grad_norm": 0.0, "learning_rate": 1.9156701486952663e-05, "loss": 1.1194, "step": 4031 }, { "epoch": 0.15775882306909775, "grad_norm": 0.0, "learning_rate": 1.915619207451446e-05, "loss": 1.1808, "step": 4032 }, { "epoch": 0.1577979497613272, "grad_norm": 0.0, "learning_rate": 1.9155682515039286e-05, "loss": 1.3103, "step": 4033 }, { "epoch": 0.15783707645355663, "grad_norm": 0.0, "learning_rate": 1.915517280853532e-05, "loss": 1.2291, "step": 4034 }, { "epoch": 0.15787620314578604, "grad_norm": 0.0, "learning_rate": 1.9154662955010753e-05, "loss": 1.1607, "step": 4035 }, { "epoch": 0.15791532983801548, "grad_norm": 0.0, "learning_rate": 1.915415295447377e-05, "loss": 1.2983, "step": 4036 }, { "epoch": 0.15795445653024492, "grad_norm": 0.0, "learning_rate": 1.915364280693256e-05, "loss": 1.201, "step": 4037 }, { "epoch": 0.15799358322247437, "grad_norm": 0.0, "learning_rate": 1.915313251239532e-05, "loss": 1.162, "step": 4038 }, { "epoch": 0.1580327099147038, "grad_norm": 0.0, "learning_rate": 1.915262207087024e-05, "loss": 1.0977, "step": 4039 }, { "epoch": 0.15807183660693325, "grad_norm": 0.0, "learning_rate": 1.915211148236552e-05, "loss": 1.1366, "step": 4040 }, { "epoch": 0.1581109632991627, "grad_norm": 0.0, "learning_rate": 1.9151600746889355e-05, "loss": 1.0785, "step": 4041 }, { "epoch": 0.15815008999139213, "grad_norm": 0.0, "learning_rate": 1.915108986444995e-05, "loss": 1.239, "step": 4042 }, { "epoch": 0.15818921668362157, "grad_norm": 0.0, "learning_rate": 1.9150578835055507e-05, "loss": 1.2688, "step": 4043 }, { "epoch": 0.158228343375851, "grad_norm": 0.0, "learning_rate": 1.9150067658714238e-05, "loss": 1.1685, "step": 4044 }, { "epoch": 0.15826747006808045, "grad_norm": 0.0, "learning_rate": 1.9149556335434348e-05, "loss": 1.1905, "step": 4045 }, { "epoch": 0.1583065967603099, "grad_norm": 0.0, "learning_rate": 1.9149044865224047e-05, "loss": 1.1472, "step": 4046 }, { "epoch": 0.15834572345253933, "grad_norm": 0.0, "learning_rate": 1.914853324809155e-05, "loss": 1.1653, "step": 4047 }, { "epoch": 0.15838485014476877, "grad_norm": 0.0, "learning_rate": 1.9148021484045073e-05, "loss": 1.1768, "step": 4048 }, { "epoch": 0.1584239768369982, "grad_norm": 0.0, "learning_rate": 1.9147509573092833e-05, "loss": 1.0132, "step": 4049 }, { "epoch": 0.15846310352922763, "grad_norm": 0.0, "learning_rate": 1.9146997515243054e-05, "loss": 1.3075, "step": 4050 }, { "epoch": 0.15850223022145707, "grad_norm": 0.0, "learning_rate": 1.9146485310503955e-05, "loss": 1.2378, "step": 4051 }, { "epoch": 0.1585413569136865, "grad_norm": 0.0, "learning_rate": 1.9145972958883763e-05, "loss": 1.1958, "step": 4052 }, { "epoch": 0.15858048360591595, "grad_norm": 0.0, "learning_rate": 1.9145460460390707e-05, "loss": 1.1798, "step": 4053 }, { "epoch": 0.1586196102981454, "grad_norm": 0.0, "learning_rate": 1.9144947815033014e-05, "loss": 1.1157, "step": 4054 }, { "epoch": 0.15865873699037483, "grad_norm": 0.0, "learning_rate": 1.9144435022818917e-05, "loss": 1.0374, "step": 4055 }, { "epoch": 0.15869786368260427, "grad_norm": 0.0, "learning_rate": 1.9143922083756656e-05, "loss": 1.2153, "step": 4056 }, { "epoch": 0.15873699037483371, "grad_norm": 0.0, "learning_rate": 1.9143408997854464e-05, "loss": 1.0899, "step": 4057 }, { "epoch": 0.15877611706706315, "grad_norm": 0.0, "learning_rate": 1.914289576512058e-05, "loss": 1.2448, "step": 4058 }, { "epoch": 0.1588152437592926, "grad_norm": 0.0, "learning_rate": 1.9142382385563244e-05, "loss": 1.3151, "step": 4059 }, { "epoch": 0.15885437045152204, "grad_norm": 0.0, "learning_rate": 1.9141868859190706e-05, "loss": 1.2283, "step": 4060 }, { "epoch": 0.15889349714375148, "grad_norm": 0.0, "learning_rate": 1.9141355186011206e-05, "loss": 1.1511, "step": 4061 }, { "epoch": 0.15893262383598092, "grad_norm": 0.0, "learning_rate": 1.9140841366033003e-05, "loss": 1.1904, "step": 4062 }, { "epoch": 0.15897175052821033, "grad_norm": 0.0, "learning_rate": 1.914032739926434e-05, "loss": 1.2993, "step": 4063 }, { "epoch": 0.15901087722043977, "grad_norm": 0.0, "learning_rate": 1.9139813285713468e-05, "loss": 1.2037, "step": 4064 }, { "epoch": 0.1590500039126692, "grad_norm": 0.0, "learning_rate": 1.913929902538865e-05, "loss": 1.2281, "step": 4065 }, { "epoch": 0.15908913060489865, "grad_norm": 0.0, "learning_rate": 1.913878461829814e-05, "loss": 1.2894, "step": 4066 }, { "epoch": 0.1591282572971281, "grad_norm": 0.0, "learning_rate": 1.9138270064450202e-05, "loss": 1.1588, "step": 4067 }, { "epoch": 0.15916738398935754, "grad_norm": 0.0, "learning_rate": 1.9137755363853098e-05, "loss": 1.3121, "step": 4068 }, { "epoch": 0.15920651068158698, "grad_norm": 0.0, "learning_rate": 1.9137240516515094e-05, "loss": 1.2424, "step": 4069 }, { "epoch": 0.15924563737381642, "grad_norm": 0.0, "learning_rate": 1.9136725522444458e-05, "loss": 1.2384, "step": 4070 }, { "epoch": 0.15928476406604586, "grad_norm": 0.0, "learning_rate": 1.9136210381649457e-05, "loss": 1.071, "step": 4071 }, { "epoch": 0.1593238907582753, "grad_norm": 0.0, "learning_rate": 1.9135695094138364e-05, "loss": 1.1059, "step": 4072 }, { "epoch": 0.15936301745050474, "grad_norm": 0.0, "learning_rate": 1.9135179659919458e-05, "loss": 1.2036, "step": 4073 }, { "epoch": 0.15940214414273418, "grad_norm": 0.0, "learning_rate": 1.9134664079001015e-05, "loss": 1.2382, "step": 4074 }, { "epoch": 0.15944127083496362, "grad_norm": 0.0, "learning_rate": 1.9134148351391308e-05, "loss": 1.2312, "step": 4075 }, { "epoch": 0.15948039752719306, "grad_norm": 0.0, "learning_rate": 1.913363247709863e-05, "loss": 1.2003, "step": 4076 }, { "epoch": 0.1595195242194225, "grad_norm": 0.0, "learning_rate": 1.9133116456131256e-05, "loss": 1.1519, "step": 4077 }, { "epoch": 0.15955865091165192, "grad_norm": 0.0, "learning_rate": 1.9132600288497477e-05, "loss": 1.0596, "step": 4078 }, { "epoch": 0.15959777760388136, "grad_norm": 0.0, "learning_rate": 1.913208397420558e-05, "loss": 1.1978, "step": 4079 }, { "epoch": 0.1596369042961108, "grad_norm": 0.0, "learning_rate": 1.913156751326386e-05, "loss": 1.1387, "step": 4080 }, { "epoch": 0.15967603098834024, "grad_norm": 0.0, "learning_rate": 1.9131050905680604e-05, "loss": 1.1583, "step": 4081 }, { "epoch": 0.15971515768056968, "grad_norm": 0.0, "learning_rate": 1.9130534151464116e-05, "loss": 1.2946, "step": 4082 }, { "epoch": 0.15975428437279912, "grad_norm": 0.0, "learning_rate": 1.913001725062269e-05, "loss": 1.0678, "step": 4083 }, { "epoch": 0.15979341106502856, "grad_norm": 0.0, "learning_rate": 1.9129500203164627e-05, "loss": 1.1598, "step": 4084 }, { "epoch": 0.159832537757258, "grad_norm": 0.0, "learning_rate": 1.912898300909823e-05, "loss": 1.1545, "step": 4085 }, { "epoch": 0.15987166444948744, "grad_norm": 0.0, "learning_rate": 1.9128465668431808e-05, "loss": 1.252, "step": 4086 }, { "epoch": 0.15991079114171688, "grad_norm": 0.0, "learning_rate": 1.9127948181173664e-05, "loss": 1.0614, "step": 4087 }, { "epoch": 0.15994991783394633, "grad_norm": 0.0, "learning_rate": 1.912743054733211e-05, "loss": 1.2971, "step": 4088 }, { "epoch": 0.15998904452617577, "grad_norm": 0.0, "learning_rate": 1.912691276691546e-05, "loss": 1.0942, "step": 4089 }, { "epoch": 0.1600281712184052, "grad_norm": 0.0, "learning_rate": 1.9126394839932027e-05, "loss": 1.2647, "step": 4090 }, { "epoch": 0.16006729791063465, "grad_norm": 0.0, "learning_rate": 1.912587676639013e-05, "loss": 1.1896, "step": 4091 }, { "epoch": 0.16010642460286406, "grad_norm": 0.0, "learning_rate": 1.9125358546298084e-05, "loss": 1.1624, "step": 4092 }, { "epoch": 0.1601455512950935, "grad_norm": 0.0, "learning_rate": 1.9124840179664216e-05, "loss": 1.1523, "step": 4093 }, { "epoch": 0.16018467798732294, "grad_norm": 0.0, "learning_rate": 1.9124321666496846e-05, "loss": 1.2122, "step": 4094 }, { "epoch": 0.16022380467955238, "grad_norm": 0.0, "learning_rate": 1.912380300680431e-05, "loss": 1.2114, "step": 4095 }, { "epoch": 0.16026293137178182, "grad_norm": 0.0, "learning_rate": 1.912328420059493e-05, "loss": 1.1309, "step": 4096 }, { "epoch": 0.16030205806401127, "grad_norm": 0.0, "learning_rate": 1.912276524787703e-05, "loss": 1.0958, "step": 4097 }, { "epoch": 0.1603411847562407, "grad_norm": 0.0, "learning_rate": 1.912224614865896e-05, "loss": 1.0923, "step": 4098 }, { "epoch": 0.16038031144847015, "grad_norm": 0.0, "learning_rate": 1.9121726902949043e-05, "loss": 1.1484, "step": 4099 }, { "epoch": 0.1604194381406996, "grad_norm": 0.0, "learning_rate": 1.9121207510755624e-05, "loss": 1.1593, "step": 4100 }, { "epoch": 0.16045856483292903, "grad_norm": 0.0, "learning_rate": 1.912068797208704e-05, "loss": 1.1688, "step": 4101 }, { "epoch": 0.16049769152515847, "grad_norm": 0.0, "learning_rate": 1.9120168286951638e-05, "loss": 1.2585, "step": 4102 }, { "epoch": 0.1605368182173879, "grad_norm": 0.0, "learning_rate": 1.911964845535776e-05, "loss": 1.2526, "step": 4103 }, { "epoch": 0.16057594490961735, "grad_norm": 0.0, "learning_rate": 1.9119128477313757e-05, "loss": 1.1746, "step": 4104 }, { "epoch": 0.1606150716018468, "grad_norm": 0.0, "learning_rate": 1.9118608352827978e-05, "loss": 1.1797, "step": 4105 }, { "epoch": 0.1606541982940762, "grad_norm": 0.0, "learning_rate": 1.9118088081908773e-05, "loss": 1.3448, "step": 4106 }, { "epoch": 0.16069332498630565, "grad_norm": 0.0, "learning_rate": 1.91175676645645e-05, "loss": 1.1989, "step": 4107 }, { "epoch": 0.1607324516785351, "grad_norm": 0.0, "learning_rate": 1.9117047100803513e-05, "loss": 1.2637, "step": 4108 }, { "epoch": 0.16077157837076453, "grad_norm": 0.0, "learning_rate": 1.9116526390634177e-05, "loss": 1.0911, "step": 4109 }, { "epoch": 0.16081070506299397, "grad_norm": 0.0, "learning_rate": 1.9116005534064853e-05, "loss": 1.1855, "step": 4110 }, { "epoch": 0.1608498317552234, "grad_norm": 0.0, "learning_rate": 1.9115484531103895e-05, "loss": 1.2479, "step": 4111 }, { "epoch": 0.16088895844745285, "grad_norm": 0.0, "learning_rate": 1.9114963381759685e-05, "loss": 1.1398, "step": 4112 }, { "epoch": 0.1609280851396823, "grad_norm": 0.0, "learning_rate": 1.9114442086040583e-05, "loss": 1.134, "step": 4113 }, { "epoch": 0.16096721183191173, "grad_norm": 0.0, "learning_rate": 1.9113920643954963e-05, "loss": 1.0888, "step": 4114 }, { "epoch": 0.16100633852414117, "grad_norm": 0.0, "learning_rate": 1.9113399055511195e-05, "loss": 1.2395, "step": 4115 }, { "epoch": 0.16104546521637061, "grad_norm": 0.0, "learning_rate": 1.9112877320717657e-05, "loss": 1.2004, "step": 4116 }, { "epoch": 0.16108459190860006, "grad_norm": 0.0, "learning_rate": 1.9112355439582734e-05, "loss": 1.2437, "step": 4117 }, { "epoch": 0.1611237186008295, "grad_norm": 0.0, "learning_rate": 1.9111833412114796e-05, "loss": 1.1625, "step": 4118 }, { "epoch": 0.16116284529305894, "grad_norm": 0.0, "learning_rate": 1.9111311238322235e-05, "loss": 1.1535, "step": 4119 }, { "epoch": 0.16120197198528835, "grad_norm": 0.0, "learning_rate": 1.911078891821343e-05, "loss": 1.08, "step": 4120 }, { "epoch": 0.1612410986775178, "grad_norm": 0.0, "learning_rate": 1.9110266451796772e-05, "loss": 1.2352, "step": 4121 }, { "epoch": 0.16128022536974723, "grad_norm": 0.0, "learning_rate": 1.910974383908065e-05, "loss": 1.2636, "step": 4122 }, { "epoch": 0.16131935206197667, "grad_norm": 0.0, "learning_rate": 1.9109221080073457e-05, "loss": 1.2855, "step": 4123 }, { "epoch": 0.1613584787542061, "grad_norm": 0.0, "learning_rate": 1.910869817478359e-05, "loss": 1.0542, "step": 4124 }, { "epoch": 0.16139760544643555, "grad_norm": 0.0, "learning_rate": 1.910817512321944e-05, "loss": 1.1822, "step": 4125 }, { "epoch": 0.161436732138665, "grad_norm": 0.0, "learning_rate": 1.9107651925389413e-05, "loss": 1.25, "step": 4126 }, { "epoch": 0.16147585883089444, "grad_norm": 0.0, "learning_rate": 1.910712858130191e-05, "loss": 1.1656, "step": 4127 }, { "epoch": 0.16151498552312388, "grad_norm": 0.0, "learning_rate": 1.9106605090965333e-05, "loss": 1.1891, "step": 4128 }, { "epoch": 0.16155411221535332, "grad_norm": 0.0, "learning_rate": 1.910608145438809e-05, "loss": 1.2762, "step": 4129 }, { "epoch": 0.16159323890758276, "grad_norm": 0.0, "learning_rate": 1.9105557671578588e-05, "loss": 1.1245, "step": 4130 }, { "epoch": 0.1616323655998122, "grad_norm": 0.0, "learning_rate": 1.9105033742545244e-05, "loss": 1.2416, "step": 4131 }, { "epoch": 0.16167149229204164, "grad_norm": 0.0, "learning_rate": 1.910450966729646e-05, "loss": 1.247, "step": 4132 }, { "epoch": 0.16171061898427108, "grad_norm": 0.0, "learning_rate": 1.910398544584066e-05, "loss": 1.1813, "step": 4133 }, { "epoch": 0.1617497456765005, "grad_norm": 0.0, "learning_rate": 1.9103461078186268e-05, "loss": 1.1754, "step": 4134 }, { "epoch": 0.16178887236872994, "grad_norm": 0.0, "learning_rate": 1.9102936564341696e-05, "loss": 1.242, "step": 4135 }, { "epoch": 0.16182799906095938, "grad_norm": 0.0, "learning_rate": 1.910241190431537e-05, "loss": 1.319, "step": 4136 }, { "epoch": 0.16186712575318882, "grad_norm": 0.0, "learning_rate": 1.910188709811571e-05, "loss": 1.2273, "step": 4137 }, { "epoch": 0.16190625244541826, "grad_norm": 0.0, "learning_rate": 1.9101362145751154e-05, "loss": 1.1494, "step": 4138 }, { "epoch": 0.1619453791376477, "grad_norm": 0.0, "learning_rate": 1.9100837047230123e-05, "loss": 1.1727, "step": 4139 }, { "epoch": 0.16198450582987714, "grad_norm": 0.0, "learning_rate": 1.9100311802561055e-05, "loss": 1.2802, "step": 4140 }, { "epoch": 0.16202363252210658, "grad_norm": 0.0, "learning_rate": 1.909978641175238e-05, "loss": 1.2495, "step": 4141 }, { "epoch": 0.16206275921433602, "grad_norm": 0.0, "learning_rate": 1.909926087481254e-05, "loss": 1.2982, "step": 4142 }, { "epoch": 0.16210188590656546, "grad_norm": 0.0, "learning_rate": 1.9098735191749977e-05, "loss": 1.1136, "step": 4143 }, { "epoch": 0.1621410125987949, "grad_norm": 0.0, "learning_rate": 1.909820936257312e-05, "loss": 1.1752, "step": 4144 }, { "epoch": 0.16218013929102434, "grad_norm": 0.0, "learning_rate": 1.9097683387290428e-05, "loss": 1.231, "step": 4145 }, { "epoch": 0.16221926598325379, "grad_norm": 0.0, "learning_rate": 1.9097157265910337e-05, "loss": 1.2205, "step": 4146 }, { "epoch": 0.16225839267548323, "grad_norm": 0.0, "learning_rate": 1.9096630998441298e-05, "loss": 1.1556, "step": 4147 }, { "epoch": 0.16229751936771267, "grad_norm": 0.0, "learning_rate": 1.9096104584891767e-05, "loss": 1.1721, "step": 4148 }, { "epoch": 0.16233664605994208, "grad_norm": 0.0, "learning_rate": 1.9095578025270195e-05, "loss": 1.167, "step": 4149 }, { "epoch": 0.16237577275217152, "grad_norm": 0.0, "learning_rate": 1.9095051319585035e-05, "loss": 1.2411, "step": 4150 }, { "epoch": 0.16241489944440096, "grad_norm": 0.0, "learning_rate": 1.9094524467844747e-05, "loss": 1.2506, "step": 4151 }, { "epoch": 0.1624540261366304, "grad_norm": 0.0, "learning_rate": 1.9093997470057796e-05, "loss": 1.1369, "step": 4152 }, { "epoch": 0.16249315282885984, "grad_norm": 0.0, "learning_rate": 1.909347032623264e-05, "loss": 1.2423, "step": 4153 }, { "epoch": 0.16253227952108928, "grad_norm": 0.0, "learning_rate": 1.9092943036377743e-05, "loss": 1.1349, "step": 4154 }, { "epoch": 0.16257140621331873, "grad_norm": 0.0, "learning_rate": 1.9092415600501577e-05, "loss": 1.2032, "step": 4155 }, { "epoch": 0.16261053290554817, "grad_norm": 0.0, "learning_rate": 1.9091888018612607e-05, "loss": 1.2603, "step": 4156 }, { "epoch": 0.1626496595977776, "grad_norm": 0.0, "learning_rate": 1.909136029071931e-05, "loss": 1.2834, "step": 4157 }, { "epoch": 0.16268878629000705, "grad_norm": 0.0, "learning_rate": 1.909083241683016e-05, "loss": 1.2194, "step": 4158 }, { "epoch": 0.1627279129822365, "grad_norm": 0.0, "learning_rate": 1.9090304396953632e-05, "loss": 1.0561, "step": 4159 }, { "epoch": 0.16276703967446593, "grad_norm": 0.0, "learning_rate": 1.9089776231098204e-05, "loss": 1.0687, "step": 4160 }, { "epoch": 0.16280616636669537, "grad_norm": 0.0, "learning_rate": 1.908924791927236e-05, "loss": 1.3264, "step": 4161 }, { "epoch": 0.1628452930589248, "grad_norm": 0.0, "learning_rate": 1.9088719461484588e-05, "loss": 1.1408, "step": 4162 }, { "epoch": 0.16288441975115422, "grad_norm": 0.0, "learning_rate": 1.9088190857743368e-05, "loss": 1.2086, "step": 4163 }, { "epoch": 0.16292354644338367, "grad_norm": 0.0, "learning_rate": 1.9087662108057192e-05, "loss": 1.1926, "step": 4164 }, { "epoch": 0.1629626731356131, "grad_norm": 0.0, "learning_rate": 1.908713321243455e-05, "loss": 1.1262, "step": 4165 }, { "epoch": 0.16300179982784255, "grad_norm": 0.0, "learning_rate": 1.9086604170883932e-05, "loss": 1.262, "step": 4166 }, { "epoch": 0.163040926520072, "grad_norm": 0.0, "learning_rate": 1.9086074983413843e-05, "loss": 1.1426, "step": 4167 }, { "epoch": 0.16308005321230143, "grad_norm": 0.0, "learning_rate": 1.908554565003277e-05, "loss": 1.3193, "step": 4168 }, { "epoch": 0.16311917990453087, "grad_norm": 0.0, "learning_rate": 1.9085016170749223e-05, "loss": 1.1625, "step": 4169 }, { "epoch": 0.1631583065967603, "grad_norm": 0.0, "learning_rate": 1.9084486545571694e-05, "loss": 1.1174, "step": 4170 }, { "epoch": 0.16319743328898975, "grad_norm": 0.0, "learning_rate": 1.90839567745087e-05, "loss": 1.1218, "step": 4171 }, { "epoch": 0.1632365599812192, "grad_norm": 0.0, "learning_rate": 1.908342685756874e-05, "loss": 1.2762, "step": 4172 }, { "epoch": 0.16327568667344863, "grad_norm": 0.0, "learning_rate": 1.9082896794760327e-05, "loss": 1.1449, "step": 4173 }, { "epoch": 0.16331481336567807, "grad_norm": 0.0, "learning_rate": 1.9082366586091975e-05, "loss": 1.1412, "step": 4174 }, { "epoch": 0.16335394005790752, "grad_norm": 0.0, "learning_rate": 1.90818362315722e-05, "loss": 1.2247, "step": 4175 }, { "epoch": 0.16339306675013696, "grad_norm": 0.0, "learning_rate": 1.908130573120951e-05, "loss": 1.2349, "step": 4176 }, { "epoch": 0.16343219344236637, "grad_norm": 0.0, "learning_rate": 1.908077508501243e-05, "loss": 1.2173, "step": 4177 }, { "epoch": 0.1634713201345958, "grad_norm": 0.0, "learning_rate": 1.9080244292989484e-05, "loss": 1.3192, "step": 4178 }, { "epoch": 0.16351044682682525, "grad_norm": 0.0, "learning_rate": 1.907971335514919e-05, "loss": 1.2048, "step": 4179 }, { "epoch": 0.1635495735190547, "grad_norm": 0.0, "learning_rate": 1.9079182271500073e-05, "loss": 1.1899, "step": 4180 }, { "epoch": 0.16358870021128413, "grad_norm": 0.0, "learning_rate": 1.9078651042050673e-05, "loss": 1.1673, "step": 4181 }, { "epoch": 0.16362782690351357, "grad_norm": 0.0, "learning_rate": 1.907811966680951e-05, "loss": 1.2289, "step": 4182 }, { "epoch": 0.16366695359574301, "grad_norm": 0.0, "learning_rate": 1.9077588145785123e-05, "loss": 1.0131, "step": 4183 }, { "epoch": 0.16370608028797246, "grad_norm": 0.0, "learning_rate": 1.9077056478986043e-05, "loss": 1.2811, "step": 4184 }, { "epoch": 0.1637452069802019, "grad_norm": 0.0, "learning_rate": 1.9076524666420808e-05, "loss": 1.2052, "step": 4185 }, { "epoch": 0.16378433367243134, "grad_norm": 0.0, "learning_rate": 1.9075992708097965e-05, "loss": 1.1334, "step": 4186 }, { "epoch": 0.16382346036466078, "grad_norm": 0.0, "learning_rate": 1.9075460604026047e-05, "loss": 1.2309, "step": 4187 }, { "epoch": 0.16386258705689022, "grad_norm": 0.0, "learning_rate": 1.907492835421361e-05, "loss": 1.1668, "step": 4188 }, { "epoch": 0.16390171374911966, "grad_norm": 0.0, "learning_rate": 1.907439595866919e-05, "loss": 1.0172, "step": 4189 }, { "epoch": 0.1639408404413491, "grad_norm": 0.0, "learning_rate": 1.9073863417401342e-05, "loss": 1.1213, "step": 4190 }, { "epoch": 0.1639799671335785, "grad_norm": 0.0, "learning_rate": 1.9073330730418623e-05, "loss": 1.1378, "step": 4191 }, { "epoch": 0.16401909382580795, "grad_norm": 0.0, "learning_rate": 1.9072797897729577e-05, "loss": 0.9984, "step": 4192 }, { "epoch": 0.1640582205180374, "grad_norm": 0.0, "learning_rate": 1.9072264919342766e-05, "loss": 1.1155, "step": 4193 }, { "epoch": 0.16409734721026684, "grad_norm": 0.0, "learning_rate": 1.907173179526675e-05, "loss": 1.2513, "step": 4194 }, { "epoch": 0.16413647390249628, "grad_norm": 0.0, "learning_rate": 1.907119852551009e-05, "loss": 1.0969, "step": 4195 }, { "epoch": 0.16417560059472572, "grad_norm": 0.0, "learning_rate": 1.9070665110081345e-05, "loss": 1.2139, "step": 4196 }, { "epoch": 0.16421472728695516, "grad_norm": 0.0, "learning_rate": 1.9070131548989087e-05, "loss": 1.2436, "step": 4197 }, { "epoch": 0.1642538539791846, "grad_norm": 0.0, "learning_rate": 1.906959784224188e-05, "loss": 1.2196, "step": 4198 }, { "epoch": 0.16429298067141404, "grad_norm": 0.0, "learning_rate": 1.9069063989848298e-05, "loss": 1.2025, "step": 4199 }, { "epoch": 0.16433210736364348, "grad_norm": 0.0, "learning_rate": 1.9068529991816914e-05, "loss": 1.2258, "step": 4200 }, { "epoch": 0.16437123405587292, "grad_norm": 0.0, "learning_rate": 1.90679958481563e-05, "loss": 1.1583, "step": 4201 }, { "epoch": 0.16441036074810236, "grad_norm": 0.0, "learning_rate": 1.9067461558875036e-05, "loss": 1.1564, "step": 4202 }, { "epoch": 0.1644494874403318, "grad_norm": 0.0, "learning_rate": 1.90669271239817e-05, "loss": 1.1551, "step": 4203 }, { "epoch": 0.16448861413256124, "grad_norm": 0.0, "learning_rate": 1.906639254348488e-05, "loss": 1.2021, "step": 4204 }, { "epoch": 0.16452774082479069, "grad_norm": 0.0, "learning_rate": 1.9065857817393153e-05, "loss": 1.2073, "step": 4205 }, { "epoch": 0.1645668675170201, "grad_norm": 0.0, "learning_rate": 1.9065322945715107e-05, "loss": 1.3113, "step": 4206 }, { "epoch": 0.16460599420924954, "grad_norm": 0.0, "learning_rate": 1.9064787928459338e-05, "loss": 1.3683, "step": 4207 }, { "epoch": 0.16464512090147898, "grad_norm": 0.0, "learning_rate": 1.9064252765634435e-05, "loss": 1.1498, "step": 4208 }, { "epoch": 0.16468424759370842, "grad_norm": 0.0, "learning_rate": 1.906371745724899e-05, "loss": 1.1281, "step": 4209 }, { "epoch": 0.16472337428593786, "grad_norm": 0.0, "learning_rate": 1.9063182003311595e-05, "loss": 1.1297, "step": 4210 }, { "epoch": 0.1647625009781673, "grad_norm": 0.0, "learning_rate": 1.906264640383086e-05, "loss": 1.0109, "step": 4211 }, { "epoch": 0.16480162767039674, "grad_norm": 0.0, "learning_rate": 1.9062110658815375e-05, "loss": 1.1478, "step": 4212 }, { "epoch": 0.16484075436262619, "grad_norm": 0.0, "learning_rate": 1.9061574768273753e-05, "loss": 1.1984, "step": 4213 }, { "epoch": 0.16487988105485563, "grad_norm": 0.0, "learning_rate": 1.9061038732214592e-05, "loss": 1.1606, "step": 4214 }, { "epoch": 0.16491900774708507, "grad_norm": 0.0, "learning_rate": 1.9060502550646504e-05, "loss": 1.2404, "step": 4215 }, { "epoch": 0.1649581344393145, "grad_norm": 0.0, "learning_rate": 1.9059966223578098e-05, "loss": 1.1774, "step": 4216 }, { "epoch": 0.16499726113154395, "grad_norm": 0.0, "learning_rate": 1.9059429751017987e-05, "loss": 1.228, "step": 4217 }, { "epoch": 0.1650363878237734, "grad_norm": 0.0, "learning_rate": 1.905889313297479e-05, "loss": 1.1737, "step": 4218 }, { "epoch": 0.16507551451600283, "grad_norm": 0.0, "learning_rate": 1.9058356369457117e-05, "loss": 1.075, "step": 4219 }, { "epoch": 0.16511464120823224, "grad_norm": 0.0, "learning_rate": 1.905781946047359e-05, "loss": 1.2129, "step": 4220 }, { "epoch": 0.16515376790046168, "grad_norm": 0.0, "learning_rate": 1.9057282406032835e-05, "loss": 1.0834, "step": 4221 }, { "epoch": 0.16519289459269113, "grad_norm": 0.0, "learning_rate": 1.9056745206143472e-05, "loss": 1.1469, "step": 4222 }, { "epoch": 0.16523202128492057, "grad_norm": 0.0, "learning_rate": 1.9056207860814134e-05, "loss": 1.2673, "step": 4223 }, { "epoch": 0.16527114797715, "grad_norm": 0.0, "learning_rate": 1.9055670370053444e-05, "loss": 1.0922, "step": 4224 }, { "epoch": 0.16531027466937945, "grad_norm": 0.0, "learning_rate": 1.905513273387003e-05, "loss": 1.0136, "step": 4225 }, { "epoch": 0.1653494013616089, "grad_norm": 0.0, "learning_rate": 1.9054594952272538e-05, "loss": 1.1691, "step": 4226 }, { "epoch": 0.16538852805383833, "grad_norm": 0.0, "learning_rate": 1.9054057025269595e-05, "loss": 1.2629, "step": 4227 }, { "epoch": 0.16542765474606777, "grad_norm": 0.0, "learning_rate": 1.9053518952869842e-05, "loss": 1.2699, "step": 4228 }, { "epoch": 0.1654667814382972, "grad_norm": 0.0, "learning_rate": 1.9052980735081918e-05, "loss": 1.2357, "step": 4229 }, { "epoch": 0.16550590813052665, "grad_norm": 0.0, "learning_rate": 1.9052442371914466e-05, "loss": 1.2788, "step": 4230 }, { "epoch": 0.1655450348227561, "grad_norm": 0.0, "learning_rate": 1.9051903863376134e-05, "loss": 1.1591, "step": 4231 }, { "epoch": 0.16558416151498553, "grad_norm": 0.0, "learning_rate": 1.905136520947557e-05, "loss": 1.2725, "step": 4232 }, { "epoch": 0.16562328820721497, "grad_norm": 0.0, "learning_rate": 1.9050826410221423e-05, "loss": 1.2554, "step": 4233 }, { "epoch": 0.1656624148994444, "grad_norm": 0.0, "learning_rate": 1.905028746562234e-05, "loss": 1.1738, "step": 4234 }, { "epoch": 0.16570154159167383, "grad_norm": 0.0, "learning_rate": 1.9049748375686986e-05, "loss": 1.1126, "step": 4235 }, { "epoch": 0.16574066828390327, "grad_norm": 0.0, "learning_rate": 1.9049209140424014e-05, "loss": 1.222, "step": 4236 }, { "epoch": 0.1657797949761327, "grad_norm": 0.0, "learning_rate": 1.904866975984208e-05, "loss": 1.2964, "step": 4237 }, { "epoch": 0.16581892166836215, "grad_norm": 0.0, "learning_rate": 1.904813023394985e-05, "loss": 1.3085, "step": 4238 }, { "epoch": 0.1658580483605916, "grad_norm": 0.0, "learning_rate": 1.9047590562755984e-05, "loss": 1.1964, "step": 4239 }, { "epoch": 0.16589717505282103, "grad_norm": 0.0, "learning_rate": 1.9047050746269155e-05, "loss": 1.1931, "step": 4240 }, { "epoch": 0.16593630174505047, "grad_norm": 0.0, "learning_rate": 1.904651078449802e-05, "loss": 1.1885, "step": 4241 }, { "epoch": 0.16597542843727991, "grad_norm": 0.0, "learning_rate": 1.9045970677451264e-05, "loss": 1.2438, "step": 4242 }, { "epoch": 0.16601455512950936, "grad_norm": 0.0, "learning_rate": 1.9045430425137553e-05, "loss": 1.2953, "step": 4243 }, { "epoch": 0.1660536818217388, "grad_norm": 0.0, "learning_rate": 1.9044890027565563e-05, "loss": 1.0874, "step": 4244 }, { "epoch": 0.16609280851396824, "grad_norm": 0.0, "learning_rate": 1.9044349484743976e-05, "loss": 1.2646, "step": 4245 }, { "epoch": 0.16613193520619768, "grad_norm": 0.0, "learning_rate": 1.9043808796681467e-05, "loss": 1.2135, "step": 4246 }, { "epoch": 0.16617106189842712, "grad_norm": 0.0, "learning_rate": 1.904326796338672e-05, "loss": 1.2799, "step": 4247 }, { "epoch": 0.16621018859065653, "grad_norm": 0.0, "learning_rate": 1.9042726984868424e-05, "loss": 1.1631, "step": 4248 }, { "epoch": 0.16624931528288597, "grad_norm": 0.0, "learning_rate": 1.904218586113526e-05, "loss": 1.2445, "step": 4249 }, { "epoch": 0.16628844197511541, "grad_norm": 0.0, "learning_rate": 1.9041644592195924e-05, "loss": 1.1387, "step": 4250 }, { "epoch": 0.16632756866734486, "grad_norm": 0.0, "learning_rate": 1.9041103178059107e-05, "loss": 1.0436, "step": 4251 }, { "epoch": 0.1663666953595743, "grad_norm": 0.0, "learning_rate": 1.90405616187335e-05, "loss": 1.1896, "step": 4252 }, { "epoch": 0.16640582205180374, "grad_norm": 0.0, "learning_rate": 1.9040019914227803e-05, "loss": 1.2952, "step": 4253 }, { "epoch": 0.16644494874403318, "grad_norm": 0.0, "learning_rate": 1.9039478064550713e-05, "loss": 1.1815, "step": 4254 }, { "epoch": 0.16648407543626262, "grad_norm": 0.0, "learning_rate": 1.9038936069710934e-05, "loss": 1.1314, "step": 4255 }, { "epoch": 0.16652320212849206, "grad_norm": 0.0, "learning_rate": 1.9038393929717168e-05, "loss": 1.0931, "step": 4256 }, { "epoch": 0.1665623288207215, "grad_norm": 0.0, "learning_rate": 1.903785164457812e-05, "loss": 1.1885, "step": 4257 }, { "epoch": 0.16660145551295094, "grad_norm": 0.0, "learning_rate": 1.90373092143025e-05, "loss": 1.0405, "step": 4258 }, { "epoch": 0.16664058220518038, "grad_norm": 0.0, "learning_rate": 1.903676663889902e-05, "loss": 1.1608, "step": 4259 }, { "epoch": 0.16667970889740982, "grad_norm": 0.0, "learning_rate": 1.9036223918376386e-05, "loss": 1.0435, "step": 4260 }, { "epoch": 0.16671883558963926, "grad_norm": 0.0, "learning_rate": 1.9035681052743325e-05, "loss": 1.1837, "step": 4261 }, { "epoch": 0.1667579622818687, "grad_norm": 0.0, "learning_rate": 1.9035138042008546e-05, "loss": 1.1472, "step": 4262 }, { "epoch": 0.16679708897409812, "grad_norm": 0.0, "learning_rate": 1.903459488618077e-05, "loss": 1.2695, "step": 4263 }, { "epoch": 0.16683621566632756, "grad_norm": 0.0, "learning_rate": 1.9034051585268725e-05, "loss": 1.2701, "step": 4264 }, { "epoch": 0.166875342358557, "grad_norm": 0.0, "learning_rate": 1.903350813928113e-05, "loss": 1.1107, "step": 4265 }, { "epoch": 0.16691446905078644, "grad_norm": 0.0, "learning_rate": 1.9032964548226714e-05, "loss": 1.2721, "step": 4266 }, { "epoch": 0.16695359574301588, "grad_norm": 0.0, "learning_rate": 1.9032420812114206e-05, "loss": 1.2367, "step": 4267 }, { "epoch": 0.16699272243524532, "grad_norm": 0.0, "learning_rate": 1.9031876930952338e-05, "loss": 1.2262, "step": 4268 }, { "epoch": 0.16703184912747476, "grad_norm": 0.0, "learning_rate": 1.9031332904749843e-05, "loss": 1.165, "step": 4269 }, { "epoch": 0.1670709758197042, "grad_norm": 0.0, "learning_rate": 1.9030788733515463e-05, "loss": 1.1967, "step": 4270 }, { "epoch": 0.16711010251193364, "grad_norm": 0.0, "learning_rate": 1.9030244417257927e-05, "loss": 1.1743, "step": 4271 }, { "epoch": 0.16714922920416309, "grad_norm": 0.0, "learning_rate": 1.9029699955985985e-05, "loss": 1.2125, "step": 4272 }, { "epoch": 0.16718835589639253, "grad_norm": 0.0, "learning_rate": 1.902915534970837e-05, "loss": 1.1526, "step": 4273 }, { "epoch": 0.16722748258862197, "grad_norm": 0.0, "learning_rate": 1.9028610598433843e-05, "loss": 1.2156, "step": 4274 }, { "epoch": 0.1672666092808514, "grad_norm": 0.0, "learning_rate": 1.9028065702171136e-05, "loss": 1.1783, "step": 4275 }, { "epoch": 0.16730573597308085, "grad_norm": 0.0, "learning_rate": 1.9027520660929013e-05, "loss": 1.1661, "step": 4276 }, { "epoch": 0.16734486266531026, "grad_norm": 0.0, "learning_rate": 1.9026975474716215e-05, "loss": 1.1166, "step": 4277 }, { "epoch": 0.1673839893575397, "grad_norm": 0.0, "learning_rate": 1.9026430143541503e-05, "loss": 1.1608, "step": 4278 }, { "epoch": 0.16742311604976914, "grad_norm": 0.0, "learning_rate": 1.9025884667413637e-05, "loss": 1.0762, "step": 4279 }, { "epoch": 0.16746224274199858, "grad_norm": 0.0, "learning_rate": 1.902533904634137e-05, "loss": 1.1051, "step": 4280 }, { "epoch": 0.16750136943422803, "grad_norm": 0.0, "learning_rate": 1.9024793280333467e-05, "loss": 1.199, "step": 4281 }, { "epoch": 0.16754049612645747, "grad_norm": 0.0, "learning_rate": 1.9024247369398697e-05, "loss": 1.151, "step": 4282 }, { "epoch": 0.1675796228186869, "grad_norm": 0.0, "learning_rate": 1.902370131354582e-05, "loss": 1.1334, "step": 4283 }, { "epoch": 0.16761874951091635, "grad_norm": 0.0, "learning_rate": 1.9023155112783607e-05, "loss": 1.303, "step": 4284 }, { "epoch": 0.1676578762031458, "grad_norm": 0.0, "learning_rate": 1.902260876712083e-05, "loss": 1.3567, "step": 4285 }, { "epoch": 0.16769700289537523, "grad_norm": 0.0, "learning_rate": 1.902206227656626e-05, "loss": 1.2795, "step": 4286 }, { "epoch": 0.16773612958760467, "grad_norm": 0.0, "learning_rate": 1.9021515641128676e-05, "loss": 1.1378, "step": 4287 }, { "epoch": 0.1677752562798341, "grad_norm": 0.0, "learning_rate": 1.902096886081686e-05, "loss": 1.1225, "step": 4288 }, { "epoch": 0.16781438297206355, "grad_norm": 0.0, "learning_rate": 1.9020421935639582e-05, "loss": 1.1781, "step": 4289 }, { "epoch": 0.167853509664293, "grad_norm": 0.0, "learning_rate": 1.901987486560563e-05, "loss": 1.1588, "step": 4290 }, { "epoch": 0.1678926363565224, "grad_norm": 0.0, "learning_rate": 1.9019327650723795e-05, "loss": 1.2261, "step": 4291 }, { "epoch": 0.16793176304875185, "grad_norm": 0.0, "learning_rate": 1.901878029100286e-05, "loss": 1.2667, "step": 4292 }, { "epoch": 0.1679708897409813, "grad_norm": 0.0, "learning_rate": 1.901823278645161e-05, "loss": 1.2495, "step": 4293 }, { "epoch": 0.16801001643321073, "grad_norm": 0.0, "learning_rate": 1.9017685137078847e-05, "loss": 1.064, "step": 4294 }, { "epoch": 0.16804914312544017, "grad_norm": 0.0, "learning_rate": 1.9017137342893356e-05, "loss": 1.1983, "step": 4295 }, { "epoch": 0.1680882698176696, "grad_norm": 0.0, "learning_rate": 1.901658940390394e-05, "loss": 1.1038, "step": 4296 }, { "epoch": 0.16812739650989905, "grad_norm": 0.0, "learning_rate": 1.9016041320119397e-05, "loss": 1.0071, "step": 4297 }, { "epoch": 0.1681665232021285, "grad_norm": 0.0, "learning_rate": 1.9015493091548526e-05, "loss": 1.1726, "step": 4298 }, { "epoch": 0.16820564989435793, "grad_norm": 0.0, "learning_rate": 1.9014944718200137e-05, "loss": 1.3197, "step": 4299 }, { "epoch": 0.16824477658658737, "grad_norm": 0.0, "learning_rate": 1.901439620008303e-05, "loss": 1.1791, "step": 4300 }, { "epoch": 0.16828390327881682, "grad_norm": 0.0, "learning_rate": 1.9013847537206014e-05, "loss": 1.2303, "step": 4301 }, { "epoch": 0.16832302997104626, "grad_norm": 0.0, "learning_rate": 1.9013298729577905e-05, "loss": 1.1153, "step": 4302 }, { "epoch": 0.1683621566632757, "grad_norm": 0.0, "learning_rate": 1.901274977720751e-05, "loss": 1.1785, "step": 4303 }, { "epoch": 0.16840128335550514, "grad_norm": 0.0, "learning_rate": 1.901220068010365e-05, "loss": 1.3015, "step": 4304 }, { "epoch": 0.16844041004773455, "grad_norm": 0.0, "learning_rate": 1.9011651438275137e-05, "loss": 1.2987, "step": 4305 }, { "epoch": 0.168479536739964, "grad_norm": 0.0, "learning_rate": 1.9011102051730796e-05, "loss": 1.0646, "step": 4306 }, { "epoch": 0.16851866343219343, "grad_norm": 0.0, "learning_rate": 1.9010552520479448e-05, "loss": 1.1924, "step": 4307 }, { "epoch": 0.16855779012442287, "grad_norm": 0.0, "learning_rate": 1.901000284452992e-05, "loss": 1.2032, "step": 4308 }, { "epoch": 0.16859691681665231, "grad_norm": 0.0, "learning_rate": 1.9009453023891028e-05, "loss": 1.1331, "step": 4309 }, { "epoch": 0.16863604350888176, "grad_norm": 0.0, "learning_rate": 1.9008903058571615e-05, "loss": 1.2094, "step": 4310 }, { "epoch": 0.1686751702011112, "grad_norm": 0.0, "learning_rate": 1.900835294858051e-05, "loss": 1.2139, "step": 4311 }, { "epoch": 0.16871429689334064, "grad_norm": 0.0, "learning_rate": 1.900780269392654e-05, "loss": 1.1286, "step": 4312 }, { "epoch": 0.16875342358557008, "grad_norm": 0.0, "learning_rate": 1.900725229461855e-05, "loss": 1.2438, "step": 4313 }, { "epoch": 0.16879255027779952, "grad_norm": 0.0, "learning_rate": 1.9006701750665373e-05, "loss": 1.2408, "step": 4314 }, { "epoch": 0.16883167697002896, "grad_norm": 0.0, "learning_rate": 1.9006151062075853e-05, "loss": 1.1401, "step": 4315 }, { "epoch": 0.1688708036622584, "grad_norm": 0.0, "learning_rate": 1.9005600228858832e-05, "loss": 1.2613, "step": 4316 }, { "epoch": 0.16890993035448784, "grad_norm": 0.0, "learning_rate": 1.9005049251023156e-05, "loss": 1.143, "step": 4317 }, { "epoch": 0.16894905704671728, "grad_norm": 0.0, "learning_rate": 1.9004498128577672e-05, "loss": 1.2166, "step": 4318 }, { "epoch": 0.1689881837389467, "grad_norm": 0.0, "learning_rate": 1.900394686153123e-05, "loss": 1.141, "step": 4319 }, { "epoch": 0.16902731043117614, "grad_norm": 0.0, "learning_rate": 1.9003395449892685e-05, "loss": 1.0888, "step": 4320 }, { "epoch": 0.16906643712340558, "grad_norm": 0.0, "learning_rate": 1.900284389367089e-05, "loss": 1.2271, "step": 4321 }, { "epoch": 0.16910556381563502, "grad_norm": 0.0, "learning_rate": 1.900229219287471e-05, "loss": 1.1249, "step": 4322 }, { "epoch": 0.16914469050786446, "grad_norm": 0.0, "learning_rate": 1.900174034751299e-05, "loss": 1.1157, "step": 4323 }, { "epoch": 0.1691838172000939, "grad_norm": 0.0, "learning_rate": 1.9001188357594604e-05, "loss": 1.22, "step": 4324 }, { "epoch": 0.16922294389232334, "grad_norm": 0.0, "learning_rate": 1.900063622312841e-05, "loss": 1.2192, "step": 4325 }, { "epoch": 0.16926207058455278, "grad_norm": 0.0, "learning_rate": 1.9000083944123277e-05, "loss": 1.1087, "step": 4326 }, { "epoch": 0.16930119727678222, "grad_norm": 0.0, "learning_rate": 1.8999531520588074e-05, "loss": 1.1699, "step": 4327 }, { "epoch": 0.16934032396901166, "grad_norm": 0.0, "learning_rate": 1.8998978952531672e-05, "loss": 1.2156, "step": 4328 }, { "epoch": 0.1693794506612411, "grad_norm": 0.0, "learning_rate": 1.8998426239962945e-05, "loss": 1.0555, "step": 4329 }, { "epoch": 0.16941857735347055, "grad_norm": 0.0, "learning_rate": 1.8997873382890767e-05, "loss": 1.1249, "step": 4330 }, { "epoch": 0.1694577040457, "grad_norm": 0.0, "learning_rate": 1.8997320381324018e-05, "loss": 1.1191, "step": 4331 }, { "epoch": 0.16949683073792943, "grad_norm": 0.0, "learning_rate": 1.8996767235271577e-05, "loss": 1.0623, "step": 4332 }, { "epoch": 0.16953595743015887, "grad_norm": 0.0, "learning_rate": 1.899621394474233e-05, "loss": 1.0843, "step": 4333 }, { "epoch": 0.16957508412238828, "grad_norm": 0.0, "learning_rate": 1.899566050974516e-05, "loss": 1.0079, "step": 4334 }, { "epoch": 0.16961421081461772, "grad_norm": 0.0, "learning_rate": 1.899510693028895e-05, "loss": 1.2097, "step": 4335 }, { "epoch": 0.16965333750684716, "grad_norm": 0.0, "learning_rate": 1.8994553206382597e-05, "loss": 1.0765, "step": 4336 }, { "epoch": 0.1696924641990766, "grad_norm": 0.0, "learning_rate": 1.899399933803499e-05, "loss": 1.1555, "step": 4337 }, { "epoch": 0.16973159089130604, "grad_norm": 0.0, "learning_rate": 1.8993445325255027e-05, "loss": 1.2379, "step": 4338 }, { "epoch": 0.16977071758353549, "grad_norm": 0.0, "learning_rate": 1.89928911680516e-05, "loss": 1.1472, "step": 4339 }, { "epoch": 0.16980984427576493, "grad_norm": 0.0, "learning_rate": 1.8992336866433606e-05, "loss": 1.2579, "step": 4340 }, { "epoch": 0.16984897096799437, "grad_norm": 0.0, "learning_rate": 1.899178242040995e-05, "loss": 1.1705, "step": 4341 }, { "epoch": 0.1698880976602238, "grad_norm": 0.0, "learning_rate": 1.899122782998954e-05, "loss": 1.1254, "step": 4342 }, { "epoch": 0.16992722435245325, "grad_norm": 0.0, "learning_rate": 1.8990673095181276e-05, "loss": 1.2457, "step": 4343 }, { "epoch": 0.1699663510446827, "grad_norm": 0.0, "learning_rate": 1.8990118215994068e-05, "loss": 1.1609, "step": 4344 }, { "epoch": 0.17000547773691213, "grad_norm": 0.0, "learning_rate": 1.8989563192436826e-05, "loss": 1.1854, "step": 4345 }, { "epoch": 0.17004460442914157, "grad_norm": 0.0, "learning_rate": 1.8989008024518462e-05, "loss": 1.1246, "step": 4346 }, { "epoch": 0.170083731121371, "grad_norm": 0.0, "learning_rate": 1.8988452712247896e-05, "loss": 1.2095, "step": 4347 }, { "epoch": 0.17012285781360043, "grad_norm": 0.0, "learning_rate": 1.8987897255634042e-05, "loss": 1.1484, "step": 4348 }, { "epoch": 0.17016198450582987, "grad_norm": 0.0, "learning_rate": 1.898734165468582e-05, "loss": 1.2528, "step": 4349 }, { "epoch": 0.1702011111980593, "grad_norm": 0.0, "learning_rate": 1.898678590941215e-05, "loss": 1.2349, "step": 4350 }, { "epoch": 0.17024023789028875, "grad_norm": 0.0, "learning_rate": 1.8986230019821963e-05, "loss": 1.2042, "step": 4351 }, { "epoch": 0.1702793645825182, "grad_norm": 0.0, "learning_rate": 1.8985673985924185e-05, "loss": 1.0508, "step": 4352 }, { "epoch": 0.17031849127474763, "grad_norm": 0.0, "learning_rate": 1.8985117807727737e-05, "loss": 1.316, "step": 4353 }, { "epoch": 0.17035761796697707, "grad_norm": 0.0, "learning_rate": 1.8984561485241558e-05, "loss": 1.2086, "step": 4354 }, { "epoch": 0.1703967446592065, "grad_norm": 0.0, "learning_rate": 1.898400501847458e-05, "loss": 1.1855, "step": 4355 }, { "epoch": 0.17043587135143595, "grad_norm": 0.0, "learning_rate": 1.898344840743574e-05, "loss": 1.1801, "step": 4356 }, { "epoch": 0.1704749980436654, "grad_norm": 0.0, "learning_rate": 1.8982891652133976e-05, "loss": 1.1839, "step": 4357 }, { "epoch": 0.17051412473589483, "grad_norm": 0.0, "learning_rate": 1.8982334752578226e-05, "loss": 1.1744, "step": 4358 }, { "epoch": 0.17055325142812428, "grad_norm": 0.0, "learning_rate": 1.8981777708777437e-05, "loss": 1.2429, "step": 4359 }, { "epoch": 0.17059237812035372, "grad_norm": 0.0, "learning_rate": 1.898122052074055e-05, "loss": 1.1681, "step": 4360 }, { "epoch": 0.17063150481258316, "grad_norm": 0.0, "learning_rate": 1.8980663188476518e-05, "loss": 1.0998, "step": 4361 }, { "epoch": 0.17067063150481257, "grad_norm": 0.0, "learning_rate": 1.8980105711994288e-05, "loss": 1.0024, "step": 4362 }, { "epoch": 0.170709758197042, "grad_norm": 0.0, "learning_rate": 1.8979548091302814e-05, "loss": 1.2431, "step": 4363 }, { "epoch": 0.17074888488927145, "grad_norm": 0.0, "learning_rate": 1.8978990326411046e-05, "loss": 1.2228, "step": 4364 }, { "epoch": 0.1707880115815009, "grad_norm": 0.0, "learning_rate": 1.8978432417327948e-05, "loss": 1.2422, "step": 4365 }, { "epoch": 0.17082713827373033, "grad_norm": 0.0, "learning_rate": 1.8977874364062474e-05, "loss": 0.9973, "step": 4366 }, { "epoch": 0.17086626496595977, "grad_norm": 0.0, "learning_rate": 1.897731616662359e-05, "loss": 1.2368, "step": 4367 }, { "epoch": 0.17090539165818922, "grad_norm": 0.0, "learning_rate": 1.8976757825020255e-05, "loss": 1.1647, "step": 4368 }, { "epoch": 0.17094451835041866, "grad_norm": 0.0, "learning_rate": 1.897619933926144e-05, "loss": 1.2939, "step": 4369 }, { "epoch": 0.1709836450426481, "grad_norm": 0.0, "learning_rate": 1.897564070935611e-05, "loss": 1.1894, "step": 4370 }, { "epoch": 0.17102277173487754, "grad_norm": 0.0, "learning_rate": 1.897508193531324e-05, "loss": 1.1304, "step": 4371 }, { "epoch": 0.17106189842710698, "grad_norm": 0.0, "learning_rate": 1.8974523017141798e-05, "loss": 1.2377, "step": 4372 }, { "epoch": 0.17110102511933642, "grad_norm": 0.0, "learning_rate": 1.8973963954850762e-05, "loss": 1.1261, "step": 4373 }, { "epoch": 0.17114015181156586, "grad_norm": 0.0, "learning_rate": 1.8973404748449108e-05, "loss": 1.1989, "step": 4374 }, { "epoch": 0.1711792785037953, "grad_norm": 0.0, "learning_rate": 1.897284539794582e-05, "loss": 1.1217, "step": 4375 }, { "epoch": 0.17121840519602471, "grad_norm": 0.0, "learning_rate": 1.8972285903349885e-05, "loss": 1.2041, "step": 4376 }, { "epoch": 0.17125753188825416, "grad_norm": 0.0, "learning_rate": 1.8971726264670272e-05, "loss": 1.2497, "step": 4377 }, { "epoch": 0.1712966585804836, "grad_norm": 0.0, "learning_rate": 1.8971166481915983e-05, "loss": 1.1281, "step": 4378 }, { "epoch": 0.17133578527271304, "grad_norm": 0.0, "learning_rate": 1.8970606555096e-05, "loss": 1.2095, "step": 4379 }, { "epoch": 0.17137491196494248, "grad_norm": 0.0, "learning_rate": 1.8970046484219312e-05, "loss": 1.0728, "step": 4380 }, { "epoch": 0.17141403865717192, "grad_norm": 0.0, "learning_rate": 1.8969486269294922e-05, "loss": 1.1997, "step": 4381 }, { "epoch": 0.17145316534940136, "grad_norm": 0.0, "learning_rate": 1.8968925910331823e-05, "loss": 1.2479, "step": 4382 }, { "epoch": 0.1714922920416308, "grad_norm": 0.0, "learning_rate": 1.896836540733901e-05, "loss": 1.0992, "step": 4383 }, { "epoch": 0.17153141873386024, "grad_norm": 0.0, "learning_rate": 1.8967804760325487e-05, "loss": 1.2778, "step": 4384 }, { "epoch": 0.17157054542608968, "grad_norm": 0.0, "learning_rate": 1.8967243969300257e-05, "loss": 0.9611, "step": 4385 }, { "epoch": 0.17160967211831912, "grad_norm": 0.0, "learning_rate": 1.8966683034272328e-05, "loss": 1.2111, "step": 4386 }, { "epoch": 0.17164879881054856, "grad_norm": 0.0, "learning_rate": 1.8966121955250702e-05, "loss": 1.1711, "step": 4387 }, { "epoch": 0.171687925502778, "grad_norm": 0.0, "learning_rate": 1.8965560732244395e-05, "loss": 1.121, "step": 4388 }, { "epoch": 0.17172705219500745, "grad_norm": 0.0, "learning_rate": 1.8964999365262416e-05, "loss": 1.1506, "step": 4389 }, { "epoch": 0.1717661788872369, "grad_norm": 0.0, "learning_rate": 1.896443785431378e-05, "loss": 1.1005, "step": 4390 }, { "epoch": 0.1718053055794663, "grad_norm": 0.0, "learning_rate": 1.8963876199407508e-05, "loss": 1.2621, "step": 4391 }, { "epoch": 0.17184443227169574, "grad_norm": 0.0, "learning_rate": 1.896331440055261e-05, "loss": 1.2086, "step": 4392 }, { "epoch": 0.17188355896392518, "grad_norm": 0.0, "learning_rate": 1.896275245775812e-05, "loss": 1.1672, "step": 4393 }, { "epoch": 0.17192268565615462, "grad_norm": 0.0, "learning_rate": 1.8962190371033057e-05, "loss": 1.1758, "step": 4394 }, { "epoch": 0.17196181234838406, "grad_norm": 0.0, "learning_rate": 1.8961628140386446e-05, "loss": 1.0793, "step": 4395 }, { "epoch": 0.1720009390406135, "grad_norm": 0.0, "learning_rate": 1.8961065765827317e-05, "loss": 1.1247, "step": 4396 }, { "epoch": 0.17204006573284295, "grad_norm": 0.0, "learning_rate": 1.8960503247364697e-05, "loss": 1.1357, "step": 4397 }, { "epoch": 0.17207919242507239, "grad_norm": 0.0, "learning_rate": 1.8959940585007626e-05, "loss": 1.0712, "step": 4398 }, { "epoch": 0.17211831911730183, "grad_norm": 0.0, "learning_rate": 1.8959377778765135e-05, "loss": 1.0366, "step": 4399 }, { "epoch": 0.17215744580953127, "grad_norm": 0.0, "learning_rate": 1.8958814828646268e-05, "loss": 1.1201, "step": 4400 }, { "epoch": 0.1721965725017607, "grad_norm": 0.0, "learning_rate": 1.8958251734660055e-05, "loss": 1.1088, "step": 4401 }, { "epoch": 0.17223569919399015, "grad_norm": 0.0, "learning_rate": 1.895768849681555e-05, "loss": 1.2289, "step": 4402 }, { "epoch": 0.1722748258862196, "grad_norm": 0.0, "learning_rate": 1.8957125115121786e-05, "loss": 1.2599, "step": 4403 }, { "epoch": 0.17231395257844903, "grad_norm": 0.0, "learning_rate": 1.895656158958782e-05, "loss": 1.1932, "step": 4404 }, { "epoch": 0.17235307927067844, "grad_norm": 0.0, "learning_rate": 1.89559979202227e-05, "loss": 1.2019, "step": 4405 }, { "epoch": 0.17239220596290789, "grad_norm": 0.0, "learning_rate": 1.8955434107035472e-05, "loss": 1.1969, "step": 4406 }, { "epoch": 0.17243133265513733, "grad_norm": 0.0, "learning_rate": 1.8954870150035195e-05, "loss": 1.1476, "step": 4407 }, { "epoch": 0.17247045934736677, "grad_norm": 0.0, "learning_rate": 1.895430604923093e-05, "loss": 1.2125, "step": 4408 }, { "epoch": 0.1725095860395962, "grad_norm": 0.0, "learning_rate": 1.8953741804631725e-05, "loss": 1.0639, "step": 4409 }, { "epoch": 0.17254871273182565, "grad_norm": 0.0, "learning_rate": 1.8953177416246648e-05, "loss": 1.1388, "step": 4410 }, { "epoch": 0.1725878394240551, "grad_norm": 0.0, "learning_rate": 1.895261288408476e-05, "loss": 1.1716, "step": 4411 }, { "epoch": 0.17262696611628453, "grad_norm": 0.0, "learning_rate": 1.8952048208155123e-05, "loss": 1.1887, "step": 4412 }, { "epoch": 0.17266609280851397, "grad_norm": 0.0, "learning_rate": 1.8951483388466816e-05, "loss": 1.1619, "step": 4413 }, { "epoch": 0.1727052195007434, "grad_norm": 0.0, "learning_rate": 1.89509184250289e-05, "loss": 1.1846, "step": 4414 }, { "epoch": 0.17274434619297285, "grad_norm": 0.0, "learning_rate": 1.895035331785045e-05, "loss": 1.2076, "step": 4415 }, { "epoch": 0.1727834728852023, "grad_norm": 0.0, "learning_rate": 1.894978806694054e-05, "loss": 1.3023, "step": 4416 }, { "epoch": 0.17282259957743173, "grad_norm": 0.0, "learning_rate": 1.894922267230825e-05, "loss": 1.096, "step": 4417 }, { "epoch": 0.17286172626966118, "grad_norm": 0.0, "learning_rate": 1.8948657133962658e-05, "loss": 1.2277, "step": 4418 }, { "epoch": 0.1729008529618906, "grad_norm": 0.0, "learning_rate": 1.8948091451912844e-05, "loss": 1.3237, "step": 4419 }, { "epoch": 0.17293997965412003, "grad_norm": 0.0, "learning_rate": 1.8947525626167896e-05, "loss": 1.1808, "step": 4420 }, { "epoch": 0.17297910634634947, "grad_norm": 0.0, "learning_rate": 1.8946959656736897e-05, "loss": 1.3514, "step": 4421 }, { "epoch": 0.1730182330385789, "grad_norm": 0.0, "learning_rate": 1.894639354362894e-05, "loss": 1.2404, "step": 4422 }, { "epoch": 0.17305735973080835, "grad_norm": 0.0, "learning_rate": 1.894582728685311e-05, "loss": 1.093, "step": 4423 }, { "epoch": 0.1730964864230378, "grad_norm": 0.0, "learning_rate": 1.8945260886418502e-05, "loss": 1.1303, "step": 4424 }, { "epoch": 0.17313561311526723, "grad_norm": 0.0, "learning_rate": 1.894469434233422e-05, "loss": 1.1951, "step": 4425 }, { "epoch": 0.17317473980749667, "grad_norm": 0.0, "learning_rate": 1.8944127654609346e-05, "loss": 1.0833, "step": 4426 }, { "epoch": 0.17321386649972612, "grad_norm": 0.0, "learning_rate": 1.8943560823252995e-05, "loss": 1.1138, "step": 4427 }, { "epoch": 0.17325299319195556, "grad_norm": 0.0, "learning_rate": 1.8942993848274263e-05, "loss": 1.1971, "step": 4428 }, { "epoch": 0.173292119884185, "grad_norm": 0.0, "learning_rate": 1.8942426729682256e-05, "loss": 1.162, "step": 4429 }, { "epoch": 0.17333124657641444, "grad_norm": 0.0, "learning_rate": 1.8941859467486083e-05, "loss": 1.1793, "step": 4430 }, { "epoch": 0.17337037326864388, "grad_norm": 0.0, "learning_rate": 1.894129206169485e-05, "loss": 1.1385, "step": 4431 }, { "epoch": 0.17340949996087332, "grad_norm": 0.0, "learning_rate": 1.8940724512317672e-05, "loss": 1.2037, "step": 4432 }, { "epoch": 0.17344862665310273, "grad_norm": 0.0, "learning_rate": 1.894015681936366e-05, "loss": 1.0979, "step": 4433 }, { "epoch": 0.17348775334533217, "grad_norm": 0.0, "learning_rate": 1.893958898284193e-05, "loss": 1.1006, "step": 4434 }, { "epoch": 0.17352688003756161, "grad_norm": 0.0, "learning_rate": 1.8939021002761605e-05, "loss": 1.1134, "step": 4435 }, { "epoch": 0.17356600672979106, "grad_norm": 0.0, "learning_rate": 1.8938452879131807e-05, "loss": 1.0721, "step": 4436 }, { "epoch": 0.1736051334220205, "grad_norm": 0.0, "learning_rate": 1.8937884611961653e-05, "loss": 1.3123, "step": 4437 }, { "epoch": 0.17364426011424994, "grad_norm": 0.0, "learning_rate": 1.8937316201260273e-05, "loss": 1.1208, "step": 4438 }, { "epoch": 0.17368338680647938, "grad_norm": 0.0, "learning_rate": 1.8936747647036793e-05, "loss": 1.0829, "step": 4439 }, { "epoch": 0.17372251349870882, "grad_norm": 0.0, "learning_rate": 1.8936178949300344e-05, "loss": 1.1677, "step": 4440 }, { "epoch": 0.17376164019093826, "grad_norm": 0.0, "learning_rate": 1.893561010806006e-05, "loss": 1.1608, "step": 4441 }, { "epoch": 0.1738007668831677, "grad_norm": 0.0, "learning_rate": 1.8935041123325073e-05, "loss": 1.1776, "step": 4442 }, { "epoch": 0.17383989357539714, "grad_norm": 0.0, "learning_rate": 1.8934471995104525e-05, "loss": 1.3594, "step": 4443 }, { "epoch": 0.17387902026762658, "grad_norm": 0.0, "learning_rate": 1.8933902723407547e-05, "loss": 1.0615, "step": 4444 }, { "epoch": 0.17391814695985602, "grad_norm": 0.0, "learning_rate": 1.893333330824329e-05, "loss": 1.2118, "step": 4445 }, { "epoch": 0.17395727365208546, "grad_norm": 0.0, "learning_rate": 1.8932763749620894e-05, "loss": 1.3845, "step": 4446 }, { "epoch": 0.1739964003443149, "grad_norm": 0.0, "learning_rate": 1.8932194047549504e-05, "loss": 1.132, "step": 4447 }, { "epoch": 0.17403552703654432, "grad_norm": 0.0, "learning_rate": 1.8931624202038272e-05, "loss": 1.1263, "step": 4448 }, { "epoch": 0.17407465372877376, "grad_norm": 0.0, "learning_rate": 1.8931054213096345e-05, "loss": 1.1619, "step": 4449 }, { "epoch": 0.1741137804210032, "grad_norm": 0.0, "learning_rate": 1.893048408073288e-05, "loss": 1.1931, "step": 4450 }, { "epoch": 0.17415290711323264, "grad_norm": 0.0, "learning_rate": 1.8929913804957028e-05, "loss": 1.1163, "step": 4451 }, { "epoch": 0.17419203380546208, "grad_norm": 0.0, "learning_rate": 1.8929343385777957e-05, "loss": 1.1378, "step": 4452 }, { "epoch": 0.17423116049769152, "grad_norm": 0.0, "learning_rate": 1.8928772823204814e-05, "loss": 1.2235, "step": 4453 }, { "epoch": 0.17427028718992096, "grad_norm": 0.0, "learning_rate": 1.8928202117246773e-05, "loss": 1.2335, "step": 4454 }, { "epoch": 0.1743094138821504, "grad_norm": 0.0, "learning_rate": 1.892763126791299e-05, "loss": 1.3854, "step": 4455 }, { "epoch": 0.17434854057437985, "grad_norm": 0.0, "learning_rate": 1.8927060275212637e-05, "loss": 1.1325, "step": 4456 }, { "epoch": 0.1743876672666093, "grad_norm": 0.0, "learning_rate": 1.892648913915488e-05, "loss": 1.1536, "step": 4457 }, { "epoch": 0.17442679395883873, "grad_norm": 0.0, "learning_rate": 1.8925917859748898e-05, "loss": 1.2169, "step": 4458 }, { "epoch": 0.17446592065106817, "grad_norm": 0.0, "learning_rate": 1.8925346437003856e-05, "loss": 1.1578, "step": 4459 }, { "epoch": 0.1745050473432976, "grad_norm": 0.0, "learning_rate": 1.8924774870928934e-05, "loss": 1.0969, "step": 4460 }, { "epoch": 0.17454417403552705, "grad_norm": 0.0, "learning_rate": 1.8924203161533312e-05, "loss": 1.2028, "step": 4461 }, { "epoch": 0.17458330072775646, "grad_norm": 0.0, "learning_rate": 1.892363130882617e-05, "loss": 1.247, "step": 4462 }, { "epoch": 0.1746224274199859, "grad_norm": 0.0, "learning_rate": 1.892305931281669e-05, "loss": 1.1283, "step": 4463 }, { "epoch": 0.17466155411221534, "grad_norm": 0.0, "learning_rate": 1.892248717351406e-05, "loss": 1.0954, "step": 4464 }, { "epoch": 0.17470068080444479, "grad_norm": 0.0, "learning_rate": 1.892191489092747e-05, "loss": 1.1097, "step": 4465 }, { "epoch": 0.17473980749667423, "grad_norm": 0.0, "learning_rate": 1.89213424650661e-05, "loss": 1.0547, "step": 4466 }, { "epoch": 0.17477893418890367, "grad_norm": 0.0, "learning_rate": 1.8920769895939154e-05, "loss": 1.2075, "step": 4467 }, { "epoch": 0.1748180608811331, "grad_norm": 0.0, "learning_rate": 1.8920197183555818e-05, "loss": 1.2498, "step": 4468 }, { "epoch": 0.17485718757336255, "grad_norm": 0.0, "learning_rate": 1.8919624327925295e-05, "loss": 1.1829, "step": 4469 }, { "epoch": 0.174896314265592, "grad_norm": 0.0, "learning_rate": 1.891905132905678e-05, "loss": 1.1215, "step": 4470 }, { "epoch": 0.17493544095782143, "grad_norm": 0.0, "learning_rate": 1.891847818695948e-05, "loss": 1.1792, "step": 4471 }, { "epoch": 0.17497456765005087, "grad_norm": 0.0, "learning_rate": 1.8917904901642593e-05, "loss": 1.1142, "step": 4472 }, { "epoch": 0.1750136943422803, "grad_norm": 0.0, "learning_rate": 1.8917331473115332e-05, "loss": 1.1709, "step": 4473 }, { "epoch": 0.17505282103450975, "grad_norm": 0.0, "learning_rate": 1.89167579013869e-05, "loss": 1.2002, "step": 4474 }, { "epoch": 0.1750919477267392, "grad_norm": 0.0, "learning_rate": 1.8916184186466507e-05, "loss": 1.1753, "step": 4475 }, { "epoch": 0.1751310744189686, "grad_norm": 0.0, "learning_rate": 1.891561032836337e-05, "loss": 1.1944, "step": 4476 }, { "epoch": 0.17517020111119805, "grad_norm": 0.0, "learning_rate": 1.8915036327086702e-05, "loss": 0.8831, "step": 4477 }, { "epoch": 0.1752093278034275, "grad_norm": 0.0, "learning_rate": 1.8914462182645724e-05, "loss": 1.235, "step": 4478 }, { "epoch": 0.17524845449565693, "grad_norm": 0.0, "learning_rate": 1.8913887895049652e-05, "loss": 1.1368, "step": 4479 }, { "epoch": 0.17528758118788637, "grad_norm": 0.0, "learning_rate": 1.8913313464307713e-05, "loss": 1.0982, "step": 4480 }, { "epoch": 0.1753267078801158, "grad_norm": 0.0, "learning_rate": 1.8912738890429125e-05, "loss": 1.1565, "step": 4481 }, { "epoch": 0.17536583457234525, "grad_norm": 0.0, "learning_rate": 1.891216417342312e-05, "loss": 1.1837, "step": 4482 }, { "epoch": 0.1754049612645747, "grad_norm": 0.0, "learning_rate": 1.8911589313298927e-05, "loss": 1.3203, "step": 4483 }, { "epoch": 0.17544408795680413, "grad_norm": 0.0, "learning_rate": 1.8911014310065776e-05, "loss": 1.274, "step": 4484 }, { "epoch": 0.17548321464903358, "grad_norm": 0.0, "learning_rate": 1.89104391637329e-05, "loss": 1.1756, "step": 4485 }, { "epoch": 0.17552234134126302, "grad_norm": 0.0, "learning_rate": 1.8909863874309538e-05, "loss": 1.2891, "step": 4486 }, { "epoch": 0.17556146803349246, "grad_norm": 0.0, "learning_rate": 1.8909288441804927e-05, "loss": 1.0999, "step": 4487 }, { "epoch": 0.1756005947257219, "grad_norm": 0.0, "learning_rate": 1.8908712866228306e-05, "loss": 1.2434, "step": 4488 }, { "epoch": 0.17563972141795134, "grad_norm": 0.0, "learning_rate": 1.8908137147588922e-05, "loss": 1.1646, "step": 4489 }, { "epoch": 0.17567884811018075, "grad_norm": 0.0, "learning_rate": 1.8907561285896014e-05, "loss": 1.2185, "step": 4490 }, { "epoch": 0.1757179748024102, "grad_norm": 0.0, "learning_rate": 1.8906985281158837e-05, "loss": 1.0901, "step": 4491 }, { "epoch": 0.17575710149463963, "grad_norm": 0.0, "learning_rate": 1.890640913338664e-05, "loss": 1.2159, "step": 4492 }, { "epoch": 0.17579622818686907, "grad_norm": 0.0, "learning_rate": 1.8905832842588666e-05, "loss": 1.0787, "step": 4493 }, { "epoch": 0.17583535487909852, "grad_norm": 0.0, "learning_rate": 1.890525640877418e-05, "loss": 1.1685, "step": 4494 }, { "epoch": 0.17587448157132796, "grad_norm": 0.0, "learning_rate": 1.890467983195243e-05, "loss": 1.1062, "step": 4495 }, { "epoch": 0.1759136082635574, "grad_norm": 0.0, "learning_rate": 1.8904103112132687e-05, "loss": 1.192, "step": 4496 }, { "epoch": 0.17595273495578684, "grad_norm": 0.0, "learning_rate": 1.8903526249324203e-05, "loss": 1.1556, "step": 4497 }, { "epoch": 0.17599186164801628, "grad_norm": 0.0, "learning_rate": 1.8902949243536245e-05, "loss": 1.0969, "step": 4498 }, { "epoch": 0.17603098834024572, "grad_norm": 0.0, "learning_rate": 1.8902372094778078e-05, "loss": 1.0747, "step": 4499 }, { "epoch": 0.17607011503247516, "grad_norm": 0.0, "learning_rate": 1.8901794803058967e-05, "loss": 1.1879, "step": 4500 }, { "epoch": 0.1761092417247046, "grad_norm": 0.0, "learning_rate": 1.8901217368388193e-05, "loss": 1.0059, "step": 4501 }, { "epoch": 0.17614836841693404, "grad_norm": 0.0, "learning_rate": 1.8900639790775014e-05, "loss": 1.3317, "step": 4502 }, { "epoch": 0.17618749510916348, "grad_norm": 0.0, "learning_rate": 1.8900062070228716e-05, "loss": 1.2022, "step": 4503 }, { "epoch": 0.1762266218013929, "grad_norm": 0.0, "learning_rate": 1.8899484206758574e-05, "loss": 1.1211, "step": 4504 }, { "epoch": 0.17626574849362234, "grad_norm": 0.0, "learning_rate": 1.889890620037387e-05, "loss": 1.2247, "step": 4505 }, { "epoch": 0.17630487518585178, "grad_norm": 0.0, "learning_rate": 1.889832805108388e-05, "loss": 1.1786, "step": 4506 }, { "epoch": 0.17634400187808122, "grad_norm": 0.0, "learning_rate": 1.8897749758897893e-05, "loss": 1.0692, "step": 4507 }, { "epoch": 0.17638312857031066, "grad_norm": 0.0, "learning_rate": 1.8897171323825195e-05, "loss": 1.1556, "step": 4508 }, { "epoch": 0.1764222552625401, "grad_norm": 0.0, "learning_rate": 1.8896592745875075e-05, "loss": 1.1383, "step": 4509 }, { "epoch": 0.17646138195476954, "grad_norm": 0.0, "learning_rate": 1.8896014025056822e-05, "loss": 1.1144, "step": 4510 }, { "epoch": 0.17650050864699898, "grad_norm": 0.0, "learning_rate": 1.889543516137973e-05, "loss": 1.2275, "step": 4511 }, { "epoch": 0.17653963533922842, "grad_norm": 0.0, "learning_rate": 1.8894856154853097e-05, "loss": 1.1922, "step": 4512 }, { "epoch": 0.17657876203145786, "grad_norm": 0.0, "learning_rate": 1.889427700548622e-05, "loss": 1.2329, "step": 4513 }, { "epoch": 0.1766178887236873, "grad_norm": 0.0, "learning_rate": 1.88936977132884e-05, "loss": 1.2418, "step": 4514 }, { "epoch": 0.17665701541591675, "grad_norm": 0.0, "learning_rate": 1.8893118278268937e-05, "loss": 1.1539, "step": 4515 }, { "epoch": 0.1766961421081462, "grad_norm": 0.0, "learning_rate": 1.889253870043714e-05, "loss": 1.319, "step": 4516 }, { "epoch": 0.17673526880037563, "grad_norm": 0.0, "learning_rate": 1.8891958979802316e-05, "loss": 1.1099, "step": 4517 }, { "epoch": 0.17677439549260507, "grad_norm": 0.0, "learning_rate": 1.8891379116373768e-05, "loss": 1.3125, "step": 4518 }, { "epoch": 0.17681352218483448, "grad_norm": 0.0, "learning_rate": 1.8890799110160815e-05, "loss": 1.1499, "step": 4519 }, { "epoch": 0.17685264887706392, "grad_norm": 0.0, "learning_rate": 1.889021896117277e-05, "loss": 1.0594, "step": 4520 }, { "epoch": 0.17689177556929336, "grad_norm": 0.0, "learning_rate": 1.888963866941895e-05, "loss": 1.226, "step": 4521 }, { "epoch": 0.1769309022615228, "grad_norm": 0.0, "learning_rate": 1.888905823490867e-05, "loss": 1.0658, "step": 4522 }, { "epoch": 0.17697002895375225, "grad_norm": 0.0, "learning_rate": 1.8888477657651255e-05, "loss": 1.1181, "step": 4523 }, { "epoch": 0.1770091556459817, "grad_norm": 0.0, "learning_rate": 1.8887896937656028e-05, "loss": 1.3084, "step": 4524 }, { "epoch": 0.17704828233821113, "grad_norm": 0.0, "learning_rate": 1.888731607493231e-05, "loss": 1.187, "step": 4525 }, { "epoch": 0.17708740903044057, "grad_norm": 0.0, "learning_rate": 1.8886735069489433e-05, "loss": 1.0996, "step": 4526 }, { "epoch": 0.17712653572267, "grad_norm": 0.0, "learning_rate": 1.888615392133673e-05, "loss": 1.216, "step": 4527 }, { "epoch": 0.17716566241489945, "grad_norm": 0.0, "learning_rate": 1.8885572630483527e-05, "loss": 1.2082, "step": 4528 }, { "epoch": 0.1772047891071289, "grad_norm": 0.0, "learning_rate": 1.8884991196939163e-05, "loss": 1.1439, "step": 4529 }, { "epoch": 0.17724391579935833, "grad_norm": 0.0, "learning_rate": 1.888440962071297e-05, "loss": 1.158, "step": 4530 }, { "epoch": 0.17728304249158777, "grad_norm": 0.0, "learning_rate": 1.8883827901814296e-05, "loss": 1.1774, "step": 4531 }, { "epoch": 0.1773221691838172, "grad_norm": 0.0, "learning_rate": 1.8883246040252478e-05, "loss": 1.1643, "step": 4532 }, { "epoch": 0.17736129587604663, "grad_norm": 0.0, "learning_rate": 1.8882664036036854e-05, "loss": 1.1993, "step": 4533 }, { "epoch": 0.17740042256827607, "grad_norm": 0.0, "learning_rate": 1.888208188917678e-05, "loss": 1.1875, "step": 4534 }, { "epoch": 0.1774395492605055, "grad_norm": 0.0, "learning_rate": 1.8881499599681603e-05, "loss": 1.1647, "step": 4535 }, { "epoch": 0.17747867595273495, "grad_norm": 0.0, "learning_rate": 1.8880917167560668e-05, "loss": 1.218, "step": 4536 }, { "epoch": 0.1775178026449644, "grad_norm": 0.0, "learning_rate": 1.8880334592823333e-05, "loss": 1.1763, "step": 4537 }, { "epoch": 0.17755692933719383, "grad_norm": 0.0, "learning_rate": 1.8879751875478954e-05, "loss": 1.0676, "step": 4538 }, { "epoch": 0.17759605602942327, "grad_norm": 0.0, "learning_rate": 1.8879169015536884e-05, "loss": 1.1329, "step": 4539 }, { "epoch": 0.1776351827216527, "grad_norm": 0.0, "learning_rate": 1.8878586013006487e-05, "loss": 1.1038, "step": 4540 }, { "epoch": 0.17767430941388215, "grad_norm": 0.0, "learning_rate": 1.8878002867897124e-05, "loss": 0.9558, "step": 4541 }, { "epoch": 0.1777134361061116, "grad_norm": 0.0, "learning_rate": 1.887741958021816e-05, "loss": 1.2072, "step": 4542 }, { "epoch": 0.17775256279834104, "grad_norm": 0.0, "learning_rate": 1.8876836149978964e-05, "loss": 1.1981, "step": 4543 }, { "epoch": 0.17779168949057048, "grad_norm": 0.0, "learning_rate": 1.8876252577188897e-05, "loss": 1.2228, "step": 4544 }, { "epoch": 0.17783081618279992, "grad_norm": 0.0, "learning_rate": 1.887566886185734e-05, "loss": 1.1869, "step": 4545 }, { "epoch": 0.17786994287502936, "grad_norm": 0.0, "learning_rate": 1.8875085003993663e-05, "loss": 1.1678, "step": 4546 }, { "epoch": 0.17790906956725877, "grad_norm": 0.0, "learning_rate": 1.8874501003607242e-05, "loss": 1.161, "step": 4547 }, { "epoch": 0.1779481962594882, "grad_norm": 0.0, "learning_rate": 1.8873916860707453e-05, "loss": 1.1193, "step": 4548 }, { "epoch": 0.17798732295171765, "grad_norm": 0.0, "learning_rate": 1.8873332575303682e-05, "loss": 1.1425, "step": 4549 }, { "epoch": 0.1780264496439471, "grad_norm": 0.0, "learning_rate": 1.8872748147405303e-05, "loss": 1.1277, "step": 4550 }, { "epoch": 0.17806557633617653, "grad_norm": 0.0, "learning_rate": 1.8872163577021714e-05, "loss": 1.1665, "step": 4551 }, { "epoch": 0.17810470302840598, "grad_norm": 0.0, "learning_rate": 1.8871578864162292e-05, "loss": 1.3033, "step": 4552 }, { "epoch": 0.17814382972063542, "grad_norm": 0.0, "learning_rate": 1.887099400883643e-05, "loss": 1.2838, "step": 4553 }, { "epoch": 0.17818295641286486, "grad_norm": 0.0, "learning_rate": 1.8870409011053522e-05, "loss": 1.0154, "step": 4554 }, { "epoch": 0.1782220831050943, "grad_norm": 0.0, "learning_rate": 1.8869823870822958e-05, "loss": 1.0679, "step": 4555 }, { "epoch": 0.17826120979732374, "grad_norm": 0.0, "learning_rate": 1.8869238588154138e-05, "loss": 1.1614, "step": 4556 }, { "epoch": 0.17830033648955318, "grad_norm": 0.0, "learning_rate": 1.886865316305646e-05, "loss": 1.2178, "step": 4557 }, { "epoch": 0.17833946318178262, "grad_norm": 0.0, "learning_rate": 1.8868067595539327e-05, "loss": 1.166, "step": 4558 }, { "epoch": 0.17837858987401206, "grad_norm": 0.0, "learning_rate": 1.886748188561214e-05, "loss": 1.1086, "step": 4559 }, { "epoch": 0.1784177165662415, "grad_norm": 0.0, "learning_rate": 1.8866896033284305e-05, "loss": 1.0898, "step": 4560 }, { "epoch": 0.17845684325847092, "grad_norm": 0.0, "learning_rate": 1.886631003856523e-05, "loss": 1.0368, "step": 4561 }, { "epoch": 0.17849596995070036, "grad_norm": 0.0, "learning_rate": 1.8865723901464326e-05, "loss": 1.152, "step": 4562 }, { "epoch": 0.1785350966429298, "grad_norm": 0.0, "learning_rate": 1.886513762199101e-05, "loss": 1.2094, "step": 4563 }, { "epoch": 0.17857422333515924, "grad_norm": 0.0, "learning_rate": 1.8864551200154687e-05, "loss": 1.202, "step": 4564 }, { "epoch": 0.17861335002738868, "grad_norm": 0.0, "learning_rate": 1.8863964635964777e-05, "loss": 1.1681, "step": 4565 }, { "epoch": 0.17865247671961812, "grad_norm": 0.0, "learning_rate": 1.8863377929430706e-05, "loss": 1.2178, "step": 4566 }, { "epoch": 0.17869160341184756, "grad_norm": 0.0, "learning_rate": 1.8862791080561894e-05, "loss": 1.2032, "step": 4567 }, { "epoch": 0.178730730104077, "grad_norm": 0.0, "learning_rate": 1.886220408936776e-05, "loss": 1.2034, "step": 4568 }, { "epoch": 0.17876985679630644, "grad_norm": 0.0, "learning_rate": 1.8861616955857734e-05, "loss": 1.1194, "step": 4569 }, { "epoch": 0.17880898348853588, "grad_norm": 0.0, "learning_rate": 1.8861029680041242e-05, "loss": 1.2424, "step": 4570 }, { "epoch": 0.17884811018076532, "grad_norm": 0.0, "learning_rate": 1.8860442261927722e-05, "loss": 1.0947, "step": 4571 }, { "epoch": 0.17888723687299476, "grad_norm": 0.0, "learning_rate": 1.8859854701526598e-05, "loss": 1.1826, "step": 4572 }, { "epoch": 0.1789263635652242, "grad_norm": 0.0, "learning_rate": 1.885926699884731e-05, "loss": 1.0986, "step": 4573 }, { "epoch": 0.17896549025745365, "grad_norm": 0.0, "learning_rate": 1.8858679153899295e-05, "loss": 1.2298, "step": 4574 }, { "epoch": 0.1790046169496831, "grad_norm": 0.0, "learning_rate": 1.8858091166691995e-05, "loss": 1.2611, "step": 4575 }, { "epoch": 0.1790437436419125, "grad_norm": 0.0, "learning_rate": 1.885750303723485e-05, "loss": 1.2273, "step": 4576 }, { "epoch": 0.17908287033414194, "grad_norm": 0.0, "learning_rate": 1.8856914765537303e-05, "loss": 1.2407, "step": 4577 }, { "epoch": 0.17912199702637138, "grad_norm": 0.0, "learning_rate": 1.8856326351608806e-05, "loss": 0.9752, "step": 4578 }, { "epoch": 0.17916112371860082, "grad_norm": 0.0, "learning_rate": 1.8855737795458806e-05, "loss": 1.3062, "step": 4579 }, { "epoch": 0.17920025041083026, "grad_norm": 0.0, "learning_rate": 1.885514909709675e-05, "loss": 1.2061, "step": 4580 }, { "epoch": 0.1792393771030597, "grad_norm": 0.0, "learning_rate": 1.8854560256532098e-05, "loss": 1.1608, "step": 4581 }, { "epoch": 0.17927850379528915, "grad_norm": 0.0, "learning_rate": 1.8853971273774306e-05, "loss": 1.1391, "step": 4582 }, { "epoch": 0.1793176304875186, "grad_norm": 0.0, "learning_rate": 1.885338214883283e-05, "loss": 1.2205, "step": 4583 }, { "epoch": 0.17935675717974803, "grad_norm": 0.0, "learning_rate": 1.8852792881717125e-05, "loss": 1.2137, "step": 4584 }, { "epoch": 0.17939588387197747, "grad_norm": 0.0, "learning_rate": 1.8852203472436662e-05, "loss": 1.1208, "step": 4585 }, { "epoch": 0.1794350105642069, "grad_norm": 0.0, "learning_rate": 1.8851613921000906e-05, "loss": 1.1845, "step": 4586 }, { "epoch": 0.17947413725643635, "grad_norm": 0.0, "learning_rate": 1.8851024227419322e-05, "loss": 1.3884, "step": 4587 }, { "epoch": 0.1795132639486658, "grad_norm": 0.0, "learning_rate": 1.885043439170138e-05, "loss": 1.1501, "step": 4588 }, { "epoch": 0.17955239064089523, "grad_norm": 0.0, "learning_rate": 1.8849844413856548e-05, "loss": 1.2246, "step": 4589 }, { "epoch": 0.17959151733312465, "grad_norm": 0.0, "learning_rate": 1.8849254293894307e-05, "loss": 1.0839, "step": 4590 }, { "epoch": 0.17963064402535409, "grad_norm": 0.0, "learning_rate": 1.8848664031824132e-05, "loss": 1.2349, "step": 4591 }, { "epoch": 0.17966977071758353, "grad_norm": 0.0, "learning_rate": 1.8848073627655496e-05, "loss": 1.2089, "step": 4592 }, { "epoch": 0.17970889740981297, "grad_norm": 0.0, "learning_rate": 1.884748308139789e-05, "loss": 1.1687, "step": 4593 }, { "epoch": 0.1797480241020424, "grad_norm": 0.0, "learning_rate": 1.884689239306079e-05, "loss": 1.2243, "step": 4594 }, { "epoch": 0.17978715079427185, "grad_norm": 0.0, "learning_rate": 1.8846301562653682e-05, "loss": 1.122, "step": 4595 }, { "epoch": 0.1798262774865013, "grad_norm": 0.0, "learning_rate": 1.8845710590186058e-05, "loss": 1.1019, "step": 4596 }, { "epoch": 0.17986540417873073, "grad_norm": 0.0, "learning_rate": 1.8845119475667407e-05, "loss": 1.178, "step": 4597 }, { "epoch": 0.17990453087096017, "grad_norm": 0.0, "learning_rate": 1.884452821910722e-05, "loss": 1.2399, "step": 4598 }, { "epoch": 0.1799436575631896, "grad_norm": 0.0, "learning_rate": 1.884393682051499e-05, "loss": 1.2457, "step": 4599 }, { "epoch": 0.17998278425541905, "grad_norm": 0.0, "learning_rate": 1.884334527990022e-05, "loss": 1.1498, "step": 4600 }, { "epoch": 0.1800219109476485, "grad_norm": 0.0, "learning_rate": 1.8842753597272408e-05, "loss": 1.2216, "step": 4601 }, { "epoch": 0.18006103763987794, "grad_norm": 0.0, "learning_rate": 1.884216177264105e-05, "loss": 1.1417, "step": 4602 }, { "epoch": 0.18010016433210738, "grad_norm": 0.0, "learning_rate": 1.8841569806015652e-05, "loss": 1.1022, "step": 4603 }, { "epoch": 0.1801392910243368, "grad_norm": 0.0, "learning_rate": 1.8840977697405728e-05, "loss": 1.1118, "step": 4604 }, { "epoch": 0.18017841771656623, "grad_norm": 0.0, "learning_rate": 1.884038544682078e-05, "loss": 1.1564, "step": 4605 }, { "epoch": 0.18021754440879567, "grad_norm": 0.0, "learning_rate": 1.8839793054270315e-05, "loss": 1.238, "step": 4606 }, { "epoch": 0.1802566711010251, "grad_norm": 0.0, "learning_rate": 1.8839200519763852e-05, "loss": 1.2336, "step": 4607 }, { "epoch": 0.18029579779325455, "grad_norm": 0.0, "learning_rate": 1.8838607843310907e-05, "loss": 1.0875, "step": 4608 }, { "epoch": 0.180334924485484, "grad_norm": 0.0, "learning_rate": 1.8838015024920993e-05, "loss": 1.166, "step": 4609 }, { "epoch": 0.18037405117771343, "grad_norm": 0.0, "learning_rate": 1.883742206460363e-05, "loss": 1.1813, "step": 4610 }, { "epoch": 0.18041317786994288, "grad_norm": 0.0, "learning_rate": 1.8836828962368344e-05, "loss": 1.2397, "step": 4611 }, { "epoch": 0.18045230456217232, "grad_norm": 0.0, "learning_rate": 1.8836235718224664e-05, "loss": 1.1896, "step": 4612 }, { "epoch": 0.18049143125440176, "grad_norm": 0.0, "learning_rate": 1.8835642332182105e-05, "loss": 1.1949, "step": 4613 }, { "epoch": 0.1805305579466312, "grad_norm": 0.0, "learning_rate": 1.8835048804250204e-05, "loss": 1.218, "step": 4614 }, { "epoch": 0.18056968463886064, "grad_norm": 0.0, "learning_rate": 1.883445513443849e-05, "loss": 1.0898, "step": 4615 }, { "epoch": 0.18060881133109008, "grad_norm": 0.0, "learning_rate": 1.8833861322756496e-05, "loss": 1.0529, "step": 4616 }, { "epoch": 0.18064793802331952, "grad_norm": 0.0, "learning_rate": 1.883326736921376e-05, "loss": 1.1663, "step": 4617 }, { "epoch": 0.18068706471554893, "grad_norm": 0.0, "learning_rate": 1.8832673273819812e-05, "loss": 1.1663, "step": 4618 }, { "epoch": 0.18072619140777837, "grad_norm": 0.0, "learning_rate": 1.8832079036584208e-05, "loss": 1.208, "step": 4619 }, { "epoch": 0.18076531810000782, "grad_norm": 0.0, "learning_rate": 1.8831484657516478e-05, "loss": 1.17, "step": 4620 }, { "epoch": 0.18080444479223726, "grad_norm": 0.0, "learning_rate": 1.883089013662617e-05, "loss": 1.1344, "step": 4621 }, { "epoch": 0.1808435714844667, "grad_norm": 0.0, "learning_rate": 1.8830295473922833e-05, "loss": 1.1466, "step": 4622 }, { "epoch": 0.18088269817669614, "grad_norm": 0.0, "learning_rate": 1.8829700669416017e-05, "loss": 1.1217, "step": 4623 }, { "epoch": 0.18092182486892558, "grad_norm": 0.0, "learning_rate": 1.8829105723115272e-05, "loss": 1.2742, "step": 4624 }, { "epoch": 0.18096095156115502, "grad_norm": 0.0, "learning_rate": 1.882851063503015e-05, "loss": 1.2891, "step": 4625 }, { "epoch": 0.18100007825338446, "grad_norm": 0.0, "learning_rate": 1.8827915405170212e-05, "loss": 1.1975, "step": 4626 }, { "epoch": 0.1810392049456139, "grad_norm": 0.0, "learning_rate": 1.8827320033545015e-05, "loss": 1.2396, "step": 4627 }, { "epoch": 0.18107833163784334, "grad_norm": 0.0, "learning_rate": 1.8826724520164118e-05, "loss": 1.0533, "step": 4628 }, { "epoch": 0.18111745833007278, "grad_norm": 0.0, "learning_rate": 1.882612886503709e-05, "loss": 1.0398, "step": 4629 }, { "epoch": 0.18115658502230222, "grad_norm": 0.0, "learning_rate": 1.8825533068173486e-05, "loss": 1.1506, "step": 4630 }, { "epoch": 0.18119571171453167, "grad_norm": 0.0, "learning_rate": 1.8824937129582886e-05, "loss": 1.2109, "step": 4631 }, { "epoch": 0.1812348384067611, "grad_norm": 0.0, "learning_rate": 1.882434104927485e-05, "loss": 1.2931, "step": 4632 }, { "epoch": 0.18127396509899052, "grad_norm": 0.0, "learning_rate": 1.8823744827258954e-05, "loss": 1.145, "step": 4633 }, { "epoch": 0.18131309179121996, "grad_norm": 0.0, "learning_rate": 1.8823148463544775e-05, "loss": 1.0826, "step": 4634 }, { "epoch": 0.1813522184834494, "grad_norm": 0.0, "learning_rate": 1.882255195814189e-05, "loss": 1.2265, "step": 4635 }, { "epoch": 0.18139134517567884, "grad_norm": 0.0, "learning_rate": 1.882195531105987e-05, "loss": 1.1564, "step": 4636 }, { "epoch": 0.18143047186790828, "grad_norm": 0.0, "learning_rate": 1.8821358522308306e-05, "loss": 1.2659, "step": 4637 }, { "epoch": 0.18146959856013772, "grad_norm": 0.0, "learning_rate": 1.8820761591896775e-05, "loss": 1.0577, "step": 4638 }, { "epoch": 0.18150872525236716, "grad_norm": 0.0, "learning_rate": 1.8820164519834868e-05, "loss": 1.1875, "step": 4639 }, { "epoch": 0.1815478519445966, "grad_norm": 0.0, "learning_rate": 1.881956730613217e-05, "loss": 1.1053, "step": 4640 }, { "epoch": 0.18158697863682605, "grad_norm": 0.0, "learning_rate": 1.8818969950798274e-05, "loss": 1.2126, "step": 4641 }, { "epoch": 0.1816261053290555, "grad_norm": 0.0, "learning_rate": 1.881837245384277e-05, "loss": 1.1569, "step": 4642 }, { "epoch": 0.18166523202128493, "grad_norm": 0.0, "learning_rate": 1.8817774815275256e-05, "loss": 1.1471, "step": 4643 }, { "epoch": 0.18170435871351437, "grad_norm": 0.0, "learning_rate": 1.881717703510533e-05, "loss": 1.1234, "step": 4644 }, { "epoch": 0.1817434854057438, "grad_norm": 0.0, "learning_rate": 1.881657911334258e-05, "loss": 1.2017, "step": 4645 }, { "epoch": 0.18178261209797325, "grad_norm": 0.0, "learning_rate": 1.8815981049996627e-05, "loss": 1.1543, "step": 4646 }, { "epoch": 0.18182173879020266, "grad_norm": 0.0, "learning_rate": 1.881538284507706e-05, "loss": 1.0834, "step": 4647 }, { "epoch": 0.1818608654824321, "grad_norm": 0.0, "learning_rate": 1.8814784498593494e-05, "loss": 1.1343, "step": 4648 }, { "epoch": 0.18189999217466155, "grad_norm": 0.0, "learning_rate": 1.8814186010555533e-05, "loss": 1.2212, "step": 4649 }, { "epoch": 0.181939118866891, "grad_norm": 0.0, "learning_rate": 1.881358738097279e-05, "loss": 1.2234, "step": 4650 }, { "epoch": 0.18197824555912043, "grad_norm": 0.0, "learning_rate": 1.8812988609854877e-05, "loss": 1.1791, "step": 4651 }, { "epoch": 0.18201737225134987, "grad_norm": 0.0, "learning_rate": 1.881238969721141e-05, "loss": 1.1073, "step": 4652 }, { "epoch": 0.1820564989435793, "grad_norm": 0.0, "learning_rate": 1.881179064305201e-05, "loss": 1.1268, "step": 4653 }, { "epoch": 0.18209562563580875, "grad_norm": 0.0, "learning_rate": 1.881119144738629e-05, "loss": 1.1137, "step": 4654 }, { "epoch": 0.1821347523280382, "grad_norm": 0.0, "learning_rate": 1.8810592110223876e-05, "loss": 1.185, "step": 4655 }, { "epoch": 0.18217387902026763, "grad_norm": 0.0, "learning_rate": 1.8809992631574395e-05, "loss": 1.0962, "step": 4656 }, { "epoch": 0.18221300571249707, "grad_norm": 0.0, "learning_rate": 1.880939301144747e-05, "loss": 1.2541, "step": 4657 }, { "epoch": 0.1822521324047265, "grad_norm": 0.0, "learning_rate": 1.8808793249852737e-05, "loss": 1.2389, "step": 4658 }, { "epoch": 0.18229125909695595, "grad_norm": 0.0, "learning_rate": 1.880819334679982e-05, "loss": 1.1641, "step": 4659 }, { "epoch": 0.1823303857891854, "grad_norm": 0.0, "learning_rate": 1.8807593302298354e-05, "loss": 1.1259, "step": 4660 }, { "epoch": 0.1823695124814148, "grad_norm": 0.0, "learning_rate": 1.8806993116357975e-05, "loss": 1.1758, "step": 4661 }, { "epoch": 0.18240863917364425, "grad_norm": 0.0, "learning_rate": 1.8806392788988325e-05, "loss": 1.1133, "step": 4662 }, { "epoch": 0.1824477658658737, "grad_norm": 0.0, "learning_rate": 1.880579232019904e-05, "loss": 1.0837, "step": 4663 }, { "epoch": 0.18248689255810313, "grad_norm": 0.0, "learning_rate": 1.8805191709999767e-05, "loss": 1.1856, "step": 4664 }, { "epoch": 0.18252601925033257, "grad_norm": 0.0, "learning_rate": 1.8804590958400147e-05, "loss": 1.2021, "step": 4665 }, { "epoch": 0.182565145942562, "grad_norm": 0.0, "learning_rate": 1.8803990065409826e-05, "loss": 1.2362, "step": 4666 }, { "epoch": 0.18260427263479145, "grad_norm": 0.0, "learning_rate": 1.8803389031038462e-05, "loss": 1.0806, "step": 4667 }, { "epoch": 0.1826433993270209, "grad_norm": 0.0, "learning_rate": 1.88027878552957e-05, "loss": 1.3218, "step": 4668 }, { "epoch": 0.18268252601925034, "grad_norm": 0.0, "learning_rate": 1.880218653819119e-05, "loss": 1.0877, "step": 4669 }, { "epoch": 0.18272165271147978, "grad_norm": 0.0, "learning_rate": 1.88015850797346e-05, "loss": 0.9909, "step": 4670 }, { "epoch": 0.18276077940370922, "grad_norm": 0.0, "learning_rate": 1.8800983479935585e-05, "loss": 1.1028, "step": 4671 }, { "epoch": 0.18279990609593866, "grad_norm": 0.0, "learning_rate": 1.88003817388038e-05, "loss": 1.2654, "step": 4672 }, { "epoch": 0.1828390327881681, "grad_norm": 0.0, "learning_rate": 1.879977985634891e-05, "loss": 1.1433, "step": 4673 }, { "epoch": 0.18287815948039754, "grad_norm": 0.0, "learning_rate": 1.8799177832580585e-05, "loss": 1.2122, "step": 4674 }, { "epoch": 0.18291728617262695, "grad_norm": 0.0, "learning_rate": 1.8798575667508486e-05, "loss": 1.147, "step": 4675 }, { "epoch": 0.1829564128648564, "grad_norm": 0.0, "learning_rate": 1.879797336114229e-05, "loss": 1.2271, "step": 4676 }, { "epoch": 0.18299553955708583, "grad_norm": 0.0, "learning_rate": 1.8797370913491666e-05, "loss": 1.1489, "step": 4677 }, { "epoch": 0.18303466624931528, "grad_norm": 0.0, "learning_rate": 1.879676832456629e-05, "loss": 1.1574, "step": 4678 }, { "epoch": 0.18307379294154472, "grad_norm": 0.0, "learning_rate": 1.8796165594375835e-05, "loss": 1.0451, "step": 4679 }, { "epoch": 0.18311291963377416, "grad_norm": 0.0, "learning_rate": 1.8795562722929986e-05, "loss": 1.2725, "step": 4680 }, { "epoch": 0.1831520463260036, "grad_norm": 0.0, "learning_rate": 1.8794959710238417e-05, "loss": 1.0546, "step": 4681 }, { "epoch": 0.18319117301823304, "grad_norm": 0.0, "learning_rate": 1.879435655631082e-05, "loss": 1.1812, "step": 4682 }, { "epoch": 0.18323029971046248, "grad_norm": 0.0, "learning_rate": 1.8793753261156874e-05, "loss": 1.1942, "step": 4683 }, { "epoch": 0.18326942640269192, "grad_norm": 0.0, "learning_rate": 1.879314982478627e-05, "loss": 1.0081, "step": 4684 }, { "epoch": 0.18330855309492136, "grad_norm": 0.0, "learning_rate": 1.87925462472087e-05, "loss": 1.2726, "step": 4685 }, { "epoch": 0.1833476797871508, "grad_norm": 0.0, "learning_rate": 1.8791942528433854e-05, "loss": 1.2358, "step": 4686 }, { "epoch": 0.18338680647938024, "grad_norm": 0.0, "learning_rate": 1.8791338668471427e-05, "loss": 1.158, "step": 4687 }, { "epoch": 0.18342593317160968, "grad_norm": 0.0, "learning_rate": 1.879073466733112e-05, "loss": 1.1735, "step": 4688 }, { "epoch": 0.1834650598638391, "grad_norm": 0.0, "learning_rate": 1.8790130525022625e-05, "loss": 1.1477, "step": 4689 }, { "epoch": 0.18350418655606854, "grad_norm": 0.0, "learning_rate": 1.878952624155565e-05, "loss": 1.1797, "step": 4690 }, { "epoch": 0.18354331324829798, "grad_norm": 0.0, "learning_rate": 1.87889218169399e-05, "loss": 1.25, "step": 4691 }, { "epoch": 0.18358243994052742, "grad_norm": 0.0, "learning_rate": 1.8788317251185077e-05, "loss": 1.2106, "step": 4692 }, { "epoch": 0.18362156663275686, "grad_norm": 0.0, "learning_rate": 1.878771254430089e-05, "loss": 1.1262, "step": 4693 }, { "epoch": 0.1836606933249863, "grad_norm": 0.0, "learning_rate": 1.8787107696297052e-05, "loss": 1.1697, "step": 4694 }, { "epoch": 0.18369982001721574, "grad_norm": 0.0, "learning_rate": 1.8786502707183277e-05, "loss": 1.2346, "step": 4695 }, { "epoch": 0.18373894670944518, "grad_norm": 0.0, "learning_rate": 1.878589757696928e-05, "loss": 1.1523, "step": 4696 }, { "epoch": 0.18377807340167462, "grad_norm": 0.0, "learning_rate": 1.8785292305664774e-05, "loss": 1.1196, "step": 4697 }, { "epoch": 0.18381720009390407, "grad_norm": 0.0, "learning_rate": 1.878468689327948e-05, "loss": 1.1682, "step": 4698 }, { "epoch": 0.1838563267861335, "grad_norm": 0.0, "learning_rate": 1.8784081339823127e-05, "loss": 1.1339, "step": 4699 }, { "epoch": 0.18389545347836295, "grad_norm": 0.0, "learning_rate": 1.8783475645305436e-05, "loss": 1.1525, "step": 4700 }, { "epoch": 0.1839345801705924, "grad_norm": 0.0, "learning_rate": 1.878286980973613e-05, "loss": 1.1415, "step": 4701 }, { "epoch": 0.18397370686282183, "grad_norm": 0.0, "learning_rate": 1.8782263833124937e-05, "loss": 1.1614, "step": 4702 }, { "epoch": 0.18401283355505127, "grad_norm": 0.0, "learning_rate": 1.8781657715481594e-05, "loss": 1.0883, "step": 4703 }, { "epoch": 0.18405196024728068, "grad_norm": 0.0, "learning_rate": 1.8781051456815834e-05, "loss": 1.1976, "step": 4704 }, { "epoch": 0.18409108693951012, "grad_norm": 0.0, "learning_rate": 1.8780445057137387e-05, "loss": 1.1927, "step": 4705 }, { "epoch": 0.18413021363173956, "grad_norm": 0.0, "learning_rate": 1.8779838516455998e-05, "loss": 1.3055, "step": 4706 }, { "epoch": 0.184169340323969, "grad_norm": 0.0, "learning_rate": 1.8779231834781405e-05, "loss": 1.1566, "step": 4707 }, { "epoch": 0.18420846701619845, "grad_norm": 0.0, "learning_rate": 1.8778625012123347e-05, "loss": 1.1536, "step": 4708 }, { "epoch": 0.1842475937084279, "grad_norm": 0.0, "learning_rate": 1.8778018048491574e-05, "loss": 1.1071, "step": 4709 }, { "epoch": 0.18428672040065733, "grad_norm": 0.0, "learning_rate": 1.877741094389583e-05, "loss": 1.2478, "step": 4710 }, { "epoch": 0.18432584709288677, "grad_norm": 0.0, "learning_rate": 1.8776803698345866e-05, "loss": 1.113, "step": 4711 }, { "epoch": 0.1843649737851162, "grad_norm": 0.0, "learning_rate": 1.877619631185143e-05, "loss": 1.1661, "step": 4712 }, { "epoch": 0.18440410047734565, "grad_norm": 0.0, "learning_rate": 1.877558878442228e-05, "loss": 0.9972, "step": 4713 }, { "epoch": 0.1844432271695751, "grad_norm": 0.0, "learning_rate": 1.877498111606817e-05, "loss": 1.22, "step": 4714 }, { "epoch": 0.18448235386180453, "grad_norm": 0.0, "learning_rate": 1.8774373306798858e-05, "loss": 1.0317, "step": 4715 }, { "epoch": 0.18452148055403397, "grad_norm": 0.0, "learning_rate": 1.8773765356624104e-05, "loss": 1.1992, "step": 4716 }, { "epoch": 0.18456060724626341, "grad_norm": 0.0, "learning_rate": 1.8773157265553676e-05, "loss": 1.1731, "step": 4717 }, { "epoch": 0.18459973393849283, "grad_norm": 0.0, "learning_rate": 1.8772549033597336e-05, "loss": 1.2013, "step": 4718 }, { "epoch": 0.18463886063072227, "grad_norm": 0.0, "learning_rate": 1.877194066076485e-05, "loss": 1.1014, "step": 4719 }, { "epoch": 0.1846779873229517, "grad_norm": 0.0, "learning_rate": 1.877133214706599e-05, "loss": 1.1069, "step": 4720 }, { "epoch": 0.18471711401518115, "grad_norm": 0.0, "learning_rate": 1.8770723492510522e-05, "loss": 1.1208, "step": 4721 }, { "epoch": 0.1847562407074106, "grad_norm": 0.0, "learning_rate": 1.8770114697108227e-05, "loss": 1.2175, "step": 4722 }, { "epoch": 0.18479536739964003, "grad_norm": 0.0, "learning_rate": 1.8769505760868884e-05, "loss": 1.1973, "step": 4723 }, { "epoch": 0.18483449409186947, "grad_norm": 0.0, "learning_rate": 1.8768896683802263e-05, "loss": 1.1824, "step": 4724 }, { "epoch": 0.1848736207840989, "grad_norm": 0.0, "learning_rate": 1.8768287465918152e-05, "loss": 1.1668, "step": 4725 }, { "epoch": 0.18491274747632835, "grad_norm": 0.0, "learning_rate": 1.876767810722633e-05, "loss": 1.1393, "step": 4726 }, { "epoch": 0.1849518741685578, "grad_norm": 0.0, "learning_rate": 1.8767068607736586e-05, "loss": 1.2121, "step": 4727 }, { "epoch": 0.18499100086078724, "grad_norm": 0.0, "learning_rate": 1.8766458967458704e-05, "loss": 1.1216, "step": 4728 }, { "epoch": 0.18503012755301668, "grad_norm": 0.0, "learning_rate": 1.8765849186402475e-05, "loss": 1.2924, "step": 4729 }, { "epoch": 0.18506925424524612, "grad_norm": 0.0, "learning_rate": 1.8765239264577695e-05, "loss": 1.1535, "step": 4730 }, { "epoch": 0.18510838093747556, "grad_norm": 0.0, "learning_rate": 1.8764629201994152e-05, "loss": 1.2347, "step": 4731 }, { "epoch": 0.18514750762970497, "grad_norm": 0.0, "learning_rate": 1.876401899866165e-05, "loss": 1.1031, "step": 4732 }, { "epoch": 0.1851866343219344, "grad_norm": 0.0, "learning_rate": 1.876340865458998e-05, "loss": 1.1432, "step": 4733 }, { "epoch": 0.18522576101416385, "grad_norm": 0.0, "learning_rate": 1.8762798169788958e-05, "loss": 1.0842, "step": 4734 }, { "epoch": 0.1852648877063933, "grad_norm": 0.0, "learning_rate": 1.8762187544268368e-05, "loss": 1.2186, "step": 4735 }, { "epoch": 0.18530401439862274, "grad_norm": 0.0, "learning_rate": 1.8761576778038032e-05, "loss": 1.1904, "step": 4736 }, { "epoch": 0.18534314109085218, "grad_norm": 0.0, "learning_rate": 1.8760965871107748e-05, "loss": 1.1292, "step": 4737 }, { "epoch": 0.18538226778308162, "grad_norm": 0.0, "learning_rate": 1.8760354823487334e-05, "loss": 1.1524, "step": 4738 }, { "epoch": 0.18542139447531106, "grad_norm": 0.0, "learning_rate": 1.8759743635186596e-05, "loss": 1.1739, "step": 4739 }, { "epoch": 0.1854605211675405, "grad_norm": 0.0, "learning_rate": 1.8759132306215352e-05, "loss": 1.0596, "step": 4740 }, { "epoch": 0.18549964785976994, "grad_norm": 0.0, "learning_rate": 1.875852083658342e-05, "loss": 1.1732, "step": 4741 }, { "epoch": 0.18553877455199938, "grad_norm": 0.0, "learning_rate": 1.8757909226300617e-05, "loss": 1.03, "step": 4742 }, { "epoch": 0.18557790124422882, "grad_norm": 0.0, "learning_rate": 1.8757297475376766e-05, "loss": 1.1812, "step": 4743 }, { "epoch": 0.18561702793645826, "grad_norm": 0.0, "learning_rate": 1.8756685583821693e-05, "loss": 1.233, "step": 4744 }, { "epoch": 0.1856561546286877, "grad_norm": 0.0, "learning_rate": 1.875607355164522e-05, "loss": 1.1719, "step": 4745 }, { "epoch": 0.18569528132091712, "grad_norm": 0.0, "learning_rate": 1.8755461378857177e-05, "loss": 1.1515, "step": 4746 }, { "epoch": 0.18573440801314656, "grad_norm": 0.0, "learning_rate": 1.8754849065467396e-05, "loss": 1.0269, "step": 4747 }, { "epoch": 0.185773534705376, "grad_norm": 0.0, "learning_rate": 1.875423661148571e-05, "loss": 1.1702, "step": 4748 }, { "epoch": 0.18581266139760544, "grad_norm": 0.0, "learning_rate": 1.8753624016921955e-05, "loss": 1.314, "step": 4749 }, { "epoch": 0.18585178808983488, "grad_norm": 0.0, "learning_rate": 1.875301128178597e-05, "loss": 1.1193, "step": 4750 }, { "epoch": 0.18589091478206432, "grad_norm": 0.0, "learning_rate": 1.875239840608759e-05, "loss": 1.0579, "step": 4751 }, { "epoch": 0.18593004147429376, "grad_norm": 0.0, "learning_rate": 1.8751785389836653e-05, "loss": 1.1774, "step": 4752 }, { "epoch": 0.1859691681665232, "grad_norm": 0.0, "learning_rate": 1.8751172233043017e-05, "loss": 1.2139, "step": 4753 }, { "epoch": 0.18600829485875264, "grad_norm": 0.0, "learning_rate": 1.8750558935716516e-05, "loss": 1.1559, "step": 4754 }, { "epoch": 0.18604742155098208, "grad_norm": 0.0, "learning_rate": 1.8749945497867004e-05, "loss": 1.1322, "step": 4755 }, { "epoch": 0.18608654824321152, "grad_norm": 0.0, "learning_rate": 1.8749331919504336e-05, "loss": 1.2776, "step": 4756 }, { "epoch": 0.18612567493544097, "grad_norm": 0.0, "learning_rate": 1.8748718200638356e-05, "loss": 1.1322, "step": 4757 }, { "epoch": 0.1861648016276704, "grad_norm": 0.0, "learning_rate": 1.8748104341278924e-05, "loss": 1.2327, "step": 4758 }, { "epoch": 0.18620392831989985, "grad_norm": 0.0, "learning_rate": 1.8747490341435904e-05, "loss": 1.1874, "step": 4759 }, { "epoch": 0.1862430550121293, "grad_norm": 0.0, "learning_rate": 1.8746876201119143e-05, "loss": 1.0848, "step": 4760 }, { "epoch": 0.1862821817043587, "grad_norm": 0.0, "learning_rate": 1.8746261920338516e-05, "loss": 1.1719, "step": 4761 }, { "epoch": 0.18632130839658814, "grad_norm": 0.0, "learning_rate": 1.8745647499103882e-05, "loss": 1.259, "step": 4762 }, { "epoch": 0.18636043508881758, "grad_norm": 0.0, "learning_rate": 1.8745032937425103e-05, "loss": 1.0412, "step": 4763 }, { "epoch": 0.18639956178104702, "grad_norm": 0.0, "learning_rate": 1.8744418235312057e-05, "loss": 1.1843, "step": 4764 }, { "epoch": 0.18643868847327646, "grad_norm": 0.0, "learning_rate": 1.8743803392774612e-05, "loss": 1.2305, "step": 4765 }, { "epoch": 0.1864778151655059, "grad_norm": 0.0, "learning_rate": 1.8743188409822642e-05, "loss": 1.225, "step": 4766 }, { "epoch": 0.18651694185773535, "grad_norm": 0.0, "learning_rate": 1.874257328646602e-05, "loss": 1.1282, "step": 4767 }, { "epoch": 0.1865560685499648, "grad_norm": 0.0, "learning_rate": 1.8741958022714625e-05, "loss": 1.149, "step": 4768 }, { "epoch": 0.18659519524219423, "grad_norm": 0.0, "learning_rate": 1.874134261857834e-05, "loss": 1.0873, "step": 4769 }, { "epoch": 0.18663432193442367, "grad_norm": 0.0, "learning_rate": 1.8740727074067047e-05, "loss": 1.1022, "step": 4770 }, { "epoch": 0.1866734486266531, "grad_norm": 0.0, "learning_rate": 1.874011138919063e-05, "loss": 1.2581, "step": 4771 }, { "epoch": 0.18671257531888255, "grad_norm": 0.0, "learning_rate": 1.8739495563958973e-05, "loss": 1.1657, "step": 4772 }, { "epoch": 0.186751702011112, "grad_norm": 0.0, "learning_rate": 1.873887959838197e-05, "loss": 1.2218, "step": 4773 }, { "epoch": 0.18679082870334143, "grad_norm": 0.0, "learning_rate": 1.873826349246951e-05, "loss": 1.1769, "step": 4774 }, { "epoch": 0.18682995539557085, "grad_norm": 0.0, "learning_rate": 1.8737647246231492e-05, "loss": 1.1701, "step": 4775 }, { "epoch": 0.1868690820878003, "grad_norm": 0.0, "learning_rate": 1.8737030859677807e-05, "loss": 1.086, "step": 4776 }, { "epoch": 0.18690820878002973, "grad_norm": 0.0, "learning_rate": 1.873641433281835e-05, "loss": 1.2136, "step": 4777 }, { "epoch": 0.18694733547225917, "grad_norm": 0.0, "learning_rate": 1.873579766566303e-05, "loss": 1.277, "step": 4778 }, { "epoch": 0.1869864621644886, "grad_norm": 0.0, "learning_rate": 1.8735180858221746e-05, "loss": 1.1143, "step": 4779 }, { "epoch": 0.18702558885671805, "grad_norm": 0.0, "learning_rate": 1.8734563910504403e-05, "loss": 1.2751, "step": 4780 }, { "epoch": 0.1870647155489475, "grad_norm": 0.0, "learning_rate": 1.8733946822520908e-05, "loss": 1.0901, "step": 4781 }, { "epoch": 0.18710384224117693, "grad_norm": 0.0, "learning_rate": 1.873332959428117e-05, "loss": 1.2755, "step": 4782 }, { "epoch": 0.18714296893340637, "grad_norm": 0.0, "learning_rate": 1.8732712225795105e-05, "loss": 1.1727, "step": 4783 }, { "epoch": 0.1871820956256358, "grad_norm": 0.0, "learning_rate": 1.873209471707262e-05, "loss": 1.1812, "step": 4784 }, { "epoch": 0.18722122231786525, "grad_norm": 0.0, "learning_rate": 1.873147706812364e-05, "loss": 1.1461, "step": 4785 }, { "epoch": 0.1872603490100947, "grad_norm": 0.0, "learning_rate": 1.873085927895808e-05, "loss": 1.2124, "step": 4786 }, { "epoch": 0.18729947570232414, "grad_norm": 0.0, "learning_rate": 1.8730241349585857e-05, "loss": 1.1506, "step": 4787 }, { "epoch": 0.18733860239455358, "grad_norm": 0.0, "learning_rate": 1.87296232800169e-05, "loss": 1.2932, "step": 4788 }, { "epoch": 0.187377729086783, "grad_norm": 0.0, "learning_rate": 1.872900507026113e-05, "loss": 1.2691, "step": 4789 }, { "epoch": 0.18741685577901243, "grad_norm": 0.0, "learning_rate": 1.8728386720328477e-05, "loss": 1.164, "step": 4790 }, { "epoch": 0.18745598247124187, "grad_norm": 0.0, "learning_rate": 1.872776823022887e-05, "loss": 1.2066, "step": 4791 }, { "epoch": 0.1874951091634713, "grad_norm": 0.0, "learning_rate": 1.8727149599972244e-05, "loss": 1.0396, "step": 4792 }, { "epoch": 0.18753423585570075, "grad_norm": 0.0, "learning_rate": 1.872653082956853e-05, "loss": 1.1707, "step": 4793 }, { "epoch": 0.1875733625479302, "grad_norm": 0.0, "learning_rate": 1.8725911919027668e-05, "loss": 1.259, "step": 4794 }, { "epoch": 0.18761248924015964, "grad_norm": 0.0, "learning_rate": 1.872529286835959e-05, "loss": 1.0987, "step": 4795 }, { "epoch": 0.18765161593238908, "grad_norm": 0.0, "learning_rate": 1.8724673677574245e-05, "loss": 1.1081, "step": 4796 }, { "epoch": 0.18769074262461852, "grad_norm": 0.0, "learning_rate": 1.8724054346681573e-05, "loss": 1.2079, "step": 4797 }, { "epoch": 0.18772986931684796, "grad_norm": 0.0, "learning_rate": 1.8723434875691518e-05, "loss": 1.1483, "step": 4798 }, { "epoch": 0.1877689960090774, "grad_norm": 0.0, "learning_rate": 1.8722815264614035e-05, "loss": 1.186, "step": 4799 }, { "epoch": 0.18780812270130684, "grad_norm": 0.0, "learning_rate": 1.8722195513459065e-05, "loss": 1.1804, "step": 4800 }, { "epoch": 0.18784724939353628, "grad_norm": 0.0, "learning_rate": 1.8721575622236565e-05, "loss": 1.1679, "step": 4801 }, { "epoch": 0.18788637608576572, "grad_norm": 0.0, "learning_rate": 1.872095559095649e-05, "loss": 1.3154, "step": 4802 }, { "epoch": 0.18792550277799513, "grad_norm": 0.0, "learning_rate": 1.8720335419628796e-05, "loss": 1.0707, "step": 4803 }, { "epoch": 0.18796462947022458, "grad_norm": 0.0, "learning_rate": 1.8719715108263442e-05, "loss": 1.1655, "step": 4804 }, { "epoch": 0.18800375616245402, "grad_norm": 0.0, "learning_rate": 1.871909465687039e-05, "loss": 1.1021, "step": 4805 }, { "epoch": 0.18804288285468346, "grad_norm": 0.0, "learning_rate": 1.8718474065459603e-05, "loss": 1.2336, "step": 4806 }, { "epoch": 0.1880820095469129, "grad_norm": 0.0, "learning_rate": 1.871785333404105e-05, "loss": 1.05, "step": 4807 }, { "epoch": 0.18812113623914234, "grad_norm": 0.0, "learning_rate": 1.8717232462624695e-05, "loss": 1.1861, "step": 4808 }, { "epoch": 0.18816026293137178, "grad_norm": 0.0, "learning_rate": 1.871661145122051e-05, "loss": 1.1608, "step": 4809 }, { "epoch": 0.18819938962360122, "grad_norm": 0.0, "learning_rate": 1.8715990299838463e-05, "loss": 1.1978, "step": 4810 }, { "epoch": 0.18823851631583066, "grad_norm": 0.0, "learning_rate": 1.871536900848854e-05, "loss": 1.2668, "step": 4811 }, { "epoch": 0.1882776430080601, "grad_norm": 0.0, "learning_rate": 1.871474757718071e-05, "loss": 1.1837, "step": 4812 }, { "epoch": 0.18831676970028954, "grad_norm": 0.0, "learning_rate": 1.8714126005924954e-05, "loss": 1.1173, "step": 4813 }, { "epoch": 0.18835589639251898, "grad_norm": 0.0, "learning_rate": 1.8713504294731254e-05, "loss": 1.1624, "step": 4814 }, { "epoch": 0.18839502308474843, "grad_norm": 0.0, "learning_rate": 1.8712882443609592e-05, "loss": 1.2413, "step": 4815 }, { "epoch": 0.18843414977697787, "grad_norm": 0.0, "learning_rate": 1.8712260452569955e-05, "loss": 1.1162, "step": 4816 }, { "epoch": 0.1884732764692073, "grad_norm": 0.0, "learning_rate": 1.8711638321622336e-05, "loss": 1.2459, "step": 4817 }, { "epoch": 0.18851240316143672, "grad_norm": 0.0, "learning_rate": 1.871101605077672e-05, "loss": 1.0581, "step": 4818 }, { "epoch": 0.18855152985366616, "grad_norm": 0.0, "learning_rate": 1.87103936400431e-05, "loss": 1.1141, "step": 4819 }, { "epoch": 0.1885906565458956, "grad_norm": 0.0, "learning_rate": 1.8709771089431476e-05, "loss": 1.1984, "step": 4820 }, { "epoch": 0.18862978323812504, "grad_norm": 0.0, "learning_rate": 1.870914839895184e-05, "loss": 1.1514, "step": 4821 }, { "epoch": 0.18866890993035448, "grad_norm": 0.0, "learning_rate": 1.8708525568614194e-05, "loss": 1.3022, "step": 4822 }, { "epoch": 0.18870803662258392, "grad_norm": 0.0, "learning_rate": 1.870790259842854e-05, "loss": 1.006, "step": 4823 }, { "epoch": 0.18874716331481337, "grad_norm": 0.0, "learning_rate": 1.870727948840488e-05, "loss": 1.1426, "step": 4824 }, { "epoch": 0.1887862900070428, "grad_norm": 0.0, "learning_rate": 1.8706656238553224e-05, "loss": 1.1791, "step": 4825 }, { "epoch": 0.18882541669927225, "grad_norm": 0.0, "learning_rate": 1.8706032848883583e-05, "loss": 1.1967, "step": 4826 }, { "epoch": 0.1888645433915017, "grad_norm": 0.0, "learning_rate": 1.8705409319405957e-05, "loss": 1.1309, "step": 4827 }, { "epoch": 0.18890367008373113, "grad_norm": 0.0, "learning_rate": 1.870478565013037e-05, "loss": 1.1087, "step": 4828 }, { "epoch": 0.18894279677596057, "grad_norm": 0.0, "learning_rate": 1.870416184106683e-05, "loss": 1.1532, "step": 4829 }, { "epoch": 0.18898192346819, "grad_norm": 0.0, "learning_rate": 1.870353789222536e-05, "loss": 1.1774, "step": 4830 }, { "epoch": 0.18902105016041945, "grad_norm": 0.0, "learning_rate": 1.870291380361598e-05, "loss": 1.2236, "step": 4831 }, { "epoch": 0.18906017685264886, "grad_norm": 0.0, "learning_rate": 1.8702289575248706e-05, "loss": 1.2892, "step": 4832 }, { "epoch": 0.1890993035448783, "grad_norm": 0.0, "learning_rate": 1.8701665207133567e-05, "loss": 0.9876, "step": 4833 }, { "epoch": 0.18913843023710775, "grad_norm": 0.0, "learning_rate": 1.8701040699280588e-05, "loss": 1.1888, "step": 4834 }, { "epoch": 0.1891775569293372, "grad_norm": 0.0, "learning_rate": 1.87004160516998e-05, "loss": 1.273, "step": 4835 }, { "epoch": 0.18921668362156663, "grad_norm": 0.0, "learning_rate": 1.869979126440123e-05, "loss": 1.0736, "step": 4836 }, { "epoch": 0.18925581031379607, "grad_norm": 0.0, "learning_rate": 1.8699166337394916e-05, "loss": 1.0906, "step": 4837 }, { "epoch": 0.1892949370060255, "grad_norm": 0.0, "learning_rate": 1.869854127069089e-05, "loss": 1.1613, "step": 4838 }, { "epoch": 0.18933406369825495, "grad_norm": 0.0, "learning_rate": 1.8697916064299192e-05, "loss": 1.1329, "step": 4839 }, { "epoch": 0.1893731903904844, "grad_norm": 0.0, "learning_rate": 1.869729071822986e-05, "loss": 1.1606, "step": 4840 }, { "epoch": 0.18941231708271383, "grad_norm": 0.0, "learning_rate": 1.869666523249294e-05, "loss": 1.1789, "step": 4841 }, { "epoch": 0.18945144377494327, "grad_norm": 0.0, "learning_rate": 1.869603960709847e-05, "loss": 1.0372, "step": 4842 }, { "epoch": 0.18949057046717271, "grad_norm": 0.0, "learning_rate": 1.8695413842056503e-05, "loss": 1.205, "step": 4843 }, { "epoch": 0.18952969715940216, "grad_norm": 0.0, "learning_rate": 1.8694787937377085e-05, "loss": 1.2, "step": 4844 }, { "epoch": 0.1895688238516316, "grad_norm": 0.0, "learning_rate": 1.869416189307027e-05, "loss": 1.0679, "step": 4845 }, { "epoch": 0.189607950543861, "grad_norm": 0.0, "learning_rate": 1.8693535709146106e-05, "loss": 1.1426, "step": 4846 }, { "epoch": 0.18964707723609045, "grad_norm": 0.0, "learning_rate": 1.8692909385614653e-05, "loss": 1.1482, "step": 4847 }, { "epoch": 0.1896862039283199, "grad_norm": 0.0, "learning_rate": 1.8692282922485966e-05, "loss": 1.1486, "step": 4848 }, { "epoch": 0.18972533062054933, "grad_norm": 0.0, "learning_rate": 1.8691656319770112e-05, "loss": 1.1294, "step": 4849 }, { "epoch": 0.18976445731277877, "grad_norm": 0.0, "learning_rate": 1.8691029577477147e-05, "loss": 1.1191, "step": 4850 }, { "epoch": 0.1898035840050082, "grad_norm": 0.0, "learning_rate": 1.8690402695617136e-05, "loss": 1.1516, "step": 4851 }, { "epoch": 0.18984271069723765, "grad_norm": 0.0, "learning_rate": 1.8689775674200147e-05, "loss": 1.1668, "step": 4852 }, { "epoch": 0.1898818373894671, "grad_norm": 0.0, "learning_rate": 1.868914851323625e-05, "loss": 1.2462, "step": 4853 }, { "epoch": 0.18992096408169654, "grad_norm": 0.0, "learning_rate": 1.8688521212735523e-05, "loss": 1.3203, "step": 4854 }, { "epoch": 0.18996009077392598, "grad_norm": 0.0, "learning_rate": 1.8687893772708024e-05, "loss": 1.2341, "step": 4855 }, { "epoch": 0.18999921746615542, "grad_norm": 0.0, "learning_rate": 1.868726619316384e-05, "loss": 1.2203, "step": 4856 }, { "epoch": 0.19003834415838486, "grad_norm": 0.0, "learning_rate": 1.8686638474113047e-05, "loss": 1.208, "step": 4857 }, { "epoch": 0.1900774708506143, "grad_norm": 0.0, "learning_rate": 1.8686010615565725e-05, "loss": 1.1065, "step": 4858 }, { "epoch": 0.19011659754284374, "grad_norm": 0.0, "learning_rate": 1.868538261753196e-05, "loss": 1.1613, "step": 4859 }, { "epoch": 0.19015572423507315, "grad_norm": 0.0, "learning_rate": 1.8684754480021827e-05, "loss": 1.0647, "step": 4860 }, { "epoch": 0.1901948509273026, "grad_norm": 0.0, "learning_rate": 1.8684126203045423e-05, "loss": 1.1315, "step": 4861 }, { "epoch": 0.19023397761953204, "grad_norm": 0.0, "learning_rate": 1.8683497786612834e-05, "loss": 1.1204, "step": 4862 }, { "epoch": 0.19027310431176148, "grad_norm": 0.0, "learning_rate": 1.8682869230734152e-05, "loss": 1.1109, "step": 4863 }, { "epoch": 0.19031223100399092, "grad_norm": 0.0, "learning_rate": 1.868224053541947e-05, "loss": 1.0919, "step": 4864 }, { "epoch": 0.19035135769622036, "grad_norm": 0.0, "learning_rate": 1.8681611700678886e-05, "loss": 1.0939, "step": 4865 }, { "epoch": 0.1903904843884498, "grad_norm": 0.0, "learning_rate": 1.8680982726522494e-05, "loss": 1.1818, "step": 4866 }, { "epoch": 0.19042961108067924, "grad_norm": 0.0, "learning_rate": 1.86803536129604e-05, "loss": 1.2216, "step": 4867 }, { "epoch": 0.19046873777290868, "grad_norm": 0.0, "learning_rate": 1.8679724360002703e-05, "loss": 1.1603, "step": 4868 }, { "epoch": 0.19050786446513812, "grad_norm": 0.0, "learning_rate": 1.8679094967659508e-05, "loss": 1.1827, "step": 4869 }, { "epoch": 0.19054699115736756, "grad_norm": 0.0, "learning_rate": 1.867846543594092e-05, "loss": 1.1156, "step": 4870 }, { "epoch": 0.190586117849597, "grad_norm": 0.0, "learning_rate": 1.8677835764857058e-05, "loss": 1.2535, "step": 4871 }, { "epoch": 0.19062524454182644, "grad_norm": 0.0, "learning_rate": 1.8677205954418024e-05, "loss": 1.0573, "step": 4872 }, { "epoch": 0.19066437123405588, "grad_norm": 0.0, "learning_rate": 1.867657600463394e-05, "loss": 1.1215, "step": 4873 }, { "epoch": 0.1907034979262853, "grad_norm": 0.0, "learning_rate": 1.8675945915514916e-05, "loss": 1.1063, "step": 4874 }, { "epoch": 0.19074262461851474, "grad_norm": 0.0, "learning_rate": 1.8675315687071068e-05, "loss": 1.302, "step": 4875 }, { "epoch": 0.19078175131074418, "grad_norm": 0.0, "learning_rate": 1.8674685319312524e-05, "loss": 1.124, "step": 4876 }, { "epoch": 0.19082087800297362, "grad_norm": 0.0, "learning_rate": 1.8674054812249403e-05, "loss": 1.1245, "step": 4877 }, { "epoch": 0.19086000469520306, "grad_norm": 0.0, "learning_rate": 1.8673424165891832e-05, "loss": 1.2272, "step": 4878 }, { "epoch": 0.1908991313874325, "grad_norm": 0.0, "learning_rate": 1.8672793380249935e-05, "loss": 1.2534, "step": 4879 }, { "epoch": 0.19093825807966194, "grad_norm": 0.0, "learning_rate": 1.8672162455333847e-05, "loss": 1.0871, "step": 4880 }, { "epoch": 0.19097738477189138, "grad_norm": 0.0, "learning_rate": 1.8671531391153697e-05, "loss": 1.2665, "step": 4881 }, { "epoch": 0.19101651146412083, "grad_norm": 0.0, "learning_rate": 1.8670900187719616e-05, "loss": 1.2513, "step": 4882 }, { "epoch": 0.19105563815635027, "grad_norm": 0.0, "learning_rate": 1.8670268845041744e-05, "loss": 1.2701, "step": 4883 }, { "epoch": 0.1910947648485797, "grad_norm": 0.0, "learning_rate": 1.866963736313022e-05, "loss": 1.301, "step": 4884 }, { "epoch": 0.19113389154080915, "grad_norm": 0.0, "learning_rate": 1.866900574199518e-05, "loss": 1.1941, "step": 4885 }, { "epoch": 0.1911730182330386, "grad_norm": 0.0, "learning_rate": 1.8668373981646776e-05, "loss": 1.2292, "step": 4886 }, { "epoch": 0.19121214492526803, "grad_norm": 0.0, "learning_rate": 1.8667742082095147e-05, "loss": 1.2769, "step": 4887 }, { "epoch": 0.19125127161749747, "grad_norm": 0.0, "learning_rate": 1.8667110043350435e-05, "loss": 1.1753, "step": 4888 }, { "epoch": 0.19129039830972688, "grad_norm": 0.0, "learning_rate": 1.8666477865422804e-05, "loss": 1.1248, "step": 4889 }, { "epoch": 0.19132952500195632, "grad_norm": 0.0, "learning_rate": 1.8665845548322394e-05, "loss": 1.1966, "step": 4890 }, { "epoch": 0.19136865169418577, "grad_norm": 0.0, "learning_rate": 1.8665213092059367e-05, "loss": 1.1643, "step": 4891 }, { "epoch": 0.1914077783864152, "grad_norm": 0.0, "learning_rate": 1.866458049664387e-05, "loss": 1.0357, "step": 4892 }, { "epoch": 0.19144690507864465, "grad_norm": 0.0, "learning_rate": 1.8663947762086074e-05, "loss": 1.1673, "step": 4893 }, { "epoch": 0.1914860317708741, "grad_norm": 0.0, "learning_rate": 1.866331488839613e-05, "loss": 1.296, "step": 4894 }, { "epoch": 0.19152515846310353, "grad_norm": 0.0, "learning_rate": 1.8662681875584207e-05, "loss": 1.1765, "step": 4895 }, { "epoch": 0.19156428515533297, "grad_norm": 0.0, "learning_rate": 1.8662048723660464e-05, "loss": 1.1964, "step": 4896 }, { "epoch": 0.1916034118475624, "grad_norm": 0.0, "learning_rate": 1.8661415432635076e-05, "loss": 1.1683, "step": 4897 }, { "epoch": 0.19164253853979185, "grad_norm": 0.0, "learning_rate": 1.866078200251821e-05, "loss": 1.0937, "step": 4898 }, { "epoch": 0.1916816652320213, "grad_norm": 0.0, "learning_rate": 1.8660148433320032e-05, "loss": 1.2487, "step": 4899 }, { "epoch": 0.19172079192425073, "grad_norm": 0.0, "learning_rate": 1.8659514725050728e-05, "loss": 1.2235, "step": 4900 }, { "epoch": 0.19175991861648017, "grad_norm": 0.0, "learning_rate": 1.8658880877720467e-05, "loss": 1.0493, "step": 4901 }, { "epoch": 0.19179904530870961, "grad_norm": 0.0, "learning_rate": 1.8658246891339428e-05, "loss": 1.1022, "step": 4902 }, { "epoch": 0.19183817200093903, "grad_norm": 0.0, "learning_rate": 1.8657612765917794e-05, "loss": 1.1521, "step": 4903 }, { "epoch": 0.19187729869316847, "grad_norm": 0.0, "learning_rate": 1.865697850146575e-05, "loss": 1.2854, "step": 4904 }, { "epoch": 0.1919164253853979, "grad_norm": 0.0, "learning_rate": 1.8656344097993475e-05, "loss": 1.2386, "step": 4905 }, { "epoch": 0.19195555207762735, "grad_norm": 0.0, "learning_rate": 1.8655709555511162e-05, "loss": 1.2449, "step": 4906 }, { "epoch": 0.1919946787698568, "grad_norm": 0.0, "learning_rate": 1.8655074874029e-05, "loss": 1.0416, "step": 4907 }, { "epoch": 0.19203380546208623, "grad_norm": 0.0, "learning_rate": 1.865444005355718e-05, "loss": 1.0896, "step": 4908 }, { "epoch": 0.19207293215431567, "grad_norm": 0.0, "learning_rate": 1.86538050941059e-05, "loss": 1.1508, "step": 4909 }, { "epoch": 0.19211205884654511, "grad_norm": 0.0, "learning_rate": 1.865316999568535e-05, "loss": 1.1533, "step": 4910 }, { "epoch": 0.19215118553877455, "grad_norm": 0.0, "learning_rate": 1.865253475830574e-05, "loss": 1.1821, "step": 4911 }, { "epoch": 0.192190312231004, "grad_norm": 0.0, "learning_rate": 1.8651899381977257e-05, "loss": 1.2946, "step": 4912 }, { "epoch": 0.19222943892323344, "grad_norm": 0.0, "learning_rate": 1.8651263866710113e-05, "loss": 1.2534, "step": 4913 }, { "epoch": 0.19226856561546288, "grad_norm": 0.0, "learning_rate": 1.8650628212514516e-05, "loss": 1.1993, "step": 4914 }, { "epoch": 0.19230769230769232, "grad_norm": 0.0, "learning_rate": 1.8649992419400663e-05, "loss": 1.1555, "step": 4915 }, { "epoch": 0.19234681899992176, "grad_norm": 0.0, "learning_rate": 1.8649356487378774e-05, "loss": 1.2495, "step": 4916 }, { "epoch": 0.19238594569215117, "grad_norm": 0.0, "learning_rate": 1.864872041645906e-05, "loss": 1.1823, "step": 4917 }, { "epoch": 0.1924250723843806, "grad_norm": 0.0, "learning_rate": 1.864808420665173e-05, "loss": 1.0782, "step": 4918 }, { "epoch": 0.19246419907661005, "grad_norm": 0.0, "learning_rate": 1.8647447857967007e-05, "loss": 1.2419, "step": 4919 }, { "epoch": 0.1925033257688395, "grad_norm": 0.0, "learning_rate": 1.8646811370415107e-05, "loss": 1.1087, "step": 4920 }, { "epoch": 0.19254245246106894, "grad_norm": 0.0, "learning_rate": 1.8646174744006253e-05, "loss": 1.1345, "step": 4921 }, { "epoch": 0.19258157915329838, "grad_norm": 0.0, "learning_rate": 1.8645537978750664e-05, "loss": 1.0924, "step": 4922 }, { "epoch": 0.19262070584552782, "grad_norm": 0.0, "learning_rate": 1.864490107465857e-05, "loss": 1.1898, "step": 4923 }, { "epoch": 0.19265983253775726, "grad_norm": 0.0, "learning_rate": 1.8644264031740197e-05, "loss": 1.136, "step": 4924 }, { "epoch": 0.1926989592299867, "grad_norm": 0.0, "learning_rate": 1.8643626850005772e-05, "loss": 1.2061, "step": 4925 }, { "epoch": 0.19273808592221614, "grad_norm": 0.0, "learning_rate": 1.8642989529465537e-05, "loss": 1.1994, "step": 4926 }, { "epoch": 0.19277721261444558, "grad_norm": 0.0, "learning_rate": 1.8642352070129715e-05, "loss": 1.2907, "step": 4927 }, { "epoch": 0.19281633930667502, "grad_norm": 0.0, "learning_rate": 1.864171447200855e-05, "loss": 1.1562, "step": 4928 }, { "epoch": 0.19285546599890446, "grad_norm": 0.0, "learning_rate": 1.864107673511228e-05, "loss": 1.191, "step": 4929 }, { "epoch": 0.1928945926911339, "grad_norm": 0.0, "learning_rate": 1.8640438859451147e-05, "loss": 1.1668, "step": 4930 }, { "epoch": 0.19293371938336332, "grad_norm": 0.0, "learning_rate": 1.863980084503539e-05, "loss": 1.1561, "step": 4931 }, { "epoch": 0.19297284607559276, "grad_norm": 0.0, "learning_rate": 1.863916269187526e-05, "loss": 1.2158, "step": 4932 }, { "epoch": 0.1930119727678222, "grad_norm": 0.0, "learning_rate": 1.8638524399981003e-05, "loss": 1.1563, "step": 4933 }, { "epoch": 0.19305109946005164, "grad_norm": 0.0, "learning_rate": 1.8637885969362867e-05, "loss": 0.966, "step": 4934 }, { "epoch": 0.19309022615228108, "grad_norm": 0.0, "learning_rate": 1.8637247400031106e-05, "loss": 1.2153, "step": 4935 }, { "epoch": 0.19312935284451052, "grad_norm": 0.0, "learning_rate": 1.8636608691995976e-05, "loss": 1.2854, "step": 4936 }, { "epoch": 0.19316847953673996, "grad_norm": 0.0, "learning_rate": 1.8635969845267735e-05, "loss": 1.132, "step": 4937 }, { "epoch": 0.1932076062289694, "grad_norm": 0.0, "learning_rate": 1.8635330859856636e-05, "loss": 1.1449, "step": 4938 }, { "epoch": 0.19324673292119884, "grad_norm": 0.0, "learning_rate": 1.8634691735772942e-05, "loss": 1.1192, "step": 4939 }, { "epoch": 0.19328585961342828, "grad_norm": 0.0, "learning_rate": 1.8634052473026925e-05, "loss": 1.0576, "step": 4940 }, { "epoch": 0.19332498630565773, "grad_norm": 0.0, "learning_rate": 1.8633413071628842e-05, "loss": 1.1893, "step": 4941 }, { "epoch": 0.19336411299788717, "grad_norm": 0.0, "learning_rate": 1.8632773531588962e-05, "loss": 1.2005, "step": 4942 }, { "epoch": 0.1934032396901166, "grad_norm": 0.0, "learning_rate": 1.8632133852917557e-05, "loss": 1.1102, "step": 4943 }, { "epoch": 0.19344236638234605, "grad_norm": 0.0, "learning_rate": 1.8631494035624898e-05, "loss": 1.2141, "step": 4944 }, { "epoch": 0.1934814930745755, "grad_norm": 0.0, "learning_rate": 1.8630854079721263e-05, "loss": 1.1364, "step": 4945 }, { "epoch": 0.1935206197668049, "grad_norm": 0.0, "learning_rate": 1.8630213985216925e-05, "loss": 1.2272, "step": 4946 }, { "epoch": 0.19355974645903434, "grad_norm": 0.0, "learning_rate": 1.862957375212216e-05, "loss": 1.1326, "step": 4947 }, { "epoch": 0.19359887315126378, "grad_norm": 0.0, "learning_rate": 1.862893338044726e-05, "loss": 1.0977, "step": 4948 }, { "epoch": 0.19363799984349322, "grad_norm": 0.0, "learning_rate": 1.8628292870202504e-05, "loss": 1.1838, "step": 4949 }, { "epoch": 0.19367712653572267, "grad_norm": 0.0, "learning_rate": 1.8627652221398173e-05, "loss": 1.1749, "step": 4950 }, { "epoch": 0.1937162532279521, "grad_norm": 0.0, "learning_rate": 1.862701143404456e-05, "loss": 1.1948, "step": 4951 }, { "epoch": 0.19375537992018155, "grad_norm": 0.0, "learning_rate": 1.8626370508151955e-05, "loss": 1.1544, "step": 4952 }, { "epoch": 0.193794506612411, "grad_norm": 0.0, "learning_rate": 1.8625729443730643e-05, "loss": 1.1938, "step": 4953 }, { "epoch": 0.19383363330464043, "grad_norm": 0.0, "learning_rate": 1.862508824079093e-05, "loss": 1.0456, "step": 4954 }, { "epoch": 0.19387275999686987, "grad_norm": 0.0, "learning_rate": 1.8624446899343107e-05, "loss": 1.1425, "step": 4955 }, { "epoch": 0.1939118866890993, "grad_norm": 0.0, "learning_rate": 1.8623805419397474e-05, "loss": 1.1872, "step": 4956 }, { "epoch": 0.19395101338132875, "grad_norm": 0.0, "learning_rate": 1.862316380096433e-05, "loss": 1.1149, "step": 4957 }, { "epoch": 0.1939901400735582, "grad_norm": 0.0, "learning_rate": 1.8622522044053984e-05, "loss": 1.1206, "step": 4958 }, { "epoch": 0.19402926676578763, "grad_norm": 0.0, "learning_rate": 1.8621880148676736e-05, "loss": 1.2714, "step": 4959 }, { "epoch": 0.19406839345801705, "grad_norm": 0.0, "learning_rate": 1.8621238114842897e-05, "loss": 1.153, "step": 4960 }, { "epoch": 0.1941075201502465, "grad_norm": 0.0, "learning_rate": 1.862059594256278e-05, "loss": 1.194, "step": 4961 }, { "epoch": 0.19414664684247593, "grad_norm": 0.0, "learning_rate": 1.861995363184669e-05, "loss": 1.1339, "step": 4962 }, { "epoch": 0.19418577353470537, "grad_norm": 0.0, "learning_rate": 1.8619311182704946e-05, "loss": 1.0961, "step": 4963 }, { "epoch": 0.1942249002269348, "grad_norm": 0.0, "learning_rate": 1.8618668595147867e-05, "loss": 1.1907, "step": 4964 }, { "epoch": 0.19426402691916425, "grad_norm": 0.0, "learning_rate": 1.861802586918577e-05, "loss": 1.1104, "step": 4965 }, { "epoch": 0.1943031536113937, "grad_norm": 0.0, "learning_rate": 1.8617383004828978e-05, "loss": 1.2413, "step": 4966 }, { "epoch": 0.19434228030362313, "grad_norm": 0.0, "learning_rate": 1.861674000208781e-05, "loss": 1.2216, "step": 4967 }, { "epoch": 0.19438140699585257, "grad_norm": 0.0, "learning_rate": 1.8616096860972598e-05, "loss": 1.1856, "step": 4968 }, { "epoch": 0.19442053368808201, "grad_norm": 0.0, "learning_rate": 1.8615453581493664e-05, "loss": 1.0993, "step": 4969 }, { "epoch": 0.19445966038031146, "grad_norm": 0.0, "learning_rate": 1.8614810163661342e-05, "loss": 1.0587, "step": 4970 }, { "epoch": 0.1944987870725409, "grad_norm": 0.0, "learning_rate": 1.8614166607485967e-05, "loss": 1.0466, "step": 4971 }, { "epoch": 0.19453791376477034, "grad_norm": 0.0, "learning_rate": 1.8613522912977868e-05, "loss": 1.1266, "step": 4972 }, { "epoch": 0.19457704045699978, "grad_norm": 0.0, "learning_rate": 1.8612879080147384e-05, "loss": 1.1619, "step": 4973 }, { "epoch": 0.1946161671492292, "grad_norm": 0.0, "learning_rate": 1.8612235109004853e-05, "loss": 1.1503, "step": 4974 }, { "epoch": 0.19465529384145863, "grad_norm": 0.0, "learning_rate": 1.8611590999560622e-05, "loss": 1.038, "step": 4975 }, { "epoch": 0.19469442053368807, "grad_norm": 0.0, "learning_rate": 1.861094675182503e-05, "loss": 1.1593, "step": 4976 }, { "epoch": 0.1947335472259175, "grad_norm": 0.0, "learning_rate": 1.8610302365808423e-05, "loss": 1.2377, "step": 4977 }, { "epoch": 0.19477267391814695, "grad_norm": 0.0, "learning_rate": 1.8609657841521146e-05, "loss": 1.1926, "step": 4978 }, { "epoch": 0.1948118006103764, "grad_norm": 0.0, "learning_rate": 1.8609013178973555e-05, "loss": 1.1805, "step": 4979 }, { "epoch": 0.19485092730260584, "grad_norm": 0.0, "learning_rate": 1.8608368378175998e-05, "loss": 1.0035, "step": 4980 }, { "epoch": 0.19489005399483528, "grad_norm": 0.0, "learning_rate": 1.8607723439138832e-05, "loss": 1.1014, "step": 4981 }, { "epoch": 0.19492918068706472, "grad_norm": 0.0, "learning_rate": 1.8607078361872414e-05, "loss": 1.1911, "step": 4982 }, { "epoch": 0.19496830737929416, "grad_norm": 0.0, "learning_rate": 1.8606433146387103e-05, "loss": 1.1451, "step": 4983 }, { "epoch": 0.1950074340715236, "grad_norm": 0.0, "learning_rate": 1.8605787792693265e-05, "loss": 1.0843, "step": 4984 }, { "epoch": 0.19504656076375304, "grad_norm": 0.0, "learning_rate": 1.860514230080125e-05, "loss": 1.268, "step": 4985 }, { "epoch": 0.19508568745598248, "grad_norm": 0.0, "learning_rate": 1.860449667072144e-05, "loss": 1.2591, "step": 4986 }, { "epoch": 0.19512481414821192, "grad_norm": 0.0, "learning_rate": 1.8603850902464193e-05, "loss": 1.1492, "step": 4987 }, { "epoch": 0.19516394084044134, "grad_norm": 0.0, "learning_rate": 1.860320499603988e-05, "loss": 1.0651, "step": 4988 }, { "epoch": 0.19520306753267078, "grad_norm": 0.0, "learning_rate": 1.8602558951458878e-05, "loss": 1.1608, "step": 4989 }, { "epoch": 0.19524219422490022, "grad_norm": 0.0, "learning_rate": 1.8601912768731556e-05, "loss": 1.1511, "step": 4990 }, { "epoch": 0.19528132091712966, "grad_norm": 0.0, "learning_rate": 1.8601266447868296e-05, "loss": 1.0543, "step": 4991 }, { "epoch": 0.1953204476093591, "grad_norm": 0.0, "learning_rate": 1.860061998887947e-05, "loss": 1.2479, "step": 4992 }, { "epoch": 0.19535957430158854, "grad_norm": 0.0, "learning_rate": 1.859997339177547e-05, "loss": 1.0458, "step": 4993 }, { "epoch": 0.19539870099381798, "grad_norm": 0.0, "learning_rate": 1.8599326656566674e-05, "loss": 1.0219, "step": 4994 }, { "epoch": 0.19543782768604742, "grad_norm": 0.0, "learning_rate": 1.8598679783263463e-05, "loss": 1.0874, "step": 4995 }, { "epoch": 0.19547695437827686, "grad_norm": 0.0, "learning_rate": 1.859803277187623e-05, "loss": 1.1345, "step": 4996 }, { "epoch": 0.1955160810705063, "grad_norm": 0.0, "learning_rate": 1.8597385622415366e-05, "loss": 1.118, "step": 4997 }, { "epoch": 0.19555520776273574, "grad_norm": 0.0, "learning_rate": 1.8596738334891262e-05, "loss": 1.1445, "step": 4998 }, { "epoch": 0.19559433445496519, "grad_norm": 0.0, "learning_rate": 1.8596090909314313e-05, "loss": 1.2452, "step": 4999 }, { "epoch": 0.19563346114719463, "grad_norm": 0.0, "learning_rate": 1.8595443345694918e-05, "loss": 1.103, "step": 5000 }, { "epoch": 0.19567258783942407, "grad_norm": 0.0, "learning_rate": 1.859479564404347e-05, "loss": 1.1436, "step": 5001 }, { "epoch": 0.1957117145316535, "grad_norm": 0.0, "learning_rate": 1.8594147804370375e-05, "loss": 1.0621, "step": 5002 }, { "epoch": 0.19575084122388292, "grad_norm": 0.0, "learning_rate": 1.8593499826686035e-05, "loss": 1.2169, "step": 5003 }, { "epoch": 0.19578996791611236, "grad_norm": 0.0, "learning_rate": 1.8592851711000857e-05, "loss": 1.1651, "step": 5004 }, { "epoch": 0.1958290946083418, "grad_norm": 0.0, "learning_rate": 1.8592203457325248e-05, "loss": 1.1946, "step": 5005 }, { "epoch": 0.19586822130057124, "grad_norm": 0.0, "learning_rate": 1.8591555065669614e-05, "loss": 1.1144, "step": 5006 }, { "epoch": 0.19590734799280068, "grad_norm": 0.0, "learning_rate": 1.8590906536044375e-05, "loss": 1.23, "step": 5007 }, { "epoch": 0.19594647468503013, "grad_norm": 0.0, "learning_rate": 1.8590257868459943e-05, "loss": 1.2425, "step": 5008 }, { "epoch": 0.19598560137725957, "grad_norm": 0.0, "learning_rate": 1.8589609062926732e-05, "loss": 1.105, "step": 5009 }, { "epoch": 0.196024728069489, "grad_norm": 0.0, "learning_rate": 1.8588960119455164e-05, "loss": 1.217, "step": 5010 }, { "epoch": 0.19606385476171845, "grad_norm": 0.0, "learning_rate": 1.858831103805566e-05, "loss": 1.0924, "step": 5011 }, { "epoch": 0.1961029814539479, "grad_norm": 0.0, "learning_rate": 1.8587661818738637e-05, "loss": 1.1385, "step": 5012 }, { "epoch": 0.19614210814617733, "grad_norm": 0.0, "learning_rate": 1.858701246151453e-05, "loss": 1.2245, "step": 5013 }, { "epoch": 0.19618123483840677, "grad_norm": 0.0, "learning_rate": 1.8586362966393765e-05, "loss": 1.1335, "step": 5014 }, { "epoch": 0.1962203615306362, "grad_norm": 0.0, "learning_rate": 1.8585713333386767e-05, "loss": 1.2324, "step": 5015 }, { "epoch": 0.19625948822286565, "grad_norm": 0.0, "learning_rate": 1.8585063562503973e-05, "loss": 1.2714, "step": 5016 }, { "epoch": 0.19629861491509507, "grad_norm": 0.0, "learning_rate": 1.8584413653755814e-05, "loss": 1.0894, "step": 5017 }, { "epoch": 0.1963377416073245, "grad_norm": 0.0, "learning_rate": 1.858376360715273e-05, "loss": 1.1689, "step": 5018 }, { "epoch": 0.19637686829955395, "grad_norm": 0.0, "learning_rate": 1.8583113422705162e-05, "loss": 1.2944, "step": 5019 }, { "epoch": 0.1964159949917834, "grad_norm": 0.0, "learning_rate": 1.858246310042354e-05, "loss": 1.0884, "step": 5020 }, { "epoch": 0.19645512168401283, "grad_norm": 0.0, "learning_rate": 1.8581812640318322e-05, "loss": 1.1034, "step": 5021 }, { "epoch": 0.19649424837624227, "grad_norm": 0.0, "learning_rate": 1.8581162042399942e-05, "loss": 1.1151, "step": 5022 }, { "epoch": 0.1965333750684717, "grad_norm": 0.0, "learning_rate": 1.8580511306678856e-05, "loss": 1.1658, "step": 5023 }, { "epoch": 0.19657250176070115, "grad_norm": 0.0, "learning_rate": 1.857986043316551e-05, "loss": 1.152, "step": 5024 }, { "epoch": 0.1966116284529306, "grad_norm": 0.0, "learning_rate": 1.857920942187035e-05, "loss": 1.1757, "step": 5025 }, { "epoch": 0.19665075514516003, "grad_norm": 0.0, "learning_rate": 1.8578558272803848e-05, "loss": 1.1246, "step": 5026 }, { "epoch": 0.19668988183738947, "grad_norm": 0.0, "learning_rate": 1.857790698597644e-05, "loss": 1.2351, "step": 5027 }, { "epoch": 0.19672900852961892, "grad_norm": 0.0, "learning_rate": 1.85772555613986e-05, "loss": 1.054, "step": 5028 }, { "epoch": 0.19676813522184836, "grad_norm": 0.0, "learning_rate": 1.857660399908078e-05, "loss": 1.1986, "step": 5029 }, { "epoch": 0.1968072619140778, "grad_norm": 0.0, "learning_rate": 1.857595229903345e-05, "loss": 1.2903, "step": 5030 }, { "epoch": 0.1968463886063072, "grad_norm": 0.0, "learning_rate": 1.8575300461267073e-05, "loss": 1.1719, "step": 5031 }, { "epoch": 0.19688551529853665, "grad_norm": 0.0, "learning_rate": 1.8574648485792113e-05, "loss": 1.1801, "step": 5032 }, { "epoch": 0.1969246419907661, "grad_norm": 0.0, "learning_rate": 1.8573996372619044e-05, "loss": 1.239, "step": 5033 }, { "epoch": 0.19696376868299553, "grad_norm": 0.0, "learning_rate": 1.8573344121758338e-05, "loss": 1.2752, "step": 5034 }, { "epoch": 0.19700289537522497, "grad_norm": 0.0, "learning_rate": 1.8572691733220465e-05, "loss": 1.157, "step": 5035 }, { "epoch": 0.19704202206745441, "grad_norm": 0.0, "learning_rate": 1.8572039207015907e-05, "loss": 1.1685, "step": 5036 }, { "epoch": 0.19708114875968386, "grad_norm": 0.0, "learning_rate": 1.8571386543155142e-05, "loss": 1.1099, "step": 5037 }, { "epoch": 0.1971202754519133, "grad_norm": 0.0, "learning_rate": 1.8570733741648654e-05, "loss": 1.1664, "step": 5038 }, { "epoch": 0.19715940214414274, "grad_norm": 0.0, "learning_rate": 1.8570080802506914e-05, "loss": 1.1171, "step": 5039 }, { "epoch": 0.19719852883637218, "grad_norm": 0.0, "learning_rate": 1.856942772574042e-05, "loss": 1.1801, "step": 5040 }, { "epoch": 0.19723765552860162, "grad_norm": 0.0, "learning_rate": 1.856877451135965e-05, "loss": 1.1452, "step": 5041 }, { "epoch": 0.19727678222083106, "grad_norm": 0.0, "learning_rate": 1.85681211593751e-05, "loss": 1.204, "step": 5042 }, { "epoch": 0.1973159089130605, "grad_norm": 0.0, "learning_rate": 1.8567467669797264e-05, "loss": 1.1023, "step": 5043 }, { "epoch": 0.19735503560528994, "grad_norm": 0.0, "learning_rate": 1.856681404263663e-05, "loss": 1.118, "step": 5044 }, { "epoch": 0.19739416229751935, "grad_norm": 0.0, "learning_rate": 1.85661602779037e-05, "loss": 1.1824, "step": 5045 }, { "epoch": 0.1974332889897488, "grad_norm": 0.0, "learning_rate": 1.8565506375608964e-05, "loss": 1.1868, "step": 5046 }, { "epoch": 0.19747241568197824, "grad_norm": 0.0, "learning_rate": 1.8564852335762936e-05, "loss": 1.0314, "step": 5047 }, { "epoch": 0.19751154237420768, "grad_norm": 0.0, "learning_rate": 1.856419815837611e-05, "loss": 1.1881, "step": 5048 }, { "epoch": 0.19755066906643712, "grad_norm": 0.0, "learning_rate": 1.8563543843458994e-05, "loss": 1.1951, "step": 5049 }, { "epoch": 0.19758979575866656, "grad_norm": 0.0, "learning_rate": 1.856288939102209e-05, "loss": 1.1606, "step": 5050 }, { "epoch": 0.197628922450896, "grad_norm": 0.0, "learning_rate": 1.8562234801075917e-05, "loss": 1.1855, "step": 5051 }, { "epoch": 0.19766804914312544, "grad_norm": 0.0, "learning_rate": 1.856158007363098e-05, "loss": 1.1567, "step": 5052 }, { "epoch": 0.19770717583535488, "grad_norm": 0.0, "learning_rate": 1.8560925208697794e-05, "loss": 1.2124, "step": 5053 }, { "epoch": 0.19774630252758432, "grad_norm": 0.0, "learning_rate": 1.8560270206286875e-05, "loss": 1.1852, "step": 5054 }, { "epoch": 0.19778542921981376, "grad_norm": 0.0, "learning_rate": 1.855961506640875e-05, "loss": 1.1086, "step": 5055 }, { "epoch": 0.1978245559120432, "grad_norm": 0.0, "learning_rate": 1.8558959789073926e-05, "loss": 1.1141, "step": 5056 }, { "epoch": 0.19786368260427264, "grad_norm": 0.0, "learning_rate": 1.855830437429294e-05, "loss": 1.1627, "step": 5057 }, { "epoch": 0.19790280929650209, "grad_norm": 0.0, "learning_rate": 1.8557648822076302e-05, "loss": 1.244, "step": 5058 }, { "epoch": 0.1979419359887315, "grad_norm": 0.0, "learning_rate": 1.855699313243455e-05, "loss": 1.0648, "step": 5059 }, { "epoch": 0.19798106268096094, "grad_norm": 0.0, "learning_rate": 1.8556337305378212e-05, "loss": 1.1667, "step": 5060 }, { "epoch": 0.19802018937319038, "grad_norm": 0.0, "learning_rate": 1.855568134091782e-05, "loss": 1.2961, "step": 5061 }, { "epoch": 0.19805931606541982, "grad_norm": 0.0, "learning_rate": 1.8555025239063905e-05, "loss": 1.2082, "step": 5062 }, { "epoch": 0.19809844275764926, "grad_norm": 0.0, "learning_rate": 1.8554368999827e-05, "loss": 1.2007, "step": 5063 }, { "epoch": 0.1981375694498787, "grad_norm": 0.0, "learning_rate": 1.855371262321766e-05, "loss": 1.257, "step": 5064 }, { "epoch": 0.19817669614210814, "grad_norm": 0.0, "learning_rate": 1.85530561092464e-05, "loss": 1.2158, "step": 5065 }, { "epoch": 0.19821582283433758, "grad_norm": 0.0, "learning_rate": 1.8552399457923785e-05, "loss": 1.1749, "step": 5066 }, { "epoch": 0.19825494952656703, "grad_norm": 0.0, "learning_rate": 1.8551742669260348e-05, "loss": 1.0994, "step": 5067 }, { "epoch": 0.19829407621879647, "grad_norm": 0.0, "learning_rate": 1.8551085743266642e-05, "loss": 1.0919, "step": 5068 }, { "epoch": 0.1983332029110259, "grad_norm": 0.0, "learning_rate": 1.8550428679953213e-05, "loss": 1.1791, "step": 5069 }, { "epoch": 0.19837232960325535, "grad_norm": 0.0, "learning_rate": 1.8549771479330612e-05, "loss": 1.0764, "step": 5070 }, { "epoch": 0.1984114562954848, "grad_norm": 0.0, "learning_rate": 1.8549114141409396e-05, "loss": 1.1371, "step": 5071 }, { "epoch": 0.19845058298771423, "grad_norm": 0.0, "learning_rate": 1.8548456666200117e-05, "loss": 1.1597, "step": 5072 }, { "epoch": 0.19848970967994367, "grad_norm": 0.0, "learning_rate": 1.854779905371334e-05, "loss": 1.1127, "step": 5073 }, { "epoch": 0.19852883637217308, "grad_norm": 0.0, "learning_rate": 1.8547141303959623e-05, "loss": 1.2726, "step": 5074 }, { "epoch": 0.19856796306440253, "grad_norm": 0.0, "learning_rate": 1.854648341694952e-05, "loss": 1.0474, "step": 5075 }, { "epoch": 0.19860708975663197, "grad_norm": 0.0, "learning_rate": 1.8545825392693605e-05, "loss": 1.0779, "step": 5076 }, { "epoch": 0.1986462164488614, "grad_norm": 0.0, "learning_rate": 1.8545167231202447e-05, "loss": 1.2449, "step": 5077 }, { "epoch": 0.19868534314109085, "grad_norm": 0.0, "learning_rate": 1.8544508932486605e-05, "loss": 1.1439, "step": 5078 }, { "epoch": 0.1987244698333203, "grad_norm": 0.0, "learning_rate": 1.854385049655666e-05, "loss": 1.1512, "step": 5079 }, { "epoch": 0.19876359652554973, "grad_norm": 0.0, "learning_rate": 1.854319192342318e-05, "loss": 1.1863, "step": 5080 }, { "epoch": 0.19880272321777917, "grad_norm": 0.0, "learning_rate": 1.8542533213096746e-05, "loss": 1.2068, "step": 5081 }, { "epoch": 0.1988418499100086, "grad_norm": 0.0, "learning_rate": 1.854187436558793e-05, "loss": 1.2245, "step": 5082 }, { "epoch": 0.19888097660223805, "grad_norm": 0.0, "learning_rate": 1.8541215380907317e-05, "loss": 1.1605, "step": 5083 }, { "epoch": 0.1989201032944675, "grad_norm": 0.0, "learning_rate": 1.8540556259065487e-05, "loss": 1.0871, "step": 5084 }, { "epoch": 0.19895922998669693, "grad_norm": 0.0, "learning_rate": 1.8539897000073025e-05, "loss": 1.1163, "step": 5085 }, { "epoch": 0.19899835667892637, "grad_norm": 0.0, "learning_rate": 1.8539237603940517e-05, "loss": 1.154, "step": 5086 }, { "epoch": 0.19903748337115582, "grad_norm": 0.0, "learning_rate": 1.8538578070678556e-05, "loss": 1.179, "step": 5087 }, { "epoch": 0.19907661006338523, "grad_norm": 0.0, "learning_rate": 1.853791840029773e-05, "loss": 1.1596, "step": 5088 }, { "epoch": 0.19911573675561467, "grad_norm": 0.0, "learning_rate": 1.8537258592808634e-05, "loss": 1.2094, "step": 5089 }, { "epoch": 0.1991548634478441, "grad_norm": 0.0, "learning_rate": 1.853659864822186e-05, "loss": 1.0394, "step": 5090 }, { "epoch": 0.19919399014007355, "grad_norm": 0.0, "learning_rate": 1.853593856654801e-05, "loss": 1.1572, "step": 5091 }, { "epoch": 0.199233116832303, "grad_norm": 0.0, "learning_rate": 1.8535278347797683e-05, "loss": 1.0742, "step": 5092 }, { "epoch": 0.19927224352453243, "grad_norm": 0.0, "learning_rate": 1.853461799198148e-05, "loss": 1.1736, "step": 5093 }, { "epoch": 0.19931137021676187, "grad_norm": 0.0, "learning_rate": 1.853395749911001e-05, "loss": 1.2209, "step": 5094 }, { "epoch": 0.19935049690899131, "grad_norm": 0.0, "learning_rate": 1.8533296869193874e-05, "loss": 1.1335, "step": 5095 }, { "epoch": 0.19938962360122076, "grad_norm": 0.0, "learning_rate": 1.853263610224368e-05, "loss": 1.2036, "step": 5096 }, { "epoch": 0.1994287502934502, "grad_norm": 0.0, "learning_rate": 1.8531975198270047e-05, "loss": 1.2068, "step": 5097 }, { "epoch": 0.19946787698567964, "grad_norm": 0.0, "learning_rate": 1.853131415728358e-05, "loss": 1.1822, "step": 5098 }, { "epoch": 0.19950700367790908, "grad_norm": 0.0, "learning_rate": 1.85306529792949e-05, "loss": 1.013, "step": 5099 }, { "epoch": 0.19954613037013852, "grad_norm": 0.0, "learning_rate": 1.852999166431462e-05, "loss": 1.1915, "step": 5100 }, { "epoch": 0.19958525706236796, "grad_norm": 0.0, "learning_rate": 1.8529330212353367e-05, "loss": 1.201, "step": 5101 }, { "epoch": 0.19962438375459737, "grad_norm": 0.0, "learning_rate": 1.852866862342176e-05, "loss": 1.2386, "step": 5102 }, { "epoch": 0.19966351044682681, "grad_norm": 0.0, "learning_rate": 1.8528006897530415e-05, "loss": 1.0876, "step": 5103 }, { "epoch": 0.19970263713905625, "grad_norm": 0.0, "learning_rate": 1.8527345034689968e-05, "loss": 1.1583, "step": 5104 }, { "epoch": 0.1997417638312857, "grad_norm": 0.0, "learning_rate": 1.8526683034911046e-05, "loss": 1.2076, "step": 5105 }, { "epoch": 0.19978089052351514, "grad_norm": 0.0, "learning_rate": 1.8526020898204278e-05, "loss": 1.1432, "step": 5106 }, { "epoch": 0.19982001721574458, "grad_norm": 0.0, "learning_rate": 1.8525358624580302e-05, "loss": 1.0949, "step": 5107 }, { "epoch": 0.19985914390797402, "grad_norm": 0.0, "learning_rate": 1.8524696214049744e-05, "loss": 0.9551, "step": 5108 }, { "epoch": 0.19989827060020346, "grad_norm": 0.0, "learning_rate": 1.852403366662325e-05, "loss": 1.1982, "step": 5109 }, { "epoch": 0.1999373972924329, "grad_norm": 0.0, "learning_rate": 1.8523370982311455e-05, "loss": 1.1293, "step": 5110 }, { "epoch": 0.19997652398466234, "grad_norm": 0.0, "learning_rate": 1.8522708161125003e-05, "loss": 1.1858, "step": 5111 }, { "epoch": 0.20001565067689178, "grad_norm": 0.0, "learning_rate": 1.8522045203074536e-05, "loss": 1.1689, "step": 5112 }, { "epoch": 0.20005477736912122, "grad_norm": 0.0, "learning_rate": 1.85213821081707e-05, "loss": 1.2134, "step": 5113 }, { "epoch": 0.20009390406135066, "grad_norm": 0.0, "learning_rate": 1.8520718876424153e-05, "loss": 1.186, "step": 5114 }, { "epoch": 0.2001330307535801, "grad_norm": 0.0, "learning_rate": 1.8520055507845535e-05, "loss": 1.1639, "step": 5115 }, { "epoch": 0.20017215744580952, "grad_norm": 0.0, "learning_rate": 1.85193920024455e-05, "loss": 1.2792, "step": 5116 }, { "epoch": 0.20021128413803896, "grad_norm": 0.0, "learning_rate": 1.8518728360234702e-05, "loss": 1.1854, "step": 5117 }, { "epoch": 0.2002504108302684, "grad_norm": 0.0, "learning_rate": 1.8518064581223803e-05, "loss": 1.2462, "step": 5118 }, { "epoch": 0.20028953752249784, "grad_norm": 0.0, "learning_rate": 1.8517400665423462e-05, "loss": 1.1605, "step": 5119 }, { "epoch": 0.20032866421472728, "grad_norm": 0.0, "learning_rate": 1.851673661284434e-05, "loss": 1.2087, "step": 5120 }, { "epoch": 0.20036779090695672, "grad_norm": 0.0, "learning_rate": 1.8516072423497097e-05, "loss": 1.184, "step": 5121 }, { "epoch": 0.20040691759918616, "grad_norm": 0.0, "learning_rate": 1.8515408097392408e-05, "loss": 1.2777, "step": 5122 }, { "epoch": 0.2004460442914156, "grad_norm": 0.0, "learning_rate": 1.8514743634540928e-05, "loss": 1.0637, "step": 5123 }, { "epoch": 0.20048517098364504, "grad_norm": 0.0, "learning_rate": 1.851407903495334e-05, "loss": 1.2133, "step": 5124 }, { "epoch": 0.20052429767587449, "grad_norm": 0.0, "learning_rate": 1.851341429864031e-05, "loss": 1.2323, "step": 5125 }, { "epoch": 0.20056342436810393, "grad_norm": 0.0, "learning_rate": 1.851274942561251e-05, "loss": 1.1607, "step": 5126 }, { "epoch": 0.20060255106033337, "grad_norm": 0.0, "learning_rate": 1.851208441588063e-05, "loss": 1.2037, "step": 5127 }, { "epoch": 0.2006416777525628, "grad_norm": 0.0, "learning_rate": 1.8511419269455335e-05, "loss": 1.1359, "step": 5128 }, { "epoch": 0.20068080444479225, "grad_norm": 0.0, "learning_rate": 1.8510753986347313e-05, "loss": 1.1929, "step": 5129 }, { "epoch": 0.2007199311370217, "grad_norm": 0.0, "learning_rate": 1.8510088566567245e-05, "loss": 1.2126, "step": 5130 }, { "epoch": 0.2007590578292511, "grad_norm": 0.0, "learning_rate": 1.850942301012582e-05, "loss": 1.1202, "step": 5131 }, { "epoch": 0.20079818452148054, "grad_norm": 0.0, "learning_rate": 1.8508757317033723e-05, "loss": 1.1265, "step": 5132 }, { "epoch": 0.20083731121370998, "grad_norm": 0.0, "learning_rate": 1.8508091487301648e-05, "loss": 1.2551, "step": 5133 }, { "epoch": 0.20087643790593943, "grad_norm": 0.0, "learning_rate": 1.8507425520940282e-05, "loss": 1.0681, "step": 5134 }, { "epoch": 0.20091556459816887, "grad_norm": 0.0, "learning_rate": 1.8506759417960322e-05, "loss": 1.1451, "step": 5135 }, { "epoch": 0.2009546912903983, "grad_norm": 0.0, "learning_rate": 1.8506093178372467e-05, "loss": 1.0831, "step": 5136 }, { "epoch": 0.20099381798262775, "grad_norm": 0.0, "learning_rate": 1.8505426802187417e-05, "loss": 1.1379, "step": 5137 }, { "epoch": 0.2010329446748572, "grad_norm": 0.0, "learning_rate": 1.8504760289415867e-05, "loss": 1.2077, "step": 5138 }, { "epoch": 0.20107207136708663, "grad_norm": 0.0, "learning_rate": 1.8504093640068523e-05, "loss": 1.1947, "step": 5139 }, { "epoch": 0.20111119805931607, "grad_norm": 0.0, "learning_rate": 1.8503426854156095e-05, "loss": 1.1537, "step": 5140 }, { "epoch": 0.2011503247515455, "grad_norm": 0.0, "learning_rate": 1.8502759931689285e-05, "loss": 1.2084, "step": 5141 }, { "epoch": 0.20118945144377495, "grad_norm": 0.0, "learning_rate": 1.8502092872678806e-05, "loss": 1.1907, "step": 5142 }, { "epoch": 0.2012285781360044, "grad_norm": 0.0, "learning_rate": 1.8501425677135367e-05, "loss": 1.285, "step": 5143 }, { "epoch": 0.20126770482823383, "grad_norm": 0.0, "learning_rate": 1.8500758345069685e-05, "loss": 1.22, "step": 5144 }, { "epoch": 0.20130683152046325, "grad_norm": 0.0, "learning_rate": 1.850009087649248e-05, "loss": 1.2628, "step": 5145 }, { "epoch": 0.2013459582126927, "grad_norm": 0.0, "learning_rate": 1.8499423271414463e-05, "loss": 1.116, "step": 5146 }, { "epoch": 0.20138508490492213, "grad_norm": 0.0, "learning_rate": 1.849875552984636e-05, "loss": 1.1306, "step": 5147 }, { "epoch": 0.20142421159715157, "grad_norm": 0.0, "learning_rate": 1.8498087651798893e-05, "loss": 1.1987, "step": 5148 }, { "epoch": 0.201463338289381, "grad_norm": 0.0, "learning_rate": 1.8497419637282784e-05, "loss": 1.2489, "step": 5149 }, { "epoch": 0.20150246498161045, "grad_norm": 0.0, "learning_rate": 1.8496751486308766e-05, "loss": 1.1843, "step": 5150 }, { "epoch": 0.2015415916738399, "grad_norm": 0.0, "learning_rate": 1.8496083198887565e-05, "loss": 1.078, "step": 5151 }, { "epoch": 0.20158071836606933, "grad_norm": 0.0, "learning_rate": 1.8495414775029916e-05, "loss": 1.1263, "step": 5152 }, { "epoch": 0.20161984505829877, "grad_norm": 0.0, "learning_rate": 1.849474621474655e-05, "loss": 1.2113, "step": 5153 }, { "epoch": 0.20165897175052822, "grad_norm": 0.0, "learning_rate": 1.8494077518048207e-05, "loss": 1.1501, "step": 5154 }, { "epoch": 0.20169809844275766, "grad_norm": 0.0, "learning_rate": 1.8493408684945617e-05, "loss": 1.1494, "step": 5155 }, { "epoch": 0.2017372251349871, "grad_norm": 0.0, "learning_rate": 1.8492739715449526e-05, "loss": 1.1282, "step": 5156 }, { "epoch": 0.20177635182721654, "grad_norm": 0.0, "learning_rate": 1.8492070609570682e-05, "loss": 1.2301, "step": 5157 }, { "epoch": 0.20181547851944598, "grad_norm": 0.0, "learning_rate": 1.8491401367319825e-05, "loss": 1.0168, "step": 5158 }, { "epoch": 0.2018546052116754, "grad_norm": 0.0, "learning_rate": 1.84907319887077e-05, "loss": 1.1561, "step": 5159 }, { "epoch": 0.20189373190390483, "grad_norm": 0.0, "learning_rate": 1.849006247374506e-05, "loss": 1.1655, "step": 5160 }, { "epoch": 0.20193285859613427, "grad_norm": 0.0, "learning_rate": 1.8489392822442657e-05, "loss": 1.1084, "step": 5161 }, { "epoch": 0.20197198528836371, "grad_norm": 0.0, "learning_rate": 1.848872303481124e-05, "loss": 1.1297, "step": 5162 }, { "epoch": 0.20201111198059316, "grad_norm": 0.0, "learning_rate": 1.8488053110861568e-05, "loss": 1.1383, "step": 5163 }, { "epoch": 0.2020502386728226, "grad_norm": 0.0, "learning_rate": 1.8487383050604403e-05, "loss": 1.2009, "step": 5164 }, { "epoch": 0.20208936536505204, "grad_norm": 0.0, "learning_rate": 1.8486712854050497e-05, "loss": 1.2283, "step": 5165 }, { "epoch": 0.20212849205728148, "grad_norm": 0.0, "learning_rate": 1.8486042521210618e-05, "loss": 1.2525, "step": 5166 }, { "epoch": 0.20216761874951092, "grad_norm": 0.0, "learning_rate": 1.848537205209553e-05, "loss": 1.1129, "step": 5167 }, { "epoch": 0.20220674544174036, "grad_norm": 0.0, "learning_rate": 1.8484701446716e-05, "loss": 1.1375, "step": 5168 }, { "epoch": 0.2022458721339698, "grad_norm": 0.0, "learning_rate": 1.84840307050828e-05, "loss": 1.2878, "step": 5169 }, { "epoch": 0.20228499882619924, "grad_norm": 0.0, "learning_rate": 1.8483359827206692e-05, "loss": 1.1272, "step": 5170 }, { "epoch": 0.20232412551842868, "grad_norm": 0.0, "learning_rate": 1.848268881309846e-05, "loss": 1.1736, "step": 5171 }, { "epoch": 0.20236325221065812, "grad_norm": 0.0, "learning_rate": 1.848201766276887e-05, "loss": 1.068, "step": 5172 }, { "epoch": 0.20240237890288754, "grad_norm": 0.0, "learning_rate": 1.848134637622871e-05, "loss": 1.1786, "step": 5173 }, { "epoch": 0.20244150559511698, "grad_norm": 0.0, "learning_rate": 1.8480674953488752e-05, "loss": 1.2307, "step": 5174 }, { "epoch": 0.20248063228734642, "grad_norm": 0.0, "learning_rate": 1.8480003394559786e-05, "loss": 1.1847, "step": 5175 }, { "epoch": 0.20251975897957586, "grad_norm": 0.0, "learning_rate": 1.8479331699452583e-05, "loss": 1.2589, "step": 5176 }, { "epoch": 0.2025588856718053, "grad_norm": 0.0, "learning_rate": 1.8478659868177943e-05, "loss": 1.1497, "step": 5177 }, { "epoch": 0.20259801236403474, "grad_norm": 0.0, "learning_rate": 1.8477987900746653e-05, "loss": 1.1389, "step": 5178 }, { "epoch": 0.20263713905626418, "grad_norm": 0.0, "learning_rate": 1.8477315797169498e-05, "loss": 1.1169, "step": 5179 }, { "epoch": 0.20267626574849362, "grad_norm": 0.0, "learning_rate": 1.847664355745727e-05, "loss": 1.1722, "step": 5180 }, { "epoch": 0.20271539244072306, "grad_norm": 0.0, "learning_rate": 1.8475971181620772e-05, "loss": 1.2057, "step": 5181 }, { "epoch": 0.2027545191329525, "grad_norm": 0.0, "learning_rate": 1.8475298669670798e-05, "loss": 1.1297, "step": 5182 }, { "epoch": 0.20279364582518195, "grad_norm": 0.0, "learning_rate": 1.847462602161815e-05, "loss": 1.3037, "step": 5183 }, { "epoch": 0.20283277251741139, "grad_norm": 0.0, "learning_rate": 1.8473953237473623e-05, "loss": 1.1487, "step": 5184 }, { "epoch": 0.20287189920964083, "grad_norm": 0.0, "learning_rate": 1.8473280317248025e-05, "loss": 1.0319, "step": 5185 }, { "epoch": 0.20291102590187027, "grad_norm": 0.0, "learning_rate": 1.8472607260952168e-05, "loss": 1.1646, "step": 5186 }, { "epoch": 0.2029501525940997, "grad_norm": 0.0, "learning_rate": 1.847193406859685e-05, "loss": 1.1278, "step": 5187 }, { "epoch": 0.20298927928632912, "grad_norm": 0.0, "learning_rate": 1.8471260740192888e-05, "loss": 1.0136, "step": 5188 }, { "epoch": 0.20302840597855856, "grad_norm": 0.0, "learning_rate": 1.8470587275751095e-05, "loss": 1.2085, "step": 5189 }, { "epoch": 0.203067532670788, "grad_norm": 0.0, "learning_rate": 1.8469913675282285e-05, "loss": 1.15, "step": 5190 }, { "epoch": 0.20310665936301744, "grad_norm": 0.0, "learning_rate": 1.846923993879727e-05, "loss": 1.2373, "step": 5191 }, { "epoch": 0.20314578605524689, "grad_norm": 0.0, "learning_rate": 1.846856606630688e-05, "loss": 1.2498, "step": 5192 }, { "epoch": 0.20318491274747633, "grad_norm": 0.0, "learning_rate": 1.846789205782193e-05, "loss": 1.1473, "step": 5193 }, { "epoch": 0.20322403943970577, "grad_norm": 0.0, "learning_rate": 1.846721791335324e-05, "loss": 1.2819, "step": 5194 }, { "epoch": 0.2032631661319352, "grad_norm": 0.0, "learning_rate": 1.8466543632911645e-05, "loss": 1.1609, "step": 5195 }, { "epoch": 0.20330229282416465, "grad_norm": 0.0, "learning_rate": 1.8465869216507965e-05, "loss": 1.0301, "step": 5196 }, { "epoch": 0.2033414195163941, "grad_norm": 0.0, "learning_rate": 1.8465194664153036e-05, "loss": 1.2109, "step": 5197 }, { "epoch": 0.20338054620862353, "grad_norm": 0.0, "learning_rate": 1.8464519975857688e-05, "loss": 1.122, "step": 5198 }, { "epoch": 0.20341967290085297, "grad_norm": 0.0, "learning_rate": 1.8463845151632756e-05, "loss": 1.0578, "step": 5199 }, { "epoch": 0.2034587995930824, "grad_norm": 0.0, "learning_rate": 1.8463170191489075e-05, "loss": 1.1986, "step": 5200 }, { "epoch": 0.20349792628531185, "grad_norm": 0.0, "learning_rate": 1.8462495095437484e-05, "loss": 1.1725, "step": 5201 }, { "epoch": 0.20353705297754127, "grad_norm": 0.0, "learning_rate": 1.846181986348883e-05, "loss": 1.0909, "step": 5202 }, { "epoch": 0.2035761796697707, "grad_norm": 0.0, "learning_rate": 1.8461144495653952e-05, "loss": 1.1731, "step": 5203 }, { "epoch": 0.20361530636200015, "grad_norm": 0.0, "learning_rate": 1.8460468991943693e-05, "loss": 1.1324, "step": 5204 }, { "epoch": 0.2036544330542296, "grad_norm": 0.0, "learning_rate": 1.8459793352368908e-05, "loss": 1.1091, "step": 5205 }, { "epoch": 0.20369355974645903, "grad_norm": 0.0, "learning_rate": 1.8459117576940436e-05, "loss": 1.0029, "step": 5206 }, { "epoch": 0.20373268643868847, "grad_norm": 0.0, "learning_rate": 1.845844166566914e-05, "loss": 1.2093, "step": 5207 }, { "epoch": 0.2037718131309179, "grad_norm": 0.0, "learning_rate": 1.845776561856587e-05, "loss": 1.0176, "step": 5208 }, { "epoch": 0.20381093982314735, "grad_norm": 0.0, "learning_rate": 1.845708943564148e-05, "loss": 1.127, "step": 5209 }, { "epoch": 0.2038500665153768, "grad_norm": 0.0, "learning_rate": 1.8456413116906834e-05, "loss": 1.2526, "step": 5210 }, { "epoch": 0.20388919320760623, "grad_norm": 0.0, "learning_rate": 1.8455736662372787e-05, "loss": 1.1232, "step": 5211 }, { "epoch": 0.20392831989983567, "grad_norm": 0.0, "learning_rate": 1.8455060072050208e-05, "loss": 1.2657, "step": 5212 }, { "epoch": 0.20396744659206512, "grad_norm": 0.0, "learning_rate": 1.8454383345949954e-05, "loss": 1.2794, "step": 5213 }, { "epoch": 0.20400657328429456, "grad_norm": 0.0, "learning_rate": 1.8453706484082903e-05, "loss": 1.2028, "step": 5214 }, { "epoch": 0.204045699976524, "grad_norm": 0.0, "learning_rate": 1.8453029486459916e-05, "loss": 1.2454, "step": 5215 }, { "epoch": 0.2040848266687534, "grad_norm": 0.0, "learning_rate": 1.8452352353091866e-05, "loss": 1.1829, "step": 5216 }, { "epoch": 0.20412395336098285, "grad_norm": 0.0, "learning_rate": 1.8451675083989632e-05, "loss": 1.1609, "step": 5217 }, { "epoch": 0.2041630800532123, "grad_norm": 0.0, "learning_rate": 1.8450997679164087e-05, "loss": 1.0489, "step": 5218 }, { "epoch": 0.20420220674544173, "grad_norm": 0.0, "learning_rate": 1.8450320138626107e-05, "loss": 1.0876, "step": 5219 }, { "epoch": 0.20424133343767117, "grad_norm": 0.0, "learning_rate": 1.8449642462386574e-05, "loss": 1.0706, "step": 5220 }, { "epoch": 0.20428046012990062, "grad_norm": 0.0, "learning_rate": 1.844896465045637e-05, "loss": 1.1439, "step": 5221 }, { "epoch": 0.20431958682213006, "grad_norm": 0.0, "learning_rate": 1.8448286702846384e-05, "loss": 1.2552, "step": 5222 }, { "epoch": 0.2043587135143595, "grad_norm": 0.0, "learning_rate": 1.84476086195675e-05, "loss": 1.1, "step": 5223 }, { "epoch": 0.20439784020658894, "grad_norm": 0.0, "learning_rate": 1.8446930400630606e-05, "loss": 1.2148, "step": 5224 }, { "epoch": 0.20443696689881838, "grad_norm": 0.0, "learning_rate": 1.8446252046046593e-05, "loss": 1.0359, "step": 5225 }, { "epoch": 0.20447609359104782, "grad_norm": 0.0, "learning_rate": 1.8445573555826355e-05, "loss": 1.19, "step": 5226 }, { "epoch": 0.20451522028327726, "grad_norm": 0.0, "learning_rate": 1.8444894929980794e-05, "loss": 1.0209, "step": 5227 }, { "epoch": 0.2045543469755067, "grad_norm": 0.0, "learning_rate": 1.8444216168520795e-05, "loss": 1.1136, "step": 5228 }, { "epoch": 0.20459347366773614, "grad_norm": 0.0, "learning_rate": 1.844353727145727e-05, "loss": 1.1149, "step": 5229 }, { "epoch": 0.20463260035996556, "grad_norm": 0.0, "learning_rate": 1.8442858238801117e-05, "loss": 1.0188, "step": 5230 }, { "epoch": 0.204671727052195, "grad_norm": 0.0, "learning_rate": 1.844217907056324e-05, "loss": 1.1853, "step": 5231 }, { "epoch": 0.20471085374442444, "grad_norm": 0.0, "learning_rate": 1.8441499766754543e-05, "loss": 1.0648, "step": 5232 }, { "epoch": 0.20474998043665388, "grad_norm": 0.0, "learning_rate": 1.844082032738594e-05, "loss": 1.0836, "step": 5233 }, { "epoch": 0.20478910712888332, "grad_norm": 0.0, "learning_rate": 1.844014075246834e-05, "loss": 1.1452, "step": 5234 }, { "epoch": 0.20482823382111276, "grad_norm": 0.0, "learning_rate": 1.8439461042012653e-05, "loss": 1.2335, "step": 5235 }, { "epoch": 0.2048673605133422, "grad_norm": 0.0, "learning_rate": 1.84387811960298e-05, "loss": 1.0446, "step": 5236 }, { "epoch": 0.20490648720557164, "grad_norm": 0.0, "learning_rate": 1.8438101214530696e-05, "loss": 1.2932, "step": 5237 }, { "epoch": 0.20494561389780108, "grad_norm": 0.0, "learning_rate": 1.8437421097526257e-05, "loss": 1.1023, "step": 5238 }, { "epoch": 0.20498474059003052, "grad_norm": 0.0, "learning_rate": 1.843674084502741e-05, "loss": 1.2858, "step": 5239 }, { "epoch": 0.20502386728225996, "grad_norm": 0.0, "learning_rate": 1.8436060457045072e-05, "loss": 1.1657, "step": 5240 }, { "epoch": 0.2050629939744894, "grad_norm": 0.0, "learning_rate": 1.843537993359018e-05, "loss": 1.2913, "step": 5241 }, { "epoch": 0.20510212066671885, "grad_norm": 0.0, "learning_rate": 1.8434699274673654e-05, "loss": 1.0997, "step": 5242 }, { "epoch": 0.2051412473589483, "grad_norm": 0.0, "learning_rate": 1.8434018480306427e-05, "loss": 1.2256, "step": 5243 }, { "epoch": 0.2051803740511777, "grad_norm": 0.0, "learning_rate": 1.843333755049943e-05, "loss": 1.1511, "step": 5244 }, { "epoch": 0.20521950074340714, "grad_norm": 0.0, "learning_rate": 1.84326564852636e-05, "loss": 1.1292, "step": 5245 }, { "epoch": 0.20525862743563658, "grad_norm": 0.0, "learning_rate": 1.8431975284609874e-05, "loss": 1.1442, "step": 5246 }, { "epoch": 0.20529775412786602, "grad_norm": 0.0, "learning_rate": 1.8431293948549194e-05, "loss": 1.1154, "step": 5247 }, { "epoch": 0.20533688082009546, "grad_norm": 0.0, "learning_rate": 1.8430612477092496e-05, "loss": 1.1607, "step": 5248 }, { "epoch": 0.2053760075123249, "grad_norm": 0.0, "learning_rate": 1.8429930870250727e-05, "loss": 0.9913, "step": 5249 }, { "epoch": 0.20541513420455434, "grad_norm": 0.0, "learning_rate": 1.8429249128034827e-05, "loss": 1.1864, "step": 5250 }, { "epoch": 0.20545426089678379, "grad_norm": 0.0, "learning_rate": 1.842856725045575e-05, "loss": 1.2151, "step": 5251 }, { "epoch": 0.20549338758901323, "grad_norm": 0.0, "learning_rate": 1.8427885237524446e-05, "loss": 0.9781, "step": 5252 }, { "epoch": 0.20553251428124267, "grad_norm": 0.0, "learning_rate": 1.842720308925187e-05, "loss": 1.2333, "step": 5253 }, { "epoch": 0.2055716409734721, "grad_norm": 0.0, "learning_rate": 1.8426520805648966e-05, "loss": 1.285, "step": 5254 }, { "epoch": 0.20561076766570155, "grad_norm": 0.0, "learning_rate": 1.84258383867267e-05, "loss": 1.2858, "step": 5255 }, { "epoch": 0.205649894357931, "grad_norm": 0.0, "learning_rate": 1.8425155832496027e-05, "loss": 1.2271, "step": 5256 }, { "epoch": 0.20568902105016043, "grad_norm": 0.0, "learning_rate": 1.842447314296791e-05, "loss": 1.3264, "step": 5257 }, { "epoch": 0.20572814774238987, "grad_norm": 0.0, "learning_rate": 1.8423790318153307e-05, "loss": 1.0729, "step": 5258 }, { "epoch": 0.20576727443461929, "grad_norm": 0.0, "learning_rate": 1.8423107358063187e-05, "loss": 1.1599, "step": 5259 }, { "epoch": 0.20580640112684873, "grad_norm": 0.0, "learning_rate": 1.8422424262708523e-05, "loss": 1.1494, "step": 5260 }, { "epoch": 0.20584552781907817, "grad_norm": 0.0, "learning_rate": 1.8421741032100274e-05, "loss": 1.0458, "step": 5261 }, { "epoch": 0.2058846545113076, "grad_norm": 0.0, "learning_rate": 1.842105766624942e-05, "loss": 1.0864, "step": 5262 }, { "epoch": 0.20592378120353705, "grad_norm": 0.0, "learning_rate": 1.8420374165166928e-05, "loss": 1.2937, "step": 5263 }, { "epoch": 0.2059629078957665, "grad_norm": 0.0, "learning_rate": 1.8419690528863783e-05, "loss": 1.1837, "step": 5264 }, { "epoch": 0.20600203458799593, "grad_norm": 0.0, "learning_rate": 1.8419006757350956e-05, "loss": 1.2469, "step": 5265 }, { "epoch": 0.20604116128022537, "grad_norm": 0.0, "learning_rate": 1.841832285063943e-05, "loss": 1.191, "step": 5266 }, { "epoch": 0.2060802879724548, "grad_norm": 0.0, "learning_rate": 1.841763880874019e-05, "loss": 1.2017, "step": 5267 }, { "epoch": 0.20611941466468425, "grad_norm": 0.0, "learning_rate": 1.8416954631664216e-05, "loss": 1.2155, "step": 5268 }, { "epoch": 0.2061585413569137, "grad_norm": 0.0, "learning_rate": 1.84162703194225e-05, "loss": 1.2429, "step": 5269 }, { "epoch": 0.20619766804914313, "grad_norm": 0.0, "learning_rate": 1.8415585872026026e-05, "loss": 1.0596, "step": 5270 }, { "epoch": 0.20623679474137258, "grad_norm": 0.0, "learning_rate": 1.8414901289485793e-05, "loss": 1.2954, "step": 5271 }, { "epoch": 0.20627592143360202, "grad_norm": 0.0, "learning_rate": 1.8414216571812787e-05, "loss": 1.0694, "step": 5272 }, { "epoch": 0.20631504812583143, "grad_norm": 0.0, "learning_rate": 1.8413531719018006e-05, "loss": 1.1062, "step": 5273 }, { "epoch": 0.20635417481806087, "grad_norm": 0.0, "learning_rate": 1.8412846731112447e-05, "loss": 1.2136, "step": 5274 }, { "epoch": 0.2063933015102903, "grad_norm": 0.0, "learning_rate": 1.8412161608107114e-05, "loss": 1.1917, "step": 5275 }, { "epoch": 0.20643242820251975, "grad_norm": 0.0, "learning_rate": 1.8411476350013003e-05, "loss": 1.2411, "step": 5276 }, { "epoch": 0.2064715548947492, "grad_norm": 0.0, "learning_rate": 1.8410790956841126e-05, "loss": 1.2133, "step": 5277 }, { "epoch": 0.20651068158697863, "grad_norm": 0.0, "learning_rate": 1.8410105428602485e-05, "loss": 1.1594, "step": 5278 }, { "epoch": 0.20654980827920807, "grad_norm": 0.0, "learning_rate": 1.8409419765308087e-05, "loss": 1.2021, "step": 5279 }, { "epoch": 0.20658893497143752, "grad_norm": 0.0, "learning_rate": 1.840873396696895e-05, "loss": 1.0812, "step": 5280 }, { "epoch": 0.20662806166366696, "grad_norm": 0.0, "learning_rate": 1.8408048033596076e-05, "loss": 1.2158, "step": 5281 }, { "epoch": 0.2066671883558964, "grad_norm": 0.0, "learning_rate": 1.840736196520049e-05, "loss": 1.048, "step": 5282 }, { "epoch": 0.20670631504812584, "grad_norm": 0.0, "learning_rate": 1.8406675761793206e-05, "loss": 1.1545, "step": 5283 }, { "epoch": 0.20674544174035528, "grad_norm": 0.0, "learning_rate": 1.840598942338524e-05, "loss": 1.1792, "step": 5284 }, { "epoch": 0.20678456843258472, "grad_norm": 0.0, "learning_rate": 1.840530294998762e-05, "loss": 1.1764, "step": 5285 }, { "epoch": 0.20682369512481416, "grad_norm": 0.0, "learning_rate": 1.8404616341611366e-05, "loss": 1.1633, "step": 5286 }, { "epoch": 0.20686282181704357, "grad_norm": 0.0, "learning_rate": 1.8403929598267507e-05, "loss": 1.1976, "step": 5287 }, { "epoch": 0.20690194850927301, "grad_norm": 0.0, "learning_rate": 1.840324271996707e-05, "loss": 1.1215, "step": 5288 }, { "epoch": 0.20694107520150246, "grad_norm": 0.0, "learning_rate": 1.840255570672108e-05, "loss": 1.1368, "step": 5289 }, { "epoch": 0.2069802018937319, "grad_norm": 0.0, "learning_rate": 1.840186855854058e-05, "loss": 1.2061, "step": 5290 }, { "epoch": 0.20701932858596134, "grad_norm": 0.0, "learning_rate": 1.8401181275436596e-05, "loss": 1.0252, "step": 5291 }, { "epoch": 0.20705845527819078, "grad_norm": 0.0, "learning_rate": 1.840049385742017e-05, "loss": 1.2101, "step": 5292 }, { "epoch": 0.20709758197042022, "grad_norm": 0.0, "learning_rate": 1.8399806304502333e-05, "loss": 1.1556, "step": 5293 }, { "epoch": 0.20713670866264966, "grad_norm": 0.0, "learning_rate": 1.839911861669414e-05, "loss": 1.1128, "step": 5294 }, { "epoch": 0.2071758353548791, "grad_norm": 0.0, "learning_rate": 1.839843079400662e-05, "loss": 1.1604, "step": 5295 }, { "epoch": 0.20721496204710854, "grad_norm": 0.0, "learning_rate": 1.839774283645083e-05, "loss": 1.0905, "step": 5296 }, { "epoch": 0.20725408873933798, "grad_norm": 0.0, "learning_rate": 1.839705474403781e-05, "loss": 1.231, "step": 5297 }, { "epoch": 0.20729321543156742, "grad_norm": 0.0, "learning_rate": 1.8396366516778615e-05, "loss": 1.1814, "step": 5298 }, { "epoch": 0.20733234212379686, "grad_norm": 0.0, "learning_rate": 1.8395678154684297e-05, "loss": 1.2345, "step": 5299 }, { "epoch": 0.2073714688160263, "grad_norm": 0.0, "learning_rate": 1.8394989657765904e-05, "loss": 1.1711, "step": 5300 }, { "epoch": 0.20741059550825572, "grad_norm": 0.0, "learning_rate": 1.83943010260345e-05, "loss": 1.0597, "step": 5301 }, { "epoch": 0.20744972220048516, "grad_norm": 0.0, "learning_rate": 1.8393612259501136e-05, "loss": 1.1815, "step": 5302 }, { "epoch": 0.2074888488927146, "grad_norm": 0.0, "learning_rate": 1.8392923358176878e-05, "loss": 1.1185, "step": 5303 }, { "epoch": 0.20752797558494404, "grad_norm": 0.0, "learning_rate": 1.8392234322072792e-05, "loss": 1.1445, "step": 5304 }, { "epoch": 0.20756710227717348, "grad_norm": 0.0, "learning_rate": 1.8391545151199936e-05, "loss": 1.1072, "step": 5305 }, { "epoch": 0.20760622896940292, "grad_norm": 0.0, "learning_rate": 1.839085584556938e-05, "loss": 1.0608, "step": 5306 }, { "epoch": 0.20764535566163236, "grad_norm": 0.0, "learning_rate": 1.8390166405192192e-05, "loss": 1.1322, "step": 5307 }, { "epoch": 0.2076844823538618, "grad_norm": 0.0, "learning_rate": 1.8389476830079446e-05, "loss": 1.153, "step": 5308 }, { "epoch": 0.20772360904609125, "grad_norm": 0.0, "learning_rate": 1.838878712024222e-05, "loss": 1.0927, "step": 5309 }, { "epoch": 0.2077627357383207, "grad_norm": 0.0, "learning_rate": 1.838809727569158e-05, "loss": 1.0849, "step": 5310 }, { "epoch": 0.20780186243055013, "grad_norm": 0.0, "learning_rate": 1.8387407296438606e-05, "loss": 1.1188, "step": 5311 }, { "epoch": 0.20784098912277957, "grad_norm": 0.0, "learning_rate": 1.8386717182494385e-05, "loss": 1.3183, "step": 5312 }, { "epoch": 0.207880115815009, "grad_norm": 0.0, "learning_rate": 1.838602693386999e-05, "loss": 1.1824, "step": 5313 }, { "epoch": 0.20791924250723845, "grad_norm": 0.0, "learning_rate": 1.8385336550576515e-05, "loss": 1.2272, "step": 5314 }, { "epoch": 0.2079583691994679, "grad_norm": 0.0, "learning_rate": 1.8384646032625042e-05, "loss": 1.2067, "step": 5315 }, { "epoch": 0.2079974958916973, "grad_norm": 0.0, "learning_rate": 1.838395538002666e-05, "loss": 1.3491, "step": 5316 }, { "epoch": 0.20803662258392674, "grad_norm": 0.0, "learning_rate": 1.838326459279246e-05, "loss": 1.1464, "step": 5317 }, { "epoch": 0.20807574927615619, "grad_norm": 0.0, "learning_rate": 1.8382573670933536e-05, "loss": 1.1032, "step": 5318 }, { "epoch": 0.20811487596838563, "grad_norm": 0.0, "learning_rate": 1.8381882614460985e-05, "loss": 1.2354, "step": 5319 }, { "epoch": 0.20815400266061507, "grad_norm": 0.0, "learning_rate": 1.8381191423385897e-05, "loss": 1.1711, "step": 5320 }, { "epoch": 0.2081931293528445, "grad_norm": 0.0, "learning_rate": 1.838050009771938e-05, "loss": 1.166, "step": 5321 }, { "epoch": 0.20823225604507395, "grad_norm": 0.0, "learning_rate": 1.8379808637472532e-05, "loss": 1.1409, "step": 5322 }, { "epoch": 0.2082713827373034, "grad_norm": 0.0, "learning_rate": 1.837911704265646e-05, "loss": 1.2248, "step": 5323 }, { "epoch": 0.20831050942953283, "grad_norm": 0.0, "learning_rate": 1.8378425313282266e-05, "loss": 1.0572, "step": 5324 }, { "epoch": 0.20834963612176227, "grad_norm": 0.0, "learning_rate": 1.837773344936106e-05, "loss": 1.133, "step": 5325 }, { "epoch": 0.2083887628139917, "grad_norm": 0.0, "learning_rate": 1.8377041450903955e-05, "loss": 1.1219, "step": 5326 }, { "epoch": 0.20842788950622115, "grad_norm": 0.0, "learning_rate": 1.8376349317922056e-05, "loss": 1.2594, "step": 5327 }, { "epoch": 0.2084670161984506, "grad_norm": 0.0, "learning_rate": 1.837565705042649e-05, "loss": 1.0265, "step": 5328 }, { "epoch": 0.20850614289068004, "grad_norm": 0.0, "learning_rate": 1.837496464842836e-05, "loss": 1.0501, "step": 5329 }, { "epoch": 0.20854526958290945, "grad_norm": 0.0, "learning_rate": 1.8374272111938797e-05, "loss": 1.1177, "step": 5330 }, { "epoch": 0.2085843962751389, "grad_norm": 0.0, "learning_rate": 1.837357944096892e-05, "loss": 1.1179, "step": 5331 }, { "epoch": 0.20862352296736833, "grad_norm": 0.0, "learning_rate": 1.8372886635529843e-05, "loss": 1.1175, "step": 5332 }, { "epoch": 0.20866264965959777, "grad_norm": 0.0, "learning_rate": 1.83721936956327e-05, "loss": 1.0392, "step": 5333 }, { "epoch": 0.2087017763518272, "grad_norm": 0.0, "learning_rate": 1.837150062128862e-05, "loss": 1.1451, "step": 5334 }, { "epoch": 0.20874090304405665, "grad_norm": 0.0, "learning_rate": 1.8370807412508728e-05, "loss": 1.2771, "step": 5335 }, { "epoch": 0.2087800297362861, "grad_norm": 0.0, "learning_rate": 1.8370114069304158e-05, "loss": 1.1506, "step": 5336 }, { "epoch": 0.20881915642851553, "grad_norm": 0.0, "learning_rate": 1.8369420591686048e-05, "loss": 1.1258, "step": 5337 }, { "epoch": 0.20885828312074498, "grad_norm": 0.0, "learning_rate": 1.8368726979665527e-05, "loss": 1.1887, "step": 5338 }, { "epoch": 0.20889740981297442, "grad_norm": 0.0, "learning_rate": 1.836803323325374e-05, "loss": 0.999, "step": 5339 }, { "epoch": 0.20893653650520386, "grad_norm": 0.0, "learning_rate": 1.836733935246182e-05, "loss": 1.178, "step": 5340 }, { "epoch": 0.2089756631974333, "grad_norm": 0.0, "learning_rate": 1.836664533730092e-05, "loss": 1.1127, "step": 5341 }, { "epoch": 0.20901478988966274, "grad_norm": 0.0, "learning_rate": 1.836595118778218e-05, "loss": 1.1465, "step": 5342 }, { "epoch": 0.20905391658189218, "grad_norm": 0.0, "learning_rate": 1.836525690391674e-05, "loss": 1.1562, "step": 5343 }, { "epoch": 0.2090930432741216, "grad_norm": 0.0, "learning_rate": 1.8364562485715763e-05, "loss": 1.2314, "step": 5344 }, { "epoch": 0.20913216996635103, "grad_norm": 0.0, "learning_rate": 1.8363867933190396e-05, "loss": 1.2122, "step": 5345 }, { "epoch": 0.20917129665858047, "grad_norm": 0.0, "learning_rate": 1.8363173246351788e-05, "loss": 1.1837, "step": 5346 }, { "epoch": 0.20921042335080992, "grad_norm": 0.0, "learning_rate": 1.836247842521109e-05, "loss": 1.1593, "step": 5347 }, { "epoch": 0.20924955004303936, "grad_norm": 0.0, "learning_rate": 1.8361783469779476e-05, "loss": 1.1359, "step": 5348 }, { "epoch": 0.2092886767352688, "grad_norm": 0.0, "learning_rate": 1.8361088380068098e-05, "loss": 1.162, "step": 5349 }, { "epoch": 0.20932780342749824, "grad_norm": 0.0, "learning_rate": 1.8360393156088113e-05, "loss": 1.0151, "step": 5350 }, { "epoch": 0.20936693011972768, "grad_norm": 0.0, "learning_rate": 1.8359697797850692e-05, "loss": 1.2221, "step": 5351 }, { "epoch": 0.20940605681195712, "grad_norm": 0.0, "learning_rate": 1.8359002305366996e-05, "loss": 1.231, "step": 5352 }, { "epoch": 0.20944518350418656, "grad_norm": 0.0, "learning_rate": 1.8358306678648205e-05, "loss": 1.2227, "step": 5353 }, { "epoch": 0.209484310196416, "grad_norm": 0.0, "learning_rate": 1.8357610917705476e-05, "loss": 1.1533, "step": 5354 }, { "epoch": 0.20952343688864544, "grad_norm": 0.0, "learning_rate": 1.835691502254999e-05, "loss": 1.2803, "step": 5355 }, { "epoch": 0.20956256358087488, "grad_norm": 0.0, "learning_rate": 1.8356218993192922e-05, "loss": 1.1396, "step": 5356 }, { "epoch": 0.20960169027310432, "grad_norm": 0.0, "learning_rate": 1.8355522829645445e-05, "loss": 1.2576, "step": 5357 }, { "epoch": 0.20964081696533374, "grad_norm": 0.0, "learning_rate": 1.8354826531918744e-05, "loss": 1.1162, "step": 5358 }, { "epoch": 0.20967994365756318, "grad_norm": 0.0, "learning_rate": 1.8354130100024e-05, "loss": 0.9481, "step": 5359 }, { "epoch": 0.20971907034979262, "grad_norm": 0.0, "learning_rate": 1.8353433533972392e-05, "loss": 1.1005, "step": 5360 }, { "epoch": 0.20975819704202206, "grad_norm": 0.0, "learning_rate": 1.8352736833775106e-05, "loss": 1.1344, "step": 5361 }, { "epoch": 0.2097973237342515, "grad_norm": 0.0, "learning_rate": 1.8352039999443336e-05, "loss": 1.2896, "step": 5362 }, { "epoch": 0.20983645042648094, "grad_norm": 0.0, "learning_rate": 1.8351343030988273e-05, "loss": 1.0726, "step": 5363 }, { "epoch": 0.20987557711871038, "grad_norm": 0.0, "learning_rate": 1.8350645928421097e-05, "loss": 1.121, "step": 5364 }, { "epoch": 0.20991470381093982, "grad_norm": 0.0, "learning_rate": 1.8349948691753017e-05, "loss": 1.2027, "step": 5365 }, { "epoch": 0.20995383050316926, "grad_norm": 0.0, "learning_rate": 1.8349251320995224e-05, "loss": 1.0792, "step": 5366 }, { "epoch": 0.2099929571953987, "grad_norm": 0.0, "learning_rate": 1.8348553816158915e-05, "loss": 1.2187, "step": 5367 }, { "epoch": 0.21003208388762815, "grad_norm": 0.0, "learning_rate": 1.8347856177255296e-05, "loss": 1.1929, "step": 5368 }, { "epoch": 0.2100712105798576, "grad_norm": 0.0, "learning_rate": 1.8347158404295566e-05, "loss": 1.3102, "step": 5369 }, { "epoch": 0.21011033727208703, "grad_norm": 0.0, "learning_rate": 1.834646049729093e-05, "loss": 1.161, "step": 5370 }, { "epoch": 0.21014946396431647, "grad_norm": 0.0, "learning_rate": 1.8345762456252602e-05, "loss": 1.1677, "step": 5371 }, { "epoch": 0.2101885906565459, "grad_norm": 0.0, "learning_rate": 1.8345064281191784e-05, "loss": 1.1634, "step": 5372 }, { "epoch": 0.21022771734877532, "grad_norm": 0.0, "learning_rate": 1.8344365972119687e-05, "loss": 1.2948, "step": 5373 }, { "epoch": 0.21026684404100476, "grad_norm": 0.0, "learning_rate": 1.834366752904753e-05, "loss": 1.3063, "step": 5374 }, { "epoch": 0.2103059707332342, "grad_norm": 0.0, "learning_rate": 1.834296895198653e-05, "loss": 0.9497, "step": 5375 }, { "epoch": 0.21034509742546365, "grad_norm": 0.0, "learning_rate": 1.83422702409479e-05, "loss": 1.1331, "step": 5376 }, { "epoch": 0.21038422411769309, "grad_norm": 0.0, "learning_rate": 1.834157139594287e-05, "loss": 1.2402, "step": 5377 }, { "epoch": 0.21042335080992253, "grad_norm": 0.0, "learning_rate": 1.834087241698265e-05, "loss": 1.2138, "step": 5378 }, { "epoch": 0.21046247750215197, "grad_norm": 0.0, "learning_rate": 1.8340173304078472e-05, "loss": 1.19, "step": 5379 }, { "epoch": 0.2105016041943814, "grad_norm": 0.0, "learning_rate": 1.833947405724156e-05, "loss": 1.1542, "step": 5380 }, { "epoch": 0.21054073088661085, "grad_norm": 0.0, "learning_rate": 1.8338774676483148e-05, "loss": 1.0744, "step": 5381 }, { "epoch": 0.2105798575788403, "grad_norm": 0.0, "learning_rate": 1.8338075161814462e-05, "loss": 1.076, "step": 5382 }, { "epoch": 0.21061898427106973, "grad_norm": 0.0, "learning_rate": 1.8337375513246736e-05, "loss": 1.1885, "step": 5383 }, { "epoch": 0.21065811096329917, "grad_norm": 0.0, "learning_rate": 1.833667573079121e-05, "loss": 1.2914, "step": 5384 }, { "epoch": 0.2106972376555286, "grad_norm": 0.0, "learning_rate": 1.8335975814459113e-05, "loss": 1.1824, "step": 5385 }, { "epoch": 0.21073636434775805, "grad_norm": 0.0, "learning_rate": 1.8335275764261693e-05, "loss": 1.1133, "step": 5386 }, { "epoch": 0.21077549103998747, "grad_norm": 0.0, "learning_rate": 1.833457558021019e-05, "loss": 1.1246, "step": 5387 }, { "epoch": 0.2108146177322169, "grad_norm": 0.0, "learning_rate": 1.8333875262315844e-05, "loss": 1.2311, "step": 5388 }, { "epoch": 0.21085374442444635, "grad_norm": 0.0, "learning_rate": 1.8333174810589905e-05, "loss": 1.1649, "step": 5389 }, { "epoch": 0.2108928711166758, "grad_norm": 0.0, "learning_rate": 1.8332474225043623e-05, "loss": 1.1425, "step": 5390 }, { "epoch": 0.21093199780890523, "grad_norm": 0.0, "learning_rate": 1.8331773505688243e-05, "loss": 1.2617, "step": 5391 }, { "epoch": 0.21097112450113467, "grad_norm": 0.0, "learning_rate": 1.8331072652535024e-05, "loss": 1.0261, "step": 5392 }, { "epoch": 0.2110102511933641, "grad_norm": 0.0, "learning_rate": 1.8330371665595218e-05, "loss": 1.2296, "step": 5393 }, { "epoch": 0.21104937788559355, "grad_norm": 0.0, "learning_rate": 1.832967054488008e-05, "loss": 1.0838, "step": 5394 }, { "epoch": 0.211088504577823, "grad_norm": 0.0, "learning_rate": 1.8328969290400867e-05, "loss": 1.11, "step": 5395 }, { "epoch": 0.21112763127005243, "grad_norm": 0.0, "learning_rate": 1.8328267902168848e-05, "loss": 1.1964, "step": 5396 }, { "epoch": 0.21116675796228188, "grad_norm": 0.0, "learning_rate": 1.8327566380195283e-05, "loss": 1.1044, "step": 5397 }, { "epoch": 0.21120588465451132, "grad_norm": 0.0, "learning_rate": 1.8326864724491434e-05, "loss": 1.1432, "step": 5398 }, { "epoch": 0.21124501134674076, "grad_norm": 0.0, "learning_rate": 1.8326162935068575e-05, "loss": 1.22, "step": 5399 }, { "epoch": 0.2112841380389702, "grad_norm": 0.0, "learning_rate": 1.832546101193797e-05, "loss": 1.2318, "step": 5400 }, { "epoch": 0.2113232647311996, "grad_norm": 0.0, "learning_rate": 1.8324758955110895e-05, "loss": 1.1873, "step": 5401 }, { "epoch": 0.21136239142342905, "grad_norm": 0.0, "learning_rate": 1.8324056764598623e-05, "loss": 1.2614, "step": 5402 }, { "epoch": 0.2114015181156585, "grad_norm": 0.0, "learning_rate": 1.8323354440412425e-05, "loss": 1.1061, "step": 5403 }, { "epoch": 0.21144064480788793, "grad_norm": 0.0, "learning_rate": 1.832265198256359e-05, "loss": 1.1392, "step": 5404 }, { "epoch": 0.21147977150011738, "grad_norm": 0.0, "learning_rate": 1.8321949391063394e-05, "loss": 1.0645, "step": 5405 }, { "epoch": 0.21151889819234682, "grad_norm": 0.0, "learning_rate": 1.8321246665923116e-05, "loss": 1.1523, "step": 5406 }, { "epoch": 0.21155802488457626, "grad_norm": 0.0, "learning_rate": 1.8320543807154043e-05, "loss": 1.2289, "step": 5407 }, { "epoch": 0.2115971515768057, "grad_norm": 0.0, "learning_rate": 1.8319840814767463e-05, "loss": 1.1923, "step": 5408 }, { "epoch": 0.21163627826903514, "grad_norm": 0.0, "learning_rate": 1.8319137688774667e-05, "loss": 1.1721, "step": 5409 }, { "epoch": 0.21167540496126458, "grad_norm": 0.0, "learning_rate": 1.8318434429186943e-05, "loss": 1.0993, "step": 5410 }, { "epoch": 0.21171453165349402, "grad_norm": 0.0, "learning_rate": 1.8317731036015584e-05, "loss": 1.1623, "step": 5411 }, { "epoch": 0.21175365834572346, "grad_norm": 0.0, "learning_rate": 1.831702750927189e-05, "loss": 1.2911, "step": 5412 }, { "epoch": 0.2117927850379529, "grad_norm": 0.0, "learning_rate": 1.8316323848967156e-05, "loss": 1.1111, "step": 5413 }, { "epoch": 0.21183191173018234, "grad_norm": 0.0, "learning_rate": 1.831562005511268e-05, "loss": 1.139, "step": 5414 }, { "epoch": 0.21187103842241176, "grad_norm": 0.0, "learning_rate": 1.8314916127719765e-05, "loss": 1.0829, "step": 5415 }, { "epoch": 0.2119101651146412, "grad_norm": 0.0, "learning_rate": 1.8314212066799716e-05, "loss": 1.1694, "step": 5416 }, { "epoch": 0.21194929180687064, "grad_norm": 0.0, "learning_rate": 1.8313507872363843e-05, "loss": 1.0843, "step": 5417 }, { "epoch": 0.21198841849910008, "grad_norm": 0.0, "learning_rate": 1.831280354442345e-05, "loss": 1.191, "step": 5418 }, { "epoch": 0.21202754519132952, "grad_norm": 0.0, "learning_rate": 1.8312099082989846e-05, "loss": 1.1664, "step": 5419 }, { "epoch": 0.21206667188355896, "grad_norm": 0.0, "learning_rate": 1.831139448807435e-05, "loss": 1.1023, "step": 5420 }, { "epoch": 0.2121057985757884, "grad_norm": 0.0, "learning_rate": 1.831068975968827e-05, "loss": 1.1467, "step": 5421 }, { "epoch": 0.21214492526801784, "grad_norm": 0.0, "learning_rate": 1.830998489784293e-05, "loss": 1.1061, "step": 5422 }, { "epoch": 0.21218405196024728, "grad_norm": 0.0, "learning_rate": 1.8309279902549642e-05, "loss": 1.2316, "step": 5423 }, { "epoch": 0.21222317865247672, "grad_norm": 0.0, "learning_rate": 1.830857477381973e-05, "loss": 1.1539, "step": 5424 }, { "epoch": 0.21226230534470616, "grad_norm": 0.0, "learning_rate": 1.8307869511664522e-05, "loss": 1.1017, "step": 5425 }, { "epoch": 0.2123014320369356, "grad_norm": 0.0, "learning_rate": 1.830716411609534e-05, "loss": 1.2296, "step": 5426 }, { "epoch": 0.21234055872916505, "grad_norm": 0.0, "learning_rate": 1.8306458587123507e-05, "loss": 1.2854, "step": 5427 }, { "epoch": 0.2123796854213945, "grad_norm": 0.0, "learning_rate": 1.8305752924760362e-05, "loss": 1.2113, "step": 5428 }, { "epoch": 0.2124188121136239, "grad_norm": 0.0, "learning_rate": 1.8305047129017233e-05, "loss": 1.1359, "step": 5429 }, { "epoch": 0.21245793880585334, "grad_norm": 0.0, "learning_rate": 1.830434119990545e-05, "loss": 1.2902, "step": 5430 }, { "epoch": 0.21249706549808278, "grad_norm": 0.0, "learning_rate": 1.830363513743636e-05, "loss": 1.1726, "step": 5431 }, { "epoch": 0.21253619219031222, "grad_norm": 0.0, "learning_rate": 1.8302928941621286e-05, "loss": 1.2181, "step": 5432 }, { "epoch": 0.21257531888254166, "grad_norm": 0.0, "learning_rate": 1.8302222612471583e-05, "loss": 1.2133, "step": 5433 }, { "epoch": 0.2126144455747711, "grad_norm": 0.0, "learning_rate": 1.830151614999859e-05, "loss": 1.1968, "step": 5434 }, { "epoch": 0.21265357226700055, "grad_norm": 0.0, "learning_rate": 1.8300809554213644e-05, "loss": 1.2075, "step": 5435 }, { "epoch": 0.21269269895923, "grad_norm": 0.0, "learning_rate": 1.8300102825128103e-05, "loss": 1.0816, "step": 5436 }, { "epoch": 0.21273182565145943, "grad_norm": 0.0, "learning_rate": 1.829939596275331e-05, "loss": 1.0805, "step": 5437 }, { "epoch": 0.21277095234368887, "grad_norm": 0.0, "learning_rate": 1.8298688967100615e-05, "loss": 1.1625, "step": 5438 }, { "epoch": 0.2128100790359183, "grad_norm": 0.0, "learning_rate": 1.8297981838181376e-05, "loss": 1.1884, "step": 5439 }, { "epoch": 0.21284920572814775, "grad_norm": 0.0, "learning_rate": 1.829727457600695e-05, "loss": 1.2435, "step": 5440 }, { "epoch": 0.2128883324203772, "grad_norm": 0.0, "learning_rate": 1.8296567180588685e-05, "loss": 1.1755, "step": 5441 }, { "epoch": 0.21292745911260663, "grad_norm": 0.0, "learning_rate": 1.829585965193795e-05, "loss": 1.2415, "step": 5442 }, { "epoch": 0.21296658580483607, "grad_norm": 0.0, "learning_rate": 1.8295151990066105e-05, "loss": 1.134, "step": 5443 }, { "epoch": 0.21300571249706549, "grad_norm": 0.0, "learning_rate": 1.8294444194984514e-05, "loss": 1.0706, "step": 5444 }, { "epoch": 0.21304483918929493, "grad_norm": 0.0, "learning_rate": 1.829373626670454e-05, "loss": 1.2309, "step": 5445 }, { "epoch": 0.21308396588152437, "grad_norm": 0.0, "learning_rate": 1.8293028205237556e-05, "loss": 1.1263, "step": 5446 }, { "epoch": 0.2131230925737538, "grad_norm": 0.0, "learning_rate": 1.829232001059493e-05, "loss": 1.2143, "step": 5447 }, { "epoch": 0.21316221926598325, "grad_norm": 0.0, "learning_rate": 1.8291611682788034e-05, "loss": 1.0906, "step": 5448 }, { "epoch": 0.2132013459582127, "grad_norm": 0.0, "learning_rate": 1.829090322182825e-05, "loss": 1.1034, "step": 5449 }, { "epoch": 0.21324047265044213, "grad_norm": 0.0, "learning_rate": 1.8290194627726942e-05, "loss": 1.0661, "step": 5450 }, { "epoch": 0.21327959934267157, "grad_norm": 0.0, "learning_rate": 1.8289485900495502e-05, "loss": 1.2627, "step": 5451 }, { "epoch": 0.213318726034901, "grad_norm": 0.0, "learning_rate": 1.8288777040145303e-05, "loss": 1.0492, "step": 5452 }, { "epoch": 0.21335785272713045, "grad_norm": 0.0, "learning_rate": 1.8288068046687732e-05, "loss": 1.2667, "step": 5453 }, { "epoch": 0.2133969794193599, "grad_norm": 0.0, "learning_rate": 1.8287358920134176e-05, "loss": 1.2174, "step": 5454 }, { "epoch": 0.21343610611158934, "grad_norm": 0.0, "learning_rate": 1.8286649660496017e-05, "loss": 1.1912, "step": 5455 }, { "epoch": 0.21347523280381878, "grad_norm": 0.0, "learning_rate": 1.828594026778465e-05, "loss": 1.1625, "step": 5456 }, { "epoch": 0.21351435949604822, "grad_norm": 0.0, "learning_rate": 1.8285230742011464e-05, "loss": 1.2369, "step": 5457 }, { "epoch": 0.21355348618827763, "grad_norm": 0.0, "learning_rate": 1.8284521083187856e-05, "loss": 1.195, "step": 5458 }, { "epoch": 0.21359261288050707, "grad_norm": 0.0, "learning_rate": 1.8283811291325218e-05, "loss": 1.2427, "step": 5459 }, { "epoch": 0.2136317395727365, "grad_norm": 0.0, "learning_rate": 1.8283101366434954e-05, "loss": 1.1789, "step": 5460 }, { "epoch": 0.21367086626496595, "grad_norm": 0.0, "learning_rate": 1.828239130852846e-05, "loss": 1.1638, "step": 5461 }, { "epoch": 0.2137099929571954, "grad_norm": 0.0, "learning_rate": 1.8281681117617138e-05, "loss": 1.0125, "step": 5462 }, { "epoch": 0.21374911964942483, "grad_norm": 0.0, "learning_rate": 1.8280970793712397e-05, "loss": 1.0185, "step": 5463 }, { "epoch": 0.21378824634165428, "grad_norm": 0.0, "learning_rate": 1.8280260336825642e-05, "loss": 1.1448, "step": 5464 }, { "epoch": 0.21382737303388372, "grad_norm": 0.0, "learning_rate": 1.827954974696828e-05, "loss": 1.3115, "step": 5465 }, { "epoch": 0.21386649972611316, "grad_norm": 0.0, "learning_rate": 1.8278839024151723e-05, "loss": 1.146, "step": 5466 }, { "epoch": 0.2139056264183426, "grad_norm": 0.0, "learning_rate": 1.827812816838739e-05, "loss": 1.2011, "step": 5467 }, { "epoch": 0.21394475311057204, "grad_norm": 0.0, "learning_rate": 1.8277417179686688e-05, "loss": 1.1286, "step": 5468 }, { "epoch": 0.21398387980280148, "grad_norm": 0.0, "learning_rate": 1.827670605806104e-05, "loss": 1.107, "step": 5469 }, { "epoch": 0.21402300649503092, "grad_norm": 0.0, "learning_rate": 1.827599480352186e-05, "loss": 1.2267, "step": 5470 }, { "epoch": 0.21406213318726036, "grad_norm": 0.0, "learning_rate": 1.8275283416080576e-05, "loss": 1.2112, "step": 5471 }, { "epoch": 0.21410125987948977, "grad_norm": 0.0, "learning_rate": 1.827457189574861e-05, "loss": 1.1436, "step": 5472 }, { "epoch": 0.21414038657171922, "grad_norm": 0.0, "learning_rate": 1.827386024253739e-05, "loss": 1.203, "step": 5473 }, { "epoch": 0.21417951326394866, "grad_norm": 0.0, "learning_rate": 1.827314845645834e-05, "loss": 1.2644, "step": 5474 }, { "epoch": 0.2142186399561781, "grad_norm": 0.0, "learning_rate": 1.8272436537522897e-05, "loss": 1.2243, "step": 5475 }, { "epoch": 0.21425776664840754, "grad_norm": 0.0, "learning_rate": 1.8271724485742484e-05, "loss": 1.2451, "step": 5476 }, { "epoch": 0.21429689334063698, "grad_norm": 0.0, "learning_rate": 1.8271012301128542e-05, "loss": 1.1591, "step": 5477 }, { "epoch": 0.21433602003286642, "grad_norm": 0.0, "learning_rate": 1.8270299983692505e-05, "loss": 1.1136, "step": 5478 }, { "epoch": 0.21437514672509586, "grad_norm": 0.0, "learning_rate": 1.826958753344582e-05, "loss": 1.1514, "step": 5479 }, { "epoch": 0.2144142734173253, "grad_norm": 0.0, "learning_rate": 1.8268874950399914e-05, "loss": 1.1816, "step": 5480 }, { "epoch": 0.21445340010955474, "grad_norm": 0.0, "learning_rate": 1.826816223456624e-05, "loss": 1.1317, "step": 5481 }, { "epoch": 0.21449252680178418, "grad_norm": 0.0, "learning_rate": 1.8267449385956242e-05, "loss": 1.1243, "step": 5482 }, { "epoch": 0.21453165349401362, "grad_norm": 0.0, "learning_rate": 1.8266736404581368e-05, "loss": 1.065, "step": 5483 }, { "epoch": 0.21457078018624307, "grad_norm": 0.0, "learning_rate": 1.8266023290453067e-05, "loss": 1.1809, "step": 5484 }, { "epoch": 0.2146099068784725, "grad_norm": 0.0, "learning_rate": 1.8265310043582786e-05, "loss": 1.0837, "step": 5485 }, { "epoch": 0.21464903357070192, "grad_norm": 0.0, "learning_rate": 1.8264596663981985e-05, "loss": 1.1622, "step": 5486 }, { "epoch": 0.21468816026293136, "grad_norm": 0.0, "learning_rate": 1.8263883151662116e-05, "loss": 1.3585, "step": 5487 }, { "epoch": 0.2147272869551608, "grad_norm": 0.0, "learning_rate": 1.8263169506634638e-05, "loss": 1.1392, "step": 5488 }, { "epoch": 0.21476641364739024, "grad_norm": 0.0, "learning_rate": 1.8262455728911015e-05, "loss": 1.2092, "step": 5489 }, { "epoch": 0.21480554033961968, "grad_norm": 0.0, "learning_rate": 1.8261741818502706e-05, "loss": 1.2592, "step": 5490 }, { "epoch": 0.21484466703184912, "grad_norm": 0.0, "learning_rate": 1.8261027775421174e-05, "loss": 1.0802, "step": 5491 }, { "epoch": 0.21488379372407856, "grad_norm": 0.0, "learning_rate": 1.826031359967789e-05, "loss": 1.1573, "step": 5492 }, { "epoch": 0.214922920416308, "grad_norm": 0.0, "learning_rate": 1.8259599291284318e-05, "loss": 1.1092, "step": 5493 }, { "epoch": 0.21496204710853745, "grad_norm": 0.0, "learning_rate": 1.8258884850251932e-05, "loss": 1.2002, "step": 5494 }, { "epoch": 0.2150011738007669, "grad_norm": 0.0, "learning_rate": 1.8258170276592207e-05, "loss": 1.18, "step": 5495 }, { "epoch": 0.21504030049299633, "grad_norm": 0.0, "learning_rate": 1.825745557031661e-05, "loss": 1.1275, "step": 5496 }, { "epoch": 0.21507942718522577, "grad_norm": 0.0, "learning_rate": 1.8256740731436627e-05, "loss": 1.1365, "step": 5497 }, { "epoch": 0.2151185538774552, "grad_norm": 0.0, "learning_rate": 1.8256025759963735e-05, "loss": 1.1805, "step": 5498 }, { "epoch": 0.21515768056968465, "grad_norm": 0.0, "learning_rate": 1.8255310655909414e-05, "loss": 1.1419, "step": 5499 }, { "epoch": 0.2151968072619141, "grad_norm": 0.0, "learning_rate": 1.8254595419285147e-05, "loss": 1.2194, "step": 5500 }, { "epoch": 0.2152359339541435, "grad_norm": 0.0, "learning_rate": 1.8253880050102422e-05, "loss": 1.2761, "step": 5501 }, { "epoch": 0.21527506064637295, "grad_norm": 0.0, "learning_rate": 1.8253164548372726e-05, "loss": 1.2208, "step": 5502 }, { "epoch": 0.2153141873386024, "grad_norm": 0.0, "learning_rate": 1.825244891410755e-05, "loss": 1.1796, "step": 5503 }, { "epoch": 0.21535331403083183, "grad_norm": 0.0, "learning_rate": 1.8251733147318383e-05, "loss": 1.1328, "step": 5504 }, { "epoch": 0.21539244072306127, "grad_norm": 0.0, "learning_rate": 1.8251017248016724e-05, "loss": 1.1085, "step": 5505 }, { "epoch": 0.2154315674152907, "grad_norm": 0.0, "learning_rate": 1.8250301216214067e-05, "loss": 1.15, "step": 5506 }, { "epoch": 0.21547069410752015, "grad_norm": 0.0, "learning_rate": 1.824958505192191e-05, "loss": 1.1351, "step": 5507 }, { "epoch": 0.2155098207997496, "grad_norm": 0.0, "learning_rate": 1.8248868755151753e-05, "loss": 1.2754, "step": 5508 }, { "epoch": 0.21554894749197903, "grad_norm": 0.0, "learning_rate": 1.82481523259151e-05, "loss": 1.156, "step": 5509 }, { "epoch": 0.21558807418420847, "grad_norm": 0.0, "learning_rate": 1.8247435764223462e-05, "loss": 1.1018, "step": 5510 }, { "epoch": 0.2156272008764379, "grad_norm": 0.0, "learning_rate": 1.8246719070088335e-05, "loss": 1.2772, "step": 5511 }, { "epoch": 0.21566632756866735, "grad_norm": 0.0, "learning_rate": 1.8246002243521234e-05, "loss": 1.1613, "step": 5512 }, { "epoch": 0.2157054542608968, "grad_norm": 0.0, "learning_rate": 1.824528528453367e-05, "loss": 1.096, "step": 5513 }, { "epoch": 0.21574458095312624, "grad_norm": 0.0, "learning_rate": 1.8244568193137157e-05, "loss": 1.2298, "step": 5514 }, { "epoch": 0.21578370764535565, "grad_norm": 0.0, "learning_rate": 1.824385096934321e-05, "loss": 1.2289, "step": 5515 }, { "epoch": 0.2158228343375851, "grad_norm": 0.0, "learning_rate": 1.8243133613163344e-05, "loss": 1.2034, "step": 5516 }, { "epoch": 0.21586196102981453, "grad_norm": 0.0, "learning_rate": 1.8242416124609087e-05, "loss": 1.0987, "step": 5517 }, { "epoch": 0.21590108772204397, "grad_norm": 0.0, "learning_rate": 1.824169850369195e-05, "loss": 1.2416, "step": 5518 }, { "epoch": 0.2159402144142734, "grad_norm": 0.0, "learning_rate": 1.8240980750423465e-05, "loss": 1.1747, "step": 5519 }, { "epoch": 0.21597934110650285, "grad_norm": 0.0, "learning_rate": 1.8240262864815152e-05, "loss": 0.9843, "step": 5520 }, { "epoch": 0.2160184677987323, "grad_norm": 0.0, "learning_rate": 1.8239544846878544e-05, "loss": 1.2889, "step": 5521 }, { "epoch": 0.21605759449096174, "grad_norm": 0.0, "learning_rate": 1.8238826696625175e-05, "loss": 1.2194, "step": 5522 }, { "epoch": 0.21609672118319118, "grad_norm": 0.0, "learning_rate": 1.823810841406657e-05, "loss": 1.1752, "step": 5523 }, { "epoch": 0.21613584787542062, "grad_norm": 0.0, "learning_rate": 1.8237389999214267e-05, "loss": 1.1916, "step": 5524 }, { "epoch": 0.21617497456765006, "grad_norm": 0.0, "learning_rate": 1.8236671452079805e-05, "loss": 1.1242, "step": 5525 }, { "epoch": 0.2162141012598795, "grad_norm": 0.0, "learning_rate": 1.8235952772674718e-05, "loss": 1.1077, "step": 5526 }, { "epoch": 0.21625322795210894, "grad_norm": 0.0, "learning_rate": 1.823523396101055e-05, "loss": 1.2402, "step": 5527 }, { "epoch": 0.21629235464433838, "grad_norm": 0.0, "learning_rate": 1.8234515017098845e-05, "loss": 1.0641, "step": 5528 }, { "epoch": 0.2163314813365678, "grad_norm": 0.0, "learning_rate": 1.8233795940951145e-05, "loss": 1.1788, "step": 5529 }, { "epoch": 0.21637060802879723, "grad_norm": 0.0, "learning_rate": 1.8233076732579e-05, "loss": 1.1405, "step": 5530 }, { "epoch": 0.21640973472102668, "grad_norm": 0.0, "learning_rate": 1.823235739199396e-05, "loss": 1.033, "step": 5531 }, { "epoch": 0.21644886141325612, "grad_norm": 0.0, "learning_rate": 1.8231637919207576e-05, "loss": 1.2212, "step": 5532 }, { "epoch": 0.21648798810548556, "grad_norm": 0.0, "learning_rate": 1.8230918314231406e-05, "loss": 1.1176, "step": 5533 }, { "epoch": 0.216527114797715, "grad_norm": 0.0, "learning_rate": 1.8230198577076996e-05, "loss": 1.1545, "step": 5534 }, { "epoch": 0.21656624148994444, "grad_norm": 0.0, "learning_rate": 1.822947870775591e-05, "loss": 1.0468, "step": 5535 }, { "epoch": 0.21660536818217388, "grad_norm": 0.0, "learning_rate": 1.822875870627971e-05, "loss": 1.1628, "step": 5536 }, { "epoch": 0.21664449487440332, "grad_norm": 0.0, "learning_rate": 1.8228038572659958e-05, "loss": 1.2669, "step": 5537 }, { "epoch": 0.21668362156663276, "grad_norm": 0.0, "learning_rate": 1.8227318306908216e-05, "loss": 1.0937, "step": 5538 }, { "epoch": 0.2167227482588622, "grad_norm": 0.0, "learning_rate": 1.8226597909036048e-05, "loss": 1.1727, "step": 5539 }, { "epoch": 0.21676187495109164, "grad_norm": 0.0, "learning_rate": 1.822587737905503e-05, "loss": 1.0814, "step": 5540 }, { "epoch": 0.21680100164332108, "grad_norm": 0.0, "learning_rate": 1.822515671697673e-05, "loss": 1.1798, "step": 5541 }, { "epoch": 0.21684012833555052, "grad_norm": 0.0, "learning_rate": 1.8224435922812716e-05, "loss": 1.1748, "step": 5542 }, { "epoch": 0.21687925502777994, "grad_norm": 0.0, "learning_rate": 1.8223714996574566e-05, "loss": 1.0745, "step": 5543 }, { "epoch": 0.21691838172000938, "grad_norm": 0.0, "learning_rate": 1.822299393827386e-05, "loss": 1.2459, "step": 5544 }, { "epoch": 0.21695750841223882, "grad_norm": 0.0, "learning_rate": 1.8222272747922178e-05, "loss": 1.1013, "step": 5545 }, { "epoch": 0.21699663510446826, "grad_norm": 0.0, "learning_rate": 1.8221551425531095e-05, "loss": 1.1343, "step": 5546 }, { "epoch": 0.2170357617966977, "grad_norm": 0.0, "learning_rate": 1.82208299711122e-05, "loss": 1.2205, "step": 5547 }, { "epoch": 0.21707488848892714, "grad_norm": 0.0, "learning_rate": 1.8220108384677076e-05, "loss": 1.1807, "step": 5548 }, { "epoch": 0.21711401518115658, "grad_norm": 0.0, "learning_rate": 1.8219386666237314e-05, "loss": 1.0995, "step": 5549 }, { "epoch": 0.21715314187338602, "grad_norm": 0.0, "learning_rate": 1.82186648158045e-05, "loss": 1.113, "step": 5550 }, { "epoch": 0.21719226856561547, "grad_norm": 0.0, "learning_rate": 1.8217942833390227e-05, "loss": 1.3029, "step": 5551 }, { "epoch": 0.2172313952578449, "grad_norm": 0.0, "learning_rate": 1.821722071900609e-05, "loss": 1.1521, "step": 5552 }, { "epoch": 0.21727052195007435, "grad_norm": 0.0, "learning_rate": 1.8216498472663685e-05, "loss": 1.1888, "step": 5553 }, { "epoch": 0.2173096486423038, "grad_norm": 0.0, "learning_rate": 1.8215776094374612e-05, "loss": 1.1631, "step": 5554 }, { "epoch": 0.21734877533453323, "grad_norm": 0.0, "learning_rate": 1.8215053584150467e-05, "loss": 1.1355, "step": 5555 }, { "epoch": 0.21738790202676267, "grad_norm": 0.0, "learning_rate": 1.8214330942002855e-05, "loss": 1.1631, "step": 5556 }, { "epoch": 0.2174270287189921, "grad_norm": 0.0, "learning_rate": 1.8213608167943384e-05, "loss": 1.2104, "step": 5557 }, { "epoch": 0.21746615541122152, "grad_norm": 0.0, "learning_rate": 1.8212885261983657e-05, "loss": 1.1777, "step": 5558 }, { "epoch": 0.21750528210345096, "grad_norm": 0.0, "learning_rate": 1.8212162224135283e-05, "loss": 1.2266, "step": 5559 }, { "epoch": 0.2175444087956804, "grad_norm": 0.0, "learning_rate": 1.821143905440988e-05, "loss": 1.1328, "step": 5560 }, { "epoch": 0.21758353548790985, "grad_norm": 0.0, "learning_rate": 1.8210715752819047e-05, "loss": 1.1327, "step": 5561 }, { "epoch": 0.2176226621801393, "grad_norm": 0.0, "learning_rate": 1.820999231937441e-05, "loss": 1.007, "step": 5562 }, { "epoch": 0.21766178887236873, "grad_norm": 0.0, "learning_rate": 1.8209268754087586e-05, "loss": 1.1601, "step": 5563 }, { "epoch": 0.21770091556459817, "grad_norm": 0.0, "learning_rate": 1.8208545056970193e-05, "loss": 1.2724, "step": 5564 }, { "epoch": 0.2177400422568276, "grad_norm": 0.0, "learning_rate": 1.820782122803385e-05, "loss": 1.266, "step": 5565 }, { "epoch": 0.21777916894905705, "grad_norm": 0.0, "learning_rate": 1.8207097267290183e-05, "loss": 1.0701, "step": 5566 }, { "epoch": 0.2178182956412865, "grad_norm": 0.0, "learning_rate": 1.8206373174750824e-05, "loss": 1.1523, "step": 5567 }, { "epoch": 0.21785742233351593, "grad_norm": 0.0, "learning_rate": 1.8205648950427388e-05, "loss": 1.2166, "step": 5568 }, { "epoch": 0.21789654902574537, "grad_norm": 0.0, "learning_rate": 1.8204924594331514e-05, "loss": 1.1706, "step": 5569 }, { "epoch": 0.2179356757179748, "grad_norm": 0.0, "learning_rate": 1.8204200106474834e-05, "loss": 1.191, "step": 5570 }, { "epoch": 0.21797480241020425, "grad_norm": 0.0, "learning_rate": 1.820347548686898e-05, "loss": 1.1541, "step": 5571 }, { "epoch": 0.21801392910243367, "grad_norm": 0.0, "learning_rate": 1.8202750735525584e-05, "loss": 1.0621, "step": 5572 }, { "epoch": 0.2180530557946631, "grad_norm": 0.0, "learning_rate": 1.8202025852456294e-05, "loss": 1.1832, "step": 5573 }, { "epoch": 0.21809218248689255, "grad_norm": 0.0, "learning_rate": 1.820130083767275e-05, "loss": 1.2719, "step": 5574 }, { "epoch": 0.218131309179122, "grad_norm": 0.0, "learning_rate": 1.8200575691186588e-05, "loss": 1.2143, "step": 5575 }, { "epoch": 0.21817043587135143, "grad_norm": 0.0, "learning_rate": 1.8199850413009455e-05, "loss": 1.256, "step": 5576 }, { "epoch": 0.21820956256358087, "grad_norm": 0.0, "learning_rate": 1.8199125003153e-05, "loss": 1.4114, "step": 5577 }, { "epoch": 0.2182486892558103, "grad_norm": 0.0, "learning_rate": 1.8198399461628873e-05, "loss": 1.2153, "step": 5578 }, { "epoch": 0.21828781594803975, "grad_norm": 0.0, "learning_rate": 1.8197673788448723e-05, "loss": 1.0945, "step": 5579 }, { "epoch": 0.2183269426402692, "grad_norm": 0.0, "learning_rate": 1.8196947983624207e-05, "loss": 1.1382, "step": 5580 }, { "epoch": 0.21836606933249864, "grad_norm": 0.0, "learning_rate": 1.8196222047166973e-05, "loss": 1.0108, "step": 5581 }, { "epoch": 0.21840519602472808, "grad_norm": 0.0, "learning_rate": 1.8195495979088686e-05, "loss": 1.0105, "step": 5582 }, { "epoch": 0.21844432271695752, "grad_norm": 0.0, "learning_rate": 1.8194769779401004e-05, "loss": 1.1564, "step": 5583 }, { "epoch": 0.21848344940918696, "grad_norm": 0.0, "learning_rate": 1.8194043448115584e-05, "loss": 1.2119, "step": 5584 }, { "epoch": 0.2185225761014164, "grad_norm": 0.0, "learning_rate": 1.8193316985244097e-05, "loss": 1.1299, "step": 5585 }, { "epoch": 0.2185617027936458, "grad_norm": 0.0, "learning_rate": 1.8192590390798205e-05, "loss": 1.1947, "step": 5586 }, { "epoch": 0.21860082948587525, "grad_norm": 0.0, "learning_rate": 1.819186366478958e-05, "loss": 1.1915, "step": 5587 }, { "epoch": 0.2186399561781047, "grad_norm": 0.0, "learning_rate": 1.8191136807229888e-05, "loss": 1.1701, "step": 5588 }, { "epoch": 0.21867908287033413, "grad_norm": 0.0, "learning_rate": 1.81904098181308e-05, "loss": 1.1154, "step": 5589 }, { "epoch": 0.21871820956256358, "grad_norm": 0.0, "learning_rate": 1.8189682697504e-05, "loss": 1.199, "step": 5590 }, { "epoch": 0.21875733625479302, "grad_norm": 0.0, "learning_rate": 1.818895544536115e-05, "loss": 1.1815, "step": 5591 }, { "epoch": 0.21879646294702246, "grad_norm": 0.0, "learning_rate": 1.8188228061713943e-05, "loss": 1.1929, "step": 5592 }, { "epoch": 0.2188355896392519, "grad_norm": 0.0, "learning_rate": 1.8187500546574052e-05, "loss": 1.1071, "step": 5593 }, { "epoch": 0.21887471633148134, "grad_norm": 0.0, "learning_rate": 1.818677289995316e-05, "loss": 1.1788, "step": 5594 }, { "epoch": 0.21891384302371078, "grad_norm": 0.0, "learning_rate": 1.8186045121862953e-05, "loss": 1.0587, "step": 5595 }, { "epoch": 0.21895296971594022, "grad_norm": 0.0, "learning_rate": 1.818531721231512e-05, "loss": 1.11, "step": 5596 }, { "epoch": 0.21899209640816966, "grad_norm": 0.0, "learning_rate": 1.8184589171321353e-05, "loss": 1.0551, "step": 5597 }, { "epoch": 0.2190312231003991, "grad_norm": 0.0, "learning_rate": 1.818386099889334e-05, "loss": 1.018, "step": 5598 }, { "epoch": 0.21907034979262854, "grad_norm": 0.0, "learning_rate": 1.8183132695042768e-05, "loss": 0.993, "step": 5599 }, { "epoch": 0.21910947648485796, "grad_norm": 0.0, "learning_rate": 1.8182404259781344e-05, "loss": 1.1147, "step": 5600 }, { "epoch": 0.2191486031770874, "grad_norm": 0.0, "learning_rate": 1.8181675693120756e-05, "loss": 1.2205, "step": 5601 }, { "epoch": 0.21918772986931684, "grad_norm": 0.0, "learning_rate": 1.8180946995072714e-05, "loss": 1.1599, "step": 5602 }, { "epoch": 0.21922685656154628, "grad_norm": 0.0, "learning_rate": 1.8180218165648913e-05, "loss": 1.1646, "step": 5603 }, { "epoch": 0.21926598325377572, "grad_norm": 0.0, "learning_rate": 1.8179489204861055e-05, "loss": 1.2142, "step": 5604 }, { "epoch": 0.21930510994600516, "grad_norm": 0.0, "learning_rate": 1.8178760112720854e-05, "loss": 1.2047, "step": 5605 }, { "epoch": 0.2193442366382346, "grad_norm": 0.0, "learning_rate": 1.8178030889240013e-05, "loss": 1.3321, "step": 5606 }, { "epoch": 0.21938336333046404, "grad_norm": 0.0, "learning_rate": 1.8177301534430243e-05, "loss": 1.1721, "step": 5607 }, { "epoch": 0.21942249002269348, "grad_norm": 0.0, "learning_rate": 1.8176572048303258e-05, "loss": 1.2519, "step": 5608 }, { "epoch": 0.21946161671492292, "grad_norm": 0.0, "learning_rate": 1.8175842430870774e-05, "loss": 1.0932, "step": 5609 }, { "epoch": 0.21950074340715237, "grad_norm": 0.0, "learning_rate": 1.81751126821445e-05, "loss": 1.1035, "step": 5610 }, { "epoch": 0.2195398700993818, "grad_norm": 0.0, "learning_rate": 1.8174382802136167e-05, "loss": 1.132, "step": 5611 }, { "epoch": 0.21957899679161125, "grad_norm": 0.0, "learning_rate": 1.8173652790857482e-05, "loss": 1.0948, "step": 5612 }, { "epoch": 0.2196181234838407, "grad_norm": 0.0, "learning_rate": 1.817292264832018e-05, "loss": 1.1893, "step": 5613 }, { "epoch": 0.2196572501760701, "grad_norm": 0.0, "learning_rate": 1.817219237453598e-05, "loss": 1.152, "step": 5614 }, { "epoch": 0.21969637686829954, "grad_norm": 0.0, "learning_rate": 1.8171461969516612e-05, "loss": 1.303, "step": 5615 }, { "epoch": 0.21973550356052898, "grad_norm": 0.0, "learning_rate": 1.8170731433273802e-05, "loss": 1.2507, "step": 5616 }, { "epoch": 0.21977463025275842, "grad_norm": 0.0, "learning_rate": 1.8170000765819283e-05, "loss": 1.2578, "step": 5617 }, { "epoch": 0.21981375694498786, "grad_norm": 0.0, "learning_rate": 1.8169269967164792e-05, "loss": 1.1086, "step": 5618 }, { "epoch": 0.2198528836372173, "grad_norm": 0.0, "learning_rate": 1.816853903732206e-05, "loss": 1.3674, "step": 5619 }, { "epoch": 0.21989201032944675, "grad_norm": 0.0, "learning_rate": 1.8167807976302828e-05, "loss": 1.0291, "step": 5620 }, { "epoch": 0.2199311370216762, "grad_norm": 0.0, "learning_rate": 1.816707678411884e-05, "loss": 1.1287, "step": 5621 }, { "epoch": 0.21997026371390563, "grad_norm": 0.0, "learning_rate": 1.8166345460781826e-05, "loss": 1.1454, "step": 5622 }, { "epoch": 0.22000939040613507, "grad_norm": 0.0, "learning_rate": 1.8165614006303537e-05, "loss": 1.1345, "step": 5623 }, { "epoch": 0.2200485170983645, "grad_norm": 0.0, "learning_rate": 1.816488242069572e-05, "loss": 1.1262, "step": 5624 }, { "epoch": 0.22008764379059395, "grad_norm": 0.0, "learning_rate": 1.8164150703970124e-05, "loss": 1.0819, "step": 5625 }, { "epoch": 0.2201267704828234, "grad_norm": 0.0, "learning_rate": 1.8163418856138496e-05, "loss": 1.0975, "step": 5626 }, { "epoch": 0.22016589717505283, "grad_norm": 0.0, "learning_rate": 1.8162686877212592e-05, "loss": 1.1521, "step": 5627 }, { "epoch": 0.22020502386728227, "grad_norm": 0.0, "learning_rate": 1.8161954767204165e-05, "loss": 1.1283, "step": 5628 }, { "epoch": 0.2202441505595117, "grad_norm": 0.0, "learning_rate": 1.816122252612497e-05, "loss": 1.2858, "step": 5629 }, { "epoch": 0.22028327725174113, "grad_norm": 0.0, "learning_rate": 1.816049015398677e-05, "loss": 1.0851, "step": 5630 }, { "epoch": 0.22032240394397057, "grad_norm": 0.0, "learning_rate": 1.8159757650801323e-05, "loss": 1.0947, "step": 5631 }, { "epoch": 0.2203615306362, "grad_norm": 0.0, "learning_rate": 1.8159025016580394e-05, "loss": 1.2516, "step": 5632 }, { "epoch": 0.22040065732842945, "grad_norm": 0.0, "learning_rate": 1.8158292251335746e-05, "loss": 1.1412, "step": 5633 }, { "epoch": 0.2204397840206589, "grad_norm": 0.0, "learning_rate": 1.8157559355079147e-05, "loss": 1.2073, "step": 5634 }, { "epoch": 0.22047891071288833, "grad_norm": 0.0, "learning_rate": 1.8156826327822367e-05, "loss": 1.266, "step": 5635 }, { "epoch": 0.22051803740511777, "grad_norm": 0.0, "learning_rate": 1.8156093169577177e-05, "loss": 1.0803, "step": 5636 }, { "epoch": 0.2205571640973472, "grad_norm": 0.0, "learning_rate": 1.8155359880355352e-05, "loss": 1.1016, "step": 5637 }, { "epoch": 0.22059629078957665, "grad_norm": 0.0, "learning_rate": 1.815462646016867e-05, "loss": 0.9656, "step": 5638 }, { "epoch": 0.2206354174818061, "grad_norm": 0.0, "learning_rate": 1.81538929090289e-05, "loss": 1.0507, "step": 5639 }, { "epoch": 0.22067454417403554, "grad_norm": 0.0, "learning_rate": 1.8153159226947827e-05, "loss": 1.1124, "step": 5640 }, { "epoch": 0.22071367086626498, "grad_norm": 0.0, "learning_rate": 1.8152425413937233e-05, "loss": 1.298, "step": 5641 }, { "epoch": 0.22075279755849442, "grad_norm": 0.0, "learning_rate": 1.8151691470008906e-05, "loss": 1.1498, "step": 5642 }, { "epoch": 0.22079192425072383, "grad_norm": 0.0, "learning_rate": 1.8150957395174628e-05, "loss": 1.1945, "step": 5643 }, { "epoch": 0.22083105094295327, "grad_norm": 0.0, "learning_rate": 1.8150223189446184e-05, "loss": 1.1793, "step": 5644 }, { "epoch": 0.2208701776351827, "grad_norm": 0.0, "learning_rate": 1.814948885283537e-05, "loss": 1.151, "step": 5645 }, { "epoch": 0.22090930432741215, "grad_norm": 0.0, "learning_rate": 1.8148754385353982e-05, "loss": 1.1034, "step": 5646 }, { "epoch": 0.2209484310196416, "grad_norm": 0.0, "learning_rate": 1.8148019787013804e-05, "loss": 1.121, "step": 5647 }, { "epoch": 0.22098755771187104, "grad_norm": 0.0, "learning_rate": 1.814728505782664e-05, "loss": 1.0861, "step": 5648 }, { "epoch": 0.22102668440410048, "grad_norm": 0.0, "learning_rate": 1.8146550197804287e-05, "loss": 1.0995, "step": 5649 }, { "epoch": 0.22106581109632992, "grad_norm": 0.0, "learning_rate": 1.8145815206958544e-05, "loss": 1.2747, "step": 5650 }, { "epoch": 0.22110493778855936, "grad_norm": 0.0, "learning_rate": 1.814508008530122e-05, "loss": 1.1134, "step": 5651 }, { "epoch": 0.2211440644807888, "grad_norm": 0.0, "learning_rate": 1.8144344832844114e-05, "loss": 1.1548, "step": 5652 }, { "epoch": 0.22118319117301824, "grad_norm": 0.0, "learning_rate": 1.8143609449599035e-05, "loss": 1.0919, "step": 5653 }, { "epoch": 0.22122231786524768, "grad_norm": 0.0, "learning_rate": 1.8142873935577795e-05, "loss": 1.2157, "step": 5654 }, { "epoch": 0.22126144455747712, "grad_norm": 0.0, "learning_rate": 1.8142138290792202e-05, "loss": 1.2334, "step": 5655 }, { "epoch": 0.22130057124970656, "grad_norm": 0.0, "learning_rate": 1.814140251525407e-05, "loss": 1.0992, "step": 5656 }, { "epoch": 0.22133969794193598, "grad_norm": 0.0, "learning_rate": 1.8140666608975216e-05, "loss": 1.1585, "step": 5657 }, { "epoch": 0.22137882463416542, "grad_norm": 0.0, "learning_rate": 1.8139930571967455e-05, "loss": 1.2488, "step": 5658 }, { "epoch": 0.22141795132639486, "grad_norm": 0.0, "learning_rate": 1.8139194404242613e-05, "loss": 1.2367, "step": 5659 }, { "epoch": 0.2214570780186243, "grad_norm": 0.0, "learning_rate": 1.8138458105812505e-05, "loss": 1.2625, "step": 5660 }, { "epoch": 0.22149620471085374, "grad_norm": 0.0, "learning_rate": 1.8137721676688958e-05, "loss": 1.2211, "step": 5661 }, { "epoch": 0.22153533140308318, "grad_norm": 0.0, "learning_rate": 1.81369851168838e-05, "loss": 1.0512, "step": 5662 }, { "epoch": 0.22157445809531262, "grad_norm": 0.0, "learning_rate": 1.8136248426408856e-05, "loss": 1.1733, "step": 5663 }, { "epoch": 0.22161358478754206, "grad_norm": 0.0, "learning_rate": 1.8135511605275954e-05, "loss": 1.2429, "step": 5664 }, { "epoch": 0.2216527114797715, "grad_norm": 0.0, "learning_rate": 1.8134774653496934e-05, "loss": 1.1571, "step": 5665 }, { "epoch": 0.22169183817200094, "grad_norm": 0.0, "learning_rate": 1.8134037571083624e-05, "loss": 1.1386, "step": 5666 }, { "epoch": 0.22173096486423038, "grad_norm": 0.0, "learning_rate": 1.813330035804787e-05, "loss": 1.2353, "step": 5667 }, { "epoch": 0.22177009155645983, "grad_norm": 0.0, "learning_rate": 1.8132563014401497e-05, "loss": 1.1344, "step": 5668 }, { "epoch": 0.22180921824868927, "grad_norm": 0.0, "learning_rate": 1.8131825540156353e-05, "loss": 1.121, "step": 5669 }, { "epoch": 0.2218483449409187, "grad_norm": 0.0, "learning_rate": 1.813108793532428e-05, "loss": 1.0997, "step": 5670 }, { "epoch": 0.22188747163314812, "grad_norm": 0.0, "learning_rate": 1.8130350199917124e-05, "loss": 1.1341, "step": 5671 }, { "epoch": 0.22192659832537756, "grad_norm": 0.0, "learning_rate": 1.8129612333946737e-05, "loss": 1.0776, "step": 5672 }, { "epoch": 0.221965725017607, "grad_norm": 0.0, "learning_rate": 1.8128874337424957e-05, "loss": 1.0715, "step": 5673 }, { "epoch": 0.22200485170983644, "grad_norm": 0.0, "learning_rate": 1.8128136210363646e-05, "loss": 1.1542, "step": 5674 }, { "epoch": 0.22204397840206588, "grad_norm": 0.0, "learning_rate": 1.812739795277465e-05, "loss": 1.124, "step": 5675 }, { "epoch": 0.22208310509429532, "grad_norm": 0.0, "learning_rate": 1.8126659564669827e-05, "loss": 1.0048, "step": 5676 }, { "epoch": 0.22212223178652477, "grad_norm": 0.0, "learning_rate": 1.8125921046061035e-05, "loss": 1.1614, "step": 5677 }, { "epoch": 0.2221613584787542, "grad_norm": 0.0, "learning_rate": 1.8125182396960132e-05, "loss": 1.2309, "step": 5678 }, { "epoch": 0.22220048517098365, "grad_norm": 0.0, "learning_rate": 1.812444361737898e-05, "loss": 0.948, "step": 5679 }, { "epoch": 0.2222396118632131, "grad_norm": 0.0, "learning_rate": 1.8123704707329447e-05, "loss": 1.158, "step": 5680 }, { "epoch": 0.22227873855544253, "grad_norm": 0.0, "learning_rate": 1.8122965666823398e-05, "loss": 1.2961, "step": 5681 }, { "epoch": 0.22231786524767197, "grad_norm": 0.0, "learning_rate": 1.8122226495872693e-05, "loss": 1.1713, "step": 5682 }, { "epoch": 0.2223569919399014, "grad_norm": 0.0, "learning_rate": 1.812148719448921e-05, "loss": 1.1423, "step": 5683 }, { "epoch": 0.22239611863213085, "grad_norm": 0.0, "learning_rate": 1.8120747762684826e-05, "loss": 1.1928, "step": 5684 }, { "epoch": 0.2224352453243603, "grad_norm": 0.0, "learning_rate": 1.81200082004714e-05, "loss": 1.1556, "step": 5685 }, { "epoch": 0.2224743720165897, "grad_norm": 0.0, "learning_rate": 1.8119268507860823e-05, "loss": 1.0092, "step": 5686 }, { "epoch": 0.22251349870881915, "grad_norm": 0.0, "learning_rate": 1.8118528684864965e-05, "loss": 1.2409, "step": 5687 }, { "epoch": 0.2225526254010486, "grad_norm": 0.0, "learning_rate": 1.811778873149571e-05, "loss": 1.0878, "step": 5688 }, { "epoch": 0.22259175209327803, "grad_norm": 0.0, "learning_rate": 1.8117048647764937e-05, "loss": 1.2236, "step": 5689 }, { "epoch": 0.22263087878550747, "grad_norm": 0.0, "learning_rate": 1.8116308433684538e-05, "loss": 1.1021, "step": 5690 }, { "epoch": 0.2226700054777369, "grad_norm": 0.0, "learning_rate": 1.8115568089266396e-05, "loss": 1.2059, "step": 5691 }, { "epoch": 0.22270913216996635, "grad_norm": 0.0, "learning_rate": 1.81148276145224e-05, "loss": 1.0938, "step": 5692 }, { "epoch": 0.2227482588621958, "grad_norm": 0.0, "learning_rate": 1.811408700946444e-05, "loss": 1.2177, "step": 5693 }, { "epoch": 0.22278738555442523, "grad_norm": 0.0, "learning_rate": 1.811334627410441e-05, "loss": 1.1503, "step": 5694 }, { "epoch": 0.22282651224665467, "grad_norm": 0.0, "learning_rate": 1.8112605408454205e-05, "loss": 1.2169, "step": 5695 }, { "epoch": 0.22286563893888411, "grad_norm": 0.0, "learning_rate": 1.8111864412525723e-05, "loss": 1.1652, "step": 5696 }, { "epoch": 0.22290476563111356, "grad_norm": 0.0, "learning_rate": 1.811112328633086e-05, "loss": 1.0692, "step": 5697 }, { "epoch": 0.222943892323343, "grad_norm": 0.0, "learning_rate": 1.8110382029881526e-05, "loss": 1.2817, "step": 5698 }, { "epoch": 0.22298301901557244, "grad_norm": 0.0, "learning_rate": 1.810964064318962e-05, "loss": 1.1747, "step": 5699 }, { "epoch": 0.22302214570780185, "grad_norm": 0.0, "learning_rate": 1.8108899126267045e-05, "loss": 1.1005, "step": 5700 }, { "epoch": 0.2230612724000313, "grad_norm": 0.0, "learning_rate": 1.810815747912571e-05, "loss": 1.0374, "step": 5701 }, { "epoch": 0.22310039909226073, "grad_norm": 0.0, "learning_rate": 1.8107415701777527e-05, "loss": 1.2223, "step": 5702 }, { "epoch": 0.22313952578449017, "grad_norm": 0.0, "learning_rate": 1.810667379423441e-05, "loss": 1.063, "step": 5703 }, { "epoch": 0.2231786524767196, "grad_norm": 0.0, "learning_rate": 1.8105931756508263e-05, "loss": 0.8236, "step": 5704 }, { "epoch": 0.22321777916894905, "grad_norm": 0.0, "learning_rate": 1.8105189588611015e-05, "loss": 1.1941, "step": 5705 }, { "epoch": 0.2232569058611785, "grad_norm": 0.0, "learning_rate": 1.8104447290554575e-05, "loss": 1.1931, "step": 5706 }, { "epoch": 0.22329603255340794, "grad_norm": 0.0, "learning_rate": 1.810370486235087e-05, "loss": 1.0878, "step": 5707 }, { "epoch": 0.22333515924563738, "grad_norm": 0.0, "learning_rate": 1.810296230401182e-05, "loss": 1.1958, "step": 5708 }, { "epoch": 0.22337428593786682, "grad_norm": 0.0, "learning_rate": 1.8102219615549346e-05, "loss": 0.9774, "step": 5709 }, { "epoch": 0.22341341263009626, "grad_norm": 0.0, "learning_rate": 1.8101476796975377e-05, "loss": 1.1609, "step": 5710 }, { "epoch": 0.2234525393223257, "grad_norm": 0.0, "learning_rate": 1.8100733848301845e-05, "loss": 1.2446, "step": 5711 }, { "epoch": 0.22349166601455514, "grad_norm": 0.0, "learning_rate": 1.8099990769540677e-05, "loss": 1.1777, "step": 5712 }, { "epoch": 0.22353079270678458, "grad_norm": 0.0, "learning_rate": 1.8099247560703806e-05, "loss": 1.1247, "step": 5713 }, { "epoch": 0.223569919399014, "grad_norm": 0.0, "learning_rate": 1.809850422180317e-05, "loss": 1.018, "step": 5714 }, { "epoch": 0.22360904609124344, "grad_norm": 0.0, "learning_rate": 1.8097760752850705e-05, "loss": 1.1667, "step": 5715 }, { "epoch": 0.22364817278347288, "grad_norm": 0.0, "learning_rate": 1.809701715385835e-05, "loss": 1.2219, "step": 5716 }, { "epoch": 0.22368729947570232, "grad_norm": 0.0, "learning_rate": 1.8096273424838046e-05, "loss": 1.1536, "step": 5717 }, { "epoch": 0.22372642616793176, "grad_norm": 0.0, "learning_rate": 1.809552956580173e-05, "loss": 1.1501, "step": 5718 }, { "epoch": 0.2237655528601612, "grad_norm": 0.0, "learning_rate": 1.8094785576761357e-05, "loss": 1.1594, "step": 5719 }, { "epoch": 0.22380467955239064, "grad_norm": 0.0, "learning_rate": 1.809404145772887e-05, "loss": 1.1303, "step": 5720 }, { "epoch": 0.22384380624462008, "grad_norm": 0.0, "learning_rate": 1.8093297208716223e-05, "loss": 1.1464, "step": 5721 }, { "epoch": 0.22388293293684952, "grad_norm": 0.0, "learning_rate": 1.809255282973536e-05, "loss": 1.1694, "step": 5722 }, { "epoch": 0.22392205962907896, "grad_norm": 0.0, "learning_rate": 1.8091808320798242e-05, "loss": 0.9955, "step": 5723 }, { "epoch": 0.2239611863213084, "grad_norm": 0.0, "learning_rate": 1.8091063681916823e-05, "loss": 1.0962, "step": 5724 }, { "epoch": 0.22400031301353784, "grad_norm": 0.0, "learning_rate": 1.8090318913103057e-05, "loss": 1.0452, "step": 5725 }, { "epoch": 0.22403943970576728, "grad_norm": 0.0, "learning_rate": 1.808957401436891e-05, "loss": 1.1335, "step": 5726 }, { "epoch": 0.22407856639799673, "grad_norm": 0.0, "learning_rate": 1.8088828985726337e-05, "loss": 1.0955, "step": 5727 }, { "epoch": 0.22411769309022614, "grad_norm": 0.0, "learning_rate": 1.808808382718731e-05, "loss": 1.1101, "step": 5728 }, { "epoch": 0.22415681978245558, "grad_norm": 0.0, "learning_rate": 1.808733853876379e-05, "loss": 1.1257, "step": 5729 }, { "epoch": 0.22419594647468502, "grad_norm": 0.0, "learning_rate": 1.8086593120467748e-05, "loss": 1.1852, "step": 5730 }, { "epoch": 0.22423507316691446, "grad_norm": 0.0, "learning_rate": 1.8085847572311154e-05, "loss": 1.201, "step": 5731 }, { "epoch": 0.2242741998591439, "grad_norm": 0.0, "learning_rate": 1.8085101894305975e-05, "loss": 1.2269, "step": 5732 }, { "epoch": 0.22431332655137334, "grad_norm": 0.0, "learning_rate": 1.8084356086464197e-05, "loss": 1.2072, "step": 5733 }, { "epoch": 0.22435245324360278, "grad_norm": 0.0, "learning_rate": 1.808361014879779e-05, "loss": 1.3187, "step": 5734 }, { "epoch": 0.22439157993583222, "grad_norm": 0.0, "learning_rate": 1.808286408131873e-05, "loss": 1.1882, "step": 5735 }, { "epoch": 0.22443070662806167, "grad_norm": 0.0, "learning_rate": 1.8082117884039004e-05, "loss": 1.0153, "step": 5736 }, { "epoch": 0.2244698333202911, "grad_norm": 0.0, "learning_rate": 1.808137155697059e-05, "loss": 1.2931, "step": 5737 }, { "epoch": 0.22450896001252055, "grad_norm": 0.0, "learning_rate": 1.8080625100125474e-05, "loss": 1.1066, "step": 5738 }, { "epoch": 0.22454808670475, "grad_norm": 0.0, "learning_rate": 1.8079878513515648e-05, "loss": 1.1675, "step": 5739 }, { "epoch": 0.22458721339697943, "grad_norm": 0.0, "learning_rate": 1.8079131797153097e-05, "loss": 1.2004, "step": 5740 }, { "epoch": 0.22462634008920887, "grad_norm": 0.0, "learning_rate": 1.8078384951049815e-05, "loss": 1.2153, "step": 5741 }, { "epoch": 0.2246654667814383, "grad_norm": 0.0, "learning_rate": 1.807763797521779e-05, "loss": 1.212, "step": 5742 }, { "epoch": 0.22470459347366772, "grad_norm": 0.0, "learning_rate": 1.807689086966902e-05, "loss": 1.1682, "step": 5743 }, { "epoch": 0.22474372016589717, "grad_norm": 0.0, "learning_rate": 1.807614363441551e-05, "loss": 1.1186, "step": 5744 }, { "epoch": 0.2247828468581266, "grad_norm": 0.0, "learning_rate": 1.8075396269469247e-05, "loss": 1.2174, "step": 5745 }, { "epoch": 0.22482197355035605, "grad_norm": 0.0, "learning_rate": 1.807464877484224e-05, "loss": 1.1048, "step": 5746 }, { "epoch": 0.2248611002425855, "grad_norm": 0.0, "learning_rate": 1.8073901150546492e-05, "loss": 1.0815, "step": 5747 }, { "epoch": 0.22490022693481493, "grad_norm": 0.0, "learning_rate": 1.8073153396594012e-05, "loss": 1.1494, "step": 5748 }, { "epoch": 0.22493935362704437, "grad_norm": 0.0, "learning_rate": 1.80724055129968e-05, "loss": 1.1958, "step": 5749 }, { "epoch": 0.2249784803192738, "grad_norm": 0.0, "learning_rate": 1.8071657499766875e-05, "loss": 1.1531, "step": 5750 }, { "epoch": 0.22501760701150325, "grad_norm": 0.0, "learning_rate": 1.807090935691624e-05, "loss": 1.3494, "step": 5751 }, { "epoch": 0.2250567337037327, "grad_norm": 0.0, "learning_rate": 1.8070161084456915e-05, "loss": 1.1358, "step": 5752 }, { "epoch": 0.22509586039596213, "grad_norm": 0.0, "learning_rate": 1.806941268240092e-05, "loss": 1.1257, "step": 5753 }, { "epoch": 0.22513498708819157, "grad_norm": 0.0, "learning_rate": 1.8068664150760267e-05, "loss": 1.1625, "step": 5754 }, { "epoch": 0.22517411378042101, "grad_norm": 0.0, "learning_rate": 1.8067915489546976e-05, "loss": 1.1837, "step": 5755 }, { "epoch": 0.22521324047265046, "grad_norm": 0.0, "learning_rate": 1.8067166698773073e-05, "loss": 1.0018, "step": 5756 }, { "epoch": 0.22525236716487987, "grad_norm": 0.0, "learning_rate": 1.8066417778450584e-05, "loss": 1.2357, "step": 5757 }, { "epoch": 0.2252914938571093, "grad_norm": 0.0, "learning_rate": 1.806566872859153e-05, "loss": 1.1533, "step": 5758 }, { "epoch": 0.22533062054933875, "grad_norm": 0.0, "learning_rate": 1.8064919549207946e-05, "loss": 1.0984, "step": 5759 }, { "epoch": 0.2253697472415682, "grad_norm": 0.0, "learning_rate": 1.8064170240311857e-05, "loss": 1.117, "step": 5760 }, { "epoch": 0.22540887393379763, "grad_norm": 0.0, "learning_rate": 1.80634208019153e-05, "loss": 1.1388, "step": 5761 }, { "epoch": 0.22544800062602707, "grad_norm": 0.0, "learning_rate": 1.806267123403031e-05, "loss": 1.0758, "step": 5762 }, { "epoch": 0.22548712731825651, "grad_norm": 0.0, "learning_rate": 1.806192153666892e-05, "loss": 1.0352, "step": 5763 }, { "epoch": 0.22552625401048595, "grad_norm": 0.0, "learning_rate": 1.8061171709843174e-05, "loss": 1.0444, "step": 5764 }, { "epoch": 0.2255653807027154, "grad_norm": 0.0, "learning_rate": 1.8060421753565113e-05, "loss": 1.1678, "step": 5765 }, { "epoch": 0.22560450739494484, "grad_norm": 0.0, "learning_rate": 1.805967166784678e-05, "loss": 1.0348, "step": 5766 }, { "epoch": 0.22564363408717428, "grad_norm": 0.0, "learning_rate": 1.8058921452700216e-05, "loss": 1.2822, "step": 5767 }, { "epoch": 0.22568276077940372, "grad_norm": 0.0, "learning_rate": 1.8058171108137474e-05, "loss": 1.2462, "step": 5768 }, { "epoch": 0.22572188747163316, "grad_norm": 0.0, "learning_rate": 1.80574206341706e-05, "loss": 1.1511, "step": 5769 }, { "epoch": 0.2257610141638626, "grad_norm": 0.0, "learning_rate": 1.8056670030811647e-05, "loss": 1.3509, "step": 5770 }, { "epoch": 0.225800140856092, "grad_norm": 0.0, "learning_rate": 1.805591929807267e-05, "loss": 1.0965, "step": 5771 }, { "epoch": 0.22583926754832145, "grad_norm": 0.0, "learning_rate": 1.8055168435965722e-05, "loss": 1.2179, "step": 5772 }, { "epoch": 0.2258783942405509, "grad_norm": 0.0, "learning_rate": 1.8054417444502864e-05, "loss": 1.1631, "step": 5773 }, { "epoch": 0.22591752093278034, "grad_norm": 0.0, "learning_rate": 1.8053666323696155e-05, "loss": 1.2284, "step": 5774 }, { "epoch": 0.22595664762500978, "grad_norm": 0.0, "learning_rate": 1.8052915073557655e-05, "loss": 1.1169, "step": 5775 }, { "epoch": 0.22599577431723922, "grad_norm": 0.0, "learning_rate": 1.8052163694099433e-05, "loss": 1.0177, "step": 5776 }, { "epoch": 0.22603490100946866, "grad_norm": 0.0, "learning_rate": 1.805141218533355e-05, "loss": 1.1926, "step": 5777 }, { "epoch": 0.2260740277016981, "grad_norm": 0.0, "learning_rate": 1.8050660547272074e-05, "loss": 1.0111, "step": 5778 }, { "epoch": 0.22611315439392754, "grad_norm": 0.0, "learning_rate": 1.804990877992708e-05, "loss": 1.0635, "step": 5779 }, { "epoch": 0.22615228108615698, "grad_norm": 0.0, "learning_rate": 1.804915688331064e-05, "loss": 1.0997, "step": 5780 }, { "epoch": 0.22619140777838642, "grad_norm": 0.0, "learning_rate": 1.8048404857434823e-05, "loss": 1.0582, "step": 5781 }, { "epoch": 0.22623053447061586, "grad_norm": 0.0, "learning_rate": 1.8047652702311712e-05, "loss": 1.037, "step": 5782 }, { "epoch": 0.2262696611628453, "grad_norm": 0.0, "learning_rate": 1.804690041795338e-05, "loss": 1.0822, "step": 5783 }, { "epoch": 0.22630878785507474, "grad_norm": 0.0, "learning_rate": 1.8046148004371914e-05, "loss": 1.101, "step": 5784 }, { "epoch": 0.22634791454730416, "grad_norm": 0.0, "learning_rate": 1.804539546157939e-05, "loss": 1.2075, "step": 5785 }, { "epoch": 0.2263870412395336, "grad_norm": 0.0, "learning_rate": 1.80446427895879e-05, "loss": 1.2213, "step": 5786 }, { "epoch": 0.22642616793176304, "grad_norm": 0.0, "learning_rate": 1.8043889988409524e-05, "loss": 1.0886, "step": 5787 }, { "epoch": 0.22646529462399248, "grad_norm": 0.0, "learning_rate": 1.8043137058056354e-05, "loss": 1.1674, "step": 5788 }, { "epoch": 0.22650442131622192, "grad_norm": 0.0, "learning_rate": 1.8042383998540486e-05, "loss": 1.0717, "step": 5789 }, { "epoch": 0.22654354800845136, "grad_norm": 0.0, "learning_rate": 1.8041630809874004e-05, "loss": 0.9409, "step": 5790 }, { "epoch": 0.2265826747006808, "grad_norm": 0.0, "learning_rate": 1.804087749206901e-05, "loss": 1.197, "step": 5791 }, { "epoch": 0.22662180139291024, "grad_norm": 0.0, "learning_rate": 1.80401240451376e-05, "loss": 1.0916, "step": 5792 }, { "epoch": 0.22666092808513968, "grad_norm": 0.0, "learning_rate": 1.803937046909187e-05, "loss": 1.1875, "step": 5793 }, { "epoch": 0.22670005477736913, "grad_norm": 0.0, "learning_rate": 1.8038616763943925e-05, "loss": 1.0496, "step": 5794 }, { "epoch": 0.22673918146959857, "grad_norm": 0.0, "learning_rate": 1.8037862929705872e-05, "loss": 1.1561, "step": 5795 }, { "epoch": 0.226778308161828, "grad_norm": 0.0, "learning_rate": 1.8037108966389806e-05, "loss": 1.2091, "step": 5796 }, { "epoch": 0.22681743485405745, "grad_norm": 0.0, "learning_rate": 1.8036354874007846e-05, "loss": 1.2079, "step": 5797 }, { "epoch": 0.2268565615462869, "grad_norm": 0.0, "learning_rate": 1.8035600652572093e-05, "loss": 1.3018, "step": 5798 }, { "epoch": 0.2268956882385163, "grad_norm": 0.0, "learning_rate": 1.8034846302094668e-05, "loss": 1.1075, "step": 5799 }, { "epoch": 0.22693481493074574, "grad_norm": 0.0, "learning_rate": 1.803409182258767e-05, "loss": 0.9697, "step": 5800 }, { "epoch": 0.22697394162297518, "grad_norm": 0.0, "learning_rate": 1.8033337214063234e-05, "loss": 1.1506, "step": 5801 }, { "epoch": 0.22701306831520462, "grad_norm": 0.0, "learning_rate": 1.8032582476533463e-05, "loss": 1.0989, "step": 5802 }, { "epoch": 0.22705219500743407, "grad_norm": 0.0, "learning_rate": 1.8031827610010485e-05, "loss": 1.179, "step": 5803 }, { "epoch": 0.2270913216996635, "grad_norm": 0.0, "learning_rate": 1.8031072614506426e-05, "loss": 1.235, "step": 5804 }, { "epoch": 0.22713044839189295, "grad_norm": 0.0, "learning_rate": 1.8030317490033394e-05, "loss": 1.2507, "step": 5805 }, { "epoch": 0.2271695750841224, "grad_norm": 0.0, "learning_rate": 1.802956223660353e-05, "loss": 1.1132, "step": 5806 }, { "epoch": 0.22720870177635183, "grad_norm": 0.0, "learning_rate": 1.802880685422896e-05, "loss": 1.1024, "step": 5807 }, { "epoch": 0.22724782846858127, "grad_norm": 0.0, "learning_rate": 1.8028051342921807e-05, "loss": 1.0528, "step": 5808 }, { "epoch": 0.2272869551608107, "grad_norm": 0.0, "learning_rate": 1.8027295702694216e-05, "loss": 1.3079, "step": 5809 }, { "epoch": 0.22732608185304015, "grad_norm": 0.0, "learning_rate": 1.8026539933558312e-05, "loss": 1.1246, "step": 5810 }, { "epoch": 0.2273652085452696, "grad_norm": 0.0, "learning_rate": 1.8025784035526235e-05, "loss": 1.1449, "step": 5811 }, { "epoch": 0.22740433523749903, "grad_norm": 0.0, "learning_rate": 1.802502800861012e-05, "loss": 1.0276, "step": 5812 }, { "epoch": 0.22744346192972847, "grad_norm": 0.0, "learning_rate": 1.8024271852822116e-05, "loss": 1.131, "step": 5813 }, { "epoch": 0.2274825886219579, "grad_norm": 0.0, "learning_rate": 1.802351556817436e-05, "loss": 1.2775, "step": 5814 }, { "epoch": 0.22752171531418733, "grad_norm": 0.0, "learning_rate": 1.8022759154678995e-05, "loss": 1.161, "step": 5815 }, { "epoch": 0.22756084200641677, "grad_norm": 0.0, "learning_rate": 1.802200261234817e-05, "loss": 1.1647, "step": 5816 }, { "epoch": 0.2275999686986462, "grad_norm": 0.0, "learning_rate": 1.8021245941194042e-05, "loss": 1.0561, "step": 5817 }, { "epoch": 0.22763909539087565, "grad_norm": 0.0, "learning_rate": 1.802048914122875e-05, "loss": 1.1273, "step": 5818 }, { "epoch": 0.2276782220831051, "grad_norm": 0.0, "learning_rate": 1.8019732212464453e-05, "loss": 1.2306, "step": 5819 }, { "epoch": 0.22771734877533453, "grad_norm": 0.0, "learning_rate": 1.8018975154913307e-05, "loss": 1.0568, "step": 5820 }, { "epoch": 0.22775647546756397, "grad_norm": 0.0, "learning_rate": 1.8018217968587467e-05, "loss": 1.2175, "step": 5821 }, { "epoch": 0.22779560215979341, "grad_norm": 0.0, "learning_rate": 1.8017460653499097e-05, "loss": 1.155, "step": 5822 }, { "epoch": 0.22783472885202286, "grad_norm": 0.0, "learning_rate": 1.8016703209660354e-05, "loss": 0.9985, "step": 5823 }, { "epoch": 0.2278738555442523, "grad_norm": 0.0, "learning_rate": 1.80159456370834e-05, "loss": 1.2841, "step": 5824 }, { "epoch": 0.22791298223648174, "grad_norm": 0.0, "learning_rate": 1.8015187935780405e-05, "loss": 1.016, "step": 5825 }, { "epoch": 0.22795210892871118, "grad_norm": 0.0, "learning_rate": 1.8014430105763536e-05, "loss": 1.1252, "step": 5826 }, { "epoch": 0.22799123562094062, "grad_norm": 0.0, "learning_rate": 1.8013672147044964e-05, "loss": 1.0651, "step": 5827 }, { "epoch": 0.22803036231317003, "grad_norm": 0.0, "learning_rate": 1.8012914059636855e-05, "loss": 1.0981, "step": 5828 }, { "epoch": 0.22806948900539947, "grad_norm": 0.0, "learning_rate": 1.801215584355139e-05, "loss": 1.1204, "step": 5829 }, { "epoch": 0.2281086156976289, "grad_norm": 0.0, "learning_rate": 1.8011397498800742e-05, "loss": 1.0717, "step": 5830 }, { "epoch": 0.22814774238985835, "grad_norm": 0.0, "learning_rate": 1.801063902539709e-05, "loss": 1.2082, "step": 5831 }, { "epoch": 0.2281868690820878, "grad_norm": 0.0, "learning_rate": 1.800988042335261e-05, "loss": 1.1992, "step": 5832 }, { "epoch": 0.22822599577431724, "grad_norm": 0.0, "learning_rate": 1.800912169267949e-05, "loss": 1.2138, "step": 5833 }, { "epoch": 0.22826512246654668, "grad_norm": 0.0, "learning_rate": 1.800836283338991e-05, "loss": 1.2406, "step": 5834 }, { "epoch": 0.22830424915877612, "grad_norm": 0.0, "learning_rate": 1.800760384549606e-05, "loss": 1.1223, "step": 5835 }, { "epoch": 0.22834337585100556, "grad_norm": 0.0, "learning_rate": 1.8006844729010123e-05, "loss": 1.0827, "step": 5836 }, { "epoch": 0.228382502543235, "grad_norm": 0.0, "learning_rate": 1.8006085483944295e-05, "loss": 1.0451, "step": 5837 }, { "epoch": 0.22842162923546444, "grad_norm": 0.0, "learning_rate": 1.8005326110310767e-05, "loss": 1.0295, "step": 5838 }, { "epoch": 0.22846075592769388, "grad_norm": 0.0, "learning_rate": 1.800456660812173e-05, "loss": 1.1675, "step": 5839 }, { "epoch": 0.22849988261992332, "grad_norm": 0.0, "learning_rate": 1.8003806977389385e-05, "loss": 1.1785, "step": 5840 }, { "epoch": 0.22853900931215276, "grad_norm": 0.0, "learning_rate": 1.800304721812593e-05, "loss": 1.1239, "step": 5841 }, { "epoch": 0.22857813600438218, "grad_norm": 0.0, "learning_rate": 1.8002287330343564e-05, "loss": 1.1717, "step": 5842 }, { "epoch": 0.22861726269661162, "grad_norm": 0.0, "learning_rate": 1.800152731405449e-05, "loss": 1.1837, "step": 5843 }, { "epoch": 0.22865638938884106, "grad_norm": 0.0, "learning_rate": 1.8000767169270917e-05, "loss": 1.1868, "step": 5844 }, { "epoch": 0.2286955160810705, "grad_norm": 0.0, "learning_rate": 1.800000689600505e-05, "loss": 1.1252, "step": 5845 }, { "epoch": 0.22873464277329994, "grad_norm": 0.0, "learning_rate": 1.7999246494269093e-05, "loss": 1.1596, "step": 5846 }, { "epoch": 0.22877376946552938, "grad_norm": 0.0, "learning_rate": 1.7998485964075258e-05, "loss": 1.2136, "step": 5847 }, { "epoch": 0.22881289615775882, "grad_norm": 0.0, "learning_rate": 1.7997725305435768e-05, "loss": 1.1724, "step": 5848 }, { "epoch": 0.22885202284998826, "grad_norm": 0.0, "learning_rate": 1.7996964518362827e-05, "loss": 1.109, "step": 5849 }, { "epoch": 0.2288911495422177, "grad_norm": 0.0, "learning_rate": 1.7996203602868657e-05, "loss": 1.1523, "step": 5850 }, { "epoch": 0.22893027623444714, "grad_norm": 0.0, "learning_rate": 1.7995442558965477e-05, "loss": 1.2567, "step": 5851 }, { "epoch": 0.22896940292667659, "grad_norm": 0.0, "learning_rate": 1.799468138666551e-05, "loss": 1.2088, "step": 5852 }, { "epoch": 0.22900852961890603, "grad_norm": 0.0, "learning_rate": 1.7993920085980975e-05, "loss": 1.1826, "step": 5853 }, { "epoch": 0.22904765631113547, "grad_norm": 0.0, "learning_rate": 1.7993158656924104e-05, "loss": 1.1754, "step": 5854 }, { "epoch": 0.2290867830033649, "grad_norm": 0.0, "learning_rate": 1.7992397099507116e-05, "loss": 1.2177, "step": 5855 }, { "epoch": 0.22912590969559432, "grad_norm": 0.0, "learning_rate": 1.7991635413742254e-05, "loss": 1.1132, "step": 5856 }, { "epoch": 0.22916503638782376, "grad_norm": 0.0, "learning_rate": 1.7990873599641735e-05, "loss": 1.1607, "step": 5857 }, { "epoch": 0.2292041630800532, "grad_norm": 0.0, "learning_rate": 1.79901116572178e-05, "loss": 1.0776, "step": 5858 }, { "epoch": 0.22924328977228264, "grad_norm": 0.0, "learning_rate": 1.7989349586482683e-05, "loss": 1.1069, "step": 5859 }, { "epoch": 0.22928241646451208, "grad_norm": 0.0, "learning_rate": 1.7988587387448625e-05, "loss": 1.2086, "step": 5860 }, { "epoch": 0.22932154315674153, "grad_norm": 0.0, "learning_rate": 1.7987825060127863e-05, "loss": 1.2743, "step": 5861 }, { "epoch": 0.22936066984897097, "grad_norm": 0.0, "learning_rate": 1.7987062604532638e-05, "loss": 1.123, "step": 5862 }, { "epoch": 0.2293997965412004, "grad_norm": 0.0, "learning_rate": 1.7986300020675198e-05, "loss": 1.3572, "step": 5863 }, { "epoch": 0.22943892323342985, "grad_norm": 0.0, "learning_rate": 1.7985537308567788e-05, "loss": 1.1668, "step": 5864 }, { "epoch": 0.2294780499256593, "grad_norm": 0.0, "learning_rate": 1.7984774468222652e-05, "loss": 1.0809, "step": 5865 }, { "epoch": 0.22951717661788873, "grad_norm": 0.0, "learning_rate": 1.7984011499652046e-05, "loss": 1.1234, "step": 5866 }, { "epoch": 0.22955630331011817, "grad_norm": 0.0, "learning_rate": 1.798324840286822e-05, "loss": 1.1454, "step": 5867 }, { "epoch": 0.2295954300023476, "grad_norm": 0.0, "learning_rate": 1.7982485177883426e-05, "loss": 1.1871, "step": 5868 }, { "epoch": 0.22963455669457705, "grad_norm": 0.0, "learning_rate": 1.7981721824709924e-05, "loss": 1.132, "step": 5869 }, { "epoch": 0.2296736833868065, "grad_norm": 0.0, "learning_rate": 1.7980958343359972e-05, "loss": 1.1992, "step": 5870 }, { "epoch": 0.2297128100790359, "grad_norm": 0.0, "learning_rate": 1.798019473384583e-05, "loss": 1.1787, "step": 5871 }, { "epoch": 0.22975193677126535, "grad_norm": 0.0, "learning_rate": 1.797943099617976e-05, "loss": 1.1396, "step": 5872 }, { "epoch": 0.2297910634634948, "grad_norm": 0.0, "learning_rate": 1.7978667130374025e-05, "loss": 1.1996, "step": 5873 }, { "epoch": 0.22983019015572423, "grad_norm": 0.0, "learning_rate": 1.7977903136440895e-05, "loss": 1.063, "step": 5874 }, { "epoch": 0.22986931684795367, "grad_norm": 0.0, "learning_rate": 1.7977139014392636e-05, "loss": 1.075, "step": 5875 }, { "epoch": 0.2299084435401831, "grad_norm": 0.0, "learning_rate": 1.7976374764241523e-05, "loss": 1.0786, "step": 5876 }, { "epoch": 0.22994757023241255, "grad_norm": 0.0, "learning_rate": 1.7975610385999828e-05, "loss": 1.0959, "step": 5877 }, { "epoch": 0.229986696924642, "grad_norm": 0.0, "learning_rate": 1.797484587967982e-05, "loss": 1.0771, "step": 5878 }, { "epoch": 0.23002582361687143, "grad_norm": 0.0, "learning_rate": 1.797408124529378e-05, "loss": 1.1188, "step": 5879 }, { "epoch": 0.23006495030910087, "grad_norm": 0.0, "learning_rate": 1.797331648285399e-05, "loss": 1.2406, "step": 5880 }, { "epoch": 0.23010407700133031, "grad_norm": 0.0, "learning_rate": 1.7972551592372726e-05, "loss": 1.1105, "step": 5881 }, { "epoch": 0.23014320369355976, "grad_norm": 0.0, "learning_rate": 1.7971786573862275e-05, "loss": 1.1953, "step": 5882 }, { "epoch": 0.2301823303857892, "grad_norm": 0.0, "learning_rate": 1.7971021427334924e-05, "loss": 1.3677, "step": 5883 }, { "epoch": 0.23022145707801864, "grad_norm": 0.0, "learning_rate": 1.797025615280295e-05, "loss": 1.1988, "step": 5884 }, { "epoch": 0.23026058377024805, "grad_norm": 0.0, "learning_rate": 1.7969490750278655e-05, "loss": 1.1119, "step": 5885 }, { "epoch": 0.2302997104624775, "grad_norm": 0.0, "learning_rate": 1.7968725219774324e-05, "loss": 1.1086, "step": 5886 }, { "epoch": 0.23033883715470693, "grad_norm": 0.0, "learning_rate": 1.796795956130225e-05, "loss": 1.0255, "step": 5887 }, { "epoch": 0.23037796384693637, "grad_norm": 0.0, "learning_rate": 1.796719377487473e-05, "loss": 1.1326, "step": 5888 }, { "epoch": 0.23041709053916581, "grad_norm": 0.0, "learning_rate": 1.796642786050406e-05, "loss": 1.0825, "step": 5889 }, { "epoch": 0.23045621723139526, "grad_norm": 0.0, "learning_rate": 1.7965661818202544e-05, "loss": 1.159, "step": 5890 }, { "epoch": 0.2304953439236247, "grad_norm": 0.0, "learning_rate": 1.7964895647982483e-05, "loss": 1.1502, "step": 5891 }, { "epoch": 0.23053447061585414, "grad_norm": 0.0, "learning_rate": 1.7964129349856173e-05, "loss": 1.1009, "step": 5892 }, { "epoch": 0.23057359730808358, "grad_norm": 0.0, "learning_rate": 1.7963362923835926e-05, "loss": 1.2004, "step": 5893 }, { "epoch": 0.23061272400031302, "grad_norm": 0.0, "learning_rate": 1.7962596369934052e-05, "loss": 1.1934, "step": 5894 }, { "epoch": 0.23065185069254246, "grad_norm": 0.0, "learning_rate": 1.7961829688162855e-05, "loss": 1.2112, "step": 5895 }, { "epoch": 0.2306909773847719, "grad_norm": 0.0, "learning_rate": 1.796106287853465e-05, "loss": 1.183, "step": 5896 }, { "epoch": 0.23073010407700134, "grad_norm": 0.0, "learning_rate": 1.7960295941061754e-05, "loss": 1.0785, "step": 5897 }, { "epoch": 0.23076923076923078, "grad_norm": 0.0, "learning_rate": 1.7959528875756478e-05, "loss": 1.2248, "step": 5898 }, { "epoch": 0.2308083574614602, "grad_norm": 0.0, "learning_rate": 1.795876168263114e-05, "loss": 1.1093, "step": 5899 }, { "epoch": 0.23084748415368964, "grad_norm": 0.0, "learning_rate": 1.7957994361698068e-05, "loss": 1.1638, "step": 5900 }, { "epoch": 0.23088661084591908, "grad_norm": 0.0, "learning_rate": 1.7957226912969576e-05, "loss": 1.1544, "step": 5901 }, { "epoch": 0.23092573753814852, "grad_norm": 0.0, "learning_rate": 1.795645933645799e-05, "loss": 1.1474, "step": 5902 }, { "epoch": 0.23096486423037796, "grad_norm": 0.0, "learning_rate": 1.7955691632175637e-05, "loss": 1.2065, "step": 5903 }, { "epoch": 0.2310039909226074, "grad_norm": 0.0, "learning_rate": 1.7954923800134843e-05, "loss": 1.1207, "step": 5904 }, { "epoch": 0.23104311761483684, "grad_norm": 0.0, "learning_rate": 1.7954155840347945e-05, "loss": 1.2152, "step": 5905 }, { "epoch": 0.23108224430706628, "grad_norm": 0.0, "learning_rate": 1.795338775282727e-05, "loss": 1.1256, "step": 5906 }, { "epoch": 0.23112137099929572, "grad_norm": 0.0, "learning_rate": 1.795261953758515e-05, "loss": 1.1666, "step": 5907 }, { "epoch": 0.23116049769152516, "grad_norm": 0.0, "learning_rate": 1.795185119463393e-05, "loss": 1.1219, "step": 5908 }, { "epoch": 0.2311996243837546, "grad_norm": 0.0, "learning_rate": 1.7951082723985944e-05, "loss": 1.1215, "step": 5909 }, { "epoch": 0.23123875107598404, "grad_norm": 0.0, "learning_rate": 1.795031412565353e-05, "loss": 1.1736, "step": 5910 }, { "epoch": 0.23127787776821349, "grad_norm": 0.0, "learning_rate": 1.7949545399649038e-05, "loss": 1.1926, "step": 5911 }, { "epoch": 0.23131700446044293, "grad_norm": 0.0, "learning_rate": 1.7948776545984804e-05, "loss": 1.2293, "step": 5912 }, { "epoch": 0.23135613115267234, "grad_norm": 0.0, "learning_rate": 1.794800756467318e-05, "loss": 1.3571, "step": 5913 }, { "epoch": 0.23139525784490178, "grad_norm": 0.0, "learning_rate": 1.7947238455726515e-05, "loss": 1.0303, "step": 5914 }, { "epoch": 0.23143438453713122, "grad_norm": 0.0, "learning_rate": 1.7946469219157158e-05, "loss": 1.1916, "step": 5915 }, { "epoch": 0.23147351122936066, "grad_norm": 0.0, "learning_rate": 1.794569985497746e-05, "loss": 1.1777, "step": 5916 }, { "epoch": 0.2315126379215901, "grad_norm": 0.0, "learning_rate": 1.7944930363199783e-05, "loss": 1.1997, "step": 5917 }, { "epoch": 0.23155176461381954, "grad_norm": 0.0, "learning_rate": 1.7944160743836478e-05, "loss": 1.2664, "step": 5918 }, { "epoch": 0.23159089130604898, "grad_norm": 0.0, "learning_rate": 1.7943390996899907e-05, "loss": 1.1116, "step": 5919 }, { "epoch": 0.23163001799827843, "grad_norm": 0.0, "learning_rate": 1.794262112240243e-05, "loss": 1.2375, "step": 5920 }, { "epoch": 0.23166914469050787, "grad_norm": 0.0, "learning_rate": 1.794185112035641e-05, "loss": 1.0826, "step": 5921 }, { "epoch": 0.2317082713827373, "grad_norm": 0.0, "learning_rate": 1.7941080990774213e-05, "loss": 1.2651, "step": 5922 }, { "epoch": 0.23174739807496675, "grad_norm": 0.0, "learning_rate": 1.7940310733668208e-05, "loss": 1.2502, "step": 5923 }, { "epoch": 0.2317865247671962, "grad_norm": 0.0, "learning_rate": 1.793954034905076e-05, "loss": 1.1091, "step": 5924 }, { "epoch": 0.23182565145942563, "grad_norm": 0.0, "learning_rate": 1.793876983693424e-05, "loss": 1.0025, "step": 5925 }, { "epoch": 0.23186477815165507, "grad_norm": 0.0, "learning_rate": 1.793799919733103e-05, "loss": 1.1673, "step": 5926 }, { "epoch": 0.2319039048438845, "grad_norm": 0.0, "learning_rate": 1.7937228430253497e-05, "loss": 1.2155, "step": 5927 }, { "epoch": 0.23194303153611392, "grad_norm": 0.0, "learning_rate": 1.7936457535714023e-05, "loss": 1.1395, "step": 5928 }, { "epoch": 0.23198215822834337, "grad_norm": 0.0, "learning_rate": 1.7935686513724985e-05, "loss": 1.0768, "step": 5929 }, { "epoch": 0.2320212849205728, "grad_norm": 0.0, "learning_rate": 1.793491536429876e-05, "loss": 1.2339, "step": 5930 }, { "epoch": 0.23206041161280225, "grad_norm": 0.0, "learning_rate": 1.7934144087447744e-05, "loss": 1.0071, "step": 5931 }, { "epoch": 0.2320995383050317, "grad_norm": 0.0, "learning_rate": 1.7933372683184317e-05, "loss": 1.1801, "step": 5932 }, { "epoch": 0.23213866499726113, "grad_norm": 0.0, "learning_rate": 1.7932601151520863e-05, "loss": 1.0634, "step": 5933 }, { "epoch": 0.23217779168949057, "grad_norm": 0.0, "learning_rate": 1.7931829492469773e-05, "loss": 1.2128, "step": 5934 }, { "epoch": 0.23221691838172, "grad_norm": 0.0, "learning_rate": 1.7931057706043443e-05, "loss": 1.0373, "step": 5935 }, { "epoch": 0.23225604507394945, "grad_norm": 0.0, "learning_rate": 1.7930285792254262e-05, "loss": 1.3115, "step": 5936 }, { "epoch": 0.2322951717661789, "grad_norm": 0.0, "learning_rate": 1.792951375111463e-05, "loss": 1.166, "step": 5937 }, { "epoch": 0.23233429845840833, "grad_norm": 0.0, "learning_rate": 1.7928741582636944e-05, "loss": 1.0734, "step": 5938 }, { "epoch": 0.23237342515063777, "grad_norm": 0.0, "learning_rate": 1.7927969286833603e-05, "loss": 1.2532, "step": 5939 }, { "epoch": 0.23241255184286722, "grad_norm": 0.0, "learning_rate": 1.792719686371701e-05, "loss": 1.1085, "step": 5940 }, { "epoch": 0.23245167853509666, "grad_norm": 0.0, "learning_rate": 1.7926424313299568e-05, "loss": 1.1055, "step": 5941 }, { "epoch": 0.23249080522732607, "grad_norm": 0.0, "learning_rate": 1.7925651635593682e-05, "loss": 1.1452, "step": 5942 }, { "epoch": 0.2325299319195555, "grad_norm": 0.0, "learning_rate": 1.7924878830611765e-05, "loss": 1.1633, "step": 5943 }, { "epoch": 0.23256905861178495, "grad_norm": 0.0, "learning_rate": 1.7924105898366224e-05, "loss": 1.0948, "step": 5944 }, { "epoch": 0.2326081853040144, "grad_norm": 0.0, "learning_rate": 1.7923332838869473e-05, "loss": 1.053, "step": 5945 }, { "epoch": 0.23264731199624383, "grad_norm": 0.0, "learning_rate": 1.792255965213392e-05, "loss": 1.1546, "step": 5946 }, { "epoch": 0.23268643868847327, "grad_norm": 0.0, "learning_rate": 1.7921786338171992e-05, "loss": 1.1951, "step": 5947 }, { "epoch": 0.23272556538070271, "grad_norm": 0.0, "learning_rate": 1.79210128969961e-05, "loss": 1.1473, "step": 5948 }, { "epoch": 0.23276469207293216, "grad_norm": 0.0, "learning_rate": 1.7920239328618665e-05, "loss": 1.176, "step": 5949 }, { "epoch": 0.2328038187651616, "grad_norm": 0.0, "learning_rate": 1.7919465633052114e-05, "loss": 1.0706, "step": 5950 }, { "epoch": 0.23284294545739104, "grad_norm": 0.0, "learning_rate": 1.7918691810308865e-05, "loss": 1.2096, "step": 5951 }, { "epoch": 0.23288207214962048, "grad_norm": 0.0, "learning_rate": 1.791791786040135e-05, "loss": 1.0901, "step": 5952 }, { "epoch": 0.23292119884184992, "grad_norm": 0.0, "learning_rate": 1.7917143783341995e-05, "loss": 1.2055, "step": 5953 }, { "epoch": 0.23296032553407936, "grad_norm": 0.0, "learning_rate": 1.7916369579143235e-05, "loss": 1.1537, "step": 5954 }, { "epoch": 0.2329994522263088, "grad_norm": 0.0, "learning_rate": 1.7915595247817495e-05, "loss": 1.0143, "step": 5955 }, { "epoch": 0.23303857891853821, "grad_norm": 0.0, "learning_rate": 1.7914820789377215e-05, "loss": 1.0411, "step": 5956 }, { "epoch": 0.23307770561076765, "grad_norm": 0.0, "learning_rate": 1.7914046203834834e-05, "loss": 0.9853, "step": 5957 }, { "epoch": 0.2331168323029971, "grad_norm": 0.0, "learning_rate": 1.7913271491202782e-05, "loss": 1.1432, "step": 5958 }, { "epoch": 0.23315595899522654, "grad_norm": 0.0, "learning_rate": 1.7912496651493512e-05, "loss": 1.0461, "step": 5959 }, { "epoch": 0.23319508568745598, "grad_norm": 0.0, "learning_rate": 1.791172168471946e-05, "loss": 1.04, "step": 5960 }, { "epoch": 0.23323421237968542, "grad_norm": 0.0, "learning_rate": 1.7910946590893068e-05, "loss": 1.151, "step": 5961 }, { "epoch": 0.23327333907191486, "grad_norm": 0.0, "learning_rate": 1.7910171370026788e-05, "loss": 1.1008, "step": 5962 }, { "epoch": 0.2333124657641443, "grad_norm": 0.0, "learning_rate": 1.7909396022133067e-05, "loss": 1.056, "step": 5963 }, { "epoch": 0.23335159245637374, "grad_norm": 0.0, "learning_rate": 1.790862054722436e-05, "loss": 1.2986, "step": 5964 }, { "epoch": 0.23339071914860318, "grad_norm": 0.0, "learning_rate": 1.7907844945313115e-05, "loss": 1.1552, "step": 5965 }, { "epoch": 0.23342984584083262, "grad_norm": 0.0, "learning_rate": 1.790706921641179e-05, "loss": 1.1455, "step": 5966 }, { "epoch": 0.23346897253306206, "grad_norm": 0.0, "learning_rate": 1.790629336053284e-05, "loss": 1.0528, "step": 5967 }, { "epoch": 0.2335080992252915, "grad_norm": 0.0, "learning_rate": 1.7905517377688725e-05, "loss": 1.1492, "step": 5968 }, { "epoch": 0.23354722591752095, "grad_norm": 0.0, "learning_rate": 1.7904741267891914e-05, "loss": 1.1315, "step": 5969 }, { "epoch": 0.23358635260975036, "grad_norm": 0.0, "learning_rate": 1.790396503115486e-05, "loss": 1.2202, "step": 5970 }, { "epoch": 0.2336254793019798, "grad_norm": 0.0, "learning_rate": 1.7903188667490025e-05, "loss": 1.1785, "step": 5971 }, { "epoch": 0.23366460599420924, "grad_norm": 0.0, "learning_rate": 1.7902412176909888e-05, "loss": 1.1378, "step": 5972 }, { "epoch": 0.23370373268643868, "grad_norm": 0.0, "learning_rate": 1.7901635559426915e-05, "loss": 1.1898, "step": 5973 }, { "epoch": 0.23374285937866812, "grad_norm": 0.0, "learning_rate": 1.7900858815053576e-05, "loss": 1.1916, "step": 5974 }, { "epoch": 0.23378198607089756, "grad_norm": 0.0, "learning_rate": 1.7900081943802345e-05, "loss": 1.0873, "step": 5975 }, { "epoch": 0.233821112763127, "grad_norm": 0.0, "learning_rate": 1.7899304945685693e-05, "loss": 1.127, "step": 5976 }, { "epoch": 0.23386023945535644, "grad_norm": 0.0, "learning_rate": 1.7898527820716107e-05, "loss": 1.2555, "step": 5977 }, { "epoch": 0.23389936614758589, "grad_norm": 0.0, "learning_rate": 1.789775056890606e-05, "loss": 1.1161, "step": 5978 }, { "epoch": 0.23393849283981533, "grad_norm": 0.0, "learning_rate": 1.7896973190268036e-05, "loss": 1.1238, "step": 5979 }, { "epoch": 0.23397761953204477, "grad_norm": 0.0, "learning_rate": 1.7896195684814516e-05, "loss": 1.1575, "step": 5980 }, { "epoch": 0.2340167462242742, "grad_norm": 0.0, "learning_rate": 1.789541805255799e-05, "loss": 1.1273, "step": 5981 }, { "epoch": 0.23405587291650365, "grad_norm": 0.0, "learning_rate": 1.7894640293510942e-05, "loss": 1.1647, "step": 5982 }, { "epoch": 0.2340949996087331, "grad_norm": 0.0, "learning_rate": 1.7893862407685866e-05, "loss": 1.1112, "step": 5983 }, { "epoch": 0.2341341263009625, "grad_norm": 0.0, "learning_rate": 1.7893084395095248e-05, "loss": 1.1943, "step": 5984 }, { "epoch": 0.23417325299319194, "grad_norm": 0.0, "learning_rate": 1.7892306255751584e-05, "loss": 1.144, "step": 5985 }, { "epoch": 0.23421237968542138, "grad_norm": 0.0, "learning_rate": 1.7891527989667377e-05, "loss": 1.0953, "step": 5986 }, { "epoch": 0.23425150637765083, "grad_norm": 0.0, "learning_rate": 1.7890749596855114e-05, "loss": 1.21, "step": 5987 }, { "epoch": 0.23429063306988027, "grad_norm": 0.0, "learning_rate": 1.7889971077327302e-05, "loss": 1.0252, "step": 5988 }, { "epoch": 0.2343297597621097, "grad_norm": 0.0, "learning_rate": 1.7889192431096442e-05, "loss": 1.106, "step": 5989 }, { "epoch": 0.23436888645433915, "grad_norm": 0.0, "learning_rate": 1.7888413658175038e-05, "loss": 1.1203, "step": 5990 }, { "epoch": 0.2344080131465686, "grad_norm": 0.0, "learning_rate": 1.788763475857559e-05, "loss": 1.2433, "step": 5991 }, { "epoch": 0.23444713983879803, "grad_norm": 0.0, "learning_rate": 1.788685573231062e-05, "loss": 1.0885, "step": 5992 }, { "epoch": 0.23448626653102747, "grad_norm": 0.0, "learning_rate": 1.7886076579392622e-05, "loss": 0.9399, "step": 5993 }, { "epoch": 0.2345253932232569, "grad_norm": 0.0, "learning_rate": 1.788529729983412e-05, "loss": 1.1912, "step": 5994 }, { "epoch": 0.23456451991548635, "grad_norm": 0.0, "learning_rate": 1.7884517893647624e-05, "loss": 1.1682, "step": 5995 }, { "epoch": 0.2346036466077158, "grad_norm": 0.0, "learning_rate": 1.7883738360845648e-05, "loss": 1.176, "step": 5996 }, { "epoch": 0.23464277329994523, "grad_norm": 0.0, "learning_rate": 1.7882958701440716e-05, "loss": 1.2006, "step": 5997 }, { "epoch": 0.23468189999217468, "grad_norm": 0.0, "learning_rate": 1.788217891544534e-05, "loss": 1.132, "step": 5998 }, { "epoch": 0.2347210266844041, "grad_norm": 0.0, "learning_rate": 1.7881399002872057e-05, "loss": 1.0921, "step": 5999 }, { "epoch": 0.23476015337663353, "grad_norm": 0.0, "learning_rate": 1.7880618963733374e-05, "loss": 1.114, "step": 6000 }, { "epoch": 0.23479928006886297, "grad_norm": 0.0, "learning_rate": 1.7879838798041827e-05, "loss": 1.1429, "step": 6001 }, { "epoch": 0.2348384067610924, "grad_norm": 0.0, "learning_rate": 1.7879058505809944e-05, "loss": 1.1126, "step": 6002 }, { "epoch": 0.23487753345332185, "grad_norm": 0.0, "learning_rate": 1.7878278087050255e-05, "loss": 1.11, "step": 6003 }, { "epoch": 0.2349166601455513, "grad_norm": 0.0, "learning_rate": 1.7877497541775288e-05, "loss": 1.1114, "step": 6004 }, { "epoch": 0.23495578683778073, "grad_norm": 0.0, "learning_rate": 1.7876716869997584e-05, "loss": 1.1793, "step": 6005 }, { "epoch": 0.23499491353001017, "grad_norm": 0.0, "learning_rate": 1.7875936071729682e-05, "loss": 1.0809, "step": 6006 }, { "epoch": 0.23503404022223962, "grad_norm": 0.0, "learning_rate": 1.7875155146984108e-05, "loss": 1.1771, "step": 6007 }, { "epoch": 0.23507316691446906, "grad_norm": 0.0, "learning_rate": 1.7874374095773414e-05, "loss": 1.4, "step": 6008 }, { "epoch": 0.2351122936066985, "grad_norm": 0.0, "learning_rate": 1.787359291811014e-05, "loss": 1.2081, "step": 6009 }, { "epoch": 0.23515142029892794, "grad_norm": 0.0, "learning_rate": 1.7872811614006827e-05, "loss": 1.0987, "step": 6010 }, { "epoch": 0.23519054699115738, "grad_norm": 0.0, "learning_rate": 1.787203018347603e-05, "loss": 1.1567, "step": 6011 }, { "epoch": 0.23522967368338682, "grad_norm": 0.0, "learning_rate": 1.7871248626530285e-05, "loss": 1.2257, "step": 6012 }, { "epoch": 0.23526880037561623, "grad_norm": 0.0, "learning_rate": 1.7870466943182156e-05, "loss": 1.1288, "step": 6013 }, { "epoch": 0.23530792706784567, "grad_norm": 0.0, "learning_rate": 1.786968513344419e-05, "loss": 1.3461, "step": 6014 }, { "epoch": 0.23534705376007511, "grad_norm": 0.0, "learning_rate": 1.7868903197328938e-05, "loss": 1.1196, "step": 6015 }, { "epoch": 0.23538618045230456, "grad_norm": 0.0, "learning_rate": 1.7868121134848967e-05, "loss": 1.1703, "step": 6016 }, { "epoch": 0.235425307144534, "grad_norm": 0.0, "learning_rate": 1.7867338946016826e-05, "loss": 1.1423, "step": 6017 }, { "epoch": 0.23546443383676344, "grad_norm": 0.0, "learning_rate": 1.7866556630845076e-05, "loss": 1.1966, "step": 6018 }, { "epoch": 0.23550356052899288, "grad_norm": 0.0, "learning_rate": 1.786577418934629e-05, "loss": 1.1584, "step": 6019 }, { "epoch": 0.23554268722122232, "grad_norm": 0.0, "learning_rate": 1.7864991621533025e-05, "loss": 1.1673, "step": 6020 }, { "epoch": 0.23558181391345176, "grad_norm": 0.0, "learning_rate": 1.786420892741785e-05, "loss": 1.092, "step": 6021 }, { "epoch": 0.2356209406056812, "grad_norm": 0.0, "learning_rate": 1.7863426107013333e-05, "loss": 1.2126, "step": 6022 }, { "epoch": 0.23566006729791064, "grad_norm": 0.0, "learning_rate": 1.7862643160332046e-05, "loss": 1.1779, "step": 6023 }, { "epoch": 0.23569919399014008, "grad_norm": 0.0, "learning_rate": 1.7861860087386563e-05, "loss": 1.1286, "step": 6024 }, { "epoch": 0.23573832068236952, "grad_norm": 0.0, "learning_rate": 1.786107688818946e-05, "loss": 1.0307, "step": 6025 }, { "epoch": 0.23577744737459896, "grad_norm": 0.0, "learning_rate": 1.786029356275331e-05, "loss": 1.2787, "step": 6026 }, { "epoch": 0.23581657406682838, "grad_norm": 0.0, "learning_rate": 1.7859510111090697e-05, "loss": 1.2169, "step": 6027 }, { "epoch": 0.23585570075905782, "grad_norm": 0.0, "learning_rate": 1.78587265332142e-05, "loss": 1.0411, "step": 6028 }, { "epoch": 0.23589482745128726, "grad_norm": 0.0, "learning_rate": 1.7857942829136404e-05, "loss": 1.2031, "step": 6029 }, { "epoch": 0.2359339541435167, "grad_norm": 0.0, "learning_rate": 1.785715899886989e-05, "loss": 1.2626, "step": 6030 }, { "epoch": 0.23597308083574614, "grad_norm": 0.0, "learning_rate": 1.7856375042427246e-05, "loss": 1.1674, "step": 6031 }, { "epoch": 0.23601220752797558, "grad_norm": 0.0, "learning_rate": 1.7855590959821068e-05, "loss": 1.188, "step": 6032 }, { "epoch": 0.23605133422020502, "grad_norm": 0.0, "learning_rate": 1.785480675106394e-05, "loss": 1.0977, "step": 6033 }, { "epoch": 0.23609046091243446, "grad_norm": 0.0, "learning_rate": 1.785402241616846e-05, "loss": 1.0096, "step": 6034 }, { "epoch": 0.2361295876046639, "grad_norm": 0.0, "learning_rate": 1.7853237955147218e-05, "loss": 1.064, "step": 6035 }, { "epoch": 0.23616871429689335, "grad_norm": 0.0, "learning_rate": 1.7852453368012817e-05, "loss": 1.0058, "step": 6036 }, { "epoch": 0.23620784098912279, "grad_norm": 0.0, "learning_rate": 1.7851668654777857e-05, "loss": 1.0129, "step": 6037 }, { "epoch": 0.23624696768135223, "grad_norm": 0.0, "learning_rate": 1.7850883815454935e-05, "loss": 1.2175, "step": 6038 }, { "epoch": 0.23628609437358167, "grad_norm": 0.0, "learning_rate": 1.785009885005666e-05, "loss": 1.1676, "step": 6039 }, { "epoch": 0.2363252210658111, "grad_norm": 0.0, "learning_rate": 1.784931375859563e-05, "loss": 1.1835, "step": 6040 }, { "epoch": 0.23636434775804052, "grad_norm": 0.0, "learning_rate": 1.7848528541084457e-05, "loss": 1.1585, "step": 6041 }, { "epoch": 0.23640347445026996, "grad_norm": 0.0, "learning_rate": 1.784774319753575e-05, "loss": 1.204, "step": 6042 }, { "epoch": 0.2364426011424994, "grad_norm": 0.0, "learning_rate": 1.7846957727962124e-05, "loss": 1.296, "step": 6043 }, { "epoch": 0.23648172783472884, "grad_norm": 0.0, "learning_rate": 1.7846172132376188e-05, "loss": 1.2318, "step": 6044 }, { "epoch": 0.23652085452695829, "grad_norm": 0.0, "learning_rate": 1.7845386410790558e-05, "loss": 1.1602, "step": 6045 }, { "epoch": 0.23655998121918773, "grad_norm": 0.0, "learning_rate": 1.7844600563217857e-05, "loss": 1.0885, "step": 6046 }, { "epoch": 0.23659910791141717, "grad_norm": 0.0, "learning_rate": 1.7843814589670696e-05, "loss": 1.1464, "step": 6047 }, { "epoch": 0.2366382346036466, "grad_norm": 0.0, "learning_rate": 1.7843028490161705e-05, "loss": 0.9994, "step": 6048 }, { "epoch": 0.23667736129587605, "grad_norm": 0.0, "learning_rate": 1.7842242264703503e-05, "loss": 1.1871, "step": 6049 }, { "epoch": 0.2367164879881055, "grad_norm": 0.0, "learning_rate": 1.7841455913308717e-05, "loss": 1.1636, "step": 6050 }, { "epoch": 0.23675561468033493, "grad_norm": 0.0, "learning_rate": 1.7840669435989974e-05, "loss": 1.2443, "step": 6051 }, { "epoch": 0.23679474137256437, "grad_norm": 0.0, "learning_rate": 1.7839882832759906e-05, "loss": 1.2524, "step": 6052 }, { "epoch": 0.2368338680647938, "grad_norm": 0.0, "learning_rate": 1.7839096103631143e-05, "loss": 0.9946, "step": 6053 }, { "epoch": 0.23687299475702325, "grad_norm": 0.0, "learning_rate": 1.7838309248616323e-05, "loss": 1.1983, "step": 6054 }, { "epoch": 0.2369121214492527, "grad_norm": 0.0, "learning_rate": 1.7837522267728077e-05, "loss": 1.1564, "step": 6055 }, { "epoch": 0.2369512481414821, "grad_norm": 0.0, "learning_rate": 1.783673516097904e-05, "loss": 1.1624, "step": 6056 }, { "epoch": 0.23699037483371155, "grad_norm": 0.0, "learning_rate": 1.7835947928381858e-05, "loss": 1.0367, "step": 6057 }, { "epoch": 0.237029501525941, "grad_norm": 0.0, "learning_rate": 1.7835160569949174e-05, "loss": 1.1587, "step": 6058 }, { "epoch": 0.23706862821817043, "grad_norm": 0.0, "learning_rate": 1.7834373085693628e-05, "loss": 1.1068, "step": 6059 }, { "epoch": 0.23710775491039987, "grad_norm": 0.0, "learning_rate": 1.7833585475627865e-05, "loss": 1.086, "step": 6060 }, { "epoch": 0.2371468816026293, "grad_norm": 0.0, "learning_rate": 1.7832797739764537e-05, "loss": 1.212, "step": 6061 }, { "epoch": 0.23718600829485875, "grad_norm": 0.0, "learning_rate": 1.7832009878116294e-05, "loss": 1.1954, "step": 6062 }, { "epoch": 0.2372251349870882, "grad_norm": 0.0, "learning_rate": 1.7831221890695785e-05, "loss": 1.2024, "step": 6063 }, { "epoch": 0.23726426167931763, "grad_norm": 0.0, "learning_rate": 1.7830433777515664e-05, "loss": 1.2551, "step": 6064 }, { "epoch": 0.23730338837154707, "grad_norm": 0.0, "learning_rate": 1.7829645538588592e-05, "loss": 1.1203, "step": 6065 }, { "epoch": 0.23734251506377652, "grad_norm": 0.0, "learning_rate": 1.782885717392722e-05, "loss": 1.1289, "step": 6066 }, { "epoch": 0.23738164175600596, "grad_norm": 0.0, "learning_rate": 1.7828068683544217e-05, "loss": 1.1949, "step": 6067 }, { "epoch": 0.2374207684482354, "grad_norm": 0.0, "learning_rate": 1.7827280067452232e-05, "loss": 1.0324, "step": 6068 }, { "epoch": 0.23745989514046484, "grad_norm": 0.0, "learning_rate": 1.7826491325663943e-05, "loss": 1.0885, "step": 6069 }, { "epoch": 0.23749902183269425, "grad_norm": 0.0, "learning_rate": 1.782570245819201e-05, "loss": 1.2628, "step": 6070 }, { "epoch": 0.2375381485249237, "grad_norm": 0.0, "learning_rate": 1.78249134650491e-05, "loss": 1.1417, "step": 6071 }, { "epoch": 0.23757727521715313, "grad_norm": 0.0, "learning_rate": 1.7824124346247885e-05, "loss": 1.1089, "step": 6072 }, { "epoch": 0.23761640190938257, "grad_norm": 0.0, "learning_rate": 1.782333510180104e-05, "loss": 1.0929, "step": 6073 }, { "epoch": 0.23765552860161201, "grad_norm": 0.0, "learning_rate": 1.7822545731721237e-05, "loss": 1.1933, "step": 6074 }, { "epoch": 0.23769465529384146, "grad_norm": 0.0, "learning_rate": 1.782175623602115e-05, "loss": 1.0397, "step": 6075 }, { "epoch": 0.2377337819860709, "grad_norm": 0.0, "learning_rate": 1.7820966614713455e-05, "loss": 1.1632, "step": 6076 }, { "epoch": 0.23777290867830034, "grad_norm": 0.0, "learning_rate": 1.7820176867810842e-05, "loss": 1.1379, "step": 6077 }, { "epoch": 0.23781203537052978, "grad_norm": 0.0, "learning_rate": 1.7819386995325986e-05, "loss": 1.2098, "step": 6078 }, { "epoch": 0.23785116206275922, "grad_norm": 0.0, "learning_rate": 1.7818596997271572e-05, "loss": 1.151, "step": 6079 }, { "epoch": 0.23789028875498866, "grad_norm": 0.0, "learning_rate": 1.781780687366029e-05, "loss": 1.1711, "step": 6080 }, { "epoch": 0.2379294154472181, "grad_norm": 0.0, "learning_rate": 1.781701662450482e-05, "loss": 1.2812, "step": 6081 }, { "epoch": 0.23796854213944754, "grad_norm": 0.0, "learning_rate": 1.7816226249817865e-05, "loss": 1.0815, "step": 6082 }, { "epoch": 0.23800766883167698, "grad_norm": 0.0, "learning_rate": 1.7815435749612108e-05, "loss": 1.0802, "step": 6083 }, { "epoch": 0.2380467955239064, "grad_norm": 0.0, "learning_rate": 1.7814645123900246e-05, "loss": 1.1044, "step": 6084 }, { "epoch": 0.23808592221613584, "grad_norm": 0.0, "learning_rate": 1.7813854372694978e-05, "loss": 1.1278, "step": 6085 }, { "epoch": 0.23812504890836528, "grad_norm": 0.0, "learning_rate": 1.7813063496008995e-05, "loss": 1.1852, "step": 6086 }, { "epoch": 0.23816417560059472, "grad_norm": 0.0, "learning_rate": 1.7812272493855007e-05, "loss": 1.2083, "step": 6087 }, { "epoch": 0.23820330229282416, "grad_norm": 0.0, "learning_rate": 1.7811481366245708e-05, "loss": 1.2222, "step": 6088 }, { "epoch": 0.2382424289850536, "grad_norm": 0.0, "learning_rate": 1.7810690113193812e-05, "loss": 1.3394, "step": 6089 }, { "epoch": 0.23828155567728304, "grad_norm": 0.0, "learning_rate": 1.7809898734712016e-05, "loss": 1.1754, "step": 6090 }, { "epoch": 0.23832068236951248, "grad_norm": 0.0, "learning_rate": 1.7809107230813034e-05, "loss": 1.201, "step": 6091 }, { "epoch": 0.23835980906174192, "grad_norm": 0.0, "learning_rate": 1.7808315601509576e-05, "loss": 1.0887, "step": 6092 }, { "epoch": 0.23839893575397136, "grad_norm": 0.0, "learning_rate": 1.7807523846814353e-05, "loss": 1.1876, "step": 6093 }, { "epoch": 0.2384380624462008, "grad_norm": 0.0, "learning_rate": 1.780673196674008e-05, "loss": 1.1339, "step": 6094 }, { "epoch": 0.23847718913843025, "grad_norm": 0.0, "learning_rate": 1.7805939961299472e-05, "loss": 1.2116, "step": 6095 }, { "epoch": 0.2385163158306597, "grad_norm": 0.0, "learning_rate": 1.7805147830505253e-05, "loss": 1.1647, "step": 6096 }, { "epoch": 0.23855544252288913, "grad_norm": 0.0, "learning_rate": 1.780435557437014e-05, "loss": 1.1955, "step": 6097 }, { "epoch": 0.23859456921511854, "grad_norm": 0.0, "learning_rate": 1.7803563192906853e-05, "loss": 1.1658, "step": 6098 }, { "epoch": 0.23863369590734798, "grad_norm": 0.0, "learning_rate": 1.7802770686128122e-05, "loss": 1.1032, "step": 6099 }, { "epoch": 0.23867282259957742, "grad_norm": 0.0, "learning_rate": 1.7801978054046675e-05, "loss": 1.0849, "step": 6100 }, { "epoch": 0.23871194929180686, "grad_norm": 0.0, "learning_rate": 1.780118529667523e-05, "loss": 1.1124, "step": 6101 }, { "epoch": 0.2387510759840363, "grad_norm": 0.0, "learning_rate": 1.7800392414026524e-05, "loss": 1.1177, "step": 6102 }, { "epoch": 0.23879020267626574, "grad_norm": 0.0, "learning_rate": 1.7799599406113296e-05, "loss": 1.1484, "step": 6103 }, { "epoch": 0.23882932936849519, "grad_norm": 0.0, "learning_rate": 1.7798806272948272e-05, "loss": 1.1459, "step": 6104 }, { "epoch": 0.23886845606072463, "grad_norm": 0.0, "learning_rate": 1.7798013014544193e-05, "loss": 1.2695, "step": 6105 }, { "epoch": 0.23890758275295407, "grad_norm": 0.0, "learning_rate": 1.7797219630913797e-05, "loss": 1.1273, "step": 6106 }, { "epoch": 0.2389467094451835, "grad_norm": 0.0, "learning_rate": 1.779642612206982e-05, "loss": 1.2594, "step": 6107 }, { "epoch": 0.23898583613741295, "grad_norm": 0.0, "learning_rate": 1.7795632488025016e-05, "loss": 1.1129, "step": 6108 }, { "epoch": 0.2390249628296424, "grad_norm": 0.0, "learning_rate": 1.7794838728792117e-05, "loss": 1.1194, "step": 6109 }, { "epoch": 0.23906408952187183, "grad_norm": 0.0, "learning_rate": 1.779404484438388e-05, "loss": 1.1643, "step": 6110 }, { "epoch": 0.23910321621410127, "grad_norm": 0.0, "learning_rate": 1.779325083481304e-05, "loss": 1.2799, "step": 6111 }, { "epoch": 0.2391423429063307, "grad_norm": 0.0, "learning_rate": 1.7792456700092368e-05, "loss": 1.064, "step": 6112 }, { "epoch": 0.23918146959856013, "grad_norm": 0.0, "learning_rate": 1.7791662440234597e-05, "loss": 1.2317, "step": 6113 }, { "epoch": 0.23922059629078957, "grad_norm": 0.0, "learning_rate": 1.7790868055252496e-05, "loss": 1.0014, "step": 6114 }, { "epoch": 0.239259722983019, "grad_norm": 0.0, "learning_rate": 1.7790073545158818e-05, "loss": 1.1094, "step": 6115 }, { "epoch": 0.23929884967524845, "grad_norm": 0.0, "learning_rate": 1.7789278909966315e-05, "loss": 1.12, "step": 6116 }, { "epoch": 0.2393379763674779, "grad_norm": 0.0, "learning_rate": 1.7788484149687758e-05, "loss": 1.0126, "step": 6117 }, { "epoch": 0.23937710305970733, "grad_norm": 0.0, "learning_rate": 1.7787689264335902e-05, "loss": 1.1691, "step": 6118 }, { "epoch": 0.23941622975193677, "grad_norm": 0.0, "learning_rate": 1.7786894253923518e-05, "loss": 1.1041, "step": 6119 }, { "epoch": 0.2394553564441662, "grad_norm": 0.0, "learning_rate": 1.7786099118463368e-05, "loss": 1.0537, "step": 6120 }, { "epoch": 0.23949448313639565, "grad_norm": 0.0, "learning_rate": 1.7785303857968223e-05, "loss": 1.1786, "step": 6121 }, { "epoch": 0.2395336098286251, "grad_norm": 0.0, "learning_rate": 1.7784508472450852e-05, "loss": 1.1169, "step": 6122 }, { "epoch": 0.23957273652085453, "grad_norm": 0.0, "learning_rate": 1.7783712961924032e-05, "loss": 1.1119, "step": 6123 }, { "epoch": 0.23961186321308398, "grad_norm": 0.0, "learning_rate": 1.7782917326400533e-05, "loss": 1.0833, "step": 6124 }, { "epoch": 0.23965098990531342, "grad_norm": 0.0, "learning_rate": 1.7782121565893135e-05, "loss": 1.1072, "step": 6125 }, { "epoch": 0.23969011659754286, "grad_norm": 0.0, "learning_rate": 1.7781325680414614e-05, "loss": 1.158, "step": 6126 }, { "epoch": 0.23972924328977227, "grad_norm": 0.0, "learning_rate": 1.7780529669977758e-05, "loss": 1.2437, "step": 6127 }, { "epoch": 0.2397683699820017, "grad_norm": 0.0, "learning_rate": 1.7779733534595343e-05, "loss": 1.1371, "step": 6128 }, { "epoch": 0.23980749667423115, "grad_norm": 0.0, "learning_rate": 1.7778937274280156e-05, "loss": 1.2374, "step": 6129 }, { "epoch": 0.2398466233664606, "grad_norm": 0.0, "learning_rate": 1.7778140889044983e-05, "loss": 1.2601, "step": 6130 }, { "epoch": 0.23988575005869003, "grad_norm": 0.0, "learning_rate": 1.7777344378902615e-05, "loss": 1.0179, "step": 6131 }, { "epoch": 0.23992487675091947, "grad_norm": 0.0, "learning_rate": 1.777654774386584e-05, "loss": 1.1119, "step": 6132 }, { "epoch": 0.23996400344314892, "grad_norm": 0.0, "learning_rate": 1.7775750983947455e-05, "loss": 1.1478, "step": 6133 }, { "epoch": 0.24000313013537836, "grad_norm": 0.0, "learning_rate": 1.7774954099160252e-05, "loss": 1.1864, "step": 6134 }, { "epoch": 0.2400422568276078, "grad_norm": 0.0, "learning_rate": 1.777415708951703e-05, "loss": 1.2549, "step": 6135 }, { "epoch": 0.24008138351983724, "grad_norm": 0.0, "learning_rate": 1.7773359955030583e-05, "loss": 1.1221, "step": 6136 }, { "epoch": 0.24012051021206668, "grad_norm": 0.0, "learning_rate": 1.777256269571372e-05, "loss": 1.0428, "step": 6137 }, { "epoch": 0.24015963690429612, "grad_norm": 0.0, "learning_rate": 1.7771765311579236e-05, "loss": 1.1385, "step": 6138 }, { "epoch": 0.24019876359652556, "grad_norm": 0.0, "learning_rate": 1.7770967802639943e-05, "loss": 1.1021, "step": 6139 }, { "epoch": 0.240237890288755, "grad_norm": 0.0, "learning_rate": 1.7770170168908642e-05, "loss": 1.0675, "step": 6140 }, { "epoch": 0.24027701698098441, "grad_norm": 0.0, "learning_rate": 1.7769372410398145e-05, "loss": 1.1407, "step": 6141 }, { "epoch": 0.24031614367321386, "grad_norm": 0.0, "learning_rate": 1.776857452712126e-05, "loss": 1.2491, "step": 6142 }, { "epoch": 0.2403552703654433, "grad_norm": 0.0, "learning_rate": 1.776777651909081e-05, "loss": 1.2274, "step": 6143 }, { "epoch": 0.24039439705767274, "grad_norm": 0.0, "learning_rate": 1.77669783863196e-05, "loss": 1.0641, "step": 6144 }, { "epoch": 0.24043352374990218, "grad_norm": 0.0, "learning_rate": 1.7766180128820445e-05, "loss": 1.1183, "step": 6145 }, { "epoch": 0.24047265044213162, "grad_norm": 0.0, "learning_rate": 1.7765381746606172e-05, "loss": 1.268, "step": 6146 }, { "epoch": 0.24051177713436106, "grad_norm": 0.0, "learning_rate": 1.7764583239689602e-05, "loss": 1.1475, "step": 6147 }, { "epoch": 0.2405509038265905, "grad_norm": 0.0, "learning_rate": 1.776378460808355e-05, "loss": 1.2389, "step": 6148 }, { "epoch": 0.24059003051881994, "grad_norm": 0.0, "learning_rate": 1.7762985851800846e-05, "loss": 1.2159, "step": 6149 }, { "epoch": 0.24062915721104938, "grad_norm": 0.0, "learning_rate": 1.776218697085432e-05, "loss": 1.1503, "step": 6150 }, { "epoch": 0.24066828390327882, "grad_norm": 0.0, "learning_rate": 1.7761387965256792e-05, "loss": 1.2256, "step": 6151 }, { "epoch": 0.24070741059550826, "grad_norm": 0.0, "learning_rate": 1.7760588835021106e-05, "loss": 1.0375, "step": 6152 }, { "epoch": 0.2407465372877377, "grad_norm": 0.0, "learning_rate": 1.775978958016008e-05, "loss": 1.1727, "step": 6153 }, { "epoch": 0.24078566397996715, "grad_norm": 0.0, "learning_rate": 1.7758990200686564e-05, "loss": 1.0487, "step": 6154 }, { "epoch": 0.24082479067219656, "grad_norm": 0.0, "learning_rate": 1.7758190696613385e-05, "loss": 1.2913, "step": 6155 }, { "epoch": 0.240863917364426, "grad_norm": 0.0, "learning_rate": 1.7757391067953387e-05, "loss": 1.2838, "step": 6156 }, { "epoch": 0.24090304405665544, "grad_norm": 0.0, "learning_rate": 1.7756591314719405e-05, "loss": 1.2231, "step": 6157 }, { "epoch": 0.24094217074888488, "grad_norm": 0.0, "learning_rate": 1.775579143692429e-05, "loss": 1.0939, "step": 6158 }, { "epoch": 0.24098129744111432, "grad_norm": 0.0, "learning_rate": 1.7754991434580883e-05, "loss": 1.1841, "step": 6159 }, { "epoch": 0.24102042413334376, "grad_norm": 0.0, "learning_rate": 1.7754191307702027e-05, "loss": 1.2281, "step": 6160 }, { "epoch": 0.2410595508255732, "grad_norm": 0.0, "learning_rate": 1.775339105630058e-05, "loss": 1.2637, "step": 6161 }, { "epoch": 0.24109867751780265, "grad_norm": 0.0, "learning_rate": 1.7752590680389382e-05, "loss": 1.1372, "step": 6162 }, { "epoch": 0.2411378042100321, "grad_norm": 0.0, "learning_rate": 1.77517901799813e-05, "loss": 1.1288, "step": 6163 }, { "epoch": 0.24117693090226153, "grad_norm": 0.0, "learning_rate": 1.7750989555089174e-05, "loss": 1.17, "step": 6164 }, { "epoch": 0.24121605759449097, "grad_norm": 0.0, "learning_rate": 1.7750188805725873e-05, "loss": 1.2081, "step": 6165 }, { "epoch": 0.2412551842867204, "grad_norm": 0.0, "learning_rate": 1.774938793190425e-05, "loss": 1.0684, "step": 6166 }, { "epoch": 0.24129431097894985, "grad_norm": 0.0, "learning_rate": 1.774858693363717e-05, "loss": 1.2346, "step": 6167 }, { "epoch": 0.2413334376711793, "grad_norm": 0.0, "learning_rate": 1.7747785810937487e-05, "loss": 1.2193, "step": 6168 }, { "epoch": 0.2413725643634087, "grad_norm": 0.0, "learning_rate": 1.7746984563818077e-05, "loss": 1.0605, "step": 6169 }, { "epoch": 0.24141169105563814, "grad_norm": 0.0, "learning_rate": 1.7746183192291803e-05, "loss": 1.1323, "step": 6170 }, { "epoch": 0.24145081774786759, "grad_norm": 0.0, "learning_rate": 1.774538169637153e-05, "loss": 1.0971, "step": 6171 }, { "epoch": 0.24148994444009703, "grad_norm": 0.0, "learning_rate": 1.7744580076070133e-05, "loss": 1.1704, "step": 6172 }, { "epoch": 0.24152907113232647, "grad_norm": 0.0, "learning_rate": 1.7743778331400486e-05, "loss": 1.2039, "step": 6173 }, { "epoch": 0.2415681978245559, "grad_norm": 0.0, "learning_rate": 1.7742976462375466e-05, "loss": 1.2568, "step": 6174 }, { "epoch": 0.24160732451678535, "grad_norm": 0.0, "learning_rate": 1.774217446900794e-05, "loss": 1.1948, "step": 6175 }, { "epoch": 0.2416464512090148, "grad_norm": 0.0, "learning_rate": 1.7741372351310797e-05, "loss": 1.222, "step": 6176 }, { "epoch": 0.24168557790124423, "grad_norm": 0.0, "learning_rate": 1.7740570109296915e-05, "loss": 1.1913, "step": 6177 }, { "epoch": 0.24172470459347367, "grad_norm": 0.0, "learning_rate": 1.7739767742979174e-05, "loss": 1.1563, "step": 6178 }, { "epoch": 0.2417638312857031, "grad_norm": 0.0, "learning_rate": 1.7738965252370463e-05, "loss": 1.1103, "step": 6179 }, { "epoch": 0.24180295797793255, "grad_norm": 0.0, "learning_rate": 1.773816263748367e-05, "loss": 1.2137, "step": 6180 }, { "epoch": 0.241842084670162, "grad_norm": 0.0, "learning_rate": 1.7737359898331677e-05, "loss": 1.1163, "step": 6181 }, { "epoch": 0.24188121136239144, "grad_norm": 0.0, "learning_rate": 1.773655703492738e-05, "loss": 1.1783, "step": 6182 }, { "epoch": 0.24192033805462088, "grad_norm": 0.0, "learning_rate": 1.7735754047283674e-05, "loss": 1.1135, "step": 6183 }, { "epoch": 0.2419594647468503, "grad_norm": 0.0, "learning_rate": 1.773495093541345e-05, "loss": 1.0815, "step": 6184 }, { "epoch": 0.24199859143907973, "grad_norm": 0.0, "learning_rate": 1.7734147699329607e-05, "loss": 1.1071, "step": 6185 }, { "epoch": 0.24203771813130917, "grad_norm": 0.0, "learning_rate": 1.773334433904504e-05, "loss": 1.0775, "step": 6186 }, { "epoch": 0.2420768448235386, "grad_norm": 0.0, "learning_rate": 1.7732540854572658e-05, "loss": 1.095, "step": 6187 }, { "epoch": 0.24211597151576805, "grad_norm": 0.0, "learning_rate": 1.7731737245925357e-05, "loss": 1.1344, "step": 6188 }, { "epoch": 0.2421550982079975, "grad_norm": 0.0, "learning_rate": 1.7730933513116046e-05, "loss": 1.2171, "step": 6189 }, { "epoch": 0.24219422490022693, "grad_norm": 0.0, "learning_rate": 1.773012965615763e-05, "loss": 1.2204, "step": 6190 }, { "epoch": 0.24223335159245638, "grad_norm": 0.0, "learning_rate": 1.7729325675063017e-05, "loss": 1.1989, "step": 6191 }, { "epoch": 0.24227247828468582, "grad_norm": 0.0, "learning_rate": 1.772852156984512e-05, "loss": 1.1325, "step": 6192 }, { "epoch": 0.24231160497691526, "grad_norm": 0.0, "learning_rate": 1.772771734051685e-05, "loss": 1.1562, "step": 6193 }, { "epoch": 0.2423507316691447, "grad_norm": 0.0, "learning_rate": 1.7726912987091123e-05, "loss": 1.1819, "step": 6194 }, { "epoch": 0.24238985836137414, "grad_norm": 0.0, "learning_rate": 1.7726108509580857e-05, "loss": 1.1393, "step": 6195 }, { "epoch": 0.24242898505360358, "grad_norm": 0.0, "learning_rate": 1.7725303907998966e-05, "loss": 1.123, "step": 6196 }, { "epoch": 0.24246811174583302, "grad_norm": 0.0, "learning_rate": 1.772449918235838e-05, "loss": 1.0527, "step": 6197 }, { "epoch": 0.24250723843806243, "grad_norm": 0.0, "learning_rate": 1.7723694332672012e-05, "loss": 1.1112, "step": 6198 }, { "epoch": 0.24254636513029187, "grad_norm": 0.0, "learning_rate": 1.7722889358952793e-05, "loss": 1.1405, "step": 6199 }, { "epoch": 0.24258549182252132, "grad_norm": 0.0, "learning_rate": 1.772208426121365e-05, "loss": 1.2443, "step": 6200 }, { "epoch": 0.24262461851475076, "grad_norm": 0.0, "learning_rate": 1.772127903946751e-05, "loss": 1.1408, "step": 6201 }, { "epoch": 0.2426637452069802, "grad_norm": 0.0, "learning_rate": 1.77204736937273e-05, "loss": 1.1734, "step": 6202 }, { "epoch": 0.24270287189920964, "grad_norm": 0.0, "learning_rate": 1.771966822400596e-05, "loss": 1.0796, "step": 6203 }, { "epoch": 0.24274199859143908, "grad_norm": 0.0, "learning_rate": 1.7718862630316417e-05, "loss": 1.1185, "step": 6204 }, { "epoch": 0.24278112528366852, "grad_norm": 0.0, "learning_rate": 1.771805691267162e-05, "loss": 1.0215, "step": 6205 }, { "epoch": 0.24282025197589796, "grad_norm": 0.0, "learning_rate": 1.7717251071084492e-05, "loss": 1.1198, "step": 6206 }, { "epoch": 0.2428593786681274, "grad_norm": 0.0, "learning_rate": 1.771644510556799e-05, "loss": 1.3296, "step": 6207 }, { "epoch": 0.24289850536035684, "grad_norm": 0.0, "learning_rate": 1.7715639016135043e-05, "loss": 1.0197, "step": 6208 }, { "epoch": 0.24293763205258628, "grad_norm": 0.0, "learning_rate": 1.7714832802798606e-05, "loss": 1.1075, "step": 6209 }, { "epoch": 0.24297675874481572, "grad_norm": 0.0, "learning_rate": 1.771402646557162e-05, "loss": 1.2003, "step": 6210 }, { "epoch": 0.24301588543704516, "grad_norm": 0.0, "learning_rate": 1.7713220004467035e-05, "loss": 1.0578, "step": 6211 }, { "epoch": 0.24305501212927458, "grad_norm": 0.0, "learning_rate": 1.77124134194978e-05, "loss": 1.1403, "step": 6212 }, { "epoch": 0.24309413882150402, "grad_norm": 0.0, "learning_rate": 1.771160671067687e-05, "loss": 1.201, "step": 6213 }, { "epoch": 0.24313326551373346, "grad_norm": 0.0, "learning_rate": 1.7710799878017203e-05, "loss": 1.1836, "step": 6214 }, { "epoch": 0.2431723922059629, "grad_norm": 0.0, "learning_rate": 1.7709992921531748e-05, "loss": 1.1448, "step": 6215 }, { "epoch": 0.24321151889819234, "grad_norm": 0.0, "learning_rate": 1.770918584123347e-05, "loss": 1.1556, "step": 6216 }, { "epoch": 0.24325064559042178, "grad_norm": 0.0, "learning_rate": 1.7708378637135325e-05, "loss": 1.1437, "step": 6217 }, { "epoch": 0.24328977228265122, "grad_norm": 0.0, "learning_rate": 1.7707571309250283e-05, "loss": 1.1507, "step": 6218 }, { "epoch": 0.24332889897488066, "grad_norm": 0.0, "learning_rate": 1.77067638575913e-05, "loss": 1.2057, "step": 6219 }, { "epoch": 0.2433680256671101, "grad_norm": 0.0, "learning_rate": 1.770595628217135e-05, "loss": 1.0814, "step": 6220 }, { "epoch": 0.24340715235933955, "grad_norm": 0.0, "learning_rate": 1.7705148583003395e-05, "loss": 1.1652, "step": 6221 }, { "epoch": 0.243446279051569, "grad_norm": 0.0, "learning_rate": 1.770434076010041e-05, "loss": 1.3118, "step": 6222 }, { "epoch": 0.24348540574379843, "grad_norm": 0.0, "learning_rate": 1.7703532813475367e-05, "loss": 1.138, "step": 6223 }, { "epoch": 0.24352453243602787, "grad_norm": 0.0, "learning_rate": 1.7702724743141234e-05, "loss": 1.1891, "step": 6224 }, { "epoch": 0.2435636591282573, "grad_norm": 0.0, "learning_rate": 1.7701916549111003e-05, "loss": 1.1534, "step": 6225 }, { "epoch": 0.24360278582048672, "grad_norm": 0.0, "learning_rate": 1.770110823139764e-05, "loss": 1.3439, "step": 6226 }, { "epoch": 0.24364191251271616, "grad_norm": 0.0, "learning_rate": 1.7700299790014126e-05, "loss": 1.1132, "step": 6227 }, { "epoch": 0.2436810392049456, "grad_norm": 0.0, "learning_rate": 1.769949122497345e-05, "loss": 1.0887, "step": 6228 }, { "epoch": 0.24372016589717505, "grad_norm": 0.0, "learning_rate": 1.769868253628859e-05, "loss": 1.2522, "step": 6229 }, { "epoch": 0.24375929258940449, "grad_norm": 0.0, "learning_rate": 1.7697873723972536e-05, "loss": 1.3215, "step": 6230 }, { "epoch": 0.24379841928163393, "grad_norm": 0.0, "learning_rate": 1.7697064788038277e-05, "loss": 1.087, "step": 6231 }, { "epoch": 0.24383754597386337, "grad_norm": 0.0, "learning_rate": 1.76962557284988e-05, "loss": 1.2132, "step": 6232 }, { "epoch": 0.2438766726660928, "grad_norm": 0.0, "learning_rate": 1.7695446545367106e-05, "loss": 1.1538, "step": 6233 }, { "epoch": 0.24391579935832225, "grad_norm": 0.0, "learning_rate": 1.7694637238656178e-05, "loss": 1.1533, "step": 6234 }, { "epoch": 0.2439549260505517, "grad_norm": 0.0, "learning_rate": 1.769382780837902e-05, "loss": 1.0618, "step": 6235 }, { "epoch": 0.24399405274278113, "grad_norm": 0.0, "learning_rate": 1.7693018254548628e-05, "loss": 1.0541, "step": 6236 }, { "epoch": 0.24403317943501057, "grad_norm": 0.0, "learning_rate": 1.7692208577178003e-05, "loss": 1.1373, "step": 6237 }, { "epoch": 0.24407230612724, "grad_norm": 0.0, "learning_rate": 1.769139877628015e-05, "loss": 1.0011, "step": 6238 }, { "epoch": 0.24411143281946945, "grad_norm": 0.0, "learning_rate": 1.7690588851868066e-05, "loss": 0.9766, "step": 6239 }, { "epoch": 0.2441505595116989, "grad_norm": 0.0, "learning_rate": 1.7689778803954764e-05, "loss": 1.1626, "step": 6240 }, { "epoch": 0.2441896862039283, "grad_norm": 0.0, "learning_rate": 1.7688968632553246e-05, "loss": 1.1253, "step": 6241 }, { "epoch": 0.24422881289615775, "grad_norm": 0.0, "learning_rate": 1.7688158337676528e-05, "loss": 1.074, "step": 6242 }, { "epoch": 0.2442679395883872, "grad_norm": 0.0, "learning_rate": 1.7687347919337626e-05, "loss": 1.1115, "step": 6243 }, { "epoch": 0.24430706628061663, "grad_norm": 0.0, "learning_rate": 1.7686537377549546e-05, "loss": 1.1448, "step": 6244 }, { "epoch": 0.24434619297284607, "grad_norm": 0.0, "learning_rate": 1.7685726712325307e-05, "loss": 1.1189, "step": 6245 }, { "epoch": 0.2443853196650755, "grad_norm": 0.0, "learning_rate": 1.7684915923677928e-05, "loss": 1.1931, "step": 6246 }, { "epoch": 0.24442444635730495, "grad_norm": 0.0, "learning_rate": 1.768410501162043e-05, "loss": 1.1985, "step": 6247 }, { "epoch": 0.2444635730495344, "grad_norm": 0.0, "learning_rate": 1.7683293976165835e-05, "loss": 1.1696, "step": 6248 }, { "epoch": 0.24450269974176383, "grad_norm": 0.0, "learning_rate": 1.7682482817327163e-05, "loss": 1.1661, "step": 6249 }, { "epoch": 0.24454182643399328, "grad_norm": 0.0, "learning_rate": 1.7681671535117443e-05, "loss": 1.2922, "step": 6250 }, { "epoch": 0.24458095312622272, "grad_norm": 0.0, "learning_rate": 1.7680860129549708e-05, "loss": 1.1821, "step": 6251 }, { "epoch": 0.24462007981845216, "grad_norm": 0.0, "learning_rate": 1.768004860063698e-05, "loss": 1.1272, "step": 6252 }, { "epoch": 0.2446592065106816, "grad_norm": 0.0, "learning_rate": 1.76792369483923e-05, "loss": 1.1146, "step": 6253 }, { "epoch": 0.24469833320291104, "grad_norm": 0.0, "learning_rate": 1.7678425172828696e-05, "loss": 1.1445, "step": 6254 }, { "epoch": 0.24473745989514045, "grad_norm": 0.0, "learning_rate": 1.7677613273959204e-05, "loss": 1.1738, "step": 6255 }, { "epoch": 0.2447765865873699, "grad_norm": 0.0, "learning_rate": 1.7676801251796863e-05, "loss": 1.0743, "step": 6256 }, { "epoch": 0.24481571327959933, "grad_norm": 0.0, "learning_rate": 1.7675989106354712e-05, "loss": 1.1606, "step": 6257 }, { "epoch": 0.24485483997182877, "grad_norm": 0.0, "learning_rate": 1.76751768376458e-05, "loss": 1.1025, "step": 6258 }, { "epoch": 0.24489396666405822, "grad_norm": 0.0, "learning_rate": 1.767436444568316e-05, "loss": 0.9461, "step": 6259 }, { "epoch": 0.24493309335628766, "grad_norm": 0.0, "learning_rate": 1.7673551930479847e-05, "loss": 1.1462, "step": 6260 }, { "epoch": 0.2449722200485171, "grad_norm": 0.0, "learning_rate": 1.7672739292048904e-05, "loss": 1.1277, "step": 6261 }, { "epoch": 0.24501134674074654, "grad_norm": 0.0, "learning_rate": 1.7671926530403382e-05, "loss": 1.1764, "step": 6262 }, { "epoch": 0.24505047343297598, "grad_norm": 0.0, "learning_rate": 1.767111364555633e-05, "loss": 1.0243, "step": 6263 }, { "epoch": 0.24508960012520542, "grad_norm": 0.0, "learning_rate": 1.767030063752081e-05, "loss": 1.0712, "step": 6264 }, { "epoch": 0.24512872681743486, "grad_norm": 0.0, "learning_rate": 1.7669487506309874e-05, "loss": 1.0237, "step": 6265 }, { "epoch": 0.2451678535096643, "grad_norm": 0.0, "learning_rate": 1.766867425193658e-05, "loss": 1.1282, "step": 6266 }, { "epoch": 0.24520698020189374, "grad_norm": 0.0, "learning_rate": 1.766786087441398e-05, "loss": 1.1658, "step": 6267 }, { "epoch": 0.24524610689412318, "grad_norm": 0.0, "learning_rate": 1.766704737375515e-05, "loss": 1.1695, "step": 6268 }, { "epoch": 0.2452852335863526, "grad_norm": 0.0, "learning_rate": 1.7666233749973143e-05, "loss": 1.0594, "step": 6269 }, { "epoch": 0.24532436027858204, "grad_norm": 0.0, "learning_rate": 1.7665420003081028e-05, "loss": 1.1169, "step": 6270 }, { "epoch": 0.24536348697081148, "grad_norm": 0.0, "learning_rate": 1.7664606133091875e-05, "loss": 1.1439, "step": 6271 }, { "epoch": 0.24540261366304092, "grad_norm": 0.0, "learning_rate": 1.7663792140018747e-05, "loss": 1.2332, "step": 6272 }, { "epoch": 0.24544174035527036, "grad_norm": 0.0, "learning_rate": 1.7662978023874725e-05, "loss": 1.1619, "step": 6273 }, { "epoch": 0.2454808670474998, "grad_norm": 0.0, "learning_rate": 1.7662163784672878e-05, "loss": 1.1693, "step": 6274 }, { "epoch": 0.24551999373972924, "grad_norm": 0.0, "learning_rate": 1.7661349422426282e-05, "loss": 1.0717, "step": 6275 }, { "epoch": 0.24555912043195868, "grad_norm": 0.0, "learning_rate": 1.7660534937148014e-05, "loss": 1.0518, "step": 6276 }, { "epoch": 0.24559824712418812, "grad_norm": 0.0, "learning_rate": 1.7659720328851154e-05, "loss": 1.1641, "step": 6277 }, { "epoch": 0.24563737381641756, "grad_norm": 0.0, "learning_rate": 1.7658905597548783e-05, "loss": 1.2328, "step": 6278 }, { "epoch": 0.245676500508647, "grad_norm": 0.0, "learning_rate": 1.7658090743253985e-05, "loss": 1.2518, "step": 6279 }, { "epoch": 0.24571562720087645, "grad_norm": 0.0, "learning_rate": 1.7657275765979846e-05, "loss": 1.1356, "step": 6280 }, { "epoch": 0.2457547538931059, "grad_norm": 0.0, "learning_rate": 1.7656460665739453e-05, "loss": 1.2781, "step": 6281 }, { "epoch": 0.24579388058533533, "grad_norm": 0.0, "learning_rate": 1.76556454425459e-05, "loss": 1.0737, "step": 6282 }, { "epoch": 0.24583300727756474, "grad_norm": 0.0, "learning_rate": 1.7654830096412266e-05, "loss": 1.1458, "step": 6283 }, { "epoch": 0.24587213396979418, "grad_norm": 0.0, "learning_rate": 1.765401462735166e-05, "loss": 1.1665, "step": 6284 }, { "epoch": 0.24591126066202362, "grad_norm": 0.0, "learning_rate": 1.7653199035377167e-05, "loss": 1.2396, "step": 6285 }, { "epoch": 0.24595038735425306, "grad_norm": 0.0, "learning_rate": 1.765238332050189e-05, "loss": 1.1437, "step": 6286 }, { "epoch": 0.2459895140464825, "grad_norm": 0.0, "learning_rate": 1.7651567482738925e-05, "loss": 1.1379, "step": 6287 }, { "epoch": 0.24602864073871195, "grad_norm": 0.0, "learning_rate": 1.765075152210137e-05, "loss": 1.1055, "step": 6288 }, { "epoch": 0.2460677674309414, "grad_norm": 0.0, "learning_rate": 1.7649935438602338e-05, "loss": 1.1137, "step": 6289 }, { "epoch": 0.24610689412317083, "grad_norm": 0.0, "learning_rate": 1.7649119232254925e-05, "loss": 1.1068, "step": 6290 }, { "epoch": 0.24614602081540027, "grad_norm": 0.0, "learning_rate": 1.7648302903072244e-05, "loss": 1.1664, "step": 6291 }, { "epoch": 0.2461851475076297, "grad_norm": 0.0, "learning_rate": 1.76474864510674e-05, "loss": 1.1003, "step": 6292 }, { "epoch": 0.24622427419985915, "grad_norm": 0.0, "learning_rate": 1.764666987625351e-05, "loss": 1.0598, "step": 6293 }, { "epoch": 0.2462634008920886, "grad_norm": 0.0, "learning_rate": 1.764585317864368e-05, "loss": 1.1723, "step": 6294 }, { "epoch": 0.24630252758431803, "grad_norm": 0.0, "learning_rate": 1.7645036358251033e-05, "loss": 1.226, "step": 6295 }, { "epoch": 0.24634165427654747, "grad_norm": 0.0, "learning_rate": 1.764421941508868e-05, "loss": 1.1836, "step": 6296 }, { "epoch": 0.2463807809687769, "grad_norm": 0.0, "learning_rate": 1.764340234916974e-05, "loss": 1.1185, "step": 6297 }, { "epoch": 0.24641990766100633, "grad_norm": 0.0, "learning_rate": 1.7642585160507338e-05, "loss": 1.0761, "step": 6298 }, { "epoch": 0.24645903435323577, "grad_norm": 0.0, "learning_rate": 1.764176784911459e-05, "loss": 1.2067, "step": 6299 }, { "epoch": 0.2464981610454652, "grad_norm": 0.0, "learning_rate": 1.7640950415004635e-05, "loss": 1.2329, "step": 6300 }, { "epoch": 0.24653728773769465, "grad_norm": 0.0, "learning_rate": 1.7640132858190585e-05, "loss": 1.1879, "step": 6301 }, { "epoch": 0.2465764144299241, "grad_norm": 0.0, "learning_rate": 1.7639315178685575e-05, "loss": 1.0917, "step": 6302 }, { "epoch": 0.24661554112215353, "grad_norm": 0.0, "learning_rate": 1.7638497376502736e-05, "loss": 1.0922, "step": 6303 }, { "epoch": 0.24665466781438297, "grad_norm": 0.0, "learning_rate": 1.7637679451655204e-05, "loss": 1.1704, "step": 6304 }, { "epoch": 0.2466937945066124, "grad_norm": 0.0, "learning_rate": 1.7636861404156106e-05, "loss": 0.9128, "step": 6305 }, { "epoch": 0.24673292119884185, "grad_norm": 0.0, "learning_rate": 1.7636043234018587e-05, "loss": 1.0104, "step": 6306 }, { "epoch": 0.2467720478910713, "grad_norm": 0.0, "learning_rate": 1.763522494125578e-05, "loss": 1.1469, "step": 6307 }, { "epoch": 0.24681117458330074, "grad_norm": 0.0, "learning_rate": 1.763440652588083e-05, "loss": 1.139, "step": 6308 }, { "epoch": 0.24685030127553018, "grad_norm": 0.0, "learning_rate": 1.7633587987906874e-05, "loss": 1.1875, "step": 6309 }, { "epoch": 0.24688942796775962, "grad_norm": 0.0, "learning_rate": 1.7632769327347063e-05, "loss": 1.1397, "step": 6310 }, { "epoch": 0.24692855465998906, "grad_norm": 0.0, "learning_rate": 1.763195054421454e-05, "loss": 1.2203, "step": 6311 }, { "epoch": 0.24696768135221847, "grad_norm": 0.0, "learning_rate": 1.7631131638522458e-05, "loss": 1.0554, "step": 6312 }, { "epoch": 0.2470068080444479, "grad_norm": 0.0, "learning_rate": 1.7630312610283958e-05, "loss": 1.1062, "step": 6313 }, { "epoch": 0.24704593473667735, "grad_norm": 0.0, "learning_rate": 1.7629493459512205e-05, "loss": 1.2245, "step": 6314 }, { "epoch": 0.2470850614289068, "grad_norm": 0.0, "learning_rate": 1.7628674186220344e-05, "loss": 1.022, "step": 6315 }, { "epoch": 0.24712418812113623, "grad_norm": 0.0, "learning_rate": 1.7627854790421536e-05, "loss": 1.1621, "step": 6316 }, { "epoch": 0.24716331481336568, "grad_norm": 0.0, "learning_rate": 1.7627035272128936e-05, "loss": 1.0201, "step": 6317 }, { "epoch": 0.24720244150559512, "grad_norm": 0.0, "learning_rate": 1.762621563135571e-05, "loss": 1.3035, "step": 6318 }, { "epoch": 0.24724156819782456, "grad_norm": 0.0, "learning_rate": 1.7625395868115017e-05, "loss": 1.1981, "step": 6319 }, { "epoch": 0.247280694890054, "grad_norm": 0.0, "learning_rate": 1.762457598242002e-05, "loss": 1.2208, "step": 6320 }, { "epoch": 0.24731982158228344, "grad_norm": 0.0, "learning_rate": 1.7623755974283885e-05, "loss": 1.1589, "step": 6321 }, { "epoch": 0.24735894827451288, "grad_norm": 0.0, "learning_rate": 1.7622935843719784e-05, "loss": 1.1604, "step": 6322 }, { "epoch": 0.24739807496674232, "grad_norm": 0.0, "learning_rate": 1.7622115590740886e-05, "loss": 1.1751, "step": 6323 }, { "epoch": 0.24743720165897176, "grad_norm": 0.0, "learning_rate": 1.7621295215360363e-05, "loss": 1.2125, "step": 6324 }, { "epoch": 0.2474763283512012, "grad_norm": 0.0, "learning_rate": 1.7620474717591385e-05, "loss": 1.2377, "step": 6325 }, { "epoch": 0.24751545504343062, "grad_norm": 0.0, "learning_rate": 1.761965409744714e-05, "loss": 1.1553, "step": 6326 }, { "epoch": 0.24755458173566006, "grad_norm": 0.0, "learning_rate": 1.761883335494079e-05, "loss": 1.2333, "step": 6327 }, { "epoch": 0.2475937084278895, "grad_norm": 0.0, "learning_rate": 1.7618012490085527e-05, "loss": 1.1307, "step": 6328 }, { "epoch": 0.24763283512011894, "grad_norm": 0.0, "learning_rate": 1.761719150289453e-05, "loss": 1.036, "step": 6329 }, { "epoch": 0.24767196181234838, "grad_norm": 0.0, "learning_rate": 1.761637039338098e-05, "loss": 1.1365, "step": 6330 }, { "epoch": 0.24771108850457782, "grad_norm": 0.0, "learning_rate": 1.761554916155807e-05, "loss": 1.2065, "step": 6331 }, { "epoch": 0.24775021519680726, "grad_norm": 0.0, "learning_rate": 1.7614727807438975e-05, "loss": 1.0801, "step": 6332 }, { "epoch": 0.2477893418890367, "grad_norm": 0.0, "learning_rate": 1.76139063310369e-05, "loss": 1.1436, "step": 6333 }, { "epoch": 0.24782846858126614, "grad_norm": 0.0, "learning_rate": 1.7613084732365027e-05, "loss": 1.0488, "step": 6334 }, { "epoch": 0.24786759527349558, "grad_norm": 0.0, "learning_rate": 1.7612263011436554e-05, "loss": 1.0684, "step": 6335 }, { "epoch": 0.24790672196572502, "grad_norm": 0.0, "learning_rate": 1.7611441168264675e-05, "loss": 1.1748, "step": 6336 }, { "epoch": 0.24794584865795447, "grad_norm": 0.0, "learning_rate": 1.761061920286259e-05, "loss": 1.1545, "step": 6337 }, { "epoch": 0.2479849753501839, "grad_norm": 0.0, "learning_rate": 1.7609797115243495e-05, "loss": 1.2521, "step": 6338 }, { "epoch": 0.24802410204241335, "grad_norm": 0.0, "learning_rate": 1.7608974905420594e-05, "loss": 1.2075, "step": 6339 }, { "epoch": 0.24806322873464276, "grad_norm": 0.0, "learning_rate": 1.7608152573407093e-05, "loss": 1.1207, "step": 6340 }, { "epoch": 0.2481023554268722, "grad_norm": 0.0, "learning_rate": 1.7607330119216196e-05, "loss": 1.1496, "step": 6341 }, { "epoch": 0.24814148211910164, "grad_norm": 0.0, "learning_rate": 1.7606507542861106e-05, "loss": 1.2099, "step": 6342 }, { "epoch": 0.24818060881133108, "grad_norm": 0.0, "learning_rate": 1.760568484435504e-05, "loss": 1.1321, "step": 6343 }, { "epoch": 0.24821973550356052, "grad_norm": 0.0, "learning_rate": 1.7604862023711204e-05, "loss": 1.1121, "step": 6344 }, { "epoch": 0.24825886219578996, "grad_norm": 0.0, "learning_rate": 1.7604039080942814e-05, "loss": 1.1444, "step": 6345 }, { "epoch": 0.2482979888880194, "grad_norm": 0.0, "learning_rate": 1.7603216016063084e-05, "loss": 1.1604, "step": 6346 }, { "epoch": 0.24833711558024885, "grad_norm": 0.0, "learning_rate": 1.760239282908523e-05, "loss": 1.1429, "step": 6347 }, { "epoch": 0.2483762422724783, "grad_norm": 0.0, "learning_rate": 1.7601569520022477e-05, "loss": 1.1286, "step": 6348 }, { "epoch": 0.24841536896470773, "grad_norm": 0.0, "learning_rate": 1.7600746088888042e-05, "loss": 1.0516, "step": 6349 }, { "epoch": 0.24845449565693717, "grad_norm": 0.0, "learning_rate": 1.759992253569515e-05, "loss": 1.1088, "step": 6350 }, { "epoch": 0.2484936223491666, "grad_norm": 0.0, "learning_rate": 1.7599098860457024e-05, "loss": 1.1547, "step": 6351 }, { "epoch": 0.24853274904139605, "grad_norm": 0.0, "learning_rate": 1.7598275063186895e-05, "loss": 1.0536, "step": 6352 }, { "epoch": 0.2485718757336255, "grad_norm": 0.0, "learning_rate": 1.7597451143897987e-05, "loss": 1.2966, "step": 6353 }, { "epoch": 0.2486110024258549, "grad_norm": 0.0, "learning_rate": 1.7596627102603534e-05, "loss": 1.2429, "step": 6354 }, { "epoch": 0.24865012911808435, "grad_norm": 0.0, "learning_rate": 1.7595802939316767e-05, "loss": 1.0941, "step": 6355 }, { "epoch": 0.2486892558103138, "grad_norm": 0.0, "learning_rate": 1.7594978654050927e-05, "loss": 1.1721, "step": 6356 }, { "epoch": 0.24872838250254323, "grad_norm": 0.0, "learning_rate": 1.759415424681924e-05, "loss": 1.1299, "step": 6357 }, { "epoch": 0.24876750919477267, "grad_norm": 0.0, "learning_rate": 1.759332971763496e-05, "loss": 1.1166, "step": 6358 }, { "epoch": 0.2488066358870021, "grad_norm": 0.0, "learning_rate": 1.7592505066511316e-05, "loss": 1.1246, "step": 6359 }, { "epoch": 0.24884576257923155, "grad_norm": 0.0, "learning_rate": 1.7591680293461553e-05, "loss": 1.2477, "step": 6360 }, { "epoch": 0.248884889271461, "grad_norm": 0.0, "learning_rate": 1.759085539849892e-05, "loss": 1.1842, "step": 6361 }, { "epoch": 0.24892401596369043, "grad_norm": 0.0, "learning_rate": 1.759003038163666e-05, "loss": 1.1163, "step": 6362 }, { "epoch": 0.24896314265591987, "grad_norm": 0.0, "learning_rate": 1.7589205242888027e-05, "loss": 1.2144, "step": 6363 }, { "epoch": 0.2490022693481493, "grad_norm": 0.0, "learning_rate": 1.7588379982266262e-05, "loss": 1.2949, "step": 6364 }, { "epoch": 0.24904139604037875, "grad_norm": 0.0, "learning_rate": 1.7587554599784625e-05, "loss": 1.1249, "step": 6365 }, { "epoch": 0.2490805227326082, "grad_norm": 0.0, "learning_rate": 1.7586729095456366e-05, "loss": 1.1048, "step": 6366 }, { "epoch": 0.24911964942483764, "grad_norm": 0.0, "learning_rate": 1.758590346929475e-05, "loss": 1.1537, "step": 6367 }, { "epoch": 0.24915877611706708, "grad_norm": 0.0, "learning_rate": 1.7585077721313026e-05, "loss": 1.212, "step": 6368 }, { "epoch": 0.2491979028092965, "grad_norm": 0.0, "learning_rate": 1.758425185152446e-05, "loss": 1.3311, "step": 6369 }, { "epoch": 0.24923702950152593, "grad_norm": 0.0, "learning_rate": 1.7583425859942312e-05, "loss": 1.1987, "step": 6370 }, { "epoch": 0.24927615619375537, "grad_norm": 0.0, "learning_rate": 1.7582599746579848e-05, "loss": 1.146, "step": 6371 }, { "epoch": 0.2493152828859848, "grad_norm": 0.0, "learning_rate": 1.7581773511450336e-05, "loss": 1.0363, "step": 6372 }, { "epoch": 0.24935440957821425, "grad_norm": 0.0, "learning_rate": 1.7580947154567038e-05, "loss": 1.1771, "step": 6373 }, { "epoch": 0.2493935362704437, "grad_norm": 0.0, "learning_rate": 1.758012067594323e-05, "loss": 1.2597, "step": 6374 }, { "epoch": 0.24943266296267314, "grad_norm": 0.0, "learning_rate": 1.757929407559218e-05, "loss": 0.9346, "step": 6375 }, { "epoch": 0.24947178965490258, "grad_norm": 0.0, "learning_rate": 1.757846735352717e-05, "loss": 1.1714, "step": 6376 }, { "epoch": 0.24951091634713202, "grad_norm": 0.0, "learning_rate": 1.7577640509761465e-05, "loss": 1.1741, "step": 6377 }, { "epoch": 0.24955004303936146, "grad_norm": 0.0, "learning_rate": 1.757681354430835e-05, "loss": 1.1466, "step": 6378 }, { "epoch": 0.2495891697315909, "grad_norm": 0.0, "learning_rate": 1.7575986457181104e-05, "loss": 1.2261, "step": 6379 }, { "epoch": 0.24962829642382034, "grad_norm": 0.0, "learning_rate": 1.757515924839301e-05, "loss": 1.1629, "step": 6380 }, { "epoch": 0.24966742311604978, "grad_norm": 0.0, "learning_rate": 1.757433191795735e-05, "loss": 1.237, "step": 6381 }, { "epoch": 0.24970654980827922, "grad_norm": 0.0, "learning_rate": 1.757350446588741e-05, "loss": 1.1341, "step": 6382 }, { "epoch": 0.24974567650050863, "grad_norm": 0.0, "learning_rate": 1.757267689219648e-05, "loss": 1.0234, "step": 6383 }, { "epoch": 0.24978480319273808, "grad_norm": 0.0, "learning_rate": 1.7571849196897844e-05, "loss": 1.2313, "step": 6384 }, { "epoch": 0.24982392988496752, "grad_norm": 0.0, "learning_rate": 1.7571021380004804e-05, "loss": 1.1319, "step": 6385 }, { "epoch": 0.24986305657719696, "grad_norm": 0.0, "learning_rate": 1.7570193441530646e-05, "loss": 1.0327, "step": 6386 }, { "epoch": 0.2499021832694264, "grad_norm": 0.0, "learning_rate": 1.7569365381488666e-05, "loss": 1.2347, "step": 6387 }, { "epoch": 0.24994130996165584, "grad_norm": 0.0, "learning_rate": 1.7568537199892163e-05, "loss": 1.2358, "step": 6388 }, { "epoch": 0.24998043665388528, "grad_norm": 0.0, "learning_rate": 1.7567708896754435e-05, "loss": 1.1289, "step": 6389 }, { "epoch": 0.2500195633461147, "grad_norm": 0.0, "learning_rate": 1.7566880472088787e-05, "loss": 1.0883, "step": 6390 }, { "epoch": 0.25005869003834413, "grad_norm": 0.0, "learning_rate": 1.7566051925908517e-05, "loss": 1.1919, "step": 6391 }, { "epoch": 0.2500978167305736, "grad_norm": 0.0, "learning_rate": 1.756522325822694e-05, "loss": 1.2599, "step": 6392 }, { "epoch": 0.250136943422803, "grad_norm": 0.0, "learning_rate": 1.7564394469057357e-05, "loss": 1.1551, "step": 6393 }, { "epoch": 0.25017607011503246, "grad_norm": 0.0, "learning_rate": 1.756356555841307e-05, "loss": 1.2468, "step": 6394 }, { "epoch": 0.2502151968072619, "grad_norm": 0.0, "learning_rate": 1.7562736526307404e-05, "loss": 1.1333, "step": 6395 }, { "epoch": 0.25025432349949134, "grad_norm": 0.0, "learning_rate": 1.7561907372753665e-05, "loss": 1.2029, "step": 6396 }, { "epoch": 0.2502934501917208, "grad_norm": 0.0, "learning_rate": 1.7561078097765166e-05, "loss": 1.2497, "step": 6397 }, { "epoch": 0.2503325768839502, "grad_norm": 0.0, "learning_rate": 1.756024870135523e-05, "loss": 1.0197, "step": 6398 }, { "epoch": 0.25037170357617966, "grad_norm": 0.0, "learning_rate": 1.7559419183537175e-05, "loss": 1.1938, "step": 6399 }, { "epoch": 0.2504108302684091, "grad_norm": 0.0, "learning_rate": 1.755858954432432e-05, "loss": 1.1078, "step": 6400 }, { "epoch": 0.25044995696063854, "grad_norm": 0.0, "learning_rate": 1.7557759783729985e-05, "loss": 1.0722, "step": 6401 }, { "epoch": 0.250489083652868, "grad_norm": 0.0, "learning_rate": 1.7556929901767502e-05, "loss": 1.1418, "step": 6402 }, { "epoch": 0.2505282103450974, "grad_norm": 0.0, "learning_rate": 1.7556099898450192e-05, "loss": 1.0853, "step": 6403 }, { "epoch": 0.25056733703732686, "grad_norm": 0.0, "learning_rate": 1.7555269773791387e-05, "loss": 1.1152, "step": 6404 }, { "epoch": 0.2506064637295563, "grad_norm": 0.0, "learning_rate": 1.7554439527804413e-05, "loss": 1.0639, "step": 6405 }, { "epoch": 0.25064559042178575, "grad_norm": 0.0, "learning_rate": 1.7553609160502612e-05, "loss": 1.2256, "step": 6406 }, { "epoch": 0.2506847171140152, "grad_norm": 0.0, "learning_rate": 1.755277867189931e-05, "loss": 1.0486, "step": 6407 }, { "epoch": 0.25072384380624463, "grad_norm": 0.0, "learning_rate": 1.7551948062007845e-05, "loss": 1.2196, "step": 6408 }, { "epoch": 0.25076297049847407, "grad_norm": 0.0, "learning_rate": 1.755111733084156e-05, "loss": 1.0458, "step": 6409 }, { "epoch": 0.2508020971907035, "grad_norm": 0.0, "learning_rate": 1.7550286478413792e-05, "loss": 1.1657, "step": 6410 }, { "epoch": 0.25084122388293295, "grad_norm": 0.0, "learning_rate": 1.7549455504737886e-05, "loss": 1.0806, "step": 6411 }, { "epoch": 0.2508803505751624, "grad_norm": 0.0, "learning_rate": 1.7548624409827184e-05, "loss": 1.1355, "step": 6412 }, { "epoch": 0.25091947726739183, "grad_norm": 0.0, "learning_rate": 1.754779319369503e-05, "loss": 1.1073, "step": 6413 }, { "epoch": 0.2509586039596213, "grad_norm": 0.0, "learning_rate": 1.754696185635478e-05, "loss": 1.2321, "step": 6414 }, { "epoch": 0.2509977306518507, "grad_norm": 0.0, "learning_rate": 1.7546130397819778e-05, "loss": 1.255, "step": 6415 }, { "epoch": 0.25103685734408016, "grad_norm": 0.0, "learning_rate": 1.7545298818103375e-05, "loss": 1.226, "step": 6416 }, { "epoch": 0.2510759840363096, "grad_norm": 0.0, "learning_rate": 1.754446711721893e-05, "loss": 1.1137, "step": 6417 }, { "epoch": 0.251115110728539, "grad_norm": 0.0, "learning_rate": 1.7543635295179796e-05, "loss": 1.1506, "step": 6418 }, { "epoch": 0.2511542374207684, "grad_norm": 0.0, "learning_rate": 1.754280335199933e-05, "loss": 1.2497, "step": 6419 }, { "epoch": 0.25119336411299786, "grad_norm": 0.0, "learning_rate": 1.7541971287690895e-05, "loss": 1.1844, "step": 6420 }, { "epoch": 0.2512324908052273, "grad_norm": 0.0, "learning_rate": 1.7541139102267855e-05, "loss": 1.2166, "step": 6421 }, { "epoch": 0.25127161749745675, "grad_norm": 0.0, "learning_rate": 1.7540306795743566e-05, "loss": 1.1566, "step": 6422 }, { "epoch": 0.2513107441896862, "grad_norm": 0.0, "learning_rate": 1.75394743681314e-05, "loss": 1.0967, "step": 6423 }, { "epoch": 0.2513498708819156, "grad_norm": 0.0, "learning_rate": 1.7538641819444722e-05, "loss": 0.9797, "step": 6424 }, { "epoch": 0.25138899757414507, "grad_norm": 0.0, "learning_rate": 1.7537809149696907e-05, "loss": 1.0384, "step": 6425 }, { "epoch": 0.2514281242663745, "grad_norm": 0.0, "learning_rate": 1.753697635890132e-05, "loss": 1.1712, "step": 6426 }, { "epoch": 0.25146725095860395, "grad_norm": 0.0, "learning_rate": 1.7536143447071336e-05, "loss": 1.0718, "step": 6427 }, { "epoch": 0.2515063776508334, "grad_norm": 0.0, "learning_rate": 1.7535310414220333e-05, "loss": 1.1357, "step": 6428 }, { "epoch": 0.25154550434306283, "grad_norm": 0.0, "learning_rate": 1.7534477260361685e-05, "loss": 1.0759, "step": 6429 }, { "epoch": 0.25158463103529227, "grad_norm": 0.0, "learning_rate": 1.7533643985508775e-05, "loss": 1.126, "step": 6430 }, { "epoch": 0.2516237577275217, "grad_norm": 0.0, "learning_rate": 1.7532810589674983e-05, "loss": 1.1879, "step": 6431 }, { "epoch": 0.25166288441975115, "grad_norm": 0.0, "learning_rate": 1.753197707287369e-05, "loss": 1.1602, "step": 6432 }, { "epoch": 0.2517020111119806, "grad_norm": 0.0, "learning_rate": 1.7531143435118284e-05, "loss": 1.173, "step": 6433 }, { "epoch": 0.25174113780421004, "grad_norm": 0.0, "learning_rate": 1.7530309676422157e-05, "loss": 1.2012, "step": 6434 }, { "epoch": 0.2517802644964395, "grad_norm": 0.0, "learning_rate": 1.7529475796798686e-05, "loss": 1.2217, "step": 6435 }, { "epoch": 0.2518193911886689, "grad_norm": 0.0, "learning_rate": 1.7528641796261273e-05, "loss": 1.1727, "step": 6436 }, { "epoch": 0.25185851788089836, "grad_norm": 0.0, "learning_rate": 1.7527807674823303e-05, "loss": 0.9862, "step": 6437 }, { "epoch": 0.2518976445731278, "grad_norm": 0.0, "learning_rate": 1.7526973432498177e-05, "loss": 1.1044, "step": 6438 }, { "epoch": 0.25193677126535724, "grad_norm": 0.0, "learning_rate": 1.7526139069299287e-05, "loss": 1.1704, "step": 6439 }, { "epoch": 0.2519758979575867, "grad_norm": 0.0, "learning_rate": 1.7525304585240034e-05, "loss": 1.1804, "step": 6440 }, { "epoch": 0.2520150246498161, "grad_norm": 0.0, "learning_rate": 1.7524469980333822e-05, "loss": 1.0767, "step": 6441 }, { "epoch": 0.25205415134204556, "grad_norm": 0.0, "learning_rate": 1.752363525459405e-05, "loss": 1.2739, "step": 6442 }, { "epoch": 0.252093278034275, "grad_norm": 0.0, "learning_rate": 1.7522800408034125e-05, "loss": 1.0349, "step": 6443 }, { "epoch": 0.25213240472650444, "grad_norm": 0.0, "learning_rate": 1.7521965440667448e-05, "loss": 1.2247, "step": 6444 }, { "epoch": 0.2521715314187339, "grad_norm": 0.0, "learning_rate": 1.7521130352507434e-05, "loss": 1.155, "step": 6445 }, { "epoch": 0.2522106581109633, "grad_norm": 0.0, "learning_rate": 1.7520295143567492e-05, "loss": 1.1685, "step": 6446 }, { "epoch": 0.2522497848031927, "grad_norm": 0.0, "learning_rate": 1.751945981386103e-05, "loss": 1.2903, "step": 6447 }, { "epoch": 0.25228891149542215, "grad_norm": 0.0, "learning_rate": 1.751862436340147e-05, "loss": 1.0691, "step": 6448 }, { "epoch": 0.2523280381876516, "grad_norm": 0.0, "learning_rate": 1.7517788792202225e-05, "loss": 1.1782, "step": 6449 }, { "epoch": 0.25236716487988103, "grad_norm": 0.0, "learning_rate": 1.7516953100276707e-05, "loss": 1.1552, "step": 6450 }, { "epoch": 0.2524062915721105, "grad_norm": 0.0, "learning_rate": 1.7516117287638345e-05, "loss": 1.0809, "step": 6451 }, { "epoch": 0.2524454182643399, "grad_norm": 0.0, "learning_rate": 1.7515281354300556e-05, "loss": 1.1414, "step": 6452 }, { "epoch": 0.25248454495656936, "grad_norm": 0.0, "learning_rate": 1.7514445300276767e-05, "loss": 1.0562, "step": 6453 }, { "epoch": 0.2525236716487988, "grad_norm": 0.0, "learning_rate": 1.75136091255804e-05, "loss": 0.9898, "step": 6454 }, { "epoch": 0.25256279834102824, "grad_norm": 0.0, "learning_rate": 1.751277283022489e-05, "loss": 1.2126, "step": 6455 }, { "epoch": 0.2526019250332577, "grad_norm": 0.0, "learning_rate": 1.7511936414223658e-05, "loss": 1.0771, "step": 6456 }, { "epoch": 0.2526410517254871, "grad_norm": 0.0, "learning_rate": 1.751109987759014e-05, "loss": 1.0921, "step": 6457 }, { "epoch": 0.25268017841771656, "grad_norm": 0.0, "learning_rate": 1.7510263220337772e-05, "loss": 1.017, "step": 6458 }, { "epoch": 0.252719305109946, "grad_norm": 0.0, "learning_rate": 1.750942644247999e-05, "loss": 1.2403, "step": 6459 }, { "epoch": 0.25275843180217544, "grad_norm": 0.0, "learning_rate": 1.7508589544030224e-05, "loss": 1.1357, "step": 6460 }, { "epoch": 0.2527975584944049, "grad_norm": 0.0, "learning_rate": 1.7507752525001924e-05, "loss": 1.095, "step": 6461 }, { "epoch": 0.2528366851866343, "grad_norm": 0.0, "learning_rate": 1.7506915385408523e-05, "loss": 1.2614, "step": 6462 }, { "epoch": 0.25287581187886377, "grad_norm": 0.0, "learning_rate": 1.750607812526347e-05, "loss": 1.1393, "step": 6463 }, { "epoch": 0.2529149385710932, "grad_norm": 0.0, "learning_rate": 1.75052407445802e-05, "loss": 1.1957, "step": 6464 }, { "epoch": 0.25295406526332265, "grad_norm": 0.0, "learning_rate": 1.750440324337218e-05, "loss": 1.0427, "step": 6465 }, { "epoch": 0.2529931919555521, "grad_norm": 0.0, "learning_rate": 1.750356562165284e-05, "loss": 1.0795, "step": 6466 }, { "epoch": 0.25303231864778153, "grad_norm": 0.0, "learning_rate": 1.750272787943564e-05, "loss": 1.194, "step": 6467 }, { "epoch": 0.25307144534001097, "grad_norm": 0.0, "learning_rate": 1.7501890016734032e-05, "loss": 1.1387, "step": 6468 }, { "epoch": 0.2531105720322404, "grad_norm": 0.0, "learning_rate": 1.750105203356147e-05, "loss": 1.1731, "step": 6469 }, { "epoch": 0.25314969872446985, "grad_norm": 0.0, "learning_rate": 1.7500213929931412e-05, "loss": 1.1523, "step": 6470 }, { "epoch": 0.2531888254166993, "grad_norm": 0.0, "learning_rate": 1.7499375705857318e-05, "loss": 1.0591, "step": 6471 }, { "epoch": 0.25322795210892873, "grad_norm": 0.0, "learning_rate": 1.7498537361352644e-05, "loss": 1.093, "step": 6472 }, { "epoch": 0.2532670788011582, "grad_norm": 0.0, "learning_rate": 1.749769889643086e-05, "loss": 1.0653, "step": 6473 }, { "epoch": 0.2533062054933876, "grad_norm": 0.0, "learning_rate": 1.7496860311105426e-05, "loss": 1.1787, "step": 6474 }, { "epoch": 0.253345332185617, "grad_norm": 0.0, "learning_rate": 1.7496021605389806e-05, "loss": 1.2814, "step": 6475 }, { "epoch": 0.25338445887784644, "grad_norm": 0.0, "learning_rate": 1.749518277929748e-05, "loss": 1.0345, "step": 6476 }, { "epoch": 0.2534235855700759, "grad_norm": 0.0, "learning_rate": 1.7494343832841903e-05, "loss": 1.1934, "step": 6477 }, { "epoch": 0.2534627122623053, "grad_norm": 0.0, "learning_rate": 1.749350476603656e-05, "loss": 1.1375, "step": 6478 }, { "epoch": 0.25350183895453476, "grad_norm": 0.0, "learning_rate": 1.7492665578894917e-05, "loss": 1.209, "step": 6479 }, { "epoch": 0.2535409656467642, "grad_norm": 0.0, "learning_rate": 1.7491826271430453e-05, "loss": 1.0728, "step": 6480 }, { "epoch": 0.25358009233899365, "grad_norm": 0.0, "learning_rate": 1.7490986843656648e-05, "loss": 1.1603, "step": 6481 }, { "epoch": 0.2536192190312231, "grad_norm": 0.0, "learning_rate": 1.7490147295586983e-05, "loss": 1.1468, "step": 6482 }, { "epoch": 0.2536583457234525, "grad_norm": 0.0, "learning_rate": 1.7489307627234936e-05, "loss": 0.9748, "step": 6483 }, { "epoch": 0.25369747241568197, "grad_norm": 0.0, "learning_rate": 1.7488467838613995e-05, "loss": 1.0875, "step": 6484 }, { "epoch": 0.2537365991079114, "grad_norm": 0.0, "learning_rate": 1.748762792973764e-05, "loss": 1.1025, "step": 6485 }, { "epoch": 0.25377572580014085, "grad_norm": 0.0, "learning_rate": 1.7486787900619366e-05, "loss": 1.1421, "step": 6486 }, { "epoch": 0.2538148524923703, "grad_norm": 0.0, "learning_rate": 1.7485947751272657e-05, "loss": 1.1947, "step": 6487 }, { "epoch": 0.25385397918459973, "grad_norm": 0.0, "learning_rate": 1.7485107481711014e-05, "loss": 1.0044, "step": 6488 }, { "epoch": 0.2538931058768292, "grad_norm": 0.0, "learning_rate": 1.748426709194792e-05, "loss": 1.1182, "step": 6489 }, { "epoch": 0.2539322325690586, "grad_norm": 0.0, "learning_rate": 1.7483426581996874e-05, "loss": 0.9367, "step": 6490 }, { "epoch": 0.25397135926128805, "grad_norm": 0.0, "learning_rate": 1.7482585951871374e-05, "loss": 1.1769, "step": 6491 }, { "epoch": 0.2540104859535175, "grad_norm": 0.0, "learning_rate": 1.748174520158492e-05, "loss": 1.0826, "step": 6492 }, { "epoch": 0.25404961264574694, "grad_norm": 0.0, "learning_rate": 1.7480904331151015e-05, "loss": 1.2732, "step": 6493 }, { "epoch": 0.2540887393379764, "grad_norm": 0.0, "learning_rate": 1.748006334058316e-05, "loss": 1.2517, "step": 6494 }, { "epoch": 0.2541278660302058, "grad_norm": 0.0, "learning_rate": 1.7479222229894863e-05, "loss": 1.2352, "step": 6495 }, { "epoch": 0.25416699272243526, "grad_norm": 0.0, "learning_rate": 1.7478380999099624e-05, "loss": 1.2178, "step": 6496 }, { "epoch": 0.2542061194146647, "grad_norm": 0.0, "learning_rate": 1.747753964821096e-05, "loss": 1.088, "step": 6497 }, { "epoch": 0.25424524610689414, "grad_norm": 0.0, "learning_rate": 1.747669817724238e-05, "loss": 1.1415, "step": 6498 }, { "epoch": 0.2542843727991236, "grad_norm": 0.0, "learning_rate": 1.7475856586207396e-05, "loss": 1.1464, "step": 6499 }, { "epoch": 0.254323499491353, "grad_norm": 0.0, "learning_rate": 1.747501487511952e-05, "loss": 1.0598, "step": 6500 }, { "epoch": 0.25436262618358246, "grad_norm": 0.0, "learning_rate": 1.7474173043992278e-05, "loss": 1.155, "step": 6501 }, { "epoch": 0.2544017528758119, "grad_norm": 0.0, "learning_rate": 1.7473331092839177e-05, "loss": 1.2058, "step": 6502 }, { "epoch": 0.25444087956804134, "grad_norm": 0.0, "learning_rate": 1.7472489021673743e-05, "loss": 1.0725, "step": 6503 }, { "epoch": 0.25448000626027073, "grad_norm": 0.0, "learning_rate": 1.7471646830509503e-05, "loss": 1.1925, "step": 6504 }, { "epoch": 0.25451913295250017, "grad_norm": 0.0, "learning_rate": 1.7470804519359973e-05, "loss": 1.1204, "step": 6505 }, { "epoch": 0.2545582596447296, "grad_norm": 0.0, "learning_rate": 1.7469962088238688e-05, "loss": 1.2126, "step": 6506 }, { "epoch": 0.25459738633695905, "grad_norm": 0.0, "learning_rate": 1.7469119537159168e-05, "loss": 1.1418, "step": 6507 }, { "epoch": 0.2546365130291885, "grad_norm": 0.0, "learning_rate": 1.746827686613495e-05, "loss": 1.1087, "step": 6508 }, { "epoch": 0.25467563972141793, "grad_norm": 0.0, "learning_rate": 1.7467434075179562e-05, "loss": 1.188, "step": 6509 }, { "epoch": 0.2547147664136474, "grad_norm": 0.0, "learning_rate": 1.7466591164306545e-05, "loss": 1.1553, "step": 6510 }, { "epoch": 0.2547538931058768, "grad_norm": 0.0, "learning_rate": 1.7465748133529424e-05, "loss": 1.128, "step": 6511 }, { "epoch": 0.25479301979810626, "grad_norm": 0.0, "learning_rate": 1.7464904982861745e-05, "loss": 1.195, "step": 6512 }, { "epoch": 0.2548321464903357, "grad_norm": 0.0, "learning_rate": 1.7464061712317047e-05, "loss": 1.2294, "step": 6513 }, { "epoch": 0.25487127318256514, "grad_norm": 0.0, "learning_rate": 1.746321832190887e-05, "loss": 1.0941, "step": 6514 }, { "epoch": 0.2549103998747946, "grad_norm": 0.0, "learning_rate": 1.746237481165076e-05, "loss": 1.1315, "step": 6515 }, { "epoch": 0.254949526567024, "grad_norm": 0.0, "learning_rate": 1.746153118155626e-05, "loss": 1.1624, "step": 6516 }, { "epoch": 0.25498865325925346, "grad_norm": 0.0, "learning_rate": 1.746068743163892e-05, "loss": 1.0897, "step": 6517 }, { "epoch": 0.2550277799514829, "grad_norm": 0.0, "learning_rate": 1.745984356191229e-05, "loss": 1.1155, "step": 6518 }, { "epoch": 0.25506690664371234, "grad_norm": 0.0, "learning_rate": 1.7458999572389918e-05, "loss": 1.1443, "step": 6519 }, { "epoch": 0.2551060333359418, "grad_norm": 0.0, "learning_rate": 1.745815546308536e-05, "loss": 1.126, "step": 6520 }, { "epoch": 0.2551451600281712, "grad_norm": 0.0, "learning_rate": 1.7457311234012167e-05, "loss": 1.2877, "step": 6521 }, { "epoch": 0.25518428672040067, "grad_norm": 0.0, "learning_rate": 1.7456466885183906e-05, "loss": 1.1671, "step": 6522 }, { "epoch": 0.2552234134126301, "grad_norm": 0.0, "learning_rate": 1.7455622416614127e-05, "loss": 1.1263, "step": 6523 }, { "epoch": 0.25526254010485955, "grad_norm": 0.0, "learning_rate": 1.7454777828316397e-05, "loss": 0.9959, "step": 6524 }, { "epoch": 0.255301666797089, "grad_norm": 0.0, "learning_rate": 1.7453933120304275e-05, "loss": 1.1888, "step": 6525 }, { "epoch": 0.25534079348931843, "grad_norm": 0.0, "learning_rate": 1.7453088292591327e-05, "loss": 1.0827, "step": 6526 }, { "epoch": 0.25537992018154787, "grad_norm": 0.0, "learning_rate": 1.745224334519112e-05, "loss": 1.0984, "step": 6527 }, { "epoch": 0.2554190468737773, "grad_norm": 0.0, "learning_rate": 1.7451398278117225e-05, "loss": 1.1892, "step": 6528 }, { "epoch": 0.25545817356600675, "grad_norm": 0.0, "learning_rate": 1.745055309138321e-05, "loss": 1.1556, "step": 6529 }, { "epoch": 0.2554973002582362, "grad_norm": 0.0, "learning_rate": 1.7449707785002647e-05, "loss": 1.0412, "step": 6530 }, { "epoch": 0.25553642695046563, "grad_norm": 0.0, "learning_rate": 1.7448862358989115e-05, "loss": 1.137, "step": 6531 }, { "epoch": 0.255575553642695, "grad_norm": 0.0, "learning_rate": 1.744801681335618e-05, "loss": 1.2047, "step": 6532 }, { "epoch": 0.25561468033492446, "grad_norm": 0.0, "learning_rate": 1.7447171148117435e-05, "loss": 1.1118, "step": 6533 }, { "epoch": 0.2556538070271539, "grad_norm": 0.0, "learning_rate": 1.7446325363286452e-05, "loss": 1.187, "step": 6534 }, { "epoch": 0.25569293371938334, "grad_norm": 0.0, "learning_rate": 1.7445479458876816e-05, "loss": 1.2589, "step": 6535 }, { "epoch": 0.2557320604116128, "grad_norm": 0.0, "learning_rate": 1.7444633434902107e-05, "loss": 1.1359, "step": 6536 }, { "epoch": 0.2557711871038422, "grad_norm": 0.0, "learning_rate": 1.744378729137591e-05, "loss": 1.0613, "step": 6537 }, { "epoch": 0.25581031379607166, "grad_norm": 0.0, "learning_rate": 1.7442941028311823e-05, "loss": 1.1824, "step": 6538 }, { "epoch": 0.2558494404883011, "grad_norm": 0.0, "learning_rate": 1.7442094645723425e-05, "loss": 1.0662, "step": 6539 }, { "epoch": 0.25588856718053055, "grad_norm": 0.0, "learning_rate": 1.7441248143624316e-05, "loss": 1.0521, "step": 6540 }, { "epoch": 0.25592769387276, "grad_norm": 0.0, "learning_rate": 1.744040152202809e-05, "loss": 1.1202, "step": 6541 }, { "epoch": 0.25596682056498943, "grad_norm": 0.0, "learning_rate": 1.743955478094833e-05, "loss": 1.113, "step": 6542 }, { "epoch": 0.25600594725721887, "grad_norm": 0.0, "learning_rate": 1.7438707920398647e-05, "loss": 1.0189, "step": 6543 }, { "epoch": 0.2560450739494483, "grad_norm": 0.0, "learning_rate": 1.7437860940392636e-05, "loss": 1.1888, "step": 6544 }, { "epoch": 0.25608420064167775, "grad_norm": 0.0, "learning_rate": 1.7437013840943897e-05, "loss": 1.1143, "step": 6545 }, { "epoch": 0.2561233273339072, "grad_norm": 0.0, "learning_rate": 1.7436166622066036e-05, "loss": 1.089, "step": 6546 }, { "epoch": 0.25616245402613663, "grad_norm": 0.0, "learning_rate": 1.7435319283772657e-05, "loss": 1.1428, "step": 6547 }, { "epoch": 0.2562015807183661, "grad_norm": 0.0, "learning_rate": 1.7434471826077367e-05, "loss": 1.1072, "step": 6548 }, { "epoch": 0.2562407074105955, "grad_norm": 0.0, "learning_rate": 1.7433624248993776e-05, "loss": 1.0966, "step": 6549 }, { "epoch": 0.25627983410282495, "grad_norm": 0.0, "learning_rate": 1.7432776552535493e-05, "loss": 1.1167, "step": 6550 }, { "epoch": 0.2563189607950544, "grad_norm": 0.0, "learning_rate": 1.7431928736716133e-05, "loss": 1.1073, "step": 6551 }, { "epoch": 0.25635808748728384, "grad_norm": 0.0, "learning_rate": 1.7431080801549313e-05, "loss": 1.1427, "step": 6552 }, { "epoch": 0.2563972141795133, "grad_norm": 0.0, "learning_rate": 1.743023274704864e-05, "loss": 1.0983, "step": 6553 }, { "epoch": 0.2564363408717427, "grad_norm": 0.0, "learning_rate": 1.7429384573227747e-05, "loss": 1.1915, "step": 6554 }, { "epoch": 0.25647546756397216, "grad_norm": 0.0, "learning_rate": 1.7428536280100243e-05, "loss": 1.0701, "step": 6555 }, { "epoch": 0.2565145942562016, "grad_norm": 0.0, "learning_rate": 1.7427687867679755e-05, "loss": 1.0816, "step": 6556 }, { "epoch": 0.25655372094843104, "grad_norm": 0.0, "learning_rate": 1.7426839335979912e-05, "loss": 1.0557, "step": 6557 }, { "epoch": 0.2565928476406605, "grad_norm": 0.0, "learning_rate": 1.742599068501433e-05, "loss": 1.1256, "step": 6558 }, { "epoch": 0.2566319743328899, "grad_norm": 0.0, "learning_rate": 1.7425141914796646e-05, "loss": 1.121, "step": 6559 }, { "epoch": 0.25667110102511936, "grad_norm": 0.0, "learning_rate": 1.7424293025340488e-05, "loss": 1.0976, "step": 6560 }, { "epoch": 0.25671022771734875, "grad_norm": 0.0, "learning_rate": 1.742344401665948e-05, "loss": 1.2392, "step": 6561 }, { "epoch": 0.2567493544095782, "grad_norm": 0.0, "learning_rate": 1.742259488876727e-05, "loss": 1.1385, "step": 6562 }, { "epoch": 0.25678848110180763, "grad_norm": 0.0, "learning_rate": 1.7421745641677486e-05, "loss": 1.1395, "step": 6563 }, { "epoch": 0.25682760779403707, "grad_norm": 0.0, "learning_rate": 1.7420896275403768e-05, "loss": 1.0833, "step": 6564 }, { "epoch": 0.2568667344862665, "grad_norm": 0.0, "learning_rate": 1.7420046789959754e-05, "loss": 1.1816, "step": 6565 }, { "epoch": 0.25690586117849595, "grad_norm": 0.0, "learning_rate": 1.7419197185359086e-05, "loss": 0.9876, "step": 6566 }, { "epoch": 0.2569449878707254, "grad_norm": 0.0, "learning_rate": 1.7418347461615407e-05, "loss": 1.0669, "step": 6567 }, { "epoch": 0.25698411456295484, "grad_norm": 0.0, "learning_rate": 1.7417497618742364e-05, "loss": 1.114, "step": 6568 }, { "epoch": 0.2570232412551843, "grad_norm": 0.0, "learning_rate": 1.7416647656753603e-05, "loss": 1.1224, "step": 6569 }, { "epoch": 0.2570623679474137, "grad_norm": 0.0, "learning_rate": 1.7415797575662777e-05, "loss": 1.1028, "step": 6570 }, { "epoch": 0.25710149463964316, "grad_norm": 0.0, "learning_rate": 1.741494737548353e-05, "loss": 1.0261, "step": 6571 }, { "epoch": 0.2571406213318726, "grad_norm": 0.0, "learning_rate": 1.7414097056229525e-05, "loss": 1.0966, "step": 6572 }, { "epoch": 0.25717974802410204, "grad_norm": 0.0, "learning_rate": 1.7413246617914408e-05, "loss": 1.1824, "step": 6573 }, { "epoch": 0.2572188747163315, "grad_norm": 0.0, "learning_rate": 1.7412396060551843e-05, "loss": 1.1059, "step": 6574 }, { "epoch": 0.2572580014085609, "grad_norm": 0.0, "learning_rate": 1.741154538415548e-05, "loss": 1.296, "step": 6575 }, { "epoch": 0.25729712810079036, "grad_norm": 0.0, "learning_rate": 1.741069458873899e-05, "loss": 1.1234, "step": 6576 }, { "epoch": 0.2573362547930198, "grad_norm": 0.0, "learning_rate": 1.7409843674316026e-05, "loss": 1.0905, "step": 6577 }, { "epoch": 0.25737538148524924, "grad_norm": 0.0, "learning_rate": 1.7408992640900263e-05, "loss": 1.1157, "step": 6578 }, { "epoch": 0.2574145081774787, "grad_norm": 0.0, "learning_rate": 1.740814148850536e-05, "loss": 1.1, "step": 6579 }, { "epoch": 0.2574536348697081, "grad_norm": 0.0, "learning_rate": 1.7407290217144988e-05, "loss": 1.1888, "step": 6580 }, { "epoch": 0.25749276156193757, "grad_norm": 0.0, "learning_rate": 1.7406438826832818e-05, "loss": 1.2493, "step": 6581 }, { "epoch": 0.257531888254167, "grad_norm": 0.0, "learning_rate": 1.740558731758252e-05, "loss": 1.1855, "step": 6582 }, { "epoch": 0.25757101494639645, "grad_norm": 0.0, "learning_rate": 1.7404735689407768e-05, "loss": 0.9633, "step": 6583 }, { "epoch": 0.2576101416386259, "grad_norm": 0.0, "learning_rate": 1.740388394232224e-05, "loss": 1.0888, "step": 6584 }, { "epoch": 0.25764926833085533, "grad_norm": 0.0, "learning_rate": 1.7403032076339617e-05, "loss": 1.1726, "step": 6585 }, { "epoch": 0.25768839502308477, "grad_norm": 0.0, "learning_rate": 1.7402180091473574e-05, "loss": 1.1573, "step": 6586 }, { "epoch": 0.2577275217153142, "grad_norm": 0.0, "learning_rate": 1.740132798773779e-05, "loss": 1.3249, "step": 6587 }, { "epoch": 0.25776664840754365, "grad_norm": 0.0, "learning_rate": 1.7400475765145958e-05, "loss": 1.0158, "step": 6588 }, { "epoch": 0.25780577509977304, "grad_norm": 0.0, "learning_rate": 1.7399623423711757e-05, "loss": 1.1605, "step": 6589 }, { "epoch": 0.2578449017920025, "grad_norm": 0.0, "learning_rate": 1.7398770963448872e-05, "loss": 1.0251, "step": 6590 }, { "epoch": 0.2578840284842319, "grad_norm": 0.0, "learning_rate": 1.7397918384371003e-05, "loss": 1.1491, "step": 6591 }, { "epoch": 0.25792315517646136, "grad_norm": 0.0, "learning_rate": 1.739706568649183e-05, "loss": 1.072, "step": 6592 }, { "epoch": 0.2579622818686908, "grad_norm": 0.0, "learning_rate": 1.7396212869825053e-05, "loss": 1.1005, "step": 6593 }, { "epoch": 0.25800140856092024, "grad_norm": 0.0, "learning_rate": 1.7395359934384366e-05, "loss": 1.1695, "step": 6594 }, { "epoch": 0.2580405352531497, "grad_norm": 0.0, "learning_rate": 1.7394506880183463e-05, "loss": 1.1796, "step": 6595 }, { "epoch": 0.2580796619453791, "grad_norm": 0.0, "learning_rate": 1.7393653707236045e-05, "loss": 1.1927, "step": 6596 }, { "epoch": 0.25811878863760856, "grad_norm": 0.0, "learning_rate": 1.7392800415555818e-05, "loss": 1.0897, "step": 6597 }, { "epoch": 0.258157915329838, "grad_norm": 0.0, "learning_rate": 1.7391947005156476e-05, "loss": 1.1991, "step": 6598 }, { "epoch": 0.25819704202206745, "grad_norm": 0.0, "learning_rate": 1.7391093476051728e-05, "loss": 1.1423, "step": 6599 }, { "epoch": 0.2582361687142969, "grad_norm": 0.0, "learning_rate": 1.7390239828255282e-05, "loss": 1.1162, "step": 6600 }, { "epoch": 0.25827529540652633, "grad_norm": 0.0, "learning_rate": 1.7389386061780843e-05, "loss": 1.2562, "step": 6601 }, { "epoch": 0.25831442209875577, "grad_norm": 0.0, "learning_rate": 1.7388532176642127e-05, "loss": 1.0165, "step": 6602 }, { "epoch": 0.2583535487909852, "grad_norm": 0.0, "learning_rate": 1.7387678172852837e-05, "loss": 1.1005, "step": 6603 }, { "epoch": 0.25839267548321465, "grad_norm": 0.0, "learning_rate": 1.7386824050426697e-05, "loss": 1.0551, "step": 6604 }, { "epoch": 0.2584318021754441, "grad_norm": 0.0, "learning_rate": 1.7385969809377418e-05, "loss": 1.183, "step": 6605 }, { "epoch": 0.25847092886767353, "grad_norm": 0.0, "learning_rate": 1.7385115449718718e-05, "loss": 1.3622, "step": 6606 }, { "epoch": 0.258510055559903, "grad_norm": 0.0, "learning_rate": 1.738426097146432e-05, "loss": 1.0568, "step": 6607 }, { "epoch": 0.2585491822521324, "grad_norm": 0.0, "learning_rate": 1.738340637462794e-05, "loss": 1.1768, "step": 6608 }, { "epoch": 0.25858830894436186, "grad_norm": 0.0, "learning_rate": 1.738255165922331e-05, "loss": 1.0323, "step": 6609 }, { "epoch": 0.2586274356365913, "grad_norm": 0.0, "learning_rate": 1.738169682526415e-05, "loss": 0.9881, "step": 6610 }, { "epoch": 0.25866656232882074, "grad_norm": 0.0, "learning_rate": 1.7380841872764185e-05, "loss": 1.0492, "step": 6611 }, { "epoch": 0.2587056890210502, "grad_norm": 0.0, "learning_rate": 1.7379986801737154e-05, "loss": 1.1525, "step": 6612 }, { "epoch": 0.2587448157132796, "grad_norm": 0.0, "learning_rate": 1.737913161219678e-05, "loss": 1.2635, "step": 6613 }, { "epoch": 0.25878394240550906, "grad_norm": 0.0, "learning_rate": 1.73782763041568e-05, "loss": 1.0284, "step": 6614 }, { "epoch": 0.2588230690977385, "grad_norm": 0.0, "learning_rate": 1.7377420877630947e-05, "loss": 0.979, "step": 6615 }, { "epoch": 0.25886219578996794, "grad_norm": 0.0, "learning_rate": 1.737656533263296e-05, "loss": 1.2404, "step": 6616 }, { "epoch": 0.2589013224821974, "grad_norm": 0.0, "learning_rate": 1.7375709669176572e-05, "loss": 1.1646, "step": 6617 }, { "epoch": 0.25894044917442677, "grad_norm": 0.0, "learning_rate": 1.7374853887275533e-05, "loss": 1.0869, "step": 6618 }, { "epoch": 0.2589795758666562, "grad_norm": 0.0, "learning_rate": 1.737399798694358e-05, "loss": 1.2184, "step": 6619 }, { "epoch": 0.25901870255888565, "grad_norm": 0.0, "learning_rate": 1.737314196819446e-05, "loss": 1.2767, "step": 6620 }, { "epoch": 0.2590578292511151, "grad_norm": 0.0, "learning_rate": 1.7372285831041923e-05, "loss": 1.086, "step": 6621 }, { "epoch": 0.25909695594334453, "grad_norm": 0.0, "learning_rate": 1.737142957549971e-05, "loss": 1.2015, "step": 6622 }, { "epoch": 0.25913608263557397, "grad_norm": 0.0, "learning_rate": 1.737057320158158e-05, "loss": 1.3355, "step": 6623 }, { "epoch": 0.2591752093278034, "grad_norm": 0.0, "learning_rate": 1.7369716709301275e-05, "loss": 1.1996, "step": 6624 }, { "epoch": 0.25921433602003285, "grad_norm": 0.0, "learning_rate": 1.736886009867255e-05, "loss": 1.0744, "step": 6625 }, { "epoch": 0.2592534627122623, "grad_norm": 0.0, "learning_rate": 1.7368003369709175e-05, "loss": 1.0771, "step": 6626 }, { "epoch": 0.25929258940449174, "grad_norm": 0.0, "learning_rate": 1.7367146522424895e-05, "loss": 1.2311, "step": 6627 }, { "epoch": 0.2593317160967212, "grad_norm": 0.0, "learning_rate": 1.7366289556833473e-05, "loss": 1.2138, "step": 6628 }, { "epoch": 0.2593708427889506, "grad_norm": 0.0, "learning_rate": 1.736543247294867e-05, "loss": 1.1292, "step": 6629 }, { "epoch": 0.25940996948118006, "grad_norm": 0.0, "learning_rate": 1.736457527078425e-05, "loss": 1.1911, "step": 6630 }, { "epoch": 0.2594490961734095, "grad_norm": 0.0, "learning_rate": 1.7363717950353983e-05, "loss": 1.1313, "step": 6631 }, { "epoch": 0.25948822286563894, "grad_norm": 0.0, "learning_rate": 1.7362860511671634e-05, "loss": 1.1499, "step": 6632 }, { "epoch": 0.2595273495578684, "grad_norm": 0.0, "learning_rate": 1.7362002954750967e-05, "loss": 1.0535, "step": 6633 }, { "epoch": 0.2595664762500978, "grad_norm": 0.0, "learning_rate": 1.736114527960576e-05, "loss": 1.1465, "step": 6634 }, { "epoch": 0.25960560294232726, "grad_norm": 0.0, "learning_rate": 1.7360287486249782e-05, "loss": 1.1168, "step": 6635 }, { "epoch": 0.2596447296345567, "grad_norm": 0.0, "learning_rate": 1.7359429574696813e-05, "loss": 1.0954, "step": 6636 }, { "epoch": 0.25968385632678614, "grad_norm": 0.0, "learning_rate": 1.7358571544960623e-05, "loss": 1.0877, "step": 6637 }, { "epoch": 0.2597229830190156, "grad_norm": 0.0, "learning_rate": 1.7357713397054995e-05, "loss": 1.1899, "step": 6638 }, { "epoch": 0.259762109711245, "grad_norm": 0.0, "learning_rate": 1.7356855130993713e-05, "loss": 1.1289, "step": 6639 }, { "epoch": 0.25980123640347447, "grad_norm": 0.0, "learning_rate": 1.7355996746790556e-05, "loss": 1.2284, "step": 6640 }, { "epoch": 0.2598403630957039, "grad_norm": 0.0, "learning_rate": 1.7355138244459306e-05, "loss": 0.9862, "step": 6641 }, { "epoch": 0.25987948978793335, "grad_norm": 0.0, "learning_rate": 1.7354279624013753e-05, "loss": 1.1306, "step": 6642 }, { "epoch": 0.2599186164801628, "grad_norm": 0.0, "learning_rate": 1.7353420885467688e-05, "loss": 1.2341, "step": 6643 }, { "epoch": 0.25995774317239223, "grad_norm": 0.0, "learning_rate": 1.7352562028834895e-05, "loss": 1.2251, "step": 6644 }, { "epoch": 0.25999686986462167, "grad_norm": 0.0, "learning_rate": 1.735170305412917e-05, "loss": 1.1242, "step": 6645 }, { "epoch": 0.26003599655685106, "grad_norm": 0.0, "learning_rate": 1.7350843961364307e-05, "loss": 1.129, "step": 6646 }, { "epoch": 0.2600751232490805, "grad_norm": 0.0, "learning_rate": 1.73499847505541e-05, "loss": 1.1502, "step": 6647 }, { "epoch": 0.26011424994130994, "grad_norm": 0.0, "learning_rate": 1.7349125421712346e-05, "loss": 1.1144, "step": 6648 }, { "epoch": 0.2601533766335394, "grad_norm": 0.0, "learning_rate": 1.7348265974852847e-05, "loss": 1.084, "step": 6649 }, { "epoch": 0.2601925033257688, "grad_norm": 0.0, "learning_rate": 1.7347406409989407e-05, "loss": 1.1658, "step": 6650 }, { "epoch": 0.26023163001799826, "grad_norm": 0.0, "learning_rate": 1.7346546727135823e-05, "loss": 1.0538, "step": 6651 }, { "epoch": 0.2602707567102277, "grad_norm": 0.0, "learning_rate": 1.7345686926305908e-05, "loss": 1.1579, "step": 6652 }, { "epoch": 0.26030988340245714, "grad_norm": 0.0, "learning_rate": 1.7344827007513464e-05, "loss": 1.0239, "step": 6653 }, { "epoch": 0.2603490100946866, "grad_norm": 0.0, "learning_rate": 1.7343966970772303e-05, "loss": 1.162, "step": 6654 }, { "epoch": 0.260388136786916, "grad_norm": 0.0, "learning_rate": 1.7343106816096234e-05, "loss": 0.9659, "step": 6655 }, { "epoch": 0.26042726347914547, "grad_norm": 0.0, "learning_rate": 1.7342246543499074e-05, "loss": 1.2029, "step": 6656 }, { "epoch": 0.2604663901713749, "grad_norm": 0.0, "learning_rate": 1.734138615299463e-05, "loss": 0.9999, "step": 6657 }, { "epoch": 0.26050551686360435, "grad_norm": 0.0, "learning_rate": 1.7340525644596728e-05, "loss": 1.1097, "step": 6658 }, { "epoch": 0.2605446435558338, "grad_norm": 0.0, "learning_rate": 1.7339665018319178e-05, "loss": 1.2263, "step": 6659 }, { "epoch": 0.26058377024806323, "grad_norm": 0.0, "learning_rate": 1.7338804274175805e-05, "loss": 1.1777, "step": 6660 }, { "epoch": 0.26062289694029267, "grad_norm": 0.0, "learning_rate": 1.7337943412180435e-05, "loss": 1.1581, "step": 6661 }, { "epoch": 0.2606620236325221, "grad_norm": 0.0, "learning_rate": 1.7337082432346888e-05, "loss": 1.0647, "step": 6662 }, { "epoch": 0.26070115032475155, "grad_norm": 0.0, "learning_rate": 1.733622133468899e-05, "loss": 1.1975, "step": 6663 }, { "epoch": 0.260740277016981, "grad_norm": 0.0, "learning_rate": 1.733536011922057e-05, "loss": 1.0816, "step": 6664 }, { "epoch": 0.26077940370921043, "grad_norm": 0.0, "learning_rate": 1.733449878595546e-05, "loss": 1.0363, "step": 6665 }, { "epoch": 0.2608185304014399, "grad_norm": 0.0, "learning_rate": 1.7333637334907487e-05, "loss": 1.1218, "step": 6666 }, { "epoch": 0.2608576570936693, "grad_norm": 0.0, "learning_rate": 1.7332775766090492e-05, "loss": 1.1593, "step": 6667 }, { "epoch": 0.26089678378589876, "grad_norm": 0.0, "learning_rate": 1.7331914079518305e-05, "loss": 1.2074, "step": 6668 }, { "epoch": 0.2609359104781282, "grad_norm": 0.0, "learning_rate": 1.733105227520476e-05, "loss": 1.1467, "step": 6669 }, { "epoch": 0.26097503717035764, "grad_norm": 0.0, "learning_rate": 1.733019035316371e-05, "loss": 1.187, "step": 6670 }, { "epoch": 0.2610141638625871, "grad_norm": 0.0, "learning_rate": 1.7329328313408984e-05, "loss": 1.1909, "step": 6671 }, { "epoch": 0.2610532905548165, "grad_norm": 0.0, "learning_rate": 1.7328466155954428e-05, "loss": 1.1895, "step": 6672 }, { "epoch": 0.26109241724704596, "grad_norm": 0.0, "learning_rate": 1.7327603880813893e-05, "loss": 1.1223, "step": 6673 }, { "epoch": 0.2611315439392754, "grad_norm": 0.0, "learning_rate": 1.732674148800122e-05, "loss": 1.1246, "step": 6674 }, { "epoch": 0.2611706706315048, "grad_norm": 0.0, "learning_rate": 1.7325878977530258e-05, "loss": 1.1206, "step": 6675 }, { "epoch": 0.2612097973237342, "grad_norm": 0.0, "learning_rate": 1.7325016349414858e-05, "loss": 1.1287, "step": 6676 }, { "epoch": 0.26124892401596367, "grad_norm": 0.0, "learning_rate": 1.732415360366888e-05, "loss": 1.1027, "step": 6677 }, { "epoch": 0.2612880507081931, "grad_norm": 0.0, "learning_rate": 1.732329074030617e-05, "loss": 1.204, "step": 6678 }, { "epoch": 0.26132717740042255, "grad_norm": 0.0, "learning_rate": 1.7322427759340584e-05, "loss": 1.1281, "step": 6679 }, { "epoch": 0.261366304092652, "grad_norm": 0.0, "learning_rate": 1.732156466078599e-05, "loss": 1.1635, "step": 6680 }, { "epoch": 0.26140543078488143, "grad_norm": 0.0, "learning_rate": 1.7320701444656235e-05, "loss": 1.2204, "step": 6681 }, { "epoch": 0.2614445574771109, "grad_norm": 0.0, "learning_rate": 1.7319838110965192e-05, "loss": 1.1428, "step": 6682 }, { "epoch": 0.2614836841693403, "grad_norm": 0.0, "learning_rate": 1.7318974659726722e-05, "loss": 1.0007, "step": 6683 }, { "epoch": 0.26152281086156975, "grad_norm": 0.0, "learning_rate": 1.731811109095469e-05, "loss": 1.0828, "step": 6684 }, { "epoch": 0.2615619375537992, "grad_norm": 0.0, "learning_rate": 1.7317247404662963e-05, "loss": 1.2355, "step": 6685 }, { "epoch": 0.26160106424602864, "grad_norm": 0.0, "learning_rate": 1.731638360086541e-05, "loss": 1.0446, "step": 6686 }, { "epoch": 0.2616401909382581, "grad_norm": 0.0, "learning_rate": 1.7315519679575905e-05, "loss": 1.2053, "step": 6687 }, { "epoch": 0.2616793176304875, "grad_norm": 0.0, "learning_rate": 1.731465564080832e-05, "loss": 1.1868, "step": 6688 }, { "epoch": 0.26171844432271696, "grad_norm": 0.0, "learning_rate": 1.7313791484576533e-05, "loss": 1.3285, "step": 6689 }, { "epoch": 0.2617575710149464, "grad_norm": 0.0, "learning_rate": 1.731292721089442e-05, "loss": 1.0479, "step": 6690 }, { "epoch": 0.26179669770717584, "grad_norm": 0.0, "learning_rate": 1.731206281977586e-05, "loss": 1.1077, "step": 6691 }, { "epoch": 0.2618358243994053, "grad_norm": 0.0, "learning_rate": 1.7311198311234734e-05, "loss": 1.0486, "step": 6692 }, { "epoch": 0.2618749510916347, "grad_norm": 0.0, "learning_rate": 1.7310333685284924e-05, "loss": 1.1622, "step": 6693 }, { "epoch": 0.26191407778386416, "grad_norm": 0.0, "learning_rate": 1.7309468941940312e-05, "loss": 1.233, "step": 6694 }, { "epoch": 0.2619532044760936, "grad_norm": 0.0, "learning_rate": 1.7308604081214793e-05, "loss": 1.2091, "step": 6695 }, { "epoch": 0.26199233116832304, "grad_norm": 0.0, "learning_rate": 1.7307739103122247e-05, "loss": 1.2341, "step": 6696 }, { "epoch": 0.2620314578605525, "grad_norm": 0.0, "learning_rate": 1.730687400767657e-05, "loss": 1.2513, "step": 6697 }, { "epoch": 0.2620705845527819, "grad_norm": 0.0, "learning_rate": 1.730600879489165e-05, "loss": 0.9594, "step": 6698 }, { "epoch": 0.26210971124501137, "grad_norm": 0.0, "learning_rate": 1.7305143464781387e-05, "loss": 1.1366, "step": 6699 }, { "epoch": 0.2621488379372408, "grad_norm": 0.0, "learning_rate": 1.7304278017359672e-05, "loss": 1.0854, "step": 6700 }, { "epoch": 0.26218796462947025, "grad_norm": 0.0, "learning_rate": 1.7303412452640404e-05, "loss": 1.0668, "step": 6701 }, { "epoch": 0.2622270913216997, "grad_norm": 0.0, "learning_rate": 1.730254677063749e-05, "loss": 1.1745, "step": 6702 }, { "epoch": 0.2622662180139291, "grad_norm": 0.0, "learning_rate": 1.7301680971364817e-05, "loss": 1.1131, "step": 6703 }, { "epoch": 0.2623053447061585, "grad_norm": 0.0, "learning_rate": 1.73008150548363e-05, "loss": 1.168, "step": 6704 }, { "epoch": 0.26234447139838796, "grad_norm": 0.0, "learning_rate": 1.7299949021065842e-05, "loss": 1.0228, "step": 6705 }, { "epoch": 0.2623835980906174, "grad_norm": 0.0, "learning_rate": 1.729908287006735e-05, "loss": 1.1752, "step": 6706 }, { "epoch": 0.26242272478284684, "grad_norm": 0.0, "learning_rate": 1.7298216601854734e-05, "loss": 1.1863, "step": 6707 }, { "epoch": 0.2624618514750763, "grad_norm": 0.0, "learning_rate": 1.7297350216441903e-05, "loss": 1.1524, "step": 6708 }, { "epoch": 0.2625009781673057, "grad_norm": 0.0, "learning_rate": 1.7296483713842772e-05, "loss": 1.1494, "step": 6709 }, { "epoch": 0.26254010485953516, "grad_norm": 0.0, "learning_rate": 1.7295617094071256e-05, "loss": 1.0369, "step": 6710 }, { "epoch": 0.2625792315517646, "grad_norm": 0.0, "learning_rate": 1.7294750357141273e-05, "loss": 1.1031, "step": 6711 }, { "epoch": 0.26261835824399404, "grad_norm": 0.0, "learning_rate": 1.729388350306674e-05, "loss": 1.2446, "step": 6712 }, { "epoch": 0.2626574849362235, "grad_norm": 0.0, "learning_rate": 1.7293016531861575e-05, "loss": 1.2765, "step": 6713 }, { "epoch": 0.2626966116284529, "grad_norm": 0.0, "learning_rate": 1.7292149443539706e-05, "loss": 1.2275, "step": 6714 }, { "epoch": 0.26273573832068237, "grad_norm": 0.0, "learning_rate": 1.7291282238115052e-05, "loss": 1.1304, "step": 6715 }, { "epoch": 0.2627748650129118, "grad_norm": 0.0, "learning_rate": 1.7290414915601543e-05, "loss": 1.2815, "step": 6716 }, { "epoch": 0.26281399170514125, "grad_norm": 0.0, "learning_rate": 1.7289547476013105e-05, "loss": 1.2474, "step": 6717 }, { "epoch": 0.2628531183973707, "grad_norm": 0.0, "learning_rate": 1.728867991936367e-05, "loss": 1.2285, "step": 6718 }, { "epoch": 0.26289224508960013, "grad_norm": 0.0, "learning_rate": 1.7287812245667168e-05, "loss": 1.0813, "step": 6719 }, { "epoch": 0.26293137178182957, "grad_norm": 0.0, "learning_rate": 1.7286944454937536e-05, "loss": 1.0217, "step": 6720 }, { "epoch": 0.262970498474059, "grad_norm": 0.0, "learning_rate": 1.7286076547188703e-05, "loss": 1.1456, "step": 6721 }, { "epoch": 0.26300962516628845, "grad_norm": 0.0, "learning_rate": 1.7285208522434615e-05, "loss": 1.1633, "step": 6722 }, { "epoch": 0.2630487518585179, "grad_norm": 0.0, "learning_rate": 1.7284340380689203e-05, "loss": 1.0519, "step": 6723 }, { "epoch": 0.26308787855074733, "grad_norm": 0.0, "learning_rate": 1.7283472121966414e-05, "loss": 1.1991, "step": 6724 }, { "epoch": 0.2631270052429768, "grad_norm": 0.0, "learning_rate": 1.728260374628019e-05, "loss": 1.2553, "step": 6725 }, { "epoch": 0.2631661319352062, "grad_norm": 0.0, "learning_rate": 1.728173525364447e-05, "loss": 1.1265, "step": 6726 }, { "epoch": 0.26320525862743566, "grad_norm": 0.0, "learning_rate": 1.7280866644073214e-05, "loss": 1.0154, "step": 6727 }, { "epoch": 0.2632443853196651, "grad_norm": 0.0, "learning_rate": 1.727999791758036e-05, "loss": 1.141, "step": 6728 }, { "epoch": 0.26328351201189454, "grad_norm": 0.0, "learning_rate": 1.727912907417986e-05, "loss": 1.2047, "step": 6729 }, { "epoch": 0.263322638704124, "grad_norm": 0.0, "learning_rate": 1.727826011388567e-05, "loss": 1.1212, "step": 6730 }, { "epoch": 0.2633617653963534, "grad_norm": 0.0, "learning_rate": 1.7277391036711747e-05, "loss": 1.1066, "step": 6731 }, { "epoch": 0.2634008920885828, "grad_norm": 0.0, "learning_rate": 1.727652184267204e-05, "loss": 1.1135, "step": 6732 }, { "epoch": 0.26344001878081225, "grad_norm": 0.0, "learning_rate": 1.7275652531780508e-05, "loss": 1.0594, "step": 6733 }, { "epoch": 0.2634791454730417, "grad_norm": 0.0, "learning_rate": 1.7274783104051112e-05, "loss": 1.0533, "step": 6734 }, { "epoch": 0.26351827216527113, "grad_norm": 0.0, "learning_rate": 1.7273913559497818e-05, "loss": 1.1683, "step": 6735 }, { "epoch": 0.26355739885750057, "grad_norm": 0.0, "learning_rate": 1.7273043898134587e-05, "loss": 1.246, "step": 6736 }, { "epoch": 0.26359652554973, "grad_norm": 0.0, "learning_rate": 1.7272174119975386e-05, "loss": 1.0787, "step": 6737 }, { "epoch": 0.26363565224195945, "grad_norm": 0.0, "learning_rate": 1.7271304225034177e-05, "loss": 1.1102, "step": 6738 }, { "epoch": 0.2636747789341889, "grad_norm": 0.0, "learning_rate": 1.7270434213324936e-05, "loss": 1.1442, "step": 6739 }, { "epoch": 0.26371390562641833, "grad_norm": 0.0, "learning_rate": 1.7269564084861632e-05, "loss": 1.0429, "step": 6740 }, { "epoch": 0.2637530323186478, "grad_norm": 0.0, "learning_rate": 1.726869383965824e-05, "loss": 1.2886, "step": 6741 }, { "epoch": 0.2637921590108772, "grad_norm": 0.0, "learning_rate": 1.7267823477728727e-05, "loss": 1.2391, "step": 6742 }, { "epoch": 0.26383128570310665, "grad_norm": 0.0, "learning_rate": 1.7266952999087082e-05, "loss": 1.1257, "step": 6743 }, { "epoch": 0.2638704123953361, "grad_norm": 0.0, "learning_rate": 1.7266082403747278e-05, "loss": 1.1819, "step": 6744 }, { "epoch": 0.26390953908756554, "grad_norm": 0.0, "learning_rate": 1.7265211691723292e-05, "loss": 1.0703, "step": 6745 }, { "epoch": 0.263948665779795, "grad_norm": 0.0, "learning_rate": 1.7264340863029113e-05, "loss": 1.0772, "step": 6746 }, { "epoch": 0.2639877924720244, "grad_norm": 0.0, "learning_rate": 1.726346991767872e-05, "loss": 1.1359, "step": 6747 }, { "epoch": 0.26402691916425386, "grad_norm": 0.0, "learning_rate": 1.7262598855686105e-05, "loss": 1.1827, "step": 6748 }, { "epoch": 0.2640660458564833, "grad_norm": 0.0, "learning_rate": 1.7261727677065248e-05, "loss": 1.1741, "step": 6749 }, { "epoch": 0.26410517254871274, "grad_norm": 0.0, "learning_rate": 1.726085638183015e-05, "loss": 1.079, "step": 6750 }, { "epoch": 0.2641442992409422, "grad_norm": 0.0, "learning_rate": 1.7259984969994793e-05, "loss": 1.2155, "step": 6751 }, { "epoch": 0.2641834259331716, "grad_norm": 0.0, "learning_rate": 1.7259113441573174e-05, "loss": 1.1924, "step": 6752 }, { "epoch": 0.26422255262540106, "grad_norm": 0.0, "learning_rate": 1.725824179657929e-05, "loss": 1.0557, "step": 6753 }, { "epoch": 0.2642616793176305, "grad_norm": 0.0, "learning_rate": 1.725737003502714e-05, "loss": 1.1863, "step": 6754 }, { "epoch": 0.26430080600985995, "grad_norm": 0.0, "learning_rate": 1.7256498156930717e-05, "loss": 1.1888, "step": 6755 }, { "epoch": 0.2643399327020894, "grad_norm": 0.0, "learning_rate": 1.7255626162304027e-05, "loss": 1.1833, "step": 6756 }, { "epoch": 0.2643790593943188, "grad_norm": 0.0, "learning_rate": 1.7254754051161075e-05, "loss": 1.0015, "step": 6757 }, { "epoch": 0.26441818608654827, "grad_norm": 0.0, "learning_rate": 1.7253881823515866e-05, "loss": 1.0428, "step": 6758 }, { "epoch": 0.2644573127787777, "grad_norm": 0.0, "learning_rate": 1.72530094793824e-05, "loss": 1.0559, "step": 6759 }, { "epoch": 0.2644964394710071, "grad_norm": 0.0, "learning_rate": 1.7252137018774694e-05, "loss": 1.1107, "step": 6760 }, { "epoch": 0.26453556616323654, "grad_norm": 0.0, "learning_rate": 1.7251264441706754e-05, "loss": 1.0775, "step": 6761 }, { "epoch": 0.264574692855466, "grad_norm": 0.0, "learning_rate": 1.725039174819259e-05, "loss": 1.1353, "step": 6762 }, { "epoch": 0.2646138195476954, "grad_norm": 0.0, "learning_rate": 1.7249518938246223e-05, "loss": 1.1451, "step": 6763 }, { "epoch": 0.26465294623992486, "grad_norm": 0.0, "learning_rate": 1.7248646011881665e-05, "loss": 1.1069, "step": 6764 }, { "epoch": 0.2646920729321543, "grad_norm": 0.0, "learning_rate": 1.7247772969112934e-05, "loss": 1.1614, "step": 6765 }, { "epoch": 0.26473119962438374, "grad_norm": 0.0, "learning_rate": 1.7246899809954053e-05, "loss": 1.1941, "step": 6766 }, { "epoch": 0.2647703263166132, "grad_norm": 0.0, "learning_rate": 1.724602653441904e-05, "loss": 1.2394, "step": 6767 }, { "epoch": 0.2648094530088426, "grad_norm": 0.0, "learning_rate": 1.7245153142521923e-05, "loss": 1.2094, "step": 6768 }, { "epoch": 0.26484857970107206, "grad_norm": 0.0, "learning_rate": 1.7244279634276725e-05, "loss": 1.1125, "step": 6769 }, { "epoch": 0.2648877063933015, "grad_norm": 0.0, "learning_rate": 1.724340600969747e-05, "loss": 1.2842, "step": 6770 }, { "epoch": 0.26492683308553094, "grad_norm": 0.0, "learning_rate": 1.72425322687982e-05, "loss": 1.1042, "step": 6771 }, { "epoch": 0.2649659597777604, "grad_norm": 0.0, "learning_rate": 1.7241658411592926e-05, "loss": 1.0487, "step": 6772 }, { "epoch": 0.2650050864699898, "grad_norm": 0.0, "learning_rate": 1.72407844380957e-05, "loss": 1.2023, "step": 6773 }, { "epoch": 0.26504421316221927, "grad_norm": 0.0, "learning_rate": 1.7239910348320546e-05, "loss": 1.2093, "step": 6774 }, { "epoch": 0.2650833398544487, "grad_norm": 0.0, "learning_rate": 1.7239036142281502e-05, "loss": 1.2138, "step": 6775 }, { "epoch": 0.26512246654667815, "grad_norm": 0.0, "learning_rate": 1.7238161819992613e-05, "loss": 1.1141, "step": 6776 }, { "epoch": 0.2651615932389076, "grad_norm": 0.0, "learning_rate": 1.7237287381467915e-05, "loss": 1.156, "step": 6777 }, { "epoch": 0.26520071993113703, "grad_norm": 0.0, "learning_rate": 1.7236412826721445e-05, "loss": 1.1246, "step": 6778 }, { "epoch": 0.26523984662336647, "grad_norm": 0.0, "learning_rate": 1.7235538155767257e-05, "loss": 1.1487, "step": 6779 }, { "epoch": 0.2652789733155959, "grad_norm": 0.0, "learning_rate": 1.7234663368619392e-05, "loss": 1.1458, "step": 6780 }, { "epoch": 0.26531810000782535, "grad_norm": 0.0, "learning_rate": 1.72337884652919e-05, "loss": 1.2936, "step": 6781 }, { "epoch": 0.2653572267000548, "grad_norm": 0.0, "learning_rate": 1.7232913445798825e-05, "loss": 1.0685, "step": 6782 }, { "epoch": 0.26539635339228423, "grad_norm": 0.0, "learning_rate": 1.723203831015423e-05, "loss": 1.1583, "step": 6783 }, { "epoch": 0.2654354800845137, "grad_norm": 0.0, "learning_rate": 1.7231163058372158e-05, "loss": 1.1637, "step": 6784 }, { "epoch": 0.2654746067767431, "grad_norm": 0.0, "learning_rate": 1.723028769046667e-05, "loss": 1.2148, "step": 6785 }, { "epoch": 0.26551373346897256, "grad_norm": 0.0, "learning_rate": 1.722941220645182e-05, "loss": 1.14, "step": 6786 }, { "epoch": 0.265552860161202, "grad_norm": 0.0, "learning_rate": 1.7228536606341672e-05, "loss": 1.0232, "step": 6787 }, { "epoch": 0.2655919868534314, "grad_norm": 0.0, "learning_rate": 1.722766089015028e-05, "loss": 1.1797, "step": 6788 }, { "epoch": 0.2656311135456608, "grad_norm": 0.0, "learning_rate": 1.722678505789171e-05, "loss": 1.175, "step": 6789 }, { "epoch": 0.26567024023789026, "grad_norm": 0.0, "learning_rate": 1.7225909109580038e-05, "loss": 1.1359, "step": 6790 }, { "epoch": 0.2657093669301197, "grad_norm": 0.0, "learning_rate": 1.7225033045229312e-05, "loss": 1.0245, "step": 6791 }, { "epoch": 0.26574849362234915, "grad_norm": 0.0, "learning_rate": 1.722415686485361e-05, "loss": 1.0877, "step": 6792 }, { "epoch": 0.2657876203145786, "grad_norm": 0.0, "learning_rate": 1.7223280568467e-05, "loss": 1.2192, "step": 6793 }, { "epoch": 0.26582674700680803, "grad_norm": 0.0, "learning_rate": 1.7222404156083555e-05, "loss": 1.108, "step": 6794 }, { "epoch": 0.26586587369903747, "grad_norm": 0.0, "learning_rate": 1.722152762771735e-05, "loss": 1.1658, "step": 6795 }, { "epoch": 0.2659050003912669, "grad_norm": 0.0, "learning_rate": 1.7220650983382462e-05, "loss": 1.0706, "step": 6796 }, { "epoch": 0.26594412708349635, "grad_norm": 0.0, "learning_rate": 1.7219774223092964e-05, "loss": 1.1353, "step": 6797 }, { "epoch": 0.2659832537757258, "grad_norm": 0.0, "learning_rate": 1.721889734686294e-05, "loss": 1.1559, "step": 6798 }, { "epoch": 0.26602238046795523, "grad_norm": 0.0, "learning_rate": 1.7218020354706473e-05, "loss": 1.176, "step": 6799 }, { "epoch": 0.2660615071601847, "grad_norm": 0.0, "learning_rate": 1.7217143246637643e-05, "loss": 1.1991, "step": 6800 }, { "epoch": 0.2661006338524141, "grad_norm": 0.0, "learning_rate": 1.7216266022670532e-05, "loss": 1.2748, "step": 6801 }, { "epoch": 0.26613976054464356, "grad_norm": 0.0, "learning_rate": 1.7215388682819237e-05, "loss": 1.2018, "step": 6802 }, { "epoch": 0.266178887236873, "grad_norm": 0.0, "learning_rate": 1.721451122709784e-05, "loss": 1.2091, "step": 6803 }, { "epoch": 0.26621801392910244, "grad_norm": 0.0, "learning_rate": 1.721363365552043e-05, "loss": 1.0593, "step": 6804 }, { "epoch": 0.2662571406213319, "grad_norm": 0.0, "learning_rate": 1.7212755968101104e-05, "loss": 1.1989, "step": 6805 }, { "epoch": 0.2662962673135613, "grad_norm": 0.0, "learning_rate": 1.7211878164853954e-05, "loss": 1.0609, "step": 6806 }, { "epoch": 0.26633539400579076, "grad_norm": 0.0, "learning_rate": 1.721100024579308e-05, "loss": 1.1497, "step": 6807 }, { "epoch": 0.2663745206980202, "grad_norm": 0.0, "learning_rate": 1.7210122210932576e-05, "loss": 1.0911, "step": 6808 }, { "epoch": 0.26641364739024964, "grad_norm": 0.0, "learning_rate": 1.7209244060286545e-05, "loss": 1.1968, "step": 6809 }, { "epoch": 0.2664527740824791, "grad_norm": 0.0, "learning_rate": 1.7208365793869087e-05, "loss": 1.1903, "step": 6810 }, { "epoch": 0.2664919007747085, "grad_norm": 0.0, "learning_rate": 1.720748741169431e-05, "loss": 1.1755, "step": 6811 }, { "epoch": 0.26653102746693796, "grad_norm": 0.0, "learning_rate": 1.7206608913776315e-05, "loss": 1.237, "step": 6812 }, { "epoch": 0.2665701541591674, "grad_norm": 0.0, "learning_rate": 1.720573030012921e-05, "loss": 1.1134, "step": 6813 }, { "epoch": 0.26660928085139685, "grad_norm": 0.0, "learning_rate": 1.7204851570767108e-05, "loss": 1.1304, "step": 6814 }, { "epoch": 0.2666484075436263, "grad_norm": 0.0, "learning_rate": 1.7203972725704114e-05, "loss": 0.9573, "step": 6815 }, { "epoch": 0.2666875342358557, "grad_norm": 0.0, "learning_rate": 1.720309376495435e-05, "loss": 1.1462, "step": 6816 }, { "epoch": 0.2667266609280851, "grad_norm": 0.0, "learning_rate": 1.7202214688531925e-05, "loss": 1.2164, "step": 6817 }, { "epoch": 0.26676578762031455, "grad_norm": 0.0, "learning_rate": 1.7201335496450954e-05, "loss": 1.2998, "step": 6818 }, { "epoch": 0.266804914312544, "grad_norm": 0.0, "learning_rate": 1.720045618872556e-05, "loss": 1.1024, "step": 6819 }, { "epoch": 0.26684404100477344, "grad_norm": 0.0, "learning_rate": 1.7199576765369865e-05, "loss": 1.2053, "step": 6820 }, { "epoch": 0.2668831676970029, "grad_norm": 0.0, "learning_rate": 1.7198697226397985e-05, "loss": 1.0699, "step": 6821 }, { "epoch": 0.2669222943892323, "grad_norm": 0.0, "learning_rate": 1.7197817571824048e-05, "loss": 1.104, "step": 6822 }, { "epoch": 0.26696142108146176, "grad_norm": 0.0, "learning_rate": 1.7196937801662182e-05, "loss": 1.1416, "step": 6823 }, { "epoch": 0.2670005477736912, "grad_norm": 0.0, "learning_rate": 1.7196057915926513e-05, "loss": 1.0955, "step": 6824 }, { "epoch": 0.26703967446592064, "grad_norm": 0.0, "learning_rate": 1.7195177914631172e-05, "loss": 1.1069, "step": 6825 }, { "epoch": 0.2670788011581501, "grad_norm": 0.0, "learning_rate": 1.7194297797790288e-05, "loss": 1.1134, "step": 6826 }, { "epoch": 0.2671179278503795, "grad_norm": 0.0, "learning_rate": 1.7193417565418e-05, "loss": 1.1429, "step": 6827 }, { "epoch": 0.26715705454260896, "grad_norm": 0.0, "learning_rate": 1.7192537217528435e-05, "loss": 1.2532, "step": 6828 }, { "epoch": 0.2671961812348384, "grad_norm": 0.0, "learning_rate": 1.7191656754135733e-05, "loss": 1.1178, "step": 6829 }, { "epoch": 0.26723530792706784, "grad_norm": 0.0, "learning_rate": 1.7190776175254043e-05, "loss": 1.2356, "step": 6830 }, { "epoch": 0.2672744346192973, "grad_norm": 0.0, "learning_rate": 1.7189895480897493e-05, "loss": 1.1091, "step": 6831 }, { "epoch": 0.2673135613115267, "grad_norm": 0.0, "learning_rate": 1.7189014671080232e-05, "loss": 1.0674, "step": 6832 }, { "epoch": 0.26735268800375617, "grad_norm": 0.0, "learning_rate": 1.7188133745816406e-05, "loss": 1.1056, "step": 6833 }, { "epoch": 0.2673918146959856, "grad_norm": 0.0, "learning_rate": 1.7187252705120155e-05, "loss": 1.1871, "step": 6834 }, { "epoch": 0.26743094138821505, "grad_norm": 0.0, "learning_rate": 1.7186371549005634e-05, "loss": 0.9976, "step": 6835 }, { "epoch": 0.2674700680804445, "grad_norm": 0.0, "learning_rate": 1.718549027748699e-05, "loss": 1.0659, "step": 6836 }, { "epoch": 0.26750919477267393, "grad_norm": 0.0, "learning_rate": 1.718460889057838e-05, "loss": 1.2539, "step": 6837 }, { "epoch": 0.26754832146490337, "grad_norm": 0.0, "learning_rate": 1.718372738829395e-05, "loss": 0.9921, "step": 6838 }, { "epoch": 0.2675874481571328, "grad_norm": 0.0, "learning_rate": 1.718284577064786e-05, "loss": 1.2213, "step": 6839 }, { "epoch": 0.26762657484936225, "grad_norm": 0.0, "learning_rate": 1.7181964037654268e-05, "loss": 1.2169, "step": 6840 }, { "epoch": 0.2676657015415917, "grad_norm": 0.0, "learning_rate": 1.7181082189327335e-05, "loss": 0.9831, "step": 6841 }, { "epoch": 0.26770482823382113, "grad_norm": 0.0, "learning_rate": 1.7180200225681217e-05, "loss": 1.0408, "step": 6842 }, { "epoch": 0.2677439549260506, "grad_norm": 0.0, "learning_rate": 1.7179318146730083e-05, "loss": 1.1537, "step": 6843 }, { "epoch": 0.26778308161828, "grad_norm": 0.0, "learning_rate": 1.7178435952488092e-05, "loss": 1.2324, "step": 6844 }, { "epoch": 0.2678222083105094, "grad_norm": 0.0, "learning_rate": 1.717755364296942e-05, "loss": 1.069, "step": 6845 }, { "epoch": 0.26786133500273884, "grad_norm": 0.0, "learning_rate": 1.7176671218188228e-05, "loss": 1.0621, "step": 6846 }, { "epoch": 0.2679004616949683, "grad_norm": 0.0, "learning_rate": 1.717578867815869e-05, "loss": 1.0906, "step": 6847 }, { "epoch": 0.2679395883871977, "grad_norm": 0.0, "learning_rate": 1.7174906022894976e-05, "loss": 1.1658, "step": 6848 }, { "epoch": 0.26797871507942717, "grad_norm": 0.0, "learning_rate": 1.7174023252411266e-05, "loss": 1.2456, "step": 6849 }, { "epoch": 0.2680178417716566, "grad_norm": 0.0, "learning_rate": 1.7173140366721725e-05, "loss": 1.2363, "step": 6850 }, { "epoch": 0.26805696846388605, "grad_norm": 0.0, "learning_rate": 1.7172257365840544e-05, "loss": 1.1496, "step": 6851 }, { "epoch": 0.2680960951561155, "grad_norm": 0.0, "learning_rate": 1.7171374249781897e-05, "loss": 1.1098, "step": 6852 }, { "epoch": 0.26813522184834493, "grad_norm": 0.0, "learning_rate": 1.7170491018559962e-05, "loss": 1.1371, "step": 6853 }, { "epoch": 0.26817434854057437, "grad_norm": 0.0, "learning_rate": 1.716960767218893e-05, "loss": 1.2033, "step": 6854 }, { "epoch": 0.2682134752328038, "grad_norm": 0.0, "learning_rate": 1.7168724210682982e-05, "loss": 1.1513, "step": 6855 }, { "epoch": 0.26825260192503325, "grad_norm": 0.0, "learning_rate": 1.7167840634056302e-05, "loss": 1.1226, "step": 6856 }, { "epoch": 0.2682917286172627, "grad_norm": 0.0, "learning_rate": 1.7166956942323086e-05, "loss": 1.1313, "step": 6857 }, { "epoch": 0.26833085530949213, "grad_norm": 0.0, "learning_rate": 1.7166073135497527e-05, "loss": 1.019, "step": 6858 }, { "epoch": 0.2683699820017216, "grad_norm": 0.0, "learning_rate": 1.7165189213593808e-05, "loss": 1.1001, "step": 6859 }, { "epoch": 0.268409108693951, "grad_norm": 0.0, "learning_rate": 1.7164305176626127e-05, "loss": 1.1187, "step": 6860 }, { "epoch": 0.26844823538618046, "grad_norm": 0.0, "learning_rate": 1.7163421024608685e-05, "loss": 1.1466, "step": 6861 }, { "epoch": 0.2684873620784099, "grad_norm": 0.0, "learning_rate": 1.716253675755568e-05, "loss": 1.2588, "step": 6862 }, { "epoch": 0.26852648877063934, "grad_norm": 0.0, "learning_rate": 1.7161652375481307e-05, "loss": 1.2079, "step": 6863 }, { "epoch": 0.2685656154628688, "grad_norm": 0.0, "learning_rate": 1.716076787839977e-05, "loss": 1.1068, "step": 6864 }, { "epoch": 0.2686047421550982, "grad_norm": 0.0, "learning_rate": 1.7159883266325273e-05, "loss": 1.0395, "step": 6865 }, { "epoch": 0.26864386884732766, "grad_norm": 0.0, "learning_rate": 1.7158998539272027e-05, "loss": 1.157, "step": 6866 }, { "epoch": 0.2686829955395571, "grad_norm": 0.0, "learning_rate": 1.7158113697254232e-05, "loss": 1.2073, "step": 6867 }, { "epoch": 0.26872212223178654, "grad_norm": 0.0, "learning_rate": 1.71572287402861e-05, "loss": 1.0681, "step": 6868 }, { "epoch": 0.268761248924016, "grad_norm": 0.0, "learning_rate": 1.7156343668381845e-05, "loss": 1.0219, "step": 6869 }, { "epoch": 0.2688003756162454, "grad_norm": 0.0, "learning_rate": 1.7155458481555676e-05, "loss": 0.9837, "step": 6870 }, { "epoch": 0.26883950230847486, "grad_norm": 0.0, "learning_rate": 1.7154573179821815e-05, "loss": 1.2389, "step": 6871 }, { "epoch": 0.2688786290007043, "grad_norm": 0.0, "learning_rate": 1.715368776319447e-05, "loss": 1.2232, "step": 6872 }, { "epoch": 0.26891775569293375, "grad_norm": 0.0, "learning_rate": 1.7152802231687863e-05, "loss": 1.1619, "step": 6873 }, { "epoch": 0.26895688238516313, "grad_norm": 0.0, "learning_rate": 1.7151916585316217e-05, "loss": 1.0852, "step": 6874 }, { "epoch": 0.2689960090773926, "grad_norm": 0.0, "learning_rate": 1.715103082409375e-05, "loss": 1.1927, "step": 6875 }, { "epoch": 0.269035135769622, "grad_norm": 0.0, "learning_rate": 1.715014494803469e-05, "loss": 1.1873, "step": 6876 }, { "epoch": 0.26907426246185145, "grad_norm": 0.0, "learning_rate": 1.714925895715326e-05, "loss": 1.1447, "step": 6877 }, { "epoch": 0.2691133891540809, "grad_norm": 0.0, "learning_rate": 1.7148372851463695e-05, "loss": 1.1131, "step": 6878 }, { "epoch": 0.26915251584631034, "grad_norm": 0.0, "learning_rate": 1.7147486630980216e-05, "loss": 1.0453, "step": 6879 }, { "epoch": 0.2691916425385398, "grad_norm": 0.0, "learning_rate": 1.714660029571706e-05, "loss": 1.2123, "step": 6880 }, { "epoch": 0.2692307692307692, "grad_norm": 0.0, "learning_rate": 1.7145713845688455e-05, "loss": 1.2596, "step": 6881 }, { "epoch": 0.26926989592299866, "grad_norm": 0.0, "learning_rate": 1.714482728090864e-05, "loss": 1.0637, "step": 6882 }, { "epoch": 0.2693090226152281, "grad_norm": 0.0, "learning_rate": 1.7143940601391854e-05, "loss": 1.1932, "step": 6883 }, { "epoch": 0.26934814930745754, "grad_norm": 0.0, "learning_rate": 1.7143053807152332e-05, "loss": 1.1756, "step": 6884 }, { "epoch": 0.269387275999687, "grad_norm": 0.0, "learning_rate": 1.714216689820432e-05, "loss": 1.1169, "step": 6885 }, { "epoch": 0.2694264026919164, "grad_norm": 0.0, "learning_rate": 1.7141279874562054e-05, "loss": 1.1486, "step": 6886 }, { "epoch": 0.26946552938414586, "grad_norm": 0.0, "learning_rate": 1.7140392736239785e-05, "loss": 1.0921, "step": 6887 }, { "epoch": 0.2695046560763753, "grad_norm": 0.0, "learning_rate": 1.713950548325175e-05, "loss": 1.184, "step": 6888 }, { "epoch": 0.26954378276860474, "grad_norm": 0.0, "learning_rate": 1.7138618115612206e-05, "loss": 1.188, "step": 6889 }, { "epoch": 0.2695829094608342, "grad_norm": 0.0, "learning_rate": 1.7137730633335404e-05, "loss": 1.178, "step": 6890 }, { "epoch": 0.2696220361530636, "grad_norm": 0.0, "learning_rate": 1.7136843036435586e-05, "loss": 1.0706, "step": 6891 }, { "epoch": 0.26966116284529307, "grad_norm": 0.0, "learning_rate": 1.713595532492702e-05, "loss": 1.1385, "step": 6892 }, { "epoch": 0.2697002895375225, "grad_norm": 0.0, "learning_rate": 1.7135067498823945e-05, "loss": 1.1937, "step": 6893 }, { "epoch": 0.26973941622975195, "grad_norm": 0.0, "learning_rate": 1.713417955814063e-05, "loss": 1.1218, "step": 6894 }, { "epoch": 0.2697785429219814, "grad_norm": 0.0, "learning_rate": 1.713329150289133e-05, "loss": 0.9317, "step": 6895 }, { "epoch": 0.26981766961421083, "grad_norm": 0.0, "learning_rate": 1.713240333309031e-05, "loss": 1.1447, "step": 6896 }, { "epoch": 0.26985679630644027, "grad_norm": 0.0, "learning_rate": 1.7131515048751826e-05, "loss": 1.1415, "step": 6897 }, { "epoch": 0.2698959229986697, "grad_norm": 0.0, "learning_rate": 1.7130626649890148e-05, "loss": 1.0088, "step": 6898 }, { "epoch": 0.26993504969089915, "grad_norm": 0.0, "learning_rate": 1.7129738136519543e-05, "loss": 1.1617, "step": 6899 }, { "epoch": 0.2699741763831286, "grad_norm": 0.0, "learning_rate": 1.7128849508654278e-05, "loss": 1.1238, "step": 6900 }, { "epoch": 0.27001330307535804, "grad_norm": 0.0, "learning_rate": 1.712796076630862e-05, "loss": 1.1407, "step": 6901 }, { "epoch": 0.2700524297675874, "grad_norm": 0.0, "learning_rate": 1.7127071909496844e-05, "loss": 1.0309, "step": 6902 }, { "epoch": 0.27009155645981686, "grad_norm": 0.0, "learning_rate": 1.7126182938233228e-05, "loss": 1.1661, "step": 6903 }, { "epoch": 0.2701306831520463, "grad_norm": 0.0, "learning_rate": 1.7125293852532035e-05, "loss": 1.0834, "step": 6904 }, { "epoch": 0.27016980984427574, "grad_norm": 0.0, "learning_rate": 1.712440465240756e-05, "loss": 1.0251, "step": 6905 }, { "epoch": 0.2702089365365052, "grad_norm": 0.0, "learning_rate": 1.712351533787407e-05, "loss": 1.1721, "step": 6906 }, { "epoch": 0.2702480632287346, "grad_norm": 0.0, "learning_rate": 1.7122625908945848e-05, "loss": 1.1094, "step": 6907 }, { "epoch": 0.27028718992096407, "grad_norm": 0.0, "learning_rate": 1.7121736365637182e-05, "loss": 1.2315, "step": 6908 }, { "epoch": 0.2703263166131935, "grad_norm": 0.0, "learning_rate": 1.7120846707962355e-05, "loss": 1.0275, "step": 6909 }, { "epoch": 0.27036544330542295, "grad_norm": 0.0, "learning_rate": 1.711995693593565e-05, "loss": 1.0752, "step": 6910 }, { "epoch": 0.2704045699976524, "grad_norm": 0.0, "learning_rate": 1.711906704957136e-05, "loss": 1.1559, "step": 6911 }, { "epoch": 0.27044369668988183, "grad_norm": 0.0, "learning_rate": 1.7118177048883774e-05, "loss": 1.2339, "step": 6912 }, { "epoch": 0.27048282338211127, "grad_norm": 0.0, "learning_rate": 1.7117286933887182e-05, "loss": 1.3002, "step": 6913 }, { "epoch": 0.2705219500743407, "grad_norm": 0.0, "learning_rate": 1.7116396704595883e-05, "loss": 1.0801, "step": 6914 }, { "epoch": 0.27056107676657015, "grad_norm": 0.0, "learning_rate": 1.711550636102417e-05, "loss": 1.0989, "step": 6915 }, { "epoch": 0.2706002034587996, "grad_norm": 0.0, "learning_rate": 1.711461590318634e-05, "loss": 0.9985, "step": 6916 }, { "epoch": 0.27063933015102903, "grad_norm": 0.0, "learning_rate": 1.7113725331096692e-05, "loss": 1.2605, "step": 6917 }, { "epoch": 0.2706784568432585, "grad_norm": 0.0, "learning_rate": 1.7112834644769533e-05, "loss": 1.1765, "step": 6918 }, { "epoch": 0.2707175835354879, "grad_norm": 0.0, "learning_rate": 1.711194384421916e-05, "loss": 1.2146, "step": 6919 }, { "epoch": 0.27075671022771736, "grad_norm": 0.0, "learning_rate": 1.7111052929459883e-05, "loss": 1.1969, "step": 6920 }, { "epoch": 0.2707958369199468, "grad_norm": 0.0, "learning_rate": 1.7110161900506003e-05, "loss": 1.0953, "step": 6921 }, { "epoch": 0.27083496361217624, "grad_norm": 0.0, "learning_rate": 1.7109270757371833e-05, "loss": 1.1827, "step": 6922 }, { "epoch": 0.2708740903044057, "grad_norm": 0.0, "learning_rate": 1.7108379500071687e-05, "loss": 1.1493, "step": 6923 }, { "epoch": 0.2709132169966351, "grad_norm": 0.0, "learning_rate": 1.7107488128619868e-05, "loss": 1.0132, "step": 6924 }, { "epoch": 0.27095234368886456, "grad_norm": 0.0, "learning_rate": 1.7106596643030702e-05, "loss": 1.2065, "step": 6925 }, { "epoch": 0.270991470381094, "grad_norm": 0.0, "learning_rate": 1.7105705043318493e-05, "loss": 1.1652, "step": 6926 }, { "epoch": 0.27103059707332344, "grad_norm": 0.0, "learning_rate": 1.710481332949757e-05, "loss": 1.039, "step": 6927 }, { "epoch": 0.2710697237655529, "grad_norm": 0.0, "learning_rate": 1.7103921501582243e-05, "loss": 1.2018, "step": 6928 }, { "epoch": 0.2711088504577823, "grad_norm": 0.0, "learning_rate": 1.7103029559586843e-05, "loss": 1.1505, "step": 6929 }, { "epoch": 0.27114797715001177, "grad_norm": 0.0, "learning_rate": 1.7102137503525686e-05, "loss": 1.0786, "step": 6930 }, { "epoch": 0.27118710384224115, "grad_norm": 0.0, "learning_rate": 1.7101245333413098e-05, "loss": 1.2097, "step": 6931 }, { "epoch": 0.2712262305344706, "grad_norm": 0.0, "learning_rate": 1.710035304926341e-05, "loss": 1.1417, "step": 6932 }, { "epoch": 0.27126535722670003, "grad_norm": 0.0, "learning_rate": 1.7099460651090952e-05, "loss": 1.1229, "step": 6933 }, { "epoch": 0.2713044839189295, "grad_norm": 0.0, "learning_rate": 1.709856813891005e-05, "loss": 1.1624, "step": 6934 }, { "epoch": 0.2713436106111589, "grad_norm": 0.0, "learning_rate": 1.7097675512735042e-05, "loss": 1.1736, "step": 6935 }, { "epoch": 0.27138273730338835, "grad_norm": 0.0, "learning_rate": 1.7096782772580255e-05, "loss": 0.9431, "step": 6936 }, { "epoch": 0.2714218639956178, "grad_norm": 0.0, "learning_rate": 1.709588991846003e-05, "loss": 1.0508, "step": 6937 }, { "epoch": 0.27146099068784724, "grad_norm": 0.0, "learning_rate": 1.7094996950388704e-05, "loss": 1.2195, "step": 6938 }, { "epoch": 0.2715001173800767, "grad_norm": 0.0, "learning_rate": 1.7094103868380618e-05, "loss": 1.2393, "step": 6939 }, { "epoch": 0.2715392440723061, "grad_norm": 0.0, "learning_rate": 1.7093210672450114e-05, "loss": 1.1552, "step": 6940 }, { "epoch": 0.27157837076453556, "grad_norm": 0.0, "learning_rate": 1.7092317362611537e-05, "loss": 1.2435, "step": 6941 }, { "epoch": 0.271617497456765, "grad_norm": 0.0, "learning_rate": 1.7091423938879227e-05, "loss": 1.1109, "step": 6942 }, { "epoch": 0.27165662414899444, "grad_norm": 0.0, "learning_rate": 1.7090530401267534e-05, "loss": 1.1354, "step": 6943 }, { "epoch": 0.2716957508412239, "grad_norm": 0.0, "learning_rate": 1.7089636749790812e-05, "loss": 1.1753, "step": 6944 }, { "epoch": 0.2717348775334533, "grad_norm": 0.0, "learning_rate": 1.7088742984463405e-05, "loss": 1.0514, "step": 6945 }, { "epoch": 0.27177400422568276, "grad_norm": 0.0, "learning_rate": 1.708784910529967e-05, "loss": 1.1071, "step": 6946 }, { "epoch": 0.2718131309179122, "grad_norm": 0.0, "learning_rate": 1.7086955112313958e-05, "loss": 1.067, "step": 6947 }, { "epoch": 0.27185225761014165, "grad_norm": 0.0, "learning_rate": 1.7086061005520628e-05, "loss": 1.1378, "step": 6948 }, { "epoch": 0.2718913843023711, "grad_norm": 0.0, "learning_rate": 1.708516678493404e-05, "loss": 1.1965, "step": 6949 }, { "epoch": 0.2719305109946005, "grad_norm": 0.0, "learning_rate": 1.7084272450568543e-05, "loss": 1.077, "step": 6950 }, { "epoch": 0.27196963768682997, "grad_norm": 0.0, "learning_rate": 1.7083378002438516e-05, "loss": 1.2202, "step": 6951 }, { "epoch": 0.2720087643790594, "grad_norm": 0.0, "learning_rate": 1.7082483440558314e-05, "loss": 1.1353, "step": 6952 }, { "epoch": 0.27204789107128885, "grad_norm": 0.0, "learning_rate": 1.7081588764942298e-05, "loss": 1.1027, "step": 6953 }, { "epoch": 0.2720870177635183, "grad_norm": 0.0, "learning_rate": 1.7080693975604842e-05, "loss": 1.1512, "step": 6954 }, { "epoch": 0.27212614445574773, "grad_norm": 0.0, "learning_rate": 1.7079799072560318e-05, "loss": 1.0848, "step": 6955 }, { "epoch": 0.2721652711479772, "grad_norm": 0.0, "learning_rate": 1.7078904055823087e-05, "loss": 1.2422, "step": 6956 }, { "epoch": 0.2722043978402066, "grad_norm": 0.0, "learning_rate": 1.7078008925407527e-05, "loss": 1.1702, "step": 6957 }, { "epoch": 0.27224352453243605, "grad_norm": 0.0, "learning_rate": 1.7077113681328016e-05, "loss": 1.2685, "step": 6958 }, { "epoch": 0.27228265122466544, "grad_norm": 0.0, "learning_rate": 1.7076218323598926e-05, "loss": 1.1698, "step": 6959 }, { "epoch": 0.2723217779168949, "grad_norm": 0.0, "learning_rate": 1.7075322852234637e-05, "loss": 1.2643, "step": 6960 }, { "epoch": 0.2723609046091243, "grad_norm": 0.0, "learning_rate": 1.7074427267249528e-05, "loss": 1.1145, "step": 6961 }, { "epoch": 0.27240003130135376, "grad_norm": 0.0, "learning_rate": 1.707353156865798e-05, "loss": 1.1112, "step": 6962 }, { "epoch": 0.2724391579935832, "grad_norm": 0.0, "learning_rate": 1.7072635756474384e-05, "loss": 1.1048, "step": 6963 }, { "epoch": 0.27247828468581264, "grad_norm": 0.0, "learning_rate": 1.7071739830713117e-05, "loss": 1.1045, "step": 6964 }, { "epoch": 0.2725174113780421, "grad_norm": 0.0, "learning_rate": 1.7070843791388568e-05, "loss": 1.1449, "step": 6965 }, { "epoch": 0.2725565380702715, "grad_norm": 0.0, "learning_rate": 1.7069947638515132e-05, "loss": 1.0226, "step": 6966 }, { "epoch": 0.27259566476250097, "grad_norm": 0.0, "learning_rate": 1.7069051372107193e-05, "loss": 1.0793, "step": 6967 }, { "epoch": 0.2726347914547304, "grad_norm": 0.0, "learning_rate": 1.706815499217915e-05, "loss": 1.1205, "step": 6968 }, { "epoch": 0.27267391814695985, "grad_norm": 0.0, "learning_rate": 1.7067258498745393e-05, "loss": 1.2505, "step": 6969 }, { "epoch": 0.2727130448391893, "grad_norm": 0.0, "learning_rate": 1.706636189182032e-05, "loss": 1.1732, "step": 6970 }, { "epoch": 0.27275217153141873, "grad_norm": 0.0, "learning_rate": 1.706546517141833e-05, "loss": 1.0059, "step": 6971 }, { "epoch": 0.27279129822364817, "grad_norm": 0.0, "learning_rate": 1.706456833755382e-05, "loss": 1.0602, "step": 6972 }, { "epoch": 0.2728304249158776, "grad_norm": 0.0, "learning_rate": 1.70636713902412e-05, "loss": 1.127, "step": 6973 }, { "epoch": 0.27286955160810705, "grad_norm": 0.0, "learning_rate": 1.7062774329494865e-05, "loss": 1.1792, "step": 6974 }, { "epoch": 0.2729086783003365, "grad_norm": 0.0, "learning_rate": 1.7061877155329224e-05, "loss": 1.1074, "step": 6975 }, { "epoch": 0.27294780499256593, "grad_norm": 0.0, "learning_rate": 1.7060979867758685e-05, "loss": 1.1655, "step": 6976 }, { "epoch": 0.2729869316847954, "grad_norm": 0.0, "learning_rate": 1.7060082466797662e-05, "loss": 1.0202, "step": 6977 }, { "epoch": 0.2730260583770248, "grad_norm": 0.0, "learning_rate": 1.705918495246056e-05, "loss": 1.1165, "step": 6978 }, { "epoch": 0.27306518506925426, "grad_norm": 0.0, "learning_rate": 1.705828732476179e-05, "loss": 0.9697, "step": 6979 }, { "epoch": 0.2731043117614837, "grad_norm": 0.0, "learning_rate": 1.705738958371577e-05, "loss": 1.133, "step": 6980 }, { "epoch": 0.27314343845371314, "grad_norm": 0.0, "learning_rate": 1.7056491729336917e-05, "loss": 1.0978, "step": 6981 }, { "epoch": 0.2731825651459426, "grad_norm": 0.0, "learning_rate": 1.7055593761639653e-05, "loss": 1.1201, "step": 6982 }, { "epoch": 0.273221691838172, "grad_norm": 0.0, "learning_rate": 1.705469568063839e-05, "loss": 1.1237, "step": 6983 }, { "epoch": 0.27326081853040146, "grad_norm": 0.0, "learning_rate": 1.705379748634756e-05, "loss": 1.0771, "step": 6984 }, { "epoch": 0.2732999452226309, "grad_norm": 0.0, "learning_rate": 1.7052899178781575e-05, "loss": 1.1635, "step": 6985 }, { "epoch": 0.27333907191486034, "grad_norm": 0.0, "learning_rate": 1.705200075795487e-05, "loss": 1.1343, "step": 6986 }, { "epoch": 0.2733781986070898, "grad_norm": 0.0, "learning_rate": 1.705110222388187e-05, "loss": 1.0273, "step": 6987 }, { "epoch": 0.27341732529931917, "grad_norm": 0.0, "learning_rate": 1.7050203576577e-05, "loss": 1.0114, "step": 6988 }, { "epoch": 0.2734564519915486, "grad_norm": 0.0, "learning_rate": 1.70493048160547e-05, "loss": 1.1075, "step": 6989 }, { "epoch": 0.27349557868377805, "grad_norm": 0.0, "learning_rate": 1.7048405942329393e-05, "loss": 1.1074, "step": 6990 }, { "epoch": 0.2735347053760075, "grad_norm": 0.0, "learning_rate": 1.704750695541552e-05, "loss": 1.1593, "step": 6991 }, { "epoch": 0.27357383206823693, "grad_norm": 0.0, "learning_rate": 1.704660785532752e-05, "loss": 1.0842, "step": 6992 }, { "epoch": 0.2736129587604664, "grad_norm": 0.0, "learning_rate": 1.7045708642079824e-05, "loss": 0.9985, "step": 6993 }, { "epoch": 0.2736520854526958, "grad_norm": 0.0, "learning_rate": 1.704480931568688e-05, "loss": 1.2453, "step": 6994 }, { "epoch": 0.27369121214492526, "grad_norm": 0.0, "learning_rate": 1.704390987616312e-05, "loss": 1.051, "step": 6995 }, { "epoch": 0.2737303388371547, "grad_norm": 0.0, "learning_rate": 1.7043010323522998e-05, "loss": 1.3528, "step": 6996 }, { "epoch": 0.27376946552938414, "grad_norm": 0.0, "learning_rate": 1.7042110657780953e-05, "loss": 1.1677, "step": 6997 }, { "epoch": 0.2738085922216136, "grad_norm": 0.0, "learning_rate": 1.704121087895144e-05, "loss": 1.1386, "step": 6998 }, { "epoch": 0.273847718913843, "grad_norm": 0.0, "learning_rate": 1.7040310987048897e-05, "loss": 1.1561, "step": 6999 }, { "epoch": 0.27388684560607246, "grad_norm": 0.0, "learning_rate": 1.7039410982087786e-05, "loss": 1.1029, "step": 7000 }, { "epoch": 0.2739259722983019, "grad_norm": 0.0, "learning_rate": 1.7038510864082555e-05, "loss": 1.2509, "step": 7001 }, { "epoch": 0.27396509899053134, "grad_norm": 0.0, "learning_rate": 1.703761063304766e-05, "loss": 1.1747, "step": 7002 }, { "epoch": 0.2740042256827608, "grad_norm": 0.0, "learning_rate": 1.7036710288997555e-05, "loss": 1.2322, "step": 7003 }, { "epoch": 0.2740433523749902, "grad_norm": 0.0, "learning_rate": 1.70358098319467e-05, "loss": 1.1766, "step": 7004 }, { "epoch": 0.27408247906721966, "grad_norm": 0.0, "learning_rate": 1.7034909261909556e-05, "loss": 1.1671, "step": 7005 }, { "epoch": 0.2741216057594491, "grad_norm": 0.0, "learning_rate": 1.7034008578900584e-05, "loss": 1.1711, "step": 7006 }, { "epoch": 0.27416073245167855, "grad_norm": 0.0, "learning_rate": 1.703310778293425e-05, "loss": 1.0688, "step": 7007 }, { "epoch": 0.274199859143908, "grad_norm": 0.0, "learning_rate": 1.7032206874025017e-05, "loss": 1.0698, "step": 7008 }, { "epoch": 0.2742389858361374, "grad_norm": 0.0, "learning_rate": 1.703130585218735e-05, "loss": 1.0759, "step": 7009 }, { "epoch": 0.27427811252836687, "grad_norm": 0.0, "learning_rate": 1.703040471743573e-05, "loss": 1.0925, "step": 7010 }, { "epoch": 0.2743172392205963, "grad_norm": 0.0, "learning_rate": 1.7029503469784613e-05, "loss": 1.176, "step": 7011 }, { "epoch": 0.27435636591282575, "grad_norm": 0.0, "learning_rate": 1.7028602109248484e-05, "loss": 1.0603, "step": 7012 }, { "epoch": 0.2743954926050552, "grad_norm": 0.0, "learning_rate": 1.702770063584181e-05, "loss": 1.0674, "step": 7013 }, { "epoch": 0.27443461929728463, "grad_norm": 0.0, "learning_rate": 1.7026799049579063e-05, "loss": 1.1623, "step": 7014 }, { "epoch": 0.2744737459895141, "grad_norm": 0.0, "learning_rate": 1.702589735047474e-05, "loss": 1.0362, "step": 7015 }, { "epoch": 0.27451287268174346, "grad_norm": 0.0, "learning_rate": 1.70249955385433e-05, "loss": 1.1394, "step": 7016 }, { "epoch": 0.2745519993739729, "grad_norm": 0.0, "learning_rate": 1.702409361379924e-05, "loss": 1.0408, "step": 7017 }, { "epoch": 0.27459112606620234, "grad_norm": 0.0, "learning_rate": 1.7023191576257038e-05, "loss": 0.9655, "step": 7018 }, { "epoch": 0.2746302527584318, "grad_norm": 0.0, "learning_rate": 1.7022289425931176e-05, "loss": 1.13, "step": 7019 }, { "epoch": 0.2746693794506612, "grad_norm": 0.0, "learning_rate": 1.702138716283615e-05, "loss": 1.0302, "step": 7020 }, { "epoch": 0.27470850614289066, "grad_norm": 0.0, "learning_rate": 1.702048478698644e-05, "loss": 1.1376, "step": 7021 }, { "epoch": 0.2747476328351201, "grad_norm": 0.0, "learning_rate": 1.7019582298396544e-05, "loss": 1.0327, "step": 7022 }, { "epoch": 0.27478675952734954, "grad_norm": 0.0, "learning_rate": 1.7018679697080952e-05, "loss": 1.0997, "step": 7023 }, { "epoch": 0.274825886219579, "grad_norm": 0.0, "learning_rate": 1.701777698305416e-05, "loss": 1.1218, "step": 7024 }, { "epoch": 0.2748650129118084, "grad_norm": 0.0, "learning_rate": 1.701687415633066e-05, "loss": 1.1749, "step": 7025 }, { "epoch": 0.27490413960403787, "grad_norm": 0.0, "learning_rate": 1.7015971216924957e-05, "loss": 1.0305, "step": 7026 }, { "epoch": 0.2749432662962673, "grad_norm": 0.0, "learning_rate": 1.701506816485155e-05, "loss": 1.125, "step": 7027 }, { "epoch": 0.27498239298849675, "grad_norm": 0.0, "learning_rate": 1.7014165000124932e-05, "loss": 1.2236, "step": 7028 }, { "epoch": 0.2750215196807262, "grad_norm": 0.0, "learning_rate": 1.701326172275962e-05, "loss": 1.2162, "step": 7029 }, { "epoch": 0.27506064637295563, "grad_norm": 0.0, "learning_rate": 1.7012358332770105e-05, "loss": 1.1615, "step": 7030 }, { "epoch": 0.27509977306518507, "grad_norm": 0.0, "learning_rate": 1.701145483017091e-05, "loss": 1.1685, "step": 7031 }, { "epoch": 0.2751388997574145, "grad_norm": 0.0, "learning_rate": 1.701055121497653e-05, "loss": 1.2342, "step": 7032 }, { "epoch": 0.27517802644964395, "grad_norm": 0.0, "learning_rate": 1.7009647487201492e-05, "loss": 1.1956, "step": 7033 }, { "epoch": 0.2752171531418734, "grad_norm": 0.0, "learning_rate": 1.7008743646860288e-05, "loss": 1.1354, "step": 7034 }, { "epoch": 0.27525627983410283, "grad_norm": 0.0, "learning_rate": 1.7007839693967446e-05, "loss": 1.1672, "step": 7035 }, { "epoch": 0.2752954065263323, "grad_norm": 0.0, "learning_rate": 1.7006935628537485e-05, "loss": 1.1076, "step": 7036 }, { "epoch": 0.2753345332185617, "grad_norm": 0.0, "learning_rate": 1.7006031450584913e-05, "loss": 1.1104, "step": 7037 }, { "epoch": 0.27537365991079116, "grad_norm": 0.0, "learning_rate": 1.700512716012426e-05, "loss": 1.1495, "step": 7038 }, { "epoch": 0.2754127866030206, "grad_norm": 0.0, "learning_rate": 1.700422275717004e-05, "loss": 1.0091, "step": 7039 }, { "epoch": 0.27545191329525004, "grad_norm": 0.0, "learning_rate": 1.7003318241736775e-05, "loss": 1.13, "step": 7040 }, { "epoch": 0.2754910399874795, "grad_norm": 0.0, "learning_rate": 1.7002413613838997e-05, "loss": 1.1443, "step": 7041 }, { "epoch": 0.2755301666797089, "grad_norm": 0.0, "learning_rate": 1.7001508873491236e-05, "loss": 1.1266, "step": 7042 }, { "epoch": 0.27556929337193836, "grad_norm": 0.0, "learning_rate": 1.700060402070801e-05, "loss": 1.2426, "step": 7043 }, { "epoch": 0.2756084200641678, "grad_norm": 0.0, "learning_rate": 1.6999699055503856e-05, "loss": 1.1136, "step": 7044 }, { "epoch": 0.2756475467563972, "grad_norm": 0.0, "learning_rate": 1.6998793977893312e-05, "loss": 1.0602, "step": 7045 }, { "epoch": 0.27568667344862663, "grad_norm": 0.0, "learning_rate": 1.69978887878909e-05, "loss": 1.1055, "step": 7046 }, { "epoch": 0.27572580014085607, "grad_norm": 0.0, "learning_rate": 1.6996983485511164e-05, "loss": 1.166, "step": 7047 }, { "epoch": 0.2757649268330855, "grad_norm": 0.0, "learning_rate": 1.6996078070768642e-05, "loss": 1.2081, "step": 7048 }, { "epoch": 0.27580405352531495, "grad_norm": 0.0, "learning_rate": 1.6995172543677875e-05, "loss": 1.0446, "step": 7049 }, { "epoch": 0.2758431802175444, "grad_norm": 0.0, "learning_rate": 1.69942669042534e-05, "loss": 1.0399, "step": 7050 }, { "epoch": 0.27588230690977383, "grad_norm": 0.0, "learning_rate": 1.6993361152509762e-05, "loss": 1.0946, "step": 7051 }, { "epoch": 0.2759214336020033, "grad_norm": 0.0, "learning_rate": 1.699245528846151e-05, "loss": 1.1807, "step": 7052 }, { "epoch": 0.2759605602942327, "grad_norm": 0.0, "learning_rate": 1.6991549312123187e-05, "loss": 1.1884, "step": 7053 }, { "epoch": 0.27599968698646216, "grad_norm": 0.0, "learning_rate": 1.6990643223509342e-05, "loss": 1.2018, "step": 7054 }, { "epoch": 0.2760388136786916, "grad_norm": 0.0, "learning_rate": 1.698973702263453e-05, "loss": 1.0782, "step": 7055 }, { "epoch": 0.27607794037092104, "grad_norm": 0.0, "learning_rate": 1.6988830709513294e-05, "loss": 0.9938, "step": 7056 }, { "epoch": 0.2761170670631505, "grad_norm": 0.0, "learning_rate": 1.6987924284160197e-05, "loss": 1.0971, "step": 7057 }, { "epoch": 0.2761561937553799, "grad_norm": 0.0, "learning_rate": 1.6987017746589797e-05, "loss": 1.0402, "step": 7058 }, { "epoch": 0.27619532044760936, "grad_norm": 0.0, "learning_rate": 1.698611109681664e-05, "loss": 1.1567, "step": 7059 }, { "epoch": 0.2762344471398388, "grad_norm": 0.0, "learning_rate": 1.6985204334855298e-05, "loss": 1.1089, "step": 7060 }, { "epoch": 0.27627357383206824, "grad_norm": 0.0, "learning_rate": 1.6984297460720323e-05, "loss": 1.1685, "step": 7061 }, { "epoch": 0.2763127005242977, "grad_norm": 0.0, "learning_rate": 1.6983390474426284e-05, "loss": 1.1534, "step": 7062 }, { "epoch": 0.2763518272165271, "grad_norm": 0.0, "learning_rate": 1.6982483375987746e-05, "loss": 1.0626, "step": 7063 }, { "epoch": 0.27639095390875656, "grad_norm": 0.0, "learning_rate": 1.6981576165419275e-05, "loss": 0.9927, "step": 7064 }, { "epoch": 0.276430080600986, "grad_norm": 0.0, "learning_rate": 1.6980668842735438e-05, "loss": 0.9493, "step": 7065 }, { "epoch": 0.27646920729321545, "grad_norm": 0.0, "learning_rate": 1.6979761407950806e-05, "loss": 1.2271, "step": 7066 }, { "epoch": 0.2765083339854449, "grad_norm": 0.0, "learning_rate": 1.6978853861079954e-05, "loss": 1.1474, "step": 7067 }, { "epoch": 0.27654746067767433, "grad_norm": 0.0, "learning_rate": 1.697794620213745e-05, "loss": 1.1251, "step": 7068 }, { "epoch": 0.27658658736990377, "grad_norm": 0.0, "learning_rate": 1.697703843113788e-05, "loss": 1.2307, "step": 7069 }, { "epoch": 0.2766257140621332, "grad_norm": 0.0, "learning_rate": 1.697613054809581e-05, "loss": 1.1639, "step": 7070 }, { "epoch": 0.27666484075436265, "grad_norm": 0.0, "learning_rate": 1.697522255302583e-05, "loss": 1.1524, "step": 7071 }, { "epoch": 0.2767039674465921, "grad_norm": 0.0, "learning_rate": 1.6974314445942514e-05, "loss": 1.1664, "step": 7072 }, { "epoch": 0.2767430941388215, "grad_norm": 0.0, "learning_rate": 1.6973406226860444e-05, "loss": 1.077, "step": 7073 }, { "epoch": 0.2767822208310509, "grad_norm": 0.0, "learning_rate": 1.697249789579421e-05, "loss": 1.0295, "step": 7074 }, { "epoch": 0.27682134752328036, "grad_norm": 0.0, "learning_rate": 1.6971589452758397e-05, "loss": 1.0681, "step": 7075 }, { "epoch": 0.2768604742155098, "grad_norm": 0.0, "learning_rate": 1.6970680897767597e-05, "loss": 1.09, "step": 7076 }, { "epoch": 0.27689960090773924, "grad_norm": 0.0, "learning_rate": 1.696977223083639e-05, "loss": 1.0657, "step": 7077 }, { "epoch": 0.2769387275999687, "grad_norm": 0.0, "learning_rate": 1.696886345197938e-05, "loss": 1.0466, "step": 7078 }, { "epoch": 0.2769778542921981, "grad_norm": 0.0, "learning_rate": 1.6967954561211154e-05, "loss": 1.163, "step": 7079 }, { "epoch": 0.27701698098442756, "grad_norm": 0.0, "learning_rate": 1.696704555854631e-05, "loss": 1.1243, "step": 7080 }, { "epoch": 0.277056107676657, "grad_norm": 0.0, "learning_rate": 1.696613644399944e-05, "loss": 1.0965, "step": 7081 }, { "epoch": 0.27709523436888644, "grad_norm": 0.0, "learning_rate": 1.696522721758515e-05, "loss": 1.1109, "step": 7082 }, { "epoch": 0.2771343610611159, "grad_norm": 0.0, "learning_rate": 1.696431787931804e-05, "loss": 1.0919, "step": 7083 }, { "epoch": 0.2771734877533453, "grad_norm": 0.0, "learning_rate": 1.6963408429212712e-05, "loss": 1.2177, "step": 7084 }, { "epoch": 0.27721261444557477, "grad_norm": 0.0, "learning_rate": 1.696249886728377e-05, "loss": 1.0958, "step": 7085 }, { "epoch": 0.2772517411378042, "grad_norm": 0.0, "learning_rate": 1.696158919354582e-05, "loss": 1.1142, "step": 7086 }, { "epoch": 0.27729086783003365, "grad_norm": 0.0, "learning_rate": 1.6960679408013475e-05, "loss": 1.0965, "step": 7087 }, { "epoch": 0.2773299945222631, "grad_norm": 0.0, "learning_rate": 1.6959769510701333e-05, "loss": 1.1415, "step": 7088 }, { "epoch": 0.27736912121449253, "grad_norm": 0.0, "learning_rate": 1.695885950162402e-05, "loss": 1.1531, "step": 7089 }, { "epoch": 0.27740824790672197, "grad_norm": 0.0, "learning_rate": 1.695794938079614e-05, "loss": 1.1714, "step": 7090 }, { "epoch": 0.2774473745989514, "grad_norm": 0.0, "learning_rate": 1.6957039148232315e-05, "loss": 1.15, "step": 7091 }, { "epoch": 0.27748650129118085, "grad_norm": 0.0, "learning_rate": 1.6956128803947155e-05, "loss": 1.0163, "step": 7092 }, { "epoch": 0.2775256279834103, "grad_norm": 0.0, "learning_rate": 1.6955218347955286e-05, "loss": 1.1738, "step": 7093 }, { "epoch": 0.27756475467563974, "grad_norm": 0.0, "learning_rate": 1.6954307780271325e-05, "loss": 1.104, "step": 7094 }, { "epoch": 0.2776038813678692, "grad_norm": 0.0, "learning_rate": 1.6953397100909896e-05, "loss": 1.2696, "step": 7095 }, { "epoch": 0.2776430080600986, "grad_norm": 0.0, "learning_rate": 1.6952486309885617e-05, "loss": 1.0852, "step": 7096 }, { "epoch": 0.27768213475232806, "grad_norm": 0.0, "learning_rate": 1.6951575407213126e-05, "loss": 1.1553, "step": 7097 }, { "epoch": 0.2777212614445575, "grad_norm": 0.0, "learning_rate": 1.6950664392907042e-05, "loss": 1.0663, "step": 7098 }, { "epoch": 0.27776038813678694, "grad_norm": 0.0, "learning_rate": 1.6949753266982e-05, "loss": 1.1503, "step": 7099 }, { "epoch": 0.2777995148290164, "grad_norm": 0.0, "learning_rate": 1.6948842029452627e-05, "loss": 1.0057, "step": 7100 }, { "epoch": 0.2778386415212458, "grad_norm": 0.0, "learning_rate": 1.6947930680333556e-05, "loss": 1.1348, "step": 7101 }, { "epoch": 0.2778777682134752, "grad_norm": 0.0, "learning_rate": 1.6947019219639432e-05, "loss": 1.1273, "step": 7102 }, { "epoch": 0.27791689490570465, "grad_norm": 0.0, "learning_rate": 1.694610764738488e-05, "loss": 1.176, "step": 7103 }, { "epoch": 0.2779560215979341, "grad_norm": 0.0, "learning_rate": 1.6945195963584543e-05, "loss": 1.1523, "step": 7104 }, { "epoch": 0.27799514829016353, "grad_norm": 0.0, "learning_rate": 1.6944284168253062e-05, "loss": 1.0909, "step": 7105 }, { "epoch": 0.27803427498239297, "grad_norm": 0.0, "learning_rate": 1.694337226140508e-05, "loss": 1.1245, "step": 7106 }, { "epoch": 0.2780734016746224, "grad_norm": 0.0, "learning_rate": 1.6942460243055237e-05, "loss": 1.0918, "step": 7107 }, { "epoch": 0.27811252836685185, "grad_norm": 0.0, "learning_rate": 1.694154811321818e-05, "loss": 1.1862, "step": 7108 }, { "epoch": 0.2781516550590813, "grad_norm": 0.0, "learning_rate": 1.6940635871908564e-05, "loss": 1.0662, "step": 7109 }, { "epoch": 0.27819078175131073, "grad_norm": 0.0, "learning_rate": 1.693972351914103e-05, "loss": 1.0626, "step": 7110 }, { "epoch": 0.2782299084435402, "grad_norm": 0.0, "learning_rate": 1.6938811054930237e-05, "loss": 1.1416, "step": 7111 }, { "epoch": 0.2782690351357696, "grad_norm": 0.0, "learning_rate": 1.6937898479290826e-05, "loss": 1.091, "step": 7112 }, { "epoch": 0.27830816182799906, "grad_norm": 0.0, "learning_rate": 1.6936985792237464e-05, "loss": 1.1155, "step": 7113 }, { "epoch": 0.2783472885202285, "grad_norm": 0.0, "learning_rate": 1.6936072993784802e-05, "loss": 1.1124, "step": 7114 }, { "epoch": 0.27838641521245794, "grad_norm": 0.0, "learning_rate": 1.6935160083947498e-05, "loss": 1.2565, "step": 7115 }, { "epoch": 0.2784255419046874, "grad_norm": 0.0, "learning_rate": 1.6934247062740215e-05, "loss": 1.1502, "step": 7116 }, { "epoch": 0.2784646685969168, "grad_norm": 0.0, "learning_rate": 1.6933333930177613e-05, "loss": 1.2311, "step": 7117 }, { "epoch": 0.27850379528914626, "grad_norm": 0.0, "learning_rate": 1.6932420686274353e-05, "loss": 1.1657, "step": 7118 }, { "epoch": 0.2785429219813757, "grad_norm": 0.0, "learning_rate": 1.693150733104511e-05, "loss": 1.1926, "step": 7119 }, { "epoch": 0.27858204867360514, "grad_norm": 0.0, "learning_rate": 1.6930593864504538e-05, "loss": 0.9451, "step": 7120 }, { "epoch": 0.2786211753658346, "grad_norm": 0.0, "learning_rate": 1.6929680286667313e-05, "loss": 1.2469, "step": 7121 }, { "epoch": 0.278660302058064, "grad_norm": 0.0, "learning_rate": 1.692876659754811e-05, "loss": 1.1003, "step": 7122 }, { "epoch": 0.27869942875029347, "grad_norm": 0.0, "learning_rate": 1.69278527971616e-05, "loss": 1.0416, "step": 7123 }, { "epoch": 0.2787385554425229, "grad_norm": 0.0, "learning_rate": 1.692693888552245e-05, "loss": 1.1392, "step": 7124 }, { "epoch": 0.27877768213475235, "grad_norm": 0.0, "learning_rate": 1.692602486264534e-05, "loss": 1.1405, "step": 7125 }, { "epoch": 0.2788168088269818, "grad_norm": 0.0, "learning_rate": 1.6925110728544953e-05, "loss": 0.9554, "step": 7126 }, { "epoch": 0.27885593551921123, "grad_norm": 0.0, "learning_rate": 1.6924196483235968e-05, "loss": 1.0675, "step": 7127 }, { "epoch": 0.27889506221144067, "grad_norm": 0.0, "learning_rate": 1.6923282126733058e-05, "loss": 1.0405, "step": 7128 }, { "epoch": 0.2789341889036701, "grad_norm": 0.0, "learning_rate": 1.6922367659050914e-05, "loss": 1.1296, "step": 7129 }, { "epoch": 0.2789733155958995, "grad_norm": 0.0, "learning_rate": 1.6921453080204224e-05, "loss": 1.1603, "step": 7130 }, { "epoch": 0.27901244228812894, "grad_norm": 0.0, "learning_rate": 1.6920538390207664e-05, "loss": 1.1612, "step": 7131 }, { "epoch": 0.2790515689803584, "grad_norm": 0.0, "learning_rate": 1.6919623589075934e-05, "loss": 1.2462, "step": 7132 }, { "epoch": 0.2790906956725878, "grad_norm": 0.0, "learning_rate": 1.691870867682372e-05, "loss": 1.0887, "step": 7133 }, { "epoch": 0.27912982236481726, "grad_norm": 0.0, "learning_rate": 1.6917793653465712e-05, "loss": 1.0852, "step": 7134 }, { "epoch": 0.2791689490570467, "grad_norm": 0.0, "learning_rate": 1.691687851901661e-05, "loss": 0.9153, "step": 7135 }, { "epoch": 0.27920807574927614, "grad_norm": 0.0, "learning_rate": 1.6915963273491103e-05, "loss": 1.1937, "step": 7136 }, { "epoch": 0.2792472024415056, "grad_norm": 0.0, "learning_rate": 1.691504791690389e-05, "loss": 1.2247, "step": 7137 }, { "epoch": 0.279286329133735, "grad_norm": 0.0, "learning_rate": 1.6914132449269676e-05, "loss": 1.1797, "step": 7138 }, { "epoch": 0.27932545582596446, "grad_norm": 0.0, "learning_rate": 1.6913216870603157e-05, "loss": 1.2275, "step": 7139 }, { "epoch": 0.2793645825181939, "grad_norm": 0.0, "learning_rate": 1.691230118091904e-05, "loss": 1.2075, "step": 7140 }, { "epoch": 0.27940370921042335, "grad_norm": 0.0, "learning_rate": 1.6911385380232027e-05, "loss": 1.097, "step": 7141 }, { "epoch": 0.2794428359026528, "grad_norm": 0.0, "learning_rate": 1.6910469468556826e-05, "loss": 1.167, "step": 7142 }, { "epoch": 0.2794819625948822, "grad_norm": 0.0, "learning_rate": 1.690955344590814e-05, "loss": 1.1405, "step": 7143 }, { "epoch": 0.27952108928711167, "grad_norm": 0.0, "learning_rate": 1.690863731230069e-05, "loss": 1.1369, "step": 7144 }, { "epoch": 0.2795602159793411, "grad_norm": 0.0, "learning_rate": 1.690772106774918e-05, "loss": 1.054, "step": 7145 }, { "epoch": 0.27959934267157055, "grad_norm": 0.0, "learning_rate": 1.690680471226832e-05, "loss": 1.1249, "step": 7146 }, { "epoch": 0.2796384693638, "grad_norm": 0.0, "learning_rate": 1.690588824587284e-05, "loss": 1.116, "step": 7147 }, { "epoch": 0.27967759605602943, "grad_norm": 0.0, "learning_rate": 1.690497166857744e-05, "loss": 1.1163, "step": 7148 }, { "epoch": 0.2797167227482589, "grad_norm": 0.0, "learning_rate": 1.6904054980396852e-05, "loss": 1.1267, "step": 7149 }, { "epoch": 0.2797558494404883, "grad_norm": 0.0, "learning_rate": 1.690313818134579e-05, "loss": 1.1439, "step": 7150 }, { "epoch": 0.27979497613271775, "grad_norm": 0.0, "learning_rate": 1.6902221271438982e-05, "loss": 1.0449, "step": 7151 }, { "epoch": 0.2798341028249472, "grad_norm": 0.0, "learning_rate": 1.690130425069115e-05, "loss": 1.166, "step": 7152 }, { "epoch": 0.27987322951717664, "grad_norm": 0.0, "learning_rate": 1.6900387119117013e-05, "loss": 1.1402, "step": 7153 }, { "epoch": 0.2799123562094061, "grad_norm": 0.0, "learning_rate": 1.6899469876731313e-05, "loss": 1.1492, "step": 7154 }, { "epoch": 0.2799514829016355, "grad_norm": 0.0, "learning_rate": 1.6898552523548767e-05, "loss": 1.203, "step": 7155 }, { "epoch": 0.27999060959386496, "grad_norm": 0.0, "learning_rate": 1.6897635059584114e-05, "loss": 1.043, "step": 7156 }, { "epoch": 0.2800297362860944, "grad_norm": 0.0, "learning_rate": 1.6896717484852084e-05, "loss": 1.3303, "step": 7157 }, { "epoch": 0.2800688629783238, "grad_norm": 0.0, "learning_rate": 1.6895799799367417e-05, "loss": 1.025, "step": 7158 }, { "epoch": 0.2801079896705532, "grad_norm": 0.0, "learning_rate": 1.689488200314484e-05, "loss": 1.1763, "step": 7159 }, { "epoch": 0.28014711636278267, "grad_norm": 0.0, "learning_rate": 1.6893964096199103e-05, "loss": 1.1831, "step": 7160 }, { "epoch": 0.2801862430550121, "grad_norm": 0.0, "learning_rate": 1.689304607854494e-05, "loss": 1.1263, "step": 7161 }, { "epoch": 0.28022536974724155, "grad_norm": 0.0, "learning_rate": 1.6892127950197092e-05, "loss": 1.1396, "step": 7162 }, { "epoch": 0.280264496439471, "grad_norm": 0.0, "learning_rate": 1.689120971117031e-05, "loss": 1.1705, "step": 7163 }, { "epoch": 0.28030362313170043, "grad_norm": 0.0, "learning_rate": 1.6890291361479332e-05, "loss": 1.0513, "step": 7164 }, { "epoch": 0.28034274982392987, "grad_norm": 0.0, "learning_rate": 1.688937290113891e-05, "loss": 1.0646, "step": 7165 }, { "epoch": 0.2803818765161593, "grad_norm": 0.0, "learning_rate": 1.688845433016379e-05, "loss": 1.1937, "step": 7166 }, { "epoch": 0.28042100320838875, "grad_norm": 0.0, "learning_rate": 1.688753564856873e-05, "loss": 1.121, "step": 7167 }, { "epoch": 0.2804601299006182, "grad_norm": 0.0, "learning_rate": 1.6886616856368472e-05, "loss": 1.1534, "step": 7168 }, { "epoch": 0.28049925659284763, "grad_norm": 0.0, "learning_rate": 1.688569795357778e-05, "loss": 1.2186, "step": 7169 }, { "epoch": 0.2805383832850771, "grad_norm": 0.0, "learning_rate": 1.6884778940211408e-05, "loss": 1.1593, "step": 7170 }, { "epoch": 0.2805775099773065, "grad_norm": 0.0, "learning_rate": 1.688385981628411e-05, "loss": 1.2131, "step": 7171 }, { "epoch": 0.28061663666953596, "grad_norm": 0.0, "learning_rate": 1.6882940581810655e-05, "loss": 1.1591, "step": 7172 }, { "epoch": 0.2806557633617654, "grad_norm": 0.0, "learning_rate": 1.6882021236805793e-05, "loss": 1.1956, "step": 7173 }, { "epoch": 0.28069489005399484, "grad_norm": 0.0, "learning_rate": 1.6881101781284294e-05, "loss": 1.2729, "step": 7174 }, { "epoch": 0.2807340167462243, "grad_norm": 0.0, "learning_rate": 1.6880182215260924e-05, "loss": 1.1787, "step": 7175 }, { "epoch": 0.2807731434384537, "grad_norm": 0.0, "learning_rate": 1.6879262538750453e-05, "loss": 1.1138, "step": 7176 }, { "epoch": 0.28081227013068316, "grad_norm": 0.0, "learning_rate": 1.6878342751767642e-05, "loss": 1.005, "step": 7177 }, { "epoch": 0.2808513968229126, "grad_norm": 0.0, "learning_rate": 1.6877422854327265e-05, "loss": 1.0713, "step": 7178 }, { "epoch": 0.28089052351514204, "grad_norm": 0.0, "learning_rate": 1.6876502846444096e-05, "loss": 0.9827, "step": 7179 }, { "epoch": 0.2809296502073715, "grad_norm": 0.0, "learning_rate": 1.687558272813291e-05, "loss": 1.1227, "step": 7180 }, { "epoch": 0.2809687768996009, "grad_norm": 0.0, "learning_rate": 1.687466249940848e-05, "loss": 1.1964, "step": 7181 }, { "epoch": 0.28100790359183037, "grad_norm": 0.0, "learning_rate": 1.687374216028558e-05, "loss": 1.2675, "step": 7182 }, { "epoch": 0.2810470302840598, "grad_norm": 0.0, "learning_rate": 1.6872821710778997e-05, "loss": 1.0932, "step": 7183 }, { "epoch": 0.28108615697628925, "grad_norm": 0.0, "learning_rate": 1.687190115090351e-05, "loss": 1.1411, "step": 7184 }, { "epoch": 0.2811252836685187, "grad_norm": 0.0, "learning_rate": 1.6870980480673905e-05, "loss": 1.0549, "step": 7185 }, { "epoch": 0.28116441036074813, "grad_norm": 0.0, "learning_rate": 1.6870059700104956e-05, "loss": 1.1755, "step": 7186 }, { "epoch": 0.2812035370529775, "grad_norm": 0.0, "learning_rate": 1.686913880921146e-05, "loss": 1.2122, "step": 7187 }, { "epoch": 0.28124266374520696, "grad_norm": 0.0, "learning_rate": 1.6868217808008203e-05, "loss": 1.0643, "step": 7188 }, { "epoch": 0.2812817904374364, "grad_norm": 0.0, "learning_rate": 1.6867296696509978e-05, "loss": 1.254, "step": 7189 }, { "epoch": 0.28132091712966584, "grad_norm": 0.0, "learning_rate": 1.686637547473157e-05, "loss": 1.0258, "step": 7190 }, { "epoch": 0.2813600438218953, "grad_norm": 0.0, "learning_rate": 1.6865454142687773e-05, "loss": 1.1826, "step": 7191 }, { "epoch": 0.2813991705141247, "grad_norm": 0.0, "learning_rate": 1.686453270039339e-05, "loss": 1.0758, "step": 7192 }, { "epoch": 0.28143829720635416, "grad_norm": 0.0, "learning_rate": 1.686361114786321e-05, "loss": 1.11, "step": 7193 }, { "epoch": 0.2814774238985836, "grad_norm": 0.0, "learning_rate": 1.686268948511204e-05, "loss": 1.0463, "step": 7194 }, { "epoch": 0.28151655059081304, "grad_norm": 0.0, "learning_rate": 1.686176771215467e-05, "loss": 1.1329, "step": 7195 }, { "epoch": 0.2815556772830425, "grad_norm": 0.0, "learning_rate": 1.6860845829005914e-05, "loss": 1.1375, "step": 7196 }, { "epoch": 0.2815948039752719, "grad_norm": 0.0, "learning_rate": 1.685992383568057e-05, "loss": 1.2158, "step": 7197 }, { "epoch": 0.28163393066750136, "grad_norm": 0.0, "learning_rate": 1.6859001732193442e-05, "loss": 1.099, "step": 7198 }, { "epoch": 0.2816730573597308, "grad_norm": 0.0, "learning_rate": 1.6858079518559343e-05, "loss": 0.8829, "step": 7199 }, { "epoch": 0.28171218405196025, "grad_norm": 0.0, "learning_rate": 1.6857157194793083e-05, "loss": 1.1949, "step": 7200 }, { "epoch": 0.2817513107441897, "grad_norm": 0.0, "learning_rate": 1.685623476090947e-05, "loss": 1.0036, "step": 7201 }, { "epoch": 0.28179043743641913, "grad_norm": 0.0, "learning_rate": 1.6855312216923316e-05, "loss": 1.1252, "step": 7202 }, { "epoch": 0.28182956412864857, "grad_norm": 0.0, "learning_rate": 1.685438956284944e-05, "loss": 1.1337, "step": 7203 }, { "epoch": 0.281868690820878, "grad_norm": 0.0, "learning_rate": 1.6853466798702654e-05, "loss": 1.1987, "step": 7204 }, { "epoch": 0.28190781751310745, "grad_norm": 0.0, "learning_rate": 1.6852543924497782e-05, "loss": 1.1703, "step": 7205 }, { "epoch": 0.2819469442053369, "grad_norm": 0.0, "learning_rate": 1.685162094024964e-05, "loss": 1.0881, "step": 7206 }, { "epoch": 0.28198607089756633, "grad_norm": 0.0, "learning_rate": 1.685069784597305e-05, "loss": 1.1308, "step": 7207 }, { "epoch": 0.2820251975897958, "grad_norm": 0.0, "learning_rate": 1.6849774641682838e-05, "loss": 1.1629, "step": 7208 }, { "epoch": 0.2820643242820252, "grad_norm": 0.0, "learning_rate": 1.6848851327393833e-05, "loss": 0.9789, "step": 7209 }, { "epoch": 0.28210345097425465, "grad_norm": 0.0, "learning_rate": 1.6847927903120852e-05, "loss": 1.1575, "step": 7210 }, { "epoch": 0.2821425776664841, "grad_norm": 0.0, "learning_rate": 1.684700436887873e-05, "loss": 1.2281, "step": 7211 }, { "epoch": 0.28218170435871354, "grad_norm": 0.0, "learning_rate": 1.68460807246823e-05, "loss": 1.121, "step": 7212 }, { "epoch": 0.282220831050943, "grad_norm": 0.0, "learning_rate": 1.6845156970546393e-05, "loss": 1.2744, "step": 7213 }, { "epoch": 0.2822599577431724, "grad_norm": 0.0, "learning_rate": 1.684423310648584e-05, "loss": 0.9843, "step": 7214 }, { "epoch": 0.2822990844354018, "grad_norm": 0.0, "learning_rate": 1.684330913251548e-05, "loss": 1.0938, "step": 7215 }, { "epoch": 0.28233821112763124, "grad_norm": 0.0, "learning_rate": 1.684238504865015e-05, "loss": 1.0566, "step": 7216 }, { "epoch": 0.2823773378198607, "grad_norm": 0.0, "learning_rate": 1.684146085490469e-05, "loss": 1.1219, "step": 7217 }, { "epoch": 0.2824164645120901, "grad_norm": 0.0, "learning_rate": 1.6840536551293946e-05, "loss": 1.0483, "step": 7218 }, { "epoch": 0.28245559120431957, "grad_norm": 0.0, "learning_rate": 1.6839612137832752e-05, "loss": 1.0287, "step": 7219 }, { "epoch": 0.282494717896549, "grad_norm": 0.0, "learning_rate": 1.683868761453596e-05, "loss": 0.9698, "step": 7220 }, { "epoch": 0.28253384458877845, "grad_norm": 0.0, "learning_rate": 1.683776298141841e-05, "loss": 1.1606, "step": 7221 }, { "epoch": 0.2825729712810079, "grad_norm": 0.0, "learning_rate": 1.6836838238494956e-05, "loss": 1.1627, "step": 7222 }, { "epoch": 0.28261209797323733, "grad_norm": 0.0, "learning_rate": 1.6835913385780453e-05, "loss": 1.1609, "step": 7223 }, { "epoch": 0.28265122466546677, "grad_norm": 0.0, "learning_rate": 1.683498842328974e-05, "loss": 1.2047, "step": 7224 }, { "epoch": 0.2826903513576962, "grad_norm": 0.0, "learning_rate": 1.683406335103768e-05, "loss": 1.21, "step": 7225 }, { "epoch": 0.28272947804992565, "grad_norm": 0.0, "learning_rate": 1.6833138169039127e-05, "loss": 0.9925, "step": 7226 }, { "epoch": 0.2827686047421551, "grad_norm": 0.0, "learning_rate": 1.6832212877308934e-05, "loss": 1.0343, "step": 7227 }, { "epoch": 0.28280773143438453, "grad_norm": 0.0, "learning_rate": 1.683128747586197e-05, "loss": 1.1501, "step": 7228 }, { "epoch": 0.282846858126614, "grad_norm": 0.0, "learning_rate": 1.6830361964713082e-05, "loss": 1.056, "step": 7229 }, { "epoch": 0.2828859848188434, "grad_norm": 0.0, "learning_rate": 1.6829436343877142e-05, "loss": 1.147, "step": 7230 }, { "epoch": 0.28292511151107286, "grad_norm": 0.0, "learning_rate": 1.682851061336902e-05, "loss": 1.224, "step": 7231 }, { "epoch": 0.2829642382033023, "grad_norm": 0.0, "learning_rate": 1.6827584773203564e-05, "loss": 1.2388, "step": 7232 }, { "epoch": 0.28300336489553174, "grad_norm": 0.0, "learning_rate": 1.6826658823395657e-05, "loss": 1.1721, "step": 7233 }, { "epoch": 0.2830424915877612, "grad_norm": 0.0, "learning_rate": 1.6825732763960162e-05, "loss": 1.0671, "step": 7234 }, { "epoch": 0.2830816182799906, "grad_norm": 0.0, "learning_rate": 1.682480659491195e-05, "loss": 1.2374, "step": 7235 }, { "epoch": 0.28312074497222006, "grad_norm": 0.0, "learning_rate": 1.68238803162659e-05, "loss": 1.1328, "step": 7236 }, { "epoch": 0.2831598716644495, "grad_norm": 0.0, "learning_rate": 1.682295392803688e-05, "loss": 0.9437, "step": 7237 }, { "epoch": 0.28319899835667894, "grad_norm": 0.0, "learning_rate": 1.682202743023977e-05, "loss": 1.2204, "step": 7238 }, { "epoch": 0.2832381250489084, "grad_norm": 0.0, "learning_rate": 1.682110082288945e-05, "loss": 1.2586, "step": 7239 }, { "epoch": 0.2832772517411378, "grad_norm": 0.0, "learning_rate": 1.6820174106000794e-05, "loss": 1.0565, "step": 7240 }, { "epoch": 0.28331637843336727, "grad_norm": 0.0, "learning_rate": 1.681924727958869e-05, "loss": 1.1092, "step": 7241 }, { "epoch": 0.2833555051255967, "grad_norm": 0.0, "learning_rate": 1.681832034366802e-05, "loss": 1.1735, "step": 7242 }, { "epoch": 0.28339463181782615, "grad_norm": 0.0, "learning_rate": 1.681739329825367e-05, "loss": 1.1162, "step": 7243 }, { "epoch": 0.28343375851005553, "grad_norm": 0.0, "learning_rate": 1.6816466143360527e-05, "loss": 1.0736, "step": 7244 }, { "epoch": 0.283472885202285, "grad_norm": 0.0, "learning_rate": 1.6815538879003477e-05, "loss": 0.9967, "step": 7245 }, { "epoch": 0.2835120118945144, "grad_norm": 0.0, "learning_rate": 1.6814611505197413e-05, "loss": 1.0916, "step": 7246 }, { "epoch": 0.28355113858674386, "grad_norm": 0.0, "learning_rate": 1.6813684021957226e-05, "loss": 1.1506, "step": 7247 }, { "epoch": 0.2835902652789733, "grad_norm": 0.0, "learning_rate": 1.6812756429297815e-05, "loss": 1.0266, "step": 7248 }, { "epoch": 0.28362939197120274, "grad_norm": 0.0, "learning_rate": 1.6811828727234073e-05, "loss": 1.086, "step": 7249 }, { "epoch": 0.2836685186634322, "grad_norm": 0.0, "learning_rate": 1.6810900915780894e-05, "loss": 1.1202, "step": 7250 }, { "epoch": 0.2837076453556616, "grad_norm": 0.0, "learning_rate": 1.6809972994953184e-05, "loss": 1.0865, "step": 7251 }, { "epoch": 0.28374677204789106, "grad_norm": 0.0, "learning_rate": 1.680904496476584e-05, "loss": 1.1071, "step": 7252 }, { "epoch": 0.2837858987401205, "grad_norm": 0.0, "learning_rate": 1.6808116825233765e-05, "loss": 1.0682, "step": 7253 }, { "epoch": 0.28382502543234994, "grad_norm": 0.0, "learning_rate": 1.6807188576371864e-05, "loss": 0.9411, "step": 7254 }, { "epoch": 0.2838641521245794, "grad_norm": 0.0, "learning_rate": 1.6806260218195046e-05, "loss": 1.0457, "step": 7255 }, { "epoch": 0.2839032788168088, "grad_norm": 0.0, "learning_rate": 1.6805331750718218e-05, "loss": 1.0612, "step": 7256 }, { "epoch": 0.28394240550903826, "grad_norm": 0.0, "learning_rate": 1.680440317395629e-05, "loss": 1.0667, "step": 7257 }, { "epoch": 0.2839815322012677, "grad_norm": 0.0, "learning_rate": 1.6803474487924173e-05, "loss": 1.1059, "step": 7258 }, { "epoch": 0.28402065889349715, "grad_norm": 0.0, "learning_rate": 1.680254569263678e-05, "loss": 1.0945, "step": 7259 }, { "epoch": 0.2840597855857266, "grad_norm": 0.0, "learning_rate": 1.6801616788109028e-05, "loss": 1.0754, "step": 7260 }, { "epoch": 0.28409891227795603, "grad_norm": 0.0, "learning_rate": 1.6800687774355834e-05, "loss": 1.0824, "step": 7261 }, { "epoch": 0.28413803897018547, "grad_norm": 0.0, "learning_rate": 1.6799758651392114e-05, "loss": 1.1848, "step": 7262 }, { "epoch": 0.2841771656624149, "grad_norm": 0.0, "learning_rate": 1.6798829419232793e-05, "loss": 1.0528, "step": 7263 }, { "epoch": 0.28421629235464435, "grad_norm": 0.0, "learning_rate": 1.6797900077892788e-05, "loss": 1.1208, "step": 7264 }, { "epoch": 0.2842554190468738, "grad_norm": 0.0, "learning_rate": 1.6796970627387028e-05, "loss": 1.1214, "step": 7265 }, { "epoch": 0.28429454573910323, "grad_norm": 0.0, "learning_rate": 1.6796041067730437e-05, "loss": 1.1902, "step": 7266 }, { "epoch": 0.2843336724313327, "grad_norm": 0.0, "learning_rate": 1.6795111398937944e-05, "loss": 1.0521, "step": 7267 }, { "epoch": 0.2843727991235621, "grad_norm": 0.0, "learning_rate": 1.6794181621024473e-05, "loss": 1.1893, "step": 7268 }, { "epoch": 0.28441192581579156, "grad_norm": 0.0, "learning_rate": 1.679325173400496e-05, "loss": 1.1754, "step": 7269 }, { "epoch": 0.284451052508021, "grad_norm": 0.0, "learning_rate": 1.6792321737894337e-05, "loss": 1.145, "step": 7270 }, { "epoch": 0.28449017920025044, "grad_norm": 0.0, "learning_rate": 1.6791391632707535e-05, "loss": 1.0786, "step": 7271 }, { "epoch": 0.2845293058924798, "grad_norm": 0.0, "learning_rate": 1.67904614184595e-05, "loss": 1.2043, "step": 7272 }, { "epoch": 0.28456843258470926, "grad_norm": 0.0, "learning_rate": 1.678953109516516e-05, "loss": 1.0768, "step": 7273 }, { "epoch": 0.2846075592769387, "grad_norm": 0.0, "learning_rate": 1.6788600662839457e-05, "loss": 1.2474, "step": 7274 }, { "epoch": 0.28464668596916815, "grad_norm": 0.0, "learning_rate": 1.6787670121497335e-05, "loss": 1.2545, "step": 7275 }, { "epoch": 0.2846858126613976, "grad_norm": 0.0, "learning_rate": 1.678673947115374e-05, "loss": 1.0763, "step": 7276 }, { "epoch": 0.284724939353627, "grad_norm": 0.0, "learning_rate": 1.678580871182361e-05, "loss": 1.2645, "step": 7277 }, { "epoch": 0.28476406604585647, "grad_norm": 0.0, "learning_rate": 1.678487784352189e-05, "loss": 1.1193, "step": 7278 }, { "epoch": 0.2848031927380859, "grad_norm": 0.0, "learning_rate": 1.6783946866263542e-05, "loss": 1.1387, "step": 7279 }, { "epoch": 0.28484231943031535, "grad_norm": 0.0, "learning_rate": 1.6783015780063503e-05, "loss": 1.2352, "step": 7280 }, { "epoch": 0.2848814461225448, "grad_norm": 0.0, "learning_rate": 1.6782084584936734e-05, "loss": 1.1381, "step": 7281 }, { "epoch": 0.28492057281477423, "grad_norm": 0.0, "learning_rate": 1.678115328089818e-05, "loss": 0.9892, "step": 7282 }, { "epoch": 0.28495969950700367, "grad_norm": 0.0, "learning_rate": 1.6780221867962806e-05, "loss": 1.1926, "step": 7283 }, { "epoch": 0.2849988261992331, "grad_norm": 0.0, "learning_rate": 1.6779290346145563e-05, "loss": 1.1827, "step": 7284 }, { "epoch": 0.28503795289146255, "grad_norm": 0.0, "learning_rate": 1.6778358715461416e-05, "loss": 1.0476, "step": 7285 }, { "epoch": 0.285077079583692, "grad_norm": 0.0, "learning_rate": 1.6777426975925318e-05, "loss": 1.1855, "step": 7286 }, { "epoch": 0.28511620627592144, "grad_norm": 0.0, "learning_rate": 1.6776495127552236e-05, "loss": 1.1671, "step": 7287 }, { "epoch": 0.2851553329681509, "grad_norm": 0.0, "learning_rate": 1.6775563170357134e-05, "loss": 1.0518, "step": 7288 }, { "epoch": 0.2851944596603803, "grad_norm": 0.0, "learning_rate": 1.6774631104354976e-05, "loss": 1.0684, "step": 7289 }, { "epoch": 0.28523358635260976, "grad_norm": 0.0, "learning_rate": 1.6773698929560732e-05, "loss": 1.2657, "step": 7290 }, { "epoch": 0.2852727130448392, "grad_norm": 0.0, "learning_rate": 1.6772766645989372e-05, "loss": 1.0321, "step": 7291 }, { "epoch": 0.28531183973706864, "grad_norm": 0.0, "learning_rate": 1.677183425365587e-05, "loss": 1.2057, "step": 7292 }, { "epoch": 0.2853509664292981, "grad_norm": 0.0, "learning_rate": 1.6770901752575186e-05, "loss": 1.1001, "step": 7293 }, { "epoch": 0.2853900931215275, "grad_norm": 0.0, "learning_rate": 1.6769969142762313e-05, "loss": 1.1752, "step": 7294 }, { "epoch": 0.28542921981375696, "grad_norm": 0.0, "learning_rate": 1.6769036424232213e-05, "loss": 1.0252, "step": 7295 }, { "epoch": 0.2854683465059864, "grad_norm": 0.0, "learning_rate": 1.6768103596999874e-05, "loss": 1.2076, "step": 7296 }, { "epoch": 0.28550747319821584, "grad_norm": 0.0, "learning_rate": 1.6767170661080273e-05, "loss": 1.2391, "step": 7297 }, { "epoch": 0.2855465998904453, "grad_norm": 0.0, "learning_rate": 1.676623761648839e-05, "loss": 1.2499, "step": 7298 }, { "epoch": 0.2855857265826747, "grad_norm": 0.0, "learning_rate": 1.6765304463239206e-05, "loss": 1.0705, "step": 7299 }, { "epoch": 0.28562485327490417, "grad_norm": 0.0, "learning_rate": 1.676437120134771e-05, "loss": 1.1336, "step": 7300 }, { "epoch": 0.28566397996713355, "grad_norm": 0.0, "learning_rate": 1.6763437830828896e-05, "loss": 1.2656, "step": 7301 }, { "epoch": 0.285703106659363, "grad_norm": 0.0, "learning_rate": 1.6762504351697738e-05, "loss": 1.0446, "step": 7302 }, { "epoch": 0.28574223335159243, "grad_norm": 0.0, "learning_rate": 1.6761570763969237e-05, "loss": 1.0836, "step": 7303 }, { "epoch": 0.2857813600438219, "grad_norm": 0.0, "learning_rate": 1.676063706765838e-05, "loss": 1.1874, "step": 7304 }, { "epoch": 0.2858204867360513, "grad_norm": 0.0, "learning_rate": 1.6759703262780167e-05, "loss": 1.0088, "step": 7305 }, { "epoch": 0.28585961342828076, "grad_norm": 0.0, "learning_rate": 1.6758769349349586e-05, "loss": 1.1674, "step": 7306 }, { "epoch": 0.2858987401205102, "grad_norm": 0.0, "learning_rate": 1.675783532738164e-05, "loss": 1.1989, "step": 7307 }, { "epoch": 0.28593786681273964, "grad_norm": 0.0, "learning_rate": 1.675690119689133e-05, "loss": 1.2415, "step": 7308 }, { "epoch": 0.2859769935049691, "grad_norm": 0.0, "learning_rate": 1.675596695789365e-05, "loss": 1.2014, "step": 7309 }, { "epoch": 0.2860161201971985, "grad_norm": 0.0, "learning_rate": 1.6755032610403606e-05, "loss": 1.045, "step": 7310 }, { "epoch": 0.28605524688942796, "grad_norm": 0.0, "learning_rate": 1.6754098154436204e-05, "loss": 1.1223, "step": 7311 }, { "epoch": 0.2860943735816574, "grad_norm": 0.0, "learning_rate": 1.675316359000645e-05, "loss": 1.1474, "step": 7312 }, { "epoch": 0.28613350027388684, "grad_norm": 0.0, "learning_rate": 1.6752228917129347e-05, "loss": 1.1707, "step": 7313 }, { "epoch": 0.2861726269661163, "grad_norm": 0.0, "learning_rate": 1.675129413581991e-05, "loss": 1.2997, "step": 7314 }, { "epoch": 0.2862117536583457, "grad_norm": 0.0, "learning_rate": 1.6750359246093154e-05, "loss": 1.0973, "step": 7315 }, { "epoch": 0.28625088035057517, "grad_norm": 0.0, "learning_rate": 1.6749424247964082e-05, "loss": 1.0455, "step": 7316 }, { "epoch": 0.2862900070428046, "grad_norm": 0.0, "learning_rate": 1.6748489141447716e-05, "loss": 1.1749, "step": 7317 }, { "epoch": 0.28632913373503405, "grad_norm": 0.0, "learning_rate": 1.6747553926559072e-05, "loss": 0.9977, "step": 7318 }, { "epoch": 0.2863682604272635, "grad_norm": 0.0, "learning_rate": 1.6746618603313165e-05, "loss": 1.1118, "step": 7319 }, { "epoch": 0.28640738711949293, "grad_norm": 0.0, "learning_rate": 1.6745683171725015e-05, "loss": 1.0812, "step": 7320 }, { "epoch": 0.28644651381172237, "grad_norm": 0.0, "learning_rate": 1.674474763180965e-05, "loss": 1.1607, "step": 7321 }, { "epoch": 0.2864856405039518, "grad_norm": 0.0, "learning_rate": 1.674381198358209e-05, "loss": 1.2078, "step": 7322 }, { "epoch": 0.28652476719618125, "grad_norm": 0.0, "learning_rate": 1.6742876227057356e-05, "loss": 1.1146, "step": 7323 }, { "epoch": 0.2865638938884107, "grad_norm": 0.0, "learning_rate": 1.6741940362250485e-05, "loss": 1.2402, "step": 7324 }, { "epoch": 0.28660302058064013, "grad_norm": 0.0, "learning_rate": 1.6741004389176496e-05, "loss": 1.0298, "step": 7325 }, { "epoch": 0.2866421472728696, "grad_norm": 0.0, "learning_rate": 1.6740068307850423e-05, "loss": 1.0411, "step": 7326 }, { "epoch": 0.286681273965099, "grad_norm": 0.0, "learning_rate": 1.67391321182873e-05, "loss": 1.1891, "step": 7327 }, { "epoch": 0.28672040065732846, "grad_norm": 0.0, "learning_rate": 1.673819582050216e-05, "loss": 1.0591, "step": 7328 }, { "epoch": 0.28675952734955784, "grad_norm": 0.0, "learning_rate": 1.6737259414510038e-05, "loss": 1.0687, "step": 7329 }, { "epoch": 0.2867986540417873, "grad_norm": 0.0, "learning_rate": 1.673632290032597e-05, "loss": 1.1809, "step": 7330 }, { "epoch": 0.2868377807340167, "grad_norm": 0.0, "learning_rate": 1.6735386277965e-05, "loss": 1.1024, "step": 7331 }, { "epoch": 0.28687690742624616, "grad_norm": 0.0, "learning_rate": 1.6734449547442165e-05, "loss": 1.1144, "step": 7332 }, { "epoch": 0.2869160341184756, "grad_norm": 0.0, "learning_rate": 1.673351270877251e-05, "loss": 1.1152, "step": 7333 }, { "epoch": 0.28695516081070505, "grad_norm": 0.0, "learning_rate": 1.6732575761971078e-05, "loss": 1.0408, "step": 7334 }, { "epoch": 0.2869942875029345, "grad_norm": 0.0, "learning_rate": 1.6731638707052917e-05, "loss": 1.0468, "step": 7335 }, { "epoch": 0.2870334141951639, "grad_norm": 0.0, "learning_rate": 1.6730701544033072e-05, "loss": 1.0584, "step": 7336 }, { "epoch": 0.28707254088739337, "grad_norm": 0.0, "learning_rate": 1.6729764272926594e-05, "loss": 1.0123, "step": 7337 }, { "epoch": 0.2871116675796228, "grad_norm": 0.0, "learning_rate": 1.6728826893748535e-05, "loss": 1.0574, "step": 7338 }, { "epoch": 0.28715079427185225, "grad_norm": 0.0, "learning_rate": 1.672788940651395e-05, "loss": 1.045, "step": 7339 }, { "epoch": 0.2871899209640817, "grad_norm": 0.0, "learning_rate": 1.6726951811237887e-05, "loss": 1.1066, "step": 7340 }, { "epoch": 0.28722904765631113, "grad_norm": 0.0, "learning_rate": 1.672601410793541e-05, "loss": 1.2224, "step": 7341 }, { "epoch": 0.2872681743485406, "grad_norm": 0.0, "learning_rate": 1.6725076296621578e-05, "loss": 1.099, "step": 7342 }, { "epoch": 0.28730730104077, "grad_norm": 0.0, "learning_rate": 1.672413837731144e-05, "loss": 1.3236, "step": 7343 }, { "epoch": 0.28734642773299945, "grad_norm": 0.0, "learning_rate": 1.672320035002007e-05, "loss": 1.0377, "step": 7344 }, { "epoch": 0.2873855544252289, "grad_norm": 0.0, "learning_rate": 1.6722262214762527e-05, "loss": 1.1024, "step": 7345 }, { "epoch": 0.28742468111745834, "grad_norm": 0.0, "learning_rate": 1.6721323971553877e-05, "loss": 1.1749, "step": 7346 }, { "epoch": 0.2874638078096878, "grad_norm": 0.0, "learning_rate": 1.6720385620409186e-05, "loss": 1.1203, "step": 7347 }, { "epoch": 0.2875029345019172, "grad_norm": 0.0, "learning_rate": 1.6719447161343523e-05, "loss": 1.0868, "step": 7348 }, { "epoch": 0.28754206119414666, "grad_norm": 0.0, "learning_rate": 1.6718508594371955e-05, "loss": 1.2323, "step": 7349 }, { "epoch": 0.2875811878863761, "grad_norm": 0.0, "learning_rate": 1.6717569919509565e-05, "loss": 1.1662, "step": 7350 }, { "epoch": 0.28762031457860554, "grad_norm": 0.0, "learning_rate": 1.6716631136771413e-05, "loss": 0.985, "step": 7351 }, { "epoch": 0.287659441270835, "grad_norm": 0.0, "learning_rate": 1.6715692246172584e-05, "loss": 1.2539, "step": 7352 }, { "epoch": 0.2876985679630644, "grad_norm": 0.0, "learning_rate": 1.671475324772815e-05, "loss": 1.1063, "step": 7353 }, { "epoch": 0.28773769465529386, "grad_norm": 0.0, "learning_rate": 1.67138141414532e-05, "loss": 1.1598, "step": 7354 }, { "epoch": 0.2877768213475233, "grad_norm": 0.0, "learning_rate": 1.67128749273628e-05, "loss": 1.0471, "step": 7355 }, { "epoch": 0.28781594803975274, "grad_norm": 0.0, "learning_rate": 1.6711935605472043e-05, "loss": 1.0882, "step": 7356 }, { "epoch": 0.2878550747319822, "grad_norm": 0.0, "learning_rate": 1.6710996175796012e-05, "loss": 1.175, "step": 7357 }, { "epoch": 0.28789420142421157, "grad_norm": 0.0, "learning_rate": 1.671005663834979e-05, "loss": 1.0562, "step": 7358 }, { "epoch": 0.287933328116441, "grad_norm": 0.0, "learning_rate": 1.6709116993148468e-05, "loss": 1.1796, "step": 7359 }, { "epoch": 0.28797245480867045, "grad_norm": 0.0, "learning_rate": 1.6708177240207133e-05, "loss": 1.1991, "step": 7360 }, { "epoch": 0.2880115815008999, "grad_norm": 0.0, "learning_rate": 1.670723737954088e-05, "loss": 1.0911, "step": 7361 }, { "epoch": 0.28805070819312933, "grad_norm": 0.0, "learning_rate": 1.6706297411164797e-05, "loss": 1.1724, "step": 7362 }, { "epoch": 0.2880898348853588, "grad_norm": 0.0, "learning_rate": 1.670535733509398e-05, "loss": 1.0841, "step": 7363 }, { "epoch": 0.2881289615775882, "grad_norm": 0.0, "learning_rate": 1.670441715134353e-05, "loss": 1.1075, "step": 7364 }, { "epoch": 0.28816808826981766, "grad_norm": 0.0, "learning_rate": 1.6703476859928537e-05, "loss": 1.2229, "step": 7365 }, { "epoch": 0.2882072149620471, "grad_norm": 0.0, "learning_rate": 1.6702536460864108e-05, "loss": 1.1857, "step": 7366 }, { "epoch": 0.28824634165427654, "grad_norm": 0.0, "learning_rate": 1.670159595416534e-05, "loss": 1.176, "step": 7367 }, { "epoch": 0.288285468346506, "grad_norm": 0.0, "learning_rate": 1.6700655339847346e-05, "loss": 1.1767, "step": 7368 }, { "epoch": 0.2883245950387354, "grad_norm": 0.0, "learning_rate": 1.6699714617925216e-05, "loss": 1.1897, "step": 7369 }, { "epoch": 0.28836372173096486, "grad_norm": 0.0, "learning_rate": 1.6698773788414066e-05, "loss": 1.1464, "step": 7370 }, { "epoch": 0.2884028484231943, "grad_norm": 0.0, "learning_rate": 1.6697832851329002e-05, "loss": 1.0078, "step": 7371 }, { "epoch": 0.28844197511542374, "grad_norm": 0.0, "learning_rate": 1.6696891806685137e-05, "loss": 1.171, "step": 7372 }, { "epoch": 0.2884811018076532, "grad_norm": 0.0, "learning_rate": 1.6695950654497582e-05, "loss": 1.0688, "step": 7373 }, { "epoch": 0.2885202284998826, "grad_norm": 0.0, "learning_rate": 1.669500939478145e-05, "loss": 1.0715, "step": 7374 }, { "epoch": 0.28855935519211207, "grad_norm": 0.0, "learning_rate": 1.6694068027551856e-05, "loss": 0.9894, "step": 7375 }, { "epoch": 0.2885984818843415, "grad_norm": 0.0, "learning_rate": 1.6693126552823916e-05, "loss": 1.1008, "step": 7376 }, { "epoch": 0.28863760857657095, "grad_norm": 0.0, "learning_rate": 1.6692184970612752e-05, "loss": 1.1292, "step": 7377 }, { "epoch": 0.2886767352688004, "grad_norm": 0.0, "learning_rate": 1.669124328093348e-05, "loss": 1.164, "step": 7378 }, { "epoch": 0.28871586196102983, "grad_norm": 0.0, "learning_rate": 1.6690301483801233e-05, "loss": 1.1036, "step": 7379 }, { "epoch": 0.28875498865325927, "grad_norm": 0.0, "learning_rate": 1.6689359579231122e-05, "loss": 1.1932, "step": 7380 }, { "epoch": 0.2887941153454887, "grad_norm": 0.0, "learning_rate": 1.668841756723828e-05, "loss": 1.1378, "step": 7381 }, { "epoch": 0.28883324203771815, "grad_norm": 0.0, "learning_rate": 1.6687475447837833e-05, "loss": 1.1065, "step": 7382 }, { "epoch": 0.2888723687299476, "grad_norm": 0.0, "learning_rate": 1.6686533221044907e-05, "loss": 1.1084, "step": 7383 }, { "epoch": 0.28891149542217703, "grad_norm": 0.0, "learning_rate": 1.668559088687464e-05, "loss": 1.1544, "step": 7384 }, { "epoch": 0.2889506221144065, "grad_norm": 0.0, "learning_rate": 1.668464844534216e-05, "loss": 1.1298, "step": 7385 }, { "epoch": 0.28898974880663586, "grad_norm": 0.0, "learning_rate": 1.6683705896462603e-05, "loss": 1.1203, "step": 7386 }, { "epoch": 0.2890288754988653, "grad_norm": 0.0, "learning_rate": 1.66827632402511e-05, "loss": 1.174, "step": 7387 }, { "epoch": 0.28906800219109474, "grad_norm": 0.0, "learning_rate": 1.66818204767228e-05, "loss": 1.2218, "step": 7388 }, { "epoch": 0.2891071288833242, "grad_norm": 0.0, "learning_rate": 1.668087760589283e-05, "loss": 1.1069, "step": 7389 }, { "epoch": 0.2891462555755536, "grad_norm": 0.0, "learning_rate": 1.6679934627776343e-05, "loss": 1.1551, "step": 7390 }, { "epoch": 0.28918538226778306, "grad_norm": 0.0, "learning_rate": 1.667899154238847e-05, "loss": 1.2618, "step": 7391 }, { "epoch": 0.2892245089600125, "grad_norm": 0.0, "learning_rate": 1.6678048349744366e-05, "loss": 1.2392, "step": 7392 }, { "epoch": 0.28926363565224195, "grad_norm": 0.0, "learning_rate": 1.6677105049859175e-05, "loss": 1.1849, "step": 7393 }, { "epoch": 0.2893027623444714, "grad_norm": 0.0, "learning_rate": 1.667616164274804e-05, "loss": 1.1174, "step": 7394 }, { "epoch": 0.28934188903670083, "grad_norm": 0.0, "learning_rate": 1.6675218128426117e-05, "loss": 1.0955, "step": 7395 }, { "epoch": 0.28938101572893027, "grad_norm": 0.0, "learning_rate": 1.6674274506908554e-05, "loss": 1.1533, "step": 7396 }, { "epoch": 0.2894201424211597, "grad_norm": 0.0, "learning_rate": 1.6673330778210508e-05, "loss": 1.1114, "step": 7397 }, { "epoch": 0.28945926911338915, "grad_norm": 0.0, "learning_rate": 1.6672386942347127e-05, "loss": 1.1499, "step": 7398 }, { "epoch": 0.2894983958056186, "grad_norm": 0.0, "learning_rate": 1.6671442999333577e-05, "loss": 1.1406, "step": 7399 }, { "epoch": 0.28953752249784803, "grad_norm": 0.0, "learning_rate": 1.6670498949185013e-05, "loss": 1.1489, "step": 7400 }, { "epoch": 0.2895766491900775, "grad_norm": 0.0, "learning_rate": 1.6669554791916593e-05, "loss": 1.0648, "step": 7401 }, { "epoch": 0.2896157758823069, "grad_norm": 0.0, "learning_rate": 1.6668610527543476e-05, "loss": 1.1721, "step": 7402 }, { "epoch": 0.28965490257453635, "grad_norm": 0.0, "learning_rate": 1.6667666156080837e-05, "loss": 1.081, "step": 7403 }, { "epoch": 0.2896940292667658, "grad_norm": 0.0, "learning_rate": 1.666672167754383e-05, "loss": 1.1434, "step": 7404 }, { "epoch": 0.28973315595899524, "grad_norm": 0.0, "learning_rate": 1.666577709194763e-05, "loss": 1.1689, "step": 7405 }, { "epoch": 0.2897722826512247, "grad_norm": 0.0, "learning_rate": 1.6664832399307402e-05, "loss": 1.236, "step": 7406 }, { "epoch": 0.2898114093434541, "grad_norm": 0.0, "learning_rate": 1.6663887599638316e-05, "loss": 1.1542, "step": 7407 }, { "epoch": 0.28985053603568356, "grad_norm": 0.0, "learning_rate": 1.6662942692955548e-05, "loss": 1.1355, "step": 7408 }, { "epoch": 0.289889662727913, "grad_norm": 0.0, "learning_rate": 1.6661997679274263e-05, "loss": 1.0264, "step": 7409 }, { "epoch": 0.28992878942014244, "grad_norm": 0.0, "learning_rate": 1.666105255860965e-05, "loss": 1.2787, "step": 7410 }, { "epoch": 0.2899679161123719, "grad_norm": 0.0, "learning_rate": 1.6660107330976876e-05, "loss": 1.2917, "step": 7411 }, { "epoch": 0.2900070428046013, "grad_norm": 0.0, "learning_rate": 1.6659161996391125e-05, "loss": 1.2099, "step": 7412 }, { "epoch": 0.29004616949683076, "grad_norm": 0.0, "learning_rate": 1.665821655486758e-05, "loss": 1.0507, "step": 7413 }, { "epoch": 0.2900852961890602, "grad_norm": 0.0, "learning_rate": 1.6657271006421412e-05, "loss": 1.1039, "step": 7414 }, { "epoch": 0.2901244228812896, "grad_norm": 0.0, "learning_rate": 1.665632535106782e-05, "loss": 1.1371, "step": 7415 }, { "epoch": 0.29016354957351903, "grad_norm": 0.0, "learning_rate": 1.6655379588821983e-05, "loss": 1.1794, "step": 7416 }, { "epoch": 0.29020267626574847, "grad_norm": 0.0, "learning_rate": 1.665443371969909e-05, "loss": 1.1246, "step": 7417 }, { "epoch": 0.2902418029579779, "grad_norm": 0.0, "learning_rate": 1.6653487743714328e-05, "loss": 1.1547, "step": 7418 }, { "epoch": 0.29028092965020735, "grad_norm": 0.0, "learning_rate": 1.6652541660882894e-05, "loss": 1.0905, "step": 7419 }, { "epoch": 0.2903200563424368, "grad_norm": 0.0, "learning_rate": 1.665159547121997e-05, "loss": 1.2206, "step": 7420 }, { "epoch": 0.29035918303466624, "grad_norm": 0.0, "learning_rate": 1.6650649174740766e-05, "loss": 1.112, "step": 7421 }, { "epoch": 0.2903983097268957, "grad_norm": 0.0, "learning_rate": 1.6649702771460464e-05, "loss": 1.0714, "step": 7422 }, { "epoch": 0.2904374364191251, "grad_norm": 0.0, "learning_rate": 1.664875626139427e-05, "loss": 1.1918, "step": 7423 }, { "epoch": 0.29047656311135456, "grad_norm": 0.0, "learning_rate": 1.664780964455738e-05, "loss": 1.1544, "step": 7424 }, { "epoch": 0.290515689803584, "grad_norm": 0.0, "learning_rate": 1.6646862920965e-05, "loss": 1.0199, "step": 7425 }, { "epoch": 0.29055481649581344, "grad_norm": 0.0, "learning_rate": 1.664591609063233e-05, "loss": 1.1509, "step": 7426 }, { "epoch": 0.2905939431880429, "grad_norm": 0.0, "learning_rate": 1.6644969153574574e-05, "loss": 1.1436, "step": 7427 }, { "epoch": 0.2906330698802723, "grad_norm": 0.0, "learning_rate": 1.6644022109806938e-05, "loss": 1.157, "step": 7428 }, { "epoch": 0.29067219657250176, "grad_norm": 0.0, "learning_rate": 1.664307495934464e-05, "loss": 1.0693, "step": 7429 }, { "epoch": 0.2907113232647312, "grad_norm": 0.0, "learning_rate": 1.664212770220287e-05, "loss": 1.1588, "step": 7430 }, { "epoch": 0.29075044995696064, "grad_norm": 0.0, "learning_rate": 1.6641180338396867e-05, "loss": 1.0651, "step": 7431 }, { "epoch": 0.2907895766491901, "grad_norm": 0.0, "learning_rate": 1.664023286794182e-05, "loss": 1.0561, "step": 7432 }, { "epoch": 0.2908287033414195, "grad_norm": 0.0, "learning_rate": 1.6639285290852954e-05, "loss": 1.2411, "step": 7433 }, { "epoch": 0.29086783003364897, "grad_norm": 0.0, "learning_rate": 1.663833760714549e-05, "loss": 1.1922, "step": 7434 }, { "epoch": 0.2909069567258784, "grad_norm": 0.0, "learning_rate": 1.6637389816834638e-05, "loss": 1.1401, "step": 7435 }, { "epoch": 0.29094608341810785, "grad_norm": 0.0, "learning_rate": 1.6636441919935627e-05, "loss": 1.113, "step": 7436 }, { "epoch": 0.2909852101103373, "grad_norm": 0.0, "learning_rate": 1.6635493916463673e-05, "loss": 1.2631, "step": 7437 }, { "epoch": 0.29102433680256673, "grad_norm": 0.0, "learning_rate": 1.6634545806434e-05, "loss": 1.0952, "step": 7438 }, { "epoch": 0.29106346349479617, "grad_norm": 0.0, "learning_rate": 1.6633597589861836e-05, "loss": 1.0762, "step": 7439 }, { "epoch": 0.2911025901870256, "grad_norm": 0.0, "learning_rate": 1.6632649266762406e-05, "loss": 1.0574, "step": 7440 }, { "epoch": 0.29114171687925505, "grad_norm": 0.0, "learning_rate": 1.663170083715094e-05, "loss": 1.1699, "step": 7441 }, { "epoch": 0.2911808435714845, "grad_norm": 0.0, "learning_rate": 1.663075230104267e-05, "loss": 1.1013, "step": 7442 }, { "epoch": 0.2912199702637139, "grad_norm": 0.0, "learning_rate": 1.6629803658452825e-05, "loss": 1.1548, "step": 7443 }, { "epoch": 0.2912590969559433, "grad_norm": 0.0, "learning_rate": 1.6628854909396643e-05, "loss": 1.2241, "step": 7444 }, { "epoch": 0.29129822364817276, "grad_norm": 0.0, "learning_rate": 1.6627906053889354e-05, "loss": 1.0305, "step": 7445 }, { "epoch": 0.2913373503404022, "grad_norm": 0.0, "learning_rate": 1.6626957091946203e-05, "loss": 1.0999, "step": 7446 }, { "epoch": 0.29137647703263164, "grad_norm": 0.0, "learning_rate": 1.6626008023582425e-05, "loss": 1.1095, "step": 7447 }, { "epoch": 0.2914156037248611, "grad_norm": 0.0, "learning_rate": 1.662505884881326e-05, "loss": 1.0983, "step": 7448 }, { "epoch": 0.2914547304170905, "grad_norm": 0.0, "learning_rate": 1.662410956765395e-05, "loss": 1.0801, "step": 7449 }, { "epoch": 0.29149385710931996, "grad_norm": 0.0, "learning_rate": 1.6623160180119745e-05, "loss": 1.1115, "step": 7450 }, { "epoch": 0.2915329838015494, "grad_norm": 0.0, "learning_rate": 1.6622210686225882e-05, "loss": 1.1501, "step": 7451 }, { "epoch": 0.29157211049377885, "grad_norm": 0.0, "learning_rate": 1.6621261085987613e-05, "loss": 1.1929, "step": 7452 }, { "epoch": 0.2916112371860083, "grad_norm": 0.0, "learning_rate": 1.662031137942019e-05, "loss": 1.0357, "step": 7453 }, { "epoch": 0.29165036387823773, "grad_norm": 0.0, "learning_rate": 1.6619361566538863e-05, "loss": 1.1893, "step": 7454 }, { "epoch": 0.29168949057046717, "grad_norm": 0.0, "learning_rate": 1.6618411647358886e-05, "loss": 1.0197, "step": 7455 }, { "epoch": 0.2917286172626966, "grad_norm": 0.0, "learning_rate": 1.6617461621895508e-05, "loss": 1.149, "step": 7456 }, { "epoch": 0.29176774395492605, "grad_norm": 0.0, "learning_rate": 1.661651149016399e-05, "loss": 1.0921, "step": 7457 }, { "epoch": 0.2918068706471555, "grad_norm": 0.0, "learning_rate": 1.6615561252179585e-05, "loss": 1.1414, "step": 7458 }, { "epoch": 0.29184599733938493, "grad_norm": 0.0, "learning_rate": 1.6614610907957556e-05, "loss": 1.0708, "step": 7459 }, { "epoch": 0.2918851240316144, "grad_norm": 0.0, "learning_rate": 1.6613660457513168e-05, "loss": 1.174, "step": 7460 }, { "epoch": 0.2919242507238438, "grad_norm": 0.0, "learning_rate": 1.661270990086168e-05, "loss": 1.1675, "step": 7461 }, { "epoch": 0.29196337741607326, "grad_norm": 0.0, "learning_rate": 1.6611759238018356e-05, "loss": 1.1289, "step": 7462 }, { "epoch": 0.2920025041083027, "grad_norm": 0.0, "learning_rate": 1.6610808468998462e-05, "loss": 1.1144, "step": 7463 }, { "epoch": 0.29204163080053214, "grad_norm": 0.0, "learning_rate": 1.660985759381727e-05, "loss": 1.0113, "step": 7464 }, { "epoch": 0.2920807574927616, "grad_norm": 0.0, "learning_rate": 1.660890661249005e-05, "loss": 1.1123, "step": 7465 }, { "epoch": 0.292119884184991, "grad_norm": 0.0, "learning_rate": 1.6607955525032066e-05, "loss": 1.2365, "step": 7466 }, { "epoch": 0.29215901087722046, "grad_norm": 0.0, "learning_rate": 1.6607004331458598e-05, "loss": 1.0501, "step": 7467 }, { "epoch": 0.2921981375694499, "grad_norm": 0.0, "learning_rate": 1.660605303178492e-05, "loss": 1.1244, "step": 7468 }, { "epoch": 0.29223726426167934, "grad_norm": 0.0, "learning_rate": 1.660510162602631e-05, "loss": 1.1846, "step": 7469 }, { "epoch": 0.2922763909539088, "grad_norm": 0.0, "learning_rate": 1.660415011419804e-05, "loss": 1.0024, "step": 7470 }, { "epoch": 0.2923155176461382, "grad_norm": 0.0, "learning_rate": 1.6603198496315403e-05, "loss": 1.0851, "step": 7471 }, { "epoch": 0.2923546443383676, "grad_norm": 0.0, "learning_rate": 1.6602246772393665e-05, "loss": 0.9959, "step": 7472 }, { "epoch": 0.29239377103059705, "grad_norm": 0.0, "learning_rate": 1.6601294942448122e-05, "loss": 0.9717, "step": 7473 }, { "epoch": 0.2924328977228265, "grad_norm": 0.0, "learning_rate": 1.660034300649405e-05, "loss": 1.1078, "step": 7474 }, { "epoch": 0.29247202441505593, "grad_norm": 0.0, "learning_rate": 1.659939096454674e-05, "loss": 1.1921, "step": 7475 }, { "epoch": 0.29251115110728537, "grad_norm": 0.0, "learning_rate": 1.6598438816621484e-05, "loss": 1.0745, "step": 7476 }, { "epoch": 0.2925502777995148, "grad_norm": 0.0, "learning_rate": 1.6597486562733565e-05, "loss": 1.2017, "step": 7477 }, { "epoch": 0.29258940449174425, "grad_norm": 0.0, "learning_rate": 1.659653420289828e-05, "loss": 1.1979, "step": 7478 }, { "epoch": 0.2926285311839737, "grad_norm": 0.0, "learning_rate": 1.6595581737130923e-05, "loss": 1.1163, "step": 7479 }, { "epoch": 0.29266765787620314, "grad_norm": 0.0, "learning_rate": 1.659462916544679e-05, "loss": 1.1435, "step": 7480 }, { "epoch": 0.2927067845684326, "grad_norm": 0.0, "learning_rate": 1.659367648786117e-05, "loss": 1.0449, "step": 7481 }, { "epoch": 0.292745911260662, "grad_norm": 0.0, "learning_rate": 1.6592723704389374e-05, "loss": 1.1166, "step": 7482 }, { "epoch": 0.29278503795289146, "grad_norm": 0.0, "learning_rate": 1.659177081504669e-05, "loss": 1.0286, "step": 7483 }, { "epoch": 0.2928241646451209, "grad_norm": 0.0, "learning_rate": 1.659081781984843e-05, "loss": 1.2441, "step": 7484 }, { "epoch": 0.29286329133735034, "grad_norm": 0.0, "learning_rate": 1.6589864718809896e-05, "loss": 1.1594, "step": 7485 }, { "epoch": 0.2929024180295798, "grad_norm": 0.0, "learning_rate": 1.658891151194639e-05, "loss": 0.9778, "step": 7486 }, { "epoch": 0.2929415447218092, "grad_norm": 0.0, "learning_rate": 1.6587958199273225e-05, "loss": 1.2089, "step": 7487 }, { "epoch": 0.29298067141403866, "grad_norm": 0.0, "learning_rate": 1.6587004780805704e-05, "loss": 1.1231, "step": 7488 }, { "epoch": 0.2930197981062681, "grad_norm": 0.0, "learning_rate": 1.658605125655914e-05, "loss": 1.1834, "step": 7489 }, { "epoch": 0.29305892479849754, "grad_norm": 0.0, "learning_rate": 1.6585097626548848e-05, "loss": 1.1208, "step": 7490 }, { "epoch": 0.293098051490727, "grad_norm": 0.0, "learning_rate": 1.6584143890790138e-05, "loss": 1.0186, "step": 7491 }, { "epoch": 0.2931371781829564, "grad_norm": 0.0, "learning_rate": 1.6583190049298327e-05, "loss": 1.139, "step": 7492 }, { "epoch": 0.29317630487518587, "grad_norm": 0.0, "learning_rate": 1.6582236102088734e-05, "loss": 1.1665, "step": 7493 }, { "epoch": 0.2932154315674153, "grad_norm": 0.0, "learning_rate": 1.6581282049176674e-05, "loss": 1.188, "step": 7494 }, { "epoch": 0.29325455825964475, "grad_norm": 0.0, "learning_rate": 1.6580327890577476e-05, "loss": 1.1483, "step": 7495 }, { "epoch": 0.2932936849518742, "grad_norm": 0.0, "learning_rate": 1.6579373626306453e-05, "loss": 1.1649, "step": 7496 }, { "epoch": 0.29333281164410363, "grad_norm": 0.0, "learning_rate": 1.6578419256378935e-05, "loss": 1.0864, "step": 7497 }, { "epoch": 0.29337193833633307, "grad_norm": 0.0, "learning_rate": 1.657746478081025e-05, "loss": 1.1581, "step": 7498 }, { "epoch": 0.2934110650285625, "grad_norm": 0.0, "learning_rate": 1.657651019961572e-05, "loss": 1.122, "step": 7499 }, { "epoch": 0.2934501917207919, "grad_norm": 0.0, "learning_rate": 1.6575555512810678e-05, "loss": 1.198, "step": 7500 }, { "epoch": 0.29348931841302134, "grad_norm": 0.0, "learning_rate": 1.6574600720410455e-05, "loss": 1.0694, "step": 7501 }, { "epoch": 0.2935284451052508, "grad_norm": 0.0, "learning_rate": 1.657364582243038e-05, "loss": 1.1804, "step": 7502 }, { "epoch": 0.2935675717974802, "grad_norm": 0.0, "learning_rate": 1.6572690818885796e-05, "loss": 1.0295, "step": 7503 }, { "epoch": 0.29360669848970966, "grad_norm": 0.0, "learning_rate": 1.657173570979203e-05, "loss": 1.1211, "step": 7504 }, { "epoch": 0.2936458251819391, "grad_norm": 0.0, "learning_rate": 1.657078049516442e-05, "loss": 1.1822, "step": 7505 }, { "epoch": 0.29368495187416854, "grad_norm": 0.0, "learning_rate": 1.6569825175018315e-05, "loss": 1.1733, "step": 7506 }, { "epoch": 0.293724078566398, "grad_norm": 0.0, "learning_rate": 1.6568869749369044e-05, "loss": 1.0023, "step": 7507 }, { "epoch": 0.2937632052586274, "grad_norm": 0.0, "learning_rate": 1.6567914218231963e-05, "loss": 1.0936, "step": 7508 }, { "epoch": 0.29380233195085687, "grad_norm": 0.0, "learning_rate": 1.6566958581622404e-05, "loss": 1.0666, "step": 7509 }, { "epoch": 0.2938414586430863, "grad_norm": 0.0, "learning_rate": 1.6566002839555722e-05, "loss": 1.1797, "step": 7510 }, { "epoch": 0.29388058533531575, "grad_norm": 0.0, "learning_rate": 1.656504699204726e-05, "loss": 1.1249, "step": 7511 }, { "epoch": 0.2939197120275452, "grad_norm": 0.0, "learning_rate": 1.656409103911237e-05, "loss": 1.1919, "step": 7512 }, { "epoch": 0.29395883871977463, "grad_norm": 0.0, "learning_rate": 1.6563134980766406e-05, "loss": 0.9508, "step": 7513 }, { "epoch": 0.29399796541200407, "grad_norm": 0.0, "learning_rate": 1.6562178817024713e-05, "loss": 1.1726, "step": 7514 }, { "epoch": 0.2940370921042335, "grad_norm": 0.0, "learning_rate": 1.6561222547902656e-05, "loss": 0.903, "step": 7515 }, { "epoch": 0.29407621879646295, "grad_norm": 0.0, "learning_rate": 1.656026617341558e-05, "loss": 0.9619, "step": 7516 }, { "epoch": 0.2941153454886924, "grad_norm": 0.0, "learning_rate": 1.655930969357886e-05, "loss": 1.211, "step": 7517 }, { "epoch": 0.29415447218092183, "grad_norm": 0.0, "learning_rate": 1.655835310840784e-05, "loss": 1.2164, "step": 7518 }, { "epoch": 0.2941935988731513, "grad_norm": 0.0, "learning_rate": 1.6557396417917885e-05, "loss": 1.1348, "step": 7519 }, { "epoch": 0.2942327255653807, "grad_norm": 0.0, "learning_rate": 1.6556439622124364e-05, "loss": 1.1669, "step": 7520 }, { "epoch": 0.29427185225761016, "grad_norm": 0.0, "learning_rate": 1.6555482721042636e-05, "loss": 1.0031, "step": 7521 }, { "epoch": 0.2943109789498396, "grad_norm": 0.0, "learning_rate": 1.655452571468807e-05, "loss": 1.253, "step": 7522 }, { "epoch": 0.29435010564206904, "grad_norm": 0.0, "learning_rate": 1.6553568603076036e-05, "loss": 1.1939, "step": 7523 }, { "epoch": 0.2943892323342985, "grad_norm": 0.0, "learning_rate": 1.6552611386221902e-05, "loss": 1.1196, "step": 7524 }, { "epoch": 0.2944283590265279, "grad_norm": 0.0, "learning_rate": 1.655165406414104e-05, "loss": 1.2126, "step": 7525 }, { "epoch": 0.29446748571875736, "grad_norm": 0.0, "learning_rate": 1.6550696636848823e-05, "loss": 1.0558, "step": 7526 }, { "epoch": 0.2945066124109868, "grad_norm": 0.0, "learning_rate": 1.6549739104360627e-05, "loss": 1.1344, "step": 7527 }, { "epoch": 0.2945457391032162, "grad_norm": 0.0, "learning_rate": 1.6548781466691828e-05, "loss": 1.1677, "step": 7528 }, { "epoch": 0.2945848657954456, "grad_norm": 0.0, "learning_rate": 1.6547823723857806e-05, "loss": 1.0674, "step": 7529 }, { "epoch": 0.29462399248767507, "grad_norm": 0.0, "learning_rate": 1.6546865875873938e-05, "loss": 1.131, "step": 7530 }, { "epoch": 0.2946631191799045, "grad_norm": 0.0, "learning_rate": 1.6545907922755605e-05, "loss": 1.2896, "step": 7531 }, { "epoch": 0.29470224587213395, "grad_norm": 0.0, "learning_rate": 1.65449498645182e-05, "loss": 1.1141, "step": 7532 }, { "epoch": 0.2947413725643634, "grad_norm": 0.0, "learning_rate": 1.6543991701177094e-05, "loss": 1.1124, "step": 7533 }, { "epoch": 0.29478049925659283, "grad_norm": 0.0, "learning_rate": 1.6543033432747687e-05, "loss": 1.1177, "step": 7534 }, { "epoch": 0.2948196259488223, "grad_norm": 0.0, "learning_rate": 1.654207505924536e-05, "loss": 1.1408, "step": 7535 }, { "epoch": 0.2948587526410517, "grad_norm": 0.0, "learning_rate": 1.6541116580685504e-05, "loss": 1.1713, "step": 7536 }, { "epoch": 0.29489787933328115, "grad_norm": 0.0, "learning_rate": 1.6540157997083516e-05, "loss": 1.2232, "step": 7537 }, { "epoch": 0.2949370060255106, "grad_norm": 0.0, "learning_rate": 1.6539199308454783e-05, "loss": 1.1476, "step": 7538 }, { "epoch": 0.29497613271774004, "grad_norm": 0.0, "learning_rate": 1.6538240514814702e-05, "loss": 1.2307, "step": 7539 }, { "epoch": 0.2950152594099695, "grad_norm": 0.0, "learning_rate": 1.6537281616178674e-05, "loss": 1.1466, "step": 7540 }, { "epoch": 0.2950543861021989, "grad_norm": 0.0, "learning_rate": 1.653632261256209e-05, "loss": 1.288, "step": 7541 }, { "epoch": 0.29509351279442836, "grad_norm": 0.0, "learning_rate": 1.6535363503980365e-05, "loss": 0.9048, "step": 7542 }, { "epoch": 0.2951326394866578, "grad_norm": 0.0, "learning_rate": 1.6534404290448885e-05, "loss": 1.0999, "step": 7543 }, { "epoch": 0.29517176617888724, "grad_norm": 0.0, "learning_rate": 1.653344497198306e-05, "loss": 1.1461, "step": 7544 }, { "epoch": 0.2952108928711167, "grad_norm": 0.0, "learning_rate": 1.6532485548598293e-05, "loss": 1.111, "step": 7545 }, { "epoch": 0.2952500195633461, "grad_norm": 0.0, "learning_rate": 1.6531526020309997e-05, "loss": 1.0596, "step": 7546 }, { "epoch": 0.29528914625557556, "grad_norm": 0.0, "learning_rate": 1.6530566387133577e-05, "loss": 1.2079, "step": 7547 }, { "epoch": 0.295328272947805, "grad_norm": 0.0, "learning_rate": 1.6529606649084446e-05, "loss": 1.059, "step": 7548 }, { "epoch": 0.29536739964003444, "grad_norm": 0.0, "learning_rate": 1.6528646806178016e-05, "loss": 0.9839, "step": 7549 }, { "epoch": 0.2954065263322639, "grad_norm": 0.0, "learning_rate": 1.6527686858429694e-05, "loss": 1.0453, "step": 7550 }, { "epoch": 0.2954456530244933, "grad_norm": 0.0, "learning_rate": 1.65267268058549e-05, "loss": 1.214, "step": 7551 }, { "epoch": 0.29548477971672277, "grad_norm": 0.0, "learning_rate": 1.652576664846906e-05, "loss": 1.1442, "step": 7552 }, { "epoch": 0.2955239064089522, "grad_norm": 0.0, "learning_rate": 1.6524806386287578e-05, "loss": 1.1362, "step": 7553 }, { "epoch": 0.29556303310118165, "grad_norm": 0.0, "learning_rate": 1.652384601932588e-05, "loss": 1.0031, "step": 7554 }, { "epoch": 0.2956021597934111, "grad_norm": 0.0, "learning_rate": 1.6522885547599394e-05, "loss": 1.0789, "step": 7555 }, { "epoch": 0.29564128648564053, "grad_norm": 0.0, "learning_rate": 1.652192497112354e-05, "loss": 1.1255, "step": 7556 }, { "epoch": 0.2956804131778699, "grad_norm": 0.0, "learning_rate": 1.652096428991374e-05, "loss": 1.2069, "step": 7557 }, { "epoch": 0.29571953987009936, "grad_norm": 0.0, "learning_rate": 1.652000350398543e-05, "loss": 0.9448, "step": 7558 }, { "epoch": 0.2957586665623288, "grad_norm": 0.0, "learning_rate": 1.6519042613354027e-05, "loss": 1.2164, "step": 7559 }, { "epoch": 0.29579779325455824, "grad_norm": 0.0, "learning_rate": 1.6518081618034973e-05, "loss": 1.1307, "step": 7560 }, { "epoch": 0.2958369199467877, "grad_norm": 0.0, "learning_rate": 1.6517120518043693e-05, "loss": 1.1627, "step": 7561 }, { "epoch": 0.2958760466390171, "grad_norm": 0.0, "learning_rate": 1.651615931339563e-05, "loss": 1.0665, "step": 7562 }, { "epoch": 0.29591517333124656, "grad_norm": 0.0, "learning_rate": 1.6515198004106204e-05, "loss": 1.315, "step": 7563 }, { "epoch": 0.295954300023476, "grad_norm": 0.0, "learning_rate": 1.6514236590190867e-05, "loss": 1.1621, "step": 7564 }, { "epoch": 0.29599342671570544, "grad_norm": 0.0, "learning_rate": 1.6513275071665057e-05, "loss": 1.2134, "step": 7565 }, { "epoch": 0.2960325534079349, "grad_norm": 0.0, "learning_rate": 1.6512313448544207e-05, "loss": 1.1129, "step": 7566 }, { "epoch": 0.2960716801001643, "grad_norm": 0.0, "learning_rate": 1.6511351720843763e-05, "loss": 1.0866, "step": 7567 }, { "epoch": 0.29611080679239377, "grad_norm": 0.0, "learning_rate": 1.651038988857917e-05, "loss": 1.1797, "step": 7568 }, { "epoch": 0.2961499334846232, "grad_norm": 0.0, "learning_rate": 1.6509427951765876e-05, "loss": 1.2188, "step": 7569 }, { "epoch": 0.29618906017685265, "grad_norm": 0.0, "learning_rate": 1.650846591041932e-05, "loss": 1.1021, "step": 7570 }, { "epoch": 0.2962281868690821, "grad_norm": 0.0, "learning_rate": 1.6507503764554963e-05, "loss": 1.1697, "step": 7571 }, { "epoch": 0.29626731356131153, "grad_norm": 0.0, "learning_rate": 1.6506541514188244e-05, "loss": 0.9818, "step": 7572 }, { "epoch": 0.29630644025354097, "grad_norm": 0.0, "learning_rate": 1.6505579159334628e-05, "loss": 1.1705, "step": 7573 }, { "epoch": 0.2963455669457704, "grad_norm": 0.0, "learning_rate": 1.6504616700009557e-05, "loss": 1.062, "step": 7574 }, { "epoch": 0.29638469363799985, "grad_norm": 0.0, "learning_rate": 1.65036541362285e-05, "loss": 1.2003, "step": 7575 }, { "epoch": 0.2964238203302293, "grad_norm": 0.0, "learning_rate": 1.65026914680069e-05, "loss": 1.1359, "step": 7576 }, { "epoch": 0.29646294702245873, "grad_norm": 0.0, "learning_rate": 1.6501728695360224e-05, "loss": 1.1331, "step": 7577 }, { "epoch": 0.2965020737146882, "grad_norm": 0.0, "learning_rate": 1.6500765818303935e-05, "loss": 1.0292, "step": 7578 }, { "epoch": 0.2965412004069176, "grad_norm": 0.0, "learning_rate": 1.649980283685349e-05, "loss": 1.1134, "step": 7579 }, { "epoch": 0.29658032709914706, "grad_norm": 0.0, "learning_rate": 1.6498839751024357e-05, "loss": 1.0035, "step": 7580 }, { "epoch": 0.2966194537913765, "grad_norm": 0.0, "learning_rate": 1.6497876560832e-05, "loss": 1.2873, "step": 7581 }, { "epoch": 0.29665858048360594, "grad_norm": 0.0, "learning_rate": 1.6496913266291895e-05, "loss": 1.1442, "step": 7582 }, { "epoch": 0.2966977071758354, "grad_norm": 0.0, "learning_rate": 1.6495949867419495e-05, "loss": 1.0667, "step": 7583 }, { "epoch": 0.2967368338680648, "grad_norm": 0.0, "learning_rate": 1.6494986364230285e-05, "loss": 1.1324, "step": 7584 }, { "epoch": 0.2967759605602942, "grad_norm": 0.0, "learning_rate": 1.649402275673973e-05, "loss": 1.2008, "step": 7585 }, { "epoch": 0.29681508725252365, "grad_norm": 0.0, "learning_rate": 1.6493059044963312e-05, "loss": 1.1118, "step": 7586 }, { "epoch": 0.2968542139447531, "grad_norm": 0.0, "learning_rate": 1.6492095228916496e-05, "loss": 1.2047, "step": 7587 }, { "epoch": 0.29689334063698253, "grad_norm": 0.0, "learning_rate": 1.649113130861477e-05, "loss": 1.0745, "step": 7588 }, { "epoch": 0.29693246732921197, "grad_norm": 0.0, "learning_rate": 1.6490167284073607e-05, "loss": 1.075, "step": 7589 }, { "epoch": 0.2969715940214414, "grad_norm": 0.0, "learning_rate": 1.6489203155308488e-05, "loss": 1.125, "step": 7590 }, { "epoch": 0.29701072071367085, "grad_norm": 0.0, "learning_rate": 1.64882389223349e-05, "loss": 1.1134, "step": 7591 }, { "epoch": 0.2970498474059003, "grad_norm": 0.0, "learning_rate": 1.6487274585168327e-05, "loss": 1.1016, "step": 7592 }, { "epoch": 0.29708897409812973, "grad_norm": 0.0, "learning_rate": 1.6486310143824252e-05, "loss": 1.0763, "step": 7593 }, { "epoch": 0.2971281007903592, "grad_norm": 0.0, "learning_rate": 1.648534559831816e-05, "loss": 1.0054, "step": 7594 }, { "epoch": 0.2971672274825886, "grad_norm": 0.0, "learning_rate": 1.6484380948665547e-05, "loss": 1.0709, "step": 7595 }, { "epoch": 0.29720635417481805, "grad_norm": 0.0, "learning_rate": 1.6483416194881904e-05, "loss": 1.1119, "step": 7596 }, { "epoch": 0.2972454808670475, "grad_norm": 0.0, "learning_rate": 1.648245133698272e-05, "loss": 1.2091, "step": 7597 }, { "epoch": 0.29728460755927694, "grad_norm": 0.0, "learning_rate": 1.6481486374983488e-05, "loss": 1.1998, "step": 7598 }, { "epoch": 0.2973237342515064, "grad_norm": 0.0, "learning_rate": 1.6480521308899705e-05, "loss": 1.1686, "step": 7599 }, { "epoch": 0.2973628609437358, "grad_norm": 0.0, "learning_rate": 1.6479556138746877e-05, "loss": 1.2815, "step": 7600 }, { "epoch": 0.29740198763596526, "grad_norm": 0.0, "learning_rate": 1.647859086454049e-05, "loss": 0.9929, "step": 7601 }, { "epoch": 0.2974411143281947, "grad_norm": 0.0, "learning_rate": 1.6477625486296057e-05, "loss": 1.2183, "step": 7602 }, { "epoch": 0.29748024102042414, "grad_norm": 0.0, "learning_rate": 1.6476660004029073e-05, "loss": 1.1799, "step": 7603 }, { "epoch": 0.2975193677126536, "grad_norm": 0.0, "learning_rate": 1.6475694417755046e-05, "loss": 1.0211, "step": 7604 }, { "epoch": 0.297558494404883, "grad_norm": 0.0, "learning_rate": 1.647472872748948e-05, "loss": 1.1697, "step": 7605 }, { "epoch": 0.29759762109711246, "grad_norm": 0.0, "learning_rate": 1.6473762933247885e-05, "loss": 1.1366, "step": 7606 }, { "epoch": 0.2976367477893419, "grad_norm": 0.0, "learning_rate": 1.6472797035045766e-05, "loss": 1.0972, "step": 7607 }, { "epoch": 0.29767587448157135, "grad_norm": 0.0, "learning_rate": 1.6471831032898643e-05, "loss": 1.103, "step": 7608 }, { "epoch": 0.2977150011738008, "grad_norm": 0.0, "learning_rate": 1.647086492682202e-05, "loss": 1.1198, "step": 7609 }, { "epoch": 0.2977541278660302, "grad_norm": 0.0, "learning_rate": 1.6469898716831414e-05, "loss": 1.0739, "step": 7610 }, { "epoch": 0.29779325455825967, "grad_norm": 0.0, "learning_rate": 1.6468932402942343e-05, "loss": 1.0953, "step": 7611 }, { "epoch": 0.2978323812504891, "grad_norm": 0.0, "learning_rate": 1.6467965985170326e-05, "loss": 1.134, "step": 7612 }, { "epoch": 0.29787150794271855, "grad_norm": 0.0, "learning_rate": 1.6466999463530877e-05, "loss": 1.2233, "step": 7613 }, { "epoch": 0.29791063463494794, "grad_norm": 0.0, "learning_rate": 1.646603283803952e-05, "loss": 0.9492, "step": 7614 }, { "epoch": 0.2979497613271774, "grad_norm": 0.0, "learning_rate": 1.646506610871178e-05, "loss": 1.0644, "step": 7615 }, { "epoch": 0.2979888880194068, "grad_norm": 0.0, "learning_rate": 1.646409927556318e-05, "loss": 1.0883, "step": 7616 }, { "epoch": 0.29802801471163626, "grad_norm": 0.0, "learning_rate": 1.6463132338609243e-05, "loss": 1.1677, "step": 7617 }, { "epoch": 0.2980671414038657, "grad_norm": 0.0, "learning_rate": 1.6462165297865503e-05, "loss": 1.1115, "step": 7618 }, { "epoch": 0.29810626809609514, "grad_norm": 0.0, "learning_rate": 1.646119815334748e-05, "loss": 1.0625, "step": 7619 }, { "epoch": 0.2981453947883246, "grad_norm": 0.0, "learning_rate": 1.6460230905070714e-05, "loss": 0.9795, "step": 7620 }, { "epoch": 0.298184521480554, "grad_norm": 0.0, "learning_rate": 1.6459263553050738e-05, "loss": 1.1523, "step": 7621 }, { "epoch": 0.29822364817278346, "grad_norm": 0.0, "learning_rate": 1.645829609730308e-05, "loss": 1.0385, "step": 7622 }, { "epoch": 0.2982627748650129, "grad_norm": 0.0, "learning_rate": 1.6457328537843278e-05, "loss": 1.1274, "step": 7623 }, { "epoch": 0.29830190155724234, "grad_norm": 0.0, "learning_rate": 1.6456360874686873e-05, "loss": 1.1537, "step": 7624 }, { "epoch": 0.2983410282494718, "grad_norm": 0.0, "learning_rate": 1.6455393107849404e-05, "loss": 1.1749, "step": 7625 }, { "epoch": 0.2983801549417012, "grad_norm": 0.0, "learning_rate": 1.645442523734641e-05, "loss": 1.0217, "step": 7626 }, { "epoch": 0.29841928163393067, "grad_norm": 0.0, "learning_rate": 1.645345726319343e-05, "loss": 1.2006, "step": 7627 }, { "epoch": 0.2984584083261601, "grad_norm": 0.0, "learning_rate": 1.645248918540602e-05, "loss": 1.2175, "step": 7628 }, { "epoch": 0.29849753501838955, "grad_norm": 0.0, "learning_rate": 1.6451521003999714e-05, "loss": 1.0976, "step": 7629 }, { "epoch": 0.298536661710619, "grad_norm": 0.0, "learning_rate": 1.645055271899007e-05, "loss": 1.1414, "step": 7630 }, { "epoch": 0.29857578840284843, "grad_norm": 0.0, "learning_rate": 1.6449584330392627e-05, "loss": 1.1054, "step": 7631 }, { "epoch": 0.29861491509507787, "grad_norm": 0.0, "learning_rate": 1.6448615838222942e-05, "loss": 1.0358, "step": 7632 }, { "epoch": 0.2986540417873073, "grad_norm": 0.0, "learning_rate": 1.644764724249657e-05, "loss": 1.0242, "step": 7633 }, { "epoch": 0.29869316847953675, "grad_norm": 0.0, "learning_rate": 1.6446678543229066e-05, "loss": 0.9194, "step": 7634 }, { "epoch": 0.2987322951717662, "grad_norm": 0.0, "learning_rate": 1.6445709740435974e-05, "loss": 1.0069, "step": 7635 }, { "epoch": 0.29877142186399563, "grad_norm": 0.0, "learning_rate": 1.6444740834132867e-05, "loss": 1.1148, "step": 7636 }, { "epoch": 0.2988105485562251, "grad_norm": 0.0, "learning_rate": 1.6443771824335294e-05, "loss": 1.1803, "step": 7637 }, { "epoch": 0.2988496752484545, "grad_norm": 0.0, "learning_rate": 1.6442802711058826e-05, "loss": 1.1365, "step": 7638 }, { "epoch": 0.29888880194068396, "grad_norm": 0.0, "learning_rate": 1.644183349431901e-05, "loss": 1.0208, "step": 7639 }, { "epoch": 0.2989279286329134, "grad_norm": 0.0, "learning_rate": 1.644086417413143e-05, "loss": 0.9985, "step": 7640 }, { "epoch": 0.29896705532514284, "grad_norm": 0.0, "learning_rate": 1.6439894750511634e-05, "loss": 1.0857, "step": 7641 }, { "epoch": 0.2990061820173722, "grad_norm": 0.0, "learning_rate": 1.6438925223475204e-05, "loss": 1.1107, "step": 7642 }, { "epoch": 0.29904530870960166, "grad_norm": 0.0, "learning_rate": 1.64379555930377e-05, "loss": 1.0278, "step": 7643 }, { "epoch": 0.2990844354018311, "grad_norm": 0.0, "learning_rate": 1.6436985859214698e-05, "loss": 1.0351, "step": 7644 }, { "epoch": 0.29912356209406055, "grad_norm": 0.0, "learning_rate": 1.643601602202177e-05, "loss": 1.1137, "step": 7645 }, { "epoch": 0.29916268878629, "grad_norm": 0.0, "learning_rate": 1.6435046081474487e-05, "loss": 1.1425, "step": 7646 }, { "epoch": 0.29920181547851943, "grad_norm": 0.0, "learning_rate": 1.643407603758843e-05, "loss": 1.1404, "step": 7647 }, { "epoch": 0.29924094217074887, "grad_norm": 0.0, "learning_rate": 1.6433105890379168e-05, "loss": 1.0308, "step": 7648 }, { "epoch": 0.2992800688629783, "grad_norm": 0.0, "learning_rate": 1.6432135639862296e-05, "loss": 1.1999, "step": 7649 }, { "epoch": 0.29931919555520775, "grad_norm": 0.0, "learning_rate": 1.6431165286053377e-05, "loss": 1.1871, "step": 7650 }, { "epoch": 0.2993583222474372, "grad_norm": 0.0, "learning_rate": 1.6430194828968005e-05, "loss": 1.0984, "step": 7651 }, { "epoch": 0.29939744893966663, "grad_norm": 0.0, "learning_rate": 1.6429224268621765e-05, "loss": 1.1257, "step": 7652 }, { "epoch": 0.2994365756318961, "grad_norm": 0.0, "learning_rate": 1.6428253605030238e-05, "loss": 1.2504, "step": 7653 }, { "epoch": 0.2994757023241255, "grad_norm": 0.0, "learning_rate": 1.642728283820901e-05, "loss": 1.0585, "step": 7654 }, { "epoch": 0.29951482901635496, "grad_norm": 0.0, "learning_rate": 1.6426311968173677e-05, "loss": 0.9985, "step": 7655 }, { "epoch": 0.2995539557085844, "grad_norm": 0.0, "learning_rate": 1.6425340994939824e-05, "loss": 1.1414, "step": 7656 }, { "epoch": 0.29959308240081384, "grad_norm": 0.0, "learning_rate": 1.642436991852305e-05, "loss": 1.0953, "step": 7657 }, { "epoch": 0.2996322090930433, "grad_norm": 0.0, "learning_rate": 1.642339873893894e-05, "loss": 1.1583, "step": 7658 }, { "epoch": 0.2996713357852727, "grad_norm": 0.0, "learning_rate": 1.6422427456203097e-05, "loss": 1.1514, "step": 7659 }, { "epoch": 0.29971046247750216, "grad_norm": 0.0, "learning_rate": 1.642145607033112e-05, "loss": 1.166, "step": 7660 }, { "epoch": 0.2997495891697316, "grad_norm": 0.0, "learning_rate": 1.64204845813386e-05, "loss": 1.1391, "step": 7661 }, { "epoch": 0.29978871586196104, "grad_norm": 0.0, "learning_rate": 1.6419512989241147e-05, "loss": 1.0638, "step": 7662 }, { "epoch": 0.2998278425541905, "grad_norm": 0.0, "learning_rate": 1.6418541294054356e-05, "loss": 1.0897, "step": 7663 }, { "epoch": 0.2998669692464199, "grad_norm": 0.0, "learning_rate": 1.6417569495793838e-05, "loss": 1.158, "step": 7664 }, { "epoch": 0.29990609593864936, "grad_norm": 0.0, "learning_rate": 1.6416597594475193e-05, "loss": 1.1767, "step": 7665 }, { "epoch": 0.2999452226308788, "grad_norm": 0.0, "learning_rate": 1.641562559011403e-05, "loss": 0.9315, "step": 7666 }, { "epoch": 0.29998434932310825, "grad_norm": 0.0, "learning_rate": 1.6414653482725962e-05, "loss": 1.2816, "step": 7667 }, { "epoch": 0.3000234760153377, "grad_norm": 0.0, "learning_rate": 1.64136812723266e-05, "loss": 1.0499, "step": 7668 }, { "epoch": 0.3000626027075671, "grad_norm": 0.0, "learning_rate": 1.6412708958931547e-05, "loss": 1.1439, "step": 7669 }, { "epoch": 0.30010172939979657, "grad_norm": 0.0, "learning_rate": 1.641173654255643e-05, "loss": 1.1754, "step": 7670 }, { "epoch": 0.30014085609202595, "grad_norm": 0.0, "learning_rate": 1.6410764023216855e-05, "loss": 1.2428, "step": 7671 }, { "epoch": 0.3001799827842554, "grad_norm": 0.0, "learning_rate": 1.6409791400928445e-05, "loss": 1.0124, "step": 7672 }, { "epoch": 0.30021910947648484, "grad_norm": 0.0, "learning_rate": 1.6408818675706812e-05, "loss": 1.1736, "step": 7673 }, { "epoch": 0.3002582361687143, "grad_norm": 0.0, "learning_rate": 1.6407845847567586e-05, "loss": 0.9424, "step": 7674 }, { "epoch": 0.3002973628609437, "grad_norm": 0.0, "learning_rate": 1.6406872916526384e-05, "loss": 1.223, "step": 7675 }, { "epoch": 0.30033648955317316, "grad_norm": 0.0, "learning_rate": 1.6405899882598833e-05, "loss": 1.1502, "step": 7676 }, { "epoch": 0.3003756162454026, "grad_norm": 0.0, "learning_rate": 1.6404926745800554e-05, "loss": 1.178, "step": 7677 }, { "epoch": 0.30041474293763204, "grad_norm": 0.0, "learning_rate": 1.640395350614718e-05, "loss": 1.1374, "step": 7678 }, { "epoch": 0.3004538696298615, "grad_norm": 0.0, "learning_rate": 1.6402980163654335e-05, "loss": 1.2178, "step": 7679 }, { "epoch": 0.3004929963220909, "grad_norm": 0.0, "learning_rate": 1.6402006718337654e-05, "loss": 1.1256, "step": 7680 }, { "epoch": 0.30053212301432036, "grad_norm": 0.0, "learning_rate": 1.6401033170212763e-05, "loss": 1.1321, "step": 7681 }, { "epoch": 0.3005712497065498, "grad_norm": 0.0, "learning_rate": 1.6400059519295304e-05, "loss": 1.185, "step": 7682 }, { "epoch": 0.30061037639877924, "grad_norm": 0.0, "learning_rate": 1.639908576560091e-05, "loss": 1.1976, "step": 7683 }, { "epoch": 0.3006495030910087, "grad_norm": 0.0, "learning_rate": 1.6398111909145214e-05, "loss": 1.0917, "step": 7684 }, { "epoch": 0.3006886297832381, "grad_norm": 0.0, "learning_rate": 1.639713794994386e-05, "loss": 1.0363, "step": 7685 }, { "epoch": 0.30072775647546757, "grad_norm": 0.0, "learning_rate": 1.6396163888012485e-05, "loss": 1.0622, "step": 7686 }, { "epoch": 0.300766883167697, "grad_norm": 0.0, "learning_rate": 1.6395189723366735e-05, "loss": 1.145, "step": 7687 }, { "epoch": 0.30080600985992645, "grad_norm": 0.0, "learning_rate": 1.6394215456022248e-05, "loss": 1.2159, "step": 7688 }, { "epoch": 0.3008451365521559, "grad_norm": 0.0, "learning_rate": 1.6393241085994674e-05, "loss": 1.3502, "step": 7689 }, { "epoch": 0.30088426324438533, "grad_norm": 0.0, "learning_rate": 1.6392266613299663e-05, "loss": 1.1878, "step": 7690 }, { "epoch": 0.30092338993661477, "grad_norm": 0.0, "learning_rate": 1.6391292037952858e-05, "loss": 1.0825, "step": 7691 }, { "epoch": 0.3009625166288442, "grad_norm": 0.0, "learning_rate": 1.639031735996991e-05, "loss": 1.0955, "step": 7692 }, { "epoch": 0.30100164332107365, "grad_norm": 0.0, "learning_rate": 1.6389342579366478e-05, "loss": 0.9028, "step": 7693 }, { "epoch": 0.3010407700133031, "grad_norm": 0.0, "learning_rate": 1.6388367696158206e-05, "loss": 1.1454, "step": 7694 }, { "epoch": 0.30107989670553253, "grad_norm": 0.0, "learning_rate": 1.6387392710360752e-05, "loss": 1.17, "step": 7695 }, { "epoch": 0.301119023397762, "grad_norm": 0.0, "learning_rate": 1.638641762198978e-05, "loss": 1.0394, "step": 7696 }, { "epoch": 0.3011581500899914, "grad_norm": 0.0, "learning_rate": 1.638544243106094e-05, "loss": 1.2354, "step": 7697 }, { "epoch": 0.30119727678222086, "grad_norm": 0.0, "learning_rate": 1.63844671375899e-05, "loss": 1.0229, "step": 7698 }, { "epoch": 0.30123640347445024, "grad_norm": 0.0, "learning_rate": 1.6383491741592316e-05, "loss": 1.1124, "step": 7699 }, { "epoch": 0.3012755301666797, "grad_norm": 0.0, "learning_rate": 1.638251624308385e-05, "loss": 1.0005, "step": 7700 }, { "epoch": 0.3013146568589091, "grad_norm": 0.0, "learning_rate": 1.6381540642080175e-05, "loss": 1.246, "step": 7701 }, { "epoch": 0.30135378355113857, "grad_norm": 0.0, "learning_rate": 1.6380564938596953e-05, "loss": 1.0851, "step": 7702 }, { "epoch": 0.301392910243368, "grad_norm": 0.0, "learning_rate": 1.6379589132649854e-05, "loss": 1.035, "step": 7703 }, { "epoch": 0.30143203693559745, "grad_norm": 0.0, "learning_rate": 1.6378613224254546e-05, "loss": 1.0606, "step": 7704 }, { "epoch": 0.3014711636278269, "grad_norm": 0.0, "learning_rate": 1.6377637213426704e-05, "loss": 1.1234, "step": 7705 }, { "epoch": 0.30151029032005633, "grad_norm": 0.0, "learning_rate": 1.6376661100181994e-05, "loss": 1.1172, "step": 7706 }, { "epoch": 0.30154941701228577, "grad_norm": 0.0, "learning_rate": 1.6375684884536106e-05, "loss": 1.0298, "step": 7707 }, { "epoch": 0.3015885437045152, "grad_norm": 0.0, "learning_rate": 1.63747085665047e-05, "loss": 1.2592, "step": 7708 }, { "epoch": 0.30162767039674465, "grad_norm": 0.0, "learning_rate": 1.6373732146103466e-05, "loss": 1.2058, "step": 7709 }, { "epoch": 0.3016667970889741, "grad_norm": 0.0, "learning_rate": 1.637275562334808e-05, "loss": 1.1291, "step": 7710 }, { "epoch": 0.30170592378120353, "grad_norm": 0.0, "learning_rate": 1.6371778998254225e-05, "loss": 1.1664, "step": 7711 }, { "epoch": 0.301745050473433, "grad_norm": 0.0, "learning_rate": 1.6370802270837587e-05, "loss": 1.1581, "step": 7712 }, { "epoch": 0.3017841771656624, "grad_norm": 0.0, "learning_rate": 1.6369825441113843e-05, "loss": 1.0682, "step": 7713 }, { "epoch": 0.30182330385789186, "grad_norm": 0.0, "learning_rate": 1.6368848509098687e-05, "loss": 1.2238, "step": 7714 }, { "epoch": 0.3018624305501213, "grad_norm": 0.0, "learning_rate": 1.6367871474807802e-05, "loss": 1.1865, "step": 7715 }, { "epoch": 0.30190155724235074, "grad_norm": 0.0, "learning_rate": 1.636689433825688e-05, "loss": 1.207, "step": 7716 }, { "epoch": 0.3019406839345802, "grad_norm": 0.0, "learning_rate": 1.6365917099461616e-05, "loss": 1.196, "step": 7717 }, { "epoch": 0.3019798106268096, "grad_norm": 0.0, "learning_rate": 1.6364939758437695e-05, "loss": 1.0596, "step": 7718 }, { "epoch": 0.30201893731903906, "grad_norm": 0.0, "learning_rate": 1.636396231520082e-05, "loss": 1.1503, "step": 7719 }, { "epoch": 0.3020580640112685, "grad_norm": 0.0, "learning_rate": 1.636298476976669e-05, "loss": 1.1156, "step": 7720 }, { "epoch": 0.30209719070349794, "grad_norm": 0.0, "learning_rate": 1.6362007122150993e-05, "loss": 1.089, "step": 7721 }, { "epoch": 0.3021363173957274, "grad_norm": 0.0, "learning_rate": 1.6361029372369433e-05, "loss": 1.1143, "step": 7722 }, { "epoch": 0.3021754440879568, "grad_norm": 0.0, "learning_rate": 1.6360051520437716e-05, "loss": 1.1466, "step": 7723 }, { "epoch": 0.30221457078018626, "grad_norm": 0.0, "learning_rate": 1.6359073566371538e-05, "loss": 1.0162, "step": 7724 }, { "epoch": 0.3022536974724157, "grad_norm": 0.0, "learning_rate": 1.6358095510186607e-05, "loss": 1.1141, "step": 7725 }, { "epoch": 0.30229282416464515, "grad_norm": 0.0, "learning_rate": 1.635711735189863e-05, "loss": 1.0879, "step": 7726 }, { "epoch": 0.3023319508568746, "grad_norm": 0.0, "learning_rate": 1.6356139091523317e-05, "loss": 1.109, "step": 7727 }, { "epoch": 0.302371077549104, "grad_norm": 0.0, "learning_rate": 1.635516072907637e-05, "loss": 1.1891, "step": 7728 }, { "epoch": 0.3024102042413334, "grad_norm": 0.0, "learning_rate": 1.6354182264573507e-05, "loss": 1.1543, "step": 7729 }, { "epoch": 0.30244933093356285, "grad_norm": 0.0, "learning_rate": 1.635320369803044e-05, "loss": 1.0591, "step": 7730 }, { "epoch": 0.3024884576257923, "grad_norm": 0.0, "learning_rate": 1.635222502946288e-05, "loss": 1.0383, "step": 7731 }, { "epoch": 0.30252758431802174, "grad_norm": 0.0, "learning_rate": 1.6351246258886546e-05, "loss": 1.1031, "step": 7732 }, { "epoch": 0.3025667110102512, "grad_norm": 0.0, "learning_rate": 1.6350267386317156e-05, "loss": 1.1803, "step": 7733 }, { "epoch": 0.3026058377024806, "grad_norm": 0.0, "learning_rate": 1.634928841177043e-05, "loss": 0.9663, "step": 7734 }, { "epoch": 0.30264496439471006, "grad_norm": 0.0, "learning_rate": 1.634830933526209e-05, "loss": 1.0403, "step": 7735 }, { "epoch": 0.3026840910869395, "grad_norm": 0.0, "learning_rate": 1.6347330156807856e-05, "loss": 1.1329, "step": 7736 }, { "epoch": 0.30272321777916894, "grad_norm": 0.0, "learning_rate": 1.6346350876423452e-05, "loss": 1.1078, "step": 7737 }, { "epoch": 0.3027623444713984, "grad_norm": 0.0, "learning_rate": 1.6345371494124607e-05, "loss": 1.0546, "step": 7738 }, { "epoch": 0.3028014711636278, "grad_norm": 0.0, "learning_rate": 1.6344392009927047e-05, "loss": 1.1951, "step": 7739 }, { "epoch": 0.30284059785585726, "grad_norm": 0.0, "learning_rate": 1.6343412423846498e-05, "loss": 1.0789, "step": 7740 }, { "epoch": 0.3028797245480867, "grad_norm": 0.0, "learning_rate": 1.63424327358987e-05, "loss": 1.1191, "step": 7741 }, { "epoch": 0.30291885124031614, "grad_norm": 0.0, "learning_rate": 1.6341452946099374e-05, "loss": 1.0306, "step": 7742 }, { "epoch": 0.3029579779325456, "grad_norm": 0.0, "learning_rate": 1.6340473054464263e-05, "loss": 1.1007, "step": 7743 }, { "epoch": 0.302997104624775, "grad_norm": 0.0, "learning_rate": 1.63394930610091e-05, "loss": 1.1192, "step": 7744 }, { "epoch": 0.30303623131700447, "grad_norm": 0.0, "learning_rate": 1.633851296574962e-05, "loss": 1.1639, "step": 7745 }, { "epoch": 0.3030753580092339, "grad_norm": 0.0, "learning_rate": 1.6337532768701568e-05, "loss": 1.182, "step": 7746 }, { "epoch": 0.30311448470146335, "grad_norm": 0.0, "learning_rate": 1.6336552469880676e-05, "loss": 1.1679, "step": 7747 }, { "epoch": 0.3031536113936928, "grad_norm": 0.0, "learning_rate": 1.6335572069302694e-05, "loss": 1.0481, "step": 7748 }, { "epoch": 0.30319273808592223, "grad_norm": 0.0, "learning_rate": 1.6334591566983363e-05, "loss": 1.2145, "step": 7749 }, { "epoch": 0.30323186477815167, "grad_norm": 0.0, "learning_rate": 1.633361096293843e-05, "loss": 1.1693, "step": 7750 }, { "epoch": 0.3032709914703811, "grad_norm": 0.0, "learning_rate": 1.6332630257183644e-05, "loss": 1.0967, "step": 7751 }, { "epoch": 0.30331011816261055, "grad_norm": 0.0, "learning_rate": 1.633164944973475e-05, "loss": 1.0929, "step": 7752 }, { "epoch": 0.30334924485484, "grad_norm": 0.0, "learning_rate": 1.6330668540607498e-05, "loss": 1.2263, "step": 7753 }, { "epoch": 0.30338837154706944, "grad_norm": 0.0, "learning_rate": 1.6329687529817643e-05, "loss": 1.1297, "step": 7754 }, { "epoch": 0.3034274982392989, "grad_norm": 0.0, "learning_rate": 1.6328706417380934e-05, "loss": 1.2185, "step": 7755 }, { "epoch": 0.30346662493152826, "grad_norm": 0.0, "learning_rate": 1.6327725203313133e-05, "loss": 1.0241, "step": 7756 }, { "epoch": 0.3035057516237577, "grad_norm": 0.0, "learning_rate": 1.6326743887629995e-05, "loss": 1.1592, "step": 7757 }, { "epoch": 0.30354487831598714, "grad_norm": 0.0, "learning_rate": 1.632576247034728e-05, "loss": 1.1588, "step": 7758 }, { "epoch": 0.3035840050082166, "grad_norm": 0.0, "learning_rate": 1.6324780951480745e-05, "loss": 1.1671, "step": 7759 }, { "epoch": 0.303623131700446, "grad_norm": 0.0, "learning_rate": 1.632379933104615e-05, "loss": 1.1289, "step": 7760 }, { "epoch": 0.30366225839267547, "grad_norm": 0.0, "learning_rate": 1.6322817609059267e-05, "loss": 1.0016, "step": 7761 }, { "epoch": 0.3037013850849049, "grad_norm": 0.0, "learning_rate": 1.632183578553585e-05, "loss": 0.9656, "step": 7762 }, { "epoch": 0.30374051177713435, "grad_norm": 0.0, "learning_rate": 1.632085386049168e-05, "loss": 1.2013, "step": 7763 }, { "epoch": 0.3037796384693638, "grad_norm": 0.0, "learning_rate": 1.631987183394251e-05, "loss": 1.1171, "step": 7764 }, { "epoch": 0.30381876516159323, "grad_norm": 0.0, "learning_rate": 1.6318889705904123e-05, "loss": 1.182, "step": 7765 }, { "epoch": 0.30385789185382267, "grad_norm": 0.0, "learning_rate": 1.631790747639228e-05, "loss": 1.125, "step": 7766 }, { "epoch": 0.3038970185460521, "grad_norm": 0.0, "learning_rate": 1.6316925145422765e-05, "loss": 0.9782, "step": 7767 }, { "epoch": 0.30393614523828155, "grad_norm": 0.0, "learning_rate": 1.6315942713011344e-05, "loss": 1.1523, "step": 7768 }, { "epoch": 0.303975271930511, "grad_norm": 0.0, "learning_rate": 1.63149601791738e-05, "loss": 1.1191, "step": 7769 }, { "epoch": 0.30401439862274043, "grad_norm": 0.0, "learning_rate": 1.6313977543925907e-05, "loss": 1.2458, "step": 7770 }, { "epoch": 0.3040535253149699, "grad_norm": 0.0, "learning_rate": 1.6312994807283448e-05, "loss": 1.1015, "step": 7771 }, { "epoch": 0.3040926520071993, "grad_norm": 0.0, "learning_rate": 1.6312011969262203e-05, "loss": 1.0864, "step": 7772 }, { "epoch": 0.30413177869942876, "grad_norm": 0.0, "learning_rate": 1.6311029029877952e-05, "loss": 1.1108, "step": 7773 }, { "epoch": 0.3041709053916582, "grad_norm": 0.0, "learning_rate": 1.6310045989146486e-05, "loss": 1.0948, "step": 7774 }, { "epoch": 0.30421003208388764, "grad_norm": 0.0, "learning_rate": 1.6309062847083585e-05, "loss": 1.1783, "step": 7775 }, { "epoch": 0.3042491587761171, "grad_norm": 0.0, "learning_rate": 1.6308079603705044e-05, "loss": 1.2394, "step": 7776 }, { "epoch": 0.3042882854683465, "grad_norm": 0.0, "learning_rate": 1.6307096259026647e-05, "loss": 1.0536, "step": 7777 }, { "epoch": 0.30432741216057596, "grad_norm": 0.0, "learning_rate": 1.6306112813064188e-05, "loss": 0.9669, "step": 7778 }, { "epoch": 0.3043665388528054, "grad_norm": 0.0, "learning_rate": 1.6305129265833457e-05, "loss": 1.1063, "step": 7779 }, { "epoch": 0.30440566554503484, "grad_norm": 0.0, "learning_rate": 1.6304145617350255e-05, "loss": 1.0542, "step": 7780 }, { "epoch": 0.3044447922372643, "grad_norm": 0.0, "learning_rate": 1.6303161867630373e-05, "loss": 1.2471, "step": 7781 }, { "epoch": 0.3044839189294937, "grad_norm": 0.0, "learning_rate": 1.6302178016689606e-05, "loss": 1.0585, "step": 7782 }, { "epoch": 0.30452304562172317, "grad_norm": 0.0, "learning_rate": 1.630119406454376e-05, "loss": 1.1231, "step": 7783 }, { "epoch": 0.3045621723139526, "grad_norm": 0.0, "learning_rate": 1.6300210011208635e-05, "loss": 1.0083, "step": 7784 }, { "epoch": 0.304601299006182, "grad_norm": 0.0, "learning_rate": 1.629922585670003e-05, "loss": 1.0658, "step": 7785 }, { "epoch": 0.30464042569841143, "grad_norm": 0.0, "learning_rate": 1.629824160103375e-05, "loss": 1.1115, "step": 7786 }, { "epoch": 0.3046795523906409, "grad_norm": 0.0, "learning_rate": 1.6297257244225602e-05, "loss": 1.0703, "step": 7787 }, { "epoch": 0.3047186790828703, "grad_norm": 0.0, "learning_rate": 1.6296272786291397e-05, "loss": 1.1628, "step": 7788 }, { "epoch": 0.30475780577509975, "grad_norm": 0.0, "learning_rate": 1.6295288227246936e-05, "loss": 1.1053, "step": 7789 }, { "epoch": 0.3047969324673292, "grad_norm": 0.0, "learning_rate": 1.629430356710804e-05, "loss": 1.1922, "step": 7790 }, { "epoch": 0.30483605915955864, "grad_norm": 0.0, "learning_rate": 1.629331880589051e-05, "loss": 1.0554, "step": 7791 }, { "epoch": 0.3048751858517881, "grad_norm": 0.0, "learning_rate": 1.629233394361017e-05, "loss": 1.1678, "step": 7792 }, { "epoch": 0.3049143125440175, "grad_norm": 0.0, "learning_rate": 1.629134898028283e-05, "loss": 1.2717, "step": 7793 }, { "epoch": 0.30495343923624696, "grad_norm": 0.0, "learning_rate": 1.6290363915924306e-05, "loss": 1.0705, "step": 7794 }, { "epoch": 0.3049925659284764, "grad_norm": 0.0, "learning_rate": 1.6289378750550423e-05, "loss": 1.0562, "step": 7795 }, { "epoch": 0.30503169262070584, "grad_norm": 0.0, "learning_rate": 1.6288393484177e-05, "loss": 1.176, "step": 7796 }, { "epoch": 0.3050708193129353, "grad_norm": 0.0, "learning_rate": 1.6287408116819855e-05, "loss": 1.088, "step": 7797 }, { "epoch": 0.3051099460051647, "grad_norm": 0.0, "learning_rate": 1.6286422648494815e-05, "loss": 1.1522, "step": 7798 }, { "epoch": 0.30514907269739416, "grad_norm": 0.0, "learning_rate": 1.6285437079217702e-05, "loss": 1.1135, "step": 7799 }, { "epoch": 0.3051881993896236, "grad_norm": 0.0, "learning_rate": 1.6284451409004352e-05, "loss": 1.1328, "step": 7800 }, { "epoch": 0.30522732608185305, "grad_norm": 0.0, "learning_rate": 1.628346563787058e-05, "loss": 1.1382, "step": 7801 }, { "epoch": 0.3052664527740825, "grad_norm": 0.0, "learning_rate": 1.628247976583223e-05, "loss": 1.1083, "step": 7802 }, { "epoch": 0.3053055794663119, "grad_norm": 0.0, "learning_rate": 1.6281493792905124e-05, "loss": 1.0962, "step": 7803 }, { "epoch": 0.30534470615854137, "grad_norm": 0.0, "learning_rate": 1.6280507719105097e-05, "loss": 1.2245, "step": 7804 }, { "epoch": 0.3053838328507708, "grad_norm": 0.0, "learning_rate": 1.627952154444799e-05, "loss": 1.1017, "step": 7805 }, { "epoch": 0.30542295954300025, "grad_norm": 0.0, "learning_rate": 1.6278535268949633e-05, "loss": 1.0956, "step": 7806 }, { "epoch": 0.3054620862352297, "grad_norm": 0.0, "learning_rate": 1.6277548892625867e-05, "loss": 1.2379, "step": 7807 }, { "epoch": 0.30550121292745913, "grad_norm": 0.0, "learning_rate": 1.6276562415492533e-05, "loss": 1.0479, "step": 7808 }, { "epoch": 0.3055403396196886, "grad_norm": 0.0, "learning_rate": 1.6275575837565472e-05, "loss": 1.1883, "step": 7809 }, { "epoch": 0.305579466311918, "grad_norm": 0.0, "learning_rate": 1.6274589158860523e-05, "loss": 0.9793, "step": 7810 }, { "epoch": 0.30561859300414745, "grad_norm": 0.0, "learning_rate": 1.627360237939354e-05, "loss": 1.1248, "step": 7811 }, { "epoch": 0.3056577196963769, "grad_norm": 0.0, "learning_rate": 1.627261549918036e-05, "loss": 0.9392, "step": 7812 }, { "epoch": 0.3056968463886063, "grad_norm": 0.0, "learning_rate": 1.6271628518236836e-05, "loss": 0.991, "step": 7813 }, { "epoch": 0.3057359730808357, "grad_norm": 0.0, "learning_rate": 1.627064143657882e-05, "loss": 1.097, "step": 7814 }, { "epoch": 0.30577509977306516, "grad_norm": 0.0, "learning_rate": 1.6269654254222155e-05, "loss": 1.1293, "step": 7815 }, { "epoch": 0.3058142264652946, "grad_norm": 0.0, "learning_rate": 1.62686669711827e-05, "loss": 1.0819, "step": 7816 }, { "epoch": 0.30585335315752404, "grad_norm": 0.0, "learning_rate": 1.6267679587476312e-05, "loss": 1.132, "step": 7817 }, { "epoch": 0.3058924798497535, "grad_norm": 0.0, "learning_rate": 1.626669210311884e-05, "loss": 1.028, "step": 7818 }, { "epoch": 0.3059316065419829, "grad_norm": 0.0, "learning_rate": 1.626570451812615e-05, "loss": 1.0923, "step": 7819 }, { "epoch": 0.30597073323421237, "grad_norm": 0.0, "learning_rate": 1.6264716832514095e-05, "loss": 1.1835, "step": 7820 }, { "epoch": 0.3060098599264418, "grad_norm": 0.0, "learning_rate": 1.6263729046298534e-05, "loss": 1.2462, "step": 7821 }, { "epoch": 0.30604898661867125, "grad_norm": 0.0, "learning_rate": 1.6262741159495336e-05, "loss": 1.143, "step": 7822 }, { "epoch": 0.3060881133109007, "grad_norm": 0.0, "learning_rate": 1.6261753172120363e-05, "loss": 1.1288, "step": 7823 }, { "epoch": 0.30612724000313013, "grad_norm": 0.0, "learning_rate": 1.626076508418948e-05, "loss": 1.1775, "step": 7824 }, { "epoch": 0.30616636669535957, "grad_norm": 0.0, "learning_rate": 1.6259776895718555e-05, "loss": 1.2592, "step": 7825 }, { "epoch": 0.306205493387589, "grad_norm": 0.0, "learning_rate": 1.6258788606723457e-05, "loss": 1.0433, "step": 7826 }, { "epoch": 0.30624462007981845, "grad_norm": 0.0, "learning_rate": 1.625780021722006e-05, "loss": 1.1497, "step": 7827 }, { "epoch": 0.3062837467720479, "grad_norm": 0.0, "learning_rate": 1.625681172722423e-05, "loss": 1.2572, "step": 7828 }, { "epoch": 0.30632287346427733, "grad_norm": 0.0, "learning_rate": 1.625582313675184e-05, "loss": 1.1707, "step": 7829 }, { "epoch": 0.3063620001565068, "grad_norm": 0.0, "learning_rate": 1.6254834445818775e-05, "loss": 1.2506, "step": 7830 }, { "epoch": 0.3064011268487362, "grad_norm": 0.0, "learning_rate": 1.6253845654440904e-05, "loss": 1.1566, "step": 7831 }, { "epoch": 0.30644025354096566, "grad_norm": 0.0, "learning_rate": 1.625285676263411e-05, "loss": 1.1237, "step": 7832 }, { "epoch": 0.3064793802331951, "grad_norm": 0.0, "learning_rate": 1.625186777041427e-05, "loss": 1.1915, "step": 7833 }, { "epoch": 0.30651850692542454, "grad_norm": 0.0, "learning_rate": 1.625087867779727e-05, "loss": 1.0842, "step": 7834 }, { "epoch": 0.306557633617654, "grad_norm": 0.0, "learning_rate": 1.6249889484798987e-05, "loss": 1.1751, "step": 7835 }, { "epoch": 0.3065967603098834, "grad_norm": 0.0, "learning_rate": 1.6248900191435314e-05, "loss": 1.1187, "step": 7836 }, { "epoch": 0.30663588700211286, "grad_norm": 0.0, "learning_rate": 1.624791079772213e-05, "loss": 1.1718, "step": 7837 }, { "epoch": 0.3066750136943423, "grad_norm": 0.0, "learning_rate": 1.6246921303675334e-05, "loss": 1.0671, "step": 7838 }, { "epoch": 0.30671414038657174, "grad_norm": 0.0, "learning_rate": 1.6245931709310806e-05, "loss": 1.2157, "step": 7839 }, { "epoch": 0.3067532670788012, "grad_norm": 0.0, "learning_rate": 1.6244942014644443e-05, "loss": 1.2037, "step": 7840 }, { "epoch": 0.3067923937710306, "grad_norm": 0.0, "learning_rate": 1.6243952219692135e-05, "loss": 1.0305, "step": 7841 }, { "epoch": 0.30683152046326, "grad_norm": 0.0, "learning_rate": 1.6242962324469777e-05, "loss": 1.0545, "step": 7842 }, { "epoch": 0.30687064715548945, "grad_norm": 0.0, "learning_rate": 1.624197232899327e-05, "loss": 1.2817, "step": 7843 }, { "epoch": 0.3069097738477189, "grad_norm": 0.0, "learning_rate": 1.6240982233278505e-05, "loss": 1.0506, "step": 7844 }, { "epoch": 0.30694890053994833, "grad_norm": 0.0, "learning_rate": 1.623999203734139e-05, "loss": 1.1725, "step": 7845 }, { "epoch": 0.3069880272321778, "grad_norm": 0.0, "learning_rate": 1.623900174119782e-05, "loss": 1.1441, "step": 7846 }, { "epoch": 0.3070271539244072, "grad_norm": 0.0, "learning_rate": 1.62380113448637e-05, "loss": 1.1382, "step": 7847 }, { "epoch": 0.30706628061663666, "grad_norm": 0.0, "learning_rate": 1.6237020848354937e-05, "loss": 1.131, "step": 7848 }, { "epoch": 0.3071054073088661, "grad_norm": 0.0, "learning_rate": 1.6236030251687435e-05, "loss": 1.0975, "step": 7849 }, { "epoch": 0.30714453400109554, "grad_norm": 0.0, "learning_rate": 1.6235039554877097e-05, "loss": 1.1565, "step": 7850 }, { "epoch": 0.307183660693325, "grad_norm": 0.0, "learning_rate": 1.623404875793984e-05, "loss": 1.1304, "step": 7851 }, { "epoch": 0.3072227873855544, "grad_norm": 0.0, "learning_rate": 1.6233057860891566e-05, "loss": 1.1595, "step": 7852 }, { "epoch": 0.30726191407778386, "grad_norm": 0.0, "learning_rate": 1.62320668637482e-05, "loss": 1.1707, "step": 7853 }, { "epoch": 0.3073010407700133, "grad_norm": 0.0, "learning_rate": 1.6231075766525647e-05, "loss": 1.1141, "step": 7854 }, { "epoch": 0.30734016746224274, "grad_norm": 0.0, "learning_rate": 1.6230084569239824e-05, "loss": 1.0975, "step": 7855 }, { "epoch": 0.3073792941544722, "grad_norm": 0.0, "learning_rate": 1.6229093271906654e-05, "loss": 1.1218, "step": 7856 }, { "epoch": 0.3074184208467016, "grad_norm": 0.0, "learning_rate": 1.6228101874542047e-05, "loss": 1.1877, "step": 7857 }, { "epoch": 0.30745754753893106, "grad_norm": 0.0, "learning_rate": 1.6227110377161928e-05, "loss": 1.1807, "step": 7858 }, { "epoch": 0.3074966742311605, "grad_norm": 0.0, "learning_rate": 1.6226118779782224e-05, "loss": 1.1301, "step": 7859 }, { "epoch": 0.30753580092338995, "grad_norm": 0.0, "learning_rate": 1.622512708241885e-05, "loss": 1.0153, "step": 7860 }, { "epoch": 0.3075749276156194, "grad_norm": 0.0, "learning_rate": 1.6224135285087734e-05, "loss": 1.0949, "step": 7861 }, { "epoch": 0.3076140543078488, "grad_norm": 0.0, "learning_rate": 1.6223143387804804e-05, "loss": 1.1287, "step": 7862 }, { "epoch": 0.30765318100007827, "grad_norm": 0.0, "learning_rate": 1.6222151390585996e-05, "loss": 1.2133, "step": 7863 }, { "epoch": 0.3076923076923077, "grad_norm": 0.0, "learning_rate": 1.6221159293447227e-05, "loss": 1.0915, "step": 7864 }, { "epoch": 0.30773143438453715, "grad_norm": 0.0, "learning_rate": 1.622016709640444e-05, "loss": 1.149, "step": 7865 }, { "epoch": 0.3077705610767666, "grad_norm": 0.0, "learning_rate": 1.621917479947356e-05, "loss": 1.1415, "step": 7866 }, { "epoch": 0.30780968776899603, "grad_norm": 0.0, "learning_rate": 1.621818240267053e-05, "loss": 1.2538, "step": 7867 }, { "epoch": 0.3078488144612255, "grad_norm": 0.0, "learning_rate": 1.621718990601128e-05, "loss": 1.0739, "step": 7868 }, { "epoch": 0.3078879411534549, "grad_norm": 0.0, "learning_rate": 1.621619730951175e-05, "loss": 1.0395, "step": 7869 }, { "epoch": 0.3079270678456843, "grad_norm": 0.0, "learning_rate": 1.621520461318788e-05, "loss": 1.2267, "step": 7870 }, { "epoch": 0.30796619453791374, "grad_norm": 0.0, "learning_rate": 1.6214211817055612e-05, "loss": 1.0431, "step": 7871 }, { "epoch": 0.3080053212301432, "grad_norm": 0.0, "learning_rate": 1.621321892113089e-05, "loss": 1.1198, "step": 7872 }, { "epoch": 0.3080444479223726, "grad_norm": 0.0, "learning_rate": 1.621222592542966e-05, "loss": 1.0812, "step": 7873 }, { "epoch": 0.30808357461460206, "grad_norm": 0.0, "learning_rate": 1.6211232829967865e-05, "loss": 1.1912, "step": 7874 }, { "epoch": 0.3081227013068315, "grad_norm": 0.0, "learning_rate": 1.6210239634761452e-05, "loss": 1.1179, "step": 7875 }, { "epoch": 0.30816182799906094, "grad_norm": 0.0, "learning_rate": 1.6209246339826372e-05, "loss": 1.1593, "step": 7876 }, { "epoch": 0.3082009546912904, "grad_norm": 0.0, "learning_rate": 1.6208252945178578e-05, "loss": 1.1981, "step": 7877 }, { "epoch": 0.3082400813835198, "grad_norm": 0.0, "learning_rate": 1.6207259450834022e-05, "loss": 1.235, "step": 7878 }, { "epoch": 0.30827920807574927, "grad_norm": 0.0, "learning_rate": 1.6206265856808655e-05, "loss": 1.2128, "step": 7879 }, { "epoch": 0.3083183347679787, "grad_norm": 0.0, "learning_rate": 1.6205272163118436e-05, "loss": 1.1635, "step": 7880 }, { "epoch": 0.30835746146020815, "grad_norm": 0.0, "learning_rate": 1.6204278369779324e-05, "loss": 1.1439, "step": 7881 }, { "epoch": 0.3083965881524376, "grad_norm": 0.0, "learning_rate": 1.620328447680727e-05, "loss": 1.1791, "step": 7882 }, { "epoch": 0.30843571484466703, "grad_norm": 0.0, "learning_rate": 1.6202290484218244e-05, "loss": 1.0632, "step": 7883 }, { "epoch": 0.30847484153689647, "grad_norm": 0.0, "learning_rate": 1.6201296392028206e-05, "loss": 1.1864, "step": 7884 }, { "epoch": 0.3085139682291259, "grad_norm": 0.0, "learning_rate": 1.6200302200253117e-05, "loss": 1.1223, "step": 7885 }, { "epoch": 0.30855309492135535, "grad_norm": 0.0, "learning_rate": 1.6199307908908943e-05, "loss": 1.1348, "step": 7886 }, { "epoch": 0.3085922216135848, "grad_norm": 0.0, "learning_rate": 1.6198313518011655e-05, "loss": 1.1889, "step": 7887 }, { "epoch": 0.30863134830581423, "grad_norm": 0.0, "learning_rate": 1.6197319027577218e-05, "loss": 1.1006, "step": 7888 }, { "epoch": 0.3086704749980437, "grad_norm": 0.0, "learning_rate": 1.6196324437621603e-05, "loss": 1.102, "step": 7889 }, { "epoch": 0.3087096016902731, "grad_norm": 0.0, "learning_rate": 1.6195329748160783e-05, "loss": 1.0438, "step": 7890 }, { "epoch": 0.30874872838250256, "grad_norm": 0.0, "learning_rate": 1.6194334959210726e-05, "loss": 1.0844, "step": 7891 }, { "epoch": 0.308787855074732, "grad_norm": 0.0, "learning_rate": 1.6193340070787417e-05, "loss": 1.049, "step": 7892 }, { "epoch": 0.30882698176696144, "grad_norm": 0.0, "learning_rate": 1.6192345082906823e-05, "loss": 1.0962, "step": 7893 }, { "epoch": 0.3088661084591909, "grad_norm": 0.0, "learning_rate": 1.6191349995584928e-05, "loss": 1.0955, "step": 7894 }, { "epoch": 0.3089052351514203, "grad_norm": 0.0, "learning_rate": 1.619035480883771e-05, "loss": 1.2358, "step": 7895 }, { "epoch": 0.30894436184364976, "grad_norm": 0.0, "learning_rate": 1.618935952268115e-05, "loss": 1.1801, "step": 7896 }, { "epoch": 0.3089834885358792, "grad_norm": 0.0, "learning_rate": 1.6188364137131233e-05, "loss": 1.1002, "step": 7897 }, { "epoch": 0.3090226152281086, "grad_norm": 0.0, "learning_rate": 1.6187368652203944e-05, "loss": 1.1535, "step": 7898 }, { "epoch": 0.30906174192033803, "grad_norm": 0.0, "learning_rate": 1.6186373067915265e-05, "loss": 1.0721, "step": 7899 }, { "epoch": 0.30910086861256747, "grad_norm": 0.0, "learning_rate": 1.6185377384281185e-05, "loss": 1.0481, "step": 7900 }, { "epoch": 0.3091399953047969, "grad_norm": 0.0, "learning_rate": 1.61843816013177e-05, "loss": 1.3419, "step": 7901 }, { "epoch": 0.30917912199702635, "grad_norm": 0.0, "learning_rate": 1.618338571904079e-05, "loss": 1.0812, "step": 7902 }, { "epoch": 0.3092182486892558, "grad_norm": 0.0, "learning_rate": 1.618238973746646e-05, "loss": 1.0181, "step": 7903 }, { "epoch": 0.30925737538148523, "grad_norm": 0.0, "learning_rate": 1.6181393656610693e-05, "loss": 1.1688, "step": 7904 }, { "epoch": 0.3092965020737147, "grad_norm": 0.0, "learning_rate": 1.618039747648949e-05, "loss": 1.1564, "step": 7905 }, { "epoch": 0.3093356287659441, "grad_norm": 0.0, "learning_rate": 1.617940119711885e-05, "loss": 1.0064, "step": 7906 }, { "epoch": 0.30937475545817356, "grad_norm": 0.0, "learning_rate": 1.617840481851477e-05, "loss": 1.0458, "step": 7907 }, { "epoch": 0.309413882150403, "grad_norm": 0.0, "learning_rate": 1.617740834069325e-05, "loss": 1.1231, "step": 7908 }, { "epoch": 0.30945300884263244, "grad_norm": 0.0, "learning_rate": 1.6176411763670292e-05, "loss": 1.0005, "step": 7909 }, { "epoch": 0.3094921355348619, "grad_norm": 0.0, "learning_rate": 1.6175415087461904e-05, "loss": 1.091, "step": 7910 }, { "epoch": 0.3095312622270913, "grad_norm": 0.0, "learning_rate": 1.617441831208408e-05, "loss": 1.011, "step": 7911 }, { "epoch": 0.30957038891932076, "grad_norm": 0.0, "learning_rate": 1.617342143755284e-05, "loss": 1.1384, "step": 7912 }, { "epoch": 0.3096095156115502, "grad_norm": 0.0, "learning_rate": 1.6172424463884187e-05, "loss": 1.2124, "step": 7913 }, { "epoch": 0.30964864230377964, "grad_norm": 0.0, "learning_rate": 1.6171427391094136e-05, "loss": 0.9992, "step": 7914 }, { "epoch": 0.3096877689960091, "grad_norm": 0.0, "learning_rate": 1.617043021919869e-05, "loss": 1.1131, "step": 7915 }, { "epoch": 0.3097268956882385, "grad_norm": 0.0, "learning_rate": 1.6169432948213864e-05, "loss": 1.1586, "step": 7916 }, { "epoch": 0.30976602238046796, "grad_norm": 0.0, "learning_rate": 1.616843557815568e-05, "loss": 1.087, "step": 7917 }, { "epoch": 0.3098051490726974, "grad_norm": 0.0, "learning_rate": 1.6167438109040148e-05, "loss": 1.2209, "step": 7918 }, { "epoch": 0.30984427576492685, "grad_norm": 0.0, "learning_rate": 1.616644054088329e-05, "loss": 1.2152, "step": 7919 }, { "epoch": 0.3098834024571563, "grad_norm": 0.0, "learning_rate": 1.616544287370112e-05, "loss": 1.0604, "step": 7920 }, { "epoch": 0.30992252914938573, "grad_norm": 0.0, "learning_rate": 1.616444510750967e-05, "loss": 1.2168, "step": 7921 }, { "epoch": 0.30996165584161517, "grad_norm": 0.0, "learning_rate": 1.616344724232495e-05, "loss": 1.1343, "step": 7922 }, { "epoch": 0.3100007825338446, "grad_norm": 0.0, "learning_rate": 1.616244927816299e-05, "loss": 0.9808, "step": 7923 }, { "epoch": 0.31003990922607405, "grad_norm": 0.0, "learning_rate": 1.616145121503982e-05, "loss": 1.0319, "step": 7924 }, { "epoch": 0.3100790359183035, "grad_norm": 0.0, "learning_rate": 1.6160453052971466e-05, "loss": 1.0648, "step": 7925 }, { "epoch": 0.31011816261053293, "grad_norm": 0.0, "learning_rate": 1.6159454791973953e-05, "loss": 1.0951, "step": 7926 }, { "epoch": 0.3101572893027623, "grad_norm": 0.0, "learning_rate": 1.615845643206331e-05, "loss": 1.0272, "step": 7927 }, { "epoch": 0.31019641599499176, "grad_norm": 0.0, "learning_rate": 1.615745797325558e-05, "loss": 1.2076, "step": 7928 }, { "epoch": 0.3102355426872212, "grad_norm": 0.0, "learning_rate": 1.6156459415566786e-05, "loss": 0.9814, "step": 7929 }, { "epoch": 0.31027466937945064, "grad_norm": 0.0, "learning_rate": 1.615546075901297e-05, "loss": 1.1421, "step": 7930 }, { "epoch": 0.3103137960716801, "grad_norm": 0.0, "learning_rate": 1.6154462003610168e-05, "loss": 1.0519, "step": 7931 }, { "epoch": 0.3103529227639095, "grad_norm": 0.0, "learning_rate": 1.615346314937442e-05, "loss": 1.1417, "step": 7932 }, { "epoch": 0.31039204945613896, "grad_norm": 0.0, "learning_rate": 1.6152464196321762e-05, "loss": 1.0498, "step": 7933 }, { "epoch": 0.3104311761483684, "grad_norm": 0.0, "learning_rate": 1.6151465144468242e-05, "loss": 1.1204, "step": 7934 }, { "epoch": 0.31047030284059784, "grad_norm": 0.0, "learning_rate": 1.6150465993829894e-05, "loss": 1.0482, "step": 7935 }, { "epoch": 0.3105094295328273, "grad_norm": 0.0, "learning_rate": 1.6149466744422772e-05, "loss": 1.159, "step": 7936 }, { "epoch": 0.3105485562250567, "grad_norm": 0.0, "learning_rate": 1.614846739626292e-05, "loss": 1.136, "step": 7937 }, { "epoch": 0.31058768291728617, "grad_norm": 0.0, "learning_rate": 1.6147467949366386e-05, "loss": 0.9992, "step": 7938 }, { "epoch": 0.3106268096095156, "grad_norm": 0.0, "learning_rate": 1.6146468403749223e-05, "loss": 1.2538, "step": 7939 }, { "epoch": 0.31066593630174505, "grad_norm": 0.0, "learning_rate": 1.6145468759427476e-05, "loss": 1.0443, "step": 7940 }, { "epoch": 0.3107050629939745, "grad_norm": 0.0, "learning_rate": 1.6144469016417202e-05, "loss": 1.0572, "step": 7941 }, { "epoch": 0.31074418968620393, "grad_norm": 0.0, "learning_rate": 1.6143469174734455e-05, "loss": 1.0659, "step": 7942 }, { "epoch": 0.31078331637843337, "grad_norm": 0.0, "learning_rate": 1.614246923439529e-05, "loss": 1.156, "step": 7943 }, { "epoch": 0.3108224430706628, "grad_norm": 0.0, "learning_rate": 1.6141469195415766e-05, "loss": 1.1415, "step": 7944 }, { "epoch": 0.31086156976289225, "grad_norm": 0.0, "learning_rate": 1.6140469057811944e-05, "loss": 1.0798, "step": 7945 }, { "epoch": 0.3109006964551217, "grad_norm": 0.0, "learning_rate": 1.6139468821599888e-05, "loss": 1.1789, "step": 7946 }, { "epoch": 0.31093982314735114, "grad_norm": 0.0, "learning_rate": 1.6138468486795646e-05, "loss": 1.1345, "step": 7947 }, { "epoch": 0.3109789498395806, "grad_norm": 0.0, "learning_rate": 1.61374680534153e-05, "loss": 1.1368, "step": 7948 }, { "epoch": 0.31101807653181, "grad_norm": 0.0, "learning_rate": 1.6136467521474902e-05, "loss": 1.027, "step": 7949 }, { "epoch": 0.31105720322403946, "grad_norm": 0.0, "learning_rate": 1.613546689099053e-05, "loss": 1.1648, "step": 7950 }, { "epoch": 0.3110963299162689, "grad_norm": 0.0, "learning_rate": 1.6134466161978242e-05, "loss": 1.1518, "step": 7951 }, { "epoch": 0.31113545660849834, "grad_norm": 0.0, "learning_rate": 1.613346533445412e-05, "loss": 1.1826, "step": 7952 }, { "epoch": 0.3111745833007278, "grad_norm": 0.0, "learning_rate": 1.613246440843423e-05, "loss": 1.1068, "step": 7953 }, { "epoch": 0.3112137099929572, "grad_norm": 0.0, "learning_rate": 1.6131463383934643e-05, "loss": 1.1096, "step": 7954 }, { "epoch": 0.3112528366851866, "grad_norm": 0.0, "learning_rate": 1.613046226097144e-05, "loss": 1.0496, "step": 7955 }, { "epoch": 0.31129196337741605, "grad_norm": 0.0, "learning_rate": 1.6129461039560693e-05, "loss": 1.1901, "step": 7956 }, { "epoch": 0.3113310900696455, "grad_norm": 0.0, "learning_rate": 1.6128459719718482e-05, "loss": 1.2886, "step": 7957 }, { "epoch": 0.31137021676187493, "grad_norm": 0.0, "learning_rate": 1.612745830146089e-05, "loss": 1.339, "step": 7958 }, { "epoch": 0.31140934345410437, "grad_norm": 0.0, "learning_rate": 1.6126456784803993e-05, "loss": 1.1132, "step": 7959 }, { "epoch": 0.3114484701463338, "grad_norm": 0.0, "learning_rate": 1.612545516976388e-05, "loss": 1.1945, "step": 7960 }, { "epoch": 0.31148759683856325, "grad_norm": 0.0, "learning_rate": 1.6124453456356628e-05, "loss": 0.9921, "step": 7961 }, { "epoch": 0.3115267235307927, "grad_norm": 0.0, "learning_rate": 1.612345164459833e-05, "loss": 1.0621, "step": 7962 }, { "epoch": 0.31156585022302213, "grad_norm": 0.0, "learning_rate": 1.612244973450507e-05, "loss": 1.0437, "step": 7963 }, { "epoch": 0.3116049769152516, "grad_norm": 0.0, "learning_rate": 1.6121447726092942e-05, "loss": 1.0811, "step": 7964 }, { "epoch": 0.311644103607481, "grad_norm": 0.0, "learning_rate": 1.6120445619378035e-05, "loss": 1.148, "step": 7965 }, { "epoch": 0.31168323029971046, "grad_norm": 0.0, "learning_rate": 1.611944341437644e-05, "loss": 1.0571, "step": 7966 }, { "epoch": 0.3117223569919399, "grad_norm": 0.0, "learning_rate": 1.611844111110425e-05, "loss": 1.1111, "step": 7967 }, { "epoch": 0.31176148368416934, "grad_norm": 0.0, "learning_rate": 1.6117438709577565e-05, "loss": 1.1299, "step": 7968 }, { "epoch": 0.3118006103763988, "grad_norm": 0.0, "learning_rate": 1.6116436209812476e-05, "loss": 1.1126, "step": 7969 }, { "epoch": 0.3118397370686282, "grad_norm": 0.0, "learning_rate": 1.611543361182509e-05, "loss": 1.0751, "step": 7970 }, { "epoch": 0.31187886376085766, "grad_norm": 0.0, "learning_rate": 1.61144309156315e-05, "loss": 1.1707, "step": 7971 }, { "epoch": 0.3119179904530871, "grad_norm": 0.0, "learning_rate": 1.6113428121247813e-05, "loss": 0.9675, "step": 7972 }, { "epoch": 0.31195711714531654, "grad_norm": 0.0, "learning_rate": 1.611242522869013e-05, "loss": 1.15, "step": 7973 }, { "epoch": 0.311996243837546, "grad_norm": 0.0, "learning_rate": 1.611142223797456e-05, "loss": 1.1736, "step": 7974 }, { "epoch": 0.3120353705297754, "grad_norm": 0.0, "learning_rate": 1.6110419149117205e-05, "loss": 1.1799, "step": 7975 }, { "epoch": 0.31207449722200487, "grad_norm": 0.0, "learning_rate": 1.6109415962134174e-05, "loss": 1.1798, "step": 7976 }, { "epoch": 0.3121136239142343, "grad_norm": 0.0, "learning_rate": 1.610841267704158e-05, "loss": 1.0947, "step": 7977 }, { "epoch": 0.31215275060646375, "grad_norm": 0.0, "learning_rate": 1.610740929385553e-05, "loss": 1.1824, "step": 7978 }, { "epoch": 0.3121918772986932, "grad_norm": 0.0, "learning_rate": 1.6106405812592143e-05, "loss": 1.0967, "step": 7979 }, { "epoch": 0.31223100399092263, "grad_norm": 0.0, "learning_rate": 1.6105402233267526e-05, "loss": 1.2648, "step": 7980 }, { "epoch": 0.31227013068315207, "grad_norm": 0.0, "learning_rate": 1.6104398555897805e-05, "loss": 1.0115, "step": 7981 }, { "epoch": 0.3123092573753815, "grad_norm": 0.0, "learning_rate": 1.6103394780499088e-05, "loss": 1.2064, "step": 7982 }, { "epoch": 0.31234838406761095, "grad_norm": 0.0, "learning_rate": 1.61023909070875e-05, "loss": 1.1009, "step": 7983 }, { "epoch": 0.31238751075984034, "grad_norm": 0.0, "learning_rate": 1.6101386935679163e-05, "loss": 1.0587, "step": 7984 }, { "epoch": 0.3124266374520698, "grad_norm": 0.0, "learning_rate": 1.6100382866290197e-05, "loss": 1.1752, "step": 7985 }, { "epoch": 0.3124657641442992, "grad_norm": 0.0, "learning_rate": 1.6099378698936724e-05, "loss": 1.1511, "step": 7986 }, { "epoch": 0.31250489083652866, "grad_norm": 0.0, "learning_rate": 1.6098374433634876e-05, "loss": 1.1778, "step": 7987 }, { "epoch": 0.3125440175287581, "grad_norm": 0.0, "learning_rate": 1.609737007040077e-05, "loss": 1.0403, "step": 7988 }, { "epoch": 0.31258314422098754, "grad_norm": 0.0, "learning_rate": 1.6096365609250546e-05, "loss": 1.1, "step": 7989 }, { "epoch": 0.312622270913217, "grad_norm": 0.0, "learning_rate": 1.609536105020033e-05, "loss": 1.0587, "step": 7990 }, { "epoch": 0.3126613976054464, "grad_norm": 0.0, "learning_rate": 1.609435639326625e-05, "loss": 1.0911, "step": 7991 }, { "epoch": 0.31270052429767586, "grad_norm": 0.0, "learning_rate": 1.6093351638464447e-05, "loss": 1.137, "step": 7992 }, { "epoch": 0.3127396509899053, "grad_norm": 0.0, "learning_rate": 1.609234678581105e-05, "loss": 1.0193, "step": 7993 }, { "epoch": 0.31277877768213475, "grad_norm": 0.0, "learning_rate": 1.6091341835322193e-05, "loss": 1.1414, "step": 7994 }, { "epoch": 0.3128179043743642, "grad_norm": 0.0, "learning_rate": 1.6090336787014028e-05, "loss": 1.1279, "step": 7995 }, { "epoch": 0.3128570310665936, "grad_norm": 0.0, "learning_rate": 1.608933164090268e-05, "loss": 1.0743, "step": 7996 }, { "epoch": 0.31289615775882307, "grad_norm": 0.0, "learning_rate": 1.6088326397004296e-05, "loss": 1.0597, "step": 7997 }, { "epoch": 0.3129352844510525, "grad_norm": 0.0, "learning_rate": 1.608732105533502e-05, "loss": 1.0895, "step": 7998 }, { "epoch": 0.31297441114328195, "grad_norm": 0.0, "learning_rate": 1.6086315615911e-05, "loss": 0.9594, "step": 7999 }, { "epoch": 0.3130135378355114, "grad_norm": 0.0, "learning_rate": 1.608531007874837e-05, "loss": 1.1869, "step": 8000 }, { "epoch": 0.31305266452774083, "grad_norm": 0.0, "learning_rate": 1.608430444386329e-05, "loss": 1.1313, "step": 8001 }, { "epoch": 0.3130917912199703, "grad_norm": 0.0, "learning_rate": 1.6083298711271903e-05, "loss": 1.0521, "step": 8002 }, { "epoch": 0.3131309179121997, "grad_norm": 0.0, "learning_rate": 1.6082292880990364e-05, "loss": 1.1334, "step": 8003 }, { "epoch": 0.31317004460442915, "grad_norm": 0.0, "learning_rate": 1.6081286953034824e-05, "loss": 1.1432, "step": 8004 }, { "epoch": 0.3132091712966586, "grad_norm": 0.0, "learning_rate": 1.6080280927421434e-05, "loss": 1.0248, "step": 8005 }, { "epoch": 0.31324829798888804, "grad_norm": 0.0, "learning_rate": 1.607927480416635e-05, "loss": 1.1494, "step": 8006 }, { "epoch": 0.3132874246811175, "grad_norm": 0.0, "learning_rate": 1.607826858328573e-05, "loss": 1.0671, "step": 8007 }, { "epoch": 0.3133265513733469, "grad_norm": 0.0, "learning_rate": 1.6077262264795735e-05, "loss": 1.1307, "step": 8008 }, { "epoch": 0.31336567806557636, "grad_norm": 0.0, "learning_rate": 1.607625584871252e-05, "loss": 1.0498, "step": 8009 }, { "epoch": 0.3134048047578058, "grad_norm": 0.0, "learning_rate": 1.6075249335052253e-05, "loss": 1.061, "step": 8010 }, { "epoch": 0.31344393145003524, "grad_norm": 0.0, "learning_rate": 1.6074242723831095e-05, "loss": 1.1108, "step": 8011 }, { "epoch": 0.3134830581422646, "grad_norm": 0.0, "learning_rate": 1.607323601506521e-05, "loss": 1.1255, "step": 8012 }, { "epoch": 0.31352218483449407, "grad_norm": 0.0, "learning_rate": 1.6072229208770766e-05, "loss": 1.1102, "step": 8013 }, { "epoch": 0.3135613115267235, "grad_norm": 0.0, "learning_rate": 1.6071222304963926e-05, "loss": 1.1714, "step": 8014 }, { "epoch": 0.31360043821895295, "grad_norm": 0.0, "learning_rate": 1.6070215303660866e-05, "loss": 1.1643, "step": 8015 }, { "epoch": 0.3136395649111824, "grad_norm": 0.0, "learning_rate": 1.6069208204877755e-05, "loss": 1.0816, "step": 8016 }, { "epoch": 0.31367869160341183, "grad_norm": 0.0, "learning_rate": 1.606820100863076e-05, "loss": 1.1384, "step": 8017 }, { "epoch": 0.31371781829564127, "grad_norm": 0.0, "learning_rate": 1.6067193714936067e-05, "loss": 1.1453, "step": 8018 }, { "epoch": 0.3137569449878707, "grad_norm": 0.0, "learning_rate": 1.6066186323809844e-05, "loss": 1.1862, "step": 8019 }, { "epoch": 0.31379607168010015, "grad_norm": 0.0, "learning_rate": 1.606517883526827e-05, "loss": 1.1638, "step": 8020 }, { "epoch": 0.3138351983723296, "grad_norm": 0.0, "learning_rate": 1.606417124932752e-05, "loss": 1.0695, "step": 8021 }, { "epoch": 0.31387432506455903, "grad_norm": 0.0, "learning_rate": 1.6063163566003788e-05, "loss": 1.0731, "step": 8022 }, { "epoch": 0.3139134517567885, "grad_norm": 0.0, "learning_rate": 1.6062155785313238e-05, "loss": 1.2149, "step": 8023 }, { "epoch": 0.3139525784490179, "grad_norm": 0.0, "learning_rate": 1.6061147907272067e-05, "loss": 1.1299, "step": 8024 }, { "epoch": 0.31399170514124736, "grad_norm": 0.0, "learning_rate": 1.6060139931896452e-05, "loss": 1.1639, "step": 8025 }, { "epoch": 0.3140308318334768, "grad_norm": 0.0, "learning_rate": 1.6059131859202586e-05, "loss": 1.15, "step": 8026 }, { "epoch": 0.31406995852570624, "grad_norm": 0.0, "learning_rate": 1.6058123689206654e-05, "loss": 1.0125, "step": 8027 }, { "epoch": 0.3141090852179357, "grad_norm": 0.0, "learning_rate": 1.6057115421924848e-05, "loss": 0.9721, "step": 8028 }, { "epoch": 0.3141482119101651, "grad_norm": 0.0, "learning_rate": 1.6056107057373356e-05, "loss": 1.1692, "step": 8029 }, { "epoch": 0.31418733860239456, "grad_norm": 0.0, "learning_rate": 1.6055098595568373e-05, "loss": 1.2362, "step": 8030 }, { "epoch": 0.314226465294624, "grad_norm": 0.0, "learning_rate": 1.60540900365261e-05, "loss": 1.0784, "step": 8031 }, { "epoch": 0.31426559198685344, "grad_norm": 0.0, "learning_rate": 1.605308138026272e-05, "loss": 0.9564, "step": 8032 }, { "epoch": 0.3143047186790829, "grad_norm": 0.0, "learning_rate": 1.6052072626794442e-05, "loss": 1.1341, "step": 8033 }, { "epoch": 0.3143438453713123, "grad_norm": 0.0, "learning_rate": 1.605106377613746e-05, "loss": 1.1978, "step": 8034 }, { "epoch": 0.31438297206354177, "grad_norm": 0.0, "learning_rate": 1.6050054828307978e-05, "loss": 1.1225, "step": 8035 }, { "epoch": 0.3144220987557712, "grad_norm": 0.0, "learning_rate": 1.6049045783322193e-05, "loss": 1.0826, "step": 8036 }, { "epoch": 0.31446122544800065, "grad_norm": 0.0, "learning_rate": 1.6048036641196312e-05, "loss": 1.1522, "step": 8037 }, { "epoch": 0.3145003521402301, "grad_norm": 0.0, "learning_rate": 1.6047027401946547e-05, "loss": 1.0699, "step": 8038 }, { "epoch": 0.31453947883245953, "grad_norm": 0.0, "learning_rate": 1.6046018065589096e-05, "loss": 1.1746, "step": 8039 }, { "epoch": 0.31457860552468897, "grad_norm": 0.0, "learning_rate": 1.6045008632140172e-05, "loss": 1.1637, "step": 8040 }, { "epoch": 0.31461773221691836, "grad_norm": 0.0, "learning_rate": 1.6043999101615983e-05, "loss": 1.0926, "step": 8041 }, { "epoch": 0.3146568589091478, "grad_norm": 0.0, "learning_rate": 1.604298947403274e-05, "loss": 1.1028, "step": 8042 }, { "epoch": 0.31469598560137724, "grad_norm": 0.0, "learning_rate": 1.6041979749406663e-05, "loss": 1.1407, "step": 8043 }, { "epoch": 0.3147351122936067, "grad_norm": 0.0, "learning_rate": 1.6040969927753957e-05, "loss": 1.1865, "step": 8044 }, { "epoch": 0.3147742389858361, "grad_norm": 0.0, "learning_rate": 1.603996000909085e-05, "loss": 0.9804, "step": 8045 }, { "epoch": 0.31481336567806556, "grad_norm": 0.0, "learning_rate": 1.6038949993433546e-05, "loss": 1.0937, "step": 8046 }, { "epoch": 0.314852492370295, "grad_norm": 0.0, "learning_rate": 1.6037939880798277e-05, "loss": 1.1274, "step": 8047 }, { "epoch": 0.31489161906252444, "grad_norm": 0.0, "learning_rate": 1.603692967120126e-05, "loss": 1.0001, "step": 8048 }, { "epoch": 0.3149307457547539, "grad_norm": 0.0, "learning_rate": 1.6035919364658714e-05, "loss": 1.1009, "step": 8049 }, { "epoch": 0.3149698724469833, "grad_norm": 0.0, "learning_rate": 1.603490896118687e-05, "loss": 0.9767, "step": 8050 }, { "epoch": 0.31500899913921276, "grad_norm": 0.0, "learning_rate": 1.603389846080195e-05, "loss": 1.2018, "step": 8051 }, { "epoch": 0.3150481258314422, "grad_norm": 0.0, "learning_rate": 1.603288786352018e-05, "loss": 1.256, "step": 8052 }, { "epoch": 0.31508725252367165, "grad_norm": 0.0, "learning_rate": 1.603187716935779e-05, "loss": 1.1891, "step": 8053 }, { "epoch": 0.3151263792159011, "grad_norm": 0.0, "learning_rate": 1.6030866378331013e-05, "loss": 1.0197, "step": 8054 }, { "epoch": 0.3151655059081305, "grad_norm": 0.0, "learning_rate": 1.602985549045608e-05, "loss": 1.0894, "step": 8055 }, { "epoch": 0.31520463260035997, "grad_norm": 0.0, "learning_rate": 1.602884450574922e-05, "loss": 1.0237, "step": 8056 }, { "epoch": 0.3152437592925894, "grad_norm": 0.0, "learning_rate": 1.6027833424226673e-05, "loss": 1.2164, "step": 8057 }, { "epoch": 0.31528288598481885, "grad_norm": 0.0, "learning_rate": 1.6026822245904673e-05, "loss": 1.1104, "step": 8058 }, { "epoch": 0.3153220126770483, "grad_norm": 0.0, "learning_rate": 1.6025810970799462e-05, "loss": 1.2646, "step": 8059 }, { "epoch": 0.31536113936927773, "grad_norm": 0.0, "learning_rate": 1.602479959892728e-05, "loss": 0.9714, "step": 8060 }, { "epoch": 0.3154002660615072, "grad_norm": 0.0, "learning_rate": 1.602378813030436e-05, "loss": 1.112, "step": 8061 }, { "epoch": 0.3154393927537366, "grad_norm": 0.0, "learning_rate": 1.6022776564946957e-05, "loss": 1.1945, "step": 8062 }, { "epoch": 0.31547851944596605, "grad_norm": 0.0, "learning_rate": 1.6021764902871305e-05, "loss": 1.1713, "step": 8063 }, { "epoch": 0.3155176461381955, "grad_norm": 0.0, "learning_rate": 1.6020753144093656e-05, "loss": 1.0723, "step": 8064 }, { "epoch": 0.31555677283042494, "grad_norm": 0.0, "learning_rate": 1.6019741288630255e-05, "loss": 1.0873, "step": 8065 }, { "epoch": 0.3155958995226544, "grad_norm": 0.0, "learning_rate": 1.6018729336497356e-05, "loss": 1.0954, "step": 8066 }, { "epoch": 0.3156350262148838, "grad_norm": 0.0, "learning_rate": 1.6017717287711197e-05, "loss": 1.2073, "step": 8067 }, { "epoch": 0.31567415290711326, "grad_norm": 0.0, "learning_rate": 1.601670514228805e-05, "loss": 1.117, "step": 8068 }, { "epoch": 0.31571327959934264, "grad_norm": 0.0, "learning_rate": 1.6015692900244148e-05, "loss": 1.1862, "step": 8069 }, { "epoch": 0.3157524062915721, "grad_norm": 0.0, "learning_rate": 1.6014680561595763e-05, "loss": 1.1639, "step": 8070 }, { "epoch": 0.3157915329838015, "grad_norm": 0.0, "learning_rate": 1.6013668126359143e-05, "loss": 1.1009, "step": 8071 }, { "epoch": 0.31583065967603097, "grad_norm": 0.0, "learning_rate": 1.6012655594550546e-05, "loss": 1.3154, "step": 8072 }, { "epoch": 0.3158697863682604, "grad_norm": 0.0, "learning_rate": 1.6011642966186237e-05, "loss": 1.131, "step": 8073 }, { "epoch": 0.31590891306048985, "grad_norm": 0.0, "learning_rate": 1.6010630241282476e-05, "loss": 1.1277, "step": 8074 }, { "epoch": 0.3159480397527193, "grad_norm": 0.0, "learning_rate": 1.6009617419855523e-05, "loss": 1.1855, "step": 8075 }, { "epoch": 0.31598716644494873, "grad_norm": 0.0, "learning_rate": 1.6008604501921647e-05, "loss": 1.1269, "step": 8076 }, { "epoch": 0.31602629313717817, "grad_norm": 0.0, "learning_rate": 1.600759148749711e-05, "loss": 1.1771, "step": 8077 }, { "epoch": 0.3160654198294076, "grad_norm": 0.0, "learning_rate": 1.600657837659818e-05, "loss": 1.0954, "step": 8078 }, { "epoch": 0.31610454652163705, "grad_norm": 0.0, "learning_rate": 1.6005565169241132e-05, "loss": 1.0405, "step": 8079 }, { "epoch": 0.3161436732138665, "grad_norm": 0.0, "learning_rate": 1.6004551865442228e-05, "loss": 0.9945, "step": 8080 }, { "epoch": 0.31618279990609593, "grad_norm": 0.0, "learning_rate": 1.6003538465217746e-05, "loss": 1.0789, "step": 8081 }, { "epoch": 0.3162219265983254, "grad_norm": 0.0, "learning_rate": 1.600252496858396e-05, "loss": 1.0922, "step": 8082 }, { "epoch": 0.3162610532905548, "grad_norm": 0.0, "learning_rate": 1.6001511375557146e-05, "loss": 1.0677, "step": 8083 }, { "epoch": 0.31630017998278426, "grad_norm": 0.0, "learning_rate": 1.600049768615358e-05, "loss": 1.1403, "step": 8084 }, { "epoch": 0.3163393066750137, "grad_norm": 0.0, "learning_rate": 1.5999483900389536e-05, "loss": 1.1324, "step": 8085 }, { "epoch": 0.31637843336724314, "grad_norm": 0.0, "learning_rate": 1.5998470018281303e-05, "loss": 1.1517, "step": 8086 }, { "epoch": 0.3164175600594726, "grad_norm": 0.0, "learning_rate": 1.5997456039845155e-05, "loss": 1.0782, "step": 8087 }, { "epoch": 0.316456686751702, "grad_norm": 0.0, "learning_rate": 1.599644196509738e-05, "loss": 1.2303, "step": 8088 }, { "epoch": 0.31649581344393146, "grad_norm": 0.0, "learning_rate": 1.5995427794054262e-05, "loss": 1.2717, "step": 8089 }, { "epoch": 0.3165349401361609, "grad_norm": 0.0, "learning_rate": 1.5994413526732083e-05, "loss": 1.1275, "step": 8090 }, { "epoch": 0.31657406682839034, "grad_norm": 0.0, "learning_rate": 1.5993399163147137e-05, "loss": 0.9987, "step": 8091 }, { "epoch": 0.3166131935206198, "grad_norm": 0.0, "learning_rate": 1.5992384703315707e-05, "loss": 1.076, "step": 8092 }, { "epoch": 0.3166523202128492, "grad_norm": 0.0, "learning_rate": 1.599137014725409e-05, "loss": 1.0682, "step": 8093 }, { "epoch": 0.31669144690507867, "grad_norm": 0.0, "learning_rate": 1.599035549497858e-05, "loss": 1.1341, "step": 8094 }, { "epoch": 0.3167305735973081, "grad_norm": 0.0, "learning_rate": 1.5989340746505463e-05, "loss": 1.1902, "step": 8095 }, { "epoch": 0.31676970028953755, "grad_norm": 0.0, "learning_rate": 1.5988325901851038e-05, "loss": 1.0133, "step": 8096 }, { "epoch": 0.316808826981767, "grad_norm": 0.0, "learning_rate": 1.5987310961031604e-05, "loss": 1.0516, "step": 8097 }, { "epoch": 0.3168479536739964, "grad_norm": 0.0, "learning_rate": 1.598629592406346e-05, "loss": 1.1294, "step": 8098 }, { "epoch": 0.3168870803662258, "grad_norm": 0.0, "learning_rate": 1.5985280790962903e-05, "loss": 1.0692, "step": 8099 }, { "epoch": 0.31692620705845526, "grad_norm": 0.0, "learning_rate": 1.5984265561746236e-05, "loss": 1.1619, "step": 8100 }, { "epoch": 0.3169653337506847, "grad_norm": 0.0, "learning_rate": 1.5983250236429765e-05, "loss": 1.0321, "step": 8101 }, { "epoch": 0.31700446044291414, "grad_norm": 0.0, "learning_rate": 1.598223481502979e-05, "loss": 1.0459, "step": 8102 }, { "epoch": 0.3170435871351436, "grad_norm": 0.0, "learning_rate": 1.598121929756262e-05, "loss": 1.0252, "step": 8103 }, { "epoch": 0.317082713827373, "grad_norm": 0.0, "learning_rate": 1.5980203684044565e-05, "loss": 1.1245, "step": 8104 }, { "epoch": 0.31712184051960246, "grad_norm": 0.0, "learning_rate": 1.597918797449193e-05, "loss": 1.0678, "step": 8105 }, { "epoch": 0.3171609672118319, "grad_norm": 0.0, "learning_rate": 1.5978172168921032e-05, "loss": 1.0967, "step": 8106 }, { "epoch": 0.31720009390406134, "grad_norm": 0.0, "learning_rate": 1.5977156267348175e-05, "loss": 1.1106, "step": 8107 }, { "epoch": 0.3172392205962908, "grad_norm": 0.0, "learning_rate": 1.597614026978968e-05, "loss": 1.1326, "step": 8108 }, { "epoch": 0.3172783472885202, "grad_norm": 0.0, "learning_rate": 1.5975124176261866e-05, "loss": 1.09, "step": 8109 }, { "epoch": 0.31731747398074966, "grad_norm": 0.0, "learning_rate": 1.5974107986781036e-05, "loss": 1.1032, "step": 8110 }, { "epoch": 0.3173566006729791, "grad_norm": 0.0, "learning_rate": 1.5973091701363524e-05, "loss": 1.1805, "step": 8111 }, { "epoch": 0.31739572736520855, "grad_norm": 0.0, "learning_rate": 1.5972075320025643e-05, "loss": 1.1094, "step": 8112 }, { "epoch": 0.317434854057438, "grad_norm": 0.0, "learning_rate": 1.5971058842783717e-05, "loss": 1.1411, "step": 8113 }, { "epoch": 0.31747398074966743, "grad_norm": 0.0, "learning_rate": 1.5970042269654063e-05, "loss": 1.1909, "step": 8114 }, { "epoch": 0.31751310744189687, "grad_norm": 0.0, "learning_rate": 1.5969025600653015e-05, "loss": 1.1388, "step": 8115 }, { "epoch": 0.3175522341341263, "grad_norm": 0.0, "learning_rate": 1.5968008835796895e-05, "loss": 1.067, "step": 8116 }, { "epoch": 0.31759136082635575, "grad_norm": 0.0, "learning_rate": 1.5966991975102032e-05, "loss": 1.13, "step": 8117 }, { "epoch": 0.3176304875185852, "grad_norm": 0.0, "learning_rate": 1.5965975018584754e-05, "loss": 1.0569, "step": 8118 }, { "epoch": 0.31766961421081463, "grad_norm": 0.0, "learning_rate": 1.5964957966261392e-05, "loss": 1.0287, "step": 8119 }, { "epoch": 0.3177087409030441, "grad_norm": 0.0, "learning_rate": 1.5963940818148284e-05, "loss": 1.1504, "step": 8120 }, { "epoch": 0.3177478675952735, "grad_norm": 0.0, "learning_rate": 1.5962923574261754e-05, "loss": 1.1172, "step": 8121 }, { "epoch": 0.31778699428750296, "grad_norm": 0.0, "learning_rate": 1.596190623461815e-05, "loss": 1.1328, "step": 8122 }, { "epoch": 0.3178261209797324, "grad_norm": 0.0, "learning_rate": 1.5960888799233796e-05, "loss": 1.0591, "step": 8123 }, { "epoch": 0.31786524767196184, "grad_norm": 0.0, "learning_rate": 1.595987126812504e-05, "loss": 1.1805, "step": 8124 }, { "epoch": 0.3179043743641913, "grad_norm": 0.0, "learning_rate": 1.595885364130822e-05, "loss": 1.0481, "step": 8125 }, { "epoch": 0.31794350105642066, "grad_norm": 0.0, "learning_rate": 1.5957835918799674e-05, "loss": 1.1458, "step": 8126 }, { "epoch": 0.3179826277486501, "grad_norm": 0.0, "learning_rate": 1.5956818100615753e-05, "loss": 1.1762, "step": 8127 }, { "epoch": 0.31802175444087954, "grad_norm": 0.0, "learning_rate": 1.5955800186772795e-05, "loss": 1.1749, "step": 8128 }, { "epoch": 0.318060881133109, "grad_norm": 0.0, "learning_rate": 1.5954782177287153e-05, "loss": 1.199, "step": 8129 }, { "epoch": 0.3181000078253384, "grad_norm": 0.0, "learning_rate": 1.5953764072175167e-05, "loss": 1.129, "step": 8130 }, { "epoch": 0.31813913451756787, "grad_norm": 0.0, "learning_rate": 1.595274587145319e-05, "loss": 1.0177, "step": 8131 }, { "epoch": 0.3181782612097973, "grad_norm": 0.0, "learning_rate": 1.5951727575137576e-05, "loss": 1.2217, "step": 8132 }, { "epoch": 0.31821738790202675, "grad_norm": 0.0, "learning_rate": 1.5950709183244676e-05, "loss": 1.207, "step": 8133 }, { "epoch": 0.3182565145942562, "grad_norm": 0.0, "learning_rate": 1.5949690695790837e-05, "loss": 1.0948, "step": 8134 }, { "epoch": 0.31829564128648563, "grad_norm": 0.0, "learning_rate": 1.5948672112792427e-05, "loss": 1.1907, "step": 8135 }, { "epoch": 0.31833476797871507, "grad_norm": 0.0, "learning_rate": 1.5947653434265794e-05, "loss": 1.1042, "step": 8136 }, { "epoch": 0.3183738946709445, "grad_norm": 0.0, "learning_rate": 1.5946634660227302e-05, "loss": 1.0826, "step": 8137 }, { "epoch": 0.31841302136317395, "grad_norm": 0.0, "learning_rate": 1.594561579069331e-05, "loss": 1.1602, "step": 8138 }, { "epoch": 0.3184521480554034, "grad_norm": 0.0, "learning_rate": 1.5944596825680174e-05, "loss": 0.9823, "step": 8139 }, { "epoch": 0.31849127474763284, "grad_norm": 0.0, "learning_rate": 1.5943577765204264e-05, "loss": 0.9599, "step": 8140 }, { "epoch": 0.3185304014398623, "grad_norm": 0.0, "learning_rate": 1.5942558609281943e-05, "loss": 1.0436, "step": 8141 }, { "epoch": 0.3185695281320917, "grad_norm": 0.0, "learning_rate": 1.5941539357929577e-05, "loss": 1.0985, "step": 8142 }, { "epoch": 0.31860865482432116, "grad_norm": 0.0, "learning_rate": 1.5940520011163534e-05, "loss": 1.0115, "step": 8143 }, { "epoch": 0.3186477815165506, "grad_norm": 0.0, "learning_rate": 1.5939500569000185e-05, "loss": 1.0273, "step": 8144 }, { "epoch": 0.31868690820878004, "grad_norm": 0.0, "learning_rate": 1.5938481031455898e-05, "loss": 1.2299, "step": 8145 }, { "epoch": 0.3187260349010095, "grad_norm": 0.0, "learning_rate": 1.5937461398547048e-05, "loss": 1.0342, "step": 8146 }, { "epoch": 0.3187651615932389, "grad_norm": 0.0, "learning_rate": 1.5936441670290006e-05, "loss": 1.1154, "step": 8147 }, { "epoch": 0.31880428828546836, "grad_norm": 0.0, "learning_rate": 1.593542184670115e-05, "loss": 1.1028, "step": 8148 }, { "epoch": 0.3188434149776978, "grad_norm": 0.0, "learning_rate": 1.5934401927796858e-05, "loss": 1.1942, "step": 8149 }, { "epoch": 0.31888254166992724, "grad_norm": 0.0, "learning_rate": 1.5933381913593508e-05, "loss": 1.0457, "step": 8150 }, { "epoch": 0.3189216683621567, "grad_norm": 0.0, "learning_rate": 1.593236180410748e-05, "loss": 0.9611, "step": 8151 }, { "epoch": 0.3189607950543861, "grad_norm": 0.0, "learning_rate": 1.5931341599355153e-05, "loss": 1.18, "step": 8152 }, { "epoch": 0.31899992174661557, "grad_norm": 0.0, "learning_rate": 1.5930321299352912e-05, "loss": 1.0681, "step": 8153 }, { "epoch": 0.319039048438845, "grad_norm": 0.0, "learning_rate": 1.592930090411714e-05, "loss": 1.0465, "step": 8154 }, { "epoch": 0.3190781751310744, "grad_norm": 0.0, "learning_rate": 1.592828041366423e-05, "loss": 1.168, "step": 8155 }, { "epoch": 0.31911730182330383, "grad_norm": 0.0, "learning_rate": 1.5927259828010563e-05, "loss": 1.0833, "step": 8156 }, { "epoch": 0.3191564285155333, "grad_norm": 0.0, "learning_rate": 1.5926239147172527e-05, "loss": 1.0087, "step": 8157 }, { "epoch": 0.3191955552077627, "grad_norm": 0.0, "learning_rate": 1.592521837116652e-05, "loss": 0.972, "step": 8158 }, { "epoch": 0.31923468189999216, "grad_norm": 0.0, "learning_rate": 1.5924197500008933e-05, "loss": 1.0313, "step": 8159 }, { "epoch": 0.3192738085922216, "grad_norm": 0.0, "learning_rate": 1.5923176533716156e-05, "loss": 1.1208, "step": 8160 }, { "epoch": 0.31931293528445104, "grad_norm": 0.0, "learning_rate": 1.5922155472304584e-05, "loss": 1.097, "step": 8161 }, { "epoch": 0.3193520619766805, "grad_norm": 0.0, "learning_rate": 1.592113431579062e-05, "loss": 1.041, "step": 8162 }, { "epoch": 0.3193911886689099, "grad_norm": 0.0, "learning_rate": 1.5920113064190653e-05, "loss": 1.1291, "step": 8163 }, { "epoch": 0.31943031536113936, "grad_norm": 0.0, "learning_rate": 1.591909171752109e-05, "loss": 1.0004, "step": 8164 }, { "epoch": 0.3194694420533688, "grad_norm": 0.0, "learning_rate": 1.5918070275798333e-05, "loss": 1.1552, "step": 8165 }, { "epoch": 0.31950856874559824, "grad_norm": 0.0, "learning_rate": 1.5917048739038782e-05, "loss": 1.2408, "step": 8166 }, { "epoch": 0.3195476954378277, "grad_norm": 0.0, "learning_rate": 1.5916027107258846e-05, "loss": 1.1304, "step": 8167 }, { "epoch": 0.3195868221300571, "grad_norm": 0.0, "learning_rate": 1.591500538047492e-05, "loss": 1.0932, "step": 8168 }, { "epoch": 0.31962594882228657, "grad_norm": 0.0, "learning_rate": 1.5913983558703427e-05, "loss": 1.042, "step": 8169 }, { "epoch": 0.319665075514516, "grad_norm": 0.0, "learning_rate": 1.5912961641960763e-05, "loss": 1.1245, "step": 8170 }, { "epoch": 0.31970420220674545, "grad_norm": 0.0, "learning_rate": 1.5911939630263348e-05, "loss": 1.115, "step": 8171 }, { "epoch": 0.3197433288989749, "grad_norm": 0.0, "learning_rate": 1.5910917523627593e-05, "loss": 1.1585, "step": 8172 }, { "epoch": 0.31978245559120433, "grad_norm": 0.0, "learning_rate": 1.5909895322069907e-05, "loss": 1.1878, "step": 8173 }, { "epoch": 0.31982158228343377, "grad_norm": 0.0, "learning_rate": 1.5908873025606703e-05, "loss": 1.1254, "step": 8174 }, { "epoch": 0.3198607089756632, "grad_norm": 0.0, "learning_rate": 1.590785063425441e-05, "loss": 1.0468, "step": 8175 }, { "epoch": 0.31989983566789265, "grad_norm": 0.0, "learning_rate": 1.590682814802943e-05, "loss": 1.1114, "step": 8176 }, { "epoch": 0.3199389623601221, "grad_norm": 0.0, "learning_rate": 1.59058055669482e-05, "loss": 1.1471, "step": 8177 }, { "epoch": 0.31997808905235153, "grad_norm": 0.0, "learning_rate": 1.5904782891027132e-05, "loss": 1.1418, "step": 8178 }, { "epoch": 0.320017215744581, "grad_norm": 0.0, "learning_rate": 1.5903760120282645e-05, "loss": 1.0343, "step": 8179 }, { "epoch": 0.3200563424368104, "grad_norm": 0.0, "learning_rate": 1.590273725473117e-05, "loss": 1.0043, "step": 8180 }, { "epoch": 0.32009546912903986, "grad_norm": 0.0, "learning_rate": 1.5901714294389132e-05, "loss": 0.9769, "step": 8181 }, { "epoch": 0.3201345958212693, "grad_norm": 0.0, "learning_rate": 1.5900691239272957e-05, "loss": 1.2689, "step": 8182 }, { "epoch": 0.3201737225134987, "grad_norm": 0.0, "learning_rate": 1.5899668089399078e-05, "loss": 1.2096, "step": 8183 }, { "epoch": 0.3202128492057281, "grad_norm": 0.0, "learning_rate": 1.589864484478392e-05, "loss": 1.1851, "step": 8184 }, { "epoch": 0.32025197589795756, "grad_norm": 0.0, "learning_rate": 1.5897621505443915e-05, "loss": 1.1976, "step": 8185 }, { "epoch": 0.320291102590187, "grad_norm": 0.0, "learning_rate": 1.58965980713955e-05, "loss": 1.0892, "step": 8186 }, { "epoch": 0.32033022928241645, "grad_norm": 0.0, "learning_rate": 1.5895574542655113e-05, "loss": 1.1936, "step": 8187 }, { "epoch": 0.3203693559746459, "grad_norm": 0.0, "learning_rate": 1.589455091923918e-05, "loss": 1.0251, "step": 8188 }, { "epoch": 0.3204084826668753, "grad_norm": 0.0, "learning_rate": 1.5893527201164152e-05, "loss": 1.1156, "step": 8189 }, { "epoch": 0.32044760935910477, "grad_norm": 0.0, "learning_rate": 1.5892503388446456e-05, "loss": 1.0981, "step": 8190 }, { "epoch": 0.3204867360513342, "grad_norm": 0.0, "learning_rate": 1.5891479481102545e-05, "loss": 1.2075, "step": 8191 }, { "epoch": 0.32052586274356365, "grad_norm": 0.0, "learning_rate": 1.5890455479148852e-05, "loss": 1.1106, "step": 8192 }, { "epoch": 0.3205649894357931, "grad_norm": 0.0, "learning_rate": 1.5889431382601827e-05, "loss": 1.1178, "step": 8193 }, { "epoch": 0.32060411612802253, "grad_norm": 0.0, "learning_rate": 1.588840719147791e-05, "loss": 1.1556, "step": 8194 }, { "epoch": 0.320643242820252, "grad_norm": 0.0, "learning_rate": 1.5887382905793555e-05, "loss": 0.9509, "step": 8195 }, { "epoch": 0.3206823695124814, "grad_norm": 0.0, "learning_rate": 1.588635852556521e-05, "loss": 1.2617, "step": 8196 }, { "epoch": 0.32072149620471085, "grad_norm": 0.0, "learning_rate": 1.588533405080932e-05, "loss": 1.0393, "step": 8197 }, { "epoch": 0.3207606228969403, "grad_norm": 0.0, "learning_rate": 1.588430948154234e-05, "loss": 1.1387, "step": 8198 }, { "epoch": 0.32079974958916974, "grad_norm": 0.0, "learning_rate": 1.5883284817780726e-05, "loss": 1.1438, "step": 8199 }, { "epoch": 0.3208388762813992, "grad_norm": 0.0, "learning_rate": 1.5882260059540927e-05, "loss": 1.1049, "step": 8200 }, { "epoch": 0.3208780029736286, "grad_norm": 0.0, "learning_rate": 1.58812352068394e-05, "loss": 1.0122, "step": 8201 }, { "epoch": 0.32091712966585806, "grad_norm": 0.0, "learning_rate": 1.588021025969261e-05, "loss": 1.0461, "step": 8202 }, { "epoch": 0.3209562563580875, "grad_norm": 0.0, "learning_rate": 1.5879185218117012e-05, "loss": 1.0455, "step": 8203 }, { "epoch": 0.32099538305031694, "grad_norm": 0.0, "learning_rate": 1.5878160082129064e-05, "loss": 0.9386, "step": 8204 }, { "epoch": 0.3210345097425464, "grad_norm": 0.0, "learning_rate": 1.587713485174523e-05, "loss": 1.1075, "step": 8205 }, { "epoch": 0.3210736364347758, "grad_norm": 0.0, "learning_rate": 1.5876109526981975e-05, "loss": 1.1345, "step": 8206 }, { "epoch": 0.32111276312700526, "grad_norm": 0.0, "learning_rate": 1.5875084107855767e-05, "loss": 1.0644, "step": 8207 }, { "epoch": 0.3211518898192347, "grad_norm": 0.0, "learning_rate": 1.587405859438307e-05, "loss": 1.1321, "step": 8208 }, { "epoch": 0.32119101651146414, "grad_norm": 0.0, "learning_rate": 1.587303298658035e-05, "loss": 1.3369, "step": 8209 }, { "epoch": 0.3212301432036936, "grad_norm": 0.0, "learning_rate": 1.5872007284464078e-05, "loss": 1.123, "step": 8210 }, { "epoch": 0.321269269895923, "grad_norm": 0.0, "learning_rate": 1.587098148805073e-05, "loss": 1.1627, "step": 8211 }, { "epoch": 0.3213083965881524, "grad_norm": 0.0, "learning_rate": 1.5869955597356778e-05, "loss": 1.1738, "step": 8212 }, { "epoch": 0.32134752328038185, "grad_norm": 0.0, "learning_rate": 1.586892961239869e-05, "loss": 1.0861, "step": 8213 }, { "epoch": 0.3213866499726113, "grad_norm": 0.0, "learning_rate": 1.586790353319295e-05, "loss": 1.1589, "step": 8214 }, { "epoch": 0.32142577666484073, "grad_norm": 0.0, "learning_rate": 1.586687735975603e-05, "loss": 1.0245, "step": 8215 }, { "epoch": 0.3214649033570702, "grad_norm": 0.0, "learning_rate": 1.5865851092104414e-05, "loss": 1.0749, "step": 8216 }, { "epoch": 0.3215040300492996, "grad_norm": 0.0, "learning_rate": 1.5864824730254578e-05, "loss": 1.0154, "step": 8217 }, { "epoch": 0.32154315674152906, "grad_norm": 0.0, "learning_rate": 1.5863798274223007e-05, "loss": 1.079, "step": 8218 }, { "epoch": 0.3215822834337585, "grad_norm": 0.0, "learning_rate": 1.586277172402618e-05, "loss": 1.0251, "step": 8219 }, { "epoch": 0.32162141012598794, "grad_norm": 0.0, "learning_rate": 1.586174507968059e-05, "loss": 1.1012, "step": 8220 }, { "epoch": 0.3216605368182174, "grad_norm": 0.0, "learning_rate": 1.5860718341202714e-05, "loss": 1.0266, "step": 8221 }, { "epoch": 0.3216996635104468, "grad_norm": 0.0, "learning_rate": 1.585969150860905e-05, "loss": 1.018, "step": 8222 }, { "epoch": 0.32173879020267626, "grad_norm": 0.0, "learning_rate": 1.5858664581916083e-05, "loss": 1.1169, "step": 8223 }, { "epoch": 0.3217779168949057, "grad_norm": 0.0, "learning_rate": 1.58576375611403e-05, "loss": 1.0357, "step": 8224 }, { "epoch": 0.32181704358713514, "grad_norm": 0.0, "learning_rate": 1.5856610446298198e-05, "loss": 0.9878, "step": 8225 }, { "epoch": 0.3218561702793646, "grad_norm": 0.0, "learning_rate": 1.5855583237406277e-05, "loss": 1.0431, "step": 8226 }, { "epoch": 0.321895296971594, "grad_norm": 0.0, "learning_rate": 1.585455593448102e-05, "loss": 1.1059, "step": 8227 }, { "epoch": 0.32193442366382347, "grad_norm": 0.0, "learning_rate": 1.5853528537538933e-05, "loss": 1.0688, "step": 8228 }, { "epoch": 0.3219735503560529, "grad_norm": 0.0, "learning_rate": 1.5852501046596516e-05, "loss": 1.1892, "step": 8229 }, { "epoch": 0.32201267704828235, "grad_norm": 0.0, "learning_rate": 1.5851473461670265e-05, "loss": 1.1468, "step": 8230 }, { "epoch": 0.3220518037405118, "grad_norm": 0.0, "learning_rate": 1.585044578277668e-05, "loss": 1.1038, "step": 8231 }, { "epoch": 0.32209093043274123, "grad_norm": 0.0, "learning_rate": 1.5849418009932265e-05, "loss": 1.1011, "step": 8232 }, { "epoch": 0.32213005712497067, "grad_norm": 0.0, "learning_rate": 1.584839014315353e-05, "loss": 1.1212, "step": 8233 }, { "epoch": 0.3221691838172001, "grad_norm": 0.0, "learning_rate": 1.5847362182456975e-05, "loss": 1.1039, "step": 8234 }, { "epoch": 0.32220831050942955, "grad_norm": 0.0, "learning_rate": 1.5846334127859113e-05, "loss": 1.1812, "step": 8235 }, { "epoch": 0.322247437201659, "grad_norm": 0.0, "learning_rate": 1.584530597937645e-05, "loss": 1.0276, "step": 8236 }, { "epoch": 0.32228656389388843, "grad_norm": 0.0, "learning_rate": 1.5844277737025496e-05, "loss": 1.16, "step": 8237 }, { "epoch": 0.3223256905861179, "grad_norm": 0.0, "learning_rate": 1.5843249400822765e-05, "loss": 1.1411, "step": 8238 }, { "epoch": 0.3223648172783473, "grad_norm": 0.0, "learning_rate": 1.5842220970784773e-05, "loss": 1.1177, "step": 8239 }, { "epoch": 0.3224039439705767, "grad_norm": 0.0, "learning_rate": 1.5841192446928032e-05, "loss": 0.9876, "step": 8240 }, { "epoch": 0.32244307066280614, "grad_norm": 0.0, "learning_rate": 1.584016382926906e-05, "loss": 1.2944, "step": 8241 }, { "epoch": 0.3224821973550356, "grad_norm": 0.0, "learning_rate": 1.5839135117824375e-05, "loss": 1.1553, "step": 8242 }, { "epoch": 0.322521324047265, "grad_norm": 0.0, "learning_rate": 1.5838106312610496e-05, "loss": 1.1535, "step": 8243 }, { "epoch": 0.32256045073949446, "grad_norm": 0.0, "learning_rate": 1.5837077413643947e-05, "loss": 1.015, "step": 8244 }, { "epoch": 0.3225995774317239, "grad_norm": 0.0, "learning_rate": 1.5836048420941246e-05, "loss": 1.0398, "step": 8245 }, { "epoch": 0.32263870412395335, "grad_norm": 0.0, "learning_rate": 1.583501933451892e-05, "loss": 1.0978, "step": 8246 }, { "epoch": 0.3226778308161828, "grad_norm": 0.0, "learning_rate": 1.58339901543935e-05, "loss": 1.142, "step": 8247 }, { "epoch": 0.3227169575084122, "grad_norm": 0.0, "learning_rate": 1.5832960880581506e-05, "loss": 1.164, "step": 8248 }, { "epoch": 0.32275608420064167, "grad_norm": 0.0, "learning_rate": 1.583193151309947e-05, "loss": 1.1168, "step": 8249 }, { "epoch": 0.3227952108928711, "grad_norm": 0.0, "learning_rate": 1.583090205196392e-05, "loss": 1.1092, "step": 8250 }, { "epoch": 0.32283433758510055, "grad_norm": 0.0, "learning_rate": 1.5829872497191388e-05, "loss": 0.987, "step": 8251 }, { "epoch": 0.32287346427733, "grad_norm": 0.0, "learning_rate": 1.5828842848798413e-05, "loss": 1.164, "step": 8252 }, { "epoch": 0.32291259096955943, "grad_norm": 0.0, "learning_rate": 1.5827813106801524e-05, "loss": 1.2147, "step": 8253 }, { "epoch": 0.3229517176617889, "grad_norm": 0.0, "learning_rate": 1.582678327121726e-05, "loss": 1.0036, "step": 8254 }, { "epoch": 0.3229908443540183, "grad_norm": 0.0, "learning_rate": 1.5825753342062155e-05, "loss": 1.0466, "step": 8255 }, { "epoch": 0.32302997104624775, "grad_norm": 0.0, "learning_rate": 1.5824723319352754e-05, "loss": 1.1137, "step": 8256 }, { "epoch": 0.3230690977384772, "grad_norm": 0.0, "learning_rate": 1.5823693203105595e-05, "loss": 1.1357, "step": 8257 }, { "epoch": 0.32310822443070664, "grad_norm": 0.0, "learning_rate": 1.582266299333722e-05, "loss": 1.16, "step": 8258 }, { "epoch": 0.3231473511229361, "grad_norm": 0.0, "learning_rate": 1.5821632690064175e-05, "loss": 1.1871, "step": 8259 }, { "epoch": 0.3231864778151655, "grad_norm": 0.0, "learning_rate": 1.5820602293303004e-05, "loss": 0.9099, "step": 8260 }, { "epoch": 0.32322560450739496, "grad_norm": 0.0, "learning_rate": 1.581957180307025e-05, "loss": 1.0651, "step": 8261 }, { "epoch": 0.3232647311996244, "grad_norm": 0.0, "learning_rate": 1.5818541219382472e-05, "loss": 1.0807, "step": 8262 }, { "epoch": 0.32330385789185384, "grad_norm": 0.0, "learning_rate": 1.5817510542256208e-05, "loss": 1.1684, "step": 8263 }, { "epoch": 0.3233429845840833, "grad_norm": 0.0, "learning_rate": 1.5816479771708014e-05, "loss": 1.1591, "step": 8264 }, { "epoch": 0.3233821112763127, "grad_norm": 0.0, "learning_rate": 1.5815448907754448e-05, "loss": 1.0497, "step": 8265 }, { "epoch": 0.32342123796854216, "grad_norm": 0.0, "learning_rate": 1.5814417950412053e-05, "loss": 1.1922, "step": 8266 }, { "epoch": 0.3234603646607716, "grad_norm": 0.0, "learning_rate": 1.5813386899697395e-05, "loss": 1.11, "step": 8267 }, { "epoch": 0.323499491353001, "grad_norm": 0.0, "learning_rate": 1.5812355755627028e-05, "loss": 1.0959, "step": 8268 }, { "epoch": 0.32353861804523043, "grad_norm": 0.0, "learning_rate": 1.5811324518217513e-05, "loss": 1.0095, "step": 8269 }, { "epoch": 0.32357774473745987, "grad_norm": 0.0, "learning_rate": 1.5810293187485407e-05, "loss": 1.1328, "step": 8270 }, { "epoch": 0.3236168714296893, "grad_norm": 0.0, "learning_rate": 1.580926176344727e-05, "loss": 1.0482, "step": 8271 }, { "epoch": 0.32365599812191875, "grad_norm": 0.0, "learning_rate": 1.580823024611967e-05, "loss": 1.0085, "step": 8272 }, { "epoch": 0.3236951248141482, "grad_norm": 0.0, "learning_rate": 1.5807198635519172e-05, "loss": 0.9737, "step": 8273 }, { "epoch": 0.32373425150637763, "grad_norm": 0.0, "learning_rate": 1.5806166931662338e-05, "loss": 1.1042, "step": 8274 }, { "epoch": 0.3237733781986071, "grad_norm": 0.0, "learning_rate": 1.580513513456574e-05, "loss": 1.1328, "step": 8275 }, { "epoch": 0.3238125048908365, "grad_norm": 0.0, "learning_rate": 1.580410324424595e-05, "loss": 1.0304, "step": 8276 }, { "epoch": 0.32385163158306596, "grad_norm": 0.0, "learning_rate": 1.5803071260719528e-05, "loss": 1.1844, "step": 8277 }, { "epoch": 0.3238907582752954, "grad_norm": 0.0, "learning_rate": 1.580203918400306e-05, "loss": 1.0766, "step": 8278 }, { "epoch": 0.32392988496752484, "grad_norm": 0.0, "learning_rate": 1.5801007014113106e-05, "loss": 0.9823, "step": 8279 }, { "epoch": 0.3239690116597543, "grad_norm": 0.0, "learning_rate": 1.5799974751066252e-05, "loss": 1.1364, "step": 8280 }, { "epoch": 0.3240081383519837, "grad_norm": 0.0, "learning_rate": 1.5798942394879073e-05, "loss": 1.2115, "step": 8281 }, { "epoch": 0.32404726504421316, "grad_norm": 0.0, "learning_rate": 1.5797909945568146e-05, "loss": 1.0315, "step": 8282 }, { "epoch": 0.3240863917364426, "grad_norm": 0.0, "learning_rate": 1.579687740315005e-05, "loss": 1.1663, "step": 8283 }, { "epoch": 0.32412551842867204, "grad_norm": 0.0, "learning_rate": 1.579584476764136e-05, "loss": 1.1031, "step": 8284 }, { "epoch": 0.3241646451209015, "grad_norm": 0.0, "learning_rate": 1.5794812039058674e-05, "loss": 1.0292, "step": 8285 }, { "epoch": 0.3242037718131309, "grad_norm": 0.0, "learning_rate": 1.5793779217418562e-05, "loss": 1.1381, "step": 8286 }, { "epoch": 0.32424289850536037, "grad_norm": 0.0, "learning_rate": 1.579274630273762e-05, "loss": 1.2523, "step": 8287 }, { "epoch": 0.3242820251975898, "grad_norm": 0.0, "learning_rate": 1.579171329503243e-05, "loss": 1.1212, "step": 8288 }, { "epoch": 0.32432115188981925, "grad_norm": 0.0, "learning_rate": 1.579068019431958e-05, "loss": 1.1093, "step": 8289 }, { "epoch": 0.3243602785820487, "grad_norm": 0.0, "learning_rate": 1.5789647000615665e-05, "loss": 1.0723, "step": 8290 }, { "epoch": 0.32439940527427813, "grad_norm": 0.0, "learning_rate": 1.5788613713937273e-05, "loss": 1.1087, "step": 8291 }, { "epoch": 0.32443853196650757, "grad_norm": 0.0, "learning_rate": 1.5787580334300997e-05, "loss": 1.1776, "step": 8292 }, { "epoch": 0.324477658658737, "grad_norm": 0.0, "learning_rate": 1.5786546861723434e-05, "loss": 1.1179, "step": 8293 }, { "epoch": 0.32451678535096645, "grad_norm": 0.0, "learning_rate": 1.578551329622118e-05, "loss": 1.2154, "step": 8294 }, { "epoch": 0.3245559120431959, "grad_norm": 0.0, "learning_rate": 1.5784479637810832e-05, "loss": 1.1607, "step": 8295 }, { "epoch": 0.32459503873542533, "grad_norm": 0.0, "learning_rate": 1.5783445886508987e-05, "loss": 1.1527, "step": 8296 }, { "epoch": 0.3246341654276547, "grad_norm": 0.0, "learning_rate": 1.578241204233225e-05, "loss": 1.1062, "step": 8297 }, { "epoch": 0.32467329211988416, "grad_norm": 0.0, "learning_rate": 1.5781378105297225e-05, "loss": 1.2365, "step": 8298 }, { "epoch": 0.3247124188121136, "grad_norm": 0.0, "learning_rate": 1.5780344075420507e-05, "loss": 1.0874, "step": 8299 }, { "epoch": 0.32475154550434304, "grad_norm": 0.0, "learning_rate": 1.5779309952718706e-05, "loss": 1.0651, "step": 8300 }, { "epoch": 0.3247906721965725, "grad_norm": 0.0, "learning_rate": 1.577827573720843e-05, "loss": 1.152, "step": 8301 }, { "epoch": 0.3248297988888019, "grad_norm": 0.0, "learning_rate": 1.577724142890629e-05, "loss": 1.1099, "step": 8302 }, { "epoch": 0.32486892558103136, "grad_norm": 0.0, "learning_rate": 1.577620702782889e-05, "loss": 1.1133, "step": 8303 }, { "epoch": 0.3249080522732608, "grad_norm": 0.0, "learning_rate": 1.577517253399284e-05, "loss": 1.0048, "step": 8304 }, { "epoch": 0.32494717896549025, "grad_norm": 0.0, "learning_rate": 1.5774137947414757e-05, "loss": 1.1121, "step": 8305 }, { "epoch": 0.3249863056577197, "grad_norm": 0.0, "learning_rate": 1.5773103268111255e-05, "loss": 1.1048, "step": 8306 }, { "epoch": 0.32502543234994913, "grad_norm": 0.0, "learning_rate": 1.577206849609895e-05, "loss": 1.0099, "step": 8307 }, { "epoch": 0.32506455904217857, "grad_norm": 0.0, "learning_rate": 1.5771033631394455e-05, "loss": 1.1194, "step": 8308 }, { "epoch": 0.325103685734408, "grad_norm": 0.0, "learning_rate": 1.5769998674014393e-05, "loss": 1.2639, "step": 8309 }, { "epoch": 0.32514281242663745, "grad_norm": 0.0, "learning_rate": 1.5768963623975386e-05, "loss": 1.1326, "step": 8310 }, { "epoch": 0.3251819391188669, "grad_norm": 0.0, "learning_rate": 1.5767928481294046e-05, "loss": 1.1096, "step": 8311 }, { "epoch": 0.32522106581109633, "grad_norm": 0.0, "learning_rate": 1.5766893245987005e-05, "loss": 1.1833, "step": 8312 }, { "epoch": 0.3252601925033258, "grad_norm": 0.0, "learning_rate": 1.5765857918070883e-05, "loss": 1.2768, "step": 8313 }, { "epoch": 0.3252993191955552, "grad_norm": 0.0, "learning_rate": 1.576482249756231e-05, "loss": 1.0384, "step": 8314 }, { "epoch": 0.32533844588778466, "grad_norm": 0.0, "learning_rate": 1.576378698447791e-05, "loss": 1.1293, "step": 8315 }, { "epoch": 0.3253775725800141, "grad_norm": 0.0, "learning_rate": 1.5762751378834314e-05, "loss": 1.0543, "step": 8316 }, { "epoch": 0.32541669927224354, "grad_norm": 0.0, "learning_rate": 1.5761715680648154e-05, "loss": 1.1655, "step": 8317 }, { "epoch": 0.325455825964473, "grad_norm": 0.0, "learning_rate": 1.5760679889936056e-05, "loss": 1.0388, "step": 8318 }, { "epoch": 0.3254949526567024, "grad_norm": 0.0, "learning_rate": 1.575964400671466e-05, "loss": 1.0237, "step": 8319 }, { "epoch": 0.32553407934893186, "grad_norm": 0.0, "learning_rate": 1.57586080310006e-05, "loss": 1.162, "step": 8320 }, { "epoch": 0.3255732060411613, "grad_norm": 0.0, "learning_rate": 1.5757571962810507e-05, "loss": 1.1241, "step": 8321 }, { "epoch": 0.32561233273339074, "grad_norm": 0.0, "learning_rate": 1.5756535802161028e-05, "loss": 1.0555, "step": 8322 }, { "epoch": 0.3256514594256202, "grad_norm": 0.0, "learning_rate": 1.5755499549068792e-05, "loss": 1.205, "step": 8323 }, { "epoch": 0.3256905861178496, "grad_norm": 0.0, "learning_rate": 1.575446320355045e-05, "loss": 0.9938, "step": 8324 }, { "epoch": 0.325729712810079, "grad_norm": 0.0, "learning_rate": 1.5753426765622637e-05, "loss": 1.1445, "step": 8325 }, { "epoch": 0.32576883950230845, "grad_norm": 0.0, "learning_rate": 1.5752390235301996e-05, "loss": 1.171, "step": 8326 }, { "epoch": 0.3258079661945379, "grad_norm": 0.0, "learning_rate": 1.5751353612605183e-05, "loss": 1.1017, "step": 8327 }, { "epoch": 0.32584709288676733, "grad_norm": 0.0, "learning_rate": 1.575031689754883e-05, "loss": 1.0432, "step": 8328 }, { "epoch": 0.32588621957899677, "grad_norm": 0.0, "learning_rate": 1.5749280090149602e-05, "loss": 1.1303, "step": 8329 }, { "epoch": 0.3259253462712262, "grad_norm": 0.0, "learning_rate": 1.5748243190424134e-05, "loss": 1.1049, "step": 8330 }, { "epoch": 0.32596447296345565, "grad_norm": 0.0, "learning_rate": 1.5747206198389086e-05, "loss": 1.064, "step": 8331 }, { "epoch": 0.3260035996556851, "grad_norm": 0.0, "learning_rate": 1.5746169114061108e-05, "loss": 0.9478, "step": 8332 }, { "epoch": 0.32604272634791454, "grad_norm": 0.0, "learning_rate": 1.5745131937456853e-05, "loss": 1.1714, "step": 8333 }, { "epoch": 0.326081853040144, "grad_norm": 0.0, "learning_rate": 1.574409466859298e-05, "loss": 1.1464, "step": 8334 }, { "epoch": 0.3261209797323734, "grad_norm": 0.0, "learning_rate": 1.574305730748614e-05, "loss": 1.0966, "step": 8335 }, { "epoch": 0.32616010642460286, "grad_norm": 0.0, "learning_rate": 1.5742019854153003e-05, "loss": 1.0485, "step": 8336 }, { "epoch": 0.3261992331168323, "grad_norm": 0.0, "learning_rate": 1.5740982308610218e-05, "loss": 1.0773, "step": 8337 }, { "epoch": 0.32623835980906174, "grad_norm": 0.0, "learning_rate": 1.5739944670874453e-05, "loss": 1.1885, "step": 8338 }, { "epoch": 0.3262774865012912, "grad_norm": 0.0, "learning_rate": 1.5738906940962368e-05, "loss": 1.0697, "step": 8339 }, { "epoch": 0.3263166131935206, "grad_norm": 0.0, "learning_rate": 1.5737869118890628e-05, "loss": 1.0814, "step": 8340 }, { "epoch": 0.32635573988575006, "grad_norm": 0.0, "learning_rate": 1.57368312046759e-05, "loss": 1.0265, "step": 8341 }, { "epoch": 0.3263948665779795, "grad_norm": 0.0, "learning_rate": 1.573579319833485e-05, "loss": 1.1449, "step": 8342 }, { "epoch": 0.32643399327020894, "grad_norm": 0.0, "learning_rate": 1.573475509988415e-05, "loss": 0.9836, "step": 8343 }, { "epoch": 0.3264731199624384, "grad_norm": 0.0, "learning_rate": 1.573371690934047e-05, "loss": 1.0798, "step": 8344 }, { "epoch": 0.3265122466546678, "grad_norm": 0.0, "learning_rate": 1.573267862672048e-05, "loss": 0.9966, "step": 8345 }, { "epoch": 0.32655137334689727, "grad_norm": 0.0, "learning_rate": 1.5731640252040857e-05, "loss": 1.1639, "step": 8346 }, { "epoch": 0.3265905000391267, "grad_norm": 0.0, "learning_rate": 1.573060178531827e-05, "loss": 1.0747, "step": 8347 }, { "epoch": 0.32662962673135615, "grad_norm": 0.0, "learning_rate": 1.5729563226569402e-05, "loss": 1.1157, "step": 8348 }, { "epoch": 0.3266687534235856, "grad_norm": 0.0, "learning_rate": 1.572852457581093e-05, "loss": 1.024, "step": 8349 }, { "epoch": 0.32670788011581503, "grad_norm": 0.0, "learning_rate": 1.5727485833059526e-05, "loss": 1.1975, "step": 8350 }, { "epoch": 0.32674700680804447, "grad_norm": 0.0, "learning_rate": 1.572644699833188e-05, "loss": 1.1256, "step": 8351 }, { "epoch": 0.3267861335002739, "grad_norm": 0.0, "learning_rate": 1.572540807164467e-05, "loss": 1.0616, "step": 8352 }, { "epoch": 0.32682526019250335, "grad_norm": 0.0, "learning_rate": 1.5724369053014583e-05, "loss": 1.205, "step": 8353 }, { "epoch": 0.32686438688473274, "grad_norm": 0.0, "learning_rate": 1.5723329942458302e-05, "loss": 1.1667, "step": 8354 }, { "epoch": 0.3269035135769622, "grad_norm": 0.0, "learning_rate": 1.572229073999251e-05, "loss": 1.2708, "step": 8355 }, { "epoch": 0.3269426402691916, "grad_norm": 0.0, "learning_rate": 1.57212514456339e-05, "loss": 1.2173, "step": 8356 }, { "epoch": 0.32698176696142106, "grad_norm": 0.0, "learning_rate": 1.5720212059399163e-05, "loss": 1.1771, "step": 8357 }, { "epoch": 0.3270208936536505, "grad_norm": 0.0, "learning_rate": 1.5719172581304987e-05, "loss": 0.9752, "step": 8358 }, { "epoch": 0.32706002034587994, "grad_norm": 0.0, "learning_rate": 1.5718133011368065e-05, "loss": 1.1197, "step": 8359 }, { "epoch": 0.3270991470381094, "grad_norm": 0.0, "learning_rate": 1.5717093349605093e-05, "loss": 0.9719, "step": 8360 }, { "epoch": 0.3271382737303388, "grad_norm": 0.0, "learning_rate": 1.5716053596032767e-05, "loss": 1.2187, "step": 8361 }, { "epoch": 0.32717740042256827, "grad_norm": 0.0, "learning_rate": 1.571501375066778e-05, "loss": 1.1702, "step": 8362 }, { "epoch": 0.3272165271147977, "grad_norm": 0.0, "learning_rate": 1.5713973813526836e-05, "loss": 1.0453, "step": 8363 }, { "epoch": 0.32725565380702715, "grad_norm": 0.0, "learning_rate": 1.5712933784626633e-05, "loss": 1.1655, "step": 8364 }, { "epoch": 0.3272947804992566, "grad_norm": 0.0, "learning_rate": 1.571189366398387e-05, "loss": 1.1596, "step": 8365 }, { "epoch": 0.32733390719148603, "grad_norm": 0.0, "learning_rate": 1.5710853451615254e-05, "loss": 1.1611, "step": 8366 }, { "epoch": 0.32737303388371547, "grad_norm": 0.0, "learning_rate": 1.570981314753749e-05, "loss": 1.1483, "step": 8367 }, { "epoch": 0.3274121605759449, "grad_norm": 0.0, "learning_rate": 1.5708772751767275e-05, "loss": 1.1471, "step": 8368 }, { "epoch": 0.32745128726817435, "grad_norm": 0.0, "learning_rate": 1.5707732264321327e-05, "loss": 1.1286, "step": 8369 }, { "epoch": 0.3274904139604038, "grad_norm": 0.0, "learning_rate": 1.570669168521635e-05, "loss": 1.3379, "step": 8370 }, { "epoch": 0.32752954065263323, "grad_norm": 0.0, "learning_rate": 1.5705651014469054e-05, "loss": 1.1024, "step": 8371 }, { "epoch": 0.3275686673448627, "grad_norm": 0.0, "learning_rate": 1.5704610252096158e-05, "loss": 1.1288, "step": 8372 }, { "epoch": 0.3276077940370921, "grad_norm": 0.0, "learning_rate": 1.5703569398114364e-05, "loss": 1.2219, "step": 8373 }, { "epoch": 0.32764692072932156, "grad_norm": 0.0, "learning_rate": 1.5702528452540394e-05, "loss": 1.1082, "step": 8374 }, { "epoch": 0.327686047421551, "grad_norm": 0.0, "learning_rate": 1.570148741539096e-05, "loss": 1.089, "step": 8375 }, { "epoch": 0.32772517411378044, "grad_norm": 0.0, "learning_rate": 1.5700446286682786e-05, "loss": 1.0193, "step": 8376 }, { "epoch": 0.3277643008060099, "grad_norm": 0.0, "learning_rate": 1.569940506643259e-05, "loss": 1.0176, "step": 8377 }, { "epoch": 0.3278034274982393, "grad_norm": 0.0, "learning_rate": 1.5698363754657087e-05, "loss": 1.0817, "step": 8378 }, { "epoch": 0.32784255419046876, "grad_norm": 0.0, "learning_rate": 1.5697322351373e-05, "loss": 1.2126, "step": 8379 }, { "epoch": 0.3278816808826982, "grad_norm": 0.0, "learning_rate": 1.569628085659706e-05, "loss": 1.1345, "step": 8380 }, { "epoch": 0.32792080757492764, "grad_norm": 0.0, "learning_rate": 1.5695239270345986e-05, "loss": 0.9465, "step": 8381 }, { "epoch": 0.327959934267157, "grad_norm": 0.0, "learning_rate": 1.5694197592636506e-05, "loss": 1.1228, "step": 8382 }, { "epoch": 0.32799906095938647, "grad_norm": 0.0, "learning_rate": 1.5693155823485348e-05, "loss": 1.0888, "step": 8383 }, { "epoch": 0.3280381876516159, "grad_norm": 0.0, "learning_rate": 1.569211396290924e-05, "loss": 1.0363, "step": 8384 }, { "epoch": 0.32807731434384535, "grad_norm": 0.0, "learning_rate": 1.5691072010924915e-05, "loss": 1.1675, "step": 8385 }, { "epoch": 0.3281164410360748, "grad_norm": 0.0, "learning_rate": 1.5690029967549107e-05, "loss": 1.2169, "step": 8386 }, { "epoch": 0.32815556772830423, "grad_norm": 0.0, "learning_rate": 1.5688987832798545e-05, "loss": 1.0966, "step": 8387 }, { "epoch": 0.3281946944205337, "grad_norm": 0.0, "learning_rate": 1.5687945606689967e-05, "loss": 1.1288, "step": 8388 }, { "epoch": 0.3282338211127631, "grad_norm": 0.0, "learning_rate": 1.568690328924011e-05, "loss": 1.0775, "step": 8389 }, { "epoch": 0.32827294780499255, "grad_norm": 0.0, "learning_rate": 1.5685860880465713e-05, "loss": 1.1536, "step": 8390 }, { "epoch": 0.328312074497222, "grad_norm": 0.0, "learning_rate": 1.5684818380383515e-05, "loss": 1.1138, "step": 8391 }, { "epoch": 0.32835120118945144, "grad_norm": 0.0, "learning_rate": 1.5683775789010257e-05, "loss": 1.2057, "step": 8392 }, { "epoch": 0.3283903278816809, "grad_norm": 0.0, "learning_rate": 1.568273310636268e-05, "loss": 1.0452, "step": 8393 }, { "epoch": 0.3284294545739103, "grad_norm": 0.0, "learning_rate": 1.5681690332457537e-05, "loss": 1.1496, "step": 8394 }, { "epoch": 0.32846858126613976, "grad_norm": 0.0, "learning_rate": 1.568064746731156e-05, "loss": 1.1823, "step": 8395 }, { "epoch": 0.3285077079583692, "grad_norm": 0.0, "learning_rate": 1.5679604510941504e-05, "loss": 1.1431, "step": 8396 }, { "epoch": 0.32854683465059864, "grad_norm": 0.0, "learning_rate": 1.5678561463364118e-05, "loss": 1.0723, "step": 8397 }, { "epoch": 0.3285859613428281, "grad_norm": 0.0, "learning_rate": 1.567751832459615e-05, "loss": 1.0932, "step": 8398 }, { "epoch": 0.3286250880350575, "grad_norm": 0.0, "learning_rate": 1.5676475094654353e-05, "loss": 1.1101, "step": 8399 }, { "epoch": 0.32866421472728696, "grad_norm": 0.0, "learning_rate": 1.567543177355548e-05, "loss": 1.0723, "step": 8400 }, { "epoch": 0.3287033414195164, "grad_norm": 0.0, "learning_rate": 1.567438836131628e-05, "loss": 1.1993, "step": 8401 }, { "epoch": 0.32874246811174584, "grad_norm": 0.0, "learning_rate": 1.5673344857953518e-05, "loss": 1.1046, "step": 8402 }, { "epoch": 0.3287815948039753, "grad_norm": 0.0, "learning_rate": 1.5672301263483945e-05, "loss": 1.1436, "step": 8403 }, { "epoch": 0.3288207214962047, "grad_norm": 0.0, "learning_rate": 1.5671257577924318e-05, "loss": 0.9937, "step": 8404 }, { "epoch": 0.32885984818843417, "grad_norm": 0.0, "learning_rate": 1.5670213801291406e-05, "loss": 1.1261, "step": 8405 }, { "epoch": 0.3288989748806636, "grad_norm": 0.0, "learning_rate": 1.5669169933601965e-05, "loss": 1.0365, "step": 8406 }, { "epoch": 0.32893810157289305, "grad_norm": 0.0, "learning_rate": 1.5668125974872755e-05, "loss": 1.1305, "step": 8407 }, { "epoch": 0.3289772282651225, "grad_norm": 0.0, "learning_rate": 1.5667081925120548e-05, "loss": 1.0665, "step": 8408 }, { "epoch": 0.32901635495735193, "grad_norm": 0.0, "learning_rate": 1.5666037784362104e-05, "loss": 1.0019, "step": 8409 }, { "epoch": 0.32905548164958137, "grad_norm": 0.0, "learning_rate": 1.5664993552614192e-05, "loss": 1.1, "step": 8410 }, { "epoch": 0.32909460834181076, "grad_norm": 0.0, "learning_rate": 1.5663949229893587e-05, "loss": 1.0256, "step": 8411 }, { "epoch": 0.3291337350340402, "grad_norm": 0.0, "learning_rate": 1.566290481621705e-05, "loss": 1.0456, "step": 8412 }, { "epoch": 0.32917286172626964, "grad_norm": 0.0, "learning_rate": 1.566186031160136e-05, "loss": 1.1884, "step": 8413 }, { "epoch": 0.3292119884184991, "grad_norm": 0.0, "learning_rate": 1.5660815716063292e-05, "loss": 1.0487, "step": 8414 }, { "epoch": 0.3292511151107285, "grad_norm": 0.0, "learning_rate": 1.565977102961961e-05, "loss": 1.0276, "step": 8415 }, { "epoch": 0.32929024180295796, "grad_norm": 0.0, "learning_rate": 1.56587262522871e-05, "loss": 1.0081, "step": 8416 }, { "epoch": 0.3293293684951874, "grad_norm": 0.0, "learning_rate": 1.565768138408254e-05, "loss": 1.0235, "step": 8417 }, { "epoch": 0.32936849518741684, "grad_norm": 0.0, "learning_rate": 1.5656636425022702e-05, "loss": 1.2762, "step": 8418 }, { "epoch": 0.3294076218796463, "grad_norm": 0.0, "learning_rate": 1.5655591375124375e-05, "loss": 1.0891, "step": 8419 }, { "epoch": 0.3294467485718757, "grad_norm": 0.0, "learning_rate": 1.5654546234404333e-05, "loss": 1.1033, "step": 8420 }, { "epoch": 0.32948587526410517, "grad_norm": 0.0, "learning_rate": 1.5653501002879368e-05, "loss": 1.0046, "step": 8421 }, { "epoch": 0.3295250019563346, "grad_norm": 0.0, "learning_rate": 1.565245568056626e-05, "loss": 1.1413, "step": 8422 }, { "epoch": 0.32956412864856405, "grad_norm": 0.0, "learning_rate": 1.5651410267481795e-05, "loss": 1.0881, "step": 8423 }, { "epoch": 0.3296032553407935, "grad_norm": 0.0, "learning_rate": 1.5650364763642764e-05, "loss": 1.1137, "step": 8424 }, { "epoch": 0.32964238203302293, "grad_norm": 0.0, "learning_rate": 1.5649319169065955e-05, "loss": 1.0626, "step": 8425 }, { "epoch": 0.32968150872525237, "grad_norm": 0.0, "learning_rate": 1.564827348376816e-05, "loss": 1.0923, "step": 8426 }, { "epoch": 0.3297206354174818, "grad_norm": 0.0, "learning_rate": 1.5647227707766167e-05, "loss": 1.0671, "step": 8427 }, { "epoch": 0.32975976210971125, "grad_norm": 0.0, "learning_rate": 1.564618184107678e-05, "loss": 1.1255, "step": 8428 }, { "epoch": 0.3297988888019407, "grad_norm": 0.0, "learning_rate": 1.564513588371678e-05, "loss": 0.9719, "step": 8429 }, { "epoch": 0.32983801549417013, "grad_norm": 0.0, "learning_rate": 1.564408983570298e-05, "loss": 1.0042, "step": 8430 }, { "epoch": 0.3298771421863996, "grad_norm": 0.0, "learning_rate": 1.5643043697052164e-05, "loss": 1.0724, "step": 8431 }, { "epoch": 0.329916268878629, "grad_norm": 0.0, "learning_rate": 1.5641997467781137e-05, "loss": 1.0312, "step": 8432 }, { "epoch": 0.32995539557085846, "grad_norm": 0.0, "learning_rate": 1.56409511479067e-05, "loss": 1.2842, "step": 8433 }, { "epoch": 0.3299945222630879, "grad_norm": 0.0, "learning_rate": 1.5639904737445658e-05, "loss": 1.1379, "step": 8434 }, { "epoch": 0.33003364895531734, "grad_norm": 0.0, "learning_rate": 1.5638858236414812e-05, "loss": 1.1946, "step": 8435 }, { "epoch": 0.3300727756475468, "grad_norm": 0.0, "learning_rate": 1.563781164483097e-05, "loss": 1.1682, "step": 8436 }, { "epoch": 0.3301119023397762, "grad_norm": 0.0, "learning_rate": 1.5636764962710936e-05, "loss": 1.2208, "step": 8437 }, { "epoch": 0.33015102903200566, "grad_norm": 0.0, "learning_rate": 1.5635718190071526e-05, "loss": 1.1448, "step": 8438 }, { "epoch": 0.33019015572423505, "grad_norm": 0.0, "learning_rate": 1.563467132692954e-05, "loss": 1.1057, "step": 8439 }, { "epoch": 0.3302292824164645, "grad_norm": 0.0, "learning_rate": 1.563362437330179e-05, "loss": 1.0419, "step": 8440 }, { "epoch": 0.33026840910869393, "grad_norm": 0.0, "learning_rate": 1.5632577329205095e-05, "loss": 1.1091, "step": 8441 }, { "epoch": 0.33030753580092337, "grad_norm": 0.0, "learning_rate": 1.5631530194656265e-05, "loss": 1.0105, "step": 8442 }, { "epoch": 0.3303466624931528, "grad_norm": 0.0, "learning_rate": 1.5630482969672116e-05, "loss": 1.1986, "step": 8443 }, { "epoch": 0.33038578918538225, "grad_norm": 0.0, "learning_rate": 1.5629435654269464e-05, "loss": 1.0249, "step": 8444 }, { "epoch": 0.3304249158776117, "grad_norm": 0.0, "learning_rate": 1.5628388248465136e-05, "loss": 1.0425, "step": 8445 }, { "epoch": 0.33046404256984113, "grad_norm": 0.0, "learning_rate": 1.562734075227594e-05, "loss": 1.1215, "step": 8446 }, { "epoch": 0.3305031692620706, "grad_norm": 0.0, "learning_rate": 1.5626293165718704e-05, "loss": 1.0972, "step": 8447 }, { "epoch": 0.3305422959543, "grad_norm": 0.0, "learning_rate": 1.5625245488810253e-05, "loss": 1.1004, "step": 8448 }, { "epoch": 0.33058142264652945, "grad_norm": 0.0, "learning_rate": 1.5624197721567405e-05, "loss": 1.0065, "step": 8449 }, { "epoch": 0.3306205493387589, "grad_norm": 0.0, "learning_rate": 1.5623149864006993e-05, "loss": 1.0425, "step": 8450 }, { "epoch": 0.33065967603098834, "grad_norm": 0.0, "learning_rate": 1.5622101916145835e-05, "loss": 1.1342, "step": 8451 }, { "epoch": 0.3306988027232178, "grad_norm": 0.0, "learning_rate": 1.5621053878000767e-05, "loss": 1.1607, "step": 8452 }, { "epoch": 0.3307379294154472, "grad_norm": 0.0, "learning_rate": 1.5620005749588617e-05, "loss": 1.1257, "step": 8453 }, { "epoch": 0.33077705610767666, "grad_norm": 0.0, "learning_rate": 1.561895753092622e-05, "loss": 1.0885, "step": 8454 }, { "epoch": 0.3308161827999061, "grad_norm": 0.0, "learning_rate": 1.56179092220304e-05, "loss": 1.1931, "step": 8455 }, { "epoch": 0.33085530949213554, "grad_norm": 0.0, "learning_rate": 1.5616860822918004e-05, "loss": 1.0755, "step": 8456 }, { "epoch": 0.330894436184365, "grad_norm": 0.0, "learning_rate": 1.561581233360586e-05, "loss": 1.0314, "step": 8457 }, { "epoch": 0.3309335628765944, "grad_norm": 0.0, "learning_rate": 1.5614763754110804e-05, "loss": 1.0708, "step": 8458 }, { "epoch": 0.33097268956882386, "grad_norm": 0.0, "learning_rate": 1.561371508444968e-05, "loss": 1.0924, "step": 8459 }, { "epoch": 0.3310118162610533, "grad_norm": 0.0, "learning_rate": 1.5612666324639327e-05, "loss": 1.0412, "step": 8460 }, { "epoch": 0.33105094295328275, "grad_norm": 0.0, "learning_rate": 1.5611617474696584e-05, "loss": 1.0317, "step": 8461 }, { "epoch": 0.3310900696455122, "grad_norm": 0.0, "learning_rate": 1.5610568534638294e-05, "loss": 1.0906, "step": 8462 }, { "epoch": 0.3311291963377416, "grad_norm": 0.0, "learning_rate": 1.5609519504481306e-05, "loss": 1.1743, "step": 8463 }, { "epoch": 0.33116832302997107, "grad_norm": 0.0, "learning_rate": 1.5608470384242466e-05, "loss": 1.2103, "step": 8464 }, { "epoch": 0.3312074497222005, "grad_norm": 0.0, "learning_rate": 1.560742117393862e-05, "loss": 1.1716, "step": 8465 }, { "epoch": 0.33124657641442995, "grad_norm": 0.0, "learning_rate": 1.560637187358661e-05, "loss": 0.948, "step": 8466 }, { "epoch": 0.3312857031066594, "grad_norm": 0.0, "learning_rate": 1.56053224832033e-05, "loss": 1.2032, "step": 8467 }, { "epoch": 0.3313248297988888, "grad_norm": 0.0, "learning_rate": 1.560427300280553e-05, "loss": 1.0045, "step": 8468 }, { "epoch": 0.3313639564911182, "grad_norm": 0.0, "learning_rate": 1.560322343241016e-05, "loss": 1.1723, "step": 8469 }, { "epoch": 0.33140308318334766, "grad_norm": 0.0, "learning_rate": 1.5602173772034045e-05, "loss": 1.0218, "step": 8470 }, { "epoch": 0.3314422098755771, "grad_norm": 0.0, "learning_rate": 1.5601124021694036e-05, "loss": 1.1866, "step": 8471 }, { "epoch": 0.33148133656780654, "grad_norm": 0.0, "learning_rate": 1.5600074181406995e-05, "loss": 1.0576, "step": 8472 }, { "epoch": 0.331520463260036, "grad_norm": 0.0, "learning_rate": 1.5599024251189782e-05, "loss": 1.1419, "step": 8473 }, { "epoch": 0.3315595899522654, "grad_norm": 0.0, "learning_rate": 1.5597974231059252e-05, "loss": 1.1982, "step": 8474 }, { "epoch": 0.33159871664449486, "grad_norm": 0.0, "learning_rate": 1.5596924121032272e-05, "loss": 1.1342, "step": 8475 }, { "epoch": 0.3316378433367243, "grad_norm": 0.0, "learning_rate": 1.55958739211257e-05, "loss": 1.0682, "step": 8476 }, { "epoch": 0.33167697002895374, "grad_norm": 0.0, "learning_rate": 1.5594823631356412e-05, "loss": 1.1488, "step": 8477 }, { "epoch": 0.3317160967211832, "grad_norm": 0.0, "learning_rate": 1.5593773251741264e-05, "loss": 1.056, "step": 8478 }, { "epoch": 0.3317552234134126, "grad_norm": 0.0, "learning_rate": 1.5592722782297127e-05, "loss": 1.1074, "step": 8479 }, { "epoch": 0.33179435010564207, "grad_norm": 0.0, "learning_rate": 1.5591672223040867e-05, "loss": 1.2, "step": 8480 }, { "epoch": 0.3318334767978715, "grad_norm": 0.0, "learning_rate": 1.5590621573989363e-05, "loss": 1.1847, "step": 8481 }, { "epoch": 0.33187260349010095, "grad_norm": 0.0, "learning_rate": 1.558957083515948e-05, "loss": 1.0615, "step": 8482 }, { "epoch": 0.3319117301823304, "grad_norm": 0.0, "learning_rate": 1.5588520006568093e-05, "loss": 1.209, "step": 8483 }, { "epoch": 0.33195085687455983, "grad_norm": 0.0, "learning_rate": 1.5587469088232076e-05, "loss": 1.1124, "step": 8484 }, { "epoch": 0.33198998356678927, "grad_norm": 0.0, "learning_rate": 1.558641808016831e-05, "loss": 1.1116, "step": 8485 }, { "epoch": 0.3320291102590187, "grad_norm": 0.0, "learning_rate": 1.558536698239367e-05, "loss": 1.1176, "step": 8486 }, { "epoch": 0.33206823695124815, "grad_norm": 0.0, "learning_rate": 1.5584315794925032e-05, "loss": 1.1049, "step": 8487 }, { "epoch": 0.3321073636434776, "grad_norm": 0.0, "learning_rate": 1.5583264517779282e-05, "loss": 1.0469, "step": 8488 }, { "epoch": 0.33214649033570703, "grad_norm": 0.0, "learning_rate": 1.5582213150973296e-05, "loss": 1.151, "step": 8489 }, { "epoch": 0.3321856170279365, "grad_norm": 0.0, "learning_rate": 1.5581161694523966e-05, "loss": 1.1277, "step": 8490 }, { "epoch": 0.3322247437201659, "grad_norm": 0.0, "learning_rate": 1.5580110148448173e-05, "loss": 1.1285, "step": 8491 }, { "epoch": 0.33226387041239536, "grad_norm": 0.0, "learning_rate": 1.5579058512762802e-05, "loss": 1.1062, "step": 8492 }, { "epoch": 0.3323029971046248, "grad_norm": 0.0, "learning_rate": 1.5578006787484744e-05, "loss": 1.104, "step": 8493 }, { "epoch": 0.33234212379685424, "grad_norm": 0.0, "learning_rate": 1.5576954972630885e-05, "loss": 1.0664, "step": 8494 }, { "epoch": 0.3323812504890837, "grad_norm": 0.0, "learning_rate": 1.5575903068218115e-05, "loss": 0.9892, "step": 8495 }, { "epoch": 0.33242037718131306, "grad_norm": 0.0, "learning_rate": 1.5574851074263334e-05, "loss": 1.098, "step": 8496 }, { "epoch": 0.3324595038735425, "grad_norm": 0.0, "learning_rate": 1.5573798990783425e-05, "loss": 1.062, "step": 8497 }, { "epoch": 0.33249863056577195, "grad_norm": 0.0, "learning_rate": 1.5572746817795294e-05, "loss": 1.094, "step": 8498 }, { "epoch": 0.3325377572580014, "grad_norm": 0.0, "learning_rate": 1.557169455531583e-05, "loss": 1.1191, "step": 8499 }, { "epoch": 0.33257688395023083, "grad_norm": 0.0, "learning_rate": 1.5570642203361932e-05, "loss": 1.1978, "step": 8500 }, { "epoch": 0.33261601064246027, "grad_norm": 0.0, "learning_rate": 1.55695897619505e-05, "loss": 1.0121, "step": 8501 }, { "epoch": 0.3326551373346897, "grad_norm": 0.0, "learning_rate": 1.5568537231098438e-05, "loss": 1.0768, "step": 8502 }, { "epoch": 0.33269426402691915, "grad_norm": 0.0, "learning_rate": 1.5567484610822644e-05, "loss": 1.083, "step": 8503 }, { "epoch": 0.3327333907191486, "grad_norm": 0.0, "learning_rate": 1.5566431901140025e-05, "loss": 1.1033, "step": 8504 }, { "epoch": 0.33277251741137803, "grad_norm": 0.0, "learning_rate": 1.5565379102067485e-05, "loss": 1.0348, "step": 8505 }, { "epoch": 0.3328116441036075, "grad_norm": 0.0, "learning_rate": 1.556432621362193e-05, "loss": 1.0613, "step": 8506 }, { "epoch": 0.3328507707958369, "grad_norm": 0.0, "learning_rate": 1.5563273235820268e-05, "loss": 1.0551, "step": 8507 }, { "epoch": 0.33288989748806636, "grad_norm": 0.0, "learning_rate": 1.5562220168679408e-05, "loss": 1.1187, "step": 8508 }, { "epoch": 0.3329290241802958, "grad_norm": 0.0, "learning_rate": 1.556116701221626e-05, "loss": 1.1489, "step": 8509 }, { "epoch": 0.33296815087252524, "grad_norm": 0.0, "learning_rate": 1.5560113766447743e-05, "loss": 0.9839, "step": 8510 }, { "epoch": 0.3330072775647547, "grad_norm": 0.0, "learning_rate": 1.555906043139076e-05, "loss": 1.1001, "step": 8511 }, { "epoch": 0.3330464042569841, "grad_norm": 0.0, "learning_rate": 1.555800700706224e-05, "loss": 1.2067, "step": 8512 }, { "epoch": 0.33308553094921356, "grad_norm": 0.0, "learning_rate": 1.555695349347909e-05, "loss": 1.0676, "step": 8513 }, { "epoch": 0.333124657641443, "grad_norm": 0.0, "learning_rate": 1.555589989065823e-05, "loss": 1.0533, "step": 8514 }, { "epoch": 0.33316378433367244, "grad_norm": 0.0, "learning_rate": 1.5554846198616576e-05, "loss": 1.0185, "step": 8515 }, { "epoch": 0.3332029110259019, "grad_norm": 0.0, "learning_rate": 1.5553792417371058e-05, "loss": 1.0616, "step": 8516 }, { "epoch": 0.3332420377181313, "grad_norm": 0.0, "learning_rate": 1.555273854693859e-05, "loss": 1.1442, "step": 8517 }, { "epoch": 0.33328116441036076, "grad_norm": 0.0, "learning_rate": 1.5551684587336097e-05, "loss": 1.093, "step": 8518 }, { "epoch": 0.3333202911025902, "grad_norm": 0.0, "learning_rate": 1.5550630538580508e-05, "loss": 0.968, "step": 8519 }, { "epoch": 0.33335941779481965, "grad_norm": 0.0, "learning_rate": 1.5549576400688748e-05, "loss": 1.0397, "step": 8520 }, { "epoch": 0.3333985444870491, "grad_norm": 0.0, "learning_rate": 1.554852217367775e-05, "loss": 1.1454, "step": 8521 }, { "epoch": 0.3334376711792785, "grad_norm": 0.0, "learning_rate": 1.554746785756443e-05, "loss": 1.0364, "step": 8522 }, { "epoch": 0.33347679787150797, "grad_norm": 0.0, "learning_rate": 1.5546413452365734e-05, "loss": 1.0366, "step": 8523 }, { "epoch": 0.3335159245637374, "grad_norm": 0.0, "learning_rate": 1.5545358958098584e-05, "loss": 1.1171, "step": 8524 }, { "epoch": 0.3335550512559668, "grad_norm": 0.0, "learning_rate": 1.554430437477992e-05, "loss": 1.0765, "step": 8525 }, { "epoch": 0.33359417794819624, "grad_norm": 0.0, "learning_rate": 1.5543249702426674e-05, "loss": 1.0775, "step": 8526 }, { "epoch": 0.3336333046404257, "grad_norm": 0.0, "learning_rate": 1.5542194941055785e-05, "loss": 1.1147, "step": 8527 }, { "epoch": 0.3336724313326551, "grad_norm": 0.0, "learning_rate": 1.554114009068419e-05, "loss": 1.0499, "step": 8528 }, { "epoch": 0.33371155802488456, "grad_norm": 0.0, "learning_rate": 1.5540085151328826e-05, "loss": 1.1245, "step": 8529 }, { "epoch": 0.333750684717114, "grad_norm": 0.0, "learning_rate": 1.5539030123006636e-05, "loss": 1.1656, "step": 8530 }, { "epoch": 0.33378981140934344, "grad_norm": 0.0, "learning_rate": 1.5537975005734566e-05, "loss": 0.9885, "step": 8531 }, { "epoch": 0.3338289381015729, "grad_norm": 0.0, "learning_rate": 1.553691979952956e-05, "loss": 1.0934, "step": 8532 }, { "epoch": 0.3338680647938023, "grad_norm": 0.0, "learning_rate": 1.5535864504408553e-05, "loss": 1.0004, "step": 8533 }, { "epoch": 0.33390719148603176, "grad_norm": 0.0, "learning_rate": 1.5534809120388502e-05, "loss": 0.971, "step": 8534 }, { "epoch": 0.3339463181782612, "grad_norm": 0.0, "learning_rate": 1.5533753647486352e-05, "loss": 1.1833, "step": 8535 }, { "epoch": 0.33398544487049064, "grad_norm": 0.0, "learning_rate": 1.5532698085719052e-05, "loss": 0.9976, "step": 8536 }, { "epoch": 0.3340245715627201, "grad_norm": 0.0, "learning_rate": 1.5531642435103556e-05, "loss": 1.1047, "step": 8537 }, { "epoch": 0.3340636982549495, "grad_norm": 0.0, "learning_rate": 1.5530586695656814e-05, "loss": 0.9996, "step": 8538 }, { "epoch": 0.33410282494717897, "grad_norm": 0.0, "learning_rate": 1.5529530867395778e-05, "loss": 1.0464, "step": 8539 }, { "epoch": 0.3341419516394084, "grad_norm": 0.0, "learning_rate": 1.5528474950337405e-05, "loss": 1.15, "step": 8540 }, { "epoch": 0.33418107833163785, "grad_norm": 0.0, "learning_rate": 1.5527418944498656e-05, "loss": 1.151, "step": 8541 }, { "epoch": 0.3342202050238673, "grad_norm": 0.0, "learning_rate": 1.5526362849896478e-05, "loss": 1.1218, "step": 8542 }, { "epoch": 0.33425933171609673, "grad_norm": 0.0, "learning_rate": 1.5525306666547843e-05, "loss": 1.1735, "step": 8543 }, { "epoch": 0.33429845840832617, "grad_norm": 0.0, "learning_rate": 1.5524250394469708e-05, "loss": 1.0681, "step": 8544 }, { "epoch": 0.3343375851005556, "grad_norm": 0.0, "learning_rate": 1.5523194033679027e-05, "loss": 1.1636, "step": 8545 }, { "epoch": 0.33437671179278505, "grad_norm": 0.0, "learning_rate": 1.5522137584192775e-05, "loss": 1.0809, "step": 8546 }, { "epoch": 0.3344158384850145, "grad_norm": 0.0, "learning_rate": 1.552108104602791e-05, "loss": 1.0638, "step": 8547 }, { "epoch": 0.33445496517724393, "grad_norm": 0.0, "learning_rate": 1.5520024419201406e-05, "loss": 1.1503, "step": 8548 }, { "epoch": 0.3344940918694734, "grad_norm": 0.0, "learning_rate": 1.5518967703730224e-05, "loss": 0.9993, "step": 8549 }, { "epoch": 0.3345332185617028, "grad_norm": 0.0, "learning_rate": 1.551791089963134e-05, "loss": 1.0652, "step": 8550 }, { "epoch": 0.33457234525393226, "grad_norm": 0.0, "learning_rate": 1.5516854006921714e-05, "loss": 1.1044, "step": 8551 }, { "epoch": 0.3346114719461617, "grad_norm": 0.0, "learning_rate": 1.5515797025618332e-05, "loss": 1.1785, "step": 8552 }, { "epoch": 0.3346505986383911, "grad_norm": 0.0, "learning_rate": 1.551473995573816e-05, "loss": 1.0978, "step": 8553 }, { "epoch": 0.3346897253306205, "grad_norm": 0.0, "learning_rate": 1.5513682797298172e-05, "loss": 1.1885, "step": 8554 }, { "epoch": 0.33472885202284997, "grad_norm": 0.0, "learning_rate": 1.5512625550315354e-05, "loss": 1.0986, "step": 8555 }, { "epoch": 0.3347679787150794, "grad_norm": 0.0, "learning_rate": 1.551156821480667e-05, "loss": 1.0875, "step": 8556 }, { "epoch": 0.33480710540730885, "grad_norm": 0.0, "learning_rate": 1.551051079078911e-05, "loss": 1.1472, "step": 8557 }, { "epoch": 0.3348462320995383, "grad_norm": 0.0, "learning_rate": 1.550945327827965e-05, "loss": 1.127, "step": 8558 }, { "epoch": 0.33488535879176773, "grad_norm": 0.0, "learning_rate": 1.5508395677295278e-05, "loss": 1.1069, "step": 8559 }, { "epoch": 0.33492448548399717, "grad_norm": 0.0, "learning_rate": 1.5507337987852972e-05, "loss": 1.0716, "step": 8560 }, { "epoch": 0.3349636121762266, "grad_norm": 0.0, "learning_rate": 1.5506280209969716e-05, "loss": 1.0451, "step": 8561 }, { "epoch": 0.33500273886845605, "grad_norm": 0.0, "learning_rate": 1.5505222343662506e-05, "loss": 1.0559, "step": 8562 }, { "epoch": 0.3350418655606855, "grad_norm": 0.0, "learning_rate": 1.550416438894832e-05, "loss": 1.0748, "step": 8563 }, { "epoch": 0.33508099225291493, "grad_norm": 0.0, "learning_rate": 1.550310634584415e-05, "loss": 1.0234, "step": 8564 }, { "epoch": 0.3351201189451444, "grad_norm": 0.0, "learning_rate": 1.5502048214366986e-05, "loss": 1.0584, "step": 8565 }, { "epoch": 0.3351592456373738, "grad_norm": 0.0, "learning_rate": 1.5500989994533828e-05, "loss": 0.944, "step": 8566 }, { "epoch": 0.33519837232960326, "grad_norm": 0.0, "learning_rate": 1.5499931686361658e-05, "loss": 1.1675, "step": 8567 }, { "epoch": 0.3352374990218327, "grad_norm": 0.0, "learning_rate": 1.549887328986748e-05, "loss": 1.0949, "step": 8568 }, { "epoch": 0.33527662571406214, "grad_norm": 0.0, "learning_rate": 1.5497814805068286e-05, "loss": 1.0143, "step": 8569 }, { "epoch": 0.3353157524062916, "grad_norm": 0.0, "learning_rate": 1.5496756231981077e-05, "loss": 1.0553, "step": 8570 }, { "epoch": 0.335354879098521, "grad_norm": 0.0, "learning_rate": 1.549569757062285e-05, "loss": 1.1057, "step": 8571 }, { "epoch": 0.33539400579075046, "grad_norm": 0.0, "learning_rate": 1.5494638821010607e-05, "loss": 1.022, "step": 8572 }, { "epoch": 0.3354331324829799, "grad_norm": 0.0, "learning_rate": 1.549357998316135e-05, "loss": 1.1337, "step": 8573 }, { "epoch": 0.33547225917520934, "grad_norm": 0.0, "learning_rate": 1.549252105709208e-05, "loss": 1.1539, "step": 8574 }, { "epoch": 0.3355113858674388, "grad_norm": 0.0, "learning_rate": 1.5491462042819808e-05, "loss": 1.2344, "step": 8575 }, { "epoch": 0.3355505125596682, "grad_norm": 0.0, "learning_rate": 1.549040294036153e-05, "loss": 1.1789, "step": 8576 }, { "epoch": 0.33558963925189766, "grad_norm": 0.0, "learning_rate": 1.5489343749734268e-05, "loss": 1.0545, "step": 8577 }, { "epoch": 0.3356287659441271, "grad_norm": 0.0, "learning_rate": 1.548828447095502e-05, "loss": 1.1187, "step": 8578 }, { "epoch": 0.33566789263635655, "grad_norm": 0.0, "learning_rate": 1.54872251040408e-05, "loss": 1.1722, "step": 8579 }, { "epoch": 0.335707019328586, "grad_norm": 0.0, "learning_rate": 1.5486165649008623e-05, "loss": 1.1342, "step": 8580 }, { "epoch": 0.3357461460208154, "grad_norm": 0.0, "learning_rate": 1.54851061058755e-05, "loss": 1.1177, "step": 8581 }, { "epoch": 0.3357852727130448, "grad_norm": 0.0, "learning_rate": 1.5484046474658448e-05, "loss": 1.1263, "step": 8582 }, { "epoch": 0.33582439940527425, "grad_norm": 0.0, "learning_rate": 1.5482986755374478e-05, "loss": 1.227, "step": 8583 }, { "epoch": 0.3358635260975037, "grad_norm": 0.0, "learning_rate": 1.5481926948040613e-05, "loss": 1.0955, "step": 8584 }, { "epoch": 0.33590265278973314, "grad_norm": 0.0, "learning_rate": 1.5480867052673868e-05, "loss": 1.0164, "step": 8585 }, { "epoch": 0.3359417794819626, "grad_norm": 0.0, "learning_rate": 1.547980706929127e-05, "loss": 1.093, "step": 8586 }, { "epoch": 0.335980906174192, "grad_norm": 0.0, "learning_rate": 1.547874699790983e-05, "loss": 1.1096, "step": 8587 }, { "epoch": 0.33602003286642146, "grad_norm": 0.0, "learning_rate": 1.547768683854659e-05, "loss": 1.1152, "step": 8588 }, { "epoch": 0.3360591595586509, "grad_norm": 0.0, "learning_rate": 1.5476626591218553e-05, "loss": 1.2123, "step": 8589 }, { "epoch": 0.33609828625088034, "grad_norm": 0.0, "learning_rate": 1.5475566255942764e-05, "loss": 1.0779, "step": 8590 }, { "epoch": 0.3361374129431098, "grad_norm": 0.0, "learning_rate": 1.5474505832736233e-05, "loss": 1.0834, "step": 8591 }, { "epoch": 0.3361765396353392, "grad_norm": 0.0, "learning_rate": 1.5473445321616004e-05, "loss": 1.0179, "step": 8592 }, { "epoch": 0.33621566632756866, "grad_norm": 0.0, "learning_rate": 1.5472384722599102e-05, "loss": 1.1425, "step": 8593 }, { "epoch": 0.3362547930197981, "grad_norm": 0.0, "learning_rate": 1.5471324035702555e-05, "loss": 1.0605, "step": 8594 }, { "epoch": 0.33629391971202754, "grad_norm": 0.0, "learning_rate": 1.5470263260943402e-05, "loss": 1.1158, "step": 8595 }, { "epoch": 0.336333046404257, "grad_norm": 0.0, "learning_rate": 1.5469202398338676e-05, "loss": 1.0988, "step": 8596 }, { "epoch": 0.3363721730964864, "grad_norm": 0.0, "learning_rate": 1.5468141447905412e-05, "loss": 1.125, "step": 8597 }, { "epoch": 0.33641129978871587, "grad_norm": 0.0, "learning_rate": 1.546708040966065e-05, "loss": 1.0938, "step": 8598 }, { "epoch": 0.3364504264809453, "grad_norm": 0.0, "learning_rate": 1.5466019283621426e-05, "loss": 1.1923, "step": 8599 }, { "epoch": 0.33648955317317475, "grad_norm": 0.0, "learning_rate": 1.546495806980478e-05, "loss": 1.1893, "step": 8600 }, { "epoch": 0.3365286798654042, "grad_norm": 0.0, "learning_rate": 1.546389676822776e-05, "loss": 1.1641, "step": 8601 }, { "epoch": 0.33656780655763363, "grad_norm": 0.0, "learning_rate": 1.5462835378907405e-05, "loss": 1.1555, "step": 8602 }, { "epoch": 0.33660693324986307, "grad_norm": 0.0, "learning_rate": 1.5461773901860754e-05, "loss": 1.1185, "step": 8603 }, { "epoch": 0.3366460599420925, "grad_norm": 0.0, "learning_rate": 1.546071233710486e-05, "loss": 1.1942, "step": 8604 }, { "epoch": 0.33668518663432195, "grad_norm": 0.0, "learning_rate": 1.545965068465677e-05, "loss": 1.1306, "step": 8605 }, { "epoch": 0.3367243133265514, "grad_norm": 0.0, "learning_rate": 1.545858894453353e-05, "loss": 1.0649, "step": 8606 }, { "epoch": 0.33676344001878084, "grad_norm": 0.0, "learning_rate": 1.545752711675219e-05, "loss": 1.0665, "step": 8607 }, { "epoch": 0.3368025667110103, "grad_norm": 0.0, "learning_rate": 1.5456465201329805e-05, "loss": 1.1611, "step": 8608 }, { "epoch": 0.3368416934032397, "grad_norm": 0.0, "learning_rate": 1.5455403198283426e-05, "loss": 1.1844, "step": 8609 }, { "epoch": 0.3368808200954691, "grad_norm": 0.0, "learning_rate": 1.5454341107630106e-05, "loss": 1.0643, "step": 8610 }, { "epoch": 0.33691994678769854, "grad_norm": 0.0, "learning_rate": 1.5453278929386904e-05, "loss": 1.0868, "step": 8611 }, { "epoch": 0.336959073479928, "grad_norm": 0.0, "learning_rate": 1.5452216663570877e-05, "loss": 1.0671, "step": 8612 }, { "epoch": 0.3369982001721574, "grad_norm": 0.0, "learning_rate": 1.545115431019908e-05, "loss": 1.0547, "step": 8613 }, { "epoch": 0.33703732686438687, "grad_norm": 0.0, "learning_rate": 1.5450091869288577e-05, "loss": 1.0729, "step": 8614 }, { "epoch": 0.3370764535566163, "grad_norm": 0.0, "learning_rate": 1.544902934085643e-05, "loss": 1.0145, "step": 8615 }, { "epoch": 0.33711558024884575, "grad_norm": 0.0, "learning_rate": 1.5447966724919692e-05, "loss": 1.3272, "step": 8616 }, { "epoch": 0.3371547069410752, "grad_norm": 0.0, "learning_rate": 1.544690402149544e-05, "loss": 1.0305, "step": 8617 }, { "epoch": 0.33719383363330463, "grad_norm": 0.0, "learning_rate": 1.5445841230600738e-05, "loss": 1.148, "step": 8618 }, { "epoch": 0.33723296032553407, "grad_norm": 0.0, "learning_rate": 1.544477835225265e-05, "loss": 1.2292, "step": 8619 }, { "epoch": 0.3372720870177635, "grad_norm": 0.0, "learning_rate": 1.5443715386468235e-05, "loss": 1.0347, "step": 8620 }, { "epoch": 0.33731121370999295, "grad_norm": 0.0, "learning_rate": 1.544265233326458e-05, "loss": 1.0835, "step": 8621 }, { "epoch": 0.3373503404022224, "grad_norm": 0.0, "learning_rate": 1.544158919265875e-05, "loss": 0.9922, "step": 8622 }, { "epoch": 0.33738946709445183, "grad_norm": 0.0, "learning_rate": 1.5440525964667813e-05, "loss": 1.1064, "step": 8623 }, { "epoch": 0.3374285937866813, "grad_norm": 0.0, "learning_rate": 1.5439462649308847e-05, "loss": 1.1742, "step": 8624 }, { "epoch": 0.3374677204789107, "grad_norm": 0.0, "learning_rate": 1.5438399246598926e-05, "loss": 1.0181, "step": 8625 }, { "epoch": 0.33750684717114016, "grad_norm": 0.0, "learning_rate": 1.543733575655513e-05, "loss": 1.0975, "step": 8626 }, { "epoch": 0.3375459738633696, "grad_norm": 0.0, "learning_rate": 1.5436272179194533e-05, "loss": 1.053, "step": 8627 }, { "epoch": 0.33758510055559904, "grad_norm": 0.0, "learning_rate": 1.5435208514534215e-05, "loss": 1.1241, "step": 8628 }, { "epoch": 0.3376242272478285, "grad_norm": 0.0, "learning_rate": 1.5434144762591263e-05, "loss": 1.1078, "step": 8629 }, { "epoch": 0.3376633539400579, "grad_norm": 0.0, "learning_rate": 1.5433080923382754e-05, "loss": 1.1551, "step": 8630 }, { "epoch": 0.33770248063228736, "grad_norm": 0.0, "learning_rate": 1.5432016996925772e-05, "loss": 1.062, "step": 8631 }, { "epoch": 0.3377416073245168, "grad_norm": 0.0, "learning_rate": 1.5430952983237404e-05, "loss": 1.1437, "step": 8632 }, { "epoch": 0.33778073401674624, "grad_norm": 0.0, "learning_rate": 1.542988888233474e-05, "loss": 1.074, "step": 8633 }, { "epoch": 0.3378198607089757, "grad_norm": 0.0, "learning_rate": 1.5428824694234857e-05, "loss": 1.1867, "step": 8634 }, { "epoch": 0.3378589874012051, "grad_norm": 0.0, "learning_rate": 1.542776041895486e-05, "loss": 1.1895, "step": 8635 }, { "epoch": 0.33789811409343457, "grad_norm": 0.0, "learning_rate": 1.5426696056511827e-05, "loss": 1.0718, "step": 8636 }, { "epoch": 0.337937240785664, "grad_norm": 0.0, "learning_rate": 1.542563160692286e-05, "loss": 0.9999, "step": 8637 }, { "epoch": 0.3379763674778934, "grad_norm": 0.0, "learning_rate": 1.5424567070205043e-05, "loss": 1.1321, "step": 8638 }, { "epoch": 0.33801549417012283, "grad_norm": 0.0, "learning_rate": 1.542350244637548e-05, "loss": 1.3097, "step": 8639 }, { "epoch": 0.3380546208623523, "grad_norm": 0.0, "learning_rate": 1.5422437735451262e-05, "loss": 1.0145, "step": 8640 }, { "epoch": 0.3380937475545817, "grad_norm": 0.0, "learning_rate": 1.5421372937449487e-05, "loss": 1.1736, "step": 8641 }, { "epoch": 0.33813287424681115, "grad_norm": 0.0, "learning_rate": 1.542030805238726e-05, "loss": 1.0751, "step": 8642 }, { "epoch": 0.3381720009390406, "grad_norm": 0.0, "learning_rate": 1.5419243080281675e-05, "loss": 1.135, "step": 8643 }, { "epoch": 0.33821112763127004, "grad_norm": 0.0, "learning_rate": 1.5418178021149837e-05, "loss": 1.1316, "step": 8644 }, { "epoch": 0.3382502543234995, "grad_norm": 0.0, "learning_rate": 1.5417112875008854e-05, "loss": 1.1794, "step": 8645 }, { "epoch": 0.3382893810157289, "grad_norm": 0.0, "learning_rate": 1.5416047641875823e-05, "loss": 1.0552, "step": 8646 }, { "epoch": 0.33832850770795836, "grad_norm": 0.0, "learning_rate": 1.541498232176785e-05, "loss": 1.1213, "step": 8647 }, { "epoch": 0.3383676344001878, "grad_norm": 0.0, "learning_rate": 1.541391691470205e-05, "loss": 1.2429, "step": 8648 }, { "epoch": 0.33840676109241724, "grad_norm": 0.0, "learning_rate": 1.5412851420695524e-05, "loss": 1.1356, "step": 8649 }, { "epoch": 0.3384458877846467, "grad_norm": 0.0, "learning_rate": 1.541178583976539e-05, "loss": 1.0198, "step": 8650 }, { "epoch": 0.3384850144768761, "grad_norm": 0.0, "learning_rate": 1.5410720171928758e-05, "loss": 1.1216, "step": 8651 }, { "epoch": 0.33852414116910556, "grad_norm": 0.0, "learning_rate": 1.540965441720274e-05, "loss": 1.3082, "step": 8652 }, { "epoch": 0.338563267861335, "grad_norm": 0.0, "learning_rate": 1.5408588575604452e-05, "loss": 1.2566, "step": 8653 }, { "epoch": 0.33860239455356445, "grad_norm": 0.0, "learning_rate": 1.5407522647151005e-05, "loss": 1.0782, "step": 8654 }, { "epoch": 0.3386415212457939, "grad_norm": 0.0, "learning_rate": 1.5406456631859523e-05, "loss": 1.2216, "step": 8655 }, { "epoch": 0.3386806479380233, "grad_norm": 0.0, "learning_rate": 1.5405390529747123e-05, "loss": 1.1201, "step": 8656 }, { "epoch": 0.33871977463025277, "grad_norm": 0.0, "learning_rate": 1.540432434083092e-05, "loss": 1.2527, "step": 8657 }, { "epoch": 0.3387589013224822, "grad_norm": 0.0, "learning_rate": 1.5403258065128042e-05, "loss": 0.9666, "step": 8658 }, { "epoch": 0.33879802801471165, "grad_norm": 0.0, "learning_rate": 1.5402191702655614e-05, "loss": 1.0194, "step": 8659 }, { "epoch": 0.3388371547069411, "grad_norm": 0.0, "learning_rate": 1.5401125253430753e-05, "loss": 1.1066, "step": 8660 }, { "epoch": 0.33887628139917053, "grad_norm": 0.0, "learning_rate": 1.5400058717470585e-05, "loss": 1.2485, "step": 8661 }, { "epoch": 0.3389154080914, "grad_norm": 0.0, "learning_rate": 1.5398992094792247e-05, "loss": 1.1955, "step": 8662 }, { "epoch": 0.3389545347836294, "grad_norm": 0.0, "learning_rate": 1.5397925385412858e-05, "loss": 1.0389, "step": 8663 }, { "epoch": 0.33899366147585885, "grad_norm": 0.0, "learning_rate": 1.5396858589349553e-05, "loss": 1.0963, "step": 8664 }, { "epoch": 0.3390327881680883, "grad_norm": 0.0, "learning_rate": 1.539579170661946e-05, "loss": 1.1642, "step": 8665 }, { "epoch": 0.33907191486031774, "grad_norm": 0.0, "learning_rate": 1.5394724737239712e-05, "loss": 1.1651, "step": 8666 }, { "epoch": 0.3391110415525471, "grad_norm": 0.0, "learning_rate": 1.5393657681227448e-05, "loss": 1.0625, "step": 8667 }, { "epoch": 0.33915016824477656, "grad_norm": 0.0, "learning_rate": 1.53925905385998e-05, "loss": 1.0311, "step": 8668 }, { "epoch": 0.339189294937006, "grad_norm": 0.0, "learning_rate": 1.53915233093739e-05, "loss": 1.0749, "step": 8669 }, { "epoch": 0.33922842162923544, "grad_norm": 0.0, "learning_rate": 1.53904559935669e-05, "loss": 1.1212, "step": 8670 }, { "epoch": 0.3392675483214649, "grad_norm": 0.0, "learning_rate": 1.5389388591195928e-05, "loss": 1.053, "step": 8671 }, { "epoch": 0.3393066750136943, "grad_norm": 0.0, "learning_rate": 1.5388321102278124e-05, "loss": 1.1605, "step": 8672 }, { "epoch": 0.33934580170592377, "grad_norm": 0.0, "learning_rate": 1.538725352683064e-05, "loss": 1.0556, "step": 8673 }, { "epoch": 0.3393849283981532, "grad_norm": 0.0, "learning_rate": 1.5386185864870615e-05, "loss": 1.1765, "step": 8674 }, { "epoch": 0.33942405509038265, "grad_norm": 0.0, "learning_rate": 1.5385118116415194e-05, "loss": 1.1217, "step": 8675 }, { "epoch": 0.3394631817826121, "grad_norm": 0.0, "learning_rate": 1.538405028148152e-05, "loss": 1.209, "step": 8676 }, { "epoch": 0.33950230847484153, "grad_norm": 0.0, "learning_rate": 1.5382982360086753e-05, "loss": 1.2601, "step": 8677 }, { "epoch": 0.33954143516707097, "grad_norm": 0.0, "learning_rate": 1.5381914352248027e-05, "loss": 1.199, "step": 8678 }, { "epoch": 0.3395805618593004, "grad_norm": 0.0, "learning_rate": 1.53808462579825e-05, "loss": 1.2377, "step": 8679 }, { "epoch": 0.33961968855152985, "grad_norm": 0.0, "learning_rate": 1.5379778077307333e-05, "loss": 1.0605, "step": 8680 }, { "epoch": 0.3396588152437593, "grad_norm": 0.0, "learning_rate": 1.5378709810239666e-05, "loss": 1.0494, "step": 8681 }, { "epoch": 0.33969794193598873, "grad_norm": 0.0, "learning_rate": 1.5377641456796658e-05, "loss": 1.181, "step": 8682 }, { "epoch": 0.3397370686282182, "grad_norm": 0.0, "learning_rate": 1.537657301699547e-05, "loss": 1.0969, "step": 8683 }, { "epoch": 0.3397761953204476, "grad_norm": 0.0, "learning_rate": 1.5375504490853255e-05, "loss": 1.1502, "step": 8684 }, { "epoch": 0.33981532201267706, "grad_norm": 0.0, "learning_rate": 1.537443587838717e-05, "loss": 1.1922, "step": 8685 }, { "epoch": 0.3398544487049065, "grad_norm": 0.0, "learning_rate": 1.5373367179614387e-05, "loss": 1.1213, "step": 8686 }, { "epoch": 0.33989357539713594, "grad_norm": 0.0, "learning_rate": 1.5372298394552053e-05, "loss": 1.1523, "step": 8687 }, { "epoch": 0.3399327020893654, "grad_norm": 0.0, "learning_rate": 1.5371229523217343e-05, "loss": 1.1516, "step": 8688 }, { "epoch": 0.3399718287815948, "grad_norm": 0.0, "learning_rate": 1.5370160565627417e-05, "loss": 1.1469, "step": 8689 }, { "epoch": 0.34001095547382426, "grad_norm": 0.0, "learning_rate": 1.5369091521799438e-05, "loss": 1.0207, "step": 8690 }, { "epoch": 0.3400500821660537, "grad_norm": 0.0, "learning_rate": 1.5368022391750583e-05, "loss": 1.1346, "step": 8691 }, { "epoch": 0.34008920885828314, "grad_norm": 0.0, "learning_rate": 1.5366953175498012e-05, "loss": 1.0328, "step": 8692 }, { "epoch": 0.3401283355505126, "grad_norm": 0.0, "learning_rate": 1.5365883873058897e-05, "loss": 1.2458, "step": 8693 }, { "epoch": 0.340167462242742, "grad_norm": 0.0, "learning_rate": 1.536481448445041e-05, "loss": 1.0721, "step": 8694 }, { "epoch": 0.3402065889349714, "grad_norm": 0.0, "learning_rate": 1.536374500968973e-05, "loss": 1.0857, "step": 8695 }, { "epoch": 0.34024571562720085, "grad_norm": 0.0, "learning_rate": 1.536267544879402e-05, "loss": 1.1306, "step": 8696 }, { "epoch": 0.3402848423194303, "grad_norm": 0.0, "learning_rate": 1.5361605801780465e-05, "loss": 1.143, "step": 8697 }, { "epoch": 0.34032396901165973, "grad_norm": 0.0, "learning_rate": 1.536053606866624e-05, "loss": 1.0528, "step": 8698 }, { "epoch": 0.3403630957038892, "grad_norm": 0.0, "learning_rate": 1.535946624946852e-05, "loss": 1.1377, "step": 8699 }, { "epoch": 0.3404022223961186, "grad_norm": 0.0, "learning_rate": 1.5358396344204494e-05, "loss": 1.1437, "step": 8700 }, { "epoch": 0.34044134908834806, "grad_norm": 0.0, "learning_rate": 1.535732635289133e-05, "loss": 1.1049, "step": 8701 }, { "epoch": 0.3404804757805775, "grad_norm": 0.0, "learning_rate": 1.535625627554622e-05, "loss": 1.1694, "step": 8702 }, { "epoch": 0.34051960247280694, "grad_norm": 0.0, "learning_rate": 1.535518611218635e-05, "loss": 1.1136, "step": 8703 }, { "epoch": 0.3405587291650364, "grad_norm": 0.0, "learning_rate": 1.53541158628289e-05, "loss": 1.0199, "step": 8704 }, { "epoch": 0.3405978558572658, "grad_norm": 0.0, "learning_rate": 1.5353045527491052e-05, "loss": 1.261, "step": 8705 }, { "epoch": 0.34063698254949526, "grad_norm": 0.0, "learning_rate": 1.5351975106190006e-05, "loss": 1.1104, "step": 8706 }, { "epoch": 0.3406761092417247, "grad_norm": 0.0, "learning_rate": 1.5350904598942947e-05, "loss": 1.1063, "step": 8707 }, { "epoch": 0.34071523593395414, "grad_norm": 0.0, "learning_rate": 1.534983400576706e-05, "loss": 1.1192, "step": 8708 }, { "epoch": 0.3407543626261836, "grad_norm": 0.0, "learning_rate": 1.534876332667955e-05, "loss": 1.1195, "step": 8709 }, { "epoch": 0.340793489318413, "grad_norm": 0.0, "learning_rate": 1.53476925616976e-05, "loss": 1.0052, "step": 8710 }, { "epoch": 0.34083261601064246, "grad_norm": 0.0, "learning_rate": 1.534662171083841e-05, "loss": 1.0535, "step": 8711 }, { "epoch": 0.3408717427028719, "grad_norm": 0.0, "learning_rate": 1.534555077411917e-05, "loss": 1.0587, "step": 8712 }, { "epoch": 0.34091086939510135, "grad_norm": 0.0, "learning_rate": 1.5344479751557085e-05, "loss": 1.1689, "step": 8713 }, { "epoch": 0.3409499960873308, "grad_norm": 0.0, "learning_rate": 1.5343408643169354e-05, "loss": 1.0348, "step": 8714 }, { "epoch": 0.3409891227795602, "grad_norm": 0.0, "learning_rate": 1.5342337448973176e-05, "loss": 1.1177, "step": 8715 }, { "epoch": 0.34102824947178967, "grad_norm": 0.0, "learning_rate": 1.534126616898575e-05, "loss": 1.1489, "step": 8716 }, { "epoch": 0.3410673761640191, "grad_norm": 0.0, "learning_rate": 1.5340194803224284e-05, "loss": 1.0009, "step": 8717 }, { "epoch": 0.34110650285624855, "grad_norm": 0.0, "learning_rate": 1.5339123351705986e-05, "loss": 1.0039, "step": 8718 }, { "epoch": 0.341145629548478, "grad_norm": 0.0, "learning_rate": 1.5338051814448048e-05, "loss": 1.0619, "step": 8719 }, { "epoch": 0.34118475624070743, "grad_norm": 0.0, "learning_rate": 1.5336980191467696e-05, "loss": 1.1315, "step": 8720 }, { "epoch": 0.3412238829329369, "grad_norm": 0.0, "learning_rate": 1.533590848278212e-05, "loss": 1.1919, "step": 8721 }, { "epoch": 0.3412630096251663, "grad_norm": 0.0, "learning_rate": 1.5334836688408548e-05, "loss": 0.9978, "step": 8722 }, { "epoch": 0.34130213631739575, "grad_norm": 0.0, "learning_rate": 1.533376480836418e-05, "loss": 1.1498, "step": 8723 }, { "epoch": 0.34134126300962514, "grad_norm": 0.0, "learning_rate": 1.5332692842666236e-05, "loss": 1.1345, "step": 8724 }, { "epoch": 0.3413803897018546, "grad_norm": 0.0, "learning_rate": 1.5331620791331924e-05, "loss": 1.0057, "step": 8725 }, { "epoch": 0.341419516394084, "grad_norm": 0.0, "learning_rate": 1.5330548654378464e-05, "loss": 1.2075, "step": 8726 }, { "epoch": 0.34145864308631346, "grad_norm": 0.0, "learning_rate": 1.5329476431823072e-05, "loss": 1.0696, "step": 8727 }, { "epoch": 0.3414977697785429, "grad_norm": 0.0, "learning_rate": 1.5328404123682966e-05, "loss": 1.0584, "step": 8728 }, { "epoch": 0.34153689647077234, "grad_norm": 0.0, "learning_rate": 1.532733172997537e-05, "loss": 1.0242, "step": 8729 }, { "epoch": 0.3415760231630018, "grad_norm": 0.0, "learning_rate": 1.5326259250717497e-05, "loss": 1.2058, "step": 8730 }, { "epoch": 0.3416151498552312, "grad_norm": 0.0, "learning_rate": 1.5325186685926578e-05, "loss": 1.0872, "step": 8731 }, { "epoch": 0.34165427654746067, "grad_norm": 0.0, "learning_rate": 1.5324114035619832e-05, "loss": 1.0728, "step": 8732 }, { "epoch": 0.3416934032396901, "grad_norm": 0.0, "learning_rate": 1.532304129981449e-05, "loss": 1.0006, "step": 8733 }, { "epoch": 0.34173252993191955, "grad_norm": 0.0, "learning_rate": 1.532196847852777e-05, "loss": 1.0727, "step": 8734 }, { "epoch": 0.341771656624149, "grad_norm": 0.0, "learning_rate": 1.5320895571776903e-05, "loss": 1.1371, "step": 8735 }, { "epoch": 0.34181078331637843, "grad_norm": 0.0, "learning_rate": 1.5319822579579125e-05, "loss": 1.0787, "step": 8736 }, { "epoch": 0.34184991000860787, "grad_norm": 0.0, "learning_rate": 1.531874950195166e-05, "loss": 1.1111, "step": 8737 }, { "epoch": 0.3418890367008373, "grad_norm": 0.0, "learning_rate": 1.5317676338911748e-05, "loss": 1.2657, "step": 8738 }, { "epoch": 0.34192816339306675, "grad_norm": 0.0, "learning_rate": 1.5316603090476614e-05, "loss": 1.0328, "step": 8739 }, { "epoch": 0.3419672900852962, "grad_norm": 0.0, "learning_rate": 1.5315529756663495e-05, "loss": 1.0851, "step": 8740 }, { "epoch": 0.34200641677752563, "grad_norm": 0.0, "learning_rate": 1.531445633748963e-05, "loss": 1.1053, "step": 8741 }, { "epoch": 0.3420455434697551, "grad_norm": 0.0, "learning_rate": 1.5313382832972257e-05, "loss": 1.2402, "step": 8742 }, { "epoch": 0.3420846701619845, "grad_norm": 0.0, "learning_rate": 1.531230924312861e-05, "loss": 1.0619, "step": 8743 }, { "epoch": 0.34212379685421396, "grad_norm": 0.0, "learning_rate": 1.5311235567975936e-05, "loss": 1.0052, "step": 8744 }, { "epoch": 0.3421629235464434, "grad_norm": 0.0, "learning_rate": 1.5310161807531476e-05, "loss": 1.1005, "step": 8745 }, { "epoch": 0.34220205023867284, "grad_norm": 0.0, "learning_rate": 1.5309087961812468e-05, "loss": 1.0942, "step": 8746 }, { "epoch": 0.3422411769309023, "grad_norm": 0.0, "learning_rate": 1.530801403083616e-05, "loss": 1.1062, "step": 8747 }, { "epoch": 0.3422803036231317, "grad_norm": 0.0, "learning_rate": 1.53069400146198e-05, "loss": 1.1732, "step": 8748 }, { "epoch": 0.34231943031536116, "grad_norm": 0.0, "learning_rate": 1.5305865913180633e-05, "loss": 0.944, "step": 8749 }, { "epoch": 0.3423585570075906, "grad_norm": 0.0, "learning_rate": 1.5304791726535905e-05, "loss": 1.0647, "step": 8750 }, { "epoch": 0.34239768369982004, "grad_norm": 0.0, "learning_rate": 1.5303717454702872e-05, "loss": 1.1603, "step": 8751 }, { "epoch": 0.34243681039204943, "grad_norm": 0.0, "learning_rate": 1.530264309769878e-05, "loss": 1.0897, "step": 8752 }, { "epoch": 0.34247593708427887, "grad_norm": 0.0, "learning_rate": 1.530156865554089e-05, "loss": 0.9761, "step": 8753 }, { "epoch": 0.3425150637765083, "grad_norm": 0.0, "learning_rate": 1.5300494128246447e-05, "loss": 1.2428, "step": 8754 }, { "epoch": 0.34255419046873775, "grad_norm": 0.0, "learning_rate": 1.529941951583271e-05, "loss": 1.1503, "step": 8755 }, { "epoch": 0.3425933171609672, "grad_norm": 0.0, "learning_rate": 1.5298344818316937e-05, "loss": 1.2796, "step": 8756 }, { "epoch": 0.34263244385319663, "grad_norm": 0.0, "learning_rate": 1.5297270035716386e-05, "loss": 1.028, "step": 8757 }, { "epoch": 0.3426715705454261, "grad_norm": 0.0, "learning_rate": 1.5296195168048315e-05, "loss": 1.0863, "step": 8758 }, { "epoch": 0.3427106972376555, "grad_norm": 0.0, "learning_rate": 1.5295120215329985e-05, "loss": 1.0028, "step": 8759 }, { "epoch": 0.34274982392988496, "grad_norm": 0.0, "learning_rate": 1.529404517757866e-05, "loss": 1.0538, "step": 8760 }, { "epoch": 0.3427889506221144, "grad_norm": 0.0, "learning_rate": 1.5292970054811606e-05, "loss": 1.2142, "step": 8761 }, { "epoch": 0.34282807731434384, "grad_norm": 0.0, "learning_rate": 1.529189484704608e-05, "loss": 1.1408, "step": 8762 }, { "epoch": 0.3428672040065733, "grad_norm": 0.0, "learning_rate": 1.5290819554299358e-05, "loss": 1.018, "step": 8763 }, { "epoch": 0.3429063306988027, "grad_norm": 0.0, "learning_rate": 1.5289744176588704e-05, "loss": 0.9889, "step": 8764 }, { "epoch": 0.34294545739103216, "grad_norm": 0.0, "learning_rate": 1.5288668713931387e-05, "loss": 1.0557, "step": 8765 }, { "epoch": 0.3429845840832616, "grad_norm": 0.0, "learning_rate": 1.5287593166344677e-05, "loss": 1.1954, "step": 8766 }, { "epoch": 0.34302371077549104, "grad_norm": 0.0, "learning_rate": 1.5286517533845845e-05, "loss": 1.0335, "step": 8767 }, { "epoch": 0.3430628374677205, "grad_norm": 0.0, "learning_rate": 1.5285441816452168e-05, "loss": 1.1544, "step": 8768 }, { "epoch": 0.3431019641599499, "grad_norm": 0.0, "learning_rate": 1.5284366014180916e-05, "loss": 1.0999, "step": 8769 }, { "epoch": 0.34314109085217936, "grad_norm": 0.0, "learning_rate": 1.528329012704937e-05, "loss": 0.9557, "step": 8770 }, { "epoch": 0.3431802175444088, "grad_norm": 0.0, "learning_rate": 1.5282214155074805e-05, "loss": 1.1402, "step": 8771 }, { "epoch": 0.34321934423663825, "grad_norm": 0.0, "learning_rate": 1.5281138098274496e-05, "loss": 1.0716, "step": 8772 }, { "epoch": 0.3432584709288677, "grad_norm": 0.0, "learning_rate": 1.528006195666573e-05, "loss": 1.1842, "step": 8773 }, { "epoch": 0.34329759762109713, "grad_norm": 0.0, "learning_rate": 1.5278985730265782e-05, "loss": 1.1579, "step": 8774 }, { "epoch": 0.34333672431332657, "grad_norm": 0.0, "learning_rate": 1.5277909419091942e-05, "loss": 1.0832, "step": 8775 }, { "epoch": 0.343375851005556, "grad_norm": 0.0, "learning_rate": 1.527683302316149e-05, "loss": 0.9991, "step": 8776 }, { "epoch": 0.34341497769778545, "grad_norm": 0.0, "learning_rate": 1.527575654249171e-05, "loss": 1.022, "step": 8777 }, { "epoch": 0.3434541043900149, "grad_norm": 0.0, "learning_rate": 1.527467997709989e-05, "loss": 1.1412, "step": 8778 }, { "epoch": 0.34349323108224433, "grad_norm": 0.0, "learning_rate": 1.527360332700332e-05, "loss": 1.0337, "step": 8779 }, { "epoch": 0.3435323577744738, "grad_norm": 0.0, "learning_rate": 1.527252659221929e-05, "loss": 1.0904, "step": 8780 }, { "epoch": 0.34357148446670316, "grad_norm": 0.0, "learning_rate": 1.527144977276509e-05, "loss": 1.2372, "step": 8781 }, { "epoch": 0.3436106111589326, "grad_norm": 0.0, "learning_rate": 1.5270372868658012e-05, "loss": 1.2811, "step": 8782 }, { "epoch": 0.34364973785116204, "grad_norm": 0.0, "learning_rate": 1.526929587991535e-05, "loss": 1.0625, "step": 8783 }, { "epoch": 0.3436888645433915, "grad_norm": 0.0, "learning_rate": 1.5268218806554398e-05, "loss": 1.1256, "step": 8784 }, { "epoch": 0.3437279912356209, "grad_norm": 0.0, "learning_rate": 1.526714164859245e-05, "loss": 1.1305, "step": 8785 }, { "epoch": 0.34376711792785036, "grad_norm": 0.0, "learning_rate": 1.5266064406046813e-05, "loss": 1.0562, "step": 8786 }, { "epoch": 0.3438062446200798, "grad_norm": 0.0, "learning_rate": 1.5264987078934778e-05, "loss": 1.138, "step": 8787 }, { "epoch": 0.34384537131230924, "grad_norm": 0.0, "learning_rate": 1.526390966727365e-05, "loss": 1.0267, "step": 8788 }, { "epoch": 0.3438844980045387, "grad_norm": 0.0, "learning_rate": 1.5262832171080726e-05, "loss": 1.0262, "step": 8789 }, { "epoch": 0.3439236246967681, "grad_norm": 0.0, "learning_rate": 1.5261754590373312e-05, "loss": 1.1362, "step": 8790 }, { "epoch": 0.34396275138899757, "grad_norm": 0.0, "learning_rate": 1.5260676925168713e-05, "loss": 1.2211, "step": 8791 }, { "epoch": 0.344001878081227, "grad_norm": 0.0, "learning_rate": 1.5259599175484233e-05, "loss": 1.0749, "step": 8792 }, { "epoch": 0.34404100477345645, "grad_norm": 0.0, "learning_rate": 1.5258521341337185e-05, "loss": 1.0777, "step": 8793 }, { "epoch": 0.3440801314656859, "grad_norm": 0.0, "learning_rate": 1.525744342274487e-05, "loss": 1.1267, "step": 8794 }, { "epoch": 0.34411925815791533, "grad_norm": 0.0, "learning_rate": 1.5256365419724603e-05, "loss": 1.06, "step": 8795 }, { "epoch": 0.34415838485014477, "grad_norm": 0.0, "learning_rate": 1.5255287332293697e-05, "loss": 1.1667, "step": 8796 }, { "epoch": 0.3441975115423742, "grad_norm": 0.0, "learning_rate": 1.5254209160469457e-05, "loss": 1.0749, "step": 8797 }, { "epoch": 0.34423663823460365, "grad_norm": 0.0, "learning_rate": 1.5253130904269205e-05, "loss": 1.0844, "step": 8798 }, { "epoch": 0.3442757649268331, "grad_norm": 0.0, "learning_rate": 1.5252052563710252e-05, "loss": 1.0364, "step": 8799 }, { "epoch": 0.34431489161906254, "grad_norm": 0.0, "learning_rate": 1.5250974138809915e-05, "loss": 1.1268, "step": 8800 }, { "epoch": 0.344354018311292, "grad_norm": 0.0, "learning_rate": 1.5249895629585511e-05, "loss": 1.2153, "step": 8801 }, { "epoch": 0.3443931450035214, "grad_norm": 0.0, "learning_rate": 1.5248817036054366e-05, "loss": 1.1224, "step": 8802 }, { "epoch": 0.34443227169575086, "grad_norm": 0.0, "learning_rate": 1.5247738358233795e-05, "loss": 1.0793, "step": 8803 }, { "epoch": 0.3444713983879803, "grad_norm": 0.0, "learning_rate": 1.5246659596141123e-05, "loss": 1.1866, "step": 8804 }, { "epoch": 0.34451052508020974, "grad_norm": 0.0, "learning_rate": 1.5245580749793672e-05, "loss": 1.1823, "step": 8805 }, { "epoch": 0.3445496517724392, "grad_norm": 0.0, "learning_rate": 1.5244501819208766e-05, "loss": 1.0154, "step": 8806 }, { "epoch": 0.3445887784646686, "grad_norm": 0.0, "learning_rate": 1.5243422804403731e-05, "loss": 1.0181, "step": 8807 }, { "epoch": 0.34462790515689806, "grad_norm": 0.0, "learning_rate": 1.5242343705395897e-05, "loss": 1.0289, "step": 8808 }, { "epoch": 0.34466703184912745, "grad_norm": 0.0, "learning_rate": 1.5241264522202594e-05, "loss": 1.1824, "step": 8809 }, { "epoch": 0.3447061585413569, "grad_norm": 0.0, "learning_rate": 1.5240185254841149e-05, "loss": 1.2224, "step": 8810 }, { "epoch": 0.34474528523358633, "grad_norm": 0.0, "learning_rate": 1.5239105903328896e-05, "loss": 1.2039, "step": 8811 }, { "epoch": 0.34478441192581577, "grad_norm": 0.0, "learning_rate": 1.5238026467683167e-05, "loss": 1.111, "step": 8812 }, { "epoch": 0.3448235386180452, "grad_norm": 0.0, "learning_rate": 1.5236946947921296e-05, "loss": 1.1166, "step": 8813 }, { "epoch": 0.34486266531027465, "grad_norm": 0.0, "learning_rate": 1.5235867344060622e-05, "loss": 1.2039, "step": 8814 }, { "epoch": 0.3449017920025041, "grad_norm": 0.0, "learning_rate": 1.5234787656118477e-05, "loss": 1.0923, "step": 8815 }, { "epoch": 0.34494091869473353, "grad_norm": 0.0, "learning_rate": 1.5233707884112201e-05, "loss": 1.0011, "step": 8816 }, { "epoch": 0.344980045386963, "grad_norm": 0.0, "learning_rate": 1.5232628028059137e-05, "loss": 1.0426, "step": 8817 }, { "epoch": 0.3450191720791924, "grad_norm": 0.0, "learning_rate": 1.5231548087976622e-05, "loss": 1.0853, "step": 8818 }, { "epoch": 0.34505829877142186, "grad_norm": 0.0, "learning_rate": 1.5230468063882e-05, "loss": 1.2627, "step": 8819 }, { "epoch": 0.3450974254636513, "grad_norm": 0.0, "learning_rate": 1.5229387955792616e-05, "loss": 0.945, "step": 8820 }, { "epoch": 0.34513655215588074, "grad_norm": 0.0, "learning_rate": 1.5228307763725814e-05, "loss": 1.1454, "step": 8821 }, { "epoch": 0.3451756788481102, "grad_norm": 0.0, "learning_rate": 1.5227227487698941e-05, "loss": 1.0679, "step": 8822 }, { "epoch": 0.3452148055403396, "grad_norm": 0.0, "learning_rate": 1.5226147127729341e-05, "loss": 0.9807, "step": 8823 }, { "epoch": 0.34525393223256906, "grad_norm": 0.0, "learning_rate": 1.522506668383437e-05, "loss": 1.1486, "step": 8824 }, { "epoch": 0.3452930589247985, "grad_norm": 0.0, "learning_rate": 1.5223986156031379e-05, "loss": 1.009, "step": 8825 }, { "epoch": 0.34533218561702794, "grad_norm": 0.0, "learning_rate": 1.5222905544337709e-05, "loss": 1.004, "step": 8826 }, { "epoch": 0.3453713123092574, "grad_norm": 0.0, "learning_rate": 1.5221824848770728e-05, "loss": 1.0406, "step": 8827 }, { "epoch": 0.3454104390014868, "grad_norm": 0.0, "learning_rate": 1.5220744069347777e-05, "loss": 0.9498, "step": 8828 }, { "epoch": 0.34544956569371627, "grad_norm": 0.0, "learning_rate": 1.5219663206086221e-05, "loss": 1.1246, "step": 8829 }, { "epoch": 0.3454886923859457, "grad_norm": 0.0, "learning_rate": 1.5218582259003413e-05, "loss": 1.0243, "step": 8830 }, { "epoch": 0.34552781907817515, "grad_norm": 0.0, "learning_rate": 1.5217501228116715e-05, "loss": 1.0661, "step": 8831 }, { "epoch": 0.3455669457704046, "grad_norm": 0.0, "learning_rate": 1.5216420113443484e-05, "loss": 1.103, "step": 8832 }, { "epoch": 0.34560607246263403, "grad_norm": 0.0, "learning_rate": 1.5215338915001084e-05, "loss": 1.0517, "step": 8833 }, { "epoch": 0.34564519915486347, "grad_norm": 0.0, "learning_rate": 1.5214257632806875e-05, "loss": 1.1376, "step": 8834 }, { "epoch": 0.3456843258470929, "grad_norm": 0.0, "learning_rate": 1.5213176266878223e-05, "loss": 1.108, "step": 8835 }, { "epoch": 0.34572345253932235, "grad_norm": 0.0, "learning_rate": 1.5212094817232492e-05, "loss": 1.1151, "step": 8836 }, { "epoch": 0.3457625792315518, "grad_norm": 0.0, "learning_rate": 1.521101328388705e-05, "loss": 1.1578, "step": 8837 }, { "epoch": 0.3458017059237812, "grad_norm": 0.0, "learning_rate": 1.5209931666859263e-05, "loss": 1.1025, "step": 8838 }, { "epoch": 0.3458408326160106, "grad_norm": 0.0, "learning_rate": 1.5208849966166503e-05, "loss": 1.1846, "step": 8839 }, { "epoch": 0.34587995930824006, "grad_norm": 0.0, "learning_rate": 1.5207768181826138e-05, "loss": 1.0379, "step": 8840 }, { "epoch": 0.3459190860004695, "grad_norm": 0.0, "learning_rate": 1.5206686313855542e-05, "loss": 1.0441, "step": 8841 }, { "epoch": 0.34595821269269894, "grad_norm": 0.0, "learning_rate": 1.5205604362272093e-05, "loss": 1.2103, "step": 8842 }, { "epoch": 0.3459973393849284, "grad_norm": 0.0, "learning_rate": 1.5204522327093157e-05, "loss": 0.9778, "step": 8843 }, { "epoch": 0.3460364660771578, "grad_norm": 0.0, "learning_rate": 1.5203440208336114e-05, "loss": 1.0892, "step": 8844 }, { "epoch": 0.34607559276938726, "grad_norm": 0.0, "learning_rate": 1.5202358006018342e-05, "loss": 1.1503, "step": 8845 }, { "epoch": 0.3461147194616167, "grad_norm": 0.0, "learning_rate": 1.5201275720157218e-05, "loss": 0.9611, "step": 8846 }, { "epoch": 0.34615384615384615, "grad_norm": 0.0, "learning_rate": 1.5200193350770124e-05, "loss": 1.0958, "step": 8847 }, { "epoch": 0.3461929728460756, "grad_norm": 0.0, "learning_rate": 1.5199110897874442e-05, "loss": 1.1259, "step": 8848 }, { "epoch": 0.346232099538305, "grad_norm": 0.0, "learning_rate": 1.5198028361487554e-05, "loss": 1.0861, "step": 8849 }, { "epoch": 0.34627122623053447, "grad_norm": 0.0, "learning_rate": 1.519694574162684e-05, "loss": 1.1841, "step": 8850 }, { "epoch": 0.3463103529227639, "grad_norm": 0.0, "learning_rate": 1.5195863038309698e-05, "loss": 1.1237, "step": 8851 }, { "epoch": 0.34634947961499335, "grad_norm": 0.0, "learning_rate": 1.5194780251553497e-05, "loss": 1.0766, "step": 8852 }, { "epoch": 0.3463886063072228, "grad_norm": 0.0, "learning_rate": 1.5193697381375641e-05, "loss": 1.1677, "step": 8853 }, { "epoch": 0.34642773299945223, "grad_norm": 0.0, "learning_rate": 1.5192614427793513e-05, "loss": 1.0162, "step": 8854 }, { "epoch": 0.3464668596916817, "grad_norm": 0.0, "learning_rate": 1.5191531390824498e-05, "loss": 1.2028, "step": 8855 }, { "epoch": 0.3465059863839111, "grad_norm": 0.0, "learning_rate": 1.5190448270486e-05, "loss": 1.2156, "step": 8856 }, { "epoch": 0.34654511307614055, "grad_norm": 0.0, "learning_rate": 1.5189365066795405e-05, "loss": 1.0305, "step": 8857 }, { "epoch": 0.34658423976837, "grad_norm": 0.0, "learning_rate": 1.5188281779770111e-05, "loss": 0.8735, "step": 8858 }, { "epoch": 0.34662336646059944, "grad_norm": 0.0, "learning_rate": 1.518719840942751e-05, "loss": 1.1418, "step": 8859 }, { "epoch": 0.3466624931528289, "grad_norm": 0.0, "learning_rate": 1.5186114955785005e-05, "loss": 1.1048, "step": 8860 }, { "epoch": 0.3467016198450583, "grad_norm": 0.0, "learning_rate": 1.5185031418859992e-05, "loss": 1.0775, "step": 8861 }, { "epoch": 0.34674074653728776, "grad_norm": 0.0, "learning_rate": 1.5183947798669874e-05, "loss": 1.1228, "step": 8862 }, { "epoch": 0.3467798732295172, "grad_norm": 0.0, "learning_rate": 1.5182864095232044e-05, "loss": 1.0997, "step": 8863 }, { "epoch": 0.34681899992174664, "grad_norm": 0.0, "learning_rate": 1.5181780308563916e-05, "loss": 1.1309, "step": 8864 }, { "epoch": 0.3468581266139761, "grad_norm": 0.0, "learning_rate": 1.5180696438682887e-05, "loss": 0.9367, "step": 8865 }, { "epoch": 0.34689725330620547, "grad_norm": 0.0, "learning_rate": 1.5179612485606366e-05, "loss": 1.1282, "step": 8866 }, { "epoch": 0.3469363799984349, "grad_norm": 0.0, "learning_rate": 1.517852844935176e-05, "loss": 1.0394, "step": 8867 }, { "epoch": 0.34697550669066435, "grad_norm": 0.0, "learning_rate": 1.5177444329936472e-05, "loss": 0.999, "step": 8868 }, { "epoch": 0.3470146333828938, "grad_norm": 0.0, "learning_rate": 1.517636012737792e-05, "loss": 1.075, "step": 8869 }, { "epoch": 0.34705376007512323, "grad_norm": 0.0, "learning_rate": 1.517527584169351e-05, "loss": 1.0068, "step": 8870 }, { "epoch": 0.34709288676735267, "grad_norm": 0.0, "learning_rate": 1.5174191472900654e-05, "loss": 1.0734, "step": 8871 }, { "epoch": 0.3471320134595821, "grad_norm": 0.0, "learning_rate": 1.5173107021016766e-05, "loss": 1.2061, "step": 8872 }, { "epoch": 0.34717114015181155, "grad_norm": 0.0, "learning_rate": 1.5172022486059264e-05, "loss": 1.0836, "step": 8873 }, { "epoch": 0.347210266844041, "grad_norm": 0.0, "learning_rate": 1.5170937868045558e-05, "loss": 1.1163, "step": 8874 }, { "epoch": 0.34724939353627043, "grad_norm": 0.0, "learning_rate": 1.516985316699307e-05, "loss": 1.1658, "step": 8875 }, { "epoch": 0.3472885202284999, "grad_norm": 0.0, "learning_rate": 1.5168768382919222e-05, "loss": 1.0613, "step": 8876 }, { "epoch": 0.3473276469207293, "grad_norm": 0.0, "learning_rate": 1.5167683515841427e-05, "loss": 1.0327, "step": 8877 }, { "epoch": 0.34736677361295876, "grad_norm": 0.0, "learning_rate": 1.5166598565777113e-05, "loss": 1.061, "step": 8878 }, { "epoch": 0.3474059003051882, "grad_norm": 0.0, "learning_rate": 1.5165513532743696e-05, "loss": 1.1433, "step": 8879 }, { "epoch": 0.34744502699741764, "grad_norm": 0.0, "learning_rate": 1.516442841675861e-05, "loss": 1.1007, "step": 8880 }, { "epoch": 0.3474841536896471, "grad_norm": 0.0, "learning_rate": 1.516334321783927e-05, "loss": 0.9626, "step": 8881 }, { "epoch": 0.3475232803818765, "grad_norm": 0.0, "learning_rate": 1.5162257936003112e-05, "loss": 1.1428, "step": 8882 }, { "epoch": 0.34756240707410596, "grad_norm": 0.0, "learning_rate": 1.516117257126756e-05, "loss": 1.1009, "step": 8883 }, { "epoch": 0.3476015337663354, "grad_norm": 0.0, "learning_rate": 1.5160087123650041e-05, "loss": 1.1638, "step": 8884 }, { "epoch": 0.34764066045856484, "grad_norm": 0.0, "learning_rate": 1.515900159316799e-05, "loss": 1.1598, "step": 8885 }, { "epoch": 0.3476797871507943, "grad_norm": 0.0, "learning_rate": 1.5157915979838835e-05, "loss": 1.2122, "step": 8886 }, { "epoch": 0.3477189138430237, "grad_norm": 0.0, "learning_rate": 1.5156830283680017e-05, "loss": 1.1116, "step": 8887 }, { "epoch": 0.34775804053525317, "grad_norm": 0.0, "learning_rate": 1.5155744504708964e-05, "loss": 1.1711, "step": 8888 }, { "epoch": 0.3477971672274826, "grad_norm": 0.0, "learning_rate": 1.5154658642943116e-05, "loss": 1.0488, "step": 8889 }, { "epoch": 0.34783629391971205, "grad_norm": 0.0, "learning_rate": 1.515357269839991e-05, "loss": 1.1047, "step": 8890 }, { "epoch": 0.3478754206119415, "grad_norm": 0.0, "learning_rate": 1.5152486671096785e-05, "loss": 1.0899, "step": 8891 }, { "epoch": 0.34791454730417093, "grad_norm": 0.0, "learning_rate": 1.5151400561051177e-05, "loss": 1.2236, "step": 8892 }, { "epoch": 0.34795367399640037, "grad_norm": 0.0, "learning_rate": 1.5150314368280535e-05, "loss": 1.0468, "step": 8893 }, { "epoch": 0.3479928006886298, "grad_norm": 0.0, "learning_rate": 1.5149228092802296e-05, "loss": 1.0577, "step": 8894 }, { "epoch": 0.3480319273808592, "grad_norm": 0.0, "learning_rate": 1.5148141734633903e-05, "loss": 1.1078, "step": 8895 }, { "epoch": 0.34807105407308864, "grad_norm": 0.0, "learning_rate": 1.514705529379281e-05, "loss": 1.1443, "step": 8896 }, { "epoch": 0.3481101807653181, "grad_norm": 0.0, "learning_rate": 1.5145968770296456e-05, "loss": 1.1016, "step": 8897 }, { "epoch": 0.3481493074575475, "grad_norm": 0.0, "learning_rate": 1.514488216416229e-05, "loss": 1.1123, "step": 8898 }, { "epoch": 0.34818843414977696, "grad_norm": 0.0, "learning_rate": 1.5143795475407767e-05, "loss": 1.124, "step": 8899 }, { "epoch": 0.3482275608420064, "grad_norm": 0.0, "learning_rate": 1.5142708704050333e-05, "loss": 0.9843, "step": 8900 }, { "epoch": 0.34826668753423584, "grad_norm": 0.0, "learning_rate": 1.5141621850107441e-05, "loss": 1.249, "step": 8901 }, { "epoch": 0.3483058142264653, "grad_norm": 0.0, "learning_rate": 1.5140534913596545e-05, "loss": 1.2092, "step": 8902 }, { "epoch": 0.3483449409186947, "grad_norm": 0.0, "learning_rate": 1.5139447894535102e-05, "loss": 1.0971, "step": 8903 }, { "epoch": 0.34838406761092416, "grad_norm": 0.0, "learning_rate": 1.5138360792940562e-05, "loss": 1.0854, "step": 8904 }, { "epoch": 0.3484231943031536, "grad_norm": 0.0, "learning_rate": 1.5137273608830387e-05, "loss": 1.1248, "step": 8905 }, { "epoch": 0.34846232099538305, "grad_norm": 0.0, "learning_rate": 1.5136186342222038e-05, "loss": 1.0826, "step": 8906 }, { "epoch": 0.3485014476876125, "grad_norm": 0.0, "learning_rate": 1.5135098993132969e-05, "loss": 0.9735, "step": 8907 }, { "epoch": 0.3485405743798419, "grad_norm": 0.0, "learning_rate": 1.5134011561580648e-05, "loss": 1.2521, "step": 8908 }, { "epoch": 0.34857970107207137, "grad_norm": 0.0, "learning_rate": 1.5132924047582533e-05, "loss": 1.1149, "step": 8909 }, { "epoch": 0.3486188277643008, "grad_norm": 0.0, "learning_rate": 1.5131836451156089e-05, "loss": 1.1162, "step": 8910 }, { "epoch": 0.34865795445653025, "grad_norm": 0.0, "learning_rate": 1.5130748772318784e-05, "loss": 1.1298, "step": 8911 }, { "epoch": 0.3486970811487597, "grad_norm": 0.0, "learning_rate": 1.5129661011088082e-05, "loss": 1.1145, "step": 8912 }, { "epoch": 0.34873620784098913, "grad_norm": 0.0, "learning_rate": 1.512857316748145e-05, "loss": 1.0497, "step": 8913 }, { "epoch": 0.3487753345332186, "grad_norm": 0.0, "learning_rate": 1.5127485241516362e-05, "loss": 1.1701, "step": 8914 }, { "epoch": 0.348814461225448, "grad_norm": 0.0, "learning_rate": 1.5126397233210286e-05, "loss": 1.0134, "step": 8915 }, { "epoch": 0.34885358791767745, "grad_norm": 0.0, "learning_rate": 1.5125309142580694e-05, "loss": 1.1217, "step": 8916 }, { "epoch": 0.3488927146099069, "grad_norm": 0.0, "learning_rate": 1.5124220969645059e-05, "loss": 1.1891, "step": 8917 }, { "epoch": 0.34893184130213634, "grad_norm": 0.0, "learning_rate": 1.5123132714420856e-05, "loss": 1.14, "step": 8918 }, { "epoch": 0.3489709679943658, "grad_norm": 0.0, "learning_rate": 1.5122044376925559e-05, "loss": 1.0649, "step": 8919 }, { "epoch": 0.3490100946865952, "grad_norm": 0.0, "learning_rate": 1.5120955957176651e-05, "loss": 1.0465, "step": 8920 }, { "epoch": 0.34904922137882466, "grad_norm": 0.0, "learning_rate": 1.5119867455191607e-05, "loss": 1.1651, "step": 8921 }, { "epoch": 0.3490883480710541, "grad_norm": 0.0, "learning_rate": 1.5118778870987906e-05, "loss": 1.1172, "step": 8922 }, { "epoch": 0.3491274747632835, "grad_norm": 0.0, "learning_rate": 1.5117690204583033e-05, "loss": 1.1258, "step": 8923 }, { "epoch": 0.3491666014555129, "grad_norm": 0.0, "learning_rate": 1.5116601455994464e-05, "loss": 1.0944, "step": 8924 }, { "epoch": 0.34920572814774237, "grad_norm": 0.0, "learning_rate": 1.511551262523969e-05, "loss": 1.1361, "step": 8925 }, { "epoch": 0.3492448548399718, "grad_norm": 0.0, "learning_rate": 1.5114423712336193e-05, "loss": 1.1822, "step": 8926 }, { "epoch": 0.34928398153220125, "grad_norm": 0.0, "learning_rate": 1.5113334717301458e-05, "loss": 1.1159, "step": 8927 }, { "epoch": 0.3493231082244307, "grad_norm": 0.0, "learning_rate": 1.5112245640152975e-05, "loss": 1.1276, "step": 8928 }, { "epoch": 0.34936223491666013, "grad_norm": 0.0, "learning_rate": 1.5111156480908236e-05, "loss": 0.8925, "step": 8929 }, { "epoch": 0.34940136160888957, "grad_norm": 0.0, "learning_rate": 1.5110067239584725e-05, "loss": 0.9465, "step": 8930 }, { "epoch": 0.349440488301119, "grad_norm": 0.0, "learning_rate": 1.5108977916199941e-05, "loss": 1.2056, "step": 8931 }, { "epoch": 0.34947961499334845, "grad_norm": 0.0, "learning_rate": 1.5107888510771374e-05, "loss": 1.1422, "step": 8932 }, { "epoch": 0.3495187416855779, "grad_norm": 0.0, "learning_rate": 1.5106799023316514e-05, "loss": 1.1007, "step": 8933 }, { "epoch": 0.34955786837780733, "grad_norm": 0.0, "learning_rate": 1.5105709453852863e-05, "loss": 1.0708, "step": 8934 }, { "epoch": 0.3495969950700368, "grad_norm": 0.0, "learning_rate": 1.5104619802397916e-05, "loss": 1.1021, "step": 8935 }, { "epoch": 0.3496361217622662, "grad_norm": 0.0, "learning_rate": 1.5103530068969172e-05, "loss": 1.16, "step": 8936 }, { "epoch": 0.34967524845449566, "grad_norm": 0.0, "learning_rate": 1.5102440253584128e-05, "loss": 1.0784, "step": 8937 }, { "epoch": 0.3497143751467251, "grad_norm": 0.0, "learning_rate": 1.510135035626029e-05, "loss": 1.1576, "step": 8938 }, { "epoch": 0.34975350183895454, "grad_norm": 0.0, "learning_rate": 1.5100260377015155e-05, "loss": 1.0701, "step": 8939 }, { "epoch": 0.349792628531184, "grad_norm": 0.0, "learning_rate": 1.509917031586623e-05, "loss": 1.2122, "step": 8940 }, { "epoch": 0.3498317552234134, "grad_norm": 0.0, "learning_rate": 1.5098080172831022e-05, "loss": 0.9686, "step": 8941 }, { "epoch": 0.34987088191564286, "grad_norm": 0.0, "learning_rate": 1.5096989947927032e-05, "loss": 1.186, "step": 8942 }, { "epoch": 0.3499100086078723, "grad_norm": 0.0, "learning_rate": 1.509589964117177e-05, "loss": 1.0649, "step": 8943 }, { "epoch": 0.34994913530010174, "grad_norm": 0.0, "learning_rate": 1.5094809252582744e-05, "loss": 1.1041, "step": 8944 }, { "epoch": 0.3499882619923312, "grad_norm": 0.0, "learning_rate": 1.509371878217747e-05, "loss": 1.2971, "step": 8945 }, { "epoch": 0.3500273886845606, "grad_norm": 0.0, "learning_rate": 1.5092628229973452e-05, "loss": 1.0524, "step": 8946 }, { "epoch": 0.35006651537679007, "grad_norm": 0.0, "learning_rate": 1.5091537595988208e-05, "loss": 1.0392, "step": 8947 }, { "epoch": 0.3501056420690195, "grad_norm": 0.0, "learning_rate": 1.5090446880239249e-05, "loss": 1.2343, "step": 8948 }, { "epoch": 0.35014476876124895, "grad_norm": 0.0, "learning_rate": 1.5089356082744093e-05, "loss": 0.9917, "step": 8949 }, { "epoch": 0.3501838954534784, "grad_norm": 0.0, "learning_rate": 1.5088265203520254e-05, "loss": 1.1761, "step": 8950 }, { "epoch": 0.3502230221457078, "grad_norm": 0.0, "learning_rate": 1.5087174242585251e-05, "loss": 1.1575, "step": 8951 }, { "epoch": 0.3502621488379372, "grad_norm": 0.0, "learning_rate": 1.5086083199956608e-05, "loss": 1.0918, "step": 8952 }, { "epoch": 0.35030127553016666, "grad_norm": 0.0, "learning_rate": 1.5084992075651838e-05, "loss": 1.1994, "step": 8953 }, { "epoch": 0.3503404022223961, "grad_norm": 0.0, "learning_rate": 1.5083900869688473e-05, "loss": 1.1102, "step": 8954 }, { "epoch": 0.35037952891462554, "grad_norm": 0.0, "learning_rate": 1.5082809582084023e-05, "loss": 1.1156, "step": 8955 }, { "epoch": 0.350418655606855, "grad_norm": 0.0, "learning_rate": 1.5081718212856025e-05, "loss": 1.1978, "step": 8956 }, { "epoch": 0.3504577822990844, "grad_norm": 0.0, "learning_rate": 1.5080626762021997e-05, "loss": 1.1661, "step": 8957 }, { "epoch": 0.35049690899131386, "grad_norm": 0.0, "learning_rate": 1.507953522959947e-05, "loss": 1.1302, "step": 8958 }, { "epoch": 0.3505360356835433, "grad_norm": 0.0, "learning_rate": 1.5078443615605976e-05, "loss": 1.1003, "step": 8959 }, { "epoch": 0.35057516237577274, "grad_norm": 0.0, "learning_rate": 1.507735192005904e-05, "loss": 1.0915, "step": 8960 }, { "epoch": 0.3506142890680022, "grad_norm": 0.0, "learning_rate": 1.5076260142976194e-05, "loss": 1.2153, "step": 8961 }, { "epoch": 0.3506534157602316, "grad_norm": 0.0, "learning_rate": 1.5075168284374968e-05, "loss": 1.0552, "step": 8962 }, { "epoch": 0.35069254245246106, "grad_norm": 0.0, "learning_rate": 1.50740763442729e-05, "loss": 1.2354, "step": 8963 }, { "epoch": 0.3507316691446905, "grad_norm": 0.0, "learning_rate": 1.5072984322687526e-05, "loss": 1.1366, "step": 8964 }, { "epoch": 0.35077079583691995, "grad_norm": 0.0, "learning_rate": 1.5071892219636382e-05, "loss": 1.1124, "step": 8965 }, { "epoch": 0.3508099225291494, "grad_norm": 0.0, "learning_rate": 1.5070800035136998e-05, "loss": 1.1413, "step": 8966 }, { "epoch": 0.35084904922137883, "grad_norm": 0.0, "learning_rate": 1.5069707769206926e-05, "loss": 1.0641, "step": 8967 }, { "epoch": 0.35088817591360827, "grad_norm": 0.0, "learning_rate": 1.5068615421863697e-05, "loss": 0.9123, "step": 8968 }, { "epoch": 0.3509273026058377, "grad_norm": 0.0, "learning_rate": 1.5067522993124856e-05, "loss": 0.9941, "step": 8969 }, { "epoch": 0.35096642929806715, "grad_norm": 0.0, "learning_rate": 1.5066430483007949e-05, "loss": 1.1542, "step": 8970 }, { "epoch": 0.3510055559902966, "grad_norm": 0.0, "learning_rate": 1.5065337891530511e-05, "loss": 1.1447, "step": 8971 }, { "epoch": 0.35104468268252603, "grad_norm": 0.0, "learning_rate": 1.5064245218710099e-05, "loss": 1.1171, "step": 8972 }, { "epoch": 0.3510838093747555, "grad_norm": 0.0, "learning_rate": 1.506315246456425e-05, "loss": 1.058, "step": 8973 }, { "epoch": 0.3511229360669849, "grad_norm": 0.0, "learning_rate": 1.5062059629110518e-05, "loss": 1.128, "step": 8974 }, { "epoch": 0.35116206275921436, "grad_norm": 0.0, "learning_rate": 1.5060966712366454e-05, "loss": 1.1346, "step": 8975 }, { "epoch": 0.3512011894514438, "grad_norm": 0.0, "learning_rate": 1.5059873714349606e-05, "loss": 1.1942, "step": 8976 }, { "epoch": 0.35124031614367324, "grad_norm": 0.0, "learning_rate": 1.5058780635077525e-05, "loss": 1.139, "step": 8977 }, { "epoch": 0.3512794428359027, "grad_norm": 0.0, "learning_rate": 1.5057687474567769e-05, "loss": 1.2609, "step": 8978 }, { "epoch": 0.3513185695281321, "grad_norm": 0.0, "learning_rate": 1.5056594232837884e-05, "loss": 1.1246, "step": 8979 }, { "epoch": 0.3513576962203615, "grad_norm": 0.0, "learning_rate": 1.5055500909905437e-05, "loss": 1.2423, "step": 8980 }, { "epoch": 0.35139682291259094, "grad_norm": 0.0, "learning_rate": 1.505440750578798e-05, "loss": 1.0763, "step": 8981 }, { "epoch": 0.3514359496048204, "grad_norm": 0.0, "learning_rate": 1.505331402050307e-05, "loss": 1.1002, "step": 8982 }, { "epoch": 0.3514750762970498, "grad_norm": 0.0, "learning_rate": 1.5052220454068267e-05, "loss": 1.2033, "step": 8983 }, { "epoch": 0.35151420298927927, "grad_norm": 0.0, "learning_rate": 1.5051126806501137e-05, "loss": 1.1677, "step": 8984 }, { "epoch": 0.3515533296815087, "grad_norm": 0.0, "learning_rate": 1.505003307781924e-05, "loss": 1.1642, "step": 8985 }, { "epoch": 0.35159245637373815, "grad_norm": 0.0, "learning_rate": 1.504893926804014e-05, "loss": 1.0706, "step": 8986 }, { "epoch": 0.3516315830659676, "grad_norm": 0.0, "learning_rate": 1.5047845377181403e-05, "loss": 1.1858, "step": 8987 }, { "epoch": 0.35167070975819703, "grad_norm": 0.0, "learning_rate": 1.5046751405260592e-05, "loss": 1.0515, "step": 8988 }, { "epoch": 0.35170983645042647, "grad_norm": 0.0, "learning_rate": 1.504565735229528e-05, "loss": 1.1455, "step": 8989 }, { "epoch": 0.3517489631426559, "grad_norm": 0.0, "learning_rate": 1.5044563218303032e-05, "loss": 0.9571, "step": 8990 }, { "epoch": 0.35178808983488535, "grad_norm": 0.0, "learning_rate": 1.5043469003301417e-05, "loss": 1.1066, "step": 8991 }, { "epoch": 0.3518272165271148, "grad_norm": 0.0, "learning_rate": 1.5042374707308014e-05, "loss": 1.0579, "step": 8992 }, { "epoch": 0.35186634321934424, "grad_norm": 0.0, "learning_rate": 1.5041280330340391e-05, "loss": 1.0148, "step": 8993 }, { "epoch": 0.3519054699115737, "grad_norm": 0.0, "learning_rate": 1.5040185872416122e-05, "loss": 1.0348, "step": 8994 }, { "epoch": 0.3519445966038031, "grad_norm": 0.0, "learning_rate": 1.5039091333552783e-05, "loss": 1.1579, "step": 8995 }, { "epoch": 0.35198372329603256, "grad_norm": 0.0, "learning_rate": 1.5037996713767956e-05, "loss": 0.9099, "step": 8996 }, { "epoch": 0.352022849988262, "grad_norm": 0.0, "learning_rate": 1.5036902013079209e-05, "loss": 1.2559, "step": 8997 }, { "epoch": 0.35206197668049144, "grad_norm": 0.0, "learning_rate": 1.503580723150413e-05, "loss": 1.076, "step": 8998 }, { "epoch": 0.3521011033727209, "grad_norm": 0.0, "learning_rate": 1.5034712369060297e-05, "loss": 1.018, "step": 8999 }, { "epoch": 0.3521402300649503, "grad_norm": 0.0, "learning_rate": 1.5033617425765288e-05, "loss": 1.0664, "step": 9000 }, { "epoch": 0.35217935675717976, "grad_norm": 0.0, "learning_rate": 1.5032522401636698e-05, "loss": 0.9784, "step": 9001 }, { "epoch": 0.3522184834494092, "grad_norm": 0.0, "learning_rate": 1.5031427296692096e-05, "loss": 1.1314, "step": 9002 }, { "epoch": 0.35225761014163864, "grad_norm": 0.0, "learning_rate": 1.5030332110949081e-05, "loss": 1.1762, "step": 9003 }, { "epoch": 0.3522967368338681, "grad_norm": 0.0, "learning_rate": 1.5029236844425236e-05, "loss": 1.2505, "step": 9004 }, { "epoch": 0.3523358635260975, "grad_norm": 0.0, "learning_rate": 1.5028141497138151e-05, "loss": 1.2623, "step": 9005 }, { "epoch": 0.35237499021832697, "grad_norm": 0.0, "learning_rate": 1.5027046069105411e-05, "loss": 1.049, "step": 9006 }, { "epoch": 0.3524141169105564, "grad_norm": 0.0, "learning_rate": 1.502595056034461e-05, "loss": 1.0488, "step": 9007 }, { "epoch": 0.3524532436027858, "grad_norm": 0.0, "learning_rate": 1.5024854970873342e-05, "loss": 1.0701, "step": 9008 }, { "epoch": 0.35249237029501523, "grad_norm": 0.0, "learning_rate": 1.5023759300709201e-05, "loss": 1.001, "step": 9009 }, { "epoch": 0.3525314969872447, "grad_norm": 0.0, "learning_rate": 1.5022663549869781e-05, "loss": 1.2239, "step": 9010 }, { "epoch": 0.3525706236794741, "grad_norm": 0.0, "learning_rate": 1.5021567718372674e-05, "loss": 1.0557, "step": 9011 }, { "epoch": 0.35260975037170356, "grad_norm": 0.0, "learning_rate": 1.5020471806235485e-05, "loss": 1.1531, "step": 9012 }, { "epoch": 0.352648877063933, "grad_norm": 0.0, "learning_rate": 1.501937581347581e-05, "loss": 1.2322, "step": 9013 }, { "epoch": 0.35268800375616244, "grad_norm": 0.0, "learning_rate": 1.5018279740111247e-05, "loss": 0.9839, "step": 9014 }, { "epoch": 0.3527271304483919, "grad_norm": 0.0, "learning_rate": 1.5017183586159401e-05, "loss": 1.018, "step": 9015 }, { "epoch": 0.3527662571406213, "grad_norm": 0.0, "learning_rate": 1.5016087351637874e-05, "loss": 1.1202, "step": 9016 }, { "epoch": 0.35280538383285076, "grad_norm": 0.0, "learning_rate": 1.501499103656427e-05, "loss": 1.097, "step": 9017 }, { "epoch": 0.3528445105250802, "grad_norm": 0.0, "learning_rate": 1.5013894640956193e-05, "loss": 1.183, "step": 9018 }, { "epoch": 0.35288363721730964, "grad_norm": 0.0, "learning_rate": 1.501279816483125e-05, "loss": 1.0076, "step": 9019 }, { "epoch": 0.3529227639095391, "grad_norm": 0.0, "learning_rate": 1.5011701608207053e-05, "loss": 1.0513, "step": 9020 }, { "epoch": 0.3529618906017685, "grad_norm": 0.0, "learning_rate": 1.5010604971101206e-05, "loss": 1.1089, "step": 9021 }, { "epoch": 0.35300101729399797, "grad_norm": 0.0, "learning_rate": 1.5009508253531321e-05, "loss": 1.2885, "step": 9022 }, { "epoch": 0.3530401439862274, "grad_norm": 0.0, "learning_rate": 1.500841145551501e-05, "loss": 1.1013, "step": 9023 }, { "epoch": 0.35307927067845685, "grad_norm": 0.0, "learning_rate": 1.5007314577069889e-05, "loss": 1.093, "step": 9024 }, { "epoch": 0.3531183973706863, "grad_norm": 0.0, "learning_rate": 1.500621761821357e-05, "loss": 1.159, "step": 9025 }, { "epoch": 0.35315752406291573, "grad_norm": 0.0, "learning_rate": 1.500512057896367e-05, "loss": 1.1206, "step": 9026 }, { "epoch": 0.35319665075514517, "grad_norm": 0.0, "learning_rate": 1.5004023459337804e-05, "loss": 1.1347, "step": 9027 }, { "epoch": 0.3532357774473746, "grad_norm": 0.0, "learning_rate": 1.5002926259353592e-05, "loss": 1.056, "step": 9028 }, { "epoch": 0.35327490413960405, "grad_norm": 0.0, "learning_rate": 1.5001828979028652e-05, "loss": 0.9846, "step": 9029 }, { "epoch": 0.3533140308318335, "grad_norm": 0.0, "learning_rate": 1.5000731618380608e-05, "loss": 1.1287, "step": 9030 }, { "epoch": 0.35335315752406293, "grad_norm": 0.0, "learning_rate": 1.4999634177427081e-05, "loss": 0.9115, "step": 9031 }, { "epoch": 0.3533922842162924, "grad_norm": 0.0, "learning_rate": 1.4998536656185693e-05, "loss": 1.1217, "step": 9032 }, { "epoch": 0.3534314109085218, "grad_norm": 0.0, "learning_rate": 1.499743905467407e-05, "loss": 1.1219, "step": 9033 }, { "epoch": 0.35347053760075126, "grad_norm": 0.0, "learning_rate": 1.499634137290984e-05, "loss": 1.1382, "step": 9034 }, { "epoch": 0.3535096642929807, "grad_norm": 0.0, "learning_rate": 1.4995243610910625e-05, "loss": 1.1816, "step": 9035 }, { "epoch": 0.35354879098521014, "grad_norm": 0.0, "learning_rate": 1.4994145768694057e-05, "loss": 1.175, "step": 9036 }, { "epoch": 0.3535879176774395, "grad_norm": 0.0, "learning_rate": 1.4993047846277769e-05, "loss": 1.0234, "step": 9037 }, { "epoch": 0.35362704436966896, "grad_norm": 0.0, "learning_rate": 1.4991949843679388e-05, "loss": 1.1005, "step": 9038 }, { "epoch": 0.3536661710618984, "grad_norm": 0.0, "learning_rate": 1.4990851760916544e-05, "loss": 1.2023, "step": 9039 }, { "epoch": 0.35370529775412785, "grad_norm": 0.0, "learning_rate": 1.498975359800688e-05, "loss": 1.0856, "step": 9040 }, { "epoch": 0.3537444244463573, "grad_norm": 0.0, "learning_rate": 1.4988655354968025e-05, "loss": 1.0915, "step": 9041 }, { "epoch": 0.3537835511385867, "grad_norm": 0.0, "learning_rate": 1.4987557031817613e-05, "loss": 1.1433, "step": 9042 }, { "epoch": 0.35382267783081617, "grad_norm": 0.0, "learning_rate": 1.4986458628573285e-05, "loss": 1.2031, "step": 9043 }, { "epoch": 0.3538618045230456, "grad_norm": 0.0, "learning_rate": 1.4985360145252684e-05, "loss": 1.1115, "step": 9044 }, { "epoch": 0.35390093121527505, "grad_norm": 0.0, "learning_rate": 1.4984261581873442e-05, "loss": 1.0562, "step": 9045 }, { "epoch": 0.3539400579075045, "grad_norm": 0.0, "learning_rate": 1.4983162938453203e-05, "loss": 1.0098, "step": 9046 }, { "epoch": 0.35397918459973393, "grad_norm": 0.0, "learning_rate": 1.4982064215009617e-05, "loss": 1.0624, "step": 9047 }, { "epoch": 0.3540183112919634, "grad_norm": 0.0, "learning_rate": 1.498096541156032e-05, "loss": 1.0833, "step": 9048 }, { "epoch": 0.3540574379841928, "grad_norm": 0.0, "learning_rate": 1.497986652812296e-05, "loss": 1.0228, "step": 9049 }, { "epoch": 0.35409656467642225, "grad_norm": 0.0, "learning_rate": 1.4978767564715185e-05, "loss": 0.9262, "step": 9050 }, { "epoch": 0.3541356913686517, "grad_norm": 0.0, "learning_rate": 1.4977668521354639e-05, "loss": 1.0147, "step": 9051 }, { "epoch": 0.35417481806088114, "grad_norm": 0.0, "learning_rate": 1.4976569398058975e-05, "loss": 1.1767, "step": 9052 }, { "epoch": 0.3542139447531106, "grad_norm": 0.0, "learning_rate": 1.4975470194845842e-05, "loss": 1.1422, "step": 9053 }, { "epoch": 0.35425307144534, "grad_norm": 0.0, "learning_rate": 1.4974370911732895e-05, "loss": 1.2357, "step": 9054 }, { "epoch": 0.35429219813756946, "grad_norm": 0.0, "learning_rate": 1.497327154873778e-05, "loss": 1.1083, "step": 9055 }, { "epoch": 0.3543313248297989, "grad_norm": 0.0, "learning_rate": 1.4972172105878158e-05, "loss": 1.0016, "step": 9056 }, { "epoch": 0.35437045152202834, "grad_norm": 0.0, "learning_rate": 1.4971072583171684e-05, "loss": 1.1409, "step": 9057 }, { "epoch": 0.3544095782142578, "grad_norm": 0.0, "learning_rate": 1.4969972980636009e-05, "loss": 1.0561, "step": 9058 }, { "epoch": 0.3544487049064872, "grad_norm": 0.0, "learning_rate": 1.4968873298288801e-05, "loss": 1.0748, "step": 9059 }, { "epoch": 0.35448783159871666, "grad_norm": 0.0, "learning_rate": 1.4967773536147712e-05, "loss": 1.12, "step": 9060 }, { "epoch": 0.3545269582909461, "grad_norm": 0.0, "learning_rate": 1.4966673694230406e-05, "loss": 1.1088, "step": 9061 }, { "epoch": 0.35456608498317554, "grad_norm": 0.0, "learning_rate": 1.496557377255454e-05, "loss": 1.1671, "step": 9062 }, { "epoch": 0.354605211675405, "grad_norm": 0.0, "learning_rate": 1.4964473771137784e-05, "loss": 1.1849, "step": 9063 }, { "epoch": 0.3546443383676344, "grad_norm": 0.0, "learning_rate": 1.49633736899978e-05, "loss": 1.0842, "step": 9064 }, { "epoch": 0.3546834650598638, "grad_norm": 0.0, "learning_rate": 1.4962273529152257e-05, "loss": 0.9742, "step": 9065 }, { "epoch": 0.35472259175209325, "grad_norm": 0.0, "learning_rate": 1.4961173288618814e-05, "loss": 1.1746, "step": 9066 }, { "epoch": 0.3547617184443227, "grad_norm": 0.0, "learning_rate": 1.4960072968415146e-05, "loss": 1.1733, "step": 9067 }, { "epoch": 0.35480084513655213, "grad_norm": 0.0, "learning_rate": 1.4958972568558925e-05, "loss": 1.1403, "step": 9068 }, { "epoch": 0.3548399718287816, "grad_norm": 0.0, "learning_rate": 1.4957872089067815e-05, "loss": 1.126, "step": 9069 }, { "epoch": 0.354879098521011, "grad_norm": 0.0, "learning_rate": 1.4956771529959495e-05, "loss": 1.2935, "step": 9070 }, { "epoch": 0.35491822521324046, "grad_norm": 0.0, "learning_rate": 1.4955670891251633e-05, "loss": 1.1447, "step": 9071 }, { "epoch": 0.3549573519054699, "grad_norm": 0.0, "learning_rate": 1.4954570172961906e-05, "loss": 1.213, "step": 9072 }, { "epoch": 0.35499647859769934, "grad_norm": 0.0, "learning_rate": 1.495346937510799e-05, "loss": 1.0183, "step": 9073 }, { "epoch": 0.3550356052899288, "grad_norm": 0.0, "learning_rate": 1.4952368497707566e-05, "loss": 1.0537, "step": 9074 }, { "epoch": 0.3550747319821582, "grad_norm": 0.0, "learning_rate": 1.4951267540778305e-05, "loss": 1.077, "step": 9075 }, { "epoch": 0.35511385867438766, "grad_norm": 0.0, "learning_rate": 1.4950166504337896e-05, "loss": 1.0878, "step": 9076 }, { "epoch": 0.3551529853666171, "grad_norm": 0.0, "learning_rate": 1.4949065388404014e-05, "loss": 1.0773, "step": 9077 }, { "epoch": 0.35519211205884654, "grad_norm": 0.0, "learning_rate": 1.4947964192994343e-05, "loss": 1.0812, "step": 9078 }, { "epoch": 0.355231238751076, "grad_norm": 0.0, "learning_rate": 1.4946862918126567e-05, "loss": 1.0517, "step": 9079 }, { "epoch": 0.3552703654433054, "grad_norm": 0.0, "learning_rate": 1.4945761563818372e-05, "loss": 1.1497, "step": 9080 }, { "epoch": 0.35530949213553487, "grad_norm": 0.0, "learning_rate": 1.4944660130087445e-05, "loss": 1.1167, "step": 9081 }, { "epoch": 0.3553486188277643, "grad_norm": 0.0, "learning_rate": 1.494355861695147e-05, "loss": 1.1698, "step": 9082 }, { "epoch": 0.35538774551999375, "grad_norm": 0.0, "learning_rate": 1.494245702442814e-05, "loss": 0.9946, "step": 9083 }, { "epoch": 0.3554268722122232, "grad_norm": 0.0, "learning_rate": 1.4941355352535142e-05, "loss": 1.0573, "step": 9084 }, { "epoch": 0.35546599890445263, "grad_norm": 0.0, "learning_rate": 1.4940253601290171e-05, "loss": 1.1801, "step": 9085 }, { "epoch": 0.35550512559668207, "grad_norm": 0.0, "learning_rate": 1.4939151770710915e-05, "loss": 1.1784, "step": 9086 }, { "epoch": 0.3555442522889115, "grad_norm": 0.0, "learning_rate": 1.4938049860815072e-05, "loss": 1.0785, "step": 9087 }, { "epoch": 0.35558337898114095, "grad_norm": 0.0, "learning_rate": 1.4936947871620338e-05, "loss": 1.1053, "step": 9088 }, { "epoch": 0.3556225056733704, "grad_norm": 0.0, "learning_rate": 1.4935845803144404e-05, "loss": 1.0602, "step": 9089 }, { "epoch": 0.35566163236559983, "grad_norm": 0.0, "learning_rate": 1.4934743655404972e-05, "loss": 1.1323, "step": 9090 }, { "epoch": 0.3557007590578293, "grad_norm": 0.0, "learning_rate": 1.493364142841974e-05, "loss": 1.1973, "step": 9091 }, { "epoch": 0.3557398857500587, "grad_norm": 0.0, "learning_rate": 1.493253912220641e-05, "loss": 1.1225, "step": 9092 }, { "epoch": 0.35577901244228816, "grad_norm": 0.0, "learning_rate": 1.4931436736782682e-05, "loss": 1.1642, "step": 9093 }, { "epoch": 0.35581813913451754, "grad_norm": 0.0, "learning_rate": 1.4930334272166263e-05, "loss": 1.1396, "step": 9094 }, { "epoch": 0.355857265826747, "grad_norm": 0.0, "learning_rate": 1.4929231728374847e-05, "loss": 0.9605, "step": 9095 }, { "epoch": 0.3558963925189764, "grad_norm": 0.0, "learning_rate": 1.492812910542615e-05, "loss": 1.032, "step": 9096 }, { "epoch": 0.35593551921120586, "grad_norm": 0.0, "learning_rate": 1.4927026403337876e-05, "loss": 1.0682, "step": 9097 }, { "epoch": 0.3559746459034353, "grad_norm": 0.0, "learning_rate": 1.492592362212773e-05, "loss": 1.1571, "step": 9098 }, { "epoch": 0.35601377259566475, "grad_norm": 0.0, "learning_rate": 1.4924820761813426e-05, "loss": 1.0376, "step": 9099 }, { "epoch": 0.3560528992878942, "grad_norm": 0.0, "learning_rate": 1.4923717822412666e-05, "loss": 1.1838, "step": 9100 }, { "epoch": 0.3560920259801236, "grad_norm": 0.0, "learning_rate": 1.4922614803943172e-05, "loss": 1.0459, "step": 9101 }, { "epoch": 0.35613115267235307, "grad_norm": 0.0, "learning_rate": 1.4921511706422652e-05, "loss": 1.0875, "step": 9102 }, { "epoch": 0.3561702793645825, "grad_norm": 0.0, "learning_rate": 1.492040852986882e-05, "loss": 1.0823, "step": 9103 }, { "epoch": 0.35620940605681195, "grad_norm": 0.0, "learning_rate": 1.4919305274299392e-05, "loss": 1.0005, "step": 9104 }, { "epoch": 0.3562485327490414, "grad_norm": 0.0, "learning_rate": 1.4918201939732087e-05, "loss": 1.1624, "step": 9105 }, { "epoch": 0.35628765944127083, "grad_norm": 0.0, "learning_rate": 1.4917098526184623e-05, "loss": 0.9651, "step": 9106 }, { "epoch": 0.3563267861335003, "grad_norm": 0.0, "learning_rate": 1.4915995033674715e-05, "loss": 1.0938, "step": 9107 }, { "epoch": 0.3563659128257297, "grad_norm": 0.0, "learning_rate": 1.491489146222009e-05, "loss": 1.1408, "step": 9108 }, { "epoch": 0.35640503951795915, "grad_norm": 0.0, "learning_rate": 1.4913787811838463e-05, "loss": 0.9813, "step": 9109 }, { "epoch": 0.3564441662101886, "grad_norm": 0.0, "learning_rate": 1.4912684082547564e-05, "loss": 1.103, "step": 9110 }, { "epoch": 0.35648329290241804, "grad_norm": 0.0, "learning_rate": 1.4911580274365112e-05, "loss": 1.0804, "step": 9111 }, { "epoch": 0.3565224195946475, "grad_norm": 0.0, "learning_rate": 1.4910476387308839e-05, "loss": 0.9644, "step": 9112 }, { "epoch": 0.3565615462868769, "grad_norm": 0.0, "learning_rate": 1.4909372421396464e-05, "loss": 1.1732, "step": 9113 }, { "epoch": 0.35660067297910636, "grad_norm": 0.0, "learning_rate": 1.4908268376645723e-05, "loss": 1.1055, "step": 9114 }, { "epoch": 0.3566397996713358, "grad_norm": 0.0, "learning_rate": 1.4907164253074342e-05, "loss": 1.0409, "step": 9115 }, { "epoch": 0.35667892636356524, "grad_norm": 0.0, "learning_rate": 1.4906060050700052e-05, "loss": 1.0679, "step": 9116 }, { "epoch": 0.3567180530557947, "grad_norm": 0.0, "learning_rate": 1.4904955769540585e-05, "loss": 1.128, "step": 9117 }, { "epoch": 0.3567571797480241, "grad_norm": 0.0, "learning_rate": 1.4903851409613674e-05, "loss": 1.1481, "step": 9118 }, { "epoch": 0.35679630644025356, "grad_norm": 0.0, "learning_rate": 1.4902746970937053e-05, "loss": 1.0767, "step": 9119 }, { "epoch": 0.356835433132483, "grad_norm": 0.0, "learning_rate": 1.4901642453528462e-05, "loss": 1.131, "step": 9120 }, { "epoch": 0.35687455982471245, "grad_norm": 0.0, "learning_rate": 1.4900537857405635e-05, "loss": 1.1819, "step": 9121 }, { "epoch": 0.35691368651694183, "grad_norm": 0.0, "learning_rate": 1.4899433182586307e-05, "loss": 1.1086, "step": 9122 }, { "epoch": 0.35695281320917127, "grad_norm": 0.0, "learning_rate": 1.4898328429088227e-05, "loss": 1.1031, "step": 9123 }, { "epoch": 0.3569919399014007, "grad_norm": 0.0, "learning_rate": 1.4897223596929127e-05, "loss": 1.1606, "step": 9124 }, { "epoch": 0.35703106659363015, "grad_norm": 0.0, "learning_rate": 1.4896118686126752e-05, "loss": 1.0609, "step": 9125 }, { "epoch": 0.3570701932858596, "grad_norm": 0.0, "learning_rate": 1.4895013696698847e-05, "loss": 1.0718, "step": 9126 }, { "epoch": 0.35710931997808903, "grad_norm": 0.0, "learning_rate": 1.489390862866316e-05, "loss": 1.0566, "step": 9127 }, { "epoch": 0.3571484466703185, "grad_norm": 0.0, "learning_rate": 1.4892803482037425e-05, "loss": 1.0907, "step": 9128 }, { "epoch": 0.3571875733625479, "grad_norm": 0.0, "learning_rate": 1.4891698256839402e-05, "loss": 1.1147, "step": 9129 }, { "epoch": 0.35722670005477736, "grad_norm": 0.0, "learning_rate": 1.4890592953086835e-05, "loss": 1.11, "step": 9130 }, { "epoch": 0.3572658267470068, "grad_norm": 0.0, "learning_rate": 1.4889487570797471e-05, "loss": 1.2126, "step": 9131 }, { "epoch": 0.35730495343923624, "grad_norm": 0.0, "learning_rate": 1.4888382109989065e-05, "loss": 1.3104, "step": 9132 }, { "epoch": 0.3573440801314657, "grad_norm": 0.0, "learning_rate": 1.4887276570679368e-05, "loss": 0.9772, "step": 9133 }, { "epoch": 0.3573832068236951, "grad_norm": 0.0, "learning_rate": 1.4886170952886132e-05, "loss": 0.9851, "step": 9134 }, { "epoch": 0.35742233351592456, "grad_norm": 0.0, "learning_rate": 1.4885065256627115e-05, "loss": 0.9661, "step": 9135 }, { "epoch": 0.357461460208154, "grad_norm": 0.0, "learning_rate": 1.488395948192007e-05, "loss": 1.167, "step": 9136 }, { "epoch": 0.35750058690038344, "grad_norm": 0.0, "learning_rate": 1.4882853628782756e-05, "loss": 1.0837, "step": 9137 }, { "epoch": 0.3575397135926129, "grad_norm": 0.0, "learning_rate": 1.4881747697232931e-05, "loss": 1.0201, "step": 9138 }, { "epoch": 0.3575788402848423, "grad_norm": 0.0, "learning_rate": 1.4880641687288356e-05, "loss": 1.1738, "step": 9139 }, { "epoch": 0.35761796697707177, "grad_norm": 0.0, "learning_rate": 1.487953559896679e-05, "loss": 1.1015, "step": 9140 }, { "epoch": 0.3576570936693012, "grad_norm": 0.0, "learning_rate": 1.4878429432285996e-05, "loss": 1.0915, "step": 9141 }, { "epoch": 0.35769622036153065, "grad_norm": 0.0, "learning_rate": 1.4877323187263738e-05, "loss": 1.0241, "step": 9142 }, { "epoch": 0.3577353470537601, "grad_norm": 0.0, "learning_rate": 1.4876216863917785e-05, "loss": 1.2347, "step": 9143 }, { "epoch": 0.35777447374598953, "grad_norm": 0.0, "learning_rate": 1.4875110462265898e-05, "loss": 0.9811, "step": 9144 }, { "epoch": 0.35781360043821897, "grad_norm": 0.0, "learning_rate": 1.4874003982325844e-05, "loss": 1.1505, "step": 9145 }, { "epoch": 0.3578527271304484, "grad_norm": 0.0, "learning_rate": 1.4872897424115395e-05, "loss": 1.1487, "step": 9146 }, { "epoch": 0.35789185382267785, "grad_norm": 0.0, "learning_rate": 1.4871790787652319e-05, "loss": 0.917, "step": 9147 }, { "epoch": 0.3579309805149073, "grad_norm": 0.0, "learning_rate": 1.4870684072954388e-05, "loss": 1.0352, "step": 9148 }, { "epoch": 0.35797010720713673, "grad_norm": 0.0, "learning_rate": 1.4869577280039373e-05, "loss": 1.1358, "step": 9149 }, { "epoch": 0.3580092338993662, "grad_norm": 0.0, "learning_rate": 1.4868470408925052e-05, "loss": 1.0979, "step": 9150 }, { "epoch": 0.35804836059159556, "grad_norm": 0.0, "learning_rate": 1.4867363459629191e-05, "loss": 1.0747, "step": 9151 }, { "epoch": 0.358087487283825, "grad_norm": 0.0, "learning_rate": 1.4866256432169577e-05, "loss": 1.0268, "step": 9152 }, { "epoch": 0.35812661397605444, "grad_norm": 0.0, "learning_rate": 1.486514932656398e-05, "loss": 1.0005, "step": 9153 }, { "epoch": 0.3581657406682839, "grad_norm": 0.0, "learning_rate": 1.4864042142830184e-05, "loss": 1.0773, "step": 9154 }, { "epoch": 0.3582048673605133, "grad_norm": 0.0, "learning_rate": 1.4862934880985964e-05, "loss": 1.1484, "step": 9155 }, { "epoch": 0.35824399405274276, "grad_norm": 0.0, "learning_rate": 1.4861827541049103e-05, "loss": 1.1707, "step": 9156 }, { "epoch": 0.3582831207449722, "grad_norm": 0.0, "learning_rate": 1.4860720123037385e-05, "loss": 1.1158, "step": 9157 }, { "epoch": 0.35832224743720165, "grad_norm": 0.0, "learning_rate": 1.4859612626968592e-05, "loss": 1.0834, "step": 9158 }, { "epoch": 0.3583613741294311, "grad_norm": 0.0, "learning_rate": 1.485850505286051e-05, "loss": 1.0704, "step": 9159 }, { "epoch": 0.35840050082166053, "grad_norm": 0.0, "learning_rate": 1.4857397400730924e-05, "loss": 1.0168, "step": 9160 }, { "epoch": 0.35843962751388997, "grad_norm": 0.0, "learning_rate": 1.4856289670597623e-05, "loss": 1.1714, "step": 9161 }, { "epoch": 0.3584787542061194, "grad_norm": 0.0, "learning_rate": 1.4855181862478394e-05, "loss": 1.0978, "step": 9162 }, { "epoch": 0.35851788089834885, "grad_norm": 0.0, "learning_rate": 1.485407397639103e-05, "loss": 1.1381, "step": 9163 }, { "epoch": 0.3585570075905783, "grad_norm": 0.0, "learning_rate": 1.485296601235332e-05, "loss": 1.1342, "step": 9164 }, { "epoch": 0.35859613428280773, "grad_norm": 0.0, "learning_rate": 1.4851857970383057e-05, "loss": 0.9924, "step": 9165 }, { "epoch": 0.3586352609750372, "grad_norm": 0.0, "learning_rate": 1.4850749850498036e-05, "loss": 1.1281, "step": 9166 }, { "epoch": 0.3586743876672666, "grad_norm": 0.0, "learning_rate": 1.4849641652716047e-05, "loss": 1.0837, "step": 9167 }, { "epoch": 0.35871351435949606, "grad_norm": 0.0, "learning_rate": 1.4848533377054892e-05, "loss": 1.0869, "step": 9168 }, { "epoch": 0.3587526410517255, "grad_norm": 0.0, "learning_rate": 1.4847425023532369e-05, "loss": 1.1458, "step": 9169 }, { "epoch": 0.35879176774395494, "grad_norm": 0.0, "learning_rate": 1.484631659216627e-05, "loss": 1.1738, "step": 9170 }, { "epoch": 0.3588308944361844, "grad_norm": 0.0, "learning_rate": 1.4845208082974402e-05, "loss": 1.1767, "step": 9171 }, { "epoch": 0.3588700211284138, "grad_norm": 0.0, "learning_rate": 1.4844099495974565e-05, "loss": 1.0643, "step": 9172 }, { "epoch": 0.35890914782064326, "grad_norm": 0.0, "learning_rate": 1.4842990831184559e-05, "loss": 1.0597, "step": 9173 }, { "epoch": 0.3589482745128727, "grad_norm": 0.0, "learning_rate": 1.4841882088622186e-05, "loss": 1.0092, "step": 9174 }, { "epoch": 0.35898740120510214, "grad_norm": 0.0, "learning_rate": 1.4840773268305258e-05, "loss": 1.1207, "step": 9175 }, { "epoch": 0.3590265278973316, "grad_norm": 0.0, "learning_rate": 1.4839664370251572e-05, "loss": 1.0182, "step": 9176 }, { "epoch": 0.359065654589561, "grad_norm": 0.0, "learning_rate": 1.4838555394478947e-05, "loss": 1.0842, "step": 9177 }, { "epoch": 0.35910478128179046, "grad_norm": 0.0, "learning_rate": 1.4837446341005179e-05, "loss": 1.0501, "step": 9178 }, { "epoch": 0.35914390797401985, "grad_norm": 0.0, "learning_rate": 1.4836337209848088e-05, "loss": 1.0797, "step": 9179 }, { "epoch": 0.3591830346662493, "grad_norm": 0.0, "learning_rate": 1.483522800102548e-05, "loss": 1.0309, "step": 9180 }, { "epoch": 0.35922216135847873, "grad_norm": 0.0, "learning_rate": 1.4834118714555172e-05, "loss": 1.1395, "step": 9181 }, { "epoch": 0.35926128805070817, "grad_norm": 0.0, "learning_rate": 1.4833009350454972e-05, "loss": 1.0707, "step": 9182 }, { "epoch": 0.3593004147429376, "grad_norm": 0.0, "learning_rate": 1.4831899908742699e-05, "loss": 1.0233, "step": 9183 }, { "epoch": 0.35933954143516705, "grad_norm": 0.0, "learning_rate": 1.4830790389436169e-05, "loss": 1.2071, "step": 9184 }, { "epoch": 0.3593786681273965, "grad_norm": 0.0, "learning_rate": 1.4829680792553198e-05, "loss": 1.1226, "step": 9185 }, { "epoch": 0.35941779481962594, "grad_norm": 0.0, "learning_rate": 1.4828571118111605e-05, "loss": 1.2007, "step": 9186 }, { "epoch": 0.3594569215118554, "grad_norm": 0.0, "learning_rate": 1.482746136612921e-05, "loss": 1.045, "step": 9187 }, { "epoch": 0.3594960482040848, "grad_norm": 0.0, "learning_rate": 1.4826351536623838e-05, "loss": 1.1286, "step": 9188 }, { "epoch": 0.35953517489631426, "grad_norm": 0.0, "learning_rate": 1.4825241629613304e-05, "loss": 1.2228, "step": 9189 }, { "epoch": 0.3595743015885437, "grad_norm": 0.0, "learning_rate": 1.4824131645115438e-05, "loss": 1.109, "step": 9190 }, { "epoch": 0.35961342828077314, "grad_norm": 0.0, "learning_rate": 1.482302158314806e-05, "loss": 1.0378, "step": 9191 }, { "epoch": 0.3596525549730026, "grad_norm": 0.0, "learning_rate": 1.4821911443729002e-05, "loss": 1.1537, "step": 9192 }, { "epoch": 0.359691681665232, "grad_norm": 0.0, "learning_rate": 1.482080122687609e-05, "loss": 1.2168, "step": 9193 }, { "epoch": 0.35973080835746146, "grad_norm": 0.0, "learning_rate": 1.4819690932607145e-05, "loss": 1.2112, "step": 9194 }, { "epoch": 0.3597699350496909, "grad_norm": 0.0, "learning_rate": 1.4818580560940008e-05, "loss": 0.9993, "step": 9195 }, { "epoch": 0.35980906174192034, "grad_norm": 0.0, "learning_rate": 1.4817470111892503e-05, "loss": 1.0721, "step": 9196 }, { "epoch": 0.3598481884341498, "grad_norm": 0.0, "learning_rate": 1.4816359585482465e-05, "loss": 1.113, "step": 9197 }, { "epoch": 0.3598873151263792, "grad_norm": 0.0, "learning_rate": 1.4815248981727728e-05, "loss": 1.106, "step": 9198 }, { "epoch": 0.35992644181860867, "grad_norm": 0.0, "learning_rate": 1.4814138300646127e-05, "loss": 1.2255, "step": 9199 }, { "epoch": 0.3599655685108381, "grad_norm": 0.0, "learning_rate": 1.4813027542255494e-05, "loss": 1.1519, "step": 9200 }, { "epoch": 0.36000469520306755, "grad_norm": 0.0, "learning_rate": 1.4811916706573673e-05, "loss": 1.0052, "step": 9201 }, { "epoch": 0.360043821895297, "grad_norm": 0.0, "learning_rate": 1.4810805793618498e-05, "loss": 1.0732, "step": 9202 }, { "epoch": 0.36008294858752643, "grad_norm": 0.0, "learning_rate": 1.480969480340781e-05, "loss": 1.1018, "step": 9203 }, { "epoch": 0.36012207527975587, "grad_norm": 0.0, "learning_rate": 1.4808583735959453e-05, "loss": 1.0652, "step": 9204 }, { "epoch": 0.3601612019719853, "grad_norm": 0.0, "learning_rate": 1.4807472591291263e-05, "loss": 1.1201, "step": 9205 }, { "epoch": 0.36020032866421475, "grad_norm": 0.0, "learning_rate": 1.480636136942109e-05, "loss": 1.1796, "step": 9206 }, { "epoch": 0.3602394553564442, "grad_norm": 0.0, "learning_rate": 1.4805250070366773e-05, "loss": 1.0705, "step": 9207 }, { "epoch": 0.3602785820486736, "grad_norm": 0.0, "learning_rate": 1.4804138694146163e-05, "loss": 1.1555, "step": 9208 }, { "epoch": 0.360317708740903, "grad_norm": 0.0, "learning_rate": 1.4803027240777104e-05, "loss": 1.1594, "step": 9209 }, { "epoch": 0.36035683543313246, "grad_norm": 0.0, "learning_rate": 1.4801915710277451e-05, "loss": 1.2664, "step": 9210 }, { "epoch": 0.3603959621253619, "grad_norm": 0.0, "learning_rate": 1.4800804102665045e-05, "loss": 1.1003, "step": 9211 }, { "epoch": 0.36043508881759134, "grad_norm": 0.0, "learning_rate": 1.479969241795774e-05, "loss": 1.0554, "step": 9212 }, { "epoch": 0.3604742155098208, "grad_norm": 0.0, "learning_rate": 1.4798580656173391e-05, "loss": 0.99, "step": 9213 }, { "epoch": 0.3605133422020502, "grad_norm": 0.0, "learning_rate": 1.4797468817329847e-05, "loss": 1.241, "step": 9214 }, { "epoch": 0.36055246889427967, "grad_norm": 0.0, "learning_rate": 1.479635690144497e-05, "loss": 1.0709, "step": 9215 }, { "epoch": 0.3605915955865091, "grad_norm": 0.0, "learning_rate": 1.479524490853661e-05, "loss": 1.0836, "step": 9216 }, { "epoch": 0.36063072227873855, "grad_norm": 0.0, "learning_rate": 1.4794132838622624e-05, "loss": 1.144, "step": 9217 }, { "epoch": 0.360669848970968, "grad_norm": 0.0, "learning_rate": 1.4793020691720871e-05, "loss": 1.0933, "step": 9218 }, { "epoch": 0.36070897566319743, "grad_norm": 0.0, "learning_rate": 1.4791908467849214e-05, "loss": 0.9507, "step": 9219 }, { "epoch": 0.36074810235542687, "grad_norm": 0.0, "learning_rate": 1.479079616702551e-05, "loss": 1.0997, "step": 9220 }, { "epoch": 0.3607872290476563, "grad_norm": 0.0, "learning_rate": 1.4789683789267623e-05, "loss": 1.0883, "step": 9221 }, { "epoch": 0.36082635573988575, "grad_norm": 0.0, "learning_rate": 1.4788571334593418e-05, "loss": 1.0014, "step": 9222 }, { "epoch": 0.3608654824321152, "grad_norm": 0.0, "learning_rate": 1.4787458803020755e-05, "loss": 1.0325, "step": 9223 }, { "epoch": 0.36090460912434463, "grad_norm": 0.0, "learning_rate": 1.4786346194567505e-05, "loss": 1.1263, "step": 9224 }, { "epoch": 0.3609437358165741, "grad_norm": 0.0, "learning_rate": 1.4785233509251531e-05, "loss": 1.0128, "step": 9225 }, { "epoch": 0.3609828625088035, "grad_norm": 0.0, "learning_rate": 1.4784120747090704e-05, "loss": 1.0888, "step": 9226 }, { "epoch": 0.36102198920103296, "grad_norm": 0.0, "learning_rate": 1.478300790810289e-05, "loss": 1.0437, "step": 9227 }, { "epoch": 0.3610611158932624, "grad_norm": 0.0, "learning_rate": 1.4781894992305967e-05, "loss": 1.1424, "step": 9228 }, { "epoch": 0.36110024258549184, "grad_norm": 0.0, "learning_rate": 1.4780781999717799e-05, "loss": 1.0142, "step": 9229 }, { "epoch": 0.3611393692777213, "grad_norm": 0.0, "learning_rate": 1.4779668930356265e-05, "loss": 0.9615, "step": 9230 }, { "epoch": 0.3611784959699507, "grad_norm": 0.0, "learning_rate": 1.4778555784239237e-05, "loss": 1.0795, "step": 9231 }, { "epoch": 0.36121762266218016, "grad_norm": 0.0, "learning_rate": 1.477744256138459e-05, "loss": 1.0848, "step": 9232 }, { "epoch": 0.3612567493544096, "grad_norm": 0.0, "learning_rate": 1.4776329261810204e-05, "loss": 1.1233, "step": 9233 }, { "epoch": 0.36129587604663904, "grad_norm": 0.0, "learning_rate": 1.477521588553395e-05, "loss": 1.0927, "step": 9234 }, { "epoch": 0.3613350027388685, "grad_norm": 0.0, "learning_rate": 1.4774102432573718e-05, "loss": 1.0074, "step": 9235 }, { "epoch": 0.36137412943109787, "grad_norm": 0.0, "learning_rate": 1.477298890294738e-05, "loss": 1.0873, "step": 9236 }, { "epoch": 0.3614132561233273, "grad_norm": 0.0, "learning_rate": 1.4771875296672824e-05, "loss": 1.1014, "step": 9237 }, { "epoch": 0.36145238281555675, "grad_norm": 0.0, "learning_rate": 1.4770761613767925e-05, "loss": 1.1446, "step": 9238 }, { "epoch": 0.3614915095077862, "grad_norm": 0.0, "learning_rate": 1.4769647854250578e-05, "loss": 1.0652, "step": 9239 }, { "epoch": 0.36153063620001563, "grad_norm": 0.0, "learning_rate": 1.476853401813866e-05, "loss": 1.107, "step": 9240 }, { "epoch": 0.3615697628922451, "grad_norm": 0.0, "learning_rate": 1.4767420105450064e-05, "loss": 1.0864, "step": 9241 }, { "epoch": 0.3616088895844745, "grad_norm": 0.0, "learning_rate": 1.4766306116202674e-05, "loss": 1.069, "step": 9242 }, { "epoch": 0.36164801627670395, "grad_norm": 0.0, "learning_rate": 1.4765192050414378e-05, "loss": 1.0208, "step": 9243 }, { "epoch": 0.3616871429689334, "grad_norm": 0.0, "learning_rate": 1.4764077908103071e-05, "loss": 1.0058, "step": 9244 }, { "epoch": 0.36172626966116284, "grad_norm": 0.0, "learning_rate": 1.476296368928664e-05, "loss": 1.1526, "step": 9245 }, { "epoch": 0.3617653963533923, "grad_norm": 0.0, "learning_rate": 1.4761849393982983e-05, "loss": 0.994, "step": 9246 }, { "epoch": 0.3618045230456217, "grad_norm": 0.0, "learning_rate": 1.4760735022209992e-05, "loss": 1.1253, "step": 9247 }, { "epoch": 0.36184364973785116, "grad_norm": 0.0, "learning_rate": 1.4759620573985561e-05, "loss": 1.1083, "step": 9248 }, { "epoch": 0.3618827764300806, "grad_norm": 0.0, "learning_rate": 1.4758506049327586e-05, "loss": 1.0487, "step": 9249 }, { "epoch": 0.36192190312231004, "grad_norm": 0.0, "learning_rate": 1.4757391448253968e-05, "loss": 1.1252, "step": 9250 }, { "epoch": 0.3619610298145395, "grad_norm": 0.0, "learning_rate": 1.4756276770782607e-05, "loss": 1.2333, "step": 9251 }, { "epoch": 0.3620001565067689, "grad_norm": 0.0, "learning_rate": 1.4755162016931397e-05, "loss": 1.1399, "step": 9252 }, { "epoch": 0.36203928319899836, "grad_norm": 0.0, "learning_rate": 1.4754047186718245e-05, "loss": 1.1595, "step": 9253 }, { "epoch": 0.3620784098912278, "grad_norm": 0.0, "learning_rate": 1.4752932280161055e-05, "loss": 1.0916, "step": 9254 }, { "epoch": 0.36211753658345724, "grad_norm": 0.0, "learning_rate": 1.4751817297277725e-05, "loss": 1.0759, "step": 9255 }, { "epoch": 0.3621566632756867, "grad_norm": 0.0, "learning_rate": 1.4750702238086164e-05, "loss": 1.1783, "step": 9256 }, { "epoch": 0.3621957899679161, "grad_norm": 0.0, "learning_rate": 1.4749587102604279e-05, "loss": 1.2032, "step": 9257 }, { "epoch": 0.36223491666014557, "grad_norm": 0.0, "learning_rate": 1.4748471890849979e-05, "loss": 0.9703, "step": 9258 }, { "epoch": 0.362274043352375, "grad_norm": 0.0, "learning_rate": 1.4747356602841167e-05, "loss": 1.1539, "step": 9259 }, { "epoch": 0.36231317004460445, "grad_norm": 0.0, "learning_rate": 1.4746241238595757e-05, "loss": 1.0464, "step": 9260 }, { "epoch": 0.3623522967368339, "grad_norm": 0.0, "learning_rate": 1.4745125798131664e-05, "loss": 1.0142, "step": 9261 }, { "epoch": 0.36239142342906333, "grad_norm": 0.0, "learning_rate": 1.4744010281466792e-05, "loss": 1.1644, "step": 9262 }, { "epoch": 0.36243055012129277, "grad_norm": 0.0, "learning_rate": 1.4742894688619061e-05, "loss": 1.0634, "step": 9263 }, { "epoch": 0.3624696768135222, "grad_norm": 0.0, "learning_rate": 1.4741779019606386e-05, "loss": 1.0953, "step": 9264 }, { "epoch": 0.3625088035057516, "grad_norm": 0.0, "learning_rate": 1.4740663274446677e-05, "loss": 1.1641, "step": 9265 }, { "epoch": 0.36254793019798104, "grad_norm": 0.0, "learning_rate": 1.473954745315786e-05, "loss": 1.1139, "step": 9266 }, { "epoch": 0.3625870568902105, "grad_norm": 0.0, "learning_rate": 1.473843155575785e-05, "loss": 1.0862, "step": 9267 }, { "epoch": 0.3626261835824399, "grad_norm": 0.0, "learning_rate": 1.4737315582264566e-05, "loss": 1.0614, "step": 9268 }, { "epoch": 0.36266531027466936, "grad_norm": 0.0, "learning_rate": 1.4736199532695929e-05, "loss": 1.0097, "step": 9269 }, { "epoch": 0.3627044369668988, "grad_norm": 0.0, "learning_rate": 1.4735083407069866e-05, "loss": 1.0575, "step": 9270 }, { "epoch": 0.36274356365912824, "grad_norm": 0.0, "learning_rate": 1.4733967205404293e-05, "loss": 1.1132, "step": 9271 }, { "epoch": 0.3627826903513577, "grad_norm": 0.0, "learning_rate": 1.4732850927717139e-05, "loss": 1.1244, "step": 9272 }, { "epoch": 0.3628218170435871, "grad_norm": 0.0, "learning_rate": 1.4731734574026334e-05, "loss": 1.0644, "step": 9273 }, { "epoch": 0.36286094373581657, "grad_norm": 0.0, "learning_rate": 1.4730618144349795e-05, "loss": 1.2094, "step": 9274 }, { "epoch": 0.362900070428046, "grad_norm": 0.0, "learning_rate": 1.4729501638705461e-05, "loss": 1.1097, "step": 9275 }, { "epoch": 0.36293919712027545, "grad_norm": 0.0, "learning_rate": 1.4728385057111254e-05, "loss": 1.0199, "step": 9276 }, { "epoch": 0.3629783238125049, "grad_norm": 0.0, "learning_rate": 1.4727268399585109e-05, "loss": 1.0565, "step": 9277 }, { "epoch": 0.36301745050473433, "grad_norm": 0.0, "learning_rate": 1.4726151666144954e-05, "loss": 1.1317, "step": 9278 }, { "epoch": 0.36305657719696377, "grad_norm": 0.0, "learning_rate": 1.472503485680873e-05, "loss": 1.1281, "step": 9279 }, { "epoch": 0.3630957038891932, "grad_norm": 0.0, "learning_rate": 1.4723917971594368e-05, "loss": 1.0969, "step": 9280 }, { "epoch": 0.36313483058142265, "grad_norm": 0.0, "learning_rate": 1.4722801010519799e-05, "loss": 1.0468, "step": 9281 }, { "epoch": 0.3631739572736521, "grad_norm": 0.0, "learning_rate": 1.4721683973602965e-05, "loss": 1.1187, "step": 9282 }, { "epoch": 0.36321308396588153, "grad_norm": 0.0, "learning_rate": 1.4720566860861802e-05, "loss": 1.1048, "step": 9283 }, { "epoch": 0.363252210658111, "grad_norm": 0.0, "learning_rate": 1.4719449672314252e-05, "loss": 1.047, "step": 9284 }, { "epoch": 0.3632913373503404, "grad_norm": 0.0, "learning_rate": 1.4718332407978252e-05, "loss": 1.1381, "step": 9285 }, { "epoch": 0.36333046404256986, "grad_norm": 0.0, "learning_rate": 1.471721506787175e-05, "loss": 1.2054, "step": 9286 }, { "epoch": 0.3633695907347993, "grad_norm": 0.0, "learning_rate": 1.4716097652012683e-05, "loss": 1.1649, "step": 9287 }, { "epoch": 0.36340871742702874, "grad_norm": 0.0, "learning_rate": 1.4714980160418995e-05, "loss": 1.0209, "step": 9288 }, { "epoch": 0.3634478441192582, "grad_norm": 0.0, "learning_rate": 1.4713862593108637e-05, "loss": 1.1644, "step": 9289 }, { "epoch": 0.3634869708114876, "grad_norm": 0.0, "learning_rate": 1.4712744950099551e-05, "loss": 1.0587, "step": 9290 }, { "epoch": 0.36352609750371706, "grad_norm": 0.0, "learning_rate": 1.4711627231409686e-05, "loss": 1.0692, "step": 9291 }, { "epoch": 0.3635652241959465, "grad_norm": 0.0, "learning_rate": 1.4710509437056992e-05, "loss": 1.1236, "step": 9292 }, { "epoch": 0.3636043508881759, "grad_norm": 0.0, "learning_rate": 1.470939156705942e-05, "loss": 1.0864, "step": 9293 }, { "epoch": 0.3636434775804053, "grad_norm": 0.0, "learning_rate": 1.470827362143492e-05, "loss": 1.0872, "step": 9294 }, { "epoch": 0.36368260427263477, "grad_norm": 0.0, "learning_rate": 1.4707155600201447e-05, "loss": 1.0266, "step": 9295 }, { "epoch": 0.3637217309648642, "grad_norm": 0.0, "learning_rate": 1.4706037503376948e-05, "loss": 1.1531, "step": 9296 }, { "epoch": 0.36376085765709365, "grad_norm": 0.0, "learning_rate": 1.470491933097939e-05, "loss": 1.1071, "step": 9297 }, { "epoch": 0.3637999843493231, "grad_norm": 0.0, "learning_rate": 1.470380108302672e-05, "loss": 0.9628, "step": 9298 }, { "epoch": 0.36383911104155253, "grad_norm": 0.0, "learning_rate": 1.47026827595369e-05, "loss": 1.1635, "step": 9299 }, { "epoch": 0.363878237733782, "grad_norm": 0.0, "learning_rate": 1.470156436052789e-05, "loss": 1.1541, "step": 9300 }, { "epoch": 0.3639173644260114, "grad_norm": 0.0, "learning_rate": 1.4700445886017643e-05, "loss": 1.1124, "step": 9301 }, { "epoch": 0.36395649111824085, "grad_norm": 0.0, "learning_rate": 1.4699327336024127e-05, "loss": 1.1787, "step": 9302 }, { "epoch": 0.3639956178104703, "grad_norm": 0.0, "learning_rate": 1.4698208710565302e-05, "loss": 1.0702, "step": 9303 }, { "epoch": 0.36403474450269974, "grad_norm": 0.0, "learning_rate": 1.4697090009659131e-05, "loss": 1.0826, "step": 9304 }, { "epoch": 0.3640738711949292, "grad_norm": 0.0, "learning_rate": 1.4695971233323584e-05, "loss": 1.0728, "step": 9305 }, { "epoch": 0.3641129978871586, "grad_norm": 0.0, "learning_rate": 1.4694852381576622e-05, "loss": 1.0849, "step": 9306 }, { "epoch": 0.36415212457938806, "grad_norm": 0.0, "learning_rate": 1.4693733454436214e-05, "loss": 1.2011, "step": 9307 }, { "epoch": 0.3641912512716175, "grad_norm": 0.0, "learning_rate": 1.4692614451920328e-05, "loss": 1.0495, "step": 9308 }, { "epoch": 0.36423037796384694, "grad_norm": 0.0, "learning_rate": 1.4691495374046933e-05, "loss": 1.0459, "step": 9309 }, { "epoch": 0.3642695046560764, "grad_norm": 0.0, "learning_rate": 1.4690376220834002e-05, "loss": 1.0758, "step": 9310 }, { "epoch": 0.3643086313483058, "grad_norm": 0.0, "learning_rate": 1.4689256992299506e-05, "loss": 1.0818, "step": 9311 }, { "epoch": 0.36434775804053526, "grad_norm": 0.0, "learning_rate": 1.4688137688461419e-05, "loss": 1.0443, "step": 9312 }, { "epoch": 0.3643868847327647, "grad_norm": 0.0, "learning_rate": 1.4687018309337716e-05, "loss": 1.191, "step": 9313 }, { "epoch": 0.36442601142499415, "grad_norm": 0.0, "learning_rate": 1.468589885494637e-05, "loss": 1.2015, "step": 9314 }, { "epoch": 0.3644651381172236, "grad_norm": 0.0, "learning_rate": 1.4684779325305361e-05, "loss": 1.0515, "step": 9315 }, { "epoch": 0.364504264809453, "grad_norm": 0.0, "learning_rate": 1.4683659720432663e-05, "loss": 1.116, "step": 9316 }, { "epoch": 0.36454339150168247, "grad_norm": 0.0, "learning_rate": 1.4682540040346265e-05, "loss": 1.1703, "step": 9317 }, { "epoch": 0.3645825181939119, "grad_norm": 0.0, "learning_rate": 1.468142028506414e-05, "loss": 1.1018, "step": 9318 }, { "epoch": 0.36462164488614135, "grad_norm": 0.0, "learning_rate": 1.4680300454604267e-05, "loss": 1.0671, "step": 9319 }, { "epoch": 0.3646607715783708, "grad_norm": 0.0, "learning_rate": 1.4679180548984639e-05, "loss": 1.1716, "step": 9320 }, { "epoch": 0.3646998982706002, "grad_norm": 0.0, "learning_rate": 1.4678060568223232e-05, "loss": 1.0866, "step": 9321 }, { "epoch": 0.3647390249628296, "grad_norm": 0.0, "learning_rate": 1.4676940512338035e-05, "loss": 1.1307, "step": 9322 }, { "epoch": 0.36477815165505906, "grad_norm": 0.0, "learning_rate": 1.4675820381347032e-05, "loss": 1.0707, "step": 9323 }, { "epoch": 0.3648172783472885, "grad_norm": 0.0, "learning_rate": 1.4674700175268215e-05, "loss": 0.9666, "step": 9324 }, { "epoch": 0.36485640503951794, "grad_norm": 0.0, "learning_rate": 1.4673579894119572e-05, "loss": 1.1324, "step": 9325 }, { "epoch": 0.3648955317317474, "grad_norm": 0.0, "learning_rate": 1.467245953791909e-05, "loss": 1.0504, "step": 9326 }, { "epoch": 0.3649346584239768, "grad_norm": 0.0, "learning_rate": 1.4671339106684762e-05, "loss": 1.0466, "step": 9327 }, { "epoch": 0.36497378511620626, "grad_norm": 0.0, "learning_rate": 1.4670218600434583e-05, "loss": 1.0045, "step": 9328 }, { "epoch": 0.3650129118084357, "grad_norm": 0.0, "learning_rate": 1.4669098019186546e-05, "loss": 1.1158, "step": 9329 }, { "epoch": 0.36505203850066514, "grad_norm": 0.0, "learning_rate": 1.4667977362958644e-05, "loss": 1.2082, "step": 9330 }, { "epoch": 0.3650911651928946, "grad_norm": 0.0, "learning_rate": 1.4666856631768875e-05, "loss": 1.2363, "step": 9331 }, { "epoch": 0.365130291885124, "grad_norm": 0.0, "learning_rate": 1.4665735825635236e-05, "loss": 1.2063, "step": 9332 }, { "epoch": 0.36516941857735347, "grad_norm": 0.0, "learning_rate": 1.4664614944575728e-05, "loss": 1.0746, "step": 9333 }, { "epoch": 0.3652085452695829, "grad_norm": 0.0, "learning_rate": 1.4663493988608348e-05, "loss": 1.038, "step": 9334 }, { "epoch": 0.36524767196181235, "grad_norm": 0.0, "learning_rate": 1.46623729577511e-05, "loss": 1.1588, "step": 9335 }, { "epoch": 0.3652867986540418, "grad_norm": 0.0, "learning_rate": 1.466125185202198e-05, "loss": 1.117, "step": 9336 }, { "epoch": 0.36532592534627123, "grad_norm": 0.0, "learning_rate": 1.4660130671439002e-05, "loss": 1.1988, "step": 9337 }, { "epoch": 0.36536505203850067, "grad_norm": 0.0, "learning_rate": 1.465900941602016e-05, "loss": 1.0974, "step": 9338 }, { "epoch": 0.3654041787307301, "grad_norm": 0.0, "learning_rate": 1.4657888085783468e-05, "loss": 1.1047, "step": 9339 }, { "epoch": 0.36544330542295955, "grad_norm": 0.0, "learning_rate": 1.4656766680746927e-05, "loss": 1.0989, "step": 9340 }, { "epoch": 0.365482432115189, "grad_norm": 0.0, "learning_rate": 1.465564520092855e-05, "loss": 1.0132, "step": 9341 }, { "epoch": 0.36552155880741843, "grad_norm": 0.0, "learning_rate": 1.4654523646346345e-05, "loss": 0.9304, "step": 9342 }, { "epoch": 0.3655606854996479, "grad_norm": 0.0, "learning_rate": 1.4653402017018323e-05, "loss": 1.1104, "step": 9343 }, { "epoch": 0.3655998121918773, "grad_norm": 0.0, "learning_rate": 1.4652280312962496e-05, "loss": 0.9711, "step": 9344 }, { "epoch": 0.36563893888410676, "grad_norm": 0.0, "learning_rate": 1.4651158534196876e-05, "loss": 0.9505, "step": 9345 }, { "epoch": 0.3656780655763362, "grad_norm": 0.0, "learning_rate": 1.4650036680739479e-05, "loss": 0.9962, "step": 9346 }, { "epoch": 0.36571719226856564, "grad_norm": 0.0, "learning_rate": 1.464891475260832e-05, "loss": 1.1196, "step": 9347 }, { "epoch": 0.3657563189607951, "grad_norm": 0.0, "learning_rate": 1.4647792749821414e-05, "loss": 1.073, "step": 9348 }, { "epoch": 0.3657954456530245, "grad_norm": 0.0, "learning_rate": 1.4646670672396781e-05, "loss": 1.0273, "step": 9349 }, { "epoch": 0.3658345723452539, "grad_norm": 0.0, "learning_rate": 1.464554852035244e-05, "loss": 1.0591, "step": 9350 }, { "epoch": 0.36587369903748335, "grad_norm": 0.0, "learning_rate": 1.464442629370641e-05, "loss": 1.1251, "step": 9351 }, { "epoch": 0.3659128257297128, "grad_norm": 0.0, "learning_rate": 1.4643303992476715e-05, "loss": 1.2606, "step": 9352 }, { "epoch": 0.36595195242194223, "grad_norm": 0.0, "learning_rate": 1.4642181616681375e-05, "loss": 1.0992, "step": 9353 }, { "epoch": 0.36599107911417167, "grad_norm": 0.0, "learning_rate": 1.4641059166338413e-05, "loss": 1.0725, "step": 9354 }, { "epoch": 0.3660302058064011, "grad_norm": 0.0, "learning_rate": 1.4639936641465861e-05, "loss": 1.0161, "step": 9355 }, { "epoch": 0.36606933249863055, "grad_norm": 0.0, "learning_rate": 1.463881404208174e-05, "loss": 1.1367, "step": 9356 }, { "epoch": 0.36610845919086, "grad_norm": 0.0, "learning_rate": 1.4637691368204076e-05, "loss": 1.1152, "step": 9357 }, { "epoch": 0.36614758588308943, "grad_norm": 0.0, "learning_rate": 1.4636568619850902e-05, "loss": 1.0429, "step": 9358 }, { "epoch": 0.3661867125753189, "grad_norm": 0.0, "learning_rate": 1.4635445797040245e-05, "loss": 1.0571, "step": 9359 }, { "epoch": 0.3662258392675483, "grad_norm": 0.0, "learning_rate": 1.4634322899790137e-05, "loss": 1.0917, "step": 9360 }, { "epoch": 0.36626496595977776, "grad_norm": 0.0, "learning_rate": 1.4633199928118608e-05, "loss": 1.0359, "step": 9361 }, { "epoch": 0.3663040926520072, "grad_norm": 0.0, "learning_rate": 1.4632076882043698e-05, "loss": 1.0904, "step": 9362 }, { "epoch": 0.36634321934423664, "grad_norm": 0.0, "learning_rate": 1.4630953761583433e-05, "loss": 1.0489, "step": 9363 }, { "epoch": 0.3663823460364661, "grad_norm": 0.0, "learning_rate": 1.4629830566755858e-05, "loss": 1.1281, "step": 9364 }, { "epoch": 0.3664214727286955, "grad_norm": 0.0, "learning_rate": 1.4628707297579002e-05, "loss": 1.0359, "step": 9365 }, { "epoch": 0.36646059942092496, "grad_norm": 0.0, "learning_rate": 1.4627583954070909e-05, "loss": 1.2154, "step": 9366 }, { "epoch": 0.3664997261131544, "grad_norm": 0.0, "learning_rate": 1.4626460536249619e-05, "loss": 1.1062, "step": 9367 }, { "epoch": 0.36653885280538384, "grad_norm": 0.0, "learning_rate": 1.4625337044133163e-05, "loss": 1.1393, "step": 9368 }, { "epoch": 0.3665779794976133, "grad_norm": 0.0, "learning_rate": 1.4624213477739595e-05, "loss": 1.131, "step": 9369 }, { "epoch": 0.3666171061898427, "grad_norm": 0.0, "learning_rate": 1.4623089837086953e-05, "loss": 1.0703, "step": 9370 }, { "epoch": 0.36665623288207216, "grad_norm": 0.0, "learning_rate": 1.4621966122193282e-05, "loss": 1.1847, "step": 9371 }, { "epoch": 0.3666953595743016, "grad_norm": 0.0, "learning_rate": 1.4620842333076622e-05, "loss": 1.1226, "step": 9372 }, { "epoch": 0.36673448626653105, "grad_norm": 0.0, "learning_rate": 1.4619718469755029e-05, "loss": 1.0816, "step": 9373 }, { "epoch": 0.3667736129587605, "grad_norm": 0.0, "learning_rate": 1.4618594532246544e-05, "loss": 1.0817, "step": 9374 }, { "epoch": 0.3668127396509899, "grad_norm": 0.0, "learning_rate": 1.4617470520569219e-05, "loss": 1.1421, "step": 9375 }, { "epoch": 0.36685186634321937, "grad_norm": 0.0, "learning_rate": 1.4616346434741104e-05, "loss": 0.9765, "step": 9376 }, { "epoch": 0.3668909930354488, "grad_norm": 0.0, "learning_rate": 1.4615222274780248e-05, "loss": 1.0686, "step": 9377 }, { "epoch": 0.3669301197276782, "grad_norm": 0.0, "learning_rate": 1.4614098040704708e-05, "loss": 1.0996, "step": 9378 }, { "epoch": 0.36696924641990764, "grad_norm": 0.0, "learning_rate": 1.4612973732532531e-05, "loss": 1.1562, "step": 9379 }, { "epoch": 0.3670083731121371, "grad_norm": 0.0, "learning_rate": 1.4611849350281782e-05, "loss": 1.1147, "step": 9380 }, { "epoch": 0.3670474998043665, "grad_norm": 0.0, "learning_rate": 1.4610724893970507e-05, "loss": 1.161, "step": 9381 }, { "epoch": 0.36708662649659596, "grad_norm": 0.0, "learning_rate": 1.460960036361677e-05, "loss": 1.1871, "step": 9382 }, { "epoch": 0.3671257531888254, "grad_norm": 0.0, "learning_rate": 1.4608475759238625e-05, "loss": 1.1493, "step": 9383 }, { "epoch": 0.36716487988105484, "grad_norm": 0.0, "learning_rate": 1.4607351080854138e-05, "loss": 1.0945, "step": 9384 }, { "epoch": 0.3672040065732843, "grad_norm": 0.0, "learning_rate": 1.4606226328481361e-05, "loss": 1.2526, "step": 9385 }, { "epoch": 0.3672431332655137, "grad_norm": 0.0, "learning_rate": 1.4605101502138363e-05, "loss": 1.211, "step": 9386 }, { "epoch": 0.36728225995774316, "grad_norm": 0.0, "learning_rate": 1.4603976601843208e-05, "loss": 1.0414, "step": 9387 }, { "epoch": 0.3673213866499726, "grad_norm": 0.0, "learning_rate": 1.4602851627613954e-05, "loss": 1.0137, "step": 9388 }, { "epoch": 0.36736051334220204, "grad_norm": 0.0, "learning_rate": 1.4601726579468674e-05, "loss": 1.0771, "step": 9389 }, { "epoch": 0.3673996400344315, "grad_norm": 0.0, "learning_rate": 1.460060145742543e-05, "loss": 1.0842, "step": 9390 }, { "epoch": 0.3674387667266609, "grad_norm": 0.0, "learning_rate": 1.4599476261502292e-05, "loss": 0.9964, "step": 9391 }, { "epoch": 0.36747789341889037, "grad_norm": 0.0, "learning_rate": 1.4598350991717329e-05, "loss": 1.0392, "step": 9392 }, { "epoch": 0.3675170201111198, "grad_norm": 0.0, "learning_rate": 1.459722564808861e-05, "loss": 1.0535, "step": 9393 }, { "epoch": 0.36755614680334925, "grad_norm": 0.0, "learning_rate": 1.4596100230634208e-05, "loss": 1.1033, "step": 9394 }, { "epoch": 0.3675952734955787, "grad_norm": 0.0, "learning_rate": 1.4594974739372196e-05, "loss": 1.0386, "step": 9395 }, { "epoch": 0.36763440018780813, "grad_norm": 0.0, "learning_rate": 1.4593849174320648e-05, "loss": 0.8586, "step": 9396 }, { "epoch": 0.36767352688003757, "grad_norm": 0.0, "learning_rate": 1.4592723535497637e-05, "loss": 1.0694, "step": 9397 }, { "epoch": 0.367712653572267, "grad_norm": 0.0, "learning_rate": 1.4591597822921242e-05, "loss": 1.121, "step": 9398 }, { "epoch": 0.36775178026449645, "grad_norm": 0.0, "learning_rate": 1.459047203660954e-05, "loss": 1.0996, "step": 9399 }, { "epoch": 0.3677909069567259, "grad_norm": 0.0, "learning_rate": 1.4589346176580608e-05, "loss": 1.0986, "step": 9400 }, { "epoch": 0.36783003364895533, "grad_norm": 0.0, "learning_rate": 1.4588220242852527e-05, "loss": 1.1371, "step": 9401 }, { "epoch": 0.3678691603411848, "grad_norm": 0.0, "learning_rate": 1.458709423544338e-05, "loss": 1.0378, "step": 9402 }, { "epoch": 0.3679082870334142, "grad_norm": 0.0, "learning_rate": 1.4585968154371246e-05, "loss": 1.1302, "step": 9403 }, { "epoch": 0.36794741372564366, "grad_norm": 0.0, "learning_rate": 1.4584841999654212e-05, "loss": 1.1503, "step": 9404 }, { "epoch": 0.3679865404178731, "grad_norm": 0.0, "learning_rate": 1.4583715771310358e-05, "loss": 1.0845, "step": 9405 }, { "epoch": 0.36802566711010254, "grad_norm": 0.0, "learning_rate": 1.4582589469357773e-05, "loss": 1.1126, "step": 9406 }, { "epoch": 0.3680647938023319, "grad_norm": 0.0, "learning_rate": 1.4581463093814545e-05, "loss": 1.1639, "step": 9407 }, { "epoch": 0.36810392049456137, "grad_norm": 0.0, "learning_rate": 1.4580336644698758e-05, "loss": 0.9242, "step": 9408 }, { "epoch": 0.3681430471867908, "grad_norm": 0.0, "learning_rate": 1.4579210122028506e-05, "loss": 1.0787, "step": 9409 }, { "epoch": 0.36818217387902025, "grad_norm": 0.0, "learning_rate": 1.4578083525821876e-05, "loss": 1.132, "step": 9410 }, { "epoch": 0.3682213005712497, "grad_norm": 0.0, "learning_rate": 1.4576956856096965e-05, "loss": 1.059, "step": 9411 }, { "epoch": 0.36826042726347913, "grad_norm": 0.0, "learning_rate": 1.4575830112871855e-05, "loss": 1.1376, "step": 9412 }, { "epoch": 0.36829955395570857, "grad_norm": 0.0, "learning_rate": 1.4574703296164654e-05, "loss": 1.1312, "step": 9413 }, { "epoch": 0.368338680647938, "grad_norm": 0.0, "learning_rate": 1.4573576405993449e-05, "loss": 1.0836, "step": 9414 }, { "epoch": 0.36837780734016745, "grad_norm": 0.0, "learning_rate": 1.4572449442376337e-05, "loss": 1.076, "step": 9415 }, { "epoch": 0.3684169340323969, "grad_norm": 0.0, "learning_rate": 1.4571322405331416e-05, "loss": 1.0403, "step": 9416 }, { "epoch": 0.36845606072462633, "grad_norm": 0.0, "learning_rate": 1.4570195294876785e-05, "loss": 0.9989, "step": 9417 }, { "epoch": 0.3684951874168558, "grad_norm": 0.0, "learning_rate": 1.4569068111030546e-05, "loss": 1.1409, "step": 9418 }, { "epoch": 0.3685343141090852, "grad_norm": 0.0, "learning_rate": 1.4567940853810797e-05, "loss": 1.0616, "step": 9419 }, { "epoch": 0.36857344080131466, "grad_norm": 0.0, "learning_rate": 1.4566813523235643e-05, "loss": 1.1342, "step": 9420 }, { "epoch": 0.3686125674935441, "grad_norm": 0.0, "learning_rate": 1.4565686119323187e-05, "loss": 0.9397, "step": 9421 }, { "epoch": 0.36865169418577354, "grad_norm": 0.0, "learning_rate": 1.4564558642091533e-05, "loss": 1.0905, "step": 9422 }, { "epoch": 0.368690820878003, "grad_norm": 0.0, "learning_rate": 1.4563431091558785e-05, "loss": 1.1898, "step": 9423 }, { "epoch": 0.3687299475702324, "grad_norm": 0.0, "learning_rate": 1.4562303467743053e-05, "loss": 1.1258, "step": 9424 }, { "epoch": 0.36876907426246186, "grad_norm": 0.0, "learning_rate": 1.4561175770662446e-05, "loss": 1.2086, "step": 9425 }, { "epoch": 0.3688082009546913, "grad_norm": 0.0, "learning_rate": 1.4560048000335067e-05, "loss": 1.0717, "step": 9426 }, { "epoch": 0.36884732764692074, "grad_norm": 0.0, "learning_rate": 1.4558920156779034e-05, "loss": 1.1142, "step": 9427 }, { "epoch": 0.3688864543391502, "grad_norm": 0.0, "learning_rate": 1.4557792240012457e-05, "loss": 1.1863, "step": 9428 }, { "epoch": 0.3689255810313796, "grad_norm": 0.0, "learning_rate": 1.4556664250053447e-05, "loss": 1.1643, "step": 9429 }, { "epoch": 0.36896470772360906, "grad_norm": 0.0, "learning_rate": 1.4555536186920115e-05, "loss": 1.1042, "step": 9430 }, { "epoch": 0.3690038344158385, "grad_norm": 0.0, "learning_rate": 1.4554408050630589e-05, "loss": 1.1219, "step": 9431 }, { "epoch": 0.36904296110806795, "grad_norm": 0.0, "learning_rate": 1.455327984120297e-05, "loss": 1.083, "step": 9432 }, { "epoch": 0.3690820878002974, "grad_norm": 0.0, "learning_rate": 1.4552151558655383e-05, "loss": 1.1724, "step": 9433 }, { "epoch": 0.36912121449252683, "grad_norm": 0.0, "learning_rate": 1.4551023203005947e-05, "loss": 1.1729, "step": 9434 }, { "epoch": 0.3691603411847562, "grad_norm": 0.0, "learning_rate": 1.4549894774272781e-05, "loss": 1.1362, "step": 9435 }, { "epoch": 0.36919946787698565, "grad_norm": 0.0, "learning_rate": 1.4548766272474008e-05, "loss": 1.0878, "step": 9436 }, { "epoch": 0.3692385945692151, "grad_norm": 0.0, "learning_rate": 1.4547637697627747e-05, "loss": 1.1105, "step": 9437 }, { "epoch": 0.36927772126144454, "grad_norm": 0.0, "learning_rate": 1.4546509049752122e-05, "loss": 1.1652, "step": 9438 }, { "epoch": 0.369316847953674, "grad_norm": 0.0, "learning_rate": 1.4545380328865261e-05, "loss": 1.2395, "step": 9439 }, { "epoch": 0.3693559746459034, "grad_norm": 0.0, "learning_rate": 1.4544251534985288e-05, "loss": 1.1951, "step": 9440 }, { "epoch": 0.36939510133813286, "grad_norm": 0.0, "learning_rate": 1.4543122668130327e-05, "loss": 1.1271, "step": 9441 }, { "epoch": 0.3694342280303623, "grad_norm": 0.0, "learning_rate": 1.4541993728318511e-05, "loss": 1.0656, "step": 9442 }, { "epoch": 0.36947335472259174, "grad_norm": 0.0, "learning_rate": 1.4540864715567967e-05, "loss": 1.1511, "step": 9443 }, { "epoch": 0.3695124814148212, "grad_norm": 0.0, "learning_rate": 1.4539735629896824e-05, "loss": 1.2549, "step": 9444 }, { "epoch": 0.3695516081070506, "grad_norm": 0.0, "learning_rate": 1.4538606471323218e-05, "loss": 1.0404, "step": 9445 }, { "epoch": 0.36959073479928006, "grad_norm": 0.0, "learning_rate": 1.4537477239865275e-05, "loss": 1.0913, "step": 9446 }, { "epoch": 0.3696298614915095, "grad_norm": 0.0, "learning_rate": 1.4536347935541138e-05, "loss": 1.1384, "step": 9447 }, { "epoch": 0.36966898818373894, "grad_norm": 0.0, "learning_rate": 1.4535218558368934e-05, "loss": 0.9851, "step": 9448 }, { "epoch": 0.3697081148759684, "grad_norm": 0.0, "learning_rate": 1.4534089108366805e-05, "loss": 1.0546, "step": 9449 }, { "epoch": 0.3697472415681978, "grad_norm": 0.0, "learning_rate": 1.4532959585552885e-05, "loss": 1.194, "step": 9450 }, { "epoch": 0.36978636826042727, "grad_norm": 0.0, "learning_rate": 1.4531829989945315e-05, "loss": 1.0771, "step": 9451 }, { "epoch": 0.3698254949526567, "grad_norm": 0.0, "learning_rate": 1.453070032156223e-05, "loss": 1.0628, "step": 9452 }, { "epoch": 0.36986462164488615, "grad_norm": 0.0, "learning_rate": 1.4529570580421782e-05, "loss": 1.1534, "step": 9453 }, { "epoch": 0.3699037483371156, "grad_norm": 0.0, "learning_rate": 1.4528440766542104e-05, "loss": 1.1212, "step": 9454 }, { "epoch": 0.36994287502934503, "grad_norm": 0.0, "learning_rate": 1.4527310879941339e-05, "loss": 1.1991, "step": 9455 }, { "epoch": 0.36998200172157447, "grad_norm": 0.0, "learning_rate": 1.4526180920637637e-05, "loss": 1.1371, "step": 9456 }, { "epoch": 0.3700211284138039, "grad_norm": 0.0, "learning_rate": 1.4525050888649139e-05, "loss": 1.0954, "step": 9457 }, { "epoch": 0.37006025510603335, "grad_norm": 0.0, "learning_rate": 1.4523920783993997e-05, "loss": 1.0031, "step": 9458 }, { "epoch": 0.3700993817982628, "grad_norm": 0.0, "learning_rate": 1.4522790606690354e-05, "loss": 1.0662, "step": 9459 }, { "epoch": 0.37013850849049224, "grad_norm": 0.0, "learning_rate": 1.4521660356756361e-05, "loss": 1.1301, "step": 9460 }, { "epoch": 0.3701776351827217, "grad_norm": 0.0, "learning_rate": 1.4520530034210168e-05, "loss": 1.0678, "step": 9461 }, { "epoch": 0.3702167618749511, "grad_norm": 0.0, "learning_rate": 1.4519399639069929e-05, "loss": 1.0608, "step": 9462 }, { "epoch": 0.37025588856718056, "grad_norm": 0.0, "learning_rate": 1.4518269171353796e-05, "loss": 1.0614, "step": 9463 }, { "epoch": 0.37029501525940994, "grad_norm": 0.0, "learning_rate": 1.451713863107992e-05, "loss": 1.0521, "step": 9464 }, { "epoch": 0.3703341419516394, "grad_norm": 0.0, "learning_rate": 1.4516008018266457e-05, "loss": 1.1377, "step": 9465 }, { "epoch": 0.3703732686438688, "grad_norm": 0.0, "learning_rate": 1.4514877332931564e-05, "loss": 1.1707, "step": 9466 }, { "epoch": 0.37041239533609827, "grad_norm": 0.0, "learning_rate": 1.4513746575093401e-05, "loss": 1.2516, "step": 9467 }, { "epoch": 0.3704515220283277, "grad_norm": 0.0, "learning_rate": 1.4512615744770122e-05, "loss": 1.1129, "step": 9468 }, { "epoch": 0.37049064872055715, "grad_norm": 0.0, "learning_rate": 1.451148484197989e-05, "loss": 1.071, "step": 9469 }, { "epoch": 0.3705297754127866, "grad_norm": 0.0, "learning_rate": 1.4510353866740863e-05, "loss": 1.1836, "step": 9470 }, { "epoch": 0.37056890210501603, "grad_norm": 0.0, "learning_rate": 1.4509222819071207e-05, "loss": 1.0577, "step": 9471 }, { "epoch": 0.37060802879724547, "grad_norm": 0.0, "learning_rate": 1.4508091698989079e-05, "loss": 1.0773, "step": 9472 }, { "epoch": 0.3706471554894749, "grad_norm": 0.0, "learning_rate": 1.4506960506512652e-05, "loss": 1.0572, "step": 9473 }, { "epoch": 0.37068628218170435, "grad_norm": 0.0, "learning_rate": 1.4505829241660086e-05, "loss": 1.1292, "step": 9474 }, { "epoch": 0.3707254088739338, "grad_norm": 0.0, "learning_rate": 1.4504697904449545e-05, "loss": 1.1735, "step": 9475 }, { "epoch": 0.37076453556616323, "grad_norm": 0.0, "learning_rate": 1.4503566494899204e-05, "loss": 1.0589, "step": 9476 }, { "epoch": 0.3708036622583927, "grad_norm": 0.0, "learning_rate": 1.4502435013027225e-05, "loss": 1.1337, "step": 9477 }, { "epoch": 0.3708427889506221, "grad_norm": 0.0, "learning_rate": 1.4501303458851786e-05, "loss": 1.1386, "step": 9478 }, { "epoch": 0.37088191564285156, "grad_norm": 0.0, "learning_rate": 1.4500171832391052e-05, "loss": 1.1215, "step": 9479 }, { "epoch": 0.370921042335081, "grad_norm": 0.0, "learning_rate": 1.4499040133663194e-05, "loss": 1.1665, "step": 9480 }, { "epoch": 0.37096016902731044, "grad_norm": 0.0, "learning_rate": 1.4497908362686393e-05, "loss": 1.0652, "step": 9481 }, { "epoch": 0.3709992957195399, "grad_norm": 0.0, "learning_rate": 1.449677651947882e-05, "loss": 1.1036, "step": 9482 }, { "epoch": 0.3710384224117693, "grad_norm": 0.0, "learning_rate": 1.4495644604058647e-05, "loss": 0.9378, "step": 9483 }, { "epoch": 0.37107754910399876, "grad_norm": 0.0, "learning_rate": 1.4494512616444061e-05, "loss": 1.0621, "step": 9484 }, { "epoch": 0.3711166757962282, "grad_norm": 0.0, "learning_rate": 1.4493380556653232e-05, "loss": 1.1242, "step": 9485 }, { "epoch": 0.37115580248845764, "grad_norm": 0.0, "learning_rate": 1.4492248424704339e-05, "loss": 1.2365, "step": 9486 }, { "epoch": 0.3711949291806871, "grad_norm": 0.0, "learning_rate": 1.4491116220615569e-05, "loss": 1.0633, "step": 9487 }, { "epoch": 0.3712340558729165, "grad_norm": 0.0, "learning_rate": 1.4489983944405096e-05, "loss": 1.0038, "step": 9488 }, { "epoch": 0.37127318256514596, "grad_norm": 0.0, "learning_rate": 1.4488851596091109e-05, "loss": 1.1395, "step": 9489 }, { "epoch": 0.3713123092573754, "grad_norm": 0.0, "learning_rate": 1.448771917569179e-05, "loss": 1.1509, "step": 9490 }, { "epoch": 0.37135143594960485, "grad_norm": 0.0, "learning_rate": 1.4486586683225325e-05, "loss": 1.2066, "step": 9491 }, { "epoch": 0.37139056264183423, "grad_norm": 0.0, "learning_rate": 1.4485454118709904e-05, "loss": 1.0814, "step": 9492 }, { "epoch": 0.3714296893340637, "grad_norm": 0.0, "learning_rate": 1.4484321482163704e-05, "loss": 1.0841, "step": 9493 }, { "epoch": 0.3714688160262931, "grad_norm": 0.0, "learning_rate": 1.4483188773604924e-05, "loss": 1.1588, "step": 9494 }, { "epoch": 0.37150794271852255, "grad_norm": 0.0, "learning_rate": 1.4482055993051748e-05, "loss": 1.0389, "step": 9495 }, { "epoch": 0.371547069410752, "grad_norm": 0.0, "learning_rate": 1.448092314052237e-05, "loss": 1.0079, "step": 9496 }, { "epoch": 0.37158619610298144, "grad_norm": 0.0, "learning_rate": 1.447979021603498e-05, "loss": 1.053, "step": 9497 }, { "epoch": 0.3716253227952109, "grad_norm": 0.0, "learning_rate": 1.4478657219607772e-05, "loss": 1.0859, "step": 9498 }, { "epoch": 0.3716644494874403, "grad_norm": 0.0, "learning_rate": 1.4477524151258941e-05, "loss": 1.0576, "step": 9499 }, { "epoch": 0.37170357617966976, "grad_norm": 0.0, "learning_rate": 1.4476391011006688e-05, "loss": 0.9842, "step": 9500 }, { "epoch": 0.3717427028718992, "grad_norm": 0.0, "learning_rate": 1.44752577988692e-05, "loss": 1.097, "step": 9501 }, { "epoch": 0.37178182956412864, "grad_norm": 0.0, "learning_rate": 1.447412451486468e-05, "loss": 0.9954, "step": 9502 }, { "epoch": 0.3718209562563581, "grad_norm": 0.0, "learning_rate": 1.4472991159011329e-05, "loss": 1.1754, "step": 9503 }, { "epoch": 0.3718600829485875, "grad_norm": 0.0, "learning_rate": 1.447185773132734e-05, "loss": 1.028, "step": 9504 }, { "epoch": 0.37189920964081696, "grad_norm": 0.0, "learning_rate": 1.4470724231830926e-05, "loss": 1.0907, "step": 9505 }, { "epoch": 0.3719383363330464, "grad_norm": 0.0, "learning_rate": 1.4469590660540277e-05, "loss": 1.1167, "step": 9506 }, { "epoch": 0.37197746302527585, "grad_norm": 0.0, "learning_rate": 1.4468457017473606e-05, "loss": 1.0585, "step": 9507 }, { "epoch": 0.3720165897175053, "grad_norm": 0.0, "learning_rate": 1.4467323302649115e-05, "loss": 1.148, "step": 9508 }, { "epoch": 0.3720557164097347, "grad_norm": 0.0, "learning_rate": 1.446618951608501e-05, "loss": 1.0995, "step": 9509 }, { "epoch": 0.37209484310196417, "grad_norm": 0.0, "learning_rate": 1.4465055657799493e-05, "loss": 1.1115, "step": 9510 }, { "epoch": 0.3721339697941936, "grad_norm": 0.0, "learning_rate": 1.4463921727810783e-05, "loss": 1.1001, "step": 9511 }, { "epoch": 0.37217309648642305, "grad_norm": 0.0, "learning_rate": 1.4462787726137083e-05, "loss": 1.1639, "step": 9512 }, { "epoch": 0.3722122231786525, "grad_norm": 0.0, "learning_rate": 1.44616536527966e-05, "loss": 1.1857, "step": 9513 }, { "epoch": 0.37225134987088193, "grad_norm": 0.0, "learning_rate": 1.4460519507807553e-05, "loss": 1.2017, "step": 9514 }, { "epoch": 0.37229047656311137, "grad_norm": 0.0, "learning_rate": 1.445938529118815e-05, "loss": 1.1929, "step": 9515 }, { "epoch": 0.3723296032553408, "grad_norm": 0.0, "learning_rate": 1.4458251002956612e-05, "loss": 1.0864, "step": 9516 }, { "epoch": 0.37236872994757025, "grad_norm": 0.0, "learning_rate": 1.4457116643131144e-05, "loss": 1.1258, "step": 9517 }, { "epoch": 0.3724078566397997, "grad_norm": 0.0, "learning_rate": 1.445598221172997e-05, "loss": 1.1907, "step": 9518 }, { "epoch": 0.37244698333202914, "grad_norm": 0.0, "learning_rate": 1.4454847708771305e-05, "loss": 0.9881, "step": 9519 }, { "epoch": 0.3724861100242586, "grad_norm": 0.0, "learning_rate": 1.4453713134273368e-05, "loss": 1.1394, "step": 9520 }, { "epoch": 0.37252523671648796, "grad_norm": 0.0, "learning_rate": 1.4452578488254381e-05, "loss": 1.0961, "step": 9521 }, { "epoch": 0.3725643634087174, "grad_norm": 0.0, "learning_rate": 1.4451443770732561e-05, "loss": 1.2518, "step": 9522 }, { "epoch": 0.37260349010094684, "grad_norm": 0.0, "learning_rate": 1.4450308981726134e-05, "loss": 1.2009, "step": 9523 }, { "epoch": 0.3726426167931763, "grad_norm": 0.0, "learning_rate": 1.4449174121253315e-05, "loss": 0.9607, "step": 9524 }, { "epoch": 0.3726817434854057, "grad_norm": 0.0, "learning_rate": 1.4448039189332341e-05, "loss": 1.2163, "step": 9525 }, { "epoch": 0.37272087017763517, "grad_norm": 0.0, "learning_rate": 1.4446904185981432e-05, "loss": 1.0573, "step": 9526 }, { "epoch": 0.3727599968698646, "grad_norm": 0.0, "learning_rate": 1.444576911121881e-05, "loss": 1.1014, "step": 9527 }, { "epoch": 0.37279912356209405, "grad_norm": 0.0, "learning_rate": 1.4444633965062708e-05, "loss": 1.1663, "step": 9528 }, { "epoch": 0.3728382502543235, "grad_norm": 0.0, "learning_rate": 1.4443498747531358e-05, "loss": 1.0684, "step": 9529 }, { "epoch": 0.37287737694655293, "grad_norm": 0.0, "learning_rate": 1.444236345864298e-05, "loss": 1.0335, "step": 9530 }, { "epoch": 0.37291650363878237, "grad_norm": 0.0, "learning_rate": 1.4441228098415816e-05, "loss": 1.0347, "step": 9531 }, { "epoch": 0.3729556303310118, "grad_norm": 0.0, "learning_rate": 1.4440092666868091e-05, "loss": 1.08, "step": 9532 }, { "epoch": 0.37299475702324125, "grad_norm": 0.0, "learning_rate": 1.4438957164018041e-05, "loss": 1.1662, "step": 9533 }, { "epoch": 0.3730338837154707, "grad_norm": 0.0, "learning_rate": 1.4437821589883905e-05, "loss": 1.1234, "step": 9534 }, { "epoch": 0.37307301040770013, "grad_norm": 0.0, "learning_rate": 1.4436685944483912e-05, "loss": 1.0784, "step": 9535 }, { "epoch": 0.3731121370999296, "grad_norm": 0.0, "learning_rate": 1.4435550227836304e-05, "loss": 1.0064, "step": 9536 }, { "epoch": 0.373151263792159, "grad_norm": 0.0, "learning_rate": 1.4434414439959315e-05, "loss": 1.0443, "step": 9537 }, { "epoch": 0.37319039048438846, "grad_norm": 0.0, "learning_rate": 1.4433278580871188e-05, "loss": 1.1093, "step": 9538 }, { "epoch": 0.3732295171766179, "grad_norm": 0.0, "learning_rate": 1.4432142650590162e-05, "loss": 1.0176, "step": 9539 }, { "epoch": 0.37326864386884734, "grad_norm": 0.0, "learning_rate": 1.443100664913448e-05, "loss": 1.0339, "step": 9540 }, { "epoch": 0.3733077705610768, "grad_norm": 0.0, "learning_rate": 1.4429870576522384e-05, "loss": 1.0093, "step": 9541 }, { "epoch": 0.3733468972533062, "grad_norm": 0.0, "learning_rate": 1.4428734432772115e-05, "loss": 1.1757, "step": 9542 }, { "epoch": 0.37338602394553566, "grad_norm": 0.0, "learning_rate": 1.4427598217901921e-05, "loss": 1.0887, "step": 9543 }, { "epoch": 0.3734251506377651, "grad_norm": 0.0, "learning_rate": 1.442646193193005e-05, "loss": 1.0834, "step": 9544 }, { "epoch": 0.37346427732999454, "grad_norm": 0.0, "learning_rate": 1.4425325574874745e-05, "loss": 1.2091, "step": 9545 }, { "epoch": 0.373503404022224, "grad_norm": 0.0, "learning_rate": 1.4424189146754257e-05, "loss": 1.1165, "step": 9546 }, { "epoch": 0.3735425307144534, "grad_norm": 0.0, "learning_rate": 1.4423052647586835e-05, "loss": 1.0092, "step": 9547 }, { "epoch": 0.37358165740668287, "grad_norm": 0.0, "learning_rate": 1.442191607739073e-05, "loss": 1.0902, "step": 9548 }, { "epoch": 0.37362078409891225, "grad_norm": 0.0, "learning_rate": 1.4420779436184194e-05, "loss": 1.0682, "step": 9549 }, { "epoch": 0.3736599107911417, "grad_norm": 0.0, "learning_rate": 1.441964272398548e-05, "loss": 1.0597, "step": 9550 }, { "epoch": 0.37369903748337113, "grad_norm": 0.0, "learning_rate": 1.4418505940812842e-05, "loss": 0.9791, "step": 9551 }, { "epoch": 0.3737381641756006, "grad_norm": 0.0, "learning_rate": 1.4417369086684536e-05, "loss": 1.0695, "step": 9552 }, { "epoch": 0.37377729086783, "grad_norm": 0.0, "learning_rate": 1.4416232161618815e-05, "loss": 1.1989, "step": 9553 }, { "epoch": 0.37381641756005946, "grad_norm": 0.0, "learning_rate": 1.4415095165633942e-05, "loss": 0.9195, "step": 9554 }, { "epoch": 0.3738555442522889, "grad_norm": 0.0, "learning_rate": 1.4413958098748171e-05, "loss": 1.0014, "step": 9555 }, { "epoch": 0.37389467094451834, "grad_norm": 0.0, "learning_rate": 1.4412820960979765e-05, "loss": 1.1595, "step": 9556 }, { "epoch": 0.3739337976367478, "grad_norm": 0.0, "learning_rate": 1.4411683752346983e-05, "loss": 1.0753, "step": 9557 }, { "epoch": 0.3739729243289772, "grad_norm": 0.0, "learning_rate": 1.441054647286809e-05, "loss": 1.2131, "step": 9558 }, { "epoch": 0.37401205102120666, "grad_norm": 0.0, "learning_rate": 1.4409409122561347e-05, "loss": 1.0018, "step": 9559 }, { "epoch": 0.3740511777134361, "grad_norm": 0.0, "learning_rate": 1.4408271701445019e-05, "loss": 1.0177, "step": 9560 }, { "epoch": 0.37409030440566554, "grad_norm": 0.0, "learning_rate": 1.440713420953737e-05, "loss": 1.082, "step": 9561 }, { "epoch": 0.374129431097895, "grad_norm": 0.0, "learning_rate": 1.4405996646856668e-05, "loss": 1.0385, "step": 9562 }, { "epoch": 0.3741685577901244, "grad_norm": 0.0, "learning_rate": 1.4404859013421182e-05, "loss": 1.1356, "step": 9563 }, { "epoch": 0.37420768448235386, "grad_norm": 0.0, "learning_rate": 1.4403721309249178e-05, "loss": 1.1844, "step": 9564 }, { "epoch": 0.3742468111745833, "grad_norm": 0.0, "learning_rate": 1.4402583534358929e-05, "loss": 1.0144, "step": 9565 }, { "epoch": 0.37428593786681275, "grad_norm": 0.0, "learning_rate": 1.4401445688768704e-05, "loss": 1.1114, "step": 9566 }, { "epoch": 0.3743250645590422, "grad_norm": 0.0, "learning_rate": 1.440030777249678e-05, "loss": 1.1821, "step": 9567 }, { "epoch": 0.3743641912512716, "grad_norm": 0.0, "learning_rate": 1.4399169785561426e-05, "loss": 1.1467, "step": 9568 }, { "epoch": 0.37440331794350107, "grad_norm": 0.0, "learning_rate": 1.4398031727980915e-05, "loss": 1.0685, "step": 9569 }, { "epoch": 0.3744424446357305, "grad_norm": 0.0, "learning_rate": 1.4396893599773529e-05, "loss": 1.0348, "step": 9570 }, { "epoch": 0.37448157132795995, "grad_norm": 0.0, "learning_rate": 1.439575540095754e-05, "loss": 1.0909, "step": 9571 }, { "epoch": 0.3745206980201894, "grad_norm": 0.0, "learning_rate": 1.4394617131551228e-05, "loss": 1.0156, "step": 9572 }, { "epoch": 0.37455982471241883, "grad_norm": 0.0, "learning_rate": 1.439347879157287e-05, "loss": 1.1033, "step": 9573 }, { "epoch": 0.3745989514046483, "grad_norm": 0.0, "learning_rate": 1.439234038104075e-05, "loss": 1.1412, "step": 9574 }, { "epoch": 0.3746380780968777, "grad_norm": 0.0, "learning_rate": 1.4391201899973143e-05, "loss": 1.0055, "step": 9575 }, { "epoch": 0.37467720478910715, "grad_norm": 0.0, "learning_rate": 1.4390063348388341e-05, "loss": 1.0281, "step": 9576 }, { "epoch": 0.3747163314813366, "grad_norm": 0.0, "learning_rate": 1.438892472630462e-05, "loss": 1.0785, "step": 9577 }, { "epoch": 0.374755458173566, "grad_norm": 0.0, "learning_rate": 1.438778603374027e-05, "loss": 0.9688, "step": 9578 }, { "epoch": 0.3747945848657954, "grad_norm": 0.0, "learning_rate": 1.4386647270713572e-05, "loss": 1.0567, "step": 9579 }, { "epoch": 0.37483371155802486, "grad_norm": 0.0, "learning_rate": 1.4385508437242817e-05, "loss": 0.9404, "step": 9580 }, { "epoch": 0.3748728382502543, "grad_norm": 0.0, "learning_rate": 1.4384369533346292e-05, "loss": 1.1489, "step": 9581 }, { "epoch": 0.37491196494248374, "grad_norm": 0.0, "learning_rate": 1.4383230559042282e-05, "loss": 1.0566, "step": 9582 }, { "epoch": 0.3749510916347132, "grad_norm": 0.0, "learning_rate": 1.4382091514349088e-05, "loss": 1.3304, "step": 9583 }, { "epoch": 0.3749902183269426, "grad_norm": 0.0, "learning_rate": 1.4380952399284991e-05, "loss": 1.049, "step": 9584 }, { "epoch": 0.37502934501917207, "grad_norm": 0.0, "learning_rate": 1.437981321386829e-05, "loss": 1.0704, "step": 9585 }, { "epoch": 0.3750684717114015, "grad_norm": 0.0, "learning_rate": 1.4378673958117276e-05, "loss": 1.1349, "step": 9586 }, { "epoch": 0.37510759840363095, "grad_norm": 0.0, "learning_rate": 1.4377534632050245e-05, "loss": 1.1863, "step": 9587 }, { "epoch": 0.3751467250958604, "grad_norm": 0.0, "learning_rate": 1.4376395235685494e-05, "loss": 1.1191, "step": 9588 }, { "epoch": 0.37518585178808983, "grad_norm": 0.0, "learning_rate": 1.4375255769041321e-05, "loss": 1.1815, "step": 9589 }, { "epoch": 0.37522497848031927, "grad_norm": 0.0, "learning_rate": 1.4374116232136022e-05, "loss": 1.165, "step": 9590 }, { "epoch": 0.3752641051725487, "grad_norm": 0.0, "learning_rate": 1.4372976624987894e-05, "loss": 1.0471, "step": 9591 }, { "epoch": 0.37530323186477815, "grad_norm": 0.0, "learning_rate": 1.4371836947615245e-05, "loss": 1.0408, "step": 9592 }, { "epoch": 0.3753423585570076, "grad_norm": 0.0, "learning_rate": 1.437069720003637e-05, "loss": 1.163, "step": 9593 }, { "epoch": 0.37538148524923703, "grad_norm": 0.0, "learning_rate": 1.4369557382269577e-05, "loss": 0.9934, "step": 9594 }, { "epoch": 0.3754206119414665, "grad_norm": 0.0, "learning_rate": 1.4368417494333167e-05, "loss": 0.9703, "step": 9595 }, { "epoch": 0.3754597386336959, "grad_norm": 0.0, "learning_rate": 1.4367277536245445e-05, "loss": 1.1379, "step": 9596 }, { "epoch": 0.37549886532592536, "grad_norm": 0.0, "learning_rate": 1.4366137508024718e-05, "loss": 1.0687, "step": 9597 }, { "epoch": 0.3755379920181548, "grad_norm": 0.0, "learning_rate": 1.4364997409689297e-05, "loss": 1.0155, "step": 9598 }, { "epoch": 0.37557711871038424, "grad_norm": 0.0, "learning_rate": 1.4363857241257484e-05, "loss": 1.1601, "step": 9599 }, { "epoch": 0.3756162454026137, "grad_norm": 0.0, "learning_rate": 1.4362717002747592e-05, "loss": 1.1672, "step": 9600 }, { "epoch": 0.3756553720948431, "grad_norm": 0.0, "learning_rate": 1.4361576694177935e-05, "loss": 1.1351, "step": 9601 }, { "epoch": 0.37569449878707256, "grad_norm": 0.0, "learning_rate": 1.436043631556682e-05, "loss": 1.0595, "step": 9602 }, { "epoch": 0.375733625479302, "grad_norm": 0.0, "learning_rate": 1.4359295866932561e-05, "loss": 1.2867, "step": 9603 }, { "epoch": 0.37577275217153144, "grad_norm": 0.0, "learning_rate": 1.435815534829347e-05, "loss": 1.0654, "step": 9604 }, { "epoch": 0.3758118788637609, "grad_norm": 0.0, "learning_rate": 1.4357014759667868e-05, "loss": 1.1234, "step": 9605 }, { "epoch": 0.37585100555599027, "grad_norm": 0.0, "learning_rate": 1.435587410107407e-05, "loss": 1.0688, "step": 9606 }, { "epoch": 0.3758901322482197, "grad_norm": 0.0, "learning_rate": 1.4354733372530391e-05, "loss": 1.0068, "step": 9607 }, { "epoch": 0.37592925894044915, "grad_norm": 0.0, "learning_rate": 1.4353592574055152e-05, "loss": 1.1057, "step": 9608 }, { "epoch": 0.3759683856326786, "grad_norm": 0.0, "learning_rate": 1.4352451705666668e-05, "loss": 1.147, "step": 9609 }, { "epoch": 0.37600751232490803, "grad_norm": 0.0, "learning_rate": 1.4351310767383268e-05, "loss": 1.124, "step": 9610 }, { "epoch": 0.3760466390171375, "grad_norm": 0.0, "learning_rate": 1.4350169759223266e-05, "loss": 1.1058, "step": 9611 }, { "epoch": 0.3760857657093669, "grad_norm": 0.0, "learning_rate": 1.434902868120499e-05, "loss": 1.1073, "step": 9612 }, { "epoch": 0.37612489240159636, "grad_norm": 0.0, "learning_rate": 1.4347887533346765e-05, "loss": 1.1214, "step": 9613 }, { "epoch": 0.3761640190938258, "grad_norm": 0.0, "learning_rate": 1.4346746315666913e-05, "loss": 1.0589, "step": 9614 }, { "epoch": 0.37620314578605524, "grad_norm": 0.0, "learning_rate": 1.4345605028183762e-05, "loss": 1.0721, "step": 9615 }, { "epoch": 0.3762422724782847, "grad_norm": 0.0, "learning_rate": 1.4344463670915638e-05, "loss": 1.0524, "step": 9616 }, { "epoch": 0.3762813991705141, "grad_norm": 0.0, "learning_rate": 1.4343322243880873e-05, "loss": 1.1992, "step": 9617 }, { "epoch": 0.37632052586274356, "grad_norm": 0.0, "learning_rate": 1.4342180747097796e-05, "loss": 1.0685, "step": 9618 }, { "epoch": 0.376359652554973, "grad_norm": 0.0, "learning_rate": 1.4341039180584736e-05, "loss": 1.0445, "step": 9619 }, { "epoch": 0.37639877924720244, "grad_norm": 0.0, "learning_rate": 1.4339897544360026e-05, "loss": 1.0479, "step": 9620 }, { "epoch": 0.3764379059394319, "grad_norm": 0.0, "learning_rate": 1.4338755838442003e-05, "loss": 1.2613, "step": 9621 }, { "epoch": 0.3764770326316613, "grad_norm": 0.0, "learning_rate": 1.4337614062848992e-05, "loss": 0.9825, "step": 9622 }, { "epoch": 0.37651615932389076, "grad_norm": 0.0, "learning_rate": 1.4336472217599338e-05, "loss": 1.1085, "step": 9623 }, { "epoch": 0.3765552860161202, "grad_norm": 0.0, "learning_rate": 1.4335330302711373e-05, "loss": 1.1999, "step": 9624 }, { "epoch": 0.37659441270834965, "grad_norm": 0.0, "learning_rate": 1.4334188318203437e-05, "loss": 1.0257, "step": 9625 }, { "epoch": 0.3766335394005791, "grad_norm": 0.0, "learning_rate": 1.4333046264093866e-05, "loss": 1.0997, "step": 9626 }, { "epoch": 0.37667266609280853, "grad_norm": 0.0, "learning_rate": 1.4331904140401003e-05, "loss": 0.9966, "step": 9627 }, { "epoch": 0.37671179278503797, "grad_norm": 0.0, "learning_rate": 1.4330761947143189e-05, "loss": 1.0893, "step": 9628 }, { "epoch": 0.3767509194772674, "grad_norm": 0.0, "learning_rate": 1.4329619684338763e-05, "loss": 1.0882, "step": 9629 }, { "epoch": 0.37679004616949685, "grad_norm": 0.0, "learning_rate": 1.432847735200607e-05, "loss": 1.0922, "step": 9630 }, { "epoch": 0.3768291728617263, "grad_norm": 0.0, "learning_rate": 1.4327334950163454e-05, "loss": 1.1407, "step": 9631 }, { "epoch": 0.37686829955395573, "grad_norm": 0.0, "learning_rate": 1.4326192478829263e-05, "loss": 1.0641, "step": 9632 }, { "epoch": 0.3769074262461852, "grad_norm": 0.0, "learning_rate": 1.432504993802184e-05, "loss": 0.9848, "step": 9633 }, { "epoch": 0.3769465529384146, "grad_norm": 0.0, "learning_rate": 1.4323907327759537e-05, "loss": 1.0773, "step": 9634 }, { "epoch": 0.376985679630644, "grad_norm": 0.0, "learning_rate": 1.43227646480607e-05, "loss": 1.1721, "step": 9635 }, { "epoch": 0.37702480632287344, "grad_norm": 0.0, "learning_rate": 1.4321621898943678e-05, "loss": 1.1313, "step": 9636 }, { "epoch": 0.3770639330151029, "grad_norm": 0.0, "learning_rate": 1.4320479080426824e-05, "loss": 1.0496, "step": 9637 }, { "epoch": 0.3771030597073323, "grad_norm": 0.0, "learning_rate": 1.431933619252849e-05, "loss": 1.0008, "step": 9638 }, { "epoch": 0.37714218639956176, "grad_norm": 0.0, "learning_rate": 1.431819323526703e-05, "loss": 1.1944, "step": 9639 }, { "epoch": 0.3771813130917912, "grad_norm": 0.0, "learning_rate": 1.4317050208660797e-05, "loss": 1.1935, "step": 9640 }, { "epoch": 0.37722043978402064, "grad_norm": 0.0, "learning_rate": 1.4315907112728148e-05, "loss": 1.0938, "step": 9641 }, { "epoch": 0.3772595664762501, "grad_norm": 0.0, "learning_rate": 1.4314763947487436e-05, "loss": 1.0566, "step": 9642 }, { "epoch": 0.3772986931684795, "grad_norm": 0.0, "learning_rate": 1.4313620712957023e-05, "loss": 1.0213, "step": 9643 }, { "epoch": 0.37733781986070897, "grad_norm": 0.0, "learning_rate": 1.4312477409155268e-05, "loss": 1.1018, "step": 9644 }, { "epoch": 0.3773769465529384, "grad_norm": 0.0, "learning_rate": 1.4311334036100528e-05, "loss": 1.2009, "step": 9645 }, { "epoch": 0.37741607324516785, "grad_norm": 0.0, "learning_rate": 1.4310190593811167e-05, "loss": 1.1216, "step": 9646 }, { "epoch": 0.3774551999373973, "grad_norm": 0.0, "learning_rate": 1.4309047082305548e-05, "loss": 1.0915, "step": 9647 }, { "epoch": 0.37749432662962673, "grad_norm": 0.0, "learning_rate": 1.430790350160203e-05, "loss": 1.1526, "step": 9648 }, { "epoch": 0.37753345332185617, "grad_norm": 0.0, "learning_rate": 1.4306759851718979e-05, "loss": 1.0178, "step": 9649 }, { "epoch": 0.3775725800140856, "grad_norm": 0.0, "learning_rate": 1.4305616132674763e-05, "loss": 1.0317, "step": 9650 }, { "epoch": 0.37761170670631505, "grad_norm": 0.0, "learning_rate": 1.4304472344487746e-05, "loss": 1.1381, "step": 9651 }, { "epoch": 0.3776508333985445, "grad_norm": 0.0, "learning_rate": 1.4303328487176298e-05, "loss": 1.1236, "step": 9652 }, { "epoch": 0.37768996009077394, "grad_norm": 0.0, "learning_rate": 1.4302184560758786e-05, "loss": 1.0517, "step": 9653 }, { "epoch": 0.3777290867830034, "grad_norm": 0.0, "learning_rate": 1.4301040565253582e-05, "loss": 1.1561, "step": 9654 }, { "epoch": 0.3777682134752328, "grad_norm": 0.0, "learning_rate": 1.4299896500679055e-05, "loss": 1.0934, "step": 9655 }, { "epoch": 0.37780734016746226, "grad_norm": 0.0, "learning_rate": 1.4298752367053577e-05, "loss": 1.1212, "step": 9656 }, { "epoch": 0.3778464668596917, "grad_norm": 0.0, "learning_rate": 1.4297608164395524e-05, "loss": 1.0045, "step": 9657 }, { "epoch": 0.37788559355192114, "grad_norm": 0.0, "learning_rate": 1.429646389272327e-05, "loss": 1.0732, "step": 9658 }, { "epoch": 0.3779247202441506, "grad_norm": 0.0, "learning_rate": 1.4295319552055191e-05, "loss": 1.0474, "step": 9659 }, { "epoch": 0.37796384693638, "grad_norm": 0.0, "learning_rate": 1.4294175142409662e-05, "loss": 0.9644, "step": 9660 }, { "epoch": 0.37800297362860946, "grad_norm": 0.0, "learning_rate": 1.4293030663805056e-05, "loss": 1.0961, "step": 9661 }, { "epoch": 0.3780421003208389, "grad_norm": 0.0, "learning_rate": 1.429188611625976e-05, "loss": 1.1285, "step": 9662 }, { "epoch": 0.3780812270130683, "grad_norm": 0.0, "learning_rate": 1.4290741499792154e-05, "loss": 1.036, "step": 9663 }, { "epoch": 0.37812035370529773, "grad_norm": 0.0, "learning_rate": 1.4289596814420612e-05, "loss": 1.0571, "step": 9664 }, { "epoch": 0.37815948039752717, "grad_norm": 0.0, "learning_rate": 1.4288452060163525e-05, "loss": 1.0715, "step": 9665 }, { "epoch": 0.3781986070897566, "grad_norm": 0.0, "learning_rate": 1.4287307237039267e-05, "loss": 1.0049, "step": 9666 }, { "epoch": 0.37823773378198605, "grad_norm": 0.0, "learning_rate": 1.4286162345066228e-05, "loss": 0.9075, "step": 9667 }, { "epoch": 0.3782768604742155, "grad_norm": 0.0, "learning_rate": 1.4285017384262794e-05, "loss": 1.1401, "step": 9668 }, { "epoch": 0.37831598716644493, "grad_norm": 0.0, "learning_rate": 1.4283872354647348e-05, "loss": 1.1079, "step": 9669 }, { "epoch": 0.3783551138586744, "grad_norm": 0.0, "learning_rate": 1.4282727256238282e-05, "loss": 1.077, "step": 9670 }, { "epoch": 0.3783942405509038, "grad_norm": 0.0, "learning_rate": 1.4281582089053983e-05, "loss": 1.0042, "step": 9671 }, { "epoch": 0.37843336724313326, "grad_norm": 0.0, "learning_rate": 1.428043685311284e-05, "loss": 1.1382, "step": 9672 }, { "epoch": 0.3784724939353627, "grad_norm": 0.0, "learning_rate": 1.4279291548433243e-05, "loss": 1.0806, "step": 9673 }, { "epoch": 0.37851162062759214, "grad_norm": 0.0, "learning_rate": 1.4278146175033589e-05, "loss": 1.0585, "step": 9674 }, { "epoch": 0.3785507473198216, "grad_norm": 0.0, "learning_rate": 1.4277000732932267e-05, "loss": 1.2059, "step": 9675 }, { "epoch": 0.378589874012051, "grad_norm": 0.0, "learning_rate": 1.4275855222147672e-05, "loss": 1.1071, "step": 9676 }, { "epoch": 0.37862900070428046, "grad_norm": 0.0, "learning_rate": 1.42747096426982e-05, "loss": 1.162, "step": 9677 }, { "epoch": 0.3786681273965099, "grad_norm": 0.0, "learning_rate": 1.427356399460225e-05, "loss": 1.14, "step": 9678 }, { "epoch": 0.37870725408873934, "grad_norm": 0.0, "learning_rate": 1.4272418277878217e-05, "loss": 1.0744, "step": 9679 }, { "epoch": 0.3787463807809688, "grad_norm": 0.0, "learning_rate": 1.4271272492544497e-05, "loss": 1.0111, "step": 9680 }, { "epoch": 0.3787855074731982, "grad_norm": 0.0, "learning_rate": 1.4270126638619495e-05, "loss": 1.0883, "step": 9681 }, { "epoch": 0.37882463416542766, "grad_norm": 0.0, "learning_rate": 1.426898071612161e-05, "loss": 1.0668, "step": 9682 }, { "epoch": 0.3788637608576571, "grad_norm": 0.0, "learning_rate": 1.4267834725069245e-05, "loss": 1.0536, "step": 9683 }, { "epoch": 0.37890288754988655, "grad_norm": 0.0, "learning_rate": 1.4266688665480799e-05, "loss": 1.0527, "step": 9684 }, { "epoch": 0.378942014242116, "grad_norm": 0.0, "learning_rate": 1.4265542537374684e-05, "loss": 1.0229, "step": 9685 }, { "epoch": 0.37898114093434543, "grad_norm": 0.0, "learning_rate": 1.4264396340769297e-05, "loss": 1.0893, "step": 9686 }, { "epoch": 0.37902026762657487, "grad_norm": 0.0, "learning_rate": 1.426325007568305e-05, "loss": 1.1414, "step": 9687 }, { "epoch": 0.3790593943188043, "grad_norm": 0.0, "learning_rate": 1.4262103742134347e-05, "loss": 1.0923, "step": 9688 }, { "epoch": 0.37909852101103375, "grad_norm": 0.0, "learning_rate": 1.4260957340141601e-05, "loss": 1.0801, "step": 9689 }, { "epoch": 0.3791376477032632, "grad_norm": 0.0, "learning_rate": 1.4259810869723221e-05, "loss": 1.0457, "step": 9690 }, { "epoch": 0.3791767743954926, "grad_norm": 0.0, "learning_rate": 1.4258664330897611e-05, "loss": 1.0404, "step": 9691 }, { "epoch": 0.379215901087722, "grad_norm": 0.0, "learning_rate": 1.4257517723683192e-05, "loss": 1.0352, "step": 9692 }, { "epoch": 0.37925502777995146, "grad_norm": 0.0, "learning_rate": 1.4256371048098371e-05, "loss": 1.0572, "step": 9693 }, { "epoch": 0.3792941544721809, "grad_norm": 0.0, "learning_rate": 1.4255224304161569e-05, "loss": 1.1703, "step": 9694 }, { "epoch": 0.37933328116441034, "grad_norm": 0.0, "learning_rate": 1.4254077491891194e-05, "loss": 1.0974, "step": 9695 }, { "epoch": 0.3793724078566398, "grad_norm": 0.0, "learning_rate": 1.4252930611305664e-05, "loss": 1.109, "step": 9696 }, { "epoch": 0.3794115345488692, "grad_norm": 0.0, "learning_rate": 1.42517836624234e-05, "loss": 1.0339, "step": 9697 }, { "epoch": 0.37945066124109866, "grad_norm": 0.0, "learning_rate": 1.4250636645262813e-05, "loss": 1.1543, "step": 9698 }, { "epoch": 0.3794897879333281, "grad_norm": 0.0, "learning_rate": 1.4249489559842333e-05, "loss": 1.1636, "step": 9699 }, { "epoch": 0.37952891462555755, "grad_norm": 0.0, "learning_rate": 1.4248342406180373e-05, "loss": 1.1055, "step": 9700 }, { "epoch": 0.379568041317787, "grad_norm": 0.0, "learning_rate": 1.424719518429536e-05, "loss": 1.0814, "step": 9701 }, { "epoch": 0.3796071680100164, "grad_norm": 0.0, "learning_rate": 1.4246047894205714e-05, "loss": 1.0922, "step": 9702 }, { "epoch": 0.37964629470224587, "grad_norm": 0.0, "learning_rate": 1.4244900535929858e-05, "loss": 1.1068, "step": 9703 }, { "epoch": 0.3796854213944753, "grad_norm": 0.0, "learning_rate": 1.4243753109486217e-05, "loss": 1.09, "step": 9704 }, { "epoch": 0.37972454808670475, "grad_norm": 0.0, "learning_rate": 1.4242605614893224e-05, "loss": 1.1942, "step": 9705 }, { "epoch": 0.3797636747789342, "grad_norm": 0.0, "learning_rate": 1.4241458052169295e-05, "loss": 0.994, "step": 9706 }, { "epoch": 0.37980280147116363, "grad_norm": 0.0, "learning_rate": 1.424031042133287e-05, "loss": 0.9564, "step": 9707 }, { "epoch": 0.37984192816339307, "grad_norm": 0.0, "learning_rate": 1.4239162722402371e-05, "loss": 1.0644, "step": 9708 }, { "epoch": 0.3798810548556225, "grad_norm": 0.0, "learning_rate": 1.4238014955396228e-05, "loss": 1.093, "step": 9709 }, { "epoch": 0.37992018154785195, "grad_norm": 0.0, "learning_rate": 1.4236867120332877e-05, "loss": 1.1369, "step": 9710 }, { "epoch": 0.3799593082400814, "grad_norm": 0.0, "learning_rate": 1.4235719217230751e-05, "loss": 1.1569, "step": 9711 }, { "epoch": 0.37999843493231084, "grad_norm": 0.0, "learning_rate": 1.4234571246108279e-05, "loss": 1.0498, "step": 9712 }, { "epoch": 0.3800375616245403, "grad_norm": 0.0, "learning_rate": 1.4233423206983901e-05, "loss": 0.9964, "step": 9713 }, { "epoch": 0.3800766883167697, "grad_norm": 0.0, "learning_rate": 1.423227509987605e-05, "loss": 1.1979, "step": 9714 }, { "epoch": 0.38011581500899916, "grad_norm": 0.0, "learning_rate": 1.4231126924803167e-05, "loss": 1.2448, "step": 9715 }, { "epoch": 0.3801549417012286, "grad_norm": 0.0, "learning_rate": 1.4229978681783684e-05, "loss": 0.9779, "step": 9716 }, { "epoch": 0.38019406839345804, "grad_norm": 0.0, "learning_rate": 1.4228830370836046e-05, "loss": 0.9355, "step": 9717 }, { "epoch": 0.3802331950856875, "grad_norm": 0.0, "learning_rate": 1.422768199197869e-05, "loss": 1.1477, "step": 9718 }, { "epoch": 0.3802723217779169, "grad_norm": 0.0, "learning_rate": 1.4226533545230058e-05, "loss": 1.0366, "step": 9719 }, { "epoch": 0.3803114484701463, "grad_norm": 0.0, "learning_rate": 1.4225385030608594e-05, "loss": 1.1385, "step": 9720 }, { "epoch": 0.38035057516237575, "grad_norm": 0.0, "learning_rate": 1.4224236448132742e-05, "loss": 1.1233, "step": 9721 }, { "epoch": 0.3803897018546052, "grad_norm": 0.0, "learning_rate": 1.4223087797820945e-05, "loss": 1.084, "step": 9722 }, { "epoch": 0.38042882854683463, "grad_norm": 0.0, "learning_rate": 1.4221939079691652e-05, "loss": 1.0852, "step": 9723 }, { "epoch": 0.38046795523906407, "grad_norm": 0.0, "learning_rate": 1.4220790293763307e-05, "loss": 1.1036, "step": 9724 }, { "epoch": 0.3805070819312935, "grad_norm": 0.0, "learning_rate": 1.4219641440054357e-05, "loss": 1.0778, "step": 9725 }, { "epoch": 0.38054620862352295, "grad_norm": 0.0, "learning_rate": 1.4218492518583253e-05, "loss": 1.0031, "step": 9726 }, { "epoch": 0.3805853353157524, "grad_norm": 0.0, "learning_rate": 1.4217343529368446e-05, "loss": 1.074, "step": 9727 }, { "epoch": 0.38062446200798183, "grad_norm": 0.0, "learning_rate": 1.421619447242839e-05, "loss": 0.9501, "step": 9728 }, { "epoch": 0.3806635887002113, "grad_norm": 0.0, "learning_rate": 1.4215045347781527e-05, "loss": 1.1797, "step": 9729 }, { "epoch": 0.3807027153924407, "grad_norm": 0.0, "learning_rate": 1.4213896155446323e-05, "loss": 0.9762, "step": 9730 }, { "epoch": 0.38074184208467016, "grad_norm": 0.0, "learning_rate": 1.4212746895441224e-05, "loss": 1.0888, "step": 9731 }, { "epoch": 0.3807809687768996, "grad_norm": 0.0, "learning_rate": 1.4211597567784692e-05, "loss": 1.0582, "step": 9732 }, { "epoch": 0.38082009546912904, "grad_norm": 0.0, "learning_rate": 1.4210448172495176e-05, "loss": 1.049, "step": 9733 }, { "epoch": 0.3808592221613585, "grad_norm": 0.0, "learning_rate": 1.420929870959114e-05, "loss": 1.1271, "step": 9734 }, { "epoch": 0.3808983488535879, "grad_norm": 0.0, "learning_rate": 1.4208149179091044e-05, "loss": 1.1225, "step": 9735 }, { "epoch": 0.38093747554581736, "grad_norm": 0.0, "learning_rate": 1.4206999581013343e-05, "loss": 1.1016, "step": 9736 }, { "epoch": 0.3809766022380468, "grad_norm": 0.0, "learning_rate": 1.4205849915376501e-05, "loss": 1.0797, "step": 9737 }, { "epoch": 0.38101572893027624, "grad_norm": 0.0, "learning_rate": 1.4204700182198979e-05, "loss": 1.1503, "step": 9738 }, { "epoch": 0.3810548556225057, "grad_norm": 0.0, "learning_rate": 1.420355038149924e-05, "loss": 1.1645, "step": 9739 }, { "epoch": 0.3810939823147351, "grad_norm": 0.0, "learning_rate": 1.420240051329575e-05, "loss": 1.0389, "step": 9740 }, { "epoch": 0.38113310900696457, "grad_norm": 0.0, "learning_rate": 1.4201250577606975e-05, "loss": 1.1527, "step": 9741 }, { "epoch": 0.381172235699194, "grad_norm": 0.0, "learning_rate": 1.4200100574451378e-05, "loss": 1.0098, "step": 9742 }, { "epoch": 0.38121136239142345, "grad_norm": 0.0, "learning_rate": 1.419895050384743e-05, "loss": 1.1302, "step": 9743 }, { "epoch": 0.3812504890836529, "grad_norm": 0.0, "learning_rate": 1.41978003658136e-05, "loss": 1.1375, "step": 9744 }, { "epoch": 0.38128961577588233, "grad_norm": 0.0, "learning_rate": 1.4196650160368356e-05, "loss": 1.1773, "step": 9745 }, { "epoch": 0.38132874246811177, "grad_norm": 0.0, "learning_rate": 1.4195499887530167e-05, "loss": 0.905, "step": 9746 }, { "epoch": 0.3813678691603412, "grad_norm": 0.0, "learning_rate": 1.419434954731751e-05, "loss": 1.0062, "step": 9747 }, { "epoch": 0.3814069958525706, "grad_norm": 0.0, "learning_rate": 1.419319913974885e-05, "loss": 1.1024, "step": 9748 }, { "epoch": 0.38144612254480004, "grad_norm": 0.0, "learning_rate": 1.4192048664842671e-05, "loss": 1.1124, "step": 9749 }, { "epoch": 0.3814852492370295, "grad_norm": 0.0, "learning_rate": 1.4190898122617443e-05, "loss": 1.0635, "step": 9750 }, { "epoch": 0.3815243759292589, "grad_norm": 0.0, "learning_rate": 1.4189747513091639e-05, "loss": 0.9336, "step": 9751 }, { "epoch": 0.38156350262148836, "grad_norm": 0.0, "learning_rate": 1.4188596836283744e-05, "loss": 1.1846, "step": 9752 }, { "epoch": 0.3816026293137178, "grad_norm": 0.0, "learning_rate": 1.4187446092212232e-05, "loss": 1.1071, "step": 9753 }, { "epoch": 0.38164175600594724, "grad_norm": 0.0, "learning_rate": 1.418629528089558e-05, "loss": 1.0838, "step": 9754 }, { "epoch": 0.3816808826981767, "grad_norm": 0.0, "learning_rate": 1.4185144402352274e-05, "loss": 1.1472, "step": 9755 }, { "epoch": 0.3817200093904061, "grad_norm": 0.0, "learning_rate": 1.418399345660079e-05, "loss": 1.1426, "step": 9756 }, { "epoch": 0.38175913608263556, "grad_norm": 0.0, "learning_rate": 1.4182842443659617e-05, "loss": 1.1157, "step": 9757 }, { "epoch": 0.381798262774865, "grad_norm": 0.0, "learning_rate": 1.4181691363547234e-05, "loss": 1.1904, "step": 9758 }, { "epoch": 0.38183738946709445, "grad_norm": 0.0, "learning_rate": 1.4180540216282128e-05, "loss": 1.1494, "step": 9759 }, { "epoch": 0.3818765161593239, "grad_norm": 0.0, "learning_rate": 1.4179389001882787e-05, "loss": 1.0515, "step": 9760 }, { "epoch": 0.3819156428515533, "grad_norm": 0.0, "learning_rate": 1.4178237720367693e-05, "loss": 1.0622, "step": 9761 }, { "epoch": 0.38195476954378277, "grad_norm": 0.0, "learning_rate": 1.4177086371755337e-05, "loss": 0.9653, "step": 9762 }, { "epoch": 0.3819938962360122, "grad_norm": 0.0, "learning_rate": 1.417593495606421e-05, "loss": 0.9879, "step": 9763 }, { "epoch": 0.38203302292824165, "grad_norm": 0.0, "learning_rate": 1.4174783473312799e-05, "loss": 0.9874, "step": 9764 }, { "epoch": 0.3820721496204711, "grad_norm": 0.0, "learning_rate": 1.4173631923519598e-05, "loss": 1.0972, "step": 9765 }, { "epoch": 0.38211127631270053, "grad_norm": 0.0, "learning_rate": 1.4172480306703096e-05, "loss": 1.0717, "step": 9766 }, { "epoch": 0.38215040300493, "grad_norm": 0.0, "learning_rate": 1.4171328622881788e-05, "loss": 1.2536, "step": 9767 }, { "epoch": 0.3821895296971594, "grad_norm": 0.0, "learning_rate": 1.4170176872074173e-05, "loss": 1.2181, "step": 9768 }, { "epoch": 0.38222865638938885, "grad_norm": 0.0, "learning_rate": 1.416902505429874e-05, "loss": 1.1731, "step": 9769 }, { "epoch": 0.3822677830816183, "grad_norm": 0.0, "learning_rate": 1.4167873169573988e-05, "loss": 1.0839, "step": 9770 }, { "epoch": 0.38230690977384774, "grad_norm": 0.0, "learning_rate": 1.4166721217918418e-05, "loss": 1.1349, "step": 9771 }, { "epoch": 0.3823460364660772, "grad_norm": 0.0, "learning_rate": 1.4165569199350526e-05, "loss": 1.1699, "step": 9772 }, { "epoch": 0.3823851631583066, "grad_norm": 0.0, "learning_rate": 1.4164417113888814e-05, "loss": 1.0652, "step": 9773 }, { "epoch": 0.38242428985053606, "grad_norm": 0.0, "learning_rate": 1.4163264961551777e-05, "loss": 1.1766, "step": 9774 }, { "epoch": 0.3824634165427655, "grad_norm": 0.0, "learning_rate": 1.4162112742357926e-05, "loss": 1.0258, "step": 9775 }, { "epoch": 0.38250254323499494, "grad_norm": 0.0, "learning_rate": 1.4160960456325757e-05, "loss": 1.0881, "step": 9776 }, { "epoch": 0.3825416699272243, "grad_norm": 0.0, "learning_rate": 1.4159808103473778e-05, "loss": 1.2261, "step": 9777 }, { "epoch": 0.38258079661945377, "grad_norm": 0.0, "learning_rate": 1.4158655683820492e-05, "loss": 1.2252, "step": 9778 }, { "epoch": 0.3826199233116832, "grad_norm": 0.0, "learning_rate": 1.4157503197384407e-05, "loss": 1.1897, "step": 9779 }, { "epoch": 0.38265905000391265, "grad_norm": 0.0, "learning_rate": 1.4156350644184032e-05, "loss": 1.0524, "step": 9780 }, { "epoch": 0.3826981766961421, "grad_norm": 0.0, "learning_rate": 1.4155198024237876e-05, "loss": 1.1423, "step": 9781 }, { "epoch": 0.38273730338837153, "grad_norm": 0.0, "learning_rate": 1.4154045337564442e-05, "loss": 1.0352, "step": 9782 }, { "epoch": 0.38277643008060097, "grad_norm": 0.0, "learning_rate": 1.4152892584182247e-05, "loss": 1.0192, "step": 9783 }, { "epoch": 0.3828155567728304, "grad_norm": 0.0, "learning_rate": 1.4151739764109803e-05, "loss": 1.0597, "step": 9784 }, { "epoch": 0.38285468346505985, "grad_norm": 0.0, "learning_rate": 1.4150586877365615e-05, "loss": 1.0846, "step": 9785 }, { "epoch": 0.3828938101572893, "grad_norm": 0.0, "learning_rate": 1.4149433923968207e-05, "loss": 1.0295, "step": 9786 }, { "epoch": 0.38293293684951873, "grad_norm": 0.0, "learning_rate": 1.4148280903936089e-05, "loss": 1.1862, "step": 9787 }, { "epoch": 0.3829720635417482, "grad_norm": 0.0, "learning_rate": 1.414712781728778e-05, "loss": 1.1247, "step": 9788 }, { "epoch": 0.3830111902339776, "grad_norm": 0.0, "learning_rate": 1.4145974664041793e-05, "loss": 1.1854, "step": 9789 }, { "epoch": 0.38305031692620706, "grad_norm": 0.0, "learning_rate": 1.4144821444216646e-05, "loss": 1.174, "step": 9790 }, { "epoch": 0.3830894436184365, "grad_norm": 0.0, "learning_rate": 1.4143668157830863e-05, "loss": 1.0612, "step": 9791 }, { "epoch": 0.38312857031066594, "grad_norm": 0.0, "learning_rate": 1.4142514804902962e-05, "loss": 1.1357, "step": 9792 }, { "epoch": 0.3831676970028954, "grad_norm": 0.0, "learning_rate": 1.4141361385451462e-05, "loss": 0.9746, "step": 9793 }, { "epoch": 0.3832068236951248, "grad_norm": 0.0, "learning_rate": 1.4140207899494888e-05, "loss": 1.1147, "step": 9794 }, { "epoch": 0.38324595038735426, "grad_norm": 0.0, "learning_rate": 1.4139054347051764e-05, "loss": 1.1187, "step": 9795 }, { "epoch": 0.3832850770795837, "grad_norm": 0.0, "learning_rate": 1.4137900728140615e-05, "loss": 1.0925, "step": 9796 }, { "epoch": 0.38332420377181314, "grad_norm": 0.0, "learning_rate": 1.4136747042779963e-05, "loss": 1.0997, "step": 9797 }, { "epoch": 0.3833633304640426, "grad_norm": 0.0, "learning_rate": 1.4135593290988338e-05, "loss": 1.0057, "step": 9798 }, { "epoch": 0.383402457156272, "grad_norm": 0.0, "learning_rate": 1.4134439472784267e-05, "loss": 1.1, "step": 9799 }, { "epoch": 0.38344158384850147, "grad_norm": 0.0, "learning_rate": 1.4133285588186278e-05, "loss": 1.1381, "step": 9800 }, { "epoch": 0.3834807105407309, "grad_norm": 0.0, "learning_rate": 1.4132131637212903e-05, "loss": 1.1118, "step": 9801 }, { "epoch": 0.38351983723296035, "grad_norm": 0.0, "learning_rate": 1.4130977619882673e-05, "loss": 1.1575, "step": 9802 }, { "epoch": 0.3835589639251898, "grad_norm": 0.0, "learning_rate": 1.4129823536214115e-05, "loss": 0.902, "step": 9803 }, { "epoch": 0.38359809061741923, "grad_norm": 0.0, "learning_rate": 1.4128669386225768e-05, "loss": 1.0353, "step": 9804 }, { "epoch": 0.3836372173096486, "grad_norm": 0.0, "learning_rate": 1.4127515169936164e-05, "loss": 1.0122, "step": 9805 }, { "epoch": 0.38367634400187806, "grad_norm": 0.0, "learning_rate": 1.412636088736384e-05, "loss": 1.0347, "step": 9806 }, { "epoch": 0.3837154706941075, "grad_norm": 0.0, "learning_rate": 1.4125206538527327e-05, "loss": 1.0602, "step": 9807 }, { "epoch": 0.38375459738633694, "grad_norm": 0.0, "learning_rate": 1.4124052123445168e-05, "loss": 1.0314, "step": 9808 }, { "epoch": 0.3837937240785664, "grad_norm": 0.0, "learning_rate": 1.4122897642135903e-05, "loss": 1.0673, "step": 9809 }, { "epoch": 0.3838328507707958, "grad_norm": 0.0, "learning_rate": 1.4121743094618063e-05, "loss": 1.0334, "step": 9810 }, { "epoch": 0.38387197746302526, "grad_norm": 0.0, "learning_rate": 1.4120588480910198e-05, "loss": 1.0989, "step": 9811 }, { "epoch": 0.3839111041552547, "grad_norm": 0.0, "learning_rate": 1.4119433801030841e-05, "loss": 1.0872, "step": 9812 }, { "epoch": 0.38395023084748414, "grad_norm": 0.0, "learning_rate": 1.4118279054998543e-05, "loss": 1.0579, "step": 9813 }, { "epoch": 0.3839893575397136, "grad_norm": 0.0, "learning_rate": 1.4117124242831842e-05, "loss": 1.0336, "step": 9814 }, { "epoch": 0.384028484231943, "grad_norm": 0.0, "learning_rate": 1.4115969364549288e-05, "loss": 1.2322, "step": 9815 }, { "epoch": 0.38406761092417246, "grad_norm": 0.0, "learning_rate": 1.4114814420169415e-05, "loss": 1.0392, "step": 9816 }, { "epoch": 0.3841067376164019, "grad_norm": 0.0, "learning_rate": 1.4113659409710787e-05, "loss": 1.0674, "step": 9817 }, { "epoch": 0.38414586430863135, "grad_norm": 0.0, "learning_rate": 1.4112504333191938e-05, "loss": 1.2293, "step": 9818 }, { "epoch": 0.3841849910008608, "grad_norm": 0.0, "learning_rate": 1.4111349190631426e-05, "loss": 1.1852, "step": 9819 }, { "epoch": 0.38422411769309023, "grad_norm": 0.0, "learning_rate": 1.41101939820478e-05, "loss": 1.0432, "step": 9820 }, { "epoch": 0.38426324438531967, "grad_norm": 0.0, "learning_rate": 1.4109038707459606e-05, "loss": 1.1603, "step": 9821 }, { "epoch": 0.3843023710775491, "grad_norm": 0.0, "learning_rate": 1.41078833668854e-05, "loss": 1.0638, "step": 9822 }, { "epoch": 0.38434149776977855, "grad_norm": 0.0, "learning_rate": 1.4106727960343733e-05, "loss": 1.0948, "step": 9823 }, { "epoch": 0.384380624462008, "grad_norm": 0.0, "learning_rate": 1.4105572487853164e-05, "loss": 0.9632, "step": 9824 }, { "epoch": 0.38441975115423743, "grad_norm": 0.0, "learning_rate": 1.410441694943224e-05, "loss": 1.1431, "step": 9825 }, { "epoch": 0.3844588778464669, "grad_norm": 0.0, "learning_rate": 1.4103261345099528e-05, "loss": 0.9859, "step": 9826 }, { "epoch": 0.3844980045386963, "grad_norm": 0.0, "learning_rate": 1.4102105674873579e-05, "loss": 1.0536, "step": 9827 }, { "epoch": 0.38453713123092575, "grad_norm": 0.0, "learning_rate": 1.4100949938772953e-05, "loss": 1.0915, "step": 9828 }, { "epoch": 0.3845762579231552, "grad_norm": 0.0, "learning_rate": 1.409979413681621e-05, "loss": 1.1338, "step": 9829 }, { "epoch": 0.38461538461538464, "grad_norm": 0.0, "learning_rate": 1.409863826902191e-05, "loss": 0.9989, "step": 9830 }, { "epoch": 0.3846545113076141, "grad_norm": 0.0, "learning_rate": 1.4097482335408617e-05, "loss": 1.0908, "step": 9831 }, { "epoch": 0.3846936379998435, "grad_norm": 0.0, "learning_rate": 1.4096326335994887e-05, "loss": 1.0459, "step": 9832 }, { "epoch": 0.38473276469207296, "grad_norm": 0.0, "learning_rate": 1.4095170270799296e-05, "loss": 0.9934, "step": 9833 }, { "epoch": 0.38477189138430234, "grad_norm": 0.0, "learning_rate": 1.4094014139840397e-05, "loss": 1.1689, "step": 9834 }, { "epoch": 0.3848110180765318, "grad_norm": 0.0, "learning_rate": 1.4092857943136764e-05, "loss": 1.1446, "step": 9835 }, { "epoch": 0.3848501447687612, "grad_norm": 0.0, "learning_rate": 1.409170168070696e-05, "loss": 1.1158, "step": 9836 }, { "epoch": 0.38488927146099067, "grad_norm": 0.0, "learning_rate": 1.4090545352569553e-05, "loss": 1.1852, "step": 9837 }, { "epoch": 0.3849283981532201, "grad_norm": 0.0, "learning_rate": 1.4089388958743115e-05, "loss": 1.2206, "step": 9838 }, { "epoch": 0.38496752484544955, "grad_norm": 0.0, "learning_rate": 1.4088232499246214e-05, "loss": 1.0732, "step": 9839 }, { "epoch": 0.385006651537679, "grad_norm": 0.0, "learning_rate": 1.4087075974097421e-05, "loss": 1.1094, "step": 9840 }, { "epoch": 0.38504577822990843, "grad_norm": 0.0, "learning_rate": 1.4085919383315311e-05, "loss": 1.1735, "step": 9841 }, { "epoch": 0.38508490492213787, "grad_norm": 0.0, "learning_rate": 1.4084762726918455e-05, "loss": 1.1308, "step": 9842 }, { "epoch": 0.3851240316143673, "grad_norm": 0.0, "learning_rate": 1.4083606004925427e-05, "loss": 1.0973, "step": 9843 }, { "epoch": 0.38516315830659675, "grad_norm": 0.0, "learning_rate": 1.4082449217354807e-05, "loss": 1.1551, "step": 9844 }, { "epoch": 0.3852022849988262, "grad_norm": 0.0, "learning_rate": 1.4081292364225163e-05, "loss": 1.0642, "step": 9845 }, { "epoch": 0.38524141169105564, "grad_norm": 0.0, "learning_rate": 1.4080135445555083e-05, "loss": 1.0237, "step": 9846 }, { "epoch": 0.3852805383832851, "grad_norm": 0.0, "learning_rate": 1.4078978461363136e-05, "loss": 1.1159, "step": 9847 }, { "epoch": 0.3853196650755145, "grad_norm": 0.0, "learning_rate": 1.407782141166791e-05, "loss": 1.0701, "step": 9848 }, { "epoch": 0.38535879176774396, "grad_norm": 0.0, "learning_rate": 1.4076664296487978e-05, "loss": 1.0688, "step": 9849 }, { "epoch": 0.3853979184599734, "grad_norm": 0.0, "learning_rate": 1.4075507115841929e-05, "loss": 1.1298, "step": 9850 }, { "epoch": 0.38543704515220284, "grad_norm": 0.0, "learning_rate": 1.407434986974834e-05, "loss": 1.0732, "step": 9851 }, { "epoch": 0.3854761718444323, "grad_norm": 0.0, "learning_rate": 1.40731925582258e-05, "loss": 1.0946, "step": 9852 }, { "epoch": 0.3855152985366617, "grad_norm": 0.0, "learning_rate": 1.4072035181292888e-05, "loss": 1.0227, "step": 9853 }, { "epoch": 0.38555442522889116, "grad_norm": 0.0, "learning_rate": 1.4070877738968196e-05, "loss": 1.1058, "step": 9854 }, { "epoch": 0.3855935519211206, "grad_norm": 0.0, "learning_rate": 1.406972023127031e-05, "loss": 1.1611, "step": 9855 }, { "epoch": 0.38563267861335004, "grad_norm": 0.0, "learning_rate": 1.4068562658217815e-05, "loss": 1.1102, "step": 9856 }, { "epoch": 0.3856718053055795, "grad_norm": 0.0, "learning_rate": 1.4067405019829301e-05, "loss": 1.1952, "step": 9857 }, { "epoch": 0.3857109319978089, "grad_norm": 0.0, "learning_rate": 1.406624731612336e-05, "loss": 1.0364, "step": 9858 }, { "epoch": 0.38575005869003837, "grad_norm": 0.0, "learning_rate": 1.4065089547118584e-05, "loss": 1.0743, "step": 9859 }, { "epoch": 0.3857891853822678, "grad_norm": 0.0, "learning_rate": 1.4063931712833563e-05, "loss": 1.1853, "step": 9860 }, { "epoch": 0.38582831207449725, "grad_norm": 0.0, "learning_rate": 1.406277381328689e-05, "loss": 1.0557, "step": 9861 }, { "epoch": 0.38586743876672663, "grad_norm": 0.0, "learning_rate": 1.4061615848497161e-05, "loss": 1.0825, "step": 9862 }, { "epoch": 0.3859065654589561, "grad_norm": 0.0, "learning_rate": 1.4060457818482971e-05, "loss": 1.0301, "step": 9863 }, { "epoch": 0.3859456921511855, "grad_norm": 0.0, "learning_rate": 1.4059299723262916e-05, "loss": 1.0767, "step": 9864 }, { "epoch": 0.38598481884341496, "grad_norm": 0.0, "learning_rate": 1.4058141562855595e-05, "loss": 1.0627, "step": 9865 }, { "epoch": 0.3860239455356444, "grad_norm": 0.0, "learning_rate": 1.4056983337279603e-05, "loss": 1.0867, "step": 9866 }, { "epoch": 0.38606307222787384, "grad_norm": 0.0, "learning_rate": 1.4055825046553544e-05, "loss": 0.9363, "step": 9867 }, { "epoch": 0.3861021989201033, "grad_norm": 0.0, "learning_rate": 1.4054666690696017e-05, "loss": 1.0254, "step": 9868 }, { "epoch": 0.3861413256123327, "grad_norm": 0.0, "learning_rate": 1.4053508269725626e-05, "loss": 0.9914, "step": 9869 }, { "epoch": 0.38618045230456216, "grad_norm": 0.0, "learning_rate": 1.4052349783660968e-05, "loss": 1.0667, "step": 9870 }, { "epoch": 0.3862195789967916, "grad_norm": 0.0, "learning_rate": 1.4051191232520653e-05, "loss": 1.0653, "step": 9871 }, { "epoch": 0.38625870568902104, "grad_norm": 0.0, "learning_rate": 1.4050032616323282e-05, "loss": 0.9386, "step": 9872 }, { "epoch": 0.3862978323812505, "grad_norm": 0.0, "learning_rate": 1.4048873935087462e-05, "loss": 1.0591, "step": 9873 }, { "epoch": 0.3863369590734799, "grad_norm": 0.0, "learning_rate": 1.4047715188831798e-05, "loss": 1.1451, "step": 9874 }, { "epoch": 0.38637608576570937, "grad_norm": 0.0, "learning_rate": 1.4046556377574904e-05, "loss": 1.0732, "step": 9875 }, { "epoch": 0.3864152124579388, "grad_norm": 0.0, "learning_rate": 1.4045397501335382e-05, "loss": 1.0621, "step": 9876 }, { "epoch": 0.38645433915016825, "grad_norm": 0.0, "learning_rate": 1.4044238560131849e-05, "loss": 1.1235, "step": 9877 }, { "epoch": 0.3864934658423977, "grad_norm": 0.0, "learning_rate": 1.4043079553982912e-05, "loss": 1.0983, "step": 9878 }, { "epoch": 0.38653259253462713, "grad_norm": 0.0, "learning_rate": 1.4041920482907181e-05, "loss": 1.1352, "step": 9879 }, { "epoch": 0.38657171922685657, "grad_norm": 0.0, "learning_rate": 1.4040761346923275e-05, "loss": 1.0692, "step": 9880 }, { "epoch": 0.386610845919086, "grad_norm": 0.0, "learning_rate": 1.4039602146049802e-05, "loss": 1.0892, "step": 9881 }, { "epoch": 0.38664997261131545, "grad_norm": 0.0, "learning_rate": 1.4038442880305381e-05, "loss": 1.0172, "step": 9882 }, { "epoch": 0.3866890993035449, "grad_norm": 0.0, "learning_rate": 1.4037283549708628e-05, "loss": 1.2244, "step": 9883 }, { "epoch": 0.38672822599577433, "grad_norm": 0.0, "learning_rate": 1.4036124154278164e-05, "loss": 1.1574, "step": 9884 }, { "epoch": 0.3867673526880038, "grad_norm": 0.0, "learning_rate": 1.4034964694032598e-05, "loss": 1.0116, "step": 9885 }, { "epoch": 0.3868064793802332, "grad_norm": 0.0, "learning_rate": 1.4033805168990557e-05, "loss": 1.0795, "step": 9886 }, { "epoch": 0.38684560607246266, "grad_norm": 0.0, "learning_rate": 1.4032645579170661e-05, "loss": 1.1445, "step": 9887 }, { "epoch": 0.3868847327646921, "grad_norm": 0.0, "learning_rate": 1.4031485924591528e-05, "loss": 1.1259, "step": 9888 }, { "epoch": 0.38692385945692154, "grad_norm": 0.0, "learning_rate": 1.4030326205271785e-05, "loss": 1.1481, "step": 9889 }, { "epoch": 0.386962986149151, "grad_norm": 0.0, "learning_rate": 1.4029166421230052e-05, "loss": 1.0938, "step": 9890 }, { "epoch": 0.38700211284138036, "grad_norm": 0.0, "learning_rate": 1.4028006572484956e-05, "loss": 1.1047, "step": 9891 }, { "epoch": 0.3870412395336098, "grad_norm": 0.0, "learning_rate": 1.402684665905512e-05, "loss": 1.0684, "step": 9892 }, { "epoch": 0.38708036622583925, "grad_norm": 0.0, "learning_rate": 1.4025686680959174e-05, "loss": 1.0532, "step": 9893 }, { "epoch": 0.3871194929180687, "grad_norm": 0.0, "learning_rate": 1.4024526638215743e-05, "loss": 1.0909, "step": 9894 }, { "epoch": 0.3871586196102981, "grad_norm": 0.0, "learning_rate": 1.4023366530843458e-05, "loss": 0.9879, "step": 9895 }, { "epoch": 0.38719774630252757, "grad_norm": 0.0, "learning_rate": 1.4022206358860949e-05, "loss": 1.0162, "step": 9896 }, { "epoch": 0.387236872994757, "grad_norm": 0.0, "learning_rate": 1.4021046122286845e-05, "loss": 1.0461, "step": 9897 }, { "epoch": 0.38727599968698645, "grad_norm": 0.0, "learning_rate": 1.4019885821139782e-05, "loss": 1.0515, "step": 9898 }, { "epoch": 0.3873151263792159, "grad_norm": 0.0, "learning_rate": 1.4018725455438386e-05, "loss": 1.1033, "step": 9899 }, { "epoch": 0.38735425307144533, "grad_norm": 0.0, "learning_rate": 1.4017565025201296e-05, "loss": 1.1697, "step": 9900 }, { "epoch": 0.38739337976367477, "grad_norm": 0.0, "learning_rate": 1.4016404530447146e-05, "loss": 1.1021, "step": 9901 }, { "epoch": 0.3874325064559042, "grad_norm": 0.0, "learning_rate": 1.4015243971194572e-05, "loss": 0.9697, "step": 9902 }, { "epoch": 0.38747163314813365, "grad_norm": 0.0, "learning_rate": 1.401408334746221e-05, "loss": 1.174, "step": 9903 }, { "epoch": 0.3875107598403631, "grad_norm": 0.0, "learning_rate": 1.4012922659268702e-05, "loss": 0.9457, "step": 9904 }, { "epoch": 0.38754988653259254, "grad_norm": 0.0, "learning_rate": 1.4011761906632684e-05, "loss": 1.0605, "step": 9905 }, { "epoch": 0.387589013224822, "grad_norm": 0.0, "learning_rate": 1.4010601089572794e-05, "loss": 1.1197, "step": 9906 }, { "epoch": 0.3876281399170514, "grad_norm": 0.0, "learning_rate": 1.4009440208107678e-05, "loss": 1.0875, "step": 9907 }, { "epoch": 0.38766726660928086, "grad_norm": 0.0, "learning_rate": 1.400827926225598e-05, "loss": 1.1991, "step": 9908 }, { "epoch": 0.3877063933015103, "grad_norm": 0.0, "learning_rate": 1.4007118252036335e-05, "loss": 0.9725, "step": 9909 }, { "epoch": 0.38774551999373974, "grad_norm": 0.0, "learning_rate": 1.4005957177467394e-05, "loss": 1.1865, "step": 9910 }, { "epoch": 0.3877846466859692, "grad_norm": 0.0, "learning_rate": 1.4004796038567801e-05, "loss": 0.9819, "step": 9911 }, { "epoch": 0.3878237733781986, "grad_norm": 0.0, "learning_rate": 1.4003634835356199e-05, "loss": 1.1299, "step": 9912 }, { "epoch": 0.38786290007042806, "grad_norm": 0.0, "learning_rate": 1.400247356785124e-05, "loss": 1.1744, "step": 9913 }, { "epoch": 0.3879020267626575, "grad_norm": 0.0, "learning_rate": 1.400131223607157e-05, "loss": 1.1331, "step": 9914 }, { "epoch": 0.38794115345488694, "grad_norm": 0.0, "learning_rate": 1.4000150840035842e-05, "loss": 1.0374, "step": 9915 }, { "epoch": 0.3879802801471164, "grad_norm": 0.0, "learning_rate": 1.39989893797627e-05, "loss": 1.0321, "step": 9916 }, { "epoch": 0.3880194068393458, "grad_norm": 0.0, "learning_rate": 1.3997827855270804e-05, "loss": 1.1164, "step": 9917 }, { "epoch": 0.38805853353157527, "grad_norm": 0.0, "learning_rate": 1.39966662665788e-05, "loss": 1.0169, "step": 9918 }, { "epoch": 0.38809766022380465, "grad_norm": 0.0, "learning_rate": 1.3995504613705344e-05, "loss": 1.022, "step": 9919 }, { "epoch": 0.3881367869160341, "grad_norm": 0.0, "learning_rate": 1.399434289666909e-05, "loss": 1.0401, "step": 9920 }, { "epoch": 0.38817591360826353, "grad_norm": 0.0, "learning_rate": 1.3993181115488693e-05, "loss": 1.1185, "step": 9921 }, { "epoch": 0.388215040300493, "grad_norm": 0.0, "learning_rate": 1.3992019270182815e-05, "loss": 0.9549, "step": 9922 }, { "epoch": 0.3882541669927224, "grad_norm": 0.0, "learning_rate": 1.3990857360770108e-05, "loss": 1.1437, "step": 9923 }, { "epoch": 0.38829329368495186, "grad_norm": 0.0, "learning_rate": 1.3989695387269232e-05, "loss": 0.9819, "step": 9924 }, { "epoch": 0.3883324203771813, "grad_norm": 0.0, "learning_rate": 1.398853334969885e-05, "loss": 1.098, "step": 9925 }, { "epoch": 0.38837154706941074, "grad_norm": 0.0, "learning_rate": 1.3987371248077617e-05, "loss": 1.0492, "step": 9926 }, { "epoch": 0.3884106737616402, "grad_norm": 0.0, "learning_rate": 1.3986209082424198e-05, "loss": 0.9811, "step": 9927 }, { "epoch": 0.3884498004538696, "grad_norm": 0.0, "learning_rate": 1.398504685275726e-05, "loss": 1.0758, "step": 9928 }, { "epoch": 0.38848892714609906, "grad_norm": 0.0, "learning_rate": 1.398388455909546e-05, "loss": 1.0156, "step": 9929 }, { "epoch": 0.3885280538383285, "grad_norm": 0.0, "learning_rate": 1.3982722201457467e-05, "loss": 1.0658, "step": 9930 }, { "epoch": 0.38856718053055794, "grad_norm": 0.0, "learning_rate": 1.3981559779861947e-05, "loss": 1.1166, "step": 9931 }, { "epoch": 0.3886063072227874, "grad_norm": 0.0, "learning_rate": 1.3980397294327563e-05, "loss": 1.165, "step": 9932 }, { "epoch": 0.3886454339150168, "grad_norm": 0.0, "learning_rate": 1.397923474487299e-05, "loss": 1.0795, "step": 9933 }, { "epoch": 0.38868456060724627, "grad_norm": 0.0, "learning_rate": 1.3978072131516889e-05, "loss": 1.0795, "step": 9934 }, { "epoch": 0.3887236872994757, "grad_norm": 0.0, "learning_rate": 1.3976909454277937e-05, "loss": 1.1447, "step": 9935 }, { "epoch": 0.38876281399170515, "grad_norm": 0.0, "learning_rate": 1.39757467131748e-05, "loss": 1.1699, "step": 9936 }, { "epoch": 0.3888019406839346, "grad_norm": 0.0, "learning_rate": 1.3974583908226157e-05, "loss": 1.1028, "step": 9937 }, { "epoch": 0.38884106737616403, "grad_norm": 0.0, "learning_rate": 1.3973421039450675e-05, "loss": 1.1918, "step": 9938 }, { "epoch": 0.38888019406839347, "grad_norm": 0.0, "learning_rate": 1.397225810686703e-05, "loss": 1.1368, "step": 9939 }, { "epoch": 0.3889193207606229, "grad_norm": 0.0, "learning_rate": 1.3971095110493895e-05, "loss": 1.1327, "step": 9940 }, { "epoch": 0.38895844745285235, "grad_norm": 0.0, "learning_rate": 1.396993205034995e-05, "loss": 1.0912, "step": 9941 }, { "epoch": 0.3889975741450818, "grad_norm": 0.0, "learning_rate": 1.3968768926453873e-05, "loss": 1.1458, "step": 9942 }, { "epoch": 0.38903670083731123, "grad_norm": 0.0, "learning_rate": 1.3967605738824338e-05, "loss": 1.0974, "step": 9943 }, { "epoch": 0.3890758275295407, "grad_norm": 0.0, "learning_rate": 1.3966442487480028e-05, "loss": 1.0629, "step": 9944 }, { "epoch": 0.3891149542217701, "grad_norm": 0.0, "learning_rate": 1.396527917243962e-05, "loss": 1.0745, "step": 9945 }, { "epoch": 0.38915408091399956, "grad_norm": 0.0, "learning_rate": 1.3964115793721798e-05, "loss": 0.966, "step": 9946 }, { "epoch": 0.389193207606229, "grad_norm": 0.0, "learning_rate": 1.3962952351345247e-05, "loss": 1.1254, "step": 9947 }, { "epoch": 0.3892323342984584, "grad_norm": 0.0, "learning_rate": 1.396178884532864e-05, "loss": 1.0151, "step": 9948 }, { "epoch": 0.3892714609906878, "grad_norm": 0.0, "learning_rate": 1.3960625275690676e-05, "loss": 1.0522, "step": 9949 }, { "epoch": 0.38931058768291726, "grad_norm": 0.0, "learning_rate": 1.3959461642450027e-05, "loss": 1.1037, "step": 9950 }, { "epoch": 0.3893497143751467, "grad_norm": 0.0, "learning_rate": 1.395829794562539e-05, "loss": 1.0795, "step": 9951 }, { "epoch": 0.38938884106737615, "grad_norm": 0.0, "learning_rate": 1.3957134185235445e-05, "loss": 1.0731, "step": 9952 }, { "epoch": 0.3894279677596056, "grad_norm": 0.0, "learning_rate": 1.3955970361298884e-05, "loss": 1.1302, "step": 9953 }, { "epoch": 0.389467094451835, "grad_norm": 0.0, "learning_rate": 1.3954806473834397e-05, "loss": 1.0073, "step": 9954 }, { "epoch": 0.38950622114406447, "grad_norm": 0.0, "learning_rate": 1.3953642522860675e-05, "loss": 1.1552, "step": 9955 }, { "epoch": 0.3895453478362939, "grad_norm": 0.0, "learning_rate": 1.3952478508396406e-05, "loss": 1.0135, "step": 9956 }, { "epoch": 0.38958447452852335, "grad_norm": 0.0, "learning_rate": 1.3951314430460285e-05, "loss": 0.9518, "step": 9957 }, { "epoch": 0.3896236012207528, "grad_norm": 0.0, "learning_rate": 1.3950150289071007e-05, "loss": 0.9673, "step": 9958 }, { "epoch": 0.38966272791298223, "grad_norm": 0.0, "learning_rate": 1.3948986084247264e-05, "loss": 1.0453, "step": 9959 }, { "epoch": 0.3897018546052117, "grad_norm": 0.0, "learning_rate": 1.3947821816007755e-05, "loss": 1.1985, "step": 9960 }, { "epoch": 0.3897409812974411, "grad_norm": 0.0, "learning_rate": 1.3946657484371172e-05, "loss": 1.0123, "step": 9961 }, { "epoch": 0.38978010798967055, "grad_norm": 0.0, "learning_rate": 1.3945493089356218e-05, "loss": 1.0267, "step": 9962 }, { "epoch": 0.3898192346819, "grad_norm": 0.0, "learning_rate": 1.3944328630981583e-05, "loss": 1.0173, "step": 9963 }, { "epoch": 0.38985836137412944, "grad_norm": 0.0, "learning_rate": 1.3943164109265978e-05, "loss": 1.0727, "step": 9964 }, { "epoch": 0.3898974880663589, "grad_norm": 0.0, "learning_rate": 1.3941999524228098e-05, "loss": 1.1606, "step": 9965 }, { "epoch": 0.3899366147585883, "grad_norm": 0.0, "learning_rate": 1.3940834875886644e-05, "loss": 1.1461, "step": 9966 }, { "epoch": 0.38997574145081776, "grad_norm": 0.0, "learning_rate": 1.3939670164260321e-05, "loss": 1.1306, "step": 9967 }, { "epoch": 0.3900148681430472, "grad_norm": 0.0, "learning_rate": 1.393850538936783e-05, "loss": 1.2645, "step": 9968 }, { "epoch": 0.39005399483527664, "grad_norm": 0.0, "learning_rate": 1.393734055122788e-05, "loss": 1.1033, "step": 9969 }, { "epoch": 0.3900931215275061, "grad_norm": 0.0, "learning_rate": 1.3936175649859173e-05, "loss": 1.087, "step": 9970 }, { "epoch": 0.3901322482197355, "grad_norm": 0.0, "learning_rate": 1.3935010685280417e-05, "loss": 1.1862, "step": 9971 }, { "epoch": 0.39017137491196496, "grad_norm": 0.0, "learning_rate": 1.3933845657510321e-05, "loss": 0.996, "step": 9972 }, { "epoch": 0.3902105016041944, "grad_norm": 0.0, "learning_rate": 1.3932680566567596e-05, "loss": 1.1722, "step": 9973 }, { "epoch": 0.39024962829642384, "grad_norm": 0.0, "learning_rate": 1.3931515412470943e-05, "loss": 1.1082, "step": 9974 }, { "epoch": 0.3902887549886533, "grad_norm": 0.0, "learning_rate": 1.3930350195239085e-05, "loss": 1.1805, "step": 9975 }, { "epoch": 0.39032788168088267, "grad_norm": 0.0, "learning_rate": 1.3929184914890725e-05, "loss": 1.0506, "step": 9976 }, { "epoch": 0.3903670083731121, "grad_norm": 0.0, "learning_rate": 1.3928019571444577e-05, "loss": 1.111, "step": 9977 }, { "epoch": 0.39040613506534155, "grad_norm": 0.0, "learning_rate": 1.392685416491936e-05, "loss": 1.0931, "step": 9978 }, { "epoch": 0.390445261757571, "grad_norm": 0.0, "learning_rate": 1.3925688695333783e-05, "loss": 0.999, "step": 9979 }, { "epoch": 0.39048438844980043, "grad_norm": 0.0, "learning_rate": 1.3924523162706567e-05, "loss": 1.1045, "step": 9980 }, { "epoch": 0.3905235151420299, "grad_norm": 0.0, "learning_rate": 1.3923357567056424e-05, "loss": 1.1301, "step": 9981 }, { "epoch": 0.3905626418342593, "grad_norm": 0.0, "learning_rate": 1.3922191908402079e-05, "loss": 1.009, "step": 9982 }, { "epoch": 0.39060176852648876, "grad_norm": 0.0, "learning_rate": 1.392102618676224e-05, "loss": 1.0667, "step": 9983 }, { "epoch": 0.3906408952187182, "grad_norm": 0.0, "learning_rate": 1.391986040215564e-05, "loss": 1.0864, "step": 9984 }, { "epoch": 0.39068002191094764, "grad_norm": 0.0, "learning_rate": 1.3918694554600992e-05, "loss": 1.0454, "step": 9985 }, { "epoch": 0.3907191486031771, "grad_norm": 0.0, "learning_rate": 1.391752864411702e-05, "loss": 0.9895, "step": 9986 }, { "epoch": 0.3907582752954065, "grad_norm": 0.0, "learning_rate": 1.3916362670722447e-05, "loss": 1.0634, "step": 9987 }, { "epoch": 0.39079740198763596, "grad_norm": 0.0, "learning_rate": 1.3915196634435995e-05, "loss": 0.9807, "step": 9988 }, { "epoch": 0.3908365286798654, "grad_norm": 0.0, "learning_rate": 1.3914030535276394e-05, "loss": 1.2319, "step": 9989 }, { "epoch": 0.39087565537209484, "grad_norm": 0.0, "learning_rate": 1.3912864373262365e-05, "loss": 1.1043, "step": 9990 }, { "epoch": 0.3909147820643243, "grad_norm": 0.0, "learning_rate": 1.3911698148412638e-05, "loss": 1.1335, "step": 9991 }, { "epoch": 0.3909539087565537, "grad_norm": 0.0, "learning_rate": 1.391053186074594e-05, "loss": 1.0112, "step": 9992 }, { "epoch": 0.39099303544878317, "grad_norm": 0.0, "learning_rate": 1.3909365510281001e-05, "loss": 1.1188, "step": 9993 }, { "epoch": 0.3910321621410126, "grad_norm": 0.0, "learning_rate": 1.3908199097036549e-05, "loss": 1.0787, "step": 9994 }, { "epoch": 0.39107128883324205, "grad_norm": 0.0, "learning_rate": 1.390703262103132e-05, "loss": 1.0353, "step": 9995 }, { "epoch": 0.3911104155254715, "grad_norm": 0.0, "learning_rate": 1.390586608228404e-05, "loss": 1.1016, "step": 9996 }, { "epoch": 0.39114954221770093, "grad_norm": 0.0, "learning_rate": 1.3904699480813446e-05, "loss": 1.1083, "step": 9997 }, { "epoch": 0.39118866890993037, "grad_norm": 0.0, "learning_rate": 1.3903532816638272e-05, "loss": 1.0103, "step": 9998 }, { "epoch": 0.3912277956021598, "grad_norm": 0.0, "learning_rate": 1.390236608977725e-05, "loss": 1.0305, "step": 9999 }, { "epoch": 0.39126692229438925, "grad_norm": 0.0, "learning_rate": 1.3901199300249121e-05, "loss": 1.1525, "step": 10000 }, { "epoch": 0.3913060489866187, "grad_norm": 0.0, "learning_rate": 1.390003244807262e-05, "loss": 1.1277, "step": 10001 }, { "epoch": 0.39134517567884813, "grad_norm": 0.0, "learning_rate": 1.3898865533266483e-05, "loss": 1.11, "step": 10002 }, { "epoch": 0.3913843023710776, "grad_norm": 0.0, "learning_rate": 1.3897698555849453e-05, "loss": 1.0789, "step": 10003 }, { "epoch": 0.391423429063307, "grad_norm": 0.0, "learning_rate": 1.3896531515840268e-05, "loss": 1.2353, "step": 10004 }, { "epoch": 0.3914625557555364, "grad_norm": 0.0, "learning_rate": 1.3895364413257669e-05, "loss": 1.1479, "step": 10005 }, { "epoch": 0.39150168244776584, "grad_norm": 0.0, "learning_rate": 1.3894197248120396e-05, "loss": 0.9423, "step": 10006 }, { "epoch": 0.3915408091399953, "grad_norm": 0.0, "learning_rate": 1.38930300204472e-05, "loss": 0.9914, "step": 10007 }, { "epoch": 0.3915799358322247, "grad_norm": 0.0, "learning_rate": 1.3891862730256815e-05, "loss": 1.2346, "step": 10008 }, { "epoch": 0.39161906252445416, "grad_norm": 0.0, "learning_rate": 1.3890695377567996e-05, "loss": 1.0407, "step": 10009 }, { "epoch": 0.3916581892166836, "grad_norm": 0.0, "learning_rate": 1.388952796239948e-05, "loss": 0.9792, "step": 10010 }, { "epoch": 0.39169731590891305, "grad_norm": 0.0, "learning_rate": 1.3888360484770023e-05, "loss": 1.1668, "step": 10011 }, { "epoch": 0.3917364426011425, "grad_norm": 0.0, "learning_rate": 1.3887192944698366e-05, "loss": 1.256, "step": 10012 }, { "epoch": 0.39177556929337193, "grad_norm": 0.0, "learning_rate": 1.3886025342203263e-05, "loss": 1.0548, "step": 10013 }, { "epoch": 0.39181469598560137, "grad_norm": 0.0, "learning_rate": 1.3884857677303463e-05, "loss": 1.1521, "step": 10014 }, { "epoch": 0.3918538226778308, "grad_norm": 0.0, "learning_rate": 1.3883689950017716e-05, "loss": 1.0015, "step": 10015 }, { "epoch": 0.39189294937006025, "grad_norm": 0.0, "learning_rate": 1.3882522160364773e-05, "loss": 1.0703, "step": 10016 }, { "epoch": 0.3919320760622897, "grad_norm": 0.0, "learning_rate": 1.3881354308363391e-05, "loss": 1.0543, "step": 10017 }, { "epoch": 0.39197120275451913, "grad_norm": 0.0, "learning_rate": 1.3880186394032322e-05, "loss": 1.1402, "step": 10018 }, { "epoch": 0.3920103294467486, "grad_norm": 0.0, "learning_rate": 1.3879018417390323e-05, "loss": 1.0092, "step": 10019 }, { "epoch": 0.392049456138978, "grad_norm": 0.0, "learning_rate": 1.3877850378456145e-05, "loss": 1.1747, "step": 10020 }, { "epoch": 0.39208858283120746, "grad_norm": 0.0, "learning_rate": 1.3876682277248552e-05, "loss": 1.0733, "step": 10021 }, { "epoch": 0.3921277095234369, "grad_norm": 0.0, "learning_rate": 1.3875514113786301e-05, "loss": 0.9604, "step": 10022 }, { "epoch": 0.39216683621566634, "grad_norm": 0.0, "learning_rate": 1.3874345888088145e-05, "loss": 0.9394, "step": 10023 }, { "epoch": 0.3922059629078958, "grad_norm": 0.0, "learning_rate": 1.3873177600172854e-05, "loss": 1.083, "step": 10024 }, { "epoch": 0.3922450896001252, "grad_norm": 0.0, "learning_rate": 1.3872009250059181e-05, "loss": 1.1477, "step": 10025 }, { "epoch": 0.39228421629235466, "grad_norm": 0.0, "learning_rate": 1.387084083776589e-05, "loss": 1.1238, "step": 10026 }, { "epoch": 0.3923233429845841, "grad_norm": 0.0, "learning_rate": 1.386967236331175e-05, "loss": 1.2132, "step": 10027 }, { "epoch": 0.39236246967681354, "grad_norm": 0.0, "learning_rate": 1.3868503826715518e-05, "loss": 1.2034, "step": 10028 }, { "epoch": 0.392401596369043, "grad_norm": 0.0, "learning_rate": 1.3867335227995961e-05, "loss": 1.0827, "step": 10029 }, { "epoch": 0.3924407230612724, "grad_norm": 0.0, "learning_rate": 1.3866166567171848e-05, "loss": 0.9912, "step": 10030 }, { "epoch": 0.39247984975350186, "grad_norm": 0.0, "learning_rate": 1.3864997844261945e-05, "loss": 1.0978, "step": 10031 }, { "epoch": 0.3925189764457313, "grad_norm": 0.0, "learning_rate": 1.3863829059285019e-05, "loss": 1.1614, "step": 10032 }, { "epoch": 0.3925581031379607, "grad_norm": 0.0, "learning_rate": 1.386266021225984e-05, "loss": 1.2528, "step": 10033 }, { "epoch": 0.39259722983019013, "grad_norm": 0.0, "learning_rate": 1.3861491303205179e-05, "loss": 1.17, "step": 10034 }, { "epoch": 0.39263635652241957, "grad_norm": 0.0, "learning_rate": 1.3860322332139805e-05, "loss": 1.1973, "step": 10035 }, { "epoch": 0.392675483214649, "grad_norm": 0.0, "learning_rate": 1.3859153299082493e-05, "loss": 1.0062, "step": 10036 }, { "epoch": 0.39271460990687845, "grad_norm": 0.0, "learning_rate": 1.3857984204052014e-05, "loss": 1.132, "step": 10037 }, { "epoch": 0.3927537365991079, "grad_norm": 0.0, "learning_rate": 1.3856815047067143e-05, "loss": 1.1251, "step": 10038 }, { "epoch": 0.39279286329133734, "grad_norm": 0.0, "learning_rate": 1.3855645828146653e-05, "loss": 1.2589, "step": 10039 }, { "epoch": 0.3928319899835668, "grad_norm": 0.0, "learning_rate": 1.3854476547309326e-05, "loss": 1.1037, "step": 10040 }, { "epoch": 0.3928711166757962, "grad_norm": 0.0, "learning_rate": 1.3853307204573931e-05, "loss": 1.0893, "step": 10041 }, { "epoch": 0.39291024336802566, "grad_norm": 0.0, "learning_rate": 1.3852137799959256e-05, "loss": 1.2145, "step": 10042 }, { "epoch": 0.3929493700602551, "grad_norm": 0.0, "learning_rate": 1.3850968333484072e-05, "loss": 1.0886, "step": 10043 }, { "epoch": 0.39298849675248454, "grad_norm": 0.0, "learning_rate": 1.3849798805167163e-05, "loss": 1.072, "step": 10044 }, { "epoch": 0.393027623444714, "grad_norm": 0.0, "learning_rate": 1.3848629215027309e-05, "loss": 1.1625, "step": 10045 }, { "epoch": 0.3930667501369434, "grad_norm": 0.0, "learning_rate": 1.3847459563083292e-05, "loss": 1.0813, "step": 10046 }, { "epoch": 0.39310587682917286, "grad_norm": 0.0, "learning_rate": 1.3846289849353897e-05, "loss": 1.1489, "step": 10047 }, { "epoch": 0.3931450035214023, "grad_norm": 0.0, "learning_rate": 1.3845120073857906e-05, "loss": 1.0226, "step": 10048 }, { "epoch": 0.39318413021363174, "grad_norm": 0.0, "learning_rate": 1.3843950236614103e-05, "loss": 1.0412, "step": 10049 }, { "epoch": 0.3932232569058612, "grad_norm": 0.0, "learning_rate": 1.3842780337641278e-05, "loss": 1.085, "step": 10050 }, { "epoch": 0.3932623835980906, "grad_norm": 0.0, "learning_rate": 1.3841610376958217e-05, "loss": 1.0626, "step": 10051 }, { "epoch": 0.39330151029032007, "grad_norm": 0.0, "learning_rate": 1.3840440354583704e-05, "loss": 1.2355, "step": 10052 }, { "epoch": 0.3933406369825495, "grad_norm": 0.0, "learning_rate": 1.3839270270536534e-05, "loss": 1.1019, "step": 10053 }, { "epoch": 0.39337976367477895, "grad_norm": 0.0, "learning_rate": 1.3838100124835494e-05, "loss": 1.0954, "step": 10054 }, { "epoch": 0.3934188903670084, "grad_norm": 0.0, "learning_rate": 1.3836929917499374e-05, "loss": 1.1725, "step": 10055 }, { "epoch": 0.39345801705923783, "grad_norm": 0.0, "learning_rate": 1.383575964854697e-05, "loss": 1.061, "step": 10056 }, { "epoch": 0.39349714375146727, "grad_norm": 0.0, "learning_rate": 1.383458931799707e-05, "loss": 1.1194, "step": 10057 }, { "epoch": 0.3935362704436967, "grad_norm": 0.0, "learning_rate": 1.383341892586847e-05, "loss": 1.1586, "step": 10058 }, { "epoch": 0.39357539713592615, "grad_norm": 0.0, "learning_rate": 1.3832248472179967e-05, "loss": 1.1043, "step": 10059 }, { "epoch": 0.3936145238281556, "grad_norm": 0.0, "learning_rate": 1.3831077956950355e-05, "loss": 1.1495, "step": 10060 }, { "epoch": 0.393653650520385, "grad_norm": 0.0, "learning_rate": 1.3829907380198433e-05, "loss": 1.0704, "step": 10061 }, { "epoch": 0.3936927772126144, "grad_norm": 0.0, "learning_rate": 1.3828736741942998e-05, "loss": 1.0341, "step": 10062 }, { "epoch": 0.39373190390484386, "grad_norm": 0.0, "learning_rate": 1.3827566042202849e-05, "loss": 1.1366, "step": 10063 }, { "epoch": 0.3937710305970733, "grad_norm": 0.0, "learning_rate": 1.3826395280996783e-05, "loss": 1.1804, "step": 10064 }, { "epoch": 0.39381015728930274, "grad_norm": 0.0, "learning_rate": 1.3825224458343604e-05, "loss": 1.0135, "step": 10065 }, { "epoch": 0.3938492839815322, "grad_norm": 0.0, "learning_rate": 1.3824053574262113e-05, "loss": 1.084, "step": 10066 }, { "epoch": 0.3938884106737616, "grad_norm": 0.0, "learning_rate": 1.3822882628771115e-05, "loss": 1.0457, "step": 10067 }, { "epoch": 0.39392753736599107, "grad_norm": 0.0, "learning_rate": 1.3821711621889412e-05, "loss": 0.956, "step": 10068 }, { "epoch": 0.3939666640582205, "grad_norm": 0.0, "learning_rate": 1.3820540553635808e-05, "loss": 1.1422, "step": 10069 }, { "epoch": 0.39400579075044995, "grad_norm": 0.0, "learning_rate": 1.381936942402911e-05, "loss": 0.9774, "step": 10070 }, { "epoch": 0.3940449174426794, "grad_norm": 0.0, "learning_rate": 1.3818198233088128e-05, "loss": 1.1802, "step": 10071 }, { "epoch": 0.39408404413490883, "grad_norm": 0.0, "learning_rate": 1.3817026980831662e-05, "loss": 0.9934, "step": 10072 }, { "epoch": 0.39412317082713827, "grad_norm": 0.0, "learning_rate": 1.381585566727853e-05, "loss": 1.0769, "step": 10073 }, { "epoch": 0.3941622975193677, "grad_norm": 0.0, "learning_rate": 1.3814684292447537e-05, "loss": 1.1198, "step": 10074 }, { "epoch": 0.39420142421159715, "grad_norm": 0.0, "learning_rate": 1.3813512856357491e-05, "loss": 1.043, "step": 10075 }, { "epoch": 0.3942405509038266, "grad_norm": 0.0, "learning_rate": 1.3812341359027212e-05, "loss": 1.1647, "step": 10076 }, { "epoch": 0.39427967759605603, "grad_norm": 0.0, "learning_rate": 1.3811169800475503e-05, "loss": 1.1481, "step": 10077 }, { "epoch": 0.3943188042882855, "grad_norm": 0.0, "learning_rate": 1.3809998180721187e-05, "loss": 1.127, "step": 10078 }, { "epoch": 0.3943579309805149, "grad_norm": 0.0, "learning_rate": 1.380882649978307e-05, "loss": 1.1171, "step": 10079 }, { "epoch": 0.39439705767274436, "grad_norm": 0.0, "learning_rate": 1.3807654757679976e-05, "loss": 1.0204, "step": 10080 }, { "epoch": 0.3944361843649738, "grad_norm": 0.0, "learning_rate": 1.3806482954430716e-05, "loss": 1.0965, "step": 10081 }, { "epoch": 0.39447531105720324, "grad_norm": 0.0, "learning_rate": 1.380531109005411e-05, "loss": 0.9947, "step": 10082 }, { "epoch": 0.3945144377494327, "grad_norm": 0.0, "learning_rate": 1.3804139164568976e-05, "loss": 0.9131, "step": 10083 }, { "epoch": 0.3945535644416621, "grad_norm": 0.0, "learning_rate": 1.3802967177994133e-05, "loss": 1.0026, "step": 10084 }, { "epoch": 0.39459269113389156, "grad_norm": 0.0, "learning_rate": 1.3801795130348405e-05, "loss": 1.037, "step": 10085 }, { "epoch": 0.394631817826121, "grad_norm": 0.0, "learning_rate": 1.380062302165061e-05, "loss": 1.0694, "step": 10086 }, { "epoch": 0.39467094451835044, "grad_norm": 0.0, "learning_rate": 1.379945085191957e-05, "loss": 1.0201, "step": 10087 }, { "epoch": 0.3947100712105799, "grad_norm": 0.0, "learning_rate": 1.3798278621174113e-05, "loss": 1.1213, "step": 10088 }, { "epoch": 0.3947491979028093, "grad_norm": 0.0, "learning_rate": 1.3797106329433062e-05, "loss": 1.1009, "step": 10089 }, { "epoch": 0.3947883245950387, "grad_norm": 0.0, "learning_rate": 1.3795933976715236e-05, "loss": 0.9745, "step": 10090 }, { "epoch": 0.39482745128726815, "grad_norm": 0.0, "learning_rate": 1.3794761563039472e-05, "loss": 1.1074, "step": 10091 }, { "epoch": 0.3948665779794976, "grad_norm": 0.0, "learning_rate": 1.3793589088424591e-05, "loss": 1.1702, "step": 10092 }, { "epoch": 0.39490570467172703, "grad_norm": 0.0, "learning_rate": 1.3792416552889419e-05, "loss": 1.1144, "step": 10093 }, { "epoch": 0.3949448313639565, "grad_norm": 0.0, "learning_rate": 1.3791243956452794e-05, "loss": 1.2206, "step": 10094 }, { "epoch": 0.3949839580561859, "grad_norm": 0.0, "learning_rate": 1.3790071299133539e-05, "loss": 1.1482, "step": 10095 }, { "epoch": 0.39502308474841535, "grad_norm": 0.0, "learning_rate": 1.378889858095049e-05, "loss": 1.0367, "step": 10096 }, { "epoch": 0.3950622114406448, "grad_norm": 0.0, "learning_rate": 1.3787725801922477e-05, "loss": 1.0075, "step": 10097 }, { "epoch": 0.39510133813287424, "grad_norm": 0.0, "learning_rate": 1.3786552962068334e-05, "loss": 1.0723, "step": 10098 }, { "epoch": 0.3951404648251037, "grad_norm": 0.0, "learning_rate": 1.3785380061406897e-05, "loss": 0.9789, "step": 10099 }, { "epoch": 0.3951795915173331, "grad_norm": 0.0, "learning_rate": 1.3784207099956994e-05, "loss": 1.1899, "step": 10100 }, { "epoch": 0.39521871820956256, "grad_norm": 0.0, "learning_rate": 1.3783034077737472e-05, "loss": 0.8785, "step": 10101 }, { "epoch": 0.395257844901792, "grad_norm": 0.0, "learning_rate": 1.3781860994767162e-05, "loss": 1.0358, "step": 10102 }, { "epoch": 0.39529697159402144, "grad_norm": 0.0, "learning_rate": 1.3780687851064901e-05, "loss": 1.1044, "step": 10103 }, { "epoch": 0.3953360982862509, "grad_norm": 0.0, "learning_rate": 1.3779514646649534e-05, "loss": 1.1901, "step": 10104 }, { "epoch": 0.3953752249784803, "grad_norm": 0.0, "learning_rate": 1.3778341381539896e-05, "loss": 1.0886, "step": 10105 }, { "epoch": 0.39541435167070976, "grad_norm": 0.0, "learning_rate": 1.377716805575483e-05, "loss": 1.0418, "step": 10106 }, { "epoch": 0.3954534783629392, "grad_norm": 0.0, "learning_rate": 1.377599466931318e-05, "loss": 1.158, "step": 10107 }, { "epoch": 0.39549260505516864, "grad_norm": 0.0, "learning_rate": 1.3774821222233784e-05, "loss": 1.0364, "step": 10108 }, { "epoch": 0.3955317317473981, "grad_norm": 0.0, "learning_rate": 1.3773647714535491e-05, "loss": 1.071, "step": 10109 }, { "epoch": 0.3955708584396275, "grad_norm": 0.0, "learning_rate": 1.3772474146237145e-05, "loss": 1.1675, "step": 10110 }, { "epoch": 0.39560998513185697, "grad_norm": 0.0, "learning_rate": 1.377130051735759e-05, "loss": 1.0236, "step": 10111 }, { "epoch": 0.3956491118240864, "grad_norm": 0.0, "learning_rate": 1.3770126827915678e-05, "loss": 1.0792, "step": 10112 }, { "epoch": 0.39568823851631585, "grad_norm": 0.0, "learning_rate": 1.3768953077930248e-05, "loss": 1.1252, "step": 10113 }, { "epoch": 0.3957273652085453, "grad_norm": 0.0, "learning_rate": 1.3767779267420158e-05, "loss": 0.8905, "step": 10114 }, { "epoch": 0.39576649190077473, "grad_norm": 0.0, "learning_rate": 1.3766605396404252e-05, "loss": 1.0756, "step": 10115 }, { "epoch": 0.39580561859300417, "grad_norm": 0.0, "learning_rate": 1.3765431464901384e-05, "loss": 1.1013, "step": 10116 }, { "epoch": 0.3958447452852336, "grad_norm": 0.0, "learning_rate": 1.3764257472930404e-05, "loss": 1.1346, "step": 10117 }, { "epoch": 0.395883871977463, "grad_norm": 0.0, "learning_rate": 1.3763083420510168e-05, "loss": 1.1384, "step": 10118 }, { "epoch": 0.39592299866969244, "grad_norm": 0.0, "learning_rate": 1.3761909307659525e-05, "loss": 1.1967, "step": 10119 }, { "epoch": 0.3959621253619219, "grad_norm": 0.0, "learning_rate": 1.3760735134397335e-05, "loss": 1.1324, "step": 10120 }, { "epoch": 0.3960012520541513, "grad_norm": 0.0, "learning_rate": 1.3759560900742451e-05, "loss": 1.0079, "step": 10121 }, { "epoch": 0.39604037874638076, "grad_norm": 0.0, "learning_rate": 1.3758386606713727e-05, "loss": 1.1726, "step": 10122 }, { "epoch": 0.3960795054386102, "grad_norm": 0.0, "learning_rate": 1.3757212252330028e-05, "loss": 0.9677, "step": 10123 }, { "epoch": 0.39611863213083964, "grad_norm": 0.0, "learning_rate": 1.3756037837610205e-05, "loss": 1.1006, "step": 10124 }, { "epoch": 0.3961577588230691, "grad_norm": 0.0, "learning_rate": 1.3754863362573124e-05, "loss": 1.0788, "step": 10125 }, { "epoch": 0.3961968855152985, "grad_norm": 0.0, "learning_rate": 1.3753688827237639e-05, "loss": 1.0299, "step": 10126 }, { "epoch": 0.39623601220752797, "grad_norm": 0.0, "learning_rate": 1.3752514231622617e-05, "loss": 1.2603, "step": 10127 }, { "epoch": 0.3962751388997574, "grad_norm": 0.0, "learning_rate": 1.3751339575746915e-05, "loss": 1.078, "step": 10128 }, { "epoch": 0.39631426559198685, "grad_norm": 0.0, "learning_rate": 1.3750164859629407e-05, "loss": 1.0853, "step": 10129 }, { "epoch": 0.3963533922842163, "grad_norm": 0.0, "learning_rate": 1.3748990083288944e-05, "loss": 1.1279, "step": 10130 }, { "epoch": 0.39639251897644573, "grad_norm": 0.0, "learning_rate": 1.3747815246744403e-05, "loss": 1.061, "step": 10131 }, { "epoch": 0.39643164566867517, "grad_norm": 0.0, "learning_rate": 1.3746640350014643e-05, "loss": 1.1165, "step": 10132 }, { "epoch": 0.3964707723609046, "grad_norm": 0.0, "learning_rate": 1.3745465393118533e-05, "loss": 1.0984, "step": 10133 }, { "epoch": 0.39650989905313405, "grad_norm": 0.0, "learning_rate": 1.3744290376074945e-05, "loss": 1.1115, "step": 10134 }, { "epoch": 0.3965490257453635, "grad_norm": 0.0, "learning_rate": 1.3743115298902743e-05, "loss": 1.1668, "step": 10135 }, { "epoch": 0.39658815243759293, "grad_norm": 0.0, "learning_rate": 1.3741940161620799e-05, "loss": 1.1587, "step": 10136 }, { "epoch": 0.3966272791298224, "grad_norm": 0.0, "learning_rate": 1.3740764964247986e-05, "loss": 1.2047, "step": 10137 }, { "epoch": 0.3966664058220518, "grad_norm": 0.0, "learning_rate": 1.3739589706803176e-05, "loss": 1.1647, "step": 10138 }, { "epoch": 0.39670553251428126, "grad_norm": 0.0, "learning_rate": 1.3738414389305242e-05, "loss": 1.0588, "step": 10139 }, { "epoch": 0.3967446592065107, "grad_norm": 0.0, "learning_rate": 1.3737239011773054e-05, "loss": 1.0564, "step": 10140 }, { "epoch": 0.39678378589874014, "grad_norm": 0.0, "learning_rate": 1.3736063574225496e-05, "loss": 1.1736, "step": 10141 }, { "epoch": 0.3968229125909696, "grad_norm": 0.0, "learning_rate": 1.3734888076681432e-05, "loss": 1.1008, "step": 10142 }, { "epoch": 0.396862039283199, "grad_norm": 0.0, "learning_rate": 1.373371251915975e-05, "loss": 1.1889, "step": 10143 }, { "epoch": 0.39690116597542846, "grad_norm": 0.0, "learning_rate": 1.3732536901679321e-05, "loss": 1.0092, "step": 10144 }, { "epoch": 0.3969402926676579, "grad_norm": 0.0, "learning_rate": 1.3731361224259027e-05, "loss": 1.1719, "step": 10145 }, { "epoch": 0.39697941935988734, "grad_norm": 0.0, "learning_rate": 1.373018548691775e-05, "loss": 1.1032, "step": 10146 }, { "epoch": 0.3970185460521167, "grad_norm": 0.0, "learning_rate": 1.3729009689674366e-05, "loss": 1.1677, "step": 10147 }, { "epoch": 0.39705767274434617, "grad_norm": 0.0, "learning_rate": 1.3727833832547758e-05, "loss": 1.106, "step": 10148 }, { "epoch": 0.3970967994365756, "grad_norm": 0.0, "learning_rate": 1.3726657915556814e-05, "loss": 1.0051, "step": 10149 }, { "epoch": 0.39713592612880505, "grad_norm": 0.0, "learning_rate": 1.3725481938720409e-05, "loss": 0.9881, "step": 10150 }, { "epoch": 0.3971750528210345, "grad_norm": 0.0, "learning_rate": 1.3724305902057436e-05, "loss": 1.2267, "step": 10151 }, { "epoch": 0.39721417951326393, "grad_norm": 0.0, "learning_rate": 1.3723129805586775e-05, "loss": 1.0269, "step": 10152 }, { "epoch": 0.3972533062054934, "grad_norm": 0.0, "learning_rate": 1.3721953649327316e-05, "loss": 1.0748, "step": 10153 }, { "epoch": 0.3972924328977228, "grad_norm": 0.0, "learning_rate": 1.3720777433297942e-05, "loss": 1.1429, "step": 10154 }, { "epoch": 0.39733155958995225, "grad_norm": 0.0, "learning_rate": 1.3719601157517548e-05, "loss": 1.0695, "step": 10155 }, { "epoch": 0.3973706862821817, "grad_norm": 0.0, "learning_rate": 1.3718424822005019e-05, "loss": 1.0724, "step": 10156 }, { "epoch": 0.39740981297441114, "grad_norm": 0.0, "learning_rate": 1.3717248426779249e-05, "loss": 1.0717, "step": 10157 }, { "epoch": 0.3974489396666406, "grad_norm": 0.0, "learning_rate": 1.3716071971859123e-05, "loss": 1.2051, "step": 10158 }, { "epoch": 0.39748806635887, "grad_norm": 0.0, "learning_rate": 1.371489545726354e-05, "loss": 1.1089, "step": 10159 }, { "epoch": 0.39752719305109946, "grad_norm": 0.0, "learning_rate": 1.3713718883011393e-05, "loss": 1.2631, "step": 10160 }, { "epoch": 0.3975663197433289, "grad_norm": 0.0, "learning_rate": 1.3712542249121573e-05, "loss": 1.0844, "step": 10161 }, { "epoch": 0.39760544643555834, "grad_norm": 0.0, "learning_rate": 1.3711365555612974e-05, "loss": 1.222, "step": 10162 }, { "epoch": 0.3976445731277878, "grad_norm": 0.0, "learning_rate": 1.3710188802504498e-05, "loss": 1.1199, "step": 10163 }, { "epoch": 0.3976836998200172, "grad_norm": 0.0, "learning_rate": 1.3709011989815035e-05, "loss": 1.0455, "step": 10164 }, { "epoch": 0.39772282651224666, "grad_norm": 0.0, "learning_rate": 1.3707835117563493e-05, "loss": 1.0597, "step": 10165 }, { "epoch": 0.3977619532044761, "grad_norm": 0.0, "learning_rate": 1.370665818576876e-05, "loss": 0.9389, "step": 10166 }, { "epoch": 0.39780107989670555, "grad_norm": 0.0, "learning_rate": 1.3705481194449743e-05, "loss": 1.0157, "step": 10167 }, { "epoch": 0.397840206588935, "grad_norm": 0.0, "learning_rate": 1.370430414362534e-05, "loss": 1.0795, "step": 10168 }, { "epoch": 0.3978793332811644, "grad_norm": 0.0, "learning_rate": 1.3703127033314458e-05, "loss": 1.087, "step": 10169 }, { "epoch": 0.39791845997339387, "grad_norm": 0.0, "learning_rate": 1.3701949863535994e-05, "loss": 0.9906, "step": 10170 }, { "epoch": 0.3979575866656233, "grad_norm": 0.0, "learning_rate": 1.3700772634308852e-05, "loss": 1.1302, "step": 10171 }, { "epoch": 0.39799671335785275, "grad_norm": 0.0, "learning_rate": 1.3699595345651941e-05, "loss": 1.2292, "step": 10172 }, { "epoch": 0.3980358400500822, "grad_norm": 0.0, "learning_rate": 1.3698417997584164e-05, "loss": 0.9078, "step": 10173 }, { "epoch": 0.39807496674231163, "grad_norm": 0.0, "learning_rate": 1.369724059012443e-05, "loss": 1.1234, "step": 10174 }, { "epoch": 0.398114093434541, "grad_norm": 0.0, "learning_rate": 1.369606312329164e-05, "loss": 1.0825, "step": 10175 }, { "epoch": 0.39815322012677046, "grad_norm": 0.0, "learning_rate": 1.3694885597104715e-05, "loss": 0.9744, "step": 10176 }, { "epoch": 0.3981923468189999, "grad_norm": 0.0, "learning_rate": 1.3693708011582551e-05, "loss": 0.9053, "step": 10177 }, { "epoch": 0.39823147351122934, "grad_norm": 0.0, "learning_rate": 1.3692530366744068e-05, "loss": 1.0103, "step": 10178 }, { "epoch": 0.3982706002034588, "grad_norm": 0.0, "learning_rate": 1.3691352662608175e-05, "loss": 1.0925, "step": 10179 }, { "epoch": 0.3983097268956882, "grad_norm": 0.0, "learning_rate": 1.369017489919378e-05, "loss": 1.1334, "step": 10180 }, { "epoch": 0.39834885358791766, "grad_norm": 0.0, "learning_rate": 1.3688997076519803e-05, "loss": 1.0931, "step": 10181 }, { "epoch": 0.3983879802801471, "grad_norm": 0.0, "learning_rate": 1.3687819194605154e-05, "loss": 1.0734, "step": 10182 }, { "epoch": 0.39842710697237654, "grad_norm": 0.0, "learning_rate": 1.3686641253468754e-05, "loss": 0.9661, "step": 10183 }, { "epoch": 0.398466233664606, "grad_norm": 0.0, "learning_rate": 1.368546325312951e-05, "loss": 1.0423, "step": 10184 }, { "epoch": 0.3985053603568354, "grad_norm": 0.0, "learning_rate": 1.3684285193606346e-05, "loss": 1.0385, "step": 10185 }, { "epoch": 0.39854448704906487, "grad_norm": 0.0, "learning_rate": 1.3683107074918179e-05, "loss": 1.2006, "step": 10186 }, { "epoch": 0.3985836137412943, "grad_norm": 0.0, "learning_rate": 1.3681928897083928e-05, "loss": 1.1589, "step": 10187 }, { "epoch": 0.39862274043352375, "grad_norm": 0.0, "learning_rate": 1.3680750660122511e-05, "loss": 1.0334, "step": 10188 }, { "epoch": 0.3986618671257532, "grad_norm": 0.0, "learning_rate": 1.3679572364052852e-05, "loss": 0.9943, "step": 10189 }, { "epoch": 0.39870099381798263, "grad_norm": 0.0, "learning_rate": 1.3678394008893871e-05, "loss": 1.0502, "step": 10190 }, { "epoch": 0.39874012051021207, "grad_norm": 0.0, "learning_rate": 1.3677215594664493e-05, "loss": 1.0278, "step": 10191 }, { "epoch": 0.3987792472024415, "grad_norm": 0.0, "learning_rate": 1.3676037121383638e-05, "loss": 1.145, "step": 10192 }, { "epoch": 0.39881837389467095, "grad_norm": 0.0, "learning_rate": 1.3674858589070234e-05, "loss": 0.9107, "step": 10193 }, { "epoch": 0.3988575005869004, "grad_norm": 0.0, "learning_rate": 1.3673679997743207e-05, "loss": 0.9708, "step": 10194 }, { "epoch": 0.39889662727912983, "grad_norm": 0.0, "learning_rate": 1.3672501347421481e-05, "loss": 1.0674, "step": 10195 }, { "epoch": 0.3989357539713593, "grad_norm": 0.0, "learning_rate": 1.3671322638123988e-05, "loss": 1.1199, "step": 10196 }, { "epoch": 0.3989748806635887, "grad_norm": 0.0, "learning_rate": 1.3670143869869649e-05, "loss": 1.1125, "step": 10197 }, { "epoch": 0.39901400735581816, "grad_norm": 0.0, "learning_rate": 1.3668965042677403e-05, "loss": 1.1007, "step": 10198 }, { "epoch": 0.3990531340480476, "grad_norm": 0.0, "learning_rate": 1.3667786156566175e-05, "loss": 1.1313, "step": 10199 }, { "epoch": 0.39909226074027704, "grad_norm": 0.0, "learning_rate": 1.3666607211554894e-05, "loss": 1.1055, "step": 10200 }, { "epoch": 0.3991313874325065, "grad_norm": 0.0, "learning_rate": 1.3665428207662498e-05, "loss": 1.1662, "step": 10201 }, { "epoch": 0.3991705141247359, "grad_norm": 0.0, "learning_rate": 1.366424914490792e-05, "loss": 1.0364, "step": 10202 }, { "epoch": 0.39920964081696536, "grad_norm": 0.0, "learning_rate": 1.366307002331009e-05, "loss": 1.0982, "step": 10203 }, { "epoch": 0.39924876750919475, "grad_norm": 0.0, "learning_rate": 1.3661890842887944e-05, "loss": 1.0614, "step": 10204 }, { "epoch": 0.3992878942014242, "grad_norm": 0.0, "learning_rate": 1.3660711603660422e-05, "loss": 1.1073, "step": 10205 }, { "epoch": 0.39932702089365363, "grad_norm": 0.0, "learning_rate": 1.365953230564646e-05, "loss": 1.1043, "step": 10206 }, { "epoch": 0.39936614758588307, "grad_norm": 0.0, "learning_rate": 1.3658352948864993e-05, "loss": 1.084, "step": 10207 }, { "epoch": 0.3994052742781125, "grad_norm": 0.0, "learning_rate": 1.3657173533334962e-05, "loss": 1.0842, "step": 10208 }, { "epoch": 0.39944440097034195, "grad_norm": 0.0, "learning_rate": 1.3655994059075306e-05, "loss": 1.159, "step": 10209 }, { "epoch": 0.3994835276625714, "grad_norm": 0.0, "learning_rate": 1.3654814526104967e-05, "loss": 0.9694, "step": 10210 }, { "epoch": 0.39952265435480083, "grad_norm": 0.0, "learning_rate": 1.3653634934442885e-05, "loss": 1.0138, "step": 10211 }, { "epoch": 0.3995617810470303, "grad_norm": 0.0, "learning_rate": 1.3652455284108009e-05, "loss": 1.0418, "step": 10212 }, { "epoch": 0.3996009077392597, "grad_norm": 0.0, "learning_rate": 1.3651275575119272e-05, "loss": 1.1074, "step": 10213 }, { "epoch": 0.39964003443148916, "grad_norm": 0.0, "learning_rate": 1.365009580749563e-05, "loss": 1.1661, "step": 10214 }, { "epoch": 0.3996791611237186, "grad_norm": 0.0, "learning_rate": 1.364891598125602e-05, "loss": 1.04, "step": 10215 }, { "epoch": 0.39971828781594804, "grad_norm": 0.0, "learning_rate": 1.3647736096419393e-05, "loss": 1.0885, "step": 10216 }, { "epoch": 0.3997574145081775, "grad_norm": 0.0, "learning_rate": 1.3646556153004693e-05, "loss": 1.1514, "step": 10217 }, { "epoch": 0.3997965412004069, "grad_norm": 0.0, "learning_rate": 1.3645376151030871e-05, "loss": 1.0552, "step": 10218 }, { "epoch": 0.39983566789263636, "grad_norm": 0.0, "learning_rate": 1.364419609051688e-05, "loss": 1.087, "step": 10219 }, { "epoch": 0.3998747945848658, "grad_norm": 0.0, "learning_rate": 1.3643015971481661e-05, "loss": 0.9102, "step": 10220 }, { "epoch": 0.39991392127709524, "grad_norm": 0.0, "learning_rate": 1.3641835793944174e-05, "loss": 1.1761, "step": 10221 }, { "epoch": 0.3999530479693247, "grad_norm": 0.0, "learning_rate": 1.3640655557923365e-05, "loss": 1.0735, "step": 10222 }, { "epoch": 0.3999921746615541, "grad_norm": 0.0, "learning_rate": 1.3639475263438194e-05, "loss": 1.1187, "step": 10223 }, { "epoch": 0.40003130135378356, "grad_norm": 0.0, "learning_rate": 1.3638294910507606e-05, "loss": 1.1224, "step": 10224 }, { "epoch": 0.400070428046013, "grad_norm": 0.0, "learning_rate": 1.3637114499150563e-05, "loss": 1.0765, "step": 10225 }, { "epoch": 0.40010955473824245, "grad_norm": 0.0, "learning_rate": 1.3635934029386015e-05, "loss": 1.1267, "step": 10226 }, { "epoch": 0.4001486814304719, "grad_norm": 0.0, "learning_rate": 1.3634753501232929e-05, "loss": 1.1268, "step": 10227 }, { "epoch": 0.4001878081227013, "grad_norm": 0.0, "learning_rate": 1.3633572914710254e-05, "loss": 1.0362, "step": 10228 }, { "epoch": 0.40022693481493077, "grad_norm": 0.0, "learning_rate": 1.363239226983695e-05, "loss": 1.1519, "step": 10229 }, { "epoch": 0.4002660615071602, "grad_norm": 0.0, "learning_rate": 1.3631211566631976e-05, "loss": 1.069, "step": 10230 }, { "epoch": 0.40030518819938965, "grad_norm": 0.0, "learning_rate": 1.3630030805114297e-05, "loss": 1.1067, "step": 10231 }, { "epoch": 0.40034431489161904, "grad_norm": 0.0, "learning_rate": 1.3628849985302873e-05, "loss": 1.0449, "step": 10232 }, { "epoch": 0.4003834415838485, "grad_norm": 0.0, "learning_rate": 1.3627669107216663e-05, "loss": 1.1066, "step": 10233 }, { "epoch": 0.4004225682760779, "grad_norm": 0.0, "learning_rate": 1.3626488170874634e-05, "loss": 1.1175, "step": 10234 }, { "epoch": 0.40046169496830736, "grad_norm": 0.0, "learning_rate": 1.362530717629575e-05, "loss": 1.0676, "step": 10235 }, { "epoch": 0.4005008216605368, "grad_norm": 0.0, "learning_rate": 1.3624126123498976e-05, "loss": 0.9029, "step": 10236 }, { "epoch": 0.40053994835276624, "grad_norm": 0.0, "learning_rate": 1.3622945012503275e-05, "loss": 1.0887, "step": 10237 }, { "epoch": 0.4005790750449957, "grad_norm": 0.0, "learning_rate": 1.3621763843327618e-05, "loss": 1.0428, "step": 10238 }, { "epoch": 0.4006182017372251, "grad_norm": 0.0, "learning_rate": 1.362058261599097e-05, "loss": 1.1078, "step": 10239 }, { "epoch": 0.40065732842945456, "grad_norm": 0.0, "learning_rate": 1.3619401330512307e-05, "loss": 1.1175, "step": 10240 }, { "epoch": 0.400696455121684, "grad_norm": 0.0, "learning_rate": 1.3618219986910592e-05, "loss": 1.078, "step": 10241 }, { "epoch": 0.40073558181391344, "grad_norm": 0.0, "learning_rate": 1.3617038585204796e-05, "loss": 1.0479, "step": 10242 }, { "epoch": 0.4007747085061429, "grad_norm": 0.0, "learning_rate": 1.3615857125413894e-05, "loss": 1.1606, "step": 10243 }, { "epoch": 0.4008138351983723, "grad_norm": 0.0, "learning_rate": 1.3614675607556857e-05, "loss": 0.9774, "step": 10244 }, { "epoch": 0.40085296189060177, "grad_norm": 0.0, "learning_rate": 1.3613494031652659e-05, "loss": 0.877, "step": 10245 }, { "epoch": 0.4008920885828312, "grad_norm": 0.0, "learning_rate": 1.3612312397720275e-05, "loss": 1.0031, "step": 10246 }, { "epoch": 0.40093121527506065, "grad_norm": 0.0, "learning_rate": 1.3611130705778682e-05, "loss": 1.0972, "step": 10247 }, { "epoch": 0.4009703419672901, "grad_norm": 0.0, "learning_rate": 1.3609948955846855e-05, "loss": 1.0297, "step": 10248 }, { "epoch": 0.40100946865951953, "grad_norm": 0.0, "learning_rate": 1.360876714794377e-05, "loss": 1.0366, "step": 10249 }, { "epoch": 0.40104859535174897, "grad_norm": 0.0, "learning_rate": 1.3607585282088405e-05, "loss": 1.0443, "step": 10250 }, { "epoch": 0.4010877220439784, "grad_norm": 0.0, "learning_rate": 1.3606403358299742e-05, "loss": 1.0081, "step": 10251 }, { "epoch": 0.40112684873620785, "grad_norm": 0.0, "learning_rate": 1.360522137659676e-05, "loss": 1.218, "step": 10252 }, { "epoch": 0.4011659754284373, "grad_norm": 0.0, "learning_rate": 1.360403933699844e-05, "loss": 1.0835, "step": 10253 }, { "epoch": 0.40120510212066673, "grad_norm": 0.0, "learning_rate": 1.3602857239523766e-05, "loss": 1.0097, "step": 10254 }, { "epoch": 0.4012442288128962, "grad_norm": 0.0, "learning_rate": 1.360167508419172e-05, "loss": 1.0964, "step": 10255 }, { "epoch": 0.4012833555051256, "grad_norm": 0.0, "learning_rate": 1.3600492871021282e-05, "loss": 0.9907, "step": 10256 }, { "epoch": 0.40132248219735506, "grad_norm": 0.0, "learning_rate": 1.3599310600031443e-05, "loss": 1.0957, "step": 10257 }, { "epoch": 0.4013616088895845, "grad_norm": 0.0, "learning_rate": 1.3598128271241184e-05, "loss": 1.0016, "step": 10258 }, { "epoch": 0.40140073558181394, "grad_norm": 0.0, "learning_rate": 1.3596945884669498e-05, "loss": 1.0467, "step": 10259 }, { "epoch": 0.4014398622740434, "grad_norm": 0.0, "learning_rate": 1.3595763440335361e-05, "loss": 1.1675, "step": 10260 }, { "epoch": 0.40147898896627277, "grad_norm": 0.0, "learning_rate": 1.3594580938257776e-05, "loss": 1.0559, "step": 10261 }, { "epoch": 0.4015181156585022, "grad_norm": 0.0, "learning_rate": 1.359339837845572e-05, "loss": 1.081, "step": 10262 }, { "epoch": 0.40155724235073165, "grad_norm": 0.0, "learning_rate": 1.359221576094819e-05, "loss": 1.0982, "step": 10263 }, { "epoch": 0.4015963690429611, "grad_norm": 0.0, "learning_rate": 1.3591033085754177e-05, "loss": 1.046, "step": 10264 }, { "epoch": 0.40163549573519053, "grad_norm": 0.0, "learning_rate": 1.3589850352892676e-05, "loss": 1.1126, "step": 10265 }, { "epoch": 0.40167462242741997, "grad_norm": 0.0, "learning_rate": 1.358866756238267e-05, "loss": 1.0807, "step": 10266 }, { "epoch": 0.4017137491196494, "grad_norm": 0.0, "learning_rate": 1.3587484714243165e-05, "loss": 1.1158, "step": 10267 }, { "epoch": 0.40175287581187885, "grad_norm": 0.0, "learning_rate": 1.358630180849315e-05, "loss": 1.1114, "step": 10268 }, { "epoch": 0.4017920025041083, "grad_norm": 0.0, "learning_rate": 1.358511884515162e-05, "loss": 1.161, "step": 10269 }, { "epoch": 0.40183112919633773, "grad_norm": 0.0, "learning_rate": 1.3583935824237576e-05, "loss": 1.0541, "step": 10270 }, { "epoch": 0.4018702558885672, "grad_norm": 0.0, "learning_rate": 1.358275274577001e-05, "loss": 1.1674, "step": 10271 }, { "epoch": 0.4019093825807966, "grad_norm": 0.0, "learning_rate": 1.3581569609767927e-05, "loss": 1.0269, "step": 10272 }, { "epoch": 0.40194850927302606, "grad_norm": 0.0, "learning_rate": 1.3580386416250321e-05, "loss": 1.0979, "step": 10273 }, { "epoch": 0.4019876359652555, "grad_norm": 0.0, "learning_rate": 1.3579203165236201e-05, "loss": 1.1006, "step": 10274 }, { "epoch": 0.40202676265748494, "grad_norm": 0.0, "learning_rate": 1.357801985674456e-05, "loss": 1.0594, "step": 10275 }, { "epoch": 0.4020658893497144, "grad_norm": 0.0, "learning_rate": 1.3576836490794404e-05, "loss": 1.072, "step": 10276 }, { "epoch": 0.4021050160419438, "grad_norm": 0.0, "learning_rate": 1.3575653067404736e-05, "loss": 1.0355, "step": 10277 }, { "epoch": 0.40214414273417326, "grad_norm": 0.0, "learning_rate": 1.3574469586594558e-05, "loss": 1.1331, "step": 10278 }, { "epoch": 0.4021832694264027, "grad_norm": 0.0, "learning_rate": 1.3573286048382884e-05, "loss": 1.0883, "step": 10279 }, { "epoch": 0.40222239611863214, "grad_norm": 0.0, "learning_rate": 1.357210245278871e-05, "loss": 1.2501, "step": 10280 }, { "epoch": 0.4022615228108616, "grad_norm": 0.0, "learning_rate": 1.3570918799831044e-05, "loss": 1.1212, "step": 10281 }, { "epoch": 0.402300649503091, "grad_norm": 0.0, "learning_rate": 1.35697350895289e-05, "loss": 1.0929, "step": 10282 }, { "epoch": 0.40233977619532046, "grad_norm": 0.0, "learning_rate": 1.3568551321901282e-05, "loss": 1.048, "step": 10283 }, { "epoch": 0.4023789028875499, "grad_norm": 0.0, "learning_rate": 1.3567367496967201e-05, "loss": 1.0332, "step": 10284 }, { "epoch": 0.40241802957977935, "grad_norm": 0.0, "learning_rate": 1.356618361474567e-05, "loss": 1.1528, "step": 10285 }, { "epoch": 0.4024571562720088, "grad_norm": 0.0, "learning_rate": 1.35649996752557e-05, "loss": 1.2017, "step": 10286 }, { "epoch": 0.40249628296423823, "grad_norm": 0.0, "learning_rate": 1.3563815678516296e-05, "loss": 1.0486, "step": 10287 }, { "epoch": 0.40253540965646767, "grad_norm": 0.0, "learning_rate": 1.3562631624546485e-05, "loss": 1.1389, "step": 10288 }, { "epoch": 0.40257453634869705, "grad_norm": 0.0, "learning_rate": 1.3561447513365269e-05, "loss": 1.1016, "step": 10289 }, { "epoch": 0.4026136630409265, "grad_norm": 0.0, "learning_rate": 1.3560263344991673e-05, "loss": 0.9976, "step": 10290 }, { "epoch": 0.40265278973315594, "grad_norm": 0.0, "learning_rate": 1.3559079119444705e-05, "loss": 1.2186, "step": 10291 }, { "epoch": 0.4026919164253854, "grad_norm": 0.0, "learning_rate": 1.355789483674339e-05, "loss": 1.102, "step": 10292 }, { "epoch": 0.4027310431176148, "grad_norm": 0.0, "learning_rate": 1.355671049690674e-05, "loss": 1.081, "step": 10293 }, { "epoch": 0.40277016980984426, "grad_norm": 0.0, "learning_rate": 1.3555526099953778e-05, "loss": 1.1198, "step": 10294 }, { "epoch": 0.4028092965020737, "grad_norm": 0.0, "learning_rate": 1.355434164590352e-05, "loss": 0.9753, "step": 10295 }, { "epoch": 0.40284842319430314, "grad_norm": 0.0, "learning_rate": 1.355315713477499e-05, "loss": 1.0294, "step": 10296 }, { "epoch": 0.4028875498865326, "grad_norm": 0.0, "learning_rate": 1.3551972566587208e-05, "loss": 1.1315, "step": 10297 }, { "epoch": 0.402926676578762, "grad_norm": 0.0, "learning_rate": 1.3550787941359199e-05, "loss": 1.2324, "step": 10298 }, { "epoch": 0.40296580327099146, "grad_norm": 0.0, "learning_rate": 1.3549603259109985e-05, "loss": 1.1551, "step": 10299 }, { "epoch": 0.4030049299632209, "grad_norm": 0.0, "learning_rate": 1.3548418519858585e-05, "loss": 1.1418, "step": 10300 }, { "epoch": 0.40304405665545034, "grad_norm": 0.0, "learning_rate": 1.3547233723624036e-05, "loss": 1.1633, "step": 10301 }, { "epoch": 0.4030831833476798, "grad_norm": 0.0, "learning_rate": 1.3546048870425356e-05, "loss": 1.1062, "step": 10302 }, { "epoch": 0.4031223100399092, "grad_norm": 0.0, "learning_rate": 1.3544863960281578e-05, "loss": 1.0638, "step": 10303 }, { "epoch": 0.40316143673213867, "grad_norm": 0.0, "learning_rate": 1.3543678993211722e-05, "loss": 1.212, "step": 10304 }, { "epoch": 0.4032005634243681, "grad_norm": 0.0, "learning_rate": 1.3542493969234825e-05, "loss": 1.1636, "step": 10305 }, { "epoch": 0.40323969011659755, "grad_norm": 0.0, "learning_rate": 1.3541308888369916e-05, "loss": 1.119, "step": 10306 }, { "epoch": 0.403278816808827, "grad_norm": 0.0, "learning_rate": 1.3540123750636018e-05, "loss": 1.1636, "step": 10307 }, { "epoch": 0.40331794350105643, "grad_norm": 0.0, "learning_rate": 1.3538938556052172e-05, "loss": 1.1175, "step": 10308 }, { "epoch": 0.40335707019328587, "grad_norm": 0.0, "learning_rate": 1.3537753304637406e-05, "loss": 1.1019, "step": 10309 }, { "epoch": 0.4033961968855153, "grad_norm": 0.0, "learning_rate": 1.3536567996410756e-05, "loss": 1.0729, "step": 10310 }, { "epoch": 0.40343532357774475, "grad_norm": 0.0, "learning_rate": 1.3535382631391254e-05, "loss": 1.1099, "step": 10311 }, { "epoch": 0.4034744502699742, "grad_norm": 0.0, "learning_rate": 1.3534197209597939e-05, "loss": 1.1294, "step": 10312 }, { "epoch": 0.40351357696220364, "grad_norm": 0.0, "learning_rate": 1.3533011731049843e-05, "loss": 1.0864, "step": 10313 }, { "epoch": 0.4035527036544331, "grad_norm": 0.0, "learning_rate": 1.3531826195766008e-05, "loss": 1.0342, "step": 10314 }, { "epoch": 0.4035918303466625, "grad_norm": 0.0, "learning_rate": 1.3530640603765473e-05, "loss": 1.0873, "step": 10315 }, { "epoch": 0.40363095703889196, "grad_norm": 0.0, "learning_rate": 1.3529454955067267e-05, "loss": 1.0887, "step": 10316 }, { "epoch": 0.4036700837311214, "grad_norm": 0.0, "learning_rate": 1.3528269249690441e-05, "loss": 1.0413, "step": 10317 }, { "epoch": 0.4037092104233508, "grad_norm": 0.0, "learning_rate": 1.3527083487654032e-05, "loss": 1.1521, "step": 10318 }, { "epoch": 0.4037483371155802, "grad_norm": 0.0, "learning_rate": 1.3525897668977081e-05, "loss": 1.0522, "step": 10319 }, { "epoch": 0.40378746380780967, "grad_norm": 0.0, "learning_rate": 1.3524711793678631e-05, "loss": 1.0716, "step": 10320 }, { "epoch": 0.4038265905000391, "grad_norm": 0.0, "learning_rate": 1.352352586177773e-05, "loss": 1.0017, "step": 10321 }, { "epoch": 0.40386571719226855, "grad_norm": 0.0, "learning_rate": 1.3522339873293416e-05, "loss": 1.0891, "step": 10322 }, { "epoch": 0.403904843884498, "grad_norm": 0.0, "learning_rate": 1.3521153828244735e-05, "loss": 1.0245, "step": 10323 }, { "epoch": 0.40394397057672743, "grad_norm": 0.0, "learning_rate": 1.351996772665074e-05, "loss": 1.0373, "step": 10324 }, { "epoch": 0.40398309726895687, "grad_norm": 0.0, "learning_rate": 1.3518781568530472e-05, "loss": 1.1153, "step": 10325 }, { "epoch": 0.4040222239611863, "grad_norm": 0.0, "learning_rate": 1.3517595353902982e-05, "loss": 1.0367, "step": 10326 }, { "epoch": 0.40406135065341575, "grad_norm": 0.0, "learning_rate": 1.3516409082787316e-05, "loss": 1.0229, "step": 10327 }, { "epoch": 0.4041004773456452, "grad_norm": 0.0, "learning_rate": 1.351522275520253e-05, "loss": 1.1323, "step": 10328 }, { "epoch": 0.40413960403787463, "grad_norm": 0.0, "learning_rate": 1.3514036371167669e-05, "loss": 1.0444, "step": 10329 }, { "epoch": 0.4041787307301041, "grad_norm": 0.0, "learning_rate": 1.351284993070179e-05, "loss": 1.0607, "step": 10330 }, { "epoch": 0.4042178574223335, "grad_norm": 0.0, "learning_rate": 1.3511663433823938e-05, "loss": 1.0283, "step": 10331 }, { "epoch": 0.40425698411456296, "grad_norm": 0.0, "learning_rate": 1.3510476880553177e-05, "loss": 1.1338, "step": 10332 }, { "epoch": 0.4042961108067924, "grad_norm": 0.0, "learning_rate": 1.3509290270908552e-05, "loss": 1.0244, "step": 10333 }, { "epoch": 0.40433523749902184, "grad_norm": 0.0, "learning_rate": 1.3508103604909127e-05, "loss": 1.0304, "step": 10334 }, { "epoch": 0.4043743641912513, "grad_norm": 0.0, "learning_rate": 1.350691688257395e-05, "loss": 1.1638, "step": 10335 }, { "epoch": 0.4044134908834807, "grad_norm": 0.0, "learning_rate": 1.3505730103922083e-05, "loss": 1.1051, "step": 10336 }, { "epoch": 0.40445261757571016, "grad_norm": 0.0, "learning_rate": 1.3504543268972585e-05, "loss": 1.0349, "step": 10337 }, { "epoch": 0.4044917442679396, "grad_norm": 0.0, "learning_rate": 1.3503356377744512e-05, "loss": 1.064, "step": 10338 }, { "epoch": 0.40453087096016904, "grad_norm": 0.0, "learning_rate": 1.350216943025693e-05, "loss": 1.0814, "step": 10339 }, { "epoch": 0.4045699976523985, "grad_norm": 0.0, "learning_rate": 1.3500982426528888e-05, "loss": 1.0298, "step": 10340 }, { "epoch": 0.4046091243446279, "grad_norm": 0.0, "learning_rate": 1.349979536657946e-05, "loss": 1.131, "step": 10341 }, { "epoch": 0.40464825103685736, "grad_norm": 0.0, "learning_rate": 1.34986082504277e-05, "loss": 1.0137, "step": 10342 }, { "epoch": 0.4046873777290868, "grad_norm": 0.0, "learning_rate": 1.349742107809268e-05, "loss": 1.1024, "step": 10343 }, { "epoch": 0.40472650442131625, "grad_norm": 0.0, "learning_rate": 1.3496233849593458e-05, "loss": 1.0476, "step": 10344 }, { "epoch": 0.4047656311135457, "grad_norm": 0.0, "learning_rate": 1.3495046564949102e-05, "loss": 1.1298, "step": 10345 }, { "epoch": 0.4048047578057751, "grad_norm": 0.0, "learning_rate": 1.3493859224178678e-05, "loss": 1.0953, "step": 10346 }, { "epoch": 0.4048438844980045, "grad_norm": 0.0, "learning_rate": 1.3492671827301251e-05, "loss": 1.1395, "step": 10347 }, { "epoch": 0.40488301119023395, "grad_norm": 0.0, "learning_rate": 1.3491484374335893e-05, "loss": 1.0111, "step": 10348 }, { "epoch": 0.4049221378824634, "grad_norm": 0.0, "learning_rate": 1.3490296865301668e-05, "loss": 1.0157, "step": 10349 }, { "epoch": 0.40496126457469284, "grad_norm": 0.0, "learning_rate": 1.3489109300217654e-05, "loss": 1.0047, "step": 10350 }, { "epoch": 0.4050003912669223, "grad_norm": 0.0, "learning_rate": 1.3487921679102912e-05, "loss": 0.8996, "step": 10351 }, { "epoch": 0.4050395179591517, "grad_norm": 0.0, "learning_rate": 1.3486734001976523e-05, "loss": 1.0687, "step": 10352 }, { "epoch": 0.40507864465138116, "grad_norm": 0.0, "learning_rate": 1.3485546268857552e-05, "loss": 0.9986, "step": 10353 }, { "epoch": 0.4051177713436106, "grad_norm": 0.0, "learning_rate": 1.3484358479765075e-05, "loss": 1.0882, "step": 10354 }, { "epoch": 0.40515689803584004, "grad_norm": 0.0, "learning_rate": 1.3483170634718169e-05, "loss": 1.1606, "step": 10355 }, { "epoch": 0.4051960247280695, "grad_norm": 0.0, "learning_rate": 1.3481982733735905e-05, "loss": 1.0023, "step": 10356 }, { "epoch": 0.4052351514202989, "grad_norm": 0.0, "learning_rate": 1.3480794776837362e-05, "loss": 1.0098, "step": 10357 }, { "epoch": 0.40527427811252836, "grad_norm": 0.0, "learning_rate": 1.3479606764041616e-05, "loss": 1.1736, "step": 10358 }, { "epoch": 0.4053134048047578, "grad_norm": 0.0, "learning_rate": 1.347841869536775e-05, "loss": 1.1562, "step": 10359 }, { "epoch": 0.40535253149698725, "grad_norm": 0.0, "learning_rate": 1.3477230570834831e-05, "loss": 1.2156, "step": 10360 }, { "epoch": 0.4053916581892167, "grad_norm": 0.0, "learning_rate": 1.3476042390461954e-05, "loss": 1.0472, "step": 10361 }, { "epoch": 0.4054307848814461, "grad_norm": 0.0, "learning_rate": 1.3474854154268186e-05, "loss": 1.0389, "step": 10362 }, { "epoch": 0.40546991157367557, "grad_norm": 0.0, "learning_rate": 1.3473665862272619e-05, "loss": 1.0925, "step": 10363 }, { "epoch": 0.405509038265905, "grad_norm": 0.0, "learning_rate": 1.3472477514494328e-05, "loss": 1.0858, "step": 10364 }, { "epoch": 0.40554816495813445, "grad_norm": 0.0, "learning_rate": 1.34712891109524e-05, "loss": 0.9988, "step": 10365 }, { "epoch": 0.4055872916503639, "grad_norm": 0.0, "learning_rate": 1.3470100651665921e-05, "loss": 1.0029, "step": 10366 }, { "epoch": 0.40562641834259333, "grad_norm": 0.0, "learning_rate": 1.3468912136653974e-05, "loss": 1.1203, "step": 10367 }, { "epoch": 0.40566554503482277, "grad_norm": 0.0, "learning_rate": 1.3467723565935639e-05, "loss": 1.1071, "step": 10368 }, { "epoch": 0.4057046717270522, "grad_norm": 0.0, "learning_rate": 1.3466534939530014e-05, "loss": 1.2299, "step": 10369 }, { "epoch": 0.40574379841928165, "grad_norm": 0.0, "learning_rate": 1.3465346257456181e-05, "loss": 1.0631, "step": 10370 }, { "epoch": 0.4057829251115111, "grad_norm": 0.0, "learning_rate": 1.3464157519733228e-05, "loss": 1.0796, "step": 10371 }, { "epoch": 0.40582205180374054, "grad_norm": 0.0, "learning_rate": 1.3462968726380248e-05, "loss": 1.1522, "step": 10372 }, { "epoch": 0.40586117849597, "grad_norm": 0.0, "learning_rate": 1.3461779877416327e-05, "loss": 0.882, "step": 10373 }, { "epoch": 0.4059003051881994, "grad_norm": 0.0, "learning_rate": 1.3460590972860561e-05, "loss": 1.0824, "step": 10374 }, { "epoch": 0.4059394318804288, "grad_norm": 0.0, "learning_rate": 1.345940201273204e-05, "loss": 1.1268, "step": 10375 }, { "epoch": 0.40597855857265824, "grad_norm": 0.0, "learning_rate": 1.3458212997049855e-05, "loss": 1.1201, "step": 10376 }, { "epoch": 0.4060176852648877, "grad_norm": 0.0, "learning_rate": 1.3457023925833106e-05, "loss": 1.1545, "step": 10377 }, { "epoch": 0.4060568119571171, "grad_norm": 0.0, "learning_rate": 1.3455834799100881e-05, "loss": 1.1678, "step": 10378 }, { "epoch": 0.40609593864934657, "grad_norm": 0.0, "learning_rate": 1.3454645616872286e-05, "loss": 1.0417, "step": 10379 }, { "epoch": 0.406135065341576, "grad_norm": 0.0, "learning_rate": 1.3453456379166405e-05, "loss": 0.9727, "step": 10380 }, { "epoch": 0.40617419203380545, "grad_norm": 0.0, "learning_rate": 1.3452267086002345e-05, "loss": 1.0334, "step": 10381 }, { "epoch": 0.4062133187260349, "grad_norm": 0.0, "learning_rate": 1.3451077737399202e-05, "loss": 1.0258, "step": 10382 }, { "epoch": 0.40625244541826433, "grad_norm": 0.0, "learning_rate": 1.3449888333376073e-05, "loss": 1.1078, "step": 10383 }, { "epoch": 0.40629157211049377, "grad_norm": 0.0, "learning_rate": 1.3448698873952063e-05, "loss": 1.2384, "step": 10384 }, { "epoch": 0.4063306988027232, "grad_norm": 0.0, "learning_rate": 1.3447509359146267e-05, "loss": 1.0285, "step": 10385 }, { "epoch": 0.40636982549495265, "grad_norm": 0.0, "learning_rate": 1.3446319788977793e-05, "loss": 0.9624, "step": 10386 }, { "epoch": 0.4064089521871821, "grad_norm": 0.0, "learning_rate": 1.3445130163465739e-05, "loss": 1.0927, "step": 10387 }, { "epoch": 0.40644807887941153, "grad_norm": 0.0, "learning_rate": 1.3443940482629214e-05, "loss": 1.1597, "step": 10388 }, { "epoch": 0.406487205571641, "grad_norm": 0.0, "learning_rate": 1.3442750746487319e-05, "loss": 1.0691, "step": 10389 }, { "epoch": 0.4065263322638704, "grad_norm": 0.0, "learning_rate": 1.3441560955059163e-05, "loss": 0.9617, "step": 10390 }, { "epoch": 0.40656545895609986, "grad_norm": 0.0, "learning_rate": 1.3440371108363847e-05, "loss": 1.0219, "step": 10391 }, { "epoch": 0.4066045856483293, "grad_norm": 0.0, "learning_rate": 1.3439181206420486e-05, "loss": 1.1863, "step": 10392 }, { "epoch": 0.40664371234055874, "grad_norm": 0.0, "learning_rate": 1.3437991249248184e-05, "loss": 0.9779, "step": 10393 }, { "epoch": 0.4066828390327882, "grad_norm": 0.0, "learning_rate": 1.3436801236866048e-05, "loss": 1.0428, "step": 10394 }, { "epoch": 0.4067219657250176, "grad_norm": 0.0, "learning_rate": 1.3435611169293194e-05, "loss": 1.0361, "step": 10395 }, { "epoch": 0.40676109241724706, "grad_norm": 0.0, "learning_rate": 1.3434421046548727e-05, "loss": 1.0794, "step": 10396 }, { "epoch": 0.4068002191094765, "grad_norm": 0.0, "learning_rate": 1.3433230868651763e-05, "loss": 1.2128, "step": 10397 }, { "epoch": 0.40683934580170594, "grad_norm": 0.0, "learning_rate": 1.343204063562141e-05, "loss": 0.9736, "step": 10398 }, { "epoch": 0.4068784724939354, "grad_norm": 0.0, "learning_rate": 1.343085034747679e-05, "loss": 1.0222, "step": 10399 }, { "epoch": 0.4069175991861648, "grad_norm": 0.0, "learning_rate": 1.3429660004237008e-05, "loss": 1.1782, "step": 10400 }, { "epoch": 0.40695672587839427, "grad_norm": 0.0, "learning_rate": 1.3428469605921189e-05, "loss": 1.2345, "step": 10401 }, { "epoch": 0.4069958525706237, "grad_norm": 0.0, "learning_rate": 1.3427279152548442e-05, "loss": 1.0628, "step": 10402 }, { "epoch": 0.4070349792628531, "grad_norm": 0.0, "learning_rate": 1.3426088644137884e-05, "loss": 1.0222, "step": 10403 }, { "epoch": 0.40707410595508253, "grad_norm": 0.0, "learning_rate": 1.3424898080708639e-05, "loss": 1.1411, "step": 10404 }, { "epoch": 0.407113232647312, "grad_norm": 0.0, "learning_rate": 1.342370746227982e-05, "loss": 1.1421, "step": 10405 }, { "epoch": 0.4071523593395414, "grad_norm": 0.0, "learning_rate": 1.342251678887055e-05, "loss": 1.0836, "step": 10406 }, { "epoch": 0.40719148603177086, "grad_norm": 0.0, "learning_rate": 1.3421326060499949e-05, "loss": 1.058, "step": 10407 }, { "epoch": 0.4072306127240003, "grad_norm": 0.0, "learning_rate": 1.3420135277187139e-05, "loss": 1.0319, "step": 10408 }, { "epoch": 0.40726973941622974, "grad_norm": 0.0, "learning_rate": 1.3418944438951242e-05, "loss": 1.0165, "step": 10409 }, { "epoch": 0.4073088661084592, "grad_norm": 0.0, "learning_rate": 1.341775354581138e-05, "loss": 0.9758, "step": 10410 }, { "epoch": 0.4073479928006886, "grad_norm": 0.0, "learning_rate": 1.3416562597786683e-05, "loss": 0.9966, "step": 10411 }, { "epoch": 0.40738711949291806, "grad_norm": 0.0, "learning_rate": 1.3415371594896266e-05, "loss": 1.1071, "step": 10412 }, { "epoch": 0.4074262461851475, "grad_norm": 0.0, "learning_rate": 1.3414180537159265e-05, "loss": 0.964, "step": 10413 }, { "epoch": 0.40746537287737694, "grad_norm": 0.0, "learning_rate": 1.3412989424594803e-05, "loss": 1.1846, "step": 10414 }, { "epoch": 0.4075044995696064, "grad_norm": 0.0, "learning_rate": 1.3411798257222004e-05, "loss": 1.0651, "step": 10415 }, { "epoch": 0.4075436262618358, "grad_norm": 0.0, "learning_rate": 1.3410607035060004e-05, "loss": 1.0477, "step": 10416 }, { "epoch": 0.40758275295406526, "grad_norm": 0.0, "learning_rate": 1.3409415758127929e-05, "loss": 0.9948, "step": 10417 }, { "epoch": 0.4076218796462947, "grad_norm": 0.0, "learning_rate": 1.3408224426444908e-05, "loss": 1.006, "step": 10418 }, { "epoch": 0.40766100633852415, "grad_norm": 0.0, "learning_rate": 1.3407033040030071e-05, "loss": 1.1399, "step": 10419 }, { "epoch": 0.4077001330307536, "grad_norm": 0.0, "learning_rate": 1.3405841598902553e-05, "loss": 1.0636, "step": 10420 }, { "epoch": 0.407739259722983, "grad_norm": 0.0, "learning_rate": 1.340465010308149e-05, "loss": 1.0922, "step": 10421 }, { "epoch": 0.40777838641521247, "grad_norm": 0.0, "learning_rate": 1.340345855258601e-05, "loss": 1.0223, "step": 10422 }, { "epoch": 0.4078175131074419, "grad_norm": 0.0, "learning_rate": 1.3402266947435251e-05, "loss": 1.0231, "step": 10423 }, { "epoch": 0.40785663979967135, "grad_norm": 0.0, "learning_rate": 1.3401075287648348e-05, "loss": 0.9553, "step": 10424 }, { "epoch": 0.4078957664919008, "grad_norm": 0.0, "learning_rate": 1.3399883573244438e-05, "loss": 1.2144, "step": 10425 }, { "epoch": 0.40793489318413023, "grad_norm": 0.0, "learning_rate": 1.3398691804242658e-05, "loss": 1.099, "step": 10426 }, { "epoch": 0.4079740198763597, "grad_norm": 0.0, "learning_rate": 1.3397499980662145e-05, "loss": 1.1054, "step": 10427 }, { "epoch": 0.4080131465685891, "grad_norm": 0.0, "learning_rate": 1.3396308102522042e-05, "loss": 1.2227, "step": 10428 }, { "epoch": 0.40805227326081855, "grad_norm": 0.0, "learning_rate": 1.3395116169841487e-05, "loss": 1.0613, "step": 10429 }, { "epoch": 0.408091399953048, "grad_norm": 0.0, "learning_rate": 1.3393924182639619e-05, "loss": 1.1166, "step": 10430 }, { "epoch": 0.4081305266452774, "grad_norm": 0.0, "learning_rate": 1.3392732140935583e-05, "loss": 1.0591, "step": 10431 }, { "epoch": 0.4081696533375068, "grad_norm": 0.0, "learning_rate": 1.3391540044748517e-05, "loss": 1.0499, "step": 10432 }, { "epoch": 0.40820878002973626, "grad_norm": 0.0, "learning_rate": 1.339034789409757e-05, "loss": 1.1078, "step": 10433 }, { "epoch": 0.4082479067219657, "grad_norm": 0.0, "learning_rate": 1.3389155689001884e-05, "loss": 1.0277, "step": 10434 }, { "epoch": 0.40828703341419514, "grad_norm": 0.0, "learning_rate": 1.3387963429480605e-05, "loss": 0.9703, "step": 10435 }, { "epoch": 0.4083261601064246, "grad_norm": 0.0, "learning_rate": 1.3386771115552876e-05, "loss": 1.111, "step": 10436 }, { "epoch": 0.408365286798654, "grad_norm": 0.0, "learning_rate": 1.338557874723785e-05, "loss": 1.0869, "step": 10437 }, { "epoch": 0.40840441349088347, "grad_norm": 0.0, "learning_rate": 1.338438632455467e-05, "loss": 1.0073, "step": 10438 }, { "epoch": 0.4084435401831129, "grad_norm": 0.0, "learning_rate": 1.3383193847522487e-05, "loss": 1.0193, "step": 10439 }, { "epoch": 0.40848266687534235, "grad_norm": 0.0, "learning_rate": 1.3382001316160451e-05, "loss": 1.0436, "step": 10440 }, { "epoch": 0.4085217935675718, "grad_norm": 0.0, "learning_rate": 1.3380808730487708e-05, "loss": 1.0388, "step": 10441 }, { "epoch": 0.40856092025980123, "grad_norm": 0.0, "learning_rate": 1.337961609052342e-05, "loss": 1.1454, "step": 10442 }, { "epoch": 0.40860004695203067, "grad_norm": 0.0, "learning_rate": 1.3378423396286726e-05, "loss": 1.1074, "step": 10443 }, { "epoch": 0.4086391736442601, "grad_norm": 0.0, "learning_rate": 1.337723064779679e-05, "loss": 0.9149, "step": 10444 }, { "epoch": 0.40867830033648955, "grad_norm": 0.0, "learning_rate": 1.3376037845072759e-05, "loss": 1.0215, "step": 10445 }, { "epoch": 0.408717427028719, "grad_norm": 0.0, "learning_rate": 1.3374844988133791e-05, "loss": 1.1806, "step": 10446 }, { "epoch": 0.40875655372094843, "grad_norm": 0.0, "learning_rate": 1.3373652076999041e-05, "loss": 1.1587, "step": 10447 }, { "epoch": 0.4087956804131779, "grad_norm": 0.0, "learning_rate": 1.3372459111687668e-05, "loss": 1.0987, "step": 10448 }, { "epoch": 0.4088348071054073, "grad_norm": 0.0, "learning_rate": 1.3371266092218824e-05, "loss": 1.1484, "step": 10449 }, { "epoch": 0.40887393379763676, "grad_norm": 0.0, "learning_rate": 1.3370073018611676e-05, "loss": 0.9866, "step": 10450 }, { "epoch": 0.4089130604898662, "grad_norm": 0.0, "learning_rate": 1.3368879890885379e-05, "loss": 1.0665, "step": 10451 }, { "epoch": 0.40895218718209564, "grad_norm": 0.0, "learning_rate": 1.3367686709059084e-05, "loss": 1.155, "step": 10452 }, { "epoch": 0.4089913138743251, "grad_norm": 0.0, "learning_rate": 1.3366493473151966e-05, "loss": 1.142, "step": 10453 }, { "epoch": 0.4090304405665545, "grad_norm": 0.0, "learning_rate": 1.3365300183183183e-05, "loss": 1.063, "step": 10454 }, { "epoch": 0.40906956725878396, "grad_norm": 0.0, "learning_rate": 1.3364106839171893e-05, "loss": 1.083, "step": 10455 }, { "epoch": 0.4091086939510134, "grad_norm": 0.0, "learning_rate": 1.3362913441137267e-05, "loss": 1.119, "step": 10456 }, { "epoch": 0.40914782064324284, "grad_norm": 0.0, "learning_rate": 1.3361719989098458e-05, "loss": 1.0493, "step": 10457 }, { "epoch": 0.4091869473354723, "grad_norm": 0.0, "learning_rate": 1.3360526483074643e-05, "loss": 1.0664, "step": 10458 }, { "epoch": 0.4092260740277017, "grad_norm": 0.0, "learning_rate": 1.3359332923084985e-05, "loss": 0.9941, "step": 10459 }, { "epoch": 0.4092652007199311, "grad_norm": 0.0, "learning_rate": 1.3358139309148645e-05, "loss": 1.1633, "step": 10460 }, { "epoch": 0.40930432741216055, "grad_norm": 0.0, "learning_rate": 1.33569456412848e-05, "loss": 0.974, "step": 10461 }, { "epoch": 0.40934345410439, "grad_norm": 0.0, "learning_rate": 1.3355751919512614e-05, "loss": 1.1151, "step": 10462 }, { "epoch": 0.40938258079661943, "grad_norm": 0.0, "learning_rate": 1.3354558143851252e-05, "loss": 1.1639, "step": 10463 }, { "epoch": 0.4094217074888489, "grad_norm": 0.0, "learning_rate": 1.3353364314319896e-05, "loss": 1.1454, "step": 10464 }, { "epoch": 0.4094608341810783, "grad_norm": 0.0, "learning_rate": 1.3352170430937707e-05, "loss": 1.0202, "step": 10465 }, { "epoch": 0.40949996087330776, "grad_norm": 0.0, "learning_rate": 1.3350976493723864e-05, "loss": 1.1675, "step": 10466 }, { "epoch": 0.4095390875655372, "grad_norm": 0.0, "learning_rate": 1.3349782502697535e-05, "loss": 1.0865, "step": 10467 }, { "epoch": 0.40957821425776664, "grad_norm": 0.0, "learning_rate": 1.33485884578779e-05, "loss": 0.9377, "step": 10468 }, { "epoch": 0.4096173409499961, "grad_norm": 0.0, "learning_rate": 1.334739435928413e-05, "loss": 1.0419, "step": 10469 }, { "epoch": 0.4096564676422255, "grad_norm": 0.0, "learning_rate": 1.3346200206935398e-05, "loss": 1.0762, "step": 10470 }, { "epoch": 0.40969559433445496, "grad_norm": 0.0, "learning_rate": 1.3345006000850887e-05, "loss": 1.1515, "step": 10471 }, { "epoch": 0.4097347210266844, "grad_norm": 0.0, "learning_rate": 1.3343811741049768e-05, "loss": 0.9894, "step": 10472 }, { "epoch": 0.40977384771891384, "grad_norm": 0.0, "learning_rate": 1.3342617427551223e-05, "loss": 1.1159, "step": 10473 }, { "epoch": 0.4098129744111433, "grad_norm": 0.0, "learning_rate": 1.334142306037443e-05, "loss": 0.9655, "step": 10474 }, { "epoch": 0.4098521011033727, "grad_norm": 0.0, "learning_rate": 1.3340228639538572e-05, "loss": 1.2433, "step": 10475 }, { "epoch": 0.40989122779560216, "grad_norm": 0.0, "learning_rate": 1.3339034165062827e-05, "loss": 1.0582, "step": 10476 }, { "epoch": 0.4099303544878316, "grad_norm": 0.0, "learning_rate": 1.3337839636966377e-05, "loss": 1.1555, "step": 10477 }, { "epoch": 0.40996948118006105, "grad_norm": 0.0, "learning_rate": 1.3336645055268405e-05, "loss": 1.0771, "step": 10478 }, { "epoch": 0.4100086078722905, "grad_norm": 0.0, "learning_rate": 1.3335450419988095e-05, "loss": 1.1041, "step": 10479 }, { "epoch": 0.41004773456451993, "grad_norm": 0.0, "learning_rate": 1.3334255731144633e-05, "loss": 1.2028, "step": 10480 }, { "epoch": 0.41008686125674937, "grad_norm": 0.0, "learning_rate": 1.3333060988757196e-05, "loss": 0.982, "step": 10481 }, { "epoch": 0.4101259879489788, "grad_norm": 0.0, "learning_rate": 1.333186619284498e-05, "loss": 1.1779, "step": 10482 }, { "epoch": 0.41016511464120825, "grad_norm": 0.0, "learning_rate": 1.3330671343427169e-05, "loss": 1.1104, "step": 10483 }, { "epoch": 0.4102042413334377, "grad_norm": 0.0, "learning_rate": 1.3329476440522948e-05, "loss": 1.0827, "step": 10484 }, { "epoch": 0.41024336802566713, "grad_norm": 0.0, "learning_rate": 1.3328281484151506e-05, "loss": 0.87, "step": 10485 }, { "epoch": 0.4102824947178966, "grad_norm": 0.0, "learning_rate": 1.3327086474332037e-05, "loss": 1.097, "step": 10486 }, { "epoch": 0.410321621410126, "grad_norm": 0.0, "learning_rate": 1.3325891411083727e-05, "loss": 0.998, "step": 10487 }, { "epoch": 0.4103607481023554, "grad_norm": 0.0, "learning_rate": 1.3324696294425768e-05, "loss": 1.197, "step": 10488 }, { "epoch": 0.41039987479458484, "grad_norm": 0.0, "learning_rate": 1.3323501124377354e-05, "loss": 1.0221, "step": 10489 }, { "epoch": 0.4104390014868143, "grad_norm": 0.0, "learning_rate": 1.3322305900957675e-05, "loss": 1.1158, "step": 10490 }, { "epoch": 0.4104781281790437, "grad_norm": 0.0, "learning_rate": 1.3321110624185927e-05, "loss": 1.2158, "step": 10491 }, { "epoch": 0.41051725487127316, "grad_norm": 0.0, "learning_rate": 1.3319915294081303e-05, "loss": 1.0486, "step": 10492 }, { "epoch": 0.4105563815635026, "grad_norm": 0.0, "learning_rate": 1.3318719910663001e-05, "loss": 1.0644, "step": 10493 }, { "epoch": 0.41059550825573204, "grad_norm": 0.0, "learning_rate": 1.3317524473950214e-05, "loss": 1.195, "step": 10494 }, { "epoch": 0.4106346349479615, "grad_norm": 0.0, "learning_rate": 1.3316328983962144e-05, "loss": 1.0641, "step": 10495 }, { "epoch": 0.4106737616401909, "grad_norm": 0.0, "learning_rate": 1.3315133440717982e-05, "loss": 1.0617, "step": 10496 }, { "epoch": 0.41071288833242037, "grad_norm": 0.0, "learning_rate": 1.3313937844236935e-05, "loss": 1.1398, "step": 10497 }, { "epoch": 0.4107520150246498, "grad_norm": 0.0, "learning_rate": 1.3312742194538198e-05, "loss": 0.9958, "step": 10498 }, { "epoch": 0.41079114171687925, "grad_norm": 0.0, "learning_rate": 1.3311546491640969e-05, "loss": 0.8824, "step": 10499 }, { "epoch": 0.4108302684091087, "grad_norm": 0.0, "learning_rate": 1.3310350735564457e-05, "loss": 1.0649, "step": 10500 }, { "epoch": 0.41086939510133813, "grad_norm": 0.0, "learning_rate": 1.3309154926327859e-05, "loss": 1.0653, "step": 10501 }, { "epoch": 0.41090852179356757, "grad_norm": 0.0, "learning_rate": 1.330795906395038e-05, "loss": 1.0214, "step": 10502 }, { "epoch": 0.410947648485797, "grad_norm": 0.0, "learning_rate": 1.3306763148451223e-05, "loss": 1.1545, "step": 10503 }, { "epoch": 0.41098677517802645, "grad_norm": 0.0, "learning_rate": 1.3305567179849594e-05, "loss": 0.9519, "step": 10504 }, { "epoch": 0.4110259018702559, "grad_norm": 0.0, "learning_rate": 1.3304371158164697e-05, "loss": 1.0484, "step": 10505 }, { "epoch": 0.41106502856248534, "grad_norm": 0.0, "learning_rate": 1.330317508341574e-05, "loss": 1.0089, "step": 10506 }, { "epoch": 0.4111041552547148, "grad_norm": 0.0, "learning_rate": 1.330197895562193e-05, "loss": 1.1211, "step": 10507 }, { "epoch": 0.4111432819469442, "grad_norm": 0.0, "learning_rate": 1.3300782774802476e-05, "loss": 1.1471, "step": 10508 }, { "epoch": 0.41118240863917366, "grad_norm": 0.0, "learning_rate": 1.3299586540976588e-05, "loss": 1.1099, "step": 10509 }, { "epoch": 0.4112215353314031, "grad_norm": 0.0, "learning_rate": 1.3298390254163473e-05, "loss": 1.1584, "step": 10510 }, { "epoch": 0.41126066202363254, "grad_norm": 0.0, "learning_rate": 1.3297193914382344e-05, "loss": 1.1017, "step": 10511 }, { "epoch": 0.411299788715862, "grad_norm": 0.0, "learning_rate": 1.3295997521652413e-05, "loss": 1.1384, "step": 10512 }, { "epoch": 0.4113389154080914, "grad_norm": 0.0, "learning_rate": 1.3294801075992892e-05, "loss": 1.0331, "step": 10513 }, { "epoch": 0.41137804210032086, "grad_norm": 0.0, "learning_rate": 1.3293604577422992e-05, "loss": 1.0646, "step": 10514 }, { "epoch": 0.4114171687925503, "grad_norm": 0.0, "learning_rate": 1.3292408025961934e-05, "loss": 0.9855, "step": 10515 }, { "epoch": 0.41145629548477974, "grad_norm": 0.0, "learning_rate": 1.3291211421628924e-05, "loss": 1.0631, "step": 10516 }, { "epoch": 0.41149542217700913, "grad_norm": 0.0, "learning_rate": 1.3290014764443186e-05, "loss": 1.1486, "step": 10517 }, { "epoch": 0.41153454886923857, "grad_norm": 0.0, "learning_rate": 1.3288818054423933e-05, "loss": 1.1797, "step": 10518 }, { "epoch": 0.411573675561468, "grad_norm": 0.0, "learning_rate": 1.3287621291590383e-05, "loss": 1.0273, "step": 10519 }, { "epoch": 0.41161280225369745, "grad_norm": 0.0, "learning_rate": 1.3286424475961755e-05, "loss": 1.0822, "step": 10520 }, { "epoch": 0.4116519289459269, "grad_norm": 0.0, "learning_rate": 1.3285227607557265e-05, "loss": 1.1175, "step": 10521 }, { "epoch": 0.41169105563815633, "grad_norm": 0.0, "learning_rate": 1.328403068639614e-05, "loss": 1.0457, "step": 10522 }, { "epoch": 0.4117301823303858, "grad_norm": 0.0, "learning_rate": 1.3282833712497594e-05, "loss": 1.1137, "step": 10523 }, { "epoch": 0.4117693090226152, "grad_norm": 0.0, "learning_rate": 1.3281636685880855e-05, "loss": 1.0298, "step": 10524 }, { "epoch": 0.41180843571484466, "grad_norm": 0.0, "learning_rate": 1.3280439606565141e-05, "loss": 1.1218, "step": 10525 }, { "epoch": 0.4118475624070741, "grad_norm": 0.0, "learning_rate": 1.3279242474569678e-05, "loss": 1.1143, "step": 10526 }, { "epoch": 0.41188668909930354, "grad_norm": 0.0, "learning_rate": 1.3278045289913693e-05, "loss": 0.9869, "step": 10527 }, { "epoch": 0.411925815791533, "grad_norm": 0.0, "learning_rate": 1.3276848052616405e-05, "loss": 1.0596, "step": 10528 }, { "epoch": 0.4119649424837624, "grad_norm": 0.0, "learning_rate": 1.3275650762697043e-05, "loss": 1.1321, "step": 10529 }, { "epoch": 0.41200406917599186, "grad_norm": 0.0, "learning_rate": 1.3274453420174835e-05, "loss": 1.0134, "step": 10530 }, { "epoch": 0.4120431958682213, "grad_norm": 0.0, "learning_rate": 1.327325602506901e-05, "loss": 0.9236, "step": 10531 }, { "epoch": 0.41208232256045074, "grad_norm": 0.0, "learning_rate": 1.3272058577398792e-05, "loss": 1.1233, "step": 10532 }, { "epoch": 0.4121214492526802, "grad_norm": 0.0, "learning_rate": 1.3270861077183416e-05, "loss": 1.1288, "step": 10533 }, { "epoch": 0.4121605759449096, "grad_norm": 0.0, "learning_rate": 1.326966352444211e-05, "loss": 1.0878, "step": 10534 }, { "epoch": 0.41219970263713906, "grad_norm": 0.0, "learning_rate": 1.3268465919194103e-05, "loss": 1.1001, "step": 10535 }, { "epoch": 0.4122388293293685, "grad_norm": 0.0, "learning_rate": 1.3267268261458628e-05, "loss": 1.1353, "step": 10536 }, { "epoch": 0.41227795602159795, "grad_norm": 0.0, "learning_rate": 1.3266070551254922e-05, "loss": 1.1681, "step": 10537 }, { "epoch": 0.4123170827138274, "grad_norm": 0.0, "learning_rate": 1.3264872788602215e-05, "loss": 0.9735, "step": 10538 }, { "epoch": 0.41235620940605683, "grad_norm": 0.0, "learning_rate": 1.3263674973519737e-05, "loss": 1.1335, "step": 10539 }, { "epoch": 0.41239533609828627, "grad_norm": 0.0, "learning_rate": 1.3262477106026737e-05, "loss": 1.1416, "step": 10540 }, { "epoch": 0.4124344627905157, "grad_norm": 0.0, "learning_rate": 1.3261279186142435e-05, "loss": 0.9856, "step": 10541 }, { "epoch": 0.41247358948274515, "grad_norm": 0.0, "learning_rate": 1.326008121388608e-05, "loss": 0.965, "step": 10542 }, { "epoch": 0.4125127161749746, "grad_norm": 0.0, "learning_rate": 1.3258883189276906e-05, "loss": 1.0952, "step": 10543 }, { "epoch": 0.41255184286720403, "grad_norm": 0.0, "learning_rate": 1.3257685112334149e-05, "loss": 1.089, "step": 10544 }, { "epoch": 0.4125909695594334, "grad_norm": 0.0, "learning_rate": 1.3256486983077055e-05, "loss": 1.0134, "step": 10545 }, { "epoch": 0.41263009625166286, "grad_norm": 0.0, "learning_rate": 1.325528880152486e-05, "loss": 1.1746, "step": 10546 }, { "epoch": 0.4126692229438923, "grad_norm": 0.0, "learning_rate": 1.3254090567696802e-05, "loss": 1.0415, "step": 10547 }, { "epoch": 0.41270834963612174, "grad_norm": 0.0, "learning_rate": 1.325289228161213e-05, "loss": 1.1668, "step": 10548 }, { "epoch": 0.4127474763283512, "grad_norm": 0.0, "learning_rate": 1.3251693943290084e-05, "loss": 1.0587, "step": 10549 }, { "epoch": 0.4127866030205806, "grad_norm": 0.0, "learning_rate": 1.325049555274991e-05, "loss": 1.1442, "step": 10550 }, { "epoch": 0.41282572971281006, "grad_norm": 0.0, "learning_rate": 1.3249297110010847e-05, "loss": 0.9775, "step": 10551 }, { "epoch": 0.4128648564050395, "grad_norm": 0.0, "learning_rate": 1.3248098615092144e-05, "loss": 1.1205, "step": 10552 }, { "epoch": 0.41290398309726895, "grad_norm": 0.0, "learning_rate": 1.324690006801305e-05, "loss": 1.146, "step": 10553 }, { "epoch": 0.4129431097894984, "grad_norm": 0.0, "learning_rate": 1.3245701468792804e-05, "loss": 1.126, "step": 10554 }, { "epoch": 0.4129822364817278, "grad_norm": 0.0, "learning_rate": 1.3244502817450666e-05, "loss": 1.0894, "step": 10555 }, { "epoch": 0.41302136317395727, "grad_norm": 0.0, "learning_rate": 1.3243304114005878e-05, "loss": 1.1645, "step": 10556 }, { "epoch": 0.4130604898661867, "grad_norm": 0.0, "learning_rate": 1.3242105358477684e-05, "loss": 1.0992, "step": 10557 }, { "epoch": 0.41309961655841615, "grad_norm": 0.0, "learning_rate": 1.3240906550885347e-05, "loss": 1.0434, "step": 10558 }, { "epoch": 0.4131387432506456, "grad_norm": 0.0, "learning_rate": 1.3239707691248107e-05, "loss": 1.0515, "step": 10559 }, { "epoch": 0.41317786994287503, "grad_norm": 0.0, "learning_rate": 1.3238508779585224e-05, "loss": 0.9972, "step": 10560 }, { "epoch": 0.41321699663510447, "grad_norm": 0.0, "learning_rate": 1.3237309815915946e-05, "loss": 1.0951, "step": 10561 }, { "epoch": 0.4132561233273339, "grad_norm": 0.0, "learning_rate": 1.3236110800259531e-05, "loss": 1.1014, "step": 10562 }, { "epoch": 0.41329525001956335, "grad_norm": 0.0, "learning_rate": 1.323491173263523e-05, "loss": 1.1254, "step": 10563 }, { "epoch": 0.4133343767117928, "grad_norm": 0.0, "learning_rate": 1.3233712613062301e-05, "loss": 1.1533, "step": 10564 }, { "epoch": 0.41337350340402224, "grad_norm": 0.0, "learning_rate": 1.323251344156e-05, "loss": 1.0887, "step": 10565 }, { "epoch": 0.4134126300962517, "grad_norm": 0.0, "learning_rate": 1.3231314218147584e-05, "loss": 1.0495, "step": 10566 }, { "epoch": 0.4134517567884811, "grad_norm": 0.0, "learning_rate": 1.323011494284431e-05, "loss": 1.0406, "step": 10567 }, { "epoch": 0.41349088348071056, "grad_norm": 0.0, "learning_rate": 1.3228915615669436e-05, "loss": 1.0503, "step": 10568 }, { "epoch": 0.41353001017294, "grad_norm": 0.0, "learning_rate": 1.3227716236642226e-05, "loss": 1.1504, "step": 10569 }, { "epoch": 0.41356913686516944, "grad_norm": 0.0, "learning_rate": 1.3226516805781934e-05, "loss": 1.135, "step": 10570 }, { "epoch": 0.4136082635573989, "grad_norm": 0.0, "learning_rate": 1.322531732310783e-05, "loss": 1.1162, "step": 10571 }, { "epoch": 0.4136473902496283, "grad_norm": 0.0, "learning_rate": 1.3224117788639168e-05, "loss": 1.1016, "step": 10572 }, { "epoch": 0.41368651694185776, "grad_norm": 0.0, "learning_rate": 1.3222918202395217e-05, "loss": 1.1149, "step": 10573 }, { "epoch": 0.41372564363408715, "grad_norm": 0.0, "learning_rate": 1.3221718564395234e-05, "loss": 1.162, "step": 10574 }, { "epoch": 0.4137647703263166, "grad_norm": 0.0, "learning_rate": 1.3220518874658492e-05, "loss": 1.0191, "step": 10575 }, { "epoch": 0.41380389701854603, "grad_norm": 0.0, "learning_rate": 1.3219319133204251e-05, "loss": 0.9699, "step": 10576 }, { "epoch": 0.41384302371077547, "grad_norm": 0.0, "learning_rate": 1.3218119340051778e-05, "loss": 1.1317, "step": 10577 }, { "epoch": 0.4138821504030049, "grad_norm": 0.0, "learning_rate": 1.3216919495220344e-05, "loss": 1.1417, "step": 10578 }, { "epoch": 0.41392127709523435, "grad_norm": 0.0, "learning_rate": 1.3215719598729208e-05, "loss": 1.1302, "step": 10579 }, { "epoch": 0.4139604037874638, "grad_norm": 0.0, "learning_rate": 1.321451965059765e-05, "loss": 1.1019, "step": 10580 }, { "epoch": 0.41399953047969323, "grad_norm": 0.0, "learning_rate": 1.321331965084493e-05, "loss": 1.098, "step": 10581 }, { "epoch": 0.4140386571719227, "grad_norm": 0.0, "learning_rate": 1.3212119599490327e-05, "loss": 1.0141, "step": 10582 }, { "epoch": 0.4140777838641521, "grad_norm": 0.0, "learning_rate": 1.3210919496553107e-05, "loss": 1.1039, "step": 10583 }, { "epoch": 0.41411691055638156, "grad_norm": 0.0, "learning_rate": 1.3209719342052545e-05, "loss": 1.1494, "step": 10584 }, { "epoch": 0.414156037248611, "grad_norm": 0.0, "learning_rate": 1.3208519136007912e-05, "loss": 1.0414, "step": 10585 }, { "epoch": 0.41419516394084044, "grad_norm": 0.0, "learning_rate": 1.3207318878438478e-05, "loss": 1.0368, "step": 10586 }, { "epoch": 0.4142342906330699, "grad_norm": 0.0, "learning_rate": 1.3206118569363526e-05, "loss": 1.1293, "step": 10587 }, { "epoch": 0.4142734173252993, "grad_norm": 0.0, "learning_rate": 1.3204918208802324e-05, "loss": 1.0925, "step": 10588 }, { "epoch": 0.41431254401752876, "grad_norm": 0.0, "learning_rate": 1.3203717796774154e-05, "loss": 1.0927, "step": 10589 }, { "epoch": 0.4143516707097582, "grad_norm": 0.0, "learning_rate": 1.3202517333298292e-05, "loss": 1.0105, "step": 10590 }, { "epoch": 0.41439079740198764, "grad_norm": 0.0, "learning_rate": 1.3201316818394012e-05, "loss": 1.1331, "step": 10591 }, { "epoch": 0.4144299240942171, "grad_norm": 0.0, "learning_rate": 1.3200116252080596e-05, "loss": 1.0226, "step": 10592 }, { "epoch": 0.4144690507864465, "grad_norm": 0.0, "learning_rate": 1.3198915634377326e-05, "loss": 1.1069, "step": 10593 }, { "epoch": 0.41450817747867597, "grad_norm": 0.0, "learning_rate": 1.3197714965303474e-05, "loss": 1.1693, "step": 10594 }, { "epoch": 0.4145473041709054, "grad_norm": 0.0, "learning_rate": 1.319651424487833e-05, "loss": 1.1247, "step": 10595 }, { "epoch": 0.41458643086313485, "grad_norm": 0.0, "learning_rate": 1.3195313473121172e-05, "loss": 1.1273, "step": 10596 }, { "epoch": 0.4146255575553643, "grad_norm": 0.0, "learning_rate": 1.3194112650051282e-05, "loss": 1.1973, "step": 10597 }, { "epoch": 0.41466468424759373, "grad_norm": 0.0, "learning_rate": 1.3192911775687949e-05, "loss": 1.0969, "step": 10598 }, { "epoch": 0.41470381093982317, "grad_norm": 0.0, "learning_rate": 1.319171085005045e-05, "loss": 1.1185, "step": 10599 }, { "epoch": 0.4147429376320526, "grad_norm": 0.0, "learning_rate": 1.3190509873158076e-05, "loss": 1.1541, "step": 10600 }, { "epoch": 0.41478206432428205, "grad_norm": 0.0, "learning_rate": 1.3189308845030109e-05, "loss": 1.1302, "step": 10601 }, { "epoch": 0.41482119101651144, "grad_norm": 0.0, "learning_rate": 1.3188107765685842e-05, "loss": 1.0368, "step": 10602 }, { "epoch": 0.4148603177087409, "grad_norm": 0.0, "learning_rate": 1.3186906635144556e-05, "loss": 1.0108, "step": 10603 }, { "epoch": 0.4148994444009703, "grad_norm": 0.0, "learning_rate": 1.3185705453425546e-05, "loss": 1.0775, "step": 10604 }, { "epoch": 0.41493857109319976, "grad_norm": 0.0, "learning_rate": 1.3184504220548097e-05, "loss": 0.9739, "step": 10605 }, { "epoch": 0.4149776977854292, "grad_norm": 0.0, "learning_rate": 1.3183302936531499e-05, "loss": 1.1293, "step": 10606 }, { "epoch": 0.41501682447765864, "grad_norm": 0.0, "learning_rate": 1.3182101601395047e-05, "loss": 1.192, "step": 10607 }, { "epoch": 0.4150559511698881, "grad_norm": 0.0, "learning_rate": 1.3180900215158028e-05, "loss": 1.0889, "step": 10608 }, { "epoch": 0.4150950778621175, "grad_norm": 0.0, "learning_rate": 1.3179698777839742e-05, "loss": 1.1627, "step": 10609 }, { "epoch": 0.41513420455434696, "grad_norm": 0.0, "learning_rate": 1.3178497289459474e-05, "loss": 1.1276, "step": 10610 }, { "epoch": 0.4151733312465764, "grad_norm": 0.0, "learning_rate": 1.3177295750036522e-05, "loss": 1.1494, "step": 10611 }, { "epoch": 0.41521245793880585, "grad_norm": 0.0, "learning_rate": 1.3176094159590185e-05, "loss": 1.1954, "step": 10612 }, { "epoch": 0.4152515846310353, "grad_norm": 0.0, "learning_rate": 1.3174892518139752e-05, "loss": 0.9987, "step": 10613 }, { "epoch": 0.4152907113232647, "grad_norm": 0.0, "learning_rate": 1.3173690825704529e-05, "loss": 1.0295, "step": 10614 }, { "epoch": 0.41532983801549417, "grad_norm": 0.0, "learning_rate": 1.3172489082303802e-05, "loss": 1.1039, "step": 10615 }, { "epoch": 0.4153689647077236, "grad_norm": 0.0, "learning_rate": 1.317128728795688e-05, "loss": 1.1323, "step": 10616 }, { "epoch": 0.41540809139995305, "grad_norm": 0.0, "learning_rate": 1.3170085442683056e-05, "loss": 1.0254, "step": 10617 }, { "epoch": 0.4154472180921825, "grad_norm": 0.0, "learning_rate": 1.3168883546501631e-05, "loss": 1.0254, "step": 10618 }, { "epoch": 0.41548634478441193, "grad_norm": 0.0, "learning_rate": 1.3167681599431909e-05, "loss": 1.0885, "step": 10619 }, { "epoch": 0.4155254714766414, "grad_norm": 0.0, "learning_rate": 1.316647960149319e-05, "loss": 1.0959, "step": 10620 }, { "epoch": 0.4155645981688708, "grad_norm": 0.0, "learning_rate": 1.3165277552704774e-05, "loss": 0.9682, "step": 10621 }, { "epoch": 0.41560372486110025, "grad_norm": 0.0, "learning_rate": 1.316407545308597e-05, "loss": 0.9606, "step": 10622 }, { "epoch": 0.4156428515533297, "grad_norm": 0.0, "learning_rate": 1.3162873302656077e-05, "loss": 1.0463, "step": 10623 }, { "epoch": 0.41568197824555914, "grad_norm": 0.0, "learning_rate": 1.3161671101434403e-05, "loss": 1.0139, "step": 10624 }, { "epoch": 0.4157211049377886, "grad_norm": 0.0, "learning_rate": 1.3160468849440253e-05, "loss": 0.9686, "step": 10625 }, { "epoch": 0.415760231630018, "grad_norm": 0.0, "learning_rate": 1.3159266546692933e-05, "loss": 1.1225, "step": 10626 }, { "epoch": 0.41579935832224746, "grad_norm": 0.0, "learning_rate": 1.3158064193211753e-05, "loss": 1.0477, "step": 10627 }, { "epoch": 0.4158384850144769, "grad_norm": 0.0, "learning_rate": 1.3156861789016016e-05, "loss": 1.0999, "step": 10628 }, { "epoch": 0.41587761170670634, "grad_norm": 0.0, "learning_rate": 1.3155659334125037e-05, "loss": 1.0888, "step": 10629 }, { "epoch": 0.4159167383989358, "grad_norm": 0.0, "learning_rate": 1.3154456828558119e-05, "loss": 1.0509, "step": 10630 }, { "epoch": 0.41595586509116517, "grad_norm": 0.0, "learning_rate": 1.3153254272334583e-05, "loss": 0.9993, "step": 10631 }, { "epoch": 0.4159949917833946, "grad_norm": 0.0, "learning_rate": 1.3152051665473732e-05, "loss": 0.9445, "step": 10632 }, { "epoch": 0.41603411847562405, "grad_norm": 0.0, "learning_rate": 1.3150849007994882e-05, "loss": 1.0575, "step": 10633 }, { "epoch": 0.4160732451678535, "grad_norm": 0.0, "learning_rate": 1.3149646299917342e-05, "loss": 1.2632, "step": 10634 }, { "epoch": 0.41611237186008293, "grad_norm": 0.0, "learning_rate": 1.3148443541260433e-05, "loss": 1.0763, "step": 10635 }, { "epoch": 0.41615149855231237, "grad_norm": 0.0, "learning_rate": 1.3147240732043466e-05, "loss": 1.0286, "step": 10636 }, { "epoch": 0.4161906252445418, "grad_norm": 0.0, "learning_rate": 1.3146037872285753e-05, "loss": 0.9878, "step": 10637 }, { "epoch": 0.41622975193677125, "grad_norm": 0.0, "learning_rate": 1.3144834962006619e-05, "loss": 1.063, "step": 10638 }, { "epoch": 0.4162688786290007, "grad_norm": 0.0, "learning_rate": 1.314363200122537e-05, "loss": 1.1452, "step": 10639 }, { "epoch": 0.41630800532123013, "grad_norm": 0.0, "learning_rate": 1.3142428989961336e-05, "loss": 1.1417, "step": 10640 }, { "epoch": 0.4163471320134596, "grad_norm": 0.0, "learning_rate": 1.3141225928233826e-05, "loss": 1.2387, "step": 10641 }, { "epoch": 0.416386258705689, "grad_norm": 0.0, "learning_rate": 1.3140022816062166e-05, "loss": 1.1024, "step": 10642 }, { "epoch": 0.41642538539791846, "grad_norm": 0.0, "learning_rate": 1.3138819653465674e-05, "loss": 1.0799, "step": 10643 }, { "epoch": 0.4164645120901479, "grad_norm": 0.0, "learning_rate": 1.313761644046367e-05, "loss": 1.045, "step": 10644 }, { "epoch": 0.41650363878237734, "grad_norm": 0.0, "learning_rate": 1.3136413177075479e-05, "loss": 1.0896, "step": 10645 }, { "epoch": 0.4165427654746068, "grad_norm": 0.0, "learning_rate": 1.3135209863320422e-05, "loss": 1.0924, "step": 10646 }, { "epoch": 0.4165818921668362, "grad_norm": 0.0, "learning_rate": 1.3134006499217824e-05, "loss": 1.0104, "step": 10647 }, { "epoch": 0.41662101885906566, "grad_norm": 0.0, "learning_rate": 1.3132803084787008e-05, "loss": 1.1694, "step": 10648 }, { "epoch": 0.4166601455512951, "grad_norm": 0.0, "learning_rate": 1.31315996200473e-05, "loss": 1.0929, "step": 10649 }, { "epoch": 0.41669927224352454, "grad_norm": 0.0, "learning_rate": 1.3130396105018024e-05, "loss": 1.0343, "step": 10650 }, { "epoch": 0.416738398935754, "grad_norm": 0.0, "learning_rate": 1.3129192539718514e-05, "loss": 1.1236, "step": 10651 }, { "epoch": 0.4167775256279834, "grad_norm": 0.0, "learning_rate": 1.312798892416809e-05, "loss": 1.2426, "step": 10652 }, { "epoch": 0.41681665232021287, "grad_norm": 0.0, "learning_rate": 1.3126785258386083e-05, "loss": 1.0703, "step": 10653 }, { "epoch": 0.4168557790124423, "grad_norm": 0.0, "learning_rate": 1.3125581542391825e-05, "loss": 1.1004, "step": 10654 }, { "epoch": 0.41689490570467175, "grad_norm": 0.0, "learning_rate": 1.3124377776204641e-05, "loss": 0.9923, "step": 10655 }, { "epoch": 0.4169340323969012, "grad_norm": 0.0, "learning_rate": 1.312317395984387e-05, "loss": 1.1691, "step": 10656 }, { "epoch": 0.41697315908913063, "grad_norm": 0.0, "learning_rate": 1.3121970093328833e-05, "loss": 1.1575, "step": 10657 }, { "epoch": 0.41701228578136007, "grad_norm": 0.0, "learning_rate": 1.3120766176678872e-05, "loss": 1.1895, "step": 10658 }, { "epoch": 0.41705141247358946, "grad_norm": 0.0, "learning_rate": 1.3119562209913314e-05, "loss": 1.1613, "step": 10659 }, { "epoch": 0.4170905391658189, "grad_norm": 0.0, "learning_rate": 1.3118358193051499e-05, "loss": 1.142, "step": 10660 }, { "epoch": 0.41712966585804834, "grad_norm": 0.0, "learning_rate": 1.3117154126112755e-05, "loss": 0.9964, "step": 10661 }, { "epoch": 0.4171687925502778, "grad_norm": 0.0, "learning_rate": 1.3115950009116425e-05, "loss": 1.1159, "step": 10662 }, { "epoch": 0.4172079192425072, "grad_norm": 0.0, "learning_rate": 1.3114745842081841e-05, "loss": 1.0351, "step": 10663 }, { "epoch": 0.41724704593473666, "grad_norm": 0.0, "learning_rate": 1.3113541625028344e-05, "loss": 1.0723, "step": 10664 }, { "epoch": 0.4172861726269661, "grad_norm": 0.0, "learning_rate": 1.311233735797527e-05, "loss": 1.0436, "step": 10665 }, { "epoch": 0.41732529931919554, "grad_norm": 0.0, "learning_rate": 1.3111133040941955e-05, "loss": 1.0543, "step": 10666 }, { "epoch": 0.417364426011425, "grad_norm": 0.0, "learning_rate": 1.3109928673947742e-05, "loss": 1.0993, "step": 10667 }, { "epoch": 0.4174035527036544, "grad_norm": 0.0, "learning_rate": 1.3108724257011973e-05, "loss": 1.1629, "step": 10668 }, { "epoch": 0.41744267939588386, "grad_norm": 0.0, "learning_rate": 1.3107519790153988e-05, "loss": 1.1452, "step": 10669 }, { "epoch": 0.4174818060881133, "grad_norm": 0.0, "learning_rate": 1.3106315273393126e-05, "loss": 0.9787, "step": 10670 }, { "epoch": 0.41752093278034275, "grad_norm": 0.0, "learning_rate": 1.3105110706748738e-05, "loss": 1.0353, "step": 10671 }, { "epoch": 0.4175600594725722, "grad_norm": 0.0, "learning_rate": 1.310390609024016e-05, "loss": 0.9891, "step": 10672 }, { "epoch": 0.41759918616480163, "grad_norm": 0.0, "learning_rate": 1.310270142388674e-05, "loss": 1.204, "step": 10673 }, { "epoch": 0.41763831285703107, "grad_norm": 0.0, "learning_rate": 1.3101496707707825e-05, "loss": 1.1177, "step": 10674 }, { "epoch": 0.4176774395492605, "grad_norm": 0.0, "learning_rate": 1.3100291941722756e-05, "loss": 1.217, "step": 10675 }, { "epoch": 0.41771656624148995, "grad_norm": 0.0, "learning_rate": 1.3099087125950886e-05, "loss": 0.9893, "step": 10676 }, { "epoch": 0.4177556929337194, "grad_norm": 0.0, "learning_rate": 1.3097882260411561e-05, "loss": 1.1165, "step": 10677 }, { "epoch": 0.41779481962594883, "grad_norm": 0.0, "learning_rate": 1.3096677345124125e-05, "loss": 1.0936, "step": 10678 }, { "epoch": 0.4178339463181783, "grad_norm": 0.0, "learning_rate": 1.3095472380107934e-05, "loss": 0.9999, "step": 10679 }, { "epoch": 0.4178730730104077, "grad_norm": 0.0, "learning_rate": 1.3094267365382337e-05, "loss": 1.1743, "step": 10680 }, { "epoch": 0.41791219970263715, "grad_norm": 0.0, "learning_rate": 1.3093062300966679e-05, "loss": 1.1009, "step": 10681 }, { "epoch": 0.4179513263948666, "grad_norm": 0.0, "learning_rate": 1.3091857186880317e-05, "loss": 1.0198, "step": 10682 }, { "epoch": 0.41799045308709604, "grad_norm": 0.0, "learning_rate": 1.3090652023142606e-05, "loss": 1.097, "step": 10683 }, { "epoch": 0.4180295797793255, "grad_norm": 0.0, "learning_rate": 1.3089446809772892e-05, "loss": 1.0432, "step": 10684 }, { "epoch": 0.4180687064715549, "grad_norm": 0.0, "learning_rate": 1.3088241546790538e-05, "loss": 1.1958, "step": 10685 }, { "epoch": 0.41810783316378436, "grad_norm": 0.0, "learning_rate": 1.3087036234214892e-05, "loss": 1.0919, "step": 10686 }, { "epoch": 0.4181469598560138, "grad_norm": 0.0, "learning_rate": 1.3085830872065313e-05, "loss": 1.1254, "step": 10687 }, { "epoch": 0.4181860865482432, "grad_norm": 0.0, "learning_rate": 1.3084625460361155e-05, "loss": 1.1218, "step": 10688 }, { "epoch": 0.4182252132404726, "grad_norm": 0.0, "learning_rate": 1.308341999912178e-05, "loss": 1.1325, "step": 10689 }, { "epoch": 0.41826433993270207, "grad_norm": 0.0, "learning_rate": 1.3082214488366542e-05, "loss": 0.9181, "step": 10690 }, { "epoch": 0.4183034666249315, "grad_norm": 0.0, "learning_rate": 1.3081008928114804e-05, "loss": 1.1168, "step": 10691 }, { "epoch": 0.41834259331716095, "grad_norm": 0.0, "learning_rate": 1.3079803318385922e-05, "loss": 1.0421, "step": 10692 }, { "epoch": 0.4183817200093904, "grad_norm": 0.0, "learning_rate": 1.3078597659199255e-05, "loss": 1.0855, "step": 10693 }, { "epoch": 0.41842084670161983, "grad_norm": 0.0, "learning_rate": 1.3077391950574172e-05, "loss": 1.0934, "step": 10694 }, { "epoch": 0.41845997339384927, "grad_norm": 0.0, "learning_rate": 1.3076186192530027e-05, "loss": 1.0109, "step": 10695 }, { "epoch": 0.4184991000860787, "grad_norm": 0.0, "learning_rate": 1.3074980385086189e-05, "loss": 0.9843, "step": 10696 }, { "epoch": 0.41853822677830815, "grad_norm": 0.0, "learning_rate": 1.3073774528262015e-05, "loss": 1.0175, "step": 10697 }, { "epoch": 0.4185773534705376, "grad_norm": 0.0, "learning_rate": 1.3072568622076878e-05, "loss": 1.1447, "step": 10698 }, { "epoch": 0.41861648016276704, "grad_norm": 0.0, "learning_rate": 1.3071362666550136e-05, "loss": 1.0651, "step": 10699 }, { "epoch": 0.4186556068549965, "grad_norm": 0.0, "learning_rate": 1.3070156661701161e-05, "loss": 1.1971, "step": 10700 }, { "epoch": 0.4186947335472259, "grad_norm": 0.0, "learning_rate": 1.3068950607549318e-05, "loss": 1.0298, "step": 10701 }, { "epoch": 0.41873386023945536, "grad_norm": 0.0, "learning_rate": 1.306774450411397e-05, "loss": 1.241, "step": 10702 }, { "epoch": 0.4187729869316848, "grad_norm": 0.0, "learning_rate": 1.3066538351414493e-05, "loss": 1.0754, "step": 10703 }, { "epoch": 0.41881211362391424, "grad_norm": 0.0, "learning_rate": 1.3065332149470249e-05, "loss": 1.0004, "step": 10704 }, { "epoch": 0.4188512403161437, "grad_norm": 0.0, "learning_rate": 1.3064125898300615e-05, "loss": 1.2278, "step": 10705 }, { "epoch": 0.4188903670083731, "grad_norm": 0.0, "learning_rate": 1.3062919597924957e-05, "loss": 1.0609, "step": 10706 }, { "epoch": 0.41892949370060256, "grad_norm": 0.0, "learning_rate": 1.3061713248362648e-05, "loss": 1.0166, "step": 10707 }, { "epoch": 0.418968620392832, "grad_norm": 0.0, "learning_rate": 1.3060506849633062e-05, "loss": 1.0568, "step": 10708 }, { "epoch": 0.41900774708506144, "grad_norm": 0.0, "learning_rate": 1.3059300401755571e-05, "loss": 0.9569, "step": 10709 }, { "epoch": 0.4190468737772909, "grad_norm": 0.0, "learning_rate": 1.3058093904749547e-05, "loss": 1.0091, "step": 10710 }, { "epoch": 0.4190860004695203, "grad_norm": 0.0, "learning_rate": 1.305688735863437e-05, "loss": 1.0184, "step": 10711 }, { "epoch": 0.41912512716174977, "grad_norm": 0.0, "learning_rate": 1.3055680763429411e-05, "loss": 0.9998, "step": 10712 }, { "epoch": 0.4191642538539792, "grad_norm": 0.0, "learning_rate": 1.3054474119154046e-05, "loss": 1.0585, "step": 10713 }, { "epoch": 0.41920338054620865, "grad_norm": 0.0, "learning_rate": 1.3053267425827656e-05, "loss": 1.066, "step": 10714 }, { "epoch": 0.4192425072384381, "grad_norm": 0.0, "learning_rate": 1.3052060683469617e-05, "loss": 0.8908, "step": 10715 }, { "epoch": 0.4192816339306675, "grad_norm": 0.0, "learning_rate": 1.3050853892099307e-05, "loss": 1.0114, "step": 10716 }, { "epoch": 0.4193207606228969, "grad_norm": 0.0, "learning_rate": 1.3049647051736108e-05, "loss": 1.0652, "step": 10717 }, { "epoch": 0.41935988731512636, "grad_norm": 0.0, "learning_rate": 1.3048440162399399e-05, "loss": 1.1071, "step": 10718 }, { "epoch": 0.4193990140073558, "grad_norm": 0.0, "learning_rate": 1.3047233224108558e-05, "loss": 1.0825, "step": 10719 }, { "epoch": 0.41943814069958524, "grad_norm": 0.0, "learning_rate": 1.3046026236882972e-05, "loss": 1.1835, "step": 10720 }, { "epoch": 0.4194772673918147, "grad_norm": 0.0, "learning_rate": 1.3044819200742022e-05, "loss": 1.0689, "step": 10721 }, { "epoch": 0.4195163940840441, "grad_norm": 0.0, "learning_rate": 1.3043612115705088e-05, "loss": 1.02, "step": 10722 }, { "epoch": 0.41955552077627356, "grad_norm": 0.0, "learning_rate": 1.304240498179156e-05, "loss": 1.072, "step": 10723 }, { "epoch": 0.419594647468503, "grad_norm": 0.0, "learning_rate": 1.304119779902082e-05, "loss": 1.1179, "step": 10724 }, { "epoch": 0.41963377416073244, "grad_norm": 0.0, "learning_rate": 1.3039990567412255e-05, "loss": 1.1559, "step": 10725 }, { "epoch": 0.4196729008529619, "grad_norm": 0.0, "learning_rate": 1.303878328698525e-05, "loss": 1.1351, "step": 10726 }, { "epoch": 0.4197120275451913, "grad_norm": 0.0, "learning_rate": 1.3037575957759195e-05, "loss": 1.0342, "step": 10727 }, { "epoch": 0.41975115423742076, "grad_norm": 0.0, "learning_rate": 1.3036368579753473e-05, "loss": 1.1381, "step": 10728 }, { "epoch": 0.4197902809296502, "grad_norm": 0.0, "learning_rate": 1.303516115298748e-05, "loss": 1.0746, "step": 10729 }, { "epoch": 0.41982940762187965, "grad_norm": 0.0, "learning_rate": 1.3033953677480603e-05, "loss": 1.0234, "step": 10730 }, { "epoch": 0.4198685343141091, "grad_norm": 0.0, "learning_rate": 1.3032746153252225e-05, "loss": 1.1991, "step": 10731 }, { "epoch": 0.41990766100633853, "grad_norm": 0.0, "learning_rate": 1.3031538580321748e-05, "loss": 1.056, "step": 10732 }, { "epoch": 0.41994678769856797, "grad_norm": 0.0, "learning_rate": 1.303033095870856e-05, "loss": 1.159, "step": 10733 }, { "epoch": 0.4199859143907974, "grad_norm": 0.0, "learning_rate": 1.3029123288432056e-05, "loss": 1.1193, "step": 10734 }, { "epoch": 0.42002504108302685, "grad_norm": 0.0, "learning_rate": 1.3027915569511622e-05, "loss": 0.993, "step": 10735 }, { "epoch": 0.4200641677752563, "grad_norm": 0.0, "learning_rate": 1.3026707801966665e-05, "loss": 1.062, "step": 10736 }, { "epoch": 0.42010329446748573, "grad_norm": 0.0, "learning_rate": 1.3025499985816568e-05, "loss": 1.0129, "step": 10737 }, { "epoch": 0.4201424211597152, "grad_norm": 0.0, "learning_rate": 1.3024292121080735e-05, "loss": 1.0791, "step": 10738 }, { "epoch": 0.4201815478519446, "grad_norm": 0.0, "learning_rate": 1.3023084207778558e-05, "loss": 1.0995, "step": 10739 }, { "epoch": 0.42022067454417406, "grad_norm": 0.0, "learning_rate": 1.302187624592944e-05, "loss": 1.0145, "step": 10740 }, { "epoch": 0.4202598012364035, "grad_norm": 0.0, "learning_rate": 1.3020668235552776e-05, "loss": 1.0923, "step": 10741 }, { "epoch": 0.42029892792863294, "grad_norm": 0.0, "learning_rate": 1.3019460176667963e-05, "loss": 1.0183, "step": 10742 }, { "epoch": 0.4203380546208624, "grad_norm": 0.0, "learning_rate": 1.3018252069294404e-05, "loss": 1.1111, "step": 10743 }, { "epoch": 0.4203771813130918, "grad_norm": 0.0, "learning_rate": 1.3017043913451498e-05, "loss": 1.065, "step": 10744 }, { "epoch": 0.4204163080053212, "grad_norm": 0.0, "learning_rate": 1.3015835709158649e-05, "loss": 1.1385, "step": 10745 }, { "epoch": 0.42045543469755065, "grad_norm": 0.0, "learning_rate": 1.3014627456435257e-05, "loss": 1.0415, "step": 10746 }, { "epoch": 0.4204945613897801, "grad_norm": 0.0, "learning_rate": 1.3013419155300725e-05, "loss": 1.0011, "step": 10747 }, { "epoch": 0.4205336880820095, "grad_norm": 0.0, "learning_rate": 1.3012210805774456e-05, "loss": 1.058, "step": 10748 }, { "epoch": 0.42057281477423897, "grad_norm": 0.0, "learning_rate": 1.301100240787586e-05, "loss": 1.0252, "step": 10749 }, { "epoch": 0.4206119414664684, "grad_norm": 0.0, "learning_rate": 1.3009793961624334e-05, "loss": 1.1815, "step": 10750 }, { "epoch": 0.42065106815869785, "grad_norm": 0.0, "learning_rate": 1.3008585467039291e-05, "loss": 1.1817, "step": 10751 }, { "epoch": 0.4206901948509273, "grad_norm": 0.0, "learning_rate": 1.3007376924140136e-05, "loss": 1.1127, "step": 10752 }, { "epoch": 0.42072932154315673, "grad_norm": 0.0, "learning_rate": 1.3006168332946275e-05, "loss": 1.1517, "step": 10753 }, { "epoch": 0.42076844823538617, "grad_norm": 0.0, "learning_rate": 1.3004959693477117e-05, "loss": 1.1418, "step": 10754 }, { "epoch": 0.4208075749276156, "grad_norm": 0.0, "learning_rate": 1.300375100575207e-05, "loss": 1.1648, "step": 10755 }, { "epoch": 0.42084670161984505, "grad_norm": 0.0, "learning_rate": 1.300254226979055e-05, "loss": 1.1195, "step": 10756 }, { "epoch": 0.4208858283120745, "grad_norm": 0.0, "learning_rate": 1.300133348561196e-05, "loss": 1.0102, "step": 10757 }, { "epoch": 0.42092495500430394, "grad_norm": 0.0, "learning_rate": 1.3000124653235717e-05, "loss": 1.0066, "step": 10758 }, { "epoch": 0.4209640816965334, "grad_norm": 0.0, "learning_rate": 1.2998915772681233e-05, "loss": 1.0597, "step": 10759 }, { "epoch": 0.4210032083887628, "grad_norm": 0.0, "learning_rate": 1.2997706843967915e-05, "loss": 1.1029, "step": 10760 }, { "epoch": 0.42104233508099226, "grad_norm": 0.0, "learning_rate": 1.2996497867115185e-05, "loss": 1.0511, "step": 10761 }, { "epoch": 0.4210814617732217, "grad_norm": 0.0, "learning_rate": 1.2995288842142453e-05, "loss": 1.1685, "step": 10762 }, { "epoch": 0.42112058846545114, "grad_norm": 0.0, "learning_rate": 1.2994079769069137e-05, "loss": 1.0389, "step": 10763 }, { "epoch": 0.4211597151576806, "grad_norm": 0.0, "learning_rate": 1.2992870647914648e-05, "loss": 1.0323, "step": 10764 }, { "epoch": 0.42119884184991, "grad_norm": 0.0, "learning_rate": 1.299166147869841e-05, "loss": 1.0413, "step": 10765 }, { "epoch": 0.42123796854213946, "grad_norm": 0.0, "learning_rate": 1.2990452261439837e-05, "loss": 1.0933, "step": 10766 }, { "epoch": 0.4212770952343689, "grad_norm": 0.0, "learning_rate": 1.2989242996158347e-05, "loss": 1.062, "step": 10767 }, { "epoch": 0.42131622192659834, "grad_norm": 0.0, "learning_rate": 1.298803368287336e-05, "loss": 1.0875, "step": 10768 }, { "epoch": 0.4213553486188278, "grad_norm": 0.0, "learning_rate": 1.2986824321604298e-05, "loss": 0.9339, "step": 10769 }, { "epoch": 0.4213944753110572, "grad_norm": 0.0, "learning_rate": 1.2985614912370577e-05, "loss": 1.1023, "step": 10770 }, { "epoch": 0.42143360200328667, "grad_norm": 0.0, "learning_rate": 1.2984405455191624e-05, "loss": 1.1993, "step": 10771 }, { "epoch": 0.4214727286955161, "grad_norm": 0.0, "learning_rate": 1.298319595008686e-05, "loss": 1.0642, "step": 10772 }, { "epoch": 0.4215118553877455, "grad_norm": 0.0, "learning_rate": 1.2981986397075705e-05, "loss": 1.0016, "step": 10773 }, { "epoch": 0.42155098207997493, "grad_norm": 0.0, "learning_rate": 1.2980776796177588e-05, "loss": 1.0108, "step": 10774 }, { "epoch": 0.4215901087722044, "grad_norm": 0.0, "learning_rate": 1.2979567147411927e-05, "loss": 1.0598, "step": 10775 }, { "epoch": 0.4216292354644338, "grad_norm": 0.0, "learning_rate": 1.2978357450798153e-05, "loss": 1.1495, "step": 10776 }, { "epoch": 0.42166836215666326, "grad_norm": 0.0, "learning_rate": 1.2977147706355688e-05, "loss": 1.1435, "step": 10777 }, { "epoch": 0.4217074888488927, "grad_norm": 0.0, "learning_rate": 1.2975937914103967e-05, "loss": 1.1392, "step": 10778 }, { "epoch": 0.42174661554112214, "grad_norm": 0.0, "learning_rate": 1.2974728074062409e-05, "loss": 1.0334, "step": 10779 }, { "epoch": 0.4217857422333516, "grad_norm": 0.0, "learning_rate": 1.2973518186250444e-05, "loss": 1.2034, "step": 10780 }, { "epoch": 0.421824868925581, "grad_norm": 0.0, "learning_rate": 1.2972308250687507e-05, "loss": 0.9534, "step": 10781 }, { "epoch": 0.42186399561781046, "grad_norm": 0.0, "learning_rate": 1.2971098267393019e-05, "loss": 0.9997, "step": 10782 }, { "epoch": 0.4219031223100399, "grad_norm": 0.0, "learning_rate": 1.296988823638642e-05, "loss": 1.0587, "step": 10783 }, { "epoch": 0.42194224900226934, "grad_norm": 0.0, "learning_rate": 1.2968678157687133e-05, "loss": 1.1497, "step": 10784 }, { "epoch": 0.4219813756944988, "grad_norm": 0.0, "learning_rate": 1.2967468031314598e-05, "loss": 0.9773, "step": 10785 }, { "epoch": 0.4220205023867282, "grad_norm": 0.0, "learning_rate": 1.2966257857288245e-05, "loss": 1.1367, "step": 10786 }, { "epoch": 0.42205962907895767, "grad_norm": 0.0, "learning_rate": 1.2965047635627507e-05, "loss": 1.0803, "step": 10787 }, { "epoch": 0.4220987557711871, "grad_norm": 0.0, "learning_rate": 1.2963837366351822e-05, "loss": 1.0701, "step": 10788 }, { "epoch": 0.42213788246341655, "grad_norm": 0.0, "learning_rate": 1.2962627049480618e-05, "loss": 1.0197, "step": 10789 }, { "epoch": 0.422177009155646, "grad_norm": 0.0, "learning_rate": 1.2961416685033339e-05, "loss": 0.9651, "step": 10790 }, { "epoch": 0.42221613584787543, "grad_norm": 0.0, "learning_rate": 1.2960206273029417e-05, "loss": 1.2126, "step": 10791 }, { "epoch": 0.42225526254010487, "grad_norm": 0.0, "learning_rate": 1.2958995813488293e-05, "loss": 1.0184, "step": 10792 }, { "epoch": 0.4222943892323343, "grad_norm": 0.0, "learning_rate": 1.2957785306429402e-05, "loss": 1.143, "step": 10793 }, { "epoch": 0.42233351592456375, "grad_norm": 0.0, "learning_rate": 1.2956574751872188e-05, "loss": 1.0782, "step": 10794 }, { "epoch": 0.4223726426167932, "grad_norm": 0.0, "learning_rate": 1.2955364149836088e-05, "loss": 1.0558, "step": 10795 }, { "epoch": 0.42241176930902263, "grad_norm": 0.0, "learning_rate": 1.2954153500340543e-05, "loss": 1.0712, "step": 10796 }, { "epoch": 0.4224508960012521, "grad_norm": 0.0, "learning_rate": 1.2952942803404991e-05, "loss": 1.0982, "step": 10797 }, { "epoch": 0.4224900226934815, "grad_norm": 0.0, "learning_rate": 1.2951732059048882e-05, "loss": 1.0365, "step": 10798 }, { "epoch": 0.42252914938571096, "grad_norm": 0.0, "learning_rate": 1.2950521267291656e-05, "loss": 1.0577, "step": 10799 }, { "epoch": 0.4225682760779404, "grad_norm": 0.0, "learning_rate": 1.294931042815275e-05, "loss": 1.1026, "step": 10800 }, { "epoch": 0.4226074027701698, "grad_norm": 0.0, "learning_rate": 1.294809954165162e-05, "loss": 1.1716, "step": 10801 }, { "epoch": 0.4226465294623992, "grad_norm": 0.0, "learning_rate": 1.2946888607807702e-05, "loss": 1.0137, "step": 10802 }, { "epoch": 0.42268565615462866, "grad_norm": 0.0, "learning_rate": 1.2945677626640447e-05, "loss": 1.1226, "step": 10803 }, { "epoch": 0.4227247828468581, "grad_norm": 0.0, "learning_rate": 1.2944466598169299e-05, "loss": 1.116, "step": 10804 }, { "epoch": 0.42276390953908755, "grad_norm": 0.0, "learning_rate": 1.2943255522413708e-05, "loss": 1.0054, "step": 10805 }, { "epoch": 0.422803036231317, "grad_norm": 0.0, "learning_rate": 1.294204439939312e-05, "loss": 1.1633, "step": 10806 }, { "epoch": 0.4228421629235464, "grad_norm": 0.0, "learning_rate": 1.294083322912699e-05, "loss": 1.1501, "step": 10807 }, { "epoch": 0.42288128961577587, "grad_norm": 0.0, "learning_rate": 1.2939622011634762e-05, "loss": 1.0437, "step": 10808 }, { "epoch": 0.4229204163080053, "grad_norm": 0.0, "learning_rate": 1.2938410746935883e-05, "loss": 1.0757, "step": 10809 }, { "epoch": 0.42295954300023475, "grad_norm": 0.0, "learning_rate": 1.2937199435049816e-05, "loss": 1.04, "step": 10810 }, { "epoch": 0.4229986696924642, "grad_norm": 0.0, "learning_rate": 1.2935988075996004e-05, "loss": 1.0497, "step": 10811 }, { "epoch": 0.42303779638469363, "grad_norm": 0.0, "learning_rate": 1.29347766697939e-05, "loss": 1.1053, "step": 10812 }, { "epoch": 0.4230769230769231, "grad_norm": 0.0, "learning_rate": 1.2933565216462965e-05, "loss": 1.0578, "step": 10813 }, { "epoch": 0.4231160497691525, "grad_norm": 0.0, "learning_rate": 1.2932353716022646e-05, "loss": 0.8955, "step": 10814 }, { "epoch": 0.42315517646138195, "grad_norm": 0.0, "learning_rate": 1.2931142168492399e-05, "loss": 0.8147, "step": 10815 }, { "epoch": 0.4231943031536114, "grad_norm": 0.0, "learning_rate": 1.2929930573891685e-05, "loss": 0.9838, "step": 10816 }, { "epoch": 0.42323342984584084, "grad_norm": 0.0, "learning_rate": 1.2928718932239957e-05, "loss": 1.0261, "step": 10817 }, { "epoch": 0.4232725565380703, "grad_norm": 0.0, "learning_rate": 1.2927507243556669e-05, "loss": 1.1439, "step": 10818 }, { "epoch": 0.4233116832302997, "grad_norm": 0.0, "learning_rate": 1.2926295507861287e-05, "loss": 0.9851, "step": 10819 }, { "epoch": 0.42335080992252916, "grad_norm": 0.0, "learning_rate": 1.2925083725173264e-05, "loss": 1.0941, "step": 10820 }, { "epoch": 0.4233899366147586, "grad_norm": 0.0, "learning_rate": 1.2923871895512064e-05, "loss": 1.0995, "step": 10821 }, { "epoch": 0.42342906330698804, "grad_norm": 0.0, "learning_rate": 1.292266001889714e-05, "loss": 1.0377, "step": 10822 }, { "epoch": 0.4234681899992175, "grad_norm": 0.0, "learning_rate": 1.2921448095347964e-05, "loss": 1.0449, "step": 10823 }, { "epoch": 0.4235073166914469, "grad_norm": 0.0, "learning_rate": 1.2920236124883989e-05, "loss": 1.0889, "step": 10824 }, { "epoch": 0.42354644338367636, "grad_norm": 0.0, "learning_rate": 1.2919024107524683e-05, "loss": 1.1101, "step": 10825 }, { "epoch": 0.4235855700759058, "grad_norm": 0.0, "learning_rate": 1.2917812043289506e-05, "loss": 1.142, "step": 10826 }, { "epoch": 0.42362469676813524, "grad_norm": 0.0, "learning_rate": 1.2916599932197924e-05, "loss": 1.0593, "step": 10827 }, { "epoch": 0.4236638234603647, "grad_norm": 0.0, "learning_rate": 1.2915387774269403e-05, "loss": 1.1215, "step": 10828 }, { "epoch": 0.4237029501525941, "grad_norm": 0.0, "learning_rate": 1.2914175569523408e-05, "loss": 1.0074, "step": 10829 }, { "epoch": 0.4237420768448235, "grad_norm": 0.0, "learning_rate": 1.2912963317979403e-05, "loss": 1.0397, "step": 10830 }, { "epoch": 0.42378120353705295, "grad_norm": 0.0, "learning_rate": 1.2911751019656858e-05, "loss": 0.9621, "step": 10831 }, { "epoch": 0.4238203302292824, "grad_norm": 0.0, "learning_rate": 1.2910538674575242e-05, "loss": 1.1629, "step": 10832 }, { "epoch": 0.42385945692151183, "grad_norm": 0.0, "learning_rate": 1.290932628275402e-05, "loss": 0.9405, "step": 10833 }, { "epoch": 0.4238985836137413, "grad_norm": 0.0, "learning_rate": 1.2908113844212665e-05, "loss": 1.1071, "step": 10834 }, { "epoch": 0.4239377103059707, "grad_norm": 0.0, "learning_rate": 1.2906901358970643e-05, "loss": 0.9796, "step": 10835 }, { "epoch": 0.42397683699820016, "grad_norm": 0.0, "learning_rate": 1.2905688827047431e-05, "loss": 1.0019, "step": 10836 }, { "epoch": 0.4240159636904296, "grad_norm": 0.0, "learning_rate": 1.2904476248462496e-05, "loss": 1.0723, "step": 10837 }, { "epoch": 0.42405509038265904, "grad_norm": 0.0, "learning_rate": 1.2903263623235312e-05, "loss": 1.0392, "step": 10838 }, { "epoch": 0.4240942170748885, "grad_norm": 0.0, "learning_rate": 1.2902050951385353e-05, "loss": 1.1155, "step": 10839 }, { "epoch": 0.4241333437671179, "grad_norm": 0.0, "learning_rate": 1.290083823293209e-05, "loss": 1.0692, "step": 10840 }, { "epoch": 0.42417247045934736, "grad_norm": 0.0, "learning_rate": 1.2899625467895e-05, "loss": 1.12, "step": 10841 }, { "epoch": 0.4242115971515768, "grad_norm": 0.0, "learning_rate": 1.289841265629356e-05, "loss": 1.1561, "step": 10842 }, { "epoch": 0.42425072384380624, "grad_norm": 0.0, "learning_rate": 1.2897199798147243e-05, "loss": 1.0982, "step": 10843 }, { "epoch": 0.4242898505360357, "grad_norm": 0.0, "learning_rate": 1.289598689347553e-05, "loss": 1.0093, "step": 10844 }, { "epoch": 0.4243289772282651, "grad_norm": 0.0, "learning_rate": 1.2894773942297896e-05, "loss": 1.196, "step": 10845 }, { "epoch": 0.42436810392049457, "grad_norm": 0.0, "learning_rate": 1.289356094463382e-05, "loss": 0.9801, "step": 10846 }, { "epoch": 0.424407230612724, "grad_norm": 0.0, "learning_rate": 1.289234790050278e-05, "loss": 1.0156, "step": 10847 }, { "epoch": 0.42444635730495345, "grad_norm": 0.0, "learning_rate": 1.2891134809924257e-05, "loss": 1.013, "step": 10848 }, { "epoch": 0.4244854839971829, "grad_norm": 0.0, "learning_rate": 1.2889921672917731e-05, "loss": 1.1384, "step": 10849 }, { "epoch": 0.42452461068941233, "grad_norm": 0.0, "learning_rate": 1.2888708489502686e-05, "loss": 0.9999, "step": 10850 }, { "epoch": 0.42456373738164177, "grad_norm": 0.0, "learning_rate": 1.2887495259698602e-05, "loss": 1.1664, "step": 10851 }, { "epoch": 0.4246028640738712, "grad_norm": 0.0, "learning_rate": 1.2886281983524962e-05, "loss": 1.0167, "step": 10852 }, { "epoch": 0.42464199076610065, "grad_norm": 0.0, "learning_rate": 1.288506866100125e-05, "loss": 1.0237, "step": 10853 }, { "epoch": 0.4246811174583301, "grad_norm": 0.0, "learning_rate": 1.288385529214695e-05, "loss": 1.0122, "step": 10854 }, { "epoch": 0.42472024415055953, "grad_norm": 0.0, "learning_rate": 1.288264187698155e-05, "loss": 1.2428, "step": 10855 }, { "epoch": 0.424759370842789, "grad_norm": 0.0, "learning_rate": 1.2881428415524531e-05, "loss": 1.0507, "step": 10856 }, { "epoch": 0.4247984975350184, "grad_norm": 0.0, "learning_rate": 1.2880214907795383e-05, "loss": 0.9315, "step": 10857 }, { "epoch": 0.4248376242272478, "grad_norm": 0.0, "learning_rate": 1.2879001353813595e-05, "loss": 1.0302, "step": 10858 }, { "epoch": 0.42487675091947724, "grad_norm": 0.0, "learning_rate": 1.2877787753598647e-05, "loss": 1.2037, "step": 10859 }, { "epoch": 0.4249158776117067, "grad_norm": 0.0, "learning_rate": 1.287657410717004e-05, "loss": 1.123, "step": 10860 }, { "epoch": 0.4249550043039361, "grad_norm": 0.0, "learning_rate": 1.2875360414547256e-05, "loss": 1.132, "step": 10861 }, { "epoch": 0.42499413099616556, "grad_norm": 0.0, "learning_rate": 1.2874146675749784e-05, "loss": 1.0009, "step": 10862 }, { "epoch": 0.425033257688395, "grad_norm": 0.0, "learning_rate": 1.2872932890797121e-05, "loss": 0.9825, "step": 10863 }, { "epoch": 0.42507238438062445, "grad_norm": 0.0, "learning_rate": 1.2871719059708751e-05, "loss": 1.0756, "step": 10864 }, { "epoch": 0.4251115110728539, "grad_norm": 0.0, "learning_rate": 1.2870505182504175e-05, "loss": 0.9943, "step": 10865 }, { "epoch": 0.42515063776508333, "grad_norm": 0.0, "learning_rate": 1.2869291259202886e-05, "loss": 1.1245, "step": 10866 }, { "epoch": 0.42518976445731277, "grad_norm": 0.0, "learning_rate": 1.2868077289824368e-05, "loss": 0.9968, "step": 10867 }, { "epoch": 0.4252288911495422, "grad_norm": 0.0, "learning_rate": 1.2866863274388128e-05, "loss": 1.0561, "step": 10868 }, { "epoch": 0.42526801784177165, "grad_norm": 0.0, "learning_rate": 1.2865649212913654e-05, "loss": 1.0556, "step": 10869 }, { "epoch": 0.4253071445340011, "grad_norm": 0.0, "learning_rate": 1.2864435105420442e-05, "loss": 0.9847, "step": 10870 }, { "epoch": 0.42534627122623053, "grad_norm": 0.0, "learning_rate": 1.2863220951927995e-05, "loss": 1.191, "step": 10871 }, { "epoch": 0.42538539791846, "grad_norm": 0.0, "learning_rate": 1.2862006752455806e-05, "loss": 1.1302, "step": 10872 }, { "epoch": 0.4254245246106894, "grad_norm": 0.0, "learning_rate": 1.2860792507023374e-05, "loss": 1.2042, "step": 10873 }, { "epoch": 0.42546365130291885, "grad_norm": 0.0, "learning_rate": 1.2859578215650202e-05, "loss": 1.0479, "step": 10874 }, { "epoch": 0.4255027779951483, "grad_norm": 0.0, "learning_rate": 1.2858363878355786e-05, "loss": 1.1042, "step": 10875 }, { "epoch": 0.42554190468737774, "grad_norm": 0.0, "learning_rate": 1.2857149495159627e-05, "loss": 1.1573, "step": 10876 }, { "epoch": 0.4255810313796072, "grad_norm": 0.0, "learning_rate": 1.2855935066081227e-05, "loss": 1.1351, "step": 10877 }, { "epoch": 0.4256201580718366, "grad_norm": 0.0, "learning_rate": 1.2854720591140088e-05, "loss": 1.0562, "step": 10878 }, { "epoch": 0.42565928476406606, "grad_norm": 0.0, "learning_rate": 1.2853506070355717e-05, "loss": 1.1557, "step": 10879 }, { "epoch": 0.4256984114562955, "grad_norm": 0.0, "learning_rate": 1.285229150374761e-05, "loss": 1.1676, "step": 10880 }, { "epoch": 0.42573753814852494, "grad_norm": 0.0, "learning_rate": 1.2851076891335277e-05, "loss": 1.0314, "step": 10881 }, { "epoch": 0.4257766648407544, "grad_norm": 0.0, "learning_rate": 1.2849862233138222e-05, "loss": 1.0946, "step": 10882 }, { "epoch": 0.4258157915329838, "grad_norm": 0.0, "learning_rate": 1.284864752917595e-05, "loss": 1.0735, "step": 10883 }, { "epoch": 0.42585491822521326, "grad_norm": 0.0, "learning_rate": 1.284743277946797e-05, "loss": 1.0451, "step": 10884 }, { "epoch": 0.4258940449174427, "grad_norm": 0.0, "learning_rate": 1.2846217984033786e-05, "loss": 1.0697, "step": 10885 }, { "epoch": 0.42593317160967215, "grad_norm": 0.0, "learning_rate": 1.284500314289291e-05, "loss": 1.0024, "step": 10886 }, { "epoch": 0.42597229830190153, "grad_norm": 0.0, "learning_rate": 1.2843788256064844e-05, "loss": 0.9757, "step": 10887 }, { "epoch": 0.42601142499413097, "grad_norm": 0.0, "learning_rate": 1.2842573323569107e-05, "loss": 1.1214, "step": 10888 }, { "epoch": 0.4260505516863604, "grad_norm": 0.0, "learning_rate": 1.2841358345425202e-05, "loss": 1.0215, "step": 10889 }, { "epoch": 0.42608967837858985, "grad_norm": 0.0, "learning_rate": 1.2840143321652642e-05, "loss": 1.048, "step": 10890 }, { "epoch": 0.4261288050708193, "grad_norm": 0.0, "learning_rate": 1.2838928252270937e-05, "loss": 1.1561, "step": 10891 }, { "epoch": 0.42616793176304874, "grad_norm": 0.0, "learning_rate": 1.2837713137299605e-05, "loss": 1.1036, "step": 10892 }, { "epoch": 0.4262070584552782, "grad_norm": 0.0, "learning_rate": 1.2836497976758156e-05, "loss": 1.1544, "step": 10893 }, { "epoch": 0.4262461851475076, "grad_norm": 0.0, "learning_rate": 1.2835282770666101e-05, "loss": 1.04, "step": 10894 }, { "epoch": 0.42628531183973706, "grad_norm": 0.0, "learning_rate": 1.2834067519042962e-05, "loss": 1.0612, "step": 10895 }, { "epoch": 0.4263244385319665, "grad_norm": 0.0, "learning_rate": 1.2832852221908247e-05, "loss": 1.0952, "step": 10896 }, { "epoch": 0.42636356522419594, "grad_norm": 0.0, "learning_rate": 1.2831636879281475e-05, "loss": 0.9871, "step": 10897 }, { "epoch": 0.4264026919164254, "grad_norm": 0.0, "learning_rate": 1.2830421491182164e-05, "loss": 1.075, "step": 10898 }, { "epoch": 0.4264418186086548, "grad_norm": 0.0, "learning_rate": 1.282920605762983e-05, "loss": 1.0811, "step": 10899 }, { "epoch": 0.42648094530088426, "grad_norm": 0.0, "learning_rate": 1.2827990578643994e-05, "loss": 0.9326, "step": 10900 }, { "epoch": 0.4265200719931137, "grad_norm": 0.0, "learning_rate": 1.2826775054244167e-05, "loss": 1.1078, "step": 10901 }, { "epoch": 0.42655919868534314, "grad_norm": 0.0, "learning_rate": 1.2825559484449882e-05, "loss": 1.1224, "step": 10902 }, { "epoch": 0.4265983253775726, "grad_norm": 0.0, "learning_rate": 1.2824343869280648e-05, "loss": 1.103, "step": 10903 }, { "epoch": 0.426637452069802, "grad_norm": 0.0, "learning_rate": 1.282312820875599e-05, "loss": 0.9832, "step": 10904 }, { "epoch": 0.42667657876203147, "grad_norm": 0.0, "learning_rate": 1.2821912502895436e-05, "loss": 1.1079, "step": 10905 }, { "epoch": 0.4267157054542609, "grad_norm": 0.0, "learning_rate": 1.28206967517185e-05, "loss": 1.0634, "step": 10906 }, { "epoch": 0.42675483214649035, "grad_norm": 0.0, "learning_rate": 1.2819480955244705e-05, "loss": 0.9874, "step": 10907 }, { "epoch": 0.4267939588387198, "grad_norm": 0.0, "learning_rate": 1.2818265113493582e-05, "loss": 1.1507, "step": 10908 }, { "epoch": 0.42683308553094923, "grad_norm": 0.0, "learning_rate": 1.2817049226484652e-05, "loss": 1.048, "step": 10909 }, { "epoch": 0.42687221222317867, "grad_norm": 0.0, "learning_rate": 1.2815833294237444e-05, "loss": 1.0288, "step": 10910 }, { "epoch": 0.4269113389154081, "grad_norm": 0.0, "learning_rate": 1.2814617316771478e-05, "loss": 1.0243, "step": 10911 }, { "epoch": 0.42695046560763755, "grad_norm": 0.0, "learning_rate": 1.2813401294106286e-05, "loss": 1.0409, "step": 10912 }, { "epoch": 0.426989592299867, "grad_norm": 0.0, "learning_rate": 1.2812185226261396e-05, "loss": 1.175, "step": 10913 }, { "epoch": 0.42702871899209643, "grad_norm": 0.0, "learning_rate": 1.2810969113256335e-05, "loss": 1.1893, "step": 10914 }, { "epoch": 0.4270678456843258, "grad_norm": 0.0, "learning_rate": 1.2809752955110632e-05, "loss": 1.1122, "step": 10915 }, { "epoch": 0.42710697237655526, "grad_norm": 0.0, "learning_rate": 1.2808536751843816e-05, "loss": 1.0891, "step": 10916 }, { "epoch": 0.4271460990687847, "grad_norm": 0.0, "learning_rate": 1.2807320503475422e-05, "loss": 1.0981, "step": 10917 }, { "epoch": 0.42718522576101414, "grad_norm": 0.0, "learning_rate": 1.2806104210024974e-05, "loss": 1.1289, "step": 10918 }, { "epoch": 0.4272243524532436, "grad_norm": 0.0, "learning_rate": 1.2804887871512013e-05, "loss": 1.1738, "step": 10919 }, { "epoch": 0.427263479145473, "grad_norm": 0.0, "learning_rate": 1.2803671487956063e-05, "loss": 1.1876, "step": 10920 }, { "epoch": 0.42730260583770246, "grad_norm": 0.0, "learning_rate": 1.280245505937667e-05, "loss": 1.0464, "step": 10921 }, { "epoch": 0.4273417325299319, "grad_norm": 0.0, "learning_rate": 1.2801238585793352e-05, "loss": 1.1959, "step": 10922 }, { "epoch": 0.42738085922216135, "grad_norm": 0.0, "learning_rate": 1.2800022067225655e-05, "loss": 1.114, "step": 10923 }, { "epoch": 0.4274199859143908, "grad_norm": 0.0, "learning_rate": 1.2798805503693115e-05, "loss": 1.071, "step": 10924 }, { "epoch": 0.42745911260662023, "grad_norm": 0.0, "learning_rate": 1.2797588895215263e-05, "loss": 0.9398, "step": 10925 }, { "epoch": 0.42749823929884967, "grad_norm": 0.0, "learning_rate": 1.2796372241811642e-05, "loss": 1.1363, "step": 10926 }, { "epoch": 0.4275373659910791, "grad_norm": 0.0, "learning_rate": 1.279515554350178e-05, "loss": 1.1516, "step": 10927 }, { "epoch": 0.42757649268330855, "grad_norm": 0.0, "learning_rate": 1.279393880030523e-05, "loss": 1.0545, "step": 10928 }, { "epoch": 0.427615619375538, "grad_norm": 0.0, "learning_rate": 1.2792722012241519e-05, "loss": 1.112, "step": 10929 }, { "epoch": 0.42765474606776743, "grad_norm": 0.0, "learning_rate": 1.2791505179330195e-05, "loss": 1.1216, "step": 10930 }, { "epoch": 0.4276938727599969, "grad_norm": 0.0, "learning_rate": 1.2790288301590793e-05, "loss": 1.1363, "step": 10931 }, { "epoch": 0.4277329994522263, "grad_norm": 0.0, "learning_rate": 1.278907137904286e-05, "loss": 1.0426, "step": 10932 }, { "epoch": 0.42777212614445576, "grad_norm": 0.0, "learning_rate": 1.2787854411705935e-05, "loss": 1.2088, "step": 10933 }, { "epoch": 0.4278112528366852, "grad_norm": 0.0, "learning_rate": 1.278663739959956e-05, "loss": 1.1211, "step": 10934 }, { "epoch": 0.42785037952891464, "grad_norm": 0.0, "learning_rate": 1.2785420342743282e-05, "loss": 1.0108, "step": 10935 }, { "epoch": 0.4278895062211441, "grad_norm": 0.0, "learning_rate": 1.2784203241156642e-05, "loss": 1.135, "step": 10936 }, { "epoch": 0.4279286329133735, "grad_norm": 0.0, "learning_rate": 1.2782986094859187e-05, "loss": 1.1052, "step": 10937 }, { "epoch": 0.42796775960560296, "grad_norm": 0.0, "learning_rate": 1.2781768903870462e-05, "loss": 1.1212, "step": 10938 }, { "epoch": 0.4280068862978324, "grad_norm": 0.0, "learning_rate": 1.2780551668210016e-05, "loss": 1.0382, "step": 10939 }, { "epoch": 0.42804601299006184, "grad_norm": 0.0, "learning_rate": 1.2779334387897393e-05, "loss": 1.1395, "step": 10940 }, { "epoch": 0.4280851396822913, "grad_norm": 0.0, "learning_rate": 1.2778117062952143e-05, "loss": 1.157, "step": 10941 }, { "epoch": 0.4281242663745207, "grad_norm": 0.0, "learning_rate": 1.2776899693393815e-05, "loss": 1.049, "step": 10942 }, { "epoch": 0.42816339306675016, "grad_norm": 0.0, "learning_rate": 1.2775682279241956e-05, "loss": 1.11, "step": 10943 }, { "epoch": 0.42820251975897955, "grad_norm": 0.0, "learning_rate": 1.277446482051612e-05, "loss": 1.1176, "step": 10944 }, { "epoch": 0.428241646451209, "grad_norm": 0.0, "learning_rate": 1.2773247317235855e-05, "loss": 0.9989, "step": 10945 }, { "epoch": 0.42828077314343843, "grad_norm": 0.0, "learning_rate": 1.277202976942071e-05, "loss": 1.0042, "step": 10946 }, { "epoch": 0.42831989983566787, "grad_norm": 0.0, "learning_rate": 1.2770812177090243e-05, "loss": 1.1231, "step": 10947 }, { "epoch": 0.4283590265278973, "grad_norm": 0.0, "learning_rate": 1.2769594540264008e-05, "loss": 0.8767, "step": 10948 }, { "epoch": 0.42839815322012675, "grad_norm": 0.0, "learning_rate": 1.2768376858961549e-05, "loss": 1.0217, "step": 10949 }, { "epoch": 0.4284372799123562, "grad_norm": 0.0, "learning_rate": 1.276715913320243e-05, "loss": 0.8254, "step": 10950 }, { "epoch": 0.42847640660458564, "grad_norm": 0.0, "learning_rate": 1.27659413630062e-05, "loss": 1.1548, "step": 10951 }, { "epoch": 0.4285155332968151, "grad_norm": 0.0, "learning_rate": 1.2764723548392421e-05, "loss": 1.0157, "step": 10952 }, { "epoch": 0.4285546599890445, "grad_norm": 0.0, "learning_rate": 1.2763505689380647e-05, "loss": 1.1308, "step": 10953 }, { "epoch": 0.42859378668127396, "grad_norm": 0.0, "learning_rate": 1.2762287785990428e-05, "loss": 0.9818, "step": 10954 }, { "epoch": 0.4286329133735034, "grad_norm": 0.0, "learning_rate": 1.2761069838241334e-05, "loss": 0.9811, "step": 10955 }, { "epoch": 0.42867204006573284, "grad_norm": 0.0, "learning_rate": 1.2759851846152914e-05, "loss": 1.0554, "step": 10956 }, { "epoch": 0.4287111667579623, "grad_norm": 0.0, "learning_rate": 1.2758633809744736e-05, "loss": 1.0678, "step": 10957 }, { "epoch": 0.4287502934501917, "grad_norm": 0.0, "learning_rate": 1.2757415729036354e-05, "loss": 1.0404, "step": 10958 }, { "epoch": 0.42878942014242116, "grad_norm": 0.0, "learning_rate": 1.2756197604047333e-05, "loss": 1.0457, "step": 10959 }, { "epoch": 0.4288285468346506, "grad_norm": 0.0, "learning_rate": 1.2754979434797228e-05, "loss": 1.0552, "step": 10960 }, { "epoch": 0.42886767352688004, "grad_norm": 0.0, "learning_rate": 1.2753761221305609e-05, "loss": 1.1204, "step": 10961 }, { "epoch": 0.4289068002191095, "grad_norm": 0.0, "learning_rate": 1.2752542963592033e-05, "loss": 1.0777, "step": 10962 }, { "epoch": 0.4289459269113389, "grad_norm": 0.0, "learning_rate": 1.2751324661676068e-05, "loss": 1.234, "step": 10963 }, { "epoch": 0.42898505360356837, "grad_norm": 0.0, "learning_rate": 1.2750106315577276e-05, "loss": 1.0586, "step": 10964 }, { "epoch": 0.4290241802957978, "grad_norm": 0.0, "learning_rate": 1.2748887925315221e-05, "loss": 0.9859, "step": 10965 }, { "epoch": 0.42906330698802725, "grad_norm": 0.0, "learning_rate": 1.2747669490909473e-05, "loss": 1.1307, "step": 10966 }, { "epoch": 0.4291024336802567, "grad_norm": 0.0, "learning_rate": 1.2746451012379593e-05, "loss": 1.1552, "step": 10967 }, { "epoch": 0.42914156037248613, "grad_norm": 0.0, "learning_rate": 1.2745232489745153e-05, "loss": 1.0354, "step": 10968 }, { "epoch": 0.42918068706471557, "grad_norm": 0.0, "learning_rate": 1.2744013923025717e-05, "loss": 1.0672, "step": 10969 }, { "epoch": 0.429219813756945, "grad_norm": 0.0, "learning_rate": 1.2742795312240862e-05, "loss": 1.0275, "step": 10970 }, { "epoch": 0.42925894044917445, "grad_norm": 0.0, "learning_rate": 1.2741576657410144e-05, "loss": 1.0955, "step": 10971 }, { "epoch": 0.42929806714140384, "grad_norm": 0.0, "learning_rate": 1.2740357958553144e-05, "loss": 1.0874, "step": 10972 }, { "epoch": 0.4293371938336333, "grad_norm": 0.0, "learning_rate": 1.2739139215689428e-05, "loss": 1.1138, "step": 10973 }, { "epoch": 0.4293763205258627, "grad_norm": 0.0, "learning_rate": 1.273792042883857e-05, "loss": 1.0673, "step": 10974 }, { "epoch": 0.42941544721809216, "grad_norm": 0.0, "learning_rate": 1.273670159802014e-05, "loss": 1.0771, "step": 10975 }, { "epoch": 0.4294545739103216, "grad_norm": 0.0, "learning_rate": 1.2735482723253711e-05, "loss": 1.1649, "step": 10976 }, { "epoch": 0.42949370060255104, "grad_norm": 0.0, "learning_rate": 1.2734263804558858e-05, "loss": 1.0911, "step": 10977 }, { "epoch": 0.4295328272947805, "grad_norm": 0.0, "learning_rate": 1.2733044841955153e-05, "loss": 1.0431, "step": 10978 }, { "epoch": 0.4295719539870099, "grad_norm": 0.0, "learning_rate": 1.2731825835462175e-05, "loss": 1.0438, "step": 10979 }, { "epoch": 0.42961108067923937, "grad_norm": 0.0, "learning_rate": 1.2730606785099493e-05, "loss": 1.0273, "step": 10980 }, { "epoch": 0.4296502073714688, "grad_norm": 0.0, "learning_rate": 1.2729387690886692e-05, "loss": 1.0583, "step": 10981 }, { "epoch": 0.42968933406369825, "grad_norm": 0.0, "learning_rate": 1.2728168552843345e-05, "loss": 1.1367, "step": 10982 }, { "epoch": 0.4297284607559277, "grad_norm": 0.0, "learning_rate": 1.2726949370989026e-05, "loss": 1.0068, "step": 10983 }, { "epoch": 0.42976758744815713, "grad_norm": 0.0, "learning_rate": 1.272573014534332e-05, "loss": 0.8564, "step": 10984 }, { "epoch": 0.42980671414038657, "grad_norm": 0.0, "learning_rate": 1.2724510875925802e-05, "loss": 0.886, "step": 10985 }, { "epoch": 0.429845840832616, "grad_norm": 0.0, "learning_rate": 1.2723291562756055e-05, "loss": 1.1152, "step": 10986 }, { "epoch": 0.42988496752484545, "grad_norm": 0.0, "learning_rate": 1.2722072205853658e-05, "loss": 0.9373, "step": 10987 }, { "epoch": 0.4299240942170749, "grad_norm": 0.0, "learning_rate": 1.2720852805238191e-05, "loss": 1.1065, "step": 10988 }, { "epoch": 0.42996322090930433, "grad_norm": 0.0, "learning_rate": 1.2719633360929238e-05, "loss": 1.065, "step": 10989 }, { "epoch": 0.4300023476015338, "grad_norm": 0.0, "learning_rate": 1.2718413872946381e-05, "loss": 1.1122, "step": 10990 }, { "epoch": 0.4300414742937632, "grad_norm": 0.0, "learning_rate": 1.2717194341309203e-05, "loss": 1.0864, "step": 10991 }, { "epoch": 0.43008060098599266, "grad_norm": 0.0, "learning_rate": 1.2715974766037289e-05, "loss": 0.9189, "step": 10992 }, { "epoch": 0.4301197276782221, "grad_norm": 0.0, "learning_rate": 1.2714755147150222e-05, "loss": 1.104, "step": 10993 }, { "epoch": 0.43015885437045154, "grad_norm": 0.0, "learning_rate": 1.2713535484667592e-05, "loss": 1.2152, "step": 10994 }, { "epoch": 0.430197981062681, "grad_norm": 0.0, "learning_rate": 1.2712315778608982e-05, "loss": 1.1678, "step": 10995 }, { "epoch": 0.4302371077549104, "grad_norm": 0.0, "learning_rate": 1.2711096028993977e-05, "loss": 1.1193, "step": 10996 }, { "epoch": 0.43027623444713986, "grad_norm": 0.0, "learning_rate": 1.2709876235842166e-05, "loss": 1.1394, "step": 10997 }, { "epoch": 0.4303153611393693, "grad_norm": 0.0, "learning_rate": 1.270865639917314e-05, "loss": 0.9002, "step": 10998 }, { "epoch": 0.43035448783159874, "grad_norm": 0.0, "learning_rate": 1.2707436519006489e-05, "loss": 1.0664, "step": 10999 }, { "epoch": 0.4303936145238282, "grad_norm": 0.0, "learning_rate": 1.2706216595361797e-05, "loss": 1.1779, "step": 11000 }, { "epoch": 0.43043274121605757, "grad_norm": 0.0, "learning_rate": 1.2704996628258659e-05, "loss": 1.0959, "step": 11001 }, { "epoch": 0.430471867908287, "grad_norm": 0.0, "learning_rate": 1.2703776617716666e-05, "loss": 1.0409, "step": 11002 }, { "epoch": 0.43051099460051645, "grad_norm": 0.0, "learning_rate": 1.2702556563755406e-05, "loss": 1.163, "step": 11003 }, { "epoch": 0.4305501212927459, "grad_norm": 0.0, "learning_rate": 1.2701336466394476e-05, "loss": 1.114, "step": 11004 }, { "epoch": 0.43058924798497533, "grad_norm": 0.0, "learning_rate": 1.2700116325653464e-05, "loss": 1.0078, "step": 11005 }, { "epoch": 0.4306283746772048, "grad_norm": 0.0, "learning_rate": 1.269889614155197e-05, "loss": 1.0694, "step": 11006 }, { "epoch": 0.4306675013694342, "grad_norm": 0.0, "learning_rate": 1.2697675914109583e-05, "loss": 1.1631, "step": 11007 }, { "epoch": 0.43070662806166365, "grad_norm": 0.0, "learning_rate": 1.2696455643345906e-05, "loss": 1.1294, "step": 11008 }, { "epoch": 0.4307457547538931, "grad_norm": 0.0, "learning_rate": 1.2695235329280527e-05, "loss": 1.0935, "step": 11009 }, { "epoch": 0.43078488144612254, "grad_norm": 0.0, "learning_rate": 1.2694014971933047e-05, "loss": 1.1945, "step": 11010 }, { "epoch": 0.430824008138352, "grad_norm": 0.0, "learning_rate": 1.2692794571323064e-05, "loss": 1.0269, "step": 11011 }, { "epoch": 0.4308631348305814, "grad_norm": 0.0, "learning_rate": 1.269157412747017e-05, "loss": 1.0667, "step": 11012 }, { "epoch": 0.43090226152281086, "grad_norm": 0.0, "learning_rate": 1.2690353640393974e-05, "loss": 1.1419, "step": 11013 }, { "epoch": 0.4309413882150403, "grad_norm": 0.0, "learning_rate": 1.2689133110114065e-05, "loss": 0.9483, "step": 11014 }, { "epoch": 0.43098051490726974, "grad_norm": 0.0, "learning_rate": 1.2687912536650048e-05, "loss": 1.1192, "step": 11015 }, { "epoch": 0.4310196415994992, "grad_norm": 0.0, "learning_rate": 1.2686691920021526e-05, "loss": 1.0008, "step": 11016 }, { "epoch": 0.4310587682917286, "grad_norm": 0.0, "learning_rate": 1.2685471260248096e-05, "loss": 0.9835, "step": 11017 }, { "epoch": 0.43109789498395806, "grad_norm": 0.0, "learning_rate": 1.2684250557349365e-05, "loss": 1.1716, "step": 11018 }, { "epoch": 0.4311370216761875, "grad_norm": 0.0, "learning_rate": 1.2683029811344933e-05, "loss": 0.9968, "step": 11019 }, { "epoch": 0.43117614836841694, "grad_norm": 0.0, "learning_rate": 1.2681809022254404e-05, "loss": 1.0528, "step": 11020 }, { "epoch": 0.4312152750606464, "grad_norm": 0.0, "learning_rate": 1.2680588190097382e-05, "loss": 1.0245, "step": 11021 }, { "epoch": 0.4312544017528758, "grad_norm": 0.0, "learning_rate": 1.2679367314893476e-05, "loss": 1.0012, "step": 11022 }, { "epoch": 0.43129352844510527, "grad_norm": 0.0, "learning_rate": 1.2678146396662282e-05, "loss": 1.0949, "step": 11023 }, { "epoch": 0.4313326551373347, "grad_norm": 0.0, "learning_rate": 1.267692543542342e-05, "loss": 1.165, "step": 11024 }, { "epoch": 0.43137178182956415, "grad_norm": 0.0, "learning_rate": 1.2675704431196483e-05, "loss": 1.1055, "step": 11025 }, { "epoch": 0.4314109085217936, "grad_norm": 0.0, "learning_rate": 1.2674483384001091e-05, "loss": 1.2231, "step": 11026 }, { "epoch": 0.43145003521402303, "grad_norm": 0.0, "learning_rate": 1.2673262293856845e-05, "loss": 1.1746, "step": 11027 }, { "epoch": 0.43148916190625247, "grad_norm": 0.0, "learning_rate": 1.2672041160783354e-05, "loss": 1.0871, "step": 11028 }, { "epoch": 0.43152828859848186, "grad_norm": 0.0, "learning_rate": 1.2670819984800233e-05, "loss": 1.0954, "step": 11029 }, { "epoch": 0.4315674152907113, "grad_norm": 0.0, "learning_rate": 1.266959876592709e-05, "loss": 1.1228, "step": 11030 }, { "epoch": 0.43160654198294074, "grad_norm": 0.0, "learning_rate": 1.2668377504183537e-05, "loss": 1.07, "step": 11031 }, { "epoch": 0.4316456686751702, "grad_norm": 0.0, "learning_rate": 1.2667156199589183e-05, "loss": 1.0711, "step": 11032 }, { "epoch": 0.4316847953673996, "grad_norm": 0.0, "learning_rate": 1.2665934852163644e-05, "loss": 1.0912, "step": 11033 }, { "epoch": 0.43172392205962906, "grad_norm": 0.0, "learning_rate": 1.2664713461926533e-05, "loss": 1.0912, "step": 11034 }, { "epoch": 0.4317630487518585, "grad_norm": 0.0, "learning_rate": 1.2663492028897459e-05, "loss": 1.0704, "step": 11035 }, { "epoch": 0.43180217544408794, "grad_norm": 0.0, "learning_rate": 1.2662270553096043e-05, "loss": 1.1631, "step": 11036 }, { "epoch": 0.4318413021363174, "grad_norm": 0.0, "learning_rate": 1.2661049034541897e-05, "loss": 1.1113, "step": 11037 }, { "epoch": 0.4318804288285468, "grad_norm": 0.0, "learning_rate": 1.2659827473254637e-05, "loss": 1.0601, "step": 11038 }, { "epoch": 0.43191955552077627, "grad_norm": 0.0, "learning_rate": 1.2658605869253884e-05, "loss": 1.1124, "step": 11039 }, { "epoch": 0.4319586822130057, "grad_norm": 0.0, "learning_rate": 1.265738422255925e-05, "loss": 0.9903, "step": 11040 }, { "epoch": 0.43199780890523515, "grad_norm": 0.0, "learning_rate": 1.2656162533190354e-05, "loss": 1.1768, "step": 11041 }, { "epoch": 0.4320369355974646, "grad_norm": 0.0, "learning_rate": 1.2654940801166819e-05, "loss": 1.0791, "step": 11042 }, { "epoch": 0.43207606228969403, "grad_norm": 0.0, "learning_rate": 1.2653719026508258e-05, "loss": 1.0428, "step": 11043 }, { "epoch": 0.43211518898192347, "grad_norm": 0.0, "learning_rate": 1.2652497209234299e-05, "loss": 1.1625, "step": 11044 }, { "epoch": 0.4321543156741529, "grad_norm": 0.0, "learning_rate": 1.2651275349364553e-05, "loss": 0.9513, "step": 11045 }, { "epoch": 0.43219344236638235, "grad_norm": 0.0, "learning_rate": 1.2650053446918653e-05, "loss": 1.1328, "step": 11046 }, { "epoch": 0.4322325690586118, "grad_norm": 0.0, "learning_rate": 1.2648831501916212e-05, "loss": 0.8956, "step": 11047 }, { "epoch": 0.43227169575084123, "grad_norm": 0.0, "learning_rate": 1.2647609514376858e-05, "loss": 1.0944, "step": 11048 }, { "epoch": 0.4323108224430707, "grad_norm": 0.0, "learning_rate": 1.2646387484320211e-05, "loss": 1.1656, "step": 11049 }, { "epoch": 0.4323499491353001, "grad_norm": 0.0, "learning_rate": 1.2645165411765899e-05, "loss": 1.1763, "step": 11050 }, { "epoch": 0.43238907582752956, "grad_norm": 0.0, "learning_rate": 1.2643943296733545e-05, "loss": 1.1261, "step": 11051 }, { "epoch": 0.432428202519759, "grad_norm": 0.0, "learning_rate": 1.2642721139242773e-05, "loss": 1.1068, "step": 11052 }, { "epoch": 0.43246732921198844, "grad_norm": 0.0, "learning_rate": 1.2641498939313213e-05, "loss": 1.0533, "step": 11053 }, { "epoch": 0.4325064559042179, "grad_norm": 0.0, "learning_rate": 1.2640276696964487e-05, "loss": 1.1345, "step": 11054 }, { "epoch": 0.4325455825964473, "grad_norm": 0.0, "learning_rate": 1.2639054412216227e-05, "loss": 1.1002, "step": 11055 }, { "epoch": 0.43258470928867676, "grad_norm": 0.0, "learning_rate": 1.263783208508806e-05, "loss": 1.2067, "step": 11056 }, { "epoch": 0.4326238359809062, "grad_norm": 0.0, "learning_rate": 1.2636609715599617e-05, "loss": 1.2342, "step": 11057 }, { "epoch": 0.4326629626731356, "grad_norm": 0.0, "learning_rate": 1.2635387303770523e-05, "loss": 1.042, "step": 11058 }, { "epoch": 0.43270208936536503, "grad_norm": 0.0, "learning_rate": 1.2634164849620414e-05, "loss": 1.029, "step": 11059 }, { "epoch": 0.43274121605759447, "grad_norm": 0.0, "learning_rate": 1.2632942353168917e-05, "loss": 1.0508, "step": 11060 }, { "epoch": 0.4327803427498239, "grad_norm": 0.0, "learning_rate": 1.2631719814435664e-05, "loss": 1.04, "step": 11061 }, { "epoch": 0.43281946944205335, "grad_norm": 0.0, "learning_rate": 1.263049723344029e-05, "loss": 0.9339, "step": 11062 }, { "epoch": 0.4328585961342828, "grad_norm": 0.0, "learning_rate": 1.2629274610202427e-05, "loss": 1.2342, "step": 11063 }, { "epoch": 0.43289772282651223, "grad_norm": 0.0, "learning_rate": 1.2628051944741709e-05, "loss": 1.1349, "step": 11064 }, { "epoch": 0.4329368495187417, "grad_norm": 0.0, "learning_rate": 1.2626829237077766e-05, "loss": 1.1714, "step": 11065 }, { "epoch": 0.4329759762109711, "grad_norm": 0.0, "learning_rate": 1.2625606487230239e-05, "loss": 1.0874, "step": 11066 }, { "epoch": 0.43301510290320055, "grad_norm": 0.0, "learning_rate": 1.2624383695218762e-05, "loss": 1.0945, "step": 11067 }, { "epoch": 0.43305422959543, "grad_norm": 0.0, "learning_rate": 1.2623160861062976e-05, "loss": 1.0724, "step": 11068 }, { "epoch": 0.43309335628765944, "grad_norm": 0.0, "learning_rate": 1.2621937984782508e-05, "loss": 0.9877, "step": 11069 }, { "epoch": 0.4331324829798889, "grad_norm": 0.0, "learning_rate": 1.2620715066397002e-05, "loss": 0.9922, "step": 11070 }, { "epoch": 0.4331716096721183, "grad_norm": 0.0, "learning_rate": 1.2619492105926096e-05, "loss": 1.1637, "step": 11071 }, { "epoch": 0.43321073636434776, "grad_norm": 0.0, "learning_rate": 1.261826910338943e-05, "loss": 0.9438, "step": 11072 }, { "epoch": 0.4332498630565772, "grad_norm": 0.0, "learning_rate": 1.2617046058806645e-05, "loss": 1.1483, "step": 11073 }, { "epoch": 0.43328898974880664, "grad_norm": 0.0, "learning_rate": 1.2615822972197375e-05, "loss": 1.1512, "step": 11074 }, { "epoch": 0.4333281164410361, "grad_norm": 0.0, "learning_rate": 1.261459984358127e-05, "loss": 1.0882, "step": 11075 }, { "epoch": 0.4333672431332655, "grad_norm": 0.0, "learning_rate": 1.2613376672977968e-05, "loss": 1.2028, "step": 11076 }, { "epoch": 0.43340636982549496, "grad_norm": 0.0, "learning_rate": 1.2612153460407109e-05, "loss": 1.0078, "step": 11077 }, { "epoch": 0.4334454965177244, "grad_norm": 0.0, "learning_rate": 1.2610930205888341e-05, "loss": 1.0672, "step": 11078 }, { "epoch": 0.43348462320995385, "grad_norm": 0.0, "learning_rate": 1.2609706909441303e-05, "loss": 1.0926, "step": 11079 }, { "epoch": 0.4335237499021833, "grad_norm": 0.0, "learning_rate": 1.2608483571085644e-05, "loss": 1.1689, "step": 11080 }, { "epoch": 0.4335628765944127, "grad_norm": 0.0, "learning_rate": 1.2607260190841007e-05, "loss": 1.1344, "step": 11081 }, { "epoch": 0.43360200328664217, "grad_norm": 0.0, "learning_rate": 1.260603676872704e-05, "loss": 1.054, "step": 11082 }, { "epoch": 0.4336411299788716, "grad_norm": 0.0, "learning_rate": 1.2604813304763383e-05, "loss": 1.046, "step": 11083 }, { "epoch": 0.43368025667110105, "grad_norm": 0.0, "learning_rate": 1.2603589798969693e-05, "loss": 1.0695, "step": 11084 }, { "epoch": 0.4337193833633305, "grad_norm": 0.0, "learning_rate": 1.2602366251365613e-05, "loss": 1.0017, "step": 11085 }, { "epoch": 0.4337585100555599, "grad_norm": 0.0, "learning_rate": 1.2601142661970789e-05, "loss": 1.0153, "step": 11086 }, { "epoch": 0.4337976367477893, "grad_norm": 0.0, "learning_rate": 1.2599919030804875e-05, "loss": 1.0852, "step": 11087 }, { "epoch": 0.43383676344001876, "grad_norm": 0.0, "learning_rate": 1.259869535788752e-05, "loss": 1.0906, "step": 11088 }, { "epoch": 0.4338758901322482, "grad_norm": 0.0, "learning_rate": 1.2597471643238372e-05, "loss": 1.0932, "step": 11089 }, { "epoch": 0.43391501682447764, "grad_norm": 0.0, "learning_rate": 1.2596247886877086e-05, "loss": 0.9979, "step": 11090 }, { "epoch": 0.4339541435167071, "grad_norm": 0.0, "learning_rate": 1.2595024088823313e-05, "loss": 0.9064, "step": 11091 }, { "epoch": 0.4339932702089365, "grad_norm": 0.0, "learning_rate": 1.2593800249096702e-05, "loss": 1.1193, "step": 11092 }, { "epoch": 0.43403239690116596, "grad_norm": 0.0, "learning_rate": 1.259257636771691e-05, "loss": 1.1232, "step": 11093 }, { "epoch": 0.4340715235933954, "grad_norm": 0.0, "learning_rate": 1.2591352444703591e-05, "loss": 1.2031, "step": 11094 }, { "epoch": 0.43411065028562484, "grad_norm": 0.0, "learning_rate": 1.25901284800764e-05, "loss": 0.9991, "step": 11095 }, { "epoch": 0.4341497769778543, "grad_norm": 0.0, "learning_rate": 1.2588904473854988e-05, "loss": 1.1971, "step": 11096 }, { "epoch": 0.4341889036700837, "grad_norm": 0.0, "learning_rate": 1.258768042605902e-05, "loss": 1.0622, "step": 11097 }, { "epoch": 0.43422803036231317, "grad_norm": 0.0, "learning_rate": 1.2586456336708141e-05, "loss": 1.1382, "step": 11098 }, { "epoch": 0.4342671570545426, "grad_norm": 0.0, "learning_rate": 1.2585232205822015e-05, "loss": 1.0647, "step": 11099 }, { "epoch": 0.43430628374677205, "grad_norm": 0.0, "learning_rate": 1.2584008033420304e-05, "loss": 1.0985, "step": 11100 }, { "epoch": 0.4343454104390015, "grad_norm": 0.0, "learning_rate": 1.2582783819522656e-05, "loss": 1.0086, "step": 11101 }, { "epoch": 0.43438453713123093, "grad_norm": 0.0, "learning_rate": 1.258155956414874e-05, "loss": 1.1724, "step": 11102 }, { "epoch": 0.43442366382346037, "grad_norm": 0.0, "learning_rate": 1.2580335267318209e-05, "loss": 1.1188, "step": 11103 }, { "epoch": 0.4344627905156898, "grad_norm": 0.0, "learning_rate": 1.2579110929050731e-05, "loss": 1.1005, "step": 11104 }, { "epoch": 0.43450191720791925, "grad_norm": 0.0, "learning_rate": 1.2577886549365958e-05, "loss": 0.9999, "step": 11105 }, { "epoch": 0.4345410439001487, "grad_norm": 0.0, "learning_rate": 1.2576662128283564e-05, "loss": 1.0982, "step": 11106 }, { "epoch": 0.43458017059237813, "grad_norm": 0.0, "learning_rate": 1.25754376658232e-05, "loss": 1.0394, "step": 11107 }, { "epoch": 0.4346192972846076, "grad_norm": 0.0, "learning_rate": 1.2574213162004536e-05, "loss": 0.9924, "step": 11108 }, { "epoch": 0.434658423976837, "grad_norm": 0.0, "learning_rate": 1.2572988616847234e-05, "loss": 1.0804, "step": 11109 }, { "epoch": 0.43469755066906646, "grad_norm": 0.0, "learning_rate": 1.2571764030370958e-05, "loss": 1.008, "step": 11110 }, { "epoch": 0.4347366773612959, "grad_norm": 0.0, "learning_rate": 1.257053940259538e-05, "loss": 0.9752, "step": 11111 }, { "epoch": 0.43477580405352534, "grad_norm": 0.0, "learning_rate": 1.2569314733540153e-05, "loss": 1.1564, "step": 11112 }, { "epoch": 0.4348149307457548, "grad_norm": 0.0, "learning_rate": 1.2568090023224952e-05, "loss": 1.1071, "step": 11113 }, { "epoch": 0.4348540574379842, "grad_norm": 0.0, "learning_rate": 1.2566865271669446e-05, "loss": 1.0349, "step": 11114 }, { "epoch": 0.4348931841302136, "grad_norm": 0.0, "learning_rate": 1.2565640478893299e-05, "loss": 0.891, "step": 11115 }, { "epoch": 0.43493231082244305, "grad_norm": 0.0, "learning_rate": 1.2564415644916179e-05, "loss": 1.0808, "step": 11116 }, { "epoch": 0.4349714375146725, "grad_norm": 0.0, "learning_rate": 1.256319076975776e-05, "loss": 1.0225, "step": 11117 }, { "epoch": 0.43501056420690193, "grad_norm": 0.0, "learning_rate": 1.256196585343771e-05, "loss": 0.9223, "step": 11118 }, { "epoch": 0.43504969089913137, "grad_norm": 0.0, "learning_rate": 1.2560740895975694e-05, "loss": 1.1313, "step": 11119 }, { "epoch": 0.4350888175913608, "grad_norm": 0.0, "learning_rate": 1.2559515897391392e-05, "loss": 0.9194, "step": 11120 }, { "epoch": 0.43512794428359025, "grad_norm": 0.0, "learning_rate": 1.2558290857704472e-05, "loss": 0.9732, "step": 11121 }, { "epoch": 0.4351670709758197, "grad_norm": 0.0, "learning_rate": 1.2557065776934604e-05, "loss": 1.0432, "step": 11122 }, { "epoch": 0.43520619766804913, "grad_norm": 0.0, "learning_rate": 1.2555840655101465e-05, "loss": 1.1086, "step": 11123 }, { "epoch": 0.4352453243602786, "grad_norm": 0.0, "learning_rate": 1.2554615492224731e-05, "loss": 1.0469, "step": 11124 }, { "epoch": 0.435284451052508, "grad_norm": 0.0, "learning_rate": 1.2553390288324067e-05, "loss": 1.0204, "step": 11125 }, { "epoch": 0.43532357774473746, "grad_norm": 0.0, "learning_rate": 1.255216504341916e-05, "loss": 0.9179, "step": 11126 }, { "epoch": 0.4353627044369669, "grad_norm": 0.0, "learning_rate": 1.2550939757529678e-05, "loss": 1.08, "step": 11127 }, { "epoch": 0.43540183112919634, "grad_norm": 0.0, "learning_rate": 1.2549714430675299e-05, "loss": 1.0318, "step": 11128 }, { "epoch": 0.4354409578214258, "grad_norm": 0.0, "learning_rate": 1.2548489062875705e-05, "loss": 1.0738, "step": 11129 }, { "epoch": 0.4354800845136552, "grad_norm": 0.0, "learning_rate": 1.2547263654150565e-05, "loss": 0.9485, "step": 11130 }, { "epoch": 0.43551921120588466, "grad_norm": 0.0, "learning_rate": 1.2546038204519567e-05, "loss": 1.1277, "step": 11131 }, { "epoch": 0.4355583378981141, "grad_norm": 0.0, "learning_rate": 1.2544812714002381e-05, "loss": 1.1212, "step": 11132 }, { "epoch": 0.43559746459034354, "grad_norm": 0.0, "learning_rate": 1.2543587182618695e-05, "loss": 1.0975, "step": 11133 }, { "epoch": 0.435636591282573, "grad_norm": 0.0, "learning_rate": 1.2542361610388185e-05, "loss": 1.0438, "step": 11134 }, { "epoch": 0.4356757179748024, "grad_norm": 0.0, "learning_rate": 1.2541135997330534e-05, "loss": 1.1216, "step": 11135 }, { "epoch": 0.43571484466703186, "grad_norm": 0.0, "learning_rate": 1.2539910343465422e-05, "loss": 1.1628, "step": 11136 }, { "epoch": 0.4357539713592613, "grad_norm": 0.0, "learning_rate": 1.2538684648812535e-05, "loss": 1.0253, "step": 11137 }, { "epoch": 0.43579309805149075, "grad_norm": 0.0, "learning_rate": 1.2537458913391553e-05, "loss": 1.024, "step": 11138 }, { "epoch": 0.4358322247437202, "grad_norm": 0.0, "learning_rate": 1.2536233137222159e-05, "loss": 1.1237, "step": 11139 }, { "epoch": 0.4358713514359496, "grad_norm": 0.0, "learning_rate": 1.2535007320324039e-05, "loss": 1.0467, "step": 11140 }, { "epoch": 0.43591047812817907, "grad_norm": 0.0, "learning_rate": 1.2533781462716879e-05, "loss": 1.0858, "step": 11141 }, { "epoch": 0.4359496048204085, "grad_norm": 0.0, "learning_rate": 1.2532555564420363e-05, "loss": 1.0187, "step": 11142 }, { "epoch": 0.4359887315126379, "grad_norm": 0.0, "learning_rate": 1.2531329625454179e-05, "loss": 1.157, "step": 11143 }, { "epoch": 0.43602785820486734, "grad_norm": 0.0, "learning_rate": 1.2530103645838011e-05, "loss": 1.0712, "step": 11144 }, { "epoch": 0.4360669848970968, "grad_norm": 0.0, "learning_rate": 1.2528877625591552e-05, "loss": 1.1025, "step": 11145 }, { "epoch": 0.4361061115893262, "grad_norm": 0.0, "learning_rate": 1.2527651564734487e-05, "loss": 0.9718, "step": 11146 }, { "epoch": 0.43614523828155566, "grad_norm": 0.0, "learning_rate": 1.2526425463286503e-05, "loss": 1.1348, "step": 11147 }, { "epoch": 0.4361843649737851, "grad_norm": 0.0, "learning_rate": 1.2525199321267292e-05, "loss": 1.0074, "step": 11148 }, { "epoch": 0.43622349166601454, "grad_norm": 0.0, "learning_rate": 1.2523973138696546e-05, "loss": 1.1091, "step": 11149 }, { "epoch": 0.436262618358244, "grad_norm": 0.0, "learning_rate": 1.2522746915593951e-05, "loss": 1.1015, "step": 11150 }, { "epoch": 0.4363017450504734, "grad_norm": 0.0, "learning_rate": 1.2521520651979205e-05, "loss": 1.1057, "step": 11151 }, { "epoch": 0.43634087174270286, "grad_norm": 0.0, "learning_rate": 1.2520294347871993e-05, "loss": 1.2505, "step": 11152 }, { "epoch": 0.4363799984349323, "grad_norm": 0.0, "learning_rate": 1.2519068003292017e-05, "loss": 1.0417, "step": 11153 }, { "epoch": 0.43641912512716174, "grad_norm": 0.0, "learning_rate": 1.2517841618258961e-05, "loss": 1.0262, "step": 11154 }, { "epoch": 0.4364582518193912, "grad_norm": 0.0, "learning_rate": 1.2516615192792524e-05, "loss": 1.1627, "step": 11155 }, { "epoch": 0.4364973785116206, "grad_norm": 0.0, "learning_rate": 1.2515388726912406e-05, "loss": 1.0065, "step": 11156 }, { "epoch": 0.43653650520385007, "grad_norm": 0.0, "learning_rate": 1.251416222063829e-05, "loss": 0.9927, "step": 11157 }, { "epoch": 0.4365756318960795, "grad_norm": 0.0, "learning_rate": 1.2512935673989884e-05, "loss": 1.0338, "step": 11158 }, { "epoch": 0.43661475858830895, "grad_norm": 0.0, "learning_rate": 1.2511709086986876e-05, "loss": 1.1105, "step": 11159 }, { "epoch": 0.4366538852805384, "grad_norm": 0.0, "learning_rate": 1.2510482459648972e-05, "loss": 1.1051, "step": 11160 }, { "epoch": 0.43669301197276783, "grad_norm": 0.0, "learning_rate": 1.2509255791995863e-05, "loss": 1.1708, "step": 11161 }, { "epoch": 0.43673213866499727, "grad_norm": 0.0, "learning_rate": 1.2508029084047251e-05, "loss": 1.0053, "step": 11162 }, { "epoch": 0.4367712653572267, "grad_norm": 0.0, "learning_rate": 1.2506802335822831e-05, "loss": 1.1135, "step": 11163 }, { "epoch": 0.43681039204945615, "grad_norm": 0.0, "learning_rate": 1.250557554734231e-05, "loss": 1.1047, "step": 11164 }, { "epoch": 0.4368495187416856, "grad_norm": 0.0, "learning_rate": 1.2504348718625385e-05, "loss": 1.054, "step": 11165 }, { "epoch": 0.43688864543391503, "grad_norm": 0.0, "learning_rate": 1.2503121849691758e-05, "loss": 0.9965, "step": 11166 }, { "epoch": 0.4369277721261445, "grad_norm": 0.0, "learning_rate": 1.2501894940561133e-05, "loss": 1.0362, "step": 11167 }, { "epoch": 0.4369668988183739, "grad_norm": 0.0, "learning_rate": 1.2500667991253205e-05, "loss": 1.0319, "step": 11168 }, { "epoch": 0.43700602551060336, "grad_norm": 0.0, "learning_rate": 1.2499441001787687e-05, "loss": 1.0436, "step": 11169 }, { "epoch": 0.4370451522028328, "grad_norm": 0.0, "learning_rate": 1.2498213972184277e-05, "loss": 1.1807, "step": 11170 }, { "epoch": 0.4370842788950622, "grad_norm": 0.0, "learning_rate": 1.2496986902462683e-05, "loss": 1.0802, "step": 11171 }, { "epoch": 0.4371234055872916, "grad_norm": 0.0, "learning_rate": 1.2495759792642603e-05, "loss": 1.136, "step": 11172 }, { "epoch": 0.43716253227952107, "grad_norm": 0.0, "learning_rate": 1.2494532642743753e-05, "loss": 1.087, "step": 11173 }, { "epoch": 0.4372016589717505, "grad_norm": 0.0, "learning_rate": 1.249330545278583e-05, "loss": 1.0092, "step": 11174 }, { "epoch": 0.43724078566397995, "grad_norm": 0.0, "learning_rate": 1.249207822278855e-05, "loss": 0.9903, "step": 11175 }, { "epoch": 0.4372799123562094, "grad_norm": 0.0, "learning_rate": 1.2490850952771617e-05, "loss": 1.1094, "step": 11176 }, { "epoch": 0.43731903904843883, "grad_norm": 0.0, "learning_rate": 1.2489623642754736e-05, "loss": 1.059, "step": 11177 }, { "epoch": 0.43735816574066827, "grad_norm": 0.0, "learning_rate": 1.248839629275762e-05, "loss": 1.0119, "step": 11178 }, { "epoch": 0.4373972924328977, "grad_norm": 0.0, "learning_rate": 1.2487168902799976e-05, "loss": 1.0246, "step": 11179 }, { "epoch": 0.43743641912512715, "grad_norm": 0.0, "learning_rate": 1.2485941472901519e-05, "loss": 1.0522, "step": 11180 }, { "epoch": 0.4374755458173566, "grad_norm": 0.0, "learning_rate": 1.2484714003081954e-05, "loss": 1.1778, "step": 11181 }, { "epoch": 0.43751467250958603, "grad_norm": 0.0, "learning_rate": 1.2483486493360996e-05, "loss": 1.1464, "step": 11182 }, { "epoch": 0.4375537992018155, "grad_norm": 0.0, "learning_rate": 1.248225894375836e-05, "loss": 1.0792, "step": 11183 }, { "epoch": 0.4375929258940449, "grad_norm": 0.0, "learning_rate": 1.2481031354293754e-05, "loss": 1.1031, "step": 11184 }, { "epoch": 0.43763205258627436, "grad_norm": 0.0, "learning_rate": 1.2479803724986894e-05, "loss": 0.9165, "step": 11185 }, { "epoch": 0.4376711792785038, "grad_norm": 0.0, "learning_rate": 1.2478576055857492e-05, "loss": 1.137, "step": 11186 }, { "epoch": 0.43771030597073324, "grad_norm": 0.0, "learning_rate": 1.2477348346925264e-05, "loss": 1.0817, "step": 11187 }, { "epoch": 0.4377494326629627, "grad_norm": 0.0, "learning_rate": 1.2476120598209926e-05, "loss": 1.166, "step": 11188 }, { "epoch": 0.4377885593551921, "grad_norm": 0.0, "learning_rate": 1.2474892809731196e-05, "loss": 1.0325, "step": 11189 }, { "epoch": 0.43782768604742156, "grad_norm": 0.0, "learning_rate": 1.2473664981508786e-05, "loss": 0.9876, "step": 11190 }, { "epoch": 0.437866812739651, "grad_norm": 0.0, "learning_rate": 1.247243711356242e-05, "loss": 0.9878, "step": 11191 }, { "epoch": 0.43790593943188044, "grad_norm": 0.0, "learning_rate": 1.247120920591181e-05, "loss": 1.0951, "step": 11192 }, { "epoch": 0.4379450661241099, "grad_norm": 0.0, "learning_rate": 1.2469981258576676e-05, "loss": 1.1052, "step": 11193 }, { "epoch": 0.4379841928163393, "grad_norm": 0.0, "learning_rate": 1.2468753271576737e-05, "loss": 0.9444, "step": 11194 }, { "epoch": 0.43802331950856876, "grad_norm": 0.0, "learning_rate": 1.2467525244931717e-05, "loss": 1.0938, "step": 11195 }, { "epoch": 0.4380624462007982, "grad_norm": 0.0, "learning_rate": 1.2466297178661332e-05, "loss": 1.0363, "step": 11196 }, { "epoch": 0.43810157289302765, "grad_norm": 0.0, "learning_rate": 1.2465069072785304e-05, "loss": 1.0993, "step": 11197 }, { "epoch": 0.4381406995852571, "grad_norm": 0.0, "learning_rate": 1.2463840927323358e-05, "loss": 1.0925, "step": 11198 }, { "epoch": 0.43817982627748653, "grad_norm": 0.0, "learning_rate": 1.246261274229521e-05, "loss": 1.0565, "step": 11199 }, { "epoch": 0.4382189529697159, "grad_norm": 0.0, "learning_rate": 1.2461384517720592e-05, "loss": 0.9464, "step": 11200 }, { "epoch": 0.43825807966194535, "grad_norm": 0.0, "learning_rate": 1.2460156253619218e-05, "loss": 0.9592, "step": 11201 }, { "epoch": 0.4382972063541748, "grad_norm": 0.0, "learning_rate": 1.2458927950010821e-05, "loss": 1.0578, "step": 11202 }, { "epoch": 0.43833633304640424, "grad_norm": 0.0, "learning_rate": 1.245769960691512e-05, "loss": 1.2245, "step": 11203 }, { "epoch": 0.4383754597386337, "grad_norm": 0.0, "learning_rate": 1.2456471224351847e-05, "loss": 1.0995, "step": 11204 }, { "epoch": 0.4384145864308631, "grad_norm": 0.0, "learning_rate": 1.2455242802340721e-05, "loss": 0.9635, "step": 11205 }, { "epoch": 0.43845371312309256, "grad_norm": 0.0, "learning_rate": 1.2454014340901472e-05, "loss": 1.0962, "step": 11206 }, { "epoch": 0.438492839815322, "grad_norm": 0.0, "learning_rate": 1.2452785840053829e-05, "loss": 1.1951, "step": 11207 }, { "epoch": 0.43853196650755144, "grad_norm": 0.0, "learning_rate": 1.2451557299817519e-05, "loss": 1.0186, "step": 11208 }, { "epoch": 0.4385710931997809, "grad_norm": 0.0, "learning_rate": 1.245032872021227e-05, "loss": 1.0833, "step": 11209 }, { "epoch": 0.4386102198920103, "grad_norm": 0.0, "learning_rate": 1.2449100101257812e-05, "loss": 0.9848, "step": 11210 }, { "epoch": 0.43864934658423976, "grad_norm": 0.0, "learning_rate": 1.2447871442973876e-05, "loss": 0.9724, "step": 11211 }, { "epoch": 0.4386884732764692, "grad_norm": 0.0, "learning_rate": 1.2446642745380192e-05, "loss": 0.9958, "step": 11212 }, { "epoch": 0.43872759996869864, "grad_norm": 0.0, "learning_rate": 1.2445414008496492e-05, "loss": 1.0258, "step": 11213 }, { "epoch": 0.4387667266609281, "grad_norm": 0.0, "learning_rate": 1.2444185232342505e-05, "loss": 1.0832, "step": 11214 }, { "epoch": 0.4388058533531575, "grad_norm": 0.0, "learning_rate": 1.2442956416937967e-05, "loss": 1.1117, "step": 11215 }, { "epoch": 0.43884498004538697, "grad_norm": 0.0, "learning_rate": 1.2441727562302612e-05, "loss": 1.0249, "step": 11216 }, { "epoch": 0.4388841067376164, "grad_norm": 0.0, "learning_rate": 1.2440498668456169e-05, "loss": 1.0551, "step": 11217 }, { "epoch": 0.43892323342984585, "grad_norm": 0.0, "learning_rate": 1.2439269735418377e-05, "loss": 1.2086, "step": 11218 }, { "epoch": 0.4389623601220753, "grad_norm": 0.0, "learning_rate": 1.2438040763208967e-05, "loss": 1.1995, "step": 11219 }, { "epoch": 0.43900148681430473, "grad_norm": 0.0, "learning_rate": 1.2436811751847682e-05, "loss": 1.1395, "step": 11220 }, { "epoch": 0.43904061350653417, "grad_norm": 0.0, "learning_rate": 1.243558270135425e-05, "loss": 1.017, "step": 11221 }, { "epoch": 0.4390797401987636, "grad_norm": 0.0, "learning_rate": 1.2434353611748415e-05, "loss": 1.0816, "step": 11222 }, { "epoch": 0.43911886689099305, "grad_norm": 0.0, "learning_rate": 1.2433124483049907e-05, "loss": 1.1348, "step": 11223 }, { "epoch": 0.4391579935832225, "grad_norm": 0.0, "learning_rate": 1.2431895315278473e-05, "loss": 1.0285, "step": 11224 }, { "epoch": 0.43919712027545194, "grad_norm": 0.0, "learning_rate": 1.2430666108453848e-05, "loss": 1.062, "step": 11225 }, { "epoch": 0.4392362469676814, "grad_norm": 0.0, "learning_rate": 1.2429436862595767e-05, "loss": 0.9428, "step": 11226 }, { "epoch": 0.4392753736599108, "grad_norm": 0.0, "learning_rate": 1.2428207577723977e-05, "loss": 1.0379, "step": 11227 }, { "epoch": 0.4393145003521402, "grad_norm": 0.0, "learning_rate": 1.2426978253858215e-05, "loss": 1.0898, "step": 11228 }, { "epoch": 0.43935362704436964, "grad_norm": 0.0, "learning_rate": 1.2425748891018223e-05, "loss": 1.1241, "step": 11229 }, { "epoch": 0.4393927537365991, "grad_norm": 0.0, "learning_rate": 1.2424519489223743e-05, "loss": 1.1021, "step": 11230 }, { "epoch": 0.4394318804288285, "grad_norm": 0.0, "learning_rate": 1.2423290048494521e-05, "loss": 1.0667, "step": 11231 }, { "epoch": 0.43947100712105797, "grad_norm": 0.0, "learning_rate": 1.2422060568850293e-05, "loss": 1.0898, "step": 11232 }, { "epoch": 0.4395101338132874, "grad_norm": 0.0, "learning_rate": 1.2420831050310812e-05, "loss": 0.9332, "step": 11233 }, { "epoch": 0.43954926050551685, "grad_norm": 0.0, "learning_rate": 1.2419601492895816e-05, "loss": 1.0325, "step": 11234 }, { "epoch": 0.4395883871977463, "grad_norm": 0.0, "learning_rate": 1.2418371896625053e-05, "loss": 1.0641, "step": 11235 }, { "epoch": 0.43962751388997573, "grad_norm": 0.0, "learning_rate": 1.2417142261518265e-05, "loss": 1.1228, "step": 11236 }, { "epoch": 0.43966664058220517, "grad_norm": 0.0, "learning_rate": 1.2415912587595202e-05, "loss": 0.9694, "step": 11237 }, { "epoch": 0.4397057672744346, "grad_norm": 0.0, "learning_rate": 1.2414682874875612e-05, "loss": 1.092, "step": 11238 }, { "epoch": 0.43974489396666405, "grad_norm": 0.0, "learning_rate": 1.2413453123379238e-05, "loss": 1.1332, "step": 11239 }, { "epoch": 0.4397840206588935, "grad_norm": 0.0, "learning_rate": 1.2412223333125833e-05, "loss": 1.0979, "step": 11240 }, { "epoch": 0.43982314735112293, "grad_norm": 0.0, "learning_rate": 1.2410993504135143e-05, "loss": 0.9946, "step": 11241 }, { "epoch": 0.4398622740433524, "grad_norm": 0.0, "learning_rate": 1.2409763636426919e-05, "loss": 1.0765, "step": 11242 }, { "epoch": 0.4399014007355818, "grad_norm": 0.0, "learning_rate": 1.240853373002091e-05, "loss": 1.1046, "step": 11243 }, { "epoch": 0.43994052742781126, "grad_norm": 0.0, "learning_rate": 1.2407303784936868e-05, "loss": 1.0563, "step": 11244 }, { "epoch": 0.4399796541200407, "grad_norm": 0.0, "learning_rate": 1.2406073801194546e-05, "loss": 0.9787, "step": 11245 }, { "epoch": 0.44001878081227014, "grad_norm": 0.0, "learning_rate": 1.2404843778813689e-05, "loss": 1.0278, "step": 11246 }, { "epoch": 0.4400579075044996, "grad_norm": 0.0, "learning_rate": 1.2403613717814058e-05, "loss": 1.1056, "step": 11247 }, { "epoch": 0.440097034196729, "grad_norm": 0.0, "learning_rate": 1.24023836182154e-05, "loss": 0.9845, "step": 11248 }, { "epoch": 0.44013616088895846, "grad_norm": 0.0, "learning_rate": 1.2401153480037473e-05, "loss": 0.9919, "step": 11249 }, { "epoch": 0.4401752875811879, "grad_norm": 0.0, "learning_rate": 1.2399923303300028e-05, "loss": 1.1078, "step": 11250 }, { "epoch": 0.44021441427341734, "grad_norm": 0.0, "learning_rate": 1.2398693088022827e-05, "loss": 1.0676, "step": 11251 }, { "epoch": 0.4402535409656468, "grad_norm": 0.0, "learning_rate": 1.2397462834225618e-05, "loss": 1.0704, "step": 11252 }, { "epoch": 0.4402926676578762, "grad_norm": 0.0, "learning_rate": 1.2396232541928157e-05, "loss": 1.2204, "step": 11253 }, { "epoch": 0.44033179435010567, "grad_norm": 0.0, "learning_rate": 1.2395002211150207e-05, "loss": 1.0214, "step": 11254 }, { "epoch": 0.4403709210423351, "grad_norm": 0.0, "learning_rate": 1.2393771841911524e-05, "loss": 1.0669, "step": 11255 }, { "epoch": 0.44041004773456455, "grad_norm": 0.0, "learning_rate": 1.2392541434231861e-05, "loss": 1.1148, "step": 11256 }, { "epoch": 0.44044917442679393, "grad_norm": 0.0, "learning_rate": 1.2391310988130983e-05, "loss": 1.0071, "step": 11257 }, { "epoch": 0.4404883011190234, "grad_norm": 0.0, "learning_rate": 1.2390080503628647e-05, "loss": 1.0244, "step": 11258 }, { "epoch": 0.4405274278112528, "grad_norm": 0.0, "learning_rate": 1.2388849980744613e-05, "loss": 1.227, "step": 11259 }, { "epoch": 0.44056655450348226, "grad_norm": 0.0, "learning_rate": 1.2387619419498642e-05, "loss": 1.086, "step": 11260 }, { "epoch": 0.4406056811957117, "grad_norm": 0.0, "learning_rate": 1.2386388819910493e-05, "loss": 1.1632, "step": 11261 }, { "epoch": 0.44064480788794114, "grad_norm": 0.0, "learning_rate": 1.2385158181999933e-05, "loss": 1.1094, "step": 11262 }, { "epoch": 0.4406839345801706, "grad_norm": 0.0, "learning_rate": 1.238392750578672e-05, "loss": 1.0314, "step": 11263 }, { "epoch": 0.4407230612724, "grad_norm": 0.0, "learning_rate": 1.2382696791290615e-05, "loss": 1.0054, "step": 11264 }, { "epoch": 0.44076218796462946, "grad_norm": 0.0, "learning_rate": 1.2381466038531388e-05, "loss": 0.9568, "step": 11265 }, { "epoch": 0.4408013146568589, "grad_norm": 0.0, "learning_rate": 1.23802352475288e-05, "loss": 1.0355, "step": 11266 }, { "epoch": 0.44084044134908834, "grad_norm": 0.0, "learning_rate": 1.237900441830262e-05, "loss": 1.1773, "step": 11267 }, { "epoch": 0.4408795680413178, "grad_norm": 0.0, "learning_rate": 1.2377773550872605e-05, "loss": 1.1409, "step": 11268 }, { "epoch": 0.4409186947335472, "grad_norm": 0.0, "learning_rate": 1.237654264525853e-05, "loss": 0.9655, "step": 11269 }, { "epoch": 0.44095782142577666, "grad_norm": 0.0, "learning_rate": 1.2375311701480156e-05, "loss": 0.9987, "step": 11270 }, { "epoch": 0.4409969481180061, "grad_norm": 0.0, "learning_rate": 1.2374080719557253e-05, "loss": 0.975, "step": 11271 }, { "epoch": 0.44103607481023555, "grad_norm": 0.0, "learning_rate": 1.237284969950959e-05, "loss": 1.0909, "step": 11272 }, { "epoch": 0.441075201502465, "grad_norm": 0.0, "learning_rate": 1.2371618641356933e-05, "loss": 1.161, "step": 11273 }, { "epoch": 0.4411143281946944, "grad_norm": 0.0, "learning_rate": 1.2370387545119052e-05, "loss": 1.0427, "step": 11274 }, { "epoch": 0.44115345488692387, "grad_norm": 0.0, "learning_rate": 1.2369156410815717e-05, "loss": 1.0964, "step": 11275 }, { "epoch": 0.4411925815791533, "grad_norm": 0.0, "learning_rate": 1.23679252384667e-05, "loss": 1.0244, "step": 11276 }, { "epoch": 0.44123170827138275, "grad_norm": 0.0, "learning_rate": 1.236669402809177e-05, "loss": 0.9044, "step": 11277 }, { "epoch": 0.4412708349636122, "grad_norm": 0.0, "learning_rate": 1.2365462779710699e-05, "loss": 1.1607, "step": 11278 }, { "epoch": 0.44130996165584163, "grad_norm": 0.0, "learning_rate": 1.2364231493343262e-05, "loss": 1.0038, "step": 11279 }, { "epoch": 0.4413490883480711, "grad_norm": 0.0, "learning_rate": 1.2363000169009228e-05, "loss": 0.9369, "step": 11280 }, { "epoch": 0.4413882150403005, "grad_norm": 0.0, "learning_rate": 1.2361768806728372e-05, "loss": 1.0442, "step": 11281 }, { "epoch": 0.44142734173252995, "grad_norm": 0.0, "learning_rate": 1.236053740652047e-05, "loss": 1.1556, "step": 11282 }, { "epoch": 0.4414664684247594, "grad_norm": 0.0, "learning_rate": 1.2359305968405295e-05, "loss": 1.0883, "step": 11283 }, { "epoch": 0.44150559511698884, "grad_norm": 0.0, "learning_rate": 1.235807449240262e-05, "loss": 0.9235, "step": 11284 }, { "epoch": 0.4415447218092182, "grad_norm": 0.0, "learning_rate": 1.2356842978532227e-05, "loss": 1.0449, "step": 11285 }, { "epoch": 0.44158384850144766, "grad_norm": 0.0, "learning_rate": 1.2355611426813886e-05, "loss": 0.9857, "step": 11286 }, { "epoch": 0.4416229751936771, "grad_norm": 0.0, "learning_rate": 1.2354379837267378e-05, "loss": 1.1087, "step": 11287 }, { "epoch": 0.44166210188590654, "grad_norm": 0.0, "learning_rate": 1.235314820991248e-05, "loss": 1.1487, "step": 11288 }, { "epoch": 0.441701228578136, "grad_norm": 0.0, "learning_rate": 1.2351916544768972e-05, "loss": 1.0397, "step": 11289 }, { "epoch": 0.4417403552703654, "grad_norm": 0.0, "learning_rate": 1.235068484185663e-05, "loss": 1.0497, "step": 11290 }, { "epoch": 0.44177948196259487, "grad_norm": 0.0, "learning_rate": 1.2349453101195237e-05, "loss": 1.093, "step": 11291 }, { "epoch": 0.4418186086548243, "grad_norm": 0.0, "learning_rate": 1.234822132280457e-05, "loss": 0.9159, "step": 11292 }, { "epoch": 0.44185773534705375, "grad_norm": 0.0, "learning_rate": 1.234698950670441e-05, "loss": 1.1516, "step": 11293 }, { "epoch": 0.4418968620392832, "grad_norm": 0.0, "learning_rate": 1.2345757652914541e-05, "loss": 1.1576, "step": 11294 }, { "epoch": 0.44193598873151263, "grad_norm": 0.0, "learning_rate": 1.2344525761454742e-05, "loss": 0.9969, "step": 11295 }, { "epoch": 0.44197511542374207, "grad_norm": 0.0, "learning_rate": 1.2343293832344798e-05, "loss": 1.0889, "step": 11296 }, { "epoch": 0.4420142421159715, "grad_norm": 0.0, "learning_rate": 1.2342061865604492e-05, "loss": 1.0788, "step": 11297 }, { "epoch": 0.44205336880820095, "grad_norm": 0.0, "learning_rate": 1.2340829861253605e-05, "loss": 1.1962, "step": 11298 }, { "epoch": 0.4420924955004304, "grad_norm": 0.0, "learning_rate": 1.2339597819311925e-05, "loss": 1.0164, "step": 11299 }, { "epoch": 0.44213162219265983, "grad_norm": 0.0, "learning_rate": 1.2338365739799236e-05, "loss": 0.9669, "step": 11300 }, { "epoch": 0.4421707488848893, "grad_norm": 0.0, "learning_rate": 1.2337133622735324e-05, "loss": 1.0499, "step": 11301 }, { "epoch": 0.4422098755771187, "grad_norm": 0.0, "learning_rate": 1.2335901468139974e-05, "loss": 0.941, "step": 11302 }, { "epoch": 0.44224900226934816, "grad_norm": 0.0, "learning_rate": 1.2334669276032971e-05, "loss": 1.0224, "step": 11303 }, { "epoch": 0.4422881289615776, "grad_norm": 0.0, "learning_rate": 1.233343704643411e-05, "loss": 1.1548, "step": 11304 }, { "epoch": 0.44232725565380704, "grad_norm": 0.0, "learning_rate": 1.2332204779363171e-05, "loss": 1.1115, "step": 11305 }, { "epoch": 0.4423663823460365, "grad_norm": 0.0, "learning_rate": 1.2330972474839944e-05, "loss": 1.0781, "step": 11306 }, { "epoch": 0.4424055090382659, "grad_norm": 0.0, "learning_rate": 1.2329740132884222e-05, "loss": 1.0044, "step": 11307 }, { "epoch": 0.44244463573049536, "grad_norm": 0.0, "learning_rate": 1.2328507753515793e-05, "loss": 0.9818, "step": 11308 }, { "epoch": 0.4424837624227248, "grad_norm": 0.0, "learning_rate": 1.2327275336754448e-05, "loss": 0.9127, "step": 11309 }, { "epoch": 0.44252288911495424, "grad_norm": 0.0, "learning_rate": 1.2326042882619973e-05, "loss": 0.9927, "step": 11310 }, { "epoch": 0.4425620158071837, "grad_norm": 0.0, "learning_rate": 1.232481039113217e-05, "loss": 1.064, "step": 11311 }, { "epoch": 0.4426011424994131, "grad_norm": 0.0, "learning_rate": 1.2323577862310823e-05, "loss": 1.069, "step": 11312 }, { "epoch": 0.44264026919164257, "grad_norm": 0.0, "learning_rate": 1.2322345296175724e-05, "loss": 1.1057, "step": 11313 }, { "epoch": 0.44267939588387195, "grad_norm": 0.0, "learning_rate": 1.2321112692746673e-05, "loss": 0.9813, "step": 11314 }, { "epoch": 0.4427185225761014, "grad_norm": 0.0, "learning_rate": 1.2319880052043458e-05, "loss": 0.995, "step": 11315 }, { "epoch": 0.44275764926833083, "grad_norm": 0.0, "learning_rate": 1.2318647374085878e-05, "loss": 0.9644, "step": 11316 }, { "epoch": 0.4427967759605603, "grad_norm": 0.0, "learning_rate": 1.2317414658893728e-05, "loss": 1.0301, "step": 11317 }, { "epoch": 0.4428359026527897, "grad_norm": 0.0, "learning_rate": 1.2316181906486802e-05, "loss": 1.0699, "step": 11318 }, { "epoch": 0.44287502934501916, "grad_norm": 0.0, "learning_rate": 1.2314949116884894e-05, "loss": 1.171, "step": 11319 }, { "epoch": 0.4429141560372486, "grad_norm": 0.0, "learning_rate": 1.2313716290107806e-05, "loss": 1.1104, "step": 11320 }, { "epoch": 0.44295328272947804, "grad_norm": 0.0, "learning_rate": 1.2312483426175337e-05, "loss": 1.0128, "step": 11321 }, { "epoch": 0.4429924094217075, "grad_norm": 0.0, "learning_rate": 1.2311250525107276e-05, "loss": 0.9991, "step": 11322 }, { "epoch": 0.4430315361139369, "grad_norm": 0.0, "learning_rate": 1.2310017586923431e-05, "loss": 0.9648, "step": 11323 }, { "epoch": 0.44307066280616636, "grad_norm": 0.0, "learning_rate": 1.2308784611643597e-05, "loss": 0.9978, "step": 11324 }, { "epoch": 0.4431097894983958, "grad_norm": 0.0, "learning_rate": 1.2307551599287577e-05, "loss": 1.0854, "step": 11325 }, { "epoch": 0.44314891619062524, "grad_norm": 0.0, "learning_rate": 1.2306318549875167e-05, "loss": 1.108, "step": 11326 }, { "epoch": 0.4431880428828547, "grad_norm": 0.0, "learning_rate": 1.2305085463426173e-05, "loss": 0.995, "step": 11327 }, { "epoch": 0.4432271695750841, "grad_norm": 0.0, "learning_rate": 1.2303852339960393e-05, "loss": 1.1284, "step": 11328 }, { "epoch": 0.44326629626731356, "grad_norm": 0.0, "learning_rate": 1.2302619179497635e-05, "loss": 1.1846, "step": 11329 }, { "epoch": 0.443305422959543, "grad_norm": 0.0, "learning_rate": 1.2301385982057696e-05, "loss": 1.0414, "step": 11330 }, { "epoch": 0.44334454965177245, "grad_norm": 0.0, "learning_rate": 1.2300152747660382e-05, "loss": 1.0051, "step": 11331 }, { "epoch": 0.4433836763440019, "grad_norm": 0.0, "learning_rate": 1.2298919476325497e-05, "loss": 1.1392, "step": 11332 }, { "epoch": 0.44342280303623133, "grad_norm": 0.0, "learning_rate": 1.2297686168072844e-05, "loss": 1.041, "step": 11333 }, { "epoch": 0.44346192972846077, "grad_norm": 0.0, "learning_rate": 1.2296452822922234e-05, "loss": 1.029, "step": 11334 }, { "epoch": 0.4435010564206902, "grad_norm": 0.0, "learning_rate": 1.2295219440893467e-05, "loss": 1.1632, "step": 11335 }, { "epoch": 0.44354018311291965, "grad_norm": 0.0, "learning_rate": 1.2293986022006353e-05, "loss": 1.1598, "step": 11336 }, { "epoch": 0.4435793098051491, "grad_norm": 0.0, "learning_rate": 1.2292752566280696e-05, "loss": 1.0225, "step": 11337 }, { "epoch": 0.44361843649737853, "grad_norm": 0.0, "learning_rate": 1.2291519073736308e-05, "loss": 1.0273, "step": 11338 }, { "epoch": 0.443657563189608, "grad_norm": 0.0, "learning_rate": 1.2290285544392992e-05, "loss": 1.0365, "step": 11339 }, { "epoch": 0.4436966898818374, "grad_norm": 0.0, "learning_rate": 1.2289051978270565e-05, "loss": 1.0706, "step": 11340 }, { "epoch": 0.44373581657406685, "grad_norm": 0.0, "learning_rate": 1.228781837538883e-05, "loss": 1.143, "step": 11341 }, { "epoch": 0.44377494326629624, "grad_norm": 0.0, "learning_rate": 1.2286584735767595e-05, "loss": 1.1097, "step": 11342 }, { "epoch": 0.4438140699585257, "grad_norm": 0.0, "learning_rate": 1.228535105942668e-05, "loss": 1.0047, "step": 11343 }, { "epoch": 0.4438531966507551, "grad_norm": 0.0, "learning_rate": 1.2284117346385887e-05, "loss": 1.1125, "step": 11344 }, { "epoch": 0.44389232334298456, "grad_norm": 0.0, "learning_rate": 1.2282883596665032e-05, "loss": 0.9536, "step": 11345 }, { "epoch": 0.443931450035214, "grad_norm": 0.0, "learning_rate": 1.2281649810283928e-05, "loss": 0.9402, "step": 11346 }, { "epoch": 0.44397057672744344, "grad_norm": 0.0, "learning_rate": 1.2280415987262387e-05, "loss": 0.9812, "step": 11347 }, { "epoch": 0.4440097034196729, "grad_norm": 0.0, "learning_rate": 1.2279182127620221e-05, "loss": 1.111, "step": 11348 }, { "epoch": 0.4440488301119023, "grad_norm": 0.0, "learning_rate": 1.2277948231377247e-05, "loss": 1.0113, "step": 11349 }, { "epoch": 0.44408795680413177, "grad_norm": 0.0, "learning_rate": 1.2276714298553283e-05, "loss": 1.2053, "step": 11350 }, { "epoch": 0.4441270834963612, "grad_norm": 0.0, "learning_rate": 1.2275480329168135e-05, "loss": 1.1714, "step": 11351 }, { "epoch": 0.44416621018859065, "grad_norm": 0.0, "learning_rate": 1.2274246323241626e-05, "loss": 0.9572, "step": 11352 }, { "epoch": 0.4442053368808201, "grad_norm": 0.0, "learning_rate": 1.2273012280793569e-05, "loss": 1.0803, "step": 11353 }, { "epoch": 0.44424446357304953, "grad_norm": 0.0, "learning_rate": 1.2271778201843785e-05, "loss": 1.097, "step": 11354 }, { "epoch": 0.44428359026527897, "grad_norm": 0.0, "learning_rate": 1.2270544086412088e-05, "loss": 1.1775, "step": 11355 }, { "epoch": 0.4443227169575084, "grad_norm": 0.0, "learning_rate": 1.22693099345183e-05, "loss": 1.0875, "step": 11356 }, { "epoch": 0.44436184364973785, "grad_norm": 0.0, "learning_rate": 1.2268075746182237e-05, "loss": 0.9283, "step": 11357 }, { "epoch": 0.4444009703419673, "grad_norm": 0.0, "learning_rate": 1.226684152142372e-05, "loss": 0.9755, "step": 11358 }, { "epoch": 0.44444009703419673, "grad_norm": 0.0, "learning_rate": 1.2265607260262571e-05, "loss": 0.9979, "step": 11359 }, { "epoch": 0.4444792237264262, "grad_norm": 0.0, "learning_rate": 1.2264372962718602e-05, "loss": 1.041, "step": 11360 }, { "epoch": 0.4445183504186556, "grad_norm": 0.0, "learning_rate": 1.2263138628811648e-05, "loss": 0.9982, "step": 11361 }, { "epoch": 0.44455747711088506, "grad_norm": 0.0, "learning_rate": 1.226190425856152e-05, "loss": 1.137, "step": 11362 }, { "epoch": 0.4445966038031145, "grad_norm": 0.0, "learning_rate": 1.2260669851988042e-05, "loss": 1.0964, "step": 11363 }, { "epoch": 0.44463573049534394, "grad_norm": 0.0, "learning_rate": 1.225943540911104e-05, "loss": 1.0452, "step": 11364 }, { "epoch": 0.4446748571875734, "grad_norm": 0.0, "learning_rate": 1.225820092995034e-05, "loss": 1.064, "step": 11365 }, { "epoch": 0.4447139838798028, "grad_norm": 0.0, "learning_rate": 1.225696641452576e-05, "loss": 1.0439, "step": 11366 }, { "epoch": 0.44475311057203226, "grad_norm": 0.0, "learning_rate": 1.2255731862857127e-05, "loss": 1.1212, "step": 11367 }, { "epoch": 0.4447922372642617, "grad_norm": 0.0, "learning_rate": 1.2254497274964268e-05, "loss": 1.1243, "step": 11368 }, { "epoch": 0.44483136395649114, "grad_norm": 0.0, "learning_rate": 1.2253262650867008e-05, "loss": 1.2158, "step": 11369 }, { "epoch": 0.4448704906487206, "grad_norm": 0.0, "learning_rate": 1.2252027990585173e-05, "loss": 0.9609, "step": 11370 }, { "epoch": 0.44490961734094997, "grad_norm": 0.0, "learning_rate": 1.225079329413859e-05, "loss": 1.1245, "step": 11371 }, { "epoch": 0.4449487440331794, "grad_norm": 0.0, "learning_rate": 1.2249558561547088e-05, "loss": 1.0922, "step": 11372 }, { "epoch": 0.44498787072540885, "grad_norm": 0.0, "learning_rate": 1.2248323792830493e-05, "loss": 1.062, "step": 11373 }, { "epoch": 0.4450269974176383, "grad_norm": 0.0, "learning_rate": 1.2247088988008636e-05, "loss": 1.0881, "step": 11374 }, { "epoch": 0.44506612410986773, "grad_norm": 0.0, "learning_rate": 1.2245854147101344e-05, "loss": 1.0382, "step": 11375 }, { "epoch": 0.4451052508020972, "grad_norm": 0.0, "learning_rate": 1.2244619270128451e-05, "loss": 0.9681, "step": 11376 }, { "epoch": 0.4451443774943266, "grad_norm": 0.0, "learning_rate": 1.2243384357109785e-05, "loss": 1.051, "step": 11377 }, { "epoch": 0.44518350418655606, "grad_norm": 0.0, "learning_rate": 1.2242149408065176e-05, "loss": 0.9847, "step": 11378 }, { "epoch": 0.4452226308787855, "grad_norm": 0.0, "learning_rate": 1.2240914423014457e-05, "loss": 0.9973, "step": 11379 }, { "epoch": 0.44526175757101494, "grad_norm": 0.0, "learning_rate": 1.2239679401977462e-05, "loss": 1.0535, "step": 11380 }, { "epoch": 0.4453008842632444, "grad_norm": 0.0, "learning_rate": 1.2238444344974024e-05, "loss": 1.0635, "step": 11381 }, { "epoch": 0.4453400109554738, "grad_norm": 0.0, "learning_rate": 1.2237209252023969e-05, "loss": 1.0657, "step": 11382 }, { "epoch": 0.44537913764770326, "grad_norm": 0.0, "learning_rate": 1.223597412314714e-05, "loss": 0.9557, "step": 11383 }, { "epoch": 0.4454182643399327, "grad_norm": 0.0, "learning_rate": 1.2234738958363369e-05, "loss": 1.1657, "step": 11384 }, { "epoch": 0.44545739103216214, "grad_norm": 0.0, "learning_rate": 1.2233503757692492e-05, "loss": 1.0744, "step": 11385 }, { "epoch": 0.4454965177243916, "grad_norm": 0.0, "learning_rate": 1.223226852115434e-05, "loss": 0.9443, "step": 11386 }, { "epoch": 0.445535644416621, "grad_norm": 0.0, "learning_rate": 1.2231033248768752e-05, "loss": 0.9775, "step": 11387 }, { "epoch": 0.44557477110885046, "grad_norm": 0.0, "learning_rate": 1.222979794055557e-05, "loss": 1.0324, "step": 11388 }, { "epoch": 0.4456138978010799, "grad_norm": 0.0, "learning_rate": 1.2228562596534625e-05, "loss": 1.2, "step": 11389 }, { "epoch": 0.44565302449330935, "grad_norm": 0.0, "learning_rate": 1.2227327216725758e-05, "loss": 1.0108, "step": 11390 }, { "epoch": 0.4456921511855388, "grad_norm": 0.0, "learning_rate": 1.2226091801148807e-05, "loss": 1.0344, "step": 11391 }, { "epoch": 0.44573127787776823, "grad_norm": 0.0, "learning_rate": 1.2224856349823611e-05, "loss": 1.0085, "step": 11392 }, { "epoch": 0.44577040456999767, "grad_norm": 0.0, "learning_rate": 1.2223620862770007e-05, "loss": 0.9212, "step": 11393 }, { "epoch": 0.4458095312622271, "grad_norm": 0.0, "learning_rate": 1.222238534000784e-05, "loss": 1.0352, "step": 11394 }, { "epoch": 0.44584865795445655, "grad_norm": 0.0, "learning_rate": 1.2221149781556951e-05, "loss": 1.1642, "step": 11395 }, { "epoch": 0.445887784646686, "grad_norm": 0.0, "learning_rate": 1.2219914187437178e-05, "loss": 1.1166, "step": 11396 }, { "epoch": 0.44592691133891543, "grad_norm": 0.0, "learning_rate": 1.2218678557668365e-05, "loss": 1.0515, "step": 11397 }, { "epoch": 0.4459660380311449, "grad_norm": 0.0, "learning_rate": 1.2217442892270355e-05, "loss": 0.9935, "step": 11398 }, { "epoch": 0.44600516472337426, "grad_norm": 0.0, "learning_rate": 1.2216207191262991e-05, "loss": 1.1476, "step": 11399 }, { "epoch": 0.4460442914156037, "grad_norm": 0.0, "learning_rate": 1.2214971454666115e-05, "loss": 1.0507, "step": 11400 }, { "epoch": 0.44608341810783314, "grad_norm": 0.0, "learning_rate": 1.2213735682499578e-05, "loss": 0.9653, "step": 11401 }, { "epoch": 0.4461225448000626, "grad_norm": 0.0, "learning_rate": 1.2212499874783213e-05, "loss": 1.201, "step": 11402 }, { "epoch": 0.446161671492292, "grad_norm": 0.0, "learning_rate": 1.2211264031536876e-05, "loss": 1.1285, "step": 11403 }, { "epoch": 0.44620079818452146, "grad_norm": 0.0, "learning_rate": 1.2210028152780408e-05, "loss": 0.9949, "step": 11404 }, { "epoch": 0.4462399248767509, "grad_norm": 0.0, "learning_rate": 1.220879223853366e-05, "loss": 1.1611, "step": 11405 }, { "epoch": 0.44627905156898034, "grad_norm": 0.0, "learning_rate": 1.2207556288816474e-05, "loss": 0.9774, "step": 11406 }, { "epoch": 0.4463181782612098, "grad_norm": 0.0, "learning_rate": 1.22063203036487e-05, "loss": 1.0672, "step": 11407 }, { "epoch": 0.4463573049534392, "grad_norm": 0.0, "learning_rate": 1.2205084283050188e-05, "loss": 1.0475, "step": 11408 }, { "epoch": 0.44639643164566867, "grad_norm": 0.0, "learning_rate": 1.2203848227040784e-05, "loss": 1.0269, "step": 11409 }, { "epoch": 0.4464355583378981, "grad_norm": 0.0, "learning_rate": 1.2202612135640341e-05, "loss": 0.9963, "step": 11410 }, { "epoch": 0.44647468503012755, "grad_norm": 0.0, "learning_rate": 1.2201376008868707e-05, "loss": 1.0468, "step": 11411 }, { "epoch": 0.446513811722357, "grad_norm": 0.0, "learning_rate": 1.220013984674573e-05, "loss": 1.1161, "step": 11412 }, { "epoch": 0.44655293841458643, "grad_norm": 0.0, "learning_rate": 1.2198903649291265e-05, "loss": 0.9778, "step": 11413 }, { "epoch": 0.44659206510681587, "grad_norm": 0.0, "learning_rate": 1.2197667416525165e-05, "loss": 0.9954, "step": 11414 }, { "epoch": 0.4466311917990453, "grad_norm": 0.0, "learning_rate": 1.2196431148467278e-05, "loss": 1.1762, "step": 11415 }, { "epoch": 0.44667031849127475, "grad_norm": 0.0, "learning_rate": 1.2195194845137462e-05, "loss": 1.1075, "step": 11416 }, { "epoch": 0.4467094451835042, "grad_norm": 0.0, "learning_rate": 1.2193958506555566e-05, "loss": 1.1302, "step": 11417 }, { "epoch": 0.44674857187573364, "grad_norm": 0.0, "learning_rate": 1.2192722132741443e-05, "loss": 1.1231, "step": 11418 }, { "epoch": 0.4467876985679631, "grad_norm": 0.0, "learning_rate": 1.2191485723714953e-05, "loss": 1.1373, "step": 11419 }, { "epoch": 0.4468268252601925, "grad_norm": 0.0, "learning_rate": 1.2190249279495947e-05, "loss": 1.1153, "step": 11420 }, { "epoch": 0.44686595195242196, "grad_norm": 0.0, "learning_rate": 1.2189012800104284e-05, "loss": 1.0966, "step": 11421 }, { "epoch": 0.4469050786446514, "grad_norm": 0.0, "learning_rate": 1.2187776285559814e-05, "loss": 0.9264, "step": 11422 }, { "epoch": 0.44694420533688084, "grad_norm": 0.0, "learning_rate": 1.2186539735882402e-05, "loss": 1.0638, "step": 11423 }, { "epoch": 0.4469833320291103, "grad_norm": 0.0, "learning_rate": 1.21853031510919e-05, "loss": 0.967, "step": 11424 }, { "epoch": 0.4470224587213397, "grad_norm": 0.0, "learning_rate": 1.2184066531208169e-05, "loss": 0.9682, "step": 11425 }, { "epoch": 0.44706158541356916, "grad_norm": 0.0, "learning_rate": 1.2182829876251065e-05, "loss": 1.1525, "step": 11426 }, { "epoch": 0.4471007121057986, "grad_norm": 0.0, "learning_rate": 1.218159318624045e-05, "loss": 1.1353, "step": 11427 }, { "epoch": 0.447139838798028, "grad_norm": 0.0, "learning_rate": 1.2180356461196183e-05, "loss": 1.0921, "step": 11428 }, { "epoch": 0.44717896549025743, "grad_norm": 0.0, "learning_rate": 1.217911970113812e-05, "loss": 1.1416, "step": 11429 }, { "epoch": 0.44721809218248687, "grad_norm": 0.0, "learning_rate": 1.217788290608613e-05, "loss": 1.023, "step": 11430 }, { "epoch": 0.4472572188747163, "grad_norm": 0.0, "learning_rate": 1.2176646076060066e-05, "loss": 1.0801, "step": 11431 }, { "epoch": 0.44729634556694575, "grad_norm": 0.0, "learning_rate": 1.2175409211079794e-05, "loss": 0.9964, "step": 11432 }, { "epoch": 0.4473354722591752, "grad_norm": 0.0, "learning_rate": 1.2174172311165178e-05, "loss": 1.1008, "step": 11433 }, { "epoch": 0.44737459895140463, "grad_norm": 0.0, "learning_rate": 1.2172935376336077e-05, "loss": 1.0669, "step": 11434 }, { "epoch": 0.4474137256436341, "grad_norm": 0.0, "learning_rate": 1.2171698406612356e-05, "loss": 0.9619, "step": 11435 }, { "epoch": 0.4474528523358635, "grad_norm": 0.0, "learning_rate": 1.2170461402013883e-05, "loss": 0.9894, "step": 11436 }, { "epoch": 0.44749197902809296, "grad_norm": 0.0, "learning_rate": 1.2169224362560514e-05, "loss": 1.0341, "step": 11437 }, { "epoch": 0.4475311057203224, "grad_norm": 0.0, "learning_rate": 1.2167987288272124e-05, "loss": 0.9715, "step": 11438 }, { "epoch": 0.44757023241255184, "grad_norm": 0.0, "learning_rate": 1.2166750179168576e-05, "loss": 1.0901, "step": 11439 }, { "epoch": 0.4476093591047813, "grad_norm": 0.0, "learning_rate": 1.2165513035269733e-05, "loss": 1.0694, "step": 11440 }, { "epoch": 0.4476484857970107, "grad_norm": 0.0, "learning_rate": 1.2164275856595466e-05, "loss": 1.0539, "step": 11441 }, { "epoch": 0.44768761248924016, "grad_norm": 0.0, "learning_rate": 1.2163038643165636e-05, "loss": 1.1927, "step": 11442 }, { "epoch": 0.4477267391814696, "grad_norm": 0.0, "learning_rate": 1.216180139500012e-05, "loss": 1.0068, "step": 11443 }, { "epoch": 0.44776586587369904, "grad_norm": 0.0, "learning_rate": 1.2160564112118781e-05, "loss": 1.0246, "step": 11444 }, { "epoch": 0.4478049925659285, "grad_norm": 0.0, "learning_rate": 1.2159326794541492e-05, "loss": 1.0836, "step": 11445 }, { "epoch": 0.4478441192581579, "grad_norm": 0.0, "learning_rate": 1.2158089442288121e-05, "loss": 0.9771, "step": 11446 }, { "epoch": 0.44788324595038737, "grad_norm": 0.0, "learning_rate": 1.2156852055378534e-05, "loss": 1.0178, "step": 11447 }, { "epoch": 0.4479223726426168, "grad_norm": 0.0, "learning_rate": 1.2155614633832609e-05, "loss": 1.0466, "step": 11448 }, { "epoch": 0.44796149933484625, "grad_norm": 0.0, "learning_rate": 1.2154377177670211e-05, "loss": 1.0474, "step": 11449 }, { "epoch": 0.4480006260270757, "grad_norm": 0.0, "learning_rate": 1.2153139686911217e-05, "loss": 0.9951, "step": 11450 }, { "epoch": 0.44803975271930513, "grad_norm": 0.0, "learning_rate": 1.2151902161575496e-05, "loss": 1.1316, "step": 11451 }, { "epoch": 0.44807887941153457, "grad_norm": 0.0, "learning_rate": 1.2150664601682924e-05, "loss": 1.0155, "step": 11452 }, { "epoch": 0.448118006103764, "grad_norm": 0.0, "learning_rate": 1.2149427007253372e-05, "loss": 1.0154, "step": 11453 }, { "epoch": 0.44815713279599345, "grad_norm": 0.0, "learning_rate": 1.2148189378306718e-05, "loss": 1.1202, "step": 11454 }, { "epoch": 0.4481962594882229, "grad_norm": 0.0, "learning_rate": 1.2146951714862834e-05, "loss": 1.1854, "step": 11455 }, { "epoch": 0.4482353861804523, "grad_norm": 0.0, "learning_rate": 1.2145714016941594e-05, "loss": 0.9931, "step": 11456 }, { "epoch": 0.4482745128726817, "grad_norm": 0.0, "learning_rate": 1.2144476284562878e-05, "loss": 1.0679, "step": 11457 }, { "epoch": 0.44831363956491116, "grad_norm": 0.0, "learning_rate": 1.2143238517746558e-05, "loss": 1.1556, "step": 11458 }, { "epoch": 0.4483527662571406, "grad_norm": 0.0, "learning_rate": 1.2142000716512517e-05, "loss": 1.0948, "step": 11459 }, { "epoch": 0.44839189294937004, "grad_norm": 0.0, "learning_rate": 1.2140762880880623e-05, "loss": 0.9525, "step": 11460 }, { "epoch": 0.4484310196415995, "grad_norm": 0.0, "learning_rate": 1.2139525010870763e-05, "loss": 1.049, "step": 11461 }, { "epoch": 0.4484701463338289, "grad_norm": 0.0, "learning_rate": 1.213828710650281e-05, "loss": 1.0693, "step": 11462 }, { "epoch": 0.44850927302605836, "grad_norm": 0.0, "learning_rate": 1.2137049167796649e-05, "loss": 1.1088, "step": 11463 }, { "epoch": 0.4485483997182878, "grad_norm": 0.0, "learning_rate": 1.2135811194772152e-05, "loss": 1.1199, "step": 11464 }, { "epoch": 0.44858752641051725, "grad_norm": 0.0, "learning_rate": 1.2134573187449206e-05, "loss": 1.1398, "step": 11465 }, { "epoch": 0.4486266531027467, "grad_norm": 0.0, "learning_rate": 1.2133335145847691e-05, "loss": 0.8933, "step": 11466 }, { "epoch": 0.4486657797949761, "grad_norm": 0.0, "learning_rate": 1.2132097069987483e-05, "loss": 1.1588, "step": 11467 }, { "epoch": 0.44870490648720557, "grad_norm": 0.0, "learning_rate": 1.2130858959888469e-05, "loss": 1.0988, "step": 11468 }, { "epoch": 0.448744033179435, "grad_norm": 0.0, "learning_rate": 1.2129620815570531e-05, "loss": 1.0775, "step": 11469 }, { "epoch": 0.44878315987166445, "grad_norm": 0.0, "learning_rate": 1.2128382637053552e-05, "loss": 1.1796, "step": 11470 }, { "epoch": 0.4488222865638939, "grad_norm": 0.0, "learning_rate": 1.2127144424357413e-05, "loss": 1.0488, "step": 11471 }, { "epoch": 0.44886141325612333, "grad_norm": 0.0, "learning_rate": 1.2125906177502002e-05, "loss": 1.0204, "step": 11472 }, { "epoch": 0.4489005399483528, "grad_norm": 0.0, "learning_rate": 1.2124667896507199e-05, "loss": 0.977, "step": 11473 }, { "epoch": 0.4489396666405822, "grad_norm": 0.0, "learning_rate": 1.2123429581392894e-05, "loss": 1.1868, "step": 11474 }, { "epoch": 0.44897879333281165, "grad_norm": 0.0, "learning_rate": 1.2122191232178972e-05, "loss": 1.178, "step": 11475 }, { "epoch": 0.4490179200250411, "grad_norm": 0.0, "learning_rate": 1.2120952848885315e-05, "loss": 0.9553, "step": 11476 }, { "epoch": 0.44905704671727054, "grad_norm": 0.0, "learning_rate": 1.2119714431531814e-05, "loss": 1.0613, "step": 11477 }, { "epoch": 0.4490961734095, "grad_norm": 0.0, "learning_rate": 1.2118475980138358e-05, "loss": 1.1559, "step": 11478 }, { "epoch": 0.4491353001017294, "grad_norm": 0.0, "learning_rate": 1.211723749472483e-05, "loss": 0.9889, "step": 11479 }, { "epoch": 0.44917442679395886, "grad_norm": 0.0, "learning_rate": 1.211599897531112e-05, "loss": 1.0858, "step": 11480 }, { "epoch": 0.4492135534861883, "grad_norm": 0.0, "learning_rate": 1.211476042191712e-05, "loss": 1.162, "step": 11481 }, { "epoch": 0.44925268017841774, "grad_norm": 0.0, "learning_rate": 1.2113521834562716e-05, "loss": 1.1339, "step": 11482 }, { "epoch": 0.4492918068706472, "grad_norm": 0.0, "learning_rate": 1.2112283213267801e-05, "loss": 1.1069, "step": 11483 }, { "epoch": 0.4493309335628766, "grad_norm": 0.0, "learning_rate": 1.2111044558052263e-05, "loss": 1.072, "step": 11484 }, { "epoch": 0.449370060255106, "grad_norm": 0.0, "learning_rate": 1.2109805868935995e-05, "loss": 1.0454, "step": 11485 }, { "epoch": 0.44940918694733545, "grad_norm": 0.0, "learning_rate": 1.210856714593889e-05, "loss": 1.0776, "step": 11486 }, { "epoch": 0.4494483136395649, "grad_norm": 0.0, "learning_rate": 1.2107328389080837e-05, "loss": 1.0894, "step": 11487 }, { "epoch": 0.44948744033179433, "grad_norm": 0.0, "learning_rate": 1.2106089598381732e-05, "loss": 1.054, "step": 11488 }, { "epoch": 0.44952656702402377, "grad_norm": 0.0, "learning_rate": 1.2104850773861466e-05, "loss": 1.0684, "step": 11489 }, { "epoch": 0.4495656937162532, "grad_norm": 0.0, "learning_rate": 1.2103611915539934e-05, "loss": 1.0987, "step": 11490 }, { "epoch": 0.44960482040848265, "grad_norm": 0.0, "learning_rate": 1.2102373023437031e-05, "loss": 1.0931, "step": 11491 }, { "epoch": 0.4496439471007121, "grad_norm": 0.0, "learning_rate": 1.2101134097572654e-05, "loss": 0.8803, "step": 11492 }, { "epoch": 0.44968307379294153, "grad_norm": 0.0, "learning_rate": 1.209989513796669e-05, "loss": 1.1414, "step": 11493 }, { "epoch": 0.449722200485171, "grad_norm": 0.0, "learning_rate": 1.2098656144639047e-05, "loss": 1.0258, "step": 11494 }, { "epoch": 0.4497613271774004, "grad_norm": 0.0, "learning_rate": 1.2097417117609615e-05, "loss": 1.132, "step": 11495 }, { "epoch": 0.44980045386962986, "grad_norm": 0.0, "learning_rate": 1.209617805689829e-05, "loss": 1.0677, "step": 11496 }, { "epoch": 0.4498395805618593, "grad_norm": 0.0, "learning_rate": 1.2094938962524975e-05, "loss": 0.993, "step": 11497 }, { "epoch": 0.44987870725408874, "grad_norm": 0.0, "learning_rate": 1.2093699834509565e-05, "loss": 1.0667, "step": 11498 }, { "epoch": 0.4499178339463182, "grad_norm": 0.0, "learning_rate": 1.2092460672871959e-05, "loss": 1.0675, "step": 11499 }, { "epoch": 0.4499569606385476, "grad_norm": 0.0, "learning_rate": 1.2091221477632056e-05, "loss": 0.9909, "step": 11500 }, { "epoch": 0.44999608733077706, "grad_norm": 0.0, "learning_rate": 1.2089982248809755e-05, "loss": 1.0534, "step": 11501 }, { "epoch": 0.4500352140230065, "grad_norm": 0.0, "learning_rate": 1.208874298642496e-05, "loss": 1.0395, "step": 11502 }, { "epoch": 0.45007434071523594, "grad_norm": 0.0, "learning_rate": 1.2087503690497571e-05, "loss": 1.0444, "step": 11503 }, { "epoch": 0.4501134674074654, "grad_norm": 0.0, "learning_rate": 1.2086264361047487e-05, "loss": 1.0285, "step": 11504 }, { "epoch": 0.4501525940996948, "grad_norm": 0.0, "learning_rate": 1.208502499809461e-05, "loss": 1.0175, "step": 11505 }, { "epoch": 0.45019172079192427, "grad_norm": 0.0, "learning_rate": 1.2083785601658846e-05, "loss": 0.9543, "step": 11506 }, { "epoch": 0.4502308474841537, "grad_norm": 0.0, "learning_rate": 1.2082546171760097e-05, "loss": 1.0559, "step": 11507 }, { "epoch": 0.45026997417638315, "grad_norm": 0.0, "learning_rate": 1.2081306708418266e-05, "loss": 1.0205, "step": 11508 }, { "epoch": 0.4503091008686126, "grad_norm": 0.0, "learning_rate": 1.2080067211653255e-05, "loss": 1.0913, "step": 11509 }, { "epoch": 0.45034822756084203, "grad_norm": 0.0, "learning_rate": 1.2078827681484973e-05, "loss": 1.0822, "step": 11510 }, { "epoch": 0.45038735425307147, "grad_norm": 0.0, "learning_rate": 1.2077588117933324e-05, "loss": 1.2026, "step": 11511 }, { "epoch": 0.4504264809453009, "grad_norm": 0.0, "learning_rate": 1.207634852101821e-05, "loss": 1.1325, "step": 11512 }, { "epoch": 0.4504656076375303, "grad_norm": 0.0, "learning_rate": 1.2075108890759543e-05, "loss": 0.9723, "step": 11513 }, { "epoch": 0.45050473432975974, "grad_norm": 0.0, "learning_rate": 1.2073869227177228e-05, "loss": 1.1025, "step": 11514 }, { "epoch": 0.4505438610219892, "grad_norm": 0.0, "learning_rate": 1.2072629530291171e-05, "loss": 0.9984, "step": 11515 }, { "epoch": 0.4505829877142186, "grad_norm": 0.0, "learning_rate": 1.207138980012128e-05, "loss": 1.2245, "step": 11516 }, { "epoch": 0.45062211440644806, "grad_norm": 0.0, "learning_rate": 1.2070150036687467e-05, "loss": 1.0398, "step": 11517 }, { "epoch": 0.4506612410986775, "grad_norm": 0.0, "learning_rate": 1.2068910240009636e-05, "loss": 1.0813, "step": 11518 }, { "epoch": 0.45070036779090694, "grad_norm": 0.0, "learning_rate": 1.20676704101077e-05, "loss": 1.0804, "step": 11519 }, { "epoch": 0.4507394944831364, "grad_norm": 0.0, "learning_rate": 1.206643054700157e-05, "loss": 1.0657, "step": 11520 }, { "epoch": 0.4507786211753658, "grad_norm": 0.0, "learning_rate": 1.2065190650711151e-05, "loss": 1.0894, "step": 11521 }, { "epoch": 0.45081774786759526, "grad_norm": 0.0, "learning_rate": 1.206395072125636e-05, "loss": 1.0772, "step": 11522 }, { "epoch": 0.4508568745598247, "grad_norm": 0.0, "learning_rate": 1.2062710758657109e-05, "loss": 1.0732, "step": 11523 }, { "epoch": 0.45089600125205415, "grad_norm": 0.0, "learning_rate": 1.2061470762933305e-05, "loss": 1.0503, "step": 11524 }, { "epoch": 0.4509351279442836, "grad_norm": 0.0, "learning_rate": 1.2060230734104864e-05, "loss": 1.0126, "step": 11525 }, { "epoch": 0.45097425463651303, "grad_norm": 0.0, "learning_rate": 1.20589906721917e-05, "loss": 1.0066, "step": 11526 }, { "epoch": 0.45101338132874247, "grad_norm": 0.0, "learning_rate": 1.2057750577213726e-05, "loss": 1.0497, "step": 11527 }, { "epoch": 0.4510525080209719, "grad_norm": 0.0, "learning_rate": 1.2056510449190855e-05, "loss": 1.0501, "step": 11528 }, { "epoch": 0.45109163471320135, "grad_norm": 0.0, "learning_rate": 1.2055270288143001e-05, "loss": 1.0777, "step": 11529 }, { "epoch": 0.4511307614054308, "grad_norm": 0.0, "learning_rate": 1.2054030094090086e-05, "loss": 1.17, "step": 11530 }, { "epoch": 0.45116988809766023, "grad_norm": 0.0, "learning_rate": 1.2052789867052018e-05, "loss": 0.9072, "step": 11531 }, { "epoch": 0.4512090147898897, "grad_norm": 0.0, "learning_rate": 1.205154960704872e-05, "loss": 1.0733, "step": 11532 }, { "epoch": 0.4512481414821191, "grad_norm": 0.0, "learning_rate": 1.2050309314100107e-05, "loss": 1.0955, "step": 11533 }, { "epoch": 0.45128726817434855, "grad_norm": 0.0, "learning_rate": 1.204906898822609e-05, "loss": 1.0901, "step": 11534 }, { "epoch": 0.451326394866578, "grad_norm": 0.0, "learning_rate": 1.2047828629446597e-05, "loss": 1.0215, "step": 11535 }, { "epoch": 0.45136552155880744, "grad_norm": 0.0, "learning_rate": 1.204658823778154e-05, "loss": 0.9851, "step": 11536 }, { "epoch": 0.4514046482510369, "grad_norm": 0.0, "learning_rate": 1.2045347813250842e-05, "loss": 1.1065, "step": 11537 }, { "epoch": 0.4514437749432663, "grad_norm": 0.0, "learning_rate": 1.204410735587442e-05, "loss": 1.0569, "step": 11538 }, { "epoch": 0.45148290163549576, "grad_norm": 0.0, "learning_rate": 1.2042866865672195e-05, "loss": 1.0792, "step": 11539 }, { "epoch": 0.4515220283277252, "grad_norm": 0.0, "learning_rate": 1.204162634266409e-05, "loss": 1.1027, "step": 11540 }, { "epoch": 0.4515611550199546, "grad_norm": 0.0, "learning_rate": 1.2040385786870023e-05, "loss": 0.8517, "step": 11541 }, { "epoch": 0.451600281712184, "grad_norm": 0.0, "learning_rate": 1.2039145198309916e-05, "loss": 1.0419, "step": 11542 }, { "epoch": 0.45163940840441347, "grad_norm": 0.0, "learning_rate": 1.2037904577003693e-05, "loss": 1.1265, "step": 11543 }, { "epoch": 0.4516785350966429, "grad_norm": 0.0, "learning_rate": 1.2036663922971279e-05, "loss": 1.0552, "step": 11544 }, { "epoch": 0.45171766178887235, "grad_norm": 0.0, "learning_rate": 1.2035423236232591e-05, "loss": 1.2568, "step": 11545 }, { "epoch": 0.4517567884811018, "grad_norm": 0.0, "learning_rate": 1.2034182516807559e-05, "loss": 1.0436, "step": 11546 }, { "epoch": 0.45179591517333123, "grad_norm": 0.0, "learning_rate": 1.2032941764716102e-05, "loss": 1.0236, "step": 11547 }, { "epoch": 0.45183504186556067, "grad_norm": 0.0, "learning_rate": 1.203170097997815e-05, "loss": 1.086, "step": 11548 }, { "epoch": 0.4518741685577901, "grad_norm": 0.0, "learning_rate": 1.2030460162613624e-05, "loss": 1.0268, "step": 11549 }, { "epoch": 0.45191329525001955, "grad_norm": 0.0, "learning_rate": 1.2029219312642453e-05, "loss": 1.093, "step": 11550 }, { "epoch": 0.451952421942249, "grad_norm": 0.0, "learning_rate": 1.202797843008456e-05, "loss": 1.1842, "step": 11551 }, { "epoch": 0.45199154863447843, "grad_norm": 0.0, "learning_rate": 1.2026737514959881e-05, "loss": 1.1505, "step": 11552 }, { "epoch": 0.4520306753267079, "grad_norm": 0.0, "learning_rate": 1.2025496567288335e-05, "loss": 0.9791, "step": 11553 }, { "epoch": 0.4520698020189373, "grad_norm": 0.0, "learning_rate": 1.2024255587089848e-05, "loss": 1.0903, "step": 11554 }, { "epoch": 0.45210892871116676, "grad_norm": 0.0, "learning_rate": 1.2023014574384358e-05, "loss": 0.9553, "step": 11555 }, { "epoch": 0.4521480554033962, "grad_norm": 0.0, "learning_rate": 1.2021773529191784e-05, "loss": 0.9722, "step": 11556 }, { "epoch": 0.45218718209562564, "grad_norm": 0.0, "learning_rate": 1.2020532451532063e-05, "loss": 1.0564, "step": 11557 }, { "epoch": 0.4522263087878551, "grad_norm": 0.0, "learning_rate": 1.2019291341425121e-05, "loss": 1.1661, "step": 11558 }, { "epoch": 0.4522654354800845, "grad_norm": 0.0, "learning_rate": 1.2018050198890894e-05, "loss": 1.0128, "step": 11559 }, { "epoch": 0.45230456217231396, "grad_norm": 0.0, "learning_rate": 1.2016809023949307e-05, "loss": 1.1328, "step": 11560 }, { "epoch": 0.4523436888645434, "grad_norm": 0.0, "learning_rate": 1.2015567816620296e-05, "loss": 1.1613, "step": 11561 }, { "epoch": 0.45238281555677284, "grad_norm": 0.0, "learning_rate": 1.2014326576923792e-05, "loss": 1.0013, "step": 11562 }, { "epoch": 0.4524219422490023, "grad_norm": 0.0, "learning_rate": 1.2013085304879724e-05, "loss": 1.1223, "step": 11563 }, { "epoch": 0.4524610689412317, "grad_norm": 0.0, "learning_rate": 1.2011844000508031e-05, "loss": 1.0977, "step": 11564 }, { "epoch": 0.45250019563346117, "grad_norm": 0.0, "learning_rate": 1.2010602663828644e-05, "loss": 1.09, "step": 11565 }, { "epoch": 0.4525393223256906, "grad_norm": 0.0, "learning_rate": 1.2009361294861497e-05, "loss": 1.0037, "step": 11566 }, { "epoch": 0.45257844901792005, "grad_norm": 0.0, "learning_rate": 1.2008119893626527e-05, "loss": 1.0367, "step": 11567 }, { "epoch": 0.4526175757101495, "grad_norm": 0.0, "learning_rate": 1.2006878460143666e-05, "loss": 1.047, "step": 11568 }, { "epoch": 0.45265670240237893, "grad_norm": 0.0, "learning_rate": 1.2005636994432849e-05, "loss": 0.983, "step": 11569 }, { "epoch": 0.4526958290946083, "grad_norm": 0.0, "learning_rate": 1.2004395496514021e-05, "loss": 1.091, "step": 11570 }, { "epoch": 0.45273495578683776, "grad_norm": 0.0, "learning_rate": 1.2003153966407108e-05, "loss": 1.0183, "step": 11571 }, { "epoch": 0.4527740824790672, "grad_norm": 0.0, "learning_rate": 1.2001912404132057e-05, "loss": 0.9735, "step": 11572 }, { "epoch": 0.45281320917129664, "grad_norm": 0.0, "learning_rate": 1.20006708097088e-05, "loss": 1.0997, "step": 11573 }, { "epoch": 0.4528523358635261, "grad_norm": 0.0, "learning_rate": 1.1999429183157273e-05, "loss": 1.0159, "step": 11574 }, { "epoch": 0.4528914625557555, "grad_norm": 0.0, "learning_rate": 1.1998187524497422e-05, "loss": 1.017, "step": 11575 }, { "epoch": 0.45293058924798496, "grad_norm": 0.0, "learning_rate": 1.1996945833749184e-05, "loss": 1.0102, "step": 11576 }, { "epoch": 0.4529697159402144, "grad_norm": 0.0, "learning_rate": 1.1995704110932499e-05, "loss": 1.0867, "step": 11577 }, { "epoch": 0.45300884263244384, "grad_norm": 0.0, "learning_rate": 1.19944623560673e-05, "loss": 0.9226, "step": 11578 }, { "epoch": 0.4530479693246733, "grad_norm": 0.0, "learning_rate": 1.199322056917354e-05, "loss": 1.2022, "step": 11579 }, { "epoch": 0.4530870960169027, "grad_norm": 0.0, "learning_rate": 1.1991978750271155e-05, "loss": 1.0206, "step": 11580 }, { "epoch": 0.45312622270913216, "grad_norm": 0.0, "learning_rate": 1.1990736899380089e-05, "loss": 1.0587, "step": 11581 }, { "epoch": 0.4531653494013616, "grad_norm": 0.0, "learning_rate": 1.198949501652028e-05, "loss": 1.1339, "step": 11582 }, { "epoch": 0.45320447609359105, "grad_norm": 0.0, "learning_rate": 1.1988253101711675e-05, "loss": 1.1195, "step": 11583 }, { "epoch": 0.4532436027858205, "grad_norm": 0.0, "learning_rate": 1.1987011154974218e-05, "loss": 1.1347, "step": 11584 }, { "epoch": 0.45328272947804993, "grad_norm": 0.0, "learning_rate": 1.1985769176327848e-05, "loss": 1.0927, "step": 11585 }, { "epoch": 0.45332185617027937, "grad_norm": 0.0, "learning_rate": 1.1984527165792517e-05, "loss": 1.116, "step": 11586 }, { "epoch": 0.4533609828625088, "grad_norm": 0.0, "learning_rate": 1.1983285123388163e-05, "loss": 1.0757, "step": 11587 }, { "epoch": 0.45340010955473825, "grad_norm": 0.0, "learning_rate": 1.1982043049134739e-05, "loss": 1.0708, "step": 11588 }, { "epoch": 0.4534392362469677, "grad_norm": 0.0, "learning_rate": 1.1980800943052184e-05, "loss": 1.0648, "step": 11589 }, { "epoch": 0.45347836293919713, "grad_norm": 0.0, "learning_rate": 1.197955880516045e-05, "loss": 1.1282, "step": 11590 }, { "epoch": 0.4535174896314266, "grad_norm": 0.0, "learning_rate": 1.1978316635479483e-05, "loss": 0.9976, "step": 11591 }, { "epoch": 0.453556616323656, "grad_norm": 0.0, "learning_rate": 1.1977074434029228e-05, "loss": 0.9718, "step": 11592 }, { "epoch": 0.45359574301588546, "grad_norm": 0.0, "learning_rate": 1.1975832200829635e-05, "loss": 0.9866, "step": 11593 }, { "epoch": 0.4536348697081149, "grad_norm": 0.0, "learning_rate": 1.1974589935900651e-05, "loss": 0.9863, "step": 11594 }, { "epoch": 0.45367399640034434, "grad_norm": 0.0, "learning_rate": 1.1973347639262231e-05, "loss": 1.0758, "step": 11595 }, { "epoch": 0.4537131230925738, "grad_norm": 0.0, "learning_rate": 1.1972105310934318e-05, "loss": 1.0438, "step": 11596 }, { "epoch": 0.4537522497848032, "grad_norm": 0.0, "learning_rate": 1.1970862950936866e-05, "loss": 1.1304, "step": 11597 }, { "epoch": 0.4537913764770326, "grad_norm": 0.0, "learning_rate": 1.1969620559289824e-05, "loss": 1.0584, "step": 11598 }, { "epoch": 0.45383050316926205, "grad_norm": 0.0, "learning_rate": 1.1968378136013144e-05, "loss": 0.9836, "step": 11599 }, { "epoch": 0.4538696298614915, "grad_norm": 0.0, "learning_rate": 1.1967135681126777e-05, "loss": 1.0228, "step": 11600 }, { "epoch": 0.4539087565537209, "grad_norm": 0.0, "learning_rate": 1.1965893194650678e-05, "loss": 1.1841, "step": 11601 }, { "epoch": 0.45394788324595037, "grad_norm": 0.0, "learning_rate": 1.1964650676604796e-05, "loss": 1.1618, "step": 11602 }, { "epoch": 0.4539870099381798, "grad_norm": 0.0, "learning_rate": 1.1963408127009087e-05, "loss": 0.9233, "step": 11603 }, { "epoch": 0.45402613663040925, "grad_norm": 0.0, "learning_rate": 1.1962165545883501e-05, "loss": 1.2534, "step": 11604 }, { "epoch": 0.4540652633226387, "grad_norm": 0.0, "learning_rate": 1.1960922933247999e-05, "loss": 1.0164, "step": 11605 }, { "epoch": 0.45410439001486813, "grad_norm": 0.0, "learning_rate": 1.195968028912253e-05, "loss": 1.1519, "step": 11606 }, { "epoch": 0.45414351670709757, "grad_norm": 0.0, "learning_rate": 1.195843761352705e-05, "loss": 0.8846, "step": 11607 }, { "epoch": 0.454182643399327, "grad_norm": 0.0, "learning_rate": 1.1957194906481517e-05, "loss": 1.0179, "step": 11608 }, { "epoch": 0.45422177009155645, "grad_norm": 0.0, "learning_rate": 1.1955952168005889e-05, "loss": 0.9063, "step": 11609 }, { "epoch": 0.4542608967837859, "grad_norm": 0.0, "learning_rate": 1.1954709398120117e-05, "loss": 1.0466, "step": 11610 }, { "epoch": 0.45430002347601534, "grad_norm": 0.0, "learning_rate": 1.195346659684416e-05, "loss": 0.9587, "step": 11611 }, { "epoch": 0.4543391501682448, "grad_norm": 0.0, "learning_rate": 1.1952223764197984e-05, "loss": 1.1337, "step": 11612 }, { "epoch": 0.4543782768604742, "grad_norm": 0.0, "learning_rate": 1.1950980900201532e-05, "loss": 0.9568, "step": 11613 }, { "epoch": 0.45441740355270366, "grad_norm": 0.0, "learning_rate": 1.1949738004874779e-05, "loss": 1.1356, "step": 11614 }, { "epoch": 0.4544565302449331, "grad_norm": 0.0, "learning_rate": 1.1948495078237673e-05, "loss": 0.992, "step": 11615 }, { "epoch": 0.45449565693716254, "grad_norm": 0.0, "learning_rate": 1.1947252120310175e-05, "loss": 1.1241, "step": 11616 }, { "epoch": 0.454534783629392, "grad_norm": 0.0, "learning_rate": 1.1946009131112252e-05, "loss": 0.9921, "step": 11617 }, { "epoch": 0.4545739103216214, "grad_norm": 0.0, "learning_rate": 1.1944766110663858e-05, "loss": 1.0079, "step": 11618 }, { "epoch": 0.45461303701385086, "grad_norm": 0.0, "learning_rate": 1.194352305898496e-05, "loss": 1.0542, "step": 11619 }, { "epoch": 0.4546521637060803, "grad_norm": 0.0, "learning_rate": 1.1942279976095518e-05, "loss": 1.0711, "step": 11620 }, { "epoch": 0.45469129039830974, "grad_norm": 0.0, "learning_rate": 1.1941036862015491e-05, "loss": 0.9493, "step": 11621 }, { "epoch": 0.4547304170905392, "grad_norm": 0.0, "learning_rate": 1.1939793716764845e-05, "loss": 1.0552, "step": 11622 }, { "epoch": 0.4547695437827686, "grad_norm": 0.0, "learning_rate": 1.193855054036354e-05, "loss": 1.1012, "step": 11623 }, { "epoch": 0.45480867047499807, "grad_norm": 0.0, "learning_rate": 1.1937307332831547e-05, "loss": 1.0044, "step": 11624 }, { "epoch": 0.4548477971672275, "grad_norm": 0.0, "learning_rate": 1.1936064094188821e-05, "loss": 1.1233, "step": 11625 }, { "epoch": 0.45488692385945695, "grad_norm": 0.0, "learning_rate": 1.1934820824455336e-05, "loss": 1.1003, "step": 11626 }, { "epoch": 0.45492605055168633, "grad_norm": 0.0, "learning_rate": 1.193357752365105e-05, "loss": 1.0, "step": 11627 }, { "epoch": 0.4549651772439158, "grad_norm": 0.0, "learning_rate": 1.1932334191795934e-05, "loss": 0.997, "step": 11628 }, { "epoch": 0.4550043039361452, "grad_norm": 0.0, "learning_rate": 1.193109082890995e-05, "loss": 0.9401, "step": 11629 }, { "epoch": 0.45504343062837466, "grad_norm": 0.0, "learning_rate": 1.192984743501307e-05, "loss": 1.02, "step": 11630 }, { "epoch": 0.4550825573206041, "grad_norm": 0.0, "learning_rate": 1.1928604010125258e-05, "loss": 1.1003, "step": 11631 }, { "epoch": 0.45512168401283354, "grad_norm": 0.0, "learning_rate": 1.1927360554266478e-05, "loss": 1.1802, "step": 11632 }, { "epoch": 0.455160810705063, "grad_norm": 0.0, "learning_rate": 1.1926117067456709e-05, "loss": 1.0512, "step": 11633 }, { "epoch": 0.4551999373972924, "grad_norm": 0.0, "learning_rate": 1.1924873549715907e-05, "loss": 0.9959, "step": 11634 }, { "epoch": 0.45523906408952186, "grad_norm": 0.0, "learning_rate": 1.1923630001064052e-05, "loss": 1.1052, "step": 11635 }, { "epoch": 0.4552781907817513, "grad_norm": 0.0, "learning_rate": 1.1922386421521108e-05, "loss": 1.0558, "step": 11636 }, { "epoch": 0.45531731747398074, "grad_norm": 0.0, "learning_rate": 1.1921142811107048e-05, "loss": 1.0563, "step": 11637 }, { "epoch": 0.4553564441662102, "grad_norm": 0.0, "learning_rate": 1.191989916984184e-05, "loss": 1.1472, "step": 11638 }, { "epoch": 0.4553955708584396, "grad_norm": 0.0, "learning_rate": 1.1918655497745459e-05, "loss": 1.0503, "step": 11639 }, { "epoch": 0.45543469755066907, "grad_norm": 0.0, "learning_rate": 1.1917411794837876e-05, "loss": 1.0734, "step": 11640 }, { "epoch": 0.4554738242428985, "grad_norm": 0.0, "learning_rate": 1.191616806113906e-05, "loss": 1.1128, "step": 11641 }, { "epoch": 0.45551295093512795, "grad_norm": 0.0, "learning_rate": 1.1914924296668986e-05, "loss": 1.052, "step": 11642 }, { "epoch": 0.4555520776273574, "grad_norm": 0.0, "learning_rate": 1.1913680501447626e-05, "loss": 1.147, "step": 11643 }, { "epoch": 0.45559120431958683, "grad_norm": 0.0, "learning_rate": 1.1912436675494958e-05, "loss": 0.9645, "step": 11644 }, { "epoch": 0.45563033101181627, "grad_norm": 0.0, "learning_rate": 1.1911192818830952e-05, "loss": 1.0394, "step": 11645 }, { "epoch": 0.4556694577040457, "grad_norm": 0.0, "learning_rate": 1.1909948931475585e-05, "loss": 1.0757, "step": 11646 }, { "epoch": 0.45570858439627515, "grad_norm": 0.0, "learning_rate": 1.1908705013448832e-05, "loss": 1.0029, "step": 11647 }, { "epoch": 0.4557477110885046, "grad_norm": 0.0, "learning_rate": 1.1907461064770667e-05, "loss": 1.0185, "step": 11648 }, { "epoch": 0.45578683778073403, "grad_norm": 0.0, "learning_rate": 1.1906217085461068e-05, "loss": 1.0832, "step": 11649 }, { "epoch": 0.4558259644729635, "grad_norm": 0.0, "learning_rate": 1.1904973075540013e-05, "loss": 0.9672, "step": 11650 }, { "epoch": 0.4558650911651929, "grad_norm": 0.0, "learning_rate": 1.1903729035027474e-05, "loss": 1.1244, "step": 11651 }, { "epoch": 0.45590421785742236, "grad_norm": 0.0, "learning_rate": 1.1902484963943433e-05, "loss": 0.9224, "step": 11652 }, { "epoch": 0.4559433445496518, "grad_norm": 0.0, "learning_rate": 1.1901240862307868e-05, "loss": 1.0889, "step": 11653 }, { "epoch": 0.45598247124188124, "grad_norm": 0.0, "learning_rate": 1.189999673014076e-05, "loss": 1.015, "step": 11654 }, { "epoch": 0.4560215979341106, "grad_norm": 0.0, "learning_rate": 1.189875256746208e-05, "loss": 1.0053, "step": 11655 }, { "epoch": 0.45606072462634006, "grad_norm": 0.0, "learning_rate": 1.1897508374291817e-05, "loss": 1.1106, "step": 11656 }, { "epoch": 0.4560998513185695, "grad_norm": 0.0, "learning_rate": 1.1896264150649948e-05, "loss": 1.1202, "step": 11657 }, { "epoch": 0.45613897801079895, "grad_norm": 0.0, "learning_rate": 1.1895019896556447e-05, "loss": 1.0353, "step": 11658 }, { "epoch": 0.4561781047030284, "grad_norm": 0.0, "learning_rate": 1.1893775612031306e-05, "loss": 1.0861, "step": 11659 }, { "epoch": 0.4562172313952578, "grad_norm": 0.0, "learning_rate": 1.1892531297094502e-05, "loss": 0.9599, "step": 11660 }, { "epoch": 0.45625635808748727, "grad_norm": 0.0, "learning_rate": 1.1891286951766014e-05, "loss": 1.0076, "step": 11661 }, { "epoch": 0.4562954847797167, "grad_norm": 0.0, "learning_rate": 1.189004257606583e-05, "loss": 1.053, "step": 11662 }, { "epoch": 0.45633461147194615, "grad_norm": 0.0, "learning_rate": 1.1888798170013929e-05, "loss": 1.1465, "step": 11663 }, { "epoch": 0.4563737381641756, "grad_norm": 0.0, "learning_rate": 1.1887553733630297e-05, "loss": 0.9963, "step": 11664 }, { "epoch": 0.45641286485640503, "grad_norm": 0.0, "learning_rate": 1.1886309266934916e-05, "loss": 0.9149, "step": 11665 }, { "epoch": 0.4564519915486345, "grad_norm": 0.0, "learning_rate": 1.1885064769947774e-05, "loss": 1.1378, "step": 11666 }, { "epoch": 0.4564911182408639, "grad_norm": 0.0, "learning_rate": 1.188382024268885e-05, "loss": 0.785, "step": 11667 }, { "epoch": 0.45653024493309335, "grad_norm": 0.0, "learning_rate": 1.1882575685178136e-05, "loss": 1.0427, "step": 11668 }, { "epoch": 0.4565693716253228, "grad_norm": 0.0, "learning_rate": 1.1881331097435619e-05, "loss": 1.0197, "step": 11669 }, { "epoch": 0.45660849831755224, "grad_norm": 0.0, "learning_rate": 1.1880086479481277e-05, "loss": 1.0767, "step": 11670 }, { "epoch": 0.4566476250097817, "grad_norm": 0.0, "learning_rate": 1.1878841831335103e-05, "loss": 1.0859, "step": 11671 }, { "epoch": 0.4566867517020111, "grad_norm": 0.0, "learning_rate": 1.1877597153017084e-05, "loss": 1.179, "step": 11672 }, { "epoch": 0.45672587839424056, "grad_norm": 0.0, "learning_rate": 1.1876352444547208e-05, "loss": 1.1669, "step": 11673 }, { "epoch": 0.45676500508647, "grad_norm": 0.0, "learning_rate": 1.1875107705945461e-05, "loss": 0.9491, "step": 11674 }, { "epoch": 0.45680413177869944, "grad_norm": 0.0, "learning_rate": 1.1873862937231837e-05, "loss": 0.9946, "step": 11675 }, { "epoch": 0.4568432584709289, "grad_norm": 0.0, "learning_rate": 1.187261813842632e-05, "loss": 1.1338, "step": 11676 }, { "epoch": 0.4568823851631583, "grad_norm": 0.0, "learning_rate": 1.1871373309548904e-05, "loss": 1.0739, "step": 11677 }, { "epoch": 0.45692151185538776, "grad_norm": 0.0, "learning_rate": 1.1870128450619578e-05, "loss": 1.0203, "step": 11678 }, { "epoch": 0.4569606385476172, "grad_norm": 0.0, "learning_rate": 1.186888356165833e-05, "loss": 1.0668, "step": 11679 }, { "epoch": 0.45699976523984664, "grad_norm": 0.0, "learning_rate": 1.1867638642685155e-05, "loss": 1.1113, "step": 11680 }, { "epoch": 0.4570388919320761, "grad_norm": 0.0, "learning_rate": 1.1866393693720043e-05, "loss": 1.0399, "step": 11681 }, { "epoch": 0.4570780186243055, "grad_norm": 0.0, "learning_rate": 1.1865148714782988e-05, "loss": 1.1054, "step": 11682 }, { "epoch": 0.45711714531653497, "grad_norm": 0.0, "learning_rate": 1.186390370589398e-05, "loss": 1.1238, "step": 11683 }, { "epoch": 0.45715627200876435, "grad_norm": 0.0, "learning_rate": 1.1862658667073018e-05, "loss": 1.179, "step": 11684 }, { "epoch": 0.4571953987009938, "grad_norm": 0.0, "learning_rate": 1.1861413598340086e-05, "loss": 1.2006, "step": 11685 }, { "epoch": 0.45723452539322323, "grad_norm": 0.0, "learning_rate": 1.186016849971519e-05, "loss": 1.0273, "step": 11686 }, { "epoch": 0.4572736520854527, "grad_norm": 0.0, "learning_rate": 1.1858923371218314e-05, "loss": 1.1581, "step": 11687 }, { "epoch": 0.4573127787776821, "grad_norm": 0.0, "learning_rate": 1.185767821286946e-05, "loss": 1.0727, "step": 11688 }, { "epoch": 0.45735190546991156, "grad_norm": 0.0, "learning_rate": 1.1856433024688624e-05, "loss": 1.1243, "step": 11689 }, { "epoch": 0.457391032162141, "grad_norm": 0.0, "learning_rate": 1.1855187806695797e-05, "loss": 1.0587, "step": 11690 }, { "epoch": 0.45743015885437044, "grad_norm": 0.0, "learning_rate": 1.185394255891098e-05, "loss": 1.1766, "step": 11691 }, { "epoch": 0.4574692855465999, "grad_norm": 0.0, "learning_rate": 1.1852697281354166e-05, "loss": 1.0822, "step": 11692 }, { "epoch": 0.4575084122388293, "grad_norm": 0.0, "learning_rate": 1.1851451974045357e-05, "loss": 1.0626, "step": 11693 }, { "epoch": 0.45754753893105876, "grad_norm": 0.0, "learning_rate": 1.185020663700455e-05, "loss": 0.9486, "step": 11694 }, { "epoch": 0.4575866656232882, "grad_norm": 0.0, "learning_rate": 1.1848961270251745e-05, "loss": 1.0094, "step": 11695 }, { "epoch": 0.45762579231551764, "grad_norm": 0.0, "learning_rate": 1.1847715873806935e-05, "loss": 1.0673, "step": 11696 }, { "epoch": 0.4576649190077471, "grad_norm": 0.0, "learning_rate": 1.1846470447690124e-05, "loss": 1.0723, "step": 11697 }, { "epoch": 0.4577040456999765, "grad_norm": 0.0, "learning_rate": 1.1845224991921313e-05, "loss": 1.0757, "step": 11698 }, { "epoch": 0.45774317239220597, "grad_norm": 0.0, "learning_rate": 1.18439795065205e-05, "loss": 1.0029, "step": 11699 }, { "epoch": 0.4577822990844354, "grad_norm": 0.0, "learning_rate": 1.1842733991507687e-05, "loss": 1.1169, "step": 11700 }, { "epoch": 0.45782142577666485, "grad_norm": 0.0, "learning_rate": 1.1841488446902876e-05, "loss": 0.9718, "step": 11701 }, { "epoch": 0.4578605524688943, "grad_norm": 0.0, "learning_rate": 1.1840242872726066e-05, "loss": 1.1167, "step": 11702 }, { "epoch": 0.45789967916112373, "grad_norm": 0.0, "learning_rate": 1.183899726899726e-05, "loss": 1.1106, "step": 11703 }, { "epoch": 0.45793880585335317, "grad_norm": 0.0, "learning_rate": 1.1837751635736467e-05, "loss": 1.0046, "step": 11704 }, { "epoch": 0.4579779325455826, "grad_norm": 0.0, "learning_rate": 1.1836505972963681e-05, "loss": 1.049, "step": 11705 }, { "epoch": 0.45801705923781205, "grad_norm": 0.0, "learning_rate": 1.1835260280698915e-05, "loss": 0.9757, "step": 11706 }, { "epoch": 0.4580561859300415, "grad_norm": 0.0, "learning_rate": 1.1834014558962165e-05, "loss": 0.9051, "step": 11707 }, { "epoch": 0.45809531262227093, "grad_norm": 0.0, "learning_rate": 1.183276880777344e-05, "loss": 1.0654, "step": 11708 }, { "epoch": 0.4581344393145004, "grad_norm": 0.0, "learning_rate": 1.1831523027152745e-05, "loss": 1.119, "step": 11709 }, { "epoch": 0.4581735660067298, "grad_norm": 0.0, "learning_rate": 1.1830277217120085e-05, "loss": 0.9816, "step": 11710 }, { "epoch": 0.45821269269895926, "grad_norm": 0.0, "learning_rate": 1.1829031377695465e-05, "loss": 1.0629, "step": 11711 }, { "epoch": 0.45825181939118864, "grad_norm": 0.0, "learning_rate": 1.1827785508898895e-05, "loss": 1.1956, "step": 11712 }, { "epoch": 0.4582909460834181, "grad_norm": 0.0, "learning_rate": 1.182653961075038e-05, "loss": 1.1583, "step": 11713 }, { "epoch": 0.4583300727756475, "grad_norm": 0.0, "learning_rate": 1.1825293683269928e-05, "loss": 1.1096, "step": 11714 }, { "epoch": 0.45836919946787696, "grad_norm": 0.0, "learning_rate": 1.1824047726477546e-05, "loss": 0.9897, "step": 11715 }, { "epoch": 0.4584083261601064, "grad_norm": 0.0, "learning_rate": 1.182280174039324e-05, "loss": 0.9641, "step": 11716 }, { "epoch": 0.45844745285233585, "grad_norm": 0.0, "learning_rate": 1.1821555725037025e-05, "loss": 1.1092, "step": 11717 }, { "epoch": 0.4584865795445653, "grad_norm": 0.0, "learning_rate": 1.182030968042891e-05, "loss": 0.9236, "step": 11718 }, { "epoch": 0.45852570623679473, "grad_norm": 0.0, "learning_rate": 1.1819063606588898e-05, "loss": 1.1271, "step": 11719 }, { "epoch": 0.45856483292902417, "grad_norm": 0.0, "learning_rate": 1.1817817503537008e-05, "loss": 1.0781, "step": 11720 }, { "epoch": 0.4586039596212536, "grad_norm": 0.0, "learning_rate": 1.1816571371293244e-05, "loss": 1.044, "step": 11721 }, { "epoch": 0.45864308631348305, "grad_norm": 0.0, "learning_rate": 1.1815325209877622e-05, "loss": 1.0512, "step": 11722 }, { "epoch": 0.4586822130057125, "grad_norm": 0.0, "learning_rate": 1.1814079019310149e-05, "loss": 1.0509, "step": 11723 }, { "epoch": 0.45872133969794193, "grad_norm": 0.0, "learning_rate": 1.1812832799610843e-05, "loss": 1.1063, "step": 11724 }, { "epoch": 0.4587604663901714, "grad_norm": 0.0, "learning_rate": 1.1811586550799712e-05, "loss": 0.9886, "step": 11725 }, { "epoch": 0.4587995930824008, "grad_norm": 0.0, "learning_rate": 1.1810340272896772e-05, "loss": 1.0845, "step": 11726 }, { "epoch": 0.45883871977463025, "grad_norm": 0.0, "learning_rate": 1.1809093965922035e-05, "loss": 1.1044, "step": 11727 }, { "epoch": 0.4588778464668597, "grad_norm": 0.0, "learning_rate": 1.1807847629895517e-05, "loss": 1.058, "step": 11728 }, { "epoch": 0.45891697315908914, "grad_norm": 0.0, "learning_rate": 1.1806601264837232e-05, "loss": 1.1241, "step": 11729 }, { "epoch": 0.4589560998513186, "grad_norm": 0.0, "learning_rate": 1.180535487076719e-05, "loss": 1.0747, "step": 11730 }, { "epoch": 0.458995226543548, "grad_norm": 0.0, "learning_rate": 1.1804108447705415e-05, "loss": 1.0353, "step": 11731 }, { "epoch": 0.45903435323577746, "grad_norm": 0.0, "learning_rate": 1.1802861995671917e-05, "loss": 1.0538, "step": 11732 }, { "epoch": 0.4590734799280069, "grad_norm": 0.0, "learning_rate": 1.1801615514686718e-05, "loss": 0.9315, "step": 11733 }, { "epoch": 0.45911260662023634, "grad_norm": 0.0, "learning_rate": 1.1800369004769827e-05, "loss": 1.1185, "step": 11734 }, { "epoch": 0.4591517333124658, "grad_norm": 0.0, "learning_rate": 1.1799122465941268e-05, "loss": 1.0149, "step": 11735 }, { "epoch": 0.4591908600046952, "grad_norm": 0.0, "learning_rate": 1.1797875898221058e-05, "loss": 0.9973, "step": 11736 }, { "epoch": 0.45922998669692466, "grad_norm": 0.0, "learning_rate": 1.1796629301629213e-05, "loss": 1.1281, "step": 11737 }, { "epoch": 0.4592691133891541, "grad_norm": 0.0, "learning_rate": 1.1795382676185751e-05, "loss": 0.9338, "step": 11738 }, { "epoch": 0.45930824008138355, "grad_norm": 0.0, "learning_rate": 1.1794136021910694e-05, "loss": 1.0877, "step": 11739 }, { "epoch": 0.459347366773613, "grad_norm": 0.0, "learning_rate": 1.179288933882406e-05, "loss": 1.125, "step": 11740 }, { "epoch": 0.45938649346584237, "grad_norm": 0.0, "learning_rate": 1.1791642626945872e-05, "loss": 0.9859, "step": 11741 }, { "epoch": 0.4594256201580718, "grad_norm": 0.0, "learning_rate": 1.1790395886296146e-05, "loss": 1.0022, "step": 11742 }, { "epoch": 0.45946474685030125, "grad_norm": 0.0, "learning_rate": 1.1789149116894907e-05, "loss": 1.1028, "step": 11743 }, { "epoch": 0.4595038735425307, "grad_norm": 0.0, "learning_rate": 1.1787902318762174e-05, "loss": 1.0426, "step": 11744 }, { "epoch": 0.45954300023476014, "grad_norm": 0.0, "learning_rate": 1.1786655491917971e-05, "loss": 1.0726, "step": 11745 }, { "epoch": 0.4595821269269896, "grad_norm": 0.0, "learning_rate": 1.178540863638232e-05, "loss": 0.8405, "step": 11746 }, { "epoch": 0.459621253619219, "grad_norm": 0.0, "learning_rate": 1.1784161752175243e-05, "loss": 1.044, "step": 11747 }, { "epoch": 0.45966038031144846, "grad_norm": 0.0, "learning_rate": 1.1782914839316764e-05, "loss": 1.1977, "step": 11748 }, { "epoch": 0.4596995070036779, "grad_norm": 0.0, "learning_rate": 1.178166789782691e-05, "loss": 1.0204, "step": 11749 }, { "epoch": 0.45973863369590734, "grad_norm": 0.0, "learning_rate": 1.1780420927725695e-05, "loss": 1.0923, "step": 11750 }, { "epoch": 0.4597777603881368, "grad_norm": 0.0, "learning_rate": 1.1779173929033157e-05, "loss": 1.0213, "step": 11751 }, { "epoch": 0.4598168870803662, "grad_norm": 0.0, "learning_rate": 1.177792690176931e-05, "loss": 1.0481, "step": 11752 }, { "epoch": 0.45985601377259566, "grad_norm": 0.0, "learning_rate": 1.177667984595419e-05, "loss": 0.9439, "step": 11753 }, { "epoch": 0.4598951404648251, "grad_norm": 0.0, "learning_rate": 1.1775432761607814e-05, "loss": 1.1952, "step": 11754 }, { "epoch": 0.45993426715705454, "grad_norm": 0.0, "learning_rate": 1.1774185648750216e-05, "loss": 0.9629, "step": 11755 }, { "epoch": 0.459973393849284, "grad_norm": 0.0, "learning_rate": 1.1772938507401417e-05, "loss": 1.1182, "step": 11756 }, { "epoch": 0.4600125205415134, "grad_norm": 0.0, "learning_rate": 1.1771691337581447e-05, "loss": 0.9771, "step": 11757 }, { "epoch": 0.46005164723374287, "grad_norm": 0.0, "learning_rate": 1.1770444139310337e-05, "loss": 1.1081, "step": 11758 }, { "epoch": 0.4600907739259723, "grad_norm": 0.0, "learning_rate": 1.176919691260811e-05, "loss": 0.9228, "step": 11759 }, { "epoch": 0.46012990061820175, "grad_norm": 0.0, "learning_rate": 1.1767949657494799e-05, "loss": 1.1091, "step": 11760 }, { "epoch": 0.4601690273104312, "grad_norm": 0.0, "learning_rate": 1.1766702373990431e-05, "loss": 1.1335, "step": 11761 }, { "epoch": 0.46020815400266063, "grad_norm": 0.0, "learning_rate": 1.1765455062115036e-05, "loss": 1.116, "step": 11762 }, { "epoch": 0.46024728069489007, "grad_norm": 0.0, "learning_rate": 1.1764207721888645e-05, "loss": 1.1086, "step": 11763 }, { "epoch": 0.4602864073871195, "grad_norm": 0.0, "learning_rate": 1.1762960353331293e-05, "loss": 1.0957, "step": 11764 }, { "epoch": 0.46032553407934895, "grad_norm": 0.0, "learning_rate": 1.1761712956463003e-05, "loss": 1.0488, "step": 11765 }, { "epoch": 0.4603646607715784, "grad_norm": 0.0, "learning_rate": 1.1760465531303808e-05, "loss": 1.0783, "step": 11766 }, { "epoch": 0.46040378746380783, "grad_norm": 0.0, "learning_rate": 1.1759218077873746e-05, "loss": 1.0706, "step": 11767 }, { "epoch": 0.4604429141560373, "grad_norm": 0.0, "learning_rate": 1.1757970596192845e-05, "loss": 1.0443, "step": 11768 }, { "epoch": 0.46048204084826666, "grad_norm": 0.0, "learning_rate": 1.1756723086281141e-05, "loss": 1.0094, "step": 11769 }, { "epoch": 0.4605211675404961, "grad_norm": 0.0, "learning_rate": 1.1755475548158662e-05, "loss": 1.0422, "step": 11770 }, { "epoch": 0.46056029423272554, "grad_norm": 0.0, "learning_rate": 1.175422798184545e-05, "loss": 1.0765, "step": 11771 }, { "epoch": 0.460599420924955, "grad_norm": 0.0, "learning_rate": 1.175298038736153e-05, "loss": 1.011, "step": 11772 }, { "epoch": 0.4606385476171844, "grad_norm": 0.0, "learning_rate": 1.1751732764726944e-05, "loss": 1.026, "step": 11773 }, { "epoch": 0.46067767430941386, "grad_norm": 0.0, "learning_rate": 1.1750485113961725e-05, "loss": 1.0991, "step": 11774 }, { "epoch": 0.4607168010016433, "grad_norm": 0.0, "learning_rate": 1.1749237435085909e-05, "loss": 1.0179, "step": 11775 }, { "epoch": 0.46075592769387275, "grad_norm": 0.0, "learning_rate": 1.1747989728119532e-05, "loss": 1.0591, "step": 11776 }, { "epoch": 0.4607950543861022, "grad_norm": 0.0, "learning_rate": 1.1746741993082628e-05, "loss": 0.9759, "step": 11777 }, { "epoch": 0.46083418107833163, "grad_norm": 0.0, "learning_rate": 1.1745494229995237e-05, "loss": 0.8959, "step": 11778 }, { "epoch": 0.46087330777056107, "grad_norm": 0.0, "learning_rate": 1.1744246438877395e-05, "loss": 1.1124, "step": 11779 }, { "epoch": 0.4609124344627905, "grad_norm": 0.0, "learning_rate": 1.1742998619749144e-05, "loss": 1.0185, "step": 11780 }, { "epoch": 0.46095156115501995, "grad_norm": 0.0, "learning_rate": 1.1741750772630518e-05, "loss": 1.0821, "step": 11781 }, { "epoch": 0.4609906878472494, "grad_norm": 0.0, "learning_rate": 1.1740502897541557e-05, "loss": 0.963, "step": 11782 }, { "epoch": 0.46102981453947883, "grad_norm": 0.0, "learning_rate": 1.1739254994502299e-05, "loss": 1.1508, "step": 11783 }, { "epoch": 0.4610689412317083, "grad_norm": 0.0, "learning_rate": 1.1738007063532788e-05, "loss": 0.9855, "step": 11784 }, { "epoch": 0.4611080679239377, "grad_norm": 0.0, "learning_rate": 1.1736759104653062e-05, "loss": 1.0148, "step": 11785 }, { "epoch": 0.46114719461616716, "grad_norm": 0.0, "learning_rate": 1.1735511117883156e-05, "loss": 1.0475, "step": 11786 }, { "epoch": 0.4611863213083966, "grad_norm": 0.0, "learning_rate": 1.173426310324312e-05, "loss": 1.0953, "step": 11787 }, { "epoch": 0.46122544800062604, "grad_norm": 0.0, "learning_rate": 1.1733015060752993e-05, "loss": 1.1759, "step": 11788 }, { "epoch": 0.4612645746928555, "grad_norm": 0.0, "learning_rate": 1.1731766990432812e-05, "loss": 1.0547, "step": 11789 }, { "epoch": 0.4613037013850849, "grad_norm": 0.0, "learning_rate": 1.1730518892302625e-05, "loss": 1.109, "step": 11790 }, { "epoch": 0.46134282807731436, "grad_norm": 0.0, "learning_rate": 1.1729270766382474e-05, "loss": 1.1371, "step": 11791 }, { "epoch": 0.4613819547695438, "grad_norm": 0.0, "learning_rate": 1.1728022612692397e-05, "loss": 1.0878, "step": 11792 }, { "epoch": 0.46142108146177324, "grad_norm": 0.0, "learning_rate": 1.1726774431252447e-05, "loss": 1.1953, "step": 11793 }, { "epoch": 0.4614602081540027, "grad_norm": 0.0, "learning_rate": 1.1725526222082663e-05, "loss": 0.9344, "step": 11794 }, { "epoch": 0.4614993348462321, "grad_norm": 0.0, "learning_rate": 1.1724277985203087e-05, "loss": 1.113, "step": 11795 }, { "epoch": 0.46153846153846156, "grad_norm": 0.0, "learning_rate": 1.1723029720633772e-05, "loss": 1.0627, "step": 11796 }, { "epoch": 0.461577588230691, "grad_norm": 0.0, "learning_rate": 1.1721781428394755e-05, "loss": 1.0675, "step": 11797 }, { "epoch": 0.4616167149229204, "grad_norm": 0.0, "learning_rate": 1.1720533108506085e-05, "loss": 1.0317, "step": 11798 }, { "epoch": 0.46165584161514983, "grad_norm": 0.0, "learning_rate": 1.1719284760987811e-05, "loss": 0.972, "step": 11799 }, { "epoch": 0.46169496830737927, "grad_norm": 0.0, "learning_rate": 1.171803638585998e-05, "loss": 1.1053, "step": 11800 }, { "epoch": 0.4617340949996087, "grad_norm": 0.0, "learning_rate": 1.1716787983142633e-05, "loss": 1.0543, "step": 11801 }, { "epoch": 0.46177322169183815, "grad_norm": 0.0, "learning_rate": 1.1715539552855823e-05, "loss": 0.9856, "step": 11802 }, { "epoch": 0.4618123483840676, "grad_norm": 0.0, "learning_rate": 1.1714291095019598e-05, "loss": 1.0536, "step": 11803 }, { "epoch": 0.46185147507629704, "grad_norm": 0.0, "learning_rate": 1.1713042609654008e-05, "loss": 0.9792, "step": 11804 }, { "epoch": 0.4618906017685265, "grad_norm": 0.0, "learning_rate": 1.1711794096779102e-05, "loss": 1.1485, "step": 11805 }, { "epoch": 0.4619297284607559, "grad_norm": 0.0, "learning_rate": 1.1710545556414922e-05, "loss": 0.9562, "step": 11806 }, { "epoch": 0.46196885515298536, "grad_norm": 0.0, "learning_rate": 1.1709296988581528e-05, "loss": 1.1322, "step": 11807 }, { "epoch": 0.4620079818452148, "grad_norm": 0.0, "learning_rate": 1.1708048393298965e-05, "loss": 1.0348, "step": 11808 }, { "epoch": 0.46204710853744424, "grad_norm": 0.0, "learning_rate": 1.1706799770587287e-05, "loss": 1.1039, "step": 11809 }, { "epoch": 0.4620862352296737, "grad_norm": 0.0, "learning_rate": 1.1705551120466541e-05, "loss": 1.1593, "step": 11810 }, { "epoch": 0.4621253619219031, "grad_norm": 0.0, "learning_rate": 1.1704302442956784e-05, "loss": 1.0051, "step": 11811 }, { "epoch": 0.46216448861413256, "grad_norm": 0.0, "learning_rate": 1.170305373807806e-05, "loss": 1.0981, "step": 11812 }, { "epoch": 0.462203615306362, "grad_norm": 0.0, "learning_rate": 1.1701805005850434e-05, "loss": 1.1074, "step": 11813 }, { "epoch": 0.46224274199859144, "grad_norm": 0.0, "learning_rate": 1.1700556246293948e-05, "loss": 1.1061, "step": 11814 }, { "epoch": 0.4622818686908209, "grad_norm": 0.0, "learning_rate": 1.1699307459428657e-05, "loss": 1.1001, "step": 11815 }, { "epoch": 0.4623209953830503, "grad_norm": 0.0, "learning_rate": 1.1698058645274623e-05, "loss": 1.1346, "step": 11816 }, { "epoch": 0.46236012207527977, "grad_norm": 0.0, "learning_rate": 1.1696809803851891e-05, "loss": 0.9414, "step": 11817 }, { "epoch": 0.4623992487675092, "grad_norm": 0.0, "learning_rate": 1.1695560935180523e-05, "loss": 1.1785, "step": 11818 }, { "epoch": 0.46243837545973865, "grad_norm": 0.0, "learning_rate": 1.1694312039280567e-05, "loss": 1.1566, "step": 11819 }, { "epoch": 0.4624775021519681, "grad_norm": 0.0, "learning_rate": 1.1693063116172087e-05, "loss": 0.986, "step": 11820 }, { "epoch": 0.46251662884419753, "grad_norm": 0.0, "learning_rate": 1.169181416587513e-05, "loss": 1.101, "step": 11821 }, { "epoch": 0.46255575553642697, "grad_norm": 0.0, "learning_rate": 1.1690565188409764e-05, "loss": 1.1465, "step": 11822 }, { "epoch": 0.4625948822286564, "grad_norm": 0.0, "learning_rate": 1.1689316183796035e-05, "loss": 0.9996, "step": 11823 }, { "epoch": 0.46263400892088585, "grad_norm": 0.0, "learning_rate": 1.1688067152054005e-05, "loss": 1.1744, "step": 11824 }, { "epoch": 0.4626731356131153, "grad_norm": 0.0, "learning_rate": 1.1686818093203733e-05, "loss": 1.0582, "step": 11825 }, { "epoch": 0.4627122623053447, "grad_norm": 0.0, "learning_rate": 1.1685569007265277e-05, "loss": 1.1245, "step": 11826 }, { "epoch": 0.4627513889975741, "grad_norm": 0.0, "learning_rate": 1.1684319894258693e-05, "loss": 0.955, "step": 11827 }, { "epoch": 0.46279051568980356, "grad_norm": 0.0, "learning_rate": 1.1683070754204043e-05, "loss": 0.9966, "step": 11828 }, { "epoch": 0.462829642382033, "grad_norm": 0.0, "learning_rate": 1.1681821587121385e-05, "loss": 1.0824, "step": 11829 }, { "epoch": 0.46286876907426244, "grad_norm": 0.0, "learning_rate": 1.168057239303078e-05, "loss": 1.231, "step": 11830 }, { "epoch": 0.4629078957664919, "grad_norm": 0.0, "learning_rate": 1.1679323171952287e-05, "loss": 1.1329, "step": 11831 }, { "epoch": 0.4629470224587213, "grad_norm": 0.0, "learning_rate": 1.167807392390597e-05, "loss": 1.034, "step": 11832 }, { "epoch": 0.46298614915095077, "grad_norm": 0.0, "learning_rate": 1.167682464891189e-05, "loss": 1.1674, "step": 11833 }, { "epoch": 0.4630252758431802, "grad_norm": 0.0, "learning_rate": 1.1675575346990105e-05, "loss": 1.1732, "step": 11834 }, { "epoch": 0.46306440253540965, "grad_norm": 0.0, "learning_rate": 1.167432601816068e-05, "loss": 1.0161, "step": 11835 }, { "epoch": 0.4631035292276391, "grad_norm": 0.0, "learning_rate": 1.1673076662443678e-05, "loss": 1.009, "step": 11836 }, { "epoch": 0.46314265591986853, "grad_norm": 0.0, "learning_rate": 1.167182727985916e-05, "loss": 0.9124, "step": 11837 }, { "epoch": 0.46318178261209797, "grad_norm": 0.0, "learning_rate": 1.1670577870427191e-05, "loss": 1.0091, "step": 11838 }, { "epoch": 0.4632209093043274, "grad_norm": 0.0, "learning_rate": 1.1669328434167835e-05, "loss": 1.0956, "step": 11839 }, { "epoch": 0.46326003599655685, "grad_norm": 0.0, "learning_rate": 1.1668078971101155e-05, "loss": 1.2191, "step": 11840 }, { "epoch": 0.4632991626887863, "grad_norm": 0.0, "learning_rate": 1.166682948124722e-05, "loss": 1.187, "step": 11841 }, { "epoch": 0.46333828938101573, "grad_norm": 0.0, "learning_rate": 1.166557996462609e-05, "loss": 1.2146, "step": 11842 }, { "epoch": 0.4633774160732452, "grad_norm": 0.0, "learning_rate": 1.1664330421257835e-05, "loss": 1.0748, "step": 11843 }, { "epoch": 0.4634165427654746, "grad_norm": 0.0, "learning_rate": 1.1663080851162515e-05, "loss": 1.0368, "step": 11844 }, { "epoch": 0.46345566945770406, "grad_norm": 0.0, "learning_rate": 1.1661831254360203e-05, "loss": 1.0478, "step": 11845 }, { "epoch": 0.4634947961499335, "grad_norm": 0.0, "learning_rate": 1.1660581630870962e-05, "loss": 1.0518, "step": 11846 }, { "epoch": 0.46353392284216294, "grad_norm": 0.0, "learning_rate": 1.1659331980714863e-05, "loss": 0.948, "step": 11847 }, { "epoch": 0.4635730495343924, "grad_norm": 0.0, "learning_rate": 1.1658082303911969e-05, "loss": 0.9818, "step": 11848 }, { "epoch": 0.4636121762266218, "grad_norm": 0.0, "learning_rate": 1.1656832600482354e-05, "loss": 1.0379, "step": 11849 }, { "epoch": 0.46365130291885126, "grad_norm": 0.0, "learning_rate": 1.1655582870446081e-05, "loss": 0.9868, "step": 11850 }, { "epoch": 0.4636904296110807, "grad_norm": 0.0, "learning_rate": 1.1654333113823222e-05, "loss": 1.0886, "step": 11851 }, { "epoch": 0.46372955630331014, "grad_norm": 0.0, "learning_rate": 1.1653083330633848e-05, "loss": 0.9806, "step": 11852 }, { "epoch": 0.4637686829955396, "grad_norm": 0.0, "learning_rate": 1.1651833520898023e-05, "loss": 1.0051, "step": 11853 }, { "epoch": 0.463807809687769, "grad_norm": 0.0, "learning_rate": 1.1650583684635828e-05, "loss": 1.0712, "step": 11854 }, { "epoch": 0.4638469363799984, "grad_norm": 0.0, "learning_rate": 1.1649333821867321e-05, "loss": 1.0394, "step": 11855 }, { "epoch": 0.46388606307222785, "grad_norm": 0.0, "learning_rate": 1.1648083932612584e-05, "loss": 1.0132, "step": 11856 }, { "epoch": 0.4639251897644573, "grad_norm": 0.0, "learning_rate": 1.1646834016891682e-05, "loss": 1.0079, "step": 11857 }, { "epoch": 0.46396431645668673, "grad_norm": 0.0, "learning_rate": 1.164558407472469e-05, "loss": 1.0117, "step": 11858 }, { "epoch": 0.4640034431489162, "grad_norm": 0.0, "learning_rate": 1.1644334106131678e-05, "loss": 1.0501, "step": 11859 }, { "epoch": 0.4640425698411456, "grad_norm": 0.0, "learning_rate": 1.1643084111132723e-05, "loss": 1.1385, "step": 11860 }, { "epoch": 0.46408169653337505, "grad_norm": 0.0, "learning_rate": 1.1641834089747895e-05, "loss": 0.9388, "step": 11861 }, { "epoch": 0.4641208232256045, "grad_norm": 0.0, "learning_rate": 1.1640584041997269e-05, "loss": 1.1105, "step": 11862 }, { "epoch": 0.46415994991783394, "grad_norm": 0.0, "learning_rate": 1.1639333967900921e-05, "loss": 1.0226, "step": 11863 }, { "epoch": 0.4641990766100634, "grad_norm": 0.0, "learning_rate": 1.163808386747892e-05, "loss": 1.0482, "step": 11864 }, { "epoch": 0.4642382033022928, "grad_norm": 0.0, "learning_rate": 1.1636833740751348e-05, "loss": 1.0814, "step": 11865 }, { "epoch": 0.46427732999452226, "grad_norm": 0.0, "learning_rate": 1.1635583587738276e-05, "loss": 1.1147, "step": 11866 }, { "epoch": 0.4643164566867517, "grad_norm": 0.0, "learning_rate": 1.1634333408459782e-05, "loss": 0.9775, "step": 11867 }, { "epoch": 0.46435558337898114, "grad_norm": 0.0, "learning_rate": 1.1633083202935937e-05, "loss": 1.1097, "step": 11868 }, { "epoch": 0.4643947100712106, "grad_norm": 0.0, "learning_rate": 1.1631832971186827e-05, "loss": 1.1918, "step": 11869 }, { "epoch": 0.46443383676344, "grad_norm": 0.0, "learning_rate": 1.1630582713232524e-05, "loss": 1.0894, "step": 11870 }, { "epoch": 0.46447296345566946, "grad_norm": 0.0, "learning_rate": 1.1629332429093103e-05, "loss": 1.0604, "step": 11871 }, { "epoch": 0.4645120901478989, "grad_norm": 0.0, "learning_rate": 1.1628082118788648e-05, "loss": 1.076, "step": 11872 }, { "epoch": 0.46455121684012834, "grad_norm": 0.0, "learning_rate": 1.1626831782339232e-05, "loss": 1.0044, "step": 11873 }, { "epoch": 0.4645903435323578, "grad_norm": 0.0, "learning_rate": 1.1625581419764938e-05, "loss": 1.2274, "step": 11874 }, { "epoch": 0.4646294702245872, "grad_norm": 0.0, "learning_rate": 1.1624331031085843e-05, "loss": 0.9863, "step": 11875 }, { "epoch": 0.46466859691681667, "grad_norm": 0.0, "learning_rate": 1.1623080616322024e-05, "loss": 1.2013, "step": 11876 }, { "epoch": 0.4647077236090461, "grad_norm": 0.0, "learning_rate": 1.162183017549357e-05, "loss": 0.9312, "step": 11877 }, { "epoch": 0.46474685030127555, "grad_norm": 0.0, "learning_rate": 1.1620579708620551e-05, "loss": 0.9976, "step": 11878 }, { "epoch": 0.464785976993505, "grad_norm": 0.0, "learning_rate": 1.1619329215723052e-05, "loss": 1.1223, "step": 11879 }, { "epoch": 0.46482510368573443, "grad_norm": 0.0, "learning_rate": 1.1618078696821157e-05, "loss": 1.0848, "step": 11880 }, { "epoch": 0.46486423037796387, "grad_norm": 0.0, "learning_rate": 1.1616828151934944e-05, "loss": 0.9857, "step": 11881 }, { "epoch": 0.4649033570701933, "grad_norm": 0.0, "learning_rate": 1.16155775810845e-05, "loss": 1.0494, "step": 11882 }, { "epoch": 0.4649424837624227, "grad_norm": 0.0, "learning_rate": 1.1614326984289902e-05, "loss": 1.1032, "step": 11883 }, { "epoch": 0.46498161045465214, "grad_norm": 0.0, "learning_rate": 1.1613076361571236e-05, "loss": 1.0693, "step": 11884 }, { "epoch": 0.4650207371468816, "grad_norm": 0.0, "learning_rate": 1.1611825712948588e-05, "loss": 0.99, "step": 11885 }, { "epoch": 0.465059863839111, "grad_norm": 0.0, "learning_rate": 1.1610575038442033e-05, "loss": 1.1165, "step": 11886 }, { "epoch": 0.46509899053134046, "grad_norm": 0.0, "learning_rate": 1.1609324338071666e-05, "loss": 1.171, "step": 11887 }, { "epoch": 0.4651381172235699, "grad_norm": 0.0, "learning_rate": 1.1608073611857562e-05, "loss": 1.0317, "step": 11888 }, { "epoch": 0.46517724391579934, "grad_norm": 0.0, "learning_rate": 1.1606822859819815e-05, "loss": 0.9563, "step": 11889 }, { "epoch": 0.4652163706080288, "grad_norm": 0.0, "learning_rate": 1.1605572081978504e-05, "loss": 1.121, "step": 11890 }, { "epoch": 0.4652554973002582, "grad_norm": 0.0, "learning_rate": 1.1604321278353718e-05, "loss": 1.0925, "step": 11891 }, { "epoch": 0.46529462399248767, "grad_norm": 0.0, "learning_rate": 1.1603070448965543e-05, "loss": 1.0268, "step": 11892 }, { "epoch": 0.4653337506847171, "grad_norm": 0.0, "learning_rate": 1.1601819593834066e-05, "loss": 1.1469, "step": 11893 }, { "epoch": 0.46537287737694655, "grad_norm": 0.0, "learning_rate": 1.1600568712979371e-05, "loss": 1.059, "step": 11894 }, { "epoch": 0.465412004069176, "grad_norm": 0.0, "learning_rate": 1.1599317806421548e-05, "loss": 0.9977, "step": 11895 }, { "epoch": 0.46545113076140543, "grad_norm": 0.0, "learning_rate": 1.1598066874180689e-05, "loss": 0.9745, "step": 11896 }, { "epoch": 0.46549025745363487, "grad_norm": 0.0, "learning_rate": 1.1596815916276876e-05, "loss": 0.9919, "step": 11897 }, { "epoch": 0.4655293841458643, "grad_norm": 0.0, "learning_rate": 1.15955649327302e-05, "loss": 1.1173, "step": 11898 }, { "epoch": 0.46556851083809375, "grad_norm": 0.0, "learning_rate": 1.159431392356075e-05, "loss": 1.065, "step": 11899 }, { "epoch": 0.4656076375303232, "grad_norm": 0.0, "learning_rate": 1.1593062888788617e-05, "loss": 0.9721, "step": 11900 }, { "epoch": 0.46564676422255263, "grad_norm": 0.0, "learning_rate": 1.1591811828433892e-05, "loss": 1.172, "step": 11901 }, { "epoch": 0.4656858909147821, "grad_norm": 0.0, "learning_rate": 1.1590560742516662e-05, "loss": 1.0184, "step": 11902 }, { "epoch": 0.4657250176070115, "grad_norm": 0.0, "learning_rate": 1.1589309631057019e-05, "loss": 1.0962, "step": 11903 }, { "epoch": 0.46576414429924096, "grad_norm": 0.0, "learning_rate": 1.1588058494075054e-05, "loss": 1.106, "step": 11904 }, { "epoch": 0.4658032709914704, "grad_norm": 0.0, "learning_rate": 1.1586807331590864e-05, "loss": 0.9529, "step": 11905 }, { "epoch": 0.46584239768369984, "grad_norm": 0.0, "learning_rate": 1.1585556143624532e-05, "loss": 1.078, "step": 11906 }, { "epoch": 0.4658815243759293, "grad_norm": 0.0, "learning_rate": 1.1584304930196158e-05, "loss": 1.0146, "step": 11907 }, { "epoch": 0.4659206510681587, "grad_norm": 0.0, "learning_rate": 1.1583053691325829e-05, "loss": 1.0073, "step": 11908 }, { "epoch": 0.46595977776038816, "grad_norm": 0.0, "learning_rate": 1.1581802427033644e-05, "loss": 1.0214, "step": 11909 }, { "epoch": 0.4659989044526176, "grad_norm": 0.0, "learning_rate": 1.1580551137339696e-05, "loss": 0.9201, "step": 11910 }, { "epoch": 0.466038031144847, "grad_norm": 0.0, "learning_rate": 1.1579299822264073e-05, "loss": 1.143, "step": 11911 }, { "epoch": 0.46607715783707643, "grad_norm": 0.0, "learning_rate": 1.1578048481826875e-05, "loss": 1.0067, "step": 11912 }, { "epoch": 0.46611628452930587, "grad_norm": 0.0, "learning_rate": 1.1576797116048195e-05, "loss": 1.1806, "step": 11913 }, { "epoch": 0.4661554112215353, "grad_norm": 0.0, "learning_rate": 1.1575545724948131e-05, "loss": 1.1686, "step": 11914 }, { "epoch": 0.46619453791376475, "grad_norm": 0.0, "learning_rate": 1.1574294308546776e-05, "loss": 0.9871, "step": 11915 }, { "epoch": 0.4662336646059942, "grad_norm": 0.0, "learning_rate": 1.1573042866864229e-05, "loss": 1.0537, "step": 11916 }, { "epoch": 0.46627279129822363, "grad_norm": 0.0, "learning_rate": 1.1571791399920582e-05, "loss": 0.9816, "step": 11917 }, { "epoch": 0.4663119179904531, "grad_norm": 0.0, "learning_rate": 1.1570539907735937e-05, "loss": 1.0626, "step": 11918 }, { "epoch": 0.4663510446826825, "grad_norm": 0.0, "learning_rate": 1.1569288390330389e-05, "loss": 1.1676, "step": 11919 }, { "epoch": 0.46639017137491195, "grad_norm": 0.0, "learning_rate": 1.1568036847724033e-05, "loss": 1.1257, "step": 11920 }, { "epoch": 0.4664292980671414, "grad_norm": 0.0, "learning_rate": 1.1566785279936972e-05, "loss": 1.134, "step": 11921 }, { "epoch": 0.46646842475937084, "grad_norm": 0.0, "learning_rate": 1.1565533686989302e-05, "loss": 0.9774, "step": 11922 }, { "epoch": 0.4665075514516003, "grad_norm": 0.0, "learning_rate": 1.1564282068901125e-05, "loss": 1.0604, "step": 11923 }, { "epoch": 0.4665466781438297, "grad_norm": 0.0, "learning_rate": 1.1563030425692536e-05, "loss": 1.1283, "step": 11924 }, { "epoch": 0.46658580483605916, "grad_norm": 0.0, "learning_rate": 1.1561778757383639e-05, "loss": 1.0659, "step": 11925 }, { "epoch": 0.4666249315282886, "grad_norm": 0.0, "learning_rate": 1.156052706399453e-05, "loss": 1.1189, "step": 11926 }, { "epoch": 0.46666405822051804, "grad_norm": 0.0, "learning_rate": 1.1559275345545312e-05, "loss": 1.0831, "step": 11927 }, { "epoch": 0.4667031849127475, "grad_norm": 0.0, "learning_rate": 1.1558023602056085e-05, "loss": 1.0811, "step": 11928 }, { "epoch": 0.4667423116049769, "grad_norm": 0.0, "learning_rate": 1.1556771833546954e-05, "loss": 0.9488, "step": 11929 }, { "epoch": 0.46678143829720636, "grad_norm": 0.0, "learning_rate": 1.1555520040038017e-05, "loss": 0.97, "step": 11930 }, { "epoch": 0.4668205649894358, "grad_norm": 0.0, "learning_rate": 1.1554268221549377e-05, "loss": 0.8829, "step": 11931 }, { "epoch": 0.46685969168166525, "grad_norm": 0.0, "learning_rate": 1.1553016378101137e-05, "loss": 1.078, "step": 11932 }, { "epoch": 0.4668988183738947, "grad_norm": 0.0, "learning_rate": 1.1551764509713399e-05, "loss": 1.144, "step": 11933 }, { "epoch": 0.4669379450661241, "grad_norm": 0.0, "learning_rate": 1.1550512616406269e-05, "loss": 1.0949, "step": 11934 }, { "epoch": 0.46697707175835357, "grad_norm": 0.0, "learning_rate": 1.1549260698199846e-05, "loss": 1.1515, "step": 11935 }, { "epoch": 0.467016198450583, "grad_norm": 0.0, "learning_rate": 1.1548008755114242e-05, "loss": 1.0065, "step": 11936 }, { "epoch": 0.46705532514281245, "grad_norm": 0.0, "learning_rate": 1.1546756787169553e-05, "loss": 1.1184, "step": 11937 }, { "epoch": 0.4670944518350419, "grad_norm": 0.0, "learning_rate": 1.1545504794385893e-05, "loss": 0.9786, "step": 11938 }, { "epoch": 0.46713357852727133, "grad_norm": 0.0, "learning_rate": 1.154425277678336e-05, "loss": 1.1083, "step": 11939 }, { "epoch": 0.4671727052195007, "grad_norm": 0.0, "learning_rate": 1.154300073438206e-05, "loss": 1.0342, "step": 11940 }, { "epoch": 0.46721183191173016, "grad_norm": 0.0, "learning_rate": 1.1541748667202106e-05, "loss": 1.0582, "step": 11941 }, { "epoch": 0.4672509586039596, "grad_norm": 0.0, "learning_rate": 1.1540496575263598e-05, "loss": 1.0469, "step": 11942 }, { "epoch": 0.46729008529618904, "grad_norm": 0.0, "learning_rate": 1.1539244458586646e-05, "loss": 1.0242, "step": 11943 }, { "epoch": 0.4673292119884185, "grad_norm": 0.0, "learning_rate": 1.1537992317191355e-05, "loss": 1.0193, "step": 11944 }, { "epoch": 0.4673683386806479, "grad_norm": 0.0, "learning_rate": 1.1536740151097839e-05, "loss": 1.1216, "step": 11945 }, { "epoch": 0.46740746537287736, "grad_norm": 0.0, "learning_rate": 1.1535487960326197e-05, "loss": 1.1371, "step": 11946 }, { "epoch": 0.4674465920651068, "grad_norm": 0.0, "learning_rate": 1.1534235744896547e-05, "loss": 1.0314, "step": 11947 }, { "epoch": 0.46748571875733624, "grad_norm": 0.0, "learning_rate": 1.1532983504828987e-05, "loss": 0.9639, "step": 11948 }, { "epoch": 0.4675248454495657, "grad_norm": 0.0, "learning_rate": 1.153173124014364e-05, "loss": 1.0944, "step": 11949 }, { "epoch": 0.4675639721417951, "grad_norm": 0.0, "learning_rate": 1.1530478950860607e-05, "loss": 0.9853, "step": 11950 }, { "epoch": 0.46760309883402457, "grad_norm": 0.0, "learning_rate": 1.1529226636999995e-05, "loss": 1.0977, "step": 11951 }, { "epoch": 0.467642225526254, "grad_norm": 0.0, "learning_rate": 1.1527974298581924e-05, "loss": 1.1288, "step": 11952 }, { "epoch": 0.46768135221848345, "grad_norm": 0.0, "learning_rate": 1.15267219356265e-05, "loss": 1.134, "step": 11953 }, { "epoch": 0.4677204789107129, "grad_norm": 0.0, "learning_rate": 1.1525469548153833e-05, "loss": 1.0096, "step": 11954 }, { "epoch": 0.46775960560294233, "grad_norm": 0.0, "learning_rate": 1.1524217136184037e-05, "loss": 1.1002, "step": 11955 }, { "epoch": 0.46779873229517177, "grad_norm": 0.0, "learning_rate": 1.1522964699737224e-05, "loss": 1.0753, "step": 11956 }, { "epoch": 0.4678378589874012, "grad_norm": 0.0, "learning_rate": 1.1521712238833507e-05, "loss": 1.0923, "step": 11957 }, { "epoch": 0.46787698567963065, "grad_norm": 0.0, "learning_rate": 1.1520459753492999e-05, "loss": 1.0464, "step": 11958 }, { "epoch": 0.4679161123718601, "grad_norm": 0.0, "learning_rate": 1.1519207243735813e-05, "loss": 1.1895, "step": 11959 }, { "epoch": 0.46795523906408953, "grad_norm": 0.0, "learning_rate": 1.1517954709582058e-05, "loss": 1.0929, "step": 11960 }, { "epoch": 0.467994365756319, "grad_norm": 0.0, "learning_rate": 1.1516702151051855e-05, "loss": 1.1155, "step": 11961 }, { "epoch": 0.4680334924485484, "grad_norm": 0.0, "learning_rate": 1.1515449568165316e-05, "loss": 0.9372, "step": 11962 }, { "epoch": 0.46807261914077786, "grad_norm": 0.0, "learning_rate": 1.1514196960942556e-05, "loss": 1.1287, "step": 11963 }, { "epoch": 0.4681117458330073, "grad_norm": 0.0, "learning_rate": 1.1512944329403693e-05, "loss": 1.0678, "step": 11964 }, { "epoch": 0.46815087252523674, "grad_norm": 0.0, "learning_rate": 1.1511691673568835e-05, "loss": 0.9649, "step": 11965 }, { "epoch": 0.4681899992174662, "grad_norm": 0.0, "learning_rate": 1.1510438993458106e-05, "loss": 1.1443, "step": 11966 }, { "epoch": 0.4682291259096956, "grad_norm": 0.0, "learning_rate": 1.1509186289091621e-05, "loss": 1.1163, "step": 11967 }, { "epoch": 0.468268252601925, "grad_norm": 0.0, "learning_rate": 1.1507933560489491e-05, "loss": 1.0692, "step": 11968 }, { "epoch": 0.46830737929415445, "grad_norm": 0.0, "learning_rate": 1.1506680807671841e-05, "loss": 1.019, "step": 11969 }, { "epoch": 0.4683465059863839, "grad_norm": 0.0, "learning_rate": 1.1505428030658784e-05, "loss": 1.1066, "step": 11970 }, { "epoch": 0.46838563267861333, "grad_norm": 0.0, "learning_rate": 1.1504175229470438e-05, "loss": 1.1129, "step": 11971 }, { "epoch": 0.46842475937084277, "grad_norm": 0.0, "learning_rate": 1.1502922404126924e-05, "loss": 0.9771, "step": 11972 }, { "epoch": 0.4684638860630722, "grad_norm": 0.0, "learning_rate": 1.1501669554648359e-05, "loss": 1.0567, "step": 11973 }, { "epoch": 0.46850301275530165, "grad_norm": 0.0, "learning_rate": 1.1500416681054864e-05, "loss": 1.1606, "step": 11974 }, { "epoch": 0.4685421394475311, "grad_norm": 0.0, "learning_rate": 1.1499163783366553e-05, "loss": 0.9371, "step": 11975 }, { "epoch": 0.46858126613976053, "grad_norm": 0.0, "learning_rate": 1.1497910861603557e-05, "loss": 1.0605, "step": 11976 }, { "epoch": 0.46862039283199, "grad_norm": 0.0, "learning_rate": 1.1496657915785983e-05, "loss": 1.0634, "step": 11977 }, { "epoch": 0.4686595195242194, "grad_norm": 0.0, "learning_rate": 1.1495404945933962e-05, "loss": 1.0698, "step": 11978 }, { "epoch": 0.46869864621644886, "grad_norm": 0.0, "learning_rate": 1.1494151952067611e-05, "loss": 1.0931, "step": 11979 }, { "epoch": 0.4687377729086783, "grad_norm": 0.0, "learning_rate": 1.1492898934207052e-05, "loss": 1.0975, "step": 11980 }, { "epoch": 0.46877689960090774, "grad_norm": 0.0, "learning_rate": 1.1491645892372405e-05, "loss": 1.1495, "step": 11981 }, { "epoch": 0.4688160262931372, "grad_norm": 0.0, "learning_rate": 1.1490392826583794e-05, "loss": 1.0168, "step": 11982 }, { "epoch": 0.4688551529853666, "grad_norm": 0.0, "learning_rate": 1.1489139736861344e-05, "loss": 1.0421, "step": 11983 }, { "epoch": 0.46889427967759606, "grad_norm": 0.0, "learning_rate": 1.1487886623225174e-05, "loss": 1.1392, "step": 11984 }, { "epoch": 0.4689334063698255, "grad_norm": 0.0, "learning_rate": 1.148663348569541e-05, "loss": 1.1541, "step": 11985 }, { "epoch": 0.46897253306205494, "grad_norm": 0.0, "learning_rate": 1.1485380324292175e-05, "loss": 1.1152, "step": 11986 }, { "epoch": 0.4690116597542844, "grad_norm": 0.0, "learning_rate": 1.1484127139035594e-05, "loss": 1.1326, "step": 11987 }, { "epoch": 0.4690507864465138, "grad_norm": 0.0, "learning_rate": 1.1482873929945793e-05, "loss": 1.1362, "step": 11988 }, { "epoch": 0.46908991313874326, "grad_norm": 0.0, "learning_rate": 1.148162069704289e-05, "loss": 1.0964, "step": 11989 }, { "epoch": 0.4691290398309727, "grad_norm": 0.0, "learning_rate": 1.1480367440347017e-05, "loss": 1.1969, "step": 11990 }, { "epoch": 0.46916816652320215, "grad_norm": 0.0, "learning_rate": 1.1479114159878297e-05, "loss": 1.0932, "step": 11991 }, { "epoch": 0.4692072932154316, "grad_norm": 0.0, "learning_rate": 1.1477860855656862e-05, "loss": 0.9514, "step": 11992 }, { "epoch": 0.469246419907661, "grad_norm": 0.0, "learning_rate": 1.1476607527702828e-05, "loss": 0.9854, "step": 11993 }, { "epoch": 0.46928554659989047, "grad_norm": 0.0, "learning_rate": 1.147535417603633e-05, "loss": 1.2301, "step": 11994 }, { "epoch": 0.4693246732921199, "grad_norm": 0.0, "learning_rate": 1.147410080067749e-05, "loss": 1.2125, "step": 11995 }, { "epoch": 0.46936379998434935, "grad_norm": 0.0, "learning_rate": 1.147284740164644e-05, "loss": 1.0749, "step": 11996 }, { "epoch": 0.46940292667657874, "grad_norm": 0.0, "learning_rate": 1.1471593978963309e-05, "loss": 1.0626, "step": 11997 }, { "epoch": 0.4694420533688082, "grad_norm": 0.0, "learning_rate": 1.147034053264822e-05, "loss": 1.0165, "step": 11998 }, { "epoch": 0.4694811800610376, "grad_norm": 0.0, "learning_rate": 1.1469087062721305e-05, "loss": 1.143, "step": 11999 }, { "epoch": 0.46952030675326706, "grad_norm": 0.0, "learning_rate": 1.146783356920269e-05, "loss": 1.0022, "step": 12000 }, { "epoch": 0.4695594334454965, "grad_norm": 0.0, "learning_rate": 1.1466580052112511e-05, "loss": 1.0602, "step": 12001 }, { "epoch": 0.46959856013772594, "grad_norm": 0.0, "learning_rate": 1.146532651147089e-05, "loss": 1.0598, "step": 12002 }, { "epoch": 0.4696376868299554, "grad_norm": 0.0, "learning_rate": 1.1464072947297966e-05, "loss": 0.9972, "step": 12003 }, { "epoch": 0.4696768135221848, "grad_norm": 0.0, "learning_rate": 1.1462819359613862e-05, "loss": 0.8667, "step": 12004 }, { "epoch": 0.46971594021441426, "grad_norm": 0.0, "learning_rate": 1.1461565748438715e-05, "loss": 1.0737, "step": 12005 }, { "epoch": 0.4697550669066437, "grad_norm": 0.0, "learning_rate": 1.1460312113792651e-05, "loss": 1.1503, "step": 12006 }, { "epoch": 0.46979419359887314, "grad_norm": 0.0, "learning_rate": 1.1459058455695804e-05, "loss": 1.015, "step": 12007 }, { "epoch": 0.4698333202911026, "grad_norm": 0.0, "learning_rate": 1.1457804774168308e-05, "loss": 1.0185, "step": 12008 }, { "epoch": 0.469872446983332, "grad_norm": 0.0, "learning_rate": 1.1456551069230296e-05, "loss": 1.1205, "step": 12009 }, { "epoch": 0.46991157367556147, "grad_norm": 0.0, "learning_rate": 1.1455297340901895e-05, "loss": 0.9765, "step": 12010 }, { "epoch": 0.4699507003677909, "grad_norm": 0.0, "learning_rate": 1.1454043589203243e-05, "loss": 1.1632, "step": 12011 }, { "epoch": 0.46998982706002035, "grad_norm": 0.0, "learning_rate": 1.1452789814154475e-05, "loss": 1.0315, "step": 12012 }, { "epoch": 0.4700289537522498, "grad_norm": 0.0, "learning_rate": 1.1451536015775722e-05, "loss": 1.0966, "step": 12013 }, { "epoch": 0.47006808044447923, "grad_norm": 0.0, "learning_rate": 1.1450282194087119e-05, "loss": 1.153, "step": 12014 }, { "epoch": 0.47010720713670867, "grad_norm": 0.0, "learning_rate": 1.1449028349108802e-05, "loss": 0.987, "step": 12015 }, { "epoch": 0.4701463338289381, "grad_norm": 0.0, "learning_rate": 1.1447774480860905e-05, "loss": 1.166, "step": 12016 }, { "epoch": 0.47018546052116755, "grad_norm": 0.0, "learning_rate": 1.1446520589363566e-05, "loss": 0.9974, "step": 12017 }, { "epoch": 0.470224587213397, "grad_norm": 0.0, "learning_rate": 1.1445266674636915e-05, "loss": 1.0164, "step": 12018 }, { "epoch": 0.47026371390562643, "grad_norm": 0.0, "learning_rate": 1.1444012736701096e-05, "loss": 1.1218, "step": 12019 }, { "epoch": 0.4703028405978559, "grad_norm": 0.0, "learning_rate": 1.1442758775576237e-05, "loss": 1.1526, "step": 12020 }, { "epoch": 0.4703419672900853, "grad_norm": 0.0, "learning_rate": 1.1441504791282485e-05, "loss": 1.1278, "step": 12021 }, { "epoch": 0.47038109398231476, "grad_norm": 0.0, "learning_rate": 1.1440250783839967e-05, "loss": 0.873, "step": 12022 }, { "epoch": 0.4704202206745442, "grad_norm": 0.0, "learning_rate": 1.1438996753268832e-05, "loss": 1.102, "step": 12023 }, { "epoch": 0.47045934736677364, "grad_norm": 0.0, "learning_rate": 1.143774269958921e-05, "loss": 1.0759, "step": 12024 }, { "epoch": 0.470498474059003, "grad_norm": 0.0, "learning_rate": 1.1436488622821243e-05, "loss": 1.1271, "step": 12025 }, { "epoch": 0.47053760075123247, "grad_norm": 0.0, "learning_rate": 1.1435234522985065e-05, "loss": 1.1082, "step": 12026 }, { "epoch": 0.4705767274434619, "grad_norm": 0.0, "learning_rate": 1.1433980400100822e-05, "loss": 1.0537, "step": 12027 }, { "epoch": 0.47061585413569135, "grad_norm": 0.0, "learning_rate": 1.143272625418865e-05, "loss": 1.082, "step": 12028 }, { "epoch": 0.4706549808279208, "grad_norm": 0.0, "learning_rate": 1.1431472085268688e-05, "loss": 0.9001, "step": 12029 }, { "epoch": 0.47069410752015023, "grad_norm": 0.0, "learning_rate": 1.1430217893361082e-05, "loss": 1.0369, "step": 12030 }, { "epoch": 0.47073323421237967, "grad_norm": 0.0, "learning_rate": 1.1428963678485965e-05, "loss": 1.0162, "step": 12031 }, { "epoch": 0.4707723609046091, "grad_norm": 0.0, "learning_rate": 1.1427709440663484e-05, "loss": 1.0898, "step": 12032 }, { "epoch": 0.47081148759683855, "grad_norm": 0.0, "learning_rate": 1.1426455179913777e-05, "loss": 0.9701, "step": 12033 }, { "epoch": 0.470850614289068, "grad_norm": 0.0, "learning_rate": 1.142520089625699e-05, "loss": 1.1682, "step": 12034 }, { "epoch": 0.47088974098129743, "grad_norm": 0.0, "learning_rate": 1.1423946589713259e-05, "loss": 1.0872, "step": 12035 }, { "epoch": 0.4709288676735269, "grad_norm": 0.0, "learning_rate": 1.1422692260302733e-05, "loss": 1.0182, "step": 12036 }, { "epoch": 0.4709679943657563, "grad_norm": 0.0, "learning_rate": 1.1421437908045552e-05, "loss": 1.1517, "step": 12037 }, { "epoch": 0.47100712105798576, "grad_norm": 0.0, "learning_rate": 1.1420183532961855e-05, "loss": 0.9595, "step": 12038 }, { "epoch": 0.4710462477502152, "grad_norm": 0.0, "learning_rate": 1.1418929135071795e-05, "loss": 0.9382, "step": 12039 }, { "epoch": 0.47108537444244464, "grad_norm": 0.0, "learning_rate": 1.1417674714395509e-05, "loss": 1.0349, "step": 12040 }, { "epoch": 0.4711245011346741, "grad_norm": 0.0, "learning_rate": 1.1416420270953144e-05, "loss": 1.1581, "step": 12041 }, { "epoch": 0.4711636278269035, "grad_norm": 0.0, "learning_rate": 1.1415165804764842e-05, "loss": 1.1264, "step": 12042 }, { "epoch": 0.47120275451913296, "grad_norm": 0.0, "learning_rate": 1.1413911315850754e-05, "loss": 1.093, "step": 12043 }, { "epoch": 0.4712418812113624, "grad_norm": 0.0, "learning_rate": 1.1412656804231019e-05, "loss": 1.069, "step": 12044 }, { "epoch": 0.47128100790359184, "grad_norm": 0.0, "learning_rate": 1.141140226992579e-05, "loss": 1.0798, "step": 12045 }, { "epoch": 0.4713201345958213, "grad_norm": 0.0, "learning_rate": 1.1410147712955206e-05, "loss": 0.9701, "step": 12046 }, { "epoch": 0.4713592612880507, "grad_norm": 0.0, "learning_rate": 1.1408893133339416e-05, "loss": 1.0009, "step": 12047 }, { "epoch": 0.47139838798028016, "grad_norm": 0.0, "learning_rate": 1.140763853109857e-05, "loss": 1.0406, "step": 12048 }, { "epoch": 0.4714375146725096, "grad_norm": 0.0, "learning_rate": 1.1406383906252812e-05, "loss": 1.046, "step": 12049 }, { "epoch": 0.47147664136473905, "grad_norm": 0.0, "learning_rate": 1.1405129258822292e-05, "loss": 0.9676, "step": 12050 }, { "epoch": 0.4715157680569685, "grad_norm": 0.0, "learning_rate": 1.1403874588827156e-05, "loss": 1.0452, "step": 12051 }, { "epoch": 0.47155489474919793, "grad_norm": 0.0, "learning_rate": 1.1402619896287553e-05, "loss": 1.1876, "step": 12052 }, { "epoch": 0.47159402144142737, "grad_norm": 0.0, "learning_rate": 1.140136518122363e-05, "loss": 1.0607, "step": 12053 }, { "epoch": 0.47163314813365675, "grad_norm": 0.0, "learning_rate": 1.1400110443655541e-05, "loss": 1.1602, "step": 12054 }, { "epoch": 0.4716722748258862, "grad_norm": 0.0, "learning_rate": 1.1398855683603431e-05, "loss": 0.9715, "step": 12055 }, { "epoch": 0.47171140151811564, "grad_norm": 0.0, "learning_rate": 1.1397600901087455e-05, "loss": 1.0544, "step": 12056 }, { "epoch": 0.4717505282103451, "grad_norm": 0.0, "learning_rate": 1.1396346096127755e-05, "loss": 1.0035, "step": 12057 }, { "epoch": 0.4717896549025745, "grad_norm": 0.0, "learning_rate": 1.1395091268744492e-05, "loss": 1.1156, "step": 12058 }, { "epoch": 0.47182878159480396, "grad_norm": 0.0, "learning_rate": 1.1393836418957806e-05, "loss": 1.0136, "step": 12059 }, { "epoch": 0.4718679082870334, "grad_norm": 0.0, "learning_rate": 1.1392581546787855e-05, "loss": 1.0626, "step": 12060 }, { "epoch": 0.47190703497926284, "grad_norm": 0.0, "learning_rate": 1.1391326652254792e-05, "loss": 0.9537, "step": 12061 }, { "epoch": 0.4719461616714923, "grad_norm": 0.0, "learning_rate": 1.1390071735378762e-05, "loss": 1.0349, "step": 12062 }, { "epoch": 0.4719852883637217, "grad_norm": 0.0, "learning_rate": 1.1388816796179927e-05, "loss": 1.1387, "step": 12063 }, { "epoch": 0.47202441505595116, "grad_norm": 0.0, "learning_rate": 1.138756183467843e-05, "loss": 1.1138, "step": 12064 }, { "epoch": 0.4720635417481806, "grad_norm": 0.0, "learning_rate": 1.1386306850894431e-05, "loss": 1.0979, "step": 12065 }, { "epoch": 0.47210266844041004, "grad_norm": 0.0, "learning_rate": 1.1385051844848083e-05, "loss": 1.0717, "step": 12066 }, { "epoch": 0.4721417951326395, "grad_norm": 0.0, "learning_rate": 1.1383796816559534e-05, "loss": 0.9366, "step": 12067 }, { "epoch": 0.4721809218248689, "grad_norm": 0.0, "learning_rate": 1.1382541766048944e-05, "loss": 1.0554, "step": 12068 }, { "epoch": 0.47222004851709837, "grad_norm": 0.0, "learning_rate": 1.1381286693336465e-05, "loss": 1.0383, "step": 12069 }, { "epoch": 0.4722591752093278, "grad_norm": 0.0, "learning_rate": 1.1380031598442254e-05, "loss": 0.9103, "step": 12070 }, { "epoch": 0.47229830190155725, "grad_norm": 0.0, "learning_rate": 1.1378776481386462e-05, "loss": 1.165, "step": 12071 }, { "epoch": 0.4723374285937867, "grad_norm": 0.0, "learning_rate": 1.137752134218925e-05, "loss": 1.1365, "step": 12072 }, { "epoch": 0.47237655528601613, "grad_norm": 0.0, "learning_rate": 1.1376266180870769e-05, "loss": 1.1129, "step": 12073 }, { "epoch": 0.47241568197824557, "grad_norm": 0.0, "learning_rate": 1.1375010997451182e-05, "loss": 0.9449, "step": 12074 }, { "epoch": 0.472454808670475, "grad_norm": 0.0, "learning_rate": 1.1373755791950638e-05, "loss": 1.0768, "step": 12075 }, { "epoch": 0.47249393536270445, "grad_norm": 0.0, "learning_rate": 1.1372500564389298e-05, "loss": 0.8929, "step": 12076 }, { "epoch": 0.4725330620549339, "grad_norm": 0.0, "learning_rate": 1.1371245314787318e-05, "loss": 1.0992, "step": 12077 }, { "epoch": 0.47257218874716334, "grad_norm": 0.0, "learning_rate": 1.1369990043164855e-05, "loss": 0.999, "step": 12078 }, { "epoch": 0.4726113154393928, "grad_norm": 0.0, "learning_rate": 1.1368734749542072e-05, "loss": 1.1586, "step": 12079 }, { "epoch": 0.4726504421316222, "grad_norm": 0.0, "learning_rate": 1.1367479433939124e-05, "loss": 1.053, "step": 12080 }, { "epoch": 0.47268956882385166, "grad_norm": 0.0, "learning_rate": 1.1366224096376168e-05, "loss": 1.1448, "step": 12081 }, { "epoch": 0.47272869551608104, "grad_norm": 0.0, "learning_rate": 1.1364968736873366e-05, "loss": 1.0094, "step": 12082 }, { "epoch": 0.4727678222083105, "grad_norm": 0.0, "learning_rate": 1.1363713355450876e-05, "loss": 1.0153, "step": 12083 }, { "epoch": 0.4728069489005399, "grad_norm": 0.0, "learning_rate": 1.136245795212886e-05, "loss": 1.1046, "step": 12084 }, { "epoch": 0.47284607559276937, "grad_norm": 0.0, "learning_rate": 1.1361202526927473e-05, "loss": 1.0616, "step": 12085 }, { "epoch": 0.4728852022849988, "grad_norm": 0.0, "learning_rate": 1.1359947079866882e-05, "loss": 0.9836, "step": 12086 }, { "epoch": 0.47292432897722825, "grad_norm": 0.0, "learning_rate": 1.1358691610967242e-05, "loss": 1.1752, "step": 12087 }, { "epoch": 0.4729634556694577, "grad_norm": 0.0, "learning_rate": 1.1357436120248722e-05, "loss": 0.9716, "step": 12088 }, { "epoch": 0.47300258236168713, "grad_norm": 0.0, "learning_rate": 1.1356180607731476e-05, "loss": 1.0239, "step": 12089 }, { "epoch": 0.47304170905391657, "grad_norm": 0.0, "learning_rate": 1.135492507343567e-05, "loss": 1.0633, "step": 12090 }, { "epoch": 0.473080835746146, "grad_norm": 0.0, "learning_rate": 1.1353669517381463e-05, "loss": 1.0361, "step": 12091 }, { "epoch": 0.47311996243837545, "grad_norm": 0.0, "learning_rate": 1.1352413939589022e-05, "loss": 1.1422, "step": 12092 }, { "epoch": 0.4731590891306049, "grad_norm": 0.0, "learning_rate": 1.1351158340078505e-05, "loss": 1.0468, "step": 12093 }, { "epoch": 0.47319821582283433, "grad_norm": 0.0, "learning_rate": 1.1349902718870081e-05, "loss": 1.1537, "step": 12094 }, { "epoch": 0.4732373425150638, "grad_norm": 0.0, "learning_rate": 1.1348647075983909e-05, "loss": 1.0742, "step": 12095 }, { "epoch": 0.4732764692072932, "grad_norm": 0.0, "learning_rate": 1.1347391411440157e-05, "loss": 0.9488, "step": 12096 }, { "epoch": 0.47331559589952266, "grad_norm": 0.0, "learning_rate": 1.1346135725258987e-05, "loss": 1.064, "step": 12097 }, { "epoch": 0.4733547225917521, "grad_norm": 0.0, "learning_rate": 1.1344880017460565e-05, "loss": 1.0555, "step": 12098 }, { "epoch": 0.47339384928398154, "grad_norm": 0.0, "learning_rate": 1.134362428806505e-05, "loss": 1.0256, "step": 12099 }, { "epoch": 0.473432975976211, "grad_norm": 0.0, "learning_rate": 1.1342368537092617e-05, "loss": 1.1142, "step": 12100 }, { "epoch": 0.4734721026684404, "grad_norm": 0.0, "learning_rate": 1.1341112764563428e-05, "loss": 0.9308, "step": 12101 }, { "epoch": 0.47351122936066986, "grad_norm": 0.0, "learning_rate": 1.1339856970497645e-05, "loss": 1.2094, "step": 12102 }, { "epoch": 0.4735503560528993, "grad_norm": 0.0, "learning_rate": 1.1338601154915441e-05, "loss": 0.9163, "step": 12103 }, { "epoch": 0.47358948274512874, "grad_norm": 0.0, "learning_rate": 1.133734531783698e-05, "loss": 1.0915, "step": 12104 }, { "epoch": 0.4736286094373582, "grad_norm": 0.0, "learning_rate": 1.1336089459282426e-05, "loss": 1.0325, "step": 12105 }, { "epoch": 0.4736677361295876, "grad_norm": 0.0, "learning_rate": 1.133483357927195e-05, "loss": 0.9743, "step": 12106 }, { "epoch": 0.47370686282181707, "grad_norm": 0.0, "learning_rate": 1.133357767782572e-05, "loss": 0.9971, "step": 12107 }, { "epoch": 0.4737459895140465, "grad_norm": 0.0, "learning_rate": 1.1332321754963907e-05, "loss": 0.975, "step": 12108 }, { "epoch": 0.47378511620627595, "grad_norm": 0.0, "learning_rate": 1.133106581070667e-05, "loss": 0.9757, "step": 12109 }, { "epoch": 0.4738242428985054, "grad_norm": 0.0, "learning_rate": 1.132980984507419e-05, "loss": 1.0609, "step": 12110 }, { "epoch": 0.4738633695907348, "grad_norm": 0.0, "learning_rate": 1.1328553858086624e-05, "loss": 1.1141, "step": 12111 }, { "epoch": 0.4739024962829642, "grad_norm": 0.0, "learning_rate": 1.1327297849764153e-05, "loss": 1.011, "step": 12112 }, { "epoch": 0.47394162297519365, "grad_norm": 0.0, "learning_rate": 1.1326041820126939e-05, "loss": 1.1588, "step": 12113 }, { "epoch": 0.4739807496674231, "grad_norm": 0.0, "learning_rate": 1.1324785769195157e-05, "loss": 1.0674, "step": 12114 }, { "epoch": 0.47401987635965254, "grad_norm": 0.0, "learning_rate": 1.1323529696988976e-05, "loss": 1.0148, "step": 12115 }, { "epoch": 0.474059003051882, "grad_norm": 0.0, "learning_rate": 1.1322273603528562e-05, "loss": 1.0102, "step": 12116 }, { "epoch": 0.4740981297441114, "grad_norm": 0.0, "learning_rate": 1.1321017488834097e-05, "loss": 1.0583, "step": 12117 }, { "epoch": 0.47413725643634086, "grad_norm": 0.0, "learning_rate": 1.131976135292574e-05, "loss": 1.1145, "step": 12118 }, { "epoch": 0.4741763831285703, "grad_norm": 0.0, "learning_rate": 1.1318505195823674e-05, "loss": 1.0579, "step": 12119 }, { "epoch": 0.47421550982079974, "grad_norm": 0.0, "learning_rate": 1.1317249017548064e-05, "loss": 1.0096, "step": 12120 }, { "epoch": 0.4742546365130292, "grad_norm": 0.0, "learning_rate": 1.1315992818119087e-05, "loss": 1.0582, "step": 12121 }, { "epoch": 0.4742937632052586, "grad_norm": 0.0, "learning_rate": 1.1314736597556915e-05, "loss": 1.0293, "step": 12122 }, { "epoch": 0.47433288989748806, "grad_norm": 0.0, "learning_rate": 1.131348035588172e-05, "loss": 1.0455, "step": 12123 }, { "epoch": 0.4743720165897175, "grad_norm": 0.0, "learning_rate": 1.1312224093113677e-05, "loss": 1.1715, "step": 12124 }, { "epoch": 0.47441114328194695, "grad_norm": 0.0, "learning_rate": 1.1310967809272957e-05, "loss": 0.9001, "step": 12125 }, { "epoch": 0.4744502699741764, "grad_norm": 0.0, "learning_rate": 1.1309711504379739e-05, "loss": 1.0969, "step": 12126 }, { "epoch": 0.4744893966664058, "grad_norm": 0.0, "learning_rate": 1.1308455178454194e-05, "loss": 1.0397, "step": 12127 }, { "epoch": 0.47452852335863527, "grad_norm": 0.0, "learning_rate": 1.1307198831516502e-05, "loss": 0.9707, "step": 12128 }, { "epoch": 0.4745676500508647, "grad_norm": 0.0, "learning_rate": 1.130594246358683e-05, "loss": 1.0457, "step": 12129 }, { "epoch": 0.47460677674309415, "grad_norm": 0.0, "learning_rate": 1.130468607468536e-05, "loss": 0.9303, "step": 12130 }, { "epoch": 0.4746459034353236, "grad_norm": 0.0, "learning_rate": 1.1303429664832266e-05, "loss": 1.0688, "step": 12131 }, { "epoch": 0.47468503012755303, "grad_norm": 0.0, "learning_rate": 1.1302173234047727e-05, "loss": 1.0002, "step": 12132 }, { "epoch": 0.4747241568197825, "grad_norm": 0.0, "learning_rate": 1.1300916782351918e-05, "loss": 1.0148, "step": 12133 }, { "epoch": 0.4747632835120119, "grad_norm": 0.0, "learning_rate": 1.1299660309765013e-05, "loss": 1.082, "step": 12134 }, { "epoch": 0.47480241020424135, "grad_norm": 0.0, "learning_rate": 1.1298403816307193e-05, "loss": 0.8997, "step": 12135 }, { "epoch": 0.4748415368964708, "grad_norm": 0.0, "learning_rate": 1.1297147301998633e-05, "loss": 1.058, "step": 12136 }, { "epoch": 0.47488066358870024, "grad_norm": 0.0, "learning_rate": 1.1295890766859516e-05, "loss": 1.0779, "step": 12137 }, { "epoch": 0.4749197902809297, "grad_norm": 0.0, "learning_rate": 1.1294634210910015e-05, "loss": 1.1821, "step": 12138 }, { "epoch": 0.47495891697315906, "grad_norm": 0.0, "learning_rate": 1.1293377634170311e-05, "loss": 1.1506, "step": 12139 }, { "epoch": 0.4749980436653885, "grad_norm": 0.0, "learning_rate": 1.1292121036660584e-05, "loss": 1.2045, "step": 12140 }, { "epoch": 0.47503717035761794, "grad_norm": 0.0, "learning_rate": 1.129086441840101e-05, "loss": 1.0454, "step": 12141 }, { "epoch": 0.4750762970498474, "grad_norm": 0.0, "learning_rate": 1.1289607779411775e-05, "loss": 1.0414, "step": 12142 }, { "epoch": 0.4751154237420768, "grad_norm": 0.0, "learning_rate": 1.1288351119713053e-05, "loss": 1.043, "step": 12143 }, { "epoch": 0.47515455043430627, "grad_norm": 0.0, "learning_rate": 1.1287094439325027e-05, "loss": 0.9676, "step": 12144 }, { "epoch": 0.4751936771265357, "grad_norm": 0.0, "learning_rate": 1.1285837738267875e-05, "loss": 1.0781, "step": 12145 }, { "epoch": 0.47523280381876515, "grad_norm": 0.0, "learning_rate": 1.1284581016561781e-05, "loss": 1.1553, "step": 12146 }, { "epoch": 0.4752719305109946, "grad_norm": 0.0, "learning_rate": 1.1283324274226925e-05, "loss": 0.9576, "step": 12147 }, { "epoch": 0.47531105720322403, "grad_norm": 0.0, "learning_rate": 1.1282067511283491e-05, "loss": 1.0867, "step": 12148 }, { "epoch": 0.47535018389545347, "grad_norm": 0.0, "learning_rate": 1.1280810727751658e-05, "loss": 1.1192, "step": 12149 }, { "epoch": 0.4753893105876829, "grad_norm": 0.0, "learning_rate": 1.1279553923651612e-05, "loss": 1.0956, "step": 12150 }, { "epoch": 0.47542843727991235, "grad_norm": 0.0, "learning_rate": 1.1278297099003529e-05, "loss": 1.0156, "step": 12151 }, { "epoch": 0.4754675639721418, "grad_norm": 0.0, "learning_rate": 1.12770402538276e-05, "loss": 1.1134, "step": 12152 }, { "epoch": 0.47550669066437123, "grad_norm": 0.0, "learning_rate": 1.1275783388144003e-05, "loss": 1.0568, "step": 12153 }, { "epoch": 0.4755458173566007, "grad_norm": 0.0, "learning_rate": 1.1274526501972924e-05, "loss": 1.0957, "step": 12154 }, { "epoch": 0.4755849440488301, "grad_norm": 0.0, "learning_rate": 1.1273269595334547e-05, "loss": 0.9365, "step": 12155 }, { "epoch": 0.47562407074105956, "grad_norm": 0.0, "learning_rate": 1.1272012668249055e-05, "loss": 1.1476, "step": 12156 }, { "epoch": 0.475663197433289, "grad_norm": 0.0, "learning_rate": 1.1270755720736632e-05, "loss": 1.0664, "step": 12157 }, { "epoch": 0.47570232412551844, "grad_norm": 0.0, "learning_rate": 1.1269498752817466e-05, "loss": 0.9466, "step": 12158 }, { "epoch": 0.4757414508177479, "grad_norm": 0.0, "learning_rate": 1.1268241764511742e-05, "loss": 1.0349, "step": 12159 }, { "epoch": 0.4757805775099773, "grad_norm": 0.0, "learning_rate": 1.1266984755839642e-05, "loss": 1.0578, "step": 12160 }, { "epoch": 0.47581970420220676, "grad_norm": 0.0, "learning_rate": 1.1265727726821356e-05, "loss": 0.939, "step": 12161 }, { "epoch": 0.4758588308944362, "grad_norm": 0.0, "learning_rate": 1.126447067747707e-05, "loss": 1.0845, "step": 12162 }, { "epoch": 0.47589795758666564, "grad_norm": 0.0, "learning_rate": 1.1263213607826968e-05, "loss": 0.9935, "step": 12163 }, { "epoch": 0.4759370842788951, "grad_norm": 0.0, "learning_rate": 1.126195651789124e-05, "loss": 0.993, "step": 12164 }, { "epoch": 0.4759762109711245, "grad_norm": 0.0, "learning_rate": 1.1260699407690067e-05, "loss": 0.8794, "step": 12165 }, { "epoch": 0.47601533766335397, "grad_norm": 0.0, "learning_rate": 1.1259442277243646e-05, "loss": 1.1574, "step": 12166 }, { "epoch": 0.4760544643555834, "grad_norm": 0.0, "learning_rate": 1.1258185126572156e-05, "loss": 1.1661, "step": 12167 }, { "epoch": 0.4760935910478128, "grad_norm": 0.0, "learning_rate": 1.1256927955695793e-05, "loss": 1.2537, "step": 12168 }, { "epoch": 0.47613271774004223, "grad_norm": 0.0, "learning_rate": 1.125567076463474e-05, "loss": 0.8852, "step": 12169 }, { "epoch": 0.4761718444322717, "grad_norm": 0.0, "learning_rate": 1.1254413553409189e-05, "loss": 1.0445, "step": 12170 }, { "epoch": 0.4762109711245011, "grad_norm": 0.0, "learning_rate": 1.1253156322039328e-05, "loss": 1.1283, "step": 12171 }, { "epoch": 0.47625009781673056, "grad_norm": 0.0, "learning_rate": 1.125189907054535e-05, "loss": 1.0106, "step": 12172 }, { "epoch": 0.47628922450896, "grad_norm": 0.0, "learning_rate": 1.1250641798947437e-05, "loss": 1.1216, "step": 12173 }, { "epoch": 0.47632835120118944, "grad_norm": 0.0, "learning_rate": 1.1249384507265783e-05, "loss": 0.9953, "step": 12174 }, { "epoch": 0.4763674778934189, "grad_norm": 0.0, "learning_rate": 1.1248127195520583e-05, "loss": 1.0222, "step": 12175 }, { "epoch": 0.4764066045856483, "grad_norm": 0.0, "learning_rate": 1.1246869863732023e-05, "loss": 0.8782, "step": 12176 }, { "epoch": 0.47644573127787776, "grad_norm": 0.0, "learning_rate": 1.1245612511920297e-05, "loss": 1.0781, "step": 12177 }, { "epoch": 0.4764848579701072, "grad_norm": 0.0, "learning_rate": 1.1244355140105591e-05, "loss": 0.9978, "step": 12178 }, { "epoch": 0.47652398466233664, "grad_norm": 0.0, "learning_rate": 1.1243097748308106e-05, "loss": 1.0622, "step": 12179 }, { "epoch": 0.4765631113545661, "grad_norm": 0.0, "learning_rate": 1.1241840336548022e-05, "loss": 1.1345, "step": 12180 }, { "epoch": 0.4766022380467955, "grad_norm": 0.0, "learning_rate": 1.1240582904845542e-05, "loss": 1.0077, "step": 12181 }, { "epoch": 0.47664136473902496, "grad_norm": 0.0, "learning_rate": 1.1239325453220858e-05, "loss": 1.0472, "step": 12182 }, { "epoch": 0.4766804914312544, "grad_norm": 0.0, "learning_rate": 1.1238067981694157e-05, "loss": 1.0973, "step": 12183 }, { "epoch": 0.47671961812348385, "grad_norm": 0.0, "learning_rate": 1.1236810490285638e-05, "loss": 1.0756, "step": 12184 }, { "epoch": 0.4767587448157133, "grad_norm": 0.0, "learning_rate": 1.123555297901549e-05, "loss": 0.991, "step": 12185 }, { "epoch": 0.4767978715079427, "grad_norm": 0.0, "learning_rate": 1.1234295447903909e-05, "loss": 1.1052, "step": 12186 }, { "epoch": 0.47683699820017217, "grad_norm": 0.0, "learning_rate": 1.1233037896971091e-05, "loss": 1.0473, "step": 12187 }, { "epoch": 0.4768761248924016, "grad_norm": 0.0, "learning_rate": 1.1231780326237227e-05, "loss": 1.0986, "step": 12188 }, { "epoch": 0.47691525158463105, "grad_norm": 0.0, "learning_rate": 1.1230522735722519e-05, "loss": 1.0674, "step": 12189 }, { "epoch": 0.4769543782768605, "grad_norm": 0.0, "learning_rate": 1.1229265125447155e-05, "loss": 1.0447, "step": 12190 }, { "epoch": 0.47699350496908993, "grad_norm": 0.0, "learning_rate": 1.1228007495431332e-05, "loss": 1.1258, "step": 12191 }, { "epoch": 0.4770326316613194, "grad_norm": 0.0, "learning_rate": 1.1226749845695251e-05, "loss": 1.0289, "step": 12192 }, { "epoch": 0.4770717583535488, "grad_norm": 0.0, "learning_rate": 1.1225492176259102e-05, "loss": 0.9713, "step": 12193 }, { "epoch": 0.47711088504577825, "grad_norm": 0.0, "learning_rate": 1.1224234487143085e-05, "loss": 1.0515, "step": 12194 }, { "epoch": 0.4771500117380077, "grad_norm": 0.0, "learning_rate": 1.1222976778367397e-05, "loss": 1.057, "step": 12195 }, { "epoch": 0.4771891384302371, "grad_norm": 0.0, "learning_rate": 1.1221719049952232e-05, "loss": 1.0494, "step": 12196 }, { "epoch": 0.4772282651224665, "grad_norm": 0.0, "learning_rate": 1.1220461301917793e-05, "loss": 0.9911, "step": 12197 }, { "epoch": 0.47726739181469596, "grad_norm": 0.0, "learning_rate": 1.121920353428427e-05, "loss": 1.071, "step": 12198 }, { "epoch": 0.4773065185069254, "grad_norm": 0.0, "learning_rate": 1.1217945747071871e-05, "loss": 1.023, "step": 12199 }, { "epoch": 0.47734564519915484, "grad_norm": 0.0, "learning_rate": 1.1216687940300789e-05, "loss": 0.9846, "step": 12200 }, { "epoch": 0.4773847718913843, "grad_norm": 0.0, "learning_rate": 1.1215430113991222e-05, "loss": 0.898, "step": 12201 }, { "epoch": 0.4774238985836137, "grad_norm": 0.0, "learning_rate": 1.121417226816337e-05, "loss": 1.1094, "step": 12202 }, { "epoch": 0.47746302527584317, "grad_norm": 0.0, "learning_rate": 1.1212914402837434e-05, "loss": 1.0587, "step": 12203 }, { "epoch": 0.4775021519680726, "grad_norm": 0.0, "learning_rate": 1.1211656518033612e-05, "loss": 0.8909, "step": 12204 }, { "epoch": 0.47754127866030205, "grad_norm": 0.0, "learning_rate": 1.1210398613772105e-05, "loss": 1.2284, "step": 12205 }, { "epoch": 0.4775804053525315, "grad_norm": 0.0, "learning_rate": 1.1209140690073112e-05, "loss": 1.0344, "step": 12206 }, { "epoch": 0.47761953204476093, "grad_norm": 0.0, "learning_rate": 1.1207882746956834e-05, "loss": 1.1573, "step": 12207 }, { "epoch": 0.47765865873699037, "grad_norm": 0.0, "learning_rate": 1.1206624784443473e-05, "loss": 0.9438, "step": 12208 }, { "epoch": 0.4776977854292198, "grad_norm": 0.0, "learning_rate": 1.1205366802553231e-05, "loss": 1.0057, "step": 12209 }, { "epoch": 0.47773691212144925, "grad_norm": 0.0, "learning_rate": 1.1204108801306308e-05, "loss": 0.8779, "step": 12210 }, { "epoch": 0.4777760388136787, "grad_norm": 0.0, "learning_rate": 1.120285078072291e-05, "loss": 1.0717, "step": 12211 }, { "epoch": 0.47781516550590813, "grad_norm": 0.0, "learning_rate": 1.1201592740823231e-05, "loss": 1.0237, "step": 12212 }, { "epoch": 0.4778542921981376, "grad_norm": 0.0, "learning_rate": 1.120033468162748e-05, "loss": 1.0033, "step": 12213 }, { "epoch": 0.477893418890367, "grad_norm": 0.0, "learning_rate": 1.1199076603155857e-05, "loss": 1.0391, "step": 12214 }, { "epoch": 0.47793254558259646, "grad_norm": 0.0, "learning_rate": 1.1197818505428568e-05, "loss": 1.1593, "step": 12215 }, { "epoch": 0.4779716722748259, "grad_norm": 0.0, "learning_rate": 1.1196560388465811e-05, "loss": 1.0455, "step": 12216 }, { "epoch": 0.47801079896705534, "grad_norm": 0.0, "learning_rate": 1.11953022522878e-05, "loss": 0.9842, "step": 12217 }, { "epoch": 0.4780499256592848, "grad_norm": 0.0, "learning_rate": 1.1194044096914727e-05, "loss": 1.1119, "step": 12218 }, { "epoch": 0.4780890523515142, "grad_norm": 0.0, "learning_rate": 1.1192785922366805e-05, "loss": 1.0824, "step": 12219 }, { "epoch": 0.47812817904374366, "grad_norm": 0.0, "learning_rate": 1.1191527728664235e-05, "loss": 1.1323, "step": 12220 }, { "epoch": 0.4781673057359731, "grad_norm": 0.0, "learning_rate": 1.119026951582722e-05, "loss": 0.9396, "step": 12221 }, { "epoch": 0.47820643242820254, "grad_norm": 0.0, "learning_rate": 1.1189011283875973e-05, "loss": 1.0708, "step": 12222 }, { "epoch": 0.478245559120432, "grad_norm": 0.0, "learning_rate": 1.118775303283069e-05, "loss": 0.9991, "step": 12223 }, { "epoch": 0.4782846858126614, "grad_norm": 0.0, "learning_rate": 1.1186494762711585e-05, "loss": 1.1643, "step": 12224 }, { "epoch": 0.4783238125048908, "grad_norm": 0.0, "learning_rate": 1.1185236473538861e-05, "loss": 1.0976, "step": 12225 }, { "epoch": 0.47836293919712025, "grad_norm": 0.0, "learning_rate": 1.1183978165332723e-05, "loss": 0.9603, "step": 12226 }, { "epoch": 0.4784020658893497, "grad_norm": 0.0, "learning_rate": 1.1182719838113378e-05, "loss": 0.8558, "step": 12227 }, { "epoch": 0.47844119258157913, "grad_norm": 0.0, "learning_rate": 1.1181461491901038e-05, "loss": 0.9347, "step": 12228 }, { "epoch": 0.4784803192738086, "grad_norm": 0.0, "learning_rate": 1.1180203126715905e-05, "loss": 1.1669, "step": 12229 }, { "epoch": 0.478519445966038, "grad_norm": 0.0, "learning_rate": 1.1178944742578189e-05, "loss": 1.0831, "step": 12230 }, { "epoch": 0.47855857265826746, "grad_norm": 0.0, "learning_rate": 1.1177686339508096e-05, "loss": 1.0791, "step": 12231 }, { "epoch": 0.4785976993504969, "grad_norm": 0.0, "learning_rate": 1.1176427917525839e-05, "loss": 1.0539, "step": 12232 }, { "epoch": 0.47863682604272634, "grad_norm": 0.0, "learning_rate": 1.1175169476651622e-05, "loss": 0.943, "step": 12233 }, { "epoch": 0.4786759527349558, "grad_norm": 0.0, "learning_rate": 1.1173911016905656e-05, "loss": 0.8899, "step": 12234 }, { "epoch": 0.4787150794271852, "grad_norm": 0.0, "learning_rate": 1.1172652538308152e-05, "loss": 1.0531, "step": 12235 }, { "epoch": 0.47875420611941466, "grad_norm": 0.0, "learning_rate": 1.1171394040879312e-05, "loss": 0.9378, "step": 12236 }, { "epoch": 0.4787933328116441, "grad_norm": 0.0, "learning_rate": 1.1170135524639355e-05, "loss": 1.0353, "step": 12237 }, { "epoch": 0.47883245950387354, "grad_norm": 0.0, "learning_rate": 1.1168876989608487e-05, "loss": 1.1834, "step": 12238 }, { "epoch": 0.478871586196103, "grad_norm": 0.0, "learning_rate": 1.1167618435806924e-05, "loss": 1.0555, "step": 12239 }, { "epoch": 0.4789107128883324, "grad_norm": 0.0, "learning_rate": 1.1166359863254868e-05, "loss": 1.1141, "step": 12240 }, { "epoch": 0.47894983958056186, "grad_norm": 0.0, "learning_rate": 1.1165101271972534e-05, "loss": 1.0858, "step": 12241 }, { "epoch": 0.4789889662727913, "grad_norm": 0.0, "learning_rate": 1.1163842661980133e-05, "loss": 1.0499, "step": 12242 }, { "epoch": 0.47902809296502075, "grad_norm": 0.0, "learning_rate": 1.1162584033297878e-05, "loss": 1.0059, "step": 12243 }, { "epoch": 0.4790672196572502, "grad_norm": 0.0, "learning_rate": 1.1161325385945981e-05, "loss": 1.0627, "step": 12244 }, { "epoch": 0.47910634634947963, "grad_norm": 0.0, "learning_rate": 1.1160066719944651e-05, "loss": 0.9973, "step": 12245 }, { "epoch": 0.47914547304170907, "grad_norm": 0.0, "learning_rate": 1.1158808035314105e-05, "loss": 1.1213, "step": 12246 }, { "epoch": 0.4791845997339385, "grad_norm": 0.0, "learning_rate": 1.1157549332074553e-05, "loss": 1.0117, "step": 12247 }, { "epoch": 0.47922372642616795, "grad_norm": 0.0, "learning_rate": 1.1156290610246213e-05, "loss": 1.0231, "step": 12248 }, { "epoch": 0.4792628531183974, "grad_norm": 0.0, "learning_rate": 1.1155031869849291e-05, "loss": 1.0317, "step": 12249 }, { "epoch": 0.47930197981062683, "grad_norm": 0.0, "learning_rate": 1.1153773110904005e-05, "loss": 0.9365, "step": 12250 }, { "epoch": 0.4793411065028563, "grad_norm": 0.0, "learning_rate": 1.1152514333430571e-05, "loss": 1.0031, "step": 12251 }, { "epoch": 0.4793802331950857, "grad_norm": 0.0, "learning_rate": 1.1151255537449198e-05, "loss": 1.0782, "step": 12252 }, { "epoch": 0.4794193598873151, "grad_norm": 0.0, "learning_rate": 1.1149996722980102e-05, "loss": 1.1268, "step": 12253 }, { "epoch": 0.47945848657954454, "grad_norm": 0.0, "learning_rate": 1.1148737890043503e-05, "loss": 1.1112, "step": 12254 }, { "epoch": 0.479497613271774, "grad_norm": 0.0, "learning_rate": 1.1147479038659614e-05, "loss": 1.1471, "step": 12255 }, { "epoch": 0.4795367399640034, "grad_norm": 0.0, "learning_rate": 1.1146220168848645e-05, "loss": 1.1523, "step": 12256 }, { "epoch": 0.47957586665623286, "grad_norm": 0.0, "learning_rate": 1.1144961280630822e-05, "loss": 1.0597, "step": 12257 }, { "epoch": 0.4796149933484623, "grad_norm": 0.0, "learning_rate": 1.1143702374026351e-05, "loss": 1.0648, "step": 12258 }, { "epoch": 0.47965412004069174, "grad_norm": 0.0, "learning_rate": 1.1142443449055455e-05, "loss": 1.0917, "step": 12259 }, { "epoch": 0.4796932467329212, "grad_norm": 0.0, "learning_rate": 1.1141184505738349e-05, "loss": 0.8868, "step": 12260 }, { "epoch": 0.4797323734251506, "grad_norm": 0.0, "learning_rate": 1.1139925544095249e-05, "loss": 1.0096, "step": 12261 }, { "epoch": 0.47977150011738007, "grad_norm": 0.0, "learning_rate": 1.1138666564146375e-05, "loss": 1.1215, "step": 12262 }, { "epoch": 0.4798106268096095, "grad_norm": 0.0, "learning_rate": 1.113740756591194e-05, "loss": 1.101, "step": 12263 }, { "epoch": 0.47984975350183895, "grad_norm": 0.0, "learning_rate": 1.1136148549412166e-05, "loss": 1.1234, "step": 12264 }, { "epoch": 0.4798888801940684, "grad_norm": 0.0, "learning_rate": 1.1134889514667269e-05, "loss": 1.019, "step": 12265 }, { "epoch": 0.47992800688629783, "grad_norm": 0.0, "learning_rate": 1.113363046169747e-05, "loss": 1.0787, "step": 12266 }, { "epoch": 0.47996713357852727, "grad_norm": 0.0, "learning_rate": 1.1132371390522985e-05, "loss": 1.0706, "step": 12267 }, { "epoch": 0.4800062602707567, "grad_norm": 0.0, "learning_rate": 1.1131112301164035e-05, "loss": 1.0904, "step": 12268 }, { "epoch": 0.48004538696298615, "grad_norm": 0.0, "learning_rate": 1.112985319364084e-05, "loss": 1.0413, "step": 12269 }, { "epoch": 0.4800845136552156, "grad_norm": 0.0, "learning_rate": 1.1128594067973617e-05, "loss": 1.0192, "step": 12270 }, { "epoch": 0.48012364034744504, "grad_norm": 0.0, "learning_rate": 1.112733492418259e-05, "loss": 1.123, "step": 12271 }, { "epoch": 0.4801627670396745, "grad_norm": 0.0, "learning_rate": 1.1126075762287972e-05, "loss": 0.8829, "step": 12272 }, { "epoch": 0.4802018937319039, "grad_norm": 0.0, "learning_rate": 1.1124816582309992e-05, "loss": 1.0741, "step": 12273 }, { "epoch": 0.48024102042413336, "grad_norm": 0.0, "learning_rate": 1.1123557384268869e-05, "loss": 1.09, "step": 12274 }, { "epoch": 0.4802801471163628, "grad_norm": 0.0, "learning_rate": 1.1122298168184817e-05, "loss": 1.0677, "step": 12275 }, { "epoch": 0.48031927380859224, "grad_norm": 0.0, "learning_rate": 1.1121038934078066e-05, "loss": 1.0942, "step": 12276 }, { "epoch": 0.4803584005008217, "grad_norm": 0.0, "learning_rate": 1.1119779681968834e-05, "loss": 0.9463, "step": 12277 }, { "epoch": 0.4803975271930511, "grad_norm": 0.0, "learning_rate": 1.1118520411877343e-05, "loss": 1.0401, "step": 12278 }, { "epoch": 0.48043665388528056, "grad_norm": 0.0, "learning_rate": 1.1117261123823817e-05, "loss": 0.8649, "step": 12279 }, { "epoch": 0.48047578057751, "grad_norm": 0.0, "learning_rate": 1.1116001817828477e-05, "loss": 1.076, "step": 12280 }, { "epoch": 0.4805149072697394, "grad_norm": 0.0, "learning_rate": 1.1114742493911544e-05, "loss": 1.0713, "step": 12281 }, { "epoch": 0.48055403396196883, "grad_norm": 0.0, "learning_rate": 1.1113483152093246e-05, "loss": 1.0065, "step": 12282 }, { "epoch": 0.48059316065419827, "grad_norm": 0.0, "learning_rate": 1.1112223792393801e-05, "loss": 0.9676, "step": 12283 }, { "epoch": 0.4806322873464277, "grad_norm": 0.0, "learning_rate": 1.1110964414833438e-05, "loss": 1.0699, "step": 12284 }, { "epoch": 0.48067141403865715, "grad_norm": 0.0, "learning_rate": 1.1109705019432378e-05, "loss": 0.9639, "step": 12285 }, { "epoch": 0.4807105407308866, "grad_norm": 0.0, "learning_rate": 1.1108445606210845e-05, "loss": 0.9771, "step": 12286 }, { "epoch": 0.48074966742311603, "grad_norm": 0.0, "learning_rate": 1.1107186175189064e-05, "loss": 1.1089, "step": 12287 }, { "epoch": 0.4807887941153455, "grad_norm": 0.0, "learning_rate": 1.1105926726387264e-05, "loss": 1.02, "step": 12288 }, { "epoch": 0.4808279208075749, "grad_norm": 0.0, "learning_rate": 1.1104667259825666e-05, "loss": 1.0085, "step": 12289 }, { "epoch": 0.48086704749980436, "grad_norm": 0.0, "learning_rate": 1.1103407775524493e-05, "loss": 1.0582, "step": 12290 }, { "epoch": 0.4809061741920338, "grad_norm": 0.0, "learning_rate": 1.1102148273503974e-05, "loss": 1.0388, "step": 12291 }, { "epoch": 0.48094530088426324, "grad_norm": 0.0, "learning_rate": 1.1100888753784336e-05, "loss": 1.1771, "step": 12292 }, { "epoch": 0.4809844275764927, "grad_norm": 0.0, "learning_rate": 1.1099629216385805e-05, "loss": 0.9927, "step": 12293 }, { "epoch": 0.4810235542687221, "grad_norm": 0.0, "learning_rate": 1.1098369661328606e-05, "loss": 1.0826, "step": 12294 }, { "epoch": 0.48106268096095156, "grad_norm": 0.0, "learning_rate": 1.1097110088632965e-05, "loss": 1.0704, "step": 12295 }, { "epoch": 0.481101807653181, "grad_norm": 0.0, "learning_rate": 1.1095850498319112e-05, "loss": 1.1493, "step": 12296 }, { "epoch": 0.48114093434541044, "grad_norm": 0.0, "learning_rate": 1.1094590890407273e-05, "loss": 1.017, "step": 12297 }, { "epoch": 0.4811800610376399, "grad_norm": 0.0, "learning_rate": 1.1093331264917676e-05, "loss": 0.9736, "step": 12298 }, { "epoch": 0.4812191877298693, "grad_norm": 0.0, "learning_rate": 1.109207162187055e-05, "loss": 1.1584, "step": 12299 }, { "epoch": 0.48125831442209877, "grad_norm": 0.0, "learning_rate": 1.1090811961286124e-05, "loss": 1.0405, "step": 12300 }, { "epoch": 0.4812974411143282, "grad_norm": 0.0, "learning_rate": 1.1089552283184619e-05, "loss": 1.0935, "step": 12301 }, { "epoch": 0.48133656780655765, "grad_norm": 0.0, "learning_rate": 1.1088292587586273e-05, "loss": 0.9096, "step": 12302 }, { "epoch": 0.4813756944987871, "grad_norm": 0.0, "learning_rate": 1.108703287451131e-05, "loss": 1.1302, "step": 12303 }, { "epoch": 0.48141482119101653, "grad_norm": 0.0, "learning_rate": 1.1085773143979962e-05, "loss": 0.8746, "step": 12304 }, { "epoch": 0.48145394788324597, "grad_norm": 0.0, "learning_rate": 1.108451339601246e-05, "loss": 1.055, "step": 12305 }, { "epoch": 0.4814930745754754, "grad_norm": 0.0, "learning_rate": 1.1083253630629029e-05, "loss": 0.9646, "step": 12306 }, { "epoch": 0.48153220126770485, "grad_norm": 0.0, "learning_rate": 1.1081993847849906e-05, "loss": 1.2194, "step": 12307 }, { "epoch": 0.4815713279599343, "grad_norm": 0.0, "learning_rate": 1.1080734047695314e-05, "loss": 1.0803, "step": 12308 }, { "epoch": 0.48161045465216373, "grad_norm": 0.0, "learning_rate": 1.107947423018549e-05, "loss": 1.064, "step": 12309 }, { "epoch": 0.4816495813443931, "grad_norm": 0.0, "learning_rate": 1.1078214395340658e-05, "loss": 1.0248, "step": 12310 }, { "epoch": 0.48168870803662256, "grad_norm": 0.0, "learning_rate": 1.1076954543181058e-05, "loss": 1.0555, "step": 12311 }, { "epoch": 0.481727834728852, "grad_norm": 0.0, "learning_rate": 1.1075694673726916e-05, "loss": 0.9638, "step": 12312 }, { "epoch": 0.48176696142108144, "grad_norm": 0.0, "learning_rate": 1.1074434786998467e-05, "loss": 1.0918, "step": 12313 }, { "epoch": 0.4818060881133109, "grad_norm": 0.0, "learning_rate": 1.1073174883015938e-05, "loss": 1.1724, "step": 12314 }, { "epoch": 0.4818452148055403, "grad_norm": 0.0, "learning_rate": 1.1071914961799569e-05, "loss": 1.0672, "step": 12315 }, { "epoch": 0.48188434149776976, "grad_norm": 0.0, "learning_rate": 1.1070655023369587e-05, "loss": 1.059, "step": 12316 }, { "epoch": 0.4819234681899992, "grad_norm": 0.0, "learning_rate": 1.1069395067746226e-05, "loss": 1.2188, "step": 12317 }, { "epoch": 0.48196259488222865, "grad_norm": 0.0, "learning_rate": 1.1068135094949724e-05, "loss": 1.0081, "step": 12318 }, { "epoch": 0.4820017215744581, "grad_norm": 0.0, "learning_rate": 1.106687510500031e-05, "loss": 0.873, "step": 12319 }, { "epoch": 0.4820408482666875, "grad_norm": 0.0, "learning_rate": 1.1065615097918217e-05, "loss": 1.0433, "step": 12320 }, { "epoch": 0.48207997495891697, "grad_norm": 0.0, "learning_rate": 1.106435507372368e-05, "loss": 1.0217, "step": 12321 }, { "epoch": 0.4821191016511464, "grad_norm": 0.0, "learning_rate": 1.1063095032436937e-05, "loss": 1.0776, "step": 12322 }, { "epoch": 0.48215822834337585, "grad_norm": 0.0, "learning_rate": 1.1061834974078216e-05, "loss": 1.1572, "step": 12323 }, { "epoch": 0.4821973550356053, "grad_norm": 0.0, "learning_rate": 1.106057489866776e-05, "loss": 0.9785, "step": 12324 }, { "epoch": 0.48223648172783473, "grad_norm": 0.0, "learning_rate": 1.1059314806225796e-05, "loss": 1.0594, "step": 12325 }, { "epoch": 0.4822756084200642, "grad_norm": 0.0, "learning_rate": 1.1058054696772566e-05, "loss": 1.0921, "step": 12326 }, { "epoch": 0.4823147351122936, "grad_norm": 0.0, "learning_rate": 1.1056794570328304e-05, "loss": 0.9583, "step": 12327 }, { "epoch": 0.48235386180452305, "grad_norm": 0.0, "learning_rate": 1.1055534426913244e-05, "loss": 1.0763, "step": 12328 }, { "epoch": 0.4823929884967525, "grad_norm": 0.0, "learning_rate": 1.1054274266547624e-05, "loss": 1.1947, "step": 12329 }, { "epoch": 0.48243211518898194, "grad_norm": 0.0, "learning_rate": 1.1053014089251681e-05, "loss": 0.9815, "step": 12330 }, { "epoch": 0.4824712418812114, "grad_norm": 0.0, "learning_rate": 1.105175389504565e-05, "loss": 1.1507, "step": 12331 }, { "epoch": 0.4825103685734408, "grad_norm": 0.0, "learning_rate": 1.105049368394977e-05, "loss": 1.1882, "step": 12332 }, { "epoch": 0.48254949526567026, "grad_norm": 0.0, "learning_rate": 1.1049233455984281e-05, "loss": 0.9546, "step": 12333 }, { "epoch": 0.4825886219578997, "grad_norm": 0.0, "learning_rate": 1.1047973211169411e-05, "loss": 1.0121, "step": 12334 }, { "epoch": 0.48262774865012914, "grad_norm": 0.0, "learning_rate": 1.104671294952541e-05, "loss": 1.058, "step": 12335 }, { "epoch": 0.4826668753423586, "grad_norm": 0.0, "learning_rate": 1.104545267107251e-05, "loss": 0.9872, "step": 12336 }, { "epoch": 0.482706002034588, "grad_norm": 0.0, "learning_rate": 1.1044192375830946e-05, "loss": 1.0864, "step": 12337 }, { "epoch": 0.4827451287268174, "grad_norm": 0.0, "learning_rate": 1.1042932063820966e-05, "loss": 1.1233, "step": 12338 }, { "epoch": 0.48278425541904685, "grad_norm": 0.0, "learning_rate": 1.10416717350628e-05, "loss": 1.1473, "step": 12339 }, { "epoch": 0.4828233821112763, "grad_norm": 0.0, "learning_rate": 1.1040411389576692e-05, "loss": 1.034, "step": 12340 }, { "epoch": 0.48286250880350573, "grad_norm": 0.0, "learning_rate": 1.1039151027382881e-05, "loss": 1.0639, "step": 12341 }, { "epoch": 0.48290163549573517, "grad_norm": 0.0, "learning_rate": 1.1037890648501608e-05, "loss": 1.0572, "step": 12342 }, { "epoch": 0.4829407621879646, "grad_norm": 0.0, "learning_rate": 1.103663025295311e-05, "loss": 1.1467, "step": 12343 }, { "epoch": 0.48297988888019405, "grad_norm": 0.0, "learning_rate": 1.103536984075763e-05, "loss": 1.1277, "step": 12344 }, { "epoch": 0.4830190155724235, "grad_norm": 0.0, "learning_rate": 1.1034109411935407e-05, "loss": 1.1335, "step": 12345 }, { "epoch": 0.48305814226465293, "grad_norm": 0.0, "learning_rate": 1.1032848966506684e-05, "loss": 0.9642, "step": 12346 }, { "epoch": 0.4830972689568824, "grad_norm": 0.0, "learning_rate": 1.1031588504491702e-05, "loss": 1.0609, "step": 12347 }, { "epoch": 0.4831363956491118, "grad_norm": 0.0, "learning_rate": 1.1030328025910699e-05, "loss": 1.0224, "step": 12348 }, { "epoch": 0.48317552234134126, "grad_norm": 0.0, "learning_rate": 1.1029067530783919e-05, "loss": 0.9938, "step": 12349 }, { "epoch": 0.4832146490335707, "grad_norm": 0.0, "learning_rate": 1.1027807019131605e-05, "loss": 0.9536, "step": 12350 }, { "epoch": 0.48325377572580014, "grad_norm": 0.0, "learning_rate": 1.1026546490973997e-05, "loss": 1.2173, "step": 12351 }, { "epoch": 0.4832929024180296, "grad_norm": 0.0, "learning_rate": 1.102528594633134e-05, "loss": 0.9964, "step": 12352 }, { "epoch": 0.483332029110259, "grad_norm": 0.0, "learning_rate": 1.1024025385223872e-05, "loss": 1.118, "step": 12353 }, { "epoch": 0.48337115580248846, "grad_norm": 0.0, "learning_rate": 1.1022764807671842e-05, "loss": 0.9207, "step": 12354 }, { "epoch": 0.4834102824947179, "grad_norm": 0.0, "learning_rate": 1.1021504213695493e-05, "loss": 1.0642, "step": 12355 }, { "epoch": 0.48344940918694734, "grad_norm": 0.0, "learning_rate": 1.1020243603315066e-05, "loss": 1.1888, "step": 12356 }, { "epoch": 0.4834885358791768, "grad_norm": 0.0, "learning_rate": 1.10189829765508e-05, "loss": 1.1865, "step": 12357 }, { "epoch": 0.4835276625714062, "grad_norm": 0.0, "learning_rate": 1.101772233342295e-05, "loss": 1.0617, "step": 12358 }, { "epoch": 0.48356678926363567, "grad_norm": 0.0, "learning_rate": 1.1016461673951751e-05, "loss": 0.9912, "step": 12359 }, { "epoch": 0.4836059159558651, "grad_norm": 0.0, "learning_rate": 1.1015200998157453e-05, "loss": 0.9865, "step": 12360 }, { "epoch": 0.48364504264809455, "grad_norm": 0.0, "learning_rate": 1.1013940306060298e-05, "loss": 1.0085, "step": 12361 }, { "epoch": 0.483684169340324, "grad_norm": 0.0, "learning_rate": 1.1012679597680533e-05, "loss": 1.0353, "step": 12362 }, { "epoch": 0.48372329603255343, "grad_norm": 0.0, "learning_rate": 1.1011418873038404e-05, "loss": 1.1057, "step": 12363 }, { "epoch": 0.48376242272478287, "grad_norm": 0.0, "learning_rate": 1.1010158132154153e-05, "loss": 1.0336, "step": 12364 }, { "epoch": 0.4838015494170123, "grad_norm": 0.0, "learning_rate": 1.1008897375048031e-05, "loss": 1.0436, "step": 12365 }, { "epoch": 0.48384067610924175, "grad_norm": 0.0, "learning_rate": 1.100763660174028e-05, "loss": 0.9689, "step": 12366 }, { "epoch": 0.48387980280147114, "grad_norm": 0.0, "learning_rate": 1.1006375812251144e-05, "loss": 1.1183, "step": 12367 }, { "epoch": 0.4839189294937006, "grad_norm": 0.0, "learning_rate": 1.1005115006600879e-05, "loss": 1.1691, "step": 12368 }, { "epoch": 0.48395805618593, "grad_norm": 0.0, "learning_rate": 1.1003854184809725e-05, "loss": 1.1825, "step": 12369 }, { "epoch": 0.48399718287815946, "grad_norm": 0.0, "learning_rate": 1.100259334689793e-05, "loss": 0.9419, "step": 12370 }, { "epoch": 0.4840363095703889, "grad_norm": 0.0, "learning_rate": 1.1001332492885741e-05, "loss": 1.0982, "step": 12371 }, { "epoch": 0.48407543626261834, "grad_norm": 0.0, "learning_rate": 1.1000071622793406e-05, "loss": 1.0175, "step": 12372 }, { "epoch": 0.4841145629548478, "grad_norm": 0.0, "learning_rate": 1.0998810736641179e-05, "loss": 1.0251, "step": 12373 }, { "epoch": 0.4841536896470772, "grad_norm": 0.0, "learning_rate": 1.0997549834449297e-05, "loss": 1.0525, "step": 12374 }, { "epoch": 0.48419281633930666, "grad_norm": 0.0, "learning_rate": 1.099628891623802e-05, "loss": 0.9992, "step": 12375 }, { "epoch": 0.4842319430315361, "grad_norm": 0.0, "learning_rate": 1.0995027982027588e-05, "loss": 1.1165, "step": 12376 }, { "epoch": 0.48427106972376555, "grad_norm": 0.0, "learning_rate": 1.0993767031838253e-05, "loss": 1.0962, "step": 12377 }, { "epoch": 0.484310196415995, "grad_norm": 0.0, "learning_rate": 1.099250606569027e-05, "loss": 1.0291, "step": 12378 }, { "epoch": 0.4843493231082244, "grad_norm": 0.0, "learning_rate": 1.0991245083603877e-05, "loss": 1.0457, "step": 12379 }, { "epoch": 0.48438844980045387, "grad_norm": 0.0, "learning_rate": 1.0989984085599335e-05, "loss": 1.1866, "step": 12380 }, { "epoch": 0.4844275764926833, "grad_norm": 0.0, "learning_rate": 1.0988723071696885e-05, "loss": 1.0265, "step": 12381 }, { "epoch": 0.48446670318491275, "grad_norm": 0.0, "learning_rate": 1.0987462041916783e-05, "loss": 0.9712, "step": 12382 }, { "epoch": 0.4845058298771422, "grad_norm": 0.0, "learning_rate": 1.0986200996279277e-05, "loss": 1.0492, "step": 12383 }, { "epoch": 0.48454495656937163, "grad_norm": 0.0, "learning_rate": 1.0984939934804621e-05, "loss": 0.9456, "step": 12384 }, { "epoch": 0.4845840832616011, "grad_norm": 0.0, "learning_rate": 1.0983678857513063e-05, "loss": 1.0252, "step": 12385 }, { "epoch": 0.4846232099538305, "grad_norm": 0.0, "learning_rate": 1.0982417764424853e-05, "loss": 1.0978, "step": 12386 }, { "epoch": 0.48466233664605995, "grad_norm": 0.0, "learning_rate": 1.0981156655560247e-05, "loss": 1.1857, "step": 12387 }, { "epoch": 0.4847014633382894, "grad_norm": 0.0, "learning_rate": 1.0979895530939495e-05, "loss": 1.0187, "step": 12388 }, { "epoch": 0.48474059003051884, "grad_norm": 0.0, "learning_rate": 1.0978634390582847e-05, "loss": 0.9872, "step": 12389 }, { "epoch": 0.4847797167227483, "grad_norm": 0.0, "learning_rate": 1.0977373234510557e-05, "loss": 0.964, "step": 12390 }, { "epoch": 0.4848188434149777, "grad_norm": 0.0, "learning_rate": 1.097611206274288e-05, "loss": 1.0268, "step": 12391 }, { "epoch": 0.48485797010720716, "grad_norm": 0.0, "learning_rate": 1.0974850875300063e-05, "loss": 1.0454, "step": 12392 }, { "epoch": 0.4848970967994366, "grad_norm": 0.0, "learning_rate": 1.0973589672202365e-05, "loss": 1.0661, "step": 12393 }, { "epoch": 0.48493622349166604, "grad_norm": 0.0, "learning_rate": 1.0972328453470039e-05, "loss": 1.1208, "step": 12394 }, { "epoch": 0.4849753501838954, "grad_norm": 0.0, "learning_rate": 1.0971067219123331e-05, "loss": 1.1219, "step": 12395 }, { "epoch": 0.48501447687612487, "grad_norm": 0.0, "learning_rate": 1.0969805969182503e-05, "loss": 1.0468, "step": 12396 }, { "epoch": 0.4850536035683543, "grad_norm": 0.0, "learning_rate": 1.0968544703667805e-05, "loss": 1.1065, "step": 12397 }, { "epoch": 0.48509273026058375, "grad_norm": 0.0, "learning_rate": 1.0967283422599495e-05, "loss": 1.0222, "step": 12398 }, { "epoch": 0.4851318569528132, "grad_norm": 0.0, "learning_rate": 1.0966022125997825e-05, "loss": 1.0372, "step": 12399 }, { "epoch": 0.48517098364504263, "grad_norm": 0.0, "learning_rate": 1.0964760813883048e-05, "loss": 1.0981, "step": 12400 }, { "epoch": 0.48521011033727207, "grad_norm": 0.0, "learning_rate": 1.0963499486275421e-05, "loss": 1.1418, "step": 12401 }, { "epoch": 0.4852492370295015, "grad_norm": 0.0, "learning_rate": 1.0962238143195203e-05, "loss": 1.0432, "step": 12402 }, { "epoch": 0.48528836372173095, "grad_norm": 0.0, "learning_rate": 1.0960976784662642e-05, "loss": 1.0609, "step": 12403 }, { "epoch": 0.4853274904139604, "grad_norm": 0.0, "learning_rate": 1.0959715410698003e-05, "loss": 1.0766, "step": 12404 }, { "epoch": 0.48536661710618983, "grad_norm": 0.0, "learning_rate": 1.0958454021321536e-05, "loss": 0.935, "step": 12405 }, { "epoch": 0.4854057437984193, "grad_norm": 0.0, "learning_rate": 1.0957192616553494e-05, "loss": 1.0788, "step": 12406 }, { "epoch": 0.4854448704906487, "grad_norm": 0.0, "learning_rate": 1.0955931196414143e-05, "loss": 1.0982, "step": 12407 }, { "epoch": 0.48548399718287816, "grad_norm": 0.0, "learning_rate": 1.0954669760923733e-05, "loss": 0.998, "step": 12408 }, { "epoch": 0.4855231238751076, "grad_norm": 0.0, "learning_rate": 1.0953408310102522e-05, "loss": 1.0328, "step": 12409 }, { "epoch": 0.48556225056733704, "grad_norm": 0.0, "learning_rate": 1.095214684397077e-05, "loss": 1.1035, "step": 12410 }, { "epoch": 0.4856013772595665, "grad_norm": 0.0, "learning_rate": 1.095088536254873e-05, "loss": 1.0686, "step": 12411 }, { "epoch": 0.4856405039517959, "grad_norm": 0.0, "learning_rate": 1.0949623865856664e-05, "loss": 1.0193, "step": 12412 }, { "epoch": 0.48567963064402536, "grad_norm": 0.0, "learning_rate": 1.0948362353914832e-05, "loss": 1.0833, "step": 12413 }, { "epoch": 0.4857187573362548, "grad_norm": 0.0, "learning_rate": 1.0947100826743487e-05, "loss": 0.9197, "step": 12414 }, { "epoch": 0.48575788402848424, "grad_norm": 0.0, "learning_rate": 1.0945839284362885e-05, "loss": 1.0703, "step": 12415 }, { "epoch": 0.4857970107207137, "grad_norm": 0.0, "learning_rate": 1.0944577726793296e-05, "loss": 0.9691, "step": 12416 }, { "epoch": 0.4858361374129431, "grad_norm": 0.0, "learning_rate": 1.0943316154054966e-05, "loss": 1.001, "step": 12417 }, { "epoch": 0.48587526410517257, "grad_norm": 0.0, "learning_rate": 1.0942054566168166e-05, "loss": 1.077, "step": 12418 }, { "epoch": 0.485914390797402, "grad_norm": 0.0, "learning_rate": 1.0940792963153146e-05, "loss": 0.974, "step": 12419 }, { "epoch": 0.48595351748963145, "grad_norm": 0.0, "learning_rate": 1.0939531345030173e-05, "loss": 1.0704, "step": 12420 }, { "epoch": 0.4859926441818609, "grad_norm": 0.0, "learning_rate": 1.0938269711819501e-05, "loss": 1.0197, "step": 12421 }, { "epoch": 0.48603177087409033, "grad_norm": 0.0, "learning_rate": 1.0937008063541395e-05, "loss": 1.0748, "step": 12422 }, { "epoch": 0.48607089756631977, "grad_norm": 0.0, "learning_rate": 1.0935746400216114e-05, "loss": 0.9746, "step": 12423 }, { "epoch": 0.48611002425854916, "grad_norm": 0.0, "learning_rate": 1.0934484721863917e-05, "loss": 1.1205, "step": 12424 }, { "epoch": 0.4861491509507786, "grad_norm": 0.0, "learning_rate": 1.0933223028505066e-05, "loss": 1.1014, "step": 12425 }, { "epoch": 0.48618827764300804, "grad_norm": 0.0, "learning_rate": 1.0931961320159822e-05, "loss": 1.0159, "step": 12426 }, { "epoch": 0.4862274043352375, "grad_norm": 0.0, "learning_rate": 1.093069959684845e-05, "loss": 1.0637, "step": 12427 }, { "epoch": 0.4862665310274669, "grad_norm": 0.0, "learning_rate": 1.0929437858591207e-05, "loss": 1.1098, "step": 12428 }, { "epoch": 0.48630565771969636, "grad_norm": 0.0, "learning_rate": 1.0928176105408359e-05, "loss": 1.1262, "step": 12429 }, { "epoch": 0.4863447844119258, "grad_norm": 0.0, "learning_rate": 1.0926914337320162e-05, "loss": 1.1241, "step": 12430 }, { "epoch": 0.48638391110415524, "grad_norm": 0.0, "learning_rate": 1.0925652554346884e-05, "loss": 1.0333, "step": 12431 }, { "epoch": 0.4864230377963847, "grad_norm": 0.0, "learning_rate": 1.0924390756508785e-05, "loss": 0.9966, "step": 12432 }, { "epoch": 0.4864621644886141, "grad_norm": 0.0, "learning_rate": 1.0923128943826132e-05, "loss": 1.0479, "step": 12433 }, { "epoch": 0.48650129118084356, "grad_norm": 0.0, "learning_rate": 1.0921867116319183e-05, "loss": 1.077, "step": 12434 }, { "epoch": 0.486540417873073, "grad_norm": 0.0, "learning_rate": 1.09206052740082e-05, "loss": 1.0723, "step": 12435 }, { "epoch": 0.48657954456530245, "grad_norm": 0.0, "learning_rate": 1.0919343416913455e-05, "loss": 1.1193, "step": 12436 }, { "epoch": 0.4866186712575319, "grad_norm": 0.0, "learning_rate": 1.0918081545055203e-05, "loss": 1.1361, "step": 12437 }, { "epoch": 0.48665779794976133, "grad_norm": 0.0, "learning_rate": 1.0916819658453715e-05, "loss": 1.1125, "step": 12438 }, { "epoch": 0.48669692464199077, "grad_norm": 0.0, "learning_rate": 1.0915557757129246e-05, "loss": 1.1052, "step": 12439 }, { "epoch": 0.4867360513342202, "grad_norm": 0.0, "learning_rate": 1.0914295841102075e-05, "loss": 1.0208, "step": 12440 }, { "epoch": 0.48677517802644965, "grad_norm": 0.0, "learning_rate": 1.0913033910392452e-05, "loss": 1.0955, "step": 12441 }, { "epoch": 0.4868143047186791, "grad_norm": 0.0, "learning_rate": 1.0911771965020652e-05, "loss": 0.9963, "step": 12442 }, { "epoch": 0.48685343141090853, "grad_norm": 0.0, "learning_rate": 1.0910510005006938e-05, "loss": 1.0346, "step": 12443 }, { "epoch": 0.486892558103138, "grad_norm": 0.0, "learning_rate": 1.0909248030371571e-05, "loss": 1.0563, "step": 12444 }, { "epoch": 0.4869316847953674, "grad_norm": 0.0, "learning_rate": 1.0907986041134821e-05, "loss": 1.06, "step": 12445 }, { "epoch": 0.48697081148759686, "grad_norm": 0.0, "learning_rate": 1.0906724037316952e-05, "loss": 1.0919, "step": 12446 }, { "epoch": 0.4870099381798263, "grad_norm": 0.0, "learning_rate": 1.0905462018938234e-05, "loss": 1.1177, "step": 12447 }, { "epoch": 0.48704906487205574, "grad_norm": 0.0, "learning_rate": 1.0904199986018926e-05, "loss": 1.0178, "step": 12448 }, { "epoch": 0.4870881915642852, "grad_norm": 0.0, "learning_rate": 1.0902937938579304e-05, "loss": 0.9907, "step": 12449 }, { "epoch": 0.4871273182565146, "grad_norm": 0.0, "learning_rate": 1.0901675876639628e-05, "loss": 1.0357, "step": 12450 }, { "epoch": 0.48716644494874406, "grad_norm": 0.0, "learning_rate": 1.0900413800220166e-05, "loss": 1.0623, "step": 12451 }, { "epoch": 0.48720557164097344, "grad_norm": 0.0, "learning_rate": 1.0899151709341189e-05, "loss": 1.1957, "step": 12452 }, { "epoch": 0.4872446983332029, "grad_norm": 0.0, "learning_rate": 1.0897889604022962e-05, "loss": 1.1233, "step": 12453 }, { "epoch": 0.4872838250254323, "grad_norm": 0.0, "learning_rate": 1.0896627484285752e-05, "loss": 1.0987, "step": 12454 }, { "epoch": 0.48732295171766177, "grad_norm": 0.0, "learning_rate": 1.0895365350149829e-05, "loss": 1.0948, "step": 12455 }, { "epoch": 0.4873620784098912, "grad_norm": 0.0, "learning_rate": 1.0894103201635459e-05, "loss": 1.1071, "step": 12456 }, { "epoch": 0.48740120510212065, "grad_norm": 0.0, "learning_rate": 1.0892841038762913e-05, "loss": 0.9587, "step": 12457 }, { "epoch": 0.4874403317943501, "grad_norm": 0.0, "learning_rate": 1.0891578861552461e-05, "loss": 1.057, "step": 12458 }, { "epoch": 0.48747945848657953, "grad_norm": 0.0, "learning_rate": 1.0890316670024366e-05, "loss": 0.9791, "step": 12459 }, { "epoch": 0.48751858517880897, "grad_norm": 0.0, "learning_rate": 1.0889054464198903e-05, "loss": 1.142, "step": 12460 }, { "epoch": 0.4875577118710384, "grad_norm": 0.0, "learning_rate": 1.0887792244096336e-05, "loss": 1.0201, "step": 12461 }, { "epoch": 0.48759683856326785, "grad_norm": 0.0, "learning_rate": 1.0886530009736942e-05, "loss": 0.9074, "step": 12462 }, { "epoch": 0.4876359652554973, "grad_norm": 0.0, "learning_rate": 1.0885267761140988e-05, "loss": 1.0925, "step": 12463 }, { "epoch": 0.48767509194772674, "grad_norm": 0.0, "learning_rate": 1.0884005498328737e-05, "loss": 0.9878, "step": 12464 }, { "epoch": 0.4877142186399562, "grad_norm": 0.0, "learning_rate": 1.088274322132047e-05, "loss": 0.946, "step": 12465 }, { "epoch": 0.4877533453321856, "grad_norm": 0.0, "learning_rate": 1.0881480930136452e-05, "loss": 0.9931, "step": 12466 }, { "epoch": 0.48779247202441506, "grad_norm": 0.0, "learning_rate": 1.0880218624796954e-05, "loss": 1.0647, "step": 12467 }, { "epoch": 0.4878315987166445, "grad_norm": 0.0, "learning_rate": 1.0878956305322248e-05, "loss": 1.056, "step": 12468 }, { "epoch": 0.48787072540887394, "grad_norm": 0.0, "learning_rate": 1.0877693971732608e-05, "loss": 1.0277, "step": 12469 }, { "epoch": 0.4879098521011034, "grad_norm": 0.0, "learning_rate": 1.0876431624048298e-05, "loss": 1.0475, "step": 12470 }, { "epoch": 0.4879489787933328, "grad_norm": 0.0, "learning_rate": 1.0875169262289597e-05, "loss": 1.1679, "step": 12471 }, { "epoch": 0.48798810548556226, "grad_norm": 0.0, "learning_rate": 1.0873906886476777e-05, "loss": 0.8572, "step": 12472 }, { "epoch": 0.4880272321777917, "grad_norm": 0.0, "learning_rate": 1.0872644496630103e-05, "loss": 1.0372, "step": 12473 }, { "epoch": 0.48806635887002114, "grad_norm": 0.0, "learning_rate": 1.0871382092769853e-05, "loss": 1.1256, "step": 12474 }, { "epoch": 0.4881054855622506, "grad_norm": 0.0, "learning_rate": 1.0870119674916298e-05, "loss": 1.0976, "step": 12475 }, { "epoch": 0.48814461225448, "grad_norm": 0.0, "learning_rate": 1.0868857243089714e-05, "loss": 1.0829, "step": 12476 }, { "epoch": 0.48818373894670947, "grad_norm": 0.0, "learning_rate": 1.0867594797310368e-05, "loss": 1.0652, "step": 12477 }, { "epoch": 0.4882228656389389, "grad_norm": 0.0, "learning_rate": 1.086633233759854e-05, "loss": 1.1113, "step": 12478 }, { "epoch": 0.48826199233116835, "grad_norm": 0.0, "learning_rate": 1.0865069863974496e-05, "loss": 0.8906, "step": 12479 }, { "epoch": 0.4883011190233978, "grad_norm": 0.0, "learning_rate": 1.0863807376458516e-05, "loss": 1.0865, "step": 12480 }, { "epoch": 0.4883402457156272, "grad_norm": 0.0, "learning_rate": 1.0862544875070875e-05, "loss": 0.9153, "step": 12481 }, { "epoch": 0.4883793724078566, "grad_norm": 0.0, "learning_rate": 1.0861282359831842e-05, "loss": 0.9704, "step": 12482 }, { "epoch": 0.48841849910008606, "grad_norm": 0.0, "learning_rate": 1.0860019830761693e-05, "loss": 1.086, "step": 12483 }, { "epoch": 0.4884576257923155, "grad_norm": 0.0, "learning_rate": 1.08587572878807e-05, "loss": 1.0405, "step": 12484 }, { "epoch": 0.48849675248454494, "grad_norm": 0.0, "learning_rate": 1.0857494731209148e-05, "loss": 1.0519, "step": 12485 }, { "epoch": 0.4885358791767744, "grad_norm": 0.0, "learning_rate": 1.08562321607673e-05, "loss": 1.192, "step": 12486 }, { "epoch": 0.4885750058690038, "grad_norm": 0.0, "learning_rate": 1.0854969576575438e-05, "loss": 1.0288, "step": 12487 }, { "epoch": 0.48861413256123326, "grad_norm": 0.0, "learning_rate": 1.0853706978653835e-05, "loss": 0.9993, "step": 12488 }, { "epoch": 0.4886532592534627, "grad_norm": 0.0, "learning_rate": 1.085244436702277e-05, "loss": 1.0053, "step": 12489 }, { "epoch": 0.48869238594569214, "grad_norm": 0.0, "learning_rate": 1.0851181741702515e-05, "loss": 0.9828, "step": 12490 }, { "epoch": 0.4887315126379216, "grad_norm": 0.0, "learning_rate": 1.084991910271335e-05, "loss": 1.0571, "step": 12491 }, { "epoch": 0.488770639330151, "grad_norm": 0.0, "learning_rate": 1.0848656450075545e-05, "loss": 0.9471, "step": 12492 }, { "epoch": 0.48880976602238047, "grad_norm": 0.0, "learning_rate": 1.0847393783809383e-05, "loss": 0.9637, "step": 12493 }, { "epoch": 0.4888488927146099, "grad_norm": 0.0, "learning_rate": 1.084613110393514e-05, "loss": 1.0724, "step": 12494 }, { "epoch": 0.48888801940683935, "grad_norm": 0.0, "learning_rate": 1.084486841047309e-05, "loss": 0.9995, "step": 12495 }, { "epoch": 0.4889271460990688, "grad_norm": 0.0, "learning_rate": 1.0843605703443511e-05, "loss": 1.1909, "step": 12496 }, { "epoch": 0.48896627279129823, "grad_norm": 0.0, "learning_rate": 1.0842342982866686e-05, "loss": 1.1973, "step": 12497 }, { "epoch": 0.48900539948352767, "grad_norm": 0.0, "learning_rate": 1.0841080248762883e-05, "loss": 1.0898, "step": 12498 }, { "epoch": 0.4890445261757571, "grad_norm": 0.0, "learning_rate": 1.0839817501152387e-05, "loss": 1.2204, "step": 12499 }, { "epoch": 0.48908365286798655, "grad_norm": 0.0, "learning_rate": 1.0838554740055479e-05, "loss": 1.02, "step": 12500 }, { "epoch": 0.489122779560216, "grad_norm": 0.0, "learning_rate": 1.0837291965492425e-05, "loss": 1.1028, "step": 12501 }, { "epoch": 0.48916190625244543, "grad_norm": 0.0, "learning_rate": 1.0836029177483516e-05, "loss": 0.9636, "step": 12502 }, { "epoch": 0.4892010329446749, "grad_norm": 0.0, "learning_rate": 1.0834766376049024e-05, "loss": 0.9967, "step": 12503 }, { "epoch": 0.4892401596369043, "grad_norm": 0.0, "learning_rate": 1.0833503561209232e-05, "loss": 1.0104, "step": 12504 }, { "epoch": 0.48927928632913376, "grad_norm": 0.0, "learning_rate": 1.0832240732984415e-05, "loss": 0.9753, "step": 12505 }, { "epoch": 0.4893184130213632, "grad_norm": 0.0, "learning_rate": 1.0830977891394853e-05, "loss": 0.9696, "step": 12506 }, { "epoch": 0.48935753971359264, "grad_norm": 0.0, "learning_rate": 1.0829715036460833e-05, "loss": 1.1593, "step": 12507 }, { "epoch": 0.4893966664058221, "grad_norm": 0.0, "learning_rate": 1.0828452168202624e-05, "loss": 1.0789, "step": 12508 }, { "epoch": 0.48943579309805146, "grad_norm": 0.0, "learning_rate": 1.0827189286640513e-05, "loss": 0.9511, "step": 12509 }, { "epoch": 0.4894749197902809, "grad_norm": 0.0, "learning_rate": 1.0825926391794782e-05, "loss": 0.9657, "step": 12510 }, { "epoch": 0.48951404648251035, "grad_norm": 0.0, "learning_rate": 1.0824663483685702e-05, "loss": 1.1569, "step": 12511 }, { "epoch": 0.4895531731747398, "grad_norm": 0.0, "learning_rate": 1.0823400562333563e-05, "loss": 1.1583, "step": 12512 }, { "epoch": 0.4895922998669692, "grad_norm": 0.0, "learning_rate": 1.0822137627758642e-05, "loss": 1.0711, "step": 12513 }, { "epoch": 0.48963142655919867, "grad_norm": 0.0, "learning_rate": 1.0820874679981223e-05, "loss": 1.1238, "step": 12514 }, { "epoch": 0.4896705532514281, "grad_norm": 0.0, "learning_rate": 1.0819611719021584e-05, "loss": 0.9992, "step": 12515 }, { "epoch": 0.48970967994365755, "grad_norm": 0.0, "learning_rate": 1.0818348744900007e-05, "loss": 1.0827, "step": 12516 }, { "epoch": 0.489748806635887, "grad_norm": 0.0, "learning_rate": 1.0817085757636774e-05, "loss": 1.1075, "step": 12517 }, { "epoch": 0.48978793332811643, "grad_norm": 0.0, "learning_rate": 1.081582275725217e-05, "loss": 1.239, "step": 12518 }, { "epoch": 0.4898270600203459, "grad_norm": 0.0, "learning_rate": 1.081455974376647e-05, "loss": 1.0881, "step": 12519 }, { "epoch": 0.4898661867125753, "grad_norm": 0.0, "learning_rate": 1.0813296717199969e-05, "loss": 1.1873, "step": 12520 }, { "epoch": 0.48990531340480475, "grad_norm": 0.0, "learning_rate": 1.0812033677572937e-05, "loss": 1.0092, "step": 12521 }, { "epoch": 0.4899444400970342, "grad_norm": 0.0, "learning_rate": 1.081077062490566e-05, "loss": 0.9858, "step": 12522 }, { "epoch": 0.48998356678926364, "grad_norm": 0.0, "learning_rate": 1.0809507559218426e-05, "loss": 1.1107, "step": 12523 }, { "epoch": 0.4900226934814931, "grad_norm": 0.0, "learning_rate": 1.0808244480531513e-05, "loss": 1.0559, "step": 12524 }, { "epoch": 0.4900618201737225, "grad_norm": 0.0, "learning_rate": 1.0806981388865208e-05, "loss": 1.0873, "step": 12525 }, { "epoch": 0.49010094686595196, "grad_norm": 0.0, "learning_rate": 1.0805718284239793e-05, "loss": 1.001, "step": 12526 }, { "epoch": 0.4901400735581814, "grad_norm": 0.0, "learning_rate": 1.0804455166675552e-05, "loss": 1.1585, "step": 12527 }, { "epoch": 0.49017920025041084, "grad_norm": 0.0, "learning_rate": 1.0803192036192767e-05, "loss": 1.0234, "step": 12528 }, { "epoch": 0.4902183269426403, "grad_norm": 0.0, "learning_rate": 1.0801928892811726e-05, "loss": 1.1548, "step": 12529 }, { "epoch": 0.4902574536348697, "grad_norm": 0.0, "learning_rate": 1.0800665736552713e-05, "loss": 1.0328, "step": 12530 }, { "epoch": 0.49029658032709916, "grad_norm": 0.0, "learning_rate": 1.0799402567436009e-05, "loss": 1.1338, "step": 12531 }, { "epoch": 0.4903357070193286, "grad_norm": 0.0, "learning_rate": 1.0798139385481903e-05, "loss": 1.0058, "step": 12532 }, { "epoch": 0.49037483371155804, "grad_norm": 0.0, "learning_rate": 1.0796876190710677e-05, "loss": 1.0867, "step": 12533 }, { "epoch": 0.4904139604037875, "grad_norm": 0.0, "learning_rate": 1.079561298314262e-05, "loss": 0.87, "step": 12534 }, { "epoch": 0.4904530870960169, "grad_norm": 0.0, "learning_rate": 1.0794349762798013e-05, "loss": 1.1058, "step": 12535 }, { "epoch": 0.49049221378824637, "grad_norm": 0.0, "learning_rate": 1.0793086529697148e-05, "loss": 1.076, "step": 12536 }, { "epoch": 0.4905313404804758, "grad_norm": 0.0, "learning_rate": 1.0791823283860304e-05, "loss": 0.9811, "step": 12537 }, { "epoch": 0.4905704671727052, "grad_norm": 0.0, "learning_rate": 1.0790560025307773e-05, "loss": 1.0349, "step": 12538 }, { "epoch": 0.49060959386493463, "grad_norm": 0.0, "learning_rate": 1.0789296754059837e-05, "loss": 0.9808, "step": 12539 }, { "epoch": 0.4906487205571641, "grad_norm": 0.0, "learning_rate": 1.0788033470136783e-05, "loss": 1.1637, "step": 12540 }, { "epoch": 0.4906878472493935, "grad_norm": 0.0, "learning_rate": 1.07867701735589e-05, "loss": 1.1133, "step": 12541 }, { "epoch": 0.49072697394162296, "grad_norm": 0.0, "learning_rate": 1.0785506864346475e-05, "loss": 1.1018, "step": 12542 }, { "epoch": 0.4907661006338524, "grad_norm": 0.0, "learning_rate": 1.0784243542519792e-05, "loss": 1.0724, "step": 12543 }, { "epoch": 0.49080522732608184, "grad_norm": 0.0, "learning_rate": 1.0782980208099143e-05, "loss": 1.1135, "step": 12544 }, { "epoch": 0.4908443540183113, "grad_norm": 0.0, "learning_rate": 1.0781716861104812e-05, "loss": 1.1328, "step": 12545 }, { "epoch": 0.4908834807105407, "grad_norm": 0.0, "learning_rate": 1.0780453501557084e-05, "loss": 0.9926, "step": 12546 }, { "epoch": 0.49092260740277016, "grad_norm": 0.0, "learning_rate": 1.0779190129476256e-05, "loss": 1.0925, "step": 12547 }, { "epoch": 0.4909617340949996, "grad_norm": 0.0, "learning_rate": 1.0777926744882607e-05, "loss": 1.1586, "step": 12548 }, { "epoch": 0.49100086078722904, "grad_norm": 0.0, "learning_rate": 1.0776663347796433e-05, "loss": 1.1301, "step": 12549 }, { "epoch": 0.4910399874794585, "grad_norm": 0.0, "learning_rate": 1.0775399938238019e-05, "loss": 1.0211, "step": 12550 }, { "epoch": 0.4910791141716879, "grad_norm": 0.0, "learning_rate": 1.077413651622765e-05, "loss": 1.0264, "step": 12551 }, { "epoch": 0.49111824086391737, "grad_norm": 0.0, "learning_rate": 1.077287308178562e-05, "loss": 1.0174, "step": 12552 }, { "epoch": 0.4911573675561468, "grad_norm": 0.0, "learning_rate": 1.0771609634932216e-05, "loss": 1.1001, "step": 12553 }, { "epoch": 0.49119649424837625, "grad_norm": 0.0, "learning_rate": 1.0770346175687728e-05, "loss": 1.0326, "step": 12554 }, { "epoch": 0.4912356209406057, "grad_norm": 0.0, "learning_rate": 1.0769082704072447e-05, "loss": 1.1085, "step": 12555 }, { "epoch": 0.49127474763283513, "grad_norm": 0.0, "learning_rate": 1.076781922010666e-05, "loss": 1.1039, "step": 12556 }, { "epoch": 0.49131387432506457, "grad_norm": 0.0, "learning_rate": 1.0766555723810661e-05, "loss": 0.9537, "step": 12557 }, { "epoch": 0.491353001017294, "grad_norm": 0.0, "learning_rate": 1.0765292215204738e-05, "loss": 0.9954, "step": 12558 }, { "epoch": 0.49139212770952345, "grad_norm": 0.0, "learning_rate": 1.0764028694309179e-05, "loss": 1.011, "step": 12559 }, { "epoch": 0.4914312544017529, "grad_norm": 0.0, "learning_rate": 1.0762765161144275e-05, "loss": 1.0582, "step": 12560 }, { "epoch": 0.49147038109398233, "grad_norm": 0.0, "learning_rate": 1.0761501615730321e-05, "loss": 1.1107, "step": 12561 }, { "epoch": 0.4915095077862118, "grad_norm": 0.0, "learning_rate": 1.0760238058087605e-05, "loss": 1.084, "step": 12562 }, { "epoch": 0.4915486344784412, "grad_norm": 0.0, "learning_rate": 1.0758974488236418e-05, "loss": 0.9865, "step": 12563 }, { "epoch": 0.49158776117067066, "grad_norm": 0.0, "learning_rate": 1.075771090619705e-05, "loss": 1.1471, "step": 12564 }, { "epoch": 0.4916268878629001, "grad_norm": 0.0, "learning_rate": 1.0756447311989796e-05, "loss": 1.1162, "step": 12565 }, { "epoch": 0.4916660145551295, "grad_norm": 0.0, "learning_rate": 1.0755183705634944e-05, "loss": 1.0835, "step": 12566 }, { "epoch": 0.4917051412473589, "grad_norm": 0.0, "learning_rate": 1.0753920087152792e-05, "loss": 1.0902, "step": 12567 }, { "epoch": 0.49174426793958836, "grad_norm": 0.0, "learning_rate": 1.0752656456563626e-05, "loss": 1.0973, "step": 12568 }, { "epoch": 0.4917833946318178, "grad_norm": 0.0, "learning_rate": 1.075139281388774e-05, "loss": 1.0315, "step": 12569 }, { "epoch": 0.49182252132404725, "grad_norm": 0.0, "learning_rate": 1.0750129159145429e-05, "loss": 1.1763, "step": 12570 }, { "epoch": 0.4918616480162767, "grad_norm": 0.0, "learning_rate": 1.0748865492356981e-05, "loss": 0.8835, "step": 12571 }, { "epoch": 0.4919007747085061, "grad_norm": 0.0, "learning_rate": 1.0747601813542694e-05, "loss": 1.0448, "step": 12572 }, { "epoch": 0.49193990140073557, "grad_norm": 0.0, "learning_rate": 1.0746338122722854e-05, "loss": 0.9775, "step": 12573 }, { "epoch": 0.491979028092965, "grad_norm": 0.0, "learning_rate": 1.0745074419917765e-05, "loss": 1.0323, "step": 12574 }, { "epoch": 0.49201815478519445, "grad_norm": 0.0, "learning_rate": 1.074381070514771e-05, "loss": 1.0983, "step": 12575 }, { "epoch": 0.4920572814774239, "grad_norm": 0.0, "learning_rate": 1.074254697843299e-05, "loss": 0.9277, "step": 12576 }, { "epoch": 0.49209640816965333, "grad_norm": 0.0, "learning_rate": 1.0741283239793894e-05, "loss": 1.0296, "step": 12577 }, { "epoch": 0.4921355348618828, "grad_norm": 0.0, "learning_rate": 1.0740019489250719e-05, "loss": 1.0679, "step": 12578 }, { "epoch": 0.4921746615541122, "grad_norm": 0.0, "learning_rate": 1.0738755726823759e-05, "loss": 1.1629, "step": 12579 }, { "epoch": 0.49221378824634165, "grad_norm": 0.0, "learning_rate": 1.0737491952533305e-05, "loss": 1.0494, "step": 12580 }, { "epoch": 0.4922529149385711, "grad_norm": 0.0, "learning_rate": 1.0736228166399659e-05, "loss": 1.064, "step": 12581 }, { "epoch": 0.49229204163080054, "grad_norm": 0.0, "learning_rate": 1.0734964368443106e-05, "loss": 0.9395, "step": 12582 }, { "epoch": 0.49233116832303, "grad_norm": 0.0, "learning_rate": 1.073370055868395e-05, "loss": 0.9685, "step": 12583 }, { "epoch": 0.4923702950152594, "grad_norm": 0.0, "learning_rate": 1.0732436737142482e-05, "loss": 1.0193, "step": 12584 }, { "epoch": 0.49240942170748886, "grad_norm": 0.0, "learning_rate": 1.0731172903838995e-05, "loss": 1.1364, "step": 12585 }, { "epoch": 0.4924485483997183, "grad_norm": 0.0, "learning_rate": 1.072990905879379e-05, "loss": 1.0422, "step": 12586 }, { "epoch": 0.49248767509194774, "grad_norm": 0.0, "learning_rate": 1.0728645202027162e-05, "loss": 0.913, "step": 12587 }, { "epoch": 0.4925268017841772, "grad_norm": 0.0, "learning_rate": 1.07273813335594e-05, "loss": 1.1365, "step": 12588 }, { "epoch": 0.4925659284764066, "grad_norm": 0.0, "learning_rate": 1.072611745341081e-05, "loss": 1.0458, "step": 12589 }, { "epoch": 0.49260505516863606, "grad_norm": 0.0, "learning_rate": 1.0724853561601683e-05, "loss": 1.0574, "step": 12590 }, { "epoch": 0.4926441818608655, "grad_norm": 0.0, "learning_rate": 1.0723589658152311e-05, "loss": 1.0727, "step": 12591 }, { "epoch": 0.49268330855309495, "grad_norm": 0.0, "learning_rate": 1.0722325743083001e-05, "loss": 1.0438, "step": 12592 }, { "epoch": 0.4927224352453244, "grad_norm": 0.0, "learning_rate": 1.0721061816414043e-05, "loss": 1.15, "step": 12593 }, { "epoch": 0.4927615619375538, "grad_norm": 0.0, "learning_rate": 1.0719797878165737e-05, "loss": 1.1071, "step": 12594 }, { "epoch": 0.4928006886297832, "grad_norm": 0.0, "learning_rate": 1.0718533928358374e-05, "loss": 1.0326, "step": 12595 }, { "epoch": 0.49283981532201265, "grad_norm": 0.0, "learning_rate": 1.0717269967012262e-05, "loss": 1.0745, "step": 12596 }, { "epoch": 0.4928789420142421, "grad_norm": 0.0, "learning_rate": 1.0716005994147694e-05, "loss": 0.9106, "step": 12597 }, { "epoch": 0.49291806870647153, "grad_norm": 0.0, "learning_rate": 1.0714742009784963e-05, "loss": 0.8962, "step": 12598 }, { "epoch": 0.492957195398701, "grad_norm": 0.0, "learning_rate": 1.0713478013944371e-05, "loss": 1.1254, "step": 12599 }, { "epoch": 0.4929963220909304, "grad_norm": 0.0, "learning_rate": 1.0712214006646217e-05, "loss": 1.0101, "step": 12600 }, { "epoch": 0.49303544878315986, "grad_norm": 0.0, "learning_rate": 1.07109499879108e-05, "loss": 1.0568, "step": 12601 }, { "epoch": 0.4930745754753893, "grad_norm": 0.0, "learning_rate": 1.0709685957758416e-05, "loss": 1.0387, "step": 12602 }, { "epoch": 0.49311370216761874, "grad_norm": 0.0, "learning_rate": 1.0708421916209364e-05, "loss": 1.0754, "step": 12603 }, { "epoch": 0.4931528288598482, "grad_norm": 0.0, "learning_rate": 1.0707157863283944e-05, "loss": 1.048, "step": 12604 }, { "epoch": 0.4931919555520776, "grad_norm": 0.0, "learning_rate": 1.0705893799002455e-05, "loss": 1.1165, "step": 12605 }, { "epoch": 0.49323108224430706, "grad_norm": 0.0, "learning_rate": 1.0704629723385195e-05, "loss": 1.0836, "step": 12606 }, { "epoch": 0.4932702089365365, "grad_norm": 0.0, "learning_rate": 1.0703365636452468e-05, "loss": 1.0695, "step": 12607 }, { "epoch": 0.49330933562876594, "grad_norm": 0.0, "learning_rate": 1.070210153822457e-05, "loss": 1.0447, "step": 12608 }, { "epoch": 0.4933484623209954, "grad_norm": 0.0, "learning_rate": 1.0700837428721798e-05, "loss": 0.9906, "step": 12609 }, { "epoch": 0.4933875890132248, "grad_norm": 0.0, "learning_rate": 1.0699573307964457e-05, "loss": 0.9844, "step": 12610 }, { "epoch": 0.49342671570545427, "grad_norm": 0.0, "learning_rate": 1.0698309175972843e-05, "loss": 1.0652, "step": 12611 }, { "epoch": 0.4934658423976837, "grad_norm": 0.0, "learning_rate": 1.0697045032767262e-05, "loss": 0.9522, "step": 12612 }, { "epoch": 0.49350496908991315, "grad_norm": 0.0, "learning_rate": 1.0695780878368007e-05, "loss": 1.0629, "step": 12613 }, { "epoch": 0.4935440957821426, "grad_norm": 0.0, "learning_rate": 1.0694516712795387e-05, "loss": 1.0315, "step": 12614 }, { "epoch": 0.49358322247437203, "grad_norm": 0.0, "learning_rate": 1.0693252536069695e-05, "loss": 1.0839, "step": 12615 }, { "epoch": 0.49362234916660147, "grad_norm": 0.0, "learning_rate": 1.0691988348211239e-05, "loss": 1.1247, "step": 12616 }, { "epoch": 0.4936614758588309, "grad_norm": 0.0, "learning_rate": 1.0690724149240319e-05, "loss": 1.0431, "step": 12617 }, { "epoch": 0.49370060255106035, "grad_norm": 0.0, "learning_rate": 1.0689459939177231e-05, "loss": 1.1125, "step": 12618 }, { "epoch": 0.4937397292432898, "grad_norm": 0.0, "learning_rate": 1.0688195718042283e-05, "loss": 1.0888, "step": 12619 }, { "epoch": 0.49377885593551923, "grad_norm": 0.0, "learning_rate": 1.0686931485855772e-05, "loss": 0.8771, "step": 12620 }, { "epoch": 0.4938179826277487, "grad_norm": 0.0, "learning_rate": 1.0685667242638003e-05, "loss": 1.0781, "step": 12621 }, { "epoch": 0.4938571093199781, "grad_norm": 0.0, "learning_rate": 1.0684402988409278e-05, "loss": 1.1075, "step": 12622 }, { "epoch": 0.4938962360122075, "grad_norm": 0.0, "learning_rate": 1.0683138723189897e-05, "loss": 1.1173, "step": 12623 }, { "epoch": 0.49393536270443694, "grad_norm": 0.0, "learning_rate": 1.0681874447000165e-05, "loss": 1.0892, "step": 12624 }, { "epoch": 0.4939744893966664, "grad_norm": 0.0, "learning_rate": 1.0680610159860382e-05, "loss": 1.0517, "step": 12625 }, { "epoch": 0.4940136160888958, "grad_norm": 0.0, "learning_rate": 1.0679345861790858e-05, "loss": 1.0096, "step": 12626 }, { "epoch": 0.49405274278112526, "grad_norm": 0.0, "learning_rate": 1.0678081552811886e-05, "loss": 1.0118, "step": 12627 }, { "epoch": 0.4940918694733547, "grad_norm": 0.0, "learning_rate": 1.0676817232943775e-05, "loss": 1.0682, "step": 12628 }, { "epoch": 0.49413099616558415, "grad_norm": 0.0, "learning_rate": 1.0675552902206827e-05, "loss": 0.9764, "step": 12629 }, { "epoch": 0.4941701228578136, "grad_norm": 0.0, "learning_rate": 1.0674288560621346e-05, "loss": 0.9883, "step": 12630 }, { "epoch": 0.49420924955004303, "grad_norm": 0.0, "learning_rate": 1.0673024208207636e-05, "loss": 0.9787, "step": 12631 }, { "epoch": 0.49424837624227247, "grad_norm": 0.0, "learning_rate": 1.0671759844986002e-05, "loss": 0.9981, "step": 12632 }, { "epoch": 0.4942875029345019, "grad_norm": 0.0, "learning_rate": 1.0670495470976745e-05, "loss": 0.9565, "step": 12633 }, { "epoch": 0.49432662962673135, "grad_norm": 0.0, "learning_rate": 1.066923108620017e-05, "loss": 1.1302, "step": 12634 }, { "epoch": 0.4943657563189608, "grad_norm": 0.0, "learning_rate": 1.0667966690676583e-05, "loss": 1.1275, "step": 12635 }, { "epoch": 0.49440488301119023, "grad_norm": 0.0, "learning_rate": 1.0666702284426289e-05, "loss": 1.0193, "step": 12636 }, { "epoch": 0.4944440097034197, "grad_norm": 0.0, "learning_rate": 1.0665437867469593e-05, "loss": 1.038, "step": 12637 }, { "epoch": 0.4944831363956491, "grad_norm": 0.0, "learning_rate": 1.0664173439826794e-05, "loss": 1.1118, "step": 12638 }, { "epoch": 0.49452226308787856, "grad_norm": 0.0, "learning_rate": 1.0662909001518207e-05, "loss": 1.1549, "step": 12639 }, { "epoch": 0.494561389780108, "grad_norm": 0.0, "learning_rate": 1.0661644552564127e-05, "loss": 1.118, "step": 12640 }, { "epoch": 0.49460051647233744, "grad_norm": 0.0, "learning_rate": 1.066038009298487e-05, "loss": 1.0034, "step": 12641 }, { "epoch": 0.4946396431645669, "grad_norm": 0.0, "learning_rate": 1.0659115622800733e-05, "loss": 1.0797, "step": 12642 }, { "epoch": 0.4946787698567963, "grad_norm": 0.0, "learning_rate": 1.0657851142032026e-05, "loss": 1.0582, "step": 12643 }, { "epoch": 0.49471789654902576, "grad_norm": 0.0, "learning_rate": 1.0656586650699052e-05, "loss": 0.9374, "step": 12644 }, { "epoch": 0.4947570232412552, "grad_norm": 0.0, "learning_rate": 1.0655322148822123e-05, "loss": 1.1814, "step": 12645 }, { "epoch": 0.49479614993348464, "grad_norm": 0.0, "learning_rate": 1.0654057636421538e-05, "loss": 1.0325, "step": 12646 }, { "epoch": 0.4948352766257141, "grad_norm": 0.0, "learning_rate": 1.0652793113517607e-05, "loss": 1.1251, "step": 12647 }, { "epoch": 0.4948744033179435, "grad_norm": 0.0, "learning_rate": 1.0651528580130639e-05, "loss": 1.0183, "step": 12648 }, { "epoch": 0.49491353001017296, "grad_norm": 0.0, "learning_rate": 1.0650264036280935e-05, "loss": 1.0054, "step": 12649 }, { "epoch": 0.4949526567024024, "grad_norm": 0.0, "learning_rate": 1.064899948198881e-05, "loss": 0.9792, "step": 12650 }, { "epoch": 0.4949917833946318, "grad_norm": 0.0, "learning_rate": 1.0647734917274562e-05, "loss": 0.9113, "step": 12651 }, { "epoch": 0.49503091008686123, "grad_norm": 0.0, "learning_rate": 1.0646470342158505e-05, "loss": 1.0181, "step": 12652 }, { "epoch": 0.49507003677909067, "grad_norm": 0.0, "learning_rate": 1.0645205756660943e-05, "loss": 1.0692, "step": 12653 }, { "epoch": 0.4951091634713201, "grad_norm": 0.0, "learning_rate": 1.0643941160802189e-05, "loss": 0.9409, "step": 12654 }, { "epoch": 0.49514829016354955, "grad_norm": 0.0, "learning_rate": 1.0642676554602545e-05, "loss": 1.0844, "step": 12655 }, { "epoch": 0.495187416855779, "grad_norm": 0.0, "learning_rate": 1.0641411938082317e-05, "loss": 1.162, "step": 12656 }, { "epoch": 0.49522654354800844, "grad_norm": 0.0, "learning_rate": 1.0640147311261821e-05, "loss": 1.1071, "step": 12657 }, { "epoch": 0.4952656702402379, "grad_norm": 0.0, "learning_rate": 1.063888267416136e-05, "loss": 0.9933, "step": 12658 }, { "epoch": 0.4953047969324673, "grad_norm": 0.0, "learning_rate": 1.0637618026801246e-05, "loss": 1.0591, "step": 12659 }, { "epoch": 0.49534392362469676, "grad_norm": 0.0, "learning_rate": 1.0636353369201782e-05, "loss": 1.0072, "step": 12660 }, { "epoch": 0.4953830503169262, "grad_norm": 0.0, "learning_rate": 1.0635088701383282e-05, "loss": 1.0267, "step": 12661 }, { "epoch": 0.49542217700915564, "grad_norm": 0.0, "learning_rate": 1.0633824023366053e-05, "loss": 1.1214, "step": 12662 }, { "epoch": 0.4954613037013851, "grad_norm": 0.0, "learning_rate": 1.0632559335170405e-05, "loss": 1.0827, "step": 12663 }, { "epoch": 0.4955004303936145, "grad_norm": 0.0, "learning_rate": 1.0631294636816645e-05, "loss": 1.1321, "step": 12664 }, { "epoch": 0.49553955708584396, "grad_norm": 0.0, "learning_rate": 1.0630029928325087e-05, "loss": 1.0452, "step": 12665 }, { "epoch": 0.4955786837780734, "grad_norm": 0.0, "learning_rate": 1.0628765209716037e-05, "loss": 0.8669, "step": 12666 }, { "epoch": 0.49561781047030284, "grad_norm": 0.0, "learning_rate": 1.0627500481009805e-05, "loss": 1.0765, "step": 12667 }, { "epoch": 0.4956569371625323, "grad_norm": 0.0, "learning_rate": 1.0626235742226702e-05, "loss": 1.0653, "step": 12668 }, { "epoch": 0.4956960638547617, "grad_norm": 0.0, "learning_rate": 1.0624970993387036e-05, "loss": 0.9873, "step": 12669 }, { "epoch": 0.49573519054699117, "grad_norm": 0.0, "learning_rate": 1.062370623451112e-05, "loss": 1.1164, "step": 12670 }, { "epoch": 0.4957743172392206, "grad_norm": 0.0, "learning_rate": 1.0622441465619265e-05, "loss": 0.9958, "step": 12671 }, { "epoch": 0.49581344393145005, "grad_norm": 0.0, "learning_rate": 1.0621176686731778e-05, "loss": 1.0907, "step": 12672 }, { "epoch": 0.4958525706236795, "grad_norm": 0.0, "learning_rate": 1.0619911897868973e-05, "loss": 1.093, "step": 12673 }, { "epoch": 0.49589169731590893, "grad_norm": 0.0, "learning_rate": 1.0618647099051158e-05, "loss": 0.9189, "step": 12674 }, { "epoch": 0.49593082400813837, "grad_norm": 0.0, "learning_rate": 1.0617382290298649e-05, "loss": 1.158, "step": 12675 }, { "epoch": 0.4959699507003678, "grad_norm": 0.0, "learning_rate": 1.0616117471631753e-05, "loss": 1.0649, "step": 12676 }, { "epoch": 0.49600907739259725, "grad_norm": 0.0, "learning_rate": 1.0614852643070779e-05, "loss": 1.0115, "step": 12677 }, { "epoch": 0.4960482040848267, "grad_norm": 0.0, "learning_rate": 1.0613587804636045e-05, "loss": 1.0885, "step": 12678 }, { "epoch": 0.49608733077705613, "grad_norm": 0.0, "learning_rate": 1.061232295634786e-05, "loss": 1.1321, "step": 12679 }, { "epoch": 0.4961264574692855, "grad_norm": 0.0, "learning_rate": 1.0611058098226534e-05, "loss": 1.1644, "step": 12680 }, { "epoch": 0.49616558416151496, "grad_norm": 0.0, "learning_rate": 1.0609793230292382e-05, "loss": 1.1075, "step": 12681 }, { "epoch": 0.4962047108537444, "grad_norm": 0.0, "learning_rate": 1.0608528352565714e-05, "loss": 1.0686, "step": 12682 }, { "epoch": 0.49624383754597384, "grad_norm": 0.0, "learning_rate": 1.0607263465066844e-05, "loss": 0.937, "step": 12683 }, { "epoch": 0.4962829642382033, "grad_norm": 0.0, "learning_rate": 1.0605998567816084e-05, "loss": 1.0308, "step": 12684 }, { "epoch": 0.4963220909304327, "grad_norm": 0.0, "learning_rate": 1.0604733660833744e-05, "loss": 1.0497, "step": 12685 }, { "epoch": 0.49636121762266217, "grad_norm": 0.0, "learning_rate": 1.0603468744140142e-05, "loss": 1.0295, "step": 12686 }, { "epoch": 0.4964003443148916, "grad_norm": 0.0, "learning_rate": 1.0602203817755585e-05, "loss": 0.9605, "step": 12687 }, { "epoch": 0.49643947100712105, "grad_norm": 0.0, "learning_rate": 1.0600938881700394e-05, "loss": 1.1746, "step": 12688 }, { "epoch": 0.4964785976993505, "grad_norm": 0.0, "learning_rate": 1.0599673935994872e-05, "loss": 1.0633, "step": 12689 }, { "epoch": 0.49651772439157993, "grad_norm": 0.0, "learning_rate": 1.0598408980659342e-05, "loss": 0.979, "step": 12690 }, { "epoch": 0.49655685108380937, "grad_norm": 0.0, "learning_rate": 1.0597144015714112e-05, "loss": 1.0737, "step": 12691 }, { "epoch": 0.4965959777760388, "grad_norm": 0.0, "learning_rate": 1.0595879041179498e-05, "loss": 1.0186, "step": 12692 }, { "epoch": 0.49663510446826825, "grad_norm": 0.0, "learning_rate": 1.0594614057075811e-05, "loss": 0.9656, "step": 12693 }, { "epoch": 0.4966742311604977, "grad_norm": 0.0, "learning_rate": 1.059334906342337e-05, "loss": 1.1265, "step": 12694 }, { "epoch": 0.49671335785272713, "grad_norm": 0.0, "learning_rate": 1.0592084060242489e-05, "loss": 1.0045, "step": 12695 }, { "epoch": 0.4967524845449566, "grad_norm": 0.0, "learning_rate": 1.0590819047553476e-05, "loss": 1.1192, "step": 12696 }, { "epoch": 0.496791611237186, "grad_norm": 0.0, "learning_rate": 1.058955402537665e-05, "loss": 0.9593, "step": 12697 }, { "epoch": 0.49683073792941546, "grad_norm": 0.0, "learning_rate": 1.0588288993732324e-05, "loss": 1.1309, "step": 12698 }, { "epoch": 0.4968698646216449, "grad_norm": 0.0, "learning_rate": 1.0587023952640815e-05, "loss": 1.1733, "step": 12699 }, { "epoch": 0.49690899131387434, "grad_norm": 0.0, "learning_rate": 1.0585758902122437e-05, "loss": 0.8993, "step": 12700 }, { "epoch": 0.4969481180061038, "grad_norm": 0.0, "learning_rate": 1.0584493842197505e-05, "loss": 1.0587, "step": 12701 }, { "epoch": 0.4969872446983332, "grad_norm": 0.0, "learning_rate": 1.0583228772886333e-05, "loss": 1.1295, "step": 12702 }, { "epoch": 0.49702637139056266, "grad_norm": 0.0, "learning_rate": 1.058196369420924e-05, "loss": 1.1218, "step": 12703 }, { "epoch": 0.4970654980827921, "grad_norm": 0.0, "learning_rate": 1.0580698606186542e-05, "loss": 1.0948, "step": 12704 }, { "epoch": 0.49710462477502154, "grad_norm": 0.0, "learning_rate": 1.0579433508838546e-05, "loss": 0.9939, "step": 12705 }, { "epoch": 0.497143751467251, "grad_norm": 0.0, "learning_rate": 1.0578168402185577e-05, "loss": 1.189, "step": 12706 }, { "epoch": 0.4971828781594804, "grad_norm": 0.0, "learning_rate": 1.0576903286247947e-05, "loss": 0.9865, "step": 12707 }, { "epoch": 0.4972220048517098, "grad_norm": 0.0, "learning_rate": 1.0575638161045976e-05, "loss": 1.018, "step": 12708 }, { "epoch": 0.49726113154393925, "grad_norm": 0.0, "learning_rate": 1.0574373026599973e-05, "loss": 0.9939, "step": 12709 }, { "epoch": 0.4973002582361687, "grad_norm": 0.0, "learning_rate": 1.0573107882930262e-05, "loss": 1.0832, "step": 12710 }, { "epoch": 0.49733938492839813, "grad_norm": 0.0, "learning_rate": 1.0571842730057154e-05, "loss": 0.9699, "step": 12711 }, { "epoch": 0.4973785116206276, "grad_norm": 0.0, "learning_rate": 1.057057756800097e-05, "loss": 1.0678, "step": 12712 }, { "epoch": 0.497417638312857, "grad_norm": 0.0, "learning_rate": 1.0569312396782029e-05, "loss": 0.8745, "step": 12713 }, { "epoch": 0.49745676500508645, "grad_norm": 0.0, "learning_rate": 1.0568047216420636e-05, "loss": 1.1296, "step": 12714 }, { "epoch": 0.4974958916973159, "grad_norm": 0.0, "learning_rate": 1.0566782026937124e-05, "loss": 0.9894, "step": 12715 }, { "epoch": 0.49753501838954534, "grad_norm": 0.0, "learning_rate": 1.0565516828351796e-05, "loss": 1.0974, "step": 12716 }, { "epoch": 0.4975741450817748, "grad_norm": 0.0, "learning_rate": 1.0564251620684982e-05, "loss": 1.0371, "step": 12717 }, { "epoch": 0.4976132717740042, "grad_norm": 0.0, "learning_rate": 1.0562986403956994e-05, "loss": 1.1038, "step": 12718 }, { "epoch": 0.49765239846623366, "grad_norm": 0.0, "learning_rate": 1.0561721178188148e-05, "loss": 1.0667, "step": 12719 }, { "epoch": 0.4976915251584631, "grad_norm": 0.0, "learning_rate": 1.0560455943398763e-05, "loss": 0.9623, "step": 12720 }, { "epoch": 0.49773065185069254, "grad_norm": 0.0, "learning_rate": 1.0559190699609158e-05, "loss": 0.9362, "step": 12721 }, { "epoch": 0.497769778542922, "grad_norm": 0.0, "learning_rate": 1.0557925446839652e-05, "loss": 1.0612, "step": 12722 }, { "epoch": 0.4978089052351514, "grad_norm": 0.0, "learning_rate": 1.0556660185110564e-05, "loss": 1.0777, "step": 12723 }, { "epoch": 0.49784803192738086, "grad_norm": 0.0, "learning_rate": 1.0555394914442207e-05, "loss": 1.0512, "step": 12724 }, { "epoch": 0.4978871586196103, "grad_norm": 0.0, "learning_rate": 1.0554129634854906e-05, "loss": 1.1687, "step": 12725 }, { "epoch": 0.49792628531183974, "grad_norm": 0.0, "learning_rate": 1.0552864346368979e-05, "loss": 1.0208, "step": 12726 }, { "epoch": 0.4979654120040692, "grad_norm": 0.0, "learning_rate": 1.0551599049004738e-05, "loss": 1.0653, "step": 12727 }, { "epoch": 0.4980045386962986, "grad_norm": 0.0, "learning_rate": 1.0550333742782512e-05, "loss": 1.1434, "step": 12728 }, { "epoch": 0.49804366538852807, "grad_norm": 0.0, "learning_rate": 1.0549068427722613e-05, "loss": 1.1456, "step": 12729 }, { "epoch": 0.4980827920807575, "grad_norm": 0.0, "learning_rate": 1.0547803103845366e-05, "loss": 1.1241, "step": 12730 }, { "epoch": 0.49812191877298695, "grad_norm": 0.0, "learning_rate": 1.0546537771171087e-05, "loss": 1.0913, "step": 12731 }, { "epoch": 0.4981610454652164, "grad_norm": 0.0, "learning_rate": 1.0545272429720094e-05, "loss": 1.0573, "step": 12732 }, { "epoch": 0.49820017215744583, "grad_norm": 0.0, "learning_rate": 1.0544007079512713e-05, "loss": 1.0496, "step": 12733 }, { "epoch": 0.49823929884967527, "grad_norm": 0.0, "learning_rate": 1.0542741720569257e-05, "loss": 1.0272, "step": 12734 }, { "epoch": 0.4982784255419047, "grad_norm": 0.0, "learning_rate": 1.054147635291005e-05, "loss": 1.0571, "step": 12735 }, { "epoch": 0.49831755223413415, "grad_norm": 0.0, "learning_rate": 1.054021097655541e-05, "loss": 1.1659, "step": 12736 }, { "epoch": 0.49835667892636354, "grad_norm": 0.0, "learning_rate": 1.053894559152566e-05, "loss": 1.0027, "step": 12737 }, { "epoch": 0.498395805618593, "grad_norm": 0.0, "learning_rate": 1.0537680197841116e-05, "loss": 1.0963, "step": 12738 }, { "epoch": 0.4984349323108224, "grad_norm": 0.0, "learning_rate": 1.0536414795522105e-05, "loss": 1.0113, "step": 12739 }, { "epoch": 0.49847405900305186, "grad_norm": 0.0, "learning_rate": 1.0535149384588943e-05, "loss": 0.9771, "step": 12740 }, { "epoch": 0.4985131856952813, "grad_norm": 0.0, "learning_rate": 1.0533883965061955e-05, "loss": 0.9287, "step": 12741 }, { "epoch": 0.49855231238751074, "grad_norm": 0.0, "learning_rate": 1.0532618536961459e-05, "loss": 1.0697, "step": 12742 }, { "epoch": 0.4985914390797402, "grad_norm": 0.0, "learning_rate": 1.0531353100307775e-05, "loss": 0.9219, "step": 12743 }, { "epoch": 0.4986305657719696, "grad_norm": 0.0, "learning_rate": 1.0530087655121227e-05, "loss": 1.0203, "step": 12744 }, { "epoch": 0.49866969246419907, "grad_norm": 0.0, "learning_rate": 1.0528822201422133e-05, "loss": 1.2217, "step": 12745 }, { "epoch": 0.4987088191564285, "grad_norm": 0.0, "learning_rate": 1.0527556739230822e-05, "loss": 1.0342, "step": 12746 }, { "epoch": 0.49874794584865795, "grad_norm": 0.0, "learning_rate": 1.0526291268567605e-05, "loss": 0.9553, "step": 12747 }, { "epoch": 0.4987870725408874, "grad_norm": 0.0, "learning_rate": 1.0525025789452815e-05, "loss": 1.0902, "step": 12748 }, { "epoch": 0.49882619923311683, "grad_norm": 0.0, "learning_rate": 1.0523760301906764e-05, "loss": 1.0205, "step": 12749 }, { "epoch": 0.49886532592534627, "grad_norm": 0.0, "learning_rate": 1.0522494805949784e-05, "loss": 1.0744, "step": 12750 }, { "epoch": 0.4989044526175757, "grad_norm": 0.0, "learning_rate": 1.0521229301602188e-05, "loss": 1.079, "step": 12751 }, { "epoch": 0.49894357930980515, "grad_norm": 0.0, "learning_rate": 1.0519963788884305e-05, "loss": 1.0073, "step": 12752 }, { "epoch": 0.4989827060020346, "grad_norm": 0.0, "learning_rate": 1.0518698267816454e-05, "loss": 1.1044, "step": 12753 }, { "epoch": 0.49902183269426403, "grad_norm": 0.0, "learning_rate": 1.0517432738418957e-05, "loss": 0.9726, "step": 12754 }, { "epoch": 0.4990609593864935, "grad_norm": 0.0, "learning_rate": 1.0516167200712144e-05, "loss": 1.0276, "step": 12755 }, { "epoch": 0.4991000860787229, "grad_norm": 0.0, "learning_rate": 1.0514901654716327e-05, "loss": 1.1074, "step": 12756 }, { "epoch": 0.49913921277095236, "grad_norm": 0.0, "learning_rate": 1.0513636100451838e-05, "loss": 0.9887, "step": 12757 }, { "epoch": 0.4991783394631818, "grad_norm": 0.0, "learning_rate": 1.0512370537938994e-05, "loss": 0.9919, "step": 12758 }, { "epoch": 0.49921746615541124, "grad_norm": 0.0, "learning_rate": 1.0511104967198124e-05, "loss": 1.0724, "step": 12759 }, { "epoch": 0.4992565928476407, "grad_norm": 0.0, "learning_rate": 1.0509839388249548e-05, "loss": 0.9935, "step": 12760 }, { "epoch": 0.4992957195398701, "grad_norm": 0.0, "learning_rate": 1.0508573801113588e-05, "loss": 1.0529, "step": 12761 }, { "epoch": 0.49933484623209956, "grad_norm": 0.0, "learning_rate": 1.0507308205810573e-05, "loss": 0.9489, "step": 12762 }, { "epoch": 0.499373972924329, "grad_norm": 0.0, "learning_rate": 1.0506042602360823e-05, "loss": 1.1468, "step": 12763 }, { "epoch": 0.49941309961655844, "grad_norm": 0.0, "learning_rate": 1.0504776990784661e-05, "loss": 1.1041, "step": 12764 }, { "epoch": 0.49945222630878783, "grad_norm": 0.0, "learning_rate": 1.0503511371102417e-05, "loss": 0.9355, "step": 12765 }, { "epoch": 0.49949135300101727, "grad_norm": 0.0, "learning_rate": 1.0502245743334409e-05, "loss": 1.1039, "step": 12766 }, { "epoch": 0.4995304796932467, "grad_norm": 0.0, "learning_rate": 1.0500980107500965e-05, "loss": 0.9694, "step": 12767 }, { "epoch": 0.49956960638547615, "grad_norm": 0.0, "learning_rate": 1.0499714463622405e-05, "loss": 1.1009, "step": 12768 }, { "epoch": 0.4996087330777056, "grad_norm": 0.0, "learning_rate": 1.049844881171906e-05, "loss": 1.1695, "step": 12769 }, { "epoch": 0.49964785976993503, "grad_norm": 0.0, "learning_rate": 1.049718315181125e-05, "loss": 0.9108, "step": 12770 }, { "epoch": 0.4996869864621645, "grad_norm": 0.0, "learning_rate": 1.0495917483919302e-05, "loss": 1.0485, "step": 12771 }, { "epoch": 0.4997261131543939, "grad_norm": 0.0, "learning_rate": 1.049465180806354e-05, "loss": 1.0883, "step": 12772 }, { "epoch": 0.49976523984662335, "grad_norm": 0.0, "learning_rate": 1.0493386124264292e-05, "loss": 1.0978, "step": 12773 }, { "epoch": 0.4998043665388528, "grad_norm": 0.0, "learning_rate": 1.049212043254188e-05, "loss": 1.064, "step": 12774 }, { "epoch": 0.49984349323108224, "grad_norm": 0.0, "learning_rate": 1.0490854732916629e-05, "loss": 1.1299, "step": 12775 }, { "epoch": 0.4998826199233117, "grad_norm": 0.0, "learning_rate": 1.0489589025408866e-05, "loss": 1.0749, "step": 12776 }, { "epoch": 0.4999217466155411, "grad_norm": 0.0, "learning_rate": 1.048832331003892e-05, "loss": 1.0062, "step": 12777 }, { "epoch": 0.49996087330777056, "grad_norm": 0.0, "learning_rate": 1.048705758682711e-05, "loss": 1.0033, "step": 12778 }, { "epoch": 0.5, "grad_norm": 0.0, "learning_rate": 1.0485791855793768e-05, "loss": 1.1282, "step": 12779 }, { "epoch": 0.5000391266922294, "grad_norm": 0.0, "learning_rate": 1.0484526116959214e-05, "loss": 1.106, "step": 12780 }, { "epoch": 0.5000782533844589, "grad_norm": 0.0, "learning_rate": 1.0483260370343781e-05, "loss": 0.9452, "step": 12781 }, { "epoch": 0.5001173800766883, "grad_norm": 0.0, "learning_rate": 1.0481994615967791e-05, "loss": 0.9466, "step": 12782 }, { "epoch": 0.5001565067689178, "grad_norm": 0.0, "learning_rate": 1.0480728853851572e-05, "loss": 1.1118, "step": 12783 }, { "epoch": 0.5001956334611471, "grad_norm": 0.0, "learning_rate": 1.047946308401545e-05, "loss": 1.0887, "step": 12784 }, { "epoch": 0.5002347601533766, "grad_norm": 0.0, "learning_rate": 1.0478197306479752e-05, "loss": 1.0998, "step": 12785 }, { "epoch": 0.500273886845606, "grad_norm": 0.0, "learning_rate": 1.0476931521264804e-05, "loss": 0.9883, "step": 12786 }, { "epoch": 0.5003130135378355, "grad_norm": 0.0, "learning_rate": 1.047566572839093e-05, "loss": 0.8498, "step": 12787 }, { "epoch": 0.5003521402300649, "grad_norm": 0.0, "learning_rate": 1.0474399927878464e-05, "loss": 0.9202, "step": 12788 }, { "epoch": 0.5003912669222944, "grad_norm": 0.0, "learning_rate": 1.047313411974773e-05, "loss": 1.0205, "step": 12789 }, { "epoch": 0.5004303936145238, "grad_norm": 0.0, "learning_rate": 1.0471868304019056e-05, "loss": 1.0438, "step": 12790 }, { "epoch": 0.5004695203067533, "grad_norm": 0.0, "learning_rate": 1.0470602480712766e-05, "loss": 1.0739, "step": 12791 }, { "epoch": 0.5005086469989827, "grad_norm": 0.0, "learning_rate": 1.046933664984919e-05, "loss": 1.0435, "step": 12792 }, { "epoch": 0.5005477736912122, "grad_norm": 0.0, "learning_rate": 1.0468070811448656e-05, "loss": 0.9573, "step": 12793 }, { "epoch": 0.5005869003834416, "grad_norm": 0.0, "learning_rate": 1.046680496553149e-05, "loss": 1.0282, "step": 12794 }, { "epoch": 0.500626027075671, "grad_norm": 0.0, "learning_rate": 1.0465539112118024e-05, "loss": 1.0096, "step": 12795 }, { "epoch": 0.5006651537679004, "grad_norm": 0.0, "learning_rate": 1.046427325122858e-05, "loss": 1.0089, "step": 12796 }, { "epoch": 0.5007042804601299, "grad_norm": 0.0, "learning_rate": 1.0463007382883492e-05, "loss": 0.9643, "step": 12797 }, { "epoch": 0.5007434071523593, "grad_norm": 0.0, "learning_rate": 1.0461741507103083e-05, "loss": 1.1267, "step": 12798 }, { "epoch": 0.5007825338445888, "grad_norm": 0.0, "learning_rate": 1.0460475623907687e-05, "loss": 1.0444, "step": 12799 }, { "epoch": 0.5008216605368182, "grad_norm": 0.0, "learning_rate": 1.0459209733317628e-05, "loss": 1.0107, "step": 12800 }, { "epoch": 0.5008607872290477, "grad_norm": 0.0, "learning_rate": 1.0457943835353235e-05, "loss": 0.9806, "step": 12801 }, { "epoch": 0.5008999139212771, "grad_norm": 0.0, "learning_rate": 1.045667793003484e-05, "loss": 0.9225, "step": 12802 }, { "epoch": 0.5009390406135066, "grad_norm": 0.0, "learning_rate": 1.0455412017382767e-05, "loss": 1.041, "step": 12803 }, { "epoch": 0.500978167305736, "grad_norm": 0.0, "learning_rate": 1.045414609741735e-05, "loss": 1.0968, "step": 12804 }, { "epoch": 0.5010172939979655, "grad_norm": 0.0, "learning_rate": 1.0452880170158914e-05, "loss": 0.9492, "step": 12805 }, { "epoch": 0.5010564206901948, "grad_norm": 0.0, "learning_rate": 1.0451614235627791e-05, "loss": 1.1517, "step": 12806 }, { "epoch": 0.5010955473824243, "grad_norm": 0.0, "learning_rate": 1.045034829384431e-05, "loss": 0.9983, "step": 12807 }, { "epoch": 0.5011346740746537, "grad_norm": 0.0, "learning_rate": 1.0449082344828799e-05, "loss": 1.0559, "step": 12808 }, { "epoch": 0.5011738007668831, "grad_norm": 0.0, "learning_rate": 1.0447816388601588e-05, "loss": 1.0733, "step": 12809 }, { "epoch": 0.5012129274591126, "grad_norm": 0.0, "learning_rate": 1.0446550425183008e-05, "loss": 1.063, "step": 12810 }, { "epoch": 0.501252054151342, "grad_norm": 0.0, "learning_rate": 1.0445284454593387e-05, "loss": 1.0095, "step": 12811 }, { "epoch": 0.5012911808435715, "grad_norm": 0.0, "learning_rate": 1.0444018476853055e-05, "loss": 1.0519, "step": 12812 }, { "epoch": 0.5013303075358009, "grad_norm": 0.0, "learning_rate": 1.0442752491982345e-05, "loss": 0.9682, "step": 12813 }, { "epoch": 0.5013694342280304, "grad_norm": 0.0, "learning_rate": 1.044148650000158e-05, "loss": 1.0521, "step": 12814 }, { "epoch": 0.5014085609202598, "grad_norm": 0.0, "learning_rate": 1.0440220500931099e-05, "loss": 1.1424, "step": 12815 }, { "epoch": 0.5014476876124893, "grad_norm": 0.0, "learning_rate": 1.0438954494791227e-05, "loss": 1.0152, "step": 12816 }, { "epoch": 0.5014868143047186, "grad_norm": 0.0, "learning_rate": 1.0437688481602297e-05, "loss": 0.9526, "step": 12817 }, { "epoch": 0.5015259409969481, "grad_norm": 0.0, "learning_rate": 1.0436422461384636e-05, "loss": 1.029, "step": 12818 }, { "epoch": 0.5015650676891775, "grad_norm": 0.0, "learning_rate": 1.0435156434158581e-05, "loss": 1.1407, "step": 12819 }, { "epoch": 0.501604194381407, "grad_norm": 0.0, "learning_rate": 1.0433890399944458e-05, "loss": 1.0505, "step": 12820 }, { "epoch": 0.5016433210736364, "grad_norm": 0.0, "learning_rate": 1.0432624358762595e-05, "loss": 1.0946, "step": 12821 }, { "epoch": 0.5016824477658659, "grad_norm": 0.0, "learning_rate": 1.0431358310633333e-05, "loss": 1.0985, "step": 12822 }, { "epoch": 0.5017215744580953, "grad_norm": 0.0, "learning_rate": 1.0430092255576991e-05, "loss": 1.0192, "step": 12823 }, { "epoch": 0.5017607011503248, "grad_norm": 0.0, "learning_rate": 1.042882619361391e-05, "loss": 1.0101, "step": 12824 }, { "epoch": 0.5017998278425542, "grad_norm": 0.0, "learning_rate": 1.0427560124764415e-05, "loss": 0.9917, "step": 12825 }, { "epoch": 0.5018389545347837, "grad_norm": 0.0, "learning_rate": 1.0426294049048845e-05, "loss": 1.0109, "step": 12826 }, { "epoch": 0.501878081227013, "grad_norm": 0.0, "learning_rate": 1.042502796648752e-05, "loss": 0.9843, "step": 12827 }, { "epoch": 0.5019172079192425, "grad_norm": 0.0, "learning_rate": 1.0423761877100784e-05, "loss": 1.0651, "step": 12828 }, { "epoch": 0.5019563346114719, "grad_norm": 0.0, "learning_rate": 1.0422495780908961e-05, "loss": 0.948, "step": 12829 }, { "epoch": 0.5019954613037014, "grad_norm": 0.0, "learning_rate": 1.0421229677932384e-05, "loss": 1.1344, "step": 12830 }, { "epoch": 0.5020345879959308, "grad_norm": 0.0, "learning_rate": 1.0419963568191389e-05, "loss": 1.0093, "step": 12831 }, { "epoch": 0.5020737146881603, "grad_norm": 0.0, "learning_rate": 1.0418697451706304e-05, "loss": 1.0506, "step": 12832 }, { "epoch": 0.5021128413803897, "grad_norm": 0.0, "learning_rate": 1.0417431328497462e-05, "loss": 1.0646, "step": 12833 }, { "epoch": 0.5021519680726192, "grad_norm": 0.0, "learning_rate": 1.0416165198585195e-05, "loss": 1.0494, "step": 12834 }, { "epoch": 0.5021910947648486, "grad_norm": 0.0, "learning_rate": 1.0414899061989838e-05, "loss": 1.1022, "step": 12835 }, { "epoch": 0.502230221457078, "grad_norm": 0.0, "learning_rate": 1.0413632918731722e-05, "loss": 0.9989, "step": 12836 }, { "epoch": 0.5022693481493075, "grad_norm": 0.0, "learning_rate": 1.0412366768831178e-05, "loss": 1.078, "step": 12837 }, { "epoch": 0.5023084748415368, "grad_norm": 0.0, "learning_rate": 1.0411100612308543e-05, "loss": 1.0287, "step": 12838 }, { "epoch": 0.5023476015337663, "grad_norm": 0.0, "learning_rate": 1.0409834449184146e-05, "loss": 1.045, "step": 12839 }, { "epoch": 0.5023867282259957, "grad_norm": 0.0, "learning_rate": 1.0408568279478324e-05, "loss": 1.0165, "step": 12840 }, { "epoch": 0.5024258549182252, "grad_norm": 0.0, "learning_rate": 1.0407302103211403e-05, "loss": 1.1536, "step": 12841 }, { "epoch": 0.5024649816104546, "grad_norm": 0.0, "learning_rate": 1.0406035920403723e-05, "loss": 1.0841, "step": 12842 }, { "epoch": 0.5025041083026841, "grad_norm": 0.0, "learning_rate": 1.0404769731075612e-05, "loss": 1.0277, "step": 12843 }, { "epoch": 0.5025432349949135, "grad_norm": 0.0, "learning_rate": 1.040350353524741e-05, "loss": 1.0117, "step": 12844 }, { "epoch": 0.502582361687143, "grad_norm": 0.0, "learning_rate": 1.0402237332939444e-05, "loss": 0.9842, "step": 12845 }, { "epoch": 0.5026214883793724, "grad_norm": 0.0, "learning_rate": 1.0400971124172055e-05, "loss": 1.0179, "step": 12846 }, { "epoch": 0.5026606150716019, "grad_norm": 0.0, "learning_rate": 1.0399704908965566e-05, "loss": 1.1028, "step": 12847 }, { "epoch": 0.5026997417638313, "grad_norm": 0.0, "learning_rate": 1.0398438687340322e-05, "loss": 0.9818, "step": 12848 }, { "epoch": 0.5027388684560608, "grad_norm": 0.0, "learning_rate": 1.0397172459316651e-05, "loss": 1.0977, "step": 12849 }, { "epoch": 0.5027779951482901, "grad_norm": 0.0, "learning_rate": 1.0395906224914887e-05, "loss": 0.9623, "step": 12850 }, { "epoch": 0.5028171218405196, "grad_norm": 0.0, "learning_rate": 1.0394639984155366e-05, "loss": 0.8991, "step": 12851 }, { "epoch": 0.502856248532749, "grad_norm": 0.0, "learning_rate": 1.039337373705842e-05, "loss": 1.1595, "step": 12852 }, { "epoch": 0.5028953752249785, "grad_norm": 0.0, "learning_rate": 1.0392107483644386e-05, "loss": 1.0294, "step": 12853 }, { "epoch": 0.5029345019172079, "grad_norm": 0.0, "learning_rate": 1.0390841223933596e-05, "loss": 1.2187, "step": 12854 }, { "epoch": 0.5029736286094374, "grad_norm": 0.0, "learning_rate": 1.0389574957946387e-05, "loss": 1.1143, "step": 12855 }, { "epoch": 0.5030127553016668, "grad_norm": 0.0, "learning_rate": 1.038830868570309e-05, "loss": 1.0785, "step": 12856 }, { "epoch": 0.5030518819938963, "grad_norm": 0.0, "learning_rate": 1.0387042407224046e-05, "loss": 1.1918, "step": 12857 }, { "epoch": 0.5030910086861257, "grad_norm": 0.0, "learning_rate": 1.0385776122529583e-05, "loss": 1.101, "step": 12858 }, { "epoch": 0.5031301353783552, "grad_norm": 0.0, "learning_rate": 1.038450983164004e-05, "loss": 1.0934, "step": 12859 }, { "epoch": 0.5031692620705845, "grad_norm": 0.0, "learning_rate": 1.0383243534575751e-05, "loss": 0.9827, "step": 12860 }, { "epoch": 0.503208388762814, "grad_norm": 0.0, "learning_rate": 1.0381977231357048e-05, "loss": 1.0908, "step": 12861 }, { "epoch": 0.5032475154550434, "grad_norm": 0.0, "learning_rate": 1.0380710922004273e-05, "loss": 1.0246, "step": 12862 }, { "epoch": 0.5032866421472729, "grad_norm": 0.0, "learning_rate": 1.0379444606537754e-05, "loss": 0.996, "step": 12863 }, { "epoch": 0.5033257688395023, "grad_norm": 0.0, "learning_rate": 1.0378178284977833e-05, "loss": 1.0333, "step": 12864 }, { "epoch": 0.5033648955317317, "grad_norm": 0.0, "learning_rate": 1.0376911957344842e-05, "loss": 0.9885, "step": 12865 }, { "epoch": 0.5034040222239612, "grad_norm": 0.0, "learning_rate": 1.0375645623659118e-05, "loss": 1.0274, "step": 12866 }, { "epoch": 0.5034431489161906, "grad_norm": 0.0, "learning_rate": 1.0374379283940995e-05, "loss": 1.0331, "step": 12867 }, { "epoch": 0.5034822756084201, "grad_norm": 0.0, "learning_rate": 1.037311293821081e-05, "loss": 1.1218, "step": 12868 }, { "epoch": 0.5035214023006495, "grad_norm": 0.0, "learning_rate": 1.0371846586488901e-05, "loss": 0.9838, "step": 12869 }, { "epoch": 0.503560528992879, "grad_norm": 0.0, "learning_rate": 1.0370580228795597e-05, "loss": 0.9954, "step": 12870 }, { "epoch": 0.5035996556851083, "grad_norm": 0.0, "learning_rate": 1.0369313865151243e-05, "loss": 1.0317, "step": 12871 }, { "epoch": 0.5036387823773378, "grad_norm": 0.0, "learning_rate": 1.0368047495576168e-05, "loss": 0.9772, "step": 12872 }, { "epoch": 0.5036779090695672, "grad_norm": 0.0, "learning_rate": 1.0366781120090714e-05, "loss": 0.9797, "step": 12873 }, { "epoch": 0.5037170357617967, "grad_norm": 0.0, "learning_rate": 1.0365514738715215e-05, "loss": 0.9466, "step": 12874 }, { "epoch": 0.5037561624540261, "grad_norm": 0.0, "learning_rate": 1.0364248351470005e-05, "loss": 1.1198, "step": 12875 }, { "epoch": 0.5037952891462556, "grad_norm": 0.0, "learning_rate": 1.0362981958375425e-05, "loss": 1.0249, "step": 12876 }, { "epoch": 0.503834415838485, "grad_norm": 0.0, "learning_rate": 1.0361715559451808e-05, "loss": 1.1524, "step": 12877 }, { "epoch": 0.5038735425307145, "grad_norm": 0.0, "learning_rate": 1.0360449154719495e-05, "loss": 1.0418, "step": 12878 }, { "epoch": 0.5039126692229439, "grad_norm": 0.0, "learning_rate": 1.0359182744198817e-05, "loss": 1.0596, "step": 12879 }, { "epoch": 0.5039517959151734, "grad_norm": 0.0, "learning_rate": 1.0357916327910117e-05, "loss": 1.0971, "step": 12880 }, { "epoch": 0.5039909226074027, "grad_norm": 0.0, "learning_rate": 1.0356649905873727e-05, "loss": 1.0873, "step": 12881 }, { "epoch": 0.5040300492996322, "grad_norm": 0.0, "learning_rate": 1.0355383478109986e-05, "loss": 1.0695, "step": 12882 }, { "epoch": 0.5040691759918616, "grad_norm": 0.0, "learning_rate": 1.0354117044639232e-05, "loss": 1.13, "step": 12883 }, { "epoch": 0.5041083026840911, "grad_norm": 0.0, "learning_rate": 1.0352850605481804e-05, "loss": 1.1046, "step": 12884 }, { "epoch": 0.5041474293763205, "grad_norm": 0.0, "learning_rate": 1.0351584160658034e-05, "loss": 1.0294, "step": 12885 }, { "epoch": 0.50418655606855, "grad_norm": 0.0, "learning_rate": 1.0350317710188267e-05, "loss": 0.9704, "step": 12886 }, { "epoch": 0.5042256827607794, "grad_norm": 0.0, "learning_rate": 1.0349051254092837e-05, "loss": 1.0464, "step": 12887 }, { "epoch": 0.5042648094530089, "grad_norm": 0.0, "learning_rate": 1.0347784792392077e-05, "loss": 1.0444, "step": 12888 }, { "epoch": 0.5043039361452383, "grad_norm": 0.0, "learning_rate": 1.0346518325106332e-05, "loss": 1.011, "step": 12889 }, { "epoch": 0.5043430628374678, "grad_norm": 0.0, "learning_rate": 1.0345251852255934e-05, "loss": 1.0189, "step": 12890 }, { "epoch": 0.5043821895296972, "grad_norm": 0.0, "learning_rate": 1.0343985373861227e-05, "loss": 1.043, "step": 12891 }, { "epoch": 0.5044213162219267, "grad_norm": 0.0, "learning_rate": 1.0342718889942543e-05, "loss": 1.1015, "step": 12892 }, { "epoch": 0.504460442914156, "grad_norm": 0.0, "learning_rate": 1.0341452400520227e-05, "loss": 0.9553, "step": 12893 }, { "epoch": 0.5044995696063854, "grad_norm": 0.0, "learning_rate": 1.034018590561461e-05, "loss": 0.8903, "step": 12894 }, { "epoch": 0.5045386962986149, "grad_norm": 0.0, "learning_rate": 1.0338919405246034e-05, "loss": 0.9267, "step": 12895 }, { "epoch": 0.5045778229908443, "grad_norm": 0.0, "learning_rate": 1.033765289943484e-05, "loss": 1.1019, "step": 12896 }, { "epoch": 0.5046169496830738, "grad_norm": 0.0, "learning_rate": 1.0336386388201363e-05, "loss": 0.9581, "step": 12897 }, { "epoch": 0.5046560763753032, "grad_norm": 0.0, "learning_rate": 1.0335119871565938e-05, "loss": 0.9679, "step": 12898 }, { "epoch": 0.5046952030675327, "grad_norm": 0.0, "learning_rate": 1.0333853349548912e-05, "loss": 1.1307, "step": 12899 }, { "epoch": 0.5047343297597621, "grad_norm": 0.0, "learning_rate": 1.0332586822170618e-05, "loss": 1.0592, "step": 12900 }, { "epoch": 0.5047734564519916, "grad_norm": 0.0, "learning_rate": 1.0331320289451394e-05, "loss": 1.0372, "step": 12901 }, { "epoch": 0.504812583144221, "grad_norm": 0.0, "learning_rate": 1.0330053751411587e-05, "loss": 1.0332, "step": 12902 }, { "epoch": 0.5048517098364504, "grad_norm": 0.0, "learning_rate": 1.0328787208071524e-05, "loss": 1.092, "step": 12903 }, { "epoch": 0.5048908365286798, "grad_norm": 0.0, "learning_rate": 1.0327520659451555e-05, "loss": 1.0019, "step": 12904 }, { "epoch": 0.5049299632209093, "grad_norm": 0.0, "learning_rate": 1.0326254105572012e-05, "loss": 1.0472, "step": 12905 }, { "epoch": 0.5049690899131387, "grad_norm": 0.0, "learning_rate": 1.0324987546453238e-05, "loss": 1.1186, "step": 12906 }, { "epoch": 0.5050082166053682, "grad_norm": 0.0, "learning_rate": 1.0323720982115573e-05, "loss": 1.0251, "step": 12907 }, { "epoch": 0.5050473432975976, "grad_norm": 0.0, "learning_rate": 1.032245441257935e-05, "loss": 1.0004, "step": 12908 }, { "epoch": 0.5050864699898271, "grad_norm": 0.0, "learning_rate": 1.0321187837864917e-05, "loss": 1.0317, "step": 12909 }, { "epoch": 0.5051255966820565, "grad_norm": 0.0, "learning_rate": 1.0319921257992607e-05, "loss": 1.0254, "step": 12910 }, { "epoch": 0.505164723374286, "grad_norm": 0.0, "learning_rate": 1.0318654672982766e-05, "loss": 1.1149, "step": 12911 }, { "epoch": 0.5052038500665154, "grad_norm": 0.0, "learning_rate": 1.0317388082855725e-05, "loss": 1.2014, "step": 12912 }, { "epoch": 0.5052429767587449, "grad_norm": 0.0, "learning_rate": 1.0316121487631837e-05, "loss": 1.191, "step": 12913 }, { "epoch": 0.5052821034509742, "grad_norm": 0.0, "learning_rate": 1.0314854887331427e-05, "loss": 1.0426, "step": 12914 }, { "epoch": 0.5053212301432037, "grad_norm": 0.0, "learning_rate": 1.0313588281974845e-05, "loss": 1.1207, "step": 12915 }, { "epoch": 0.5053603568354331, "grad_norm": 0.0, "learning_rate": 1.0312321671582427e-05, "loss": 1.0956, "step": 12916 }, { "epoch": 0.5053994835276626, "grad_norm": 0.0, "learning_rate": 1.0311055056174514e-05, "loss": 1.0751, "step": 12917 }, { "epoch": 0.505438610219892, "grad_norm": 0.0, "learning_rate": 1.0309788435771451e-05, "loss": 0.9153, "step": 12918 }, { "epoch": 0.5054777369121215, "grad_norm": 0.0, "learning_rate": 1.0308521810393569e-05, "loss": 1.037, "step": 12919 }, { "epoch": 0.5055168636043509, "grad_norm": 0.0, "learning_rate": 1.0307255180061216e-05, "loss": 1.1437, "step": 12920 }, { "epoch": 0.5055559902965804, "grad_norm": 0.0, "learning_rate": 1.0305988544794727e-05, "loss": 0.9764, "step": 12921 }, { "epoch": 0.5055951169888098, "grad_norm": 0.0, "learning_rate": 1.0304721904614447e-05, "loss": 1.1252, "step": 12922 }, { "epoch": 0.5056342436810392, "grad_norm": 0.0, "learning_rate": 1.0303455259540716e-05, "loss": 1.012, "step": 12923 }, { "epoch": 0.5056733703732686, "grad_norm": 0.0, "learning_rate": 1.0302188609593872e-05, "loss": 1.0421, "step": 12924 }, { "epoch": 0.505712497065498, "grad_norm": 0.0, "learning_rate": 1.0300921954794258e-05, "loss": 1.0371, "step": 12925 }, { "epoch": 0.5057516237577275, "grad_norm": 0.0, "learning_rate": 1.0299655295162216e-05, "loss": 1.0872, "step": 12926 }, { "epoch": 0.5057907504499569, "grad_norm": 0.0, "learning_rate": 1.0298388630718087e-05, "loss": 1.0323, "step": 12927 }, { "epoch": 0.5058298771421864, "grad_norm": 0.0, "learning_rate": 1.0297121961482205e-05, "loss": 1.1174, "step": 12928 }, { "epoch": 0.5058690038344158, "grad_norm": 0.0, "learning_rate": 1.0295855287474921e-05, "loss": 1.0214, "step": 12929 }, { "epoch": 0.5059081305266453, "grad_norm": 0.0, "learning_rate": 1.0294588608716569e-05, "loss": 0.9913, "step": 12930 }, { "epoch": 0.5059472572188747, "grad_norm": 0.0, "learning_rate": 1.0293321925227494e-05, "loss": 0.9391, "step": 12931 }, { "epoch": 0.5059863839111042, "grad_norm": 0.0, "learning_rate": 1.0292055237028036e-05, "loss": 1.0889, "step": 12932 }, { "epoch": 0.5060255106033336, "grad_norm": 0.0, "learning_rate": 1.029078854413854e-05, "loss": 1.0441, "step": 12933 }, { "epoch": 0.5060646372955631, "grad_norm": 0.0, "learning_rate": 1.028952184657934e-05, "loss": 0.9281, "step": 12934 }, { "epoch": 0.5061037639877924, "grad_norm": 0.0, "learning_rate": 1.0288255144370784e-05, "loss": 0.9254, "step": 12935 }, { "epoch": 0.5061428906800219, "grad_norm": 0.0, "learning_rate": 1.0286988437533214e-05, "loss": 1.0925, "step": 12936 }, { "epoch": 0.5061820173722513, "grad_norm": 0.0, "learning_rate": 1.0285721726086966e-05, "loss": 0.9496, "step": 12937 }, { "epoch": 0.5062211440644808, "grad_norm": 0.0, "learning_rate": 1.0284455010052385e-05, "loss": 1.1144, "step": 12938 }, { "epoch": 0.5062602707567102, "grad_norm": 0.0, "learning_rate": 1.0283188289449817e-05, "loss": 1.0891, "step": 12939 }, { "epoch": 0.5062993974489397, "grad_norm": 0.0, "learning_rate": 1.0281921564299595e-05, "loss": 0.9934, "step": 12940 }, { "epoch": 0.5063385241411691, "grad_norm": 0.0, "learning_rate": 1.0280654834622069e-05, "loss": 0.9481, "step": 12941 }, { "epoch": 0.5063776508333986, "grad_norm": 0.0, "learning_rate": 1.0279388100437574e-05, "loss": 1.0176, "step": 12942 }, { "epoch": 0.506416777525628, "grad_norm": 0.0, "learning_rate": 1.0278121361766462e-05, "loss": 1.1364, "step": 12943 }, { "epoch": 0.5064559042178575, "grad_norm": 0.0, "learning_rate": 1.0276854618629067e-05, "loss": 0.9561, "step": 12944 }, { "epoch": 0.5064950309100869, "grad_norm": 0.0, "learning_rate": 1.0275587871045731e-05, "loss": 1.0571, "step": 12945 }, { "epoch": 0.5065341576023163, "grad_norm": 0.0, "learning_rate": 1.0274321119036803e-05, "loss": 0.9796, "step": 12946 }, { "epoch": 0.5065732842945457, "grad_norm": 0.0, "learning_rate": 1.027305436262262e-05, "loss": 0.9651, "step": 12947 }, { "epoch": 0.5066124109867752, "grad_norm": 0.0, "learning_rate": 1.0271787601823526e-05, "loss": 1.0869, "step": 12948 }, { "epoch": 0.5066515376790046, "grad_norm": 0.0, "learning_rate": 1.0270520836659866e-05, "loss": 0.957, "step": 12949 }, { "epoch": 0.506690664371234, "grad_norm": 0.0, "learning_rate": 1.0269254067151975e-05, "loss": 1.0316, "step": 12950 }, { "epoch": 0.5067297910634635, "grad_norm": 0.0, "learning_rate": 1.0267987293320205e-05, "loss": 1.1483, "step": 12951 }, { "epoch": 0.5067689177556929, "grad_norm": 0.0, "learning_rate": 1.0266720515184894e-05, "loss": 1.051, "step": 12952 }, { "epoch": 0.5068080444479224, "grad_norm": 0.0, "learning_rate": 1.0265453732766387e-05, "loss": 1.0117, "step": 12953 }, { "epoch": 0.5068471711401518, "grad_norm": 0.0, "learning_rate": 1.0264186946085022e-05, "loss": 1.0796, "step": 12954 }, { "epoch": 0.5068862978323813, "grad_norm": 0.0, "learning_rate": 1.026292015516115e-05, "loss": 1.0631, "step": 12955 }, { "epoch": 0.5069254245246106, "grad_norm": 0.0, "learning_rate": 1.026165336001511e-05, "loss": 0.9735, "step": 12956 }, { "epoch": 0.5069645512168401, "grad_norm": 0.0, "learning_rate": 1.026038656066724e-05, "loss": 0.9549, "step": 12957 }, { "epoch": 0.5070036779090695, "grad_norm": 0.0, "learning_rate": 1.0259119757137891e-05, "loss": 1.0533, "step": 12958 }, { "epoch": 0.507042804601299, "grad_norm": 0.0, "learning_rate": 1.0257852949447404e-05, "loss": 1.0157, "step": 12959 }, { "epoch": 0.5070819312935284, "grad_norm": 0.0, "learning_rate": 1.0256586137616123e-05, "loss": 1.0038, "step": 12960 }, { "epoch": 0.5071210579857579, "grad_norm": 0.0, "learning_rate": 1.0255319321664386e-05, "loss": 0.9925, "step": 12961 }, { "epoch": 0.5071601846779873, "grad_norm": 0.0, "learning_rate": 1.0254052501612543e-05, "loss": 1.0581, "step": 12962 }, { "epoch": 0.5071993113702168, "grad_norm": 0.0, "learning_rate": 1.0252785677480934e-05, "loss": 1.0898, "step": 12963 }, { "epoch": 0.5072384380624462, "grad_norm": 0.0, "learning_rate": 1.0251518849289905e-05, "loss": 0.9947, "step": 12964 }, { "epoch": 0.5072775647546757, "grad_norm": 0.0, "learning_rate": 1.02502520170598e-05, "loss": 1.0475, "step": 12965 }, { "epoch": 0.507316691446905, "grad_norm": 0.0, "learning_rate": 1.0248985180810958e-05, "loss": 1.0394, "step": 12966 }, { "epoch": 0.5073558181391346, "grad_norm": 0.0, "learning_rate": 1.0247718340563728e-05, "loss": 1.0017, "step": 12967 }, { "epoch": 0.5073949448313639, "grad_norm": 0.0, "learning_rate": 1.024645149633845e-05, "loss": 1.0813, "step": 12968 }, { "epoch": 0.5074340715235934, "grad_norm": 0.0, "learning_rate": 1.0245184648155472e-05, "loss": 1.1146, "step": 12969 }, { "epoch": 0.5074731982158228, "grad_norm": 0.0, "learning_rate": 1.0243917796035135e-05, "loss": 0.9334, "step": 12970 }, { "epoch": 0.5075123249080523, "grad_norm": 0.0, "learning_rate": 1.0242650939997786e-05, "loss": 0.9893, "step": 12971 }, { "epoch": 0.5075514516002817, "grad_norm": 0.0, "learning_rate": 1.0241384080063761e-05, "loss": 0.9288, "step": 12972 }, { "epoch": 0.5075905782925112, "grad_norm": 0.0, "learning_rate": 1.0240117216253416e-05, "loss": 1.05, "step": 12973 }, { "epoch": 0.5076297049847406, "grad_norm": 0.0, "learning_rate": 1.0238850348587088e-05, "loss": 1.1587, "step": 12974 }, { "epoch": 0.5076688316769701, "grad_norm": 0.0, "learning_rate": 1.023758347708512e-05, "loss": 0.9327, "step": 12975 }, { "epoch": 0.5077079583691995, "grad_norm": 0.0, "learning_rate": 1.0236316601767862e-05, "loss": 0.9609, "step": 12976 }, { "epoch": 0.507747085061429, "grad_norm": 0.0, "learning_rate": 1.0235049722655654e-05, "loss": 1.1088, "step": 12977 }, { "epoch": 0.5077862117536583, "grad_norm": 0.0, "learning_rate": 1.0233782839768843e-05, "loss": 1.0167, "step": 12978 }, { "epoch": 0.5078253384458877, "grad_norm": 0.0, "learning_rate": 1.0232515953127771e-05, "loss": 1.073, "step": 12979 }, { "epoch": 0.5078644651381172, "grad_norm": 0.0, "learning_rate": 1.0231249062752787e-05, "loss": 1.1171, "step": 12980 }, { "epoch": 0.5079035918303466, "grad_norm": 0.0, "learning_rate": 1.0229982168664227e-05, "loss": 1.1476, "step": 12981 }, { "epoch": 0.5079427185225761, "grad_norm": 0.0, "learning_rate": 1.0228715270882448e-05, "loss": 1.0943, "step": 12982 }, { "epoch": 0.5079818452148055, "grad_norm": 0.0, "learning_rate": 1.0227448369427786e-05, "loss": 1.0712, "step": 12983 }, { "epoch": 0.508020971907035, "grad_norm": 0.0, "learning_rate": 1.0226181464320589e-05, "loss": 1.0738, "step": 12984 }, { "epoch": 0.5080600985992644, "grad_norm": 0.0, "learning_rate": 1.02249145555812e-05, "loss": 1.1317, "step": 12985 }, { "epoch": 0.5080992252914939, "grad_norm": 0.0, "learning_rate": 1.0223647643229966e-05, "loss": 1.1112, "step": 12986 }, { "epoch": 0.5081383519837233, "grad_norm": 0.0, "learning_rate": 1.022238072728723e-05, "loss": 1.1469, "step": 12987 }, { "epoch": 0.5081774786759528, "grad_norm": 0.0, "learning_rate": 1.022111380777334e-05, "loss": 1.0972, "step": 12988 }, { "epoch": 0.5082166053681821, "grad_norm": 0.0, "learning_rate": 1.0219846884708638e-05, "loss": 1.0462, "step": 12989 }, { "epoch": 0.5082557320604116, "grad_norm": 0.0, "learning_rate": 1.0218579958113468e-05, "loss": 1.1027, "step": 12990 }, { "epoch": 0.508294858752641, "grad_norm": 0.0, "learning_rate": 1.0217313028008183e-05, "loss": 0.8725, "step": 12991 }, { "epoch": 0.5083339854448705, "grad_norm": 0.0, "learning_rate": 1.0216046094413117e-05, "loss": 1.0138, "step": 12992 }, { "epoch": 0.5083731121370999, "grad_norm": 0.0, "learning_rate": 1.0214779157348627e-05, "loss": 1.0509, "step": 12993 }, { "epoch": 0.5084122388293294, "grad_norm": 0.0, "learning_rate": 1.0213512216835052e-05, "loss": 1.0953, "step": 12994 }, { "epoch": 0.5084513655215588, "grad_norm": 0.0, "learning_rate": 1.0212245272892733e-05, "loss": 1.1435, "step": 12995 }, { "epoch": 0.5084904922137883, "grad_norm": 0.0, "learning_rate": 1.0210978325542027e-05, "loss": 0.9882, "step": 12996 }, { "epoch": 0.5085296189060177, "grad_norm": 0.0, "learning_rate": 1.020971137480327e-05, "loss": 0.921, "step": 12997 }, { "epoch": 0.5085687455982472, "grad_norm": 0.0, "learning_rate": 1.0208444420696812e-05, "loss": 0.9015, "step": 12998 }, { "epoch": 0.5086078722904765, "grad_norm": 0.0, "learning_rate": 1.0207177463242998e-05, "loss": 1.1537, "step": 12999 }, { "epoch": 0.508646998982706, "grad_norm": 0.0, "learning_rate": 1.0205910502462174e-05, "loss": 0.9963, "step": 13000 }, { "epoch": 0.5086861256749354, "grad_norm": 0.0, "learning_rate": 1.0204643538374685e-05, "loss": 1.0302, "step": 13001 }, { "epoch": 0.5087252523671649, "grad_norm": 0.0, "learning_rate": 1.0203376571000879e-05, "loss": 1.0653, "step": 13002 }, { "epoch": 0.5087643790593943, "grad_norm": 0.0, "learning_rate": 1.0202109600361098e-05, "loss": 0.9597, "step": 13003 }, { "epoch": 0.5088035057516238, "grad_norm": 0.0, "learning_rate": 1.0200842626475689e-05, "loss": 1.0632, "step": 13004 }, { "epoch": 0.5088426324438532, "grad_norm": 0.0, "learning_rate": 1.0199575649365002e-05, "loss": 1.0342, "step": 13005 }, { "epoch": 0.5088817591360827, "grad_norm": 0.0, "learning_rate": 1.0198308669049377e-05, "loss": 1.1478, "step": 13006 }, { "epoch": 0.5089208858283121, "grad_norm": 0.0, "learning_rate": 1.0197041685549166e-05, "loss": 0.9992, "step": 13007 }, { "epoch": 0.5089600125205415, "grad_norm": 0.0, "learning_rate": 1.019577469888471e-05, "loss": 1.0554, "step": 13008 }, { "epoch": 0.508999139212771, "grad_norm": 0.0, "learning_rate": 1.019450770907636e-05, "loss": 0.9401, "step": 13009 }, { "epoch": 0.5090382659050003, "grad_norm": 0.0, "learning_rate": 1.019324071614446e-05, "loss": 0.9413, "step": 13010 }, { "epoch": 0.5090773925972298, "grad_norm": 0.0, "learning_rate": 1.0191973720109354e-05, "loss": 1.0015, "step": 13011 }, { "epoch": 0.5091165192894592, "grad_norm": 0.0, "learning_rate": 1.019070672099139e-05, "loss": 1.1125, "step": 13012 }, { "epoch": 0.5091556459816887, "grad_norm": 0.0, "learning_rate": 1.0189439718810919e-05, "loss": 1.0985, "step": 13013 }, { "epoch": 0.5091947726739181, "grad_norm": 0.0, "learning_rate": 1.0188172713588282e-05, "loss": 1.0033, "step": 13014 }, { "epoch": 0.5092338993661476, "grad_norm": 0.0, "learning_rate": 1.0186905705343826e-05, "loss": 1.1419, "step": 13015 }, { "epoch": 0.509273026058377, "grad_norm": 0.0, "learning_rate": 1.01856386940979e-05, "loss": 1.049, "step": 13016 }, { "epoch": 0.5093121527506065, "grad_norm": 0.0, "learning_rate": 1.0184371679870845e-05, "loss": 1.0171, "step": 13017 }, { "epoch": 0.5093512794428359, "grad_norm": 0.0, "learning_rate": 1.0183104662683016e-05, "loss": 0.9807, "step": 13018 }, { "epoch": 0.5093904061350654, "grad_norm": 0.0, "learning_rate": 1.0181837642554753e-05, "loss": 0.996, "step": 13019 }, { "epoch": 0.5094295328272948, "grad_norm": 0.0, "learning_rate": 1.018057061950641e-05, "loss": 1.0542, "step": 13020 }, { "epoch": 0.5094686595195242, "grad_norm": 0.0, "learning_rate": 1.0179303593558323e-05, "loss": 0.9233, "step": 13021 }, { "epoch": 0.5095077862117536, "grad_norm": 0.0, "learning_rate": 1.0178036564730849e-05, "loss": 0.8931, "step": 13022 }, { "epoch": 0.5095469129039831, "grad_norm": 0.0, "learning_rate": 1.0176769533044331e-05, "loss": 0.9772, "step": 13023 }, { "epoch": 0.5095860395962125, "grad_norm": 0.0, "learning_rate": 1.0175502498519115e-05, "loss": 1.2129, "step": 13024 }, { "epoch": 0.509625166288442, "grad_norm": 0.0, "learning_rate": 1.0174235461175547e-05, "loss": 0.901, "step": 13025 }, { "epoch": 0.5096642929806714, "grad_norm": 0.0, "learning_rate": 1.0172968421033977e-05, "loss": 0.9671, "step": 13026 }, { "epoch": 0.5097034196729009, "grad_norm": 0.0, "learning_rate": 1.0171701378114751e-05, "loss": 1.0705, "step": 13027 }, { "epoch": 0.5097425463651303, "grad_norm": 0.0, "learning_rate": 1.0170434332438217e-05, "loss": 1.1263, "step": 13028 }, { "epoch": 0.5097816730573598, "grad_norm": 0.0, "learning_rate": 1.0169167284024718e-05, "loss": 1.1561, "step": 13029 }, { "epoch": 0.5098207997495892, "grad_norm": 0.0, "learning_rate": 1.016790023289461e-05, "loss": 0.9768, "step": 13030 }, { "epoch": 0.5098599264418187, "grad_norm": 0.0, "learning_rate": 1.0166633179068232e-05, "loss": 0.9872, "step": 13031 }, { "epoch": 0.509899053134048, "grad_norm": 0.0, "learning_rate": 1.0165366122565931e-05, "loss": 1.1153, "step": 13032 }, { "epoch": 0.5099381798262775, "grad_norm": 0.0, "learning_rate": 1.0164099063408062e-05, "loss": 0.9963, "step": 13033 }, { "epoch": 0.5099773065185069, "grad_norm": 0.0, "learning_rate": 1.0162832001614966e-05, "loss": 1.0531, "step": 13034 }, { "epoch": 0.5100164332107364, "grad_norm": 0.0, "learning_rate": 1.016156493720699e-05, "loss": 1.0639, "step": 13035 }, { "epoch": 0.5100555599029658, "grad_norm": 0.0, "learning_rate": 1.0160297870204486e-05, "loss": 0.994, "step": 13036 }, { "epoch": 0.5100946865951952, "grad_norm": 0.0, "learning_rate": 1.0159030800627796e-05, "loss": 1.0744, "step": 13037 }, { "epoch": 0.5101338132874247, "grad_norm": 0.0, "learning_rate": 1.0157763728497275e-05, "loss": 1.1637, "step": 13038 }, { "epoch": 0.5101729399796541, "grad_norm": 0.0, "learning_rate": 1.015649665383326e-05, "loss": 0.8987, "step": 13039 }, { "epoch": 0.5102120666718836, "grad_norm": 0.0, "learning_rate": 1.0155229576656112e-05, "loss": 1.039, "step": 13040 }, { "epoch": 0.510251193364113, "grad_norm": 0.0, "learning_rate": 1.0153962496986166e-05, "loss": 1.0383, "step": 13041 }, { "epoch": 0.5102903200563425, "grad_norm": 0.0, "learning_rate": 1.0152695414843778e-05, "loss": 1.0292, "step": 13042 }, { "epoch": 0.5103294467485718, "grad_norm": 0.0, "learning_rate": 1.0151428330249294e-05, "loss": 1.176, "step": 13043 }, { "epoch": 0.5103685734408013, "grad_norm": 0.0, "learning_rate": 1.0150161243223058e-05, "loss": 1.0053, "step": 13044 }, { "epoch": 0.5104077001330307, "grad_norm": 0.0, "learning_rate": 1.0148894153785422e-05, "loss": 1.0469, "step": 13045 }, { "epoch": 0.5104468268252602, "grad_norm": 0.0, "learning_rate": 1.0147627061956731e-05, "loss": 1.1526, "step": 13046 }, { "epoch": 0.5104859535174896, "grad_norm": 0.0, "learning_rate": 1.0146359967757337e-05, "loss": 1.0593, "step": 13047 }, { "epoch": 0.5105250802097191, "grad_norm": 0.0, "learning_rate": 1.0145092871207583e-05, "loss": 0.9783, "step": 13048 }, { "epoch": 0.5105642069019485, "grad_norm": 0.0, "learning_rate": 1.014382577232782e-05, "loss": 1.0402, "step": 13049 }, { "epoch": 0.510603333594178, "grad_norm": 0.0, "learning_rate": 1.0142558671138394e-05, "loss": 1.0957, "step": 13050 }, { "epoch": 0.5106424602864074, "grad_norm": 0.0, "learning_rate": 1.0141291567659658e-05, "loss": 1.0865, "step": 13051 }, { "epoch": 0.5106815869786369, "grad_norm": 0.0, "learning_rate": 1.0140024461911955e-05, "loss": 0.9838, "step": 13052 }, { "epoch": 0.5107207136708662, "grad_norm": 0.0, "learning_rate": 1.0138757353915632e-05, "loss": 0.9936, "step": 13053 }, { "epoch": 0.5107598403630957, "grad_norm": 0.0, "learning_rate": 1.0137490243691044e-05, "loss": 1.0756, "step": 13054 }, { "epoch": 0.5107989670553251, "grad_norm": 0.0, "learning_rate": 1.0136223131258532e-05, "loss": 0.9828, "step": 13055 }, { "epoch": 0.5108380937475546, "grad_norm": 0.0, "learning_rate": 1.0134956016638446e-05, "loss": 1.0713, "step": 13056 }, { "epoch": 0.510877220439784, "grad_norm": 0.0, "learning_rate": 1.0133688899851137e-05, "loss": 0.9582, "step": 13057 }, { "epoch": 0.5109163471320135, "grad_norm": 0.0, "learning_rate": 1.0132421780916954e-05, "loss": 1.1227, "step": 13058 }, { "epoch": 0.5109554738242429, "grad_norm": 0.0, "learning_rate": 1.013115465985624e-05, "loss": 0.9964, "step": 13059 }, { "epoch": 0.5109946005164724, "grad_norm": 0.0, "learning_rate": 1.012988753668935e-05, "loss": 1.1448, "step": 13060 }, { "epoch": 0.5110337272087018, "grad_norm": 0.0, "learning_rate": 1.0128620411436626e-05, "loss": 1.1083, "step": 13061 }, { "epoch": 0.5110728539009313, "grad_norm": 0.0, "learning_rate": 1.012735328411842e-05, "loss": 1.0115, "step": 13062 }, { "epoch": 0.5111119805931607, "grad_norm": 0.0, "learning_rate": 1.0126086154755079e-05, "loss": 0.9794, "step": 13063 }, { "epoch": 0.51115110728539, "grad_norm": 0.0, "learning_rate": 1.0124819023366954e-05, "loss": 1.0997, "step": 13064 }, { "epoch": 0.5111902339776195, "grad_norm": 0.0, "learning_rate": 1.012355188997439e-05, "loss": 1.0447, "step": 13065 }, { "epoch": 0.5112293606698489, "grad_norm": 0.0, "learning_rate": 1.0122284754597738e-05, "loss": 1.0409, "step": 13066 }, { "epoch": 0.5112684873620784, "grad_norm": 0.0, "learning_rate": 1.0121017617257349e-05, "loss": 1.0826, "step": 13067 }, { "epoch": 0.5113076140543078, "grad_norm": 0.0, "learning_rate": 1.0119750477973564e-05, "loss": 1.0848, "step": 13068 }, { "epoch": 0.5113467407465373, "grad_norm": 0.0, "learning_rate": 1.011848333676674e-05, "loss": 1.009, "step": 13069 }, { "epoch": 0.5113858674387667, "grad_norm": 0.0, "learning_rate": 1.0117216193657221e-05, "loss": 1.0694, "step": 13070 }, { "epoch": 0.5114249941309962, "grad_norm": 0.0, "learning_rate": 1.0115949048665356e-05, "loss": 0.9168, "step": 13071 }, { "epoch": 0.5114641208232256, "grad_norm": 0.0, "learning_rate": 1.0114681901811496e-05, "loss": 1.0407, "step": 13072 }, { "epoch": 0.5115032475154551, "grad_norm": 0.0, "learning_rate": 1.011341475311599e-05, "loss": 1.1593, "step": 13073 }, { "epoch": 0.5115423742076844, "grad_norm": 0.0, "learning_rate": 1.011214760259918e-05, "loss": 1.1185, "step": 13074 }, { "epoch": 0.5115815008999139, "grad_norm": 0.0, "learning_rate": 1.0110880450281424e-05, "loss": 1.0139, "step": 13075 }, { "epoch": 0.5116206275921433, "grad_norm": 0.0, "learning_rate": 1.0109613296183067e-05, "loss": 1.0094, "step": 13076 }, { "epoch": 0.5116597542843728, "grad_norm": 0.0, "learning_rate": 1.0108346140324457e-05, "loss": 1.0579, "step": 13077 }, { "epoch": 0.5116988809766022, "grad_norm": 0.0, "learning_rate": 1.0107078982725942e-05, "loss": 1.0561, "step": 13078 }, { "epoch": 0.5117380076688317, "grad_norm": 0.0, "learning_rate": 1.0105811823407874e-05, "loss": 1.0546, "step": 13079 }, { "epoch": 0.5117771343610611, "grad_norm": 0.0, "learning_rate": 1.01045446623906e-05, "loss": 1.0876, "step": 13080 }, { "epoch": 0.5118162610532906, "grad_norm": 0.0, "learning_rate": 1.0103277499694472e-05, "loss": 1.0762, "step": 13081 }, { "epoch": 0.51185538774552, "grad_norm": 0.0, "learning_rate": 1.0102010335339835e-05, "loss": 1.0884, "step": 13082 }, { "epoch": 0.5118945144377495, "grad_norm": 0.0, "learning_rate": 1.010074316934704e-05, "loss": 0.8553, "step": 13083 }, { "epoch": 0.5119336411299789, "grad_norm": 0.0, "learning_rate": 1.0099476001736434e-05, "loss": 1.1041, "step": 13084 }, { "epoch": 0.5119727678222084, "grad_norm": 0.0, "learning_rate": 1.0098208832528373e-05, "loss": 1.0349, "step": 13085 }, { "epoch": 0.5120118945144377, "grad_norm": 0.0, "learning_rate": 1.0096941661743196e-05, "loss": 1.023, "step": 13086 }, { "epoch": 0.5120510212066672, "grad_norm": 0.0, "learning_rate": 1.0095674489401262e-05, "loss": 1.0016, "step": 13087 }, { "epoch": 0.5120901478988966, "grad_norm": 0.0, "learning_rate": 1.0094407315522912e-05, "loss": 1.0137, "step": 13088 }, { "epoch": 0.5121292745911261, "grad_norm": 0.0, "learning_rate": 1.0093140140128502e-05, "loss": 1.1196, "step": 13089 }, { "epoch": 0.5121684012833555, "grad_norm": 0.0, "learning_rate": 1.0091872963238376e-05, "loss": 1.1069, "step": 13090 }, { "epoch": 0.512207527975585, "grad_norm": 0.0, "learning_rate": 1.0090605784872884e-05, "loss": 0.987, "step": 13091 }, { "epoch": 0.5122466546678144, "grad_norm": 0.0, "learning_rate": 1.0089338605052379e-05, "loss": 0.91, "step": 13092 }, { "epoch": 0.5122857813600438, "grad_norm": 0.0, "learning_rate": 1.0088071423797206e-05, "loss": 1.1036, "step": 13093 }, { "epoch": 0.5123249080522733, "grad_norm": 0.0, "learning_rate": 1.008680424112772e-05, "loss": 0.9921, "step": 13094 }, { "epoch": 0.5123640347445026, "grad_norm": 0.0, "learning_rate": 1.008553705706426e-05, "loss": 1.024, "step": 13095 }, { "epoch": 0.5124031614367321, "grad_norm": 0.0, "learning_rate": 1.0084269871627189e-05, "loss": 0.9695, "step": 13096 }, { "epoch": 0.5124422881289615, "grad_norm": 0.0, "learning_rate": 1.0083002684836845e-05, "loss": 1.1418, "step": 13097 }, { "epoch": 0.512481414821191, "grad_norm": 0.0, "learning_rate": 1.0081735496713582e-05, "loss": 0.9768, "step": 13098 }, { "epoch": 0.5125205415134204, "grad_norm": 0.0, "learning_rate": 1.0080468307277751e-05, "loss": 1.0078, "step": 13099 }, { "epoch": 0.5125596682056499, "grad_norm": 0.0, "learning_rate": 1.00792011165497e-05, "loss": 1.1088, "step": 13100 }, { "epoch": 0.5125987948978793, "grad_norm": 0.0, "learning_rate": 1.0077933924549778e-05, "loss": 1.1411, "step": 13101 }, { "epoch": 0.5126379215901088, "grad_norm": 0.0, "learning_rate": 1.0076666731298334e-05, "loss": 1.0113, "step": 13102 }, { "epoch": 0.5126770482823382, "grad_norm": 0.0, "learning_rate": 1.007539953681572e-05, "loss": 1.0306, "step": 13103 }, { "epoch": 0.5127161749745677, "grad_norm": 0.0, "learning_rate": 1.0074132341122282e-05, "loss": 1.0956, "step": 13104 }, { "epoch": 0.5127553016667971, "grad_norm": 0.0, "learning_rate": 1.0072865144238373e-05, "loss": 1.0923, "step": 13105 }, { "epoch": 0.5127944283590266, "grad_norm": 0.0, "learning_rate": 1.007159794618434e-05, "loss": 1.0814, "step": 13106 }, { "epoch": 0.5128335550512559, "grad_norm": 0.0, "learning_rate": 1.0070330746980534e-05, "loss": 1.0727, "step": 13107 }, { "epoch": 0.5128726817434854, "grad_norm": 0.0, "learning_rate": 1.0069063546647304e-05, "loss": 1.0804, "step": 13108 }, { "epoch": 0.5129118084357148, "grad_norm": 0.0, "learning_rate": 1.0067796345205002e-05, "loss": 0.9756, "step": 13109 }, { "epoch": 0.5129509351279443, "grad_norm": 0.0, "learning_rate": 1.0066529142673976e-05, "loss": 1.1917, "step": 13110 }, { "epoch": 0.5129900618201737, "grad_norm": 0.0, "learning_rate": 1.0065261939074571e-05, "loss": 1.1582, "step": 13111 }, { "epoch": 0.5130291885124032, "grad_norm": 0.0, "learning_rate": 1.0063994734427145e-05, "loss": 1.1423, "step": 13112 }, { "epoch": 0.5130683152046326, "grad_norm": 0.0, "learning_rate": 1.0062727528752042e-05, "loss": 1.0945, "step": 13113 }, { "epoch": 0.5131074418968621, "grad_norm": 0.0, "learning_rate": 1.0061460322069613e-05, "loss": 0.9752, "step": 13114 }, { "epoch": 0.5131465685890915, "grad_norm": 0.0, "learning_rate": 1.006019311440021e-05, "loss": 1.0197, "step": 13115 }, { "epoch": 0.513185695281321, "grad_norm": 0.0, "learning_rate": 1.005892590576418e-05, "loss": 1.0098, "step": 13116 }, { "epoch": 0.5132248219735503, "grad_norm": 0.0, "learning_rate": 1.0057658696181875e-05, "loss": 1.1143, "step": 13117 }, { "epoch": 0.5132639486657798, "grad_norm": 0.0, "learning_rate": 1.005639148567364e-05, "loss": 1.0056, "step": 13118 }, { "epoch": 0.5133030753580092, "grad_norm": 0.0, "learning_rate": 1.0055124274259833e-05, "loss": 1.0915, "step": 13119 }, { "epoch": 0.5133422020502387, "grad_norm": 0.0, "learning_rate": 1.0053857061960798e-05, "loss": 0.959, "step": 13120 }, { "epoch": 0.5133813287424681, "grad_norm": 0.0, "learning_rate": 1.0052589848796882e-05, "loss": 1.0749, "step": 13121 }, { "epoch": 0.5134204554346975, "grad_norm": 0.0, "learning_rate": 1.0051322634788446e-05, "loss": 1.0212, "step": 13122 }, { "epoch": 0.513459582126927, "grad_norm": 0.0, "learning_rate": 1.0050055419955829e-05, "loss": 1.0276, "step": 13123 }, { "epoch": 0.5134987088191564, "grad_norm": 0.0, "learning_rate": 1.0048788204319383e-05, "loss": 1.0236, "step": 13124 }, { "epoch": 0.5135378355113859, "grad_norm": 0.0, "learning_rate": 1.0047520987899463e-05, "loss": 1.0656, "step": 13125 }, { "epoch": 0.5135769622036153, "grad_norm": 0.0, "learning_rate": 1.0046253770716412e-05, "loss": 0.9253, "step": 13126 }, { "epoch": 0.5136160888958448, "grad_norm": 0.0, "learning_rate": 1.0044986552790586e-05, "loss": 1.0266, "step": 13127 }, { "epoch": 0.5136552155880741, "grad_norm": 0.0, "learning_rate": 1.004371933414233e-05, "loss": 0.9818, "step": 13128 }, { "epoch": 0.5136943422803036, "grad_norm": 0.0, "learning_rate": 1.0042452114791998e-05, "loss": 1.0632, "step": 13129 }, { "epoch": 0.513733468972533, "grad_norm": 0.0, "learning_rate": 1.004118489475994e-05, "loss": 1.0541, "step": 13130 }, { "epoch": 0.5137725956647625, "grad_norm": 0.0, "learning_rate": 1.0039917674066499e-05, "loss": 1.0245, "step": 13131 }, { "epoch": 0.5138117223569919, "grad_norm": 0.0, "learning_rate": 1.0038650452732035e-05, "loss": 1.0775, "step": 13132 }, { "epoch": 0.5138508490492214, "grad_norm": 0.0, "learning_rate": 1.0037383230776891e-05, "loss": 1.049, "step": 13133 }, { "epoch": 0.5138899757414508, "grad_norm": 0.0, "learning_rate": 1.003611600822142e-05, "loss": 0.9583, "step": 13134 }, { "epoch": 0.5139291024336803, "grad_norm": 0.0, "learning_rate": 1.0034848785085969e-05, "loss": 1.0303, "step": 13135 }, { "epoch": 0.5139682291259097, "grad_norm": 0.0, "learning_rate": 1.0033581561390892e-05, "loss": 1.1414, "step": 13136 }, { "epoch": 0.5140073558181392, "grad_norm": 0.0, "learning_rate": 1.0032314337156537e-05, "loss": 1.0495, "step": 13137 }, { "epoch": 0.5140464825103686, "grad_norm": 0.0, "learning_rate": 1.0031047112403255e-05, "loss": 1.0449, "step": 13138 }, { "epoch": 0.514085609202598, "grad_norm": 0.0, "learning_rate": 1.0029779887151397e-05, "loss": 1.076, "step": 13139 }, { "epoch": 0.5141247358948274, "grad_norm": 0.0, "learning_rate": 1.0028512661421306e-05, "loss": 1.1614, "step": 13140 }, { "epoch": 0.5141638625870569, "grad_norm": 0.0, "learning_rate": 1.002724543523334e-05, "loss": 1.1102, "step": 13141 }, { "epoch": 0.5142029892792863, "grad_norm": 0.0, "learning_rate": 1.0025978208607847e-05, "loss": 1.0371, "step": 13142 }, { "epoch": 0.5142421159715158, "grad_norm": 0.0, "learning_rate": 1.0024710981565177e-05, "loss": 1.0044, "step": 13143 }, { "epoch": 0.5142812426637452, "grad_norm": 0.0, "learning_rate": 1.0023443754125678e-05, "loss": 0.8911, "step": 13144 }, { "epoch": 0.5143203693559747, "grad_norm": 0.0, "learning_rate": 1.0022176526309703e-05, "loss": 1.1743, "step": 13145 }, { "epoch": 0.5143594960482041, "grad_norm": 0.0, "learning_rate": 1.00209092981376e-05, "loss": 1.0694, "step": 13146 }, { "epoch": 0.5143986227404336, "grad_norm": 0.0, "learning_rate": 1.0019642069629723e-05, "loss": 0.9637, "step": 13147 }, { "epoch": 0.514437749432663, "grad_norm": 0.0, "learning_rate": 1.0018374840806416e-05, "loss": 1.0515, "step": 13148 }, { "epoch": 0.5144768761248923, "grad_norm": 0.0, "learning_rate": 1.0017107611688031e-05, "loss": 1.0713, "step": 13149 }, { "epoch": 0.5145160028171218, "grad_norm": 0.0, "learning_rate": 1.0015840382294922e-05, "loss": 1.1153, "step": 13150 }, { "epoch": 0.5145551295093512, "grad_norm": 0.0, "learning_rate": 1.0014573152647433e-05, "loss": 0.9518, "step": 13151 }, { "epoch": 0.5145942562015807, "grad_norm": 0.0, "learning_rate": 1.0013305922765919e-05, "loss": 1.0637, "step": 13152 }, { "epoch": 0.5146333828938101, "grad_norm": 0.0, "learning_rate": 1.001203869267073e-05, "loss": 1.0509, "step": 13153 }, { "epoch": 0.5146725095860396, "grad_norm": 0.0, "learning_rate": 1.0010771462382214e-05, "loss": 1.067, "step": 13154 }, { "epoch": 0.514711636278269, "grad_norm": 0.0, "learning_rate": 1.000950423192072e-05, "loss": 0.9441, "step": 13155 }, { "epoch": 0.5147507629704985, "grad_norm": 0.0, "learning_rate": 1.0008237001306602e-05, "loss": 1.0439, "step": 13156 }, { "epoch": 0.5147898896627279, "grad_norm": 0.0, "learning_rate": 1.0006969770560207e-05, "loss": 1.0969, "step": 13157 }, { "epoch": 0.5148290163549574, "grad_norm": 0.0, "learning_rate": 1.0005702539701888e-05, "loss": 0.9621, "step": 13158 }, { "epoch": 0.5148681430471868, "grad_norm": 0.0, "learning_rate": 1.0004435308751993e-05, "loss": 0.8561, "step": 13159 }, { "epoch": 0.5149072697394163, "grad_norm": 0.0, "learning_rate": 1.0003168077730867e-05, "loss": 0.9843, "step": 13160 }, { "epoch": 0.5149463964316456, "grad_norm": 0.0, "learning_rate": 1.0001900846658872e-05, "loss": 1.0129, "step": 13161 }, { "epoch": 0.5149855231238751, "grad_norm": 0.0, "learning_rate": 1.000063361555635e-05, "loss": 1.0311, "step": 13162 }, { "epoch": 0.5150246498161045, "grad_norm": 0.0, "learning_rate": 9.999366384443652e-06, "loss": 1.0672, "step": 13163 }, { "epoch": 0.515063776508334, "grad_norm": 0.0, "learning_rate": 9.998099153341131e-06, "loss": 1.1266, "step": 13164 }, { "epoch": 0.5151029032005634, "grad_norm": 0.0, "learning_rate": 9.996831922269135e-06, "loss": 1.0659, "step": 13165 }, { "epoch": 0.5151420298927929, "grad_norm": 0.0, "learning_rate": 9.995564691248013e-06, "loss": 1.0598, "step": 13166 }, { "epoch": 0.5151811565850223, "grad_norm": 0.0, "learning_rate": 9.994297460298114e-06, "loss": 1.0099, "step": 13167 }, { "epoch": 0.5152202832772518, "grad_norm": 0.0, "learning_rate": 9.993030229439795e-06, "loss": 1.1377, "step": 13168 }, { "epoch": 0.5152594099694812, "grad_norm": 0.0, "learning_rate": 9.991762998693401e-06, "loss": 1.0806, "step": 13169 }, { "epoch": 0.5152985366617107, "grad_norm": 0.0, "learning_rate": 9.990495768079283e-06, "loss": 1.0884, "step": 13170 }, { "epoch": 0.51533766335394, "grad_norm": 0.0, "learning_rate": 9.989228537617792e-06, "loss": 1.0222, "step": 13171 }, { "epoch": 0.5153767900461695, "grad_norm": 0.0, "learning_rate": 9.987961307329272e-06, "loss": 0.9978, "step": 13172 }, { "epoch": 0.5154159167383989, "grad_norm": 0.0, "learning_rate": 9.986694077234083e-06, "loss": 1.0732, "step": 13173 }, { "epoch": 0.5154550434306284, "grad_norm": 0.0, "learning_rate": 9.985426847352569e-06, "loss": 0.9064, "step": 13174 }, { "epoch": 0.5154941701228578, "grad_norm": 0.0, "learning_rate": 9.984159617705083e-06, "loss": 1.0236, "step": 13175 }, { "epoch": 0.5155332968150873, "grad_norm": 0.0, "learning_rate": 9.98289238831197e-06, "loss": 0.9853, "step": 13176 }, { "epoch": 0.5155724235073167, "grad_norm": 0.0, "learning_rate": 9.981625159193587e-06, "loss": 1.0462, "step": 13177 }, { "epoch": 0.5156115501995461, "grad_norm": 0.0, "learning_rate": 9.980357930370282e-06, "loss": 1.1169, "step": 13178 }, { "epoch": 0.5156506768917756, "grad_norm": 0.0, "learning_rate": 9.979090701862403e-06, "loss": 0.9699, "step": 13179 }, { "epoch": 0.515689803584005, "grad_norm": 0.0, "learning_rate": 9.977823473690302e-06, "loss": 1.071, "step": 13180 }, { "epoch": 0.5157289302762345, "grad_norm": 0.0, "learning_rate": 9.976556245874322e-06, "loss": 0.9229, "step": 13181 }, { "epoch": 0.5157680569684638, "grad_norm": 0.0, "learning_rate": 9.975289018434826e-06, "loss": 1.0172, "step": 13182 }, { "epoch": 0.5158071836606933, "grad_norm": 0.0, "learning_rate": 9.974021791392155e-06, "loss": 1.0343, "step": 13183 }, { "epoch": 0.5158463103529227, "grad_norm": 0.0, "learning_rate": 9.972754564766663e-06, "loss": 1.0018, "step": 13184 }, { "epoch": 0.5158854370451522, "grad_norm": 0.0, "learning_rate": 9.971487338578694e-06, "loss": 0.9661, "step": 13185 }, { "epoch": 0.5159245637373816, "grad_norm": 0.0, "learning_rate": 9.970220112848607e-06, "loss": 1.0639, "step": 13186 }, { "epoch": 0.5159636904296111, "grad_norm": 0.0, "learning_rate": 9.968952887596748e-06, "loss": 0.9698, "step": 13187 }, { "epoch": 0.5160028171218405, "grad_norm": 0.0, "learning_rate": 9.967685662843466e-06, "loss": 1.1051, "step": 13188 }, { "epoch": 0.51604194381407, "grad_norm": 0.0, "learning_rate": 9.966418438609111e-06, "loss": 1.0105, "step": 13189 }, { "epoch": 0.5160810705062994, "grad_norm": 0.0, "learning_rate": 9.965151214914031e-06, "loss": 1.0667, "step": 13190 }, { "epoch": 0.5161201971985289, "grad_norm": 0.0, "learning_rate": 9.963883991778582e-06, "loss": 1.1091, "step": 13191 }, { "epoch": 0.5161593238907582, "grad_norm": 0.0, "learning_rate": 9.962616769223112e-06, "loss": 0.9001, "step": 13192 }, { "epoch": 0.5161984505829877, "grad_norm": 0.0, "learning_rate": 9.96134954726797e-06, "loss": 1.0483, "step": 13193 }, { "epoch": 0.5162375772752171, "grad_norm": 0.0, "learning_rate": 9.9600823259335e-06, "loss": 1.0121, "step": 13194 }, { "epoch": 0.5162767039674466, "grad_norm": 0.0, "learning_rate": 9.958815105240064e-06, "loss": 0.9586, "step": 13195 }, { "epoch": 0.516315830659676, "grad_norm": 0.0, "learning_rate": 9.957547885208005e-06, "loss": 1.1177, "step": 13196 }, { "epoch": 0.5163549573519055, "grad_norm": 0.0, "learning_rate": 9.956280665857672e-06, "loss": 1.1564, "step": 13197 }, { "epoch": 0.5163940840441349, "grad_norm": 0.0, "learning_rate": 9.95501344720942e-06, "loss": 1.0683, "step": 13198 }, { "epoch": 0.5164332107363644, "grad_norm": 0.0, "learning_rate": 9.953746229283589e-06, "loss": 0.9505, "step": 13199 }, { "epoch": 0.5164723374285938, "grad_norm": 0.0, "learning_rate": 9.95247901210054e-06, "loss": 1.0075, "step": 13200 }, { "epoch": 0.5165114641208233, "grad_norm": 0.0, "learning_rate": 9.95121179568062e-06, "loss": 0.9962, "step": 13201 }, { "epoch": 0.5165505908130527, "grad_norm": 0.0, "learning_rate": 9.949944580044176e-06, "loss": 1.0033, "step": 13202 }, { "epoch": 0.5165897175052822, "grad_norm": 0.0, "learning_rate": 9.948677365211558e-06, "loss": 1.0122, "step": 13203 }, { "epoch": 0.5166288441975115, "grad_norm": 0.0, "learning_rate": 9.94741015120312e-06, "loss": 1.0294, "step": 13204 }, { "epoch": 0.516667970889741, "grad_norm": 0.0, "learning_rate": 9.946142938039205e-06, "loss": 0.9471, "step": 13205 }, { "epoch": 0.5167070975819704, "grad_norm": 0.0, "learning_rate": 9.944875725740169e-06, "loss": 1.0115, "step": 13206 }, { "epoch": 0.5167462242741998, "grad_norm": 0.0, "learning_rate": 9.943608514326362e-06, "loss": 1.0214, "step": 13207 }, { "epoch": 0.5167853509664293, "grad_norm": 0.0, "learning_rate": 9.942341303818128e-06, "loss": 1.0394, "step": 13208 }, { "epoch": 0.5168244776586587, "grad_norm": 0.0, "learning_rate": 9.941074094235823e-06, "loss": 1.1495, "step": 13209 }, { "epoch": 0.5168636043508882, "grad_norm": 0.0, "learning_rate": 9.939806885599795e-06, "loss": 0.9718, "step": 13210 }, { "epoch": 0.5169027310431176, "grad_norm": 0.0, "learning_rate": 9.938539677930388e-06, "loss": 0.9319, "step": 13211 }, { "epoch": 0.5169418577353471, "grad_norm": 0.0, "learning_rate": 9.93727247124796e-06, "loss": 1.0192, "step": 13212 }, { "epoch": 0.5169809844275765, "grad_norm": 0.0, "learning_rate": 9.936005265572857e-06, "loss": 0.9645, "step": 13213 }, { "epoch": 0.517020111119806, "grad_norm": 0.0, "learning_rate": 9.93473806092543e-06, "loss": 1.0765, "step": 13214 }, { "epoch": 0.5170592378120353, "grad_norm": 0.0, "learning_rate": 9.93347085732603e-06, "loss": 0.9858, "step": 13215 }, { "epoch": 0.5170983645042648, "grad_norm": 0.0, "learning_rate": 9.932203654795001e-06, "loss": 0.9689, "step": 13216 }, { "epoch": 0.5171374911964942, "grad_norm": 0.0, "learning_rate": 9.930936453352696e-06, "loss": 1.0931, "step": 13217 }, { "epoch": 0.5171766178887237, "grad_norm": 0.0, "learning_rate": 9.929669253019468e-06, "loss": 1.079, "step": 13218 }, { "epoch": 0.5172157445809531, "grad_norm": 0.0, "learning_rate": 9.928402053815664e-06, "loss": 1.0094, "step": 13219 }, { "epoch": 0.5172548712731826, "grad_norm": 0.0, "learning_rate": 9.92713485576163e-06, "loss": 0.8955, "step": 13220 }, { "epoch": 0.517293997965412, "grad_norm": 0.0, "learning_rate": 9.925867658877718e-06, "loss": 1.145, "step": 13221 }, { "epoch": 0.5173331246576415, "grad_norm": 0.0, "learning_rate": 9.924600463184282e-06, "loss": 1.0539, "step": 13222 }, { "epoch": 0.5173722513498709, "grad_norm": 0.0, "learning_rate": 9.923333268701669e-06, "loss": 1.0844, "step": 13223 }, { "epoch": 0.5174113780421004, "grad_norm": 0.0, "learning_rate": 9.922066075450227e-06, "loss": 0.948, "step": 13224 }, { "epoch": 0.5174505047343297, "grad_norm": 0.0, "learning_rate": 9.9207988834503e-06, "loss": 0.9095, "step": 13225 }, { "epoch": 0.5174896314265592, "grad_norm": 0.0, "learning_rate": 9.91953169272225e-06, "loss": 1.1169, "step": 13226 }, { "epoch": 0.5175287581187886, "grad_norm": 0.0, "learning_rate": 9.91826450328642e-06, "loss": 0.9915, "step": 13227 }, { "epoch": 0.5175678848110181, "grad_norm": 0.0, "learning_rate": 9.916997315163159e-06, "loss": 0.904, "step": 13228 }, { "epoch": 0.5176070115032475, "grad_norm": 0.0, "learning_rate": 9.915730128372816e-06, "loss": 1.0152, "step": 13229 }, { "epoch": 0.517646138195477, "grad_norm": 0.0, "learning_rate": 9.91446294293574e-06, "loss": 0.9897, "step": 13230 }, { "epoch": 0.5176852648877064, "grad_norm": 0.0, "learning_rate": 9.913195758872284e-06, "loss": 1.0245, "step": 13231 }, { "epoch": 0.5177243915799359, "grad_norm": 0.0, "learning_rate": 9.911928576202798e-06, "loss": 1.0941, "step": 13232 }, { "epoch": 0.5177635182721653, "grad_norm": 0.0, "learning_rate": 9.910661394947626e-06, "loss": 0.9499, "step": 13233 }, { "epoch": 0.5178026449643948, "grad_norm": 0.0, "learning_rate": 9.909394215127118e-06, "loss": 0.9503, "step": 13234 }, { "epoch": 0.5178417716566242, "grad_norm": 0.0, "learning_rate": 9.908127036761628e-06, "loss": 1.0142, "step": 13235 }, { "epoch": 0.5178808983488535, "grad_norm": 0.0, "learning_rate": 9.906859859871503e-06, "loss": 0.9799, "step": 13236 }, { "epoch": 0.517920025041083, "grad_norm": 0.0, "learning_rate": 9.905592684477091e-06, "loss": 1.0934, "step": 13237 }, { "epoch": 0.5179591517333124, "grad_norm": 0.0, "learning_rate": 9.904325510598743e-06, "loss": 1.0683, "step": 13238 }, { "epoch": 0.5179982784255419, "grad_norm": 0.0, "learning_rate": 9.903058338256804e-06, "loss": 0.9305, "step": 13239 }, { "epoch": 0.5180374051177713, "grad_norm": 0.0, "learning_rate": 9.90179116747163e-06, "loss": 0.9244, "step": 13240 }, { "epoch": 0.5180765318100008, "grad_norm": 0.0, "learning_rate": 9.900523998263567e-06, "loss": 0.9643, "step": 13241 }, { "epoch": 0.5181156585022302, "grad_norm": 0.0, "learning_rate": 9.899256830652965e-06, "loss": 0.9984, "step": 13242 }, { "epoch": 0.5181547851944597, "grad_norm": 0.0, "learning_rate": 9.897989664660168e-06, "loss": 1.0249, "step": 13243 }, { "epoch": 0.5181939118866891, "grad_norm": 0.0, "learning_rate": 9.89672250030553e-06, "loss": 0.9832, "step": 13244 }, { "epoch": 0.5182330385789186, "grad_norm": 0.0, "learning_rate": 9.895455337609402e-06, "loss": 0.968, "step": 13245 }, { "epoch": 0.5182721652711479, "grad_norm": 0.0, "learning_rate": 9.894188176592129e-06, "loss": 1.0574, "step": 13246 }, { "epoch": 0.5183112919633774, "grad_norm": 0.0, "learning_rate": 9.892921017274062e-06, "loss": 1.0354, "step": 13247 }, { "epoch": 0.5183504186556068, "grad_norm": 0.0, "learning_rate": 9.891653859675548e-06, "loss": 1.0286, "step": 13248 }, { "epoch": 0.5183895453478363, "grad_norm": 0.0, "learning_rate": 9.890386703816936e-06, "loss": 0.9356, "step": 13249 }, { "epoch": 0.5184286720400657, "grad_norm": 0.0, "learning_rate": 9.889119549718577e-06, "loss": 0.9376, "step": 13250 }, { "epoch": 0.5184677987322952, "grad_norm": 0.0, "learning_rate": 9.887852397400823e-06, "loss": 1.0309, "step": 13251 }, { "epoch": 0.5185069254245246, "grad_norm": 0.0, "learning_rate": 9.886585246884014e-06, "loss": 1.1088, "step": 13252 }, { "epoch": 0.5185460521167541, "grad_norm": 0.0, "learning_rate": 9.885318098188507e-06, "loss": 1.1053, "step": 13253 }, { "epoch": 0.5185851788089835, "grad_norm": 0.0, "learning_rate": 9.884050951334645e-06, "loss": 1.101, "step": 13254 }, { "epoch": 0.518624305501213, "grad_norm": 0.0, "learning_rate": 9.88278380634278e-06, "loss": 1.1295, "step": 13255 }, { "epoch": 0.5186634321934424, "grad_norm": 0.0, "learning_rate": 9.881516663233263e-06, "loss": 1.11, "step": 13256 }, { "epoch": 0.5187025588856718, "grad_norm": 0.0, "learning_rate": 9.880249522026436e-06, "loss": 1.0479, "step": 13257 }, { "epoch": 0.5187416855779012, "grad_norm": 0.0, "learning_rate": 9.878982382742654e-06, "loss": 1.1081, "step": 13258 }, { "epoch": 0.5187808122701307, "grad_norm": 0.0, "learning_rate": 9.877715245402263e-06, "loss": 1.0085, "step": 13259 }, { "epoch": 0.5188199389623601, "grad_norm": 0.0, "learning_rate": 9.876448110025615e-06, "loss": 1.0284, "step": 13260 }, { "epoch": 0.5188590656545896, "grad_norm": 0.0, "learning_rate": 9.875180976633047e-06, "loss": 1.0051, "step": 13261 }, { "epoch": 0.518898192346819, "grad_norm": 0.0, "learning_rate": 9.873913845244923e-06, "loss": 1.0608, "step": 13262 }, { "epoch": 0.5189373190390484, "grad_norm": 0.0, "learning_rate": 9.872646715881585e-06, "loss": 0.8269, "step": 13263 }, { "epoch": 0.5189764457312779, "grad_norm": 0.0, "learning_rate": 9.871379588563379e-06, "loss": 1.0156, "step": 13264 }, { "epoch": 0.5190155724235073, "grad_norm": 0.0, "learning_rate": 9.870112463310656e-06, "loss": 1.1178, "step": 13265 }, { "epoch": 0.5190546991157368, "grad_norm": 0.0, "learning_rate": 9.868845340143762e-06, "loss": 0.9594, "step": 13266 }, { "epoch": 0.5190938258079661, "grad_norm": 0.0, "learning_rate": 9.867578219083049e-06, "loss": 0.9404, "step": 13267 }, { "epoch": 0.5191329525001956, "grad_norm": 0.0, "learning_rate": 9.866311100148865e-06, "loss": 1.1342, "step": 13268 }, { "epoch": 0.519172079192425, "grad_norm": 0.0, "learning_rate": 9.865043983361557e-06, "loss": 1.1282, "step": 13269 }, { "epoch": 0.5192112058846545, "grad_norm": 0.0, "learning_rate": 9.86377686874147e-06, "loss": 1.0995, "step": 13270 }, { "epoch": 0.5192503325768839, "grad_norm": 0.0, "learning_rate": 9.86250975630896e-06, "loss": 1.0315, "step": 13271 }, { "epoch": 0.5192894592691134, "grad_norm": 0.0, "learning_rate": 9.86124264608437e-06, "loss": 1.0271, "step": 13272 }, { "epoch": 0.5193285859613428, "grad_norm": 0.0, "learning_rate": 9.85997553808805e-06, "loss": 0.9989, "step": 13273 }, { "epoch": 0.5193677126535723, "grad_norm": 0.0, "learning_rate": 9.858708432340347e-06, "loss": 1.1517, "step": 13274 }, { "epoch": 0.5194068393458017, "grad_norm": 0.0, "learning_rate": 9.857441328861606e-06, "loss": 1.0674, "step": 13275 }, { "epoch": 0.5194459660380312, "grad_norm": 0.0, "learning_rate": 9.856174227672183e-06, "loss": 1.0123, "step": 13276 }, { "epoch": 0.5194850927302606, "grad_norm": 0.0, "learning_rate": 9.85490712879242e-06, "loss": 1.035, "step": 13277 }, { "epoch": 0.51952421942249, "grad_norm": 0.0, "learning_rate": 9.853640032242668e-06, "loss": 1.0551, "step": 13278 }, { "epoch": 0.5195633461147194, "grad_norm": 0.0, "learning_rate": 9.85237293804327e-06, "loss": 1.0, "step": 13279 }, { "epoch": 0.5196024728069489, "grad_norm": 0.0, "learning_rate": 9.85110584621458e-06, "loss": 1.0133, "step": 13280 }, { "epoch": 0.5196415994991783, "grad_norm": 0.0, "learning_rate": 9.849838756776946e-06, "loss": 1.0622, "step": 13281 }, { "epoch": 0.5196807261914078, "grad_norm": 0.0, "learning_rate": 9.848571669750711e-06, "loss": 0.9855, "step": 13282 }, { "epoch": 0.5197198528836372, "grad_norm": 0.0, "learning_rate": 9.847304585156222e-06, "loss": 1.0373, "step": 13283 }, { "epoch": 0.5197589795758667, "grad_norm": 0.0, "learning_rate": 9.846037503013834e-06, "loss": 1.0197, "step": 13284 }, { "epoch": 0.5197981062680961, "grad_norm": 0.0, "learning_rate": 9.844770423343893e-06, "loss": 1.0547, "step": 13285 }, { "epoch": 0.5198372329603256, "grad_norm": 0.0, "learning_rate": 9.843503346166741e-06, "loss": 1.0652, "step": 13286 }, { "epoch": 0.519876359652555, "grad_norm": 0.0, "learning_rate": 9.84223627150273e-06, "loss": 1.0619, "step": 13287 }, { "epoch": 0.5199154863447845, "grad_norm": 0.0, "learning_rate": 9.840969199372204e-06, "loss": 1.0455, "step": 13288 }, { "epoch": 0.5199546130370138, "grad_norm": 0.0, "learning_rate": 9.839702129795518e-06, "loss": 1.0558, "step": 13289 }, { "epoch": 0.5199937397292433, "grad_norm": 0.0, "learning_rate": 9.838435062793013e-06, "loss": 1.1156, "step": 13290 }, { "epoch": 0.5200328664214727, "grad_norm": 0.0, "learning_rate": 9.83716799838504e-06, "loss": 1.0486, "step": 13291 }, { "epoch": 0.5200719931137021, "grad_norm": 0.0, "learning_rate": 9.835900936591941e-06, "loss": 1.1653, "step": 13292 }, { "epoch": 0.5201111198059316, "grad_norm": 0.0, "learning_rate": 9.83463387743407e-06, "loss": 1.1414, "step": 13293 }, { "epoch": 0.520150246498161, "grad_norm": 0.0, "learning_rate": 9.833366820931771e-06, "loss": 1.0617, "step": 13294 }, { "epoch": 0.5201893731903905, "grad_norm": 0.0, "learning_rate": 9.832099767105393e-06, "loss": 1.1719, "step": 13295 }, { "epoch": 0.5202284998826199, "grad_norm": 0.0, "learning_rate": 9.830832715975283e-06, "loss": 1.0641, "step": 13296 }, { "epoch": 0.5202676265748494, "grad_norm": 0.0, "learning_rate": 9.829565667561786e-06, "loss": 1.1523, "step": 13297 }, { "epoch": 0.5203067532670788, "grad_norm": 0.0, "learning_rate": 9.828298621885252e-06, "loss": 1.047, "step": 13298 }, { "epoch": 0.5203458799593083, "grad_norm": 0.0, "learning_rate": 9.827031578966026e-06, "loss": 1.0435, "step": 13299 }, { "epoch": 0.5203850066515376, "grad_norm": 0.0, "learning_rate": 9.825764538824454e-06, "loss": 0.9037, "step": 13300 }, { "epoch": 0.5204241333437671, "grad_norm": 0.0, "learning_rate": 9.824497501480887e-06, "loss": 1.0692, "step": 13301 }, { "epoch": 0.5204632600359965, "grad_norm": 0.0, "learning_rate": 9.823230466955672e-06, "loss": 0.8978, "step": 13302 }, { "epoch": 0.520502386728226, "grad_norm": 0.0, "learning_rate": 9.821963435269155e-06, "loss": 0.997, "step": 13303 }, { "epoch": 0.5205415134204554, "grad_norm": 0.0, "learning_rate": 9.82069640644168e-06, "loss": 1.0482, "step": 13304 }, { "epoch": 0.5205806401126849, "grad_norm": 0.0, "learning_rate": 9.819429380493597e-06, "loss": 1.0984, "step": 13305 }, { "epoch": 0.5206197668049143, "grad_norm": 0.0, "learning_rate": 9.818162357445247e-06, "loss": 1.06, "step": 13306 }, { "epoch": 0.5206588934971438, "grad_norm": 0.0, "learning_rate": 9.816895337316985e-06, "loss": 0.9708, "step": 13307 }, { "epoch": 0.5206980201893732, "grad_norm": 0.0, "learning_rate": 9.815628320129156e-06, "loss": 1.0683, "step": 13308 }, { "epoch": 0.5207371468816027, "grad_norm": 0.0, "learning_rate": 9.814361305902105e-06, "loss": 1.1952, "step": 13309 }, { "epoch": 0.520776273573832, "grad_norm": 0.0, "learning_rate": 9.813094294656175e-06, "loss": 0.9686, "step": 13310 }, { "epoch": 0.5208154002660615, "grad_norm": 0.0, "learning_rate": 9.811827286411721e-06, "loss": 0.9705, "step": 13311 }, { "epoch": 0.5208545269582909, "grad_norm": 0.0, "learning_rate": 9.810560281189085e-06, "loss": 0.8941, "step": 13312 }, { "epoch": 0.5208936536505204, "grad_norm": 0.0, "learning_rate": 9.809293279008613e-06, "loss": 1.1019, "step": 13313 }, { "epoch": 0.5209327803427498, "grad_norm": 0.0, "learning_rate": 9.808026279890651e-06, "loss": 1.0503, "step": 13314 }, { "epoch": 0.5209719070349793, "grad_norm": 0.0, "learning_rate": 9.806759283855542e-06, "loss": 0.9835, "step": 13315 }, { "epoch": 0.5210110337272087, "grad_norm": 0.0, "learning_rate": 9.805492290923643e-06, "loss": 1.1437, "step": 13316 }, { "epoch": 0.5210501604194382, "grad_norm": 0.0, "learning_rate": 9.804225301115292e-06, "loss": 1.0007, "step": 13317 }, { "epoch": 0.5210892871116676, "grad_norm": 0.0, "learning_rate": 9.802958314450839e-06, "loss": 0.9272, "step": 13318 }, { "epoch": 0.5211284138038971, "grad_norm": 0.0, "learning_rate": 9.801691330950623e-06, "loss": 1.0532, "step": 13319 }, { "epoch": 0.5211675404961265, "grad_norm": 0.0, "learning_rate": 9.800424350635e-06, "loss": 1.1468, "step": 13320 }, { "epoch": 0.5212066671883558, "grad_norm": 0.0, "learning_rate": 9.799157373524313e-06, "loss": 1.1008, "step": 13321 }, { "epoch": 0.5212457938805853, "grad_norm": 0.0, "learning_rate": 9.797890399638907e-06, "loss": 1.0078, "step": 13322 }, { "epoch": 0.5212849205728147, "grad_norm": 0.0, "learning_rate": 9.796623428999126e-06, "loss": 1.084, "step": 13323 }, { "epoch": 0.5213240472650442, "grad_norm": 0.0, "learning_rate": 9.795356461625317e-06, "loss": 1.1194, "step": 13324 }, { "epoch": 0.5213631739572736, "grad_norm": 0.0, "learning_rate": 9.794089497537827e-06, "loss": 1.0184, "step": 13325 }, { "epoch": 0.5214023006495031, "grad_norm": 0.0, "learning_rate": 9.792822536757004e-06, "loss": 0.9861, "step": 13326 }, { "epoch": 0.5214414273417325, "grad_norm": 0.0, "learning_rate": 9.791555579303192e-06, "loss": 1.1956, "step": 13327 }, { "epoch": 0.521480554033962, "grad_norm": 0.0, "learning_rate": 9.79028862519673e-06, "loss": 0.9299, "step": 13328 }, { "epoch": 0.5215196807261914, "grad_norm": 0.0, "learning_rate": 9.789021674457977e-06, "loss": 0.9513, "step": 13329 }, { "epoch": 0.5215588074184209, "grad_norm": 0.0, "learning_rate": 9.787754727107269e-06, "loss": 0.884, "step": 13330 }, { "epoch": 0.5215979341106503, "grad_norm": 0.0, "learning_rate": 9.786487783164953e-06, "loss": 0.9143, "step": 13331 }, { "epoch": 0.5216370608028797, "grad_norm": 0.0, "learning_rate": 9.785220842651378e-06, "loss": 1.1066, "step": 13332 }, { "epoch": 0.5216761874951091, "grad_norm": 0.0, "learning_rate": 9.783953905586883e-06, "loss": 1.0132, "step": 13333 }, { "epoch": 0.5217153141873386, "grad_norm": 0.0, "learning_rate": 9.78268697199182e-06, "loss": 1.0398, "step": 13334 }, { "epoch": 0.521754440879568, "grad_norm": 0.0, "learning_rate": 9.781420041886535e-06, "loss": 1.1384, "step": 13335 }, { "epoch": 0.5217935675717975, "grad_norm": 0.0, "learning_rate": 9.780153115291367e-06, "loss": 0.9434, "step": 13336 }, { "epoch": 0.5218326942640269, "grad_norm": 0.0, "learning_rate": 9.778886192226664e-06, "loss": 1.0681, "step": 13337 }, { "epoch": 0.5218718209562564, "grad_norm": 0.0, "learning_rate": 9.777619272712774e-06, "loss": 1.0912, "step": 13338 }, { "epoch": 0.5219109476484858, "grad_norm": 0.0, "learning_rate": 9.776352356770037e-06, "loss": 1.0007, "step": 13339 }, { "epoch": 0.5219500743407153, "grad_norm": 0.0, "learning_rate": 9.775085444418802e-06, "loss": 1.0829, "step": 13340 }, { "epoch": 0.5219892010329447, "grad_norm": 0.0, "learning_rate": 9.773818535679413e-06, "loss": 1.0835, "step": 13341 }, { "epoch": 0.5220283277251742, "grad_norm": 0.0, "learning_rate": 9.772551630572215e-06, "loss": 1.0306, "step": 13342 }, { "epoch": 0.5220674544174035, "grad_norm": 0.0, "learning_rate": 9.771284729117555e-06, "loss": 0.9677, "step": 13343 }, { "epoch": 0.522106581109633, "grad_norm": 0.0, "learning_rate": 9.770017831335774e-06, "loss": 0.9496, "step": 13344 }, { "epoch": 0.5221457078018624, "grad_norm": 0.0, "learning_rate": 9.768750937247216e-06, "loss": 0.973, "step": 13345 }, { "epoch": 0.5221848344940919, "grad_norm": 0.0, "learning_rate": 9.76748404687223e-06, "loss": 1.0079, "step": 13346 }, { "epoch": 0.5222239611863213, "grad_norm": 0.0, "learning_rate": 9.766217160231159e-06, "loss": 1.1025, "step": 13347 }, { "epoch": 0.5222630878785508, "grad_norm": 0.0, "learning_rate": 9.764950277344349e-06, "loss": 1.0383, "step": 13348 }, { "epoch": 0.5223022145707802, "grad_norm": 0.0, "learning_rate": 9.763683398232143e-06, "loss": 0.9902, "step": 13349 }, { "epoch": 0.5223413412630096, "grad_norm": 0.0, "learning_rate": 9.76241652291488e-06, "loss": 1.0086, "step": 13350 }, { "epoch": 0.5223804679552391, "grad_norm": 0.0, "learning_rate": 9.761149651412915e-06, "loss": 0.9881, "step": 13351 }, { "epoch": 0.5224195946474685, "grad_norm": 0.0, "learning_rate": 9.759882783746588e-06, "loss": 1.1271, "step": 13352 }, { "epoch": 0.522458721339698, "grad_norm": 0.0, "learning_rate": 9.75861591993624e-06, "loss": 0.9572, "step": 13353 }, { "epoch": 0.5224978480319273, "grad_norm": 0.0, "learning_rate": 9.757349060002221e-06, "loss": 1.0349, "step": 13354 }, { "epoch": 0.5225369747241568, "grad_norm": 0.0, "learning_rate": 9.756082203964867e-06, "loss": 1.0276, "step": 13355 }, { "epoch": 0.5225761014163862, "grad_norm": 0.0, "learning_rate": 9.75481535184453e-06, "loss": 0.9853, "step": 13356 }, { "epoch": 0.5226152281086157, "grad_norm": 0.0, "learning_rate": 9.753548503661552e-06, "loss": 1.0629, "step": 13357 }, { "epoch": 0.5226543548008451, "grad_norm": 0.0, "learning_rate": 9.752281659436277e-06, "loss": 1.1473, "step": 13358 }, { "epoch": 0.5226934814930746, "grad_norm": 0.0, "learning_rate": 9.751014819189042e-06, "loss": 1.0525, "step": 13359 }, { "epoch": 0.522732608185304, "grad_norm": 0.0, "learning_rate": 9.749747982940203e-06, "loss": 0.9429, "step": 13360 }, { "epoch": 0.5227717348775335, "grad_norm": 0.0, "learning_rate": 9.748481150710097e-06, "loss": 1.0714, "step": 13361 }, { "epoch": 0.5228108615697629, "grad_norm": 0.0, "learning_rate": 9.747214322519069e-06, "loss": 1.0406, "step": 13362 }, { "epoch": 0.5228499882619924, "grad_norm": 0.0, "learning_rate": 9.745947498387462e-06, "loss": 1.0464, "step": 13363 }, { "epoch": 0.5228891149542217, "grad_norm": 0.0, "learning_rate": 9.744680678335614e-06, "loss": 0.9787, "step": 13364 }, { "epoch": 0.5229282416464512, "grad_norm": 0.0, "learning_rate": 9.74341386238388e-06, "loss": 1.0802, "step": 13365 }, { "epoch": 0.5229673683386806, "grad_norm": 0.0, "learning_rate": 9.7421470505526e-06, "loss": 0.9481, "step": 13366 }, { "epoch": 0.5230064950309101, "grad_norm": 0.0, "learning_rate": 9.740880242862112e-06, "loss": 0.965, "step": 13367 }, { "epoch": 0.5230456217231395, "grad_norm": 0.0, "learning_rate": 9.73961343933276e-06, "loss": 1.0358, "step": 13368 }, { "epoch": 0.523084748415369, "grad_norm": 0.0, "learning_rate": 9.738346639984893e-06, "loss": 1.0222, "step": 13369 }, { "epoch": 0.5231238751075984, "grad_norm": 0.0, "learning_rate": 9.737079844838852e-06, "loss": 1.0225, "step": 13370 }, { "epoch": 0.5231630017998279, "grad_norm": 0.0, "learning_rate": 9.73581305391498e-06, "loss": 1.0321, "step": 13371 }, { "epoch": 0.5232021284920573, "grad_norm": 0.0, "learning_rate": 9.734546267233618e-06, "loss": 1.1486, "step": 13372 }, { "epoch": 0.5232412551842868, "grad_norm": 0.0, "learning_rate": 9.733279484815108e-06, "loss": 0.9899, "step": 13373 }, { "epoch": 0.5232803818765162, "grad_norm": 0.0, "learning_rate": 9.732012706679797e-06, "loss": 1.0353, "step": 13374 }, { "epoch": 0.5233195085687457, "grad_norm": 0.0, "learning_rate": 9.730745932848027e-06, "loss": 1.0204, "step": 13375 }, { "epoch": 0.523358635260975, "grad_norm": 0.0, "learning_rate": 9.72947916334014e-06, "loss": 1.0664, "step": 13376 }, { "epoch": 0.5233977619532044, "grad_norm": 0.0, "learning_rate": 9.728212398176476e-06, "loss": 1.0165, "step": 13377 }, { "epoch": 0.5234368886454339, "grad_norm": 0.0, "learning_rate": 9.726945637377381e-06, "loss": 0.9115, "step": 13378 }, { "epoch": 0.5234760153376633, "grad_norm": 0.0, "learning_rate": 9.7256788809632e-06, "loss": 0.9649, "step": 13379 }, { "epoch": 0.5235151420298928, "grad_norm": 0.0, "learning_rate": 9.72441212895427e-06, "loss": 1.0578, "step": 13380 }, { "epoch": 0.5235542687221222, "grad_norm": 0.0, "learning_rate": 9.723145381370938e-06, "loss": 1.0457, "step": 13381 }, { "epoch": 0.5235933954143517, "grad_norm": 0.0, "learning_rate": 9.721878638233541e-06, "loss": 0.9982, "step": 13382 }, { "epoch": 0.5236325221065811, "grad_norm": 0.0, "learning_rate": 9.720611899562427e-06, "loss": 1.0658, "step": 13383 }, { "epoch": 0.5236716487988106, "grad_norm": 0.0, "learning_rate": 9.719345165377933e-06, "loss": 1.1505, "step": 13384 }, { "epoch": 0.52371077549104, "grad_norm": 0.0, "learning_rate": 9.718078435700408e-06, "loss": 1.0157, "step": 13385 }, { "epoch": 0.5237499021832694, "grad_norm": 0.0, "learning_rate": 9.716811710550186e-06, "loss": 1.1049, "step": 13386 }, { "epoch": 0.5237890288754988, "grad_norm": 0.0, "learning_rate": 9.715544989947616e-06, "loss": 1.0383, "step": 13387 }, { "epoch": 0.5238281555677283, "grad_norm": 0.0, "learning_rate": 9.714278273913038e-06, "loss": 0.9782, "step": 13388 }, { "epoch": 0.5238672822599577, "grad_norm": 0.0, "learning_rate": 9.713011562466789e-06, "loss": 1.0641, "step": 13389 }, { "epoch": 0.5239064089521872, "grad_norm": 0.0, "learning_rate": 9.711744855629218e-06, "loss": 1.0073, "step": 13390 }, { "epoch": 0.5239455356444166, "grad_norm": 0.0, "learning_rate": 9.71047815342066e-06, "loss": 0.969, "step": 13391 }, { "epoch": 0.5239846623366461, "grad_norm": 0.0, "learning_rate": 9.709211455861464e-06, "loss": 0.9543, "step": 13392 }, { "epoch": 0.5240237890288755, "grad_norm": 0.0, "learning_rate": 9.707944762971965e-06, "loss": 0.9366, "step": 13393 }, { "epoch": 0.524062915721105, "grad_norm": 0.0, "learning_rate": 9.70667807477251e-06, "loss": 1.1548, "step": 13394 }, { "epoch": 0.5241020424133344, "grad_norm": 0.0, "learning_rate": 9.705411391283433e-06, "loss": 0.9674, "step": 13395 }, { "epoch": 0.5241411691055639, "grad_norm": 0.0, "learning_rate": 9.704144712525082e-06, "loss": 1.0587, "step": 13396 }, { "epoch": 0.5241802957977932, "grad_norm": 0.0, "learning_rate": 9.702878038517798e-06, "loss": 1.0317, "step": 13397 }, { "epoch": 0.5242194224900227, "grad_norm": 0.0, "learning_rate": 9.70161136928192e-06, "loss": 1.0591, "step": 13398 }, { "epoch": 0.5242585491822521, "grad_norm": 0.0, "learning_rate": 9.700344704837786e-06, "loss": 1.0352, "step": 13399 }, { "epoch": 0.5242976758744816, "grad_norm": 0.0, "learning_rate": 9.699078045205743e-06, "loss": 1.0163, "step": 13400 }, { "epoch": 0.524336802566711, "grad_norm": 0.0, "learning_rate": 9.69781139040613e-06, "loss": 1.0769, "step": 13401 }, { "epoch": 0.5243759292589405, "grad_norm": 0.0, "learning_rate": 9.696544740459289e-06, "loss": 1.0408, "step": 13402 }, { "epoch": 0.5244150559511699, "grad_norm": 0.0, "learning_rate": 9.695278095385558e-06, "loss": 1.0383, "step": 13403 }, { "epoch": 0.5244541826433994, "grad_norm": 0.0, "learning_rate": 9.694011455205273e-06, "loss": 0.9219, "step": 13404 }, { "epoch": 0.5244933093356288, "grad_norm": 0.0, "learning_rate": 9.692744819938787e-06, "loss": 0.9687, "step": 13405 }, { "epoch": 0.5245324360278582, "grad_norm": 0.0, "learning_rate": 9.691478189606433e-06, "loss": 1.0564, "step": 13406 }, { "epoch": 0.5245715627200876, "grad_norm": 0.0, "learning_rate": 9.690211564228554e-06, "loss": 1.0135, "step": 13407 }, { "epoch": 0.524610689412317, "grad_norm": 0.0, "learning_rate": 9.688944943825484e-06, "loss": 0.9753, "step": 13408 }, { "epoch": 0.5246498161045465, "grad_norm": 0.0, "learning_rate": 9.687678328417574e-06, "loss": 1.0159, "step": 13409 }, { "epoch": 0.5246889427967759, "grad_norm": 0.0, "learning_rate": 9.686411718025157e-06, "loss": 0.9781, "step": 13410 }, { "epoch": 0.5247280694890054, "grad_norm": 0.0, "learning_rate": 9.685145112668577e-06, "loss": 0.94, "step": 13411 }, { "epoch": 0.5247671961812348, "grad_norm": 0.0, "learning_rate": 9.68387851236817e-06, "loss": 1.0387, "step": 13412 }, { "epoch": 0.5248063228734643, "grad_norm": 0.0, "learning_rate": 9.682611917144273e-06, "loss": 1.0047, "step": 13413 }, { "epoch": 0.5248454495656937, "grad_norm": 0.0, "learning_rate": 9.681345327017237e-06, "loss": 0.9439, "step": 13414 }, { "epoch": 0.5248845762579232, "grad_norm": 0.0, "learning_rate": 9.680078742007395e-06, "loss": 1.0185, "step": 13415 }, { "epoch": 0.5249237029501526, "grad_norm": 0.0, "learning_rate": 9.678812162135087e-06, "loss": 0.9486, "step": 13416 }, { "epoch": 0.5249628296423821, "grad_norm": 0.0, "learning_rate": 9.67754558742065e-06, "loss": 1.0072, "step": 13417 }, { "epoch": 0.5250019563346114, "grad_norm": 0.0, "learning_rate": 9.676279017884432e-06, "loss": 1.086, "step": 13418 }, { "epoch": 0.5250410830268409, "grad_norm": 0.0, "learning_rate": 9.675012453546766e-06, "loss": 1.0746, "step": 13419 }, { "epoch": 0.5250802097190703, "grad_norm": 0.0, "learning_rate": 9.673745894427991e-06, "loss": 1.0146, "step": 13420 }, { "epoch": 0.5251193364112998, "grad_norm": 0.0, "learning_rate": 9.672479340548451e-06, "loss": 1.0673, "step": 13421 }, { "epoch": 0.5251584631035292, "grad_norm": 0.0, "learning_rate": 9.671212791928476e-06, "loss": 0.9528, "step": 13422 }, { "epoch": 0.5251975897957587, "grad_norm": 0.0, "learning_rate": 9.669946248588418e-06, "loss": 1.0627, "step": 13423 }, { "epoch": 0.5252367164879881, "grad_norm": 0.0, "learning_rate": 9.668679710548608e-06, "loss": 1.0623, "step": 13424 }, { "epoch": 0.5252758431802176, "grad_norm": 0.0, "learning_rate": 9.667413177829387e-06, "loss": 1.0921, "step": 13425 }, { "epoch": 0.525314969872447, "grad_norm": 0.0, "learning_rate": 9.666146650451091e-06, "loss": 1.0302, "step": 13426 }, { "epoch": 0.5253540965646765, "grad_norm": 0.0, "learning_rate": 9.664880128434064e-06, "loss": 1.0195, "step": 13427 }, { "epoch": 0.5253932232569059, "grad_norm": 0.0, "learning_rate": 9.66361361179864e-06, "loss": 1.0056, "step": 13428 }, { "epoch": 0.5254323499491353, "grad_norm": 0.0, "learning_rate": 9.662347100565163e-06, "loss": 1.1539, "step": 13429 }, { "epoch": 0.5254714766413647, "grad_norm": 0.0, "learning_rate": 9.661080594753967e-06, "loss": 1.0822, "step": 13430 }, { "epoch": 0.5255106033335942, "grad_norm": 0.0, "learning_rate": 9.659814094385391e-06, "loss": 1.103, "step": 13431 }, { "epoch": 0.5255497300258236, "grad_norm": 0.0, "learning_rate": 9.658547599479777e-06, "loss": 1.0127, "step": 13432 }, { "epoch": 0.5255888567180531, "grad_norm": 0.0, "learning_rate": 9.657281110057459e-06, "loss": 0.9816, "step": 13433 }, { "epoch": 0.5256279834102825, "grad_norm": 0.0, "learning_rate": 9.656014626138776e-06, "loss": 1.1124, "step": 13434 }, { "epoch": 0.5256671101025119, "grad_norm": 0.0, "learning_rate": 9.654748147744066e-06, "loss": 1.2607, "step": 13435 }, { "epoch": 0.5257062367947414, "grad_norm": 0.0, "learning_rate": 9.653481674893672e-06, "loss": 1.121, "step": 13436 }, { "epoch": 0.5257453634869708, "grad_norm": 0.0, "learning_rate": 9.652215207607926e-06, "loss": 1.0402, "step": 13437 }, { "epoch": 0.5257844901792003, "grad_norm": 0.0, "learning_rate": 9.65094874590717e-06, "loss": 1.0877, "step": 13438 }, { "epoch": 0.5258236168714296, "grad_norm": 0.0, "learning_rate": 9.649682289811738e-06, "loss": 1.0526, "step": 13439 }, { "epoch": 0.5258627435636591, "grad_norm": 0.0, "learning_rate": 9.648415839341966e-06, "loss": 1.0598, "step": 13440 }, { "epoch": 0.5259018702558885, "grad_norm": 0.0, "learning_rate": 9.6471493945182e-06, "loss": 1.0188, "step": 13441 }, { "epoch": 0.525940996948118, "grad_norm": 0.0, "learning_rate": 9.645882955360771e-06, "loss": 1.136, "step": 13442 }, { "epoch": 0.5259801236403474, "grad_norm": 0.0, "learning_rate": 9.644616521890019e-06, "loss": 0.9977, "step": 13443 }, { "epoch": 0.5260192503325769, "grad_norm": 0.0, "learning_rate": 9.643350094126275e-06, "loss": 1.137, "step": 13444 }, { "epoch": 0.5260583770248063, "grad_norm": 0.0, "learning_rate": 9.642083672089887e-06, "loss": 0.9895, "step": 13445 }, { "epoch": 0.5260975037170358, "grad_norm": 0.0, "learning_rate": 9.640817255801188e-06, "loss": 1.0454, "step": 13446 }, { "epoch": 0.5261366304092652, "grad_norm": 0.0, "learning_rate": 9.63955084528051e-06, "loss": 1.1128, "step": 13447 }, { "epoch": 0.5261757571014947, "grad_norm": 0.0, "learning_rate": 9.638284440548197e-06, "loss": 1.0148, "step": 13448 }, { "epoch": 0.526214883793724, "grad_norm": 0.0, "learning_rate": 9.637018041624577e-06, "loss": 0.9518, "step": 13449 }, { "epoch": 0.5262540104859535, "grad_norm": 0.0, "learning_rate": 9.635751648529998e-06, "loss": 0.9245, "step": 13450 }, { "epoch": 0.5262931371781829, "grad_norm": 0.0, "learning_rate": 9.63448526128479e-06, "loss": 0.9378, "step": 13451 }, { "epoch": 0.5263322638704124, "grad_norm": 0.0, "learning_rate": 9.633218879909291e-06, "loss": 0.8673, "step": 13452 }, { "epoch": 0.5263713905626418, "grad_norm": 0.0, "learning_rate": 9.631952504423832e-06, "loss": 1.0368, "step": 13453 }, { "epoch": 0.5264105172548713, "grad_norm": 0.0, "learning_rate": 9.630686134848759e-06, "loss": 1.0889, "step": 13454 }, { "epoch": 0.5264496439471007, "grad_norm": 0.0, "learning_rate": 9.629419771204405e-06, "loss": 1.0806, "step": 13455 }, { "epoch": 0.5264887706393302, "grad_norm": 0.0, "learning_rate": 9.628153413511104e-06, "loss": 1.0566, "step": 13456 }, { "epoch": 0.5265278973315596, "grad_norm": 0.0, "learning_rate": 9.62688706178919e-06, "loss": 1.0268, "step": 13457 }, { "epoch": 0.5265670240237891, "grad_norm": 0.0, "learning_rate": 9.625620716059007e-06, "loss": 0.9509, "step": 13458 }, { "epoch": 0.5266061507160185, "grad_norm": 0.0, "learning_rate": 9.624354376340884e-06, "loss": 1.103, "step": 13459 }, { "epoch": 0.526645277408248, "grad_norm": 0.0, "learning_rate": 9.623088042655161e-06, "loss": 1.0038, "step": 13460 }, { "epoch": 0.5266844041004773, "grad_norm": 0.0, "learning_rate": 9.62182171502217e-06, "loss": 1.0791, "step": 13461 }, { "epoch": 0.5267235307927068, "grad_norm": 0.0, "learning_rate": 9.620555393462245e-06, "loss": 1.1892, "step": 13462 }, { "epoch": 0.5267626574849362, "grad_norm": 0.0, "learning_rate": 9.61928907799573e-06, "loss": 0.9563, "step": 13463 }, { "epoch": 0.5268017841771656, "grad_norm": 0.0, "learning_rate": 9.618022768642955e-06, "loss": 1.0838, "step": 13464 }, { "epoch": 0.5268409108693951, "grad_norm": 0.0, "learning_rate": 9.616756465424256e-06, "loss": 1.1167, "step": 13465 }, { "epoch": 0.5268800375616245, "grad_norm": 0.0, "learning_rate": 9.615490168359964e-06, "loss": 1.1088, "step": 13466 }, { "epoch": 0.526919164253854, "grad_norm": 0.0, "learning_rate": 9.614223877470419e-06, "loss": 1.0851, "step": 13467 }, { "epoch": 0.5269582909460834, "grad_norm": 0.0, "learning_rate": 9.612957592775957e-06, "loss": 1.1522, "step": 13468 }, { "epoch": 0.5269974176383129, "grad_norm": 0.0, "learning_rate": 9.611691314296913e-06, "loss": 1.0685, "step": 13469 }, { "epoch": 0.5270365443305423, "grad_norm": 0.0, "learning_rate": 9.610425042053618e-06, "loss": 1.1625, "step": 13470 }, { "epoch": 0.5270756710227718, "grad_norm": 0.0, "learning_rate": 9.609158776066405e-06, "loss": 0.9156, "step": 13471 }, { "epoch": 0.5271147977150011, "grad_norm": 0.0, "learning_rate": 9.607892516355618e-06, "loss": 1.1102, "step": 13472 }, { "epoch": 0.5271539244072306, "grad_norm": 0.0, "learning_rate": 9.606626262941582e-06, "loss": 0.9443, "step": 13473 }, { "epoch": 0.52719305109946, "grad_norm": 0.0, "learning_rate": 9.605360015844637e-06, "loss": 1.0542, "step": 13474 }, { "epoch": 0.5272321777916895, "grad_norm": 0.0, "learning_rate": 9.604093775085114e-06, "loss": 1.0313, "step": 13475 }, { "epoch": 0.5272713044839189, "grad_norm": 0.0, "learning_rate": 9.60282754068335e-06, "loss": 0.9282, "step": 13476 }, { "epoch": 0.5273104311761484, "grad_norm": 0.0, "learning_rate": 9.601561312659681e-06, "loss": 0.8982, "step": 13477 }, { "epoch": 0.5273495578683778, "grad_norm": 0.0, "learning_rate": 9.600295091034436e-06, "loss": 1.0988, "step": 13478 }, { "epoch": 0.5273886845606073, "grad_norm": 0.0, "learning_rate": 9.59902887582795e-06, "loss": 1.0674, "step": 13479 }, { "epoch": 0.5274278112528367, "grad_norm": 0.0, "learning_rate": 9.597762667060556e-06, "loss": 0.9875, "step": 13480 }, { "epoch": 0.5274669379450662, "grad_norm": 0.0, "learning_rate": 9.596496464752593e-06, "loss": 1.0382, "step": 13481 }, { "epoch": 0.5275060646372955, "grad_norm": 0.0, "learning_rate": 9.59523026892439e-06, "loss": 1.0419, "step": 13482 }, { "epoch": 0.527545191329525, "grad_norm": 0.0, "learning_rate": 9.593964079596282e-06, "loss": 1.0866, "step": 13483 }, { "epoch": 0.5275843180217544, "grad_norm": 0.0, "learning_rate": 9.592697896788598e-06, "loss": 0.9091, "step": 13484 }, { "epoch": 0.5276234447139839, "grad_norm": 0.0, "learning_rate": 9.591431720521681e-06, "loss": 0.9369, "step": 13485 }, { "epoch": 0.5276625714062133, "grad_norm": 0.0, "learning_rate": 9.590165550815857e-06, "loss": 1.1633, "step": 13486 }, { "epoch": 0.5277016980984428, "grad_norm": 0.0, "learning_rate": 9.58889938769146e-06, "loss": 1.0678, "step": 13487 }, { "epoch": 0.5277408247906722, "grad_norm": 0.0, "learning_rate": 9.587633231168825e-06, "loss": 1.0544, "step": 13488 }, { "epoch": 0.5277799514829017, "grad_norm": 0.0, "learning_rate": 9.58636708126828e-06, "loss": 0.946, "step": 13489 }, { "epoch": 0.5278190781751311, "grad_norm": 0.0, "learning_rate": 9.585100938010163e-06, "loss": 1.0435, "step": 13490 }, { "epoch": 0.5278582048673605, "grad_norm": 0.0, "learning_rate": 9.583834801414808e-06, "loss": 0.9287, "step": 13491 }, { "epoch": 0.52789733155959, "grad_norm": 0.0, "learning_rate": 9.582568671502543e-06, "loss": 0.9665, "step": 13492 }, { "epoch": 0.5279364582518193, "grad_norm": 0.0, "learning_rate": 9.581302548293698e-06, "loss": 1.1407, "step": 13493 }, { "epoch": 0.5279755849440488, "grad_norm": 0.0, "learning_rate": 9.580036431808614e-06, "loss": 1.0204, "step": 13494 }, { "epoch": 0.5280147116362782, "grad_norm": 0.0, "learning_rate": 9.578770322067619e-06, "loss": 1.008, "step": 13495 }, { "epoch": 0.5280538383285077, "grad_norm": 0.0, "learning_rate": 9.577504219091044e-06, "loss": 1.0522, "step": 13496 }, { "epoch": 0.5280929650207371, "grad_norm": 0.0, "learning_rate": 9.576238122899221e-06, "loss": 1.1509, "step": 13497 }, { "epoch": 0.5281320917129666, "grad_norm": 0.0, "learning_rate": 9.574972033512482e-06, "loss": 0.9294, "step": 13498 }, { "epoch": 0.528171218405196, "grad_norm": 0.0, "learning_rate": 9.57370595095116e-06, "loss": 0.9634, "step": 13499 }, { "epoch": 0.5282103450974255, "grad_norm": 0.0, "learning_rate": 9.572439875235587e-06, "loss": 1.0211, "step": 13500 }, { "epoch": 0.5282494717896549, "grad_norm": 0.0, "learning_rate": 9.571173806386095e-06, "loss": 1.0934, "step": 13501 }, { "epoch": 0.5282885984818844, "grad_norm": 0.0, "learning_rate": 9.569907744423009e-06, "loss": 0.9563, "step": 13502 }, { "epoch": 0.5283277251741137, "grad_norm": 0.0, "learning_rate": 9.56864168936667e-06, "loss": 0.9789, "step": 13503 }, { "epoch": 0.5283668518663432, "grad_norm": 0.0, "learning_rate": 9.567375641237407e-06, "loss": 1.1208, "step": 13504 }, { "epoch": 0.5284059785585726, "grad_norm": 0.0, "learning_rate": 9.566109600055547e-06, "loss": 1.1661, "step": 13505 }, { "epoch": 0.5284451052508021, "grad_norm": 0.0, "learning_rate": 9.564843565841424e-06, "loss": 0.9811, "step": 13506 }, { "epoch": 0.5284842319430315, "grad_norm": 0.0, "learning_rate": 9.563577538615363e-06, "loss": 1.0604, "step": 13507 }, { "epoch": 0.528523358635261, "grad_norm": 0.0, "learning_rate": 9.562311518397704e-06, "loss": 1.2902, "step": 13508 }, { "epoch": 0.5285624853274904, "grad_norm": 0.0, "learning_rate": 9.561045505208775e-06, "loss": 1.1618, "step": 13509 }, { "epoch": 0.5286016120197199, "grad_norm": 0.0, "learning_rate": 9.559779499068904e-06, "loss": 0.8938, "step": 13510 }, { "epoch": 0.5286407387119493, "grad_norm": 0.0, "learning_rate": 9.558513499998421e-06, "loss": 1.1041, "step": 13511 }, { "epoch": 0.5286798654041788, "grad_norm": 0.0, "learning_rate": 9.557247508017657e-06, "loss": 1.0024, "step": 13512 }, { "epoch": 0.5287189920964082, "grad_norm": 0.0, "learning_rate": 9.555981523146946e-06, "loss": 0.9979, "step": 13513 }, { "epoch": 0.5287581187886377, "grad_norm": 0.0, "learning_rate": 9.554715545406617e-06, "loss": 1.0248, "step": 13514 }, { "epoch": 0.528797245480867, "grad_norm": 0.0, "learning_rate": 9.553449574816995e-06, "loss": 0.9194, "step": 13515 }, { "epoch": 0.5288363721730965, "grad_norm": 0.0, "learning_rate": 9.552183611398415e-06, "loss": 1.0566, "step": 13516 }, { "epoch": 0.5288754988653259, "grad_norm": 0.0, "learning_rate": 9.550917655171205e-06, "loss": 0.9846, "step": 13517 }, { "epoch": 0.5289146255575554, "grad_norm": 0.0, "learning_rate": 9.549651706155692e-06, "loss": 1.0242, "step": 13518 }, { "epoch": 0.5289537522497848, "grad_norm": 0.0, "learning_rate": 9.54838576437221e-06, "loss": 0.8621, "step": 13519 }, { "epoch": 0.5289928789420142, "grad_norm": 0.0, "learning_rate": 9.547119829841088e-06, "loss": 1.075, "step": 13520 }, { "epoch": 0.5290320056342437, "grad_norm": 0.0, "learning_rate": 9.545853902582653e-06, "loss": 0.9848, "step": 13521 }, { "epoch": 0.5290711323264731, "grad_norm": 0.0, "learning_rate": 9.544587982617236e-06, "loss": 0.9596, "step": 13522 }, { "epoch": 0.5291102590187026, "grad_norm": 0.0, "learning_rate": 9.543322069965163e-06, "loss": 1.0768, "step": 13523 }, { "epoch": 0.529149385710932, "grad_norm": 0.0, "learning_rate": 9.542056164646765e-06, "loss": 1.0572, "step": 13524 }, { "epoch": 0.5291885124031614, "grad_norm": 0.0, "learning_rate": 9.540790266682375e-06, "loss": 1.0775, "step": 13525 }, { "epoch": 0.5292276390953908, "grad_norm": 0.0, "learning_rate": 9.539524376092317e-06, "loss": 1.0075, "step": 13526 }, { "epoch": 0.5292667657876203, "grad_norm": 0.0, "learning_rate": 9.53825849289692e-06, "loss": 1.1107, "step": 13527 }, { "epoch": 0.5293058924798497, "grad_norm": 0.0, "learning_rate": 9.536992617116515e-06, "loss": 0.9786, "step": 13528 }, { "epoch": 0.5293450191720792, "grad_norm": 0.0, "learning_rate": 9.535726748771422e-06, "loss": 1.1302, "step": 13529 }, { "epoch": 0.5293841458643086, "grad_norm": 0.0, "learning_rate": 9.53446088788198e-06, "loss": 1.0727, "step": 13530 }, { "epoch": 0.5294232725565381, "grad_norm": 0.0, "learning_rate": 9.533195034468513e-06, "loss": 0.8994, "step": 13531 }, { "epoch": 0.5294623992487675, "grad_norm": 0.0, "learning_rate": 9.531929188551349e-06, "loss": 0.9664, "step": 13532 }, { "epoch": 0.529501525940997, "grad_norm": 0.0, "learning_rate": 9.530663350150812e-06, "loss": 1.04, "step": 13533 }, { "epoch": 0.5295406526332264, "grad_norm": 0.0, "learning_rate": 9.529397519287237e-06, "loss": 0.9979, "step": 13534 }, { "epoch": 0.5295797793254559, "grad_norm": 0.0, "learning_rate": 9.528131695980948e-06, "loss": 1.0309, "step": 13535 }, { "epoch": 0.5296189060176852, "grad_norm": 0.0, "learning_rate": 9.526865880252273e-06, "loss": 1.1, "step": 13536 }, { "epoch": 0.5296580327099147, "grad_norm": 0.0, "learning_rate": 9.52560007212154e-06, "loss": 1.1292, "step": 13537 }, { "epoch": 0.5296971594021441, "grad_norm": 0.0, "learning_rate": 9.524334271609069e-06, "loss": 1.1414, "step": 13538 }, { "epoch": 0.5297362860943736, "grad_norm": 0.0, "learning_rate": 9.5230684787352e-06, "loss": 1.1035, "step": 13539 }, { "epoch": 0.529775412786603, "grad_norm": 0.0, "learning_rate": 9.521802693520253e-06, "loss": 0.996, "step": 13540 }, { "epoch": 0.5298145394788325, "grad_norm": 0.0, "learning_rate": 9.520536915984555e-06, "loss": 1.029, "step": 13541 }, { "epoch": 0.5298536661710619, "grad_norm": 0.0, "learning_rate": 9.51927114614843e-06, "loss": 1.0722, "step": 13542 }, { "epoch": 0.5298927928632914, "grad_norm": 0.0, "learning_rate": 9.51800538403221e-06, "loss": 1.0411, "step": 13543 }, { "epoch": 0.5299319195555208, "grad_norm": 0.0, "learning_rate": 9.51673962965622e-06, "loss": 0.9385, "step": 13544 }, { "epoch": 0.5299710462477503, "grad_norm": 0.0, "learning_rate": 9.515473883040789e-06, "loss": 1.0413, "step": 13545 }, { "epoch": 0.5300101729399797, "grad_norm": 0.0, "learning_rate": 9.514208144206237e-06, "loss": 0.9799, "step": 13546 }, { "epoch": 0.5300492996322091, "grad_norm": 0.0, "learning_rate": 9.512942413172892e-06, "loss": 1.0057, "step": 13547 }, { "epoch": 0.5300884263244385, "grad_norm": 0.0, "learning_rate": 9.511676689961084e-06, "loss": 1.0799, "step": 13548 }, { "epoch": 0.5301275530166679, "grad_norm": 0.0, "learning_rate": 9.510410974591137e-06, "loss": 1.0667, "step": 13549 }, { "epoch": 0.5301666797088974, "grad_norm": 0.0, "learning_rate": 9.509145267083374e-06, "loss": 1.0775, "step": 13550 }, { "epoch": 0.5302058064011268, "grad_norm": 0.0, "learning_rate": 9.507879567458122e-06, "loss": 1.0273, "step": 13551 }, { "epoch": 0.5302449330933563, "grad_norm": 0.0, "learning_rate": 9.506613875735711e-06, "loss": 1.0048, "step": 13552 }, { "epoch": 0.5302840597855857, "grad_norm": 0.0, "learning_rate": 9.505348191936461e-06, "loss": 1.0757, "step": 13553 }, { "epoch": 0.5303231864778152, "grad_norm": 0.0, "learning_rate": 9.504082516080702e-06, "loss": 0.9892, "step": 13554 }, { "epoch": 0.5303623131700446, "grad_norm": 0.0, "learning_rate": 9.502816848188755e-06, "loss": 1.1455, "step": 13555 }, { "epoch": 0.5304014398622741, "grad_norm": 0.0, "learning_rate": 9.501551188280942e-06, "loss": 0.9993, "step": 13556 }, { "epoch": 0.5304405665545034, "grad_norm": 0.0, "learning_rate": 9.500285536377597e-06, "loss": 1.1294, "step": 13557 }, { "epoch": 0.5304796932467329, "grad_norm": 0.0, "learning_rate": 9.49901989249904e-06, "loss": 0.9031, "step": 13558 }, { "epoch": 0.5305188199389623, "grad_norm": 0.0, "learning_rate": 9.497754256665596e-06, "loss": 1.0398, "step": 13559 }, { "epoch": 0.5305579466311918, "grad_norm": 0.0, "learning_rate": 9.496488628897586e-06, "loss": 0.9794, "step": 13560 }, { "epoch": 0.5305970733234212, "grad_norm": 0.0, "learning_rate": 9.49522300921534e-06, "loss": 1.1222, "step": 13561 }, { "epoch": 0.5306362000156507, "grad_norm": 0.0, "learning_rate": 9.493957397639178e-06, "loss": 1.0179, "step": 13562 }, { "epoch": 0.5306753267078801, "grad_norm": 0.0, "learning_rate": 9.49269179418943e-06, "loss": 1.1594, "step": 13563 }, { "epoch": 0.5307144534001096, "grad_norm": 0.0, "learning_rate": 9.491426198886414e-06, "loss": 1.0235, "step": 13564 }, { "epoch": 0.530753580092339, "grad_norm": 0.0, "learning_rate": 9.490160611750456e-06, "loss": 1.0468, "step": 13565 }, { "epoch": 0.5307927067845685, "grad_norm": 0.0, "learning_rate": 9.488895032801879e-06, "loss": 1.1302, "step": 13566 }, { "epoch": 0.5308318334767979, "grad_norm": 0.0, "learning_rate": 9.48762946206101e-06, "loss": 1.0311, "step": 13567 }, { "epoch": 0.5308709601690274, "grad_norm": 0.0, "learning_rate": 9.486363899548165e-06, "loss": 1.0802, "step": 13568 }, { "epoch": 0.5309100868612567, "grad_norm": 0.0, "learning_rate": 9.485098345283675e-06, "loss": 0.9382, "step": 13569 }, { "epoch": 0.5309492135534862, "grad_norm": 0.0, "learning_rate": 9.48383279928786e-06, "loss": 1.1183, "step": 13570 }, { "epoch": 0.5309883402457156, "grad_norm": 0.0, "learning_rate": 9.482567261581044e-06, "loss": 1.0244, "step": 13571 }, { "epoch": 0.5310274669379451, "grad_norm": 0.0, "learning_rate": 9.48130173218355e-06, "loss": 1.093, "step": 13572 }, { "epoch": 0.5310665936301745, "grad_norm": 0.0, "learning_rate": 9.480036211115697e-06, "loss": 1.1202, "step": 13573 }, { "epoch": 0.531105720322404, "grad_norm": 0.0, "learning_rate": 9.478770698397814e-06, "loss": 0.9898, "step": 13574 }, { "epoch": 0.5311448470146334, "grad_norm": 0.0, "learning_rate": 9.47750519405022e-06, "loss": 0.9807, "step": 13575 }, { "epoch": 0.5311839737068628, "grad_norm": 0.0, "learning_rate": 9.476239698093238e-06, "loss": 1.1049, "step": 13576 }, { "epoch": 0.5312231003990923, "grad_norm": 0.0, "learning_rate": 9.47497421054719e-06, "loss": 1.0919, "step": 13577 }, { "epoch": 0.5312622270913216, "grad_norm": 0.0, "learning_rate": 9.473708731432395e-06, "loss": 0.9095, "step": 13578 }, { "epoch": 0.5313013537835511, "grad_norm": 0.0, "learning_rate": 9.472443260769181e-06, "loss": 1.009, "step": 13579 }, { "epoch": 0.5313404804757805, "grad_norm": 0.0, "learning_rate": 9.471177798577869e-06, "loss": 1.1621, "step": 13580 }, { "epoch": 0.53137960716801, "grad_norm": 0.0, "learning_rate": 9.469912344878779e-06, "loss": 1.0852, "step": 13581 }, { "epoch": 0.5314187338602394, "grad_norm": 0.0, "learning_rate": 9.468646899692227e-06, "loss": 1.1156, "step": 13582 }, { "epoch": 0.5314578605524689, "grad_norm": 0.0, "learning_rate": 9.467381463038545e-06, "loss": 1.0002, "step": 13583 }, { "epoch": 0.5314969872446983, "grad_norm": 0.0, "learning_rate": 9.466116034938047e-06, "loss": 0.9239, "step": 13584 }, { "epoch": 0.5315361139369278, "grad_norm": 0.0, "learning_rate": 9.464850615411059e-06, "loss": 0.9817, "step": 13585 }, { "epoch": 0.5315752406291572, "grad_norm": 0.0, "learning_rate": 9.463585204477898e-06, "loss": 0.9485, "step": 13586 }, { "epoch": 0.5316143673213867, "grad_norm": 0.0, "learning_rate": 9.462319802158884e-06, "loss": 1.1289, "step": 13587 }, { "epoch": 0.5316534940136161, "grad_norm": 0.0, "learning_rate": 9.461054408474343e-06, "loss": 0.9135, "step": 13588 }, { "epoch": 0.5316926207058456, "grad_norm": 0.0, "learning_rate": 9.459789023444595e-06, "loss": 1.1118, "step": 13589 }, { "epoch": 0.5317317473980749, "grad_norm": 0.0, "learning_rate": 9.458523647089955e-06, "loss": 1.0647, "step": 13590 }, { "epoch": 0.5317708740903044, "grad_norm": 0.0, "learning_rate": 9.457258279430745e-06, "loss": 0.9483, "step": 13591 }, { "epoch": 0.5318100007825338, "grad_norm": 0.0, "learning_rate": 9.45599292048729e-06, "loss": 1.0948, "step": 13592 }, { "epoch": 0.5318491274747633, "grad_norm": 0.0, "learning_rate": 9.454727570279907e-06, "loss": 1.1513, "step": 13593 }, { "epoch": 0.5318882541669927, "grad_norm": 0.0, "learning_rate": 9.453462228828917e-06, "loss": 0.8948, "step": 13594 }, { "epoch": 0.5319273808592222, "grad_norm": 0.0, "learning_rate": 9.452196896154639e-06, "loss": 1.0361, "step": 13595 }, { "epoch": 0.5319665075514516, "grad_norm": 0.0, "learning_rate": 9.450931572277387e-06, "loss": 1.1232, "step": 13596 }, { "epoch": 0.5320056342436811, "grad_norm": 0.0, "learning_rate": 9.44966625721749e-06, "loss": 0.9742, "step": 13597 }, { "epoch": 0.5320447609359105, "grad_norm": 0.0, "learning_rate": 9.448400950995265e-06, "loss": 1.0248, "step": 13598 }, { "epoch": 0.53208388762814, "grad_norm": 0.0, "learning_rate": 9.447135653631028e-06, "loss": 1.0617, "step": 13599 }, { "epoch": 0.5321230143203693, "grad_norm": 0.0, "learning_rate": 9.445870365145097e-06, "loss": 1.0415, "step": 13600 }, { "epoch": 0.5321621410125988, "grad_norm": 0.0, "learning_rate": 9.444605085557795e-06, "loss": 1.0541, "step": 13601 }, { "epoch": 0.5322012677048282, "grad_norm": 0.0, "learning_rate": 9.443339814889441e-06, "loss": 1.1744, "step": 13602 }, { "epoch": 0.5322403943970577, "grad_norm": 0.0, "learning_rate": 9.442074553160353e-06, "loss": 1.1078, "step": 13603 }, { "epoch": 0.5322795210892871, "grad_norm": 0.0, "learning_rate": 9.440809300390847e-06, "loss": 1.1352, "step": 13604 }, { "epoch": 0.5323186477815165, "grad_norm": 0.0, "learning_rate": 9.43954405660124e-06, "loss": 1.022, "step": 13605 }, { "epoch": 0.532357774473746, "grad_norm": 0.0, "learning_rate": 9.438278821811857e-06, "loss": 1.0635, "step": 13606 }, { "epoch": 0.5323969011659754, "grad_norm": 0.0, "learning_rate": 9.43701359604301e-06, "loss": 0.9712, "step": 13607 }, { "epoch": 0.5324360278582049, "grad_norm": 0.0, "learning_rate": 9.435748379315021e-06, "loss": 1.0207, "step": 13608 }, { "epoch": 0.5324751545504343, "grad_norm": 0.0, "learning_rate": 9.434483171648204e-06, "loss": 1.0606, "step": 13609 }, { "epoch": 0.5325142812426638, "grad_norm": 0.0, "learning_rate": 9.43321797306288e-06, "loss": 1.0169, "step": 13610 }, { "epoch": 0.5325534079348931, "grad_norm": 0.0, "learning_rate": 9.431952783579365e-06, "loss": 1.0511, "step": 13611 }, { "epoch": 0.5325925346271226, "grad_norm": 0.0, "learning_rate": 9.430687603217978e-06, "loss": 1.0502, "step": 13612 }, { "epoch": 0.532631661319352, "grad_norm": 0.0, "learning_rate": 9.429422431999033e-06, "loss": 1.0103, "step": 13613 }, { "epoch": 0.5326707880115815, "grad_norm": 0.0, "learning_rate": 9.428157269942847e-06, "loss": 0.9537, "step": 13614 }, { "epoch": 0.5327099147038109, "grad_norm": 0.0, "learning_rate": 9.426892117069741e-06, "loss": 0.9737, "step": 13615 }, { "epoch": 0.5327490413960404, "grad_norm": 0.0, "learning_rate": 9.42562697340003e-06, "loss": 1.124, "step": 13616 }, { "epoch": 0.5327881680882698, "grad_norm": 0.0, "learning_rate": 9.42436183895403e-06, "loss": 1.1382, "step": 13617 }, { "epoch": 0.5328272947804993, "grad_norm": 0.0, "learning_rate": 9.423096713752054e-06, "loss": 0.9677, "step": 13618 }, { "epoch": 0.5328664214727287, "grad_norm": 0.0, "learning_rate": 9.421831597814424e-06, "loss": 0.9651, "step": 13619 }, { "epoch": 0.5329055481649582, "grad_norm": 0.0, "learning_rate": 9.420566491161456e-06, "loss": 1.0671, "step": 13620 }, { "epoch": 0.5329446748571876, "grad_norm": 0.0, "learning_rate": 9.419301393813463e-06, "loss": 1.0661, "step": 13621 }, { "epoch": 0.532983801549417, "grad_norm": 0.0, "learning_rate": 9.418036305790763e-06, "loss": 1.0079, "step": 13622 }, { "epoch": 0.5330229282416464, "grad_norm": 0.0, "learning_rate": 9.416771227113665e-06, "loss": 1.0992, "step": 13623 }, { "epoch": 0.5330620549338759, "grad_norm": 0.0, "learning_rate": 9.415506157802497e-06, "loss": 1.0562, "step": 13624 }, { "epoch": 0.5331011816261053, "grad_norm": 0.0, "learning_rate": 9.414241097877565e-06, "loss": 1.0333, "step": 13625 }, { "epoch": 0.5331403083183348, "grad_norm": 0.0, "learning_rate": 9.41297604735919e-06, "loss": 0.888, "step": 13626 }, { "epoch": 0.5331794350105642, "grad_norm": 0.0, "learning_rate": 9.411711006267676e-06, "loss": 1.0673, "step": 13627 }, { "epoch": 0.5332185617027937, "grad_norm": 0.0, "learning_rate": 9.410445974623353e-06, "loss": 1.0023, "step": 13628 }, { "epoch": 0.5332576883950231, "grad_norm": 0.0, "learning_rate": 9.409180952446528e-06, "loss": 1.0005, "step": 13629 }, { "epoch": 0.5332968150872526, "grad_norm": 0.0, "learning_rate": 9.407915939757516e-06, "loss": 1.0672, "step": 13630 }, { "epoch": 0.533335941779482, "grad_norm": 0.0, "learning_rate": 9.40665093657663e-06, "loss": 0.9802, "step": 13631 }, { "epoch": 0.5333750684717115, "grad_norm": 0.0, "learning_rate": 9.405385942924189e-06, "loss": 0.9369, "step": 13632 }, { "epoch": 0.5334141951639408, "grad_norm": 0.0, "learning_rate": 9.404120958820505e-06, "loss": 0.9611, "step": 13633 }, { "epoch": 0.5334533218561702, "grad_norm": 0.0, "learning_rate": 9.402855984285891e-06, "loss": 0.9766, "step": 13634 }, { "epoch": 0.5334924485483997, "grad_norm": 0.0, "learning_rate": 9.401591019340663e-06, "loss": 1.0134, "step": 13635 }, { "epoch": 0.5335315752406291, "grad_norm": 0.0, "learning_rate": 9.400326064005128e-06, "loss": 1.0601, "step": 13636 }, { "epoch": 0.5335707019328586, "grad_norm": 0.0, "learning_rate": 9.39906111829961e-06, "loss": 0.9261, "step": 13637 }, { "epoch": 0.533609828625088, "grad_norm": 0.0, "learning_rate": 9.397796182244416e-06, "loss": 1.1107, "step": 13638 }, { "epoch": 0.5336489553173175, "grad_norm": 0.0, "learning_rate": 9.396531255859863e-06, "loss": 1.0085, "step": 13639 }, { "epoch": 0.5336880820095469, "grad_norm": 0.0, "learning_rate": 9.395266339166256e-06, "loss": 0.9876, "step": 13640 }, { "epoch": 0.5337272087017764, "grad_norm": 0.0, "learning_rate": 9.394001432183919e-06, "loss": 1.0137, "step": 13641 }, { "epoch": 0.5337663353940058, "grad_norm": 0.0, "learning_rate": 9.392736534933159e-06, "loss": 0.9056, "step": 13642 }, { "epoch": 0.5338054620862352, "grad_norm": 0.0, "learning_rate": 9.391471647434289e-06, "loss": 0.9743, "step": 13643 }, { "epoch": 0.5338445887784646, "grad_norm": 0.0, "learning_rate": 9.390206769707623e-06, "loss": 1.0939, "step": 13644 }, { "epoch": 0.5338837154706941, "grad_norm": 0.0, "learning_rate": 9.38894190177347e-06, "loss": 0.9458, "step": 13645 }, { "epoch": 0.5339228421629235, "grad_norm": 0.0, "learning_rate": 9.387677043652141e-06, "loss": 0.9635, "step": 13646 }, { "epoch": 0.533961968855153, "grad_norm": 0.0, "learning_rate": 9.386412195363958e-06, "loss": 0.9268, "step": 13647 }, { "epoch": 0.5340010955473824, "grad_norm": 0.0, "learning_rate": 9.385147356929224e-06, "loss": 1.0383, "step": 13648 }, { "epoch": 0.5340402222396119, "grad_norm": 0.0, "learning_rate": 9.38388252836825e-06, "loss": 0.9886, "step": 13649 }, { "epoch": 0.5340793489318413, "grad_norm": 0.0, "learning_rate": 9.382617709701355e-06, "loss": 1.0131, "step": 13650 }, { "epoch": 0.5341184756240708, "grad_norm": 0.0, "learning_rate": 9.381352900948844e-06, "loss": 1.0201, "step": 13651 }, { "epoch": 0.5341576023163002, "grad_norm": 0.0, "learning_rate": 9.38008810213103e-06, "loss": 1.0999, "step": 13652 }, { "epoch": 0.5341967290085297, "grad_norm": 0.0, "learning_rate": 9.378823313268226e-06, "loss": 1.0101, "step": 13653 }, { "epoch": 0.534235855700759, "grad_norm": 0.0, "learning_rate": 9.377558534380737e-06, "loss": 1.1396, "step": 13654 }, { "epoch": 0.5342749823929885, "grad_norm": 0.0, "learning_rate": 9.376293765488882e-06, "loss": 1.0915, "step": 13655 }, { "epoch": 0.5343141090852179, "grad_norm": 0.0, "learning_rate": 9.375029006612966e-06, "loss": 1.1273, "step": 13656 }, { "epoch": 0.5343532357774474, "grad_norm": 0.0, "learning_rate": 9.373764257773303e-06, "loss": 0.9174, "step": 13657 }, { "epoch": 0.5343923624696768, "grad_norm": 0.0, "learning_rate": 9.372499518990197e-06, "loss": 1.0577, "step": 13658 }, { "epoch": 0.5344314891619063, "grad_norm": 0.0, "learning_rate": 9.371234790283965e-06, "loss": 1.0675, "step": 13659 }, { "epoch": 0.5344706158541357, "grad_norm": 0.0, "learning_rate": 9.369970071674916e-06, "loss": 0.9195, "step": 13660 }, { "epoch": 0.5345097425463652, "grad_norm": 0.0, "learning_rate": 9.368705363183356e-06, "loss": 1.036, "step": 13661 }, { "epoch": 0.5345488692385946, "grad_norm": 0.0, "learning_rate": 9.3674406648296e-06, "loss": 1.0926, "step": 13662 }, { "epoch": 0.534587995930824, "grad_norm": 0.0, "learning_rate": 9.366175976633949e-06, "loss": 0.9832, "step": 13663 }, { "epoch": 0.5346271226230535, "grad_norm": 0.0, "learning_rate": 9.36491129861672e-06, "loss": 1.0529, "step": 13664 }, { "epoch": 0.5346662493152828, "grad_norm": 0.0, "learning_rate": 9.363646630798221e-06, "loss": 1.0908, "step": 13665 }, { "epoch": 0.5347053760075123, "grad_norm": 0.0, "learning_rate": 9.36238197319876e-06, "loss": 1.0123, "step": 13666 }, { "epoch": 0.5347445026997417, "grad_norm": 0.0, "learning_rate": 9.36111732583864e-06, "loss": 0.9594, "step": 13667 }, { "epoch": 0.5347836293919712, "grad_norm": 0.0, "learning_rate": 9.35985268873818e-06, "loss": 1.0404, "step": 13668 }, { "epoch": 0.5348227560842006, "grad_norm": 0.0, "learning_rate": 9.358588061917684e-06, "loss": 1.0403, "step": 13669 }, { "epoch": 0.5348618827764301, "grad_norm": 0.0, "learning_rate": 9.35732344539746e-06, "loss": 1.1019, "step": 13670 }, { "epoch": 0.5349010094686595, "grad_norm": 0.0, "learning_rate": 9.356058839197816e-06, "loss": 0.9508, "step": 13671 }, { "epoch": 0.534940136160889, "grad_norm": 0.0, "learning_rate": 9.354794243339056e-06, "loss": 1.0029, "step": 13672 }, { "epoch": 0.5349792628531184, "grad_norm": 0.0, "learning_rate": 9.353529657841497e-06, "loss": 1.084, "step": 13673 }, { "epoch": 0.5350183895453479, "grad_norm": 0.0, "learning_rate": 9.35226508272544e-06, "loss": 0.937, "step": 13674 }, { "epoch": 0.5350575162375772, "grad_norm": 0.0, "learning_rate": 9.351000518011196e-06, "loss": 1.0872, "step": 13675 }, { "epoch": 0.5350966429298067, "grad_norm": 0.0, "learning_rate": 9.349735963719065e-06, "loss": 0.9786, "step": 13676 }, { "epoch": 0.5351357696220361, "grad_norm": 0.0, "learning_rate": 9.348471419869364e-06, "loss": 1.0353, "step": 13677 }, { "epoch": 0.5351748963142656, "grad_norm": 0.0, "learning_rate": 9.347206886482394e-06, "loss": 1.0739, "step": 13678 }, { "epoch": 0.535214023006495, "grad_norm": 0.0, "learning_rate": 9.345942363578467e-06, "loss": 1.0254, "step": 13679 }, { "epoch": 0.5352531496987245, "grad_norm": 0.0, "learning_rate": 9.344677851177884e-06, "loss": 0.9784, "step": 13680 }, { "epoch": 0.5352922763909539, "grad_norm": 0.0, "learning_rate": 9.343413349300948e-06, "loss": 1.0517, "step": 13681 }, { "epoch": 0.5353314030831834, "grad_norm": 0.0, "learning_rate": 9.342148857967978e-06, "loss": 1.0021, "step": 13682 }, { "epoch": 0.5353705297754128, "grad_norm": 0.0, "learning_rate": 9.34088437719927e-06, "loss": 1.1639, "step": 13683 }, { "epoch": 0.5354096564676423, "grad_norm": 0.0, "learning_rate": 9.339619907015135e-06, "loss": 1.0463, "step": 13684 }, { "epoch": 0.5354487831598717, "grad_norm": 0.0, "learning_rate": 9.338355447435871e-06, "loss": 1.054, "step": 13685 }, { "epoch": 0.5354879098521012, "grad_norm": 0.0, "learning_rate": 9.337090998481796e-06, "loss": 1.0684, "step": 13686 }, { "epoch": 0.5355270365443305, "grad_norm": 0.0, "learning_rate": 9.335826560173207e-06, "loss": 1.1258, "step": 13687 }, { "epoch": 0.53556616323656, "grad_norm": 0.0, "learning_rate": 9.334562132530412e-06, "loss": 1.074, "step": 13688 }, { "epoch": 0.5356052899287894, "grad_norm": 0.0, "learning_rate": 9.333297715573713e-06, "loss": 1.1237, "step": 13689 }, { "epoch": 0.5356444166210188, "grad_norm": 0.0, "learning_rate": 9.33203330932342e-06, "loss": 1.0502, "step": 13690 }, { "epoch": 0.5356835433132483, "grad_norm": 0.0, "learning_rate": 9.330768913799831e-06, "loss": 1.0811, "step": 13691 }, { "epoch": 0.5357226700054777, "grad_norm": 0.0, "learning_rate": 9.329504529023259e-06, "loss": 1.1392, "step": 13692 }, { "epoch": 0.5357617966977072, "grad_norm": 0.0, "learning_rate": 9.328240155014001e-06, "loss": 1.0505, "step": 13693 }, { "epoch": 0.5358009233899366, "grad_norm": 0.0, "learning_rate": 9.326975791792366e-06, "loss": 1.0159, "step": 13694 }, { "epoch": 0.5358400500821661, "grad_norm": 0.0, "learning_rate": 9.325711439378658e-06, "loss": 1.0266, "step": 13695 }, { "epoch": 0.5358791767743954, "grad_norm": 0.0, "learning_rate": 9.324447097793174e-06, "loss": 1.0518, "step": 13696 }, { "epoch": 0.535918303466625, "grad_norm": 0.0, "learning_rate": 9.323182767056228e-06, "loss": 1.0317, "step": 13697 }, { "epoch": 0.5359574301588543, "grad_norm": 0.0, "learning_rate": 9.321918447188116e-06, "loss": 1.1213, "step": 13698 }, { "epoch": 0.5359965568510838, "grad_norm": 0.0, "learning_rate": 9.320654138209146e-06, "loss": 0.9915, "step": 13699 }, { "epoch": 0.5360356835433132, "grad_norm": 0.0, "learning_rate": 9.31938984013962e-06, "loss": 1.0435, "step": 13700 }, { "epoch": 0.5360748102355427, "grad_norm": 0.0, "learning_rate": 9.318125552999839e-06, "loss": 1.0106, "step": 13701 }, { "epoch": 0.5361139369277721, "grad_norm": 0.0, "learning_rate": 9.316861276810105e-06, "loss": 1.0161, "step": 13702 }, { "epoch": 0.5361530636200016, "grad_norm": 0.0, "learning_rate": 9.315597011590724e-06, "loss": 0.9383, "step": 13703 }, { "epoch": 0.536192190312231, "grad_norm": 0.0, "learning_rate": 9.314332757361998e-06, "loss": 1.0061, "step": 13704 }, { "epoch": 0.5362313170044605, "grad_norm": 0.0, "learning_rate": 9.313068514144232e-06, "loss": 1.1442, "step": 13705 }, { "epoch": 0.5362704436966899, "grad_norm": 0.0, "learning_rate": 9.31180428195772e-06, "loss": 1.0478, "step": 13706 }, { "epoch": 0.5363095703889194, "grad_norm": 0.0, "learning_rate": 9.310540060822769e-06, "loss": 1.0141, "step": 13707 }, { "epoch": 0.5363486970811487, "grad_norm": 0.0, "learning_rate": 9.309275850759683e-06, "loss": 0.9849, "step": 13708 }, { "epoch": 0.5363878237733782, "grad_norm": 0.0, "learning_rate": 9.308011651788763e-06, "loss": 1.0684, "step": 13709 }, { "epoch": 0.5364269504656076, "grad_norm": 0.0, "learning_rate": 9.306747463930307e-06, "loss": 1.0515, "step": 13710 }, { "epoch": 0.5364660771578371, "grad_norm": 0.0, "learning_rate": 9.305483287204618e-06, "loss": 1.0223, "step": 13711 }, { "epoch": 0.5365052038500665, "grad_norm": 0.0, "learning_rate": 9.304219121631993e-06, "loss": 1.064, "step": 13712 }, { "epoch": 0.536544330542296, "grad_norm": 0.0, "learning_rate": 9.302954967232741e-06, "loss": 0.9938, "step": 13713 }, { "epoch": 0.5365834572345254, "grad_norm": 0.0, "learning_rate": 9.30169082402716e-06, "loss": 1.1079, "step": 13714 }, { "epoch": 0.5366225839267549, "grad_norm": 0.0, "learning_rate": 9.30042669203555e-06, "loss": 1.0517, "step": 13715 }, { "epoch": 0.5366617106189843, "grad_norm": 0.0, "learning_rate": 9.299162571278203e-06, "loss": 0.9772, "step": 13716 }, { "epoch": 0.5367008373112138, "grad_norm": 0.0, "learning_rate": 9.297898461775435e-06, "loss": 1.1067, "step": 13717 }, { "epoch": 0.5367399640034431, "grad_norm": 0.0, "learning_rate": 9.296634363547535e-06, "loss": 0.9614, "step": 13718 }, { "epoch": 0.5367790906956725, "grad_norm": 0.0, "learning_rate": 9.295370276614806e-06, "loss": 1.0656, "step": 13719 }, { "epoch": 0.536818217387902, "grad_norm": 0.0, "learning_rate": 9.294106200997548e-06, "loss": 1.0857, "step": 13720 }, { "epoch": 0.5368573440801314, "grad_norm": 0.0, "learning_rate": 9.292842136716058e-06, "loss": 0.9975, "step": 13721 }, { "epoch": 0.5368964707723609, "grad_norm": 0.0, "learning_rate": 9.29157808379064e-06, "loss": 0.9545, "step": 13722 }, { "epoch": 0.5369355974645903, "grad_norm": 0.0, "learning_rate": 9.290314042241589e-06, "loss": 1.0452, "step": 13723 }, { "epoch": 0.5369747241568198, "grad_norm": 0.0, "learning_rate": 9.289050012089205e-06, "loss": 1.0679, "step": 13724 }, { "epoch": 0.5370138508490492, "grad_norm": 0.0, "learning_rate": 9.287785993353784e-06, "loss": 0.9583, "step": 13725 }, { "epoch": 0.5370529775412787, "grad_norm": 0.0, "learning_rate": 9.28652198605563e-06, "loss": 1.0697, "step": 13726 }, { "epoch": 0.5370921042335081, "grad_norm": 0.0, "learning_rate": 9.28525799021504e-06, "loss": 1.1141, "step": 13727 }, { "epoch": 0.5371312309257376, "grad_norm": 0.0, "learning_rate": 9.283994005852313e-06, "loss": 0.9716, "step": 13728 }, { "epoch": 0.5371703576179669, "grad_norm": 0.0, "learning_rate": 9.282730032987743e-06, "loss": 0.9705, "step": 13729 }, { "epoch": 0.5372094843101964, "grad_norm": 0.0, "learning_rate": 9.281466071641624e-06, "loss": 0.8954, "step": 13730 }, { "epoch": 0.5372486110024258, "grad_norm": 0.0, "learning_rate": 9.280202121834268e-06, "loss": 1.0353, "step": 13731 }, { "epoch": 0.5372877376946553, "grad_norm": 0.0, "learning_rate": 9.27893818358596e-06, "loss": 1.0075, "step": 13732 }, { "epoch": 0.5373268643868847, "grad_norm": 0.0, "learning_rate": 9.277674256917004e-06, "loss": 1.0808, "step": 13733 }, { "epoch": 0.5373659910791142, "grad_norm": 0.0, "learning_rate": 9.27641034184769e-06, "loss": 1.0539, "step": 13734 }, { "epoch": 0.5374051177713436, "grad_norm": 0.0, "learning_rate": 9.275146438398322e-06, "loss": 1.0207, "step": 13735 }, { "epoch": 0.5374442444635731, "grad_norm": 0.0, "learning_rate": 9.273882546589194e-06, "loss": 1.1016, "step": 13736 }, { "epoch": 0.5374833711558025, "grad_norm": 0.0, "learning_rate": 9.2726186664406e-06, "loss": 0.9505, "step": 13737 }, { "epoch": 0.537522497848032, "grad_norm": 0.0, "learning_rate": 9.271354797972841e-06, "loss": 0.9882, "step": 13738 }, { "epoch": 0.5375616245402614, "grad_norm": 0.0, "learning_rate": 9.270090941206211e-06, "loss": 1.0784, "step": 13739 }, { "epoch": 0.5376007512324908, "grad_norm": 0.0, "learning_rate": 9.268827096161007e-06, "loss": 0.9548, "step": 13740 }, { "epoch": 0.5376398779247202, "grad_norm": 0.0, "learning_rate": 9.26756326285752e-06, "loss": 1.0412, "step": 13741 }, { "epoch": 0.5376790046169497, "grad_norm": 0.0, "learning_rate": 9.266299441316053e-06, "loss": 0.9771, "step": 13742 }, { "epoch": 0.5377181313091791, "grad_norm": 0.0, "learning_rate": 9.265035631556894e-06, "loss": 0.9653, "step": 13743 }, { "epoch": 0.5377572580014086, "grad_norm": 0.0, "learning_rate": 9.263771833600345e-06, "loss": 0.9927, "step": 13744 }, { "epoch": 0.537796384693638, "grad_norm": 0.0, "learning_rate": 9.262508047466698e-06, "loss": 1.0782, "step": 13745 }, { "epoch": 0.5378355113858675, "grad_norm": 0.0, "learning_rate": 9.261244273176246e-06, "loss": 1.053, "step": 13746 }, { "epoch": 0.5378746380780969, "grad_norm": 0.0, "learning_rate": 9.259980510749281e-06, "loss": 1.0736, "step": 13747 }, { "epoch": 0.5379137647703263, "grad_norm": 0.0, "learning_rate": 9.258716760206107e-06, "loss": 1.0127, "step": 13748 }, { "epoch": 0.5379528914625558, "grad_norm": 0.0, "learning_rate": 9.257453021567013e-06, "loss": 0.9558, "step": 13749 }, { "epoch": 0.5379920181547851, "grad_norm": 0.0, "learning_rate": 9.256189294852294e-06, "loss": 1.1007, "step": 13750 }, { "epoch": 0.5380311448470146, "grad_norm": 0.0, "learning_rate": 9.254925580082242e-06, "loss": 1.1175, "step": 13751 }, { "epoch": 0.538070271539244, "grad_norm": 0.0, "learning_rate": 9.253661877277145e-06, "loss": 1.0451, "step": 13752 }, { "epoch": 0.5381093982314735, "grad_norm": 0.0, "learning_rate": 9.25239818645731e-06, "loss": 1.0419, "step": 13753 }, { "epoch": 0.5381485249237029, "grad_norm": 0.0, "learning_rate": 9.251134507643022e-06, "loss": 1.053, "step": 13754 }, { "epoch": 0.5381876516159324, "grad_norm": 0.0, "learning_rate": 9.249870840854576e-06, "loss": 0.9881, "step": 13755 }, { "epoch": 0.5382267783081618, "grad_norm": 0.0, "learning_rate": 9.24860718611226e-06, "loss": 1.2278, "step": 13756 }, { "epoch": 0.5382659050003913, "grad_norm": 0.0, "learning_rate": 9.247343543436376e-06, "loss": 1.1267, "step": 13757 }, { "epoch": 0.5383050316926207, "grad_norm": 0.0, "learning_rate": 9.246079912847211e-06, "loss": 0.9176, "step": 13758 }, { "epoch": 0.5383441583848502, "grad_norm": 0.0, "learning_rate": 9.244816294365058e-06, "loss": 1.0432, "step": 13759 }, { "epoch": 0.5383832850770796, "grad_norm": 0.0, "learning_rate": 9.243552688010209e-06, "loss": 0.9976, "step": 13760 }, { "epoch": 0.538422411769309, "grad_norm": 0.0, "learning_rate": 9.24228909380295e-06, "loss": 1.15, "step": 13761 }, { "epoch": 0.5384615384615384, "grad_norm": 0.0, "learning_rate": 9.241025511763587e-06, "loss": 1.0229, "step": 13762 }, { "epoch": 0.5385006651537679, "grad_norm": 0.0, "learning_rate": 9.2397619419124e-06, "loss": 0.9476, "step": 13763 }, { "epoch": 0.5385397918459973, "grad_norm": 0.0, "learning_rate": 9.238498384269684e-06, "loss": 1.0185, "step": 13764 }, { "epoch": 0.5385789185382268, "grad_norm": 0.0, "learning_rate": 9.237234838855725e-06, "loss": 1.1294, "step": 13765 }, { "epoch": 0.5386180452304562, "grad_norm": 0.0, "learning_rate": 9.235971305690825e-06, "loss": 1.0139, "step": 13766 }, { "epoch": 0.5386571719226857, "grad_norm": 0.0, "learning_rate": 9.234707784795266e-06, "loss": 0.9816, "step": 13767 }, { "epoch": 0.5386962986149151, "grad_norm": 0.0, "learning_rate": 9.233444276189342e-06, "loss": 1.0958, "step": 13768 }, { "epoch": 0.5387354253071446, "grad_norm": 0.0, "learning_rate": 9.232180779893343e-06, "loss": 0.8993, "step": 13769 }, { "epoch": 0.538774551999374, "grad_norm": 0.0, "learning_rate": 9.230917295927553e-06, "loss": 1.0141, "step": 13770 }, { "epoch": 0.5388136786916035, "grad_norm": 0.0, "learning_rate": 9.229653824312273e-06, "loss": 1.0136, "step": 13771 }, { "epoch": 0.5388528053838328, "grad_norm": 0.0, "learning_rate": 9.228390365067787e-06, "loss": 1.0901, "step": 13772 }, { "epoch": 0.5388919320760623, "grad_norm": 0.0, "learning_rate": 9.227126918214385e-06, "loss": 0.9493, "step": 13773 }, { "epoch": 0.5389310587682917, "grad_norm": 0.0, "learning_rate": 9.22586348377235e-06, "loss": 1.0197, "step": 13774 }, { "epoch": 0.5389701854605212, "grad_norm": 0.0, "learning_rate": 9.224600061761986e-06, "loss": 0.901, "step": 13775 }, { "epoch": 0.5390093121527506, "grad_norm": 0.0, "learning_rate": 9.22333665220357e-06, "loss": 1.0986, "step": 13776 }, { "epoch": 0.53904843884498, "grad_norm": 0.0, "learning_rate": 9.222073255117395e-06, "loss": 0.9938, "step": 13777 }, { "epoch": 0.5390875655372095, "grad_norm": 0.0, "learning_rate": 9.220809870523749e-06, "loss": 0.9299, "step": 13778 }, { "epoch": 0.5391266922294389, "grad_norm": 0.0, "learning_rate": 9.219546498442917e-06, "loss": 1.1349, "step": 13779 }, { "epoch": 0.5391658189216684, "grad_norm": 0.0, "learning_rate": 9.218283138895192e-06, "loss": 1.0095, "step": 13780 }, { "epoch": 0.5392049456138978, "grad_norm": 0.0, "learning_rate": 9.21701979190086e-06, "loss": 1.0012, "step": 13781 }, { "epoch": 0.5392440723061273, "grad_norm": 0.0, "learning_rate": 9.21575645748021e-06, "loss": 1.1046, "step": 13782 }, { "epoch": 0.5392831989983566, "grad_norm": 0.0, "learning_rate": 9.214493135653526e-06, "loss": 0.9871, "step": 13783 }, { "epoch": 0.5393223256905861, "grad_norm": 0.0, "learning_rate": 9.213229826441103e-06, "loss": 1.0946, "step": 13784 }, { "epoch": 0.5393614523828155, "grad_norm": 0.0, "learning_rate": 9.21196652986322e-06, "loss": 1.1327, "step": 13785 }, { "epoch": 0.539400579075045, "grad_norm": 0.0, "learning_rate": 9.210703245940166e-06, "loss": 1.0066, "step": 13786 }, { "epoch": 0.5394397057672744, "grad_norm": 0.0, "learning_rate": 9.20943997469223e-06, "loss": 1.1328, "step": 13787 }, { "epoch": 0.5394788324595039, "grad_norm": 0.0, "learning_rate": 9.208176716139698e-06, "loss": 0.9021, "step": 13788 }, { "epoch": 0.5395179591517333, "grad_norm": 0.0, "learning_rate": 9.206913470302856e-06, "loss": 1.1418, "step": 13789 }, { "epoch": 0.5395570858439628, "grad_norm": 0.0, "learning_rate": 9.205650237201989e-06, "loss": 1.0627, "step": 13790 }, { "epoch": 0.5395962125361922, "grad_norm": 0.0, "learning_rate": 9.204387016857384e-06, "loss": 1.0664, "step": 13791 }, { "epoch": 0.5396353392284217, "grad_norm": 0.0, "learning_rate": 9.203123809289323e-06, "loss": 1.2209, "step": 13792 }, { "epoch": 0.539674465920651, "grad_norm": 0.0, "learning_rate": 9.201860614518098e-06, "loss": 1.1523, "step": 13793 }, { "epoch": 0.5397135926128805, "grad_norm": 0.0, "learning_rate": 9.200597432563993e-06, "loss": 1.0886, "step": 13794 }, { "epoch": 0.5397527193051099, "grad_norm": 0.0, "learning_rate": 9.199334263447292e-06, "loss": 1.0876, "step": 13795 }, { "epoch": 0.5397918459973394, "grad_norm": 0.0, "learning_rate": 9.198071107188274e-06, "loss": 1.1551, "step": 13796 }, { "epoch": 0.5398309726895688, "grad_norm": 0.0, "learning_rate": 9.196807963807234e-06, "loss": 0.8626, "step": 13797 }, { "epoch": 0.5398700993817983, "grad_norm": 0.0, "learning_rate": 9.195544833324452e-06, "loss": 0.9677, "step": 13798 }, { "epoch": 0.5399092260740277, "grad_norm": 0.0, "learning_rate": 9.194281715760212e-06, "loss": 1.0512, "step": 13799 }, { "epoch": 0.5399483527662572, "grad_norm": 0.0, "learning_rate": 9.193018611134796e-06, "loss": 0.9995, "step": 13800 }, { "epoch": 0.5399874794584866, "grad_norm": 0.0, "learning_rate": 9.191755519468487e-06, "loss": 0.8924, "step": 13801 }, { "epoch": 0.5400266061507161, "grad_norm": 0.0, "learning_rate": 9.190492440781576e-06, "loss": 1.0669, "step": 13802 }, { "epoch": 0.5400657328429455, "grad_norm": 0.0, "learning_rate": 9.189229375094342e-06, "loss": 1.0043, "step": 13803 }, { "epoch": 0.5401048595351748, "grad_norm": 0.0, "learning_rate": 9.187966322427068e-06, "loss": 0.962, "step": 13804 }, { "epoch": 0.5401439862274043, "grad_norm": 0.0, "learning_rate": 9.186703282800033e-06, "loss": 1.0222, "step": 13805 }, { "epoch": 0.5401831129196337, "grad_norm": 0.0, "learning_rate": 9.18544025623353e-06, "loss": 0.9715, "step": 13806 }, { "epoch": 0.5402222396118632, "grad_norm": 0.0, "learning_rate": 9.184177242747833e-06, "loss": 1.0483, "step": 13807 }, { "epoch": 0.5402613663040926, "grad_norm": 0.0, "learning_rate": 9.18291424236323e-06, "loss": 1.0875, "step": 13808 }, { "epoch": 0.5403004929963221, "grad_norm": 0.0, "learning_rate": 9.181651255099998e-06, "loss": 0.9515, "step": 13809 }, { "epoch": 0.5403396196885515, "grad_norm": 0.0, "learning_rate": 9.180388280978418e-06, "loss": 1.1049, "step": 13810 }, { "epoch": 0.540378746380781, "grad_norm": 0.0, "learning_rate": 9.17912532001878e-06, "loss": 0.9233, "step": 13811 }, { "epoch": 0.5404178730730104, "grad_norm": 0.0, "learning_rate": 9.177862372241361e-06, "loss": 1.0028, "step": 13812 }, { "epoch": 0.5404569997652399, "grad_norm": 0.0, "learning_rate": 9.17659943766644e-06, "loss": 1.0605, "step": 13813 }, { "epoch": 0.5404961264574693, "grad_norm": 0.0, "learning_rate": 9.175336516314298e-06, "loss": 1.0841, "step": 13814 }, { "epoch": 0.5405352531496987, "grad_norm": 0.0, "learning_rate": 9.174073608205222e-06, "loss": 1.1268, "step": 13815 }, { "epoch": 0.5405743798419281, "grad_norm": 0.0, "learning_rate": 9.172810713359488e-06, "loss": 1.1251, "step": 13816 }, { "epoch": 0.5406135065341576, "grad_norm": 0.0, "learning_rate": 9.17154783179738e-06, "loss": 1.0309, "step": 13817 }, { "epoch": 0.540652633226387, "grad_norm": 0.0, "learning_rate": 9.170284963539174e-06, "loss": 1.034, "step": 13818 }, { "epoch": 0.5406917599186165, "grad_norm": 0.0, "learning_rate": 9.169022108605147e-06, "loss": 1.0634, "step": 13819 }, { "epoch": 0.5407308866108459, "grad_norm": 0.0, "learning_rate": 9.167759267015588e-06, "loss": 0.8905, "step": 13820 }, { "epoch": 0.5407700133030754, "grad_norm": 0.0, "learning_rate": 9.166496438790773e-06, "loss": 0.9737, "step": 13821 }, { "epoch": 0.5408091399953048, "grad_norm": 0.0, "learning_rate": 9.16523362395098e-06, "loss": 1.0361, "step": 13822 }, { "epoch": 0.5408482666875343, "grad_norm": 0.0, "learning_rate": 9.163970822516487e-06, "loss": 0.9975, "step": 13823 }, { "epoch": 0.5408873933797637, "grad_norm": 0.0, "learning_rate": 9.162708034507578e-06, "loss": 1.0989, "step": 13824 }, { "epoch": 0.5409265200719932, "grad_norm": 0.0, "learning_rate": 9.161445259944526e-06, "loss": 1.0347, "step": 13825 }, { "epoch": 0.5409656467642225, "grad_norm": 0.0, "learning_rate": 9.160182498847615e-06, "loss": 1.0369, "step": 13826 }, { "epoch": 0.541004773456452, "grad_norm": 0.0, "learning_rate": 9.158919751237119e-06, "loss": 1.0941, "step": 13827 }, { "epoch": 0.5410439001486814, "grad_norm": 0.0, "learning_rate": 9.157657017133318e-06, "loss": 1.0098, "step": 13828 }, { "epoch": 0.5410830268409109, "grad_norm": 0.0, "learning_rate": 9.15639429655649e-06, "loss": 1.0495, "step": 13829 }, { "epoch": 0.5411221535331403, "grad_norm": 0.0, "learning_rate": 9.155131589526913e-06, "loss": 1.0641, "step": 13830 }, { "epoch": 0.5411612802253698, "grad_norm": 0.0, "learning_rate": 9.153868896064864e-06, "loss": 1.0338, "step": 13831 }, { "epoch": 0.5412004069175992, "grad_norm": 0.0, "learning_rate": 9.152606216190619e-06, "loss": 1.1258, "step": 13832 }, { "epoch": 0.5412395336098286, "grad_norm": 0.0, "learning_rate": 9.151343549924456e-06, "loss": 1.0642, "step": 13833 }, { "epoch": 0.5412786603020581, "grad_norm": 0.0, "learning_rate": 9.150080897286656e-06, "loss": 0.9393, "step": 13834 }, { "epoch": 0.5413177869942875, "grad_norm": 0.0, "learning_rate": 9.14881825829749e-06, "loss": 1.0073, "step": 13835 }, { "epoch": 0.541356913686517, "grad_norm": 0.0, "learning_rate": 9.147555632977232e-06, "loss": 1.0507, "step": 13836 }, { "epoch": 0.5413960403787463, "grad_norm": 0.0, "learning_rate": 9.146293021346165e-06, "loss": 1.0047, "step": 13837 }, { "epoch": 0.5414351670709758, "grad_norm": 0.0, "learning_rate": 9.145030423424564e-06, "loss": 1.0336, "step": 13838 }, { "epoch": 0.5414742937632052, "grad_norm": 0.0, "learning_rate": 9.143767839232704e-06, "loss": 1.0057, "step": 13839 }, { "epoch": 0.5415134204554347, "grad_norm": 0.0, "learning_rate": 9.142505268790857e-06, "loss": 0.9709, "step": 13840 }, { "epoch": 0.5415525471476641, "grad_norm": 0.0, "learning_rate": 9.141242712119298e-06, "loss": 0.9865, "step": 13841 }, { "epoch": 0.5415916738398936, "grad_norm": 0.0, "learning_rate": 9.139980169238309e-06, "loss": 0.996, "step": 13842 }, { "epoch": 0.541630800532123, "grad_norm": 0.0, "learning_rate": 9.138717640168161e-06, "loss": 1.0922, "step": 13843 }, { "epoch": 0.5416699272243525, "grad_norm": 0.0, "learning_rate": 9.13745512492913e-06, "loss": 0.9292, "step": 13844 }, { "epoch": 0.5417090539165819, "grad_norm": 0.0, "learning_rate": 9.136192623541487e-06, "loss": 1.1266, "step": 13845 }, { "epoch": 0.5417481806088114, "grad_norm": 0.0, "learning_rate": 9.134930136025504e-06, "loss": 1.0321, "step": 13846 }, { "epoch": 0.5417873073010407, "grad_norm": 0.0, "learning_rate": 9.133667662401464e-06, "loss": 0.9983, "step": 13847 }, { "epoch": 0.5418264339932702, "grad_norm": 0.0, "learning_rate": 9.132405202689636e-06, "loss": 1.1008, "step": 13848 }, { "epoch": 0.5418655606854996, "grad_norm": 0.0, "learning_rate": 9.131142756910291e-06, "loss": 0.9078, "step": 13849 }, { "epoch": 0.5419046873777291, "grad_norm": 0.0, "learning_rate": 9.129880325083702e-06, "loss": 0.9934, "step": 13850 }, { "epoch": 0.5419438140699585, "grad_norm": 0.0, "learning_rate": 9.12861790723015e-06, "loss": 1.0345, "step": 13851 }, { "epoch": 0.541982940762188, "grad_norm": 0.0, "learning_rate": 9.1273555033699e-06, "loss": 0.9314, "step": 13852 }, { "epoch": 0.5420220674544174, "grad_norm": 0.0, "learning_rate": 9.12609311352323e-06, "loss": 1.0776, "step": 13853 }, { "epoch": 0.5420611941466469, "grad_norm": 0.0, "learning_rate": 9.124830737710403e-06, "loss": 1.0712, "step": 13854 }, { "epoch": 0.5421003208388763, "grad_norm": 0.0, "learning_rate": 9.123568375951702e-06, "loss": 1.0922, "step": 13855 }, { "epoch": 0.5421394475311058, "grad_norm": 0.0, "learning_rate": 9.122306028267396e-06, "loss": 1.0207, "step": 13856 }, { "epoch": 0.5421785742233352, "grad_norm": 0.0, "learning_rate": 9.121043694677755e-06, "loss": 0.9907, "step": 13857 }, { "epoch": 0.5422177009155646, "grad_norm": 0.0, "learning_rate": 9.11978137520305e-06, "loss": 0.9869, "step": 13858 }, { "epoch": 0.542256827607794, "grad_norm": 0.0, "learning_rate": 9.11851906986355e-06, "loss": 1.0592, "step": 13859 }, { "epoch": 0.5422959543000235, "grad_norm": 0.0, "learning_rate": 9.117256778679533e-06, "loss": 1.0186, "step": 13860 }, { "epoch": 0.5423350809922529, "grad_norm": 0.0, "learning_rate": 9.115994501671264e-06, "loss": 0.9934, "step": 13861 }, { "epoch": 0.5423742076844823, "grad_norm": 0.0, "learning_rate": 9.114732238859019e-06, "loss": 1.075, "step": 13862 }, { "epoch": 0.5424133343767118, "grad_norm": 0.0, "learning_rate": 9.113469990263061e-06, "loss": 1.0388, "step": 13863 }, { "epoch": 0.5424524610689412, "grad_norm": 0.0, "learning_rate": 9.112207755903664e-06, "loss": 1.0428, "step": 13864 }, { "epoch": 0.5424915877611707, "grad_norm": 0.0, "learning_rate": 9.110945535801102e-06, "loss": 1.096, "step": 13865 }, { "epoch": 0.5425307144534001, "grad_norm": 0.0, "learning_rate": 9.109683329975639e-06, "loss": 1.0418, "step": 13866 }, { "epoch": 0.5425698411456296, "grad_norm": 0.0, "learning_rate": 9.108421138447545e-06, "loss": 1.0133, "step": 13867 }, { "epoch": 0.542608967837859, "grad_norm": 0.0, "learning_rate": 9.10715896123709e-06, "loss": 0.9009, "step": 13868 }, { "epoch": 0.5426480945300884, "grad_norm": 0.0, "learning_rate": 9.105896798364543e-06, "loss": 0.9436, "step": 13869 }, { "epoch": 0.5426872212223178, "grad_norm": 0.0, "learning_rate": 9.104634649850174e-06, "loss": 1.0194, "step": 13870 }, { "epoch": 0.5427263479145473, "grad_norm": 0.0, "learning_rate": 9.103372515714252e-06, "loss": 1.0811, "step": 13871 }, { "epoch": 0.5427654746067767, "grad_norm": 0.0, "learning_rate": 9.10211039597704e-06, "loss": 0.9495, "step": 13872 }, { "epoch": 0.5428046012990062, "grad_norm": 0.0, "learning_rate": 9.100848290658814e-06, "loss": 0.897, "step": 13873 }, { "epoch": 0.5428437279912356, "grad_norm": 0.0, "learning_rate": 9.099586199779836e-06, "loss": 1.041, "step": 13874 }, { "epoch": 0.5428828546834651, "grad_norm": 0.0, "learning_rate": 9.098324123360375e-06, "loss": 1.2017, "step": 13875 }, { "epoch": 0.5429219813756945, "grad_norm": 0.0, "learning_rate": 9.0970620614207e-06, "loss": 0.9596, "step": 13876 }, { "epoch": 0.542961108067924, "grad_norm": 0.0, "learning_rate": 9.095800013981074e-06, "loss": 0.9247, "step": 13877 }, { "epoch": 0.5430002347601534, "grad_norm": 0.0, "learning_rate": 9.094537981061771e-06, "loss": 1.0772, "step": 13878 }, { "epoch": 0.5430393614523829, "grad_norm": 0.0, "learning_rate": 9.093275962683051e-06, "loss": 0.9388, "step": 13879 }, { "epoch": 0.5430784881446122, "grad_norm": 0.0, "learning_rate": 9.092013958865185e-06, "loss": 1.062, "step": 13880 }, { "epoch": 0.5431176148368417, "grad_norm": 0.0, "learning_rate": 9.09075196962843e-06, "loss": 1.0436, "step": 13881 }, { "epoch": 0.5431567415290711, "grad_norm": 0.0, "learning_rate": 9.089489994993066e-06, "loss": 1.0699, "step": 13882 }, { "epoch": 0.5431958682213006, "grad_norm": 0.0, "learning_rate": 9.08822803497935e-06, "loss": 0.8737, "step": 13883 }, { "epoch": 0.54323499491353, "grad_norm": 0.0, "learning_rate": 9.086966089607551e-06, "loss": 1.0631, "step": 13884 }, { "epoch": 0.5432741216057595, "grad_norm": 0.0, "learning_rate": 9.08570415889793e-06, "loss": 1.0602, "step": 13885 }, { "epoch": 0.5433132482979889, "grad_norm": 0.0, "learning_rate": 9.084442242870752e-06, "loss": 1.0956, "step": 13886 }, { "epoch": 0.5433523749902184, "grad_norm": 0.0, "learning_rate": 9.08318034154629e-06, "loss": 1.0983, "step": 13887 }, { "epoch": 0.5433915016824478, "grad_norm": 0.0, "learning_rate": 9.0819184549448e-06, "loss": 1.0834, "step": 13888 }, { "epoch": 0.5434306283746771, "grad_norm": 0.0, "learning_rate": 9.08065658308655e-06, "loss": 1.0459, "step": 13889 }, { "epoch": 0.5434697550669066, "grad_norm": 0.0, "learning_rate": 9.079394725991799e-06, "loss": 1.019, "step": 13890 }, { "epoch": 0.543508881759136, "grad_norm": 0.0, "learning_rate": 9.07813288368082e-06, "loss": 0.9958, "step": 13891 }, { "epoch": 0.5435480084513655, "grad_norm": 0.0, "learning_rate": 9.076871056173872e-06, "loss": 0.9623, "step": 13892 }, { "epoch": 0.5435871351435949, "grad_norm": 0.0, "learning_rate": 9.075609243491218e-06, "loss": 0.9775, "step": 13893 }, { "epoch": 0.5436262618358244, "grad_norm": 0.0, "learning_rate": 9.07434744565312e-06, "loss": 0.9057, "step": 13894 }, { "epoch": 0.5436653885280538, "grad_norm": 0.0, "learning_rate": 9.07308566267984e-06, "loss": 1.0917, "step": 13895 }, { "epoch": 0.5437045152202833, "grad_norm": 0.0, "learning_rate": 9.071823894591645e-06, "loss": 1.2031, "step": 13896 }, { "epoch": 0.5437436419125127, "grad_norm": 0.0, "learning_rate": 9.070562141408795e-06, "loss": 0.9759, "step": 13897 }, { "epoch": 0.5437827686047422, "grad_norm": 0.0, "learning_rate": 9.069300403151555e-06, "loss": 1.1367, "step": 13898 }, { "epoch": 0.5438218952969716, "grad_norm": 0.0, "learning_rate": 9.068038679840176e-06, "loss": 1.0526, "step": 13899 }, { "epoch": 0.543861021989201, "grad_norm": 0.0, "learning_rate": 9.066776971494935e-06, "loss": 1.0146, "step": 13900 }, { "epoch": 0.5439001486814304, "grad_norm": 0.0, "learning_rate": 9.065515278136086e-06, "loss": 0.8638, "step": 13901 }, { "epoch": 0.5439392753736599, "grad_norm": 0.0, "learning_rate": 9.064253599783891e-06, "loss": 1.0715, "step": 13902 }, { "epoch": 0.5439784020658893, "grad_norm": 0.0, "learning_rate": 9.06299193645861e-06, "loss": 1.1452, "step": 13903 }, { "epoch": 0.5440175287581188, "grad_norm": 0.0, "learning_rate": 9.0617302881805e-06, "loss": 1.1572, "step": 13904 }, { "epoch": 0.5440566554503482, "grad_norm": 0.0, "learning_rate": 9.06046865496983e-06, "loss": 0.9764, "step": 13905 }, { "epoch": 0.5440957821425777, "grad_norm": 0.0, "learning_rate": 9.059207036846857e-06, "loss": 0.9972, "step": 13906 }, { "epoch": 0.5441349088348071, "grad_norm": 0.0, "learning_rate": 9.05794543383184e-06, "loss": 1.064, "step": 13907 }, { "epoch": 0.5441740355270366, "grad_norm": 0.0, "learning_rate": 9.056683845945034e-06, "loss": 0.9745, "step": 13908 }, { "epoch": 0.544213162219266, "grad_norm": 0.0, "learning_rate": 9.05542227320671e-06, "loss": 1.0582, "step": 13909 }, { "epoch": 0.5442522889114955, "grad_norm": 0.0, "learning_rate": 9.054160715637117e-06, "loss": 0.9819, "step": 13910 }, { "epoch": 0.5442914156037248, "grad_norm": 0.0, "learning_rate": 9.05289917325652e-06, "loss": 1.0514, "step": 13911 }, { "epoch": 0.5443305422959543, "grad_norm": 0.0, "learning_rate": 9.051637646085171e-06, "loss": 0.9831, "step": 13912 }, { "epoch": 0.5443696689881837, "grad_norm": 0.0, "learning_rate": 9.050376134143339e-06, "loss": 0.978, "step": 13913 }, { "epoch": 0.5444087956804132, "grad_norm": 0.0, "learning_rate": 9.049114637451271e-06, "loss": 0.9439, "step": 13914 }, { "epoch": 0.5444479223726426, "grad_norm": 0.0, "learning_rate": 9.047853156029234e-06, "loss": 0.9121, "step": 13915 }, { "epoch": 0.5444870490648721, "grad_norm": 0.0, "learning_rate": 9.046591689897481e-06, "loss": 1.1735, "step": 13916 }, { "epoch": 0.5445261757571015, "grad_norm": 0.0, "learning_rate": 9.045330239076269e-06, "loss": 0.9662, "step": 13917 }, { "epoch": 0.5445653024493309, "grad_norm": 0.0, "learning_rate": 9.04406880358586e-06, "loss": 1.0955, "step": 13918 }, { "epoch": 0.5446044291415604, "grad_norm": 0.0, "learning_rate": 9.042807383446508e-06, "loss": 1.0791, "step": 13919 }, { "epoch": 0.5446435558337898, "grad_norm": 0.0, "learning_rate": 9.041545978678467e-06, "loss": 0.9577, "step": 13920 }, { "epoch": 0.5446826825260193, "grad_norm": 0.0, "learning_rate": 9.040284589301997e-06, "loss": 0.9628, "step": 13921 }, { "epoch": 0.5447218092182486, "grad_norm": 0.0, "learning_rate": 9.039023215337357e-06, "loss": 1.0717, "step": 13922 }, { "epoch": 0.5447609359104781, "grad_norm": 0.0, "learning_rate": 9.0377618568048e-06, "loss": 0.955, "step": 13923 }, { "epoch": 0.5448000626027075, "grad_norm": 0.0, "learning_rate": 9.03650051372458e-06, "loss": 0.9854, "step": 13924 }, { "epoch": 0.544839189294937, "grad_norm": 0.0, "learning_rate": 9.035239186116957e-06, "loss": 1.1236, "step": 13925 }, { "epoch": 0.5448783159871664, "grad_norm": 0.0, "learning_rate": 9.033977874002177e-06, "loss": 1.0331, "step": 13926 }, { "epoch": 0.5449174426793959, "grad_norm": 0.0, "learning_rate": 9.032716577400508e-06, "loss": 1.0962, "step": 13927 }, { "epoch": 0.5449565693716253, "grad_norm": 0.0, "learning_rate": 9.031455296332196e-06, "loss": 0.9542, "step": 13928 }, { "epoch": 0.5449956960638548, "grad_norm": 0.0, "learning_rate": 9.0301940308175e-06, "loss": 1.0916, "step": 13929 }, { "epoch": 0.5450348227560842, "grad_norm": 0.0, "learning_rate": 9.028932780876669e-06, "loss": 1.15, "step": 13930 }, { "epoch": 0.5450739494483137, "grad_norm": 0.0, "learning_rate": 9.027671546529965e-06, "loss": 1.0938, "step": 13931 }, { "epoch": 0.545113076140543, "grad_norm": 0.0, "learning_rate": 9.026410327797637e-06, "loss": 1.0528, "step": 13932 }, { "epoch": 0.5451522028327725, "grad_norm": 0.0, "learning_rate": 9.025149124699938e-06, "loss": 0.9017, "step": 13933 }, { "epoch": 0.5451913295250019, "grad_norm": 0.0, "learning_rate": 9.023887937257126e-06, "loss": 1.1002, "step": 13934 }, { "epoch": 0.5452304562172314, "grad_norm": 0.0, "learning_rate": 9.022626765489443e-06, "loss": 1.0236, "step": 13935 }, { "epoch": 0.5452695829094608, "grad_norm": 0.0, "learning_rate": 9.021365609417155e-06, "loss": 1.0284, "step": 13936 }, { "epoch": 0.5453087096016903, "grad_norm": 0.0, "learning_rate": 9.020104469060508e-06, "loss": 1.0844, "step": 13937 }, { "epoch": 0.5453478362939197, "grad_norm": 0.0, "learning_rate": 9.018843344439756e-06, "loss": 1.1092, "step": 13938 }, { "epoch": 0.5453869629861492, "grad_norm": 0.0, "learning_rate": 9.017582235575147e-06, "loss": 1.149, "step": 13939 }, { "epoch": 0.5454260896783786, "grad_norm": 0.0, "learning_rate": 9.016321142486938e-06, "loss": 1.1561, "step": 13940 }, { "epoch": 0.5454652163706081, "grad_norm": 0.0, "learning_rate": 9.015060065195382e-06, "loss": 1.0457, "step": 13941 }, { "epoch": 0.5455043430628375, "grad_norm": 0.0, "learning_rate": 9.013799003720725e-06, "loss": 1.1006, "step": 13942 }, { "epoch": 0.545543469755067, "grad_norm": 0.0, "learning_rate": 9.012537958083222e-06, "loss": 1.0466, "step": 13943 }, { "epoch": 0.5455825964472963, "grad_norm": 0.0, "learning_rate": 9.011276928303116e-06, "loss": 1.1091, "step": 13944 }, { "epoch": 0.5456217231395258, "grad_norm": 0.0, "learning_rate": 9.010015914400669e-06, "loss": 1.031, "step": 13945 }, { "epoch": 0.5456608498317552, "grad_norm": 0.0, "learning_rate": 9.008754916396125e-06, "loss": 1.1178, "step": 13946 }, { "epoch": 0.5456999765239846, "grad_norm": 0.0, "learning_rate": 9.007493934309737e-06, "loss": 0.9441, "step": 13947 }, { "epoch": 0.5457391032162141, "grad_norm": 0.0, "learning_rate": 9.006232968161745e-06, "loss": 1.1081, "step": 13948 }, { "epoch": 0.5457782299084435, "grad_norm": 0.0, "learning_rate": 9.004972017972414e-06, "loss": 1.0279, "step": 13949 }, { "epoch": 0.545817356600673, "grad_norm": 0.0, "learning_rate": 9.003711083761984e-06, "loss": 1.0737, "step": 13950 }, { "epoch": 0.5458564832929024, "grad_norm": 0.0, "learning_rate": 9.002450165550705e-06, "loss": 0.9675, "step": 13951 }, { "epoch": 0.5458956099851319, "grad_norm": 0.0, "learning_rate": 9.001189263358828e-06, "loss": 0.9937, "step": 13952 }, { "epoch": 0.5459347366773613, "grad_norm": 0.0, "learning_rate": 8.999928377206594e-06, "loss": 0.9131, "step": 13953 }, { "epoch": 0.5459738633695908, "grad_norm": 0.0, "learning_rate": 8.998667507114262e-06, "loss": 1.0584, "step": 13954 }, { "epoch": 0.5460129900618201, "grad_norm": 0.0, "learning_rate": 8.997406653102075e-06, "loss": 0.937, "step": 13955 }, { "epoch": 0.5460521167540496, "grad_norm": 0.0, "learning_rate": 8.99614581519028e-06, "loss": 1.0851, "step": 13956 }, { "epoch": 0.546091243446279, "grad_norm": 0.0, "learning_rate": 8.994884993399125e-06, "loss": 1.008, "step": 13957 }, { "epoch": 0.5461303701385085, "grad_norm": 0.0, "learning_rate": 8.993624187748858e-06, "loss": 1.0831, "step": 13958 }, { "epoch": 0.5461694968307379, "grad_norm": 0.0, "learning_rate": 8.992363398259724e-06, "loss": 1.1571, "step": 13959 }, { "epoch": 0.5462086235229674, "grad_norm": 0.0, "learning_rate": 8.991102624951972e-06, "loss": 0.939, "step": 13960 }, { "epoch": 0.5462477502151968, "grad_norm": 0.0, "learning_rate": 8.98984186784585e-06, "loss": 1.1198, "step": 13961 }, { "epoch": 0.5462868769074263, "grad_norm": 0.0, "learning_rate": 8.9885811269616e-06, "loss": 1.0524, "step": 13962 }, { "epoch": 0.5463260035996557, "grad_norm": 0.0, "learning_rate": 8.987320402319468e-06, "loss": 1.0292, "step": 13963 }, { "epoch": 0.5463651302918852, "grad_norm": 0.0, "learning_rate": 8.986059693939706e-06, "loss": 0.9425, "step": 13964 }, { "epoch": 0.5464042569841145, "grad_norm": 0.0, "learning_rate": 8.984799001842549e-06, "loss": 0.9934, "step": 13965 }, { "epoch": 0.546443383676344, "grad_norm": 0.0, "learning_rate": 8.983538326048249e-06, "loss": 1.0714, "step": 13966 }, { "epoch": 0.5464825103685734, "grad_norm": 0.0, "learning_rate": 8.982277666577053e-06, "loss": 0.9616, "step": 13967 }, { "epoch": 0.5465216370608029, "grad_norm": 0.0, "learning_rate": 8.981017023449202e-06, "loss": 1.0385, "step": 13968 }, { "epoch": 0.5465607637530323, "grad_norm": 0.0, "learning_rate": 8.97975639668494e-06, "loss": 1.0858, "step": 13969 }, { "epoch": 0.5465998904452618, "grad_norm": 0.0, "learning_rate": 8.978495786304507e-06, "loss": 0.9478, "step": 13970 }, { "epoch": 0.5466390171374912, "grad_norm": 0.0, "learning_rate": 8.977235192328158e-06, "loss": 1.205, "step": 13971 }, { "epoch": 0.5466781438297207, "grad_norm": 0.0, "learning_rate": 8.97597461477613e-06, "loss": 1.1362, "step": 13972 }, { "epoch": 0.5467172705219501, "grad_norm": 0.0, "learning_rate": 8.974714053668665e-06, "loss": 0.9889, "step": 13973 }, { "epoch": 0.5467563972141796, "grad_norm": 0.0, "learning_rate": 8.973453509026008e-06, "loss": 1.2443, "step": 13974 }, { "epoch": 0.546795523906409, "grad_norm": 0.0, "learning_rate": 8.972192980868397e-06, "loss": 0.9931, "step": 13975 }, { "epoch": 0.5468346505986383, "grad_norm": 0.0, "learning_rate": 8.970932469216083e-06, "loss": 0.833, "step": 13976 }, { "epoch": 0.5468737772908678, "grad_norm": 0.0, "learning_rate": 8.969671974089304e-06, "loss": 0.9948, "step": 13977 }, { "epoch": 0.5469129039830972, "grad_norm": 0.0, "learning_rate": 8.968411495508303e-06, "loss": 1.0385, "step": 13978 }, { "epoch": 0.5469520306753267, "grad_norm": 0.0, "learning_rate": 8.967151033493315e-06, "loss": 1.1031, "step": 13979 }, { "epoch": 0.5469911573675561, "grad_norm": 0.0, "learning_rate": 8.965890588064593e-06, "loss": 1.0814, "step": 13980 }, { "epoch": 0.5470302840597856, "grad_norm": 0.0, "learning_rate": 8.964630159242373e-06, "loss": 1.0807, "step": 13981 }, { "epoch": 0.547069410752015, "grad_norm": 0.0, "learning_rate": 8.963369747046893e-06, "loss": 0.9818, "step": 13982 }, { "epoch": 0.5471085374442445, "grad_norm": 0.0, "learning_rate": 8.962109351498397e-06, "loss": 1.0367, "step": 13983 }, { "epoch": 0.5471476641364739, "grad_norm": 0.0, "learning_rate": 8.960848972617119e-06, "loss": 0.9427, "step": 13984 }, { "epoch": 0.5471867908287034, "grad_norm": 0.0, "learning_rate": 8.95958861042331e-06, "loss": 0.9736, "step": 13985 }, { "epoch": 0.5472259175209327, "grad_norm": 0.0, "learning_rate": 8.958328264937203e-06, "loss": 1.0189, "step": 13986 }, { "epoch": 0.5472650442131622, "grad_norm": 0.0, "learning_rate": 8.95706793617904e-06, "loss": 1.1244, "step": 13987 }, { "epoch": 0.5473041709053916, "grad_norm": 0.0, "learning_rate": 8.955807624169054e-06, "loss": 0.9852, "step": 13988 }, { "epoch": 0.5473432975976211, "grad_norm": 0.0, "learning_rate": 8.954547328927494e-06, "loss": 1.0691, "step": 13989 }, { "epoch": 0.5473824242898505, "grad_norm": 0.0, "learning_rate": 8.953287050474592e-06, "loss": 1.0767, "step": 13990 }, { "epoch": 0.54742155098208, "grad_norm": 0.0, "learning_rate": 8.95202678883059e-06, "loss": 1.0398, "step": 13991 }, { "epoch": 0.5474606776743094, "grad_norm": 0.0, "learning_rate": 8.950766544015726e-06, "loss": 0.8929, "step": 13992 }, { "epoch": 0.5474998043665389, "grad_norm": 0.0, "learning_rate": 8.94950631605023e-06, "loss": 1.044, "step": 13993 }, { "epoch": 0.5475389310587683, "grad_norm": 0.0, "learning_rate": 8.948246104954351e-06, "loss": 1.0642, "step": 13994 }, { "epoch": 0.5475780577509978, "grad_norm": 0.0, "learning_rate": 8.946985910748322e-06, "loss": 1.0749, "step": 13995 }, { "epoch": 0.5476171844432272, "grad_norm": 0.0, "learning_rate": 8.94572573345238e-06, "loss": 1.0612, "step": 13996 }, { "epoch": 0.5476563111354567, "grad_norm": 0.0, "learning_rate": 8.944465573086757e-06, "loss": 1.232, "step": 13997 }, { "epoch": 0.547695437827686, "grad_norm": 0.0, "learning_rate": 8.943205429671697e-06, "loss": 1.0278, "step": 13998 }, { "epoch": 0.5477345645199155, "grad_norm": 0.0, "learning_rate": 8.941945303227436e-06, "loss": 1.1161, "step": 13999 }, { "epoch": 0.5477736912121449, "grad_norm": 0.0, "learning_rate": 8.940685193774207e-06, "loss": 1.0292, "step": 14000 }, { "epoch": 0.5478128179043744, "grad_norm": 0.0, "learning_rate": 8.939425101332245e-06, "loss": 1.0168, "step": 14001 }, { "epoch": 0.5478519445966038, "grad_norm": 0.0, "learning_rate": 8.938165025921786e-06, "loss": 1.0742, "step": 14002 }, { "epoch": 0.5478910712888332, "grad_norm": 0.0, "learning_rate": 8.936904967563066e-06, "loss": 1.1814, "step": 14003 }, { "epoch": 0.5479301979810627, "grad_norm": 0.0, "learning_rate": 8.935644926276322e-06, "loss": 1.0023, "step": 14004 }, { "epoch": 0.5479693246732921, "grad_norm": 0.0, "learning_rate": 8.934384902081788e-06, "loss": 0.9787, "step": 14005 }, { "epoch": 0.5480084513655216, "grad_norm": 0.0, "learning_rate": 8.933124894999693e-06, "loss": 1.0224, "step": 14006 }, { "epoch": 0.548047578057751, "grad_norm": 0.0, "learning_rate": 8.931864905050277e-06, "loss": 1.006, "step": 14007 }, { "epoch": 0.5480867047499804, "grad_norm": 0.0, "learning_rate": 8.930604932253776e-06, "loss": 1.0541, "step": 14008 }, { "epoch": 0.5481258314422098, "grad_norm": 0.0, "learning_rate": 8.929344976630414e-06, "loss": 1.0238, "step": 14009 }, { "epoch": 0.5481649581344393, "grad_norm": 0.0, "learning_rate": 8.928085038200433e-06, "loss": 1.051, "step": 14010 }, { "epoch": 0.5482040848266687, "grad_norm": 0.0, "learning_rate": 8.926825116984063e-06, "loss": 1.0793, "step": 14011 }, { "epoch": 0.5482432115188982, "grad_norm": 0.0, "learning_rate": 8.925565213001536e-06, "loss": 1.075, "step": 14012 }, { "epoch": 0.5482823382111276, "grad_norm": 0.0, "learning_rate": 8.924305326273087e-06, "loss": 1.1647, "step": 14013 }, { "epoch": 0.5483214649033571, "grad_norm": 0.0, "learning_rate": 8.923045456818947e-06, "loss": 1.0405, "step": 14014 }, { "epoch": 0.5483605915955865, "grad_norm": 0.0, "learning_rate": 8.921785604659342e-06, "loss": 1.0606, "step": 14015 }, { "epoch": 0.548399718287816, "grad_norm": 0.0, "learning_rate": 8.920525769814514e-06, "loss": 1.0524, "step": 14016 }, { "epoch": 0.5484388449800454, "grad_norm": 0.0, "learning_rate": 8.91926595230469e-06, "loss": 1.1039, "step": 14017 }, { "epoch": 0.5484779716722749, "grad_norm": 0.0, "learning_rate": 8.918006152150099e-06, "loss": 0.9402, "step": 14018 }, { "epoch": 0.5485170983645042, "grad_norm": 0.0, "learning_rate": 8.916746369370975e-06, "loss": 1.0022, "step": 14019 }, { "epoch": 0.5485562250567337, "grad_norm": 0.0, "learning_rate": 8.91548660398754e-06, "loss": 1.0623, "step": 14020 }, { "epoch": 0.5485953517489631, "grad_norm": 0.0, "learning_rate": 8.91422685602004e-06, "loss": 1.0594, "step": 14021 }, { "epoch": 0.5486344784411926, "grad_norm": 0.0, "learning_rate": 8.912967125488692e-06, "loss": 1.1069, "step": 14022 }, { "epoch": 0.548673605133422, "grad_norm": 0.0, "learning_rate": 8.911707412413732e-06, "loss": 1.0364, "step": 14023 }, { "epoch": 0.5487127318256515, "grad_norm": 0.0, "learning_rate": 8.910447716815383e-06, "loss": 1.1122, "step": 14024 }, { "epoch": 0.5487518585178809, "grad_norm": 0.0, "learning_rate": 8.909188038713881e-06, "loss": 1.0823, "step": 14025 }, { "epoch": 0.5487909852101104, "grad_norm": 0.0, "learning_rate": 8.907928378129453e-06, "loss": 1.025, "step": 14026 }, { "epoch": 0.5488301119023398, "grad_norm": 0.0, "learning_rate": 8.906668735082327e-06, "loss": 1.0514, "step": 14027 }, { "epoch": 0.5488692385945693, "grad_norm": 0.0, "learning_rate": 8.905409109592727e-06, "loss": 1.023, "step": 14028 }, { "epoch": 0.5489083652867986, "grad_norm": 0.0, "learning_rate": 8.904149501680888e-06, "loss": 1.0928, "step": 14029 }, { "epoch": 0.5489474919790281, "grad_norm": 0.0, "learning_rate": 8.902889911367038e-06, "loss": 1.1962, "step": 14030 }, { "epoch": 0.5489866186712575, "grad_norm": 0.0, "learning_rate": 8.9016303386714e-06, "loss": 1.0835, "step": 14031 }, { "epoch": 0.5490257453634869, "grad_norm": 0.0, "learning_rate": 8.900370783614201e-06, "loss": 0.9064, "step": 14032 }, { "epoch": 0.5490648720557164, "grad_norm": 0.0, "learning_rate": 8.899111246215665e-06, "loss": 0.9001, "step": 14033 }, { "epoch": 0.5491039987479458, "grad_norm": 0.0, "learning_rate": 8.897851726496028e-06, "loss": 1.0424, "step": 14034 }, { "epoch": 0.5491431254401753, "grad_norm": 0.0, "learning_rate": 8.89659222447551e-06, "loss": 1.0434, "step": 14035 }, { "epoch": 0.5491822521324047, "grad_norm": 0.0, "learning_rate": 8.89533274017434e-06, "loss": 0.9904, "step": 14036 }, { "epoch": 0.5492213788246342, "grad_norm": 0.0, "learning_rate": 8.894073273612738e-06, "loss": 0.9098, "step": 14037 }, { "epoch": 0.5492605055168636, "grad_norm": 0.0, "learning_rate": 8.892813824810936e-06, "loss": 0.9964, "step": 14038 }, { "epoch": 0.5492996322090931, "grad_norm": 0.0, "learning_rate": 8.891554393789157e-06, "loss": 0.9648, "step": 14039 }, { "epoch": 0.5493387589013224, "grad_norm": 0.0, "learning_rate": 8.890294980567626e-06, "loss": 1.0398, "step": 14040 }, { "epoch": 0.5493778855935519, "grad_norm": 0.0, "learning_rate": 8.889035585166567e-06, "loss": 1.0208, "step": 14041 }, { "epoch": 0.5494170122857813, "grad_norm": 0.0, "learning_rate": 8.8877762076062e-06, "loss": 1.0488, "step": 14042 }, { "epoch": 0.5494561389780108, "grad_norm": 0.0, "learning_rate": 8.886516847906757e-06, "loss": 1.0201, "step": 14043 }, { "epoch": 0.5494952656702402, "grad_norm": 0.0, "learning_rate": 8.885257506088459e-06, "loss": 0.9932, "step": 14044 }, { "epoch": 0.5495343923624697, "grad_norm": 0.0, "learning_rate": 8.88399818217153e-06, "loss": 1.0342, "step": 14045 }, { "epoch": 0.5495735190546991, "grad_norm": 0.0, "learning_rate": 8.882738876176188e-06, "loss": 1.1172, "step": 14046 }, { "epoch": 0.5496126457469286, "grad_norm": 0.0, "learning_rate": 8.881479588122662e-06, "loss": 1.0766, "step": 14047 }, { "epoch": 0.549651772439158, "grad_norm": 0.0, "learning_rate": 8.880220318031168e-06, "loss": 1.002, "step": 14048 }, { "epoch": 0.5496908991313875, "grad_norm": 0.0, "learning_rate": 8.878961065921937e-06, "loss": 1.1501, "step": 14049 }, { "epoch": 0.5497300258236169, "grad_norm": 0.0, "learning_rate": 8.877701831815186e-06, "loss": 1.081, "step": 14050 }, { "epoch": 0.5497691525158463, "grad_norm": 0.0, "learning_rate": 8.876442615731136e-06, "loss": 1.0541, "step": 14051 }, { "epoch": 0.5498082792080757, "grad_norm": 0.0, "learning_rate": 8.875183417690011e-06, "loss": 0.9972, "step": 14052 }, { "epoch": 0.5498474059003052, "grad_norm": 0.0, "learning_rate": 8.87392423771203e-06, "loss": 1.0917, "step": 14053 }, { "epoch": 0.5498865325925346, "grad_norm": 0.0, "learning_rate": 8.872665075817414e-06, "loss": 1.0477, "step": 14054 }, { "epoch": 0.5499256592847641, "grad_norm": 0.0, "learning_rate": 8.871405932026383e-06, "loss": 1.0081, "step": 14055 }, { "epoch": 0.5499647859769935, "grad_norm": 0.0, "learning_rate": 8.870146806359163e-06, "loss": 1.0796, "step": 14056 }, { "epoch": 0.550003912669223, "grad_norm": 0.0, "learning_rate": 8.868887698835968e-06, "loss": 1.0818, "step": 14057 }, { "epoch": 0.5500430393614524, "grad_norm": 0.0, "learning_rate": 8.867628609477019e-06, "loss": 1.1707, "step": 14058 }, { "epoch": 0.5500821660536819, "grad_norm": 0.0, "learning_rate": 8.866369538302535e-06, "loss": 1.0934, "step": 14059 }, { "epoch": 0.5501212927459113, "grad_norm": 0.0, "learning_rate": 8.865110485332731e-06, "loss": 1.0588, "step": 14060 }, { "epoch": 0.5501604194381406, "grad_norm": 0.0, "learning_rate": 8.863851450587837e-06, "loss": 1.0598, "step": 14061 }, { "epoch": 0.5501995461303701, "grad_norm": 0.0, "learning_rate": 8.862592434088063e-06, "loss": 0.9261, "step": 14062 }, { "epoch": 0.5502386728225995, "grad_norm": 0.0, "learning_rate": 8.86133343585363e-06, "loss": 1.0607, "step": 14063 }, { "epoch": 0.550277799514829, "grad_norm": 0.0, "learning_rate": 8.860074455904753e-06, "loss": 1.014, "step": 14064 }, { "epoch": 0.5503169262070584, "grad_norm": 0.0, "learning_rate": 8.858815494261653e-06, "loss": 1.1822, "step": 14065 }, { "epoch": 0.5503560528992879, "grad_norm": 0.0, "learning_rate": 8.857556550944548e-06, "loss": 1.0945, "step": 14066 }, { "epoch": 0.5503951795915173, "grad_norm": 0.0, "learning_rate": 8.856297625973652e-06, "loss": 1.0049, "step": 14067 }, { "epoch": 0.5504343062837468, "grad_norm": 0.0, "learning_rate": 8.855038719369185e-06, "loss": 0.9844, "step": 14068 }, { "epoch": 0.5504734329759762, "grad_norm": 0.0, "learning_rate": 8.853779831151355e-06, "loss": 0.9807, "step": 14069 }, { "epoch": 0.5505125596682057, "grad_norm": 0.0, "learning_rate": 8.852520961340389e-06, "loss": 1.0112, "step": 14070 }, { "epoch": 0.550551686360435, "grad_norm": 0.0, "learning_rate": 8.8512621099565e-06, "loss": 1.092, "step": 14071 }, { "epoch": 0.5505908130526646, "grad_norm": 0.0, "learning_rate": 8.850003277019901e-06, "loss": 1.0909, "step": 14072 }, { "epoch": 0.5506299397448939, "grad_norm": 0.0, "learning_rate": 8.848744462550804e-06, "loss": 0.8909, "step": 14073 }, { "epoch": 0.5506690664371234, "grad_norm": 0.0, "learning_rate": 8.847485666569434e-06, "loss": 1.1405, "step": 14074 }, { "epoch": 0.5507081931293528, "grad_norm": 0.0, "learning_rate": 8.846226889095998e-06, "loss": 1.0898, "step": 14075 }, { "epoch": 0.5507473198215823, "grad_norm": 0.0, "learning_rate": 8.844968130150714e-06, "loss": 0.9536, "step": 14076 }, { "epoch": 0.5507864465138117, "grad_norm": 0.0, "learning_rate": 8.843709389753792e-06, "loss": 0.9403, "step": 14077 }, { "epoch": 0.5508255732060412, "grad_norm": 0.0, "learning_rate": 8.842450667925446e-06, "loss": 0.9451, "step": 14078 }, { "epoch": 0.5508646998982706, "grad_norm": 0.0, "learning_rate": 8.841191964685896e-06, "loss": 0.983, "step": 14079 }, { "epoch": 0.5509038265905001, "grad_norm": 0.0, "learning_rate": 8.839933280055352e-06, "loss": 1.0579, "step": 14080 }, { "epoch": 0.5509429532827295, "grad_norm": 0.0, "learning_rate": 8.838674614054024e-06, "loss": 0.9144, "step": 14081 }, { "epoch": 0.550982079974959, "grad_norm": 0.0, "learning_rate": 8.837415966702123e-06, "loss": 1.0603, "step": 14082 }, { "epoch": 0.5510212066671883, "grad_norm": 0.0, "learning_rate": 8.836157338019869e-06, "loss": 1.0349, "step": 14083 }, { "epoch": 0.5510603333594178, "grad_norm": 0.0, "learning_rate": 8.83489872802747e-06, "loss": 1.0541, "step": 14084 }, { "epoch": 0.5510994600516472, "grad_norm": 0.0, "learning_rate": 8.833640136745137e-06, "loss": 1.0095, "step": 14085 }, { "epoch": 0.5511385867438767, "grad_norm": 0.0, "learning_rate": 8.83238156419308e-06, "loss": 1.2173, "step": 14086 }, { "epoch": 0.5511777134361061, "grad_norm": 0.0, "learning_rate": 8.83112301039151e-06, "loss": 0.9911, "step": 14087 }, { "epoch": 0.5512168401283356, "grad_norm": 0.0, "learning_rate": 8.829864475360646e-06, "loss": 1.14, "step": 14088 }, { "epoch": 0.551255966820565, "grad_norm": 0.0, "learning_rate": 8.82860595912069e-06, "loss": 1.1001, "step": 14089 }, { "epoch": 0.5512950935127944, "grad_norm": 0.0, "learning_rate": 8.827347461691853e-06, "loss": 1.1255, "step": 14090 }, { "epoch": 0.5513342202050239, "grad_norm": 0.0, "learning_rate": 8.826088983094347e-06, "loss": 0.9696, "step": 14091 }, { "epoch": 0.5513733468972533, "grad_norm": 0.0, "learning_rate": 8.824830523348383e-06, "loss": 1.0543, "step": 14092 }, { "epoch": 0.5514124735894828, "grad_norm": 0.0, "learning_rate": 8.823572082474165e-06, "loss": 0.9575, "step": 14093 }, { "epoch": 0.5514516002817121, "grad_norm": 0.0, "learning_rate": 8.822313660491905e-06, "loss": 1.0554, "step": 14094 }, { "epoch": 0.5514907269739416, "grad_norm": 0.0, "learning_rate": 8.821055257421813e-06, "loss": 1.0309, "step": 14095 }, { "epoch": 0.551529853666171, "grad_norm": 0.0, "learning_rate": 8.819796873284098e-06, "loss": 0.9818, "step": 14096 }, { "epoch": 0.5515689803584005, "grad_norm": 0.0, "learning_rate": 8.818538508098965e-06, "loss": 0.9736, "step": 14097 }, { "epoch": 0.5516081070506299, "grad_norm": 0.0, "learning_rate": 8.817280161886624e-06, "loss": 1.0132, "step": 14098 }, { "epoch": 0.5516472337428594, "grad_norm": 0.0, "learning_rate": 8.81602183466728e-06, "loss": 0.9031, "step": 14099 }, { "epoch": 0.5516863604350888, "grad_norm": 0.0, "learning_rate": 8.81476352646114e-06, "loss": 1.058, "step": 14100 }, { "epoch": 0.5517254871273183, "grad_norm": 0.0, "learning_rate": 8.813505237288416e-06, "loss": 1.009, "step": 14101 }, { "epoch": 0.5517646138195477, "grad_norm": 0.0, "learning_rate": 8.812246967169312e-06, "loss": 1.0047, "step": 14102 }, { "epoch": 0.5518037405117772, "grad_norm": 0.0, "learning_rate": 8.810988716124032e-06, "loss": 1.0386, "step": 14103 }, { "epoch": 0.5518428672040065, "grad_norm": 0.0, "learning_rate": 8.80973048417278e-06, "loss": 1.1073, "step": 14104 }, { "epoch": 0.551881993896236, "grad_norm": 0.0, "learning_rate": 8.808472271335767e-06, "loss": 1.0067, "step": 14105 }, { "epoch": 0.5519211205884654, "grad_norm": 0.0, "learning_rate": 8.807214077633198e-06, "loss": 1.1812, "step": 14106 }, { "epoch": 0.5519602472806949, "grad_norm": 0.0, "learning_rate": 8.805955903085277e-06, "loss": 1.1749, "step": 14107 }, { "epoch": 0.5519993739729243, "grad_norm": 0.0, "learning_rate": 8.804697747712206e-06, "loss": 0.9899, "step": 14108 }, { "epoch": 0.5520385006651538, "grad_norm": 0.0, "learning_rate": 8.803439611534187e-06, "loss": 1.0935, "step": 14109 }, { "epoch": 0.5520776273573832, "grad_norm": 0.0, "learning_rate": 8.802181494571435e-06, "loss": 1.1671, "step": 14110 }, { "epoch": 0.5521167540496127, "grad_norm": 0.0, "learning_rate": 8.800923396844147e-06, "loss": 1.1059, "step": 14111 }, { "epoch": 0.5521558807418421, "grad_norm": 0.0, "learning_rate": 8.799665318372526e-06, "loss": 1.1186, "step": 14112 }, { "epoch": 0.5521950074340716, "grad_norm": 0.0, "learning_rate": 8.79840725917677e-06, "loss": 0.8793, "step": 14113 }, { "epoch": 0.552234134126301, "grad_norm": 0.0, "learning_rate": 8.797149219277094e-06, "loss": 1.0879, "step": 14114 }, { "epoch": 0.5522732608185305, "grad_norm": 0.0, "learning_rate": 8.795891198693694e-06, "loss": 1.0959, "step": 14115 }, { "epoch": 0.5523123875107598, "grad_norm": 0.0, "learning_rate": 8.79463319744677e-06, "loss": 0.9131, "step": 14116 }, { "epoch": 0.5523515142029892, "grad_norm": 0.0, "learning_rate": 8.79337521555653e-06, "loss": 1.1047, "step": 14117 }, { "epoch": 0.5523906408952187, "grad_norm": 0.0, "learning_rate": 8.792117253043166e-06, "loss": 0.8925, "step": 14118 }, { "epoch": 0.5524297675874481, "grad_norm": 0.0, "learning_rate": 8.790859309926891e-06, "loss": 1.1133, "step": 14119 }, { "epoch": 0.5524688942796776, "grad_norm": 0.0, "learning_rate": 8.789601386227899e-06, "loss": 1.0943, "step": 14120 }, { "epoch": 0.552508020971907, "grad_norm": 0.0, "learning_rate": 8.788343481966393e-06, "loss": 0.9841, "step": 14121 }, { "epoch": 0.5525471476641365, "grad_norm": 0.0, "learning_rate": 8.787085597162568e-06, "loss": 1.005, "step": 14122 }, { "epoch": 0.5525862743563659, "grad_norm": 0.0, "learning_rate": 8.785827731836631e-06, "loss": 0.8887, "step": 14123 }, { "epoch": 0.5526254010485954, "grad_norm": 0.0, "learning_rate": 8.784569886008781e-06, "loss": 0.9661, "step": 14124 }, { "epoch": 0.5526645277408248, "grad_norm": 0.0, "learning_rate": 8.783312059699215e-06, "loss": 1.0652, "step": 14125 }, { "epoch": 0.5527036544330542, "grad_norm": 0.0, "learning_rate": 8.782054252928132e-06, "loss": 1.1127, "step": 14126 }, { "epoch": 0.5527427811252836, "grad_norm": 0.0, "learning_rate": 8.78079646571573e-06, "loss": 0.9003, "step": 14127 }, { "epoch": 0.5527819078175131, "grad_norm": 0.0, "learning_rate": 8.77953869808221e-06, "loss": 1.0944, "step": 14128 }, { "epoch": 0.5528210345097425, "grad_norm": 0.0, "learning_rate": 8.77828095004777e-06, "loss": 1.0978, "step": 14129 }, { "epoch": 0.552860161201972, "grad_norm": 0.0, "learning_rate": 8.777023221632609e-06, "loss": 1.1934, "step": 14130 }, { "epoch": 0.5528992878942014, "grad_norm": 0.0, "learning_rate": 8.775765512856919e-06, "loss": 0.9171, "step": 14131 }, { "epoch": 0.5529384145864309, "grad_norm": 0.0, "learning_rate": 8.7745078237409e-06, "loss": 1.1401, "step": 14132 }, { "epoch": 0.5529775412786603, "grad_norm": 0.0, "learning_rate": 8.773250154304754e-06, "loss": 0.9334, "step": 14133 }, { "epoch": 0.5530166679708898, "grad_norm": 0.0, "learning_rate": 8.77199250456867e-06, "loss": 1.0674, "step": 14134 }, { "epoch": 0.5530557946631192, "grad_norm": 0.0, "learning_rate": 8.770734874552851e-06, "loss": 1.0483, "step": 14135 }, { "epoch": 0.5530949213553487, "grad_norm": 0.0, "learning_rate": 8.769477264277485e-06, "loss": 1.2802, "step": 14136 }, { "epoch": 0.553134048047578, "grad_norm": 0.0, "learning_rate": 8.768219673762775e-06, "loss": 0.9799, "step": 14137 }, { "epoch": 0.5531731747398075, "grad_norm": 0.0, "learning_rate": 8.766962103028912e-06, "loss": 1.0598, "step": 14138 }, { "epoch": 0.5532123014320369, "grad_norm": 0.0, "learning_rate": 8.765704552096094e-06, "loss": 1.0288, "step": 14139 }, { "epoch": 0.5532514281242664, "grad_norm": 0.0, "learning_rate": 8.764447020984513e-06, "loss": 1.0553, "step": 14140 }, { "epoch": 0.5532905548164958, "grad_norm": 0.0, "learning_rate": 8.763189509714367e-06, "loss": 1.0483, "step": 14141 }, { "epoch": 0.5533296815087253, "grad_norm": 0.0, "learning_rate": 8.761932018305846e-06, "loss": 0.9946, "step": 14142 }, { "epoch": 0.5533688082009547, "grad_norm": 0.0, "learning_rate": 8.760674546779144e-06, "loss": 0.9258, "step": 14143 }, { "epoch": 0.5534079348931842, "grad_norm": 0.0, "learning_rate": 8.759417095154456e-06, "loss": 1.0838, "step": 14144 }, { "epoch": 0.5534470615854136, "grad_norm": 0.0, "learning_rate": 8.758159663451978e-06, "loss": 1.028, "step": 14145 }, { "epoch": 0.553486188277643, "grad_norm": 0.0, "learning_rate": 8.7569022516919e-06, "loss": 0.9513, "step": 14146 }, { "epoch": 0.5535253149698725, "grad_norm": 0.0, "learning_rate": 8.755644859894412e-06, "loss": 1.0444, "step": 14147 }, { "epoch": 0.5535644416621018, "grad_norm": 0.0, "learning_rate": 8.754387488079708e-06, "loss": 1.0585, "step": 14148 }, { "epoch": 0.5536035683543313, "grad_norm": 0.0, "learning_rate": 8.753130136267979e-06, "loss": 1.0353, "step": 14149 }, { "epoch": 0.5536426950465607, "grad_norm": 0.0, "learning_rate": 8.751872804479418e-06, "loss": 1.0261, "step": 14150 }, { "epoch": 0.5536818217387902, "grad_norm": 0.0, "learning_rate": 8.750615492734219e-06, "loss": 1.1213, "step": 14151 }, { "epoch": 0.5537209484310196, "grad_norm": 0.0, "learning_rate": 8.749358201052568e-06, "loss": 0.9806, "step": 14152 }, { "epoch": 0.5537600751232491, "grad_norm": 0.0, "learning_rate": 8.748100929454652e-06, "loss": 1.1255, "step": 14153 }, { "epoch": 0.5537992018154785, "grad_norm": 0.0, "learning_rate": 8.746843677960673e-06, "loss": 1.0076, "step": 14154 }, { "epoch": 0.553838328507708, "grad_norm": 0.0, "learning_rate": 8.745586446590813e-06, "loss": 0.9826, "step": 14155 }, { "epoch": 0.5538774551999374, "grad_norm": 0.0, "learning_rate": 8.744329235365263e-06, "loss": 1.0724, "step": 14156 }, { "epoch": 0.5539165818921669, "grad_norm": 0.0, "learning_rate": 8.743072044304212e-06, "loss": 1.0153, "step": 14157 }, { "epoch": 0.5539557085843962, "grad_norm": 0.0, "learning_rate": 8.741814873427844e-06, "loss": 1.0083, "step": 14158 }, { "epoch": 0.5539948352766257, "grad_norm": 0.0, "learning_rate": 8.740557722756358e-06, "loss": 1.1329, "step": 14159 }, { "epoch": 0.5540339619688551, "grad_norm": 0.0, "learning_rate": 8.739300592309934e-06, "loss": 1.0607, "step": 14160 }, { "epoch": 0.5540730886610846, "grad_norm": 0.0, "learning_rate": 8.738043482108767e-06, "loss": 0.9339, "step": 14161 }, { "epoch": 0.554112215353314, "grad_norm": 0.0, "learning_rate": 8.736786392173033e-06, "loss": 0.9541, "step": 14162 }, { "epoch": 0.5541513420455435, "grad_norm": 0.0, "learning_rate": 8.735529322522932e-06, "loss": 0.9958, "step": 14163 }, { "epoch": 0.5541904687377729, "grad_norm": 0.0, "learning_rate": 8.734272273178646e-06, "loss": 0.9473, "step": 14164 }, { "epoch": 0.5542295954300024, "grad_norm": 0.0, "learning_rate": 8.73301524416036e-06, "loss": 1.076, "step": 14165 }, { "epoch": 0.5542687221222318, "grad_norm": 0.0, "learning_rate": 8.731758235488263e-06, "loss": 0.8716, "step": 14166 }, { "epoch": 0.5543078488144613, "grad_norm": 0.0, "learning_rate": 8.730501247182535e-06, "loss": 1.0671, "step": 14167 }, { "epoch": 0.5543469755066907, "grad_norm": 0.0, "learning_rate": 8.72924427926337e-06, "loss": 1.0364, "step": 14168 }, { "epoch": 0.5543861021989201, "grad_norm": 0.0, "learning_rate": 8.727987331750948e-06, "loss": 1.0105, "step": 14169 }, { "epoch": 0.5544252288911495, "grad_norm": 0.0, "learning_rate": 8.726730404665458e-06, "loss": 1.0177, "step": 14170 }, { "epoch": 0.554464355583379, "grad_norm": 0.0, "learning_rate": 8.725473498027078e-06, "loss": 1.0448, "step": 14171 }, { "epoch": 0.5545034822756084, "grad_norm": 0.0, "learning_rate": 8.724216611855998e-06, "loss": 1.0278, "step": 14172 }, { "epoch": 0.5545426089678379, "grad_norm": 0.0, "learning_rate": 8.722959746172403e-06, "loss": 0.9863, "step": 14173 }, { "epoch": 0.5545817356600673, "grad_norm": 0.0, "learning_rate": 8.721702900996473e-06, "loss": 1.0102, "step": 14174 }, { "epoch": 0.5546208623522967, "grad_norm": 0.0, "learning_rate": 8.720446076348395e-06, "loss": 1.0319, "step": 14175 }, { "epoch": 0.5546599890445262, "grad_norm": 0.0, "learning_rate": 8.719189272248344e-06, "loss": 1.0919, "step": 14176 }, { "epoch": 0.5546991157367556, "grad_norm": 0.0, "learning_rate": 8.717932488716512e-06, "loss": 0.8966, "step": 14177 }, { "epoch": 0.5547382424289851, "grad_norm": 0.0, "learning_rate": 8.716675725773077e-06, "loss": 0.8908, "step": 14178 }, { "epoch": 0.5547773691212144, "grad_norm": 0.0, "learning_rate": 8.715418983438222e-06, "loss": 0.9846, "step": 14179 }, { "epoch": 0.5548164958134439, "grad_norm": 0.0, "learning_rate": 8.714162261732128e-06, "loss": 1.0683, "step": 14180 }, { "epoch": 0.5548556225056733, "grad_norm": 0.0, "learning_rate": 8.712905560674978e-06, "loss": 0.9637, "step": 14181 }, { "epoch": 0.5548947491979028, "grad_norm": 0.0, "learning_rate": 8.711648880286949e-06, "loss": 1.077, "step": 14182 }, { "epoch": 0.5549338758901322, "grad_norm": 0.0, "learning_rate": 8.710392220588229e-06, "loss": 0.8438, "step": 14183 }, { "epoch": 0.5549730025823617, "grad_norm": 0.0, "learning_rate": 8.709135581598992e-06, "loss": 1.0508, "step": 14184 }, { "epoch": 0.5550121292745911, "grad_norm": 0.0, "learning_rate": 8.707878963339417e-06, "loss": 1.073, "step": 14185 }, { "epoch": 0.5550512559668206, "grad_norm": 0.0, "learning_rate": 8.70662236582969e-06, "loss": 1.0164, "step": 14186 }, { "epoch": 0.55509038265905, "grad_norm": 0.0, "learning_rate": 8.705365789089989e-06, "loss": 1.048, "step": 14187 }, { "epoch": 0.5551295093512795, "grad_norm": 0.0, "learning_rate": 8.704109233140487e-06, "loss": 1.1735, "step": 14188 }, { "epoch": 0.5551686360435089, "grad_norm": 0.0, "learning_rate": 8.702852698001367e-06, "loss": 0.968, "step": 14189 }, { "epoch": 0.5552077627357384, "grad_norm": 0.0, "learning_rate": 8.70159618369281e-06, "loss": 1.0692, "step": 14190 }, { "epoch": 0.5552468894279677, "grad_norm": 0.0, "learning_rate": 8.70033969023499e-06, "loss": 1.0731, "step": 14191 }, { "epoch": 0.5552860161201972, "grad_norm": 0.0, "learning_rate": 8.699083217648088e-06, "loss": 0.9432, "step": 14192 }, { "epoch": 0.5553251428124266, "grad_norm": 0.0, "learning_rate": 8.697826765952278e-06, "loss": 1.1046, "step": 14193 }, { "epoch": 0.5553642695046561, "grad_norm": 0.0, "learning_rate": 8.696570335167734e-06, "loss": 1.0015, "step": 14194 }, { "epoch": 0.5554033961968855, "grad_norm": 0.0, "learning_rate": 8.695313925314643e-06, "loss": 1.0477, "step": 14195 }, { "epoch": 0.555442522889115, "grad_norm": 0.0, "learning_rate": 8.694057536413175e-06, "loss": 0.9739, "step": 14196 }, { "epoch": 0.5554816495813444, "grad_norm": 0.0, "learning_rate": 8.692801168483505e-06, "loss": 1.1174, "step": 14197 }, { "epoch": 0.5555207762735739, "grad_norm": 0.0, "learning_rate": 8.691544821545807e-06, "loss": 1.1335, "step": 14198 }, { "epoch": 0.5555599029658033, "grad_norm": 0.0, "learning_rate": 8.690288495620263e-06, "loss": 1.1272, "step": 14199 }, { "epoch": 0.5555990296580328, "grad_norm": 0.0, "learning_rate": 8.689032190727046e-06, "loss": 1.0729, "step": 14200 }, { "epoch": 0.5556381563502621, "grad_norm": 0.0, "learning_rate": 8.687775906886328e-06, "loss": 1.1806, "step": 14201 }, { "epoch": 0.5556772830424916, "grad_norm": 0.0, "learning_rate": 8.68651964411828e-06, "loss": 1.193, "step": 14202 }, { "epoch": 0.555716409734721, "grad_norm": 0.0, "learning_rate": 8.685263402443087e-06, "loss": 1.1261, "step": 14203 }, { "epoch": 0.5557555364269504, "grad_norm": 0.0, "learning_rate": 8.684007181880914e-06, "loss": 0.9805, "step": 14204 }, { "epoch": 0.5557946631191799, "grad_norm": 0.0, "learning_rate": 8.682750982451939e-06, "loss": 0.956, "step": 14205 }, { "epoch": 0.5558337898114093, "grad_norm": 0.0, "learning_rate": 8.681494804176331e-06, "loss": 1.0879, "step": 14206 }, { "epoch": 0.5558729165036388, "grad_norm": 0.0, "learning_rate": 8.68023864707426e-06, "loss": 1.0683, "step": 14207 }, { "epoch": 0.5559120431958682, "grad_norm": 0.0, "learning_rate": 8.678982511165908e-06, "loss": 1.0555, "step": 14208 }, { "epoch": 0.5559511698880977, "grad_norm": 0.0, "learning_rate": 8.67772639647144e-06, "loss": 1.1007, "step": 14209 }, { "epoch": 0.5559902965803271, "grad_norm": 0.0, "learning_rate": 8.676470303011031e-06, "loss": 1.0076, "step": 14210 }, { "epoch": 0.5560294232725566, "grad_norm": 0.0, "learning_rate": 8.675214230804844e-06, "loss": 0.9379, "step": 14211 }, { "epoch": 0.5560685499647859, "grad_norm": 0.0, "learning_rate": 8.673958179873063e-06, "loss": 1.0742, "step": 14212 }, { "epoch": 0.5561076766570154, "grad_norm": 0.0, "learning_rate": 8.67270215023585e-06, "loss": 1.1001, "step": 14213 }, { "epoch": 0.5561468033492448, "grad_norm": 0.0, "learning_rate": 8.671446141913378e-06, "loss": 0.9689, "step": 14214 }, { "epoch": 0.5561859300414743, "grad_norm": 0.0, "learning_rate": 8.670190154925816e-06, "loss": 0.91, "step": 14215 }, { "epoch": 0.5562250567337037, "grad_norm": 0.0, "learning_rate": 8.66893418929333e-06, "loss": 1.2691, "step": 14216 }, { "epoch": 0.5562641834259332, "grad_norm": 0.0, "learning_rate": 8.667678245036098e-06, "loss": 0.9892, "step": 14217 }, { "epoch": 0.5563033101181626, "grad_norm": 0.0, "learning_rate": 8.666422322174281e-06, "loss": 1.099, "step": 14218 }, { "epoch": 0.5563424368103921, "grad_norm": 0.0, "learning_rate": 8.665166420728053e-06, "loss": 1.0594, "step": 14219 }, { "epoch": 0.5563815635026215, "grad_norm": 0.0, "learning_rate": 8.663910540717577e-06, "loss": 0.9984, "step": 14220 }, { "epoch": 0.556420690194851, "grad_norm": 0.0, "learning_rate": 8.662654682163022e-06, "loss": 1.0406, "step": 14221 }, { "epoch": 0.5564598168870803, "grad_norm": 0.0, "learning_rate": 8.661398845084562e-06, "loss": 1.0535, "step": 14222 }, { "epoch": 0.5564989435793098, "grad_norm": 0.0, "learning_rate": 8.660143029502359e-06, "loss": 1.0245, "step": 14223 }, { "epoch": 0.5565380702715392, "grad_norm": 0.0, "learning_rate": 8.658887235436577e-06, "loss": 0.9255, "step": 14224 }, { "epoch": 0.5565771969637687, "grad_norm": 0.0, "learning_rate": 8.657631462907385e-06, "loss": 1.0707, "step": 14225 }, { "epoch": 0.5566163236559981, "grad_norm": 0.0, "learning_rate": 8.656375711934952e-06, "loss": 0.9704, "step": 14226 }, { "epoch": 0.5566554503482276, "grad_norm": 0.0, "learning_rate": 8.65511998253944e-06, "loss": 1.0281, "step": 14227 }, { "epoch": 0.556694577040457, "grad_norm": 0.0, "learning_rate": 8.653864274741017e-06, "loss": 0.8887, "step": 14228 }, { "epoch": 0.5567337037326865, "grad_norm": 0.0, "learning_rate": 8.652608588559845e-06, "loss": 1.0106, "step": 14229 }, { "epoch": 0.5567728304249159, "grad_norm": 0.0, "learning_rate": 8.651352924016093e-06, "loss": 1.0158, "step": 14230 }, { "epoch": 0.5568119571171453, "grad_norm": 0.0, "learning_rate": 8.650097281129922e-06, "loss": 1.0491, "step": 14231 }, { "epoch": 0.5568510838093748, "grad_norm": 0.0, "learning_rate": 8.648841659921499e-06, "loss": 0.9177, "step": 14232 }, { "epoch": 0.5568902105016041, "grad_norm": 0.0, "learning_rate": 8.647586060410981e-06, "loss": 0.9804, "step": 14233 }, { "epoch": 0.5569293371938336, "grad_norm": 0.0, "learning_rate": 8.646330482618539e-06, "loss": 1.1868, "step": 14234 }, { "epoch": 0.556968463886063, "grad_norm": 0.0, "learning_rate": 8.645074926564334e-06, "loss": 1.0065, "step": 14235 }, { "epoch": 0.5570075905782925, "grad_norm": 0.0, "learning_rate": 8.643819392268527e-06, "loss": 1.0844, "step": 14236 }, { "epoch": 0.5570467172705219, "grad_norm": 0.0, "learning_rate": 8.642563879751283e-06, "loss": 0.9967, "step": 14237 }, { "epoch": 0.5570858439627514, "grad_norm": 0.0, "learning_rate": 8.641308389032758e-06, "loss": 1.0937, "step": 14238 }, { "epoch": 0.5571249706549808, "grad_norm": 0.0, "learning_rate": 8.640052920133121e-06, "loss": 0.8242, "step": 14239 }, { "epoch": 0.5571640973472103, "grad_norm": 0.0, "learning_rate": 8.63879747307253e-06, "loss": 1.0317, "step": 14240 }, { "epoch": 0.5572032240394397, "grad_norm": 0.0, "learning_rate": 8.637542047871146e-06, "loss": 1.0528, "step": 14241 }, { "epoch": 0.5572423507316692, "grad_norm": 0.0, "learning_rate": 8.636286644549129e-06, "loss": 1.0027, "step": 14242 }, { "epoch": 0.5572814774238986, "grad_norm": 0.0, "learning_rate": 8.635031263126636e-06, "loss": 1.0286, "step": 14243 }, { "epoch": 0.557320604116128, "grad_norm": 0.0, "learning_rate": 8.633775903623834e-06, "loss": 1.0751, "step": 14244 }, { "epoch": 0.5573597308083574, "grad_norm": 0.0, "learning_rate": 8.63252056606088e-06, "loss": 1.0457, "step": 14245 }, { "epoch": 0.5573988575005869, "grad_norm": 0.0, "learning_rate": 8.631265250457931e-06, "loss": 1.0201, "step": 14246 }, { "epoch": 0.5574379841928163, "grad_norm": 0.0, "learning_rate": 8.630009956835144e-06, "loss": 1.0656, "step": 14247 }, { "epoch": 0.5574771108850458, "grad_norm": 0.0, "learning_rate": 8.628754685212685e-06, "loss": 1.0587, "step": 14248 }, { "epoch": 0.5575162375772752, "grad_norm": 0.0, "learning_rate": 8.627499435610707e-06, "loss": 0.9998, "step": 14249 }, { "epoch": 0.5575553642695047, "grad_norm": 0.0, "learning_rate": 8.626244208049367e-06, "loss": 1.0048, "step": 14250 }, { "epoch": 0.5575944909617341, "grad_norm": 0.0, "learning_rate": 8.624989002548825e-06, "loss": 1.1704, "step": 14251 }, { "epoch": 0.5576336176539636, "grad_norm": 0.0, "learning_rate": 8.623733819129233e-06, "loss": 0.9976, "step": 14252 }, { "epoch": 0.557672744346193, "grad_norm": 0.0, "learning_rate": 8.622478657810753e-06, "loss": 0.9797, "step": 14253 }, { "epoch": 0.5577118710384225, "grad_norm": 0.0, "learning_rate": 8.621223518613541e-06, "loss": 1.0954, "step": 14254 }, { "epoch": 0.5577509977306518, "grad_norm": 0.0, "learning_rate": 8.619968401557752e-06, "loss": 1.0877, "step": 14255 }, { "epoch": 0.5577901244228813, "grad_norm": 0.0, "learning_rate": 8.618713306663537e-06, "loss": 0.9327, "step": 14256 }, { "epoch": 0.5578292511151107, "grad_norm": 0.0, "learning_rate": 8.617458233951058e-06, "loss": 1.0221, "step": 14257 }, { "epoch": 0.5578683778073402, "grad_norm": 0.0, "learning_rate": 8.616203183440469e-06, "loss": 0.9467, "step": 14258 }, { "epoch": 0.5579075044995696, "grad_norm": 0.0, "learning_rate": 8.614948155151924e-06, "loss": 0.9485, "step": 14259 }, { "epoch": 0.557946631191799, "grad_norm": 0.0, "learning_rate": 8.613693149105569e-06, "loss": 1.0164, "step": 14260 }, { "epoch": 0.5579857578840285, "grad_norm": 0.0, "learning_rate": 8.612438165321571e-06, "loss": 1.1205, "step": 14261 }, { "epoch": 0.5580248845762579, "grad_norm": 0.0, "learning_rate": 8.611183203820076e-06, "loss": 1.0929, "step": 14262 }, { "epoch": 0.5580640112684874, "grad_norm": 0.0, "learning_rate": 8.60992826462124e-06, "loss": 0.9688, "step": 14263 }, { "epoch": 0.5581031379607168, "grad_norm": 0.0, "learning_rate": 8.608673347745213e-06, "loss": 0.9634, "step": 14264 }, { "epoch": 0.5581422646529463, "grad_norm": 0.0, "learning_rate": 8.607418453212147e-06, "loss": 1.044, "step": 14265 }, { "epoch": 0.5581813913451756, "grad_norm": 0.0, "learning_rate": 8.606163581042196e-06, "loss": 0.9111, "step": 14266 }, { "epoch": 0.5582205180374051, "grad_norm": 0.0, "learning_rate": 8.604908731255512e-06, "loss": 1.1496, "step": 14267 }, { "epoch": 0.5582596447296345, "grad_norm": 0.0, "learning_rate": 8.603653903872246e-06, "loss": 1.0931, "step": 14268 }, { "epoch": 0.558298771421864, "grad_norm": 0.0, "learning_rate": 8.602399098912548e-06, "loss": 1.0375, "step": 14269 }, { "epoch": 0.5583378981140934, "grad_norm": 0.0, "learning_rate": 8.60114431639657e-06, "loss": 0.975, "step": 14270 }, { "epoch": 0.5583770248063229, "grad_norm": 0.0, "learning_rate": 8.599889556344462e-06, "loss": 0.9763, "step": 14271 }, { "epoch": 0.5584161514985523, "grad_norm": 0.0, "learning_rate": 8.598634818776371e-06, "loss": 0.9597, "step": 14272 }, { "epoch": 0.5584552781907818, "grad_norm": 0.0, "learning_rate": 8.59738010371245e-06, "loss": 1.0124, "step": 14273 }, { "epoch": 0.5584944048830112, "grad_norm": 0.0, "learning_rate": 8.596125411172846e-06, "loss": 0.8925, "step": 14274 }, { "epoch": 0.5585335315752407, "grad_norm": 0.0, "learning_rate": 8.594870741177713e-06, "loss": 0.9989, "step": 14275 }, { "epoch": 0.55857265826747, "grad_norm": 0.0, "learning_rate": 8.593616093747191e-06, "loss": 0.945, "step": 14276 }, { "epoch": 0.5586117849596995, "grad_norm": 0.0, "learning_rate": 8.592361468901432e-06, "loss": 1.06, "step": 14277 }, { "epoch": 0.5586509116519289, "grad_norm": 0.0, "learning_rate": 8.591106866660584e-06, "loss": 1.1763, "step": 14278 }, { "epoch": 0.5586900383441584, "grad_norm": 0.0, "learning_rate": 8.589852287044796e-06, "loss": 1.0659, "step": 14279 }, { "epoch": 0.5587291650363878, "grad_norm": 0.0, "learning_rate": 8.588597730074214e-06, "loss": 1.0352, "step": 14280 }, { "epoch": 0.5587682917286173, "grad_norm": 0.0, "learning_rate": 8.587343195768983e-06, "loss": 1.0565, "step": 14281 }, { "epoch": 0.5588074184208467, "grad_norm": 0.0, "learning_rate": 8.586088684149251e-06, "loss": 1.0417, "step": 14282 }, { "epoch": 0.5588465451130762, "grad_norm": 0.0, "learning_rate": 8.584834195235158e-06, "loss": 1.0191, "step": 14283 }, { "epoch": 0.5588856718053056, "grad_norm": 0.0, "learning_rate": 8.58357972904686e-06, "loss": 0.967, "step": 14284 }, { "epoch": 0.5589247984975351, "grad_norm": 0.0, "learning_rate": 8.582325285604494e-06, "loss": 1.0541, "step": 14285 }, { "epoch": 0.5589639251897645, "grad_norm": 0.0, "learning_rate": 8.58107086492821e-06, "loss": 1.0594, "step": 14286 }, { "epoch": 0.559003051881994, "grad_norm": 0.0, "learning_rate": 8.579816467038144e-06, "loss": 0.9733, "step": 14287 }, { "epoch": 0.5590421785742233, "grad_norm": 0.0, "learning_rate": 8.578562091954451e-06, "loss": 0.9545, "step": 14288 }, { "epoch": 0.5590813052664527, "grad_norm": 0.0, "learning_rate": 8.57730773969727e-06, "loss": 0.9301, "step": 14289 }, { "epoch": 0.5591204319586822, "grad_norm": 0.0, "learning_rate": 8.576053410286744e-06, "loss": 0.9515, "step": 14290 }, { "epoch": 0.5591595586509116, "grad_norm": 0.0, "learning_rate": 8.574799103743015e-06, "loss": 1.1573, "step": 14291 }, { "epoch": 0.5591986853431411, "grad_norm": 0.0, "learning_rate": 8.573544820086224e-06, "loss": 0.8688, "step": 14292 }, { "epoch": 0.5592378120353705, "grad_norm": 0.0, "learning_rate": 8.572290559336518e-06, "loss": 1.0422, "step": 14293 }, { "epoch": 0.5592769387276, "grad_norm": 0.0, "learning_rate": 8.571036321514039e-06, "loss": 1.0271, "step": 14294 }, { "epoch": 0.5593160654198294, "grad_norm": 0.0, "learning_rate": 8.569782106638923e-06, "loss": 1.0726, "step": 14295 }, { "epoch": 0.5593551921120589, "grad_norm": 0.0, "learning_rate": 8.568527914731312e-06, "loss": 1.0275, "step": 14296 }, { "epoch": 0.5593943188042882, "grad_norm": 0.0, "learning_rate": 8.567273745811353e-06, "loss": 1.0396, "step": 14297 }, { "epoch": 0.5594334454965177, "grad_norm": 0.0, "learning_rate": 8.566019599899182e-06, "loss": 1.0513, "step": 14298 }, { "epoch": 0.5594725721887471, "grad_norm": 0.0, "learning_rate": 8.564765477014938e-06, "loss": 1.0504, "step": 14299 }, { "epoch": 0.5595116988809766, "grad_norm": 0.0, "learning_rate": 8.563511377178764e-06, "loss": 0.9372, "step": 14300 }, { "epoch": 0.559550825573206, "grad_norm": 0.0, "learning_rate": 8.562257300410792e-06, "loss": 1.0527, "step": 14301 }, { "epoch": 0.5595899522654355, "grad_norm": 0.0, "learning_rate": 8.561003246731172e-06, "loss": 0.9477, "step": 14302 }, { "epoch": 0.5596290789576649, "grad_norm": 0.0, "learning_rate": 8.559749216160034e-06, "loss": 1.0129, "step": 14303 }, { "epoch": 0.5596682056498944, "grad_norm": 0.0, "learning_rate": 8.558495208717521e-06, "loss": 0.9782, "step": 14304 }, { "epoch": 0.5597073323421238, "grad_norm": 0.0, "learning_rate": 8.557241224423763e-06, "loss": 1.0777, "step": 14305 }, { "epoch": 0.5597464590343533, "grad_norm": 0.0, "learning_rate": 8.555987263298908e-06, "loss": 1.121, "step": 14306 }, { "epoch": 0.5597855857265827, "grad_norm": 0.0, "learning_rate": 8.554733325363088e-06, "loss": 0.9975, "step": 14307 }, { "epoch": 0.5598247124188122, "grad_norm": 0.0, "learning_rate": 8.553479410636441e-06, "loss": 1.0668, "step": 14308 }, { "epoch": 0.5598638391110415, "grad_norm": 0.0, "learning_rate": 8.5522255191391e-06, "loss": 1.0676, "step": 14309 }, { "epoch": 0.559902965803271, "grad_norm": 0.0, "learning_rate": 8.5509716508912e-06, "loss": 0.9653, "step": 14310 }, { "epoch": 0.5599420924955004, "grad_norm": 0.0, "learning_rate": 8.549717805912883e-06, "loss": 1.0878, "step": 14311 }, { "epoch": 0.5599812191877299, "grad_norm": 0.0, "learning_rate": 8.548463984224282e-06, "loss": 1.0572, "step": 14312 }, { "epoch": 0.5600203458799593, "grad_norm": 0.0, "learning_rate": 8.54721018584553e-06, "loss": 1.0021, "step": 14313 }, { "epoch": 0.5600594725721888, "grad_norm": 0.0, "learning_rate": 8.545956410796758e-06, "loss": 1.1065, "step": 14314 }, { "epoch": 0.5600985992644182, "grad_norm": 0.0, "learning_rate": 8.544702659098109e-06, "loss": 1.0273, "step": 14315 }, { "epoch": 0.5601377259566476, "grad_norm": 0.0, "learning_rate": 8.543448930769708e-06, "loss": 1.0272, "step": 14316 }, { "epoch": 0.5601768526488771, "grad_norm": 0.0, "learning_rate": 8.542195225831695e-06, "loss": 0.9725, "step": 14317 }, { "epoch": 0.5602159793411065, "grad_norm": 0.0, "learning_rate": 8.540941544304197e-06, "loss": 0.9758, "step": 14318 }, { "epoch": 0.560255106033336, "grad_norm": 0.0, "learning_rate": 8.539687886207352e-06, "loss": 1.0482, "step": 14319 }, { "epoch": 0.5602942327255653, "grad_norm": 0.0, "learning_rate": 8.53843425156129e-06, "loss": 1.0171, "step": 14320 }, { "epoch": 0.5603333594177948, "grad_norm": 0.0, "learning_rate": 8.537180640386141e-06, "loss": 1.1097, "step": 14321 }, { "epoch": 0.5603724861100242, "grad_norm": 0.0, "learning_rate": 8.535927052702037e-06, "loss": 1.0898, "step": 14322 }, { "epoch": 0.5604116128022537, "grad_norm": 0.0, "learning_rate": 8.53467348852911e-06, "loss": 1.028, "step": 14323 }, { "epoch": 0.5604507394944831, "grad_norm": 0.0, "learning_rate": 8.533419947887492e-06, "loss": 1.0284, "step": 14324 }, { "epoch": 0.5604898661867126, "grad_norm": 0.0, "learning_rate": 8.532166430797313e-06, "loss": 1.0928, "step": 14325 }, { "epoch": 0.560528992878942, "grad_norm": 0.0, "learning_rate": 8.530912937278702e-06, "loss": 1.1245, "step": 14326 }, { "epoch": 0.5605681195711715, "grad_norm": 0.0, "learning_rate": 8.529659467351782e-06, "loss": 1.0579, "step": 14327 }, { "epoch": 0.5606072462634009, "grad_norm": 0.0, "learning_rate": 8.528406021036694e-06, "loss": 1.0627, "step": 14328 }, { "epoch": 0.5606463729556304, "grad_norm": 0.0, "learning_rate": 8.527152598353561e-06, "loss": 1.0237, "step": 14329 }, { "epoch": 0.5606854996478597, "grad_norm": 0.0, "learning_rate": 8.525899199322512e-06, "loss": 1.0119, "step": 14330 }, { "epoch": 0.5607246263400892, "grad_norm": 0.0, "learning_rate": 8.524645823963676e-06, "loss": 1.0319, "step": 14331 }, { "epoch": 0.5607637530323186, "grad_norm": 0.0, "learning_rate": 8.523392472297174e-06, "loss": 0.9562, "step": 14332 }, { "epoch": 0.5608028797245481, "grad_norm": 0.0, "learning_rate": 8.522139144343143e-06, "loss": 1.0924, "step": 14333 }, { "epoch": 0.5608420064167775, "grad_norm": 0.0, "learning_rate": 8.520885840121705e-06, "loss": 1.0404, "step": 14334 }, { "epoch": 0.560881133109007, "grad_norm": 0.0, "learning_rate": 8.519632559652988e-06, "loss": 0.9724, "step": 14335 }, { "epoch": 0.5609202598012364, "grad_norm": 0.0, "learning_rate": 8.518379302957111e-06, "loss": 1.0334, "step": 14336 }, { "epoch": 0.5609593864934659, "grad_norm": 0.0, "learning_rate": 8.517126070054212e-06, "loss": 1.0748, "step": 14337 }, { "epoch": 0.5609985131856953, "grad_norm": 0.0, "learning_rate": 8.515872860964407e-06, "loss": 0.9958, "step": 14338 }, { "epoch": 0.5610376398779248, "grad_norm": 0.0, "learning_rate": 8.514619675707828e-06, "loss": 1.1555, "step": 14339 }, { "epoch": 0.5610767665701542, "grad_norm": 0.0, "learning_rate": 8.513366514304595e-06, "loss": 1.0065, "step": 14340 }, { "epoch": 0.5611158932623836, "grad_norm": 0.0, "learning_rate": 8.512113376774827e-06, "loss": 1.0795, "step": 14341 }, { "epoch": 0.561155019954613, "grad_norm": 0.0, "learning_rate": 8.510860263138658e-06, "loss": 0.9537, "step": 14342 }, { "epoch": 0.5611941466468425, "grad_norm": 0.0, "learning_rate": 8.509607173416208e-06, "loss": 0.9203, "step": 14343 }, { "epoch": 0.5612332733390719, "grad_norm": 0.0, "learning_rate": 8.5083541076276e-06, "loss": 0.9858, "step": 14344 }, { "epoch": 0.5612724000313013, "grad_norm": 0.0, "learning_rate": 8.50710106579295e-06, "loss": 1.1699, "step": 14345 }, { "epoch": 0.5613115267235308, "grad_norm": 0.0, "learning_rate": 8.505848047932392e-06, "loss": 1.0164, "step": 14346 }, { "epoch": 0.5613506534157602, "grad_norm": 0.0, "learning_rate": 8.50459505406604e-06, "loss": 1.153, "step": 14347 }, { "epoch": 0.5613897801079897, "grad_norm": 0.0, "learning_rate": 8.50334208421402e-06, "loss": 0.9422, "step": 14348 }, { "epoch": 0.5614289068002191, "grad_norm": 0.0, "learning_rate": 8.50208913839645e-06, "loss": 0.9223, "step": 14349 }, { "epoch": 0.5614680334924486, "grad_norm": 0.0, "learning_rate": 8.500836216633447e-06, "loss": 1.1452, "step": 14350 }, { "epoch": 0.561507160184678, "grad_norm": 0.0, "learning_rate": 8.49958331894514e-06, "loss": 0.9946, "step": 14351 }, { "epoch": 0.5615462868769074, "grad_norm": 0.0, "learning_rate": 8.498330445351643e-06, "loss": 1.0504, "step": 14352 }, { "epoch": 0.5615854135691368, "grad_norm": 0.0, "learning_rate": 8.49707759587308e-06, "loss": 1.1821, "step": 14353 }, { "epoch": 0.5616245402613663, "grad_norm": 0.0, "learning_rate": 8.495824770529565e-06, "loss": 1.0698, "step": 14354 }, { "epoch": 0.5616636669535957, "grad_norm": 0.0, "learning_rate": 8.494571969341219e-06, "loss": 1.0833, "step": 14355 }, { "epoch": 0.5617027936458252, "grad_norm": 0.0, "learning_rate": 8.493319192328162e-06, "loss": 1.011, "step": 14356 }, { "epoch": 0.5617419203380546, "grad_norm": 0.0, "learning_rate": 8.49206643951051e-06, "loss": 1.0599, "step": 14357 }, { "epoch": 0.5617810470302841, "grad_norm": 0.0, "learning_rate": 8.490813710908384e-06, "loss": 1.1204, "step": 14358 }, { "epoch": 0.5618201737225135, "grad_norm": 0.0, "learning_rate": 8.489561006541895e-06, "loss": 1.0614, "step": 14359 }, { "epoch": 0.561859300414743, "grad_norm": 0.0, "learning_rate": 8.488308326431166e-06, "loss": 1.0686, "step": 14360 }, { "epoch": 0.5618984271069724, "grad_norm": 0.0, "learning_rate": 8.48705567059631e-06, "loss": 1.2256, "step": 14361 }, { "epoch": 0.5619375537992018, "grad_norm": 0.0, "learning_rate": 8.485803039057446e-06, "loss": 1.0804, "step": 14362 }, { "epoch": 0.5619766804914312, "grad_norm": 0.0, "learning_rate": 8.484550431834684e-06, "loss": 0.9182, "step": 14363 }, { "epoch": 0.5620158071836607, "grad_norm": 0.0, "learning_rate": 8.483297848948147e-06, "loss": 1.0452, "step": 14364 }, { "epoch": 0.5620549338758901, "grad_norm": 0.0, "learning_rate": 8.482045290417946e-06, "loss": 1.0079, "step": 14365 }, { "epoch": 0.5620940605681196, "grad_norm": 0.0, "learning_rate": 8.480792756264194e-06, "loss": 0.9325, "step": 14366 }, { "epoch": 0.562133187260349, "grad_norm": 0.0, "learning_rate": 8.479540246507005e-06, "loss": 0.9969, "step": 14367 }, { "epoch": 0.5621723139525785, "grad_norm": 0.0, "learning_rate": 8.478287761166494e-06, "loss": 1.1315, "step": 14368 }, { "epoch": 0.5622114406448079, "grad_norm": 0.0, "learning_rate": 8.477035300262778e-06, "loss": 1.1556, "step": 14369 }, { "epoch": 0.5622505673370374, "grad_norm": 0.0, "learning_rate": 8.475782863815967e-06, "loss": 0.9642, "step": 14370 }, { "epoch": 0.5622896940292668, "grad_norm": 0.0, "learning_rate": 8.474530451846172e-06, "loss": 0.9632, "step": 14371 }, { "epoch": 0.5623288207214963, "grad_norm": 0.0, "learning_rate": 8.473278064373502e-06, "loss": 1.1002, "step": 14372 }, { "epoch": 0.5623679474137256, "grad_norm": 0.0, "learning_rate": 8.472025701418078e-06, "loss": 0.8677, "step": 14373 }, { "epoch": 0.562407074105955, "grad_norm": 0.0, "learning_rate": 8.470773363000006e-06, "loss": 0.9234, "step": 14374 }, { "epoch": 0.5624462007981845, "grad_norm": 0.0, "learning_rate": 8.4695210491394e-06, "loss": 1.1023, "step": 14375 }, { "epoch": 0.5624853274904139, "grad_norm": 0.0, "learning_rate": 8.468268759856361e-06, "loss": 1.009, "step": 14376 }, { "epoch": 0.5625244541826434, "grad_norm": 0.0, "learning_rate": 8.467016495171012e-06, "loss": 1.027, "step": 14377 }, { "epoch": 0.5625635808748728, "grad_norm": 0.0, "learning_rate": 8.465764255103457e-06, "loss": 1.1677, "step": 14378 }, { "epoch": 0.5626027075671023, "grad_norm": 0.0, "learning_rate": 8.464512039673806e-06, "loss": 0.9966, "step": 14379 }, { "epoch": 0.5626418342593317, "grad_norm": 0.0, "learning_rate": 8.463259848902166e-06, "loss": 1.0157, "step": 14380 }, { "epoch": 0.5626809609515612, "grad_norm": 0.0, "learning_rate": 8.462007682808645e-06, "loss": 1.0365, "step": 14381 }, { "epoch": 0.5627200876437906, "grad_norm": 0.0, "learning_rate": 8.460755541413355e-06, "loss": 1.0355, "step": 14382 }, { "epoch": 0.56275921433602, "grad_norm": 0.0, "learning_rate": 8.459503424736405e-06, "loss": 1.0767, "step": 14383 }, { "epoch": 0.5627983410282494, "grad_norm": 0.0, "learning_rate": 8.458251332797899e-06, "loss": 1.0564, "step": 14384 }, { "epoch": 0.5628374677204789, "grad_norm": 0.0, "learning_rate": 8.45699926561794e-06, "loss": 0.9964, "step": 14385 }, { "epoch": 0.5628765944127083, "grad_norm": 0.0, "learning_rate": 8.455747223216642e-06, "loss": 1.067, "step": 14386 }, { "epoch": 0.5629157211049378, "grad_norm": 0.0, "learning_rate": 8.45449520561411e-06, "loss": 0.9916, "step": 14387 }, { "epoch": 0.5629548477971672, "grad_norm": 0.0, "learning_rate": 8.453243212830448e-06, "loss": 0.9787, "step": 14388 }, { "epoch": 0.5629939744893967, "grad_norm": 0.0, "learning_rate": 8.451991244885763e-06, "loss": 1.1859, "step": 14389 }, { "epoch": 0.5630331011816261, "grad_norm": 0.0, "learning_rate": 8.450739301800153e-06, "loss": 1.0577, "step": 14390 }, { "epoch": 0.5630722278738556, "grad_norm": 0.0, "learning_rate": 8.449487383593734e-06, "loss": 1.0805, "step": 14391 }, { "epoch": 0.563111354566085, "grad_norm": 0.0, "learning_rate": 8.448235490286604e-06, "loss": 1.0423, "step": 14392 }, { "epoch": 0.5631504812583145, "grad_norm": 0.0, "learning_rate": 8.446983621898868e-06, "loss": 1.0499, "step": 14393 }, { "epoch": 0.5631896079505438, "grad_norm": 0.0, "learning_rate": 8.445731778450625e-06, "loss": 1.2064, "step": 14394 }, { "epoch": 0.5632287346427733, "grad_norm": 0.0, "learning_rate": 8.444479959961986e-06, "loss": 1.179, "step": 14395 }, { "epoch": 0.5632678613350027, "grad_norm": 0.0, "learning_rate": 8.443228166453049e-06, "loss": 0.9624, "step": 14396 }, { "epoch": 0.5633069880272322, "grad_norm": 0.0, "learning_rate": 8.441976397943918e-06, "loss": 1.0954, "step": 14397 }, { "epoch": 0.5633461147194616, "grad_norm": 0.0, "learning_rate": 8.440724654454693e-06, "loss": 1.0694, "step": 14398 }, { "epoch": 0.5633852414116911, "grad_norm": 0.0, "learning_rate": 8.439472936005474e-06, "loss": 1.0818, "step": 14399 }, { "epoch": 0.5634243681039205, "grad_norm": 0.0, "learning_rate": 8.438221242616364e-06, "loss": 1.0116, "step": 14400 }, { "epoch": 0.56346349479615, "grad_norm": 0.0, "learning_rate": 8.436969574307467e-06, "loss": 1.0218, "step": 14401 }, { "epoch": 0.5635026214883794, "grad_norm": 0.0, "learning_rate": 8.43571793109888e-06, "loss": 1.092, "step": 14402 }, { "epoch": 0.5635417481806088, "grad_norm": 0.0, "learning_rate": 8.4344663130107e-06, "loss": 1.0556, "step": 14403 }, { "epoch": 0.5635808748728383, "grad_norm": 0.0, "learning_rate": 8.43321472006303e-06, "loss": 0.9848, "step": 14404 }, { "epoch": 0.5636200015650676, "grad_norm": 0.0, "learning_rate": 8.43196315227597e-06, "loss": 0.9005, "step": 14405 }, { "epoch": 0.5636591282572971, "grad_norm": 0.0, "learning_rate": 8.430711609669615e-06, "loss": 1.0349, "step": 14406 }, { "epoch": 0.5636982549495265, "grad_norm": 0.0, "learning_rate": 8.429460092264067e-06, "loss": 0.9782, "step": 14407 }, { "epoch": 0.563737381641756, "grad_norm": 0.0, "learning_rate": 8.42820860007942e-06, "loss": 0.8971, "step": 14408 }, { "epoch": 0.5637765083339854, "grad_norm": 0.0, "learning_rate": 8.426957133135775e-06, "loss": 0.9527, "step": 14409 }, { "epoch": 0.5638156350262149, "grad_norm": 0.0, "learning_rate": 8.425705691453227e-06, "loss": 1.1094, "step": 14410 }, { "epoch": 0.5638547617184443, "grad_norm": 0.0, "learning_rate": 8.42445427505187e-06, "loss": 1.0426, "step": 14411 }, { "epoch": 0.5638938884106738, "grad_norm": 0.0, "learning_rate": 8.423202883951805e-06, "loss": 0.9212, "step": 14412 }, { "epoch": 0.5639330151029032, "grad_norm": 0.0, "learning_rate": 8.421951518173126e-06, "loss": 1.0049, "step": 14413 }, { "epoch": 0.5639721417951327, "grad_norm": 0.0, "learning_rate": 8.42070017773593e-06, "loss": 1.0466, "step": 14414 }, { "epoch": 0.564011268487362, "grad_norm": 0.0, "learning_rate": 8.419448862660311e-06, "loss": 0.9762, "step": 14415 }, { "epoch": 0.5640503951795915, "grad_norm": 0.0, "learning_rate": 8.41819757296636e-06, "loss": 0.9355, "step": 14416 }, { "epoch": 0.5640895218718209, "grad_norm": 0.0, "learning_rate": 8.416946308674173e-06, "loss": 1.0594, "step": 14417 }, { "epoch": 0.5641286485640504, "grad_norm": 0.0, "learning_rate": 8.415695069803846e-06, "loss": 0.9859, "step": 14418 }, { "epoch": 0.5641677752562798, "grad_norm": 0.0, "learning_rate": 8.414443856375471e-06, "loss": 1.0414, "step": 14419 }, { "epoch": 0.5642069019485093, "grad_norm": 0.0, "learning_rate": 8.413192668409143e-06, "loss": 1.022, "step": 14420 }, { "epoch": 0.5642460286407387, "grad_norm": 0.0, "learning_rate": 8.411941505924945e-06, "loss": 1.0327, "step": 14421 }, { "epoch": 0.5642851553329682, "grad_norm": 0.0, "learning_rate": 8.410690368942983e-06, "loss": 1.0475, "step": 14422 }, { "epoch": 0.5643242820251976, "grad_norm": 0.0, "learning_rate": 8.409439257483341e-06, "loss": 1.0089, "step": 14423 }, { "epoch": 0.5643634087174271, "grad_norm": 0.0, "learning_rate": 8.408188171566113e-06, "loss": 1.0283, "step": 14424 }, { "epoch": 0.5644025354096565, "grad_norm": 0.0, "learning_rate": 8.406937111211386e-06, "loss": 1.0414, "step": 14425 }, { "epoch": 0.564441662101886, "grad_norm": 0.0, "learning_rate": 8.40568607643925e-06, "loss": 0.8507, "step": 14426 }, { "epoch": 0.5644807887941153, "grad_norm": 0.0, "learning_rate": 8.404435067269803e-06, "loss": 0.9833, "step": 14427 }, { "epoch": 0.5645199154863448, "grad_norm": 0.0, "learning_rate": 8.403184083723128e-06, "loss": 0.9556, "step": 14428 }, { "epoch": 0.5645590421785742, "grad_norm": 0.0, "learning_rate": 8.401933125819316e-06, "loss": 1.0612, "step": 14429 }, { "epoch": 0.5645981688708036, "grad_norm": 0.0, "learning_rate": 8.400682193578451e-06, "loss": 0.9446, "step": 14430 }, { "epoch": 0.5646372955630331, "grad_norm": 0.0, "learning_rate": 8.39943128702063e-06, "loss": 1.0078, "step": 14431 }, { "epoch": 0.5646764222552625, "grad_norm": 0.0, "learning_rate": 8.39818040616594e-06, "loss": 0.9083, "step": 14432 }, { "epoch": 0.564715548947492, "grad_norm": 0.0, "learning_rate": 8.39692955103446e-06, "loss": 1.1149, "step": 14433 }, { "epoch": 0.5647546756397214, "grad_norm": 0.0, "learning_rate": 8.395678721646282e-06, "loss": 1.0388, "step": 14434 }, { "epoch": 0.5647938023319509, "grad_norm": 0.0, "learning_rate": 8.394427918021497e-06, "loss": 0.9523, "step": 14435 }, { "epoch": 0.5648329290241803, "grad_norm": 0.0, "learning_rate": 8.393177140180189e-06, "loss": 0.9773, "step": 14436 }, { "epoch": 0.5648720557164097, "grad_norm": 0.0, "learning_rate": 8.391926388142441e-06, "loss": 1.1493, "step": 14437 }, { "epoch": 0.5649111824086391, "grad_norm": 0.0, "learning_rate": 8.390675661928341e-06, "loss": 1.0149, "step": 14438 }, { "epoch": 0.5649503091008686, "grad_norm": 0.0, "learning_rate": 8.389424961557967e-06, "loss": 1.0255, "step": 14439 }, { "epoch": 0.564989435793098, "grad_norm": 0.0, "learning_rate": 8.388174287051417e-06, "loss": 0.906, "step": 14440 }, { "epoch": 0.5650285624853275, "grad_norm": 0.0, "learning_rate": 8.386923638428765e-06, "loss": 1.1225, "step": 14441 }, { "epoch": 0.5650676891775569, "grad_norm": 0.0, "learning_rate": 8.3856730157101e-06, "loss": 0.9416, "step": 14442 }, { "epoch": 0.5651068158697864, "grad_norm": 0.0, "learning_rate": 8.384422418915503e-06, "loss": 0.9547, "step": 14443 }, { "epoch": 0.5651459425620158, "grad_norm": 0.0, "learning_rate": 8.383171848065059e-06, "loss": 0.943, "step": 14444 }, { "epoch": 0.5651850692542453, "grad_norm": 0.0, "learning_rate": 8.381921303178844e-06, "loss": 1.0699, "step": 14445 }, { "epoch": 0.5652241959464747, "grad_norm": 0.0, "learning_rate": 8.38067078427695e-06, "loss": 0.9682, "step": 14446 }, { "epoch": 0.5652633226387042, "grad_norm": 0.0, "learning_rate": 8.379420291379454e-06, "loss": 1.0558, "step": 14447 }, { "epoch": 0.5653024493309335, "grad_norm": 0.0, "learning_rate": 8.378169824506435e-06, "loss": 1.0519, "step": 14448 }, { "epoch": 0.565341576023163, "grad_norm": 0.0, "learning_rate": 8.376919383677978e-06, "loss": 1.1121, "step": 14449 }, { "epoch": 0.5653807027153924, "grad_norm": 0.0, "learning_rate": 8.37566896891416e-06, "loss": 1.0742, "step": 14450 }, { "epoch": 0.5654198294076219, "grad_norm": 0.0, "learning_rate": 8.374418580235065e-06, "loss": 1.0709, "step": 14451 }, { "epoch": 0.5654589560998513, "grad_norm": 0.0, "learning_rate": 8.37316821766077e-06, "loss": 1.0016, "step": 14452 }, { "epoch": 0.5654980827920808, "grad_norm": 0.0, "learning_rate": 8.371917881211354e-06, "loss": 1.0665, "step": 14453 }, { "epoch": 0.5655372094843102, "grad_norm": 0.0, "learning_rate": 8.370667570906899e-06, "loss": 0.9715, "step": 14454 }, { "epoch": 0.5655763361765397, "grad_norm": 0.0, "learning_rate": 8.36941728676748e-06, "loss": 1.1239, "step": 14455 }, { "epoch": 0.5656154628687691, "grad_norm": 0.0, "learning_rate": 8.368167028813176e-06, "loss": 0.9477, "step": 14456 }, { "epoch": 0.5656545895609986, "grad_norm": 0.0, "learning_rate": 8.366916797064061e-06, "loss": 1.0273, "step": 14457 }, { "epoch": 0.565693716253228, "grad_norm": 0.0, "learning_rate": 8.365666591540223e-06, "loss": 1.0836, "step": 14458 }, { "epoch": 0.5657328429454573, "grad_norm": 0.0, "learning_rate": 8.364416412261728e-06, "loss": 1.0516, "step": 14459 }, { "epoch": 0.5657719696376868, "grad_norm": 0.0, "learning_rate": 8.363166259248657e-06, "loss": 1.1552, "step": 14460 }, { "epoch": 0.5658110963299162, "grad_norm": 0.0, "learning_rate": 8.36191613252108e-06, "loss": 1.0216, "step": 14461 }, { "epoch": 0.5658502230221457, "grad_norm": 0.0, "learning_rate": 8.360666032099082e-06, "loss": 1.0911, "step": 14462 }, { "epoch": 0.5658893497143751, "grad_norm": 0.0, "learning_rate": 8.359415958002733e-06, "loss": 0.97, "step": 14463 }, { "epoch": 0.5659284764066046, "grad_norm": 0.0, "learning_rate": 8.358165910252108e-06, "loss": 1.0289, "step": 14464 }, { "epoch": 0.565967603098834, "grad_norm": 0.0, "learning_rate": 8.356915888867282e-06, "loss": 0.909, "step": 14465 }, { "epoch": 0.5660067297910635, "grad_norm": 0.0, "learning_rate": 8.355665893868322e-06, "loss": 1.1116, "step": 14466 }, { "epoch": 0.5660458564832929, "grad_norm": 0.0, "learning_rate": 8.354415925275314e-06, "loss": 1.0386, "step": 14467 }, { "epoch": 0.5660849831755224, "grad_norm": 0.0, "learning_rate": 8.353165983108322e-06, "loss": 1.0081, "step": 14468 }, { "epoch": 0.5661241098677517, "grad_norm": 0.0, "learning_rate": 8.351916067387421e-06, "loss": 1.0725, "step": 14469 }, { "epoch": 0.5661632365599812, "grad_norm": 0.0, "learning_rate": 8.350666178132679e-06, "loss": 0.932, "step": 14470 }, { "epoch": 0.5662023632522106, "grad_norm": 0.0, "learning_rate": 8.349416315364177e-06, "loss": 1.0057, "step": 14471 }, { "epoch": 0.5662414899444401, "grad_norm": 0.0, "learning_rate": 8.348166479101979e-06, "loss": 1.0204, "step": 14472 }, { "epoch": 0.5662806166366695, "grad_norm": 0.0, "learning_rate": 8.346916669366157e-06, "loss": 1.0339, "step": 14473 }, { "epoch": 0.566319743328899, "grad_norm": 0.0, "learning_rate": 8.345666886176783e-06, "loss": 1.0472, "step": 14474 }, { "epoch": 0.5663588700211284, "grad_norm": 0.0, "learning_rate": 8.34441712955392e-06, "loss": 0.9381, "step": 14475 }, { "epoch": 0.5663979967133579, "grad_norm": 0.0, "learning_rate": 8.34316739951765e-06, "loss": 1.0477, "step": 14476 }, { "epoch": 0.5664371234055873, "grad_norm": 0.0, "learning_rate": 8.341917696088034e-06, "loss": 1.0753, "step": 14477 }, { "epoch": 0.5664762500978168, "grad_norm": 0.0, "learning_rate": 8.340668019285143e-06, "loss": 0.993, "step": 14478 }, { "epoch": 0.5665153767900462, "grad_norm": 0.0, "learning_rate": 8.339418369129038e-06, "loss": 0.9598, "step": 14479 }, { "epoch": 0.5665545034822757, "grad_norm": 0.0, "learning_rate": 8.3381687456398e-06, "loss": 1.0438, "step": 14480 }, { "epoch": 0.566593630174505, "grad_norm": 0.0, "learning_rate": 8.336919148837487e-06, "loss": 1.1016, "step": 14481 }, { "epoch": 0.5666327568667345, "grad_norm": 0.0, "learning_rate": 8.335669578742172e-06, "loss": 1.0991, "step": 14482 }, { "epoch": 0.5666718835589639, "grad_norm": 0.0, "learning_rate": 8.334420035373916e-06, "loss": 0.9836, "step": 14483 }, { "epoch": 0.5667110102511934, "grad_norm": 0.0, "learning_rate": 8.333170518752782e-06, "loss": 1.014, "step": 14484 }, { "epoch": 0.5667501369434228, "grad_norm": 0.0, "learning_rate": 8.331921028898846e-06, "loss": 1.0696, "step": 14485 }, { "epoch": 0.5667892636356523, "grad_norm": 0.0, "learning_rate": 8.330671565832168e-06, "loss": 1.0396, "step": 14486 }, { "epoch": 0.5668283903278817, "grad_norm": 0.0, "learning_rate": 8.329422129572812e-06, "loss": 0.9387, "step": 14487 }, { "epoch": 0.5668675170201111, "grad_norm": 0.0, "learning_rate": 8.328172720140843e-06, "loss": 0.9728, "step": 14488 }, { "epoch": 0.5669066437123406, "grad_norm": 0.0, "learning_rate": 8.326923337556324e-06, "loss": 0.9819, "step": 14489 }, { "epoch": 0.56694577040457, "grad_norm": 0.0, "learning_rate": 8.325673981839322e-06, "loss": 1.0529, "step": 14490 }, { "epoch": 0.5669848970967994, "grad_norm": 0.0, "learning_rate": 8.324424653009898e-06, "loss": 1.1757, "step": 14491 }, { "epoch": 0.5670240237890288, "grad_norm": 0.0, "learning_rate": 8.323175351088113e-06, "loss": 0.9348, "step": 14492 }, { "epoch": 0.5670631504812583, "grad_norm": 0.0, "learning_rate": 8.321926076094032e-06, "loss": 1.0822, "step": 14493 }, { "epoch": 0.5671022771734877, "grad_norm": 0.0, "learning_rate": 8.320676828047716e-06, "loss": 0.9859, "step": 14494 }, { "epoch": 0.5671414038657172, "grad_norm": 0.0, "learning_rate": 8.319427606969223e-06, "loss": 1.0478, "step": 14495 }, { "epoch": 0.5671805305579466, "grad_norm": 0.0, "learning_rate": 8.318178412878618e-06, "loss": 1.063, "step": 14496 }, { "epoch": 0.5672196572501761, "grad_norm": 0.0, "learning_rate": 8.316929245795959e-06, "loss": 1.0534, "step": 14497 }, { "epoch": 0.5672587839424055, "grad_norm": 0.0, "learning_rate": 8.31568010574131e-06, "loss": 0.9846, "step": 14498 }, { "epoch": 0.567297910634635, "grad_norm": 0.0, "learning_rate": 8.314430992734728e-06, "loss": 0.9746, "step": 14499 }, { "epoch": 0.5673370373268644, "grad_norm": 0.0, "learning_rate": 8.313181906796272e-06, "loss": 1.0601, "step": 14500 }, { "epoch": 0.5673761640190939, "grad_norm": 0.0, "learning_rate": 8.311932847945996e-06, "loss": 1.0378, "step": 14501 }, { "epoch": 0.5674152907113232, "grad_norm": 0.0, "learning_rate": 8.310683816203967e-06, "loss": 1.0204, "step": 14502 }, { "epoch": 0.5674544174035527, "grad_norm": 0.0, "learning_rate": 8.30943481159024e-06, "loss": 0.9874, "step": 14503 }, { "epoch": 0.5674935440957821, "grad_norm": 0.0, "learning_rate": 8.308185834124872e-06, "loss": 0.9571, "step": 14504 }, { "epoch": 0.5675326707880116, "grad_norm": 0.0, "learning_rate": 8.306936883827918e-06, "loss": 0.9955, "step": 14505 }, { "epoch": 0.567571797480241, "grad_norm": 0.0, "learning_rate": 8.305687960719433e-06, "loss": 1.002, "step": 14506 }, { "epoch": 0.5676109241724705, "grad_norm": 0.0, "learning_rate": 8.30443906481948e-06, "loss": 1.0273, "step": 14507 }, { "epoch": 0.5676500508646999, "grad_norm": 0.0, "learning_rate": 8.303190196148112e-06, "loss": 0.8959, "step": 14508 }, { "epoch": 0.5676891775569294, "grad_norm": 0.0, "learning_rate": 8.301941354725382e-06, "loss": 0.9465, "step": 14509 }, { "epoch": 0.5677283042491588, "grad_norm": 0.0, "learning_rate": 8.300692540571343e-06, "loss": 0.9797, "step": 14510 }, { "epoch": 0.5677674309413883, "grad_norm": 0.0, "learning_rate": 8.299443753706056e-06, "loss": 1.0178, "step": 14511 }, { "epoch": 0.5678065576336176, "grad_norm": 0.0, "learning_rate": 8.298194994149571e-06, "loss": 1.0038, "step": 14512 }, { "epoch": 0.5678456843258471, "grad_norm": 0.0, "learning_rate": 8.296946261921941e-06, "loss": 1.0441, "step": 14513 }, { "epoch": 0.5678848110180765, "grad_norm": 0.0, "learning_rate": 8.295697557043223e-06, "loss": 1.0194, "step": 14514 }, { "epoch": 0.567923937710306, "grad_norm": 0.0, "learning_rate": 8.29444887953346e-06, "loss": 1.0917, "step": 14515 }, { "epoch": 0.5679630644025354, "grad_norm": 0.0, "learning_rate": 8.293200229412716e-06, "loss": 0.9003, "step": 14516 }, { "epoch": 0.5680021910947648, "grad_norm": 0.0, "learning_rate": 8.291951606701037e-06, "loss": 0.9928, "step": 14517 }, { "epoch": 0.5680413177869943, "grad_norm": 0.0, "learning_rate": 8.290703011418475e-06, "loss": 1.0444, "step": 14518 }, { "epoch": 0.5680804444792237, "grad_norm": 0.0, "learning_rate": 8.289454443585076e-06, "loss": 1.0664, "step": 14519 }, { "epoch": 0.5681195711714532, "grad_norm": 0.0, "learning_rate": 8.2882059032209e-06, "loss": 1.0488, "step": 14520 }, { "epoch": 0.5681586978636826, "grad_norm": 0.0, "learning_rate": 8.286957390345994e-06, "loss": 0.9223, "step": 14521 }, { "epoch": 0.5681978245559121, "grad_norm": 0.0, "learning_rate": 8.285708904980404e-06, "loss": 1.0441, "step": 14522 }, { "epoch": 0.5682369512481414, "grad_norm": 0.0, "learning_rate": 8.28446044714418e-06, "loss": 0.9194, "step": 14523 }, { "epoch": 0.5682760779403709, "grad_norm": 0.0, "learning_rate": 8.283212016857369e-06, "loss": 0.9645, "step": 14524 }, { "epoch": 0.5683152046326003, "grad_norm": 0.0, "learning_rate": 8.281963614140026e-06, "loss": 0.9916, "step": 14525 }, { "epoch": 0.5683543313248298, "grad_norm": 0.0, "learning_rate": 8.280715239012192e-06, "loss": 1.1539, "step": 14526 }, { "epoch": 0.5683934580170592, "grad_norm": 0.0, "learning_rate": 8.27946689149392e-06, "loss": 0.9712, "step": 14527 }, { "epoch": 0.5684325847092887, "grad_norm": 0.0, "learning_rate": 8.278218571605247e-06, "loss": 1.1527, "step": 14528 }, { "epoch": 0.5684717114015181, "grad_norm": 0.0, "learning_rate": 8.276970279366232e-06, "loss": 0.8885, "step": 14529 }, { "epoch": 0.5685108380937476, "grad_norm": 0.0, "learning_rate": 8.275722014796915e-06, "loss": 0.9656, "step": 14530 }, { "epoch": 0.568549964785977, "grad_norm": 0.0, "learning_rate": 8.274473777917342e-06, "loss": 1.1003, "step": 14531 }, { "epoch": 0.5685890914782065, "grad_norm": 0.0, "learning_rate": 8.273225568747558e-06, "loss": 1.1003, "step": 14532 }, { "epoch": 0.5686282181704359, "grad_norm": 0.0, "learning_rate": 8.271977387307604e-06, "loss": 0.9822, "step": 14533 }, { "epoch": 0.5686673448626653, "grad_norm": 0.0, "learning_rate": 8.27072923361753e-06, "loss": 1.0361, "step": 14534 }, { "epoch": 0.5687064715548947, "grad_norm": 0.0, "learning_rate": 8.269481107697379e-06, "loss": 0.958, "step": 14535 }, { "epoch": 0.5687455982471242, "grad_norm": 0.0, "learning_rate": 8.268233009567192e-06, "loss": 1.0364, "step": 14536 }, { "epoch": 0.5687847249393536, "grad_norm": 0.0, "learning_rate": 8.266984939247012e-06, "loss": 0.9988, "step": 14537 }, { "epoch": 0.5688238516315831, "grad_norm": 0.0, "learning_rate": 8.265736896756883e-06, "loss": 0.9667, "step": 14538 }, { "epoch": 0.5688629783238125, "grad_norm": 0.0, "learning_rate": 8.264488882116846e-06, "loss": 1.0322, "step": 14539 }, { "epoch": 0.568902105016042, "grad_norm": 0.0, "learning_rate": 8.263240895346943e-06, "loss": 1.1116, "step": 14540 }, { "epoch": 0.5689412317082714, "grad_norm": 0.0, "learning_rate": 8.261992936467215e-06, "loss": 1.058, "step": 14541 }, { "epoch": 0.5689803584005009, "grad_norm": 0.0, "learning_rate": 8.260745005497701e-06, "loss": 1.06, "step": 14542 }, { "epoch": 0.5690194850927303, "grad_norm": 0.0, "learning_rate": 8.259497102458447e-06, "loss": 1.0143, "step": 14543 }, { "epoch": 0.5690586117849596, "grad_norm": 0.0, "learning_rate": 8.258249227369485e-06, "loss": 0.9551, "step": 14544 }, { "epoch": 0.5690977384771891, "grad_norm": 0.0, "learning_rate": 8.25700138025086e-06, "loss": 0.9528, "step": 14545 }, { "epoch": 0.5691368651694185, "grad_norm": 0.0, "learning_rate": 8.255753561122603e-06, "loss": 1.01, "step": 14546 }, { "epoch": 0.569175991861648, "grad_norm": 0.0, "learning_rate": 8.254505770004764e-06, "loss": 1.0287, "step": 14547 }, { "epoch": 0.5692151185538774, "grad_norm": 0.0, "learning_rate": 8.253258006917375e-06, "loss": 1.0687, "step": 14548 }, { "epoch": 0.5692542452461069, "grad_norm": 0.0, "learning_rate": 8.252010271880473e-06, "loss": 1.0891, "step": 14549 }, { "epoch": 0.5692933719383363, "grad_norm": 0.0, "learning_rate": 8.250762564914093e-06, "loss": 1.0847, "step": 14550 }, { "epoch": 0.5693324986305658, "grad_norm": 0.0, "learning_rate": 8.249514886038277e-06, "loss": 1.0212, "step": 14551 }, { "epoch": 0.5693716253227952, "grad_norm": 0.0, "learning_rate": 8.248267235273057e-06, "loss": 1.0479, "step": 14552 }, { "epoch": 0.5694107520150247, "grad_norm": 0.0, "learning_rate": 8.247019612638473e-06, "loss": 0.9913, "step": 14553 }, { "epoch": 0.569449878707254, "grad_norm": 0.0, "learning_rate": 8.245772018154557e-06, "loss": 1.0654, "step": 14554 }, { "epoch": 0.5694890053994835, "grad_norm": 0.0, "learning_rate": 8.244524451841338e-06, "loss": 1.0466, "step": 14555 }, { "epoch": 0.5695281320917129, "grad_norm": 0.0, "learning_rate": 8.243276913718862e-06, "loss": 1.0284, "step": 14556 }, { "epoch": 0.5695672587839424, "grad_norm": 0.0, "learning_rate": 8.242029403807158e-06, "loss": 0.9438, "step": 14557 }, { "epoch": 0.5696063854761718, "grad_norm": 0.0, "learning_rate": 8.240781922126257e-06, "loss": 1.0757, "step": 14558 }, { "epoch": 0.5696455121684013, "grad_norm": 0.0, "learning_rate": 8.239534468696192e-06, "loss": 0.9429, "step": 14559 }, { "epoch": 0.5696846388606307, "grad_norm": 0.0, "learning_rate": 8.238287043537e-06, "loss": 0.9542, "step": 14560 }, { "epoch": 0.5697237655528602, "grad_norm": 0.0, "learning_rate": 8.237039646668712e-06, "loss": 0.9702, "step": 14561 }, { "epoch": 0.5697628922450896, "grad_norm": 0.0, "learning_rate": 8.235792278111357e-06, "loss": 1.0679, "step": 14562 }, { "epoch": 0.5698020189373191, "grad_norm": 0.0, "learning_rate": 8.234544937884969e-06, "loss": 1.0654, "step": 14563 }, { "epoch": 0.5698411456295485, "grad_norm": 0.0, "learning_rate": 8.23329762600957e-06, "loss": 0.9847, "step": 14564 }, { "epoch": 0.569880272321778, "grad_norm": 0.0, "learning_rate": 8.232050342505204e-06, "loss": 1.0222, "step": 14565 }, { "epoch": 0.5699193990140073, "grad_norm": 0.0, "learning_rate": 8.230803087391893e-06, "loss": 1.0392, "step": 14566 }, { "epoch": 0.5699585257062368, "grad_norm": 0.0, "learning_rate": 8.229555860689668e-06, "loss": 1.0278, "step": 14567 }, { "epoch": 0.5699976523984662, "grad_norm": 0.0, "learning_rate": 8.228308662418553e-06, "loss": 0.9933, "step": 14568 }, { "epoch": 0.5700367790906957, "grad_norm": 0.0, "learning_rate": 8.227061492598585e-06, "loss": 1.1186, "step": 14569 }, { "epoch": 0.5700759057829251, "grad_norm": 0.0, "learning_rate": 8.225814351249788e-06, "loss": 1.0593, "step": 14570 }, { "epoch": 0.5701150324751546, "grad_norm": 0.0, "learning_rate": 8.224567238392189e-06, "loss": 0.9024, "step": 14571 }, { "epoch": 0.570154159167384, "grad_norm": 0.0, "learning_rate": 8.223320154045816e-06, "loss": 1.0242, "step": 14572 }, { "epoch": 0.5701932858596134, "grad_norm": 0.0, "learning_rate": 8.22207309823069e-06, "loss": 1.1578, "step": 14573 }, { "epoch": 0.5702324125518429, "grad_norm": 0.0, "learning_rate": 8.220826070966847e-06, "loss": 1.0078, "step": 14574 }, { "epoch": 0.5702715392440723, "grad_norm": 0.0, "learning_rate": 8.219579072274307e-06, "loss": 0.9871, "step": 14575 }, { "epoch": 0.5703106659363018, "grad_norm": 0.0, "learning_rate": 8.218332102173097e-06, "loss": 0.9317, "step": 14576 }, { "epoch": 0.5703497926285311, "grad_norm": 0.0, "learning_rate": 8.217085160683238e-06, "loss": 1.0254, "step": 14577 }, { "epoch": 0.5703889193207606, "grad_norm": 0.0, "learning_rate": 8.21583824782476e-06, "loss": 0.9164, "step": 14578 }, { "epoch": 0.57042804601299, "grad_norm": 0.0, "learning_rate": 8.214591363617683e-06, "loss": 1.0424, "step": 14579 }, { "epoch": 0.5704671727052195, "grad_norm": 0.0, "learning_rate": 8.21334450808203e-06, "loss": 1.0819, "step": 14580 }, { "epoch": 0.5705062993974489, "grad_norm": 0.0, "learning_rate": 8.212097681237829e-06, "loss": 1.1301, "step": 14581 }, { "epoch": 0.5705454260896784, "grad_norm": 0.0, "learning_rate": 8.210850883105095e-06, "loss": 0.9762, "step": 14582 }, { "epoch": 0.5705845527819078, "grad_norm": 0.0, "learning_rate": 8.209604113703857e-06, "loss": 0.9563, "step": 14583 }, { "epoch": 0.5706236794741373, "grad_norm": 0.0, "learning_rate": 8.20835737305413e-06, "loss": 1.0072, "step": 14584 }, { "epoch": 0.5706628061663667, "grad_norm": 0.0, "learning_rate": 8.207110661175942e-06, "loss": 1.0565, "step": 14585 }, { "epoch": 0.5707019328585962, "grad_norm": 0.0, "learning_rate": 8.205863978089308e-06, "loss": 0.9651, "step": 14586 }, { "epoch": 0.5707410595508255, "grad_norm": 0.0, "learning_rate": 8.20461732381425e-06, "loss": 1.092, "step": 14587 }, { "epoch": 0.570780186243055, "grad_norm": 0.0, "learning_rate": 8.203370698370792e-06, "loss": 1.1127, "step": 14588 }, { "epoch": 0.5708193129352844, "grad_norm": 0.0, "learning_rate": 8.202124101778947e-06, "loss": 0.9667, "step": 14589 }, { "epoch": 0.5708584396275139, "grad_norm": 0.0, "learning_rate": 8.200877534058734e-06, "loss": 1.1448, "step": 14590 }, { "epoch": 0.5708975663197433, "grad_norm": 0.0, "learning_rate": 8.199630995230173e-06, "loss": 1.0595, "step": 14591 }, { "epoch": 0.5709366930119728, "grad_norm": 0.0, "learning_rate": 8.198384485313286e-06, "loss": 0.9977, "step": 14592 }, { "epoch": 0.5709758197042022, "grad_norm": 0.0, "learning_rate": 8.197138004328085e-06, "loss": 1.0912, "step": 14593 }, { "epoch": 0.5710149463964317, "grad_norm": 0.0, "learning_rate": 8.19589155229459e-06, "loss": 0.9971, "step": 14594 }, { "epoch": 0.5710540730886611, "grad_norm": 0.0, "learning_rate": 8.19464512923281e-06, "loss": 0.887, "step": 14595 }, { "epoch": 0.5710931997808906, "grad_norm": 0.0, "learning_rate": 8.193398735162771e-06, "loss": 0.9594, "step": 14596 }, { "epoch": 0.57113232647312, "grad_norm": 0.0, "learning_rate": 8.192152370104487e-06, "loss": 1.0787, "step": 14597 }, { "epoch": 0.5711714531653495, "grad_norm": 0.0, "learning_rate": 8.19090603407797e-06, "loss": 1.0515, "step": 14598 }, { "epoch": 0.5712105798575788, "grad_norm": 0.0, "learning_rate": 8.189659727103233e-06, "loss": 1.1615, "step": 14599 }, { "epoch": 0.5712497065498083, "grad_norm": 0.0, "learning_rate": 8.18841344920029e-06, "loss": 1.002, "step": 14600 }, { "epoch": 0.5712888332420377, "grad_norm": 0.0, "learning_rate": 8.18716720038916e-06, "loss": 0.9864, "step": 14601 }, { "epoch": 0.5713279599342671, "grad_norm": 0.0, "learning_rate": 8.185920980689854e-06, "loss": 0.9349, "step": 14602 }, { "epoch": 0.5713670866264966, "grad_norm": 0.0, "learning_rate": 8.184674790122384e-06, "loss": 0.964, "step": 14603 }, { "epoch": 0.571406213318726, "grad_norm": 0.0, "learning_rate": 8.183428628706758e-06, "loss": 1.0193, "step": 14604 }, { "epoch": 0.5714453400109555, "grad_norm": 0.0, "learning_rate": 8.182182496462995e-06, "loss": 0.9842, "step": 14605 }, { "epoch": 0.5714844667031849, "grad_norm": 0.0, "learning_rate": 8.180936393411103e-06, "loss": 1.0623, "step": 14606 }, { "epoch": 0.5715235933954144, "grad_norm": 0.0, "learning_rate": 8.179690319571096e-06, "loss": 1.1099, "step": 14607 }, { "epoch": 0.5715627200876437, "grad_norm": 0.0, "learning_rate": 8.178444274962975e-06, "loss": 1.0088, "step": 14608 }, { "epoch": 0.5716018467798732, "grad_norm": 0.0, "learning_rate": 8.17719825960676e-06, "loss": 0.9729, "step": 14609 }, { "epoch": 0.5716409734721026, "grad_norm": 0.0, "learning_rate": 8.175952273522458e-06, "loss": 1.0494, "step": 14610 }, { "epoch": 0.5716801001643321, "grad_norm": 0.0, "learning_rate": 8.174706316730076e-06, "loss": 1.1235, "step": 14611 }, { "epoch": 0.5717192268565615, "grad_norm": 0.0, "learning_rate": 8.173460389249625e-06, "loss": 0.9689, "step": 14612 }, { "epoch": 0.571758353548791, "grad_norm": 0.0, "learning_rate": 8.172214491101107e-06, "loss": 1.0067, "step": 14613 }, { "epoch": 0.5717974802410204, "grad_norm": 0.0, "learning_rate": 8.170968622304536e-06, "loss": 0.9592, "step": 14614 }, { "epoch": 0.5718366069332499, "grad_norm": 0.0, "learning_rate": 8.169722782879918e-06, "loss": 1.0465, "step": 14615 }, { "epoch": 0.5718757336254793, "grad_norm": 0.0, "learning_rate": 8.16847697284726e-06, "loss": 0.9689, "step": 14616 }, { "epoch": 0.5719148603177088, "grad_norm": 0.0, "learning_rate": 8.167231192226562e-06, "loss": 1.0082, "step": 14617 }, { "epoch": 0.5719539870099382, "grad_norm": 0.0, "learning_rate": 8.165985441037836e-06, "loss": 0.8974, "step": 14618 }, { "epoch": 0.5719931137021677, "grad_norm": 0.0, "learning_rate": 8.164739719301089e-06, "loss": 1.0593, "step": 14619 }, { "epoch": 0.572032240394397, "grad_norm": 0.0, "learning_rate": 8.163494027036322e-06, "loss": 0.887, "step": 14620 }, { "epoch": 0.5720713670866265, "grad_norm": 0.0, "learning_rate": 8.162248364263538e-06, "loss": 1.0574, "step": 14621 }, { "epoch": 0.5721104937788559, "grad_norm": 0.0, "learning_rate": 8.161002731002741e-06, "loss": 1.0342, "step": 14622 }, { "epoch": 0.5721496204710854, "grad_norm": 0.0, "learning_rate": 8.159757127273936e-06, "loss": 1.099, "step": 14623 }, { "epoch": 0.5721887471633148, "grad_norm": 0.0, "learning_rate": 8.158511553097127e-06, "loss": 0.9916, "step": 14624 }, { "epoch": 0.5722278738555443, "grad_norm": 0.0, "learning_rate": 8.157266008492318e-06, "loss": 0.9485, "step": 14625 }, { "epoch": 0.5722670005477737, "grad_norm": 0.0, "learning_rate": 8.156020493479502e-06, "loss": 0.9342, "step": 14626 }, { "epoch": 0.5723061272400032, "grad_norm": 0.0, "learning_rate": 8.15477500807869e-06, "loss": 1.0062, "step": 14627 }, { "epoch": 0.5723452539322326, "grad_norm": 0.0, "learning_rate": 8.15352955230988e-06, "loss": 1.0207, "step": 14628 }, { "epoch": 0.572384380624462, "grad_norm": 0.0, "learning_rate": 8.152284126193067e-06, "loss": 0.8397, "step": 14629 }, { "epoch": 0.5724235073166914, "grad_norm": 0.0, "learning_rate": 8.15103872974826e-06, "loss": 1.0481, "step": 14630 }, { "epoch": 0.5724626340089208, "grad_norm": 0.0, "learning_rate": 8.149793362995451e-06, "loss": 0.8882, "step": 14631 }, { "epoch": 0.5725017607011503, "grad_norm": 0.0, "learning_rate": 8.148548025954644e-06, "loss": 1.0765, "step": 14632 }, { "epoch": 0.5725408873933797, "grad_norm": 0.0, "learning_rate": 8.147302718645835e-06, "loss": 1.1025, "step": 14633 }, { "epoch": 0.5725800140856092, "grad_norm": 0.0, "learning_rate": 8.146057441089025e-06, "loss": 0.8862, "step": 14634 }, { "epoch": 0.5726191407778386, "grad_norm": 0.0, "learning_rate": 8.144812193304204e-06, "loss": 1.0159, "step": 14635 }, { "epoch": 0.5726582674700681, "grad_norm": 0.0, "learning_rate": 8.143566975311379e-06, "loss": 1.0131, "step": 14636 }, { "epoch": 0.5726973941622975, "grad_norm": 0.0, "learning_rate": 8.14232178713054e-06, "loss": 1.0263, "step": 14637 }, { "epoch": 0.572736520854527, "grad_norm": 0.0, "learning_rate": 8.14107662878169e-06, "loss": 1.081, "step": 14638 }, { "epoch": 0.5727756475467564, "grad_norm": 0.0, "learning_rate": 8.139831500284816e-06, "loss": 1.0895, "step": 14639 }, { "epoch": 0.5728147742389859, "grad_norm": 0.0, "learning_rate": 8.138586401659914e-06, "loss": 1.0316, "step": 14640 }, { "epoch": 0.5728539009312152, "grad_norm": 0.0, "learning_rate": 8.137341332926986e-06, "loss": 1.1065, "step": 14641 }, { "epoch": 0.5728930276234447, "grad_norm": 0.0, "learning_rate": 8.136096294106023e-06, "loss": 1.0612, "step": 14642 }, { "epoch": 0.5729321543156741, "grad_norm": 0.0, "learning_rate": 8.134851285217017e-06, "loss": 1.0179, "step": 14643 }, { "epoch": 0.5729712810079036, "grad_norm": 0.0, "learning_rate": 8.133606306279957e-06, "loss": 1.0401, "step": 14644 }, { "epoch": 0.573010407700133, "grad_norm": 0.0, "learning_rate": 8.132361357314847e-06, "loss": 0.991, "step": 14645 }, { "epoch": 0.5730495343923625, "grad_norm": 0.0, "learning_rate": 8.131116438341672e-06, "loss": 1.1265, "step": 14646 }, { "epoch": 0.5730886610845919, "grad_norm": 0.0, "learning_rate": 8.129871549380429e-06, "loss": 0.9186, "step": 14647 }, { "epoch": 0.5731277877768214, "grad_norm": 0.0, "learning_rate": 8.1286266904511e-06, "loss": 1.0639, "step": 14648 }, { "epoch": 0.5731669144690508, "grad_norm": 0.0, "learning_rate": 8.12738186157368e-06, "loss": 1.0809, "step": 14649 }, { "epoch": 0.5732060411612803, "grad_norm": 0.0, "learning_rate": 8.126137062768165e-06, "loss": 1.0995, "step": 14650 }, { "epoch": 0.5732451678535097, "grad_norm": 0.0, "learning_rate": 8.12489229405454e-06, "loss": 1.0043, "step": 14651 }, { "epoch": 0.5732842945457391, "grad_norm": 0.0, "learning_rate": 8.123647555452795e-06, "loss": 0.9574, "step": 14652 }, { "epoch": 0.5733234212379685, "grad_norm": 0.0, "learning_rate": 8.122402846982916e-06, "loss": 0.9559, "step": 14653 }, { "epoch": 0.573362547930198, "grad_norm": 0.0, "learning_rate": 8.121158168664898e-06, "loss": 1.0435, "step": 14654 }, { "epoch": 0.5734016746224274, "grad_norm": 0.0, "learning_rate": 8.119913520518726e-06, "loss": 0.9895, "step": 14655 }, { "epoch": 0.5734408013146569, "grad_norm": 0.0, "learning_rate": 8.118668902564386e-06, "loss": 0.9315, "step": 14656 }, { "epoch": 0.5734799280068863, "grad_norm": 0.0, "learning_rate": 8.117424314821867e-06, "loss": 1.0412, "step": 14657 }, { "epoch": 0.5735190546991157, "grad_norm": 0.0, "learning_rate": 8.11617975731115e-06, "loss": 0.9806, "step": 14658 }, { "epoch": 0.5735581813913452, "grad_norm": 0.0, "learning_rate": 8.11493523005223e-06, "loss": 1.0367, "step": 14659 }, { "epoch": 0.5735973080835746, "grad_norm": 0.0, "learning_rate": 8.113690733065087e-06, "loss": 0.9929, "step": 14660 }, { "epoch": 0.5736364347758041, "grad_norm": 0.0, "learning_rate": 8.112446266369708e-06, "loss": 0.8964, "step": 14661 }, { "epoch": 0.5736755614680334, "grad_norm": 0.0, "learning_rate": 8.111201829986071e-06, "loss": 0.9442, "step": 14662 }, { "epoch": 0.5737146881602629, "grad_norm": 0.0, "learning_rate": 8.109957423934172e-06, "loss": 1.1802, "step": 14663 }, { "epoch": 0.5737538148524923, "grad_norm": 0.0, "learning_rate": 8.108713048233988e-06, "loss": 0.8762, "step": 14664 }, { "epoch": 0.5737929415447218, "grad_norm": 0.0, "learning_rate": 8.107468702905503e-06, "loss": 1.0709, "step": 14665 }, { "epoch": 0.5738320682369512, "grad_norm": 0.0, "learning_rate": 8.106224387968696e-06, "loss": 1.0439, "step": 14666 }, { "epoch": 0.5738711949291807, "grad_norm": 0.0, "learning_rate": 8.104980103443555e-06, "loss": 1.0302, "step": 14667 }, { "epoch": 0.5739103216214101, "grad_norm": 0.0, "learning_rate": 8.103735849350056e-06, "loss": 1.0657, "step": 14668 }, { "epoch": 0.5739494483136396, "grad_norm": 0.0, "learning_rate": 8.102491625708186e-06, "loss": 1.0994, "step": 14669 }, { "epoch": 0.573988575005869, "grad_norm": 0.0, "learning_rate": 8.101247432537922e-06, "loss": 0.9942, "step": 14670 }, { "epoch": 0.5740277016980985, "grad_norm": 0.0, "learning_rate": 8.100003269859244e-06, "loss": 0.9049, "step": 14671 }, { "epoch": 0.5740668283903279, "grad_norm": 0.0, "learning_rate": 8.098759137692133e-06, "loss": 0.885, "step": 14672 }, { "epoch": 0.5741059550825574, "grad_norm": 0.0, "learning_rate": 8.09751503605657e-06, "loss": 0.9363, "step": 14673 }, { "epoch": 0.5741450817747867, "grad_norm": 0.0, "learning_rate": 8.096270964972528e-06, "loss": 1.1305, "step": 14674 }, { "epoch": 0.5741842084670162, "grad_norm": 0.0, "learning_rate": 8.095026924459989e-06, "loss": 1.0545, "step": 14675 }, { "epoch": 0.5742233351592456, "grad_norm": 0.0, "learning_rate": 8.093782914538933e-06, "loss": 1.1376, "step": 14676 }, { "epoch": 0.5742624618514751, "grad_norm": 0.0, "learning_rate": 8.092538935229336e-06, "loss": 1.075, "step": 14677 }, { "epoch": 0.5743015885437045, "grad_norm": 0.0, "learning_rate": 8.091294986551173e-06, "loss": 0.8517, "step": 14678 }, { "epoch": 0.574340715235934, "grad_norm": 0.0, "learning_rate": 8.090051068524418e-06, "loss": 0.905, "step": 14679 }, { "epoch": 0.5743798419281634, "grad_norm": 0.0, "learning_rate": 8.088807181169048e-06, "loss": 0.9192, "step": 14680 }, { "epoch": 0.5744189686203929, "grad_norm": 0.0, "learning_rate": 8.087563324505043e-06, "loss": 0.9177, "step": 14681 }, { "epoch": 0.5744580953126223, "grad_norm": 0.0, "learning_rate": 8.086319498552376e-06, "loss": 0.9863, "step": 14682 }, { "epoch": 0.5744972220048518, "grad_norm": 0.0, "learning_rate": 8.085075703331017e-06, "loss": 1.0848, "step": 14683 }, { "epoch": 0.5745363486970811, "grad_norm": 0.0, "learning_rate": 8.083831938860941e-06, "loss": 0.9236, "step": 14684 }, { "epoch": 0.5745754753893106, "grad_norm": 0.0, "learning_rate": 8.082588205162127e-06, "loss": 0.994, "step": 14685 }, { "epoch": 0.57461460208154, "grad_norm": 0.0, "learning_rate": 8.081344502254543e-06, "loss": 0.9886, "step": 14686 }, { "epoch": 0.5746537287737694, "grad_norm": 0.0, "learning_rate": 8.080100830158163e-06, "loss": 1.0237, "step": 14687 }, { "epoch": 0.5746928554659989, "grad_norm": 0.0, "learning_rate": 8.078857188892957e-06, "loss": 0.923, "step": 14688 }, { "epoch": 0.5747319821582283, "grad_norm": 0.0, "learning_rate": 8.077613578478894e-06, "loss": 1.0873, "step": 14689 }, { "epoch": 0.5747711088504578, "grad_norm": 0.0, "learning_rate": 8.076369998935951e-06, "loss": 1.0358, "step": 14690 }, { "epoch": 0.5748102355426872, "grad_norm": 0.0, "learning_rate": 8.075126450284095e-06, "loss": 1.0073, "step": 14691 }, { "epoch": 0.5748493622349167, "grad_norm": 0.0, "learning_rate": 8.073882932543298e-06, "loss": 1.0633, "step": 14692 }, { "epoch": 0.5748884889271461, "grad_norm": 0.0, "learning_rate": 8.07263944573352e-06, "loss": 1.0352, "step": 14693 }, { "epoch": 0.5749276156193756, "grad_norm": 0.0, "learning_rate": 8.071395989874747e-06, "loss": 1.0515, "step": 14694 }, { "epoch": 0.5749667423116049, "grad_norm": 0.0, "learning_rate": 8.070152564986934e-06, "loss": 1.0595, "step": 14695 }, { "epoch": 0.5750058690038344, "grad_norm": 0.0, "learning_rate": 8.068909171090053e-06, "loss": 0.852, "step": 14696 }, { "epoch": 0.5750449956960638, "grad_norm": 0.0, "learning_rate": 8.067665808204071e-06, "loss": 1.0461, "step": 14697 }, { "epoch": 0.5750841223882933, "grad_norm": 0.0, "learning_rate": 8.06642247634895e-06, "loss": 1.0513, "step": 14698 }, { "epoch": 0.5751232490805227, "grad_norm": 0.0, "learning_rate": 8.065179175544666e-06, "loss": 1.0874, "step": 14699 }, { "epoch": 0.5751623757727522, "grad_norm": 0.0, "learning_rate": 8.06393590581118e-06, "loss": 1.0842, "step": 14700 }, { "epoch": 0.5752015024649816, "grad_norm": 0.0, "learning_rate": 8.062692667168458e-06, "loss": 0.9963, "step": 14701 }, { "epoch": 0.5752406291572111, "grad_norm": 0.0, "learning_rate": 8.06144945963646e-06, "loss": 1.0443, "step": 14702 }, { "epoch": 0.5752797558494405, "grad_norm": 0.0, "learning_rate": 8.060206283235159e-06, "loss": 1.02, "step": 14703 }, { "epoch": 0.57531888254167, "grad_norm": 0.0, "learning_rate": 8.058963137984512e-06, "loss": 1.1952, "step": 14704 }, { "epoch": 0.5753580092338993, "grad_norm": 0.0, "learning_rate": 8.057720023904487e-06, "loss": 1.0076, "step": 14705 }, { "epoch": 0.5753971359261288, "grad_norm": 0.0, "learning_rate": 8.056476941015043e-06, "loss": 1.2063, "step": 14706 }, { "epoch": 0.5754362626183582, "grad_norm": 0.0, "learning_rate": 8.055233889336142e-06, "loss": 1.0664, "step": 14707 }, { "epoch": 0.5754753893105877, "grad_norm": 0.0, "learning_rate": 8.05399086888775e-06, "loss": 0.9406, "step": 14708 }, { "epoch": 0.5755145160028171, "grad_norm": 0.0, "learning_rate": 8.052747879689827e-06, "loss": 1.0655, "step": 14709 }, { "epoch": 0.5755536426950466, "grad_norm": 0.0, "learning_rate": 8.051504921762332e-06, "loss": 0.9167, "step": 14710 }, { "epoch": 0.575592769387276, "grad_norm": 0.0, "learning_rate": 8.050261995125226e-06, "loss": 1.1458, "step": 14711 }, { "epoch": 0.5756318960795055, "grad_norm": 0.0, "learning_rate": 8.04901909979847e-06, "loss": 1.1512, "step": 14712 }, { "epoch": 0.5756710227717349, "grad_norm": 0.0, "learning_rate": 8.047776235802021e-06, "loss": 1.0797, "step": 14713 }, { "epoch": 0.5757101494639644, "grad_norm": 0.0, "learning_rate": 8.046533403155841e-06, "loss": 1.0018, "step": 14714 }, { "epoch": 0.5757492761561938, "grad_norm": 0.0, "learning_rate": 8.045290601879888e-06, "loss": 0.9493, "step": 14715 }, { "epoch": 0.5757884028484231, "grad_norm": 0.0, "learning_rate": 8.044047831994114e-06, "loss": 1.0704, "step": 14716 }, { "epoch": 0.5758275295406526, "grad_norm": 0.0, "learning_rate": 8.042805093518484e-06, "loss": 1.1395, "step": 14717 }, { "epoch": 0.575866656232882, "grad_norm": 0.0, "learning_rate": 8.041562386472953e-06, "loss": 0.9831, "step": 14718 }, { "epoch": 0.5759057829251115, "grad_norm": 0.0, "learning_rate": 8.040319710877473e-06, "loss": 1.0236, "step": 14719 }, { "epoch": 0.5759449096173409, "grad_norm": 0.0, "learning_rate": 8.039077066752003e-06, "loss": 1.0986, "step": 14720 }, { "epoch": 0.5759840363095704, "grad_norm": 0.0, "learning_rate": 8.0378344541165e-06, "loss": 1.0848, "step": 14721 }, { "epoch": 0.5760231630017998, "grad_norm": 0.0, "learning_rate": 8.036591872990918e-06, "loss": 0.9984, "step": 14722 }, { "epoch": 0.5760622896940293, "grad_norm": 0.0, "learning_rate": 8.035349323395209e-06, "loss": 1.0071, "step": 14723 }, { "epoch": 0.5761014163862587, "grad_norm": 0.0, "learning_rate": 8.034106805349324e-06, "loss": 1.0004, "step": 14724 }, { "epoch": 0.5761405430784882, "grad_norm": 0.0, "learning_rate": 8.032864318873224e-06, "loss": 0.8923, "step": 14725 }, { "epoch": 0.5761796697707176, "grad_norm": 0.0, "learning_rate": 8.031621863986857e-06, "loss": 1.0256, "step": 14726 }, { "epoch": 0.576218796462947, "grad_norm": 0.0, "learning_rate": 8.03037944071018e-06, "loss": 1.1369, "step": 14727 }, { "epoch": 0.5762579231551764, "grad_norm": 0.0, "learning_rate": 8.029137049063139e-06, "loss": 1.0672, "step": 14728 }, { "epoch": 0.5762970498474059, "grad_norm": 0.0, "learning_rate": 8.027894689065684e-06, "loss": 1.0034, "step": 14729 }, { "epoch": 0.5763361765396353, "grad_norm": 0.0, "learning_rate": 8.02665236073777e-06, "loss": 1.0831, "step": 14730 }, { "epoch": 0.5763753032318648, "grad_norm": 0.0, "learning_rate": 8.02541006409935e-06, "loss": 0.9118, "step": 14731 }, { "epoch": 0.5764144299240942, "grad_norm": 0.0, "learning_rate": 8.02416779917037e-06, "loss": 1.0627, "step": 14732 }, { "epoch": 0.5764535566163237, "grad_norm": 0.0, "learning_rate": 8.022925565970774e-06, "loss": 1.117, "step": 14733 }, { "epoch": 0.5764926833085531, "grad_norm": 0.0, "learning_rate": 8.021683364520519e-06, "loss": 1.1006, "step": 14734 }, { "epoch": 0.5765318100007826, "grad_norm": 0.0, "learning_rate": 8.020441194839552e-06, "loss": 0.9209, "step": 14735 }, { "epoch": 0.576570936693012, "grad_norm": 0.0, "learning_rate": 8.019199056947819e-06, "loss": 0.9649, "step": 14736 }, { "epoch": 0.5766100633852415, "grad_norm": 0.0, "learning_rate": 8.017956950865266e-06, "loss": 1.0986, "step": 14737 }, { "epoch": 0.5766491900774708, "grad_norm": 0.0, "learning_rate": 8.016714876611838e-06, "loss": 1.1412, "step": 14738 }, { "epoch": 0.5766883167697003, "grad_norm": 0.0, "learning_rate": 8.015472834207486e-06, "loss": 0.945, "step": 14739 }, { "epoch": 0.5767274434619297, "grad_norm": 0.0, "learning_rate": 8.014230823672154e-06, "loss": 1.1036, "step": 14740 }, { "epoch": 0.5767665701541592, "grad_norm": 0.0, "learning_rate": 8.012988845025787e-06, "loss": 1.1095, "step": 14741 }, { "epoch": 0.5768056968463886, "grad_norm": 0.0, "learning_rate": 8.011746898288326e-06, "loss": 0.9283, "step": 14742 }, { "epoch": 0.576844823538618, "grad_norm": 0.0, "learning_rate": 8.010504983479723e-06, "loss": 1.0, "step": 14743 }, { "epoch": 0.5768839502308475, "grad_norm": 0.0, "learning_rate": 8.009263100619915e-06, "loss": 1.0207, "step": 14744 }, { "epoch": 0.5769230769230769, "grad_norm": 0.0, "learning_rate": 8.008021249728848e-06, "loss": 1.1321, "step": 14745 }, { "epoch": 0.5769622036153064, "grad_norm": 0.0, "learning_rate": 8.006779430826463e-06, "loss": 0.9825, "step": 14746 }, { "epoch": 0.5770013303075358, "grad_norm": 0.0, "learning_rate": 8.0055376439327e-06, "loss": 1.0848, "step": 14747 }, { "epoch": 0.5770404569997652, "grad_norm": 0.0, "learning_rate": 8.004295889067506e-06, "loss": 1.1436, "step": 14748 }, { "epoch": 0.5770795836919946, "grad_norm": 0.0, "learning_rate": 8.00305416625082e-06, "loss": 0.9608, "step": 14749 }, { "epoch": 0.5771187103842241, "grad_norm": 0.0, "learning_rate": 8.001812475502582e-06, "loss": 0.9341, "step": 14750 }, { "epoch": 0.5771578370764535, "grad_norm": 0.0, "learning_rate": 8.000570816842728e-06, "loss": 0.9626, "step": 14751 }, { "epoch": 0.577196963768683, "grad_norm": 0.0, "learning_rate": 7.999329190291202e-06, "loss": 1.0557, "step": 14752 }, { "epoch": 0.5772360904609124, "grad_norm": 0.0, "learning_rate": 7.998087595867946e-06, "loss": 1.0567, "step": 14753 }, { "epoch": 0.5772752171531419, "grad_norm": 0.0, "learning_rate": 7.996846033592893e-06, "loss": 1.0065, "step": 14754 }, { "epoch": 0.5773143438453713, "grad_norm": 0.0, "learning_rate": 7.995604503485984e-06, "loss": 1.1538, "step": 14755 }, { "epoch": 0.5773534705376008, "grad_norm": 0.0, "learning_rate": 7.994363005567153e-06, "loss": 0.9249, "step": 14756 }, { "epoch": 0.5773925972298302, "grad_norm": 0.0, "learning_rate": 7.993121539856336e-06, "loss": 1.0037, "step": 14757 }, { "epoch": 0.5774317239220597, "grad_norm": 0.0, "learning_rate": 7.991880106373478e-06, "loss": 1.2276, "step": 14758 }, { "epoch": 0.577470850614289, "grad_norm": 0.0, "learning_rate": 7.990638705138506e-06, "loss": 1.0343, "step": 14759 }, { "epoch": 0.5775099773065185, "grad_norm": 0.0, "learning_rate": 7.989397336171358e-06, "loss": 0.9831, "step": 14760 }, { "epoch": 0.5775491039987479, "grad_norm": 0.0, "learning_rate": 7.988155999491972e-06, "loss": 0.9455, "step": 14761 }, { "epoch": 0.5775882306909774, "grad_norm": 0.0, "learning_rate": 7.98691469512028e-06, "loss": 1.0512, "step": 14762 }, { "epoch": 0.5776273573832068, "grad_norm": 0.0, "learning_rate": 7.985673423076213e-06, "loss": 1.0592, "step": 14763 }, { "epoch": 0.5776664840754363, "grad_norm": 0.0, "learning_rate": 7.984432183379706e-06, "loss": 1.0486, "step": 14764 }, { "epoch": 0.5777056107676657, "grad_norm": 0.0, "learning_rate": 7.983190976050694e-06, "loss": 0.9075, "step": 14765 }, { "epoch": 0.5777447374598952, "grad_norm": 0.0, "learning_rate": 7.981949801109107e-06, "loss": 1.0377, "step": 14766 }, { "epoch": 0.5777838641521246, "grad_norm": 0.0, "learning_rate": 7.98070865857488e-06, "loss": 1.0094, "step": 14767 }, { "epoch": 0.5778229908443541, "grad_norm": 0.0, "learning_rate": 7.97946754846794e-06, "loss": 0.9344, "step": 14768 }, { "epoch": 0.5778621175365835, "grad_norm": 0.0, "learning_rate": 7.978226470808217e-06, "loss": 0.9687, "step": 14769 }, { "epoch": 0.577901244228813, "grad_norm": 0.0, "learning_rate": 7.976985425615647e-06, "loss": 0.8944, "step": 14770 }, { "epoch": 0.5779403709210423, "grad_norm": 0.0, "learning_rate": 7.975744412910155e-06, "loss": 1.0373, "step": 14771 }, { "epoch": 0.5779794976132717, "grad_norm": 0.0, "learning_rate": 7.974503432711671e-06, "loss": 1.0491, "step": 14772 }, { "epoch": 0.5780186243055012, "grad_norm": 0.0, "learning_rate": 7.973262485040125e-06, "loss": 1.0297, "step": 14773 }, { "epoch": 0.5780577509977306, "grad_norm": 0.0, "learning_rate": 7.972021569915437e-06, "loss": 1.039, "step": 14774 }, { "epoch": 0.5780968776899601, "grad_norm": 0.0, "learning_rate": 7.970780687357549e-06, "loss": 1.072, "step": 14775 }, { "epoch": 0.5781360043821895, "grad_norm": 0.0, "learning_rate": 7.96953983738638e-06, "loss": 1.0848, "step": 14776 }, { "epoch": 0.578175131074419, "grad_norm": 0.0, "learning_rate": 7.968299020021855e-06, "loss": 0.9927, "step": 14777 }, { "epoch": 0.5782142577666484, "grad_norm": 0.0, "learning_rate": 7.967058235283898e-06, "loss": 1.0194, "step": 14778 }, { "epoch": 0.5782533844588779, "grad_norm": 0.0, "learning_rate": 7.965817483192444e-06, "loss": 1.0001, "step": 14779 }, { "epoch": 0.5782925111511072, "grad_norm": 0.0, "learning_rate": 7.96457676376741e-06, "loss": 1.0118, "step": 14780 }, { "epoch": 0.5783316378433367, "grad_norm": 0.0, "learning_rate": 7.963336077028725e-06, "loss": 1.0305, "step": 14781 }, { "epoch": 0.5783707645355661, "grad_norm": 0.0, "learning_rate": 7.962095422996305e-06, "loss": 1.0829, "step": 14782 }, { "epoch": 0.5784098912277956, "grad_norm": 0.0, "learning_rate": 7.960854801690084e-06, "loss": 0.9945, "step": 14783 }, { "epoch": 0.578449017920025, "grad_norm": 0.0, "learning_rate": 7.959614213129979e-06, "loss": 1.0077, "step": 14784 }, { "epoch": 0.5784881446122545, "grad_norm": 0.0, "learning_rate": 7.958373657335913e-06, "loss": 1.0385, "step": 14785 }, { "epoch": 0.5785272713044839, "grad_norm": 0.0, "learning_rate": 7.957133134327808e-06, "loss": 0.8807, "step": 14786 }, { "epoch": 0.5785663979967134, "grad_norm": 0.0, "learning_rate": 7.95589264412558e-06, "loss": 1.0089, "step": 14787 }, { "epoch": 0.5786055246889428, "grad_norm": 0.0, "learning_rate": 7.954652186749161e-06, "loss": 1.0164, "step": 14788 }, { "epoch": 0.5786446513811723, "grad_norm": 0.0, "learning_rate": 7.953411762218463e-06, "loss": 0.9388, "step": 14789 }, { "epoch": 0.5786837780734017, "grad_norm": 0.0, "learning_rate": 7.952171370553408e-06, "loss": 1.0468, "step": 14790 }, { "epoch": 0.5787229047656312, "grad_norm": 0.0, "learning_rate": 7.95093101177391e-06, "loss": 1.1084, "step": 14791 }, { "epoch": 0.5787620314578605, "grad_norm": 0.0, "learning_rate": 7.949690685899898e-06, "loss": 1.1609, "step": 14792 }, { "epoch": 0.57880115815009, "grad_norm": 0.0, "learning_rate": 7.948450392951283e-06, "loss": 0.919, "step": 14793 }, { "epoch": 0.5788402848423194, "grad_norm": 0.0, "learning_rate": 7.947210132947984e-06, "loss": 1.0941, "step": 14794 }, { "epoch": 0.5788794115345489, "grad_norm": 0.0, "learning_rate": 7.94596990590992e-06, "loss": 0.9974, "step": 14795 }, { "epoch": 0.5789185382267783, "grad_norm": 0.0, "learning_rate": 7.944729711856999e-06, "loss": 1.1338, "step": 14796 }, { "epoch": 0.5789576649190078, "grad_norm": 0.0, "learning_rate": 7.943489550809148e-06, "loss": 1.0983, "step": 14797 }, { "epoch": 0.5789967916112372, "grad_norm": 0.0, "learning_rate": 7.94224942278628e-06, "loss": 0.9953, "step": 14798 }, { "epoch": 0.5790359183034667, "grad_norm": 0.0, "learning_rate": 7.941009327808305e-06, "loss": 1.0745, "step": 14799 }, { "epoch": 0.5790750449956961, "grad_norm": 0.0, "learning_rate": 7.939769265895138e-06, "loss": 1.024, "step": 14800 }, { "epoch": 0.5791141716879254, "grad_norm": 0.0, "learning_rate": 7.938529237066698e-06, "loss": 0.9947, "step": 14801 }, { "epoch": 0.579153298380155, "grad_norm": 0.0, "learning_rate": 7.937289241342893e-06, "loss": 0.933, "step": 14802 }, { "epoch": 0.5791924250723843, "grad_norm": 0.0, "learning_rate": 7.936049278743641e-06, "loss": 0.9707, "step": 14803 }, { "epoch": 0.5792315517646138, "grad_norm": 0.0, "learning_rate": 7.93480934928885e-06, "loss": 0.9493, "step": 14804 }, { "epoch": 0.5792706784568432, "grad_norm": 0.0, "learning_rate": 7.933569452998433e-06, "loss": 1.1125, "step": 14805 }, { "epoch": 0.5793098051490727, "grad_norm": 0.0, "learning_rate": 7.932329589892303e-06, "loss": 0.8813, "step": 14806 }, { "epoch": 0.5793489318413021, "grad_norm": 0.0, "learning_rate": 7.931089759990367e-06, "loss": 1.1058, "step": 14807 }, { "epoch": 0.5793880585335316, "grad_norm": 0.0, "learning_rate": 7.929849963312536e-06, "loss": 0.9962, "step": 14808 }, { "epoch": 0.579427185225761, "grad_norm": 0.0, "learning_rate": 7.92861019987872e-06, "loss": 0.9979, "step": 14809 }, { "epoch": 0.5794663119179905, "grad_norm": 0.0, "learning_rate": 7.92737046970883e-06, "loss": 1.052, "step": 14810 }, { "epoch": 0.5795054386102199, "grad_norm": 0.0, "learning_rate": 7.926130772822775e-06, "loss": 0.8022, "step": 14811 }, { "epoch": 0.5795445653024494, "grad_norm": 0.0, "learning_rate": 7.92489110924046e-06, "loss": 1.0893, "step": 14812 }, { "epoch": 0.5795836919946787, "grad_norm": 0.0, "learning_rate": 7.923651478981793e-06, "loss": 0.8496, "step": 14813 }, { "epoch": 0.5796228186869082, "grad_norm": 0.0, "learning_rate": 7.922411882066678e-06, "loss": 1.141, "step": 14814 }, { "epoch": 0.5796619453791376, "grad_norm": 0.0, "learning_rate": 7.921172318515028e-06, "loss": 0.8797, "step": 14815 }, { "epoch": 0.5797010720713671, "grad_norm": 0.0, "learning_rate": 7.919932788346748e-06, "loss": 0.9657, "step": 14816 }, { "epoch": 0.5797401987635965, "grad_norm": 0.0, "learning_rate": 7.91869329158174e-06, "loss": 1.1117, "step": 14817 }, { "epoch": 0.579779325455826, "grad_norm": 0.0, "learning_rate": 7.917453828239905e-06, "loss": 0.9694, "step": 14818 }, { "epoch": 0.5798184521480554, "grad_norm": 0.0, "learning_rate": 7.916214398341156e-06, "loss": 1.1052, "step": 14819 }, { "epoch": 0.5798575788402849, "grad_norm": 0.0, "learning_rate": 7.914975001905393e-06, "loss": 1.1338, "step": 14820 }, { "epoch": 0.5798967055325143, "grad_norm": 0.0, "learning_rate": 7.913735638952518e-06, "loss": 1.0018, "step": 14821 }, { "epoch": 0.5799358322247438, "grad_norm": 0.0, "learning_rate": 7.912496309502435e-06, "loss": 1.0328, "step": 14822 }, { "epoch": 0.5799749589169731, "grad_norm": 0.0, "learning_rate": 7.91125701357504e-06, "loss": 1.0598, "step": 14823 }, { "epoch": 0.5800140856092026, "grad_norm": 0.0, "learning_rate": 7.910017751190246e-06, "loss": 1.0187, "step": 14824 }, { "epoch": 0.580053212301432, "grad_norm": 0.0, "learning_rate": 7.908778522367949e-06, "loss": 0.9214, "step": 14825 }, { "epoch": 0.5800923389936615, "grad_norm": 0.0, "learning_rate": 7.907539327128046e-06, "loss": 1.1336, "step": 14826 }, { "epoch": 0.5801314656858909, "grad_norm": 0.0, "learning_rate": 7.906300165490437e-06, "loss": 1.0968, "step": 14827 }, { "epoch": 0.5801705923781204, "grad_norm": 0.0, "learning_rate": 7.905061037475026e-06, "loss": 1.0202, "step": 14828 }, { "epoch": 0.5802097190703498, "grad_norm": 0.0, "learning_rate": 7.903821943101711e-06, "loss": 1.0207, "step": 14829 }, { "epoch": 0.5802488457625792, "grad_norm": 0.0, "learning_rate": 7.90258288239039e-06, "loss": 1.0438, "step": 14830 }, { "epoch": 0.5802879724548087, "grad_norm": 0.0, "learning_rate": 7.901343855360954e-06, "loss": 1.017, "step": 14831 }, { "epoch": 0.5803270991470381, "grad_norm": 0.0, "learning_rate": 7.90010486203331e-06, "loss": 0.9474, "step": 14832 }, { "epoch": 0.5803662258392676, "grad_norm": 0.0, "learning_rate": 7.898865902427351e-06, "loss": 1.028, "step": 14833 }, { "epoch": 0.5804053525314969, "grad_norm": 0.0, "learning_rate": 7.897626976562974e-06, "loss": 0.9259, "step": 14834 }, { "epoch": 0.5804444792237264, "grad_norm": 0.0, "learning_rate": 7.896388084460071e-06, "loss": 1.1435, "step": 14835 }, { "epoch": 0.5804836059159558, "grad_norm": 0.0, "learning_rate": 7.895149226138536e-06, "loss": 1.0178, "step": 14836 }, { "epoch": 0.5805227326081853, "grad_norm": 0.0, "learning_rate": 7.893910401618271e-06, "loss": 0.9725, "step": 14837 }, { "epoch": 0.5805618593004147, "grad_norm": 0.0, "learning_rate": 7.892671610919166e-06, "loss": 1.0193, "step": 14838 }, { "epoch": 0.5806009859926442, "grad_norm": 0.0, "learning_rate": 7.891432854061115e-06, "loss": 0.9424, "step": 14839 }, { "epoch": 0.5806401126848736, "grad_norm": 0.0, "learning_rate": 7.890194131064008e-06, "loss": 1.0496, "step": 14840 }, { "epoch": 0.5806792393771031, "grad_norm": 0.0, "learning_rate": 7.888955441947738e-06, "loss": 1.01, "step": 14841 }, { "epoch": 0.5807183660693325, "grad_norm": 0.0, "learning_rate": 7.887716786732202e-06, "loss": 0.9812, "step": 14842 }, { "epoch": 0.580757492761562, "grad_norm": 0.0, "learning_rate": 7.886478165437288e-06, "loss": 1.0488, "step": 14843 }, { "epoch": 0.5807966194537914, "grad_norm": 0.0, "learning_rate": 7.885239578082885e-06, "loss": 1.0331, "step": 14844 }, { "epoch": 0.5808357461460208, "grad_norm": 0.0, "learning_rate": 7.884001024688881e-06, "loss": 0.9454, "step": 14845 }, { "epoch": 0.5808748728382502, "grad_norm": 0.0, "learning_rate": 7.882762505275175e-06, "loss": 1.0976, "step": 14846 }, { "epoch": 0.5809139995304797, "grad_norm": 0.0, "learning_rate": 7.881524019861645e-06, "loss": 0.946, "step": 14847 }, { "epoch": 0.5809531262227091, "grad_norm": 0.0, "learning_rate": 7.880285568468187e-06, "loss": 0.9852, "step": 14848 }, { "epoch": 0.5809922529149386, "grad_norm": 0.0, "learning_rate": 7.879047151114686e-06, "loss": 0.9877, "step": 14849 }, { "epoch": 0.581031379607168, "grad_norm": 0.0, "learning_rate": 7.877808767821031e-06, "loss": 0.9866, "step": 14850 }, { "epoch": 0.5810705062993975, "grad_norm": 0.0, "learning_rate": 7.876570418607108e-06, "loss": 0.9919, "step": 14851 }, { "epoch": 0.5811096329916269, "grad_norm": 0.0, "learning_rate": 7.875332103492803e-06, "loss": 0.9722, "step": 14852 }, { "epoch": 0.5811487596838564, "grad_norm": 0.0, "learning_rate": 7.874093822498002e-06, "loss": 0.9928, "step": 14853 }, { "epoch": 0.5811878863760858, "grad_norm": 0.0, "learning_rate": 7.872855575642589e-06, "loss": 1.0182, "step": 14854 }, { "epoch": 0.5812270130683153, "grad_norm": 0.0, "learning_rate": 7.871617362946451e-06, "loss": 0.8826, "step": 14855 }, { "epoch": 0.5812661397605446, "grad_norm": 0.0, "learning_rate": 7.870379184429472e-06, "loss": 1.0818, "step": 14856 }, { "epoch": 0.581305266452774, "grad_norm": 0.0, "learning_rate": 7.869141040111534e-06, "loss": 0.9293, "step": 14857 }, { "epoch": 0.5813443931450035, "grad_norm": 0.0, "learning_rate": 7.867902930012518e-06, "loss": 1.0094, "step": 14858 }, { "epoch": 0.5813835198372329, "grad_norm": 0.0, "learning_rate": 7.866664854152312e-06, "loss": 1.0567, "step": 14859 }, { "epoch": 0.5814226465294624, "grad_norm": 0.0, "learning_rate": 7.865426812550795e-06, "loss": 1.0122, "step": 14860 }, { "epoch": 0.5814617732216918, "grad_norm": 0.0, "learning_rate": 7.864188805227852e-06, "loss": 1.0421, "step": 14861 }, { "epoch": 0.5815008999139213, "grad_norm": 0.0, "learning_rate": 7.862950832203358e-06, "loss": 0.9866, "step": 14862 }, { "epoch": 0.5815400266061507, "grad_norm": 0.0, "learning_rate": 7.861712893497191e-06, "loss": 0.9598, "step": 14863 }, { "epoch": 0.5815791532983802, "grad_norm": 0.0, "learning_rate": 7.86047498912924e-06, "loss": 1.0609, "step": 14864 }, { "epoch": 0.5816182799906096, "grad_norm": 0.0, "learning_rate": 7.85923711911938e-06, "loss": 1.0602, "step": 14865 }, { "epoch": 0.581657406682839, "grad_norm": 0.0, "learning_rate": 7.85799928348749e-06, "loss": 1.0525, "step": 14866 }, { "epoch": 0.5816965333750684, "grad_norm": 0.0, "learning_rate": 7.856761482253442e-06, "loss": 0.9302, "step": 14867 }, { "epoch": 0.5817356600672979, "grad_norm": 0.0, "learning_rate": 7.855523715437123e-06, "loss": 0.9716, "step": 14868 }, { "epoch": 0.5817747867595273, "grad_norm": 0.0, "learning_rate": 7.854285983058408e-06, "loss": 0.9952, "step": 14869 }, { "epoch": 0.5818139134517568, "grad_norm": 0.0, "learning_rate": 7.85304828513717e-06, "loss": 0.943, "step": 14870 }, { "epoch": 0.5818530401439862, "grad_norm": 0.0, "learning_rate": 7.851810621693287e-06, "loss": 1.0405, "step": 14871 }, { "epoch": 0.5818921668362157, "grad_norm": 0.0, "learning_rate": 7.850572992746628e-06, "loss": 0.9554, "step": 14872 }, { "epoch": 0.5819312935284451, "grad_norm": 0.0, "learning_rate": 7.849335398317078e-06, "loss": 1.0016, "step": 14873 }, { "epoch": 0.5819704202206746, "grad_norm": 0.0, "learning_rate": 7.848097838424506e-06, "loss": 1.0335, "step": 14874 }, { "epoch": 0.582009546912904, "grad_norm": 0.0, "learning_rate": 7.846860313088788e-06, "loss": 1.0828, "step": 14875 }, { "epoch": 0.5820486736051335, "grad_norm": 0.0, "learning_rate": 7.84562282232979e-06, "loss": 1.0724, "step": 14876 }, { "epoch": 0.5820878002973628, "grad_norm": 0.0, "learning_rate": 7.844385366167396e-06, "loss": 0.8804, "step": 14877 }, { "epoch": 0.5821269269895923, "grad_norm": 0.0, "learning_rate": 7.843147944621468e-06, "loss": 1.0128, "step": 14878 }, { "epoch": 0.5821660536818217, "grad_norm": 0.0, "learning_rate": 7.841910557711884e-06, "loss": 1.0388, "step": 14879 }, { "epoch": 0.5822051803740512, "grad_norm": 0.0, "learning_rate": 7.840673205458513e-06, "loss": 1.1364, "step": 14880 }, { "epoch": 0.5822443070662806, "grad_norm": 0.0, "learning_rate": 7.839435887881218e-06, "loss": 1.0844, "step": 14881 }, { "epoch": 0.5822834337585101, "grad_norm": 0.0, "learning_rate": 7.838198604999881e-06, "loss": 1.0306, "step": 14882 }, { "epoch": 0.5823225604507395, "grad_norm": 0.0, "learning_rate": 7.836961356834365e-06, "loss": 1.0464, "step": 14883 }, { "epoch": 0.582361687142969, "grad_norm": 0.0, "learning_rate": 7.835724143404539e-06, "loss": 0.8782, "step": 14884 }, { "epoch": 0.5824008138351984, "grad_norm": 0.0, "learning_rate": 7.83448696473027e-06, "loss": 0.9532, "step": 14885 }, { "epoch": 0.5824399405274278, "grad_norm": 0.0, "learning_rate": 7.833249820831425e-06, "loss": 1.0603, "step": 14886 }, { "epoch": 0.5824790672196573, "grad_norm": 0.0, "learning_rate": 7.832012711727877e-06, "loss": 0.8919, "step": 14887 }, { "epoch": 0.5825181939118866, "grad_norm": 0.0, "learning_rate": 7.830775637439487e-06, "loss": 1.0671, "step": 14888 }, { "epoch": 0.5825573206041161, "grad_norm": 0.0, "learning_rate": 7.82953859798612e-06, "loss": 1.0281, "step": 14889 }, { "epoch": 0.5825964472963455, "grad_norm": 0.0, "learning_rate": 7.828301593387646e-06, "loss": 0.9454, "step": 14890 }, { "epoch": 0.582635573988575, "grad_norm": 0.0, "learning_rate": 7.827064623663927e-06, "loss": 0.9764, "step": 14891 }, { "epoch": 0.5826747006808044, "grad_norm": 0.0, "learning_rate": 7.825827688834826e-06, "loss": 1.0491, "step": 14892 }, { "epoch": 0.5827138273730339, "grad_norm": 0.0, "learning_rate": 7.82459078892021e-06, "loss": 0.9635, "step": 14893 }, { "epoch": 0.5827529540652633, "grad_norm": 0.0, "learning_rate": 7.823353923939936e-06, "loss": 0.9598, "step": 14894 }, { "epoch": 0.5827920807574928, "grad_norm": 0.0, "learning_rate": 7.822117093913874e-06, "loss": 1.0487, "step": 14895 }, { "epoch": 0.5828312074497222, "grad_norm": 0.0, "learning_rate": 7.820880298861881e-06, "loss": 0.9902, "step": 14896 }, { "epoch": 0.5828703341419517, "grad_norm": 0.0, "learning_rate": 7.819643538803819e-06, "loss": 1.0645, "step": 14897 }, { "epoch": 0.582909460834181, "grad_norm": 0.0, "learning_rate": 7.81840681375955e-06, "loss": 1.046, "step": 14898 }, { "epoch": 0.5829485875264105, "grad_norm": 0.0, "learning_rate": 7.817170123748935e-06, "loss": 0.995, "step": 14899 }, { "epoch": 0.5829877142186399, "grad_norm": 0.0, "learning_rate": 7.815933468791833e-06, "loss": 1.0717, "step": 14900 }, { "epoch": 0.5830268409108694, "grad_norm": 0.0, "learning_rate": 7.814696848908103e-06, "loss": 1.0099, "step": 14901 }, { "epoch": 0.5830659676030988, "grad_norm": 0.0, "learning_rate": 7.813460264117603e-06, "loss": 1.0328, "step": 14902 }, { "epoch": 0.5831050942953283, "grad_norm": 0.0, "learning_rate": 7.812223714440188e-06, "loss": 0.9736, "step": 14903 }, { "epoch": 0.5831442209875577, "grad_norm": 0.0, "learning_rate": 7.810987199895721e-06, "loss": 1.0914, "step": 14904 }, { "epoch": 0.5831833476797872, "grad_norm": 0.0, "learning_rate": 7.809750720504058e-06, "loss": 0.9891, "step": 14905 }, { "epoch": 0.5832224743720166, "grad_norm": 0.0, "learning_rate": 7.808514276285052e-06, "loss": 1.0317, "step": 14906 }, { "epoch": 0.5832616010642461, "grad_norm": 0.0, "learning_rate": 7.807277867258559e-06, "loss": 1.0272, "step": 14907 }, { "epoch": 0.5833007277564755, "grad_norm": 0.0, "learning_rate": 7.806041493444439e-06, "loss": 1.108, "step": 14908 }, { "epoch": 0.583339854448705, "grad_norm": 0.0, "learning_rate": 7.804805154862543e-06, "loss": 0.9422, "step": 14909 }, { "epoch": 0.5833789811409343, "grad_norm": 0.0, "learning_rate": 7.803568851532725e-06, "loss": 0.9902, "step": 14910 }, { "epoch": 0.5834181078331638, "grad_norm": 0.0, "learning_rate": 7.80233258347484e-06, "loss": 0.9871, "step": 14911 }, { "epoch": 0.5834572345253932, "grad_norm": 0.0, "learning_rate": 7.801096350708735e-06, "loss": 0.8786, "step": 14912 }, { "epoch": 0.5834963612176227, "grad_norm": 0.0, "learning_rate": 7.799860153254271e-06, "loss": 1.0021, "step": 14913 }, { "epoch": 0.5835354879098521, "grad_norm": 0.0, "learning_rate": 7.798623991131298e-06, "loss": 0.94, "step": 14914 }, { "epoch": 0.5835746146020815, "grad_norm": 0.0, "learning_rate": 7.797387864359664e-06, "loss": 0.9993, "step": 14915 }, { "epoch": 0.583613741294311, "grad_norm": 0.0, "learning_rate": 7.796151772959216e-06, "loss": 0.9926, "step": 14916 }, { "epoch": 0.5836528679865404, "grad_norm": 0.0, "learning_rate": 7.794915716949815e-06, "loss": 1.0233, "step": 14917 }, { "epoch": 0.5836919946787699, "grad_norm": 0.0, "learning_rate": 7.793679696351302e-06, "loss": 1.0522, "step": 14918 }, { "epoch": 0.5837311213709993, "grad_norm": 0.0, "learning_rate": 7.792443711183531e-06, "loss": 0.9427, "step": 14919 }, { "epoch": 0.5837702480632287, "grad_norm": 0.0, "learning_rate": 7.791207761466346e-06, "loss": 0.9737, "step": 14920 }, { "epoch": 0.5838093747554581, "grad_norm": 0.0, "learning_rate": 7.789971847219593e-06, "loss": 1.0359, "step": 14921 }, { "epoch": 0.5838485014476876, "grad_norm": 0.0, "learning_rate": 7.788735968463128e-06, "loss": 0.9734, "step": 14922 }, { "epoch": 0.583887628139917, "grad_norm": 0.0, "learning_rate": 7.78750012521679e-06, "loss": 1.0008, "step": 14923 }, { "epoch": 0.5839267548321465, "grad_norm": 0.0, "learning_rate": 7.786264317500429e-06, "loss": 0.9366, "step": 14924 }, { "epoch": 0.5839658815243759, "grad_norm": 0.0, "learning_rate": 7.785028545333885e-06, "loss": 1.0407, "step": 14925 }, { "epoch": 0.5840050082166054, "grad_norm": 0.0, "learning_rate": 7.78379280873701e-06, "loss": 1.0479, "step": 14926 }, { "epoch": 0.5840441349088348, "grad_norm": 0.0, "learning_rate": 7.782557107729646e-06, "loss": 1.1412, "step": 14927 }, { "epoch": 0.5840832616010643, "grad_norm": 0.0, "learning_rate": 7.781321442331637e-06, "loss": 0.9135, "step": 14928 }, { "epoch": 0.5841223882932937, "grad_norm": 0.0, "learning_rate": 7.780085812562827e-06, "loss": 1.0151, "step": 14929 }, { "epoch": 0.5841615149855232, "grad_norm": 0.0, "learning_rate": 7.77885021844305e-06, "loss": 1.019, "step": 14930 }, { "epoch": 0.5842006416777525, "grad_norm": 0.0, "learning_rate": 7.777614659992162e-06, "loss": 0.9999, "step": 14931 }, { "epoch": 0.584239768369982, "grad_norm": 0.0, "learning_rate": 7.776379137229996e-06, "loss": 0.9857, "step": 14932 }, { "epoch": 0.5842788950622114, "grad_norm": 0.0, "learning_rate": 7.775143650176394e-06, "loss": 1.0081, "step": 14933 }, { "epoch": 0.5843180217544409, "grad_norm": 0.0, "learning_rate": 7.773908198851197e-06, "loss": 1.0282, "step": 14934 }, { "epoch": 0.5843571484466703, "grad_norm": 0.0, "learning_rate": 7.772672783274246e-06, "loss": 0.8732, "step": 14935 }, { "epoch": 0.5843962751388998, "grad_norm": 0.0, "learning_rate": 7.771437403465377e-06, "loss": 1.035, "step": 14936 }, { "epoch": 0.5844354018311292, "grad_norm": 0.0, "learning_rate": 7.770202059444433e-06, "loss": 0.9, "step": 14937 }, { "epoch": 0.5844745285233587, "grad_norm": 0.0, "learning_rate": 7.76896675123125e-06, "loss": 0.9838, "step": 14938 }, { "epoch": 0.5845136552155881, "grad_norm": 0.0, "learning_rate": 7.767731478845662e-06, "loss": 0.9917, "step": 14939 }, { "epoch": 0.5845527819078176, "grad_norm": 0.0, "learning_rate": 7.766496242307513e-06, "loss": 0.937, "step": 14940 }, { "epoch": 0.584591908600047, "grad_norm": 0.0, "learning_rate": 7.765261041636635e-06, "loss": 1.0875, "step": 14941 }, { "epoch": 0.5846310352922764, "grad_norm": 0.0, "learning_rate": 7.764025876852861e-06, "loss": 1.0004, "step": 14942 }, { "epoch": 0.5846701619845058, "grad_norm": 0.0, "learning_rate": 7.762790747976031e-06, "loss": 0.9567, "step": 14943 }, { "epoch": 0.5847092886767352, "grad_norm": 0.0, "learning_rate": 7.761555655025981e-06, "loss": 1.0412, "step": 14944 }, { "epoch": 0.5847484153689647, "grad_norm": 0.0, "learning_rate": 7.76032059802254e-06, "loss": 0.8301, "step": 14945 }, { "epoch": 0.5847875420611941, "grad_norm": 0.0, "learning_rate": 7.759085576985546e-06, "loss": 1.0795, "step": 14946 }, { "epoch": 0.5848266687534236, "grad_norm": 0.0, "learning_rate": 7.757850591934825e-06, "loss": 1.1171, "step": 14947 }, { "epoch": 0.584865795445653, "grad_norm": 0.0, "learning_rate": 7.756615642890217e-06, "loss": 0.9671, "step": 14948 }, { "epoch": 0.5849049221378825, "grad_norm": 0.0, "learning_rate": 7.75538072987155e-06, "loss": 0.9569, "step": 14949 }, { "epoch": 0.5849440488301119, "grad_norm": 0.0, "learning_rate": 7.754145852898658e-06, "loss": 0.9977, "step": 14950 }, { "epoch": 0.5849831755223414, "grad_norm": 0.0, "learning_rate": 7.75291101199137e-06, "loss": 0.9866, "step": 14951 }, { "epoch": 0.5850223022145707, "grad_norm": 0.0, "learning_rate": 7.751676207169509e-06, "loss": 0.9726, "step": 14952 }, { "epoch": 0.5850614289068002, "grad_norm": 0.0, "learning_rate": 7.750441438452915e-06, "loss": 0.991, "step": 14953 }, { "epoch": 0.5851005555990296, "grad_norm": 0.0, "learning_rate": 7.749206705861413e-06, "loss": 0.9962, "step": 14954 }, { "epoch": 0.5851396822912591, "grad_norm": 0.0, "learning_rate": 7.747972009414832e-06, "loss": 0.9873, "step": 14955 }, { "epoch": 0.5851788089834885, "grad_norm": 0.0, "learning_rate": 7.746737349132994e-06, "loss": 0.9697, "step": 14956 }, { "epoch": 0.585217935675718, "grad_norm": 0.0, "learning_rate": 7.745502725035733e-06, "loss": 1.0257, "step": 14957 }, { "epoch": 0.5852570623679474, "grad_norm": 0.0, "learning_rate": 7.744268137142875e-06, "loss": 0.9275, "step": 14958 }, { "epoch": 0.5852961890601769, "grad_norm": 0.0, "learning_rate": 7.743033585474244e-06, "loss": 0.9925, "step": 14959 }, { "epoch": 0.5853353157524063, "grad_norm": 0.0, "learning_rate": 7.741799070049665e-06, "loss": 1.0747, "step": 14960 }, { "epoch": 0.5853744424446358, "grad_norm": 0.0, "learning_rate": 7.740564590888959e-06, "loss": 0.9594, "step": 14961 }, { "epoch": 0.5854135691368652, "grad_norm": 0.0, "learning_rate": 7.73933014801196e-06, "loss": 1.0217, "step": 14962 }, { "epoch": 0.5854526958290946, "grad_norm": 0.0, "learning_rate": 7.738095741438485e-06, "loss": 0.9212, "step": 14963 }, { "epoch": 0.585491822521324, "grad_norm": 0.0, "learning_rate": 7.736861371188357e-06, "loss": 0.9805, "step": 14964 }, { "epoch": 0.5855309492135535, "grad_norm": 0.0, "learning_rate": 7.735627037281396e-06, "loss": 0.9787, "step": 14965 }, { "epoch": 0.5855700759057829, "grad_norm": 0.0, "learning_rate": 7.734392739737434e-06, "loss": 1.0333, "step": 14966 }, { "epoch": 0.5856092025980124, "grad_norm": 0.0, "learning_rate": 7.733158478576283e-06, "loss": 0.9836, "step": 14967 }, { "epoch": 0.5856483292902418, "grad_norm": 0.0, "learning_rate": 7.731924253817767e-06, "loss": 1.0982, "step": 14968 }, { "epoch": 0.5856874559824713, "grad_norm": 0.0, "learning_rate": 7.730690065481704e-06, "loss": 0.9933, "step": 14969 }, { "epoch": 0.5857265826747007, "grad_norm": 0.0, "learning_rate": 7.729455913587914e-06, "loss": 1.007, "step": 14970 }, { "epoch": 0.5857657093669301, "grad_norm": 0.0, "learning_rate": 7.728221798156218e-06, "loss": 1.0045, "step": 14971 }, { "epoch": 0.5858048360591596, "grad_norm": 0.0, "learning_rate": 7.726987719206433e-06, "loss": 0.898, "step": 14972 }, { "epoch": 0.585843962751389, "grad_norm": 0.0, "learning_rate": 7.725753676758379e-06, "loss": 1.0058, "step": 14973 }, { "epoch": 0.5858830894436184, "grad_norm": 0.0, "learning_rate": 7.72451967083187e-06, "loss": 1.1218, "step": 14974 }, { "epoch": 0.5859222161358478, "grad_norm": 0.0, "learning_rate": 7.723285701446722e-06, "loss": 1.1329, "step": 14975 }, { "epoch": 0.5859613428280773, "grad_norm": 0.0, "learning_rate": 7.722051768622754e-06, "loss": 0.9922, "step": 14976 }, { "epoch": 0.5860004695203067, "grad_norm": 0.0, "learning_rate": 7.720817872379782e-06, "loss": 1.0451, "step": 14977 }, { "epoch": 0.5860395962125362, "grad_norm": 0.0, "learning_rate": 7.719584012737618e-06, "loss": 1.0847, "step": 14978 }, { "epoch": 0.5860787229047656, "grad_norm": 0.0, "learning_rate": 7.718350189716075e-06, "loss": 1.0176, "step": 14979 }, { "epoch": 0.5861178495969951, "grad_norm": 0.0, "learning_rate": 7.717116403334972e-06, "loss": 0.9041, "step": 14980 }, { "epoch": 0.5861569762892245, "grad_norm": 0.0, "learning_rate": 7.715882653614115e-06, "loss": 0.931, "step": 14981 }, { "epoch": 0.586196102981454, "grad_norm": 0.0, "learning_rate": 7.714648940573323e-06, "loss": 1.0347, "step": 14982 }, { "epoch": 0.5862352296736834, "grad_norm": 0.0, "learning_rate": 7.713415264232405e-06, "loss": 0.985, "step": 14983 }, { "epoch": 0.5862743563659129, "grad_norm": 0.0, "learning_rate": 7.712181624611174e-06, "loss": 0.9733, "step": 14984 }, { "epoch": 0.5863134830581422, "grad_norm": 0.0, "learning_rate": 7.710948021729438e-06, "loss": 0.967, "step": 14985 }, { "epoch": 0.5863526097503717, "grad_norm": 0.0, "learning_rate": 7.70971445560701e-06, "loss": 1.0057, "step": 14986 }, { "epoch": 0.5863917364426011, "grad_norm": 0.0, "learning_rate": 7.708480926263695e-06, "loss": 1.0541, "step": 14987 }, { "epoch": 0.5864308631348306, "grad_norm": 0.0, "learning_rate": 7.707247433719306e-06, "loss": 1.0395, "step": 14988 }, { "epoch": 0.58646998982706, "grad_norm": 0.0, "learning_rate": 7.70601397799365e-06, "loss": 1.0538, "step": 14989 }, { "epoch": 0.5865091165192895, "grad_norm": 0.0, "learning_rate": 7.704780559106538e-06, "loss": 0.8425, "step": 14990 }, { "epoch": 0.5865482432115189, "grad_norm": 0.0, "learning_rate": 7.70354717707777e-06, "loss": 0.8053, "step": 14991 }, { "epoch": 0.5865873699037484, "grad_norm": 0.0, "learning_rate": 7.702313831927156e-06, "loss": 0.9855, "step": 14992 }, { "epoch": 0.5866264965959778, "grad_norm": 0.0, "learning_rate": 7.701080523674506e-06, "loss": 1.0744, "step": 14993 }, { "epoch": 0.5866656232882073, "grad_norm": 0.0, "learning_rate": 7.699847252339623e-06, "loss": 0.9629, "step": 14994 }, { "epoch": 0.5867047499804366, "grad_norm": 0.0, "learning_rate": 7.69861401794231e-06, "loss": 1.1638, "step": 14995 }, { "epoch": 0.5867438766726661, "grad_norm": 0.0, "learning_rate": 7.697380820502372e-06, "loss": 0.9742, "step": 14996 }, { "epoch": 0.5867830033648955, "grad_norm": 0.0, "learning_rate": 7.696147660039609e-06, "loss": 0.9822, "step": 14997 }, { "epoch": 0.586822130057125, "grad_norm": 0.0, "learning_rate": 7.69491453657383e-06, "loss": 1.006, "step": 14998 }, { "epoch": 0.5868612567493544, "grad_norm": 0.0, "learning_rate": 7.693681450124837e-06, "loss": 0.9246, "step": 14999 }, { "epoch": 0.5869003834415838, "grad_norm": 0.0, "learning_rate": 7.69244840071243e-06, "loss": 1.0175, "step": 15000 }, { "epoch": 0.5869395101338133, "grad_norm": 0.0, "learning_rate": 7.691215388356403e-06, "loss": 1.1064, "step": 15001 }, { "epoch": 0.5869786368260427, "grad_norm": 0.0, "learning_rate": 7.68998241307657e-06, "loss": 0.9902, "step": 15002 }, { "epoch": 0.5870177635182722, "grad_norm": 0.0, "learning_rate": 7.688749474892727e-06, "loss": 0.9189, "step": 15003 }, { "epoch": 0.5870568902105016, "grad_norm": 0.0, "learning_rate": 7.68751657382467e-06, "loss": 1.0858, "step": 15004 }, { "epoch": 0.587096016902731, "grad_norm": 0.0, "learning_rate": 7.686283709892192e-06, "loss": 0.9214, "step": 15005 }, { "epoch": 0.5871351435949604, "grad_norm": 0.0, "learning_rate": 7.685050883115106e-06, "loss": 1.0814, "step": 15006 }, { "epoch": 0.5871742702871899, "grad_norm": 0.0, "learning_rate": 7.683818093513201e-06, "loss": 1.0109, "step": 15007 }, { "epoch": 0.5872133969794193, "grad_norm": 0.0, "learning_rate": 7.682585341106276e-06, "loss": 0.9903, "step": 15008 }, { "epoch": 0.5872525236716488, "grad_norm": 0.0, "learning_rate": 7.681352625914125e-06, "loss": 1.1365, "step": 15009 }, { "epoch": 0.5872916503638782, "grad_norm": 0.0, "learning_rate": 7.680119947956542e-06, "loss": 1.1121, "step": 15010 }, { "epoch": 0.5873307770561077, "grad_norm": 0.0, "learning_rate": 7.678887307253329e-06, "loss": 1.0026, "step": 15011 }, { "epoch": 0.5873699037483371, "grad_norm": 0.0, "learning_rate": 7.677654703824279e-06, "loss": 0.8855, "step": 15012 }, { "epoch": 0.5874090304405666, "grad_norm": 0.0, "learning_rate": 7.676422137689183e-06, "loss": 1.0473, "step": 15013 }, { "epoch": 0.587448157132796, "grad_norm": 0.0, "learning_rate": 7.675189608867832e-06, "loss": 1.0113, "step": 15014 }, { "epoch": 0.5874872838250255, "grad_norm": 0.0, "learning_rate": 7.673957117380027e-06, "loss": 1.0743, "step": 15015 }, { "epoch": 0.5875264105172548, "grad_norm": 0.0, "learning_rate": 7.672724663245555e-06, "loss": 1.0853, "step": 15016 }, { "epoch": 0.5875655372094843, "grad_norm": 0.0, "learning_rate": 7.67149224648421e-06, "loss": 1.028, "step": 15017 }, { "epoch": 0.5876046639017137, "grad_norm": 0.0, "learning_rate": 7.670259867115781e-06, "loss": 1.0241, "step": 15018 }, { "epoch": 0.5876437905939432, "grad_norm": 0.0, "learning_rate": 7.669027525160057e-06, "loss": 1.0899, "step": 15019 }, { "epoch": 0.5876829172861726, "grad_norm": 0.0, "learning_rate": 7.66779522063683e-06, "loss": 1.0408, "step": 15020 }, { "epoch": 0.5877220439784021, "grad_norm": 0.0, "learning_rate": 7.666562953565894e-06, "loss": 0.9573, "step": 15021 }, { "epoch": 0.5877611706706315, "grad_norm": 0.0, "learning_rate": 7.66533072396703e-06, "loss": 1.0384, "step": 15022 }, { "epoch": 0.587800297362861, "grad_norm": 0.0, "learning_rate": 7.664098531860029e-06, "loss": 1.0288, "step": 15023 }, { "epoch": 0.5878394240550904, "grad_norm": 0.0, "learning_rate": 7.66286637726468e-06, "loss": 1.0319, "step": 15024 }, { "epoch": 0.5878785507473199, "grad_norm": 0.0, "learning_rate": 7.661634260200767e-06, "loss": 1.0372, "step": 15025 }, { "epoch": 0.5879176774395493, "grad_norm": 0.0, "learning_rate": 7.660402180688076e-06, "loss": 1.0788, "step": 15026 }, { "epoch": 0.5879568041317788, "grad_norm": 0.0, "learning_rate": 7.659170138746398e-06, "loss": 1.1038, "step": 15027 }, { "epoch": 0.5879959308240081, "grad_norm": 0.0, "learning_rate": 7.65793813439551e-06, "loss": 1.0429, "step": 15028 }, { "epoch": 0.5880350575162375, "grad_norm": 0.0, "learning_rate": 7.656706167655205e-06, "loss": 0.9806, "step": 15029 }, { "epoch": 0.588074184208467, "grad_norm": 0.0, "learning_rate": 7.655474238545261e-06, "loss": 0.8874, "step": 15030 }, { "epoch": 0.5881133109006964, "grad_norm": 0.0, "learning_rate": 7.654242347085462e-06, "loss": 1.0573, "step": 15031 }, { "epoch": 0.5881524375929259, "grad_norm": 0.0, "learning_rate": 7.653010493295592e-06, "loss": 1.1005, "step": 15032 }, { "epoch": 0.5881915642851553, "grad_norm": 0.0, "learning_rate": 7.651778677195433e-06, "loss": 1.0077, "step": 15033 }, { "epoch": 0.5882306909773848, "grad_norm": 0.0, "learning_rate": 7.650546898804766e-06, "loss": 0.921, "step": 15034 }, { "epoch": 0.5882698176696142, "grad_norm": 0.0, "learning_rate": 7.649315158143373e-06, "loss": 1.0207, "step": 15035 }, { "epoch": 0.5883089443618437, "grad_norm": 0.0, "learning_rate": 7.648083455231033e-06, "loss": 0.9543, "step": 15036 }, { "epoch": 0.588348071054073, "grad_norm": 0.0, "learning_rate": 7.646851790087519e-06, "loss": 0.9447, "step": 15037 }, { "epoch": 0.5883871977463025, "grad_norm": 0.0, "learning_rate": 7.645620162732624e-06, "loss": 1.1102, "step": 15038 }, { "epoch": 0.5884263244385319, "grad_norm": 0.0, "learning_rate": 7.644388573186116e-06, "loss": 1.0356, "step": 15039 }, { "epoch": 0.5884654511307614, "grad_norm": 0.0, "learning_rate": 7.643157021467778e-06, "loss": 1.0572, "step": 15040 }, { "epoch": 0.5885045778229908, "grad_norm": 0.0, "learning_rate": 7.641925507597381e-06, "loss": 1.0853, "step": 15041 }, { "epoch": 0.5885437045152203, "grad_norm": 0.0, "learning_rate": 7.640694031594708e-06, "loss": 0.9858, "step": 15042 }, { "epoch": 0.5885828312074497, "grad_norm": 0.0, "learning_rate": 7.639462593479533e-06, "loss": 0.9847, "step": 15043 }, { "epoch": 0.5886219578996792, "grad_norm": 0.0, "learning_rate": 7.638231193271632e-06, "loss": 1.0113, "step": 15044 }, { "epoch": 0.5886610845919086, "grad_norm": 0.0, "learning_rate": 7.636999830990777e-06, "loss": 0.982, "step": 15045 }, { "epoch": 0.5887002112841381, "grad_norm": 0.0, "learning_rate": 7.63576850665674e-06, "loss": 1.0435, "step": 15046 }, { "epoch": 0.5887393379763675, "grad_norm": 0.0, "learning_rate": 7.634537220289303e-06, "loss": 1.0305, "step": 15047 }, { "epoch": 0.588778464668597, "grad_norm": 0.0, "learning_rate": 7.633305971908234e-06, "loss": 1.0338, "step": 15048 }, { "epoch": 0.5888175913608263, "grad_norm": 0.0, "learning_rate": 7.632074761533304e-06, "loss": 1.0348, "step": 15049 }, { "epoch": 0.5888567180530558, "grad_norm": 0.0, "learning_rate": 7.630843589184284e-06, "loss": 0.9609, "step": 15050 }, { "epoch": 0.5888958447452852, "grad_norm": 0.0, "learning_rate": 7.62961245488095e-06, "loss": 0.9808, "step": 15051 }, { "epoch": 0.5889349714375147, "grad_norm": 0.0, "learning_rate": 7.628381358643071e-06, "loss": 1.1631, "step": 15052 }, { "epoch": 0.5889740981297441, "grad_norm": 0.0, "learning_rate": 7.6271503004904144e-06, "loss": 0.955, "step": 15053 }, { "epoch": 0.5890132248219736, "grad_norm": 0.0, "learning_rate": 7.625919280442751e-06, "loss": 1.1194, "step": 15054 }, { "epoch": 0.589052351514203, "grad_norm": 0.0, "learning_rate": 7.624688298519844e-06, "loss": 0.9555, "step": 15055 }, { "epoch": 0.5890914782064324, "grad_norm": 0.0, "learning_rate": 7.623457354741472e-06, "loss": 1.1046, "step": 15056 }, { "epoch": 0.5891306048986619, "grad_norm": 0.0, "learning_rate": 7.6222264491273965e-06, "loss": 1.0042, "step": 15057 }, { "epoch": 0.5891697315908913, "grad_norm": 0.0, "learning_rate": 7.6209955816973854e-06, "loss": 1.0311, "step": 15058 }, { "epoch": 0.5892088582831208, "grad_norm": 0.0, "learning_rate": 7.6197647524712e-06, "loss": 1.0815, "step": 15059 }, { "epoch": 0.5892479849753501, "grad_norm": 0.0, "learning_rate": 7.618533961468612e-06, "loss": 1.0426, "step": 15060 }, { "epoch": 0.5892871116675796, "grad_norm": 0.0, "learning_rate": 7.617303208709388e-06, "loss": 1.0604, "step": 15061 }, { "epoch": 0.589326238359809, "grad_norm": 0.0, "learning_rate": 7.616072494213286e-06, "loss": 1.0126, "step": 15062 }, { "epoch": 0.5893653650520385, "grad_norm": 0.0, "learning_rate": 7.614841818000071e-06, "loss": 1.0109, "step": 15063 }, { "epoch": 0.5894044917442679, "grad_norm": 0.0, "learning_rate": 7.613611180089508e-06, "loss": 1.1554, "step": 15064 }, { "epoch": 0.5894436184364974, "grad_norm": 0.0, "learning_rate": 7.612380580501362e-06, "loss": 1.2014, "step": 15065 }, { "epoch": 0.5894827451287268, "grad_norm": 0.0, "learning_rate": 7.611150019255391e-06, "loss": 0.8036, "step": 15066 }, { "epoch": 0.5895218718209563, "grad_norm": 0.0, "learning_rate": 7.609919496371357e-06, "loss": 1.0198, "step": 15067 }, { "epoch": 0.5895609985131857, "grad_norm": 0.0, "learning_rate": 7.608689011869019e-06, "loss": 1.0695, "step": 15068 }, { "epoch": 0.5896001252054152, "grad_norm": 0.0, "learning_rate": 7.607458565768142e-06, "loss": 0.9174, "step": 15069 }, { "epoch": 0.5896392518976445, "grad_norm": 0.0, "learning_rate": 7.60622815808848e-06, "loss": 0.9894, "step": 15070 }, { "epoch": 0.589678378589874, "grad_norm": 0.0, "learning_rate": 7.604997788849795e-06, "loss": 0.8874, "step": 15071 }, { "epoch": 0.5897175052821034, "grad_norm": 0.0, "learning_rate": 7.603767458071843e-06, "loss": 1.1353, "step": 15072 }, { "epoch": 0.5897566319743329, "grad_norm": 0.0, "learning_rate": 7.602537165774386e-06, "loss": 1.1398, "step": 15073 }, { "epoch": 0.5897957586665623, "grad_norm": 0.0, "learning_rate": 7.601306911977178e-06, "loss": 0.9827, "step": 15074 }, { "epoch": 0.5898348853587918, "grad_norm": 0.0, "learning_rate": 7.600076696699974e-06, "loss": 1.0462, "step": 15075 }, { "epoch": 0.5898740120510212, "grad_norm": 0.0, "learning_rate": 7.598846519962529e-06, "loss": 0.9607, "step": 15076 }, { "epoch": 0.5899131387432507, "grad_norm": 0.0, "learning_rate": 7.597616381784601e-06, "loss": 1.0627, "step": 15077 }, { "epoch": 0.5899522654354801, "grad_norm": 0.0, "learning_rate": 7.5963862821859456e-06, "loss": 1.007, "step": 15078 }, { "epoch": 0.5899913921277096, "grad_norm": 0.0, "learning_rate": 7.595156221186314e-06, "loss": 0.8423, "step": 15079 }, { "epoch": 0.590030518819939, "grad_norm": 0.0, "learning_rate": 7.59392619880546e-06, "loss": 1.0223, "step": 15080 }, { "epoch": 0.5900696455121685, "grad_norm": 0.0, "learning_rate": 7.5926962150631324e-06, "loss": 0.9266, "step": 15081 }, { "epoch": 0.5901087722043978, "grad_norm": 0.0, "learning_rate": 7.591466269979091e-06, "loss": 1.1006, "step": 15082 }, { "epoch": 0.5901478988966273, "grad_norm": 0.0, "learning_rate": 7.5902363635730835e-06, "loss": 0.9928, "step": 15083 }, { "epoch": 0.5901870255888567, "grad_norm": 0.0, "learning_rate": 7.589006495864861e-06, "loss": 0.978, "step": 15084 }, { "epoch": 0.5902261522810861, "grad_norm": 0.0, "learning_rate": 7.587776666874171e-06, "loss": 0.9857, "step": 15085 }, { "epoch": 0.5902652789733156, "grad_norm": 0.0, "learning_rate": 7.586546876620763e-06, "loss": 1.1048, "step": 15086 }, { "epoch": 0.590304405665545, "grad_norm": 0.0, "learning_rate": 7.585317125124392e-06, "loss": 0.9101, "step": 15087 }, { "epoch": 0.5903435323577745, "grad_norm": 0.0, "learning_rate": 7.584087412404802e-06, "loss": 0.9908, "step": 15088 }, { "epoch": 0.5903826590500039, "grad_norm": 0.0, "learning_rate": 7.5828577384817395e-06, "loss": 1.0252, "step": 15089 }, { "epoch": 0.5904217857422334, "grad_norm": 0.0, "learning_rate": 7.58162810337495e-06, "loss": 1.0608, "step": 15090 }, { "epoch": 0.5904609124344627, "grad_norm": 0.0, "learning_rate": 7.580398507104186e-06, "loss": 0.9947, "step": 15091 }, { "epoch": 0.5905000391266922, "grad_norm": 0.0, "learning_rate": 7.579168949689191e-06, "loss": 0.9401, "step": 15092 }, { "epoch": 0.5905391658189216, "grad_norm": 0.0, "learning_rate": 7.577939431149709e-06, "loss": 1.1121, "step": 15093 }, { "epoch": 0.5905782925111511, "grad_norm": 0.0, "learning_rate": 7.576709951505484e-06, "loss": 0.9522, "step": 15094 }, { "epoch": 0.5906174192033805, "grad_norm": 0.0, "learning_rate": 7.575480510776256e-06, "loss": 1.0207, "step": 15095 }, { "epoch": 0.59065654589561, "grad_norm": 0.0, "learning_rate": 7.5742511089817795e-06, "loss": 1.0263, "step": 15096 }, { "epoch": 0.5906956725878394, "grad_norm": 0.0, "learning_rate": 7.573021746141789e-06, "loss": 0.9495, "step": 15097 }, { "epoch": 0.5907347992800689, "grad_norm": 0.0, "learning_rate": 7.571792422276028e-06, "loss": 0.9988, "step": 15098 }, { "epoch": 0.5907739259722983, "grad_norm": 0.0, "learning_rate": 7.570563137404234e-06, "loss": 1.0814, "step": 15099 }, { "epoch": 0.5908130526645278, "grad_norm": 0.0, "learning_rate": 7.569333891546156e-06, "loss": 0.9705, "step": 15100 }, { "epoch": 0.5908521793567572, "grad_norm": 0.0, "learning_rate": 7.568104684721529e-06, "loss": 0.9499, "step": 15101 }, { "epoch": 0.5908913060489867, "grad_norm": 0.0, "learning_rate": 7.566875516950095e-06, "loss": 0.9882, "step": 15102 }, { "epoch": 0.590930432741216, "grad_norm": 0.0, "learning_rate": 7.565646388251591e-06, "loss": 0.9771, "step": 15103 }, { "epoch": 0.5909695594334455, "grad_norm": 0.0, "learning_rate": 7.56441729864575e-06, "loss": 0.9786, "step": 15104 }, { "epoch": 0.5910086861256749, "grad_norm": 0.0, "learning_rate": 7.5631882481523215e-06, "loss": 1.0377, "step": 15105 }, { "epoch": 0.5910478128179044, "grad_norm": 0.0, "learning_rate": 7.5619592367910345e-06, "loss": 1.0407, "step": 15106 }, { "epoch": 0.5910869395101338, "grad_norm": 0.0, "learning_rate": 7.560730264581629e-06, "loss": 0.9973, "step": 15107 }, { "epoch": 0.5911260662023633, "grad_norm": 0.0, "learning_rate": 7.559501331543835e-06, "loss": 0.9309, "step": 15108 }, { "epoch": 0.5911651928945927, "grad_norm": 0.0, "learning_rate": 7.558272437697392e-06, "loss": 0.9395, "step": 15109 }, { "epoch": 0.5912043195868222, "grad_norm": 0.0, "learning_rate": 7.557043583062036e-06, "loss": 1.037, "step": 15110 }, { "epoch": 0.5912434462790516, "grad_norm": 0.0, "learning_rate": 7.555814767657499e-06, "loss": 0.882, "step": 15111 }, { "epoch": 0.5912825729712811, "grad_norm": 0.0, "learning_rate": 7.554585991503514e-06, "loss": 1.1528, "step": 15112 }, { "epoch": 0.5913216996635104, "grad_norm": 0.0, "learning_rate": 7.553357254619811e-06, "loss": 0.9468, "step": 15113 }, { "epoch": 0.5913608263557398, "grad_norm": 0.0, "learning_rate": 7.552128557026129e-06, "loss": 0.9975, "step": 15114 }, { "epoch": 0.5913999530479693, "grad_norm": 0.0, "learning_rate": 7.55089989874219e-06, "loss": 1.1814, "step": 15115 }, { "epoch": 0.5914390797401987, "grad_norm": 0.0, "learning_rate": 7.5496712797877334e-06, "loss": 1.0488, "step": 15116 }, { "epoch": 0.5914782064324282, "grad_norm": 0.0, "learning_rate": 7.548442700182483e-06, "loss": 1.0252, "step": 15117 }, { "epoch": 0.5915173331246576, "grad_norm": 0.0, "learning_rate": 7.547214159946174e-06, "loss": 1.0175, "step": 15118 }, { "epoch": 0.5915564598168871, "grad_norm": 0.0, "learning_rate": 7.545985659098531e-06, "loss": 0.9728, "step": 15119 }, { "epoch": 0.5915955865091165, "grad_norm": 0.0, "learning_rate": 7.544757197659284e-06, "loss": 1.0792, "step": 15120 }, { "epoch": 0.591634713201346, "grad_norm": 0.0, "learning_rate": 7.5435287756481544e-06, "loss": 1.0389, "step": 15121 }, { "epoch": 0.5916738398935754, "grad_norm": 0.0, "learning_rate": 7.54230039308488e-06, "loss": 0.986, "step": 15122 }, { "epoch": 0.5917129665858049, "grad_norm": 0.0, "learning_rate": 7.5410720499891806e-06, "loss": 1.067, "step": 15123 }, { "epoch": 0.5917520932780342, "grad_norm": 0.0, "learning_rate": 7.539843746380784e-06, "loss": 1.018, "step": 15124 }, { "epoch": 0.5917912199702637, "grad_norm": 0.0, "learning_rate": 7.5386154822794135e-06, "loss": 0.9583, "step": 15125 }, { "epoch": 0.5918303466624931, "grad_norm": 0.0, "learning_rate": 7.537387257704789e-06, "loss": 0.9913, "step": 15126 }, { "epoch": 0.5918694733547226, "grad_norm": 0.0, "learning_rate": 7.536159072676645e-06, "loss": 0.8984, "step": 15127 }, { "epoch": 0.591908600046952, "grad_norm": 0.0, "learning_rate": 7.534930927214699e-06, "loss": 0.9017, "step": 15128 }, { "epoch": 0.5919477267391815, "grad_norm": 0.0, "learning_rate": 7.533702821338672e-06, "loss": 0.9379, "step": 15129 }, { "epoch": 0.5919868534314109, "grad_norm": 0.0, "learning_rate": 7.532474755068284e-06, "loss": 0.9358, "step": 15130 }, { "epoch": 0.5920259801236404, "grad_norm": 0.0, "learning_rate": 7.531246728423264e-06, "loss": 1.1133, "step": 15131 }, { "epoch": 0.5920651068158698, "grad_norm": 0.0, "learning_rate": 7.530018741423328e-06, "loss": 1.1135, "step": 15132 }, { "epoch": 0.5921042335080993, "grad_norm": 0.0, "learning_rate": 7.528790794088194e-06, "loss": 1.0091, "step": 15133 }, { "epoch": 0.5921433602003287, "grad_norm": 0.0, "learning_rate": 7.527562886437585e-06, "loss": 0.9333, "step": 15134 }, { "epoch": 0.5921824868925581, "grad_norm": 0.0, "learning_rate": 7.526335018491213e-06, "loss": 1.0569, "step": 15135 }, { "epoch": 0.5922216135847875, "grad_norm": 0.0, "learning_rate": 7.525107190268805e-06, "loss": 1.0542, "step": 15136 }, { "epoch": 0.592260740277017, "grad_norm": 0.0, "learning_rate": 7.523879401790076e-06, "loss": 1.1255, "step": 15137 }, { "epoch": 0.5922998669692464, "grad_norm": 0.0, "learning_rate": 7.5226516530747395e-06, "loss": 1.1525, "step": 15138 }, { "epoch": 0.5923389936614759, "grad_norm": 0.0, "learning_rate": 7.52142394414251e-06, "loss": 1.1224, "step": 15139 }, { "epoch": 0.5923781203537053, "grad_norm": 0.0, "learning_rate": 7.520196275013109e-06, "loss": 1.0424, "step": 15140 }, { "epoch": 0.5924172470459348, "grad_norm": 0.0, "learning_rate": 7.518968645706249e-06, "loss": 1.0531, "step": 15141 }, { "epoch": 0.5924563737381642, "grad_norm": 0.0, "learning_rate": 7.517741056241644e-06, "loss": 0.955, "step": 15142 }, { "epoch": 0.5924955004303936, "grad_norm": 0.0, "learning_rate": 7.516513506639007e-06, "loss": 1.0042, "step": 15143 }, { "epoch": 0.5925346271226231, "grad_norm": 0.0, "learning_rate": 7.515285996918047e-06, "loss": 1.0518, "step": 15144 }, { "epoch": 0.5925737538148524, "grad_norm": 0.0, "learning_rate": 7.514058527098484e-06, "loss": 1.0494, "step": 15145 }, { "epoch": 0.5926128805070819, "grad_norm": 0.0, "learning_rate": 7.512831097200026e-06, "loss": 1.1577, "step": 15146 }, { "epoch": 0.5926520071993113, "grad_norm": 0.0, "learning_rate": 7.5116037072423855e-06, "loss": 0.9191, "step": 15147 }, { "epoch": 0.5926911338915408, "grad_norm": 0.0, "learning_rate": 7.510376357245266e-06, "loss": 1.0131, "step": 15148 }, { "epoch": 0.5927302605837702, "grad_norm": 0.0, "learning_rate": 7.509149047228387e-06, "loss": 0.9728, "step": 15149 }, { "epoch": 0.5927693872759997, "grad_norm": 0.0, "learning_rate": 7.507921777211452e-06, "loss": 1.0034, "step": 15150 }, { "epoch": 0.5928085139682291, "grad_norm": 0.0, "learning_rate": 7.506694547214172e-06, "loss": 1.0879, "step": 15151 }, { "epoch": 0.5928476406604586, "grad_norm": 0.0, "learning_rate": 7.505467357256252e-06, "loss": 0.9851, "step": 15152 }, { "epoch": 0.592886767352688, "grad_norm": 0.0, "learning_rate": 7.5042402073573995e-06, "loss": 0.951, "step": 15153 }, { "epoch": 0.5929258940449175, "grad_norm": 0.0, "learning_rate": 7.503013097537322e-06, "loss": 1.0856, "step": 15154 }, { "epoch": 0.5929650207371469, "grad_norm": 0.0, "learning_rate": 7.501786027815726e-06, "loss": 0.9867, "step": 15155 }, { "epoch": 0.5930041474293763, "grad_norm": 0.0, "learning_rate": 7.500558998212318e-06, "loss": 0.9962, "step": 15156 }, { "epoch": 0.5930432741216057, "grad_norm": 0.0, "learning_rate": 7.4993320087467955e-06, "loss": 1.0944, "step": 15157 }, { "epoch": 0.5930824008138352, "grad_norm": 0.0, "learning_rate": 7.4981050594388716e-06, "loss": 1.0618, "step": 15158 }, { "epoch": 0.5931215275060646, "grad_norm": 0.0, "learning_rate": 7.4968781503082445e-06, "loss": 1.0229, "step": 15159 }, { "epoch": 0.5931606541982941, "grad_norm": 0.0, "learning_rate": 7.495651281374616e-06, "loss": 1.0635, "step": 15160 }, { "epoch": 0.5931997808905235, "grad_norm": 0.0, "learning_rate": 7.494424452657691e-06, "loss": 0.8964, "step": 15161 }, { "epoch": 0.593238907582753, "grad_norm": 0.0, "learning_rate": 7.49319766417717e-06, "loss": 0.9132, "step": 15162 }, { "epoch": 0.5932780342749824, "grad_norm": 0.0, "learning_rate": 7.491970915952753e-06, "loss": 0.9935, "step": 15163 }, { "epoch": 0.5933171609672119, "grad_norm": 0.0, "learning_rate": 7.4907442080041415e-06, "loss": 0.9718, "step": 15164 }, { "epoch": 0.5933562876594413, "grad_norm": 0.0, "learning_rate": 7.489517540351032e-06, "loss": 0.9717, "step": 15165 }, { "epoch": 0.5933954143516708, "grad_norm": 0.0, "learning_rate": 7.488290913013123e-06, "loss": 0.9404, "step": 15166 }, { "epoch": 0.5934345410439001, "grad_norm": 0.0, "learning_rate": 7.487064326010118e-06, "loss": 1.0717, "step": 15167 }, { "epoch": 0.5934736677361296, "grad_norm": 0.0, "learning_rate": 7.485837779361712e-06, "loss": 1.1149, "step": 15168 }, { "epoch": 0.593512794428359, "grad_norm": 0.0, "learning_rate": 7.484611273087601e-06, "loss": 1.0531, "step": 15169 }, { "epoch": 0.5935519211205884, "grad_norm": 0.0, "learning_rate": 7.483384807207479e-06, "loss": 1.0187, "step": 15170 }, { "epoch": 0.5935910478128179, "grad_norm": 0.0, "learning_rate": 7.48215838174104e-06, "loss": 1.0042, "step": 15171 }, { "epoch": 0.5936301745050473, "grad_norm": 0.0, "learning_rate": 7.480931996707988e-06, "loss": 0.9085, "step": 15172 }, { "epoch": 0.5936693011972768, "grad_norm": 0.0, "learning_rate": 7.479705652128009e-06, "loss": 1.0447, "step": 15173 }, { "epoch": 0.5937084278895062, "grad_norm": 0.0, "learning_rate": 7.4784793480208e-06, "loss": 1.0001, "step": 15174 }, { "epoch": 0.5937475545817357, "grad_norm": 0.0, "learning_rate": 7.47725308440605e-06, "loss": 1.1386, "step": 15175 }, { "epoch": 0.5937866812739651, "grad_norm": 0.0, "learning_rate": 7.476026861303458e-06, "loss": 1.0282, "step": 15176 }, { "epoch": 0.5938258079661946, "grad_norm": 0.0, "learning_rate": 7.474800678732712e-06, "loss": 0.8961, "step": 15177 }, { "epoch": 0.5938649346584239, "grad_norm": 0.0, "learning_rate": 7.4735745367135014e-06, "loss": 1.151, "step": 15178 }, { "epoch": 0.5939040613506534, "grad_norm": 0.0, "learning_rate": 7.472348435265515e-06, "loss": 1.0198, "step": 15179 }, { "epoch": 0.5939431880428828, "grad_norm": 0.0, "learning_rate": 7.471122374408451e-06, "loss": 1.0624, "step": 15180 }, { "epoch": 0.5939823147351123, "grad_norm": 0.0, "learning_rate": 7.4698963541619895e-06, "loss": 0.9554, "step": 15181 }, { "epoch": 0.5940214414273417, "grad_norm": 0.0, "learning_rate": 7.468670374545826e-06, "loss": 0.938, "step": 15182 }, { "epoch": 0.5940605681195712, "grad_norm": 0.0, "learning_rate": 7.46744443557964e-06, "loss": 1.0226, "step": 15183 }, { "epoch": 0.5940996948118006, "grad_norm": 0.0, "learning_rate": 7.466218537283122e-06, "loss": 1.0303, "step": 15184 }, { "epoch": 0.5941388215040301, "grad_norm": 0.0, "learning_rate": 7.464992679675962e-06, "loss": 1.0529, "step": 15185 }, { "epoch": 0.5941779481962595, "grad_norm": 0.0, "learning_rate": 7.463766862777844e-06, "loss": 1.0038, "step": 15186 }, { "epoch": 0.594217074888489, "grad_norm": 0.0, "learning_rate": 7.462541086608453e-06, "loss": 1.029, "step": 15187 }, { "epoch": 0.5942562015807183, "grad_norm": 0.0, "learning_rate": 7.461315351187466e-06, "loss": 0.938, "step": 15188 }, { "epoch": 0.5942953282729478, "grad_norm": 0.0, "learning_rate": 7.460089656534578e-06, "loss": 0.9913, "step": 15189 }, { "epoch": 0.5943344549651772, "grad_norm": 0.0, "learning_rate": 7.458864002669468e-06, "loss": 0.9481, "step": 15190 }, { "epoch": 0.5943735816574067, "grad_norm": 0.0, "learning_rate": 7.457638389611818e-06, "loss": 0.9599, "step": 15191 }, { "epoch": 0.5944127083496361, "grad_norm": 0.0, "learning_rate": 7.4564128173813085e-06, "loss": 1.0039, "step": 15192 }, { "epoch": 0.5944518350418656, "grad_norm": 0.0, "learning_rate": 7.455187285997619e-06, "loss": 1.0114, "step": 15193 }, { "epoch": 0.594490961734095, "grad_norm": 0.0, "learning_rate": 7.453961795480438e-06, "loss": 0.9614, "step": 15194 }, { "epoch": 0.5945300884263245, "grad_norm": 0.0, "learning_rate": 7.452736345849438e-06, "loss": 1.0544, "step": 15195 }, { "epoch": 0.5945692151185539, "grad_norm": 0.0, "learning_rate": 7.451510937124301e-06, "loss": 1.0187, "step": 15196 }, { "epoch": 0.5946083418107834, "grad_norm": 0.0, "learning_rate": 7.450285569324703e-06, "loss": 0.9794, "step": 15197 }, { "epoch": 0.5946474685030128, "grad_norm": 0.0, "learning_rate": 7.449060242470324e-06, "loss": 0.9075, "step": 15198 }, { "epoch": 0.5946865951952421, "grad_norm": 0.0, "learning_rate": 7.447834956580844e-06, "loss": 1.0702, "step": 15199 }, { "epoch": 0.5947257218874716, "grad_norm": 0.0, "learning_rate": 7.446609711675935e-06, "loss": 1.1049, "step": 15200 }, { "epoch": 0.594764848579701, "grad_norm": 0.0, "learning_rate": 7.445384507775275e-06, "loss": 1.0581, "step": 15201 }, { "epoch": 0.5948039752719305, "grad_norm": 0.0, "learning_rate": 7.4441593448985365e-06, "loss": 1.1204, "step": 15202 }, { "epoch": 0.5948431019641599, "grad_norm": 0.0, "learning_rate": 7.442934223065399e-06, "loss": 1.026, "step": 15203 }, { "epoch": 0.5948822286563894, "grad_norm": 0.0, "learning_rate": 7.44170914229553e-06, "loss": 1.0743, "step": 15204 }, { "epoch": 0.5949213553486188, "grad_norm": 0.0, "learning_rate": 7.4404841026086096e-06, "loss": 1.0043, "step": 15205 }, { "epoch": 0.5949604820408483, "grad_norm": 0.0, "learning_rate": 7.4392591040243056e-06, "loss": 1.0301, "step": 15206 }, { "epoch": 0.5949996087330777, "grad_norm": 0.0, "learning_rate": 7.438034146562294e-06, "loss": 1.0211, "step": 15207 }, { "epoch": 0.5950387354253072, "grad_norm": 0.0, "learning_rate": 7.4368092302422424e-06, "loss": 1.0489, "step": 15208 }, { "epoch": 0.5950778621175365, "grad_norm": 0.0, "learning_rate": 7.435584355083822e-06, "loss": 1.0335, "step": 15209 }, { "epoch": 0.595116988809766, "grad_norm": 0.0, "learning_rate": 7.4343595211067045e-06, "loss": 0.9238, "step": 15210 }, { "epoch": 0.5951561155019954, "grad_norm": 0.0, "learning_rate": 7.433134728330555e-06, "loss": 0.8416, "step": 15211 }, { "epoch": 0.5951952421942249, "grad_norm": 0.0, "learning_rate": 7.431909976775049e-06, "loss": 1.0624, "step": 15212 }, { "epoch": 0.5952343688864543, "grad_norm": 0.0, "learning_rate": 7.43068526645985e-06, "loss": 1.0134, "step": 15213 }, { "epoch": 0.5952734955786838, "grad_norm": 0.0, "learning_rate": 7.4294605974046275e-06, "loss": 1.0087, "step": 15214 }, { "epoch": 0.5953126222709132, "grad_norm": 0.0, "learning_rate": 7.42823596962904e-06, "loss": 0.891, "step": 15215 }, { "epoch": 0.5953517489631427, "grad_norm": 0.0, "learning_rate": 7.427011383152767e-06, "loss": 0.9882, "step": 15216 }, { "epoch": 0.5953908756553721, "grad_norm": 0.0, "learning_rate": 7.425786837995466e-06, "loss": 1.1603, "step": 15217 }, { "epoch": 0.5954300023476016, "grad_norm": 0.0, "learning_rate": 7.424562334176804e-06, "loss": 0.9848, "step": 15218 }, { "epoch": 0.595469129039831, "grad_norm": 0.0, "learning_rate": 7.423337871716442e-06, "loss": 0.9871, "step": 15219 }, { "epoch": 0.5955082557320605, "grad_norm": 0.0, "learning_rate": 7.4221134506340405e-06, "loss": 0.9852, "step": 15220 }, { "epoch": 0.5955473824242898, "grad_norm": 0.0, "learning_rate": 7.420889070949272e-06, "loss": 1.0323, "step": 15221 }, { "epoch": 0.5955865091165193, "grad_norm": 0.0, "learning_rate": 7.419664732681793e-06, "loss": 0.9406, "step": 15222 }, { "epoch": 0.5956256358087487, "grad_norm": 0.0, "learning_rate": 7.418440435851265e-06, "loss": 0.9583, "step": 15223 }, { "epoch": 0.5956647625009782, "grad_norm": 0.0, "learning_rate": 7.417216180477344e-06, "loss": 1.0328, "step": 15224 }, { "epoch": 0.5957038891932076, "grad_norm": 0.0, "learning_rate": 7.4159919665797006e-06, "loss": 0.9999, "step": 15225 }, { "epoch": 0.5957430158854371, "grad_norm": 0.0, "learning_rate": 7.414767794177986e-06, "loss": 0.9625, "step": 15226 }, { "epoch": 0.5957821425776665, "grad_norm": 0.0, "learning_rate": 7.413543663291864e-06, "loss": 1.005, "step": 15227 }, { "epoch": 0.5958212692698959, "grad_norm": 0.0, "learning_rate": 7.412319573940987e-06, "loss": 1.0138, "step": 15228 }, { "epoch": 0.5958603959621254, "grad_norm": 0.0, "learning_rate": 7.411095526145011e-06, "loss": 0.8246, "step": 15229 }, { "epoch": 0.5958995226543548, "grad_norm": 0.0, "learning_rate": 7.4098715199236036e-06, "loss": 0.9174, "step": 15230 }, { "epoch": 0.5959386493465842, "grad_norm": 0.0, "learning_rate": 7.408647555296411e-06, "loss": 1.0371, "step": 15231 }, { "epoch": 0.5959777760388136, "grad_norm": 0.0, "learning_rate": 7.4074236322830926e-06, "loss": 0.9975, "step": 15232 }, { "epoch": 0.5960169027310431, "grad_norm": 0.0, "learning_rate": 7.406199750903299e-06, "loss": 0.8653, "step": 15233 }, { "epoch": 0.5960560294232725, "grad_norm": 0.0, "learning_rate": 7.404975911176691e-06, "loss": 0.9625, "step": 15234 }, { "epoch": 0.596095156115502, "grad_norm": 0.0, "learning_rate": 7.403752113122918e-06, "loss": 0.9506, "step": 15235 }, { "epoch": 0.5961342828077314, "grad_norm": 0.0, "learning_rate": 7.4025283567616315e-06, "loss": 1.0472, "step": 15236 }, { "epoch": 0.5961734094999609, "grad_norm": 0.0, "learning_rate": 7.401304642112481e-06, "loss": 1.0507, "step": 15237 }, { "epoch": 0.5962125361921903, "grad_norm": 0.0, "learning_rate": 7.4000809691951255e-06, "loss": 1.0331, "step": 15238 }, { "epoch": 0.5962516628844198, "grad_norm": 0.0, "learning_rate": 7.398857338029213e-06, "loss": 1.028, "step": 15239 }, { "epoch": 0.5962907895766492, "grad_norm": 0.0, "learning_rate": 7.397633748634392e-06, "loss": 1.0505, "step": 15240 }, { "epoch": 0.5963299162688787, "grad_norm": 0.0, "learning_rate": 7.39641020103031e-06, "loss": 0.9152, "step": 15241 }, { "epoch": 0.596369042961108, "grad_norm": 0.0, "learning_rate": 7.395186695236618e-06, "loss": 0.9446, "step": 15242 }, { "epoch": 0.5964081696533375, "grad_norm": 0.0, "learning_rate": 7.393963231272964e-06, "loss": 1.0972, "step": 15243 }, { "epoch": 0.5964472963455669, "grad_norm": 0.0, "learning_rate": 7.392739809158995e-06, "loss": 1.0809, "step": 15244 }, { "epoch": 0.5964864230377964, "grad_norm": 0.0, "learning_rate": 7.3915164289143595e-06, "loss": 0.9773, "step": 15245 }, { "epoch": 0.5965255497300258, "grad_norm": 0.0, "learning_rate": 7.390293090558698e-06, "loss": 1.088, "step": 15246 }, { "epoch": 0.5965646764222553, "grad_norm": 0.0, "learning_rate": 7.389069794111663e-06, "loss": 0.9355, "step": 15247 }, { "epoch": 0.5966038031144847, "grad_norm": 0.0, "learning_rate": 7.387846539592894e-06, "loss": 1.0496, "step": 15248 }, { "epoch": 0.5966429298067142, "grad_norm": 0.0, "learning_rate": 7.386623327022034e-06, "loss": 1.0395, "step": 15249 }, { "epoch": 0.5966820564989436, "grad_norm": 0.0, "learning_rate": 7.385400156418731e-06, "loss": 1.0049, "step": 15250 }, { "epoch": 0.5967211831911731, "grad_norm": 0.0, "learning_rate": 7.384177027802624e-06, "loss": 1.0305, "step": 15251 }, { "epoch": 0.5967603098834025, "grad_norm": 0.0, "learning_rate": 7.382953941193358e-06, "loss": 1.0231, "step": 15252 }, { "epoch": 0.596799436575632, "grad_norm": 0.0, "learning_rate": 7.381730896610573e-06, "loss": 1.0525, "step": 15253 }, { "epoch": 0.5968385632678613, "grad_norm": 0.0, "learning_rate": 7.380507894073907e-06, "loss": 1.12, "step": 15254 }, { "epoch": 0.5968776899600908, "grad_norm": 0.0, "learning_rate": 7.3792849336029995e-06, "loss": 1.0123, "step": 15255 }, { "epoch": 0.5969168166523202, "grad_norm": 0.0, "learning_rate": 7.378062015217494e-06, "loss": 1.0625, "step": 15256 }, { "epoch": 0.5969559433445496, "grad_norm": 0.0, "learning_rate": 7.376839138937028e-06, "loss": 1.0973, "step": 15257 }, { "epoch": 0.5969950700367791, "grad_norm": 0.0, "learning_rate": 7.375616304781239e-06, "loss": 1.1131, "step": 15258 }, { "epoch": 0.5970341967290085, "grad_norm": 0.0, "learning_rate": 7.374393512769764e-06, "loss": 1.0703, "step": 15259 }, { "epoch": 0.597073323421238, "grad_norm": 0.0, "learning_rate": 7.373170762922235e-06, "loss": 1.1244, "step": 15260 }, { "epoch": 0.5971124501134674, "grad_norm": 0.0, "learning_rate": 7.371948055258296e-06, "loss": 0.9366, "step": 15261 }, { "epoch": 0.5971515768056969, "grad_norm": 0.0, "learning_rate": 7.370725389797577e-06, "loss": 0.886, "step": 15262 }, { "epoch": 0.5971907034979262, "grad_norm": 0.0, "learning_rate": 7.369502766559713e-06, "loss": 1.0796, "step": 15263 }, { "epoch": 0.5972298301901557, "grad_norm": 0.0, "learning_rate": 7.368280185564336e-06, "loss": 1.0585, "step": 15264 }, { "epoch": 0.5972689568823851, "grad_norm": 0.0, "learning_rate": 7.367057646831085e-06, "loss": 0.8812, "step": 15265 }, { "epoch": 0.5973080835746146, "grad_norm": 0.0, "learning_rate": 7.365835150379589e-06, "loss": 1.0365, "step": 15266 }, { "epoch": 0.597347210266844, "grad_norm": 0.0, "learning_rate": 7.364612696229479e-06, "loss": 0.9293, "step": 15267 }, { "epoch": 0.5973863369590735, "grad_norm": 0.0, "learning_rate": 7.363390284400388e-06, "loss": 1.0561, "step": 15268 }, { "epoch": 0.5974254636513029, "grad_norm": 0.0, "learning_rate": 7.362167914911939e-06, "loss": 1.0461, "step": 15269 }, { "epoch": 0.5974645903435324, "grad_norm": 0.0, "learning_rate": 7.360945587783774e-06, "loss": 1.0399, "step": 15270 }, { "epoch": 0.5975037170357618, "grad_norm": 0.0, "learning_rate": 7.3597233030355165e-06, "loss": 0.96, "step": 15271 }, { "epoch": 0.5975428437279913, "grad_norm": 0.0, "learning_rate": 7.358501060686794e-06, "loss": 0.9967, "step": 15272 }, { "epoch": 0.5975819704202207, "grad_norm": 0.0, "learning_rate": 7.357278860757229e-06, "loss": 0.9627, "step": 15273 }, { "epoch": 0.5976210971124502, "grad_norm": 0.0, "learning_rate": 7.356056703266459e-06, "loss": 1.0131, "step": 15274 }, { "epoch": 0.5976602238046795, "grad_norm": 0.0, "learning_rate": 7.354834588234105e-06, "loss": 1.0469, "step": 15275 }, { "epoch": 0.597699350496909, "grad_norm": 0.0, "learning_rate": 7.353612515679792e-06, "loss": 1.0279, "step": 15276 }, { "epoch": 0.5977384771891384, "grad_norm": 0.0, "learning_rate": 7.352390485623146e-06, "loss": 0.98, "step": 15277 }, { "epoch": 0.5977776038813679, "grad_norm": 0.0, "learning_rate": 7.351168498083789e-06, "loss": 1.0372, "step": 15278 }, { "epoch": 0.5978167305735973, "grad_norm": 0.0, "learning_rate": 7.349946553081349e-06, "loss": 1.0416, "step": 15279 }, { "epoch": 0.5978558572658268, "grad_norm": 0.0, "learning_rate": 7.348724650635448e-06, "loss": 1.0475, "step": 15280 }, { "epoch": 0.5978949839580562, "grad_norm": 0.0, "learning_rate": 7.347502790765706e-06, "loss": 0.9157, "step": 15281 }, { "epoch": 0.5979341106502857, "grad_norm": 0.0, "learning_rate": 7.346280973491741e-06, "loss": 1.0994, "step": 15282 }, { "epoch": 0.5979732373425151, "grad_norm": 0.0, "learning_rate": 7.345059198833185e-06, "loss": 0.9894, "step": 15283 }, { "epoch": 0.5980123640347444, "grad_norm": 0.0, "learning_rate": 7.3438374668096475e-06, "loss": 1.0256, "step": 15284 }, { "epoch": 0.5980514907269739, "grad_norm": 0.0, "learning_rate": 7.342615777440753e-06, "loss": 1.1091, "step": 15285 }, { "epoch": 0.5980906174192033, "grad_norm": 0.0, "learning_rate": 7.341394130746122e-06, "loss": 1.1323, "step": 15286 }, { "epoch": 0.5981297441114328, "grad_norm": 0.0, "learning_rate": 7.340172526745366e-06, "loss": 0.9855, "step": 15287 }, { "epoch": 0.5981688708036622, "grad_norm": 0.0, "learning_rate": 7.3389509654581045e-06, "loss": 1.0368, "step": 15288 }, { "epoch": 0.5982079974958917, "grad_norm": 0.0, "learning_rate": 7.337729446903961e-06, "loss": 1.0459, "step": 15289 }, { "epoch": 0.5982471241881211, "grad_norm": 0.0, "learning_rate": 7.3365079711025445e-06, "loss": 0.9427, "step": 15290 }, { "epoch": 0.5982862508803506, "grad_norm": 0.0, "learning_rate": 7.335286538073472e-06, "loss": 0.9904, "step": 15291 }, { "epoch": 0.59832537757258, "grad_norm": 0.0, "learning_rate": 7.334065147836359e-06, "loss": 0.827, "step": 15292 }, { "epoch": 0.5983645042648095, "grad_norm": 0.0, "learning_rate": 7.33284380041082e-06, "loss": 1.0227, "step": 15293 }, { "epoch": 0.5984036309570389, "grad_norm": 0.0, "learning_rate": 7.3316224958164654e-06, "loss": 1.0283, "step": 15294 }, { "epoch": 0.5984427576492684, "grad_norm": 0.0, "learning_rate": 7.33040123407291e-06, "loss": 1.0678, "step": 15295 }, { "epoch": 0.5984818843414977, "grad_norm": 0.0, "learning_rate": 7.329180015199767e-06, "loss": 1.029, "step": 15296 }, { "epoch": 0.5985210110337272, "grad_norm": 0.0, "learning_rate": 7.327958839216647e-06, "loss": 1.0587, "step": 15297 }, { "epoch": 0.5985601377259566, "grad_norm": 0.0, "learning_rate": 7.326737706143159e-06, "loss": 0.9697, "step": 15298 }, { "epoch": 0.5985992644181861, "grad_norm": 0.0, "learning_rate": 7.325516615998915e-06, "loss": 1.0176, "step": 15299 }, { "epoch": 0.5986383911104155, "grad_norm": 0.0, "learning_rate": 7.324295568803517e-06, "loss": 1.0544, "step": 15300 }, { "epoch": 0.598677517802645, "grad_norm": 0.0, "learning_rate": 7.3230745645765845e-06, "loss": 0.9903, "step": 15301 }, { "epoch": 0.5987166444948744, "grad_norm": 0.0, "learning_rate": 7.321853603337719e-06, "loss": 1.0142, "step": 15302 }, { "epoch": 0.5987557711871039, "grad_norm": 0.0, "learning_rate": 7.32063268510653e-06, "loss": 1.0018, "step": 15303 }, { "epoch": 0.5987948978793333, "grad_norm": 0.0, "learning_rate": 7.3194118099026175e-06, "loss": 1.0961, "step": 15304 }, { "epoch": 0.5988340245715628, "grad_norm": 0.0, "learning_rate": 7.318190977745598e-06, "loss": 0.8494, "step": 15305 }, { "epoch": 0.5988731512637921, "grad_norm": 0.0, "learning_rate": 7.316970188655069e-06, "loss": 1.0252, "step": 15306 }, { "epoch": 0.5989122779560216, "grad_norm": 0.0, "learning_rate": 7.315749442650638e-06, "loss": 1.0198, "step": 15307 }, { "epoch": 0.598951404648251, "grad_norm": 0.0, "learning_rate": 7.314528739751907e-06, "loss": 1.1005, "step": 15308 }, { "epoch": 0.5989905313404805, "grad_norm": 0.0, "learning_rate": 7.3133080799784765e-06, "loss": 0.9572, "step": 15309 }, { "epoch": 0.5990296580327099, "grad_norm": 0.0, "learning_rate": 7.312087463349954e-06, "loss": 1.0054, "step": 15310 }, { "epoch": 0.5990687847249394, "grad_norm": 0.0, "learning_rate": 7.310866889885939e-06, "loss": 0.9822, "step": 15311 }, { "epoch": 0.5991079114171688, "grad_norm": 0.0, "learning_rate": 7.309646359606033e-06, "loss": 0.9486, "step": 15312 }, { "epoch": 0.5991470381093982, "grad_norm": 0.0, "learning_rate": 7.308425872529829e-06, "loss": 1.1118, "step": 15313 }, { "epoch": 0.5991861648016277, "grad_norm": 0.0, "learning_rate": 7.307205428676939e-06, "loss": 1.0546, "step": 15314 }, { "epoch": 0.5992252914938571, "grad_norm": 0.0, "learning_rate": 7.305985028066955e-06, "loss": 1.1009, "step": 15315 }, { "epoch": 0.5992644181860866, "grad_norm": 0.0, "learning_rate": 7.304764670719476e-06, "loss": 0.9735, "step": 15316 }, { "epoch": 0.5993035448783159, "grad_norm": 0.0, "learning_rate": 7.303544356654098e-06, "loss": 1.0572, "step": 15317 }, { "epoch": 0.5993426715705454, "grad_norm": 0.0, "learning_rate": 7.302324085890416e-06, "loss": 1.0873, "step": 15318 }, { "epoch": 0.5993817982627748, "grad_norm": 0.0, "learning_rate": 7.301103858448032e-06, "loss": 1.0533, "step": 15319 }, { "epoch": 0.5994209249550043, "grad_norm": 0.0, "learning_rate": 7.299883674346538e-06, "loss": 1.001, "step": 15320 }, { "epoch": 0.5994600516472337, "grad_norm": 0.0, "learning_rate": 7.29866353360553e-06, "loss": 0.7968, "step": 15321 }, { "epoch": 0.5994991783394632, "grad_norm": 0.0, "learning_rate": 7.297443436244595e-06, "loss": 0.9496, "step": 15322 }, { "epoch": 0.5995383050316926, "grad_norm": 0.0, "learning_rate": 7.296223382283336e-06, "loss": 0.905, "step": 15323 }, { "epoch": 0.5995774317239221, "grad_norm": 0.0, "learning_rate": 7.295003371741343e-06, "loss": 0.9558, "step": 15324 }, { "epoch": 0.5996165584161515, "grad_norm": 0.0, "learning_rate": 7.293783404638206e-06, "loss": 1.0091, "step": 15325 }, { "epoch": 0.599655685108381, "grad_norm": 0.0, "learning_rate": 7.292563480993515e-06, "loss": 0.9851, "step": 15326 }, { "epoch": 0.5996948118006104, "grad_norm": 0.0, "learning_rate": 7.291343600826859e-06, "loss": 0.9469, "step": 15327 }, { "epoch": 0.5997339384928398, "grad_norm": 0.0, "learning_rate": 7.2901237641578345e-06, "loss": 0.9205, "step": 15328 }, { "epoch": 0.5997730651850692, "grad_norm": 0.0, "learning_rate": 7.288903971006027e-06, "loss": 1.0097, "step": 15329 }, { "epoch": 0.5998121918772987, "grad_norm": 0.0, "learning_rate": 7.287684221391025e-06, "loss": 1.0263, "step": 15330 }, { "epoch": 0.5998513185695281, "grad_norm": 0.0, "learning_rate": 7.286464515332412e-06, "loss": 1.2154, "step": 15331 }, { "epoch": 0.5998904452617576, "grad_norm": 0.0, "learning_rate": 7.285244852849782e-06, "loss": 1.0469, "step": 15332 }, { "epoch": 0.599929571953987, "grad_norm": 0.0, "learning_rate": 7.284025233962715e-06, "loss": 1.0066, "step": 15333 }, { "epoch": 0.5999686986462165, "grad_norm": 0.0, "learning_rate": 7.282805658690801e-06, "loss": 1.0831, "step": 15334 }, { "epoch": 0.6000078253384459, "grad_norm": 0.0, "learning_rate": 7.281586127053625e-06, "loss": 0.965, "step": 15335 }, { "epoch": 0.6000469520306754, "grad_norm": 0.0, "learning_rate": 7.280366639070766e-06, "loss": 1.0441, "step": 15336 }, { "epoch": 0.6000860787229048, "grad_norm": 0.0, "learning_rate": 7.279147194761813e-06, "loss": 1.1962, "step": 15337 }, { "epoch": 0.6001252054151343, "grad_norm": 0.0, "learning_rate": 7.277927794146345e-06, "loss": 0.9602, "step": 15338 }, { "epoch": 0.6001643321073636, "grad_norm": 0.0, "learning_rate": 7.276708437243949e-06, "loss": 0.9677, "step": 15339 }, { "epoch": 0.6002034587995931, "grad_norm": 0.0, "learning_rate": 7.275489124074198e-06, "loss": 0.9831, "step": 15340 }, { "epoch": 0.6002425854918225, "grad_norm": 0.0, "learning_rate": 7.274269854656682e-06, "loss": 1.0626, "step": 15341 }, { "epoch": 0.6002817121840519, "grad_norm": 0.0, "learning_rate": 7.273050629010976e-06, "loss": 1.1107, "step": 15342 }, { "epoch": 0.6003208388762814, "grad_norm": 0.0, "learning_rate": 7.271831447156661e-06, "loss": 1.1134, "step": 15343 }, { "epoch": 0.6003599655685108, "grad_norm": 0.0, "learning_rate": 7.270612309113309e-06, "loss": 0.9542, "step": 15344 }, { "epoch": 0.6003990922607403, "grad_norm": 0.0, "learning_rate": 7.2693932149005065e-06, "loss": 1.0117, "step": 15345 }, { "epoch": 0.6004382189529697, "grad_norm": 0.0, "learning_rate": 7.268174164537829e-06, "loss": 0.9781, "step": 15346 }, { "epoch": 0.6004773456451992, "grad_norm": 0.0, "learning_rate": 7.266955158044851e-06, "loss": 0.9253, "step": 15347 }, { "epoch": 0.6005164723374286, "grad_norm": 0.0, "learning_rate": 7.2657361954411465e-06, "loss": 1.015, "step": 15348 }, { "epoch": 0.600555599029658, "grad_norm": 0.0, "learning_rate": 7.26451727674629e-06, "loss": 0.9551, "step": 15349 }, { "epoch": 0.6005947257218874, "grad_norm": 0.0, "learning_rate": 7.263298401979862e-06, "loss": 0.9953, "step": 15350 }, { "epoch": 0.6006338524141169, "grad_norm": 0.0, "learning_rate": 7.2620795711614335e-06, "loss": 0.9272, "step": 15351 }, { "epoch": 0.6006729791063463, "grad_norm": 0.0, "learning_rate": 7.260860784310575e-06, "loss": 0.9571, "step": 15352 }, { "epoch": 0.6007121057985758, "grad_norm": 0.0, "learning_rate": 7.259642041446856e-06, "loss": 0.9416, "step": 15353 }, { "epoch": 0.6007512324908052, "grad_norm": 0.0, "learning_rate": 7.258423342589857e-06, "loss": 1.1089, "step": 15354 }, { "epoch": 0.6007903591830347, "grad_norm": 0.0, "learning_rate": 7.257204687759143e-06, "loss": 1.0948, "step": 15355 }, { "epoch": 0.6008294858752641, "grad_norm": 0.0, "learning_rate": 7.255986076974284e-06, "loss": 0.9968, "step": 15356 }, { "epoch": 0.6008686125674936, "grad_norm": 0.0, "learning_rate": 7.254767510254852e-06, "loss": 0.9562, "step": 15357 }, { "epoch": 0.600907739259723, "grad_norm": 0.0, "learning_rate": 7.253548987620408e-06, "loss": 0.9631, "step": 15358 }, { "epoch": 0.6009468659519525, "grad_norm": 0.0, "learning_rate": 7.2523305090905305e-06, "loss": 1.1223, "step": 15359 }, { "epoch": 0.6009859926441818, "grad_norm": 0.0, "learning_rate": 7.251112074684783e-06, "loss": 1.1151, "step": 15360 }, { "epoch": 0.6010251193364113, "grad_norm": 0.0, "learning_rate": 7.24989368442273e-06, "loss": 0.8951, "step": 15361 }, { "epoch": 0.6010642460286407, "grad_norm": 0.0, "learning_rate": 7.248675338323934e-06, "loss": 1.0047, "step": 15362 }, { "epoch": 0.6011033727208702, "grad_norm": 0.0, "learning_rate": 7.247457036407968e-06, "loss": 1.0082, "step": 15363 }, { "epoch": 0.6011424994130996, "grad_norm": 0.0, "learning_rate": 7.246238778694394e-06, "loss": 0.9167, "step": 15364 }, { "epoch": 0.6011816261053291, "grad_norm": 0.0, "learning_rate": 7.245020565202775e-06, "loss": 1.0746, "step": 15365 }, { "epoch": 0.6012207527975585, "grad_norm": 0.0, "learning_rate": 7.243802395952673e-06, "loss": 1.0626, "step": 15366 }, { "epoch": 0.601259879489788, "grad_norm": 0.0, "learning_rate": 7.242584270963646e-06, "loss": 1.017, "step": 15367 }, { "epoch": 0.6012990061820174, "grad_norm": 0.0, "learning_rate": 7.2413661902552654e-06, "loss": 1.0165, "step": 15368 }, { "epoch": 0.6013381328742468, "grad_norm": 0.0, "learning_rate": 7.240148153847086e-06, "loss": 0.9869, "step": 15369 }, { "epoch": 0.6013772595664763, "grad_norm": 0.0, "learning_rate": 7.23893016175867e-06, "loss": 1.065, "step": 15370 }, { "epoch": 0.6014163862587056, "grad_norm": 0.0, "learning_rate": 7.237712214009571e-06, "loss": 1.1438, "step": 15371 }, { "epoch": 0.6014555129509351, "grad_norm": 0.0, "learning_rate": 7.236494310619357e-06, "loss": 0.9609, "step": 15372 }, { "epoch": 0.6014946396431645, "grad_norm": 0.0, "learning_rate": 7.235276451607582e-06, "loss": 0.963, "step": 15373 }, { "epoch": 0.601533766335394, "grad_norm": 0.0, "learning_rate": 7.234058636993803e-06, "loss": 1.0334, "step": 15374 }, { "epoch": 0.6015728930276234, "grad_norm": 0.0, "learning_rate": 7.232840866797575e-06, "loss": 1.1028, "step": 15375 }, { "epoch": 0.6016120197198529, "grad_norm": 0.0, "learning_rate": 7.231623141038454e-06, "loss": 0.939, "step": 15376 }, { "epoch": 0.6016511464120823, "grad_norm": 0.0, "learning_rate": 7.230405459735996e-06, "loss": 1.1015, "step": 15377 }, { "epoch": 0.6016902731043118, "grad_norm": 0.0, "learning_rate": 7.229187822909758e-06, "loss": 1.0042, "step": 15378 }, { "epoch": 0.6017293997965412, "grad_norm": 0.0, "learning_rate": 7.227970230579292e-06, "loss": 0.9468, "step": 15379 }, { "epoch": 0.6017685264887707, "grad_norm": 0.0, "learning_rate": 7.226752682764149e-06, "loss": 1.1544, "step": 15380 }, { "epoch": 0.601807653181, "grad_norm": 0.0, "learning_rate": 7.225535179483882e-06, "loss": 1.0125, "step": 15381 }, { "epoch": 0.6018467798732295, "grad_norm": 0.0, "learning_rate": 7.224317720758047e-06, "loss": 0.9489, "step": 15382 }, { "epoch": 0.6018859065654589, "grad_norm": 0.0, "learning_rate": 7.223100306606188e-06, "loss": 1.0533, "step": 15383 }, { "epoch": 0.6019250332576884, "grad_norm": 0.0, "learning_rate": 7.2218829370478596e-06, "loss": 0.8505, "step": 15384 }, { "epoch": 0.6019641599499178, "grad_norm": 0.0, "learning_rate": 7.220665612102608e-06, "loss": 1.0451, "step": 15385 }, { "epoch": 0.6020032866421473, "grad_norm": 0.0, "learning_rate": 7.219448331789987e-06, "loss": 1.1368, "step": 15386 }, { "epoch": 0.6020424133343767, "grad_norm": 0.0, "learning_rate": 7.218231096129541e-06, "loss": 0.9502, "step": 15387 }, { "epoch": 0.6020815400266062, "grad_norm": 0.0, "learning_rate": 7.217013905140817e-06, "loss": 1.0927, "step": 15388 }, { "epoch": 0.6021206667188356, "grad_norm": 0.0, "learning_rate": 7.215796758843361e-06, "loss": 0.9666, "step": 15389 }, { "epoch": 0.6021597934110651, "grad_norm": 0.0, "learning_rate": 7.214579657256722e-06, "loss": 0.9743, "step": 15390 }, { "epoch": 0.6021989201032945, "grad_norm": 0.0, "learning_rate": 7.213362600400444e-06, "loss": 1.043, "step": 15391 }, { "epoch": 0.602238046795524, "grad_norm": 0.0, "learning_rate": 7.212145588294071e-06, "loss": 1.0961, "step": 15392 }, { "epoch": 0.6022771734877533, "grad_norm": 0.0, "learning_rate": 7.210928620957146e-06, "loss": 1.0425, "step": 15393 }, { "epoch": 0.6023163001799828, "grad_norm": 0.0, "learning_rate": 7.209711698409209e-06, "loss": 1.0359, "step": 15394 }, { "epoch": 0.6023554268722122, "grad_norm": 0.0, "learning_rate": 7.208494820669809e-06, "loss": 1.076, "step": 15395 }, { "epoch": 0.6023945535644417, "grad_norm": 0.0, "learning_rate": 7.2072779877584844e-06, "loss": 0.9077, "step": 15396 }, { "epoch": 0.6024336802566711, "grad_norm": 0.0, "learning_rate": 7.206061199694776e-06, "loss": 1.062, "step": 15397 }, { "epoch": 0.6024728069489005, "grad_norm": 0.0, "learning_rate": 7.204844456498219e-06, "loss": 1.0625, "step": 15398 }, { "epoch": 0.60251193364113, "grad_norm": 0.0, "learning_rate": 7.203627758188363e-06, "loss": 1.0242, "step": 15399 }, { "epoch": 0.6025510603333594, "grad_norm": 0.0, "learning_rate": 7.202411104784741e-06, "loss": 1.029, "step": 15400 }, { "epoch": 0.6025901870255889, "grad_norm": 0.0, "learning_rate": 7.20119449630689e-06, "loss": 0.9541, "step": 15401 }, { "epoch": 0.6026293137178182, "grad_norm": 0.0, "learning_rate": 7.199977932774349e-06, "loss": 0.8856, "step": 15402 }, { "epoch": 0.6026684404100477, "grad_norm": 0.0, "learning_rate": 7.198761414206649e-06, "loss": 1.0279, "step": 15403 }, { "epoch": 0.6027075671022771, "grad_norm": 0.0, "learning_rate": 7.197544940623336e-06, "loss": 0.9492, "step": 15404 }, { "epoch": 0.6027466937945066, "grad_norm": 0.0, "learning_rate": 7.196328512043938e-06, "loss": 1.0249, "step": 15405 }, { "epoch": 0.602785820486736, "grad_norm": 0.0, "learning_rate": 7.1951121284879925e-06, "loss": 1.1247, "step": 15406 }, { "epoch": 0.6028249471789655, "grad_norm": 0.0, "learning_rate": 7.193895789975025e-06, "loss": 1.1632, "step": 15407 }, { "epoch": 0.6028640738711949, "grad_norm": 0.0, "learning_rate": 7.192679496524582e-06, "loss": 1.014, "step": 15408 }, { "epoch": 0.6029032005634244, "grad_norm": 0.0, "learning_rate": 7.191463248156186e-06, "loss": 1.0482, "step": 15409 }, { "epoch": 0.6029423272556538, "grad_norm": 0.0, "learning_rate": 7.190247044889372e-06, "loss": 1.1488, "step": 15410 }, { "epoch": 0.6029814539478833, "grad_norm": 0.0, "learning_rate": 7.189030886743667e-06, "loss": 0.9671, "step": 15411 }, { "epoch": 0.6030205806401127, "grad_norm": 0.0, "learning_rate": 7.187814773738605e-06, "loss": 0.9042, "step": 15412 }, { "epoch": 0.6030597073323422, "grad_norm": 0.0, "learning_rate": 7.186598705893714e-06, "loss": 0.9302, "step": 15413 }, { "epoch": 0.6030988340245715, "grad_norm": 0.0, "learning_rate": 7.185382683228524e-06, "loss": 0.9143, "step": 15414 }, { "epoch": 0.603137960716801, "grad_norm": 0.0, "learning_rate": 7.1841667057625605e-06, "loss": 1.0498, "step": 15415 }, { "epoch": 0.6031770874090304, "grad_norm": 0.0, "learning_rate": 7.182950773515348e-06, "loss": 1.0494, "step": 15416 }, { "epoch": 0.6032162141012599, "grad_norm": 0.0, "learning_rate": 7.18173488650642e-06, "loss": 0.9138, "step": 15417 }, { "epoch": 0.6032553407934893, "grad_norm": 0.0, "learning_rate": 7.180519044755298e-06, "loss": 1.0837, "step": 15418 }, { "epoch": 0.6032944674857188, "grad_norm": 0.0, "learning_rate": 7.179303248281506e-06, "loss": 1.0708, "step": 15419 }, { "epoch": 0.6033335941779482, "grad_norm": 0.0, "learning_rate": 7.1780874971045685e-06, "loss": 1.0148, "step": 15420 }, { "epoch": 0.6033727208701777, "grad_norm": 0.0, "learning_rate": 7.176871791244011e-06, "loss": 0.9164, "step": 15421 }, { "epoch": 0.6034118475624071, "grad_norm": 0.0, "learning_rate": 7.175656130719354e-06, "loss": 1.0842, "step": 15422 }, { "epoch": 0.6034509742546366, "grad_norm": 0.0, "learning_rate": 7.174440515550122e-06, "loss": 1.0114, "step": 15423 }, { "epoch": 0.603490100946866, "grad_norm": 0.0, "learning_rate": 7.173224945755834e-06, "loss": 1.0162, "step": 15424 }, { "epoch": 0.6035292276390954, "grad_norm": 0.0, "learning_rate": 7.17200942135601e-06, "loss": 1.1, "step": 15425 }, { "epoch": 0.6035683543313248, "grad_norm": 0.0, "learning_rate": 7.170793942370173e-06, "loss": 1.09, "step": 15426 }, { "epoch": 0.6036074810235542, "grad_norm": 0.0, "learning_rate": 7.16957850881784e-06, "loss": 1.0688, "step": 15427 }, { "epoch": 0.6036466077157837, "grad_norm": 0.0, "learning_rate": 7.168363120718527e-06, "loss": 1.134, "step": 15428 }, { "epoch": 0.6036857344080131, "grad_norm": 0.0, "learning_rate": 7.167147778091754e-06, "loss": 1.0267, "step": 15429 }, { "epoch": 0.6037248611002426, "grad_norm": 0.0, "learning_rate": 7.165932480957041e-06, "loss": 1.063, "step": 15430 }, { "epoch": 0.603763987792472, "grad_norm": 0.0, "learning_rate": 7.1647172293338995e-06, "loss": 0.9891, "step": 15431 }, { "epoch": 0.6038031144847015, "grad_norm": 0.0, "learning_rate": 7.163502023241849e-06, "loss": 1.0178, "step": 15432 }, { "epoch": 0.6038422411769309, "grad_norm": 0.0, "learning_rate": 7.162286862700398e-06, "loss": 1.0269, "step": 15433 }, { "epoch": 0.6038813678691604, "grad_norm": 0.0, "learning_rate": 7.1610717477290625e-06, "loss": 0.8747, "step": 15434 }, { "epoch": 0.6039204945613897, "grad_norm": 0.0, "learning_rate": 7.159856678347361e-06, "loss": 1.0713, "step": 15435 }, { "epoch": 0.6039596212536192, "grad_norm": 0.0, "learning_rate": 7.1586416545748026e-06, "loss": 1.0666, "step": 15436 }, { "epoch": 0.6039987479458486, "grad_norm": 0.0, "learning_rate": 7.1574266764308984e-06, "loss": 0.9949, "step": 15437 }, { "epoch": 0.6040378746380781, "grad_norm": 0.0, "learning_rate": 7.156211743935157e-06, "loss": 1.0777, "step": 15438 }, { "epoch": 0.6040770013303075, "grad_norm": 0.0, "learning_rate": 7.154996857107094e-06, "loss": 0.8936, "step": 15439 }, { "epoch": 0.604116128022537, "grad_norm": 0.0, "learning_rate": 7.153782015966218e-06, "loss": 1.0284, "step": 15440 }, { "epoch": 0.6041552547147664, "grad_norm": 0.0, "learning_rate": 7.152567220532034e-06, "loss": 0.853, "step": 15441 }, { "epoch": 0.6041943814069959, "grad_norm": 0.0, "learning_rate": 7.151352470824053e-06, "loss": 0.9888, "step": 15442 }, { "epoch": 0.6042335080992253, "grad_norm": 0.0, "learning_rate": 7.15013776686178e-06, "loss": 0.8924, "step": 15443 }, { "epoch": 0.6042726347914548, "grad_norm": 0.0, "learning_rate": 7.148923108664725e-06, "loss": 0.9446, "step": 15444 }, { "epoch": 0.6043117614836842, "grad_norm": 0.0, "learning_rate": 7.1477084962523935e-06, "loss": 1.0906, "step": 15445 }, { "epoch": 0.6043508881759136, "grad_norm": 0.0, "learning_rate": 7.146493929644289e-06, "loss": 0.9798, "step": 15446 }, { "epoch": 0.604390014868143, "grad_norm": 0.0, "learning_rate": 7.145279408859912e-06, "loss": 1.0176, "step": 15447 }, { "epoch": 0.6044291415603725, "grad_norm": 0.0, "learning_rate": 7.1440649339187755e-06, "loss": 1.0325, "step": 15448 }, { "epoch": 0.6044682682526019, "grad_norm": 0.0, "learning_rate": 7.142850504840378e-06, "loss": 1.0544, "step": 15449 }, { "epoch": 0.6045073949448314, "grad_norm": 0.0, "learning_rate": 7.141636121644219e-06, "loss": 0.9237, "step": 15450 }, { "epoch": 0.6045465216370608, "grad_norm": 0.0, "learning_rate": 7.140421784349802e-06, "loss": 1.021, "step": 15451 }, { "epoch": 0.6045856483292903, "grad_norm": 0.0, "learning_rate": 7.1392074929766254e-06, "loss": 1.0703, "step": 15452 }, { "epoch": 0.6046247750215197, "grad_norm": 0.0, "learning_rate": 7.137993247544196e-06, "loss": 0.9818, "step": 15453 }, { "epoch": 0.6046639017137492, "grad_norm": 0.0, "learning_rate": 7.136779048072009e-06, "loss": 1.0215, "step": 15454 }, { "epoch": 0.6047030284059786, "grad_norm": 0.0, "learning_rate": 7.135564894579561e-06, "loss": 0.9484, "step": 15455 }, { "epoch": 0.604742155098208, "grad_norm": 0.0, "learning_rate": 7.134350787086348e-06, "loss": 0.9944, "step": 15456 }, { "epoch": 0.6047812817904374, "grad_norm": 0.0, "learning_rate": 7.133136725611876e-06, "loss": 1.1058, "step": 15457 }, { "epoch": 0.6048204084826668, "grad_norm": 0.0, "learning_rate": 7.131922710175634e-06, "loss": 1.0232, "step": 15458 }, { "epoch": 0.6048595351748963, "grad_norm": 0.0, "learning_rate": 7.13070874079712e-06, "loss": 0.976, "step": 15459 }, { "epoch": 0.6048986618671257, "grad_norm": 0.0, "learning_rate": 7.129494817495828e-06, "loss": 0.9761, "step": 15460 }, { "epoch": 0.6049377885593552, "grad_norm": 0.0, "learning_rate": 7.128280940291248e-06, "loss": 0.9953, "step": 15461 }, { "epoch": 0.6049769152515846, "grad_norm": 0.0, "learning_rate": 7.127067109202883e-06, "loss": 0.9208, "step": 15462 }, { "epoch": 0.6050160419438141, "grad_norm": 0.0, "learning_rate": 7.125853324250218e-06, "loss": 1.0801, "step": 15463 }, { "epoch": 0.6050551686360435, "grad_norm": 0.0, "learning_rate": 7.1246395854527496e-06, "loss": 1.0894, "step": 15464 }, { "epoch": 0.605094295328273, "grad_norm": 0.0, "learning_rate": 7.1234258928299624e-06, "loss": 0.9593, "step": 15465 }, { "epoch": 0.6051334220205024, "grad_norm": 0.0, "learning_rate": 7.122212246401355e-06, "loss": 1.025, "step": 15466 }, { "epoch": 0.6051725487127319, "grad_norm": 0.0, "learning_rate": 7.120998646186409e-06, "loss": 1.1063, "step": 15467 }, { "epoch": 0.6052116754049612, "grad_norm": 0.0, "learning_rate": 7.11978509220462e-06, "loss": 1.0483, "step": 15468 }, { "epoch": 0.6052508020971907, "grad_norm": 0.0, "learning_rate": 7.118571584475471e-06, "loss": 1.0776, "step": 15469 }, { "epoch": 0.6052899287894201, "grad_norm": 0.0, "learning_rate": 7.117358123018454e-06, "loss": 1.0816, "step": 15470 }, { "epoch": 0.6053290554816496, "grad_norm": 0.0, "learning_rate": 7.116144707853052e-06, "loss": 1.1025, "step": 15471 }, { "epoch": 0.605368182173879, "grad_norm": 0.0, "learning_rate": 7.114931338998752e-06, "loss": 0.9943, "step": 15472 }, { "epoch": 0.6054073088661085, "grad_norm": 0.0, "learning_rate": 7.113718016475041e-06, "loss": 1.0019, "step": 15473 }, { "epoch": 0.6054464355583379, "grad_norm": 0.0, "learning_rate": 7.1125047403014005e-06, "loss": 0.8967, "step": 15474 }, { "epoch": 0.6054855622505674, "grad_norm": 0.0, "learning_rate": 7.111291510497316e-06, "loss": 0.9877, "step": 15475 }, { "epoch": 0.6055246889427968, "grad_norm": 0.0, "learning_rate": 7.110078327082271e-06, "loss": 0.9562, "step": 15476 }, { "epoch": 0.6055638156350263, "grad_norm": 0.0, "learning_rate": 7.108865190075747e-06, "loss": 1.1375, "step": 15477 }, { "epoch": 0.6056029423272556, "grad_norm": 0.0, "learning_rate": 7.107652099497222e-06, "loss": 0.998, "step": 15478 }, { "epoch": 0.6056420690194851, "grad_norm": 0.0, "learning_rate": 7.106439055366182e-06, "loss": 1.024, "step": 15479 }, { "epoch": 0.6056811957117145, "grad_norm": 0.0, "learning_rate": 7.105226057702106e-06, "loss": 1.0534, "step": 15480 }, { "epoch": 0.605720322403944, "grad_norm": 0.0, "learning_rate": 7.104013106524472e-06, "loss": 1.03, "step": 15481 }, { "epoch": 0.6057594490961734, "grad_norm": 0.0, "learning_rate": 7.10280020185276e-06, "loss": 1.0192, "step": 15482 }, { "epoch": 0.6057985757884028, "grad_norm": 0.0, "learning_rate": 7.101587343706441e-06, "loss": 1.117, "step": 15483 }, { "epoch": 0.6058377024806323, "grad_norm": 0.0, "learning_rate": 7.100374532105001e-06, "loss": 1.0377, "step": 15484 }, { "epoch": 0.6058768291728617, "grad_norm": 0.0, "learning_rate": 7.099161767067914e-06, "loss": 1.0241, "step": 15485 }, { "epoch": 0.6059159558650912, "grad_norm": 0.0, "learning_rate": 7.097949048614653e-06, "loss": 1.0181, "step": 15486 }, { "epoch": 0.6059550825573206, "grad_norm": 0.0, "learning_rate": 7.09673637676469e-06, "loss": 1.1035, "step": 15487 }, { "epoch": 0.60599420924955, "grad_norm": 0.0, "learning_rate": 7.095523751537506e-06, "loss": 0.9808, "step": 15488 }, { "epoch": 0.6060333359417794, "grad_norm": 0.0, "learning_rate": 7.094311172952573e-06, "loss": 0.9968, "step": 15489 }, { "epoch": 0.6060724626340089, "grad_norm": 0.0, "learning_rate": 7.093098641029359e-06, "loss": 1.0497, "step": 15490 }, { "epoch": 0.6061115893262383, "grad_norm": 0.0, "learning_rate": 7.09188615578734e-06, "loss": 1.0144, "step": 15491 }, { "epoch": 0.6061507160184678, "grad_norm": 0.0, "learning_rate": 7.090673717245982e-06, "loss": 1.01, "step": 15492 }, { "epoch": 0.6061898427106972, "grad_norm": 0.0, "learning_rate": 7.08946132542476e-06, "loss": 1.0027, "step": 15493 }, { "epoch": 0.6062289694029267, "grad_norm": 0.0, "learning_rate": 7.0882489803431444e-06, "loss": 1.0525, "step": 15494 }, { "epoch": 0.6062680960951561, "grad_norm": 0.0, "learning_rate": 7.087036682020601e-06, "loss": 1.0443, "step": 15495 }, { "epoch": 0.6063072227873856, "grad_norm": 0.0, "learning_rate": 7.085824430476593e-06, "loss": 0.9262, "step": 15496 }, { "epoch": 0.606346349479615, "grad_norm": 0.0, "learning_rate": 7.084612225730599e-06, "loss": 0.9485, "step": 15497 }, { "epoch": 0.6063854761718445, "grad_norm": 0.0, "learning_rate": 7.083400067802078e-06, "loss": 0.9993, "step": 15498 }, { "epoch": 0.6064246028640738, "grad_norm": 0.0, "learning_rate": 7.082187956710498e-06, "loss": 0.9903, "step": 15499 }, { "epoch": 0.6064637295563033, "grad_norm": 0.0, "learning_rate": 7.080975892475322e-06, "loss": 1.0499, "step": 15500 }, { "epoch": 0.6065028562485327, "grad_norm": 0.0, "learning_rate": 7.079763875116012e-06, "loss": 0.9592, "step": 15501 }, { "epoch": 0.6065419829407622, "grad_norm": 0.0, "learning_rate": 7.078551904652039e-06, "loss": 0.9802, "step": 15502 }, { "epoch": 0.6065811096329916, "grad_norm": 0.0, "learning_rate": 7.077339981102862e-06, "loss": 1.097, "step": 15503 }, { "epoch": 0.6066202363252211, "grad_norm": 0.0, "learning_rate": 7.076128104487943e-06, "loss": 0.9677, "step": 15504 }, { "epoch": 0.6066593630174505, "grad_norm": 0.0, "learning_rate": 7.074916274826737e-06, "loss": 0.9329, "step": 15505 }, { "epoch": 0.60669848970968, "grad_norm": 0.0, "learning_rate": 7.0737044921387155e-06, "loss": 1.1151, "step": 15506 }, { "epoch": 0.6067376164019094, "grad_norm": 0.0, "learning_rate": 7.072492756443333e-06, "loss": 1.0127, "step": 15507 }, { "epoch": 0.6067767430941389, "grad_norm": 0.0, "learning_rate": 7.071281067760048e-06, "loss": 0.8442, "step": 15508 }, { "epoch": 0.6068158697863683, "grad_norm": 0.0, "learning_rate": 7.07006942610832e-06, "loss": 1.1017, "step": 15509 }, { "epoch": 0.6068549964785978, "grad_norm": 0.0, "learning_rate": 7.0688578315076025e-06, "loss": 0.9815, "step": 15510 }, { "epoch": 0.6068941231708271, "grad_norm": 0.0, "learning_rate": 7.067646283977356e-06, "loss": 0.9568, "step": 15511 }, { "epoch": 0.6069332498630565, "grad_norm": 0.0, "learning_rate": 7.066434783537039e-06, "loss": 1.0534, "step": 15512 }, { "epoch": 0.606972376555286, "grad_norm": 0.0, "learning_rate": 7.065223330206101e-06, "loss": 0.9933, "step": 15513 }, { "epoch": 0.6070115032475154, "grad_norm": 0.0, "learning_rate": 7.064011924004e-06, "loss": 0.9559, "step": 15514 }, { "epoch": 0.6070506299397449, "grad_norm": 0.0, "learning_rate": 7.0628005649501875e-06, "loss": 0.9718, "step": 15515 }, { "epoch": 0.6070897566319743, "grad_norm": 0.0, "learning_rate": 7.061589253064118e-06, "loss": 0.9789, "step": 15516 }, { "epoch": 0.6071288833242038, "grad_norm": 0.0, "learning_rate": 7.060377988365241e-06, "loss": 1.0289, "step": 15517 }, { "epoch": 0.6071680100164332, "grad_norm": 0.0, "learning_rate": 7.059166770873013e-06, "loss": 1.0103, "step": 15518 }, { "epoch": 0.6072071367086627, "grad_norm": 0.0, "learning_rate": 7.057955600606879e-06, "loss": 1.1068, "step": 15519 }, { "epoch": 0.607246263400892, "grad_norm": 0.0, "learning_rate": 7.056744477586293e-06, "loss": 0.953, "step": 15520 }, { "epoch": 0.6072853900931215, "grad_norm": 0.0, "learning_rate": 7.055533401830703e-06, "loss": 0.9868, "step": 15521 }, { "epoch": 0.6073245167853509, "grad_norm": 0.0, "learning_rate": 7.054322373359557e-06, "loss": 1.0473, "step": 15522 }, { "epoch": 0.6073636434775804, "grad_norm": 0.0, "learning_rate": 7.053111392192299e-06, "loss": 1.0236, "step": 15523 }, { "epoch": 0.6074027701698098, "grad_norm": 0.0, "learning_rate": 7.051900458348383e-06, "loss": 1.0075, "step": 15524 }, { "epoch": 0.6074418968620393, "grad_norm": 0.0, "learning_rate": 7.050689571847251e-06, "loss": 0.9993, "step": 15525 }, { "epoch": 0.6074810235542687, "grad_norm": 0.0, "learning_rate": 7.04947873270835e-06, "loss": 1.0619, "step": 15526 }, { "epoch": 0.6075201502464982, "grad_norm": 0.0, "learning_rate": 7.048267940951118e-06, "loss": 1.0655, "step": 15527 }, { "epoch": 0.6075592769387276, "grad_norm": 0.0, "learning_rate": 7.0470571965950085e-06, "loss": 1.1453, "step": 15528 }, { "epoch": 0.6075984036309571, "grad_norm": 0.0, "learning_rate": 7.0458464996594615e-06, "loss": 0.9669, "step": 15529 }, { "epoch": 0.6076375303231865, "grad_norm": 0.0, "learning_rate": 7.044635850163916e-06, "loss": 0.9771, "step": 15530 }, { "epoch": 0.607676657015416, "grad_norm": 0.0, "learning_rate": 7.043425248127815e-06, "loss": 1.112, "step": 15531 }, { "epoch": 0.6077157837076453, "grad_norm": 0.0, "learning_rate": 7.042214693570597e-06, "loss": 1.0685, "step": 15532 }, { "epoch": 0.6077549103998748, "grad_norm": 0.0, "learning_rate": 7.041004186511709e-06, "loss": 1.0076, "step": 15533 }, { "epoch": 0.6077940370921042, "grad_norm": 0.0, "learning_rate": 7.0397937269705865e-06, "loss": 1.0459, "step": 15534 }, { "epoch": 0.6078331637843337, "grad_norm": 0.0, "learning_rate": 7.038583314966666e-06, "loss": 0.9941, "step": 15535 }, { "epoch": 0.6078722904765631, "grad_norm": 0.0, "learning_rate": 7.037372950519383e-06, "loss": 1.0838, "step": 15536 }, { "epoch": 0.6079114171687926, "grad_norm": 0.0, "learning_rate": 7.036162633648183e-06, "loss": 0.9063, "step": 15537 }, { "epoch": 0.607950543861022, "grad_norm": 0.0, "learning_rate": 7.034952364372496e-06, "loss": 1.0494, "step": 15538 }, { "epoch": 0.6079896705532515, "grad_norm": 0.0, "learning_rate": 7.033742142711759e-06, "loss": 0.9787, "step": 15539 }, { "epoch": 0.6080287972454809, "grad_norm": 0.0, "learning_rate": 7.0325319686854054e-06, "loss": 0.8264, "step": 15540 }, { "epoch": 0.6080679239377103, "grad_norm": 0.0, "learning_rate": 7.031321842312867e-06, "loss": 0.9507, "step": 15541 }, { "epoch": 0.6081070506299397, "grad_norm": 0.0, "learning_rate": 7.030111763613583e-06, "loss": 1.0214, "step": 15542 }, { "epoch": 0.6081461773221691, "grad_norm": 0.0, "learning_rate": 7.028901732606984e-06, "loss": 0.9249, "step": 15543 }, { "epoch": 0.6081853040143986, "grad_norm": 0.0, "learning_rate": 7.027691749312499e-06, "loss": 1.0002, "step": 15544 }, { "epoch": 0.608224430706628, "grad_norm": 0.0, "learning_rate": 7.026481813749557e-06, "loss": 0.9124, "step": 15545 }, { "epoch": 0.6082635573988575, "grad_norm": 0.0, "learning_rate": 7.0252719259375935e-06, "loss": 1.0989, "step": 15546 }, { "epoch": 0.6083026840910869, "grad_norm": 0.0, "learning_rate": 7.024062085896037e-06, "loss": 0.9763, "step": 15547 }, { "epoch": 0.6083418107833164, "grad_norm": 0.0, "learning_rate": 7.022852293644313e-06, "loss": 1.0672, "step": 15548 }, { "epoch": 0.6083809374755458, "grad_norm": 0.0, "learning_rate": 7.021642549201851e-06, "loss": 1.0218, "step": 15549 }, { "epoch": 0.6084200641677753, "grad_norm": 0.0, "learning_rate": 7.0204328525880745e-06, "loss": 0.9871, "step": 15550 }, { "epoch": 0.6084591908600047, "grad_norm": 0.0, "learning_rate": 7.0192232038224165e-06, "loss": 1.0761, "step": 15551 }, { "epoch": 0.6084983175522342, "grad_norm": 0.0, "learning_rate": 7.018013602924297e-06, "loss": 0.9889, "step": 15552 }, { "epoch": 0.6085374442444635, "grad_norm": 0.0, "learning_rate": 7.016804049913144e-06, "loss": 1.0074, "step": 15553 }, { "epoch": 0.608576570936693, "grad_norm": 0.0, "learning_rate": 7.015594544808378e-06, "loss": 0.947, "step": 15554 }, { "epoch": 0.6086156976289224, "grad_norm": 0.0, "learning_rate": 7.014385087629425e-06, "loss": 1.0344, "step": 15555 }, { "epoch": 0.6086548243211519, "grad_norm": 0.0, "learning_rate": 7.013175678395705e-06, "loss": 1.016, "step": 15556 }, { "epoch": 0.6086939510133813, "grad_norm": 0.0, "learning_rate": 7.011966317126642e-06, "loss": 0.9463, "step": 15557 }, { "epoch": 0.6087330777056108, "grad_norm": 0.0, "learning_rate": 7.0107570038416575e-06, "loss": 1.0153, "step": 15558 }, { "epoch": 0.6087722043978402, "grad_norm": 0.0, "learning_rate": 7.0095477385601655e-06, "loss": 1.1345, "step": 15559 }, { "epoch": 0.6088113310900697, "grad_norm": 0.0, "learning_rate": 7.0083385213015935e-06, "loss": 0.9994, "step": 15560 }, { "epoch": 0.6088504577822991, "grad_norm": 0.0, "learning_rate": 7.0071293520853555e-06, "loss": 1.007, "step": 15561 }, { "epoch": 0.6088895844745286, "grad_norm": 0.0, "learning_rate": 7.005920230930867e-06, "loss": 1.1077, "step": 15562 }, { "epoch": 0.608928711166758, "grad_norm": 0.0, "learning_rate": 7.004711157857549e-06, "loss": 1.0882, "step": 15563 }, { "epoch": 0.6089678378589874, "grad_norm": 0.0, "learning_rate": 7.003502132884817e-06, "loss": 0.9176, "step": 15564 }, { "epoch": 0.6090069645512168, "grad_norm": 0.0, "learning_rate": 7.002293156032087e-06, "loss": 0.9604, "step": 15565 }, { "epoch": 0.6090460912434463, "grad_norm": 0.0, "learning_rate": 7.001084227318773e-06, "loss": 0.9596, "step": 15566 }, { "epoch": 0.6090852179356757, "grad_norm": 0.0, "learning_rate": 6.999875346764288e-06, "loss": 0.9842, "step": 15567 }, { "epoch": 0.6091243446279052, "grad_norm": 0.0, "learning_rate": 6.998666514388041e-06, "loss": 1.0664, "step": 15568 }, { "epoch": 0.6091634713201346, "grad_norm": 0.0, "learning_rate": 6.9974577302094536e-06, "loss": 0.9974, "step": 15569 }, { "epoch": 0.609202598012364, "grad_norm": 0.0, "learning_rate": 6.996248994247931e-06, "loss": 1.0267, "step": 15570 }, { "epoch": 0.6092417247045935, "grad_norm": 0.0, "learning_rate": 6.995040306522889e-06, "loss": 1.1597, "step": 15571 }, { "epoch": 0.6092808513968229, "grad_norm": 0.0, "learning_rate": 6.993831667053728e-06, "loss": 1.097, "step": 15572 }, { "epoch": 0.6093199780890524, "grad_norm": 0.0, "learning_rate": 6.992623075859867e-06, "loss": 0.9649, "step": 15573 }, { "epoch": 0.6093591047812817, "grad_norm": 0.0, "learning_rate": 6.991414532960712e-06, "loss": 1.0272, "step": 15574 }, { "epoch": 0.6093982314735112, "grad_norm": 0.0, "learning_rate": 6.990206038375669e-06, "loss": 1.0543, "step": 15575 }, { "epoch": 0.6094373581657406, "grad_norm": 0.0, "learning_rate": 6.988997592124146e-06, "loss": 0.9205, "step": 15576 }, { "epoch": 0.6094764848579701, "grad_norm": 0.0, "learning_rate": 6.987789194225545e-06, "loss": 1.0333, "step": 15577 }, { "epoch": 0.6095156115501995, "grad_norm": 0.0, "learning_rate": 6.986580844699278e-06, "loss": 1.0144, "step": 15578 }, { "epoch": 0.609554738242429, "grad_norm": 0.0, "learning_rate": 6.985372543564748e-06, "loss": 0.9719, "step": 15579 }, { "epoch": 0.6095938649346584, "grad_norm": 0.0, "learning_rate": 6.984164290841355e-06, "loss": 1.0553, "step": 15580 }, { "epoch": 0.6096329916268879, "grad_norm": 0.0, "learning_rate": 6.982956086548502e-06, "loss": 0.9002, "step": 15581 }, { "epoch": 0.6096721183191173, "grad_norm": 0.0, "learning_rate": 6.981747930705598e-06, "loss": 1.0669, "step": 15582 }, { "epoch": 0.6097112450113468, "grad_norm": 0.0, "learning_rate": 6.980539823332041e-06, "loss": 1.0528, "step": 15583 }, { "epoch": 0.6097503717035762, "grad_norm": 0.0, "learning_rate": 6.97933176444723e-06, "loss": 1.0627, "step": 15584 }, { "epoch": 0.6097894983958057, "grad_norm": 0.0, "learning_rate": 6.97812375407056e-06, "loss": 0.9439, "step": 15585 }, { "epoch": 0.609828625088035, "grad_norm": 0.0, "learning_rate": 6.976915792221441e-06, "loss": 1.0573, "step": 15586 }, { "epoch": 0.6098677517802645, "grad_norm": 0.0, "learning_rate": 6.975707878919268e-06, "loss": 0.9646, "step": 15587 }, { "epoch": 0.6099068784724939, "grad_norm": 0.0, "learning_rate": 6.974500014183435e-06, "loss": 1.0026, "step": 15588 }, { "epoch": 0.6099460051647234, "grad_norm": 0.0, "learning_rate": 6.973292198033341e-06, "loss": 0.9902, "step": 15589 }, { "epoch": 0.6099851318569528, "grad_norm": 0.0, "learning_rate": 6.972084430488377e-06, "loss": 0.9453, "step": 15590 }, { "epoch": 0.6100242585491823, "grad_norm": 0.0, "learning_rate": 6.970876711567949e-06, "loss": 1.0629, "step": 15591 }, { "epoch": 0.6100633852414117, "grad_norm": 0.0, "learning_rate": 6.969669041291442e-06, "loss": 0.978, "step": 15592 }, { "epoch": 0.6101025119336412, "grad_norm": 0.0, "learning_rate": 6.968461419678256e-06, "loss": 1.0141, "step": 15593 }, { "epoch": 0.6101416386258706, "grad_norm": 0.0, "learning_rate": 6.967253846747778e-06, "loss": 1.1974, "step": 15594 }, { "epoch": 0.6101807653181001, "grad_norm": 0.0, "learning_rate": 6.966046322519402e-06, "loss": 0.9556, "step": 15595 }, { "epoch": 0.6102198920103294, "grad_norm": 0.0, "learning_rate": 6.964838847012523e-06, "loss": 1.0565, "step": 15596 }, { "epoch": 0.6102590187025588, "grad_norm": 0.0, "learning_rate": 6.963631420246529e-06, "loss": 1.0729, "step": 15597 }, { "epoch": 0.6102981453947883, "grad_norm": 0.0, "learning_rate": 6.96242404224081e-06, "loss": 1.0092, "step": 15598 }, { "epoch": 0.6103372720870177, "grad_norm": 0.0, "learning_rate": 6.961216713014751e-06, "loss": 1.0224, "step": 15599 }, { "epoch": 0.6103763987792472, "grad_norm": 0.0, "learning_rate": 6.960009432587747e-06, "loss": 1.1131, "step": 15600 }, { "epoch": 0.6104155254714766, "grad_norm": 0.0, "learning_rate": 6.95880220097918e-06, "loss": 0.946, "step": 15601 }, { "epoch": 0.6104546521637061, "grad_norm": 0.0, "learning_rate": 6.957595018208441e-06, "loss": 1.0773, "step": 15602 }, { "epoch": 0.6104937788559355, "grad_norm": 0.0, "learning_rate": 6.95638788429491e-06, "loss": 0.9791, "step": 15603 }, { "epoch": 0.610532905548165, "grad_norm": 0.0, "learning_rate": 6.955180799257981e-06, "loss": 0.9726, "step": 15604 }, { "epoch": 0.6105720322403944, "grad_norm": 0.0, "learning_rate": 6.95397376311703e-06, "loss": 1.0073, "step": 15605 }, { "epoch": 0.6106111589326239, "grad_norm": 0.0, "learning_rate": 6.9527667758914454e-06, "loss": 1.0393, "step": 15606 }, { "epoch": 0.6106502856248532, "grad_norm": 0.0, "learning_rate": 6.951559837600605e-06, "loss": 0.942, "step": 15607 }, { "epoch": 0.6106894123170827, "grad_norm": 0.0, "learning_rate": 6.950352948263894e-06, "loss": 1.1526, "step": 15608 }, { "epoch": 0.6107285390093121, "grad_norm": 0.0, "learning_rate": 6.949146107900695e-06, "loss": 1.0297, "step": 15609 }, { "epoch": 0.6107676657015416, "grad_norm": 0.0, "learning_rate": 6.947939316530387e-06, "loss": 0.8666, "step": 15610 }, { "epoch": 0.610806792393771, "grad_norm": 0.0, "learning_rate": 6.946732574172349e-06, "loss": 0.9858, "step": 15611 }, { "epoch": 0.6108459190860005, "grad_norm": 0.0, "learning_rate": 6.945525880845956e-06, "loss": 0.9693, "step": 15612 }, { "epoch": 0.6108850457782299, "grad_norm": 0.0, "learning_rate": 6.944319236570593e-06, "loss": 1.0367, "step": 15613 }, { "epoch": 0.6109241724704594, "grad_norm": 0.0, "learning_rate": 6.9431126413656346e-06, "loss": 1.0149, "step": 15614 }, { "epoch": 0.6109632991626888, "grad_norm": 0.0, "learning_rate": 6.941906095250457e-06, "loss": 1.027, "step": 15615 }, { "epoch": 0.6110024258549183, "grad_norm": 0.0, "learning_rate": 6.940699598244436e-06, "loss": 1.0388, "step": 15616 }, { "epoch": 0.6110415525471476, "grad_norm": 0.0, "learning_rate": 6.939493150366941e-06, "loss": 1.0303, "step": 15617 }, { "epoch": 0.6110806792393771, "grad_norm": 0.0, "learning_rate": 6.938286751637355e-06, "loss": 1.0905, "step": 15618 }, { "epoch": 0.6111198059316065, "grad_norm": 0.0, "learning_rate": 6.937080402075047e-06, "loss": 1.0752, "step": 15619 }, { "epoch": 0.611158932623836, "grad_norm": 0.0, "learning_rate": 6.93587410169939e-06, "loss": 0.8599, "step": 15620 }, { "epoch": 0.6111980593160654, "grad_norm": 0.0, "learning_rate": 6.934667850529751e-06, "loss": 1.0084, "step": 15621 }, { "epoch": 0.6112371860082949, "grad_norm": 0.0, "learning_rate": 6.933461648585511e-06, "loss": 0.9404, "step": 15622 }, { "epoch": 0.6112763127005243, "grad_norm": 0.0, "learning_rate": 6.9322554958860334e-06, "loss": 1.0513, "step": 15623 }, { "epoch": 0.6113154393927538, "grad_norm": 0.0, "learning_rate": 6.931049392450687e-06, "loss": 0.9519, "step": 15624 }, { "epoch": 0.6113545660849832, "grad_norm": 0.0, "learning_rate": 6.929843338298843e-06, "loss": 1.0064, "step": 15625 }, { "epoch": 0.6113936927772126, "grad_norm": 0.0, "learning_rate": 6.928637333449863e-06, "loss": 0.8574, "step": 15626 }, { "epoch": 0.6114328194694421, "grad_norm": 0.0, "learning_rate": 6.927431377923124e-06, "loss": 1.0236, "step": 15627 }, { "epoch": 0.6114719461616714, "grad_norm": 0.0, "learning_rate": 6.926225471737986e-06, "loss": 1.0028, "step": 15628 }, { "epoch": 0.6115110728539009, "grad_norm": 0.0, "learning_rate": 6.925019614913817e-06, "loss": 1.0468, "step": 15629 }, { "epoch": 0.6115501995461303, "grad_norm": 0.0, "learning_rate": 6.923813807469975e-06, "loss": 1.1277, "step": 15630 }, { "epoch": 0.6115893262383598, "grad_norm": 0.0, "learning_rate": 6.922608049425832e-06, "loss": 0.9636, "step": 15631 }, { "epoch": 0.6116284529305892, "grad_norm": 0.0, "learning_rate": 6.921402340800747e-06, "loss": 0.9011, "step": 15632 }, { "epoch": 0.6116675796228187, "grad_norm": 0.0, "learning_rate": 6.920196681614083e-06, "loss": 0.9925, "step": 15633 }, { "epoch": 0.6117067063150481, "grad_norm": 0.0, "learning_rate": 6.918991071885202e-06, "loss": 0.9249, "step": 15634 }, { "epoch": 0.6117458330072776, "grad_norm": 0.0, "learning_rate": 6.917785511633459e-06, "loss": 0.9514, "step": 15635 }, { "epoch": 0.611784959699507, "grad_norm": 0.0, "learning_rate": 6.916580000878221e-06, "loss": 0.9862, "step": 15636 }, { "epoch": 0.6118240863917365, "grad_norm": 0.0, "learning_rate": 6.9153745396388475e-06, "loss": 1.0481, "step": 15637 }, { "epoch": 0.6118632130839659, "grad_norm": 0.0, "learning_rate": 6.914169127934691e-06, "loss": 1.0066, "step": 15638 }, { "epoch": 0.6119023397761953, "grad_norm": 0.0, "learning_rate": 6.912963765785111e-06, "loss": 1.0186, "step": 15639 }, { "epoch": 0.6119414664684247, "grad_norm": 0.0, "learning_rate": 6.911758453209465e-06, "loss": 0.9159, "step": 15640 }, { "epoch": 0.6119805931606542, "grad_norm": 0.0, "learning_rate": 6.910553190227109e-06, "loss": 0.9586, "step": 15641 }, { "epoch": 0.6120197198528836, "grad_norm": 0.0, "learning_rate": 6.909347976857399e-06, "loss": 1.0792, "step": 15642 }, { "epoch": 0.6120588465451131, "grad_norm": 0.0, "learning_rate": 6.908142813119685e-06, "loss": 1.0501, "step": 15643 }, { "epoch": 0.6120979732373425, "grad_norm": 0.0, "learning_rate": 6.906937699033325e-06, "loss": 0.9802, "step": 15644 }, { "epoch": 0.612137099929572, "grad_norm": 0.0, "learning_rate": 6.905732634617668e-06, "loss": 1.0747, "step": 15645 }, { "epoch": 0.6121762266218014, "grad_norm": 0.0, "learning_rate": 6.904527619892069e-06, "loss": 1.0261, "step": 15646 }, { "epoch": 0.6122153533140309, "grad_norm": 0.0, "learning_rate": 6.903322654875878e-06, "loss": 0.9243, "step": 15647 }, { "epoch": 0.6122544800062603, "grad_norm": 0.0, "learning_rate": 6.902117739588443e-06, "loss": 1.109, "step": 15648 }, { "epoch": 0.6122936066984898, "grad_norm": 0.0, "learning_rate": 6.900912874049117e-06, "loss": 1.0967, "step": 15649 }, { "epoch": 0.6123327333907191, "grad_norm": 0.0, "learning_rate": 6.899708058277246e-06, "loss": 1.0032, "step": 15650 }, { "epoch": 0.6123718600829486, "grad_norm": 0.0, "learning_rate": 6.898503292292178e-06, "loss": 1.0214, "step": 15651 }, { "epoch": 0.612410986775178, "grad_norm": 0.0, "learning_rate": 6.89729857611326e-06, "loss": 1.0993, "step": 15652 }, { "epoch": 0.6124501134674075, "grad_norm": 0.0, "learning_rate": 6.896093909759841e-06, "loss": 0.9168, "step": 15653 }, { "epoch": 0.6124892401596369, "grad_norm": 0.0, "learning_rate": 6.894889293251265e-06, "loss": 1.0021, "step": 15654 }, { "epoch": 0.6125283668518663, "grad_norm": 0.0, "learning_rate": 6.893684726606875e-06, "loss": 1.06, "step": 15655 }, { "epoch": 0.6125674935440958, "grad_norm": 0.0, "learning_rate": 6.8924802098460175e-06, "loss": 1.0131, "step": 15656 }, { "epoch": 0.6126066202363252, "grad_norm": 0.0, "learning_rate": 6.891275742988028e-06, "loss": 1.1216, "step": 15657 }, { "epoch": 0.6126457469285547, "grad_norm": 0.0, "learning_rate": 6.89007132605226e-06, "loss": 0.8682, "step": 15658 }, { "epoch": 0.612684873620784, "grad_norm": 0.0, "learning_rate": 6.888866959058049e-06, "loss": 1.1094, "step": 15659 }, { "epoch": 0.6127240003130136, "grad_norm": 0.0, "learning_rate": 6.887662642024736e-06, "loss": 1.037, "step": 15660 }, { "epoch": 0.6127631270052429, "grad_norm": 0.0, "learning_rate": 6.886458374971658e-06, "loss": 0.9942, "step": 15661 }, { "epoch": 0.6128022536974724, "grad_norm": 0.0, "learning_rate": 6.8852541579181596e-06, "loss": 1.0462, "step": 15662 }, { "epoch": 0.6128413803897018, "grad_norm": 0.0, "learning_rate": 6.8840499908835764e-06, "loss": 1.071, "step": 15663 }, { "epoch": 0.6128805070819313, "grad_norm": 0.0, "learning_rate": 6.882845873887247e-06, "loss": 0.8469, "step": 15664 }, { "epoch": 0.6129196337741607, "grad_norm": 0.0, "learning_rate": 6.881641806948507e-06, "loss": 0.9977, "step": 15665 }, { "epoch": 0.6129587604663902, "grad_norm": 0.0, "learning_rate": 6.880437790086687e-06, "loss": 0.98, "step": 15666 }, { "epoch": 0.6129978871586196, "grad_norm": 0.0, "learning_rate": 6.879233823321132e-06, "loss": 1.0536, "step": 15667 }, { "epoch": 0.6130370138508491, "grad_norm": 0.0, "learning_rate": 6.878029906671171e-06, "loss": 1.0443, "step": 15668 }, { "epoch": 0.6130761405430785, "grad_norm": 0.0, "learning_rate": 6.876826040156137e-06, "loss": 1.1004, "step": 15669 }, { "epoch": 0.613115267235308, "grad_norm": 0.0, "learning_rate": 6.875622223795359e-06, "loss": 0.926, "step": 15670 }, { "epoch": 0.6131543939275373, "grad_norm": 0.0, "learning_rate": 6.874418457608178e-06, "loss": 1.0233, "step": 15671 }, { "epoch": 0.6131935206197668, "grad_norm": 0.0, "learning_rate": 6.87321474161392e-06, "loss": 1.0561, "step": 15672 }, { "epoch": 0.6132326473119962, "grad_norm": 0.0, "learning_rate": 6.872011075831914e-06, "loss": 0.963, "step": 15673 }, { "epoch": 0.6132717740042257, "grad_norm": 0.0, "learning_rate": 6.870807460281493e-06, "loss": 1.0594, "step": 15674 }, { "epoch": 0.6133109006964551, "grad_norm": 0.0, "learning_rate": 6.869603894981976e-06, "loss": 1.0297, "step": 15675 }, { "epoch": 0.6133500273886846, "grad_norm": 0.0, "learning_rate": 6.868400379952704e-06, "loss": 0.9895, "step": 15676 }, { "epoch": 0.613389154080914, "grad_norm": 0.0, "learning_rate": 6.867196915212997e-06, "loss": 1.0444, "step": 15677 }, { "epoch": 0.6134282807731435, "grad_norm": 0.0, "learning_rate": 6.865993500782182e-06, "loss": 1.099, "step": 15678 }, { "epoch": 0.6134674074653729, "grad_norm": 0.0, "learning_rate": 6.86479013667958e-06, "loss": 1.0645, "step": 15679 }, { "epoch": 0.6135065341576024, "grad_norm": 0.0, "learning_rate": 6.863586822924524e-06, "loss": 1.0627, "step": 15680 }, { "epoch": 0.6135456608498318, "grad_norm": 0.0, "learning_rate": 6.862383559536333e-06, "loss": 1.0726, "step": 15681 }, { "epoch": 0.6135847875420612, "grad_norm": 0.0, "learning_rate": 6.8611803465343305e-06, "loss": 1.004, "step": 15682 }, { "epoch": 0.6136239142342906, "grad_norm": 0.0, "learning_rate": 6.8599771839378394e-06, "loss": 1.0295, "step": 15683 }, { "epoch": 0.61366304092652, "grad_norm": 0.0, "learning_rate": 6.858774071766175e-06, "loss": 0.9203, "step": 15684 }, { "epoch": 0.6137021676187495, "grad_norm": 0.0, "learning_rate": 6.857571010038669e-06, "loss": 0.975, "step": 15685 }, { "epoch": 0.6137412943109789, "grad_norm": 0.0, "learning_rate": 6.856367998774631e-06, "loss": 0.9863, "step": 15686 }, { "epoch": 0.6137804210032084, "grad_norm": 0.0, "learning_rate": 6.855165037993388e-06, "loss": 1.0489, "step": 15687 }, { "epoch": 0.6138195476954378, "grad_norm": 0.0, "learning_rate": 6.853962127714249e-06, "loss": 1.0893, "step": 15688 }, { "epoch": 0.6138586743876673, "grad_norm": 0.0, "learning_rate": 6.8527592679565395e-06, "loss": 1.0808, "step": 15689 }, { "epoch": 0.6138978010798967, "grad_norm": 0.0, "learning_rate": 6.851556458739569e-06, "loss": 1.0467, "step": 15690 }, { "epoch": 0.6139369277721262, "grad_norm": 0.0, "learning_rate": 6.850353700082659e-06, "loss": 1.0591, "step": 15691 }, { "epoch": 0.6139760544643555, "grad_norm": 0.0, "learning_rate": 6.8491509920051225e-06, "loss": 0.9169, "step": 15692 }, { "epoch": 0.614015181156585, "grad_norm": 0.0, "learning_rate": 6.8479483345262695e-06, "loss": 1.0459, "step": 15693 }, { "epoch": 0.6140543078488144, "grad_norm": 0.0, "learning_rate": 6.84674572766542e-06, "loss": 1.0758, "step": 15694 }, { "epoch": 0.6140934345410439, "grad_norm": 0.0, "learning_rate": 6.845543171441882e-06, "loss": 0.9554, "step": 15695 }, { "epoch": 0.6141325612332733, "grad_norm": 0.0, "learning_rate": 6.844340665874967e-06, "loss": 0.9985, "step": 15696 }, { "epoch": 0.6141716879255028, "grad_norm": 0.0, "learning_rate": 6.843138210983986e-06, "loss": 1.0243, "step": 15697 }, { "epoch": 0.6142108146177322, "grad_norm": 0.0, "learning_rate": 6.8419358067882516e-06, "loss": 0.8582, "step": 15698 }, { "epoch": 0.6142499413099617, "grad_norm": 0.0, "learning_rate": 6.84073345330707e-06, "loss": 0.9988, "step": 15699 }, { "epoch": 0.6142890680021911, "grad_norm": 0.0, "learning_rate": 6.839531150559751e-06, "loss": 1.107, "step": 15700 }, { "epoch": 0.6143281946944206, "grad_norm": 0.0, "learning_rate": 6.8383288985655985e-06, "loss": 0.9671, "step": 15701 }, { "epoch": 0.61436732138665, "grad_norm": 0.0, "learning_rate": 6.837126697343924e-06, "loss": 1.0087, "step": 15702 }, { "epoch": 0.6144064480788795, "grad_norm": 0.0, "learning_rate": 6.835924546914032e-06, "loss": 1.0147, "step": 15703 }, { "epoch": 0.6144455747711088, "grad_norm": 0.0, "learning_rate": 6.834722447295228e-06, "loss": 0.8592, "step": 15704 }, { "epoch": 0.6144847014633383, "grad_norm": 0.0, "learning_rate": 6.833520398506814e-06, "loss": 0.9309, "step": 15705 }, { "epoch": 0.6145238281555677, "grad_norm": 0.0, "learning_rate": 6.832318400568092e-06, "loss": 1.0326, "step": 15706 }, { "epoch": 0.6145629548477972, "grad_norm": 0.0, "learning_rate": 6.83111645349837e-06, "loss": 1.035, "step": 15707 }, { "epoch": 0.6146020815400266, "grad_norm": 0.0, "learning_rate": 6.829914557316948e-06, "loss": 1.0412, "step": 15708 }, { "epoch": 0.6146412082322561, "grad_norm": 0.0, "learning_rate": 6.828712712043125e-06, "loss": 0.9209, "step": 15709 }, { "epoch": 0.6146803349244855, "grad_norm": 0.0, "learning_rate": 6.827510917696198e-06, "loss": 1.1135, "step": 15710 }, { "epoch": 0.6147194616167149, "grad_norm": 0.0, "learning_rate": 6.826309174295475e-06, "loss": 0.9608, "step": 15711 }, { "epoch": 0.6147585883089444, "grad_norm": 0.0, "learning_rate": 6.825107481860249e-06, "loss": 0.9681, "step": 15712 }, { "epoch": 0.6147977150011738, "grad_norm": 0.0, "learning_rate": 6.823905840409819e-06, "loss": 0.9518, "step": 15713 }, { "epoch": 0.6148368416934032, "grad_norm": 0.0, "learning_rate": 6.822704249963481e-06, "loss": 0.9437, "step": 15714 }, { "epoch": 0.6148759683856326, "grad_norm": 0.0, "learning_rate": 6.821502710540527e-06, "loss": 0.9812, "step": 15715 }, { "epoch": 0.6149150950778621, "grad_norm": 0.0, "learning_rate": 6.820301222160261e-06, "loss": 0.9355, "step": 15716 }, { "epoch": 0.6149542217700915, "grad_norm": 0.0, "learning_rate": 6.819099784841974e-06, "loss": 1.0158, "step": 15717 }, { "epoch": 0.614993348462321, "grad_norm": 0.0, "learning_rate": 6.817898398604958e-06, "loss": 1.0945, "step": 15718 }, { "epoch": 0.6150324751545504, "grad_norm": 0.0, "learning_rate": 6.816697063468501e-06, "loss": 1.025, "step": 15719 }, { "epoch": 0.6150716018467799, "grad_norm": 0.0, "learning_rate": 6.815495779451906e-06, "loss": 0.9852, "step": 15720 }, { "epoch": 0.6151107285390093, "grad_norm": 0.0, "learning_rate": 6.814294546574458e-06, "loss": 0.9937, "step": 15721 }, { "epoch": 0.6151498552312388, "grad_norm": 0.0, "learning_rate": 6.813093364855447e-06, "loss": 1.0085, "step": 15722 }, { "epoch": 0.6151889819234682, "grad_norm": 0.0, "learning_rate": 6.811892234314163e-06, "loss": 0.9117, "step": 15723 }, { "epoch": 0.6152281086156977, "grad_norm": 0.0, "learning_rate": 6.810691154969891e-06, "loss": 1.0978, "step": 15724 }, { "epoch": 0.615267235307927, "grad_norm": 0.0, "learning_rate": 6.809490126841927e-06, "loss": 0.9459, "step": 15725 }, { "epoch": 0.6153063620001565, "grad_norm": 0.0, "learning_rate": 6.8082891499495545e-06, "loss": 1.0084, "step": 15726 }, { "epoch": 0.6153454886923859, "grad_norm": 0.0, "learning_rate": 6.807088224312057e-06, "loss": 1.0443, "step": 15727 }, { "epoch": 0.6153846153846154, "grad_norm": 0.0, "learning_rate": 6.805887349948721e-06, "loss": 1.0014, "step": 15728 }, { "epoch": 0.6154237420768448, "grad_norm": 0.0, "learning_rate": 6.80468652687883e-06, "loss": 1.0165, "step": 15729 }, { "epoch": 0.6154628687690743, "grad_norm": 0.0, "learning_rate": 6.803485755121672e-06, "loss": 0.9803, "step": 15730 }, { "epoch": 0.6155019954613037, "grad_norm": 0.0, "learning_rate": 6.802285034696529e-06, "loss": 0.9968, "step": 15731 }, { "epoch": 0.6155411221535332, "grad_norm": 0.0, "learning_rate": 6.801084365622681e-06, "loss": 0.9223, "step": 15732 }, { "epoch": 0.6155802488457626, "grad_norm": 0.0, "learning_rate": 6.799883747919407e-06, "loss": 1.0386, "step": 15733 }, { "epoch": 0.6156193755379921, "grad_norm": 0.0, "learning_rate": 6.798683181605992e-06, "loss": 1.0268, "step": 15734 }, { "epoch": 0.6156585022302214, "grad_norm": 0.0, "learning_rate": 6.797482666701711e-06, "loss": 1.0495, "step": 15735 }, { "epoch": 0.615697628922451, "grad_norm": 0.0, "learning_rate": 6.796282203225848e-06, "loss": 1.0673, "step": 15736 }, { "epoch": 0.6157367556146803, "grad_norm": 0.0, "learning_rate": 6.7950817911976755e-06, "loss": 0.9863, "step": 15737 }, { "epoch": 0.6157758823069098, "grad_norm": 0.0, "learning_rate": 6.7938814306364776e-06, "loss": 0.9724, "step": 15738 }, { "epoch": 0.6158150089991392, "grad_norm": 0.0, "learning_rate": 6.792681121561524e-06, "loss": 1.0577, "step": 15739 }, { "epoch": 0.6158541356913686, "grad_norm": 0.0, "learning_rate": 6.791480863992095e-06, "loss": 0.9434, "step": 15740 }, { "epoch": 0.6158932623835981, "grad_norm": 0.0, "learning_rate": 6.790280657947459e-06, "loss": 1.0008, "step": 15741 }, { "epoch": 0.6159323890758275, "grad_norm": 0.0, "learning_rate": 6.789080503446895e-06, "loss": 0.9651, "step": 15742 }, { "epoch": 0.615971515768057, "grad_norm": 0.0, "learning_rate": 6.787880400509674e-06, "loss": 1.0832, "step": 15743 }, { "epoch": 0.6160106424602864, "grad_norm": 0.0, "learning_rate": 6.78668034915507e-06, "loss": 1.0043, "step": 15744 }, { "epoch": 0.6160497691525159, "grad_norm": 0.0, "learning_rate": 6.7854803494023545e-06, "loss": 1.0585, "step": 15745 }, { "epoch": 0.6160888958447452, "grad_norm": 0.0, "learning_rate": 6.7842804012707904e-06, "loss": 1.0727, "step": 15746 }, { "epoch": 0.6161280225369747, "grad_norm": 0.0, "learning_rate": 6.78308050477966e-06, "loss": 0.9259, "step": 15747 }, { "epoch": 0.6161671492292041, "grad_norm": 0.0, "learning_rate": 6.781880659948224e-06, "loss": 0.985, "step": 15748 }, { "epoch": 0.6162062759214336, "grad_norm": 0.0, "learning_rate": 6.780680866795753e-06, "loss": 1.051, "step": 15749 }, { "epoch": 0.616245402613663, "grad_norm": 0.0, "learning_rate": 6.779481125341513e-06, "loss": 0.9348, "step": 15750 }, { "epoch": 0.6162845293058925, "grad_norm": 0.0, "learning_rate": 6.778281435604765e-06, "loss": 1.048, "step": 15751 }, { "epoch": 0.6163236559981219, "grad_norm": 0.0, "learning_rate": 6.777081797604786e-06, "loss": 1.0716, "step": 15752 }, { "epoch": 0.6163627826903514, "grad_norm": 0.0, "learning_rate": 6.775882211360835e-06, "loss": 0.8939, "step": 15753 }, { "epoch": 0.6164019093825808, "grad_norm": 0.0, "learning_rate": 6.774682676892175e-06, "loss": 1.0364, "step": 15754 }, { "epoch": 0.6164410360748103, "grad_norm": 0.0, "learning_rate": 6.773483194218065e-06, "loss": 1.0817, "step": 15755 }, { "epoch": 0.6164801627670397, "grad_norm": 0.0, "learning_rate": 6.7722837633577766e-06, "loss": 1.0536, "step": 15756 }, { "epoch": 0.6165192894592691, "grad_norm": 0.0, "learning_rate": 6.771084384330566e-06, "loss": 1.1309, "step": 15757 }, { "epoch": 0.6165584161514985, "grad_norm": 0.0, "learning_rate": 6.769885057155694e-06, "loss": 1.0394, "step": 15758 }, { "epoch": 0.616597542843728, "grad_norm": 0.0, "learning_rate": 6.768685781852418e-06, "loss": 1.1026, "step": 15759 }, { "epoch": 0.6166366695359574, "grad_norm": 0.0, "learning_rate": 6.767486558440002e-06, "loss": 1.0128, "step": 15760 }, { "epoch": 0.6166757962281869, "grad_norm": 0.0, "learning_rate": 6.7662873869377e-06, "loss": 0.959, "step": 15761 }, { "epoch": 0.6167149229204163, "grad_norm": 0.0, "learning_rate": 6.765088267364772e-06, "loss": 0.8943, "step": 15762 }, { "epoch": 0.6167540496126458, "grad_norm": 0.0, "learning_rate": 6.763889199740473e-06, "loss": 0.9699, "step": 15763 }, { "epoch": 0.6167931763048752, "grad_norm": 0.0, "learning_rate": 6.762690184084054e-06, "loss": 1.0674, "step": 15764 }, { "epoch": 0.6168323029971047, "grad_norm": 0.0, "learning_rate": 6.761491220414779e-06, "loss": 1.1023, "step": 15765 }, { "epoch": 0.6168714296893341, "grad_norm": 0.0, "learning_rate": 6.760292308751896e-06, "loss": 0.9729, "step": 15766 }, { "epoch": 0.6169105563815636, "grad_norm": 0.0, "learning_rate": 6.759093449114659e-06, "loss": 1.0169, "step": 15767 }, { "epoch": 0.6169496830737929, "grad_norm": 0.0, "learning_rate": 6.757894641522316e-06, "loss": 1.121, "step": 15768 }, { "epoch": 0.6169888097660223, "grad_norm": 0.0, "learning_rate": 6.756695885994126e-06, "loss": 0.9906, "step": 15769 }, { "epoch": 0.6170279364582518, "grad_norm": 0.0, "learning_rate": 6.755497182549337e-06, "loss": 1.0303, "step": 15770 }, { "epoch": 0.6170670631504812, "grad_norm": 0.0, "learning_rate": 6.7542985312071975e-06, "loss": 0.9244, "step": 15771 }, { "epoch": 0.6171061898427107, "grad_norm": 0.0, "learning_rate": 6.7530999319869565e-06, "loss": 1.0079, "step": 15772 }, { "epoch": 0.6171453165349401, "grad_norm": 0.0, "learning_rate": 6.75190138490786e-06, "loss": 0.9617, "step": 15773 }, { "epoch": 0.6171844432271696, "grad_norm": 0.0, "learning_rate": 6.750702889989156e-06, "loss": 1.019, "step": 15774 }, { "epoch": 0.617223569919399, "grad_norm": 0.0, "learning_rate": 6.749504447250095e-06, "loss": 0.9448, "step": 15775 }, { "epoch": 0.6172626966116285, "grad_norm": 0.0, "learning_rate": 6.748306056709919e-06, "loss": 1.0099, "step": 15776 }, { "epoch": 0.6173018233038579, "grad_norm": 0.0, "learning_rate": 6.747107718387872e-06, "loss": 1.0231, "step": 15777 }, { "epoch": 0.6173409499960874, "grad_norm": 0.0, "learning_rate": 6.7459094323032e-06, "loss": 1.0885, "step": 15778 }, { "epoch": 0.6173800766883167, "grad_norm": 0.0, "learning_rate": 6.744711198475143e-06, "loss": 0.9733, "step": 15779 }, { "epoch": 0.6174192033805462, "grad_norm": 0.0, "learning_rate": 6.743513016922948e-06, "loss": 1.0822, "step": 15780 }, { "epoch": 0.6174583300727756, "grad_norm": 0.0, "learning_rate": 6.742314887665853e-06, "loss": 0.9774, "step": 15781 }, { "epoch": 0.6174974567650051, "grad_norm": 0.0, "learning_rate": 6.741116810723096e-06, "loss": 1.006, "step": 15782 }, { "epoch": 0.6175365834572345, "grad_norm": 0.0, "learning_rate": 6.7399187861139215e-06, "loss": 1.1207, "step": 15783 }, { "epoch": 0.617575710149464, "grad_norm": 0.0, "learning_rate": 6.738720813857566e-06, "loss": 1.0378, "step": 15784 }, { "epoch": 0.6176148368416934, "grad_norm": 0.0, "learning_rate": 6.737522893973267e-06, "loss": 1.0463, "step": 15785 }, { "epoch": 0.6176539635339229, "grad_norm": 0.0, "learning_rate": 6.736325026480262e-06, "loss": 1.0749, "step": 15786 }, { "epoch": 0.6176930902261523, "grad_norm": 0.0, "learning_rate": 6.735127211397789e-06, "loss": 1.0132, "step": 15787 }, { "epoch": 0.6177322169183818, "grad_norm": 0.0, "learning_rate": 6.7339294487450825e-06, "loss": 0.9603, "step": 15788 }, { "epoch": 0.6177713436106111, "grad_norm": 0.0, "learning_rate": 6.732731738541375e-06, "loss": 1.1393, "step": 15789 }, { "epoch": 0.6178104703028406, "grad_norm": 0.0, "learning_rate": 6.7315340808059025e-06, "loss": 0.9539, "step": 15790 }, { "epoch": 0.61784959699507, "grad_norm": 0.0, "learning_rate": 6.730336475557892e-06, "loss": 0.9453, "step": 15791 }, { "epoch": 0.6178887236872995, "grad_norm": 0.0, "learning_rate": 6.729138922816587e-06, "loss": 1.0442, "step": 15792 }, { "epoch": 0.6179278503795289, "grad_norm": 0.0, "learning_rate": 6.72794142260121e-06, "loss": 1.0603, "step": 15793 }, { "epoch": 0.6179669770717584, "grad_norm": 0.0, "learning_rate": 6.726743974930995e-06, "loss": 1.0568, "step": 15794 }, { "epoch": 0.6180061037639878, "grad_norm": 0.0, "learning_rate": 6.725546579825165e-06, "loss": 0.9621, "step": 15795 }, { "epoch": 0.6180452304562172, "grad_norm": 0.0, "learning_rate": 6.724349237302958e-06, "loss": 1.046, "step": 15796 }, { "epoch": 0.6180843571484467, "grad_norm": 0.0, "learning_rate": 6.723151947383599e-06, "loss": 0.9601, "step": 15797 }, { "epoch": 0.6181234838406761, "grad_norm": 0.0, "learning_rate": 6.721954710086312e-06, "loss": 1.065, "step": 15798 }, { "epoch": 0.6181626105329056, "grad_norm": 0.0, "learning_rate": 6.720757525430326e-06, "loss": 0.9536, "step": 15799 }, { "epoch": 0.6182017372251349, "grad_norm": 0.0, "learning_rate": 6.71956039343486e-06, "loss": 1.1377, "step": 15800 }, { "epoch": 0.6182408639173644, "grad_norm": 0.0, "learning_rate": 6.7183633141191475e-06, "loss": 1.0309, "step": 15801 }, { "epoch": 0.6182799906095938, "grad_norm": 0.0, "learning_rate": 6.717166287502408e-06, "loss": 1.0656, "step": 15802 }, { "epoch": 0.6183191173018233, "grad_norm": 0.0, "learning_rate": 6.715969313603865e-06, "loss": 0.9034, "step": 15803 }, { "epoch": 0.6183582439940527, "grad_norm": 0.0, "learning_rate": 6.714772392442735e-06, "loss": 1.0241, "step": 15804 }, { "epoch": 0.6183973706862822, "grad_norm": 0.0, "learning_rate": 6.71357552403825e-06, "loss": 1.0395, "step": 15805 }, { "epoch": 0.6184364973785116, "grad_norm": 0.0, "learning_rate": 6.7123787084096215e-06, "loss": 1.0184, "step": 15806 }, { "epoch": 0.6184756240707411, "grad_norm": 0.0, "learning_rate": 6.711181945576071e-06, "loss": 1.1585, "step": 15807 }, { "epoch": 0.6185147507629705, "grad_norm": 0.0, "learning_rate": 6.709985235556819e-06, "loss": 1.0706, "step": 15808 }, { "epoch": 0.6185538774552, "grad_norm": 0.0, "learning_rate": 6.708788578371077e-06, "loss": 0.9945, "step": 15809 }, { "epoch": 0.6185930041474293, "grad_norm": 0.0, "learning_rate": 6.70759197403807e-06, "loss": 1.0538, "step": 15810 }, { "epoch": 0.6186321308396588, "grad_norm": 0.0, "learning_rate": 6.70639542257701e-06, "loss": 1.1129, "step": 15811 }, { "epoch": 0.6186712575318882, "grad_norm": 0.0, "learning_rate": 6.705198924007112e-06, "loss": 0.9729, "step": 15812 }, { "epoch": 0.6187103842241177, "grad_norm": 0.0, "learning_rate": 6.704002478347589e-06, "loss": 1.1283, "step": 15813 }, { "epoch": 0.6187495109163471, "grad_norm": 0.0, "learning_rate": 6.702806085617657e-06, "loss": 0.9905, "step": 15814 }, { "epoch": 0.6187886376085766, "grad_norm": 0.0, "learning_rate": 6.70160974583653e-06, "loss": 0.9376, "step": 15815 }, { "epoch": 0.618827764300806, "grad_norm": 0.0, "learning_rate": 6.700413459023416e-06, "loss": 0.9528, "step": 15816 }, { "epoch": 0.6188668909930355, "grad_norm": 0.0, "learning_rate": 6.699217225197527e-06, "loss": 1.0768, "step": 15817 }, { "epoch": 0.6189060176852649, "grad_norm": 0.0, "learning_rate": 6.698021044378071e-06, "loss": 0.9881, "step": 15818 }, { "epoch": 0.6189451443774944, "grad_norm": 0.0, "learning_rate": 6.696824916584262e-06, "loss": 1.0523, "step": 15819 }, { "epoch": 0.6189842710697238, "grad_norm": 0.0, "learning_rate": 6.6956288418353064e-06, "loss": 1.1032, "step": 15820 }, { "epoch": 0.6190233977619533, "grad_norm": 0.0, "learning_rate": 6.694432820150412e-06, "loss": 1.0618, "step": 15821 }, { "epoch": 0.6190625244541826, "grad_norm": 0.0, "learning_rate": 6.69323685154878e-06, "loss": 0.9218, "step": 15822 }, { "epoch": 0.6191016511464121, "grad_norm": 0.0, "learning_rate": 6.692040936049624e-06, "loss": 0.9838, "step": 15823 }, { "epoch": 0.6191407778386415, "grad_norm": 0.0, "learning_rate": 6.690845073672143e-06, "loss": 0.9908, "step": 15824 }, { "epoch": 0.6191799045308709, "grad_norm": 0.0, "learning_rate": 6.689649264435546e-06, "loss": 1.0477, "step": 15825 }, { "epoch": 0.6192190312231004, "grad_norm": 0.0, "learning_rate": 6.688453508359032e-06, "loss": 0.9762, "step": 15826 }, { "epoch": 0.6192581579153298, "grad_norm": 0.0, "learning_rate": 6.687257805461806e-06, "loss": 0.9655, "step": 15827 }, { "epoch": 0.6192972846075593, "grad_norm": 0.0, "learning_rate": 6.6860621557630685e-06, "loss": 0.9758, "step": 15828 }, { "epoch": 0.6193364112997887, "grad_norm": 0.0, "learning_rate": 6.684866559282022e-06, "loss": 0.948, "step": 15829 }, { "epoch": 0.6193755379920182, "grad_norm": 0.0, "learning_rate": 6.683671016037861e-06, "loss": 0.916, "step": 15830 }, { "epoch": 0.6194146646842476, "grad_norm": 0.0, "learning_rate": 6.682475526049787e-06, "loss": 1.0708, "step": 15831 }, { "epoch": 0.619453791376477, "grad_norm": 0.0, "learning_rate": 6.681280089337002e-06, "loss": 1.029, "step": 15832 }, { "epoch": 0.6194929180687064, "grad_norm": 0.0, "learning_rate": 6.6800847059187e-06, "loss": 1.0771, "step": 15833 }, { "epoch": 0.6195320447609359, "grad_norm": 0.0, "learning_rate": 6.678889375814077e-06, "loss": 0.9789, "step": 15834 }, { "epoch": 0.6195711714531653, "grad_norm": 0.0, "learning_rate": 6.6776940990423266e-06, "loss": 0.9743, "step": 15835 }, { "epoch": 0.6196102981453948, "grad_norm": 0.0, "learning_rate": 6.676498875622649e-06, "loss": 1.012, "step": 15836 }, { "epoch": 0.6196494248376242, "grad_norm": 0.0, "learning_rate": 6.675303705574235e-06, "loss": 1.0325, "step": 15837 }, { "epoch": 0.6196885515298537, "grad_norm": 0.0, "learning_rate": 6.6741085889162775e-06, "loss": 0.9851, "step": 15838 }, { "epoch": 0.6197276782220831, "grad_norm": 0.0, "learning_rate": 6.6729135256679676e-06, "loss": 1.0398, "step": 15839 }, { "epoch": 0.6197668049143126, "grad_norm": 0.0, "learning_rate": 6.6717185158484944e-06, "loss": 1.0474, "step": 15840 }, { "epoch": 0.619805931606542, "grad_norm": 0.0, "learning_rate": 6.670523559477055e-06, "loss": 1.0078, "step": 15841 }, { "epoch": 0.6198450582987715, "grad_norm": 0.0, "learning_rate": 6.669328656572835e-06, "loss": 1.0042, "step": 15842 }, { "epoch": 0.6198841849910008, "grad_norm": 0.0, "learning_rate": 6.668133807155024e-06, "loss": 1.0587, "step": 15843 }, { "epoch": 0.6199233116832303, "grad_norm": 0.0, "learning_rate": 6.6669390112428035e-06, "loss": 1.0992, "step": 15844 }, { "epoch": 0.6199624383754597, "grad_norm": 0.0, "learning_rate": 6.665744268855372e-06, "loss": 1.1153, "step": 15845 }, { "epoch": 0.6200015650676892, "grad_norm": 0.0, "learning_rate": 6.664549580011908e-06, "loss": 0.8049, "step": 15846 }, { "epoch": 0.6200406917599186, "grad_norm": 0.0, "learning_rate": 6.663354944731598e-06, "loss": 1.0272, "step": 15847 }, { "epoch": 0.6200798184521481, "grad_norm": 0.0, "learning_rate": 6.662160363033627e-06, "loss": 1.0092, "step": 15848 }, { "epoch": 0.6201189451443775, "grad_norm": 0.0, "learning_rate": 6.660965834937174e-06, "loss": 1.1431, "step": 15849 }, { "epoch": 0.620158071836607, "grad_norm": 0.0, "learning_rate": 6.659771360461429e-06, "loss": 0.9839, "step": 15850 }, { "epoch": 0.6201971985288364, "grad_norm": 0.0, "learning_rate": 6.6585769396255715e-06, "loss": 0.9809, "step": 15851 }, { "epoch": 0.6202363252210659, "grad_norm": 0.0, "learning_rate": 6.657382572448781e-06, "loss": 1.1047, "step": 15852 }, { "epoch": 0.6202754519132953, "grad_norm": 0.0, "learning_rate": 6.656188258950233e-06, "loss": 0.8454, "step": 15853 }, { "epoch": 0.6203145786055246, "grad_norm": 0.0, "learning_rate": 6.654993999149117e-06, "loss": 1.0455, "step": 15854 }, { "epoch": 0.6203537052977541, "grad_norm": 0.0, "learning_rate": 6.653799793064606e-06, "loss": 0.9623, "step": 15855 }, { "epoch": 0.6203928319899835, "grad_norm": 0.0, "learning_rate": 6.652605640715876e-06, "loss": 0.9433, "step": 15856 }, { "epoch": 0.620431958682213, "grad_norm": 0.0, "learning_rate": 6.651411542122105e-06, "loss": 0.9573, "step": 15857 }, { "epoch": 0.6204710853744424, "grad_norm": 0.0, "learning_rate": 6.650217497302465e-06, "loss": 1.0304, "step": 15858 }, { "epoch": 0.6205102120666719, "grad_norm": 0.0, "learning_rate": 6.6490235062761375e-06, "loss": 0.9356, "step": 15859 }, { "epoch": 0.6205493387589013, "grad_norm": 0.0, "learning_rate": 6.647829569062295e-06, "loss": 1.1093, "step": 15860 }, { "epoch": 0.6205884654511308, "grad_norm": 0.0, "learning_rate": 6.646635685680109e-06, "loss": 0.9265, "step": 15861 }, { "epoch": 0.6206275921433602, "grad_norm": 0.0, "learning_rate": 6.6454418561487485e-06, "loss": 0.8776, "step": 15862 }, { "epoch": 0.6206667188355897, "grad_norm": 0.0, "learning_rate": 6.64424808048739e-06, "loss": 1.1038, "step": 15863 }, { "epoch": 0.620705845527819, "grad_norm": 0.0, "learning_rate": 6.643054358715203e-06, "loss": 0.9614, "step": 15864 }, { "epoch": 0.6207449722200485, "grad_norm": 0.0, "learning_rate": 6.641860690851357e-06, "loss": 0.9657, "step": 15865 }, { "epoch": 0.6207840989122779, "grad_norm": 0.0, "learning_rate": 6.6406670769150186e-06, "loss": 1.0549, "step": 15866 }, { "epoch": 0.6208232256045074, "grad_norm": 0.0, "learning_rate": 6.639473516925359e-06, "loss": 0.92, "step": 15867 }, { "epoch": 0.6208623522967368, "grad_norm": 0.0, "learning_rate": 6.638280010901544e-06, "loss": 0.932, "step": 15868 }, { "epoch": 0.6209014789889663, "grad_norm": 0.0, "learning_rate": 6.637086558862738e-06, "loss": 1.0261, "step": 15869 }, { "epoch": 0.6209406056811957, "grad_norm": 0.0, "learning_rate": 6.6358931608281086e-06, "loss": 1.0906, "step": 15870 }, { "epoch": 0.6209797323734252, "grad_norm": 0.0, "learning_rate": 6.634699816816819e-06, "loss": 1.1072, "step": 15871 }, { "epoch": 0.6210188590656546, "grad_norm": 0.0, "learning_rate": 6.633506526848034e-06, "loss": 1.2038, "step": 15872 }, { "epoch": 0.6210579857578841, "grad_norm": 0.0, "learning_rate": 6.632313290940917e-06, "loss": 0.9836, "step": 15873 }, { "epoch": 0.6210971124501135, "grad_norm": 0.0, "learning_rate": 6.631120109114628e-06, "loss": 1.0056, "step": 15874 }, { "epoch": 0.621136239142343, "grad_norm": 0.0, "learning_rate": 6.629926981388325e-06, "loss": 0.9617, "step": 15875 }, { "epoch": 0.6211753658345723, "grad_norm": 0.0, "learning_rate": 6.628733907781175e-06, "loss": 1.0027, "step": 15876 }, { "epoch": 0.6212144925268018, "grad_norm": 0.0, "learning_rate": 6.627540888312335e-06, "loss": 1.0449, "step": 15877 }, { "epoch": 0.6212536192190312, "grad_norm": 0.0, "learning_rate": 6.626347923000962e-06, "loss": 0.9805, "step": 15878 }, { "epoch": 0.6212927459112607, "grad_norm": 0.0, "learning_rate": 6.625155011866213e-06, "loss": 1.0129, "step": 15879 }, { "epoch": 0.6213318726034901, "grad_norm": 0.0, "learning_rate": 6.623962154927243e-06, "loss": 0.993, "step": 15880 }, { "epoch": 0.6213709992957196, "grad_norm": 0.0, "learning_rate": 6.6227693522032135e-06, "loss": 0.9509, "step": 15881 }, { "epoch": 0.621410125987949, "grad_norm": 0.0, "learning_rate": 6.6215766037132765e-06, "loss": 0.9492, "step": 15882 }, { "epoch": 0.6214492526801784, "grad_norm": 0.0, "learning_rate": 6.620383909476587e-06, "loss": 0.8539, "step": 15883 }, { "epoch": 0.6214883793724079, "grad_norm": 0.0, "learning_rate": 6.61919126951229e-06, "loss": 1.1132, "step": 15884 }, { "epoch": 0.6215275060646372, "grad_norm": 0.0, "learning_rate": 6.6179986838395515e-06, "loss": 1.0205, "step": 15885 }, { "epoch": 0.6215666327568667, "grad_norm": 0.0, "learning_rate": 6.616806152477515e-06, "loss": 0.9329, "step": 15886 }, { "epoch": 0.6216057594490961, "grad_norm": 0.0, "learning_rate": 6.615613675445334e-06, "loss": 0.9807, "step": 15887 }, { "epoch": 0.6216448861413256, "grad_norm": 0.0, "learning_rate": 6.614421252762155e-06, "loss": 1.0249, "step": 15888 }, { "epoch": 0.621684012833555, "grad_norm": 0.0, "learning_rate": 6.613228884447125e-06, "loss": 0.9524, "step": 15889 }, { "epoch": 0.6217231395257845, "grad_norm": 0.0, "learning_rate": 6.612036570519398e-06, "loss": 0.965, "step": 15890 }, { "epoch": 0.6217622662180139, "grad_norm": 0.0, "learning_rate": 6.610844310998119e-06, "loss": 1.1245, "step": 15891 }, { "epoch": 0.6218013929102434, "grad_norm": 0.0, "learning_rate": 6.609652105902433e-06, "loss": 1.2169, "step": 15892 }, { "epoch": 0.6218405196024728, "grad_norm": 0.0, "learning_rate": 6.608459955251482e-06, "loss": 1.0087, "step": 15893 }, { "epoch": 0.6218796462947023, "grad_norm": 0.0, "learning_rate": 6.60726785906442e-06, "loss": 1.0652, "step": 15894 }, { "epoch": 0.6219187729869317, "grad_norm": 0.0, "learning_rate": 6.6060758173603846e-06, "loss": 1.0626, "step": 15895 }, { "epoch": 0.6219578996791612, "grad_norm": 0.0, "learning_rate": 6.604883830158517e-06, "loss": 1.0342, "step": 15896 }, { "epoch": 0.6219970263713905, "grad_norm": 0.0, "learning_rate": 6.603691897477962e-06, "loss": 0.9667, "step": 15897 }, { "epoch": 0.62203615306362, "grad_norm": 0.0, "learning_rate": 6.602500019337854e-06, "loss": 1.134, "step": 15898 }, { "epoch": 0.6220752797558494, "grad_norm": 0.0, "learning_rate": 6.601308195757343e-06, "loss": 1.1468, "step": 15899 }, { "epoch": 0.6221144064480789, "grad_norm": 0.0, "learning_rate": 6.600116426755565e-06, "loss": 0.9979, "step": 15900 }, { "epoch": 0.6221535331403083, "grad_norm": 0.0, "learning_rate": 6.598924712351655e-06, "loss": 0.9857, "step": 15901 }, { "epoch": 0.6221926598325378, "grad_norm": 0.0, "learning_rate": 6.5977330525647495e-06, "loss": 1.0133, "step": 15902 }, { "epoch": 0.6222317865247672, "grad_norm": 0.0, "learning_rate": 6.596541447413991e-06, "loss": 1.155, "step": 15903 }, { "epoch": 0.6222709132169967, "grad_norm": 0.0, "learning_rate": 6.595349896918512e-06, "loss": 1.0346, "step": 15904 }, { "epoch": 0.6223100399092261, "grad_norm": 0.0, "learning_rate": 6.594158401097449e-06, "loss": 1.0177, "step": 15905 }, { "epoch": 0.6223491666014556, "grad_norm": 0.0, "learning_rate": 6.592966959969933e-06, "loss": 0.9808, "step": 15906 }, { "epoch": 0.622388293293685, "grad_norm": 0.0, "learning_rate": 6.591775573555097e-06, "loss": 0.9026, "step": 15907 }, { "epoch": 0.6224274199859144, "grad_norm": 0.0, "learning_rate": 6.590584241872075e-06, "loss": 1.103, "step": 15908 }, { "epoch": 0.6224665466781438, "grad_norm": 0.0, "learning_rate": 6.589392964939998e-06, "loss": 1.0646, "step": 15909 }, { "epoch": 0.6225056733703732, "grad_norm": 0.0, "learning_rate": 6.588201742777998e-06, "loss": 1.0456, "step": 15910 }, { "epoch": 0.6225448000626027, "grad_norm": 0.0, "learning_rate": 6.587010575405199e-06, "loss": 1.0159, "step": 15911 }, { "epoch": 0.6225839267548321, "grad_norm": 0.0, "learning_rate": 6.585819462840737e-06, "loss": 0.9175, "step": 15912 }, { "epoch": 0.6226230534470616, "grad_norm": 0.0, "learning_rate": 6.584628405103737e-06, "loss": 0.9226, "step": 15913 }, { "epoch": 0.622662180139291, "grad_norm": 0.0, "learning_rate": 6.5834374022133205e-06, "loss": 0.8837, "step": 15914 }, { "epoch": 0.6227013068315205, "grad_norm": 0.0, "learning_rate": 6.582246454188621e-06, "loss": 1.0241, "step": 15915 }, { "epoch": 0.6227404335237499, "grad_norm": 0.0, "learning_rate": 6.58105556104876e-06, "loss": 1.0451, "step": 15916 }, { "epoch": 0.6227795602159794, "grad_norm": 0.0, "learning_rate": 6.579864722812863e-06, "loss": 1.0115, "step": 15917 }, { "epoch": 0.6228186869082087, "grad_norm": 0.0, "learning_rate": 6.578673939500054e-06, "loss": 0.9978, "step": 15918 }, { "epoch": 0.6228578136004382, "grad_norm": 0.0, "learning_rate": 6.5774832111294515e-06, "loss": 1.0499, "step": 15919 }, { "epoch": 0.6228969402926676, "grad_norm": 0.0, "learning_rate": 6.576292537720182e-06, "loss": 1.1064, "step": 15920 }, { "epoch": 0.6229360669848971, "grad_norm": 0.0, "learning_rate": 6.575101919291365e-06, "loss": 0.9352, "step": 15921 }, { "epoch": 0.6229751936771265, "grad_norm": 0.0, "learning_rate": 6.573911355862118e-06, "loss": 0.9856, "step": 15922 }, { "epoch": 0.623014320369356, "grad_norm": 0.0, "learning_rate": 6.572720847451564e-06, "loss": 0.989, "step": 15923 }, { "epoch": 0.6230534470615854, "grad_norm": 0.0, "learning_rate": 6.571530394078813e-06, "loss": 1.1107, "step": 15924 }, { "epoch": 0.6230925737538149, "grad_norm": 0.0, "learning_rate": 6.570339995762991e-06, "loss": 0.9074, "step": 15925 }, { "epoch": 0.6231317004460443, "grad_norm": 0.0, "learning_rate": 6.569149652523213e-06, "loss": 0.9539, "step": 15926 }, { "epoch": 0.6231708271382738, "grad_norm": 0.0, "learning_rate": 6.567959364378591e-06, "loss": 1.0271, "step": 15927 }, { "epoch": 0.6232099538305031, "grad_norm": 0.0, "learning_rate": 6.566769131348242e-06, "loss": 1.0583, "step": 15928 }, { "epoch": 0.6232490805227326, "grad_norm": 0.0, "learning_rate": 6.565578953451273e-06, "loss": 1.046, "step": 15929 }, { "epoch": 0.623288207214962, "grad_norm": 0.0, "learning_rate": 6.56438883070681e-06, "loss": 0.9151, "step": 15930 }, { "epoch": 0.6233273339071915, "grad_norm": 0.0, "learning_rate": 6.563198763133953e-06, "loss": 0.9921, "step": 15931 }, { "epoch": 0.6233664605994209, "grad_norm": 0.0, "learning_rate": 6.56200875075182e-06, "loss": 0.9866, "step": 15932 }, { "epoch": 0.6234055872916504, "grad_norm": 0.0, "learning_rate": 6.5608187935795135e-06, "loss": 0.9267, "step": 15933 }, { "epoch": 0.6234447139838798, "grad_norm": 0.0, "learning_rate": 6.559628891636152e-06, "loss": 1.04, "step": 15934 }, { "epoch": 0.6234838406761093, "grad_norm": 0.0, "learning_rate": 6.55843904494084e-06, "loss": 0.9543, "step": 15935 }, { "epoch": 0.6235229673683387, "grad_norm": 0.0, "learning_rate": 6.557249253512683e-06, "loss": 1.0098, "step": 15936 }, { "epoch": 0.6235620940605682, "grad_norm": 0.0, "learning_rate": 6.55605951737079e-06, "loss": 0.9964, "step": 15937 }, { "epoch": 0.6236012207527976, "grad_norm": 0.0, "learning_rate": 6.554869836534261e-06, "loss": 0.9254, "step": 15938 }, { "epoch": 0.6236403474450269, "grad_norm": 0.0, "learning_rate": 6.553680211022211e-06, "loss": 1.0323, "step": 15939 }, { "epoch": 0.6236794741372564, "grad_norm": 0.0, "learning_rate": 6.552490640853737e-06, "loss": 0.9607, "step": 15940 }, { "epoch": 0.6237186008294858, "grad_norm": 0.0, "learning_rate": 6.551301126047943e-06, "loss": 0.974, "step": 15941 }, { "epoch": 0.6237577275217153, "grad_norm": 0.0, "learning_rate": 6.550111666623929e-06, "loss": 1.0066, "step": 15942 }, { "epoch": 0.6237968542139447, "grad_norm": 0.0, "learning_rate": 6.548922262600801e-06, "loss": 1.1016, "step": 15943 }, { "epoch": 0.6238359809061742, "grad_norm": 0.0, "learning_rate": 6.547732913997658e-06, "loss": 1.0617, "step": 15944 }, { "epoch": 0.6238751075984036, "grad_norm": 0.0, "learning_rate": 6.546543620833598e-06, "loss": 1.0399, "step": 15945 }, { "epoch": 0.6239142342906331, "grad_norm": 0.0, "learning_rate": 6.54535438312772e-06, "loss": 1.0532, "step": 15946 }, { "epoch": 0.6239533609828625, "grad_norm": 0.0, "learning_rate": 6.544165200899118e-06, "loss": 0.9602, "step": 15947 }, { "epoch": 0.623992487675092, "grad_norm": 0.0, "learning_rate": 6.542976074166896e-06, "loss": 1.0429, "step": 15948 }, { "epoch": 0.6240316143673214, "grad_norm": 0.0, "learning_rate": 6.541787002950146e-06, "loss": 1.0681, "step": 15949 }, { "epoch": 0.6240707410595508, "grad_norm": 0.0, "learning_rate": 6.540597987267965e-06, "loss": 1.0444, "step": 15950 }, { "epoch": 0.6241098677517802, "grad_norm": 0.0, "learning_rate": 6.539409027139443e-06, "loss": 1.0644, "step": 15951 }, { "epoch": 0.6241489944440097, "grad_norm": 0.0, "learning_rate": 6.538220122583674e-06, "loss": 1.0091, "step": 15952 }, { "epoch": 0.6241881211362391, "grad_norm": 0.0, "learning_rate": 6.537031273619756e-06, "loss": 0.9958, "step": 15953 }, { "epoch": 0.6242272478284686, "grad_norm": 0.0, "learning_rate": 6.535842480266776e-06, "loss": 1.0227, "step": 15954 }, { "epoch": 0.624266374520698, "grad_norm": 0.0, "learning_rate": 6.5346537425438235e-06, "loss": 1.0922, "step": 15955 }, { "epoch": 0.6243055012129275, "grad_norm": 0.0, "learning_rate": 6.533465060469989e-06, "loss": 1.0261, "step": 15956 }, { "epoch": 0.6243446279051569, "grad_norm": 0.0, "learning_rate": 6.532276434064364e-06, "loss": 1.0559, "step": 15957 }, { "epoch": 0.6243837545973864, "grad_norm": 0.0, "learning_rate": 6.5310878633460305e-06, "loss": 1.0176, "step": 15958 }, { "epoch": 0.6244228812896158, "grad_norm": 0.0, "learning_rate": 6.529899348334083e-06, "loss": 1.035, "step": 15959 }, { "epoch": 0.6244620079818453, "grad_norm": 0.0, "learning_rate": 6.5287108890476006e-06, "loss": 0.9873, "step": 15960 }, { "epoch": 0.6245011346740746, "grad_norm": 0.0, "learning_rate": 6.527522485505673e-06, "loss": 0.9144, "step": 15961 }, { "epoch": 0.6245402613663041, "grad_norm": 0.0, "learning_rate": 6.526334137727384e-06, "loss": 0.9499, "step": 15962 }, { "epoch": 0.6245793880585335, "grad_norm": 0.0, "learning_rate": 6.525145845731816e-06, "loss": 1.0219, "step": 15963 }, { "epoch": 0.624618514750763, "grad_norm": 0.0, "learning_rate": 6.523957609538049e-06, "loss": 1.0253, "step": 15964 }, { "epoch": 0.6246576414429924, "grad_norm": 0.0, "learning_rate": 6.522769429165168e-06, "loss": 1.0414, "step": 15965 }, { "epoch": 0.6246967681352219, "grad_norm": 0.0, "learning_rate": 6.521581304632254e-06, "loss": 1.0703, "step": 15966 }, { "epoch": 0.6247358948274513, "grad_norm": 0.0, "learning_rate": 6.520393235958387e-06, "loss": 1.0316, "step": 15967 }, { "epoch": 0.6247750215196807, "grad_norm": 0.0, "learning_rate": 6.5192052231626415e-06, "loss": 0.9325, "step": 15968 }, { "epoch": 0.6248141482119102, "grad_norm": 0.0, "learning_rate": 6.518017266264096e-06, "loss": 1.0509, "step": 15969 }, { "epoch": 0.6248532749041396, "grad_norm": 0.0, "learning_rate": 6.516829365281834e-06, "loss": 0.992, "step": 15970 }, { "epoch": 0.624892401596369, "grad_norm": 0.0, "learning_rate": 6.515641520234928e-06, "loss": 1.0055, "step": 15971 }, { "epoch": 0.6249315282885984, "grad_norm": 0.0, "learning_rate": 6.514453731142454e-06, "loss": 0.9911, "step": 15972 }, { "epoch": 0.6249706549808279, "grad_norm": 0.0, "learning_rate": 6.513265998023484e-06, "loss": 1.0225, "step": 15973 }, { "epoch": 0.6250097816730573, "grad_norm": 0.0, "learning_rate": 6.512078320897088e-06, "loss": 0.9539, "step": 15974 }, { "epoch": 0.6250489083652868, "grad_norm": 0.0, "learning_rate": 6.510890699782349e-06, "loss": 0.9781, "step": 15975 }, { "epoch": 0.6250880350575162, "grad_norm": 0.0, "learning_rate": 6.509703134698333e-06, "loss": 0.9384, "step": 15976 }, { "epoch": 0.6251271617497457, "grad_norm": 0.0, "learning_rate": 6.508515625664111e-06, "loss": 1.0887, "step": 15977 }, { "epoch": 0.6251662884419751, "grad_norm": 0.0, "learning_rate": 6.50732817269875e-06, "loss": 0.9521, "step": 15978 }, { "epoch": 0.6252054151342046, "grad_norm": 0.0, "learning_rate": 6.506140775821326e-06, "loss": 1.0176, "step": 15979 }, { "epoch": 0.625244541826434, "grad_norm": 0.0, "learning_rate": 6.504953435050902e-06, "loss": 1.0703, "step": 15980 }, { "epoch": 0.6252836685186635, "grad_norm": 0.0, "learning_rate": 6.503766150406545e-06, "loss": 1.0203, "step": 15981 }, { "epoch": 0.6253227952108928, "grad_norm": 0.0, "learning_rate": 6.502578921907321e-06, "loss": 0.9147, "step": 15982 }, { "epoch": 0.6253619219031223, "grad_norm": 0.0, "learning_rate": 6.5013917495723e-06, "loss": 1.0882, "step": 15983 }, { "epoch": 0.6254010485953517, "grad_norm": 0.0, "learning_rate": 6.500204633420544e-06, "loss": 0.9211, "step": 15984 }, { "epoch": 0.6254401752875812, "grad_norm": 0.0, "learning_rate": 6.4990175734711156e-06, "loss": 1.0219, "step": 15985 }, { "epoch": 0.6254793019798106, "grad_norm": 0.0, "learning_rate": 6.4978305697430775e-06, "loss": 1.0114, "step": 15986 }, { "epoch": 0.6255184286720401, "grad_norm": 0.0, "learning_rate": 6.496643622255489e-06, "loss": 0.9694, "step": 15987 }, { "epoch": 0.6255575553642695, "grad_norm": 0.0, "learning_rate": 6.495456731027417e-06, "loss": 0.9257, "step": 15988 }, { "epoch": 0.625596682056499, "grad_norm": 0.0, "learning_rate": 6.494269896077919e-06, "loss": 1.1044, "step": 15989 }, { "epoch": 0.6256358087487284, "grad_norm": 0.0, "learning_rate": 6.4930831174260535e-06, "loss": 0.9136, "step": 15990 }, { "epoch": 0.6256749354409579, "grad_norm": 0.0, "learning_rate": 6.491896395090875e-06, "loss": 1.0498, "step": 15991 }, { "epoch": 0.6257140621331873, "grad_norm": 0.0, "learning_rate": 6.490709729091449e-06, "loss": 0.9413, "step": 15992 }, { "epoch": 0.6257531888254168, "grad_norm": 0.0, "learning_rate": 6.489523119446826e-06, "loss": 1.0186, "step": 15993 }, { "epoch": 0.6257923155176461, "grad_norm": 0.0, "learning_rate": 6.4883365661760635e-06, "loss": 0.836, "step": 15994 }, { "epoch": 0.6258314422098756, "grad_norm": 0.0, "learning_rate": 6.487150069298216e-06, "loss": 1.0198, "step": 15995 }, { "epoch": 0.625870568902105, "grad_norm": 0.0, "learning_rate": 6.4859636288323336e-06, "loss": 1.1603, "step": 15996 }, { "epoch": 0.6259096955943344, "grad_norm": 0.0, "learning_rate": 6.484777244797472e-06, "loss": 0.967, "step": 15997 }, { "epoch": 0.6259488222865639, "grad_norm": 0.0, "learning_rate": 6.483590917212686e-06, "loss": 0.8976, "step": 15998 }, { "epoch": 0.6259879489787933, "grad_norm": 0.0, "learning_rate": 6.482404646097023e-06, "loss": 0.9888, "step": 15999 }, { "epoch": 0.6260270756710228, "grad_norm": 0.0, "learning_rate": 6.48121843146953e-06, "loss": 0.9592, "step": 16000 }, { "epoch": 0.6260662023632522, "grad_norm": 0.0, "learning_rate": 6.480032273349264e-06, "loss": 1.004, "step": 16001 }, { "epoch": 0.6261053290554817, "grad_norm": 0.0, "learning_rate": 6.478846171755268e-06, "loss": 1.016, "step": 16002 }, { "epoch": 0.626144455747711, "grad_norm": 0.0, "learning_rate": 6.477660126706588e-06, "loss": 0.9677, "step": 16003 }, { "epoch": 0.6261835824399405, "grad_norm": 0.0, "learning_rate": 6.476474138222275e-06, "loss": 1.0622, "step": 16004 }, { "epoch": 0.6262227091321699, "grad_norm": 0.0, "learning_rate": 6.4752882063213705e-06, "loss": 1.0114, "step": 16005 }, { "epoch": 0.6262618358243994, "grad_norm": 0.0, "learning_rate": 6.474102331022922e-06, "loss": 1.1185, "step": 16006 }, { "epoch": 0.6263009625166288, "grad_norm": 0.0, "learning_rate": 6.472916512345972e-06, "loss": 0.9285, "step": 16007 }, { "epoch": 0.6263400892088583, "grad_norm": 0.0, "learning_rate": 6.471730750309564e-06, "loss": 1.1323, "step": 16008 }, { "epoch": 0.6263792159010877, "grad_norm": 0.0, "learning_rate": 6.470545044932734e-06, "loss": 0.9116, "step": 16009 }, { "epoch": 0.6264183425933172, "grad_norm": 0.0, "learning_rate": 6.4693593962345315e-06, "loss": 1.0142, "step": 16010 }, { "epoch": 0.6264574692855466, "grad_norm": 0.0, "learning_rate": 6.468173804233993e-06, "loss": 0.8548, "step": 16011 }, { "epoch": 0.6264965959777761, "grad_norm": 0.0, "learning_rate": 6.466988268950159e-06, "loss": 0.9907, "step": 16012 }, { "epoch": 0.6265357226700055, "grad_norm": 0.0, "learning_rate": 6.465802790402065e-06, "loss": 0.872, "step": 16013 }, { "epoch": 0.626574849362235, "grad_norm": 0.0, "learning_rate": 6.464617368608747e-06, "loss": 0.9715, "step": 16014 }, { "epoch": 0.6266139760544643, "grad_norm": 0.0, "learning_rate": 6.463432003589247e-06, "loss": 1.0211, "step": 16015 }, { "epoch": 0.6266531027466938, "grad_norm": 0.0, "learning_rate": 6.462246695362597e-06, "loss": 0.9928, "step": 16016 }, { "epoch": 0.6266922294389232, "grad_norm": 0.0, "learning_rate": 6.461061443947832e-06, "loss": 1.0953, "step": 16017 }, { "epoch": 0.6267313561311527, "grad_norm": 0.0, "learning_rate": 6.459876249363983e-06, "loss": 0.9517, "step": 16018 }, { "epoch": 0.6267704828233821, "grad_norm": 0.0, "learning_rate": 6.4586911116300885e-06, "loss": 1.0454, "step": 16019 }, { "epoch": 0.6268096095156116, "grad_norm": 0.0, "learning_rate": 6.457506030765177e-06, "loss": 1.1168, "step": 16020 }, { "epoch": 0.626848736207841, "grad_norm": 0.0, "learning_rate": 6.45632100678828e-06, "loss": 1.0807, "step": 16021 }, { "epoch": 0.6268878629000705, "grad_norm": 0.0, "learning_rate": 6.455136039718428e-06, "loss": 1.0438, "step": 16022 }, { "epoch": 0.6269269895922999, "grad_norm": 0.0, "learning_rate": 6.453951129574644e-06, "loss": 0.9792, "step": 16023 }, { "epoch": 0.6269661162845293, "grad_norm": 0.0, "learning_rate": 6.452766276375966e-06, "loss": 0.954, "step": 16024 }, { "epoch": 0.6270052429767587, "grad_norm": 0.0, "learning_rate": 6.4515814801414155e-06, "loss": 0.9794, "step": 16025 }, { "epoch": 0.6270443696689881, "grad_norm": 0.0, "learning_rate": 6.450396740890021e-06, "loss": 1.0998, "step": 16026 }, { "epoch": 0.6270834963612176, "grad_norm": 0.0, "learning_rate": 6.449212058640804e-06, "loss": 0.988, "step": 16027 }, { "epoch": 0.627122623053447, "grad_norm": 0.0, "learning_rate": 6.448027433412794e-06, "loss": 0.9595, "step": 16028 }, { "epoch": 0.6271617497456765, "grad_norm": 0.0, "learning_rate": 6.446842865225014e-06, "loss": 0.9421, "step": 16029 }, { "epoch": 0.6272008764379059, "grad_norm": 0.0, "learning_rate": 6.445658354096484e-06, "loss": 1.0643, "step": 16030 }, { "epoch": 0.6272400031301354, "grad_norm": 0.0, "learning_rate": 6.444473900046229e-06, "loss": 1.0347, "step": 16031 }, { "epoch": 0.6272791298223648, "grad_norm": 0.0, "learning_rate": 6.443289503093262e-06, "loss": 1.0892, "step": 16032 }, { "epoch": 0.6273182565145943, "grad_norm": 0.0, "learning_rate": 6.442105163256614e-06, "loss": 0.9782, "step": 16033 }, { "epoch": 0.6273573832068237, "grad_norm": 0.0, "learning_rate": 6.440920880555297e-06, "loss": 1.0042, "step": 16034 }, { "epoch": 0.6273965098990532, "grad_norm": 0.0, "learning_rate": 6.4397366550083314e-06, "loss": 1.155, "step": 16035 }, { "epoch": 0.6274356365912825, "grad_norm": 0.0, "learning_rate": 6.43855248663473e-06, "loss": 1.0143, "step": 16036 }, { "epoch": 0.627474763283512, "grad_norm": 0.0, "learning_rate": 6.437368375453517e-06, "loss": 0.9019, "step": 16037 }, { "epoch": 0.6275138899757414, "grad_norm": 0.0, "learning_rate": 6.436184321483705e-06, "loss": 1.0129, "step": 16038 }, { "epoch": 0.6275530166679709, "grad_norm": 0.0, "learning_rate": 6.435000324744306e-06, "loss": 0.9596, "step": 16039 }, { "epoch": 0.6275921433602003, "grad_norm": 0.0, "learning_rate": 6.433816385254332e-06, "loss": 0.9971, "step": 16040 }, { "epoch": 0.6276312700524298, "grad_norm": 0.0, "learning_rate": 6.4326325030328e-06, "loss": 0.9771, "step": 16041 }, { "epoch": 0.6276703967446592, "grad_norm": 0.0, "learning_rate": 6.43144867809872e-06, "loss": 0.9852, "step": 16042 }, { "epoch": 0.6277095234368887, "grad_norm": 0.0, "learning_rate": 6.430264910471104e-06, "loss": 1.1017, "step": 16043 }, { "epoch": 0.6277486501291181, "grad_norm": 0.0, "learning_rate": 6.429081200168958e-06, "loss": 1.0986, "step": 16044 }, { "epoch": 0.6277877768213476, "grad_norm": 0.0, "learning_rate": 6.4278975472112945e-06, "loss": 0.9375, "step": 16045 }, { "epoch": 0.627826903513577, "grad_norm": 0.0, "learning_rate": 6.42671395161712e-06, "loss": 1.0563, "step": 16046 }, { "epoch": 0.6278660302058064, "grad_norm": 0.0, "learning_rate": 6.425530413405443e-06, "loss": 0.9373, "step": 16047 }, { "epoch": 0.6279051568980358, "grad_norm": 0.0, "learning_rate": 6.424346932595267e-06, "loss": 1.061, "step": 16048 }, { "epoch": 0.6279442835902653, "grad_norm": 0.0, "learning_rate": 6.423163509205596e-06, "loss": 0.9518, "step": 16049 }, { "epoch": 0.6279834102824947, "grad_norm": 0.0, "learning_rate": 6.421980143255442e-06, "loss": 1.0548, "step": 16050 }, { "epoch": 0.6280225369747242, "grad_norm": 0.0, "learning_rate": 6.420796834763802e-06, "loss": 1.0944, "step": 16051 }, { "epoch": 0.6280616636669536, "grad_norm": 0.0, "learning_rate": 6.419613583749679e-06, "loss": 1.0241, "step": 16052 }, { "epoch": 0.628100790359183, "grad_norm": 0.0, "learning_rate": 6.418430390232076e-06, "loss": 1.0797, "step": 16053 }, { "epoch": 0.6281399170514125, "grad_norm": 0.0, "learning_rate": 6.4172472542299905e-06, "loss": 1.0175, "step": 16054 }, { "epoch": 0.6281790437436419, "grad_norm": 0.0, "learning_rate": 6.416064175762428e-06, "loss": 0.9451, "step": 16055 }, { "epoch": 0.6282181704358714, "grad_norm": 0.0, "learning_rate": 6.414881154848383e-06, "loss": 0.9606, "step": 16056 }, { "epoch": 0.6282572971281007, "grad_norm": 0.0, "learning_rate": 6.413698191506855e-06, "loss": 1.0303, "step": 16057 }, { "epoch": 0.6282964238203302, "grad_norm": 0.0, "learning_rate": 6.412515285756836e-06, "loss": 1.0866, "step": 16058 }, { "epoch": 0.6283355505125596, "grad_norm": 0.0, "learning_rate": 6.41133243761733e-06, "loss": 1.0291, "step": 16059 }, { "epoch": 0.6283746772047891, "grad_norm": 0.0, "learning_rate": 6.4101496471073284e-06, "loss": 1.0421, "step": 16060 }, { "epoch": 0.6284138038970185, "grad_norm": 0.0, "learning_rate": 6.408966914245824e-06, "loss": 0.965, "step": 16061 }, { "epoch": 0.628452930589248, "grad_norm": 0.0, "learning_rate": 6.407784239051813e-06, "loss": 0.9789, "step": 16062 }, { "epoch": 0.6284920572814774, "grad_norm": 0.0, "learning_rate": 6.406601621544282e-06, "loss": 1.0038, "step": 16063 }, { "epoch": 0.6285311839737069, "grad_norm": 0.0, "learning_rate": 6.405419061742229e-06, "loss": 1.0609, "step": 16064 }, { "epoch": 0.6285703106659363, "grad_norm": 0.0, "learning_rate": 6.40423655966464e-06, "loss": 0.9911, "step": 16065 }, { "epoch": 0.6286094373581658, "grad_norm": 0.0, "learning_rate": 6.403054115330509e-06, "loss": 1.0623, "step": 16066 }, { "epoch": 0.6286485640503952, "grad_norm": 0.0, "learning_rate": 6.401871728758816e-06, "loss": 1.0312, "step": 16067 }, { "epoch": 0.6286876907426246, "grad_norm": 0.0, "learning_rate": 6.400689399968559e-06, "loss": 1.0137, "step": 16068 }, { "epoch": 0.628726817434854, "grad_norm": 0.0, "learning_rate": 6.39950712897872e-06, "loss": 0.965, "step": 16069 }, { "epoch": 0.6287659441270835, "grad_norm": 0.0, "learning_rate": 6.398324915808285e-06, "loss": 0.9839, "step": 16070 }, { "epoch": 0.6288050708193129, "grad_norm": 0.0, "learning_rate": 6.397142760476239e-06, "loss": 0.9818, "step": 16071 }, { "epoch": 0.6288441975115424, "grad_norm": 0.0, "learning_rate": 6.395960663001561e-06, "loss": 1.0377, "step": 16072 }, { "epoch": 0.6288833242037718, "grad_norm": 0.0, "learning_rate": 6.394778623403241e-06, "loss": 0.9784, "step": 16073 }, { "epoch": 0.6289224508960013, "grad_norm": 0.0, "learning_rate": 6.393596641700262e-06, "loss": 1.0571, "step": 16074 }, { "epoch": 0.6289615775882307, "grad_norm": 0.0, "learning_rate": 6.392414717911599e-06, "loss": 1.0916, "step": 16075 }, { "epoch": 0.6290007042804602, "grad_norm": 0.0, "learning_rate": 6.391232852056233e-06, "loss": 1.046, "step": 16076 }, { "epoch": 0.6290398309726896, "grad_norm": 0.0, "learning_rate": 6.390051044153149e-06, "loss": 0.9133, "step": 16077 }, { "epoch": 0.6290789576649191, "grad_norm": 0.0, "learning_rate": 6.388869294221322e-06, "loss": 1.0619, "step": 16078 }, { "epoch": 0.6291180843571484, "grad_norm": 0.0, "learning_rate": 6.387687602279729e-06, "loss": 0.9836, "step": 16079 }, { "epoch": 0.6291572110493779, "grad_norm": 0.0, "learning_rate": 6.3865059683473456e-06, "loss": 0.9947, "step": 16080 }, { "epoch": 0.6291963377416073, "grad_norm": 0.0, "learning_rate": 6.385324392443144e-06, "loss": 1.0653, "step": 16081 }, { "epoch": 0.6292354644338367, "grad_norm": 0.0, "learning_rate": 6.384142874586108e-06, "loss": 0.9482, "step": 16082 }, { "epoch": 0.6292745911260662, "grad_norm": 0.0, "learning_rate": 6.382961414795208e-06, "loss": 1.0212, "step": 16083 }, { "epoch": 0.6293137178182956, "grad_norm": 0.0, "learning_rate": 6.3817800130894135e-06, "loss": 1.0184, "step": 16084 }, { "epoch": 0.6293528445105251, "grad_norm": 0.0, "learning_rate": 6.380598669487697e-06, "loss": 1.0645, "step": 16085 }, { "epoch": 0.6293919712027545, "grad_norm": 0.0, "learning_rate": 6.379417384009031e-06, "loss": 1.0189, "step": 16086 }, { "epoch": 0.629431097894984, "grad_norm": 0.0, "learning_rate": 6.378236156672384e-06, "loss": 0.9919, "step": 16087 }, { "epoch": 0.6294702245872134, "grad_norm": 0.0, "learning_rate": 6.3770549874967295e-06, "loss": 0.9966, "step": 16088 }, { "epoch": 0.6295093512794429, "grad_norm": 0.0, "learning_rate": 6.37587387650103e-06, "loss": 0.8989, "step": 16089 }, { "epoch": 0.6295484779716722, "grad_norm": 0.0, "learning_rate": 6.374692823704254e-06, "loss": 0.9993, "step": 16090 }, { "epoch": 0.6295876046639017, "grad_norm": 0.0, "learning_rate": 6.37351182912537e-06, "loss": 0.9843, "step": 16091 }, { "epoch": 0.6296267313561311, "grad_norm": 0.0, "learning_rate": 6.372330892783338e-06, "loss": 1.0389, "step": 16092 }, { "epoch": 0.6296658580483606, "grad_norm": 0.0, "learning_rate": 6.37115001469713e-06, "loss": 1.0863, "step": 16093 }, { "epoch": 0.62970498474059, "grad_norm": 0.0, "learning_rate": 6.369969194885703e-06, "loss": 1.0067, "step": 16094 }, { "epoch": 0.6297441114328195, "grad_norm": 0.0, "learning_rate": 6.3687884333680246e-06, "loss": 1.1848, "step": 16095 }, { "epoch": 0.6297832381250489, "grad_norm": 0.0, "learning_rate": 6.367607730163054e-06, "loss": 0.9759, "step": 16096 }, { "epoch": 0.6298223648172784, "grad_norm": 0.0, "learning_rate": 6.366427085289752e-06, "loss": 1.117, "step": 16097 }, { "epoch": 0.6298614915095078, "grad_norm": 0.0, "learning_rate": 6.3652464987670726e-06, "loss": 1.0203, "step": 16098 }, { "epoch": 0.6299006182017373, "grad_norm": 0.0, "learning_rate": 6.364065970613984e-06, "loss": 0.9505, "step": 16099 }, { "epoch": 0.6299397448939666, "grad_norm": 0.0, "learning_rate": 6.36288550084944e-06, "loss": 1.0341, "step": 16100 }, { "epoch": 0.6299788715861961, "grad_norm": 0.0, "learning_rate": 6.361705089492398e-06, "loss": 1.0213, "step": 16101 }, { "epoch": 0.6300179982784255, "grad_norm": 0.0, "learning_rate": 6.360524736561813e-06, "loss": 1.0201, "step": 16102 }, { "epoch": 0.630057124970655, "grad_norm": 0.0, "learning_rate": 6.359344442076636e-06, "loss": 1.0513, "step": 16103 }, { "epoch": 0.6300962516628844, "grad_norm": 0.0, "learning_rate": 6.358164206055829e-06, "loss": 1.0346, "step": 16104 }, { "epoch": 0.6301353783551139, "grad_norm": 0.0, "learning_rate": 6.356984028518341e-06, "loss": 0.9529, "step": 16105 }, { "epoch": 0.6301745050473433, "grad_norm": 0.0, "learning_rate": 6.355803909483125e-06, "loss": 0.9538, "step": 16106 }, { "epoch": 0.6302136317395728, "grad_norm": 0.0, "learning_rate": 6.354623848969128e-06, "loss": 0.9014, "step": 16107 }, { "epoch": 0.6302527584318022, "grad_norm": 0.0, "learning_rate": 6.353443846995309e-06, "loss": 0.9243, "step": 16108 }, { "epoch": 0.6302918851240316, "grad_norm": 0.0, "learning_rate": 6.352263903580611e-06, "loss": 0.9463, "step": 16109 }, { "epoch": 0.630331011816261, "grad_norm": 0.0, "learning_rate": 6.351084018743984e-06, "loss": 0.9221, "step": 16110 }, { "epoch": 0.6303701385084904, "grad_norm": 0.0, "learning_rate": 6.349904192504375e-06, "loss": 0.9879, "step": 16111 }, { "epoch": 0.6304092652007199, "grad_norm": 0.0, "learning_rate": 6.3487244248807275e-06, "loss": 1.0399, "step": 16112 }, { "epoch": 0.6304483918929493, "grad_norm": 0.0, "learning_rate": 6.347544715891995e-06, "loss": 0.8994, "step": 16113 }, { "epoch": 0.6304875185851788, "grad_norm": 0.0, "learning_rate": 6.346365065557116e-06, "loss": 0.8392, "step": 16114 }, { "epoch": 0.6305266452774082, "grad_norm": 0.0, "learning_rate": 6.345185473895037e-06, "loss": 0.9848, "step": 16115 }, { "epoch": 0.6305657719696377, "grad_norm": 0.0, "learning_rate": 6.344005940924695e-06, "loss": 1.1571, "step": 16116 }, { "epoch": 0.6306048986618671, "grad_norm": 0.0, "learning_rate": 6.342826466665041e-06, "loss": 1.1139, "step": 16117 }, { "epoch": 0.6306440253540966, "grad_norm": 0.0, "learning_rate": 6.341647051135011e-06, "loss": 1.0972, "step": 16118 }, { "epoch": 0.630683152046326, "grad_norm": 0.0, "learning_rate": 6.340467694353545e-06, "loss": 1.0001, "step": 16119 }, { "epoch": 0.6307222787385555, "grad_norm": 0.0, "learning_rate": 6.3392883963395826e-06, "loss": 0.9645, "step": 16120 }, { "epoch": 0.6307614054307848, "grad_norm": 0.0, "learning_rate": 6.338109157112055e-06, "loss": 1.0274, "step": 16121 }, { "epoch": 0.6308005321230143, "grad_norm": 0.0, "learning_rate": 6.336929976689914e-06, "loss": 1.049, "step": 16122 }, { "epoch": 0.6308396588152437, "grad_norm": 0.0, "learning_rate": 6.335750855092084e-06, "loss": 0.9929, "step": 16123 }, { "epoch": 0.6308787855074732, "grad_norm": 0.0, "learning_rate": 6.334571792337505e-06, "loss": 1.0141, "step": 16124 }, { "epoch": 0.6309179121997026, "grad_norm": 0.0, "learning_rate": 6.333392788445106e-06, "loss": 1.0708, "step": 16125 }, { "epoch": 0.6309570388919321, "grad_norm": 0.0, "learning_rate": 6.332213843433829e-06, "loss": 0.9851, "step": 16126 }, { "epoch": 0.6309961655841615, "grad_norm": 0.0, "learning_rate": 6.331034957322601e-06, "loss": 1.0234, "step": 16127 }, { "epoch": 0.631035292276391, "grad_norm": 0.0, "learning_rate": 6.329856130130354e-06, "loss": 0.9099, "step": 16128 }, { "epoch": 0.6310744189686204, "grad_norm": 0.0, "learning_rate": 6.328677361876019e-06, "loss": 1.1183, "step": 16129 }, { "epoch": 0.6311135456608499, "grad_norm": 0.0, "learning_rate": 6.327498652578522e-06, "loss": 0.874, "step": 16130 }, { "epoch": 0.6311526723530793, "grad_norm": 0.0, "learning_rate": 6.326320002256796e-06, "loss": 0.8577, "step": 16131 }, { "epoch": 0.6311917990453088, "grad_norm": 0.0, "learning_rate": 6.325141410929769e-06, "loss": 1.0115, "step": 16132 }, { "epoch": 0.6312309257375381, "grad_norm": 0.0, "learning_rate": 6.323962878616366e-06, "loss": 0.8949, "step": 16133 }, { "epoch": 0.6312700524297676, "grad_norm": 0.0, "learning_rate": 6.3227844053355105e-06, "loss": 1.0025, "step": 16134 }, { "epoch": 0.631309179121997, "grad_norm": 0.0, "learning_rate": 6.321605991106132e-06, "loss": 0.9573, "step": 16135 }, { "epoch": 0.6313483058142265, "grad_norm": 0.0, "learning_rate": 6.3204276359471515e-06, "loss": 0.8552, "step": 16136 }, { "epoch": 0.6313874325064559, "grad_norm": 0.0, "learning_rate": 6.319249339877491e-06, "loss": 1.0238, "step": 16137 }, { "epoch": 0.6314265591986853, "grad_norm": 0.0, "learning_rate": 6.3180711029160756e-06, "loss": 1.2097, "step": 16138 }, { "epoch": 0.6314656858909148, "grad_norm": 0.0, "learning_rate": 6.3168929250818235e-06, "loss": 1.0167, "step": 16139 }, { "epoch": 0.6315048125831442, "grad_norm": 0.0, "learning_rate": 6.315714806393656e-06, "loss": 0.7971, "step": 16140 }, { "epoch": 0.6315439392753737, "grad_norm": 0.0, "learning_rate": 6.3145367468704944e-06, "loss": 0.9585, "step": 16141 }, { "epoch": 0.631583065967603, "grad_norm": 0.0, "learning_rate": 6.313358746531253e-06, "loss": 0.8983, "step": 16142 }, { "epoch": 0.6316221926598325, "grad_norm": 0.0, "learning_rate": 6.312180805394845e-06, "loss": 1.0, "step": 16143 }, { "epoch": 0.6316613193520619, "grad_norm": 0.0, "learning_rate": 6.311002923480198e-06, "loss": 1.0995, "step": 16144 }, { "epoch": 0.6317004460442914, "grad_norm": 0.0, "learning_rate": 6.309825100806221e-06, "loss": 1.0114, "step": 16145 }, { "epoch": 0.6317395727365208, "grad_norm": 0.0, "learning_rate": 6.308647337391831e-06, "loss": 1.0479, "step": 16146 }, { "epoch": 0.6317786994287503, "grad_norm": 0.0, "learning_rate": 6.307469633255937e-06, "loss": 1.0981, "step": 16147 }, { "epoch": 0.6318178261209797, "grad_norm": 0.0, "learning_rate": 6.30629198841745e-06, "loss": 1.045, "step": 16148 }, { "epoch": 0.6318569528132092, "grad_norm": 0.0, "learning_rate": 6.30511440289529e-06, "loss": 0.9473, "step": 16149 }, { "epoch": 0.6318960795054386, "grad_norm": 0.0, "learning_rate": 6.303936876708361e-06, "loss": 0.9172, "step": 16150 }, { "epoch": 0.6319352061976681, "grad_norm": 0.0, "learning_rate": 6.302759409875576e-06, "loss": 0.9706, "step": 16151 }, { "epoch": 0.6319743328898975, "grad_norm": 0.0, "learning_rate": 6.301582002415837e-06, "loss": 1.0221, "step": 16152 }, { "epoch": 0.632013459582127, "grad_norm": 0.0, "learning_rate": 6.30040465434806e-06, "loss": 0.9111, "step": 16153 }, { "epoch": 0.6320525862743563, "grad_norm": 0.0, "learning_rate": 6.299227365691151e-06, "loss": 1.0692, "step": 16154 }, { "epoch": 0.6320917129665858, "grad_norm": 0.0, "learning_rate": 6.298050136464011e-06, "loss": 1.0414, "step": 16155 }, { "epoch": 0.6321308396588152, "grad_norm": 0.0, "learning_rate": 6.296872966685543e-06, "loss": 0.9988, "step": 16156 }, { "epoch": 0.6321699663510447, "grad_norm": 0.0, "learning_rate": 6.2956958563746595e-06, "loss": 0.9483, "step": 16157 }, { "epoch": 0.6322090930432741, "grad_norm": 0.0, "learning_rate": 6.29451880555026e-06, "loss": 1.0569, "step": 16158 }, { "epoch": 0.6322482197355036, "grad_norm": 0.0, "learning_rate": 6.293341814231244e-06, "loss": 1.0002, "step": 16159 }, { "epoch": 0.632287346427733, "grad_norm": 0.0, "learning_rate": 6.292164882436513e-06, "loss": 1.0029, "step": 16160 }, { "epoch": 0.6323264731199625, "grad_norm": 0.0, "learning_rate": 6.290988010184964e-06, "loss": 1.017, "step": 16161 }, { "epoch": 0.6323655998121919, "grad_norm": 0.0, "learning_rate": 6.289811197495506e-06, "loss": 1.0668, "step": 16162 }, { "epoch": 0.6324047265044214, "grad_norm": 0.0, "learning_rate": 6.288634444387028e-06, "loss": 1.0312, "step": 16163 }, { "epoch": 0.6324438531966508, "grad_norm": 0.0, "learning_rate": 6.287457750878431e-06, "loss": 1.007, "step": 16164 }, { "epoch": 0.6324829798888802, "grad_norm": 0.0, "learning_rate": 6.2862811169886085e-06, "loss": 1.1759, "step": 16165 }, { "epoch": 0.6325221065811096, "grad_norm": 0.0, "learning_rate": 6.2851045427364595e-06, "loss": 1.003, "step": 16166 }, { "epoch": 0.632561233273339, "grad_norm": 0.0, "learning_rate": 6.283928028140877e-06, "loss": 0.9034, "step": 16167 }, { "epoch": 0.6326003599655685, "grad_norm": 0.0, "learning_rate": 6.282751573220755e-06, "loss": 0.9864, "step": 16168 }, { "epoch": 0.6326394866577979, "grad_norm": 0.0, "learning_rate": 6.281575177994985e-06, "loss": 0.8424, "step": 16169 }, { "epoch": 0.6326786133500274, "grad_norm": 0.0, "learning_rate": 6.280398842482454e-06, "loss": 0.9376, "step": 16170 }, { "epoch": 0.6327177400422568, "grad_norm": 0.0, "learning_rate": 6.279222566702059e-06, "loss": 1.0267, "step": 16171 }, { "epoch": 0.6327568667344863, "grad_norm": 0.0, "learning_rate": 6.278046350672689e-06, "loss": 1.0216, "step": 16172 }, { "epoch": 0.6327959934267157, "grad_norm": 0.0, "learning_rate": 6.276870194413229e-06, "loss": 0.9593, "step": 16173 }, { "epoch": 0.6328351201189452, "grad_norm": 0.0, "learning_rate": 6.275694097942567e-06, "loss": 0.9666, "step": 16174 }, { "epoch": 0.6328742468111745, "grad_norm": 0.0, "learning_rate": 6.274518061279593e-06, "loss": 0.9469, "step": 16175 }, { "epoch": 0.632913373503404, "grad_norm": 0.0, "learning_rate": 6.273342084443189e-06, "loss": 1.0751, "step": 16176 }, { "epoch": 0.6329525001956334, "grad_norm": 0.0, "learning_rate": 6.272166167452242e-06, "loss": 1.0839, "step": 16177 }, { "epoch": 0.6329916268878629, "grad_norm": 0.0, "learning_rate": 6.270990310325637e-06, "loss": 1.0327, "step": 16178 }, { "epoch": 0.6330307535800923, "grad_norm": 0.0, "learning_rate": 6.269814513082253e-06, "loss": 0.9974, "step": 16179 }, { "epoch": 0.6330698802723218, "grad_norm": 0.0, "learning_rate": 6.2686387757409736e-06, "loss": 1.1146, "step": 16180 }, { "epoch": 0.6331090069645512, "grad_norm": 0.0, "learning_rate": 6.267463098320682e-06, "loss": 0.9786, "step": 16181 }, { "epoch": 0.6331481336567807, "grad_norm": 0.0, "learning_rate": 6.266287480840253e-06, "loss": 0.922, "step": 16182 }, { "epoch": 0.6331872603490101, "grad_norm": 0.0, "learning_rate": 6.265111923318569e-06, "loss": 1.077, "step": 16183 }, { "epoch": 0.6332263870412396, "grad_norm": 0.0, "learning_rate": 6.263936425774508e-06, "loss": 0.9805, "step": 16184 }, { "epoch": 0.633265513733469, "grad_norm": 0.0, "learning_rate": 6.262760988226948e-06, "loss": 0.9597, "step": 16185 }, { "epoch": 0.6333046404256985, "grad_norm": 0.0, "learning_rate": 6.261585610694762e-06, "loss": 0.9751, "step": 16186 }, { "epoch": 0.6333437671179278, "grad_norm": 0.0, "learning_rate": 6.260410293196828e-06, "loss": 0.9068, "step": 16187 }, { "epoch": 0.6333828938101573, "grad_norm": 0.0, "learning_rate": 6.259235035752014e-06, "loss": 0.9369, "step": 16188 }, { "epoch": 0.6334220205023867, "grad_norm": 0.0, "learning_rate": 6.258059838379203e-06, "loss": 0.9804, "step": 16189 }, { "epoch": 0.6334611471946162, "grad_norm": 0.0, "learning_rate": 6.256884701097261e-06, "loss": 1.0746, "step": 16190 }, { "epoch": 0.6335002738868456, "grad_norm": 0.0, "learning_rate": 6.255709623925061e-06, "loss": 0.7877, "step": 16191 }, { "epoch": 0.6335394005790751, "grad_norm": 0.0, "learning_rate": 6.254534606881467e-06, "loss": 1.0609, "step": 16192 }, { "epoch": 0.6335785272713045, "grad_norm": 0.0, "learning_rate": 6.253359649985359e-06, "loss": 0.9143, "step": 16193 }, { "epoch": 0.633617653963534, "grad_norm": 0.0, "learning_rate": 6.2521847532556e-06, "loss": 0.9536, "step": 16194 }, { "epoch": 0.6336567806557634, "grad_norm": 0.0, "learning_rate": 6.251009916711059e-06, "loss": 1.0678, "step": 16195 }, { "epoch": 0.6336959073479927, "grad_norm": 0.0, "learning_rate": 6.2498351403706e-06, "loss": 1.1379, "step": 16196 }, { "epoch": 0.6337350340402222, "grad_norm": 0.0, "learning_rate": 6.248660424253084e-06, "loss": 0.9762, "step": 16197 }, { "epoch": 0.6337741607324516, "grad_norm": 0.0, "learning_rate": 6.247485768377386e-06, "loss": 0.902, "step": 16198 }, { "epoch": 0.6338132874246811, "grad_norm": 0.0, "learning_rate": 6.2463111727623645e-06, "loss": 1.0421, "step": 16199 }, { "epoch": 0.6338524141169105, "grad_norm": 0.0, "learning_rate": 6.245136637426882e-06, "loss": 1.0612, "step": 16200 }, { "epoch": 0.63389154080914, "grad_norm": 0.0, "learning_rate": 6.2439621623897965e-06, "loss": 1.0927, "step": 16201 }, { "epoch": 0.6339306675013694, "grad_norm": 0.0, "learning_rate": 6.242787747669974e-06, "loss": 1.0726, "step": 16202 }, { "epoch": 0.6339697941935989, "grad_norm": 0.0, "learning_rate": 6.2416133932862746e-06, "loss": 1.0259, "step": 16203 }, { "epoch": 0.6340089208858283, "grad_norm": 0.0, "learning_rate": 6.240439099257554e-06, "loss": 0.97, "step": 16204 }, { "epoch": 0.6340480475780578, "grad_norm": 0.0, "learning_rate": 6.23926486560267e-06, "loss": 0.9549, "step": 16205 }, { "epoch": 0.6340871742702872, "grad_norm": 0.0, "learning_rate": 6.238090692340475e-06, "loss": 0.9617, "step": 16206 }, { "epoch": 0.6341263009625167, "grad_norm": 0.0, "learning_rate": 6.236916579489835e-06, "loss": 1.0099, "step": 16207 }, { "epoch": 0.634165427654746, "grad_norm": 0.0, "learning_rate": 6.235742527069599e-06, "loss": 1.0494, "step": 16208 }, { "epoch": 0.6342045543469755, "grad_norm": 0.0, "learning_rate": 6.23456853509862e-06, "loss": 0.9351, "step": 16209 }, { "epoch": 0.6342436810392049, "grad_norm": 0.0, "learning_rate": 6.233394603595751e-06, "loss": 0.8688, "step": 16210 }, { "epoch": 0.6342828077314344, "grad_norm": 0.0, "learning_rate": 6.232220732579845e-06, "loss": 1.0677, "step": 16211 }, { "epoch": 0.6343219344236638, "grad_norm": 0.0, "learning_rate": 6.231046922069755e-06, "loss": 0.9811, "step": 16212 }, { "epoch": 0.6343610611158933, "grad_norm": 0.0, "learning_rate": 6.229873172084329e-06, "loss": 1.1663, "step": 16213 }, { "epoch": 0.6344001878081227, "grad_norm": 0.0, "learning_rate": 6.228699482642412e-06, "loss": 1.116, "step": 16214 }, { "epoch": 0.6344393145003522, "grad_norm": 0.0, "learning_rate": 6.227525853762857e-06, "loss": 1.0886, "step": 16215 }, { "epoch": 0.6344784411925816, "grad_norm": 0.0, "learning_rate": 6.22635228546451e-06, "loss": 0.9691, "step": 16216 }, { "epoch": 0.6345175678848111, "grad_norm": 0.0, "learning_rate": 6.225178777766219e-06, "loss": 0.9657, "step": 16217 }, { "epoch": 0.6345566945770404, "grad_norm": 0.0, "learning_rate": 6.224005330686824e-06, "loss": 1.0466, "step": 16218 }, { "epoch": 0.6345958212692699, "grad_norm": 0.0, "learning_rate": 6.222831944245171e-06, "loss": 0.8994, "step": 16219 }, { "epoch": 0.6346349479614993, "grad_norm": 0.0, "learning_rate": 6.221658618460107e-06, "loss": 0.9472, "step": 16220 }, { "epoch": 0.6346740746537288, "grad_norm": 0.0, "learning_rate": 6.220485353350467e-06, "loss": 1.0699, "step": 16221 }, { "epoch": 0.6347132013459582, "grad_norm": 0.0, "learning_rate": 6.2193121489351e-06, "loss": 0.9035, "step": 16222 }, { "epoch": 0.6347523280381876, "grad_norm": 0.0, "learning_rate": 6.21813900523284e-06, "loss": 0.9825, "step": 16223 }, { "epoch": 0.6347914547304171, "grad_norm": 0.0, "learning_rate": 6.21696592226253e-06, "loss": 0.9397, "step": 16224 }, { "epoch": 0.6348305814226465, "grad_norm": 0.0, "learning_rate": 6.215792900043007e-06, "loss": 0.9868, "step": 16225 }, { "epoch": 0.634869708114876, "grad_norm": 0.0, "learning_rate": 6.214619938593108e-06, "loss": 1.038, "step": 16226 }, { "epoch": 0.6349088348071054, "grad_norm": 0.0, "learning_rate": 6.213447037931669e-06, "loss": 1.0684, "step": 16227 }, { "epoch": 0.6349479614993349, "grad_norm": 0.0, "learning_rate": 6.212274198077526e-06, "loss": 0.9431, "step": 16228 }, { "epoch": 0.6349870881915642, "grad_norm": 0.0, "learning_rate": 6.211101419049512e-06, "loss": 1.0055, "step": 16229 }, { "epoch": 0.6350262148837937, "grad_norm": 0.0, "learning_rate": 6.209928700866463e-06, "loss": 1.0514, "step": 16230 }, { "epoch": 0.6350653415760231, "grad_norm": 0.0, "learning_rate": 6.208756043547211e-06, "loss": 1.0526, "step": 16231 }, { "epoch": 0.6351044682682526, "grad_norm": 0.0, "learning_rate": 6.207583447110581e-06, "loss": 1.0097, "step": 16232 }, { "epoch": 0.635143594960482, "grad_norm": 0.0, "learning_rate": 6.206410911575413e-06, "loss": 0.9831, "step": 16233 }, { "epoch": 0.6351827216527115, "grad_norm": 0.0, "learning_rate": 6.205238436960532e-06, "loss": 0.9677, "step": 16234 }, { "epoch": 0.6352218483449409, "grad_norm": 0.0, "learning_rate": 6.204066023284767e-06, "loss": 1.0332, "step": 16235 }, { "epoch": 0.6352609750371704, "grad_norm": 0.0, "learning_rate": 6.202893670566945e-06, "loss": 1.0758, "step": 16236 }, { "epoch": 0.6353001017293998, "grad_norm": 0.0, "learning_rate": 6.201721378825889e-06, "loss": 0.9635, "step": 16237 }, { "epoch": 0.6353392284216293, "grad_norm": 0.0, "learning_rate": 6.20054914808043e-06, "loss": 0.9709, "step": 16238 }, { "epoch": 0.6353783551138587, "grad_norm": 0.0, "learning_rate": 6.199376978349394e-06, "loss": 0.9457, "step": 16239 }, { "epoch": 0.6354174818060881, "grad_norm": 0.0, "learning_rate": 6.198204869651599e-06, "loss": 1.0611, "step": 16240 }, { "epoch": 0.6354566084983175, "grad_norm": 0.0, "learning_rate": 6.1970328220058665e-06, "loss": 0.9906, "step": 16241 }, { "epoch": 0.635495735190547, "grad_norm": 0.0, "learning_rate": 6.195860835431024e-06, "loss": 0.9953, "step": 16242 }, { "epoch": 0.6355348618827764, "grad_norm": 0.0, "learning_rate": 6.1946889099458915e-06, "loss": 1.0386, "step": 16243 }, { "epoch": 0.6355739885750059, "grad_norm": 0.0, "learning_rate": 6.1935170455692874e-06, "loss": 0.9199, "step": 16244 }, { "epoch": 0.6356131152672353, "grad_norm": 0.0, "learning_rate": 6.192345242320028e-06, "loss": 0.9479, "step": 16245 }, { "epoch": 0.6356522419594648, "grad_norm": 0.0, "learning_rate": 6.1911735002169295e-06, "loss": 0.9633, "step": 16246 }, { "epoch": 0.6356913686516942, "grad_norm": 0.0, "learning_rate": 6.190001819278817e-06, "loss": 0.9576, "step": 16247 }, { "epoch": 0.6357304953439237, "grad_norm": 0.0, "learning_rate": 6.1888301995245e-06, "loss": 1.1973, "step": 16248 }, { "epoch": 0.6357696220361531, "grad_norm": 0.0, "learning_rate": 6.187658640972794e-06, "loss": 1.1147, "step": 16249 }, { "epoch": 0.6358087487283826, "grad_norm": 0.0, "learning_rate": 6.186487143642508e-06, "loss": 0.8904, "step": 16250 }, { "epoch": 0.6358478754206119, "grad_norm": 0.0, "learning_rate": 6.1853157075524664e-06, "loss": 1.036, "step": 16251 }, { "epoch": 0.6358870021128413, "grad_norm": 0.0, "learning_rate": 6.184144332721473e-06, "loss": 0.9683, "step": 16252 }, { "epoch": 0.6359261288050708, "grad_norm": 0.0, "learning_rate": 6.18297301916834e-06, "loss": 0.9791, "step": 16253 }, { "epoch": 0.6359652554973002, "grad_norm": 0.0, "learning_rate": 6.181801766911878e-06, "loss": 0.8203, "step": 16254 }, { "epoch": 0.6360043821895297, "grad_norm": 0.0, "learning_rate": 6.1806305759708906e-06, "loss": 0.8343, "step": 16255 }, { "epoch": 0.6360435088817591, "grad_norm": 0.0, "learning_rate": 6.179459446364194e-06, "loss": 0.9378, "step": 16256 }, { "epoch": 0.6360826355739886, "grad_norm": 0.0, "learning_rate": 6.178288378110593e-06, "loss": 0.9266, "step": 16257 }, { "epoch": 0.636121762266218, "grad_norm": 0.0, "learning_rate": 6.177117371228889e-06, "loss": 1.0191, "step": 16258 }, { "epoch": 0.6361608889584475, "grad_norm": 0.0, "learning_rate": 6.1759464257378874e-06, "loss": 0.9271, "step": 16259 }, { "epoch": 0.6362000156506769, "grad_norm": 0.0, "learning_rate": 6.1747755416563996e-06, "loss": 0.853, "step": 16260 }, { "epoch": 0.6362391423429063, "grad_norm": 0.0, "learning_rate": 6.173604719003221e-06, "loss": 1.0435, "step": 16261 }, { "epoch": 0.6362782690351357, "grad_norm": 0.0, "learning_rate": 6.172433957797157e-06, "loss": 1.129, "step": 16262 }, { "epoch": 0.6363173957273652, "grad_norm": 0.0, "learning_rate": 6.171263258057008e-06, "loss": 1.0555, "step": 16263 }, { "epoch": 0.6363565224195946, "grad_norm": 0.0, "learning_rate": 6.1700926198015685e-06, "loss": 0.9437, "step": 16264 }, { "epoch": 0.6363956491118241, "grad_norm": 0.0, "learning_rate": 6.168922043049645e-06, "loss": 1.0564, "step": 16265 }, { "epoch": 0.6364347758040535, "grad_norm": 0.0, "learning_rate": 6.167751527820034e-06, "loss": 1.0464, "step": 16266 }, { "epoch": 0.636473902496283, "grad_norm": 0.0, "learning_rate": 6.1665810741315325e-06, "loss": 0.9135, "step": 16267 }, { "epoch": 0.6365130291885124, "grad_norm": 0.0, "learning_rate": 6.165410682002932e-06, "loss": 0.9938, "step": 16268 }, { "epoch": 0.6365521558807419, "grad_norm": 0.0, "learning_rate": 6.1642403514530344e-06, "loss": 0.9698, "step": 16269 }, { "epoch": 0.6365912825729713, "grad_norm": 0.0, "learning_rate": 6.163070082500629e-06, "loss": 1.0412, "step": 16270 }, { "epoch": 0.6366304092652008, "grad_norm": 0.0, "learning_rate": 6.161899875164509e-06, "loss": 0.9734, "step": 16271 }, { "epoch": 0.6366695359574301, "grad_norm": 0.0, "learning_rate": 6.1607297294634675e-06, "loss": 0.9026, "step": 16272 }, { "epoch": 0.6367086626496596, "grad_norm": 0.0, "learning_rate": 6.1595596454162975e-06, "loss": 1.0257, "step": 16273 }, { "epoch": 0.636747789341889, "grad_norm": 0.0, "learning_rate": 6.158389623041787e-06, "loss": 0.9179, "step": 16274 }, { "epoch": 0.6367869160341185, "grad_norm": 0.0, "learning_rate": 6.157219662358724e-06, "loss": 0.981, "step": 16275 }, { "epoch": 0.6368260427263479, "grad_norm": 0.0, "learning_rate": 6.156049763385901e-06, "loss": 1.0577, "step": 16276 }, { "epoch": 0.6368651694185774, "grad_norm": 0.0, "learning_rate": 6.154879926142095e-06, "loss": 1.0583, "step": 16277 }, { "epoch": 0.6369042961108068, "grad_norm": 0.0, "learning_rate": 6.153710150646106e-06, "loss": 0.9635, "step": 16278 }, { "epoch": 0.6369434228030363, "grad_norm": 0.0, "learning_rate": 6.1525404369167096e-06, "loss": 0.9804, "step": 16279 }, { "epoch": 0.6369825494952657, "grad_norm": 0.0, "learning_rate": 6.1513707849726945e-06, "loss": 0.9578, "step": 16280 }, { "epoch": 0.6370216761874951, "grad_norm": 0.0, "learning_rate": 6.150201194832837e-06, "loss": 0.8574, "step": 16281 }, { "epoch": 0.6370608028797246, "grad_norm": 0.0, "learning_rate": 6.14903166651593e-06, "loss": 1.0204, "step": 16282 }, { "epoch": 0.6370999295719539, "grad_norm": 0.0, "learning_rate": 6.1478622000407475e-06, "loss": 0.8912, "step": 16283 }, { "epoch": 0.6371390562641834, "grad_norm": 0.0, "learning_rate": 6.1466927954260705e-06, "loss": 0.9347, "step": 16284 }, { "epoch": 0.6371781829564128, "grad_norm": 0.0, "learning_rate": 6.145523452690681e-06, "loss": 1.1616, "step": 16285 }, { "epoch": 0.6372173096486423, "grad_norm": 0.0, "learning_rate": 6.144354171853348e-06, "loss": 1.1168, "step": 16286 }, { "epoch": 0.6372564363408717, "grad_norm": 0.0, "learning_rate": 6.1431849529328615e-06, "loss": 1.0999, "step": 16287 }, { "epoch": 0.6372955630331012, "grad_norm": 0.0, "learning_rate": 6.142015795947991e-06, "loss": 0.9818, "step": 16288 }, { "epoch": 0.6373346897253306, "grad_norm": 0.0, "learning_rate": 6.140846700917513e-06, "loss": 1.0265, "step": 16289 }, { "epoch": 0.6373738164175601, "grad_norm": 0.0, "learning_rate": 6.139677667860197e-06, "loss": 1.0602, "step": 16290 }, { "epoch": 0.6374129431097895, "grad_norm": 0.0, "learning_rate": 6.1385086967948235e-06, "loss": 0.9579, "step": 16291 }, { "epoch": 0.637452069802019, "grad_norm": 0.0, "learning_rate": 6.137339787740163e-06, "loss": 0.967, "step": 16292 }, { "epoch": 0.6374911964942483, "grad_norm": 0.0, "learning_rate": 6.136170940714985e-06, "loss": 0.9873, "step": 16293 }, { "epoch": 0.6375303231864778, "grad_norm": 0.0, "learning_rate": 6.135002155738059e-06, "loss": 0.9475, "step": 16294 }, { "epoch": 0.6375694498787072, "grad_norm": 0.0, "learning_rate": 6.133833432828153e-06, "loss": 0.9659, "step": 16295 }, { "epoch": 0.6376085765709367, "grad_norm": 0.0, "learning_rate": 6.13266477200404e-06, "loss": 0.9767, "step": 16296 }, { "epoch": 0.6376477032631661, "grad_norm": 0.0, "learning_rate": 6.131496173284485e-06, "loss": 1.0473, "step": 16297 }, { "epoch": 0.6376868299553956, "grad_norm": 0.0, "learning_rate": 6.130327636688254e-06, "loss": 0.8708, "step": 16298 }, { "epoch": 0.637725956647625, "grad_norm": 0.0, "learning_rate": 6.129159162234109e-06, "loss": 0.9352, "step": 16299 }, { "epoch": 0.6377650833398545, "grad_norm": 0.0, "learning_rate": 6.127990749940822e-06, "loss": 0.8936, "step": 16300 }, { "epoch": 0.6378042100320839, "grad_norm": 0.0, "learning_rate": 6.1268223998271505e-06, "loss": 1.0054, "step": 16301 }, { "epoch": 0.6378433367243134, "grad_norm": 0.0, "learning_rate": 6.125654111911857e-06, "loss": 0.998, "step": 16302 }, { "epoch": 0.6378824634165428, "grad_norm": 0.0, "learning_rate": 6.124485886213705e-06, "loss": 1.0864, "step": 16303 }, { "epoch": 0.6379215901087723, "grad_norm": 0.0, "learning_rate": 6.123317722751449e-06, "loss": 1.1811, "step": 16304 }, { "epoch": 0.6379607168010016, "grad_norm": 0.0, "learning_rate": 6.122149621543856e-06, "loss": 1.0444, "step": 16305 }, { "epoch": 0.6379998434932311, "grad_norm": 0.0, "learning_rate": 6.120981582609682e-06, "loss": 1.0622, "step": 16306 }, { "epoch": 0.6380389701854605, "grad_norm": 0.0, "learning_rate": 6.119813605967681e-06, "loss": 1.0276, "step": 16307 }, { "epoch": 0.63807809687769, "grad_norm": 0.0, "learning_rate": 6.118645691636612e-06, "loss": 0.9851, "step": 16308 }, { "epoch": 0.6381172235699194, "grad_norm": 0.0, "learning_rate": 6.117477839635231e-06, "loss": 1.1006, "step": 16309 }, { "epoch": 0.6381563502621488, "grad_norm": 0.0, "learning_rate": 6.116310049982288e-06, "loss": 1.0277, "step": 16310 }, { "epoch": 0.6381954769543783, "grad_norm": 0.0, "learning_rate": 6.1151423226965416e-06, "loss": 1.0138, "step": 16311 }, { "epoch": 0.6382346036466077, "grad_norm": 0.0, "learning_rate": 6.1139746577967405e-06, "loss": 1.0628, "step": 16312 }, { "epoch": 0.6382737303388372, "grad_norm": 0.0, "learning_rate": 6.112807055301635e-06, "loss": 0.9718, "step": 16313 }, { "epoch": 0.6383128570310665, "grad_norm": 0.0, "learning_rate": 6.111639515229981e-06, "loss": 0.9781, "step": 16314 }, { "epoch": 0.638351983723296, "grad_norm": 0.0, "learning_rate": 6.110472037600523e-06, "loss": 1.1393, "step": 16315 }, { "epoch": 0.6383911104155254, "grad_norm": 0.0, "learning_rate": 6.109304622432008e-06, "loss": 1.0207, "step": 16316 }, { "epoch": 0.6384302371077549, "grad_norm": 0.0, "learning_rate": 6.1081372697431854e-06, "loss": 1.0076, "step": 16317 }, { "epoch": 0.6384693637999843, "grad_norm": 0.0, "learning_rate": 6.106969979552804e-06, "loss": 0.9116, "step": 16318 }, { "epoch": 0.6385084904922138, "grad_norm": 0.0, "learning_rate": 6.105802751879606e-06, "loss": 0.9599, "step": 16319 }, { "epoch": 0.6385476171844432, "grad_norm": 0.0, "learning_rate": 6.1046355867423355e-06, "loss": 1.024, "step": 16320 }, { "epoch": 0.6385867438766727, "grad_norm": 0.0, "learning_rate": 6.103468484159738e-06, "loss": 0.9828, "step": 16321 }, { "epoch": 0.6386258705689021, "grad_norm": 0.0, "learning_rate": 6.102301444150549e-06, "loss": 1.0272, "step": 16322 }, { "epoch": 0.6386649972611316, "grad_norm": 0.0, "learning_rate": 6.101134466733518e-06, "loss": 0.9992, "step": 16323 }, { "epoch": 0.638704123953361, "grad_norm": 0.0, "learning_rate": 6.099967551927384e-06, "loss": 1.1217, "step": 16324 }, { "epoch": 0.6387432506455905, "grad_norm": 0.0, "learning_rate": 6.098800699750882e-06, "loss": 1.0367, "step": 16325 }, { "epoch": 0.6387823773378198, "grad_norm": 0.0, "learning_rate": 6.097633910222749e-06, "loss": 1.068, "step": 16326 }, { "epoch": 0.6388215040300493, "grad_norm": 0.0, "learning_rate": 6.09646718336173e-06, "loss": 1.1642, "step": 16327 }, { "epoch": 0.6388606307222787, "grad_norm": 0.0, "learning_rate": 6.095300519186555e-06, "loss": 1.0878, "step": 16328 }, { "epoch": 0.6388997574145082, "grad_norm": 0.0, "learning_rate": 6.094133917715963e-06, "loss": 1.0206, "step": 16329 }, { "epoch": 0.6389388841067376, "grad_norm": 0.0, "learning_rate": 6.09296737896868e-06, "loss": 0.9232, "step": 16330 }, { "epoch": 0.6389780107989671, "grad_norm": 0.0, "learning_rate": 6.091800902963451e-06, "loss": 1.0024, "step": 16331 }, { "epoch": 0.6390171374911965, "grad_norm": 0.0, "learning_rate": 6.090634489719e-06, "loss": 0.8987, "step": 16332 }, { "epoch": 0.639056264183426, "grad_norm": 0.0, "learning_rate": 6.089468139254062e-06, "loss": 1.1493, "step": 16333 }, { "epoch": 0.6390953908756554, "grad_norm": 0.0, "learning_rate": 6.088301851587366e-06, "loss": 0.9518, "step": 16334 }, { "epoch": 0.6391345175678849, "grad_norm": 0.0, "learning_rate": 6.087135626737636e-06, "loss": 1.0645, "step": 16335 }, { "epoch": 0.6391736442601142, "grad_norm": 0.0, "learning_rate": 6.085969464723609e-06, "loss": 0.919, "step": 16336 }, { "epoch": 0.6392127709523436, "grad_norm": 0.0, "learning_rate": 6.084803365564007e-06, "loss": 0.9962, "step": 16337 }, { "epoch": 0.6392518976445731, "grad_norm": 0.0, "learning_rate": 6.083637329277557e-06, "loss": 1.1123, "step": 16338 }, { "epoch": 0.6392910243368025, "grad_norm": 0.0, "learning_rate": 6.08247135588298e-06, "loss": 1.0887, "step": 16339 }, { "epoch": 0.639330151029032, "grad_norm": 0.0, "learning_rate": 6.081305445399008e-06, "loss": 1.1124, "step": 16340 }, { "epoch": 0.6393692777212614, "grad_norm": 0.0, "learning_rate": 6.080139597844361e-06, "loss": 0.8588, "step": 16341 }, { "epoch": 0.6394084044134909, "grad_norm": 0.0, "learning_rate": 6.078973813237761e-06, "loss": 0.8532, "step": 16342 }, { "epoch": 0.6394475311057203, "grad_norm": 0.0, "learning_rate": 6.077808091597927e-06, "loss": 0.9852, "step": 16343 }, { "epoch": 0.6394866577979498, "grad_norm": 0.0, "learning_rate": 6.076642432943576e-06, "loss": 0.9949, "step": 16344 }, { "epoch": 0.6395257844901792, "grad_norm": 0.0, "learning_rate": 6.0754768372934345e-06, "loss": 0.9696, "step": 16345 }, { "epoch": 0.6395649111824087, "grad_norm": 0.0, "learning_rate": 6.074311304666219e-06, "loss": 1.0019, "step": 16346 }, { "epoch": 0.639604037874638, "grad_norm": 0.0, "learning_rate": 6.073145835080645e-06, "loss": 0.9931, "step": 16347 }, { "epoch": 0.6396431645668675, "grad_norm": 0.0, "learning_rate": 6.071980428555425e-06, "loss": 1.1149, "step": 16348 }, { "epoch": 0.6396822912590969, "grad_norm": 0.0, "learning_rate": 6.070815085109278e-06, "loss": 1.004, "step": 16349 }, { "epoch": 0.6397214179513264, "grad_norm": 0.0, "learning_rate": 6.069649804760919e-06, "loss": 1.0825, "step": 16350 }, { "epoch": 0.6397605446435558, "grad_norm": 0.0, "learning_rate": 6.068484587529059e-06, "loss": 0.9873, "step": 16351 }, { "epoch": 0.6397996713357853, "grad_norm": 0.0, "learning_rate": 6.06731943343241e-06, "loss": 0.9815, "step": 16352 }, { "epoch": 0.6398387980280147, "grad_norm": 0.0, "learning_rate": 6.066154342489681e-06, "loss": 0.9571, "step": 16353 }, { "epoch": 0.6398779247202442, "grad_norm": 0.0, "learning_rate": 6.0649893147195845e-06, "loss": 0.9958, "step": 16354 }, { "epoch": 0.6399170514124736, "grad_norm": 0.0, "learning_rate": 6.06382435014083e-06, "loss": 1.0685, "step": 16355 }, { "epoch": 0.6399561781047031, "grad_norm": 0.0, "learning_rate": 6.062659448772124e-06, "loss": 0.9602, "step": 16356 }, { "epoch": 0.6399953047969325, "grad_norm": 0.0, "learning_rate": 6.061494610632171e-06, "loss": 1.0688, "step": 16357 }, { "epoch": 0.640034431489162, "grad_norm": 0.0, "learning_rate": 6.060329835739682e-06, "loss": 0.6856, "step": 16358 }, { "epoch": 0.6400735581813913, "grad_norm": 0.0, "learning_rate": 6.05916512411336e-06, "loss": 1.0289, "step": 16359 }, { "epoch": 0.6401126848736208, "grad_norm": 0.0, "learning_rate": 6.058000475771906e-06, "loss": 1.0959, "step": 16360 }, { "epoch": 0.6401518115658502, "grad_norm": 0.0, "learning_rate": 6.056835890734025e-06, "loss": 1.0509, "step": 16361 }, { "epoch": 0.6401909382580797, "grad_norm": 0.0, "learning_rate": 6.055671369018418e-06, "loss": 1.0793, "step": 16362 }, { "epoch": 0.6402300649503091, "grad_norm": 0.0, "learning_rate": 6.054506910643787e-06, "loss": 0.9165, "step": 16363 }, { "epoch": 0.6402691916425386, "grad_norm": 0.0, "learning_rate": 6.0533425156288325e-06, "loss": 0.9056, "step": 16364 }, { "epoch": 0.640308318334768, "grad_norm": 0.0, "learning_rate": 6.05217818399225e-06, "loss": 0.9701, "step": 16365 }, { "epoch": 0.6403474450269974, "grad_norm": 0.0, "learning_rate": 6.051013915752737e-06, "loss": 1.0755, "step": 16366 }, { "epoch": 0.6403865717192269, "grad_norm": 0.0, "learning_rate": 6.049849710928995e-06, "loss": 0.8748, "step": 16367 }, { "epoch": 0.6404256984114562, "grad_norm": 0.0, "learning_rate": 6.048685569539717e-06, "loss": 1.0241, "step": 16368 }, { "epoch": 0.6404648251036857, "grad_norm": 0.0, "learning_rate": 6.0475214916035985e-06, "loss": 0.9764, "step": 16369 }, { "epoch": 0.6405039517959151, "grad_norm": 0.0, "learning_rate": 6.046357477139331e-06, "loss": 1.0464, "step": 16370 }, { "epoch": 0.6405430784881446, "grad_norm": 0.0, "learning_rate": 6.045193526165604e-06, "loss": 0.9805, "step": 16371 }, { "epoch": 0.640582205180374, "grad_norm": 0.0, "learning_rate": 6.044029638701117e-06, "loss": 0.9451, "step": 16372 }, { "epoch": 0.6406213318726035, "grad_norm": 0.0, "learning_rate": 6.042865814764558e-06, "loss": 1.1023, "step": 16373 }, { "epoch": 0.6406604585648329, "grad_norm": 0.0, "learning_rate": 6.041702054374615e-06, "loss": 1.0242, "step": 16374 }, { "epoch": 0.6406995852570624, "grad_norm": 0.0, "learning_rate": 6.0405383575499735e-06, "loss": 1.0295, "step": 16375 }, { "epoch": 0.6407387119492918, "grad_norm": 0.0, "learning_rate": 6.039374724309328e-06, "loss": 0.9569, "step": 16376 }, { "epoch": 0.6407778386415213, "grad_norm": 0.0, "learning_rate": 6.0382111546713605e-06, "loss": 1.0005, "step": 16377 }, { "epoch": 0.6408169653337507, "grad_norm": 0.0, "learning_rate": 6.037047648654759e-06, "loss": 1.0244, "step": 16378 }, { "epoch": 0.6408560920259802, "grad_norm": 0.0, "learning_rate": 6.035884206278206e-06, "loss": 0.9105, "step": 16379 }, { "epoch": 0.6408952187182095, "grad_norm": 0.0, "learning_rate": 6.034720827560381e-06, "loss": 0.9719, "step": 16380 }, { "epoch": 0.640934345410439, "grad_norm": 0.0, "learning_rate": 6.033557512519975e-06, "loss": 0.8893, "step": 16381 }, { "epoch": 0.6409734721026684, "grad_norm": 0.0, "learning_rate": 6.032394261175665e-06, "loss": 0.8642, "step": 16382 }, { "epoch": 0.6410125987948979, "grad_norm": 0.0, "learning_rate": 6.031231073546131e-06, "loss": 0.9669, "step": 16383 }, { "epoch": 0.6410517254871273, "grad_norm": 0.0, "learning_rate": 6.03006794965005e-06, "loss": 1.1372, "step": 16384 }, { "epoch": 0.6410908521793568, "grad_norm": 0.0, "learning_rate": 6.028904889506107e-06, "loss": 0.9453, "step": 16385 }, { "epoch": 0.6411299788715862, "grad_norm": 0.0, "learning_rate": 6.027741893132974e-06, "loss": 1.0634, "step": 16386 }, { "epoch": 0.6411691055638157, "grad_norm": 0.0, "learning_rate": 6.02657896054933e-06, "loss": 1.0417, "step": 16387 }, { "epoch": 0.6412082322560451, "grad_norm": 0.0, "learning_rate": 6.025416091773845e-06, "loss": 1.0897, "step": 16388 }, { "epoch": 0.6412473589482746, "grad_norm": 0.0, "learning_rate": 6.0242532868252e-06, "loss": 1.0469, "step": 16389 }, { "epoch": 0.641286485640504, "grad_norm": 0.0, "learning_rate": 6.023090545722066e-06, "loss": 0.985, "step": 16390 }, { "epoch": 0.6413256123327334, "grad_norm": 0.0, "learning_rate": 6.021927868483115e-06, "loss": 0.987, "step": 16391 }, { "epoch": 0.6413647390249628, "grad_norm": 0.0, "learning_rate": 6.020765255127017e-06, "loss": 1.0316, "step": 16392 }, { "epoch": 0.6414038657171923, "grad_norm": 0.0, "learning_rate": 6.019602705672441e-06, "loss": 1.1577, "step": 16393 }, { "epoch": 0.6414429924094217, "grad_norm": 0.0, "learning_rate": 6.0184402201380575e-06, "loss": 0.9888, "step": 16394 }, { "epoch": 0.6414821191016511, "grad_norm": 0.0, "learning_rate": 6.017277798542536e-06, "loss": 0.9632, "step": 16395 }, { "epoch": 0.6415212457938806, "grad_norm": 0.0, "learning_rate": 6.016115440904544e-06, "loss": 1.1407, "step": 16396 }, { "epoch": 0.64156037248611, "grad_norm": 0.0, "learning_rate": 6.014953147242744e-06, "loss": 1.0155, "step": 16397 }, { "epoch": 0.6415994991783395, "grad_norm": 0.0, "learning_rate": 6.013790917575804e-06, "loss": 0.9564, "step": 16398 }, { "epoch": 0.6416386258705689, "grad_norm": 0.0, "learning_rate": 6.012628751922385e-06, "loss": 0.9928, "step": 16399 }, { "epoch": 0.6416777525627984, "grad_norm": 0.0, "learning_rate": 6.011466650301154e-06, "loss": 1.0542, "step": 16400 }, { "epoch": 0.6417168792550277, "grad_norm": 0.0, "learning_rate": 6.010304612730771e-06, "loss": 0.9495, "step": 16401 }, { "epoch": 0.6417560059472572, "grad_norm": 0.0, "learning_rate": 6.009142639229893e-06, "loss": 1.1446, "step": 16402 }, { "epoch": 0.6417951326394866, "grad_norm": 0.0, "learning_rate": 6.007980729817187e-06, "loss": 0.9922, "step": 16403 }, { "epoch": 0.6418342593317161, "grad_norm": 0.0, "learning_rate": 6.006818884511307e-06, "loss": 1.0087, "step": 16404 }, { "epoch": 0.6418733860239455, "grad_norm": 0.0, "learning_rate": 6.005657103330911e-06, "loss": 0.932, "step": 16405 }, { "epoch": 0.641912512716175, "grad_norm": 0.0, "learning_rate": 6.004495386294657e-06, "loss": 1.0807, "step": 16406 }, { "epoch": 0.6419516394084044, "grad_norm": 0.0, "learning_rate": 6.003333733421202e-06, "loss": 1.0647, "step": 16407 }, { "epoch": 0.6419907661006339, "grad_norm": 0.0, "learning_rate": 6.002172144729199e-06, "loss": 0.9278, "step": 16408 }, { "epoch": 0.6420298927928633, "grad_norm": 0.0, "learning_rate": 6.001010620237302e-06, "loss": 0.9456, "step": 16409 }, { "epoch": 0.6420690194850928, "grad_norm": 0.0, "learning_rate": 5.999849159964164e-06, "loss": 0.983, "step": 16410 }, { "epoch": 0.6421081461773221, "grad_norm": 0.0, "learning_rate": 5.99868776392843e-06, "loss": 0.8174, "step": 16411 }, { "epoch": 0.6421472728695516, "grad_norm": 0.0, "learning_rate": 5.997526432148763e-06, "loss": 1.0322, "step": 16412 }, { "epoch": 0.642186399561781, "grad_norm": 0.0, "learning_rate": 5.9963651646438045e-06, "loss": 0.9782, "step": 16413 }, { "epoch": 0.6422255262540105, "grad_norm": 0.0, "learning_rate": 5.995203961432205e-06, "loss": 0.9724, "step": 16414 }, { "epoch": 0.6422646529462399, "grad_norm": 0.0, "learning_rate": 5.994042822532608e-06, "loss": 0.9381, "step": 16415 }, { "epoch": 0.6423037796384694, "grad_norm": 0.0, "learning_rate": 5.992881747963667e-06, "loss": 0.9651, "step": 16416 }, { "epoch": 0.6423429063306988, "grad_norm": 0.0, "learning_rate": 5.991720737744024e-06, "loss": 1.0706, "step": 16417 }, { "epoch": 0.6423820330229283, "grad_norm": 0.0, "learning_rate": 5.990559791892323e-06, "loss": 0.9788, "step": 16418 }, { "epoch": 0.6424211597151577, "grad_norm": 0.0, "learning_rate": 5.989398910427209e-06, "loss": 1.0131, "step": 16419 }, { "epoch": 0.6424602864073872, "grad_norm": 0.0, "learning_rate": 5.988238093367318e-06, "loss": 0.993, "step": 16420 }, { "epoch": 0.6424994130996166, "grad_norm": 0.0, "learning_rate": 5.9870773407313e-06, "loss": 0.9033, "step": 16421 }, { "epoch": 0.642538539791846, "grad_norm": 0.0, "learning_rate": 5.985916652537791e-06, "loss": 0.9233, "step": 16422 }, { "epoch": 0.6425776664840754, "grad_norm": 0.0, "learning_rate": 5.984756028805432e-06, "loss": 0.9426, "step": 16423 }, { "epoch": 0.6426167931763048, "grad_norm": 0.0, "learning_rate": 5.983595469552855e-06, "loss": 0.8038, "step": 16424 }, { "epoch": 0.6426559198685343, "grad_norm": 0.0, "learning_rate": 5.982434974798705e-06, "loss": 0.964, "step": 16425 }, { "epoch": 0.6426950465607637, "grad_norm": 0.0, "learning_rate": 5.981274544561617e-06, "loss": 1.1541, "step": 16426 }, { "epoch": 0.6427341732529932, "grad_norm": 0.0, "learning_rate": 5.980114178860224e-06, "loss": 0.8937, "step": 16427 }, { "epoch": 0.6427732999452226, "grad_norm": 0.0, "learning_rate": 5.978953877713159e-06, "loss": 1.0223, "step": 16428 }, { "epoch": 0.6428124266374521, "grad_norm": 0.0, "learning_rate": 5.977793641139051e-06, "loss": 1.0811, "step": 16429 }, { "epoch": 0.6428515533296815, "grad_norm": 0.0, "learning_rate": 5.976633469156543e-06, "loss": 0.9023, "step": 16430 }, { "epoch": 0.642890680021911, "grad_norm": 0.0, "learning_rate": 5.9754733617842585e-06, "loss": 1.1102, "step": 16431 }, { "epoch": 0.6429298067141404, "grad_norm": 0.0, "learning_rate": 5.97431331904083e-06, "loss": 0.9621, "step": 16432 }, { "epoch": 0.6429689334063698, "grad_norm": 0.0, "learning_rate": 5.973153340944882e-06, "loss": 1.0077, "step": 16433 }, { "epoch": 0.6430080600985992, "grad_norm": 0.0, "learning_rate": 5.971993427515047e-06, "loss": 0.9922, "step": 16434 }, { "epoch": 0.6430471867908287, "grad_norm": 0.0, "learning_rate": 5.970833578769951e-06, "loss": 1.0744, "step": 16435 }, { "epoch": 0.6430863134830581, "grad_norm": 0.0, "learning_rate": 5.969673794728219e-06, "loss": 0.9746, "step": 16436 }, { "epoch": 0.6431254401752876, "grad_norm": 0.0, "learning_rate": 5.968514075408476e-06, "loss": 1.0675, "step": 16437 }, { "epoch": 0.643164566867517, "grad_norm": 0.0, "learning_rate": 5.967354420829341e-06, "loss": 0.9796, "step": 16438 }, { "epoch": 0.6432036935597465, "grad_norm": 0.0, "learning_rate": 5.9661948310094446e-06, "loss": 0.9672, "step": 16439 }, { "epoch": 0.6432428202519759, "grad_norm": 0.0, "learning_rate": 5.965035305967405e-06, "loss": 0.9576, "step": 16440 }, { "epoch": 0.6432819469442054, "grad_norm": 0.0, "learning_rate": 5.9638758457218425e-06, "loss": 1.0323, "step": 16441 }, { "epoch": 0.6433210736364348, "grad_norm": 0.0, "learning_rate": 5.962716450291372e-06, "loss": 1.0099, "step": 16442 }, { "epoch": 0.6433602003286643, "grad_norm": 0.0, "learning_rate": 5.961557119694621e-06, "loss": 1.0543, "step": 16443 }, { "epoch": 0.6433993270208936, "grad_norm": 0.0, "learning_rate": 5.960397853950199e-06, "loss": 0.9954, "step": 16444 }, { "epoch": 0.6434384537131231, "grad_norm": 0.0, "learning_rate": 5.9592386530767285e-06, "loss": 1.0575, "step": 16445 }, { "epoch": 0.6434775804053525, "grad_norm": 0.0, "learning_rate": 5.9580795170928195e-06, "loss": 1.0145, "step": 16446 }, { "epoch": 0.643516707097582, "grad_norm": 0.0, "learning_rate": 5.95692044601709e-06, "loss": 0.8987, "step": 16447 }, { "epoch": 0.6435558337898114, "grad_norm": 0.0, "learning_rate": 5.955761439868153e-06, "loss": 1.1647, "step": 16448 }, { "epoch": 0.6435949604820409, "grad_norm": 0.0, "learning_rate": 5.9546024986646186e-06, "loss": 1.1093, "step": 16449 }, { "epoch": 0.6436340871742703, "grad_norm": 0.0, "learning_rate": 5.953443622425097e-06, "loss": 0.9493, "step": 16450 }, { "epoch": 0.6436732138664997, "grad_norm": 0.0, "learning_rate": 5.952284811168201e-06, "loss": 1.0493, "step": 16451 }, { "epoch": 0.6437123405587292, "grad_norm": 0.0, "learning_rate": 5.95112606491254e-06, "loss": 1.0005, "step": 16452 }, { "epoch": 0.6437514672509586, "grad_norm": 0.0, "learning_rate": 5.9499673836767215e-06, "loss": 1.0298, "step": 16453 }, { "epoch": 0.643790593943188, "grad_norm": 0.0, "learning_rate": 5.948808767479352e-06, "loss": 1.0637, "step": 16454 }, { "epoch": 0.6438297206354174, "grad_norm": 0.0, "learning_rate": 5.947650216339031e-06, "loss": 0.9817, "step": 16455 }, { "epoch": 0.6438688473276469, "grad_norm": 0.0, "learning_rate": 5.946491730274376e-06, "loss": 1.0445, "step": 16456 }, { "epoch": 0.6439079740198763, "grad_norm": 0.0, "learning_rate": 5.945333309303984e-06, "loss": 0.9435, "step": 16457 }, { "epoch": 0.6439471007121058, "grad_norm": 0.0, "learning_rate": 5.944174953446457e-06, "loss": 1.0427, "step": 16458 }, { "epoch": 0.6439862274043352, "grad_norm": 0.0, "learning_rate": 5.9430166627204e-06, "loss": 1.0209, "step": 16459 }, { "epoch": 0.6440253540965647, "grad_norm": 0.0, "learning_rate": 5.941858437144408e-06, "loss": 1.2066, "step": 16460 }, { "epoch": 0.6440644807887941, "grad_norm": 0.0, "learning_rate": 5.940700276737087e-06, "loss": 1.0336, "step": 16461 }, { "epoch": 0.6441036074810236, "grad_norm": 0.0, "learning_rate": 5.939542181517033e-06, "loss": 0.9209, "step": 16462 }, { "epoch": 0.644142734173253, "grad_norm": 0.0, "learning_rate": 5.938384151502844e-06, "loss": 0.7725, "step": 16463 }, { "epoch": 0.6441818608654825, "grad_norm": 0.0, "learning_rate": 5.937226186713112e-06, "loss": 0.9896, "step": 16464 }, { "epoch": 0.6442209875577118, "grad_norm": 0.0, "learning_rate": 5.93606828716644e-06, "loss": 1.1304, "step": 16465 }, { "epoch": 0.6442601142499413, "grad_norm": 0.0, "learning_rate": 5.934910452881419e-06, "loss": 1.0737, "step": 16466 }, { "epoch": 0.6442992409421707, "grad_norm": 0.0, "learning_rate": 5.933752683876642e-06, "loss": 0.8894, "step": 16467 }, { "epoch": 0.6443383676344002, "grad_norm": 0.0, "learning_rate": 5.932594980170703e-06, "loss": 1.0076, "step": 16468 }, { "epoch": 0.6443774943266296, "grad_norm": 0.0, "learning_rate": 5.931437341782187e-06, "loss": 0.8882, "step": 16469 }, { "epoch": 0.6444166210188591, "grad_norm": 0.0, "learning_rate": 5.930279768729692e-06, "loss": 1.0278, "step": 16470 }, { "epoch": 0.6444557477110885, "grad_norm": 0.0, "learning_rate": 5.929122261031806e-06, "loss": 1.0162, "step": 16471 }, { "epoch": 0.644494874403318, "grad_norm": 0.0, "learning_rate": 5.927964818707115e-06, "loss": 0.9332, "step": 16472 }, { "epoch": 0.6445340010955474, "grad_norm": 0.0, "learning_rate": 5.926807441774202e-06, "loss": 1.0438, "step": 16473 }, { "epoch": 0.6445731277877769, "grad_norm": 0.0, "learning_rate": 5.925650130251661e-06, "loss": 0.9486, "step": 16474 }, { "epoch": 0.6446122544800063, "grad_norm": 0.0, "learning_rate": 5.924492884158075e-06, "loss": 0.9182, "step": 16475 }, { "epoch": 0.6446513811722357, "grad_norm": 0.0, "learning_rate": 5.923335703512026e-06, "loss": 0.9587, "step": 16476 }, { "epoch": 0.6446905078644651, "grad_norm": 0.0, "learning_rate": 5.922178588332096e-06, "loss": 1.012, "step": 16477 }, { "epoch": 0.6447296345566946, "grad_norm": 0.0, "learning_rate": 5.921021538636864e-06, "loss": 1.0789, "step": 16478 }, { "epoch": 0.644768761248924, "grad_norm": 0.0, "learning_rate": 5.919864554444921e-06, "loss": 0.9925, "step": 16479 }, { "epoch": 0.6448078879411534, "grad_norm": 0.0, "learning_rate": 5.918707635774838e-06, "loss": 1.0032, "step": 16480 }, { "epoch": 0.6448470146333829, "grad_norm": 0.0, "learning_rate": 5.917550782645198e-06, "loss": 1.011, "step": 16481 }, { "epoch": 0.6448861413256123, "grad_norm": 0.0, "learning_rate": 5.916393995074574e-06, "loss": 1.08, "step": 16482 }, { "epoch": 0.6449252680178418, "grad_norm": 0.0, "learning_rate": 5.915237273081547e-06, "loss": 1.0334, "step": 16483 }, { "epoch": 0.6449643947100712, "grad_norm": 0.0, "learning_rate": 5.914080616684691e-06, "loss": 1.0352, "step": 16484 }, { "epoch": 0.6450035214023007, "grad_norm": 0.0, "learning_rate": 5.912924025902581e-06, "loss": 0.8888, "step": 16485 }, { "epoch": 0.64504264809453, "grad_norm": 0.0, "learning_rate": 5.911767500753789e-06, "loss": 1.0896, "step": 16486 }, { "epoch": 0.6450817747867595, "grad_norm": 0.0, "learning_rate": 5.910611041256888e-06, "loss": 1.0234, "step": 16487 }, { "epoch": 0.6451209014789889, "grad_norm": 0.0, "learning_rate": 5.9094546474304505e-06, "loss": 1.0506, "step": 16488 }, { "epoch": 0.6451600281712184, "grad_norm": 0.0, "learning_rate": 5.908298319293042e-06, "loss": 0.8873, "step": 16489 }, { "epoch": 0.6451991548634478, "grad_norm": 0.0, "learning_rate": 5.90714205686324e-06, "loss": 1.105, "step": 16490 }, { "epoch": 0.6452382815556773, "grad_norm": 0.0, "learning_rate": 5.9059858601596045e-06, "loss": 0.9482, "step": 16491 }, { "epoch": 0.6452774082479067, "grad_norm": 0.0, "learning_rate": 5.904829729200707e-06, "loss": 0.8935, "step": 16492 }, { "epoch": 0.6453165349401362, "grad_norm": 0.0, "learning_rate": 5.903673664005114e-06, "loss": 0.8631, "step": 16493 }, { "epoch": 0.6453556616323656, "grad_norm": 0.0, "learning_rate": 5.902517664591389e-06, "loss": 1.0424, "step": 16494 }, { "epoch": 0.6453947883245951, "grad_norm": 0.0, "learning_rate": 5.9013617309780925e-06, "loss": 1.1584, "step": 16495 }, { "epoch": 0.6454339150168245, "grad_norm": 0.0, "learning_rate": 5.900205863183792e-06, "loss": 0.965, "step": 16496 }, { "epoch": 0.645473041709054, "grad_norm": 0.0, "learning_rate": 5.89905006122705e-06, "loss": 0.9332, "step": 16497 }, { "epoch": 0.6455121684012833, "grad_norm": 0.0, "learning_rate": 5.897894325126424e-06, "loss": 0.9998, "step": 16498 }, { "epoch": 0.6455512950935128, "grad_norm": 0.0, "learning_rate": 5.896738654900476e-06, "loss": 0.954, "step": 16499 }, { "epoch": 0.6455904217857422, "grad_norm": 0.0, "learning_rate": 5.8955830505677595e-06, "loss": 1.0778, "step": 16500 }, { "epoch": 0.6456295484779717, "grad_norm": 0.0, "learning_rate": 5.89442751214684e-06, "loss": 0.8014, "step": 16501 }, { "epoch": 0.6456686751702011, "grad_norm": 0.0, "learning_rate": 5.89327203965627e-06, "loss": 1.0487, "step": 16502 }, { "epoch": 0.6457078018624306, "grad_norm": 0.0, "learning_rate": 5.892116633114605e-06, "loss": 1.0351, "step": 16503 }, { "epoch": 0.64574692855466, "grad_norm": 0.0, "learning_rate": 5.890961292540396e-06, "loss": 1.0569, "step": 16504 }, { "epoch": 0.6457860552468895, "grad_norm": 0.0, "learning_rate": 5.889806017952202e-06, "loss": 0.8649, "step": 16505 }, { "epoch": 0.6458251819391189, "grad_norm": 0.0, "learning_rate": 5.888650809368574e-06, "loss": 1.0071, "step": 16506 }, { "epoch": 0.6458643086313484, "grad_norm": 0.0, "learning_rate": 5.887495666808063e-06, "loss": 1.0562, "step": 16507 }, { "epoch": 0.6459034353235777, "grad_norm": 0.0, "learning_rate": 5.886340590289218e-06, "loss": 0.9454, "step": 16508 }, { "epoch": 0.6459425620158071, "grad_norm": 0.0, "learning_rate": 5.885185579830584e-06, "loss": 1.0156, "step": 16509 }, { "epoch": 0.6459816887080366, "grad_norm": 0.0, "learning_rate": 5.884030635450717e-06, "loss": 0.9623, "step": 16510 }, { "epoch": 0.646020815400266, "grad_norm": 0.0, "learning_rate": 5.882875757168161e-06, "loss": 0.9402, "step": 16511 }, { "epoch": 0.6460599420924955, "grad_norm": 0.0, "learning_rate": 5.881720945001461e-06, "loss": 1.0029, "step": 16512 }, { "epoch": 0.6460990687847249, "grad_norm": 0.0, "learning_rate": 5.880566198969159e-06, "loss": 0.901, "step": 16513 }, { "epoch": 0.6461381954769544, "grad_norm": 0.0, "learning_rate": 5.879411519089805e-06, "loss": 0.9996, "step": 16514 }, { "epoch": 0.6461773221691838, "grad_norm": 0.0, "learning_rate": 5.878256905381938e-06, "loss": 1.0534, "step": 16515 }, { "epoch": 0.6462164488614133, "grad_norm": 0.0, "learning_rate": 5.8771023578641015e-06, "loss": 0.9221, "step": 16516 }, { "epoch": 0.6462555755536427, "grad_norm": 0.0, "learning_rate": 5.875947876554835e-06, "loss": 1.1082, "step": 16517 }, { "epoch": 0.6462947022458722, "grad_norm": 0.0, "learning_rate": 5.874793461472673e-06, "loss": 1.0726, "step": 16518 }, { "epoch": 0.6463338289381015, "grad_norm": 0.0, "learning_rate": 5.873639112636164e-06, "loss": 0.9022, "step": 16519 }, { "epoch": 0.646372955630331, "grad_norm": 0.0, "learning_rate": 5.872484830063838e-06, "loss": 1.0026, "step": 16520 }, { "epoch": 0.6464120823225604, "grad_norm": 0.0, "learning_rate": 5.8713306137742355e-06, "loss": 1.0073, "step": 16521 }, { "epoch": 0.6464512090147899, "grad_norm": 0.0, "learning_rate": 5.870176463785886e-06, "loss": 0.9791, "step": 16522 }, { "epoch": 0.6464903357070193, "grad_norm": 0.0, "learning_rate": 5.8690223801173305e-06, "loss": 1.0732, "step": 16523 }, { "epoch": 0.6465294623992488, "grad_norm": 0.0, "learning_rate": 5.867868362787099e-06, "loss": 1.0745, "step": 16524 }, { "epoch": 0.6465685890914782, "grad_norm": 0.0, "learning_rate": 5.866714411813725e-06, "loss": 0.9632, "step": 16525 }, { "epoch": 0.6466077157837077, "grad_norm": 0.0, "learning_rate": 5.865560527215737e-06, "loss": 1.0612, "step": 16526 }, { "epoch": 0.6466468424759371, "grad_norm": 0.0, "learning_rate": 5.864406709011665e-06, "loss": 1.0698, "step": 16527 }, { "epoch": 0.6466859691681666, "grad_norm": 0.0, "learning_rate": 5.863252957220038e-06, "loss": 1.0794, "step": 16528 }, { "epoch": 0.646725095860396, "grad_norm": 0.0, "learning_rate": 5.862099271859388e-06, "loss": 0.9004, "step": 16529 }, { "epoch": 0.6467642225526254, "grad_norm": 0.0, "learning_rate": 5.860945652948239e-06, "loss": 1.1013, "step": 16530 }, { "epoch": 0.6468033492448548, "grad_norm": 0.0, "learning_rate": 5.859792100505113e-06, "loss": 1.0376, "step": 16531 }, { "epoch": 0.6468424759370843, "grad_norm": 0.0, "learning_rate": 5.858638614548541e-06, "loss": 1.0644, "step": 16532 }, { "epoch": 0.6468816026293137, "grad_norm": 0.0, "learning_rate": 5.857485195097041e-06, "loss": 0.9814, "step": 16533 }, { "epoch": 0.6469207293215432, "grad_norm": 0.0, "learning_rate": 5.856331842169139e-06, "loss": 0.9962, "step": 16534 }, { "epoch": 0.6469598560137726, "grad_norm": 0.0, "learning_rate": 5.855178555783356e-06, "loss": 0.9116, "step": 16535 }, { "epoch": 0.646998982706002, "grad_norm": 0.0, "learning_rate": 5.85402533595821e-06, "loss": 1.0168, "step": 16536 }, { "epoch": 0.6470381093982315, "grad_norm": 0.0, "learning_rate": 5.852872182712224e-06, "loss": 0.9243, "step": 16537 }, { "epoch": 0.6470772360904609, "grad_norm": 0.0, "learning_rate": 5.851719096063914e-06, "loss": 0.9452, "step": 16538 }, { "epoch": 0.6471163627826904, "grad_norm": 0.0, "learning_rate": 5.850566076031794e-06, "loss": 1.0829, "step": 16539 }, { "epoch": 0.6471554894749197, "grad_norm": 0.0, "learning_rate": 5.849413122634384e-06, "loss": 0.9836, "step": 16540 }, { "epoch": 0.6471946161671492, "grad_norm": 0.0, "learning_rate": 5.848260235890202e-06, "loss": 0.9472, "step": 16541 }, { "epoch": 0.6472337428593786, "grad_norm": 0.0, "learning_rate": 5.847107415817757e-06, "loss": 0.953, "step": 16542 }, { "epoch": 0.6472728695516081, "grad_norm": 0.0, "learning_rate": 5.845954662435563e-06, "loss": 1.0005, "step": 16543 }, { "epoch": 0.6473119962438375, "grad_norm": 0.0, "learning_rate": 5.844801975762131e-06, "loss": 1.0279, "step": 16544 }, { "epoch": 0.647351122936067, "grad_norm": 0.0, "learning_rate": 5.843649355815968e-06, "loss": 0.9425, "step": 16545 }, { "epoch": 0.6473902496282964, "grad_norm": 0.0, "learning_rate": 5.8424968026155935e-06, "loss": 1.0876, "step": 16546 }, { "epoch": 0.6474293763205259, "grad_norm": 0.0, "learning_rate": 5.841344316179511e-06, "loss": 0.9945, "step": 16547 }, { "epoch": 0.6474685030127553, "grad_norm": 0.0, "learning_rate": 5.840191896526226e-06, "loss": 0.9627, "step": 16548 }, { "epoch": 0.6475076297049848, "grad_norm": 0.0, "learning_rate": 5.839039543674245e-06, "loss": 0.9417, "step": 16549 }, { "epoch": 0.6475467563972142, "grad_norm": 0.0, "learning_rate": 5.837887257642075e-06, "loss": 0.9534, "step": 16550 }, { "epoch": 0.6475858830894436, "grad_norm": 0.0, "learning_rate": 5.836735038448225e-06, "loss": 0.9843, "step": 16551 }, { "epoch": 0.647625009781673, "grad_norm": 0.0, "learning_rate": 5.83558288611119e-06, "loss": 1.0379, "step": 16552 }, { "epoch": 0.6476641364739025, "grad_norm": 0.0, "learning_rate": 5.834430800649477e-06, "loss": 1.0869, "step": 16553 }, { "epoch": 0.6477032631661319, "grad_norm": 0.0, "learning_rate": 5.833278782081585e-06, "loss": 0.9055, "step": 16554 }, { "epoch": 0.6477423898583614, "grad_norm": 0.0, "learning_rate": 5.832126830426012e-06, "loss": 0.9981, "step": 16555 }, { "epoch": 0.6477815165505908, "grad_norm": 0.0, "learning_rate": 5.830974945701262e-06, "loss": 1.046, "step": 16556 }, { "epoch": 0.6478206432428203, "grad_norm": 0.0, "learning_rate": 5.829823127925832e-06, "loss": 0.9351, "step": 16557 }, { "epoch": 0.6478597699350497, "grad_norm": 0.0, "learning_rate": 5.828671377118213e-06, "loss": 1.0073, "step": 16558 }, { "epoch": 0.6478988966272792, "grad_norm": 0.0, "learning_rate": 5.827519693296906e-06, "loss": 0.9592, "step": 16559 }, { "epoch": 0.6479380233195086, "grad_norm": 0.0, "learning_rate": 5.826368076480409e-06, "loss": 1.0582, "step": 16560 }, { "epoch": 0.6479771500117381, "grad_norm": 0.0, "learning_rate": 5.825216526687205e-06, "loss": 1.0951, "step": 16561 }, { "epoch": 0.6480162767039674, "grad_norm": 0.0, "learning_rate": 5.824065043935794e-06, "loss": 1.0748, "step": 16562 }, { "epoch": 0.6480554033961969, "grad_norm": 0.0, "learning_rate": 5.822913628244664e-06, "loss": 0.8537, "step": 16563 }, { "epoch": 0.6480945300884263, "grad_norm": 0.0, "learning_rate": 5.821762279632312e-06, "loss": 0.9517, "step": 16564 }, { "epoch": 0.6481336567806557, "grad_norm": 0.0, "learning_rate": 5.820610998117218e-06, "loss": 1.0408, "step": 16565 }, { "epoch": 0.6481727834728852, "grad_norm": 0.0, "learning_rate": 5.819459783717878e-06, "loss": 0.9558, "step": 16566 }, { "epoch": 0.6482119101651146, "grad_norm": 0.0, "learning_rate": 5.818308636452767e-06, "loss": 0.9091, "step": 16567 }, { "epoch": 0.6482510368573441, "grad_norm": 0.0, "learning_rate": 5.817157556340387e-06, "loss": 0.9577, "step": 16568 }, { "epoch": 0.6482901635495735, "grad_norm": 0.0, "learning_rate": 5.816006543399213e-06, "loss": 0.9907, "step": 16569 }, { "epoch": 0.648329290241803, "grad_norm": 0.0, "learning_rate": 5.8148555976477335e-06, "loss": 0.8533, "step": 16570 }, { "epoch": 0.6483684169340324, "grad_norm": 0.0, "learning_rate": 5.813704719104422e-06, "loss": 0.9814, "step": 16571 }, { "epoch": 0.6484075436262619, "grad_norm": 0.0, "learning_rate": 5.812553907787774e-06, "loss": 0.9489, "step": 16572 }, { "epoch": 0.6484466703184912, "grad_norm": 0.0, "learning_rate": 5.811403163716259e-06, "loss": 0.9885, "step": 16573 }, { "epoch": 0.6484857970107207, "grad_norm": 0.0, "learning_rate": 5.810252486908366e-06, "loss": 0.9118, "step": 16574 }, { "epoch": 0.6485249237029501, "grad_norm": 0.0, "learning_rate": 5.809101877382562e-06, "loss": 1.0162, "step": 16575 }, { "epoch": 0.6485640503951796, "grad_norm": 0.0, "learning_rate": 5.807951335157332e-06, "loss": 1.0195, "step": 16576 }, { "epoch": 0.648603177087409, "grad_norm": 0.0, "learning_rate": 5.80680086025115e-06, "loss": 1.0288, "step": 16577 }, { "epoch": 0.6486423037796385, "grad_norm": 0.0, "learning_rate": 5.805650452682496e-06, "loss": 0.96, "step": 16578 }, { "epoch": 0.6486814304718679, "grad_norm": 0.0, "learning_rate": 5.804500112469835e-06, "loss": 0.861, "step": 16579 }, { "epoch": 0.6487205571640974, "grad_norm": 0.0, "learning_rate": 5.803349839631647e-06, "loss": 1.0897, "step": 16580 }, { "epoch": 0.6487596838563268, "grad_norm": 0.0, "learning_rate": 5.802199634186401e-06, "loss": 1.0559, "step": 16581 }, { "epoch": 0.6487988105485563, "grad_norm": 0.0, "learning_rate": 5.801049496152571e-06, "loss": 0.9442, "step": 16582 }, { "epoch": 0.6488379372407856, "grad_norm": 0.0, "learning_rate": 5.799899425548623e-06, "loss": 1.0341, "step": 16583 }, { "epoch": 0.6488770639330151, "grad_norm": 0.0, "learning_rate": 5.79874942239303e-06, "loss": 1.0006, "step": 16584 }, { "epoch": 0.6489161906252445, "grad_norm": 0.0, "learning_rate": 5.797599486704249e-06, "loss": 0.8445, "step": 16585 }, { "epoch": 0.648955317317474, "grad_norm": 0.0, "learning_rate": 5.796449618500761e-06, "loss": 1.0887, "step": 16586 }, { "epoch": 0.6489944440097034, "grad_norm": 0.0, "learning_rate": 5.795299817801024e-06, "loss": 0.941, "step": 16587 }, { "epoch": 0.6490335707019329, "grad_norm": 0.0, "learning_rate": 5.7941500846235045e-06, "loss": 0.9322, "step": 16588 }, { "epoch": 0.6490726973941623, "grad_norm": 0.0, "learning_rate": 5.793000418986656e-06, "loss": 0.9408, "step": 16589 }, { "epoch": 0.6491118240863918, "grad_norm": 0.0, "learning_rate": 5.791850820908959e-06, "loss": 1.0439, "step": 16590 }, { "epoch": 0.6491509507786212, "grad_norm": 0.0, "learning_rate": 5.79070129040886e-06, "loss": 1.0487, "step": 16591 }, { "epoch": 0.6491900774708507, "grad_norm": 0.0, "learning_rate": 5.789551827504827e-06, "loss": 1.0054, "step": 16592 }, { "epoch": 0.64922920416308, "grad_norm": 0.0, "learning_rate": 5.788402432215312e-06, "loss": 0.965, "step": 16593 }, { "epoch": 0.6492683308553094, "grad_norm": 0.0, "learning_rate": 5.787253104558776e-06, "loss": 1.0035, "step": 16594 }, { "epoch": 0.6493074575475389, "grad_norm": 0.0, "learning_rate": 5.786103844553679e-06, "loss": 0.9769, "step": 16595 }, { "epoch": 0.6493465842397683, "grad_norm": 0.0, "learning_rate": 5.784954652218475e-06, "loss": 1.0087, "step": 16596 }, { "epoch": 0.6493857109319978, "grad_norm": 0.0, "learning_rate": 5.783805527571616e-06, "loss": 0.9897, "step": 16597 }, { "epoch": 0.6494248376242272, "grad_norm": 0.0, "learning_rate": 5.782656470631553e-06, "loss": 1.0418, "step": 16598 }, { "epoch": 0.6494639643164567, "grad_norm": 0.0, "learning_rate": 5.78150748141675e-06, "loss": 0.9574, "step": 16599 }, { "epoch": 0.6495030910086861, "grad_norm": 0.0, "learning_rate": 5.780358559945647e-06, "loss": 1.0104, "step": 16600 }, { "epoch": 0.6495422177009156, "grad_norm": 0.0, "learning_rate": 5.779209706236696e-06, "loss": 1.0524, "step": 16601 }, { "epoch": 0.649581344393145, "grad_norm": 0.0, "learning_rate": 5.7780609203083525e-06, "loss": 1.0135, "step": 16602 }, { "epoch": 0.6496204710853745, "grad_norm": 0.0, "learning_rate": 5.7769122021790565e-06, "loss": 0.9566, "step": 16603 }, { "epoch": 0.6496595977776038, "grad_norm": 0.0, "learning_rate": 5.7757635518672595e-06, "loss": 0.8969, "step": 16604 }, { "epoch": 0.6496987244698333, "grad_norm": 0.0, "learning_rate": 5.77461496939141e-06, "loss": 0.9298, "step": 16605 }, { "epoch": 0.6497378511620627, "grad_norm": 0.0, "learning_rate": 5.773466454769946e-06, "loss": 0.9804, "step": 16606 }, { "epoch": 0.6497769778542922, "grad_norm": 0.0, "learning_rate": 5.772318008021314e-06, "loss": 0.9997, "step": 16607 }, { "epoch": 0.6498161045465216, "grad_norm": 0.0, "learning_rate": 5.771169629163957e-06, "loss": 0.9073, "step": 16608 }, { "epoch": 0.6498552312387511, "grad_norm": 0.0, "learning_rate": 5.7700213182163215e-06, "loss": 0.9798, "step": 16609 }, { "epoch": 0.6498943579309805, "grad_norm": 0.0, "learning_rate": 5.768873075196838e-06, "loss": 0.9027, "step": 16610 }, { "epoch": 0.64993348462321, "grad_norm": 0.0, "learning_rate": 5.7677249001239565e-06, "loss": 0.9003, "step": 16611 }, { "epoch": 0.6499726113154394, "grad_norm": 0.0, "learning_rate": 5.766576793016101e-06, "loss": 0.8256, "step": 16612 }, { "epoch": 0.6500117380076689, "grad_norm": 0.0, "learning_rate": 5.7654287538917244e-06, "loss": 1.0099, "step": 16613 }, { "epoch": 0.6500508646998983, "grad_norm": 0.0, "learning_rate": 5.764280782769254e-06, "loss": 0.9669, "step": 16614 }, { "epoch": 0.6500899913921278, "grad_norm": 0.0, "learning_rate": 5.7631328796671285e-06, "loss": 1.0211, "step": 16615 }, { "epoch": 0.6501291180843571, "grad_norm": 0.0, "learning_rate": 5.761985044603773e-06, "loss": 1.1481, "step": 16616 }, { "epoch": 0.6501682447765866, "grad_norm": 0.0, "learning_rate": 5.7608372775976355e-06, "loss": 0.872, "step": 16617 }, { "epoch": 0.650207371468816, "grad_norm": 0.0, "learning_rate": 5.7596895786671335e-06, "loss": 1.0822, "step": 16618 }, { "epoch": 0.6502464981610455, "grad_norm": 0.0, "learning_rate": 5.75854194783071e-06, "loss": 0.9386, "step": 16619 }, { "epoch": 0.6502856248532749, "grad_norm": 0.0, "learning_rate": 5.757394385106778e-06, "loss": 1.0089, "step": 16620 }, { "epoch": 0.6503247515455044, "grad_norm": 0.0, "learning_rate": 5.756246890513784e-06, "loss": 0.9001, "step": 16621 }, { "epoch": 0.6503638782377338, "grad_norm": 0.0, "learning_rate": 5.755099464070144e-06, "loss": 0.9517, "step": 16622 }, { "epoch": 0.6504030049299632, "grad_norm": 0.0, "learning_rate": 5.753952105794291e-06, "loss": 0.9662, "step": 16623 }, { "epoch": 0.6504421316221927, "grad_norm": 0.0, "learning_rate": 5.752804815704642e-06, "loss": 1.0283, "step": 16624 }, { "epoch": 0.650481258314422, "grad_norm": 0.0, "learning_rate": 5.751657593819627e-06, "loss": 1.0173, "step": 16625 }, { "epoch": 0.6505203850066515, "grad_norm": 0.0, "learning_rate": 5.7505104401576676e-06, "loss": 0.8837, "step": 16626 }, { "epoch": 0.6505595116988809, "grad_norm": 0.0, "learning_rate": 5.749363354737188e-06, "loss": 0.9841, "step": 16627 }, { "epoch": 0.6505986383911104, "grad_norm": 0.0, "learning_rate": 5.748216337576604e-06, "loss": 1.0646, "step": 16628 }, { "epoch": 0.6506377650833398, "grad_norm": 0.0, "learning_rate": 5.747069388694337e-06, "loss": 0.9779, "step": 16629 }, { "epoch": 0.6506768917755693, "grad_norm": 0.0, "learning_rate": 5.7459225081088076e-06, "loss": 0.8804, "step": 16630 }, { "epoch": 0.6507160184677987, "grad_norm": 0.0, "learning_rate": 5.744775695838435e-06, "loss": 1.174, "step": 16631 }, { "epoch": 0.6507551451600282, "grad_norm": 0.0, "learning_rate": 5.7436289519016285e-06, "loss": 0.9639, "step": 16632 }, { "epoch": 0.6507942718522576, "grad_norm": 0.0, "learning_rate": 5.742482276316812e-06, "loss": 1.0008, "step": 16633 }, { "epoch": 0.6508333985444871, "grad_norm": 0.0, "learning_rate": 5.741335669102388e-06, "loss": 1.0335, "step": 16634 }, { "epoch": 0.6508725252367165, "grad_norm": 0.0, "learning_rate": 5.740189130276783e-06, "loss": 1.0644, "step": 16635 }, { "epoch": 0.650911651928946, "grad_norm": 0.0, "learning_rate": 5.7390426598584e-06, "loss": 1.1005, "step": 16636 }, { "epoch": 0.6509507786211753, "grad_norm": 0.0, "learning_rate": 5.737896257865656e-06, "loss": 1.0087, "step": 16637 }, { "epoch": 0.6509899053134048, "grad_norm": 0.0, "learning_rate": 5.736749924316954e-06, "loss": 1.0836, "step": 16638 }, { "epoch": 0.6510290320056342, "grad_norm": 0.0, "learning_rate": 5.735603659230705e-06, "loss": 0.9849, "step": 16639 }, { "epoch": 0.6510681586978637, "grad_norm": 0.0, "learning_rate": 5.734457462625318e-06, "loss": 1.0388, "step": 16640 }, { "epoch": 0.6511072853900931, "grad_norm": 0.0, "learning_rate": 5.733311334519204e-06, "loss": 1.0641, "step": 16641 }, { "epoch": 0.6511464120823226, "grad_norm": 0.0, "learning_rate": 5.732165274930759e-06, "loss": 0.913, "step": 16642 }, { "epoch": 0.651185538774552, "grad_norm": 0.0, "learning_rate": 5.731019283878391e-06, "loss": 1.0015, "step": 16643 }, { "epoch": 0.6512246654667815, "grad_norm": 0.0, "learning_rate": 5.7298733613805094e-06, "loss": 1.1379, "step": 16644 }, { "epoch": 0.6512637921590109, "grad_norm": 0.0, "learning_rate": 5.7287275074555045e-06, "loss": 0.9323, "step": 16645 }, { "epoch": 0.6513029188512404, "grad_norm": 0.0, "learning_rate": 5.727581722121787e-06, "loss": 0.9561, "step": 16646 }, { "epoch": 0.6513420455434697, "grad_norm": 0.0, "learning_rate": 5.7264360053977506e-06, "loss": 0.9591, "step": 16647 }, { "epoch": 0.6513811722356992, "grad_norm": 0.0, "learning_rate": 5.725290357301803e-06, "loss": 0.7559, "step": 16648 }, { "epoch": 0.6514202989279286, "grad_norm": 0.0, "learning_rate": 5.7241447778523295e-06, "loss": 0.9471, "step": 16649 }, { "epoch": 0.651459425620158, "grad_norm": 0.0, "learning_rate": 5.722999267067738e-06, "loss": 0.8928, "step": 16650 }, { "epoch": 0.6514985523123875, "grad_norm": 0.0, "learning_rate": 5.721853824966414e-06, "loss": 0.9519, "step": 16651 }, { "epoch": 0.6515376790046169, "grad_norm": 0.0, "learning_rate": 5.720708451566759e-06, "loss": 1.0066, "step": 16652 }, { "epoch": 0.6515768056968464, "grad_norm": 0.0, "learning_rate": 5.719563146887163e-06, "loss": 0.9353, "step": 16653 }, { "epoch": 0.6516159323890758, "grad_norm": 0.0, "learning_rate": 5.7184179109460224e-06, "loss": 1.0592, "step": 16654 }, { "epoch": 0.6516550590813053, "grad_norm": 0.0, "learning_rate": 5.717272743761722e-06, "loss": 0.9064, "step": 16655 }, { "epoch": 0.6516941857735347, "grad_norm": 0.0, "learning_rate": 5.716127645352654e-06, "loss": 0.9878, "step": 16656 }, { "epoch": 0.6517333124657642, "grad_norm": 0.0, "learning_rate": 5.714982615737208e-06, "loss": 0.9684, "step": 16657 }, { "epoch": 0.6517724391579935, "grad_norm": 0.0, "learning_rate": 5.713837654933776e-06, "loss": 0.9334, "step": 16658 }, { "epoch": 0.651811565850223, "grad_norm": 0.0, "learning_rate": 5.712692762960736e-06, "loss": 1.0466, "step": 16659 }, { "epoch": 0.6518506925424524, "grad_norm": 0.0, "learning_rate": 5.711547939836484e-06, "loss": 1.0942, "step": 16660 }, { "epoch": 0.6518898192346819, "grad_norm": 0.0, "learning_rate": 5.710403185579389e-06, "loss": 1.0214, "step": 16661 }, { "epoch": 0.6519289459269113, "grad_norm": 0.0, "learning_rate": 5.70925850020785e-06, "loss": 0.9553, "step": 16662 }, { "epoch": 0.6519680726191408, "grad_norm": 0.0, "learning_rate": 5.708113883740241e-06, "loss": 0.8993, "step": 16663 }, { "epoch": 0.6520071993113702, "grad_norm": 0.0, "learning_rate": 5.706969336194948e-06, "loss": 0.9963, "step": 16664 }, { "epoch": 0.6520463260035997, "grad_norm": 0.0, "learning_rate": 5.705824857590341e-06, "loss": 0.957, "step": 16665 }, { "epoch": 0.6520854526958291, "grad_norm": 0.0, "learning_rate": 5.704680447944812e-06, "loss": 0.9829, "step": 16666 }, { "epoch": 0.6521245793880586, "grad_norm": 0.0, "learning_rate": 5.70353610727673e-06, "loss": 0.9966, "step": 16667 }, { "epoch": 0.652163706080288, "grad_norm": 0.0, "learning_rate": 5.702391835604479e-06, "loss": 0.9139, "step": 16668 }, { "epoch": 0.6522028327725174, "grad_norm": 0.0, "learning_rate": 5.701247632946425e-06, "loss": 1.1136, "step": 16669 }, { "epoch": 0.6522419594647468, "grad_norm": 0.0, "learning_rate": 5.700103499320948e-06, "loss": 1.0999, "step": 16670 }, { "epoch": 0.6522810861569763, "grad_norm": 0.0, "learning_rate": 5.698959434746421e-06, "loss": 1.0325, "step": 16671 }, { "epoch": 0.6523202128492057, "grad_norm": 0.0, "learning_rate": 5.6978154392412186e-06, "loss": 1.0729, "step": 16672 }, { "epoch": 0.6523593395414352, "grad_norm": 0.0, "learning_rate": 5.696671512823706e-06, "loss": 1.0416, "step": 16673 }, { "epoch": 0.6523984662336646, "grad_norm": 0.0, "learning_rate": 5.695527655512256e-06, "loss": 1.0412, "step": 16674 }, { "epoch": 0.6524375929258941, "grad_norm": 0.0, "learning_rate": 5.694383867325238e-06, "loss": 0.937, "step": 16675 }, { "epoch": 0.6524767196181235, "grad_norm": 0.0, "learning_rate": 5.693240148281025e-06, "loss": 0.9878, "step": 16676 }, { "epoch": 0.652515846310353, "grad_norm": 0.0, "learning_rate": 5.692096498397974e-06, "loss": 0.9389, "step": 16677 }, { "epoch": 0.6525549730025824, "grad_norm": 0.0, "learning_rate": 5.690952917694455e-06, "loss": 1.13, "step": 16678 }, { "epoch": 0.6525940996948117, "grad_norm": 0.0, "learning_rate": 5.689809406188832e-06, "loss": 1.0785, "step": 16679 }, { "epoch": 0.6526332263870412, "grad_norm": 0.0, "learning_rate": 5.688665963899473e-06, "loss": 0.9812, "step": 16680 }, { "epoch": 0.6526723530792706, "grad_norm": 0.0, "learning_rate": 5.687522590844734e-06, "loss": 0.8878, "step": 16681 }, { "epoch": 0.6527114797715001, "grad_norm": 0.0, "learning_rate": 5.68637928704298e-06, "loss": 0.9825, "step": 16682 }, { "epoch": 0.6527506064637295, "grad_norm": 0.0, "learning_rate": 5.6852360525125686e-06, "loss": 0.9097, "step": 16683 }, { "epoch": 0.652789733155959, "grad_norm": 0.0, "learning_rate": 5.684092887271857e-06, "loss": 0.9178, "step": 16684 }, { "epoch": 0.6528288598481884, "grad_norm": 0.0, "learning_rate": 5.6829497913392055e-06, "loss": 1.0805, "step": 16685 }, { "epoch": 0.6528679865404179, "grad_norm": 0.0, "learning_rate": 5.681806764732976e-06, "loss": 1.2478, "step": 16686 }, { "epoch": 0.6529071132326473, "grad_norm": 0.0, "learning_rate": 5.680663807471514e-06, "loss": 0.9857, "step": 16687 }, { "epoch": 0.6529462399248768, "grad_norm": 0.0, "learning_rate": 5.6795209195731785e-06, "loss": 1.0363, "step": 16688 }, { "epoch": 0.6529853666171062, "grad_norm": 0.0, "learning_rate": 5.6783781010563275e-06, "loss": 0.9389, "step": 16689 }, { "epoch": 0.6530244933093357, "grad_norm": 0.0, "learning_rate": 5.677235351939305e-06, "loss": 0.9882, "step": 16690 }, { "epoch": 0.653063620001565, "grad_norm": 0.0, "learning_rate": 5.676092672240465e-06, "loss": 1.0833, "step": 16691 }, { "epoch": 0.6531027466937945, "grad_norm": 0.0, "learning_rate": 5.6749500619781595e-06, "loss": 0.9275, "step": 16692 }, { "epoch": 0.6531418733860239, "grad_norm": 0.0, "learning_rate": 5.673807521170741e-06, "loss": 1.0975, "step": 16693 }, { "epoch": 0.6531810000782534, "grad_norm": 0.0, "learning_rate": 5.672665049836548e-06, "loss": 0.9933, "step": 16694 }, { "epoch": 0.6532201267704828, "grad_norm": 0.0, "learning_rate": 5.671522647993937e-06, "loss": 1.0527, "step": 16695 }, { "epoch": 0.6532592534627123, "grad_norm": 0.0, "learning_rate": 5.6703803156612395e-06, "loss": 1.1046, "step": 16696 }, { "epoch": 0.6532983801549417, "grad_norm": 0.0, "learning_rate": 5.669238052856816e-06, "loss": 0.8994, "step": 16697 }, { "epoch": 0.6533375068471712, "grad_norm": 0.0, "learning_rate": 5.668095859598999e-06, "loss": 0.9842, "step": 16698 }, { "epoch": 0.6533766335394006, "grad_norm": 0.0, "learning_rate": 5.666953735906138e-06, "loss": 1.0343, "step": 16699 }, { "epoch": 0.6534157602316301, "grad_norm": 0.0, "learning_rate": 5.665811681796567e-06, "loss": 0.9095, "step": 16700 }, { "epoch": 0.6534548869238594, "grad_norm": 0.0, "learning_rate": 5.66466969728863e-06, "loss": 1.013, "step": 16701 }, { "epoch": 0.6534940136160889, "grad_norm": 0.0, "learning_rate": 5.663527782400664e-06, "loss": 1.0244, "step": 16702 }, { "epoch": 0.6535331403083183, "grad_norm": 0.0, "learning_rate": 5.662385937151013e-06, "loss": 0.9279, "step": 16703 }, { "epoch": 0.6535722670005478, "grad_norm": 0.0, "learning_rate": 5.661244161558004e-06, "loss": 1.0458, "step": 16704 }, { "epoch": 0.6536113936927772, "grad_norm": 0.0, "learning_rate": 5.660102455639978e-06, "loss": 1.1083, "step": 16705 }, { "epoch": 0.6536505203850067, "grad_norm": 0.0, "learning_rate": 5.658960819415267e-06, "loss": 0.9049, "step": 16706 }, { "epoch": 0.6536896470772361, "grad_norm": 0.0, "learning_rate": 5.657819252902209e-06, "loss": 1.0211, "step": 16707 }, { "epoch": 0.6537287737694655, "grad_norm": 0.0, "learning_rate": 5.65667775611913e-06, "loss": 0.8492, "step": 16708 }, { "epoch": 0.653767900461695, "grad_norm": 0.0, "learning_rate": 5.655536329084368e-06, "loss": 0.9878, "step": 16709 }, { "epoch": 0.6538070271539244, "grad_norm": 0.0, "learning_rate": 5.654394971816239e-06, "loss": 0.9247, "step": 16710 }, { "epoch": 0.6538461538461539, "grad_norm": 0.0, "learning_rate": 5.653253684333091e-06, "loss": 0.9169, "step": 16711 }, { "epoch": 0.6538852805383832, "grad_norm": 0.0, "learning_rate": 5.652112466653237e-06, "loss": 0.9543, "step": 16712 }, { "epoch": 0.6539244072306127, "grad_norm": 0.0, "learning_rate": 5.650971318795013e-06, "loss": 1.0327, "step": 16713 }, { "epoch": 0.6539635339228421, "grad_norm": 0.0, "learning_rate": 5.649830240776733e-06, "loss": 0.9651, "step": 16714 }, { "epoch": 0.6540026606150716, "grad_norm": 0.0, "learning_rate": 5.648689232616735e-06, "loss": 1.0222, "step": 16715 }, { "epoch": 0.654041787307301, "grad_norm": 0.0, "learning_rate": 5.647548294333332e-06, "loss": 1.0136, "step": 16716 }, { "epoch": 0.6540809139995305, "grad_norm": 0.0, "learning_rate": 5.646407425944853e-06, "loss": 0.9893, "step": 16717 }, { "epoch": 0.6541200406917599, "grad_norm": 0.0, "learning_rate": 5.645266627469612e-06, "loss": 1.0136, "step": 16718 }, { "epoch": 0.6541591673839894, "grad_norm": 0.0, "learning_rate": 5.644125898925932e-06, "loss": 0.9485, "step": 16719 }, { "epoch": 0.6541982940762188, "grad_norm": 0.0, "learning_rate": 5.642985240332131e-06, "loss": 1.1038, "step": 16720 }, { "epoch": 0.6542374207684483, "grad_norm": 0.0, "learning_rate": 5.6418446517065316e-06, "loss": 0.9553, "step": 16721 }, { "epoch": 0.6542765474606776, "grad_norm": 0.0, "learning_rate": 5.640704133067443e-06, "loss": 0.9686, "step": 16722 }, { "epoch": 0.6543156741529071, "grad_norm": 0.0, "learning_rate": 5.639563684433182e-06, "loss": 1.088, "step": 16723 }, { "epoch": 0.6543548008451365, "grad_norm": 0.0, "learning_rate": 5.638423305822066e-06, "loss": 1.0007, "step": 16724 }, { "epoch": 0.654393927537366, "grad_norm": 0.0, "learning_rate": 5.637282997252409e-06, "loss": 0.9171, "step": 16725 }, { "epoch": 0.6544330542295954, "grad_norm": 0.0, "learning_rate": 5.636142758742518e-06, "loss": 1.0522, "step": 16726 }, { "epoch": 0.6544721809218249, "grad_norm": 0.0, "learning_rate": 5.635002590310708e-06, "loss": 0.9946, "step": 16727 }, { "epoch": 0.6545113076140543, "grad_norm": 0.0, "learning_rate": 5.633862491975284e-06, "loss": 0.99, "step": 16728 }, { "epoch": 0.6545504343062838, "grad_norm": 0.0, "learning_rate": 5.6327224637545574e-06, "loss": 0.9673, "step": 16729 }, { "epoch": 0.6545895609985132, "grad_norm": 0.0, "learning_rate": 5.631582505666835e-06, "loss": 1.113, "step": 16730 }, { "epoch": 0.6546286876907427, "grad_norm": 0.0, "learning_rate": 5.630442617730427e-06, "loss": 1.0164, "step": 16731 }, { "epoch": 0.6546678143829721, "grad_norm": 0.0, "learning_rate": 5.629302799963633e-06, "loss": 1.0515, "step": 16732 }, { "epoch": 0.6547069410752016, "grad_norm": 0.0, "learning_rate": 5.628163052384759e-06, "loss": 0.9334, "step": 16733 }, { "epoch": 0.6547460677674309, "grad_norm": 0.0, "learning_rate": 5.62702337501211e-06, "loss": 0.9127, "step": 16734 }, { "epoch": 0.6547851944596604, "grad_norm": 0.0, "learning_rate": 5.6258837678639845e-06, "loss": 0.8934, "step": 16735 }, { "epoch": 0.6548243211518898, "grad_norm": 0.0, "learning_rate": 5.624744230958683e-06, "loss": 0.9432, "step": 16736 }, { "epoch": 0.6548634478441192, "grad_norm": 0.0, "learning_rate": 5.623604764314507e-06, "loss": 0.9915, "step": 16737 }, { "epoch": 0.6549025745363487, "grad_norm": 0.0, "learning_rate": 5.622465367949759e-06, "loss": 0.8807, "step": 16738 }, { "epoch": 0.6549417012285781, "grad_norm": 0.0, "learning_rate": 5.621326041882727e-06, "loss": 1.033, "step": 16739 }, { "epoch": 0.6549808279208076, "grad_norm": 0.0, "learning_rate": 5.620186786131713e-06, "loss": 0.994, "step": 16740 }, { "epoch": 0.655019954613037, "grad_norm": 0.0, "learning_rate": 5.619047600715011e-06, "loss": 0.9011, "step": 16741 }, { "epoch": 0.6550590813052665, "grad_norm": 0.0, "learning_rate": 5.617908485650918e-06, "loss": 0.994, "step": 16742 }, { "epoch": 0.6550982079974959, "grad_norm": 0.0, "learning_rate": 5.616769440957719e-06, "loss": 0.9603, "step": 16743 }, { "epoch": 0.6551373346897253, "grad_norm": 0.0, "learning_rate": 5.6156304666537166e-06, "loss": 0.8468, "step": 16744 }, { "epoch": 0.6551764613819547, "grad_norm": 0.0, "learning_rate": 5.614491562757185e-06, "loss": 1.0632, "step": 16745 }, { "epoch": 0.6552155880741842, "grad_norm": 0.0, "learning_rate": 5.613352729286432e-06, "loss": 0.9782, "step": 16746 }, { "epoch": 0.6552547147664136, "grad_norm": 0.0, "learning_rate": 5.6122139662597345e-06, "loss": 1.1292, "step": 16747 }, { "epoch": 0.6552938414586431, "grad_norm": 0.0, "learning_rate": 5.611075273695385e-06, "loss": 1.1208, "step": 16748 }, { "epoch": 0.6553329681508725, "grad_norm": 0.0, "learning_rate": 5.609936651611662e-06, "loss": 0.9826, "step": 16749 }, { "epoch": 0.655372094843102, "grad_norm": 0.0, "learning_rate": 5.608798100026858e-06, "loss": 0.9543, "step": 16750 }, { "epoch": 0.6554112215353314, "grad_norm": 0.0, "learning_rate": 5.607659618959253e-06, "loss": 0.9625, "step": 16751 }, { "epoch": 0.6554503482275609, "grad_norm": 0.0, "learning_rate": 5.6065212084271344e-06, "loss": 0.9623, "step": 16752 }, { "epoch": 0.6554894749197903, "grad_norm": 0.0, "learning_rate": 5.6053828684487765e-06, "loss": 1.0076, "step": 16753 }, { "epoch": 0.6555286016120198, "grad_norm": 0.0, "learning_rate": 5.604244599042462e-06, "loss": 1.011, "step": 16754 }, { "epoch": 0.6555677283042491, "grad_norm": 0.0, "learning_rate": 5.603106400226472e-06, "loss": 1.0068, "step": 16755 }, { "epoch": 0.6556068549964786, "grad_norm": 0.0, "learning_rate": 5.601968272019087e-06, "loss": 1.0608, "step": 16756 }, { "epoch": 0.655645981688708, "grad_norm": 0.0, "learning_rate": 5.600830214438577e-06, "loss": 0.9782, "step": 16757 }, { "epoch": 0.6556851083809375, "grad_norm": 0.0, "learning_rate": 5.599692227503224e-06, "loss": 0.9361, "step": 16758 }, { "epoch": 0.6557242350731669, "grad_norm": 0.0, "learning_rate": 5.598554311231293e-06, "loss": 1.0001, "step": 16759 }, { "epoch": 0.6557633617653964, "grad_norm": 0.0, "learning_rate": 5.5974164656410725e-06, "loss": 0.9736, "step": 16760 }, { "epoch": 0.6558024884576258, "grad_norm": 0.0, "learning_rate": 5.596278690750822e-06, "loss": 1.0179, "step": 16761 }, { "epoch": 0.6558416151498553, "grad_norm": 0.0, "learning_rate": 5.595140986578823e-06, "loss": 1.0057, "step": 16762 }, { "epoch": 0.6558807418420847, "grad_norm": 0.0, "learning_rate": 5.5940033531433314e-06, "loss": 0.8877, "step": 16763 }, { "epoch": 0.655919868534314, "grad_norm": 0.0, "learning_rate": 5.5928657904626325e-06, "loss": 1.1679, "step": 16764 }, { "epoch": 0.6559589952265436, "grad_norm": 0.0, "learning_rate": 5.591728298554984e-06, "loss": 0.9917, "step": 16765 }, { "epoch": 0.6559981219187729, "grad_norm": 0.0, "learning_rate": 5.590590877438657e-06, "loss": 0.8846, "step": 16766 }, { "epoch": 0.6560372486110024, "grad_norm": 0.0, "learning_rate": 5.589453527131912e-06, "loss": 0.9992, "step": 16767 }, { "epoch": 0.6560763753032318, "grad_norm": 0.0, "learning_rate": 5.588316247653017e-06, "loss": 0.9904, "step": 16768 }, { "epoch": 0.6561155019954613, "grad_norm": 0.0, "learning_rate": 5.587179039020235e-06, "loss": 1.1024, "step": 16769 }, { "epoch": 0.6561546286876907, "grad_norm": 0.0, "learning_rate": 5.586041901251832e-06, "loss": 1.0864, "step": 16770 }, { "epoch": 0.6561937553799202, "grad_norm": 0.0, "learning_rate": 5.584904834366061e-06, "loss": 0.9757, "step": 16771 }, { "epoch": 0.6562328820721496, "grad_norm": 0.0, "learning_rate": 5.583767838381186e-06, "loss": 1.1052, "step": 16772 }, { "epoch": 0.6562720087643791, "grad_norm": 0.0, "learning_rate": 5.582630913315465e-06, "loss": 0.9493, "step": 16773 }, { "epoch": 0.6563111354566085, "grad_norm": 0.0, "learning_rate": 5.581494059187162e-06, "loss": 1.0811, "step": 16774 }, { "epoch": 0.656350262148838, "grad_norm": 0.0, "learning_rate": 5.580357276014522e-06, "loss": 1.0577, "step": 16775 }, { "epoch": 0.6563893888410673, "grad_norm": 0.0, "learning_rate": 5.5792205638158105e-06, "loss": 0.9847, "step": 16776 }, { "epoch": 0.6564285155332968, "grad_norm": 0.0, "learning_rate": 5.578083922609273e-06, "loss": 0.9894, "step": 16777 }, { "epoch": 0.6564676422255262, "grad_norm": 0.0, "learning_rate": 5.576947352413167e-06, "loss": 1.0532, "step": 16778 }, { "epoch": 0.6565067689177557, "grad_norm": 0.0, "learning_rate": 5.575810853245744e-06, "loss": 0.9259, "step": 16779 }, { "epoch": 0.6565458956099851, "grad_norm": 0.0, "learning_rate": 5.574674425125259e-06, "loss": 0.9747, "step": 16780 }, { "epoch": 0.6565850223022146, "grad_norm": 0.0, "learning_rate": 5.573538068069954e-06, "loss": 0.9744, "step": 16781 }, { "epoch": 0.656624148994444, "grad_norm": 0.0, "learning_rate": 5.572401782098081e-06, "loss": 1.1124, "step": 16782 }, { "epoch": 0.6566632756866735, "grad_norm": 0.0, "learning_rate": 5.5712655672278905e-06, "loss": 1.0727, "step": 16783 }, { "epoch": 0.6567024023789029, "grad_norm": 0.0, "learning_rate": 5.5701294234776215e-06, "loss": 0.9709, "step": 16784 }, { "epoch": 0.6567415290711324, "grad_norm": 0.0, "learning_rate": 5.568993350865522e-06, "loss": 0.9796, "step": 16785 }, { "epoch": 0.6567806557633618, "grad_norm": 0.0, "learning_rate": 5.5678573494098384e-06, "loss": 0.9283, "step": 16786 }, { "epoch": 0.6568197824555913, "grad_norm": 0.0, "learning_rate": 5.566721419128815e-06, "loss": 1.023, "step": 16787 }, { "epoch": 0.6568589091478206, "grad_norm": 0.0, "learning_rate": 5.565585560040688e-06, "loss": 1.0407, "step": 16788 }, { "epoch": 0.6568980358400501, "grad_norm": 0.0, "learning_rate": 5.564449772163704e-06, "loss": 0.953, "step": 16789 }, { "epoch": 0.6569371625322795, "grad_norm": 0.0, "learning_rate": 5.563314055516089e-06, "loss": 0.9584, "step": 16790 }, { "epoch": 0.656976289224509, "grad_norm": 0.0, "learning_rate": 5.5621784101161e-06, "loss": 1.0316, "step": 16791 }, { "epoch": 0.6570154159167384, "grad_norm": 0.0, "learning_rate": 5.5610428359819605e-06, "loss": 0.9921, "step": 16792 }, { "epoch": 0.6570545426089678, "grad_norm": 0.0, "learning_rate": 5.559907333131915e-06, "loss": 0.9875, "step": 16793 }, { "epoch": 0.6570936693011973, "grad_norm": 0.0, "learning_rate": 5.5587719015841856e-06, "loss": 1.1145, "step": 16794 }, { "epoch": 0.6571327959934267, "grad_norm": 0.0, "learning_rate": 5.557636541357023e-06, "loss": 0.9958, "step": 16795 }, { "epoch": 0.6571719226856562, "grad_norm": 0.0, "learning_rate": 5.556501252468647e-06, "loss": 0.882, "step": 16796 }, { "epoch": 0.6572110493778855, "grad_norm": 0.0, "learning_rate": 5.555366034937295e-06, "loss": 1.0179, "step": 16797 }, { "epoch": 0.657250176070115, "grad_norm": 0.0, "learning_rate": 5.5542308887811934e-06, "loss": 1.0649, "step": 16798 }, { "epoch": 0.6572893027623444, "grad_norm": 0.0, "learning_rate": 5.553095814018572e-06, "loss": 1.059, "step": 16799 }, { "epoch": 0.6573284294545739, "grad_norm": 0.0, "learning_rate": 5.551960810667658e-06, "loss": 1.0562, "step": 16800 }, { "epoch": 0.6573675561468033, "grad_norm": 0.0, "learning_rate": 5.550825878746686e-06, "loss": 1.1271, "step": 16801 }, { "epoch": 0.6574066828390328, "grad_norm": 0.0, "learning_rate": 5.549691018273871e-06, "loss": 0.9961, "step": 16802 }, { "epoch": 0.6574458095312622, "grad_norm": 0.0, "learning_rate": 5.548556229267441e-06, "loss": 0.976, "step": 16803 }, { "epoch": 0.6574849362234917, "grad_norm": 0.0, "learning_rate": 5.547421511745619e-06, "loss": 1.0322, "step": 16804 }, { "epoch": 0.6575240629157211, "grad_norm": 0.0, "learning_rate": 5.546286865726633e-06, "loss": 1.0228, "step": 16805 }, { "epoch": 0.6575631896079506, "grad_norm": 0.0, "learning_rate": 5.545152291228696e-06, "loss": 0.9663, "step": 16806 }, { "epoch": 0.65760231630018, "grad_norm": 0.0, "learning_rate": 5.5440177882700345e-06, "loss": 0.9773, "step": 16807 }, { "epoch": 0.6576414429924095, "grad_norm": 0.0, "learning_rate": 5.542883356868856e-06, "loss": 1.0372, "step": 16808 }, { "epoch": 0.6576805696846388, "grad_norm": 0.0, "learning_rate": 5.541748997043392e-06, "loss": 0.9148, "step": 16809 }, { "epoch": 0.6577196963768683, "grad_norm": 0.0, "learning_rate": 5.540614708811849e-06, "loss": 1.0558, "step": 16810 }, { "epoch": 0.6577588230690977, "grad_norm": 0.0, "learning_rate": 5.53948049219245e-06, "loss": 1.0858, "step": 16811 }, { "epoch": 0.6577979497613272, "grad_norm": 0.0, "learning_rate": 5.5383463472033985e-06, "loss": 0.9424, "step": 16812 }, { "epoch": 0.6578370764535566, "grad_norm": 0.0, "learning_rate": 5.537212273862922e-06, "loss": 0.9368, "step": 16813 }, { "epoch": 0.6578762031457861, "grad_norm": 0.0, "learning_rate": 5.536078272189218e-06, "loss": 0.8394, "step": 16814 }, { "epoch": 0.6579153298380155, "grad_norm": 0.0, "learning_rate": 5.534944342200508e-06, "loss": 0.9951, "step": 16815 }, { "epoch": 0.657954456530245, "grad_norm": 0.0, "learning_rate": 5.533810483914994e-06, "loss": 1.0064, "step": 16816 }, { "epoch": 0.6579935832224744, "grad_norm": 0.0, "learning_rate": 5.532676697350887e-06, "loss": 0.9243, "step": 16817 }, { "epoch": 0.6580327099147039, "grad_norm": 0.0, "learning_rate": 5.5315429825263935e-06, "loss": 0.8653, "step": 16818 }, { "epoch": 0.6580718366069332, "grad_norm": 0.0, "learning_rate": 5.530409339459724e-06, "loss": 1.1233, "step": 16819 }, { "epoch": 0.6581109632991627, "grad_norm": 0.0, "learning_rate": 5.529275768169077e-06, "loss": 0.9736, "step": 16820 }, { "epoch": 0.6581500899913921, "grad_norm": 0.0, "learning_rate": 5.528142268672659e-06, "loss": 1.0402, "step": 16821 }, { "epoch": 0.6581892166836215, "grad_norm": 0.0, "learning_rate": 5.527008840988676e-06, "loss": 0.8987, "step": 16822 }, { "epoch": 0.658228343375851, "grad_norm": 0.0, "learning_rate": 5.525875485135321e-06, "loss": 0.9654, "step": 16823 }, { "epoch": 0.6582674700680804, "grad_norm": 0.0, "learning_rate": 5.524742201130801e-06, "loss": 1.0745, "step": 16824 }, { "epoch": 0.6583065967603099, "grad_norm": 0.0, "learning_rate": 5.523608988993317e-06, "loss": 1.0413, "step": 16825 }, { "epoch": 0.6583457234525393, "grad_norm": 0.0, "learning_rate": 5.52247584874106e-06, "loss": 0.8851, "step": 16826 }, { "epoch": 0.6583848501447688, "grad_norm": 0.0, "learning_rate": 5.521342780392229e-06, "loss": 1.0575, "step": 16827 }, { "epoch": 0.6584239768369982, "grad_norm": 0.0, "learning_rate": 5.520209783965026e-06, "loss": 1.1201, "step": 16828 }, { "epoch": 0.6584631035292277, "grad_norm": 0.0, "learning_rate": 5.519076859477635e-06, "loss": 0.98, "step": 16829 }, { "epoch": 0.658502230221457, "grad_norm": 0.0, "learning_rate": 5.517944006948257e-06, "loss": 1.0574, "step": 16830 }, { "epoch": 0.6585413569136865, "grad_norm": 0.0, "learning_rate": 5.51681122639508e-06, "loss": 1.1426, "step": 16831 }, { "epoch": 0.6585804836059159, "grad_norm": 0.0, "learning_rate": 5.515678517836301e-06, "loss": 0.9503, "step": 16832 }, { "epoch": 0.6586196102981454, "grad_norm": 0.0, "learning_rate": 5.514545881290103e-06, "loss": 1.1164, "step": 16833 }, { "epoch": 0.6586587369903748, "grad_norm": 0.0, "learning_rate": 5.51341331677468e-06, "loss": 0.9835, "step": 16834 }, { "epoch": 0.6586978636826043, "grad_norm": 0.0, "learning_rate": 5.51228082430821e-06, "loss": 0.9919, "step": 16835 }, { "epoch": 0.6587369903748337, "grad_norm": 0.0, "learning_rate": 5.511148403908895e-06, "loss": 1.0128, "step": 16836 }, { "epoch": 0.6587761170670632, "grad_norm": 0.0, "learning_rate": 5.510016055594907e-06, "loss": 0.9721, "step": 16837 }, { "epoch": 0.6588152437592926, "grad_norm": 0.0, "learning_rate": 5.50888377938444e-06, "loss": 1.0777, "step": 16838 }, { "epoch": 0.6588543704515221, "grad_norm": 0.0, "learning_rate": 5.507751575295662e-06, "loss": 1.0324, "step": 16839 }, { "epoch": 0.6588934971437514, "grad_norm": 0.0, "learning_rate": 5.506619443346775e-06, "loss": 0.9766, "step": 16840 }, { "epoch": 0.658932623835981, "grad_norm": 0.0, "learning_rate": 5.505487383555943e-06, "loss": 0.8975, "step": 16841 }, { "epoch": 0.6589717505282103, "grad_norm": 0.0, "learning_rate": 5.504355395941356e-06, "loss": 1.0982, "step": 16842 }, { "epoch": 0.6590108772204398, "grad_norm": 0.0, "learning_rate": 5.503223480521185e-06, "loss": 0.9926, "step": 16843 }, { "epoch": 0.6590500039126692, "grad_norm": 0.0, "learning_rate": 5.502091637313609e-06, "loss": 0.9983, "step": 16844 }, { "epoch": 0.6590891306048987, "grad_norm": 0.0, "learning_rate": 5.500959866336806e-06, "loss": 1.0264, "step": 16845 }, { "epoch": 0.6591282572971281, "grad_norm": 0.0, "learning_rate": 5.499828167608954e-06, "loss": 1.0754, "step": 16846 }, { "epoch": 0.6591673839893576, "grad_norm": 0.0, "learning_rate": 5.4986965411482185e-06, "loss": 0.9775, "step": 16847 }, { "epoch": 0.659206510681587, "grad_norm": 0.0, "learning_rate": 5.497564986972775e-06, "loss": 0.9908, "step": 16848 }, { "epoch": 0.6592456373738164, "grad_norm": 0.0, "learning_rate": 5.496433505100798e-06, "loss": 0.9592, "step": 16849 }, { "epoch": 0.6592847640660459, "grad_norm": 0.0, "learning_rate": 5.495302095550457e-06, "loss": 0.934, "step": 16850 }, { "epoch": 0.6593238907582752, "grad_norm": 0.0, "learning_rate": 5.494170758339918e-06, "loss": 0.9087, "step": 16851 }, { "epoch": 0.6593630174505047, "grad_norm": 0.0, "learning_rate": 5.493039493487349e-06, "loss": 0.9769, "step": 16852 }, { "epoch": 0.6594021441427341, "grad_norm": 0.0, "learning_rate": 5.4919083010109185e-06, "loss": 1.1624, "step": 16853 }, { "epoch": 0.6594412708349636, "grad_norm": 0.0, "learning_rate": 5.490777180928797e-06, "loss": 0.9649, "step": 16854 }, { "epoch": 0.659480397527193, "grad_norm": 0.0, "learning_rate": 5.489646133259138e-06, "loss": 0.9716, "step": 16855 }, { "epoch": 0.6595195242194225, "grad_norm": 0.0, "learning_rate": 5.488515158020115e-06, "loss": 1.0275, "step": 16856 }, { "epoch": 0.6595586509116519, "grad_norm": 0.0, "learning_rate": 5.4873842552298775e-06, "loss": 0.9297, "step": 16857 }, { "epoch": 0.6595977776038814, "grad_norm": 0.0, "learning_rate": 5.486253424906602e-06, "loss": 0.8918, "step": 16858 }, { "epoch": 0.6596369042961108, "grad_norm": 0.0, "learning_rate": 5.485122667068436e-06, "loss": 0.9314, "step": 16859 }, { "epoch": 0.6596760309883403, "grad_norm": 0.0, "learning_rate": 5.4839919817335474e-06, "loss": 1.0134, "step": 16860 }, { "epoch": 0.6597151576805697, "grad_norm": 0.0, "learning_rate": 5.482861368920084e-06, "loss": 1.0223, "step": 16861 }, { "epoch": 0.6597542843727991, "grad_norm": 0.0, "learning_rate": 5.4817308286462065e-06, "loss": 0.8779, "step": 16862 }, { "epoch": 0.6597934110650285, "grad_norm": 0.0, "learning_rate": 5.48060036093007e-06, "loss": 0.9417, "step": 16863 }, { "epoch": 0.659832537757258, "grad_norm": 0.0, "learning_rate": 5.4794699657898345e-06, "loss": 0.9641, "step": 16864 }, { "epoch": 0.6598716644494874, "grad_norm": 0.0, "learning_rate": 5.478339643243642e-06, "loss": 0.8398, "step": 16865 }, { "epoch": 0.6599107911417169, "grad_norm": 0.0, "learning_rate": 5.4772093933096485e-06, "loss": 0.9801, "step": 16866 }, { "epoch": 0.6599499178339463, "grad_norm": 0.0, "learning_rate": 5.476079216006007e-06, "loss": 1.0281, "step": 16867 }, { "epoch": 0.6599890445261758, "grad_norm": 0.0, "learning_rate": 5.474949111350862e-06, "loss": 0.9756, "step": 16868 }, { "epoch": 0.6600281712184052, "grad_norm": 0.0, "learning_rate": 5.473819079362366e-06, "loss": 1.026, "step": 16869 }, { "epoch": 0.6600672979106347, "grad_norm": 0.0, "learning_rate": 5.472689120058661e-06, "loss": 0.9906, "step": 16870 }, { "epoch": 0.6601064246028641, "grad_norm": 0.0, "learning_rate": 5.471559233457902e-06, "loss": 0.931, "step": 16871 }, { "epoch": 0.6601455512950936, "grad_norm": 0.0, "learning_rate": 5.470429419578221e-06, "loss": 0.9519, "step": 16872 }, { "epoch": 0.6601846779873229, "grad_norm": 0.0, "learning_rate": 5.469299678437774e-06, "loss": 1.0812, "step": 16873 }, { "epoch": 0.6602238046795524, "grad_norm": 0.0, "learning_rate": 5.468170010054691e-06, "loss": 0.9411, "step": 16874 }, { "epoch": 0.6602629313717818, "grad_norm": 0.0, "learning_rate": 5.46704041444712e-06, "loss": 1.0525, "step": 16875 }, { "epoch": 0.6603020580640113, "grad_norm": 0.0, "learning_rate": 5.465910891633199e-06, "loss": 1.1063, "step": 16876 }, { "epoch": 0.6603411847562407, "grad_norm": 0.0, "learning_rate": 5.464781441631073e-06, "loss": 0.9021, "step": 16877 }, { "epoch": 0.6603803114484701, "grad_norm": 0.0, "learning_rate": 5.463652064458869e-06, "loss": 0.8469, "step": 16878 }, { "epoch": 0.6604194381406996, "grad_norm": 0.0, "learning_rate": 5.4625227601347276e-06, "loss": 0.9561, "step": 16879 }, { "epoch": 0.660458564832929, "grad_norm": 0.0, "learning_rate": 5.461393528676786e-06, "loss": 0.956, "step": 16880 }, { "epoch": 0.6604976915251585, "grad_norm": 0.0, "learning_rate": 5.460264370103181e-06, "loss": 1.0688, "step": 16881 }, { "epoch": 0.6605368182173879, "grad_norm": 0.0, "learning_rate": 5.459135284432038e-06, "loss": 0.9637, "step": 16882 }, { "epoch": 0.6605759449096174, "grad_norm": 0.0, "learning_rate": 5.458006271681495e-06, "loss": 0.8733, "step": 16883 }, { "epoch": 0.6606150716018467, "grad_norm": 0.0, "learning_rate": 5.456877331869674e-06, "loss": 0.9594, "step": 16884 }, { "epoch": 0.6606541982940762, "grad_norm": 0.0, "learning_rate": 5.455748465014716e-06, "loss": 0.9806, "step": 16885 }, { "epoch": 0.6606933249863056, "grad_norm": 0.0, "learning_rate": 5.454619671134741e-06, "loss": 0.9095, "step": 16886 }, { "epoch": 0.6607324516785351, "grad_norm": 0.0, "learning_rate": 5.453490950247882e-06, "loss": 1.0411, "step": 16887 }, { "epoch": 0.6607715783707645, "grad_norm": 0.0, "learning_rate": 5.452362302372253e-06, "loss": 0.9865, "step": 16888 }, { "epoch": 0.660810705062994, "grad_norm": 0.0, "learning_rate": 5.451233727525995e-06, "loss": 0.9323, "step": 16889 }, { "epoch": 0.6608498317552234, "grad_norm": 0.0, "learning_rate": 5.450105225727219e-06, "loss": 0.9334, "step": 16890 }, { "epoch": 0.6608889584474529, "grad_norm": 0.0, "learning_rate": 5.448976796994057e-06, "loss": 1.0837, "step": 16891 }, { "epoch": 0.6609280851396823, "grad_norm": 0.0, "learning_rate": 5.447848441344618e-06, "loss": 0.9916, "step": 16892 }, { "epoch": 0.6609672118319118, "grad_norm": 0.0, "learning_rate": 5.446720158797032e-06, "loss": 1.0785, "step": 16893 }, { "epoch": 0.6610063385241411, "grad_norm": 0.0, "learning_rate": 5.4455919493694134e-06, "loss": 0.9973, "step": 16894 }, { "epoch": 0.6610454652163706, "grad_norm": 0.0, "learning_rate": 5.444463813079885e-06, "loss": 0.9869, "step": 16895 }, { "epoch": 0.6610845919086, "grad_norm": 0.0, "learning_rate": 5.443335749946556e-06, "loss": 0.9391, "step": 16896 }, { "epoch": 0.6611237186008295, "grad_norm": 0.0, "learning_rate": 5.442207759987545e-06, "loss": 1.0074, "step": 16897 }, { "epoch": 0.6611628452930589, "grad_norm": 0.0, "learning_rate": 5.441079843220965e-06, "loss": 0.9902, "step": 16898 }, { "epoch": 0.6612019719852884, "grad_norm": 0.0, "learning_rate": 5.439951999664934e-06, "loss": 0.9266, "step": 16899 }, { "epoch": 0.6612410986775178, "grad_norm": 0.0, "learning_rate": 5.438824229337558e-06, "loss": 1.0476, "step": 16900 }, { "epoch": 0.6612802253697473, "grad_norm": 0.0, "learning_rate": 5.437696532256951e-06, "loss": 0.8206, "step": 16901 }, { "epoch": 0.6613193520619767, "grad_norm": 0.0, "learning_rate": 5.4365689084412144e-06, "loss": 0.7554, "step": 16902 }, { "epoch": 0.6613584787542062, "grad_norm": 0.0, "learning_rate": 5.4354413579084704e-06, "loss": 1.0834, "step": 16903 }, { "epoch": 0.6613976054464356, "grad_norm": 0.0, "learning_rate": 5.434313880676813e-06, "loss": 0.9996, "step": 16904 }, { "epoch": 0.661436732138665, "grad_norm": 0.0, "learning_rate": 5.43318647676436e-06, "loss": 1.0643, "step": 16905 }, { "epoch": 0.6614758588308944, "grad_norm": 0.0, "learning_rate": 5.432059146189205e-06, "loss": 0.9694, "step": 16906 }, { "epoch": 0.6615149855231238, "grad_norm": 0.0, "learning_rate": 5.430931888969455e-06, "loss": 1.0842, "step": 16907 }, { "epoch": 0.6615541122153533, "grad_norm": 0.0, "learning_rate": 5.429804705123215e-06, "loss": 0.9955, "step": 16908 }, { "epoch": 0.6615932389075827, "grad_norm": 0.0, "learning_rate": 5.428677594668588e-06, "loss": 0.9458, "step": 16909 }, { "epoch": 0.6616323655998122, "grad_norm": 0.0, "learning_rate": 5.427550557623667e-06, "loss": 0.8956, "step": 16910 }, { "epoch": 0.6616714922920416, "grad_norm": 0.0, "learning_rate": 5.4264235940065536e-06, "loss": 1.0416, "step": 16911 }, { "epoch": 0.6617106189842711, "grad_norm": 0.0, "learning_rate": 5.42529670383535e-06, "loss": 0.9307, "step": 16912 }, { "epoch": 0.6617497456765005, "grad_norm": 0.0, "learning_rate": 5.424169887128145e-06, "loss": 0.9997, "step": 16913 }, { "epoch": 0.66178887236873, "grad_norm": 0.0, "learning_rate": 5.4230431439030395e-06, "loss": 1.0213, "step": 16914 }, { "epoch": 0.6618279990609593, "grad_norm": 0.0, "learning_rate": 5.421916474178124e-06, "loss": 0.9577, "step": 16915 }, { "epoch": 0.6618671257531888, "grad_norm": 0.0, "learning_rate": 5.420789877971497e-06, "loss": 0.9749, "step": 16916 }, { "epoch": 0.6619062524454182, "grad_norm": 0.0, "learning_rate": 5.419663355301245e-06, "loss": 0.9327, "step": 16917 }, { "epoch": 0.6619453791376477, "grad_norm": 0.0, "learning_rate": 5.418536906185462e-06, "loss": 1.0056, "step": 16918 }, { "epoch": 0.6619845058298771, "grad_norm": 0.0, "learning_rate": 5.417410530642229e-06, "loss": 0.9967, "step": 16919 }, { "epoch": 0.6620236325221066, "grad_norm": 0.0, "learning_rate": 5.416284228689647e-06, "loss": 0.9566, "step": 16920 }, { "epoch": 0.662062759214336, "grad_norm": 0.0, "learning_rate": 5.415158000345792e-06, "loss": 0.9918, "step": 16921 }, { "epoch": 0.6621018859065655, "grad_norm": 0.0, "learning_rate": 5.41403184562876e-06, "loss": 1.0207, "step": 16922 }, { "epoch": 0.6621410125987949, "grad_norm": 0.0, "learning_rate": 5.412905764556626e-06, "loss": 1.0393, "step": 16923 }, { "epoch": 0.6621801392910244, "grad_norm": 0.0, "learning_rate": 5.411779757147476e-06, "loss": 0.9254, "step": 16924 }, { "epoch": 0.6622192659832538, "grad_norm": 0.0, "learning_rate": 5.4106538234193936e-06, "loss": 0.9221, "step": 16925 }, { "epoch": 0.6622583926754833, "grad_norm": 0.0, "learning_rate": 5.4095279633904665e-06, "loss": 0.9333, "step": 16926 }, { "epoch": 0.6622975193677126, "grad_norm": 0.0, "learning_rate": 5.408402177078762e-06, "loss": 0.9054, "step": 16927 }, { "epoch": 0.6623366460599421, "grad_norm": 0.0, "learning_rate": 5.407276464502366e-06, "loss": 0.9657, "step": 16928 }, { "epoch": 0.6623757727521715, "grad_norm": 0.0, "learning_rate": 5.406150825679355e-06, "loss": 0.9657, "step": 16929 }, { "epoch": 0.662414899444401, "grad_norm": 0.0, "learning_rate": 5.405025260627809e-06, "loss": 1.1079, "step": 16930 }, { "epoch": 0.6624540261366304, "grad_norm": 0.0, "learning_rate": 5.403899769365795e-06, "loss": 0.9389, "step": 16931 }, { "epoch": 0.6624931528288599, "grad_norm": 0.0, "learning_rate": 5.402774351911395e-06, "loss": 1.1172, "step": 16932 }, { "epoch": 0.6625322795210893, "grad_norm": 0.0, "learning_rate": 5.401649008282673e-06, "loss": 1.0992, "step": 16933 }, { "epoch": 0.6625714062133188, "grad_norm": 0.0, "learning_rate": 5.400523738497711e-06, "loss": 0.9998, "step": 16934 }, { "epoch": 0.6626105329055482, "grad_norm": 0.0, "learning_rate": 5.399398542574571e-06, "loss": 1.068, "step": 16935 }, { "epoch": 0.6626496595977776, "grad_norm": 0.0, "learning_rate": 5.39827342053133e-06, "loss": 0.9137, "step": 16936 }, { "epoch": 0.662688786290007, "grad_norm": 0.0, "learning_rate": 5.397148372386043e-06, "loss": 1.0495, "step": 16937 }, { "epoch": 0.6627279129822364, "grad_norm": 0.0, "learning_rate": 5.396023398156794e-06, "loss": 0.9342, "step": 16938 }, { "epoch": 0.6627670396744659, "grad_norm": 0.0, "learning_rate": 5.3948984978616355e-06, "loss": 1.0022, "step": 16939 }, { "epoch": 0.6628061663666953, "grad_norm": 0.0, "learning_rate": 5.3937736715186415e-06, "loss": 1.032, "step": 16940 }, { "epoch": 0.6628452930589248, "grad_norm": 0.0, "learning_rate": 5.392648919145867e-06, "loss": 0.8624, "step": 16941 }, { "epoch": 0.6628844197511542, "grad_norm": 0.0, "learning_rate": 5.391524240761376e-06, "loss": 0.8524, "step": 16942 }, { "epoch": 0.6629235464433837, "grad_norm": 0.0, "learning_rate": 5.390399636383231e-06, "loss": 1.0098, "step": 16943 }, { "epoch": 0.6629626731356131, "grad_norm": 0.0, "learning_rate": 5.3892751060294966e-06, "loss": 1.0099, "step": 16944 }, { "epoch": 0.6630017998278426, "grad_norm": 0.0, "learning_rate": 5.3881506497182225e-06, "loss": 1.0596, "step": 16945 }, { "epoch": 0.663040926520072, "grad_norm": 0.0, "learning_rate": 5.387026267467469e-06, "loss": 0.918, "step": 16946 }, { "epoch": 0.6630800532123015, "grad_norm": 0.0, "learning_rate": 5.385901959295293e-06, "loss": 1.0607, "step": 16947 }, { "epoch": 0.6631191799045308, "grad_norm": 0.0, "learning_rate": 5.384777725219754e-06, "loss": 1.0435, "step": 16948 }, { "epoch": 0.6631583065967603, "grad_norm": 0.0, "learning_rate": 5.3836535652589e-06, "loss": 1.0495, "step": 16949 }, { "epoch": 0.6631974332889897, "grad_norm": 0.0, "learning_rate": 5.382529479430786e-06, "loss": 1.0121, "step": 16950 }, { "epoch": 0.6632365599812192, "grad_norm": 0.0, "learning_rate": 5.38140546775346e-06, "loss": 0.9835, "step": 16951 }, { "epoch": 0.6632756866734486, "grad_norm": 0.0, "learning_rate": 5.380281530244974e-06, "loss": 0.9971, "step": 16952 }, { "epoch": 0.6633148133656781, "grad_norm": 0.0, "learning_rate": 5.379157666923378e-06, "loss": 1.031, "step": 16953 }, { "epoch": 0.6633539400579075, "grad_norm": 0.0, "learning_rate": 5.378033877806724e-06, "loss": 1.0156, "step": 16954 }, { "epoch": 0.663393066750137, "grad_norm": 0.0, "learning_rate": 5.37691016291305e-06, "loss": 1.0329, "step": 16955 }, { "epoch": 0.6634321934423664, "grad_norm": 0.0, "learning_rate": 5.375786522260407e-06, "loss": 0.9719, "step": 16956 }, { "epoch": 0.6634713201345959, "grad_norm": 0.0, "learning_rate": 5.374662955866841e-06, "loss": 0.9469, "step": 16957 }, { "epoch": 0.6635104468268253, "grad_norm": 0.0, "learning_rate": 5.373539463750388e-06, "loss": 0.9842, "step": 16958 }, { "epoch": 0.6635495735190547, "grad_norm": 0.0, "learning_rate": 5.372416045929093e-06, "loss": 1.1381, "step": 16959 }, { "epoch": 0.6635887002112841, "grad_norm": 0.0, "learning_rate": 5.371292702420998e-06, "loss": 1.0221, "step": 16960 }, { "epoch": 0.6636278269035136, "grad_norm": 0.0, "learning_rate": 5.370169433244148e-06, "loss": 0.9287, "step": 16961 }, { "epoch": 0.663666953595743, "grad_norm": 0.0, "learning_rate": 5.369046238416569e-06, "loss": 1.0178, "step": 16962 }, { "epoch": 0.6637060802879724, "grad_norm": 0.0, "learning_rate": 5.367923117956309e-06, "loss": 0.994, "step": 16963 }, { "epoch": 0.6637452069802019, "grad_norm": 0.0, "learning_rate": 5.366800071881393e-06, "loss": 1.0533, "step": 16964 }, { "epoch": 0.6637843336724313, "grad_norm": 0.0, "learning_rate": 5.365677100209868e-06, "loss": 1.0187, "step": 16965 }, { "epoch": 0.6638234603646608, "grad_norm": 0.0, "learning_rate": 5.3645542029597594e-06, "loss": 1.0618, "step": 16966 }, { "epoch": 0.6638625870568902, "grad_norm": 0.0, "learning_rate": 5.363431380149104e-06, "loss": 0.9091, "step": 16967 }, { "epoch": 0.6639017137491197, "grad_norm": 0.0, "learning_rate": 5.362308631795925e-06, "loss": 0.9224, "step": 16968 }, { "epoch": 0.663940840441349, "grad_norm": 0.0, "learning_rate": 5.361185957918264e-06, "loss": 0.9641, "step": 16969 }, { "epoch": 0.6639799671335785, "grad_norm": 0.0, "learning_rate": 5.360063358534141e-06, "loss": 1.0712, "step": 16970 }, { "epoch": 0.6640190938258079, "grad_norm": 0.0, "learning_rate": 5.35894083366159e-06, "loss": 1.0526, "step": 16971 }, { "epoch": 0.6640582205180374, "grad_norm": 0.0, "learning_rate": 5.357818383318629e-06, "loss": 1.0618, "step": 16972 }, { "epoch": 0.6640973472102668, "grad_norm": 0.0, "learning_rate": 5.356696007523288e-06, "loss": 1.0168, "step": 16973 }, { "epoch": 0.6641364739024963, "grad_norm": 0.0, "learning_rate": 5.355573706293591e-06, "loss": 0.9692, "step": 16974 }, { "epoch": 0.6641756005947257, "grad_norm": 0.0, "learning_rate": 5.354451479647564e-06, "loss": 1.0828, "step": 16975 }, { "epoch": 0.6642147272869552, "grad_norm": 0.0, "learning_rate": 5.353329327603222e-06, "loss": 1.1445, "step": 16976 }, { "epoch": 0.6642538539791846, "grad_norm": 0.0, "learning_rate": 5.3522072501785884e-06, "loss": 0.8965, "step": 16977 }, { "epoch": 0.6642929806714141, "grad_norm": 0.0, "learning_rate": 5.351085247391681e-06, "loss": 0.9734, "step": 16978 }, { "epoch": 0.6643321073636435, "grad_norm": 0.0, "learning_rate": 5.3499633192605245e-06, "loss": 1.0382, "step": 16979 }, { "epoch": 0.664371234055873, "grad_norm": 0.0, "learning_rate": 5.348841465803125e-06, "loss": 0.9322, "step": 16980 }, { "epoch": 0.6644103607481023, "grad_norm": 0.0, "learning_rate": 5.34771968703751e-06, "loss": 1.0129, "step": 16981 }, { "epoch": 0.6644494874403318, "grad_norm": 0.0, "learning_rate": 5.346597982981676e-06, "loss": 1.0432, "step": 16982 }, { "epoch": 0.6644886141325612, "grad_norm": 0.0, "learning_rate": 5.345476353653656e-06, "loss": 1.0993, "step": 16983 }, { "epoch": 0.6645277408247907, "grad_norm": 0.0, "learning_rate": 5.344354799071451e-06, "loss": 0.9779, "step": 16984 }, { "epoch": 0.6645668675170201, "grad_norm": 0.0, "learning_rate": 5.343233319253076e-06, "loss": 1.0582, "step": 16985 }, { "epoch": 0.6646059942092496, "grad_norm": 0.0, "learning_rate": 5.342111914216532e-06, "loss": 1.0335, "step": 16986 }, { "epoch": 0.664645120901479, "grad_norm": 0.0, "learning_rate": 5.340990583979841e-06, "loss": 0.9436, "step": 16987 }, { "epoch": 0.6646842475937085, "grad_norm": 0.0, "learning_rate": 5.339869328561e-06, "loss": 1.0912, "step": 16988 }, { "epoch": 0.6647233742859379, "grad_norm": 0.0, "learning_rate": 5.338748147978022e-06, "loss": 1.1313, "step": 16989 }, { "epoch": 0.6647625009781674, "grad_norm": 0.0, "learning_rate": 5.337627042248904e-06, "loss": 1.1251, "step": 16990 }, { "epoch": 0.6648016276703967, "grad_norm": 0.0, "learning_rate": 5.336506011391653e-06, "loss": 1.023, "step": 16991 }, { "epoch": 0.6648407543626261, "grad_norm": 0.0, "learning_rate": 5.3353850554242715e-06, "loss": 1.0032, "step": 16992 }, { "epoch": 0.6648798810548556, "grad_norm": 0.0, "learning_rate": 5.334264174364766e-06, "loss": 1.0211, "step": 16993 }, { "epoch": 0.664919007747085, "grad_norm": 0.0, "learning_rate": 5.333143368231126e-06, "loss": 0.9822, "step": 16994 }, { "epoch": 0.6649581344393145, "grad_norm": 0.0, "learning_rate": 5.332022637041356e-06, "loss": 0.9796, "step": 16995 }, { "epoch": 0.6649972611315439, "grad_norm": 0.0, "learning_rate": 5.330901980813459e-06, "loss": 1.1103, "step": 16996 }, { "epoch": 0.6650363878237734, "grad_norm": 0.0, "learning_rate": 5.329781399565419e-06, "loss": 1.0421, "step": 16997 }, { "epoch": 0.6650755145160028, "grad_norm": 0.0, "learning_rate": 5.328660893315238e-06, "loss": 0.9851, "step": 16998 }, { "epoch": 0.6651146412082323, "grad_norm": 0.0, "learning_rate": 5.327540462080916e-06, "loss": 1.0786, "step": 16999 }, { "epoch": 0.6651537679004617, "grad_norm": 0.0, "learning_rate": 5.326420105880432e-06, "loss": 0.9821, "step": 17000 }, { "epoch": 0.6651928945926912, "grad_norm": 0.0, "learning_rate": 5.325299824731786e-06, "loss": 0.9854, "step": 17001 }, { "epoch": 0.6652320212849205, "grad_norm": 0.0, "learning_rate": 5.324179618652971e-06, "loss": 1.0477, "step": 17002 }, { "epoch": 0.66527114797715, "grad_norm": 0.0, "learning_rate": 5.323059487661969e-06, "loss": 1.0381, "step": 17003 }, { "epoch": 0.6653102746693794, "grad_norm": 0.0, "learning_rate": 5.32193943177677e-06, "loss": 0.9709, "step": 17004 }, { "epoch": 0.6653494013616089, "grad_norm": 0.0, "learning_rate": 5.320819451015363e-06, "loss": 0.9228, "step": 17005 }, { "epoch": 0.6653885280538383, "grad_norm": 0.0, "learning_rate": 5.319699545395736e-06, "loss": 1.2228, "step": 17006 }, { "epoch": 0.6654276547460678, "grad_norm": 0.0, "learning_rate": 5.318579714935866e-06, "loss": 0.9157, "step": 17007 }, { "epoch": 0.6654667814382972, "grad_norm": 0.0, "learning_rate": 5.317459959653741e-06, "loss": 0.9619, "step": 17008 }, { "epoch": 0.6655059081305267, "grad_norm": 0.0, "learning_rate": 5.316340279567335e-06, "loss": 0.9512, "step": 17009 }, { "epoch": 0.6655450348227561, "grad_norm": 0.0, "learning_rate": 5.315220674694643e-06, "loss": 1.0379, "step": 17010 }, { "epoch": 0.6655841615149856, "grad_norm": 0.0, "learning_rate": 5.314101145053634e-06, "loss": 0.968, "step": 17011 }, { "epoch": 0.665623288207215, "grad_norm": 0.0, "learning_rate": 5.312981690662293e-06, "loss": 1.0365, "step": 17012 }, { "epoch": 0.6656624148994444, "grad_norm": 0.0, "learning_rate": 5.311862311538583e-06, "loss": 1.0745, "step": 17013 }, { "epoch": 0.6657015415916738, "grad_norm": 0.0, "learning_rate": 5.3107430077004984e-06, "loss": 1.0863, "step": 17014 }, { "epoch": 0.6657406682839033, "grad_norm": 0.0, "learning_rate": 5.3096237791660014e-06, "loss": 1.0423, "step": 17015 }, { "epoch": 0.6657797949761327, "grad_norm": 0.0, "learning_rate": 5.308504625953072e-06, "loss": 1.1509, "step": 17016 }, { "epoch": 0.6658189216683622, "grad_norm": 0.0, "learning_rate": 5.3073855480796735e-06, "loss": 1.0609, "step": 17017 }, { "epoch": 0.6658580483605916, "grad_norm": 0.0, "learning_rate": 5.30626654556379e-06, "loss": 0.9778, "step": 17018 }, { "epoch": 0.6658971750528211, "grad_norm": 0.0, "learning_rate": 5.30514761842338e-06, "loss": 0.9803, "step": 17019 }, { "epoch": 0.6659363017450505, "grad_norm": 0.0, "learning_rate": 5.30402876667642e-06, "loss": 0.9534, "step": 17020 }, { "epoch": 0.6659754284372799, "grad_norm": 0.0, "learning_rate": 5.3029099903408695e-06, "loss": 1.0474, "step": 17021 }, { "epoch": 0.6660145551295094, "grad_norm": 0.0, "learning_rate": 5.301791289434699e-06, "loss": 1.0052, "step": 17022 }, { "epoch": 0.6660536818217387, "grad_norm": 0.0, "learning_rate": 5.300672663975875e-06, "loss": 1.0564, "step": 17023 }, { "epoch": 0.6660928085139682, "grad_norm": 0.0, "learning_rate": 5.299554113982362e-06, "loss": 0.8806, "step": 17024 }, { "epoch": 0.6661319352061976, "grad_norm": 0.0, "learning_rate": 5.298435639472115e-06, "loss": 0.944, "step": 17025 }, { "epoch": 0.6661710618984271, "grad_norm": 0.0, "learning_rate": 5.297317240463102e-06, "loss": 1.1126, "step": 17026 }, { "epoch": 0.6662101885906565, "grad_norm": 0.0, "learning_rate": 5.29619891697328e-06, "loss": 0.8902, "step": 17027 }, { "epoch": 0.666249315282886, "grad_norm": 0.0, "learning_rate": 5.295080669020614e-06, "loss": 0.9801, "step": 17028 }, { "epoch": 0.6662884419751154, "grad_norm": 0.0, "learning_rate": 5.293962496623051e-06, "loss": 1.0602, "step": 17029 }, { "epoch": 0.6663275686673449, "grad_norm": 0.0, "learning_rate": 5.292844399798559e-06, "loss": 0.9694, "step": 17030 }, { "epoch": 0.6663666953595743, "grad_norm": 0.0, "learning_rate": 5.29172637856508e-06, "loss": 0.9359, "step": 17031 }, { "epoch": 0.6664058220518038, "grad_norm": 0.0, "learning_rate": 5.2906084329405825e-06, "loss": 0.9954, "step": 17032 }, { "epoch": 0.6664449487440331, "grad_norm": 0.0, "learning_rate": 5.28949056294301e-06, "loss": 0.9866, "step": 17033 }, { "epoch": 0.6664840754362626, "grad_norm": 0.0, "learning_rate": 5.288372768590317e-06, "loss": 1.0031, "step": 17034 }, { "epoch": 0.666523202128492, "grad_norm": 0.0, "learning_rate": 5.287255049900453e-06, "loss": 1.0628, "step": 17035 }, { "epoch": 0.6665623288207215, "grad_norm": 0.0, "learning_rate": 5.2861374068913654e-06, "loss": 0.9069, "step": 17036 }, { "epoch": 0.6666014555129509, "grad_norm": 0.0, "learning_rate": 5.285019839581005e-06, "loss": 1.0399, "step": 17037 }, { "epoch": 0.6666405822051804, "grad_norm": 0.0, "learning_rate": 5.283902347987322e-06, "loss": 0.9985, "step": 17038 }, { "epoch": 0.6666797088974098, "grad_norm": 0.0, "learning_rate": 5.282784932128253e-06, "loss": 1.0369, "step": 17039 }, { "epoch": 0.6667188355896393, "grad_norm": 0.0, "learning_rate": 5.2816675920217475e-06, "loss": 1.0034, "step": 17040 }, { "epoch": 0.6667579622818687, "grad_norm": 0.0, "learning_rate": 5.280550327685752e-06, "loss": 1.106, "step": 17041 }, { "epoch": 0.6667970889740982, "grad_norm": 0.0, "learning_rate": 5.2794331391382e-06, "loss": 0.9933, "step": 17042 }, { "epoch": 0.6668362156663276, "grad_norm": 0.0, "learning_rate": 5.278316026397037e-06, "loss": 0.9716, "step": 17043 }, { "epoch": 0.666875342358557, "grad_norm": 0.0, "learning_rate": 5.277198989480202e-06, "loss": 0.9704, "step": 17044 }, { "epoch": 0.6669144690507864, "grad_norm": 0.0, "learning_rate": 5.276082028405638e-06, "loss": 1.0125, "step": 17045 }, { "epoch": 0.6669535957430159, "grad_norm": 0.0, "learning_rate": 5.274965143191272e-06, "loss": 0.978, "step": 17046 }, { "epoch": 0.6669927224352453, "grad_norm": 0.0, "learning_rate": 5.273848333855045e-06, "loss": 1.0488, "step": 17047 }, { "epoch": 0.6670318491274748, "grad_norm": 0.0, "learning_rate": 5.272731600414895e-06, "loss": 1.1107, "step": 17048 }, { "epoch": 0.6670709758197042, "grad_norm": 0.0, "learning_rate": 5.27161494288875e-06, "loss": 0.9793, "step": 17049 }, { "epoch": 0.6671101025119336, "grad_norm": 0.0, "learning_rate": 5.270498361294542e-06, "loss": 0.9348, "step": 17050 }, { "epoch": 0.6671492292041631, "grad_norm": 0.0, "learning_rate": 5.269381855650209e-06, "loss": 0.9592, "step": 17051 }, { "epoch": 0.6671883558963925, "grad_norm": 0.0, "learning_rate": 5.268265425973672e-06, "loss": 1.0699, "step": 17052 }, { "epoch": 0.667227482588622, "grad_norm": 0.0, "learning_rate": 5.2671490722828625e-06, "loss": 0.9158, "step": 17053 }, { "epoch": 0.6672666092808514, "grad_norm": 0.0, "learning_rate": 5.266032794595708e-06, "loss": 1.0557, "step": 17054 }, { "epoch": 0.6673057359730808, "grad_norm": 0.0, "learning_rate": 5.264916592930139e-06, "loss": 0.8138, "step": 17055 }, { "epoch": 0.6673448626653102, "grad_norm": 0.0, "learning_rate": 5.263800467304072e-06, "loss": 1.0598, "step": 17056 }, { "epoch": 0.6673839893575397, "grad_norm": 0.0, "learning_rate": 5.26268441773544e-06, "loss": 1.1088, "step": 17057 }, { "epoch": 0.6674231160497691, "grad_norm": 0.0, "learning_rate": 5.261568444242151e-06, "loss": 1.0367, "step": 17058 }, { "epoch": 0.6674622427419986, "grad_norm": 0.0, "learning_rate": 5.260452546842143e-06, "loss": 0.9557, "step": 17059 }, { "epoch": 0.667501369434228, "grad_norm": 0.0, "learning_rate": 5.259336725553323e-06, "loss": 0.9458, "step": 17060 }, { "epoch": 0.6675404961264575, "grad_norm": 0.0, "learning_rate": 5.2582209803936215e-06, "loss": 0.9951, "step": 17061 }, { "epoch": 0.6675796228186869, "grad_norm": 0.0, "learning_rate": 5.257105311380939e-06, "loss": 1.0856, "step": 17062 }, { "epoch": 0.6676187495109164, "grad_norm": 0.0, "learning_rate": 5.255989718533212e-06, "loss": 0.929, "step": 17063 }, { "epoch": 0.6676578762031458, "grad_norm": 0.0, "learning_rate": 5.254874201868341e-06, "loss": 0.9899, "step": 17064 }, { "epoch": 0.6676970028953753, "grad_norm": 0.0, "learning_rate": 5.253758761404246e-06, "loss": 1.0019, "step": 17065 }, { "epoch": 0.6677361295876046, "grad_norm": 0.0, "learning_rate": 5.2526433971588366e-06, "loss": 0.9188, "step": 17066 }, { "epoch": 0.6677752562798341, "grad_norm": 0.0, "learning_rate": 5.251528109150024e-06, "loss": 0.9628, "step": 17067 }, { "epoch": 0.6678143829720635, "grad_norm": 0.0, "learning_rate": 5.2504128973957205e-06, "loss": 1.0101, "step": 17068 }, { "epoch": 0.667853509664293, "grad_norm": 0.0, "learning_rate": 5.249297761913839e-06, "loss": 1.0546, "step": 17069 }, { "epoch": 0.6678926363565224, "grad_norm": 0.0, "learning_rate": 5.248182702722278e-06, "loss": 0.9956, "step": 17070 }, { "epoch": 0.6679317630487519, "grad_norm": 0.0, "learning_rate": 5.247067719838948e-06, "loss": 0.9948, "step": 17071 }, { "epoch": 0.6679708897409813, "grad_norm": 0.0, "learning_rate": 5.245952813281754e-06, "loss": 1.0493, "step": 17072 }, { "epoch": 0.6680100164332108, "grad_norm": 0.0, "learning_rate": 5.244837983068605e-06, "loss": 1.1084, "step": 17073 }, { "epoch": 0.6680491431254402, "grad_norm": 0.0, "learning_rate": 5.243723229217397e-06, "loss": 0.9956, "step": 17074 }, { "epoch": 0.6680882698176697, "grad_norm": 0.0, "learning_rate": 5.2426085517460325e-06, "loss": 0.9088, "step": 17075 }, { "epoch": 0.668127396509899, "grad_norm": 0.0, "learning_rate": 5.241493950672414e-06, "loss": 0.9762, "step": 17076 }, { "epoch": 0.6681665232021284, "grad_norm": 0.0, "learning_rate": 5.240379426014444e-06, "loss": 0.9938, "step": 17077 }, { "epoch": 0.6682056498943579, "grad_norm": 0.0, "learning_rate": 5.239264977790011e-06, "loss": 1.0294, "step": 17078 }, { "epoch": 0.6682447765865873, "grad_norm": 0.0, "learning_rate": 5.238150606017021e-06, "loss": 0.9572, "step": 17079 }, { "epoch": 0.6682839032788168, "grad_norm": 0.0, "learning_rate": 5.237036310713359e-06, "loss": 1.0173, "step": 17080 }, { "epoch": 0.6683230299710462, "grad_norm": 0.0, "learning_rate": 5.235922091896932e-06, "loss": 0.9526, "step": 17081 }, { "epoch": 0.6683621566632757, "grad_norm": 0.0, "learning_rate": 5.234807949585623e-06, "loss": 0.989, "step": 17082 }, { "epoch": 0.6684012833555051, "grad_norm": 0.0, "learning_rate": 5.233693883797332e-06, "loss": 0.801, "step": 17083 }, { "epoch": 0.6684404100477346, "grad_norm": 0.0, "learning_rate": 5.232579894549939e-06, "loss": 0.9828, "step": 17084 }, { "epoch": 0.668479536739964, "grad_norm": 0.0, "learning_rate": 5.231465981861341e-06, "loss": 1.0392, "step": 17085 }, { "epoch": 0.6685186634321935, "grad_norm": 0.0, "learning_rate": 5.230352145749422e-06, "loss": 0.9039, "step": 17086 }, { "epoch": 0.6685577901244228, "grad_norm": 0.0, "learning_rate": 5.229238386232076e-06, "loss": 0.9203, "step": 17087 }, { "epoch": 0.6685969168166523, "grad_norm": 0.0, "learning_rate": 5.2281247033271795e-06, "loss": 0.9955, "step": 17088 }, { "epoch": 0.6686360435088817, "grad_norm": 0.0, "learning_rate": 5.227011097052621e-06, "loss": 1.1134, "step": 17089 }, { "epoch": 0.6686751702011112, "grad_norm": 0.0, "learning_rate": 5.225897567426287e-06, "loss": 1.0223, "step": 17090 }, { "epoch": 0.6687142968933406, "grad_norm": 0.0, "learning_rate": 5.224784114466051e-06, "loss": 0.9455, "step": 17091 }, { "epoch": 0.6687534235855701, "grad_norm": 0.0, "learning_rate": 5.2236707381898e-06, "loss": 1.0191, "step": 17092 }, { "epoch": 0.6687925502777995, "grad_norm": 0.0, "learning_rate": 5.222557438615411e-06, "loss": 1.118, "step": 17093 }, { "epoch": 0.668831676970029, "grad_norm": 0.0, "learning_rate": 5.221444215760767e-06, "loss": 0.9279, "step": 17094 }, { "epoch": 0.6688708036622584, "grad_norm": 0.0, "learning_rate": 5.220331069643737e-06, "loss": 0.9428, "step": 17095 }, { "epoch": 0.6689099303544879, "grad_norm": 0.0, "learning_rate": 5.219218000282204e-06, "loss": 0.9537, "step": 17096 }, { "epoch": 0.6689490570467173, "grad_norm": 0.0, "learning_rate": 5.218105007694037e-06, "loss": 1.0008, "step": 17097 }, { "epoch": 0.6689881837389468, "grad_norm": 0.0, "learning_rate": 5.216992091897111e-06, "loss": 0.9477, "step": 17098 }, { "epoch": 0.6690273104311761, "grad_norm": 0.0, "learning_rate": 5.215879252909298e-06, "loss": 1.0322, "step": 17099 }, { "epoch": 0.6690664371234056, "grad_norm": 0.0, "learning_rate": 5.214766490748473e-06, "loss": 1.0855, "step": 17100 }, { "epoch": 0.669105563815635, "grad_norm": 0.0, "learning_rate": 5.213653805432499e-06, "loss": 1.1318, "step": 17101 }, { "epoch": 0.6691446905078645, "grad_norm": 0.0, "learning_rate": 5.212541196979248e-06, "loss": 1.0648, "step": 17102 }, { "epoch": 0.6691838172000939, "grad_norm": 0.0, "learning_rate": 5.211428665406584e-06, "loss": 1.0786, "step": 17103 }, { "epoch": 0.6692229438923234, "grad_norm": 0.0, "learning_rate": 5.21031621073238e-06, "loss": 0.9303, "step": 17104 }, { "epoch": 0.6692620705845528, "grad_norm": 0.0, "learning_rate": 5.209203832974492e-06, "loss": 0.8839, "step": 17105 }, { "epoch": 0.6693011972767822, "grad_norm": 0.0, "learning_rate": 5.208091532150792e-06, "loss": 0.978, "step": 17106 }, { "epoch": 0.6693403239690117, "grad_norm": 0.0, "learning_rate": 5.20697930827913e-06, "loss": 0.9801, "step": 17107 }, { "epoch": 0.669379450661241, "grad_norm": 0.0, "learning_rate": 5.2058671613773805e-06, "loss": 1.0612, "step": 17108 }, { "epoch": 0.6694185773534705, "grad_norm": 0.0, "learning_rate": 5.2047550914633935e-06, "loss": 0.889, "step": 17109 }, { "epoch": 0.6694577040456999, "grad_norm": 0.0, "learning_rate": 5.203643098555036e-06, "loss": 1.06, "step": 17110 }, { "epoch": 0.6694968307379294, "grad_norm": 0.0, "learning_rate": 5.202531182670151e-06, "loss": 0.9833, "step": 17111 }, { "epoch": 0.6695359574301588, "grad_norm": 0.0, "learning_rate": 5.201419343826611e-06, "loss": 0.9195, "step": 17112 }, { "epoch": 0.6695750841223883, "grad_norm": 0.0, "learning_rate": 5.200307582042261e-06, "loss": 1.0934, "step": 17113 }, { "epoch": 0.6696142108146177, "grad_norm": 0.0, "learning_rate": 5.19919589733496e-06, "loss": 0.9652, "step": 17114 }, { "epoch": 0.6696533375068472, "grad_norm": 0.0, "learning_rate": 5.198084289722553e-06, "loss": 0.9375, "step": 17115 }, { "epoch": 0.6696924641990766, "grad_norm": 0.0, "learning_rate": 5.196972759222895e-06, "loss": 0.9547, "step": 17116 }, { "epoch": 0.6697315908913061, "grad_norm": 0.0, "learning_rate": 5.195861305853837e-06, "loss": 0.9738, "step": 17117 }, { "epoch": 0.6697707175835355, "grad_norm": 0.0, "learning_rate": 5.19474992963323e-06, "loss": 0.9445, "step": 17118 }, { "epoch": 0.669809844275765, "grad_norm": 0.0, "learning_rate": 5.193638630578914e-06, "loss": 0.9451, "step": 17119 }, { "epoch": 0.6698489709679943, "grad_norm": 0.0, "learning_rate": 5.192527408708739e-06, "loss": 0.9258, "step": 17120 }, { "epoch": 0.6698880976602238, "grad_norm": 0.0, "learning_rate": 5.19141626404055e-06, "loss": 0.9677, "step": 17121 }, { "epoch": 0.6699272243524532, "grad_norm": 0.0, "learning_rate": 5.190305196592193e-06, "loss": 1.087, "step": 17122 }, { "epoch": 0.6699663510446827, "grad_norm": 0.0, "learning_rate": 5.189194206381505e-06, "loss": 1.0578, "step": 17123 }, { "epoch": 0.6700054777369121, "grad_norm": 0.0, "learning_rate": 5.188083293426332e-06, "loss": 0.9321, "step": 17124 }, { "epoch": 0.6700446044291416, "grad_norm": 0.0, "learning_rate": 5.1869724577445055e-06, "loss": 0.9436, "step": 17125 }, { "epoch": 0.670083731121371, "grad_norm": 0.0, "learning_rate": 5.185861699353877e-06, "loss": 1.0803, "step": 17126 }, { "epoch": 0.6701228578136005, "grad_norm": 0.0, "learning_rate": 5.1847510182722735e-06, "loss": 0.9577, "step": 17127 }, { "epoch": 0.6701619845058299, "grad_norm": 0.0, "learning_rate": 5.183640414517539e-06, "loss": 1.0214, "step": 17128 }, { "epoch": 0.6702011111980594, "grad_norm": 0.0, "learning_rate": 5.1825298881075e-06, "loss": 1.0494, "step": 17129 }, { "epoch": 0.6702402378902887, "grad_norm": 0.0, "learning_rate": 5.1814194390599945e-06, "loss": 0.9739, "step": 17130 }, { "epoch": 0.6702793645825182, "grad_norm": 0.0, "learning_rate": 5.180309067392855e-06, "loss": 0.9594, "step": 17131 }, { "epoch": 0.6703184912747476, "grad_norm": 0.0, "learning_rate": 5.179198773123917e-06, "loss": 1.0222, "step": 17132 }, { "epoch": 0.6703576179669771, "grad_norm": 0.0, "learning_rate": 5.1780885562710014e-06, "loss": 0.8806, "step": 17133 }, { "epoch": 0.6703967446592065, "grad_norm": 0.0, "learning_rate": 5.176978416851941e-06, "loss": 0.9261, "step": 17134 }, { "epoch": 0.6704358713514359, "grad_norm": 0.0, "learning_rate": 5.1758683548845676e-06, "loss": 0.9793, "step": 17135 }, { "epoch": 0.6704749980436654, "grad_norm": 0.0, "learning_rate": 5.174758370386699e-06, "loss": 1.0513, "step": 17136 }, { "epoch": 0.6705141247358948, "grad_norm": 0.0, "learning_rate": 5.1736484633761665e-06, "loss": 0.9818, "step": 17137 }, { "epoch": 0.6705532514281243, "grad_norm": 0.0, "learning_rate": 5.1725386338707896e-06, "loss": 1.089, "step": 17138 }, { "epoch": 0.6705923781203537, "grad_norm": 0.0, "learning_rate": 5.171428881888398e-06, "loss": 0.9698, "step": 17139 }, { "epoch": 0.6706315048125832, "grad_norm": 0.0, "learning_rate": 5.170319207446806e-06, "loss": 1.023, "step": 17140 }, { "epoch": 0.6706706315048125, "grad_norm": 0.0, "learning_rate": 5.169209610563837e-06, "loss": 1.0582, "step": 17141 }, { "epoch": 0.670709758197042, "grad_norm": 0.0, "learning_rate": 5.168100091257301e-06, "loss": 1.0525, "step": 17142 }, { "epoch": 0.6707488848892714, "grad_norm": 0.0, "learning_rate": 5.16699064954503e-06, "loss": 0.8802, "step": 17143 }, { "epoch": 0.6707880115815009, "grad_norm": 0.0, "learning_rate": 5.165881285444832e-06, "loss": 0.9309, "step": 17144 }, { "epoch": 0.6708271382737303, "grad_norm": 0.0, "learning_rate": 5.164771998974524e-06, "loss": 1.0562, "step": 17145 }, { "epoch": 0.6708662649659598, "grad_norm": 0.0, "learning_rate": 5.1636627901519155e-06, "loss": 0.9543, "step": 17146 }, { "epoch": 0.6709053916581892, "grad_norm": 0.0, "learning_rate": 5.162553658994823e-06, "loss": 1.0815, "step": 17147 }, { "epoch": 0.6709445183504187, "grad_norm": 0.0, "learning_rate": 5.1614446055210576e-06, "loss": 1.0469, "step": 17148 }, { "epoch": 0.6709836450426481, "grad_norm": 0.0, "learning_rate": 5.160335629748432e-06, "loss": 1.0641, "step": 17149 }, { "epoch": 0.6710227717348776, "grad_norm": 0.0, "learning_rate": 5.159226731694748e-06, "loss": 0.9774, "step": 17150 }, { "epoch": 0.671061898427107, "grad_norm": 0.0, "learning_rate": 5.158117911377816e-06, "loss": 0.9049, "step": 17151 }, { "epoch": 0.6711010251193364, "grad_norm": 0.0, "learning_rate": 5.157009168815443e-06, "loss": 1.1097, "step": 17152 }, { "epoch": 0.6711401518115658, "grad_norm": 0.0, "learning_rate": 5.155900504025439e-06, "loss": 1.0092, "step": 17153 }, { "epoch": 0.6711792785037953, "grad_norm": 0.0, "learning_rate": 5.154791917025599e-06, "loss": 1.1304, "step": 17154 }, { "epoch": 0.6712184051960247, "grad_norm": 0.0, "learning_rate": 5.153683407833734e-06, "loss": 0.968, "step": 17155 }, { "epoch": 0.6712575318882542, "grad_norm": 0.0, "learning_rate": 5.152574976467633e-06, "loss": 1.0192, "step": 17156 }, { "epoch": 0.6712966585804836, "grad_norm": 0.0, "learning_rate": 5.151466622945109e-06, "loss": 1.0996, "step": 17157 }, { "epoch": 0.6713357852727131, "grad_norm": 0.0, "learning_rate": 5.150358347283954e-06, "loss": 0.9154, "step": 17158 }, { "epoch": 0.6713749119649425, "grad_norm": 0.0, "learning_rate": 5.14925014950197e-06, "loss": 0.9612, "step": 17159 }, { "epoch": 0.671414038657172, "grad_norm": 0.0, "learning_rate": 5.148142029616943e-06, "loss": 0.9294, "step": 17160 }, { "epoch": 0.6714531653494014, "grad_norm": 0.0, "learning_rate": 5.1470339876466814e-06, "loss": 0.9841, "step": 17161 }, { "epoch": 0.6714922920416307, "grad_norm": 0.0, "learning_rate": 5.14592602360897e-06, "loss": 0.9886, "step": 17162 }, { "epoch": 0.6715314187338602, "grad_norm": 0.0, "learning_rate": 5.144818137521609e-06, "loss": 1.0241, "step": 17163 }, { "epoch": 0.6715705454260896, "grad_norm": 0.0, "learning_rate": 5.1437103294023805e-06, "loss": 0.9903, "step": 17164 }, { "epoch": 0.6716096721183191, "grad_norm": 0.0, "learning_rate": 5.142602599269077e-06, "loss": 1.042, "step": 17165 }, { "epoch": 0.6716487988105485, "grad_norm": 0.0, "learning_rate": 5.141494947139491e-06, "loss": 1.0724, "step": 17166 }, { "epoch": 0.671687925502778, "grad_norm": 0.0, "learning_rate": 5.140387373031413e-06, "loss": 1.0657, "step": 17167 }, { "epoch": 0.6717270521950074, "grad_norm": 0.0, "learning_rate": 5.139279876962618e-06, "loss": 1.0537, "step": 17168 }, { "epoch": 0.6717661788872369, "grad_norm": 0.0, "learning_rate": 5.138172458950899e-06, "loss": 0.9236, "step": 17169 }, { "epoch": 0.6718053055794663, "grad_norm": 0.0, "learning_rate": 5.1370651190140376e-06, "loss": 1.1008, "step": 17170 }, { "epoch": 0.6718444322716958, "grad_norm": 0.0, "learning_rate": 5.1359578571698195e-06, "loss": 1.0146, "step": 17171 }, { "epoch": 0.6718835589639252, "grad_norm": 0.0, "learning_rate": 5.13485067343602e-06, "loss": 1.0555, "step": 17172 }, { "epoch": 0.6719226856561547, "grad_norm": 0.0, "learning_rate": 5.133743567830427e-06, "loss": 0.9804, "step": 17173 }, { "epoch": 0.671961812348384, "grad_norm": 0.0, "learning_rate": 5.1326365403708105e-06, "loss": 1.0236, "step": 17174 }, { "epoch": 0.6720009390406135, "grad_norm": 0.0, "learning_rate": 5.131529591074952e-06, "loss": 0.8941, "step": 17175 }, { "epoch": 0.6720400657328429, "grad_norm": 0.0, "learning_rate": 5.1304227199606285e-06, "loss": 0.8839, "step": 17176 }, { "epoch": 0.6720791924250724, "grad_norm": 0.0, "learning_rate": 5.129315927045616e-06, "loss": 1.0324, "step": 17177 }, { "epoch": 0.6721183191173018, "grad_norm": 0.0, "learning_rate": 5.1282092123476836e-06, "loss": 0.9493, "step": 17178 }, { "epoch": 0.6721574458095313, "grad_norm": 0.0, "learning_rate": 5.1271025758846075e-06, "loss": 1.1289, "step": 17179 }, { "epoch": 0.6721965725017607, "grad_norm": 0.0, "learning_rate": 5.1259960176741594e-06, "loss": 1.0277, "step": 17180 }, { "epoch": 0.6722356991939902, "grad_norm": 0.0, "learning_rate": 5.124889537734106e-06, "loss": 0.8719, "step": 17181 }, { "epoch": 0.6722748258862196, "grad_norm": 0.0, "learning_rate": 5.123783136082218e-06, "loss": 0.9311, "step": 17182 }, { "epoch": 0.6723139525784491, "grad_norm": 0.0, "learning_rate": 5.122676812736262e-06, "loss": 0.9842, "step": 17183 }, { "epoch": 0.6723530792706784, "grad_norm": 0.0, "learning_rate": 5.121570567714007e-06, "loss": 0.8784, "step": 17184 }, { "epoch": 0.6723922059629079, "grad_norm": 0.0, "learning_rate": 5.120464401033214e-06, "loss": 0.8672, "step": 17185 }, { "epoch": 0.6724313326551373, "grad_norm": 0.0, "learning_rate": 5.119358312711651e-06, "loss": 1.0499, "step": 17186 }, { "epoch": 0.6724704593473668, "grad_norm": 0.0, "learning_rate": 5.11825230276707e-06, "loss": 0.9994, "step": 17187 }, { "epoch": 0.6725095860395962, "grad_norm": 0.0, "learning_rate": 5.117146371217249e-06, "loss": 0.9375, "step": 17188 }, { "epoch": 0.6725487127318257, "grad_norm": 0.0, "learning_rate": 5.116040518079933e-06, "loss": 0.9609, "step": 17189 }, { "epoch": 0.6725878394240551, "grad_norm": 0.0, "learning_rate": 5.114934743372891e-06, "loss": 0.9839, "step": 17190 }, { "epoch": 0.6726269661162845, "grad_norm": 0.0, "learning_rate": 5.113829047113868e-06, "loss": 0.9602, "step": 17191 }, { "epoch": 0.672666092808514, "grad_norm": 0.0, "learning_rate": 5.112723429320636e-06, "loss": 0.9334, "step": 17192 }, { "epoch": 0.6727052195007434, "grad_norm": 0.0, "learning_rate": 5.111617890010937e-06, "loss": 1.0814, "step": 17193 }, { "epoch": 0.6727443461929729, "grad_norm": 0.0, "learning_rate": 5.110512429202533e-06, "loss": 1.0148, "step": 17194 }, { "epoch": 0.6727834728852022, "grad_norm": 0.0, "learning_rate": 5.109407046913169e-06, "loss": 0.8288, "step": 17195 }, { "epoch": 0.6728225995774317, "grad_norm": 0.0, "learning_rate": 5.1083017431606e-06, "loss": 0.8893, "step": 17196 }, { "epoch": 0.6728617262696611, "grad_norm": 0.0, "learning_rate": 5.107196517962575e-06, "loss": 0.9503, "step": 17197 }, { "epoch": 0.6729008529618906, "grad_norm": 0.0, "learning_rate": 5.106091371336847e-06, "loss": 0.9263, "step": 17198 }, { "epoch": 0.67293997965412, "grad_norm": 0.0, "learning_rate": 5.1049863033011535e-06, "loss": 1.0327, "step": 17199 }, { "epoch": 0.6729791063463495, "grad_norm": 0.0, "learning_rate": 5.103881313873249e-06, "loss": 1.0403, "step": 17200 }, { "epoch": 0.6730182330385789, "grad_norm": 0.0, "learning_rate": 5.1027764030708735e-06, "loss": 0.9505, "step": 17201 }, { "epoch": 0.6730573597308084, "grad_norm": 0.0, "learning_rate": 5.101671570911777e-06, "loss": 1.0015, "step": 17202 }, { "epoch": 0.6730964864230378, "grad_norm": 0.0, "learning_rate": 5.100566817413693e-06, "loss": 0.9539, "step": 17203 }, { "epoch": 0.6731356131152673, "grad_norm": 0.0, "learning_rate": 5.099462142594372e-06, "loss": 1.0757, "step": 17204 }, { "epoch": 0.6731747398074966, "grad_norm": 0.0, "learning_rate": 5.0983575464715374e-06, "loss": 0.9679, "step": 17205 }, { "epoch": 0.6732138664997261, "grad_norm": 0.0, "learning_rate": 5.097253029062947e-06, "loss": 0.9521, "step": 17206 }, { "epoch": 0.6732529931919555, "grad_norm": 0.0, "learning_rate": 5.096148590386327e-06, "loss": 0.9352, "step": 17207 }, { "epoch": 0.673292119884185, "grad_norm": 0.0, "learning_rate": 5.0950442304594206e-06, "loss": 1.0094, "step": 17208 }, { "epoch": 0.6733312465764144, "grad_norm": 0.0, "learning_rate": 5.0939399492999475e-06, "loss": 0.9763, "step": 17209 }, { "epoch": 0.6733703732686439, "grad_norm": 0.0, "learning_rate": 5.092835746925659e-06, "loss": 0.9831, "step": 17210 }, { "epoch": 0.6734094999608733, "grad_norm": 0.0, "learning_rate": 5.0917316233542765e-06, "loss": 0.9588, "step": 17211 }, { "epoch": 0.6734486266531028, "grad_norm": 0.0, "learning_rate": 5.090627578603537e-06, "loss": 0.9282, "step": 17212 }, { "epoch": 0.6734877533453322, "grad_norm": 0.0, "learning_rate": 5.089523612691165e-06, "loss": 1.0269, "step": 17213 }, { "epoch": 0.6735268800375617, "grad_norm": 0.0, "learning_rate": 5.088419725634887e-06, "loss": 0.8886, "step": 17214 }, { "epoch": 0.6735660067297911, "grad_norm": 0.0, "learning_rate": 5.087315917452438e-06, "loss": 1.0657, "step": 17215 }, { "epoch": 0.6736051334220206, "grad_norm": 0.0, "learning_rate": 5.0862121881615405e-06, "loss": 0.992, "step": 17216 }, { "epoch": 0.6736442601142499, "grad_norm": 0.0, "learning_rate": 5.085108537779915e-06, "loss": 0.9787, "step": 17217 }, { "epoch": 0.6736833868064794, "grad_norm": 0.0, "learning_rate": 5.0840049663252864e-06, "loss": 0.9115, "step": 17218 }, { "epoch": 0.6737225134987088, "grad_norm": 0.0, "learning_rate": 5.0829014738153825e-06, "loss": 1.0674, "step": 17219 }, { "epoch": 0.6737616401909382, "grad_norm": 0.0, "learning_rate": 5.081798060267915e-06, "loss": 1.0278, "step": 17220 }, { "epoch": 0.6738007668831677, "grad_norm": 0.0, "learning_rate": 5.080694725700609e-06, "loss": 1.1265, "step": 17221 }, { "epoch": 0.6738398935753971, "grad_norm": 0.0, "learning_rate": 5.0795914701311845e-06, "loss": 1.0272, "step": 17222 }, { "epoch": 0.6738790202676266, "grad_norm": 0.0, "learning_rate": 5.0784882935773524e-06, "loss": 1.0168, "step": 17223 }, { "epoch": 0.673918146959856, "grad_norm": 0.0, "learning_rate": 5.07738519605683e-06, "loss": 0.9355, "step": 17224 }, { "epoch": 0.6739572736520855, "grad_norm": 0.0, "learning_rate": 5.076282177587339e-06, "loss": 0.8138, "step": 17225 }, { "epoch": 0.6739964003443148, "grad_norm": 0.0, "learning_rate": 5.075179238186581e-06, "loss": 0.9095, "step": 17226 }, { "epoch": 0.6740355270365443, "grad_norm": 0.0, "learning_rate": 5.074076377872272e-06, "loss": 0.9336, "step": 17227 }, { "epoch": 0.6740746537287737, "grad_norm": 0.0, "learning_rate": 5.0729735966621256e-06, "loss": 1.0013, "step": 17228 }, { "epoch": 0.6741137804210032, "grad_norm": 0.0, "learning_rate": 5.071870894573854e-06, "loss": 1.1159, "step": 17229 }, { "epoch": 0.6741529071132326, "grad_norm": 0.0, "learning_rate": 5.070768271625155e-06, "loss": 0.8942, "step": 17230 }, { "epoch": 0.6741920338054621, "grad_norm": 0.0, "learning_rate": 5.069665727833746e-06, "loss": 1.1595, "step": 17231 }, { "epoch": 0.6742311604976915, "grad_norm": 0.0, "learning_rate": 5.0685632632173185e-06, "loss": 1.0081, "step": 17232 }, { "epoch": 0.674270287189921, "grad_norm": 0.0, "learning_rate": 5.067460877793593e-06, "loss": 0.9665, "step": 17233 }, { "epoch": 0.6743094138821504, "grad_norm": 0.0, "learning_rate": 5.0663585715802625e-06, "loss": 1.029, "step": 17234 }, { "epoch": 0.6743485405743799, "grad_norm": 0.0, "learning_rate": 5.065256344595034e-06, "loss": 1.0852, "step": 17235 }, { "epoch": 0.6743876672666093, "grad_norm": 0.0, "learning_rate": 5.064154196855597e-06, "loss": 1.063, "step": 17236 }, { "epoch": 0.6744267939588388, "grad_norm": 0.0, "learning_rate": 5.063052128379667e-06, "loss": 1.0233, "step": 17237 }, { "epoch": 0.6744659206510681, "grad_norm": 0.0, "learning_rate": 5.06195013918493e-06, "loss": 0.9742, "step": 17238 }, { "epoch": 0.6745050473432976, "grad_norm": 0.0, "learning_rate": 5.06084822928909e-06, "loss": 1.0118, "step": 17239 }, { "epoch": 0.674544174035527, "grad_norm": 0.0, "learning_rate": 5.059746398709834e-06, "loss": 1.1313, "step": 17240 }, { "epoch": 0.6745833007277565, "grad_norm": 0.0, "learning_rate": 5.058644647464861e-06, "loss": 0.8322, "step": 17241 }, { "epoch": 0.6746224274199859, "grad_norm": 0.0, "learning_rate": 5.057542975571862e-06, "loss": 0.9708, "step": 17242 }, { "epoch": 0.6746615541122154, "grad_norm": 0.0, "learning_rate": 5.056441383048534e-06, "loss": 1.0629, "step": 17243 }, { "epoch": 0.6747006808044448, "grad_norm": 0.0, "learning_rate": 5.055339869912559e-06, "loss": 1.0075, "step": 17244 }, { "epoch": 0.6747398074966743, "grad_norm": 0.0, "learning_rate": 5.054238436181629e-06, "loss": 0.952, "step": 17245 }, { "epoch": 0.6747789341889037, "grad_norm": 0.0, "learning_rate": 5.0531370818734325e-06, "loss": 0.9767, "step": 17246 }, { "epoch": 0.6748180608811332, "grad_norm": 0.0, "learning_rate": 5.05203580700566e-06, "loss": 0.9413, "step": 17247 }, { "epoch": 0.6748571875733625, "grad_norm": 0.0, "learning_rate": 5.050934611595988e-06, "loss": 0.9496, "step": 17248 }, { "epoch": 0.6748963142655919, "grad_norm": 0.0, "learning_rate": 5.049833495662106e-06, "loss": 1.0189, "step": 17249 }, { "epoch": 0.6749354409578214, "grad_norm": 0.0, "learning_rate": 5.048732459221693e-06, "loss": 0.9406, "step": 17250 }, { "epoch": 0.6749745676500508, "grad_norm": 0.0, "learning_rate": 5.047631502292438e-06, "loss": 0.9973, "step": 17251 }, { "epoch": 0.6750136943422803, "grad_norm": 0.0, "learning_rate": 5.0465306248920096e-06, "loss": 1.0463, "step": 17252 }, { "epoch": 0.6750528210345097, "grad_norm": 0.0, "learning_rate": 5.045429827038099e-06, "loss": 0.9252, "step": 17253 }, { "epoch": 0.6750919477267392, "grad_norm": 0.0, "learning_rate": 5.0443291087483674e-06, "loss": 1.097, "step": 17254 }, { "epoch": 0.6751310744189686, "grad_norm": 0.0, "learning_rate": 5.043228470040509e-06, "loss": 0.9572, "step": 17255 }, { "epoch": 0.6751702011111981, "grad_norm": 0.0, "learning_rate": 5.042127910932185e-06, "loss": 0.9374, "step": 17256 }, { "epoch": 0.6752093278034275, "grad_norm": 0.0, "learning_rate": 5.041027431441079e-06, "loss": 0.972, "step": 17257 }, { "epoch": 0.675248454495657, "grad_norm": 0.0, "learning_rate": 5.039927031584854e-06, "loss": 0.9799, "step": 17258 }, { "epoch": 0.6752875811878863, "grad_norm": 0.0, "learning_rate": 5.038826711381186e-06, "loss": 0.9425, "step": 17259 }, { "epoch": 0.6753267078801158, "grad_norm": 0.0, "learning_rate": 5.037726470847746e-06, "loss": 1.0659, "step": 17260 }, { "epoch": 0.6753658345723452, "grad_norm": 0.0, "learning_rate": 5.0366263100022016e-06, "loss": 1.0225, "step": 17261 }, { "epoch": 0.6754049612645747, "grad_norm": 0.0, "learning_rate": 5.035526228862218e-06, "loss": 1.0841, "step": 17262 }, { "epoch": 0.6754440879568041, "grad_norm": 0.0, "learning_rate": 5.0344262274454605e-06, "loss": 0.9083, "step": 17263 }, { "epoch": 0.6754832146490336, "grad_norm": 0.0, "learning_rate": 5.0333263057696e-06, "loss": 0.9651, "step": 17264 }, { "epoch": 0.675522341341263, "grad_norm": 0.0, "learning_rate": 5.032226463852292e-06, "loss": 0.9941, "step": 17265 }, { "epoch": 0.6755614680334925, "grad_norm": 0.0, "learning_rate": 5.031126701711202e-06, "loss": 0.8534, "step": 17266 }, { "epoch": 0.6756005947257219, "grad_norm": 0.0, "learning_rate": 5.03002701936399e-06, "loss": 1.1068, "step": 17267 }, { "epoch": 0.6756397214179514, "grad_norm": 0.0, "learning_rate": 5.028927416828321e-06, "loss": 0.9538, "step": 17268 }, { "epoch": 0.6756788481101808, "grad_norm": 0.0, "learning_rate": 5.027827894121844e-06, "loss": 0.8513, "step": 17269 }, { "epoch": 0.6757179748024102, "grad_norm": 0.0, "learning_rate": 5.026728451262225e-06, "loss": 1.0269, "step": 17270 }, { "epoch": 0.6757571014946396, "grad_norm": 0.0, "learning_rate": 5.025629088267111e-06, "loss": 0.947, "step": 17271 }, { "epoch": 0.6757962281868691, "grad_norm": 0.0, "learning_rate": 5.024529805154161e-06, "loss": 1.1027, "step": 17272 }, { "epoch": 0.6758353548790985, "grad_norm": 0.0, "learning_rate": 5.023430601941029e-06, "loss": 0.8766, "step": 17273 }, { "epoch": 0.675874481571328, "grad_norm": 0.0, "learning_rate": 5.022331478645366e-06, "loss": 0.9109, "step": 17274 }, { "epoch": 0.6759136082635574, "grad_norm": 0.0, "learning_rate": 5.021232435284821e-06, "loss": 0.9417, "step": 17275 }, { "epoch": 0.6759527349557868, "grad_norm": 0.0, "learning_rate": 5.020133471877044e-06, "loss": 1.1399, "step": 17276 }, { "epoch": 0.6759918616480163, "grad_norm": 0.0, "learning_rate": 5.0190345884396815e-06, "loss": 1.0128, "step": 17277 }, { "epoch": 0.6760309883402457, "grad_norm": 0.0, "learning_rate": 5.017935784990387e-06, "loss": 0.9207, "step": 17278 }, { "epoch": 0.6760701150324752, "grad_norm": 0.0, "learning_rate": 5.0168370615467975e-06, "loss": 1.0559, "step": 17279 }, { "epoch": 0.6761092417247045, "grad_norm": 0.0, "learning_rate": 5.015738418126565e-06, "loss": 1.1141, "step": 17280 }, { "epoch": 0.676148368416934, "grad_norm": 0.0, "learning_rate": 5.014639854747319e-06, "loss": 0.7852, "step": 17281 }, { "epoch": 0.6761874951091634, "grad_norm": 0.0, "learning_rate": 5.013541371426718e-06, "loss": 1.0289, "step": 17282 }, { "epoch": 0.6762266218013929, "grad_norm": 0.0, "learning_rate": 5.0124429681823896e-06, "loss": 0.909, "step": 17283 }, { "epoch": 0.6762657484936223, "grad_norm": 0.0, "learning_rate": 5.011344645031982e-06, "loss": 1.0799, "step": 17284 }, { "epoch": 0.6763048751858518, "grad_norm": 0.0, "learning_rate": 5.010246401993121e-06, "loss": 0.9809, "step": 17285 }, { "epoch": 0.6763440018780812, "grad_norm": 0.0, "learning_rate": 5.009148239083457e-06, "loss": 0.8695, "step": 17286 }, { "epoch": 0.6763831285703107, "grad_norm": 0.0, "learning_rate": 5.008050156320615e-06, "loss": 0.9719, "step": 17287 }, { "epoch": 0.6764222552625401, "grad_norm": 0.0, "learning_rate": 5.006952153722236e-06, "loss": 1.0092, "step": 17288 }, { "epoch": 0.6764613819547696, "grad_norm": 0.0, "learning_rate": 5.005854231305945e-06, "loss": 1.0005, "step": 17289 }, { "epoch": 0.676500508646999, "grad_norm": 0.0, "learning_rate": 5.004756389089378e-06, "loss": 0.9931, "step": 17290 }, { "epoch": 0.6765396353392285, "grad_norm": 0.0, "learning_rate": 5.0036586270901624e-06, "loss": 1.0562, "step": 17291 }, { "epoch": 0.6765787620314578, "grad_norm": 0.0, "learning_rate": 5.0025609453259335e-06, "loss": 0.9028, "step": 17292 }, { "epoch": 0.6766178887236873, "grad_norm": 0.0, "learning_rate": 5.001463343814309e-06, "loss": 0.8625, "step": 17293 }, { "epoch": 0.6766570154159167, "grad_norm": 0.0, "learning_rate": 5.000365822572919e-06, "loss": 0.9004, "step": 17294 }, { "epoch": 0.6766961421081462, "grad_norm": 0.0, "learning_rate": 4.999268381619391e-06, "loss": 0.9569, "step": 17295 }, { "epoch": 0.6767352688003756, "grad_norm": 0.0, "learning_rate": 4.99817102097135e-06, "loss": 1.0316, "step": 17296 }, { "epoch": 0.6767743954926051, "grad_norm": 0.0, "learning_rate": 4.99707374064641e-06, "loss": 1.0629, "step": 17297 }, { "epoch": 0.6768135221848345, "grad_norm": 0.0, "learning_rate": 4.9959765406622e-06, "loss": 0.9746, "step": 17298 }, { "epoch": 0.676852648877064, "grad_norm": 0.0, "learning_rate": 4.994879421036329e-06, "loss": 0.9936, "step": 17299 }, { "epoch": 0.6768917755692934, "grad_norm": 0.0, "learning_rate": 4.993782381786432e-06, "loss": 1.0691, "step": 17300 }, { "epoch": 0.6769309022615229, "grad_norm": 0.0, "learning_rate": 4.992685422930111e-06, "loss": 0.986, "step": 17301 }, { "epoch": 0.6769700289537522, "grad_norm": 0.0, "learning_rate": 4.991588544484993e-06, "loss": 1.009, "step": 17302 }, { "epoch": 0.6770091556459817, "grad_norm": 0.0, "learning_rate": 4.990491746468682e-06, "loss": 0.8822, "step": 17303 }, { "epoch": 0.6770482823382111, "grad_norm": 0.0, "learning_rate": 4.989395028898797e-06, "loss": 1.0365, "step": 17304 }, { "epoch": 0.6770874090304405, "grad_norm": 0.0, "learning_rate": 4.98829839179295e-06, "loss": 0.9078, "step": 17305 }, { "epoch": 0.67712653572267, "grad_norm": 0.0, "learning_rate": 4.987201835168752e-06, "loss": 1.1174, "step": 17306 }, { "epoch": 0.6771656624148994, "grad_norm": 0.0, "learning_rate": 4.98610535904381e-06, "loss": 0.9492, "step": 17307 }, { "epoch": 0.6772047891071289, "grad_norm": 0.0, "learning_rate": 4.9850089634357325e-06, "loss": 1.0425, "step": 17308 }, { "epoch": 0.6772439157993583, "grad_norm": 0.0, "learning_rate": 4.98391264836213e-06, "loss": 0.8959, "step": 17309 }, { "epoch": 0.6772830424915878, "grad_norm": 0.0, "learning_rate": 4.982816413840601e-06, "loss": 1.0288, "step": 17310 }, { "epoch": 0.6773221691838172, "grad_norm": 0.0, "learning_rate": 4.9817202598887536e-06, "loss": 1.0103, "step": 17311 }, { "epoch": 0.6773612958760467, "grad_norm": 0.0, "learning_rate": 4.980624186524191e-06, "loss": 1.0039, "step": 17312 }, { "epoch": 0.677400422568276, "grad_norm": 0.0, "learning_rate": 4.979528193764518e-06, "loss": 1.1285, "step": 17313 }, { "epoch": 0.6774395492605055, "grad_norm": 0.0, "learning_rate": 4.978432281627328e-06, "loss": 0.9448, "step": 17314 }, { "epoch": 0.6774786759527349, "grad_norm": 0.0, "learning_rate": 4.977336450130227e-06, "loss": 0.9773, "step": 17315 }, { "epoch": 0.6775178026449644, "grad_norm": 0.0, "learning_rate": 4.976240699290799e-06, "loss": 0.784, "step": 17316 }, { "epoch": 0.6775569293371938, "grad_norm": 0.0, "learning_rate": 4.97514502912666e-06, "loss": 0.9745, "step": 17317 }, { "epoch": 0.6775960560294233, "grad_norm": 0.0, "learning_rate": 4.974049439655392e-06, "loss": 1.0034, "step": 17318 }, { "epoch": 0.6776351827216527, "grad_norm": 0.0, "learning_rate": 4.972953930894595e-06, "loss": 0.9635, "step": 17319 }, { "epoch": 0.6776743094138822, "grad_norm": 0.0, "learning_rate": 4.9718585028618546e-06, "loss": 1.0857, "step": 17320 }, { "epoch": 0.6777134361061116, "grad_norm": 0.0, "learning_rate": 4.970763155574766e-06, "loss": 0.9483, "step": 17321 }, { "epoch": 0.6777525627983411, "grad_norm": 0.0, "learning_rate": 4.96966788905092e-06, "loss": 1.0316, "step": 17322 }, { "epoch": 0.6777916894905704, "grad_norm": 0.0, "learning_rate": 4.9685727033079066e-06, "loss": 0.946, "step": 17323 }, { "epoch": 0.6778308161828, "grad_norm": 0.0, "learning_rate": 4.967477598363308e-06, "loss": 1.109, "step": 17324 }, { "epoch": 0.6778699428750293, "grad_norm": 0.0, "learning_rate": 4.966382574234714e-06, "loss": 0.7799, "step": 17325 }, { "epoch": 0.6779090695672588, "grad_norm": 0.0, "learning_rate": 4.965287630939707e-06, "loss": 1.0033, "step": 17326 }, { "epoch": 0.6779481962594882, "grad_norm": 0.0, "learning_rate": 4.964192768495876e-06, "loss": 1.015, "step": 17327 }, { "epoch": 0.6779873229517177, "grad_norm": 0.0, "learning_rate": 4.963097986920795e-06, "loss": 0.9371, "step": 17328 }, { "epoch": 0.6780264496439471, "grad_norm": 0.0, "learning_rate": 4.9620032862320535e-06, "loss": 1.0962, "step": 17329 }, { "epoch": 0.6780655763361766, "grad_norm": 0.0, "learning_rate": 4.960908666447217e-06, "loss": 1.0708, "step": 17330 }, { "epoch": 0.678104703028406, "grad_norm": 0.0, "learning_rate": 4.9598141275838814e-06, "loss": 0.9467, "step": 17331 }, { "epoch": 0.6781438297206355, "grad_norm": 0.0, "learning_rate": 4.958719669659612e-06, "loss": 0.9818, "step": 17332 }, { "epoch": 0.6781829564128649, "grad_norm": 0.0, "learning_rate": 4.957625292691991e-06, "loss": 1.0594, "step": 17333 }, { "epoch": 0.6782220831050942, "grad_norm": 0.0, "learning_rate": 4.956530996698581e-06, "loss": 0.9853, "step": 17334 }, { "epoch": 0.6782612097973237, "grad_norm": 0.0, "learning_rate": 4.955436781696972e-06, "loss": 0.8957, "step": 17335 }, { "epoch": 0.6783003364895531, "grad_norm": 0.0, "learning_rate": 4.954342647704723e-06, "loss": 0.9689, "step": 17336 }, { "epoch": 0.6783394631817826, "grad_norm": 0.0, "learning_rate": 4.953248594739412e-06, "loss": 1.02, "step": 17337 }, { "epoch": 0.678378589874012, "grad_norm": 0.0, "learning_rate": 4.952154622818601e-06, "loss": 0.8723, "step": 17338 }, { "epoch": 0.6784177165662415, "grad_norm": 0.0, "learning_rate": 4.951060731959861e-06, "loss": 1.0371, "step": 17339 }, { "epoch": 0.6784568432584709, "grad_norm": 0.0, "learning_rate": 4.94996692218076e-06, "loss": 1.008, "step": 17340 }, { "epoch": 0.6784959699507004, "grad_norm": 0.0, "learning_rate": 4.948873193498866e-06, "loss": 1.0056, "step": 17341 }, { "epoch": 0.6785350966429298, "grad_norm": 0.0, "learning_rate": 4.947779545931734e-06, "loss": 0.9006, "step": 17342 }, { "epoch": 0.6785742233351593, "grad_norm": 0.0, "learning_rate": 4.946685979496933e-06, "loss": 1.0063, "step": 17343 }, { "epoch": 0.6786133500273887, "grad_norm": 0.0, "learning_rate": 4.9455924942120215e-06, "loss": 0.9276, "step": 17344 }, { "epoch": 0.6786524767196181, "grad_norm": 0.0, "learning_rate": 4.944499090094567e-06, "loss": 1.006, "step": 17345 }, { "epoch": 0.6786916034118475, "grad_norm": 0.0, "learning_rate": 4.943405767162116e-06, "loss": 1.0035, "step": 17346 }, { "epoch": 0.678730730104077, "grad_norm": 0.0, "learning_rate": 4.942312525432238e-06, "loss": 0.8788, "step": 17347 }, { "epoch": 0.6787698567963064, "grad_norm": 0.0, "learning_rate": 4.941219364922478e-06, "loss": 0.9974, "step": 17348 }, { "epoch": 0.6788089834885359, "grad_norm": 0.0, "learning_rate": 4.940126285650396e-06, "loss": 1.0456, "step": 17349 }, { "epoch": 0.6788481101807653, "grad_norm": 0.0, "learning_rate": 4.9390332876335466e-06, "loss": 1.0011, "step": 17350 }, { "epoch": 0.6788872368729948, "grad_norm": 0.0, "learning_rate": 4.937940370889483e-06, "loss": 0.9885, "step": 17351 }, { "epoch": 0.6789263635652242, "grad_norm": 0.0, "learning_rate": 4.936847535435753e-06, "loss": 1.0711, "step": 17352 }, { "epoch": 0.6789654902574537, "grad_norm": 0.0, "learning_rate": 4.935754781289904e-06, "loss": 1.0162, "step": 17353 }, { "epoch": 0.6790046169496831, "grad_norm": 0.0, "learning_rate": 4.934662108469489e-06, "loss": 0.9864, "step": 17354 }, { "epoch": 0.6790437436419126, "grad_norm": 0.0, "learning_rate": 4.933569516992057e-06, "loss": 0.944, "step": 17355 }, { "epoch": 0.6790828703341419, "grad_norm": 0.0, "learning_rate": 4.9324770068751456e-06, "loss": 1.0633, "step": 17356 }, { "epoch": 0.6791219970263714, "grad_norm": 0.0, "learning_rate": 4.931384578136303e-06, "loss": 0.9858, "step": 17357 }, { "epoch": 0.6791611237186008, "grad_norm": 0.0, "learning_rate": 4.930292230793078e-06, "loss": 0.9083, "step": 17358 }, { "epoch": 0.6792002504108303, "grad_norm": 0.0, "learning_rate": 4.9291999648630025e-06, "loss": 0.9829, "step": 17359 }, { "epoch": 0.6792393771030597, "grad_norm": 0.0, "learning_rate": 4.928107780363622e-06, "loss": 1.1408, "step": 17360 }, { "epoch": 0.6792785037952892, "grad_norm": 0.0, "learning_rate": 4.927015677312474e-06, "loss": 0.9582, "step": 17361 }, { "epoch": 0.6793176304875186, "grad_norm": 0.0, "learning_rate": 4.925923655727103e-06, "loss": 0.9872, "step": 17362 }, { "epoch": 0.679356757179748, "grad_norm": 0.0, "learning_rate": 4.924831715625035e-06, "loss": 1.0173, "step": 17363 }, { "epoch": 0.6793958838719775, "grad_norm": 0.0, "learning_rate": 4.9237398570238135e-06, "loss": 0.9489, "step": 17364 }, { "epoch": 0.6794350105642069, "grad_norm": 0.0, "learning_rate": 4.922648079940962e-06, "loss": 0.9285, "step": 17365 }, { "epoch": 0.6794741372564364, "grad_norm": 0.0, "learning_rate": 4.921556384394028e-06, "loss": 0.9895, "step": 17366 }, { "epoch": 0.6795132639486657, "grad_norm": 0.0, "learning_rate": 4.92046477040053e-06, "loss": 0.8533, "step": 17367 }, { "epoch": 0.6795523906408952, "grad_norm": 0.0, "learning_rate": 4.919373237978007e-06, "loss": 0.9772, "step": 17368 }, { "epoch": 0.6795915173331246, "grad_norm": 0.0, "learning_rate": 4.9182817871439804e-06, "loss": 0.9536, "step": 17369 }, { "epoch": 0.6796306440253541, "grad_norm": 0.0, "learning_rate": 4.917190417915979e-06, "loss": 1.0517, "step": 17370 }, { "epoch": 0.6796697707175835, "grad_norm": 0.0, "learning_rate": 4.916099130311531e-06, "loss": 0.9103, "step": 17371 }, { "epoch": 0.679708897409813, "grad_norm": 0.0, "learning_rate": 4.915007924348165e-06, "loss": 0.9294, "step": 17372 }, { "epoch": 0.6797480241020424, "grad_norm": 0.0, "learning_rate": 4.913916800043396e-06, "loss": 0.9619, "step": 17373 }, { "epoch": 0.6797871507942719, "grad_norm": 0.0, "learning_rate": 4.91282575741475e-06, "loss": 0.8942, "step": 17374 }, { "epoch": 0.6798262774865013, "grad_norm": 0.0, "learning_rate": 4.911734796479747e-06, "loss": 1.0087, "step": 17375 }, { "epoch": 0.6798654041787308, "grad_norm": 0.0, "learning_rate": 4.910643917255911e-06, "loss": 0.9489, "step": 17376 }, { "epoch": 0.6799045308709601, "grad_norm": 0.0, "learning_rate": 4.909553119760753e-06, "loss": 1.1925, "step": 17377 }, { "epoch": 0.6799436575631896, "grad_norm": 0.0, "learning_rate": 4.908462404011797e-06, "loss": 0.9384, "step": 17378 }, { "epoch": 0.679982784255419, "grad_norm": 0.0, "learning_rate": 4.9073717700265465e-06, "loss": 0.9736, "step": 17379 }, { "epoch": 0.6800219109476485, "grad_norm": 0.0, "learning_rate": 4.906281217822532e-06, "loss": 0.9443, "step": 17380 }, { "epoch": 0.6800610376398779, "grad_norm": 0.0, "learning_rate": 4.905190747417256e-06, "loss": 1.0862, "step": 17381 }, { "epoch": 0.6801001643321074, "grad_norm": 0.0, "learning_rate": 4.904100358828234e-06, "loss": 1.0446, "step": 17382 }, { "epoch": 0.6801392910243368, "grad_norm": 0.0, "learning_rate": 4.9030100520729684e-06, "loss": 0.9433, "step": 17383 }, { "epoch": 0.6801784177165663, "grad_norm": 0.0, "learning_rate": 4.901919827168982e-06, "loss": 0.9173, "step": 17384 }, { "epoch": 0.6802175444087957, "grad_norm": 0.0, "learning_rate": 4.90082968413377e-06, "loss": 0.9151, "step": 17385 }, { "epoch": 0.6802566711010252, "grad_norm": 0.0, "learning_rate": 4.899739622984848e-06, "loss": 1.0362, "step": 17386 }, { "epoch": 0.6802957977932546, "grad_norm": 0.0, "learning_rate": 4.898649643739714e-06, "loss": 1.0514, "step": 17387 }, { "epoch": 0.680334924485484, "grad_norm": 0.0, "learning_rate": 4.897559746415873e-06, "loss": 1.0725, "step": 17388 }, { "epoch": 0.6803740511777134, "grad_norm": 0.0, "learning_rate": 4.896469931030829e-06, "loss": 1.097, "step": 17389 }, { "epoch": 0.6804131778699428, "grad_norm": 0.0, "learning_rate": 4.895380197602088e-06, "loss": 1.0325, "step": 17390 }, { "epoch": 0.6804523045621723, "grad_norm": 0.0, "learning_rate": 4.894290546147139e-06, "loss": 1.0509, "step": 17391 }, { "epoch": 0.6804914312544017, "grad_norm": 0.0, "learning_rate": 4.893200976683486e-06, "loss": 0.9972, "step": 17392 }, { "epoch": 0.6805305579466312, "grad_norm": 0.0, "learning_rate": 4.892111489228628e-06, "loss": 0.9764, "step": 17393 }, { "epoch": 0.6805696846388606, "grad_norm": 0.0, "learning_rate": 4.891022083800061e-06, "loss": 0.9979, "step": 17394 }, { "epoch": 0.6806088113310901, "grad_norm": 0.0, "learning_rate": 4.889932760415275e-06, "loss": 1.0298, "step": 17395 }, { "epoch": 0.6806479380233195, "grad_norm": 0.0, "learning_rate": 4.888843519091768e-06, "loss": 1.0397, "step": 17396 }, { "epoch": 0.680687064715549, "grad_norm": 0.0, "learning_rate": 4.887754359847026e-06, "loss": 1.0646, "step": 17397 }, { "epoch": 0.6807261914077783, "grad_norm": 0.0, "learning_rate": 4.886665282698544e-06, "loss": 0.9233, "step": 17398 }, { "epoch": 0.6807653181000078, "grad_norm": 0.0, "learning_rate": 4.885576287663809e-06, "loss": 0.8954, "step": 17399 }, { "epoch": 0.6808044447922372, "grad_norm": 0.0, "learning_rate": 4.884487374760314e-06, "loss": 1.0213, "step": 17400 }, { "epoch": 0.6808435714844667, "grad_norm": 0.0, "learning_rate": 4.883398544005539e-06, "loss": 1.0755, "step": 17401 }, { "epoch": 0.6808826981766961, "grad_norm": 0.0, "learning_rate": 4.8823097954169705e-06, "loss": 0.9655, "step": 17402 }, { "epoch": 0.6809218248689256, "grad_norm": 0.0, "learning_rate": 4.881221129012098e-06, "loss": 1.0612, "step": 17403 }, { "epoch": 0.680960951561155, "grad_norm": 0.0, "learning_rate": 4.880132544808397e-06, "loss": 1.0278, "step": 17404 }, { "epoch": 0.6810000782533845, "grad_norm": 0.0, "learning_rate": 4.879044042823351e-06, "loss": 0.9555, "step": 17405 }, { "epoch": 0.6810392049456139, "grad_norm": 0.0, "learning_rate": 4.877955623074441e-06, "loss": 0.9614, "step": 17406 }, { "epoch": 0.6810783316378434, "grad_norm": 0.0, "learning_rate": 4.876867285579149e-06, "loss": 1.0917, "step": 17407 }, { "epoch": 0.6811174583300728, "grad_norm": 0.0, "learning_rate": 4.875779030354946e-06, "loss": 1.0595, "step": 17408 }, { "epoch": 0.6811565850223023, "grad_norm": 0.0, "learning_rate": 4.874690857419313e-06, "loss": 0.946, "step": 17409 }, { "epoch": 0.6811957117145316, "grad_norm": 0.0, "learning_rate": 4.873602766789715e-06, "loss": 1.0928, "step": 17410 }, { "epoch": 0.6812348384067611, "grad_norm": 0.0, "learning_rate": 4.872514758483642e-06, "loss": 1.0463, "step": 17411 }, { "epoch": 0.6812739650989905, "grad_norm": 0.0, "learning_rate": 4.871426832518552e-06, "loss": 0.8758, "step": 17412 }, { "epoch": 0.68131309179122, "grad_norm": 0.0, "learning_rate": 4.870338988911924e-06, "loss": 0.9112, "step": 17413 }, { "epoch": 0.6813522184834494, "grad_norm": 0.0, "learning_rate": 4.869251227681221e-06, "loss": 0.9284, "step": 17414 }, { "epoch": 0.6813913451756789, "grad_norm": 0.0, "learning_rate": 4.868163548843914e-06, "loss": 1.0844, "step": 17415 }, { "epoch": 0.6814304718679083, "grad_norm": 0.0, "learning_rate": 4.867075952417469e-06, "loss": 1.0234, "step": 17416 }, { "epoch": 0.6814695985601378, "grad_norm": 0.0, "learning_rate": 4.865988438419357e-06, "loss": 0.9128, "step": 17417 }, { "epoch": 0.6815087252523672, "grad_norm": 0.0, "learning_rate": 4.864901006867033e-06, "loss": 0.851, "step": 17418 }, { "epoch": 0.6815478519445965, "grad_norm": 0.0, "learning_rate": 4.863813657777965e-06, "loss": 1.1227, "step": 17419 }, { "epoch": 0.681586978636826, "grad_norm": 0.0, "learning_rate": 4.862726391169613e-06, "loss": 0.9624, "step": 17420 }, { "epoch": 0.6816261053290554, "grad_norm": 0.0, "learning_rate": 4.861639207059442e-06, "loss": 0.9456, "step": 17421 }, { "epoch": 0.6816652320212849, "grad_norm": 0.0, "learning_rate": 4.8605521054649016e-06, "loss": 1.0344, "step": 17422 }, { "epoch": 0.6817043587135143, "grad_norm": 0.0, "learning_rate": 4.859465086403457e-06, "loss": 0.9344, "step": 17423 }, { "epoch": 0.6817434854057438, "grad_norm": 0.0, "learning_rate": 4.858378149892559e-06, "loss": 0.9814, "step": 17424 }, { "epoch": 0.6817826120979732, "grad_norm": 0.0, "learning_rate": 4.85729129594967e-06, "loss": 0.9363, "step": 17425 }, { "epoch": 0.6818217387902027, "grad_norm": 0.0, "learning_rate": 4.856204524592234e-06, "loss": 0.9448, "step": 17426 }, { "epoch": 0.6818608654824321, "grad_norm": 0.0, "learning_rate": 4.855117835837713e-06, "loss": 1.1708, "step": 17427 }, { "epoch": 0.6818999921746616, "grad_norm": 0.0, "learning_rate": 4.854031229703544e-06, "loss": 0.945, "step": 17428 }, { "epoch": 0.681939118866891, "grad_norm": 0.0, "learning_rate": 4.8529447062071935e-06, "loss": 0.885, "step": 17429 }, { "epoch": 0.6819782455591205, "grad_norm": 0.0, "learning_rate": 4.851858265366098e-06, "loss": 0.867, "step": 17430 }, { "epoch": 0.6820173722513498, "grad_norm": 0.0, "learning_rate": 4.85077190719771e-06, "loss": 1.1138, "step": 17431 }, { "epoch": 0.6820564989435793, "grad_norm": 0.0, "learning_rate": 4.849685631719465e-06, "loss": 0.9921, "step": 17432 }, { "epoch": 0.6820956256358087, "grad_norm": 0.0, "learning_rate": 4.848599438948825e-06, "loss": 0.8253, "step": 17433 }, { "epoch": 0.6821347523280382, "grad_norm": 0.0, "learning_rate": 4.847513328903217e-06, "loss": 1.0114, "step": 17434 }, { "epoch": 0.6821738790202676, "grad_norm": 0.0, "learning_rate": 4.846427301600093e-06, "loss": 0.9142, "step": 17435 }, { "epoch": 0.6822130057124971, "grad_norm": 0.0, "learning_rate": 4.845341357056885e-06, "loss": 1.0898, "step": 17436 }, { "epoch": 0.6822521324047265, "grad_norm": 0.0, "learning_rate": 4.844255495291036e-06, "loss": 1.0331, "step": 17437 }, { "epoch": 0.682291259096956, "grad_norm": 0.0, "learning_rate": 4.843169716319983e-06, "loss": 0.9996, "step": 17438 }, { "epoch": 0.6823303857891854, "grad_norm": 0.0, "learning_rate": 4.8420840201611665e-06, "loss": 0.9609, "step": 17439 }, { "epoch": 0.6823695124814149, "grad_norm": 0.0, "learning_rate": 4.840998406832013e-06, "loss": 1.0073, "step": 17440 }, { "epoch": 0.6824086391736442, "grad_norm": 0.0, "learning_rate": 4.839912876349961e-06, "loss": 0.9587, "step": 17441 }, { "epoch": 0.6824477658658737, "grad_norm": 0.0, "learning_rate": 4.838827428732446e-06, "loss": 0.9335, "step": 17442 }, { "epoch": 0.6824868925581031, "grad_norm": 0.0, "learning_rate": 4.837742063996891e-06, "loss": 1.0422, "step": 17443 }, { "epoch": 0.6825260192503326, "grad_norm": 0.0, "learning_rate": 4.83665678216073e-06, "loss": 0.973, "step": 17444 }, { "epoch": 0.682565145942562, "grad_norm": 0.0, "learning_rate": 4.835571583241395e-06, "loss": 0.916, "step": 17445 }, { "epoch": 0.6826042726347915, "grad_norm": 0.0, "learning_rate": 4.834486467256306e-06, "loss": 0.9285, "step": 17446 }, { "epoch": 0.6826433993270209, "grad_norm": 0.0, "learning_rate": 4.83340143422289e-06, "loss": 0.9804, "step": 17447 }, { "epoch": 0.6826825260192503, "grad_norm": 0.0, "learning_rate": 4.832316484158577e-06, "loss": 0.8599, "step": 17448 }, { "epoch": 0.6827216527114798, "grad_norm": 0.0, "learning_rate": 4.831231617080783e-06, "loss": 1.0212, "step": 17449 }, { "epoch": 0.6827607794037092, "grad_norm": 0.0, "learning_rate": 4.830146833006931e-06, "loss": 0.9833, "step": 17450 }, { "epoch": 0.6827999060959387, "grad_norm": 0.0, "learning_rate": 4.829062131954444e-06, "loss": 0.8878, "step": 17451 }, { "epoch": 0.682839032788168, "grad_norm": 0.0, "learning_rate": 4.827977513940742e-06, "loss": 1.0077, "step": 17452 }, { "epoch": 0.6828781594803975, "grad_norm": 0.0, "learning_rate": 4.826892978983238e-06, "loss": 1.0898, "step": 17453 }, { "epoch": 0.6829172861726269, "grad_norm": 0.0, "learning_rate": 4.8258085270993525e-06, "loss": 0.9717, "step": 17454 }, { "epoch": 0.6829564128648564, "grad_norm": 0.0, "learning_rate": 4.824724158306492e-06, "loss": 1.1072, "step": 17455 }, { "epoch": 0.6829955395570858, "grad_norm": 0.0, "learning_rate": 4.823639872622084e-06, "loss": 1.0188, "step": 17456 }, { "epoch": 0.6830346662493153, "grad_norm": 0.0, "learning_rate": 4.82255567006353e-06, "loss": 0.8348, "step": 17457 }, { "epoch": 0.6830737929415447, "grad_norm": 0.0, "learning_rate": 4.821471550648247e-06, "loss": 1.0245, "step": 17458 }, { "epoch": 0.6831129196337742, "grad_norm": 0.0, "learning_rate": 4.8203875143936355e-06, "loss": 0.9897, "step": 17459 }, { "epoch": 0.6831520463260036, "grad_norm": 0.0, "learning_rate": 4.819303561317117e-06, "loss": 0.9659, "step": 17460 }, { "epoch": 0.6831911730182331, "grad_norm": 0.0, "learning_rate": 4.818219691436087e-06, "loss": 0.9798, "step": 17461 }, { "epoch": 0.6832302997104625, "grad_norm": 0.0, "learning_rate": 4.81713590476796e-06, "loss": 0.9647, "step": 17462 }, { "epoch": 0.683269426402692, "grad_norm": 0.0, "learning_rate": 4.816052201330133e-06, "loss": 0.9877, "step": 17463 }, { "epoch": 0.6833085530949213, "grad_norm": 0.0, "learning_rate": 4.81496858114001e-06, "loss": 0.978, "step": 17464 }, { "epoch": 0.6833476797871508, "grad_norm": 0.0, "learning_rate": 4.813885044214996e-06, "loss": 0.8413, "step": 17465 }, { "epoch": 0.6833868064793802, "grad_norm": 0.0, "learning_rate": 4.8128015905724926e-06, "loss": 0.9865, "step": 17466 }, { "epoch": 0.6834259331716097, "grad_norm": 0.0, "learning_rate": 4.811718220229892e-06, "loss": 1.0406, "step": 17467 }, { "epoch": 0.6834650598638391, "grad_norm": 0.0, "learning_rate": 4.8106349332045954e-06, "loss": 0.8698, "step": 17468 }, { "epoch": 0.6835041865560686, "grad_norm": 0.0, "learning_rate": 4.809551729513999e-06, "loss": 0.9468, "step": 17469 }, { "epoch": 0.683543313248298, "grad_norm": 0.0, "learning_rate": 4.808468609175502e-06, "loss": 0.9928, "step": 17470 }, { "epoch": 0.6835824399405275, "grad_norm": 0.0, "learning_rate": 4.807385572206491e-06, "loss": 1.1273, "step": 17471 }, { "epoch": 0.6836215666327569, "grad_norm": 0.0, "learning_rate": 4.806302618624363e-06, "loss": 0.9662, "step": 17472 }, { "epoch": 0.6836606933249864, "grad_norm": 0.0, "learning_rate": 4.8052197484465e-06, "loss": 1.0329, "step": 17473 }, { "epoch": 0.6836998200172157, "grad_norm": 0.0, "learning_rate": 4.8041369616903065e-06, "loss": 1.0574, "step": 17474 }, { "epoch": 0.6837389467094452, "grad_norm": 0.0, "learning_rate": 4.803054258373158e-06, "loss": 1.0226, "step": 17475 }, { "epoch": 0.6837780734016746, "grad_norm": 0.0, "learning_rate": 4.8019716385124505e-06, "loss": 1.0336, "step": 17476 }, { "epoch": 0.683817200093904, "grad_norm": 0.0, "learning_rate": 4.800889102125558e-06, "loss": 1.1031, "step": 17477 }, { "epoch": 0.6838563267861335, "grad_norm": 0.0, "learning_rate": 4.799806649229878e-06, "loss": 0.996, "step": 17478 }, { "epoch": 0.6838954534783629, "grad_norm": 0.0, "learning_rate": 4.798724279842783e-06, "loss": 0.8871, "step": 17479 }, { "epoch": 0.6839345801705924, "grad_norm": 0.0, "learning_rate": 4.7976419939816635e-06, "loss": 1.0629, "step": 17480 }, { "epoch": 0.6839737068628218, "grad_norm": 0.0, "learning_rate": 4.7965597916638895e-06, "loss": 0.9837, "step": 17481 }, { "epoch": 0.6840128335550513, "grad_norm": 0.0, "learning_rate": 4.795477672906845e-06, "loss": 0.9592, "step": 17482 }, { "epoch": 0.6840519602472807, "grad_norm": 0.0, "learning_rate": 4.794395637727909e-06, "loss": 1.0703, "step": 17483 }, { "epoch": 0.6840910869395102, "grad_norm": 0.0, "learning_rate": 4.793313686144458e-06, "loss": 1.0504, "step": 17484 }, { "epoch": 0.6841302136317395, "grad_norm": 0.0, "learning_rate": 4.7922318181738625e-06, "loss": 1.0138, "step": 17485 }, { "epoch": 0.684169340323969, "grad_norm": 0.0, "learning_rate": 4.7911500338334985e-06, "loss": 0.9863, "step": 17486 }, { "epoch": 0.6842084670161984, "grad_norm": 0.0, "learning_rate": 4.790068333140741e-06, "loss": 0.9929, "step": 17487 }, { "epoch": 0.6842475937084279, "grad_norm": 0.0, "learning_rate": 4.7889867161129534e-06, "loss": 0.9487, "step": 17488 }, { "epoch": 0.6842867204006573, "grad_norm": 0.0, "learning_rate": 4.787905182767511e-06, "loss": 0.8453, "step": 17489 }, { "epoch": 0.6843258470928868, "grad_norm": 0.0, "learning_rate": 4.786823733121778e-06, "loss": 1.0587, "step": 17490 }, { "epoch": 0.6843649737851162, "grad_norm": 0.0, "learning_rate": 4.785742367193128e-06, "loss": 1.0907, "step": 17491 }, { "epoch": 0.6844041004773457, "grad_norm": 0.0, "learning_rate": 4.784661084998919e-06, "loss": 0.9386, "step": 17492 }, { "epoch": 0.6844432271695751, "grad_norm": 0.0, "learning_rate": 4.7835798865565205e-06, "loss": 0.993, "step": 17493 }, { "epoch": 0.6844823538618046, "grad_norm": 0.0, "learning_rate": 4.78249877188329e-06, "loss": 0.9911, "step": 17494 }, { "epoch": 0.684521480554034, "grad_norm": 0.0, "learning_rate": 4.7814177409965885e-06, "loss": 0.9529, "step": 17495 }, { "epoch": 0.6845606072462634, "grad_norm": 0.0, "learning_rate": 4.780336793913781e-06, "loss": 1.039, "step": 17496 }, { "epoch": 0.6845997339384928, "grad_norm": 0.0, "learning_rate": 4.779255930652228e-06, "loss": 1.028, "step": 17497 }, { "epoch": 0.6846388606307223, "grad_norm": 0.0, "learning_rate": 4.778175151229279e-06, "loss": 1.0323, "step": 17498 }, { "epoch": 0.6846779873229517, "grad_norm": 0.0, "learning_rate": 4.777094455662292e-06, "loss": 0.9438, "step": 17499 }, { "epoch": 0.6847171140151812, "grad_norm": 0.0, "learning_rate": 4.776013843968625e-06, "loss": 0.9469, "step": 17500 }, { "epoch": 0.6847562407074106, "grad_norm": 0.0, "learning_rate": 4.774933316165633e-06, "loss": 0.9137, "step": 17501 }, { "epoch": 0.6847953673996401, "grad_norm": 0.0, "learning_rate": 4.773852872270661e-06, "loss": 0.9817, "step": 17502 }, { "epoch": 0.6848344940918695, "grad_norm": 0.0, "learning_rate": 4.772772512301066e-06, "loss": 0.9268, "step": 17503 }, { "epoch": 0.6848736207840989, "grad_norm": 0.0, "learning_rate": 4.771692236274188e-06, "loss": 1.0035, "step": 17504 }, { "epoch": 0.6849127474763284, "grad_norm": 0.0, "learning_rate": 4.770612044207389e-06, "loss": 0.9716, "step": 17505 }, { "epoch": 0.6849518741685577, "grad_norm": 0.0, "learning_rate": 4.769531936118002e-06, "loss": 1.0846, "step": 17506 }, { "epoch": 0.6849910008607872, "grad_norm": 0.0, "learning_rate": 4.768451912023384e-06, "loss": 0.9604, "step": 17507 }, { "epoch": 0.6850301275530166, "grad_norm": 0.0, "learning_rate": 4.767371971940864e-06, "loss": 0.932, "step": 17508 }, { "epoch": 0.6850692542452461, "grad_norm": 0.0, "learning_rate": 4.766292115887801e-06, "loss": 1.0408, "step": 17509 }, { "epoch": 0.6851083809374755, "grad_norm": 0.0, "learning_rate": 4.765212343881524e-06, "loss": 1.0794, "step": 17510 }, { "epoch": 0.685147507629705, "grad_norm": 0.0, "learning_rate": 4.764132655939383e-06, "loss": 0.937, "step": 17511 }, { "epoch": 0.6851866343219344, "grad_norm": 0.0, "learning_rate": 4.763053052078705e-06, "loss": 0.9238, "step": 17512 }, { "epoch": 0.6852257610141639, "grad_norm": 0.0, "learning_rate": 4.761973532316834e-06, "loss": 0.9038, "step": 17513 }, { "epoch": 0.6852648877063933, "grad_norm": 0.0, "learning_rate": 4.7608940966711036e-06, "loss": 0.954, "step": 17514 }, { "epoch": 0.6853040143986228, "grad_norm": 0.0, "learning_rate": 4.759814745158853e-06, "loss": 1.0707, "step": 17515 }, { "epoch": 0.6853431410908521, "grad_norm": 0.0, "learning_rate": 4.758735477797407e-06, "loss": 0.944, "step": 17516 }, { "epoch": 0.6853822677830816, "grad_norm": 0.0, "learning_rate": 4.7576562946041025e-06, "loss": 1.0922, "step": 17517 }, { "epoch": 0.685421394475311, "grad_norm": 0.0, "learning_rate": 4.756577195596268e-06, "loss": 0.9562, "step": 17518 }, { "epoch": 0.6854605211675405, "grad_norm": 0.0, "learning_rate": 4.755498180791238e-06, "loss": 0.9648, "step": 17519 }, { "epoch": 0.6854996478597699, "grad_norm": 0.0, "learning_rate": 4.754419250206331e-06, "loss": 0.9978, "step": 17520 }, { "epoch": 0.6855387745519994, "grad_norm": 0.0, "learning_rate": 4.753340403858883e-06, "loss": 0.9251, "step": 17521 }, { "epoch": 0.6855779012442288, "grad_norm": 0.0, "learning_rate": 4.7522616417662034e-06, "loss": 1.0155, "step": 17522 }, { "epoch": 0.6856170279364583, "grad_norm": 0.0, "learning_rate": 4.7511829639456365e-06, "loss": 1.0818, "step": 17523 }, { "epoch": 0.6856561546286877, "grad_norm": 0.0, "learning_rate": 4.750104370414489e-06, "loss": 0.9892, "step": 17524 }, { "epoch": 0.6856952813209172, "grad_norm": 0.0, "learning_rate": 4.74902586119009e-06, "loss": 0.9, "step": 17525 }, { "epoch": 0.6857344080131466, "grad_norm": 0.0, "learning_rate": 4.747947436289753e-06, "loss": 0.8787, "step": 17526 }, { "epoch": 0.685773534705376, "grad_norm": 0.0, "learning_rate": 4.7468690957307985e-06, "loss": 0.9619, "step": 17527 }, { "epoch": 0.6858126613976054, "grad_norm": 0.0, "learning_rate": 4.745790839530544e-06, "loss": 1.0579, "step": 17528 }, { "epoch": 0.6858517880898349, "grad_norm": 0.0, "learning_rate": 4.7447126677063086e-06, "loss": 1.0799, "step": 17529 }, { "epoch": 0.6858909147820643, "grad_norm": 0.0, "learning_rate": 4.743634580275398e-06, "loss": 1.0433, "step": 17530 }, { "epoch": 0.6859300414742938, "grad_norm": 0.0, "learning_rate": 4.742556577255129e-06, "loss": 1.0614, "step": 17531 }, { "epoch": 0.6859691681665232, "grad_norm": 0.0, "learning_rate": 4.741478658662819e-06, "loss": 0.9561, "step": 17532 }, { "epoch": 0.6860082948587526, "grad_norm": 0.0, "learning_rate": 4.740400824515768e-06, "loss": 1.0113, "step": 17533 }, { "epoch": 0.6860474215509821, "grad_norm": 0.0, "learning_rate": 4.739323074831289e-06, "loss": 0.9178, "step": 17534 }, { "epoch": 0.6860865482432115, "grad_norm": 0.0, "learning_rate": 4.73824540962669e-06, "loss": 0.8689, "step": 17535 }, { "epoch": 0.686125674935441, "grad_norm": 0.0, "learning_rate": 4.737167828919279e-06, "loss": 0.912, "step": 17536 }, { "epoch": 0.6861648016276704, "grad_norm": 0.0, "learning_rate": 4.736090332726354e-06, "loss": 1.0617, "step": 17537 }, { "epoch": 0.6862039283198998, "grad_norm": 0.0, "learning_rate": 4.735012921065228e-06, "loss": 1.008, "step": 17538 }, { "epoch": 0.6862430550121292, "grad_norm": 0.0, "learning_rate": 4.733935593953187e-06, "loss": 1.0506, "step": 17539 }, { "epoch": 0.6862821817043587, "grad_norm": 0.0, "learning_rate": 4.732858351407551e-06, "loss": 0.8296, "step": 17540 }, { "epoch": 0.6863213083965881, "grad_norm": 0.0, "learning_rate": 4.7317811934456046e-06, "loss": 0.9833, "step": 17541 }, { "epoch": 0.6863604350888176, "grad_norm": 0.0, "learning_rate": 4.730704120084656e-06, "loss": 1.013, "step": 17542 }, { "epoch": 0.686399561781047, "grad_norm": 0.0, "learning_rate": 4.729627131341992e-06, "loss": 0.8833, "step": 17543 }, { "epoch": 0.6864386884732765, "grad_norm": 0.0, "learning_rate": 4.728550227234912e-06, "loss": 1.0055, "step": 17544 }, { "epoch": 0.6864778151655059, "grad_norm": 0.0, "learning_rate": 4.72747340778071e-06, "loss": 0.9509, "step": 17545 }, { "epoch": 0.6865169418577354, "grad_norm": 0.0, "learning_rate": 4.7263966729966825e-06, "loss": 1.05, "step": 17546 }, { "epoch": 0.6865560685499648, "grad_norm": 0.0, "learning_rate": 4.725320022900112e-06, "loss": 1.1117, "step": 17547 }, { "epoch": 0.6865951952421943, "grad_norm": 0.0, "learning_rate": 4.724243457508292e-06, "loss": 0.9399, "step": 17548 }, { "epoch": 0.6866343219344236, "grad_norm": 0.0, "learning_rate": 4.723166976838511e-06, "loss": 0.8983, "step": 17549 }, { "epoch": 0.6866734486266531, "grad_norm": 0.0, "learning_rate": 4.722090580908061e-06, "loss": 0.9237, "step": 17550 }, { "epoch": 0.6867125753188825, "grad_norm": 0.0, "learning_rate": 4.721014269734218e-06, "loss": 0.8389, "step": 17551 }, { "epoch": 0.686751702011112, "grad_norm": 0.0, "learning_rate": 4.719938043334276e-06, "loss": 1.0909, "step": 17552 }, { "epoch": 0.6867908287033414, "grad_norm": 0.0, "learning_rate": 4.718861901725504e-06, "loss": 0.9568, "step": 17553 }, { "epoch": 0.6868299553955709, "grad_norm": 0.0, "learning_rate": 4.717785844925199e-06, "loss": 0.9342, "step": 17554 }, { "epoch": 0.6868690820878003, "grad_norm": 0.0, "learning_rate": 4.716709872950632e-06, "loss": 0.963, "step": 17555 }, { "epoch": 0.6869082087800298, "grad_norm": 0.0, "learning_rate": 4.715633985819087e-06, "loss": 0.9662, "step": 17556 }, { "epoch": 0.6869473354722592, "grad_norm": 0.0, "learning_rate": 4.7145581835478314e-06, "loss": 0.9325, "step": 17557 }, { "epoch": 0.6869864621644887, "grad_norm": 0.0, "learning_rate": 4.713482466154155e-06, "loss": 0.9532, "step": 17558 }, { "epoch": 0.687025588856718, "grad_norm": 0.0, "learning_rate": 4.7124068336553245e-06, "loss": 1.0476, "step": 17559 }, { "epoch": 0.6870647155489475, "grad_norm": 0.0, "learning_rate": 4.711331286068616e-06, "loss": 1.0747, "step": 17560 }, { "epoch": 0.6871038422411769, "grad_norm": 0.0, "learning_rate": 4.710255823411297e-06, "loss": 1.0572, "step": 17561 }, { "epoch": 0.6871429689334063, "grad_norm": 0.0, "learning_rate": 4.709180445700641e-06, "loss": 0.889, "step": 17562 }, { "epoch": 0.6871820956256358, "grad_norm": 0.0, "learning_rate": 4.7081051529539166e-06, "loss": 1.0018, "step": 17563 }, { "epoch": 0.6872212223178652, "grad_norm": 0.0, "learning_rate": 4.707029945188398e-06, "loss": 0.7917, "step": 17564 }, { "epoch": 0.6872603490100947, "grad_norm": 0.0, "learning_rate": 4.705954822421341e-06, "loss": 1.1052, "step": 17565 }, { "epoch": 0.6872994757023241, "grad_norm": 0.0, "learning_rate": 4.704879784670015e-06, "loss": 0.9526, "step": 17566 }, { "epoch": 0.6873386023945536, "grad_norm": 0.0, "learning_rate": 4.703804831951685e-06, "loss": 1.0457, "step": 17567 }, { "epoch": 0.687377729086783, "grad_norm": 0.0, "learning_rate": 4.702729964283617e-06, "loss": 1.0403, "step": 17568 }, { "epoch": 0.6874168557790125, "grad_norm": 0.0, "learning_rate": 4.701655181683064e-06, "loss": 0.8743, "step": 17569 }, { "epoch": 0.6874559824712418, "grad_norm": 0.0, "learning_rate": 4.700580484167293e-06, "loss": 0.9422, "step": 17570 }, { "epoch": 0.6874951091634713, "grad_norm": 0.0, "learning_rate": 4.6995058717535555e-06, "loss": 1.0045, "step": 17571 }, { "epoch": 0.6875342358557007, "grad_norm": 0.0, "learning_rate": 4.698431344459112e-06, "loss": 0.859, "step": 17572 }, { "epoch": 0.6875733625479302, "grad_norm": 0.0, "learning_rate": 4.6973569023012175e-06, "loss": 1.0966, "step": 17573 }, { "epoch": 0.6876124892401596, "grad_norm": 0.0, "learning_rate": 4.696282545297131e-06, "loss": 1.0472, "step": 17574 }, { "epoch": 0.6876516159323891, "grad_norm": 0.0, "learning_rate": 4.695208273464097e-06, "loss": 1.0089, "step": 17575 }, { "epoch": 0.6876907426246185, "grad_norm": 0.0, "learning_rate": 4.6941340868193696e-06, "loss": 0.8284, "step": 17576 }, { "epoch": 0.687729869316848, "grad_norm": 0.0, "learning_rate": 4.693059985380205e-06, "loss": 1.0446, "step": 17577 }, { "epoch": 0.6877689960090774, "grad_norm": 0.0, "learning_rate": 4.691985969163844e-06, "loss": 0.9649, "step": 17578 }, { "epoch": 0.6878081227013069, "grad_norm": 0.0, "learning_rate": 4.690912038187535e-06, "loss": 1.0157, "step": 17579 }, { "epoch": 0.6878472493935363, "grad_norm": 0.0, "learning_rate": 4.689838192468528e-06, "loss": 0.9546, "step": 17580 }, { "epoch": 0.6878863760857657, "grad_norm": 0.0, "learning_rate": 4.688764432024068e-06, "loss": 1.0516, "step": 17581 }, { "epoch": 0.6879255027779951, "grad_norm": 0.0, "learning_rate": 4.687690756871393e-06, "loss": 0.9724, "step": 17582 }, { "epoch": 0.6879646294702246, "grad_norm": 0.0, "learning_rate": 4.686617167027751e-06, "loss": 0.9028, "step": 17583 }, { "epoch": 0.688003756162454, "grad_norm": 0.0, "learning_rate": 4.685543662510371e-06, "loss": 0.9283, "step": 17584 }, { "epoch": 0.6880428828546835, "grad_norm": 0.0, "learning_rate": 4.68447024333651e-06, "loss": 0.9571, "step": 17585 }, { "epoch": 0.6880820095469129, "grad_norm": 0.0, "learning_rate": 4.68339690952339e-06, "loss": 0.9247, "step": 17586 }, { "epoch": 0.6881211362391424, "grad_norm": 0.0, "learning_rate": 4.682323661088259e-06, "loss": 0.943, "step": 17587 }, { "epoch": 0.6881602629313718, "grad_norm": 0.0, "learning_rate": 4.681250498048342e-06, "loss": 1.0915, "step": 17588 }, { "epoch": 0.6881993896236012, "grad_norm": 0.0, "learning_rate": 4.6801774204208775e-06, "loss": 0.9071, "step": 17589 }, { "epoch": 0.6882385163158307, "grad_norm": 0.0, "learning_rate": 4.679104428223098e-06, "loss": 0.9737, "step": 17590 }, { "epoch": 0.68827764300806, "grad_norm": 0.0, "learning_rate": 4.678031521472237e-06, "loss": 1.006, "step": 17591 }, { "epoch": 0.6883167697002895, "grad_norm": 0.0, "learning_rate": 4.676958700185518e-06, "loss": 1.1429, "step": 17592 }, { "epoch": 0.6883558963925189, "grad_norm": 0.0, "learning_rate": 4.675885964380171e-06, "loss": 0.9624, "step": 17593 }, { "epoch": 0.6883950230847484, "grad_norm": 0.0, "learning_rate": 4.6748133140734245e-06, "loss": 0.9996, "step": 17594 }, { "epoch": 0.6884341497769778, "grad_norm": 0.0, "learning_rate": 4.673740749282507e-06, "loss": 1.0997, "step": 17595 }, { "epoch": 0.6884732764692073, "grad_norm": 0.0, "learning_rate": 4.672668270024635e-06, "loss": 0.7862, "step": 17596 }, { "epoch": 0.6885124031614367, "grad_norm": 0.0, "learning_rate": 4.671595876317035e-06, "loss": 1.0001, "step": 17597 }, { "epoch": 0.6885515298536662, "grad_norm": 0.0, "learning_rate": 4.670523568176929e-06, "loss": 0.9453, "step": 17598 }, { "epoch": 0.6885906565458956, "grad_norm": 0.0, "learning_rate": 4.669451345621541e-06, "loss": 0.8579, "step": 17599 }, { "epoch": 0.6886297832381251, "grad_norm": 0.0, "learning_rate": 4.668379208668079e-06, "loss": 0.9736, "step": 17600 }, { "epoch": 0.6886689099303545, "grad_norm": 0.0, "learning_rate": 4.667307157333771e-06, "loss": 1.073, "step": 17601 }, { "epoch": 0.688708036622584, "grad_norm": 0.0, "learning_rate": 4.666235191635819e-06, "loss": 0.9716, "step": 17602 }, { "epoch": 0.6887471633148133, "grad_norm": 0.0, "learning_rate": 4.665163311591455e-06, "loss": 1.1151, "step": 17603 }, { "epoch": 0.6887862900070428, "grad_norm": 0.0, "learning_rate": 4.664091517217879e-06, "loss": 0.9746, "step": 17604 }, { "epoch": 0.6888254166992722, "grad_norm": 0.0, "learning_rate": 4.663019808532311e-06, "loss": 1.1367, "step": 17605 }, { "epoch": 0.6888645433915017, "grad_norm": 0.0, "learning_rate": 4.66194818555195e-06, "loss": 0.99, "step": 17606 }, { "epoch": 0.6889036700837311, "grad_norm": 0.0, "learning_rate": 4.660876648294019e-06, "loss": 0.9175, "step": 17607 }, { "epoch": 0.6889427967759606, "grad_norm": 0.0, "learning_rate": 4.659805196775715e-06, "loss": 0.944, "step": 17608 }, { "epoch": 0.68898192346819, "grad_norm": 0.0, "learning_rate": 4.6587338310142526e-06, "loss": 1.0013, "step": 17609 }, { "epoch": 0.6890210501604195, "grad_norm": 0.0, "learning_rate": 4.657662551026827e-06, "loss": 1.0311, "step": 17610 }, { "epoch": 0.6890601768526489, "grad_norm": 0.0, "learning_rate": 4.656591356830648e-06, "loss": 0.9919, "step": 17611 }, { "epoch": 0.6890993035448784, "grad_norm": 0.0, "learning_rate": 4.655520248442914e-06, "loss": 1.0227, "step": 17612 }, { "epoch": 0.6891384302371077, "grad_norm": 0.0, "learning_rate": 4.654449225880833e-06, "loss": 0.9324, "step": 17613 }, { "epoch": 0.6891775569293372, "grad_norm": 0.0, "learning_rate": 4.653378289161595e-06, "loss": 0.9649, "step": 17614 }, { "epoch": 0.6892166836215666, "grad_norm": 0.0, "learning_rate": 4.652307438302402e-06, "loss": 0.8479, "step": 17615 }, { "epoch": 0.6892558103137961, "grad_norm": 0.0, "learning_rate": 4.651236673320454e-06, "loss": 0.9116, "step": 17616 }, { "epoch": 0.6892949370060255, "grad_norm": 0.0, "learning_rate": 4.650165994232939e-06, "loss": 0.9972, "step": 17617 }, { "epoch": 0.6893340636982549, "grad_norm": 0.0, "learning_rate": 4.649095401057055e-06, "loss": 0.9313, "step": 17618 }, { "epoch": 0.6893731903904844, "grad_norm": 0.0, "learning_rate": 4.648024893809997e-06, "loss": 1.0385, "step": 17619 }, { "epoch": 0.6894123170827138, "grad_norm": 0.0, "learning_rate": 4.64695447250895e-06, "loss": 1.0576, "step": 17620 }, { "epoch": 0.6894514437749433, "grad_norm": 0.0, "learning_rate": 4.645884137171105e-06, "loss": 1.0985, "step": 17621 }, { "epoch": 0.6894905704671727, "grad_norm": 0.0, "learning_rate": 4.6448138878136565e-06, "loss": 0.9176, "step": 17622 }, { "epoch": 0.6895296971594022, "grad_norm": 0.0, "learning_rate": 4.643743724453782e-06, "loss": 1.1108, "step": 17623 }, { "epoch": 0.6895688238516315, "grad_norm": 0.0, "learning_rate": 4.642673647108672e-06, "loss": 0.8901, "step": 17624 }, { "epoch": 0.689607950543861, "grad_norm": 0.0, "learning_rate": 4.6416036557955105e-06, "loss": 1.0763, "step": 17625 }, { "epoch": 0.6896470772360904, "grad_norm": 0.0, "learning_rate": 4.640533750531483e-06, "loss": 0.9658, "step": 17626 }, { "epoch": 0.6896862039283199, "grad_norm": 0.0, "learning_rate": 4.639463931333763e-06, "loss": 0.922, "step": 17627 }, { "epoch": 0.6897253306205493, "grad_norm": 0.0, "learning_rate": 4.638394198219537e-06, "loss": 1.037, "step": 17628 }, { "epoch": 0.6897644573127788, "grad_norm": 0.0, "learning_rate": 4.637324551205981e-06, "loss": 0.9898, "step": 17629 }, { "epoch": 0.6898035840050082, "grad_norm": 0.0, "learning_rate": 4.636254990310276e-06, "loss": 0.9324, "step": 17630 }, { "epoch": 0.6898427106972377, "grad_norm": 0.0, "learning_rate": 4.635185515549592e-06, "loss": 0.8893, "step": 17631 }, { "epoch": 0.6898818373894671, "grad_norm": 0.0, "learning_rate": 4.634116126941108e-06, "loss": 0.9832, "step": 17632 }, { "epoch": 0.6899209640816966, "grad_norm": 0.0, "learning_rate": 4.63304682450199e-06, "loss": 1.121, "step": 17633 }, { "epoch": 0.689960090773926, "grad_norm": 0.0, "learning_rate": 4.6319776082494215e-06, "loss": 0.8967, "step": 17634 }, { "epoch": 0.6899992174661554, "grad_norm": 0.0, "learning_rate": 4.630908478200562e-06, "loss": 1.0472, "step": 17635 }, { "epoch": 0.6900383441583848, "grad_norm": 0.0, "learning_rate": 4.6298394343725884e-06, "loss": 1.0116, "step": 17636 }, { "epoch": 0.6900774708506143, "grad_norm": 0.0, "learning_rate": 4.62877047678266e-06, "loss": 1.0002, "step": 17637 }, { "epoch": 0.6901165975428437, "grad_norm": 0.0, "learning_rate": 4.627701605447949e-06, "loss": 0.9916, "step": 17638 }, { "epoch": 0.6901557242350732, "grad_norm": 0.0, "learning_rate": 4.626632820385617e-06, "loss": 0.8826, "step": 17639 }, { "epoch": 0.6901948509273026, "grad_norm": 0.0, "learning_rate": 4.6255641216128326e-06, "loss": 1.1205, "step": 17640 }, { "epoch": 0.6902339776195321, "grad_norm": 0.0, "learning_rate": 4.624495509146749e-06, "loss": 1.0158, "step": 17641 }, { "epoch": 0.6902731043117615, "grad_norm": 0.0, "learning_rate": 4.623426983004533e-06, "loss": 0.9791, "step": 17642 }, { "epoch": 0.690312231003991, "grad_norm": 0.0, "learning_rate": 4.622358543203342e-06, "loss": 1.1974, "step": 17643 }, { "epoch": 0.6903513576962204, "grad_norm": 0.0, "learning_rate": 4.621290189760339e-06, "loss": 1.0608, "step": 17644 }, { "epoch": 0.6903904843884499, "grad_norm": 0.0, "learning_rate": 4.6202219226926704e-06, "loss": 0.9715, "step": 17645 }, { "epoch": 0.6904296110806792, "grad_norm": 0.0, "learning_rate": 4.619153742017501e-06, "loss": 0.9304, "step": 17646 }, { "epoch": 0.6904687377729086, "grad_norm": 0.0, "learning_rate": 4.618085647751973e-06, "loss": 0.9673, "step": 17647 }, { "epoch": 0.6905078644651381, "grad_norm": 0.0, "learning_rate": 4.617017639913252e-06, "loss": 0.9927, "step": 17648 }, { "epoch": 0.6905469911573675, "grad_norm": 0.0, "learning_rate": 4.6159497185184785e-06, "loss": 0.8848, "step": 17649 }, { "epoch": 0.690586117849597, "grad_norm": 0.0, "learning_rate": 4.614881883584811e-06, "loss": 1.0562, "step": 17650 }, { "epoch": 0.6906252445418264, "grad_norm": 0.0, "learning_rate": 4.613814135129384e-06, "loss": 0.971, "step": 17651 }, { "epoch": 0.6906643712340559, "grad_norm": 0.0, "learning_rate": 4.61274647316936e-06, "loss": 0.9237, "step": 17652 }, { "epoch": 0.6907034979262853, "grad_norm": 0.0, "learning_rate": 4.611678897721874e-06, "loss": 0.9343, "step": 17653 }, { "epoch": 0.6907426246185148, "grad_norm": 0.0, "learning_rate": 4.610611408804077e-06, "loss": 1.0528, "step": 17654 }, { "epoch": 0.6907817513107442, "grad_norm": 0.0, "learning_rate": 4.6095440064331035e-06, "loss": 1.0288, "step": 17655 }, { "epoch": 0.6908208780029736, "grad_norm": 0.0, "learning_rate": 4.608476690626098e-06, "loss": 0.9436, "step": 17656 }, { "epoch": 0.690860004695203, "grad_norm": 0.0, "learning_rate": 4.6074094614002015e-06, "loss": 1.0511, "step": 17657 }, { "epoch": 0.6908991313874325, "grad_norm": 0.0, "learning_rate": 4.606342318772556e-06, "loss": 1.1205, "step": 17658 }, { "epoch": 0.6909382580796619, "grad_norm": 0.0, "learning_rate": 4.605275262760289e-06, "loss": 1.0218, "step": 17659 }, { "epoch": 0.6909773847718914, "grad_norm": 0.0, "learning_rate": 4.6042082933805425e-06, "loss": 1.0069, "step": 17660 }, { "epoch": 0.6910165114641208, "grad_norm": 0.0, "learning_rate": 4.603141410650449e-06, "loss": 0.9379, "step": 17661 }, { "epoch": 0.6910556381563503, "grad_norm": 0.0, "learning_rate": 4.6020746145871454e-06, "loss": 0.942, "step": 17662 }, { "epoch": 0.6910947648485797, "grad_norm": 0.0, "learning_rate": 4.601007905207756e-06, "loss": 0.9584, "step": 17663 }, { "epoch": 0.6911338915408092, "grad_norm": 0.0, "learning_rate": 4.5999412825294145e-06, "loss": 0.942, "step": 17664 }, { "epoch": 0.6911730182330386, "grad_norm": 0.0, "learning_rate": 4.5988747465692525e-06, "loss": 1.0162, "step": 17665 }, { "epoch": 0.6912121449252681, "grad_norm": 0.0, "learning_rate": 4.59780829734439e-06, "loss": 0.961, "step": 17666 }, { "epoch": 0.6912512716174974, "grad_norm": 0.0, "learning_rate": 4.596741934871959e-06, "loss": 1.0605, "step": 17667 }, { "epoch": 0.6912903983097269, "grad_norm": 0.0, "learning_rate": 4.5956756591690845e-06, "loss": 0.9688, "step": 17668 }, { "epoch": 0.6913295250019563, "grad_norm": 0.0, "learning_rate": 4.594609470252882e-06, "loss": 1.0639, "step": 17669 }, { "epoch": 0.6913686516941858, "grad_norm": 0.0, "learning_rate": 4.5935433681404795e-06, "loss": 0.9646, "step": 17670 }, { "epoch": 0.6914077783864152, "grad_norm": 0.0, "learning_rate": 4.5924773528489986e-06, "loss": 0.8969, "step": 17671 }, { "epoch": 0.6914469050786447, "grad_norm": 0.0, "learning_rate": 4.591411424395552e-06, "loss": 0.999, "step": 17672 }, { "epoch": 0.6914860317708741, "grad_norm": 0.0, "learning_rate": 4.590345582797261e-06, "loss": 1.068, "step": 17673 }, { "epoch": 0.6915251584631036, "grad_norm": 0.0, "learning_rate": 4.589279828071242e-06, "loss": 1.0153, "step": 17674 }, { "epoch": 0.691564285155333, "grad_norm": 0.0, "learning_rate": 4.588214160234611e-06, "loss": 0.8823, "step": 17675 }, { "epoch": 0.6916034118475624, "grad_norm": 0.0, "learning_rate": 4.587148579304477e-06, "loss": 0.9717, "step": 17676 }, { "epoch": 0.6916425385397919, "grad_norm": 0.0, "learning_rate": 4.586083085297957e-06, "loss": 1.037, "step": 17677 }, { "epoch": 0.6916816652320212, "grad_norm": 0.0, "learning_rate": 4.585017678232151e-06, "loss": 0.9737, "step": 17678 }, { "epoch": 0.6917207919242507, "grad_norm": 0.0, "learning_rate": 4.583952358124183e-06, "loss": 1.0368, "step": 17679 }, { "epoch": 0.6917599186164801, "grad_norm": 0.0, "learning_rate": 4.58288712499115e-06, "loss": 1.0675, "step": 17680 }, { "epoch": 0.6917990453087096, "grad_norm": 0.0, "learning_rate": 4.581821978850166e-06, "loss": 1.0942, "step": 17681 }, { "epoch": 0.691838172000939, "grad_norm": 0.0, "learning_rate": 4.580756919718323e-06, "loss": 1.0628, "step": 17682 }, { "epoch": 0.6918772986931685, "grad_norm": 0.0, "learning_rate": 4.579691947612742e-06, "loss": 0.9594, "step": 17683 }, { "epoch": 0.6919164253853979, "grad_norm": 0.0, "learning_rate": 4.578627062550513e-06, "loss": 0.9486, "step": 17684 }, { "epoch": 0.6919555520776274, "grad_norm": 0.0, "learning_rate": 4.577562264548741e-06, "loss": 0.9657, "step": 17685 }, { "epoch": 0.6919946787698568, "grad_norm": 0.0, "learning_rate": 4.576497553624523e-06, "loss": 0.8609, "step": 17686 }, { "epoch": 0.6920338054620863, "grad_norm": 0.0, "learning_rate": 4.575432929794959e-06, "loss": 0.9489, "step": 17687 }, { "epoch": 0.6920729321543156, "grad_norm": 0.0, "learning_rate": 4.5743683930771425e-06, "loss": 0.9364, "step": 17688 }, { "epoch": 0.6921120588465451, "grad_norm": 0.0, "learning_rate": 4.573303943488175e-06, "loss": 0.9239, "step": 17689 }, { "epoch": 0.6921511855387745, "grad_norm": 0.0, "learning_rate": 4.572239581045144e-06, "loss": 1.0925, "step": 17690 }, { "epoch": 0.692190312231004, "grad_norm": 0.0, "learning_rate": 4.571175305765143e-06, "loss": 0.9766, "step": 17691 }, { "epoch": 0.6922294389232334, "grad_norm": 0.0, "learning_rate": 4.570111117665263e-06, "loss": 0.9683, "step": 17692 }, { "epoch": 0.6922685656154629, "grad_norm": 0.0, "learning_rate": 4.5690470167626e-06, "loss": 1.0397, "step": 17693 }, { "epoch": 0.6923076923076923, "grad_norm": 0.0, "learning_rate": 4.567983003074231e-06, "loss": 0.9018, "step": 17694 }, { "epoch": 0.6923468189999218, "grad_norm": 0.0, "learning_rate": 4.566919076617254e-06, "loss": 0.9023, "step": 17695 }, { "epoch": 0.6923859456921512, "grad_norm": 0.0, "learning_rate": 4.56585523740874e-06, "loss": 0.9747, "step": 17696 }, { "epoch": 0.6924250723843807, "grad_norm": 0.0, "learning_rate": 4.564791485465788e-06, "loss": 0.991, "step": 17697 }, { "epoch": 0.69246419907661, "grad_norm": 0.0, "learning_rate": 4.563727820805471e-06, "loss": 1.0188, "step": 17698 }, { "epoch": 0.6925033257688396, "grad_norm": 0.0, "learning_rate": 4.562664243444877e-06, "loss": 0.9876, "step": 17699 }, { "epoch": 0.6925424524610689, "grad_norm": 0.0, "learning_rate": 4.561600753401075e-06, "loss": 1.0435, "step": 17700 }, { "epoch": 0.6925815791532984, "grad_norm": 0.0, "learning_rate": 4.560537350691158e-06, "loss": 0.9844, "step": 17701 }, { "epoch": 0.6926207058455278, "grad_norm": 0.0, "learning_rate": 4.5594740353321895e-06, "loss": 0.9664, "step": 17702 }, { "epoch": 0.6926598325377572, "grad_norm": 0.0, "learning_rate": 4.558410807341256e-06, "loss": 1.039, "step": 17703 }, { "epoch": 0.6926989592299867, "grad_norm": 0.0, "learning_rate": 4.557347666735422e-06, "loss": 0.8457, "step": 17704 }, { "epoch": 0.6927380859222161, "grad_norm": 0.0, "learning_rate": 4.556284613531764e-06, "loss": 1.0482, "step": 17705 }, { "epoch": 0.6927772126144456, "grad_norm": 0.0, "learning_rate": 4.555221647747354e-06, "loss": 1.0587, "step": 17706 }, { "epoch": 0.692816339306675, "grad_norm": 0.0, "learning_rate": 4.554158769399266e-06, "loss": 0.962, "step": 17707 }, { "epoch": 0.6928554659989045, "grad_norm": 0.0, "learning_rate": 4.553095978504561e-06, "loss": 0.9009, "step": 17708 }, { "epoch": 0.6928945926911338, "grad_norm": 0.0, "learning_rate": 4.5520332750803075e-06, "loss": 1.121, "step": 17709 }, { "epoch": 0.6929337193833633, "grad_norm": 0.0, "learning_rate": 4.550970659143578e-06, "loss": 0.97, "step": 17710 }, { "epoch": 0.6929728460755927, "grad_norm": 0.0, "learning_rate": 4.549908130711427e-06, "loss": 0.9557, "step": 17711 }, { "epoch": 0.6930119727678222, "grad_norm": 0.0, "learning_rate": 4.548845689800923e-06, "loss": 1.0282, "step": 17712 }, { "epoch": 0.6930510994600516, "grad_norm": 0.0, "learning_rate": 4.547783336429124e-06, "loss": 0.9454, "step": 17713 }, { "epoch": 0.6930902261522811, "grad_norm": 0.0, "learning_rate": 4.546721070613099e-06, "loss": 0.9593, "step": 17714 }, { "epoch": 0.6931293528445105, "grad_norm": 0.0, "learning_rate": 4.545658892369897e-06, "loss": 0.9842, "step": 17715 }, { "epoch": 0.69316847953674, "grad_norm": 0.0, "learning_rate": 4.54459680171658e-06, "loss": 0.9404, "step": 17716 }, { "epoch": 0.6932076062289694, "grad_norm": 0.0, "learning_rate": 4.5435347986702e-06, "loss": 1.0261, "step": 17717 }, { "epoch": 0.6932467329211989, "grad_norm": 0.0, "learning_rate": 4.542472883247814e-06, "loss": 1.0318, "step": 17718 }, { "epoch": 0.6932858596134283, "grad_norm": 0.0, "learning_rate": 4.541411055466474e-06, "loss": 0.8776, "step": 17719 }, { "epoch": 0.6933249863056578, "grad_norm": 0.0, "learning_rate": 4.540349315343236e-06, "loss": 1.1007, "step": 17720 }, { "epoch": 0.6933641129978871, "grad_norm": 0.0, "learning_rate": 4.539287662895143e-06, "loss": 1.052, "step": 17721 }, { "epoch": 0.6934032396901166, "grad_norm": 0.0, "learning_rate": 4.538226098139249e-06, "loss": 1.0153, "step": 17722 }, { "epoch": 0.693442366382346, "grad_norm": 0.0, "learning_rate": 4.537164621092599e-06, "loss": 0.9335, "step": 17723 }, { "epoch": 0.6934814930745755, "grad_norm": 0.0, "learning_rate": 4.536103231772243e-06, "loss": 0.9633, "step": 17724 }, { "epoch": 0.6935206197668049, "grad_norm": 0.0, "learning_rate": 4.535041930195218e-06, "loss": 0.8797, "step": 17725 }, { "epoch": 0.6935597464590344, "grad_norm": 0.0, "learning_rate": 4.533980716378577e-06, "loss": 0.891, "step": 17726 }, { "epoch": 0.6935988731512638, "grad_norm": 0.0, "learning_rate": 4.532919590339349e-06, "loss": 1.0516, "step": 17727 }, { "epoch": 0.6936379998434933, "grad_norm": 0.0, "learning_rate": 4.531858552094589e-06, "loss": 1.035, "step": 17728 }, { "epoch": 0.6936771265357227, "grad_norm": 0.0, "learning_rate": 4.530797601661324e-06, "loss": 0.8813, "step": 17729 }, { "epoch": 0.6937162532279522, "grad_norm": 0.0, "learning_rate": 4.529736739056601e-06, "loss": 0.9378, "step": 17730 }, { "epoch": 0.6937553799201815, "grad_norm": 0.0, "learning_rate": 4.528675964297443e-06, "loss": 0.9032, "step": 17731 }, { "epoch": 0.6937945066124109, "grad_norm": 0.0, "learning_rate": 4.527615277400901e-06, "loss": 1.0789, "step": 17732 }, { "epoch": 0.6938336333046404, "grad_norm": 0.0, "learning_rate": 4.526554678383997e-06, "loss": 1.0857, "step": 17733 }, { "epoch": 0.6938727599968698, "grad_norm": 0.0, "learning_rate": 4.52549416726377e-06, "loss": 1.0765, "step": 17734 }, { "epoch": 0.6939118866890993, "grad_norm": 0.0, "learning_rate": 4.5244337440572415e-06, "loss": 0.9962, "step": 17735 }, { "epoch": 0.6939510133813287, "grad_norm": 0.0, "learning_rate": 4.523373408781447e-06, "loss": 0.9348, "step": 17736 }, { "epoch": 0.6939901400735582, "grad_norm": 0.0, "learning_rate": 4.522313161453413e-06, "loss": 1.0516, "step": 17737 }, { "epoch": 0.6940292667657876, "grad_norm": 0.0, "learning_rate": 4.521253002090169e-06, "loss": 1.0154, "step": 17738 }, { "epoch": 0.6940683934580171, "grad_norm": 0.0, "learning_rate": 4.5201929307087336e-06, "loss": 1.0444, "step": 17739 }, { "epoch": 0.6941075201502465, "grad_norm": 0.0, "learning_rate": 4.519132947326132e-06, "loss": 1.0602, "step": 17740 }, { "epoch": 0.694146646842476, "grad_norm": 0.0, "learning_rate": 4.518073051959388e-06, "loss": 0.8544, "step": 17741 }, { "epoch": 0.6941857735347053, "grad_norm": 0.0, "learning_rate": 4.517013244625526e-06, "loss": 1.0012, "step": 17742 }, { "epoch": 0.6942249002269348, "grad_norm": 0.0, "learning_rate": 4.515953525341555e-06, "loss": 1.015, "step": 17743 }, { "epoch": 0.6942640269191642, "grad_norm": 0.0, "learning_rate": 4.514893894124504e-06, "loss": 1.1168, "step": 17744 }, { "epoch": 0.6943031536113937, "grad_norm": 0.0, "learning_rate": 4.513834350991376e-06, "loss": 1.1058, "step": 17745 }, { "epoch": 0.6943422803036231, "grad_norm": 0.0, "learning_rate": 4.512774895959201e-06, "loss": 0.9448, "step": 17746 }, { "epoch": 0.6943814069958526, "grad_norm": 0.0, "learning_rate": 4.511715529044982e-06, "loss": 0.9517, "step": 17747 }, { "epoch": 0.694420533688082, "grad_norm": 0.0, "learning_rate": 4.510656250265738e-06, "loss": 0.9703, "step": 17748 }, { "epoch": 0.6944596603803115, "grad_norm": 0.0, "learning_rate": 4.509597059638472e-06, "loss": 1.0297, "step": 17749 }, { "epoch": 0.6944987870725409, "grad_norm": 0.0, "learning_rate": 4.5085379571801966e-06, "loss": 1.0371, "step": 17750 }, { "epoch": 0.6945379137647704, "grad_norm": 0.0, "learning_rate": 4.5074789429079224e-06, "loss": 0.9686, "step": 17751 }, { "epoch": 0.6945770404569998, "grad_norm": 0.0, "learning_rate": 4.506420016838656e-06, "loss": 0.9663, "step": 17752 }, { "epoch": 0.6946161671492292, "grad_norm": 0.0, "learning_rate": 4.505361178989397e-06, "loss": 0.9431, "step": 17753 }, { "epoch": 0.6946552938414586, "grad_norm": 0.0, "learning_rate": 4.504302429377152e-06, "loss": 0.9797, "step": 17754 }, { "epoch": 0.6946944205336881, "grad_norm": 0.0, "learning_rate": 4.503243768018928e-06, "loss": 1.0994, "step": 17755 }, { "epoch": 0.6947335472259175, "grad_norm": 0.0, "learning_rate": 4.502185194931718e-06, "loss": 0.9623, "step": 17756 }, { "epoch": 0.694772673918147, "grad_norm": 0.0, "learning_rate": 4.501126710132523e-06, "loss": 1.017, "step": 17757 }, { "epoch": 0.6948118006103764, "grad_norm": 0.0, "learning_rate": 4.500068313638343e-06, "loss": 0.919, "step": 17758 }, { "epoch": 0.6948509273026059, "grad_norm": 0.0, "learning_rate": 4.4990100054661775e-06, "loss": 0.8878, "step": 17759 }, { "epoch": 0.6948900539948353, "grad_norm": 0.0, "learning_rate": 4.4979517856330155e-06, "loss": 0.933, "step": 17760 }, { "epoch": 0.6949291806870647, "grad_norm": 0.0, "learning_rate": 4.496893654155858e-06, "loss": 1.0247, "step": 17761 }, { "epoch": 0.6949683073792942, "grad_norm": 0.0, "learning_rate": 4.495835611051687e-06, "loss": 0.9715, "step": 17762 }, { "epoch": 0.6950074340715235, "grad_norm": 0.0, "learning_rate": 4.4947776563374986e-06, "loss": 0.9961, "step": 17763 }, { "epoch": 0.695046560763753, "grad_norm": 0.0, "learning_rate": 4.493719790030284e-06, "loss": 0.9927, "step": 17764 }, { "epoch": 0.6950856874559824, "grad_norm": 0.0, "learning_rate": 4.492662012147033e-06, "loss": 0.9359, "step": 17765 }, { "epoch": 0.6951248141482119, "grad_norm": 0.0, "learning_rate": 4.491604322704726e-06, "loss": 0.9691, "step": 17766 }, { "epoch": 0.6951639408404413, "grad_norm": 0.0, "learning_rate": 4.49054672172035e-06, "loss": 0.9188, "step": 17767 }, { "epoch": 0.6952030675326708, "grad_norm": 0.0, "learning_rate": 4.489489209210891e-06, "loss": 1.0026, "step": 17768 }, { "epoch": 0.6952421942249002, "grad_norm": 0.0, "learning_rate": 4.488431785193333e-06, "loss": 1.0206, "step": 17769 }, { "epoch": 0.6952813209171297, "grad_norm": 0.0, "learning_rate": 4.48737444968465e-06, "loss": 1.007, "step": 17770 }, { "epoch": 0.6953204476093591, "grad_norm": 0.0, "learning_rate": 4.486317202701828e-06, "loss": 1.0169, "step": 17771 }, { "epoch": 0.6953595743015886, "grad_norm": 0.0, "learning_rate": 4.485260044261841e-06, "loss": 1.0123, "step": 17772 }, { "epoch": 0.695398700993818, "grad_norm": 0.0, "learning_rate": 4.484202974381671e-06, "loss": 0.9824, "step": 17773 }, { "epoch": 0.6954378276860474, "grad_norm": 0.0, "learning_rate": 4.483145993078286e-06, "loss": 1.0795, "step": 17774 }, { "epoch": 0.6954769543782768, "grad_norm": 0.0, "learning_rate": 4.482089100368667e-06, "loss": 0.9649, "step": 17775 }, { "epoch": 0.6955160810705063, "grad_norm": 0.0, "learning_rate": 4.481032296269775e-06, "loss": 1.0375, "step": 17776 }, { "epoch": 0.6955552077627357, "grad_norm": 0.0, "learning_rate": 4.4799755807985965e-06, "loss": 1.0986, "step": 17777 }, { "epoch": 0.6955943344549652, "grad_norm": 0.0, "learning_rate": 4.47891895397209e-06, "loss": 0.9067, "step": 17778 }, { "epoch": 0.6956334611471946, "grad_norm": 0.0, "learning_rate": 4.4778624158072305e-06, "loss": 0.921, "step": 17779 }, { "epoch": 0.6956725878394241, "grad_norm": 0.0, "learning_rate": 4.476805966320973e-06, "loss": 0.9038, "step": 17780 }, { "epoch": 0.6957117145316535, "grad_norm": 0.0, "learning_rate": 4.475749605530297e-06, "loss": 0.9462, "step": 17781 }, { "epoch": 0.695750841223883, "grad_norm": 0.0, "learning_rate": 4.474693333452158e-06, "loss": 0.9303, "step": 17782 }, { "epoch": 0.6957899679161124, "grad_norm": 0.0, "learning_rate": 4.473637150103524e-06, "loss": 0.9416, "step": 17783 }, { "epoch": 0.6958290946083419, "grad_norm": 0.0, "learning_rate": 4.472581055501348e-06, "loss": 0.9781, "step": 17784 }, { "epoch": 0.6958682213005712, "grad_norm": 0.0, "learning_rate": 4.471525049662595e-06, "loss": 1.0491, "step": 17785 }, { "epoch": 0.6959073479928007, "grad_norm": 0.0, "learning_rate": 4.470469132604222e-06, "loss": 1.0387, "step": 17786 }, { "epoch": 0.6959464746850301, "grad_norm": 0.0, "learning_rate": 4.469413304343189e-06, "loss": 0.9341, "step": 17787 }, { "epoch": 0.6959856013772596, "grad_norm": 0.0, "learning_rate": 4.468357564896445e-06, "loss": 1.0791, "step": 17788 }, { "epoch": 0.696024728069489, "grad_norm": 0.0, "learning_rate": 4.467301914280946e-06, "loss": 0.9518, "step": 17789 }, { "epoch": 0.6960638547617184, "grad_norm": 0.0, "learning_rate": 4.466246352513646e-06, "loss": 1.0596, "step": 17790 }, { "epoch": 0.6961029814539479, "grad_norm": 0.0, "learning_rate": 4.4651908796115e-06, "loss": 0.9791, "step": 17791 }, { "epoch": 0.6961421081461773, "grad_norm": 0.0, "learning_rate": 4.464135495591447e-06, "loss": 1.018, "step": 17792 }, { "epoch": 0.6961812348384068, "grad_norm": 0.0, "learning_rate": 4.463080200470446e-06, "loss": 1.0024, "step": 17793 }, { "epoch": 0.6962203615306362, "grad_norm": 0.0, "learning_rate": 4.462024994265436e-06, "loss": 0.864, "step": 17794 }, { "epoch": 0.6962594882228657, "grad_norm": 0.0, "learning_rate": 4.460969876993364e-06, "loss": 0.9681, "step": 17795 }, { "epoch": 0.696298614915095, "grad_norm": 0.0, "learning_rate": 4.459914848671175e-06, "loss": 0.9318, "step": 17796 }, { "epoch": 0.6963377416073245, "grad_norm": 0.0, "learning_rate": 4.458859909315816e-06, "loss": 0.8452, "step": 17797 }, { "epoch": 0.6963768682995539, "grad_norm": 0.0, "learning_rate": 4.457805058944219e-06, "loss": 0.9474, "step": 17798 }, { "epoch": 0.6964159949917834, "grad_norm": 0.0, "learning_rate": 4.456750297573329e-06, "loss": 0.9735, "step": 17799 }, { "epoch": 0.6964551216840128, "grad_norm": 0.0, "learning_rate": 4.4556956252200855e-06, "loss": 0.9586, "step": 17800 }, { "epoch": 0.6964942483762423, "grad_norm": 0.0, "learning_rate": 4.45464104190142e-06, "loss": 1.0375, "step": 17801 }, { "epoch": 0.6965333750684717, "grad_norm": 0.0, "learning_rate": 4.45358654763427e-06, "loss": 0.9544, "step": 17802 }, { "epoch": 0.6965725017607012, "grad_norm": 0.0, "learning_rate": 4.452532142435571e-06, "loss": 1.0681, "step": 17803 }, { "epoch": 0.6966116284529306, "grad_norm": 0.0, "learning_rate": 4.4514778263222565e-06, "loss": 1.1183, "step": 17804 }, { "epoch": 0.6966507551451601, "grad_norm": 0.0, "learning_rate": 4.450423599311254e-06, "loss": 0.9187, "step": 17805 }, { "epoch": 0.6966898818373894, "grad_norm": 0.0, "learning_rate": 4.449369461419496e-06, "loss": 1.1138, "step": 17806 }, { "epoch": 0.6967290085296189, "grad_norm": 0.0, "learning_rate": 4.4483154126639026e-06, "loss": 1.0461, "step": 17807 }, { "epoch": 0.6967681352218483, "grad_norm": 0.0, "learning_rate": 4.4472614530614146e-06, "loss": 0.9915, "step": 17808 }, { "epoch": 0.6968072619140778, "grad_norm": 0.0, "learning_rate": 4.446207582628945e-06, "loss": 1.0322, "step": 17809 }, { "epoch": 0.6968463886063072, "grad_norm": 0.0, "learning_rate": 4.445153801383427e-06, "loss": 0.9747, "step": 17810 }, { "epoch": 0.6968855152985367, "grad_norm": 0.0, "learning_rate": 4.444100109341774e-06, "loss": 1.0335, "step": 17811 }, { "epoch": 0.6969246419907661, "grad_norm": 0.0, "learning_rate": 4.443046506520913e-06, "loss": 1.0203, "step": 17812 }, { "epoch": 0.6969637686829956, "grad_norm": 0.0, "learning_rate": 4.44199299293776e-06, "loss": 0.8036, "step": 17813 }, { "epoch": 0.697002895375225, "grad_norm": 0.0, "learning_rate": 4.440939568609239e-06, "loss": 1.072, "step": 17814 }, { "epoch": 0.6970420220674545, "grad_norm": 0.0, "learning_rate": 4.4398862335522595e-06, "loss": 0.9491, "step": 17815 }, { "epoch": 0.6970811487596839, "grad_norm": 0.0, "learning_rate": 4.43883298778374e-06, "loss": 0.9436, "step": 17816 }, { "epoch": 0.6971202754519132, "grad_norm": 0.0, "learning_rate": 4.437779831320595e-06, "loss": 1.0251, "step": 17817 }, { "epoch": 0.6971594021441427, "grad_norm": 0.0, "learning_rate": 4.436726764179737e-06, "loss": 1.0067, "step": 17818 }, { "epoch": 0.6971985288363721, "grad_norm": 0.0, "learning_rate": 4.435673786378074e-06, "loss": 0.9438, "step": 17819 }, { "epoch": 0.6972376555286016, "grad_norm": 0.0, "learning_rate": 4.434620897932521e-06, "loss": 0.8228, "step": 17820 }, { "epoch": 0.697276782220831, "grad_norm": 0.0, "learning_rate": 4.433568098859976e-06, "loss": 0.9586, "step": 17821 }, { "epoch": 0.6973159089130605, "grad_norm": 0.0, "learning_rate": 4.432515389177359e-06, "loss": 0.955, "step": 17822 }, { "epoch": 0.6973550356052899, "grad_norm": 0.0, "learning_rate": 4.431462768901564e-06, "loss": 0.9594, "step": 17823 }, { "epoch": 0.6973941622975194, "grad_norm": 0.0, "learning_rate": 4.430410238049504e-06, "loss": 0.9919, "step": 17824 }, { "epoch": 0.6974332889897488, "grad_norm": 0.0, "learning_rate": 4.429357796638068e-06, "loss": 1.0968, "step": 17825 }, { "epoch": 0.6974724156819783, "grad_norm": 0.0, "learning_rate": 4.428305444684173e-06, "loss": 0.8869, "step": 17826 }, { "epoch": 0.6975115423742076, "grad_norm": 0.0, "learning_rate": 4.427253182204708e-06, "loss": 1.0568, "step": 17827 }, { "epoch": 0.6975506690664371, "grad_norm": 0.0, "learning_rate": 4.426201009216576e-06, "loss": 1.0655, "step": 17828 }, { "epoch": 0.6975897957586665, "grad_norm": 0.0, "learning_rate": 4.425148925736665e-06, "loss": 0.8702, "step": 17829 }, { "epoch": 0.697628922450896, "grad_norm": 0.0, "learning_rate": 4.424096931781885e-06, "loss": 0.9469, "step": 17830 }, { "epoch": 0.6976680491431254, "grad_norm": 0.0, "learning_rate": 4.4230450273691164e-06, "loss": 0.9321, "step": 17831 }, { "epoch": 0.6977071758353549, "grad_norm": 0.0, "learning_rate": 4.42199321251526e-06, "loss": 0.8261, "step": 17832 }, { "epoch": 0.6977463025275843, "grad_norm": 0.0, "learning_rate": 4.4209414872371994e-06, "loss": 1.0892, "step": 17833 }, { "epoch": 0.6977854292198138, "grad_norm": 0.0, "learning_rate": 4.419889851551827e-06, "loss": 1.1695, "step": 17834 }, { "epoch": 0.6978245559120432, "grad_norm": 0.0, "learning_rate": 4.418838305476033e-06, "loss": 0.9302, "step": 17835 }, { "epoch": 0.6978636826042727, "grad_norm": 0.0, "learning_rate": 4.417786849026704e-06, "loss": 1.0555, "step": 17836 }, { "epoch": 0.6979028092965021, "grad_norm": 0.0, "learning_rate": 4.416735482220721e-06, "loss": 0.978, "step": 17837 }, { "epoch": 0.6979419359887316, "grad_norm": 0.0, "learning_rate": 4.415684205074969e-06, "loss": 0.9658, "step": 17838 }, { "epoch": 0.6979810626809609, "grad_norm": 0.0, "learning_rate": 4.414633017606336e-06, "loss": 0.9323, "step": 17839 }, { "epoch": 0.6980201893731904, "grad_norm": 0.0, "learning_rate": 4.413581919831693e-06, "loss": 1.0424, "step": 17840 }, { "epoch": 0.6980593160654198, "grad_norm": 0.0, "learning_rate": 4.412530911767925e-06, "loss": 1.0422, "step": 17841 }, { "epoch": 0.6980984427576493, "grad_norm": 0.0, "learning_rate": 4.411479993431912e-06, "loss": 1.0198, "step": 17842 }, { "epoch": 0.6981375694498787, "grad_norm": 0.0, "learning_rate": 4.410429164840524e-06, "loss": 1.039, "step": 17843 }, { "epoch": 0.6981766961421082, "grad_norm": 0.0, "learning_rate": 4.4093784260106395e-06, "loss": 0.9913, "step": 17844 }, { "epoch": 0.6982158228343376, "grad_norm": 0.0, "learning_rate": 4.408327776959136e-06, "loss": 1.0504, "step": 17845 }, { "epoch": 0.698254949526567, "grad_norm": 0.0, "learning_rate": 4.407277217702878e-06, "loss": 0.9932, "step": 17846 }, { "epoch": 0.6982940762187965, "grad_norm": 0.0, "learning_rate": 4.406226748258739e-06, "loss": 1.0947, "step": 17847 }, { "epoch": 0.6983332029110259, "grad_norm": 0.0, "learning_rate": 4.40517636864359e-06, "loss": 0.9802, "step": 17848 }, { "epoch": 0.6983723296032553, "grad_norm": 0.0, "learning_rate": 4.404126078874301e-06, "loss": 1.0093, "step": 17849 }, { "epoch": 0.6984114562954847, "grad_norm": 0.0, "learning_rate": 4.403075878967732e-06, "loss": 0.9715, "step": 17850 }, { "epoch": 0.6984505829877142, "grad_norm": 0.0, "learning_rate": 4.4020257689407544e-06, "loss": 1.0069, "step": 17851 }, { "epoch": 0.6984897096799436, "grad_norm": 0.0, "learning_rate": 4.400975748810221e-06, "loss": 0.9601, "step": 17852 }, { "epoch": 0.6985288363721731, "grad_norm": 0.0, "learning_rate": 4.399925818593008e-06, "loss": 0.9135, "step": 17853 }, { "epoch": 0.6985679630644025, "grad_norm": 0.0, "learning_rate": 4.398875978305966e-06, "loss": 1.0018, "step": 17854 }, { "epoch": 0.698607089756632, "grad_norm": 0.0, "learning_rate": 4.3978262279659615e-06, "loss": 1.0178, "step": 17855 }, { "epoch": 0.6986462164488614, "grad_norm": 0.0, "learning_rate": 4.396776567589839e-06, "loss": 0.9644, "step": 17856 }, { "epoch": 0.6986853431410909, "grad_norm": 0.0, "learning_rate": 4.395726997194472e-06, "loss": 0.9265, "step": 17857 }, { "epoch": 0.6987244698333203, "grad_norm": 0.0, "learning_rate": 4.394677516796703e-06, "loss": 0.9846, "step": 17858 }, { "epoch": 0.6987635965255498, "grad_norm": 0.0, "learning_rate": 4.393628126413391e-06, "loss": 1.0301, "step": 17859 }, { "epoch": 0.6988027232177791, "grad_norm": 0.0, "learning_rate": 4.3925788260613855e-06, "loss": 1.0864, "step": 17860 }, { "epoch": 0.6988418499100086, "grad_norm": 0.0, "learning_rate": 4.391529615757536e-06, "loss": 1.0005, "step": 17861 }, { "epoch": 0.698880976602238, "grad_norm": 0.0, "learning_rate": 4.390480495518693e-06, "loss": 0.9784, "step": 17862 }, { "epoch": 0.6989201032944675, "grad_norm": 0.0, "learning_rate": 4.389431465361708e-06, "loss": 1.1376, "step": 17863 }, { "epoch": 0.6989592299866969, "grad_norm": 0.0, "learning_rate": 4.388382525303419e-06, "loss": 1.0724, "step": 17864 }, { "epoch": 0.6989983566789264, "grad_norm": 0.0, "learning_rate": 4.3873336753606766e-06, "loss": 1.014, "step": 17865 }, { "epoch": 0.6990374833711558, "grad_norm": 0.0, "learning_rate": 4.386284915550321e-06, "loss": 1.0294, "step": 17866 }, { "epoch": 0.6990766100633853, "grad_norm": 0.0, "learning_rate": 4.385236245889198e-06, "loss": 0.9302, "step": 17867 }, { "epoch": 0.6991157367556147, "grad_norm": 0.0, "learning_rate": 4.384187666394143e-06, "loss": 1.0784, "step": 17868 }, { "epoch": 0.6991548634478442, "grad_norm": 0.0, "learning_rate": 4.383139177082001e-06, "loss": 1.0223, "step": 17869 }, { "epoch": 0.6991939901400736, "grad_norm": 0.0, "learning_rate": 4.382090777969598e-06, "loss": 0.9648, "step": 17870 }, { "epoch": 0.699233116832303, "grad_norm": 0.0, "learning_rate": 4.381042469073784e-06, "loss": 0.9265, "step": 17871 }, { "epoch": 0.6992722435245324, "grad_norm": 0.0, "learning_rate": 4.379994250411383e-06, "loss": 0.9861, "step": 17872 }, { "epoch": 0.6993113702167619, "grad_norm": 0.0, "learning_rate": 4.378946121999237e-06, "loss": 1.0038, "step": 17873 }, { "epoch": 0.6993504969089913, "grad_norm": 0.0, "learning_rate": 4.377898083854165e-06, "loss": 0.9367, "step": 17874 }, { "epoch": 0.6993896236012207, "grad_norm": 0.0, "learning_rate": 4.376850135993012e-06, "loss": 1.0059, "step": 17875 }, { "epoch": 0.6994287502934502, "grad_norm": 0.0, "learning_rate": 4.375802278432596e-06, "loss": 0.9842, "step": 17876 }, { "epoch": 0.6994678769856796, "grad_norm": 0.0, "learning_rate": 4.374754511189751e-06, "loss": 1.0171, "step": 17877 }, { "epoch": 0.6995070036779091, "grad_norm": 0.0, "learning_rate": 4.373706834281297e-06, "loss": 0.9319, "step": 17878 }, { "epoch": 0.6995461303701385, "grad_norm": 0.0, "learning_rate": 4.3726592477240604e-06, "loss": 1.0004, "step": 17879 }, { "epoch": 0.699585257062368, "grad_norm": 0.0, "learning_rate": 4.3716117515348655e-06, "loss": 1.0546, "step": 17880 }, { "epoch": 0.6996243837545973, "grad_norm": 0.0, "learning_rate": 4.370564345730537e-06, "loss": 1.0114, "step": 17881 }, { "epoch": 0.6996635104468268, "grad_norm": 0.0, "learning_rate": 4.369517030327887e-06, "loss": 0.9634, "step": 17882 }, { "epoch": 0.6997026371390562, "grad_norm": 0.0, "learning_rate": 4.368469805343737e-06, "loss": 0.8962, "step": 17883 }, { "epoch": 0.6997417638312857, "grad_norm": 0.0, "learning_rate": 4.367422670794909e-06, "loss": 0.8832, "step": 17884 }, { "epoch": 0.6997808905235151, "grad_norm": 0.0, "learning_rate": 4.366375626698212e-06, "loss": 0.8955, "step": 17885 }, { "epoch": 0.6998200172157446, "grad_norm": 0.0, "learning_rate": 4.3653286730704635e-06, "loss": 0.9553, "step": 17886 }, { "epoch": 0.699859143907974, "grad_norm": 0.0, "learning_rate": 4.3642818099284754e-06, "loss": 0.8927, "step": 17887 }, { "epoch": 0.6998982706002035, "grad_norm": 0.0, "learning_rate": 4.363235037289064e-06, "loss": 0.8826, "step": 17888 }, { "epoch": 0.6999373972924329, "grad_norm": 0.0, "learning_rate": 4.36218835516903e-06, "loss": 1.0127, "step": 17889 }, { "epoch": 0.6999765239846624, "grad_norm": 0.0, "learning_rate": 4.36114176358519e-06, "loss": 1.0042, "step": 17890 }, { "epoch": 0.7000156506768918, "grad_norm": 0.0, "learning_rate": 4.360095262554345e-06, "loss": 0.947, "step": 17891 }, { "epoch": 0.7000547773691213, "grad_norm": 0.0, "learning_rate": 4.359048852093301e-06, "loss": 0.9903, "step": 17892 }, { "epoch": 0.7000939040613506, "grad_norm": 0.0, "learning_rate": 4.358002532218865e-06, "loss": 0.8652, "step": 17893 }, { "epoch": 0.7001330307535801, "grad_norm": 0.0, "learning_rate": 4.356956302947843e-06, "loss": 0.948, "step": 17894 }, { "epoch": 0.7001721574458095, "grad_norm": 0.0, "learning_rate": 4.355910164297026e-06, "loss": 0.9238, "step": 17895 }, { "epoch": 0.700211284138039, "grad_norm": 0.0, "learning_rate": 4.354864116283221e-06, "loss": 0.8741, "step": 17896 }, { "epoch": 0.7002504108302684, "grad_norm": 0.0, "learning_rate": 4.353818158923223e-06, "loss": 0.9977, "step": 17897 }, { "epoch": 0.7002895375224979, "grad_norm": 0.0, "learning_rate": 4.352772292233835e-06, "loss": 1.0563, "step": 17898 }, { "epoch": 0.7003286642147273, "grad_norm": 0.0, "learning_rate": 4.351726516231843e-06, "loss": 1.0786, "step": 17899 }, { "epoch": 0.7003677909069568, "grad_norm": 0.0, "learning_rate": 4.350680830934051e-06, "loss": 0.9177, "step": 17900 }, { "epoch": 0.7004069175991862, "grad_norm": 0.0, "learning_rate": 4.349635236357237e-06, "loss": 1.0614, "step": 17901 }, { "epoch": 0.7004460442914155, "grad_norm": 0.0, "learning_rate": 4.3485897325182094e-06, "loss": 1.0735, "step": 17902 }, { "epoch": 0.700485170983645, "grad_norm": 0.0, "learning_rate": 4.347544319433744e-06, "loss": 1.0468, "step": 17903 }, { "epoch": 0.7005242976758744, "grad_norm": 0.0, "learning_rate": 4.346498997120638e-06, "loss": 1.0164, "step": 17904 }, { "epoch": 0.7005634243681039, "grad_norm": 0.0, "learning_rate": 4.345453765595667e-06, "loss": 1.0321, "step": 17905 }, { "epoch": 0.7006025510603333, "grad_norm": 0.0, "learning_rate": 4.34440862487563e-06, "loss": 0.9418, "step": 17906 }, { "epoch": 0.7006416777525628, "grad_norm": 0.0, "learning_rate": 4.3433635749773e-06, "loss": 0.9595, "step": 17907 }, { "epoch": 0.7006808044447922, "grad_norm": 0.0, "learning_rate": 4.3423186159174665e-06, "loss": 0.9859, "step": 17908 }, { "epoch": 0.7007199311370217, "grad_norm": 0.0, "learning_rate": 4.341273747712903e-06, "loss": 1.0625, "step": 17909 }, { "epoch": 0.7007590578292511, "grad_norm": 0.0, "learning_rate": 4.3402289703803926e-06, "loss": 1.0732, "step": 17910 }, { "epoch": 0.7007981845214806, "grad_norm": 0.0, "learning_rate": 4.3391842839367114e-06, "loss": 0.9876, "step": 17911 }, { "epoch": 0.70083731121371, "grad_norm": 0.0, "learning_rate": 4.3381396883986425e-06, "loss": 1.0709, "step": 17912 }, { "epoch": 0.7008764379059395, "grad_norm": 0.0, "learning_rate": 4.337095183782951e-06, "loss": 0.9297, "step": 17913 }, { "epoch": 0.7009155645981688, "grad_norm": 0.0, "learning_rate": 4.336050770106415e-06, "loss": 0.9092, "step": 17914 }, { "epoch": 0.7009546912903983, "grad_norm": 0.0, "learning_rate": 4.335006447385807e-06, "loss": 0.9011, "step": 17915 }, { "epoch": 0.7009938179826277, "grad_norm": 0.0, "learning_rate": 4.333962215637899e-06, "loss": 0.9959, "step": 17916 }, { "epoch": 0.7010329446748572, "grad_norm": 0.0, "learning_rate": 4.3329180748794554e-06, "loss": 0.9249, "step": 17917 }, { "epoch": 0.7010720713670866, "grad_norm": 0.0, "learning_rate": 4.33187402512725e-06, "loss": 0.9535, "step": 17918 }, { "epoch": 0.7011111980593161, "grad_norm": 0.0, "learning_rate": 4.330830066398037e-06, "loss": 0.8351, "step": 17919 }, { "epoch": 0.7011503247515455, "grad_norm": 0.0, "learning_rate": 4.329786198708598e-06, "loss": 0.8639, "step": 17920 }, { "epoch": 0.701189451443775, "grad_norm": 0.0, "learning_rate": 4.328742422075682e-06, "loss": 0.95, "step": 17921 }, { "epoch": 0.7012285781360044, "grad_norm": 0.0, "learning_rate": 4.3276987365160605e-06, "loss": 1.108, "step": 17922 }, { "epoch": 0.7012677048282339, "grad_norm": 0.0, "learning_rate": 4.3266551420464866e-06, "loss": 0.8299, "step": 17923 }, { "epoch": 0.7013068315204632, "grad_norm": 0.0, "learning_rate": 4.325611638683721e-06, "loss": 0.9787, "step": 17924 }, { "epoch": 0.7013459582126927, "grad_norm": 0.0, "learning_rate": 4.3245682264445235e-06, "loss": 0.9816, "step": 17925 }, { "epoch": 0.7013850849049221, "grad_norm": 0.0, "learning_rate": 4.323524905345651e-06, "loss": 0.9967, "step": 17926 }, { "epoch": 0.7014242115971516, "grad_norm": 0.0, "learning_rate": 4.322481675403852e-06, "loss": 0.9134, "step": 17927 }, { "epoch": 0.701463338289381, "grad_norm": 0.0, "learning_rate": 4.321438536635884e-06, "loss": 0.9845, "step": 17928 }, { "epoch": 0.7015024649816105, "grad_norm": 0.0, "learning_rate": 4.3203954890585e-06, "loss": 0.9509, "step": 17929 }, { "epoch": 0.7015415916738399, "grad_norm": 0.0, "learning_rate": 4.319352532688444e-06, "loss": 0.8827, "step": 17930 }, { "epoch": 0.7015807183660693, "grad_norm": 0.0, "learning_rate": 4.318309667542467e-06, "loss": 1.1018, "step": 17931 }, { "epoch": 0.7016198450582988, "grad_norm": 0.0, "learning_rate": 4.31726689363732e-06, "loss": 0.9536, "step": 17932 }, { "epoch": 0.7016589717505282, "grad_norm": 0.0, "learning_rate": 4.316224210989747e-06, "loss": 1.1302, "step": 17933 }, { "epoch": 0.7016980984427577, "grad_norm": 0.0, "learning_rate": 4.3151816196164885e-06, "loss": 1.0023, "step": 17934 }, { "epoch": 0.701737225134987, "grad_norm": 0.0, "learning_rate": 4.314139119534289e-06, "loss": 0.97, "step": 17935 }, { "epoch": 0.7017763518272165, "grad_norm": 0.0, "learning_rate": 4.313096710759894e-06, "loss": 0.9526, "step": 17936 }, { "epoch": 0.7018154785194459, "grad_norm": 0.0, "learning_rate": 4.312054393310037e-06, "loss": 1.067, "step": 17937 }, { "epoch": 0.7018546052116754, "grad_norm": 0.0, "learning_rate": 4.31101216720146e-06, "loss": 0.9659, "step": 17938 }, { "epoch": 0.7018937319039048, "grad_norm": 0.0, "learning_rate": 4.3099700324509e-06, "loss": 0.9947, "step": 17939 }, { "epoch": 0.7019328585961343, "grad_norm": 0.0, "learning_rate": 4.308927989075089e-06, "loss": 0.9957, "step": 17940 }, { "epoch": 0.7019719852883637, "grad_norm": 0.0, "learning_rate": 4.307886037090763e-06, "loss": 1.0792, "step": 17941 }, { "epoch": 0.7020111119805932, "grad_norm": 0.0, "learning_rate": 4.306844176514654e-06, "loss": 0.9854, "step": 17942 }, { "epoch": 0.7020502386728226, "grad_norm": 0.0, "learning_rate": 4.3058024073634986e-06, "loss": 0.9922, "step": 17943 }, { "epoch": 0.7020893653650521, "grad_norm": 0.0, "learning_rate": 4.304760729654016e-06, "loss": 0.9861, "step": 17944 }, { "epoch": 0.7021284920572815, "grad_norm": 0.0, "learning_rate": 4.303719143402942e-06, "loss": 1.0334, "step": 17945 }, { "epoch": 0.702167618749511, "grad_norm": 0.0, "learning_rate": 4.302677648626998e-06, "loss": 0.9794, "step": 17946 }, { "epoch": 0.7022067454417403, "grad_norm": 0.0, "learning_rate": 4.301636245342918e-06, "loss": 1.1451, "step": 17947 }, { "epoch": 0.7022458721339698, "grad_norm": 0.0, "learning_rate": 4.300594933567414e-06, "loss": 0.9485, "step": 17948 }, { "epoch": 0.7022849988261992, "grad_norm": 0.0, "learning_rate": 4.299553713317217e-06, "loss": 0.8948, "step": 17949 }, { "epoch": 0.7023241255184287, "grad_norm": 0.0, "learning_rate": 4.298512584609038e-06, "loss": 0.9529, "step": 17950 }, { "epoch": 0.7023632522106581, "grad_norm": 0.0, "learning_rate": 4.2974715474596096e-06, "loss": 1.0055, "step": 17951 }, { "epoch": 0.7024023789028876, "grad_norm": 0.0, "learning_rate": 4.296430601885639e-06, "loss": 1.0191, "step": 17952 }, { "epoch": 0.702441505595117, "grad_norm": 0.0, "learning_rate": 4.295389747903848e-06, "loss": 0.9626, "step": 17953 }, { "epoch": 0.7024806322873465, "grad_norm": 0.0, "learning_rate": 4.294348985530945e-06, "loss": 1.0316, "step": 17954 }, { "epoch": 0.7025197589795759, "grad_norm": 0.0, "learning_rate": 4.293308314783653e-06, "loss": 0.9794, "step": 17955 }, { "epoch": 0.7025588856718054, "grad_norm": 0.0, "learning_rate": 4.292267735678676e-06, "loss": 0.9858, "step": 17956 }, { "epoch": 0.7025980123640347, "grad_norm": 0.0, "learning_rate": 4.29122724823273e-06, "loss": 1.0021, "step": 17957 }, { "epoch": 0.7026371390562642, "grad_norm": 0.0, "learning_rate": 4.290186852462517e-06, "loss": 0.9933, "step": 17958 }, { "epoch": 0.7026762657484936, "grad_norm": 0.0, "learning_rate": 4.289146548384749e-06, "loss": 0.9727, "step": 17959 }, { "epoch": 0.702715392440723, "grad_norm": 0.0, "learning_rate": 4.28810633601613e-06, "loss": 0.8653, "step": 17960 }, { "epoch": 0.7027545191329525, "grad_norm": 0.0, "learning_rate": 4.287066215373371e-06, "loss": 0.9264, "step": 17961 }, { "epoch": 0.7027936458251819, "grad_norm": 0.0, "learning_rate": 4.286026186473165e-06, "loss": 1.0172, "step": 17962 }, { "epoch": 0.7028327725174114, "grad_norm": 0.0, "learning_rate": 4.28498624933222e-06, "loss": 0.9286, "step": 17963 }, { "epoch": 0.7028718992096408, "grad_norm": 0.0, "learning_rate": 4.283946403967233e-06, "loss": 0.9596, "step": 17964 }, { "epoch": 0.7029110259018703, "grad_norm": 0.0, "learning_rate": 4.282906650394909e-06, "loss": 1.0725, "step": 17965 }, { "epoch": 0.7029501525940997, "grad_norm": 0.0, "learning_rate": 4.281866988631936e-06, "loss": 0.9903, "step": 17966 }, { "epoch": 0.7029892792863291, "grad_norm": 0.0, "learning_rate": 4.2808274186950175e-06, "loss": 1.0463, "step": 17967 }, { "epoch": 0.7030284059785585, "grad_norm": 0.0, "learning_rate": 4.279787940600837e-06, "loss": 1.0504, "step": 17968 }, { "epoch": 0.703067532670788, "grad_norm": 0.0, "learning_rate": 4.278748554366102e-06, "loss": 0.9948, "step": 17969 }, { "epoch": 0.7031066593630174, "grad_norm": 0.0, "learning_rate": 4.277709260007492e-06, "loss": 1.0031, "step": 17970 }, { "epoch": 0.7031457860552469, "grad_norm": 0.0, "learning_rate": 4.276670057541704e-06, "loss": 0.9922, "step": 17971 }, { "epoch": 0.7031849127474763, "grad_norm": 0.0, "learning_rate": 4.275630946985421e-06, "loss": 1.0348, "step": 17972 }, { "epoch": 0.7032240394397058, "grad_norm": 0.0, "learning_rate": 4.27459192835533e-06, "loss": 1.0282, "step": 17973 }, { "epoch": 0.7032631661319352, "grad_norm": 0.0, "learning_rate": 4.273553001668119e-06, "loss": 0.9847, "step": 17974 }, { "epoch": 0.7033022928241647, "grad_norm": 0.0, "learning_rate": 4.272514166940476e-06, "loss": 0.9547, "step": 17975 }, { "epoch": 0.7033414195163941, "grad_norm": 0.0, "learning_rate": 4.271475424189074e-06, "loss": 0.9733, "step": 17976 }, { "epoch": 0.7033805462086236, "grad_norm": 0.0, "learning_rate": 4.270436773430599e-06, "loss": 1.0204, "step": 17977 }, { "epoch": 0.7034196729008529, "grad_norm": 0.0, "learning_rate": 4.269398214681733e-06, "loss": 0.9166, "step": 17978 }, { "epoch": 0.7034587995930824, "grad_norm": 0.0, "learning_rate": 4.2683597479591465e-06, "loss": 1.0349, "step": 17979 }, { "epoch": 0.7034979262853118, "grad_norm": 0.0, "learning_rate": 4.26732137327952e-06, "loss": 0.9668, "step": 17980 }, { "epoch": 0.7035370529775413, "grad_norm": 0.0, "learning_rate": 4.266283090659531e-06, "loss": 0.9969, "step": 17981 }, { "epoch": 0.7035761796697707, "grad_norm": 0.0, "learning_rate": 4.265244900115852e-06, "loss": 0.9522, "step": 17982 }, { "epoch": 0.7036153063620002, "grad_norm": 0.0, "learning_rate": 4.264206801665153e-06, "loss": 0.9472, "step": 17983 }, { "epoch": 0.7036544330542296, "grad_norm": 0.0, "learning_rate": 4.263168795324107e-06, "loss": 0.8736, "step": 17984 }, { "epoch": 0.7036935597464591, "grad_norm": 0.0, "learning_rate": 4.262130881109379e-06, "loss": 0.9787, "step": 17985 }, { "epoch": 0.7037326864386885, "grad_norm": 0.0, "learning_rate": 4.261093059037638e-06, "loss": 1.1045, "step": 17986 }, { "epoch": 0.703771813130918, "grad_norm": 0.0, "learning_rate": 4.260055329125551e-06, "loss": 1.0428, "step": 17987 }, { "epoch": 0.7038109398231474, "grad_norm": 0.0, "learning_rate": 4.259017691389788e-06, "loss": 0.8899, "step": 17988 }, { "epoch": 0.7038500665153767, "grad_norm": 0.0, "learning_rate": 4.257980145847002e-06, "loss": 0.9287, "step": 17989 }, { "epoch": 0.7038891932076062, "grad_norm": 0.0, "learning_rate": 4.25694269251386e-06, "loss": 1.0298, "step": 17990 }, { "epoch": 0.7039283198998356, "grad_norm": 0.0, "learning_rate": 4.255905331407022e-06, "loss": 0.9244, "step": 17991 }, { "epoch": 0.7039674465920651, "grad_norm": 0.0, "learning_rate": 4.254868062543151e-06, "loss": 1.0661, "step": 17992 }, { "epoch": 0.7040065732842945, "grad_norm": 0.0, "learning_rate": 4.253830885938895e-06, "loss": 0.9025, "step": 17993 }, { "epoch": 0.704045699976524, "grad_norm": 0.0, "learning_rate": 4.252793801610919e-06, "loss": 1.0492, "step": 17994 }, { "epoch": 0.7040848266687534, "grad_norm": 0.0, "learning_rate": 4.2517568095758655e-06, "loss": 0.973, "step": 17995 }, { "epoch": 0.7041239533609829, "grad_norm": 0.0, "learning_rate": 4.250719909850402e-06, "loss": 0.8536, "step": 17996 }, { "epoch": 0.7041630800532123, "grad_norm": 0.0, "learning_rate": 4.249683102451169e-06, "loss": 1.0826, "step": 17997 }, { "epoch": 0.7042022067454418, "grad_norm": 0.0, "learning_rate": 4.248646387394823e-06, "loss": 0.9633, "step": 17998 }, { "epoch": 0.7042413334376711, "grad_norm": 0.0, "learning_rate": 4.247609764698002e-06, "loss": 1.0343, "step": 17999 }, { "epoch": 0.7042804601299006, "grad_norm": 0.0, "learning_rate": 4.246573234377368e-06, "loss": 0.9246, "step": 18000 }, { "epoch": 0.70431958682213, "grad_norm": 0.0, "learning_rate": 4.245536796449555e-06, "loss": 0.9641, "step": 18001 }, { "epoch": 0.7043587135143595, "grad_norm": 0.0, "learning_rate": 4.244500450931212e-06, "loss": 0.9974, "step": 18002 }, { "epoch": 0.7043978402065889, "grad_norm": 0.0, "learning_rate": 4.243464197838975e-06, "loss": 1.0002, "step": 18003 }, { "epoch": 0.7044369668988184, "grad_norm": 0.0, "learning_rate": 4.242428037189494e-06, "loss": 0.9872, "step": 18004 }, { "epoch": 0.7044760935910478, "grad_norm": 0.0, "learning_rate": 4.241391968999402e-06, "loss": 1.0434, "step": 18005 }, { "epoch": 0.7045152202832773, "grad_norm": 0.0, "learning_rate": 4.240355993285343e-06, "loss": 0.9237, "step": 18006 }, { "epoch": 0.7045543469755067, "grad_norm": 0.0, "learning_rate": 4.239320110063946e-06, "loss": 0.9786, "step": 18007 }, { "epoch": 0.7045934736677362, "grad_norm": 0.0, "learning_rate": 4.238284319351848e-06, "loss": 0.9504, "step": 18008 }, { "epoch": 0.7046326003599656, "grad_norm": 0.0, "learning_rate": 4.237248621165686e-06, "loss": 0.988, "step": 18009 }, { "epoch": 0.704671727052195, "grad_norm": 0.0, "learning_rate": 4.236213015522093e-06, "loss": 0.9252, "step": 18010 }, { "epoch": 0.7047108537444244, "grad_norm": 0.0, "learning_rate": 4.235177502437692e-06, "loss": 0.9101, "step": 18011 }, { "epoch": 0.7047499804366539, "grad_norm": 0.0, "learning_rate": 4.234142081929117e-06, "loss": 1.0196, "step": 18012 }, { "epoch": 0.7047891071288833, "grad_norm": 0.0, "learning_rate": 4.233106754012996e-06, "loss": 1.0114, "step": 18013 }, { "epoch": 0.7048282338211128, "grad_norm": 0.0, "learning_rate": 4.232071518705957e-06, "loss": 0.8524, "step": 18014 }, { "epoch": 0.7048673605133422, "grad_norm": 0.0, "learning_rate": 4.231036376024618e-06, "loss": 1.0204, "step": 18015 }, { "epoch": 0.7049064872055716, "grad_norm": 0.0, "learning_rate": 4.23000132598561e-06, "loss": 1.0135, "step": 18016 }, { "epoch": 0.7049456138978011, "grad_norm": 0.0, "learning_rate": 4.228966368605547e-06, "loss": 0.988, "step": 18017 }, { "epoch": 0.7049847405900305, "grad_norm": 0.0, "learning_rate": 4.227931503901052e-06, "loss": 0.9845, "step": 18018 }, { "epoch": 0.70502386728226, "grad_norm": 0.0, "learning_rate": 4.2268967318887445e-06, "loss": 0.9181, "step": 18019 }, { "epoch": 0.7050629939744893, "grad_norm": 0.0, "learning_rate": 4.225862052585244e-06, "loss": 0.9318, "step": 18020 }, { "epoch": 0.7051021206667188, "grad_norm": 0.0, "learning_rate": 4.224827466007162e-06, "loss": 0.9428, "step": 18021 }, { "epoch": 0.7051412473589482, "grad_norm": 0.0, "learning_rate": 4.223792972171114e-06, "loss": 1.056, "step": 18022 }, { "epoch": 0.7051803740511777, "grad_norm": 0.0, "learning_rate": 4.222758571093715e-06, "loss": 0.9969, "step": 18023 }, { "epoch": 0.7052195007434071, "grad_norm": 0.0, "learning_rate": 4.221724262791571e-06, "loss": 1.0497, "step": 18024 }, { "epoch": 0.7052586274356366, "grad_norm": 0.0, "learning_rate": 4.220690047281295e-06, "loss": 0.9635, "step": 18025 }, { "epoch": 0.705297754127866, "grad_norm": 0.0, "learning_rate": 4.2196559245794944e-06, "loss": 1.0123, "step": 18026 }, { "epoch": 0.7053368808200955, "grad_norm": 0.0, "learning_rate": 4.2186218947027804e-06, "loss": 0.9728, "step": 18027 }, { "epoch": 0.7053760075123249, "grad_norm": 0.0, "learning_rate": 4.217587957667751e-06, "loss": 0.8787, "step": 18028 }, { "epoch": 0.7054151342045544, "grad_norm": 0.0, "learning_rate": 4.216554113491017e-06, "loss": 1.133, "step": 18029 }, { "epoch": 0.7054542608967838, "grad_norm": 0.0, "learning_rate": 4.215520362189169e-06, "loss": 1.0487, "step": 18030 }, { "epoch": 0.7054933875890133, "grad_norm": 0.0, "learning_rate": 4.214486703778823e-06, "loss": 1.219, "step": 18031 }, { "epoch": 0.7055325142812426, "grad_norm": 0.0, "learning_rate": 4.213453138276568e-06, "loss": 1.0229, "step": 18032 }, { "epoch": 0.7055716409734721, "grad_norm": 0.0, "learning_rate": 4.2124196656990065e-06, "loss": 0.8991, "step": 18033 }, { "epoch": 0.7056107676657015, "grad_norm": 0.0, "learning_rate": 4.211386286062731e-06, "loss": 1.0374, "step": 18034 }, { "epoch": 0.705649894357931, "grad_norm": 0.0, "learning_rate": 4.2103529993843385e-06, "loss": 0.957, "step": 18035 }, { "epoch": 0.7056890210501604, "grad_norm": 0.0, "learning_rate": 4.209319805680421e-06, "loss": 0.9893, "step": 18036 }, { "epoch": 0.7057281477423899, "grad_norm": 0.0, "learning_rate": 4.208286704967574e-06, "loss": 0.9283, "step": 18037 }, { "epoch": 0.7057672744346193, "grad_norm": 0.0, "learning_rate": 4.207253697262383e-06, "loss": 1.0201, "step": 18038 }, { "epoch": 0.7058064011268488, "grad_norm": 0.0, "learning_rate": 4.206220782581438e-06, "loss": 1.0847, "step": 18039 }, { "epoch": 0.7058455278190782, "grad_norm": 0.0, "learning_rate": 4.205187960941328e-06, "loss": 1.0129, "step": 18040 }, { "epoch": 0.7058846545113077, "grad_norm": 0.0, "learning_rate": 4.204155232358642e-06, "loss": 0.9684, "step": 18041 }, { "epoch": 0.705923781203537, "grad_norm": 0.0, "learning_rate": 4.203122596849956e-06, "loss": 0.9455, "step": 18042 }, { "epoch": 0.7059629078957665, "grad_norm": 0.0, "learning_rate": 4.202090054431861e-06, "loss": 0.9885, "step": 18043 }, { "epoch": 0.7060020345879959, "grad_norm": 0.0, "learning_rate": 4.201057605120927e-06, "loss": 0.9767, "step": 18044 }, { "epoch": 0.7060411612802253, "grad_norm": 0.0, "learning_rate": 4.200025248933749e-06, "loss": 0.9684, "step": 18045 }, { "epoch": 0.7060802879724548, "grad_norm": 0.0, "learning_rate": 4.198992985886894e-06, "loss": 0.9617, "step": 18046 }, { "epoch": 0.7061194146646842, "grad_norm": 0.0, "learning_rate": 4.197960815996945e-06, "loss": 1.0364, "step": 18047 }, { "epoch": 0.7061585413569137, "grad_norm": 0.0, "learning_rate": 4.19692873928047e-06, "loss": 0.8227, "step": 18048 }, { "epoch": 0.7061976680491431, "grad_norm": 0.0, "learning_rate": 4.195896755754054e-06, "loss": 1.0728, "step": 18049 }, { "epoch": 0.7062367947413726, "grad_norm": 0.0, "learning_rate": 4.19486486543426e-06, "loss": 0.937, "step": 18050 }, { "epoch": 0.706275921433602, "grad_norm": 0.0, "learning_rate": 4.193833068337663e-06, "loss": 1.0535, "step": 18051 }, { "epoch": 0.7063150481258315, "grad_norm": 0.0, "learning_rate": 4.192801364480828e-06, "loss": 1.1069, "step": 18052 }, { "epoch": 0.7063541748180608, "grad_norm": 0.0, "learning_rate": 4.191769753880332e-06, "loss": 0.9036, "step": 18053 }, { "epoch": 0.7063933015102903, "grad_norm": 0.0, "learning_rate": 4.19073823655273e-06, "loss": 0.9716, "step": 18054 }, { "epoch": 0.7064324282025197, "grad_norm": 0.0, "learning_rate": 4.189706812514599e-06, "loss": 0.9026, "step": 18055 }, { "epoch": 0.7064715548947492, "grad_norm": 0.0, "learning_rate": 4.1886754817824904e-06, "loss": 0.9493, "step": 18056 }, { "epoch": 0.7065106815869786, "grad_norm": 0.0, "learning_rate": 4.187644244372973e-06, "loss": 1.035, "step": 18057 }, { "epoch": 0.7065498082792081, "grad_norm": 0.0, "learning_rate": 4.186613100302605e-06, "loss": 1.0014, "step": 18058 }, { "epoch": 0.7065889349714375, "grad_norm": 0.0, "learning_rate": 4.18558204958795e-06, "loss": 0.9461, "step": 18059 }, { "epoch": 0.706628061663667, "grad_norm": 0.0, "learning_rate": 4.184551092245557e-06, "loss": 0.9932, "step": 18060 }, { "epoch": 0.7066671883558964, "grad_norm": 0.0, "learning_rate": 4.183520228291987e-06, "loss": 0.8869, "step": 18061 }, { "epoch": 0.7067063150481259, "grad_norm": 0.0, "learning_rate": 4.182489457743797e-06, "loss": 1.0099, "step": 18062 }, { "epoch": 0.7067454417403553, "grad_norm": 0.0, "learning_rate": 4.1814587806175324e-06, "loss": 1.0339, "step": 18063 }, { "epoch": 0.7067845684325847, "grad_norm": 0.0, "learning_rate": 4.180428196929749e-06, "loss": 0.9286, "step": 18064 }, { "epoch": 0.7068236951248141, "grad_norm": 0.0, "learning_rate": 4.179397706697001e-06, "loss": 0.9482, "step": 18065 }, { "epoch": 0.7068628218170436, "grad_norm": 0.0, "learning_rate": 4.178367309935828e-06, "loss": 0.9906, "step": 18066 }, { "epoch": 0.706901948509273, "grad_norm": 0.0, "learning_rate": 4.177337006662781e-06, "loss": 1.1184, "step": 18067 }, { "epoch": 0.7069410752015025, "grad_norm": 0.0, "learning_rate": 4.176306796894409e-06, "loss": 0.9459, "step": 18068 }, { "epoch": 0.7069802018937319, "grad_norm": 0.0, "learning_rate": 4.175276680647249e-06, "loss": 0.931, "step": 18069 }, { "epoch": 0.7070193285859614, "grad_norm": 0.0, "learning_rate": 4.174246657937846e-06, "loss": 0.9462, "step": 18070 }, { "epoch": 0.7070584552781908, "grad_norm": 0.0, "learning_rate": 4.173216728782743e-06, "loss": 0.9336, "step": 18071 }, { "epoch": 0.7070975819704203, "grad_norm": 0.0, "learning_rate": 4.1721868931984796e-06, "loss": 0.9961, "step": 18072 }, { "epoch": 0.7071367086626497, "grad_norm": 0.0, "learning_rate": 4.1711571512015905e-06, "loss": 0.9143, "step": 18073 }, { "epoch": 0.707175835354879, "grad_norm": 0.0, "learning_rate": 4.170127502808617e-06, "loss": 1.1172, "step": 18074 }, { "epoch": 0.7072149620471085, "grad_norm": 0.0, "learning_rate": 4.169097948036081e-06, "loss": 0.9434, "step": 18075 }, { "epoch": 0.7072540887393379, "grad_norm": 0.0, "learning_rate": 4.168068486900535e-06, "loss": 0.9561, "step": 18076 }, { "epoch": 0.7072932154315674, "grad_norm": 0.0, "learning_rate": 4.167039119418496e-06, "loss": 1.0924, "step": 18077 }, { "epoch": 0.7073323421237968, "grad_norm": 0.0, "learning_rate": 4.166009845606505e-06, "loss": 0.9388, "step": 18078 }, { "epoch": 0.7073714688160263, "grad_norm": 0.0, "learning_rate": 4.164980665481078e-06, "loss": 1.086, "step": 18079 }, { "epoch": 0.7074105955082557, "grad_norm": 0.0, "learning_rate": 4.163951579058756e-06, "loss": 0.8568, "step": 18080 }, { "epoch": 0.7074497222004852, "grad_norm": 0.0, "learning_rate": 4.162922586356055e-06, "loss": 1.0301, "step": 18081 }, { "epoch": 0.7074888488927146, "grad_norm": 0.0, "learning_rate": 4.161893687389508e-06, "loss": 1.0449, "step": 18082 }, { "epoch": 0.7075279755849441, "grad_norm": 0.0, "learning_rate": 4.160864882175628e-06, "loss": 0.9528, "step": 18083 }, { "epoch": 0.7075671022771735, "grad_norm": 0.0, "learning_rate": 4.159836170730942e-06, "loss": 0.9149, "step": 18084 }, { "epoch": 0.707606228969403, "grad_norm": 0.0, "learning_rate": 4.158807553071969e-06, "loss": 0.9377, "step": 18085 }, { "epoch": 0.7076453556616323, "grad_norm": 0.0, "learning_rate": 4.15777902921523e-06, "loss": 1.0653, "step": 18086 }, { "epoch": 0.7076844823538618, "grad_norm": 0.0, "learning_rate": 4.156750599177235e-06, "loss": 0.8922, "step": 18087 }, { "epoch": 0.7077236090460912, "grad_norm": 0.0, "learning_rate": 4.155722262974504e-06, "loss": 0.9196, "step": 18088 }, { "epoch": 0.7077627357383207, "grad_norm": 0.0, "learning_rate": 4.154694020623551e-06, "loss": 0.9974, "step": 18089 }, { "epoch": 0.7078018624305501, "grad_norm": 0.0, "learning_rate": 4.153665872140891e-06, "loss": 1.0197, "step": 18090 }, { "epoch": 0.7078409891227796, "grad_norm": 0.0, "learning_rate": 4.152637817543026e-06, "loss": 1.0049, "step": 18091 }, { "epoch": 0.707880115815009, "grad_norm": 0.0, "learning_rate": 4.151609856846476e-06, "loss": 1.0793, "step": 18092 }, { "epoch": 0.7079192425072385, "grad_norm": 0.0, "learning_rate": 4.1505819900677345e-06, "loss": 0.893, "step": 18093 }, { "epoch": 0.7079583691994679, "grad_norm": 0.0, "learning_rate": 4.149554217223325e-06, "loss": 1.0457, "step": 18094 }, { "epoch": 0.7079974958916974, "grad_norm": 0.0, "learning_rate": 4.1485265383297394e-06, "loss": 1.0854, "step": 18095 }, { "epoch": 0.7080366225839267, "grad_norm": 0.0, "learning_rate": 4.14749895340349e-06, "loss": 0.8219, "step": 18096 }, { "epoch": 0.7080757492761562, "grad_norm": 0.0, "learning_rate": 4.146471462461065e-06, "loss": 1.0166, "step": 18097 }, { "epoch": 0.7081148759683856, "grad_norm": 0.0, "learning_rate": 4.145444065518981e-06, "loss": 0.8531, "step": 18098 }, { "epoch": 0.7081540026606151, "grad_norm": 0.0, "learning_rate": 4.144416762593726e-06, "loss": 0.9632, "step": 18099 }, { "epoch": 0.7081931293528445, "grad_norm": 0.0, "learning_rate": 4.143389553701803e-06, "loss": 0.9012, "step": 18100 }, { "epoch": 0.708232256045074, "grad_norm": 0.0, "learning_rate": 4.142362438859703e-06, "loss": 1.0148, "step": 18101 }, { "epoch": 0.7082713827373034, "grad_norm": 0.0, "learning_rate": 4.1413354180839215e-06, "loss": 1.0027, "step": 18102 }, { "epoch": 0.7083105094295328, "grad_norm": 0.0, "learning_rate": 4.140308491390952e-06, "loss": 0.831, "step": 18103 }, { "epoch": 0.7083496361217623, "grad_norm": 0.0, "learning_rate": 4.139281658797288e-06, "loss": 0.8423, "step": 18104 }, { "epoch": 0.7083887628139917, "grad_norm": 0.0, "learning_rate": 4.138254920319414e-06, "loss": 0.8784, "step": 18105 }, { "epoch": 0.7084278895062212, "grad_norm": 0.0, "learning_rate": 4.137228275973821e-06, "loss": 0.9766, "step": 18106 }, { "epoch": 0.7084670161984505, "grad_norm": 0.0, "learning_rate": 4.136201725776999e-06, "loss": 1.0381, "step": 18107 }, { "epoch": 0.70850614289068, "grad_norm": 0.0, "learning_rate": 4.135175269745426e-06, "loss": 1.0651, "step": 18108 }, { "epoch": 0.7085452695829094, "grad_norm": 0.0, "learning_rate": 4.134148907895589e-06, "loss": 0.9859, "step": 18109 }, { "epoch": 0.7085843962751389, "grad_norm": 0.0, "learning_rate": 4.1331226402439695e-06, "loss": 0.9989, "step": 18110 }, { "epoch": 0.7086235229673683, "grad_norm": 0.0, "learning_rate": 4.132096466807053e-06, "loss": 0.908, "step": 18111 }, { "epoch": 0.7086626496595978, "grad_norm": 0.0, "learning_rate": 4.131070387601312e-06, "loss": 1.0018, "step": 18112 }, { "epoch": 0.7087017763518272, "grad_norm": 0.0, "learning_rate": 4.130044402643228e-06, "loss": 0.8837, "step": 18113 }, { "epoch": 0.7087409030440567, "grad_norm": 0.0, "learning_rate": 4.129018511949272e-06, "loss": 0.9664, "step": 18114 }, { "epoch": 0.7087800297362861, "grad_norm": 0.0, "learning_rate": 4.127992715535922e-06, "loss": 0.9742, "step": 18115 }, { "epoch": 0.7088191564285156, "grad_norm": 0.0, "learning_rate": 4.126967013419652e-06, "loss": 0.9275, "step": 18116 }, { "epoch": 0.708858283120745, "grad_norm": 0.0, "learning_rate": 4.1259414056169355e-06, "loss": 0.985, "step": 18117 }, { "epoch": 0.7088974098129744, "grad_norm": 0.0, "learning_rate": 4.124915892144236e-06, "loss": 1.0756, "step": 18118 }, { "epoch": 0.7089365365052038, "grad_norm": 0.0, "learning_rate": 4.123890473018025e-06, "loss": 0.9905, "step": 18119 }, { "epoch": 0.7089756631974333, "grad_norm": 0.0, "learning_rate": 4.12286514825477e-06, "loss": 0.9389, "step": 18120 }, { "epoch": 0.7090147898896627, "grad_norm": 0.0, "learning_rate": 4.12183991787094e-06, "loss": 1.0247, "step": 18121 }, { "epoch": 0.7090539165818922, "grad_norm": 0.0, "learning_rate": 4.120814781882991e-06, "loss": 0.9402, "step": 18122 }, { "epoch": 0.7090930432741216, "grad_norm": 0.0, "learning_rate": 4.119789740307394e-06, "loss": 0.9771, "step": 18123 }, { "epoch": 0.7091321699663511, "grad_norm": 0.0, "learning_rate": 4.118764793160598e-06, "loss": 1.0654, "step": 18124 }, { "epoch": 0.7091712966585805, "grad_norm": 0.0, "learning_rate": 4.117739940459077e-06, "loss": 0.8769, "step": 18125 }, { "epoch": 0.70921042335081, "grad_norm": 0.0, "learning_rate": 4.1167151822192775e-06, "loss": 0.9146, "step": 18126 }, { "epoch": 0.7092495500430394, "grad_norm": 0.0, "learning_rate": 4.1156905184576646e-06, "loss": 0.979, "step": 18127 }, { "epoch": 0.7092886767352689, "grad_norm": 0.0, "learning_rate": 4.1146659491906805e-06, "loss": 0.9673, "step": 18128 }, { "epoch": 0.7093278034274982, "grad_norm": 0.0, "learning_rate": 4.113641474434794e-06, "loss": 1.0009, "step": 18129 }, { "epoch": 0.7093669301197276, "grad_norm": 0.0, "learning_rate": 4.112617094206445e-06, "loss": 0.9176, "step": 18130 }, { "epoch": 0.7094060568119571, "grad_norm": 0.0, "learning_rate": 4.111592808522093e-06, "loss": 0.8408, "step": 18131 }, { "epoch": 0.7094451835041865, "grad_norm": 0.0, "learning_rate": 4.110568617398178e-06, "loss": 0.9987, "step": 18132 }, { "epoch": 0.709484310196416, "grad_norm": 0.0, "learning_rate": 4.109544520851151e-06, "loss": 0.9797, "step": 18133 }, { "epoch": 0.7095234368886454, "grad_norm": 0.0, "learning_rate": 4.1085205188974575e-06, "loss": 1.0494, "step": 18134 }, { "epoch": 0.7095625635808749, "grad_norm": 0.0, "learning_rate": 4.107496611553547e-06, "loss": 1.1243, "step": 18135 }, { "epoch": 0.7096016902731043, "grad_norm": 0.0, "learning_rate": 4.106472798835852e-06, "loss": 0.8484, "step": 18136 }, { "epoch": 0.7096408169653338, "grad_norm": 0.0, "learning_rate": 4.105449080760819e-06, "loss": 0.8976, "step": 18137 }, { "epoch": 0.7096799436575632, "grad_norm": 0.0, "learning_rate": 4.1044254573448885e-06, "loss": 0.8911, "step": 18138 }, { "epoch": 0.7097190703497926, "grad_norm": 0.0, "learning_rate": 4.1034019286045e-06, "loss": 1.0576, "step": 18139 }, { "epoch": 0.709758197042022, "grad_norm": 0.0, "learning_rate": 4.102378494556085e-06, "loss": 1.0852, "step": 18140 }, { "epoch": 0.7097973237342515, "grad_norm": 0.0, "learning_rate": 4.101355155216084e-06, "loss": 0.8773, "step": 18141 }, { "epoch": 0.7098364504264809, "grad_norm": 0.0, "learning_rate": 4.100331910600922e-06, "loss": 0.9505, "step": 18142 }, { "epoch": 0.7098755771187104, "grad_norm": 0.0, "learning_rate": 4.099308760727043e-06, "loss": 0.9867, "step": 18143 }, { "epoch": 0.7099147038109398, "grad_norm": 0.0, "learning_rate": 4.098285705610867e-06, "loss": 0.962, "step": 18144 }, { "epoch": 0.7099538305031693, "grad_norm": 0.0, "learning_rate": 4.097262745268833e-06, "loss": 0.992, "step": 18145 }, { "epoch": 0.7099929571953987, "grad_norm": 0.0, "learning_rate": 4.0962398797173575e-06, "loss": 0.9155, "step": 18146 }, { "epoch": 0.7100320838876282, "grad_norm": 0.0, "learning_rate": 4.095217108972872e-06, "loss": 0.8997, "step": 18147 }, { "epoch": 0.7100712105798576, "grad_norm": 0.0, "learning_rate": 4.0941944330518004e-06, "loss": 0.9416, "step": 18148 }, { "epoch": 0.710110337272087, "grad_norm": 0.0, "learning_rate": 4.09317185197057e-06, "loss": 0.9871, "step": 18149 }, { "epoch": 0.7101494639643164, "grad_norm": 0.0, "learning_rate": 4.092149365745594e-06, "loss": 1.0251, "step": 18150 }, { "epoch": 0.7101885906565459, "grad_norm": 0.0, "learning_rate": 4.091126974393297e-06, "loss": 1.011, "step": 18151 }, { "epoch": 0.7102277173487753, "grad_norm": 0.0, "learning_rate": 4.090104677930099e-06, "loss": 1.0233, "step": 18152 }, { "epoch": 0.7102668440410048, "grad_norm": 0.0, "learning_rate": 4.0890824763724115e-06, "loss": 0.9592, "step": 18153 }, { "epoch": 0.7103059707332342, "grad_norm": 0.0, "learning_rate": 4.088060369736653e-06, "loss": 0.8635, "step": 18154 }, { "epoch": 0.7103450974254637, "grad_norm": 0.0, "learning_rate": 4.087038358039236e-06, "loss": 0.9082, "step": 18155 }, { "epoch": 0.7103842241176931, "grad_norm": 0.0, "learning_rate": 4.086016441296578e-06, "loss": 1.0714, "step": 18156 }, { "epoch": 0.7104233508099226, "grad_norm": 0.0, "learning_rate": 4.08499461952508e-06, "loss": 1.0318, "step": 18157 }, { "epoch": 0.710462477502152, "grad_norm": 0.0, "learning_rate": 4.083972892741161e-06, "loss": 1.0147, "step": 18158 }, { "epoch": 0.7105016041943814, "grad_norm": 0.0, "learning_rate": 4.082951260961222e-06, "loss": 0.9522, "step": 18159 }, { "epoch": 0.7105407308866108, "grad_norm": 0.0, "learning_rate": 4.08192972420167e-06, "loss": 0.9405, "step": 18160 }, { "epoch": 0.7105798575788402, "grad_norm": 0.0, "learning_rate": 4.080908282478911e-06, "loss": 1.0097, "step": 18161 }, { "epoch": 0.7106189842710697, "grad_norm": 0.0, "learning_rate": 4.079886935809352e-06, "loss": 1.0476, "step": 18162 }, { "epoch": 0.7106581109632991, "grad_norm": 0.0, "learning_rate": 4.078865684209385e-06, "loss": 0.9708, "step": 18163 }, { "epoch": 0.7106972376555286, "grad_norm": 0.0, "learning_rate": 4.077844527695418e-06, "loss": 1.0969, "step": 18164 }, { "epoch": 0.710736364347758, "grad_norm": 0.0, "learning_rate": 4.076823466283846e-06, "loss": 1.005, "step": 18165 }, { "epoch": 0.7107754910399875, "grad_norm": 0.0, "learning_rate": 4.075802499991071e-06, "loss": 1.0276, "step": 18166 }, { "epoch": 0.7108146177322169, "grad_norm": 0.0, "learning_rate": 4.07478162883348e-06, "loss": 1.0826, "step": 18167 }, { "epoch": 0.7108537444244464, "grad_norm": 0.0, "learning_rate": 4.073760852827472e-06, "loss": 0.938, "step": 18168 }, { "epoch": 0.7108928711166758, "grad_norm": 0.0, "learning_rate": 4.072740171989438e-06, "loss": 1.0042, "step": 18169 }, { "epoch": 0.7109319978089053, "grad_norm": 0.0, "learning_rate": 4.071719586335774e-06, "loss": 1.0156, "step": 18170 }, { "epoch": 0.7109711245011346, "grad_norm": 0.0, "learning_rate": 4.07069909588286e-06, "loss": 1.0038, "step": 18171 }, { "epoch": 0.7110102511933641, "grad_norm": 0.0, "learning_rate": 4.069678700647094e-06, "loss": 1.0375, "step": 18172 }, { "epoch": 0.7110493778855935, "grad_norm": 0.0, "learning_rate": 4.068658400644848e-06, "loss": 0.8284, "step": 18173 }, { "epoch": 0.711088504577823, "grad_norm": 0.0, "learning_rate": 4.067638195892525e-06, "loss": 0.9897, "step": 18174 }, { "epoch": 0.7111276312700524, "grad_norm": 0.0, "learning_rate": 4.066618086406494e-06, "loss": 1.0085, "step": 18175 }, { "epoch": 0.7111667579622819, "grad_norm": 0.0, "learning_rate": 4.065598072203145e-06, "loss": 0.8585, "step": 18176 }, { "epoch": 0.7112058846545113, "grad_norm": 0.0, "learning_rate": 4.064578153298848e-06, "loss": 0.9146, "step": 18177 }, { "epoch": 0.7112450113467408, "grad_norm": 0.0, "learning_rate": 4.063558329709996e-06, "loss": 1.0061, "step": 18178 }, { "epoch": 0.7112841380389702, "grad_norm": 0.0, "learning_rate": 4.062538601452954e-06, "loss": 0.8987, "step": 18179 }, { "epoch": 0.7113232647311997, "grad_norm": 0.0, "learning_rate": 4.061518968544106e-06, "loss": 0.9269, "step": 18180 }, { "epoch": 0.711362391423429, "grad_norm": 0.0, "learning_rate": 4.060499430999818e-06, "loss": 0.8856, "step": 18181 }, { "epoch": 0.7114015181156585, "grad_norm": 0.0, "learning_rate": 4.059479988836467e-06, "loss": 1.0102, "step": 18182 }, { "epoch": 0.7114406448078879, "grad_norm": 0.0, "learning_rate": 4.0584606420704235e-06, "loss": 1.0403, "step": 18183 }, { "epoch": 0.7114797715001174, "grad_norm": 0.0, "learning_rate": 4.05744139071806e-06, "loss": 0.9266, "step": 18184 }, { "epoch": 0.7115188981923468, "grad_norm": 0.0, "learning_rate": 4.056422234795738e-06, "loss": 1.0587, "step": 18185 }, { "epoch": 0.7115580248845763, "grad_norm": 0.0, "learning_rate": 4.055403174319828e-06, "loss": 0.9473, "step": 18186 }, { "epoch": 0.7115971515768057, "grad_norm": 0.0, "learning_rate": 4.054384209306692e-06, "loss": 0.9389, "step": 18187 }, { "epoch": 0.7116362782690351, "grad_norm": 0.0, "learning_rate": 4.0533653397727005e-06, "loss": 0.9775, "step": 18188 }, { "epoch": 0.7116754049612646, "grad_norm": 0.0, "learning_rate": 4.052346565734207e-06, "loss": 0.9898, "step": 18189 }, { "epoch": 0.711714531653494, "grad_norm": 0.0, "learning_rate": 4.051327887207577e-06, "loss": 1.0475, "step": 18190 }, { "epoch": 0.7117536583457235, "grad_norm": 0.0, "learning_rate": 4.0503093042091645e-06, "loss": 0.9811, "step": 18191 }, { "epoch": 0.7117927850379528, "grad_norm": 0.0, "learning_rate": 4.049290816755328e-06, "loss": 0.9558, "step": 18192 }, { "epoch": 0.7118319117301823, "grad_norm": 0.0, "learning_rate": 4.048272424862425e-06, "loss": 0.861, "step": 18193 }, { "epoch": 0.7118710384224117, "grad_norm": 0.0, "learning_rate": 4.047254128546813e-06, "loss": 1.0027, "step": 18194 }, { "epoch": 0.7119101651146412, "grad_norm": 0.0, "learning_rate": 4.046235927824836e-06, "loss": 1.1299, "step": 18195 }, { "epoch": 0.7119492918068706, "grad_norm": 0.0, "learning_rate": 4.0452178227128505e-06, "loss": 0.6706, "step": 18196 }, { "epoch": 0.7119884184991001, "grad_norm": 0.0, "learning_rate": 4.044199813227208e-06, "loss": 0.9004, "step": 18197 }, { "epoch": 0.7120275451913295, "grad_norm": 0.0, "learning_rate": 4.04318189938425e-06, "loss": 1.0101, "step": 18198 }, { "epoch": 0.712066671883559, "grad_norm": 0.0, "learning_rate": 4.042164081200326e-06, "loss": 1.0159, "step": 18199 }, { "epoch": 0.7121057985757884, "grad_norm": 0.0, "learning_rate": 4.041146358691782e-06, "loss": 1.0167, "step": 18200 }, { "epoch": 0.7121449252680179, "grad_norm": 0.0, "learning_rate": 4.040128731874964e-06, "loss": 0.9599, "step": 18201 }, { "epoch": 0.7121840519602473, "grad_norm": 0.0, "learning_rate": 4.039111200766207e-06, "loss": 0.9498, "step": 18202 }, { "epoch": 0.7122231786524768, "grad_norm": 0.0, "learning_rate": 4.038093765381857e-06, "loss": 0.9728, "step": 18203 }, { "epoch": 0.7122623053447061, "grad_norm": 0.0, "learning_rate": 4.037076425738245e-06, "loss": 0.9727, "step": 18204 }, { "epoch": 0.7123014320369356, "grad_norm": 0.0, "learning_rate": 4.036059181851721e-06, "loss": 0.9384, "step": 18205 }, { "epoch": 0.712340558729165, "grad_norm": 0.0, "learning_rate": 4.0350420337386075e-06, "loss": 1.0279, "step": 18206 }, { "epoch": 0.7123796854213945, "grad_norm": 0.0, "learning_rate": 4.03402498141525e-06, "loss": 0.8969, "step": 18207 }, { "epoch": 0.7124188121136239, "grad_norm": 0.0, "learning_rate": 4.033008024897971e-06, "loss": 1.0467, "step": 18208 }, { "epoch": 0.7124579388058534, "grad_norm": 0.0, "learning_rate": 4.031991164203106e-06, "loss": 1.011, "step": 18209 }, { "epoch": 0.7124970654980828, "grad_norm": 0.0, "learning_rate": 4.030974399346985e-06, "loss": 0.9373, "step": 18210 }, { "epoch": 0.7125361921903123, "grad_norm": 0.0, "learning_rate": 4.029957730345939e-06, "loss": 0.9365, "step": 18211 }, { "epoch": 0.7125753188825417, "grad_norm": 0.0, "learning_rate": 4.028941157216287e-06, "loss": 0.9915, "step": 18212 }, { "epoch": 0.7126144455747712, "grad_norm": 0.0, "learning_rate": 4.027924679974358e-06, "loss": 1.1229, "step": 18213 }, { "epoch": 0.7126535722670005, "grad_norm": 0.0, "learning_rate": 4.026908298636476e-06, "loss": 0.9879, "step": 18214 }, { "epoch": 0.71269269895923, "grad_norm": 0.0, "learning_rate": 4.025892013218965e-06, "loss": 0.9587, "step": 18215 }, { "epoch": 0.7127318256514594, "grad_norm": 0.0, "learning_rate": 4.0248758237381395e-06, "loss": 1.0146, "step": 18216 }, { "epoch": 0.7127709523436888, "grad_norm": 0.0, "learning_rate": 4.023859730210323e-06, "loss": 1.0357, "step": 18217 }, { "epoch": 0.7128100790359183, "grad_norm": 0.0, "learning_rate": 4.0228437326518245e-06, "loss": 0.9405, "step": 18218 }, { "epoch": 0.7128492057281477, "grad_norm": 0.0, "learning_rate": 4.021827831078972e-06, "loss": 0.9444, "step": 18219 }, { "epoch": 0.7128883324203772, "grad_norm": 0.0, "learning_rate": 4.020812025508072e-06, "loss": 0.8803, "step": 18220 }, { "epoch": 0.7129274591126066, "grad_norm": 0.0, "learning_rate": 4.019796315955441e-06, "loss": 0.9599, "step": 18221 }, { "epoch": 0.7129665858048361, "grad_norm": 0.0, "learning_rate": 4.018780702437381e-06, "loss": 1.0294, "step": 18222 }, { "epoch": 0.7130057124970655, "grad_norm": 0.0, "learning_rate": 4.017765184970213e-06, "loss": 0.8691, "step": 18223 }, { "epoch": 0.713044839189295, "grad_norm": 0.0, "learning_rate": 4.016749763570238e-06, "loss": 0.9666, "step": 18224 }, { "epoch": 0.7130839658815243, "grad_norm": 0.0, "learning_rate": 4.015734438253768e-06, "loss": 1.0296, "step": 18225 }, { "epoch": 0.7131230925737538, "grad_norm": 0.0, "learning_rate": 4.014719209037097e-06, "loss": 0.9847, "step": 18226 }, { "epoch": 0.7131622192659832, "grad_norm": 0.0, "learning_rate": 4.013704075936543e-06, "loss": 0.8811, "step": 18227 }, { "epoch": 0.7132013459582127, "grad_norm": 0.0, "learning_rate": 4.012689038968396e-06, "loss": 0.9435, "step": 18228 }, { "epoch": 0.7132404726504421, "grad_norm": 0.0, "learning_rate": 4.011674098148964e-06, "loss": 0.9901, "step": 18229 }, { "epoch": 0.7132795993426716, "grad_norm": 0.0, "learning_rate": 4.010659253494538e-06, "loss": 1.1058, "step": 18230 }, { "epoch": 0.713318726034901, "grad_norm": 0.0, "learning_rate": 4.009644505021422e-06, "loss": 1.0956, "step": 18231 }, { "epoch": 0.7133578527271305, "grad_norm": 0.0, "learning_rate": 4.008629852745907e-06, "loss": 0.9651, "step": 18232 }, { "epoch": 0.7133969794193599, "grad_norm": 0.0, "learning_rate": 4.007615296684293e-06, "loss": 1.0903, "step": 18233 }, { "epoch": 0.7134361061115894, "grad_norm": 0.0, "learning_rate": 4.006600836852864e-06, "loss": 1.0658, "step": 18234 }, { "epoch": 0.7134752328038187, "grad_norm": 0.0, "learning_rate": 4.005586473267916e-06, "loss": 1.0586, "step": 18235 }, { "epoch": 0.7135143594960482, "grad_norm": 0.0, "learning_rate": 4.0045722059457415e-06, "loss": 0.9864, "step": 18236 }, { "epoch": 0.7135534861882776, "grad_norm": 0.0, "learning_rate": 4.003558034902621e-06, "loss": 0.9756, "step": 18237 }, { "epoch": 0.7135926128805071, "grad_norm": 0.0, "learning_rate": 4.002543960154844e-06, "loss": 0.966, "step": 18238 }, { "epoch": 0.7136317395727365, "grad_norm": 0.0, "learning_rate": 4.001529981718701e-06, "loss": 0.9601, "step": 18239 }, { "epoch": 0.713670866264966, "grad_norm": 0.0, "learning_rate": 4.000516099610465e-06, "loss": 0.876, "step": 18240 }, { "epoch": 0.7137099929571954, "grad_norm": 0.0, "learning_rate": 3.999502313846423e-06, "loss": 0.9805, "step": 18241 }, { "epoch": 0.7137491196494249, "grad_norm": 0.0, "learning_rate": 3.998488624442854e-06, "loss": 1.0519, "step": 18242 }, { "epoch": 0.7137882463416543, "grad_norm": 0.0, "learning_rate": 3.997475031416042e-06, "loss": 1.0391, "step": 18243 }, { "epoch": 0.7138273730338837, "grad_norm": 0.0, "learning_rate": 3.9964615347822555e-06, "loss": 0.9659, "step": 18244 }, { "epoch": 0.7138664997261132, "grad_norm": 0.0, "learning_rate": 3.995448134557775e-06, "loss": 0.8938, "step": 18245 }, { "epoch": 0.7139056264183425, "grad_norm": 0.0, "learning_rate": 3.994434830758875e-06, "loss": 0.9697, "step": 18246 }, { "epoch": 0.713944753110572, "grad_norm": 0.0, "learning_rate": 3.993421623401824e-06, "loss": 1.0589, "step": 18247 }, { "epoch": 0.7139838798028014, "grad_norm": 0.0, "learning_rate": 3.992408512502894e-06, "loss": 1.0269, "step": 18248 }, { "epoch": 0.7140230064950309, "grad_norm": 0.0, "learning_rate": 3.9913954980783565e-06, "loss": 1.0587, "step": 18249 }, { "epoch": 0.7140621331872603, "grad_norm": 0.0, "learning_rate": 3.990382580144481e-06, "loss": 1.0524, "step": 18250 }, { "epoch": 0.7141012598794898, "grad_norm": 0.0, "learning_rate": 3.989369758717528e-06, "loss": 0.9616, "step": 18251 }, { "epoch": 0.7141403865717192, "grad_norm": 0.0, "learning_rate": 3.988357033813767e-06, "loss": 0.9368, "step": 18252 }, { "epoch": 0.7141795132639487, "grad_norm": 0.0, "learning_rate": 3.987344405449453e-06, "loss": 0.9445, "step": 18253 }, { "epoch": 0.7142186399561781, "grad_norm": 0.0, "learning_rate": 3.986331873640861e-06, "loss": 0.9977, "step": 18254 }, { "epoch": 0.7142577666484076, "grad_norm": 0.0, "learning_rate": 3.985319438404238e-06, "loss": 0.9155, "step": 18255 }, { "epoch": 0.714296893340637, "grad_norm": 0.0, "learning_rate": 3.984307099755853e-06, "loss": 1.0781, "step": 18256 }, { "epoch": 0.7143360200328664, "grad_norm": 0.0, "learning_rate": 3.983294857711955e-06, "loss": 1.0675, "step": 18257 }, { "epoch": 0.7143751467250958, "grad_norm": 0.0, "learning_rate": 3.982282712288802e-06, "loss": 0.8663, "step": 18258 }, { "epoch": 0.7144142734173253, "grad_norm": 0.0, "learning_rate": 3.981270663502647e-06, "loss": 1.0665, "step": 18259 }, { "epoch": 0.7144534001095547, "grad_norm": 0.0, "learning_rate": 3.980258711369747e-06, "loss": 1.082, "step": 18260 }, { "epoch": 0.7144925268017842, "grad_norm": 0.0, "learning_rate": 3.979246855906346e-06, "loss": 1.0965, "step": 18261 }, { "epoch": 0.7145316534940136, "grad_norm": 0.0, "learning_rate": 3.9782350971286965e-06, "loss": 0.968, "step": 18262 }, { "epoch": 0.7145707801862431, "grad_norm": 0.0, "learning_rate": 3.977223435053045e-06, "loss": 0.9506, "step": 18263 }, { "epoch": 0.7146099068784725, "grad_norm": 0.0, "learning_rate": 3.976211869695641e-06, "loss": 1.035, "step": 18264 }, { "epoch": 0.714649033570702, "grad_norm": 0.0, "learning_rate": 3.975200401072723e-06, "loss": 0.9878, "step": 18265 }, { "epoch": 0.7146881602629314, "grad_norm": 0.0, "learning_rate": 3.974189029200542e-06, "loss": 0.9865, "step": 18266 }, { "epoch": 0.7147272869551609, "grad_norm": 0.0, "learning_rate": 3.973177754095325e-06, "loss": 1.0046, "step": 18267 }, { "epoch": 0.7147664136473902, "grad_norm": 0.0, "learning_rate": 3.9721665757733295e-06, "loss": 0.9634, "step": 18268 }, { "epoch": 0.7148055403396197, "grad_norm": 0.0, "learning_rate": 3.971155494250783e-06, "loss": 0.9866, "step": 18269 }, { "epoch": 0.7148446670318491, "grad_norm": 0.0, "learning_rate": 3.970144509543927e-06, "loss": 0.9703, "step": 18270 }, { "epoch": 0.7148837937240786, "grad_norm": 0.0, "learning_rate": 3.969133621668987e-06, "loss": 1.0846, "step": 18271 }, { "epoch": 0.714922920416308, "grad_norm": 0.0, "learning_rate": 3.968122830642211e-06, "loss": 1.0243, "step": 18272 }, { "epoch": 0.7149620471085374, "grad_norm": 0.0, "learning_rate": 3.967112136479822e-06, "loss": 1.0187, "step": 18273 }, { "epoch": 0.7150011738007669, "grad_norm": 0.0, "learning_rate": 3.966101539198055e-06, "loss": 1.0763, "step": 18274 }, { "epoch": 0.7150403004929963, "grad_norm": 0.0, "learning_rate": 3.965091038813132e-06, "loss": 0.9347, "step": 18275 }, { "epoch": 0.7150794271852258, "grad_norm": 0.0, "learning_rate": 3.9640806353412866e-06, "loss": 0.9823, "step": 18276 }, { "epoch": 0.7151185538774552, "grad_norm": 0.0, "learning_rate": 3.963070328798741e-06, "loss": 1.0406, "step": 18277 }, { "epoch": 0.7151576805696847, "grad_norm": 0.0, "learning_rate": 3.962060119201726e-06, "loss": 0.9409, "step": 18278 }, { "epoch": 0.715196807261914, "grad_norm": 0.0, "learning_rate": 3.961050006566455e-06, "loss": 0.9358, "step": 18279 }, { "epoch": 0.7152359339541435, "grad_norm": 0.0, "learning_rate": 3.960039990909155e-06, "loss": 1.0783, "step": 18280 }, { "epoch": 0.7152750606463729, "grad_norm": 0.0, "learning_rate": 3.959030072246043e-06, "loss": 1.0445, "step": 18281 }, { "epoch": 0.7153141873386024, "grad_norm": 0.0, "learning_rate": 3.958020250593342e-06, "loss": 0.9789, "step": 18282 }, { "epoch": 0.7153533140308318, "grad_norm": 0.0, "learning_rate": 3.957010525967262e-06, "loss": 1.0402, "step": 18283 }, { "epoch": 0.7153924407230613, "grad_norm": 0.0, "learning_rate": 3.956000898384019e-06, "loss": 1.0362, "step": 18284 }, { "epoch": 0.7154315674152907, "grad_norm": 0.0, "learning_rate": 3.954991367859833e-06, "loss": 0.9147, "step": 18285 }, { "epoch": 0.7154706941075202, "grad_norm": 0.0, "learning_rate": 3.953981934410907e-06, "loss": 0.9615, "step": 18286 }, { "epoch": 0.7155098207997496, "grad_norm": 0.0, "learning_rate": 3.952972598053455e-06, "loss": 1.0566, "step": 18287 }, { "epoch": 0.7155489474919791, "grad_norm": 0.0, "learning_rate": 3.95196335880369e-06, "loss": 1.1979, "step": 18288 }, { "epoch": 0.7155880741842084, "grad_norm": 0.0, "learning_rate": 3.95095421667781e-06, "loss": 0.8674, "step": 18289 }, { "epoch": 0.7156272008764379, "grad_norm": 0.0, "learning_rate": 3.949945171692026e-06, "loss": 1.018, "step": 18290 }, { "epoch": 0.7156663275686673, "grad_norm": 0.0, "learning_rate": 3.948936223862545e-06, "loss": 0.9605, "step": 18291 }, { "epoch": 0.7157054542608968, "grad_norm": 0.0, "learning_rate": 3.947927373205562e-06, "loss": 0.943, "step": 18292 }, { "epoch": 0.7157445809531262, "grad_norm": 0.0, "learning_rate": 3.946918619737282e-06, "loss": 0.9566, "step": 18293 }, { "epoch": 0.7157837076453557, "grad_norm": 0.0, "learning_rate": 3.945909963473904e-06, "loss": 0.9575, "step": 18294 }, { "epoch": 0.7158228343375851, "grad_norm": 0.0, "learning_rate": 3.944901404431629e-06, "loss": 1.0728, "step": 18295 }, { "epoch": 0.7158619610298146, "grad_norm": 0.0, "learning_rate": 3.943892942626647e-06, "loss": 0.9734, "step": 18296 }, { "epoch": 0.715901087722044, "grad_norm": 0.0, "learning_rate": 3.942884578075158e-06, "loss": 0.9158, "step": 18297 }, { "epoch": 0.7159402144142735, "grad_norm": 0.0, "learning_rate": 3.9418763107933465e-06, "loss": 0.8702, "step": 18298 }, { "epoch": 0.7159793411065029, "grad_norm": 0.0, "learning_rate": 3.940868140797418e-06, "loss": 0.8654, "step": 18299 }, { "epoch": 0.7160184677987323, "grad_norm": 0.0, "learning_rate": 3.93986006810355e-06, "loss": 0.9667, "step": 18300 }, { "epoch": 0.7160575944909617, "grad_norm": 0.0, "learning_rate": 3.938852092727939e-06, "loss": 0.9978, "step": 18301 }, { "epoch": 0.7160967211831911, "grad_norm": 0.0, "learning_rate": 3.937844214686763e-06, "loss": 0.9402, "step": 18302 }, { "epoch": 0.7161358478754206, "grad_norm": 0.0, "learning_rate": 3.936836433996217e-06, "loss": 0.9684, "step": 18303 }, { "epoch": 0.71617497456765, "grad_norm": 0.0, "learning_rate": 3.935828750672478e-06, "loss": 0.9176, "step": 18304 }, { "epoch": 0.7162141012598795, "grad_norm": 0.0, "learning_rate": 3.934821164731735e-06, "loss": 0.9908, "step": 18305 }, { "epoch": 0.7162532279521089, "grad_norm": 0.0, "learning_rate": 3.933813676190159e-06, "loss": 0.9051, "step": 18306 }, { "epoch": 0.7162923546443384, "grad_norm": 0.0, "learning_rate": 3.9328062850639346e-06, "loss": 1.0685, "step": 18307 }, { "epoch": 0.7163314813365678, "grad_norm": 0.0, "learning_rate": 3.931798991369239e-06, "loss": 1.0338, "step": 18308 }, { "epoch": 0.7163706080287973, "grad_norm": 0.0, "learning_rate": 3.930791795122251e-06, "loss": 0.9224, "step": 18309 }, { "epoch": 0.7164097347210266, "grad_norm": 0.0, "learning_rate": 3.929784696339137e-06, "loss": 0.8753, "step": 18310 }, { "epoch": 0.7164488614132561, "grad_norm": 0.0, "learning_rate": 3.928777695036076e-06, "loss": 1.0135, "step": 18311 }, { "epoch": 0.7164879881054855, "grad_norm": 0.0, "learning_rate": 3.927770791229236e-06, "loss": 0.8522, "step": 18312 }, { "epoch": 0.716527114797715, "grad_norm": 0.0, "learning_rate": 3.926763984934792e-06, "loss": 0.9381, "step": 18313 }, { "epoch": 0.7165662414899444, "grad_norm": 0.0, "learning_rate": 3.925757276168907e-06, "loss": 1.0504, "step": 18314 }, { "epoch": 0.7166053681821739, "grad_norm": 0.0, "learning_rate": 3.924750664947749e-06, "loss": 1.0437, "step": 18315 }, { "epoch": 0.7166444948744033, "grad_norm": 0.0, "learning_rate": 3.923744151287477e-06, "loss": 0.8938, "step": 18316 }, { "epoch": 0.7166836215666328, "grad_norm": 0.0, "learning_rate": 3.922737735204267e-06, "loss": 0.9723, "step": 18317 }, { "epoch": 0.7167227482588622, "grad_norm": 0.0, "learning_rate": 3.92173141671427e-06, "loss": 0.9841, "step": 18318 }, { "epoch": 0.7167618749510917, "grad_norm": 0.0, "learning_rate": 3.9207251958336545e-06, "loss": 1.0024, "step": 18319 }, { "epoch": 0.7168010016433211, "grad_norm": 0.0, "learning_rate": 3.919719072578567e-06, "loss": 1.0305, "step": 18320 }, { "epoch": 0.7168401283355506, "grad_norm": 0.0, "learning_rate": 3.918713046965179e-06, "loss": 0.966, "step": 18321 }, { "epoch": 0.7168792550277799, "grad_norm": 0.0, "learning_rate": 3.917707119009636e-06, "loss": 0.9731, "step": 18322 }, { "epoch": 0.7169183817200094, "grad_norm": 0.0, "learning_rate": 3.916701288728097e-06, "loss": 0.9948, "step": 18323 }, { "epoch": 0.7169575084122388, "grad_norm": 0.0, "learning_rate": 3.915695556136712e-06, "loss": 1.009, "step": 18324 }, { "epoch": 0.7169966351044683, "grad_norm": 0.0, "learning_rate": 3.91468992125163e-06, "loss": 0.9039, "step": 18325 }, { "epoch": 0.7170357617966977, "grad_norm": 0.0, "learning_rate": 3.913684384089002e-06, "loss": 1.0886, "step": 18326 }, { "epoch": 0.7170748884889272, "grad_norm": 0.0, "learning_rate": 3.9126789446649815e-06, "loss": 0.9548, "step": 18327 }, { "epoch": 0.7171140151811566, "grad_norm": 0.0, "learning_rate": 3.911673602995705e-06, "loss": 1.1428, "step": 18328 }, { "epoch": 0.717153141873386, "grad_norm": 0.0, "learning_rate": 3.910668359097321e-06, "loss": 1.091, "step": 18329 }, { "epoch": 0.7171922685656155, "grad_norm": 0.0, "learning_rate": 3.909663212985978e-06, "loss": 0.9631, "step": 18330 }, { "epoch": 0.7172313952578449, "grad_norm": 0.0, "learning_rate": 3.908658164677807e-06, "loss": 0.9646, "step": 18331 }, { "epoch": 0.7172705219500743, "grad_norm": 0.0, "learning_rate": 3.907653214188953e-06, "loss": 1.0776, "step": 18332 }, { "epoch": 0.7173096486423037, "grad_norm": 0.0, "learning_rate": 3.906648361535559e-06, "loss": 0.8913, "step": 18333 }, { "epoch": 0.7173487753345332, "grad_norm": 0.0, "learning_rate": 3.905643606733752e-06, "loss": 0.8903, "step": 18334 }, { "epoch": 0.7173879020267626, "grad_norm": 0.0, "learning_rate": 3.904638949799673e-06, "loss": 1.0627, "step": 18335 }, { "epoch": 0.7174270287189921, "grad_norm": 0.0, "learning_rate": 3.903634390749458e-06, "loss": 0.8922, "step": 18336 }, { "epoch": 0.7174661554112215, "grad_norm": 0.0, "learning_rate": 3.902629929599231e-06, "loss": 0.8147, "step": 18337 }, { "epoch": 0.717505282103451, "grad_norm": 0.0, "learning_rate": 3.901625566365128e-06, "loss": 0.9372, "step": 18338 }, { "epoch": 0.7175444087956804, "grad_norm": 0.0, "learning_rate": 3.900621301063276e-06, "loss": 0.9669, "step": 18339 }, { "epoch": 0.7175835354879099, "grad_norm": 0.0, "learning_rate": 3.899617133709807e-06, "loss": 0.9479, "step": 18340 }, { "epoch": 0.7176226621801393, "grad_norm": 0.0, "learning_rate": 3.8986130643208385e-06, "loss": 0.9155, "step": 18341 }, { "epoch": 0.7176617888723688, "grad_norm": 0.0, "learning_rate": 3.8976090929125e-06, "loss": 0.9429, "step": 18342 }, { "epoch": 0.7177009155645981, "grad_norm": 0.0, "learning_rate": 3.89660521950091e-06, "loss": 1.0202, "step": 18343 }, { "epoch": 0.7177400422568276, "grad_norm": 0.0, "learning_rate": 3.895601444102198e-06, "loss": 0.9831, "step": 18344 }, { "epoch": 0.717779168949057, "grad_norm": 0.0, "learning_rate": 3.894597766732474e-06, "loss": 1.1303, "step": 18345 }, { "epoch": 0.7178182956412865, "grad_norm": 0.0, "learning_rate": 3.893594187407863e-06, "loss": 1.1356, "step": 18346 }, { "epoch": 0.7178574223335159, "grad_norm": 0.0, "learning_rate": 3.89259070614447e-06, "loss": 1.094, "step": 18347 }, { "epoch": 0.7178965490257454, "grad_norm": 0.0, "learning_rate": 3.891587322958424e-06, "loss": 1.0718, "step": 18348 }, { "epoch": 0.7179356757179748, "grad_norm": 0.0, "learning_rate": 3.890584037865829e-06, "loss": 1.0462, "step": 18349 }, { "epoch": 0.7179748024102043, "grad_norm": 0.0, "learning_rate": 3.889580850882801e-06, "loss": 0.9301, "step": 18350 }, { "epoch": 0.7180139291024337, "grad_norm": 0.0, "learning_rate": 3.888577762025441e-06, "loss": 0.9997, "step": 18351 }, { "epoch": 0.7180530557946632, "grad_norm": 0.0, "learning_rate": 3.887574771309871e-06, "loss": 0.8361, "step": 18352 }, { "epoch": 0.7180921824868925, "grad_norm": 0.0, "learning_rate": 3.886571878752188e-06, "loss": 0.9494, "step": 18353 }, { "epoch": 0.718131309179122, "grad_norm": 0.0, "learning_rate": 3.885569084368503e-06, "loss": 1.0156, "step": 18354 }, { "epoch": 0.7181704358713514, "grad_norm": 0.0, "learning_rate": 3.884566388174914e-06, "loss": 1.0449, "step": 18355 }, { "epoch": 0.7182095625635809, "grad_norm": 0.0, "learning_rate": 3.883563790187526e-06, "loss": 0.9318, "step": 18356 }, { "epoch": 0.7182486892558103, "grad_norm": 0.0, "learning_rate": 3.882561290422437e-06, "loss": 0.8817, "step": 18357 }, { "epoch": 0.7182878159480397, "grad_norm": 0.0, "learning_rate": 3.881558888895754e-06, "loss": 1.1002, "step": 18358 }, { "epoch": 0.7183269426402692, "grad_norm": 0.0, "learning_rate": 3.880556585623564e-06, "loss": 0.995, "step": 18359 }, { "epoch": 0.7183660693324986, "grad_norm": 0.0, "learning_rate": 3.879554380621967e-06, "loss": 0.9389, "step": 18360 }, { "epoch": 0.7184051960247281, "grad_norm": 0.0, "learning_rate": 3.878552273907057e-06, "loss": 0.9782, "step": 18361 }, { "epoch": 0.7184443227169575, "grad_norm": 0.0, "learning_rate": 3.87755026549493e-06, "loss": 0.9914, "step": 18362 }, { "epoch": 0.718483449409187, "grad_norm": 0.0, "learning_rate": 3.876548355401672e-06, "loss": 1.0349, "step": 18363 }, { "epoch": 0.7185225761014163, "grad_norm": 0.0, "learning_rate": 3.875546543643377e-06, "loss": 1.0716, "step": 18364 }, { "epoch": 0.7185617027936458, "grad_norm": 0.0, "learning_rate": 3.874544830236123e-06, "loss": 0.9666, "step": 18365 }, { "epoch": 0.7186008294858752, "grad_norm": 0.0, "learning_rate": 3.87354321519601e-06, "loss": 1.0109, "step": 18366 }, { "epoch": 0.7186399561781047, "grad_norm": 0.0, "learning_rate": 3.872541698539113e-06, "loss": 1.0488, "step": 18367 }, { "epoch": 0.7186790828703341, "grad_norm": 0.0, "learning_rate": 3.871540280281521e-06, "loss": 0.9397, "step": 18368 }, { "epoch": 0.7187182095625636, "grad_norm": 0.0, "learning_rate": 3.870538960439311e-06, "loss": 1.1004, "step": 18369 }, { "epoch": 0.718757336254793, "grad_norm": 0.0, "learning_rate": 3.869537739028563e-06, "loss": 0.9772, "step": 18370 }, { "epoch": 0.7187964629470225, "grad_norm": 0.0, "learning_rate": 3.868536616065358e-06, "loss": 1.1122, "step": 18371 }, { "epoch": 0.7188355896392519, "grad_norm": 0.0, "learning_rate": 3.8675355915657755e-06, "loss": 0.9313, "step": 18372 }, { "epoch": 0.7188747163314814, "grad_norm": 0.0, "learning_rate": 3.866534665545882e-06, "loss": 1.0689, "step": 18373 }, { "epoch": 0.7189138430237108, "grad_norm": 0.0, "learning_rate": 3.8655338380217575e-06, "loss": 0.9174, "step": 18374 }, { "epoch": 0.7189529697159402, "grad_norm": 0.0, "learning_rate": 3.8645331090094755e-06, "loss": 0.9597, "step": 18375 }, { "epoch": 0.7189920964081696, "grad_norm": 0.0, "learning_rate": 3.8635324785251e-06, "loss": 0.984, "step": 18376 }, { "epoch": 0.7190312231003991, "grad_norm": 0.0, "learning_rate": 3.862531946584705e-06, "loss": 0.9665, "step": 18377 }, { "epoch": 0.7190703497926285, "grad_norm": 0.0, "learning_rate": 3.861531513204354e-06, "loss": 0.9275, "step": 18378 }, { "epoch": 0.719109476484858, "grad_norm": 0.0, "learning_rate": 3.86053117840012e-06, "loss": 0.9817, "step": 18379 }, { "epoch": 0.7191486031770874, "grad_norm": 0.0, "learning_rate": 3.859530942188058e-06, "loss": 1.0412, "step": 18380 }, { "epoch": 0.7191877298693169, "grad_norm": 0.0, "learning_rate": 3.8585308045842375e-06, "loss": 0.9162, "step": 18381 }, { "epoch": 0.7192268565615463, "grad_norm": 0.0, "learning_rate": 3.857530765604713e-06, "loss": 0.9742, "step": 18382 }, { "epoch": 0.7192659832537758, "grad_norm": 0.0, "learning_rate": 3.856530825265549e-06, "loss": 0.9547, "step": 18383 }, { "epoch": 0.7193051099460052, "grad_norm": 0.0, "learning_rate": 3.855530983582801e-06, "loss": 0.9725, "step": 18384 }, { "epoch": 0.7193442366382347, "grad_norm": 0.0, "learning_rate": 3.854531240572529e-06, "loss": 0.9813, "step": 18385 }, { "epoch": 0.719383363330464, "grad_norm": 0.0, "learning_rate": 3.853531596250781e-06, "loss": 0.9455, "step": 18386 }, { "epoch": 0.7194224900226934, "grad_norm": 0.0, "learning_rate": 3.852532050633615e-06, "loss": 0.9655, "step": 18387 }, { "epoch": 0.7194616167149229, "grad_norm": 0.0, "learning_rate": 3.8515326037370805e-06, "loss": 0.9689, "step": 18388 }, { "epoch": 0.7195007434071523, "grad_norm": 0.0, "learning_rate": 3.85053325557723e-06, "loss": 0.9897, "step": 18389 }, { "epoch": 0.7195398700993818, "grad_norm": 0.0, "learning_rate": 3.849534006170108e-06, "loss": 0.9897, "step": 18390 }, { "epoch": 0.7195789967916112, "grad_norm": 0.0, "learning_rate": 3.848534855531766e-06, "loss": 0.9573, "step": 18391 }, { "epoch": 0.7196181234838407, "grad_norm": 0.0, "learning_rate": 3.847535803678238e-06, "loss": 1.1685, "step": 18392 }, { "epoch": 0.7196572501760701, "grad_norm": 0.0, "learning_rate": 3.846536850625583e-06, "loss": 1.0887, "step": 18393 }, { "epoch": 0.7196963768682996, "grad_norm": 0.0, "learning_rate": 3.845537996389832e-06, "loss": 1.091, "step": 18394 }, { "epoch": 0.719735503560529, "grad_norm": 0.0, "learning_rate": 3.844539240987033e-06, "loss": 1.0518, "step": 18395 }, { "epoch": 0.7197746302527585, "grad_norm": 0.0, "learning_rate": 3.843540584433213e-06, "loss": 1.0155, "step": 18396 }, { "epoch": 0.7198137569449878, "grad_norm": 0.0, "learning_rate": 3.842542026744423e-06, "loss": 1.1337, "step": 18397 }, { "epoch": 0.7198528836372173, "grad_norm": 0.0, "learning_rate": 3.841543567936691e-06, "loss": 1.0536, "step": 18398 }, { "epoch": 0.7198920103294467, "grad_norm": 0.0, "learning_rate": 3.840545208026054e-06, "loss": 1.0052, "step": 18399 }, { "epoch": 0.7199311370216762, "grad_norm": 0.0, "learning_rate": 3.839546947028536e-06, "loss": 0.9474, "step": 18400 }, { "epoch": 0.7199702637139056, "grad_norm": 0.0, "learning_rate": 3.838548784960182e-06, "loss": 1.0231, "step": 18401 }, { "epoch": 0.7200093904061351, "grad_norm": 0.0, "learning_rate": 3.83755072183701e-06, "loss": 1.0147, "step": 18402 }, { "epoch": 0.7200485170983645, "grad_norm": 0.0, "learning_rate": 3.836552757675055e-06, "loss": 0.9179, "step": 18403 }, { "epoch": 0.720087643790594, "grad_norm": 0.0, "learning_rate": 3.835554892490335e-06, "loss": 0.9844, "step": 18404 }, { "epoch": 0.7201267704828234, "grad_norm": 0.0, "learning_rate": 3.83455712629888e-06, "loss": 0.9676, "step": 18405 }, { "epoch": 0.7201658971750529, "grad_norm": 0.0, "learning_rate": 3.8335594591167114e-06, "loss": 1.0134, "step": 18406 }, { "epoch": 0.7202050238672822, "grad_norm": 0.0, "learning_rate": 3.832561890959855e-06, "loss": 0.9716, "step": 18407 }, { "epoch": 0.7202441505595117, "grad_norm": 0.0, "learning_rate": 3.831564421844323e-06, "loss": 1.0852, "step": 18408 }, { "epoch": 0.7202832772517411, "grad_norm": 0.0, "learning_rate": 3.830567051786136e-06, "loss": 1.0068, "step": 18409 }, { "epoch": 0.7203224039439706, "grad_norm": 0.0, "learning_rate": 3.8295697808013124e-06, "loss": 0.976, "step": 18410 }, { "epoch": 0.7203615306362, "grad_norm": 0.0, "learning_rate": 3.8285726089058685e-06, "loss": 0.9736, "step": 18411 }, { "epoch": 0.7204006573284295, "grad_norm": 0.0, "learning_rate": 3.827575536115813e-06, "loss": 0.8959, "step": 18412 }, { "epoch": 0.7204397840206589, "grad_norm": 0.0, "learning_rate": 3.826578562447162e-06, "loss": 0.8378, "step": 18413 }, { "epoch": 0.7204789107128884, "grad_norm": 0.0, "learning_rate": 3.825581687915922e-06, "loss": 1.022, "step": 18414 }, { "epoch": 0.7205180374051178, "grad_norm": 0.0, "learning_rate": 3.824584912538101e-06, "loss": 1.1023, "step": 18415 }, { "epoch": 0.7205571640973472, "grad_norm": 0.0, "learning_rate": 3.8235882363297095e-06, "loss": 0.9895, "step": 18416 }, { "epoch": 0.7205962907895767, "grad_norm": 0.0, "learning_rate": 3.822591659306754e-06, "loss": 0.9514, "step": 18417 }, { "epoch": 0.720635417481806, "grad_norm": 0.0, "learning_rate": 3.821595181485232e-06, "loss": 0.8389, "step": 18418 }, { "epoch": 0.7206745441740355, "grad_norm": 0.0, "learning_rate": 3.820598802881151e-06, "loss": 0.9642, "step": 18419 }, { "epoch": 0.7207136708662649, "grad_norm": 0.0, "learning_rate": 3.8196025235105124e-06, "loss": 0.9893, "step": 18420 }, { "epoch": 0.7207527975584944, "grad_norm": 0.0, "learning_rate": 3.81860634338931e-06, "loss": 1.0309, "step": 18421 }, { "epoch": 0.7207919242507238, "grad_norm": 0.0, "learning_rate": 3.817610262533543e-06, "loss": 1.0522, "step": 18422 }, { "epoch": 0.7208310509429533, "grad_norm": 0.0, "learning_rate": 3.816614280959209e-06, "loss": 0.9081, "step": 18423 }, { "epoch": 0.7208701776351827, "grad_norm": 0.0, "learning_rate": 3.815618398682305e-06, "loss": 1.0337, "step": 18424 }, { "epoch": 0.7209093043274122, "grad_norm": 0.0, "learning_rate": 3.814622615718816e-06, "loss": 0.9205, "step": 18425 }, { "epoch": 0.7209484310196416, "grad_norm": 0.0, "learning_rate": 3.8136269320847406e-06, "loss": 1.0372, "step": 18426 }, { "epoch": 0.7209875577118711, "grad_norm": 0.0, "learning_rate": 3.8126313477960577e-06, "loss": 0.8879, "step": 18427 }, { "epoch": 0.7210266844041004, "grad_norm": 0.0, "learning_rate": 3.81163586286877e-06, "loss": 1.026, "step": 18428 }, { "epoch": 0.72106581109633, "grad_norm": 0.0, "learning_rate": 3.810640477318851e-06, "loss": 0.9324, "step": 18429 }, { "epoch": 0.7211049377885593, "grad_norm": 0.0, "learning_rate": 3.8096451911622945e-06, "loss": 1.057, "step": 18430 }, { "epoch": 0.7211440644807888, "grad_norm": 0.0, "learning_rate": 3.8086500044150753e-06, "loss": 0.981, "step": 18431 }, { "epoch": 0.7211831911730182, "grad_norm": 0.0, "learning_rate": 3.80765491709318e-06, "loss": 1.0098, "step": 18432 }, { "epoch": 0.7212223178652477, "grad_norm": 0.0, "learning_rate": 3.806659929212586e-06, "loss": 0.9573, "step": 18433 }, { "epoch": 0.7212614445574771, "grad_norm": 0.0, "learning_rate": 3.805665040789277e-06, "loss": 0.9037, "step": 18434 }, { "epoch": 0.7213005712497066, "grad_norm": 0.0, "learning_rate": 3.804670251839222e-06, "loss": 0.9536, "step": 18435 }, { "epoch": 0.721339697941936, "grad_norm": 0.0, "learning_rate": 3.8036755623783994e-06, "loss": 1.0135, "step": 18436 }, { "epoch": 0.7213788246341655, "grad_norm": 0.0, "learning_rate": 3.802680972422783e-06, "loss": 0.9108, "step": 18437 }, { "epoch": 0.7214179513263949, "grad_norm": 0.0, "learning_rate": 3.801686481988348e-06, "loss": 0.9357, "step": 18438 }, { "epoch": 0.7214570780186244, "grad_norm": 0.0, "learning_rate": 3.8006920910910583e-06, "loss": 1.1292, "step": 18439 }, { "epoch": 0.7214962047108537, "grad_norm": 0.0, "learning_rate": 3.7996977997468874e-06, "loss": 0.9808, "step": 18440 }, { "epoch": 0.7215353314030832, "grad_norm": 0.0, "learning_rate": 3.798703607971795e-06, "loss": 0.9613, "step": 18441 }, { "epoch": 0.7215744580953126, "grad_norm": 0.0, "learning_rate": 3.797709515781758e-06, "loss": 1.0553, "step": 18442 }, { "epoch": 0.721613584787542, "grad_norm": 0.0, "learning_rate": 3.796715523192731e-06, "loss": 1.0316, "step": 18443 }, { "epoch": 0.7216527114797715, "grad_norm": 0.0, "learning_rate": 3.7957216302206833e-06, "loss": 1.0647, "step": 18444 }, { "epoch": 0.7216918381720009, "grad_norm": 0.0, "learning_rate": 3.7947278368815644e-06, "loss": 1.0072, "step": 18445 }, { "epoch": 0.7217309648642304, "grad_norm": 0.0, "learning_rate": 3.7937341431913486e-06, "loss": 1.0516, "step": 18446 }, { "epoch": 0.7217700915564598, "grad_norm": 0.0, "learning_rate": 3.7927405491659818e-06, "loss": 0.8924, "step": 18447 }, { "epoch": 0.7218092182486893, "grad_norm": 0.0, "learning_rate": 3.791747054821426e-06, "loss": 0.9987, "step": 18448 }, { "epoch": 0.7218483449409187, "grad_norm": 0.0, "learning_rate": 3.7907536601736306e-06, "loss": 0.9779, "step": 18449 }, { "epoch": 0.7218874716331481, "grad_norm": 0.0, "learning_rate": 3.7897603652385505e-06, "loss": 0.9615, "step": 18450 }, { "epoch": 0.7219265983253775, "grad_norm": 0.0, "learning_rate": 3.7887671700321383e-06, "loss": 0.845, "step": 18451 }, { "epoch": 0.721965725017607, "grad_norm": 0.0, "learning_rate": 3.787774074570344e-06, "loss": 1.0815, "step": 18452 }, { "epoch": 0.7220048517098364, "grad_norm": 0.0, "learning_rate": 3.7867810788691105e-06, "loss": 1.0637, "step": 18453 }, { "epoch": 0.7220439784020659, "grad_norm": 0.0, "learning_rate": 3.7857881829443887e-06, "loss": 1.0721, "step": 18454 }, { "epoch": 0.7220831050942953, "grad_norm": 0.0, "learning_rate": 3.78479538681212e-06, "loss": 0.8058, "step": 18455 }, { "epoch": 0.7221222317865248, "grad_norm": 0.0, "learning_rate": 3.7838026904882543e-06, "loss": 0.8935, "step": 18456 }, { "epoch": 0.7221613584787542, "grad_norm": 0.0, "learning_rate": 3.7828100939887235e-06, "loss": 0.9547, "step": 18457 }, { "epoch": 0.7222004851709837, "grad_norm": 0.0, "learning_rate": 3.7818175973294722e-06, "loss": 0.9296, "step": 18458 }, { "epoch": 0.7222396118632131, "grad_norm": 0.0, "learning_rate": 3.7808252005264422e-06, "loss": 0.9218, "step": 18459 }, { "epoch": 0.7222787385554426, "grad_norm": 0.0, "learning_rate": 3.7798329035955627e-06, "loss": 1.0516, "step": 18460 }, { "epoch": 0.7223178652476719, "grad_norm": 0.0, "learning_rate": 3.778840706552773e-06, "loss": 1.0002, "step": 18461 }, { "epoch": 0.7223569919399014, "grad_norm": 0.0, "learning_rate": 3.7778486094140086e-06, "loss": 0.9609, "step": 18462 }, { "epoch": 0.7223961186321308, "grad_norm": 0.0, "learning_rate": 3.7768566121951966e-06, "loss": 1.0159, "step": 18463 }, { "epoch": 0.7224352453243603, "grad_norm": 0.0, "learning_rate": 3.7758647149122683e-06, "loss": 0.9827, "step": 18464 }, { "epoch": 0.7224743720165897, "grad_norm": 0.0, "learning_rate": 3.7748729175811573e-06, "loss": 0.9669, "step": 18465 }, { "epoch": 0.7225134987088192, "grad_norm": 0.0, "learning_rate": 3.7738812202177832e-06, "loss": 0.9246, "step": 18466 }, { "epoch": 0.7225526254010486, "grad_norm": 0.0, "learning_rate": 3.772889622838074e-06, "loss": 1.0892, "step": 18467 }, { "epoch": 0.7225917520932781, "grad_norm": 0.0, "learning_rate": 3.7718981254579557e-06, "loss": 0.9399, "step": 18468 }, { "epoch": 0.7226308787855075, "grad_norm": 0.0, "learning_rate": 3.770906728093352e-06, "loss": 1.0352, "step": 18469 }, { "epoch": 0.722670005477737, "grad_norm": 0.0, "learning_rate": 3.769915430760178e-06, "loss": 0.8765, "step": 18470 }, { "epoch": 0.7227091321699664, "grad_norm": 0.0, "learning_rate": 3.768924233474358e-06, "loss": 1.033, "step": 18471 }, { "epoch": 0.7227482588621957, "grad_norm": 0.0, "learning_rate": 3.767933136251801e-06, "loss": 0.9126, "step": 18472 }, { "epoch": 0.7227873855544252, "grad_norm": 0.0, "learning_rate": 3.766942139108435e-06, "loss": 0.9108, "step": 18473 }, { "epoch": 0.7228265122466546, "grad_norm": 0.0, "learning_rate": 3.765951242060164e-06, "loss": 0.9582, "step": 18474 }, { "epoch": 0.7228656389388841, "grad_norm": 0.0, "learning_rate": 3.7649604451229082e-06, "loss": 0.9972, "step": 18475 }, { "epoch": 0.7229047656311135, "grad_norm": 0.0, "learning_rate": 3.763969748312568e-06, "loss": 0.9075, "step": 18476 }, { "epoch": 0.722943892323343, "grad_norm": 0.0, "learning_rate": 3.7629791516450652e-06, "loss": 0.9544, "step": 18477 }, { "epoch": 0.7229830190155724, "grad_norm": 0.0, "learning_rate": 3.7619886551362992e-06, "loss": 0.9131, "step": 18478 }, { "epoch": 0.7230221457078019, "grad_norm": 0.0, "learning_rate": 3.7609982588021833e-06, "loss": 0.9773, "step": 18479 }, { "epoch": 0.7230612724000313, "grad_norm": 0.0, "learning_rate": 3.760007962658613e-06, "loss": 1.0682, "step": 18480 }, { "epoch": 0.7231003990922608, "grad_norm": 0.0, "learning_rate": 3.7590177667214957e-06, "loss": 1.0021, "step": 18481 }, { "epoch": 0.7231395257844901, "grad_norm": 0.0, "learning_rate": 3.758027671006732e-06, "loss": 0.9065, "step": 18482 }, { "epoch": 0.7231786524767196, "grad_norm": 0.0, "learning_rate": 3.7570376755302263e-06, "loss": 1.0294, "step": 18483 }, { "epoch": 0.723217779168949, "grad_norm": 0.0, "learning_rate": 3.7560477803078687e-06, "loss": 0.9899, "step": 18484 }, { "epoch": 0.7232569058611785, "grad_norm": 0.0, "learning_rate": 3.75505798535556e-06, "loss": 0.9153, "step": 18485 }, { "epoch": 0.7232960325534079, "grad_norm": 0.0, "learning_rate": 3.7540682906891957e-06, "loss": 1.1453, "step": 18486 }, { "epoch": 0.7233351592456374, "grad_norm": 0.0, "learning_rate": 3.75307869632467e-06, "loss": 0.9128, "step": 18487 }, { "epoch": 0.7233742859378668, "grad_norm": 0.0, "learning_rate": 3.75208920227787e-06, "loss": 0.9955, "step": 18488 }, { "epoch": 0.7234134126300963, "grad_norm": 0.0, "learning_rate": 3.751099808564692e-06, "loss": 1.0104, "step": 18489 }, { "epoch": 0.7234525393223257, "grad_norm": 0.0, "learning_rate": 3.7501105152010132e-06, "loss": 1.2011, "step": 18490 }, { "epoch": 0.7234916660145552, "grad_norm": 0.0, "learning_rate": 3.7491213222027347e-06, "loss": 0.9826, "step": 18491 }, { "epoch": 0.7235307927067846, "grad_norm": 0.0, "learning_rate": 3.7481322295857327e-06, "loss": 0.9057, "step": 18492 }, { "epoch": 0.723569919399014, "grad_norm": 0.0, "learning_rate": 3.7471432373658955e-06, "loss": 0.9673, "step": 18493 }, { "epoch": 0.7236090460912434, "grad_norm": 0.0, "learning_rate": 3.7461543455590952e-06, "loss": 0.9674, "step": 18494 }, { "epoch": 0.7236481727834729, "grad_norm": 0.0, "learning_rate": 3.745165554181228e-06, "loss": 1.1053, "step": 18495 }, { "epoch": 0.7236872994757023, "grad_norm": 0.0, "learning_rate": 3.74417686324816e-06, "loss": 0.8886, "step": 18496 }, { "epoch": 0.7237264261679318, "grad_norm": 0.0, "learning_rate": 3.743188272775776e-06, "loss": 0.9847, "step": 18497 }, { "epoch": 0.7237655528601612, "grad_norm": 0.0, "learning_rate": 3.742199782779945e-06, "loss": 0.9484, "step": 18498 }, { "epoch": 0.7238046795523907, "grad_norm": 0.0, "learning_rate": 3.7412113932765436e-06, "loss": 0.7785, "step": 18499 }, { "epoch": 0.7238438062446201, "grad_norm": 0.0, "learning_rate": 3.740223104281445e-06, "loss": 0.9158, "step": 18500 }, { "epoch": 0.7238829329368495, "grad_norm": 0.0, "learning_rate": 3.7392349158105223e-06, "loss": 0.9964, "step": 18501 }, { "epoch": 0.723922059629079, "grad_norm": 0.0, "learning_rate": 3.7382468278796393e-06, "loss": 0.993, "step": 18502 }, { "epoch": 0.7239611863213083, "grad_norm": 0.0, "learning_rate": 3.737258840504665e-06, "loss": 0.9277, "step": 18503 }, { "epoch": 0.7240003130135378, "grad_norm": 0.0, "learning_rate": 3.7362709537014696e-06, "loss": 0.9345, "step": 18504 }, { "epoch": 0.7240394397057672, "grad_norm": 0.0, "learning_rate": 3.7352831674859103e-06, "loss": 0.8997, "step": 18505 }, { "epoch": 0.7240785663979967, "grad_norm": 0.0, "learning_rate": 3.734295481873853e-06, "loss": 1.1153, "step": 18506 }, { "epoch": 0.7241176930902261, "grad_norm": 0.0, "learning_rate": 3.733307896881162e-06, "loss": 0.9953, "step": 18507 }, { "epoch": 0.7241568197824556, "grad_norm": 0.0, "learning_rate": 3.732320412523691e-06, "loss": 1.0735, "step": 18508 }, { "epoch": 0.724195946474685, "grad_norm": 0.0, "learning_rate": 3.7313330288173e-06, "loss": 0.8842, "step": 18509 }, { "epoch": 0.7242350731669145, "grad_norm": 0.0, "learning_rate": 3.7303457457778493e-06, "loss": 0.9628, "step": 18510 }, { "epoch": 0.7242741998591439, "grad_norm": 0.0, "learning_rate": 3.729358563421186e-06, "loss": 0.9781, "step": 18511 }, { "epoch": 0.7243133265513734, "grad_norm": 0.0, "learning_rate": 3.7283714817631665e-06, "loss": 0.9541, "step": 18512 }, { "epoch": 0.7243524532436028, "grad_norm": 0.0, "learning_rate": 3.727384500819642e-06, "loss": 0.8925, "step": 18513 }, { "epoch": 0.7243915799358323, "grad_norm": 0.0, "learning_rate": 3.7263976206064657e-06, "loss": 0.8785, "step": 18514 }, { "epoch": 0.7244307066280616, "grad_norm": 0.0, "learning_rate": 3.7254108411394794e-06, "loss": 0.9082, "step": 18515 }, { "epoch": 0.7244698333202911, "grad_norm": 0.0, "learning_rate": 3.724424162434532e-06, "loss": 0.9497, "step": 18516 }, { "epoch": 0.7245089600125205, "grad_norm": 0.0, "learning_rate": 3.7234375845074686e-06, "loss": 0.8982, "step": 18517 }, { "epoch": 0.72454808670475, "grad_norm": 0.0, "learning_rate": 3.7224511073741376e-06, "loss": 1.1094, "step": 18518 }, { "epoch": 0.7245872133969794, "grad_norm": 0.0, "learning_rate": 3.7214647310503704e-06, "loss": 0.8817, "step": 18519 }, { "epoch": 0.7246263400892089, "grad_norm": 0.0, "learning_rate": 3.720478455552017e-06, "loss": 0.9825, "step": 18520 }, { "epoch": 0.7246654667814383, "grad_norm": 0.0, "learning_rate": 3.719492280894903e-06, "loss": 0.9795, "step": 18521 }, { "epoch": 0.7247045934736678, "grad_norm": 0.0, "learning_rate": 3.7185062070948806e-06, "loss": 1.1268, "step": 18522 }, { "epoch": 0.7247437201658972, "grad_norm": 0.0, "learning_rate": 3.717520234167773e-06, "loss": 0.91, "step": 18523 }, { "epoch": 0.7247828468581267, "grad_norm": 0.0, "learning_rate": 3.7165343621294227e-06, "loss": 1.0094, "step": 18524 }, { "epoch": 0.724821973550356, "grad_norm": 0.0, "learning_rate": 3.715548590995649e-06, "loss": 0.9585, "step": 18525 }, { "epoch": 0.7248611002425855, "grad_norm": 0.0, "learning_rate": 3.7145629207822973e-06, "loss": 1.0496, "step": 18526 }, { "epoch": 0.7249002269348149, "grad_norm": 0.0, "learning_rate": 3.7135773515051866e-06, "loss": 0.9199, "step": 18527 }, { "epoch": 0.7249393536270444, "grad_norm": 0.0, "learning_rate": 3.712591883180149e-06, "loss": 0.9583, "step": 18528 }, { "epoch": 0.7249784803192738, "grad_norm": 0.0, "learning_rate": 3.711606515823003e-06, "loss": 1.0324, "step": 18529 }, { "epoch": 0.7250176070115032, "grad_norm": 0.0, "learning_rate": 3.7106212494495776e-06, "loss": 0.94, "step": 18530 }, { "epoch": 0.7250567337037327, "grad_norm": 0.0, "learning_rate": 3.709636084075693e-06, "loss": 1.0884, "step": 18531 }, { "epoch": 0.7250958603959621, "grad_norm": 0.0, "learning_rate": 3.7086510197171744e-06, "loss": 0.9059, "step": 18532 }, { "epoch": 0.7251349870881916, "grad_norm": 0.0, "learning_rate": 3.7076660563898336e-06, "loss": 1.0137, "step": 18533 }, { "epoch": 0.725174113780421, "grad_norm": 0.0, "learning_rate": 3.7066811941094915e-06, "loss": 1.0008, "step": 18534 }, { "epoch": 0.7252132404726505, "grad_norm": 0.0, "learning_rate": 3.705696432891963e-06, "loss": 1.062, "step": 18535 }, { "epoch": 0.7252523671648798, "grad_norm": 0.0, "learning_rate": 3.704711772753066e-06, "loss": 1.0058, "step": 18536 }, { "epoch": 0.7252914938571093, "grad_norm": 0.0, "learning_rate": 3.7037272137086067e-06, "loss": 0.9149, "step": 18537 }, { "epoch": 0.7253306205493387, "grad_norm": 0.0, "learning_rate": 3.702742755774401e-06, "loss": 0.9362, "step": 18538 }, { "epoch": 0.7253697472415682, "grad_norm": 0.0, "learning_rate": 3.70175839896625e-06, "loss": 1.0685, "step": 18539 }, { "epoch": 0.7254088739337976, "grad_norm": 0.0, "learning_rate": 3.7007741432999734e-06, "loss": 0.9842, "step": 18540 }, { "epoch": 0.7254480006260271, "grad_norm": 0.0, "learning_rate": 3.699789988791367e-06, "loss": 0.8668, "step": 18541 }, { "epoch": 0.7254871273182565, "grad_norm": 0.0, "learning_rate": 3.698805935456242e-06, "loss": 1.0886, "step": 18542 }, { "epoch": 0.725526254010486, "grad_norm": 0.0, "learning_rate": 3.6978219833103946e-06, "loss": 1.0965, "step": 18543 }, { "epoch": 0.7255653807027154, "grad_norm": 0.0, "learning_rate": 3.69683813236963e-06, "loss": 0.9256, "step": 18544 }, { "epoch": 0.7256045073949449, "grad_norm": 0.0, "learning_rate": 3.6958543826497462e-06, "loss": 0.9311, "step": 18545 }, { "epoch": 0.7256436340871742, "grad_norm": 0.0, "learning_rate": 3.694870734166545e-06, "loss": 0.8863, "step": 18546 }, { "epoch": 0.7256827607794037, "grad_norm": 0.0, "learning_rate": 3.693887186935814e-06, "loss": 0.9347, "step": 18547 }, { "epoch": 0.7257218874716331, "grad_norm": 0.0, "learning_rate": 3.6929037409733546e-06, "loss": 0.9744, "step": 18548 }, { "epoch": 0.7257610141638626, "grad_norm": 0.0, "learning_rate": 3.691920396294957e-06, "loss": 1.0496, "step": 18549 }, { "epoch": 0.725800140856092, "grad_norm": 0.0, "learning_rate": 3.6909371529164174e-06, "loss": 0.9503, "step": 18550 }, { "epoch": 0.7258392675483215, "grad_norm": 0.0, "learning_rate": 3.6899540108535183e-06, "loss": 1.0172, "step": 18551 }, { "epoch": 0.7258783942405509, "grad_norm": 0.0, "learning_rate": 3.68897097012205e-06, "loss": 1.1028, "step": 18552 }, { "epoch": 0.7259175209327804, "grad_norm": 0.0, "learning_rate": 3.6879880307378035e-06, "loss": 1.106, "step": 18553 }, { "epoch": 0.7259566476250098, "grad_norm": 0.0, "learning_rate": 3.6870051927165562e-06, "loss": 0.9484, "step": 18554 }, { "epoch": 0.7259957743172393, "grad_norm": 0.0, "learning_rate": 3.6860224560740956e-06, "loss": 1.0945, "step": 18555 }, { "epoch": 0.7260349010094687, "grad_norm": 0.0, "learning_rate": 3.685039820826205e-06, "loss": 1.0674, "step": 18556 }, { "epoch": 0.726074027701698, "grad_norm": 0.0, "learning_rate": 3.684057286988658e-06, "loss": 1.0325, "step": 18557 }, { "epoch": 0.7261131543939275, "grad_norm": 0.0, "learning_rate": 3.6830748545772377e-06, "loss": 0.9135, "step": 18558 }, { "epoch": 0.7261522810861569, "grad_norm": 0.0, "learning_rate": 3.6820925236077232e-06, "loss": 0.9507, "step": 18559 }, { "epoch": 0.7261914077783864, "grad_norm": 0.0, "learning_rate": 3.681110294095882e-06, "loss": 1.0515, "step": 18560 }, { "epoch": 0.7262305344706158, "grad_norm": 0.0, "learning_rate": 3.6801281660574915e-06, "loss": 0.973, "step": 18561 }, { "epoch": 0.7262696611628453, "grad_norm": 0.0, "learning_rate": 3.6791461395083238e-06, "loss": 0.9799, "step": 18562 }, { "epoch": 0.7263087878550747, "grad_norm": 0.0, "learning_rate": 3.678164214464152e-06, "loss": 0.9163, "step": 18563 }, { "epoch": 0.7263479145473042, "grad_norm": 0.0, "learning_rate": 3.6771823909407377e-06, "loss": 1.0049, "step": 18564 }, { "epoch": 0.7263870412395336, "grad_norm": 0.0, "learning_rate": 3.6762006689538543e-06, "loss": 1.019, "step": 18565 }, { "epoch": 0.7264261679317631, "grad_norm": 0.0, "learning_rate": 3.6752190485192575e-06, "loss": 1.0329, "step": 18566 }, { "epoch": 0.7264652946239925, "grad_norm": 0.0, "learning_rate": 3.6742375296527244e-06, "loss": 0.92, "step": 18567 }, { "epoch": 0.726504421316222, "grad_norm": 0.0, "learning_rate": 3.673256112370006e-06, "loss": 1.0453, "step": 18568 }, { "epoch": 0.7265435480084513, "grad_norm": 0.0, "learning_rate": 3.67227479668687e-06, "loss": 0.8965, "step": 18569 }, { "epoch": 0.7265826747006808, "grad_norm": 0.0, "learning_rate": 3.6712935826190656e-06, "loss": 1.0354, "step": 18570 }, { "epoch": 0.7266218013929102, "grad_norm": 0.0, "learning_rate": 3.6703124701823623e-06, "loss": 0.9711, "step": 18571 }, { "epoch": 0.7266609280851397, "grad_norm": 0.0, "learning_rate": 3.6693314593925054e-06, "loss": 0.8955, "step": 18572 }, { "epoch": 0.7267000547773691, "grad_norm": 0.0, "learning_rate": 3.6683505502652563e-06, "loss": 1.0054, "step": 18573 }, { "epoch": 0.7267391814695986, "grad_norm": 0.0, "learning_rate": 3.6673697428163568e-06, "loss": 0.9917, "step": 18574 }, { "epoch": 0.726778308161828, "grad_norm": 0.0, "learning_rate": 3.6663890370615705e-06, "loss": 1.0978, "step": 18575 }, { "epoch": 0.7268174348540575, "grad_norm": 0.0, "learning_rate": 3.6654084330166362e-06, "loss": 1.0928, "step": 18576 }, { "epoch": 0.7268565615462869, "grad_norm": 0.0, "learning_rate": 3.6644279306973083e-06, "loss": 1.0273, "step": 18577 }, { "epoch": 0.7268956882385164, "grad_norm": 0.0, "learning_rate": 3.6634475301193264e-06, "loss": 1.075, "step": 18578 }, { "epoch": 0.7269348149307457, "grad_norm": 0.0, "learning_rate": 3.6624672312984367e-06, "loss": 1.0994, "step": 18579 }, { "epoch": 0.7269739416229752, "grad_norm": 0.0, "learning_rate": 3.6614870342503806e-06, "loss": 0.9298, "step": 18580 }, { "epoch": 0.7270130683152046, "grad_norm": 0.0, "learning_rate": 3.6605069389909044e-06, "loss": 1.1204, "step": 18581 }, { "epoch": 0.7270521950074341, "grad_norm": 0.0, "learning_rate": 3.6595269455357395e-06, "loss": 0.9583, "step": 18582 }, { "epoch": 0.7270913216996635, "grad_norm": 0.0, "learning_rate": 3.6585470539006274e-06, "loss": 1.0483, "step": 18583 }, { "epoch": 0.727130448391893, "grad_norm": 0.0, "learning_rate": 3.6575672641013028e-06, "loss": 0.9148, "step": 18584 }, { "epoch": 0.7271695750841224, "grad_norm": 0.0, "learning_rate": 3.6565875761535032e-06, "loss": 0.9312, "step": 18585 }, { "epoch": 0.7272087017763518, "grad_norm": 0.0, "learning_rate": 3.6556079900729555e-06, "loss": 0.9705, "step": 18586 }, { "epoch": 0.7272478284685813, "grad_norm": 0.0, "learning_rate": 3.654628505875397e-06, "loss": 1.0225, "step": 18587 }, { "epoch": 0.7272869551608107, "grad_norm": 0.0, "learning_rate": 3.653649123576547e-06, "loss": 0.8918, "step": 18588 }, { "epoch": 0.7273260818530402, "grad_norm": 0.0, "learning_rate": 3.6526698431921458e-06, "loss": 1.0784, "step": 18589 }, { "epoch": 0.7273652085452695, "grad_norm": 0.0, "learning_rate": 3.6516906647379103e-06, "loss": 0.8969, "step": 18590 }, { "epoch": 0.727404335237499, "grad_norm": 0.0, "learning_rate": 3.650711588229572e-06, "loss": 1.0334, "step": 18591 }, { "epoch": 0.7274434619297284, "grad_norm": 0.0, "learning_rate": 3.649732613682845e-06, "loss": 1.0466, "step": 18592 }, { "epoch": 0.7274825886219579, "grad_norm": 0.0, "learning_rate": 3.6487537411134545e-06, "loss": 0.8871, "step": 18593 }, { "epoch": 0.7275217153141873, "grad_norm": 0.0, "learning_rate": 3.6477749705371215e-06, "loss": 0.9821, "step": 18594 }, { "epoch": 0.7275608420064168, "grad_norm": 0.0, "learning_rate": 3.646796301969565e-06, "loss": 1.0359, "step": 18595 }, { "epoch": 0.7275999686986462, "grad_norm": 0.0, "learning_rate": 3.6458177354264967e-06, "loss": 0.9586, "step": 18596 }, { "epoch": 0.7276390953908757, "grad_norm": 0.0, "learning_rate": 3.6448392709236324e-06, "loss": 1.0088, "step": 18597 }, { "epoch": 0.7276782220831051, "grad_norm": 0.0, "learning_rate": 3.6438609084766894e-06, "loss": 1.0083, "step": 18598 }, { "epoch": 0.7277173487753346, "grad_norm": 0.0, "learning_rate": 3.6428826481013725e-06, "loss": 0.9538, "step": 18599 }, { "epoch": 0.727756475467564, "grad_norm": 0.0, "learning_rate": 3.641904489813395e-06, "loss": 1.0193, "step": 18600 }, { "epoch": 0.7277956021597934, "grad_norm": 0.0, "learning_rate": 3.6409264336284635e-06, "loss": 1.011, "step": 18601 }, { "epoch": 0.7278347288520228, "grad_norm": 0.0, "learning_rate": 3.6399484795622874e-06, "loss": 0.8738, "step": 18602 }, { "epoch": 0.7278738555442523, "grad_norm": 0.0, "learning_rate": 3.638970627630567e-06, "loss": 0.9989, "step": 18603 }, { "epoch": 0.7279129822364817, "grad_norm": 0.0, "learning_rate": 3.6379928778490117e-06, "loss": 0.9689, "step": 18604 }, { "epoch": 0.7279521089287112, "grad_norm": 0.0, "learning_rate": 3.637015230233314e-06, "loss": 0.9969, "step": 18605 }, { "epoch": 0.7279912356209406, "grad_norm": 0.0, "learning_rate": 3.6360376847991785e-06, "loss": 0.9385, "step": 18606 }, { "epoch": 0.7280303623131701, "grad_norm": 0.0, "learning_rate": 3.635060241562304e-06, "loss": 1.1137, "step": 18607 }, { "epoch": 0.7280694890053995, "grad_norm": 0.0, "learning_rate": 3.6340829005383893e-06, "loss": 0.9856, "step": 18608 }, { "epoch": 0.728108615697629, "grad_norm": 0.0, "learning_rate": 3.6331056617431224e-06, "loss": 0.8272, "step": 18609 }, { "epoch": 0.7281477423898584, "grad_norm": 0.0, "learning_rate": 3.632128525192201e-06, "loss": 1.0792, "step": 18610 }, { "epoch": 0.7281868690820879, "grad_norm": 0.0, "learning_rate": 3.6311514909013155e-06, "loss": 0.9043, "step": 18611 }, { "epoch": 0.7282259957743172, "grad_norm": 0.0, "learning_rate": 3.63017455888616e-06, "loss": 1.1376, "step": 18612 }, { "epoch": 0.7282651224665467, "grad_norm": 0.0, "learning_rate": 3.629197729162417e-06, "loss": 1.0641, "step": 18613 }, { "epoch": 0.7283042491587761, "grad_norm": 0.0, "learning_rate": 3.6282210017457775e-06, "loss": 1.0013, "step": 18614 }, { "epoch": 0.7283433758510055, "grad_norm": 0.0, "learning_rate": 3.6272443766519183e-06, "loss": 1.0188, "step": 18615 }, { "epoch": 0.728382502543235, "grad_norm": 0.0, "learning_rate": 3.6262678538965357e-06, "loss": 0.9207, "step": 18616 }, { "epoch": 0.7284216292354644, "grad_norm": 0.0, "learning_rate": 3.6252914334953017e-06, "loss": 1.0642, "step": 18617 }, { "epoch": 0.7284607559276939, "grad_norm": 0.0, "learning_rate": 3.624315115463901e-06, "loss": 0.9276, "step": 18618 }, { "epoch": 0.7284998826199233, "grad_norm": 0.0, "learning_rate": 3.6233388998180054e-06, "loss": 0.927, "step": 18619 }, { "epoch": 0.7285390093121528, "grad_norm": 0.0, "learning_rate": 3.6223627865733025e-06, "loss": 0.9797, "step": 18620 }, { "epoch": 0.7285781360043821, "grad_norm": 0.0, "learning_rate": 3.6213867757454578e-06, "loss": 0.9789, "step": 18621 }, { "epoch": 0.7286172626966116, "grad_norm": 0.0, "learning_rate": 3.6204108673501526e-06, "loss": 1.0113, "step": 18622 }, { "epoch": 0.728656389388841, "grad_norm": 0.0, "learning_rate": 3.6194350614030514e-06, "loss": 1.0945, "step": 18623 }, { "epoch": 0.7286955160810705, "grad_norm": 0.0, "learning_rate": 3.618459357919828e-06, "loss": 0.9873, "step": 18624 }, { "epoch": 0.7287346427732999, "grad_norm": 0.0, "learning_rate": 3.6174837569161513e-06, "loss": 1.0565, "step": 18625 }, { "epoch": 0.7287737694655294, "grad_norm": 0.0, "learning_rate": 3.6165082584076906e-06, "loss": 0.976, "step": 18626 }, { "epoch": 0.7288128961577588, "grad_norm": 0.0, "learning_rate": 3.6155328624101036e-06, "loss": 0.9516, "step": 18627 }, { "epoch": 0.7288520228499883, "grad_norm": 0.0, "learning_rate": 3.614557568939061e-06, "loss": 1.0144, "step": 18628 }, { "epoch": 0.7288911495422177, "grad_norm": 0.0, "learning_rate": 3.613582378010221e-06, "loss": 0.9719, "step": 18629 }, { "epoch": 0.7289302762344472, "grad_norm": 0.0, "learning_rate": 3.612607289639248e-06, "loss": 1.0866, "step": 18630 }, { "epoch": 0.7289694029266766, "grad_norm": 0.0, "learning_rate": 3.611632303841797e-06, "loss": 0.9716, "step": 18631 }, { "epoch": 0.729008529618906, "grad_norm": 0.0, "learning_rate": 3.6106574206335244e-06, "loss": 1.0997, "step": 18632 }, { "epoch": 0.7290476563111354, "grad_norm": 0.0, "learning_rate": 3.6096826400300875e-06, "loss": 0.9918, "step": 18633 }, { "epoch": 0.7290867830033649, "grad_norm": 0.0, "learning_rate": 3.6087079620471443e-06, "loss": 1.0207, "step": 18634 }, { "epoch": 0.7291259096955943, "grad_norm": 0.0, "learning_rate": 3.6077333867003382e-06, "loss": 1.0304, "step": 18635 }, { "epoch": 0.7291650363878238, "grad_norm": 0.0, "learning_rate": 3.606758914005327e-06, "loss": 0.9539, "step": 18636 }, { "epoch": 0.7292041630800532, "grad_norm": 0.0, "learning_rate": 3.605784543977754e-06, "loss": 0.8312, "step": 18637 }, { "epoch": 0.7292432897722827, "grad_norm": 0.0, "learning_rate": 3.6048102766332683e-06, "loss": 0.8896, "step": 18638 }, { "epoch": 0.7292824164645121, "grad_norm": 0.0, "learning_rate": 3.6038361119875154e-06, "loss": 0.9284, "step": 18639 }, { "epoch": 0.7293215431567416, "grad_norm": 0.0, "learning_rate": 3.602862050056144e-06, "loss": 0.9865, "step": 18640 }, { "epoch": 0.729360669848971, "grad_norm": 0.0, "learning_rate": 3.6018880908547884e-06, "loss": 1.1137, "step": 18641 }, { "epoch": 0.7293997965412004, "grad_norm": 0.0, "learning_rate": 3.6009142343990934e-06, "loss": 0.9693, "step": 18642 }, { "epoch": 0.7294389232334298, "grad_norm": 0.0, "learning_rate": 3.5999404807047e-06, "loss": 1.0441, "step": 18643 }, { "epoch": 0.7294780499256592, "grad_norm": 0.0, "learning_rate": 3.5989668297872392e-06, "loss": 1.0304, "step": 18644 }, { "epoch": 0.7295171766178887, "grad_norm": 0.0, "learning_rate": 3.59799328166235e-06, "loss": 0.8556, "step": 18645 }, { "epoch": 0.7295563033101181, "grad_norm": 0.0, "learning_rate": 3.5970198363456665e-06, "loss": 1.0248, "step": 18646 }, { "epoch": 0.7295954300023476, "grad_norm": 0.0, "learning_rate": 3.596046493852825e-06, "loss": 0.9718, "step": 18647 }, { "epoch": 0.729634556694577, "grad_norm": 0.0, "learning_rate": 3.5950732541994494e-06, "loss": 0.9668, "step": 18648 }, { "epoch": 0.7296736833868065, "grad_norm": 0.0, "learning_rate": 3.5941001174011738e-06, "loss": 0.9795, "step": 18649 }, { "epoch": 0.7297128100790359, "grad_norm": 0.0, "learning_rate": 3.5931270834736164e-06, "loss": 0.9237, "step": 18650 }, { "epoch": 0.7297519367712654, "grad_norm": 0.0, "learning_rate": 3.5921541524324165e-06, "loss": 0.9897, "step": 18651 }, { "epoch": 0.7297910634634948, "grad_norm": 0.0, "learning_rate": 3.591181324293189e-06, "loss": 1.0971, "step": 18652 }, { "epoch": 0.7298301901557243, "grad_norm": 0.0, "learning_rate": 3.590208599071562e-06, "loss": 1.0456, "step": 18653 }, { "epoch": 0.7298693168479536, "grad_norm": 0.0, "learning_rate": 3.589235976783149e-06, "loss": 0.8835, "step": 18654 }, { "epoch": 0.7299084435401831, "grad_norm": 0.0, "learning_rate": 3.5882634574435737e-06, "loss": 1.0286, "step": 18655 }, { "epoch": 0.7299475702324125, "grad_norm": 0.0, "learning_rate": 3.5872910410684525e-06, "loss": 1.01, "step": 18656 }, { "epoch": 0.729986696924642, "grad_norm": 0.0, "learning_rate": 3.5863187276734045e-06, "loss": 0.8284, "step": 18657 }, { "epoch": 0.7300258236168714, "grad_norm": 0.0, "learning_rate": 3.5853465172740387e-06, "loss": 0.9449, "step": 18658 }, { "epoch": 0.7300649503091009, "grad_norm": 0.0, "learning_rate": 3.584374409885969e-06, "loss": 0.9694, "step": 18659 }, { "epoch": 0.7301040770013303, "grad_norm": 0.0, "learning_rate": 3.5834024055248072e-06, "loss": 0.9373, "step": 18660 }, { "epoch": 0.7301432036935598, "grad_norm": 0.0, "learning_rate": 3.5824305042061656e-06, "loss": 0.9794, "step": 18661 }, { "epoch": 0.7301823303857892, "grad_norm": 0.0, "learning_rate": 3.5814587059456453e-06, "loss": 1.0778, "step": 18662 }, { "epoch": 0.7302214570780187, "grad_norm": 0.0, "learning_rate": 3.5804870107588585e-06, "loss": 0.9858, "step": 18663 }, { "epoch": 0.730260583770248, "grad_norm": 0.0, "learning_rate": 3.579515418661399e-06, "loss": 1.0226, "step": 18664 }, { "epoch": 0.7302997104624775, "grad_norm": 0.0, "learning_rate": 3.578543929668884e-06, "loss": 1.0191, "step": 18665 }, { "epoch": 0.7303388371547069, "grad_norm": 0.0, "learning_rate": 3.5775725437969033e-06, "loss": 1.0194, "step": 18666 }, { "epoch": 0.7303779638469364, "grad_norm": 0.0, "learning_rate": 3.5766012610610635e-06, "loss": 0.9759, "step": 18667 }, { "epoch": 0.7304170905391658, "grad_norm": 0.0, "learning_rate": 3.575630081476952e-06, "loss": 1.0704, "step": 18668 }, { "epoch": 0.7304562172313953, "grad_norm": 0.0, "learning_rate": 3.574659005060177e-06, "loss": 1.0022, "step": 18669 }, { "epoch": 0.7304953439236247, "grad_norm": 0.0, "learning_rate": 3.5736880318263243e-06, "loss": 0.8512, "step": 18670 }, { "epoch": 0.7305344706158541, "grad_norm": 0.0, "learning_rate": 3.572717161790993e-06, "loss": 0.9576, "step": 18671 }, { "epoch": 0.7305735973080836, "grad_norm": 0.0, "learning_rate": 3.5717463949697663e-06, "loss": 0.9981, "step": 18672 }, { "epoch": 0.730612724000313, "grad_norm": 0.0, "learning_rate": 3.5707757313782366e-06, "loss": 0.8874, "step": 18673 }, { "epoch": 0.7306518506925425, "grad_norm": 0.0, "learning_rate": 3.5698051710319936e-06, "loss": 1.0547, "step": 18674 }, { "epoch": 0.7306909773847718, "grad_norm": 0.0, "learning_rate": 3.568834713946625e-06, "loss": 1.1039, "step": 18675 }, { "epoch": 0.7307301040770013, "grad_norm": 0.0, "learning_rate": 3.567864360137708e-06, "loss": 0.9441, "step": 18676 }, { "epoch": 0.7307692307692307, "grad_norm": 0.0, "learning_rate": 3.566894109620831e-06, "loss": 0.9496, "step": 18677 }, { "epoch": 0.7308083574614602, "grad_norm": 0.0, "learning_rate": 3.565923962411573e-06, "loss": 1.0225, "step": 18678 }, { "epoch": 0.7308474841536896, "grad_norm": 0.0, "learning_rate": 3.5649539185255167e-06, "loss": 0.9185, "step": 18679 }, { "epoch": 0.7308866108459191, "grad_norm": 0.0, "learning_rate": 3.5639839779782336e-06, "loss": 0.9585, "step": 18680 }, { "epoch": 0.7309257375381485, "grad_norm": 0.0, "learning_rate": 3.5630141407853068e-06, "loss": 0.9841, "step": 18681 }, { "epoch": 0.730964864230378, "grad_norm": 0.0, "learning_rate": 3.562044406962303e-06, "loss": 1.0084, "step": 18682 }, { "epoch": 0.7310039909226074, "grad_norm": 0.0, "learning_rate": 3.561074776524799e-06, "loss": 0.8344, "step": 18683 }, { "epoch": 0.7310431176148369, "grad_norm": 0.0, "learning_rate": 3.560105249488366e-06, "loss": 0.8331, "step": 18684 }, { "epoch": 0.7310822443070663, "grad_norm": 0.0, "learning_rate": 3.559135825868576e-06, "loss": 0.943, "step": 18685 }, { "epoch": 0.7311213709992957, "grad_norm": 0.0, "learning_rate": 3.5581665056809912e-06, "loss": 0.9916, "step": 18686 }, { "epoch": 0.7311604976915251, "grad_norm": 0.0, "learning_rate": 3.557197288941179e-06, "loss": 0.9001, "step": 18687 }, { "epoch": 0.7311996243837546, "grad_norm": 0.0, "learning_rate": 3.556228175664709e-06, "loss": 0.9999, "step": 18688 }, { "epoch": 0.731238751075984, "grad_norm": 0.0, "learning_rate": 3.5552591658671365e-06, "loss": 1.0369, "step": 18689 }, { "epoch": 0.7312778777682135, "grad_norm": 0.0, "learning_rate": 3.5542902595640273e-06, "loss": 0.9671, "step": 18690 }, { "epoch": 0.7313170044604429, "grad_norm": 0.0, "learning_rate": 3.5533214567709383e-06, "loss": 0.8964, "step": 18691 }, { "epoch": 0.7313561311526724, "grad_norm": 0.0, "learning_rate": 3.552352757503432e-06, "loss": 1.0366, "step": 18692 }, { "epoch": 0.7313952578449018, "grad_norm": 0.0, "learning_rate": 3.5513841617770583e-06, "loss": 0.9774, "step": 18693 }, { "epoch": 0.7314343845371313, "grad_norm": 0.0, "learning_rate": 3.5504156696073767e-06, "loss": 0.9022, "step": 18694 }, { "epoch": 0.7314735112293607, "grad_norm": 0.0, "learning_rate": 3.5494472810099325e-06, "loss": 0.8968, "step": 18695 }, { "epoch": 0.7315126379215902, "grad_norm": 0.0, "learning_rate": 3.5484789960002876e-06, "loss": 1.0055, "step": 18696 }, { "epoch": 0.7315517646138195, "grad_norm": 0.0, "learning_rate": 3.547510814593982e-06, "loss": 1.0938, "step": 18697 }, { "epoch": 0.731590891306049, "grad_norm": 0.0, "learning_rate": 3.5465427368065717e-06, "loss": 0.9459, "step": 18698 }, { "epoch": 0.7316300179982784, "grad_norm": 0.0, "learning_rate": 3.5455747626535907e-06, "loss": 1.0866, "step": 18699 }, { "epoch": 0.7316691446905078, "grad_norm": 0.0, "learning_rate": 3.5446068921505994e-06, "loss": 0.9555, "step": 18700 }, { "epoch": 0.7317082713827373, "grad_norm": 0.0, "learning_rate": 3.5436391253131275e-06, "loss": 0.9703, "step": 18701 }, { "epoch": 0.7317473980749667, "grad_norm": 0.0, "learning_rate": 3.542671462156725e-06, "loss": 1.0636, "step": 18702 }, { "epoch": 0.7317865247671962, "grad_norm": 0.0, "learning_rate": 3.5417039026969246e-06, "loss": 1.0349, "step": 18703 }, { "epoch": 0.7318256514594256, "grad_norm": 0.0, "learning_rate": 3.5407364469492657e-06, "loss": 0.9801, "step": 18704 }, { "epoch": 0.7318647781516551, "grad_norm": 0.0, "learning_rate": 3.539769094929286e-06, "loss": 0.8907, "step": 18705 }, { "epoch": 0.7319039048438845, "grad_norm": 0.0, "learning_rate": 3.5388018466525233e-06, "loss": 1.1174, "step": 18706 }, { "epoch": 0.731943031536114, "grad_norm": 0.0, "learning_rate": 3.5378347021345026e-06, "loss": 0.8555, "step": 18707 }, { "epoch": 0.7319821582283433, "grad_norm": 0.0, "learning_rate": 3.5368676613907595e-06, "loss": 0.9969, "step": 18708 }, { "epoch": 0.7320212849205728, "grad_norm": 0.0, "learning_rate": 3.5359007244368225e-06, "loss": 1.0062, "step": 18709 }, { "epoch": 0.7320604116128022, "grad_norm": 0.0, "learning_rate": 3.5349338912882238e-06, "loss": 0.946, "step": 18710 }, { "epoch": 0.7320995383050317, "grad_norm": 0.0, "learning_rate": 3.533967161960481e-06, "loss": 0.993, "step": 18711 }, { "epoch": 0.7321386649972611, "grad_norm": 0.0, "learning_rate": 3.5330005364691276e-06, "loss": 0.9599, "step": 18712 }, { "epoch": 0.7321777916894906, "grad_norm": 0.0, "learning_rate": 3.532034014829675e-06, "loss": 1.0629, "step": 18713 }, { "epoch": 0.73221691838172, "grad_norm": 0.0, "learning_rate": 3.5310675970576593e-06, "loss": 0.8217, "step": 18714 }, { "epoch": 0.7322560450739495, "grad_norm": 0.0, "learning_rate": 3.5301012831685866e-06, "loss": 1.0504, "step": 18715 }, { "epoch": 0.7322951717661789, "grad_norm": 0.0, "learning_rate": 3.5291350731779848e-06, "loss": 0.8068, "step": 18716 }, { "epoch": 0.7323342984584084, "grad_norm": 0.0, "learning_rate": 3.5281689671013574e-06, "loss": 1.0668, "step": 18717 }, { "epoch": 0.7323734251506377, "grad_norm": 0.0, "learning_rate": 3.527202964954235e-06, "loss": 0.9335, "step": 18718 }, { "epoch": 0.7324125518428672, "grad_norm": 0.0, "learning_rate": 3.526237066752117e-06, "loss": 1.012, "step": 18719 }, { "epoch": 0.7324516785350966, "grad_norm": 0.0, "learning_rate": 3.5252712725105245e-06, "loss": 0.9618, "step": 18720 }, { "epoch": 0.7324908052273261, "grad_norm": 0.0, "learning_rate": 3.5243055822449577e-06, "loss": 1.0019, "step": 18721 }, { "epoch": 0.7325299319195555, "grad_norm": 0.0, "learning_rate": 3.523339995970929e-06, "loss": 1.0292, "step": 18722 }, { "epoch": 0.732569058611785, "grad_norm": 0.0, "learning_rate": 3.5223745137039446e-06, "loss": 0.9477, "step": 18723 }, { "epoch": 0.7326081853040144, "grad_norm": 0.0, "learning_rate": 3.5214091354595125e-06, "loss": 0.9987, "step": 18724 }, { "epoch": 0.7326473119962439, "grad_norm": 0.0, "learning_rate": 3.5204438612531264e-06, "loss": 0.9572, "step": 18725 }, { "epoch": 0.7326864386884733, "grad_norm": 0.0, "learning_rate": 3.5194786911002944e-06, "loss": 0.9754, "step": 18726 }, { "epoch": 0.7327255653807028, "grad_norm": 0.0, "learning_rate": 3.5185136250165163e-06, "loss": 1.0526, "step": 18727 }, { "epoch": 0.7327646920729322, "grad_norm": 0.0, "learning_rate": 3.517548663017285e-06, "loss": 0.9366, "step": 18728 }, { "epoch": 0.7328038187651615, "grad_norm": 0.0, "learning_rate": 3.5165838051180988e-06, "loss": 0.9742, "step": 18729 }, { "epoch": 0.732842945457391, "grad_norm": 0.0, "learning_rate": 3.5156190513344556e-06, "loss": 0.985, "step": 18730 }, { "epoch": 0.7328820721496204, "grad_norm": 0.0, "learning_rate": 3.5146544016818417e-06, "loss": 0.8951, "step": 18731 }, { "epoch": 0.7329211988418499, "grad_norm": 0.0, "learning_rate": 3.5136898561757517e-06, "loss": 0.9468, "step": 18732 }, { "epoch": 0.7329603255340793, "grad_norm": 0.0, "learning_rate": 3.512725414831678e-06, "loss": 0.9051, "step": 18733 }, { "epoch": 0.7329994522263088, "grad_norm": 0.0, "learning_rate": 3.5117610776651023e-06, "loss": 0.8555, "step": 18734 }, { "epoch": 0.7330385789185382, "grad_norm": 0.0, "learning_rate": 3.510796844691513e-06, "loss": 1.0214, "step": 18735 }, { "epoch": 0.7330777056107677, "grad_norm": 0.0, "learning_rate": 3.5098327159263957e-06, "loss": 0.8996, "step": 18736 }, { "epoch": 0.7331168323029971, "grad_norm": 0.0, "learning_rate": 3.5088686913852353e-06, "loss": 0.8393, "step": 18737 }, { "epoch": 0.7331559589952266, "grad_norm": 0.0, "learning_rate": 3.5079047710835055e-06, "loss": 0.9333, "step": 18738 }, { "epoch": 0.733195085687456, "grad_norm": 0.0, "learning_rate": 3.506940955036695e-06, "loss": 0.9893, "step": 18739 }, { "epoch": 0.7332342123796854, "grad_norm": 0.0, "learning_rate": 3.505977243260269e-06, "loss": 0.9117, "step": 18740 }, { "epoch": 0.7332733390719148, "grad_norm": 0.0, "learning_rate": 3.5050136357697174e-06, "loss": 1.0518, "step": 18741 }, { "epoch": 0.7333124657641443, "grad_norm": 0.0, "learning_rate": 3.5040501325805055e-06, "loss": 0.8963, "step": 18742 }, { "epoch": 0.7333515924563737, "grad_norm": 0.0, "learning_rate": 3.503086733708111e-06, "loss": 0.9951, "step": 18743 }, { "epoch": 0.7333907191486032, "grad_norm": 0.0, "learning_rate": 3.502123439167997e-06, "loss": 0.9442, "step": 18744 }, { "epoch": 0.7334298458408326, "grad_norm": 0.0, "learning_rate": 3.5011602489756437e-06, "loss": 0.9785, "step": 18745 }, { "epoch": 0.7334689725330621, "grad_norm": 0.0, "learning_rate": 3.5001971631465117e-06, "loss": 1.0082, "step": 18746 }, { "epoch": 0.7335080992252915, "grad_norm": 0.0, "learning_rate": 3.4992341816960693e-06, "loss": 0.993, "step": 18747 }, { "epoch": 0.733547225917521, "grad_norm": 0.0, "learning_rate": 3.4982713046397755e-06, "loss": 0.9693, "step": 18748 }, { "epoch": 0.7335863526097504, "grad_norm": 0.0, "learning_rate": 3.4973085319931034e-06, "loss": 0.9939, "step": 18749 }, { "epoch": 0.7336254793019799, "grad_norm": 0.0, "learning_rate": 3.496345863771504e-06, "loss": 0.8938, "step": 18750 }, { "epoch": 0.7336646059942092, "grad_norm": 0.0, "learning_rate": 3.4953832999904446e-06, "loss": 0.97, "step": 18751 }, { "epoch": 0.7337037326864387, "grad_norm": 0.0, "learning_rate": 3.494420840665376e-06, "loss": 1.0813, "step": 18752 }, { "epoch": 0.7337428593786681, "grad_norm": 0.0, "learning_rate": 3.493458485811756e-06, "loss": 0.9327, "step": 18753 }, { "epoch": 0.7337819860708976, "grad_norm": 0.0, "learning_rate": 3.4924962354450388e-06, "loss": 0.982, "step": 18754 }, { "epoch": 0.733821112763127, "grad_norm": 0.0, "learning_rate": 3.4915340895806816e-06, "loss": 0.9325, "step": 18755 }, { "epoch": 0.7338602394553564, "grad_norm": 0.0, "learning_rate": 3.4905720482341287e-06, "loss": 0.9654, "step": 18756 }, { "epoch": 0.7338993661475859, "grad_norm": 0.0, "learning_rate": 3.4896101114208313e-06, "loss": 0.8959, "step": 18757 }, { "epoch": 0.7339384928398153, "grad_norm": 0.0, "learning_rate": 3.488648279156237e-06, "loss": 1.0574, "step": 18758 }, { "epoch": 0.7339776195320448, "grad_norm": 0.0, "learning_rate": 3.4876865514557966e-06, "loss": 0.9558, "step": 18759 }, { "epoch": 0.7340167462242742, "grad_norm": 0.0, "learning_rate": 3.486724928334946e-06, "loss": 0.8591, "step": 18760 }, { "epoch": 0.7340558729165036, "grad_norm": 0.0, "learning_rate": 3.4857634098091353e-06, "loss": 1.0481, "step": 18761 }, { "epoch": 0.734094999608733, "grad_norm": 0.0, "learning_rate": 3.484801995893794e-06, "loss": 0.938, "step": 18762 }, { "epoch": 0.7341341263009625, "grad_norm": 0.0, "learning_rate": 3.483840686604375e-06, "loss": 0.9955, "step": 18763 }, { "epoch": 0.7341732529931919, "grad_norm": 0.0, "learning_rate": 3.482879481956307e-06, "loss": 0.9977, "step": 18764 }, { "epoch": 0.7342123796854214, "grad_norm": 0.0, "learning_rate": 3.4819183819650303e-06, "loss": 0.9591, "step": 18765 }, { "epoch": 0.7342515063776508, "grad_norm": 0.0, "learning_rate": 3.4809573866459744e-06, "loss": 0.9593, "step": 18766 }, { "epoch": 0.7342906330698803, "grad_norm": 0.0, "learning_rate": 3.4799964960145738e-06, "loss": 0.9998, "step": 18767 }, { "epoch": 0.7343297597621097, "grad_norm": 0.0, "learning_rate": 3.4790357100862604e-06, "loss": 0.9445, "step": 18768 }, { "epoch": 0.7343688864543392, "grad_norm": 0.0, "learning_rate": 3.478075028876464e-06, "loss": 1.0154, "step": 18769 }, { "epoch": 0.7344080131465686, "grad_norm": 0.0, "learning_rate": 3.4771144524006072e-06, "loss": 0.8857, "step": 18770 }, { "epoch": 0.7344471398387981, "grad_norm": 0.0, "learning_rate": 3.4761539806741194e-06, "loss": 1.0593, "step": 18771 }, { "epoch": 0.7344862665310274, "grad_norm": 0.0, "learning_rate": 3.4751936137124265e-06, "loss": 0.8766, "step": 18772 }, { "epoch": 0.7345253932232569, "grad_norm": 0.0, "learning_rate": 3.4742333515309457e-06, "loss": 1.0251, "step": 18773 }, { "epoch": 0.7345645199154863, "grad_norm": 0.0, "learning_rate": 3.473273194145099e-06, "loss": 0.9702, "step": 18774 }, { "epoch": 0.7346036466077158, "grad_norm": 0.0, "learning_rate": 3.472313141570307e-06, "loss": 0.9907, "step": 18775 }, { "epoch": 0.7346427732999452, "grad_norm": 0.0, "learning_rate": 3.471353193821989e-06, "loss": 1.0269, "step": 18776 }, { "epoch": 0.7346818999921747, "grad_norm": 0.0, "learning_rate": 3.470393350915555e-06, "loss": 0.9456, "step": 18777 }, { "epoch": 0.7347210266844041, "grad_norm": 0.0, "learning_rate": 3.469433612866425e-06, "loss": 1.0518, "step": 18778 }, { "epoch": 0.7347601533766336, "grad_norm": 0.0, "learning_rate": 3.4684739796900045e-06, "loss": 1.0717, "step": 18779 }, { "epoch": 0.734799280068863, "grad_norm": 0.0, "learning_rate": 3.4675144514017078e-06, "loss": 0.8055, "step": 18780 }, { "epoch": 0.7348384067610925, "grad_norm": 0.0, "learning_rate": 3.4665550280169435e-06, "loss": 1.0445, "step": 18781 }, { "epoch": 0.7348775334533219, "grad_norm": 0.0, "learning_rate": 3.4655957095511206e-06, "loss": 0.8891, "step": 18782 }, { "epoch": 0.7349166601455513, "grad_norm": 0.0, "learning_rate": 3.464636496019641e-06, "loss": 0.9402, "step": 18783 }, { "epoch": 0.7349557868377807, "grad_norm": 0.0, "learning_rate": 3.463677387437908e-06, "loss": 1.0401, "step": 18784 }, { "epoch": 0.7349949135300101, "grad_norm": 0.0, "learning_rate": 3.4627183838213274e-06, "loss": 0.9835, "step": 18785 }, { "epoch": 0.7350340402222396, "grad_norm": 0.0, "learning_rate": 3.461759485185301e-06, "loss": 1.0381, "step": 18786 }, { "epoch": 0.735073166914469, "grad_norm": 0.0, "learning_rate": 3.46080069154522e-06, "loss": 1.0397, "step": 18787 }, { "epoch": 0.7351122936066985, "grad_norm": 0.0, "learning_rate": 3.4598420029164905e-06, "loss": 0.9728, "step": 18788 }, { "epoch": 0.7351514202989279, "grad_norm": 0.0, "learning_rate": 3.458883419314495e-06, "loss": 0.9579, "step": 18789 }, { "epoch": 0.7351905469911574, "grad_norm": 0.0, "learning_rate": 3.4579249407546435e-06, "loss": 1.0269, "step": 18790 }, { "epoch": 0.7352296736833868, "grad_norm": 0.0, "learning_rate": 3.4569665672523155e-06, "loss": 1.0291, "step": 18791 }, { "epoch": 0.7352688003756163, "grad_norm": 0.0, "learning_rate": 3.4560082988229093e-06, "loss": 0.9481, "step": 18792 }, { "epoch": 0.7353079270678456, "grad_norm": 0.0, "learning_rate": 3.4550501354818023e-06, "loss": 0.9423, "step": 18793 }, { "epoch": 0.7353470537600751, "grad_norm": 0.0, "learning_rate": 3.4540920772443966e-06, "loss": 0.9521, "step": 18794 }, { "epoch": 0.7353861804523045, "grad_norm": 0.0, "learning_rate": 3.4531341241260653e-06, "loss": 1.087, "step": 18795 }, { "epoch": 0.735425307144534, "grad_norm": 0.0, "learning_rate": 3.4521762761421996e-06, "loss": 0.9325, "step": 18796 }, { "epoch": 0.7354644338367634, "grad_norm": 0.0, "learning_rate": 3.451218533308176e-06, "loss": 0.9924, "step": 18797 }, { "epoch": 0.7355035605289929, "grad_norm": 0.0, "learning_rate": 3.4502608956393756e-06, "loss": 1.012, "step": 18798 }, { "epoch": 0.7355426872212223, "grad_norm": 0.0, "learning_rate": 3.4493033631511786e-06, "loss": 0.9591, "step": 18799 }, { "epoch": 0.7355818139134518, "grad_norm": 0.0, "learning_rate": 3.4483459358589634e-06, "loss": 0.8812, "step": 18800 }, { "epoch": 0.7356209406056812, "grad_norm": 0.0, "learning_rate": 3.4473886137781e-06, "loss": 0.8331, "step": 18801 }, { "epoch": 0.7356600672979107, "grad_norm": 0.0, "learning_rate": 3.446431396923965e-06, "loss": 0.9817, "step": 18802 }, { "epoch": 0.73569919399014, "grad_norm": 0.0, "learning_rate": 3.4454742853119293e-06, "loss": 1.1218, "step": 18803 }, { "epoch": 0.7357383206823696, "grad_norm": 0.0, "learning_rate": 3.4445172789573666e-06, "loss": 1.1722, "step": 18804 }, { "epoch": 0.7357774473745989, "grad_norm": 0.0, "learning_rate": 3.4435603778756386e-06, "loss": 1.0225, "step": 18805 }, { "epoch": 0.7358165740668284, "grad_norm": 0.0, "learning_rate": 3.4426035820821156e-06, "loss": 0.9876, "step": 18806 }, { "epoch": 0.7358557007590578, "grad_norm": 0.0, "learning_rate": 3.4416468915921617e-06, "loss": 0.9468, "step": 18807 }, { "epoch": 0.7358948274512873, "grad_norm": 0.0, "learning_rate": 3.440690306421144e-06, "loss": 1.0018, "step": 18808 }, { "epoch": 0.7359339541435167, "grad_norm": 0.0, "learning_rate": 3.4397338265844184e-06, "loss": 0.9079, "step": 18809 }, { "epoch": 0.7359730808357462, "grad_norm": 0.0, "learning_rate": 3.4387774520973495e-06, "loss": 0.8796, "step": 18810 }, { "epoch": 0.7360122075279756, "grad_norm": 0.0, "learning_rate": 3.4378211829752893e-06, "loss": 1.0471, "step": 18811 }, { "epoch": 0.7360513342202051, "grad_norm": 0.0, "learning_rate": 3.4368650192335985e-06, "loss": 0.9664, "step": 18812 }, { "epoch": 0.7360904609124345, "grad_norm": 0.0, "learning_rate": 3.4359089608876316e-06, "loss": 1.0112, "step": 18813 }, { "epoch": 0.7361295876046638, "grad_norm": 0.0, "learning_rate": 3.434953007952745e-06, "loss": 0.9705, "step": 18814 }, { "epoch": 0.7361687142968933, "grad_norm": 0.0, "learning_rate": 3.4339971604442823e-06, "loss": 1.0536, "step": 18815 }, { "epoch": 0.7362078409891227, "grad_norm": 0.0, "learning_rate": 3.4330414183775985e-06, "loss": 0.9344, "step": 18816 }, { "epoch": 0.7362469676813522, "grad_norm": 0.0, "learning_rate": 3.4320857817680443e-06, "loss": 0.9611, "step": 18817 }, { "epoch": 0.7362860943735816, "grad_norm": 0.0, "learning_rate": 3.4311302506309573e-06, "loss": 1.0045, "step": 18818 }, { "epoch": 0.7363252210658111, "grad_norm": 0.0, "learning_rate": 3.430174824981689e-06, "loss": 0.9837, "step": 18819 }, { "epoch": 0.7363643477580405, "grad_norm": 0.0, "learning_rate": 3.4292195048355804e-06, "loss": 0.985, "step": 18820 }, { "epoch": 0.73640347445027, "grad_norm": 0.0, "learning_rate": 3.4282642902079755e-06, "loss": 0.9666, "step": 18821 }, { "epoch": 0.7364426011424994, "grad_norm": 0.0, "learning_rate": 3.427309181114208e-06, "loss": 0.9754, "step": 18822 }, { "epoch": 0.7364817278347289, "grad_norm": 0.0, "learning_rate": 3.4263541775696195e-06, "loss": 1.0406, "step": 18823 }, { "epoch": 0.7365208545269583, "grad_norm": 0.0, "learning_rate": 3.4253992795895454e-06, "loss": 1.0499, "step": 18824 }, { "epoch": 0.7365599812191878, "grad_norm": 0.0, "learning_rate": 3.4244444871893236e-06, "loss": 0.8846, "step": 18825 }, { "epoch": 0.7365991079114171, "grad_norm": 0.0, "learning_rate": 3.423489800384281e-06, "loss": 0.9861, "step": 18826 }, { "epoch": 0.7366382346036466, "grad_norm": 0.0, "learning_rate": 3.422535219189753e-06, "loss": 0.8671, "step": 18827 }, { "epoch": 0.736677361295876, "grad_norm": 0.0, "learning_rate": 3.421580743621066e-06, "loss": 0.9047, "step": 18828 }, { "epoch": 0.7367164879881055, "grad_norm": 0.0, "learning_rate": 3.4206263736935486e-06, "loss": 0.8688, "step": 18829 }, { "epoch": 0.7367556146803349, "grad_norm": 0.0, "learning_rate": 3.419672109422527e-06, "loss": 0.9736, "step": 18830 }, { "epoch": 0.7367947413725644, "grad_norm": 0.0, "learning_rate": 3.418717950823328e-06, "loss": 0.9256, "step": 18831 }, { "epoch": 0.7368338680647938, "grad_norm": 0.0, "learning_rate": 3.4177638979112706e-06, "loss": 1.13, "step": 18832 }, { "epoch": 0.7368729947570233, "grad_norm": 0.0, "learning_rate": 3.416809950701675e-06, "loss": 0.9699, "step": 18833 }, { "epoch": 0.7369121214492527, "grad_norm": 0.0, "learning_rate": 3.415856109209864e-06, "loss": 0.9832, "step": 18834 }, { "epoch": 0.7369512481414822, "grad_norm": 0.0, "learning_rate": 3.4149023734511553e-06, "loss": 0.9079, "step": 18835 }, { "epoch": 0.7369903748337115, "grad_norm": 0.0, "learning_rate": 3.4139487434408615e-06, "loss": 1.0406, "step": 18836 }, { "epoch": 0.737029501525941, "grad_norm": 0.0, "learning_rate": 3.4129952191942995e-06, "loss": 1.1104, "step": 18837 }, { "epoch": 0.7370686282181704, "grad_norm": 0.0, "learning_rate": 3.412041800726775e-06, "loss": 1.0585, "step": 18838 }, { "epoch": 0.7371077549103999, "grad_norm": 0.0, "learning_rate": 3.41108848805361e-06, "loss": 0.9292, "step": 18839 }, { "epoch": 0.7371468816026293, "grad_norm": 0.0, "learning_rate": 3.4101352811901044e-06, "loss": 0.9647, "step": 18840 }, { "epoch": 0.7371860082948588, "grad_norm": 0.0, "learning_rate": 3.4091821801515724e-06, "loss": 0.8662, "step": 18841 }, { "epoch": 0.7372251349870882, "grad_norm": 0.0, "learning_rate": 3.408229184953308e-06, "loss": 0.8578, "step": 18842 }, { "epoch": 0.7372642616793176, "grad_norm": 0.0, "learning_rate": 3.40727629561063e-06, "loss": 0.9692, "step": 18843 }, { "epoch": 0.7373033883715471, "grad_norm": 0.0, "learning_rate": 3.4063235121388305e-06, "loss": 0.9517, "step": 18844 }, { "epoch": 0.7373425150637765, "grad_norm": 0.0, "learning_rate": 3.4053708345532166e-06, "loss": 0.9917, "step": 18845 }, { "epoch": 0.737381641756006, "grad_norm": 0.0, "learning_rate": 3.4044182628690803e-06, "loss": 0.8956, "step": 18846 }, { "epoch": 0.7374207684482353, "grad_norm": 0.0, "learning_rate": 3.4034657971017215e-06, "loss": 0.9979, "step": 18847 }, { "epoch": 0.7374598951404648, "grad_norm": 0.0, "learning_rate": 3.402513437266436e-06, "loss": 1.0038, "step": 18848 }, { "epoch": 0.7374990218326942, "grad_norm": 0.0, "learning_rate": 3.4015611833785213e-06, "loss": 0.9485, "step": 18849 }, { "epoch": 0.7375381485249237, "grad_norm": 0.0, "learning_rate": 3.4006090354532617e-06, "loss": 1.0115, "step": 18850 }, { "epoch": 0.7375772752171531, "grad_norm": 0.0, "learning_rate": 3.399656993505952e-06, "loss": 1.1462, "step": 18851 }, { "epoch": 0.7376164019093826, "grad_norm": 0.0, "learning_rate": 3.398705057551881e-06, "loss": 1.0567, "step": 18852 }, { "epoch": 0.737655528601612, "grad_norm": 0.0, "learning_rate": 3.3977532276063373e-06, "loss": 0.9447, "step": 18853 }, { "epoch": 0.7376946552938415, "grad_norm": 0.0, "learning_rate": 3.3968015036846003e-06, "loss": 0.9048, "step": 18854 }, { "epoch": 0.7377337819860709, "grad_norm": 0.0, "learning_rate": 3.395849885801961e-06, "loss": 1.0624, "step": 18855 }, { "epoch": 0.7377729086783004, "grad_norm": 0.0, "learning_rate": 3.3948983739736896e-06, "loss": 1.0082, "step": 18856 }, { "epoch": 0.7378120353705298, "grad_norm": 0.0, "learning_rate": 3.3939469682150807e-06, "loss": 0.9921, "step": 18857 }, { "epoch": 0.7378511620627592, "grad_norm": 0.0, "learning_rate": 3.392995668541402e-06, "loss": 0.8195, "step": 18858 }, { "epoch": 0.7378902887549886, "grad_norm": 0.0, "learning_rate": 3.3920444749679372e-06, "loss": 0.8629, "step": 18859 }, { "epoch": 0.7379294154472181, "grad_norm": 0.0, "learning_rate": 3.3910933875099548e-06, "loss": 0.9846, "step": 18860 }, { "epoch": 0.7379685421394475, "grad_norm": 0.0, "learning_rate": 3.3901424061827315e-06, "loss": 1.0361, "step": 18861 }, { "epoch": 0.738007668831677, "grad_norm": 0.0, "learning_rate": 3.3891915310015378e-06, "loss": 1.0124, "step": 18862 }, { "epoch": 0.7380467955239064, "grad_norm": 0.0, "learning_rate": 3.388240761981648e-06, "loss": 1.0079, "step": 18863 }, { "epoch": 0.7380859222161359, "grad_norm": 0.0, "learning_rate": 3.387290099138324e-06, "loss": 0.9478, "step": 18864 }, { "epoch": 0.7381250489083653, "grad_norm": 0.0, "learning_rate": 3.386339542486834e-06, "loss": 1.0186, "step": 18865 }, { "epoch": 0.7381641756005948, "grad_norm": 0.0, "learning_rate": 3.385389092042447e-06, "loss": 0.9893, "step": 18866 }, { "epoch": 0.7382033022928242, "grad_norm": 0.0, "learning_rate": 3.384438747820419e-06, "loss": 0.9154, "step": 18867 }, { "epoch": 0.7382424289850537, "grad_norm": 0.0, "learning_rate": 3.3834885098360148e-06, "loss": 0.8889, "step": 18868 }, { "epoch": 0.738281555677283, "grad_norm": 0.0, "learning_rate": 3.382538378104495e-06, "loss": 0.9699, "step": 18869 }, { "epoch": 0.7383206823695124, "grad_norm": 0.0, "learning_rate": 3.3815883526411197e-06, "loss": 0.8961, "step": 18870 }, { "epoch": 0.7383598090617419, "grad_norm": 0.0, "learning_rate": 3.3806384334611386e-06, "loss": 0.8919, "step": 18871 }, { "epoch": 0.7383989357539713, "grad_norm": 0.0, "learning_rate": 3.379688620579813e-06, "loss": 0.9593, "step": 18872 }, { "epoch": 0.7384380624462008, "grad_norm": 0.0, "learning_rate": 3.378738914012386e-06, "loss": 0.9482, "step": 18873 }, { "epoch": 0.7384771891384302, "grad_norm": 0.0, "learning_rate": 3.3777893137741214e-06, "loss": 1.0522, "step": 18874 }, { "epoch": 0.7385163158306597, "grad_norm": 0.0, "learning_rate": 3.37683981988026e-06, "loss": 0.8546, "step": 18875 }, { "epoch": 0.7385554425228891, "grad_norm": 0.0, "learning_rate": 3.375890432346054e-06, "loss": 0.9054, "step": 18876 }, { "epoch": 0.7385945692151186, "grad_norm": 0.0, "learning_rate": 3.3749411511867436e-06, "loss": 1.1046, "step": 18877 }, { "epoch": 0.738633695907348, "grad_norm": 0.0, "learning_rate": 3.373991976417578e-06, "loss": 1.0106, "step": 18878 }, { "epoch": 0.7386728225995774, "grad_norm": 0.0, "learning_rate": 3.3730429080537975e-06, "loss": 0.9277, "step": 18879 }, { "epoch": 0.7387119492918068, "grad_norm": 0.0, "learning_rate": 3.372093946110647e-06, "loss": 0.8902, "step": 18880 }, { "epoch": 0.7387510759840363, "grad_norm": 0.0, "learning_rate": 3.3711450906033603e-06, "loss": 0.9475, "step": 18881 }, { "epoch": 0.7387902026762657, "grad_norm": 0.0, "learning_rate": 3.370196341547176e-06, "loss": 1.0147, "step": 18882 }, { "epoch": 0.7388293293684952, "grad_norm": 0.0, "learning_rate": 3.3692476989573318e-06, "loss": 1.0436, "step": 18883 }, { "epoch": 0.7388684560607246, "grad_norm": 0.0, "learning_rate": 3.3682991628490634e-06, "loss": 0.8632, "step": 18884 }, { "epoch": 0.7389075827529541, "grad_norm": 0.0, "learning_rate": 3.3673507332375966e-06, "loss": 1.0179, "step": 18885 }, { "epoch": 0.7389467094451835, "grad_norm": 0.0, "learning_rate": 3.36640241013817e-06, "loss": 0.9202, "step": 18886 }, { "epoch": 0.738985836137413, "grad_norm": 0.0, "learning_rate": 3.3654541935660014e-06, "loss": 1.0038, "step": 18887 }, { "epoch": 0.7390249628296424, "grad_norm": 0.0, "learning_rate": 3.364506083536332e-06, "loss": 1.0113, "step": 18888 }, { "epoch": 0.7390640895218719, "grad_norm": 0.0, "learning_rate": 3.3635580800643765e-06, "loss": 0.969, "step": 18889 }, { "epoch": 0.7391032162141012, "grad_norm": 0.0, "learning_rate": 3.362610183165366e-06, "loss": 1.0219, "step": 18890 }, { "epoch": 0.7391423429063307, "grad_norm": 0.0, "learning_rate": 3.3616623928545113e-06, "loss": 1.0519, "step": 18891 }, { "epoch": 0.7391814695985601, "grad_norm": 0.0, "learning_rate": 3.360714709147047e-06, "loss": 1.0187, "step": 18892 }, { "epoch": 0.7392205962907896, "grad_norm": 0.0, "learning_rate": 3.3597671320581825e-06, "loss": 0.9434, "step": 18893 }, { "epoch": 0.739259722983019, "grad_norm": 0.0, "learning_rate": 3.35881966160314e-06, "loss": 0.9146, "step": 18894 }, { "epoch": 0.7392988496752485, "grad_norm": 0.0, "learning_rate": 3.3578722977971277e-06, "loss": 0.9749, "step": 18895 }, { "epoch": 0.7393379763674779, "grad_norm": 0.0, "learning_rate": 3.3569250406553644e-06, "loss": 1.0222, "step": 18896 }, { "epoch": 0.7393771030597074, "grad_norm": 0.0, "learning_rate": 3.3559778901930606e-06, "loss": 0.9898, "step": 18897 }, { "epoch": 0.7394162297519368, "grad_norm": 0.0, "learning_rate": 3.35503084642543e-06, "loss": 0.9659, "step": 18898 }, { "epoch": 0.7394553564441662, "grad_norm": 0.0, "learning_rate": 3.3540839093676735e-06, "loss": 1.0325, "step": 18899 }, { "epoch": 0.7394944831363957, "grad_norm": 0.0, "learning_rate": 3.3531370790350016e-06, "loss": 0.972, "step": 18900 }, { "epoch": 0.739533609828625, "grad_norm": 0.0, "learning_rate": 3.3521903554426193e-06, "loss": 0.899, "step": 18901 }, { "epoch": 0.7395727365208545, "grad_norm": 0.0, "learning_rate": 3.351243738605734e-06, "loss": 0.9464, "step": 18902 }, { "epoch": 0.7396118632130839, "grad_norm": 0.0, "learning_rate": 3.3502972285395384e-06, "loss": 0.9033, "step": 18903 }, { "epoch": 0.7396509899053134, "grad_norm": 0.0, "learning_rate": 3.34935082525924e-06, "loss": 1.003, "step": 18904 }, { "epoch": 0.7396901165975428, "grad_norm": 0.0, "learning_rate": 3.3484045287800317e-06, "loss": 0.8514, "step": 18905 }, { "epoch": 0.7397292432897723, "grad_norm": 0.0, "learning_rate": 3.3474583391171102e-06, "loss": 0.8765, "step": 18906 }, { "epoch": 0.7397683699820017, "grad_norm": 0.0, "learning_rate": 3.346512256285672e-06, "loss": 0.9861, "step": 18907 }, { "epoch": 0.7398074966742312, "grad_norm": 0.0, "learning_rate": 3.345566280300914e-06, "loss": 1.0817, "step": 18908 }, { "epoch": 0.7398466233664606, "grad_norm": 0.0, "learning_rate": 3.344620411178019e-06, "loss": 0.8673, "step": 18909 }, { "epoch": 0.7398857500586901, "grad_norm": 0.0, "learning_rate": 3.3436746489321803e-06, "loss": 0.9675, "step": 18910 }, { "epoch": 0.7399248767509194, "grad_norm": 0.0, "learning_rate": 3.342728993578589e-06, "loss": 0.9616, "step": 18911 }, { "epoch": 0.7399640034431489, "grad_norm": 0.0, "learning_rate": 3.341783445132425e-06, "loss": 0.9259, "step": 18912 }, { "epoch": 0.7400031301353783, "grad_norm": 0.0, "learning_rate": 3.3408380036088762e-06, "loss": 1.0424, "step": 18913 }, { "epoch": 0.7400422568276078, "grad_norm": 0.0, "learning_rate": 3.339892669023125e-06, "loss": 0.9199, "step": 18914 }, { "epoch": 0.7400813835198372, "grad_norm": 0.0, "learning_rate": 3.3389474413903542e-06, "loss": 1.0975, "step": 18915 }, { "epoch": 0.7401205102120667, "grad_norm": 0.0, "learning_rate": 3.3380023207257374e-06, "loss": 0.9833, "step": 18916 }, { "epoch": 0.7401596369042961, "grad_norm": 0.0, "learning_rate": 3.337057307044459e-06, "loss": 1.0715, "step": 18917 }, { "epoch": 0.7401987635965256, "grad_norm": 0.0, "learning_rate": 3.336112400361685e-06, "loss": 0.9101, "step": 18918 }, { "epoch": 0.740237890288755, "grad_norm": 0.0, "learning_rate": 3.3351676006926015e-06, "loss": 0.9985, "step": 18919 }, { "epoch": 0.7402770169809845, "grad_norm": 0.0, "learning_rate": 3.3342229080523715e-06, "loss": 0.9647, "step": 18920 }, { "epoch": 0.7403161436732139, "grad_norm": 0.0, "learning_rate": 3.333278322456173e-06, "loss": 1.0005, "step": 18921 }, { "epoch": 0.7403552703654434, "grad_norm": 0.0, "learning_rate": 3.332333843919163e-06, "loss": 0.8254, "step": 18922 }, { "epoch": 0.7403943970576727, "grad_norm": 0.0, "learning_rate": 3.3313894724565244e-06, "loss": 1.0638, "step": 18923 }, { "epoch": 0.7404335237499022, "grad_norm": 0.0, "learning_rate": 3.3304452080834103e-06, "loss": 0.8342, "step": 18924 }, { "epoch": 0.7404726504421316, "grad_norm": 0.0, "learning_rate": 3.3295010508149916e-06, "loss": 0.9176, "step": 18925 }, { "epoch": 0.7405117771343611, "grad_norm": 0.0, "learning_rate": 3.3285570006664257e-06, "loss": 1.0267, "step": 18926 }, { "epoch": 0.7405509038265905, "grad_norm": 0.0, "learning_rate": 3.327613057652873e-06, "loss": 0.9637, "step": 18927 }, { "epoch": 0.7405900305188199, "grad_norm": 0.0, "learning_rate": 3.3266692217894947e-06, "loss": 0.9835, "step": 18928 }, { "epoch": 0.7406291572110494, "grad_norm": 0.0, "learning_rate": 3.3257254930914497e-06, "loss": 1.0547, "step": 18929 }, { "epoch": 0.7406682839032788, "grad_norm": 0.0, "learning_rate": 3.3247818715738867e-06, "loss": 0.9882, "step": 18930 }, { "epoch": 0.7407074105955083, "grad_norm": 0.0, "learning_rate": 3.3238383572519618e-06, "loss": 0.881, "step": 18931 }, { "epoch": 0.7407465372877376, "grad_norm": 0.0, "learning_rate": 3.322894950140827e-06, "loss": 1.1089, "step": 18932 }, { "epoch": 0.7407856639799671, "grad_norm": 0.0, "learning_rate": 3.321951650255637e-06, "loss": 1.0186, "step": 18933 }, { "epoch": 0.7408247906721965, "grad_norm": 0.0, "learning_rate": 3.321008457611531e-06, "loss": 0.8639, "step": 18934 }, { "epoch": 0.740863917364426, "grad_norm": 0.0, "learning_rate": 3.3200653722236632e-06, "loss": 1.0161, "step": 18935 }, { "epoch": 0.7409030440566554, "grad_norm": 0.0, "learning_rate": 3.3191223941071694e-06, "loss": 1.0874, "step": 18936 }, { "epoch": 0.7409421707488849, "grad_norm": 0.0, "learning_rate": 3.318179523277204e-06, "loss": 1.0339, "step": 18937 }, { "epoch": 0.7409812974411143, "grad_norm": 0.0, "learning_rate": 3.3172367597488998e-06, "loss": 1.0283, "step": 18938 }, { "epoch": 0.7410204241333438, "grad_norm": 0.0, "learning_rate": 3.316294103537402e-06, "loss": 1.0537, "step": 18939 }, { "epoch": 0.7410595508255732, "grad_norm": 0.0, "learning_rate": 3.3153515546578395e-06, "loss": 1.0783, "step": 18940 }, { "epoch": 0.7410986775178027, "grad_norm": 0.0, "learning_rate": 3.314409113125362e-06, "loss": 0.9134, "step": 18941 }, { "epoch": 0.7411378042100321, "grad_norm": 0.0, "learning_rate": 3.3134667789550924e-06, "loss": 0.9322, "step": 18942 }, { "epoch": 0.7411769309022616, "grad_norm": 0.0, "learning_rate": 3.312524552162172e-06, "loss": 0.9538, "step": 18943 }, { "epoch": 0.7412160575944909, "grad_norm": 0.0, "learning_rate": 3.311582432761723e-06, "loss": 1.0594, "step": 18944 }, { "epoch": 0.7412551842867204, "grad_norm": 0.0, "learning_rate": 3.310640420768879e-06, "loss": 1.0134, "step": 18945 }, { "epoch": 0.7412943109789498, "grad_norm": 0.0, "learning_rate": 3.309698516198768e-06, "loss": 1.0114, "step": 18946 }, { "epoch": 0.7413334376711793, "grad_norm": 0.0, "learning_rate": 3.3087567190665194e-06, "loss": 1.005, "step": 18947 }, { "epoch": 0.7413725643634087, "grad_norm": 0.0, "learning_rate": 3.307815029387249e-06, "loss": 0.9362, "step": 18948 }, { "epoch": 0.7414116910556382, "grad_norm": 0.0, "learning_rate": 3.3068734471760846e-06, "loss": 0.929, "step": 18949 }, { "epoch": 0.7414508177478676, "grad_norm": 0.0, "learning_rate": 3.3059319724481477e-06, "loss": 0.9905, "step": 18950 }, { "epoch": 0.7414899444400971, "grad_norm": 0.0, "learning_rate": 3.3049906052185534e-06, "loss": 0.9779, "step": 18951 }, { "epoch": 0.7415290711323265, "grad_norm": 0.0, "learning_rate": 3.304049345502419e-06, "loss": 0.9558, "step": 18952 }, { "epoch": 0.741568197824556, "grad_norm": 0.0, "learning_rate": 3.3031081933148656e-06, "loss": 0.8943, "step": 18953 }, { "epoch": 0.7416073245167853, "grad_norm": 0.0, "learning_rate": 3.3021671486709993e-06, "loss": 0.9553, "step": 18954 }, { "epoch": 0.7416464512090148, "grad_norm": 0.0, "learning_rate": 3.301226211585936e-06, "loss": 0.9238, "step": 18955 }, { "epoch": 0.7416855779012442, "grad_norm": 0.0, "learning_rate": 3.3002853820747893e-06, "loss": 0.9101, "step": 18956 }, { "epoch": 0.7417247045934736, "grad_norm": 0.0, "learning_rate": 3.2993446601526613e-06, "loss": 1.1271, "step": 18957 }, { "epoch": 0.7417638312857031, "grad_norm": 0.0, "learning_rate": 3.298404045834661e-06, "loss": 1.1483, "step": 18958 }, { "epoch": 0.7418029579779325, "grad_norm": 0.0, "learning_rate": 3.2974635391358943e-06, "loss": 0.9487, "step": 18959 }, { "epoch": 0.741842084670162, "grad_norm": 0.0, "learning_rate": 3.296523140071467e-06, "loss": 1.0549, "step": 18960 }, { "epoch": 0.7418812113623914, "grad_norm": 0.0, "learning_rate": 3.2955828486564754e-06, "loss": 1.007, "step": 18961 }, { "epoch": 0.7419203380546209, "grad_norm": 0.0, "learning_rate": 3.2946426649060258e-06, "loss": 0.9064, "step": 18962 }, { "epoch": 0.7419594647468503, "grad_norm": 0.0, "learning_rate": 3.293702588835206e-06, "loss": 0.8696, "step": 18963 }, { "epoch": 0.7419985914390798, "grad_norm": 0.0, "learning_rate": 3.2927626204591257e-06, "loss": 1.0065, "step": 18964 }, { "epoch": 0.7420377181313091, "grad_norm": 0.0, "learning_rate": 3.2918227597928686e-06, "loss": 0.9329, "step": 18965 }, { "epoch": 0.7420768448235386, "grad_norm": 0.0, "learning_rate": 3.2908830068515376e-06, "loss": 1.0673, "step": 18966 }, { "epoch": 0.742115971515768, "grad_norm": 0.0, "learning_rate": 3.289943361650211e-06, "loss": 0.9774, "step": 18967 }, { "epoch": 0.7421550982079975, "grad_norm": 0.0, "learning_rate": 3.289003824203991e-06, "loss": 1.0288, "step": 18968 }, { "epoch": 0.7421942249002269, "grad_norm": 0.0, "learning_rate": 3.2880643945279587e-06, "loss": 0.9373, "step": 18969 }, { "epoch": 0.7422333515924564, "grad_norm": 0.0, "learning_rate": 3.2871250726372052e-06, "loss": 0.9156, "step": 18970 }, { "epoch": 0.7422724782846858, "grad_norm": 0.0, "learning_rate": 3.2861858585468065e-06, "loss": 1.1036, "step": 18971 }, { "epoch": 0.7423116049769153, "grad_norm": 0.0, "learning_rate": 3.285246752271851e-06, "loss": 0.9856, "step": 18972 }, { "epoch": 0.7423507316691447, "grad_norm": 0.0, "learning_rate": 3.2843077538274183e-06, "loss": 1.0218, "step": 18973 }, { "epoch": 0.7423898583613742, "grad_norm": 0.0, "learning_rate": 3.283368863228591e-06, "loss": 0.97, "step": 18974 }, { "epoch": 0.7424289850536036, "grad_norm": 0.0, "learning_rate": 3.282430080490441e-06, "loss": 0.8696, "step": 18975 }, { "epoch": 0.742468111745833, "grad_norm": 0.0, "learning_rate": 3.2814914056280456e-06, "loss": 0.9296, "step": 18976 }, { "epoch": 0.7425072384380624, "grad_norm": 0.0, "learning_rate": 3.2805528386564787e-06, "loss": 0.9498, "step": 18977 }, { "epoch": 0.7425463651302919, "grad_norm": 0.0, "learning_rate": 3.2796143795908174e-06, "loss": 0.8711, "step": 18978 }, { "epoch": 0.7425854918225213, "grad_norm": 0.0, "learning_rate": 3.278676028446125e-06, "loss": 0.9356, "step": 18979 }, { "epoch": 0.7426246185147508, "grad_norm": 0.0, "learning_rate": 3.2777377852374734e-06, "loss": 1.1028, "step": 18980 }, { "epoch": 0.7426637452069802, "grad_norm": 0.0, "learning_rate": 3.2767996499799295e-06, "loss": 0.9283, "step": 18981 }, { "epoch": 0.7427028718992097, "grad_norm": 0.0, "learning_rate": 3.275861622688561e-06, "loss": 0.9345, "step": 18982 }, { "epoch": 0.7427419985914391, "grad_norm": 0.0, "learning_rate": 3.2749237033784267e-06, "loss": 0.9978, "step": 18983 }, { "epoch": 0.7427811252836685, "grad_norm": 0.0, "learning_rate": 3.273985892064593e-06, "loss": 0.9863, "step": 18984 }, { "epoch": 0.742820251975898, "grad_norm": 0.0, "learning_rate": 3.2730481887621125e-06, "loss": 0.9213, "step": 18985 }, { "epoch": 0.7428593786681273, "grad_norm": 0.0, "learning_rate": 3.2721105934860544e-06, "loss": 0.9531, "step": 18986 }, { "epoch": 0.7428985053603568, "grad_norm": 0.0, "learning_rate": 3.271173106251466e-06, "loss": 1.0407, "step": 18987 }, { "epoch": 0.7429376320525862, "grad_norm": 0.0, "learning_rate": 3.27023572707341e-06, "loss": 0.9649, "step": 18988 }, { "epoch": 0.7429767587448157, "grad_norm": 0.0, "learning_rate": 3.2692984559669315e-06, "loss": 0.8475, "step": 18989 }, { "epoch": 0.7430158854370451, "grad_norm": 0.0, "learning_rate": 3.2683612929470854e-06, "loss": 1.0342, "step": 18990 }, { "epoch": 0.7430550121292746, "grad_norm": 0.0, "learning_rate": 3.2674242380289222e-06, "loss": 0.9798, "step": 18991 }, { "epoch": 0.743094138821504, "grad_norm": 0.0, "learning_rate": 3.2664872912274924e-06, "loss": 0.9876, "step": 18992 }, { "epoch": 0.7431332655137335, "grad_norm": 0.0, "learning_rate": 3.2655504525578364e-06, "loss": 1.0153, "step": 18993 }, { "epoch": 0.7431723922059629, "grad_norm": 0.0, "learning_rate": 3.2646137220350016e-06, "loss": 0.9573, "step": 18994 }, { "epoch": 0.7432115188981924, "grad_norm": 0.0, "learning_rate": 3.263677099674033e-06, "loss": 0.9934, "step": 18995 }, { "epoch": 0.7432506455904218, "grad_norm": 0.0, "learning_rate": 3.2627405854899665e-06, "loss": 0.9579, "step": 18996 }, { "epoch": 0.7432897722826513, "grad_norm": 0.0, "learning_rate": 3.2618041794978426e-06, "loss": 1.0198, "step": 18997 }, { "epoch": 0.7433288989748806, "grad_norm": 0.0, "learning_rate": 3.2608678817127014e-06, "loss": 1.0514, "step": 18998 }, { "epoch": 0.7433680256671101, "grad_norm": 0.0, "learning_rate": 3.2599316921495806e-06, "loss": 0.8931, "step": 18999 }, { "epoch": 0.7434071523593395, "grad_norm": 0.0, "learning_rate": 3.258995610823508e-06, "loss": 1.0615, "step": 19000 }, { "epoch": 0.743446279051569, "grad_norm": 0.0, "learning_rate": 3.258059637749521e-06, "loss": 0.9759, "step": 19001 }, { "epoch": 0.7434854057437984, "grad_norm": 0.0, "learning_rate": 3.2571237729426464e-06, "loss": 1.0961, "step": 19002 }, { "epoch": 0.7435245324360279, "grad_norm": 0.0, "learning_rate": 3.2561880164179138e-06, "loss": 0.783, "step": 19003 }, { "epoch": 0.7435636591282573, "grad_norm": 0.0, "learning_rate": 3.2552523681903516e-06, "loss": 1.0278, "step": 19004 }, { "epoch": 0.7436027858204868, "grad_norm": 0.0, "learning_rate": 3.254316828274987e-06, "loss": 1.0528, "step": 19005 }, { "epoch": 0.7436419125127162, "grad_norm": 0.0, "learning_rate": 3.253381396686839e-06, "loss": 0.9618, "step": 19006 }, { "epoch": 0.7436810392049457, "grad_norm": 0.0, "learning_rate": 3.252446073440931e-06, "loss": 1.0196, "step": 19007 }, { "epoch": 0.743720165897175, "grad_norm": 0.0, "learning_rate": 3.251510858552285e-06, "loss": 0.9888, "step": 19008 }, { "epoch": 0.7437592925894045, "grad_norm": 0.0, "learning_rate": 3.2505757520359205e-06, "loss": 1.0426, "step": 19009 }, { "epoch": 0.7437984192816339, "grad_norm": 0.0, "learning_rate": 3.2496407539068497e-06, "loss": 0.9695, "step": 19010 }, { "epoch": 0.7438375459738634, "grad_norm": 0.0, "learning_rate": 3.2487058641800928e-06, "loss": 1.0029, "step": 19011 }, { "epoch": 0.7438766726660928, "grad_norm": 0.0, "learning_rate": 3.247771082870652e-06, "loss": 1.0152, "step": 19012 }, { "epoch": 0.7439157993583222, "grad_norm": 0.0, "learning_rate": 3.2468364099935546e-06, "loss": 0.8634, "step": 19013 }, { "epoch": 0.7439549260505517, "grad_norm": 0.0, "learning_rate": 3.245901845563798e-06, "loss": 0.9739, "step": 19014 }, { "epoch": 0.7439940527427811, "grad_norm": 0.0, "learning_rate": 3.2449673895963985e-06, "loss": 0.9877, "step": 19015 }, { "epoch": 0.7440331794350106, "grad_norm": 0.0, "learning_rate": 3.2440330421063513e-06, "loss": 0.8421, "step": 19016 }, { "epoch": 0.74407230612724, "grad_norm": 0.0, "learning_rate": 3.2430988031086742e-06, "loss": 1.0471, "step": 19017 }, { "epoch": 0.7441114328194695, "grad_norm": 0.0, "learning_rate": 3.242164672618361e-06, "loss": 1.0289, "step": 19018 }, { "epoch": 0.7441505595116988, "grad_norm": 0.0, "learning_rate": 3.2412306506504175e-06, "loss": 0.9554, "step": 19019 }, { "epoch": 0.7441896862039283, "grad_norm": 0.0, "learning_rate": 3.240296737219837e-06, "loss": 0.9474, "step": 19020 }, { "epoch": 0.7442288128961577, "grad_norm": 0.0, "learning_rate": 3.2393629323416207e-06, "loss": 1.0664, "step": 19021 }, { "epoch": 0.7442679395883872, "grad_norm": 0.0, "learning_rate": 3.2384292360307646e-06, "loss": 0.8626, "step": 19022 }, { "epoch": 0.7443070662806166, "grad_norm": 0.0, "learning_rate": 3.2374956483022656e-06, "loss": 1.1008, "step": 19023 }, { "epoch": 0.7443461929728461, "grad_norm": 0.0, "learning_rate": 3.236562169171109e-06, "loss": 0.8844, "step": 19024 }, { "epoch": 0.7443853196650755, "grad_norm": 0.0, "learning_rate": 3.2356287986522895e-06, "loss": 0.9296, "step": 19025 }, { "epoch": 0.744424446357305, "grad_norm": 0.0, "learning_rate": 3.2346955367607944e-06, "loss": 1.0822, "step": 19026 }, { "epoch": 0.7444635730495344, "grad_norm": 0.0, "learning_rate": 3.233762383511615e-06, "loss": 1.1017, "step": 19027 }, { "epoch": 0.7445026997417639, "grad_norm": 0.0, "learning_rate": 3.2328293389197297e-06, "loss": 1.0403, "step": 19028 }, { "epoch": 0.7445418264339932, "grad_norm": 0.0, "learning_rate": 3.2318964030001297e-06, "loss": 1.0927, "step": 19029 }, { "epoch": 0.7445809531262227, "grad_norm": 0.0, "learning_rate": 3.2309635757677847e-06, "loss": 0.8625, "step": 19030 }, { "epoch": 0.7446200798184521, "grad_norm": 0.0, "learning_rate": 3.2300308572376906e-06, "loss": 1.1059, "step": 19031 }, { "epoch": 0.7446592065106816, "grad_norm": 0.0, "learning_rate": 3.2290982474248135e-06, "loss": 0.9617, "step": 19032 }, { "epoch": 0.744698333202911, "grad_norm": 0.0, "learning_rate": 3.2281657463441375e-06, "loss": 0.9014, "step": 19033 }, { "epoch": 0.7447374598951405, "grad_norm": 0.0, "learning_rate": 3.2272333540106305e-06, "loss": 1.0237, "step": 19034 }, { "epoch": 0.7447765865873699, "grad_norm": 0.0, "learning_rate": 3.2263010704392694e-06, "loss": 1.0741, "step": 19035 }, { "epoch": 0.7448157132795994, "grad_norm": 0.0, "learning_rate": 3.2253688956450258e-06, "loss": 0.9355, "step": 19036 }, { "epoch": 0.7448548399718288, "grad_norm": 0.0, "learning_rate": 3.2244368296428706e-06, "loss": 1.0427, "step": 19037 }, { "epoch": 0.7448939666640583, "grad_norm": 0.0, "learning_rate": 3.223504872447768e-06, "loss": 0.9198, "step": 19038 }, { "epoch": 0.7449330933562877, "grad_norm": 0.0, "learning_rate": 3.2225730240746844e-06, "loss": 0.9232, "step": 19039 }, { "epoch": 0.7449722200485172, "grad_norm": 0.0, "learning_rate": 3.2216412845385893e-06, "loss": 1.072, "step": 19040 }, { "epoch": 0.7450113467407465, "grad_norm": 0.0, "learning_rate": 3.220709653854438e-06, "loss": 1.0016, "step": 19041 }, { "epoch": 0.7450504734329759, "grad_norm": 0.0, "learning_rate": 3.2197781320371944e-06, "loss": 0.8226, "step": 19042 }, { "epoch": 0.7450896001252054, "grad_norm": 0.0, "learning_rate": 3.218846719101818e-06, "loss": 0.9529, "step": 19043 }, { "epoch": 0.7451287268174348, "grad_norm": 0.0, "learning_rate": 3.21791541506327e-06, "loss": 0.9685, "step": 19044 }, { "epoch": 0.7451678535096643, "grad_norm": 0.0, "learning_rate": 3.2169842199364977e-06, "loss": 1.0076, "step": 19045 }, { "epoch": 0.7452069802018937, "grad_norm": 0.0, "learning_rate": 3.216053133736463e-06, "loss": 0.9897, "step": 19046 }, { "epoch": 0.7452461068941232, "grad_norm": 0.0, "learning_rate": 3.2151221564781076e-06, "loss": 0.9802, "step": 19047 }, { "epoch": 0.7452852335863526, "grad_norm": 0.0, "learning_rate": 3.214191288176396e-06, "loss": 0.9216, "step": 19048 }, { "epoch": 0.7453243602785821, "grad_norm": 0.0, "learning_rate": 3.213260528846265e-06, "loss": 1.0199, "step": 19049 }, { "epoch": 0.7453634869708115, "grad_norm": 0.0, "learning_rate": 3.212329878502669e-06, "loss": 1.0023, "step": 19050 }, { "epoch": 0.745402613663041, "grad_norm": 0.0, "learning_rate": 3.2113993371605457e-06, "loss": 0.959, "step": 19051 }, { "epoch": 0.7454417403552703, "grad_norm": 0.0, "learning_rate": 3.2104689048348436e-06, "loss": 0.9857, "step": 19052 }, { "epoch": 0.7454808670474998, "grad_norm": 0.0, "learning_rate": 3.2095385815405023e-06, "loss": 0.921, "step": 19053 }, { "epoch": 0.7455199937397292, "grad_norm": 0.0, "learning_rate": 3.208608367292466e-06, "loss": 1.0154, "step": 19054 }, { "epoch": 0.7455591204319587, "grad_norm": 0.0, "learning_rate": 3.207678262105667e-06, "loss": 0.9734, "step": 19055 }, { "epoch": 0.7455982471241881, "grad_norm": 0.0, "learning_rate": 3.206748265995042e-06, "loss": 1.2098, "step": 19056 }, { "epoch": 0.7456373738164176, "grad_norm": 0.0, "learning_rate": 3.2058183789755294e-06, "loss": 1.032, "step": 19057 }, { "epoch": 0.745676500508647, "grad_norm": 0.0, "learning_rate": 3.2048886010620617e-06, "loss": 1.0316, "step": 19058 }, { "epoch": 0.7457156272008765, "grad_norm": 0.0, "learning_rate": 3.2039589322695664e-06, "loss": 0.9305, "step": 19059 }, { "epoch": 0.7457547538931059, "grad_norm": 0.0, "learning_rate": 3.203029372612977e-06, "loss": 0.9903, "step": 19060 }, { "epoch": 0.7457938805853354, "grad_norm": 0.0, "learning_rate": 3.2020999221072125e-06, "loss": 1.0311, "step": 19061 }, { "epoch": 0.7458330072775647, "grad_norm": 0.0, "learning_rate": 3.201170580767211e-06, "loss": 0.9421, "step": 19062 }, { "epoch": 0.7458721339697942, "grad_norm": 0.0, "learning_rate": 3.200241348607889e-06, "loss": 1.0968, "step": 19063 }, { "epoch": 0.7459112606620236, "grad_norm": 0.0, "learning_rate": 3.1993122256441713e-06, "loss": 0.9389, "step": 19064 }, { "epoch": 0.7459503873542531, "grad_norm": 0.0, "learning_rate": 3.198383211890973e-06, "loss": 1.0271, "step": 19065 }, { "epoch": 0.7459895140464825, "grad_norm": 0.0, "learning_rate": 3.1974543073632224e-06, "loss": 0.9622, "step": 19066 }, { "epoch": 0.746028640738712, "grad_norm": 0.0, "learning_rate": 3.1965255120758285e-06, "loss": 0.9439, "step": 19067 }, { "epoch": 0.7460677674309414, "grad_norm": 0.0, "learning_rate": 3.195596826043714e-06, "loss": 1.015, "step": 19068 }, { "epoch": 0.7461068941231708, "grad_norm": 0.0, "learning_rate": 3.194668249281785e-06, "loss": 1.0767, "step": 19069 }, { "epoch": 0.7461460208154003, "grad_norm": 0.0, "learning_rate": 3.1937397818049555e-06, "loss": 0.9103, "step": 19070 }, { "epoch": 0.7461851475076297, "grad_norm": 0.0, "learning_rate": 3.192811423628136e-06, "loss": 0.9658, "step": 19071 }, { "epoch": 0.7462242741998591, "grad_norm": 0.0, "learning_rate": 3.191883174766239e-06, "loss": 0.9928, "step": 19072 }, { "epoch": 0.7462634008920885, "grad_norm": 0.0, "learning_rate": 3.190955035234163e-06, "loss": 0.9749, "step": 19073 }, { "epoch": 0.746302527584318, "grad_norm": 0.0, "learning_rate": 3.1900270050468184e-06, "loss": 0.8244, "step": 19074 }, { "epoch": 0.7463416542765474, "grad_norm": 0.0, "learning_rate": 3.189099084219106e-06, "loss": 0.9479, "step": 19075 }, { "epoch": 0.7463807809687769, "grad_norm": 0.0, "learning_rate": 3.188171272765931e-06, "loss": 1.0121, "step": 19076 }, { "epoch": 0.7464199076610063, "grad_norm": 0.0, "learning_rate": 3.1872435707021865e-06, "loss": 0.9846, "step": 19077 }, { "epoch": 0.7464590343532358, "grad_norm": 0.0, "learning_rate": 3.1863159780427765e-06, "loss": 0.933, "step": 19078 }, { "epoch": 0.7464981610454652, "grad_norm": 0.0, "learning_rate": 3.1853884948025904e-06, "loss": 0.9752, "step": 19079 }, { "epoch": 0.7465372877376947, "grad_norm": 0.0, "learning_rate": 3.1844611209965272e-06, "loss": 0.9578, "step": 19080 }, { "epoch": 0.7465764144299241, "grad_norm": 0.0, "learning_rate": 3.183533856639477e-06, "loss": 0.8609, "step": 19081 }, { "epoch": 0.7466155411221536, "grad_norm": 0.0, "learning_rate": 3.1826067017463346e-06, "loss": 0.7818, "step": 19082 }, { "epoch": 0.7466546678143829, "grad_norm": 0.0, "learning_rate": 3.181679656331983e-06, "loss": 1.0143, "step": 19083 }, { "epoch": 0.7466937945066124, "grad_norm": 0.0, "learning_rate": 3.180752720411312e-06, "loss": 1.0683, "step": 19084 }, { "epoch": 0.7467329211988418, "grad_norm": 0.0, "learning_rate": 3.179825893999211e-06, "loss": 0.9647, "step": 19085 }, { "epoch": 0.7467720478910713, "grad_norm": 0.0, "learning_rate": 3.178899177110556e-06, "loss": 0.8977, "step": 19086 }, { "epoch": 0.7468111745833007, "grad_norm": 0.0, "learning_rate": 3.177972569760234e-06, "loss": 1.1148, "step": 19087 }, { "epoch": 0.7468503012755302, "grad_norm": 0.0, "learning_rate": 3.1770460719631237e-06, "loss": 1.0381, "step": 19088 }, { "epoch": 0.7468894279677596, "grad_norm": 0.0, "learning_rate": 3.1761196837341056e-06, "loss": 1.1831, "step": 19089 }, { "epoch": 0.7469285546599891, "grad_norm": 0.0, "learning_rate": 3.1751934050880527e-06, "loss": 1.0189, "step": 19090 }, { "epoch": 0.7469676813522185, "grad_norm": 0.0, "learning_rate": 3.1742672360398453e-06, "loss": 0.9229, "step": 19091 }, { "epoch": 0.747006808044448, "grad_norm": 0.0, "learning_rate": 3.1733411766043466e-06, "loss": 0.9916, "step": 19092 }, { "epoch": 0.7470459347366774, "grad_norm": 0.0, "learning_rate": 3.1724152267964404e-06, "loss": 1.0059, "step": 19093 }, { "epoch": 0.7470850614289068, "grad_norm": 0.0, "learning_rate": 3.171489386630986e-06, "loss": 0.9398, "step": 19094 }, { "epoch": 0.7471241881211362, "grad_norm": 0.0, "learning_rate": 3.1705636561228605e-06, "loss": 0.958, "step": 19095 }, { "epoch": 0.7471633148133657, "grad_norm": 0.0, "learning_rate": 3.1696380352869173e-06, "loss": 0.9233, "step": 19096 }, { "epoch": 0.7472024415055951, "grad_norm": 0.0, "learning_rate": 3.1687125241380346e-06, "loss": 1.0165, "step": 19097 }, { "epoch": 0.7472415681978245, "grad_norm": 0.0, "learning_rate": 3.1677871226910663e-06, "loss": 0.9383, "step": 19098 }, { "epoch": 0.747280694890054, "grad_norm": 0.0, "learning_rate": 3.166861830960878e-06, "loss": 0.9818, "step": 19099 }, { "epoch": 0.7473198215822834, "grad_norm": 0.0, "learning_rate": 3.1659366489623235e-06, "loss": 0.9516, "step": 19100 }, { "epoch": 0.7473589482745129, "grad_norm": 0.0, "learning_rate": 3.165011576710262e-06, "loss": 0.9758, "step": 19101 }, { "epoch": 0.7473980749667423, "grad_norm": 0.0, "learning_rate": 3.1640866142195504e-06, "loss": 0.9615, "step": 19102 }, { "epoch": 0.7474372016589718, "grad_norm": 0.0, "learning_rate": 3.1631617615050457e-06, "loss": 1.0562, "step": 19103 }, { "epoch": 0.7474763283512011, "grad_norm": 0.0, "learning_rate": 3.162237018581592e-06, "loss": 0.9221, "step": 19104 }, { "epoch": 0.7475154550434306, "grad_norm": 0.0, "learning_rate": 3.161312385464045e-06, "loss": 0.8838, "step": 19105 }, { "epoch": 0.74755458173566, "grad_norm": 0.0, "learning_rate": 3.16038786216725e-06, "loss": 1.054, "step": 19106 }, { "epoch": 0.7475937084278895, "grad_norm": 0.0, "learning_rate": 3.1594634487060595e-06, "loss": 0.9604, "step": 19107 }, { "epoch": 0.7476328351201189, "grad_norm": 0.0, "learning_rate": 3.158539145095312e-06, "loss": 0.9795, "step": 19108 }, { "epoch": 0.7476719618123484, "grad_norm": 0.0, "learning_rate": 3.1576149513498544e-06, "loss": 1.0668, "step": 19109 }, { "epoch": 0.7477110885045778, "grad_norm": 0.0, "learning_rate": 3.156690867484521e-06, "loss": 0.9633, "step": 19110 }, { "epoch": 0.7477502151968073, "grad_norm": 0.0, "learning_rate": 3.155766893514164e-06, "loss": 0.8427, "step": 19111 }, { "epoch": 0.7477893418890367, "grad_norm": 0.0, "learning_rate": 3.1548430294536115e-06, "loss": 0.9483, "step": 19112 }, { "epoch": 0.7478284685812662, "grad_norm": 0.0, "learning_rate": 3.153919275317705e-06, "loss": 0.8711, "step": 19113 }, { "epoch": 0.7478675952734956, "grad_norm": 0.0, "learning_rate": 3.1529956311212693e-06, "loss": 0.934, "step": 19114 }, { "epoch": 0.747906721965725, "grad_norm": 0.0, "learning_rate": 3.1520720968791506e-06, "loss": 0.9302, "step": 19115 }, { "epoch": 0.7479458486579544, "grad_norm": 0.0, "learning_rate": 3.1511486726061712e-06, "loss": 1.0442, "step": 19116 }, { "epoch": 0.7479849753501839, "grad_norm": 0.0, "learning_rate": 3.1502253583171637e-06, "loss": 1.0195, "step": 19117 }, { "epoch": 0.7480241020424133, "grad_norm": 0.0, "learning_rate": 3.149302154026952e-06, "loss": 0.8342, "step": 19118 }, { "epoch": 0.7480632287346428, "grad_norm": 0.0, "learning_rate": 3.1483790597503617e-06, "loss": 0.8613, "step": 19119 }, { "epoch": 0.7481023554268722, "grad_norm": 0.0, "learning_rate": 3.147456075502219e-06, "loss": 0.9549, "step": 19120 }, { "epoch": 0.7481414821191017, "grad_norm": 0.0, "learning_rate": 3.1465332012973483e-06, "loss": 1.0103, "step": 19121 }, { "epoch": 0.7481806088113311, "grad_norm": 0.0, "learning_rate": 3.1456104371505636e-06, "loss": 0.8828, "step": 19122 }, { "epoch": 0.7482197355035606, "grad_norm": 0.0, "learning_rate": 3.1446877830766853e-06, "loss": 1.0948, "step": 19123 }, { "epoch": 0.74825886219579, "grad_norm": 0.0, "learning_rate": 3.1437652390905348e-06, "loss": 1.1025, "step": 19124 }, { "epoch": 0.7482979888880195, "grad_norm": 0.0, "learning_rate": 3.1428428052069195e-06, "loss": 0.9836, "step": 19125 }, { "epoch": 0.7483371155802488, "grad_norm": 0.0, "learning_rate": 3.1419204814406566e-06, "loss": 0.9781, "step": 19126 }, { "epoch": 0.7483762422724782, "grad_norm": 0.0, "learning_rate": 3.140998267806561e-06, "loss": 0.9705, "step": 19127 }, { "epoch": 0.7484153689647077, "grad_norm": 0.0, "learning_rate": 3.1400761643194345e-06, "loss": 0.9651, "step": 19128 }, { "epoch": 0.7484544956569371, "grad_norm": 0.0, "learning_rate": 3.139154170994089e-06, "loss": 0.8396, "step": 19129 }, { "epoch": 0.7484936223491666, "grad_norm": 0.0, "learning_rate": 3.1382322878453298e-06, "loss": 0.9593, "step": 19130 }, { "epoch": 0.748532749041396, "grad_norm": 0.0, "learning_rate": 3.1373105148879656e-06, "loss": 1.0623, "step": 19131 }, { "epoch": 0.7485718757336255, "grad_norm": 0.0, "learning_rate": 3.1363888521367924e-06, "loss": 0.9819, "step": 19132 }, { "epoch": 0.7486110024258549, "grad_norm": 0.0, "learning_rate": 3.1354672996066128e-06, "loss": 0.8193, "step": 19133 }, { "epoch": 0.7486501291180844, "grad_norm": 0.0, "learning_rate": 3.13454585731223e-06, "loss": 1.0282, "step": 19134 }, { "epoch": 0.7486892558103138, "grad_norm": 0.0, "learning_rate": 3.1336245252684348e-06, "loss": 0.8399, "step": 19135 }, { "epoch": 0.7487283825025433, "grad_norm": 0.0, "learning_rate": 3.1327033034900254e-06, "loss": 0.9715, "step": 19136 }, { "epoch": 0.7487675091947726, "grad_norm": 0.0, "learning_rate": 3.1317821919917957e-06, "loss": 1.0543, "step": 19137 }, { "epoch": 0.7488066358870021, "grad_norm": 0.0, "learning_rate": 3.130861190788541e-06, "loss": 0.8804, "step": 19138 }, { "epoch": 0.7488457625792315, "grad_norm": 0.0, "learning_rate": 3.129940299895046e-06, "loss": 0.8422, "step": 19139 }, { "epoch": 0.748884889271461, "grad_norm": 0.0, "learning_rate": 3.129019519326102e-06, "loss": 0.9573, "step": 19140 }, { "epoch": 0.7489240159636904, "grad_norm": 0.0, "learning_rate": 3.1280988490964903e-06, "loss": 1.0297, "step": 19141 }, { "epoch": 0.7489631426559199, "grad_norm": 0.0, "learning_rate": 3.1271782892210055e-06, "loss": 0.9605, "step": 19142 }, { "epoch": 0.7490022693481493, "grad_norm": 0.0, "learning_rate": 3.1262578397144216e-06, "loss": 0.8989, "step": 19143 }, { "epoch": 0.7490413960403788, "grad_norm": 0.0, "learning_rate": 3.1253375005915276e-06, "loss": 0.9411, "step": 19144 }, { "epoch": 0.7490805227326082, "grad_norm": 0.0, "learning_rate": 3.124417271867093e-06, "loss": 0.9799, "step": 19145 }, { "epoch": 0.7491196494248377, "grad_norm": 0.0, "learning_rate": 3.123497153555907e-06, "loss": 0.8805, "step": 19146 }, { "epoch": 0.749158776117067, "grad_norm": 0.0, "learning_rate": 3.1225771456727373e-06, "loss": 0.9521, "step": 19147 }, { "epoch": 0.7491979028092965, "grad_norm": 0.0, "learning_rate": 3.1216572482323628e-06, "loss": 0.9382, "step": 19148 }, { "epoch": 0.7492370295015259, "grad_norm": 0.0, "learning_rate": 3.120737461249551e-06, "loss": 1.0078, "step": 19149 }, { "epoch": 0.7492761561937554, "grad_norm": 0.0, "learning_rate": 3.1198177847390764e-06, "loss": 0.8674, "step": 19150 }, { "epoch": 0.7493152828859848, "grad_norm": 0.0, "learning_rate": 3.1188982187157056e-06, "loss": 0.9789, "step": 19151 }, { "epoch": 0.7493544095782143, "grad_norm": 0.0, "learning_rate": 3.1179787631942117e-06, "loss": 0.9693, "step": 19152 }, { "epoch": 0.7493935362704437, "grad_norm": 0.0, "learning_rate": 3.117059418189351e-06, "loss": 1.0367, "step": 19153 }, { "epoch": 0.7494326629626732, "grad_norm": 0.0, "learning_rate": 3.116140183715891e-06, "loss": 1.1336, "step": 19154 }, { "epoch": 0.7494717896549026, "grad_norm": 0.0, "learning_rate": 3.1152210597885933e-06, "loss": 1.1317, "step": 19155 }, { "epoch": 0.749510916347132, "grad_norm": 0.0, "learning_rate": 3.114302046422223e-06, "loss": 0.9568, "step": 19156 }, { "epoch": 0.7495500430393615, "grad_norm": 0.0, "learning_rate": 3.1133831436315288e-06, "loss": 0.9494, "step": 19157 }, { "epoch": 0.7495891697315908, "grad_norm": 0.0, "learning_rate": 3.112464351431276e-06, "loss": 1.0977, "step": 19158 }, { "epoch": 0.7496282964238203, "grad_norm": 0.0, "learning_rate": 3.111545669836209e-06, "loss": 0.8621, "step": 19159 }, { "epoch": 0.7496674231160497, "grad_norm": 0.0, "learning_rate": 3.110627098861092e-06, "loss": 0.8895, "step": 19160 }, { "epoch": 0.7497065498082792, "grad_norm": 0.0, "learning_rate": 3.109708638520669e-06, "loss": 0.8825, "step": 19161 }, { "epoch": 0.7497456765005086, "grad_norm": 0.0, "learning_rate": 3.108790288829694e-06, "loss": 0.9941, "step": 19162 }, { "epoch": 0.7497848031927381, "grad_norm": 0.0, "learning_rate": 3.107872049802908e-06, "loss": 0.9959, "step": 19163 }, { "epoch": 0.7498239298849675, "grad_norm": 0.0, "learning_rate": 3.1069539214550614e-06, "loss": 1.0531, "step": 19164 }, { "epoch": 0.749863056577197, "grad_norm": 0.0, "learning_rate": 3.106035903800897e-06, "loss": 0.8471, "step": 19165 }, { "epoch": 0.7499021832694264, "grad_norm": 0.0, "learning_rate": 3.1051179968551604e-06, "loss": 0.997, "step": 19166 }, { "epoch": 0.7499413099616559, "grad_norm": 0.0, "learning_rate": 3.104200200632587e-06, "loss": 1.0435, "step": 19167 }, { "epoch": 0.7499804366538853, "grad_norm": 0.0, "learning_rate": 3.1032825151479163e-06, "loss": 0.926, "step": 19168 }, { "epoch": 0.7500195633461147, "grad_norm": 0.0, "learning_rate": 3.102364940415886e-06, "loss": 1.0262, "step": 19169 }, { "epoch": 0.7500586900383441, "grad_norm": 0.0, "learning_rate": 3.1014474764512347e-06, "loss": 0.95, "step": 19170 }, { "epoch": 0.7500978167305736, "grad_norm": 0.0, "learning_rate": 3.10053012326869e-06, "loss": 0.9264, "step": 19171 }, { "epoch": 0.750136943422803, "grad_norm": 0.0, "learning_rate": 3.099612880882986e-06, "loss": 0.8175, "step": 19172 }, { "epoch": 0.7501760701150325, "grad_norm": 0.0, "learning_rate": 3.0986957493088555e-06, "loss": 0.9965, "step": 19173 }, { "epoch": 0.7502151968072619, "grad_norm": 0.0, "learning_rate": 3.0977787285610206e-06, "loss": 1.0378, "step": 19174 }, { "epoch": 0.7502543234994914, "grad_norm": 0.0, "learning_rate": 3.09686181865421e-06, "loss": 0.9682, "step": 19175 }, { "epoch": 0.7502934501917208, "grad_norm": 0.0, "learning_rate": 3.0959450196031516e-06, "loss": 1.0002, "step": 19176 }, { "epoch": 0.7503325768839503, "grad_norm": 0.0, "learning_rate": 3.0950283314225627e-06, "loss": 1.018, "step": 19177 }, { "epoch": 0.7503717035761797, "grad_norm": 0.0, "learning_rate": 3.094111754127165e-06, "loss": 0.8954, "step": 19178 }, { "epoch": 0.7504108302684092, "grad_norm": 0.0, "learning_rate": 3.093195287731683e-06, "loss": 0.8867, "step": 19179 }, { "epoch": 0.7504499569606385, "grad_norm": 0.0, "learning_rate": 3.092278932250826e-06, "loss": 1.032, "step": 19180 }, { "epoch": 0.750489083652868, "grad_norm": 0.0, "learning_rate": 3.0913626876993142e-06, "loss": 0.8618, "step": 19181 }, { "epoch": 0.7505282103450974, "grad_norm": 0.0, "learning_rate": 3.0904465540918605e-06, "loss": 0.9134, "step": 19182 }, { "epoch": 0.7505673370373268, "grad_norm": 0.0, "learning_rate": 3.08953053144318e-06, "loss": 0.9814, "step": 19183 }, { "epoch": 0.7506064637295563, "grad_norm": 0.0, "learning_rate": 3.0886146197679766e-06, "loss": 1.0089, "step": 19184 }, { "epoch": 0.7506455904217857, "grad_norm": 0.0, "learning_rate": 3.0876988190809655e-06, "loss": 1.0519, "step": 19185 }, { "epoch": 0.7506847171140152, "grad_norm": 0.0, "learning_rate": 3.086783129396843e-06, "loss": 0.9715, "step": 19186 }, { "epoch": 0.7507238438062446, "grad_norm": 0.0, "learning_rate": 3.0858675507303273e-06, "loss": 0.9246, "step": 19187 }, { "epoch": 0.7507629704984741, "grad_norm": 0.0, "learning_rate": 3.084952083096111e-06, "loss": 0.9426, "step": 19188 }, { "epoch": 0.7508020971907035, "grad_norm": 0.0, "learning_rate": 3.0840367265089034e-06, "loss": 0.8679, "step": 19189 }, { "epoch": 0.750841223882933, "grad_norm": 0.0, "learning_rate": 3.083121480983393e-06, "loss": 0.9717, "step": 19190 }, { "epoch": 0.7508803505751623, "grad_norm": 0.0, "learning_rate": 3.0822063465342913e-06, "loss": 0.9784, "step": 19191 }, { "epoch": 0.7509194772673918, "grad_norm": 0.0, "learning_rate": 3.0812913231762832e-06, "loss": 0.882, "step": 19192 }, { "epoch": 0.7509586039596212, "grad_norm": 0.0, "learning_rate": 3.0803764109240697e-06, "loss": 0.9489, "step": 19193 }, { "epoch": 0.7509977306518507, "grad_norm": 0.0, "learning_rate": 3.0794616097923378e-06, "loss": 1.0029, "step": 19194 }, { "epoch": 0.7510368573440801, "grad_norm": 0.0, "learning_rate": 3.0785469197957806e-06, "loss": 0.966, "step": 19195 }, { "epoch": 0.7510759840363096, "grad_norm": 0.0, "learning_rate": 3.077632340949086e-06, "loss": 0.9782, "step": 19196 }, { "epoch": 0.751115110728539, "grad_norm": 0.0, "learning_rate": 3.0767178732669454e-06, "loss": 0.9728, "step": 19197 }, { "epoch": 0.7511542374207685, "grad_norm": 0.0, "learning_rate": 3.0758035167640376e-06, "loss": 0.9111, "step": 19198 }, { "epoch": 0.7511933641129979, "grad_norm": 0.0, "learning_rate": 3.0748892714550483e-06, "loss": 0.8748, "step": 19199 }, { "epoch": 0.7512324908052274, "grad_norm": 0.0, "learning_rate": 3.07397513735466e-06, "loss": 1.0236, "step": 19200 }, { "epoch": 0.7512716174974567, "grad_norm": 0.0, "learning_rate": 3.0730611144775547e-06, "loss": 0.946, "step": 19201 }, { "epoch": 0.7513107441896862, "grad_norm": 0.0, "learning_rate": 3.072147202838406e-06, "loss": 0.9306, "step": 19202 }, { "epoch": 0.7513498708819156, "grad_norm": 0.0, "learning_rate": 3.071233402451891e-06, "loss": 1.0031, "step": 19203 }, { "epoch": 0.7513889975741451, "grad_norm": 0.0, "learning_rate": 3.0703197133326856e-06, "loss": 1.0521, "step": 19204 }, { "epoch": 0.7514281242663745, "grad_norm": 0.0, "learning_rate": 3.069406135495466e-06, "loss": 0.9047, "step": 19205 }, { "epoch": 0.751467250958604, "grad_norm": 0.0, "learning_rate": 3.0684926689548954e-06, "loss": 1.0215, "step": 19206 }, { "epoch": 0.7515063776508334, "grad_norm": 0.0, "learning_rate": 3.0675793137256505e-06, "loss": 1.0833, "step": 19207 }, { "epoch": 0.7515455043430629, "grad_norm": 0.0, "learning_rate": 3.0666660698223884e-06, "loss": 0.8901, "step": 19208 }, { "epoch": 0.7515846310352923, "grad_norm": 0.0, "learning_rate": 3.065752937259788e-06, "loss": 0.7934, "step": 19209 }, { "epoch": 0.7516237577275218, "grad_norm": 0.0, "learning_rate": 3.064839916052503e-06, "loss": 1.0383, "step": 19210 }, { "epoch": 0.7516628844197512, "grad_norm": 0.0, "learning_rate": 3.0639270062152014e-06, "loss": 0.9895, "step": 19211 }, { "epoch": 0.7517020111119805, "grad_norm": 0.0, "learning_rate": 3.063014207762538e-06, "loss": 0.9227, "step": 19212 }, { "epoch": 0.75174113780421, "grad_norm": 0.0, "learning_rate": 3.0621015207091744e-06, "loss": 1.1237, "step": 19213 }, { "epoch": 0.7517802644964394, "grad_norm": 0.0, "learning_rate": 3.0611889450697663e-06, "loss": 0.9544, "step": 19214 }, { "epoch": 0.7518193911886689, "grad_norm": 0.0, "learning_rate": 3.0602764808589714e-06, "loss": 0.9868, "step": 19215 }, { "epoch": 0.7518585178808983, "grad_norm": 0.0, "learning_rate": 3.059364128091438e-06, "loss": 1.0438, "step": 19216 }, { "epoch": 0.7518976445731278, "grad_norm": 0.0, "learning_rate": 3.0584518867818192e-06, "loss": 0.9286, "step": 19217 }, { "epoch": 0.7519367712653572, "grad_norm": 0.0, "learning_rate": 3.057539756944767e-06, "loss": 0.866, "step": 19218 }, { "epoch": 0.7519758979575867, "grad_norm": 0.0, "learning_rate": 3.056627738594926e-06, "loss": 1.022, "step": 19219 }, { "epoch": 0.7520150246498161, "grad_norm": 0.0, "learning_rate": 3.0557158317469414e-06, "loss": 0.8974, "step": 19220 }, { "epoch": 0.7520541513420456, "grad_norm": 0.0, "learning_rate": 3.0548040364154597e-06, "loss": 0.8705, "step": 19221 }, { "epoch": 0.752093278034275, "grad_norm": 0.0, "learning_rate": 3.053892352615124e-06, "loss": 1.0671, "step": 19222 }, { "epoch": 0.7521324047265044, "grad_norm": 0.0, "learning_rate": 3.0529807803605717e-06, "loss": 0.9503, "step": 19223 }, { "epoch": 0.7521715314187338, "grad_norm": 0.0, "learning_rate": 3.0520693196664453e-06, "loss": 0.9985, "step": 19224 }, { "epoch": 0.7522106581109633, "grad_norm": 0.0, "learning_rate": 3.051157970547376e-06, "loss": 0.9208, "step": 19225 }, { "epoch": 0.7522497848031927, "grad_norm": 0.0, "learning_rate": 3.050246733018003e-06, "loss": 0.9568, "step": 19226 }, { "epoch": 0.7522889114954222, "grad_norm": 0.0, "learning_rate": 3.049335607092959e-06, "loss": 1.0029, "step": 19227 }, { "epoch": 0.7523280381876516, "grad_norm": 0.0, "learning_rate": 3.048424592786878e-06, "loss": 1.0163, "step": 19228 }, { "epoch": 0.7523671648798811, "grad_norm": 0.0, "learning_rate": 3.047513690114384e-06, "loss": 0.9321, "step": 19229 }, { "epoch": 0.7524062915721105, "grad_norm": 0.0, "learning_rate": 3.0466028990901084e-06, "loss": 1.0782, "step": 19230 }, { "epoch": 0.75244541826434, "grad_norm": 0.0, "learning_rate": 3.0456922197286776e-06, "loss": 0.9141, "step": 19231 }, { "epoch": 0.7524845449565694, "grad_norm": 0.0, "learning_rate": 3.0447816520447182e-06, "loss": 1.045, "step": 19232 }, { "epoch": 0.7525236716487989, "grad_norm": 0.0, "learning_rate": 3.0438711960528476e-06, "loss": 0.9791, "step": 19233 }, { "epoch": 0.7525627983410282, "grad_norm": 0.0, "learning_rate": 3.0429608517676913e-06, "loss": 1.0258, "step": 19234 }, { "epoch": 0.7526019250332577, "grad_norm": 0.0, "learning_rate": 3.0420506192038603e-06, "loss": 0.8466, "step": 19235 }, { "epoch": 0.7526410517254871, "grad_norm": 0.0, "learning_rate": 3.041140498375984e-06, "loss": 0.8956, "step": 19236 }, { "epoch": 0.7526801784177166, "grad_norm": 0.0, "learning_rate": 3.0402304892986677e-06, "loss": 1.0878, "step": 19237 }, { "epoch": 0.752719305109946, "grad_norm": 0.0, "learning_rate": 3.039320591986532e-06, "loss": 1.0345, "step": 19238 }, { "epoch": 0.7527584318021755, "grad_norm": 0.0, "learning_rate": 3.0384108064541795e-06, "loss": 0.9561, "step": 19239 }, { "epoch": 0.7527975584944049, "grad_norm": 0.0, "learning_rate": 3.037501132716232e-06, "loss": 0.8005, "step": 19240 }, { "epoch": 0.7528366851866343, "grad_norm": 0.0, "learning_rate": 3.0365915707872883e-06, "loss": 0.9897, "step": 19241 }, { "epoch": 0.7528758118788638, "grad_norm": 0.0, "learning_rate": 3.035682120681962e-06, "loss": 1.0961, "step": 19242 }, { "epoch": 0.7529149385710932, "grad_norm": 0.0, "learning_rate": 3.0347727824148508e-06, "loss": 0.9488, "step": 19243 }, { "epoch": 0.7529540652633226, "grad_norm": 0.0, "learning_rate": 3.0338635560005614e-06, "loss": 0.9302, "step": 19244 }, { "epoch": 0.752993191955552, "grad_norm": 0.0, "learning_rate": 3.0329544414536927e-06, "loss": 1.1264, "step": 19245 }, { "epoch": 0.7530323186477815, "grad_norm": 0.0, "learning_rate": 3.0320454387888496e-06, "loss": 0.933, "step": 19246 }, { "epoch": 0.7530714453400109, "grad_norm": 0.0, "learning_rate": 3.0311365480206224e-06, "loss": 0.9673, "step": 19247 }, { "epoch": 0.7531105720322404, "grad_norm": 0.0, "learning_rate": 3.0302277691636096e-06, "loss": 0.9583, "step": 19248 }, { "epoch": 0.7531496987244698, "grad_norm": 0.0, "learning_rate": 3.0293191022324055e-06, "loss": 0.9955, "step": 19249 }, { "epoch": 0.7531888254166993, "grad_norm": 0.0, "learning_rate": 3.0284105472416046e-06, "loss": 0.9915, "step": 19250 }, { "epoch": 0.7532279521089287, "grad_norm": 0.0, "learning_rate": 3.0275021042057907e-06, "loss": 1.0174, "step": 19251 }, { "epoch": 0.7532670788011582, "grad_norm": 0.0, "learning_rate": 3.0265937731395602e-06, "loss": 1.0452, "step": 19252 }, { "epoch": 0.7533062054933876, "grad_norm": 0.0, "learning_rate": 3.0256855540574894e-06, "loss": 1.0318, "step": 19253 }, { "epoch": 0.7533453321856171, "grad_norm": 0.0, "learning_rate": 3.0247774469741742e-06, "loss": 1.0574, "step": 19254 }, { "epoch": 0.7533844588778464, "grad_norm": 0.0, "learning_rate": 3.023869451904191e-06, "loss": 1.0516, "step": 19255 }, { "epoch": 0.7534235855700759, "grad_norm": 0.0, "learning_rate": 3.022961568862125e-06, "loss": 0.9652, "step": 19256 }, { "epoch": 0.7534627122623053, "grad_norm": 0.0, "learning_rate": 3.022053797862551e-06, "loss": 0.9313, "step": 19257 }, { "epoch": 0.7535018389545348, "grad_norm": 0.0, "learning_rate": 3.0211461389200493e-06, "loss": 1.0154, "step": 19258 }, { "epoch": 0.7535409656467642, "grad_norm": 0.0, "learning_rate": 3.020238592049195e-06, "loss": 0.8466, "step": 19259 }, { "epoch": 0.7535800923389937, "grad_norm": 0.0, "learning_rate": 3.0193311572645655e-06, "loss": 0.9539, "step": 19260 }, { "epoch": 0.7536192190312231, "grad_norm": 0.0, "learning_rate": 3.0184238345807284e-06, "loss": 1.037, "step": 19261 }, { "epoch": 0.7536583457234526, "grad_norm": 0.0, "learning_rate": 3.0175166240122554e-06, "loss": 1.0652, "step": 19262 }, { "epoch": 0.753697472415682, "grad_norm": 0.0, "learning_rate": 3.0166095255737193e-06, "loss": 0.9344, "step": 19263 }, { "epoch": 0.7537365991079115, "grad_norm": 0.0, "learning_rate": 3.0157025392796803e-06, "loss": 0.9596, "step": 19264 }, { "epoch": 0.7537757258001409, "grad_norm": 0.0, "learning_rate": 3.0147956651447064e-06, "loss": 1.0678, "step": 19265 }, { "epoch": 0.7538148524923703, "grad_norm": 0.0, "learning_rate": 3.0138889031833616e-06, "loss": 0.9864, "step": 19266 }, { "epoch": 0.7538539791845997, "grad_norm": 0.0, "learning_rate": 3.0129822534102093e-06, "loss": 0.9162, "step": 19267 }, { "epoch": 0.7538931058768292, "grad_norm": 0.0, "learning_rate": 3.0120757158398052e-06, "loss": 0.9147, "step": 19268 }, { "epoch": 0.7539322325690586, "grad_norm": 0.0, "learning_rate": 3.011169290486711e-06, "loss": 1.0322, "step": 19269 }, { "epoch": 0.753971359261288, "grad_norm": 0.0, "learning_rate": 3.0102629773654736e-06, "loss": 1.0757, "step": 19270 }, { "epoch": 0.7540104859535175, "grad_norm": 0.0, "learning_rate": 3.0093567764906606e-06, "loss": 0.9182, "step": 19271 }, { "epoch": 0.7540496126457469, "grad_norm": 0.0, "learning_rate": 3.008450687876815e-06, "loss": 0.9888, "step": 19272 }, { "epoch": 0.7540887393379764, "grad_norm": 0.0, "learning_rate": 3.007544711538495e-06, "loss": 0.9976, "step": 19273 }, { "epoch": 0.7541278660302058, "grad_norm": 0.0, "learning_rate": 3.0066388474902395e-06, "loss": 0.9684, "step": 19274 }, { "epoch": 0.7541669927224353, "grad_norm": 0.0, "learning_rate": 3.0057330957466025e-06, "loss": 0.9685, "step": 19275 }, { "epoch": 0.7542061194146646, "grad_norm": 0.0, "learning_rate": 3.0048274563221267e-06, "loss": 1.0632, "step": 19276 }, { "epoch": 0.7542452461068941, "grad_norm": 0.0, "learning_rate": 3.0039219292313603e-06, "loss": 1.1827, "step": 19277 }, { "epoch": 0.7542843727991235, "grad_norm": 0.0, "learning_rate": 3.003016514488838e-06, "loss": 0.9562, "step": 19278 }, { "epoch": 0.754323499491353, "grad_norm": 0.0, "learning_rate": 3.002111212109102e-06, "loss": 0.9767, "step": 19279 }, { "epoch": 0.7543626261835824, "grad_norm": 0.0, "learning_rate": 3.001206022106693e-06, "loss": 0.8708, "step": 19280 }, { "epoch": 0.7544017528758119, "grad_norm": 0.0, "learning_rate": 3.000300944496146e-06, "loss": 0.9407, "step": 19281 }, { "epoch": 0.7544408795680413, "grad_norm": 0.0, "learning_rate": 2.9993959792919934e-06, "loss": 1.1397, "step": 19282 }, { "epoch": 0.7544800062602708, "grad_norm": 0.0, "learning_rate": 2.998491126508771e-06, "loss": 0.9809, "step": 19283 }, { "epoch": 0.7545191329525002, "grad_norm": 0.0, "learning_rate": 2.997586386161002e-06, "loss": 0.9257, "step": 19284 }, { "epoch": 0.7545582596447297, "grad_norm": 0.0, "learning_rate": 2.996681758263228e-06, "loss": 0.9943, "step": 19285 }, { "epoch": 0.754597386336959, "grad_norm": 0.0, "learning_rate": 2.9957772428299657e-06, "loss": 0.9941, "step": 19286 }, { "epoch": 0.7546365130291885, "grad_norm": 0.0, "learning_rate": 2.9948728398757475e-06, "loss": 1.0374, "step": 19287 }, { "epoch": 0.7546756397214179, "grad_norm": 0.0, "learning_rate": 2.993968549415087e-06, "loss": 1.1213, "step": 19288 }, { "epoch": 0.7547147664136474, "grad_norm": 0.0, "learning_rate": 2.9930643714625183e-06, "loss": 1.0229, "step": 19289 }, { "epoch": 0.7547538931058768, "grad_norm": 0.0, "learning_rate": 2.9921603060325533e-06, "loss": 0.8525, "step": 19290 }, { "epoch": 0.7547930197981063, "grad_norm": 0.0, "learning_rate": 2.9912563531397156e-06, "loss": 0.9898, "step": 19291 }, { "epoch": 0.7548321464903357, "grad_norm": 0.0, "learning_rate": 2.9903525127985144e-06, "loss": 0.9361, "step": 19292 }, { "epoch": 0.7548712731825652, "grad_norm": 0.0, "learning_rate": 2.9894487850234687e-06, "loss": 0.9385, "step": 19293 }, { "epoch": 0.7549103998747946, "grad_norm": 0.0, "learning_rate": 2.988545169829091e-06, "loss": 1.0152, "step": 19294 }, { "epoch": 0.7549495265670241, "grad_norm": 0.0, "learning_rate": 2.9876416672298945e-06, "loss": 0.9604, "step": 19295 }, { "epoch": 0.7549886532592535, "grad_norm": 0.0, "learning_rate": 2.986738277240384e-06, "loss": 1.0637, "step": 19296 }, { "epoch": 0.7550277799514828, "grad_norm": 0.0, "learning_rate": 2.985834999875068e-06, "loss": 0.9299, "step": 19297 }, { "epoch": 0.7550669066437123, "grad_norm": 0.0, "learning_rate": 2.9849318351484522e-06, "loss": 0.9101, "step": 19298 }, { "epoch": 0.7551060333359417, "grad_norm": 0.0, "learning_rate": 2.9840287830750446e-06, "loss": 1.0255, "step": 19299 }, { "epoch": 0.7551451600281712, "grad_norm": 0.0, "learning_rate": 2.98312584366934e-06, "loss": 1.0064, "step": 19300 }, { "epoch": 0.7551842867204006, "grad_norm": 0.0, "learning_rate": 2.9822230169458445e-06, "loss": 1.0172, "step": 19301 }, { "epoch": 0.7552234134126301, "grad_norm": 0.0, "learning_rate": 2.9813203029190505e-06, "loss": 0.9058, "step": 19302 }, { "epoch": 0.7552625401048595, "grad_norm": 0.0, "learning_rate": 2.9804177016034576e-06, "loss": 1.0889, "step": 19303 }, { "epoch": 0.755301666797089, "grad_norm": 0.0, "learning_rate": 2.9795152130135606e-06, "loss": 1.0385, "step": 19304 }, { "epoch": 0.7553407934893184, "grad_norm": 0.0, "learning_rate": 2.9786128371638543e-06, "loss": 0.8628, "step": 19305 }, { "epoch": 0.7553799201815479, "grad_norm": 0.0, "learning_rate": 2.977710574068826e-06, "loss": 0.9891, "step": 19306 }, { "epoch": 0.7554190468737773, "grad_norm": 0.0, "learning_rate": 2.976808423742965e-06, "loss": 1.1055, "step": 19307 }, { "epoch": 0.7554581735660068, "grad_norm": 0.0, "learning_rate": 2.9759063862007644e-06, "loss": 0.9832, "step": 19308 }, { "epoch": 0.7554973002582361, "grad_norm": 0.0, "learning_rate": 2.975004461456702e-06, "loss": 0.9842, "step": 19309 }, { "epoch": 0.7555364269504656, "grad_norm": 0.0, "learning_rate": 2.9741026495252657e-06, "loss": 0.8399, "step": 19310 }, { "epoch": 0.755575553642695, "grad_norm": 0.0, "learning_rate": 2.973200950420936e-06, "loss": 1.0709, "step": 19311 }, { "epoch": 0.7556146803349245, "grad_norm": 0.0, "learning_rate": 2.9722993641581975e-06, "loss": 1.1309, "step": 19312 }, { "epoch": 0.7556538070271539, "grad_norm": 0.0, "learning_rate": 2.9713978907515217e-06, "loss": 1.1648, "step": 19313 }, { "epoch": 0.7556929337193834, "grad_norm": 0.0, "learning_rate": 2.970496530215391e-06, "loss": 0.9144, "step": 19314 }, { "epoch": 0.7557320604116128, "grad_norm": 0.0, "learning_rate": 2.9695952825642725e-06, "loss": 0.9254, "step": 19315 }, { "epoch": 0.7557711871038423, "grad_norm": 0.0, "learning_rate": 2.9686941478126494e-06, "loss": 1.0266, "step": 19316 }, { "epoch": 0.7558103137960717, "grad_norm": 0.0, "learning_rate": 2.9677931259749846e-06, "loss": 0.999, "step": 19317 }, { "epoch": 0.7558494404883012, "grad_norm": 0.0, "learning_rate": 2.9668922170657543e-06, "loss": 0.9934, "step": 19318 }, { "epoch": 0.7558885671805305, "grad_norm": 0.0, "learning_rate": 2.9659914210994156e-06, "loss": 1.0214, "step": 19319 }, { "epoch": 0.75592769387276, "grad_norm": 0.0, "learning_rate": 2.965090738090446e-06, "loss": 1.0076, "step": 19320 }, { "epoch": 0.7559668205649894, "grad_norm": 0.0, "learning_rate": 2.9641901680533015e-06, "loss": 1.02, "step": 19321 }, { "epoch": 0.7560059472572189, "grad_norm": 0.0, "learning_rate": 2.9632897110024493e-06, "loss": 0.9674, "step": 19322 }, { "epoch": 0.7560450739494483, "grad_norm": 0.0, "learning_rate": 2.962389366952344e-06, "loss": 1.0697, "step": 19323 }, { "epoch": 0.7560842006416778, "grad_norm": 0.0, "learning_rate": 2.961489135917447e-06, "loss": 0.9569, "step": 19324 }, { "epoch": 0.7561233273339072, "grad_norm": 0.0, "learning_rate": 2.960589017912214e-06, "loss": 1.0197, "step": 19325 }, { "epoch": 0.7561624540261366, "grad_norm": 0.0, "learning_rate": 2.9596890129511047e-06, "loss": 0.8645, "step": 19326 }, { "epoch": 0.7562015807183661, "grad_norm": 0.0, "learning_rate": 2.9587891210485644e-06, "loss": 1.0314, "step": 19327 }, { "epoch": 0.7562407074105955, "grad_norm": 0.0, "learning_rate": 2.9578893422190467e-06, "loss": 0.9968, "step": 19328 }, { "epoch": 0.756279834102825, "grad_norm": 0.0, "learning_rate": 2.9569896764770024e-06, "loss": 1.017, "step": 19329 }, { "epoch": 0.7563189607950543, "grad_norm": 0.0, "learning_rate": 2.9560901238368823e-06, "loss": 0.9893, "step": 19330 }, { "epoch": 0.7563580874872838, "grad_norm": 0.0, "learning_rate": 2.955190684313124e-06, "loss": 1.0185, "step": 19331 }, { "epoch": 0.7563972141795132, "grad_norm": 0.0, "learning_rate": 2.9542913579201803e-06, "loss": 0.8972, "step": 19332 }, { "epoch": 0.7564363408717427, "grad_norm": 0.0, "learning_rate": 2.9533921446724813e-06, "loss": 1.0919, "step": 19333 }, { "epoch": 0.7564754675639721, "grad_norm": 0.0, "learning_rate": 2.9524930445844814e-06, "loss": 0.9686, "step": 19334 }, { "epoch": 0.7565145942562016, "grad_norm": 0.0, "learning_rate": 2.951594057670608e-06, "loss": 1.0355, "step": 19335 }, { "epoch": 0.756553720948431, "grad_norm": 0.0, "learning_rate": 2.9506951839453057e-06, "loss": 0.9854, "step": 19336 }, { "epoch": 0.7565928476406605, "grad_norm": 0.0, "learning_rate": 2.9497964234229993e-06, "loss": 0.8319, "step": 19337 }, { "epoch": 0.7566319743328899, "grad_norm": 0.0, "learning_rate": 2.9488977761181347e-06, "loss": 0.8911, "step": 19338 }, { "epoch": 0.7566711010251194, "grad_norm": 0.0, "learning_rate": 2.947999242045132e-06, "loss": 1.0679, "step": 19339 }, { "epoch": 0.7567102277173487, "grad_norm": 0.0, "learning_rate": 2.9471008212184295e-06, "loss": 1.0613, "step": 19340 }, { "epoch": 0.7567493544095782, "grad_norm": 0.0, "learning_rate": 2.9462025136524453e-06, "loss": 1.0529, "step": 19341 }, { "epoch": 0.7567884811018076, "grad_norm": 0.0, "learning_rate": 2.9453043193616103e-06, "loss": 1.0095, "step": 19342 }, { "epoch": 0.7568276077940371, "grad_norm": 0.0, "learning_rate": 2.944406238360349e-06, "loss": 1.0226, "step": 19343 }, { "epoch": 0.7568667344862665, "grad_norm": 0.0, "learning_rate": 2.9435082706630836e-06, "loss": 0.9529, "step": 19344 }, { "epoch": 0.756905861178496, "grad_norm": 0.0, "learning_rate": 2.9426104162842317e-06, "loss": 0.9747, "step": 19345 }, { "epoch": 0.7569449878707254, "grad_norm": 0.0, "learning_rate": 2.941712675238212e-06, "loss": 0.9931, "step": 19346 }, { "epoch": 0.7569841145629549, "grad_norm": 0.0, "learning_rate": 2.940815047539446e-06, "loss": 0.8206, "step": 19347 }, { "epoch": 0.7570232412551843, "grad_norm": 0.0, "learning_rate": 2.939917533202341e-06, "loss": 0.9673, "step": 19348 }, { "epoch": 0.7570623679474138, "grad_norm": 0.0, "learning_rate": 2.9390201322413137e-06, "loss": 0.9628, "step": 19349 }, { "epoch": 0.7571014946396432, "grad_norm": 0.0, "learning_rate": 2.9381228446707787e-06, "loss": 0.9402, "step": 19350 }, { "epoch": 0.7571406213318727, "grad_norm": 0.0, "learning_rate": 2.9372256705051384e-06, "loss": 0.908, "step": 19351 }, { "epoch": 0.757179748024102, "grad_norm": 0.0, "learning_rate": 2.936328609758804e-06, "loss": 1.0758, "step": 19352 }, { "epoch": 0.7572188747163315, "grad_norm": 0.0, "learning_rate": 2.9354316624461832e-06, "loss": 1.0237, "step": 19353 }, { "epoch": 0.7572580014085609, "grad_norm": 0.0, "learning_rate": 2.9345348285816755e-06, "loss": 0.9605, "step": 19354 }, { "epoch": 0.7572971281007903, "grad_norm": 0.0, "learning_rate": 2.933638108179684e-06, "loss": 0.946, "step": 19355 }, { "epoch": 0.7573362547930198, "grad_norm": 0.0, "learning_rate": 2.93274150125461e-06, "loss": 0.9592, "step": 19356 }, { "epoch": 0.7573753814852492, "grad_norm": 0.0, "learning_rate": 2.931845007820855e-06, "loss": 1.1512, "step": 19357 }, { "epoch": 0.7574145081774787, "grad_norm": 0.0, "learning_rate": 2.930948627892809e-06, "loss": 0.9173, "step": 19358 }, { "epoch": 0.7574536348697081, "grad_norm": 0.0, "learning_rate": 2.9300523614848743e-06, "loss": 1.029, "step": 19359 }, { "epoch": 0.7574927615619376, "grad_norm": 0.0, "learning_rate": 2.9291562086114322e-06, "loss": 0.917, "step": 19360 }, { "epoch": 0.757531888254167, "grad_norm": 0.0, "learning_rate": 2.9282601692868873e-06, "loss": 0.8929, "step": 19361 }, { "epoch": 0.7575710149463964, "grad_norm": 0.0, "learning_rate": 2.927364243525619e-06, "loss": 1.0453, "step": 19362 }, { "epoch": 0.7576101416386258, "grad_norm": 0.0, "learning_rate": 2.9264684313420224e-06, "loss": 1.0216, "step": 19363 }, { "epoch": 0.7576492683308553, "grad_norm": 0.0, "learning_rate": 2.9255727327504735e-06, "loss": 1.0316, "step": 19364 }, { "epoch": 0.7576883950230847, "grad_norm": 0.0, "learning_rate": 2.9246771477653666e-06, "loss": 0.9733, "step": 19365 }, { "epoch": 0.7577275217153142, "grad_norm": 0.0, "learning_rate": 2.9237816764010763e-06, "loss": 0.9006, "step": 19366 }, { "epoch": 0.7577666484075436, "grad_norm": 0.0, "learning_rate": 2.922886318671989e-06, "loss": 0.9083, "step": 19367 }, { "epoch": 0.7578057750997731, "grad_norm": 0.0, "learning_rate": 2.9219910745924764e-06, "loss": 0.9261, "step": 19368 }, { "epoch": 0.7578449017920025, "grad_norm": 0.0, "learning_rate": 2.921095944176916e-06, "loss": 1.1008, "step": 19369 }, { "epoch": 0.757884028484232, "grad_norm": 0.0, "learning_rate": 2.920200927439686e-06, "loss": 1.0097, "step": 19370 }, { "epoch": 0.7579231551764614, "grad_norm": 0.0, "learning_rate": 2.91930602439516e-06, "loss": 1.0206, "step": 19371 }, { "epoch": 0.7579622818686909, "grad_norm": 0.0, "learning_rate": 2.918411235057704e-06, "loss": 0.9542, "step": 19372 }, { "epoch": 0.7580014085609202, "grad_norm": 0.0, "learning_rate": 2.91751655944169e-06, "loss": 1.0056, "step": 19373 }, { "epoch": 0.7580405352531497, "grad_norm": 0.0, "learning_rate": 2.9166219975614852e-06, "loss": 0.9725, "step": 19374 }, { "epoch": 0.7580796619453791, "grad_norm": 0.0, "learning_rate": 2.9157275494314576e-06, "loss": 0.9466, "step": 19375 }, { "epoch": 0.7581187886376086, "grad_norm": 0.0, "learning_rate": 2.914833215065965e-06, "loss": 0.9422, "step": 19376 }, { "epoch": 0.758157915329838, "grad_norm": 0.0, "learning_rate": 2.913938994479374e-06, "loss": 0.8899, "step": 19377 }, { "epoch": 0.7581970420220675, "grad_norm": 0.0, "learning_rate": 2.9130448876860427e-06, "loss": 1.0073, "step": 19378 }, { "epoch": 0.7582361687142969, "grad_norm": 0.0, "learning_rate": 2.9121508947003343e-06, "loss": 0.9684, "step": 19379 }, { "epoch": 0.7582752954065264, "grad_norm": 0.0, "learning_rate": 2.911257015536596e-06, "loss": 0.8301, "step": 19380 }, { "epoch": 0.7583144220987558, "grad_norm": 0.0, "learning_rate": 2.9103632502091917e-06, "loss": 1.1815, "step": 19381 }, { "epoch": 0.7583535487909852, "grad_norm": 0.0, "learning_rate": 2.9094695987324627e-06, "loss": 1.0489, "step": 19382 }, { "epoch": 0.7583926754832147, "grad_norm": 0.0, "learning_rate": 2.9085760611207736e-06, "loss": 0.9843, "step": 19383 }, { "epoch": 0.758431802175444, "grad_norm": 0.0, "learning_rate": 2.9076826373884647e-06, "loss": 0.9286, "step": 19384 }, { "epoch": 0.7584709288676735, "grad_norm": 0.0, "learning_rate": 2.9067893275498872e-06, "loss": 0.9798, "step": 19385 }, { "epoch": 0.7585100555599029, "grad_norm": 0.0, "learning_rate": 2.9058961316193823e-06, "loss": 0.9629, "step": 19386 }, { "epoch": 0.7585491822521324, "grad_norm": 0.0, "learning_rate": 2.905003049611297e-06, "loss": 0.9696, "step": 19387 }, { "epoch": 0.7585883089443618, "grad_norm": 0.0, "learning_rate": 2.9041100815399715e-06, "loss": 0.9404, "step": 19388 }, { "epoch": 0.7586274356365913, "grad_norm": 0.0, "learning_rate": 2.903217227419749e-06, "loss": 1.0674, "step": 19389 }, { "epoch": 0.7586665623288207, "grad_norm": 0.0, "learning_rate": 2.9023244872649626e-06, "loss": 0.9165, "step": 19390 }, { "epoch": 0.7587056890210502, "grad_norm": 0.0, "learning_rate": 2.9014318610899504e-06, "loss": 0.9888, "step": 19391 }, { "epoch": 0.7587448157132796, "grad_norm": 0.0, "learning_rate": 2.9005393489090506e-06, "loss": 0.9481, "step": 19392 }, { "epoch": 0.7587839424055091, "grad_norm": 0.0, "learning_rate": 2.89964695073659e-06, "loss": 1.0099, "step": 19393 }, { "epoch": 0.7588230690977384, "grad_norm": 0.0, "learning_rate": 2.8987546665869025e-06, "loss": 0.876, "step": 19394 }, { "epoch": 0.7588621957899679, "grad_norm": 0.0, "learning_rate": 2.897862496474316e-06, "loss": 1.0276, "step": 19395 }, { "epoch": 0.7589013224821973, "grad_norm": 0.0, "learning_rate": 2.8969704404131626e-06, "loss": 0.973, "step": 19396 }, { "epoch": 0.7589404491744268, "grad_norm": 0.0, "learning_rate": 2.8960784984177594e-06, "loss": 0.9266, "step": 19397 }, { "epoch": 0.7589795758666562, "grad_norm": 0.0, "learning_rate": 2.8951866705024366e-06, "loss": 1.0271, "step": 19398 }, { "epoch": 0.7590187025588857, "grad_norm": 0.0, "learning_rate": 2.8942949566815103e-06, "loss": 1.0266, "step": 19399 }, { "epoch": 0.7590578292511151, "grad_norm": 0.0, "learning_rate": 2.893403356969303e-06, "loss": 1.0404, "step": 19400 }, { "epoch": 0.7590969559433446, "grad_norm": 0.0, "learning_rate": 2.8925118713801325e-06, "loss": 0.9104, "step": 19401 }, { "epoch": 0.759136082635574, "grad_norm": 0.0, "learning_rate": 2.8916204999283184e-06, "loss": 0.9929, "step": 19402 }, { "epoch": 0.7591752093278035, "grad_norm": 0.0, "learning_rate": 2.8907292426281686e-06, "loss": 0.8789, "step": 19403 }, { "epoch": 0.7592143360200329, "grad_norm": 0.0, "learning_rate": 2.889838099493999e-06, "loss": 0.919, "step": 19404 }, { "epoch": 0.7592534627122624, "grad_norm": 0.0, "learning_rate": 2.88894707054012e-06, "loss": 0.8449, "step": 19405 }, { "epoch": 0.7592925894044917, "grad_norm": 0.0, "learning_rate": 2.888056155780844e-06, "loss": 0.9841, "step": 19406 }, { "epoch": 0.7593317160967212, "grad_norm": 0.0, "learning_rate": 2.8871653552304703e-06, "loss": 0.9784, "step": 19407 }, { "epoch": 0.7593708427889506, "grad_norm": 0.0, "learning_rate": 2.8862746689033117e-06, "loss": 1.0973, "step": 19408 }, { "epoch": 0.7594099694811801, "grad_norm": 0.0, "learning_rate": 2.8853840968136614e-06, "loss": 0.8254, "step": 19409 }, { "epoch": 0.7594490961734095, "grad_norm": 0.0, "learning_rate": 2.8844936389758337e-06, "loss": 1.0434, "step": 19410 }, { "epoch": 0.7594882228656389, "grad_norm": 0.0, "learning_rate": 2.8836032954041194e-06, "loss": 0.8993, "step": 19411 }, { "epoch": 0.7595273495578684, "grad_norm": 0.0, "learning_rate": 2.882713066112821e-06, "loss": 1.0246, "step": 19412 }, { "epoch": 0.7595664762500978, "grad_norm": 0.0, "learning_rate": 2.8818229511162265e-06, "loss": 0.9277, "step": 19413 }, { "epoch": 0.7596056029423273, "grad_norm": 0.0, "learning_rate": 2.880932950428642e-06, "loss": 0.9418, "step": 19414 }, { "epoch": 0.7596447296345566, "grad_norm": 0.0, "learning_rate": 2.8800430640643507e-06, "loss": 0.9599, "step": 19415 }, { "epoch": 0.7596838563267861, "grad_norm": 0.0, "learning_rate": 2.8791532920376496e-06, "loss": 0.935, "step": 19416 }, { "epoch": 0.7597229830190155, "grad_norm": 0.0, "learning_rate": 2.8782636343628203e-06, "loss": 1.034, "step": 19417 }, { "epoch": 0.759762109711245, "grad_norm": 0.0, "learning_rate": 2.8773740910541524e-06, "loss": 0.987, "step": 19418 }, { "epoch": 0.7598012364034744, "grad_norm": 0.0, "learning_rate": 2.8764846621259313e-06, "loss": 0.919, "step": 19419 }, { "epoch": 0.7598403630957039, "grad_norm": 0.0, "learning_rate": 2.8755953475924447e-06, "loss": 0.8838, "step": 19420 }, { "epoch": 0.7598794897879333, "grad_norm": 0.0, "learning_rate": 2.874706147467965e-06, "loss": 1.1432, "step": 19421 }, { "epoch": 0.7599186164801628, "grad_norm": 0.0, "learning_rate": 2.873817061766776e-06, "loss": 1.0025, "step": 19422 }, { "epoch": 0.7599577431723922, "grad_norm": 0.0, "learning_rate": 2.8729280905031563e-06, "loss": 0.9006, "step": 19423 }, { "epoch": 0.7599968698646217, "grad_norm": 0.0, "learning_rate": 2.872039233691384e-06, "loss": 0.9237, "step": 19424 }, { "epoch": 0.7600359965568511, "grad_norm": 0.0, "learning_rate": 2.8711504913457256e-06, "loss": 0.9897, "step": 19425 }, { "epoch": 0.7600751232490806, "grad_norm": 0.0, "learning_rate": 2.8702618634804613e-06, "loss": 1.0299, "step": 19426 }, { "epoch": 0.7601142499413099, "grad_norm": 0.0, "learning_rate": 2.869373350109851e-06, "loss": 0.9894, "step": 19427 }, { "epoch": 0.7601533766335394, "grad_norm": 0.0, "learning_rate": 2.868484951248175e-06, "loss": 0.9211, "step": 19428 }, { "epoch": 0.7601925033257688, "grad_norm": 0.0, "learning_rate": 2.867596666909692e-06, "loss": 0.9143, "step": 19429 }, { "epoch": 0.7602316300179983, "grad_norm": 0.0, "learning_rate": 2.8667084971086724e-06, "loss": 0.9202, "step": 19430 }, { "epoch": 0.7602707567102277, "grad_norm": 0.0, "learning_rate": 2.8658204418593726e-06, "loss": 0.9375, "step": 19431 }, { "epoch": 0.7603098834024572, "grad_norm": 0.0, "learning_rate": 2.8649325011760566e-06, "loss": 0.8796, "step": 19432 }, { "epoch": 0.7603490100946866, "grad_norm": 0.0, "learning_rate": 2.8640446750729846e-06, "loss": 0.9223, "step": 19433 }, { "epoch": 0.7603881367869161, "grad_norm": 0.0, "learning_rate": 2.863156963564415e-06, "loss": 0.9337, "step": 19434 }, { "epoch": 0.7604272634791455, "grad_norm": 0.0, "learning_rate": 2.8622693666645996e-06, "loss": 1.0615, "step": 19435 }, { "epoch": 0.760466390171375, "grad_norm": 0.0, "learning_rate": 2.861381884387794e-06, "loss": 1.033, "step": 19436 }, { "epoch": 0.7605055168636043, "grad_norm": 0.0, "learning_rate": 2.8604945167482532e-06, "loss": 0.9799, "step": 19437 }, { "epoch": 0.7605446435558338, "grad_norm": 0.0, "learning_rate": 2.8596072637602213e-06, "loss": 0.9118, "step": 19438 }, { "epoch": 0.7605837702480632, "grad_norm": 0.0, "learning_rate": 2.858720125437948e-06, "loss": 1.0078, "step": 19439 }, { "epoch": 0.7606228969402926, "grad_norm": 0.0, "learning_rate": 2.857833101795683e-06, "loss": 1.0138, "step": 19440 }, { "epoch": 0.7606620236325221, "grad_norm": 0.0, "learning_rate": 2.8569461928476703e-06, "loss": 1.0001, "step": 19441 }, { "epoch": 0.7607011503247515, "grad_norm": 0.0, "learning_rate": 2.8560593986081484e-06, "loss": 0.9146, "step": 19442 }, { "epoch": 0.760740277016981, "grad_norm": 0.0, "learning_rate": 2.85517271909136e-06, "loss": 0.9715, "step": 19443 }, { "epoch": 0.7607794037092104, "grad_norm": 0.0, "learning_rate": 2.8542861543115462e-06, "loss": 0.9286, "step": 19444 }, { "epoch": 0.7608185304014399, "grad_norm": 0.0, "learning_rate": 2.8533997042829444e-06, "loss": 1.0238, "step": 19445 }, { "epoch": 0.7608576570936693, "grad_norm": 0.0, "learning_rate": 2.8525133690197857e-06, "loss": 0.9342, "step": 19446 }, { "epoch": 0.7608967837858988, "grad_norm": 0.0, "learning_rate": 2.851627148536309e-06, "loss": 0.96, "step": 19447 }, { "epoch": 0.7609359104781281, "grad_norm": 0.0, "learning_rate": 2.8507410428467395e-06, "loss": 0.9323, "step": 19448 }, { "epoch": 0.7609750371703576, "grad_norm": 0.0, "learning_rate": 2.849855051965311e-06, "loss": 0.9437, "step": 19449 }, { "epoch": 0.761014163862587, "grad_norm": 0.0, "learning_rate": 2.848969175906251e-06, "loss": 0.979, "step": 19450 }, { "epoch": 0.7610532905548165, "grad_norm": 0.0, "learning_rate": 2.8480834146837877e-06, "loss": 1.032, "step": 19451 }, { "epoch": 0.7610924172470459, "grad_norm": 0.0, "learning_rate": 2.84719776831214e-06, "loss": 0.9919, "step": 19452 }, { "epoch": 0.7611315439392754, "grad_norm": 0.0, "learning_rate": 2.846312236805533e-06, "loss": 0.8361, "step": 19453 }, { "epoch": 0.7611706706315048, "grad_norm": 0.0, "learning_rate": 2.8454268201781876e-06, "loss": 1.0078, "step": 19454 }, { "epoch": 0.7612097973237343, "grad_norm": 0.0, "learning_rate": 2.8445415184443248e-06, "loss": 1.0372, "step": 19455 }, { "epoch": 0.7612489240159637, "grad_norm": 0.0, "learning_rate": 2.8436563316181567e-06, "loss": 1.0199, "step": 19456 }, { "epoch": 0.7612880507081932, "grad_norm": 0.0, "learning_rate": 2.8427712597139032e-06, "loss": 0.9778, "step": 19457 }, { "epoch": 0.7613271774004226, "grad_norm": 0.0, "learning_rate": 2.841886302745769e-06, "loss": 1.0591, "step": 19458 }, { "epoch": 0.761366304092652, "grad_norm": 0.0, "learning_rate": 2.8410014607279767e-06, "loss": 0.9507, "step": 19459 }, { "epoch": 0.7614054307848814, "grad_norm": 0.0, "learning_rate": 2.840116733674727e-06, "loss": 0.9754, "step": 19460 }, { "epoch": 0.7614445574771109, "grad_norm": 0.0, "learning_rate": 2.839232121600234e-06, "loss": 1.0062, "step": 19461 }, { "epoch": 0.7614836841693403, "grad_norm": 0.0, "learning_rate": 2.8383476245186946e-06, "loss": 0.8908, "step": 19462 }, { "epoch": 0.7615228108615698, "grad_norm": 0.0, "learning_rate": 2.8374632424443236e-06, "loss": 1.0203, "step": 19463 }, { "epoch": 0.7615619375537992, "grad_norm": 0.0, "learning_rate": 2.8365789753913154e-06, "loss": 1.0141, "step": 19464 }, { "epoch": 0.7616010642460287, "grad_norm": 0.0, "learning_rate": 2.8356948233738746e-06, "loss": 0.9947, "step": 19465 }, { "epoch": 0.7616401909382581, "grad_norm": 0.0, "learning_rate": 2.834810786406196e-06, "loss": 0.8395, "step": 19466 }, { "epoch": 0.7616793176304876, "grad_norm": 0.0, "learning_rate": 2.8339268645024766e-06, "loss": 1.0071, "step": 19467 }, { "epoch": 0.761718444322717, "grad_norm": 0.0, "learning_rate": 2.833043057676913e-06, "loss": 0.9079, "step": 19468 }, { "epoch": 0.7617575710149463, "grad_norm": 0.0, "learning_rate": 2.8321593659436998e-06, "loss": 0.9971, "step": 19469 }, { "epoch": 0.7617966977071758, "grad_norm": 0.0, "learning_rate": 2.8312757893170216e-06, "loss": 0.9043, "step": 19470 }, { "epoch": 0.7618358243994052, "grad_norm": 0.0, "learning_rate": 2.8303923278110724e-06, "loss": 0.8658, "step": 19471 }, { "epoch": 0.7618749510916347, "grad_norm": 0.0, "learning_rate": 2.829508981440038e-06, "loss": 0.9991, "step": 19472 }, { "epoch": 0.7619140777838641, "grad_norm": 0.0, "learning_rate": 2.828625750218107e-06, "loss": 1.0527, "step": 19473 }, { "epoch": 0.7619532044760936, "grad_norm": 0.0, "learning_rate": 2.8277426341594572e-06, "loss": 1.1654, "step": 19474 }, { "epoch": 0.761992331168323, "grad_norm": 0.0, "learning_rate": 2.826859633278277e-06, "loss": 0.9797, "step": 19475 }, { "epoch": 0.7620314578605525, "grad_norm": 0.0, "learning_rate": 2.8259767475887355e-06, "loss": 0.9324, "step": 19476 }, { "epoch": 0.7620705845527819, "grad_norm": 0.0, "learning_rate": 2.8250939771050257e-06, "loss": 0.9127, "step": 19477 }, { "epoch": 0.7621097112450114, "grad_norm": 0.0, "learning_rate": 2.8242113218413115e-06, "loss": 0.934, "step": 19478 }, { "epoch": 0.7621488379372408, "grad_norm": 0.0, "learning_rate": 2.823328781811775e-06, "loss": 0.9915, "step": 19479 }, { "epoch": 0.7621879646294702, "grad_norm": 0.0, "learning_rate": 2.8224463570305828e-06, "loss": 1.0238, "step": 19480 }, { "epoch": 0.7622270913216996, "grad_norm": 0.0, "learning_rate": 2.8215640475119077e-06, "loss": 0.9824, "step": 19481 }, { "epoch": 0.7622662180139291, "grad_norm": 0.0, "learning_rate": 2.8206818532699186e-06, "loss": 1.0065, "step": 19482 }, { "epoch": 0.7623053447061585, "grad_norm": 0.0, "learning_rate": 2.8197997743187867e-06, "loss": 0.9266, "step": 19483 }, { "epoch": 0.762344471398388, "grad_norm": 0.0, "learning_rate": 2.818917810672669e-06, "loss": 1.028, "step": 19484 }, { "epoch": 0.7623835980906174, "grad_norm": 0.0, "learning_rate": 2.8180359623457345e-06, "loss": 0.947, "step": 19485 }, { "epoch": 0.7624227247828469, "grad_norm": 0.0, "learning_rate": 2.817154229352145e-06, "loss": 1.0063, "step": 19486 }, { "epoch": 0.7624618514750763, "grad_norm": 0.0, "learning_rate": 2.816272611706055e-06, "loss": 1.0551, "step": 19487 }, { "epoch": 0.7625009781673058, "grad_norm": 0.0, "learning_rate": 2.8153911094216246e-06, "loss": 0.996, "step": 19488 }, { "epoch": 0.7625401048595352, "grad_norm": 0.0, "learning_rate": 2.8145097225130104e-06, "loss": 1.0496, "step": 19489 }, { "epoch": 0.7625792315517647, "grad_norm": 0.0, "learning_rate": 2.813628450994369e-06, "loss": 0.8943, "step": 19490 }, { "epoch": 0.762618358243994, "grad_norm": 0.0, "learning_rate": 2.8127472948798474e-06, "loss": 0.9879, "step": 19491 }, { "epoch": 0.7626574849362235, "grad_norm": 0.0, "learning_rate": 2.8118662541836006e-06, "loss": 0.9638, "step": 19492 }, { "epoch": 0.7626966116284529, "grad_norm": 0.0, "learning_rate": 2.8109853289197685e-06, "loss": 1.0597, "step": 19493 }, { "epoch": 0.7627357383206824, "grad_norm": 0.0, "learning_rate": 2.81010451910251e-06, "loss": 1.0113, "step": 19494 }, { "epoch": 0.7627748650129118, "grad_norm": 0.0, "learning_rate": 2.80922382474596e-06, "loss": 0.9549, "step": 19495 }, { "epoch": 0.7628139917051412, "grad_norm": 0.0, "learning_rate": 2.808343245864268e-06, "loss": 1.0129, "step": 19496 }, { "epoch": 0.7628531183973707, "grad_norm": 0.0, "learning_rate": 2.8074627824715683e-06, "loss": 0.8345, "step": 19497 }, { "epoch": 0.7628922450896001, "grad_norm": 0.0, "learning_rate": 2.8065824345820048e-06, "loss": 0.8701, "step": 19498 }, { "epoch": 0.7629313717818296, "grad_norm": 0.0, "learning_rate": 2.8057022022097125e-06, "loss": 0.8839, "step": 19499 }, { "epoch": 0.762970498474059, "grad_norm": 0.0, "learning_rate": 2.8048220853688314e-06, "loss": 0.888, "step": 19500 }, { "epoch": 0.7630096251662885, "grad_norm": 0.0, "learning_rate": 2.8039420840734887e-06, "loss": 1.0005, "step": 19501 }, { "epoch": 0.7630487518585178, "grad_norm": 0.0, "learning_rate": 2.803062198337818e-06, "loss": 0.8009, "step": 19502 }, { "epoch": 0.7630878785507473, "grad_norm": 0.0, "learning_rate": 2.8021824281759514e-06, "loss": 0.984, "step": 19503 }, { "epoch": 0.7631270052429767, "grad_norm": 0.0, "learning_rate": 2.801302773602018e-06, "loss": 1.0792, "step": 19504 }, { "epoch": 0.7631661319352062, "grad_norm": 0.0, "learning_rate": 2.8004232346301384e-06, "loss": 0.9613, "step": 19505 }, { "epoch": 0.7632052586274356, "grad_norm": 0.0, "learning_rate": 2.799543811274443e-06, "loss": 0.9949, "step": 19506 }, { "epoch": 0.7632443853196651, "grad_norm": 0.0, "learning_rate": 2.798664503549047e-06, "loss": 0.9873, "step": 19507 }, { "epoch": 0.7632835120118945, "grad_norm": 0.0, "learning_rate": 2.7977853114680796e-06, "loss": 1.0979, "step": 19508 }, { "epoch": 0.763322638704124, "grad_norm": 0.0, "learning_rate": 2.7969062350456522e-06, "loss": 0.9472, "step": 19509 }, { "epoch": 0.7633617653963534, "grad_norm": 0.0, "learning_rate": 2.796027274295888e-06, "loss": 1.0439, "step": 19510 }, { "epoch": 0.7634008920885829, "grad_norm": 0.0, "learning_rate": 2.7951484292328925e-06, "loss": 0.9558, "step": 19511 }, { "epoch": 0.7634400187808122, "grad_norm": 0.0, "learning_rate": 2.7942696998707918e-06, "loss": 0.8894, "step": 19512 }, { "epoch": 0.7634791454730417, "grad_norm": 0.0, "learning_rate": 2.793391086223687e-06, "loss": 1.0409, "step": 19513 }, { "epoch": 0.7635182721652711, "grad_norm": 0.0, "learning_rate": 2.7925125883056936e-06, "loss": 1.0362, "step": 19514 }, { "epoch": 0.7635573988575006, "grad_norm": 0.0, "learning_rate": 2.791634206130913e-06, "loss": 0.965, "step": 19515 }, { "epoch": 0.76359652554973, "grad_norm": 0.0, "learning_rate": 2.7907559397134554e-06, "loss": 1.0666, "step": 19516 }, { "epoch": 0.7636356522419595, "grad_norm": 0.0, "learning_rate": 2.7898777890674246e-06, "loss": 1.0122, "step": 19517 }, { "epoch": 0.7636747789341889, "grad_norm": 0.0, "learning_rate": 2.7889997542069234e-06, "loss": 0.9614, "step": 19518 }, { "epoch": 0.7637139056264184, "grad_norm": 0.0, "learning_rate": 2.7881218351460473e-06, "loss": 0.9126, "step": 19519 }, { "epoch": 0.7637530323186478, "grad_norm": 0.0, "learning_rate": 2.787244031898898e-06, "loss": 0.9706, "step": 19520 }, { "epoch": 0.7637921590108773, "grad_norm": 0.0, "learning_rate": 2.7863663444795706e-06, "loss": 1.0919, "step": 19521 }, { "epoch": 0.7638312857031067, "grad_norm": 0.0, "learning_rate": 2.7854887729021652e-06, "loss": 1.1076, "step": 19522 }, { "epoch": 0.7638704123953362, "grad_norm": 0.0, "learning_rate": 2.7846113171807656e-06, "loss": 0.9488, "step": 19523 }, { "epoch": 0.7639095390875655, "grad_norm": 0.0, "learning_rate": 2.7837339773294704e-06, "loss": 0.9853, "step": 19524 }, { "epoch": 0.7639486657797949, "grad_norm": 0.0, "learning_rate": 2.782856753362361e-06, "loss": 1.1, "step": 19525 }, { "epoch": 0.7639877924720244, "grad_norm": 0.0, "learning_rate": 2.7819796452935286e-06, "loss": 0.9706, "step": 19526 }, { "epoch": 0.7640269191642538, "grad_norm": 0.0, "learning_rate": 2.78110265313706e-06, "loss": 0.8768, "step": 19527 }, { "epoch": 0.7640660458564833, "grad_norm": 0.0, "learning_rate": 2.7802257769070384e-06, "loss": 1.0143, "step": 19528 }, { "epoch": 0.7641051725487127, "grad_norm": 0.0, "learning_rate": 2.779349016617542e-06, "loss": 1.0145, "step": 19529 }, { "epoch": 0.7641442992409422, "grad_norm": 0.0, "learning_rate": 2.7784723722826522e-06, "loss": 0.9642, "step": 19530 }, { "epoch": 0.7641834259331716, "grad_norm": 0.0, "learning_rate": 2.7775958439164496e-06, "loss": 0.8903, "step": 19531 }, { "epoch": 0.7642225526254011, "grad_norm": 0.0, "learning_rate": 2.7767194315330047e-06, "loss": 0.9504, "step": 19532 }, { "epoch": 0.7642616793176304, "grad_norm": 0.0, "learning_rate": 2.7758431351463944e-06, "loss": 0.8998, "step": 19533 }, { "epoch": 0.76430080600986, "grad_norm": 0.0, "learning_rate": 2.7749669547706914e-06, "loss": 1.039, "step": 19534 }, { "epoch": 0.7643399327020893, "grad_norm": 0.0, "learning_rate": 2.7740908904199683e-06, "loss": 0.9895, "step": 19535 }, { "epoch": 0.7643790593943188, "grad_norm": 0.0, "learning_rate": 2.773214942108288e-06, "loss": 1.1109, "step": 19536 }, { "epoch": 0.7644181860865482, "grad_norm": 0.0, "learning_rate": 2.772339109849723e-06, "loss": 0.9072, "step": 19537 }, { "epoch": 0.7644573127787777, "grad_norm": 0.0, "learning_rate": 2.771463393658329e-06, "loss": 0.9974, "step": 19538 }, { "epoch": 0.7644964394710071, "grad_norm": 0.0, "learning_rate": 2.770587793548182e-06, "loss": 0.9331, "step": 19539 }, { "epoch": 0.7645355661632366, "grad_norm": 0.0, "learning_rate": 2.769712309533332e-06, "loss": 0.9418, "step": 19540 }, { "epoch": 0.764574692855466, "grad_norm": 0.0, "learning_rate": 2.768836941627846e-06, "loss": 0.8229, "step": 19541 }, { "epoch": 0.7646138195476955, "grad_norm": 0.0, "learning_rate": 2.767961689845774e-06, "loss": 1.0129, "step": 19542 }, { "epoch": 0.7646529462399249, "grad_norm": 0.0, "learning_rate": 2.767086554201175e-06, "loss": 1.0043, "step": 19543 }, { "epoch": 0.7646920729321544, "grad_norm": 0.0, "learning_rate": 2.766211534708102e-06, "loss": 0.8429, "step": 19544 }, { "epoch": 0.7647311996243837, "grad_norm": 0.0, "learning_rate": 2.7653366313806117e-06, "loss": 1.1288, "step": 19545 }, { "epoch": 0.7647703263166132, "grad_norm": 0.0, "learning_rate": 2.764461844232745e-06, "loss": 0.926, "step": 19546 }, { "epoch": 0.7648094530088426, "grad_norm": 0.0, "learning_rate": 2.7635871732785557e-06, "loss": 0.89, "step": 19547 }, { "epoch": 0.7648485797010721, "grad_norm": 0.0, "learning_rate": 2.7627126185320884e-06, "loss": 0.9813, "step": 19548 }, { "epoch": 0.7648877063933015, "grad_norm": 0.0, "learning_rate": 2.76183818000739e-06, "loss": 1.02, "step": 19549 }, { "epoch": 0.764926833085531, "grad_norm": 0.0, "learning_rate": 2.7609638577184982e-06, "loss": 0.8663, "step": 19550 }, { "epoch": 0.7649659597777604, "grad_norm": 0.0, "learning_rate": 2.7600896516794563e-06, "loss": 0.9328, "step": 19551 }, { "epoch": 0.7650050864699899, "grad_norm": 0.0, "learning_rate": 2.7592155619043015e-06, "loss": 0.9861, "step": 19552 }, { "epoch": 0.7650442131622193, "grad_norm": 0.0, "learning_rate": 2.758341588407075e-06, "loss": 1.01, "step": 19553 }, { "epoch": 0.7650833398544487, "grad_norm": 0.0, "learning_rate": 2.757467731201805e-06, "loss": 0.9829, "step": 19554 }, { "epoch": 0.7651224665466781, "grad_norm": 0.0, "learning_rate": 2.7565939903025305e-06, "loss": 0.8591, "step": 19555 }, { "epoch": 0.7651615932389075, "grad_norm": 0.0, "learning_rate": 2.7557203657232757e-06, "loss": 0.9805, "step": 19556 }, { "epoch": 0.765200719931137, "grad_norm": 0.0, "learning_rate": 2.7548468574780784e-06, "loss": 0.88, "step": 19557 }, { "epoch": 0.7652398466233664, "grad_norm": 0.0, "learning_rate": 2.7539734655809604e-06, "loss": 0.8987, "step": 19558 }, { "epoch": 0.7652789733155959, "grad_norm": 0.0, "learning_rate": 2.75310019004595e-06, "loss": 0.9941, "step": 19559 }, { "epoch": 0.7653181000078253, "grad_norm": 0.0, "learning_rate": 2.7522270308870647e-06, "loss": 0.9576, "step": 19560 }, { "epoch": 0.7653572267000548, "grad_norm": 0.0, "learning_rate": 2.7513539881183373e-06, "loss": 1.0488, "step": 19561 }, { "epoch": 0.7653963533922842, "grad_norm": 0.0, "learning_rate": 2.7504810617537793e-06, "loss": 0.8735, "step": 19562 }, { "epoch": 0.7654354800845137, "grad_norm": 0.0, "learning_rate": 2.749608251807413e-06, "loss": 0.9283, "step": 19563 }, { "epoch": 0.7654746067767431, "grad_norm": 0.0, "learning_rate": 2.7487355582932505e-06, "loss": 1.0076, "step": 19564 }, { "epoch": 0.7655137334689726, "grad_norm": 0.0, "learning_rate": 2.747862981225309e-06, "loss": 0.9595, "step": 19565 }, { "epoch": 0.7655528601612019, "grad_norm": 0.0, "learning_rate": 2.7469905206176006e-06, "loss": 1.0342, "step": 19566 }, { "epoch": 0.7655919868534314, "grad_norm": 0.0, "learning_rate": 2.7461181764841383e-06, "loss": 1.056, "step": 19567 }, { "epoch": 0.7656311135456608, "grad_norm": 0.0, "learning_rate": 2.7452459488389262e-06, "loss": 0.9467, "step": 19568 }, { "epoch": 0.7656702402378903, "grad_norm": 0.0, "learning_rate": 2.744373837695973e-06, "loss": 0.9302, "step": 19569 }, { "epoch": 0.7657093669301197, "grad_norm": 0.0, "learning_rate": 2.743501843069286e-06, "loss": 0.9192, "step": 19570 }, { "epoch": 0.7657484936223492, "grad_norm": 0.0, "learning_rate": 2.742629964972865e-06, "loss": 0.9818, "step": 19571 }, { "epoch": 0.7657876203145786, "grad_norm": 0.0, "learning_rate": 2.7417582034207122e-06, "loss": 1.0778, "step": 19572 }, { "epoch": 0.7658267470068081, "grad_norm": 0.0, "learning_rate": 2.7408865584268305e-06, "loss": 1.0345, "step": 19573 }, { "epoch": 0.7658658736990375, "grad_norm": 0.0, "learning_rate": 2.740015030005212e-06, "loss": 0.977, "step": 19574 }, { "epoch": 0.765905000391267, "grad_norm": 0.0, "learning_rate": 2.739143618169855e-06, "loss": 1.0043, "step": 19575 }, { "epoch": 0.7659441270834964, "grad_norm": 0.0, "learning_rate": 2.738272322934756e-06, "loss": 1.0146, "step": 19576 }, { "epoch": 0.7659832537757258, "grad_norm": 0.0, "learning_rate": 2.7374011443139004e-06, "loss": 1.0191, "step": 19577 }, { "epoch": 0.7660223804679552, "grad_norm": 0.0, "learning_rate": 2.7365300823212826e-06, "loss": 0.8629, "step": 19578 }, { "epoch": 0.7660615071601847, "grad_norm": 0.0, "learning_rate": 2.7356591369708894e-06, "loss": 0.9287, "step": 19579 }, { "epoch": 0.7661006338524141, "grad_norm": 0.0, "learning_rate": 2.7347883082767113e-06, "loss": 1.0492, "step": 19580 }, { "epoch": 0.7661397605446436, "grad_norm": 0.0, "learning_rate": 2.7339175962527263e-06, "loss": 1.0728, "step": 19581 }, { "epoch": 0.766178887236873, "grad_norm": 0.0, "learning_rate": 2.7330470009129217e-06, "loss": 0.9308, "step": 19582 }, { "epoch": 0.7662180139291024, "grad_norm": 0.0, "learning_rate": 2.7321765222712717e-06, "loss": 0.9365, "step": 19583 }, { "epoch": 0.7662571406213319, "grad_norm": 0.0, "learning_rate": 2.7313061603417646e-06, "loss": 0.9921, "step": 19584 }, { "epoch": 0.7662962673135613, "grad_norm": 0.0, "learning_rate": 2.7304359151383697e-06, "loss": 0.9699, "step": 19585 }, { "epoch": 0.7663353940057908, "grad_norm": 0.0, "learning_rate": 2.729565786675068e-06, "loss": 1.0844, "step": 19586 }, { "epoch": 0.7663745206980201, "grad_norm": 0.0, "learning_rate": 2.728695774965823e-06, "loss": 1.0976, "step": 19587 }, { "epoch": 0.7664136473902496, "grad_norm": 0.0, "learning_rate": 2.7278258800246184e-06, "loss": 0.9509, "step": 19588 }, { "epoch": 0.766452774082479, "grad_norm": 0.0, "learning_rate": 2.7269561018654146e-06, "loss": 1.0366, "step": 19589 }, { "epoch": 0.7664919007747085, "grad_norm": 0.0, "learning_rate": 2.726086440502186e-06, "loss": 1.0735, "step": 19590 }, { "epoch": 0.7665310274669379, "grad_norm": 0.0, "learning_rate": 2.72521689594889e-06, "loss": 0.8486, "step": 19591 }, { "epoch": 0.7665701541591674, "grad_norm": 0.0, "learning_rate": 2.724347468219496e-06, "loss": 1.1319, "step": 19592 }, { "epoch": 0.7666092808513968, "grad_norm": 0.0, "learning_rate": 2.7234781573279645e-06, "loss": 0.9738, "step": 19593 }, { "epoch": 0.7666484075436263, "grad_norm": 0.0, "learning_rate": 2.722608963288258e-06, "loss": 0.9683, "step": 19594 }, { "epoch": 0.7666875342358557, "grad_norm": 0.0, "learning_rate": 2.7217398861143306e-06, "loss": 0.9966, "step": 19595 }, { "epoch": 0.7667266609280852, "grad_norm": 0.0, "learning_rate": 2.720870925820139e-06, "loss": 0.9123, "step": 19596 }, { "epoch": 0.7667657876203146, "grad_norm": 0.0, "learning_rate": 2.7200020824196404e-06, "loss": 0.9327, "step": 19597 }, { "epoch": 0.766804914312544, "grad_norm": 0.0, "learning_rate": 2.7191333559267895e-06, "loss": 0.8216, "step": 19598 }, { "epoch": 0.7668440410047734, "grad_norm": 0.0, "learning_rate": 2.718264746355529e-06, "loss": 0.9723, "step": 19599 }, { "epoch": 0.7668831676970029, "grad_norm": 0.0, "learning_rate": 2.717396253719816e-06, "loss": 0.8924, "step": 19600 }, { "epoch": 0.7669222943892323, "grad_norm": 0.0, "learning_rate": 2.716527878033588e-06, "loss": 0.8304, "step": 19601 }, { "epoch": 0.7669614210814618, "grad_norm": 0.0, "learning_rate": 2.715659619310801e-06, "loss": 1.0037, "step": 19602 }, { "epoch": 0.7670005477736912, "grad_norm": 0.0, "learning_rate": 2.7147914775653896e-06, "loss": 1.0439, "step": 19603 }, { "epoch": 0.7670396744659207, "grad_norm": 0.0, "learning_rate": 2.713923452811301e-06, "loss": 1.0696, "step": 19604 }, { "epoch": 0.7670788011581501, "grad_norm": 0.0, "learning_rate": 2.713055545062465e-06, "loss": 0.9185, "step": 19605 }, { "epoch": 0.7671179278503796, "grad_norm": 0.0, "learning_rate": 2.7121877543328334e-06, "loss": 0.9703, "step": 19606 }, { "epoch": 0.767157054542609, "grad_norm": 0.0, "learning_rate": 2.7113200806363316e-06, "loss": 0.8329, "step": 19607 }, { "epoch": 0.7671961812348385, "grad_norm": 0.0, "learning_rate": 2.710452523986897e-06, "loss": 1.005, "step": 19608 }, { "epoch": 0.7672353079270678, "grad_norm": 0.0, "learning_rate": 2.7095850843984595e-06, "loss": 0.9115, "step": 19609 }, { "epoch": 0.7672744346192972, "grad_norm": 0.0, "learning_rate": 2.708717761884949e-06, "loss": 1.004, "step": 19610 }, { "epoch": 0.7673135613115267, "grad_norm": 0.0, "learning_rate": 2.7078505564602965e-06, "loss": 0.8384, "step": 19611 }, { "epoch": 0.7673526880037561, "grad_norm": 0.0, "learning_rate": 2.706983468138428e-06, "loss": 0.8257, "step": 19612 }, { "epoch": 0.7673918146959856, "grad_norm": 0.0, "learning_rate": 2.7061164969332634e-06, "loss": 0.9026, "step": 19613 }, { "epoch": 0.767430941388215, "grad_norm": 0.0, "learning_rate": 2.705249642858728e-06, "loss": 0.9347, "step": 19614 }, { "epoch": 0.7674700680804445, "grad_norm": 0.0, "learning_rate": 2.7043829059287462e-06, "loss": 1.058, "step": 19615 }, { "epoch": 0.7675091947726739, "grad_norm": 0.0, "learning_rate": 2.7035162861572297e-06, "loss": 1.016, "step": 19616 }, { "epoch": 0.7675483214649034, "grad_norm": 0.0, "learning_rate": 2.7026497835580978e-06, "loss": 1.0554, "step": 19617 }, { "epoch": 0.7675874481571328, "grad_norm": 0.0, "learning_rate": 2.701783398145268e-06, "loss": 0.9076, "step": 19618 }, { "epoch": 0.7676265748493623, "grad_norm": 0.0, "learning_rate": 2.700917129932653e-06, "loss": 0.9541, "step": 19619 }, { "epoch": 0.7676657015415916, "grad_norm": 0.0, "learning_rate": 2.70005097893416e-06, "loss": 0.9742, "step": 19620 }, { "epoch": 0.7677048282338211, "grad_norm": 0.0, "learning_rate": 2.699184945163704e-06, "loss": 0.8453, "step": 19621 }, { "epoch": 0.7677439549260505, "grad_norm": 0.0, "learning_rate": 2.698319028635188e-06, "loss": 0.9752, "step": 19622 }, { "epoch": 0.76778308161828, "grad_norm": 0.0, "learning_rate": 2.6974532293625166e-06, "loss": 1.0572, "step": 19623 }, { "epoch": 0.7678222083105094, "grad_norm": 0.0, "learning_rate": 2.6965875473595972e-06, "loss": 0.9114, "step": 19624 }, { "epoch": 0.7678613350027389, "grad_norm": 0.0, "learning_rate": 2.6957219826403325e-06, "loss": 0.9241, "step": 19625 }, { "epoch": 0.7679004616949683, "grad_norm": 0.0, "learning_rate": 2.694856535218616e-06, "loss": 0.9211, "step": 19626 }, { "epoch": 0.7679395883871978, "grad_norm": 0.0, "learning_rate": 2.6939912051083517e-06, "loss": 0.9649, "step": 19627 }, { "epoch": 0.7679787150794272, "grad_norm": 0.0, "learning_rate": 2.6931259923234323e-06, "loss": 1.1406, "step": 19628 }, { "epoch": 0.7680178417716567, "grad_norm": 0.0, "learning_rate": 2.692260896877756e-06, "loss": 0.9493, "step": 19629 }, { "epoch": 0.768056968463886, "grad_norm": 0.0, "learning_rate": 2.6913959187852114e-06, "loss": 1.0145, "step": 19630 }, { "epoch": 0.7680960951561155, "grad_norm": 0.0, "learning_rate": 2.6905310580596922e-06, "loss": 0.8625, "step": 19631 }, { "epoch": 0.7681352218483449, "grad_norm": 0.0, "learning_rate": 2.689666314715079e-06, "loss": 0.9879, "step": 19632 }, { "epoch": 0.7681743485405744, "grad_norm": 0.0, "learning_rate": 2.6888016887652703e-06, "loss": 1.0212, "step": 19633 }, { "epoch": 0.7682134752328038, "grad_norm": 0.0, "learning_rate": 2.6879371802241418e-06, "loss": 0.9408, "step": 19634 }, { "epoch": 0.7682526019250333, "grad_norm": 0.0, "learning_rate": 2.6870727891055826e-06, "loss": 1.0409, "step": 19635 }, { "epoch": 0.7682917286172627, "grad_norm": 0.0, "learning_rate": 2.686208515423465e-06, "loss": 1.084, "step": 19636 }, { "epoch": 0.7683308553094922, "grad_norm": 0.0, "learning_rate": 2.6853443591916806e-06, "loss": 1.0522, "step": 19637 }, { "epoch": 0.7683699820017216, "grad_norm": 0.0, "learning_rate": 2.6844803204240968e-06, "loss": 0.9413, "step": 19638 }, { "epoch": 0.768409108693951, "grad_norm": 0.0, "learning_rate": 2.6836163991345943e-06, "loss": 1.0426, "step": 19639 }, { "epoch": 0.7684482353861805, "grad_norm": 0.0, "learning_rate": 2.6827525953370425e-06, "loss": 0.9835, "step": 19640 }, { "epoch": 0.7684873620784098, "grad_norm": 0.0, "learning_rate": 2.681888909045315e-06, "loss": 0.9664, "step": 19641 }, { "epoch": 0.7685264887706393, "grad_norm": 0.0, "learning_rate": 2.6810253402732798e-06, "loss": 1.0508, "step": 19642 }, { "epoch": 0.7685656154628687, "grad_norm": 0.0, "learning_rate": 2.6801618890348113e-06, "loss": 1.004, "step": 19643 }, { "epoch": 0.7686047421550982, "grad_norm": 0.0, "learning_rate": 2.679298555343767e-06, "loss": 0.9996, "step": 19644 }, { "epoch": 0.7686438688473276, "grad_norm": 0.0, "learning_rate": 2.678435339214015e-06, "loss": 0.8669, "step": 19645 }, { "epoch": 0.7686829955395571, "grad_norm": 0.0, "learning_rate": 2.677572240659416e-06, "loss": 0.9438, "step": 19646 }, { "epoch": 0.7687221222317865, "grad_norm": 0.0, "learning_rate": 2.6767092596938347e-06, "loss": 0.9391, "step": 19647 }, { "epoch": 0.768761248924016, "grad_norm": 0.0, "learning_rate": 2.675846396331123e-06, "loss": 0.9861, "step": 19648 }, { "epoch": 0.7688003756162454, "grad_norm": 0.0, "learning_rate": 2.6749836505851443e-06, "loss": 1.1159, "step": 19649 }, { "epoch": 0.7688395023084749, "grad_norm": 0.0, "learning_rate": 2.6741210224697435e-06, "loss": 0.8968, "step": 19650 }, { "epoch": 0.7688786290007043, "grad_norm": 0.0, "learning_rate": 2.6732585119987842e-06, "loss": 0.9168, "step": 19651 }, { "epoch": 0.7689177556929337, "grad_norm": 0.0, "learning_rate": 2.6723961191861093e-06, "loss": 0.989, "step": 19652 }, { "epoch": 0.7689568823851631, "grad_norm": 0.0, "learning_rate": 2.671533844045574e-06, "loss": 0.9546, "step": 19653 }, { "epoch": 0.7689960090773926, "grad_norm": 0.0, "learning_rate": 2.67067168659102e-06, "loss": 0.9824, "step": 19654 }, { "epoch": 0.769035135769622, "grad_norm": 0.0, "learning_rate": 2.6698096468362933e-06, "loss": 0.9691, "step": 19655 }, { "epoch": 0.7690742624618515, "grad_norm": 0.0, "learning_rate": 2.668947724795239e-06, "loss": 0.9968, "step": 19656 }, { "epoch": 0.7691133891540809, "grad_norm": 0.0, "learning_rate": 2.668085920481701e-06, "loss": 1.0043, "step": 19657 }, { "epoch": 0.7691525158463104, "grad_norm": 0.0, "learning_rate": 2.6672242339095124e-06, "loss": 1.0062, "step": 19658 }, { "epoch": 0.7691916425385398, "grad_norm": 0.0, "learning_rate": 2.6663626650925146e-06, "loss": 0.8494, "step": 19659 }, { "epoch": 0.7692307692307693, "grad_norm": 0.0, "learning_rate": 2.6655012140445447e-06, "loss": 0.8299, "step": 19660 }, { "epoch": 0.7692698959229987, "grad_norm": 0.0, "learning_rate": 2.6646398807794326e-06, "loss": 0.9403, "step": 19661 }, { "epoch": 0.7693090226152282, "grad_norm": 0.0, "learning_rate": 2.663778665311012e-06, "loss": 0.9544, "step": 19662 }, { "epoch": 0.7693481493074575, "grad_norm": 0.0, "learning_rate": 2.662917567653114e-06, "loss": 0.9084, "step": 19663 }, { "epoch": 0.769387275999687, "grad_norm": 0.0, "learning_rate": 2.662056587819568e-06, "loss": 0.897, "step": 19664 }, { "epoch": 0.7694264026919164, "grad_norm": 0.0, "learning_rate": 2.661195725824195e-06, "loss": 1.239, "step": 19665 }, { "epoch": 0.7694655293841459, "grad_norm": 0.0, "learning_rate": 2.6603349816808268e-06, "loss": 0.9412, "step": 19666 }, { "epoch": 0.7695046560763753, "grad_norm": 0.0, "learning_rate": 2.6594743554032753e-06, "loss": 0.8664, "step": 19667 }, { "epoch": 0.7695437827686047, "grad_norm": 0.0, "learning_rate": 2.6586138470053725e-06, "loss": 0.9401, "step": 19668 }, { "epoch": 0.7695829094608342, "grad_norm": 0.0, "learning_rate": 2.65775345650093e-06, "loss": 1.0392, "step": 19669 }, { "epoch": 0.7696220361530636, "grad_norm": 0.0, "learning_rate": 2.656893183903769e-06, "loss": 0.9837, "step": 19670 }, { "epoch": 0.7696611628452931, "grad_norm": 0.0, "learning_rate": 2.6560330292277e-06, "loss": 1.0089, "step": 19671 }, { "epoch": 0.7697002895375225, "grad_norm": 0.0, "learning_rate": 2.6551729924865377e-06, "loss": 0.9439, "step": 19672 }, { "epoch": 0.769739416229752, "grad_norm": 0.0, "learning_rate": 2.6543130736940936e-06, "loss": 1.0641, "step": 19673 }, { "epoch": 0.7697785429219813, "grad_norm": 0.0, "learning_rate": 2.6534532728641794e-06, "loss": 1.0223, "step": 19674 }, { "epoch": 0.7698176696142108, "grad_norm": 0.0, "learning_rate": 2.652593590010597e-06, "loss": 0.9327, "step": 19675 }, { "epoch": 0.7698567963064402, "grad_norm": 0.0, "learning_rate": 2.6517340251471546e-06, "loss": 0.7505, "step": 19676 }, { "epoch": 0.7698959229986697, "grad_norm": 0.0, "learning_rate": 2.6508745782876564e-06, "loss": 0.9682, "step": 19677 }, { "epoch": 0.7699350496908991, "grad_norm": 0.0, "learning_rate": 2.6500152494459063e-06, "loss": 1.0177, "step": 19678 }, { "epoch": 0.7699741763831286, "grad_norm": 0.0, "learning_rate": 2.6491560386356986e-06, "loss": 1.0736, "step": 19679 }, { "epoch": 0.770013303075358, "grad_norm": 0.0, "learning_rate": 2.6482969458708362e-06, "loss": 0.938, "step": 19680 }, { "epoch": 0.7700524297675875, "grad_norm": 0.0, "learning_rate": 2.6474379711651067e-06, "loss": 1.0063, "step": 19681 }, { "epoch": 0.7700915564598169, "grad_norm": 0.0, "learning_rate": 2.646579114532316e-06, "loss": 0.9745, "step": 19682 }, { "epoch": 0.7701306831520464, "grad_norm": 0.0, "learning_rate": 2.6457203759862473e-06, "loss": 0.8619, "step": 19683 }, { "epoch": 0.7701698098442757, "grad_norm": 0.0, "learning_rate": 2.6448617555406973e-06, "loss": 0.9875, "step": 19684 }, { "epoch": 0.7702089365365052, "grad_norm": 0.0, "learning_rate": 2.6440032532094453e-06, "loss": 0.9684, "step": 19685 }, { "epoch": 0.7702480632287346, "grad_norm": 0.0, "learning_rate": 2.643144869006289e-06, "loss": 0.89, "step": 19686 }, { "epoch": 0.7702871899209641, "grad_norm": 0.0, "learning_rate": 2.6422866029450046e-06, "loss": 1.0536, "step": 19687 }, { "epoch": 0.7703263166131935, "grad_norm": 0.0, "learning_rate": 2.641428455039381e-06, "loss": 1.0067, "step": 19688 }, { "epoch": 0.770365443305423, "grad_norm": 0.0, "learning_rate": 2.6405704253031916e-06, "loss": 1.0216, "step": 19689 }, { "epoch": 0.7704045699976524, "grad_norm": 0.0, "learning_rate": 2.63971251375022e-06, "loss": 0.8478, "step": 19690 }, { "epoch": 0.7704436966898819, "grad_norm": 0.0, "learning_rate": 2.638854720394243e-06, "loss": 0.8535, "step": 19691 }, { "epoch": 0.7704828233821113, "grad_norm": 0.0, "learning_rate": 2.6379970452490368e-06, "loss": 0.9893, "step": 19692 }, { "epoch": 0.7705219500743408, "grad_norm": 0.0, "learning_rate": 2.6371394883283708e-06, "loss": 0.997, "step": 19693 }, { "epoch": 0.7705610767665702, "grad_norm": 0.0, "learning_rate": 2.6362820496460185e-06, "loss": 0.9998, "step": 19694 }, { "epoch": 0.7706002034587996, "grad_norm": 0.0, "learning_rate": 2.6354247292157486e-06, "loss": 0.9492, "step": 19695 }, { "epoch": 0.770639330151029, "grad_norm": 0.0, "learning_rate": 2.6345675270513325e-06, "loss": 1.0852, "step": 19696 }, { "epoch": 0.7706784568432584, "grad_norm": 0.0, "learning_rate": 2.6337104431665294e-06, "loss": 1.0305, "step": 19697 }, { "epoch": 0.7707175835354879, "grad_norm": 0.0, "learning_rate": 2.6328534775751103e-06, "loss": 1.0686, "step": 19698 }, { "epoch": 0.7707567102277173, "grad_norm": 0.0, "learning_rate": 2.6319966302908286e-06, "loss": 1.0465, "step": 19699 }, { "epoch": 0.7707958369199468, "grad_norm": 0.0, "learning_rate": 2.6311399013274484e-06, "loss": 0.9121, "step": 19700 }, { "epoch": 0.7708349636121762, "grad_norm": 0.0, "learning_rate": 2.6302832906987287e-06, "loss": 0.8772, "step": 19701 }, { "epoch": 0.7708740903044057, "grad_norm": 0.0, "learning_rate": 2.6294267984184264e-06, "loss": 1.071, "step": 19702 }, { "epoch": 0.7709132169966351, "grad_norm": 0.0, "learning_rate": 2.6285704245002907e-06, "loss": 1.0387, "step": 19703 }, { "epoch": 0.7709523436888646, "grad_norm": 0.0, "learning_rate": 2.6277141689580777e-06, "loss": 0.8909, "step": 19704 }, { "epoch": 0.770991470381094, "grad_norm": 0.0, "learning_rate": 2.6268580318055403e-06, "loss": 0.8456, "step": 19705 }, { "epoch": 0.7710305970733234, "grad_norm": 0.0, "learning_rate": 2.6260020130564212e-06, "loss": 1.0046, "step": 19706 }, { "epoch": 0.7710697237655528, "grad_norm": 0.0, "learning_rate": 2.625146112724468e-06, "loss": 1.0429, "step": 19707 }, { "epoch": 0.7711088504577823, "grad_norm": 0.0, "learning_rate": 2.624290330823429e-06, "loss": 0.8206, "step": 19708 }, { "epoch": 0.7711479771500117, "grad_norm": 0.0, "learning_rate": 2.6234346673670463e-06, "loss": 0.9653, "step": 19709 }, { "epoch": 0.7711871038422412, "grad_norm": 0.0, "learning_rate": 2.6225791223690577e-06, "loss": 1.0697, "step": 19710 }, { "epoch": 0.7712262305344706, "grad_norm": 0.0, "learning_rate": 2.6217236958432034e-06, "loss": 0.9412, "step": 19711 }, { "epoch": 0.7712653572267001, "grad_norm": 0.0, "learning_rate": 2.6208683878032214e-06, "loss": 0.8821, "step": 19712 }, { "epoch": 0.7713044839189295, "grad_norm": 0.0, "learning_rate": 2.6200131982628497e-06, "loss": 1.0864, "step": 19713 }, { "epoch": 0.771343610611159, "grad_norm": 0.0, "learning_rate": 2.6191581272358145e-06, "loss": 0.8793, "step": 19714 }, { "epoch": 0.7713827373033884, "grad_norm": 0.0, "learning_rate": 2.6183031747358546e-06, "loss": 1.0358, "step": 19715 }, { "epoch": 0.7714218639956179, "grad_norm": 0.0, "learning_rate": 2.6174483407766938e-06, "loss": 0.8956, "step": 19716 }, { "epoch": 0.7714609906878472, "grad_norm": 0.0, "learning_rate": 2.61659362537206e-06, "loss": 1.1234, "step": 19717 }, { "epoch": 0.7715001173800767, "grad_norm": 0.0, "learning_rate": 2.615739028535682e-06, "loss": 0.9909, "step": 19718 }, { "epoch": 0.7715392440723061, "grad_norm": 0.0, "learning_rate": 2.6148845502812846e-06, "loss": 0.9932, "step": 19719 }, { "epoch": 0.7715783707645356, "grad_norm": 0.0, "learning_rate": 2.614030190622584e-06, "loss": 1.0574, "step": 19720 }, { "epoch": 0.771617497456765, "grad_norm": 0.0, "learning_rate": 2.6131759495733046e-06, "loss": 0.9092, "step": 19721 }, { "epoch": 0.7716566241489945, "grad_norm": 0.0, "learning_rate": 2.612321827147162e-06, "loss": 1.0658, "step": 19722 }, { "epoch": 0.7716957508412239, "grad_norm": 0.0, "learning_rate": 2.611467823357877e-06, "loss": 0.9182, "step": 19723 }, { "epoch": 0.7717348775334533, "grad_norm": 0.0, "learning_rate": 2.6106139382191575e-06, "loss": 1.0053, "step": 19724 }, { "epoch": 0.7717740042256828, "grad_norm": 0.0, "learning_rate": 2.6097601717447186e-06, "loss": 0.8955, "step": 19725 }, { "epoch": 0.7718131309179121, "grad_norm": 0.0, "learning_rate": 2.6089065239482714e-06, "loss": 1.0805, "step": 19726 }, { "epoch": 0.7718522576101416, "grad_norm": 0.0, "learning_rate": 2.6080529948435262e-06, "loss": 0.9439, "step": 19727 }, { "epoch": 0.771891384302371, "grad_norm": 0.0, "learning_rate": 2.6071995844441845e-06, "loss": 1.0444, "step": 19728 }, { "epoch": 0.7719305109946005, "grad_norm": 0.0, "learning_rate": 2.606346292763957e-06, "loss": 1.0351, "step": 19729 }, { "epoch": 0.7719696376868299, "grad_norm": 0.0, "learning_rate": 2.605493119816537e-06, "loss": 1.0465, "step": 19730 }, { "epoch": 0.7720087643790594, "grad_norm": 0.0, "learning_rate": 2.604640065615638e-06, "loss": 0.9454, "step": 19731 }, { "epoch": 0.7720478910712888, "grad_norm": 0.0, "learning_rate": 2.6037871301749484e-06, "loss": 0.9823, "step": 19732 }, { "epoch": 0.7720870177635183, "grad_norm": 0.0, "learning_rate": 2.602934313508174e-06, "loss": 1.0998, "step": 19733 }, { "epoch": 0.7721261444557477, "grad_norm": 0.0, "learning_rate": 2.6020816156289986e-06, "loss": 0.9898, "step": 19734 }, { "epoch": 0.7721652711479772, "grad_norm": 0.0, "learning_rate": 2.6012290365511297e-06, "loss": 0.8615, "step": 19735 }, { "epoch": 0.7722043978402066, "grad_norm": 0.0, "learning_rate": 2.6003765762882473e-06, "loss": 0.9733, "step": 19736 }, { "epoch": 0.772243524532436, "grad_norm": 0.0, "learning_rate": 2.599524234854047e-06, "loss": 1.0372, "step": 19737 }, { "epoch": 0.7722826512246654, "grad_norm": 0.0, "learning_rate": 2.598672012262212e-06, "loss": 0.8918, "step": 19738 }, { "epoch": 0.7723217779168949, "grad_norm": 0.0, "learning_rate": 2.59781990852643e-06, "loss": 0.9553, "step": 19739 }, { "epoch": 0.7723609046091243, "grad_norm": 0.0, "learning_rate": 2.596967923660385e-06, "loss": 0.9085, "step": 19740 }, { "epoch": 0.7724000313013538, "grad_norm": 0.0, "learning_rate": 2.596116057677761e-06, "loss": 0.8432, "step": 19741 }, { "epoch": 0.7724391579935832, "grad_norm": 0.0, "learning_rate": 2.595264310592234e-06, "loss": 0.9997, "step": 19742 }, { "epoch": 0.7724782846858127, "grad_norm": 0.0, "learning_rate": 2.594412682417482e-06, "loss": 0.8987, "step": 19743 }, { "epoch": 0.7725174113780421, "grad_norm": 0.0, "learning_rate": 2.593561173167186e-06, "loss": 0.9339, "step": 19744 }, { "epoch": 0.7725565380702716, "grad_norm": 0.0, "learning_rate": 2.592709782855014e-06, "loss": 0.8, "step": 19745 }, { "epoch": 0.772595664762501, "grad_norm": 0.0, "learning_rate": 2.5918585114946415e-06, "loss": 0.9162, "step": 19746 }, { "epoch": 0.7726347914547305, "grad_norm": 0.0, "learning_rate": 2.591007359099741e-06, "loss": 0.9158, "step": 19747 }, { "epoch": 0.7726739181469598, "grad_norm": 0.0, "learning_rate": 2.5901563256839745e-06, "loss": 0.9678, "step": 19748 }, { "epoch": 0.7727130448391893, "grad_norm": 0.0, "learning_rate": 2.589305411261014e-06, "loss": 0.912, "step": 19749 }, { "epoch": 0.7727521715314187, "grad_norm": 0.0, "learning_rate": 2.588454615844521e-06, "loss": 0.9366, "step": 19750 }, { "epoch": 0.7727912982236482, "grad_norm": 0.0, "learning_rate": 2.5876039394481634e-06, "loss": 1.0416, "step": 19751 }, { "epoch": 0.7728304249158776, "grad_norm": 0.0, "learning_rate": 2.586753382085595e-06, "loss": 1.0642, "step": 19752 }, { "epoch": 0.772869551608107, "grad_norm": 0.0, "learning_rate": 2.5859029437704775e-06, "loss": 0.9503, "step": 19753 }, { "epoch": 0.7729086783003365, "grad_norm": 0.0, "learning_rate": 2.585052624516472e-06, "loss": 0.9641, "step": 19754 }, { "epoch": 0.7729478049925659, "grad_norm": 0.0, "learning_rate": 2.5842024243372268e-06, "loss": 0.9081, "step": 19755 }, { "epoch": 0.7729869316847954, "grad_norm": 0.0, "learning_rate": 2.5833523432463982e-06, "loss": 0.9245, "step": 19756 }, { "epoch": 0.7730260583770248, "grad_norm": 0.0, "learning_rate": 2.5825023812576377e-06, "loss": 1.0266, "step": 19757 }, { "epoch": 0.7730651850692543, "grad_norm": 0.0, "learning_rate": 2.5816525383845968e-06, "loss": 1.0833, "step": 19758 }, { "epoch": 0.7731043117614836, "grad_norm": 0.0, "learning_rate": 2.5808028146409182e-06, "loss": 1.0352, "step": 19759 }, { "epoch": 0.7731434384537131, "grad_norm": 0.0, "learning_rate": 2.579953210040251e-06, "loss": 0.9842, "step": 19760 }, { "epoch": 0.7731825651459425, "grad_norm": 0.0, "learning_rate": 2.5791037245962324e-06, "loss": 0.9734, "step": 19761 }, { "epoch": 0.773221691838172, "grad_norm": 0.0, "learning_rate": 2.578254358322515e-06, "loss": 1.1058, "step": 19762 }, { "epoch": 0.7732608185304014, "grad_norm": 0.0, "learning_rate": 2.5774051112327305e-06, "loss": 0.9519, "step": 19763 }, { "epoch": 0.7732999452226309, "grad_norm": 0.0, "learning_rate": 2.5765559833405205e-06, "loss": 0.9885, "step": 19764 }, { "epoch": 0.7733390719148603, "grad_norm": 0.0, "learning_rate": 2.5757069746595175e-06, "loss": 1.0694, "step": 19765 }, { "epoch": 0.7733781986070898, "grad_norm": 0.0, "learning_rate": 2.5748580852033565e-06, "loss": 1.0513, "step": 19766 }, { "epoch": 0.7734173252993192, "grad_norm": 0.0, "learning_rate": 2.5740093149856706e-06, "loss": 0.869, "step": 19767 }, { "epoch": 0.7734564519915487, "grad_norm": 0.0, "learning_rate": 2.5731606640200923e-06, "loss": 0.958, "step": 19768 }, { "epoch": 0.773495578683778, "grad_norm": 0.0, "learning_rate": 2.572312132320246e-06, "loss": 0.8479, "step": 19769 }, { "epoch": 0.7735347053760075, "grad_norm": 0.0, "learning_rate": 2.5714637198997583e-06, "loss": 0.9901, "step": 19770 }, { "epoch": 0.7735738320682369, "grad_norm": 0.0, "learning_rate": 2.570615426772255e-06, "loss": 0.9521, "step": 19771 }, { "epoch": 0.7736129587604664, "grad_norm": 0.0, "learning_rate": 2.5697672529513605e-06, "loss": 0.9025, "step": 19772 }, { "epoch": 0.7736520854526958, "grad_norm": 0.0, "learning_rate": 2.568919198450691e-06, "loss": 0.994, "step": 19773 }, { "epoch": 0.7736912121449253, "grad_norm": 0.0, "learning_rate": 2.5680712632838713e-06, "loss": 0.998, "step": 19774 }, { "epoch": 0.7737303388371547, "grad_norm": 0.0, "learning_rate": 2.5672234474645076e-06, "loss": 0.9254, "step": 19775 }, { "epoch": 0.7737694655293842, "grad_norm": 0.0, "learning_rate": 2.566375751006227e-06, "loss": 0.8698, "step": 19776 }, { "epoch": 0.7738085922216136, "grad_norm": 0.0, "learning_rate": 2.5655281739226356e-06, "loss": 0.9299, "step": 19777 }, { "epoch": 0.7738477189138431, "grad_norm": 0.0, "learning_rate": 2.564680716227348e-06, "loss": 0.9112, "step": 19778 }, { "epoch": 0.7738868456060725, "grad_norm": 0.0, "learning_rate": 2.563833377933964e-06, "loss": 0.9756, "step": 19779 }, { "epoch": 0.773925972298302, "grad_norm": 0.0, "learning_rate": 2.5629861590561055e-06, "loss": 1.0142, "step": 19780 }, { "epoch": 0.7739650989905313, "grad_norm": 0.0, "learning_rate": 2.5621390596073657e-06, "loss": 1.0188, "step": 19781 }, { "epoch": 0.7740042256827607, "grad_norm": 0.0, "learning_rate": 2.5612920796013575e-06, "loss": 1.0431, "step": 19782 }, { "epoch": 0.7740433523749902, "grad_norm": 0.0, "learning_rate": 2.5604452190516693e-06, "loss": 0.882, "step": 19783 }, { "epoch": 0.7740824790672196, "grad_norm": 0.0, "learning_rate": 2.559598477971915e-06, "loss": 0.9645, "step": 19784 }, { "epoch": 0.7741216057594491, "grad_norm": 0.0, "learning_rate": 2.5587518563756843e-06, "loss": 0.9366, "step": 19785 }, { "epoch": 0.7741607324516785, "grad_norm": 0.0, "learning_rate": 2.557905354276575e-06, "loss": 0.9741, "step": 19786 }, { "epoch": 0.774199859143908, "grad_norm": 0.0, "learning_rate": 2.5570589716881787e-06, "loss": 0.9329, "step": 19787 }, { "epoch": 0.7742389858361374, "grad_norm": 0.0, "learning_rate": 2.5562127086240893e-06, "loss": 0.8906, "step": 19788 }, { "epoch": 0.7742781125283669, "grad_norm": 0.0, "learning_rate": 2.5553665650978953e-06, "loss": 1.0296, "step": 19789 }, { "epoch": 0.7743172392205963, "grad_norm": 0.0, "learning_rate": 2.554520541123189e-06, "loss": 1.0166, "step": 19790 }, { "epoch": 0.7743563659128258, "grad_norm": 0.0, "learning_rate": 2.55367463671355e-06, "loss": 0.9698, "step": 19791 }, { "epoch": 0.7743954926050551, "grad_norm": 0.0, "learning_rate": 2.5528288518825652e-06, "loss": 1.0091, "step": 19792 }, { "epoch": 0.7744346192972846, "grad_norm": 0.0, "learning_rate": 2.5519831866438205e-06, "loss": 0.8959, "step": 19793 }, { "epoch": 0.774473745989514, "grad_norm": 0.0, "learning_rate": 2.55113764101089e-06, "loss": 0.9815, "step": 19794 }, { "epoch": 0.7745128726817435, "grad_norm": 0.0, "learning_rate": 2.5502922149973553e-06, "loss": 0.8812, "step": 19795 }, { "epoch": 0.7745519993739729, "grad_norm": 0.0, "learning_rate": 2.549446908616795e-06, "loss": 1.0417, "step": 19796 }, { "epoch": 0.7745911260662024, "grad_norm": 0.0, "learning_rate": 2.5486017218827784e-06, "loss": 0.934, "step": 19797 }, { "epoch": 0.7746302527584318, "grad_norm": 0.0, "learning_rate": 2.547756654808882e-06, "loss": 0.9546, "step": 19798 }, { "epoch": 0.7746693794506613, "grad_norm": 0.0, "learning_rate": 2.546911707408677e-06, "loss": 1.0166, "step": 19799 }, { "epoch": 0.7747085061428907, "grad_norm": 0.0, "learning_rate": 2.546066879695729e-06, "loss": 0.9543, "step": 19800 }, { "epoch": 0.7747476328351202, "grad_norm": 0.0, "learning_rate": 2.545222171683606e-06, "loss": 0.9337, "step": 19801 }, { "epoch": 0.7747867595273495, "grad_norm": 0.0, "learning_rate": 2.544377583385873e-06, "loss": 0.9643, "step": 19802 }, { "epoch": 0.774825886219579, "grad_norm": 0.0, "learning_rate": 2.543533114816098e-06, "loss": 0.9101, "step": 19803 }, { "epoch": 0.7748650129118084, "grad_norm": 0.0, "learning_rate": 2.542688765987833e-06, "loss": 0.964, "step": 19804 }, { "epoch": 0.7749041396040379, "grad_norm": 0.0, "learning_rate": 2.5418445369146462e-06, "loss": 0.969, "step": 19805 }, { "epoch": 0.7749432662962673, "grad_norm": 0.0, "learning_rate": 2.5410004276100842e-06, "loss": 0.9481, "step": 19806 }, { "epoch": 0.7749823929884968, "grad_norm": 0.0, "learning_rate": 2.540156438087714e-06, "loss": 1.0579, "step": 19807 }, { "epoch": 0.7750215196807262, "grad_norm": 0.0, "learning_rate": 2.539312568361082e-06, "loss": 1.019, "step": 19808 }, { "epoch": 0.7750606463729556, "grad_norm": 0.0, "learning_rate": 2.5384688184437433e-06, "loss": 0.9686, "step": 19809 }, { "epoch": 0.7750997730651851, "grad_norm": 0.0, "learning_rate": 2.537625188349241e-06, "loss": 0.9828, "step": 19810 }, { "epoch": 0.7751388997574145, "grad_norm": 0.0, "learning_rate": 2.5367816780911312e-06, "loss": 0.9565, "step": 19811 }, { "epoch": 0.775178026449644, "grad_norm": 0.0, "learning_rate": 2.535938287682954e-06, "loss": 0.8945, "step": 19812 }, { "epoch": 0.7752171531418733, "grad_norm": 0.0, "learning_rate": 2.5350950171382583e-06, "loss": 1.0399, "step": 19813 }, { "epoch": 0.7752562798341028, "grad_norm": 0.0, "learning_rate": 2.5342518664705786e-06, "loss": 1.0017, "step": 19814 }, { "epoch": 0.7752954065263322, "grad_norm": 0.0, "learning_rate": 2.5334088356934592e-06, "loss": 1.0029, "step": 19815 }, { "epoch": 0.7753345332185617, "grad_norm": 0.0, "learning_rate": 2.532565924820438e-06, "loss": 1.006, "step": 19816 }, { "epoch": 0.7753736599107911, "grad_norm": 0.0, "learning_rate": 2.5317231338650538e-06, "loss": 1.0504, "step": 19817 }, { "epoch": 0.7754127866030206, "grad_norm": 0.0, "learning_rate": 2.5308804628408346e-06, "loss": 1.1013, "step": 19818 }, { "epoch": 0.77545191329525, "grad_norm": 0.0, "learning_rate": 2.530037911761315e-06, "loss": 0.9132, "step": 19819 }, { "epoch": 0.7754910399874795, "grad_norm": 0.0, "learning_rate": 2.529195480640028e-06, "loss": 0.9003, "step": 19820 }, { "epoch": 0.7755301666797089, "grad_norm": 0.0, "learning_rate": 2.5283531694905016e-06, "loss": 1.0274, "step": 19821 }, { "epoch": 0.7755692933719384, "grad_norm": 0.0, "learning_rate": 2.5275109783262586e-06, "loss": 1.011, "step": 19822 }, { "epoch": 0.7756084200641677, "grad_norm": 0.0, "learning_rate": 2.5266689071608285e-06, "loss": 1.062, "step": 19823 }, { "epoch": 0.7756475467563972, "grad_norm": 0.0, "learning_rate": 2.525826956007724e-06, "loss": 0.8643, "step": 19824 }, { "epoch": 0.7756866734486266, "grad_norm": 0.0, "learning_rate": 2.5249851248804804e-06, "loss": 0.9611, "step": 19825 }, { "epoch": 0.7757258001408561, "grad_norm": 0.0, "learning_rate": 2.524143413792606e-06, "loss": 1.0054, "step": 19826 }, { "epoch": 0.7757649268330855, "grad_norm": 0.0, "learning_rate": 2.523301822757623e-06, "loss": 0.9096, "step": 19827 }, { "epoch": 0.775804053525315, "grad_norm": 0.0, "learning_rate": 2.5224603517890377e-06, "loss": 0.9222, "step": 19828 }, { "epoch": 0.7758431802175444, "grad_norm": 0.0, "learning_rate": 2.521619000900376e-06, "loss": 0.9294, "step": 19829 }, { "epoch": 0.7758823069097739, "grad_norm": 0.0, "learning_rate": 2.5207777701051385e-06, "loss": 1.0168, "step": 19830 }, { "epoch": 0.7759214336020033, "grad_norm": 0.0, "learning_rate": 2.5199366594168417e-06, "loss": 1.1129, "step": 19831 }, { "epoch": 0.7759605602942328, "grad_norm": 0.0, "learning_rate": 2.5190956688489855e-06, "loss": 1.0177, "step": 19832 }, { "epoch": 0.7759996869864622, "grad_norm": 0.0, "learning_rate": 2.5182547984150794e-06, "loss": 0.9419, "step": 19833 }, { "epoch": 0.7760388136786917, "grad_norm": 0.0, "learning_rate": 2.5174140481286257e-06, "loss": 1.0178, "step": 19834 }, { "epoch": 0.776077940370921, "grad_norm": 0.0, "learning_rate": 2.5165734180031286e-06, "loss": 1.054, "step": 19835 }, { "epoch": 0.7761170670631505, "grad_norm": 0.0, "learning_rate": 2.515732908052083e-06, "loss": 1.1055, "step": 19836 }, { "epoch": 0.7761561937553799, "grad_norm": 0.0, "learning_rate": 2.514892518288988e-06, "loss": 1.0009, "step": 19837 }, { "epoch": 0.7761953204476093, "grad_norm": 0.0, "learning_rate": 2.514052248727343e-06, "loss": 0.8876, "step": 19838 }, { "epoch": 0.7762344471398388, "grad_norm": 0.0, "learning_rate": 2.5132120993806366e-06, "loss": 0.9797, "step": 19839 }, { "epoch": 0.7762735738320682, "grad_norm": 0.0, "learning_rate": 2.5123720702623612e-06, "loss": 1.0221, "step": 19840 }, { "epoch": 0.7763127005242977, "grad_norm": 0.0, "learning_rate": 2.511532161386008e-06, "loss": 0.9426, "step": 19841 }, { "epoch": 0.7763518272165271, "grad_norm": 0.0, "learning_rate": 2.510692372765068e-06, "loss": 1.0336, "step": 19842 }, { "epoch": 0.7763909539087566, "grad_norm": 0.0, "learning_rate": 2.5098527044130207e-06, "loss": 1.047, "step": 19843 }, { "epoch": 0.776430080600986, "grad_norm": 0.0, "learning_rate": 2.509013156343356e-06, "loss": 1.04, "step": 19844 }, { "epoch": 0.7764692072932154, "grad_norm": 0.0, "learning_rate": 2.508173728569551e-06, "loss": 0.9055, "step": 19845 }, { "epoch": 0.7765083339854448, "grad_norm": 0.0, "learning_rate": 2.5073344211050875e-06, "loss": 1.0221, "step": 19846 }, { "epoch": 0.7765474606776743, "grad_norm": 0.0, "learning_rate": 2.506495233963444e-06, "loss": 1.0629, "step": 19847 }, { "epoch": 0.7765865873699037, "grad_norm": 0.0, "learning_rate": 2.5056561671581003e-06, "loss": 1.0078, "step": 19848 }, { "epoch": 0.7766257140621332, "grad_norm": 0.0, "learning_rate": 2.5048172207025257e-06, "loss": 1.0537, "step": 19849 }, { "epoch": 0.7766648407543626, "grad_norm": 0.0, "learning_rate": 2.5039783946101935e-06, "loss": 0.9175, "step": 19850 }, { "epoch": 0.7767039674465921, "grad_norm": 0.0, "learning_rate": 2.503139688894576e-06, "loss": 0.9621, "step": 19851 }, { "epoch": 0.7767430941388215, "grad_norm": 0.0, "learning_rate": 2.5023011035691435e-06, "loss": 0.9601, "step": 19852 }, { "epoch": 0.776782220831051, "grad_norm": 0.0, "learning_rate": 2.501462638647357e-06, "loss": 0.8281, "step": 19853 }, { "epoch": 0.7768213475232804, "grad_norm": 0.0, "learning_rate": 2.5006242941426874e-06, "loss": 0.9291, "step": 19854 }, { "epoch": 0.7768604742155099, "grad_norm": 0.0, "learning_rate": 2.4997860700685883e-06, "loss": 0.8788, "step": 19855 }, { "epoch": 0.7768996009077392, "grad_norm": 0.0, "learning_rate": 2.498947966438533e-06, "loss": 0.9539, "step": 19856 }, { "epoch": 0.7769387275999687, "grad_norm": 0.0, "learning_rate": 2.4981099832659706e-06, "loss": 0.9387, "step": 19857 }, { "epoch": 0.7769778542921981, "grad_norm": 0.0, "learning_rate": 2.497272120564365e-06, "loss": 0.898, "step": 19858 }, { "epoch": 0.7770169809844276, "grad_norm": 0.0, "learning_rate": 2.496434378347161e-06, "loss": 1.0617, "step": 19859 }, { "epoch": 0.777056107676657, "grad_norm": 0.0, "learning_rate": 2.495596756627825e-06, "loss": 0.9244, "step": 19860 }, { "epoch": 0.7770952343688865, "grad_norm": 0.0, "learning_rate": 2.4947592554197988e-06, "loss": 1.0395, "step": 19861 }, { "epoch": 0.7771343610611159, "grad_norm": 0.0, "learning_rate": 2.493921874736537e-06, "loss": 1.0191, "step": 19862 }, { "epoch": 0.7771734877533454, "grad_norm": 0.0, "learning_rate": 2.493084614591481e-06, "loss": 1.0336, "step": 19863 }, { "epoch": 0.7772126144455748, "grad_norm": 0.0, "learning_rate": 2.4922474749980798e-06, "loss": 0.9453, "step": 19864 }, { "epoch": 0.7772517411378043, "grad_norm": 0.0, "learning_rate": 2.491410455969776e-06, "loss": 0.9952, "step": 19865 }, { "epoch": 0.7772908678300336, "grad_norm": 0.0, "learning_rate": 2.490573557520014e-06, "loss": 1.0132, "step": 19866 }, { "epoch": 0.777329994522263, "grad_norm": 0.0, "learning_rate": 2.4897367796622283e-06, "loss": 0.8665, "step": 19867 }, { "epoch": 0.7773691212144925, "grad_norm": 0.0, "learning_rate": 2.4889001224098596e-06, "loss": 1.0019, "step": 19868 }, { "epoch": 0.7774082479067219, "grad_norm": 0.0, "learning_rate": 2.4880635857763424e-06, "loss": 0.9815, "step": 19869 }, { "epoch": 0.7774473745989514, "grad_norm": 0.0, "learning_rate": 2.487227169775115e-06, "loss": 1.0017, "step": 19870 }, { "epoch": 0.7774865012911808, "grad_norm": 0.0, "learning_rate": 2.486390874419601e-06, "loss": 0.9318, "step": 19871 }, { "epoch": 0.7775256279834103, "grad_norm": 0.0, "learning_rate": 2.4855546997232383e-06, "loss": 0.8961, "step": 19872 }, { "epoch": 0.7775647546756397, "grad_norm": 0.0, "learning_rate": 2.484718645699444e-06, "loss": 1.137, "step": 19873 }, { "epoch": 0.7776038813678692, "grad_norm": 0.0, "learning_rate": 2.483882712361658e-06, "loss": 1.0109, "step": 19874 }, { "epoch": 0.7776430080600986, "grad_norm": 0.0, "learning_rate": 2.4830468997232947e-06, "loss": 1.0599, "step": 19875 }, { "epoch": 0.7776821347523281, "grad_norm": 0.0, "learning_rate": 2.482211207797781e-06, "loss": 1.0577, "step": 19876 }, { "epoch": 0.7777212614445574, "grad_norm": 0.0, "learning_rate": 2.481375636598532e-06, "loss": 0.909, "step": 19877 }, { "epoch": 0.7777603881367869, "grad_norm": 0.0, "learning_rate": 2.48054018613897e-06, "loss": 0.9942, "step": 19878 }, { "epoch": 0.7777995148290163, "grad_norm": 0.0, "learning_rate": 2.479704856432509e-06, "loss": 0.9736, "step": 19879 }, { "epoch": 0.7778386415212458, "grad_norm": 0.0, "learning_rate": 2.4788696474925677e-06, "loss": 0.9874, "step": 19880 }, { "epoch": 0.7778777682134752, "grad_norm": 0.0, "learning_rate": 2.4780345593325527e-06, "loss": 1.1449, "step": 19881 }, { "epoch": 0.7779168949057047, "grad_norm": 0.0, "learning_rate": 2.4771995919658777e-06, "loss": 0.9253, "step": 19882 }, { "epoch": 0.7779560215979341, "grad_norm": 0.0, "learning_rate": 2.4763647454059524e-06, "loss": 0.9568, "step": 19883 }, { "epoch": 0.7779951482901636, "grad_norm": 0.0, "learning_rate": 2.475530019666179e-06, "loss": 0.7978, "step": 19884 }, { "epoch": 0.778034274982393, "grad_norm": 0.0, "learning_rate": 2.4746954147599655e-06, "loss": 0.8896, "step": 19885 }, { "epoch": 0.7780734016746225, "grad_norm": 0.0, "learning_rate": 2.473860930700713e-06, "loss": 1.0486, "step": 19886 }, { "epoch": 0.7781125283668519, "grad_norm": 0.0, "learning_rate": 2.4730265675018274e-06, "loss": 0.9277, "step": 19887 }, { "epoch": 0.7781516550590813, "grad_norm": 0.0, "learning_rate": 2.4721923251766998e-06, "loss": 0.9472, "step": 19888 }, { "epoch": 0.7781907817513107, "grad_norm": 0.0, "learning_rate": 2.471358203738733e-06, "loss": 0.9944, "step": 19889 }, { "epoch": 0.7782299084435402, "grad_norm": 0.0, "learning_rate": 2.4705242032013166e-06, "loss": 0.9009, "step": 19890 }, { "epoch": 0.7782690351357696, "grad_norm": 0.0, "learning_rate": 2.4696903235778467e-06, "loss": 1.0329, "step": 19891 }, { "epoch": 0.7783081618279991, "grad_norm": 0.0, "learning_rate": 2.4688565648817153e-06, "loss": 0.8418, "step": 19892 }, { "epoch": 0.7783472885202285, "grad_norm": 0.0, "learning_rate": 2.4680229271263123e-06, "loss": 0.9286, "step": 19893 }, { "epoch": 0.778386415212458, "grad_norm": 0.0, "learning_rate": 2.4671894103250194e-06, "loss": 1.0271, "step": 19894 }, { "epoch": 0.7784255419046874, "grad_norm": 0.0, "learning_rate": 2.4663560144912267e-06, "loss": 0.8675, "step": 19895 }, { "epoch": 0.7784646685969168, "grad_norm": 0.0, "learning_rate": 2.465522739638315e-06, "loss": 0.9089, "step": 19896 }, { "epoch": 0.7785037952891463, "grad_norm": 0.0, "learning_rate": 2.4646895857796715e-06, "loss": 0.897, "step": 19897 }, { "epoch": 0.7785429219813756, "grad_norm": 0.0, "learning_rate": 2.4638565529286664e-06, "loss": 1.0362, "step": 19898 }, { "epoch": 0.7785820486736051, "grad_norm": 0.0, "learning_rate": 2.463023641098683e-06, "loss": 0.9695, "step": 19899 }, { "epoch": 0.7786211753658345, "grad_norm": 0.0, "learning_rate": 2.4621908503030946e-06, "loss": 0.9493, "step": 19900 }, { "epoch": 0.778660302058064, "grad_norm": 0.0, "learning_rate": 2.461358180555279e-06, "loss": 1.0781, "step": 19901 }, { "epoch": 0.7786994287502934, "grad_norm": 0.0, "learning_rate": 2.460525631868602e-06, "loss": 0.9827, "step": 19902 }, { "epoch": 0.7787385554425229, "grad_norm": 0.0, "learning_rate": 2.4596932042564382e-06, "loss": 0.8286, "step": 19903 }, { "epoch": 0.7787776821347523, "grad_norm": 0.0, "learning_rate": 2.458860897732147e-06, "loss": 1.0392, "step": 19904 }, { "epoch": 0.7788168088269818, "grad_norm": 0.0, "learning_rate": 2.4580287123091073e-06, "loss": 0.9418, "step": 19905 }, { "epoch": 0.7788559355192112, "grad_norm": 0.0, "learning_rate": 2.4571966480006713e-06, "loss": 0.9621, "step": 19906 }, { "epoch": 0.7788950622114407, "grad_norm": 0.0, "learning_rate": 2.45636470482021e-06, "loss": 0.9497, "step": 19907 }, { "epoch": 0.77893418890367, "grad_norm": 0.0, "learning_rate": 2.455532882781072e-06, "loss": 1.008, "step": 19908 }, { "epoch": 0.7789733155958996, "grad_norm": 0.0, "learning_rate": 2.4547011818966283e-06, "loss": 0.9137, "step": 19909 }, { "epoch": 0.7790124422881289, "grad_norm": 0.0, "learning_rate": 2.4538696021802257e-06, "loss": 0.948, "step": 19910 }, { "epoch": 0.7790515689803584, "grad_norm": 0.0, "learning_rate": 2.4530381436452244e-06, "loss": 1.0415, "step": 19911 }, { "epoch": 0.7790906956725878, "grad_norm": 0.0, "learning_rate": 2.4522068063049707e-06, "loss": 0.943, "step": 19912 }, { "epoch": 0.7791298223648173, "grad_norm": 0.0, "learning_rate": 2.4513755901728177e-06, "loss": 0.9881, "step": 19913 }, { "epoch": 0.7791689490570467, "grad_norm": 0.0, "learning_rate": 2.450544495262115e-06, "loss": 0.979, "step": 19914 }, { "epoch": 0.7792080757492762, "grad_norm": 0.0, "learning_rate": 2.449713521586209e-06, "loss": 0.9155, "step": 19915 }, { "epoch": 0.7792472024415056, "grad_norm": 0.0, "learning_rate": 2.44888266915844e-06, "loss": 1.058, "step": 19916 }, { "epoch": 0.7792863291337351, "grad_norm": 0.0, "learning_rate": 2.448051937992154e-06, "loss": 0.9389, "step": 19917 }, { "epoch": 0.7793254558259645, "grad_norm": 0.0, "learning_rate": 2.4472213281006905e-06, "loss": 0.9, "step": 19918 }, { "epoch": 0.779364582518194, "grad_norm": 0.0, "learning_rate": 2.446390839497391e-06, "loss": 1.002, "step": 19919 }, { "epoch": 0.7794037092104233, "grad_norm": 0.0, "learning_rate": 2.4455604721955872e-06, "loss": 0.8806, "step": 19920 }, { "epoch": 0.7794428359026528, "grad_norm": 0.0, "learning_rate": 2.444730226208618e-06, "loss": 1.0013, "step": 19921 }, { "epoch": 0.7794819625948822, "grad_norm": 0.0, "learning_rate": 2.443900101549812e-06, "loss": 1.0303, "step": 19922 }, { "epoch": 0.7795210892871116, "grad_norm": 0.0, "learning_rate": 2.4430700982325018e-06, "loss": 0.9787, "step": 19923 }, { "epoch": 0.7795602159793411, "grad_norm": 0.0, "learning_rate": 2.4422402162700153e-06, "loss": 0.9775, "step": 19924 }, { "epoch": 0.7795993426715705, "grad_norm": 0.0, "learning_rate": 2.4414104556756845e-06, "loss": 0.9624, "step": 19925 }, { "epoch": 0.7796384693638, "grad_norm": 0.0, "learning_rate": 2.4405808164628275e-06, "loss": 0.9075, "step": 19926 }, { "epoch": 0.7796775960560294, "grad_norm": 0.0, "learning_rate": 2.43975129864477e-06, "loss": 0.9501, "step": 19927 }, { "epoch": 0.7797167227482589, "grad_norm": 0.0, "learning_rate": 2.4389219022348366e-06, "loss": 1.0756, "step": 19928 }, { "epoch": 0.7797558494404883, "grad_norm": 0.0, "learning_rate": 2.438092627246339e-06, "loss": 0.9654, "step": 19929 }, { "epoch": 0.7797949761327178, "grad_norm": 0.0, "learning_rate": 2.437263473692598e-06, "loss": 0.9237, "step": 19930 }, { "epoch": 0.7798341028249471, "grad_norm": 0.0, "learning_rate": 2.4364344415869303e-06, "loss": 0.949, "step": 19931 }, { "epoch": 0.7798732295171766, "grad_norm": 0.0, "learning_rate": 2.43560553094265e-06, "loss": 0.9169, "step": 19932 }, { "epoch": 0.779912356209406, "grad_norm": 0.0, "learning_rate": 2.4347767417730626e-06, "loss": 0.9534, "step": 19933 }, { "epoch": 0.7799514829016355, "grad_norm": 0.0, "learning_rate": 2.4339480740914844e-06, "loss": 0.9251, "step": 19934 }, { "epoch": 0.7799906095938649, "grad_norm": 0.0, "learning_rate": 2.433119527911214e-06, "loss": 1.0859, "step": 19935 }, { "epoch": 0.7800297362860944, "grad_norm": 0.0, "learning_rate": 2.4322911032455676e-06, "loss": 0.9863, "step": 19936 }, { "epoch": 0.7800688629783238, "grad_norm": 0.0, "learning_rate": 2.43146280010784e-06, "loss": 0.9155, "step": 19937 }, { "epoch": 0.7801079896705533, "grad_norm": 0.0, "learning_rate": 2.4306346185113395e-06, "loss": 0.9193, "step": 19938 }, { "epoch": 0.7801471163627827, "grad_norm": 0.0, "learning_rate": 2.4298065584693586e-06, "loss": 0.8473, "step": 19939 }, { "epoch": 0.7801862430550122, "grad_norm": 0.0, "learning_rate": 2.428978619995198e-06, "loss": 1.0774, "step": 19940 }, { "epoch": 0.7802253697472415, "grad_norm": 0.0, "learning_rate": 2.428150803102155e-06, "loss": 1.0028, "step": 19941 }, { "epoch": 0.780264496439471, "grad_norm": 0.0, "learning_rate": 2.4273231078035244e-06, "loss": 0.913, "step": 19942 }, { "epoch": 0.7803036231317004, "grad_norm": 0.0, "learning_rate": 2.426495534112592e-06, "loss": 0.9368, "step": 19943 }, { "epoch": 0.7803427498239299, "grad_norm": 0.0, "learning_rate": 2.4256680820426515e-06, "loss": 0.9845, "step": 19944 }, { "epoch": 0.7803818765161593, "grad_norm": 0.0, "learning_rate": 2.4248407516069903e-06, "loss": 1.0455, "step": 19945 }, { "epoch": 0.7804210032083888, "grad_norm": 0.0, "learning_rate": 2.424013542818898e-06, "loss": 0.8371, "step": 19946 }, { "epoch": 0.7804601299006182, "grad_norm": 0.0, "learning_rate": 2.423186455691652e-06, "loss": 1.0406, "step": 19947 }, { "epoch": 0.7804992565928477, "grad_norm": 0.0, "learning_rate": 2.4223594902385394e-06, "loss": 0.8084, "step": 19948 }, { "epoch": 0.7805383832850771, "grad_norm": 0.0, "learning_rate": 2.421532646472833e-06, "loss": 0.9335, "step": 19949 }, { "epoch": 0.7805775099773066, "grad_norm": 0.0, "learning_rate": 2.4207059244078212e-06, "loss": 0.9597, "step": 19950 }, { "epoch": 0.780616636669536, "grad_norm": 0.0, "learning_rate": 2.419879324056773e-06, "loss": 0.9035, "step": 19951 }, { "epoch": 0.7806557633617653, "grad_norm": 0.0, "learning_rate": 2.4190528454329666e-06, "loss": 0.9483, "step": 19952 }, { "epoch": 0.7806948900539948, "grad_norm": 0.0, "learning_rate": 2.4182264885496663e-06, "loss": 0.9621, "step": 19953 }, { "epoch": 0.7807340167462242, "grad_norm": 0.0, "learning_rate": 2.4174002534201536e-06, "loss": 0.8837, "step": 19954 }, { "epoch": 0.7807731434384537, "grad_norm": 0.0, "learning_rate": 2.4165741400576894e-06, "loss": 1.0178, "step": 19955 }, { "epoch": 0.7808122701306831, "grad_norm": 0.0, "learning_rate": 2.415748148475543e-06, "loss": 0.939, "step": 19956 }, { "epoch": 0.7808513968229126, "grad_norm": 0.0, "learning_rate": 2.414922278686973e-06, "loss": 1.0837, "step": 19957 }, { "epoch": 0.780890523515142, "grad_norm": 0.0, "learning_rate": 2.4140965307052534e-06, "loss": 0.8988, "step": 19958 }, { "epoch": 0.7809296502073715, "grad_norm": 0.0, "learning_rate": 2.413270904543633e-06, "loss": 1.0498, "step": 19959 }, { "epoch": 0.7809687768996009, "grad_norm": 0.0, "learning_rate": 2.4124454002153796e-06, "loss": 0.8948, "step": 19960 }, { "epoch": 0.7810079035918304, "grad_norm": 0.0, "learning_rate": 2.411620017733741e-06, "loss": 0.9858, "step": 19961 }, { "epoch": 0.7810470302840598, "grad_norm": 0.0, "learning_rate": 2.410794757111977e-06, "loss": 0.9482, "step": 19962 }, { "epoch": 0.7810861569762892, "grad_norm": 0.0, "learning_rate": 2.40996961836334e-06, "loss": 1.0986, "step": 19963 }, { "epoch": 0.7811252836685186, "grad_norm": 0.0, "learning_rate": 2.409144601501082e-06, "loss": 0.9857, "step": 19964 }, { "epoch": 0.7811644103607481, "grad_norm": 0.0, "learning_rate": 2.408319706538448e-06, "loss": 0.8947, "step": 19965 }, { "epoch": 0.7812035370529775, "grad_norm": 0.0, "learning_rate": 2.4074949334886855e-06, "loss": 0.969, "step": 19966 }, { "epoch": 0.781242663745207, "grad_norm": 0.0, "learning_rate": 2.4066702823650434e-06, "loss": 0.9842, "step": 19967 }, { "epoch": 0.7812817904374364, "grad_norm": 0.0, "learning_rate": 2.4058457531807587e-06, "loss": 1.0115, "step": 19968 }, { "epoch": 0.7813209171296659, "grad_norm": 0.0, "learning_rate": 2.4050213459490755e-06, "loss": 0.9583, "step": 19969 }, { "epoch": 0.7813600438218953, "grad_norm": 0.0, "learning_rate": 2.4041970606832355e-06, "loss": 0.9827, "step": 19970 }, { "epoch": 0.7813991705141248, "grad_norm": 0.0, "learning_rate": 2.40337289739647e-06, "loss": 0.9172, "step": 19971 }, { "epoch": 0.7814382972063542, "grad_norm": 0.0, "learning_rate": 2.402548856102016e-06, "loss": 0.8872, "step": 19972 }, { "epoch": 0.7814774238985837, "grad_norm": 0.0, "learning_rate": 2.40172493681311e-06, "loss": 0.9127, "step": 19973 }, { "epoch": 0.781516550590813, "grad_norm": 0.0, "learning_rate": 2.4009011395429784e-06, "loss": 1.0936, "step": 19974 }, { "epoch": 0.7815556772830425, "grad_norm": 0.0, "learning_rate": 2.400077464304852e-06, "loss": 0.9017, "step": 19975 }, { "epoch": 0.7815948039752719, "grad_norm": 0.0, "learning_rate": 2.3992539111119585e-06, "loss": 0.9255, "step": 19976 }, { "epoch": 0.7816339306675014, "grad_norm": 0.0, "learning_rate": 2.398430479977525e-06, "loss": 0.9583, "step": 19977 }, { "epoch": 0.7816730573597308, "grad_norm": 0.0, "learning_rate": 2.39760717091477e-06, "loss": 0.8884, "step": 19978 }, { "epoch": 0.7817121840519603, "grad_norm": 0.0, "learning_rate": 2.39678398393692e-06, "loss": 0.8484, "step": 19979 }, { "epoch": 0.7817513107441897, "grad_norm": 0.0, "learning_rate": 2.3959609190571864e-06, "loss": 0.9744, "step": 19980 }, { "epoch": 0.7817904374364191, "grad_norm": 0.0, "learning_rate": 2.3951379762887983e-06, "loss": 1.0137, "step": 19981 }, { "epoch": 0.7818295641286486, "grad_norm": 0.0, "learning_rate": 2.394315155644962e-06, "loss": 0.936, "step": 19982 }, { "epoch": 0.781868690820878, "grad_norm": 0.0, "learning_rate": 2.393492457138896e-06, "loss": 1.0257, "step": 19983 }, { "epoch": 0.7819078175131075, "grad_norm": 0.0, "learning_rate": 2.3926698807838045e-06, "loss": 0.9797, "step": 19984 }, { "epoch": 0.7819469442053368, "grad_norm": 0.0, "learning_rate": 2.3918474265929082e-06, "loss": 1.0435, "step": 19985 }, { "epoch": 0.7819860708975663, "grad_norm": 0.0, "learning_rate": 2.3910250945794055e-06, "loss": 1.0895, "step": 19986 }, { "epoch": 0.7820251975897957, "grad_norm": 0.0, "learning_rate": 2.3902028847565074e-06, "loss": 1.0884, "step": 19987 }, { "epoch": 0.7820643242820252, "grad_norm": 0.0, "learning_rate": 2.389380797137413e-06, "loss": 0.9995, "step": 19988 }, { "epoch": 0.7821034509742546, "grad_norm": 0.0, "learning_rate": 2.388558831735327e-06, "loss": 1.0356, "step": 19989 }, { "epoch": 0.7821425776664841, "grad_norm": 0.0, "learning_rate": 2.387736988563447e-06, "loss": 1.0725, "step": 19990 }, { "epoch": 0.7821817043587135, "grad_norm": 0.0, "learning_rate": 2.386915267634976e-06, "loss": 0.975, "step": 19991 }, { "epoch": 0.782220831050943, "grad_norm": 0.0, "learning_rate": 2.3860936689631042e-06, "loss": 0.9686, "step": 19992 }, { "epoch": 0.7822599577431724, "grad_norm": 0.0, "learning_rate": 2.3852721925610257e-06, "loss": 1.0195, "step": 19993 }, { "epoch": 0.7822990844354019, "grad_norm": 0.0, "learning_rate": 2.384450838441935e-06, "loss": 1.0097, "step": 19994 }, { "epoch": 0.7823382111276312, "grad_norm": 0.0, "learning_rate": 2.3836296066190235e-06, "loss": 0.9867, "step": 19995 }, { "epoch": 0.7823773378198607, "grad_norm": 0.0, "learning_rate": 2.382808497105473e-06, "loss": 0.9867, "step": 19996 }, { "epoch": 0.7824164645120901, "grad_norm": 0.0, "learning_rate": 2.3819875099144772e-06, "loss": 0.9549, "step": 19997 }, { "epoch": 0.7824555912043196, "grad_norm": 0.0, "learning_rate": 2.3811666450592108e-06, "loss": 0.968, "step": 19998 }, { "epoch": 0.782494717896549, "grad_norm": 0.0, "learning_rate": 2.3803459025528654e-06, "loss": 0.9388, "step": 19999 }, { "epoch": 0.7825338445887785, "grad_norm": 0.0, "learning_rate": 2.3795252824086145e-06, "loss": 0.9673, "step": 20000 }, { "epoch": 0.7825729712810079, "grad_norm": 0.0, "learning_rate": 2.3787047846396418e-06, "loss": 0.9264, "step": 20001 }, { "epoch": 0.7826120979732374, "grad_norm": 0.0, "learning_rate": 2.377884409259115e-06, "loss": 0.9872, "step": 20002 }, { "epoch": 0.7826512246654668, "grad_norm": 0.0, "learning_rate": 2.377064156280219e-06, "loss": 0.9103, "step": 20003 }, { "epoch": 0.7826903513576963, "grad_norm": 0.0, "learning_rate": 2.376244025716117e-06, "loss": 0.8831, "step": 20004 }, { "epoch": 0.7827294780499257, "grad_norm": 0.0, "learning_rate": 2.375424017579986e-06, "loss": 1.0229, "step": 20005 }, { "epoch": 0.7827686047421551, "grad_norm": 0.0, "learning_rate": 2.3746041318849876e-06, "loss": 0.9976, "step": 20006 }, { "epoch": 0.7828077314343845, "grad_norm": 0.0, "learning_rate": 2.373784368644293e-06, "loss": 0.98, "step": 20007 }, { "epoch": 0.782846858126614, "grad_norm": 0.0, "learning_rate": 2.372964727871064e-06, "loss": 0.9834, "step": 20008 }, { "epoch": 0.7828859848188434, "grad_norm": 0.0, "learning_rate": 2.372145209578468e-06, "loss": 0.8915, "step": 20009 }, { "epoch": 0.7829251115110728, "grad_norm": 0.0, "learning_rate": 2.371325813779659e-06, "loss": 0.8263, "step": 20010 }, { "epoch": 0.7829642382033023, "grad_norm": 0.0, "learning_rate": 2.3705065404877982e-06, "loss": 0.9303, "step": 20011 }, { "epoch": 0.7830033648955317, "grad_norm": 0.0, "learning_rate": 2.3696873897160442e-06, "loss": 0.9215, "step": 20012 }, { "epoch": 0.7830424915877612, "grad_norm": 0.0, "learning_rate": 2.3688683614775464e-06, "loss": 0.9632, "step": 20013 }, { "epoch": 0.7830816182799906, "grad_norm": 0.0, "learning_rate": 2.3680494557854604e-06, "loss": 1.012, "step": 20014 }, { "epoch": 0.7831207449722201, "grad_norm": 0.0, "learning_rate": 2.367230672652938e-06, "loss": 0.9075, "step": 20015 }, { "epoch": 0.7831598716644494, "grad_norm": 0.0, "learning_rate": 2.366412012093129e-06, "loss": 0.8337, "step": 20016 }, { "epoch": 0.7831989983566789, "grad_norm": 0.0, "learning_rate": 2.3655934741191734e-06, "loss": 0.9622, "step": 20017 }, { "epoch": 0.7832381250489083, "grad_norm": 0.0, "learning_rate": 2.364775058744224e-06, "loss": 1.063, "step": 20018 }, { "epoch": 0.7832772517411378, "grad_norm": 0.0, "learning_rate": 2.3639567659814176e-06, "loss": 0.939, "step": 20019 }, { "epoch": 0.7833163784333672, "grad_norm": 0.0, "learning_rate": 2.3631385958438958e-06, "loss": 0.8827, "step": 20020 }, { "epoch": 0.7833555051255967, "grad_norm": 0.0, "learning_rate": 2.3623205483448e-06, "loss": 0.9563, "step": 20021 }, { "epoch": 0.7833946318178261, "grad_norm": 0.0, "learning_rate": 2.3615026234972673e-06, "loss": 1.028, "step": 20022 }, { "epoch": 0.7834337585100556, "grad_norm": 0.0, "learning_rate": 2.3606848213144284e-06, "loss": 1.0767, "step": 20023 }, { "epoch": 0.783472885202285, "grad_norm": 0.0, "learning_rate": 2.359867141809419e-06, "loss": 0.9525, "step": 20024 }, { "epoch": 0.7835120118945145, "grad_norm": 0.0, "learning_rate": 2.359049584995369e-06, "loss": 0.9976, "step": 20025 }, { "epoch": 0.7835511385867439, "grad_norm": 0.0, "learning_rate": 2.3582321508854112e-06, "loss": 1.0307, "step": 20026 }, { "epoch": 0.7835902652789734, "grad_norm": 0.0, "learning_rate": 2.3574148394926668e-06, "loss": 1.0161, "step": 20027 }, { "epoch": 0.7836293919712027, "grad_norm": 0.0, "learning_rate": 2.3565976508302657e-06, "loss": 0.9762, "step": 20028 }, { "epoch": 0.7836685186634322, "grad_norm": 0.0, "learning_rate": 2.3557805849113234e-06, "loss": 0.9552, "step": 20029 }, { "epoch": 0.7837076453556616, "grad_norm": 0.0, "learning_rate": 2.354963641748972e-06, "loss": 0.9203, "step": 20030 }, { "epoch": 0.7837467720478911, "grad_norm": 0.0, "learning_rate": 2.3541468213563213e-06, "loss": 1.1042, "step": 20031 }, { "epoch": 0.7837858987401205, "grad_norm": 0.0, "learning_rate": 2.3533301237464945e-06, "loss": 1.0231, "step": 20032 }, { "epoch": 0.78382502543235, "grad_norm": 0.0, "learning_rate": 2.352513548932599e-06, "loss": 1.1117, "step": 20033 }, { "epoch": 0.7838641521245794, "grad_norm": 0.0, "learning_rate": 2.3516970969277585e-06, "loss": 0.994, "step": 20034 }, { "epoch": 0.7839032788168089, "grad_norm": 0.0, "learning_rate": 2.350880767745076e-06, "loss": 1.0526, "step": 20035 }, { "epoch": 0.7839424055090383, "grad_norm": 0.0, "learning_rate": 2.350064561397667e-06, "loss": 1.0861, "step": 20036 }, { "epoch": 0.7839815322012677, "grad_norm": 0.0, "learning_rate": 2.349248477898631e-06, "loss": 0.972, "step": 20037 }, { "epoch": 0.7840206588934971, "grad_norm": 0.0, "learning_rate": 2.3484325172610788e-06, "loss": 0.9581, "step": 20038 }, { "epoch": 0.7840597855857265, "grad_norm": 0.0, "learning_rate": 2.3476166794981124e-06, "loss": 1.1013, "step": 20039 }, { "epoch": 0.784098912277956, "grad_norm": 0.0, "learning_rate": 2.346800964622835e-06, "loss": 0.906, "step": 20040 }, { "epoch": 0.7841380389701854, "grad_norm": 0.0, "learning_rate": 2.345985372648342e-06, "loss": 1.0363, "step": 20041 }, { "epoch": 0.7841771656624149, "grad_norm": 0.0, "learning_rate": 2.345169903587733e-06, "loss": 0.9018, "step": 20042 }, { "epoch": 0.7842162923546443, "grad_norm": 0.0, "learning_rate": 2.3443545574541026e-06, "loss": 0.9091, "step": 20043 }, { "epoch": 0.7842554190468738, "grad_norm": 0.0, "learning_rate": 2.3435393342605484e-06, "loss": 0.7903, "step": 20044 }, { "epoch": 0.7842945457391032, "grad_norm": 0.0, "learning_rate": 2.342724234020155e-06, "loss": 0.9038, "step": 20045 }, { "epoch": 0.7843336724313327, "grad_norm": 0.0, "learning_rate": 2.341909256746019e-06, "loss": 1.1422, "step": 20046 }, { "epoch": 0.7843727991235621, "grad_norm": 0.0, "learning_rate": 2.3410944024512184e-06, "loss": 0.9023, "step": 20047 }, { "epoch": 0.7844119258157916, "grad_norm": 0.0, "learning_rate": 2.3402796711488494e-06, "loss": 0.8974, "step": 20048 }, { "epoch": 0.7844510525080209, "grad_norm": 0.0, "learning_rate": 2.3394650628519876e-06, "loss": 1.0092, "step": 20049 }, { "epoch": 0.7844901792002504, "grad_norm": 0.0, "learning_rate": 2.3386505775737224e-06, "loss": 1.0239, "step": 20050 }, { "epoch": 0.7845293058924798, "grad_norm": 0.0, "learning_rate": 2.3378362153271238e-06, "loss": 0.7791, "step": 20051 }, { "epoch": 0.7845684325847093, "grad_norm": 0.0, "learning_rate": 2.3370219761252757e-06, "loss": 1.0132, "step": 20052 }, { "epoch": 0.7846075592769387, "grad_norm": 0.0, "learning_rate": 2.336207859981252e-06, "loss": 0.897, "step": 20053 }, { "epoch": 0.7846466859691682, "grad_norm": 0.0, "learning_rate": 2.33539386690813e-06, "loss": 0.9306, "step": 20054 }, { "epoch": 0.7846858126613976, "grad_norm": 0.0, "learning_rate": 2.334579996918975e-06, "loss": 0.9937, "step": 20055 }, { "epoch": 0.7847249393536271, "grad_norm": 0.0, "learning_rate": 2.33376625002686e-06, "loss": 0.8123, "step": 20056 }, { "epoch": 0.7847640660458565, "grad_norm": 0.0, "learning_rate": 2.332952626244852e-06, "loss": 1.0593, "step": 20057 }, { "epoch": 0.784803192738086, "grad_norm": 0.0, "learning_rate": 2.3321391255860215e-06, "loss": 0.8983, "step": 20058 }, { "epoch": 0.7848423194303153, "grad_norm": 0.0, "learning_rate": 2.3313257480634244e-06, "loss": 1.0579, "step": 20059 }, { "epoch": 0.7848814461225448, "grad_norm": 0.0, "learning_rate": 2.3305124936901267e-06, "loss": 1.0285, "step": 20060 }, { "epoch": 0.7849205728147742, "grad_norm": 0.0, "learning_rate": 2.329699362479192e-06, "loss": 0.9706, "step": 20061 }, { "epoch": 0.7849596995070037, "grad_norm": 0.0, "learning_rate": 2.32888635444367e-06, "loss": 0.8694, "step": 20062 }, { "epoch": 0.7849988261992331, "grad_norm": 0.0, "learning_rate": 2.328073469596621e-06, "loss": 0.9092, "step": 20063 }, { "epoch": 0.7850379528914626, "grad_norm": 0.0, "learning_rate": 2.327260707951101e-06, "loss": 0.917, "step": 20064 }, { "epoch": 0.785077079583692, "grad_norm": 0.0, "learning_rate": 2.3264480695201575e-06, "loss": 0.9643, "step": 20065 }, { "epoch": 0.7851162062759214, "grad_norm": 0.0, "learning_rate": 2.325635554316842e-06, "loss": 0.8557, "step": 20066 }, { "epoch": 0.7851553329681509, "grad_norm": 0.0, "learning_rate": 2.324823162354206e-06, "loss": 0.9922, "step": 20067 }, { "epoch": 0.7851944596603803, "grad_norm": 0.0, "learning_rate": 2.3240108936452897e-06, "loss": 1.0401, "step": 20068 }, { "epoch": 0.7852335863526098, "grad_norm": 0.0, "learning_rate": 2.3231987482031403e-06, "loss": 0.9979, "step": 20069 }, { "epoch": 0.7852727130448391, "grad_norm": 0.0, "learning_rate": 2.3223867260407985e-06, "loss": 0.8742, "step": 20070 }, { "epoch": 0.7853118397370686, "grad_norm": 0.0, "learning_rate": 2.321574827171309e-06, "loss": 0.7975, "step": 20071 }, { "epoch": 0.785350966429298, "grad_norm": 0.0, "learning_rate": 2.3207630516077027e-06, "loss": 0.9783, "step": 20072 }, { "epoch": 0.7853900931215275, "grad_norm": 0.0, "learning_rate": 2.3199513993630195e-06, "loss": 0.976, "step": 20073 }, { "epoch": 0.7854292198137569, "grad_norm": 0.0, "learning_rate": 2.3191398704502933e-06, "loss": 0.8432, "step": 20074 }, { "epoch": 0.7854683465059864, "grad_norm": 0.0, "learning_rate": 2.3183284648825577e-06, "loss": 1.0163, "step": 20075 }, { "epoch": 0.7855074731982158, "grad_norm": 0.0, "learning_rate": 2.3175171826728403e-06, "loss": 1.0674, "step": 20076 }, { "epoch": 0.7855465998904453, "grad_norm": 0.0, "learning_rate": 2.316706023834172e-06, "loss": 1.0981, "step": 20077 }, { "epoch": 0.7855857265826747, "grad_norm": 0.0, "learning_rate": 2.315894988379572e-06, "loss": 1.0002, "step": 20078 }, { "epoch": 0.7856248532749042, "grad_norm": 0.0, "learning_rate": 2.315084076322075e-06, "loss": 0.977, "step": 20079 }, { "epoch": 0.7856639799671336, "grad_norm": 0.0, "learning_rate": 2.3142732876746954e-06, "loss": 0.9621, "step": 20080 }, { "epoch": 0.785703106659363, "grad_norm": 0.0, "learning_rate": 2.3134626224504586e-06, "loss": 1.0034, "step": 20081 }, { "epoch": 0.7857422333515924, "grad_norm": 0.0, "learning_rate": 2.3126520806623752e-06, "loss": 0.928, "step": 20082 }, { "epoch": 0.7857813600438219, "grad_norm": 0.0, "learning_rate": 2.311841662323472e-06, "loss": 0.8886, "step": 20083 }, { "epoch": 0.7858204867360513, "grad_norm": 0.0, "learning_rate": 2.3110313674467556e-06, "loss": 0.9438, "step": 20084 }, { "epoch": 0.7858596134282808, "grad_norm": 0.0, "learning_rate": 2.3102211960452427e-06, "loss": 0.9785, "step": 20085 }, { "epoch": 0.7858987401205102, "grad_norm": 0.0, "learning_rate": 2.3094111481319394e-06, "loss": 0.9788, "step": 20086 }, { "epoch": 0.7859378668127397, "grad_norm": 0.0, "learning_rate": 2.3086012237198553e-06, "loss": 1.0364, "step": 20087 }, { "epoch": 0.7859769935049691, "grad_norm": 0.0, "learning_rate": 2.307791422821998e-06, "loss": 0.915, "step": 20088 }, { "epoch": 0.7860161201971986, "grad_norm": 0.0, "learning_rate": 2.3069817454513744e-06, "loss": 0.8765, "step": 20089 }, { "epoch": 0.786055246889428, "grad_norm": 0.0, "learning_rate": 2.3061721916209824e-06, "loss": 1.0214, "step": 20090 }, { "epoch": 0.7860943735816575, "grad_norm": 0.0, "learning_rate": 2.3053627613438224e-06, "loss": 0.984, "step": 20091 }, { "epoch": 0.7861335002738868, "grad_norm": 0.0, "learning_rate": 2.3045534546328952e-06, "loss": 0.9636, "step": 20092 }, { "epoch": 0.7861726269661163, "grad_norm": 0.0, "learning_rate": 2.3037442715011994e-06, "loss": 0.973, "step": 20093 }, { "epoch": 0.7862117536583457, "grad_norm": 0.0, "learning_rate": 2.302935211961724e-06, "loss": 0.9434, "step": 20094 }, { "epoch": 0.7862508803505751, "grad_norm": 0.0, "learning_rate": 2.3021262760274667e-06, "loss": 1.0226, "step": 20095 }, { "epoch": 0.7862900070428046, "grad_norm": 0.0, "learning_rate": 2.30131746371141e-06, "loss": 0.8509, "step": 20096 }, { "epoch": 0.786329133735034, "grad_norm": 0.0, "learning_rate": 2.3005087750265532e-06, "loss": 1.0206, "step": 20097 }, { "epoch": 0.7863682604272635, "grad_norm": 0.0, "learning_rate": 2.2997002099858756e-06, "loss": 1.0413, "step": 20098 }, { "epoch": 0.7864073871194929, "grad_norm": 0.0, "learning_rate": 2.298891768602365e-06, "loss": 0.9036, "step": 20099 }, { "epoch": 0.7864465138117224, "grad_norm": 0.0, "learning_rate": 2.2980834508890004e-06, "loss": 0.9626, "step": 20100 }, { "epoch": 0.7864856405039518, "grad_norm": 0.0, "learning_rate": 2.2972752568587652e-06, "loss": 0.9235, "step": 20101 }, { "epoch": 0.7865247671961813, "grad_norm": 0.0, "learning_rate": 2.2964671865246367e-06, "loss": 0.8895, "step": 20102 }, { "epoch": 0.7865638938884106, "grad_norm": 0.0, "learning_rate": 2.2956592398995947e-06, "loss": 0.9901, "step": 20103 }, { "epoch": 0.7866030205806401, "grad_norm": 0.0, "learning_rate": 2.2948514169966086e-06, "loss": 1.0293, "step": 20104 }, { "epoch": 0.7866421472728695, "grad_norm": 0.0, "learning_rate": 2.2940437178286532e-06, "loss": 0.9081, "step": 20105 }, { "epoch": 0.786681273965099, "grad_norm": 0.0, "learning_rate": 2.2932361424087035e-06, "loss": 0.9415, "step": 20106 }, { "epoch": 0.7867204006573284, "grad_norm": 0.0, "learning_rate": 2.2924286907497207e-06, "loss": 0.9471, "step": 20107 }, { "epoch": 0.7867595273495579, "grad_norm": 0.0, "learning_rate": 2.2916213628646754e-06, "loss": 0.9, "step": 20108 }, { "epoch": 0.7867986540417873, "grad_norm": 0.0, "learning_rate": 2.290814158766531e-06, "loss": 1.0547, "step": 20109 }, { "epoch": 0.7868377807340168, "grad_norm": 0.0, "learning_rate": 2.2900070784682548e-06, "loss": 0.9367, "step": 20110 }, { "epoch": 0.7868769074262462, "grad_norm": 0.0, "learning_rate": 2.289200121982801e-06, "loss": 1.0704, "step": 20111 }, { "epoch": 0.7869160341184757, "grad_norm": 0.0, "learning_rate": 2.288393289323133e-06, "loss": 1.008, "step": 20112 }, { "epoch": 0.786955160810705, "grad_norm": 0.0, "learning_rate": 2.2875865805022037e-06, "loss": 1.0015, "step": 20113 }, { "epoch": 0.7869942875029345, "grad_norm": 0.0, "learning_rate": 2.2867799955329695e-06, "loss": 1.0483, "step": 20114 }, { "epoch": 0.7870334141951639, "grad_norm": 0.0, "learning_rate": 2.285973534428383e-06, "loss": 0.9852, "step": 20115 }, { "epoch": 0.7870725408873934, "grad_norm": 0.0, "learning_rate": 2.285167197201398e-06, "loss": 0.9857, "step": 20116 }, { "epoch": 0.7871116675796228, "grad_norm": 0.0, "learning_rate": 2.2843609838649584e-06, "loss": 1.148, "step": 20117 }, { "epoch": 0.7871507942718523, "grad_norm": 0.0, "learning_rate": 2.2835548944320127e-06, "loss": 0.852, "step": 20118 }, { "epoch": 0.7871899209640817, "grad_norm": 0.0, "learning_rate": 2.2827489289155056e-06, "loss": 0.9014, "step": 20119 }, { "epoch": 0.7872290476563112, "grad_norm": 0.0, "learning_rate": 2.2819430873283843e-06, "loss": 0.9106, "step": 20120 }, { "epoch": 0.7872681743485406, "grad_norm": 0.0, "learning_rate": 2.2811373696835827e-06, "loss": 0.9592, "step": 20121 }, { "epoch": 0.78730730104077, "grad_norm": 0.0, "learning_rate": 2.280331775994046e-06, "loss": 0.9735, "step": 20122 }, { "epoch": 0.7873464277329995, "grad_norm": 0.0, "learning_rate": 2.2795263062727013e-06, "loss": 0.9249, "step": 20123 }, { "epoch": 0.7873855544252288, "grad_norm": 0.0, "learning_rate": 2.2787209605324954e-06, "loss": 0.9327, "step": 20124 }, { "epoch": 0.7874246811174583, "grad_norm": 0.0, "learning_rate": 2.2779157387863527e-06, "loss": 0.9731, "step": 20125 }, { "epoch": 0.7874638078096877, "grad_norm": 0.0, "learning_rate": 2.27711064104721e-06, "loss": 0.8684, "step": 20126 }, { "epoch": 0.7875029345019172, "grad_norm": 0.0, "learning_rate": 2.2763056673279873e-06, "loss": 1.0411, "step": 20127 }, { "epoch": 0.7875420611941466, "grad_norm": 0.0, "learning_rate": 2.2755008176416236e-06, "loss": 0.9843, "step": 20128 }, { "epoch": 0.7875811878863761, "grad_norm": 0.0, "learning_rate": 2.274696092001034e-06, "loss": 1.0374, "step": 20129 }, { "epoch": 0.7876203145786055, "grad_norm": 0.0, "learning_rate": 2.2738914904191477e-06, "loss": 0.8611, "step": 20130 }, { "epoch": 0.787659441270835, "grad_norm": 0.0, "learning_rate": 2.2730870129088767e-06, "loss": 1.0607, "step": 20131 }, { "epoch": 0.7876985679630644, "grad_norm": 0.0, "learning_rate": 2.2722826594831515e-06, "loss": 1.0618, "step": 20132 }, { "epoch": 0.7877376946552939, "grad_norm": 0.0, "learning_rate": 2.271478430154881e-06, "loss": 0.9294, "step": 20133 }, { "epoch": 0.7877768213475232, "grad_norm": 0.0, "learning_rate": 2.2706743249369855e-06, "loss": 0.9501, "step": 20134 }, { "epoch": 0.7878159480397527, "grad_norm": 0.0, "learning_rate": 2.269870343842372e-06, "loss": 1.0004, "step": 20135 }, { "epoch": 0.7878550747319821, "grad_norm": 0.0, "learning_rate": 2.269066486883954e-06, "loss": 0.8585, "step": 20136 }, { "epoch": 0.7878942014242116, "grad_norm": 0.0, "learning_rate": 2.2682627540746415e-06, "loss": 0.8376, "step": 20137 }, { "epoch": 0.787933328116441, "grad_norm": 0.0, "learning_rate": 2.267459145427343e-06, "loss": 0.8859, "step": 20138 }, { "epoch": 0.7879724548086705, "grad_norm": 0.0, "learning_rate": 2.2666556609549597e-06, "loss": 1.0775, "step": 20139 }, { "epoch": 0.7880115815008999, "grad_norm": 0.0, "learning_rate": 2.2658523006703948e-06, "loss": 0.8753, "step": 20140 }, { "epoch": 0.7880507081931294, "grad_norm": 0.0, "learning_rate": 2.265049064586551e-06, "loss": 0.8994, "step": 20141 }, { "epoch": 0.7880898348853588, "grad_norm": 0.0, "learning_rate": 2.2642459527163287e-06, "loss": 1.0865, "step": 20142 }, { "epoch": 0.7881289615775883, "grad_norm": 0.0, "learning_rate": 2.263442965072621e-06, "loss": 0.9994, "step": 20143 }, { "epoch": 0.7881680882698177, "grad_norm": 0.0, "learning_rate": 2.2626401016683275e-06, "loss": 1.0322, "step": 20144 }, { "epoch": 0.7882072149620472, "grad_norm": 0.0, "learning_rate": 2.2618373625163347e-06, "loss": 0.8134, "step": 20145 }, { "epoch": 0.7882463416542765, "grad_norm": 0.0, "learning_rate": 2.261034747629539e-06, "loss": 0.8657, "step": 20146 }, { "epoch": 0.788285468346506, "grad_norm": 0.0, "learning_rate": 2.260232257020827e-06, "loss": 0.9724, "step": 20147 }, { "epoch": 0.7883245950387354, "grad_norm": 0.0, "learning_rate": 2.25942989070309e-06, "loss": 1.0123, "step": 20148 }, { "epoch": 0.7883637217309649, "grad_norm": 0.0, "learning_rate": 2.2586276486892055e-06, "loss": 0.9657, "step": 20149 }, { "epoch": 0.7884028484231943, "grad_norm": 0.0, "learning_rate": 2.2578255309920605e-06, "loss": 0.9675, "step": 20150 }, { "epoch": 0.7884419751154237, "grad_norm": 0.0, "learning_rate": 2.2570235376245397e-06, "loss": 1.0171, "step": 20151 }, { "epoch": 0.7884811018076532, "grad_norm": 0.0, "learning_rate": 2.2562216685995154e-06, "loss": 0.9557, "step": 20152 }, { "epoch": 0.7885202284998826, "grad_norm": 0.0, "learning_rate": 2.255419923929867e-06, "loss": 1.0297, "step": 20153 }, { "epoch": 0.7885593551921121, "grad_norm": 0.0, "learning_rate": 2.254618303628471e-06, "loss": 0.9458, "step": 20154 }, { "epoch": 0.7885984818843415, "grad_norm": 0.0, "learning_rate": 2.2538168077082013e-06, "loss": 1.0275, "step": 20155 }, { "epoch": 0.788637608576571, "grad_norm": 0.0, "learning_rate": 2.2530154361819257e-06, "loss": 0.9971, "step": 20156 }, { "epoch": 0.7886767352688003, "grad_norm": 0.0, "learning_rate": 2.252214189062516e-06, "loss": 0.8773, "step": 20157 }, { "epoch": 0.7887158619610298, "grad_norm": 0.0, "learning_rate": 2.2514130663628333e-06, "loss": 1.043, "step": 20158 }, { "epoch": 0.7887549886532592, "grad_norm": 0.0, "learning_rate": 2.2506120680957522e-06, "loss": 0.9444, "step": 20159 }, { "epoch": 0.7887941153454887, "grad_norm": 0.0, "learning_rate": 2.2498111942741284e-06, "loss": 0.9217, "step": 20160 }, { "epoch": 0.7888332420377181, "grad_norm": 0.0, "learning_rate": 2.2490104449108287e-06, "loss": 0.9107, "step": 20161 }, { "epoch": 0.7888723687299476, "grad_norm": 0.0, "learning_rate": 2.2482098200187054e-06, "loss": 0.9727, "step": 20162 }, { "epoch": 0.788911495422177, "grad_norm": 0.0, "learning_rate": 2.2474093196106183e-06, "loss": 0.9089, "step": 20163 }, { "epoch": 0.7889506221144065, "grad_norm": 0.0, "learning_rate": 2.2466089436994232e-06, "loss": 0.9526, "step": 20164 }, { "epoch": 0.7889897488066359, "grad_norm": 0.0, "learning_rate": 2.245808692297976e-06, "loss": 0.8217, "step": 20165 }, { "epoch": 0.7890288754988654, "grad_norm": 0.0, "learning_rate": 2.245008565419121e-06, "loss": 1.0745, "step": 20166 }, { "epoch": 0.7890680021910947, "grad_norm": 0.0, "learning_rate": 2.2442085630757128e-06, "loss": 1.0242, "step": 20167 }, { "epoch": 0.7891071288833242, "grad_norm": 0.0, "learning_rate": 2.243408685280595e-06, "loss": 1.0576, "step": 20168 }, { "epoch": 0.7891462555755536, "grad_norm": 0.0, "learning_rate": 2.242608932046617e-06, "loss": 0.9861, "step": 20169 }, { "epoch": 0.7891853822677831, "grad_norm": 0.0, "learning_rate": 2.241809303386616e-06, "loss": 0.9436, "step": 20170 }, { "epoch": 0.7892245089600125, "grad_norm": 0.0, "learning_rate": 2.2410097993134393e-06, "loss": 0.804, "step": 20171 }, { "epoch": 0.789263635652242, "grad_norm": 0.0, "learning_rate": 2.240210419839918e-06, "loss": 1.0034, "step": 20172 }, { "epoch": 0.7893027623444714, "grad_norm": 0.0, "learning_rate": 2.2394111649788986e-06, "loss": 1.0085, "step": 20173 }, { "epoch": 0.7893418890367009, "grad_norm": 0.0, "learning_rate": 2.238612034743208e-06, "loss": 0.9435, "step": 20174 }, { "epoch": 0.7893810157289303, "grad_norm": 0.0, "learning_rate": 2.2378130291456856e-06, "loss": 1.0454, "step": 20175 }, { "epoch": 0.7894201424211598, "grad_norm": 0.0, "learning_rate": 2.237014148199155e-06, "loss": 0.919, "step": 20176 }, { "epoch": 0.7894592691133892, "grad_norm": 0.0, "learning_rate": 2.236215391916453e-06, "loss": 1.0288, "step": 20177 }, { "epoch": 0.7894983958056186, "grad_norm": 0.0, "learning_rate": 2.2354167603104016e-06, "loss": 1.0086, "step": 20178 }, { "epoch": 0.789537522497848, "grad_norm": 0.0, "learning_rate": 2.234618253393831e-06, "loss": 0.9257, "step": 20179 }, { "epoch": 0.7895766491900774, "grad_norm": 0.0, "learning_rate": 2.2338198711795543e-06, "loss": 0.9652, "step": 20180 }, { "epoch": 0.7896157758823069, "grad_norm": 0.0, "learning_rate": 2.2330216136804038e-06, "loss": 1.06, "step": 20181 }, { "epoch": 0.7896549025745363, "grad_norm": 0.0, "learning_rate": 2.2322234809091925e-06, "loss": 0.9791, "step": 20182 }, { "epoch": 0.7896940292667658, "grad_norm": 0.0, "learning_rate": 2.23142547287874e-06, "loss": 1.0022, "step": 20183 }, { "epoch": 0.7897331559589952, "grad_norm": 0.0, "learning_rate": 2.2306275896018583e-06, "loss": 0.8983, "step": 20184 }, { "epoch": 0.7897722826512247, "grad_norm": 0.0, "learning_rate": 2.2298298310913603e-06, "loss": 0.9224, "step": 20185 }, { "epoch": 0.7898114093434541, "grad_norm": 0.0, "learning_rate": 2.2290321973600593e-06, "loss": 0.9023, "step": 20186 }, { "epoch": 0.7898505360356836, "grad_norm": 0.0, "learning_rate": 2.228234688420767e-06, "loss": 0.9476, "step": 20187 }, { "epoch": 0.789889662727913, "grad_norm": 0.0, "learning_rate": 2.227437304286284e-06, "loss": 0.9895, "step": 20188 }, { "epoch": 0.7899287894201424, "grad_norm": 0.0, "learning_rate": 2.226640044969418e-06, "loss": 0.9139, "step": 20189 }, { "epoch": 0.7899679161123718, "grad_norm": 0.0, "learning_rate": 2.2258429104829747e-06, "loss": 0.9902, "step": 20190 }, { "epoch": 0.7900070428046013, "grad_norm": 0.0, "learning_rate": 2.2250459008397505e-06, "loss": 1.0789, "step": 20191 }, { "epoch": 0.7900461694968307, "grad_norm": 0.0, "learning_rate": 2.2242490160525467e-06, "loss": 0.9473, "step": 20192 }, { "epoch": 0.7900852961890602, "grad_norm": 0.0, "learning_rate": 2.2234522561341632e-06, "loss": 0.8991, "step": 20193 }, { "epoch": 0.7901244228812896, "grad_norm": 0.0, "learning_rate": 2.2226556210973882e-06, "loss": 1.0328, "step": 20194 }, { "epoch": 0.7901635495735191, "grad_norm": 0.0, "learning_rate": 2.221859110955019e-06, "loss": 0.8693, "step": 20195 }, { "epoch": 0.7902026762657485, "grad_norm": 0.0, "learning_rate": 2.2210627257198493e-06, "loss": 0.9375, "step": 20196 }, { "epoch": 0.790241802957978, "grad_norm": 0.0, "learning_rate": 2.22026646540466e-06, "loss": 0.9202, "step": 20197 }, { "epoch": 0.7902809296502074, "grad_norm": 0.0, "learning_rate": 2.219470330022244e-06, "loss": 0.9979, "step": 20198 }, { "epoch": 0.7903200563424368, "grad_norm": 0.0, "learning_rate": 2.2186743195853855e-06, "loss": 0.9371, "step": 20199 }, { "epoch": 0.7903591830346662, "grad_norm": 0.0, "learning_rate": 2.2178784341068683e-06, "loss": 1.0999, "step": 20200 }, { "epoch": 0.7903983097268957, "grad_norm": 0.0, "learning_rate": 2.21708267359947e-06, "loss": 0.976, "step": 20201 }, { "epoch": 0.7904374364191251, "grad_norm": 0.0, "learning_rate": 2.216287038075974e-06, "loss": 0.9705, "step": 20202 }, { "epoch": 0.7904765631113546, "grad_norm": 0.0, "learning_rate": 2.2154915275491493e-06, "loss": 1.0195, "step": 20203 }, { "epoch": 0.790515689803584, "grad_norm": 0.0, "learning_rate": 2.2146961420317815e-06, "loss": 1.0897, "step": 20204 }, { "epoch": 0.7905548164958135, "grad_norm": 0.0, "learning_rate": 2.2139008815366346e-06, "loss": 0.8438, "step": 20205 }, { "epoch": 0.7905939431880429, "grad_norm": 0.0, "learning_rate": 2.213105746076487e-06, "loss": 1.0326, "step": 20206 }, { "epoch": 0.7906330698802724, "grad_norm": 0.0, "learning_rate": 2.212310735664097e-06, "loss": 1.0074, "step": 20207 }, { "epoch": 0.7906721965725018, "grad_norm": 0.0, "learning_rate": 2.2115158503122447e-06, "loss": 0.8892, "step": 20208 }, { "epoch": 0.7907113232647311, "grad_norm": 0.0, "learning_rate": 2.210721090033685e-06, "loss": 0.912, "step": 20209 }, { "epoch": 0.7907504499569606, "grad_norm": 0.0, "learning_rate": 2.209926454841187e-06, "loss": 1.017, "step": 20210 }, { "epoch": 0.79078957664919, "grad_norm": 0.0, "learning_rate": 2.2091319447475057e-06, "loss": 0.8323, "step": 20211 }, { "epoch": 0.7908287033414195, "grad_norm": 0.0, "learning_rate": 2.208337559765403e-06, "loss": 0.9676, "step": 20212 }, { "epoch": 0.7908678300336489, "grad_norm": 0.0, "learning_rate": 2.2075432999076353e-06, "loss": 0.9471, "step": 20213 }, { "epoch": 0.7909069567258784, "grad_norm": 0.0, "learning_rate": 2.206749165186961e-06, "loss": 0.9734, "step": 20214 }, { "epoch": 0.7909460834181078, "grad_norm": 0.0, "learning_rate": 2.2059551556161265e-06, "loss": 1.0102, "step": 20215 }, { "epoch": 0.7909852101103373, "grad_norm": 0.0, "learning_rate": 2.205161271207885e-06, "loss": 0.9564, "step": 20216 }, { "epoch": 0.7910243368025667, "grad_norm": 0.0, "learning_rate": 2.2043675119749874e-06, "loss": 0.993, "step": 20217 }, { "epoch": 0.7910634634947962, "grad_norm": 0.0, "learning_rate": 2.2035738779301806e-06, "loss": 1.0099, "step": 20218 }, { "epoch": 0.7911025901870256, "grad_norm": 0.0, "learning_rate": 2.202780369086206e-06, "loss": 0.8793, "step": 20219 }, { "epoch": 0.791141716879255, "grad_norm": 0.0, "learning_rate": 2.201986985455811e-06, "loss": 1.0259, "step": 20220 }, { "epoch": 0.7911808435714844, "grad_norm": 0.0, "learning_rate": 2.201193727051727e-06, "loss": 0.9049, "step": 20221 }, { "epoch": 0.7912199702637139, "grad_norm": 0.0, "learning_rate": 2.2004005938867055e-06, "loss": 0.8919, "step": 20222 }, { "epoch": 0.7912590969559433, "grad_norm": 0.0, "learning_rate": 2.1996075859734746e-06, "loss": 0.94, "step": 20223 }, { "epoch": 0.7912982236481728, "grad_norm": 0.0, "learning_rate": 2.198814703324774e-06, "loss": 0.9367, "step": 20224 }, { "epoch": 0.7913373503404022, "grad_norm": 0.0, "learning_rate": 2.1980219459533282e-06, "loss": 0.9332, "step": 20225 }, { "epoch": 0.7913764770326317, "grad_norm": 0.0, "learning_rate": 2.197229313871878e-06, "loss": 1.0041, "step": 20226 }, { "epoch": 0.7914156037248611, "grad_norm": 0.0, "learning_rate": 2.196436807093146e-06, "loss": 0.9692, "step": 20227 }, { "epoch": 0.7914547304170906, "grad_norm": 0.0, "learning_rate": 2.1956444256298638e-06, "loss": 0.988, "step": 20228 }, { "epoch": 0.79149385710932, "grad_norm": 0.0, "learning_rate": 2.194852169494749e-06, "loss": 1.0904, "step": 20229 }, { "epoch": 0.7915329838015495, "grad_norm": 0.0, "learning_rate": 2.1940600387005284e-06, "loss": 0.9311, "step": 20230 }, { "epoch": 0.7915721104937788, "grad_norm": 0.0, "learning_rate": 2.193268033259921e-06, "loss": 0.9083, "step": 20231 }, { "epoch": 0.7916112371860083, "grad_norm": 0.0, "learning_rate": 2.192476153185651e-06, "loss": 1.0446, "step": 20232 }, { "epoch": 0.7916503638782377, "grad_norm": 0.0, "learning_rate": 2.1916843984904266e-06, "loss": 1.0251, "step": 20233 }, { "epoch": 0.7916894905704672, "grad_norm": 0.0, "learning_rate": 2.1908927691869673e-06, "loss": 0.8955, "step": 20234 }, { "epoch": 0.7917286172626966, "grad_norm": 0.0, "learning_rate": 2.1901012652879872e-06, "loss": 0.9214, "step": 20235 }, { "epoch": 0.791767743954926, "grad_norm": 0.0, "learning_rate": 2.189309886806191e-06, "loss": 0.9086, "step": 20236 }, { "epoch": 0.7918068706471555, "grad_norm": 0.0, "learning_rate": 2.1885186337542918e-06, "loss": 0.9449, "step": 20237 }, { "epoch": 0.7918459973393849, "grad_norm": 0.0, "learning_rate": 2.187727506144994e-06, "loss": 1.0573, "step": 20238 }, { "epoch": 0.7918851240316144, "grad_norm": 0.0, "learning_rate": 2.1869365039910075e-06, "loss": 0.9336, "step": 20239 }, { "epoch": 0.7919242507238438, "grad_norm": 0.0, "learning_rate": 2.1861456273050265e-06, "loss": 0.9555, "step": 20240 }, { "epoch": 0.7919633774160733, "grad_norm": 0.0, "learning_rate": 2.1853548760997577e-06, "loss": 1.0214, "step": 20241 }, { "epoch": 0.7920025041083026, "grad_norm": 0.0, "learning_rate": 2.184564250387895e-06, "loss": 0.9408, "step": 20242 }, { "epoch": 0.7920416308005321, "grad_norm": 0.0, "learning_rate": 2.1837737501821375e-06, "loss": 0.8964, "step": 20243 }, { "epoch": 0.7920807574927615, "grad_norm": 0.0, "learning_rate": 2.182983375495179e-06, "loss": 0.9443, "step": 20244 }, { "epoch": 0.792119884184991, "grad_norm": 0.0, "learning_rate": 2.1821931263397156e-06, "loss": 0.9177, "step": 20245 }, { "epoch": 0.7921590108772204, "grad_norm": 0.0, "learning_rate": 2.1814030027284306e-06, "loss": 1.0178, "step": 20246 }, { "epoch": 0.7921981375694499, "grad_norm": 0.0, "learning_rate": 2.1806130046740172e-06, "loss": 0.963, "step": 20247 }, { "epoch": 0.7922372642616793, "grad_norm": 0.0, "learning_rate": 2.1798231321891593e-06, "loss": 1.0021, "step": 20248 }, { "epoch": 0.7922763909539088, "grad_norm": 0.0, "learning_rate": 2.1790333852865463e-06, "loss": 0.9681, "step": 20249 }, { "epoch": 0.7923155176461382, "grad_norm": 0.0, "learning_rate": 2.1782437639788535e-06, "loss": 0.9628, "step": 20250 }, { "epoch": 0.7923546443383677, "grad_norm": 0.0, "learning_rate": 2.177454268278768e-06, "loss": 0.9085, "step": 20251 }, { "epoch": 0.792393771030597, "grad_norm": 0.0, "learning_rate": 2.176664898198959e-06, "loss": 0.8659, "step": 20252 }, { "epoch": 0.7924328977228265, "grad_norm": 0.0, "learning_rate": 2.1758756537521143e-06, "loss": 1.0593, "step": 20253 }, { "epoch": 0.7924720244150559, "grad_norm": 0.0, "learning_rate": 2.1750865349508997e-06, "loss": 1.0988, "step": 20254 }, { "epoch": 0.7925111511072854, "grad_norm": 0.0, "learning_rate": 2.1742975418079927e-06, "loss": 0.9023, "step": 20255 }, { "epoch": 0.7925502777995148, "grad_norm": 0.0, "learning_rate": 2.1735086743360556e-06, "loss": 0.9441, "step": 20256 }, { "epoch": 0.7925894044917443, "grad_norm": 0.0, "learning_rate": 2.1727199325477676e-06, "loss": 1.1052, "step": 20257 }, { "epoch": 0.7926285311839737, "grad_norm": 0.0, "learning_rate": 2.1719313164557863e-06, "loss": 1.093, "step": 20258 }, { "epoch": 0.7926676578762032, "grad_norm": 0.0, "learning_rate": 2.171142826072783e-06, "loss": 0.9159, "step": 20259 }, { "epoch": 0.7927067845684326, "grad_norm": 0.0, "learning_rate": 2.1703544614114114e-06, "loss": 1.0022, "step": 20260 }, { "epoch": 0.7927459112606621, "grad_norm": 0.0, "learning_rate": 2.1695662224843373e-06, "loss": 0.9252, "step": 20261 }, { "epoch": 0.7927850379528915, "grad_norm": 0.0, "learning_rate": 2.168778109304217e-06, "loss": 0.9748, "step": 20262 }, { "epoch": 0.792824164645121, "grad_norm": 0.0, "learning_rate": 2.1679901218837098e-06, "loss": 0.9554, "step": 20263 }, { "epoch": 0.7928632913373503, "grad_norm": 0.0, "learning_rate": 2.167202260235465e-06, "loss": 0.9832, "step": 20264 }, { "epoch": 0.7929024180295797, "grad_norm": 0.0, "learning_rate": 2.1664145243721358e-06, "loss": 1.0503, "step": 20265 }, { "epoch": 0.7929415447218092, "grad_norm": 0.0, "learning_rate": 2.1656269143063736e-06, "loss": 0.9193, "step": 20266 }, { "epoch": 0.7929806714140386, "grad_norm": 0.0, "learning_rate": 2.1648394300508293e-06, "loss": 0.9581, "step": 20267 }, { "epoch": 0.7930197981062681, "grad_norm": 0.0, "learning_rate": 2.1640520716181435e-06, "loss": 1.0865, "step": 20268 }, { "epoch": 0.7930589247984975, "grad_norm": 0.0, "learning_rate": 2.163264839020964e-06, "loss": 0.9545, "step": 20269 }, { "epoch": 0.793098051490727, "grad_norm": 0.0, "learning_rate": 2.162477732271926e-06, "loss": 0.9865, "step": 20270 }, { "epoch": 0.7931371781829564, "grad_norm": 0.0, "learning_rate": 2.1616907513836805e-06, "loss": 1.0531, "step": 20271 }, { "epoch": 0.7931763048751859, "grad_norm": 0.0, "learning_rate": 2.1609038963688567e-06, "loss": 0.876, "step": 20272 }, { "epoch": 0.7932154315674153, "grad_norm": 0.0, "learning_rate": 2.1601171672400966e-06, "loss": 0.999, "step": 20273 }, { "epoch": 0.7932545582596447, "grad_norm": 0.0, "learning_rate": 2.159330564010028e-06, "loss": 1.0619, "step": 20274 }, { "epoch": 0.7932936849518741, "grad_norm": 0.0, "learning_rate": 2.1585440866912854e-06, "loss": 0.9849, "step": 20275 }, { "epoch": 0.7933328116441036, "grad_norm": 0.0, "learning_rate": 2.1577577352964984e-06, "loss": 0.9201, "step": 20276 }, { "epoch": 0.793371938336333, "grad_norm": 0.0, "learning_rate": 2.156971509838298e-06, "loss": 1.111, "step": 20277 }, { "epoch": 0.7934110650285625, "grad_norm": 0.0, "learning_rate": 2.1561854103293057e-06, "loss": 0.8114, "step": 20278 }, { "epoch": 0.7934501917207919, "grad_norm": 0.0, "learning_rate": 2.155399436782146e-06, "loss": 1.012, "step": 20279 }, { "epoch": 0.7934893184130214, "grad_norm": 0.0, "learning_rate": 2.1546135892094443e-06, "loss": 0.9398, "step": 20280 }, { "epoch": 0.7935284451052508, "grad_norm": 0.0, "learning_rate": 2.1538278676238156e-06, "loss": 0.9135, "step": 20281 }, { "epoch": 0.7935675717974803, "grad_norm": 0.0, "learning_rate": 2.1530422720378785e-06, "loss": 0.9955, "step": 20282 }, { "epoch": 0.7936066984897097, "grad_norm": 0.0, "learning_rate": 2.1522568024642498e-06, "loss": 0.9342, "step": 20283 }, { "epoch": 0.7936458251819392, "grad_norm": 0.0, "learning_rate": 2.151471458915546e-06, "loss": 0.8957, "step": 20284 }, { "epoch": 0.7936849518741685, "grad_norm": 0.0, "learning_rate": 2.150686241404374e-06, "loss": 0.9617, "step": 20285 }, { "epoch": 0.793724078566398, "grad_norm": 0.0, "learning_rate": 2.1499011499433463e-06, "loss": 1.0559, "step": 20286 }, { "epoch": 0.7937632052586274, "grad_norm": 0.0, "learning_rate": 2.149116184545068e-06, "loss": 0.8569, "step": 20287 }, { "epoch": 0.7938023319508569, "grad_norm": 0.0, "learning_rate": 2.1483313452221453e-06, "loss": 1.0273, "step": 20288 }, { "epoch": 0.7938414586430863, "grad_norm": 0.0, "learning_rate": 2.147546631987183e-06, "loss": 0.9747, "step": 20289 }, { "epoch": 0.7938805853353158, "grad_norm": 0.0, "learning_rate": 2.146762044852785e-06, "loss": 1.0283, "step": 20290 }, { "epoch": 0.7939197120275452, "grad_norm": 0.0, "learning_rate": 2.1459775838315445e-06, "loss": 0.9602, "step": 20291 }, { "epoch": 0.7939588387197747, "grad_norm": 0.0, "learning_rate": 2.1451932489360628e-06, "loss": 1.0381, "step": 20292 }, { "epoch": 0.7939979654120041, "grad_norm": 0.0, "learning_rate": 2.144409040178934e-06, "loss": 1.0007, "step": 20293 }, { "epoch": 0.7940370921042335, "grad_norm": 0.0, "learning_rate": 2.1436249575727564e-06, "loss": 0.9828, "step": 20294 }, { "epoch": 0.794076218796463, "grad_norm": 0.0, "learning_rate": 2.1428410011301136e-06, "loss": 0.9409, "step": 20295 }, { "epoch": 0.7941153454886923, "grad_norm": 0.0, "learning_rate": 2.1420571708635997e-06, "loss": 0.9639, "step": 20296 }, { "epoch": 0.7941544721809218, "grad_norm": 0.0, "learning_rate": 2.1412734667858003e-06, "loss": 0.9252, "step": 20297 }, { "epoch": 0.7941935988731512, "grad_norm": 0.0, "learning_rate": 2.140489888909305e-06, "loss": 1.018, "step": 20298 }, { "epoch": 0.7942327255653807, "grad_norm": 0.0, "learning_rate": 2.1397064372466903e-06, "loss": 0.8202, "step": 20299 }, { "epoch": 0.7942718522576101, "grad_norm": 0.0, "learning_rate": 2.1389231118105437e-06, "loss": 0.8221, "step": 20300 }, { "epoch": 0.7943109789498396, "grad_norm": 0.0, "learning_rate": 2.138139912613436e-06, "loss": 0.9843, "step": 20301 }, { "epoch": 0.794350105642069, "grad_norm": 0.0, "learning_rate": 2.1373568396679557e-06, "loss": 1.0272, "step": 20302 }, { "epoch": 0.7943892323342985, "grad_norm": 0.0, "learning_rate": 2.1365738929866686e-06, "loss": 0.9056, "step": 20303 }, { "epoch": 0.7944283590265279, "grad_norm": 0.0, "learning_rate": 2.135791072582154e-06, "loss": 0.9008, "step": 20304 }, { "epoch": 0.7944674857187574, "grad_norm": 0.0, "learning_rate": 2.135008378466975e-06, "loss": 1.1369, "step": 20305 }, { "epoch": 0.7945066124109867, "grad_norm": 0.0, "learning_rate": 2.134225810653713e-06, "loss": 0.9998, "step": 20306 }, { "epoch": 0.7945457391032162, "grad_norm": 0.0, "learning_rate": 2.133443369154924e-06, "loss": 0.8565, "step": 20307 }, { "epoch": 0.7945848657954456, "grad_norm": 0.0, "learning_rate": 2.1326610539831795e-06, "loss": 0.9523, "step": 20308 }, { "epoch": 0.7946239924876751, "grad_norm": 0.0, "learning_rate": 2.131878865151038e-06, "loss": 0.934, "step": 20309 }, { "epoch": 0.7946631191799045, "grad_norm": 0.0, "learning_rate": 2.131096802671062e-06, "loss": 1.1244, "step": 20310 }, { "epoch": 0.794702245872134, "grad_norm": 0.0, "learning_rate": 2.1303148665558125e-06, "loss": 0.9354, "step": 20311 }, { "epoch": 0.7947413725643634, "grad_norm": 0.0, "learning_rate": 2.1295330568178465e-06, "loss": 0.8744, "step": 20312 }, { "epoch": 0.7947804992565929, "grad_norm": 0.0, "learning_rate": 2.1287513734697153e-06, "loss": 0.8979, "step": 20313 }, { "epoch": 0.7948196259488223, "grad_norm": 0.0, "learning_rate": 2.1279698165239737e-06, "loss": 0.9705, "step": 20314 }, { "epoch": 0.7948587526410518, "grad_norm": 0.0, "learning_rate": 2.127188385993172e-06, "loss": 0.9398, "step": 20315 }, { "epoch": 0.7948978793332812, "grad_norm": 0.0, "learning_rate": 2.126407081889863e-06, "loss": 0.9392, "step": 20316 }, { "epoch": 0.7949370060255107, "grad_norm": 0.0, "learning_rate": 2.125625904226587e-06, "loss": 1.0306, "step": 20317 }, { "epoch": 0.79497613271774, "grad_norm": 0.0, "learning_rate": 2.124844853015895e-06, "loss": 0.9484, "step": 20318 }, { "epoch": 0.7950152594099695, "grad_norm": 0.0, "learning_rate": 2.1240639282703235e-06, "loss": 0.9783, "step": 20319 }, { "epoch": 0.7950543861021989, "grad_norm": 0.0, "learning_rate": 2.123283130002416e-06, "loss": 0.9885, "step": 20320 }, { "epoch": 0.7950935127944284, "grad_norm": 0.0, "learning_rate": 2.1225024582247113e-06, "loss": 0.9558, "step": 20321 }, { "epoch": 0.7951326394866578, "grad_norm": 0.0, "learning_rate": 2.121721912949749e-06, "loss": 0.9481, "step": 20322 }, { "epoch": 0.7951717661788872, "grad_norm": 0.0, "learning_rate": 2.1209414941900584e-06, "loss": 1.0604, "step": 20323 }, { "epoch": 0.7952108928711167, "grad_norm": 0.0, "learning_rate": 2.120161201958174e-06, "loss": 0.8739, "step": 20324 }, { "epoch": 0.7952500195633461, "grad_norm": 0.0, "learning_rate": 2.11938103626663e-06, "loss": 0.9713, "step": 20325 }, { "epoch": 0.7952891462555756, "grad_norm": 0.0, "learning_rate": 2.1186009971279486e-06, "loss": 1.0341, "step": 20326 }, { "epoch": 0.795328272947805, "grad_norm": 0.0, "learning_rate": 2.117821084554659e-06, "loss": 0.9984, "step": 20327 }, { "epoch": 0.7953673996400344, "grad_norm": 0.0, "learning_rate": 2.117041298559286e-06, "loss": 1.0506, "step": 20328 }, { "epoch": 0.7954065263322638, "grad_norm": 0.0, "learning_rate": 2.1162616391543546e-06, "loss": 1.063, "step": 20329 }, { "epoch": 0.7954456530244933, "grad_norm": 0.0, "learning_rate": 2.115482106352379e-06, "loss": 1.0202, "step": 20330 }, { "epoch": 0.7954847797167227, "grad_norm": 0.0, "learning_rate": 2.1147027001658816e-06, "loss": 1.0241, "step": 20331 }, { "epoch": 0.7955239064089522, "grad_norm": 0.0, "learning_rate": 2.1139234206073777e-06, "loss": 0.9057, "step": 20332 }, { "epoch": 0.7955630331011816, "grad_norm": 0.0, "learning_rate": 2.1131442676893843e-06, "loss": 0.9177, "step": 20333 }, { "epoch": 0.7956021597934111, "grad_norm": 0.0, "learning_rate": 2.1123652414244087e-06, "loss": 0.9949, "step": 20334 }, { "epoch": 0.7956412864856405, "grad_norm": 0.0, "learning_rate": 2.111586341824967e-06, "loss": 0.9984, "step": 20335 }, { "epoch": 0.79568041317787, "grad_norm": 0.0, "learning_rate": 2.110807568903561e-06, "loss": 1.0781, "step": 20336 }, { "epoch": 0.7957195398700994, "grad_norm": 0.0, "learning_rate": 2.110028922672699e-06, "loss": 0.9178, "step": 20337 }, { "epoch": 0.7957586665623289, "grad_norm": 0.0, "learning_rate": 2.1092504031448867e-06, "loss": 0.9364, "step": 20338 }, { "epoch": 0.7957977932545582, "grad_norm": 0.0, "learning_rate": 2.1084720103326274e-06, "loss": 0.8946, "step": 20339 }, { "epoch": 0.7958369199467877, "grad_norm": 0.0, "learning_rate": 2.1076937442484156e-06, "loss": 0.9146, "step": 20340 }, { "epoch": 0.7958760466390171, "grad_norm": 0.0, "learning_rate": 2.1069156049047535e-06, "loss": 0.9917, "step": 20341 }, { "epoch": 0.7959151733312466, "grad_norm": 0.0, "learning_rate": 2.106137592314137e-06, "loss": 1.045, "step": 20342 }, { "epoch": 0.795954300023476, "grad_norm": 0.0, "learning_rate": 2.1053597064890607e-06, "loss": 0.9178, "step": 20343 }, { "epoch": 0.7959934267157055, "grad_norm": 0.0, "learning_rate": 2.1045819474420127e-06, "loss": 0.9898, "step": 20344 }, { "epoch": 0.7960325534079349, "grad_norm": 0.0, "learning_rate": 2.103804315185488e-06, "loss": 1.0753, "step": 20345 }, { "epoch": 0.7960716801001644, "grad_norm": 0.0, "learning_rate": 2.103026809731965e-06, "loss": 0.9769, "step": 20346 }, { "epoch": 0.7961108067923938, "grad_norm": 0.0, "learning_rate": 2.102249431093942e-06, "loss": 0.9406, "step": 20347 }, { "epoch": 0.7961499334846233, "grad_norm": 0.0, "learning_rate": 2.101472179283894e-06, "loss": 0.9031, "step": 20348 }, { "epoch": 0.7961890601768526, "grad_norm": 0.0, "learning_rate": 2.100695054314309e-06, "loss": 0.9261, "step": 20349 }, { "epoch": 0.796228186869082, "grad_norm": 0.0, "learning_rate": 2.099918056197657e-06, "loss": 0.9523, "step": 20350 }, { "epoch": 0.7962673135613115, "grad_norm": 0.0, "learning_rate": 2.099141184946427e-06, "loss": 0.9167, "step": 20351 }, { "epoch": 0.7963064402535409, "grad_norm": 0.0, "learning_rate": 2.0983644405730863e-06, "loss": 0.8737, "step": 20352 }, { "epoch": 0.7963455669457704, "grad_norm": 0.0, "learning_rate": 2.0975878230901146e-06, "loss": 1.0768, "step": 20353 }, { "epoch": 0.7963846936379998, "grad_norm": 0.0, "learning_rate": 2.096811332509975e-06, "loss": 0.8334, "step": 20354 }, { "epoch": 0.7964238203302293, "grad_norm": 0.0, "learning_rate": 2.0960349688451463e-06, "loss": 0.9935, "step": 20355 }, { "epoch": 0.7964629470224587, "grad_norm": 0.0, "learning_rate": 2.0952587321080898e-06, "loss": 0.7226, "step": 20356 }, { "epoch": 0.7965020737146882, "grad_norm": 0.0, "learning_rate": 2.0944826223112756e-06, "loss": 0.8934, "step": 20357 }, { "epoch": 0.7965412004069176, "grad_norm": 0.0, "learning_rate": 2.0937066394671624e-06, "loss": 0.8988, "step": 20358 }, { "epoch": 0.7965803270991471, "grad_norm": 0.0, "learning_rate": 2.0929307835882117e-06, "loss": 1.0647, "step": 20359 }, { "epoch": 0.7966194537913764, "grad_norm": 0.0, "learning_rate": 2.0921550546868864e-06, "loss": 0.9381, "step": 20360 }, { "epoch": 0.7966585804836059, "grad_norm": 0.0, "learning_rate": 2.0913794527756425e-06, "loss": 0.8512, "step": 20361 }, { "epoch": 0.7966977071758353, "grad_norm": 0.0, "learning_rate": 2.090603977866934e-06, "loss": 0.9143, "step": 20362 }, { "epoch": 0.7967368338680648, "grad_norm": 0.0, "learning_rate": 2.0898286299732128e-06, "loss": 1.0061, "step": 20363 }, { "epoch": 0.7967759605602942, "grad_norm": 0.0, "learning_rate": 2.0890534091069327e-06, "loss": 1.0153, "step": 20364 }, { "epoch": 0.7968150872525237, "grad_norm": 0.0, "learning_rate": 2.0882783152805443e-06, "loss": 0.9474, "step": 20365 }, { "epoch": 0.7968542139447531, "grad_norm": 0.0, "learning_rate": 2.08750334850649e-06, "loss": 1.0307, "step": 20366 }, { "epoch": 0.7968933406369826, "grad_norm": 0.0, "learning_rate": 2.086728508797219e-06, "loss": 1.0217, "step": 20367 }, { "epoch": 0.796932467329212, "grad_norm": 0.0, "learning_rate": 2.08595379616517e-06, "loss": 0.8613, "step": 20368 }, { "epoch": 0.7969715940214415, "grad_norm": 0.0, "learning_rate": 2.085179210622786e-06, "loss": 0.8284, "step": 20369 }, { "epoch": 0.7970107207136709, "grad_norm": 0.0, "learning_rate": 2.084404752182506e-06, "loss": 0.8776, "step": 20370 }, { "epoch": 0.7970498474059003, "grad_norm": 0.0, "learning_rate": 2.0836304208567705e-06, "loss": 0.9187, "step": 20371 }, { "epoch": 0.7970889740981297, "grad_norm": 0.0, "learning_rate": 2.082856216658007e-06, "loss": 0.8889, "step": 20372 }, { "epoch": 0.7971281007903592, "grad_norm": 0.0, "learning_rate": 2.082082139598651e-06, "loss": 0.9518, "step": 20373 }, { "epoch": 0.7971672274825886, "grad_norm": 0.0, "learning_rate": 2.081308189691138e-06, "loss": 0.9565, "step": 20374 }, { "epoch": 0.7972063541748181, "grad_norm": 0.0, "learning_rate": 2.0805343669478906e-06, "loss": 0.9376, "step": 20375 }, { "epoch": 0.7972454808670475, "grad_norm": 0.0, "learning_rate": 2.079760671381337e-06, "loss": 0.9644, "step": 20376 }, { "epoch": 0.797284607559277, "grad_norm": 0.0, "learning_rate": 2.078987103003902e-06, "loss": 1.0052, "step": 20377 }, { "epoch": 0.7973237342515064, "grad_norm": 0.0, "learning_rate": 2.0782136618280126e-06, "loss": 1.0759, "step": 20378 }, { "epoch": 0.7973628609437358, "grad_norm": 0.0, "learning_rate": 2.0774403478660807e-06, "loss": 0.8779, "step": 20379 }, { "epoch": 0.7974019876359653, "grad_norm": 0.0, "learning_rate": 2.0766671611305334e-06, "loss": 0.9916, "step": 20380 }, { "epoch": 0.7974411143281946, "grad_norm": 0.0, "learning_rate": 2.0758941016337776e-06, "loss": 0.9451, "step": 20381 }, { "epoch": 0.7974802410204241, "grad_norm": 0.0, "learning_rate": 2.075121169388238e-06, "loss": 0.9922, "step": 20382 }, { "epoch": 0.7975193677126535, "grad_norm": 0.0, "learning_rate": 2.074348364406319e-06, "loss": 1.0403, "step": 20383 }, { "epoch": 0.797558494404883, "grad_norm": 0.0, "learning_rate": 2.0735756867004366e-06, "loss": 0.954, "step": 20384 }, { "epoch": 0.7975976210971124, "grad_norm": 0.0, "learning_rate": 2.0728031362829935e-06, "loss": 1.0109, "step": 20385 }, { "epoch": 0.7976367477893419, "grad_norm": 0.0, "learning_rate": 2.0720307131663998e-06, "loss": 0.9724, "step": 20386 }, { "epoch": 0.7976758744815713, "grad_norm": 0.0, "learning_rate": 2.071258417363058e-06, "loss": 0.9876, "step": 20387 }, { "epoch": 0.7977150011738008, "grad_norm": 0.0, "learning_rate": 2.0704862488853726e-06, "loss": 0.8546, "step": 20388 }, { "epoch": 0.7977541278660302, "grad_norm": 0.0, "learning_rate": 2.06971420774574e-06, "loss": 0.9724, "step": 20389 }, { "epoch": 0.7977932545582597, "grad_norm": 0.0, "learning_rate": 2.0689422939565596e-06, "loss": 0.9773, "step": 20390 }, { "epoch": 0.797832381250489, "grad_norm": 0.0, "learning_rate": 2.0681705075302284e-06, "loss": 0.9568, "step": 20391 }, { "epoch": 0.7978715079427185, "grad_norm": 0.0, "learning_rate": 2.067398848479142e-06, "loss": 0.9669, "step": 20392 }, { "epoch": 0.7979106346349479, "grad_norm": 0.0, "learning_rate": 2.066627316815687e-06, "loss": 1.0229, "step": 20393 }, { "epoch": 0.7979497613271774, "grad_norm": 0.0, "learning_rate": 2.0658559125522593e-06, "loss": 0.8971, "step": 20394 }, { "epoch": 0.7979888880194068, "grad_norm": 0.0, "learning_rate": 2.0650846357012376e-06, "loss": 1.0058, "step": 20395 }, { "epoch": 0.7980280147116363, "grad_norm": 0.0, "learning_rate": 2.064313486275019e-06, "loss": 0.9088, "step": 20396 }, { "epoch": 0.7980671414038657, "grad_norm": 0.0, "learning_rate": 2.0635424642859805e-06, "loss": 1.0359, "step": 20397 }, { "epoch": 0.7981062680960952, "grad_norm": 0.0, "learning_rate": 2.0627715697465067e-06, "loss": 0.9725, "step": 20398 }, { "epoch": 0.7981453947883246, "grad_norm": 0.0, "learning_rate": 2.062000802668971e-06, "loss": 0.9302, "step": 20399 }, { "epoch": 0.7981845214805541, "grad_norm": 0.0, "learning_rate": 2.0612301630657595e-06, "loss": 0.9228, "step": 20400 }, { "epoch": 0.7982236481727835, "grad_norm": 0.0, "learning_rate": 2.0604596509492427e-06, "loss": 0.915, "step": 20401 }, { "epoch": 0.798262774865013, "grad_norm": 0.0, "learning_rate": 2.059689266331797e-06, "loss": 0.9032, "step": 20402 }, { "epoch": 0.7983019015572423, "grad_norm": 0.0, "learning_rate": 2.0589190092257895e-06, "loss": 0.9322, "step": 20403 }, { "epoch": 0.7983410282494718, "grad_norm": 0.0, "learning_rate": 2.058148879643591e-06, "loss": 0.9854, "step": 20404 }, { "epoch": 0.7983801549417012, "grad_norm": 0.0, "learning_rate": 2.057378877597571e-06, "loss": 1.1246, "step": 20405 }, { "epoch": 0.7984192816339307, "grad_norm": 0.0, "learning_rate": 2.0566090031000952e-06, "loss": 1.0334, "step": 20406 }, { "epoch": 0.7984584083261601, "grad_norm": 0.0, "learning_rate": 2.055839256163523e-06, "loss": 1.0113, "step": 20407 }, { "epoch": 0.7984975350183895, "grad_norm": 0.0, "learning_rate": 2.055069636800218e-06, "loss": 1.0284, "step": 20408 }, { "epoch": 0.798536661710619, "grad_norm": 0.0, "learning_rate": 2.0543001450225387e-06, "loss": 0.9619, "step": 20409 }, { "epoch": 0.7985757884028484, "grad_norm": 0.0, "learning_rate": 2.0535307808428462e-06, "loss": 0.8301, "step": 20410 }, { "epoch": 0.7986149150950779, "grad_norm": 0.0, "learning_rate": 2.052761544273488e-06, "loss": 0.881, "step": 20411 }, { "epoch": 0.7986540417873073, "grad_norm": 0.0, "learning_rate": 2.0519924353268215e-06, "loss": 0.9002, "step": 20412 }, { "epoch": 0.7986931684795368, "grad_norm": 0.0, "learning_rate": 2.0512234540151997e-06, "loss": 0.9178, "step": 20413 }, { "epoch": 0.7987322951717661, "grad_norm": 0.0, "learning_rate": 2.050454600350966e-06, "loss": 1.0464, "step": 20414 }, { "epoch": 0.7987714218639956, "grad_norm": 0.0, "learning_rate": 2.0496858743464698e-06, "loss": 1.0372, "step": 20415 }, { "epoch": 0.798810548556225, "grad_norm": 0.0, "learning_rate": 2.0489172760140594e-06, "loss": 1.0294, "step": 20416 }, { "epoch": 0.7988496752484545, "grad_norm": 0.0, "learning_rate": 2.0481488053660714e-06, "loss": 0.8934, "step": 20417 }, { "epoch": 0.7988888019406839, "grad_norm": 0.0, "learning_rate": 2.0473804624148498e-06, "loss": 0.9546, "step": 20418 }, { "epoch": 0.7989279286329134, "grad_norm": 0.0, "learning_rate": 2.0466122471727347e-06, "loss": 1.0162, "step": 20419 }, { "epoch": 0.7989670553251428, "grad_norm": 0.0, "learning_rate": 2.0458441596520584e-06, "loss": 0.9834, "step": 20420 }, { "epoch": 0.7990061820173723, "grad_norm": 0.0, "learning_rate": 2.045076199865158e-06, "loss": 0.9583, "step": 20421 }, { "epoch": 0.7990453087096017, "grad_norm": 0.0, "learning_rate": 2.044308367824366e-06, "loss": 1.0076, "step": 20422 }, { "epoch": 0.7990844354018312, "grad_norm": 0.0, "learning_rate": 2.043540663542014e-06, "loss": 0.8592, "step": 20423 }, { "epoch": 0.7991235620940605, "grad_norm": 0.0, "learning_rate": 2.0427730870304276e-06, "loss": 0.9253, "step": 20424 }, { "epoch": 0.79916268878629, "grad_norm": 0.0, "learning_rate": 2.042005638301937e-06, "loss": 0.943, "step": 20425 }, { "epoch": 0.7992018154785194, "grad_norm": 0.0, "learning_rate": 2.041238317368858e-06, "loss": 0.9189, "step": 20426 }, { "epoch": 0.7992409421707489, "grad_norm": 0.0, "learning_rate": 2.0404711242435237e-06, "loss": 0.9824, "step": 20427 }, { "epoch": 0.7992800688629783, "grad_norm": 0.0, "learning_rate": 2.0397040589382476e-06, "loss": 0.8706, "step": 20428 }, { "epoch": 0.7993191955552078, "grad_norm": 0.0, "learning_rate": 2.038937121465352e-06, "loss": 1.0295, "step": 20429 }, { "epoch": 0.7993583222474372, "grad_norm": 0.0, "learning_rate": 2.0381703118371445e-06, "loss": 0.9624, "step": 20430 }, { "epoch": 0.7993974489396667, "grad_norm": 0.0, "learning_rate": 2.0374036300659504e-06, "loss": 0.8995, "step": 20431 }, { "epoch": 0.7994365756318961, "grad_norm": 0.0, "learning_rate": 2.036637076164074e-06, "loss": 0.9481, "step": 20432 }, { "epoch": 0.7994757023241256, "grad_norm": 0.0, "learning_rate": 2.0358706501438308e-06, "loss": 1.0222, "step": 20433 }, { "epoch": 0.799514829016355, "grad_norm": 0.0, "learning_rate": 2.0351043520175216e-06, "loss": 0.9873, "step": 20434 }, { "epoch": 0.7995539557085845, "grad_norm": 0.0, "learning_rate": 2.0343381817974574e-06, "loss": 0.935, "step": 20435 }, { "epoch": 0.7995930824008138, "grad_norm": 0.0, "learning_rate": 2.0335721394959396e-06, "loss": 1.0057, "step": 20436 }, { "epoch": 0.7996322090930432, "grad_norm": 0.0, "learning_rate": 2.0328062251252735e-06, "loss": 0.9866, "step": 20437 }, { "epoch": 0.7996713357852727, "grad_norm": 0.0, "learning_rate": 2.032040438697752e-06, "loss": 0.8881, "step": 20438 }, { "epoch": 0.7997104624775021, "grad_norm": 0.0, "learning_rate": 2.0312747802256783e-06, "loss": 1.0736, "step": 20439 }, { "epoch": 0.7997495891697316, "grad_norm": 0.0, "learning_rate": 2.0305092497213454e-06, "loss": 1.0073, "step": 20440 }, { "epoch": 0.799788715861961, "grad_norm": 0.0, "learning_rate": 2.029743847197051e-06, "loss": 0.9534, "step": 20441 }, { "epoch": 0.7998278425541905, "grad_norm": 0.0, "learning_rate": 2.0289785726650803e-06, "loss": 0.8349, "step": 20442 }, { "epoch": 0.7998669692464199, "grad_norm": 0.0, "learning_rate": 2.0282134261377273e-06, "loss": 1.0621, "step": 20443 }, { "epoch": 0.7999060959386494, "grad_norm": 0.0, "learning_rate": 2.0274484076272726e-06, "loss": 0.8835, "step": 20444 }, { "epoch": 0.7999452226308787, "grad_norm": 0.0, "learning_rate": 2.026683517146012e-06, "loss": 1.1021, "step": 20445 }, { "epoch": 0.7999843493231082, "grad_norm": 0.0, "learning_rate": 2.0259187547062197e-06, "loss": 0.9484, "step": 20446 }, { "epoch": 0.8000234760153376, "grad_norm": 0.0, "learning_rate": 2.025154120320184e-06, "loss": 0.9421, "step": 20447 }, { "epoch": 0.8000626027075671, "grad_norm": 0.0, "learning_rate": 2.024389614000174e-06, "loss": 1.0435, "step": 20448 }, { "epoch": 0.8001017293997965, "grad_norm": 0.0, "learning_rate": 2.0236252357584775e-06, "loss": 0.892, "step": 20449 }, { "epoch": 0.800140856092026, "grad_norm": 0.0, "learning_rate": 2.0228609856073633e-06, "loss": 0.8545, "step": 20450 }, { "epoch": 0.8001799827842554, "grad_norm": 0.0, "learning_rate": 2.0220968635591076e-06, "loss": 1.0038, "step": 20451 }, { "epoch": 0.8002191094764849, "grad_norm": 0.0, "learning_rate": 2.021332869625977e-06, "loss": 0.9, "step": 20452 }, { "epoch": 0.8002582361687143, "grad_norm": 0.0, "learning_rate": 2.020569003820242e-06, "loss": 0.8598, "step": 20453 }, { "epoch": 0.8002973628609438, "grad_norm": 0.0, "learning_rate": 2.019805266154171e-06, "loss": 0.9473, "step": 20454 }, { "epoch": 0.8003364895531732, "grad_norm": 0.0, "learning_rate": 2.0190416566400295e-06, "loss": 1.0199, "step": 20455 }, { "epoch": 0.8003756162454027, "grad_norm": 0.0, "learning_rate": 2.018278175290076e-06, "loss": 1.0414, "step": 20456 }, { "epoch": 0.800414742937632, "grad_norm": 0.0, "learning_rate": 2.017514822116574e-06, "loss": 1.0261, "step": 20457 }, { "epoch": 0.8004538696298615, "grad_norm": 0.0, "learning_rate": 2.016751597131783e-06, "loss": 0.8376, "step": 20458 }, { "epoch": 0.8004929963220909, "grad_norm": 0.0, "learning_rate": 2.015988500347956e-06, "loss": 0.964, "step": 20459 }, { "epoch": 0.8005321230143204, "grad_norm": 0.0, "learning_rate": 2.0152255317773486e-06, "loss": 0.8993, "step": 20460 }, { "epoch": 0.8005712497065498, "grad_norm": 0.0, "learning_rate": 2.014462691432216e-06, "loss": 1.084, "step": 20461 }, { "epoch": 0.8006103763987793, "grad_norm": 0.0, "learning_rate": 2.013699979324805e-06, "loss": 0.9799, "step": 20462 }, { "epoch": 0.8006495030910087, "grad_norm": 0.0, "learning_rate": 2.0129373954673636e-06, "loss": 0.9716, "step": 20463 }, { "epoch": 0.8006886297832381, "grad_norm": 0.0, "learning_rate": 2.012174939872142e-06, "loss": 0.8594, "step": 20464 }, { "epoch": 0.8007277564754676, "grad_norm": 0.0, "learning_rate": 2.011412612551379e-06, "loss": 0.9013, "step": 20465 }, { "epoch": 0.800766883167697, "grad_norm": 0.0, "learning_rate": 2.0106504135173187e-06, "loss": 0.9752, "step": 20466 }, { "epoch": 0.8008060098599264, "grad_norm": 0.0, "learning_rate": 2.0098883427822026e-06, "loss": 0.8542, "step": 20467 }, { "epoch": 0.8008451365521558, "grad_norm": 0.0, "learning_rate": 2.00912640035827e-06, "loss": 0.9366, "step": 20468 }, { "epoch": 0.8008842632443853, "grad_norm": 0.0, "learning_rate": 2.0083645862577515e-06, "loss": 0.9405, "step": 20469 }, { "epoch": 0.8009233899366147, "grad_norm": 0.0, "learning_rate": 2.0076029004928834e-06, "loss": 1.0169, "step": 20470 }, { "epoch": 0.8009625166288442, "grad_norm": 0.0, "learning_rate": 2.006841343075898e-06, "loss": 0.9876, "step": 20471 }, { "epoch": 0.8010016433210736, "grad_norm": 0.0, "learning_rate": 2.006079914019027e-06, "loss": 0.9888, "step": 20472 }, { "epoch": 0.8010407700133031, "grad_norm": 0.0, "learning_rate": 2.0053186133344926e-06, "loss": 1.0033, "step": 20473 }, { "epoch": 0.8010798967055325, "grad_norm": 0.0, "learning_rate": 2.004557441034527e-06, "loss": 1.0313, "step": 20474 }, { "epoch": 0.801119023397762, "grad_norm": 0.0, "learning_rate": 2.0037963971313445e-06, "loss": 0.9788, "step": 20475 }, { "epoch": 0.8011581500899914, "grad_norm": 0.0, "learning_rate": 2.0030354816371767e-06, "loss": 1.0532, "step": 20476 }, { "epoch": 0.8011972767822209, "grad_norm": 0.0, "learning_rate": 2.002274694564236e-06, "loss": 0.9368, "step": 20477 }, { "epoch": 0.8012364034744502, "grad_norm": 0.0, "learning_rate": 2.0015140359247453e-06, "loss": 0.8505, "step": 20478 }, { "epoch": 0.8012755301666797, "grad_norm": 0.0, "learning_rate": 2.00075350573091e-06, "loss": 0.9281, "step": 20479 }, { "epoch": 0.8013146568589091, "grad_norm": 0.0, "learning_rate": 1.9999931039949562e-06, "loss": 0.9965, "step": 20480 }, { "epoch": 0.8013537835511386, "grad_norm": 0.0, "learning_rate": 1.9992328307290854e-06, "loss": 0.9865, "step": 20481 }, { "epoch": 0.801392910243368, "grad_norm": 0.0, "learning_rate": 1.9984726859455127e-06, "loss": 1.1104, "step": 20482 }, { "epoch": 0.8014320369355975, "grad_norm": 0.0, "learning_rate": 1.9977126696564387e-06, "loss": 0.946, "step": 20483 }, { "epoch": 0.8014711636278269, "grad_norm": 0.0, "learning_rate": 1.996952781874073e-06, "loss": 0.857, "step": 20484 }, { "epoch": 0.8015102903200564, "grad_norm": 0.0, "learning_rate": 1.9961930226106162e-06, "loss": 0.8485, "step": 20485 }, { "epoch": 0.8015494170122858, "grad_norm": 0.0, "learning_rate": 1.9954333918782733e-06, "loss": 0.9907, "step": 20486 }, { "epoch": 0.8015885437045153, "grad_norm": 0.0, "learning_rate": 1.994673889689237e-06, "loss": 1.0327, "step": 20487 }, { "epoch": 0.8016276703967447, "grad_norm": 0.0, "learning_rate": 1.993914516055707e-06, "loss": 0.9359, "step": 20488 }, { "epoch": 0.8016667970889741, "grad_norm": 0.0, "learning_rate": 1.9931552709898783e-06, "loss": 0.9154, "step": 20489 }, { "epoch": 0.8017059237812035, "grad_norm": 0.0, "learning_rate": 1.992396154503945e-06, "loss": 0.9957, "step": 20490 }, { "epoch": 0.801745050473433, "grad_norm": 0.0, "learning_rate": 1.991637166610092e-06, "loss": 1.0771, "step": 20491 }, { "epoch": 0.8017841771656624, "grad_norm": 0.0, "learning_rate": 1.990878307320514e-06, "loss": 0.9572, "step": 20492 }, { "epoch": 0.8018233038578918, "grad_norm": 0.0, "learning_rate": 1.9901195766473903e-06, "loss": 0.9557, "step": 20493 }, { "epoch": 0.8018624305501213, "grad_norm": 0.0, "learning_rate": 1.989360974602913e-06, "loss": 0.85, "step": 20494 }, { "epoch": 0.8019015572423507, "grad_norm": 0.0, "learning_rate": 1.988602501199258e-06, "loss": 1.0695, "step": 20495 }, { "epoch": 0.8019406839345802, "grad_norm": 0.0, "learning_rate": 1.987844156448612e-06, "loss": 0.9218, "step": 20496 }, { "epoch": 0.8019798106268096, "grad_norm": 0.0, "learning_rate": 1.987085940363145e-06, "loss": 0.9183, "step": 20497 }, { "epoch": 0.8020189373190391, "grad_norm": 0.0, "learning_rate": 1.986327852955038e-06, "loss": 0.8896, "step": 20498 }, { "epoch": 0.8020580640112684, "grad_norm": 0.0, "learning_rate": 1.985569894236463e-06, "loss": 1.0536, "step": 20499 }, { "epoch": 0.8020971907034979, "grad_norm": 0.0, "learning_rate": 1.984812064219597e-06, "loss": 1.0024, "step": 20500 }, { "epoch": 0.8021363173957273, "grad_norm": 0.0, "learning_rate": 1.984054362916602e-06, "loss": 0.9221, "step": 20501 }, { "epoch": 0.8021754440879568, "grad_norm": 0.0, "learning_rate": 1.9832967903396493e-06, "loss": 1.0348, "step": 20502 }, { "epoch": 0.8022145707801862, "grad_norm": 0.0, "learning_rate": 1.9825393465009068e-06, "loss": 0.9664, "step": 20503 }, { "epoch": 0.8022536974724157, "grad_norm": 0.0, "learning_rate": 1.9817820314125346e-06, "loss": 0.9221, "step": 20504 }, { "epoch": 0.8022928241646451, "grad_norm": 0.0, "learning_rate": 1.9810248450866955e-06, "loss": 0.9367, "step": 20505 }, { "epoch": 0.8023319508568746, "grad_norm": 0.0, "learning_rate": 1.980267787535548e-06, "loss": 1.0228, "step": 20506 }, { "epoch": 0.802371077549104, "grad_norm": 0.0, "learning_rate": 1.979510858771254e-06, "loss": 0.9757, "step": 20507 }, { "epoch": 0.8024102042413335, "grad_norm": 0.0, "learning_rate": 1.9787540588059616e-06, "loss": 0.8896, "step": 20508 }, { "epoch": 0.8024493309335629, "grad_norm": 0.0, "learning_rate": 1.977997387651832e-06, "loss": 0.8568, "step": 20509 }, { "epoch": 0.8024884576257924, "grad_norm": 0.0, "learning_rate": 1.977240845321009e-06, "loss": 0.9477, "step": 20510 }, { "epoch": 0.8025275843180217, "grad_norm": 0.0, "learning_rate": 1.976484431825645e-06, "loss": 0.9849, "step": 20511 }, { "epoch": 0.8025667110102512, "grad_norm": 0.0, "learning_rate": 1.975728147177887e-06, "loss": 0.9218, "step": 20512 }, { "epoch": 0.8026058377024806, "grad_norm": 0.0, "learning_rate": 1.9749719913898824e-06, "loss": 0.9543, "step": 20513 }, { "epoch": 0.8026449643947101, "grad_norm": 0.0, "learning_rate": 1.9742159644737692e-06, "loss": 0.8916, "step": 20514 }, { "epoch": 0.8026840910869395, "grad_norm": 0.0, "learning_rate": 1.9734600664416904e-06, "loss": 1.1271, "step": 20515 }, { "epoch": 0.802723217779169, "grad_norm": 0.0, "learning_rate": 1.9727042973057852e-06, "loss": 0.8914, "step": 20516 }, { "epoch": 0.8027623444713984, "grad_norm": 0.0, "learning_rate": 1.9719486570781933e-06, "loss": 0.9905, "step": 20517 }, { "epoch": 0.8028014711636279, "grad_norm": 0.0, "learning_rate": 1.971193145771043e-06, "loss": 0.9965, "step": 20518 }, { "epoch": 0.8028405978558573, "grad_norm": 0.0, "learning_rate": 1.9704377633964734e-06, "loss": 0.9364, "step": 20519 }, { "epoch": 0.8028797245480868, "grad_norm": 0.0, "learning_rate": 1.969682509966606e-06, "loss": 1.0312, "step": 20520 }, { "epoch": 0.8029188512403161, "grad_norm": 0.0, "learning_rate": 1.968927385493581e-06, "loss": 0.9502, "step": 20521 }, { "epoch": 0.8029579779325455, "grad_norm": 0.0, "learning_rate": 1.9681723899895142e-06, "loss": 0.9964, "step": 20522 }, { "epoch": 0.802997104624775, "grad_norm": 0.0, "learning_rate": 1.9674175234665395e-06, "loss": 0.9772, "step": 20523 }, { "epoch": 0.8030362313170044, "grad_norm": 0.0, "learning_rate": 1.966662785936767e-06, "loss": 1.0243, "step": 20524 }, { "epoch": 0.8030753580092339, "grad_norm": 0.0, "learning_rate": 1.965908177412329e-06, "loss": 0.8868, "step": 20525 }, { "epoch": 0.8031144847014633, "grad_norm": 0.0, "learning_rate": 1.9651536979053367e-06, "loss": 1.0096, "step": 20526 }, { "epoch": 0.8031536113936928, "grad_norm": 0.0, "learning_rate": 1.96439934742791e-06, "loss": 0.9193, "step": 20527 }, { "epoch": 0.8031927380859222, "grad_norm": 0.0, "learning_rate": 1.9636451259921553e-06, "loss": 0.9793, "step": 20528 }, { "epoch": 0.8032318647781517, "grad_norm": 0.0, "learning_rate": 1.9628910336101948e-06, "loss": 0.9982, "step": 20529 }, { "epoch": 0.8032709914703811, "grad_norm": 0.0, "learning_rate": 1.962137070294131e-06, "loss": 0.9047, "step": 20530 }, { "epoch": 0.8033101181626106, "grad_norm": 0.0, "learning_rate": 1.961383236056077e-06, "loss": 0.9482, "step": 20531 }, { "epoch": 0.8033492448548399, "grad_norm": 0.0, "learning_rate": 1.9606295309081312e-06, "loss": 0.9471, "step": 20532 }, { "epoch": 0.8033883715470694, "grad_norm": 0.0, "learning_rate": 1.9598759548624027e-06, "loss": 1.0527, "step": 20533 }, { "epoch": 0.8034274982392988, "grad_norm": 0.0, "learning_rate": 1.9591225079309905e-06, "loss": 1.0269, "step": 20534 }, { "epoch": 0.8034666249315283, "grad_norm": 0.0, "learning_rate": 1.9583691901259983e-06, "loss": 0.9694, "step": 20535 }, { "epoch": 0.8035057516237577, "grad_norm": 0.0, "learning_rate": 1.957616001459517e-06, "loss": 0.9451, "step": 20536 }, { "epoch": 0.8035448783159872, "grad_norm": 0.0, "learning_rate": 1.956862941943646e-06, "loss": 0.9979, "step": 20537 }, { "epoch": 0.8035840050082166, "grad_norm": 0.0, "learning_rate": 1.956110011590476e-06, "loss": 0.928, "step": 20538 }, { "epoch": 0.8036231317004461, "grad_norm": 0.0, "learning_rate": 1.9553572104121043e-06, "loss": 0.9554, "step": 20539 }, { "epoch": 0.8036622583926755, "grad_norm": 0.0, "learning_rate": 1.954604538420611e-06, "loss": 0.9655, "step": 20540 }, { "epoch": 0.803701385084905, "grad_norm": 0.0, "learning_rate": 1.953851995628091e-06, "loss": 0.9825, "step": 20541 }, { "epoch": 0.8037405117771343, "grad_norm": 0.0, "learning_rate": 1.9530995820466223e-06, "loss": 0.846, "step": 20542 }, { "epoch": 0.8037796384693638, "grad_norm": 0.0, "learning_rate": 1.952347297688291e-06, "loss": 0.9173, "step": 20543 }, { "epoch": 0.8038187651615932, "grad_norm": 0.0, "learning_rate": 1.951595142565178e-06, "loss": 0.9149, "step": 20544 }, { "epoch": 0.8038578918538227, "grad_norm": 0.0, "learning_rate": 1.9508431166893647e-06, "loss": 0.9327, "step": 20545 }, { "epoch": 0.8038970185460521, "grad_norm": 0.0, "learning_rate": 1.9500912200729216e-06, "loss": 0.9206, "step": 20546 }, { "epoch": 0.8039361452382816, "grad_norm": 0.0, "learning_rate": 1.9493394527279262e-06, "loss": 0.8708, "step": 20547 }, { "epoch": 0.803975271930511, "grad_norm": 0.0, "learning_rate": 1.948587814666455e-06, "loss": 1.0051, "step": 20548 }, { "epoch": 0.8040143986227404, "grad_norm": 0.0, "learning_rate": 1.947836305900571e-06, "loss": 1.0302, "step": 20549 }, { "epoch": 0.8040535253149699, "grad_norm": 0.0, "learning_rate": 1.9470849264423466e-06, "loss": 1.0187, "step": 20550 }, { "epoch": 0.8040926520071993, "grad_norm": 0.0, "learning_rate": 1.9463336763038465e-06, "loss": 0.9735, "step": 20551 }, { "epoch": 0.8041317786994288, "grad_norm": 0.0, "learning_rate": 1.9455825554971384e-06, "loss": 0.8341, "step": 20552 }, { "epoch": 0.8041709053916581, "grad_norm": 0.0, "learning_rate": 1.9448315640342796e-06, "loss": 0.9366, "step": 20553 }, { "epoch": 0.8042100320838876, "grad_norm": 0.0, "learning_rate": 1.9440807019273346e-06, "loss": 0.9772, "step": 20554 }, { "epoch": 0.804249158776117, "grad_norm": 0.0, "learning_rate": 1.9433299691883546e-06, "loss": 0.8969, "step": 20555 }, { "epoch": 0.8042882854683465, "grad_norm": 0.0, "learning_rate": 1.9425793658294035e-06, "loss": 0.8783, "step": 20556 }, { "epoch": 0.8043274121605759, "grad_norm": 0.0, "learning_rate": 1.9418288918625295e-06, "loss": 1.1025, "step": 20557 }, { "epoch": 0.8043665388528054, "grad_norm": 0.0, "learning_rate": 1.9410785472997884e-06, "loss": 1.097, "step": 20558 }, { "epoch": 0.8044056655450348, "grad_norm": 0.0, "learning_rate": 1.940328332153225e-06, "loss": 0.897, "step": 20559 }, { "epoch": 0.8044447922372643, "grad_norm": 0.0, "learning_rate": 1.939578246434889e-06, "loss": 0.9965, "step": 20560 }, { "epoch": 0.8044839189294937, "grad_norm": 0.0, "learning_rate": 1.9388282901568268e-06, "loss": 1.0581, "step": 20561 }, { "epoch": 0.8045230456217232, "grad_norm": 0.0, "learning_rate": 1.9380784633310823e-06, "loss": 0.8005, "step": 20562 }, { "epoch": 0.8045621723139526, "grad_norm": 0.0, "learning_rate": 1.9373287659696936e-06, "loss": 0.9939, "step": 20563 }, { "epoch": 0.804601299006182, "grad_norm": 0.0, "learning_rate": 1.936579198084703e-06, "loss": 0.889, "step": 20564 }, { "epoch": 0.8046404256984114, "grad_norm": 0.0, "learning_rate": 1.9358297596881446e-06, "loss": 0.8496, "step": 20565 }, { "epoch": 0.8046795523906409, "grad_norm": 0.0, "learning_rate": 1.9350804507920583e-06, "loss": 0.8732, "step": 20566 }, { "epoch": 0.8047186790828703, "grad_norm": 0.0, "learning_rate": 1.9343312714084718e-06, "loss": 0.8934, "step": 20567 }, { "epoch": 0.8047578057750998, "grad_norm": 0.0, "learning_rate": 1.9335822215494213e-06, "loss": 0.904, "step": 20568 }, { "epoch": 0.8047969324673292, "grad_norm": 0.0, "learning_rate": 1.9328333012269264e-06, "loss": 0.8716, "step": 20569 }, { "epoch": 0.8048360591595587, "grad_norm": 0.0, "learning_rate": 1.9320845104530263e-06, "loss": 1.0652, "step": 20570 }, { "epoch": 0.8048751858517881, "grad_norm": 0.0, "learning_rate": 1.931335849239736e-06, "loss": 0.9248, "step": 20571 }, { "epoch": 0.8049143125440176, "grad_norm": 0.0, "learning_rate": 1.930587317599084e-06, "loss": 1.0361, "step": 20572 }, { "epoch": 0.804953439236247, "grad_norm": 0.0, "learning_rate": 1.929838915543083e-06, "loss": 0.96, "step": 20573 }, { "epoch": 0.8049925659284765, "grad_norm": 0.0, "learning_rate": 1.929090643083761e-06, "loss": 0.9161, "step": 20574 }, { "epoch": 0.8050316926207058, "grad_norm": 0.0, "learning_rate": 1.928342500233128e-06, "loss": 1.0261, "step": 20575 }, { "epoch": 0.8050708193129353, "grad_norm": 0.0, "learning_rate": 1.9275944870032026e-06, "loss": 0.9463, "step": 20576 }, { "epoch": 0.8051099460051647, "grad_norm": 0.0, "learning_rate": 1.926846603405992e-06, "loss": 1.0688, "step": 20577 }, { "epoch": 0.8051490726973941, "grad_norm": 0.0, "learning_rate": 1.9260988494535082e-06, "loss": 1.0914, "step": 20578 }, { "epoch": 0.8051881993896236, "grad_norm": 0.0, "learning_rate": 1.92535122515776e-06, "loss": 0.8031, "step": 20579 }, { "epoch": 0.805227326081853, "grad_norm": 0.0, "learning_rate": 1.9246037305307563e-06, "loss": 0.9137, "step": 20580 }, { "epoch": 0.8052664527740825, "grad_norm": 0.0, "learning_rate": 1.9238563655844946e-06, "loss": 0.9082, "step": 20581 }, { "epoch": 0.8053055794663119, "grad_norm": 0.0, "learning_rate": 1.923109130330979e-06, "loss": 0.8723, "step": 20582 }, { "epoch": 0.8053447061585414, "grad_norm": 0.0, "learning_rate": 1.9223620247822107e-06, "loss": 0.9073, "step": 20583 }, { "epoch": 0.8053838328507708, "grad_norm": 0.0, "learning_rate": 1.9216150489501883e-06, "loss": 0.9576, "step": 20584 }, { "epoch": 0.8054229595430002, "grad_norm": 0.0, "learning_rate": 1.920868202846904e-06, "loss": 1.1031, "step": 20585 }, { "epoch": 0.8054620862352296, "grad_norm": 0.0, "learning_rate": 1.920121486484352e-06, "loss": 1.007, "step": 20586 }, { "epoch": 0.8055012129274591, "grad_norm": 0.0, "learning_rate": 1.9193748998745267e-06, "loss": 0.937, "step": 20587 }, { "epoch": 0.8055403396196885, "grad_norm": 0.0, "learning_rate": 1.918628443029412e-06, "loss": 0.969, "step": 20588 }, { "epoch": 0.805579466311918, "grad_norm": 0.0, "learning_rate": 1.917882115960998e-06, "loss": 0.8871, "step": 20589 }, { "epoch": 0.8056185930041474, "grad_norm": 0.0, "learning_rate": 1.917135918681273e-06, "loss": 0.9571, "step": 20590 }, { "epoch": 0.8056577196963769, "grad_norm": 0.0, "learning_rate": 1.916389851202214e-06, "loss": 0.938, "step": 20591 }, { "epoch": 0.8056968463886063, "grad_norm": 0.0, "learning_rate": 1.915643913535805e-06, "loss": 1.0076, "step": 20592 }, { "epoch": 0.8057359730808358, "grad_norm": 0.0, "learning_rate": 1.9148981056940265e-06, "loss": 1.0061, "step": 20593 }, { "epoch": 0.8057750997730652, "grad_norm": 0.0, "learning_rate": 1.9141524276888514e-06, "loss": 1.0704, "step": 20594 }, { "epoch": 0.8058142264652947, "grad_norm": 0.0, "learning_rate": 1.9134068795322546e-06, "loss": 0.9281, "step": 20595 }, { "epoch": 0.805853353157524, "grad_norm": 0.0, "learning_rate": 1.9126614612362114e-06, "loss": 0.9377, "step": 20596 }, { "epoch": 0.8058924798497535, "grad_norm": 0.0, "learning_rate": 1.9119161728126935e-06, "loss": 0.9803, "step": 20597 }, { "epoch": 0.8059316065419829, "grad_norm": 0.0, "learning_rate": 1.911171014273665e-06, "loss": 1.0045, "step": 20598 }, { "epoch": 0.8059707332342124, "grad_norm": 0.0, "learning_rate": 1.910425985631096e-06, "loss": 1.0026, "step": 20599 }, { "epoch": 0.8060098599264418, "grad_norm": 0.0, "learning_rate": 1.909681086896944e-06, "loss": 0.9692, "step": 20600 }, { "epoch": 0.8060489866186713, "grad_norm": 0.0, "learning_rate": 1.9089363180831798e-06, "loss": 0.9593, "step": 20601 }, { "epoch": 0.8060881133109007, "grad_norm": 0.0, "learning_rate": 1.9081916792017584e-06, "loss": 0.9698, "step": 20602 }, { "epoch": 0.8061272400031302, "grad_norm": 0.0, "learning_rate": 1.9074471702646423e-06, "loss": 0.9724, "step": 20603 }, { "epoch": 0.8061663666953596, "grad_norm": 0.0, "learning_rate": 1.9067027912837776e-06, "loss": 0.8496, "step": 20604 }, { "epoch": 0.8062054933875891, "grad_norm": 0.0, "learning_rate": 1.9059585422711302e-06, "loss": 0.9364, "step": 20605 }, { "epoch": 0.8062446200798185, "grad_norm": 0.0, "learning_rate": 1.9052144232386438e-06, "loss": 0.9882, "step": 20606 }, { "epoch": 0.8062837467720478, "grad_norm": 0.0, "learning_rate": 1.9044704341982733e-06, "loss": 1.0148, "step": 20607 }, { "epoch": 0.8063228734642773, "grad_norm": 0.0, "learning_rate": 1.9037265751619606e-06, "loss": 0.9106, "step": 20608 }, { "epoch": 0.8063620001565067, "grad_norm": 0.0, "learning_rate": 1.9029828461416532e-06, "loss": 0.9482, "step": 20609 }, { "epoch": 0.8064011268487362, "grad_norm": 0.0, "learning_rate": 1.9022392471492956e-06, "loss": 0.9154, "step": 20610 }, { "epoch": 0.8064402535409656, "grad_norm": 0.0, "learning_rate": 1.9014957781968313e-06, "loss": 0.954, "step": 20611 }, { "epoch": 0.8064793802331951, "grad_norm": 0.0, "learning_rate": 1.9007524392961941e-06, "loss": 0.9612, "step": 20612 }, { "epoch": 0.8065185069254245, "grad_norm": 0.0, "learning_rate": 1.9000092304593242e-06, "loss": 1.076, "step": 20613 }, { "epoch": 0.806557633617654, "grad_norm": 0.0, "learning_rate": 1.8992661516981558e-06, "loss": 0.9681, "step": 20614 }, { "epoch": 0.8065967603098834, "grad_norm": 0.0, "learning_rate": 1.8985232030246248e-06, "loss": 0.9197, "step": 20615 }, { "epoch": 0.8066358870021129, "grad_norm": 0.0, "learning_rate": 1.897780384450657e-06, "loss": 0.8984, "step": 20616 }, { "epoch": 0.8066750136943422, "grad_norm": 0.0, "learning_rate": 1.8970376959881864e-06, "loss": 0.8744, "step": 20617 }, { "epoch": 0.8067141403865717, "grad_norm": 0.0, "learning_rate": 1.896295137649131e-06, "loss": 0.9377, "step": 20618 }, { "epoch": 0.8067532670788011, "grad_norm": 0.0, "learning_rate": 1.8955527094454262e-06, "loss": 0.8611, "step": 20619 }, { "epoch": 0.8067923937710306, "grad_norm": 0.0, "learning_rate": 1.8948104113889876e-06, "loss": 0.8814, "step": 20620 }, { "epoch": 0.80683152046326, "grad_norm": 0.0, "learning_rate": 1.8940682434917402e-06, "loss": 0.8741, "step": 20621 }, { "epoch": 0.8068706471554895, "grad_norm": 0.0, "learning_rate": 1.8933262057655933e-06, "loss": 0.8809, "step": 20622 }, { "epoch": 0.8069097738477189, "grad_norm": 0.0, "learning_rate": 1.8925842982224752e-06, "loss": 0.9486, "step": 20623 }, { "epoch": 0.8069489005399484, "grad_norm": 0.0, "learning_rate": 1.8918425208742919e-06, "loss": 0.8744, "step": 20624 }, { "epoch": 0.8069880272321778, "grad_norm": 0.0, "learning_rate": 1.8911008737329595e-06, "loss": 0.9062, "step": 20625 }, { "epoch": 0.8070271539244073, "grad_norm": 0.0, "learning_rate": 1.890359356810384e-06, "loss": 0.872, "step": 20626 }, { "epoch": 0.8070662806166367, "grad_norm": 0.0, "learning_rate": 1.8896179701184748e-06, "loss": 0.8861, "step": 20627 }, { "epoch": 0.8071054073088662, "grad_norm": 0.0, "learning_rate": 1.8888767136691378e-06, "loss": 0.8708, "step": 20628 }, { "epoch": 0.8071445340010955, "grad_norm": 0.0, "learning_rate": 1.8881355874742802e-06, "loss": 0.9338, "step": 20629 }, { "epoch": 0.807183660693325, "grad_norm": 0.0, "learning_rate": 1.887394591545798e-06, "loss": 1.019, "step": 20630 }, { "epoch": 0.8072227873855544, "grad_norm": 0.0, "learning_rate": 1.886653725895592e-06, "loss": 0.9605, "step": 20631 }, { "epoch": 0.8072619140777839, "grad_norm": 0.0, "learning_rate": 1.8859129905355645e-06, "loss": 0.9266, "step": 20632 }, { "epoch": 0.8073010407700133, "grad_norm": 0.0, "learning_rate": 1.8851723854776028e-06, "loss": 0.961, "step": 20633 }, { "epoch": 0.8073401674622428, "grad_norm": 0.0, "learning_rate": 1.8844319107336051e-06, "loss": 0.833, "step": 20634 }, { "epoch": 0.8073792941544722, "grad_norm": 0.0, "learning_rate": 1.8836915663154643e-06, "loss": 0.9471, "step": 20635 }, { "epoch": 0.8074184208467016, "grad_norm": 0.0, "learning_rate": 1.8829513522350628e-06, "loss": 1.0576, "step": 20636 }, { "epoch": 0.8074575475389311, "grad_norm": 0.0, "learning_rate": 1.8822112685042927e-06, "loss": 1.0312, "step": 20637 }, { "epoch": 0.8074966742311604, "grad_norm": 0.0, "learning_rate": 1.8814713151350373e-06, "loss": 1.0395, "step": 20638 }, { "epoch": 0.80753580092339, "grad_norm": 0.0, "learning_rate": 1.8807314921391816e-06, "loss": 0.9293, "step": 20639 }, { "epoch": 0.8075749276156193, "grad_norm": 0.0, "learning_rate": 1.879991799528601e-06, "loss": 0.944, "step": 20640 }, { "epoch": 0.8076140543078488, "grad_norm": 0.0, "learning_rate": 1.8792522373151778e-06, "loss": 0.8898, "step": 20641 }, { "epoch": 0.8076531810000782, "grad_norm": 0.0, "learning_rate": 1.8785128055107904e-06, "loss": 1.0295, "step": 20642 }, { "epoch": 0.8076923076923077, "grad_norm": 0.0, "learning_rate": 1.8777735041273083e-06, "loss": 0.9845, "step": 20643 }, { "epoch": 0.8077314343845371, "grad_norm": 0.0, "learning_rate": 1.877034333176606e-06, "loss": 0.907, "step": 20644 }, { "epoch": 0.8077705610767666, "grad_norm": 0.0, "learning_rate": 1.8762952926705536e-06, "loss": 0.9017, "step": 20645 }, { "epoch": 0.807809687768996, "grad_norm": 0.0, "learning_rate": 1.875556382621021e-06, "loss": 0.9193, "step": 20646 }, { "epoch": 0.8078488144612255, "grad_norm": 0.0, "learning_rate": 1.87481760303987e-06, "loss": 0.8822, "step": 20647 }, { "epoch": 0.8078879411534549, "grad_norm": 0.0, "learning_rate": 1.8740789539389703e-06, "loss": 0.9897, "step": 20648 }, { "epoch": 0.8079270678456844, "grad_norm": 0.0, "learning_rate": 1.8733404353301742e-06, "loss": 0.8921, "step": 20649 }, { "epoch": 0.8079661945379137, "grad_norm": 0.0, "learning_rate": 1.8726020472253537e-06, "loss": 1.0068, "step": 20650 }, { "epoch": 0.8080053212301432, "grad_norm": 0.0, "learning_rate": 1.8718637896363567e-06, "loss": 1.0214, "step": 20651 }, { "epoch": 0.8080444479223726, "grad_norm": 0.0, "learning_rate": 1.871125662575045e-06, "loss": 0.9712, "step": 20652 }, { "epoch": 0.8080835746146021, "grad_norm": 0.0, "learning_rate": 1.8703876660532638e-06, "loss": 0.8577, "step": 20653 }, { "epoch": 0.8081227013068315, "grad_norm": 0.0, "learning_rate": 1.8696498000828744e-06, "loss": 0.9987, "step": 20654 }, { "epoch": 0.808161827999061, "grad_norm": 0.0, "learning_rate": 1.8689120646757196e-06, "loss": 0.9833, "step": 20655 }, { "epoch": 0.8082009546912904, "grad_norm": 0.0, "learning_rate": 1.8681744598436503e-06, "loss": 0.9413, "step": 20656 }, { "epoch": 0.8082400813835199, "grad_norm": 0.0, "learning_rate": 1.8674369855985064e-06, "loss": 0.9481, "step": 20657 }, { "epoch": 0.8082792080757493, "grad_norm": 0.0, "learning_rate": 1.8666996419521344e-06, "loss": 1.0059, "step": 20658 }, { "epoch": 0.8083183347679788, "grad_norm": 0.0, "learning_rate": 1.8659624289163748e-06, "loss": 1.0256, "step": 20659 }, { "epoch": 0.8083574614602081, "grad_norm": 0.0, "learning_rate": 1.865225346503069e-06, "loss": 0.9748, "step": 20660 }, { "epoch": 0.8083965881524376, "grad_norm": 0.0, "learning_rate": 1.8644883947240467e-06, "loss": 0.894, "step": 20661 }, { "epoch": 0.808435714844667, "grad_norm": 0.0, "learning_rate": 1.863751573591147e-06, "loss": 0.994, "step": 20662 }, { "epoch": 0.8084748415368964, "grad_norm": 0.0, "learning_rate": 1.863014883116202e-06, "loss": 0.9423, "step": 20663 }, { "epoch": 0.8085139682291259, "grad_norm": 0.0, "learning_rate": 1.8622783233110453e-06, "loss": 0.9662, "step": 20664 }, { "epoch": 0.8085530949213553, "grad_norm": 0.0, "learning_rate": 1.8615418941874973e-06, "loss": 0.8328, "step": 20665 }, { "epoch": 0.8085922216135848, "grad_norm": 0.0, "learning_rate": 1.8608055957573922e-06, "loss": 1.0209, "step": 20666 }, { "epoch": 0.8086313483058142, "grad_norm": 0.0, "learning_rate": 1.860069428032545e-06, "loss": 1.0742, "step": 20667 }, { "epoch": 0.8086704749980437, "grad_norm": 0.0, "learning_rate": 1.8593333910247868e-06, "loss": 0.8893, "step": 20668 }, { "epoch": 0.8087096016902731, "grad_norm": 0.0, "learning_rate": 1.858597484745932e-06, "loss": 1.0006, "step": 20669 }, { "epoch": 0.8087487283825026, "grad_norm": 0.0, "learning_rate": 1.8578617092078021e-06, "loss": 0.9265, "step": 20670 }, { "epoch": 0.8087878550747319, "grad_norm": 0.0, "learning_rate": 1.8571260644222056e-06, "loss": 0.8759, "step": 20671 }, { "epoch": 0.8088269817669614, "grad_norm": 0.0, "learning_rate": 1.856390550400966e-06, "loss": 0.8967, "step": 20672 }, { "epoch": 0.8088661084591908, "grad_norm": 0.0, "learning_rate": 1.8556551671558864e-06, "loss": 1.0552, "step": 20673 }, { "epoch": 0.8089052351514203, "grad_norm": 0.0, "learning_rate": 1.8549199146987827e-06, "loss": 1.0547, "step": 20674 }, { "epoch": 0.8089443618436497, "grad_norm": 0.0, "learning_rate": 1.8541847930414559e-06, "loss": 0.9833, "step": 20675 }, { "epoch": 0.8089834885358792, "grad_norm": 0.0, "learning_rate": 1.8534498021957147e-06, "loss": 0.9418, "step": 20676 }, { "epoch": 0.8090226152281086, "grad_norm": 0.0, "learning_rate": 1.8527149421733604e-06, "loss": 0.8256, "step": 20677 }, { "epoch": 0.8090617419203381, "grad_norm": 0.0, "learning_rate": 1.8519802129861986e-06, "loss": 0.822, "step": 20678 }, { "epoch": 0.8091008686125675, "grad_norm": 0.0, "learning_rate": 1.851245614646021e-06, "loss": 0.9153, "step": 20679 }, { "epoch": 0.809139995304797, "grad_norm": 0.0, "learning_rate": 1.8505111471646287e-06, "loss": 0.9016, "step": 20680 }, { "epoch": 0.8091791219970264, "grad_norm": 0.0, "learning_rate": 1.8497768105538183e-06, "loss": 0.886, "step": 20681 }, { "epoch": 0.8092182486892558, "grad_norm": 0.0, "learning_rate": 1.8490426048253762e-06, "loss": 1.007, "step": 20682 }, { "epoch": 0.8092573753814852, "grad_norm": 0.0, "learning_rate": 1.8483085299910964e-06, "loss": 1.0854, "step": 20683 }, { "epoch": 0.8092965020737147, "grad_norm": 0.0, "learning_rate": 1.8475745860627692e-06, "loss": 0.9385, "step": 20684 }, { "epoch": 0.8093356287659441, "grad_norm": 0.0, "learning_rate": 1.8468407730521764e-06, "loss": 0.9676, "step": 20685 }, { "epoch": 0.8093747554581736, "grad_norm": 0.0, "learning_rate": 1.8461070909711043e-06, "loss": 0.996, "step": 20686 }, { "epoch": 0.809413882150403, "grad_norm": 0.0, "learning_rate": 1.8453735398313376e-06, "loss": 0.9348, "step": 20687 }, { "epoch": 0.8094530088426325, "grad_norm": 0.0, "learning_rate": 1.8446401196446506e-06, "loss": 0.9148, "step": 20688 }, { "epoch": 0.8094921355348619, "grad_norm": 0.0, "learning_rate": 1.8439068304228247e-06, "loss": 0.9936, "step": 20689 }, { "epoch": 0.8095312622270914, "grad_norm": 0.0, "learning_rate": 1.8431736721776338e-06, "loss": 1.0194, "step": 20690 }, { "epoch": 0.8095703889193208, "grad_norm": 0.0, "learning_rate": 1.8424406449208565e-06, "loss": 0.9287, "step": 20691 }, { "epoch": 0.8096095156115501, "grad_norm": 0.0, "learning_rate": 1.8417077486642564e-06, "loss": 0.9799, "step": 20692 }, { "epoch": 0.8096486423037796, "grad_norm": 0.0, "learning_rate": 1.840974983419611e-06, "loss": 0.8877, "step": 20693 }, { "epoch": 0.809687768996009, "grad_norm": 0.0, "learning_rate": 1.8402423491986777e-06, "loss": 0.8288, "step": 20694 }, { "epoch": 0.8097268956882385, "grad_norm": 0.0, "learning_rate": 1.8395098460132322e-06, "loss": 0.8979, "step": 20695 }, { "epoch": 0.8097660223804679, "grad_norm": 0.0, "learning_rate": 1.8387774738750308e-06, "loss": 0.8945, "step": 20696 }, { "epoch": 0.8098051490726974, "grad_norm": 0.0, "learning_rate": 1.8380452327958397e-06, "loss": 1.0755, "step": 20697 }, { "epoch": 0.8098442757649268, "grad_norm": 0.0, "learning_rate": 1.8373131227874085e-06, "loss": 0.8657, "step": 20698 }, { "epoch": 0.8098834024571563, "grad_norm": 0.0, "learning_rate": 1.8365811438615066e-06, "loss": 0.8562, "step": 20699 }, { "epoch": 0.8099225291493857, "grad_norm": 0.0, "learning_rate": 1.835849296029879e-06, "loss": 0.9305, "step": 20700 }, { "epoch": 0.8099616558416152, "grad_norm": 0.0, "learning_rate": 1.835117579304283e-06, "loss": 1.066, "step": 20701 }, { "epoch": 0.8100007825338446, "grad_norm": 0.0, "learning_rate": 1.8343859936964636e-06, "loss": 0.8152, "step": 20702 }, { "epoch": 0.810039909226074, "grad_norm": 0.0, "learning_rate": 1.8336545392181782e-06, "loss": 0.8571, "step": 20703 }, { "epoch": 0.8100790359183034, "grad_norm": 0.0, "learning_rate": 1.8329232158811649e-06, "loss": 0.9614, "step": 20704 }, { "epoch": 0.8101181626105329, "grad_norm": 0.0, "learning_rate": 1.8321920236971735e-06, "loss": 1.0732, "step": 20705 }, { "epoch": 0.8101572893027623, "grad_norm": 0.0, "learning_rate": 1.83146096267794e-06, "loss": 0.998, "step": 20706 }, { "epoch": 0.8101964159949918, "grad_norm": 0.0, "learning_rate": 1.8307300328352084e-06, "loss": 0.8547, "step": 20707 }, { "epoch": 0.8102355426872212, "grad_norm": 0.0, "learning_rate": 1.829999234180716e-06, "loss": 1.0951, "step": 20708 }, { "epoch": 0.8102746693794507, "grad_norm": 0.0, "learning_rate": 1.829268566726201e-06, "loss": 1.0251, "step": 20709 }, { "epoch": 0.8103137960716801, "grad_norm": 0.0, "learning_rate": 1.8285380304833912e-06, "loss": 0.9222, "step": 20710 }, { "epoch": 0.8103529227639096, "grad_norm": 0.0, "learning_rate": 1.827807625464022e-06, "loss": 0.9371, "step": 20711 }, { "epoch": 0.810392049456139, "grad_norm": 0.0, "learning_rate": 1.8270773516798212e-06, "loss": 0.8691, "step": 20712 }, { "epoch": 0.8104311761483685, "grad_norm": 0.0, "learning_rate": 1.8263472091425195e-06, "loss": 1.1442, "step": 20713 }, { "epoch": 0.8104703028405978, "grad_norm": 0.0, "learning_rate": 1.825617197863837e-06, "loss": 1.0154, "step": 20714 }, { "epoch": 0.8105094295328273, "grad_norm": 0.0, "learning_rate": 1.8248873178555026e-06, "loss": 0.845, "step": 20715 }, { "epoch": 0.8105485562250567, "grad_norm": 0.0, "learning_rate": 1.824157569129228e-06, "loss": 0.8394, "step": 20716 }, { "epoch": 0.8105876829172862, "grad_norm": 0.0, "learning_rate": 1.8234279516967435e-06, "loss": 0.8416, "step": 20717 }, { "epoch": 0.8106268096095156, "grad_norm": 0.0, "learning_rate": 1.8226984655697567e-06, "loss": 1.0082, "step": 20718 }, { "epoch": 0.8106659363017451, "grad_norm": 0.0, "learning_rate": 1.8219691107599891e-06, "loss": 0.8365, "step": 20719 }, { "epoch": 0.8107050629939745, "grad_norm": 0.0, "learning_rate": 1.821239887279147e-06, "loss": 0.9191, "step": 20720 }, { "epoch": 0.8107441896862039, "grad_norm": 0.0, "learning_rate": 1.8205107951389444e-06, "loss": 0.9815, "step": 20721 }, { "epoch": 0.8107833163784334, "grad_norm": 0.0, "learning_rate": 1.8197818343510887e-06, "loss": 0.8579, "step": 20722 }, { "epoch": 0.8108224430706628, "grad_norm": 0.0, "learning_rate": 1.8190530049272892e-06, "loss": 0.8876, "step": 20723 }, { "epoch": 0.8108615697628923, "grad_norm": 0.0, "learning_rate": 1.8183243068792444e-06, "loss": 0.8007, "step": 20724 }, { "epoch": 0.8109006964551216, "grad_norm": 0.0, "learning_rate": 1.8175957402186584e-06, "loss": 1.0057, "step": 20725 }, { "epoch": 0.8109398231473511, "grad_norm": 0.0, "learning_rate": 1.816867304957235e-06, "loss": 0.8761, "step": 20726 }, { "epoch": 0.8109789498395805, "grad_norm": 0.0, "learning_rate": 1.816139001106665e-06, "loss": 0.9241, "step": 20727 }, { "epoch": 0.81101807653181, "grad_norm": 0.0, "learning_rate": 1.8154108286786486e-06, "loss": 1.0102, "step": 20728 }, { "epoch": 0.8110572032240394, "grad_norm": 0.0, "learning_rate": 1.8146827876848783e-06, "loss": 1.0247, "step": 20729 }, { "epoch": 0.8110963299162689, "grad_norm": 0.0, "learning_rate": 1.8139548781370486e-06, "loss": 0.9356, "step": 20730 }, { "epoch": 0.8111354566084983, "grad_norm": 0.0, "learning_rate": 1.8132271000468427e-06, "loss": 0.9321, "step": 20731 }, { "epoch": 0.8111745833007278, "grad_norm": 0.0, "learning_rate": 1.8124994534259532e-06, "loss": 0.936, "step": 20732 }, { "epoch": 0.8112137099929572, "grad_norm": 0.0, "learning_rate": 1.811771938286061e-06, "loss": 0.9797, "step": 20733 }, { "epoch": 0.8112528366851867, "grad_norm": 0.0, "learning_rate": 1.811044554638851e-06, "loss": 0.9173, "step": 20734 }, { "epoch": 0.811291963377416, "grad_norm": 0.0, "learning_rate": 1.8103173024960042e-06, "loss": 0.9493, "step": 20735 }, { "epoch": 0.8113310900696455, "grad_norm": 0.0, "learning_rate": 1.8095901818692018e-06, "loss": 0.8553, "step": 20736 }, { "epoch": 0.8113702167618749, "grad_norm": 0.0, "learning_rate": 1.808863192770116e-06, "loss": 0.9473, "step": 20737 }, { "epoch": 0.8114093434541044, "grad_norm": 0.0, "learning_rate": 1.808136335210422e-06, "loss": 0.9292, "step": 20738 }, { "epoch": 0.8114484701463338, "grad_norm": 0.0, "learning_rate": 1.8074096092017944e-06, "loss": 0.9238, "step": 20739 }, { "epoch": 0.8114875968385633, "grad_norm": 0.0, "learning_rate": 1.8066830147559045e-06, "loss": 0.9837, "step": 20740 }, { "epoch": 0.8115267235307927, "grad_norm": 0.0, "learning_rate": 1.805956551884417e-06, "loss": 1.0396, "step": 20741 }, { "epoch": 0.8115658502230222, "grad_norm": 0.0, "learning_rate": 1.8052302205990014e-06, "loss": 1.031, "step": 20742 }, { "epoch": 0.8116049769152516, "grad_norm": 0.0, "learning_rate": 1.804504020911314e-06, "loss": 0.924, "step": 20743 }, { "epoch": 0.8116441036074811, "grad_norm": 0.0, "learning_rate": 1.803777952833029e-06, "loss": 0.8172, "step": 20744 }, { "epoch": 0.8116832302997105, "grad_norm": 0.0, "learning_rate": 1.8030520163757959e-06, "loss": 0.9144, "step": 20745 }, { "epoch": 0.81172235699194, "grad_norm": 0.0, "learning_rate": 1.8023262115512795e-06, "loss": 0.8607, "step": 20746 }, { "epoch": 0.8117614836841693, "grad_norm": 0.0, "learning_rate": 1.8016005383711265e-06, "loss": 0.9296, "step": 20747 }, { "epoch": 0.8118006103763988, "grad_norm": 0.0, "learning_rate": 1.8008749968470007e-06, "loss": 0.9718, "step": 20748 }, { "epoch": 0.8118397370686282, "grad_norm": 0.0, "learning_rate": 1.800149586990545e-06, "loss": 0.9479, "step": 20749 }, { "epoch": 0.8118788637608576, "grad_norm": 0.0, "learning_rate": 1.7994243088134157e-06, "loss": 0.9783, "step": 20750 }, { "epoch": 0.8119179904530871, "grad_norm": 0.0, "learning_rate": 1.7986991623272533e-06, "loss": 1.006, "step": 20751 }, { "epoch": 0.8119571171453165, "grad_norm": 0.0, "learning_rate": 1.797974147543705e-06, "loss": 0.9069, "step": 20752 }, { "epoch": 0.811996243837546, "grad_norm": 0.0, "learning_rate": 1.7972492644744154e-06, "loss": 0.9942, "step": 20753 }, { "epoch": 0.8120353705297754, "grad_norm": 0.0, "learning_rate": 1.796524513131025e-06, "loss": 0.9491, "step": 20754 }, { "epoch": 0.8120744972220049, "grad_norm": 0.0, "learning_rate": 1.7957998935251697e-06, "loss": 0.9107, "step": 20755 }, { "epoch": 0.8121136239142343, "grad_norm": 0.0, "learning_rate": 1.7950754056684882e-06, "loss": 1.0443, "step": 20756 }, { "epoch": 0.8121527506064637, "grad_norm": 0.0, "learning_rate": 1.7943510495726135e-06, "loss": 0.9811, "step": 20757 }, { "epoch": 0.8121918772986931, "grad_norm": 0.0, "learning_rate": 1.7936268252491817e-06, "loss": 0.9271, "step": 20758 }, { "epoch": 0.8122310039909226, "grad_norm": 0.0, "learning_rate": 1.792902732709817e-06, "loss": 0.8727, "step": 20759 }, { "epoch": 0.812270130683152, "grad_norm": 0.0, "learning_rate": 1.7921787719661499e-06, "loss": 0.9419, "step": 20760 }, { "epoch": 0.8123092573753815, "grad_norm": 0.0, "learning_rate": 1.7914549430298078e-06, "loss": 1.1334, "step": 20761 }, { "epoch": 0.8123483840676109, "grad_norm": 0.0, "learning_rate": 1.7907312459124148e-06, "loss": 1.0079, "step": 20762 }, { "epoch": 0.8123875107598404, "grad_norm": 0.0, "learning_rate": 1.7900076806255894e-06, "loss": 1.1044, "step": 20763 }, { "epoch": 0.8124266374520698, "grad_norm": 0.0, "learning_rate": 1.7892842471809558e-06, "loss": 0.874, "step": 20764 }, { "epoch": 0.8124657641442993, "grad_norm": 0.0, "learning_rate": 1.7885609455901253e-06, "loss": 0.9193, "step": 20765 }, { "epoch": 0.8125048908365287, "grad_norm": 0.0, "learning_rate": 1.787837775864717e-06, "loss": 1.0515, "step": 20766 }, { "epoch": 0.8125440175287582, "grad_norm": 0.0, "learning_rate": 1.7871147380163445e-06, "loss": 0.9369, "step": 20767 }, { "epoch": 0.8125831442209875, "grad_norm": 0.0, "learning_rate": 1.7863918320566186e-06, "loss": 1.0055, "step": 20768 }, { "epoch": 0.812622270913217, "grad_norm": 0.0, "learning_rate": 1.7856690579971458e-06, "loss": 0.8837, "step": 20769 }, { "epoch": 0.8126613976054464, "grad_norm": 0.0, "learning_rate": 1.7849464158495355e-06, "loss": 0.9827, "step": 20770 }, { "epoch": 0.8127005242976759, "grad_norm": 0.0, "learning_rate": 1.784223905625394e-06, "loss": 0.9503, "step": 20771 }, { "epoch": 0.8127396509899053, "grad_norm": 0.0, "learning_rate": 1.7835015273363188e-06, "loss": 0.8777, "step": 20772 }, { "epoch": 0.8127787776821348, "grad_norm": 0.0, "learning_rate": 1.7827792809939137e-06, "loss": 0.9473, "step": 20773 }, { "epoch": 0.8128179043743642, "grad_norm": 0.0, "learning_rate": 1.7820571666097764e-06, "loss": 0.9989, "step": 20774 }, { "epoch": 0.8128570310665937, "grad_norm": 0.0, "learning_rate": 1.781335184195505e-06, "loss": 1.0177, "step": 20775 }, { "epoch": 0.8128961577588231, "grad_norm": 0.0, "learning_rate": 1.7806133337626908e-06, "loss": 1.0839, "step": 20776 }, { "epoch": 0.8129352844510525, "grad_norm": 0.0, "learning_rate": 1.7798916153229284e-06, "loss": 0.9195, "step": 20777 }, { "epoch": 0.812974411143282, "grad_norm": 0.0, "learning_rate": 1.779170028887801e-06, "loss": 1.036, "step": 20778 }, { "epoch": 0.8130135378355113, "grad_norm": 0.0, "learning_rate": 1.7784485744689083e-06, "loss": 0.9945, "step": 20779 }, { "epoch": 0.8130526645277408, "grad_norm": 0.0, "learning_rate": 1.7777272520778255e-06, "loss": 0.9906, "step": 20780 }, { "epoch": 0.8130917912199702, "grad_norm": 0.0, "learning_rate": 1.777006061726143e-06, "loss": 1.0416, "step": 20781 }, { "epoch": 0.8131309179121997, "grad_norm": 0.0, "learning_rate": 1.7762850034254364e-06, "loss": 0.9013, "step": 20782 }, { "epoch": 0.8131700446044291, "grad_norm": 0.0, "learning_rate": 1.7755640771872873e-06, "loss": 0.8221, "step": 20783 }, { "epoch": 0.8132091712966586, "grad_norm": 0.0, "learning_rate": 1.7748432830232743e-06, "loss": 0.7912, "step": 20784 }, { "epoch": 0.813248297988888, "grad_norm": 0.0, "learning_rate": 1.7741226209449737e-06, "loss": 0.9432, "step": 20785 }, { "epoch": 0.8132874246811175, "grad_norm": 0.0, "learning_rate": 1.7734020909639538e-06, "loss": 0.9455, "step": 20786 }, { "epoch": 0.8133265513733469, "grad_norm": 0.0, "learning_rate": 1.7726816930917878e-06, "loss": 0.9045, "step": 20787 }, { "epoch": 0.8133656780655764, "grad_norm": 0.0, "learning_rate": 1.7719614273400432e-06, "loss": 0.9088, "step": 20788 }, { "epoch": 0.8134048047578057, "grad_norm": 0.0, "learning_rate": 1.7712412937202917e-06, "loss": 1.0236, "step": 20789 }, { "epoch": 0.8134439314500352, "grad_norm": 0.0, "learning_rate": 1.7705212922440907e-06, "loss": 0.9232, "step": 20790 }, { "epoch": 0.8134830581422646, "grad_norm": 0.0, "learning_rate": 1.7698014229230087e-06, "loss": 1.0597, "step": 20791 }, { "epoch": 0.8135221848344941, "grad_norm": 0.0, "learning_rate": 1.7690816857685978e-06, "loss": 0.9033, "step": 20792 }, { "epoch": 0.8135613115267235, "grad_norm": 0.0, "learning_rate": 1.7683620807924251e-06, "loss": 1.0379, "step": 20793 }, { "epoch": 0.813600438218953, "grad_norm": 0.0, "learning_rate": 1.7676426080060404e-06, "loss": 0.9338, "step": 20794 }, { "epoch": 0.8136395649111824, "grad_norm": 0.0, "learning_rate": 1.7669232674210025e-06, "loss": 0.9502, "step": 20795 }, { "epoch": 0.8136786916034119, "grad_norm": 0.0, "learning_rate": 1.7662040590488562e-06, "loss": 0.9753, "step": 20796 }, { "epoch": 0.8137178182956413, "grad_norm": 0.0, "learning_rate": 1.7654849829011588e-06, "loss": 0.8825, "step": 20797 }, { "epoch": 0.8137569449878708, "grad_norm": 0.0, "learning_rate": 1.7647660389894517e-06, "loss": 0.7832, "step": 20798 }, { "epoch": 0.8137960716801002, "grad_norm": 0.0, "learning_rate": 1.7640472273252861e-06, "loss": 1.0575, "step": 20799 }, { "epoch": 0.8138351983723296, "grad_norm": 0.0, "learning_rate": 1.7633285479201988e-06, "loss": 0.8389, "step": 20800 }, { "epoch": 0.813874325064559, "grad_norm": 0.0, "learning_rate": 1.762610000785734e-06, "loss": 1.1072, "step": 20801 }, { "epoch": 0.8139134517567885, "grad_norm": 0.0, "learning_rate": 1.7618915859334306e-06, "loss": 0.9499, "step": 20802 }, { "epoch": 0.8139525784490179, "grad_norm": 0.0, "learning_rate": 1.761173303374827e-06, "loss": 0.8858, "step": 20803 }, { "epoch": 0.8139917051412474, "grad_norm": 0.0, "learning_rate": 1.7604551531214554e-06, "loss": 0.9731, "step": 20804 }, { "epoch": 0.8140308318334768, "grad_norm": 0.0, "learning_rate": 1.759737135184848e-06, "loss": 0.9477, "step": 20805 }, { "epoch": 0.8140699585257062, "grad_norm": 0.0, "learning_rate": 1.7590192495765369e-06, "loss": 0.9722, "step": 20806 }, { "epoch": 0.8141090852179357, "grad_norm": 0.0, "learning_rate": 1.7583014963080535e-06, "loss": 0.8572, "step": 20807 }, { "epoch": 0.8141482119101651, "grad_norm": 0.0, "learning_rate": 1.757583875390917e-06, "loss": 1.0586, "step": 20808 }, { "epoch": 0.8141873386023946, "grad_norm": 0.0, "learning_rate": 1.7568663868366586e-06, "loss": 0.9155, "step": 20809 }, { "epoch": 0.814226465294624, "grad_norm": 0.0, "learning_rate": 1.756149030656793e-06, "loss": 1.0091, "step": 20810 }, { "epoch": 0.8142655919868534, "grad_norm": 0.0, "learning_rate": 1.755431806862845e-06, "loss": 0.9927, "step": 20811 }, { "epoch": 0.8143047186790828, "grad_norm": 0.0, "learning_rate": 1.7547147154663313e-06, "loss": 0.9006, "step": 20812 }, { "epoch": 0.8143438453713123, "grad_norm": 0.0, "learning_rate": 1.753997756478769e-06, "loss": 0.9606, "step": 20813 }, { "epoch": 0.8143829720635417, "grad_norm": 0.0, "learning_rate": 1.753280929911667e-06, "loss": 1.067, "step": 20814 }, { "epoch": 0.8144220987557712, "grad_norm": 0.0, "learning_rate": 1.7525642357765405e-06, "loss": 0.8013, "step": 20815 }, { "epoch": 0.8144612254480006, "grad_norm": 0.0, "learning_rate": 1.7518476740849e-06, "loss": 1.0075, "step": 20816 }, { "epoch": 0.8145003521402301, "grad_norm": 0.0, "learning_rate": 1.7511312448482488e-06, "loss": 0.8851, "step": 20817 }, { "epoch": 0.8145394788324595, "grad_norm": 0.0, "learning_rate": 1.7504149480780918e-06, "loss": 0.8337, "step": 20818 }, { "epoch": 0.814578605524689, "grad_norm": 0.0, "learning_rate": 1.7496987837859346e-06, "loss": 0.8793, "step": 20819 }, { "epoch": 0.8146177322169184, "grad_norm": 0.0, "learning_rate": 1.748982751983278e-06, "loss": 0.9991, "step": 20820 }, { "epoch": 0.8146568589091479, "grad_norm": 0.0, "learning_rate": 1.7482668526816183e-06, "loss": 0.8875, "step": 20821 }, { "epoch": 0.8146959856013772, "grad_norm": 0.0, "learning_rate": 1.747551085892455e-06, "loss": 1.059, "step": 20822 }, { "epoch": 0.8147351122936067, "grad_norm": 0.0, "learning_rate": 1.7468354516272746e-06, "loss": 0.8405, "step": 20823 }, { "epoch": 0.8147742389858361, "grad_norm": 0.0, "learning_rate": 1.74611994989758e-06, "loss": 0.9073, "step": 20824 }, { "epoch": 0.8148133656780656, "grad_norm": 0.0, "learning_rate": 1.7454045807148545e-06, "loss": 0.8709, "step": 20825 }, { "epoch": 0.814852492370295, "grad_norm": 0.0, "learning_rate": 1.744689344090591e-06, "loss": 1.0662, "step": 20826 }, { "epoch": 0.8148916190625245, "grad_norm": 0.0, "learning_rate": 1.7439742400362658e-06, "loss": 0.9496, "step": 20827 }, { "epoch": 0.8149307457547539, "grad_norm": 0.0, "learning_rate": 1.7432592685633743e-06, "loss": 0.9672, "step": 20828 }, { "epoch": 0.8149698724469834, "grad_norm": 0.0, "learning_rate": 1.7425444296833904e-06, "loss": 1.0024, "step": 20829 }, { "epoch": 0.8150089991392128, "grad_norm": 0.0, "learning_rate": 1.7418297234077986e-06, "loss": 1.03, "step": 20830 }, { "epoch": 0.8150481258314423, "grad_norm": 0.0, "learning_rate": 1.7411151497480704e-06, "loss": 0.954, "step": 20831 }, { "epoch": 0.8150872525236716, "grad_norm": 0.0, "learning_rate": 1.7404007087156839e-06, "loss": 1.0722, "step": 20832 }, { "epoch": 0.8151263792159011, "grad_norm": 0.0, "learning_rate": 1.7396864003221125e-06, "loss": 1.082, "step": 20833 }, { "epoch": 0.8151655059081305, "grad_norm": 0.0, "learning_rate": 1.7389722245788287e-06, "loss": 0.8661, "step": 20834 }, { "epoch": 0.8152046326003599, "grad_norm": 0.0, "learning_rate": 1.7382581814972977e-06, "loss": 0.8965, "step": 20835 }, { "epoch": 0.8152437592925894, "grad_norm": 0.0, "learning_rate": 1.7375442710889868e-06, "loss": 0.8822, "step": 20836 }, { "epoch": 0.8152828859848188, "grad_norm": 0.0, "learning_rate": 1.7368304933653624e-06, "loss": 0.9038, "step": 20837 }, { "epoch": 0.8153220126770483, "grad_norm": 0.0, "learning_rate": 1.7361168483378877e-06, "loss": 0.8827, "step": 20838 }, { "epoch": 0.8153611393692777, "grad_norm": 0.0, "learning_rate": 1.735403336018019e-06, "loss": 0.9473, "step": 20839 }, { "epoch": 0.8154002660615072, "grad_norm": 0.0, "learning_rate": 1.7346899564172193e-06, "loss": 0.9034, "step": 20840 }, { "epoch": 0.8154393927537366, "grad_norm": 0.0, "learning_rate": 1.733976709546936e-06, "loss": 0.9951, "step": 20841 }, { "epoch": 0.815478519445966, "grad_norm": 0.0, "learning_rate": 1.733263595418635e-06, "loss": 0.9727, "step": 20842 }, { "epoch": 0.8155176461381954, "grad_norm": 0.0, "learning_rate": 1.7325506140437587e-06, "loss": 0.8943, "step": 20843 }, { "epoch": 0.8155567728304249, "grad_norm": 0.0, "learning_rate": 1.7318377654337626e-06, "loss": 1.0869, "step": 20844 }, { "epoch": 0.8155958995226543, "grad_norm": 0.0, "learning_rate": 1.7311250496000853e-06, "loss": 0.9242, "step": 20845 }, { "epoch": 0.8156350262148838, "grad_norm": 0.0, "learning_rate": 1.7304124665541843e-06, "loss": 0.9592, "step": 20846 }, { "epoch": 0.8156741529071132, "grad_norm": 0.0, "learning_rate": 1.729700016307495e-06, "loss": 1.0396, "step": 20847 }, { "epoch": 0.8157132795993427, "grad_norm": 0.0, "learning_rate": 1.7289876988714615e-06, "loss": 0.984, "step": 20848 }, { "epoch": 0.8157524062915721, "grad_norm": 0.0, "learning_rate": 1.7282755142575191e-06, "loss": 0.9816, "step": 20849 }, { "epoch": 0.8157915329838016, "grad_norm": 0.0, "learning_rate": 1.7275634624771066e-06, "loss": 0.9958, "step": 20850 }, { "epoch": 0.815830659676031, "grad_norm": 0.0, "learning_rate": 1.72685154354166e-06, "loss": 0.9968, "step": 20851 }, { "epoch": 0.8158697863682605, "grad_norm": 0.0, "learning_rate": 1.7261397574626125e-06, "loss": 1.0164, "step": 20852 }, { "epoch": 0.8159089130604898, "grad_norm": 0.0, "learning_rate": 1.7254281042513898e-06, "loss": 0.9775, "step": 20853 }, { "epoch": 0.8159480397527193, "grad_norm": 0.0, "learning_rate": 1.7247165839194234e-06, "loss": 1.0152, "step": 20854 }, { "epoch": 0.8159871664449487, "grad_norm": 0.0, "learning_rate": 1.7240051964781424e-06, "loss": 1.1125, "step": 20855 }, { "epoch": 0.8160262931371782, "grad_norm": 0.0, "learning_rate": 1.7232939419389648e-06, "loss": 0.903, "step": 20856 }, { "epoch": 0.8160654198294076, "grad_norm": 0.0, "learning_rate": 1.7225828203133143e-06, "loss": 1.0418, "step": 20857 }, { "epoch": 0.8161045465216371, "grad_norm": 0.0, "learning_rate": 1.7218718316126149e-06, "loss": 0.994, "step": 20858 }, { "epoch": 0.8161436732138665, "grad_norm": 0.0, "learning_rate": 1.7211609758482784e-06, "loss": 0.9989, "step": 20859 }, { "epoch": 0.816182799906096, "grad_norm": 0.0, "learning_rate": 1.7204502530317224e-06, "loss": 0.8632, "step": 20860 }, { "epoch": 0.8162219265983254, "grad_norm": 0.0, "learning_rate": 1.7197396631743634e-06, "loss": 0.9708, "step": 20861 }, { "epoch": 0.8162610532905548, "grad_norm": 0.0, "learning_rate": 1.719029206287607e-06, "loss": 1.1278, "step": 20862 }, { "epoch": 0.8163001799827843, "grad_norm": 0.0, "learning_rate": 1.7183188823828644e-06, "loss": 0.9946, "step": 20863 }, { "epoch": 0.8163393066750136, "grad_norm": 0.0, "learning_rate": 1.7176086914715428e-06, "loss": 0.818, "step": 20864 }, { "epoch": 0.8163784333672431, "grad_norm": 0.0, "learning_rate": 1.7168986335650506e-06, "loss": 1.0053, "step": 20865 }, { "epoch": 0.8164175600594725, "grad_norm": 0.0, "learning_rate": 1.7161887086747842e-06, "loss": 0.8599, "step": 20866 }, { "epoch": 0.816456686751702, "grad_norm": 0.0, "learning_rate": 1.7154789168121488e-06, "loss": 0.8823, "step": 20867 }, { "epoch": 0.8164958134439314, "grad_norm": 0.0, "learning_rate": 1.7147692579885366e-06, "loss": 0.9841, "step": 20868 }, { "epoch": 0.8165349401361609, "grad_norm": 0.0, "learning_rate": 1.714059732215353e-06, "loss": 0.9947, "step": 20869 }, { "epoch": 0.8165740668283903, "grad_norm": 0.0, "learning_rate": 1.7133503395039841e-06, "loss": 1.0936, "step": 20870 }, { "epoch": 0.8166131935206198, "grad_norm": 0.0, "learning_rate": 1.7126410798658288e-06, "loss": 0.9233, "step": 20871 }, { "epoch": 0.8166523202128492, "grad_norm": 0.0, "learning_rate": 1.711931953312267e-06, "loss": 0.931, "step": 20872 }, { "epoch": 0.8166914469050787, "grad_norm": 0.0, "learning_rate": 1.7112229598546982e-06, "loss": 0.8926, "step": 20873 }, { "epoch": 0.816730573597308, "grad_norm": 0.0, "learning_rate": 1.710514099504499e-06, "loss": 1.0573, "step": 20874 }, { "epoch": 0.8167697002895375, "grad_norm": 0.0, "learning_rate": 1.709805372273059e-06, "loss": 0.9918, "step": 20875 }, { "epoch": 0.8168088269817669, "grad_norm": 0.0, "learning_rate": 1.7090967781717516e-06, "loss": 0.7952, "step": 20876 }, { "epoch": 0.8168479536739964, "grad_norm": 0.0, "learning_rate": 1.7083883172119665e-06, "loss": 1.0665, "step": 20877 }, { "epoch": 0.8168870803662258, "grad_norm": 0.0, "learning_rate": 1.707679989405071e-06, "loss": 0.797, "step": 20878 }, { "epoch": 0.8169262070584553, "grad_norm": 0.0, "learning_rate": 1.7069717947624475e-06, "loss": 0.924, "step": 20879 }, { "epoch": 0.8169653337506847, "grad_norm": 0.0, "learning_rate": 1.7062637332954634e-06, "loss": 1.0128, "step": 20880 }, { "epoch": 0.8170044604429142, "grad_norm": 0.0, "learning_rate": 1.7055558050154896e-06, "loss": 0.9484, "step": 20881 }, { "epoch": 0.8170435871351436, "grad_norm": 0.0, "learning_rate": 1.7048480099338972e-06, "loss": 0.9229, "step": 20882 }, { "epoch": 0.8170827138273731, "grad_norm": 0.0, "learning_rate": 1.7041403480620534e-06, "loss": 0.9448, "step": 20883 }, { "epoch": 0.8171218405196025, "grad_norm": 0.0, "learning_rate": 1.7034328194113181e-06, "loss": 0.957, "step": 20884 }, { "epoch": 0.817160967211832, "grad_norm": 0.0, "learning_rate": 1.7027254239930547e-06, "loss": 1.0709, "step": 20885 }, { "epoch": 0.8172000939040613, "grad_norm": 0.0, "learning_rate": 1.7020181618186248e-06, "loss": 0.9578, "step": 20886 }, { "epoch": 0.8172392205962908, "grad_norm": 0.0, "learning_rate": 1.7013110328993875e-06, "loss": 1.0642, "step": 20887 }, { "epoch": 0.8172783472885202, "grad_norm": 0.0, "learning_rate": 1.7006040372466937e-06, "loss": 1.0349, "step": 20888 }, { "epoch": 0.8173174739807497, "grad_norm": 0.0, "learning_rate": 1.6998971748719018e-06, "loss": 0.9469, "step": 20889 }, { "epoch": 0.8173566006729791, "grad_norm": 0.0, "learning_rate": 1.699190445786355e-06, "loss": 1.0009, "step": 20890 }, { "epoch": 0.8173957273652085, "grad_norm": 0.0, "learning_rate": 1.6984838500014145e-06, "loss": 0.8962, "step": 20891 }, { "epoch": 0.817434854057438, "grad_norm": 0.0, "learning_rate": 1.6977773875284176e-06, "loss": 0.9382, "step": 20892 }, { "epoch": 0.8174739807496674, "grad_norm": 0.0, "learning_rate": 1.6970710583787153e-06, "loss": 0.9671, "step": 20893 }, { "epoch": 0.8175131074418969, "grad_norm": 0.0, "learning_rate": 1.6963648625636454e-06, "loss": 1.0682, "step": 20894 }, { "epoch": 0.8175522341341263, "grad_norm": 0.0, "learning_rate": 1.6956588000945507e-06, "loss": 0.8904, "step": 20895 }, { "epoch": 0.8175913608263558, "grad_norm": 0.0, "learning_rate": 1.694952870982769e-06, "loss": 1.084, "step": 20896 }, { "epoch": 0.8176304875185851, "grad_norm": 0.0, "learning_rate": 1.69424707523964e-06, "loss": 0.9406, "step": 20897 }, { "epoch": 0.8176696142108146, "grad_norm": 0.0, "learning_rate": 1.6935414128764939e-06, "loss": 0.883, "step": 20898 }, { "epoch": 0.817708740903044, "grad_norm": 0.0, "learning_rate": 1.6928358839046633e-06, "loss": 0.9745, "step": 20899 }, { "epoch": 0.8177478675952735, "grad_norm": 0.0, "learning_rate": 1.6921304883354817e-06, "loss": 0.9794, "step": 20900 }, { "epoch": 0.8177869942875029, "grad_norm": 0.0, "learning_rate": 1.691425226180271e-06, "loss": 0.9252, "step": 20901 }, { "epoch": 0.8178261209797324, "grad_norm": 0.0, "learning_rate": 1.6907200974503601e-06, "loss": 0.9217, "step": 20902 }, { "epoch": 0.8178652476719618, "grad_norm": 0.0, "learning_rate": 1.6900151021570732e-06, "loss": 0.9628, "step": 20903 }, { "epoch": 0.8179043743641913, "grad_norm": 0.0, "learning_rate": 1.6893102403117322e-06, "loss": 0.8937, "step": 20904 }, { "epoch": 0.8179435010564207, "grad_norm": 0.0, "learning_rate": 1.6886055119256529e-06, "loss": 0.8115, "step": 20905 }, { "epoch": 0.8179826277486502, "grad_norm": 0.0, "learning_rate": 1.6879009170101568e-06, "loss": 1.0262, "step": 20906 }, { "epoch": 0.8180217544408795, "grad_norm": 0.0, "learning_rate": 1.687196455576554e-06, "loss": 0.9555, "step": 20907 }, { "epoch": 0.818060881133109, "grad_norm": 0.0, "learning_rate": 1.68649212763616e-06, "loss": 0.9118, "step": 20908 }, { "epoch": 0.8181000078253384, "grad_norm": 0.0, "learning_rate": 1.6857879332002847e-06, "loss": 0.9729, "step": 20909 }, { "epoch": 0.8181391345175679, "grad_norm": 0.0, "learning_rate": 1.6850838722802386e-06, "loss": 1.0209, "step": 20910 }, { "epoch": 0.8181782612097973, "grad_norm": 0.0, "learning_rate": 1.6843799448873244e-06, "loss": 1.0143, "step": 20911 }, { "epoch": 0.8182173879020268, "grad_norm": 0.0, "learning_rate": 1.683676151032848e-06, "loss": 0.9668, "step": 20912 }, { "epoch": 0.8182565145942562, "grad_norm": 0.0, "learning_rate": 1.682972490728112e-06, "loss": 0.9014, "step": 20913 }, { "epoch": 0.8182956412864857, "grad_norm": 0.0, "learning_rate": 1.682268963984418e-06, "loss": 0.903, "step": 20914 }, { "epoch": 0.8183347679787151, "grad_norm": 0.0, "learning_rate": 1.6815655708130597e-06, "loss": 0.9576, "step": 20915 }, { "epoch": 0.8183738946709446, "grad_norm": 0.0, "learning_rate": 1.6808623112253375e-06, "loss": 1.0367, "step": 20916 }, { "epoch": 0.818413021363174, "grad_norm": 0.0, "learning_rate": 1.6801591852325371e-06, "loss": 0.8832, "step": 20917 }, { "epoch": 0.8184521480554035, "grad_norm": 0.0, "learning_rate": 1.6794561928459596e-06, "loss": 0.857, "step": 20918 }, { "epoch": 0.8184912747476328, "grad_norm": 0.0, "learning_rate": 1.678753334076887e-06, "loss": 0.9368, "step": 20919 }, { "epoch": 0.8185304014398622, "grad_norm": 0.0, "learning_rate": 1.6780506089366112e-06, "loss": 0.9473, "step": 20920 }, { "epoch": 0.8185695281320917, "grad_norm": 0.0, "learning_rate": 1.6773480174364088e-06, "loss": 0.8779, "step": 20921 }, { "epoch": 0.8186086548243211, "grad_norm": 0.0, "learning_rate": 1.6766455595875742e-06, "loss": 0.9878, "step": 20922 }, { "epoch": 0.8186477815165506, "grad_norm": 0.0, "learning_rate": 1.6759432354013794e-06, "loss": 0.9257, "step": 20923 }, { "epoch": 0.81868690820878, "grad_norm": 0.0, "learning_rate": 1.6752410448891088e-06, "loss": 1.0336, "step": 20924 }, { "epoch": 0.8187260349010095, "grad_norm": 0.0, "learning_rate": 1.6745389880620322e-06, "loss": 0.9675, "step": 20925 }, { "epoch": 0.8187651615932389, "grad_norm": 0.0, "learning_rate": 1.6738370649314272e-06, "loss": 0.9669, "step": 20926 }, { "epoch": 0.8188042882854684, "grad_norm": 0.0, "learning_rate": 1.673135275508566e-06, "loss": 0.9352, "step": 20927 }, { "epoch": 0.8188434149776977, "grad_norm": 0.0, "learning_rate": 1.6724336198047208e-06, "loss": 0.9243, "step": 20928 }, { "epoch": 0.8188825416699272, "grad_norm": 0.0, "learning_rate": 1.6717320978311535e-06, "loss": 0.9249, "step": 20929 }, { "epoch": 0.8189216683621566, "grad_norm": 0.0, "learning_rate": 1.671030709599133e-06, "loss": 1.002, "step": 20930 }, { "epoch": 0.8189607950543861, "grad_norm": 0.0, "learning_rate": 1.6703294551199222e-06, "loss": 0.9885, "step": 20931 }, { "epoch": 0.8189999217466155, "grad_norm": 0.0, "learning_rate": 1.669628334404786e-06, "loss": 0.9045, "step": 20932 }, { "epoch": 0.819039048438845, "grad_norm": 0.0, "learning_rate": 1.6689273474649781e-06, "loss": 0.9768, "step": 20933 }, { "epoch": 0.8190781751310744, "grad_norm": 0.0, "learning_rate": 1.6682264943117566e-06, "loss": 0.8022, "step": 20934 }, { "epoch": 0.8191173018233039, "grad_norm": 0.0, "learning_rate": 1.6675257749563767e-06, "loss": 0.8122, "step": 20935 }, { "epoch": 0.8191564285155333, "grad_norm": 0.0, "learning_rate": 1.666825189410095e-06, "loss": 0.8885, "step": 20936 }, { "epoch": 0.8191955552077628, "grad_norm": 0.0, "learning_rate": 1.6661247376841573e-06, "loss": 0.8284, "step": 20937 }, { "epoch": 0.8192346818999922, "grad_norm": 0.0, "learning_rate": 1.665424419789814e-06, "loss": 0.8787, "step": 20938 }, { "epoch": 0.8192738085922217, "grad_norm": 0.0, "learning_rate": 1.6647242357383087e-06, "loss": 0.8653, "step": 20939 }, { "epoch": 0.819312935284451, "grad_norm": 0.0, "learning_rate": 1.6640241855408877e-06, "loss": 0.9939, "step": 20940 }, { "epoch": 0.8193520619766805, "grad_norm": 0.0, "learning_rate": 1.6633242692087937e-06, "loss": 0.8849, "step": 20941 }, { "epoch": 0.8193911886689099, "grad_norm": 0.0, "learning_rate": 1.662624486753267e-06, "loss": 1.0396, "step": 20942 }, { "epoch": 0.8194303153611394, "grad_norm": 0.0, "learning_rate": 1.6619248381855413e-06, "loss": 1.0279, "step": 20943 }, { "epoch": 0.8194694420533688, "grad_norm": 0.0, "learning_rate": 1.6612253235168541e-06, "loss": 1.1006, "step": 20944 }, { "epoch": 0.8195085687455983, "grad_norm": 0.0, "learning_rate": 1.6605259427584398e-06, "loss": 0.9481, "step": 20945 }, { "epoch": 0.8195476954378277, "grad_norm": 0.0, "learning_rate": 1.6598266959215315e-06, "loss": 0.9583, "step": 20946 }, { "epoch": 0.8195868221300572, "grad_norm": 0.0, "learning_rate": 1.6591275830173525e-06, "loss": 0.8861, "step": 20947 }, { "epoch": 0.8196259488222866, "grad_norm": 0.0, "learning_rate": 1.6584286040571329e-06, "loss": 0.9829, "step": 20948 }, { "epoch": 0.819665075514516, "grad_norm": 0.0, "learning_rate": 1.6577297590521002e-06, "loss": 0.9974, "step": 20949 }, { "epoch": 0.8197042022067454, "grad_norm": 0.0, "learning_rate": 1.6570310480134722e-06, "loss": 0.8996, "step": 20950 }, { "epoch": 0.8197433288989748, "grad_norm": 0.0, "learning_rate": 1.6563324709524697e-06, "loss": 0.9294, "step": 20951 }, { "epoch": 0.8197824555912043, "grad_norm": 0.0, "learning_rate": 1.655634027880314e-06, "loss": 0.9466, "step": 20952 }, { "epoch": 0.8198215822834337, "grad_norm": 0.0, "learning_rate": 1.6549357188082215e-06, "loss": 1.0024, "step": 20953 }, { "epoch": 0.8198607089756632, "grad_norm": 0.0, "learning_rate": 1.6542375437474023e-06, "loss": 0.9146, "step": 20954 }, { "epoch": 0.8198998356678926, "grad_norm": 0.0, "learning_rate": 1.6535395027090717e-06, "loss": 0.9576, "step": 20955 }, { "epoch": 0.8199389623601221, "grad_norm": 0.0, "learning_rate": 1.6528415957044363e-06, "loss": 0.9042, "step": 20956 }, { "epoch": 0.8199780890523515, "grad_norm": 0.0, "learning_rate": 1.652143822744705e-06, "loss": 0.8889, "step": 20957 }, { "epoch": 0.820017215744581, "grad_norm": 0.0, "learning_rate": 1.6514461838410846e-06, "loss": 1.0105, "step": 20958 }, { "epoch": 0.8200563424368104, "grad_norm": 0.0, "learning_rate": 1.650748679004779e-06, "loss": 0.9874, "step": 20959 }, { "epoch": 0.8200954691290399, "grad_norm": 0.0, "learning_rate": 1.650051308246985e-06, "loss": 1.1024, "step": 20960 }, { "epoch": 0.8201345958212692, "grad_norm": 0.0, "learning_rate": 1.6493540715789036e-06, "loss": 0.9767, "step": 20961 }, { "epoch": 0.8201737225134987, "grad_norm": 0.0, "learning_rate": 1.6486569690117315e-06, "loss": 0.9698, "step": 20962 }, { "epoch": 0.8202128492057281, "grad_norm": 0.0, "learning_rate": 1.6479600005566666e-06, "loss": 0.9041, "step": 20963 }, { "epoch": 0.8202519758979576, "grad_norm": 0.0, "learning_rate": 1.647263166224896e-06, "loss": 1.0401, "step": 20964 }, { "epoch": 0.820291102590187, "grad_norm": 0.0, "learning_rate": 1.646566466027615e-06, "loss": 0.8868, "step": 20965 }, { "epoch": 0.8203302292824165, "grad_norm": 0.0, "learning_rate": 1.6458698999760036e-06, "loss": 1.0162, "step": 20966 }, { "epoch": 0.8203693559746459, "grad_norm": 0.0, "learning_rate": 1.645173468081258e-06, "loss": 0.9412, "step": 20967 }, { "epoch": 0.8204084826668754, "grad_norm": 0.0, "learning_rate": 1.644477170354556e-06, "loss": 0.899, "step": 20968 }, { "epoch": 0.8204476093591048, "grad_norm": 0.0, "learning_rate": 1.6437810068070826e-06, "loss": 0.9782, "step": 20969 }, { "epoch": 0.8204867360513343, "grad_norm": 0.0, "learning_rate": 1.6430849774500102e-06, "loss": 1.0875, "step": 20970 }, { "epoch": 0.8205258627435636, "grad_norm": 0.0, "learning_rate": 1.6423890822945266e-06, "loss": 0.9967, "step": 20971 }, { "epoch": 0.8205649894357931, "grad_norm": 0.0, "learning_rate": 1.6416933213517983e-06, "loss": 0.9409, "step": 20972 }, { "epoch": 0.8206041161280225, "grad_norm": 0.0, "learning_rate": 1.6409976946330042e-06, "loss": 0.9551, "step": 20973 }, { "epoch": 0.820643242820252, "grad_norm": 0.0, "learning_rate": 1.6403022021493109e-06, "loss": 0.9934, "step": 20974 }, { "epoch": 0.8206823695124814, "grad_norm": 0.0, "learning_rate": 1.6396068439118895e-06, "loss": 0.9563, "step": 20975 }, { "epoch": 0.8207214962047108, "grad_norm": 0.0, "learning_rate": 1.6389116199319054e-06, "loss": 1.0085, "step": 20976 }, { "epoch": 0.8207606228969403, "grad_norm": 0.0, "learning_rate": 1.6382165302205255e-06, "loss": 0.8825, "step": 20977 }, { "epoch": 0.8207997495891697, "grad_norm": 0.0, "learning_rate": 1.6375215747889084e-06, "loss": 0.7986, "step": 20978 }, { "epoch": 0.8208388762813992, "grad_norm": 0.0, "learning_rate": 1.6368267536482162e-06, "loss": 0.882, "step": 20979 }, { "epoch": 0.8208780029736286, "grad_norm": 0.0, "learning_rate": 1.636132066809606e-06, "loss": 0.971, "step": 20980 }, { "epoch": 0.8209171296658581, "grad_norm": 0.0, "learning_rate": 1.6354375142842371e-06, "loss": 0.9017, "step": 20981 }, { "epoch": 0.8209562563580874, "grad_norm": 0.0, "learning_rate": 1.634743096083259e-06, "loss": 0.8992, "step": 20982 }, { "epoch": 0.8209953830503169, "grad_norm": 0.0, "learning_rate": 1.6340488122178255e-06, "loss": 1.0906, "step": 20983 }, { "epoch": 0.8210345097425463, "grad_norm": 0.0, "learning_rate": 1.6333546626990804e-06, "loss": 0.864, "step": 20984 }, { "epoch": 0.8210736364347758, "grad_norm": 0.0, "learning_rate": 1.63266064753818e-06, "loss": 1.0001, "step": 20985 }, { "epoch": 0.8211127631270052, "grad_norm": 0.0, "learning_rate": 1.6319667667462635e-06, "loss": 0.9969, "step": 20986 }, { "epoch": 0.8211518898192347, "grad_norm": 0.0, "learning_rate": 1.6312730203344763e-06, "loss": 0.9949, "step": 20987 }, { "epoch": 0.8211910165114641, "grad_norm": 0.0, "learning_rate": 1.6305794083139559e-06, "loss": 0.9865, "step": 20988 }, { "epoch": 0.8212301432036936, "grad_norm": 0.0, "learning_rate": 1.6298859306958425e-06, "loss": 0.875, "step": 20989 }, { "epoch": 0.821269269895923, "grad_norm": 0.0, "learning_rate": 1.6291925874912729e-06, "loss": 1.0393, "step": 20990 }, { "epoch": 0.8213083965881525, "grad_norm": 0.0, "learning_rate": 1.6284993787113834e-06, "loss": 0.9504, "step": 20991 }, { "epoch": 0.8213475232803819, "grad_norm": 0.0, "learning_rate": 1.627806304367301e-06, "loss": 0.9915, "step": 20992 }, { "epoch": 0.8213866499726113, "grad_norm": 0.0, "learning_rate": 1.6271133644701586e-06, "loss": 0.9201, "step": 20993 }, { "epoch": 0.8214257766648407, "grad_norm": 0.0, "learning_rate": 1.6264205590310867e-06, "loss": 0.8876, "step": 20994 }, { "epoch": 0.8214649033570702, "grad_norm": 0.0, "learning_rate": 1.6257278880612048e-06, "loss": 1.0756, "step": 20995 }, { "epoch": 0.8215040300492996, "grad_norm": 0.0, "learning_rate": 1.62503535157164e-06, "loss": 1.0015, "step": 20996 }, { "epoch": 0.8215431567415291, "grad_norm": 0.0, "learning_rate": 1.624342949573513e-06, "loss": 0.8756, "step": 20997 }, { "epoch": 0.8215822834337585, "grad_norm": 0.0, "learning_rate": 1.623650682077945e-06, "loss": 1.0731, "step": 20998 }, { "epoch": 0.821621410125988, "grad_norm": 0.0, "learning_rate": 1.6229585490960487e-06, "loss": 0.9005, "step": 20999 }, { "epoch": 0.8216605368182174, "grad_norm": 0.0, "learning_rate": 1.6222665506389435e-06, "loss": 0.9199, "step": 21000 }, { "epoch": 0.8216996635104469, "grad_norm": 0.0, "learning_rate": 1.6215746867177352e-06, "loss": 1.0027, "step": 21001 }, { "epoch": 0.8217387902026763, "grad_norm": 0.0, "learning_rate": 1.6208829573435436e-06, "loss": 1.0567, "step": 21002 }, { "epoch": 0.8217779168949058, "grad_norm": 0.0, "learning_rate": 1.620191362527469e-06, "loss": 0.895, "step": 21003 }, { "epoch": 0.8218170435871351, "grad_norm": 0.0, "learning_rate": 1.6194999022806235e-06, "loss": 0.9302, "step": 21004 }, { "epoch": 0.8218561702793645, "grad_norm": 0.0, "learning_rate": 1.6188085766141049e-06, "loss": 0.9497, "step": 21005 }, { "epoch": 0.821895296971594, "grad_norm": 0.0, "learning_rate": 1.6181173855390186e-06, "loss": 0.8109, "step": 21006 }, { "epoch": 0.8219344236638234, "grad_norm": 0.0, "learning_rate": 1.6174263290664648e-06, "loss": 1.0009, "step": 21007 }, { "epoch": 0.8219735503560529, "grad_norm": 0.0, "learning_rate": 1.6167354072075425e-06, "loss": 0.9675, "step": 21008 }, { "epoch": 0.8220126770482823, "grad_norm": 0.0, "learning_rate": 1.6160446199733415e-06, "loss": 1.0406, "step": 21009 }, { "epoch": 0.8220518037405118, "grad_norm": 0.0, "learning_rate": 1.6153539673749586e-06, "loss": 0.8864, "step": 21010 }, { "epoch": 0.8220909304327412, "grad_norm": 0.0, "learning_rate": 1.6146634494234848e-06, "loss": 0.9519, "step": 21011 }, { "epoch": 0.8221300571249707, "grad_norm": 0.0, "learning_rate": 1.6139730661300113e-06, "loss": 0.9202, "step": 21012 }, { "epoch": 0.8221691838172, "grad_norm": 0.0, "learning_rate": 1.6132828175056193e-06, "loss": 0.9186, "step": 21013 }, { "epoch": 0.8222083105094296, "grad_norm": 0.0, "learning_rate": 1.6125927035613975e-06, "loss": 1.1562, "step": 21014 }, { "epoch": 0.8222474372016589, "grad_norm": 0.0, "learning_rate": 1.6119027243084228e-06, "loss": 1.0962, "step": 21015 }, { "epoch": 0.8222865638938884, "grad_norm": 0.0, "learning_rate": 1.6112128797577853e-06, "loss": 0.9677, "step": 21016 }, { "epoch": 0.8223256905861178, "grad_norm": 0.0, "learning_rate": 1.6105231699205537e-06, "loss": 0.8908, "step": 21017 }, { "epoch": 0.8223648172783473, "grad_norm": 0.0, "learning_rate": 1.6098335948078104e-06, "loss": 0.8915, "step": 21018 }, { "epoch": 0.8224039439705767, "grad_norm": 0.0, "learning_rate": 1.6091441544306208e-06, "loss": 0.9755, "step": 21019 }, { "epoch": 0.8224430706628062, "grad_norm": 0.0, "learning_rate": 1.6084548488000663e-06, "loss": 0.9786, "step": 21020 }, { "epoch": 0.8224821973550356, "grad_norm": 0.0, "learning_rate": 1.60776567792721e-06, "loss": 0.8295, "step": 21021 }, { "epoch": 0.8225213240472651, "grad_norm": 0.0, "learning_rate": 1.6070766418231222e-06, "loss": 0.8835, "step": 21022 }, { "epoch": 0.8225604507394945, "grad_norm": 0.0, "learning_rate": 1.606387740498865e-06, "loss": 0.9724, "step": 21023 }, { "epoch": 0.822599577431724, "grad_norm": 0.0, "learning_rate": 1.6056989739655027e-06, "loss": 0.8725, "step": 21024 }, { "epoch": 0.8226387041239533, "grad_norm": 0.0, "learning_rate": 1.6050103422340967e-06, "loss": 0.9216, "step": 21025 }, { "epoch": 0.8226778308161828, "grad_norm": 0.0, "learning_rate": 1.6043218453157072e-06, "loss": 0.8402, "step": 21026 }, { "epoch": 0.8227169575084122, "grad_norm": 0.0, "learning_rate": 1.6036334832213863e-06, "loss": 0.9219, "step": 21027 }, { "epoch": 0.8227560842006417, "grad_norm": 0.0, "learning_rate": 1.6029452559621895e-06, "loss": 0.9616, "step": 21028 }, { "epoch": 0.8227952108928711, "grad_norm": 0.0, "learning_rate": 1.6022571635491713e-06, "loss": 0.8076, "step": 21029 }, { "epoch": 0.8228343375851006, "grad_norm": 0.0, "learning_rate": 1.6015692059933808e-06, "loss": 0.9991, "step": 21030 }, { "epoch": 0.82287346427733, "grad_norm": 0.0, "learning_rate": 1.6008813833058635e-06, "loss": 0.9393, "step": 21031 }, { "epoch": 0.8229125909695595, "grad_norm": 0.0, "learning_rate": 1.6001936954976694e-06, "loss": 0.888, "step": 21032 }, { "epoch": 0.8229517176617889, "grad_norm": 0.0, "learning_rate": 1.5995061425798363e-06, "loss": 0.9139, "step": 21033 }, { "epoch": 0.8229908443540183, "grad_norm": 0.0, "learning_rate": 1.598818724563408e-06, "loss": 0.9789, "step": 21034 }, { "epoch": 0.8230299710462478, "grad_norm": 0.0, "learning_rate": 1.5981314414594229e-06, "loss": 0.9558, "step": 21035 }, { "epoch": 0.8230690977384771, "grad_norm": 0.0, "learning_rate": 1.5974442932789224e-06, "loss": 0.9269, "step": 21036 }, { "epoch": 0.8231082244307066, "grad_norm": 0.0, "learning_rate": 1.5967572800329345e-06, "loss": 0.9717, "step": 21037 }, { "epoch": 0.823147351122936, "grad_norm": 0.0, "learning_rate": 1.5960704017324946e-06, "loss": 1.0432, "step": 21038 }, { "epoch": 0.8231864778151655, "grad_norm": 0.0, "learning_rate": 1.595383658388636e-06, "loss": 0.9667, "step": 21039 }, { "epoch": 0.8232256045073949, "grad_norm": 0.0, "learning_rate": 1.5946970500123826e-06, "loss": 0.9299, "step": 21040 }, { "epoch": 0.8232647311996244, "grad_norm": 0.0, "learning_rate": 1.5940105766147618e-06, "loss": 1.0065, "step": 21041 }, { "epoch": 0.8233038578918538, "grad_norm": 0.0, "learning_rate": 1.593324238206797e-06, "loss": 0.8101, "step": 21042 }, { "epoch": 0.8233429845840833, "grad_norm": 0.0, "learning_rate": 1.592638034799514e-06, "loss": 1.0509, "step": 21043 }, { "epoch": 0.8233821112763127, "grad_norm": 0.0, "learning_rate": 1.5919519664039263e-06, "loss": 0.9397, "step": 21044 }, { "epoch": 0.8234212379685422, "grad_norm": 0.0, "learning_rate": 1.591266033031057e-06, "loss": 1.0019, "step": 21045 }, { "epoch": 0.8234603646607715, "grad_norm": 0.0, "learning_rate": 1.590580234691913e-06, "loss": 1.021, "step": 21046 }, { "epoch": 0.823499491353001, "grad_norm": 0.0, "learning_rate": 1.5898945713975178e-06, "loss": 0.9308, "step": 21047 }, { "epoch": 0.8235386180452304, "grad_norm": 0.0, "learning_rate": 1.589209043158876e-06, "loss": 1.0104, "step": 21048 }, { "epoch": 0.8235777447374599, "grad_norm": 0.0, "learning_rate": 1.5885236499869995e-06, "loss": 0.8346, "step": 21049 }, { "epoch": 0.8236168714296893, "grad_norm": 0.0, "learning_rate": 1.5878383918928874e-06, "loss": 0.8968, "step": 21050 }, { "epoch": 0.8236559981219188, "grad_norm": 0.0, "learning_rate": 1.5871532688875547e-06, "loss": 0.9682, "step": 21051 }, { "epoch": 0.8236951248141482, "grad_norm": 0.0, "learning_rate": 1.5864682809819965e-06, "loss": 0.9514, "step": 21052 }, { "epoch": 0.8237342515063777, "grad_norm": 0.0, "learning_rate": 1.5857834281872175e-06, "loss": 0.8586, "step": 21053 }, { "epoch": 0.8237733781986071, "grad_norm": 0.0, "learning_rate": 1.5850987105142113e-06, "loss": 1.0328, "step": 21054 }, { "epoch": 0.8238125048908366, "grad_norm": 0.0, "learning_rate": 1.5844141279739745e-06, "loss": 0.9349, "step": 21055 }, { "epoch": 0.823851631583066, "grad_norm": 0.0, "learning_rate": 1.5837296805775016e-06, "loss": 1.0341, "step": 21056 }, { "epoch": 0.8238907582752955, "grad_norm": 0.0, "learning_rate": 1.583045368335786e-06, "loss": 0.9252, "step": 21057 }, { "epoch": 0.8239298849675248, "grad_norm": 0.0, "learning_rate": 1.5823611912598126e-06, "loss": 1.0502, "step": 21058 }, { "epoch": 0.8239690116597543, "grad_norm": 0.0, "learning_rate": 1.581677149360571e-06, "loss": 0.9237, "step": 21059 }, { "epoch": 0.8240081383519837, "grad_norm": 0.0, "learning_rate": 1.580993242649045e-06, "loss": 0.9696, "step": 21060 }, { "epoch": 0.8240472650442132, "grad_norm": 0.0, "learning_rate": 1.58030947113622e-06, "loss": 0.8151, "step": 21061 }, { "epoch": 0.8240863917364426, "grad_norm": 0.0, "learning_rate": 1.5796258348330728e-06, "loss": 1.022, "step": 21062 }, { "epoch": 0.824125518428672, "grad_norm": 0.0, "learning_rate": 1.5789423337505849e-06, "loss": 0.9439, "step": 21063 }, { "epoch": 0.8241646451209015, "grad_norm": 0.0, "learning_rate": 1.5782589678997274e-06, "loss": 0.926, "step": 21064 }, { "epoch": 0.8242037718131309, "grad_norm": 0.0, "learning_rate": 1.5775757372914824e-06, "loss": 1.0537, "step": 21065 }, { "epoch": 0.8242428985053604, "grad_norm": 0.0, "learning_rate": 1.5768926419368135e-06, "loss": 0.8632, "step": 21066 }, { "epoch": 0.8242820251975898, "grad_norm": 0.0, "learning_rate": 1.5762096818466976e-06, "loss": 0.9333, "step": 21067 }, { "epoch": 0.8243211518898192, "grad_norm": 0.0, "learning_rate": 1.5755268570320936e-06, "loss": 0.9572, "step": 21068 }, { "epoch": 0.8243602785820486, "grad_norm": 0.0, "learning_rate": 1.574844167503976e-06, "loss": 0.962, "step": 21069 }, { "epoch": 0.8243994052742781, "grad_norm": 0.0, "learning_rate": 1.5741616132733029e-06, "loss": 0.9774, "step": 21070 }, { "epoch": 0.8244385319665075, "grad_norm": 0.0, "learning_rate": 1.5734791943510375e-06, "loss": 0.9672, "step": 21071 }, { "epoch": 0.824477658658737, "grad_norm": 0.0, "learning_rate": 1.5727969107481345e-06, "loss": 0.9546, "step": 21072 }, { "epoch": 0.8245167853509664, "grad_norm": 0.0, "learning_rate": 1.5721147624755539e-06, "loss": 0.8964, "step": 21073 }, { "epoch": 0.8245559120431959, "grad_norm": 0.0, "learning_rate": 1.5714327495442483e-06, "loss": 0.9513, "step": 21074 }, { "epoch": 0.8245950387354253, "grad_norm": 0.0, "learning_rate": 1.5707508719651755e-06, "loss": 0.9018, "step": 21075 }, { "epoch": 0.8246341654276548, "grad_norm": 0.0, "learning_rate": 1.5700691297492777e-06, "loss": 0.9496, "step": 21076 }, { "epoch": 0.8246732921198842, "grad_norm": 0.0, "learning_rate": 1.5693875229075062e-06, "loss": 0.9268, "step": 21077 }, { "epoch": 0.8247124188121137, "grad_norm": 0.0, "learning_rate": 1.568706051450809e-06, "loss": 1.0954, "step": 21078 }, { "epoch": 0.824751545504343, "grad_norm": 0.0, "learning_rate": 1.5680247153901263e-06, "loss": 0.8949, "step": 21079 }, { "epoch": 0.8247906721965725, "grad_norm": 0.0, "learning_rate": 1.5673435147364002e-06, "loss": 0.8943, "step": 21080 }, { "epoch": 0.8248297988888019, "grad_norm": 0.0, "learning_rate": 1.5666624495005734e-06, "loss": 1.0442, "step": 21081 }, { "epoch": 0.8248689255810314, "grad_norm": 0.0, "learning_rate": 1.5659815196935767e-06, "loss": 0.9955, "step": 21082 }, { "epoch": 0.8249080522732608, "grad_norm": 0.0, "learning_rate": 1.5653007253263498e-06, "loss": 0.9678, "step": 21083 }, { "epoch": 0.8249471789654903, "grad_norm": 0.0, "learning_rate": 1.5646200664098254e-06, "loss": 0.9341, "step": 21084 }, { "epoch": 0.8249863056577197, "grad_norm": 0.0, "learning_rate": 1.5639395429549297e-06, "loss": 1.0097, "step": 21085 }, { "epoch": 0.8250254323499492, "grad_norm": 0.0, "learning_rate": 1.563259154972595e-06, "loss": 0.9205, "step": 21086 }, { "epoch": 0.8250645590421786, "grad_norm": 0.0, "learning_rate": 1.562578902473746e-06, "loss": 0.9283, "step": 21087 }, { "epoch": 0.8251036857344081, "grad_norm": 0.0, "learning_rate": 1.5618987854693102e-06, "loss": 0.9183, "step": 21088 }, { "epoch": 0.8251428124266375, "grad_norm": 0.0, "learning_rate": 1.5612188039702037e-06, "loss": 0.9131, "step": 21089 }, { "epoch": 0.8251819391188668, "grad_norm": 0.0, "learning_rate": 1.5605389579873508e-06, "loss": 1.0281, "step": 21090 }, { "epoch": 0.8252210658110963, "grad_norm": 0.0, "learning_rate": 1.559859247531662e-06, "loss": 1.013, "step": 21091 }, { "epoch": 0.8252601925033257, "grad_norm": 0.0, "learning_rate": 1.5591796726140629e-06, "loss": 0.9798, "step": 21092 }, { "epoch": 0.8252993191955552, "grad_norm": 0.0, "learning_rate": 1.558500233245459e-06, "loss": 0.9969, "step": 21093 }, { "epoch": 0.8253384458877846, "grad_norm": 0.0, "learning_rate": 1.5578209294367653e-06, "loss": 0.8629, "step": 21094 }, { "epoch": 0.8253775725800141, "grad_norm": 0.0, "learning_rate": 1.5571417611988849e-06, "loss": 0.9156, "step": 21095 }, { "epoch": 0.8254166992722435, "grad_norm": 0.0, "learning_rate": 1.5564627285427325e-06, "loss": 0.9152, "step": 21096 }, { "epoch": 0.825455825964473, "grad_norm": 0.0, "learning_rate": 1.5557838314792062e-06, "loss": 1.0514, "step": 21097 }, { "epoch": 0.8254949526567024, "grad_norm": 0.0, "learning_rate": 1.5551050700192127e-06, "loss": 0.8363, "step": 21098 }, { "epoch": 0.8255340793489319, "grad_norm": 0.0, "learning_rate": 1.5544264441736467e-06, "loss": 0.916, "step": 21099 }, { "epoch": 0.8255732060411612, "grad_norm": 0.0, "learning_rate": 1.5537479539534106e-06, "loss": 0.9258, "step": 21100 }, { "epoch": 0.8256123327333907, "grad_norm": 0.0, "learning_rate": 1.5530695993693978e-06, "loss": 1.0317, "step": 21101 }, { "epoch": 0.8256514594256201, "grad_norm": 0.0, "learning_rate": 1.5523913804325042e-06, "loss": 1.0369, "step": 21102 }, { "epoch": 0.8256905861178496, "grad_norm": 0.0, "learning_rate": 1.5517132971536187e-06, "loss": 1.0121, "step": 21103 }, { "epoch": 0.825729712810079, "grad_norm": 0.0, "learning_rate": 1.5510353495436303e-06, "loss": 0.9017, "step": 21104 }, { "epoch": 0.8257688395023085, "grad_norm": 0.0, "learning_rate": 1.5503575376134272e-06, "loss": 1.056, "step": 21105 }, { "epoch": 0.8258079661945379, "grad_norm": 0.0, "learning_rate": 1.5496798613738974e-06, "loss": 0.9428, "step": 21106 }, { "epoch": 0.8258470928867674, "grad_norm": 0.0, "learning_rate": 1.5490023208359161e-06, "loss": 0.987, "step": 21107 }, { "epoch": 0.8258862195789968, "grad_norm": 0.0, "learning_rate": 1.5483249160103696e-06, "loss": 0.9996, "step": 21108 }, { "epoch": 0.8259253462712263, "grad_norm": 0.0, "learning_rate": 1.5476476469081337e-06, "loss": 0.9896, "step": 21109 }, { "epoch": 0.8259644729634557, "grad_norm": 0.0, "learning_rate": 1.5469705135400869e-06, "loss": 0.899, "step": 21110 }, { "epoch": 0.8260035996556852, "grad_norm": 0.0, "learning_rate": 1.5462935159171e-06, "loss": 0.9067, "step": 21111 }, { "epoch": 0.8260427263479145, "grad_norm": 0.0, "learning_rate": 1.5456166540500473e-06, "loss": 0.9587, "step": 21112 }, { "epoch": 0.826081853040144, "grad_norm": 0.0, "learning_rate": 1.5449399279497934e-06, "loss": 0.8314, "step": 21113 }, { "epoch": 0.8261209797323734, "grad_norm": 0.0, "learning_rate": 1.5442633376272142e-06, "loss": 0.9356, "step": 21114 }, { "epoch": 0.8261601064246029, "grad_norm": 0.0, "learning_rate": 1.5435868830931678e-06, "loss": 0.9286, "step": 21115 }, { "epoch": 0.8261992331168323, "grad_norm": 0.0, "learning_rate": 1.5429105643585218e-06, "loss": 0.9097, "step": 21116 }, { "epoch": 0.8262383598090618, "grad_norm": 0.0, "learning_rate": 1.5422343814341323e-06, "loss": 0.8741, "step": 21117 }, { "epoch": 0.8262774865012912, "grad_norm": 0.0, "learning_rate": 1.5415583343308604e-06, "loss": 0.9799, "step": 21118 }, { "epoch": 0.8263166131935206, "grad_norm": 0.0, "learning_rate": 1.5408824230595632e-06, "loss": 1.1024, "step": 21119 }, { "epoch": 0.8263557398857501, "grad_norm": 0.0, "learning_rate": 1.5402066476310963e-06, "loss": 0.9567, "step": 21120 }, { "epoch": 0.8263948665779794, "grad_norm": 0.0, "learning_rate": 1.5395310080563074e-06, "loss": 0.9463, "step": 21121 }, { "epoch": 0.8264339932702089, "grad_norm": 0.0, "learning_rate": 1.5388555043460495e-06, "loss": 0.8466, "step": 21122 }, { "epoch": 0.8264731199624383, "grad_norm": 0.0, "learning_rate": 1.5381801365111726e-06, "loss": 0.8597, "step": 21123 }, { "epoch": 0.8265122466546678, "grad_norm": 0.0, "learning_rate": 1.5375049045625157e-06, "loss": 1.008, "step": 21124 }, { "epoch": 0.8265513733468972, "grad_norm": 0.0, "learning_rate": 1.5368298085109269e-06, "loss": 0.9358, "step": 21125 }, { "epoch": 0.8265905000391267, "grad_norm": 0.0, "learning_rate": 1.536154848367246e-06, "loss": 0.9189, "step": 21126 }, { "epoch": 0.8266296267313561, "grad_norm": 0.0, "learning_rate": 1.5354800241423152e-06, "loss": 0.877, "step": 21127 }, { "epoch": 0.8266687534235856, "grad_norm": 0.0, "learning_rate": 1.5348053358469661e-06, "loss": 0.9633, "step": 21128 }, { "epoch": 0.826707880115815, "grad_norm": 0.0, "learning_rate": 1.5341307834920383e-06, "loss": 0.8789, "step": 21129 }, { "epoch": 0.8267470068080445, "grad_norm": 0.0, "learning_rate": 1.5334563670883585e-06, "loss": 0.9502, "step": 21130 }, { "epoch": 0.8267861335002739, "grad_norm": 0.0, "learning_rate": 1.5327820866467613e-06, "loss": 0.9788, "step": 21131 }, { "epoch": 0.8268252601925034, "grad_norm": 0.0, "learning_rate": 1.5321079421780738e-06, "loss": 0.984, "step": 21132 }, { "epoch": 0.8268643868847327, "grad_norm": 0.0, "learning_rate": 1.531433933693124e-06, "loss": 0.901, "step": 21133 }, { "epoch": 0.8269035135769622, "grad_norm": 0.0, "learning_rate": 1.5307600612027307e-06, "loss": 0.996, "step": 21134 }, { "epoch": 0.8269426402691916, "grad_norm": 0.0, "learning_rate": 1.5300863247177178e-06, "loss": 0.8651, "step": 21135 }, { "epoch": 0.8269817669614211, "grad_norm": 0.0, "learning_rate": 1.5294127242489065e-06, "loss": 1.0009, "step": 21136 }, { "epoch": 0.8270208936536505, "grad_norm": 0.0, "learning_rate": 1.528739259807115e-06, "loss": 0.9616, "step": 21137 }, { "epoch": 0.82706002034588, "grad_norm": 0.0, "learning_rate": 1.5280659314031521e-06, "loss": 0.8873, "step": 21138 }, { "epoch": 0.8270991470381094, "grad_norm": 0.0, "learning_rate": 1.5273927390478383e-06, "loss": 1.0071, "step": 21139 }, { "epoch": 0.8271382737303389, "grad_norm": 0.0, "learning_rate": 1.5267196827519748e-06, "loss": 0.9687, "step": 21140 }, { "epoch": 0.8271774004225683, "grad_norm": 0.0, "learning_rate": 1.5260467625263798e-06, "loss": 0.9907, "step": 21141 }, { "epoch": 0.8272165271147978, "grad_norm": 0.0, "learning_rate": 1.5253739783818544e-06, "loss": 1.0063, "step": 21142 }, { "epoch": 0.8272556538070271, "grad_norm": 0.0, "learning_rate": 1.5247013303292046e-06, "loss": 0.8983, "step": 21143 }, { "epoch": 0.8272947804992566, "grad_norm": 0.0, "learning_rate": 1.5240288183792273e-06, "loss": 1.0508, "step": 21144 }, { "epoch": 0.827333907191486, "grad_norm": 0.0, "learning_rate": 1.5233564425427305e-06, "loss": 0.9155, "step": 21145 }, { "epoch": 0.8273730338837155, "grad_norm": 0.0, "learning_rate": 1.5226842028305056e-06, "loss": 0.9573, "step": 21146 }, { "epoch": 0.8274121605759449, "grad_norm": 0.0, "learning_rate": 1.5220120992533515e-06, "loss": 0.8715, "step": 21147 }, { "epoch": 0.8274512872681743, "grad_norm": 0.0, "learning_rate": 1.5213401318220577e-06, "loss": 0.9426, "step": 21148 }, { "epoch": 0.8274904139604038, "grad_norm": 0.0, "learning_rate": 1.5206683005474165e-06, "loss": 0.941, "step": 21149 }, { "epoch": 0.8275295406526332, "grad_norm": 0.0, "learning_rate": 1.5199966054402182e-06, "loss": 0.8342, "step": 21150 }, { "epoch": 0.8275686673448627, "grad_norm": 0.0, "learning_rate": 1.5193250465112496e-06, "loss": 1.062, "step": 21151 }, { "epoch": 0.8276077940370921, "grad_norm": 0.0, "learning_rate": 1.5186536237712923e-06, "loss": 1.0703, "step": 21152 }, { "epoch": 0.8276469207293216, "grad_norm": 0.0, "learning_rate": 1.5179823372311298e-06, "loss": 0.8972, "step": 21153 }, { "epoch": 0.8276860474215509, "grad_norm": 0.0, "learning_rate": 1.5173111869015423e-06, "loss": 0.9459, "step": 21154 }, { "epoch": 0.8277251741137804, "grad_norm": 0.0, "learning_rate": 1.51664017279331e-06, "loss": 1.0001, "step": 21155 }, { "epoch": 0.8277643008060098, "grad_norm": 0.0, "learning_rate": 1.5159692949172045e-06, "loss": 0.9187, "step": 21156 }, { "epoch": 0.8278034274982393, "grad_norm": 0.0, "learning_rate": 1.5152985532840026e-06, "loss": 1.0369, "step": 21157 }, { "epoch": 0.8278425541904687, "grad_norm": 0.0, "learning_rate": 1.5146279479044702e-06, "loss": 1.0999, "step": 21158 }, { "epoch": 0.8278816808826982, "grad_norm": 0.0, "learning_rate": 1.513957478789384e-06, "loss": 0.8448, "step": 21159 }, { "epoch": 0.8279208075749276, "grad_norm": 0.0, "learning_rate": 1.5132871459495047e-06, "loss": 0.9104, "step": 21160 }, { "epoch": 0.8279599342671571, "grad_norm": 0.0, "learning_rate": 1.5126169493956022e-06, "loss": 0.9986, "step": 21161 }, { "epoch": 0.8279990609593865, "grad_norm": 0.0, "learning_rate": 1.5119468891384336e-06, "loss": 1.0059, "step": 21162 }, { "epoch": 0.828038187651616, "grad_norm": 0.0, "learning_rate": 1.5112769651887626e-06, "loss": 1.0123, "step": 21163 }, { "epoch": 0.8280773143438453, "grad_norm": 0.0, "learning_rate": 1.510607177557346e-06, "loss": 1.0165, "step": 21164 }, { "epoch": 0.8281164410360748, "grad_norm": 0.0, "learning_rate": 1.509937526254942e-06, "loss": 0.9377, "step": 21165 }, { "epoch": 0.8281555677283042, "grad_norm": 0.0, "learning_rate": 1.509268011292302e-06, "loss": 0.8664, "step": 21166 }, { "epoch": 0.8281946944205337, "grad_norm": 0.0, "learning_rate": 1.5085986326801772e-06, "loss": 1.0032, "step": 21167 }, { "epoch": 0.8282338211127631, "grad_norm": 0.0, "learning_rate": 1.5079293904293202e-06, "loss": 0.9639, "step": 21168 }, { "epoch": 0.8282729478049926, "grad_norm": 0.0, "learning_rate": 1.5072602845504737e-06, "loss": 0.8248, "step": 21169 }, { "epoch": 0.828312074497222, "grad_norm": 0.0, "learning_rate": 1.5065913150543853e-06, "loss": 0.9411, "step": 21170 }, { "epoch": 0.8283512011894515, "grad_norm": 0.0, "learning_rate": 1.5059224819517982e-06, "loss": 0.9276, "step": 21171 }, { "epoch": 0.8283903278816809, "grad_norm": 0.0, "learning_rate": 1.5052537852534532e-06, "loss": 0.9988, "step": 21172 }, { "epoch": 0.8284294545739104, "grad_norm": 0.0, "learning_rate": 1.5045852249700864e-06, "loss": 1.0077, "step": 21173 }, { "epoch": 0.8284685812661398, "grad_norm": 0.0, "learning_rate": 1.5039168011124383e-06, "loss": 0.917, "step": 21174 }, { "epoch": 0.8285077079583693, "grad_norm": 0.0, "learning_rate": 1.5032485136912345e-06, "loss": 0.9378, "step": 21175 }, { "epoch": 0.8285468346505986, "grad_norm": 0.0, "learning_rate": 1.5025803627172187e-06, "loss": 0.9297, "step": 21176 }, { "epoch": 0.828585961342828, "grad_norm": 0.0, "learning_rate": 1.5019123482011111e-06, "loss": 0.8334, "step": 21177 }, { "epoch": 0.8286250880350575, "grad_norm": 0.0, "learning_rate": 1.5012444701536444e-06, "loss": 0.9745, "step": 21178 }, { "epoch": 0.8286642147272869, "grad_norm": 0.0, "learning_rate": 1.5005767285855398e-06, "loss": 1.0865, "step": 21179 }, { "epoch": 0.8287033414195164, "grad_norm": 0.0, "learning_rate": 1.4999091235075237e-06, "loss": 0.9194, "step": 21180 }, { "epoch": 0.8287424681117458, "grad_norm": 0.0, "learning_rate": 1.4992416549303146e-06, "loss": 1.0057, "step": 21181 }, { "epoch": 0.8287815948039753, "grad_norm": 0.0, "learning_rate": 1.4985743228646355e-06, "loss": 1.0544, "step": 21182 }, { "epoch": 0.8288207214962047, "grad_norm": 0.0, "learning_rate": 1.4979071273211964e-06, "loss": 0.8769, "step": 21183 }, { "epoch": 0.8288598481884342, "grad_norm": 0.0, "learning_rate": 1.4972400683107168e-06, "loss": 1.0358, "step": 21184 }, { "epoch": 0.8288989748806636, "grad_norm": 0.0, "learning_rate": 1.4965731458439058e-06, "loss": 0.9805, "step": 21185 }, { "epoch": 0.828938101572893, "grad_norm": 0.0, "learning_rate": 1.4959063599314784e-06, "loss": 1.0133, "step": 21186 }, { "epoch": 0.8289772282651224, "grad_norm": 0.0, "learning_rate": 1.4952397105841355e-06, "loss": 0.8855, "step": 21187 }, { "epoch": 0.8290163549573519, "grad_norm": 0.0, "learning_rate": 1.4945731978125876e-06, "loss": 0.9665, "step": 21188 }, { "epoch": 0.8290554816495813, "grad_norm": 0.0, "learning_rate": 1.493906821627532e-06, "loss": 1.059, "step": 21189 }, { "epoch": 0.8290946083418108, "grad_norm": 0.0, "learning_rate": 1.4932405820396788e-06, "loss": 0.9539, "step": 21190 }, { "epoch": 0.8291337350340402, "grad_norm": 0.0, "learning_rate": 1.4925744790597207e-06, "loss": 0.8228, "step": 21191 }, { "epoch": 0.8291728617262697, "grad_norm": 0.0, "learning_rate": 1.491908512698358e-06, "loss": 0.9244, "step": 21192 }, { "epoch": 0.8292119884184991, "grad_norm": 0.0, "learning_rate": 1.4912426829662774e-06, "loss": 0.9526, "step": 21193 }, { "epoch": 0.8292511151107286, "grad_norm": 0.0, "learning_rate": 1.490576989874184e-06, "loss": 0.9044, "step": 21194 }, { "epoch": 0.829290241802958, "grad_norm": 0.0, "learning_rate": 1.489911433432757e-06, "loss": 1.0135, "step": 21195 }, { "epoch": 0.8293293684951875, "grad_norm": 0.0, "learning_rate": 1.489246013652692e-06, "loss": 0.968, "step": 21196 }, { "epoch": 0.8293684951874168, "grad_norm": 0.0, "learning_rate": 1.4885807305446687e-06, "loss": 0.9266, "step": 21197 }, { "epoch": 0.8294076218796463, "grad_norm": 0.0, "learning_rate": 1.4879155841193738e-06, "loss": 0.9256, "step": 21198 }, { "epoch": 0.8294467485718757, "grad_norm": 0.0, "learning_rate": 1.4872505743874888e-06, "loss": 0.8789, "step": 21199 }, { "epoch": 0.8294858752641052, "grad_norm": 0.0, "learning_rate": 1.486585701359694e-06, "loss": 0.9001, "step": 21200 }, { "epoch": 0.8295250019563346, "grad_norm": 0.0, "learning_rate": 1.4859209650466632e-06, "loss": 0.9476, "step": 21201 }, { "epoch": 0.8295641286485641, "grad_norm": 0.0, "learning_rate": 1.4852563654590724e-06, "loss": 1.0095, "step": 21202 }, { "epoch": 0.8296032553407935, "grad_norm": 0.0, "learning_rate": 1.484591902607595e-06, "loss": 1.0308, "step": 21203 }, { "epoch": 0.8296423820330229, "grad_norm": 0.0, "learning_rate": 1.4839275765029038e-06, "loss": 0.8912, "step": 21204 }, { "epoch": 0.8296815087252524, "grad_norm": 0.0, "learning_rate": 1.4832633871556623e-06, "loss": 0.9306, "step": 21205 }, { "epoch": 0.8297206354174818, "grad_norm": 0.0, "learning_rate": 1.4825993345765399e-06, "loss": 0.8798, "step": 21206 }, { "epoch": 0.8297597621097113, "grad_norm": 0.0, "learning_rate": 1.481935418776198e-06, "loss": 0.8582, "step": 21207 }, { "epoch": 0.8297988888019406, "grad_norm": 0.0, "learning_rate": 1.4812716397652994e-06, "loss": 0.786, "step": 21208 }, { "epoch": 0.8298380154941701, "grad_norm": 0.0, "learning_rate": 1.480607997554504e-06, "loss": 1.0247, "step": 21209 }, { "epoch": 0.8298771421863995, "grad_norm": 0.0, "learning_rate": 1.4799444921544703e-06, "loss": 0.9909, "step": 21210 }, { "epoch": 0.829916268878629, "grad_norm": 0.0, "learning_rate": 1.479281123575851e-06, "loss": 0.923, "step": 21211 }, { "epoch": 0.8299553955708584, "grad_norm": 0.0, "learning_rate": 1.478617891829298e-06, "loss": 0.9118, "step": 21212 }, { "epoch": 0.8299945222630879, "grad_norm": 0.0, "learning_rate": 1.4779547969254671e-06, "loss": 0.9758, "step": 21213 }, { "epoch": 0.8300336489553173, "grad_norm": 0.0, "learning_rate": 1.4772918388750001e-06, "loss": 0.9247, "step": 21214 }, { "epoch": 0.8300727756475468, "grad_norm": 0.0, "learning_rate": 1.4766290176885479e-06, "loss": 1.0244, "step": 21215 }, { "epoch": 0.8301119023397762, "grad_norm": 0.0, "learning_rate": 1.4759663333767527e-06, "loss": 1.0735, "step": 21216 }, { "epoch": 0.8301510290320057, "grad_norm": 0.0, "learning_rate": 1.4753037859502595e-06, "loss": 1.0095, "step": 21217 }, { "epoch": 0.830190155724235, "grad_norm": 0.0, "learning_rate": 1.474641375419703e-06, "loss": 0.9986, "step": 21218 }, { "epoch": 0.8302292824164645, "grad_norm": 0.0, "learning_rate": 1.4739791017957228e-06, "loss": 0.8646, "step": 21219 }, { "epoch": 0.8302684091086939, "grad_norm": 0.0, "learning_rate": 1.4733169650889556e-06, "loss": 0.9604, "step": 21220 }, { "epoch": 0.8303075358009234, "grad_norm": 0.0, "learning_rate": 1.472654965310034e-06, "loss": 0.9107, "step": 21221 }, { "epoch": 0.8303466624931528, "grad_norm": 0.0, "learning_rate": 1.4719931024695877e-06, "loss": 1.0022, "step": 21222 }, { "epoch": 0.8303857891853823, "grad_norm": 0.0, "learning_rate": 1.4713313765782477e-06, "loss": 0.9735, "step": 21223 }, { "epoch": 0.8304249158776117, "grad_norm": 0.0, "learning_rate": 1.4706697876466336e-06, "loss": 0.892, "step": 21224 }, { "epoch": 0.8304640425698412, "grad_norm": 0.0, "learning_rate": 1.47000833568538e-06, "loss": 0.9803, "step": 21225 }, { "epoch": 0.8305031692620706, "grad_norm": 0.0, "learning_rate": 1.469347020705102e-06, "loss": 0.9642, "step": 21226 }, { "epoch": 0.8305422959543001, "grad_norm": 0.0, "learning_rate": 1.468685842716423e-06, "loss": 0.9933, "step": 21227 }, { "epoch": 0.8305814226465295, "grad_norm": 0.0, "learning_rate": 1.468024801729957e-06, "loss": 0.887, "step": 21228 }, { "epoch": 0.830620549338759, "grad_norm": 0.0, "learning_rate": 1.467363897756321e-06, "loss": 1.0014, "step": 21229 }, { "epoch": 0.8306596760309883, "grad_norm": 0.0, "learning_rate": 1.4667031308061287e-06, "loss": 0.9629, "step": 21230 }, { "epoch": 0.8306988027232178, "grad_norm": 0.0, "learning_rate": 1.4660425008899947e-06, "loss": 0.8699, "step": 21231 }, { "epoch": 0.8307379294154472, "grad_norm": 0.0, "learning_rate": 1.465382008018521e-06, "loss": 0.9178, "step": 21232 }, { "epoch": 0.8307770561076766, "grad_norm": 0.0, "learning_rate": 1.4647216522023188e-06, "loss": 0.9376, "step": 21233 }, { "epoch": 0.8308161827999061, "grad_norm": 0.0, "learning_rate": 1.4640614334519908e-06, "loss": 0.97, "step": 21234 }, { "epoch": 0.8308553094921355, "grad_norm": 0.0, "learning_rate": 1.463401351778142e-06, "loss": 0.933, "step": 21235 }, { "epoch": 0.830894436184365, "grad_norm": 0.0, "learning_rate": 1.4627414071913693e-06, "loss": 1.0509, "step": 21236 }, { "epoch": 0.8309335628765944, "grad_norm": 0.0, "learning_rate": 1.4620815997022742e-06, "loss": 0.9481, "step": 21237 }, { "epoch": 0.8309726895688239, "grad_norm": 0.0, "learning_rate": 1.461421929321445e-06, "loss": 0.9471, "step": 21238 }, { "epoch": 0.8310118162610532, "grad_norm": 0.0, "learning_rate": 1.4607623960594842e-06, "loss": 1.0839, "step": 21239 }, { "epoch": 0.8310509429532827, "grad_norm": 0.0, "learning_rate": 1.4601029999269767e-06, "loss": 0.9331, "step": 21240 }, { "epoch": 0.8310900696455121, "grad_norm": 0.0, "learning_rate": 1.4594437409345175e-06, "loss": 0.9573, "step": 21241 }, { "epoch": 0.8311291963377416, "grad_norm": 0.0, "learning_rate": 1.4587846190926846e-06, "loss": 0.9508, "step": 21242 }, { "epoch": 0.831168323029971, "grad_norm": 0.0, "learning_rate": 1.458125634412072e-06, "loss": 0.9221, "step": 21243 }, { "epoch": 0.8312074497222005, "grad_norm": 0.0, "learning_rate": 1.4574667869032567e-06, "loss": 0.8709, "step": 21244 }, { "epoch": 0.8312465764144299, "grad_norm": 0.0, "learning_rate": 1.4568080765768223e-06, "loss": 0.9938, "step": 21245 }, { "epoch": 0.8312857031066594, "grad_norm": 0.0, "learning_rate": 1.4561495034433426e-06, "loss": 0.9168, "step": 21246 }, { "epoch": 0.8313248297988888, "grad_norm": 0.0, "learning_rate": 1.455491067513396e-06, "loss": 0.9072, "step": 21247 }, { "epoch": 0.8313639564911183, "grad_norm": 0.0, "learning_rate": 1.4548327687975562e-06, "loss": 0.9457, "step": 21248 }, { "epoch": 0.8314030831833477, "grad_norm": 0.0, "learning_rate": 1.4541746073063967e-06, "loss": 0.8553, "step": 21249 }, { "epoch": 0.8314422098755772, "grad_norm": 0.0, "learning_rate": 1.4535165830504815e-06, "loss": 1.012, "step": 21250 }, { "epoch": 0.8314813365678065, "grad_norm": 0.0, "learning_rate": 1.4528586960403812e-06, "loss": 0.84, "step": 21251 }, { "epoch": 0.831520463260036, "grad_norm": 0.0, "learning_rate": 1.4522009462866604e-06, "loss": 1.0172, "step": 21252 }, { "epoch": 0.8315595899522654, "grad_norm": 0.0, "learning_rate": 1.451543333799883e-06, "loss": 0.9656, "step": 21253 }, { "epoch": 0.8315987166444949, "grad_norm": 0.0, "learning_rate": 1.4508858585906061e-06, "loss": 1.043, "step": 21254 }, { "epoch": 0.8316378433367243, "grad_norm": 0.0, "learning_rate": 1.450228520669391e-06, "loss": 0.9715, "step": 21255 }, { "epoch": 0.8316769700289538, "grad_norm": 0.0, "learning_rate": 1.449571320046791e-06, "loss": 0.9251, "step": 21256 }, { "epoch": 0.8317160967211832, "grad_norm": 0.0, "learning_rate": 1.4489142567333614e-06, "loss": 0.8959, "step": 21257 }, { "epoch": 0.8317552234134127, "grad_norm": 0.0, "learning_rate": 1.448257330739653e-06, "loss": 1.0175, "step": 21258 }, { "epoch": 0.8317943501056421, "grad_norm": 0.0, "learning_rate": 1.4476005420762196e-06, "loss": 0.9612, "step": 21259 }, { "epoch": 0.8318334767978716, "grad_norm": 0.0, "learning_rate": 1.4469438907536014e-06, "loss": 0.9418, "step": 21260 }, { "epoch": 0.831872603490101, "grad_norm": 0.0, "learning_rate": 1.446287376782346e-06, "loss": 0.9322, "step": 21261 }, { "epoch": 0.8319117301823303, "grad_norm": 0.0, "learning_rate": 1.4456310001730001e-06, "loss": 0.9458, "step": 21262 }, { "epoch": 0.8319508568745598, "grad_norm": 0.0, "learning_rate": 1.4449747609360998e-06, "loss": 0.8749, "step": 21263 }, { "epoch": 0.8319899835667892, "grad_norm": 0.0, "learning_rate": 1.4443186590821835e-06, "loss": 1.0759, "step": 21264 }, { "epoch": 0.8320291102590187, "grad_norm": 0.0, "learning_rate": 1.4436626946217891e-06, "loss": 0.9332, "step": 21265 }, { "epoch": 0.8320682369512481, "grad_norm": 0.0, "learning_rate": 1.443006867565452e-06, "loss": 1.08, "step": 21266 }, { "epoch": 0.8321073636434776, "grad_norm": 0.0, "learning_rate": 1.4423511779236998e-06, "loss": 1.011, "step": 21267 }, { "epoch": 0.832146490335707, "grad_norm": 0.0, "learning_rate": 1.441695625707067e-06, "loss": 0.9406, "step": 21268 }, { "epoch": 0.8321856170279365, "grad_norm": 0.0, "learning_rate": 1.4410402109260734e-06, "loss": 0.8778, "step": 21269 }, { "epoch": 0.8322247437201659, "grad_norm": 0.0, "learning_rate": 1.4403849335912545e-06, "loss": 0.9647, "step": 21270 }, { "epoch": 0.8322638704123954, "grad_norm": 0.0, "learning_rate": 1.439729793713125e-06, "loss": 0.951, "step": 21271 }, { "epoch": 0.8323029971046247, "grad_norm": 0.0, "learning_rate": 1.4390747913022108e-06, "loss": 0.983, "step": 21272 }, { "epoch": 0.8323421237968542, "grad_norm": 0.0, "learning_rate": 1.4384199263690223e-06, "loss": 0.8877, "step": 21273 }, { "epoch": 0.8323812504890836, "grad_norm": 0.0, "learning_rate": 1.437765198924087e-06, "loss": 1.065, "step": 21274 }, { "epoch": 0.8324203771813131, "grad_norm": 0.0, "learning_rate": 1.437110608977912e-06, "loss": 0.9195, "step": 21275 }, { "epoch": 0.8324595038735425, "grad_norm": 0.0, "learning_rate": 1.4364561565410117e-06, "loss": 1.1031, "step": 21276 }, { "epoch": 0.832498630565772, "grad_norm": 0.0, "learning_rate": 1.4358018416238928e-06, "loss": 0.8553, "step": 21277 }, { "epoch": 0.8325377572580014, "grad_norm": 0.0, "learning_rate": 1.4351476642370654e-06, "loss": 0.8354, "step": 21278 }, { "epoch": 0.8325768839502309, "grad_norm": 0.0, "learning_rate": 1.4344936243910335e-06, "loss": 0.9024, "step": 21279 }, { "epoch": 0.8326160106424603, "grad_norm": 0.0, "learning_rate": 1.433839722096303e-06, "loss": 0.9846, "step": 21280 }, { "epoch": 0.8326551373346898, "grad_norm": 0.0, "learning_rate": 1.433185957363371e-06, "loss": 1.0498, "step": 21281 }, { "epoch": 0.8326942640269192, "grad_norm": 0.0, "learning_rate": 1.4325323302027371e-06, "loss": 1.0326, "step": 21282 }, { "epoch": 0.8327333907191486, "grad_norm": 0.0, "learning_rate": 1.4318788406248985e-06, "loss": 1.0126, "step": 21283 }, { "epoch": 0.832772517411378, "grad_norm": 0.0, "learning_rate": 1.4312254886403521e-06, "loss": 0.9881, "step": 21284 }, { "epoch": 0.8328116441036075, "grad_norm": 0.0, "learning_rate": 1.430572274259584e-06, "loss": 1.0184, "step": 21285 }, { "epoch": 0.8328507707958369, "grad_norm": 0.0, "learning_rate": 1.4299191974930904e-06, "loss": 0.8704, "step": 21286 }, { "epoch": 0.8328898974880664, "grad_norm": 0.0, "learning_rate": 1.4292662583513495e-06, "loss": 0.8705, "step": 21287 }, { "epoch": 0.8329290241802958, "grad_norm": 0.0, "learning_rate": 1.4286134568448585e-06, "loss": 0.8804, "step": 21288 }, { "epoch": 0.8329681508725252, "grad_norm": 0.0, "learning_rate": 1.4279607929840922e-06, "loss": 1.0775, "step": 21289 }, { "epoch": 0.8330072775647547, "grad_norm": 0.0, "learning_rate": 1.427308266779537e-06, "loss": 0.9059, "step": 21290 }, { "epoch": 0.8330464042569841, "grad_norm": 0.0, "learning_rate": 1.4266558782416628e-06, "loss": 0.9082, "step": 21291 }, { "epoch": 0.8330855309492136, "grad_norm": 0.0, "learning_rate": 1.4260036273809585e-06, "loss": 0.9477, "step": 21292 }, { "epoch": 0.833124657641443, "grad_norm": 0.0, "learning_rate": 1.4253515142078888e-06, "loss": 0.9509, "step": 21293 }, { "epoch": 0.8331637843336724, "grad_norm": 0.0, "learning_rate": 1.424699538732931e-06, "loss": 1.0138, "step": 21294 }, { "epoch": 0.8332029110259018, "grad_norm": 0.0, "learning_rate": 1.4240477009665521e-06, "loss": 1.0442, "step": 21295 }, { "epoch": 0.8332420377181313, "grad_norm": 0.0, "learning_rate": 1.4233960009192204e-06, "loss": 0.9395, "step": 21296 }, { "epoch": 0.8332811644103607, "grad_norm": 0.0, "learning_rate": 1.422744438601401e-06, "loss": 1.0452, "step": 21297 }, { "epoch": 0.8333202911025902, "grad_norm": 0.0, "learning_rate": 1.4220930140235613e-06, "loss": 1.0301, "step": 21298 }, { "epoch": 0.8333594177948196, "grad_norm": 0.0, "learning_rate": 1.4214417271961567e-06, "loss": 0.8855, "step": 21299 }, { "epoch": 0.8333985444870491, "grad_norm": 0.0, "learning_rate": 1.4207905781296483e-06, "loss": 0.9364, "step": 21300 }, { "epoch": 0.8334376711792785, "grad_norm": 0.0, "learning_rate": 1.4201395668344953e-06, "loss": 1.0471, "step": 21301 }, { "epoch": 0.833476797871508, "grad_norm": 0.0, "learning_rate": 1.4194886933211471e-06, "loss": 1.063, "step": 21302 }, { "epoch": 0.8335159245637374, "grad_norm": 0.0, "learning_rate": 1.418837957600059e-06, "loss": 0.8733, "step": 21303 }, { "epoch": 0.8335550512559669, "grad_norm": 0.0, "learning_rate": 1.418187359681682e-06, "loss": 0.9975, "step": 21304 }, { "epoch": 0.8335941779481962, "grad_norm": 0.0, "learning_rate": 1.4175368995764604e-06, "loss": 0.9657, "step": 21305 }, { "epoch": 0.8336333046404257, "grad_norm": 0.0, "learning_rate": 1.4168865772948414e-06, "loss": 0.9841, "step": 21306 }, { "epoch": 0.8336724313326551, "grad_norm": 0.0, "learning_rate": 1.4162363928472722e-06, "loss": 0.8973, "step": 21307 }, { "epoch": 0.8337115580248846, "grad_norm": 0.0, "learning_rate": 1.4155863462441876e-06, "loss": 0.968, "step": 21308 }, { "epoch": 0.833750684717114, "grad_norm": 0.0, "learning_rate": 1.4149364374960294e-06, "loss": 1.0251, "step": 21309 }, { "epoch": 0.8337898114093435, "grad_norm": 0.0, "learning_rate": 1.4142866666132337e-06, "loss": 0.916, "step": 21310 }, { "epoch": 0.8338289381015729, "grad_norm": 0.0, "learning_rate": 1.413637033606239e-06, "loss": 1.0287, "step": 21311 }, { "epoch": 0.8338680647938024, "grad_norm": 0.0, "learning_rate": 1.4129875384854708e-06, "loss": 0.7816, "step": 21312 }, { "epoch": 0.8339071914860318, "grad_norm": 0.0, "learning_rate": 1.4123381812613657e-06, "loss": 1.0399, "step": 21313 }, { "epoch": 0.8339463181782613, "grad_norm": 0.0, "learning_rate": 1.4116889619443431e-06, "loss": 0.8451, "step": 21314 }, { "epoch": 0.8339854448704906, "grad_norm": 0.0, "learning_rate": 1.41103988054484e-06, "loss": 1.011, "step": 21315 }, { "epoch": 0.8340245715627201, "grad_norm": 0.0, "learning_rate": 1.4103909370732704e-06, "loss": 0.985, "step": 21316 }, { "epoch": 0.8340636982549495, "grad_norm": 0.0, "learning_rate": 1.4097421315400617e-06, "loss": 0.8886, "step": 21317 }, { "epoch": 0.8341028249471789, "grad_norm": 0.0, "learning_rate": 1.4090934639556252e-06, "loss": 0.97, "step": 21318 }, { "epoch": 0.8341419516394084, "grad_norm": 0.0, "learning_rate": 1.4084449343303874e-06, "loss": 0.816, "step": 21319 }, { "epoch": 0.8341810783316378, "grad_norm": 0.0, "learning_rate": 1.4077965426747564e-06, "loss": 0.8493, "step": 21320 }, { "epoch": 0.8342202050238673, "grad_norm": 0.0, "learning_rate": 1.4071482889991472e-06, "loss": 0.9699, "step": 21321 }, { "epoch": 0.8342593317160967, "grad_norm": 0.0, "learning_rate": 1.4065001733139683e-06, "loss": 0.9069, "step": 21322 }, { "epoch": 0.8342984584083262, "grad_norm": 0.0, "learning_rate": 1.4058521956296278e-06, "loss": 0.8895, "step": 21323 }, { "epoch": 0.8343375851005556, "grad_norm": 0.0, "learning_rate": 1.405204355956532e-06, "loss": 0.8512, "step": 21324 }, { "epoch": 0.834376711792785, "grad_norm": 0.0, "learning_rate": 1.404556654305086e-06, "loss": 0.9451, "step": 21325 }, { "epoch": 0.8344158384850144, "grad_norm": 0.0, "learning_rate": 1.4039090906856877e-06, "loss": 0.9846, "step": 21326 }, { "epoch": 0.8344549651772439, "grad_norm": 0.0, "learning_rate": 1.4032616651087382e-06, "loss": 0.9691, "step": 21327 }, { "epoch": 0.8344940918694733, "grad_norm": 0.0, "learning_rate": 1.4026143775846334e-06, "loss": 0.8464, "step": 21328 }, { "epoch": 0.8345332185617028, "grad_norm": 0.0, "learning_rate": 1.4019672281237716e-06, "loss": 0.952, "step": 21329 }, { "epoch": 0.8345723452539322, "grad_norm": 0.0, "learning_rate": 1.401320216736539e-06, "loss": 0.9403, "step": 21330 }, { "epoch": 0.8346114719461617, "grad_norm": 0.0, "learning_rate": 1.4006733434333297e-06, "loss": 0.9908, "step": 21331 }, { "epoch": 0.8346505986383911, "grad_norm": 0.0, "learning_rate": 1.4000266082245305e-06, "loss": 1.0227, "step": 21332 }, { "epoch": 0.8346897253306206, "grad_norm": 0.0, "learning_rate": 1.3993800111205302e-06, "loss": 1.0024, "step": 21333 }, { "epoch": 0.83472885202285, "grad_norm": 0.0, "learning_rate": 1.3987335521317068e-06, "loss": 0.9378, "step": 21334 }, { "epoch": 0.8347679787150795, "grad_norm": 0.0, "learning_rate": 1.398087231268448e-06, "loss": 0.828, "step": 21335 }, { "epoch": 0.8348071054073088, "grad_norm": 0.0, "learning_rate": 1.3974410485411238e-06, "loss": 0.9512, "step": 21336 }, { "epoch": 0.8348462320995383, "grad_norm": 0.0, "learning_rate": 1.396795003960122e-06, "loss": 1.0194, "step": 21337 }, { "epoch": 0.8348853587917677, "grad_norm": 0.0, "learning_rate": 1.3961490975358095e-06, "loss": 1.0229, "step": 21338 }, { "epoch": 0.8349244854839972, "grad_norm": 0.0, "learning_rate": 1.3955033292785636e-06, "loss": 1.0191, "step": 21339 }, { "epoch": 0.8349636121762266, "grad_norm": 0.0, "learning_rate": 1.3948576991987495e-06, "loss": 0.8223, "step": 21340 }, { "epoch": 0.8350027388684561, "grad_norm": 0.0, "learning_rate": 1.3942122073067388e-06, "loss": 0.9495, "step": 21341 }, { "epoch": 0.8350418655606855, "grad_norm": 0.0, "learning_rate": 1.3935668536128955e-06, "loss": 0.9417, "step": 21342 }, { "epoch": 0.835080992252915, "grad_norm": 0.0, "learning_rate": 1.3929216381275866e-06, "loss": 0.8976, "step": 21343 }, { "epoch": 0.8351201189451444, "grad_norm": 0.0, "learning_rate": 1.3922765608611687e-06, "loss": 0.8626, "step": 21344 }, { "epoch": 0.8351592456373739, "grad_norm": 0.0, "learning_rate": 1.3916316218240034e-06, "loss": 1.0572, "step": 21345 }, { "epoch": 0.8351983723296033, "grad_norm": 0.0, "learning_rate": 1.39098682102645e-06, "loss": 0.9258, "step": 21346 }, { "epoch": 0.8352374990218326, "grad_norm": 0.0, "learning_rate": 1.3903421584788579e-06, "loss": 0.9478, "step": 21347 }, { "epoch": 0.8352766257140621, "grad_norm": 0.0, "learning_rate": 1.3896976341915814e-06, "loss": 0.9173, "step": 21348 }, { "epoch": 0.8353157524062915, "grad_norm": 0.0, "learning_rate": 1.389053248174973e-06, "loss": 1.0143, "step": 21349 }, { "epoch": 0.835354879098521, "grad_norm": 0.0, "learning_rate": 1.3884090004393803e-06, "loss": 0.9366, "step": 21350 }, { "epoch": 0.8353940057907504, "grad_norm": 0.0, "learning_rate": 1.3877648909951468e-06, "loss": 0.947, "step": 21351 }, { "epoch": 0.8354331324829799, "grad_norm": 0.0, "learning_rate": 1.3871209198526191e-06, "loss": 0.9763, "step": 21352 }, { "epoch": 0.8354722591752093, "grad_norm": 0.0, "learning_rate": 1.3864770870221344e-06, "loss": 1.0345, "step": 21353 }, { "epoch": 0.8355113858674388, "grad_norm": 0.0, "learning_rate": 1.3858333925140354e-06, "loss": 1.0275, "step": 21354 }, { "epoch": 0.8355505125596682, "grad_norm": 0.0, "learning_rate": 1.3851898363386574e-06, "loss": 0.9193, "step": 21355 }, { "epoch": 0.8355896392518977, "grad_norm": 0.0, "learning_rate": 1.3845464185063373e-06, "loss": 0.9969, "step": 21356 }, { "epoch": 0.835628765944127, "grad_norm": 0.0, "learning_rate": 1.383903139027404e-06, "loss": 0.9254, "step": 21357 }, { "epoch": 0.8356678926363565, "grad_norm": 0.0, "learning_rate": 1.3832599979121907e-06, "loss": 1.0135, "step": 21358 }, { "epoch": 0.8357070193285859, "grad_norm": 0.0, "learning_rate": 1.3826169951710234e-06, "loss": 0.9654, "step": 21359 }, { "epoch": 0.8357461460208154, "grad_norm": 0.0, "learning_rate": 1.381974130814231e-06, "loss": 0.9631, "step": 21360 }, { "epoch": 0.8357852727130448, "grad_norm": 0.0, "learning_rate": 1.3813314048521332e-06, "loss": 0.9417, "step": 21361 }, { "epoch": 0.8358243994052743, "grad_norm": 0.0, "learning_rate": 1.380688817295056e-06, "loss": 1.034, "step": 21362 }, { "epoch": 0.8358635260975037, "grad_norm": 0.0, "learning_rate": 1.3800463681533104e-06, "loss": 1.0363, "step": 21363 }, { "epoch": 0.8359026527897332, "grad_norm": 0.0, "learning_rate": 1.3794040574372247e-06, "loss": 0.9132, "step": 21364 }, { "epoch": 0.8359417794819626, "grad_norm": 0.0, "learning_rate": 1.3787618851571038e-06, "loss": 0.9402, "step": 21365 }, { "epoch": 0.8359809061741921, "grad_norm": 0.0, "learning_rate": 1.3781198513232675e-06, "loss": 0.8636, "step": 21366 }, { "epoch": 0.8360200328664215, "grad_norm": 0.0, "learning_rate": 1.3774779559460171e-06, "loss": 0.8662, "step": 21367 }, { "epoch": 0.836059159558651, "grad_norm": 0.0, "learning_rate": 1.3768361990356705e-06, "loss": 0.9764, "step": 21368 }, { "epoch": 0.8360982862508803, "grad_norm": 0.0, "learning_rate": 1.3761945806025279e-06, "loss": 0.8917, "step": 21369 }, { "epoch": 0.8361374129431098, "grad_norm": 0.0, "learning_rate": 1.3755531006568956e-06, "loss": 0.9948, "step": 21370 }, { "epoch": 0.8361765396353392, "grad_norm": 0.0, "learning_rate": 1.374911759209071e-06, "loss": 0.9473, "step": 21371 }, { "epoch": 0.8362156663275687, "grad_norm": 0.0, "learning_rate": 1.3742705562693559e-06, "loss": 0.9938, "step": 21372 }, { "epoch": 0.8362547930197981, "grad_norm": 0.0, "learning_rate": 1.3736294918480475e-06, "loss": 0.8256, "step": 21373 }, { "epoch": 0.8362939197120276, "grad_norm": 0.0, "learning_rate": 1.372988565955442e-06, "loss": 1.0381, "step": 21374 }, { "epoch": 0.836333046404257, "grad_norm": 0.0, "learning_rate": 1.372347778601828e-06, "loss": 1.1286, "step": 21375 }, { "epoch": 0.8363721730964864, "grad_norm": 0.0, "learning_rate": 1.371707129797497e-06, "loss": 0.8997, "step": 21376 }, { "epoch": 0.8364112997887159, "grad_norm": 0.0, "learning_rate": 1.3710666195527377e-06, "loss": 0.8284, "step": 21377 }, { "epoch": 0.8364504264809453, "grad_norm": 0.0, "learning_rate": 1.3704262478778385e-06, "loss": 0.8908, "step": 21378 }, { "epoch": 0.8364895531731747, "grad_norm": 0.0, "learning_rate": 1.3697860147830778e-06, "loss": 1.0914, "step": 21379 }, { "epoch": 0.8365286798654041, "grad_norm": 0.0, "learning_rate": 1.3691459202787417e-06, "loss": 1.1274, "step": 21380 }, { "epoch": 0.8365678065576336, "grad_norm": 0.0, "learning_rate": 1.368505964375102e-06, "loss": 0.8129, "step": 21381 }, { "epoch": 0.836606933249863, "grad_norm": 0.0, "learning_rate": 1.3678661470824461e-06, "loss": 0.9144, "step": 21382 }, { "epoch": 0.8366460599420925, "grad_norm": 0.0, "learning_rate": 1.3672264684110404e-06, "loss": 0.8026, "step": 21383 }, { "epoch": 0.8366851866343219, "grad_norm": 0.0, "learning_rate": 1.366586928371163e-06, "loss": 1.0786, "step": 21384 }, { "epoch": 0.8367243133265514, "grad_norm": 0.0, "learning_rate": 1.3659475269730782e-06, "loss": 0.9825, "step": 21385 }, { "epoch": 0.8367634400187808, "grad_norm": 0.0, "learning_rate": 1.3653082642270575e-06, "loss": 0.8557, "step": 21386 }, { "epoch": 0.8368025667110103, "grad_norm": 0.0, "learning_rate": 1.3646691401433666e-06, "loss": 0.95, "step": 21387 }, { "epoch": 0.8368416934032397, "grad_norm": 0.0, "learning_rate": 1.3640301547322698e-06, "loss": 0.892, "step": 21388 }, { "epoch": 0.8368808200954692, "grad_norm": 0.0, "learning_rate": 1.363391308004025e-06, "loss": 0.9225, "step": 21389 }, { "epoch": 0.8369199467876985, "grad_norm": 0.0, "learning_rate": 1.3627525999688952e-06, "loss": 1.0656, "step": 21390 }, { "epoch": 0.836959073479928, "grad_norm": 0.0, "learning_rate": 1.3621140306371362e-06, "loss": 0.9826, "step": 21391 }, { "epoch": 0.8369982001721574, "grad_norm": 0.0, "learning_rate": 1.361475600019e-06, "loss": 0.837, "step": 21392 }, { "epoch": 0.8370373268643869, "grad_norm": 0.0, "learning_rate": 1.3608373081247417e-06, "loss": 0.9626, "step": 21393 }, { "epoch": 0.8370764535566163, "grad_norm": 0.0, "learning_rate": 1.3601991549646098e-06, "loss": 1.0288, "step": 21394 }, { "epoch": 0.8371155802488458, "grad_norm": 0.0, "learning_rate": 1.3595611405488563e-06, "loss": 0.9415, "step": 21395 }, { "epoch": 0.8371547069410752, "grad_norm": 0.0, "learning_rate": 1.3589232648877205e-06, "loss": 0.972, "step": 21396 }, { "epoch": 0.8371938336333047, "grad_norm": 0.0, "learning_rate": 1.358285527991453e-06, "loss": 0.896, "step": 21397 }, { "epoch": 0.8372329603255341, "grad_norm": 0.0, "learning_rate": 1.3576479298702849e-06, "loss": 0.9951, "step": 21398 }, { "epoch": 0.8372720870177636, "grad_norm": 0.0, "learning_rate": 1.3570104705344678e-06, "loss": 0.8919, "step": 21399 }, { "epoch": 0.837311213709993, "grad_norm": 0.0, "learning_rate": 1.356373149994229e-06, "loss": 1.0326, "step": 21400 }, { "epoch": 0.8373503404022224, "grad_norm": 0.0, "learning_rate": 1.3557359682598092e-06, "loss": 0.8878, "step": 21401 }, { "epoch": 0.8373894670944518, "grad_norm": 0.0, "learning_rate": 1.355098925341435e-06, "loss": 0.8502, "step": 21402 }, { "epoch": 0.8374285937866812, "grad_norm": 0.0, "learning_rate": 1.3544620212493397e-06, "loss": 0.9245, "step": 21403 }, { "epoch": 0.8374677204789107, "grad_norm": 0.0, "learning_rate": 1.3538252559937504e-06, "loss": 0.9426, "step": 21404 }, { "epoch": 0.8375068471711401, "grad_norm": 0.0, "learning_rate": 1.3531886295848961e-06, "loss": 0.9615, "step": 21405 }, { "epoch": 0.8375459738633696, "grad_norm": 0.0, "learning_rate": 1.3525521420329958e-06, "loss": 0.996, "step": 21406 }, { "epoch": 0.837585100555599, "grad_norm": 0.0, "learning_rate": 1.3519157933482707e-06, "loss": 0.8733, "step": 21407 }, { "epoch": 0.8376242272478285, "grad_norm": 0.0, "learning_rate": 1.351279583540942e-06, "loss": 0.9953, "step": 21408 }, { "epoch": 0.8376633539400579, "grad_norm": 0.0, "learning_rate": 1.350643512621228e-06, "loss": 0.7398, "step": 21409 }, { "epoch": 0.8377024806322874, "grad_norm": 0.0, "learning_rate": 1.3500075805993385e-06, "loss": 1.1234, "step": 21410 }, { "epoch": 0.8377416073245167, "grad_norm": 0.0, "learning_rate": 1.3493717874854905e-06, "loss": 0.8871, "step": 21411 }, { "epoch": 0.8377807340167462, "grad_norm": 0.0, "learning_rate": 1.3487361332898875e-06, "loss": 1.0182, "step": 21412 }, { "epoch": 0.8378198607089756, "grad_norm": 0.0, "learning_rate": 1.3481006180227462e-06, "loss": 0.8879, "step": 21413 }, { "epoch": 0.8378589874012051, "grad_norm": 0.0, "learning_rate": 1.3474652416942647e-06, "loss": 0.9882, "step": 21414 }, { "epoch": 0.8378981140934345, "grad_norm": 0.0, "learning_rate": 1.346830004314652e-06, "loss": 0.9634, "step": 21415 }, { "epoch": 0.837937240785664, "grad_norm": 0.0, "learning_rate": 1.3461949058941015e-06, "loss": 0.9529, "step": 21416 }, { "epoch": 0.8379763674778934, "grad_norm": 0.0, "learning_rate": 1.3455599464428215e-06, "loss": 0.928, "step": 21417 }, { "epoch": 0.8380154941701229, "grad_norm": 0.0, "learning_rate": 1.3449251259710017e-06, "loss": 1.0122, "step": 21418 }, { "epoch": 0.8380546208623523, "grad_norm": 0.0, "learning_rate": 1.3442904444888416e-06, "loss": 0.9988, "step": 21419 }, { "epoch": 0.8380937475545818, "grad_norm": 0.0, "learning_rate": 1.3436559020065288e-06, "loss": 0.9446, "step": 21420 }, { "epoch": 0.8381328742468112, "grad_norm": 0.0, "learning_rate": 1.343021498534255e-06, "loss": 0.9018, "step": 21421 }, { "epoch": 0.8381720009390407, "grad_norm": 0.0, "learning_rate": 1.3423872340822074e-06, "loss": 0.7943, "step": 21422 }, { "epoch": 0.83821112763127, "grad_norm": 0.0, "learning_rate": 1.3417531086605751e-06, "loss": 0.8997, "step": 21423 }, { "epoch": 0.8382502543234995, "grad_norm": 0.0, "learning_rate": 1.3411191222795362e-06, "loss": 0.9214, "step": 21424 }, { "epoch": 0.8382893810157289, "grad_norm": 0.0, "learning_rate": 1.3404852749492737e-06, "loss": 0.9912, "step": 21425 }, { "epoch": 0.8383285077079584, "grad_norm": 0.0, "learning_rate": 1.3398515666799673e-06, "loss": 0.9656, "step": 21426 }, { "epoch": 0.8383676344001878, "grad_norm": 0.0, "learning_rate": 1.3392179974817953e-06, "loss": 0.9534, "step": 21427 }, { "epoch": 0.8384067610924173, "grad_norm": 0.0, "learning_rate": 1.3385845673649268e-06, "loss": 1.0002, "step": 21428 }, { "epoch": 0.8384458877846467, "grad_norm": 0.0, "learning_rate": 1.3379512763395397e-06, "loss": 0.9452, "step": 21429 }, { "epoch": 0.8384850144768762, "grad_norm": 0.0, "learning_rate": 1.3373181244157972e-06, "loss": 0.9312, "step": 21430 }, { "epoch": 0.8385241411691056, "grad_norm": 0.0, "learning_rate": 1.3366851116038726e-06, "loss": 0.9343, "step": 21431 }, { "epoch": 0.838563267861335, "grad_norm": 0.0, "learning_rate": 1.3360522379139285e-06, "loss": 0.9856, "step": 21432 }, { "epoch": 0.8386023945535644, "grad_norm": 0.0, "learning_rate": 1.3354195033561313e-06, "loss": 0.9594, "step": 21433 }, { "epoch": 0.8386415212457938, "grad_norm": 0.0, "learning_rate": 1.3347869079406372e-06, "loss": 0.9205, "step": 21434 }, { "epoch": 0.8386806479380233, "grad_norm": 0.0, "learning_rate": 1.3341544516776072e-06, "loss": 0.9399, "step": 21435 }, { "epoch": 0.8387197746302527, "grad_norm": 0.0, "learning_rate": 1.3335221345771999e-06, "loss": 1.0016, "step": 21436 }, { "epoch": 0.8387589013224822, "grad_norm": 0.0, "learning_rate": 1.3328899566495656e-06, "loss": 0.8138, "step": 21437 }, { "epoch": 0.8387980280147116, "grad_norm": 0.0, "learning_rate": 1.3322579179048578e-06, "loss": 0.9218, "step": 21438 }, { "epoch": 0.8388371547069411, "grad_norm": 0.0, "learning_rate": 1.331626018353226e-06, "loss": 0.9634, "step": 21439 }, { "epoch": 0.8388762813991705, "grad_norm": 0.0, "learning_rate": 1.330994258004822e-06, "loss": 1.0264, "step": 21440 }, { "epoch": 0.8389154080914, "grad_norm": 0.0, "learning_rate": 1.330362636869783e-06, "loss": 0.9308, "step": 21441 }, { "epoch": 0.8389545347836294, "grad_norm": 0.0, "learning_rate": 1.3297311549582603e-06, "loss": 0.8918, "step": 21442 }, { "epoch": 0.8389936614758589, "grad_norm": 0.0, "learning_rate": 1.3290998122803856e-06, "loss": 1.0336, "step": 21443 }, { "epoch": 0.8390327881680882, "grad_norm": 0.0, "learning_rate": 1.3284686088463072e-06, "loss": 0.965, "step": 21444 }, { "epoch": 0.8390719148603177, "grad_norm": 0.0, "learning_rate": 1.327837544666155e-06, "loss": 0.9793, "step": 21445 }, { "epoch": 0.8391110415525471, "grad_norm": 0.0, "learning_rate": 1.3272066197500677e-06, "loss": 0.8042, "step": 21446 }, { "epoch": 0.8391501682447766, "grad_norm": 0.0, "learning_rate": 1.3265758341081692e-06, "loss": 0.885, "step": 21447 }, { "epoch": 0.839189294937006, "grad_norm": 0.0, "learning_rate": 1.325945187750599e-06, "loss": 0.9428, "step": 21448 }, { "epoch": 0.8392284216292355, "grad_norm": 0.0, "learning_rate": 1.3253146806874773e-06, "loss": 0.989, "step": 21449 }, { "epoch": 0.8392675483214649, "grad_norm": 0.0, "learning_rate": 1.3246843129289343e-06, "loss": 1.0248, "step": 21450 }, { "epoch": 0.8393066750136944, "grad_norm": 0.0, "learning_rate": 1.324054084485089e-06, "loss": 0.9412, "step": 21451 }, { "epoch": 0.8393458017059238, "grad_norm": 0.0, "learning_rate": 1.3234239953660633e-06, "loss": 1.0883, "step": 21452 }, { "epoch": 0.8393849283981533, "grad_norm": 0.0, "learning_rate": 1.3227940455819755e-06, "loss": 0.9247, "step": 21453 }, { "epoch": 0.8394240550903826, "grad_norm": 0.0, "learning_rate": 1.3221642351429442e-06, "loss": 0.9602, "step": 21454 }, { "epoch": 0.8394631817826121, "grad_norm": 0.0, "learning_rate": 1.3215345640590793e-06, "loss": 0.9654, "step": 21455 }, { "epoch": 0.8395023084748415, "grad_norm": 0.0, "learning_rate": 1.320905032340495e-06, "loss": 1.0444, "step": 21456 }, { "epoch": 0.839541435167071, "grad_norm": 0.0, "learning_rate": 1.3202756399972993e-06, "loss": 0.914, "step": 21457 }, { "epoch": 0.8395805618593004, "grad_norm": 0.0, "learning_rate": 1.3196463870396037e-06, "loss": 1.0861, "step": 21458 }, { "epoch": 0.8396196885515299, "grad_norm": 0.0, "learning_rate": 1.3190172734775075e-06, "loss": 1.1248, "step": 21459 }, { "epoch": 0.8396588152437593, "grad_norm": 0.0, "learning_rate": 1.3183882993211184e-06, "loss": 0.9155, "step": 21460 }, { "epoch": 0.8396979419359887, "grad_norm": 0.0, "learning_rate": 1.3177594645805304e-06, "loss": 0.9843, "step": 21461 }, { "epoch": 0.8397370686282182, "grad_norm": 0.0, "learning_rate": 1.3171307692658497e-06, "loss": 1.0757, "step": 21462 }, { "epoch": 0.8397761953204476, "grad_norm": 0.0, "learning_rate": 1.3165022133871664e-06, "loss": 0.9062, "step": 21463 }, { "epoch": 0.8398153220126771, "grad_norm": 0.0, "learning_rate": 1.3158737969545788e-06, "loss": 0.9041, "step": 21464 }, { "epoch": 0.8398544487049064, "grad_norm": 0.0, "learning_rate": 1.3152455199781723e-06, "loss": 0.9596, "step": 21465 }, { "epoch": 0.8398935753971359, "grad_norm": 0.0, "learning_rate": 1.3146173824680442e-06, "loss": 0.9767, "step": 21466 }, { "epoch": 0.8399327020893653, "grad_norm": 0.0, "learning_rate": 1.3139893844342756e-06, "loss": 0.8831, "step": 21467 }, { "epoch": 0.8399718287815948, "grad_norm": 0.0, "learning_rate": 1.3133615258869548e-06, "loss": 0.8931, "step": 21468 }, { "epoch": 0.8400109554738242, "grad_norm": 0.0, "learning_rate": 1.3127338068361617e-06, "loss": 0.8997, "step": 21469 }, { "epoch": 0.8400500821660537, "grad_norm": 0.0, "learning_rate": 1.312106227291977e-06, "loss": 0.8994, "step": 21470 }, { "epoch": 0.8400892088582831, "grad_norm": 0.0, "learning_rate": 1.3114787872644808e-06, "loss": 0.9154, "step": 21471 }, { "epoch": 0.8401283355505126, "grad_norm": 0.0, "learning_rate": 1.3108514867637489e-06, "loss": 0.8081, "step": 21472 }, { "epoch": 0.840167462242742, "grad_norm": 0.0, "learning_rate": 1.3102243257998526e-06, "loss": 0.8638, "step": 21473 }, { "epoch": 0.8402065889349715, "grad_norm": 0.0, "learning_rate": 1.3095973043828647e-06, "loss": 1.0236, "step": 21474 }, { "epoch": 0.8402457156272009, "grad_norm": 0.0, "learning_rate": 1.308970422522856e-06, "loss": 0.9713, "step": 21475 }, { "epoch": 0.8402848423194303, "grad_norm": 0.0, "learning_rate": 1.30834368022989e-06, "loss": 0.9161, "step": 21476 }, { "epoch": 0.8403239690116597, "grad_norm": 0.0, "learning_rate": 1.3077170775140336e-06, "loss": 0.9257, "step": 21477 }, { "epoch": 0.8403630957038892, "grad_norm": 0.0, "learning_rate": 1.3070906143853513e-06, "loss": 0.9605, "step": 21478 }, { "epoch": 0.8404022223961186, "grad_norm": 0.0, "learning_rate": 1.3064642908538983e-06, "loss": 1.0287, "step": 21479 }, { "epoch": 0.8404413490883481, "grad_norm": 0.0, "learning_rate": 1.3058381069297343e-06, "loss": 0.9376, "step": 21480 }, { "epoch": 0.8404804757805775, "grad_norm": 0.0, "learning_rate": 1.3052120626229192e-06, "loss": 1.0355, "step": 21481 }, { "epoch": 0.840519602472807, "grad_norm": 0.0, "learning_rate": 1.3045861579435003e-06, "loss": 0.9812, "step": 21482 }, { "epoch": 0.8405587291650364, "grad_norm": 0.0, "learning_rate": 1.3039603929015321e-06, "loss": 0.8158, "step": 21483 }, { "epoch": 0.8405978558572659, "grad_norm": 0.0, "learning_rate": 1.303334767507064e-06, "loss": 0.9676, "step": 21484 }, { "epoch": 0.8406369825494953, "grad_norm": 0.0, "learning_rate": 1.3027092817701437e-06, "loss": 0.834, "step": 21485 }, { "epoch": 0.8406761092417248, "grad_norm": 0.0, "learning_rate": 1.3020839357008108e-06, "loss": 0.9901, "step": 21486 }, { "epoch": 0.8407152359339541, "grad_norm": 0.0, "learning_rate": 1.3014587293091141e-06, "loss": 0.9773, "step": 21487 }, { "epoch": 0.8407543626261836, "grad_norm": 0.0, "learning_rate": 1.3008336626050854e-06, "loss": 1.0328, "step": 21488 }, { "epoch": 0.840793489318413, "grad_norm": 0.0, "learning_rate": 1.3002087355987726e-06, "loss": 0.9051, "step": 21489 }, { "epoch": 0.8408326160106424, "grad_norm": 0.0, "learning_rate": 1.2995839483002027e-06, "loss": 0.8431, "step": 21490 }, { "epoch": 0.8408717427028719, "grad_norm": 0.0, "learning_rate": 1.2989593007194157e-06, "loss": 0.8645, "step": 21491 }, { "epoch": 0.8409108693951013, "grad_norm": 0.0, "learning_rate": 1.2983347928664348e-06, "loss": 0.9247, "step": 21492 }, { "epoch": 0.8409499960873308, "grad_norm": 0.0, "learning_rate": 1.2977104247512973e-06, "loss": 0.9959, "step": 21493 }, { "epoch": 0.8409891227795602, "grad_norm": 0.0, "learning_rate": 1.297086196384023e-06, "loss": 1.0452, "step": 21494 }, { "epoch": 0.8410282494717897, "grad_norm": 0.0, "learning_rate": 1.296462107774642e-06, "loss": 0.9796, "step": 21495 }, { "epoch": 0.841067376164019, "grad_norm": 0.0, "learning_rate": 1.2958381589331714e-06, "loss": 0.9915, "step": 21496 }, { "epoch": 0.8411065028562486, "grad_norm": 0.0, "learning_rate": 1.2952143498696324e-06, "loss": 0.9948, "step": 21497 }, { "epoch": 0.8411456295484779, "grad_norm": 0.0, "learning_rate": 1.2945906805940434e-06, "loss": 0.9922, "step": 21498 }, { "epoch": 0.8411847562407074, "grad_norm": 0.0, "learning_rate": 1.2939671511164221e-06, "loss": 0.869, "step": 21499 }, { "epoch": 0.8412238829329368, "grad_norm": 0.0, "learning_rate": 1.293343761446777e-06, "loss": 0.9442, "step": 21500 }, { "epoch": 0.8412630096251663, "grad_norm": 0.0, "learning_rate": 1.2927205115951202e-06, "loss": 1.0633, "step": 21501 }, { "epoch": 0.8413021363173957, "grad_norm": 0.0, "learning_rate": 1.2920974015714617e-06, "loss": 1.0247, "step": 21502 }, { "epoch": 0.8413412630096252, "grad_norm": 0.0, "learning_rate": 1.2914744313858097e-06, "loss": 1.0535, "step": 21503 }, { "epoch": 0.8413803897018546, "grad_norm": 0.0, "learning_rate": 1.290851601048163e-06, "loss": 0.8448, "step": 21504 }, { "epoch": 0.8414195163940841, "grad_norm": 0.0, "learning_rate": 1.2902289105685272e-06, "loss": 1.0196, "step": 21505 }, { "epoch": 0.8414586430863135, "grad_norm": 0.0, "learning_rate": 1.289606359956901e-06, "loss": 1.0338, "step": 21506 }, { "epoch": 0.841497769778543, "grad_norm": 0.0, "learning_rate": 1.2889839492232836e-06, "loss": 0.9525, "step": 21507 }, { "epoch": 0.8415368964707723, "grad_norm": 0.0, "learning_rate": 1.2883616783776676e-06, "loss": 0.9671, "step": 21508 }, { "epoch": 0.8415760231630018, "grad_norm": 0.0, "learning_rate": 1.2877395474300481e-06, "loss": 0.9336, "step": 21509 }, { "epoch": 0.8416151498552312, "grad_norm": 0.0, "learning_rate": 1.2871175563904092e-06, "loss": 0.9341, "step": 21510 }, { "epoch": 0.8416542765474607, "grad_norm": 0.0, "learning_rate": 1.2864957052687499e-06, "loss": 1.0349, "step": 21511 }, { "epoch": 0.8416934032396901, "grad_norm": 0.0, "learning_rate": 1.2858739940750485e-06, "loss": 1.0294, "step": 21512 }, { "epoch": 0.8417325299319196, "grad_norm": 0.0, "learning_rate": 1.285252422819293e-06, "loss": 0.9298, "step": 21513 }, { "epoch": 0.841771656624149, "grad_norm": 0.0, "learning_rate": 1.2846309915114618e-06, "loss": 0.9327, "step": 21514 }, { "epoch": 0.8418107833163785, "grad_norm": 0.0, "learning_rate": 1.284009700161536e-06, "loss": 0.8663, "step": 21515 }, { "epoch": 0.8418499100086079, "grad_norm": 0.0, "learning_rate": 1.283388548779493e-06, "loss": 0.9008, "step": 21516 }, { "epoch": 0.8418890367008373, "grad_norm": 0.0, "learning_rate": 1.282767537375309e-06, "loss": 0.8813, "step": 21517 }, { "epoch": 0.8419281633930668, "grad_norm": 0.0, "learning_rate": 1.282146665958952e-06, "loss": 0.9274, "step": 21518 }, { "epoch": 0.8419672900852961, "grad_norm": 0.0, "learning_rate": 1.2815259345403975e-06, "loss": 0.9301, "step": 21519 }, { "epoch": 0.8420064167775256, "grad_norm": 0.0, "learning_rate": 1.2809053431296127e-06, "loss": 0.8674, "step": 21520 }, { "epoch": 0.842045543469755, "grad_norm": 0.0, "learning_rate": 1.2802848917365595e-06, "loss": 0.952, "step": 21521 }, { "epoch": 0.8420846701619845, "grad_norm": 0.0, "learning_rate": 1.2796645803712048e-06, "loss": 1.0419, "step": 21522 }, { "epoch": 0.8421237968542139, "grad_norm": 0.0, "learning_rate": 1.2790444090435106e-06, "loss": 0.9639, "step": 21523 }, { "epoch": 0.8421629235464434, "grad_norm": 0.0, "learning_rate": 1.2784243777634375e-06, "loss": 0.9478, "step": 21524 }, { "epoch": 0.8422020502386728, "grad_norm": 0.0, "learning_rate": 1.2778044865409377e-06, "loss": 0.9247, "step": 21525 }, { "epoch": 0.8422411769309023, "grad_norm": 0.0, "learning_rate": 1.277184735385968e-06, "loss": 1.0541, "step": 21526 }, { "epoch": 0.8422803036231317, "grad_norm": 0.0, "learning_rate": 1.2765651243084831e-06, "loss": 0.9921, "step": 21527 }, { "epoch": 0.8423194303153612, "grad_norm": 0.0, "learning_rate": 1.27594565331843e-06, "loss": 0.9818, "step": 21528 }, { "epoch": 0.8423585570075905, "grad_norm": 0.0, "learning_rate": 1.2753263224257572e-06, "loss": 0.9242, "step": 21529 }, { "epoch": 0.84239768369982, "grad_norm": 0.0, "learning_rate": 1.2747071316404126e-06, "loss": 0.8429, "step": 21530 }, { "epoch": 0.8424368103920494, "grad_norm": 0.0, "learning_rate": 1.2740880809723366e-06, "loss": 0.8281, "step": 21531 }, { "epoch": 0.8424759370842789, "grad_norm": 0.0, "learning_rate": 1.2734691704314717e-06, "loss": 0.9346, "step": 21532 }, { "epoch": 0.8425150637765083, "grad_norm": 0.0, "learning_rate": 1.2728504000277575e-06, "loss": 0.949, "step": 21533 }, { "epoch": 0.8425541904687378, "grad_norm": 0.0, "learning_rate": 1.2722317697711318e-06, "loss": 0.9434, "step": 21534 }, { "epoch": 0.8425933171609672, "grad_norm": 0.0, "learning_rate": 1.2716132796715252e-06, "loss": 1.0457, "step": 21535 }, { "epoch": 0.8426324438531967, "grad_norm": 0.0, "learning_rate": 1.2709949297388746e-06, "loss": 0.8623, "step": 21536 }, { "epoch": 0.8426715705454261, "grad_norm": 0.0, "learning_rate": 1.270376719983103e-06, "loss": 0.9507, "step": 21537 }, { "epoch": 0.8427106972376556, "grad_norm": 0.0, "learning_rate": 1.2697586504141458e-06, "loss": 0.8999, "step": 21538 }, { "epoch": 0.842749823929885, "grad_norm": 0.0, "learning_rate": 1.2691407210419238e-06, "loss": 1.021, "step": 21539 }, { "epoch": 0.8427889506221145, "grad_norm": 0.0, "learning_rate": 1.2685229318763637e-06, "loss": 1.0633, "step": 21540 }, { "epoch": 0.8428280773143438, "grad_norm": 0.0, "learning_rate": 1.2679052829273796e-06, "loss": 1.0002, "step": 21541 }, { "epoch": 0.8428672040065733, "grad_norm": 0.0, "learning_rate": 1.2672877742048984e-06, "loss": 0.8498, "step": 21542 }, { "epoch": 0.8429063306988027, "grad_norm": 0.0, "learning_rate": 1.2666704057188317e-06, "loss": 1.0337, "step": 21543 }, { "epoch": 0.8429454573910322, "grad_norm": 0.0, "learning_rate": 1.2660531774790964e-06, "loss": 0.8978, "step": 21544 }, { "epoch": 0.8429845840832616, "grad_norm": 0.0, "learning_rate": 1.2654360894956008e-06, "loss": 0.9748, "step": 21545 }, { "epoch": 0.843023710775491, "grad_norm": 0.0, "learning_rate": 1.2648191417782562e-06, "loss": 1.0693, "step": 21546 }, { "epoch": 0.8430628374677205, "grad_norm": 0.0, "learning_rate": 1.2642023343369714e-06, "loss": 0.9349, "step": 21547 }, { "epoch": 0.8431019641599499, "grad_norm": 0.0, "learning_rate": 1.2635856671816516e-06, "loss": 1.0772, "step": 21548 }, { "epoch": 0.8431410908521794, "grad_norm": 0.0, "learning_rate": 1.2629691403221977e-06, "loss": 1.0145, "step": 21549 }, { "epoch": 0.8431802175444087, "grad_norm": 0.0, "learning_rate": 1.2623527537685098e-06, "loss": 1.0427, "step": 21550 }, { "epoch": 0.8432193442366382, "grad_norm": 0.0, "learning_rate": 1.2617365075304888e-06, "loss": 0.9173, "step": 21551 }, { "epoch": 0.8432584709288676, "grad_norm": 0.0, "learning_rate": 1.2611204016180324e-06, "loss": 0.8784, "step": 21552 }, { "epoch": 0.8432975976210971, "grad_norm": 0.0, "learning_rate": 1.260504436041029e-06, "loss": 1.0945, "step": 21553 }, { "epoch": 0.8433367243133265, "grad_norm": 0.0, "learning_rate": 1.2598886108093755e-06, "loss": 0.9542, "step": 21554 }, { "epoch": 0.843375851005556, "grad_norm": 0.0, "learning_rate": 1.259272925932954e-06, "loss": 1.0022, "step": 21555 }, { "epoch": 0.8434149776977854, "grad_norm": 0.0, "learning_rate": 1.2586573814216619e-06, "loss": 0.8386, "step": 21556 }, { "epoch": 0.8434541043900149, "grad_norm": 0.0, "learning_rate": 1.2580419772853758e-06, "loss": 0.8245, "step": 21557 }, { "epoch": 0.8434932310822443, "grad_norm": 0.0, "learning_rate": 1.2574267135339836e-06, "loss": 0.9692, "step": 21558 }, { "epoch": 0.8435323577744738, "grad_norm": 0.0, "learning_rate": 1.2568115901773593e-06, "loss": 0.9474, "step": 21559 }, { "epoch": 0.8435714844667032, "grad_norm": 0.0, "learning_rate": 1.2561966072253896e-06, "loss": 0.9585, "step": 21560 }, { "epoch": 0.8436106111589327, "grad_norm": 0.0, "learning_rate": 1.255581764687943e-06, "loss": 0.8791, "step": 21561 }, { "epoch": 0.843649737851162, "grad_norm": 0.0, "learning_rate": 1.2549670625748988e-06, "loss": 0.9742, "step": 21562 }, { "epoch": 0.8436888645433915, "grad_norm": 0.0, "learning_rate": 1.2543525008961222e-06, "loss": 0.8661, "step": 21563 }, { "epoch": 0.8437279912356209, "grad_norm": 0.0, "learning_rate": 1.2537380796614863e-06, "loss": 0.9659, "step": 21564 }, { "epoch": 0.8437671179278504, "grad_norm": 0.0, "learning_rate": 1.2531237988808565e-06, "loss": 0.9773, "step": 21565 }, { "epoch": 0.8438062446200798, "grad_norm": 0.0, "learning_rate": 1.2525096585641006e-06, "loss": 0.8665, "step": 21566 }, { "epoch": 0.8438453713123093, "grad_norm": 0.0, "learning_rate": 1.2518956587210761e-06, "loss": 0.9651, "step": 21567 }, { "epoch": 0.8438844980045387, "grad_norm": 0.0, "learning_rate": 1.2512817993616455e-06, "loss": 0.9586, "step": 21568 }, { "epoch": 0.8439236246967682, "grad_norm": 0.0, "learning_rate": 1.2506680804956695e-06, "loss": 0.8979, "step": 21569 }, { "epoch": 0.8439627513889976, "grad_norm": 0.0, "learning_rate": 1.250054502132997e-06, "loss": 0.9964, "step": 21570 }, { "epoch": 0.8440018780812271, "grad_norm": 0.0, "learning_rate": 1.2494410642834853e-06, "loss": 0.7969, "step": 21571 }, { "epoch": 0.8440410047734564, "grad_norm": 0.0, "learning_rate": 1.248827766956986e-06, "loss": 1.0448, "step": 21572 }, { "epoch": 0.844080131465686, "grad_norm": 0.0, "learning_rate": 1.2482146101633474e-06, "loss": 0.8784, "step": 21573 }, { "epoch": 0.8441192581579153, "grad_norm": 0.0, "learning_rate": 1.2476015939124142e-06, "loss": 0.9784, "step": 21574 }, { "epoch": 0.8441583848501447, "grad_norm": 0.0, "learning_rate": 1.246988718214035e-06, "loss": 1.0677, "step": 21575 }, { "epoch": 0.8441975115423742, "grad_norm": 0.0, "learning_rate": 1.2463759830780453e-06, "loss": 1.0263, "step": 21576 }, { "epoch": 0.8442366382346036, "grad_norm": 0.0, "learning_rate": 1.2457633885142896e-06, "loss": 0.7995, "step": 21577 }, { "epoch": 0.8442757649268331, "grad_norm": 0.0, "learning_rate": 1.245150934532603e-06, "loss": 0.9853, "step": 21578 }, { "epoch": 0.8443148916190625, "grad_norm": 0.0, "learning_rate": 1.2445386211428256e-06, "loss": 0.9158, "step": 21579 }, { "epoch": 0.844354018311292, "grad_norm": 0.0, "learning_rate": 1.243926448354783e-06, "loss": 0.8724, "step": 21580 }, { "epoch": 0.8443931450035214, "grad_norm": 0.0, "learning_rate": 1.2433144161783106e-06, "loss": 1.0495, "step": 21581 }, { "epoch": 0.8444322716957509, "grad_norm": 0.0, "learning_rate": 1.242702524623236e-06, "loss": 0.9652, "step": 21582 }, { "epoch": 0.8444713983879802, "grad_norm": 0.0, "learning_rate": 1.242090773699387e-06, "loss": 0.9393, "step": 21583 }, { "epoch": 0.8445105250802097, "grad_norm": 0.0, "learning_rate": 1.2414791634165846e-06, "loss": 0.8979, "step": 21584 }, { "epoch": 0.8445496517724391, "grad_norm": 0.0, "learning_rate": 1.240867693784653e-06, "loss": 0.8757, "step": 21585 }, { "epoch": 0.8445887784646686, "grad_norm": 0.0, "learning_rate": 1.2402563648134059e-06, "loss": 0.9905, "step": 21586 }, { "epoch": 0.844627905156898, "grad_norm": 0.0, "learning_rate": 1.2396451765126704e-06, "loss": 0.9481, "step": 21587 }, { "epoch": 0.8446670318491275, "grad_norm": 0.0, "learning_rate": 1.2390341288922535e-06, "loss": 0.9841, "step": 21588 }, { "epoch": 0.8447061585413569, "grad_norm": 0.0, "learning_rate": 1.2384232219619719e-06, "loss": 1.0276, "step": 21589 }, { "epoch": 0.8447452852335864, "grad_norm": 0.0, "learning_rate": 1.2378124557316306e-06, "loss": 0.7226, "step": 21590 }, { "epoch": 0.8447844119258158, "grad_norm": 0.0, "learning_rate": 1.2372018302110466e-06, "loss": 0.9857, "step": 21591 }, { "epoch": 0.8448235386180453, "grad_norm": 0.0, "learning_rate": 1.2365913454100175e-06, "loss": 0.9023, "step": 21592 }, { "epoch": 0.8448626653102747, "grad_norm": 0.0, "learning_rate": 1.2359810013383534e-06, "loss": 0.9929, "step": 21593 }, { "epoch": 0.8449017920025041, "grad_norm": 0.0, "learning_rate": 1.2353707980058494e-06, "loss": 0.9429, "step": 21594 }, { "epoch": 0.8449409186947335, "grad_norm": 0.0, "learning_rate": 1.234760735422309e-06, "loss": 0.7998, "step": 21595 }, { "epoch": 0.844980045386963, "grad_norm": 0.0, "learning_rate": 1.2341508135975266e-06, "loss": 0.896, "step": 21596 }, { "epoch": 0.8450191720791924, "grad_norm": 0.0, "learning_rate": 1.2335410325413e-06, "loss": 0.8639, "step": 21597 }, { "epoch": 0.8450582987714219, "grad_norm": 0.0, "learning_rate": 1.2329313922634178e-06, "loss": 0.9272, "step": 21598 }, { "epoch": 0.8450974254636513, "grad_norm": 0.0, "learning_rate": 1.2323218927736713e-06, "loss": 0.8461, "step": 21599 }, { "epoch": 0.8451365521558808, "grad_norm": 0.0, "learning_rate": 1.2317125340818492e-06, "loss": 1.0847, "step": 21600 }, { "epoch": 0.8451756788481102, "grad_norm": 0.0, "learning_rate": 1.231103316197738e-06, "loss": 1.0121, "step": 21601 }, { "epoch": 0.8452148055403396, "grad_norm": 0.0, "learning_rate": 1.230494239131118e-06, "loss": 0.9466, "step": 21602 }, { "epoch": 0.8452539322325691, "grad_norm": 0.0, "learning_rate": 1.2298853028917733e-06, "loss": 1.0027, "step": 21603 }, { "epoch": 0.8452930589247984, "grad_norm": 0.0, "learning_rate": 1.2292765074894775e-06, "loss": 0.9543, "step": 21604 }, { "epoch": 0.8453321856170279, "grad_norm": 0.0, "learning_rate": 1.2286678529340146e-06, "loss": 1.1312, "step": 21605 }, { "epoch": 0.8453713123092573, "grad_norm": 0.0, "learning_rate": 1.2280593392351526e-06, "loss": 0.9736, "step": 21606 }, { "epoch": 0.8454104390014868, "grad_norm": 0.0, "learning_rate": 1.227450966402668e-06, "loss": 0.8778, "step": 21607 }, { "epoch": 0.8454495656937162, "grad_norm": 0.0, "learning_rate": 1.2268427344463263e-06, "loss": 0.8939, "step": 21608 }, { "epoch": 0.8454886923859457, "grad_norm": 0.0, "learning_rate": 1.2262346433758965e-06, "loss": 0.953, "step": 21609 }, { "epoch": 0.8455278190781751, "grad_norm": 0.0, "learning_rate": 1.225626693201144e-06, "loss": 1.0069, "step": 21610 }, { "epoch": 0.8455669457704046, "grad_norm": 0.0, "learning_rate": 1.2250188839318344e-06, "loss": 0.9406, "step": 21611 }, { "epoch": 0.845606072462634, "grad_norm": 0.0, "learning_rate": 1.2244112155777243e-06, "loss": 0.8751, "step": 21612 }, { "epoch": 0.8456451991548635, "grad_norm": 0.0, "learning_rate": 1.2238036881485726e-06, "loss": 0.9179, "step": 21613 }, { "epoch": 0.8456843258470929, "grad_norm": 0.0, "learning_rate": 1.2231963016541392e-06, "loss": 0.9344, "step": 21614 }, { "epoch": 0.8457234525393224, "grad_norm": 0.0, "learning_rate": 1.222589056104173e-06, "loss": 0.9376, "step": 21615 }, { "epoch": 0.8457625792315517, "grad_norm": 0.0, "learning_rate": 1.2219819515084275e-06, "loss": 1.0126, "step": 21616 }, { "epoch": 0.8458017059237812, "grad_norm": 0.0, "learning_rate": 1.2213749878766533e-06, "loss": 0.8926, "step": 21617 }, { "epoch": 0.8458408326160106, "grad_norm": 0.0, "learning_rate": 1.2207681652185977e-06, "loss": 0.9418, "step": 21618 }, { "epoch": 0.8458799593082401, "grad_norm": 0.0, "learning_rate": 1.2201614835440034e-06, "loss": 1.0355, "step": 21619 }, { "epoch": 0.8459190860004695, "grad_norm": 0.0, "learning_rate": 1.2195549428626151e-06, "loss": 0.8718, "step": 21620 }, { "epoch": 0.845958212692699, "grad_norm": 0.0, "learning_rate": 1.2189485431841675e-06, "loss": 1.0058, "step": 21621 }, { "epoch": 0.8459973393849284, "grad_norm": 0.0, "learning_rate": 1.2183422845184078e-06, "loss": 0.8901, "step": 21622 }, { "epoch": 0.8460364660771579, "grad_norm": 0.0, "learning_rate": 1.2177361668750641e-06, "loss": 0.912, "step": 21623 }, { "epoch": 0.8460755927693873, "grad_norm": 0.0, "learning_rate": 1.2171301902638766e-06, "loss": 0.9866, "step": 21624 }, { "epoch": 0.8461147194616168, "grad_norm": 0.0, "learning_rate": 1.2165243546945693e-06, "loss": 1.0374, "step": 21625 }, { "epoch": 0.8461538461538461, "grad_norm": 0.0, "learning_rate": 1.2159186601768747e-06, "loss": 0.9292, "step": 21626 }, { "epoch": 0.8461929728460756, "grad_norm": 0.0, "learning_rate": 1.2153131067205192e-06, "loss": 1.0384, "step": 21627 }, { "epoch": 0.846232099538305, "grad_norm": 0.0, "learning_rate": 1.2147076943352298e-06, "loss": 1.0052, "step": 21628 }, { "epoch": 0.8462712262305345, "grad_norm": 0.0, "learning_rate": 1.2141024230307242e-06, "loss": 1.0285, "step": 21629 }, { "epoch": 0.8463103529227639, "grad_norm": 0.0, "learning_rate": 1.2134972928167232e-06, "loss": 0.9726, "step": 21630 }, { "epoch": 0.8463494796149933, "grad_norm": 0.0, "learning_rate": 1.2128923037029471e-06, "loss": 0.8402, "step": 21631 }, { "epoch": 0.8463886063072228, "grad_norm": 0.0, "learning_rate": 1.2122874556991115e-06, "loss": 1.0827, "step": 21632 }, { "epoch": 0.8464277329994522, "grad_norm": 0.0, "learning_rate": 1.2116827488149252e-06, "loss": 1.1321, "step": 21633 }, { "epoch": 0.8464668596916817, "grad_norm": 0.0, "learning_rate": 1.2110781830601036e-06, "loss": 0.9369, "step": 21634 }, { "epoch": 0.8465059863839111, "grad_norm": 0.0, "learning_rate": 1.2104737584443492e-06, "loss": 1.0967, "step": 21635 }, { "epoch": 0.8465451130761406, "grad_norm": 0.0, "learning_rate": 1.2098694749773766e-06, "loss": 1.0433, "step": 21636 }, { "epoch": 0.8465842397683699, "grad_norm": 0.0, "learning_rate": 1.2092653326688842e-06, "loss": 0.928, "step": 21637 }, { "epoch": 0.8466233664605994, "grad_norm": 0.0, "learning_rate": 1.2086613315285756e-06, "loss": 1.0097, "step": 21638 }, { "epoch": 0.8466624931528288, "grad_norm": 0.0, "learning_rate": 1.2080574715661474e-06, "loss": 0.9939, "step": 21639 }, { "epoch": 0.8467016198450583, "grad_norm": 0.0, "learning_rate": 1.2074537527913022e-06, "loss": 1.0314, "step": 21640 }, { "epoch": 0.8467407465372877, "grad_norm": 0.0, "learning_rate": 1.2068501752137308e-06, "loss": 0.9659, "step": 21641 }, { "epoch": 0.8467798732295172, "grad_norm": 0.0, "learning_rate": 1.2062467388431287e-06, "loss": 1.0805, "step": 21642 }, { "epoch": 0.8468189999217466, "grad_norm": 0.0, "learning_rate": 1.205643443689183e-06, "loss": 0.9074, "step": 21643 }, { "epoch": 0.8468581266139761, "grad_norm": 0.0, "learning_rate": 1.2050402897615832e-06, "loss": 1.0882, "step": 21644 }, { "epoch": 0.8468972533062055, "grad_norm": 0.0, "learning_rate": 1.2044372770700162e-06, "loss": 0.912, "step": 21645 }, { "epoch": 0.846936379998435, "grad_norm": 0.0, "learning_rate": 1.203834405624167e-06, "loss": 1.0241, "step": 21646 }, { "epoch": 0.8469755066906643, "grad_norm": 0.0, "learning_rate": 1.2032316754337126e-06, "loss": 1.0231, "step": 21647 }, { "epoch": 0.8470146333828938, "grad_norm": 0.0, "learning_rate": 1.2026290865083356e-06, "loss": 0.975, "step": 21648 }, { "epoch": 0.8470537600751232, "grad_norm": 0.0, "learning_rate": 1.2020266388577106e-06, "loss": 0.8468, "step": 21649 }, { "epoch": 0.8470928867673527, "grad_norm": 0.0, "learning_rate": 1.2014243324915154e-06, "loss": 0.8462, "step": 21650 }, { "epoch": 0.8471320134595821, "grad_norm": 0.0, "learning_rate": 1.2008221674194188e-06, "loss": 0.8982, "step": 21651 }, { "epoch": 0.8471711401518116, "grad_norm": 0.0, "learning_rate": 1.200220143651094e-06, "loss": 1.022, "step": 21652 }, { "epoch": 0.847210266844041, "grad_norm": 0.0, "learning_rate": 1.1996182611962048e-06, "loss": 0.9893, "step": 21653 }, { "epoch": 0.8472493935362705, "grad_norm": 0.0, "learning_rate": 1.1990165200644188e-06, "loss": 0.8889, "step": 21654 }, { "epoch": 0.8472885202284999, "grad_norm": 0.0, "learning_rate": 1.1984149202653995e-06, "loss": 0.9989, "step": 21655 }, { "epoch": 0.8473276469207294, "grad_norm": 0.0, "learning_rate": 1.19781346180881e-06, "loss": 0.9477, "step": 21656 }, { "epoch": 0.8473667736129588, "grad_norm": 0.0, "learning_rate": 1.1972121447043038e-06, "loss": 0.9231, "step": 21657 }, { "epoch": 0.8474059003051883, "grad_norm": 0.0, "learning_rate": 1.1966109689615402e-06, "loss": 1.0108, "step": 21658 }, { "epoch": 0.8474450269974176, "grad_norm": 0.0, "learning_rate": 1.1960099345901743e-06, "loss": 0.959, "step": 21659 }, { "epoch": 0.847484153689647, "grad_norm": 0.0, "learning_rate": 1.1954090415998566e-06, "loss": 1.0513, "step": 21660 }, { "epoch": 0.8475232803818765, "grad_norm": 0.0, "learning_rate": 1.1948082900002357e-06, "loss": 0.9099, "step": 21661 }, { "epoch": 0.8475624070741059, "grad_norm": 0.0, "learning_rate": 1.1942076798009604e-06, "loss": 1.0613, "step": 21662 }, { "epoch": 0.8476015337663354, "grad_norm": 0.0, "learning_rate": 1.1936072110116781e-06, "loss": 1.0781, "step": 21663 }, { "epoch": 0.8476406604585648, "grad_norm": 0.0, "learning_rate": 1.1930068836420261e-06, "loss": 1.0333, "step": 21664 }, { "epoch": 0.8476797871507943, "grad_norm": 0.0, "learning_rate": 1.1924066977016502e-06, "loss": 0.9189, "step": 21665 }, { "epoch": 0.8477189138430237, "grad_norm": 0.0, "learning_rate": 1.1918066532001815e-06, "loss": 1.0473, "step": 21666 }, { "epoch": 0.8477580405352532, "grad_norm": 0.0, "learning_rate": 1.1912067501472656e-06, "loss": 0.8327, "step": 21667 }, { "epoch": 0.8477971672274826, "grad_norm": 0.0, "learning_rate": 1.1906069885525285e-06, "loss": 0.9745, "step": 21668 }, { "epoch": 0.847836293919712, "grad_norm": 0.0, "learning_rate": 1.1900073684256075e-06, "loss": 0.9565, "step": 21669 }, { "epoch": 0.8478754206119414, "grad_norm": 0.0, "learning_rate": 1.1894078897761252e-06, "loss": 0.9208, "step": 21670 }, { "epoch": 0.8479145473041709, "grad_norm": 0.0, "learning_rate": 1.1888085526137127e-06, "loss": 0.8953, "step": 21671 }, { "epoch": 0.8479536739964003, "grad_norm": 0.0, "learning_rate": 1.1882093569479935e-06, "loss": 0.8307, "step": 21672 }, { "epoch": 0.8479928006886298, "grad_norm": 0.0, "learning_rate": 1.1876103027885931e-06, "loss": 0.9881, "step": 21673 }, { "epoch": 0.8480319273808592, "grad_norm": 0.0, "learning_rate": 1.1870113901451264e-06, "loss": 0.8745, "step": 21674 }, { "epoch": 0.8480710540730887, "grad_norm": 0.0, "learning_rate": 1.186412619027213e-06, "loss": 0.8979, "step": 21675 }, { "epoch": 0.8481101807653181, "grad_norm": 0.0, "learning_rate": 1.1858139894444686e-06, "loss": 1.0461, "step": 21676 }, { "epoch": 0.8481493074575476, "grad_norm": 0.0, "learning_rate": 1.1852155014065092e-06, "loss": 0.822, "step": 21677 }, { "epoch": 0.848188434149777, "grad_norm": 0.0, "learning_rate": 1.1846171549229413e-06, "loss": 0.8773, "step": 21678 }, { "epoch": 0.8482275608420065, "grad_norm": 0.0, "learning_rate": 1.184018950003376e-06, "loss": 0.9919, "step": 21679 }, { "epoch": 0.8482666875342358, "grad_norm": 0.0, "learning_rate": 1.1834208866574182e-06, "loss": 1.0306, "step": 21680 }, { "epoch": 0.8483058142264653, "grad_norm": 0.0, "learning_rate": 1.1828229648946764e-06, "loss": 0.8862, "step": 21681 }, { "epoch": 0.8483449409186947, "grad_norm": 0.0, "learning_rate": 1.1822251847247468e-06, "loss": 0.827, "step": 21682 }, { "epoch": 0.8483840676109242, "grad_norm": 0.0, "learning_rate": 1.1816275461572335e-06, "loss": 0.8809, "step": 21683 }, { "epoch": 0.8484231943031536, "grad_norm": 0.0, "learning_rate": 1.1810300492017269e-06, "loss": 1.0005, "step": 21684 }, { "epoch": 0.8484623209953831, "grad_norm": 0.0, "learning_rate": 1.1804326938678323e-06, "loss": 1.0257, "step": 21685 }, { "epoch": 0.8485014476876125, "grad_norm": 0.0, "learning_rate": 1.1798354801651336e-06, "loss": 0.9791, "step": 21686 }, { "epoch": 0.848540574379842, "grad_norm": 0.0, "learning_rate": 1.1792384081032282e-06, "loss": 0.9125, "step": 21687 }, { "epoch": 0.8485797010720714, "grad_norm": 0.0, "learning_rate": 1.1786414776916967e-06, "loss": 0.8569, "step": 21688 }, { "epoch": 0.8486188277643008, "grad_norm": 0.0, "learning_rate": 1.178044688940132e-06, "loss": 0.8896, "step": 21689 }, { "epoch": 0.8486579544565303, "grad_norm": 0.0, "learning_rate": 1.1774480418581147e-06, "loss": 1.0187, "step": 21690 }, { "epoch": 0.8486970811487596, "grad_norm": 0.0, "learning_rate": 1.176851536455228e-06, "loss": 1.0159, "step": 21691 }, { "epoch": 0.8487362078409891, "grad_norm": 0.0, "learning_rate": 1.1762551727410476e-06, "loss": 1.0655, "step": 21692 }, { "epoch": 0.8487753345332185, "grad_norm": 0.0, "learning_rate": 1.1756589507251515e-06, "loss": 0.9449, "step": 21693 }, { "epoch": 0.848814461225448, "grad_norm": 0.0, "learning_rate": 1.1750628704171163e-06, "loss": 0.9482, "step": 21694 }, { "epoch": 0.8488535879176774, "grad_norm": 0.0, "learning_rate": 1.1744669318265146e-06, "loss": 0.9653, "step": 21695 }, { "epoch": 0.8488927146099069, "grad_norm": 0.0, "learning_rate": 1.1738711349629128e-06, "loss": 0.9409, "step": 21696 }, { "epoch": 0.8489318413021363, "grad_norm": 0.0, "learning_rate": 1.1732754798358815e-06, "loss": 0.9282, "step": 21697 }, { "epoch": 0.8489709679943658, "grad_norm": 0.0, "learning_rate": 1.1726799664549871e-06, "loss": 0.8943, "step": 21698 }, { "epoch": 0.8490100946865952, "grad_norm": 0.0, "learning_rate": 1.1720845948297888e-06, "loss": 0.9973, "step": 21699 }, { "epoch": 0.8490492213788247, "grad_norm": 0.0, "learning_rate": 1.17148936496985e-06, "loss": 0.8541, "step": 21700 }, { "epoch": 0.849088348071054, "grad_norm": 0.0, "learning_rate": 1.170894276884732e-06, "loss": 0.7665, "step": 21701 }, { "epoch": 0.8491274747632835, "grad_norm": 0.0, "learning_rate": 1.170299330583986e-06, "loss": 1.0473, "step": 21702 }, { "epoch": 0.8491666014555129, "grad_norm": 0.0, "learning_rate": 1.1697045260771688e-06, "loss": 1.0124, "step": 21703 }, { "epoch": 0.8492057281477424, "grad_norm": 0.0, "learning_rate": 1.169109863373833e-06, "loss": 1.0514, "step": 21704 }, { "epoch": 0.8492448548399718, "grad_norm": 0.0, "learning_rate": 1.168515342483526e-06, "loss": 1.0363, "step": 21705 }, { "epoch": 0.8492839815322013, "grad_norm": 0.0, "learning_rate": 1.1679209634157962e-06, "loss": 0.9389, "step": 21706 }, { "epoch": 0.8493231082244307, "grad_norm": 0.0, "learning_rate": 1.167326726180188e-06, "loss": 0.9657, "step": 21707 }, { "epoch": 0.8493622349166602, "grad_norm": 0.0, "learning_rate": 1.166732630786247e-06, "loss": 0.9997, "step": 21708 }, { "epoch": 0.8494013616088896, "grad_norm": 0.0, "learning_rate": 1.166138677243508e-06, "loss": 0.9526, "step": 21709 }, { "epoch": 0.8494404883011191, "grad_norm": 0.0, "learning_rate": 1.1655448655615165e-06, "loss": 0.9326, "step": 21710 }, { "epoch": 0.8494796149933485, "grad_norm": 0.0, "learning_rate": 1.1649511957497984e-06, "loss": 0.9146, "step": 21711 }, { "epoch": 0.849518741685578, "grad_norm": 0.0, "learning_rate": 1.164357667817898e-06, "loss": 1.0443, "step": 21712 }, { "epoch": 0.8495578683778073, "grad_norm": 0.0, "learning_rate": 1.1637642817753391e-06, "loss": 0.9518, "step": 21713 }, { "epoch": 0.8495969950700368, "grad_norm": 0.0, "learning_rate": 1.1631710376316562e-06, "loss": 0.9564, "step": 21714 }, { "epoch": 0.8496361217622662, "grad_norm": 0.0, "learning_rate": 1.1625779353963695e-06, "loss": 1.0307, "step": 21715 }, { "epoch": 0.8496752484544956, "grad_norm": 0.0, "learning_rate": 1.1619849750790113e-06, "loss": 0.9352, "step": 21716 }, { "epoch": 0.8497143751467251, "grad_norm": 0.0, "learning_rate": 1.1613921566890972e-06, "loss": 0.9641, "step": 21717 }, { "epoch": 0.8497535018389545, "grad_norm": 0.0, "learning_rate": 1.160799480236151e-06, "loss": 0.923, "step": 21718 }, { "epoch": 0.849792628531184, "grad_norm": 0.0, "learning_rate": 1.1602069457296882e-06, "loss": 0.9972, "step": 21719 }, { "epoch": 0.8498317552234134, "grad_norm": 0.0, "learning_rate": 1.1596145531792246e-06, "loss": 0.96, "step": 21720 }, { "epoch": 0.8498708819156429, "grad_norm": 0.0, "learning_rate": 1.1590223025942737e-06, "loss": 0.9021, "step": 21721 }, { "epoch": 0.8499100086078722, "grad_norm": 0.0, "learning_rate": 1.158430193984348e-06, "loss": 0.9256, "step": 21722 }, { "epoch": 0.8499491353001017, "grad_norm": 0.0, "learning_rate": 1.1578382273589516e-06, "loss": 0.9451, "step": 21723 }, { "epoch": 0.8499882619923311, "grad_norm": 0.0, "learning_rate": 1.1572464027275942e-06, "loss": 0.9539, "step": 21724 }, { "epoch": 0.8500273886845606, "grad_norm": 0.0, "learning_rate": 1.1566547200997792e-06, "loss": 0.8078, "step": 21725 }, { "epoch": 0.85006651537679, "grad_norm": 0.0, "learning_rate": 1.1560631794850108e-06, "loss": 0.8554, "step": 21726 }, { "epoch": 0.8501056420690195, "grad_norm": 0.0, "learning_rate": 1.1554717808927817e-06, "loss": 1.0343, "step": 21727 }, { "epoch": 0.8501447687612489, "grad_norm": 0.0, "learning_rate": 1.1548805243325966e-06, "loss": 1.1209, "step": 21728 }, { "epoch": 0.8501838954534784, "grad_norm": 0.0, "learning_rate": 1.1542894098139423e-06, "loss": 0.9078, "step": 21729 }, { "epoch": 0.8502230221457078, "grad_norm": 0.0, "learning_rate": 1.153698437346319e-06, "loss": 0.9709, "step": 21730 }, { "epoch": 0.8502621488379373, "grad_norm": 0.0, "learning_rate": 1.1531076069392133e-06, "loss": 0.9692, "step": 21731 }, { "epoch": 0.8503012755301667, "grad_norm": 0.0, "learning_rate": 1.1525169186021146e-06, "loss": 0.8004, "step": 21732 }, { "epoch": 0.8503404022223962, "grad_norm": 0.0, "learning_rate": 1.1519263723445028e-06, "loss": 0.9895, "step": 21733 }, { "epoch": 0.8503795289146255, "grad_norm": 0.0, "learning_rate": 1.1513359681758717e-06, "loss": 1.053, "step": 21734 }, { "epoch": 0.850418655606855, "grad_norm": 0.0, "learning_rate": 1.1507457061056947e-06, "loss": 0.8235, "step": 21735 }, { "epoch": 0.8504577822990844, "grad_norm": 0.0, "learning_rate": 1.1501555861434544e-06, "loss": 1.0267, "step": 21736 }, { "epoch": 0.8504969089913139, "grad_norm": 0.0, "learning_rate": 1.149565608298624e-06, "loss": 0.907, "step": 21737 }, { "epoch": 0.8505360356835433, "grad_norm": 0.0, "learning_rate": 1.1489757725806804e-06, "loss": 0.9955, "step": 21738 }, { "epoch": 0.8505751623757728, "grad_norm": 0.0, "learning_rate": 1.1483860789990942e-06, "loss": 0.9028, "step": 21739 }, { "epoch": 0.8506142890680022, "grad_norm": 0.0, "learning_rate": 1.1477965275633386e-06, "loss": 0.8237, "step": 21740 }, { "epoch": 0.8506534157602317, "grad_norm": 0.0, "learning_rate": 1.147207118282876e-06, "loss": 0.9582, "step": 21741 }, { "epoch": 0.8506925424524611, "grad_norm": 0.0, "learning_rate": 1.1466178511671732e-06, "loss": 0.9865, "step": 21742 }, { "epoch": 0.8507316691446906, "grad_norm": 0.0, "learning_rate": 1.1460287262256963e-06, "loss": 0.9381, "step": 21743 }, { "epoch": 0.85077079583692, "grad_norm": 0.0, "learning_rate": 1.1454397434679022e-06, "loss": 1.0397, "step": 21744 }, { "epoch": 0.8508099225291493, "grad_norm": 0.0, "learning_rate": 1.1448509029032495e-06, "loss": 0.862, "step": 21745 }, { "epoch": 0.8508490492213788, "grad_norm": 0.0, "learning_rate": 1.1442622045411955e-06, "loss": 0.9359, "step": 21746 }, { "epoch": 0.8508881759136082, "grad_norm": 0.0, "learning_rate": 1.1436736483911948e-06, "loss": 1.0336, "step": 21747 }, { "epoch": 0.8509273026058377, "grad_norm": 0.0, "learning_rate": 1.1430852344626963e-06, "loss": 0.8959, "step": 21748 }, { "epoch": 0.8509664292980671, "grad_norm": 0.0, "learning_rate": 1.142496962765154e-06, "loss": 0.9725, "step": 21749 }, { "epoch": 0.8510055559902966, "grad_norm": 0.0, "learning_rate": 1.1419088333080074e-06, "loss": 1.0051, "step": 21750 }, { "epoch": 0.851044682682526, "grad_norm": 0.0, "learning_rate": 1.1413208461007063e-06, "loss": 0.9824, "step": 21751 }, { "epoch": 0.8510838093747555, "grad_norm": 0.0, "learning_rate": 1.1407330011526907e-06, "loss": 1.0102, "step": 21752 }, { "epoch": 0.8511229360669849, "grad_norm": 0.0, "learning_rate": 1.1401452984734051e-06, "loss": 0.9937, "step": 21753 }, { "epoch": 0.8511620627592144, "grad_norm": 0.0, "learning_rate": 1.139557738072281e-06, "loss": 0.8508, "step": 21754 }, { "epoch": 0.8512011894514437, "grad_norm": 0.0, "learning_rate": 1.1389703199587577e-06, "loss": 1.0302, "step": 21755 }, { "epoch": 0.8512403161436732, "grad_norm": 0.0, "learning_rate": 1.1383830441422671e-06, "loss": 0.9916, "step": 21756 }, { "epoch": 0.8512794428359026, "grad_norm": 0.0, "learning_rate": 1.1377959106322423e-06, "loss": 0.914, "step": 21757 }, { "epoch": 0.8513185695281321, "grad_norm": 0.0, "learning_rate": 1.1372089194381087e-06, "loss": 1.0332, "step": 21758 }, { "epoch": 0.8513576962203615, "grad_norm": 0.0, "learning_rate": 1.1366220705692964e-06, "loss": 0.8952, "step": 21759 }, { "epoch": 0.851396822912591, "grad_norm": 0.0, "learning_rate": 1.1360353640352228e-06, "loss": 0.9185, "step": 21760 }, { "epoch": 0.8514359496048204, "grad_norm": 0.0, "learning_rate": 1.1354487998453178e-06, "loss": 0.9831, "step": 21761 }, { "epoch": 0.8514750762970499, "grad_norm": 0.0, "learning_rate": 1.1348623780089962e-06, "loss": 0.9396, "step": 21762 }, { "epoch": 0.8515142029892793, "grad_norm": 0.0, "learning_rate": 1.1342760985356772e-06, "loss": 0.9346, "step": 21763 }, { "epoch": 0.8515533296815088, "grad_norm": 0.0, "learning_rate": 1.1336899614347707e-06, "loss": 0.9463, "step": 21764 }, { "epoch": 0.8515924563737381, "grad_norm": 0.0, "learning_rate": 1.1331039667156973e-06, "loss": 0.8795, "step": 21765 }, { "epoch": 0.8516315830659676, "grad_norm": 0.0, "learning_rate": 1.1325181143878617e-06, "loss": 0.851, "step": 21766 }, { "epoch": 0.851670709758197, "grad_norm": 0.0, "learning_rate": 1.1319324044606762e-06, "loss": 0.8957, "step": 21767 }, { "epoch": 0.8517098364504265, "grad_norm": 0.0, "learning_rate": 1.131346836943541e-06, "loss": 0.964, "step": 21768 }, { "epoch": 0.8517489631426559, "grad_norm": 0.0, "learning_rate": 1.1307614118458631e-06, "loss": 0.9372, "step": 21769 }, { "epoch": 0.8517880898348854, "grad_norm": 0.0, "learning_rate": 1.130176129177043e-06, "loss": 1.0019, "step": 21770 }, { "epoch": 0.8518272165271148, "grad_norm": 0.0, "learning_rate": 1.1295909889464817e-06, "loss": 0.9031, "step": 21771 }, { "epoch": 0.8518663432193443, "grad_norm": 0.0, "learning_rate": 1.129005991163572e-06, "loss": 0.9753, "step": 21772 }, { "epoch": 0.8519054699115737, "grad_norm": 0.0, "learning_rate": 1.1284211358377094e-06, "loss": 1.0884, "step": 21773 }, { "epoch": 0.8519445966038031, "grad_norm": 0.0, "learning_rate": 1.1278364229782869e-06, "loss": 0.9478, "step": 21774 }, { "epoch": 0.8519837232960326, "grad_norm": 0.0, "learning_rate": 1.1272518525946964e-06, "loss": 0.9091, "step": 21775 }, { "epoch": 0.8520228499882619, "grad_norm": 0.0, "learning_rate": 1.1266674246963216e-06, "loss": 0.8909, "step": 21776 }, { "epoch": 0.8520619766804914, "grad_norm": 0.0, "learning_rate": 1.1260831392925498e-06, "loss": 0.95, "step": 21777 }, { "epoch": 0.8521011033727208, "grad_norm": 0.0, "learning_rate": 1.1254989963927599e-06, "loss": 0.942, "step": 21778 }, { "epoch": 0.8521402300649503, "grad_norm": 0.0, "learning_rate": 1.1249149960063388e-06, "loss": 0.9366, "step": 21779 }, { "epoch": 0.8521793567571797, "grad_norm": 0.0, "learning_rate": 1.1243311381426614e-06, "loss": 0.9575, "step": 21780 }, { "epoch": 0.8522184834494092, "grad_norm": 0.0, "learning_rate": 1.1237474228111046e-06, "loss": 1.0227, "step": 21781 }, { "epoch": 0.8522576101416386, "grad_norm": 0.0, "learning_rate": 1.1231638500210408e-06, "loss": 0.9403, "step": 21782 }, { "epoch": 0.8522967368338681, "grad_norm": 0.0, "learning_rate": 1.1225804197818413e-06, "loss": 1.102, "step": 21783 }, { "epoch": 0.8523358635260975, "grad_norm": 0.0, "learning_rate": 1.1219971321028766e-06, "loss": 1.0047, "step": 21784 }, { "epoch": 0.852374990218327, "grad_norm": 0.0, "learning_rate": 1.1214139869935147e-06, "loss": 1.0143, "step": 21785 }, { "epoch": 0.8524141169105564, "grad_norm": 0.0, "learning_rate": 1.120830984463117e-06, "loss": 0.997, "step": 21786 }, { "epoch": 0.8524532436027858, "grad_norm": 0.0, "learning_rate": 1.1202481245210485e-06, "loss": 0.9682, "step": 21787 }, { "epoch": 0.8524923702950152, "grad_norm": 0.0, "learning_rate": 1.1196654071766689e-06, "loss": 0.8909, "step": 21788 }, { "epoch": 0.8525314969872447, "grad_norm": 0.0, "learning_rate": 1.1190828324393333e-06, "loss": 0.9376, "step": 21789 }, { "epoch": 0.8525706236794741, "grad_norm": 0.0, "learning_rate": 1.1185004003183996e-06, "loss": 0.9384, "step": 21790 }, { "epoch": 0.8526097503717036, "grad_norm": 0.0, "learning_rate": 1.1179181108232196e-06, "loss": 0.959, "step": 21791 }, { "epoch": 0.852648877063933, "grad_norm": 0.0, "learning_rate": 1.1173359639631476e-06, "loss": 0.9164, "step": 21792 }, { "epoch": 0.8526880037561625, "grad_norm": 0.0, "learning_rate": 1.1167539597475273e-06, "loss": 0.8835, "step": 21793 }, { "epoch": 0.8527271304483919, "grad_norm": 0.0, "learning_rate": 1.1161720981857082e-06, "loss": 0.8234, "step": 21794 }, { "epoch": 0.8527662571406214, "grad_norm": 0.0, "learning_rate": 1.1155903792870303e-06, "loss": 0.8661, "step": 21795 }, { "epoch": 0.8528053838328508, "grad_norm": 0.0, "learning_rate": 1.115008803060842e-06, "loss": 0.9655, "step": 21796 }, { "epoch": 0.8528445105250803, "grad_norm": 0.0, "learning_rate": 1.1144273695164764e-06, "loss": 0.9263, "step": 21797 }, { "epoch": 0.8528836372173096, "grad_norm": 0.0, "learning_rate": 1.1138460786632743e-06, "loss": 0.9005, "step": 21798 }, { "epoch": 0.8529227639095391, "grad_norm": 0.0, "learning_rate": 1.113264930510568e-06, "loss": 1.0291, "step": 21799 }, { "epoch": 0.8529618906017685, "grad_norm": 0.0, "learning_rate": 1.1126839250676913e-06, "loss": 0.9613, "step": 21800 }, { "epoch": 0.853001017293998, "grad_norm": 0.0, "learning_rate": 1.1121030623439744e-06, "loss": 0.9264, "step": 21801 }, { "epoch": 0.8530401439862274, "grad_norm": 0.0, "learning_rate": 1.1115223423487463e-06, "loss": 1.0495, "step": 21802 }, { "epoch": 0.8530792706784568, "grad_norm": 0.0, "learning_rate": 1.11094176509133e-06, "loss": 0.9063, "step": 21803 }, { "epoch": 0.8531183973706863, "grad_norm": 0.0, "learning_rate": 1.1103613305810512e-06, "loss": 0.9938, "step": 21804 }, { "epoch": 0.8531575240629157, "grad_norm": 0.0, "learning_rate": 1.109781038827229e-06, "loss": 0.9516, "step": 21805 }, { "epoch": 0.8531966507551452, "grad_norm": 0.0, "learning_rate": 1.1092008898391859e-06, "loss": 1.1035, "step": 21806 }, { "epoch": 0.8532357774473746, "grad_norm": 0.0, "learning_rate": 1.1086208836262336e-06, "loss": 0.9639, "step": 21807 }, { "epoch": 0.853274904139604, "grad_norm": 0.0, "learning_rate": 1.1080410201976898e-06, "loss": 0.895, "step": 21808 }, { "epoch": 0.8533140308318334, "grad_norm": 0.0, "learning_rate": 1.107461299562862e-06, "loss": 0.897, "step": 21809 }, { "epoch": 0.8533531575240629, "grad_norm": 0.0, "learning_rate": 1.1068817217310657e-06, "loss": 0.9554, "step": 21810 }, { "epoch": 0.8533922842162923, "grad_norm": 0.0, "learning_rate": 1.1063022867116035e-06, "loss": 1.0039, "step": 21811 }, { "epoch": 0.8534314109085218, "grad_norm": 0.0, "learning_rate": 1.1057229945137848e-06, "loss": 0.9159, "step": 21812 }, { "epoch": 0.8534705376007512, "grad_norm": 0.0, "learning_rate": 1.1051438451469055e-06, "loss": 0.7787, "step": 21813 }, { "epoch": 0.8535096642929807, "grad_norm": 0.0, "learning_rate": 1.1045648386202735e-06, "loss": 0.9435, "step": 21814 }, { "epoch": 0.8535487909852101, "grad_norm": 0.0, "learning_rate": 1.1039859749431814e-06, "loss": 0.9025, "step": 21815 }, { "epoch": 0.8535879176774396, "grad_norm": 0.0, "learning_rate": 1.1034072541249297e-06, "loss": 1.0587, "step": 21816 }, { "epoch": 0.853627044369669, "grad_norm": 0.0, "learning_rate": 1.1028286761748076e-06, "loss": 0.9459, "step": 21817 }, { "epoch": 0.8536661710618985, "grad_norm": 0.0, "learning_rate": 1.1022502411021086e-06, "loss": 0.9754, "step": 21818 }, { "epoch": 0.8537052977541278, "grad_norm": 0.0, "learning_rate": 1.101671948916121e-06, "loss": 0.9797, "step": 21819 }, { "epoch": 0.8537444244463573, "grad_norm": 0.0, "learning_rate": 1.1010937996261329e-06, "loss": 0.8976, "step": 21820 }, { "epoch": 0.8537835511385867, "grad_norm": 0.0, "learning_rate": 1.1005157932414257e-06, "loss": 0.9951, "step": 21821 }, { "epoch": 0.8538226778308162, "grad_norm": 0.0, "learning_rate": 1.099937929771283e-06, "loss": 0.9385, "step": 21822 }, { "epoch": 0.8538618045230456, "grad_norm": 0.0, "learning_rate": 1.0993602092249855e-06, "loss": 0.8996, "step": 21823 }, { "epoch": 0.8539009312152751, "grad_norm": 0.0, "learning_rate": 1.0987826316118123e-06, "loss": 0.9731, "step": 21824 }, { "epoch": 0.8539400579075045, "grad_norm": 0.0, "learning_rate": 1.0982051969410334e-06, "loss": 0.9608, "step": 21825 }, { "epoch": 0.853979184599734, "grad_norm": 0.0, "learning_rate": 1.0976279052219262e-06, "loss": 0.9501, "step": 21826 }, { "epoch": 0.8540183112919634, "grad_norm": 0.0, "learning_rate": 1.0970507564637579e-06, "loss": 0.8977, "step": 21827 }, { "epoch": 0.8540574379841929, "grad_norm": 0.0, "learning_rate": 1.0964737506757983e-06, "loss": 0.9429, "step": 21828 }, { "epoch": 0.8540965646764223, "grad_norm": 0.0, "learning_rate": 1.0958968878673137e-06, "loss": 1.0587, "step": 21829 }, { "epoch": 0.8541356913686516, "grad_norm": 0.0, "learning_rate": 1.095320168047569e-06, "loss": 0.865, "step": 21830 }, { "epoch": 0.8541748180608811, "grad_norm": 0.0, "learning_rate": 1.0947435912258231e-06, "loss": 0.8919, "step": 21831 }, { "epoch": 0.8542139447531105, "grad_norm": 0.0, "learning_rate": 1.0941671574113355e-06, "loss": 0.9851, "step": 21832 }, { "epoch": 0.85425307144534, "grad_norm": 0.0, "learning_rate": 1.0935908666133644e-06, "loss": 1.0501, "step": 21833 }, { "epoch": 0.8542921981375694, "grad_norm": 0.0, "learning_rate": 1.0930147188411655e-06, "loss": 0.919, "step": 21834 }, { "epoch": 0.8543313248297989, "grad_norm": 0.0, "learning_rate": 1.092438714103986e-06, "loss": 0.9574, "step": 21835 }, { "epoch": 0.8543704515220283, "grad_norm": 0.0, "learning_rate": 1.0918628524110808e-06, "loss": 1.0066, "step": 21836 }, { "epoch": 0.8544095782142578, "grad_norm": 0.0, "learning_rate": 1.0912871337716968e-06, "loss": 1.0262, "step": 21837 }, { "epoch": 0.8544487049064872, "grad_norm": 0.0, "learning_rate": 1.0907115581950755e-06, "loss": 0.9324, "step": 21838 }, { "epoch": 0.8544878315987167, "grad_norm": 0.0, "learning_rate": 1.090136125690463e-06, "loss": 1.0026, "step": 21839 }, { "epoch": 0.854526958290946, "grad_norm": 0.0, "learning_rate": 1.0895608362671005e-06, "loss": 0.9894, "step": 21840 }, { "epoch": 0.8545660849831755, "grad_norm": 0.0, "learning_rate": 1.0889856899342267e-06, "loss": 0.9764, "step": 21841 }, { "epoch": 0.8546052116754049, "grad_norm": 0.0, "learning_rate": 1.0884106867010746e-06, "loss": 0.9979, "step": 21842 }, { "epoch": 0.8546443383676344, "grad_norm": 0.0, "learning_rate": 1.087835826576883e-06, "loss": 1.0507, "step": 21843 }, { "epoch": 0.8546834650598638, "grad_norm": 0.0, "learning_rate": 1.0872611095708773e-06, "loss": 0.9501, "step": 21844 }, { "epoch": 0.8547225917520933, "grad_norm": 0.0, "learning_rate": 1.0866865356922907e-06, "loss": 1.0187, "step": 21845 }, { "epoch": 0.8547617184443227, "grad_norm": 0.0, "learning_rate": 1.0861121049503487e-06, "loss": 0.8914, "step": 21846 }, { "epoch": 0.8548008451365522, "grad_norm": 0.0, "learning_rate": 1.0855378173542786e-06, "loss": 0.8592, "step": 21847 }, { "epoch": 0.8548399718287816, "grad_norm": 0.0, "learning_rate": 1.0849636729132994e-06, "loss": 0.8931, "step": 21848 }, { "epoch": 0.8548790985210111, "grad_norm": 0.0, "learning_rate": 1.084389671636632e-06, "loss": 1.0017, "step": 21849 }, { "epoch": 0.8549182252132405, "grad_norm": 0.0, "learning_rate": 1.0838158135334942e-06, "loss": 0.8854, "step": 21850 }, { "epoch": 0.85495735190547, "grad_norm": 0.0, "learning_rate": 1.0832420986131044e-06, "loss": 0.8887, "step": 21851 }, { "epoch": 0.8549964785976993, "grad_norm": 0.0, "learning_rate": 1.0826685268846704e-06, "loss": 1.04, "step": 21852 }, { "epoch": 0.8550356052899288, "grad_norm": 0.0, "learning_rate": 1.0820950983574064e-06, "loss": 0.9308, "step": 21853 }, { "epoch": 0.8550747319821582, "grad_norm": 0.0, "learning_rate": 1.0815218130405203e-06, "loss": 0.8708, "step": 21854 }, { "epoch": 0.8551138586743877, "grad_norm": 0.0, "learning_rate": 1.0809486709432204e-06, "loss": 1.0982, "step": 21855 }, { "epoch": 0.8551529853666171, "grad_norm": 0.0, "learning_rate": 1.0803756720747072e-06, "loss": 0.8783, "step": 21856 }, { "epoch": 0.8551921120588466, "grad_norm": 0.0, "learning_rate": 1.0798028164441854e-06, "loss": 0.9764, "step": 21857 }, { "epoch": 0.855231238751076, "grad_norm": 0.0, "learning_rate": 1.0792301040608489e-06, "loss": 1.0112, "step": 21858 }, { "epoch": 0.8552703654433054, "grad_norm": 0.0, "learning_rate": 1.0786575349339013e-06, "loss": 1.067, "step": 21859 }, { "epoch": 0.8553094921355349, "grad_norm": 0.0, "learning_rate": 1.0780851090725342e-06, "loss": 1.0621, "step": 21860 }, { "epoch": 0.8553486188277643, "grad_norm": 0.0, "learning_rate": 1.0775128264859413e-06, "loss": 0.9859, "step": 21861 }, { "epoch": 0.8553877455199937, "grad_norm": 0.0, "learning_rate": 1.0769406871833088e-06, "loss": 0.9441, "step": 21862 }, { "epoch": 0.8554268722122231, "grad_norm": 0.0, "learning_rate": 1.0763686911738313e-06, "loss": 0.8719, "step": 21863 }, { "epoch": 0.8554659989044526, "grad_norm": 0.0, "learning_rate": 1.0757968384666894e-06, "loss": 0.982, "step": 21864 }, { "epoch": 0.855505125596682, "grad_norm": 0.0, "learning_rate": 1.075225129071068e-06, "loss": 1.0718, "step": 21865 }, { "epoch": 0.8555442522889115, "grad_norm": 0.0, "learning_rate": 1.0746535629961473e-06, "loss": 0.8931, "step": 21866 }, { "epoch": 0.8555833789811409, "grad_norm": 0.0, "learning_rate": 1.0740821402511049e-06, "loss": 0.9867, "step": 21867 }, { "epoch": 0.8556225056733704, "grad_norm": 0.0, "learning_rate": 1.0735108608451195e-06, "loss": 0.8602, "step": 21868 }, { "epoch": 0.8556616323655998, "grad_norm": 0.0, "learning_rate": 1.0729397247873663e-06, "loss": 1.0119, "step": 21869 }, { "epoch": 0.8557007590578293, "grad_norm": 0.0, "learning_rate": 1.0723687320870125e-06, "loss": 0.8961, "step": 21870 }, { "epoch": 0.8557398857500587, "grad_norm": 0.0, "learning_rate": 1.0717978827532293e-06, "loss": 0.9716, "step": 21871 }, { "epoch": 0.8557790124422882, "grad_norm": 0.0, "learning_rate": 1.0712271767951853e-06, "loss": 1.093, "step": 21872 }, { "epoch": 0.8558181391345175, "grad_norm": 0.0, "learning_rate": 1.0706566142220464e-06, "loss": 0.8768, "step": 21873 }, { "epoch": 0.855857265826747, "grad_norm": 0.0, "learning_rate": 1.0700861950429708e-06, "loss": 0.939, "step": 21874 }, { "epoch": 0.8558963925189764, "grad_norm": 0.0, "learning_rate": 1.0695159192671234e-06, "loss": 0.8906, "step": 21875 }, { "epoch": 0.8559355192112059, "grad_norm": 0.0, "learning_rate": 1.0689457869036579e-06, "loss": 0.8797, "step": 21876 }, { "epoch": 0.8559746459034353, "grad_norm": 0.0, "learning_rate": 1.0683757979617316e-06, "loss": 0.9884, "step": 21877 }, { "epoch": 0.8560137725956648, "grad_norm": 0.0, "learning_rate": 1.067805952450498e-06, "loss": 0.9774, "step": 21878 }, { "epoch": 0.8560528992878942, "grad_norm": 0.0, "learning_rate": 1.06723625037911e-06, "loss": 0.8683, "step": 21879 }, { "epoch": 0.8560920259801237, "grad_norm": 0.0, "learning_rate": 1.0666666917567126e-06, "loss": 0.9991, "step": 21880 }, { "epoch": 0.8561311526723531, "grad_norm": 0.0, "learning_rate": 1.0660972765924537e-06, "loss": 0.9039, "step": 21881 }, { "epoch": 0.8561702793645826, "grad_norm": 0.0, "learning_rate": 1.0655280048954798e-06, "loss": 1.0233, "step": 21882 }, { "epoch": 0.856209406056812, "grad_norm": 0.0, "learning_rate": 1.0649588766749297e-06, "loss": 1.0131, "step": 21883 }, { "epoch": 0.8562485327490414, "grad_norm": 0.0, "learning_rate": 1.0643898919399431e-06, "loss": 0.964, "step": 21884 }, { "epoch": 0.8562876594412708, "grad_norm": 0.0, "learning_rate": 1.063821050699657e-06, "loss": 0.9952, "step": 21885 }, { "epoch": 0.8563267861335003, "grad_norm": 0.0, "learning_rate": 1.0632523529632099e-06, "loss": 1.0565, "step": 21886 }, { "epoch": 0.8563659128257297, "grad_norm": 0.0, "learning_rate": 1.0626837987397299e-06, "loss": 0.8896, "step": 21887 }, { "epoch": 0.8564050395179591, "grad_norm": 0.0, "learning_rate": 1.0621153880383506e-06, "loss": 0.8721, "step": 21888 }, { "epoch": 0.8564441662101886, "grad_norm": 0.0, "learning_rate": 1.061547120868195e-06, "loss": 1.0494, "step": 21889 }, { "epoch": 0.856483292902418, "grad_norm": 0.0, "learning_rate": 1.0609789972383955e-06, "loss": 1.003, "step": 21890 }, { "epoch": 0.8565224195946475, "grad_norm": 0.0, "learning_rate": 1.0604110171580706e-06, "loss": 0.9191, "step": 21891 }, { "epoch": 0.8565615462868769, "grad_norm": 0.0, "learning_rate": 1.059843180636344e-06, "loss": 0.8942, "step": 21892 }, { "epoch": 0.8566006729791064, "grad_norm": 0.0, "learning_rate": 1.059275487682332e-06, "loss": 0.9575, "step": 21893 }, { "epoch": 0.8566397996713357, "grad_norm": 0.0, "learning_rate": 1.0587079383051524e-06, "loss": 1.0388, "step": 21894 }, { "epoch": 0.8566789263635652, "grad_norm": 0.0, "learning_rate": 1.0581405325139194e-06, "loss": 1.0637, "step": 21895 }, { "epoch": 0.8567180530557946, "grad_norm": 0.0, "learning_rate": 1.0575732703177454e-06, "loss": 0.861, "step": 21896 }, { "epoch": 0.8567571797480241, "grad_norm": 0.0, "learning_rate": 1.057006151725738e-06, "loss": 0.9521, "step": 21897 }, { "epoch": 0.8567963064402535, "grad_norm": 0.0, "learning_rate": 1.0564391767470062e-06, "loss": 1.0473, "step": 21898 }, { "epoch": 0.856835433132483, "grad_norm": 0.0, "learning_rate": 1.0558723453906538e-06, "loss": 1.0081, "step": 21899 }, { "epoch": 0.8568745598247124, "grad_norm": 0.0, "learning_rate": 1.055305657665786e-06, "loss": 0.8388, "step": 21900 }, { "epoch": 0.8569136865169419, "grad_norm": 0.0, "learning_rate": 1.0547391135814989e-06, "loss": 0.9166, "step": 21901 }, { "epoch": 0.8569528132091713, "grad_norm": 0.0, "learning_rate": 1.0541727131468937e-06, "loss": 1.0122, "step": 21902 }, { "epoch": 0.8569919399014008, "grad_norm": 0.0, "learning_rate": 1.0536064563710623e-06, "loss": 0.9895, "step": 21903 }, { "epoch": 0.8570310665936302, "grad_norm": 0.0, "learning_rate": 1.0530403432631041e-06, "loss": 0.9507, "step": 21904 }, { "epoch": 0.8570701932858596, "grad_norm": 0.0, "learning_rate": 1.0524743738321052e-06, "loss": 0.9034, "step": 21905 }, { "epoch": 0.857109319978089, "grad_norm": 0.0, "learning_rate": 1.0519085480871583e-06, "loss": 0.9249, "step": 21906 }, { "epoch": 0.8571484466703185, "grad_norm": 0.0, "learning_rate": 1.0513428660373426e-06, "loss": 1.0124, "step": 21907 }, { "epoch": 0.8571875733625479, "grad_norm": 0.0, "learning_rate": 1.050777327691751e-06, "loss": 1.0209, "step": 21908 }, { "epoch": 0.8572267000547774, "grad_norm": 0.0, "learning_rate": 1.0502119330594608e-06, "loss": 0.8888, "step": 21909 }, { "epoch": 0.8572658267470068, "grad_norm": 0.0, "learning_rate": 1.0496466821495532e-06, "loss": 0.9318, "step": 21910 }, { "epoch": 0.8573049534392363, "grad_norm": 0.0, "learning_rate": 1.0490815749711014e-06, "loss": 1.0639, "step": 21911 }, { "epoch": 0.8573440801314657, "grad_norm": 0.0, "learning_rate": 1.048516611533187e-06, "loss": 0.9831, "step": 21912 }, { "epoch": 0.8573832068236952, "grad_norm": 0.0, "learning_rate": 1.0479517918448767e-06, "loss": 0.9252, "step": 21913 }, { "epoch": 0.8574223335159246, "grad_norm": 0.0, "learning_rate": 1.0473871159152448e-06, "loss": 0.9902, "step": 21914 }, { "epoch": 0.857461460208154, "grad_norm": 0.0, "learning_rate": 1.0468225837533563e-06, "loss": 1.0698, "step": 21915 }, { "epoch": 0.8575005869003834, "grad_norm": 0.0, "learning_rate": 1.0462581953682771e-06, "loss": 0.9214, "step": 21916 }, { "epoch": 0.8575397135926128, "grad_norm": 0.0, "learning_rate": 1.0456939507690721e-06, "loss": 0.979, "step": 21917 }, { "epoch": 0.8575788402848423, "grad_norm": 0.0, "learning_rate": 1.0451298499648043e-06, "loss": 0.9718, "step": 21918 }, { "epoch": 0.8576179669770717, "grad_norm": 0.0, "learning_rate": 1.0445658929645275e-06, "loss": 0.9328, "step": 21919 }, { "epoch": 0.8576570936693012, "grad_norm": 0.0, "learning_rate": 1.0440020797773009e-06, "loss": 0.815, "step": 21920 }, { "epoch": 0.8576962203615306, "grad_norm": 0.0, "learning_rate": 1.0434384104121809e-06, "loss": 0.962, "step": 21921 }, { "epoch": 0.8577353470537601, "grad_norm": 0.0, "learning_rate": 1.0428748848782145e-06, "loss": 1.0017, "step": 21922 }, { "epoch": 0.8577744737459895, "grad_norm": 0.0, "learning_rate": 1.0423115031844534e-06, "loss": 0.9302, "step": 21923 }, { "epoch": 0.857813600438219, "grad_norm": 0.0, "learning_rate": 1.041748265339947e-06, "loss": 0.9759, "step": 21924 }, { "epoch": 0.8578527271304484, "grad_norm": 0.0, "learning_rate": 1.0411851713537358e-06, "loss": 0.8934, "step": 21925 }, { "epoch": 0.8578918538226779, "grad_norm": 0.0, "learning_rate": 1.040622221234865e-06, "loss": 0.9256, "step": 21926 }, { "epoch": 0.8579309805149072, "grad_norm": 0.0, "learning_rate": 1.040059414992377e-06, "loss": 0.9192, "step": 21927 }, { "epoch": 0.8579701072071367, "grad_norm": 0.0, "learning_rate": 1.039496752635305e-06, "loss": 0.8548, "step": 21928 }, { "epoch": 0.8580092338993661, "grad_norm": 0.0, "learning_rate": 1.0389342341726872e-06, "loss": 0.9163, "step": 21929 }, { "epoch": 0.8580483605915956, "grad_norm": 0.0, "learning_rate": 1.0383718596135561e-06, "loss": 0.9532, "step": 21930 }, { "epoch": 0.858087487283825, "grad_norm": 0.0, "learning_rate": 1.037809628966946e-06, "loss": 1.013, "step": 21931 }, { "epoch": 0.8581266139760545, "grad_norm": 0.0, "learning_rate": 1.0372475422418816e-06, "loss": 0.967, "step": 21932 }, { "epoch": 0.8581657406682839, "grad_norm": 0.0, "learning_rate": 1.0366855994473913e-06, "loss": 0.9856, "step": 21933 }, { "epoch": 0.8582048673605134, "grad_norm": 0.0, "learning_rate": 1.0361238005924956e-06, "loss": 0.9292, "step": 21934 }, { "epoch": 0.8582439940527428, "grad_norm": 0.0, "learning_rate": 1.035562145686223e-06, "loss": 1.0175, "step": 21935 }, { "epoch": 0.8582831207449723, "grad_norm": 0.0, "learning_rate": 1.0350006347375874e-06, "loss": 0.959, "step": 21936 }, { "epoch": 0.8583222474372016, "grad_norm": 0.0, "learning_rate": 1.0344392677556091e-06, "loss": 0.9631, "step": 21937 }, { "epoch": 0.8583613741294311, "grad_norm": 0.0, "learning_rate": 1.033878044749299e-06, "loss": 1.0095, "step": 21938 }, { "epoch": 0.8584005008216605, "grad_norm": 0.0, "learning_rate": 1.0333169657276754e-06, "loss": 1.0136, "step": 21939 }, { "epoch": 0.85843962751389, "grad_norm": 0.0, "learning_rate": 1.032756030699743e-06, "loss": 1.0349, "step": 21940 }, { "epoch": 0.8584787542061194, "grad_norm": 0.0, "learning_rate": 1.032195239674515e-06, "loss": 0.9087, "step": 21941 }, { "epoch": 0.8585178808983489, "grad_norm": 0.0, "learning_rate": 1.0316345926609927e-06, "loss": 0.9873, "step": 21942 }, { "epoch": 0.8585570075905783, "grad_norm": 0.0, "learning_rate": 1.0310740896681803e-06, "loss": 0.8728, "step": 21943 }, { "epoch": 0.8585961342828077, "grad_norm": 0.0, "learning_rate": 1.0305137307050782e-06, "loss": 0.8493, "step": 21944 }, { "epoch": 0.8586352609750372, "grad_norm": 0.0, "learning_rate": 1.0299535157806894e-06, "loss": 0.9718, "step": 21945 }, { "epoch": 0.8586743876672666, "grad_norm": 0.0, "learning_rate": 1.0293934449040054e-06, "loss": 0.9009, "step": 21946 }, { "epoch": 0.858713514359496, "grad_norm": 0.0, "learning_rate": 1.0288335180840215e-06, "loss": 0.9443, "step": 21947 }, { "epoch": 0.8587526410517254, "grad_norm": 0.0, "learning_rate": 1.028273735329729e-06, "loss": 0.9374, "step": 21948 }, { "epoch": 0.8587917677439549, "grad_norm": 0.0, "learning_rate": 1.0277140966501209e-06, "loss": 0.9667, "step": 21949 }, { "epoch": 0.8588308944361843, "grad_norm": 0.0, "learning_rate": 1.027154602054179e-06, "loss": 0.8763, "step": 21950 }, { "epoch": 0.8588700211284138, "grad_norm": 0.0, "learning_rate": 1.0265952515508925e-06, "loss": 1.0254, "step": 21951 }, { "epoch": 0.8589091478206432, "grad_norm": 0.0, "learning_rate": 1.026036045149239e-06, "loss": 1.0005, "step": 21952 }, { "epoch": 0.8589482745128727, "grad_norm": 0.0, "learning_rate": 1.0254769828582046e-06, "loss": 1.0087, "step": 21953 }, { "epoch": 0.8589874012051021, "grad_norm": 0.0, "learning_rate": 1.0249180646867629e-06, "loss": 0.9737, "step": 21954 }, { "epoch": 0.8590265278973316, "grad_norm": 0.0, "learning_rate": 1.0243592906438916e-06, "loss": 1.0085, "step": 21955 }, { "epoch": 0.859065654589561, "grad_norm": 0.0, "learning_rate": 1.0238006607385597e-06, "loss": 0.892, "step": 21956 }, { "epoch": 0.8591047812817905, "grad_norm": 0.0, "learning_rate": 1.0232421749797462e-06, "loss": 1.1372, "step": 21957 }, { "epoch": 0.8591439079740198, "grad_norm": 0.0, "learning_rate": 1.0226838333764111e-06, "loss": 0.774, "step": 21958 }, { "epoch": 0.8591830346662493, "grad_norm": 0.0, "learning_rate": 1.0221256359375275e-06, "loss": 0.9614, "step": 21959 }, { "epoch": 0.8592221613584787, "grad_norm": 0.0, "learning_rate": 1.021567582672054e-06, "loss": 0.8847, "step": 21960 }, { "epoch": 0.8592612880507082, "grad_norm": 0.0, "learning_rate": 1.0210096735889552e-06, "loss": 0.9809, "step": 21961 }, { "epoch": 0.8593004147429376, "grad_norm": 0.0, "learning_rate": 1.0204519086971886e-06, "loss": 0.7972, "step": 21962 }, { "epoch": 0.8593395414351671, "grad_norm": 0.0, "learning_rate": 1.019894288005715e-06, "loss": 0.9247, "step": 21963 }, { "epoch": 0.8593786681273965, "grad_norm": 0.0, "learning_rate": 1.0193368115234847e-06, "loss": 0.923, "step": 21964 }, { "epoch": 0.859417794819626, "grad_norm": 0.0, "learning_rate": 1.0187794792594507e-06, "loss": 0.8678, "step": 21965 }, { "epoch": 0.8594569215118554, "grad_norm": 0.0, "learning_rate": 1.018222291222567e-06, "loss": 0.9234, "step": 21966 }, { "epoch": 0.8594960482040849, "grad_norm": 0.0, "learning_rate": 1.0176652474217763e-06, "loss": 0.9639, "step": 21967 }, { "epoch": 0.8595351748963143, "grad_norm": 0.0, "learning_rate": 1.017108347866027e-06, "loss": 0.8753, "step": 21968 }, { "epoch": 0.8595743015885438, "grad_norm": 0.0, "learning_rate": 1.01655159256426e-06, "loss": 0.9768, "step": 21969 }, { "epoch": 0.8596134282807731, "grad_norm": 0.0, "learning_rate": 1.015994981525421e-06, "loss": 0.9366, "step": 21970 }, { "epoch": 0.8596525549730026, "grad_norm": 0.0, "learning_rate": 1.0154385147584422e-06, "loss": 0.9646, "step": 21971 }, { "epoch": 0.859691681665232, "grad_norm": 0.0, "learning_rate": 1.0148821922722641e-06, "loss": 0.8519, "step": 21972 }, { "epoch": 0.8597308083574614, "grad_norm": 0.0, "learning_rate": 1.0143260140758182e-06, "loss": 0.9779, "step": 21973 }, { "epoch": 0.8597699350496909, "grad_norm": 0.0, "learning_rate": 1.0137699801780365e-06, "loss": 1.0042, "step": 21974 }, { "epoch": 0.8598090617419203, "grad_norm": 0.0, "learning_rate": 1.0132140905878474e-06, "loss": 0.9199, "step": 21975 }, { "epoch": 0.8598481884341498, "grad_norm": 0.0, "learning_rate": 1.0126583453141826e-06, "loss": 0.9708, "step": 21976 }, { "epoch": 0.8598873151263792, "grad_norm": 0.0, "learning_rate": 1.0121027443659593e-06, "loss": 1.0123, "step": 21977 }, { "epoch": 0.8599264418186087, "grad_norm": 0.0, "learning_rate": 1.0115472877521048e-06, "loss": 0.9805, "step": 21978 }, { "epoch": 0.859965568510838, "grad_norm": 0.0, "learning_rate": 1.0109919754815377e-06, "loss": 0.9003, "step": 21979 }, { "epoch": 0.8600046952030675, "grad_norm": 0.0, "learning_rate": 1.0104368075631764e-06, "loss": 0.9164, "step": 21980 }, { "epoch": 0.8600438218952969, "grad_norm": 0.0, "learning_rate": 1.009881784005935e-06, "loss": 0.8624, "step": 21981 }, { "epoch": 0.8600829485875264, "grad_norm": 0.0, "learning_rate": 1.009326904818727e-06, "loss": 0.9414, "step": 21982 }, { "epoch": 0.8601220752797558, "grad_norm": 0.0, "learning_rate": 1.0087721700104603e-06, "loss": 0.9524, "step": 21983 }, { "epoch": 0.8601612019719853, "grad_norm": 0.0, "learning_rate": 1.0082175795900496e-06, "loss": 0.9135, "step": 21984 }, { "epoch": 0.8602003286642147, "grad_norm": 0.0, "learning_rate": 1.0076631335663956e-06, "loss": 0.816, "step": 21985 }, { "epoch": 0.8602394553564442, "grad_norm": 0.0, "learning_rate": 1.0071088319484057e-06, "loss": 0.929, "step": 21986 }, { "epoch": 0.8602785820486736, "grad_norm": 0.0, "learning_rate": 1.006554674744975e-06, "loss": 0.8645, "step": 21987 }, { "epoch": 0.8603177087409031, "grad_norm": 0.0, "learning_rate": 1.0060006619650108e-06, "loss": 1.0278, "step": 21988 }, { "epoch": 0.8603568354331325, "grad_norm": 0.0, "learning_rate": 1.005446793617403e-06, "loss": 0.8366, "step": 21989 }, { "epoch": 0.860395962125362, "grad_norm": 0.0, "learning_rate": 1.0048930697110514e-06, "loss": 0.9366, "step": 21990 }, { "epoch": 0.8604350888175913, "grad_norm": 0.0, "learning_rate": 1.0043394902548442e-06, "loss": 0.9655, "step": 21991 }, { "epoch": 0.8604742155098208, "grad_norm": 0.0, "learning_rate": 1.0037860552576729e-06, "loss": 0.9288, "step": 21992 }, { "epoch": 0.8605133422020502, "grad_norm": 0.0, "learning_rate": 1.0032327647284234e-06, "loss": 0.9244, "step": 21993 }, { "epoch": 0.8605524688942797, "grad_norm": 0.0, "learning_rate": 1.0026796186759847e-06, "loss": 0.9887, "step": 21994 }, { "epoch": 0.8605915955865091, "grad_norm": 0.0, "learning_rate": 1.0021266171092348e-06, "loss": 0.8981, "step": 21995 }, { "epoch": 0.8606307222787386, "grad_norm": 0.0, "learning_rate": 1.0015737600370568e-06, "loss": 0.8734, "step": 21996 }, { "epoch": 0.860669848970968, "grad_norm": 0.0, "learning_rate": 1.001021047468329e-06, "loss": 0.9821, "step": 21997 }, { "epoch": 0.8607089756631975, "grad_norm": 0.0, "learning_rate": 1.000468479411928e-06, "loss": 0.7531, "step": 21998 }, { "epoch": 0.8607481023554269, "grad_norm": 0.0, "learning_rate": 9.999160558767251e-07, "loss": 0.9677, "step": 21999 }, { "epoch": 0.8607872290476564, "grad_norm": 0.0, "learning_rate": 9.993637768715935e-07, "loss": 1.0662, "step": 22000 }, { "epoch": 0.8608263557398858, "grad_norm": 0.0, "learning_rate": 9.988116424053973e-07, "loss": 0.9812, "step": 22001 }, { "epoch": 0.8608654824321151, "grad_norm": 0.0, "learning_rate": 9.982596524870113e-07, "loss": 0.8317, "step": 22002 }, { "epoch": 0.8609046091243446, "grad_norm": 0.0, "learning_rate": 9.977078071252944e-07, "loss": 0.9191, "step": 22003 }, { "epoch": 0.860943735816574, "grad_norm": 0.0, "learning_rate": 9.971561063291102e-07, "loss": 0.9229, "step": 22004 }, { "epoch": 0.8609828625088035, "grad_norm": 0.0, "learning_rate": 9.966045501073162e-07, "loss": 0.9578, "step": 22005 }, { "epoch": 0.8610219892010329, "grad_norm": 0.0, "learning_rate": 9.96053138468772e-07, "loss": 0.979, "step": 22006 }, { "epoch": 0.8610611158932624, "grad_norm": 0.0, "learning_rate": 9.955018714223308e-07, "loss": 0.9114, "step": 22007 }, { "epoch": 0.8611002425854918, "grad_norm": 0.0, "learning_rate": 9.949507489768484e-07, "loss": 1.0496, "step": 22008 }, { "epoch": 0.8611393692777213, "grad_norm": 0.0, "learning_rate": 9.943997711411712e-07, "loss": 0.9677, "step": 22009 }, { "epoch": 0.8611784959699507, "grad_norm": 0.0, "learning_rate": 9.9384893792415e-07, "loss": 0.9494, "step": 22010 }, { "epoch": 0.8612176226621802, "grad_norm": 0.0, "learning_rate": 9.932982493346299e-07, "loss": 0.9868, "step": 22011 }, { "epoch": 0.8612567493544095, "grad_norm": 0.0, "learning_rate": 9.927477053814528e-07, "loss": 0.9628, "step": 22012 }, { "epoch": 0.861295876046639, "grad_norm": 0.0, "learning_rate": 9.921973060734612e-07, "loss": 1.0152, "step": 22013 }, { "epoch": 0.8613350027388684, "grad_norm": 0.0, "learning_rate": 9.91647051419492e-07, "loss": 1.0667, "step": 22014 }, { "epoch": 0.8613741294310979, "grad_norm": 0.0, "learning_rate": 9.910969414283866e-07, "loss": 0.8821, "step": 22015 }, { "epoch": 0.8614132561233273, "grad_norm": 0.0, "learning_rate": 9.905469761089725e-07, "loss": 0.9243, "step": 22016 }, { "epoch": 0.8614523828155568, "grad_norm": 0.0, "learning_rate": 9.899971554700872e-07, "loss": 0.9278, "step": 22017 }, { "epoch": 0.8614915095077862, "grad_norm": 0.0, "learning_rate": 9.894474795205555e-07, "loss": 1.0275, "step": 22018 }, { "epoch": 0.8615306362000157, "grad_norm": 0.0, "learning_rate": 9.888979482692052e-07, "loss": 0.8671, "step": 22019 }, { "epoch": 0.8615697628922451, "grad_norm": 0.0, "learning_rate": 9.883485617248635e-07, "loss": 0.8803, "step": 22020 }, { "epoch": 0.8616088895844746, "grad_norm": 0.0, "learning_rate": 9.877993198963532e-07, "loss": 1.0108, "step": 22021 }, { "epoch": 0.861648016276704, "grad_norm": 0.0, "learning_rate": 9.872502227924907e-07, "loss": 1.0626, "step": 22022 }, { "epoch": 0.8616871429689335, "grad_norm": 0.0, "learning_rate": 9.867012704220968e-07, "loss": 1.1017, "step": 22023 }, { "epoch": 0.8617262696611628, "grad_norm": 0.0, "learning_rate": 9.861524627939855e-07, "loss": 1.025, "step": 22024 }, { "epoch": 0.8617653963533923, "grad_norm": 0.0, "learning_rate": 9.856037999169731e-07, "loss": 0.9909, "step": 22025 }, { "epoch": 0.8618045230456217, "grad_norm": 0.0, "learning_rate": 9.85055281799866e-07, "loss": 0.8378, "step": 22026 }, { "epoch": 0.8618436497378512, "grad_norm": 0.0, "learning_rate": 9.845069084514746e-07, "loss": 0.9454, "step": 22027 }, { "epoch": 0.8618827764300806, "grad_norm": 0.0, "learning_rate": 9.839586798806044e-07, "loss": 1.043, "step": 22028 }, { "epoch": 0.86192190312231, "grad_norm": 0.0, "learning_rate": 9.834105960960627e-07, "loss": 0.9726, "step": 22029 }, { "epoch": 0.8619610298145395, "grad_norm": 0.0, "learning_rate": 9.828626571066469e-07, "loss": 0.9418, "step": 22030 }, { "epoch": 0.8620001565067689, "grad_norm": 0.0, "learning_rate": 9.823148629211587e-07, "loss": 0.9489, "step": 22031 }, { "epoch": 0.8620392831989984, "grad_norm": 0.0, "learning_rate": 9.817672135483914e-07, "loss": 1.1118, "step": 22032 }, { "epoch": 0.8620784098912277, "grad_norm": 0.0, "learning_rate": 9.812197089971453e-07, "loss": 0.9908, "step": 22033 }, { "epoch": 0.8621175365834572, "grad_norm": 0.0, "learning_rate": 9.806723492762072e-07, "loss": 0.9824, "step": 22034 }, { "epoch": 0.8621566632756866, "grad_norm": 0.0, "learning_rate": 9.801251343943718e-07, "loss": 0.9858, "step": 22035 }, { "epoch": 0.8621957899679161, "grad_norm": 0.0, "learning_rate": 9.795780643604203e-07, "loss": 0.9655, "step": 22036 }, { "epoch": 0.8622349166601455, "grad_norm": 0.0, "learning_rate": 9.790311391831453e-07, "loss": 0.9207, "step": 22037 }, { "epoch": 0.862274043352375, "grad_norm": 0.0, "learning_rate": 9.784843588713255e-07, "loss": 1.013, "step": 22038 }, { "epoch": 0.8623131700446044, "grad_norm": 0.0, "learning_rate": 9.779377234337428e-07, "loss": 1.1321, "step": 22039 }, { "epoch": 0.8623522967368339, "grad_norm": 0.0, "learning_rate": 9.773912328791735e-07, "loss": 1.1049, "step": 22040 }, { "epoch": 0.8623914234290633, "grad_norm": 0.0, "learning_rate": 9.76844887216396e-07, "loss": 0.9769, "step": 22041 }, { "epoch": 0.8624305501212928, "grad_norm": 0.0, "learning_rate": 9.762986864541824e-07, "loss": 0.8754, "step": 22042 }, { "epoch": 0.8624696768135222, "grad_norm": 0.0, "learning_rate": 9.757526306013055e-07, "loss": 0.9804, "step": 22043 }, { "epoch": 0.8625088035057517, "grad_norm": 0.0, "learning_rate": 9.752067196665327e-07, "loss": 1.0369, "step": 22044 }, { "epoch": 0.862547930197981, "grad_norm": 0.0, "learning_rate": 9.746609536586305e-07, "loss": 0.8314, "step": 22045 }, { "epoch": 0.8625870568902105, "grad_norm": 0.0, "learning_rate": 9.74115332586364e-07, "loss": 0.8315, "step": 22046 }, { "epoch": 0.8626261835824399, "grad_norm": 0.0, "learning_rate": 9.735698564584972e-07, "loss": 0.9585, "step": 22047 }, { "epoch": 0.8626653102746694, "grad_norm": 0.0, "learning_rate": 9.730245252837867e-07, "loss": 0.8711, "step": 22048 }, { "epoch": 0.8627044369668988, "grad_norm": 0.0, "learning_rate": 9.724793390709919e-07, "loss": 0.9355, "step": 22049 }, { "epoch": 0.8627435636591283, "grad_norm": 0.0, "learning_rate": 9.71934297828865e-07, "loss": 0.8593, "step": 22050 }, { "epoch": 0.8627826903513577, "grad_norm": 0.0, "learning_rate": 9.713894015661608e-07, "loss": 0.9814, "step": 22051 }, { "epoch": 0.8628218170435872, "grad_norm": 0.0, "learning_rate": 9.70844650291629e-07, "loss": 0.9239, "step": 22052 }, { "epoch": 0.8628609437358166, "grad_norm": 0.0, "learning_rate": 9.703000440140199e-07, "loss": 0.9677, "step": 22053 }, { "epoch": 0.8629000704280461, "grad_norm": 0.0, "learning_rate": 9.697555827420756e-07, "loss": 0.9962, "step": 22054 }, { "epoch": 0.8629391971202754, "grad_norm": 0.0, "learning_rate": 9.69211266484541e-07, "loss": 0.8925, "step": 22055 }, { "epoch": 0.8629783238125049, "grad_norm": 0.0, "learning_rate": 9.686670952501586e-07, "loss": 0.904, "step": 22056 }, { "epoch": 0.8630174505047343, "grad_norm": 0.0, "learning_rate": 9.681230690476651e-07, "loss": 1.0366, "step": 22057 }, { "epoch": 0.8630565771969637, "grad_norm": 0.0, "learning_rate": 9.675791878857966e-07, "loss": 0.876, "step": 22058 }, { "epoch": 0.8630957038891932, "grad_norm": 0.0, "learning_rate": 9.670354517732883e-07, "loss": 1.0618, "step": 22059 }, { "epoch": 0.8631348305814226, "grad_norm": 0.0, "learning_rate": 9.664918607188734e-07, "loss": 0.8608, "step": 22060 }, { "epoch": 0.8631739572736521, "grad_norm": 0.0, "learning_rate": 9.65948414731278e-07, "loss": 0.8843, "step": 22061 }, { "epoch": 0.8632130839658815, "grad_norm": 0.0, "learning_rate": 9.654051138192322e-07, "loss": 0.9148, "step": 22062 }, { "epoch": 0.863252210658111, "grad_norm": 0.0, "learning_rate": 9.648619579914563e-07, "loss": 0.9038, "step": 22063 }, { "epoch": 0.8632913373503404, "grad_norm": 0.0, "learning_rate": 9.643189472566794e-07, "loss": 0.9164, "step": 22064 }, { "epoch": 0.8633304640425699, "grad_norm": 0.0, "learning_rate": 9.637760816236152e-07, "loss": 0.9595, "step": 22065 }, { "epoch": 0.8633695907347992, "grad_norm": 0.0, "learning_rate": 9.63233361100986e-07, "loss": 0.8764, "step": 22066 }, { "epoch": 0.8634087174270287, "grad_norm": 0.0, "learning_rate": 9.626907856975044e-07, "loss": 0.8095, "step": 22067 }, { "epoch": 0.8634478441192581, "grad_norm": 0.0, "learning_rate": 9.621483554218836e-07, "loss": 0.9316, "step": 22068 }, { "epoch": 0.8634869708114876, "grad_norm": 0.0, "learning_rate": 9.616060702828356e-07, "loss": 0.9364, "step": 22069 }, { "epoch": 0.863526097503717, "grad_norm": 0.0, "learning_rate": 9.610639302890701e-07, "loss": 0.9739, "step": 22070 }, { "epoch": 0.8635652241959465, "grad_norm": 0.0, "learning_rate": 9.60521935449289e-07, "loss": 1.0362, "step": 22071 }, { "epoch": 0.8636043508881759, "grad_norm": 0.0, "learning_rate": 9.599800857721986e-07, "loss": 0.9645, "step": 22072 }, { "epoch": 0.8636434775804054, "grad_norm": 0.0, "learning_rate": 9.59438381266501e-07, "loss": 0.9463, "step": 22073 }, { "epoch": 0.8636826042726348, "grad_norm": 0.0, "learning_rate": 9.588968219408967e-07, "loss": 1.1007, "step": 22074 }, { "epoch": 0.8637217309648643, "grad_norm": 0.0, "learning_rate": 9.583554078040769e-07, "loss": 1.0078, "step": 22075 }, { "epoch": 0.8637608576570937, "grad_norm": 0.0, "learning_rate": 9.57814138864742e-07, "loss": 1.0449, "step": 22076 }, { "epoch": 0.8637999843493231, "grad_norm": 0.0, "learning_rate": 9.57273015131579e-07, "loss": 0.9936, "step": 22077 }, { "epoch": 0.8638391110415525, "grad_norm": 0.0, "learning_rate": 9.567320366132826e-07, "loss": 0.9292, "step": 22078 }, { "epoch": 0.863878237733782, "grad_norm": 0.0, "learning_rate": 9.56191203318536e-07, "loss": 0.8108, "step": 22079 }, { "epoch": 0.8639173644260114, "grad_norm": 0.0, "learning_rate": 9.556505152560292e-07, "loss": 1.0208, "step": 22080 }, { "epoch": 0.8639564911182409, "grad_norm": 0.0, "learning_rate": 9.55109972434437e-07, "loss": 0.9151, "step": 22081 }, { "epoch": 0.8639956178104703, "grad_norm": 0.0, "learning_rate": 9.545695748624484e-07, "loss": 0.9759, "step": 22082 }, { "epoch": 0.8640347445026998, "grad_norm": 0.0, "learning_rate": 9.540293225487363e-07, "loss": 0.9008, "step": 22083 }, { "epoch": 0.8640738711949292, "grad_norm": 0.0, "learning_rate": 9.534892155019803e-07, "loss": 0.893, "step": 22084 }, { "epoch": 0.8641129978871587, "grad_norm": 0.0, "learning_rate": 9.529492537308483e-07, "loss": 0.8859, "step": 22085 }, { "epoch": 0.8641521245793881, "grad_norm": 0.0, "learning_rate": 9.524094372440174e-07, "loss": 0.9079, "step": 22086 }, { "epoch": 0.8641912512716174, "grad_norm": 0.0, "learning_rate": 9.518697660501519e-07, "loss": 0.861, "step": 22087 }, { "epoch": 0.8642303779638469, "grad_norm": 0.0, "learning_rate": 9.513302401579217e-07, "loss": 0.9632, "step": 22088 }, { "epoch": 0.8642695046560763, "grad_norm": 0.0, "learning_rate": 9.507908595759885e-07, "loss": 0.8423, "step": 22089 }, { "epoch": 0.8643086313483058, "grad_norm": 0.0, "learning_rate": 9.502516243130133e-07, "loss": 0.9836, "step": 22090 }, { "epoch": 0.8643477580405352, "grad_norm": 0.0, "learning_rate": 9.497125343776581e-07, "loss": 0.9991, "step": 22091 }, { "epoch": 0.8643868847327647, "grad_norm": 0.0, "learning_rate": 9.491735897785804e-07, "loss": 0.9615, "step": 22092 }, { "epoch": 0.8644260114249941, "grad_norm": 0.0, "learning_rate": 9.48634790524432e-07, "loss": 0.9549, "step": 22093 }, { "epoch": 0.8644651381172236, "grad_norm": 0.0, "learning_rate": 9.480961366238662e-07, "loss": 1.0062, "step": 22094 }, { "epoch": 0.864504264809453, "grad_norm": 0.0, "learning_rate": 9.47557628085537e-07, "loss": 0.9515, "step": 22095 }, { "epoch": 0.8645433915016825, "grad_norm": 0.0, "learning_rate": 9.470192649180853e-07, "loss": 1.0318, "step": 22096 }, { "epoch": 0.8645825181939119, "grad_norm": 0.0, "learning_rate": 9.46481047130161e-07, "loss": 0.8978, "step": 22097 }, { "epoch": 0.8646216448861413, "grad_norm": 0.0, "learning_rate": 9.459429747304094e-07, "loss": 0.8798, "step": 22098 }, { "epoch": 0.8646607715783707, "grad_norm": 0.0, "learning_rate": 9.454050477274646e-07, "loss": 0.9925, "step": 22099 }, { "epoch": 0.8646998982706002, "grad_norm": 0.0, "learning_rate": 9.448672661299696e-07, "loss": 0.9278, "step": 22100 }, { "epoch": 0.8647390249628296, "grad_norm": 0.0, "learning_rate": 9.443296299465609e-07, "loss": 0.9603, "step": 22101 }, { "epoch": 0.8647781516550591, "grad_norm": 0.0, "learning_rate": 9.437921391858696e-07, "loss": 0.9986, "step": 22102 }, { "epoch": 0.8648172783472885, "grad_norm": 0.0, "learning_rate": 9.432547938565285e-07, "loss": 1.0144, "step": 22103 }, { "epoch": 0.864856405039518, "grad_norm": 0.0, "learning_rate": 9.427175939671662e-07, "loss": 0.8485, "step": 22104 }, { "epoch": 0.8648955317317474, "grad_norm": 0.0, "learning_rate": 9.421805395264127e-07, "loss": 0.9322, "step": 22105 }, { "epoch": 0.8649346584239769, "grad_norm": 0.0, "learning_rate": 9.416436305428867e-07, "loss": 0.94, "step": 22106 }, { "epoch": 0.8649737851162063, "grad_norm": 0.0, "learning_rate": 9.411068670252144e-07, "loss": 0.9611, "step": 22107 }, { "epoch": 0.8650129118084358, "grad_norm": 0.0, "learning_rate": 9.405702489820135e-07, "loss": 0.8777, "step": 22108 }, { "epoch": 0.8650520385006651, "grad_norm": 0.0, "learning_rate": 9.400337764219036e-07, "loss": 0.9048, "step": 22109 }, { "epoch": 0.8650911651928946, "grad_norm": 0.0, "learning_rate": 9.394974493534981e-07, "loss": 0.9403, "step": 22110 }, { "epoch": 0.865130291885124, "grad_norm": 0.0, "learning_rate": 9.38961267785411e-07, "loss": 0.885, "step": 22111 }, { "epoch": 0.8651694185773535, "grad_norm": 0.0, "learning_rate": 9.384252317262487e-07, "loss": 1.0367, "step": 22112 }, { "epoch": 0.8652085452695829, "grad_norm": 0.0, "learning_rate": 9.378893411846257e-07, "loss": 1.0453, "step": 22113 }, { "epoch": 0.8652476719618124, "grad_norm": 0.0, "learning_rate": 9.373535961691427e-07, "loss": 0.9581, "step": 22114 }, { "epoch": 0.8652867986540418, "grad_norm": 0.0, "learning_rate": 9.368179966884062e-07, "loss": 0.8948, "step": 22115 }, { "epoch": 0.8653259253462712, "grad_norm": 0.0, "learning_rate": 9.362825427510147e-07, "loss": 0.8124, "step": 22116 }, { "epoch": 0.8653650520385007, "grad_norm": 0.0, "learning_rate": 9.357472343655682e-07, "loss": 0.9052, "step": 22117 }, { "epoch": 0.8654041787307301, "grad_norm": 0.0, "learning_rate": 9.352120715406621e-07, "loss": 0.9815, "step": 22118 }, { "epoch": 0.8654433054229596, "grad_norm": 0.0, "learning_rate": 9.346770542848937e-07, "loss": 1.0095, "step": 22119 }, { "epoch": 0.8654824321151889, "grad_norm": 0.0, "learning_rate": 9.341421826068508e-07, "loss": 0.9327, "step": 22120 }, { "epoch": 0.8655215588074184, "grad_norm": 0.0, "learning_rate": 9.336074565151232e-07, "loss": 0.9462, "step": 22121 }, { "epoch": 0.8655606854996478, "grad_norm": 0.0, "learning_rate": 9.330728760183006e-07, "loss": 0.9095, "step": 22122 }, { "epoch": 0.8655998121918773, "grad_norm": 0.0, "learning_rate": 9.325384411249672e-07, "loss": 0.9168, "step": 22123 }, { "epoch": 0.8656389388841067, "grad_norm": 0.0, "learning_rate": 9.320041518437017e-07, "loss": 0.8715, "step": 22124 }, { "epoch": 0.8656780655763362, "grad_norm": 0.0, "learning_rate": 9.314700081830896e-07, "loss": 0.8849, "step": 22125 }, { "epoch": 0.8657171922685656, "grad_norm": 0.0, "learning_rate": 9.309360101517007e-07, "loss": 1.0113, "step": 22126 }, { "epoch": 0.8657563189607951, "grad_norm": 0.0, "learning_rate": 9.304021577581201e-07, "loss": 0.9761, "step": 22127 }, { "epoch": 0.8657954456530245, "grad_norm": 0.0, "learning_rate": 9.298684510109146e-07, "loss": 0.9654, "step": 22128 }, { "epoch": 0.865834572345254, "grad_norm": 0.0, "learning_rate": 9.293348899186572e-07, "loss": 1.0168, "step": 22129 }, { "epoch": 0.8658736990374833, "grad_norm": 0.0, "learning_rate": 9.288014744899121e-07, "loss": 0.9859, "step": 22130 }, { "epoch": 0.8659128257297128, "grad_norm": 0.0, "learning_rate": 9.282682047332514e-07, "loss": 1.0022, "step": 22131 }, { "epoch": 0.8659519524219422, "grad_norm": 0.0, "learning_rate": 9.277350806572338e-07, "loss": 1.1069, "step": 22132 }, { "epoch": 0.8659910791141717, "grad_norm": 0.0, "learning_rate": 9.272021022704258e-07, "loss": 0.8915, "step": 22133 }, { "epoch": 0.8660302058064011, "grad_norm": 0.0, "learning_rate": 9.266692695813806e-07, "loss": 0.9041, "step": 22134 }, { "epoch": 0.8660693324986306, "grad_norm": 0.0, "learning_rate": 9.26136582598658e-07, "loss": 1.0853, "step": 22135 }, { "epoch": 0.86610845919086, "grad_norm": 0.0, "learning_rate": 9.25604041330811e-07, "loss": 1.036, "step": 22136 }, { "epoch": 0.8661475858830895, "grad_norm": 0.0, "learning_rate": 9.25071645786394e-07, "loss": 0.9812, "step": 22137 }, { "epoch": 0.8661867125753189, "grad_norm": 0.0, "learning_rate": 9.245393959739535e-07, "loss": 0.9918, "step": 22138 }, { "epoch": 0.8662258392675484, "grad_norm": 0.0, "learning_rate": 9.240072919020371e-07, "loss": 0.9054, "step": 22139 }, { "epoch": 0.8662649659597778, "grad_norm": 0.0, "learning_rate": 9.234753335791913e-07, "loss": 0.9237, "step": 22140 }, { "epoch": 0.8663040926520073, "grad_norm": 0.0, "learning_rate": 9.229435210139604e-07, "loss": 0.9712, "step": 22141 }, { "epoch": 0.8663432193442366, "grad_norm": 0.0, "learning_rate": 9.224118542148808e-07, "loss": 0.9529, "step": 22142 }, { "epoch": 0.866382346036466, "grad_norm": 0.0, "learning_rate": 9.218803331904913e-07, "loss": 1.0819, "step": 22143 }, { "epoch": 0.8664214727286955, "grad_norm": 0.0, "learning_rate": 9.213489579493295e-07, "loss": 0.9435, "step": 22144 }, { "epoch": 0.8664605994209249, "grad_norm": 0.0, "learning_rate": 9.208177284999264e-07, "loss": 0.9976, "step": 22145 }, { "epoch": 0.8664997261131544, "grad_norm": 0.0, "learning_rate": 9.20286644850813e-07, "loss": 0.846, "step": 22146 }, { "epoch": 0.8665388528053838, "grad_norm": 0.0, "learning_rate": 9.197557070105212e-07, "loss": 0.9174, "step": 22147 }, { "epoch": 0.8665779794976133, "grad_norm": 0.0, "learning_rate": 9.19224914987572e-07, "loss": 0.9652, "step": 22148 }, { "epoch": 0.8666171061898427, "grad_norm": 0.0, "learning_rate": 9.186942687904921e-07, "loss": 0.8668, "step": 22149 }, { "epoch": 0.8666562328820722, "grad_norm": 0.0, "learning_rate": 9.181637684278044e-07, "loss": 0.945, "step": 22150 }, { "epoch": 0.8666953595743015, "grad_norm": 0.0, "learning_rate": 9.176334139080257e-07, "loss": 0.9377, "step": 22151 }, { "epoch": 0.866734486266531, "grad_norm": 0.0, "learning_rate": 9.171032052396723e-07, "loss": 0.937, "step": 22152 }, { "epoch": 0.8667736129587604, "grad_norm": 0.0, "learning_rate": 9.165731424312596e-07, "loss": 1.0402, "step": 22153 }, { "epoch": 0.8668127396509899, "grad_norm": 0.0, "learning_rate": 9.160432254913032e-07, "loss": 0.9771, "step": 22154 }, { "epoch": 0.8668518663432193, "grad_norm": 0.0, "learning_rate": 9.155134544283062e-07, "loss": 0.9601, "step": 22155 }, { "epoch": 0.8668909930354488, "grad_norm": 0.0, "learning_rate": 9.149838292507829e-07, "loss": 0.9729, "step": 22156 }, { "epoch": 0.8669301197276782, "grad_norm": 0.0, "learning_rate": 9.144543499672309e-07, "loss": 0.9054, "step": 22157 }, { "epoch": 0.8669692464199077, "grad_norm": 0.0, "learning_rate": 9.139250165861613e-07, "loss": 0.8503, "step": 22158 }, { "epoch": 0.8670083731121371, "grad_norm": 0.0, "learning_rate": 9.133958291160683e-07, "loss": 0.9092, "step": 22159 }, { "epoch": 0.8670474998043666, "grad_norm": 0.0, "learning_rate": 9.12866787565454e-07, "loss": 0.956, "step": 22160 }, { "epoch": 0.867086626496596, "grad_norm": 0.0, "learning_rate": 9.123378919428083e-07, "loss": 0.9586, "step": 22161 }, { "epoch": 0.8671257531888255, "grad_norm": 0.0, "learning_rate": 9.118091422566333e-07, "loss": 1.0112, "step": 22162 }, { "epoch": 0.8671648798810548, "grad_norm": 0.0, "learning_rate": 9.112805385154122e-07, "loss": 0.9671, "step": 22163 }, { "epoch": 0.8672040065732843, "grad_norm": 0.0, "learning_rate": 9.107520807276394e-07, "loss": 0.8661, "step": 22164 }, { "epoch": 0.8672431332655137, "grad_norm": 0.0, "learning_rate": 9.102237689017967e-07, "loss": 0.9669, "step": 22165 }, { "epoch": 0.8672822599577432, "grad_norm": 0.0, "learning_rate": 9.0969560304637e-07, "loss": 0.9034, "step": 22166 }, { "epoch": 0.8673213866499726, "grad_norm": 0.0, "learning_rate": 9.09167583169841e-07, "loss": 0.9992, "step": 22167 }, { "epoch": 0.8673605133422021, "grad_norm": 0.0, "learning_rate": 9.08639709280692e-07, "loss": 0.8528, "step": 22168 }, { "epoch": 0.8673996400344315, "grad_norm": 0.0, "learning_rate": 9.08111981387394e-07, "loss": 0.954, "step": 22169 }, { "epoch": 0.867438766726661, "grad_norm": 0.0, "learning_rate": 9.075843994984257e-07, "loss": 0.9264, "step": 22170 }, { "epoch": 0.8674778934188904, "grad_norm": 0.0, "learning_rate": 9.070569636222582e-07, "loss": 0.9822, "step": 22171 }, { "epoch": 0.8675170201111198, "grad_norm": 0.0, "learning_rate": 9.065296737673634e-07, "loss": 1.0601, "step": 22172 }, { "epoch": 0.8675561468033492, "grad_norm": 0.0, "learning_rate": 9.060025299422059e-07, "loss": 0.9975, "step": 22173 }, { "epoch": 0.8675952734955786, "grad_norm": 0.0, "learning_rate": 9.054755321552533e-07, "loss": 0.9863, "step": 22174 }, { "epoch": 0.8676344001878081, "grad_norm": 0.0, "learning_rate": 9.049486804149655e-07, "loss": 0.9965, "step": 22175 }, { "epoch": 0.8676735268800375, "grad_norm": 0.0, "learning_rate": 9.044219747298078e-07, "loss": 1.0634, "step": 22176 }, { "epoch": 0.867712653572267, "grad_norm": 0.0, "learning_rate": 9.038954151082336e-07, "loss": 0.9501, "step": 22177 }, { "epoch": 0.8677517802644964, "grad_norm": 0.0, "learning_rate": 9.033690015587038e-07, "loss": 0.9586, "step": 22178 }, { "epoch": 0.8677909069567259, "grad_norm": 0.0, "learning_rate": 9.028427340896639e-07, "loss": 0.9726, "step": 22179 }, { "epoch": 0.8678300336489553, "grad_norm": 0.0, "learning_rate": 9.02316612709575e-07, "loss": 0.8803, "step": 22180 }, { "epoch": 0.8678691603411848, "grad_norm": 0.0, "learning_rate": 9.017906374268804e-07, "loss": 1.0248, "step": 22181 }, { "epoch": 0.8679082870334142, "grad_norm": 0.0, "learning_rate": 9.012648082500275e-07, "loss": 0.924, "step": 22182 }, { "epoch": 0.8679474137256437, "grad_norm": 0.0, "learning_rate": 9.007391251874598e-07, "loss": 0.8113, "step": 22183 }, { "epoch": 0.867986540417873, "grad_norm": 0.0, "learning_rate": 9.002135882476193e-07, "loss": 0.9631, "step": 22184 }, { "epoch": 0.8680256671101025, "grad_norm": 0.0, "learning_rate": 8.99688197438946e-07, "loss": 0.9355, "step": 22185 }, { "epoch": 0.8680647938023319, "grad_norm": 0.0, "learning_rate": 8.991629527698786e-07, "loss": 0.9126, "step": 22186 }, { "epoch": 0.8681039204945614, "grad_norm": 0.0, "learning_rate": 8.986378542488483e-07, "loss": 1.0268, "step": 22187 }, { "epoch": 0.8681430471867908, "grad_norm": 0.0, "learning_rate": 8.981129018842893e-07, "loss": 0.9198, "step": 22188 }, { "epoch": 0.8681821738790203, "grad_norm": 0.0, "learning_rate": 8.97588095684635e-07, "loss": 0.9393, "step": 22189 }, { "epoch": 0.8682213005712497, "grad_norm": 0.0, "learning_rate": 8.970634356583064e-07, "loss": 0.9863, "step": 22190 }, { "epoch": 0.8682604272634792, "grad_norm": 0.0, "learning_rate": 8.965389218137333e-07, "loss": 1.087, "step": 22191 }, { "epoch": 0.8682995539557086, "grad_norm": 0.0, "learning_rate": 8.96014554159339e-07, "loss": 0.9799, "step": 22192 }, { "epoch": 0.8683386806479381, "grad_norm": 0.0, "learning_rate": 8.954903327035414e-07, "loss": 0.8736, "step": 22193 }, { "epoch": 0.8683778073401675, "grad_norm": 0.0, "learning_rate": 8.949662574547613e-07, "loss": 0.9642, "step": 22194 }, { "epoch": 0.868416934032397, "grad_norm": 0.0, "learning_rate": 8.944423284214143e-07, "loss": 0.8761, "step": 22195 }, { "epoch": 0.8684560607246263, "grad_norm": 0.0, "learning_rate": 8.939185456119126e-07, "loss": 1.0389, "step": 22196 }, { "epoch": 0.8684951874168558, "grad_norm": 0.0, "learning_rate": 8.933949090346683e-07, "loss": 0.9186, "step": 22197 }, { "epoch": 0.8685343141090852, "grad_norm": 0.0, "learning_rate": 8.928714186980913e-07, "loss": 0.9985, "step": 22198 }, { "epoch": 0.8685734408013147, "grad_norm": 0.0, "learning_rate": 8.923480746105884e-07, "loss": 1.0219, "step": 22199 }, { "epoch": 0.8686125674935441, "grad_norm": 0.0, "learning_rate": 8.918248767805615e-07, "loss": 0.9875, "step": 22200 }, { "epoch": 0.8686516941857735, "grad_norm": 0.0, "learning_rate": 8.91301825216413e-07, "loss": 1.0052, "step": 22201 }, { "epoch": 0.868690820878003, "grad_norm": 0.0, "learning_rate": 8.907789199265449e-07, "loss": 0.8929, "step": 22202 }, { "epoch": 0.8687299475702324, "grad_norm": 0.0, "learning_rate": 8.902561609193539e-07, "loss": 0.9529, "step": 22203 }, { "epoch": 0.8687690742624619, "grad_norm": 0.0, "learning_rate": 8.89733548203231e-07, "loss": 0.9565, "step": 22204 }, { "epoch": 0.8688082009546912, "grad_norm": 0.0, "learning_rate": 8.892110817865751e-07, "loss": 0.91, "step": 22205 }, { "epoch": 0.8688473276469207, "grad_norm": 0.0, "learning_rate": 8.886887616777673e-07, "loss": 0.9837, "step": 22206 }, { "epoch": 0.8688864543391501, "grad_norm": 0.0, "learning_rate": 8.881665878852063e-07, "loss": 1.0266, "step": 22207 }, { "epoch": 0.8689255810313796, "grad_norm": 0.0, "learning_rate": 8.87644560417269e-07, "loss": 0.9656, "step": 22208 }, { "epoch": 0.868964707723609, "grad_norm": 0.0, "learning_rate": 8.87122679282344e-07, "loss": 0.9293, "step": 22209 }, { "epoch": 0.8690038344158385, "grad_norm": 0.0, "learning_rate": 8.866009444888057e-07, "loss": 0.8358, "step": 22210 }, { "epoch": 0.8690429611080679, "grad_norm": 0.0, "learning_rate": 8.860793560450409e-07, "loss": 0.9417, "step": 22211 }, { "epoch": 0.8690820878002974, "grad_norm": 0.0, "learning_rate": 8.855579139594184e-07, "loss": 1.0053, "step": 22212 }, { "epoch": 0.8691212144925268, "grad_norm": 0.0, "learning_rate": 8.85036618240318e-07, "loss": 1.1019, "step": 22213 }, { "epoch": 0.8691603411847563, "grad_norm": 0.0, "learning_rate": 8.845154688961044e-07, "loss": 0.9489, "step": 22214 }, { "epoch": 0.8691994678769857, "grad_norm": 0.0, "learning_rate": 8.839944659351507e-07, "loss": 1.0828, "step": 22215 }, { "epoch": 0.8692385945692152, "grad_norm": 0.0, "learning_rate": 8.834736093658237e-07, "loss": 0.9106, "step": 22216 }, { "epoch": 0.8692777212614445, "grad_norm": 0.0, "learning_rate": 8.829528991964875e-07, "loss": 0.9889, "step": 22217 }, { "epoch": 0.869316847953674, "grad_norm": 0.0, "learning_rate": 8.824323354355024e-07, "loss": 0.9498, "step": 22218 }, { "epoch": 0.8693559746459034, "grad_norm": 0.0, "learning_rate": 8.819119180912283e-07, "loss": 1.0519, "step": 22219 }, { "epoch": 0.8693951013381329, "grad_norm": 0.0, "learning_rate": 8.81391647172024e-07, "loss": 1.0462, "step": 22220 }, { "epoch": 0.8694342280303623, "grad_norm": 0.0, "learning_rate": 8.808715226862452e-07, "loss": 1.0011, "step": 22221 }, { "epoch": 0.8694733547225918, "grad_norm": 0.0, "learning_rate": 8.803515446422406e-07, "loss": 0.9581, "step": 22222 }, { "epoch": 0.8695124814148212, "grad_norm": 0.0, "learning_rate": 8.798317130483647e-07, "loss": 0.9521, "step": 22223 }, { "epoch": 0.8695516081070507, "grad_norm": 0.0, "learning_rate": 8.793120279129596e-07, "loss": 0.8593, "step": 22224 }, { "epoch": 0.8695907347992801, "grad_norm": 0.0, "learning_rate": 8.787924892443789e-07, "loss": 1.0127, "step": 22225 }, { "epoch": 0.8696298614915096, "grad_norm": 0.0, "learning_rate": 8.782730970509589e-07, "loss": 0.9729, "step": 22226 }, { "epoch": 0.869668988183739, "grad_norm": 0.0, "learning_rate": 8.777538513410445e-07, "loss": 0.8212, "step": 22227 }, { "epoch": 0.8697081148759684, "grad_norm": 0.0, "learning_rate": 8.772347521229696e-07, "loss": 0.9355, "step": 22228 }, { "epoch": 0.8697472415681978, "grad_norm": 0.0, "learning_rate": 8.767157994050746e-07, "loss": 0.8959, "step": 22229 }, { "epoch": 0.8697863682604272, "grad_norm": 0.0, "learning_rate": 8.761969931956915e-07, "loss": 0.8654, "step": 22230 }, { "epoch": 0.8698254949526567, "grad_norm": 0.0, "learning_rate": 8.756783335031538e-07, "loss": 0.8414, "step": 22231 }, { "epoch": 0.8698646216448861, "grad_norm": 0.0, "learning_rate": 8.751598203357858e-07, "loss": 0.885, "step": 22232 }, { "epoch": 0.8699037483371156, "grad_norm": 0.0, "learning_rate": 8.746414537019177e-07, "loss": 0.958, "step": 22233 }, { "epoch": 0.869942875029345, "grad_norm": 0.0, "learning_rate": 8.741232336098749e-07, "loss": 0.9976, "step": 22234 }, { "epoch": 0.8699820017215745, "grad_norm": 0.0, "learning_rate": 8.736051600679763e-07, "loss": 1.1742, "step": 22235 }, { "epoch": 0.8700211284138039, "grad_norm": 0.0, "learning_rate": 8.73087233084542e-07, "loss": 1.0298, "step": 22236 }, { "epoch": 0.8700602551060334, "grad_norm": 0.0, "learning_rate": 8.725694526678907e-07, "loss": 0.9997, "step": 22237 }, { "epoch": 0.8700993817982627, "grad_norm": 0.0, "learning_rate": 8.720518188263382e-07, "loss": 1.0021, "step": 22238 }, { "epoch": 0.8701385084904922, "grad_norm": 0.0, "learning_rate": 8.715343315681945e-07, "loss": 0.8769, "step": 22239 }, { "epoch": 0.8701776351827216, "grad_norm": 0.0, "learning_rate": 8.710169909017718e-07, "loss": 0.9408, "step": 22240 }, { "epoch": 0.8702167618749511, "grad_norm": 0.0, "learning_rate": 8.704997968353746e-07, "loss": 0.9411, "step": 22241 }, { "epoch": 0.8702558885671805, "grad_norm": 0.0, "learning_rate": 8.699827493773116e-07, "loss": 0.8741, "step": 22242 }, { "epoch": 0.87029501525941, "grad_norm": 0.0, "learning_rate": 8.694658485358853e-07, "loss": 0.8603, "step": 22243 }, { "epoch": 0.8703341419516394, "grad_norm": 0.0, "learning_rate": 8.689490943193979e-07, "loss": 0.9026, "step": 22244 }, { "epoch": 0.8703732686438689, "grad_norm": 0.0, "learning_rate": 8.684324867361438e-07, "loss": 0.8864, "step": 22245 }, { "epoch": 0.8704123953360983, "grad_norm": 0.0, "learning_rate": 8.679160257944219e-07, "loss": 0.8762, "step": 22246 }, { "epoch": 0.8704515220283278, "grad_norm": 0.0, "learning_rate": 8.673997115025257e-07, "loss": 0.9593, "step": 22247 }, { "epoch": 0.8704906487205571, "grad_norm": 0.0, "learning_rate": 8.668835438687484e-07, "loss": 0.8774, "step": 22248 }, { "epoch": 0.8705297754127866, "grad_norm": 0.0, "learning_rate": 8.663675229013746e-07, "loss": 0.8954, "step": 22249 }, { "epoch": 0.870568902105016, "grad_norm": 0.0, "learning_rate": 8.658516486086943e-07, "loss": 0.9866, "step": 22250 }, { "epoch": 0.8706080287972455, "grad_norm": 0.0, "learning_rate": 8.653359209989887e-07, "loss": 1.0682, "step": 22251 }, { "epoch": 0.8706471554894749, "grad_norm": 0.0, "learning_rate": 8.648203400805444e-07, "loss": 0.9969, "step": 22252 }, { "epoch": 0.8706862821817044, "grad_norm": 0.0, "learning_rate": 8.643049058616371e-07, "loss": 0.8743, "step": 22253 }, { "epoch": 0.8707254088739338, "grad_norm": 0.0, "learning_rate": 8.637896183505467e-07, "loss": 0.9736, "step": 22254 }, { "epoch": 0.8707645355661633, "grad_norm": 0.0, "learning_rate": 8.632744775555435e-07, "loss": 1.0621, "step": 22255 }, { "epoch": 0.8708036622583927, "grad_norm": 0.0, "learning_rate": 8.627594834849073e-07, "loss": 1.0595, "step": 22256 }, { "epoch": 0.8708427889506221, "grad_norm": 0.0, "learning_rate": 8.622446361469017e-07, "loss": 1.0558, "step": 22257 }, { "epoch": 0.8708819156428516, "grad_norm": 0.0, "learning_rate": 8.617299355497988e-07, "loss": 0.9772, "step": 22258 }, { "epoch": 0.8709210423350809, "grad_norm": 0.0, "learning_rate": 8.612153817018598e-07, "loss": 0.8757, "step": 22259 }, { "epoch": 0.8709601690273104, "grad_norm": 0.0, "learning_rate": 8.607009746113526e-07, "loss": 0.9692, "step": 22260 }, { "epoch": 0.8709992957195398, "grad_norm": 0.0, "learning_rate": 8.601867142865339e-07, "loss": 0.8951, "step": 22261 }, { "epoch": 0.8710384224117693, "grad_norm": 0.0, "learning_rate": 8.596726007356659e-07, "loss": 0.9921, "step": 22262 }, { "epoch": 0.8710775491039987, "grad_norm": 0.0, "learning_rate": 8.59158633967e-07, "loss": 0.8509, "step": 22263 }, { "epoch": 0.8711166757962282, "grad_norm": 0.0, "learning_rate": 8.586448139887927e-07, "loss": 0.9026, "step": 22264 }, { "epoch": 0.8711558024884576, "grad_norm": 0.0, "learning_rate": 8.581311408092952e-07, "loss": 0.9919, "step": 22265 }, { "epoch": 0.8711949291806871, "grad_norm": 0.0, "learning_rate": 8.576176144367576e-07, "loss": 0.9106, "step": 22266 }, { "epoch": 0.8712340558729165, "grad_norm": 0.0, "learning_rate": 8.571042348794234e-07, "loss": 1.0081, "step": 22267 }, { "epoch": 0.871273182565146, "grad_norm": 0.0, "learning_rate": 8.565910021455393e-07, "loss": 0.907, "step": 22268 }, { "epoch": 0.8713123092573754, "grad_norm": 0.0, "learning_rate": 8.560779162433452e-07, "loss": 0.9922, "step": 22269 }, { "epoch": 0.8713514359496048, "grad_norm": 0.0, "learning_rate": 8.555649771810837e-07, "loss": 0.9833, "step": 22270 }, { "epoch": 0.8713905626418342, "grad_norm": 0.0, "learning_rate": 8.550521849669891e-07, "loss": 0.9883, "step": 22271 }, { "epoch": 0.8714296893340637, "grad_norm": 0.0, "learning_rate": 8.545395396092981e-07, "loss": 1.0444, "step": 22272 }, { "epoch": 0.8714688160262931, "grad_norm": 0.0, "learning_rate": 8.54027041116241e-07, "loss": 0.9682, "step": 22273 }, { "epoch": 0.8715079427185226, "grad_norm": 0.0, "learning_rate": 8.535146894960488e-07, "loss": 0.8552, "step": 22274 }, { "epoch": 0.871547069410752, "grad_norm": 0.0, "learning_rate": 8.530024847569496e-07, "loss": 0.9568, "step": 22275 }, { "epoch": 0.8715861961029815, "grad_norm": 0.0, "learning_rate": 8.524904269071698e-07, "loss": 0.9671, "step": 22276 }, { "epoch": 0.8716253227952109, "grad_norm": 0.0, "learning_rate": 8.519785159549299e-07, "loss": 0.9531, "step": 22277 }, { "epoch": 0.8716644494874404, "grad_norm": 0.0, "learning_rate": 8.514667519084518e-07, "loss": 1.0434, "step": 22278 }, { "epoch": 0.8717035761796698, "grad_norm": 0.0, "learning_rate": 8.509551347759559e-07, "loss": 0.8155, "step": 22279 }, { "epoch": 0.8717427028718993, "grad_norm": 0.0, "learning_rate": 8.504436645656545e-07, "loss": 0.7421, "step": 22280 }, { "epoch": 0.8717818295641286, "grad_norm": 0.0, "learning_rate": 8.49932341285763e-07, "loss": 0.8067, "step": 22281 }, { "epoch": 0.8718209562563581, "grad_norm": 0.0, "learning_rate": 8.494211649444917e-07, "loss": 1.1249, "step": 22282 }, { "epoch": 0.8718600829485875, "grad_norm": 0.0, "learning_rate": 8.489101355500529e-07, "loss": 0.9594, "step": 22283 }, { "epoch": 0.871899209640817, "grad_norm": 0.0, "learning_rate": 8.483992531106477e-07, "loss": 0.823, "step": 22284 }, { "epoch": 0.8719383363330464, "grad_norm": 0.0, "learning_rate": 8.478885176344853e-07, "loss": 1.0241, "step": 22285 }, { "epoch": 0.8719774630252758, "grad_norm": 0.0, "learning_rate": 8.473779291297612e-07, "loss": 1.0345, "step": 22286 }, { "epoch": 0.8720165897175053, "grad_norm": 0.0, "learning_rate": 8.468674876046823e-07, "loss": 0.9122, "step": 22287 }, { "epoch": 0.8720557164097347, "grad_norm": 0.0, "learning_rate": 8.463571930674397e-07, "loss": 0.9725, "step": 22288 }, { "epoch": 0.8720948431019642, "grad_norm": 0.0, "learning_rate": 8.458470455262335e-07, "loss": 0.9373, "step": 22289 }, { "epoch": 0.8721339697941936, "grad_norm": 0.0, "learning_rate": 8.453370449892506e-07, "loss": 0.9876, "step": 22290 }, { "epoch": 0.872173096486423, "grad_norm": 0.0, "learning_rate": 8.448271914646822e-07, "loss": 1.1749, "step": 22291 }, { "epoch": 0.8722122231786524, "grad_norm": 0.0, "learning_rate": 8.443174849607183e-07, "loss": 0.9806, "step": 22292 }, { "epoch": 0.8722513498708819, "grad_norm": 0.0, "learning_rate": 8.438079254855447e-07, "loss": 1.0372, "step": 22293 }, { "epoch": 0.8722904765631113, "grad_norm": 0.0, "learning_rate": 8.432985130473403e-07, "loss": 0.9332, "step": 22294 }, { "epoch": 0.8723296032553408, "grad_norm": 0.0, "learning_rate": 8.427892476542876e-07, "loss": 0.9482, "step": 22295 }, { "epoch": 0.8723687299475702, "grad_norm": 0.0, "learning_rate": 8.422801293145655e-07, "loss": 0.9288, "step": 22296 }, { "epoch": 0.8724078566397997, "grad_norm": 0.0, "learning_rate": 8.417711580363508e-07, "loss": 1.0655, "step": 22297 }, { "epoch": 0.8724469833320291, "grad_norm": 0.0, "learning_rate": 8.412623338278125e-07, "loss": 1.013, "step": 22298 }, { "epoch": 0.8724861100242586, "grad_norm": 0.0, "learning_rate": 8.407536566971275e-07, "loss": 0.9258, "step": 22299 }, { "epoch": 0.872525236716488, "grad_norm": 0.0, "learning_rate": 8.402451266524581e-07, "loss": 0.961, "step": 22300 }, { "epoch": 0.8725643634087175, "grad_norm": 0.0, "learning_rate": 8.397367437019777e-07, "loss": 0.9919, "step": 22301 }, { "epoch": 0.8726034901009468, "grad_norm": 0.0, "learning_rate": 8.392285078538453e-07, "loss": 0.9692, "step": 22302 }, { "epoch": 0.8726426167931763, "grad_norm": 0.0, "learning_rate": 8.387204191162246e-07, "loss": 0.9336, "step": 22303 }, { "epoch": 0.8726817434854057, "grad_norm": 0.0, "learning_rate": 8.38212477497271e-07, "loss": 0.8862, "step": 22304 }, { "epoch": 0.8727208701776352, "grad_norm": 0.0, "learning_rate": 8.377046830051494e-07, "loss": 0.9612, "step": 22305 }, { "epoch": 0.8727599968698646, "grad_norm": 0.0, "learning_rate": 8.371970356480064e-07, "loss": 1.0095, "step": 22306 }, { "epoch": 0.8727991235620941, "grad_norm": 0.0, "learning_rate": 8.366895354339999e-07, "loss": 0.9787, "step": 22307 }, { "epoch": 0.8728382502543235, "grad_norm": 0.0, "learning_rate": 8.361821823712757e-07, "loss": 1.0415, "step": 22308 }, { "epoch": 0.872877376946553, "grad_norm": 0.0, "learning_rate": 8.356749764679816e-07, "loss": 0.9037, "step": 22309 }, { "epoch": 0.8729165036387824, "grad_norm": 0.0, "learning_rate": 8.351679177322647e-07, "loss": 1.1341, "step": 22310 }, { "epoch": 0.8729556303310119, "grad_norm": 0.0, "learning_rate": 8.346610061722682e-07, "loss": 0.8856, "step": 22311 }, { "epoch": 0.8729947570232413, "grad_norm": 0.0, "learning_rate": 8.341542417961301e-07, "loss": 0.8675, "step": 22312 }, { "epoch": 0.8730338837154707, "grad_norm": 0.0, "learning_rate": 8.336476246119884e-07, "loss": 0.9735, "step": 22313 }, { "epoch": 0.8730730104077001, "grad_norm": 0.0, "learning_rate": 8.3314115462798e-07, "loss": 1.098, "step": 22314 }, { "epoch": 0.8731121370999295, "grad_norm": 0.0, "learning_rate": 8.326348318522404e-07, "loss": 1.0031, "step": 22315 }, { "epoch": 0.873151263792159, "grad_norm": 0.0, "learning_rate": 8.321286562928954e-07, "loss": 0.8869, "step": 22316 }, { "epoch": 0.8731903904843884, "grad_norm": 0.0, "learning_rate": 8.316226279580775e-07, "loss": 1.0378, "step": 22317 }, { "epoch": 0.8732295171766179, "grad_norm": 0.0, "learning_rate": 8.311167468559122e-07, "loss": 0.9711, "step": 22318 }, { "epoch": 0.8732686438688473, "grad_norm": 0.0, "learning_rate": 8.30611012994521e-07, "loss": 0.9857, "step": 22319 }, { "epoch": 0.8733077705610768, "grad_norm": 0.0, "learning_rate": 8.301054263820274e-07, "loss": 0.9532, "step": 22320 }, { "epoch": 0.8733468972533062, "grad_norm": 0.0, "learning_rate": 8.295999870265514e-07, "loss": 0.985, "step": 22321 }, { "epoch": 0.8733860239455357, "grad_norm": 0.0, "learning_rate": 8.290946949362078e-07, "loss": 0.9747, "step": 22322 }, { "epoch": 0.873425150637765, "grad_norm": 0.0, "learning_rate": 8.28589550119111e-07, "loss": 1.0844, "step": 22323 }, { "epoch": 0.8734642773299945, "grad_norm": 0.0, "learning_rate": 8.280845525833747e-07, "loss": 0.9318, "step": 22324 }, { "epoch": 0.8735034040222239, "grad_norm": 0.0, "learning_rate": 8.275797023371058e-07, "loss": 0.9062, "step": 22325 }, { "epoch": 0.8735425307144534, "grad_norm": 0.0, "learning_rate": 8.270749993884142e-07, "loss": 0.9713, "step": 22326 }, { "epoch": 0.8735816574066828, "grad_norm": 0.0, "learning_rate": 8.265704437454025e-07, "loss": 0.9932, "step": 22327 }, { "epoch": 0.8736207840989123, "grad_norm": 0.0, "learning_rate": 8.260660354161776e-07, "loss": 0.8292, "step": 22328 }, { "epoch": 0.8736599107911417, "grad_norm": 0.0, "learning_rate": 8.25561774408834e-07, "loss": 0.9619, "step": 22329 }, { "epoch": 0.8736990374833712, "grad_norm": 0.0, "learning_rate": 8.250576607314742e-07, "loss": 0.93, "step": 22330 }, { "epoch": 0.8737381641756006, "grad_norm": 0.0, "learning_rate": 8.245536943921884e-07, "loss": 0.994, "step": 22331 }, { "epoch": 0.8737772908678301, "grad_norm": 0.0, "learning_rate": 8.240498753990756e-07, "loss": 0.9922, "step": 22332 }, { "epoch": 0.8738164175600595, "grad_norm": 0.0, "learning_rate": 8.235462037602215e-07, "loss": 0.9343, "step": 22333 }, { "epoch": 0.873855544252289, "grad_norm": 0.0, "learning_rate": 8.230426794837187e-07, "loss": 1.0423, "step": 22334 }, { "epoch": 0.8738946709445183, "grad_norm": 0.0, "learning_rate": 8.225393025776484e-07, "loss": 1.0516, "step": 22335 }, { "epoch": 0.8739337976367478, "grad_norm": 0.0, "learning_rate": 8.220360730500998e-07, "loss": 1.0626, "step": 22336 }, { "epoch": 0.8739729243289772, "grad_norm": 0.0, "learning_rate": 8.215329909091496e-07, "loss": 0.8868, "step": 22337 }, { "epoch": 0.8740120510212067, "grad_norm": 0.0, "learning_rate": 8.210300561628803e-07, "loss": 0.8873, "step": 22338 }, { "epoch": 0.8740511777134361, "grad_norm": 0.0, "learning_rate": 8.205272688193644e-07, "loss": 1.0515, "step": 22339 }, { "epoch": 0.8740903044056656, "grad_norm": 0.0, "learning_rate": 8.200246288866775e-07, "loss": 0.9733, "step": 22340 }, { "epoch": 0.874129431097895, "grad_norm": 0.0, "learning_rate": 8.195221363728923e-07, "loss": 0.9679, "step": 22341 }, { "epoch": 0.8741685577901244, "grad_norm": 0.0, "learning_rate": 8.190197912860798e-07, "loss": 0.9716, "step": 22342 }, { "epoch": 0.8742076844823539, "grad_norm": 0.0, "learning_rate": 8.185175936343037e-07, "loss": 0.92, "step": 22343 }, { "epoch": 0.8742468111745832, "grad_norm": 0.0, "learning_rate": 8.180155434256288e-07, "loss": 0.9814, "step": 22344 }, { "epoch": 0.8742859378668127, "grad_norm": 0.0, "learning_rate": 8.175136406681194e-07, "loss": 0.9376, "step": 22345 }, { "epoch": 0.8743250645590421, "grad_norm": 0.0, "learning_rate": 8.170118853698361e-07, "loss": 0.7532, "step": 22346 }, { "epoch": 0.8743641912512716, "grad_norm": 0.0, "learning_rate": 8.165102775388334e-07, "loss": 0.9606, "step": 22347 }, { "epoch": 0.874403317943501, "grad_norm": 0.0, "learning_rate": 8.160088171831704e-07, "loss": 0.9185, "step": 22348 }, { "epoch": 0.8744424446357305, "grad_norm": 0.0, "learning_rate": 8.155075043108928e-07, "loss": 1.1293, "step": 22349 }, { "epoch": 0.8744815713279599, "grad_norm": 0.0, "learning_rate": 8.150063389300611e-07, "loss": 0.8429, "step": 22350 }, { "epoch": 0.8745206980201894, "grad_norm": 0.0, "learning_rate": 8.145053210487152e-07, "loss": 1.1121, "step": 22351 }, { "epoch": 0.8745598247124188, "grad_norm": 0.0, "learning_rate": 8.140044506749056e-07, "loss": 1.0145, "step": 22352 }, { "epoch": 0.8745989514046483, "grad_norm": 0.0, "learning_rate": 8.135037278166702e-07, "loss": 1.0116, "step": 22353 }, { "epoch": 0.8746380780968777, "grad_norm": 0.0, "learning_rate": 8.130031524820569e-07, "loss": 0.9271, "step": 22354 }, { "epoch": 0.8746772047891072, "grad_norm": 0.0, "learning_rate": 8.125027246791006e-07, "loss": 0.9393, "step": 22355 }, { "epoch": 0.8747163314813365, "grad_norm": 0.0, "learning_rate": 8.120024444158381e-07, "loss": 1.0332, "step": 22356 }, { "epoch": 0.874755458173566, "grad_norm": 0.0, "learning_rate": 8.115023117003029e-07, "loss": 0.9536, "step": 22357 }, { "epoch": 0.8747945848657954, "grad_norm": 0.0, "learning_rate": 8.110023265405253e-07, "loss": 1.0251, "step": 22358 }, { "epoch": 0.8748337115580249, "grad_norm": 0.0, "learning_rate": 8.105024889445367e-07, "loss": 0.9708, "step": 22359 }, { "epoch": 0.8748728382502543, "grad_norm": 0.0, "learning_rate": 8.100027989203651e-07, "loss": 0.9133, "step": 22360 }, { "epoch": 0.8749119649424838, "grad_norm": 0.0, "learning_rate": 8.095032564760308e-07, "loss": 1.0164, "step": 22361 }, { "epoch": 0.8749510916347132, "grad_norm": 0.0, "learning_rate": 8.090038616195572e-07, "loss": 1.0364, "step": 22362 }, { "epoch": 0.8749902183269427, "grad_norm": 0.0, "learning_rate": 8.08504614358967e-07, "loss": 0.963, "step": 22363 }, { "epoch": 0.8750293450191721, "grad_norm": 0.0, "learning_rate": 8.080055147022737e-07, "loss": 1.0439, "step": 22364 }, { "epoch": 0.8750684717114016, "grad_norm": 0.0, "learning_rate": 8.075065626574929e-07, "loss": 0.8164, "step": 22365 }, { "epoch": 0.875107598403631, "grad_norm": 0.0, "learning_rate": 8.070077582326374e-07, "loss": 0.9867, "step": 22366 }, { "epoch": 0.8751467250958604, "grad_norm": 0.0, "learning_rate": 8.065091014357207e-07, "loss": 0.9694, "step": 22367 }, { "epoch": 0.8751858517880898, "grad_norm": 0.0, "learning_rate": 8.060105922747463e-07, "loss": 0.9016, "step": 22368 }, { "epoch": 0.8752249784803193, "grad_norm": 0.0, "learning_rate": 8.055122307577212e-07, "loss": 1.0017, "step": 22369 }, { "epoch": 0.8752641051725487, "grad_norm": 0.0, "learning_rate": 8.050140168926479e-07, "loss": 0.9897, "step": 22370 }, { "epoch": 0.8753032318647781, "grad_norm": 0.0, "learning_rate": 8.045159506875266e-07, "loss": 0.8854, "step": 22371 }, { "epoch": 0.8753423585570076, "grad_norm": 0.0, "learning_rate": 8.040180321503577e-07, "loss": 1.028, "step": 22372 }, { "epoch": 0.875381485249237, "grad_norm": 0.0, "learning_rate": 8.035202612891368e-07, "loss": 0.9697, "step": 22373 }, { "epoch": 0.8754206119414665, "grad_norm": 0.0, "learning_rate": 8.030226381118555e-07, "loss": 1.0604, "step": 22374 }, { "epoch": 0.8754597386336959, "grad_norm": 0.0, "learning_rate": 8.025251626265063e-07, "loss": 0.9396, "step": 22375 }, { "epoch": 0.8754988653259254, "grad_norm": 0.0, "learning_rate": 8.02027834841077e-07, "loss": 0.9443, "step": 22376 }, { "epoch": 0.8755379920181547, "grad_norm": 0.0, "learning_rate": 8.015306547635571e-07, "loss": 0.9459, "step": 22377 }, { "epoch": 0.8755771187103842, "grad_norm": 0.0, "learning_rate": 8.010336224019278e-07, "loss": 0.8435, "step": 22378 }, { "epoch": 0.8756162454026136, "grad_norm": 0.0, "learning_rate": 8.005367377641715e-07, "loss": 0.9863, "step": 22379 }, { "epoch": 0.8756553720948431, "grad_norm": 0.0, "learning_rate": 8.000400008582654e-07, "loss": 0.8686, "step": 22380 }, { "epoch": 0.8756944987870725, "grad_norm": 0.0, "learning_rate": 7.995434116921919e-07, "loss": 0.9555, "step": 22381 }, { "epoch": 0.875733625479302, "grad_norm": 0.0, "learning_rate": 7.990469702739212e-07, "loss": 0.8575, "step": 22382 }, { "epoch": 0.8757727521715314, "grad_norm": 0.0, "learning_rate": 7.98550676611427e-07, "loss": 0.9482, "step": 22383 }, { "epoch": 0.8758118788637609, "grad_norm": 0.0, "learning_rate": 7.980545307126763e-07, "loss": 0.8688, "step": 22384 }, { "epoch": 0.8758510055559903, "grad_norm": 0.0, "learning_rate": 7.975585325856427e-07, "loss": 0.8751, "step": 22385 }, { "epoch": 0.8758901322482198, "grad_norm": 0.0, "learning_rate": 7.970626822382866e-07, "loss": 0.9321, "step": 22386 }, { "epoch": 0.8759292589404492, "grad_norm": 0.0, "learning_rate": 7.965669796785725e-07, "loss": 0.991, "step": 22387 }, { "epoch": 0.8759683856326786, "grad_norm": 0.0, "learning_rate": 7.960714249144586e-07, "loss": 1.0127, "step": 22388 }, { "epoch": 0.876007512324908, "grad_norm": 0.0, "learning_rate": 7.955760179539052e-07, "loss": 1.0278, "step": 22389 }, { "epoch": 0.8760466390171375, "grad_norm": 0.0, "learning_rate": 7.95080758804867e-07, "loss": 0.9273, "step": 22390 }, { "epoch": 0.8760857657093669, "grad_norm": 0.0, "learning_rate": 7.945856474752989e-07, "loss": 0.9779, "step": 22391 }, { "epoch": 0.8761248924015964, "grad_norm": 0.0, "learning_rate": 7.940906839731477e-07, "loss": 0.935, "step": 22392 }, { "epoch": 0.8761640190938258, "grad_norm": 0.0, "learning_rate": 7.93595868306366e-07, "loss": 0.9799, "step": 22393 }, { "epoch": 0.8762031457860553, "grad_norm": 0.0, "learning_rate": 7.931012004828975e-07, "loss": 0.9501, "step": 22394 }, { "epoch": 0.8762422724782847, "grad_norm": 0.0, "learning_rate": 7.92606680510688e-07, "loss": 0.9021, "step": 22395 }, { "epoch": 0.8762813991705142, "grad_norm": 0.0, "learning_rate": 7.921123083976768e-07, "loss": 0.9234, "step": 22396 }, { "epoch": 0.8763205258627436, "grad_norm": 0.0, "learning_rate": 7.916180841518062e-07, "loss": 0.9703, "step": 22397 }, { "epoch": 0.8763596525549731, "grad_norm": 0.0, "learning_rate": 7.911240077810057e-07, "loss": 1.1051, "step": 22398 }, { "epoch": 0.8763987792472024, "grad_norm": 0.0, "learning_rate": 7.906300792932186e-07, "loss": 0.8607, "step": 22399 }, { "epoch": 0.8764379059394318, "grad_norm": 0.0, "learning_rate": 7.901362986963701e-07, "loss": 0.8866, "step": 22400 }, { "epoch": 0.8764770326316613, "grad_norm": 0.0, "learning_rate": 7.896426659983936e-07, "loss": 0.9772, "step": 22401 }, { "epoch": 0.8765161593238907, "grad_norm": 0.0, "learning_rate": 7.891491812072139e-07, "loss": 1.0525, "step": 22402 }, { "epoch": 0.8765552860161202, "grad_norm": 0.0, "learning_rate": 7.886558443307557e-07, "loss": 1.0947, "step": 22403 }, { "epoch": 0.8765944127083496, "grad_norm": 0.0, "learning_rate": 7.881626553769417e-07, "loss": 0.944, "step": 22404 }, { "epoch": 0.8766335394005791, "grad_norm": 0.0, "learning_rate": 7.876696143536955e-07, "loss": 0.9263, "step": 22405 }, { "epoch": 0.8766726660928085, "grad_norm": 0.0, "learning_rate": 7.871767212689285e-07, "loss": 0.9839, "step": 22406 }, { "epoch": 0.876711792785038, "grad_norm": 0.0, "learning_rate": 7.8668397613056e-07, "loss": 1.0064, "step": 22407 }, { "epoch": 0.8767509194772674, "grad_norm": 0.0, "learning_rate": 7.861913789465037e-07, "loss": 0.9384, "step": 22408 }, { "epoch": 0.8767900461694969, "grad_norm": 0.0, "learning_rate": 7.856989297246664e-07, "loss": 0.9458, "step": 22409 }, { "epoch": 0.8768291728617262, "grad_norm": 0.0, "learning_rate": 7.852066284729576e-07, "loss": 0.8389, "step": 22410 }, { "epoch": 0.8768682995539557, "grad_norm": 0.0, "learning_rate": 7.847144751992842e-07, "loss": 0.8421, "step": 22411 }, { "epoch": 0.8769074262461851, "grad_norm": 0.0, "learning_rate": 7.842224699115497e-07, "loss": 0.8266, "step": 22412 }, { "epoch": 0.8769465529384146, "grad_norm": 0.0, "learning_rate": 7.837306126176536e-07, "loss": 1.0367, "step": 22413 }, { "epoch": 0.876985679630644, "grad_norm": 0.0, "learning_rate": 7.83238903325495e-07, "loss": 0.9177, "step": 22414 }, { "epoch": 0.8770248063228735, "grad_norm": 0.0, "learning_rate": 7.827473420429721e-07, "loss": 0.9724, "step": 22415 }, { "epoch": 0.8770639330151029, "grad_norm": 0.0, "learning_rate": 7.822559287779752e-07, "loss": 1.0638, "step": 22416 }, { "epoch": 0.8771030597073324, "grad_norm": 0.0, "learning_rate": 7.817646635383969e-07, "loss": 0.8951, "step": 22417 }, { "epoch": 0.8771421863995618, "grad_norm": 0.0, "learning_rate": 7.812735463321297e-07, "loss": 1.0584, "step": 22418 }, { "epoch": 0.8771813130917913, "grad_norm": 0.0, "learning_rate": 7.807825771670552e-07, "loss": 0.8747, "step": 22419 }, { "epoch": 0.8772204397840206, "grad_norm": 0.0, "learning_rate": 7.802917560510614e-07, "loss": 1.0057, "step": 22420 }, { "epoch": 0.8772595664762501, "grad_norm": 0.0, "learning_rate": 7.798010829920278e-07, "loss": 1.0438, "step": 22421 }, { "epoch": 0.8772986931684795, "grad_norm": 0.0, "learning_rate": 7.793105579978377e-07, "loss": 0.9338, "step": 22422 }, { "epoch": 0.877337819860709, "grad_norm": 0.0, "learning_rate": 7.78820181076364e-07, "loss": 0.9167, "step": 22423 }, { "epoch": 0.8773769465529384, "grad_norm": 0.0, "learning_rate": 7.783299522354826e-07, "loss": 1.0577, "step": 22424 }, { "epoch": 0.8774160732451679, "grad_norm": 0.0, "learning_rate": 7.778398714830682e-07, "loss": 0.8948, "step": 22425 }, { "epoch": 0.8774551999373973, "grad_norm": 0.0, "learning_rate": 7.773499388269901e-07, "loss": 1.03, "step": 22426 }, { "epoch": 0.8774943266296268, "grad_norm": 0.0, "learning_rate": 7.768601542751142e-07, "loss": 0.9119, "step": 22427 }, { "epoch": 0.8775334533218562, "grad_norm": 0.0, "learning_rate": 7.763705178353076e-07, "loss": 0.9741, "step": 22428 }, { "epoch": 0.8775725800140856, "grad_norm": 0.0, "learning_rate": 7.758810295154306e-07, "loss": 1.0368, "step": 22429 }, { "epoch": 0.877611706706315, "grad_norm": 0.0, "learning_rate": 7.753916893233482e-07, "loss": 1.0504, "step": 22430 }, { "epoch": 0.8776508333985444, "grad_norm": 0.0, "learning_rate": 7.74902497266915e-07, "loss": 1.0062, "step": 22431 }, { "epoch": 0.8776899600907739, "grad_norm": 0.0, "learning_rate": 7.744134533539905e-07, "loss": 1.0487, "step": 22432 }, { "epoch": 0.8777290867830033, "grad_norm": 0.0, "learning_rate": 7.739245575924215e-07, "loss": 1.0267, "step": 22433 }, { "epoch": 0.8777682134752328, "grad_norm": 0.0, "learning_rate": 7.734358099900663e-07, "loss": 1.0336, "step": 22434 }, { "epoch": 0.8778073401674622, "grad_norm": 0.0, "learning_rate": 7.729472105547687e-07, "loss": 0.9543, "step": 22435 }, { "epoch": 0.8778464668596917, "grad_norm": 0.0, "learning_rate": 7.724587592943788e-07, "loss": 1.0169, "step": 22436 }, { "epoch": 0.8778855935519211, "grad_norm": 0.0, "learning_rate": 7.719704562167363e-07, "loss": 0.8957, "step": 22437 }, { "epoch": 0.8779247202441506, "grad_norm": 0.0, "learning_rate": 7.714823013296857e-07, "loss": 1.0315, "step": 22438 }, { "epoch": 0.87796384693638, "grad_norm": 0.0, "learning_rate": 7.709942946410642e-07, "loss": 0.8509, "step": 22439 }, { "epoch": 0.8780029736286095, "grad_norm": 0.0, "learning_rate": 7.705064361587122e-07, "loss": 0.8886, "step": 22440 }, { "epoch": 0.8780421003208388, "grad_norm": 0.0, "learning_rate": 7.700187258904601e-07, "loss": 0.9606, "step": 22441 }, { "epoch": 0.8780812270130683, "grad_norm": 0.0, "learning_rate": 7.695311638441416e-07, "loss": 0.8965, "step": 22442 }, { "epoch": 0.8781203537052977, "grad_norm": 0.0, "learning_rate": 7.69043750027586e-07, "loss": 0.8753, "step": 22443 }, { "epoch": 0.8781594803975272, "grad_norm": 0.0, "learning_rate": 7.685564844486215e-07, "loss": 1.014, "step": 22444 }, { "epoch": 0.8781986070897566, "grad_norm": 0.0, "learning_rate": 7.680693671150718e-07, "loss": 0.9984, "step": 22445 }, { "epoch": 0.8782377337819861, "grad_norm": 0.0, "learning_rate": 7.675823980347607e-07, "loss": 0.9872, "step": 22446 }, { "epoch": 0.8782768604742155, "grad_norm": 0.0, "learning_rate": 7.670955772155042e-07, "loss": 0.9504, "step": 22447 }, { "epoch": 0.878315987166445, "grad_norm": 0.0, "learning_rate": 7.66608904665127e-07, "loss": 0.9352, "step": 22448 }, { "epoch": 0.8783551138586744, "grad_norm": 0.0, "learning_rate": 7.661223803914386e-07, "loss": 0.9648, "step": 22449 }, { "epoch": 0.8783942405509039, "grad_norm": 0.0, "learning_rate": 7.656360044022559e-07, "loss": 0.9465, "step": 22450 }, { "epoch": 0.8784333672431333, "grad_norm": 0.0, "learning_rate": 7.651497767053862e-07, "loss": 1.0341, "step": 22451 }, { "epoch": 0.8784724939353628, "grad_norm": 0.0, "learning_rate": 7.646636973086396e-07, "loss": 0.8611, "step": 22452 }, { "epoch": 0.8785116206275921, "grad_norm": 0.0, "learning_rate": 7.641777662198202e-07, "loss": 0.8713, "step": 22453 }, { "epoch": 0.8785507473198216, "grad_norm": 0.0, "learning_rate": 7.636919834467349e-07, "loss": 1.04, "step": 22454 }, { "epoch": 0.878589874012051, "grad_norm": 0.0, "learning_rate": 7.632063489971819e-07, "loss": 1.0602, "step": 22455 }, { "epoch": 0.8786290007042804, "grad_norm": 0.0, "learning_rate": 7.627208628789595e-07, "loss": 0.9766, "step": 22456 }, { "epoch": 0.8786681273965099, "grad_norm": 0.0, "learning_rate": 7.62235525099867e-07, "loss": 0.9255, "step": 22457 }, { "epoch": 0.8787072540887393, "grad_norm": 0.0, "learning_rate": 7.617503356676948e-07, "loss": 0.9931, "step": 22458 }, { "epoch": 0.8787463807809688, "grad_norm": 0.0, "learning_rate": 7.612652945902366e-07, "loss": 0.9057, "step": 22459 }, { "epoch": 0.8787855074731982, "grad_norm": 0.0, "learning_rate": 7.607804018752795e-07, "loss": 0.7593, "step": 22460 }, { "epoch": 0.8788246341654277, "grad_norm": 0.0, "learning_rate": 7.602956575306153e-07, "loss": 0.891, "step": 22461 }, { "epoch": 0.878863760857657, "grad_norm": 0.0, "learning_rate": 7.59811061564022e-07, "loss": 0.9714, "step": 22462 }, { "epoch": 0.8789028875498865, "grad_norm": 0.0, "learning_rate": 7.593266139832856e-07, "loss": 0.9388, "step": 22463 }, { "epoch": 0.8789420142421159, "grad_norm": 0.0, "learning_rate": 7.588423147961843e-07, "loss": 0.9855, "step": 22464 }, { "epoch": 0.8789811409343454, "grad_norm": 0.0, "learning_rate": 7.583581640104942e-07, "loss": 0.9472, "step": 22465 }, { "epoch": 0.8790202676265748, "grad_norm": 0.0, "learning_rate": 7.578741616339925e-07, "loss": 1.0173, "step": 22466 }, { "epoch": 0.8790593943188043, "grad_norm": 0.0, "learning_rate": 7.573903076744527e-07, "loss": 0.8638, "step": 22467 }, { "epoch": 0.8790985210110337, "grad_norm": 0.0, "learning_rate": 7.569066021396409e-07, "loss": 1.0818, "step": 22468 }, { "epoch": 0.8791376477032632, "grad_norm": 0.0, "learning_rate": 7.564230450373267e-07, "loss": 0.9486, "step": 22469 }, { "epoch": 0.8791767743954926, "grad_norm": 0.0, "learning_rate": 7.559396363752747e-07, "loss": 0.8549, "step": 22470 }, { "epoch": 0.8792159010877221, "grad_norm": 0.0, "learning_rate": 7.554563761612521e-07, "loss": 0.921, "step": 22471 }, { "epoch": 0.8792550277799515, "grad_norm": 0.0, "learning_rate": 7.549732644030127e-07, "loss": 1.0699, "step": 22472 }, { "epoch": 0.879294154472181, "grad_norm": 0.0, "learning_rate": 7.544903011083205e-07, "loss": 0.7946, "step": 22473 }, { "epoch": 0.8793332811644103, "grad_norm": 0.0, "learning_rate": 7.540074862849256e-07, "loss": 1.0449, "step": 22474 }, { "epoch": 0.8793724078566398, "grad_norm": 0.0, "learning_rate": 7.535248199405875e-07, "loss": 0.9833, "step": 22475 }, { "epoch": 0.8794115345488692, "grad_norm": 0.0, "learning_rate": 7.530423020830536e-07, "loss": 0.931, "step": 22476 }, { "epoch": 0.8794506612410987, "grad_norm": 0.0, "learning_rate": 7.525599327200739e-07, "loss": 0.9348, "step": 22477 }, { "epoch": 0.8794897879333281, "grad_norm": 0.0, "learning_rate": 7.520777118593903e-07, "loss": 1.0197, "step": 22478 }, { "epoch": 0.8795289146255576, "grad_norm": 0.0, "learning_rate": 7.515956395087542e-07, "loss": 0.947, "step": 22479 }, { "epoch": 0.879568041317787, "grad_norm": 0.0, "learning_rate": 7.511137156759019e-07, "loss": 0.9897, "step": 22480 }, { "epoch": 0.8796071680100165, "grad_norm": 0.0, "learning_rate": 7.506319403685758e-07, "loss": 1.0482, "step": 22481 }, { "epoch": 0.8796462947022459, "grad_norm": 0.0, "learning_rate": 7.501503135945065e-07, "loss": 0.97, "step": 22482 }, { "epoch": 0.8796854213944754, "grad_norm": 0.0, "learning_rate": 7.496688353614357e-07, "loss": 1.0237, "step": 22483 }, { "epoch": 0.8797245480867047, "grad_norm": 0.0, "learning_rate": 7.491875056770914e-07, "loss": 0.9319, "step": 22484 }, { "epoch": 0.8797636747789341, "grad_norm": 0.0, "learning_rate": 7.487063245492043e-07, "loss": 0.929, "step": 22485 }, { "epoch": 0.8798028014711636, "grad_norm": 0.0, "learning_rate": 7.482252919855004e-07, "loss": 0.9977, "step": 22486 }, { "epoch": 0.879841928163393, "grad_norm": 0.0, "learning_rate": 7.477444079937046e-07, "loss": 0.9604, "step": 22487 }, { "epoch": 0.8798810548556225, "grad_norm": 0.0, "learning_rate": 7.472636725815396e-07, "loss": 1.0021, "step": 22488 }, { "epoch": 0.8799201815478519, "grad_norm": 0.0, "learning_rate": 7.467830857567282e-07, "loss": 0.8386, "step": 22489 }, { "epoch": 0.8799593082400814, "grad_norm": 0.0, "learning_rate": 7.463026475269841e-07, "loss": 0.9355, "step": 22490 }, { "epoch": 0.8799984349323108, "grad_norm": 0.0, "learning_rate": 7.458223579000223e-07, "loss": 0.9367, "step": 22491 }, { "epoch": 0.8800375616245403, "grad_norm": 0.0, "learning_rate": 7.453422168835589e-07, "loss": 0.9306, "step": 22492 }, { "epoch": 0.8800766883167697, "grad_norm": 0.0, "learning_rate": 7.448622244853043e-07, "loss": 0.8927, "step": 22493 }, { "epoch": 0.8801158150089992, "grad_norm": 0.0, "learning_rate": 7.443823807129624e-07, "loss": 0.9789, "step": 22494 }, { "epoch": 0.8801549417012285, "grad_norm": 0.0, "learning_rate": 7.439026855742437e-07, "loss": 1.0062, "step": 22495 }, { "epoch": 0.880194068393458, "grad_norm": 0.0, "learning_rate": 7.434231390768476e-07, "loss": 1.0394, "step": 22496 }, { "epoch": 0.8802331950856874, "grad_norm": 0.0, "learning_rate": 7.429437412284768e-07, "loss": 0.8344, "step": 22497 }, { "epoch": 0.8802723217779169, "grad_norm": 0.0, "learning_rate": 7.424644920368296e-07, "loss": 0.9894, "step": 22498 }, { "epoch": 0.8803114484701463, "grad_norm": 0.0, "learning_rate": 7.419853915096042e-07, "loss": 0.9491, "step": 22499 }, { "epoch": 0.8803505751623758, "grad_norm": 0.0, "learning_rate": 7.415064396544913e-07, "loss": 1.0403, "step": 22500 }, { "epoch": 0.8803897018546052, "grad_norm": 0.0, "learning_rate": 7.410276364791824e-07, "loss": 0.9932, "step": 22501 }, { "epoch": 0.8804288285468347, "grad_norm": 0.0, "learning_rate": 7.405489819913703e-07, "loss": 0.9932, "step": 22502 }, { "epoch": 0.8804679552390641, "grad_norm": 0.0, "learning_rate": 7.400704761987365e-07, "loss": 0.9647, "step": 22503 }, { "epoch": 0.8805070819312936, "grad_norm": 0.0, "learning_rate": 7.395921191089673e-07, "loss": 0.8388, "step": 22504 }, { "epoch": 0.880546208623523, "grad_norm": 0.0, "learning_rate": 7.391139107297451e-07, "loss": 0.9584, "step": 22505 }, { "epoch": 0.8805853353157524, "grad_norm": 0.0, "learning_rate": 7.386358510687508e-07, "loss": 0.9847, "step": 22506 }, { "epoch": 0.8806244620079818, "grad_norm": 0.0, "learning_rate": 7.381579401336581e-07, "loss": 0.9308, "step": 22507 }, { "epoch": 0.8806635887002113, "grad_norm": 0.0, "learning_rate": 7.376801779321441e-07, "loss": 0.9805, "step": 22508 }, { "epoch": 0.8807027153924407, "grad_norm": 0.0, "learning_rate": 7.372025644718772e-07, "loss": 0.9412, "step": 22509 }, { "epoch": 0.8807418420846702, "grad_norm": 0.0, "learning_rate": 7.367250997605324e-07, "loss": 1.0331, "step": 22510 }, { "epoch": 0.8807809687768996, "grad_norm": 0.0, "learning_rate": 7.362477838057747e-07, "loss": 1.0016, "step": 22511 }, { "epoch": 0.8808200954691291, "grad_norm": 0.0, "learning_rate": 7.357706166152711e-07, "loss": 0.968, "step": 22512 }, { "epoch": 0.8808592221613585, "grad_norm": 0.0, "learning_rate": 7.352935981966802e-07, "loss": 1.0514, "step": 22513 }, { "epoch": 0.8808983488535879, "grad_norm": 0.0, "learning_rate": 7.348167285576646e-07, "loss": 0.9411, "step": 22514 }, { "epoch": 0.8809374755458174, "grad_norm": 0.0, "learning_rate": 7.343400077058838e-07, "loss": 0.9743, "step": 22515 }, { "epoch": 0.8809766022380467, "grad_norm": 0.0, "learning_rate": 7.338634356489926e-07, "loss": 0.9916, "step": 22516 }, { "epoch": 0.8810157289302762, "grad_norm": 0.0, "learning_rate": 7.333870123946418e-07, "loss": 0.9701, "step": 22517 }, { "epoch": 0.8810548556225056, "grad_norm": 0.0, "learning_rate": 7.32910737950484e-07, "loss": 0.9656, "step": 22518 }, { "epoch": 0.8810939823147351, "grad_norm": 0.0, "learning_rate": 7.324346123241677e-07, "loss": 0.8786, "step": 22519 }, { "epoch": 0.8811331090069645, "grad_norm": 0.0, "learning_rate": 7.319586355233399e-07, "loss": 0.9621, "step": 22520 }, { "epoch": 0.881172235699194, "grad_norm": 0.0, "learning_rate": 7.314828075556412e-07, "loss": 1.0544, "step": 22521 }, { "epoch": 0.8812113623914234, "grad_norm": 0.0, "learning_rate": 7.310071284287168e-07, "loss": 0.9908, "step": 22522 }, { "epoch": 0.8812504890836529, "grad_norm": 0.0, "learning_rate": 7.305315981501993e-07, "loss": 0.9103, "step": 22523 }, { "epoch": 0.8812896157758823, "grad_norm": 0.0, "learning_rate": 7.300562167277325e-07, "loss": 0.9457, "step": 22524 }, { "epoch": 0.8813287424681118, "grad_norm": 0.0, "learning_rate": 7.29580984168946e-07, "loss": 0.8667, "step": 22525 }, { "epoch": 0.8813678691603412, "grad_norm": 0.0, "learning_rate": 7.291059004814738e-07, "loss": 1.0176, "step": 22526 }, { "epoch": 0.8814069958525707, "grad_norm": 0.0, "learning_rate": 7.286309656729396e-07, "loss": 0.9993, "step": 22527 }, { "epoch": 0.8814461225448, "grad_norm": 0.0, "learning_rate": 7.281561797509784e-07, "loss": 1.0472, "step": 22528 }, { "epoch": 0.8814852492370295, "grad_norm": 0.0, "learning_rate": 7.276815427232087e-07, "loss": 0.9362, "step": 22529 }, { "epoch": 0.8815243759292589, "grad_norm": 0.0, "learning_rate": 7.272070545972564e-07, "loss": 1.0342, "step": 22530 }, { "epoch": 0.8815635026214884, "grad_norm": 0.0, "learning_rate": 7.26732715380738e-07, "loss": 0.9798, "step": 22531 }, { "epoch": 0.8816026293137178, "grad_norm": 0.0, "learning_rate": 7.262585250812715e-07, "loss": 1.0612, "step": 22532 }, { "epoch": 0.8816417560059473, "grad_norm": 0.0, "learning_rate": 7.257844837064732e-07, "loss": 0.9357, "step": 22533 }, { "epoch": 0.8816808826981767, "grad_norm": 0.0, "learning_rate": 7.253105912639557e-07, "loss": 1.0216, "step": 22534 }, { "epoch": 0.8817200093904062, "grad_norm": 0.0, "learning_rate": 7.248368477613265e-07, "loss": 0.9619, "step": 22535 }, { "epoch": 0.8817591360826356, "grad_norm": 0.0, "learning_rate": 7.243632532061962e-07, "loss": 1.012, "step": 22536 }, { "epoch": 0.8817982627748651, "grad_norm": 0.0, "learning_rate": 7.238898076061685e-07, "loss": 0.967, "step": 22537 }, { "epoch": 0.8818373894670944, "grad_norm": 0.0, "learning_rate": 7.234165109688485e-07, "loss": 1.0094, "step": 22538 }, { "epoch": 0.8818765161593239, "grad_norm": 0.0, "learning_rate": 7.229433633018335e-07, "loss": 0.9038, "step": 22539 }, { "epoch": 0.8819156428515533, "grad_norm": 0.0, "learning_rate": 7.224703646127229e-07, "loss": 0.8047, "step": 22540 }, { "epoch": 0.8819547695437828, "grad_norm": 0.0, "learning_rate": 7.219975149091141e-07, "loss": 0.8658, "step": 22541 }, { "epoch": 0.8819938962360122, "grad_norm": 0.0, "learning_rate": 7.215248141985986e-07, "loss": 1.0084, "step": 22542 }, { "epoch": 0.8820330229282416, "grad_norm": 0.0, "learning_rate": 7.210522624887672e-07, "loss": 0.9093, "step": 22543 }, { "epoch": 0.8820721496204711, "grad_norm": 0.0, "learning_rate": 7.205798597872116e-07, "loss": 0.9005, "step": 22544 }, { "epoch": 0.8821112763127005, "grad_norm": 0.0, "learning_rate": 7.201076061015144e-07, "loss": 0.8975, "step": 22545 }, { "epoch": 0.88215040300493, "grad_norm": 0.0, "learning_rate": 7.196355014392597e-07, "loss": 0.9329, "step": 22546 }, { "epoch": 0.8821895296971594, "grad_norm": 0.0, "learning_rate": 7.191635458080326e-07, "loss": 0.8822, "step": 22547 }, { "epoch": 0.8822286563893889, "grad_norm": 0.0, "learning_rate": 7.186917392154069e-07, "loss": 0.9309, "step": 22548 }, { "epoch": 0.8822677830816182, "grad_norm": 0.0, "learning_rate": 7.182200816689622e-07, "loss": 1.0071, "step": 22549 }, { "epoch": 0.8823069097738477, "grad_norm": 0.0, "learning_rate": 7.177485731762712e-07, "loss": 0.8661, "step": 22550 }, { "epoch": 0.8823460364660771, "grad_norm": 0.0, "learning_rate": 7.172772137449091e-07, "loss": 0.9456, "step": 22551 }, { "epoch": 0.8823851631583066, "grad_norm": 0.0, "learning_rate": 7.16806003382442e-07, "loss": 0.9865, "step": 22552 }, { "epoch": 0.882424289850536, "grad_norm": 0.0, "learning_rate": 7.163349420964394e-07, "loss": 0.9559, "step": 22553 }, { "epoch": 0.8824634165427655, "grad_norm": 0.0, "learning_rate": 7.158640298944608e-07, "loss": 1.0328, "step": 22554 }, { "epoch": 0.8825025432349949, "grad_norm": 0.0, "learning_rate": 7.153932667840757e-07, "loss": 1.0034, "step": 22555 }, { "epoch": 0.8825416699272244, "grad_norm": 0.0, "learning_rate": 7.149226527728393e-07, "loss": 0.9597, "step": 22556 }, { "epoch": 0.8825807966194538, "grad_norm": 0.0, "learning_rate": 7.144521878683108e-07, "loss": 0.9551, "step": 22557 }, { "epoch": 0.8826199233116833, "grad_norm": 0.0, "learning_rate": 7.139818720780423e-07, "loss": 0.9291, "step": 22558 }, { "epoch": 0.8826590500039126, "grad_norm": 0.0, "learning_rate": 7.135117054095919e-07, "loss": 0.9557, "step": 22559 }, { "epoch": 0.8826981766961421, "grad_norm": 0.0, "learning_rate": 7.130416878705059e-07, "loss": 1.0315, "step": 22560 }, { "epoch": 0.8827373033883715, "grad_norm": 0.0, "learning_rate": 7.12571819468334e-07, "loss": 0.9082, "step": 22561 }, { "epoch": 0.882776430080601, "grad_norm": 0.0, "learning_rate": 7.121021002106198e-07, "loss": 0.9467, "step": 22562 }, { "epoch": 0.8828155567728304, "grad_norm": 0.0, "learning_rate": 7.116325301049076e-07, "loss": 0.9481, "step": 22563 }, { "epoch": 0.8828546834650599, "grad_norm": 0.0, "learning_rate": 7.111631091587368e-07, "loss": 0.9393, "step": 22564 }, { "epoch": 0.8828938101572893, "grad_norm": 0.0, "learning_rate": 7.106938373796501e-07, "loss": 0.9964, "step": 22565 }, { "epoch": 0.8829329368495188, "grad_norm": 0.0, "learning_rate": 7.102247147751773e-07, "loss": 0.8131, "step": 22566 }, { "epoch": 0.8829720635417482, "grad_norm": 0.0, "learning_rate": 7.097557413528555e-07, "loss": 0.9211, "step": 22567 }, { "epoch": 0.8830111902339777, "grad_norm": 0.0, "learning_rate": 7.092869171202155e-07, "loss": 0.9861, "step": 22568 }, { "epoch": 0.8830503169262071, "grad_norm": 0.0, "learning_rate": 7.088182420847867e-07, "loss": 0.8969, "step": 22569 }, { "epoch": 0.8830894436184364, "grad_norm": 0.0, "learning_rate": 7.083497162540931e-07, "loss": 0.7963, "step": 22570 }, { "epoch": 0.8831285703106659, "grad_norm": 0.0, "learning_rate": 7.07881339635661e-07, "loss": 0.8327, "step": 22571 }, { "epoch": 0.8831676970028953, "grad_norm": 0.0, "learning_rate": 7.074131122370076e-07, "loss": 1.0116, "step": 22572 }, { "epoch": 0.8832068236951248, "grad_norm": 0.0, "learning_rate": 7.069450340656592e-07, "loss": 0.9156, "step": 22573 }, { "epoch": 0.8832459503873542, "grad_norm": 0.0, "learning_rate": 7.064771051291275e-07, "loss": 0.9906, "step": 22574 }, { "epoch": 0.8832850770795837, "grad_norm": 0.0, "learning_rate": 7.060093254349287e-07, "loss": 0.92, "step": 22575 }, { "epoch": 0.8833242037718131, "grad_norm": 0.0, "learning_rate": 7.055416949905714e-07, "loss": 0.953, "step": 22576 }, { "epoch": 0.8833633304640426, "grad_norm": 0.0, "learning_rate": 7.050742138035716e-07, "loss": 0.937, "step": 22577 }, { "epoch": 0.883402457156272, "grad_norm": 0.0, "learning_rate": 7.0460688188143e-07, "loss": 0.9542, "step": 22578 }, { "epoch": 0.8834415838485015, "grad_norm": 0.0, "learning_rate": 7.041396992316563e-07, "loss": 0.9977, "step": 22579 }, { "epoch": 0.8834807105407309, "grad_norm": 0.0, "learning_rate": 7.036726658617499e-07, "loss": 0.9673, "step": 22580 }, { "epoch": 0.8835198372329603, "grad_norm": 0.0, "learning_rate": 7.032057817792104e-07, "loss": 0.8721, "step": 22581 }, { "epoch": 0.8835589639251897, "grad_norm": 0.0, "learning_rate": 7.027390469915363e-07, "loss": 0.8285, "step": 22582 }, { "epoch": 0.8835980906174192, "grad_norm": 0.0, "learning_rate": 7.022724615062249e-07, "loss": 0.9515, "step": 22583 }, { "epoch": 0.8836372173096486, "grad_norm": 0.0, "learning_rate": 7.018060253307657e-07, "loss": 0.9835, "step": 22584 }, { "epoch": 0.8836763440018781, "grad_norm": 0.0, "learning_rate": 7.013397384726505e-07, "loss": 0.9628, "step": 22585 }, { "epoch": 0.8837154706941075, "grad_norm": 0.0, "learning_rate": 7.00873600939369e-07, "loss": 1.0005, "step": 22586 }, { "epoch": 0.883754597386337, "grad_norm": 0.0, "learning_rate": 7.00407612738403e-07, "loss": 0.9797, "step": 22587 }, { "epoch": 0.8837937240785664, "grad_norm": 0.0, "learning_rate": 6.999417738772374e-07, "loss": 0.9345, "step": 22588 }, { "epoch": 0.8838328507707959, "grad_norm": 0.0, "learning_rate": 6.994760843633552e-07, "loss": 0.9498, "step": 22589 }, { "epoch": 0.8838719774630253, "grad_norm": 0.0, "learning_rate": 6.990105442042316e-07, "loss": 0.7824, "step": 22590 }, { "epoch": 0.8839111041552548, "grad_norm": 0.0, "learning_rate": 6.985451534073439e-07, "loss": 0.8839, "step": 22591 }, { "epoch": 0.8839502308474841, "grad_norm": 0.0, "learning_rate": 6.980799119801674e-07, "loss": 0.8634, "step": 22592 }, { "epoch": 0.8839893575397136, "grad_norm": 0.0, "learning_rate": 6.976148199301691e-07, "loss": 0.9239, "step": 22593 }, { "epoch": 0.884028484231943, "grad_norm": 0.0, "learning_rate": 6.971498772648211e-07, "loss": 1.0374, "step": 22594 }, { "epoch": 0.8840676109241725, "grad_norm": 0.0, "learning_rate": 6.966850839915884e-07, "loss": 0.937, "step": 22595 }, { "epoch": 0.8841067376164019, "grad_norm": 0.0, "learning_rate": 6.962204401179373e-07, "loss": 0.9185, "step": 22596 }, { "epoch": 0.8841458643086314, "grad_norm": 0.0, "learning_rate": 6.957559456513263e-07, "loss": 0.9421, "step": 22597 }, { "epoch": 0.8841849910008608, "grad_norm": 0.0, "learning_rate": 6.95291600599215e-07, "loss": 0.9937, "step": 22598 }, { "epoch": 0.8842241176930902, "grad_norm": 0.0, "learning_rate": 6.948274049690618e-07, "loss": 0.8995, "step": 22599 }, { "epoch": 0.8842632443853197, "grad_norm": 0.0, "learning_rate": 6.943633587683218e-07, "loss": 0.9795, "step": 22600 }, { "epoch": 0.884302371077549, "grad_norm": 0.0, "learning_rate": 6.938994620044448e-07, "loss": 1.019, "step": 22601 }, { "epoch": 0.8843414977697786, "grad_norm": 0.0, "learning_rate": 6.934357146848824e-07, "loss": 0.9497, "step": 22602 }, { "epoch": 0.8843806244620079, "grad_norm": 0.0, "learning_rate": 6.929721168170778e-07, "loss": 0.9496, "step": 22603 }, { "epoch": 0.8844197511542374, "grad_norm": 0.0, "learning_rate": 6.925086684084814e-07, "loss": 0.8582, "step": 22604 }, { "epoch": 0.8844588778464668, "grad_norm": 0.0, "learning_rate": 6.920453694665308e-07, "loss": 0.8946, "step": 22605 }, { "epoch": 0.8844980045386963, "grad_norm": 0.0, "learning_rate": 6.915822199986699e-07, "loss": 0.9387, "step": 22606 }, { "epoch": 0.8845371312309257, "grad_norm": 0.0, "learning_rate": 6.911192200123318e-07, "loss": 0.8708, "step": 22607 }, { "epoch": 0.8845762579231552, "grad_norm": 0.0, "learning_rate": 6.906563695149571e-07, "loss": 0.9456, "step": 22608 }, { "epoch": 0.8846153846153846, "grad_norm": 0.0, "learning_rate": 6.901936685139743e-07, "loss": 0.9271, "step": 22609 }, { "epoch": 0.8846545113076141, "grad_norm": 0.0, "learning_rate": 6.897311170168175e-07, "loss": 0.9729, "step": 22610 }, { "epoch": 0.8846936379998435, "grad_norm": 0.0, "learning_rate": 6.892687150309108e-07, "loss": 1.0097, "step": 22611 }, { "epoch": 0.884732764692073, "grad_norm": 0.0, "learning_rate": 6.888064625636803e-07, "loss": 0.8854, "step": 22612 }, { "epoch": 0.8847718913843023, "grad_norm": 0.0, "learning_rate": 6.883443596225514e-07, "loss": 0.887, "step": 22613 }, { "epoch": 0.8848110180765318, "grad_norm": 0.0, "learning_rate": 6.878824062149459e-07, "loss": 0.9062, "step": 22614 }, { "epoch": 0.8848501447687612, "grad_norm": 0.0, "learning_rate": 6.874206023482777e-07, "loss": 1.0052, "step": 22615 }, { "epoch": 0.8848892714609907, "grad_norm": 0.0, "learning_rate": 6.869589480299665e-07, "loss": 0.8767, "step": 22616 }, { "epoch": 0.8849283981532201, "grad_norm": 0.0, "learning_rate": 6.864974432674232e-07, "loss": 0.9019, "step": 22617 }, { "epoch": 0.8849675248454496, "grad_norm": 0.0, "learning_rate": 6.860360880680639e-07, "loss": 0.9432, "step": 22618 }, { "epoch": 0.885006651537679, "grad_norm": 0.0, "learning_rate": 6.855748824392904e-07, "loss": 1.0101, "step": 22619 }, { "epoch": 0.8850457782299085, "grad_norm": 0.0, "learning_rate": 6.85113826388516e-07, "loss": 0.9594, "step": 22620 }, { "epoch": 0.8850849049221379, "grad_norm": 0.0, "learning_rate": 6.846529199231366e-07, "loss": 0.9415, "step": 22621 }, { "epoch": 0.8851240316143674, "grad_norm": 0.0, "learning_rate": 6.841921630505632e-07, "loss": 0.9663, "step": 22622 }, { "epoch": 0.8851631583065968, "grad_norm": 0.0, "learning_rate": 6.837315557781876e-07, "loss": 1.1804, "step": 22623 }, { "epoch": 0.8852022849988262, "grad_norm": 0.0, "learning_rate": 6.832710981134116e-07, "loss": 1.0216, "step": 22624 }, { "epoch": 0.8852414116910556, "grad_norm": 0.0, "learning_rate": 6.828107900636249e-07, "loss": 0.9599, "step": 22625 }, { "epoch": 0.8852805383832851, "grad_norm": 0.0, "learning_rate": 6.823506316362227e-07, "loss": 0.9185, "step": 22626 }, { "epoch": 0.8853196650755145, "grad_norm": 0.0, "learning_rate": 6.818906228385924e-07, "loss": 0.9799, "step": 22627 }, { "epoch": 0.8853587917677439, "grad_norm": 0.0, "learning_rate": 6.814307636781248e-07, "loss": 0.97, "step": 22628 }, { "epoch": 0.8853979184599734, "grad_norm": 0.0, "learning_rate": 6.809710541622017e-07, "loss": 0.8942, "step": 22629 }, { "epoch": 0.8854370451522028, "grad_norm": 0.0, "learning_rate": 6.80511494298205e-07, "loss": 1.024, "step": 22630 }, { "epoch": 0.8854761718444323, "grad_norm": 0.0, "learning_rate": 6.800520840935176e-07, "loss": 1.0364, "step": 22631 }, { "epoch": 0.8855152985366617, "grad_norm": 0.0, "learning_rate": 6.795928235555127e-07, "loss": 0.9239, "step": 22632 }, { "epoch": 0.8855544252288912, "grad_norm": 0.0, "learning_rate": 6.791337126915687e-07, "loss": 0.9063, "step": 22633 }, { "epoch": 0.8855935519211205, "grad_norm": 0.0, "learning_rate": 6.786747515090574e-07, "loss": 0.881, "step": 22634 }, { "epoch": 0.88563267861335, "grad_norm": 0.0, "learning_rate": 6.782159400153521e-07, "loss": 0.989, "step": 22635 }, { "epoch": 0.8856718053055794, "grad_norm": 0.0, "learning_rate": 6.777572782178155e-07, "loss": 0.9739, "step": 22636 }, { "epoch": 0.8857109319978089, "grad_norm": 0.0, "learning_rate": 6.772987661238161e-07, "loss": 0.9613, "step": 22637 }, { "epoch": 0.8857500586900383, "grad_norm": 0.0, "learning_rate": 6.768404037407162e-07, "loss": 1.0044, "step": 22638 }, { "epoch": 0.8857891853822678, "grad_norm": 0.0, "learning_rate": 6.763821910758761e-07, "loss": 1.0416, "step": 22639 }, { "epoch": 0.8858283120744972, "grad_norm": 0.0, "learning_rate": 6.759241281366558e-07, "loss": 1.0006, "step": 22640 }, { "epoch": 0.8858674387667267, "grad_norm": 0.0, "learning_rate": 6.754662149304115e-07, "loss": 0.8714, "step": 22641 }, { "epoch": 0.8859065654589561, "grad_norm": 0.0, "learning_rate": 6.750084514644939e-07, "loss": 0.9409, "step": 22642 }, { "epoch": 0.8859456921511856, "grad_norm": 0.0, "learning_rate": 6.745508377462551e-07, "loss": 0.9551, "step": 22643 }, { "epoch": 0.885984818843415, "grad_norm": 0.0, "learning_rate": 6.740933737830446e-07, "loss": 0.9692, "step": 22644 }, { "epoch": 0.8860239455356445, "grad_norm": 0.0, "learning_rate": 6.7363605958221e-07, "loss": 0.9919, "step": 22645 }, { "epoch": 0.8860630722278738, "grad_norm": 0.0, "learning_rate": 6.731788951510932e-07, "loss": 1.0277, "step": 22646 }, { "epoch": 0.8861021989201033, "grad_norm": 0.0, "learning_rate": 6.72721880497037e-07, "loss": 0.8544, "step": 22647 }, { "epoch": 0.8861413256123327, "grad_norm": 0.0, "learning_rate": 6.722650156273758e-07, "loss": 0.9209, "step": 22648 }, { "epoch": 0.8861804523045622, "grad_norm": 0.0, "learning_rate": 6.718083005494547e-07, "loss": 0.9441, "step": 22649 }, { "epoch": 0.8862195789967916, "grad_norm": 0.0, "learning_rate": 6.713517352706012e-07, "loss": 1.0588, "step": 22650 }, { "epoch": 0.8862587056890211, "grad_norm": 0.0, "learning_rate": 6.708953197981504e-07, "loss": 1.0248, "step": 22651 }, { "epoch": 0.8862978323812505, "grad_norm": 0.0, "learning_rate": 6.704390541394278e-07, "loss": 0.952, "step": 22652 }, { "epoch": 0.88633695907348, "grad_norm": 0.0, "learning_rate": 6.699829383017675e-07, "loss": 0.8991, "step": 22653 }, { "epoch": 0.8863760857657094, "grad_norm": 0.0, "learning_rate": 6.69526972292488e-07, "loss": 0.9296, "step": 22654 }, { "epoch": 0.8864152124579388, "grad_norm": 0.0, "learning_rate": 6.690711561189145e-07, "loss": 0.8551, "step": 22655 }, { "epoch": 0.8864543391501682, "grad_norm": 0.0, "learning_rate": 6.686154897883634e-07, "loss": 0.9582, "step": 22656 }, { "epoch": 0.8864934658423976, "grad_norm": 0.0, "learning_rate": 6.681599733081579e-07, "loss": 0.9601, "step": 22657 }, { "epoch": 0.8865325925346271, "grad_norm": 0.0, "learning_rate": 6.677046066856075e-07, "loss": 0.8555, "step": 22658 }, { "epoch": 0.8865717192268565, "grad_norm": 0.0, "learning_rate": 6.672493899280297e-07, "loss": 0.856, "step": 22659 }, { "epoch": 0.886610845919086, "grad_norm": 0.0, "learning_rate": 6.667943230427298e-07, "loss": 1.0015, "step": 22660 }, { "epoch": 0.8866499726113154, "grad_norm": 0.0, "learning_rate": 6.663394060370177e-07, "loss": 1.042, "step": 22661 }, { "epoch": 0.8866890993035449, "grad_norm": 0.0, "learning_rate": 6.658846389181994e-07, "loss": 1.0369, "step": 22662 }, { "epoch": 0.8867282259957743, "grad_norm": 0.0, "learning_rate": 6.654300216935794e-07, "loss": 0.9235, "step": 22663 }, { "epoch": 0.8867673526880038, "grad_norm": 0.0, "learning_rate": 6.649755543704539e-07, "loss": 0.9993, "step": 22664 }, { "epoch": 0.8868064793802332, "grad_norm": 0.0, "learning_rate": 6.645212369561249e-07, "loss": 0.8969, "step": 22665 }, { "epoch": 0.8868456060724627, "grad_norm": 0.0, "learning_rate": 6.640670694578855e-07, "loss": 0.9924, "step": 22666 }, { "epoch": 0.886884732764692, "grad_norm": 0.0, "learning_rate": 6.63613051883033e-07, "loss": 0.9899, "step": 22667 }, { "epoch": 0.8869238594569215, "grad_norm": 0.0, "learning_rate": 6.631591842388529e-07, "loss": 0.8168, "step": 22668 }, { "epoch": 0.8869629861491509, "grad_norm": 0.0, "learning_rate": 6.627054665326394e-07, "loss": 0.9896, "step": 22669 }, { "epoch": 0.8870021128413804, "grad_norm": 0.0, "learning_rate": 6.622518987716742e-07, "loss": 0.8826, "step": 22670 }, { "epoch": 0.8870412395336098, "grad_norm": 0.0, "learning_rate": 6.617984809632416e-07, "loss": 0.8347, "step": 22671 }, { "epoch": 0.8870803662258393, "grad_norm": 0.0, "learning_rate": 6.613452131146248e-07, "loss": 0.8989, "step": 22672 }, { "epoch": 0.8871194929180687, "grad_norm": 0.0, "learning_rate": 6.608920952331033e-07, "loss": 1.0107, "step": 22673 }, { "epoch": 0.8871586196102982, "grad_norm": 0.0, "learning_rate": 6.604391273259503e-07, "loss": 1.0276, "step": 22674 }, { "epoch": 0.8871977463025276, "grad_norm": 0.0, "learning_rate": 6.599863094004422e-07, "loss": 0.986, "step": 22675 }, { "epoch": 0.8872368729947571, "grad_norm": 0.0, "learning_rate": 6.59533641463852e-07, "loss": 0.8634, "step": 22676 }, { "epoch": 0.8872759996869864, "grad_norm": 0.0, "learning_rate": 6.590811235234451e-07, "loss": 1.0439, "step": 22677 }, { "epoch": 0.887315126379216, "grad_norm": 0.0, "learning_rate": 6.586287555864912e-07, "loss": 1.0029, "step": 22678 }, { "epoch": 0.8873542530714453, "grad_norm": 0.0, "learning_rate": 6.581765376602533e-07, "loss": 1.0038, "step": 22679 }, { "epoch": 0.8873933797636748, "grad_norm": 0.0, "learning_rate": 6.577244697519969e-07, "loss": 0.9881, "step": 22680 }, { "epoch": 0.8874325064559042, "grad_norm": 0.0, "learning_rate": 6.57272551868976e-07, "loss": 0.9061, "step": 22681 }, { "epoch": 0.8874716331481337, "grad_norm": 0.0, "learning_rate": 6.568207840184537e-07, "loss": 0.9963, "step": 22682 }, { "epoch": 0.8875107598403631, "grad_norm": 0.0, "learning_rate": 6.563691662076777e-07, "loss": 0.9812, "step": 22683 }, { "epoch": 0.8875498865325925, "grad_norm": 0.0, "learning_rate": 6.559176984439087e-07, "loss": 0.9401, "step": 22684 }, { "epoch": 0.887589013224822, "grad_norm": 0.0, "learning_rate": 6.554663807343908e-07, "loss": 1.0536, "step": 22685 }, { "epoch": 0.8876281399170514, "grad_norm": 0.0, "learning_rate": 6.550152130863751e-07, "loss": 0.998, "step": 22686 }, { "epoch": 0.8876672666092809, "grad_norm": 0.0, "learning_rate": 6.545641955071036e-07, "loss": 0.9379, "step": 22687 }, { "epoch": 0.8877063933015102, "grad_norm": 0.0, "learning_rate": 6.541133280038203e-07, "loss": 1.0473, "step": 22688 }, { "epoch": 0.8877455199937397, "grad_norm": 0.0, "learning_rate": 6.536626105837662e-07, "loss": 0.9875, "step": 22689 }, { "epoch": 0.8877846466859691, "grad_norm": 0.0, "learning_rate": 6.53212043254181e-07, "loss": 1.0521, "step": 22690 }, { "epoch": 0.8878237733781986, "grad_norm": 0.0, "learning_rate": 6.527616260222958e-07, "loss": 0.853, "step": 22691 }, { "epoch": 0.887862900070428, "grad_norm": 0.0, "learning_rate": 6.523113588953466e-07, "loss": 0.931, "step": 22692 }, { "epoch": 0.8879020267626575, "grad_norm": 0.0, "learning_rate": 6.518612418805637e-07, "loss": 1.0115, "step": 22693 }, { "epoch": 0.8879411534548869, "grad_norm": 0.0, "learning_rate": 6.514112749851764e-07, "loss": 0.8738, "step": 22694 }, { "epoch": 0.8879802801471164, "grad_norm": 0.0, "learning_rate": 6.509614582164081e-07, "loss": 1.0117, "step": 22695 }, { "epoch": 0.8880194068393458, "grad_norm": 0.0, "learning_rate": 6.505117915814863e-07, "loss": 1.0169, "step": 22696 }, { "epoch": 0.8880585335315753, "grad_norm": 0.0, "learning_rate": 6.500622750876251e-07, "loss": 0.8741, "step": 22697 }, { "epoch": 0.8880976602238047, "grad_norm": 0.0, "learning_rate": 6.496129087420511e-07, "loss": 1.1364, "step": 22698 }, { "epoch": 0.8881367869160341, "grad_norm": 0.0, "learning_rate": 6.491636925519762e-07, "loss": 0.9612, "step": 22699 }, { "epoch": 0.8881759136082635, "grad_norm": 0.0, "learning_rate": 6.487146265246169e-07, "loss": 0.8698, "step": 22700 }, { "epoch": 0.888215040300493, "grad_norm": 0.0, "learning_rate": 6.482657106671785e-07, "loss": 1.0555, "step": 22701 }, { "epoch": 0.8882541669927224, "grad_norm": 0.0, "learning_rate": 6.478169449868787e-07, "loss": 0.894, "step": 22702 }, { "epoch": 0.8882932936849519, "grad_norm": 0.0, "learning_rate": 6.473683294909172e-07, "loss": 0.8911, "step": 22703 }, { "epoch": 0.8883324203771813, "grad_norm": 0.0, "learning_rate": 6.469198641865038e-07, "loss": 1.0596, "step": 22704 }, { "epoch": 0.8883715470694108, "grad_norm": 0.0, "learning_rate": 6.464715490808349e-07, "loss": 0.9179, "step": 22705 }, { "epoch": 0.8884106737616402, "grad_norm": 0.0, "learning_rate": 6.460233841811125e-07, "loss": 0.9383, "step": 22706 }, { "epoch": 0.8884498004538697, "grad_norm": 0.0, "learning_rate": 6.455753694945332e-07, "loss": 0.989, "step": 22707 }, { "epoch": 0.8884889271460991, "grad_norm": 0.0, "learning_rate": 6.451275050282935e-07, "loss": 0.8073, "step": 22708 }, { "epoch": 0.8885280538383286, "grad_norm": 0.0, "learning_rate": 6.446797907895819e-07, "loss": 1.0069, "step": 22709 }, { "epoch": 0.8885671805305579, "grad_norm": 0.0, "learning_rate": 6.442322267855894e-07, "loss": 0.9676, "step": 22710 }, { "epoch": 0.8886063072227874, "grad_norm": 0.0, "learning_rate": 6.437848130235047e-07, "loss": 0.8445, "step": 22711 }, { "epoch": 0.8886454339150168, "grad_norm": 0.0, "learning_rate": 6.433375495105132e-07, "loss": 0.9001, "step": 22712 }, { "epoch": 0.8886845606072462, "grad_norm": 0.0, "learning_rate": 6.428904362537946e-07, "loss": 0.8872, "step": 22713 }, { "epoch": 0.8887236872994757, "grad_norm": 0.0, "learning_rate": 6.424434732605312e-07, "loss": 0.9418, "step": 22714 }, { "epoch": 0.8887628139917051, "grad_norm": 0.0, "learning_rate": 6.419966605379002e-07, "loss": 0.93, "step": 22715 }, { "epoch": 0.8888019406839346, "grad_norm": 0.0, "learning_rate": 6.415499980930761e-07, "loss": 1.0034, "step": 22716 }, { "epoch": 0.888841067376164, "grad_norm": 0.0, "learning_rate": 6.411034859332321e-07, "loss": 1.052, "step": 22717 }, { "epoch": 0.8888801940683935, "grad_norm": 0.0, "learning_rate": 6.406571240655402e-07, "loss": 0.9336, "step": 22718 }, { "epoch": 0.8889193207606229, "grad_norm": 0.0, "learning_rate": 6.402109124971645e-07, "loss": 1.0294, "step": 22719 }, { "epoch": 0.8889584474528524, "grad_norm": 0.0, "learning_rate": 6.397648512352739e-07, "loss": 0.9839, "step": 22720 }, { "epoch": 0.8889975741450817, "grad_norm": 0.0, "learning_rate": 6.393189402870315e-07, "loss": 0.8857, "step": 22721 }, { "epoch": 0.8890367008373112, "grad_norm": 0.0, "learning_rate": 6.388731796595971e-07, "loss": 0.9547, "step": 22722 }, { "epoch": 0.8890758275295406, "grad_norm": 0.0, "learning_rate": 6.384275693601293e-07, "loss": 0.9612, "step": 22723 }, { "epoch": 0.8891149542217701, "grad_norm": 0.0, "learning_rate": 6.379821093957838e-07, "loss": 0.9548, "step": 22724 }, { "epoch": 0.8891540809139995, "grad_norm": 0.0, "learning_rate": 6.375367997737147e-07, "loss": 0.8759, "step": 22725 }, { "epoch": 0.889193207606229, "grad_norm": 0.0, "learning_rate": 6.37091640501073e-07, "loss": 0.9813, "step": 22726 }, { "epoch": 0.8892323342984584, "grad_norm": 0.0, "learning_rate": 6.366466315850062e-07, "loss": 0.9853, "step": 22727 }, { "epoch": 0.8892714609906879, "grad_norm": 0.0, "learning_rate": 6.362017730326609e-07, "loss": 0.9607, "step": 22728 }, { "epoch": 0.8893105876829173, "grad_norm": 0.0, "learning_rate": 6.357570648511846e-07, "loss": 0.981, "step": 22729 }, { "epoch": 0.8893497143751468, "grad_norm": 0.0, "learning_rate": 6.353125070477129e-07, "loss": 1.0011, "step": 22730 }, { "epoch": 0.8893888410673761, "grad_norm": 0.0, "learning_rate": 6.348680996293899e-07, "loss": 0.957, "step": 22731 }, { "epoch": 0.8894279677596056, "grad_norm": 0.0, "learning_rate": 6.344238426033478e-07, "loss": 0.8621, "step": 22732 }, { "epoch": 0.889467094451835, "grad_norm": 0.0, "learning_rate": 6.339797359767253e-07, "loss": 1.0065, "step": 22733 }, { "epoch": 0.8895062211440645, "grad_norm": 0.0, "learning_rate": 6.335357797566499e-07, "loss": 0.9495, "step": 22734 }, { "epoch": 0.8895453478362939, "grad_norm": 0.0, "learning_rate": 6.33091973950255e-07, "loss": 0.9383, "step": 22735 }, { "epoch": 0.8895844745285234, "grad_norm": 0.0, "learning_rate": 6.326483185646648e-07, "loss": 0.8921, "step": 22736 }, { "epoch": 0.8896236012207528, "grad_norm": 0.0, "learning_rate": 6.322048136070036e-07, "loss": 0.9894, "step": 22737 }, { "epoch": 0.8896627279129823, "grad_norm": 0.0, "learning_rate": 6.317614590843945e-07, "loss": 0.9338, "step": 22738 }, { "epoch": 0.8897018546052117, "grad_norm": 0.0, "learning_rate": 6.313182550039598e-07, "loss": 1.1033, "step": 22739 }, { "epoch": 0.8897409812974412, "grad_norm": 0.0, "learning_rate": 6.308752013728126e-07, "loss": 0.889, "step": 22740 }, { "epoch": 0.8897801079896706, "grad_norm": 0.0, "learning_rate": 6.304322981980693e-07, "loss": 1.0337, "step": 22741 }, { "epoch": 0.8898192346818999, "grad_norm": 0.0, "learning_rate": 6.299895454868421e-07, "loss": 0.8551, "step": 22742 }, { "epoch": 0.8898583613741294, "grad_norm": 0.0, "learning_rate": 6.295469432462442e-07, "loss": 0.9005, "step": 22743 }, { "epoch": 0.8898974880663588, "grad_norm": 0.0, "learning_rate": 6.291044914833777e-07, "loss": 0.923, "step": 22744 }, { "epoch": 0.8899366147585883, "grad_norm": 0.0, "learning_rate": 6.286621902053524e-07, "loss": 0.9675, "step": 22745 }, { "epoch": 0.8899757414508177, "grad_norm": 0.0, "learning_rate": 6.282200394192673e-07, "loss": 0.9979, "step": 22746 }, { "epoch": 0.8900148681430472, "grad_norm": 0.0, "learning_rate": 6.277780391322275e-07, "loss": 0.969, "step": 22747 }, { "epoch": 0.8900539948352766, "grad_norm": 0.0, "learning_rate": 6.273361893513264e-07, "loss": 0.844, "step": 22748 }, { "epoch": 0.8900931215275061, "grad_norm": 0.0, "learning_rate": 6.268944900836638e-07, "loss": 0.9794, "step": 22749 }, { "epoch": 0.8901322482197355, "grad_norm": 0.0, "learning_rate": 6.264529413363263e-07, "loss": 1.0135, "step": 22750 }, { "epoch": 0.890171374911965, "grad_norm": 0.0, "learning_rate": 6.260115431164127e-07, "loss": 0.8951, "step": 22751 }, { "epoch": 0.8902105016041943, "grad_norm": 0.0, "learning_rate": 6.255702954310061e-07, "loss": 1.0095, "step": 22752 }, { "epoch": 0.8902496282964238, "grad_norm": 0.0, "learning_rate": 6.251291982871943e-07, "loss": 0.9602, "step": 22753 }, { "epoch": 0.8902887549886532, "grad_norm": 0.0, "learning_rate": 6.246882516920593e-07, "loss": 0.9048, "step": 22754 }, { "epoch": 0.8903278816808827, "grad_norm": 0.0, "learning_rate": 6.242474556526834e-07, "loss": 0.9836, "step": 22755 }, { "epoch": 0.8903670083731121, "grad_norm": 0.0, "learning_rate": 6.238068101761452e-07, "loss": 0.944, "step": 22756 }, { "epoch": 0.8904061350653416, "grad_norm": 0.0, "learning_rate": 6.233663152695213e-07, "loss": 0.9632, "step": 22757 }, { "epoch": 0.890445261757571, "grad_norm": 0.0, "learning_rate": 6.229259709398828e-07, "loss": 0.8817, "step": 22758 }, { "epoch": 0.8904843884498005, "grad_norm": 0.0, "learning_rate": 6.224857771943038e-07, "loss": 0.9438, "step": 22759 }, { "epoch": 0.8905235151420299, "grad_norm": 0.0, "learning_rate": 6.220457340398533e-07, "loss": 0.9459, "step": 22760 }, { "epoch": 0.8905626418342594, "grad_norm": 0.0, "learning_rate": 6.216058414835969e-07, "loss": 1.0088, "step": 22761 }, { "epoch": 0.8906017685264888, "grad_norm": 0.0, "learning_rate": 6.211660995325986e-07, "loss": 1.1251, "step": 22762 }, { "epoch": 0.8906408952187183, "grad_norm": 0.0, "learning_rate": 6.207265081939218e-07, "loss": 0.872, "step": 22763 }, { "epoch": 0.8906800219109476, "grad_norm": 0.0, "learning_rate": 6.202870674746231e-07, "loss": 0.9238, "step": 22764 }, { "epoch": 0.8907191486031771, "grad_norm": 0.0, "learning_rate": 6.198477773817601e-07, "loss": 0.9868, "step": 22765 }, { "epoch": 0.8907582752954065, "grad_norm": 0.0, "learning_rate": 6.194086379223874e-07, "loss": 0.9898, "step": 22766 }, { "epoch": 0.890797401987636, "grad_norm": 0.0, "learning_rate": 6.18969649103559e-07, "loss": 0.901, "step": 22767 }, { "epoch": 0.8908365286798654, "grad_norm": 0.0, "learning_rate": 6.18530810932323e-07, "loss": 0.975, "step": 22768 }, { "epoch": 0.8908756553720948, "grad_norm": 0.0, "learning_rate": 6.180921234157245e-07, "loss": 0.9812, "step": 22769 }, { "epoch": 0.8909147820643243, "grad_norm": 0.0, "learning_rate": 6.176535865608135e-07, "loss": 0.8414, "step": 22770 }, { "epoch": 0.8909539087565537, "grad_norm": 0.0, "learning_rate": 6.172152003746268e-07, "loss": 0.9874, "step": 22771 }, { "epoch": 0.8909930354487832, "grad_norm": 0.0, "learning_rate": 6.167769648642064e-07, "loss": 0.8965, "step": 22772 }, { "epoch": 0.8910321621410126, "grad_norm": 0.0, "learning_rate": 6.16338880036591e-07, "loss": 0.9372, "step": 22773 }, { "epoch": 0.891071288833242, "grad_norm": 0.0, "learning_rate": 6.159009458988152e-07, "loss": 0.896, "step": 22774 }, { "epoch": 0.8911104155254714, "grad_norm": 0.0, "learning_rate": 6.154631624579111e-07, "loss": 0.9361, "step": 22775 }, { "epoch": 0.8911495422177009, "grad_norm": 0.0, "learning_rate": 6.150255297209095e-07, "loss": 1.0624, "step": 22776 }, { "epoch": 0.8911886689099303, "grad_norm": 0.0, "learning_rate": 6.145880476948352e-07, "loss": 0.9848, "step": 22777 }, { "epoch": 0.8912277956021598, "grad_norm": 0.0, "learning_rate": 6.1415071638672e-07, "loss": 1.0491, "step": 22778 }, { "epoch": 0.8912669222943892, "grad_norm": 0.0, "learning_rate": 6.137135358035806e-07, "loss": 1.0195, "step": 22779 }, { "epoch": 0.8913060489866187, "grad_norm": 0.0, "learning_rate": 6.132765059524426e-07, "loss": 0.8699, "step": 22780 }, { "epoch": 0.8913451756788481, "grad_norm": 0.0, "learning_rate": 6.12839626840318e-07, "loss": 0.9015, "step": 22781 }, { "epoch": 0.8913843023710776, "grad_norm": 0.0, "learning_rate": 6.124028984742292e-07, "loss": 0.9929, "step": 22782 }, { "epoch": 0.891423429063307, "grad_norm": 0.0, "learning_rate": 6.119663208611848e-07, "loss": 0.9096, "step": 22783 }, { "epoch": 0.8914625557555365, "grad_norm": 0.0, "learning_rate": 6.115298940081993e-07, "loss": 1.0396, "step": 22784 }, { "epoch": 0.8915016824477658, "grad_norm": 0.0, "learning_rate": 6.110936179222782e-07, "loss": 0.8902, "step": 22785 }, { "epoch": 0.8915408091399953, "grad_norm": 0.0, "learning_rate": 6.106574926104281e-07, "loss": 0.9189, "step": 22786 }, { "epoch": 0.8915799358322247, "grad_norm": 0.0, "learning_rate": 6.102215180796533e-07, "loss": 0.9279, "step": 22787 }, { "epoch": 0.8916190625244542, "grad_norm": 0.0, "learning_rate": 6.097856943369562e-07, "loss": 0.9472, "step": 22788 }, { "epoch": 0.8916581892166836, "grad_norm": 0.0, "learning_rate": 6.093500213893333e-07, "loss": 0.9686, "step": 22789 }, { "epoch": 0.8916973159089131, "grad_norm": 0.0, "learning_rate": 6.089144992437812e-07, "loss": 1.0892, "step": 22790 }, { "epoch": 0.8917364426011425, "grad_norm": 0.0, "learning_rate": 6.084791279072955e-07, "loss": 1.0433, "step": 22791 }, { "epoch": 0.891775569293372, "grad_norm": 0.0, "learning_rate": 6.080439073868671e-07, "loss": 0.964, "step": 22792 }, { "epoch": 0.8918146959856014, "grad_norm": 0.0, "learning_rate": 6.07608837689484e-07, "loss": 0.8936, "step": 22793 }, { "epoch": 0.8918538226778309, "grad_norm": 0.0, "learning_rate": 6.071739188221349e-07, "loss": 1.0885, "step": 22794 }, { "epoch": 0.8918929493700603, "grad_norm": 0.0, "learning_rate": 6.067391507917997e-07, "loss": 0.8761, "step": 22795 }, { "epoch": 0.8919320760622897, "grad_norm": 0.0, "learning_rate": 6.063045336054674e-07, "loss": 0.9616, "step": 22796 }, { "epoch": 0.8919712027545191, "grad_norm": 0.0, "learning_rate": 6.058700672701101e-07, "loss": 0.9719, "step": 22797 }, { "epoch": 0.8920103294467485, "grad_norm": 0.0, "learning_rate": 6.054357517927112e-07, "loss": 0.9478, "step": 22798 }, { "epoch": 0.892049456138978, "grad_norm": 0.0, "learning_rate": 6.050015871802384e-07, "loss": 0.9443, "step": 22799 }, { "epoch": 0.8920885828312074, "grad_norm": 0.0, "learning_rate": 6.045675734396694e-07, "loss": 0.974, "step": 22800 }, { "epoch": 0.8921277095234369, "grad_norm": 0.0, "learning_rate": 6.041337105779721e-07, "loss": 0.9928, "step": 22801 }, { "epoch": 0.8921668362156663, "grad_norm": 0.0, "learning_rate": 6.036999986021141e-07, "loss": 1.0072, "step": 22802 }, { "epoch": 0.8922059629078958, "grad_norm": 0.0, "learning_rate": 6.032664375190588e-07, "loss": 0.7997, "step": 22803 }, { "epoch": 0.8922450896001252, "grad_norm": 0.0, "learning_rate": 6.028330273357708e-07, "loss": 0.9991, "step": 22804 }, { "epoch": 0.8922842162923547, "grad_norm": 0.0, "learning_rate": 6.023997680592075e-07, "loss": 1.0437, "step": 22805 }, { "epoch": 0.892323342984584, "grad_norm": 0.0, "learning_rate": 6.019666596963303e-07, "loss": 0.9711, "step": 22806 }, { "epoch": 0.8923624696768135, "grad_norm": 0.0, "learning_rate": 6.015337022540912e-07, "loss": 0.9565, "step": 22807 }, { "epoch": 0.8924015963690429, "grad_norm": 0.0, "learning_rate": 6.011008957394426e-07, "loss": 0.9762, "step": 22808 }, { "epoch": 0.8924407230612724, "grad_norm": 0.0, "learning_rate": 6.006682401593389e-07, "loss": 0.9028, "step": 22809 }, { "epoch": 0.8924798497535018, "grad_norm": 0.0, "learning_rate": 6.002357355207234e-07, "loss": 0.9807, "step": 22810 }, { "epoch": 0.8925189764457313, "grad_norm": 0.0, "learning_rate": 5.998033818305426e-07, "loss": 0.9211, "step": 22811 }, { "epoch": 0.8925581031379607, "grad_norm": 0.0, "learning_rate": 5.993711790957423e-07, "loss": 0.9962, "step": 22812 }, { "epoch": 0.8925972298301902, "grad_norm": 0.0, "learning_rate": 5.989391273232603e-07, "loss": 0.935, "step": 22813 }, { "epoch": 0.8926363565224196, "grad_norm": 0.0, "learning_rate": 5.985072265200354e-07, "loss": 0.9581, "step": 22814 }, { "epoch": 0.8926754832146491, "grad_norm": 0.0, "learning_rate": 5.980754766930052e-07, "loss": 1.1258, "step": 22815 }, { "epoch": 0.8927146099068785, "grad_norm": 0.0, "learning_rate": 5.976438778491001e-07, "loss": 1.0111, "step": 22816 }, { "epoch": 0.892753736599108, "grad_norm": 0.0, "learning_rate": 5.97212429995252e-07, "loss": 0.8847, "step": 22817 }, { "epoch": 0.8927928632913373, "grad_norm": 0.0, "learning_rate": 5.967811331383899e-07, "loss": 0.9876, "step": 22818 }, { "epoch": 0.8928319899835668, "grad_norm": 0.0, "learning_rate": 5.963499872854417e-07, "loss": 0.9872, "step": 22819 }, { "epoch": 0.8928711166757962, "grad_norm": 0.0, "learning_rate": 5.959189924433284e-07, "loss": 1.1448, "step": 22820 }, { "epoch": 0.8929102433680257, "grad_norm": 0.0, "learning_rate": 5.954881486189734e-07, "loss": 0.9286, "step": 22821 }, { "epoch": 0.8929493700602551, "grad_norm": 0.0, "learning_rate": 5.9505745581929e-07, "loss": 0.983, "step": 22822 }, { "epoch": 0.8929884967524846, "grad_norm": 0.0, "learning_rate": 5.946269140512028e-07, "loss": 0.921, "step": 22823 }, { "epoch": 0.893027623444714, "grad_norm": 0.0, "learning_rate": 5.941965233216207e-07, "loss": 0.8663, "step": 22824 }, { "epoch": 0.8930667501369435, "grad_norm": 0.0, "learning_rate": 5.937662836374569e-07, "loss": 0.9111, "step": 22825 }, { "epoch": 0.8931058768291729, "grad_norm": 0.0, "learning_rate": 5.933361950056183e-07, "loss": 1.022, "step": 22826 }, { "epoch": 0.8931450035214022, "grad_norm": 0.0, "learning_rate": 5.929062574330147e-07, "loss": 0.982, "step": 22827 }, { "epoch": 0.8931841302136317, "grad_norm": 0.0, "learning_rate": 5.924764709265473e-07, "loss": 0.9588, "step": 22828 }, { "epoch": 0.8932232569058611, "grad_norm": 0.0, "learning_rate": 5.920468354931219e-07, "loss": 0.9557, "step": 22829 }, { "epoch": 0.8932623835980906, "grad_norm": 0.0, "learning_rate": 5.916173511396328e-07, "loss": 0.9292, "step": 22830 }, { "epoch": 0.89330151029032, "grad_norm": 0.0, "learning_rate": 5.911880178729812e-07, "loss": 0.8717, "step": 22831 }, { "epoch": 0.8933406369825495, "grad_norm": 0.0, "learning_rate": 5.907588357000604e-07, "loss": 0.8399, "step": 22832 }, { "epoch": 0.8933797636747789, "grad_norm": 0.0, "learning_rate": 5.903298046277628e-07, "loss": 1.0154, "step": 22833 }, { "epoch": 0.8934188903670084, "grad_norm": 0.0, "learning_rate": 5.899009246629761e-07, "loss": 1.0159, "step": 22834 }, { "epoch": 0.8934580170592378, "grad_norm": 0.0, "learning_rate": 5.894721958125882e-07, "loss": 1.0387, "step": 22835 }, { "epoch": 0.8934971437514673, "grad_norm": 0.0, "learning_rate": 5.890436180834857e-07, "loss": 0.9453, "step": 22836 }, { "epoch": 0.8935362704436967, "grad_norm": 0.0, "learning_rate": 5.886151914825522e-07, "loss": 0.9271, "step": 22837 }, { "epoch": 0.8935753971359262, "grad_norm": 0.0, "learning_rate": 5.881869160166632e-07, "loss": 0.8808, "step": 22838 }, { "epoch": 0.8936145238281555, "grad_norm": 0.0, "learning_rate": 5.877587916926986e-07, "loss": 0.8932, "step": 22839 }, { "epoch": 0.893653650520385, "grad_norm": 0.0, "learning_rate": 5.873308185175341e-07, "loss": 1.0164, "step": 22840 }, { "epoch": 0.8936927772126144, "grad_norm": 0.0, "learning_rate": 5.869029964980433e-07, "loss": 0.8884, "step": 22841 }, { "epoch": 0.8937319039048439, "grad_norm": 0.0, "learning_rate": 5.864753256410938e-07, "loss": 0.9575, "step": 22842 }, { "epoch": 0.8937710305970733, "grad_norm": 0.0, "learning_rate": 5.860478059535557e-07, "loss": 1.0115, "step": 22843 }, { "epoch": 0.8938101572893028, "grad_norm": 0.0, "learning_rate": 5.856204374422903e-07, "loss": 0.9097, "step": 22844 }, { "epoch": 0.8938492839815322, "grad_norm": 0.0, "learning_rate": 5.851932201141674e-07, "loss": 0.892, "step": 22845 }, { "epoch": 0.8938884106737617, "grad_norm": 0.0, "learning_rate": 5.84766153976043e-07, "loss": 0.8847, "step": 22846 }, { "epoch": 0.8939275373659911, "grad_norm": 0.0, "learning_rate": 5.843392390347768e-07, "loss": 0.9562, "step": 22847 }, { "epoch": 0.8939666640582206, "grad_norm": 0.0, "learning_rate": 5.839124752972225e-07, "loss": 1.0186, "step": 22848 }, { "epoch": 0.89400579075045, "grad_norm": 0.0, "learning_rate": 5.834858627702355e-07, "loss": 0.9224, "step": 22849 }, { "epoch": 0.8940449174426794, "grad_norm": 0.0, "learning_rate": 5.83059401460665e-07, "loss": 0.9339, "step": 22850 }, { "epoch": 0.8940840441349088, "grad_norm": 0.0, "learning_rate": 5.826330913753631e-07, "loss": 0.8146, "step": 22851 }, { "epoch": 0.8941231708271383, "grad_norm": 0.0, "learning_rate": 5.822069325211699e-07, "loss": 0.9494, "step": 22852 }, { "epoch": 0.8941622975193677, "grad_norm": 0.0, "learning_rate": 5.817809249049333e-07, "loss": 0.9738, "step": 22853 }, { "epoch": 0.8942014242115972, "grad_norm": 0.0, "learning_rate": 5.813550685334957e-07, "loss": 0.9386, "step": 22854 }, { "epoch": 0.8942405509038266, "grad_norm": 0.0, "learning_rate": 5.809293634136903e-07, "loss": 1.0068, "step": 22855 }, { "epoch": 0.894279677596056, "grad_norm": 0.0, "learning_rate": 5.805038095523574e-07, "loss": 0.9564, "step": 22856 }, { "epoch": 0.8943188042882855, "grad_norm": 0.0, "learning_rate": 5.800784069563304e-07, "loss": 0.8528, "step": 22857 }, { "epoch": 0.8943579309805149, "grad_norm": 0.0, "learning_rate": 5.796531556324414e-07, "loss": 0.9633, "step": 22858 }, { "epoch": 0.8943970576727444, "grad_norm": 0.0, "learning_rate": 5.792280555875174e-07, "loss": 1.0733, "step": 22859 }, { "epoch": 0.8944361843649737, "grad_norm": 0.0, "learning_rate": 5.788031068283872e-07, "loss": 0.8915, "step": 22860 }, { "epoch": 0.8944753110572032, "grad_norm": 0.0, "learning_rate": 5.783783093618711e-07, "loss": 0.9064, "step": 22861 }, { "epoch": 0.8945144377494326, "grad_norm": 0.0, "learning_rate": 5.779536631947947e-07, "loss": 0.9995, "step": 22862 }, { "epoch": 0.8945535644416621, "grad_norm": 0.0, "learning_rate": 5.775291683339757e-07, "loss": 0.9598, "step": 22863 }, { "epoch": 0.8945926911338915, "grad_norm": 0.0, "learning_rate": 5.77104824786232e-07, "loss": 0.9848, "step": 22864 }, { "epoch": 0.894631817826121, "grad_norm": 0.0, "learning_rate": 5.766806325583763e-07, "loss": 1.0122, "step": 22865 }, { "epoch": 0.8946709445183504, "grad_norm": 0.0, "learning_rate": 5.762565916572216e-07, "loss": 1.0132, "step": 22866 }, { "epoch": 0.8947100712105799, "grad_norm": 0.0, "learning_rate": 5.758327020895782e-07, "loss": 0.9467, "step": 22867 }, { "epoch": 0.8947491979028093, "grad_norm": 0.0, "learning_rate": 5.754089638622529e-07, "loss": 0.8951, "step": 22868 }, { "epoch": 0.8947883245950388, "grad_norm": 0.0, "learning_rate": 5.74985376982049e-07, "loss": 0.9189, "step": 22869 }, { "epoch": 0.8948274512872681, "grad_norm": 0.0, "learning_rate": 5.745619414557713e-07, "loss": 1.022, "step": 22870 }, { "epoch": 0.8948665779794976, "grad_norm": 0.0, "learning_rate": 5.741386572902141e-07, "loss": 1.0281, "step": 22871 }, { "epoch": 0.894905704671727, "grad_norm": 0.0, "learning_rate": 5.737155244921833e-07, "loss": 0.9371, "step": 22872 }, { "epoch": 0.8949448313639565, "grad_norm": 0.0, "learning_rate": 5.732925430684666e-07, "loss": 1.0244, "step": 22873 }, { "epoch": 0.8949839580561859, "grad_norm": 0.0, "learning_rate": 5.72869713025861e-07, "loss": 0.9384, "step": 22874 }, { "epoch": 0.8950230847484154, "grad_norm": 0.0, "learning_rate": 5.72447034371153e-07, "loss": 0.8918, "step": 22875 }, { "epoch": 0.8950622114406448, "grad_norm": 0.0, "learning_rate": 5.720245071111341e-07, "loss": 0.9835, "step": 22876 }, { "epoch": 0.8951013381328743, "grad_norm": 0.0, "learning_rate": 5.716021312525866e-07, "loss": 0.9273, "step": 22877 }, { "epoch": 0.8951404648251037, "grad_norm": 0.0, "learning_rate": 5.71179906802296e-07, "loss": 0.867, "step": 22878 }, { "epoch": 0.8951795915173332, "grad_norm": 0.0, "learning_rate": 5.707578337670394e-07, "loss": 1.0039, "step": 22879 }, { "epoch": 0.8952187182095626, "grad_norm": 0.0, "learning_rate": 5.703359121535967e-07, "loss": 1.0027, "step": 22880 }, { "epoch": 0.895257844901792, "grad_norm": 0.0, "learning_rate": 5.699141419687437e-07, "loss": 0.9341, "step": 22881 }, { "epoch": 0.8952969715940214, "grad_norm": 0.0, "learning_rate": 5.694925232192538e-07, "loss": 0.875, "step": 22882 }, { "epoch": 0.8953360982862508, "grad_norm": 0.0, "learning_rate": 5.69071055911895e-07, "loss": 0.9489, "step": 22883 }, { "epoch": 0.8953752249784803, "grad_norm": 0.0, "learning_rate": 5.686497400534385e-07, "loss": 1.0004, "step": 22884 }, { "epoch": 0.8954143516707097, "grad_norm": 0.0, "learning_rate": 5.682285756506489e-07, "loss": 1.0436, "step": 22885 }, { "epoch": 0.8954534783629392, "grad_norm": 0.0, "learning_rate": 5.678075627102908e-07, "loss": 0.9962, "step": 22886 }, { "epoch": 0.8954926050551686, "grad_norm": 0.0, "learning_rate": 5.673867012391221e-07, "loss": 0.9557, "step": 22887 }, { "epoch": 0.8955317317473981, "grad_norm": 0.0, "learning_rate": 5.669659912439041e-07, "loss": 0.8781, "step": 22888 }, { "epoch": 0.8955708584396275, "grad_norm": 0.0, "learning_rate": 5.665454327313913e-07, "loss": 0.8287, "step": 22889 }, { "epoch": 0.895609985131857, "grad_norm": 0.0, "learning_rate": 5.661250257083395e-07, "loss": 0.967, "step": 22890 }, { "epoch": 0.8956491118240864, "grad_norm": 0.0, "learning_rate": 5.657047701814978e-07, "loss": 0.9195, "step": 22891 }, { "epoch": 0.8956882385163158, "grad_norm": 0.0, "learning_rate": 5.652846661576172e-07, "loss": 0.9608, "step": 22892 }, { "epoch": 0.8957273652085452, "grad_norm": 0.0, "learning_rate": 5.648647136434404e-07, "loss": 0.9372, "step": 22893 }, { "epoch": 0.8957664919007747, "grad_norm": 0.0, "learning_rate": 5.64444912645713e-07, "loss": 0.9079, "step": 22894 }, { "epoch": 0.8958056185930041, "grad_norm": 0.0, "learning_rate": 5.640252631711784e-07, "loss": 0.9039, "step": 22895 }, { "epoch": 0.8958447452852336, "grad_norm": 0.0, "learning_rate": 5.636057652265747e-07, "loss": 0.8531, "step": 22896 }, { "epoch": 0.895883871977463, "grad_norm": 0.0, "learning_rate": 5.631864188186364e-07, "loss": 0.98, "step": 22897 }, { "epoch": 0.8959229986696925, "grad_norm": 0.0, "learning_rate": 5.627672239540993e-07, "loss": 0.8947, "step": 22898 }, { "epoch": 0.8959621253619219, "grad_norm": 0.0, "learning_rate": 5.623481806396958e-07, "loss": 0.9676, "step": 22899 }, { "epoch": 0.8960012520541514, "grad_norm": 0.0, "learning_rate": 5.619292888821537e-07, "loss": 0.9006, "step": 22900 }, { "epoch": 0.8960403787463808, "grad_norm": 0.0, "learning_rate": 5.615105486882011e-07, "loss": 1.0062, "step": 22901 }, { "epoch": 0.8960795054386103, "grad_norm": 0.0, "learning_rate": 5.610919600645615e-07, "loss": 0.9988, "step": 22902 }, { "epoch": 0.8961186321308396, "grad_norm": 0.0, "learning_rate": 5.606735230179594e-07, "loss": 0.9261, "step": 22903 }, { "epoch": 0.8961577588230691, "grad_norm": 0.0, "learning_rate": 5.602552375551107e-07, "loss": 0.9679, "step": 22904 }, { "epoch": 0.8961968855152985, "grad_norm": 0.0, "learning_rate": 5.598371036827355e-07, "loss": 0.9779, "step": 22905 }, { "epoch": 0.896236012207528, "grad_norm": 0.0, "learning_rate": 5.59419121407544e-07, "loss": 0.9579, "step": 22906 }, { "epoch": 0.8962751388997574, "grad_norm": 0.0, "learning_rate": 5.590012907362552e-07, "loss": 1.045, "step": 22907 }, { "epoch": 0.8963142655919869, "grad_norm": 0.0, "learning_rate": 5.585836116755739e-07, "loss": 0.9374, "step": 22908 }, { "epoch": 0.8963533922842163, "grad_norm": 0.0, "learning_rate": 5.581660842322101e-07, "loss": 0.9898, "step": 22909 }, { "epoch": 0.8963925189764458, "grad_norm": 0.0, "learning_rate": 5.577487084128664e-07, "loss": 0.9587, "step": 22910 }, { "epoch": 0.8964316456686752, "grad_norm": 0.0, "learning_rate": 5.573314842242461e-07, "loss": 0.9898, "step": 22911 }, { "epoch": 0.8964707723609046, "grad_norm": 0.0, "learning_rate": 5.569144116730507e-07, "loss": 0.9548, "step": 22912 }, { "epoch": 0.896509899053134, "grad_norm": 0.0, "learning_rate": 5.564974907659781e-07, "loss": 0.9802, "step": 22913 }, { "epoch": 0.8965490257453634, "grad_norm": 0.0, "learning_rate": 5.560807215097208e-07, "loss": 0.9271, "step": 22914 }, { "epoch": 0.8965881524375929, "grad_norm": 0.0, "learning_rate": 5.556641039109734e-07, "loss": 0.9481, "step": 22915 }, { "epoch": 0.8966272791298223, "grad_norm": 0.0, "learning_rate": 5.55247637976426e-07, "loss": 0.9677, "step": 22916 }, { "epoch": 0.8966664058220518, "grad_norm": 0.0, "learning_rate": 5.548313237127689e-07, "loss": 0.8231, "step": 22917 }, { "epoch": 0.8967055325142812, "grad_norm": 0.0, "learning_rate": 5.544151611266823e-07, "loss": 1.0363, "step": 22918 }, { "epoch": 0.8967446592065107, "grad_norm": 0.0, "learning_rate": 5.539991502248554e-07, "loss": 0.9777, "step": 22919 }, { "epoch": 0.8967837858987401, "grad_norm": 0.0, "learning_rate": 5.535832910139616e-07, "loss": 0.893, "step": 22920 }, { "epoch": 0.8968229125909696, "grad_norm": 0.0, "learning_rate": 5.531675835006867e-07, "loss": 0.8738, "step": 22921 }, { "epoch": 0.896862039283199, "grad_norm": 0.0, "learning_rate": 5.527520276917009e-07, "loss": 1.0215, "step": 22922 }, { "epoch": 0.8969011659754285, "grad_norm": 0.0, "learning_rate": 5.523366235936811e-07, "loss": 0.9836, "step": 22923 }, { "epoch": 0.8969402926676578, "grad_norm": 0.0, "learning_rate": 5.519213712132931e-07, "loss": 0.9236, "step": 22924 }, { "epoch": 0.8969794193598873, "grad_norm": 0.0, "learning_rate": 5.515062705572116e-07, "loss": 1.1003, "step": 22925 }, { "epoch": 0.8970185460521167, "grad_norm": 0.0, "learning_rate": 5.51091321632099e-07, "loss": 0.9622, "step": 22926 }, { "epoch": 0.8970576727443462, "grad_norm": 0.0, "learning_rate": 5.506765244446211e-07, "loss": 1.004, "step": 22927 }, { "epoch": 0.8970967994365756, "grad_norm": 0.0, "learning_rate": 5.502618790014358e-07, "loss": 1.0068, "step": 22928 }, { "epoch": 0.8971359261288051, "grad_norm": 0.0, "learning_rate": 5.498473853092034e-07, "loss": 1.0617, "step": 22929 }, { "epoch": 0.8971750528210345, "grad_norm": 0.0, "learning_rate": 5.494330433745809e-07, "loss": 0.8661, "step": 22930 }, { "epoch": 0.897214179513264, "grad_norm": 0.0, "learning_rate": 5.490188532042229e-07, "loss": 1.0596, "step": 22931 }, { "epoch": 0.8972533062054934, "grad_norm": 0.0, "learning_rate": 5.486048148047774e-07, "loss": 0.9015, "step": 22932 }, { "epoch": 0.8972924328977229, "grad_norm": 0.0, "learning_rate": 5.481909281828956e-07, "loss": 1.0402, "step": 22933 }, { "epoch": 0.8973315595899523, "grad_norm": 0.0, "learning_rate": 5.477771933452237e-07, "loss": 0.9067, "step": 22934 }, { "epoch": 0.8973706862821818, "grad_norm": 0.0, "learning_rate": 5.47363610298407e-07, "loss": 1.0216, "step": 22935 }, { "epoch": 0.8974098129744111, "grad_norm": 0.0, "learning_rate": 5.46950179049085e-07, "loss": 0.9436, "step": 22936 }, { "epoch": 0.8974489396666406, "grad_norm": 0.0, "learning_rate": 5.465368996038989e-07, "loss": 0.8922, "step": 22937 }, { "epoch": 0.89748806635887, "grad_norm": 0.0, "learning_rate": 5.461237719694823e-07, "loss": 0.9341, "step": 22938 }, { "epoch": 0.8975271930510995, "grad_norm": 0.0, "learning_rate": 5.457107961524721e-07, "loss": 0.958, "step": 22939 }, { "epoch": 0.8975663197433289, "grad_norm": 0.0, "learning_rate": 5.452979721594997e-07, "loss": 1.0278, "step": 22940 }, { "epoch": 0.8976054464355583, "grad_norm": 0.0, "learning_rate": 5.448852999971965e-07, "loss": 0.9628, "step": 22941 }, { "epoch": 0.8976445731277878, "grad_norm": 0.0, "learning_rate": 5.444727796721849e-07, "loss": 0.9497, "step": 22942 }, { "epoch": 0.8976836998200172, "grad_norm": 0.0, "learning_rate": 5.440604111910929e-07, "loss": 0.9217, "step": 22943 }, { "epoch": 0.8977228265122467, "grad_norm": 0.0, "learning_rate": 5.43648194560542e-07, "loss": 0.7874, "step": 22944 }, { "epoch": 0.897761953204476, "grad_norm": 0.0, "learning_rate": 5.432361297871513e-07, "loss": 1.0131, "step": 22945 }, { "epoch": 0.8978010798967055, "grad_norm": 0.0, "learning_rate": 5.428242168775378e-07, "loss": 0.8692, "step": 22946 }, { "epoch": 0.8978402065889349, "grad_norm": 0.0, "learning_rate": 5.42412455838317e-07, "loss": 0.913, "step": 22947 }, { "epoch": 0.8978793332811644, "grad_norm": 0.0, "learning_rate": 5.420008466761028e-07, "loss": 0.8535, "step": 22948 }, { "epoch": 0.8979184599733938, "grad_norm": 0.0, "learning_rate": 5.415893893975022e-07, "loss": 0.9939, "step": 22949 }, { "epoch": 0.8979575866656233, "grad_norm": 0.0, "learning_rate": 5.411780840091252e-07, "loss": 0.9348, "step": 22950 }, { "epoch": 0.8979967133578527, "grad_norm": 0.0, "learning_rate": 5.407669305175723e-07, "loss": 1.0634, "step": 22951 }, { "epoch": 0.8980358400500822, "grad_norm": 0.0, "learning_rate": 5.403559289294525e-07, "loss": 0.9693, "step": 22952 }, { "epoch": 0.8980749667423116, "grad_norm": 0.0, "learning_rate": 5.399450792513616e-07, "loss": 0.8897, "step": 22953 }, { "epoch": 0.8981140934345411, "grad_norm": 0.0, "learning_rate": 5.395343814899001e-07, "loss": 1.0205, "step": 22954 }, { "epoch": 0.8981532201267705, "grad_norm": 0.0, "learning_rate": 5.391238356516593e-07, "loss": 0.8204, "step": 22955 }, { "epoch": 0.898192346819, "grad_norm": 0.0, "learning_rate": 5.387134417432372e-07, "loss": 0.9935, "step": 22956 }, { "epoch": 0.8982314735112293, "grad_norm": 0.0, "learning_rate": 5.383031997712195e-07, "loss": 0.9595, "step": 22957 }, { "epoch": 0.8982706002034588, "grad_norm": 0.0, "learning_rate": 5.37893109742198e-07, "loss": 0.9753, "step": 22958 }, { "epoch": 0.8983097268956882, "grad_norm": 0.0, "learning_rate": 5.374831716627549e-07, "loss": 0.9406, "step": 22959 }, { "epoch": 0.8983488535879177, "grad_norm": 0.0, "learning_rate": 5.37073385539475e-07, "loss": 1.007, "step": 22960 }, { "epoch": 0.8983879802801471, "grad_norm": 0.0, "learning_rate": 5.366637513789397e-07, "loss": 1.0695, "step": 22961 }, { "epoch": 0.8984271069723766, "grad_norm": 0.0, "learning_rate": 5.36254269187726e-07, "loss": 0.9558, "step": 22962 }, { "epoch": 0.898466233664606, "grad_norm": 0.0, "learning_rate": 5.358449389724097e-07, "loss": 0.9221, "step": 22963 }, { "epoch": 0.8985053603568355, "grad_norm": 0.0, "learning_rate": 5.354357607395644e-07, "loss": 0.8939, "step": 22964 }, { "epoch": 0.8985444870490649, "grad_norm": 0.0, "learning_rate": 5.350267344957605e-07, "loss": 0.9977, "step": 22965 }, { "epoch": 0.8985836137412944, "grad_norm": 0.0, "learning_rate": 5.346178602475693e-07, "loss": 0.9047, "step": 22966 }, { "epoch": 0.8986227404335237, "grad_norm": 0.0, "learning_rate": 5.342091380015524e-07, "loss": 0.9758, "step": 22967 }, { "epoch": 0.8986618671257532, "grad_norm": 0.0, "learning_rate": 5.338005677642776e-07, "loss": 0.9716, "step": 22968 }, { "epoch": 0.8987009938179826, "grad_norm": 0.0, "learning_rate": 5.333921495423e-07, "loss": 0.9274, "step": 22969 }, { "epoch": 0.898740120510212, "grad_norm": 0.0, "learning_rate": 5.329838833421852e-07, "loss": 0.8904, "step": 22970 }, { "epoch": 0.8987792472024415, "grad_norm": 0.0, "learning_rate": 5.325757691704858e-07, "loss": 0.9369, "step": 22971 }, { "epoch": 0.8988183738946709, "grad_norm": 0.0, "learning_rate": 5.321678070337566e-07, "loss": 0.8996, "step": 22972 }, { "epoch": 0.8988575005869004, "grad_norm": 0.0, "learning_rate": 5.317599969385456e-07, "loss": 0.9541, "step": 22973 }, { "epoch": 0.8988966272791298, "grad_norm": 0.0, "learning_rate": 5.313523388914088e-07, "loss": 0.9253, "step": 22974 }, { "epoch": 0.8989357539713593, "grad_norm": 0.0, "learning_rate": 5.309448328988865e-07, "loss": 1.0359, "step": 22975 }, { "epoch": 0.8989748806635887, "grad_norm": 0.0, "learning_rate": 5.305374789675255e-07, "loss": 0.8906, "step": 22976 }, { "epoch": 0.8990140073558182, "grad_norm": 0.0, "learning_rate": 5.301302771038663e-07, "loss": 1.018, "step": 22977 }, { "epoch": 0.8990531340480475, "grad_norm": 0.0, "learning_rate": 5.297232273144481e-07, "loss": 0.938, "step": 22978 }, { "epoch": 0.899092260740277, "grad_norm": 0.0, "learning_rate": 5.293163296058079e-07, "loss": 0.7455, "step": 22979 }, { "epoch": 0.8991313874325064, "grad_norm": 0.0, "learning_rate": 5.289095839844816e-07, "loss": 0.9966, "step": 22980 }, { "epoch": 0.8991705141247359, "grad_norm": 0.0, "learning_rate": 5.285029904569972e-07, "loss": 0.8277, "step": 22981 }, { "epoch": 0.8992096408169653, "grad_norm": 0.0, "learning_rate": 5.280965490298873e-07, "loss": 0.9574, "step": 22982 }, { "epoch": 0.8992487675091948, "grad_norm": 0.0, "learning_rate": 5.276902597096789e-07, "loss": 0.9005, "step": 22983 }, { "epoch": 0.8992878942014242, "grad_norm": 0.0, "learning_rate": 5.272841225028935e-07, "loss": 0.9218, "step": 22984 }, { "epoch": 0.8993270208936537, "grad_norm": 0.0, "learning_rate": 5.268781374160559e-07, "loss": 0.929, "step": 22985 }, { "epoch": 0.8993661475858831, "grad_norm": 0.0, "learning_rate": 5.264723044556863e-07, "loss": 1.019, "step": 22986 }, { "epoch": 0.8994052742781126, "grad_norm": 0.0, "learning_rate": 5.260666236282985e-07, "loss": 1.1081, "step": 22987 }, { "epoch": 0.899444400970342, "grad_norm": 0.0, "learning_rate": 5.256610949404106e-07, "loss": 0.8484, "step": 22988 }, { "epoch": 0.8994835276625714, "grad_norm": 0.0, "learning_rate": 5.25255718398533e-07, "loss": 1.0097, "step": 22989 }, { "epoch": 0.8995226543548008, "grad_norm": 0.0, "learning_rate": 5.248504940091758e-07, "loss": 0.9402, "step": 22990 }, { "epoch": 0.8995617810470303, "grad_norm": 0.0, "learning_rate": 5.244454217788464e-07, "loss": 0.8556, "step": 22991 }, { "epoch": 0.8996009077392597, "grad_norm": 0.0, "learning_rate": 5.240405017140504e-07, "loss": 1.0044, "step": 22992 }, { "epoch": 0.8996400344314892, "grad_norm": 0.0, "learning_rate": 5.236357338212905e-07, "loss": 0.9397, "step": 22993 }, { "epoch": 0.8996791611237186, "grad_norm": 0.0, "learning_rate": 5.23231118107066e-07, "loss": 0.9519, "step": 22994 }, { "epoch": 0.8997182878159481, "grad_norm": 0.0, "learning_rate": 5.228266545778737e-07, "loss": 0.8594, "step": 22995 }, { "epoch": 0.8997574145081775, "grad_norm": 0.0, "learning_rate": 5.224223432402098e-07, "loss": 0.9578, "step": 22996 }, { "epoch": 0.8997965412004069, "grad_norm": 0.0, "learning_rate": 5.220181841005689e-07, "loss": 1.0099, "step": 22997 }, { "epoch": 0.8998356678926364, "grad_norm": 0.0, "learning_rate": 5.216141771654371e-07, "loss": 0.996, "step": 22998 }, { "epoch": 0.8998747945848657, "grad_norm": 0.0, "learning_rate": 5.212103224413068e-07, "loss": 0.9897, "step": 22999 }, { "epoch": 0.8999139212770952, "grad_norm": 0.0, "learning_rate": 5.208066199346573e-07, "loss": 0.8578, "step": 23000 }, { "epoch": 0.8999530479693246, "grad_norm": 0.0, "learning_rate": 5.204030696519791e-07, "loss": 1.0436, "step": 23001 }, { "epoch": 0.8999921746615541, "grad_norm": 0.0, "learning_rate": 5.19999671599748e-07, "loss": 0.9948, "step": 23002 }, { "epoch": 0.9000313013537835, "grad_norm": 0.0, "learning_rate": 5.195964257844433e-07, "loss": 1.0574, "step": 23003 }, { "epoch": 0.900070428046013, "grad_norm": 0.0, "learning_rate": 5.191933322125387e-07, "loss": 0.9391, "step": 23004 }, { "epoch": 0.9001095547382424, "grad_norm": 0.0, "learning_rate": 5.187903908905112e-07, "loss": 1.0819, "step": 23005 }, { "epoch": 0.9001486814304719, "grad_norm": 0.0, "learning_rate": 5.18387601824828e-07, "loss": 0.8499, "step": 23006 }, { "epoch": 0.9001878081227013, "grad_norm": 0.0, "learning_rate": 5.179849650219604e-07, "loss": 0.9538, "step": 23007 }, { "epoch": 0.9002269348149308, "grad_norm": 0.0, "learning_rate": 5.175824804883711e-07, "loss": 0.9648, "step": 23008 }, { "epoch": 0.9002660615071602, "grad_norm": 0.0, "learning_rate": 5.17180148230525e-07, "loss": 0.8849, "step": 23009 }, { "epoch": 0.9003051881993896, "grad_norm": 0.0, "learning_rate": 5.167779682548824e-07, "loss": 0.9657, "step": 23010 }, { "epoch": 0.900344314891619, "grad_norm": 0.0, "learning_rate": 5.163759405679048e-07, "loss": 0.8697, "step": 23011 }, { "epoch": 0.9003834415838485, "grad_norm": 0.0, "learning_rate": 5.159740651760447e-07, "loss": 0.9877, "step": 23012 }, { "epoch": 0.9004225682760779, "grad_norm": 0.0, "learning_rate": 5.155723420857561e-07, "loss": 0.8987, "step": 23013 }, { "epoch": 0.9004616949683074, "grad_norm": 0.0, "learning_rate": 5.151707713034926e-07, "loss": 1.0528, "step": 23014 }, { "epoch": 0.9005008216605368, "grad_norm": 0.0, "learning_rate": 5.147693528357012e-07, "loss": 0.9085, "step": 23015 }, { "epoch": 0.9005399483527663, "grad_norm": 0.0, "learning_rate": 5.143680866888279e-07, "loss": 0.9512, "step": 23016 }, { "epoch": 0.9005790750449957, "grad_norm": 0.0, "learning_rate": 5.139669728693176e-07, "loss": 0.9741, "step": 23017 }, { "epoch": 0.9006182017372252, "grad_norm": 0.0, "learning_rate": 5.135660113836083e-07, "loss": 0.9113, "step": 23018 }, { "epoch": 0.9006573284294546, "grad_norm": 0.0, "learning_rate": 5.13165202238145e-07, "loss": 1.0478, "step": 23019 }, { "epoch": 0.9006964551216841, "grad_norm": 0.0, "learning_rate": 5.127645454393593e-07, "loss": 0.9611, "step": 23020 }, { "epoch": 0.9007355818139134, "grad_norm": 0.0, "learning_rate": 5.123640409936881e-07, "loss": 0.9846, "step": 23021 }, { "epoch": 0.9007747085061429, "grad_norm": 0.0, "learning_rate": 5.119636889075608e-07, "loss": 0.9161, "step": 23022 }, { "epoch": 0.9008138351983723, "grad_norm": 0.0, "learning_rate": 5.115634891874066e-07, "loss": 0.8085, "step": 23023 }, { "epoch": 0.9008529618906018, "grad_norm": 0.0, "learning_rate": 5.111634418396538e-07, "loss": 1.1064, "step": 23024 }, { "epoch": 0.9008920885828312, "grad_norm": 0.0, "learning_rate": 5.107635468707273e-07, "loss": 1.0209, "step": 23025 }, { "epoch": 0.9009312152750606, "grad_norm": 0.0, "learning_rate": 5.103638042870462e-07, "loss": 0.8593, "step": 23026 }, { "epoch": 0.9009703419672901, "grad_norm": 0.0, "learning_rate": 5.099642140950301e-07, "loss": 0.9346, "step": 23027 }, { "epoch": 0.9010094686595195, "grad_norm": 0.0, "learning_rate": 5.095647763010981e-07, "loss": 0.8778, "step": 23028 }, { "epoch": 0.901048595351749, "grad_norm": 0.0, "learning_rate": 5.09165490911665e-07, "loss": 0.8849, "step": 23029 }, { "epoch": 0.9010877220439784, "grad_norm": 0.0, "learning_rate": 5.087663579331403e-07, "loss": 0.961, "step": 23030 }, { "epoch": 0.9011268487362079, "grad_norm": 0.0, "learning_rate": 5.083673773719344e-07, "loss": 1.0215, "step": 23031 }, { "epoch": 0.9011659754284372, "grad_norm": 0.0, "learning_rate": 5.079685492344555e-07, "loss": 0.9437, "step": 23032 }, { "epoch": 0.9012051021206667, "grad_norm": 0.0, "learning_rate": 5.075698735271073e-07, "loss": 0.9305, "step": 23033 }, { "epoch": 0.9012442288128961, "grad_norm": 0.0, "learning_rate": 5.071713502562913e-07, "loss": 0.9535, "step": 23034 }, { "epoch": 0.9012833555051256, "grad_norm": 0.0, "learning_rate": 5.067729794284104e-07, "loss": 0.9733, "step": 23035 }, { "epoch": 0.901322482197355, "grad_norm": 0.0, "learning_rate": 5.063747610498571e-07, "loss": 0.9503, "step": 23036 }, { "epoch": 0.9013616088895845, "grad_norm": 0.0, "learning_rate": 5.059766951270307e-07, "loss": 0.9876, "step": 23037 }, { "epoch": 0.9014007355818139, "grad_norm": 0.0, "learning_rate": 5.055787816663216e-07, "loss": 0.9729, "step": 23038 }, { "epoch": 0.9014398622740434, "grad_norm": 0.0, "learning_rate": 5.051810206741192e-07, "loss": 0.9375, "step": 23039 }, { "epoch": 0.9014789889662728, "grad_norm": 0.0, "learning_rate": 5.047834121568129e-07, "loss": 0.9747, "step": 23040 }, { "epoch": 0.9015181156585023, "grad_norm": 0.0, "learning_rate": 5.043859561207853e-07, "loss": 1.0063, "step": 23041 }, { "epoch": 0.9015572423507316, "grad_norm": 0.0, "learning_rate": 5.039886525724236e-07, "loss": 0.9421, "step": 23042 }, { "epoch": 0.9015963690429611, "grad_norm": 0.0, "learning_rate": 5.035915015181025e-07, "loss": 0.9474, "step": 23043 }, { "epoch": 0.9016354957351905, "grad_norm": 0.0, "learning_rate": 5.031945029642038e-07, "loss": 0.9208, "step": 23044 }, { "epoch": 0.90167462242742, "grad_norm": 0.0, "learning_rate": 5.027976569170989e-07, "loss": 0.801, "step": 23045 }, { "epoch": 0.9017137491196494, "grad_norm": 0.0, "learning_rate": 5.02400963383165e-07, "loss": 0.9681, "step": 23046 }, { "epoch": 0.9017528758118789, "grad_norm": 0.0, "learning_rate": 5.020044223687692e-07, "loss": 1.0904, "step": 23047 }, { "epoch": 0.9017920025041083, "grad_norm": 0.0, "learning_rate": 5.016080338802831e-07, "loss": 0.8055, "step": 23048 }, { "epoch": 0.9018311291963378, "grad_norm": 0.0, "learning_rate": 5.01211797924066e-07, "loss": 0.9458, "step": 23049 }, { "epoch": 0.9018702558885672, "grad_norm": 0.0, "learning_rate": 5.008157145064885e-07, "loss": 1.0672, "step": 23050 }, { "epoch": 0.9019093825807967, "grad_norm": 0.0, "learning_rate": 5.004197836339054e-07, "loss": 0.903, "step": 23051 }, { "epoch": 0.9019485092730261, "grad_norm": 0.0, "learning_rate": 5.000240053126781e-07, "loss": 0.9299, "step": 23052 }, { "epoch": 0.9019876359652556, "grad_norm": 0.0, "learning_rate": 4.996283795491597e-07, "loss": 1.0328, "step": 23053 }, { "epoch": 0.9020267626574849, "grad_norm": 0.0, "learning_rate": 4.992329063497059e-07, "loss": 0.8948, "step": 23054 }, { "epoch": 0.9020658893497143, "grad_norm": 0.0, "learning_rate": 4.988375857206651e-07, "loss": 0.9977, "step": 23055 }, { "epoch": 0.9021050160419438, "grad_norm": 0.0, "learning_rate": 4.984424176683888e-07, "loss": 1.1035, "step": 23056 }, { "epoch": 0.9021441427341732, "grad_norm": 0.0, "learning_rate": 4.98047402199221e-07, "loss": 1.0863, "step": 23057 }, { "epoch": 0.9021832694264027, "grad_norm": 0.0, "learning_rate": 4.976525393195042e-07, "loss": 0.929, "step": 23058 }, { "epoch": 0.9022223961186321, "grad_norm": 0.0, "learning_rate": 4.972578290355812e-07, "loss": 0.991, "step": 23059 }, { "epoch": 0.9022615228108616, "grad_norm": 0.0, "learning_rate": 4.968632713537902e-07, "loss": 0.9799, "step": 23060 }, { "epoch": 0.902300649503091, "grad_norm": 0.0, "learning_rate": 4.964688662804662e-07, "loss": 1.0324, "step": 23061 }, { "epoch": 0.9023397761953205, "grad_norm": 0.0, "learning_rate": 4.960746138219441e-07, "loss": 0.8607, "step": 23062 }, { "epoch": 0.9023789028875498, "grad_norm": 0.0, "learning_rate": 4.956805139845533e-07, "loss": 0.9626, "step": 23063 }, { "epoch": 0.9024180295797793, "grad_norm": 0.0, "learning_rate": 4.952865667746265e-07, "loss": 0.9174, "step": 23064 }, { "epoch": 0.9024571562720087, "grad_norm": 0.0, "learning_rate": 4.948927721984853e-07, "loss": 0.8894, "step": 23065 }, { "epoch": 0.9024962829642382, "grad_norm": 0.0, "learning_rate": 4.94499130262458e-07, "loss": 0.8726, "step": 23066 }, { "epoch": 0.9025354096564676, "grad_norm": 0.0, "learning_rate": 4.941056409728595e-07, "loss": 0.9938, "step": 23067 }, { "epoch": 0.9025745363486971, "grad_norm": 0.0, "learning_rate": 4.937123043360159e-07, "loss": 0.952, "step": 23068 }, { "epoch": 0.9026136630409265, "grad_norm": 0.0, "learning_rate": 4.933191203582399e-07, "loss": 0.9529, "step": 23069 }, { "epoch": 0.902652789733156, "grad_norm": 0.0, "learning_rate": 4.929260890458476e-07, "loss": 0.8703, "step": 23070 }, { "epoch": 0.9026919164253854, "grad_norm": 0.0, "learning_rate": 4.925332104051472e-07, "loss": 0.9361, "step": 23071 }, { "epoch": 0.9027310431176149, "grad_norm": 0.0, "learning_rate": 4.921404844424504e-07, "loss": 1.0046, "step": 23072 }, { "epoch": 0.9027701698098443, "grad_norm": 0.0, "learning_rate": 4.917479111640633e-07, "loss": 0.9817, "step": 23073 }, { "epoch": 0.9028092965020738, "grad_norm": 0.0, "learning_rate": 4.913554905762919e-07, "loss": 1.008, "step": 23074 }, { "epoch": 0.9028484231943031, "grad_norm": 0.0, "learning_rate": 4.909632226854343e-07, "loss": 0.9096, "step": 23075 }, { "epoch": 0.9028875498865326, "grad_norm": 0.0, "learning_rate": 4.905711074977926e-07, "loss": 0.8721, "step": 23076 }, { "epoch": 0.902926676578762, "grad_norm": 0.0, "learning_rate": 4.901791450196646e-07, "loss": 1.0278, "step": 23077 }, { "epoch": 0.9029658032709915, "grad_norm": 0.0, "learning_rate": 4.897873352573401e-07, "loss": 0.9332, "step": 23078 }, { "epoch": 0.9030049299632209, "grad_norm": 0.0, "learning_rate": 4.893956782171161e-07, "loss": 0.9294, "step": 23079 }, { "epoch": 0.9030440566554504, "grad_norm": 0.0, "learning_rate": 4.890041739052786e-07, "loss": 1.1138, "step": 23080 }, { "epoch": 0.9030831833476798, "grad_norm": 0.0, "learning_rate": 4.886128223281173e-07, "loss": 0.8341, "step": 23081 }, { "epoch": 0.9031223100399092, "grad_norm": 0.0, "learning_rate": 4.882216234919157e-07, "loss": 0.9985, "step": 23082 }, { "epoch": 0.9031614367321387, "grad_norm": 0.0, "learning_rate": 4.878305774029557e-07, "loss": 0.9915, "step": 23083 }, { "epoch": 0.903200563424368, "grad_norm": 0.0, "learning_rate": 4.874396840675166e-07, "loss": 1.0164, "step": 23084 }, { "epoch": 0.9032396901165975, "grad_norm": 0.0, "learning_rate": 4.870489434918768e-07, "loss": 1.0433, "step": 23085 }, { "epoch": 0.9032788168088269, "grad_norm": 0.0, "learning_rate": 4.86658355682309e-07, "loss": 0.9368, "step": 23086 }, { "epoch": 0.9033179435010564, "grad_norm": 0.0, "learning_rate": 4.862679206450904e-07, "loss": 0.9626, "step": 23087 }, { "epoch": 0.9033570701932858, "grad_norm": 0.0, "learning_rate": 4.858776383864849e-07, "loss": 1.0632, "step": 23088 }, { "epoch": 0.9033961968855153, "grad_norm": 0.0, "learning_rate": 4.854875089127631e-07, "loss": 0.9496, "step": 23089 }, { "epoch": 0.9034353235777447, "grad_norm": 0.0, "learning_rate": 4.850975322301898e-07, "loss": 0.9659, "step": 23090 }, { "epoch": 0.9034744502699742, "grad_norm": 0.0, "learning_rate": 4.84707708345028e-07, "loss": 0.7902, "step": 23091 }, { "epoch": 0.9035135769622036, "grad_norm": 0.0, "learning_rate": 4.843180372635358e-07, "loss": 0.9164, "step": 23092 }, { "epoch": 0.9035527036544331, "grad_norm": 0.0, "learning_rate": 4.83928518991974e-07, "loss": 0.9153, "step": 23093 }, { "epoch": 0.9035918303466625, "grad_norm": 0.0, "learning_rate": 4.83539153536593e-07, "loss": 0.8767, "step": 23094 }, { "epoch": 0.903630957038892, "grad_norm": 0.0, "learning_rate": 4.83149940903651e-07, "loss": 0.8177, "step": 23095 }, { "epoch": 0.9036700837311213, "grad_norm": 0.0, "learning_rate": 4.827608810993945e-07, "loss": 0.8258, "step": 23096 }, { "epoch": 0.9037092104233508, "grad_norm": 0.0, "learning_rate": 4.823719741300737e-07, "loss": 0.9571, "step": 23097 }, { "epoch": 0.9037483371155802, "grad_norm": 0.0, "learning_rate": 4.819832200019303e-07, "loss": 0.808, "step": 23098 }, { "epoch": 0.9037874638078097, "grad_norm": 0.0, "learning_rate": 4.815946187212117e-07, "loss": 0.8951, "step": 23099 }, { "epoch": 0.9038265905000391, "grad_norm": 0.0, "learning_rate": 4.812061702941562e-07, "loss": 0.8775, "step": 23100 }, { "epoch": 0.9038657171922686, "grad_norm": 0.0, "learning_rate": 4.808178747270021e-07, "loss": 0.9213, "step": 23101 }, { "epoch": 0.903904843884498, "grad_norm": 0.0, "learning_rate": 4.804297320259832e-07, "loss": 0.8976, "step": 23102 }, { "epoch": 0.9039439705767275, "grad_norm": 0.0, "learning_rate": 4.800417421973347e-07, "loss": 1.0203, "step": 23103 }, { "epoch": 0.9039830972689569, "grad_norm": 0.0, "learning_rate": 4.79653905247287e-07, "loss": 0.9476, "step": 23104 }, { "epoch": 0.9040222239611864, "grad_norm": 0.0, "learning_rate": 4.792662211820687e-07, "loss": 0.8837, "step": 23105 }, { "epoch": 0.9040613506534158, "grad_norm": 0.0, "learning_rate": 4.788786900079034e-07, "loss": 0.9589, "step": 23106 }, { "epoch": 0.9041004773456452, "grad_norm": 0.0, "learning_rate": 4.784913117310153e-07, "loss": 0.9474, "step": 23107 }, { "epoch": 0.9041396040378746, "grad_norm": 0.0, "learning_rate": 4.781040863576258e-07, "loss": 0.8238, "step": 23108 }, { "epoch": 0.9041787307301041, "grad_norm": 0.0, "learning_rate": 4.777170138939546e-07, "loss": 0.9742, "step": 23109 }, { "epoch": 0.9042178574223335, "grad_norm": 0.0, "learning_rate": 4.773300943462156e-07, "loss": 1.0412, "step": 23110 }, { "epoch": 0.9042569841145629, "grad_norm": 0.0, "learning_rate": 4.769433277206226e-07, "loss": 0.9565, "step": 23111 }, { "epoch": 0.9042961108067924, "grad_norm": 0.0, "learning_rate": 4.765567140233851e-07, "loss": 0.9282, "step": 23112 }, { "epoch": 0.9043352374990218, "grad_norm": 0.0, "learning_rate": 4.7617025326071597e-07, "loss": 0.9337, "step": 23113 }, { "epoch": 0.9043743641912513, "grad_norm": 0.0, "learning_rate": 4.757839454388169e-07, "loss": 0.986, "step": 23114 }, { "epoch": 0.9044134908834807, "grad_norm": 0.0, "learning_rate": 4.7539779056389404e-07, "loss": 0.9196, "step": 23115 }, { "epoch": 0.9044526175757102, "grad_norm": 0.0, "learning_rate": 4.750117886421468e-07, "loss": 0.989, "step": 23116 }, { "epoch": 0.9044917442679395, "grad_norm": 0.0, "learning_rate": 4.7462593967977475e-07, "loss": 0.9102, "step": 23117 }, { "epoch": 0.904530870960169, "grad_norm": 0.0, "learning_rate": 4.7424024368297296e-07, "loss": 0.9811, "step": 23118 }, { "epoch": 0.9045699976523984, "grad_norm": 0.0, "learning_rate": 4.738547006579397e-07, "loss": 0.9804, "step": 23119 }, { "epoch": 0.9046091243446279, "grad_norm": 0.0, "learning_rate": 4.734693106108601e-07, "loss": 1.1262, "step": 23120 }, { "epoch": 0.9046482510368573, "grad_norm": 0.0, "learning_rate": 4.73084073547927e-07, "loss": 0.9222, "step": 23121 }, { "epoch": 0.9046873777290868, "grad_norm": 0.0, "learning_rate": 4.7269898947532644e-07, "loss": 0.956, "step": 23122 }, { "epoch": 0.9047265044213162, "grad_norm": 0.0, "learning_rate": 4.723140583992414e-07, "loss": 0.8679, "step": 23123 }, { "epoch": 0.9047656311135457, "grad_norm": 0.0, "learning_rate": 4.719292803258524e-07, "loss": 0.8937, "step": 23124 }, { "epoch": 0.9048047578057751, "grad_norm": 0.0, "learning_rate": 4.715446552613401e-07, "loss": 0.8972, "step": 23125 }, { "epoch": 0.9048438844980046, "grad_norm": 0.0, "learning_rate": 4.711601832118828e-07, "loss": 1.0111, "step": 23126 }, { "epoch": 0.904883011190234, "grad_norm": 0.0, "learning_rate": 4.7077586418365126e-07, "loss": 1.0006, "step": 23127 }, { "epoch": 0.9049221378824635, "grad_norm": 0.0, "learning_rate": 4.703916981828194e-07, "loss": 0.9576, "step": 23128 }, { "epoch": 0.9049612645746928, "grad_norm": 0.0, "learning_rate": 4.700076852155533e-07, "loss": 1.0029, "step": 23129 }, { "epoch": 0.9050003912669223, "grad_norm": 0.0, "learning_rate": 4.6962382528802476e-07, "loss": 0.8291, "step": 23130 }, { "epoch": 0.9050395179591517, "grad_norm": 0.0, "learning_rate": 4.6924011840639327e-07, "loss": 0.9539, "step": 23131 }, { "epoch": 0.9050786446513812, "grad_norm": 0.0, "learning_rate": 4.6885656457682505e-07, "loss": 0.8795, "step": 23132 }, { "epoch": 0.9051177713436106, "grad_norm": 0.0, "learning_rate": 4.6847316380547513e-07, "loss": 0.8474, "step": 23133 }, { "epoch": 0.9051568980358401, "grad_norm": 0.0, "learning_rate": 4.6808991609850307e-07, "loss": 0.953, "step": 23134 }, { "epoch": 0.9051960247280695, "grad_norm": 0.0, "learning_rate": 4.6770682146206283e-07, "loss": 0.8096, "step": 23135 }, { "epoch": 0.905235151420299, "grad_norm": 0.0, "learning_rate": 4.673238799023072e-07, "loss": 1.0114, "step": 23136 }, { "epoch": 0.9052742781125284, "grad_norm": 0.0, "learning_rate": 4.6694109142538467e-07, "loss": 1.0103, "step": 23137 }, { "epoch": 0.9053134048047579, "grad_norm": 0.0, "learning_rate": 4.665584560374414e-07, "loss": 0.9208, "step": 23138 }, { "epoch": 0.9053525314969872, "grad_norm": 0.0, "learning_rate": 4.6617597374462366e-07, "loss": 0.9167, "step": 23139 }, { "epoch": 0.9053916581892166, "grad_norm": 0.0, "learning_rate": 4.6579364455307527e-07, "loss": 0.9716, "step": 23140 }, { "epoch": 0.9054307848814461, "grad_norm": 0.0, "learning_rate": 4.654114684689315e-07, "loss": 0.8672, "step": 23141 }, { "epoch": 0.9054699115736755, "grad_norm": 0.0, "learning_rate": 4.6502944549833397e-07, "loss": 1.0201, "step": 23142 }, { "epoch": 0.905509038265905, "grad_norm": 0.0, "learning_rate": 4.6464757564741223e-07, "loss": 0.9678, "step": 23143 }, { "epoch": 0.9055481649581344, "grad_norm": 0.0, "learning_rate": 4.6426585892230593e-07, "loss": 1.0081, "step": 23144 }, { "epoch": 0.9055872916503639, "grad_norm": 0.0, "learning_rate": 4.638842953291389e-07, "loss": 0.8066, "step": 23145 }, { "epoch": 0.9056264183425933, "grad_norm": 0.0, "learning_rate": 4.6350288487404194e-07, "loss": 0.9656, "step": 23146 }, { "epoch": 0.9056655450348228, "grad_norm": 0.0, "learning_rate": 4.631216275631356e-07, "loss": 1.082, "step": 23147 }, { "epoch": 0.9057046717270522, "grad_norm": 0.0, "learning_rate": 4.627405234025495e-07, "loss": 1.061, "step": 23148 }, { "epoch": 0.9057437984192817, "grad_norm": 0.0, "learning_rate": 4.6235957239839755e-07, "loss": 0.8877, "step": 23149 }, { "epoch": 0.905782925111511, "grad_norm": 0.0, "learning_rate": 4.619787745568005e-07, "loss": 0.849, "step": 23150 }, { "epoch": 0.9058220518037405, "grad_norm": 0.0, "learning_rate": 4.615981298838712e-07, "loss": 0.935, "step": 23151 }, { "epoch": 0.9058611784959699, "grad_norm": 0.0, "learning_rate": 4.6121763838572473e-07, "loss": 1.0701, "step": 23152 }, { "epoch": 0.9059003051881994, "grad_norm": 0.0, "learning_rate": 4.6083730006846963e-07, "loss": 0.9994, "step": 23153 }, { "epoch": 0.9059394318804288, "grad_norm": 0.0, "learning_rate": 4.604571149382153e-07, "loss": 0.9815, "step": 23154 }, { "epoch": 0.9059785585726583, "grad_norm": 0.0, "learning_rate": 4.600770830010648e-07, "loss": 0.972, "step": 23155 }, { "epoch": 0.9060176852648877, "grad_norm": 0.0, "learning_rate": 4.5969720426312204e-07, "loss": 1.009, "step": 23156 }, { "epoch": 0.9060568119571172, "grad_norm": 0.0, "learning_rate": 4.593174787304877e-07, "loss": 0.7866, "step": 23157 }, { "epoch": 0.9060959386493466, "grad_norm": 0.0, "learning_rate": 4.5893790640926137e-07, "loss": 0.8508, "step": 23158 }, { "epoch": 0.9061350653415761, "grad_norm": 0.0, "learning_rate": 4.5855848730553486e-07, "loss": 0.9852, "step": 23159 }, { "epoch": 0.9061741920338054, "grad_norm": 0.0, "learning_rate": 4.581792214254044e-07, "loss": 0.888, "step": 23160 }, { "epoch": 0.9062133187260349, "grad_norm": 0.0, "learning_rate": 4.578001087749573e-07, "loss": 1.0849, "step": 23161 }, { "epoch": 0.9062524454182643, "grad_norm": 0.0, "learning_rate": 4.5742114936028315e-07, "loss": 1.0714, "step": 23162 }, { "epoch": 0.9062915721104938, "grad_norm": 0.0, "learning_rate": 4.570423431874693e-07, "loss": 1.0134, "step": 23163 }, { "epoch": 0.9063306988027232, "grad_norm": 0.0, "learning_rate": 4.566636902625976e-07, "loss": 1.071, "step": 23164 }, { "epoch": 0.9063698254949527, "grad_norm": 0.0, "learning_rate": 4.562851905917476e-07, "loss": 0.9601, "step": 23165 }, { "epoch": 0.9064089521871821, "grad_norm": 0.0, "learning_rate": 4.5590684418099776e-07, "loss": 0.9662, "step": 23166 }, { "epoch": 0.9064480788794116, "grad_norm": 0.0, "learning_rate": 4.555286510364265e-07, "loss": 0.9605, "step": 23167 }, { "epoch": 0.906487205571641, "grad_norm": 0.0, "learning_rate": 4.551506111641035e-07, "loss": 0.9545, "step": 23168 }, { "epoch": 0.9065263322638704, "grad_norm": 0.0, "learning_rate": 4.547727245701028e-07, "loss": 1.0606, "step": 23169 }, { "epoch": 0.9065654589560999, "grad_norm": 0.0, "learning_rate": 4.5439499126048945e-07, "loss": 0.9661, "step": 23170 }, { "epoch": 0.9066045856483292, "grad_norm": 0.0, "learning_rate": 4.5401741124133315e-07, "loss": 0.9839, "step": 23171 }, { "epoch": 0.9066437123405587, "grad_norm": 0.0, "learning_rate": 4.536399845186945e-07, "loss": 0.9582, "step": 23172 }, { "epoch": 0.9066828390327881, "grad_norm": 0.0, "learning_rate": 4.532627110986365e-07, "loss": 0.9417, "step": 23173 }, { "epoch": 0.9067219657250176, "grad_norm": 0.0, "learning_rate": 4.5288559098721427e-07, "loss": 1.0028, "step": 23174 }, { "epoch": 0.906761092417247, "grad_norm": 0.0, "learning_rate": 4.5250862419048856e-07, "loss": 0.914, "step": 23175 }, { "epoch": 0.9068002191094765, "grad_norm": 0.0, "learning_rate": 4.5213181071450894e-07, "loss": 1.0586, "step": 23176 }, { "epoch": 0.9068393458017059, "grad_norm": 0.0, "learning_rate": 4.517551505653306e-07, "loss": 0.9457, "step": 23177 }, { "epoch": 0.9068784724939354, "grad_norm": 0.0, "learning_rate": 4.513786437489964e-07, "loss": 0.9736, "step": 23178 }, { "epoch": 0.9069175991861648, "grad_norm": 0.0, "learning_rate": 4.510022902715594e-07, "loss": 0.8966, "step": 23179 }, { "epoch": 0.9069567258783943, "grad_norm": 0.0, "learning_rate": 4.50626090139058e-07, "loss": 0.9882, "step": 23180 }, { "epoch": 0.9069958525706237, "grad_norm": 0.0, "learning_rate": 4.502500433575374e-07, "loss": 0.8731, "step": 23181 }, { "epoch": 0.9070349792628531, "grad_norm": 0.0, "learning_rate": 4.498741499330339e-07, "loss": 0.9521, "step": 23182 }, { "epoch": 0.9070741059550825, "grad_norm": 0.0, "learning_rate": 4.4949840987158377e-07, "loss": 1.0666, "step": 23183 }, { "epoch": 0.907113232647312, "grad_norm": 0.0, "learning_rate": 4.4912282317922107e-07, "loss": 0.9822, "step": 23184 }, { "epoch": 0.9071523593395414, "grad_norm": 0.0, "learning_rate": 4.4874738986198096e-07, "loss": 0.9977, "step": 23185 }, { "epoch": 0.9071914860317709, "grad_norm": 0.0, "learning_rate": 4.4837210992588643e-07, "loss": 0.9733, "step": 23186 }, { "epoch": 0.9072306127240003, "grad_norm": 0.0, "learning_rate": 4.4799698337696815e-07, "loss": 0.9299, "step": 23187 }, { "epoch": 0.9072697394162298, "grad_norm": 0.0, "learning_rate": 4.476220102212481e-07, "loss": 0.9525, "step": 23188 }, { "epoch": 0.9073088661084592, "grad_norm": 0.0, "learning_rate": 4.472471904647502e-07, "loss": 0.9221, "step": 23189 }, { "epoch": 0.9073479928006887, "grad_norm": 0.0, "learning_rate": 4.468725241134908e-07, "loss": 0.9942, "step": 23190 }, { "epoch": 0.9073871194929181, "grad_norm": 0.0, "learning_rate": 4.4649801117348957e-07, "loss": 1.0428, "step": 23191 }, { "epoch": 0.9074262461851476, "grad_norm": 0.0, "learning_rate": 4.4612365165075724e-07, "loss": 0.9601, "step": 23192 }, { "epoch": 0.9074653728773769, "grad_norm": 0.0, "learning_rate": 4.4574944555130895e-07, "loss": 1.0178, "step": 23193 }, { "epoch": 0.9075044995696064, "grad_norm": 0.0, "learning_rate": 4.4537539288115106e-07, "loss": 0.8322, "step": 23194 }, { "epoch": 0.9075436262618358, "grad_norm": 0.0, "learning_rate": 4.4500149364629317e-07, "loss": 0.9697, "step": 23195 }, { "epoch": 0.9075827529540652, "grad_norm": 0.0, "learning_rate": 4.446277478527361e-07, "loss": 0.9897, "step": 23196 }, { "epoch": 0.9076218796462947, "grad_norm": 0.0, "learning_rate": 4.442541555064861e-07, "loss": 0.9321, "step": 23197 }, { "epoch": 0.9076610063385241, "grad_norm": 0.0, "learning_rate": 4.438807166135384e-07, "loss": 0.9341, "step": 23198 }, { "epoch": 0.9077001330307536, "grad_norm": 0.0, "learning_rate": 4.435074311798948e-07, "loss": 0.8796, "step": 23199 }, { "epoch": 0.907739259722983, "grad_norm": 0.0, "learning_rate": 4.4313429921154394e-07, "loss": 0.9157, "step": 23200 }, { "epoch": 0.9077783864152125, "grad_norm": 0.0, "learning_rate": 4.427613207144821e-07, "loss": 1.0139, "step": 23201 }, { "epoch": 0.9078175131074419, "grad_norm": 0.0, "learning_rate": 4.4238849569469664e-07, "loss": 0.9472, "step": 23202 }, { "epoch": 0.9078566397996713, "grad_norm": 0.0, "learning_rate": 4.4201582415817734e-07, "loss": 0.9547, "step": 23203 }, { "epoch": 0.9078957664919007, "grad_norm": 0.0, "learning_rate": 4.416433061109049e-07, "loss": 0.88, "step": 23204 }, { "epoch": 0.9079348931841302, "grad_norm": 0.0, "learning_rate": 4.412709415588645e-07, "loss": 0.8768, "step": 23205 }, { "epoch": 0.9079740198763596, "grad_norm": 0.0, "learning_rate": 4.40898730508037e-07, "loss": 0.8793, "step": 23206 }, { "epoch": 0.9080131465685891, "grad_norm": 0.0, "learning_rate": 4.4052667296439533e-07, "loss": 0.9285, "step": 23207 }, { "epoch": 0.9080522732608185, "grad_norm": 0.0, "learning_rate": 4.4015476893391695e-07, "loss": 0.8887, "step": 23208 }, { "epoch": 0.908091399953048, "grad_norm": 0.0, "learning_rate": 4.3978301842257486e-07, "loss": 0.8936, "step": 23209 }, { "epoch": 0.9081305266452774, "grad_norm": 0.0, "learning_rate": 4.3941142143633654e-07, "loss": 0.9705, "step": 23210 }, { "epoch": 0.9081696533375069, "grad_norm": 0.0, "learning_rate": 4.390399779811716e-07, "loss": 0.9309, "step": 23211 }, { "epoch": 0.9082087800297363, "grad_norm": 0.0, "learning_rate": 4.386686880630442e-07, "loss": 0.8754, "step": 23212 }, { "epoch": 0.9082479067219658, "grad_norm": 0.0, "learning_rate": 4.3829755168791623e-07, "loss": 0.981, "step": 23213 }, { "epoch": 0.9082870334141951, "grad_norm": 0.0, "learning_rate": 4.3792656886174733e-07, "loss": 0.9762, "step": 23214 }, { "epoch": 0.9083261601064246, "grad_norm": 0.0, "learning_rate": 4.375557395904961e-07, "loss": 0.8978, "step": 23215 }, { "epoch": 0.908365286798654, "grad_norm": 0.0, "learning_rate": 4.3718506388011895e-07, "loss": 1.0379, "step": 23216 }, { "epoch": 0.9084044134908835, "grad_norm": 0.0, "learning_rate": 4.3681454173656546e-07, "loss": 0.9753, "step": 23217 }, { "epoch": 0.9084435401831129, "grad_norm": 0.0, "learning_rate": 4.364441731657876e-07, "loss": 0.9093, "step": 23218 }, { "epoch": 0.9084826668753424, "grad_norm": 0.0, "learning_rate": 4.3607395817373056e-07, "loss": 0.9253, "step": 23219 }, { "epoch": 0.9085217935675718, "grad_norm": 0.0, "learning_rate": 4.357038967663441e-07, "loss": 0.9512, "step": 23220 }, { "epoch": 0.9085609202598013, "grad_norm": 0.0, "learning_rate": 4.353339889495667e-07, "loss": 0.9917, "step": 23221 }, { "epoch": 0.9086000469520307, "grad_norm": 0.0, "learning_rate": 4.3496423472934146e-07, "loss": 0.9695, "step": 23222 }, { "epoch": 0.9086391736442602, "grad_norm": 0.0, "learning_rate": 4.345946341116025e-07, "loss": 1.0082, "step": 23223 }, { "epoch": 0.9086783003364896, "grad_norm": 0.0, "learning_rate": 4.3422518710229067e-07, "loss": 1.0195, "step": 23224 }, { "epoch": 0.9087174270287189, "grad_norm": 0.0, "learning_rate": 4.338558937073345e-07, "loss": 0.9702, "step": 23225 }, { "epoch": 0.9087565537209484, "grad_norm": 0.0, "learning_rate": 4.3348675393266594e-07, "loss": 1.0085, "step": 23226 }, { "epoch": 0.9087956804131778, "grad_norm": 0.0, "learning_rate": 4.3311776778421243e-07, "loss": 0.8757, "step": 23227 }, { "epoch": 0.9088348071054073, "grad_norm": 0.0, "learning_rate": 4.3274893526789816e-07, "loss": 0.9982, "step": 23228 }, { "epoch": 0.9088739337976367, "grad_norm": 0.0, "learning_rate": 4.3238025638964843e-07, "loss": 0.8718, "step": 23229 }, { "epoch": 0.9089130604898662, "grad_norm": 0.0, "learning_rate": 4.3201173115538507e-07, "loss": 0.8979, "step": 23230 }, { "epoch": 0.9089521871820956, "grad_norm": 0.0, "learning_rate": 4.316433595710212e-07, "loss": 0.8713, "step": 23231 }, { "epoch": 0.9089913138743251, "grad_norm": 0.0, "learning_rate": 4.3127514164247543e-07, "loss": 0.9207, "step": 23232 }, { "epoch": 0.9090304405665545, "grad_norm": 0.0, "learning_rate": 4.309070773756607e-07, "loss": 1.0287, "step": 23233 }, { "epoch": 0.909069567258784, "grad_norm": 0.0, "learning_rate": 4.305391667764891e-07, "loss": 0.9038, "step": 23234 }, { "epoch": 0.9091086939510133, "grad_norm": 0.0, "learning_rate": 4.301714098508658e-07, "loss": 0.9436, "step": 23235 }, { "epoch": 0.9091478206432428, "grad_norm": 0.0, "learning_rate": 4.2980380660469834e-07, "loss": 0.8722, "step": 23236 }, { "epoch": 0.9091869473354722, "grad_norm": 0.0, "learning_rate": 4.2943635704388973e-07, "loss": 1.0272, "step": 23237 }, { "epoch": 0.9092260740277017, "grad_norm": 0.0, "learning_rate": 4.29069061174342e-07, "loss": 0.8857, "step": 23238 }, { "epoch": 0.9092652007199311, "grad_norm": 0.0, "learning_rate": 4.2870191900195034e-07, "loss": 0.9233, "step": 23239 }, { "epoch": 0.9093043274121606, "grad_norm": 0.0, "learning_rate": 4.2833493053261343e-07, "loss": 1.0267, "step": 23240 }, { "epoch": 0.90934345410439, "grad_norm": 0.0, "learning_rate": 4.27968095772221e-07, "loss": 0.9586, "step": 23241 }, { "epoch": 0.9093825807966195, "grad_norm": 0.0, "learning_rate": 4.276014147266694e-07, "loss": 1.0316, "step": 23242 }, { "epoch": 0.9094217074888489, "grad_norm": 0.0, "learning_rate": 4.2723488740184285e-07, "loss": 0.95, "step": 23243 }, { "epoch": 0.9094608341810784, "grad_norm": 0.0, "learning_rate": 4.2686851380362994e-07, "loss": 0.9313, "step": 23244 }, { "epoch": 0.9094999608733078, "grad_norm": 0.0, "learning_rate": 4.2650229393791156e-07, "loss": 1.035, "step": 23245 }, { "epoch": 0.9095390875655373, "grad_norm": 0.0, "learning_rate": 4.261362278105707e-07, "loss": 0.9529, "step": 23246 }, { "epoch": 0.9095782142577666, "grad_norm": 0.0, "learning_rate": 4.2577031542748393e-07, "loss": 0.947, "step": 23247 }, { "epoch": 0.9096173409499961, "grad_norm": 0.0, "learning_rate": 4.254045567945309e-07, "loss": 1.043, "step": 23248 }, { "epoch": 0.9096564676422255, "grad_norm": 0.0, "learning_rate": 4.250389519175824e-07, "loss": 1.0964, "step": 23249 }, { "epoch": 0.909695594334455, "grad_norm": 0.0, "learning_rate": 4.2467350080250934e-07, "loss": 0.9472, "step": 23250 }, { "epoch": 0.9097347210266844, "grad_norm": 0.0, "learning_rate": 4.2430820345518265e-07, "loss": 0.9388, "step": 23251 }, { "epoch": 0.9097738477189139, "grad_norm": 0.0, "learning_rate": 4.2394305988146643e-07, "loss": 0.9264, "step": 23252 }, { "epoch": 0.9098129744111433, "grad_norm": 0.0, "learning_rate": 4.235780700872238e-07, "loss": 0.8279, "step": 23253 }, { "epoch": 0.9098521011033727, "grad_norm": 0.0, "learning_rate": 4.2321323407831907e-07, "loss": 0.828, "step": 23254 }, { "epoch": 0.9098912277956022, "grad_norm": 0.0, "learning_rate": 4.228485518606096e-07, "loss": 1.111, "step": 23255 }, { "epoch": 0.9099303544878315, "grad_norm": 0.0, "learning_rate": 4.2248402343995076e-07, "loss": 0.9596, "step": 23256 }, { "epoch": 0.909969481180061, "grad_norm": 0.0, "learning_rate": 4.22119648822199e-07, "loss": 0.9842, "step": 23257 }, { "epoch": 0.9100086078722904, "grad_norm": 0.0, "learning_rate": 4.2175542801320193e-07, "loss": 0.9204, "step": 23258 }, { "epoch": 0.9100477345645199, "grad_norm": 0.0, "learning_rate": 4.213913610188103e-07, "loss": 0.8775, "step": 23259 }, { "epoch": 0.9100868612567493, "grad_norm": 0.0, "learning_rate": 4.210274478448717e-07, "loss": 0.8716, "step": 23260 }, { "epoch": 0.9101259879489788, "grad_norm": 0.0, "learning_rate": 4.206636884972293e-07, "loss": 0.9865, "step": 23261 }, { "epoch": 0.9101651146412082, "grad_norm": 0.0, "learning_rate": 4.2030008298172384e-07, "loss": 0.9368, "step": 23262 }, { "epoch": 0.9102042413334377, "grad_norm": 0.0, "learning_rate": 4.1993663130419526e-07, "loss": 1.0698, "step": 23263 }, { "epoch": 0.9102433680256671, "grad_norm": 0.0, "learning_rate": 4.195733334704788e-07, "loss": 0.7874, "step": 23264 }, { "epoch": 0.9102824947178966, "grad_norm": 0.0, "learning_rate": 4.19210189486412e-07, "loss": 0.9776, "step": 23265 }, { "epoch": 0.910321621410126, "grad_norm": 0.0, "learning_rate": 4.188471993578225e-07, "loss": 1.1099, "step": 23266 }, { "epoch": 0.9103607481023555, "grad_norm": 0.0, "learning_rate": 4.184843630905422e-07, "loss": 0.9521, "step": 23267 }, { "epoch": 0.9103998747945848, "grad_norm": 0.0, "learning_rate": 4.1812168069039426e-07, "loss": 0.9397, "step": 23268 }, { "epoch": 0.9104390014868143, "grad_norm": 0.0, "learning_rate": 4.1775915216320853e-07, "loss": 0.8338, "step": 23269 }, { "epoch": 0.9104781281790437, "grad_norm": 0.0, "learning_rate": 4.1739677751480135e-07, "loss": 0.9187, "step": 23270 }, { "epoch": 0.9105172548712732, "grad_norm": 0.0, "learning_rate": 4.17034556750997e-07, "loss": 1.1099, "step": 23271 }, { "epoch": 0.9105563815635026, "grad_norm": 0.0, "learning_rate": 4.1667248987760534e-07, "loss": 1.0247, "step": 23272 }, { "epoch": 0.9105955082557321, "grad_norm": 0.0, "learning_rate": 4.163105769004483e-07, "loss": 0.9801, "step": 23273 }, { "epoch": 0.9106346349479615, "grad_norm": 0.0, "learning_rate": 4.1594881782533235e-07, "loss": 0.9252, "step": 23274 }, { "epoch": 0.910673761640191, "grad_norm": 0.0, "learning_rate": 4.155872126580718e-07, "loss": 0.8626, "step": 23275 }, { "epoch": 0.9107128883324204, "grad_norm": 0.0, "learning_rate": 4.1522576140446747e-07, "loss": 1.0724, "step": 23276 }, { "epoch": 0.9107520150246499, "grad_norm": 0.0, "learning_rate": 4.148644640703281e-07, "loss": 0.9998, "step": 23277 }, { "epoch": 0.9107911417168792, "grad_norm": 0.0, "learning_rate": 4.145033206614546e-07, "loss": 0.9914, "step": 23278 }, { "epoch": 0.9108302684091087, "grad_norm": 0.0, "learning_rate": 4.1414233118364787e-07, "loss": 1.0145, "step": 23279 }, { "epoch": 0.9108693951013381, "grad_norm": 0.0, "learning_rate": 4.137814956427011e-07, "loss": 0.9054, "step": 23280 }, { "epoch": 0.9109085217935676, "grad_norm": 0.0, "learning_rate": 4.134208140444129e-07, "loss": 0.9678, "step": 23281 }, { "epoch": 0.910947648485797, "grad_norm": 0.0, "learning_rate": 4.130602863945732e-07, "loss": 0.8853, "step": 23282 }, { "epoch": 0.9109867751780264, "grad_norm": 0.0, "learning_rate": 4.126999126989728e-07, "loss": 0.8927, "step": 23283 }, { "epoch": 0.9110259018702559, "grad_norm": 0.0, "learning_rate": 4.1233969296339716e-07, "loss": 0.9084, "step": 23284 }, { "epoch": 0.9110650285624853, "grad_norm": 0.0, "learning_rate": 4.1197962719363383e-07, "loss": 0.9782, "step": 23285 }, { "epoch": 0.9111041552547148, "grad_norm": 0.0, "learning_rate": 4.116197153954604e-07, "loss": 0.8494, "step": 23286 }, { "epoch": 0.9111432819469442, "grad_norm": 0.0, "learning_rate": 4.112599575746623e-07, "loss": 0.8466, "step": 23287 }, { "epoch": 0.9111824086391737, "grad_norm": 0.0, "learning_rate": 4.1090035373701154e-07, "loss": 0.9782, "step": 23288 }, { "epoch": 0.911221535331403, "grad_norm": 0.0, "learning_rate": 4.105409038882879e-07, "loss": 0.9256, "step": 23289 }, { "epoch": 0.9112606620236325, "grad_norm": 0.0, "learning_rate": 4.101816080342591e-07, "loss": 1.0403, "step": 23290 }, { "epoch": 0.9112997887158619, "grad_norm": 0.0, "learning_rate": 4.098224661806971e-07, "loss": 0.8536, "step": 23291 }, { "epoch": 0.9113389154080914, "grad_norm": 0.0, "learning_rate": 4.0946347833336954e-07, "loss": 1.045, "step": 23292 }, { "epoch": 0.9113780421003208, "grad_norm": 0.0, "learning_rate": 4.0910464449804176e-07, "loss": 0.9973, "step": 23293 }, { "epoch": 0.9114171687925503, "grad_norm": 0.0, "learning_rate": 4.087459646804737e-07, "loss": 1.0389, "step": 23294 }, { "epoch": 0.9114562954847797, "grad_norm": 0.0, "learning_rate": 4.083874388864273e-07, "loss": 0.9902, "step": 23295 }, { "epoch": 0.9114954221770092, "grad_norm": 0.0, "learning_rate": 4.0802906712166134e-07, "loss": 0.9583, "step": 23296 }, { "epoch": 0.9115345488692386, "grad_norm": 0.0, "learning_rate": 4.076708493919279e-07, "loss": 1.0055, "step": 23297 }, { "epoch": 0.9115736755614681, "grad_norm": 0.0, "learning_rate": 4.073127857029802e-07, "loss": 0.9388, "step": 23298 }, { "epoch": 0.9116128022536975, "grad_norm": 0.0, "learning_rate": 4.0695487606056903e-07, "loss": 0.9281, "step": 23299 }, { "epoch": 0.911651928945927, "grad_norm": 0.0, "learning_rate": 4.065971204704433e-07, "loss": 0.8964, "step": 23300 }, { "epoch": 0.9116910556381563, "grad_norm": 0.0, "learning_rate": 4.06239518938345e-07, "loss": 0.8995, "step": 23301 }, { "epoch": 0.9117301823303858, "grad_norm": 0.0, "learning_rate": 4.0588207147001845e-07, "loss": 1.1295, "step": 23302 }, { "epoch": 0.9117693090226152, "grad_norm": 0.0, "learning_rate": 4.055247780712035e-07, "loss": 1.0333, "step": 23303 }, { "epoch": 0.9118084357148447, "grad_norm": 0.0, "learning_rate": 4.0516763874763996e-07, "loss": 0.9405, "step": 23304 }, { "epoch": 0.9118475624070741, "grad_norm": 0.0, "learning_rate": 4.048106535050589e-07, "loss": 0.9678, "step": 23305 }, { "epoch": 0.9118866890993036, "grad_norm": 0.0, "learning_rate": 4.0445382234919674e-07, "loss": 1.0595, "step": 23306 }, { "epoch": 0.911925815791533, "grad_norm": 0.0, "learning_rate": 4.0409714528578224e-07, "loss": 0.969, "step": 23307 }, { "epoch": 0.9119649424837625, "grad_norm": 0.0, "learning_rate": 4.03740622320542e-07, "loss": 0.9544, "step": 23308 }, { "epoch": 0.9120040691759919, "grad_norm": 0.0, "learning_rate": 4.0338425345920364e-07, "loss": 0.8832, "step": 23309 }, { "epoch": 0.9120431958682212, "grad_norm": 0.0, "learning_rate": 4.030280387074892e-07, "loss": 1.0022, "step": 23310 }, { "epoch": 0.9120823225604507, "grad_norm": 0.0, "learning_rate": 4.026719780711175e-07, "loss": 0.9718, "step": 23311 }, { "epoch": 0.9121214492526801, "grad_norm": 0.0, "learning_rate": 4.023160715558083e-07, "loss": 0.8611, "step": 23312 }, { "epoch": 0.9121605759449096, "grad_norm": 0.0, "learning_rate": 4.0196031916727606e-07, "loss": 1.1202, "step": 23313 }, { "epoch": 0.912199702637139, "grad_norm": 0.0, "learning_rate": 4.0160472091123616e-07, "loss": 0.9846, "step": 23314 }, { "epoch": 0.9122388293293685, "grad_norm": 0.0, "learning_rate": 4.012492767933951e-07, "loss": 0.958, "step": 23315 }, { "epoch": 0.9122779560215979, "grad_norm": 0.0, "learning_rate": 4.008939868194639e-07, "loss": 0.8847, "step": 23316 }, { "epoch": 0.9123170827138274, "grad_norm": 0.0, "learning_rate": 4.005388509951447e-07, "loss": 0.8457, "step": 23317 }, { "epoch": 0.9123562094060568, "grad_norm": 0.0, "learning_rate": 4.0018386932614504e-07, "loss": 1.0583, "step": 23318 }, { "epoch": 0.9123953360982863, "grad_norm": 0.0, "learning_rate": 3.9982904181816163e-07, "loss": 1.0529, "step": 23319 }, { "epoch": 0.9124344627905157, "grad_norm": 0.0, "learning_rate": 3.9947436847689536e-07, "loss": 0.9633, "step": 23320 }, { "epoch": 0.9124735894827452, "grad_norm": 0.0, "learning_rate": 3.991198493080384e-07, "loss": 0.8371, "step": 23321 }, { "epoch": 0.9125127161749745, "grad_norm": 0.0, "learning_rate": 3.9876548431728943e-07, "loss": 0.7608, "step": 23322 }, { "epoch": 0.912551842867204, "grad_norm": 0.0, "learning_rate": 3.9841127351033295e-07, "loss": 0.8335, "step": 23323 }, { "epoch": 0.9125909695594334, "grad_norm": 0.0, "learning_rate": 3.9805721689286205e-07, "loss": 0.9914, "step": 23324 }, { "epoch": 0.9126300962516629, "grad_norm": 0.0, "learning_rate": 3.9770331447055886e-07, "loss": 0.9839, "step": 23325 }, { "epoch": 0.9126692229438923, "grad_norm": 0.0, "learning_rate": 3.973495662491089e-07, "loss": 0.9708, "step": 23326 }, { "epoch": 0.9127083496361218, "grad_norm": 0.0, "learning_rate": 3.9699597223419097e-07, "loss": 1.0306, "step": 23327 }, { "epoch": 0.9127474763283512, "grad_norm": 0.0, "learning_rate": 3.96642532431486e-07, "loss": 0.8637, "step": 23328 }, { "epoch": 0.9127866030205807, "grad_norm": 0.0, "learning_rate": 3.9628924684666727e-07, "loss": 0.9548, "step": 23329 }, { "epoch": 0.9128257297128101, "grad_norm": 0.0, "learning_rate": 3.959361154854091e-07, "loss": 0.9273, "step": 23330 }, { "epoch": 0.9128648564050396, "grad_norm": 0.0, "learning_rate": 3.9558313835338257e-07, "loss": 0.997, "step": 23331 }, { "epoch": 0.912903983097269, "grad_norm": 0.0, "learning_rate": 3.952303154562576e-07, "loss": 0.963, "step": 23332 }, { "epoch": 0.9129431097894984, "grad_norm": 0.0, "learning_rate": 3.948776467996962e-07, "loss": 0.9667, "step": 23333 }, { "epoch": 0.9129822364817278, "grad_norm": 0.0, "learning_rate": 3.9452513238936505e-07, "loss": 0.9283, "step": 23334 }, { "epoch": 0.9130213631739573, "grad_norm": 0.0, "learning_rate": 3.94172772230923e-07, "loss": 0.821, "step": 23335 }, { "epoch": 0.9130604898661867, "grad_norm": 0.0, "learning_rate": 3.9382056633002876e-07, "loss": 0.9184, "step": 23336 }, { "epoch": 0.9130996165584162, "grad_norm": 0.0, "learning_rate": 3.9346851469234006e-07, "loss": 0.9836, "step": 23337 }, { "epoch": 0.9131387432506456, "grad_norm": 0.0, "learning_rate": 3.931166173235101e-07, "loss": 0.9754, "step": 23338 }, { "epoch": 0.913177869942875, "grad_norm": 0.0, "learning_rate": 3.927648742291879e-07, "loss": 0.9059, "step": 23339 }, { "epoch": 0.9132169966351045, "grad_norm": 0.0, "learning_rate": 3.924132854150231e-07, "loss": 0.8716, "step": 23340 }, { "epoch": 0.9132561233273339, "grad_norm": 0.0, "learning_rate": 3.9206185088666246e-07, "loss": 1.0188, "step": 23341 }, { "epoch": 0.9132952500195634, "grad_norm": 0.0, "learning_rate": 3.9171057064975035e-07, "loss": 1.0767, "step": 23342 }, { "epoch": 0.9133343767117927, "grad_norm": 0.0, "learning_rate": 3.913594447099245e-07, "loss": 1.0139, "step": 23343 }, { "epoch": 0.9133735034040222, "grad_norm": 0.0, "learning_rate": 3.9100847307282696e-07, "loss": 0.9512, "step": 23344 }, { "epoch": 0.9134126300962516, "grad_norm": 0.0, "learning_rate": 3.906576557440922e-07, "loss": 0.9697, "step": 23345 }, { "epoch": 0.9134517567884811, "grad_norm": 0.0, "learning_rate": 3.9030699272935455e-07, "loss": 1.0096, "step": 23346 }, { "epoch": 0.9134908834807105, "grad_norm": 0.0, "learning_rate": 3.8995648403424404e-07, "loss": 0.944, "step": 23347 }, { "epoch": 0.91353001017294, "grad_norm": 0.0, "learning_rate": 3.896061296643905e-07, "loss": 0.9022, "step": 23348 }, { "epoch": 0.9135691368651694, "grad_norm": 0.0, "learning_rate": 3.892559296254217e-07, "loss": 0.9228, "step": 23349 }, { "epoch": 0.9136082635573989, "grad_norm": 0.0, "learning_rate": 3.889058839229587e-07, "loss": 0.8802, "step": 23350 }, { "epoch": 0.9136473902496283, "grad_norm": 0.0, "learning_rate": 3.8855599256262475e-07, "loss": 0.9066, "step": 23351 }, { "epoch": 0.9136865169418578, "grad_norm": 0.0, "learning_rate": 3.8820625555003543e-07, "loss": 0.9272, "step": 23352 }, { "epoch": 0.9137256436340871, "grad_norm": 0.0, "learning_rate": 3.8785667289081066e-07, "loss": 0.9028, "step": 23353 }, { "epoch": 0.9137647703263166, "grad_norm": 0.0, "learning_rate": 3.8750724459056367e-07, "loss": 0.8684, "step": 23354 }, { "epoch": 0.913803897018546, "grad_norm": 0.0, "learning_rate": 3.8715797065490446e-07, "loss": 0.9723, "step": 23355 }, { "epoch": 0.9138430237107755, "grad_norm": 0.0, "learning_rate": 3.86808851089443e-07, "loss": 1.0732, "step": 23356 }, { "epoch": 0.9138821504030049, "grad_norm": 0.0, "learning_rate": 3.8645988589978477e-07, "loss": 0.9828, "step": 23357 }, { "epoch": 0.9139212770952344, "grad_norm": 0.0, "learning_rate": 3.8611107509153423e-07, "loss": 0.9561, "step": 23358 }, { "epoch": 0.9139604037874638, "grad_norm": 0.0, "learning_rate": 3.857624186702946e-07, "loss": 0.9581, "step": 23359 }, { "epoch": 0.9139995304796933, "grad_norm": 0.0, "learning_rate": 3.854139166416615e-07, "loss": 1.0844, "step": 23360 }, { "epoch": 0.9140386571719227, "grad_norm": 0.0, "learning_rate": 3.8506556901123373e-07, "loss": 0.9275, "step": 23361 }, { "epoch": 0.9140777838641522, "grad_norm": 0.0, "learning_rate": 3.8471737578460453e-07, "loss": 0.9398, "step": 23362 }, { "epoch": 0.9141169105563816, "grad_norm": 0.0, "learning_rate": 3.8436933696736734e-07, "loss": 0.9051, "step": 23363 }, { "epoch": 0.914156037248611, "grad_norm": 0.0, "learning_rate": 3.840214525651076e-07, "loss": 1.0973, "step": 23364 }, { "epoch": 0.9141951639408404, "grad_norm": 0.0, "learning_rate": 3.8367372258341527e-07, "loss": 0.9658, "step": 23365 }, { "epoch": 0.9142342906330699, "grad_norm": 0.0, "learning_rate": 3.8332614702787043e-07, "loss": 0.9086, "step": 23366 }, { "epoch": 0.9142734173252993, "grad_norm": 0.0, "learning_rate": 3.829787259040596e-07, "loss": 0.9568, "step": 23367 }, { "epoch": 0.9143125440175287, "grad_norm": 0.0, "learning_rate": 3.826314592175584e-07, "loss": 0.9201, "step": 23368 }, { "epoch": 0.9143516707097582, "grad_norm": 0.0, "learning_rate": 3.822843469739468e-07, "loss": 1.0382, "step": 23369 }, { "epoch": 0.9143907974019876, "grad_norm": 0.0, "learning_rate": 3.819373891787936e-07, "loss": 0.9102, "step": 23370 }, { "epoch": 0.9144299240942171, "grad_norm": 0.0, "learning_rate": 3.815905858376767e-07, "loss": 0.7984, "step": 23371 }, { "epoch": 0.9144690507864465, "grad_norm": 0.0, "learning_rate": 3.8124393695616047e-07, "loss": 0.9283, "step": 23372 }, { "epoch": 0.914508177478676, "grad_norm": 0.0, "learning_rate": 3.8089744253981596e-07, "loss": 0.9096, "step": 23373 }, { "epoch": 0.9145473041709054, "grad_norm": 0.0, "learning_rate": 3.805511025942032e-07, "loss": 1.035, "step": 23374 }, { "epoch": 0.9145864308631348, "grad_norm": 0.0, "learning_rate": 3.802049171248856e-07, "loss": 1.0087, "step": 23375 }, { "epoch": 0.9146255575553642, "grad_norm": 0.0, "learning_rate": 3.7985888613742416e-07, "loss": 0.8297, "step": 23376 }, { "epoch": 0.9146646842475937, "grad_norm": 0.0, "learning_rate": 3.7951300963737445e-07, "loss": 0.9085, "step": 23377 }, { "epoch": 0.9147038109398231, "grad_norm": 0.0, "learning_rate": 3.7916728763028874e-07, "loss": 0.8847, "step": 23378 }, { "epoch": 0.9147429376320526, "grad_norm": 0.0, "learning_rate": 3.788217201217226e-07, "loss": 1.0375, "step": 23379 }, { "epoch": 0.914782064324282, "grad_norm": 0.0, "learning_rate": 3.784763071172226e-07, "loss": 0.9994, "step": 23380 }, { "epoch": 0.9148211910165115, "grad_norm": 0.0, "learning_rate": 3.781310486223377e-07, "loss": 0.8015, "step": 23381 }, { "epoch": 0.9148603177087409, "grad_norm": 0.0, "learning_rate": 3.7778594464261023e-07, "loss": 0.9963, "step": 23382 }, { "epoch": 0.9148994444009704, "grad_norm": 0.0, "learning_rate": 3.7744099518358447e-07, "loss": 0.8451, "step": 23383 }, { "epoch": 0.9149385710931998, "grad_norm": 0.0, "learning_rate": 3.770962002507972e-07, "loss": 1.0168, "step": 23384 }, { "epoch": 0.9149776977854293, "grad_norm": 0.0, "learning_rate": 3.7675155984978726e-07, "loss": 0.8663, "step": 23385 }, { "epoch": 0.9150168244776586, "grad_norm": 0.0, "learning_rate": 3.764070739860881e-07, "loss": 0.9877, "step": 23386 }, { "epoch": 0.9150559511698881, "grad_norm": 0.0, "learning_rate": 3.7606274266523415e-07, "loss": 0.907, "step": 23387 }, { "epoch": 0.9150950778621175, "grad_norm": 0.0, "learning_rate": 3.7571856589275093e-07, "loss": 1.0015, "step": 23388 }, { "epoch": 0.915134204554347, "grad_norm": 0.0, "learning_rate": 3.7537454367416847e-07, "loss": 0.8974, "step": 23389 }, { "epoch": 0.9151733312465764, "grad_norm": 0.0, "learning_rate": 3.750306760150113e-07, "loss": 0.8885, "step": 23390 }, { "epoch": 0.9152124579388059, "grad_norm": 0.0, "learning_rate": 3.746869629207994e-07, "loss": 0.9936, "step": 23391 }, { "epoch": 0.9152515846310353, "grad_norm": 0.0, "learning_rate": 3.7434340439705396e-07, "loss": 0.9582, "step": 23392 }, { "epoch": 0.9152907113232648, "grad_norm": 0.0, "learning_rate": 3.7400000044929273e-07, "loss": 0.9761, "step": 23393 }, { "epoch": 0.9153298380154942, "grad_norm": 0.0, "learning_rate": 3.736567510830291e-07, "loss": 1.073, "step": 23394 }, { "epoch": 0.9153689647077236, "grad_norm": 0.0, "learning_rate": 3.7331365630377537e-07, "loss": 0.9568, "step": 23395 }, { "epoch": 0.915408091399953, "grad_norm": 0.0, "learning_rate": 3.729707161170415e-07, "loss": 0.9865, "step": 23396 }, { "epoch": 0.9154472180921824, "grad_norm": 0.0, "learning_rate": 3.726279305283331e-07, "loss": 0.9327, "step": 23397 }, { "epoch": 0.9154863447844119, "grad_norm": 0.0, "learning_rate": 3.722852995431592e-07, "loss": 1.0692, "step": 23398 }, { "epoch": 0.9155254714766413, "grad_norm": 0.0, "learning_rate": 3.719428231670175e-07, "loss": 1.1176, "step": 23399 }, { "epoch": 0.9155645981688708, "grad_norm": 0.0, "learning_rate": 3.716005014054103e-07, "loss": 0.9748, "step": 23400 }, { "epoch": 0.9156037248611002, "grad_norm": 0.0, "learning_rate": 3.712583342638332e-07, "loss": 0.9003, "step": 23401 }, { "epoch": 0.9156428515533297, "grad_norm": 0.0, "learning_rate": 3.709163217477807e-07, "loss": 1.0242, "step": 23402 }, { "epoch": 0.9156819782455591, "grad_norm": 0.0, "learning_rate": 3.705744638627473e-07, "loss": 0.9986, "step": 23403 }, { "epoch": 0.9157211049377886, "grad_norm": 0.0, "learning_rate": 3.702327606142231e-07, "loss": 1.0388, "step": 23404 }, { "epoch": 0.915760231630018, "grad_norm": 0.0, "learning_rate": 3.698912120076914e-07, "loss": 0.8084, "step": 23405 }, { "epoch": 0.9157993583222475, "grad_norm": 0.0, "learning_rate": 3.695498180486412e-07, "loss": 0.902, "step": 23406 }, { "epoch": 0.9158384850144768, "grad_norm": 0.0, "learning_rate": 3.692085787425526e-07, "loss": 0.9641, "step": 23407 }, { "epoch": 0.9158776117067063, "grad_norm": 0.0, "learning_rate": 3.688674940949066e-07, "loss": 0.8637, "step": 23408 }, { "epoch": 0.9159167383989357, "grad_norm": 0.0, "learning_rate": 3.685265641111802e-07, "loss": 1.0638, "step": 23409 }, { "epoch": 0.9159558650911652, "grad_norm": 0.0, "learning_rate": 3.681857887968476e-07, "loss": 0.8258, "step": 23410 }, { "epoch": 0.9159949917833946, "grad_norm": 0.0, "learning_rate": 3.678451681573825e-07, "loss": 1.016, "step": 23411 }, { "epoch": 0.9160341184756241, "grad_norm": 0.0, "learning_rate": 3.675047021982547e-07, "loss": 0.9604, "step": 23412 }, { "epoch": 0.9160732451678535, "grad_norm": 0.0, "learning_rate": 3.6716439092493007e-07, "loss": 0.9963, "step": 23413 }, { "epoch": 0.916112371860083, "grad_norm": 0.0, "learning_rate": 3.668242343428763e-07, "loss": 1.0038, "step": 23414 }, { "epoch": 0.9161514985523124, "grad_norm": 0.0, "learning_rate": 3.6648423245755125e-07, "loss": 0.9799, "step": 23415 }, { "epoch": 0.9161906252445419, "grad_norm": 0.0, "learning_rate": 3.6614438527442067e-07, "loss": 0.8788, "step": 23416 }, { "epoch": 0.9162297519367713, "grad_norm": 0.0, "learning_rate": 3.658046927989389e-07, "loss": 0.8735, "step": 23417 }, { "epoch": 0.9162688786290007, "grad_norm": 0.0, "learning_rate": 3.6546515503656176e-07, "loss": 1.0758, "step": 23418 }, { "epoch": 0.9163080053212301, "grad_norm": 0.0, "learning_rate": 3.6512577199273924e-07, "loss": 0.8432, "step": 23419 }, { "epoch": 0.9163471320134596, "grad_norm": 0.0, "learning_rate": 3.64786543672927e-07, "loss": 0.9166, "step": 23420 }, { "epoch": 0.916386258705689, "grad_norm": 0.0, "learning_rate": 3.6444747008256733e-07, "loss": 0.8458, "step": 23421 }, { "epoch": 0.9164253853979185, "grad_norm": 0.0, "learning_rate": 3.641085512271081e-07, "loss": 0.8047, "step": 23422 }, { "epoch": 0.9164645120901479, "grad_norm": 0.0, "learning_rate": 3.637697871119894e-07, "loss": 0.9104, "step": 23423 }, { "epoch": 0.9165036387823773, "grad_norm": 0.0, "learning_rate": 3.6343117774265467e-07, "loss": 0.9744, "step": 23424 }, { "epoch": 0.9165427654746068, "grad_norm": 0.0, "learning_rate": 3.630927231245385e-07, "loss": 0.9268, "step": 23425 }, { "epoch": 0.9165818921668362, "grad_norm": 0.0, "learning_rate": 3.6275442326307974e-07, "loss": 0.9695, "step": 23426 }, { "epoch": 0.9166210188590657, "grad_norm": 0.0, "learning_rate": 3.624162781637064e-07, "loss": 1.0593, "step": 23427 }, { "epoch": 0.916660145551295, "grad_norm": 0.0, "learning_rate": 3.6207828783185184e-07, "loss": 0.9297, "step": 23428 }, { "epoch": 0.9166992722435245, "grad_norm": 0.0, "learning_rate": 3.617404522729451e-07, "loss": 0.9387, "step": 23429 }, { "epoch": 0.9167383989357539, "grad_norm": 0.0, "learning_rate": 3.6140277149240623e-07, "loss": 1.0067, "step": 23430 }, { "epoch": 0.9167775256279834, "grad_norm": 0.0, "learning_rate": 3.6106524549566203e-07, "loss": 1.0128, "step": 23431 }, { "epoch": 0.9168166523202128, "grad_norm": 0.0, "learning_rate": 3.607278742881326e-07, "loss": 0.9461, "step": 23432 }, { "epoch": 0.9168557790124423, "grad_norm": 0.0, "learning_rate": 3.6039065787523365e-07, "loss": 0.9423, "step": 23433 }, { "epoch": 0.9168949057046717, "grad_norm": 0.0, "learning_rate": 3.600535962623819e-07, "loss": 0.9315, "step": 23434 }, { "epoch": 0.9169340323969012, "grad_norm": 0.0, "learning_rate": 3.5971668945499084e-07, "loss": 0.9306, "step": 23435 }, { "epoch": 0.9169731590891306, "grad_norm": 0.0, "learning_rate": 3.5937993745846946e-07, "loss": 0.9206, "step": 23436 }, { "epoch": 0.9170122857813601, "grad_norm": 0.0, "learning_rate": 3.590433402782245e-07, "loss": 0.9058, "step": 23437 }, { "epoch": 0.9170514124735895, "grad_norm": 0.0, "learning_rate": 3.5870689791966394e-07, "loss": 0.9521, "step": 23438 }, { "epoch": 0.917090539165819, "grad_norm": 0.0, "learning_rate": 3.583706103881901e-07, "loss": 0.9733, "step": 23439 }, { "epoch": 0.9171296658580483, "grad_norm": 0.0, "learning_rate": 3.580344776892009e-07, "loss": 0.9136, "step": 23440 }, { "epoch": 0.9171687925502778, "grad_norm": 0.0, "learning_rate": 3.5769849982809746e-07, "loss": 0.877, "step": 23441 }, { "epoch": 0.9172079192425072, "grad_norm": 0.0, "learning_rate": 3.5736267681027117e-07, "loss": 0.9071, "step": 23442 }, { "epoch": 0.9172470459347367, "grad_norm": 0.0, "learning_rate": 3.5702700864112095e-07, "loss": 0.9901, "step": 23443 }, { "epoch": 0.9172861726269661, "grad_norm": 0.0, "learning_rate": 3.566914953260314e-07, "loss": 0.9427, "step": 23444 }, { "epoch": 0.9173252993191956, "grad_norm": 0.0, "learning_rate": 3.563561368703938e-07, "loss": 0.9699, "step": 23445 }, { "epoch": 0.917364426011425, "grad_norm": 0.0, "learning_rate": 3.560209332795894e-07, "loss": 0.9138, "step": 23446 }, { "epoch": 0.9174035527036545, "grad_norm": 0.0, "learning_rate": 3.556858845590083e-07, "loss": 0.919, "step": 23447 }, { "epoch": 0.9174426793958839, "grad_norm": 0.0, "learning_rate": 3.55350990714024e-07, "loss": 0.9805, "step": 23448 }, { "epoch": 0.9174818060881134, "grad_norm": 0.0, "learning_rate": 3.5501625175001995e-07, "loss": 1.0131, "step": 23449 }, { "epoch": 0.9175209327803427, "grad_norm": 0.0, "learning_rate": 3.5468166767236746e-07, "loss": 0.8377, "step": 23450 }, { "epoch": 0.9175600594725722, "grad_norm": 0.0, "learning_rate": 3.5434723848644105e-07, "loss": 0.9454, "step": 23451 }, { "epoch": 0.9175991861648016, "grad_norm": 0.0, "learning_rate": 3.5401296419761086e-07, "loss": 0.8293, "step": 23452 }, { "epoch": 0.917638312857031, "grad_norm": 0.0, "learning_rate": 3.5367884481124715e-07, "loss": 1.0232, "step": 23453 }, { "epoch": 0.9176774395492605, "grad_norm": 0.0, "learning_rate": 3.533448803327122e-07, "loss": 0.8145, "step": 23454 }, { "epoch": 0.9177165662414899, "grad_norm": 0.0, "learning_rate": 3.5301107076737064e-07, "loss": 0.8745, "step": 23455 }, { "epoch": 0.9177556929337194, "grad_norm": 0.0, "learning_rate": 3.526774161205826e-07, "loss": 0.9559, "step": 23456 }, { "epoch": 0.9177948196259488, "grad_norm": 0.0, "learning_rate": 3.523439163977083e-07, "loss": 1.0196, "step": 23457 }, { "epoch": 0.9178339463181783, "grad_norm": 0.0, "learning_rate": 3.5201057160410003e-07, "loss": 0.7997, "step": 23458 }, { "epoch": 0.9178730730104077, "grad_norm": 0.0, "learning_rate": 3.5167738174511245e-07, "loss": 1.078, "step": 23459 }, { "epoch": 0.9179121997026372, "grad_norm": 0.0, "learning_rate": 3.5134434682609573e-07, "loss": 1.0104, "step": 23460 }, { "epoch": 0.9179513263948665, "grad_norm": 0.0, "learning_rate": 3.5101146685240005e-07, "loss": 0.8406, "step": 23461 }, { "epoch": 0.917990453087096, "grad_norm": 0.0, "learning_rate": 3.506787418293678e-07, "loss": 1.0051, "step": 23462 }, { "epoch": 0.9180295797793254, "grad_norm": 0.0, "learning_rate": 3.503461717623446e-07, "loss": 1.0674, "step": 23463 }, { "epoch": 0.9180687064715549, "grad_norm": 0.0, "learning_rate": 3.500137566566686e-07, "loss": 0.8768, "step": 23464 }, { "epoch": 0.9181078331637843, "grad_norm": 0.0, "learning_rate": 3.49681496517682e-07, "loss": 0.9545, "step": 23465 }, { "epoch": 0.9181469598560138, "grad_norm": 0.0, "learning_rate": 3.493493913507162e-07, "loss": 1.0174, "step": 23466 }, { "epoch": 0.9181860865482432, "grad_norm": 0.0, "learning_rate": 3.490174411611069e-07, "loss": 0.9183, "step": 23467 }, { "epoch": 0.9182252132404727, "grad_norm": 0.0, "learning_rate": 3.486856459541843e-07, "loss": 0.8568, "step": 23468 }, { "epoch": 0.9182643399327021, "grad_norm": 0.0, "learning_rate": 3.4835400573527525e-07, "loss": 0.9152, "step": 23469 }, { "epoch": 0.9183034666249316, "grad_norm": 0.0, "learning_rate": 3.4802252050970763e-07, "loss": 1.0983, "step": 23470 }, { "epoch": 0.918342593317161, "grad_norm": 0.0, "learning_rate": 3.4769119028280396e-07, "loss": 0.8281, "step": 23471 }, { "epoch": 0.9183817200093904, "grad_norm": 0.0, "learning_rate": 3.473600150598844e-07, "loss": 1.0073, "step": 23472 }, { "epoch": 0.9184208467016198, "grad_norm": 0.0, "learning_rate": 3.47028994846268e-07, "loss": 0.9807, "step": 23473 }, { "epoch": 0.9184599733938493, "grad_norm": 0.0, "learning_rate": 3.4669812964727043e-07, "loss": 1.0476, "step": 23474 }, { "epoch": 0.9184991000860787, "grad_norm": 0.0, "learning_rate": 3.463674194682032e-07, "loss": 0.9727, "step": 23475 }, { "epoch": 0.9185382267783082, "grad_norm": 0.0, "learning_rate": 3.4603686431437855e-07, "loss": 1.0685, "step": 23476 }, { "epoch": 0.9185773534705376, "grad_norm": 0.0, "learning_rate": 3.4570646419110564e-07, "loss": 0.8336, "step": 23477 }, { "epoch": 0.9186164801627671, "grad_norm": 0.0, "learning_rate": 3.4537621910369026e-07, "loss": 0.9073, "step": 23478 }, { "epoch": 0.9186556068549965, "grad_norm": 0.0, "learning_rate": 3.450461290574336e-07, "loss": 0.8385, "step": 23479 }, { "epoch": 0.918694733547226, "grad_norm": 0.0, "learning_rate": 3.4471619405763825e-07, "loss": 0.9656, "step": 23480 }, { "epoch": 0.9187338602394554, "grad_norm": 0.0, "learning_rate": 3.4438641410960203e-07, "loss": 0.9361, "step": 23481 }, { "epoch": 0.9187729869316847, "grad_norm": 0.0, "learning_rate": 3.4405678921861976e-07, "loss": 0.9573, "step": 23482 }, { "epoch": 0.9188121136239142, "grad_norm": 0.0, "learning_rate": 3.4372731938998703e-07, "loss": 0.9549, "step": 23483 }, { "epoch": 0.9188512403161436, "grad_norm": 0.0, "learning_rate": 3.433980046289942e-07, "loss": 0.9083, "step": 23484 }, { "epoch": 0.9188903670083731, "grad_norm": 0.0, "learning_rate": 3.430688449409281e-07, "loss": 0.9789, "step": 23485 }, { "epoch": 0.9189294937006025, "grad_norm": 0.0, "learning_rate": 3.4273984033107443e-07, "loss": 0.9712, "step": 23486 }, { "epoch": 0.918968620392832, "grad_norm": 0.0, "learning_rate": 3.4241099080471904e-07, "loss": 1.0545, "step": 23487 }, { "epoch": 0.9190077470850614, "grad_norm": 0.0, "learning_rate": 3.4208229636714216e-07, "loss": 0.8659, "step": 23488 }, { "epoch": 0.9190468737772909, "grad_norm": 0.0, "learning_rate": 3.417537570236207e-07, "loss": 0.9899, "step": 23489 }, { "epoch": 0.9190860004695203, "grad_norm": 0.0, "learning_rate": 3.4142537277943146e-07, "loss": 0.9702, "step": 23490 }, { "epoch": 0.9191251271617498, "grad_norm": 0.0, "learning_rate": 3.410971436398469e-07, "loss": 1.0694, "step": 23491 }, { "epoch": 0.9191642538539792, "grad_norm": 0.0, "learning_rate": 3.4076906961014066e-07, "loss": 1.0059, "step": 23492 }, { "epoch": 0.9192033805462086, "grad_norm": 0.0, "learning_rate": 3.4044115069557847e-07, "loss": 1.073, "step": 23493 }, { "epoch": 0.919242507238438, "grad_norm": 0.0, "learning_rate": 3.401133869014284e-07, "loss": 0.9285, "step": 23494 }, { "epoch": 0.9192816339306675, "grad_norm": 0.0, "learning_rate": 3.3978577823295056e-07, "loss": 1.0181, "step": 23495 }, { "epoch": 0.9193207606228969, "grad_norm": 0.0, "learning_rate": 3.394583246954097e-07, "loss": 0.9625, "step": 23496 }, { "epoch": 0.9193598873151264, "grad_norm": 0.0, "learning_rate": 3.391310262940628e-07, "loss": 0.8813, "step": 23497 }, { "epoch": 0.9193990140073558, "grad_norm": 0.0, "learning_rate": 3.3880388303416666e-07, "loss": 1.0591, "step": 23498 }, { "epoch": 0.9194381406995853, "grad_norm": 0.0, "learning_rate": 3.384768949209727e-07, "loss": 0.8522, "step": 23499 }, { "epoch": 0.9194772673918147, "grad_norm": 0.0, "learning_rate": 3.3815006195973333e-07, "loss": 1.0097, "step": 23500 }, { "epoch": 0.9195163940840442, "grad_norm": 0.0, "learning_rate": 3.378233841556966e-07, "loss": 1.0195, "step": 23501 }, { "epoch": 0.9195555207762736, "grad_norm": 0.0, "learning_rate": 3.3749686151411056e-07, "loss": 1.0175, "step": 23502 }, { "epoch": 0.9195946474685031, "grad_norm": 0.0, "learning_rate": 3.3717049404021653e-07, "loss": 0.9975, "step": 23503 }, { "epoch": 0.9196337741607324, "grad_norm": 0.0, "learning_rate": 3.368442817392548e-07, "loss": 0.9698, "step": 23504 }, { "epoch": 0.9196729008529619, "grad_norm": 0.0, "learning_rate": 3.365182246164667e-07, "loss": 0.9247, "step": 23505 }, { "epoch": 0.9197120275451913, "grad_norm": 0.0, "learning_rate": 3.361923226770869e-07, "loss": 1.0099, "step": 23506 }, { "epoch": 0.9197511542374208, "grad_norm": 0.0, "learning_rate": 3.3586657592634797e-07, "loss": 0.9173, "step": 23507 }, { "epoch": 0.9197902809296502, "grad_norm": 0.0, "learning_rate": 3.3554098436948347e-07, "loss": 0.747, "step": 23508 }, { "epoch": 0.9198294076218796, "grad_norm": 0.0, "learning_rate": 3.3521554801171807e-07, "loss": 0.9306, "step": 23509 }, { "epoch": 0.9198685343141091, "grad_norm": 0.0, "learning_rate": 3.348902668582832e-07, "loss": 0.9757, "step": 23510 }, { "epoch": 0.9199076610063385, "grad_norm": 0.0, "learning_rate": 3.3456514091439796e-07, "loss": 1.0098, "step": 23511 }, { "epoch": 0.919946787698568, "grad_norm": 0.0, "learning_rate": 3.3424017018528596e-07, "loss": 0.8594, "step": 23512 }, { "epoch": 0.9199859143907974, "grad_norm": 0.0, "learning_rate": 3.339153546761642e-07, "loss": 1.0376, "step": 23513 }, { "epoch": 0.9200250410830269, "grad_norm": 0.0, "learning_rate": 3.3359069439224956e-07, "loss": 0.9754, "step": 23514 }, { "epoch": 0.9200641677752562, "grad_norm": 0.0, "learning_rate": 3.3326618933875565e-07, "loss": 0.9646, "step": 23515 }, { "epoch": 0.9201032944674857, "grad_norm": 0.0, "learning_rate": 3.32941839520895e-07, "loss": 0.9716, "step": 23516 }, { "epoch": 0.9201424211597151, "grad_norm": 0.0, "learning_rate": 3.326176449438734e-07, "loss": 0.9504, "step": 23517 }, { "epoch": 0.9201815478519446, "grad_norm": 0.0, "learning_rate": 3.322936056128978e-07, "loss": 0.939, "step": 23518 }, { "epoch": 0.920220674544174, "grad_norm": 0.0, "learning_rate": 3.319697215331752e-07, "loss": 0.9781, "step": 23519 }, { "epoch": 0.9202598012364035, "grad_norm": 0.0, "learning_rate": 3.316459927099025e-07, "loss": 0.9346, "step": 23520 }, { "epoch": 0.9202989279286329, "grad_norm": 0.0, "learning_rate": 3.3132241914828e-07, "loss": 0.9879, "step": 23521 }, { "epoch": 0.9203380546208624, "grad_norm": 0.0, "learning_rate": 3.3099900085350355e-07, "loss": 1.0258, "step": 23522 }, { "epoch": 0.9203771813130918, "grad_norm": 0.0, "learning_rate": 3.306757378307679e-07, "loss": 0.9147, "step": 23523 }, { "epoch": 0.9204163080053213, "grad_norm": 0.0, "learning_rate": 3.303526300852633e-07, "loss": 0.9182, "step": 23524 }, { "epoch": 0.9204554346975506, "grad_norm": 0.0, "learning_rate": 3.300296776221801e-07, "loss": 0.9681, "step": 23525 }, { "epoch": 0.9204945613897801, "grad_norm": 0.0, "learning_rate": 3.297068804466996e-07, "loss": 0.9376, "step": 23526 }, { "epoch": 0.9205336880820095, "grad_norm": 0.0, "learning_rate": 3.2938423856401226e-07, "loss": 0.9518, "step": 23527 }, { "epoch": 0.920572814774239, "grad_norm": 0.0, "learning_rate": 3.2906175197929493e-07, "loss": 0.9355, "step": 23528 }, { "epoch": 0.9206119414664684, "grad_norm": 0.0, "learning_rate": 3.28739420697729e-07, "loss": 0.9675, "step": 23529 }, { "epoch": 0.9206510681586979, "grad_norm": 0.0, "learning_rate": 3.284172447244871e-07, "loss": 1.0494, "step": 23530 }, { "epoch": 0.9206901948509273, "grad_norm": 0.0, "learning_rate": 3.2809522406474616e-07, "loss": 0.9554, "step": 23531 }, { "epoch": 0.9207293215431568, "grad_norm": 0.0, "learning_rate": 3.2777335872367534e-07, "loss": 0.832, "step": 23532 }, { "epoch": 0.9207684482353862, "grad_norm": 0.0, "learning_rate": 3.274516487064461e-07, "loss": 0.9594, "step": 23533 }, { "epoch": 0.9208075749276157, "grad_norm": 0.0, "learning_rate": 3.27130094018222e-07, "loss": 0.9488, "step": 23534 }, { "epoch": 0.920846701619845, "grad_norm": 0.0, "learning_rate": 3.2680869466416687e-07, "loss": 1.0786, "step": 23535 }, { "epoch": 0.9208858283120746, "grad_norm": 0.0, "learning_rate": 3.264874506494442e-07, "loss": 0.8969, "step": 23536 }, { "epoch": 0.9209249550043039, "grad_norm": 0.0, "learning_rate": 3.2616636197921106e-07, "loss": 0.8351, "step": 23537 }, { "epoch": 0.9209640816965333, "grad_norm": 0.0, "learning_rate": 3.2584542865862435e-07, "loss": 1.02, "step": 23538 }, { "epoch": 0.9210032083887628, "grad_norm": 0.0, "learning_rate": 3.255246506928389e-07, "loss": 0.919, "step": 23539 }, { "epoch": 0.9210423350809922, "grad_norm": 0.0, "learning_rate": 3.252040280870017e-07, "loss": 0.9568, "step": 23540 }, { "epoch": 0.9210814617732217, "grad_norm": 0.0, "learning_rate": 3.2488356084626747e-07, "loss": 0.809, "step": 23541 }, { "epoch": 0.9211205884654511, "grad_norm": 0.0, "learning_rate": 3.2456324897577774e-07, "loss": 0.9131, "step": 23542 }, { "epoch": 0.9211597151576806, "grad_norm": 0.0, "learning_rate": 3.2424309248067944e-07, "loss": 0.9213, "step": 23543 }, { "epoch": 0.92119884184991, "grad_norm": 0.0, "learning_rate": 3.2392309136611066e-07, "loss": 0.8675, "step": 23544 }, { "epoch": 0.9212379685421395, "grad_norm": 0.0, "learning_rate": 3.2360324563721514e-07, "loss": 1.0464, "step": 23545 }, { "epoch": 0.9212770952343688, "grad_norm": 0.0, "learning_rate": 3.2328355529912423e-07, "loss": 0.972, "step": 23546 }, { "epoch": 0.9213162219265983, "grad_norm": 0.0, "learning_rate": 3.2296402035697616e-07, "loss": 0.9789, "step": 23547 }, { "epoch": 0.9213553486188277, "grad_norm": 0.0, "learning_rate": 3.2264464081589785e-07, "loss": 1.0311, "step": 23548 }, { "epoch": 0.9213944753110572, "grad_norm": 0.0, "learning_rate": 3.223254166810197e-07, "loss": 0.9504, "step": 23549 }, { "epoch": 0.9214336020032866, "grad_norm": 0.0, "learning_rate": 3.220063479574698e-07, "loss": 1.008, "step": 23550 }, { "epoch": 0.9214727286955161, "grad_norm": 0.0, "learning_rate": 3.2168743465037066e-07, "loss": 1.0579, "step": 23551 }, { "epoch": 0.9215118553877455, "grad_norm": 0.0, "learning_rate": 3.2136867676484384e-07, "loss": 0.9471, "step": 23552 }, { "epoch": 0.921550982079975, "grad_norm": 0.0, "learning_rate": 3.210500743060074e-07, "loss": 0.8568, "step": 23553 }, { "epoch": 0.9215901087722044, "grad_norm": 0.0, "learning_rate": 3.2073162727897847e-07, "loss": 0.7865, "step": 23554 }, { "epoch": 0.9216292354644339, "grad_norm": 0.0, "learning_rate": 3.204133356888717e-07, "loss": 0.8539, "step": 23555 }, { "epoch": 0.9216683621566633, "grad_norm": 0.0, "learning_rate": 3.2009519954079635e-07, "loss": 0.8844, "step": 23556 }, { "epoch": 0.9217074888488928, "grad_norm": 0.0, "learning_rate": 3.19777218839864e-07, "loss": 0.9618, "step": 23557 }, { "epoch": 0.9217466155411221, "grad_norm": 0.0, "learning_rate": 3.194593935911783e-07, "loss": 0.9334, "step": 23558 }, { "epoch": 0.9217857422333516, "grad_norm": 0.0, "learning_rate": 3.19141723799844e-07, "loss": 0.9523, "step": 23559 }, { "epoch": 0.921824868925581, "grad_norm": 0.0, "learning_rate": 3.188242094709637e-07, "loss": 0.9515, "step": 23560 }, { "epoch": 0.9218639956178105, "grad_norm": 0.0, "learning_rate": 3.1850685060963557e-07, "loss": 0.8704, "step": 23561 }, { "epoch": 0.9219031223100399, "grad_norm": 0.0, "learning_rate": 3.181896472209556e-07, "loss": 0.9233, "step": 23562 }, { "epoch": 0.9219422490022694, "grad_norm": 0.0, "learning_rate": 3.1787259931001737e-07, "loss": 0.9778, "step": 23563 }, { "epoch": 0.9219813756944988, "grad_norm": 0.0, "learning_rate": 3.175557068819135e-07, "loss": 0.8716, "step": 23564 }, { "epoch": 0.9220205023867283, "grad_norm": 0.0, "learning_rate": 3.172389699417311e-07, "loss": 0.8302, "step": 23565 }, { "epoch": 0.9220596290789577, "grad_norm": 0.0, "learning_rate": 3.1692238849455823e-07, "loss": 0.9579, "step": 23566 }, { "epoch": 0.922098755771187, "grad_norm": 0.0, "learning_rate": 3.1660596254547873e-07, "loss": 1.0552, "step": 23567 }, { "epoch": 0.9221378824634165, "grad_norm": 0.0, "learning_rate": 3.1628969209957394e-07, "loss": 0.8773, "step": 23568 }, { "epoch": 0.9221770091556459, "grad_norm": 0.0, "learning_rate": 3.15973577161921e-07, "loss": 1.0496, "step": 23569 }, { "epoch": 0.9222161358478754, "grad_norm": 0.0, "learning_rate": 3.156576177375992e-07, "loss": 0.8979, "step": 23570 }, { "epoch": 0.9222552625401048, "grad_norm": 0.0, "learning_rate": 3.153418138316788e-07, "loss": 1.0616, "step": 23571 }, { "epoch": 0.9222943892323343, "grad_norm": 0.0, "learning_rate": 3.150261654492348e-07, "loss": 1.029, "step": 23572 }, { "epoch": 0.9223335159245637, "grad_norm": 0.0, "learning_rate": 3.147106725953342e-07, "loss": 1.0007, "step": 23573 }, { "epoch": 0.9223726426167932, "grad_norm": 0.0, "learning_rate": 3.1439533527504396e-07, "loss": 1.0308, "step": 23574 }, { "epoch": 0.9224117693090226, "grad_norm": 0.0, "learning_rate": 3.140801534934268e-07, "loss": 1.0035, "step": 23575 }, { "epoch": 0.9224508960012521, "grad_norm": 0.0, "learning_rate": 3.1376512725554755e-07, "loss": 0.9895, "step": 23576 }, { "epoch": 0.9224900226934815, "grad_norm": 0.0, "learning_rate": 3.13450256566461e-07, "loss": 0.9444, "step": 23577 }, { "epoch": 0.922529149385711, "grad_norm": 0.0, "learning_rate": 3.1313554143122647e-07, "loss": 0.9124, "step": 23578 }, { "epoch": 0.9225682760779403, "grad_norm": 0.0, "learning_rate": 3.128209818548955e-07, "loss": 1.0414, "step": 23579 }, { "epoch": 0.9226074027701698, "grad_norm": 0.0, "learning_rate": 3.125065778425218e-07, "loss": 0.9489, "step": 23580 }, { "epoch": 0.9226465294623992, "grad_norm": 0.0, "learning_rate": 3.1219232939915244e-07, "loss": 1.0083, "step": 23581 }, { "epoch": 0.9226856561546287, "grad_norm": 0.0, "learning_rate": 3.1187823652983675e-07, "loss": 0.9956, "step": 23582 }, { "epoch": 0.9227247828468581, "grad_norm": 0.0, "learning_rate": 3.115642992396151e-07, "loss": 0.8947, "step": 23583 }, { "epoch": 0.9227639095390876, "grad_norm": 0.0, "learning_rate": 3.112505175335312e-07, "loss": 0.9594, "step": 23584 }, { "epoch": 0.922803036231317, "grad_norm": 0.0, "learning_rate": 3.109368914166222e-07, "loss": 0.9595, "step": 23585 }, { "epoch": 0.9228421629235465, "grad_norm": 0.0, "learning_rate": 3.1062342089392737e-07, "loss": 0.9887, "step": 23586 }, { "epoch": 0.9228812896157759, "grad_norm": 0.0, "learning_rate": 3.103101059704772e-07, "loss": 0.9371, "step": 23587 }, { "epoch": 0.9229204163080054, "grad_norm": 0.0, "learning_rate": 3.0999694665130643e-07, "loss": 0.9871, "step": 23588 }, { "epoch": 0.9229595430002348, "grad_norm": 0.0, "learning_rate": 3.0968394294144113e-07, "loss": 0.8835, "step": 23589 }, { "epoch": 0.9229986696924642, "grad_norm": 0.0, "learning_rate": 3.0937109484590943e-07, "loss": 0.8665, "step": 23590 }, { "epoch": 0.9230377963846936, "grad_norm": 0.0, "learning_rate": 3.090584023697352e-07, "loss": 0.8696, "step": 23591 }, { "epoch": 0.9230769230769231, "grad_norm": 0.0, "learning_rate": 3.0874586551793983e-07, "loss": 0.8752, "step": 23592 }, { "epoch": 0.9231160497691525, "grad_norm": 0.0, "learning_rate": 3.0843348429554054e-07, "loss": 0.9933, "step": 23593 }, { "epoch": 0.923155176461382, "grad_norm": 0.0, "learning_rate": 3.081212587075577e-07, "loss": 0.9239, "step": 23594 }, { "epoch": 0.9231943031536114, "grad_norm": 0.0, "learning_rate": 3.078091887590007e-07, "loss": 0.9509, "step": 23595 }, { "epoch": 0.9232334298458408, "grad_norm": 0.0, "learning_rate": 3.074972744548854e-07, "loss": 1.0051, "step": 23596 }, { "epoch": 0.9232725565380703, "grad_norm": 0.0, "learning_rate": 3.071855158002168e-07, "loss": 1.0403, "step": 23597 }, { "epoch": 0.9233116832302997, "grad_norm": 0.0, "learning_rate": 3.0687391280000313e-07, "loss": 0.9034, "step": 23598 }, { "epoch": 0.9233508099225292, "grad_norm": 0.0, "learning_rate": 3.0656246545924804e-07, "loss": 1.0571, "step": 23599 }, { "epoch": 0.9233899366147585, "grad_norm": 0.0, "learning_rate": 3.062511737829543e-07, "loss": 1.0354, "step": 23600 }, { "epoch": 0.923429063306988, "grad_norm": 0.0, "learning_rate": 3.05940037776119e-07, "loss": 0.9235, "step": 23601 }, { "epoch": 0.9234681899992174, "grad_norm": 0.0, "learning_rate": 3.0562905744373926e-07, "loss": 0.8964, "step": 23602 }, { "epoch": 0.9235073166914469, "grad_norm": 0.0, "learning_rate": 3.0531823279080995e-07, "loss": 0.9749, "step": 23603 }, { "epoch": 0.9235464433836763, "grad_norm": 0.0, "learning_rate": 3.0500756382232045e-07, "loss": 0.9388, "step": 23604 }, { "epoch": 0.9235855700759058, "grad_norm": 0.0, "learning_rate": 3.0469705054326117e-07, "loss": 0.9742, "step": 23605 }, { "epoch": 0.9236246967681352, "grad_norm": 0.0, "learning_rate": 3.043866929586192e-07, "loss": 0.9178, "step": 23606 }, { "epoch": 0.9236638234603647, "grad_norm": 0.0, "learning_rate": 3.0407649107337734e-07, "loss": 0.9548, "step": 23607 }, { "epoch": 0.9237029501525941, "grad_norm": 0.0, "learning_rate": 3.0376644489251707e-07, "loss": 0.9504, "step": 23608 }, { "epoch": 0.9237420768448236, "grad_norm": 0.0, "learning_rate": 3.034565544210177e-07, "loss": 0.8641, "step": 23609 }, { "epoch": 0.923781203537053, "grad_norm": 0.0, "learning_rate": 3.0314681966385653e-07, "loss": 0.947, "step": 23610 }, { "epoch": 0.9238203302292824, "grad_norm": 0.0, "learning_rate": 3.0283724062600496e-07, "loss": 1.0065, "step": 23611 }, { "epoch": 0.9238594569215118, "grad_norm": 0.0, "learning_rate": 3.0252781731243686e-07, "loss": 1.0175, "step": 23612 }, { "epoch": 0.9238985836137413, "grad_norm": 0.0, "learning_rate": 3.022185497281216e-07, "loss": 0.9122, "step": 23613 }, { "epoch": 0.9239377103059707, "grad_norm": 0.0, "learning_rate": 3.01909437878023e-07, "loss": 1.0042, "step": 23614 }, { "epoch": 0.9239768369982002, "grad_norm": 0.0, "learning_rate": 3.0160048176710586e-07, "loss": 1.0674, "step": 23615 }, { "epoch": 0.9240159636904296, "grad_norm": 0.0, "learning_rate": 3.0129168140033305e-07, "loss": 0.8232, "step": 23616 }, { "epoch": 0.9240550903826591, "grad_norm": 0.0, "learning_rate": 3.0098303678266383e-07, "loss": 0.8919, "step": 23617 }, { "epoch": 0.9240942170748885, "grad_norm": 0.0, "learning_rate": 3.0067454791905206e-07, "loss": 0.9834, "step": 23618 }, { "epoch": 0.924133343767118, "grad_norm": 0.0, "learning_rate": 3.003662148144537e-07, "loss": 1.0004, "step": 23619 }, { "epoch": 0.9241724704593474, "grad_norm": 0.0, "learning_rate": 3.0005803747381824e-07, "loss": 0.9407, "step": 23620 }, { "epoch": 0.9242115971515769, "grad_norm": 0.0, "learning_rate": 2.997500159020983e-07, "loss": 0.7923, "step": 23621 }, { "epoch": 0.9242507238438062, "grad_norm": 0.0, "learning_rate": 2.994421501042355e-07, "loss": 0.9886, "step": 23622 }, { "epoch": 0.9242898505360356, "grad_norm": 0.0, "learning_rate": 2.9913444008517924e-07, "loss": 1.0038, "step": 23623 }, { "epoch": 0.9243289772282651, "grad_norm": 0.0, "learning_rate": 2.9882688584986554e-07, "loss": 0.9429, "step": 23624 }, { "epoch": 0.9243681039204945, "grad_norm": 0.0, "learning_rate": 2.98519487403236e-07, "loss": 0.9324, "step": 23625 }, { "epoch": 0.924407230612724, "grad_norm": 0.0, "learning_rate": 2.982122447502278e-07, "loss": 0.9664, "step": 23626 }, { "epoch": 0.9244463573049534, "grad_norm": 0.0, "learning_rate": 2.979051578957748e-07, "loss": 0.9042, "step": 23627 }, { "epoch": 0.9244854839971829, "grad_norm": 0.0, "learning_rate": 2.9759822684480524e-07, "loss": 0.9601, "step": 23628 }, { "epoch": 0.9245246106894123, "grad_norm": 0.0, "learning_rate": 2.972914516022518e-07, "loss": 0.8759, "step": 23629 }, { "epoch": 0.9245637373816418, "grad_norm": 0.0, "learning_rate": 2.969848321730384e-07, "loss": 0.9886, "step": 23630 }, { "epoch": 0.9246028640738712, "grad_norm": 0.0, "learning_rate": 2.966783685620922e-07, "loss": 0.9261, "step": 23631 }, { "epoch": 0.9246419907661007, "grad_norm": 0.0, "learning_rate": 2.963720607743303e-07, "loss": 1.0531, "step": 23632 }, { "epoch": 0.92468111745833, "grad_norm": 0.0, "learning_rate": 2.9606590881467445e-07, "loss": 0.8252, "step": 23633 }, { "epoch": 0.9247202441505595, "grad_norm": 0.0, "learning_rate": 2.957599126880395e-07, "loss": 0.8195, "step": 23634 }, { "epoch": 0.9247593708427889, "grad_norm": 0.0, "learning_rate": 2.9545407239934265e-07, "loss": 0.8981, "step": 23635 }, { "epoch": 0.9247984975350184, "grad_norm": 0.0, "learning_rate": 2.951483879534911e-07, "loss": 0.8924, "step": 23636 }, { "epoch": 0.9248376242272478, "grad_norm": 0.0, "learning_rate": 2.9484285935539645e-07, "loss": 0.8464, "step": 23637 }, { "epoch": 0.9248767509194773, "grad_norm": 0.0, "learning_rate": 2.9453748660996264e-07, "loss": 0.9498, "step": 23638 }, { "epoch": 0.9249158776117067, "grad_norm": 0.0, "learning_rate": 2.9423226972209673e-07, "loss": 0.8667, "step": 23639 }, { "epoch": 0.9249550043039362, "grad_norm": 0.0, "learning_rate": 2.9392720869669823e-07, "loss": 1.0489, "step": 23640 }, { "epoch": 0.9249941309961656, "grad_norm": 0.0, "learning_rate": 2.936223035386665e-07, "loss": 1.0217, "step": 23641 }, { "epoch": 0.9250332576883951, "grad_norm": 0.0, "learning_rate": 2.933175542528977e-07, "loss": 0.9865, "step": 23642 }, { "epoch": 0.9250723843806244, "grad_norm": 0.0, "learning_rate": 2.9301296084428553e-07, "loss": 0.9487, "step": 23643 }, { "epoch": 0.9251115110728539, "grad_norm": 0.0, "learning_rate": 2.927085233177218e-07, "loss": 1.0258, "step": 23644 }, { "epoch": 0.9251506377650833, "grad_norm": 0.0, "learning_rate": 2.924042416780959e-07, "loss": 0.9954, "step": 23645 }, { "epoch": 0.9251897644573128, "grad_norm": 0.0, "learning_rate": 2.9210011593029276e-07, "loss": 1.0942, "step": 23646 }, { "epoch": 0.9252288911495422, "grad_norm": 0.0, "learning_rate": 2.9179614607919737e-07, "loss": 0.9659, "step": 23647 }, { "epoch": 0.9252680178417717, "grad_norm": 0.0, "learning_rate": 2.914923321296903e-07, "loss": 1.0544, "step": 23648 }, { "epoch": 0.9253071445340011, "grad_norm": 0.0, "learning_rate": 2.9118867408665206e-07, "loss": 0.9491, "step": 23649 }, { "epoch": 0.9253462712262306, "grad_norm": 0.0, "learning_rate": 2.908851719549566e-07, "loss": 0.9205, "step": 23650 }, { "epoch": 0.92538539791846, "grad_norm": 0.0, "learning_rate": 2.905818257394799e-07, "loss": 0.9175, "step": 23651 }, { "epoch": 0.9254245246106894, "grad_norm": 0.0, "learning_rate": 2.9027863544509263e-07, "loss": 1.0089, "step": 23652 }, { "epoch": 0.9254636513029189, "grad_norm": 0.0, "learning_rate": 2.8997560107666303e-07, "loss": 0.9325, "step": 23653 }, { "epoch": 0.9255027779951482, "grad_norm": 0.0, "learning_rate": 2.896727226390572e-07, "loss": 0.8094, "step": 23654 }, { "epoch": 0.9255419046873777, "grad_norm": 0.0, "learning_rate": 2.8937000013714247e-07, "loss": 0.8768, "step": 23655 }, { "epoch": 0.9255810313796071, "grad_norm": 0.0, "learning_rate": 2.890674335757748e-07, "loss": 0.9825, "step": 23656 }, { "epoch": 0.9256201580718366, "grad_norm": 0.0, "learning_rate": 2.88765022959816e-07, "loss": 0.8681, "step": 23657 }, { "epoch": 0.925659284764066, "grad_norm": 0.0, "learning_rate": 2.884627682941232e-07, "loss": 0.9251, "step": 23658 }, { "epoch": 0.9256984114562955, "grad_norm": 0.0, "learning_rate": 2.8816066958354815e-07, "loss": 0.9507, "step": 23659 }, { "epoch": 0.9257375381485249, "grad_norm": 0.0, "learning_rate": 2.878587268329436e-07, "loss": 0.8719, "step": 23660 }, { "epoch": 0.9257766648407544, "grad_norm": 0.0, "learning_rate": 2.8755694004715674e-07, "loss": 0.9123, "step": 23661 }, { "epoch": 0.9258157915329838, "grad_norm": 0.0, "learning_rate": 2.8725530923103705e-07, "loss": 1.0323, "step": 23662 }, { "epoch": 0.9258549182252133, "grad_norm": 0.0, "learning_rate": 2.8695383438942514e-07, "loss": 0.9443, "step": 23663 }, { "epoch": 0.9258940449174426, "grad_norm": 0.0, "learning_rate": 2.866525155271649e-07, "loss": 0.9949, "step": 23664 }, { "epoch": 0.9259331716096721, "grad_norm": 0.0, "learning_rate": 2.863513526490902e-07, "loss": 1.0112, "step": 23665 }, { "epoch": 0.9259722983019015, "grad_norm": 0.0, "learning_rate": 2.8605034576004497e-07, "loss": 1.0184, "step": 23666 }, { "epoch": 0.926011424994131, "grad_norm": 0.0, "learning_rate": 2.8574949486485647e-07, "loss": 0.9799, "step": 23667 }, { "epoch": 0.9260505516863604, "grad_norm": 0.0, "learning_rate": 2.854487999683597e-07, "loss": 0.8994, "step": 23668 }, { "epoch": 0.9260896783785899, "grad_norm": 0.0, "learning_rate": 2.851482610753808e-07, "loss": 1.0201, "step": 23669 }, { "epoch": 0.9261288050708193, "grad_norm": 0.0, "learning_rate": 2.848478781907493e-07, "loss": 1.083, "step": 23670 }, { "epoch": 0.9261679317630488, "grad_norm": 0.0, "learning_rate": 2.8454765131928573e-07, "loss": 0.9821, "step": 23671 }, { "epoch": 0.9262070584552782, "grad_norm": 0.0, "learning_rate": 2.8424758046581405e-07, "loss": 0.8691, "step": 23672 }, { "epoch": 0.9262461851475077, "grad_norm": 0.0, "learning_rate": 2.839476656351503e-07, "loss": 0.9538, "step": 23673 }, { "epoch": 0.9262853118397371, "grad_norm": 0.0, "learning_rate": 2.8364790683211183e-07, "loss": 1.0284, "step": 23674 }, { "epoch": 0.9263244385319666, "grad_norm": 0.0, "learning_rate": 2.833483040615137e-07, "loss": 1.0294, "step": 23675 }, { "epoch": 0.9263635652241959, "grad_norm": 0.0, "learning_rate": 2.8304885732816647e-07, "loss": 0.8692, "step": 23676 }, { "epoch": 0.9264026919164254, "grad_norm": 0.0, "learning_rate": 2.827495666368774e-07, "loss": 0.9859, "step": 23677 }, { "epoch": 0.9264418186086548, "grad_norm": 0.0, "learning_rate": 2.824504319924548e-07, "loss": 1.0228, "step": 23678 }, { "epoch": 0.9264809453008843, "grad_norm": 0.0, "learning_rate": 2.821514533997005e-07, "loss": 0.977, "step": 23679 }, { "epoch": 0.9265200719931137, "grad_norm": 0.0, "learning_rate": 2.818526308634184e-07, "loss": 0.8599, "step": 23680 }, { "epoch": 0.9265591986853431, "grad_norm": 0.0, "learning_rate": 2.8155396438840464e-07, "loss": 0.9557, "step": 23681 }, { "epoch": 0.9265983253775726, "grad_norm": 0.0, "learning_rate": 2.8125545397945653e-07, "loss": 0.982, "step": 23682 }, { "epoch": 0.926637452069802, "grad_norm": 0.0, "learning_rate": 2.8095709964136573e-07, "loss": 0.9059, "step": 23683 }, { "epoch": 0.9266765787620315, "grad_norm": 0.0, "learning_rate": 2.806589013789274e-07, "loss": 1.0772, "step": 23684 }, { "epoch": 0.9267157054542609, "grad_norm": 0.0, "learning_rate": 2.803608591969276e-07, "loss": 0.9087, "step": 23685 }, { "epoch": 0.9267548321464903, "grad_norm": 0.0, "learning_rate": 2.800629731001536e-07, "loss": 0.9354, "step": 23686 }, { "epoch": 0.9267939588387197, "grad_norm": 0.0, "learning_rate": 2.7976524309338726e-07, "loss": 1.048, "step": 23687 }, { "epoch": 0.9268330855309492, "grad_norm": 0.0, "learning_rate": 2.7946766918141134e-07, "loss": 1.0855, "step": 23688 }, { "epoch": 0.9268722122231786, "grad_norm": 0.0, "learning_rate": 2.7917025136900423e-07, "loss": 0.8244, "step": 23689 }, { "epoch": 0.9269113389154081, "grad_norm": 0.0, "learning_rate": 2.7887298966094325e-07, "loss": 1.0112, "step": 23690 }, { "epoch": 0.9269504656076375, "grad_norm": 0.0, "learning_rate": 2.78575884061999e-07, "loss": 0.9871, "step": 23691 }, { "epoch": 0.926989592299867, "grad_norm": 0.0, "learning_rate": 2.7827893457694545e-07, "loss": 0.9203, "step": 23692 }, { "epoch": 0.9270287189920964, "grad_norm": 0.0, "learning_rate": 2.7798214121054877e-07, "loss": 0.9886, "step": 23693 }, { "epoch": 0.9270678456843259, "grad_norm": 0.0, "learning_rate": 2.7768550396757853e-07, "loss": 0.9798, "step": 23694 }, { "epoch": 0.9271069723765553, "grad_norm": 0.0, "learning_rate": 2.7738902285279534e-07, "loss": 0.9566, "step": 23695 }, { "epoch": 0.9271460990687848, "grad_norm": 0.0, "learning_rate": 2.7709269787096093e-07, "loss": 0.9229, "step": 23696 }, { "epoch": 0.9271852257610141, "grad_norm": 0.0, "learning_rate": 2.7679652902683596e-07, "loss": 0.8352, "step": 23697 }, { "epoch": 0.9272243524532436, "grad_norm": 0.0, "learning_rate": 2.765005163251733e-07, "loss": 0.9904, "step": 23698 }, { "epoch": 0.927263479145473, "grad_norm": 0.0, "learning_rate": 2.7620465977072794e-07, "loss": 0.972, "step": 23699 }, { "epoch": 0.9273026058377025, "grad_norm": 0.0, "learning_rate": 2.759089593682518e-07, "loss": 1.1124, "step": 23700 }, { "epoch": 0.9273417325299319, "grad_norm": 0.0, "learning_rate": 2.7561341512249316e-07, "loss": 0.9416, "step": 23701 }, { "epoch": 0.9273808592221614, "grad_norm": 0.0, "learning_rate": 2.753180270381972e-07, "loss": 0.9547, "step": 23702 }, { "epoch": 0.9274199859143908, "grad_norm": 0.0, "learning_rate": 2.7502279512010897e-07, "loss": 0.8436, "step": 23703 }, { "epoch": 0.9274591126066203, "grad_norm": 0.0, "learning_rate": 2.7472771937296805e-07, "loss": 0.9467, "step": 23704 }, { "epoch": 0.9274982392988497, "grad_norm": 0.0, "learning_rate": 2.744327998015128e-07, "loss": 0.7803, "step": 23705 }, { "epoch": 0.9275373659910792, "grad_norm": 0.0, "learning_rate": 2.7413803641047954e-07, "loss": 0.9747, "step": 23706 }, { "epoch": 0.9275764926833086, "grad_norm": 0.0, "learning_rate": 2.738434292046044e-07, "loss": 0.9894, "step": 23707 }, { "epoch": 0.927615619375538, "grad_norm": 0.0, "learning_rate": 2.735489781886147e-07, "loss": 1.0663, "step": 23708 }, { "epoch": 0.9276547460677674, "grad_norm": 0.0, "learning_rate": 2.7325468336724e-07, "loss": 0.9388, "step": 23709 }, { "epoch": 0.9276938727599968, "grad_norm": 0.0, "learning_rate": 2.729605447452077e-07, "loss": 0.9675, "step": 23710 }, { "epoch": 0.9277329994522263, "grad_norm": 0.0, "learning_rate": 2.7266656232724063e-07, "loss": 1.0353, "step": 23711 }, { "epoch": 0.9277721261444557, "grad_norm": 0.0, "learning_rate": 2.7237273611805836e-07, "loss": 1.0219, "step": 23712 }, { "epoch": 0.9278112528366852, "grad_norm": 0.0, "learning_rate": 2.7207906612238264e-07, "loss": 0.8874, "step": 23713 }, { "epoch": 0.9278503795289146, "grad_norm": 0.0, "learning_rate": 2.717855523449242e-07, "loss": 0.9559, "step": 23714 }, { "epoch": 0.9278895062211441, "grad_norm": 0.0, "learning_rate": 2.7149219479040257e-07, "loss": 1.0215, "step": 23715 }, { "epoch": 0.9279286329133735, "grad_norm": 0.0, "learning_rate": 2.7119899346352395e-07, "loss": 0.8989, "step": 23716 }, { "epoch": 0.927967759605603, "grad_norm": 0.0, "learning_rate": 2.709059483690002e-07, "loss": 0.949, "step": 23717 }, { "epoch": 0.9280068862978323, "grad_norm": 0.0, "learning_rate": 2.7061305951153415e-07, "loss": 0.9102, "step": 23718 }, { "epoch": 0.9280460129900618, "grad_norm": 0.0, "learning_rate": 2.703203268958321e-07, "loss": 1.0089, "step": 23719 }, { "epoch": 0.9280851396822912, "grad_norm": 0.0, "learning_rate": 2.700277505265925e-07, "loss": 0.9702, "step": 23720 }, { "epoch": 0.9281242663745207, "grad_norm": 0.0, "learning_rate": 2.69735330408516e-07, "loss": 1.0567, "step": 23721 }, { "epoch": 0.9281633930667501, "grad_norm": 0.0, "learning_rate": 2.694430665462966e-07, "loss": 0.9093, "step": 23722 }, { "epoch": 0.9282025197589796, "grad_norm": 0.0, "learning_rate": 2.691509589446284e-07, "loss": 0.9588, "step": 23723 }, { "epoch": 0.928241646451209, "grad_norm": 0.0, "learning_rate": 2.6885900760820204e-07, "loss": 0.8401, "step": 23724 }, { "epoch": 0.9282807731434385, "grad_norm": 0.0, "learning_rate": 2.6856721254170714e-07, "loss": 0.932, "step": 23725 }, { "epoch": 0.9283198998356679, "grad_norm": 0.0, "learning_rate": 2.682755737498277e-07, "loss": 0.9313, "step": 23726 }, { "epoch": 0.9283590265278974, "grad_norm": 0.0, "learning_rate": 2.679840912372489e-07, "loss": 0.9505, "step": 23727 }, { "epoch": 0.9283981532201268, "grad_norm": 0.0, "learning_rate": 2.676927650086503e-07, "loss": 0.9467, "step": 23728 }, { "epoch": 0.9284372799123563, "grad_norm": 0.0, "learning_rate": 2.6740159506871146e-07, "loss": 0.9495, "step": 23729 }, { "epoch": 0.9284764066045856, "grad_norm": 0.0, "learning_rate": 2.6711058142210643e-07, "loss": 0.8905, "step": 23730 }, { "epoch": 0.9285155332968151, "grad_norm": 0.0, "learning_rate": 2.668197240735104e-07, "loss": 0.8708, "step": 23731 }, { "epoch": 0.9285546599890445, "grad_norm": 0.0, "learning_rate": 2.6652902302759077e-07, "loss": 1.0479, "step": 23732 }, { "epoch": 0.928593786681274, "grad_norm": 0.0, "learning_rate": 2.6623847828902037e-07, "loss": 0.797, "step": 23733 }, { "epoch": 0.9286329133735034, "grad_norm": 0.0, "learning_rate": 2.659480898624622e-07, "loss": 1.0309, "step": 23734 }, { "epoch": 0.9286720400657329, "grad_norm": 0.0, "learning_rate": 2.6565785775258143e-07, "loss": 0.9853, "step": 23735 }, { "epoch": 0.9287111667579623, "grad_norm": 0.0, "learning_rate": 2.6536778196403657e-07, "loss": 0.9418, "step": 23736 }, { "epoch": 0.9287502934501917, "grad_norm": 0.0, "learning_rate": 2.6507786250148714e-07, "loss": 0.908, "step": 23737 }, { "epoch": 0.9287894201424212, "grad_norm": 0.0, "learning_rate": 2.647880993695884e-07, "loss": 1.0809, "step": 23738 }, { "epoch": 0.9288285468346505, "grad_norm": 0.0, "learning_rate": 2.6449849257299545e-07, "loss": 0.8864, "step": 23739 }, { "epoch": 0.92886767352688, "grad_norm": 0.0, "learning_rate": 2.6420904211635567e-07, "loss": 0.9758, "step": 23740 }, { "epoch": 0.9289068002191094, "grad_norm": 0.0, "learning_rate": 2.6391974800431985e-07, "loss": 0.8785, "step": 23741 }, { "epoch": 0.9289459269113389, "grad_norm": 0.0, "learning_rate": 2.636306102415331e-07, "loss": 0.9796, "step": 23742 }, { "epoch": 0.9289850536035683, "grad_norm": 0.0, "learning_rate": 2.6334162883263736e-07, "loss": 0.9361, "step": 23743 }, { "epoch": 0.9290241802957978, "grad_norm": 0.0, "learning_rate": 2.630528037822755e-07, "loss": 0.8847, "step": 23744 }, { "epoch": 0.9290633069880272, "grad_norm": 0.0, "learning_rate": 2.627641350950838e-07, "loss": 0.9603, "step": 23745 }, { "epoch": 0.9291024336802567, "grad_norm": 0.0, "learning_rate": 2.624756227756986e-07, "loss": 0.9628, "step": 23746 }, { "epoch": 0.9291415603724861, "grad_norm": 0.0, "learning_rate": 2.6218726682875395e-07, "loss": 0.9074, "step": 23747 }, { "epoch": 0.9291806870647156, "grad_norm": 0.0, "learning_rate": 2.6189906725887946e-07, "loss": 0.9397, "step": 23748 }, { "epoch": 0.929219813756945, "grad_norm": 0.0, "learning_rate": 2.616110240707015e-07, "loss": 0.9553, "step": 23749 }, { "epoch": 0.9292589404491745, "grad_norm": 0.0, "learning_rate": 2.613231372688496e-07, "loss": 1.0391, "step": 23750 }, { "epoch": 0.9292980671414038, "grad_norm": 0.0, "learning_rate": 2.610354068579446e-07, "loss": 0.8771, "step": 23751 }, { "epoch": 0.9293371938336333, "grad_norm": 0.0, "learning_rate": 2.6074783284260717e-07, "loss": 1.0714, "step": 23752 }, { "epoch": 0.9293763205258627, "grad_norm": 0.0, "learning_rate": 2.604604152274559e-07, "loss": 0.8849, "step": 23753 }, { "epoch": 0.9294154472180922, "grad_norm": 0.0, "learning_rate": 2.6017315401710486e-07, "loss": 0.963, "step": 23754 }, { "epoch": 0.9294545739103216, "grad_norm": 0.0, "learning_rate": 2.598860492161692e-07, "loss": 0.9979, "step": 23755 }, { "epoch": 0.9294937006025511, "grad_norm": 0.0, "learning_rate": 2.5959910082925975e-07, "loss": 0.9246, "step": 23756 }, { "epoch": 0.9295328272947805, "grad_norm": 0.0, "learning_rate": 2.593123088609817e-07, "loss": 0.9852, "step": 23757 }, { "epoch": 0.92957195398701, "grad_norm": 0.0, "learning_rate": 2.590256733159413e-07, "loss": 0.9777, "step": 23758 }, { "epoch": 0.9296110806792394, "grad_norm": 0.0, "learning_rate": 2.5873919419874383e-07, "loss": 0.9919, "step": 23759 }, { "epoch": 0.9296502073714689, "grad_norm": 0.0, "learning_rate": 2.584528715139889e-07, "loss": 0.8158, "step": 23760 }, { "epoch": 0.9296893340636982, "grad_norm": 0.0, "learning_rate": 2.5816670526627285e-07, "loss": 0.9167, "step": 23761 }, { "epoch": 0.9297284607559277, "grad_norm": 0.0, "learning_rate": 2.5788069546019423e-07, "loss": 0.8373, "step": 23762 }, { "epoch": 0.9297675874481571, "grad_norm": 0.0, "learning_rate": 2.575948421003416e-07, "loss": 0.9082, "step": 23763 }, { "epoch": 0.9298067141403866, "grad_norm": 0.0, "learning_rate": 2.57309145191309e-07, "loss": 0.964, "step": 23764 }, { "epoch": 0.929845840832616, "grad_norm": 0.0, "learning_rate": 2.5702360473768396e-07, "loss": 1.0624, "step": 23765 }, { "epoch": 0.9298849675248454, "grad_norm": 0.0, "learning_rate": 2.5673822074405053e-07, "loss": 0.9474, "step": 23766 }, { "epoch": 0.9299240942170749, "grad_norm": 0.0, "learning_rate": 2.564529932149906e-07, "loss": 0.9472, "step": 23767 }, { "epoch": 0.9299632209093043, "grad_norm": 0.0, "learning_rate": 2.5616792215508943e-07, "loss": 1.0247, "step": 23768 }, { "epoch": 0.9300023476015338, "grad_norm": 0.0, "learning_rate": 2.5588300756892004e-07, "loss": 0.9884, "step": 23769 }, { "epoch": 0.9300414742937632, "grad_norm": 0.0, "learning_rate": 2.555982494610598e-07, "loss": 1.0797, "step": 23770 }, { "epoch": 0.9300806009859927, "grad_norm": 0.0, "learning_rate": 2.5531364783608073e-07, "loss": 0.9992, "step": 23771 }, { "epoch": 0.930119727678222, "grad_norm": 0.0, "learning_rate": 2.5502920269855346e-07, "loss": 0.8183, "step": 23772 }, { "epoch": 0.9301588543704515, "grad_norm": 0.0, "learning_rate": 2.547449140530467e-07, "loss": 0.86, "step": 23773 }, { "epoch": 0.9301979810626809, "grad_norm": 0.0, "learning_rate": 2.544607819041256e-07, "loss": 0.8992, "step": 23774 }, { "epoch": 0.9302371077549104, "grad_norm": 0.0, "learning_rate": 2.541768062563521e-07, "loss": 1.0599, "step": 23775 }, { "epoch": 0.9302762344471398, "grad_norm": 0.0, "learning_rate": 2.53892987114287e-07, "loss": 0.7633, "step": 23776 }, { "epoch": 0.9303153611393693, "grad_norm": 0.0, "learning_rate": 2.5360932448248663e-07, "loss": 0.9028, "step": 23777 }, { "epoch": 0.9303544878315987, "grad_norm": 0.0, "learning_rate": 2.533258183655096e-07, "loss": 0.9398, "step": 23778 }, { "epoch": 0.9303936145238282, "grad_norm": 0.0, "learning_rate": 2.530424687679056e-07, "loss": 0.8362, "step": 23779 }, { "epoch": 0.9304327412160576, "grad_norm": 0.0, "learning_rate": 2.527592756942265e-07, "loss": 0.9834, "step": 23780 }, { "epoch": 0.9304718679082871, "grad_norm": 0.0, "learning_rate": 2.5247623914901984e-07, "loss": 0.9341, "step": 23781 }, { "epoch": 0.9305109946005165, "grad_norm": 0.0, "learning_rate": 2.5219335913682864e-07, "loss": 0.9734, "step": 23782 }, { "epoch": 0.930550121292746, "grad_norm": 0.0, "learning_rate": 2.519106356621981e-07, "loss": 1.0081, "step": 23783 }, { "epoch": 0.9305892479849753, "grad_norm": 0.0, "learning_rate": 2.5162806872966903e-07, "loss": 0.9023, "step": 23784 }, { "epoch": 0.9306283746772048, "grad_norm": 0.0, "learning_rate": 2.513456583437768e-07, "loss": 0.9545, "step": 23785 }, { "epoch": 0.9306675013694342, "grad_norm": 0.0, "learning_rate": 2.5106340450905763e-07, "loss": 0.9478, "step": 23786 }, { "epoch": 0.9307066280616637, "grad_norm": 0.0, "learning_rate": 2.507813072300447e-07, "loss": 1.0941, "step": 23787 }, { "epoch": 0.9307457547538931, "grad_norm": 0.0, "learning_rate": 2.504993665112665e-07, "loss": 0.876, "step": 23788 }, { "epoch": 0.9307848814461226, "grad_norm": 0.0, "learning_rate": 2.502175823572517e-07, "loss": 0.9959, "step": 23789 }, { "epoch": 0.930824008138352, "grad_norm": 0.0, "learning_rate": 2.499359547725255e-07, "loss": 0.9039, "step": 23790 }, { "epoch": 0.9308631348305815, "grad_norm": 0.0, "learning_rate": 2.496544837616111e-07, "loss": 1.0159, "step": 23791 }, { "epoch": 0.9309022615228109, "grad_norm": 0.0, "learning_rate": 2.493731693290269e-07, "loss": 1.0103, "step": 23792 }, { "epoch": 0.9309413882150404, "grad_norm": 0.0, "learning_rate": 2.4909201147929275e-07, "loss": 0.8198, "step": 23793 }, { "epoch": 0.9309805149072697, "grad_norm": 0.0, "learning_rate": 2.4881101021691945e-07, "loss": 0.9025, "step": 23794 }, { "epoch": 0.9310196415994991, "grad_norm": 0.0, "learning_rate": 2.485301655464256e-07, "loss": 0.97, "step": 23795 }, { "epoch": 0.9310587682917286, "grad_norm": 0.0, "learning_rate": 2.4824947747231544e-07, "loss": 0.8935, "step": 23796 }, { "epoch": 0.931097894983958, "grad_norm": 0.0, "learning_rate": 2.4796894599910194e-07, "loss": 1.0655, "step": 23797 }, { "epoch": 0.9311370216761875, "grad_norm": 0.0, "learning_rate": 2.476885711312849e-07, "loss": 0.9744, "step": 23798 }, { "epoch": 0.9311761483684169, "grad_norm": 0.0, "learning_rate": 2.474083528733695e-07, "loss": 0.9644, "step": 23799 }, { "epoch": 0.9312152750606464, "grad_norm": 0.0, "learning_rate": 2.4712829122985447e-07, "loss": 0.9426, "step": 23800 }, { "epoch": 0.9312544017528758, "grad_norm": 0.0, "learning_rate": 2.468483862052407e-07, "loss": 0.9902, "step": 23801 }, { "epoch": 0.9312935284451053, "grad_norm": 0.0, "learning_rate": 2.4656863780401775e-07, "loss": 1.0438, "step": 23802 }, { "epoch": 0.9313326551373347, "grad_norm": 0.0, "learning_rate": 2.462890460306822e-07, "loss": 1.0042, "step": 23803 }, { "epoch": 0.9313717818295641, "grad_norm": 0.0, "learning_rate": 2.4600961088972143e-07, "loss": 1.0106, "step": 23804 }, { "epoch": 0.9314109085217935, "grad_norm": 0.0, "learning_rate": 2.4573033238562525e-07, "loss": 0.976, "step": 23805 }, { "epoch": 0.931450035214023, "grad_norm": 0.0, "learning_rate": 2.454512105228768e-07, "loss": 0.9888, "step": 23806 }, { "epoch": 0.9314891619062524, "grad_norm": 0.0, "learning_rate": 2.4517224530595794e-07, "loss": 0.9837, "step": 23807 }, { "epoch": 0.9315282885984819, "grad_norm": 0.0, "learning_rate": 2.4489343673934964e-07, "loss": 0.9161, "step": 23808 }, { "epoch": 0.9315674152907113, "grad_norm": 0.0, "learning_rate": 2.4461478482752933e-07, "loss": 0.9099, "step": 23809 }, { "epoch": 0.9316065419829408, "grad_norm": 0.0, "learning_rate": 2.443362895749712e-07, "loss": 0.9585, "step": 23810 }, { "epoch": 0.9316456686751702, "grad_norm": 0.0, "learning_rate": 2.440579509861485e-07, "loss": 0.9683, "step": 23811 }, { "epoch": 0.9316847953673997, "grad_norm": 0.0, "learning_rate": 2.4377976906552746e-07, "loss": 0.8962, "step": 23812 }, { "epoch": 0.9317239220596291, "grad_norm": 0.0, "learning_rate": 2.435017438175813e-07, "loss": 0.9619, "step": 23813 }, { "epoch": 0.9317630487518586, "grad_norm": 0.0, "learning_rate": 2.432238752467697e-07, "loss": 1.0159, "step": 23814 }, { "epoch": 0.9318021754440879, "grad_norm": 0.0, "learning_rate": 2.4294616335755806e-07, "loss": 1.0676, "step": 23815 }, { "epoch": 0.9318413021363174, "grad_norm": 0.0, "learning_rate": 2.4266860815440275e-07, "loss": 0.84, "step": 23816 }, { "epoch": 0.9318804288285468, "grad_norm": 0.0, "learning_rate": 2.4239120964176577e-07, "loss": 1.0385, "step": 23817 }, { "epoch": 0.9319195555207763, "grad_norm": 0.0, "learning_rate": 2.421139678240969e-07, "loss": 1.0818, "step": 23818 }, { "epoch": 0.9319586822130057, "grad_norm": 0.0, "learning_rate": 2.4183688270585147e-07, "loss": 0.9781, "step": 23819 }, { "epoch": 0.9319978089052352, "grad_norm": 0.0, "learning_rate": 2.4155995429147705e-07, "loss": 1.0082, "step": 23820 }, { "epoch": 0.9320369355974646, "grad_norm": 0.0, "learning_rate": 2.4128318258542227e-07, "loss": 0.931, "step": 23821 }, { "epoch": 0.932076062289694, "grad_norm": 0.0, "learning_rate": 2.4100656759213135e-07, "loss": 0.849, "step": 23822 }, { "epoch": 0.9321151889819235, "grad_norm": 0.0, "learning_rate": 2.4073010931604635e-07, "loss": 0.9645, "step": 23823 }, { "epoch": 0.9321543156741529, "grad_norm": 0.0, "learning_rate": 2.4045380776160587e-07, "loss": 0.909, "step": 23824 }, { "epoch": 0.9321934423663824, "grad_norm": 0.0, "learning_rate": 2.4017766293324863e-07, "loss": 0.9326, "step": 23825 }, { "epoch": 0.9322325690586117, "grad_norm": 0.0, "learning_rate": 2.399016748354088e-07, "loss": 0.9088, "step": 23826 }, { "epoch": 0.9322716957508412, "grad_norm": 0.0, "learning_rate": 2.3962584347251737e-07, "loss": 0.9024, "step": 23827 }, { "epoch": 0.9323108224430706, "grad_norm": 0.0, "learning_rate": 2.393501688490041e-07, "loss": 1.0126, "step": 23828 }, { "epoch": 0.9323499491353001, "grad_norm": 0.0, "learning_rate": 2.390746509692976e-07, "loss": 0.8611, "step": 23829 }, { "epoch": 0.9323890758275295, "grad_norm": 0.0, "learning_rate": 2.3879928983782e-07, "loss": 0.8925, "step": 23830 }, { "epoch": 0.932428202519759, "grad_norm": 0.0, "learning_rate": 2.385240854589943e-07, "loss": 0.9108, "step": 23831 }, { "epoch": 0.9324673292119884, "grad_norm": 0.0, "learning_rate": 2.382490378372404e-07, "loss": 0.9065, "step": 23832 }, { "epoch": 0.9325064559042179, "grad_norm": 0.0, "learning_rate": 2.3797414697697475e-07, "loss": 1.0207, "step": 23833 }, { "epoch": 0.9325455825964473, "grad_norm": 0.0, "learning_rate": 2.3769941288261156e-07, "loss": 0.959, "step": 23834 }, { "epoch": 0.9325847092886768, "grad_norm": 0.0, "learning_rate": 2.374248355585629e-07, "loss": 0.9266, "step": 23835 }, { "epoch": 0.9326238359809061, "grad_norm": 0.0, "learning_rate": 2.3715041500923853e-07, "loss": 0.8086, "step": 23836 }, { "epoch": 0.9326629626731356, "grad_norm": 0.0, "learning_rate": 2.3687615123904495e-07, "loss": 1.0641, "step": 23837 }, { "epoch": 0.932702089365365, "grad_norm": 0.0, "learning_rate": 2.3660204425238754e-07, "loss": 0.9399, "step": 23838 }, { "epoch": 0.9327412160575945, "grad_norm": 0.0, "learning_rate": 2.3632809405366387e-07, "loss": 0.9348, "step": 23839 }, { "epoch": 0.9327803427498239, "grad_norm": 0.0, "learning_rate": 2.360543006472793e-07, "loss": 0.9001, "step": 23840 }, { "epoch": 0.9328194694420534, "grad_norm": 0.0, "learning_rate": 2.3578066403762701e-07, "loss": 0.9695, "step": 23841 }, { "epoch": 0.9328585961342828, "grad_norm": 0.0, "learning_rate": 2.3550718422910235e-07, "loss": 0.9891, "step": 23842 }, { "epoch": 0.9328977228265123, "grad_norm": 0.0, "learning_rate": 2.3523386122609514e-07, "loss": 0.9372, "step": 23843 }, { "epoch": 0.9329368495187417, "grad_norm": 0.0, "learning_rate": 2.3496069503299745e-07, "loss": 1.0427, "step": 23844 }, { "epoch": 0.9329759762109712, "grad_norm": 0.0, "learning_rate": 2.3468768565419463e-07, "loss": 0.9077, "step": 23845 }, { "epoch": 0.9330151029032006, "grad_norm": 0.0, "learning_rate": 2.344148330940721e-07, "loss": 0.8845, "step": 23846 }, { "epoch": 0.93305422959543, "grad_norm": 0.0, "learning_rate": 2.3414213735700853e-07, "loss": 0.9665, "step": 23847 }, { "epoch": 0.9330933562876594, "grad_norm": 0.0, "learning_rate": 2.3386959844738598e-07, "loss": 0.9044, "step": 23848 }, { "epoch": 0.9331324829798889, "grad_norm": 0.0, "learning_rate": 2.3359721636957878e-07, "loss": 0.9747, "step": 23849 }, { "epoch": 0.9331716096721183, "grad_norm": 0.0, "learning_rate": 2.3332499112796447e-07, "loss": 0.8276, "step": 23850 }, { "epoch": 0.9332107363643477, "grad_norm": 0.0, "learning_rate": 2.330529227269107e-07, "loss": 1.0239, "step": 23851 }, { "epoch": 0.9332498630565772, "grad_norm": 0.0, "learning_rate": 2.3278101117078843e-07, "loss": 0.9498, "step": 23852 }, { "epoch": 0.9332889897488066, "grad_norm": 0.0, "learning_rate": 2.3250925646396416e-07, "loss": 0.9126, "step": 23853 }, { "epoch": 0.9333281164410361, "grad_norm": 0.0, "learning_rate": 2.3223765861080218e-07, "loss": 0.8869, "step": 23854 }, { "epoch": 0.9333672431332655, "grad_norm": 0.0, "learning_rate": 2.3196621761566452e-07, "loss": 0.8913, "step": 23855 }, { "epoch": 0.933406369825495, "grad_norm": 0.0, "learning_rate": 2.3169493348290884e-07, "loss": 0.8504, "step": 23856 }, { "epoch": 0.9334454965177243, "grad_norm": 0.0, "learning_rate": 2.3142380621689052e-07, "loss": 1.0777, "step": 23857 }, { "epoch": 0.9334846232099538, "grad_norm": 0.0, "learning_rate": 2.3115283582196725e-07, "loss": 0.8984, "step": 23858 }, { "epoch": 0.9335237499021832, "grad_norm": 0.0, "learning_rate": 2.3088202230248768e-07, "loss": 1.0955, "step": 23859 }, { "epoch": 0.9335628765944127, "grad_norm": 0.0, "learning_rate": 2.3061136566280175e-07, "loss": 0.9867, "step": 23860 }, { "epoch": 0.9336020032866421, "grad_norm": 0.0, "learning_rate": 2.3034086590725368e-07, "loss": 1.0365, "step": 23861 }, { "epoch": 0.9336411299788716, "grad_norm": 0.0, "learning_rate": 2.3007052304019228e-07, "loss": 1.0078, "step": 23862 }, { "epoch": 0.933680256671101, "grad_norm": 0.0, "learning_rate": 2.2980033706595407e-07, "loss": 1.0023, "step": 23863 }, { "epoch": 0.9337193833633305, "grad_norm": 0.0, "learning_rate": 2.2953030798888e-07, "loss": 1.0352, "step": 23864 }, { "epoch": 0.9337585100555599, "grad_norm": 0.0, "learning_rate": 2.292604358133066e-07, "loss": 0.9934, "step": 23865 }, { "epoch": 0.9337976367477894, "grad_norm": 0.0, "learning_rate": 2.2899072054356598e-07, "loss": 0.9238, "step": 23866 }, { "epoch": 0.9338367634400188, "grad_norm": 0.0, "learning_rate": 2.287211621839913e-07, "loss": 0.871, "step": 23867 }, { "epoch": 0.9338758901322483, "grad_norm": 0.0, "learning_rate": 2.284517607389114e-07, "loss": 0.861, "step": 23868 }, { "epoch": 0.9339150168244776, "grad_norm": 0.0, "learning_rate": 2.2818251621265052e-07, "loss": 0.8417, "step": 23869 }, { "epoch": 0.9339541435167071, "grad_norm": 0.0, "learning_rate": 2.2791342860953415e-07, "loss": 0.89, "step": 23870 }, { "epoch": 0.9339932702089365, "grad_norm": 0.0, "learning_rate": 2.2764449793388432e-07, "loss": 0.9999, "step": 23871 }, { "epoch": 0.934032396901166, "grad_norm": 0.0, "learning_rate": 2.2737572419001653e-07, "loss": 1.007, "step": 23872 }, { "epoch": 0.9340715235933954, "grad_norm": 0.0, "learning_rate": 2.271071073822495e-07, "loss": 0.8522, "step": 23873 }, { "epoch": 0.9341106502856249, "grad_norm": 0.0, "learning_rate": 2.2683864751489537e-07, "loss": 1.0494, "step": 23874 }, { "epoch": 0.9341497769778543, "grad_norm": 0.0, "learning_rate": 2.2657034459226735e-07, "loss": 0.9505, "step": 23875 }, { "epoch": 0.9341889036700838, "grad_norm": 0.0, "learning_rate": 2.263021986186731e-07, "loss": 0.9464, "step": 23876 }, { "epoch": 0.9342280303623132, "grad_norm": 0.0, "learning_rate": 2.2603420959841805e-07, "loss": 0.9713, "step": 23877 }, { "epoch": 0.9342671570545427, "grad_norm": 0.0, "learning_rate": 2.2576637753580544e-07, "loss": 1.0467, "step": 23878 }, { "epoch": 0.934306283746772, "grad_norm": 0.0, "learning_rate": 2.2549870243513737e-07, "loss": 0.8035, "step": 23879 }, { "epoch": 0.9343454104390014, "grad_norm": 0.0, "learning_rate": 2.2523118430071157e-07, "loss": 1.0486, "step": 23880 }, { "epoch": 0.9343845371312309, "grad_norm": 0.0, "learning_rate": 2.2496382313682562e-07, "loss": 0.9953, "step": 23881 }, { "epoch": 0.9344236638234603, "grad_norm": 0.0, "learning_rate": 2.246966189477706e-07, "loss": 0.975, "step": 23882 }, { "epoch": 0.9344627905156898, "grad_norm": 0.0, "learning_rate": 2.2442957173783863e-07, "loss": 1.0256, "step": 23883 }, { "epoch": 0.9345019172079192, "grad_norm": 0.0, "learning_rate": 2.2416268151131848e-07, "loss": 0.8993, "step": 23884 }, { "epoch": 0.9345410439001487, "grad_norm": 0.0, "learning_rate": 2.2389594827249784e-07, "loss": 1.0125, "step": 23885 }, { "epoch": 0.9345801705923781, "grad_norm": 0.0, "learning_rate": 2.2362937202565549e-07, "loss": 0.9681, "step": 23886 }, { "epoch": 0.9346192972846076, "grad_norm": 0.0, "learning_rate": 2.2336295277507691e-07, "loss": 0.9852, "step": 23887 }, { "epoch": 0.934658423976837, "grad_norm": 0.0, "learning_rate": 2.2309669052503646e-07, "loss": 0.9352, "step": 23888 }, { "epoch": 0.9346975506690665, "grad_norm": 0.0, "learning_rate": 2.2283058527981405e-07, "loss": 1.0909, "step": 23889 }, { "epoch": 0.9347366773612958, "grad_norm": 0.0, "learning_rate": 2.225646370436796e-07, "loss": 0.8703, "step": 23890 }, { "epoch": 0.9347758040535253, "grad_norm": 0.0, "learning_rate": 2.2229884582090634e-07, "loss": 0.9474, "step": 23891 }, { "epoch": 0.9348149307457547, "grad_norm": 0.0, "learning_rate": 2.2203321161575974e-07, "loss": 0.9508, "step": 23892 }, { "epoch": 0.9348540574379842, "grad_norm": 0.0, "learning_rate": 2.2176773443250865e-07, "loss": 0.9914, "step": 23893 }, { "epoch": 0.9348931841302136, "grad_norm": 0.0, "learning_rate": 2.2150241427541518e-07, "loss": 1.0222, "step": 23894 }, { "epoch": 0.9349323108224431, "grad_norm": 0.0, "learning_rate": 2.2123725114873927e-07, "loss": 0.9085, "step": 23895 }, { "epoch": 0.9349714375146725, "grad_norm": 0.0, "learning_rate": 2.2097224505673976e-07, "loss": 0.9819, "step": 23896 }, { "epoch": 0.935010564206902, "grad_norm": 0.0, "learning_rate": 2.2070739600367207e-07, "loss": 0.9702, "step": 23897 }, { "epoch": 0.9350496908991314, "grad_norm": 0.0, "learning_rate": 2.204427039937884e-07, "loss": 0.9219, "step": 23898 }, { "epoch": 0.9350888175913609, "grad_norm": 0.0, "learning_rate": 2.2017816903134314e-07, "loss": 0.9107, "step": 23899 }, { "epoch": 0.9351279442835903, "grad_norm": 0.0, "learning_rate": 2.1991379112057954e-07, "loss": 0.9053, "step": 23900 }, { "epoch": 0.9351670709758197, "grad_norm": 0.0, "learning_rate": 2.196495702657453e-07, "loss": 0.8531, "step": 23901 }, { "epoch": 0.9352061976680491, "grad_norm": 0.0, "learning_rate": 2.193855064710837e-07, "loss": 1.1429, "step": 23902 }, { "epoch": 0.9352453243602786, "grad_norm": 0.0, "learning_rate": 2.191215997408358e-07, "loss": 0.9218, "step": 23903 }, { "epoch": 0.935284451052508, "grad_norm": 0.0, "learning_rate": 2.188578500792382e-07, "loss": 1.0553, "step": 23904 }, { "epoch": 0.9353235777447375, "grad_norm": 0.0, "learning_rate": 2.1859425749052865e-07, "loss": 0.9672, "step": 23905 }, { "epoch": 0.9353627044369669, "grad_norm": 0.0, "learning_rate": 2.1833082197893595e-07, "loss": 0.9034, "step": 23906 }, { "epoch": 0.9354018311291964, "grad_norm": 0.0, "learning_rate": 2.1806754354869452e-07, "loss": 1.0, "step": 23907 }, { "epoch": 0.9354409578214258, "grad_norm": 0.0, "learning_rate": 2.1780442220403097e-07, "loss": 0.8711, "step": 23908 }, { "epoch": 0.9354800845136552, "grad_norm": 0.0, "learning_rate": 2.1754145794917082e-07, "loss": 0.8305, "step": 23909 }, { "epoch": 0.9355192112058847, "grad_norm": 0.0, "learning_rate": 2.172786507883362e-07, "loss": 0.9694, "step": 23910 }, { "epoch": 0.935558337898114, "grad_norm": 0.0, "learning_rate": 2.1701600072574825e-07, "loss": 0.9819, "step": 23911 }, { "epoch": 0.9355974645903435, "grad_norm": 0.0, "learning_rate": 2.1675350776562466e-07, "loss": 1.0279, "step": 23912 }, { "epoch": 0.9356365912825729, "grad_norm": 0.0, "learning_rate": 2.1649117191218094e-07, "loss": 0.9753, "step": 23913 }, { "epoch": 0.9356757179748024, "grad_norm": 0.0, "learning_rate": 2.162289931696282e-07, "loss": 0.9375, "step": 23914 }, { "epoch": 0.9357148446670318, "grad_norm": 0.0, "learning_rate": 2.1596697154217972e-07, "loss": 0.9138, "step": 23915 }, { "epoch": 0.9357539713592613, "grad_norm": 0.0, "learning_rate": 2.15705107034041e-07, "loss": 1.0131, "step": 23916 }, { "epoch": 0.9357930980514907, "grad_norm": 0.0, "learning_rate": 2.1544339964941762e-07, "loss": 0.9243, "step": 23917 }, { "epoch": 0.9358322247437202, "grad_norm": 0.0, "learning_rate": 2.1518184939251174e-07, "loss": 1.0691, "step": 23918 }, { "epoch": 0.9358713514359496, "grad_norm": 0.0, "learning_rate": 2.1492045626752554e-07, "loss": 0.9746, "step": 23919 }, { "epoch": 0.9359104781281791, "grad_norm": 0.0, "learning_rate": 2.146592202786557e-07, "loss": 0.9454, "step": 23920 }, { "epoch": 0.9359496048204085, "grad_norm": 0.0, "learning_rate": 2.1439814143009553e-07, "loss": 1.0503, "step": 23921 }, { "epoch": 0.935988731512638, "grad_norm": 0.0, "learning_rate": 2.1413721972604052e-07, "loss": 0.9406, "step": 23922 }, { "epoch": 0.9360278582048673, "grad_norm": 0.0, "learning_rate": 2.1387645517067845e-07, "loss": 0.9565, "step": 23923 }, { "epoch": 0.9360669848970968, "grad_norm": 0.0, "learning_rate": 2.1361584776819933e-07, "loss": 0.8121, "step": 23924 }, { "epoch": 0.9361061115893262, "grad_norm": 0.0, "learning_rate": 2.1335539752278532e-07, "loss": 0.8664, "step": 23925 }, { "epoch": 0.9361452382815557, "grad_norm": 0.0, "learning_rate": 2.1309510443862092e-07, "loss": 1.0511, "step": 23926 }, { "epoch": 0.9361843649737851, "grad_norm": 0.0, "learning_rate": 2.1283496851988495e-07, "loss": 0.8785, "step": 23927 }, { "epoch": 0.9362234916660146, "grad_norm": 0.0, "learning_rate": 2.125749897707552e-07, "loss": 0.9402, "step": 23928 }, { "epoch": 0.936262618358244, "grad_norm": 0.0, "learning_rate": 2.123151681954072e-07, "loss": 0.9164, "step": 23929 }, { "epoch": 0.9363017450504735, "grad_norm": 0.0, "learning_rate": 2.1205550379801322e-07, "loss": 0.908, "step": 23930 }, { "epoch": 0.9363408717427029, "grad_norm": 0.0, "learning_rate": 2.1179599658274208e-07, "loss": 1.0064, "step": 23931 }, { "epoch": 0.9363799984349324, "grad_norm": 0.0, "learning_rate": 2.115366465537627e-07, "loss": 1.1729, "step": 23932 }, { "epoch": 0.9364191251271617, "grad_norm": 0.0, "learning_rate": 2.1127745371523845e-07, "loss": 0.8987, "step": 23933 }, { "epoch": 0.9364582518193912, "grad_norm": 0.0, "learning_rate": 2.110184180713326e-07, "loss": 0.9733, "step": 23934 }, { "epoch": 0.9364973785116206, "grad_norm": 0.0, "learning_rate": 2.1075953962620522e-07, "loss": 1.0069, "step": 23935 }, { "epoch": 0.93653650520385, "grad_norm": 0.0, "learning_rate": 2.1050081838401294e-07, "loss": 1.0276, "step": 23936 }, { "epoch": 0.9365756318960795, "grad_norm": 0.0, "learning_rate": 2.102422543489091e-07, "loss": 0.9709, "step": 23937 }, { "epoch": 0.9366147585883089, "grad_norm": 0.0, "learning_rate": 2.099838475250493e-07, "loss": 0.7798, "step": 23938 }, { "epoch": 0.9366538852805384, "grad_norm": 0.0, "learning_rate": 2.097255979165802e-07, "loss": 0.9615, "step": 23939 }, { "epoch": 0.9366930119727678, "grad_norm": 0.0, "learning_rate": 2.0946750552765072e-07, "loss": 0.9855, "step": 23940 }, { "epoch": 0.9367321386649973, "grad_norm": 0.0, "learning_rate": 2.092095703624042e-07, "loss": 0.9565, "step": 23941 }, { "epoch": 0.9367712653572267, "grad_norm": 0.0, "learning_rate": 2.0895179242498398e-07, "loss": 1.1479, "step": 23942 }, { "epoch": 0.9368103920494562, "grad_norm": 0.0, "learning_rate": 2.0869417171952899e-07, "loss": 0.9514, "step": 23943 }, { "epoch": 0.9368495187416855, "grad_norm": 0.0, "learning_rate": 2.0843670825017703e-07, "loss": 0.9838, "step": 23944 }, { "epoch": 0.936888645433915, "grad_norm": 0.0, "learning_rate": 2.0817940202106145e-07, "loss": 0.9467, "step": 23945 }, { "epoch": 0.9369277721261444, "grad_norm": 0.0, "learning_rate": 2.0792225303631452e-07, "loss": 0.9683, "step": 23946 }, { "epoch": 0.9369668988183739, "grad_norm": 0.0, "learning_rate": 2.0766526130006626e-07, "loss": 0.8665, "step": 23947 }, { "epoch": 0.9370060255106033, "grad_norm": 0.0, "learning_rate": 2.0740842681644334e-07, "loss": 0.9456, "step": 23948 }, { "epoch": 0.9370451522028328, "grad_norm": 0.0, "learning_rate": 2.0715174958957029e-07, "loss": 0.9709, "step": 23949 }, { "epoch": 0.9370842788950622, "grad_norm": 0.0, "learning_rate": 2.0689522962356933e-07, "loss": 0.8823, "step": 23950 }, { "epoch": 0.9371234055872917, "grad_norm": 0.0, "learning_rate": 2.066388669225594e-07, "loss": 0.914, "step": 23951 }, { "epoch": 0.9371625322795211, "grad_norm": 0.0, "learning_rate": 2.0638266149065722e-07, "loss": 1.0077, "step": 23952 }, { "epoch": 0.9372016589717506, "grad_norm": 0.0, "learning_rate": 2.0612661333197725e-07, "loss": 0.9843, "step": 23953 }, { "epoch": 0.93724078566398, "grad_norm": 0.0, "learning_rate": 2.0587072245063289e-07, "loss": 0.9018, "step": 23954 }, { "epoch": 0.9372799123562094, "grad_norm": 0.0, "learning_rate": 2.0561498885072973e-07, "loss": 0.9023, "step": 23955 }, { "epoch": 0.9373190390484388, "grad_norm": 0.0, "learning_rate": 2.0535941253637892e-07, "loss": 0.9916, "step": 23956 }, { "epoch": 0.9373581657406683, "grad_norm": 0.0, "learning_rate": 2.0510399351168165e-07, "loss": 0.9143, "step": 23957 }, { "epoch": 0.9373972924328977, "grad_norm": 0.0, "learning_rate": 2.0484873178074128e-07, "loss": 0.8412, "step": 23958 }, { "epoch": 0.9374364191251272, "grad_norm": 0.0, "learning_rate": 2.0459362734765454e-07, "loss": 1.0188, "step": 23959 }, { "epoch": 0.9374755458173566, "grad_norm": 0.0, "learning_rate": 2.043386802165215e-07, "loss": 1.0173, "step": 23960 }, { "epoch": 0.9375146725095861, "grad_norm": 0.0, "learning_rate": 2.040838903914333e-07, "loss": 0.821, "step": 23961 }, { "epoch": 0.9375537992018155, "grad_norm": 0.0, "learning_rate": 2.0382925787648444e-07, "loss": 1.0586, "step": 23962 }, { "epoch": 0.937592925894045, "grad_norm": 0.0, "learning_rate": 2.0357478267576058e-07, "loss": 0.9701, "step": 23963 }, { "epoch": 0.9376320525862744, "grad_norm": 0.0, "learning_rate": 2.0332046479335066e-07, "loss": 0.9232, "step": 23964 }, { "epoch": 0.9376711792785037, "grad_norm": 0.0, "learning_rate": 2.0306630423333917e-07, "loss": 1.0096, "step": 23965 }, { "epoch": 0.9377103059707332, "grad_norm": 0.0, "learning_rate": 2.028123009998062e-07, "loss": 0.9646, "step": 23966 }, { "epoch": 0.9377494326629626, "grad_norm": 0.0, "learning_rate": 2.025584550968296e-07, "loss": 0.8561, "step": 23967 }, { "epoch": 0.9377885593551921, "grad_norm": 0.0, "learning_rate": 2.023047665284883e-07, "loss": 0.9902, "step": 23968 }, { "epoch": 0.9378276860474215, "grad_norm": 0.0, "learning_rate": 2.0205123529885683e-07, "loss": 0.8823, "step": 23969 }, { "epoch": 0.937866812739651, "grad_norm": 0.0, "learning_rate": 2.017978614120031e-07, "loss": 0.9662, "step": 23970 }, { "epoch": 0.9379059394318804, "grad_norm": 0.0, "learning_rate": 2.0154464487199932e-07, "loss": 1.0573, "step": 23971 }, { "epoch": 0.9379450661241099, "grad_norm": 0.0, "learning_rate": 2.0129158568290896e-07, "loss": 1.0137, "step": 23972 }, { "epoch": 0.9379841928163393, "grad_norm": 0.0, "learning_rate": 2.0103868384879765e-07, "loss": 0.8624, "step": 23973 }, { "epoch": 0.9380233195085688, "grad_norm": 0.0, "learning_rate": 2.0078593937372658e-07, "loss": 1.0421, "step": 23974 }, { "epoch": 0.9380624462007982, "grad_norm": 0.0, "learning_rate": 2.0053335226175475e-07, "loss": 0.9154, "step": 23975 }, { "epoch": 0.9381015728930276, "grad_norm": 0.0, "learning_rate": 2.0028092251693664e-07, "loss": 0.9083, "step": 23976 }, { "epoch": 0.938140699585257, "grad_norm": 0.0, "learning_rate": 2.0002865014332795e-07, "loss": 0.8689, "step": 23977 }, { "epoch": 0.9381798262774865, "grad_norm": 0.0, "learning_rate": 1.9977653514497764e-07, "loss": 1.0157, "step": 23978 }, { "epoch": 0.9382189529697159, "grad_norm": 0.0, "learning_rate": 1.99524577525938e-07, "loss": 0.9165, "step": 23979 }, { "epoch": 0.9382580796619454, "grad_norm": 0.0, "learning_rate": 1.9927277729025251e-07, "loss": 0.9064, "step": 23980 }, { "epoch": 0.9382972063541748, "grad_norm": 0.0, "learning_rate": 1.9902113444196348e-07, "loss": 1.0819, "step": 23981 }, { "epoch": 0.9383363330464043, "grad_norm": 0.0, "learning_rate": 1.987696489851154e-07, "loss": 0.7748, "step": 23982 }, { "epoch": 0.9383754597386337, "grad_norm": 0.0, "learning_rate": 1.9851832092374512e-07, "loss": 0.9872, "step": 23983 }, { "epoch": 0.9384145864308632, "grad_norm": 0.0, "learning_rate": 1.9826715026188824e-07, "loss": 0.9576, "step": 23984 }, { "epoch": 0.9384537131230926, "grad_norm": 0.0, "learning_rate": 1.9801613700357936e-07, "loss": 0.9317, "step": 23985 }, { "epoch": 0.938492839815322, "grad_norm": 0.0, "learning_rate": 1.977652811528463e-07, "loss": 0.9574, "step": 23986 }, { "epoch": 0.9385319665075514, "grad_norm": 0.0, "learning_rate": 1.9751458271372259e-07, "loss": 0.9349, "step": 23987 }, { "epoch": 0.9385710931997809, "grad_norm": 0.0, "learning_rate": 1.972640416902305e-07, "loss": 0.8804, "step": 23988 }, { "epoch": 0.9386102198920103, "grad_norm": 0.0, "learning_rate": 1.970136580863946e-07, "loss": 0.9687, "step": 23989 }, { "epoch": 0.9386493465842398, "grad_norm": 0.0, "learning_rate": 1.9676343190623505e-07, "loss": 1.0075, "step": 23990 }, { "epoch": 0.9386884732764692, "grad_norm": 0.0, "learning_rate": 1.9651336315377079e-07, "loss": 0.945, "step": 23991 }, { "epoch": 0.9387275999686987, "grad_norm": 0.0, "learning_rate": 1.9626345183301753e-07, "loss": 0.9572, "step": 23992 }, { "epoch": 0.9387667266609281, "grad_norm": 0.0, "learning_rate": 1.9601369794798986e-07, "loss": 0.9521, "step": 23993 }, { "epoch": 0.9388058533531575, "grad_norm": 0.0, "learning_rate": 1.9576410150269566e-07, "loss": 0.8964, "step": 23994 }, { "epoch": 0.938844980045387, "grad_norm": 0.0, "learning_rate": 1.9551466250114393e-07, "loss": 0.8199, "step": 23995 }, { "epoch": 0.9388841067376164, "grad_norm": 0.0, "learning_rate": 1.952653809473415e-07, "loss": 0.9801, "step": 23996 }, { "epoch": 0.9389232334298458, "grad_norm": 0.0, "learning_rate": 1.950162568452918e-07, "loss": 0.9498, "step": 23997 }, { "epoch": 0.9389623601220752, "grad_norm": 0.0, "learning_rate": 1.94767290198995e-07, "loss": 1.016, "step": 23998 }, { "epoch": 0.9390014868143047, "grad_norm": 0.0, "learning_rate": 1.9451848101244786e-07, "loss": 0.8599, "step": 23999 }, { "epoch": 0.9390406135065341, "grad_norm": 0.0, "learning_rate": 1.942698292896461e-07, "loss": 0.9704, "step": 24000 }, { "epoch": 0.9390797401987636, "grad_norm": 0.0, "learning_rate": 1.9402133503458543e-07, "loss": 1.0367, "step": 24001 }, { "epoch": 0.939118866890993, "grad_norm": 0.0, "learning_rate": 1.9377299825125373e-07, "loss": 0.9037, "step": 24002 }, { "epoch": 0.9391579935832225, "grad_norm": 0.0, "learning_rate": 1.9352481894364117e-07, "loss": 1.0016, "step": 24003 }, { "epoch": 0.9391971202754519, "grad_norm": 0.0, "learning_rate": 1.932767971157301e-07, "loss": 0.8828, "step": 24004 }, { "epoch": 0.9392362469676814, "grad_norm": 0.0, "learning_rate": 1.9302893277150513e-07, "loss": 0.8619, "step": 24005 }, { "epoch": 0.9392753736599108, "grad_norm": 0.0, "learning_rate": 1.9278122591494753e-07, "loss": 0.9809, "step": 24006 }, { "epoch": 0.9393145003521403, "grad_norm": 0.0, "learning_rate": 1.9253367655003406e-07, "loss": 0.8924, "step": 24007 }, { "epoch": 0.9393536270443696, "grad_norm": 0.0, "learning_rate": 1.922862846807405e-07, "loss": 0.9828, "step": 24008 }, { "epoch": 0.9393927537365991, "grad_norm": 0.0, "learning_rate": 1.9203905031103808e-07, "loss": 1.0045, "step": 24009 }, { "epoch": 0.9394318804288285, "grad_norm": 0.0, "learning_rate": 1.917919734449003e-07, "loss": 0.8941, "step": 24010 }, { "epoch": 0.939471007121058, "grad_norm": 0.0, "learning_rate": 1.9154505408629177e-07, "loss": 1.0043, "step": 24011 }, { "epoch": 0.9395101338132874, "grad_norm": 0.0, "learning_rate": 1.9129829223917928e-07, "loss": 1.0169, "step": 24012 }, { "epoch": 0.9395492605055169, "grad_norm": 0.0, "learning_rate": 1.9105168790752527e-07, "loss": 0.8351, "step": 24013 }, { "epoch": 0.9395883871977463, "grad_norm": 0.0, "learning_rate": 1.9080524109529096e-07, "loss": 1.0406, "step": 24014 }, { "epoch": 0.9396275138899758, "grad_norm": 0.0, "learning_rate": 1.9055895180643213e-07, "loss": 1.1168, "step": 24015 }, { "epoch": 0.9396666405822052, "grad_norm": 0.0, "learning_rate": 1.9031282004490447e-07, "loss": 0.8687, "step": 24016 }, { "epoch": 0.9397057672744347, "grad_norm": 0.0, "learning_rate": 1.900668458146593e-07, "loss": 1.0547, "step": 24017 }, { "epoch": 0.939744893966664, "grad_norm": 0.0, "learning_rate": 1.8982102911965006e-07, "loss": 0.9464, "step": 24018 }, { "epoch": 0.9397840206588935, "grad_norm": 0.0, "learning_rate": 1.8957536996382142e-07, "loss": 0.9719, "step": 24019 }, { "epoch": 0.9398231473511229, "grad_norm": 0.0, "learning_rate": 1.893298683511202e-07, "loss": 0.7711, "step": 24020 }, { "epoch": 0.9398622740433524, "grad_norm": 0.0, "learning_rate": 1.8908452428548663e-07, "loss": 0.9162, "step": 24021 }, { "epoch": 0.9399014007355818, "grad_norm": 0.0, "learning_rate": 1.8883933777086194e-07, "loss": 0.8777, "step": 24022 }, { "epoch": 0.9399405274278112, "grad_norm": 0.0, "learning_rate": 1.8859430881118303e-07, "loss": 0.9519, "step": 24023 }, { "epoch": 0.9399796541200407, "grad_norm": 0.0, "learning_rate": 1.8834943741038668e-07, "loss": 1.0175, "step": 24024 }, { "epoch": 0.9400187808122701, "grad_norm": 0.0, "learning_rate": 1.8810472357240316e-07, "loss": 1.1065, "step": 24025 }, { "epoch": 0.9400579075044996, "grad_norm": 0.0, "learning_rate": 1.8786016730116152e-07, "loss": 0.841, "step": 24026 }, { "epoch": 0.940097034196729, "grad_norm": 0.0, "learning_rate": 1.8761576860059084e-07, "loss": 0.878, "step": 24027 }, { "epoch": 0.9401361608889585, "grad_norm": 0.0, "learning_rate": 1.8737152747461686e-07, "loss": 0.9792, "step": 24028 }, { "epoch": 0.9401752875811878, "grad_norm": 0.0, "learning_rate": 1.8712744392715864e-07, "loss": 0.9541, "step": 24029 }, { "epoch": 0.9402144142734173, "grad_norm": 0.0, "learning_rate": 1.8688351796213865e-07, "loss": 1.0098, "step": 24030 }, { "epoch": 0.9402535409656467, "grad_norm": 0.0, "learning_rate": 1.866397495834704e-07, "loss": 0.9143, "step": 24031 }, { "epoch": 0.9402926676578762, "grad_norm": 0.0, "learning_rate": 1.86396138795073e-07, "loss": 0.9639, "step": 24032 }, { "epoch": 0.9403317943501056, "grad_norm": 0.0, "learning_rate": 1.8615268560085665e-07, "loss": 0.9178, "step": 24033 }, { "epoch": 0.9403709210423351, "grad_norm": 0.0, "learning_rate": 1.859093900047304e-07, "loss": 0.9475, "step": 24034 }, { "epoch": 0.9404100477345645, "grad_norm": 0.0, "learning_rate": 1.8566625201060007e-07, "loss": 0.9142, "step": 24035 }, { "epoch": 0.940449174426794, "grad_norm": 0.0, "learning_rate": 1.8542327162237361e-07, "loss": 0.9472, "step": 24036 }, { "epoch": 0.9404883011190234, "grad_norm": 0.0, "learning_rate": 1.8518044884395015e-07, "loss": 0.8751, "step": 24037 }, { "epoch": 0.9405274278112529, "grad_norm": 0.0, "learning_rate": 1.8493778367923097e-07, "loss": 0.9039, "step": 24038 }, { "epoch": 0.9405665545034823, "grad_norm": 0.0, "learning_rate": 1.8469527613210969e-07, "loss": 0.8618, "step": 24039 }, { "epoch": 0.9406056811957118, "grad_norm": 0.0, "learning_rate": 1.8445292620648536e-07, "loss": 0.9859, "step": 24040 }, { "epoch": 0.9406448078879411, "grad_norm": 0.0, "learning_rate": 1.842107339062471e-07, "loss": 1.0376, "step": 24041 }, { "epoch": 0.9406839345801706, "grad_norm": 0.0, "learning_rate": 1.8396869923528404e-07, "loss": 0.9941, "step": 24042 }, { "epoch": 0.9407230612724, "grad_norm": 0.0, "learning_rate": 1.8372682219748417e-07, "loss": 1.047, "step": 24043 }, { "epoch": 0.9407621879646295, "grad_norm": 0.0, "learning_rate": 1.8348510279672994e-07, "loss": 0.9922, "step": 24044 }, { "epoch": 0.9408013146568589, "grad_norm": 0.0, "learning_rate": 1.8324354103690488e-07, "loss": 0.9396, "step": 24045 }, { "epoch": 0.9408404413490884, "grad_norm": 0.0, "learning_rate": 1.8300213692188816e-07, "loss": 1.0088, "step": 24046 }, { "epoch": 0.9408795680413178, "grad_norm": 0.0, "learning_rate": 1.8276089045555444e-07, "loss": 1.0516, "step": 24047 }, { "epoch": 0.9409186947335473, "grad_norm": 0.0, "learning_rate": 1.8251980164177952e-07, "loss": 1.0099, "step": 24048 }, { "epoch": 0.9409578214257767, "grad_norm": 0.0, "learning_rate": 1.822788704844347e-07, "loss": 1.0838, "step": 24049 }, { "epoch": 0.940996948118006, "grad_norm": 0.0, "learning_rate": 1.8203809698738917e-07, "loss": 0.9369, "step": 24050 }, { "epoch": 0.9410360748102355, "grad_norm": 0.0, "learning_rate": 1.8179748115450868e-07, "loss": 0.9176, "step": 24051 }, { "epoch": 0.9410752015024649, "grad_norm": 0.0, "learning_rate": 1.8155702298965904e-07, "loss": 0.9605, "step": 24052 }, { "epoch": 0.9411143281946944, "grad_norm": 0.0, "learning_rate": 1.813167224966994e-07, "loss": 0.9547, "step": 24053 }, { "epoch": 0.9411534548869238, "grad_norm": 0.0, "learning_rate": 1.8107657967948889e-07, "loss": 0.8102, "step": 24054 }, { "epoch": 0.9411925815791533, "grad_norm": 0.0, "learning_rate": 1.8083659454188663e-07, "loss": 0.957, "step": 24055 }, { "epoch": 0.9412317082713827, "grad_norm": 0.0, "learning_rate": 1.8059676708774289e-07, "loss": 0.9097, "step": 24056 }, { "epoch": 0.9412708349636122, "grad_norm": 0.0, "learning_rate": 1.8035709732091123e-07, "loss": 1.0832, "step": 24057 }, { "epoch": 0.9413099616558416, "grad_norm": 0.0, "learning_rate": 1.801175852452397e-07, "loss": 0.9414, "step": 24058 }, { "epoch": 0.9413490883480711, "grad_norm": 0.0, "learning_rate": 1.7987823086457524e-07, "loss": 0.8999, "step": 24059 }, { "epoch": 0.9413882150403005, "grad_norm": 0.0, "learning_rate": 1.796390341827603e-07, "loss": 0.9777, "step": 24060 }, { "epoch": 0.94142734173253, "grad_norm": 0.0, "learning_rate": 1.7939999520363849e-07, "loss": 0.9825, "step": 24061 }, { "epoch": 0.9414664684247593, "grad_norm": 0.0, "learning_rate": 1.7916111393104452e-07, "loss": 0.8714, "step": 24062 }, { "epoch": 0.9415055951169888, "grad_norm": 0.0, "learning_rate": 1.7892239036881864e-07, "loss": 1.0618, "step": 24063 }, { "epoch": 0.9415447218092182, "grad_norm": 0.0, "learning_rate": 1.7868382452079224e-07, "loss": 0.9474, "step": 24064 }, { "epoch": 0.9415838485014477, "grad_norm": 0.0, "learning_rate": 1.784454163907967e-07, "loss": 0.9399, "step": 24065 }, { "epoch": 0.9416229751936771, "grad_norm": 0.0, "learning_rate": 1.782071659826601e-07, "loss": 0.8917, "step": 24066 }, { "epoch": 0.9416621018859066, "grad_norm": 0.0, "learning_rate": 1.7796907330021042e-07, "loss": 0.9911, "step": 24067 }, { "epoch": 0.941701228578136, "grad_norm": 0.0, "learning_rate": 1.7773113834726796e-07, "loss": 0.8542, "step": 24068 }, { "epoch": 0.9417403552703655, "grad_norm": 0.0, "learning_rate": 1.7749336112765747e-07, "loss": 0.8665, "step": 24069 }, { "epoch": 0.9417794819625949, "grad_norm": 0.0, "learning_rate": 1.7725574164519365e-07, "loss": 0.9671, "step": 24070 }, { "epoch": 0.9418186086548244, "grad_norm": 0.0, "learning_rate": 1.770182799036946e-07, "loss": 0.914, "step": 24071 }, { "epoch": 0.9418577353470537, "grad_norm": 0.0, "learning_rate": 1.7678097590697275e-07, "loss": 0.9966, "step": 24072 }, { "epoch": 0.9418968620392832, "grad_norm": 0.0, "learning_rate": 1.7654382965884066e-07, "loss": 0.8588, "step": 24073 }, { "epoch": 0.9419359887315126, "grad_norm": 0.0, "learning_rate": 1.7630684116310416e-07, "loss": 1.0259, "step": 24074 }, { "epoch": 0.9419751154237421, "grad_norm": 0.0, "learning_rate": 1.760700104235691e-07, "loss": 1.1031, "step": 24075 }, { "epoch": 0.9420142421159715, "grad_norm": 0.0, "learning_rate": 1.7583333744404018e-07, "loss": 0.8985, "step": 24076 }, { "epoch": 0.942053368808201, "grad_norm": 0.0, "learning_rate": 1.7559682222831887e-07, "loss": 0.8401, "step": 24077 }, { "epoch": 0.9420924955004304, "grad_norm": 0.0, "learning_rate": 1.7536046478019987e-07, "loss": 1.0553, "step": 24078 }, { "epoch": 0.9421316221926598, "grad_norm": 0.0, "learning_rate": 1.7512426510348234e-07, "loss": 1.0161, "step": 24079 }, { "epoch": 0.9421707488848893, "grad_norm": 0.0, "learning_rate": 1.7488822320195665e-07, "loss": 0.7915, "step": 24080 }, { "epoch": 0.9422098755771187, "grad_norm": 0.0, "learning_rate": 1.7465233907941636e-07, "loss": 0.9144, "step": 24081 }, { "epoch": 0.9422490022693482, "grad_norm": 0.0, "learning_rate": 1.7441661273964628e-07, "loss": 0.9363, "step": 24082 }, { "epoch": 0.9422881289615775, "grad_norm": 0.0, "learning_rate": 1.7418104418643335e-07, "loss": 0.9447, "step": 24083 }, { "epoch": 0.942327255653807, "grad_norm": 0.0, "learning_rate": 1.7394563342356008e-07, "loss": 0.8745, "step": 24084 }, { "epoch": 0.9423663823460364, "grad_norm": 0.0, "learning_rate": 1.7371038045480792e-07, "loss": 0.9037, "step": 24085 }, { "epoch": 0.9424055090382659, "grad_norm": 0.0, "learning_rate": 1.7347528528395386e-07, "loss": 0.9435, "step": 24086 }, { "epoch": 0.9424446357304953, "grad_norm": 0.0, "learning_rate": 1.7324034791477374e-07, "loss": 0.9353, "step": 24087 }, { "epoch": 0.9424837624227248, "grad_norm": 0.0, "learning_rate": 1.73005568351039e-07, "loss": 0.9912, "step": 24088 }, { "epoch": 0.9425228891149542, "grad_norm": 0.0, "learning_rate": 1.7277094659652104e-07, "loss": 0.9239, "step": 24089 }, { "epoch": 0.9425620158071837, "grad_norm": 0.0, "learning_rate": 1.72536482654988e-07, "loss": 0.958, "step": 24090 }, { "epoch": 0.9426011424994131, "grad_norm": 0.0, "learning_rate": 1.7230217653020576e-07, "loss": 0.8818, "step": 24091 }, { "epoch": 0.9426402691916426, "grad_norm": 0.0, "learning_rate": 1.720680282259335e-07, "loss": 0.8405, "step": 24092 }, { "epoch": 0.942679395883872, "grad_norm": 0.0, "learning_rate": 1.7183403774593488e-07, "loss": 1.0781, "step": 24093 }, { "epoch": 0.9427185225761014, "grad_norm": 0.0, "learning_rate": 1.716002050939658e-07, "loss": 1.0605, "step": 24094 }, { "epoch": 0.9427576492683308, "grad_norm": 0.0, "learning_rate": 1.7136653027378214e-07, "loss": 0.9239, "step": 24095 }, { "epoch": 0.9427967759605603, "grad_norm": 0.0, "learning_rate": 1.7113301328913535e-07, "loss": 0.8791, "step": 24096 }, { "epoch": 0.9428359026527897, "grad_norm": 0.0, "learning_rate": 1.7089965414377685e-07, "loss": 0.9286, "step": 24097 }, { "epoch": 0.9428750293450192, "grad_norm": 0.0, "learning_rate": 1.7066645284145367e-07, "loss": 0.9838, "step": 24098 }, { "epoch": 0.9429141560372486, "grad_norm": 0.0, "learning_rate": 1.7043340938590946e-07, "loss": 1.0658, "step": 24099 }, { "epoch": 0.9429532827294781, "grad_norm": 0.0, "learning_rate": 1.7020052378088793e-07, "loss": 1.0736, "step": 24100 }, { "epoch": 0.9429924094217075, "grad_norm": 0.0, "learning_rate": 1.6996779603012825e-07, "loss": 0.9186, "step": 24101 }, { "epoch": 0.943031536113937, "grad_norm": 0.0, "learning_rate": 1.697352261373686e-07, "loss": 0.8288, "step": 24102 }, { "epoch": 0.9430706628061664, "grad_norm": 0.0, "learning_rate": 1.6950281410634262e-07, "loss": 1.0552, "step": 24103 }, { "epoch": 0.9431097894983959, "grad_norm": 0.0, "learning_rate": 1.69270559940784e-07, "loss": 0.8926, "step": 24104 }, { "epoch": 0.9431489161906252, "grad_norm": 0.0, "learning_rate": 1.6903846364442089e-07, "loss": 0.9177, "step": 24105 }, { "epoch": 0.9431880428828547, "grad_norm": 0.0, "learning_rate": 1.688065252209814e-07, "loss": 0.9748, "step": 24106 }, { "epoch": 0.9432271695750841, "grad_norm": 0.0, "learning_rate": 1.6857474467418921e-07, "loss": 0.9197, "step": 24107 }, { "epoch": 0.9432662962673135, "grad_norm": 0.0, "learning_rate": 1.6834312200776804e-07, "loss": 0.8849, "step": 24108 }, { "epoch": 0.943305422959543, "grad_norm": 0.0, "learning_rate": 1.681116572254371e-07, "loss": 0.8537, "step": 24109 }, { "epoch": 0.9433445496517724, "grad_norm": 0.0, "learning_rate": 1.678803503309123e-07, "loss": 0.79, "step": 24110 }, { "epoch": 0.9433836763440019, "grad_norm": 0.0, "learning_rate": 1.676492013279074e-07, "loss": 0.8884, "step": 24111 }, { "epoch": 0.9434228030362313, "grad_norm": 0.0, "learning_rate": 1.6741821022013716e-07, "loss": 1.015, "step": 24112 }, { "epoch": 0.9434619297284608, "grad_norm": 0.0, "learning_rate": 1.671873770113097e-07, "loss": 0.8792, "step": 24113 }, { "epoch": 0.9435010564206902, "grad_norm": 0.0, "learning_rate": 1.6695670170513212e-07, "loss": 1.0586, "step": 24114 }, { "epoch": 0.9435401831129197, "grad_norm": 0.0, "learning_rate": 1.6672618430530584e-07, "loss": 1.0432, "step": 24115 }, { "epoch": 0.943579309805149, "grad_norm": 0.0, "learning_rate": 1.6649582481553794e-07, "loss": 1.0432, "step": 24116 }, { "epoch": 0.9436184364973785, "grad_norm": 0.0, "learning_rate": 1.6626562323952434e-07, "loss": 0.9116, "step": 24117 }, { "epoch": 0.9436575631896079, "grad_norm": 0.0, "learning_rate": 1.6603557958096206e-07, "loss": 1.0153, "step": 24118 }, { "epoch": 0.9436966898818374, "grad_norm": 0.0, "learning_rate": 1.6580569384354594e-07, "loss": 0.9883, "step": 24119 }, { "epoch": 0.9437358165740668, "grad_norm": 0.0, "learning_rate": 1.6557596603096748e-07, "loss": 1.0235, "step": 24120 }, { "epoch": 0.9437749432662963, "grad_norm": 0.0, "learning_rate": 1.6534639614691594e-07, "loss": 0.9145, "step": 24121 }, { "epoch": 0.9438140699585257, "grad_norm": 0.0, "learning_rate": 1.6511698419507728e-07, "loss": 0.8923, "step": 24122 }, { "epoch": 0.9438531966507552, "grad_norm": 0.0, "learning_rate": 1.6488773017913629e-07, "loss": 0.8952, "step": 24123 }, { "epoch": 0.9438923233429846, "grad_norm": 0.0, "learning_rate": 1.646586341027745e-07, "loss": 0.8384, "step": 24124 }, { "epoch": 0.9439314500352141, "grad_norm": 0.0, "learning_rate": 1.6442969596967006e-07, "loss": 0.9557, "step": 24125 }, { "epoch": 0.9439705767274434, "grad_norm": 0.0, "learning_rate": 1.6420091578350117e-07, "loss": 1.0549, "step": 24126 }, { "epoch": 0.9440097034196729, "grad_norm": 0.0, "learning_rate": 1.639722935479393e-07, "loss": 1.0306, "step": 24127 }, { "epoch": 0.9440488301119023, "grad_norm": 0.0, "learning_rate": 1.6374382926665931e-07, "loss": 1.0681, "step": 24128 }, { "epoch": 0.9440879568041318, "grad_norm": 0.0, "learning_rate": 1.6351552294332496e-07, "loss": 0.8578, "step": 24129 }, { "epoch": 0.9441270834963612, "grad_norm": 0.0, "learning_rate": 1.6328737458160771e-07, "loss": 0.9713, "step": 24130 }, { "epoch": 0.9441662101885907, "grad_norm": 0.0, "learning_rate": 1.63059384185168e-07, "loss": 0.8653, "step": 24131 }, { "epoch": 0.9442053368808201, "grad_norm": 0.0, "learning_rate": 1.628315517576695e-07, "loss": 0.9437, "step": 24132 }, { "epoch": 0.9442444635730496, "grad_norm": 0.0, "learning_rate": 1.6260387730276827e-07, "loss": 1.0034, "step": 24133 }, { "epoch": 0.944283590265279, "grad_norm": 0.0, "learning_rate": 1.6237636082412135e-07, "loss": 0.971, "step": 24134 }, { "epoch": 0.9443227169575084, "grad_norm": 0.0, "learning_rate": 1.6214900232538356e-07, "loss": 0.9677, "step": 24135 }, { "epoch": 0.9443618436497379, "grad_norm": 0.0, "learning_rate": 1.6192180181020645e-07, "loss": 1.0071, "step": 24136 }, { "epoch": 0.9444009703419672, "grad_norm": 0.0, "learning_rate": 1.61694759282236e-07, "loss": 0.9118, "step": 24137 }, { "epoch": 0.9444400970341967, "grad_norm": 0.0, "learning_rate": 1.6146787474512037e-07, "loss": 0.9053, "step": 24138 }, { "epoch": 0.9444792237264261, "grad_norm": 0.0, "learning_rate": 1.6124114820250225e-07, "loss": 0.9796, "step": 24139 }, { "epoch": 0.9445183504186556, "grad_norm": 0.0, "learning_rate": 1.6101457965802204e-07, "loss": 0.8712, "step": 24140 }, { "epoch": 0.944557477110885, "grad_norm": 0.0, "learning_rate": 1.6078816911531904e-07, "loss": 1.0473, "step": 24141 }, { "epoch": 0.9445966038031145, "grad_norm": 0.0, "learning_rate": 1.605619165780281e-07, "loss": 0.9238, "step": 24142 }, { "epoch": 0.9446357304953439, "grad_norm": 0.0, "learning_rate": 1.6033582204978526e-07, "loss": 0.7951, "step": 24143 }, { "epoch": 0.9446748571875734, "grad_norm": 0.0, "learning_rate": 1.6010988553421757e-07, "loss": 1.0145, "step": 24144 }, { "epoch": 0.9447139838798028, "grad_norm": 0.0, "learning_rate": 1.5988410703495548e-07, "loss": 0.8051, "step": 24145 }, { "epoch": 0.9447531105720323, "grad_norm": 0.0, "learning_rate": 1.59658486555625e-07, "loss": 0.9729, "step": 24146 }, { "epoch": 0.9447922372642616, "grad_norm": 0.0, "learning_rate": 1.594330240998476e-07, "loss": 1.1395, "step": 24147 }, { "epoch": 0.9448313639564911, "grad_norm": 0.0, "learning_rate": 1.5920771967124494e-07, "loss": 1.0254, "step": 24148 }, { "epoch": 0.9448704906487205, "grad_norm": 0.0, "learning_rate": 1.5898257327343624e-07, "loss": 0.8835, "step": 24149 }, { "epoch": 0.94490961734095, "grad_norm": 0.0, "learning_rate": 1.5875758491003422e-07, "loss": 0.8902, "step": 24150 }, { "epoch": 0.9449487440331794, "grad_norm": 0.0, "learning_rate": 1.585327545846549e-07, "loss": 1.0015, "step": 24151 }, { "epoch": 0.9449878707254089, "grad_norm": 0.0, "learning_rate": 1.5830808230090644e-07, "loss": 1.0474, "step": 24152 }, { "epoch": 0.9450269974176383, "grad_norm": 0.0, "learning_rate": 1.5808356806239932e-07, "loss": 0.983, "step": 24153 }, { "epoch": 0.9450661241098678, "grad_norm": 0.0, "learning_rate": 1.5785921187273622e-07, "loss": 0.9866, "step": 24154 }, { "epoch": 0.9451052508020972, "grad_norm": 0.0, "learning_rate": 1.5763501373552092e-07, "loss": 0.8979, "step": 24155 }, { "epoch": 0.9451443774943267, "grad_norm": 0.0, "learning_rate": 1.57410973654355e-07, "loss": 0.8893, "step": 24156 }, { "epoch": 0.9451835041865561, "grad_norm": 0.0, "learning_rate": 1.5718709163283663e-07, "loss": 1.0736, "step": 24157 }, { "epoch": 0.9452226308787856, "grad_norm": 0.0, "learning_rate": 1.5696336767455743e-07, "loss": 1.0471, "step": 24158 }, { "epoch": 0.9452617575710149, "grad_norm": 0.0, "learning_rate": 1.5673980178311455e-07, "loss": 0.841, "step": 24159 }, { "epoch": 0.9453008842632444, "grad_norm": 0.0, "learning_rate": 1.5651639396209395e-07, "loss": 0.9087, "step": 24160 }, { "epoch": 0.9453400109554738, "grad_norm": 0.0, "learning_rate": 1.5629314421508724e-07, "loss": 0.9307, "step": 24161 }, { "epoch": 0.9453791376477033, "grad_norm": 0.0, "learning_rate": 1.560700525456771e-07, "loss": 0.9929, "step": 24162 }, { "epoch": 0.9454182643399327, "grad_norm": 0.0, "learning_rate": 1.5584711895744841e-07, "loss": 0.9653, "step": 24163 }, { "epoch": 0.9454573910321621, "grad_norm": 0.0, "learning_rate": 1.5562434345397725e-07, "loss": 0.9507, "step": 24164 }, { "epoch": 0.9454965177243916, "grad_norm": 0.0, "learning_rate": 1.5540172603884407e-07, "loss": 0.9468, "step": 24165 }, { "epoch": 0.945535644416621, "grad_norm": 0.0, "learning_rate": 1.5517926671562378e-07, "loss": 0.7769, "step": 24166 }, { "epoch": 0.9455747711088505, "grad_norm": 0.0, "learning_rate": 1.5495696548788797e-07, "loss": 1.0319, "step": 24167 }, { "epoch": 0.9456138978010799, "grad_norm": 0.0, "learning_rate": 1.5473482235920712e-07, "loss": 0.9677, "step": 24168 }, { "epoch": 0.9456530244933093, "grad_norm": 0.0, "learning_rate": 1.5451283733314726e-07, "loss": 0.9844, "step": 24169 }, { "epoch": 0.9456921511855387, "grad_norm": 0.0, "learning_rate": 1.5429101041327444e-07, "loss": 1.0369, "step": 24170 }, { "epoch": 0.9457312778777682, "grad_norm": 0.0, "learning_rate": 1.5406934160315135e-07, "loss": 0.9023, "step": 24171 }, { "epoch": 0.9457704045699976, "grad_norm": 0.0, "learning_rate": 1.5384783090633627e-07, "loss": 0.9014, "step": 24172 }, { "epoch": 0.9458095312622271, "grad_norm": 0.0, "learning_rate": 1.5362647832638743e-07, "loss": 0.8365, "step": 24173 }, { "epoch": 0.9458486579544565, "grad_norm": 0.0, "learning_rate": 1.5340528386685872e-07, "loss": 0.9225, "step": 24174 }, { "epoch": 0.945887784646686, "grad_norm": 0.0, "learning_rate": 1.531842475313039e-07, "loss": 0.8356, "step": 24175 }, { "epoch": 0.9459269113389154, "grad_norm": 0.0, "learning_rate": 1.5296336932327016e-07, "loss": 1.0333, "step": 24176 }, { "epoch": 0.9459660380311449, "grad_norm": 0.0, "learning_rate": 1.5274264924630578e-07, "loss": 0.9858, "step": 24177 }, { "epoch": 0.9460051647233743, "grad_norm": 0.0, "learning_rate": 1.5252208730395567e-07, "loss": 0.9409, "step": 24178 }, { "epoch": 0.9460442914156038, "grad_norm": 0.0, "learning_rate": 1.5230168349976037e-07, "loss": 1.0191, "step": 24179 }, { "epoch": 0.9460834181078331, "grad_norm": 0.0, "learning_rate": 1.5208143783726038e-07, "loss": 0.9804, "step": 24180 }, { "epoch": 0.9461225448000626, "grad_norm": 0.0, "learning_rate": 1.5186135031999394e-07, "loss": 0.918, "step": 24181 }, { "epoch": 0.946161671492292, "grad_norm": 0.0, "learning_rate": 1.5164142095149158e-07, "loss": 0.9146, "step": 24182 }, { "epoch": 0.9462007981845215, "grad_norm": 0.0, "learning_rate": 1.5142164973528827e-07, "loss": 0.922, "step": 24183 }, { "epoch": 0.9462399248767509, "grad_norm": 0.0, "learning_rate": 1.512020366749134e-07, "loss": 0.9545, "step": 24184 }, { "epoch": 0.9462790515689804, "grad_norm": 0.0, "learning_rate": 1.509825817738908e-07, "loss": 0.8372, "step": 24185 }, { "epoch": 0.9463181782612098, "grad_norm": 0.0, "learning_rate": 1.5076328503574766e-07, "loss": 0.8915, "step": 24186 }, { "epoch": 0.9463573049534393, "grad_norm": 0.0, "learning_rate": 1.505441464640034e-07, "loss": 0.9483, "step": 24187 }, { "epoch": 0.9463964316456687, "grad_norm": 0.0, "learning_rate": 1.5032516606217961e-07, "loss": 1.0222, "step": 24188 }, { "epoch": 0.9464355583378982, "grad_norm": 0.0, "learning_rate": 1.501063438337891e-07, "loss": 0.9301, "step": 24189 }, { "epoch": 0.9464746850301275, "grad_norm": 0.0, "learning_rate": 1.4988767978235007e-07, "loss": 0.9544, "step": 24190 }, { "epoch": 0.946513811722357, "grad_norm": 0.0, "learning_rate": 1.4966917391137093e-07, "loss": 1.0532, "step": 24191 }, { "epoch": 0.9465529384145864, "grad_norm": 0.0, "learning_rate": 1.4945082622436212e-07, "loss": 0.9468, "step": 24192 }, { "epoch": 0.9465920651068158, "grad_norm": 0.0, "learning_rate": 1.4923263672482978e-07, "loss": 0.9021, "step": 24193 }, { "epoch": 0.9466311917990453, "grad_norm": 0.0, "learning_rate": 1.4901460541627777e-07, "loss": 1.0212, "step": 24194 }, { "epoch": 0.9466703184912747, "grad_norm": 0.0, "learning_rate": 1.487967323022066e-07, "loss": 0.8569, "step": 24195 }, { "epoch": 0.9467094451835042, "grad_norm": 0.0, "learning_rate": 1.4857901738611569e-07, "loss": 0.9737, "step": 24196 }, { "epoch": 0.9467485718757336, "grad_norm": 0.0, "learning_rate": 1.4836146067150113e-07, "loss": 0.9716, "step": 24197 }, { "epoch": 0.9467876985679631, "grad_norm": 0.0, "learning_rate": 1.4814406216185685e-07, "loss": 0.8655, "step": 24198 }, { "epoch": 0.9468268252601925, "grad_norm": 0.0, "learning_rate": 1.4792682186067442e-07, "loss": 0.9071, "step": 24199 }, { "epoch": 0.946865951952422, "grad_norm": 0.0, "learning_rate": 1.477097397714411e-07, "loss": 1.0358, "step": 24200 }, { "epoch": 0.9469050786446513, "grad_norm": 0.0, "learning_rate": 1.474928158976441e-07, "loss": 0.9501, "step": 24201 }, { "epoch": 0.9469442053368808, "grad_norm": 0.0, "learning_rate": 1.4727605024276615e-07, "loss": 0.961, "step": 24202 }, { "epoch": 0.9469833320291102, "grad_norm": 0.0, "learning_rate": 1.4705944281028893e-07, "loss": 0.916, "step": 24203 }, { "epoch": 0.9470224587213397, "grad_norm": 0.0, "learning_rate": 1.468429936036919e-07, "loss": 0.8772, "step": 24204 }, { "epoch": 0.9470615854135691, "grad_norm": 0.0, "learning_rate": 1.4662670262644784e-07, "loss": 0.8501, "step": 24205 }, { "epoch": 0.9471007121057986, "grad_norm": 0.0, "learning_rate": 1.4641056988203285e-07, "loss": 1.0525, "step": 24206 }, { "epoch": 0.947139838798028, "grad_norm": 0.0, "learning_rate": 1.4619459537391635e-07, "loss": 0.8562, "step": 24207 }, { "epoch": 0.9471789654902575, "grad_norm": 0.0, "learning_rate": 1.459787791055689e-07, "loss": 0.9306, "step": 24208 }, { "epoch": 0.9472180921824869, "grad_norm": 0.0, "learning_rate": 1.4576312108045222e-07, "loss": 0.9281, "step": 24209 }, { "epoch": 0.9472572188747164, "grad_norm": 0.0, "learning_rate": 1.4554762130203236e-07, "loss": 0.9322, "step": 24210 }, { "epoch": 0.9472963455669458, "grad_norm": 0.0, "learning_rate": 1.4533227977376994e-07, "loss": 0.901, "step": 24211 }, { "epoch": 0.9473354722591752, "grad_norm": 0.0, "learning_rate": 1.4511709649912332e-07, "loss": 0.9223, "step": 24212 }, { "epoch": 0.9473745989514046, "grad_norm": 0.0, "learning_rate": 1.4490207148154522e-07, "loss": 0.9606, "step": 24213 }, { "epoch": 0.9474137256436341, "grad_norm": 0.0, "learning_rate": 1.4468720472449182e-07, "loss": 1.0484, "step": 24214 }, { "epoch": 0.9474528523358635, "grad_norm": 0.0, "learning_rate": 1.4447249623141258e-07, "loss": 0.8711, "step": 24215 }, { "epoch": 0.947491979028093, "grad_norm": 0.0, "learning_rate": 1.442579460057558e-07, "loss": 0.9466, "step": 24216 }, { "epoch": 0.9475311057203224, "grad_norm": 0.0, "learning_rate": 1.4404355405096548e-07, "loss": 0.9919, "step": 24217 }, { "epoch": 0.9475702324125519, "grad_norm": 0.0, "learning_rate": 1.4382932037048547e-07, "loss": 0.8152, "step": 24218 }, { "epoch": 0.9476093591047813, "grad_norm": 0.0, "learning_rate": 1.4361524496775636e-07, "loss": 0.9952, "step": 24219 }, { "epoch": 0.9476484857970108, "grad_norm": 0.0, "learning_rate": 1.434013278462165e-07, "loss": 0.9566, "step": 24220 }, { "epoch": 0.9476876124892402, "grad_norm": 0.0, "learning_rate": 1.4318756900929986e-07, "loss": 0.9388, "step": 24221 }, { "epoch": 0.9477267391814695, "grad_norm": 0.0, "learning_rate": 1.429739684604392e-07, "loss": 1.0205, "step": 24222 }, { "epoch": 0.947765865873699, "grad_norm": 0.0, "learning_rate": 1.4276052620306513e-07, "loss": 0.8681, "step": 24223 }, { "epoch": 0.9478049925659284, "grad_norm": 0.0, "learning_rate": 1.425472422406049e-07, "loss": 0.8511, "step": 24224 }, { "epoch": 0.9478441192581579, "grad_norm": 0.0, "learning_rate": 1.4233411657648465e-07, "loss": 1.0295, "step": 24225 }, { "epoch": 0.9478832459503873, "grad_norm": 0.0, "learning_rate": 1.421211492141261e-07, "loss": 0.9023, "step": 24226 }, { "epoch": 0.9479223726426168, "grad_norm": 0.0, "learning_rate": 1.4190834015694876e-07, "loss": 0.8363, "step": 24227 }, { "epoch": 0.9479614993348462, "grad_norm": 0.0, "learning_rate": 1.4169568940837098e-07, "loss": 0.9357, "step": 24228 }, { "epoch": 0.9480006260270757, "grad_norm": 0.0, "learning_rate": 1.414831969718067e-07, "loss": 1.0347, "step": 24229 }, { "epoch": 0.9480397527193051, "grad_norm": 0.0, "learning_rate": 1.412708628506698e-07, "loss": 0.9446, "step": 24230 }, { "epoch": 0.9480788794115346, "grad_norm": 0.0, "learning_rate": 1.4105868704836767e-07, "loss": 0.9375, "step": 24231 }, { "epoch": 0.948118006103764, "grad_norm": 0.0, "learning_rate": 1.4084666956831083e-07, "loss": 0.9092, "step": 24232 }, { "epoch": 0.9481571327959935, "grad_norm": 0.0, "learning_rate": 1.4063481041390102e-07, "loss": 0.8464, "step": 24233 }, { "epoch": 0.9481962594882228, "grad_norm": 0.0, "learning_rate": 1.4042310958854222e-07, "loss": 1.0892, "step": 24234 }, { "epoch": 0.9482353861804523, "grad_norm": 0.0, "learning_rate": 1.402115670956339e-07, "loss": 0.9501, "step": 24235 }, { "epoch": 0.9482745128726817, "grad_norm": 0.0, "learning_rate": 1.400001829385722e-07, "loss": 0.9875, "step": 24236 }, { "epoch": 0.9483136395649112, "grad_norm": 0.0, "learning_rate": 1.3978895712075223e-07, "loss": 0.908, "step": 24237 }, { "epoch": 0.9483527662571406, "grad_norm": 0.0, "learning_rate": 1.3957788964556685e-07, "loss": 1.0086, "step": 24238 }, { "epoch": 0.9483918929493701, "grad_norm": 0.0, "learning_rate": 1.393669805164044e-07, "loss": 0.8633, "step": 24239 }, { "epoch": 0.9484310196415995, "grad_norm": 0.0, "learning_rate": 1.391562297366511e-07, "loss": 1.0252, "step": 24240 }, { "epoch": 0.948470146333829, "grad_norm": 0.0, "learning_rate": 1.3894563730969312e-07, "loss": 0.7968, "step": 24241 }, { "epoch": 0.9485092730260584, "grad_norm": 0.0, "learning_rate": 1.387352032389122e-07, "loss": 0.96, "step": 24242 }, { "epoch": 0.9485483997182879, "grad_norm": 0.0, "learning_rate": 1.3852492752768676e-07, "loss": 0.9277, "step": 24243 }, { "epoch": 0.9485875264105172, "grad_norm": 0.0, "learning_rate": 1.3831481017939406e-07, "loss": 1.0001, "step": 24244 }, { "epoch": 0.9486266531027467, "grad_norm": 0.0, "learning_rate": 1.3810485119740703e-07, "loss": 0.9902, "step": 24245 }, { "epoch": 0.9486657797949761, "grad_norm": 0.0, "learning_rate": 1.3789505058509955e-07, "loss": 0.8729, "step": 24246 }, { "epoch": 0.9487049064872056, "grad_norm": 0.0, "learning_rate": 1.376854083458401e-07, "loss": 0.7907, "step": 24247 }, { "epoch": 0.948744033179435, "grad_norm": 0.0, "learning_rate": 1.3747592448299375e-07, "loss": 0.9111, "step": 24248 }, { "epoch": 0.9487831598716644, "grad_norm": 0.0, "learning_rate": 1.3726659899992555e-07, "loss": 0.972, "step": 24249 }, { "epoch": 0.9488222865638939, "grad_norm": 0.0, "learning_rate": 1.3705743189999732e-07, "loss": 1.0901, "step": 24250 }, { "epoch": 0.9488614132561233, "grad_norm": 0.0, "learning_rate": 1.3684842318656854e-07, "loss": 0.8644, "step": 24251 }, { "epoch": 0.9489005399483528, "grad_norm": 0.0, "learning_rate": 1.3663957286299324e-07, "loss": 1.0313, "step": 24252 }, { "epoch": 0.9489396666405822, "grad_norm": 0.0, "learning_rate": 1.3643088093262867e-07, "loss": 1.007, "step": 24253 }, { "epoch": 0.9489787933328117, "grad_norm": 0.0, "learning_rate": 1.3622234739882335e-07, "loss": 1.0372, "step": 24254 }, { "epoch": 0.949017920025041, "grad_norm": 0.0, "learning_rate": 1.3601397226492784e-07, "loss": 0.9998, "step": 24255 }, { "epoch": 0.9490570467172705, "grad_norm": 0.0, "learning_rate": 1.358057555342862e-07, "loss": 0.9665, "step": 24256 }, { "epoch": 0.9490961734094999, "grad_norm": 0.0, "learning_rate": 1.3559769721024573e-07, "loss": 1.0385, "step": 24257 }, { "epoch": 0.9491353001017294, "grad_norm": 0.0, "learning_rate": 1.3538979729614376e-07, "loss": 1.0237, "step": 24258 }, { "epoch": 0.9491744267939588, "grad_norm": 0.0, "learning_rate": 1.3518205579532096e-07, "loss": 1.0861, "step": 24259 }, { "epoch": 0.9492135534861883, "grad_norm": 0.0, "learning_rate": 1.3497447271111353e-07, "loss": 0.8937, "step": 24260 }, { "epoch": 0.9492526801784177, "grad_norm": 0.0, "learning_rate": 1.3476704804685436e-07, "loss": 0.8588, "step": 24261 }, { "epoch": 0.9492918068706472, "grad_norm": 0.0, "learning_rate": 1.3455978180587414e-07, "loss": 1.0013, "step": 24262 }, { "epoch": 0.9493309335628766, "grad_norm": 0.0, "learning_rate": 1.3435267399150243e-07, "loss": 0.9508, "step": 24263 }, { "epoch": 0.9493700602551061, "grad_norm": 0.0, "learning_rate": 1.3414572460706432e-07, "loss": 0.9432, "step": 24264 }, { "epoch": 0.9494091869473354, "grad_norm": 0.0, "learning_rate": 1.3393893365588272e-07, "loss": 0.8395, "step": 24265 }, { "epoch": 0.949448313639565, "grad_norm": 0.0, "learning_rate": 1.3373230114127943e-07, "loss": 0.8718, "step": 24266 }, { "epoch": 0.9494874403317943, "grad_norm": 0.0, "learning_rate": 1.335258270665718e-07, "loss": 0.9685, "step": 24267 }, { "epoch": 0.9495265670240238, "grad_norm": 0.0, "learning_rate": 1.3331951143507604e-07, "loss": 1.0168, "step": 24268 }, { "epoch": 0.9495656937162532, "grad_norm": 0.0, "learning_rate": 1.331133542501062e-07, "loss": 0.9197, "step": 24269 }, { "epoch": 0.9496048204084827, "grad_norm": 0.0, "learning_rate": 1.329073555149707e-07, "loss": 0.9321, "step": 24270 }, { "epoch": 0.9496439471007121, "grad_norm": 0.0, "learning_rate": 1.3270151523297915e-07, "loss": 1.0914, "step": 24271 }, { "epoch": 0.9496830737929416, "grad_norm": 0.0, "learning_rate": 1.3249583340743778e-07, "loss": 0.9569, "step": 24272 }, { "epoch": 0.949722200485171, "grad_norm": 0.0, "learning_rate": 1.3229031004164839e-07, "loss": 0.9082, "step": 24273 }, { "epoch": 0.9497613271774005, "grad_norm": 0.0, "learning_rate": 1.3208494513891168e-07, "loss": 0.9614, "step": 24274 }, { "epoch": 0.9498004538696299, "grad_norm": 0.0, "learning_rate": 1.3187973870252501e-07, "loss": 0.9503, "step": 24275 }, { "epoch": 0.9498395805618594, "grad_norm": 0.0, "learning_rate": 1.3167469073578465e-07, "loss": 0.8924, "step": 24276 }, { "epoch": 0.9498787072540887, "grad_norm": 0.0, "learning_rate": 1.3146980124198238e-07, "loss": 0.9691, "step": 24277 }, { "epoch": 0.9499178339463181, "grad_norm": 0.0, "learning_rate": 1.3126507022441116e-07, "loss": 0.9365, "step": 24278 }, { "epoch": 0.9499569606385476, "grad_norm": 0.0, "learning_rate": 1.31060497686355e-07, "loss": 0.9235, "step": 24279 }, { "epoch": 0.949996087330777, "grad_norm": 0.0, "learning_rate": 1.3085608363110014e-07, "loss": 1.0148, "step": 24280 }, { "epoch": 0.9500352140230065, "grad_norm": 0.0, "learning_rate": 1.3065182806193066e-07, "loss": 0.9775, "step": 24281 }, { "epoch": 0.9500743407152359, "grad_norm": 0.0, "learning_rate": 1.3044773098212616e-07, "loss": 0.9955, "step": 24282 }, { "epoch": 0.9501134674074654, "grad_norm": 0.0, "learning_rate": 1.30243792394964e-07, "loss": 0.9281, "step": 24283 }, { "epoch": 0.9501525940996948, "grad_norm": 0.0, "learning_rate": 1.3004001230371932e-07, "loss": 0.78, "step": 24284 }, { "epoch": 0.9501917207919243, "grad_norm": 0.0, "learning_rate": 1.2983639071166288e-07, "loss": 1.0145, "step": 24285 }, { "epoch": 0.9502308474841537, "grad_norm": 0.0, "learning_rate": 1.2963292762206648e-07, "loss": 0.8477, "step": 24286 }, { "epoch": 0.9502699741763831, "grad_norm": 0.0, "learning_rate": 1.294296230381975e-07, "loss": 0.965, "step": 24287 }, { "epoch": 0.9503091008686125, "grad_norm": 0.0, "learning_rate": 1.2922647696332004e-07, "loss": 0.8791, "step": 24288 }, { "epoch": 0.950348227560842, "grad_norm": 0.0, "learning_rate": 1.2902348940069587e-07, "loss": 1.0397, "step": 24289 }, { "epoch": 0.9503873542530714, "grad_norm": 0.0, "learning_rate": 1.2882066035358686e-07, "loss": 0.8734, "step": 24290 }, { "epoch": 0.9504264809453009, "grad_norm": 0.0, "learning_rate": 1.286179898252482e-07, "loss": 0.9454, "step": 24291 }, { "epoch": 0.9504656076375303, "grad_norm": 0.0, "learning_rate": 1.2841547781893505e-07, "loss": 1.0169, "step": 24292 }, { "epoch": 0.9505047343297598, "grad_norm": 0.0, "learning_rate": 1.2821312433789924e-07, "loss": 0.9689, "step": 24293 }, { "epoch": 0.9505438610219892, "grad_norm": 0.0, "learning_rate": 1.2801092938539038e-07, "loss": 0.9008, "step": 24294 }, { "epoch": 0.9505829877142187, "grad_norm": 0.0, "learning_rate": 1.2780889296465592e-07, "loss": 0.9336, "step": 24295 }, { "epoch": 0.9506221144064481, "grad_norm": 0.0, "learning_rate": 1.2760701507894102e-07, "loss": 0.9841, "step": 24296 }, { "epoch": 0.9506612410986776, "grad_norm": 0.0, "learning_rate": 1.274052957314853e-07, "loss": 0.8757, "step": 24297 }, { "epoch": 0.9507003677909069, "grad_norm": 0.0, "learning_rate": 1.272037349255306e-07, "loss": 1.0966, "step": 24298 }, { "epoch": 0.9507394944831364, "grad_norm": 0.0, "learning_rate": 1.2700233266431217e-07, "loss": 0.9784, "step": 24299 }, { "epoch": 0.9507786211753658, "grad_norm": 0.0, "learning_rate": 1.2680108895106514e-07, "loss": 1.0755, "step": 24300 }, { "epoch": 0.9508177478675953, "grad_norm": 0.0, "learning_rate": 1.2660000378902026e-07, "loss": 0.9646, "step": 24301 }, { "epoch": 0.9508568745598247, "grad_norm": 0.0, "learning_rate": 1.263990771814072e-07, "loss": 0.8814, "step": 24302 }, { "epoch": 0.9508960012520542, "grad_norm": 0.0, "learning_rate": 1.2619830913145225e-07, "loss": 0.9231, "step": 24303 }, { "epoch": 0.9509351279442836, "grad_norm": 0.0, "learning_rate": 1.259976996423806e-07, "loss": 0.9736, "step": 24304 }, { "epoch": 0.9509742546365131, "grad_norm": 0.0, "learning_rate": 1.2579724871741305e-07, "loss": 0.9156, "step": 24305 }, { "epoch": 0.9510133813287425, "grad_norm": 0.0, "learning_rate": 1.255969563597692e-07, "loss": 0.9755, "step": 24306 }, { "epoch": 0.9510525080209719, "grad_norm": 0.0, "learning_rate": 1.2539682257266316e-07, "loss": 0.8716, "step": 24307 }, { "epoch": 0.9510916347132014, "grad_norm": 0.0, "learning_rate": 1.2519684735931125e-07, "loss": 1.0163, "step": 24308 }, { "epoch": 0.9511307614054307, "grad_norm": 0.0, "learning_rate": 1.2499703072292424e-07, "loss": 0.9252, "step": 24309 }, { "epoch": 0.9511698880976602, "grad_norm": 0.0, "learning_rate": 1.2479737266671176e-07, "loss": 1.0435, "step": 24310 }, { "epoch": 0.9512090147898896, "grad_norm": 0.0, "learning_rate": 1.2459787319387907e-07, "loss": 1.007, "step": 24311 }, { "epoch": 0.9512481414821191, "grad_norm": 0.0, "learning_rate": 1.243985323076291e-07, "loss": 0.8748, "step": 24312 }, { "epoch": 0.9512872681743485, "grad_norm": 0.0, "learning_rate": 1.241993500111638e-07, "loss": 0.9427, "step": 24313 }, { "epoch": 0.951326394866578, "grad_norm": 0.0, "learning_rate": 1.2400032630768278e-07, "loss": 0.878, "step": 24314 }, { "epoch": 0.9513655215588074, "grad_norm": 0.0, "learning_rate": 1.2380146120038016e-07, "loss": 0.8553, "step": 24315 }, { "epoch": 0.9514046482510369, "grad_norm": 0.0, "learning_rate": 1.236027546924512e-07, "loss": 0.9627, "step": 24316 }, { "epoch": 0.9514437749432663, "grad_norm": 0.0, "learning_rate": 1.2340420678708664e-07, "loss": 1.0632, "step": 24317 }, { "epoch": 0.9514829016354958, "grad_norm": 0.0, "learning_rate": 1.2320581748747286e-07, "loss": 0.8382, "step": 24318 }, { "epoch": 0.9515220283277251, "grad_norm": 0.0, "learning_rate": 1.2300758679679835e-07, "loss": 0.9129, "step": 24319 }, { "epoch": 0.9515611550199546, "grad_norm": 0.0, "learning_rate": 1.228095147182462e-07, "loss": 0.9317, "step": 24320 }, { "epoch": 0.951600281712184, "grad_norm": 0.0, "learning_rate": 1.2261160125499495e-07, "loss": 1.0356, "step": 24321 }, { "epoch": 0.9516394084044135, "grad_norm": 0.0, "learning_rate": 1.2241384641022425e-07, "loss": 0.9543, "step": 24322 }, { "epoch": 0.9516785350966429, "grad_norm": 0.0, "learning_rate": 1.2221625018711158e-07, "loss": 0.8782, "step": 24323 }, { "epoch": 0.9517176617888724, "grad_norm": 0.0, "learning_rate": 1.2201881258882775e-07, "loss": 0.9593, "step": 24324 }, { "epoch": 0.9517567884811018, "grad_norm": 0.0, "learning_rate": 1.2182153361854244e-07, "loss": 0.9568, "step": 24325 }, { "epoch": 0.9517959151733313, "grad_norm": 0.0, "learning_rate": 1.2162441327942642e-07, "loss": 0.8972, "step": 24326 }, { "epoch": 0.9518350418655607, "grad_norm": 0.0, "learning_rate": 1.2142745157464498e-07, "loss": 1.0383, "step": 24327 }, { "epoch": 0.9518741685577902, "grad_norm": 0.0, "learning_rate": 1.212306485073589e-07, "loss": 1.0292, "step": 24328 }, { "epoch": 0.9519132952500196, "grad_norm": 0.0, "learning_rate": 1.2103400408073006e-07, "loss": 0.8845, "step": 24329 }, { "epoch": 0.951952421942249, "grad_norm": 0.0, "learning_rate": 1.20837518297916e-07, "loss": 0.9891, "step": 24330 }, { "epoch": 0.9519915486344784, "grad_norm": 0.0, "learning_rate": 1.2064119116207195e-07, "loss": 0.9549, "step": 24331 }, { "epoch": 0.9520306753267079, "grad_norm": 0.0, "learning_rate": 1.2044502267635093e-07, "loss": 0.9012, "step": 24332 }, { "epoch": 0.9520698020189373, "grad_norm": 0.0, "learning_rate": 1.2024901284390377e-07, "loss": 0.9177, "step": 24333 }, { "epoch": 0.9521089287111668, "grad_norm": 0.0, "learning_rate": 1.2005316166787574e-07, "loss": 0.9815, "step": 24334 }, { "epoch": 0.9521480554033962, "grad_norm": 0.0, "learning_rate": 1.198574691514154e-07, "loss": 0.8542, "step": 24335 }, { "epoch": 0.9521871820956256, "grad_norm": 0.0, "learning_rate": 1.1966193529766356e-07, "loss": 0.9211, "step": 24336 }, { "epoch": 0.9522263087878551, "grad_norm": 0.0, "learning_rate": 1.1946656010976e-07, "loss": 1.0063, "step": 24337 }, { "epoch": 0.9522654354800845, "grad_norm": 0.0, "learning_rate": 1.1927134359084104e-07, "loss": 0.8548, "step": 24338 }, { "epoch": 0.952304562172314, "grad_norm": 0.0, "learning_rate": 1.1907628574404528e-07, "loss": 0.9097, "step": 24339 }, { "epoch": 0.9523436888645433, "grad_norm": 0.0, "learning_rate": 1.1888138657250136e-07, "loss": 0.9707, "step": 24340 }, { "epoch": 0.9523828155567728, "grad_norm": 0.0, "learning_rate": 1.1868664607934121e-07, "loss": 0.8904, "step": 24341 }, { "epoch": 0.9524219422490022, "grad_norm": 0.0, "learning_rate": 1.1849206426769233e-07, "loss": 0.9095, "step": 24342 }, { "epoch": 0.9524610689412317, "grad_norm": 0.0, "learning_rate": 1.1829764114067666e-07, "loss": 0.9659, "step": 24343 }, { "epoch": 0.9525001956334611, "grad_norm": 0.0, "learning_rate": 1.1810337670141947e-07, "loss": 1.0351, "step": 24344 }, { "epoch": 0.9525393223256906, "grad_norm": 0.0, "learning_rate": 1.179092709530405e-07, "loss": 1.0283, "step": 24345 }, { "epoch": 0.95257844901792, "grad_norm": 0.0, "learning_rate": 1.1771532389865393e-07, "loss": 0.9743, "step": 24346 }, { "epoch": 0.9526175757101495, "grad_norm": 0.0, "learning_rate": 1.1752153554137724e-07, "loss": 1.0823, "step": 24347 }, { "epoch": 0.9526567024023789, "grad_norm": 0.0, "learning_rate": 1.1732790588432019e-07, "loss": 0.9578, "step": 24348 }, { "epoch": 0.9526958290946084, "grad_norm": 0.0, "learning_rate": 1.1713443493059473e-07, "loss": 1.0412, "step": 24349 }, { "epoch": 0.9527349557868378, "grad_norm": 0.0, "learning_rate": 1.1694112268330505e-07, "loss": 0.9704, "step": 24350 }, { "epoch": 0.9527740824790673, "grad_norm": 0.0, "learning_rate": 1.1674796914555753e-07, "loss": 1.017, "step": 24351 }, { "epoch": 0.9528132091712966, "grad_norm": 0.0, "learning_rate": 1.1655497432045193e-07, "loss": 0.984, "step": 24352 }, { "epoch": 0.9528523358635261, "grad_norm": 0.0, "learning_rate": 1.1636213821109021e-07, "loss": 0.9857, "step": 24353 }, { "epoch": 0.9528914625557555, "grad_norm": 0.0, "learning_rate": 1.1616946082056657e-07, "loss": 0.9828, "step": 24354 }, { "epoch": 0.952930589247985, "grad_norm": 0.0, "learning_rate": 1.1597694215197741e-07, "loss": 0.8132, "step": 24355 }, { "epoch": 0.9529697159402144, "grad_norm": 0.0, "learning_rate": 1.1578458220841249e-07, "loss": 0.7852, "step": 24356 }, { "epoch": 0.9530088426324439, "grad_norm": 0.0, "learning_rate": 1.1559238099296155e-07, "loss": 0.8003, "step": 24357 }, { "epoch": 0.9530479693246733, "grad_norm": 0.0, "learning_rate": 1.1540033850871102e-07, "loss": 1.0505, "step": 24358 }, { "epoch": 0.9530870960169028, "grad_norm": 0.0, "learning_rate": 1.152084547587462e-07, "loss": 0.9155, "step": 24359 }, { "epoch": 0.9531262227091322, "grad_norm": 0.0, "learning_rate": 1.1501672974614575e-07, "loss": 1.0239, "step": 24360 }, { "epoch": 0.9531653494013617, "grad_norm": 0.0, "learning_rate": 1.1482516347399052e-07, "loss": 0.8963, "step": 24361 }, { "epoch": 0.953204476093591, "grad_norm": 0.0, "learning_rate": 1.1463375594535697e-07, "loss": 0.9327, "step": 24362 }, { "epoch": 0.9532436027858204, "grad_norm": 0.0, "learning_rate": 1.1444250716331706e-07, "loss": 0.8922, "step": 24363 }, { "epoch": 0.9532827294780499, "grad_norm": 0.0, "learning_rate": 1.142514171309439e-07, "loss": 0.9761, "step": 24364 }, { "epoch": 0.9533218561702793, "grad_norm": 0.0, "learning_rate": 1.1406048585130503e-07, "loss": 1.0213, "step": 24365 }, { "epoch": 0.9533609828625088, "grad_norm": 0.0, "learning_rate": 1.1386971332746798e-07, "loss": 0.9109, "step": 24366 }, { "epoch": 0.9534001095547382, "grad_norm": 0.0, "learning_rate": 1.1367909956249479e-07, "loss": 1.042, "step": 24367 }, { "epoch": 0.9534392362469677, "grad_norm": 0.0, "learning_rate": 1.1348864455944742e-07, "loss": 0.9161, "step": 24368 }, { "epoch": 0.9534783629391971, "grad_norm": 0.0, "learning_rate": 1.1329834832138232e-07, "loss": 0.9468, "step": 24369 }, { "epoch": 0.9535174896314266, "grad_norm": 0.0, "learning_rate": 1.1310821085135815e-07, "loss": 0.9229, "step": 24370 }, { "epoch": 0.953556616323656, "grad_norm": 0.0, "learning_rate": 1.1291823215242693e-07, "loss": 0.8056, "step": 24371 }, { "epoch": 0.9535957430158855, "grad_norm": 0.0, "learning_rate": 1.1272841222764063e-07, "loss": 0.9133, "step": 24372 }, { "epoch": 0.9536348697081148, "grad_norm": 0.0, "learning_rate": 1.1253875108004575e-07, "loss": 0.8925, "step": 24373 }, { "epoch": 0.9536739964003443, "grad_norm": 0.0, "learning_rate": 1.1234924871268871e-07, "loss": 0.9238, "step": 24374 }, { "epoch": 0.9537131230925737, "grad_norm": 0.0, "learning_rate": 1.1215990512861263e-07, "loss": 0.969, "step": 24375 }, { "epoch": 0.9537522497848032, "grad_norm": 0.0, "learning_rate": 1.1197072033085844e-07, "loss": 0.9573, "step": 24376 }, { "epoch": 0.9537913764770326, "grad_norm": 0.0, "learning_rate": 1.1178169432246477e-07, "loss": 1.0328, "step": 24377 }, { "epoch": 0.9538305031692621, "grad_norm": 0.0, "learning_rate": 1.1159282710646591e-07, "loss": 1.0017, "step": 24378 }, { "epoch": 0.9538696298614915, "grad_norm": 0.0, "learning_rate": 1.1140411868589496e-07, "loss": 0.9076, "step": 24379 }, { "epoch": 0.953908756553721, "grad_norm": 0.0, "learning_rate": 1.1121556906378394e-07, "loss": 0.8911, "step": 24380 }, { "epoch": 0.9539478832459504, "grad_norm": 0.0, "learning_rate": 1.1102717824315823e-07, "loss": 0.9653, "step": 24381 }, { "epoch": 0.9539870099381799, "grad_norm": 0.0, "learning_rate": 1.1083894622704539e-07, "loss": 0.9962, "step": 24382 }, { "epoch": 0.9540261366304092, "grad_norm": 0.0, "learning_rate": 1.1065087301846633e-07, "loss": 1.0513, "step": 24383 }, { "epoch": 0.9540652633226387, "grad_norm": 0.0, "learning_rate": 1.1046295862044309e-07, "loss": 0.9991, "step": 24384 }, { "epoch": 0.9541043900148681, "grad_norm": 0.0, "learning_rate": 1.1027520303599215e-07, "loss": 0.8882, "step": 24385 }, { "epoch": 0.9541435167070976, "grad_norm": 0.0, "learning_rate": 1.1008760626812997e-07, "loss": 0.9502, "step": 24386 }, { "epoch": 0.954182643399327, "grad_norm": 0.0, "learning_rate": 1.0990016831986639e-07, "loss": 0.87, "step": 24387 }, { "epoch": 0.9542217700915565, "grad_norm": 0.0, "learning_rate": 1.0971288919421341e-07, "loss": 1.0394, "step": 24388 }, { "epoch": 0.9542608967837859, "grad_norm": 0.0, "learning_rate": 1.0952576889417866e-07, "loss": 1.0267, "step": 24389 }, { "epoch": 0.9543000234760154, "grad_norm": 0.0, "learning_rate": 1.0933880742276748e-07, "loss": 0.9649, "step": 24390 }, { "epoch": 0.9543391501682448, "grad_norm": 0.0, "learning_rate": 1.0915200478297972e-07, "loss": 0.9312, "step": 24391 }, { "epoch": 0.9543782768604742, "grad_norm": 0.0, "learning_rate": 1.0896536097781741e-07, "loss": 0.9712, "step": 24392 }, { "epoch": 0.9544174035527037, "grad_norm": 0.0, "learning_rate": 1.0877887601027704e-07, "loss": 0.8967, "step": 24393 }, { "epoch": 0.954456530244933, "grad_norm": 0.0, "learning_rate": 1.085925498833551e-07, "loss": 1.0131, "step": 24394 }, { "epoch": 0.9544956569371625, "grad_norm": 0.0, "learning_rate": 1.0840638260004033e-07, "loss": 0.9425, "step": 24395 }, { "epoch": 0.9545347836293919, "grad_norm": 0.0, "learning_rate": 1.0822037416332475e-07, "loss": 1.0009, "step": 24396 }, { "epoch": 0.9545739103216214, "grad_norm": 0.0, "learning_rate": 1.080345245761949e-07, "loss": 1.002, "step": 24397 }, { "epoch": 0.9546130370138508, "grad_norm": 0.0, "learning_rate": 1.0784883384163502e-07, "loss": 0.9162, "step": 24398 }, { "epoch": 0.9546521637060803, "grad_norm": 0.0, "learning_rate": 1.0766330196262831e-07, "loss": 0.9742, "step": 24399 }, { "epoch": 0.9546912903983097, "grad_norm": 0.0, "learning_rate": 1.0747792894215236e-07, "loss": 0.9219, "step": 24400 }, { "epoch": 0.9547304170905392, "grad_norm": 0.0, "learning_rate": 1.0729271478318481e-07, "loss": 0.8666, "step": 24401 }, { "epoch": 0.9547695437827686, "grad_norm": 0.0, "learning_rate": 1.0710765948869995e-07, "loss": 0.9094, "step": 24402 }, { "epoch": 0.9548086704749981, "grad_norm": 0.0, "learning_rate": 1.0692276306166982e-07, "loss": 1.0022, "step": 24403 }, { "epoch": 0.9548477971672275, "grad_norm": 0.0, "learning_rate": 1.0673802550506318e-07, "loss": 1.0214, "step": 24404 }, { "epoch": 0.954886923859457, "grad_norm": 0.0, "learning_rate": 1.0655344682184764e-07, "loss": 0.9637, "step": 24405 }, { "epoch": 0.9549260505516863, "grad_norm": 0.0, "learning_rate": 1.063690270149853e-07, "loss": 0.9537, "step": 24406 }, { "epoch": 0.9549651772439158, "grad_norm": 0.0, "learning_rate": 1.0618476608744043e-07, "loss": 0.9913, "step": 24407 }, { "epoch": 0.9550043039361452, "grad_norm": 0.0, "learning_rate": 1.0600066404216957e-07, "loss": 0.9679, "step": 24408 }, { "epoch": 0.9550434306283747, "grad_norm": 0.0, "learning_rate": 1.0581672088213036e-07, "loss": 0.8873, "step": 24409 }, { "epoch": 0.9550825573206041, "grad_norm": 0.0, "learning_rate": 1.0563293661027596e-07, "loss": 0.9474, "step": 24410 }, { "epoch": 0.9551216840128336, "grad_norm": 0.0, "learning_rate": 1.054493112295596e-07, "loss": 0.8578, "step": 24411 }, { "epoch": 0.955160810705063, "grad_norm": 0.0, "learning_rate": 1.0526584474292778e-07, "loss": 1.0257, "step": 24412 }, { "epoch": 0.9551999373972925, "grad_norm": 0.0, "learning_rate": 1.0508253715332817e-07, "loss": 1.0294, "step": 24413 }, { "epoch": 0.9552390640895219, "grad_norm": 0.0, "learning_rate": 1.0489938846370285e-07, "loss": 1.0148, "step": 24414 }, { "epoch": 0.9552781907817514, "grad_norm": 0.0, "learning_rate": 1.0471639867699612e-07, "loss": 0.9982, "step": 24415 }, { "epoch": 0.9553173174739807, "grad_norm": 0.0, "learning_rate": 1.0453356779614233e-07, "loss": 1.0839, "step": 24416 }, { "epoch": 0.9553564441662102, "grad_norm": 0.0, "learning_rate": 1.0435089582408131e-07, "loss": 0.9931, "step": 24417 }, { "epoch": 0.9553955708584396, "grad_norm": 0.0, "learning_rate": 1.041683827637452e-07, "loss": 0.8781, "step": 24418 }, { "epoch": 0.9554346975506691, "grad_norm": 0.0, "learning_rate": 1.0398602861806383e-07, "loss": 0.9516, "step": 24419 }, { "epoch": 0.9554738242428985, "grad_norm": 0.0, "learning_rate": 1.0380383338996603e-07, "loss": 0.944, "step": 24420 }, { "epoch": 0.9555129509351279, "grad_norm": 0.0, "learning_rate": 1.036217970823783e-07, "loss": 1.0328, "step": 24421 }, { "epoch": 0.9555520776273574, "grad_norm": 0.0, "learning_rate": 1.0343991969822387e-07, "loss": 0.9092, "step": 24422 }, { "epoch": 0.9555912043195868, "grad_norm": 0.0, "learning_rate": 1.0325820124042374e-07, "loss": 0.9639, "step": 24423 }, { "epoch": 0.9556303310118163, "grad_norm": 0.0, "learning_rate": 1.0307664171189446e-07, "loss": 0.9314, "step": 24424 }, { "epoch": 0.9556694577040457, "grad_norm": 0.0, "learning_rate": 1.0289524111555371e-07, "loss": 1.0127, "step": 24425 }, { "epoch": 0.9557085843962752, "grad_norm": 0.0, "learning_rate": 1.027139994543136e-07, "loss": 0.9656, "step": 24426 }, { "epoch": 0.9557477110885045, "grad_norm": 0.0, "learning_rate": 1.0253291673108512e-07, "loss": 0.9867, "step": 24427 }, { "epoch": 0.955786837780734, "grad_norm": 0.0, "learning_rate": 1.0235199294877374e-07, "loss": 0.9221, "step": 24428 }, { "epoch": 0.9558259644729634, "grad_norm": 0.0, "learning_rate": 1.0217122811028934e-07, "loss": 0.852, "step": 24429 }, { "epoch": 0.9558650911651929, "grad_norm": 0.0, "learning_rate": 1.0199062221853074e-07, "loss": 0.9476, "step": 24430 }, { "epoch": 0.9559042178574223, "grad_norm": 0.0, "learning_rate": 1.0181017527640114e-07, "loss": 0.8773, "step": 24431 }, { "epoch": 0.9559433445496518, "grad_norm": 0.0, "learning_rate": 1.016298872867949e-07, "loss": 1.0283, "step": 24432 }, { "epoch": 0.9559824712418812, "grad_norm": 0.0, "learning_rate": 1.0144975825261083e-07, "loss": 1.0731, "step": 24433 }, { "epoch": 0.9560215979341107, "grad_norm": 0.0, "learning_rate": 1.0126978817673994e-07, "loss": 0.8925, "step": 24434 }, { "epoch": 0.9560607246263401, "grad_norm": 0.0, "learning_rate": 1.0108997706207324e-07, "loss": 1.022, "step": 24435 }, { "epoch": 0.9560998513185696, "grad_norm": 0.0, "learning_rate": 1.0091032491149621e-07, "loss": 1.0272, "step": 24436 }, { "epoch": 0.956138978010799, "grad_norm": 0.0, "learning_rate": 1.0073083172789543e-07, "loss": 0.943, "step": 24437 }, { "epoch": 0.9561781047030284, "grad_norm": 0.0, "learning_rate": 1.0055149751415305e-07, "loss": 1.1159, "step": 24438 }, { "epoch": 0.9562172313952578, "grad_norm": 0.0, "learning_rate": 1.0037232227314897e-07, "loss": 0.8768, "step": 24439 }, { "epoch": 0.9562563580874873, "grad_norm": 0.0, "learning_rate": 1.0019330600776089e-07, "loss": 0.998, "step": 24440 }, { "epoch": 0.9562954847797167, "grad_norm": 0.0, "learning_rate": 1.0001444872086208e-07, "loss": 0.9794, "step": 24441 }, { "epoch": 0.9563346114719462, "grad_norm": 0.0, "learning_rate": 9.98357504153269e-08, "loss": 0.906, "step": 24442 }, { "epoch": 0.9563737381641756, "grad_norm": 0.0, "learning_rate": 9.965721109402415e-08, "loss": 0.8988, "step": 24443 }, { "epoch": 0.9564128648564051, "grad_norm": 0.0, "learning_rate": 9.947883075981934e-08, "loss": 0.9733, "step": 24444 }, { "epoch": 0.9564519915486345, "grad_norm": 0.0, "learning_rate": 9.930060941557907e-08, "loss": 0.9901, "step": 24445 }, { "epoch": 0.956491118240864, "grad_norm": 0.0, "learning_rate": 9.912254706416546e-08, "loss": 0.8566, "step": 24446 }, { "epoch": 0.9565302449330934, "grad_norm": 0.0, "learning_rate": 9.894464370843626e-08, "loss": 0.9263, "step": 24447 }, { "epoch": 0.9565693716253229, "grad_norm": 0.0, "learning_rate": 9.876689935125027e-08, "loss": 0.9413, "step": 24448 }, { "epoch": 0.9566084983175522, "grad_norm": 0.0, "learning_rate": 9.858931399546079e-08, "loss": 1.004, "step": 24449 }, { "epoch": 0.9566476250097816, "grad_norm": 0.0, "learning_rate": 9.841188764391996e-08, "loss": 0.9507, "step": 24450 }, { "epoch": 0.9566867517020111, "grad_norm": 0.0, "learning_rate": 9.82346202994755e-08, "loss": 0.9977, "step": 24451 }, { "epoch": 0.9567258783942405, "grad_norm": 0.0, "learning_rate": 9.805751196497737e-08, "loss": 0.7929, "step": 24452 }, { "epoch": 0.95676500508647, "grad_norm": 0.0, "learning_rate": 9.788056264326661e-08, "loss": 1.0478, "step": 24453 }, { "epoch": 0.9568041317786994, "grad_norm": 0.0, "learning_rate": 9.770377233718543e-08, "loss": 0.9885, "step": 24454 }, { "epoch": 0.9568432584709289, "grad_norm": 0.0, "learning_rate": 9.752714104957372e-08, "loss": 0.984, "step": 24455 }, { "epoch": 0.9568823851631583, "grad_norm": 0.0, "learning_rate": 9.735066878326816e-08, "loss": 0.957, "step": 24456 }, { "epoch": 0.9569215118553878, "grad_norm": 0.0, "learning_rate": 9.7174355541102e-08, "loss": 1.0522, "step": 24457 }, { "epoch": 0.9569606385476171, "grad_norm": 0.0, "learning_rate": 9.699820132590742e-08, "loss": 0.8881, "step": 24458 }, { "epoch": 0.9569997652398466, "grad_norm": 0.0, "learning_rate": 9.682220614051108e-08, "loss": 0.8724, "step": 24459 }, { "epoch": 0.957038891932076, "grad_norm": 0.0, "learning_rate": 9.664636998774179e-08, "loss": 1.0157, "step": 24460 }, { "epoch": 0.9570780186243055, "grad_norm": 0.0, "learning_rate": 9.647069287042176e-08, "loss": 1.0169, "step": 24461 }, { "epoch": 0.9571171453165349, "grad_norm": 0.0, "learning_rate": 9.629517479137318e-08, "loss": 0.9716, "step": 24462 }, { "epoch": 0.9571562720087644, "grad_norm": 0.0, "learning_rate": 9.611981575341267e-08, "loss": 1.0997, "step": 24463 }, { "epoch": 0.9571953987009938, "grad_norm": 0.0, "learning_rate": 9.594461575935909e-08, "loss": 0.9282, "step": 24464 }, { "epoch": 0.9572345253932233, "grad_norm": 0.0, "learning_rate": 9.576957481202353e-08, "loss": 1.0157, "step": 24465 }, { "epoch": 0.9572736520854527, "grad_norm": 0.0, "learning_rate": 9.55946929142193e-08, "loss": 0.9649, "step": 24466 }, { "epoch": 0.9573127787776822, "grad_norm": 0.0, "learning_rate": 9.541997006875192e-08, "loss": 0.9731, "step": 24467 }, { "epoch": 0.9573519054699116, "grad_norm": 0.0, "learning_rate": 9.524540627842915e-08, "loss": 0.9422, "step": 24468 }, { "epoch": 0.957391032162141, "grad_norm": 0.0, "learning_rate": 9.50710015460543e-08, "loss": 0.9245, "step": 24469 }, { "epoch": 0.9574301588543704, "grad_norm": 0.0, "learning_rate": 9.489675587442737e-08, "loss": 0.883, "step": 24470 }, { "epoch": 0.9574692855465999, "grad_norm": 0.0, "learning_rate": 9.47226692663461e-08, "loss": 0.9231, "step": 24471 }, { "epoch": 0.9575084122388293, "grad_norm": 0.0, "learning_rate": 9.454874172460715e-08, "loss": 1.0245, "step": 24472 }, { "epoch": 0.9575475389310588, "grad_norm": 0.0, "learning_rate": 9.437497325200385e-08, "loss": 0.937, "step": 24473 }, { "epoch": 0.9575866656232882, "grad_norm": 0.0, "learning_rate": 9.420136385132505e-08, "loss": 0.9988, "step": 24474 }, { "epoch": 0.9576257923155177, "grad_norm": 0.0, "learning_rate": 9.402791352536078e-08, "loss": 1.0546, "step": 24475 }, { "epoch": 0.9576649190077471, "grad_norm": 0.0, "learning_rate": 9.385462227689435e-08, "loss": 0.9271, "step": 24476 }, { "epoch": 0.9577040456999765, "grad_norm": 0.0, "learning_rate": 9.368149010870909e-08, "loss": 0.9547, "step": 24477 }, { "epoch": 0.957743172392206, "grad_norm": 0.0, "learning_rate": 9.350851702358721e-08, "loss": 0.9664, "step": 24478 }, { "epoch": 0.9577822990844354, "grad_norm": 0.0, "learning_rate": 9.333570302430428e-08, "loss": 0.8338, "step": 24479 }, { "epoch": 0.9578214257766648, "grad_norm": 0.0, "learning_rate": 9.316304811363696e-08, "loss": 0.9222, "step": 24480 }, { "epoch": 0.9578605524688942, "grad_norm": 0.0, "learning_rate": 9.299055229435527e-08, "loss": 0.8606, "step": 24481 }, { "epoch": 0.9578996791611237, "grad_norm": 0.0, "learning_rate": 9.281821556923365e-08, "loss": 0.8335, "step": 24482 }, { "epoch": 0.9579388058533531, "grad_norm": 0.0, "learning_rate": 9.264603794103544e-08, "loss": 1.0286, "step": 24483 }, { "epoch": 0.9579779325455826, "grad_norm": 0.0, "learning_rate": 9.247401941252732e-08, "loss": 0.9164, "step": 24484 }, { "epoch": 0.958017059237812, "grad_norm": 0.0, "learning_rate": 9.230215998647263e-08, "loss": 0.8877, "step": 24485 }, { "epoch": 0.9580561859300415, "grad_norm": 0.0, "learning_rate": 9.213045966562917e-08, "loss": 1.1136, "step": 24486 }, { "epoch": 0.9580953126222709, "grad_norm": 0.0, "learning_rate": 9.195891845275473e-08, "loss": 0.9679, "step": 24487 }, { "epoch": 0.9581344393145004, "grad_norm": 0.0, "learning_rate": 9.1787536350606e-08, "loss": 0.9563, "step": 24488 }, { "epoch": 0.9581735660067298, "grad_norm": 0.0, "learning_rate": 9.1616313361933e-08, "loss": 0.8988, "step": 24489 }, { "epoch": 0.9582126926989593, "grad_norm": 0.0, "learning_rate": 9.144524948948685e-08, "loss": 0.9706, "step": 24490 }, { "epoch": 0.9582518193911886, "grad_norm": 0.0, "learning_rate": 9.127434473601315e-08, "loss": 1.0472, "step": 24491 }, { "epoch": 0.9582909460834181, "grad_norm": 0.0, "learning_rate": 9.110359910425748e-08, "loss": 0.8328, "step": 24492 }, { "epoch": 0.9583300727756475, "grad_norm": 0.0, "learning_rate": 9.093301259696096e-08, "loss": 0.9216, "step": 24493 }, { "epoch": 0.958369199467877, "grad_norm": 0.0, "learning_rate": 9.076258521686366e-08, "loss": 0.94, "step": 24494 }, { "epoch": 0.9584083261601064, "grad_norm": 0.0, "learning_rate": 9.059231696670334e-08, "loss": 0.9345, "step": 24495 }, { "epoch": 0.9584474528523359, "grad_norm": 0.0, "learning_rate": 9.04222078492123e-08, "loss": 0.9412, "step": 24496 }, { "epoch": 0.9584865795445653, "grad_norm": 0.0, "learning_rate": 9.025225786712388e-08, "loss": 1.1846, "step": 24497 }, { "epoch": 0.9585257062367948, "grad_norm": 0.0, "learning_rate": 9.008246702316481e-08, "loss": 0.8776, "step": 24498 }, { "epoch": 0.9585648329290242, "grad_norm": 0.0, "learning_rate": 8.991283532006511e-08, "loss": 0.9915, "step": 24499 }, { "epoch": 0.9586039596212537, "grad_norm": 0.0, "learning_rate": 8.974336276054707e-08, "loss": 1.0645, "step": 24500 }, { "epoch": 0.958643086313483, "grad_norm": 0.0, "learning_rate": 8.957404934733183e-08, "loss": 1.0706, "step": 24501 }, { "epoch": 0.9586822130057125, "grad_norm": 0.0, "learning_rate": 8.940489508313944e-08, "loss": 0.8938, "step": 24502 }, { "epoch": 0.9587213396979419, "grad_norm": 0.0, "learning_rate": 8.923589997068549e-08, "loss": 0.8873, "step": 24503 }, { "epoch": 0.9587604663901714, "grad_norm": 0.0, "learning_rate": 8.906706401268339e-08, "loss": 0.8984, "step": 24504 }, { "epoch": 0.9587995930824008, "grad_norm": 0.0, "learning_rate": 8.889838721184652e-08, "loss": 0.9448, "step": 24505 }, { "epoch": 0.9588387197746302, "grad_norm": 0.0, "learning_rate": 8.872986957088159e-08, "loss": 0.8866, "step": 24506 }, { "epoch": 0.9588778464668597, "grad_norm": 0.0, "learning_rate": 8.856151109249533e-08, "loss": 1.0053, "step": 24507 }, { "epoch": 0.9589169731590891, "grad_norm": 0.0, "learning_rate": 8.839331177939114e-08, "loss": 0.8881, "step": 24508 }, { "epoch": 0.9589560998513186, "grad_norm": 0.0, "learning_rate": 8.822527163427131e-08, "loss": 0.9311, "step": 24509 }, { "epoch": 0.958995226543548, "grad_norm": 0.0, "learning_rate": 8.805739065983254e-08, "loss": 0.8551, "step": 24510 }, { "epoch": 0.9590343532357775, "grad_norm": 0.0, "learning_rate": 8.78896688587727e-08, "loss": 1.0584, "step": 24511 }, { "epoch": 0.9590734799280068, "grad_norm": 0.0, "learning_rate": 8.772210623378297e-08, "loss": 0.9682, "step": 24512 }, { "epoch": 0.9591126066202363, "grad_norm": 0.0, "learning_rate": 8.755470278755674e-08, "loss": 0.9666, "step": 24513 }, { "epoch": 0.9591517333124657, "grad_norm": 0.0, "learning_rate": 8.738745852278075e-08, "loss": 1.0016, "step": 24514 }, { "epoch": 0.9591908600046952, "grad_norm": 0.0, "learning_rate": 8.722037344214062e-08, "loss": 0.7907, "step": 24515 }, { "epoch": 0.9592299866969246, "grad_norm": 0.0, "learning_rate": 8.705344754831979e-08, "loss": 0.9384, "step": 24516 }, { "epoch": 0.9592691133891541, "grad_norm": 0.0, "learning_rate": 8.688668084399942e-08, "loss": 0.9679, "step": 24517 }, { "epoch": 0.9593082400813835, "grad_norm": 0.0, "learning_rate": 8.672007333185628e-08, "loss": 1.1091, "step": 24518 }, { "epoch": 0.959347366773613, "grad_norm": 0.0, "learning_rate": 8.655362501456821e-08, "loss": 1.037, "step": 24519 }, { "epoch": 0.9593864934658424, "grad_norm": 0.0, "learning_rate": 8.63873358948053e-08, "loss": 0.9652, "step": 24520 }, { "epoch": 0.9594256201580719, "grad_norm": 0.0, "learning_rate": 8.622120597523986e-08, "loss": 0.9344, "step": 24521 }, { "epoch": 0.9594647468503013, "grad_norm": 0.0, "learning_rate": 8.605523525853865e-08, "loss": 0.9097, "step": 24522 }, { "epoch": 0.9595038735425307, "grad_norm": 0.0, "learning_rate": 8.588942374736841e-08, "loss": 0.9373, "step": 24523 }, { "epoch": 0.9595430002347601, "grad_norm": 0.0, "learning_rate": 8.572377144439037e-08, "loss": 1.0536, "step": 24524 }, { "epoch": 0.9595821269269896, "grad_norm": 0.0, "learning_rate": 8.555827835226571e-08, "loss": 0.9643, "step": 24525 }, { "epoch": 0.959621253619219, "grad_norm": 0.0, "learning_rate": 8.53929444736501e-08, "loss": 0.9103, "step": 24526 }, { "epoch": 0.9596603803114485, "grad_norm": 0.0, "learning_rate": 8.522776981120251e-08, "loss": 0.8263, "step": 24527 }, { "epoch": 0.9596995070036779, "grad_norm": 0.0, "learning_rate": 8.506275436757083e-08, "loss": 0.9179, "step": 24528 }, { "epoch": 0.9597386336959074, "grad_norm": 0.0, "learning_rate": 8.48978981454096e-08, "loss": 0.8109, "step": 24529 }, { "epoch": 0.9597777603881368, "grad_norm": 0.0, "learning_rate": 8.473320114736228e-08, "loss": 0.9792, "step": 24530 }, { "epoch": 0.9598168870803663, "grad_norm": 0.0, "learning_rate": 8.45686633760745e-08, "loss": 1.0599, "step": 24531 }, { "epoch": 0.9598560137725957, "grad_norm": 0.0, "learning_rate": 8.440428483418972e-08, "loss": 0.9282, "step": 24532 }, { "epoch": 0.9598951404648252, "grad_norm": 0.0, "learning_rate": 8.424006552434805e-08, "loss": 0.9941, "step": 24533 }, { "epoch": 0.9599342671570545, "grad_norm": 0.0, "learning_rate": 8.407600544918515e-08, "loss": 0.9862, "step": 24534 }, { "epoch": 0.9599733938492839, "grad_norm": 0.0, "learning_rate": 8.39121046113367e-08, "loss": 1.0316, "step": 24535 }, { "epoch": 0.9600125205415134, "grad_norm": 0.0, "learning_rate": 8.374836301343393e-08, "loss": 0.9759, "step": 24536 }, { "epoch": 0.9600516472337428, "grad_norm": 0.0, "learning_rate": 8.358478065810693e-08, "loss": 0.8733, "step": 24537 }, { "epoch": 0.9600907739259723, "grad_norm": 0.0, "learning_rate": 8.342135754798253e-08, "loss": 0.9613, "step": 24538 }, { "epoch": 0.9601299006182017, "grad_norm": 0.0, "learning_rate": 8.325809368568417e-08, "loss": 0.9951, "step": 24539 }, { "epoch": 0.9601690273104312, "grad_norm": 0.0, "learning_rate": 8.309498907383528e-08, "loss": 0.8785, "step": 24540 }, { "epoch": 0.9602081540026606, "grad_norm": 0.0, "learning_rate": 8.293204371505381e-08, "loss": 0.9879, "step": 24541 }, { "epoch": 0.9602472806948901, "grad_norm": 0.0, "learning_rate": 8.276925761195654e-08, "loss": 1.0155, "step": 24542 }, { "epoch": 0.9602864073871195, "grad_norm": 0.0, "learning_rate": 8.260663076715913e-08, "loss": 0.9097, "step": 24543 }, { "epoch": 0.960325534079349, "grad_norm": 0.0, "learning_rate": 8.244416318327175e-08, "loss": 0.9386, "step": 24544 }, { "epoch": 0.9603646607715783, "grad_norm": 0.0, "learning_rate": 8.228185486290341e-08, "loss": 0.9563, "step": 24545 }, { "epoch": 0.9604037874638078, "grad_norm": 0.0, "learning_rate": 8.211970580866091e-08, "loss": 0.9284, "step": 24546 }, { "epoch": 0.9604429141560372, "grad_norm": 0.0, "learning_rate": 8.195771602314773e-08, "loss": 1.0311, "step": 24547 }, { "epoch": 0.9604820408482667, "grad_norm": 0.0, "learning_rate": 8.179588550896622e-08, "loss": 0.8638, "step": 24548 }, { "epoch": 0.9605211675404961, "grad_norm": 0.0, "learning_rate": 8.163421426871432e-08, "loss": 0.988, "step": 24549 }, { "epoch": 0.9605602942327256, "grad_norm": 0.0, "learning_rate": 8.147270230498883e-08, "loss": 0.8907, "step": 24550 }, { "epoch": 0.960599420924955, "grad_norm": 0.0, "learning_rate": 8.131134962038211e-08, "loss": 0.9186, "step": 24551 }, { "epoch": 0.9606385476171845, "grad_norm": 0.0, "learning_rate": 8.115015621748768e-08, "loss": 0.9105, "step": 24552 }, { "epoch": 0.9606776743094139, "grad_norm": 0.0, "learning_rate": 8.09891220988912e-08, "loss": 0.9724, "step": 24553 }, { "epoch": 0.9607168010016434, "grad_norm": 0.0, "learning_rate": 8.082824726718174e-08, "loss": 0.9421, "step": 24554 }, { "epoch": 0.9607559276938727, "grad_norm": 0.0, "learning_rate": 8.066753172494057e-08, "loss": 0.9438, "step": 24555 }, { "epoch": 0.9607950543861022, "grad_norm": 0.0, "learning_rate": 8.050697547475006e-08, "loss": 0.8972, "step": 24556 }, { "epoch": 0.9608341810783316, "grad_norm": 0.0, "learning_rate": 8.034657851918592e-08, "loss": 0.9304, "step": 24557 }, { "epoch": 0.9608733077705611, "grad_norm": 0.0, "learning_rate": 8.018634086082721e-08, "loss": 0.8464, "step": 24558 }, { "epoch": 0.9609124344627905, "grad_norm": 0.0, "learning_rate": 8.002626250224521e-08, "loss": 0.8318, "step": 24559 }, { "epoch": 0.96095156115502, "grad_norm": 0.0, "learning_rate": 7.986634344601119e-08, "loss": 0.9105, "step": 24560 }, { "epoch": 0.9609906878472494, "grad_norm": 0.0, "learning_rate": 7.970658369469198e-08, "loss": 0.9509, "step": 24561 }, { "epoch": 0.9610298145394788, "grad_norm": 0.0, "learning_rate": 7.954698325085553e-08, "loss": 0.9307, "step": 24562 }, { "epoch": 0.9610689412317083, "grad_norm": 0.0, "learning_rate": 7.938754211706312e-08, "loss": 0.9053, "step": 24563 }, { "epoch": 0.9611080679239377, "grad_norm": 0.0, "learning_rate": 7.922826029587494e-08, "loss": 1.0189, "step": 24564 }, { "epoch": 0.9611471946161672, "grad_norm": 0.0, "learning_rate": 7.906913778985004e-08, "loss": 1.0381, "step": 24565 }, { "epoch": 0.9611863213083965, "grad_norm": 0.0, "learning_rate": 7.891017460154305e-08, "loss": 0.9475, "step": 24566 }, { "epoch": 0.961225448000626, "grad_norm": 0.0, "learning_rate": 7.875137073350636e-08, "loss": 1.041, "step": 24567 }, { "epoch": 0.9612645746928554, "grad_norm": 0.0, "learning_rate": 7.859272618829239e-08, "loss": 1.0217, "step": 24568 }, { "epoch": 0.9613037013850849, "grad_norm": 0.0, "learning_rate": 7.843424096844576e-08, "loss": 1.0377, "step": 24569 }, { "epoch": 0.9613428280773143, "grad_norm": 0.0, "learning_rate": 7.827591507651222e-08, "loss": 0.9392, "step": 24570 }, { "epoch": 0.9613819547695438, "grad_norm": 0.0, "learning_rate": 7.811774851503528e-08, "loss": 0.9292, "step": 24571 }, { "epoch": 0.9614210814617732, "grad_norm": 0.0, "learning_rate": 7.795974128655514e-08, "loss": 0.9207, "step": 24572 }, { "epoch": 0.9614602081540027, "grad_norm": 0.0, "learning_rate": 7.780189339360755e-08, "loss": 1.0219, "step": 24573 }, { "epoch": 0.9614993348462321, "grad_norm": 0.0, "learning_rate": 7.764420483872937e-08, "loss": 0.9216, "step": 24574 }, { "epoch": 0.9615384615384616, "grad_norm": 0.0, "learning_rate": 7.748667562445078e-08, "loss": 0.8583, "step": 24575 }, { "epoch": 0.961577588230691, "grad_norm": 0.0, "learning_rate": 7.732930575330422e-08, "loss": 0.9688, "step": 24576 }, { "epoch": 0.9616167149229204, "grad_norm": 0.0, "learning_rate": 7.717209522781322e-08, "loss": 0.9037, "step": 24577 }, { "epoch": 0.9616558416151498, "grad_norm": 0.0, "learning_rate": 7.701504405050575e-08, "loss": 1.0445, "step": 24578 }, { "epoch": 0.9616949683073793, "grad_norm": 0.0, "learning_rate": 7.685815222390202e-08, "loss": 0.9482, "step": 24579 }, { "epoch": 0.9617340949996087, "grad_norm": 0.0, "learning_rate": 7.670141975052226e-08, "loss": 1.0012, "step": 24580 }, { "epoch": 0.9617732216918382, "grad_norm": 0.0, "learning_rate": 7.65448466328822e-08, "loss": 0.92, "step": 24581 }, { "epoch": 0.9618123483840676, "grad_norm": 0.0, "learning_rate": 7.638843287349873e-08, "loss": 0.9567, "step": 24582 }, { "epoch": 0.9618514750762971, "grad_norm": 0.0, "learning_rate": 7.623217847488096e-08, "loss": 0.8846, "step": 24583 }, { "epoch": 0.9618906017685265, "grad_norm": 0.0, "learning_rate": 7.607608343953909e-08, "loss": 0.9672, "step": 24584 }, { "epoch": 0.961929728460756, "grad_norm": 0.0, "learning_rate": 7.592014776998003e-08, "loss": 1.0517, "step": 24585 }, { "epoch": 0.9619688551529854, "grad_norm": 0.0, "learning_rate": 7.57643714687084e-08, "loss": 0.9053, "step": 24586 }, { "epoch": 0.9620079818452149, "grad_norm": 0.0, "learning_rate": 7.560875453822447e-08, "loss": 1.0745, "step": 24587 }, { "epoch": 0.9620471085374442, "grad_norm": 0.0, "learning_rate": 7.545329698102843e-08, "loss": 0.9653, "step": 24588 }, { "epoch": 0.9620862352296737, "grad_norm": 0.0, "learning_rate": 7.529799879961608e-08, "loss": 1.0138, "step": 24589 }, { "epoch": 0.9621253619219031, "grad_norm": 0.0, "learning_rate": 7.514285999648208e-08, "loss": 0.858, "step": 24590 }, { "epoch": 0.9621644886141325, "grad_norm": 0.0, "learning_rate": 7.498788057411665e-08, "loss": 0.8233, "step": 24591 }, { "epoch": 0.962203615306362, "grad_norm": 0.0, "learning_rate": 7.483306053500894e-08, "loss": 1.0699, "step": 24592 }, { "epoch": 0.9622427419985914, "grad_norm": 0.0, "learning_rate": 7.467839988164583e-08, "loss": 1.0078, "step": 24593 }, { "epoch": 0.9622818686908209, "grad_norm": 0.0, "learning_rate": 7.452389861650977e-08, "loss": 0.9777, "step": 24594 }, { "epoch": 0.9623209953830503, "grad_norm": 0.0, "learning_rate": 7.436955674208324e-08, "loss": 1.0214, "step": 24595 }, { "epoch": 0.9623601220752798, "grad_norm": 0.0, "learning_rate": 7.421537426084425e-08, "loss": 0.9292, "step": 24596 }, { "epoch": 0.9623992487675092, "grad_norm": 0.0, "learning_rate": 7.406135117526858e-08, "loss": 0.974, "step": 24597 }, { "epoch": 0.9624383754597386, "grad_norm": 0.0, "learning_rate": 7.390748748782873e-08, "loss": 1.0192, "step": 24598 }, { "epoch": 0.962477502151968, "grad_norm": 0.0, "learning_rate": 7.375378320099824e-08, "loss": 1.0253, "step": 24599 }, { "epoch": 0.9625166288441975, "grad_norm": 0.0, "learning_rate": 7.360023831724183e-08, "loss": 0.9175, "step": 24600 }, { "epoch": 0.9625557555364269, "grad_norm": 0.0, "learning_rate": 7.344685283902863e-08, "loss": 0.9059, "step": 24601 }, { "epoch": 0.9625948822286564, "grad_norm": 0.0, "learning_rate": 7.32936267688189e-08, "loss": 0.9431, "step": 24602 }, { "epoch": 0.9626340089208858, "grad_norm": 0.0, "learning_rate": 7.314056010907622e-08, "loss": 0.9373, "step": 24603 }, { "epoch": 0.9626731356131153, "grad_norm": 0.0, "learning_rate": 7.298765286225639e-08, "loss": 0.9217, "step": 24604 }, { "epoch": 0.9627122623053447, "grad_norm": 0.0, "learning_rate": 7.283490503081636e-08, "loss": 0.9302, "step": 24605 }, { "epoch": 0.9627513889975742, "grad_norm": 0.0, "learning_rate": 7.268231661720637e-08, "loss": 0.8644, "step": 24606 }, { "epoch": 0.9627905156898036, "grad_norm": 0.0, "learning_rate": 7.252988762388003e-08, "loss": 0.9348, "step": 24607 }, { "epoch": 0.9628296423820331, "grad_norm": 0.0, "learning_rate": 7.237761805328425e-08, "loss": 0.913, "step": 24608 }, { "epoch": 0.9628687690742624, "grad_norm": 0.0, "learning_rate": 7.222550790786376e-08, "loss": 0.8613, "step": 24609 }, { "epoch": 0.9629078957664919, "grad_norm": 0.0, "learning_rate": 7.207355719005993e-08, "loss": 0.9617, "step": 24610 }, { "epoch": 0.9629470224587213, "grad_norm": 0.0, "learning_rate": 7.192176590231636e-08, "loss": 0.8593, "step": 24611 }, { "epoch": 0.9629861491509508, "grad_norm": 0.0, "learning_rate": 7.177013404706779e-08, "loss": 0.9818, "step": 24612 }, { "epoch": 0.9630252758431802, "grad_norm": 0.0, "learning_rate": 7.161866162675001e-08, "loss": 0.9493, "step": 24613 }, { "epoch": 0.9630644025354097, "grad_norm": 0.0, "learning_rate": 7.146734864379556e-08, "loss": 0.9388, "step": 24614 }, { "epoch": 0.9631035292276391, "grad_norm": 0.0, "learning_rate": 7.13161951006347e-08, "loss": 0.9362, "step": 24615 }, { "epoch": 0.9631426559198686, "grad_norm": 0.0, "learning_rate": 7.116520099969437e-08, "loss": 0.9043, "step": 24616 }, { "epoch": 0.963181782612098, "grad_norm": 0.0, "learning_rate": 7.101436634339931e-08, "loss": 0.89, "step": 24617 }, { "epoch": 0.9632209093043275, "grad_norm": 0.0, "learning_rate": 7.086369113417201e-08, "loss": 0.9571, "step": 24618 }, { "epoch": 0.9632600359965569, "grad_norm": 0.0, "learning_rate": 7.071317537443167e-08, "loss": 1.1205, "step": 24619 }, { "epoch": 0.9632991626887862, "grad_norm": 0.0, "learning_rate": 7.056281906659524e-08, "loss": 1.0262, "step": 24620 }, { "epoch": 0.9633382893810157, "grad_norm": 0.0, "learning_rate": 7.041262221307854e-08, "loss": 0.9196, "step": 24621 }, { "epoch": 0.9633774160732451, "grad_norm": 0.0, "learning_rate": 7.02625848162919e-08, "loss": 1.054, "step": 24622 }, { "epoch": 0.9634165427654746, "grad_norm": 0.0, "learning_rate": 7.011270687864557e-08, "loss": 0.8467, "step": 24623 }, { "epoch": 0.963455669457704, "grad_norm": 0.0, "learning_rate": 6.996298840254656e-08, "loss": 0.9771, "step": 24624 }, { "epoch": 0.9634947961499335, "grad_norm": 0.0, "learning_rate": 6.981342939039848e-08, "loss": 1.0265, "step": 24625 }, { "epoch": 0.9635339228421629, "grad_norm": 0.0, "learning_rate": 6.966402984460385e-08, "loss": 0.8607, "step": 24626 }, { "epoch": 0.9635730495343924, "grad_norm": 0.0, "learning_rate": 6.951478976756188e-08, "loss": 0.9984, "step": 24627 }, { "epoch": 0.9636121762266218, "grad_norm": 0.0, "learning_rate": 6.93657091616684e-08, "loss": 0.9091, "step": 24628 }, { "epoch": 0.9636513029188513, "grad_norm": 0.0, "learning_rate": 6.92167880293182e-08, "loss": 0.9171, "step": 24629 }, { "epoch": 0.9636904296110806, "grad_norm": 0.0, "learning_rate": 6.906802637290266e-08, "loss": 0.9202, "step": 24630 }, { "epoch": 0.9637295563033101, "grad_norm": 0.0, "learning_rate": 6.891942419480879e-08, "loss": 0.9044, "step": 24631 }, { "epoch": 0.9637686829955395, "grad_norm": 0.0, "learning_rate": 6.877098149742579e-08, "loss": 0.9586, "step": 24632 }, { "epoch": 0.963807809687769, "grad_norm": 0.0, "learning_rate": 6.862269828313616e-08, "loss": 0.9932, "step": 24633 }, { "epoch": 0.9638469363799984, "grad_norm": 0.0, "learning_rate": 6.847457455432249e-08, "loss": 0.8306, "step": 24634 }, { "epoch": 0.9638860630722279, "grad_norm": 0.0, "learning_rate": 6.832661031336063e-08, "loss": 0.9902, "step": 24635 }, { "epoch": 0.9639251897644573, "grad_norm": 0.0, "learning_rate": 6.817880556262868e-08, "loss": 0.8657, "step": 24636 }, { "epoch": 0.9639643164566868, "grad_norm": 0.0, "learning_rate": 6.803116030449919e-08, "loss": 1.0145, "step": 24637 }, { "epoch": 0.9640034431489162, "grad_norm": 0.0, "learning_rate": 6.788367454134581e-08, "loss": 0.893, "step": 24638 }, { "epoch": 0.9640425698411457, "grad_norm": 0.0, "learning_rate": 6.773634827553333e-08, "loss": 0.9937, "step": 24639 }, { "epoch": 0.964081696533375, "grad_norm": 0.0, "learning_rate": 6.758918150942984e-08, "loss": 1.0078, "step": 24640 }, { "epoch": 0.9641208232256046, "grad_norm": 0.0, "learning_rate": 6.74421742453979e-08, "loss": 1.0476, "step": 24641 }, { "epoch": 0.9641599499178339, "grad_norm": 0.0, "learning_rate": 6.729532648579895e-08, "loss": 0.8524, "step": 24642 }, { "epoch": 0.9641990766100634, "grad_norm": 0.0, "learning_rate": 6.714863823299e-08, "loss": 0.961, "step": 24643 }, { "epoch": 0.9642382033022928, "grad_norm": 0.0, "learning_rate": 6.700210948932805e-08, "loss": 0.9755, "step": 24644 }, { "epoch": 0.9642773299945223, "grad_norm": 0.0, "learning_rate": 6.685574025716457e-08, "loss": 1.0093, "step": 24645 }, { "epoch": 0.9643164566867517, "grad_norm": 0.0, "learning_rate": 6.670953053885099e-08, "loss": 0.9266, "step": 24646 }, { "epoch": 0.9643555833789812, "grad_norm": 0.0, "learning_rate": 6.656348033673654e-08, "loss": 0.9814, "step": 24647 }, { "epoch": 0.9643947100712106, "grad_norm": 0.0, "learning_rate": 6.64175896531638e-08, "loss": 0.8839, "step": 24648 }, { "epoch": 0.96443383676344, "grad_norm": 0.0, "learning_rate": 6.627185849047868e-08, "loss": 0.8829, "step": 24649 }, { "epoch": 0.9644729634556695, "grad_norm": 0.0, "learning_rate": 6.61262868510193e-08, "loss": 0.918, "step": 24650 }, { "epoch": 0.9645120901478988, "grad_norm": 0.0, "learning_rate": 6.598087473712267e-08, "loss": 0.9783, "step": 24651 }, { "epoch": 0.9645512168401283, "grad_norm": 0.0, "learning_rate": 6.583562215112582e-08, "loss": 0.9221, "step": 24652 }, { "epoch": 0.9645903435323577, "grad_norm": 0.0, "learning_rate": 6.569052909536134e-08, "loss": 1.0015, "step": 24653 }, { "epoch": 0.9646294702245872, "grad_norm": 0.0, "learning_rate": 6.554559557215844e-08, "loss": 0.9072, "step": 24654 }, { "epoch": 0.9646685969168166, "grad_norm": 0.0, "learning_rate": 6.540082158384309e-08, "loss": 0.9428, "step": 24655 }, { "epoch": 0.9647077236090461, "grad_norm": 0.0, "learning_rate": 6.52562071327445e-08, "loss": 0.8926, "step": 24656 }, { "epoch": 0.9647468503012755, "grad_norm": 0.0, "learning_rate": 6.511175222117971e-08, "loss": 1.0342, "step": 24657 }, { "epoch": 0.964785976993505, "grad_norm": 0.0, "learning_rate": 6.496745685147355e-08, "loss": 0.8516, "step": 24658 }, { "epoch": 0.9648251036857344, "grad_norm": 0.0, "learning_rate": 6.482332102593858e-08, "loss": 0.9211, "step": 24659 }, { "epoch": 0.9648642303779639, "grad_norm": 0.0, "learning_rate": 6.467934474689296e-08, "loss": 1.0056, "step": 24660 }, { "epoch": 0.9649033570701933, "grad_norm": 0.0, "learning_rate": 6.453552801664708e-08, "loss": 1.0196, "step": 24661 }, { "epoch": 0.9649424837624228, "grad_norm": 0.0, "learning_rate": 6.43918708375102e-08, "loss": 0.8604, "step": 24662 }, { "epoch": 0.9649816104546521, "grad_norm": 0.0, "learning_rate": 6.424837321179045e-08, "loss": 0.9442, "step": 24663 }, { "epoch": 0.9650207371468816, "grad_norm": 0.0, "learning_rate": 6.410503514179157e-08, "loss": 0.9676, "step": 24664 }, { "epoch": 0.965059863839111, "grad_norm": 0.0, "learning_rate": 6.396185662981502e-08, "loss": 0.981, "step": 24665 }, { "epoch": 0.9650989905313405, "grad_norm": 0.0, "learning_rate": 6.381883767816122e-08, "loss": 0.9347, "step": 24666 }, { "epoch": 0.9651381172235699, "grad_norm": 0.0, "learning_rate": 6.367597828912609e-08, "loss": 1.034, "step": 24667 }, { "epoch": 0.9651772439157994, "grad_norm": 0.0, "learning_rate": 6.353327846500335e-08, "loss": 1.1054, "step": 24668 }, { "epoch": 0.9652163706080288, "grad_norm": 0.0, "learning_rate": 6.339073820808561e-08, "loss": 0.9209, "step": 24669 }, { "epoch": 0.9652554973002583, "grad_norm": 0.0, "learning_rate": 6.324835752065994e-08, "loss": 0.9875, "step": 24670 }, { "epoch": 0.9652946239924877, "grad_norm": 0.0, "learning_rate": 6.310613640501562e-08, "loss": 0.9606, "step": 24671 }, { "epoch": 0.9653337506847172, "grad_norm": 0.0, "learning_rate": 6.296407486343525e-08, "loss": 1.0735, "step": 24672 }, { "epoch": 0.9653728773769465, "grad_norm": 0.0, "learning_rate": 6.282217289819925e-08, "loss": 0.9712, "step": 24673 }, { "epoch": 0.965412004069176, "grad_norm": 0.0, "learning_rate": 6.26804305115869e-08, "loss": 0.952, "step": 24674 }, { "epoch": 0.9654511307614054, "grad_norm": 0.0, "learning_rate": 6.253884770587636e-08, "loss": 0.813, "step": 24675 }, { "epoch": 0.9654902574536348, "grad_norm": 0.0, "learning_rate": 6.239742448333807e-08, "loss": 0.9549, "step": 24676 }, { "epoch": 0.9655293841458643, "grad_norm": 0.0, "learning_rate": 6.225616084624463e-08, "loss": 0.9749, "step": 24677 }, { "epoch": 0.9655685108380937, "grad_norm": 0.0, "learning_rate": 6.211505679686536e-08, "loss": 0.9926, "step": 24678 }, { "epoch": 0.9656076375303232, "grad_norm": 0.0, "learning_rate": 6.197411233746509e-08, "loss": 1.0113, "step": 24679 }, { "epoch": 0.9656467642225526, "grad_norm": 0.0, "learning_rate": 6.183332747030757e-08, "loss": 0.9905, "step": 24680 }, { "epoch": 0.9656858909147821, "grad_norm": 0.0, "learning_rate": 6.169270219765322e-08, "loss": 0.9931, "step": 24681 }, { "epoch": 0.9657250176070115, "grad_norm": 0.0, "learning_rate": 6.155223652176023e-08, "loss": 0.8412, "step": 24682 }, { "epoch": 0.965764144299241, "grad_norm": 0.0, "learning_rate": 6.141193044488569e-08, "loss": 0.8922, "step": 24683 }, { "epoch": 0.9658032709914703, "grad_norm": 0.0, "learning_rate": 6.127178396928224e-08, "loss": 0.9772, "step": 24684 }, { "epoch": 0.9658423976836998, "grad_norm": 0.0, "learning_rate": 6.113179709719919e-08, "loss": 0.9875, "step": 24685 }, { "epoch": 0.9658815243759292, "grad_norm": 0.0, "learning_rate": 6.099196983088473e-08, "loss": 0.9957, "step": 24686 }, { "epoch": 0.9659206510681587, "grad_norm": 0.0, "learning_rate": 6.085230217258597e-08, "loss": 0.9912, "step": 24687 }, { "epoch": 0.9659597777603881, "grad_norm": 0.0, "learning_rate": 6.071279412454445e-08, "loss": 0.8954, "step": 24688 }, { "epoch": 0.9659989044526176, "grad_norm": 0.0, "learning_rate": 6.05734456890017e-08, "loss": 0.8686, "step": 24689 }, { "epoch": 0.966038031144847, "grad_norm": 0.0, "learning_rate": 6.043425686819371e-08, "loss": 0.9449, "step": 24690 }, { "epoch": 0.9660771578370765, "grad_norm": 0.0, "learning_rate": 6.029522766435648e-08, "loss": 1.1002, "step": 24691 }, { "epoch": 0.9661162845293059, "grad_norm": 0.0, "learning_rate": 6.015635807972265e-08, "loss": 1.0289, "step": 24692 }, { "epoch": 0.9661554112215354, "grad_norm": 0.0, "learning_rate": 6.001764811652378e-08, "loss": 0.7967, "step": 24693 }, { "epoch": 0.9661945379137648, "grad_norm": 0.0, "learning_rate": 5.987909777698475e-08, "loss": 0.9453, "step": 24694 }, { "epoch": 0.9662336646059942, "grad_norm": 0.0, "learning_rate": 5.974070706333157e-08, "loss": 1.0283, "step": 24695 }, { "epoch": 0.9662727912982236, "grad_norm": 0.0, "learning_rate": 5.96024759777869e-08, "loss": 0.9999, "step": 24696 }, { "epoch": 0.9663119179904531, "grad_norm": 0.0, "learning_rate": 5.9464404522571185e-08, "loss": 0.9878, "step": 24697 }, { "epoch": 0.9663510446826825, "grad_norm": 0.0, "learning_rate": 5.932649269990043e-08, "loss": 0.9067, "step": 24698 }, { "epoch": 0.966390171374912, "grad_norm": 0.0, "learning_rate": 5.9188740511989526e-08, "loss": 0.8684, "step": 24699 }, { "epoch": 0.9664292980671414, "grad_norm": 0.0, "learning_rate": 5.905114796105116e-08, "loss": 0.9032, "step": 24700 }, { "epoch": 0.9664684247593709, "grad_norm": 0.0, "learning_rate": 5.891371504929466e-08, "loss": 0.9631, "step": 24701 }, { "epoch": 0.9665075514516003, "grad_norm": 0.0, "learning_rate": 5.8776441778927165e-08, "loss": 0.8541, "step": 24702 }, { "epoch": 0.9665466781438298, "grad_norm": 0.0, "learning_rate": 5.8639328152153566e-08, "loss": 0.8713, "step": 24703 }, { "epoch": 0.9665858048360592, "grad_norm": 0.0, "learning_rate": 5.8502374171174324e-08, "loss": 1.0372, "step": 24704 }, { "epoch": 0.9666249315282885, "grad_norm": 0.0, "learning_rate": 5.836557983818991e-08, "loss": 1.0568, "step": 24705 }, { "epoch": 0.966664058220518, "grad_norm": 0.0, "learning_rate": 5.8228945155396344e-08, "loss": 1.0135, "step": 24706 }, { "epoch": 0.9667031849127474, "grad_norm": 0.0, "learning_rate": 5.809247012498853e-08, "loss": 0.9566, "step": 24707 }, { "epoch": 0.9667423116049769, "grad_norm": 0.0, "learning_rate": 5.795615474915694e-08, "loss": 1.1037, "step": 24708 }, { "epoch": 0.9667814382972063, "grad_norm": 0.0, "learning_rate": 5.781999903009206e-08, "loss": 0.9851, "step": 24709 }, { "epoch": 0.9668205649894358, "grad_norm": 0.0, "learning_rate": 5.768400296997989e-08, "loss": 0.9993, "step": 24710 }, { "epoch": 0.9668596916816652, "grad_norm": 0.0, "learning_rate": 5.754816657100426e-08, "loss": 0.849, "step": 24711 }, { "epoch": 0.9668988183738947, "grad_norm": 0.0, "learning_rate": 5.741248983534675e-08, "loss": 1.0215, "step": 24712 }, { "epoch": 0.9669379450661241, "grad_norm": 0.0, "learning_rate": 5.727697276518451e-08, "loss": 1.1736, "step": 24713 }, { "epoch": 0.9669770717583536, "grad_norm": 0.0, "learning_rate": 5.714161536269691e-08, "loss": 1.0106, "step": 24714 }, { "epoch": 0.967016198450583, "grad_norm": 0.0, "learning_rate": 5.700641763005443e-08, "loss": 0.9835, "step": 24715 }, { "epoch": 0.9670553251428124, "grad_norm": 0.0, "learning_rate": 5.6871379569430894e-08, "loss": 0.9855, "step": 24716 }, { "epoch": 0.9670944518350418, "grad_norm": 0.0, "learning_rate": 5.673650118299234e-08, "loss": 0.881, "step": 24717 }, { "epoch": 0.9671335785272713, "grad_norm": 0.0, "learning_rate": 5.660178247290704e-08, "loss": 0.9125, "step": 24718 }, { "epoch": 0.9671727052195007, "grad_norm": 0.0, "learning_rate": 5.646722344133659e-08, "loss": 0.9156, "step": 24719 }, { "epoch": 0.9672118319117302, "grad_norm": 0.0, "learning_rate": 5.633282409044372e-08, "loss": 0.9674, "step": 24720 }, { "epoch": 0.9672509586039596, "grad_norm": 0.0, "learning_rate": 5.619858442238446e-08, "loss": 0.8871, "step": 24721 }, { "epoch": 0.9672900852961891, "grad_norm": 0.0, "learning_rate": 5.606450443931488e-08, "loss": 0.9152, "step": 24722 }, { "epoch": 0.9673292119884185, "grad_norm": 0.0, "learning_rate": 5.593058414338992e-08, "loss": 0.8873, "step": 24723 }, { "epoch": 0.967368338680648, "grad_norm": 0.0, "learning_rate": 5.579682353676008e-08, "loss": 0.9205, "step": 24724 }, { "epoch": 0.9674074653728774, "grad_norm": 0.0, "learning_rate": 5.566322262157142e-08, "loss": 0.8906, "step": 24725 }, { "epoch": 0.9674465920651069, "grad_norm": 0.0, "learning_rate": 5.5529781399970005e-08, "loss": 0.8352, "step": 24726 }, { "epoch": 0.9674857187573362, "grad_norm": 0.0, "learning_rate": 5.539649987409967e-08, "loss": 1.014, "step": 24727 }, { "epoch": 0.9675248454495657, "grad_norm": 0.0, "learning_rate": 5.526337804610094e-08, "loss": 0.971, "step": 24728 }, { "epoch": 0.9675639721417951, "grad_norm": 0.0, "learning_rate": 5.513041591810986e-08, "loss": 1.0051, "step": 24729 }, { "epoch": 0.9676030988340246, "grad_norm": 0.0, "learning_rate": 5.4997613492263626e-08, "loss": 0.9812, "step": 24730 }, { "epoch": 0.967642225526254, "grad_norm": 0.0, "learning_rate": 5.486497077069275e-08, "loss": 1.0636, "step": 24731 }, { "epoch": 0.9676813522184835, "grad_norm": 0.0, "learning_rate": 5.473248775552997e-08, "loss": 0.9588, "step": 24732 }, { "epoch": 0.9677204789107129, "grad_norm": 0.0, "learning_rate": 5.460016444890026e-08, "loss": 1.05, "step": 24733 }, { "epoch": 0.9677596056029423, "grad_norm": 0.0, "learning_rate": 5.4468000852929695e-08, "loss": 1.0006, "step": 24734 }, { "epoch": 0.9677987322951718, "grad_norm": 0.0, "learning_rate": 5.433599696974101e-08, "loss": 0.9129, "step": 24735 }, { "epoch": 0.9678378589874012, "grad_norm": 0.0, "learning_rate": 5.420415280145364e-08, "loss": 0.939, "step": 24736 }, { "epoch": 0.9678769856796307, "grad_norm": 0.0, "learning_rate": 5.407246835018365e-08, "loss": 0.9743, "step": 24737 }, { "epoch": 0.96791611237186, "grad_norm": 0.0, "learning_rate": 5.394094361804825e-08, "loss": 1.0306, "step": 24738 }, { "epoch": 0.9679552390640895, "grad_norm": 0.0, "learning_rate": 5.380957860715796e-08, "loss": 0.9191, "step": 24739 }, { "epoch": 0.9679943657563189, "grad_norm": 0.0, "learning_rate": 5.367837331962222e-08, "loss": 0.8243, "step": 24740 }, { "epoch": 0.9680334924485484, "grad_norm": 0.0, "learning_rate": 5.354732775754823e-08, "loss": 0.8731, "step": 24741 }, { "epoch": 0.9680726191407778, "grad_norm": 0.0, "learning_rate": 5.3416441923040964e-08, "loss": 0.98, "step": 24742 }, { "epoch": 0.9681117458330073, "grad_norm": 0.0, "learning_rate": 5.328571581820208e-08, "loss": 0.9997, "step": 24743 }, { "epoch": 0.9681508725252367, "grad_norm": 0.0, "learning_rate": 5.3155149445129895e-08, "loss": 0.8943, "step": 24744 }, { "epoch": 0.9681899992174662, "grad_norm": 0.0, "learning_rate": 5.3024742805921625e-08, "loss": 0.9525, "step": 24745 }, { "epoch": 0.9682291259096956, "grad_norm": 0.0, "learning_rate": 5.289449590267337e-08, "loss": 0.8919, "step": 24746 }, { "epoch": 0.9682682526019251, "grad_norm": 0.0, "learning_rate": 5.2764408737473464e-08, "loss": 0.9094, "step": 24747 }, { "epoch": 0.9683073792941544, "grad_norm": 0.0, "learning_rate": 5.2634481312413555e-08, "loss": 0.9357, "step": 24748 }, { "epoch": 0.9683465059863839, "grad_norm": 0.0, "learning_rate": 5.250471362957865e-08, "loss": 0.9712, "step": 24749 }, { "epoch": 0.9683856326786133, "grad_norm": 0.0, "learning_rate": 5.237510569105375e-08, "loss": 0.9611, "step": 24750 }, { "epoch": 0.9684247593708428, "grad_norm": 0.0, "learning_rate": 5.22456574989183e-08, "loss": 0.9405, "step": 24751 }, { "epoch": 0.9684638860630722, "grad_norm": 0.0, "learning_rate": 5.211636905525397e-08, "loss": 0.8693, "step": 24752 }, { "epoch": 0.9685030127553017, "grad_norm": 0.0, "learning_rate": 5.198724036213465e-08, "loss": 0.9077, "step": 24753 }, { "epoch": 0.9685421394475311, "grad_norm": 0.0, "learning_rate": 5.1858271421634245e-08, "loss": 1.0311, "step": 24754 }, { "epoch": 0.9685812661397606, "grad_norm": 0.0, "learning_rate": 5.172946223582442e-08, "loss": 0.9852, "step": 24755 }, { "epoch": 0.96862039283199, "grad_norm": 0.0, "learning_rate": 5.160081280677465e-08, "loss": 0.9435, "step": 24756 }, { "epoch": 0.9686595195242195, "grad_norm": 0.0, "learning_rate": 5.147232313654882e-08, "loss": 1.0289, "step": 24757 }, { "epoch": 0.9686986462164489, "grad_norm": 0.0, "learning_rate": 5.1343993227210845e-08, "loss": 0.901, "step": 24758 }, { "epoch": 0.9687377729086784, "grad_norm": 0.0, "learning_rate": 5.12158230808224e-08, "loss": 0.841, "step": 24759 }, { "epoch": 0.9687768996009077, "grad_norm": 0.0, "learning_rate": 5.108781269944185e-08, "loss": 1.0639, "step": 24760 }, { "epoch": 0.9688160262931372, "grad_norm": 0.0, "learning_rate": 5.095996208512311e-08, "loss": 0.9027, "step": 24761 }, { "epoch": 0.9688551529853666, "grad_norm": 0.0, "learning_rate": 5.083227123992118e-08, "loss": 0.941, "step": 24762 }, { "epoch": 0.968894279677596, "grad_norm": 0.0, "learning_rate": 5.070474016588667e-08, "loss": 1.0144, "step": 24763 }, { "epoch": 0.9689334063698255, "grad_norm": 0.0, "learning_rate": 5.057736886506681e-08, "loss": 0.8707, "step": 24764 }, { "epoch": 0.9689725330620549, "grad_norm": 0.0, "learning_rate": 5.0450157339506646e-08, "loss": 1.0321, "step": 24765 }, { "epoch": 0.9690116597542844, "grad_norm": 0.0, "learning_rate": 5.032310559124898e-08, "loss": 0.8693, "step": 24766 }, { "epoch": 0.9690507864465138, "grad_norm": 0.0, "learning_rate": 5.019621362233551e-08, "loss": 0.9349, "step": 24767 }, { "epoch": 0.9690899131387433, "grad_norm": 0.0, "learning_rate": 5.0069481434802395e-08, "loss": 0.9041, "step": 24768 }, { "epoch": 0.9691290398309726, "grad_norm": 0.0, "learning_rate": 4.9942909030685774e-08, "loss": 1.0312, "step": 24769 }, { "epoch": 0.9691681665232021, "grad_norm": 0.0, "learning_rate": 4.981649641201736e-08, "loss": 1.0197, "step": 24770 }, { "epoch": 0.9692072932154315, "grad_norm": 0.0, "learning_rate": 4.969024358082775e-08, "loss": 0.8978, "step": 24771 }, { "epoch": 0.969246419907661, "grad_norm": 0.0, "learning_rate": 4.956415053914532e-08, "loss": 0.9238, "step": 24772 }, { "epoch": 0.9692855465998904, "grad_norm": 0.0, "learning_rate": 4.9438217288994005e-08, "loss": 0.9944, "step": 24773 }, { "epoch": 0.9693246732921199, "grad_norm": 0.0, "learning_rate": 4.931244383239553e-08, "loss": 0.8781, "step": 24774 }, { "epoch": 0.9693637999843493, "grad_norm": 0.0, "learning_rate": 4.918683017137049e-08, "loss": 0.9746, "step": 24775 }, { "epoch": 0.9694029266765788, "grad_norm": 0.0, "learning_rate": 4.906137630793506e-08, "loss": 0.9104, "step": 24776 }, { "epoch": 0.9694420533688082, "grad_norm": 0.0, "learning_rate": 4.89360822441054e-08, "loss": 1.0447, "step": 24777 }, { "epoch": 0.9694811800610377, "grad_norm": 0.0, "learning_rate": 4.881094798189323e-08, "loss": 0.9453, "step": 24778 }, { "epoch": 0.9695203067532671, "grad_norm": 0.0, "learning_rate": 4.8685973523308064e-08, "loss": 0.9132, "step": 24779 }, { "epoch": 0.9695594334454966, "grad_norm": 0.0, "learning_rate": 4.856115887035495e-08, "loss": 0.9016, "step": 24780 }, { "epoch": 0.9695985601377259, "grad_norm": 0.0, "learning_rate": 4.8436504025041185e-08, "loss": 0.9466, "step": 24781 }, { "epoch": 0.9696376868299554, "grad_norm": 0.0, "learning_rate": 4.831200898936628e-08, "loss": 1.0258, "step": 24782 }, { "epoch": 0.9696768135221848, "grad_norm": 0.0, "learning_rate": 4.818767376533084e-08, "loss": 0.9744, "step": 24783 }, { "epoch": 0.9697159402144143, "grad_norm": 0.0, "learning_rate": 4.806349835492996e-08, "loss": 0.9736, "step": 24784 }, { "epoch": 0.9697550669066437, "grad_norm": 0.0, "learning_rate": 4.7939482760160915e-08, "loss": 0.8615, "step": 24785 }, { "epoch": 0.9697941935988732, "grad_norm": 0.0, "learning_rate": 4.781562698301101e-08, "loss": 1.0284, "step": 24786 }, { "epoch": 0.9698333202911026, "grad_norm": 0.0, "learning_rate": 4.7691931025473095e-08, "loss": 0.9913, "step": 24787 }, { "epoch": 0.9698724469833321, "grad_norm": 0.0, "learning_rate": 4.756839488953113e-08, "loss": 0.9852, "step": 24788 }, { "epoch": 0.9699115736755615, "grad_norm": 0.0, "learning_rate": 4.74450185771691e-08, "loss": 1.0404, "step": 24789 }, { "epoch": 0.9699507003677909, "grad_norm": 0.0, "learning_rate": 4.732180209036985e-08, "loss": 0.8931, "step": 24790 }, { "epoch": 0.9699898270600203, "grad_norm": 0.0, "learning_rate": 4.719874543111069e-08, "loss": 0.9093, "step": 24791 }, { "epoch": 0.9700289537522497, "grad_norm": 0.0, "learning_rate": 4.7075848601367825e-08, "loss": 0.9077, "step": 24792 }, { "epoch": 0.9700680804444792, "grad_norm": 0.0, "learning_rate": 4.695311160311522e-08, "loss": 0.982, "step": 24793 }, { "epoch": 0.9701072071367086, "grad_norm": 0.0, "learning_rate": 4.6830534438323524e-08, "loss": 0.8763, "step": 24794 }, { "epoch": 0.9701463338289381, "grad_norm": 0.0, "learning_rate": 4.670811710896117e-08, "loss": 0.9045, "step": 24795 }, { "epoch": 0.9701854605211675, "grad_norm": 0.0, "learning_rate": 4.6585859616995464e-08, "loss": 0.8967, "step": 24796 }, { "epoch": 0.970224587213397, "grad_norm": 0.0, "learning_rate": 4.6463761964388176e-08, "loss": 1.0052, "step": 24797 }, { "epoch": 0.9702637139056264, "grad_norm": 0.0, "learning_rate": 4.634182415309996e-08, "loss": 0.8797, "step": 24798 }, { "epoch": 0.9703028405978559, "grad_norm": 0.0, "learning_rate": 4.622004618508924e-08, "loss": 0.9446, "step": 24799 }, { "epoch": 0.9703419672900853, "grad_norm": 0.0, "learning_rate": 4.609842806231224e-08, "loss": 0.9263, "step": 24800 }, { "epoch": 0.9703810939823148, "grad_norm": 0.0, "learning_rate": 4.597696978672073e-08, "loss": 0.9798, "step": 24801 }, { "epoch": 0.9704202206745441, "grad_norm": 0.0, "learning_rate": 4.5855671360267585e-08, "loss": 1.0037, "step": 24802 }, { "epoch": 0.9704593473667736, "grad_norm": 0.0, "learning_rate": 4.573453278489792e-08, "loss": 0.8477, "step": 24803 }, { "epoch": 0.970498474059003, "grad_norm": 0.0, "learning_rate": 4.561355406255796e-08, "loss": 1.0697, "step": 24804 }, { "epoch": 0.9705376007512325, "grad_norm": 0.0, "learning_rate": 4.549273519519171e-08, "loss": 0.9758, "step": 24805 }, { "epoch": 0.9705767274434619, "grad_norm": 0.0, "learning_rate": 4.53720761847376e-08, "loss": 1.0359, "step": 24806 }, { "epoch": 0.9706158541356914, "grad_norm": 0.0, "learning_rate": 4.525157703313521e-08, "loss": 0.915, "step": 24807 }, { "epoch": 0.9706549808279208, "grad_norm": 0.0, "learning_rate": 4.513123774231742e-08, "loss": 0.9809, "step": 24808 }, { "epoch": 0.9706941075201503, "grad_norm": 0.0, "learning_rate": 4.501105831421826e-08, "loss": 0.9409, "step": 24809 }, { "epoch": 0.9707332342123797, "grad_norm": 0.0, "learning_rate": 4.489103875076728e-08, "loss": 0.9937, "step": 24810 }, { "epoch": 0.9707723609046092, "grad_norm": 0.0, "learning_rate": 4.4771179053891835e-08, "loss": 0.933, "step": 24811 }, { "epoch": 0.9708114875968386, "grad_norm": 0.0, "learning_rate": 4.465147922551705e-08, "loss": 0.9627, "step": 24812 }, { "epoch": 0.970850614289068, "grad_norm": 0.0, "learning_rate": 4.453193926756361e-08, "loss": 0.9924, "step": 24813 }, { "epoch": 0.9708897409812974, "grad_norm": 0.0, "learning_rate": 4.441255918195331e-08, "loss": 0.8808, "step": 24814 }, { "epoch": 0.9709288676735269, "grad_norm": 0.0, "learning_rate": 4.4293338970601284e-08, "loss": 0.8302, "step": 24815 }, { "epoch": 0.9709679943657563, "grad_norm": 0.0, "learning_rate": 4.4174278635423787e-08, "loss": 0.9002, "step": 24816 }, { "epoch": 0.9710071210579858, "grad_norm": 0.0, "learning_rate": 4.40553781783315e-08, "loss": 0.8951, "step": 24817 }, { "epoch": 0.9710462477502152, "grad_norm": 0.0, "learning_rate": 4.393663760123512e-08, "loss": 0.994, "step": 24818 }, { "epoch": 0.9710853744424446, "grad_norm": 0.0, "learning_rate": 4.381805690604091e-08, "loss": 0.9181, "step": 24819 }, { "epoch": 0.9711245011346741, "grad_norm": 0.0, "learning_rate": 4.3699636094651773e-08, "loss": 0.9265, "step": 24820 }, { "epoch": 0.9711636278269035, "grad_norm": 0.0, "learning_rate": 4.3581375168970654e-08, "loss": 0.9022, "step": 24821 }, { "epoch": 0.971202754519133, "grad_norm": 0.0, "learning_rate": 4.346327413089713e-08, "loss": 0.946, "step": 24822 }, { "epoch": 0.9712418812113623, "grad_norm": 0.0, "learning_rate": 4.334533298232746e-08, "loss": 0.9781, "step": 24823 }, { "epoch": 0.9712810079035918, "grad_norm": 0.0, "learning_rate": 4.322755172515458e-08, "loss": 0.8765, "step": 24824 }, { "epoch": 0.9713201345958212, "grad_norm": 0.0, "learning_rate": 4.310993036127142e-08, "loss": 0.9482, "step": 24825 }, { "epoch": 0.9713592612880507, "grad_norm": 0.0, "learning_rate": 4.2992468892565365e-08, "loss": 1.0087, "step": 24826 }, { "epoch": 0.9713983879802801, "grad_norm": 0.0, "learning_rate": 4.2875167320923786e-08, "loss": 1.0706, "step": 24827 }, { "epoch": 0.9714375146725096, "grad_norm": 0.0, "learning_rate": 4.275802564823073e-08, "loss": 0.9214, "step": 24828 }, { "epoch": 0.971476641364739, "grad_norm": 0.0, "learning_rate": 4.2641043876364694e-08, "loss": 0.9458, "step": 24829 }, { "epoch": 0.9715157680569685, "grad_norm": 0.0, "learning_rate": 4.252422200720863e-08, "loss": 0.9017, "step": 24830 }, { "epoch": 0.9715548947491979, "grad_norm": 0.0, "learning_rate": 4.2407560042634355e-08, "loss": 0.994, "step": 24831 }, { "epoch": 0.9715940214414274, "grad_norm": 0.0, "learning_rate": 4.229105798451816e-08, "loss": 0.9113, "step": 24832 }, { "epoch": 0.9716331481336568, "grad_norm": 0.0, "learning_rate": 4.217471583472965e-08, "loss": 0.9156, "step": 24833 }, { "epoch": 0.9716722748258863, "grad_norm": 0.0, "learning_rate": 4.205853359513623e-08, "loss": 1.0217, "step": 24834 }, { "epoch": 0.9717114015181156, "grad_norm": 0.0, "learning_rate": 4.19425112676064e-08, "loss": 0.9496, "step": 24835 }, { "epoch": 0.9717505282103451, "grad_norm": 0.0, "learning_rate": 4.182664885400089e-08, "loss": 0.8763, "step": 24836 }, { "epoch": 0.9717896549025745, "grad_norm": 0.0, "learning_rate": 4.171094635618045e-08, "loss": 0.9692, "step": 24837 }, { "epoch": 0.971828781594804, "grad_norm": 0.0, "learning_rate": 4.1595403776004684e-08, "loss": 1.1253, "step": 24838 }, { "epoch": 0.9718679082870334, "grad_norm": 0.0, "learning_rate": 4.148002111532767e-08, "loss": 0.9952, "step": 24839 }, { "epoch": 0.9719070349792629, "grad_norm": 0.0, "learning_rate": 4.136479837600349e-08, "loss": 1.0059, "step": 24840 }, { "epoch": 0.9719461616714923, "grad_norm": 0.0, "learning_rate": 4.1249735559881764e-08, "loss": 0.8981, "step": 24841 }, { "epoch": 0.9719852883637218, "grad_norm": 0.0, "learning_rate": 4.113483266880991e-08, "loss": 1.0327, "step": 24842 }, { "epoch": 0.9720244150559512, "grad_norm": 0.0, "learning_rate": 4.1020089704633115e-08, "loss": 0.9272, "step": 24843 }, { "epoch": 0.9720635417481807, "grad_norm": 0.0, "learning_rate": 4.090550666919546e-08, "loss": 0.8524, "step": 24844 }, { "epoch": 0.97210266844041, "grad_norm": 0.0, "learning_rate": 4.0791083564334365e-08, "loss": 0.9849, "step": 24845 }, { "epoch": 0.9721417951326395, "grad_norm": 0.0, "learning_rate": 4.067682039189058e-08, "loss": 1.0291, "step": 24846 }, { "epoch": 0.9721809218248689, "grad_norm": 0.0, "learning_rate": 4.056271715369597e-08, "loss": 0.9669, "step": 24847 }, { "epoch": 0.9722200485170983, "grad_norm": 0.0, "learning_rate": 4.0448773851584635e-08, "loss": 1.0045, "step": 24848 }, { "epoch": 0.9722591752093278, "grad_norm": 0.0, "learning_rate": 4.0334990487386206e-08, "loss": 0.9182, "step": 24849 }, { "epoch": 0.9722983019015572, "grad_norm": 0.0, "learning_rate": 4.022136706292812e-08, "loss": 1.0236, "step": 24850 }, { "epoch": 0.9723374285937867, "grad_norm": 0.0, "learning_rate": 4.0107903580033355e-08, "loss": 0.9249, "step": 24851 }, { "epoch": 0.9723765552860161, "grad_norm": 0.0, "learning_rate": 3.999460004052602e-08, "loss": 0.87, "step": 24852 }, { "epoch": 0.9724156819782456, "grad_norm": 0.0, "learning_rate": 3.988145644622465e-08, "loss": 0.8744, "step": 24853 }, { "epoch": 0.972454808670475, "grad_norm": 0.0, "learning_rate": 3.976847279894669e-08, "loss": 0.9207, "step": 24854 }, { "epoch": 0.9724939353627045, "grad_norm": 0.0, "learning_rate": 3.9655649100506235e-08, "loss": 1.0703, "step": 24855 }, { "epoch": 0.9725330620549338, "grad_norm": 0.0, "learning_rate": 3.954298535271406e-08, "loss": 0.9681, "step": 24856 }, { "epoch": 0.9725721887471633, "grad_norm": 0.0, "learning_rate": 3.943048155738094e-08, "loss": 0.9852, "step": 24857 }, { "epoch": 0.9726113154393927, "grad_norm": 0.0, "learning_rate": 3.931813771631321e-08, "loss": 0.8936, "step": 24858 }, { "epoch": 0.9726504421316222, "grad_norm": 0.0, "learning_rate": 3.920595383131498e-08, "loss": 0.8725, "step": 24859 }, { "epoch": 0.9726895688238516, "grad_norm": 0.0, "learning_rate": 3.909392990418703e-08, "loss": 1.0589, "step": 24860 }, { "epoch": 0.9727286955160811, "grad_norm": 0.0, "learning_rate": 3.898206593672904e-08, "loss": 0.835, "step": 24861 }, { "epoch": 0.9727678222083105, "grad_norm": 0.0, "learning_rate": 3.887036193073734e-08, "loss": 1.0273, "step": 24862 }, { "epoch": 0.97280694890054, "grad_norm": 0.0, "learning_rate": 3.875881788800606e-08, "loss": 0.9886, "step": 24863 }, { "epoch": 0.9728460755927694, "grad_norm": 0.0, "learning_rate": 3.864743381032487e-08, "loss": 1.0786, "step": 24864 }, { "epoch": 0.9728852022849989, "grad_norm": 0.0, "learning_rate": 3.853620969948457e-08, "loss": 0.8999, "step": 24865 }, { "epoch": 0.9729243289772282, "grad_norm": 0.0, "learning_rate": 3.842514555726928e-08, "loss": 0.939, "step": 24866 }, { "epoch": 0.9729634556694577, "grad_norm": 0.0, "learning_rate": 3.8314241385465354e-08, "loss": 1.0297, "step": 24867 }, { "epoch": 0.9730025823616871, "grad_norm": 0.0, "learning_rate": 3.8203497185850256e-08, "loss": 0.9004, "step": 24868 }, { "epoch": 0.9730417090539166, "grad_norm": 0.0, "learning_rate": 3.809291296020479e-08, "loss": 1.0915, "step": 24869 }, { "epoch": 0.973080835746146, "grad_norm": 0.0, "learning_rate": 3.7982488710304186e-08, "loss": 0.993, "step": 24870 }, { "epoch": 0.9731199624383755, "grad_norm": 0.0, "learning_rate": 3.78722244379226e-08, "loss": 0.8806, "step": 24871 }, { "epoch": 0.9731590891306049, "grad_norm": 0.0, "learning_rate": 3.776212014482861e-08, "loss": 0.8769, "step": 24872 }, { "epoch": 0.9731982158228344, "grad_norm": 0.0, "learning_rate": 3.7652175832791906e-08, "loss": 0.9807, "step": 24873 }, { "epoch": 0.9732373425150638, "grad_norm": 0.0, "learning_rate": 3.7542391503577745e-08, "loss": 0.9148, "step": 24874 }, { "epoch": 0.9732764692072932, "grad_norm": 0.0, "learning_rate": 3.743276715894917e-08, "loss": 0.922, "step": 24875 }, { "epoch": 0.9733155958995227, "grad_norm": 0.0, "learning_rate": 3.7323302800666986e-08, "loss": 1.1012, "step": 24876 }, { "epoch": 0.973354722591752, "grad_norm": 0.0, "learning_rate": 3.721399843048867e-08, "loss": 0.8488, "step": 24877 }, { "epoch": 0.9733938492839815, "grad_norm": 0.0, "learning_rate": 3.710485405016839e-08, "loss": 0.8474, "step": 24878 }, { "epoch": 0.9734329759762109, "grad_norm": 0.0, "learning_rate": 3.6995869661460294e-08, "loss": 0.8397, "step": 24879 }, { "epoch": 0.9734721026684404, "grad_norm": 0.0, "learning_rate": 3.6887045266115196e-08, "loss": 1.0853, "step": 24880 }, { "epoch": 0.9735112293606698, "grad_norm": 0.0, "learning_rate": 3.6778380865878374e-08, "loss": 1.0428, "step": 24881 }, { "epoch": 0.9735503560528993, "grad_norm": 0.0, "learning_rate": 3.666987646249731e-08, "loss": 1.0081, "step": 24882 }, { "epoch": 0.9735894827451287, "grad_norm": 0.0, "learning_rate": 3.6561532057712844e-08, "loss": 1.085, "step": 24883 }, { "epoch": 0.9736286094373582, "grad_norm": 0.0, "learning_rate": 3.6453347653264695e-08, "loss": 0.8243, "step": 24884 }, { "epoch": 0.9736677361295876, "grad_norm": 0.0, "learning_rate": 3.634532325089146e-08, "loss": 0.9739, "step": 24885 }, { "epoch": 0.9737068628218171, "grad_norm": 0.0, "learning_rate": 3.623745885232621e-08, "loss": 1.0761, "step": 24886 }, { "epoch": 0.9737459895140465, "grad_norm": 0.0, "learning_rate": 3.6129754459303114e-08, "loss": 0.8801, "step": 24887 }, { "epoch": 0.973785116206276, "grad_norm": 0.0, "learning_rate": 3.602221007355078e-08, "loss": 0.9505, "step": 24888 }, { "epoch": 0.9738242428985053, "grad_norm": 0.0, "learning_rate": 3.591482569679561e-08, "loss": 0.9425, "step": 24889 }, { "epoch": 0.9738633695907348, "grad_norm": 0.0, "learning_rate": 3.5807601330762886e-08, "loss": 0.9821, "step": 24890 }, { "epoch": 0.9739024962829642, "grad_norm": 0.0, "learning_rate": 3.570053697717457e-08, "loss": 0.8313, "step": 24891 }, { "epoch": 0.9739416229751937, "grad_norm": 0.0, "learning_rate": 3.559363263774929e-08, "loss": 0.942, "step": 24892 }, { "epoch": 0.9739807496674231, "grad_norm": 0.0, "learning_rate": 3.548688831420344e-08, "loss": 0.8935, "step": 24893 }, { "epoch": 0.9740198763596526, "grad_norm": 0.0, "learning_rate": 3.538030400825343e-08, "loss": 0.9167, "step": 24894 }, { "epoch": 0.974059003051882, "grad_norm": 0.0, "learning_rate": 3.527387972160901e-08, "loss": 0.9675, "step": 24895 }, { "epoch": 0.9740981297441115, "grad_norm": 0.0, "learning_rate": 3.51676154559788e-08, "loss": 0.9069, "step": 24896 }, { "epoch": 0.9741372564363409, "grad_norm": 0.0, "learning_rate": 3.506151121307033e-08, "loss": 0.9838, "step": 24897 }, { "epoch": 0.9741763831285704, "grad_norm": 0.0, "learning_rate": 3.49555669945878e-08, "loss": 0.9749, "step": 24898 }, { "epoch": 0.9742155098207997, "grad_norm": 0.0, "learning_rate": 3.484978280223095e-08, "loss": 1.0253, "step": 24899 }, { "epoch": 0.9742546365130292, "grad_norm": 0.0, "learning_rate": 3.474415863770064e-08, "loss": 0.939, "step": 24900 }, { "epoch": 0.9742937632052586, "grad_norm": 0.0, "learning_rate": 3.4638694502691083e-08, "loss": 0.9979, "step": 24901 }, { "epoch": 0.9743328898974881, "grad_norm": 0.0, "learning_rate": 3.453339039889758e-08, "loss": 0.8939, "step": 24902 }, { "epoch": 0.9743720165897175, "grad_norm": 0.0, "learning_rate": 3.44282463280099e-08, "loss": 0.931, "step": 24903 }, { "epoch": 0.9744111432819469, "grad_norm": 0.0, "learning_rate": 3.432326229171668e-08, "loss": 1.0627, "step": 24904 }, { "epoch": 0.9744502699741764, "grad_norm": 0.0, "learning_rate": 3.4218438291704346e-08, "loss": 1.0049, "step": 24905 }, { "epoch": 0.9744893966664058, "grad_norm": 0.0, "learning_rate": 3.411377432965712e-08, "loss": 0.9355, "step": 24906 }, { "epoch": 0.9745285233586353, "grad_norm": 0.0, "learning_rate": 3.4009270407253656e-08, "loss": 0.9168, "step": 24907 }, { "epoch": 0.9745676500508647, "grad_norm": 0.0, "learning_rate": 3.390492652617372e-08, "loss": 0.9342, "step": 24908 }, { "epoch": 0.9746067767430941, "grad_norm": 0.0, "learning_rate": 3.3800742688091526e-08, "loss": 0.972, "step": 24909 }, { "epoch": 0.9746459034353235, "grad_norm": 0.0, "learning_rate": 3.36967188946824e-08, "loss": 1.0905, "step": 24910 }, { "epoch": 0.974685030127553, "grad_norm": 0.0, "learning_rate": 3.35928551476139e-08, "loss": 1.0095, "step": 24911 }, { "epoch": 0.9747241568197824, "grad_norm": 0.0, "learning_rate": 3.348915144855691e-08, "loss": 0.9077, "step": 24912 }, { "epoch": 0.9747632835120119, "grad_norm": 0.0, "learning_rate": 3.3385607799175654e-08, "loss": 0.8055, "step": 24913 }, { "epoch": 0.9748024102042413, "grad_norm": 0.0, "learning_rate": 3.328222420113103e-08, "loss": 0.876, "step": 24914 }, { "epoch": 0.9748415368964708, "grad_norm": 0.0, "learning_rate": 3.3179000656086147e-08, "loss": 1.0064, "step": 24915 }, { "epoch": 0.9748806635887002, "grad_norm": 0.0, "learning_rate": 3.307593716569746e-08, "loss": 1.0275, "step": 24916 }, { "epoch": 0.9749197902809297, "grad_norm": 0.0, "learning_rate": 3.2973033731619197e-08, "loss": 1.0579, "step": 24917 }, { "epoch": 0.9749589169731591, "grad_norm": 0.0, "learning_rate": 3.2870290355505593e-08, "loss": 0.9195, "step": 24918 }, { "epoch": 0.9749980436653886, "grad_norm": 0.0, "learning_rate": 3.276770703900423e-08, "loss": 0.977, "step": 24919 }, { "epoch": 0.9750371703576179, "grad_norm": 0.0, "learning_rate": 3.2665283783764876e-08, "loss": 1.0087, "step": 24920 }, { "epoch": 0.9750762970498474, "grad_norm": 0.0, "learning_rate": 3.2563020591431794e-08, "loss": 0.8713, "step": 24921 }, { "epoch": 0.9751154237420768, "grad_norm": 0.0, "learning_rate": 3.246091746364588e-08, "loss": 0.9636, "step": 24922 }, { "epoch": 0.9751545504343063, "grad_norm": 0.0, "learning_rate": 3.235897440204694e-08, "loss": 0.9666, "step": 24923 }, { "epoch": 0.9751936771265357, "grad_norm": 0.0, "learning_rate": 3.225719140827477e-08, "loss": 0.9637, "step": 24924 }, { "epoch": 0.9752328038187652, "grad_norm": 0.0, "learning_rate": 3.215556848396029e-08, "loss": 0.8696, "step": 24925 }, { "epoch": 0.9752719305109946, "grad_norm": 0.0, "learning_rate": 3.2054105630737745e-08, "loss": 0.9944, "step": 24926 }, { "epoch": 0.9753110572032241, "grad_norm": 0.0, "learning_rate": 3.195280285023472e-08, "loss": 0.8889, "step": 24927 }, { "epoch": 0.9753501838954535, "grad_norm": 0.0, "learning_rate": 3.185166014407881e-08, "loss": 0.8674, "step": 24928 }, { "epoch": 0.975389310587683, "grad_norm": 0.0, "learning_rate": 3.1750677513895376e-08, "loss": 0.9333, "step": 24929 }, { "epoch": 0.9754284372799124, "grad_norm": 0.0, "learning_rate": 3.164985496130535e-08, "loss": 0.9079, "step": 24930 }, { "epoch": 0.9754675639721418, "grad_norm": 0.0, "learning_rate": 3.154919248792743e-08, "loss": 0.8865, "step": 24931 }, { "epoch": 0.9755066906643712, "grad_norm": 0.0, "learning_rate": 3.14486900953781e-08, "loss": 0.8175, "step": 24932 }, { "epoch": 0.9755458173566006, "grad_norm": 0.0, "learning_rate": 3.1348347785271625e-08, "loss": 0.9655, "step": 24933 }, { "epoch": 0.9755849440488301, "grad_norm": 0.0, "learning_rate": 3.1248165559218946e-08, "loss": 0.8647, "step": 24934 }, { "epoch": 0.9756240707410595, "grad_norm": 0.0, "learning_rate": 3.1148143418828766e-08, "loss": 1.1038, "step": 24935 }, { "epoch": 0.975663197433289, "grad_norm": 0.0, "learning_rate": 3.10482813657087e-08, "loss": 0.8619, "step": 24936 }, { "epoch": 0.9757023241255184, "grad_norm": 0.0, "learning_rate": 3.0948579401461896e-08, "loss": 0.8943, "step": 24937 }, { "epoch": 0.9757414508177479, "grad_norm": 0.0, "learning_rate": 3.084903752768709e-08, "loss": 0.8374, "step": 24938 }, { "epoch": 0.9757805775099773, "grad_norm": 0.0, "learning_rate": 3.074965574598632e-08, "loss": 1.0001, "step": 24939 }, { "epoch": 0.9758197042022068, "grad_norm": 0.0, "learning_rate": 3.065043405795387e-08, "loss": 0.9639, "step": 24940 }, { "epoch": 0.9758588308944361, "grad_norm": 0.0, "learning_rate": 3.0551372465181805e-08, "loss": 1.0133, "step": 24941 }, { "epoch": 0.9758979575866656, "grad_norm": 0.0, "learning_rate": 3.0452470969263293e-08, "loss": 1.0516, "step": 24942 }, { "epoch": 0.975937084278895, "grad_norm": 0.0, "learning_rate": 3.035372957178595e-08, "loss": 0.883, "step": 24943 }, { "epoch": 0.9759762109711245, "grad_norm": 0.0, "learning_rate": 3.025514827433407e-08, "loss": 1.0369, "step": 24944 }, { "epoch": 0.9760153376633539, "grad_norm": 0.0, "learning_rate": 3.0156727078491935e-08, "loss": 0.7977, "step": 24945 }, { "epoch": 0.9760544643555834, "grad_norm": 0.0, "learning_rate": 3.00584659858405e-08, "loss": 0.9962, "step": 24946 }, { "epoch": 0.9760935910478128, "grad_norm": 0.0, "learning_rate": 2.9960364997956294e-08, "loss": 0.8554, "step": 24947 }, { "epoch": 0.9761327177400423, "grad_norm": 0.0, "learning_rate": 2.986242411641582e-08, "loss": 0.9188, "step": 24948 }, { "epoch": 0.9761718444322717, "grad_norm": 0.0, "learning_rate": 2.9764643342792276e-08, "loss": 0.9971, "step": 24949 }, { "epoch": 0.9762109711245012, "grad_norm": 0.0, "learning_rate": 2.966702267865329e-08, "loss": 0.8167, "step": 24950 }, { "epoch": 0.9762500978167306, "grad_norm": 0.0, "learning_rate": 2.9569562125569827e-08, "loss": 0.9917, "step": 24951 }, { "epoch": 0.97628922450896, "grad_norm": 0.0, "learning_rate": 2.9472261685105084e-08, "loss": 0.9572, "step": 24952 }, { "epoch": 0.9763283512011894, "grad_norm": 0.0, "learning_rate": 2.9375121358822257e-08, "loss": 1.0069, "step": 24953 }, { "epoch": 0.9763674778934189, "grad_norm": 0.0, "learning_rate": 2.9278141148281202e-08, "loss": 1.0588, "step": 24954 }, { "epoch": 0.9764066045856483, "grad_norm": 0.0, "learning_rate": 2.9181321055039567e-08, "loss": 0.9898, "step": 24955 }, { "epoch": 0.9764457312778778, "grad_norm": 0.0, "learning_rate": 2.9084661080650556e-08, "loss": 0.9224, "step": 24956 }, { "epoch": 0.9764848579701072, "grad_norm": 0.0, "learning_rate": 2.8988161226668478e-08, "loss": 1.0451, "step": 24957 }, { "epoch": 0.9765239846623367, "grad_norm": 0.0, "learning_rate": 2.8891821494642092e-08, "loss": 0.9453, "step": 24958 }, { "epoch": 0.9765631113545661, "grad_norm": 0.0, "learning_rate": 2.8795641886117943e-08, "loss": 0.9236, "step": 24959 }, { "epoch": 0.9766022380467956, "grad_norm": 0.0, "learning_rate": 2.8699622402641457e-08, "loss": 0.9047, "step": 24960 }, { "epoch": 0.976641364739025, "grad_norm": 0.0, "learning_rate": 2.860376304575474e-08, "loss": 0.9185, "step": 24961 }, { "epoch": 0.9766804914312543, "grad_norm": 0.0, "learning_rate": 2.8508063816996557e-08, "loss": 0.9091, "step": 24962 }, { "epoch": 0.9767196181234838, "grad_norm": 0.0, "learning_rate": 2.8412524717903454e-08, "loss": 0.9367, "step": 24963 }, { "epoch": 0.9767587448157132, "grad_norm": 0.0, "learning_rate": 2.8317145750010876e-08, "loss": 1.0349, "step": 24964 }, { "epoch": 0.9767978715079427, "grad_norm": 0.0, "learning_rate": 2.822192691484982e-08, "loss": 0.8641, "step": 24965 }, { "epoch": 0.9768369982001721, "grad_norm": 0.0, "learning_rate": 2.8126868213947945e-08, "loss": 1.0262, "step": 24966 }, { "epoch": 0.9768761248924016, "grad_norm": 0.0, "learning_rate": 2.8031969648834033e-08, "loss": 0.9973, "step": 24967 }, { "epoch": 0.976915251584631, "grad_norm": 0.0, "learning_rate": 2.7937231221031313e-08, "loss": 1.0684, "step": 24968 }, { "epoch": 0.9769543782768605, "grad_norm": 0.0, "learning_rate": 2.7842652932060786e-08, "loss": 0.9927, "step": 24969 }, { "epoch": 0.9769935049690899, "grad_norm": 0.0, "learning_rate": 2.774823478344124e-08, "loss": 1.0079, "step": 24970 }, { "epoch": 0.9770326316613194, "grad_norm": 0.0, "learning_rate": 2.765397677668924e-08, "loss": 0.9887, "step": 24971 }, { "epoch": 0.9770717583535488, "grad_norm": 0.0, "learning_rate": 2.7559878913318018e-08, "loss": 0.9496, "step": 24972 }, { "epoch": 0.9771108850457783, "grad_norm": 0.0, "learning_rate": 2.7465941194839697e-08, "loss": 0.9626, "step": 24973 }, { "epoch": 0.9771500117380076, "grad_norm": 0.0, "learning_rate": 2.7372163622760852e-08, "loss": 0.7837, "step": 24974 }, { "epoch": 0.9771891384302371, "grad_norm": 0.0, "learning_rate": 2.7278546198590272e-08, "loss": 0.8459, "step": 24975 }, { "epoch": 0.9772282651224665, "grad_norm": 0.0, "learning_rate": 2.7185088923828984e-08, "loss": 1.0299, "step": 24976 }, { "epoch": 0.977267391814696, "grad_norm": 0.0, "learning_rate": 2.7091791799978005e-08, "loss": 0.8895, "step": 24977 }, { "epoch": 0.9773065185069254, "grad_norm": 0.0, "learning_rate": 2.699865482853614e-08, "loss": 0.9478, "step": 24978 }, { "epoch": 0.9773456451991549, "grad_norm": 0.0, "learning_rate": 2.690567801099997e-08, "loss": 0.9321, "step": 24979 }, { "epoch": 0.9773847718913843, "grad_norm": 0.0, "learning_rate": 2.681286134886052e-08, "loss": 1.0222, "step": 24980 }, { "epoch": 0.9774238985836138, "grad_norm": 0.0, "learning_rate": 2.672020484360993e-08, "loss": 1.0127, "step": 24981 }, { "epoch": 0.9774630252758432, "grad_norm": 0.0, "learning_rate": 2.6627708496735906e-08, "loss": 1.0012, "step": 24982 }, { "epoch": 0.9775021519680727, "grad_norm": 0.0, "learning_rate": 2.6535372309722807e-08, "loss": 1.0135, "step": 24983 }, { "epoch": 0.977541278660302, "grad_norm": 0.0, "learning_rate": 2.6443196284055007e-08, "loss": 1.0175, "step": 24984 }, { "epoch": 0.9775804053525315, "grad_norm": 0.0, "learning_rate": 2.635118042121132e-08, "loss": 1.0232, "step": 24985 }, { "epoch": 0.9776195320447609, "grad_norm": 0.0, "learning_rate": 2.6259324722670565e-08, "loss": 0.9837, "step": 24986 }, { "epoch": 0.9776586587369904, "grad_norm": 0.0, "learning_rate": 2.6167629189907117e-08, "loss": 1.0153, "step": 24987 }, { "epoch": 0.9776977854292198, "grad_norm": 0.0, "learning_rate": 2.6076093824393134e-08, "loss": 0.8685, "step": 24988 }, { "epoch": 0.9777369121214492, "grad_norm": 0.0, "learning_rate": 2.598471862759966e-08, "loss": 1.0073, "step": 24989 }, { "epoch": 0.9777760388136787, "grad_norm": 0.0, "learning_rate": 2.5893503600993296e-08, "loss": 0.9479, "step": 24990 }, { "epoch": 0.9778151655059081, "grad_norm": 0.0, "learning_rate": 2.5802448746038432e-08, "loss": 1.0182, "step": 24991 }, { "epoch": 0.9778542921981376, "grad_norm": 0.0, "learning_rate": 2.571155406419834e-08, "loss": 0.8631, "step": 24992 }, { "epoch": 0.977893418890367, "grad_norm": 0.0, "learning_rate": 2.5620819556931852e-08, "loss": 1.0001, "step": 24993 }, { "epoch": 0.9779325455825965, "grad_norm": 0.0, "learning_rate": 2.5530245225696693e-08, "loss": 0.8804, "step": 24994 }, { "epoch": 0.9779716722748258, "grad_norm": 0.0, "learning_rate": 2.543983107194725e-08, "loss": 0.8802, "step": 24995 }, { "epoch": 0.9780107989670553, "grad_norm": 0.0, "learning_rate": 2.5349577097135703e-08, "loss": 0.9431, "step": 24996 }, { "epoch": 0.9780499256592847, "grad_norm": 0.0, "learning_rate": 2.5259483302709776e-08, "loss": 0.9883, "step": 24997 }, { "epoch": 0.9780890523515142, "grad_norm": 0.0, "learning_rate": 2.5169549690118312e-08, "loss": 1.0005, "step": 24998 }, { "epoch": 0.9781281790437436, "grad_norm": 0.0, "learning_rate": 2.50797762608046e-08, "loss": 0.9396, "step": 24999 }, { "epoch": 0.9781673057359731, "grad_norm": 0.0, "learning_rate": 2.499016301621082e-08, "loss": 0.8438, "step": 25000 }, { "epoch": 0.9782064324282025, "grad_norm": 0.0, "learning_rate": 2.4900709957774715e-08, "loss": 0.9647, "step": 25001 }, { "epoch": 0.978245559120432, "grad_norm": 0.0, "learning_rate": 2.481141708693513e-08, "loss": 0.9506, "step": 25002 }, { "epoch": 0.9782846858126614, "grad_norm": 0.0, "learning_rate": 2.4722284405123142e-08, "loss": 1.0718, "step": 25003 }, { "epoch": 0.9783238125048909, "grad_norm": 0.0, "learning_rate": 2.4633311913772052e-08, "loss": 0.9495, "step": 25004 }, { "epoch": 0.9783629391971203, "grad_norm": 0.0, "learning_rate": 2.45444996143096e-08, "loss": 0.9, "step": 25005 }, { "epoch": 0.9784020658893497, "grad_norm": 0.0, "learning_rate": 2.4455847508163545e-08, "loss": 0.9526, "step": 25006 }, { "epoch": 0.9784411925815791, "grad_norm": 0.0, "learning_rate": 2.436735559675607e-08, "loss": 0.9257, "step": 25007 }, { "epoch": 0.9784803192738086, "grad_norm": 0.0, "learning_rate": 2.427902388150827e-08, "loss": 0.7621, "step": 25008 }, { "epoch": 0.978519445966038, "grad_norm": 0.0, "learning_rate": 2.419085236384011e-08, "loss": 0.966, "step": 25009 }, { "epoch": 0.9785585726582675, "grad_norm": 0.0, "learning_rate": 2.4102841045166027e-08, "loss": 0.8676, "step": 25010 }, { "epoch": 0.9785976993504969, "grad_norm": 0.0, "learning_rate": 2.4014989926899323e-08, "loss": 1.0123, "step": 25011 }, { "epoch": 0.9786368260427264, "grad_norm": 0.0, "learning_rate": 2.39272990104511e-08, "loss": 0.9865, "step": 25012 }, { "epoch": 0.9786759527349558, "grad_norm": 0.0, "learning_rate": 2.3839768297230226e-08, "loss": 0.9601, "step": 25013 }, { "epoch": 0.9787150794271853, "grad_norm": 0.0, "learning_rate": 2.3752397788642245e-08, "loss": 0.9891, "step": 25014 }, { "epoch": 0.9787542061194147, "grad_norm": 0.0, "learning_rate": 2.366518748608937e-08, "loss": 0.968, "step": 25015 }, { "epoch": 0.9787933328116442, "grad_norm": 0.0, "learning_rate": 2.35781373909727e-08, "loss": 0.9463, "step": 25016 }, { "epoch": 0.9788324595038735, "grad_norm": 0.0, "learning_rate": 2.3491247504690008e-08, "loss": 0.9601, "step": 25017 }, { "epoch": 0.9788715861961029, "grad_norm": 0.0, "learning_rate": 2.3404517828636843e-08, "loss": 0.8114, "step": 25018 }, { "epoch": 0.9789107128883324, "grad_norm": 0.0, "learning_rate": 2.3317948364205424e-08, "loss": 0.8564, "step": 25019 }, { "epoch": 0.9789498395805618, "grad_norm": 0.0, "learning_rate": 2.323153911278575e-08, "loss": 0.9978, "step": 25020 }, { "epoch": 0.9789889662727913, "grad_norm": 0.0, "learning_rate": 2.3145290075766713e-08, "loss": 1.0533, "step": 25021 }, { "epoch": 0.9790280929650207, "grad_norm": 0.0, "learning_rate": 2.305920125453165e-08, "loss": 0.9923, "step": 25022 }, { "epoch": 0.9790672196572502, "grad_norm": 0.0, "learning_rate": 2.2973272650465006e-08, "loss": 0.912, "step": 25023 }, { "epoch": 0.9791063463494796, "grad_norm": 0.0, "learning_rate": 2.2887504264945682e-08, "loss": 0.9853, "step": 25024 }, { "epoch": 0.9791454730417091, "grad_norm": 0.0, "learning_rate": 2.2801896099350352e-08, "loss": 0.9438, "step": 25025 }, { "epoch": 0.9791845997339385, "grad_norm": 0.0, "learning_rate": 2.2716448155054583e-08, "loss": 0.9293, "step": 25026 }, { "epoch": 0.979223726426168, "grad_norm": 0.0, "learning_rate": 2.263116043343172e-08, "loss": 0.873, "step": 25027 }, { "epoch": 0.9792628531183973, "grad_norm": 0.0, "learning_rate": 2.254603293584845e-08, "loss": 1.0212, "step": 25028 }, { "epoch": 0.9793019798106268, "grad_norm": 0.0, "learning_rate": 2.2461065663673676e-08, "loss": 1.0181, "step": 25029 }, { "epoch": 0.9793411065028562, "grad_norm": 0.0, "learning_rate": 2.237625861827186e-08, "loss": 0.8589, "step": 25030 }, { "epoch": 0.9793802331950857, "grad_norm": 0.0, "learning_rate": 2.2291611801005253e-08, "loss": 0.9352, "step": 25031 }, { "epoch": 0.9794193598873151, "grad_norm": 0.0, "learning_rate": 2.220712521323165e-08, "loss": 0.9957, "step": 25032 }, { "epoch": 0.9794584865795446, "grad_norm": 0.0, "learning_rate": 2.2122798856309967e-08, "loss": 1.0684, "step": 25033 }, { "epoch": 0.979497613271774, "grad_norm": 0.0, "learning_rate": 2.2038632731592456e-08, "loss": 0.9655, "step": 25034 }, { "epoch": 0.9795367399640035, "grad_norm": 0.0, "learning_rate": 2.195462684043137e-08, "loss": 0.898, "step": 25035 }, { "epoch": 0.9795758666562329, "grad_norm": 0.0, "learning_rate": 2.1870781184175625e-08, "loss": 1.1117, "step": 25036 }, { "epoch": 0.9796149933484624, "grad_norm": 0.0, "learning_rate": 2.1787095764171928e-08, "loss": 0.9159, "step": 25037 }, { "epoch": 0.9796541200406917, "grad_norm": 0.0, "learning_rate": 2.1703570581763644e-08, "loss": 0.9619, "step": 25038 }, { "epoch": 0.9796932467329212, "grad_norm": 0.0, "learning_rate": 2.1620205638293033e-08, "loss": 0.9063, "step": 25039 }, { "epoch": 0.9797323734251506, "grad_norm": 0.0, "learning_rate": 2.153700093509903e-08, "loss": 0.9005, "step": 25040 }, { "epoch": 0.9797715001173801, "grad_norm": 0.0, "learning_rate": 2.1453956473516114e-08, "loss": 1.0088, "step": 25041 }, { "epoch": 0.9798106268096095, "grad_norm": 0.0, "learning_rate": 2.1371072254878778e-08, "loss": 0.9791, "step": 25042 }, { "epoch": 0.979849753501839, "grad_norm": 0.0, "learning_rate": 2.1288348280518178e-08, "loss": 0.7656, "step": 25043 }, { "epoch": 0.9798888801940684, "grad_norm": 0.0, "learning_rate": 2.1205784551763253e-08, "loss": 1.0596, "step": 25044 }, { "epoch": 0.9799280068862979, "grad_norm": 0.0, "learning_rate": 2.1123381069938497e-08, "loss": 0.9298, "step": 25045 }, { "epoch": 0.9799671335785273, "grad_norm": 0.0, "learning_rate": 2.104113783636841e-08, "loss": 0.9487, "step": 25046 }, { "epoch": 0.9800062602707567, "grad_norm": 0.0, "learning_rate": 2.095905485237415e-08, "loss": 0.8761, "step": 25047 }, { "epoch": 0.9800453869629862, "grad_norm": 0.0, "learning_rate": 2.0877132119271336e-08, "loss": 1.0067, "step": 25048 }, { "epoch": 0.9800845136552155, "grad_norm": 0.0, "learning_rate": 2.0795369638378915e-08, "loss": 0.8296, "step": 25049 }, { "epoch": 0.980123640347445, "grad_norm": 0.0, "learning_rate": 2.0713767411006946e-08, "loss": 1.017, "step": 25050 }, { "epoch": 0.9801627670396744, "grad_norm": 0.0, "learning_rate": 2.0632325438467714e-08, "loss": 0.9494, "step": 25051 }, { "epoch": 0.9802018937319039, "grad_norm": 0.0, "learning_rate": 2.0551043722067953e-08, "loss": 0.9113, "step": 25052 }, { "epoch": 0.9802410204241333, "grad_norm": 0.0, "learning_rate": 2.0469922263114396e-08, "loss": 1.0266, "step": 25053 }, { "epoch": 0.9802801471163628, "grad_norm": 0.0, "learning_rate": 2.038896106290822e-08, "loss": 0.9891, "step": 25054 }, { "epoch": 0.9803192738085922, "grad_norm": 0.0, "learning_rate": 2.030816012275061e-08, "loss": 0.937, "step": 25055 }, { "epoch": 0.9803584005008217, "grad_norm": 0.0, "learning_rate": 2.0227519443938305e-08, "loss": 0.9979, "step": 25056 }, { "epoch": 0.9803975271930511, "grad_norm": 0.0, "learning_rate": 2.0147039027766935e-08, "loss": 0.8228, "step": 25057 }, { "epoch": 0.9804366538852806, "grad_norm": 0.0, "learning_rate": 2.006671887552769e-08, "loss": 0.9192, "step": 25058 }, { "epoch": 0.98047578057751, "grad_norm": 0.0, "learning_rate": 1.998655898851287e-08, "loss": 0.9266, "step": 25059 }, { "epoch": 0.9805149072697394, "grad_norm": 0.0, "learning_rate": 1.9906559368007006e-08, "loss": 0.7606, "step": 25060 }, { "epoch": 0.9805540339619688, "grad_norm": 0.0, "learning_rate": 1.9826720015296842e-08, "loss": 0.9813, "step": 25061 }, { "epoch": 0.9805931606541983, "grad_norm": 0.0, "learning_rate": 1.9747040931663573e-08, "loss": 0.9668, "step": 25062 }, { "epoch": 0.9806322873464277, "grad_norm": 0.0, "learning_rate": 1.9667522118387295e-08, "loss": 0.953, "step": 25063 }, { "epoch": 0.9806714140386572, "grad_norm": 0.0, "learning_rate": 1.9588163576743647e-08, "loss": 0.9724, "step": 25064 }, { "epoch": 0.9807105407308866, "grad_norm": 0.0, "learning_rate": 1.9508965308008276e-08, "loss": 0.959, "step": 25065 }, { "epoch": 0.9807496674231161, "grad_norm": 0.0, "learning_rate": 1.942992731345239e-08, "loss": 0.8081, "step": 25066 }, { "epoch": 0.9807887941153455, "grad_norm": 0.0, "learning_rate": 1.9351049594346083e-08, "loss": 0.8514, "step": 25067 }, { "epoch": 0.980827920807575, "grad_norm": 0.0, "learning_rate": 1.927233215195501e-08, "loss": 0.8455, "step": 25068 }, { "epoch": 0.9808670474998044, "grad_norm": 0.0, "learning_rate": 1.919377498754482e-08, "loss": 0.9047, "step": 25069 }, { "epoch": 0.9809061741920339, "grad_norm": 0.0, "learning_rate": 1.9115378102374517e-08, "loss": 0.9184, "step": 25070 }, { "epoch": 0.9809453008842632, "grad_norm": 0.0, "learning_rate": 1.9037141497705302e-08, "loss": 0.9105, "step": 25071 }, { "epoch": 0.9809844275764927, "grad_norm": 0.0, "learning_rate": 1.8959065174792844e-08, "loss": 0.9628, "step": 25072 }, { "epoch": 0.9810235542687221, "grad_norm": 0.0, "learning_rate": 1.8881149134889477e-08, "loss": 0.8631, "step": 25073 }, { "epoch": 0.9810626809609516, "grad_norm": 0.0, "learning_rate": 1.8803393379248635e-08, "loss": 0.8385, "step": 25074 }, { "epoch": 0.981101807653181, "grad_norm": 0.0, "learning_rate": 1.8725797909119324e-08, "loss": 0.8446, "step": 25075 }, { "epoch": 0.9811409343454104, "grad_norm": 0.0, "learning_rate": 1.8648362725744995e-08, "loss": 0.9757, "step": 25076 }, { "epoch": 0.9811800610376399, "grad_norm": 0.0, "learning_rate": 1.8571087830371315e-08, "loss": 0.959, "step": 25077 }, { "epoch": 0.9812191877298693, "grad_norm": 0.0, "learning_rate": 1.8493973224238405e-08, "loss": 0.941, "step": 25078 }, { "epoch": 0.9812583144220988, "grad_norm": 0.0, "learning_rate": 1.8417018908584163e-08, "loss": 0.918, "step": 25079 }, { "epoch": 0.9812974411143282, "grad_norm": 0.0, "learning_rate": 1.8340224884646484e-08, "loss": 1.0732, "step": 25080 }, { "epoch": 0.9813365678065576, "grad_norm": 0.0, "learning_rate": 1.82635911536555e-08, "loss": 0.9844, "step": 25081 }, { "epoch": 0.981375694498787, "grad_norm": 0.0, "learning_rate": 1.8187117716844672e-08, "loss": 0.8883, "step": 25082 }, { "epoch": 0.9814148211910165, "grad_norm": 0.0, "learning_rate": 1.8110804575439677e-08, "loss": 1.0151, "step": 25083 }, { "epoch": 0.9814539478832459, "grad_norm": 0.0, "learning_rate": 1.803465173066732e-08, "loss": 0.9949, "step": 25084 }, { "epoch": 0.9814930745754754, "grad_norm": 0.0, "learning_rate": 1.795865918375106e-08, "loss": 1.0101, "step": 25085 }, { "epoch": 0.9815322012677048, "grad_norm": 0.0, "learning_rate": 1.7882826935909924e-08, "loss": 1.0247, "step": 25086 }, { "epoch": 0.9815713279599343, "grad_norm": 0.0, "learning_rate": 1.780715498836294e-08, "loss": 0.868, "step": 25087 }, { "epoch": 0.9816104546521637, "grad_norm": 0.0, "learning_rate": 1.773164334232358e-08, "loss": 0.8685, "step": 25088 }, { "epoch": 0.9816495813443932, "grad_norm": 0.0, "learning_rate": 1.7656291999006424e-08, "loss": 1.1204, "step": 25089 }, { "epoch": 0.9816887080366226, "grad_norm": 0.0, "learning_rate": 1.7581100959620513e-08, "loss": 0.8547, "step": 25090 }, { "epoch": 0.9817278347288521, "grad_norm": 0.0, "learning_rate": 1.7506070225373763e-08, "loss": 0.9001, "step": 25091 }, { "epoch": 0.9817669614210814, "grad_norm": 0.0, "learning_rate": 1.7431199797469654e-08, "loss": 1.0395, "step": 25092 }, { "epoch": 0.9818060881133109, "grad_norm": 0.0, "learning_rate": 1.7356489677112788e-08, "loss": 0.9382, "step": 25093 }, { "epoch": 0.9818452148055403, "grad_norm": 0.0, "learning_rate": 1.7281939865501084e-08, "loss": 0.9655, "step": 25094 }, { "epoch": 0.9818843414977698, "grad_norm": 0.0, "learning_rate": 1.720755036383248e-08, "loss": 0.9609, "step": 25095 }, { "epoch": 0.9819234681899992, "grad_norm": 0.0, "learning_rate": 1.7133321173301576e-08, "loss": 1.0184, "step": 25096 }, { "epoch": 0.9819625948822287, "grad_norm": 0.0, "learning_rate": 1.7059252295100748e-08, "loss": 1.0668, "step": 25097 }, { "epoch": 0.9820017215744581, "grad_norm": 0.0, "learning_rate": 1.6985343730417937e-08, "loss": 0.9811, "step": 25098 }, { "epoch": 0.9820408482666876, "grad_norm": 0.0, "learning_rate": 1.691159548044219e-08, "loss": 0.9842, "step": 25099 }, { "epoch": 0.982079974958917, "grad_norm": 0.0, "learning_rate": 1.6838007546355895e-08, "loss": 0.8777, "step": 25100 }, { "epoch": 0.9821191016511465, "grad_norm": 0.0, "learning_rate": 1.676457992934255e-08, "loss": 0.9727, "step": 25101 }, { "epoch": 0.9821582283433758, "grad_norm": 0.0, "learning_rate": 1.6691312630578992e-08, "loss": 1.022, "step": 25102 }, { "epoch": 0.9821973550356052, "grad_norm": 0.0, "learning_rate": 1.661820565124428e-08, "loss": 0.915, "step": 25103 }, { "epoch": 0.9822364817278347, "grad_norm": 0.0, "learning_rate": 1.6545258992510803e-08, "loss": 0.8058, "step": 25104 }, { "epoch": 0.9822756084200641, "grad_norm": 0.0, "learning_rate": 1.6472472655550964e-08, "loss": 1.0163, "step": 25105 }, { "epoch": 0.9823147351122936, "grad_norm": 0.0, "learning_rate": 1.6399846641532714e-08, "loss": 0.8256, "step": 25106 }, { "epoch": 0.982353861804523, "grad_norm": 0.0, "learning_rate": 1.6327380951622895e-08, "loss": 1.0329, "step": 25107 }, { "epoch": 0.9823929884967525, "grad_norm": 0.0, "learning_rate": 1.6255075586985024e-08, "loss": 0.9308, "step": 25108 }, { "epoch": 0.9824321151889819, "grad_norm": 0.0, "learning_rate": 1.6182930548780395e-08, "loss": 0.9229, "step": 25109 }, { "epoch": 0.9824712418812114, "grad_norm": 0.0, "learning_rate": 1.611094583816808e-08, "loss": 0.9373, "step": 25110 }, { "epoch": 0.9825103685734408, "grad_norm": 0.0, "learning_rate": 1.6039121456302708e-08, "loss": 0.9251, "step": 25111 }, { "epoch": 0.9825494952656703, "grad_norm": 0.0, "learning_rate": 1.5967457404337805e-08, "loss": 1.0459, "step": 25112 }, { "epoch": 0.9825886219578996, "grad_norm": 0.0, "learning_rate": 1.589595368342578e-08, "loss": 1.0492, "step": 25113 }, { "epoch": 0.9826277486501291, "grad_norm": 0.0, "learning_rate": 1.5824610294714605e-08, "loss": 0.908, "step": 25114 }, { "epoch": 0.9826668753423585, "grad_norm": 0.0, "learning_rate": 1.5753427239347807e-08, "loss": 1.0454, "step": 25115 }, { "epoch": 0.982706002034588, "grad_norm": 0.0, "learning_rate": 1.568240451847114e-08, "loss": 0.8614, "step": 25116 }, { "epoch": 0.9827451287268174, "grad_norm": 0.0, "learning_rate": 1.561154213322369e-08, "loss": 1.0404, "step": 25117 }, { "epoch": 0.9827842554190469, "grad_norm": 0.0, "learning_rate": 1.5540840084743435e-08, "loss": 0.8182, "step": 25118 }, { "epoch": 0.9828233821112763, "grad_norm": 0.0, "learning_rate": 1.547029837416614e-08, "loss": 0.8887, "step": 25119 }, { "epoch": 0.9828625088035058, "grad_norm": 0.0, "learning_rate": 1.539991700262422e-08, "loss": 1.031, "step": 25120 }, { "epoch": 0.9829016354957352, "grad_norm": 0.0, "learning_rate": 1.5329695971249003e-08, "loss": 0.8049, "step": 25121 }, { "epoch": 0.9829407621879647, "grad_norm": 0.0, "learning_rate": 1.525963528116736e-08, "loss": 0.977, "step": 25122 }, { "epoch": 0.982979888880194, "grad_norm": 0.0, "learning_rate": 1.5189734933503954e-08, "loss": 0.9941, "step": 25123 }, { "epoch": 0.9830190155724235, "grad_norm": 0.0, "learning_rate": 1.511999492938121e-08, "loss": 0.9906, "step": 25124 }, { "epoch": 0.9830581422646529, "grad_norm": 0.0, "learning_rate": 1.505041526992046e-08, "loss": 0.8321, "step": 25125 }, { "epoch": 0.9830972689568824, "grad_norm": 0.0, "learning_rate": 1.4980995956238588e-08, "loss": 1.0897, "step": 25126 }, { "epoch": 0.9831363956491118, "grad_norm": 0.0, "learning_rate": 1.4911736989449143e-08, "loss": 0.8412, "step": 25127 }, { "epoch": 0.9831755223413413, "grad_norm": 0.0, "learning_rate": 1.4842638370664575e-08, "loss": 0.9904, "step": 25128 }, { "epoch": 0.9832146490335707, "grad_norm": 0.0, "learning_rate": 1.4773700100996214e-08, "loss": 0.8316, "step": 25129 }, { "epoch": 0.9832537757258002, "grad_norm": 0.0, "learning_rate": 1.470492218154984e-08, "loss": 0.9759, "step": 25130 }, { "epoch": 0.9832929024180296, "grad_norm": 0.0, "learning_rate": 1.463630461343013e-08, "loss": 0.9777, "step": 25131 }, { "epoch": 0.983332029110259, "grad_norm": 0.0, "learning_rate": 1.4567847397739532e-08, "loss": 1.135, "step": 25132 }, { "epoch": 0.9833711558024885, "grad_norm": 0.0, "learning_rate": 1.4499550535576057e-08, "loss": 0.9434, "step": 25133 }, { "epoch": 0.9834102824947178, "grad_norm": 0.0, "learning_rate": 1.4431414028037715e-08, "loss": 0.9412, "step": 25134 }, { "epoch": 0.9834494091869473, "grad_norm": 0.0, "learning_rate": 1.4363437876218078e-08, "loss": 1.0269, "step": 25135 }, { "epoch": 0.9834885358791767, "grad_norm": 0.0, "learning_rate": 1.4295622081208493e-08, "loss": 0.9069, "step": 25136 }, { "epoch": 0.9835276625714062, "grad_norm": 0.0, "learning_rate": 1.42279666440992e-08, "loss": 1.0283, "step": 25137 }, { "epoch": 0.9835667892636356, "grad_norm": 0.0, "learning_rate": 1.4160471565974887e-08, "loss": 0.9736, "step": 25138 }, { "epoch": 0.9836059159558651, "grad_norm": 0.0, "learning_rate": 1.4093136847921352e-08, "loss": 0.9291, "step": 25139 }, { "epoch": 0.9836450426480945, "grad_norm": 0.0, "learning_rate": 1.402596249101884e-08, "loss": 0.9961, "step": 25140 }, { "epoch": 0.983684169340324, "grad_norm": 0.0, "learning_rate": 1.3958948496346492e-08, "loss": 1.0621, "step": 25141 }, { "epoch": 0.9837232960325534, "grad_norm": 0.0, "learning_rate": 1.3892094864979e-08, "loss": 1.0552, "step": 25142 }, { "epoch": 0.9837624227247829, "grad_norm": 0.0, "learning_rate": 1.3825401597992172e-08, "loss": 0.8802, "step": 25143 }, { "epoch": 0.9838015494170123, "grad_norm": 0.0, "learning_rate": 1.3758868696456263e-08, "loss": 0.961, "step": 25144 }, { "epoch": 0.9838406761092418, "grad_norm": 0.0, "learning_rate": 1.3692496161439305e-08, "loss": 0.9027, "step": 25145 }, { "epoch": 0.9838798028014711, "grad_norm": 0.0, "learning_rate": 1.3626283994007116e-08, "loss": 0.9408, "step": 25146 }, { "epoch": 0.9839189294937006, "grad_norm": 0.0, "learning_rate": 1.3560232195223288e-08, "loss": 0.8061, "step": 25147 }, { "epoch": 0.98395805618593, "grad_norm": 0.0, "learning_rate": 1.3494340766148084e-08, "loss": 0.9141, "step": 25148 }, { "epoch": 0.9839971828781595, "grad_norm": 0.0, "learning_rate": 1.3428609707839547e-08, "loss": 0.9228, "step": 25149 }, { "epoch": 0.9840363095703889, "grad_norm": 0.0, "learning_rate": 1.3363039021354607e-08, "loss": 0.9161, "step": 25150 }, { "epoch": 0.9840754362626184, "grad_norm": 0.0, "learning_rate": 1.3297628707744647e-08, "loss": 1.0183, "step": 25151 }, { "epoch": 0.9841145629548478, "grad_norm": 0.0, "learning_rate": 1.323237876806105e-08, "loss": 0.975, "step": 25152 }, { "epoch": 0.9841536896470773, "grad_norm": 0.0, "learning_rate": 1.3167289203351862e-08, "loss": 0.9297, "step": 25153 }, { "epoch": 0.9841928163393067, "grad_norm": 0.0, "learning_rate": 1.3102360014660697e-08, "loss": 1.023, "step": 25154 }, { "epoch": 0.9842319430315362, "grad_norm": 0.0, "learning_rate": 1.3037591203032273e-08, "loss": 1.0357, "step": 25155 }, { "epoch": 0.9842710697237655, "grad_norm": 0.0, "learning_rate": 1.297298276950576e-08, "loss": 0.9297, "step": 25156 }, { "epoch": 0.984310196415995, "grad_norm": 0.0, "learning_rate": 1.2908534715118104e-08, "loss": 1.0075, "step": 25157 }, { "epoch": 0.9843493231082244, "grad_norm": 0.0, "learning_rate": 1.2844247040905145e-08, "loss": 0.8972, "step": 25158 }, { "epoch": 0.9843884498004539, "grad_norm": 0.0, "learning_rate": 1.2780119747899388e-08, "loss": 0.9428, "step": 25159 }, { "epoch": 0.9844275764926833, "grad_norm": 0.0, "learning_rate": 1.2716152837128904e-08, "loss": 0.994, "step": 25160 }, { "epoch": 0.9844667031849127, "grad_norm": 0.0, "learning_rate": 1.2652346309623974e-08, "loss": 1.002, "step": 25161 }, { "epoch": 0.9845058298771422, "grad_norm": 0.0, "learning_rate": 1.258870016640601e-08, "loss": 0.9723, "step": 25162 }, { "epoch": 0.9845449565693716, "grad_norm": 0.0, "learning_rate": 1.2525214408499743e-08, "loss": 1.059, "step": 25163 }, { "epoch": 0.9845840832616011, "grad_norm": 0.0, "learning_rate": 1.2461889036923247e-08, "loss": 0.8935, "step": 25164 }, { "epoch": 0.9846232099538305, "grad_norm": 0.0, "learning_rate": 1.239872405269349e-08, "loss": 0.9281, "step": 25165 }, { "epoch": 0.98466233664606, "grad_norm": 0.0, "learning_rate": 1.2335719456825212e-08, "loss": 0.9085, "step": 25166 }, { "epoch": 0.9847014633382893, "grad_norm": 0.0, "learning_rate": 1.227287525032983e-08, "loss": 0.8549, "step": 25167 }, { "epoch": 0.9847405900305188, "grad_norm": 0.0, "learning_rate": 1.2210191434216534e-08, "loss": 0.7346, "step": 25168 }, { "epoch": 0.9847797167227482, "grad_norm": 0.0, "learning_rate": 1.2147668009492297e-08, "loss": 0.9544, "step": 25169 }, { "epoch": 0.9848188434149777, "grad_norm": 0.0, "learning_rate": 1.208530497716076e-08, "loss": 1.0396, "step": 25170 }, { "epoch": 0.9848579701072071, "grad_norm": 0.0, "learning_rate": 1.2023102338224457e-08, "loss": 0.8653, "step": 25171 }, { "epoch": 0.9848970967994366, "grad_norm": 0.0, "learning_rate": 1.1961060093681476e-08, "loss": 0.9381, "step": 25172 }, { "epoch": 0.984936223491666, "grad_norm": 0.0, "learning_rate": 1.1899178244527687e-08, "loss": 0.9143, "step": 25173 }, { "epoch": 0.9849753501838955, "grad_norm": 0.0, "learning_rate": 1.183745679175785e-08, "loss": 0.9657, "step": 25174 }, { "epoch": 0.9850144768761249, "grad_norm": 0.0, "learning_rate": 1.1775895736361176e-08, "loss": 1.0356, "step": 25175 }, { "epoch": 0.9850536035683544, "grad_norm": 0.0, "learning_rate": 1.1714495079329091e-08, "loss": 0.9308, "step": 25176 }, { "epoch": 0.9850927302605837, "grad_norm": 0.0, "learning_rate": 1.1653254821645254e-08, "loss": 0.953, "step": 25177 }, { "epoch": 0.9851318569528132, "grad_norm": 0.0, "learning_rate": 1.1592174964295545e-08, "loss": 0.9095, "step": 25178 }, { "epoch": 0.9851709836450426, "grad_norm": 0.0, "learning_rate": 1.1531255508258066e-08, "loss": 0.896, "step": 25179 }, { "epoch": 0.9852101103372721, "grad_norm": 0.0, "learning_rate": 1.1470496454512037e-08, "loss": 0.9126, "step": 25180 }, { "epoch": 0.9852492370295015, "grad_norm": 0.0, "learning_rate": 1.1409897804034452e-08, "loss": 0.9451, "step": 25181 }, { "epoch": 0.985288363721731, "grad_norm": 0.0, "learning_rate": 1.1349459557796761e-08, "loss": 0.8111, "step": 25182 }, { "epoch": 0.9853274904139604, "grad_norm": 0.0, "learning_rate": 1.1289181716771513e-08, "loss": 0.9482, "step": 25183 }, { "epoch": 0.9853666171061899, "grad_norm": 0.0, "learning_rate": 1.1229064281924606e-08, "loss": 0.8894, "step": 25184 }, { "epoch": 0.9854057437984193, "grad_norm": 0.0, "learning_rate": 1.1169107254221934e-08, "loss": 0.963, "step": 25185 }, { "epoch": 0.9854448704906488, "grad_norm": 0.0, "learning_rate": 1.1109310634627169e-08, "loss": 0.8792, "step": 25186 }, { "epoch": 0.9854839971828782, "grad_norm": 0.0, "learning_rate": 1.1049674424099543e-08, "loss": 0.9288, "step": 25187 }, { "epoch": 0.9855231238751077, "grad_norm": 0.0, "learning_rate": 1.0990198623598292e-08, "loss": 0.8907, "step": 25188 }, { "epoch": 0.985562250567337, "grad_norm": 0.0, "learning_rate": 1.0930883234077095e-08, "loss": 0.9964, "step": 25189 }, { "epoch": 0.9856013772595664, "grad_norm": 0.0, "learning_rate": 1.0871728256488524e-08, "loss": 0.9114, "step": 25190 }, { "epoch": 0.9856405039517959, "grad_norm": 0.0, "learning_rate": 1.0812733691782929e-08, "loss": 0.9035, "step": 25191 }, { "epoch": 0.9856796306440253, "grad_norm": 0.0, "learning_rate": 1.0753899540908441e-08, "loss": 0.9666, "step": 25192 }, { "epoch": 0.9857187573362548, "grad_norm": 0.0, "learning_rate": 1.0695225804808752e-08, "loss": 0.9028, "step": 25193 }, { "epoch": 0.9857578840284842, "grad_norm": 0.0, "learning_rate": 1.0636712484426436e-08, "loss": 0.9865, "step": 25194 }, { "epoch": 0.9857970107207137, "grad_norm": 0.0, "learning_rate": 1.0578359580700747e-08, "loss": 0.9758, "step": 25195 }, { "epoch": 0.9858361374129431, "grad_norm": 0.0, "learning_rate": 1.0520167094568712e-08, "loss": 1.0177, "step": 25196 }, { "epoch": 0.9858752641051726, "grad_norm": 0.0, "learning_rate": 1.0462135026966246e-08, "loss": 0.9051, "step": 25197 }, { "epoch": 0.985914390797402, "grad_norm": 0.0, "learning_rate": 1.040426337882372e-08, "loss": 0.8505, "step": 25198 }, { "epoch": 0.9859535174896314, "grad_norm": 0.0, "learning_rate": 1.0346552151071499e-08, "loss": 0.9023, "step": 25199 }, { "epoch": 0.9859926441818608, "grad_norm": 0.0, "learning_rate": 1.0289001344635507e-08, "loss": 0.8795, "step": 25200 }, { "epoch": 0.9860317708740903, "grad_norm": 0.0, "learning_rate": 1.0231610960440564e-08, "loss": 0.9352, "step": 25201 }, { "epoch": 0.9860708975663197, "grad_norm": 0.0, "learning_rate": 1.017438099940815e-08, "loss": 0.9483, "step": 25202 }, { "epoch": 0.9861100242585492, "grad_norm": 0.0, "learning_rate": 1.0117311462457535e-08, "loss": 1.0206, "step": 25203 }, { "epoch": 0.9861491509507786, "grad_norm": 0.0, "learning_rate": 1.006040235050465e-08, "loss": 0.961, "step": 25204 }, { "epoch": 0.9861882776430081, "grad_norm": 0.0, "learning_rate": 1.000365366446321e-08, "loss": 0.9384, "step": 25205 }, { "epoch": 0.9862274043352375, "grad_norm": 0.0, "learning_rate": 9.947065405245815e-09, "loss": 1.1445, "step": 25206 }, { "epoch": 0.986266531027467, "grad_norm": 0.0, "learning_rate": 9.89063757375952e-09, "loss": 0.9789, "step": 25207 }, { "epoch": 0.9863056577196964, "grad_norm": 0.0, "learning_rate": 9.834370170911378e-09, "loss": 0.9557, "step": 25208 }, { "epoch": 0.9863447844119259, "grad_norm": 0.0, "learning_rate": 9.77826319760511e-09, "loss": 0.9898, "step": 25209 }, { "epoch": 0.9863839111041552, "grad_norm": 0.0, "learning_rate": 9.722316654741103e-09, "loss": 1.0626, "step": 25210 }, { "epoch": 0.9864230377963847, "grad_norm": 0.0, "learning_rate": 9.666530543218644e-09, "loss": 1.0765, "step": 25211 }, { "epoch": 0.9864621644886141, "grad_norm": 0.0, "learning_rate": 9.610904863932568e-09, "loss": 0.9713, "step": 25212 }, { "epoch": 0.9865012911808436, "grad_norm": 0.0, "learning_rate": 9.555439617776607e-09, "loss": 0.8608, "step": 25213 }, { "epoch": 0.986540417873073, "grad_norm": 0.0, "learning_rate": 9.500134805642269e-09, "loss": 1.0038, "step": 25214 }, { "epoch": 0.9865795445653025, "grad_norm": 0.0, "learning_rate": 9.444990428416623e-09, "loss": 0.8657, "step": 25215 }, { "epoch": 0.9866186712575319, "grad_norm": 0.0, "learning_rate": 9.390006486985625e-09, "loss": 0.8609, "step": 25216 }, { "epoch": 0.9866577979497613, "grad_norm": 0.0, "learning_rate": 9.335182982231905e-09, "loss": 0.8777, "step": 25217 }, { "epoch": 0.9866969246419908, "grad_norm": 0.0, "learning_rate": 9.280519915036979e-09, "loss": 0.9535, "step": 25218 }, { "epoch": 0.9867360513342202, "grad_norm": 0.0, "learning_rate": 9.226017286276812e-09, "loss": 0.8087, "step": 25219 }, { "epoch": 0.9867751780264497, "grad_norm": 0.0, "learning_rate": 9.17167509682848e-09, "loss": 0.9134, "step": 25220 }, { "epoch": 0.986814304718679, "grad_norm": 0.0, "learning_rate": 9.11749334756351e-09, "loss": 1.0157, "step": 25221 }, { "epoch": 0.9868534314109085, "grad_norm": 0.0, "learning_rate": 9.063472039352317e-09, "loss": 1.0501, "step": 25222 }, { "epoch": 0.9868925581031379, "grad_norm": 0.0, "learning_rate": 9.009611173061983e-09, "loss": 1.0752, "step": 25223 }, { "epoch": 0.9869316847953674, "grad_norm": 0.0, "learning_rate": 8.955910749558482e-09, "loss": 1.0298, "step": 25224 }, { "epoch": 0.9869708114875968, "grad_norm": 0.0, "learning_rate": 8.90237076970335e-09, "loss": 0.849, "step": 25225 }, { "epoch": 0.9870099381798263, "grad_norm": 0.0, "learning_rate": 8.848991234357007e-09, "loss": 0.7822, "step": 25226 }, { "epoch": 0.9870490648720557, "grad_norm": 0.0, "learning_rate": 8.795772144375436e-09, "loss": 0.9424, "step": 25227 }, { "epoch": 0.9870881915642852, "grad_norm": 0.0, "learning_rate": 8.742713500614619e-09, "loss": 0.936, "step": 25228 }, { "epoch": 0.9871273182565146, "grad_norm": 0.0, "learning_rate": 8.689815303926096e-09, "loss": 0.8778, "step": 25229 }, { "epoch": 0.9871664449487441, "grad_norm": 0.0, "learning_rate": 8.63707755515919e-09, "loss": 1.0781, "step": 25230 }, { "epoch": 0.9872055716409734, "grad_norm": 0.0, "learning_rate": 8.584500255160999e-09, "loss": 0.9939, "step": 25231 }, { "epoch": 0.9872446983332029, "grad_norm": 0.0, "learning_rate": 8.532083404775294e-09, "loss": 0.876, "step": 25232 }, { "epoch": 0.9872838250254323, "grad_norm": 0.0, "learning_rate": 8.479827004844732e-09, "loss": 0.8642, "step": 25233 }, { "epoch": 0.9873229517176618, "grad_norm": 0.0, "learning_rate": 8.427731056208643e-09, "loss": 1.0251, "step": 25234 }, { "epoch": 0.9873620784098912, "grad_norm": 0.0, "learning_rate": 8.375795559701916e-09, "loss": 0.8993, "step": 25235 }, { "epoch": 0.9874012051021207, "grad_norm": 0.0, "learning_rate": 8.324020516160547e-09, "loss": 0.9094, "step": 25236 }, { "epoch": 0.9874403317943501, "grad_norm": 0.0, "learning_rate": 8.272405926414984e-09, "loss": 0.9103, "step": 25237 }, { "epoch": 0.9874794584865796, "grad_norm": 0.0, "learning_rate": 8.220951791294562e-09, "loss": 0.9036, "step": 25238 }, { "epoch": 0.987518585178809, "grad_norm": 0.0, "learning_rate": 8.169658111624179e-09, "loss": 1.0174, "step": 25239 }, { "epoch": 0.9875577118710385, "grad_norm": 0.0, "learning_rate": 8.118524888229839e-09, "loss": 0.9272, "step": 25240 }, { "epoch": 0.9875968385632679, "grad_norm": 0.0, "learning_rate": 8.067552121930888e-09, "loss": 0.998, "step": 25241 }, { "epoch": 0.9876359652554974, "grad_norm": 0.0, "learning_rate": 8.01673981354667e-09, "loss": 0.8709, "step": 25242 }, { "epoch": 0.9876750919477267, "grad_norm": 0.0, "learning_rate": 7.966087963892089e-09, "loss": 0.9101, "step": 25243 }, { "epoch": 0.9877142186399562, "grad_norm": 0.0, "learning_rate": 7.915596573782047e-09, "loss": 1.0387, "step": 25244 }, { "epoch": 0.9877533453321856, "grad_norm": 0.0, "learning_rate": 7.865265644027009e-09, "loss": 1.0246, "step": 25245 }, { "epoch": 0.987792472024415, "grad_norm": 0.0, "learning_rate": 7.815095175435217e-09, "loss": 0.8973, "step": 25246 }, { "epoch": 0.9878315987166445, "grad_norm": 0.0, "learning_rate": 7.765085168811581e-09, "loss": 0.9626, "step": 25247 }, { "epoch": 0.9878707254088739, "grad_norm": 0.0, "learning_rate": 7.715235624958794e-09, "loss": 0.905, "step": 25248 }, { "epoch": 0.9879098521011034, "grad_norm": 0.0, "learning_rate": 7.665546544679547e-09, "loss": 0.9594, "step": 25249 }, { "epoch": 0.9879489787933328, "grad_norm": 0.0, "learning_rate": 7.61601792877098e-09, "loss": 0.8748, "step": 25250 }, { "epoch": 0.9879881054855623, "grad_norm": 0.0, "learning_rate": 7.566649778026902e-09, "loss": 1.0295, "step": 25251 }, { "epoch": 0.9880272321777916, "grad_norm": 0.0, "learning_rate": 7.517442093241123e-09, "loss": 0.9415, "step": 25252 }, { "epoch": 0.9880663588700211, "grad_norm": 0.0, "learning_rate": 7.468394875204121e-09, "loss": 0.9737, "step": 25253 }, { "epoch": 0.9881054855622505, "grad_norm": 0.0, "learning_rate": 7.419508124704156e-09, "loss": 0.807, "step": 25254 }, { "epoch": 0.98814461225448, "grad_norm": 0.0, "learning_rate": 7.370781842525044e-09, "loss": 0.9513, "step": 25255 }, { "epoch": 0.9881837389467094, "grad_norm": 0.0, "learning_rate": 7.322216029449491e-09, "loss": 1.0239, "step": 25256 }, { "epoch": 0.9882228656389389, "grad_norm": 0.0, "learning_rate": 7.273810686257987e-09, "loss": 1.0386, "step": 25257 }, { "epoch": 0.9882619923311683, "grad_norm": 0.0, "learning_rate": 7.225565813726576e-09, "loss": 0.9669, "step": 25258 }, { "epoch": 0.9883011190233978, "grad_norm": 0.0, "learning_rate": 7.177481412632414e-09, "loss": 0.9381, "step": 25259 }, { "epoch": 0.9883402457156272, "grad_norm": 0.0, "learning_rate": 7.129557483744886e-09, "loss": 0.9245, "step": 25260 }, { "epoch": 0.9883793724078567, "grad_norm": 0.0, "learning_rate": 7.081794027835598e-09, "loss": 1.0129, "step": 25261 }, { "epoch": 0.9884184991000861, "grad_norm": 0.0, "learning_rate": 7.034191045670602e-09, "loss": 0.945, "step": 25262 }, { "epoch": 0.9884576257923156, "grad_norm": 0.0, "learning_rate": 6.986748538014843e-09, "loss": 0.9533, "step": 25263 }, { "epoch": 0.9884967524845449, "grad_norm": 0.0, "learning_rate": 6.939466505629933e-09, "loss": 0.866, "step": 25264 }, { "epoch": 0.9885358791767744, "grad_norm": 0.0, "learning_rate": 6.8923449492741545e-09, "loss": 1.0103, "step": 25265 }, { "epoch": 0.9885750058690038, "grad_norm": 0.0, "learning_rate": 6.845383869706901e-09, "loss": 1.03, "step": 25266 }, { "epoch": 0.9886141325612333, "grad_norm": 0.0, "learning_rate": 6.7985832676797925e-09, "loss": 0.9434, "step": 25267 }, { "epoch": 0.9886532592534627, "grad_norm": 0.0, "learning_rate": 6.751943143945561e-09, "loss": 1.0506, "step": 25268 }, { "epoch": 0.9886923859456922, "grad_norm": 0.0, "learning_rate": 6.705463499252496e-09, "loss": 0.9866, "step": 25269 }, { "epoch": 0.9887315126379216, "grad_norm": 0.0, "learning_rate": 6.659144334347778e-09, "loss": 0.9744, "step": 25270 }, { "epoch": 0.9887706393301511, "grad_norm": 0.0, "learning_rate": 6.612985649974146e-09, "loss": 1.1151, "step": 25271 }, { "epoch": 0.9888097660223805, "grad_norm": 0.0, "learning_rate": 6.56698744687434e-09, "loss": 1.0247, "step": 25272 }, { "epoch": 0.98884889271461, "grad_norm": 0.0, "learning_rate": 6.521149725785547e-09, "loss": 0.9201, "step": 25273 }, { "epoch": 0.9888880194068393, "grad_norm": 0.0, "learning_rate": 6.475472487444956e-09, "loss": 0.9477, "step": 25274 }, { "epoch": 0.9889271460990687, "grad_norm": 0.0, "learning_rate": 6.429955732585313e-09, "loss": 1.1053, "step": 25275 }, { "epoch": 0.9889662727912982, "grad_norm": 0.0, "learning_rate": 6.384599461938256e-09, "loss": 1.043, "step": 25276 }, { "epoch": 0.9890053994835276, "grad_norm": 0.0, "learning_rate": 6.339403676232092e-09, "loss": 0.9462, "step": 25277 }, { "epoch": 0.9890445261757571, "grad_norm": 0.0, "learning_rate": 6.294368376192906e-09, "loss": 0.9736, "step": 25278 }, { "epoch": 0.9890836528679865, "grad_norm": 0.0, "learning_rate": 6.2494935625423415e-09, "loss": 1.0001, "step": 25279 }, { "epoch": 0.989122779560216, "grad_norm": 0.0, "learning_rate": 6.204779236002045e-09, "loss": 0.9451, "step": 25280 }, { "epoch": 0.9891619062524454, "grad_norm": 0.0, "learning_rate": 6.160225397290332e-09, "loss": 0.9874, "step": 25281 }, { "epoch": 0.9892010329446749, "grad_norm": 0.0, "learning_rate": 6.1158320471221834e-09, "loss": 0.9353, "step": 25282 }, { "epoch": 0.9892401596369043, "grad_norm": 0.0, "learning_rate": 6.071599186211474e-09, "loss": 0.9409, "step": 25283 }, { "epoch": 0.9892792863291338, "grad_norm": 0.0, "learning_rate": 6.0275268152665264e-09, "loss": 1.0053, "step": 25284 }, { "epoch": 0.9893184130213631, "grad_norm": 0.0, "learning_rate": 5.983614934997883e-09, "loss": 0.9123, "step": 25285 }, { "epoch": 0.9893575397135926, "grad_norm": 0.0, "learning_rate": 5.9398635461083155e-09, "loss": 0.8828, "step": 25286 }, { "epoch": 0.989396666405822, "grad_norm": 0.0, "learning_rate": 5.896272649300594e-09, "loss": 0.9272, "step": 25287 }, { "epoch": 0.9894357930980515, "grad_norm": 0.0, "learning_rate": 5.85284224527638e-09, "loss": 1.0089, "step": 25288 }, { "epoch": 0.9894749197902809, "grad_norm": 0.0, "learning_rate": 5.809572334731783e-09, "loss": 0.9034, "step": 25289 }, { "epoch": 0.9895140464825104, "grad_norm": 0.0, "learning_rate": 5.766462918362914e-09, "loss": 0.9064, "step": 25290 }, { "epoch": 0.9895531731747398, "grad_norm": 0.0, "learning_rate": 5.72351399686033e-09, "loss": 0.9739, "step": 25291 }, { "epoch": 0.9895922998669693, "grad_norm": 0.0, "learning_rate": 5.680725570914592e-09, "loss": 0.9735, "step": 25292 }, { "epoch": 0.9896314265591987, "grad_norm": 0.0, "learning_rate": 5.638097641212925e-09, "loss": 0.8776, "step": 25293 }, { "epoch": 0.9896705532514282, "grad_norm": 0.0, "learning_rate": 5.595630208439229e-09, "loss": 1.0125, "step": 25294 }, { "epoch": 0.9897096799436575, "grad_norm": 0.0, "learning_rate": 5.5533232732774e-09, "loss": 1.0084, "step": 25295 }, { "epoch": 0.989748806635887, "grad_norm": 0.0, "learning_rate": 5.511176836404674e-09, "loss": 0.7971, "step": 25296 }, { "epoch": 0.9897879333281164, "grad_norm": 0.0, "learning_rate": 5.469190898498289e-09, "loss": 0.9065, "step": 25297 }, { "epoch": 0.9898270600203459, "grad_norm": 0.0, "learning_rate": 5.427365460233258e-09, "loss": 1.0209, "step": 25298 }, { "epoch": 0.9898661867125753, "grad_norm": 0.0, "learning_rate": 5.385700522280158e-09, "loss": 0.8334, "step": 25299 }, { "epoch": 0.9899053134048048, "grad_norm": 0.0, "learning_rate": 5.344196085309561e-09, "loss": 0.823, "step": 25300 }, { "epoch": 0.9899444400970342, "grad_norm": 0.0, "learning_rate": 5.302852149987603e-09, "loss": 0.93, "step": 25301 }, { "epoch": 0.9899835667892636, "grad_norm": 0.0, "learning_rate": 5.261668716975976e-09, "loss": 0.9764, "step": 25302 }, { "epoch": 0.9900226934814931, "grad_norm": 0.0, "learning_rate": 5.2206457869397045e-09, "loss": 0.9151, "step": 25303 }, { "epoch": 0.9900618201737225, "grad_norm": 0.0, "learning_rate": 5.179783360534929e-09, "loss": 0.9906, "step": 25304 }, { "epoch": 0.990100946865952, "grad_norm": 0.0, "learning_rate": 5.139081438418902e-09, "loss": 0.8925, "step": 25305 }, { "epoch": 0.9901400735581813, "grad_norm": 0.0, "learning_rate": 5.0985400212444355e-09, "loss": 0.9497, "step": 25306 }, { "epoch": 0.9901792002504108, "grad_norm": 0.0, "learning_rate": 5.05815910966323e-09, "loss": 0.9435, "step": 25307 }, { "epoch": 0.9902183269426402, "grad_norm": 0.0, "learning_rate": 5.017938704323655e-09, "loss": 0.9777, "step": 25308 }, { "epoch": 0.9902574536348697, "grad_norm": 0.0, "learning_rate": 4.977878805871861e-09, "loss": 1.0546, "step": 25309 }, { "epoch": 0.9902965803270991, "grad_norm": 0.0, "learning_rate": 4.937979414950667e-09, "loss": 0.9198, "step": 25310 }, { "epoch": 0.9903357070193286, "grad_norm": 0.0, "learning_rate": 4.898240532201781e-09, "loss": 0.8762, "step": 25311 }, { "epoch": 0.990374833711558, "grad_norm": 0.0, "learning_rate": 4.858662158261363e-09, "loss": 0.8662, "step": 25312 }, { "epoch": 0.9904139604037875, "grad_norm": 0.0, "learning_rate": 4.819244293766678e-09, "loss": 0.9675, "step": 25313 }, { "epoch": 0.9904530870960169, "grad_norm": 0.0, "learning_rate": 4.779986939350556e-09, "loss": 0.9151, "step": 25314 }, { "epoch": 0.9904922137882464, "grad_norm": 0.0, "learning_rate": 4.740890095642492e-09, "loss": 0.9321, "step": 25315 }, { "epoch": 0.9905313404804758, "grad_norm": 0.0, "learning_rate": 4.701953763271983e-09, "loss": 0.905, "step": 25316 }, { "epoch": 0.9905704671727052, "grad_norm": 0.0, "learning_rate": 4.663177942862973e-09, "loss": 0.9695, "step": 25317 }, { "epoch": 0.9906095938649346, "grad_norm": 0.0, "learning_rate": 4.624562635038299e-09, "loss": 0.9002, "step": 25318 }, { "epoch": 0.9906487205571641, "grad_norm": 0.0, "learning_rate": 4.586107840417464e-09, "loss": 0.847, "step": 25319 }, { "epoch": 0.9906878472493935, "grad_norm": 0.0, "learning_rate": 4.547813559619974e-09, "loss": 0.9629, "step": 25320 }, { "epoch": 0.990726973941623, "grad_norm": 0.0, "learning_rate": 4.50967979325867e-09, "loss": 0.9675, "step": 25321 }, { "epoch": 0.9907661006338524, "grad_norm": 0.0, "learning_rate": 4.471706541947507e-09, "loss": 0.9732, "step": 25322 }, { "epoch": 0.9908052273260819, "grad_norm": 0.0, "learning_rate": 4.433893806294887e-09, "loss": 1.0008, "step": 25323 }, { "epoch": 0.9908443540183113, "grad_norm": 0.0, "learning_rate": 4.396241586909211e-09, "loss": 0.9811, "step": 25324 }, { "epoch": 0.9908834807105408, "grad_norm": 0.0, "learning_rate": 4.358749884395552e-09, "loss": 0.816, "step": 25325 }, { "epoch": 0.9909226074027702, "grad_norm": 0.0, "learning_rate": 4.321418699354541e-09, "loss": 0.8692, "step": 25326 }, { "epoch": 0.9909617340949997, "grad_norm": 0.0, "learning_rate": 4.284248032385696e-09, "loss": 1.025, "step": 25327 }, { "epoch": 0.991000860787229, "grad_norm": 0.0, "learning_rate": 4.247237884086319e-09, "loss": 0.9405, "step": 25328 }, { "epoch": 0.9910399874794585, "grad_norm": 0.0, "learning_rate": 4.210388255051489e-09, "loss": 1.0406, "step": 25329 }, { "epoch": 0.9910791141716879, "grad_norm": 0.0, "learning_rate": 4.173699145872956e-09, "loss": 0.9977, "step": 25330 }, { "epoch": 0.9911182408639173, "grad_norm": 0.0, "learning_rate": 4.137170557139136e-09, "loss": 0.8588, "step": 25331 }, { "epoch": 0.9911573675561468, "grad_norm": 0.0, "learning_rate": 4.100802489436228e-09, "loss": 1.0609, "step": 25332 }, { "epoch": 0.9911964942483762, "grad_norm": 0.0, "learning_rate": 4.06459494334932e-09, "loss": 0.8818, "step": 25333 }, { "epoch": 0.9912356209406057, "grad_norm": 0.0, "learning_rate": 4.028547919459058e-09, "loss": 1.04, "step": 25334 }, { "epoch": 0.9912747476328351, "grad_norm": 0.0, "learning_rate": 3.9926614183449785e-09, "loss": 0.9131, "step": 25335 }, { "epoch": 0.9913138743250646, "grad_norm": 0.0, "learning_rate": 3.956935440582177e-09, "loss": 0.835, "step": 25336 }, { "epoch": 0.991353001017294, "grad_norm": 0.0, "learning_rate": 3.921369986746859e-09, "loss": 0.9862, "step": 25337 }, { "epoch": 0.9913921277095235, "grad_norm": 0.0, "learning_rate": 3.885965057407459e-09, "loss": 0.8332, "step": 25338 }, { "epoch": 0.9914312544017528, "grad_norm": 0.0, "learning_rate": 3.850720653133522e-09, "loss": 1.0427, "step": 25339 }, { "epoch": 0.9914703810939823, "grad_norm": 0.0, "learning_rate": 3.8156367744912604e-09, "loss": 0.9232, "step": 25340 }, { "epoch": 0.9915095077862117, "grad_norm": 0.0, "learning_rate": 3.780713422044668e-09, "loss": 1.0403, "step": 25341 }, { "epoch": 0.9915486344784412, "grad_norm": 0.0, "learning_rate": 3.745950596353298e-09, "loss": 1.018, "step": 25342 }, { "epoch": 0.9915877611706706, "grad_norm": 0.0, "learning_rate": 3.711348297975592e-09, "loss": 0.982, "step": 25343 }, { "epoch": 0.9916268878629001, "grad_norm": 0.0, "learning_rate": 3.6769065274677718e-09, "loss": 1.0253, "step": 25344 }, { "epoch": 0.9916660145551295, "grad_norm": 0.0, "learning_rate": 3.6426252853838385e-09, "loss": 0.8203, "step": 25345 }, { "epoch": 0.991705141247359, "grad_norm": 0.0, "learning_rate": 3.6085045722722424e-09, "loss": 0.9079, "step": 25346 }, { "epoch": 0.9917442679395884, "grad_norm": 0.0, "learning_rate": 3.5745443886814334e-09, "loss": 0.985, "step": 25347 }, { "epoch": 0.9917833946318179, "grad_norm": 0.0, "learning_rate": 3.540744735157642e-09, "loss": 0.8976, "step": 25348 }, { "epoch": 0.9918225213240472, "grad_norm": 0.0, "learning_rate": 3.5071056122437663e-09, "loss": 0.8921, "step": 25349 }, { "epoch": 0.9918616480162767, "grad_norm": 0.0, "learning_rate": 3.4736270204793752e-09, "loss": 0.9816, "step": 25350 }, { "epoch": 0.9919007747085061, "grad_norm": 0.0, "learning_rate": 3.4403089604007066e-09, "loss": 0.961, "step": 25351 }, { "epoch": 0.9919399014007356, "grad_norm": 0.0, "learning_rate": 3.4071514325462184e-09, "loss": 0.8623, "step": 25352 }, { "epoch": 0.991979028092965, "grad_norm": 0.0, "learning_rate": 3.374154437445487e-09, "loss": 0.9294, "step": 25353 }, { "epoch": 0.9920181547851945, "grad_norm": 0.0, "learning_rate": 3.341317975629199e-09, "loss": 0.9157, "step": 25354 }, { "epoch": 0.9920572814774239, "grad_norm": 0.0, "learning_rate": 3.308642047625821e-09, "loss": 1.0764, "step": 25355 }, { "epoch": 0.9920964081696534, "grad_norm": 0.0, "learning_rate": 3.276126653958267e-09, "loss": 0.8987, "step": 25356 }, { "epoch": 0.9921355348618828, "grad_norm": 0.0, "learning_rate": 3.243771795149453e-09, "loss": 0.9694, "step": 25357 }, { "epoch": 0.9921746615541123, "grad_norm": 0.0, "learning_rate": 3.211577471718963e-09, "loss": 1.0568, "step": 25358 }, { "epoch": 0.9922137882463417, "grad_norm": 0.0, "learning_rate": 3.179543684184161e-09, "loss": 0.9232, "step": 25359 }, { "epoch": 0.992252914938571, "grad_norm": 0.0, "learning_rate": 3.14767043305908e-09, "loss": 0.9827, "step": 25360 }, { "epoch": 0.9922920416308005, "grad_norm": 0.0, "learning_rate": 3.1159577188555334e-09, "loss": 0.94, "step": 25361 }, { "epoch": 0.9923311683230299, "grad_norm": 0.0, "learning_rate": 3.084405542083113e-09, "loss": 0.9204, "step": 25362 }, { "epoch": 0.9923702950152594, "grad_norm": 0.0, "learning_rate": 3.053013903249191e-09, "loss": 1.0146, "step": 25363 }, { "epoch": 0.9924094217074888, "grad_norm": 0.0, "learning_rate": 3.0217828028555883e-09, "loss": 0.8834, "step": 25364 }, { "epoch": 0.9924485483997183, "grad_norm": 0.0, "learning_rate": 2.990712241405236e-09, "loss": 0.825, "step": 25365 }, { "epoch": 0.9924876750919477, "grad_norm": 0.0, "learning_rate": 2.959802219397734e-09, "loss": 0.8803, "step": 25366 }, { "epoch": 0.9925268017841772, "grad_norm": 0.0, "learning_rate": 2.929052737329352e-09, "loss": 0.806, "step": 25367 }, { "epoch": 0.9925659284764066, "grad_norm": 0.0, "learning_rate": 2.8984637956930297e-09, "loss": 1.0035, "step": 25368 }, { "epoch": 0.9926050551686361, "grad_norm": 0.0, "learning_rate": 2.868035394979485e-09, "loss": 0.9926, "step": 25369 }, { "epoch": 0.9926441818608654, "grad_norm": 0.0, "learning_rate": 2.8377675356783262e-09, "loss": 0.9287, "step": 25370 }, { "epoch": 0.992683308553095, "grad_norm": 0.0, "learning_rate": 2.8076602182758317e-09, "loss": 0.9663, "step": 25371 }, { "epoch": 0.9927224352453243, "grad_norm": 0.0, "learning_rate": 2.7777134432549478e-09, "loss": 1.021, "step": 25372 }, { "epoch": 0.9927615619375538, "grad_norm": 0.0, "learning_rate": 2.7479272110975117e-09, "loss": 0.897, "step": 25373 }, { "epoch": 0.9928006886297832, "grad_norm": 0.0, "learning_rate": 2.7183015222809197e-09, "loss": 0.9592, "step": 25374 }, { "epoch": 0.9928398153220127, "grad_norm": 0.0, "learning_rate": 2.688836377280346e-09, "loss": 0.8389, "step": 25375 }, { "epoch": 0.9928789420142421, "grad_norm": 0.0, "learning_rate": 2.659531776569857e-09, "loss": 1.0742, "step": 25376 }, { "epoch": 0.9929180687064716, "grad_norm": 0.0, "learning_rate": 2.6303877206201868e-09, "loss": 1.0544, "step": 25377 }, { "epoch": 0.992957195398701, "grad_norm": 0.0, "learning_rate": 2.601404209898739e-09, "loss": 0.9054, "step": 25378 }, { "epoch": 0.9929963220909305, "grad_norm": 0.0, "learning_rate": 2.572581244871808e-09, "loss": 0.7827, "step": 25379 }, { "epoch": 0.9930354487831599, "grad_norm": 0.0, "learning_rate": 2.543918826002356e-09, "loss": 0.8216, "step": 25380 }, { "epoch": 0.9930745754753894, "grad_norm": 0.0, "learning_rate": 2.515416953748906e-09, "loss": 0.9793, "step": 25381 }, { "epoch": 0.9931137021676187, "grad_norm": 0.0, "learning_rate": 2.4870756285699795e-09, "loss": 1.0358, "step": 25382 }, { "epoch": 0.9931528288598482, "grad_norm": 0.0, "learning_rate": 2.4588948509218778e-09, "loss": 0.9019, "step": 25383 }, { "epoch": 0.9931919555520776, "grad_norm": 0.0, "learning_rate": 2.4308746212564627e-09, "loss": 0.929, "step": 25384 }, { "epoch": 0.9932310822443071, "grad_norm": 0.0, "learning_rate": 2.4030149400233737e-09, "loss": 0.9156, "step": 25385 }, { "epoch": 0.9932702089365365, "grad_norm": 0.0, "learning_rate": 2.375315807670031e-09, "loss": 0.91, "step": 25386 }, { "epoch": 0.993309335628766, "grad_norm": 0.0, "learning_rate": 2.3477772246416342e-09, "loss": 1.0054, "step": 25387 }, { "epoch": 0.9933484623209954, "grad_norm": 0.0, "learning_rate": 2.3203991913800515e-09, "loss": 1.0016, "step": 25388 }, { "epoch": 0.9933875890132248, "grad_norm": 0.0, "learning_rate": 2.293181708324932e-09, "loss": 0.911, "step": 25389 }, { "epoch": 0.9934267157054543, "grad_norm": 0.0, "learning_rate": 2.2661247759125927e-09, "loss": 1.0106, "step": 25390 }, { "epoch": 0.9934658423976837, "grad_norm": 0.0, "learning_rate": 2.2392283945793513e-09, "loss": 0.9062, "step": 25391 }, { "epoch": 0.9935049690899131, "grad_norm": 0.0, "learning_rate": 2.212492564757085e-09, "loss": 0.9956, "step": 25392 }, { "epoch": 0.9935440957821425, "grad_norm": 0.0, "learning_rate": 2.185917286873229e-09, "loss": 0.9537, "step": 25393 }, { "epoch": 0.993583222474372, "grad_norm": 0.0, "learning_rate": 2.15950256135522e-09, "loss": 1.0121, "step": 25394 }, { "epoch": 0.9936223491666014, "grad_norm": 0.0, "learning_rate": 2.133248388628273e-09, "loss": 0.918, "step": 25395 }, { "epoch": 0.9936614758588309, "grad_norm": 0.0, "learning_rate": 2.1071547691131624e-09, "loss": 0.9384, "step": 25396 }, { "epoch": 0.9937006025510603, "grad_norm": 0.0, "learning_rate": 2.0812217032295523e-09, "loss": 0.9226, "step": 25397 }, { "epoch": 0.9937397292432898, "grad_norm": 0.0, "learning_rate": 2.055449191392667e-09, "loss": 1.0703, "step": 25398 }, { "epoch": 0.9937788559355192, "grad_norm": 0.0, "learning_rate": 2.0298372340177287e-09, "loss": 1.019, "step": 25399 }, { "epoch": 0.9938179826277487, "grad_norm": 0.0, "learning_rate": 2.0043858315144103e-09, "loss": 0.8479, "step": 25400 }, { "epoch": 0.9938571093199781, "grad_norm": 0.0, "learning_rate": 1.979094984293495e-09, "loss": 0.906, "step": 25401 }, { "epoch": 0.9938962360122076, "grad_norm": 0.0, "learning_rate": 1.953964692760213e-09, "loss": 0.9326, "step": 25402 }, { "epoch": 0.9939353627044369, "grad_norm": 0.0, "learning_rate": 1.928994957317576e-09, "loss": 0.921, "step": 25403 }, { "epoch": 0.9939744893966664, "grad_norm": 0.0, "learning_rate": 1.904185778366374e-09, "loss": 0.8656, "step": 25404 }, { "epoch": 0.9940136160888958, "grad_norm": 0.0, "learning_rate": 1.879537156306288e-09, "loss": 1.0543, "step": 25405 }, { "epoch": 0.9940527427811253, "grad_norm": 0.0, "learning_rate": 1.855049091532557e-09, "loss": 0.8835, "step": 25406 }, { "epoch": 0.9940918694733547, "grad_norm": 0.0, "learning_rate": 1.8307215844381998e-09, "loss": 1.0612, "step": 25407 }, { "epoch": 0.9941309961655842, "grad_norm": 0.0, "learning_rate": 1.806554635414015e-09, "loss": 1.0072, "step": 25408 }, { "epoch": 0.9941701228578136, "grad_norm": 0.0, "learning_rate": 1.7825482448485808e-09, "loss": 1.0204, "step": 25409 }, { "epoch": 0.9942092495500431, "grad_norm": 0.0, "learning_rate": 1.7587024131260344e-09, "loss": 0.8279, "step": 25410 }, { "epoch": 0.9942483762422725, "grad_norm": 0.0, "learning_rate": 1.7350171406316229e-09, "loss": 0.9156, "step": 25411 }, { "epoch": 0.994287502934502, "grad_norm": 0.0, "learning_rate": 1.7114924277428223e-09, "loss": 0.941, "step": 25412 }, { "epoch": 0.9943266296267314, "grad_norm": 0.0, "learning_rate": 1.6881282748404392e-09, "loss": 1.0009, "step": 25413 }, { "epoch": 0.9943657563189608, "grad_norm": 0.0, "learning_rate": 1.6649246822963982e-09, "loss": 0.7682, "step": 25414 }, { "epoch": 0.9944048830111902, "grad_norm": 0.0, "learning_rate": 1.6418816504859548e-09, "loss": 0.9282, "step": 25415 }, { "epoch": 0.9944440097034196, "grad_norm": 0.0, "learning_rate": 1.618999179778813e-09, "loss": 0.967, "step": 25416 }, { "epoch": 0.9944831363956491, "grad_norm": 0.0, "learning_rate": 1.5962772705413465e-09, "loss": 1.0701, "step": 25417 }, { "epoch": 0.9945222630878785, "grad_norm": 0.0, "learning_rate": 1.5737159231388188e-09, "loss": 1.0514, "step": 25418 }, { "epoch": 0.994561389780108, "grad_norm": 0.0, "learning_rate": 1.5513151379331625e-09, "loss": 0.9099, "step": 25419 }, { "epoch": 0.9946005164723374, "grad_norm": 0.0, "learning_rate": 1.5290749152852003e-09, "loss": 0.9549, "step": 25420 }, { "epoch": 0.9946396431645669, "grad_norm": 0.0, "learning_rate": 1.5069952555513135e-09, "loss": 0.8736, "step": 25421 }, { "epoch": 0.9946787698567963, "grad_norm": 0.0, "learning_rate": 1.4850761590867734e-09, "loss": 1.0094, "step": 25422 }, { "epoch": 0.9947178965490258, "grad_norm": 0.0, "learning_rate": 1.463317626242411e-09, "loss": 0.8381, "step": 25423 }, { "epoch": 0.9947570232412551, "grad_norm": 0.0, "learning_rate": 1.4417196573690561e-09, "loss": 0.8656, "step": 25424 }, { "epoch": 0.9947961499334846, "grad_norm": 0.0, "learning_rate": 1.4202822528119886e-09, "loss": 0.9139, "step": 25425 }, { "epoch": 0.994835276625714, "grad_norm": 0.0, "learning_rate": 1.3990054129175979e-09, "loss": 0.8534, "step": 25426 }, { "epoch": 0.9948744033179435, "grad_norm": 0.0, "learning_rate": 1.3778891380245018e-09, "loss": 0.9438, "step": 25427 }, { "epoch": 0.9949135300101729, "grad_norm": 0.0, "learning_rate": 1.3569334284746493e-09, "loss": 0.9642, "step": 25428 }, { "epoch": 0.9949526567024024, "grad_norm": 0.0, "learning_rate": 1.3361382846033278e-09, "loss": 0.9786, "step": 25429 }, { "epoch": 0.9949917833946318, "grad_norm": 0.0, "learning_rate": 1.315503706744714e-09, "loss": 0.9281, "step": 25430 }, { "epoch": 0.9950309100868613, "grad_norm": 0.0, "learning_rate": 1.295029695229655e-09, "loss": 1.1008, "step": 25431 }, { "epoch": 0.9950700367790907, "grad_norm": 0.0, "learning_rate": 1.274716250386776e-09, "loss": 0.9137, "step": 25432 }, { "epoch": 0.9951091634713202, "grad_norm": 0.0, "learning_rate": 1.2545633725435936e-09, "loss": 0.8369, "step": 25433 }, { "epoch": 0.9951482901635496, "grad_norm": 0.0, "learning_rate": 1.234571062023182e-09, "loss": 0.9237, "step": 25434 }, { "epoch": 0.995187416855779, "grad_norm": 0.0, "learning_rate": 1.2147393191463963e-09, "loss": 1.0163, "step": 25435 }, { "epoch": 0.9952265435480084, "grad_norm": 0.0, "learning_rate": 1.1950681442307599e-09, "loss": 0.9152, "step": 25436 }, { "epoch": 0.9952656702402379, "grad_norm": 0.0, "learning_rate": 1.1755575375937966e-09, "loss": 0.9299, "step": 25437 }, { "epoch": 0.9953047969324673, "grad_norm": 0.0, "learning_rate": 1.1562074995485895e-09, "loss": 0.8853, "step": 25438 }, { "epoch": 0.9953439236246968, "grad_norm": 0.0, "learning_rate": 1.1370180304048906e-09, "loss": 0.9113, "step": 25439 }, { "epoch": 0.9953830503169262, "grad_norm": 0.0, "learning_rate": 1.117989130471342e-09, "loss": 0.8663, "step": 25440 }, { "epoch": 0.9954221770091557, "grad_norm": 0.0, "learning_rate": 1.099120800054365e-09, "loss": 0.9245, "step": 25441 }, { "epoch": 0.9954613037013851, "grad_norm": 0.0, "learning_rate": 1.0804130394548306e-09, "loss": 1.0305, "step": 25442 }, { "epoch": 0.9955004303936146, "grad_norm": 0.0, "learning_rate": 1.0618658489758293e-09, "loss": 0.982, "step": 25443 }, { "epoch": 0.995539557085844, "grad_norm": 0.0, "learning_rate": 1.0434792289137908e-09, "loss": 0.8974, "step": 25444 }, { "epoch": 0.9955786837780733, "grad_norm": 0.0, "learning_rate": 1.0252531795629238e-09, "loss": 1.0247, "step": 25445 }, { "epoch": 0.9956178104703028, "grad_norm": 0.0, "learning_rate": 1.0071877012185483e-09, "loss": 0.9789, "step": 25446 }, { "epoch": 0.9956569371625322, "grad_norm": 0.0, "learning_rate": 9.892827941682114e-10, "loss": 0.801, "step": 25447 }, { "epoch": 0.9956960638547617, "grad_norm": 0.0, "learning_rate": 9.715384587005715e-10, "loss": 0.9821, "step": 25448 }, { "epoch": 0.9957351905469911, "grad_norm": 0.0, "learning_rate": 9.539546951009559e-10, "loss": 0.8754, "step": 25449 }, { "epoch": 0.9957743172392206, "grad_norm": 0.0, "learning_rate": 9.36531503651361e-10, "loss": 1.0411, "step": 25450 }, { "epoch": 0.99581344393145, "grad_norm": 0.0, "learning_rate": 9.192688846315634e-10, "loss": 0.9886, "step": 25451 }, { "epoch": 0.9958525706236795, "grad_norm": 0.0, "learning_rate": 9.021668383180082e-10, "loss": 1.0372, "step": 25452 }, { "epoch": 0.9958916973159089, "grad_norm": 0.0, "learning_rate": 8.852253649871412e-10, "loss": 0.9976, "step": 25453 }, { "epoch": 0.9959308240081384, "grad_norm": 0.0, "learning_rate": 8.684444649087465e-10, "loss": 0.9872, "step": 25454 }, { "epoch": 0.9959699507003678, "grad_norm": 0.0, "learning_rate": 8.518241383548286e-10, "loss": 0.9803, "step": 25455 }, { "epoch": 0.9960090773925973, "grad_norm": 0.0, "learning_rate": 8.353643855907312e-10, "loss": 0.8208, "step": 25456 }, { "epoch": 0.9960482040848266, "grad_norm": 0.0, "learning_rate": 8.19065206880687e-10, "loss": 0.9204, "step": 25457 }, { "epoch": 0.9960873307770561, "grad_norm": 0.0, "learning_rate": 8.029266024867088e-10, "loss": 0.9674, "step": 25458 }, { "epoch": 0.9961264574692855, "grad_norm": 0.0, "learning_rate": 7.869485726674786e-10, "loss": 0.9371, "step": 25459 }, { "epoch": 0.996165584161515, "grad_norm": 0.0, "learning_rate": 7.71131117680568e-10, "loss": 0.8357, "step": 25460 }, { "epoch": 0.9962047108537444, "grad_norm": 0.0, "learning_rate": 7.554742377802183e-10, "loss": 0.8564, "step": 25461 }, { "epoch": 0.9962438375459739, "grad_norm": 0.0, "learning_rate": 7.399779332162293e-10, "loss": 0.9948, "step": 25462 }, { "epoch": 0.9962829642382033, "grad_norm": 0.0, "learning_rate": 7.246422042395118e-10, "loss": 1.0504, "step": 25463 }, { "epoch": 0.9963220909304328, "grad_norm": 0.0, "learning_rate": 7.094670510943147e-10, "loss": 0.9886, "step": 25464 }, { "epoch": 0.9963612176226622, "grad_norm": 0.0, "learning_rate": 6.944524740259973e-10, "loss": 0.9395, "step": 25465 }, { "epoch": 0.9964003443148917, "grad_norm": 0.0, "learning_rate": 6.795984732743677e-10, "loss": 1.0415, "step": 25466 }, { "epoch": 0.996439471007121, "grad_norm": 0.0, "learning_rate": 6.649050490792341e-10, "loss": 0.9453, "step": 25467 }, { "epoch": 0.9964785976993505, "grad_norm": 0.0, "learning_rate": 6.503722016748537e-10, "loss": 0.9736, "step": 25468 }, { "epoch": 0.9965177243915799, "grad_norm": 0.0, "learning_rate": 6.359999312965937e-10, "loss": 0.9827, "step": 25469 }, { "epoch": 0.9965568510838094, "grad_norm": 0.0, "learning_rate": 6.217882381731599e-10, "loss": 0.9613, "step": 25470 }, { "epoch": 0.9965959777760388, "grad_norm": 0.0, "learning_rate": 6.077371225343687e-10, "loss": 0.9748, "step": 25471 }, { "epoch": 0.9966351044682683, "grad_norm": 0.0, "learning_rate": 5.938465846055952e-10, "loss": 0.9392, "step": 25472 }, { "epoch": 0.9966742311604977, "grad_norm": 0.0, "learning_rate": 5.801166246099943e-10, "loss": 0.9371, "step": 25473 }, { "epoch": 0.9967133578527271, "grad_norm": 0.0, "learning_rate": 5.665472427673901e-10, "loss": 0.9531, "step": 25474 }, { "epoch": 0.9967524845449566, "grad_norm": 0.0, "learning_rate": 5.531384392953865e-10, "loss": 0.9703, "step": 25475 }, { "epoch": 0.996791611237186, "grad_norm": 0.0, "learning_rate": 5.398902144104768e-10, "loss": 1.0028, "step": 25476 }, { "epoch": 0.9968307379294155, "grad_norm": 0.0, "learning_rate": 5.268025683258238e-10, "loss": 0.9872, "step": 25477 }, { "epoch": 0.9968698646216448, "grad_norm": 0.0, "learning_rate": 5.138755012501495e-10, "loss": 0.885, "step": 25478 }, { "epoch": 0.9969089913138743, "grad_norm": 0.0, "learning_rate": 5.011090133910657e-10, "loss": 0.9457, "step": 25479 }, { "epoch": 0.9969481180061037, "grad_norm": 0.0, "learning_rate": 4.885031049539635e-10, "loss": 1.0286, "step": 25480 }, { "epoch": 0.9969872446983332, "grad_norm": 0.0, "learning_rate": 4.760577761420137e-10, "loss": 0.9785, "step": 25481 }, { "epoch": 0.9970263713905626, "grad_norm": 0.0, "learning_rate": 4.6377302715394644e-10, "loss": 0.7946, "step": 25482 }, { "epoch": 0.9970654980827921, "grad_norm": 0.0, "learning_rate": 4.516488581884915e-10, "loss": 0.9927, "step": 25483 }, { "epoch": 0.9971046247750215, "grad_norm": 0.0, "learning_rate": 4.396852694388276e-10, "loss": 0.9003, "step": 25484 }, { "epoch": 0.997143751467251, "grad_norm": 0.0, "learning_rate": 4.278822610970235e-10, "loss": 1.0372, "step": 25485 }, { "epoch": 0.9971828781594804, "grad_norm": 0.0, "learning_rate": 4.162398333540374e-10, "loss": 0.8668, "step": 25486 }, { "epoch": 0.9972220048517099, "grad_norm": 0.0, "learning_rate": 4.047579863963869e-10, "loss": 1.0959, "step": 25487 }, { "epoch": 0.9972611315439392, "grad_norm": 0.0, "learning_rate": 3.9343672040725866e-10, "loss": 0.9311, "step": 25488 }, { "epoch": 0.9973002582361687, "grad_norm": 0.0, "learning_rate": 3.822760355698396e-10, "loss": 0.9571, "step": 25489 }, { "epoch": 0.9973393849283981, "grad_norm": 0.0, "learning_rate": 3.712759320628756e-10, "loss": 0.9797, "step": 25490 }, { "epoch": 0.9973785116206276, "grad_norm": 0.0, "learning_rate": 3.6043641006289207e-10, "loss": 1.0429, "step": 25491 }, { "epoch": 0.997417638312857, "grad_norm": 0.0, "learning_rate": 3.4975746974419413e-10, "loss": 0.9656, "step": 25492 }, { "epoch": 0.9974567650050865, "grad_norm": 0.0, "learning_rate": 3.39239111277756e-10, "loss": 0.9276, "step": 25493 }, { "epoch": 0.9974958916973159, "grad_norm": 0.0, "learning_rate": 3.2888133483233163e-10, "loss": 1.0541, "step": 25494 }, { "epoch": 0.9975350183895454, "grad_norm": 0.0, "learning_rate": 3.1868414057556475e-10, "loss": 0.9312, "step": 25495 }, { "epoch": 0.9975741450817748, "grad_norm": 0.0, "learning_rate": 3.0864752867065807e-10, "loss": 1.0227, "step": 25496 }, { "epoch": 0.9976132717740043, "grad_norm": 0.0, "learning_rate": 2.987714992785939e-10, "loss": 1.0142, "step": 25497 }, { "epoch": 0.9976523984662337, "grad_norm": 0.0, "learning_rate": 2.8905605255702405e-10, "loss": 0.8493, "step": 25498 }, { "epoch": 0.9976915251584632, "grad_norm": 0.0, "learning_rate": 2.795011886636001e-10, "loss": 1.065, "step": 25499 }, { "epoch": 0.9977306518506925, "grad_norm": 0.0, "learning_rate": 2.701069077504226e-10, "loss": 0.8917, "step": 25500 }, { "epoch": 0.997769778542922, "grad_norm": 0.0, "learning_rate": 2.608732099695921e-10, "loss": 0.972, "step": 25501 }, { "epoch": 0.9978089052351514, "grad_norm": 0.0, "learning_rate": 2.518000954687683e-10, "loss": 0.9539, "step": 25502 }, { "epoch": 0.9978480319273808, "grad_norm": 0.0, "learning_rate": 2.4288756439339034e-10, "loss": 0.9171, "step": 25503 }, { "epoch": 0.9978871586196103, "grad_norm": 0.0, "learning_rate": 2.3413561688667706e-10, "loss": 0.8685, "step": 25504 }, { "epoch": 0.9979262853118397, "grad_norm": 0.0, "learning_rate": 2.255442530896268e-10, "loss": 0.8392, "step": 25505 }, { "epoch": 0.9979654120040692, "grad_norm": 0.0, "learning_rate": 2.1711347313990715e-10, "loss": 1.0085, "step": 25506 }, { "epoch": 0.9980045386962986, "grad_norm": 0.0, "learning_rate": 2.0884327717296538e-10, "loss": 1.0119, "step": 25507 }, { "epoch": 0.9980436653885281, "grad_norm": 0.0, "learning_rate": 2.0073366532202821e-10, "loss": 0.8276, "step": 25508 }, { "epoch": 0.9980827920807575, "grad_norm": 0.0, "learning_rate": 1.927846377158815e-10, "loss": 0.9329, "step": 25509 }, { "epoch": 0.998121918772987, "grad_norm": 0.0, "learning_rate": 1.8499619448331118e-10, "loss": 1.004, "step": 25510 }, { "epoch": 0.9981610454652163, "grad_norm": 0.0, "learning_rate": 1.773683357497724e-10, "loss": 0.9632, "step": 25511 }, { "epoch": 0.9982001721574458, "grad_norm": 0.0, "learning_rate": 1.6990106163738972e-10, "loss": 0.9595, "step": 25512 }, { "epoch": 0.9982392988496752, "grad_norm": 0.0, "learning_rate": 1.62594372264957e-10, "loss": 0.9314, "step": 25513 }, { "epoch": 0.9982784255419047, "grad_norm": 0.0, "learning_rate": 1.5544826775126808e-10, "loss": 1.0457, "step": 25514 }, { "epoch": 0.9983175522341341, "grad_norm": 0.0, "learning_rate": 1.4846274821067596e-10, "loss": 1.0126, "step": 25515 }, { "epoch": 0.9983566789263636, "grad_norm": 0.0, "learning_rate": 1.4163781375531316e-10, "loss": 1.0237, "step": 25516 }, { "epoch": 0.998395805618593, "grad_norm": 0.0, "learning_rate": 1.3497346449509175e-10, "loss": 1.0689, "step": 25517 }, { "epoch": 0.9984349323108225, "grad_norm": 0.0, "learning_rate": 1.2846970053548292e-10, "loss": 1.0157, "step": 25518 }, { "epoch": 0.9984740590030519, "grad_norm": 0.0, "learning_rate": 1.2212652198306807e-10, "loss": 0.9278, "step": 25519 }, { "epoch": 0.9985131856952814, "grad_norm": 0.0, "learning_rate": 1.1594392893776729e-10, "loss": 0.9198, "step": 25520 }, { "epoch": 0.9985523123875107, "grad_norm": 0.0, "learning_rate": 1.0992192150061087e-10, "loss": 1.0786, "step": 25521 }, { "epoch": 0.9985914390797402, "grad_norm": 0.0, "learning_rate": 1.0406049976818822e-10, "loss": 0.9662, "step": 25522 }, { "epoch": 0.9986305657719696, "grad_norm": 0.0, "learning_rate": 9.835966383264784e-11, "loss": 0.9218, "step": 25523 }, { "epoch": 0.9986696924641991, "grad_norm": 0.0, "learning_rate": 9.281941378835869e-11, "loss": 0.8834, "step": 25524 }, { "epoch": 0.9987088191564285, "grad_norm": 0.0, "learning_rate": 8.743974972191816e-11, "loss": 1.013, "step": 25525 }, { "epoch": 0.998747945848658, "grad_norm": 0.0, "learning_rate": 8.222067171992365e-11, "loss": 0.9872, "step": 25526 }, { "epoch": 0.9987870725408874, "grad_norm": 0.0, "learning_rate": 7.716217986786234e-11, "loss": 1.0458, "step": 25527 }, { "epoch": 0.9988261992331169, "grad_norm": 0.0, "learning_rate": 7.226427424567028e-11, "loss": 0.9969, "step": 25528 }, { "epoch": 0.9988653259253463, "grad_norm": 0.0, "learning_rate": 6.752695493217331e-11, "loss": 1.0835, "step": 25529 }, { "epoch": 0.9989044526175757, "grad_norm": 0.0, "learning_rate": 6.295022200397682e-11, "loss": 0.8658, "step": 25530 }, { "epoch": 0.9989435793098052, "grad_norm": 0.0, "learning_rate": 5.85340755332453e-11, "loss": 0.9093, "step": 25531 }, { "epoch": 0.9989827060020345, "grad_norm": 0.0, "learning_rate": 5.427851559325348e-11, "loss": 0.9929, "step": 25532 }, { "epoch": 0.999021832694264, "grad_norm": 0.0, "learning_rate": 5.01835422495045e-11, "loss": 0.9086, "step": 25533 }, { "epoch": 0.9990609593864934, "grad_norm": 0.0, "learning_rate": 4.6249155569721983e-11, "loss": 0.8902, "step": 25534 }, { "epoch": 0.9991000860787229, "grad_norm": 0.0, "learning_rate": 4.247535561718863e-11, "loss": 1.0076, "step": 25535 }, { "epoch": 0.9991392127709523, "grad_norm": 0.0, "learning_rate": 3.886214245074627e-11, "loss": 0.9008, "step": 25536 }, { "epoch": 0.9991783394631818, "grad_norm": 0.0, "learning_rate": 3.5409516130346936e-11, "loss": 0.9891, "step": 25537 }, { "epoch": 0.9992174661554112, "grad_norm": 0.0, "learning_rate": 3.211747671039156e-11, "loss": 0.994, "step": 25538 }, { "epoch": 0.9992565928476407, "grad_norm": 0.0, "learning_rate": 2.898602424417085e-11, "loss": 0.9009, "step": 25539 }, { "epoch": 0.9992957195398701, "grad_norm": 0.0, "learning_rate": 2.6015158781644844e-11, "loss": 0.9208, "step": 25540 }, { "epoch": 0.9993348462320996, "grad_norm": 0.0, "learning_rate": 2.320488037055313e-11, "loss": 0.9936, "step": 25541 }, { "epoch": 0.999373972924329, "grad_norm": 0.0, "learning_rate": 2.0555189055304627e-11, "loss": 1.0728, "step": 25542 }, { "epoch": 0.9994130996165584, "grad_norm": 0.0, "learning_rate": 1.8066084880308255e-11, "loss": 0.9539, "step": 25543 }, { "epoch": 0.9994522263087878, "grad_norm": 0.0, "learning_rate": 1.5737567884421824e-11, "loss": 0.8921, "step": 25544 }, { "epoch": 0.9994913530010173, "grad_norm": 0.0, "learning_rate": 1.3569638105392913e-11, "loss": 0.9147, "step": 25545 }, { "epoch": 0.9995304796932467, "grad_norm": 0.0, "learning_rate": 1.1562295576528216e-11, "loss": 1.0902, "step": 25546 }, { "epoch": 0.9995696063854762, "grad_norm": 0.0, "learning_rate": 9.715540332244645e-12, "loss": 1.018, "step": 25547 }, { "epoch": 0.9996087330777056, "grad_norm": 0.0, "learning_rate": 8.029372401407997e-12, "loss": 0.8777, "step": 25548 }, { "epoch": 0.9996478597699351, "grad_norm": 0.0, "learning_rate": 6.503791810663629e-12, "loss": 0.892, "step": 25549 }, { "epoch": 0.9996869864621645, "grad_norm": 0.0, "learning_rate": 5.138798584436444e-12, "loss": 0.9996, "step": 25550 }, { "epoch": 0.999726113154394, "grad_norm": 0.0, "learning_rate": 3.934392744930904e-12, "loss": 1.0075, "step": 25551 }, { "epoch": 0.9997652398466234, "grad_norm": 0.0, "learning_rate": 2.8905743121310225e-12, "loss": 0.9733, "step": 25552 }, { "epoch": 0.9998043665388529, "grad_norm": 0.0, "learning_rate": 2.0073433015799227e-12, "loss": 1.0447, "step": 25553 }, { "epoch": 0.9998434932310822, "grad_norm": 0.0, "learning_rate": 1.2846997288207263e-12, "loss": 1.0254, "step": 25554 }, { "epoch": 0.9998826199233117, "grad_norm": 0.0, "learning_rate": 7.22643603845441e-13, "loss": 1.0316, "step": 25555 }, { "epoch": 0.9999217466155411, "grad_norm": 0.0, "learning_rate": 3.211749377562967e-13, "loss": 1.054, "step": 25556 }, { "epoch": 0.9999608733077706, "grad_norm": 0.0, "learning_rate": 8.029373499418569e-14, "loss": 0.9539, "step": 25557 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.8918, "step": 25558 }, { "epoch": 1.0, "step": 25558, "total_flos": 4.286949096797556e+19, "train_loss": 1.0705728959623044, "train_runtime": 171615.6303, "train_samples_per_second": 19.062, "train_steps_per_second": 0.149 } ], "logging_steps": 1.0, "max_steps": 25558, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.286949096797556e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }