{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7828, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 4.813821465484795, "learning_rate": 8.510638297872341e-08, "loss": 0.8625, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.877510882980871, "learning_rate": 1.7021276595744683e-07, "loss": 0.8597, "step": 2 }, { "epoch": 0.0, "grad_norm": 4.864946846329796, "learning_rate": 2.553191489361702e-07, "loss": 0.8505, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.832432124339777, "learning_rate": 3.4042553191489365e-07, "loss": 0.8534, "step": 4 }, { "epoch": 0.0, "grad_norm": 5.009510084866663, "learning_rate": 4.2553191489361704e-07, "loss": 0.8704, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.677003541096888, "learning_rate": 5.106382978723404e-07, "loss": 0.8653, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.43836316129943, "learning_rate": 5.957446808510639e-07, "loss": 0.8585, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.87102875858295, "learning_rate": 6.808510638297873e-07, "loss": 0.8595, "step": 8 }, { "epoch": 0.0, "grad_norm": 4.630437024570521, "learning_rate": 7.659574468085107e-07, "loss": 0.8519, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.993399582994152, "learning_rate": 8.510638297872341e-07, "loss": 0.8361, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.267442450003034, "learning_rate": 9.361702127659575e-07, "loss": 0.8443, "step": 11 }, { "epoch": 0.0, "grad_norm": 2.931476862341291, "learning_rate": 1.0212765957446809e-06, "loss": 0.8283, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.0874168561792104, "learning_rate": 1.1063829787234042e-06, "loss": 0.8916, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.2071709644046007, "learning_rate": 1.1914893617021278e-06, "loss": 0.7999, "step": 14 }, { "epoch": 0.0, "grad_norm": 2.423820704220183, "learning_rate": 1.276595744680851e-06, "loss": 0.8166, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.8234250834736165, "learning_rate": 1.3617021276595746e-06, "loss": 0.8007, "step": 16 }, { "epoch": 0.0, "grad_norm": 1.559914585986834, "learning_rate": 1.4468085106382978e-06, "loss": 0.8016, "step": 17 }, { "epoch": 0.0, "grad_norm": 1.7852558537989587, "learning_rate": 1.5319148936170214e-06, "loss": 0.7687, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.6067385707817425, "learning_rate": 1.617021276595745e-06, "loss": 0.7739, "step": 19 }, { "epoch": 0.0, "grad_norm": 1.5716253831970848, "learning_rate": 1.7021276595744682e-06, "loss": 0.7601, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.1424162477263904, "learning_rate": 1.7872340425531918e-06, "loss": 0.7771, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.3894091797633124, "learning_rate": 1.872340425531915e-06, "loss": 0.8037, "step": 22 }, { "epoch": 0.0, "grad_norm": 1.9037481367900009, "learning_rate": 1.9574468085106385e-06, "loss": 0.7885, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.9044366245634077, "learning_rate": 2.0425531914893617e-06, "loss": 0.7752, "step": 24 }, { "epoch": 0.0, "grad_norm": 1.8329524727090325, "learning_rate": 2.1276595744680853e-06, "loss": 0.745, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.3035196286801003, "learning_rate": 2.2127659574468085e-06, "loss": 0.7468, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.1644580476774526, "learning_rate": 2.297872340425532e-06, "loss": 0.7914, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.1920971250289583, "learning_rate": 2.3829787234042557e-06, "loss": 0.782, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.1859110169221518, "learning_rate": 2.468085106382979e-06, "loss": 0.7554, "step": 29 }, { "epoch": 0.0, "grad_norm": 1.2663663229276239, "learning_rate": 2.553191489361702e-06, "loss": 0.7699, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.5121781320857868, "learning_rate": 2.6382978723404256e-06, "loss": 0.7623, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.217341110619103, "learning_rate": 2.7234042553191492e-06, "loss": 0.7381, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.1626613275910347, "learning_rate": 2.808510638297873e-06, "loss": 0.7407, "step": 33 }, { "epoch": 0.0, "grad_norm": 0.9902854572955616, "learning_rate": 2.8936170212765956e-06, "loss": 0.726, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.0946258701593374, "learning_rate": 2.978723404255319e-06, "loss": 0.7321, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.4918162594998283, "learning_rate": 3.0638297872340428e-06, "loss": 0.7249, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.004302028954229, "learning_rate": 3.1489361702127664e-06, "loss": 0.7436, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.7754507580618064, "learning_rate": 3.23404255319149e-06, "loss": 0.7198, "step": 38 }, { "epoch": 0.0, "grad_norm": 0.6997233678441288, "learning_rate": 3.3191489361702127e-06, "loss": 0.7297, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.7862908706455211, "learning_rate": 3.4042553191489363e-06, "loss": 0.712, "step": 40 }, { "epoch": 0.01, "grad_norm": 0.9470360533404181, "learning_rate": 3.48936170212766e-06, "loss": 0.7234, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.8372580097504584, "learning_rate": 3.5744680851063835e-06, "loss": 0.7524, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.7013876832989343, "learning_rate": 3.6595744680851063e-06, "loss": 0.7413, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.7027236487862718, "learning_rate": 3.74468085106383e-06, "loss": 0.7383, "step": 44 }, { "epoch": 0.01, "grad_norm": 0.8581898900673243, "learning_rate": 3.8297872340425535e-06, "loss": 0.7342, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.8049923348888463, "learning_rate": 3.914893617021277e-06, "loss": 0.7009, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.7659345421305237, "learning_rate": 4.000000000000001e-06, "loss": 0.7238, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.6753149544319615, "learning_rate": 4.085106382978723e-06, "loss": 0.7277, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.9622618772609132, "learning_rate": 4.170212765957447e-06, "loss": 0.7257, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.6836706592104519, "learning_rate": 4.255319148936171e-06, "loss": 0.7016, "step": 50 }, { "epoch": 0.01, "grad_norm": 0.7532823294653909, "learning_rate": 4.340425531914894e-06, "loss": 0.7564, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.804422410250238, "learning_rate": 4.425531914893617e-06, "loss": 0.7632, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.6951109873959237, "learning_rate": 4.5106382978723406e-06, "loss": 0.7596, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.6766664659239755, "learning_rate": 4.595744680851064e-06, "loss": 0.6979, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.6537722551185673, "learning_rate": 4.680851063829788e-06, "loss": 0.6943, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.676675723395113, "learning_rate": 4.765957446808511e-06, "loss": 0.6924, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.7016404721799121, "learning_rate": 4.851063829787234e-06, "loss": 0.7121, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.5873401236493501, "learning_rate": 4.936170212765958e-06, "loss": 0.6972, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.6607298314255127, "learning_rate": 5.0212765957446805e-06, "loss": 0.7461, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.6911717808072398, "learning_rate": 5.106382978723404e-06, "loss": 0.7017, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.7915215601253437, "learning_rate": 5.191489361702128e-06, "loss": 0.7126, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.6537017041354621, "learning_rate": 5.276595744680851e-06, "loss": 0.686, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.5553984373446943, "learning_rate": 5.361702127659575e-06, "loss": 0.6837, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.6759883281223004, "learning_rate": 5.4468085106382985e-06, "loss": 0.7035, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.7177655765801251, "learning_rate": 5.531914893617022e-06, "loss": 0.7457, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.6267606596090599, "learning_rate": 5.617021276595746e-06, "loss": 0.6977, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.714780951866869, "learning_rate": 5.702127659574469e-06, "loss": 0.7077, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.6466802990107242, "learning_rate": 5.787234042553191e-06, "loss": 0.6706, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.6433252929115906, "learning_rate": 5.872340425531915e-06, "loss": 0.6992, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.7204119857983909, "learning_rate": 5.957446808510638e-06, "loss": 0.7762, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.6173357983998237, "learning_rate": 6.042553191489362e-06, "loss": 0.6848, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.7151819071050377, "learning_rate": 6.1276595744680855e-06, "loss": 0.7376, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.8240051652086531, "learning_rate": 6.212765957446809e-06, "loss": 0.6787, "step": 73 }, { "epoch": 0.01, "grad_norm": 0.6931718793431657, "learning_rate": 6.297872340425533e-06, "loss": 0.7409, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.7683042061113133, "learning_rate": 6.382978723404256e-06, "loss": 0.6882, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.7099466438475135, "learning_rate": 6.46808510638298e-06, "loss": 0.686, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.6009279499333515, "learning_rate": 6.553191489361702e-06, "loss": 0.6854, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.6940209681748523, "learning_rate": 6.6382978723404254e-06, "loss": 0.6782, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.7077384755408, "learning_rate": 6.723404255319149e-06, "loss": 0.7316, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.7867880491441767, "learning_rate": 6.808510638297873e-06, "loss": 0.6711, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.742744802659502, "learning_rate": 6.893617021276596e-06, "loss": 0.7479, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.6766974114760749, "learning_rate": 6.97872340425532e-06, "loss": 0.6962, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.742058170438728, "learning_rate": 7.0638297872340434e-06, "loss": 0.6789, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.661309376583039, "learning_rate": 7.148936170212767e-06, "loss": 0.7031, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.7079190278125546, "learning_rate": 7.234042553191491e-06, "loss": 0.6649, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.6888808552664665, "learning_rate": 7.3191489361702125e-06, "loss": 0.6982, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.8462322818539508, "learning_rate": 7.404255319148936e-06, "loss": 0.7074, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.6195629831920522, "learning_rate": 7.48936170212766e-06, "loss": 0.7005, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.6469097557463226, "learning_rate": 7.574468085106383e-06, "loss": 0.6795, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.6745980712343227, "learning_rate": 7.659574468085107e-06, "loss": 0.6678, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.687179670009129, "learning_rate": 7.74468085106383e-06, "loss": 0.7102, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.651953403102994, "learning_rate": 7.829787234042554e-06, "loss": 0.6946, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.6499605200617691, "learning_rate": 7.914893617021278e-06, "loss": 0.707, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.669592885357561, "learning_rate": 8.000000000000001e-06, "loss": 0.6813, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.6634704720341664, "learning_rate": 8.085106382978723e-06, "loss": 0.6727, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.6657533095081376, "learning_rate": 8.170212765957447e-06, "loss": 0.6724, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.6212533242674179, "learning_rate": 8.25531914893617e-06, "loss": 0.6501, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.8352370912116462, "learning_rate": 8.340425531914894e-06, "loss": 0.677, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.7233165782097782, "learning_rate": 8.425531914893618e-06, "loss": 0.6859, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.8324024242403527, "learning_rate": 8.510638297872341e-06, "loss": 0.7341, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.6571975248523981, "learning_rate": 8.595744680851065e-06, "loss": 0.6819, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.8241812676876785, "learning_rate": 8.680851063829788e-06, "loss": 0.6949, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.7587568441507614, "learning_rate": 8.765957446808512e-06, "loss": 0.666, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.5901661330013652, "learning_rate": 8.851063829787234e-06, "loss": 0.7011, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.005407457093822, "learning_rate": 8.936170212765958e-06, "loss": 0.6905, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.8651825714978785, "learning_rate": 9.021276595744681e-06, "loss": 0.6808, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.693660413621126, "learning_rate": 9.106382978723405e-06, "loss": 0.6803, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.0589287279030555, "learning_rate": 9.191489361702128e-06, "loss": 0.668, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.659132416248969, "learning_rate": 9.276595744680852e-06, "loss": 0.6635, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.8708362069263281, "learning_rate": 9.361702127659576e-06, "loss": 0.6751, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.8317164946313254, "learning_rate": 9.446808510638299e-06, "loss": 0.6638, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.7475252535976808, "learning_rate": 9.531914893617023e-06, "loss": 0.6617, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.068454295586046, "learning_rate": 9.617021276595745e-06, "loss": 0.6562, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.7755646548714216, "learning_rate": 9.702127659574468e-06, "loss": 0.6819, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.8006972763599953, "learning_rate": 9.787234042553192e-06, "loss": 0.6845, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.9795970612004046, "learning_rate": 9.872340425531915e-06, "loss": 0.6434, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.8148345113925624, "learning_rate": 9.957446808510639e-06, "loss": 0.6948, "step": 117 }, { "epoch": 0.02, "grad_norm": 0.8925554740512956, "learning_rate": 1.0042553191489361e-05, "loss": 0.6409, "step": 118 }, { "epoch": 0.02, "grad_norm": 0.722372318073987, "learning_rate": 1.0127659574468085e-05, "loss": 0.7033, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.8015852053354778, "learning_rate": 1.0212765957446808e-05, "loss": 0.6514, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.7044894696514069, "learning_rate": 1.0297872340425532e-05, "loss": 0.6819, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.7884547670855487, "learning_rate": 1.0382978723404255e-05, "loss": 0.6818, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.6805489374087611, "learning_rate": 1.0468085106382979e-05, "loss": 0.6932, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.7521742992577556, "learning_rate": 1.0553191489361703e-05, "loss": 0.6652, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.6846002458555718, "learning_rate": 1.0638297872340426e-05, "loss": 0.6655, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.783447705691831, "learning_rate": 1.072340425531915e-05, "loss": 0.7009, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.6607450888941677, "learning_rate": 1.0808510638297873e-05, "loss": 0.6548, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.7968717007715418, "learning_rate": 1.0893617021276597e-05, "loss": 0.6902, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.7246643057935804, "learning_rate": 1.097872340425532e-05, "loss": 0.6511, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.7723577848247539, "learning_rate": 1.1063829787234044e-05, "loss": 0.6601, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.784174059070335, "learning_rate": 1.1148936170212768e-05, "loss": 0.7219, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.6773400962486261, "learning_rate": 1.1234042553191491e-05, "loss": 0.6441, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.7732984170244388, "learning_rate": 1.1319148936170215e-05, "loss": 0.6393, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.7809982425785745, "learning_rate": 1.1404255319148939e-05, "loss": 0.6434, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.8237582139927856, "learning_rate": 1.1489361702127662e-05, "loss": 0.6786, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.6998521134106436, "learning_rate": 1.1574468085106382e-05, "loss": 0.6644, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.8274286306647792, "learning_rate": 1.1659574468085106e-05, "loss": 0.6888, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.8045325568746169, "learning_rate": 1.174468085106383e-05, "loss": 0.6789, "step": 138 }, { "epoch": 0.02, "grad_norm": 0.740242417449262, "learning_rate": 1.1829787234042553e-05, "loss": 0.6606, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.9196408882841491, "learning_rate": 1.1914893617021277e-05, "loss": 0.6935, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.8740601912224649, "learning_rate": 1.2e-05, "loss": 0.6856, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.004904357874125, "learning_rate": 1.2085106382978724e-05, "loss": 0.6573, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.8794655758782557, "learning_rate": 1.2170212765957448e-05, "loss": 0.6674, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.8844643215835286, "learning_rate": 1.2255319148936171e-05, "loss": 0.6869, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.7435931396897757, "learning_rate": 1.2340425531914895e-05, "loss": 0.6478, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.2346335208123198, "learning_rate": 1.2425531914893618e-05, "loss": 0.639, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.8775139504057733, "learning_rate": 1.2510638297872342e-05, "loss": 0.6607, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.9148756711702534, "learning_rate": 1.2595744680851065e-05, "loss": 0.6582, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.706086737480357, "learning_rate": 1.2680851063829789e-05, "loss": 0.7014, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.9566895557767259, "learning_rate": 1.2765957446808513e-05, "loss": 0.6794, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.7794312599163374, "learning_rate": 1.2851063829787236e-05, "loss": 0.6793, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.8821603299261918, "learning_rate": 1.293617021276596e-05, "loss": 0.6878, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.6774173861340257, "learning_rate": 1.3021276595744683e-05, "loss": 0.6492, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.7589605595418736, "learning_rate": 1.3106382978723404e-05, "loss": 0.6479, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.8967662415079538, "learning_rate": 1.3191489361702127e-05, "loss": 0.7189, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.7308193047768302, "learning_rate": 1.3276595744680851e-05, "loss": 0.6674, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.8021649261946883, "learning_rate": 1.3361702127659574e-05, "loss": 0.6926, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.7260724000056266, "learning_rate": 1.3446808510638298e-05, "loss": 0.651, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.8233389530671268, "learning_rate": 1.3531914893617022e-05, "loss": 0.7416, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.73615842634899, "learning_rate": 1.3617021276595745e-05, "loss": 0.6493, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.8821300148657055, "learning_rate": 1.3702127659574469e-05, "loss": 0.6972, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.926866603832045, "learning_rate": 1.3787234042553192e-05, "loss": 0.6419, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.6665129866423932, "learning_rate": 1.3872340425531916e-05, "loss": 0.6295, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.7999367710261016, "learning_rate": 1.395744680851064e-05, "loss": 0.6723, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.7419354818162488, "learning_rate": 1.4042553191489363e-05, "loss": 0.685, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.6913770232580232, "learning_rate": 1.4127659574468087e-05, "loss": 0.6161, "step": 166 }, { "epoch": 0.02, "grad_norm": 0.8121117486363099, "learning_rate": 1.421276595744681e-05, "loss": 0.6934, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.7363266621743058, "learning_rate": 1.4297872340425534e-05, "loss": 0.6626, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.6797403276454996, "learning_rate": 1.4382978723404258e-05, "loss": 0.6695, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.7545932277074779, "learning_rate": 1.4468085106382981e-05, "loss": 0.6444, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.796516249768416, "learning_rate": 1.4553191489361705e-05, "loss": 0.6943, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.7985324458464956, "learning_rate": 1.4638297872340425e-05, "loss": 0.6542, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.725005561241094, "learning_rate": 1.4723404255319149e-05, "loss": 0.6366, "step": 173 }, { "epoch": 0.02, "grad_norm": 1.0452262843568065, "learning_rate": 1.4808510638297872e-05, "loss": 0.7001, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.8584685914876478, "learning_rate": 1.4893617021276596e-05, "loss": 0.6408, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.8679852503051217, "learning_rate": 1.497872340425532e-05, "loss": 0.704, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.8489286344045474, "learning_rate": 1.5063829787234043e-05, "loss": 0.6604, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.7976936098985068, "learning_rate": 1.5148936170212767e-05, "loss": 0.6709, "step": 178 }, { "epoch": 0.02, "grad_norm": 1.0348517201410654, "learning_rate": 1.523404255319149e-05, "loss": 0.6371, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.7317891967213641, "learning_rate": 1.5319148936170214e-05, "loss": 0.6519, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.7683961904907767, "learning_rate": 1.5404255319148937e-05, "loss": 0.6893, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.7099468642371798, "learning_rate": 1.548936170212766e-05, "loss": 0.6675, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.7904120097597087, "learning_rate": 1.5574468085106385e-05, "loss": 0.6933, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.7983686802400729, "learning_rate": 1.5659574468085108e-05, "loss": 0.6681, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.8294643713940539, "learning_rate": 1.5744680851063832e-05, "loss": 0.7043, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.6558133911152552, "learning_rate": 1.5829787234042555e-05, "loss": 0.6435, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.8694404434465349, "learning_rate": 1.591489361702128e-05, "loss": 0.6262, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.7290207078050474, "learning_rate": 1.6000000000000003e-05, "loss": 0.6763, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.7965237781088768, "learning_rate": 1.6085106382978726e-05, "loss": 0.6487, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.8983287608410415, "learning_rate": 1.6170212765957446e-05, "loss": 0.6727, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.7839085348679199, "learning_rate": 1.625531914893617e-05, "loss": 0.6776, "step": 191 }, { "epoch": 0.02, "grad_norm": 0.7386245101731441, "learning_rate": 1.6340425531914894e-05, "loss": 0.653, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.7623257124234708, "learning_rate": 1.6425531914893617e-05, "loss": 0.6769, "step": 193 }, { "epoch": 0.02, "grad_norm": 0.8309341754137334, "learning_rate": 1.651063829787234e-05, "loss": 0.6742, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.8612049845939163, "learning_rate": 1.6595744680851064e-05, "loss": 0.6348, "step": 195 }, { "epoch": 0.03, "grad_norm": 0.8288216417608228, "learning_rate": 1.6680851063829788e-05, "loss": 0.693, "step": 196 }, { "epoch": 0.03, "grad_norm": 1.1356971871697563, "learning_rate": 1.676595744680851e-05, "loss": 0.668, "step": 197 }, { "epoch": 0.03, "grad_norm": 1.127691522263628, "learning_rate": 1.6851063829787235e-05, "loss": 0.6882, "step": 198 }, { "epoch": 0.03, "grad_norm": 0.8783507256537378, "learning_rate": 1.693617021276596e-05, "loss": 0.6941, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.6172640577376374, "learning_rate": 1.7021276595744682e-05, "loss": 0.6796, "step": 200 }, { "epoch": 0.03, "grad_norm": 0.8482047219722901, "learning_rate": 1.7106382978723406e-05, "loss": 0.6337, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.0649859500603258, "learning_rate": 1.719148936170213e-05, "loss": 0.6646, "step": 202 }, { "epoch": 0.03, "grad_norm": 0.7378452748769033, "learning_rate": 1.7276595744680853e-05, "loss": 0.6557, "step": 203 }, { "epoch": 0.03, "grad_norm": 1.1618570059577193, "learning_rate": 1.7361702127659577e-05, "loss": 0.7155, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.7528602748150949, "learning_rate": 1.74468085106383e-05, "loss": 0.6671, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.8697908223565296, "learning_rate": 1.7531914893617024e-05, "loss": 0.6875, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.7586487515358432, "learning_rate": 1.7617021276595748e-05, "loss": 0.652, "step": 207 }, { "epoch": 0.03, "grad_norm": 0.7594223728563457, "learning_rate": 1.7702127659574468e-05, "loss": 0.68, "step": 208 }, { "epoch": 0.03, "grad_norm": 0.7295656211377606, "learning_rate": 1.778723404255319e-05, "loss": 0.6856, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.777808765626819, "learning_rate": 1.7872340425531915e-05, "loss": 0.6263, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.7510064627788249, "learning_rate": 1.795744680851064e-05, "loss": 0.6842, "step": 211 }, { "epoch": 0.03, "grad_norm": 0.74312723504832, "learning_rate": 1.8042553191489362e-05, "loss": 0.6665, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.7547519475429285, "learning_rate": 1.8127659574468086e-05, "loss": 0.6537, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.7384507493532143, "learning_rate": 1.821276595744681e-05, "loss": 0.6592, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.7979895640915945, "learning_rate": 1.8297872340425533e-05, "loss": 0.6255, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.7041095208803198, "learning_rate": 1.8382978723404257e-05, "loss": 0.6639, "step": 216 }, { "epoch": 0.03, "grad_norm": 0.7281344983285941, "learning_rate": 1.846808510638298e-05, "loss": 0.6713, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.831109256074698, "learning_rate": 1.8553191489361704e-05, "loss": 0.6464, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.8106335941038763, "learning_rate": 1.8638297872340427e-05, "loss": 0.6593, "step": 219 }, { "epoch": 0.03, "grad_norm": 0.6765788607890277, "learning_rate": 1.872340425531915e-05, "loss": 0.6888, "step": 220 }, { "epoch": 0.03, "grad_norm": 0.846278069709127, "learning_rate": 1.8808510638297875e-05, "loss": 0.6566, "step": 221 }, { "epoch": 0.03, "grad_norm": 0.6274979439145946, "learning_rate": 1.8893617021276598e-05, "loss": 0.6387, "step": 222 }, { "epoch": 0.03, "grad_norm": 0.8002065046477359, "learning_rate": 1.8978723404255322e-05, "loss": 0.6576, "step": 223 }, { "epoch": 0.03, "grad_norm": 0.6595944001181364, "learning_rate": 1.9063829787234045e-05, "loss": 0.6529, "step": 224 }, { "epoch": 0.03, "grad_norm": 0.8440427467306217, "learning_rate": 1.914893617021277e-05, "loss": 0.6908, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.7122766954815534, "learning_rate": 1.923404255319149e-05, "loss": 0.6606, "step": 226 }, { "epoch": 0.03, "grad_norm": 0.7534460170304522, "learning_rate": 1.9319148936170213e-05, "loss": 0.6429, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.6792914716842772, "learning_rate": 1.9404255319148936e-05, "loss": 0.6728, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.6931099471234613, "learning_rate": 1.948936170212766e-05, "loss": 0.6506, "step": 229 }, { "epoch": 0.03, "grad_norm": 0.7078369593819922, "learning_rate": 1.9574468085106384e-05, "loss": 0.6499, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.7728512446580942, "learning_rate": 1.9659574468085107e-05, "loss": 0.6561, "step": 231 }, { "epoch": 0.03, "grad_norm": 0.7775437351458244, "learning_rate": 1.974468085106383e-05, "loss": 0.657, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.7116385420322988, "learning_rate": 1.9829787234042554e-05, "loss": 0.6781, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.9654881447282546, "learning_rate": 1.9914893617021278e-05, "loss": 0.6532, "step": 234 }, { "epoch": 0.03, "grad_norm": 0.8998580304543459, "learning_rate": 2e-05, "loss": 0.6779, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.9008885168858056, "learning_rate": 1.999999914406075e-05, "loss": 0.7171, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.9349976398692847, "learning_rate": 1.9999996576243132e-05, "loss": 0.6209, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.6779114396421753, "learning_rate": 1.9999992296547597e-05, "loss": 0.6746, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.775456761539178, "learning_rate": 1.9999986304974873e-05, "loss": 0.6366, "step": 239 }, { "epoch": 0.03, "grad_norm": 0.8388980058624823, "learning_rate": 1.9999978601525985e-05, "loss": 0.7179, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.8149808139305609, "learning_rate": 1.9999969186202253e-05, "loss": 0.6671, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.7438005433941678, "learning_rate": 1.9999958059005288e-05, "loss": 0.6388, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.6390660246307692, "learning_rate": 1.9999945219936995e-05, "loss": 0.6333, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.7197618485846196, "learning_rate": 1.999993066899957e-05, "loss": 0.6461, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.6865521462286319, "learning_rate": 1.999991440619551e-05, "loss": 0.6893, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.7387514059768638, "learning_rate": 1.9999896431527593e-05, "loss": 0.6975, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.6843316374576116, "learning_rate": 1.99998767449989e-05, "loss": 0.6524, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.6932437786799653, "learning_rate": 1.99998553466128e-05, "loss": 0.6755, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.6791002814983109, "learning_rate": 1.9999832236372952e-05, "loss": 0.7018, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.7806597995070187, "learning_rate": 1.9999807414283317e-05, "loss": 0.6954, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.7126953178992154, "learning_rate": 1.9999780880348142e-05, "loss": 0.6447, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.6477099125701063, "learning_rate": 1.999975263457197e-05, "loss": 0.6494, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.7470907908937903, "learning_rate": 1.999972267695964e-05, "loss": 0.6251, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.8391582360203828, "learning_rate": 1.9999691007516272e-05, "loss": 0.6911, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.7318650710829626, "learning_rate": 1.9999657626247294e-05, "loss": 0.6656, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.6995414824521561, "learning_rate": 1.999962253315842e-05, "loss": 0.6596, "step": 256 }, { "epoch": 0.03, "grad_norm": 0.7659301702499622, "learning_rate": 1.9999585728255658e-05, "loss": 0.6923, "step": 257 }, { "epoch": 0.03, "grad_norm": 0.7255656153866022, "learning_rate": 1.9999547211545302e-05, "loss": 0.6353, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.7945814082282251, "learning_rate": 1.9999506983033953e-05, "loss": 0.6415, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.7326387959223681, "learning_rate": 1.9999465042728494e-05, "loss": 0.671, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.7759844791988763, "learning_rate": 1.9999421390636106e-05, "loss": 0.619, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.6887692792374268, "learning_rate": 1.9999376026764264e-05, "loss": 0.6555, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.6898111255075303, "learning_rate": 1.9999328951120727e-05, "loss": 0.6426, "step": 263 }, { "epoch": 0.03, "grad_norm": 2.4476803352026346, "learning_rate": 1.9999280163713558e-05, "loss": 0.6379, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.6461138597941706, "learning_rate": 1.9999229664551113e-05, "loss": 0.6429, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.7097605969393802, "learning_rate": 1.999917745364203e-05, "loss": 0.6717, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.7516157230429473, "learning_rate": 1.9999123530995247e-05, "loss": 0.7086, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.8121254973667168, "learning_rate": 1.999906789662e-05, "loss": 0.6515, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.7006655018101225, "learning_rate": 1.999901055052581e-05, "loss": 0.6936, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.8592582755154389, "learning_rate": 1.9998951492722493e-05, "loss": 0.6453, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.7205369308272287, "learning_rate": 1.9998890723220165e-05, "loss": 0.6635, "step": 271 }, { "epoch": 0.03, "grad_norm": 0.772537853220286, "learning_rate": 1.999882824202922e-05, "loss": 0.6467, "step": 272 }, { "epoch": 0.03, "grad_norm": 0.8419084491548148, "learning_rate": 1.9998764049160356e-05, "loss": 0.7205, "step": 273 }, { "epoch": 0.04, "grad_norm": 0.7043060140496541, "learning_rate": 1.9998698144624567e-05, "loss": 0.6277, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.8054554683363606, "learning_rate": 1.9998630528433133e-05, "loss": 0.6499, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.7042919587133946, "learning_rate": 1.9998561200597628e-05, "loss": 0.6575, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.6723040320639915, "learning_rate": 1.9998490161129923e-05, "loss": 0.6492, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.7503116074874533, "learning_rate": 1.9998417410042173e-05, "loss": 0.6544, "step": 278 }, { "epoch": 0.04, "grad_norm": 1.0150369510438566, "learning_rate": 1.9998342947346835e-05, "loss": 0.629, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.8539590092167831, "learning_rate": 1.9998266773056656e-05, "loss": 0.6584, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.9175266086749317, "learning_rate": 1.9998188887184682e-05, "loss": 0.7147, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.9397161185851981, "learning_rate": 1.9998109289744237e-05, "loss": 0.7466, "step": 282 }, { "epoch": 0.04, "grad_norm": 0.7859800187318303, "learning_rate": 1.999802798074895e-05, "loss": 0.6393, "step": 283 }, { "epoch": 0.04, "grad_norm": 0.7635280772696308, "learning_rate": 1.9997944960212745e-05, "loss": 0.6573, "step": 284 }, { "epoch": 0.04, "grad_norm": 0.7127509542396591, "learning_rate": 1.9997860228149826e-05, "loss": 0.6375, "step": 285 }, { "epoch": 0.04, "grad_norm": 0.7158771081032458, "learning_rate": 1.999777378457471e-05, "loss": 0.6469, "step": 286 }, { "epoch": 0.04, "grad_norm": 0.8261247582924472, "learning_rate": 1.999768562950218e-05, "loss": 0.7052, "step": 287 }, { "epoch": 0.04, "grad_norm": 0.7328751173129521, "learning_rate": 1.999759576294734e-05, "loss": 0.7013, "step": 288 }, { "epoch": 0.04, "grad_norm": 0.7618315030017183, "learning_rate": 1.9997504184925565e-05, "loss": 0.6367, "step": 289 }, { "epoch": 0.04, "grad_norm": 0.7368995759058233, "learning_rate": 1.9997410895452532e-05, "loss": 0.6632, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.7721259307935955, "learning_rate": 1.999731589454422e-05, "loss": 0.6435, "step": 291 }, { "epoch": 0.04, "grad_norm": 0.7178537430262437, "learning_rate": 1.9997219182216886e-05, "loss": 0.6536, "step": 292 }, { "epoch": 0.04, "grad_norm": 0.8306279550583174, "learning_rate": 1.999712075848709e-05, "loss": 0.6613, "step": 293 }, { "epoch": 0.04, "grad_norm": 0.7489298580639432, "learning_rate": 1.999702062337167e-05, "loss": 0.6795, "step": 294 }, { "epoch": 0.04, "grad_norm": 0.7625213161077633, "learning_rate": 1.9996918776887785e-05, "loss": 0.6616, "step": 295 }, { "epoch": 0.04, "grad_norm": 0.8251546335675974, "learning_rate": 1.9996815219052853e-05, "loss": 0.643, "step": 296 }, { "epoch": 0.04, "grad_norm": 0.8106628495805511, "learning_rate": 1.9996709949884613e-05, "loss": 0.6413, "step": 297 }, { "epoch": 0.04, "grad_norm": 0.7388092904895807, "learning_rate": 1.999660296940108e-05, "loss": 0.6158, "step": 298 }, { "epoch": 0.04, "grad_norm": 0.8216595950786724, "learning_rate": 1.9996494277620572e-05, "loss": 0.638, "step": 299 }, { "epoch": 0.04, "grad_norm": 0.7422374340976767, "learning_rate": 1.999638387456169e-05, "loss": 0.622, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.7843128159433538, "learning_rate": 1.999627176024334e-05, "loss": 0.64, "step": 301 }, { "epoch": 0.04, "grad_norm": 0.7481251295934163, "learning_rate": 1.999615793468471e-05, "loss": 0.689, "step": 302 }, { "epoch": 0.04, "grad_norm": 0.6550229752395934, "learning_rate": 1.9996042397905292e-05, "loss": 0.6416, "step": 303 }, { "epoch": 0.04, "grad_norm": 0.9215014689253938, "learning_rate": 1.9995925149924857e-05, "loss": 0.6653, "step": 304 }, { "epoch": 0.04, "grad_norm": 0.6458634448894808, "learning_rate": 1.9995806190763477e-05, "loss": 0.6509, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.7464717365730608, "learning_rate": 1.999568552044152e-05, "loss": 0.6631, "step": 306 }, { "epoch": 0.04, "grad_norm": 0.8756759259241319, "learning_rate": 1.9995563138979642e-05, "loss": 0.6332, "step": 307 }, { "epoch": 0.04, "grad_norm": 0.6415486648186383, "learning_rate": 1.9995439046398796e-05, "loss": 0.6462, "step": 308 }, { "epoch": 0.04, "grad_norm": 0.8483935916766185, "learning_rate": 1.999531324272022e-05, "loss": 0.6125, "step": 309 }, { "epoch": 0.04, "grad_norm": 0.8881836016881265, "learning_rate": 1.999518572796545e-05, "loss": 0.6802, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.6029873545223414, "learning_rate": 1.999505650215632e-05, "loss": 0.6518, "step": 311 }, { "epoch": 0.04, "grad_norm": 0.7205241080370798, "learning_rate": 1.9994925565314948e-05, "loss": 0.6411, "step": 312 }, { "epoch": 0.04, "grad_norm": 0.776522131191741, "learning_rate": 1.9994792917463752e-05, "loss": 0.6948, "step": 313 }, { "epoch": 0.04, "grad_norm": 0.6533680663816916, "learning_rate": 1.9994658558625437e-05, "loss": 0.6711, "step": 314 }, { "epoch": 0.04, "grad_norm": 0.7592311194577782, "learning_rate": 1.9994522488823005e-05, "loss": 0.6606, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.6797767755423386, "learning_rate": 1.9994384708079746e-05, "loss": 0.6481, "step": 316 }, { "epoch": 0.04, "grad_norm": 0.6815274970151337, "learning_rate": 1.999424521641925e-05, "loss": 0.6364, "step": 317 }, { "epoch": 0.04, "grad_norm": 0.748560359525391, "learning_rate": 1.9994104013865397e-05, "loss": 0.6673, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.7409810866128937, "learning_rate": 1.999396110044236e-05, "loss": 0.6359, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.6887659513212506, "learning_rate": 1.99938164761746e-05, "loss": 0.6474, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.638816379685004, "learning_rate": 1.9993670141086876e-05, "loss": 0.688, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.7400062770482906, "learning_rate": 1.9993522095204238e-05, "loss": 0.646, "step": 322 }, { "epoch": 0.04, "grad_norm": 0.68212853416924, "learning_rate": 1.999337233855203e-05, "loss": 0.6315, "step": 323 }, { "epoch": 0.04, "grad_norm": 0.7870060903119882, "learning_rate": 1.9993220871155895e-05, "loss": 0.6455, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.696637082496092, "learning_rate": 1.9993067693041754e-05, "loss": 0.6459, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.690983709970985, "learning_rate": 1.999291280423583e-05, "loss": 0.6438, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.7504588656107939, "learning_rate": 1.9992756204764644e-05, "loss": 0.6715, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.7279425442855708, "learning_rate": 1.9992597894654997e-05, "loss": 0.6665, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.7169804303104832, "learning_rate": 1.9992437873933995e-05, "loss": 0.6642, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.600720363097781, "learning_rate": 1.999227614262903e-05, "loss": 0.6465, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.6911330567344602, "learning_rate": 1.9992112700767787e-05, "loss": 0.6726, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.7627801778862694, "learning_rate": 1.9991947548378247e-05, "loss": 0.6389, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.6656027734076307, "learning_rate": 1.9991780685488683e-05, "loss": 0.6551, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.6430397225625524, "learning_rate": 1.9991612112127653e-05, "loss": 0.6315, "step": 334 }, { "epoch": 0.04, "grad_norm": 0.6791292384880837, "learning_rate": 1.9991441828324024e-05, "loss": 0.6831, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.7692902201131815, "learning_rate": 1.999126983410694e-05, "loss": 0.6781, "step": 336 }, { "epoch": 0.04, "grad_norm": 0.653030423373098, "learning_rate": 1.999109612950585e-05, "loss": 0.6555, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.6408779722695179, "learning_rate": 1.9990920714550486e-05, "loss": 0.6868, "step": 338 }, { "epoch": 0.04, "grad_norm": 0.6612213613702557, "learning_rate": 1.9990743589270876e-05, "loss": 0.6747, "step": 339 }, { "epoch": 0.04, "grad_norm": 0.7138821955351432, "learning_rate": 1.9990564753697346e-05, "loss": 0.6883, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.6716227644679114, "learning_rate": 1.9990384207860507e-05, "loss": 0.651, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.6404774475637224, "learning_rate": 1.9990201951791265e-05, "loss": 0.6242, "step": 342 }, { "epoch": 0.04, "grad_norm": 0.609507027609884, "learning_rate": 1.9990017985520825e-05, "loss": 0.6605, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.7894966993810159, "learning_rate": 1.9989832309080676e-05, "loss": 0.6689, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.6760310704740611, "learning_rate": 1.9989644922502607e-05, "loss": 0.6462, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.7468652689955587, "learning_rate": 1.998945582581869e-05, "loss": 0.6564, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.7548957067639507, "learning_rate": 1.9989265019061304e-05, "loss": 0.6319, "step": 347 }, { "epoch": 0.04, "grad_norm": 0.64789755423703, "learning_rate": 1.9989072502263106e-05, "loss": 0.6501, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.7107134107821969, "learning_rate": 1.9988878275457057e-05, "loss": 0.6571, "step": 349 }, { "epoch": 0.04, "grad_norm": 0.7112641303701541, "learning_rate": 1.9988682338676405e-05, "loss": 0.6788, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.7116106483536949, "learning_rate": 1.998848469195469e-05, "loss": 0.6427, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.7032276587962348, "learning_rate": 1.9988285335325746e-05, "loss": 0.7088, "step": 352 }, { "epoch": 0.05, "grad_norm": 0.646431030555889, "learning_rate": 1.998808426882371e-05, "loss": 0.637, "step": 353 }, { "epoch": 0.05, "grad_norm": 0.8089754678025237, "learning_rate": 1.9987881492482987e-05, "loss": 0.6913, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.6739400041614799, "learning_rate": 1.9987677006338302e-05, "loss": 0.671, "step": 355 }, { "epoch": 0.05, "grad_norm": 0.6610602126719018, "learning_rate": 1.9987470810424657e-05, "loss": 0.617, "step": 356 }, { "epoch": 0.05, "grad_norm": 0.6346265151734386, "learning_rate": 1.9987262904777345e-05, "loss": 0.6302, "step": 357 }, { "epoch": 0.05, "grad_norm": 0.6346776422148687, "learning_rate": 1.9987053289431966e-05, "loss": 0.6771, "step": 358 }, { "epoch": 0.05, "grad_norm": 0.6141852772097299, "learning_rate": 1.9986841964424397e-05, "loss": 0.643, "step": 359 }, { "epoch": 0.05, "grad_norm": 0.6669262867189167, "learning_rate": 1.9986628929790817e-05, "loss": 0.6412, "step": 360 }, { "epoch": 0.05, "grad_norm": 0.6184023481068622, "learning_rate": 1.9986414185567694e-05, "loss": 0.6477, "step": 361 }, { "epoch": 0.05, "grad_norm": 0.5966412999498272, "learning_rate": 1.9986197731791787e-05, "loss": 0.6273, "step": 362 }, { "epoch": 0.05, "grad_norm": 0.6689639484668365, "learning_rate": 1.9985979568500156e-05, "loss": 0.6467, "step": 363 }, { "epoch": 0.05, "grad_norm": 0.6650649929830172, "learning_rate": 1.9985759695730148e-05, "loss": 0.6405, "step": 364 }, { "epoch": 0.05, "grad_norm": 0.6774561505974732, "learning_rate": 1.9985538113519397e-05, "loss": 0.6385, "step": 365 }, { "epoch": 0.05, "grad_norm": 0.6620798895998852, "learning_rate": 1.9985314821905835e-05, "loss": 0.597, "step": 366 }, { "epoch": 0.05, "grad_norm": 0.6752784294395032, "learning_rate": 1.9985089820927693e-05, "loss": 0.6403, "step": 367 }, { "epoch": 0.05, "grad_norm": 0.7133147110177718, "learning_rate": 1.9984863110623483e-05, "loss": 0.636, "step": 368 }, { "epoch": 0.05, "grad_norm": 0.7361391520457726, "learning_rate": 1.9984634691032018e-05, "loss": 0.639, "step": 369 }, { "epoch": 0.05, "grad_norm": 1.086536533414234, "learning_rate": 1.99844045621924e-05, "loss": 0.6736, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.6964969256064851, "learning_rate": 1.9984172724144026e-05, "loss": 0.6555, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.7366476826693756, "learning_rate": 1.9983939176926575e-05, "loss": 0.6522, "step": 372 }, { "epoch": 0.05, "grad_norm": 0.6183356039858139, "learning_rate": 1.998370392058004e-05, "loss": 0.6867, "step": 373 }, { "epoch": 0.05, "grad_norm": 0.6330000632664571, "learning_rate": 1.998346695514469e-05, "loss": 0.6602, "step": 374 }, { "epoch": 0.05, "grad_norm": 0.7168304210727618, "learning_rate": 1.9983228280661083e-05, "loss": 0.6464, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.7469944327035098, "learning_rate": 1.9982987897170084e-05, "loss": 0.6781, "step": 376 }, { "epoch": 0.05, "grad_norm": 0.6257752153996178, "learning_rate": 1.9982745804712848e-05, "loss": 0.6208, "step": 377 }, { "epoch": 0.05, "grad_norm": 0.6468229491806011, "learning_rate": 1.998250200333081e-05, "loss": 0.6153, "step": 378 }, { "epoch": 0.05, "grad_norm": 0.6247532637038427, "learning_rate": 1.9982256493065708e-05, "loss": 0.6448, "step": 379 }, { "epoch": 0.05, "grad_norm": 0.6675803869980299, "learning_rate": 1.9982009273959573e-05, "loss": 0.6161, "step": 380 }, { "epoch": 0.05, "grad_norm": 0.7751687862301487, "learning_rate": 1.998176034605472e-05, "loss": 0.6453, "step": 381 }, { "epoch": 0.05, "grad_norm": 0.6800561809911132, "learning_rate": 1.9981509709393772e-05, "loss": 0.6298, "step": 382 }, { "epoch": 0.05, "grad_norm": 0.7436554386455345, "learning_rate": 1.9981257364019626e-05, "loss": 0.6676, "step": 383 }, { "epoch": 0.05, "grad_norm": 0.628752172182092, "learning_rate": 1.9981003309975487e-05, "loss": 0.6219, "step": 384 }, { "epoch": 0.05, "grad_norm": 0.6875377261080103, "learning_rate": 1.9980747547304842e-05, "loss": 0.6518, "step": 385 }, { "epoch": 0.05, "grad_norm": 0.6880613551597734, "learning_rate": 1.9980490076051472e-05, "loss": 0.6669, "step": 386 }, { "epoch": 0.05, "grad_norm": 0.8012041493070149, "learning_rate": 1.9980230896259464e-05, "loss": 0.6811, "step": 387 }, { "epoch": 0.05, "grad_norm": 0.651669705776999, "learning_rate": 1.997997000797317e-05, "loss": 0.6259, "step": 388 }, { "epoch": 0.05, "grad_norm": 0.7568711005501488, "learning_rate": 1.9979707411237264e-05, "loss": 0.6757, "step": 389 }, { "epoch": 0.05, "grad_norm": 0.6988459588281201, "learning_rate": 1.9979443106096696e-05, "loss": 0.6824, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.6534796411130116, "learning_rate": 1.9979177092596714e-05, "loss": 0.6521, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.6433125915465675, "learning_rate": 1.997890937078285e-05, "loss": 0.6274, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.6344921951388859, "learning_rate": 1.9978639940700936e-05, "loss": 0.657, "step": 393 }, { "epoch": 0.05, "grad_norm": 0.6857095996437428, "learning_rate": 1.9978368802397097e-05, "loss": 0.6526, "step": 394 }, { "epoch": 0.05, "grad_norm": 0.6045691761931036, "learning_rate": 1.9978095955917753e-05, "loss": 0.6768, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.878313459571014, "learning_rate": 1.9977821401309602e-05, "loss": 0.7154, "step": 396 }, { "epoch": 0.05, "grad_norm": 0.6541245574490518, "learning_rate": 1.9977545138619655e-05, "loss": 0.6649, "step": 397 }, { "epoch": 0.05, "grad_norm": 0.7439847438946736, "learning_rate": 1.9977267167895197e-05, "loss": 0.6528, "step": 398 }, { "epoch": 0.05, "grad_norm": 0.6433822055495357, "learning_rate": 1.9976987489183817e-05, "loss": 0.6346, "step": 399 }, { "epoch": 0.05, "grad_norm": 0.6391899058018562, "learning_rate": 1.9976706102533394e-05, "loss": 0.6393, "step": 400 }, { "epoch": 0.05, "grad_norm": 0.5880801527985304, "learning_rate": 1.997642300799209e-05, "loss": 0.6698, "step": 401 }, { "epoch": 0.05, "grad_norm": 0.6590442220619173, "learning_rate": 1.997613820560838e-05, "loss": 0.6692, "step": 402 }, { "epoch": 0.05, "grad_norm": 0.6040874867524574, "learning_rate": 1.9975851695431006e-05, "loss": 0.6213, "step": 403 }, { "epoch": 0.05, "grad_norm": 0.6372605672522306, "learning_rate": 1.9975563477509025e-05, "loss": 0.6645, "step": 404 }, { "epoch": 0.05, "grad_norm": 0.6424903031282367, "learning_rate": 1.9975273551891772e-05, "loss": 0.6286, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.7229816064795725, "learning_rate": 1.9974981918628877e-05, "loss": 0.6868, "step": 406 }, { "epoch": 0.05, "grad_norm": 0.6763502574077694, "learning_rate": 1.9974688577770265e-05, "loss": 0.6806, "step": 407 }, { "epoch": 0.05, "grad_norm": 0.6625599347304552, "learning_rate": 1.9974393529366157e-05, "loss": 0.6594, "step": 408 }, { "epoch": 0.05, "grad_norm": 0.670324558238911, "learning_rate": 1.9974096773467056e-05, "loss": 0.6396, "step": 409 }, { "epoch": 0.05, "grad_norm": 0.6015153496444154, "learning_rate": 1.9973798310123763e-05, "loss": 0.6622, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.6783781577453797, "learning_rate": 1.9973498139387378e-05, "loss": 0.664, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.6108611037395808, "learning_rate": 1.9973196261309277e-05, "loss": 0.6299, "step": 412 }, { "epoch": 0.05, "grad_norm": 1.0464619047391288, "learning_rate": 1.997289267594115e-05, "loss": 0.6505, "step": 413 }, { "epoch": 0.05, "grad_norm": 0.6852028233425627, "learning_rate": 1.9972587383334952e-05, "loss": 0.6579, "step": 414 }, { "epoch": 0.05, "grad_norm": 0.6530173688800508, "learning_rate": 1.997228038354296e-05, "loss": 0.6653, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.6726262109089197, "learning_rate": 1.997197167661772e-05, "loss": 0.6196, "step": 416 }, { "epoch": 0.05, "grad_norm": 0.6321438021762186, "learning_rate": 1.997166126261208e-05, "loss": 0.6412, "step": 417 }, { "epoch": 0.05, "grad_norm": 0.7035252257452286, "learning_rate": 1.997134914157918e-05, "loss": 0.6179, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.7004655420877307, "learning_rate": 1.9971035313572452e-05, "loss": 0.6467, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.7403755417037653, "learning_rate": 1.9970719778645618e-05, "loss": 0.676, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.6414356402275375, "learning_rate": 1.9970402536852694e-05, "loss": 0.6454, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.6962235409075329, "learning_rate": 1.997008358824799e-05, "loss": 0.6531, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.6140217278024828, "learning_rate": 1.9969762932886105e-05, "loss": 0.6206, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.6824155351283717, "learning_rate": 1.996944057082193e-05, "loss": 0.6136, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.6444612603115641, "learning_rate": 1.9969116502110654e-05, "loss": 0.6442, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.7821944389375514, "learning_rate": 1.9968790726807744e-05, "loss": 0.6196, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.6677382845339324, "learning_rate": 1.996846324496898e-05, "loss": 0.6351, "step": 427 }, { "epoch": 0.05, "grad_norm": 1.0329391247739361, "learning_rate": 1.9968134056650416e-05, "loss": 0.6653, "step": 428 }, { "epoch": 0.05, "grad_norm": 0.6632694013245255, "learning_rate": 1.9967803161908407e-05, "loss": 0.6283, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.6786901531537254, "learning_rate": 1.9967470560799595e-05, "loss": 0.639, "step": 430 }, { "epoch": 0.06, "grad_norm": 0.7305424223841721, "learning_rate": 1.9967136253380923e-05, "loss": 0.6012, "step": 431 }, { "epoch": 0.06, "grad_norm": 0.6955570228495634, "learning_rate": 1.996680023970962e-05, "loss": 0.6574, "step": 432 }, { "epoch": 0.06, "grad_norm": 0.6432091634026794, "learning_rate": 1.99664625198432e-05, "loss": 0.6591, "step": 433 }, { "epoch": 0.06, "grad_norm": 0.7992451071657704, "learning_rate": 1.996612309383949e-05, "loss": 0.6966, "step": 434 }, { "epoch": 0.06, "grad_norm": 0.7095455158519584, "learning_rate": 1.9965781961756576e-05, "loss": 0.6645, "step": 435 }, { "epoch": 0.06, "grad_norm": 0.6907537601625793, "learning_rate": 1.9965439123652875e-05, "loss": 0.6645, "step": 436 }, { "epoch": 0.06, "grad_norm": 0.6722843734072718, "learning_rate": 1.9965094579587065e-05, "loss": 0.7017, "step": 437 }, { "epoch": 0.06, "grad_norm": 0.7317035000111296, "learning_rate": 1.9964748329618135e-05, "loss": 0.6997, "step": 438 }, { "epoch": 0.06, "grad_norm": 0.6739901642521623, "learning_rate": 1.996440037380535e-05, "loss": 0.7046, "step": 439 }, { "epoch": 0.06, "grad_norm": 0.6596732875206789, "learning_rate": 1.996405071220828e-05, "loss": 0.6867, "step": 440 }, { "epoch": 0.06, "grad_norm": 0.6692210848864182, "learning_rate": 1.9963699344886792e-05, "loss": 0.6581, "step": 441 }, { "epoch": 0.06, "grad_norm": 0.6592328073089633, "learning_rate": 1.9963346271901023e-05, "loss": 0.6356, "step": 442 }, { "epoch": 0.06, "grad_norm": 0.6198141782580554, "learning_rate": 1.996299149331142e-05, "loss": 0.6262, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.668548630545305, "learning_rate": 1.9962635009178713e-05, "loss": 0.6163, "step": 444 }, { "epoch": 0.06, "grad_norm": 0.6520434294426236, "learning_rate": 1.9962276819563934e-05, "loss": 0.6448, "step": 445 }, { "epoch": 0.06, "grad_norm": 0.5756355462550686, "learning_rate": 1.9961916924528398e-05, "loss": 0.6293, "step": 446 }, { "epoch": 0.06, "grad_norm": 0.6472320449911949, "learning_rate": 1.9961555324133714e-05, "loss": 0.6889, "step": 447 }, { "epoch": 0.06, "grad_norm": 0.6397560046118317, "learning_rate": 1.9961192018441784e-05, "loss": 0.6446, "step": 448 }, { "epoch": 0.06, "grad_norm": 0.6398453582049498, "learning_rate": 1.99608270075148e-05, "loss": 0.6522, "step": 449 }, { "epoch": 0.06, "grad_norm": 0.6720564213012563, "learning_rate": 1.9960460291415252e-05, "loss": 0.661, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.6993158439025496, "learning_rate": 1.996009187020591e-05, "loss": 0.6446, "step": 451 }, { "epoch": 0.06, "grad_norm": 0.6279587305019277, "learning_rate": 1.995972174394985e-05, "loss": 0.6272, "step": 452 }, { "epoch": 0.06, "grad_norm": 0.695144504544679, "learning_rate": 1.995934991271043e-05, "loss": 0.668, "step": 453 }, { "epoch": 0.06, "grad_norm": 0.6115679424488201, "learning_rate": 1.9958976376551303e-05, "loss": 0.6377, "step": 454 }, { "epoch": 0.06, "grad_norm": 0.6189107014936653, "learning_rate": 1.9958601135536417e-05, "loss": 0.6383, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.651603577712965, "learning_rate": 1.9958224189730004e-05, "loss": 0.6514, "step": 456 }, { "epoch": 0.06, "grad_norm": 0.6333562081400902, "learning_rate": 1.9957845539196594e-05, "loss": 0.6513, "step": 457 }, { "epoch": 0.06, "grad_norm": 0.6500529538909703, "learning_rate": 1.995746518400101e-05, "loss": 0.6857, "step": 458 }, { "epoch": 0.06, "grad_norm": 0.6273687108432533, "learning_rate": 1.9957083124208362e-05, "loss": 0.6462, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.6376801908596116, "learning_rate": 1.995669935988405e-05, "loss": 0.6759, "step": 460 }, { "epoch": 0.06, "grad_norm": 0.5657782528264674, "learning_rate": 1.9956313891093778e-05, "loss": 0.622, "step": 461 }, { "epoch": 0.06, "grad_norm": 0.6945745319603669, "learning_rate": 1.995592671790353e-05, "loss": 0.6528, "step": 462 }, { "epoch": 0.06, "grad_norm": 0.5775536934148969, "learning_rate": 1.9955537840379585e-05, "loss": 0.6146, "step": 463 }, { "epoch": 0.06, "grad_norm": 0.6913001800405506, "learning_rate": 1.995514725858851e-05, "loss": 0.6898, "step": 464 }, { "epoch": 0.06, "grad_norm": 0.6617625011381669, "learning_rate": 1.9954754972597174e-05, "loss": 0.643, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.7353288121431489, "learning_rate": 1.9954360982472733e-05, "loss": 0.6941, "step": 466 }, { "epoch": 0.06, "grad_norm": 0.6272050625643859, "learning_rate": 1.9953965288282625e-05, "loss": 0.6299, "step": 467 }, { "epoch": 0.06, "grad_norm": 0.6137542719720683, "learning_rate": 1.9953567890094596e-05, "loss": 0.6431, "step": 468 }, { "epoch": 0.06, "grad_norm": 0.562753993443392, "learning_rate": 1.995316878797667e-05, "loss": 0.6162, "step": 469 }, { "epoch": 0.06, "grad_norm": 0.6386761856111818, "learning_rate": 1.9952767981997174e-05, "loss": 0.6522, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.5679780202925698, "learning_rate": 1.9952365472224717e-05, "loss": 0.623, "step": 471 }, { "epoch": 0.06, "grad_norm": 0.6060663360428846, "learning_rate": 1.9951961258728204e-05, "loss": 0.6348, "step": 472 }, { "epoch": 0.06, "grad_norm": 0.6746929478999129, "learning_rate": 1.995155534157683e-05, "loss": 0.6378, "step": 473 }, { "epoch": 0.06, "grad_norm": 0.7092300517758314, "learning_rate": 1.9951147720840093e-05, "loss": 0.6708, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.6901749005558111, "learning_rate": 1.9950738396587757e-05, "loss": 0.6522, "step": 475 }, { "epoch": 0.06, "grad_norm": 0.6401679285408047, "learning_rate": 1.9950327368889904e-05, "loss": 0.6518, "step": 476 }, { "epoch": 0.06, "grad_norm": 0.6176114416043998, "learning_rate": 1.99499146378169e-05, "loss": 0.6404, "step": 477 }, { "epoch": 0.06, "grad_norm": 0.6280054615243779, "learning_rate": 1.9949500203439387e-05, "loss": 0.5989, "step": 478 }, { "epoch": 0.06, "grad_norm": 0.6630129859834514, "learning_rate": 1.9949084065828322e-05, "loss": 0.6718, "step": 479 }, { "epoch": 0.06, "grad_norm": 0.6551708431298925, "learning_rate": 1.9948666225054934e-05, "loss": 0.6487, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.6745750311190638, "learning_rate": 1.9948246681190763e-05, "loss": 0.6346, "step": 481 }, { "epoch": 0.06, "grad_norm": 0.5480018975670844, "learning_rate": 1.9947825434307623e-05, "loss": 0.6258, "step": 482 }, { "epoch": 0.06, "grad_norm": 0.625546406785138, "learning_rate": 1.994740248447762e-05, "loss": 0.6644, "step": 483 }, { "epoch": 0.06, "grad_norm": 0.6641937538365369, "learning_rate": 1.9946977831773172e-05, "loss": 0.6369, "step": 484 }, { "epoch": 0.06, "grad_norm": 0.6087946065744838, "learning_rate": 1.9946551476266967e-05, "loss": 0.6097, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.6629109181261431, "learning_rate": 1.9946123418031995e-05, "loss": 0.6469, "step": 486 }, { "epoch": 0.06, "grad_norm": 0.6595952410954361, "learning_rate": 1.994569365714153e-05, "loss": 0.6152, "step": 487 }, { "epoch": 0.06, "grad_norm": 0.6215034615951762, "learning_rate": 1.9945262193669136e-05, "loss": 0.6437, "step": 488 }, { "epoch": 0.06, "grad_norm": 0.6747650722996229, "learning_rate": 1.994482902768869e-05, "loss": 0.6561, "step": 489 }, { "epoch": 0.06, "grad_norm": 0.7114971337016353, "learning_rate": 1.9944394159274332e-05, "loss": 0.6394, "step": 490 }, { "epoch": 0.06, "grad_norm": 0.6062317426681242, "learning_rate": 1.9943957588500518e-05, "loss": 0.6629, "step": 491 }, { "epoch": 0.06, "grad_norm": 0.5982511941746086, "learning_rate": 1.994351931544197e-05, "loss": 0.6383, "step": 492 }, { "epoch": 0.06, "grad_norm": 0.6835713278537908, "learning_rate": 1.9943079340173722e-05, "loss": 0.6576, "step": 493 }, { "epoch": 0.06, "grad_norm": 0.5735422732011377, "learning_rate": 1.9942637662771096e-05, "loss": 0.6295, "step": 494 }, { "epoch": 0.06, "grad_norm": 0.6910943856552789, "learning_rate": 1.9942194283309692e-05, "loss": 0.6348, "step": 495 }, { "epoch": 0.06, "grad_norm": 0.689568102190342, "learning_rate": 1.994174920186542e-05, "loss": 0.6385, "step": 496 }, { "epoch": 0.06, "grad_norm": 0.654707712719486, "learning_rate": 1.994130241851447e-05, "loss": 0.6618, "step": 497 }, { "epoch": 0.06, "grad_norm": 0.7861334652044721, "learning_rate": 1.9940853933333325e-05, "loss": 0.6669, "step": 498 }, { "epoch": 0.06, "grad_norm": 0.6713831960275212, "learning_rate": 1.994040374639876e-05, "loss": 0.6744, "step": 499 }, { "epoch": 0.06, "grad_norm": 0.8159908220081165, "learning_rate": 1.993995185778784e-05, "loss": 0.6798, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.6746288741002626, "learning_rate": 1.9939498267577927e-05, "loss": 0.6357, "step": 501 }, { "epoch": 0.06, "grad_norm": 0.8441430557104669, "learning_rate": 1.993904297584667e-05, "loss": 0.6695, "step": 502 }, { "epoch": 0.06, "grad_norm": 0.6474499999063055, "learning_rate": 1.9938585982672004e-05, "loss": 0.6436, "step": 503 }, { "epoch": 0.06, "grad_norm": 0.6799688893803558, "learning_rate": 1.9938127288132167e-05, "loss": 0.6288, "step": 504 }, { "epoch": 0.06, "grad_norm": 0.6632225956497506, "learning_rate": 1.9937666892305677e-05, "loss": 0.6753, "step": 505 }, { "epoch": 0.06, "grad_norm": 0.5890293280683175, "learning_rate": 1.9937204795271354e-05, "loss": 0.6389, "step": 506 }, { "epoch": 0.06, "grad_norm": 0.7556951460546197, "learning_rate": 1.9936740997108294e-05, "loss": 0.6276, "step": 507 }, { "epoch": 0.06, "grad_norm": 0.6700722130497752, "learning_rate": 1.99362754978959e-05, "loss": 0.6411, "step": 508 }, { "epoch": 0.07, "grad_norm": 0.685455327802261, "learning_rate": 1.993580829771386e-05, "loss": 0.6442, "step": 509 }, { "epoch": 0.07, "grad_norm": 0.7060541932703234, "learning_rate": 1.9935339396642157e-05, "loss": 0.7002, "step": 510 }, { "epoch": 0.07, "grad_norm": 0.6410583155565452, "learning_rate": 1.993486879476105e-05, "loss": 0.6597, "step": 511 }, { "epoch": 0.07, "grad_norm": 0.6767558114258717, "learning_rate": 1.993439649215111e-05, "loss": 0.629, "step": 512 }, { "epoch": 0.07, "grad_norm": 0.645587381813191, "learning_rate": 1.9933922488893183e-05, "loss": 0.648, "step": 513 }, { "epoch": 0.07, "grad_norm": 0.6756908486892611, "learning_rate": 1.993344678506842e-05, "loss": 0.6559, "step": 514 }, { "epoch": 0.07, "grad_norm": 0.6506602840739509, "learning_rate": 1.9932969380758246e-05, "loss": 0.6261, "step": 515 }, { "epoch": 0.07, "grad_norm": 0.5943882212331437, "learning_rate": 1.9932490276044394e-05, "loss": 0.6665, "step": 516 }, { "epoch": 0.07, "grad_norm": 0.5403431789655727, "learning_rate": 1.993200947100888e-05, "loss": 0.642, "step": 517 }, { "epoch": 0.07, "grad_norm": 0.6823557503319251, "learning_rate": 1.993152696573401e-05, "loss": 0.6886, "step": 518 }, { "epoch": 0.07, "grad_norm": 0.5785461204186834, "learning_rate": 1.9931042760302385e-05, "loss": 0.6059, "step": 519 }, { "epoch": 0.07, "grad_norm": 0.6080220374182204, "learning_rate": 1.9930556854796893e-05, "loss": 0.6244, "step": 520 }, { "epoch": 0.07, "grad_norm": 0.6941727035505598, "learning_rate": 1.9930069249300715e-05, "loss": 0.6563, "step": 521 }, { "epoch": 0.07, "grad_norm": 0.736953382605704, "learning_rate": 1.992957994389733e-05, "loss": 0.6399, "step": 522 }, { "epoch": 0.07, "grad_norm": 0.6457111450421367, "learning_rate": 1.992908893867049e-05, "loss": 0.6425, "step": 523 }, { "epoch": 0.07, "grad_norm": 0.7071405739849171, "learning_rate": 1.9928596233704257e-05, "loss": 0.6318, "step": 524 }, { "epoch": 0.07, "grad_norm": 0.6251647367649558, "learning_rate": 1.9928101829082977e-05, "loss": 0.6188, "step": 525 }, { "epoch": 0.07, "grad_norm": 0.7766565395273469, "learning_rate": 1.9927605724891276e-05, "loss": 0.6006, "step": 526 }, { "epoch": 0.07, "grad_norm": 0.6207563159367397, "learning_rate": 1.9927107921214094e-05, "loss": 0.6467, "step": 527 }, { "epoch": 0.07, "grad_norm": 0.6826639734426166, "learning_rate": 1.9926608418136637e-05, "loss": 0.651, "step": 528 }, { "epoch": 0.07, "grad_norm": 0.6383646319244859, "learning_rate": 1.9926107215744423e-05, "loss": 0.6912, "step": 529 }, { "epoch": 0.07, "grad_norm": 0.7401498527894675, "learning_rate": 1.9925604314123248e-05, "loss": 0.6708, "step": 530 }, { "epoch": 0.07, "grad_norm": 0.5921899887918881, "learning_rate": 1.9925099713359202e-05, "loss": 0.6469, "step": 531 }, { "epoch": 0.07, "grad_norm": 0.6646271868648245, "learning_rate": 1.992459341353867e-05, "loss": 0.656, "step": 532 }, { "epoch": 0.07, "grad_norm": 0.5722086073629804, "learning_rate": 1.992408541474832e-05, "loss": 0.6096, "step": 533 }, { "epoch": 0.07, "grad_norm": 0.5948054780354988, "learning_rate": 1.992357571707512e-05, "loss": 0.5909, "step": 534 }, { "epoch": 0.07, "grad_norm": 0.6751506193794988, "learning_rate": 1.9923064320606315e-05, "loss": 0.6362, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.7106146847077265, "learning_rate": 1.9922551225429464e-05, "loss": 0.6511, "step": 536 }, { "epoch": 0.07, "grad_norm": 0.5795103230795771, "learning_rate": 1.992203643163239e-05, "loss": 0.6198, "step": 537 }, { "epoch": 0.07, "grad_norm": 0.7091116796194455, "learning_rate": 1.9921519939303225e-05, "loss": 0.6979, "step": 538 }, { "epoch": 0.07, "grad_norm": 0.594747535905356, "learning_rate": 1.9921001748530385e-05, "loss": 0.652, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.6381878226143999, "learning_rate": 1.9920481859402577e-05, "loss": 0.6357, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.5963627195170277, "learning_rate": 1.99199602720088e-05, "loss": 0.6095, "step": 541 }, { "epoch": 0.07, "grad_norm": 0.5834889821020468, "learning_rate": 1.991943698643835e-05, "loss": 0.6324, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.68701141279362, "learning_rate": 1.99189120027808e-05, "loss": 0.6541, "step": 543 }, { "epoch": 0.07, "grad_norm": 0.6301184544970694, "learning_rate": 1.991838532112602e-05, "loss": 0.648, "step": 544 }, { "epoch": 0.07, "grad_norm": 0.6001939982038607, "learning_rate": 1.9917856941564176e-05, "loss": 0.6397, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.6972784863924607, "learning_rate": 1.9917326864185718e-05, "loss": 0.6378, "step": 546 }, { "epoch": 0.07, "grad_norm": 0.6272664327721428, "learning_rate": 1.991679508908139e-05, "loss": 0.6189, "step": 547 }, { "epoch": 0.07, "grad_norm": 0.6976795534995465, "learning_rate": 1.991626161634222e-05, "loss": 0.6579, "step": 548 }, { "epoch": 0.07, "grad_norm": 0.7439088111486063, "learning_rate": 1.991572644605954e-05, "loss": 0.638, "step": 549 }, { "epoch": 0.07, "grad_norm": 0.704135260778888, "learning_rate": 1.9915189578324957e-05, "loss": 0.658, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.6507698066806812, "learning_rate": 1.9914651013230387e-05, "loss": 0.6187, "step": 551 }, { "epoch": 0.07, "grad_norm": 0.6791151421745584, "learning_rate": 1.9914110750868013e-05, "loss": 0.6534, "step": 552 }, { "epoch": 0.07, "grad_norm": 0.6441235852462119, "learning_rate": 1.9913568791330334e-05, "loss": 0.6312, "step": 553 }, { "epoch": 0.07, "grad_norm": 0.5659597134272116, "learning_rate": 1.9913025134710113e-05, "loss": 0.6255, "step": 554 }, { "epoch": 0.07, "grad_norm": 0.7215239844477321, "learning_rate": 1.991247978110043e-05, "loss": 0.6449, "step": 555 }, { "epoch": 0.07, "grad_norm": 0.6343142996661402, "learning_rate": 1.9911932730594637e-05, "loss": 0.6147, "step": 556 }, { "epoch": 0.07, "grad_norm": 0.6587946032983699, "learning_rate": 1.991138398328638e-05, "loss": 0.6586, "step": 557 }, { "epoch": 0.07, "grad_norm": 0.6170551749753937, "learning_rate": 1.9910833539269604e-05, "loss": 0.6333, "step": 558 }, { "epoch": 0.07, "grad_norm": 0.5706248863195481, "learning_rate": 1.9910281398638533e-05, "loss": 0.6143, "step": 559 }, { "epoch": 0.07, "grad_norm": 0.6565795432581798, "learning_rate": 1.990972756148769e-05, "loss": 0.6551, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.6680004031653853, "learning_rate": 1.9909172027911886e-05, "loss": 0.6377, "step": 561 }, { "epoch": 0.07, "grad_norm": 0.8227369403946628, "learning_rate": 1.9908614798006215e-05, "loss": 0.6385, "step": 562 }, { "epoch": 0.07, "grad_norm": 0.790269306210185, "learning_rate": 1.9908055871866077e-05, "loss": 0.692, "step": 563 }, { "epoch": 0.07, "grad_norm": 0.7741234267976633, "learning_rate": 1.9907495249587147e-05, "loss": 0.6812, "step": 564 }, { "epoch": 0.07, "grad_norm": 0.7960851173961886, "learning_rate": 1.99069329312654e-05, "loss": 0.6354, "step": 565 }, { "epoch": 0.07, "grad_norm": 0.7764005565426769, "learning_rate": 1.9906368916997095e-05, "loss": 0.7155, "step": 566 }, { "epoch": 0.07, "grad_norm": 0.5956288870776009, "learning_rate": 1.990580320687879e-05, "loss": 0.6577, "step": 567 }, { "epoch": 0.07, "grad_norm": 0.6835309362941439, "learning_rate": 1.990523580100732e-05, "loss": 0.6573, "step": 568 }, { "epoch": 0.07, "grad_norm": 0.5547759060000632, "learning_rate": 1.9904666699479828e-05, "loss": 0.6243, "step": 569 }, { "epoch": 0.07, "grad_norm": 0.7874086838129715, "learning_rate": 1.9904095902393727e-05, "loss": 0.645, "step": 570 }, { "epoch": 0.07, "grad_norm": 0.661713566732365, "learning_rate": 1.9903523409846734e-05, "loss": 0.6478, "step": 571 }, { "epoch": 0.07, "grad_norm": 0.6688661844103444, "learning_rate": 1.9902949221936855e-05, "loss": 0.6558, "step": 572 }, { "epoch": 0.07, "grad_norm": 0.6264132104618612, "learning_rate": 1.9902373338762385e-05, "loss": 0.669, "step": 573 }, { "epoch": 0.07, "grad_norm": 0.767680884194396, "learning_rate": 1.99017957604219e-05, "loss": 0.6247, "step": 574 }, { "epoch": 0.07, "grad_norm": 0.6759692073220513, "learning_rate": 1.9901216487014284e-05, "loss": 0.6411, "step": 575 }, { "epoch": 0.07, "grad_norm": 0.644227454871241, "learning_rate": 1.99006355186387e-05, "loss": 0.6566, "step": 576 }, { "epoch": 0.07, "grad_norm": 0.5759089296397346, "learning_rate": 1.9900052855394597e-05, "loss": 0.623, "step": 577 }, { "epoch": 0.07, "grad_norm": 0.6471878247171627, "learning_rate": 1.9899468497381725e-05, "loss": 0.6283, "step": 578 }, { "epoch": 0.07, "grad_norm": 0.611336229480258, "learning_rate": 1.9898882444700115e-05, "loss": 0.6208, "step": 579 }, { "epoch": 0.07, "grad_norm": 0.5606392797669343, "learning_rate": 1.9898294697450096e-05, "loss": 0.6207, "step": 580 }, { "epoch": 0.07, "grad_norm": 0.6175409358417024, "learning_rate": 1.9897705255732282e-05, "loss": 0.6474, "step": 581 }, { "epoch": 0.07, "grad_norm": 0.5692026217812939, "learning_rate": 1.989711411964758e-05, "loss": 0.6041, "step": 582 }, { "epoch": 0.07, "grad_norm": 0.6409501134434707, "learning_rate": 1.9896521289297177e-05, "loss": 0.6541, "step": 583 }, { "epoch": 0.07, "grad_norm": 0.586174329997128, "learning_rate": 1.989592676478257e-05, "loss": 0.6395, "step": 584 }, { "epoch": 0.07, "grad_norm": 0.5893310602064576, "learning_rate": 1.9895330546205528e-05, "loss": 0.6409, "step": 585 }, { "epoch": 0.07, "grad_norm": 0.6619418009963494, "learning_rate": 1.9894732633668115e-05, "loss": 0.6688, "step": 586 }, { "epoch": 0.07, "grad_norm": 0.5770675759070201, "learning_rate": 1.989413302727269e-05, "loss": 0.6074, "step": 587 }, { "epoch": 0.08, "grad_norm": 0.6775918013910666, "learning_rate": 1.98935317271219e-05, "loss": 0.6531, "step": 588 }, { "epoch": 0.08, "grad_norm": 0.5978512367519803, "learning_rate": 1.9892928733318673e-05, "loss": 0.6792, "step": 589 }, { "epoch": 0.08, "grad_norm": 0.636300542483217, "learning_rate": 1.9892324045966237e-05, "loss": 0.6181, "step": 590 }, { "epoch": 0.08, "grad_norm": 0.6274143868178835, "learning_rate": 1.9891717665168114e-05, "loss": 0.6581, "step": 591 }, { "epoch": 0.08, "grad_norm": 0.6152649642285434, "learning_rate": 1.9891109591028103e-05, "loss": 0.6732, "step": 592 }, { "epoch": 0.08, "grad_norm": 0.5968257611656365, "learning_rate": 1.9890499823650297e-05, "loss": 0.6076, "step": 593 }, { "epoch": 0.08, "grad_norm": 0.728415542992055, "learning_rate": 1.9889888363139086e-05, "loss": 0.6716, "step": 594 }, { "epoch": 0.08, "grad_norm": 0.6262567810755814, "learning_rate": 1.988927520959914e-05, "loss": 0.6313, "step": 595 }, { "epoch": 0.08, "grad_norm": 0.6738061921271815, "learning_rate": 1.9888660363135424e-05, "loss": 0.6669, "step": 596 }, { "epoch": 0.08, "grad_norm": 0.6358630002530067, "learning_rate": 1.9888043823853197e-05, "loss": 0.6209, "step": 597 }, { "epoch": 0.08, "grad_norm": 0.6102195373948012, "learning_rate": 1.9887425591857998e-05, "loss": 0.6165, "step": 598 }, { "epoch": 0.08, "grad_norm": 0.6855737862371398, "learning_rate": 1.9886805667255664e-05, "loss": 0.6435, "step": 599 }, { "epoch": 0.08, "grad_norm": 0.5849679419704035, "learning_rate": 1.988618405015231e-05, "loss": 0.667, "step": 600 }, { "epoch": 0.08, "grad_norm": 0.8221021711940485, "learning_rate": 1.9885560740654363e-05, "loss": 0.6097, "step": 601 }, { "epoch": 0.08, "grad_norm": 0.6660023591035582, "learning_rate": 1.988493573886852e-05, "loss": 0.6449, "step": 602 }, { "epoch": 0.08, "grad_norm": 0.7126694199267498, "learning_rate": 1.988430904490177e-05, "loss": 0.6364, "step": 603 }, { "epoch": 0.08, "grad_norm": 0.7831484310870638, "learning_rate": 1.9883680658861396e-05, "loss": 0.6632, "step": 604 }, { "epoch": 0.08, "grad_norm": 0.7573482418592345, "learning_rate": 1.9883050580854977e-05, "loss": 0.6268, "step": 605 }, { "epoch": 0.08, "grad_norm": 0.6040343352260965, "learning_rate": 1.988241881099037e-05, "loss": 0.6086, "step": 606 }, { "epoch": 0.08, "grad_norm": 0.6444126921902101, "learning_rate": 1.9881785349375726e-05, "loss": 0.642, "step": 607 }, { "epoch": 0.08, "grad_norm": 0.6866253938777711, "learning_rate": 1.9881150196119486e-05, "loss": 0.6505, "step": 608 }, { "epoch": 0.08, "grad_norm": 0.6271700627678295, "learning_rate": 1.988051335133038e-05, "loss": 0.644, "step": 609 }, { "epoch": 0.08, "grad_norm": 0.6827205417417875, "learning_rate": 1.987987481511743e-05, "loss": 0.6061, "step": 610 }, { "epoch": 0.08, "grad_norm": 0.549744089279641, "learning_rate": 1.9879234587589948e-05, "loss": 0.6244, "step": 611 }, { "epoch": 0.08, "grad_norm": 0.6505418912061914, "learning_rate": 1.9878592668857525e-05, "loss": 0.6551, "step": 612 }, { "epoch": 0.08, "grad_norm": 0.6972895762201925, "learning_rate": 1.9877949059030057e-05, "loss": 0.6596, "step": 613 }, { "epoch": 0.08, "grad_norm": 0.5750067315279471, "learning_rate": 1.987730375821772e-05, "loss": 0.6523, "step": 614 }, { "epoch": 0.08, "grad_norm": 0.6954176550138895, "learning_rate": 1.987665676653098e-05, "loss": 0.6579, "step": 615 }, { "epoch": 0.08, "grad_norm": 0.5199981617219489, "learning_rate": 1.9876008084080596e-05, "loss": 0.6186, "step": 616 }, { "epoch": 0.08, "grad_norm": 0.6778227054381298, "learning_rate": 1.9875357710977615e-05, "loss": 0.6425, "step": 617 }, { "epoch": 0.08, "grad_norm": 0.6140763785420272, "learning_rate": 1.987470564733337e-05, "loss": 0.6365, "step": 618 }, { "epoch": 0.08, "grad_norm": 0.738672390844558, "learning_rate": 1.987405189325949e-05, "loss": 0.6042, "step": 619 }, { "epoch": 0.08, "grad_norm": 0.5693774944617821, "learning_rate": 1.9873396448867886e-05, "loss": 0.617, "step": 620 }, { "epoch": 0.08, "grad_norm": 0.645515593205295, "learning_rate": 1.987273931427077e-05, "loss": 0.6438, "step": 621 }, { "epoch": 0.08, "grad_norm": 0.6922281160935153, "learning_rate": 1.9872080489580626e-05, "loss": 0.6521, "step": 622 }, { "epoch": 0.08, "grad_norm": 0.7713698204025791, "learning_rate": 1.987141997491024e-05, "loss": 0.6722, "step": 623 }, { "epoch": 0.08, "grad_norm": 0.6170386844404585, "learning_rate": 1.9870757770372683e-05, "loss": 0.6323, "step": 624 }, { "epoch": 0.08, "grad_norm": 0.6539839862402854, "learning_rate": 1.987009387608132e-05, "loss": 0.6365, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.668093324079177, "learning_rate": 1.9869428292149798e-05, "loss": 0.6336, "step": 626 }, { "epoch": 0.08, "grad_norm": 0.6891048363818066, "learning_rate": 1.986876101869206e-05, "loss": 0.6422, "step": 627 }, { "epoch": 0.08, "grad_norm": 0.7296548755970409, "learning_rate": 1.9868092055822336e-05, "loss": 0.683, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.5794201617773139, "learning_rate": 1.986742140365514e-05, "loss": 0.614, "step": 629 }, { "epoch": 0.08, "grad_norm": 0.6934526137124349, "learning_rate": 1.986674906230528e-05, "loss": 0.6481, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.7056449091522797, "learning_rate": 1.9866075031887857e-05, "loss": 0.6591, "step": 631 }, { "epoch": 0.08, "grad_norm": 0.6313157416029265, "learning_rate": 1.9865399312518252e-05, "loss": 0.6141, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.5923395515911154, "learning_rate": 1.986472190431214e-05, "loss": 0.6287, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.7017132598790848, "learning_rate": 1.986404280738549e-05, "loss": 0.6885, "step": 634 }, { "epoch": 0.08, "grad_norm": 0.6360189473634602, "learning_rate": 1.9863362021854554e-05, "loss": 0.6491, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.6328732397512029, "learning_rate": 1.9862679547835867e-05, "loss": 0.6474, "step": 636 }, { "epoch": 0.08, "grad_norm": 1.0364404811424524, "learning_rate": 1.986199538544627e-05, "loss": 0.7057, "step": 637 }, { "epoch": 0.08, "grad_norm": 0.5827986330155411, "learning_rate": 1.9861309534802875e-05, "loss": 0.6333, "step": 638 }, { "epoch": 0.08, "grad_norm": 0.9416191106903443, "learning_rate": 1.9860621996023097e-05, "loss": 0.7008, "step": 639 }, { "epoch": 0.08, "grad_norm": 0.6473211086424228, "learning_rate": 1.9859932769224633e-05, "loss": 0.6588, "step": 640 }, { "epoch": 0.08, "grad_norm": 0.764002556755044, "learning_rate": 1.985924185452547e-05, "loss": 0.6434, "step": 641 }, { "epoch": 0.08, "grad_norm": 0.6113275716889836, "learning_rate": 1.9858549252043883e-05, "loss": 0.6642, "step": 642 }, { "epoch": 0.08, "grad_norm": 0.5871211655567607, "learning_rate": 1.9857854961898437e-05, "loss": 0.6352, "step": 643 }, { "epoch": 0.08, "grad_norm": 0.5774235342507021, "learning_rate": 1.9857158984207987e-05, "loss": 0.6412, "step": 644 }, { "epoch": 0.08, "grad_norm": 0.5805046744545254, "learning_rate": 1.985646131909168e-05, "loss": 0.6507, "step": 645 }, { "epoch": 0.08, "grad_norm": 0.6455056214278758, "learning_rate": 1.985576196666894e-05, "loss": 0.6422, "step": 646 }, { "epoch": 0.08, "grad_norm": 0.6437675795769635, "learning_rate": 1.985506092705949e-05, "loss": 0.6488, "step": 647 }, { "epoch": 0.08, "grad_norm": 0.6841132034406457, "learning_rate": 1.9854358200383343e-05, "loss": 0.6653, "step": 648 }, { "epoch": 0.08, "grad_norm": 0.6536894188352317, "learning_rate": 1.9853653786760797e-05, "loss": 0.635, "step": 649 }, { "epoch": 0.08, "grad_norm": 0.6969341325121402, "learning_rate": 1.9852947686312433e-05, "loss": 0.642, "step": 650 }, { "epoch": 0.08, "grad_norm": 0.6422244196642025, "learning_rate": 1.9852239899159134e-05, "loss": 0.6162, "step": 651 }, { "epoch": 0.08, "grad_norm": 0.6452531272389469, "learning_rate": 1.9851530425422064e-05, "loss": 0.6375, "step": 652 }, { "epoch": 0.08, "grad_norm": 0.5413979110607559, "learning_rate": 1.985081926522267e-05, "loss": 0.6133, "step": 653 }, { "epoch": 0.08, "grad_norm": 0.7611756362971919, "learning_rate": 1.98501064186827e-05, "loss": 0.6261, "step": 654 }, { "epoch": 0.08, "grad_norm": 0.6730942925092348, "learning_rate": 1.984939188592418e-05, "loss": 0.6771, "step": 655 }, { "epoch": 0.08, "grad_norm": 0.5491687099521021, "learning_rate": 1.9848675667069434e-05, "loss": 0.6279, "step": 656 }, { "epoch": 0.08, "grad_norm": 1.1117045696109613, "learning_rate": 1.9847957762241066e-05, "loss": 0.7498, "step": 657 }, { "epoch": 0.08, "grad_norm": 0.6038395424922474, "learning_rate": 1.9847238171561972e-05, "loss": 0.6337, "step": 658 }, { "epoch": 0.08, "grad_norm": 0.7924048310060121, "learning_rate": 1.9846516895155346e-05, "loss": 0.6824, "step": 659 }, { "epoch": 0.08, "grad_norm": 0.7155014666378341, "learning_rate": 1.984579393314465e-05, "loss": 0.6575, "step": 660 }, { "epoch": 0.08, "grad_norm": 0.6694515171161406, "learning_rate": 1.9845069285653655e-05, "loss": 0.6391, "step": 661 }, { "epoch": 0.08, "grad_norm": 0.7322323899931688, "learning_rate": 1.9844342952806407e-05, "loss": 0.695, "step": 662 }, { "epoch": 0.08, "grad_norm": 0.5809146430983497, "learning_rate": 1.984361493472725e-05, "loss": 0.626, "step": 663 }, { "epoch": 0.08, "grad_norm": 0.5823471823706685, "learning_rate": 1.9842885231540805e-05, "loss": 0.6406, "step": 664 }, { "epoch": 0.08, "grad_norm": 0.6573337983761294, "learning_rate": 1.984215384337199e-05, "loss": 0.6385, "step": 665 }, { "epoch": 0.09, "grad_norm": 0.6024660214011411, "learning_rate": 1.9841420770346017e-05, "loss": 0.6267, "step": 666 }, { "epoch": 0.09, "grad_norm": 0.7165442670016834, "learning_rate": 1.984068601258837e-05, "loss": 0.6788, "step": 667 }, { "epoch": 0.09, "grad_norm": 0.6932791356973554, "learning_rate": 1.9839949570224837e-05, "loss": 0.654, "step": 668 }, { "epoch": 0.09, "grad_norm": 0.5927306417027434, "learning_rate": 1.983921144338148e-05, "loss": 0.6454, "step": 669 }, { "epoch": 0.09, "grad_norm": 0.5690546557717788, "learning_rate": 1.9838471632184674e-05, "loss": 0.6288, "step": 670 }, { "epoch": 0.09, "grad_norm": 0.5853693952368275, "learning_rate": 1.9837730136761045e-05, "loss": 0.6201, "step": 671 }, { "epoch": 0.09, "grad_norm": 0.5730058622451522, "learning_rate": 1.9836986957237543e-05, "loss": 0.6239, "step": 672 }, { "epoch": 0.09, "grad_norm": 0.5993753780923007, "learning_rate": 1.9836242093741383e-05, "loss": 0.6556, "step": 673 }, { "epoch": 0.09, "grad_norm": 0.7767714032776728, "learning_rate": 1.983549554640008e-05, "loss": 0.6889, "step": 674 }, { "epoch": 0.09, "grad_norm": 0.5704804634342932, "learning_rate": 1.9834747315341435e-05, "loss": 0.608, "step": 675 }, { "epoch": 0.09, "grad_norm": 0.7512513512478552, "learning_rate": 1.9833997400693532e-05, "loss": 0.63, "step": 676 }, { "epoch": 0.09, "grad_norm": 0.7128978712099759, "learning_rate": 1.983324580258475e-05, "loss": 0.6111, "step": 677 }, { "epoch": 0.09, "grad_norm": 0.7620320042221803, "learning_rate": 1.9832492521143755e-05, "loss": 0.6789, "step": 678 }, { "epoch": 0.09, "grad_norm": 0.7505151476163022, "learning_rate": 1.9831737556499498e-05, "loss": 0.633, "step": 679 }, { "epoch": 0.09, "grad_norm": 0.5856239902926798, "learning_rate": 1.9830980908781218e-05, "loss": 0.6471, "step": 680 }, { "epoch": 0.09, "grad_norm": 0.8663155032058102, "learning_rate": 1.9830222578118444e-05, "loss": 0.6363, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.5816613224518613, "learning_rate": 1.9829462564640995e-05, "loss": 0.6365, "step": 682 }, { "epoch": 0.09, "grad_norm": 0.6576331754795092, "learning_rate": 1.982870086847898e-05, "loss": 0.6395, "step": 683 }, { "epoch": 0.09, "grad_norm": 0.6182818348602158, "learning_rate": 1.9827937489762783e-05, "loss": 0.6334, "step": 684 }, { "epoch": 0.09, "grad_norm": 0.5125229367039577, "learning_rate": 1.9827172428623092e-05, "loss": 0.6203, "step": 685 }, { "epoch": 0.09, "grad_norm": 0.6773247979188867, "learning_rate": 1.982640568519087e-05, "loss": 0.6207, "step": 686 }, { "epoch": 0.09, "grad_norm": 0.6621122336895076, "learning_rate": 1.9825637259597384e-05, "loss": 0.6578, "step": 687 }, { "epoch": 0.09, "grad_norm": 0.6049299803101019, "learning_rate": 1.982486715197417e-05, "loss": 0.647, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.6400662564921251, "learning_rate": 1.9824095362453064e-05, "loss": 0.6568, "step": 689 }, { "epoch": 0.09, "grad_norm": 0.5973255609977354, "learning_rate": 1.982332189116619e-05, "loss": 0.6464, "step": 690 }, { "epoch": 0.09, "grad_norm": 0.5952258660921167, "learning_rate": 1.9822546738245952e-05, "loss": 0.6117, "step": 691 }, { "epoch": 0.09, "grad_norm": 0.8438912858546673, "learning_rate": 1.982176990382505e-05, "loss": 0.707, "step": 692 }, { "epoch": 0.09, "grad_norm": 0.650405787301775, "learning_rate": 1.9820991388036463e-05, "loss": 0.6652, "step": 693 }, { "epoch": 0.09, "grad_norm": 0.7359673369502246, "learning_rate": 1.9820211191013475e-05, "loss": 0.6582, "step": 694 }, { "epoch": 0.09, "grad_norm": 0.6026201251862682, "learning_rate": 1.9819429312889635e-05, "loss": 0.6545, "step": 695 }, { "epoch": 0.09, "grad_norm": 0.655159278216179, "learning_rate": 1.9818645753798798e-05, "loss": 0.6743, "step": 696 }, { "epoch": 0.09, "grad_norm": 0.5946547481258558, "learning_rate": 1.9817860513875095e-05, "loss": 0.6322, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.6021384880992926, "learning_rate": 1.9817073593252952e-05, "loss": 0.6646, "step": 698 }, { "epoch": 0.09, "grad_norm": 0.5296538838322191, "learning_rate": 1.981628499206708e-05, "loss": 0.6191, "step": 699 }, { "epoch": 0.09, "grad_norm": 0.7043140714768668, "learning_rate": 1.9815494710452476e-05, "loss": 0.6876, "step": 700 }, { "epoch": 0.09, "grad_norm": 0.5851406017961059, "learning_rate": 1.9814702748544433e-05, "loss": 0.6307, "step": 701 }, { "epoch": 0.09, "grad_norm": 0.5977784253602176, "learning_rate": 1.9813909106478518e-05, "loss": 0.6594, "step": 702 }, { "epoch": 0.09, "grad_norm": 0.6558054438042142, "learning_rate": 1.9813113784390596e-05, "loss": 0.6797, "step": 703 }, { "epoch": 0.09, "grad_norm": 0.550286446999375, "learning_rate": 1.9812316782416812e-05, "loss": 0.6237, "step": 704 }, { "epoch": 0.09, "grad_norm": 0.5312955696920317, "learning_rate": 1.9811518100693614e-05, "loss": 0.6207, "step": 705 }, { "epoch": 0.09, "grad_norm": 1.541307108550392, "learning_rate": 1.9810717739357716e-05, "loss": 0.5869, "step": 706 }, { "epoch": 0.09, "grad_norm": 0.6610284760309167, "learning_rate": 1.9809915698546136e-05, "loss": 0.6617, "step": 707 }, { "epoch": 0.09, "grad_norm": 0.6432298624282211, "learning_rate": 1.9809111978396173e-05, "loss": 0.6428, "step": 708 }, { "epoch": 0.09, "grad_norm": 0.8551549189670725, "learning_rate": 1.9808306579045405e-05, "loss": 0.6129, "step": 709 }, { "epoch": 0.09, "grad_norm": 0.5630656493628161, "learning_rate": 1.9807499500631722e-05, "loss": 0.6234, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.6492679232203952, "learning_rate": 1.9806690743293275e-05, "loss": 0.6367, "step": 711 }, { "epoch": 0.09, "grad_norm": 0.6356549436574338, "learning_rate": 1.9805880307168514e-05, "loss": 0.6767, "step": 712 }, { "epoch": 0.09, "grad_norm": 0.6024578800870715, "learning_rate": 1.9805068192396182e-05, "loss": 0.6551, "step": 713 }, { "epoch": 0.09, "grad_norm": 0.5723692933733965, "learning_rate": 1.9804254399115298e-05, "loss": 0.6493, "step": 714 }, { "epoch": 0.09, "grad_norm": 0.5329474238598375, "learning_rate": 1.9803438927465178e-05, "loss": 0.6101, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.6295992753143798, "learning_rate": 1.9802621777585417e-05, "loss": 0.6446, "step": 716 }, { "epoch": 0.09, "grad_norm": 0.6214036228930353, "learning_rate": 1.9801802949615904e-05, "loss": 0.6596, "step": 717 }, { "epoch": 0.09, "grad_norm": 0.6089195989007975, "learning_rate": 1.9800982443696806e-05, "loss": 0.6408, "step": 718 }, { "epoch": 0.09, "grad_norm": 0.7258045483683039, "learning_rate": 1.9800160259968592e-05, "loss": 0.6993, "step": 719 }, { "epoch": 0.09, "grad_norm": 0.6261745068861431, "learning_rate": 1.9799336398572e-05, "loss": 0.6401, "step": 720 }, { "epoch": 0.09, "grad_norm": 0.672768321323521, "learning_rate": 1.9798510859648078e-05, "loss": 0.6458, "step": 721 }, { "epoch": 0.09, "grad_norm": 0.6291888739655886, "learning_rate": 1.9797683643338138e-05, "loss": 0.6784, "step": 722 }, { "epoch": 0.09, "grad_norm": 0.5124687637634998, "learning_rate": 1.9796854749783795e-05, "loss": 0.6145, "step": 723 }, { "epoch": 0.09, "grad_norm": 0.6104205498660383, "learning_rate": 1.979602417912694e-05, "loss": 0.6319, "step": 724 }, { "epoch": 0.09, "grad_norm": 0.5664149688796116, "learning_rate": 1.979519193150976e-05, "loss": 0.6367, "step": 725 }, { "epoch": 0.09, "grad_norm": 0.6660741801649793, "learning_rate": 1.979435800707473e-05, "loss": 0.657, "step": 726 }, { "epoch": 0.09, "grad_norm": 0.6276103371741523, "learning_rate": 1.97935224059646e-05, "loss": 0.5893, "step": 727 }, { "epoch": 0.09, "grad_norm": 0.613416055346597, "learning_rate": 1.979268512832242e-05, "loss": 0.6499, "step": 728 }, { "epoch": 0.09, "grad_norm": 0.5825047077010387, "learning_rate": 1.9791846174291515e-05, "loss": 0.635, "step": 729 }, { "epoch": 0.09, "grad_norm": 0.6001504730465527, "learning_rate": 1.9791005544015514e-05, "loss": 0.6498, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.647698198797009, "learning_rate": 1.9790163237638317e-05, "loss": 0.6169, "step": 731 }, { "epoch": 0.09, "grad_norm": 0.576475226160607, "learning_rate": 1.9789319255304115e-05, "loss": 0.6239, "step": 732 }, { "epoch": 0.09, "grad_norm": 0.5253249208466747, "learning_rate": 1.9788473597157386e-05, "loss": 0.6385, "step": 733 }, { "epoch": 0.09, "grad_norm": 0.6622310231526221, "learning_rate": 1.9787626263342904e-05, "loss": 0.6366, "step": 734 }, { "epoch": 0.09, "grad_norm": 0.5740877568619348, "learning_rate": 1.9786777254005715e-05, "loss": 0.6494, "step": 735 }, { "epoch": 0.09, "grad_norm": 0.5944572490509868, "learning_rate": 1.9785926569291167e-05, "loss": 0.6111, "step": 736 }, { "epoch": 0.09, "grad_norm": 0.6240303443625322, "learning_rate": 1.9785074209344878e-05, "loss": 0.6351, "step": 737 }, { "epoch": 0.09, "grad_norm": 0.539591743830665, "learning_rate": 1.9784220174312768e-05, "loss": 0.6107, "step": 738 }, { "epoch": 0.09, "grad_norm": 0.6679370759833779, "learning_rate": 1.978336446434103e-05, "loss": 0.6778, "step": 739 }, { "epoch": 0.09, "grad_norm": 0.5512428747364696, "learning_rate": 1.9782507079576162e-05, "loss": 0.6078, "step": 740 }, { "epoch": 0.09, "grad_norm": 0.5800661793826546, "learning_rate": 1.9781648020164932e-05, "loss": 0.6253, "step": 741 }, { "epoch": 0.09, "grad_norm": 0.7049711207218733, "learning_rate": 1.9780787286254398e-05, "loss": 0.6983, "step": 742 }, { "epoch": 0.09, "grad_norm": 0.6639070507949241, "learning_rate": 1.977992487799191e-05, "loss": 0.6453, "step": 743 }, { "epoch": 0.1, "grad_norm": 0.5879979037713362, "learning_rate": 1.9779060795525103e-05, "loss": 0.6606, "step": 744 }, { "epoch": 0.1, "grad_norm": 0.7618101836231916, "learning_rate": 1.9778195039001896e-05, "loss": 0.7128, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.582735661811856, "learning_rate": 1.9777327608570494e-05, "loss": 0.6307, "step": 746 }, { "epoch": 0.1, "grad_norm": 0.724930569337393, "learning_rate": 1.9776458504379393e-05, "loss": 0.6665, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.5875245616104626, "learning_rate": 1.9775587726577376e-05, "loss": 0.6403, "step": 748 }, { "epoch": 0.1, "grad_norm": 0.5789560736165131, "learning_rate": 1.9774715275313504e-05, "loss": 0.6161, "step": 749 }, { "epoch": 0.1, "grad_norm": 0.6310863109772441, "learning_rate": 1.9773841150737132e-05, "loss": 0.6504, "step": 750 }, { "epoch": 0.1, "grad_norm": 0.5321228859969467, "learning_rate": 1.97729653529979e-05, "loss": 0.6443, "step": 751 }, { "epoch": 0.1, "grad_norm": 0.5777126728113381, "learning_rate": 1.9772087882245734e-05, "loss": 0.6038, "step": 752 }, { "epoch": 0.1, "grad_norm": 0.6858357515876978, "learning_rate": 1.9771208738630846e-05, "loss": 0.6338, "step": 753 }, { "epoch": 0.1, "grad_norm": 0.5989701226261375, "learning_rate": 1.9770327922303734e-05, "loss": 0.6249, "step": 754 }, { "epoch": 0.1, "grad_norm": 0.700266998864001, "learning_rate": 1.9769445433415186e-05, "loss": 0.6378, "step": 755 }, { "epoch": 0.1, "grad_norm": 0.6858748152664074, "learning_rate": 1.976856127211627e-05, "loss": 0.6768, "step": 756 }, { "epoch": 0.1, "grad_norm": 0.6927361944761008, "learning_rate": 1.976767543855834e-05, "loss": 0.6449, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.6122342197586665, "learning_rate": 1.976678793289305e-05, "loss": 0.6539, "step": 758 }, { "epoch": 0.1, "grad_norm": 0.6973259094949953, "learning_rate": 1.9765898755272328e-05, "loss": 0.6394, "step": 759 }, { "epoch": 0.1, "grad_norm": 0.7019874579271077, "learning_rate": 1.9765007905848385e-05, "loss": 0.6536, "step": 760 }, { "epoch": 0.1, "grad_norm": 0.5626502082831508, "learning_rate": 1.9764115384773722e-05, "loss": 0.607, "step": 761 }, { "epoch": 0.1, "grad_norm": 0.6777698311615515, "learning_rate": 1.9763221192201135e-05, "loss": 0.6181, "step": 762 }, { "epoch": 0.1, "grad_norm": 0.6636592810875847, "learning_rate": 1.9762325328283696e-05, "loss": 0.6454, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.6300047509341475, "learning_rate": 1.9761427793174765e-05, "loss": 0.6685, "step": 764 }, { "epoch": 0.1, "grad_norm": 0.6513433160406037, "learning_rate": 1.976052858702799e-05, "loss": 0.609, "step": 765 }, { "epoch": 0.1, "grad_norm": 0.6286898734473909, "learning_rate": 1.9759627709997304e-05, "loss": 0.653, "step": 766 }, { "epoch": 0.1, "grad_norm": 0.7492567500285919, "learning_rate": 1.9758725162236926e-05, "loss": 0.6482, "step": 767 }, { "epoch": 0.1, "grad_norm": 0.7215886202142671, "learning_rate": 1.975782094390136e-05, "loss": 0.6389, "step": 768 }, { "epoch": 0.1, "grad_norm": 0.6946751275263455, "learning_rate": 1.97569150551454e-05, "loss": 0.66, "step": 769 }, { "epoch": 0.1, "grad_norm": 0.6622618386918137, "learning_rate": 1.9756007496124122e-05, "loss": 0.6049, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.5604465367443975, "learning_rate": 1.975509826699289e-05, "loss": 0.6263, "step": 771 }, { "epoch": 0.1, "grad_norm": 0.586557567694135, "learning_rate": 1.975418736790735e-05, "loss": 0.6162, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.88074940240971, "learning_rate": 1.975327479902344e-05, "loss": 0.6437, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.5618388107519253, "learning_rate": 1.9752360560497376e-05, "loss": 0.6588, "step": 774 }, { "epoch": 0.1, "grad_norm": 0.666359965209519, "learning_rate": 1.975144465248567e-05, "loss": 0.6533, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.6325536351497723, "learning_rate": 1.9750527075145115e-05, "loss": 0.6495, "step": 776 }, { "epoch": 0.1, "grad_norm": 0.8082318161948079, "learning_rate": 1.9749607828632783e-05, "loss": 0.6042, "step": 777 }, { "epoch": 0.1, "grad_norm": 0.5764088600667929, "learning_rate": 1.9748686913106045e-05, "loss": 0.6166, "step": 778 }, { "epoch": 0.1, "grad_norm": 0.589526508678739, "learning_rate": 1.9747764328722542e-05, "loss": 0.6322, "step": 779 }, { "epoch": 0.1, "grad_norm": 0.569937457714446, "learning_rate": 1.9746840075640213e-05, "loss": 0.6583, "step": 780 }, { "epoch": 0.1, "grad_norm": 0.6196004785967826, "learning_rate": 1.9745914154017283e-05, "loss": 0.6551, "step": 781 }, { "epoch": 0.1, "grad_norm": 0.6061564851369282, "learning_rate": 1.9744986564012256e-05, "loss": 0.6342, "step": 782 }, { "epoch": 0.1, "grad_norm": 0.5867268049492693, "learning_rate": 1.9744057305783924e-05, "loss": 0.6708, "step": 783 }, { "epoch": 0.1, "grad_norm": 0.5821571215548799, "learning_rate": 1.9743126379491362e-05, "loss": 0.6684, "step": 784 }, { "epoch": 0.1, "grad_norm": 0.5683630998060758, "learning_rate": 1.9742193785293935e-05, "loss": 0.6715, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.5899704607157412, "learning_rate": 1.9741259523351295e-05, "loss": 0.6642, "step": 786 }, { "epoch": 0.1, "grad_norm": 0.5684846955091579, "learning_rate": 1.9740323593823372e-05, "loss": 0.667, "step": 787 }, { "epoch": 0.1, "grad_norm": 0.6923208819074771, "learning_rate": 1.9739385996870387e-05, "loss": 0.6329, "step": 788 }, { "epoch": 0.1, "grad_norm": 0.5752135667005667, "learning_rate": 1.9738446732652847e-05, "loss": 0.6256, "step": 789 }, { "epoch": 0.1, "grad_norm": 0.6411539089229052, "learning_rate": 1.973750580133154e-05, "loss": 0.634, "step": 790 }, { "epoch": 0.1, "grad_norm": 0.6370644589210233, "learning_rate": 1.9736563203067542e-05, "loss": 0.6659, "step": 791 }, { "epoch": 0.1, "grad_norm": 0.5712124058378751, "learning_rate": 1.973561893802222e-05, "loss": 0.6449, "step": 792 }, { "epoch": 0.1, "grad_norm": 0.6536659354955671, "learning_rate": 1.973467300635721e-05, "loss": 0.6874, "step": 793 }, { "epoch": 0.1, "grad_norm": 0.5971838468867255, "learning_rate": 1.9733725408234453e-05, "loss": 0.6305, "step": 794 }, { "epoch": 0.1, "grad_norm": 0.6955108486035582, "learning_rate": 1.9732776143816165e-05, "loss": 0.6836, "step": 795 }, { "epoch": 0.1, "grad_norm": 0.6576651744011942, "learning_rate": 1.9731825213264846e-05, "loss": 0.68, "step": 796 }, { "epoch": 0.1, "grad_norm": 0.6058029814864676, "learning_rate": 1.9730872616743284e-05, "loss": 0.6541, "step": 797 }, { "epoch": 0.1, "grad_norm": 0.619956342992786, "learning_rate": 1.9729918354414555e-05, "loss": 0.6282, "step": 798 }, { "epoch": 0.1, "grad_norm": 0.5645438311244262, "learning_rate": 1.9728962426442013e-05, "loss": 0.6376, "step": 799 }, { "epoch": 0.1, "grad_norm": 0.6073098797093919, "learning_rate": 1.9728004832989303e-05, "loss": 0.6494, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.5901249992528224, "learning_rate": 1.9727045574220354e-05, "loss": 0.6294, "step": 801 }, { "epoch": 0.1, "grad_norm": 0.6290000821755504, "learning_rate": 1.972608465029938e-05, "loss": 0.6454, "step": 802 }, { "epoch": 0.1, "grad_norm": 0.554253456285075, "learning_rate": 1.9725122061390877e-05, "loss": 0.6528, "step": 803 }, { "epoch": 0.1, "grad_norm": 0.6485007351302607, "learning_rate": 1.9724157807659633e-05, "loss": 0.6426, "step": 804 }, { "epoch": 0.1, "grad_norm": 0.6358604464726486, "learning_rate": 1.972319188927071e-05, "loss": 0.6752, "step": 805 }, { "epoch": 0.1, "grad_norm": 0.5687782124320284, "learning_rate": 1.9722224306389467e-05, "loss": 0.6399, "step": 806 }, { "epoch": 0.1, "grad_norm": 0.5546541678529234, "learning_rate": 1.972125505918154e-05, "loss": 0.6247, "step": 807 }, { "epoch": 0.1, "grad_norm": 0.5924337542329928, "learning_rate": 1.972028414781285e-05, "loss": 0.6429, "step": 808 }, { "epoch": 0.1, "grad_norm": 0.6232909128549393, "learning_rate": 1.9719311572449612e-05, "loss": 0.6647, "step": 809 }, { "epoch": 0.1, "grad_norm": 0.6067715158556676, "learning_rate": 1.9718337333258315e-05, "loss": 0.6315, "step": 810 }, { "epoch": 0.1, "grad_norm": 0.5060109233964944, "learning_rate": 1.9717361430405733e-05, "loss": 0.6269, "step": 811 }, { "epoch": 0.1, "grad_norm": 0.5518702298970853, "learning_rate": 1.9716383864058936e-05, "loss": 0.641, "step": 812 }, { "epoch": 0.1, "grad_norm": 0.529585112709423, "learning_rate": 1.971540463438527e-05, "loss": 0.622, "step": 813 }, { "epoch": 0.1, "grad_norm": 0.6373878772642273, "learning_rate": 1.9714423741552363e-05, "loss": 0.6516, "step": 814 }, { "epoch": 0.1, "grad_norm": 0.5843611415722382, "learning_rate": 1.9713441185728134e-05, "loss": 0.669, "step": 815 }, { "epoch": 0.1, "grad_norm": 0.5711092754380963, "learning_rate": 1.9712456967080787e-05, "loss": 0.6347, "step": 816 }, { "epoch": 0.1, "grad_norm": 0.5876376228867585, "learning_rate": 1.9711471085778803e-05, "loss": 0.6253, "step": 817 }, { "epoch": 0.1, "grad_norm": 0.6177274265076647, "learning_rate": 1.9710483541990962e-05, "loss": 0.6545, "step": 818 }, { "epoch": 0.1, "grad_norm": 0.7633089213209584, "learning_rate": 1.970949433588631e-05, "loss": 0.6736, "step": 819 }, { "epoch": 0.1, "grad_norm": 0.5789156326295469, "learning_rate": 1.970850346763419e-05, "loss": 0.6464, "step": 820 }, { "epoch": 0.1, "grad_norm": 0.6117269327212728, "learning_rate": 1.9707510937404225e-05, "loss": 0.6384, "step": 821 }, { "epoch": 0.11, "grad_norm": 0.7118470623762596, "learning_rate": 1.970651674536633e-05, "loss": 0.6448, "step": 822 }, { "epoch": 0.11, "grad_norm": 0.8390786650989326, "learning_rate": 1.9705520891690697e-05, "loss": 0.6644, "step": 823 }, { "epoch": 0.11, "grad_norm": 1.0339337062656153, "learning_rate": 1.9704523376547796e-05, "loss": 0.596, "step": 824 }, { "epoch": 0.11, "grad_norm": 0.6541797765743038, "learning_rate": 1.97035242001084e-05, "loss": 0.6489, "step": 825 }, { "epoch": 0.11, "grad_norm": 0.591030065090138, "learning_rate": 1.9702523362543552e-05, "loss": 0.6191, "step": 826 }, { "epoch": 0.11, "grad_norm": 0.662459468961751, "learning_rate": 1.9701520864024583e-05, "loss": 0.6359, "step": 827 }, { "epoch": 0.11, "grad_norm": 0.5800836396268284, "learning_rate": 1.9700516704723104e-05, "loss": 0.6229, "step": 828 }, { "epoch": 0.11, "grad_norm": 0.696867521628428, "learning_rate": 1.969951088481102e-05, "loss": 0.6671, "step": 829 }, { "epoch": 0.11, "grad_norm": 0.6356124540764878, "learning_rate": 1.9698503404460522e-05, "loss": 0.604, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.7000361209973213, "learning_rate": 1.9697494263844064e-05, "loss": 0.6527, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.5992519518031945, "learning_rate": 1.9696483463134404e-05, "loss": 0.5864, "step": 832 }, { "epoch": 0.11, "grad_norm": 0.6528110492431415, "learning_rate": 1.9695471002504584e-05, "loss": 0.6534, "step": 833 }, { "epoch": 0.11, "grad_norm": 0.7533542432335769, "learning_rate": 1.9694456882127917e-05, "loss": 0.6508, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.5812783280360867, "learning_rate": 1.9693441102178014e-05, "loss": 0.6565, "step": 835 }, { "epoch": 0.11, "grad_norm": 0.6306108275526119, "learning_rate": 1.9692423662828764e-05, "loss": 0.6508, "step": 836 }, { "epoch": 0.11, "grad_norm": 0.590720035791422, "learning_rate": 1.969140456425434e-05, "loss": 0.6357, "step": 837 }, { "epoch": 0.11, "grad_norm": 0.5938192487526828, "learning_rate": 1.9690383806629195e-05, "loss": 0.6595, "step": 838 }, { "epoch": 0.11, "grad_norm": 0.5295387939231456, "learning_rate": 1.968936139012807e-05, "loss": 0.6168, "step": 839 }, { "epoch": 0.11, "grad_norm": 0.5510514653652359, "learning_rate": 1.9688337314926e-05, "loss": 0.6158, "step": 840 }, { "epoch": 0.11, "grad_norm": 0.6305699290812861, "learning_rate": 1.9687311581198284e-05, "loss": 0.6249, "step": 841 }, { "epoch": 0.11, "grad_norm": 0.6587882211128383, "learning_rate": 1.9686284189120517e-05, "loss": 0.6787, "step": 842 }, { "epoch": 0.11, "grad_norm": 0.5693387809562087, "learning_rate": 1.968525513886858e-05, "loss": 0.647, "step": 843 }, { "epoch": 0.11, "grad_norm": 0.575320568345402, "learning_rate": 1.9684224430618634e-05, "loss": 0.6393, "step": 844 }, { "epoch": 0.11, "grad_norm": 0.6653973644709057, "learning_rate": 1.968319206454712e-05, "loss": 0.6804, "step": 845 }, { "epoch": 0.11, "grad_norm": 0.5979279944627486, "learning_rate": 1.9682158040830767e-05, "loss": 0.6217, "step": 846 }, { "epoch": 0.11, "grad_norm": 0.6774099426504863, "learning_rate": 1.968112235964659e-05, "loss": 0.6124, "step": 847 }, { "epoch": 0.11, "grad_norm": 0.596745960660372, "learning_rate": 1.968008502117188e-05, "loss": 0.6505, "step": 848 }, { "epoch": 0.11, "grad_norm": 0.6348292389364691, "learning_rate": 1.9679046025584222e-05, "loss": 0.6534, "step": 849 }, { "epoch": 0.11, "grad_norm": 0.5599643125368168, "learning_rate": 1.967800537306148e-05, "loss": 0.6387, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.5303384782492828, "learning_rate": 1.9676963063781795e-05, "loss": 0.6077, "step": 851 }, { "epoch": 0.11, "grad_norm": 0.5623981398338277, "learning_rate": 1.9675919097923603e-05, "loss": 0.6105, "step": 852 }, { "epoch": 0.11, "grad_norm": 0.5318706055775675, "learning_rate": 1.967487347566562e-05, "loss": 0.6126, "step": 853 }, { "epoch": 0.11, "grad_norm": 0.6372028418384882, "learning_rate": 1.9673826197186836e-05, "loss": 0.6602, "step": 854 }, { "epoch": 0.11, "grad_norm": 0.5763113567580008, "learning_rate": 1.967277726266654e-05, "loss": 0.6343, "step": 855 }, { "epoch": 0.11, "grad_norm": 0.6469084761754818, "learning_rate": 1.9671726672284288e-05, "loss": 0.6585, "step": 856 }, { "epoch": 0.11, "grad_norm": 0.6180647430981827, "learning_rate": 1.967067442621994e-05, "loss": 0.6543, "step": 857 }, { "epoch": 0.11, "grad_norm": 0.6593213706468323, "learning_rate": 1.9669620524653617e-05, "loss": 0.6775, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.6275352403249027, "learning_rate": 1.9668564967765742e-05, "loss": 0.6048, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.6574185385748393, "learning_rate": 1.966750775573701e-05, "loss": 0.672, "step": 860 }, { "epoch": 0.11, "grad_norm": 0.6008578575357809, "learning_rate": 1.96664488887484e-05, "loss": 0.6322, "step": 861 }, { "epoch": 0.11, "grad_norm": 0.5776177154629152, "learning_rate": 1.9665388366981185e-05, "loss": 0.6426, "step": 862 }, { "epoch": 0.11, "grad_norm": 0.6206826011732526, "learning_rate": 1.9664326190616906e-05, "loss": 0.6576, "step": 863 }, { "epoch": 0.11, "grad_norm": 0.6371294846117488, "learning_rate": 1.9663262359837395e-05, "loss": 0.6424, "step": 864 }, { "epoch": 0.11, "grad_norm": 0.5678454810323986, "learning_rate": 1.9662196874824774e-05, "loss": 0.6343, "step": 865 }, { "epoch": 0.11, "grad_norm": 0.6202800787152287, "learning_rate": 1.9661129735761434e-05, "loss": 0.6483, "step": 866 }, { "epoch": 0.11, "grad_norm": 0.6789533133438714, "learning_rate": 1.966006094283006e-05, "loss": 0.6868, "step": 867 }, { "epoch": 0.11, "grad_norm": 0.6143625804076635, "learning_rate": 1.9658990496213614e-05, "loss": 0.5956, "step": 868 }, { "epoch": 0.11, "grad_norm": 0.5894434882787132, "learning_rate": 1.9657918396095347e-05, "loss": 0.6046, "step": 869 }, { "epoch": 0.11, "grad_norm": 0.5484703965164476, "learning_rate": 1.9656844642658783e-05, "loss": 0.6283, "step": 870 }, { "epoch": 0.11, "grad_norm": 0.5530869562952979, "learning_rate": 1.965576923608774e-05, "loss": 0.6112, "step": 871 }, { "epoch": 0.11, "grad_norm": 0.5484871891319214, "learning_rate": 1.9654692176566317e-05, "loss": 0.6216, "step": 872 }, { "epoch": 0.11, "grad_norm": 0.6143525312189001, "learning_rate": 1.965361346427889e-05, "loss": 0.6355, "step": 873 }, { "epoch": 0.11, "grad_norm": 0.6167881394579287, "learning_rate": 1.9652533099410123e-05, "loss": 0.652, "step": 874 }, { "epoch": 0.11, "grad_norm": 0.6880543152508561, "learning_rate": 1.965145108214496e-05, "loss": 0.6393, "step": 875 }, { "epoch": 0.11, "grad_norm": 0.4945505005000287, "learning_rate": 1.9650367412668628e-05, "loss": 0.6056, "step": 876 }, { "epoch": 0.11, "grad_norm": 0.7016874325240393, "learning_rate": 1.9649282091166636e-05, "loss": 0.6383, "step": 877 }, { "epoch": 0.11, "grad_norm": 1.205089479918453, "learning_rate": 1.9648195117824786e-05, "loss": 0.707, "step": 878 }, { "epoch": 0.11, "grad_norm": 0.6960766685787422, "learning_rate": 1.9647106492829153e-05, "loss": 0.6592, "step": 879 }, { "epoch": 0.11, "grad_norm": 0.5845567681051098, "learning_rate": 1.9646016216366088e-05, "loss": 0.6517, "step": 880 }, { "epoch": 0.11, "grad_norm": 0.643937230324734, "learning_rate": 1.964492428862224e-05, "loss": 0.6285, "step": 881 }, { "epoch": 0.11, "grad_norm": 0.6005789172877711, "learning_rate": 1.9643830709784535e-05, "loss": 0.6235, "step": 882 }, { "epoch": 0.11, "grad_norm": 0.6042712237390545, "learning_rate": 1.9642735480040175e-05, "loss": 0.6424, "step": 883 }, { "epoch": 0.11, "grad_norm": 0.7248361752196929, "learning_rate": 1.9641638599576655e-05, "loss": 0.6693, "step": 884 }, { "epoch": 0.11, "grad_norm": 0.5739165927501104, "learning_rate": 1.9640540068581743e-05, "loss": 0.6395, "step": 885 }, { "epoch": 0.11, "grad_norm": 0.595143938120411, "learning_rate": 1.96394398872435e-05, "loss": 0.583, "step": 886 }, { "epoch": 0.11, "grad_norm": 0.582531033849866, "learning_rate": 1.9638338055750257e-05, "loss": 0.6291, "step": 887 }, { "epoch": 0.11, "grad_norm": 0.5913040429405796, "learning_rate": 1.963723457429064e-05, "loss": 0.6178, "step": 888 }, { "epoch": 0.11, "grad_norm": 0.7760727754598451, "learning_rate": 1.9636129443053545e-05, "loss": 0.6347, "step": 889 }, { "epoch": 0.11, "grad_norm": 0.5542530974817929, "learning_rate": 1.9635022662228164e-05, "loss": 0.6466, "step": 890 }, { "epoch": 0.11, "grad_norm": 0.6467958648989338, "learning_rate": 1.9633914232003964e-05, "loss": 0.6258, "step": 891 }, { "epoch": 0.11, "grad_norm": 0.6053413718591839, "learning_rate": 1.9632804152570685e-05, "loss": 0.6284, "step": 892 }, { "epoch": 0.11, "grad_norm": 0.5711061312744138, "learning_rate": 1.9631692424118374e-05, "loss": 0.5854, "step": 893 }, { "epoch": 0.11, "grad_norm": 0.5605872185333433, "learning_rate": 1.9630579046837335e-05, "loss": 0.609, "step": 894 }, { "epoch": 0.11, "grad_norm": 0.5990941456401807, "learning_rate": 1.9629464020918164e-05, "loss": 0.6165, "step": 895 }, { "epoch": 0.11, "grad_norm": 0.6011313944911776, "learning_rate": 1.9628347346551748e-05, "loss": 0.6415, "step": 896 }, { "epoch": 0.11, "grad_norm": 0.6253467411155867, "learning_rate": 1.9627229023929243e-05, "loss": 0.6107, "step": 897 }, { "epoch": 0.11, "grad_norm": 0.7022225362080013, "learning_rate": 1.9626109053242092e-05, "loss": 0.6857, "step": 898 }, { "epoch": 0.11, "grad_norm": 0.6742275080667947, "learning_rate": 1.962498743468202e-05, "loss": 0.6401, "step": 899 }, { "epoch": 0.11, "grad_norm": 0.6282234004864075, "learning_rate": 1.9623864168441038e-05, "loss": 0.6829, "step": 900 }, { "epoch": 0.12, "grad_norm": 0.6000690867005796, "learning_rate": 1.962273925471143e-05, "loss": 0.6211, "step": 901 }, { "epoch": 0.12, "grad_norm": 0.6046412643238872, "learning_rate": 1.9621612693685772e-05, "loss": 0.6458, "step": 902 }, { "epoch": 0.12, "grad_norm": 0.5942752948983622, "learning_rate": 1.9620484485556916e-05, "loss": 0.6442, "step": 903 }, { "epoch": 0.12, "grad_norm": 0.7128727409628098, "learning_rate": 1.9619354630518e-05, "loss": 0.6696, "step": 904 }, { "epoch": 0.12, "grad_norm": 0.6004703868744012, "learning_rate": 1.9618223128762436e-05, "loss": 0.6444, "step": 905 }, { "epoch": 0.12, "grad_norm": 0.6433296793108491, "learning_rate": 1.961708998048393e-05, "loss": 0.6285, "step": 906 }, { "epoch": 0.12, "grad_norm": 0.6032038542186711, "learning_rate": 1.9615955185876454e-05, "loss": 0.6705, "step": 907 }, { "epoch": 0.12, "grad_norm": 0.6482139803113889, "learning_rate": 1.961481874513428e-05, "loss": 0.6239, "step": 908 }, { "epoch": 0.12, "grad_norm": 0.5865548786438484, "learning_rate": 1.961368065845195e-05, "loss": 0.6572, "step": 909 }, { "epoch": 0.12, "grad_norm": 0.6640430035045246, "learning_rate": 1.961254092602429e-05, "loss": 0.6617, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.5406884798487346, "learning_rate": 1.961139954804641e-05, "loss": 0.607, "step": 911 }, { "epoch": 0.12, "grad_norm": 0.5688189722072281, "learning_rate": 1.96102565247137e-05, "loss": 0.6485, "step": 912 }, { "epoch": 0.12, "grad_norm": 0.6395456002437978, "learning_rate": 1.9609111856221827e-05, "loss": 0.5851, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.5646019132222537, "learning_rate": 1.9607965542766746e-05, "loss": 0.6192, "step": 914 }, { "epoch": 0.12, "grad_norm": 0.6508985913044142, "learning_rate": 1.9606817584544698e-05, "loss": 0.6623, "step": 915 }, { "epoch": 0.12, "grad_norm": 0.7197240694148868, "learning_rate": 1.9605667981752193e-05, "loss": 0.6347, "step": 916 }, { "epoch": 0.12, "grad_norm": 0.5809544943508039, "learning_rate": 1.960451673458603e-05, "loss": 0.6416, "step": 917 }, { "epoch": 0.12, "grad_norm": 0.6412727723113241, "learning_rate": 1.9603363843243293e-05, "loss": 0.6431, "step": 918 }, { "epoch": 0.12, "grad_norm": 0.6238225289704794, "learning_rate": 1.9602209307921336e-05, "loss": 0.599, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.7013482394091042, "learning_rate": 1.9601053128817807e-05, "loss": 0.6414, "step": 920 }, { "epoch": 0.12, "grad_norm": 0.6017426783237735, "learning_rate": 1.9599895306130628e-05, "loss": 0.6313, "step": 921 }, { "epoch": 0.12, "grad_norm": 0.6648813967065245, "learning_rate": 1.9598735840058002e-05, "loss": 0.6535, "step": 922 }, { "epoch": 0.12, "grad_norm": 0.6302296259519148, "learning_rate": 1.9597574730798423e-05, "loss": 0.6408, "step": 923 }, { "epoch": 0.12, "grad_norm": 0.5731155936188661, "learning_rate": 1.959641197855065e-05, "loss": 0.604, "step": 924 }, { "epoch": 0.12, "grad_norm": 0.612565769608997, "learning_rate": 1.9595247583513733e-05, "loss": 0.6244, "step": 925 }, { "epoch": 0.12, "grad_norm": 0.61081032638244, "learning_rate": 1.959408154588701e-05, "loss": 0.6428, "step": 926 }, { "epoch": 0.12, "grad_norm": 0.5197097912664315, "learning_rate": 1.959291386587008e-05, "loss": 0.5999, "step": 927 }, { "epoch": 0.12, "grad_norm": 0.6155866085035205, "learning_rate": 1.959174454366285e-05, "loss": 0.6281, "step": 928 }, { "epoch": 0.12, "grad_norm": 0.6006353789466106, "learning_rate": 1.9590573579465483e-05, "loss": 0.6404, "step": 929 }, { "epoch": 0.12, "grad_norm": 0.6199209537940791, "learning_rate": 1.9589400973478438e-05, "loss": 0.6483, "step": 930 }, { "epoch": 0.12, "grad_norm": 0.5732757872411528, "learning_rate": 1.9588226725902453e-05, "loss": 0.6115, "step": 931 }, { "epoch": 0.12, "grad_norm": 0.530621505784736, "learning_rate": 1.9587050836938538e-05, "loss": 0.6189, "step": 932 }, { "epoch": 0.12, "grad_norm": 0.6009818691013186, "learning_rate": 1.9585873306787996e-05, "loss": 0.6401, "step": 933 }, { "epoch": 0.12, "grad_norm": 0.6870655535514266, "learning_rate": 1.958469413565241e-05, "loss": 0.676, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.599801973422235, "learning_rate": 1.958351332373363e-05, "loss": 0.642, "step": 935 }, { "epoch": 0.12, "grad_norm": 0.6719067327616178, "learning_rate": 1.9582330871233803e-05, "loss": 0.6494, "step": 936 }, { "epoch": 0.12, "grad_norm": 1.024902324759718, "learning_rate": 1.9581146778355347e-05, "loss": 0.6203, "step": 937 }, { "epoch": 0.12, "grad_norm": 0.6583789751708764, "learning_rate": 1.9579961045300968e-05, "loss": 0.6627, "step": 938 }, { "epoch": 0.12, "grad_norm": 0.6572498329051489, "learning_rate": 1.9578773672273647e-05, "loss": 0.6891, "step": 939 }, { "epoch": 0.12, "grad_norm": 0.662049602999746, "learning_rate": 1.9577584659476647e-05, "loss": 0.6834, "step": 940 }, { "epoch": 0.12, "grad_norm": 0.6089826145521401, "learning_rate": 1.9576394007113516e-05, "loss": 0.6255, "step": 941 }, { "epoch": 0.12, "grad_norm": 0.6506871961482493, "learning_rate": 1.9575201715388077e-05, "loss": 0.6474, "step": 942 }, { "epoch": 0.12, "grad_norm": 0.7058739263642669, "learning_rate": 1.9574007784504435e-05, "loss": 0.64, "step": 943 }, { "epoch": 0.12, "grad_norm": 0.6477481441370561, "learning_rate": 1.9572812214666978e-05, "loss": 0.639, "step": 944 }, { "epoch": 0.12, "grad_norm": 0.6213082851145784, "learning_rate": 1.957161500608037e-05, "loss": 0.6407, "step": 945 }, { "epoch": 0.12, "grad_norm": 0.619534218536301, "learning_rate": 1.9570416158949563e-05, "loss": 0.6269, "step": 946 }, { "epoch": 0.12, "grad_norm": 0.6361647087667565, "learning_rate": 1.9569215673479783e-05, "loss": 0.6573, "step": 947 }, { "epoch": 0.12, "grad_norm": 0.628574375094248, "learning_rate": 1.9568013549876536e-05, "loss": 0.662, "step": 948 }, { "epoch": 0.12, "grad_norm": 0.6026424377143714, "learning_rate": 1.9566809788345614e-05, "loss": 0.6357, "step": 949 }, { "epoch": 0.12, "grad_norm": 0.578311970870176, "learning_rate": 1.956560438909309e-05, "loss": 0.6544, "step": 950 }, { "epoch": 0.12, "grad_norm": 0.628815908035834, "learning_rate": 1.9564397352325306e-05, "loss": 0.6721, "step": 951 }, { "epoch": 0.12, "grad_norm": 0.5961507757284692, "learning_rate": 1.9563188678248893e-05, "loss": 0.6472, "step": 952 }, { "epoch": 0.12, "grad_norm": 0.5812976121139461, "learning_rate": 1.956197836707077e-05, "loss": 0.6762, "step": 953 }, { "epoch": 0.12, "grad_norm": 0.5207328571282133, "learning_rate": 1.956076641899812e-05, "loss": 0.6128, "step": 954 }, { "epoch": 0.12, "grad_norm": 0.6085198497413806, "learning_rate": 1.955955283423841e-05, "loss": 0.5897, "step": 955 }, { "epoch": 0.12, "grad_norm": 0.6150280937687022, "learning_rate": 1.95583376129994e-05, "loss": 0.6319, "step": 956 }, { "epoch": 0.12, "grad_norm": 0.5791246182421204, "learning_rate": 1.9557120755489117e-05, "loss": 0.6039, "step": 957 }, { "epoch": 0.12, "grad_norm": 0.8558320807065908, "learning_rate": 1.9555902261915875e-05, "loss": 0.6489, "step": 958 }, { "epoch": 0.12, "grad_norm": 0.5874549265788265, "learning_rate": 1.9554682132488257e-05, "loss": 0.6224, "step": 959 }, { "epoch": 0.12, "grad_norm": 0.7111551594663519, "learning_rate": 1.9553460367415146e-05, "loss": 0.6619, "step": 960 }, { "epoch": 0.12, "grad_norm": 0.6633134470054496, "learning_rate": 1.955223696690568e-05, "loss": 0.6566, "step": 961 }, { "epoch": 0.12, "grad_norm": 0.6911268834979369, "learning_rate": 1.9551011931169306e-05, "loss": 0.6355, "step": 962 }, { "epoch": 0.12, "grad_norm": 0.6304498903528781, "learning_rate": 1.9549785260415722e-05, "loss": 0.6478, "step": 963 }, { "epoch": 0.12, "grad_norm": 0.5925264714988157, "learning_rate": 1.9548556954854926e-05, "loss": 0.656, "step": 964 }, { "epoch": 0.12, "grad_norm": 0.5481204434095786, "learning_rate": 1.954732701469719e-05, "loss": 0.6135, "step": 965 }, { "epoch": 0.12, "grad_norm": 0.5551481753250457, "learning_rate": 1.9546095440153057e-05, "loss": 0.611, "step": 966 }, { "epoch": 0.12, "grad_norm": 0.6024550342597558, "learning_rate": 1.9544862231433365e-05, "loss": 0.6454, "step": 967 }, { "epoch": 0.12, "grad_norm": 0.5816937035114577, "learning_rate": 1.954362738874922e-05, "loss": 0.631, "step": 968 }, { "epoch": 0.12, "grad_norm": 0.6492360671888313, "learning_rate": 1.954239091231202e-05, "loss": 0.6505, "step": 969 }, { "epoch": 0.12, "grad_norm": 0.6346836837999771, "learning_rate": 1.954115280233342e-05, "loss": 0.655, "step": 970 }, { "epoch": 0.12, "grad_norm": 0.5492300082307372, "learning_rate": 1.953991305902538e-05, "loss": 0.6154, "step": 971 }, { "epoch": 0.12, "grad_norm": 0.6129329296390751, "learning_rate": 1.953867168260013e-05, "loss": 0.6173, "step": 972 }, { "epoch": 0.12, "grad_norm": 0.6102414050123521, "learning_rate": 1.9537428673270176e-05, "loss": 0.6575, "step": 973 }, { "epoch": 0.12, "grad_norm": 0.6436631435684956, "learning_rate": 1.9536184031248303e-05, "loss": 0.656, "step": 974 }, { "epoch": 0.12, "grad_norm": 0.6463617000147447, "learning_rate": 1.9534937756747583e-05, "loss": 0.652, "step": 975 }, { "epoch": 0.12, "grad_norm": 0.5493515438672257, "learning_rate": 1.953368984998136e-05, "loss": 0.6216, "step": 976 }, { "epoch": 0.12, "grad_norm": 0.5590992225587543, "learning_rate": 1.9532440311163265e-05, "loss": 0.6343, "step": 977 }, { "epoch": 0.12, "grad_norm": 0.6121069135303666, "learning_rate": 1.9531189140507198e-05, "loss": 0.6634, "step": 978 }, { "epoch": 0.13, "grad_norm": 0.576183081482826, "learning_rate": 1.9529936338227346e-05, "loss": 0.632, "step": 979 }, { "epoch": 0.13, "grad_norm": 0.5367175469880755, "learning_rate": 1.9528681904538176e-05, "loss": 0.6323, "step": 980 }, { "epoch": 0.13, "grad_norm": 0.6729718018614982, "learning_rate": 1.952742583965443e-05, "loss": 0.66, "step": 981 }, { "epoch": 0.13, "grad_norm": 0.6693335907769907, "learning_rate": 1.9526168143791133e-05, "loss": 0.6062, "step": 982 }, { "epoch": 0.13, "grad_norm": 0.6163259174138652, "learning_rate": 1.9524908817163583e-05, "loss": 0.6353, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.527102367298447, "learning_rate": 1.9523647859987367e-05, "loss": 0.6112, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.689168576956625, "learning_rate": 1.952238527247834e-05, "loss": 0.6348, "step": 985 }, { "epoch": 0.13, "grad_norm": 0.6290530069791691, "learning_rate": 1.9521121054852643e-05, "loss": 0.6132, "step": 986 }, { "epoch": 0.13, "grad_norm": 0.6553359676288152, "learning_rate": 1.9519855207326695e-05, "loss": 0.6533, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.6800899344634901, "learning_rate": 1.95185877301172e-05, "loss": 0.6989, "step": 988 }, { "epoch": 0.13, "grad_norm": 0.616121162818249, "learning_rate": 1.9517318623441128e-05, "loss": 0.6542, "step": 989 }, { "epoch": 0.13, "grad_norm": 0.6707545234501346, "learning_rate": 1.951604788751573e-05, "loss": 0.6794, "step": 990 }, { "epoch": 0.13, "grad_norm": 0.6084880998720754, "learning_rate": 1.9514775522558555e-05, "loss": 0.6054, "step": 991 }, { "epoch": 0.13, "grad_norm": 0.6276960592811088, "learning_rate": 1.9513501528787404e-05, "loss": 0.6674, "step": 992 }, { "epoch": 0.13, "grad_norm": 0.59973582700107, "learning_rate": 1.9512225906420373e-05, "loss": 0.6443, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.6314235624628821, "learning_rate": 1.9510948655675836e-05, "loss": 0.6368, "step": 994 }, { "epoch": 0.13, "grad_norm": 0.5542030879054274, "learning_rate": 1.9509669776772434e-05, "loss": 0.6054, "step": 995 }, { "epoch": 0.13, "grad_norm": 0.5598421941388445, "learning_rate": 1.9508389269929103e-05, "loss": 0.6129, "step": 996 }, { "epoch": 0.13, "grad_norm": 0.6451594760113435, "learning_rate": 1.950710713536505e-05, "loss": 0.6511, "step": 997 }, { "epoch": 0.13, "grad_norm": 0.6126231883209187, "learning_rate": 1.9505823373299765e-05, "loss": 0.6297, "step": 998 }, { "epoch": 0.13, "grad_norm": 0.6073997424858663, "learning_rate": 1.9504537983953e-05, "loss": 0.6694, "step": 999 }, { "epoch": 0.13, "grad_norm": 0.5821755426617294, "learning_rate": 1.950325096754481e-05, "loss": 0.6377, "step": 1000 }, { "epoch": 0.13, "grad_norm": 0.6517104184437209, "learning_rate": 1.950196232429551e-05, "loss": 0.6871, "step": 1001 }, { "epoch": 0.13, "grad_norm": 0.5812260569695867, "learning_rate": 1.9500672054425697e-05, "loss": 0.6304, "step": 1002 }, { "epoch": 0.13, "grad_norm": 0.6033082061097863, "learning_rate": 1.9499380158156258e-05, "loss": 0.6529, "step": 1003 }, { "epoch": 0.13, "grad_norm": 0.5445428141150256, "learning_rate": 1.9498086635708348e-05, "loss": 0.6627, "step": 1004 }, { "epoch": 0.13, "grad_norm": 0.5701180332000068, "learning_rate": 1.94967914873034e-05, "loss": 0.6242, "step": 1005 }, { "epoch": 0.13, "grad_norm": 0.5895219087087583, "learning_rate": 1.9495494713163127e-05, "loss": 0.6617, "step": 1006 }, { "epoch": 0.13, "grad_norm": 0.5688315168817594, "learning_rate": 1.9494196313509523e-05, "loss": 0.6132, "step": 1007 }, { "epoch": 0.13, "grad_norm": 0.6282781370076127, "learning_rate": 1.9492896288564857e-05, "loss": 0.6772, "step": 1008 }, { "epoch": 0.13, "grad_norm": 0.6923128584675309, "learning_rate": 1.9491594638551676e-05, "loss": 0.6409, "step": 1009 }, { "epoch": 0.13, "grad_norm": 0.63622424778598, "learning_rate": 1.9490291363692814e-05, "loss": 0.6491, "step": 1010 }, { "epoch": 0.13, "grad_norm": 0.5856932479697757, "learning_rate": 1.9488986464211366e-05, "loss": 0.6568, "step": 1011 }, { "epoch": 0.13, "grad_norm": 0.7046290131276685, "learning_rate": 1.948767994033072e-05, "loss": 0.5991, "step": 1012 }, { "epoch": 0.13, "grad_norm": 0.5766084417297631, "learning_rate": 1.9486371792274535e-05, "loss": 0.6267, "step": 1013 }, { "epoch": 0.13, "grad_norm": 0.6102021319283679, "learning_rate": 1.9485062020266754e-05, "loss": 0.6374, "step": 1014 }, { "epoch": 0.13, "grad_norm": 0.5630238521737179, "learning_rate": 1.9483750624531593e-05, "loss": 0.6113, "step": 1015 }, { "epoch": 0.13, "grad_norm": 0.5701761520969734, "learning_rate": 1.9482437605293542e-05, "loss": 0.6434, "step": 1016 }, { "epoch": 0.13, "grad_norm": 0.6065184194988289, "learning_rate": 1.9481122962777376e-05, "loss": 0.689, "step": 1017 }, { "epoch": 0.13, "grad_norm": 0.5807164296746014, "learning_rate": 1.9479806697208153e-05, "loss": 0.6236, "step": 1018 }, { "epoch": 0.13, "grad_norm": 0.576725585375623, "learning_rate": 1.9478488808811192e-05, "loss": 0.6539, "step": 1019 }, { "epoch": 0.13, "grad_norm": 0.6120226050383882, "learning_rate": 1.9477169297812108e-05, "loss": 0.6246, "step": 1020 }, { "epoch": 0.13, "grad_norm": 0.5982386930356159, "learning_rate": 1.947584816443677e-05, "loss": 0.6889, "step": 1021 }, { "epoch": 0.13, "grad_norm": 0.5579240757219102, "learning_rate": 1.9474525408911362e-05, "loss": 0.6244, "step": 1022 }, { "epoch": 0.13, "grad_norm": 0.6627588629642347, "learning_rate": 1.9473201031462308e-05, "loss": 0.6967, "step": 1023 }, { "epoch": 0.13, "grad_norm": 0.5803734680082587, "learning_rate": 1.947187503231633e-05, "loss": 0.6478, "step": 1024 }, { "epoch": 0.13, "grad_norm": 0.5615847706905223, "learning_rate": 1.9470547411700423e-05, "loss": 0.6661, "step": 1025 }, { "epoch": 0.13, "grad_norm": 0.6104154791831881, "learning_rate": 1.946921816984186e-05, "loss": 0.6192, "step": 1026 }, { "epoch": 0.13, "grad_norm": 0.5821342199435271, "learning_rate": 1.9467887306968187e-05, "loss": 0.6551, "step": 1027 }, { "epoch": 0.13, "grad_norm": 0.596570495117899, "learning_rate": 1.9466554823307238e-05, "loss": 0.6508, "step": 1028 }, { "epoch": 0.13, "grad_norm": 0.6249353443372666, "learning_rate": 1.9465220719087112e-05, "loss": 0.6592, "step": 1029 }, { "epoch": 0.13, "grad_norm": 0.5412411461402274, "learning_rate": 1.9463884994536198e-05, "loss": 0.6163, "step": 1030 }, { "epoch": 0.13, "grad_norm": 0.6028290695943033, "learning_rate": 1.9462547649883146e-05, "loss": 0.6568, "step": 1031 }, { "epoch": 0.13, "grad_norm": 0.6820457761044807, "learning_rate": 1.9461208685356903e-05, "loss": 0.6491, "step": 1032 }, { "epoch": 0.13, "grad_norm": 0.5705466094377294, "learning_rate": 1.9459868101186684e-05, "loss": 0.6187, "step": 1033 }, { "epoch": 0.13, "grad_norm": 0.6319574476341487, "learning_rate": 1.945852589760197e-05, "loss": 0.653, "step": 1034 }, { "epoch": 0.13, "grad_norm": 0.5592581502578178, "learning_rate": 1.9457182074832537e-05, "loss": 0.5992, "step": 1035 }, { "epoch": 0.13, "grad_norm": 0.5808161410673791, "learning_rate": 1.9455836633108433e-05, "loss": 0.649, "step": 1036 }, { "epoch": 0.13, "grad_norm": 0.7228580707272552, "learning_rate": 1.9454489572659976e-05, "loss": 0.6722, "step": 1037 }, { "epoch": 0.13, "grad_norm": 0.5160816156119239, "learning_rate": 1.945314089371777e-05, "loss": 0.5999, "step": 1038 }, { "epoch": 0.13, "grad_norm": 0.7244008401632522, "learning_rate": 1.9451790596512696e-05, "loss": 0.641, "step": 1039 }, { "epoch": 0.13, "grad_norm": 0.6582354323519889, "learning_rate": 1.9450438681275897e-05, "loss": 0.6965, "step": 1040 }, { "epoch": 0.13, "grad_norm": 0.5250640027497065, "learning_rate": 1.9449085148238815e-05, "loss": 0.6082, "step": 1041 }, { "epoch": 0.13, "grad_norm": 0.552603001377956, "learning_rate": 1.9447729997633155e-05, "loss": 0.6186, "step": 1042 }, { "epoch": 0.13, "grad_norm": 0.5747994588971826, "learning_rate": 1.94463732296909e-05, "loss": 0.6632, "step": 1043 }, { "epoch": 0.13, "grad_norm": 0.509301331942919, "learning_rate": 1.9445014844644317e-05, "loss": 0.5982, "step": 1044 }, { "epoch": 0.13, "grad_norm": 0.6639266137840945, "learning_rate": 1.944365484272594e-05, "loss": 0.6284, "step": 1045 }, { "epoch": 0.13, "grad_norm": 0.5838073500384379, "learning_rate": 1.944229322416859e-05, "loss": 0.6311, "step": 1046 }, { "epoch": 0.13, "grad_norm": 0.6066299957076883, "learning_rate": 1.944092998920536e-05, "loss": 0.6238, "step": 1047 }, { "epoch": 0.13, "grad_norm": 0.6895165016013078, "learning_rate": 1.9439565138069606e-05, "loss": 0.7055, "step": 1048 }, { "epoch": 0.13, "grad_norm": 0.6774287283400998, "learning_rate": 1.943819867099499e-05, "loss": 0.6699, "step": 1049 }, { "epoch": 0.13, "grad_norm": 0.5931448602724915, "learning_rate": 1.943683058821543e-05, "loss": 0.6289, "step": 1050 }, { "epoch": 0.13, "grad_norm": 0.6143242380595526, "learning_rate": 1.943546088996512e-05, "loss": 0.6518, "step": 1051 }, { "epoch": 0.13, "grad_norm": 0.5903639977111051, "learning_rate": 1.9434089576478543e-05, "loss": 0.651, "step": 1052 }, { "epoch": 0.13, "grad_norm": 0.5901892787832766, "learning_rate": 1.9432716647990445e-05, "loss": 0.6458, "step": 1053 }, { "epoch": 0.13, "grad_norm": 0.6276224230968964, "learning_rate": 1.943134210473586e-05, "loss": 0.6406, "step": 1054 }, { "epoch": 0.13, "grad_norm": 0.6139388287543148, "learning_rate": 1.9429965946950088e-05, "loss": 0.6742, "step": 1055 }, { "epoch": 0.13, "grad_norm": 0.5229409597493249, "learning_rate": 1.942858817486871e-05, "loss": 0.6299, "step": 1056 }, { "epoch": 0.14, "grad_norm": 0.6914433282668523, "learning_rate": 1.942720878872759e-05, "loss": 0.6841, "step": 1057 }, { "epoch": 0.14, "grad_norm": 0.5939188977351328, "learning_rate": 1.942582778876286e-05, "loss": 0.6552, "step": 1058 }, { "epoch": 0.14, "grad_norm": 0.5538415617993366, "learning_rate": 1.942444517521093e-05, "loss": 0.6227, "step": 1059 }, { "epoch": 0.14, "grad_norm": 0.5586307213393791, "learning_rate": 1.9423060948308487e-05, "loss": 0.6309, "step": 1060 }, { "epoch": 0.14, "grad_norm": 0.5555695336275793, "learning_rate": 1.9421675108292487e-05, "loss": 0.6196, "step": 1061 }, { "epoch": 0.14, "grad_norm": 0.6348138769022336, "learning_rate": 1.9420287655400178e-05, "loss": 0.6766, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.6319165366590856, "learning_rate": 1.941889858986907e-05, "loss": 0.657, "step": 1063 }, { "epoch": 0.14, "grad_norm": 0.5425665063261337, "learning_rate": 1.941750791193696e-05, "loss": 0.6087, "step": 1064 }, { "epoch": 0.14, "grad_norm": 0.5335363762343155, "learning_rate": 1.9416115621841906e-05, "loss": 0.6484, "step": 1065 }, { "epoch": 0.14, "grad_norm": 0.5967501017701419, "learning_rate": 1.941472171982226e-05, "loss": 0.6495, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.5618161874952553, "learning_rate": 1.9413326206116635e-05, "loss": 0.6374, "step": 1067 }, { "epoch": 0.14, "grad_norm": 0.6082941202384978, "learning_rate": 1.9411929080963928e-05, "loss": 0.6244, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.6713839230231693, "learning_rate": 1.9410530344603312e-05, "loss": 0.6595, "step": 1069 }, { "epoch": 0.14, "grad_norm": 0.5832914717129255, "learning_rate": 1.9409129997274233e-05, "loss": 0.6452, "step": 1070 }, { "epoch": 0.14, "grad_norm": 0.5650736680725454, "learning_rate": 1.9407728039216413e-05, "loss": 0.6061, "step": 1071 }, { "epoch": 0.14, "grad_norm": 0.580801924312911, "learning_rate": 1.9406324470669846e-05, "loss": 0.6305, "step": 1072 }, { "epoch": 0.14, "grad_norm": 0.5607382878232081, "learning_rate": 1.940491929187481e-05, "loss": 0.6222, "step": 1073 }, { "epoch": 0.14, "grad_norm": 0.6190057007760668, "learning_rate": 1.9403512503071855e-05, "loss": 0.6376, "step": 1074 }, { "epoch": 0.14, "grad_norm": 0.6186032388149361, "learning_rate": 1.9402104104501805e-05, "loss": 0.6456, "step": 1075 }, { "epoch": 0.14, "grad_norm": 0.5935545127565042, "learning_rate": 1.9400694096405762e-05, "loss": 0.6536, "step": 1076 }, { "epoch": 0.14, "grad_norm": 0.5672921637219815, "learning_rate": 1.93992824790251e-05, "loss": 0.6173, "step": 1077 }, { "epoch": 0.14, "grad_norm": 0.7361745610554232, "learning_rate": 1.9397869252601472e-05, "loss": 0.657, "step": 1078 }, { "epoch": 0.14, "grad_norm": 0.6204316341418958, "learning_rate": 1.9396454417376803e-05, "loss": 0.6328, "step": 1079 }, { "epoch": 0.14, "grad_norm": 0.5940430468238721, "learning_rate": 1.93950379735933e-05, "loss": 0.6565, "step": 1080 }, { "epoch": 0.14, "grad_norm": 0.6057247463511846, "learning_rate": 1.9393619921493437e-05, "loss": 0.6367, "step": 1081 }, { "epoch": 0.14, "grad_norm": 0.6065453310417313, "learning_rate": 1.9392200261319968e-05, "loss": 0.6166, "step": 1082 }, { "epoch": 0.14, "grad_norm": 0.5823881632923704, "learning_rate": 1.9390778993315927e-05, "loss": 0.6769, "step": 1083 }, { "epoch": 0.14, "grad_norm": 0.6222225688668315, "learning_rate": 1.938935611772461e-05, "loss": 0.6406, "step": 1084 }, { "epoch": 0.14, "grad_norm": 0.6409630824599579, "learning_rate": 1.9387931634789597e-05, "loss": 0.6499, "step": 1085 }, { "epoch": 0.14, "grad_norm": 0.5823789943662435, "learning_rate": 1.938650554475475e-05, "loss": 0.6285, "step": 1086 }, { "epoch": 0.14, "grad_norm": 0.6383329423818738, "learning_rate": 1.938507784786419e-05, "loss": 0.6454, "step": 1087 }, { "epoch": 0.14, "grad_norm": 0.5679275120593041, "learning_rate": 1.9383648544362322e-05, "loss": 0.6417, "step": 1088 }, { "epoch": 0.14, "grad_norm": 0.5904951184961237, "learning_rate": 1.938221763449383e-05, "loss": 0.6258, "step": 1089 }, { "epoch": 0.14, "grad_norm": 0.5730175217972205, "learning_rate": 1.9380785118503668e-05, "loss": 0.6352, "step": 1090 }, { "epoch": 0.14, "grad_norm": 0.5672793347847268, "learning_rate": 1.9379350996637058e-05, "loss": 0.6115, "step": 1091 }, { "epoch": 0.14, "grad_norm": 0.7222491833487432, "learning_rate": 1.9377915269139512e-05, "loss": 0.6978, "step": 1092 }, { "epoch": 0.14, "grad_norm": 0.5350745556689215, "learning_rate": 1.9376477936256806e-05, "loss": 0.6036, "step": 1093 }, { "epoch": 0.14, "grad_norm": 0.6409133947155636, "learning_rate": 1.9375038998234993e-05, "loss": 0.6859, "step": 1094 }, { "epoch": 0.14, "grad_norm": 0.6549319538001855, "learning_rate": 1.9373598455320407e-05, "loss": 0.6278, "step": 1095 }, { "epoch": 0.14, "grad_norm": 0.5384052913743443, "learning_rate": 1.9372156307759647e-05, "loss": 0.6325, "step": 1096 }, { "epoch": 0.14, "grad_norm": 0.5835908041870662, "learning_rate": 1.9370712555799588e-05, "loss": 0.6419, "step": 1097 }, { "epoch": 0.14, "grad_norm": 0.5654367161667682, "learning_rate": 1.9369267199687388e-05, "loss": 0.6212, "step": 1098 }, { "epoch": 0.14, "grad_norm": 0.5566382645873985, "learning_rate": 1.9367820239670475e-05, "loss": 0.6297, "step": 1099 }, { "epoch": 0.14, "grad_norm": 0.6677579020621627, "learning_rate": 1.936637167599655e-05, "loss": 0.6694, "step": 1100 }, { "epoch": 0.14, "grad_norm": 0.5466271174603334, "learning_rate": 1.9364921508913586e-05, "loss": 0.6421, "step": 1101 }, { "epoch": 0.14, "grad_norm": 0.5534569438173779, "learning_rate": 1.9363469738669834e-05, "loss": 0.6158, "step": 1102 }, { "epoch": 0.14, "grad_norm": 0.583684772128725, "learning_rate": 1.9362016365513825e-05, "loss": 0.6694, "step": 1103 }, { "epoch": 0.14, "grad_norm": 0.5605856502766111, "learning_rate": 1.9360561389694355e-05, "loss": 0.6329, "step": 1104 }, { "epoch": 0.14, "grad_norm": 0.56199640949541, "learning_rate": 1.93591048114605e-05, "loss": 0.6552, "step": 1105 }, { "epoch": 0.14, "grad_norm": 0.5733833466193927, "learning_rate": 1.9357646631061607e-05, "loss": 0.6549, "step": 1106 }, { "epoch": 0.14, "grad_norm": 0.882162507463989, "learning_rate": 1.93561868487473e-05, "loss": 0.6182, "step": 1107 }, { "epoch": 0.14, "grad_norm": 0.5377978477635488, "learning_rate": 1.9354725464767474e-05, "loss": 0.6496, "step": 1108 }, { "epoch": 0.14, "grad_norm": 0.6111935370932524, "learning_rate": 1.93532624793723e-05, "loss": 0.6205, "step": 1109 }, { "epoch": 0.14, "grad_norm": 0.5789065775781597, "learning_rate": 1.9351797892812224e-05, "loss": 0.6537, "step": 1110 }, { "epoch": 0.14, "grad_norm": 0.5148559782702514, "learning_rate": 1.9350331705337967e-05, "loss": 0.5855, "step": 1111 }, { "epoch": 0.14, "grad_norm": 0.6145748949578786, "learning_rate": 1.9348863917200526e-05, "loss": 0.6509, "step": 1112 }, { "epoch": 0.14, "grad_norm": 0.5582974320529792, "learning_rate": 1.934739452865116e-05, "loss": 0.6249, "step": 1113 }, { "epoch": 0.14, "grad_norm": 0.5334609411411299, "learning_rate": 1.9345923539941417e-05, "loss": 0.6425, "step": 1114 }, { "epoch": 0.14, "grad_norm": 0.5457166432211933, "learning_rate": 1.9344450951323104e-05, "loss": 0.6435, "step": 1115 }, { "epoch": 0.14, "grad_norm": 0.5999774489041445, "learning_rate": 1.934297676304832e-05, "loss": 0.638, "step": 1116 }, { "epoch": 0.14, "grad_norm": 0.6131509348889639, "learning_rate": 1.9341500975369424e-05, "loss": 0.6344, "step": 1117 }, { "epoch": 0.14, "grad_norm": 0.734046990089815, "learning_rate": 1.9340023588539055e-05, "loss": 0.7078, "step": 1118 }, { "epoch": 0.14, "grad_norm": 0.6220422710477044, "learning_rate": 1.933854460281012e-05, "loss": 0.6392, "step": 1119 }, { "epoch": 0.14, "grad_norm": 0.5944328991407587, "learning_rate": 1.9337064018435807e-05, "loss": 0.6497, "step": 1120 }, { "epoch": 0.14, "grad_norm": 0.5559839802538474, "learning_rate": 1.933558183566957e-05, "loss": 0.6399, "step": 1121 }, { "epoch": 0.14, "grad_norm": 0.5762269389706773, "learning_rate": 1.9334098054765145e-05, "loss": 0.6023, "step": 1122 }, { "epoch": 0.14, "grad_norm": 0.6262856889553063, "learning_rate": 1.9332612675976536e-05, "loss": 0.6667, "step": 1123 }, { "epoch": 0.14, "grad_norm": 0.5099732827748039, "learning_rate": 1.9331125699558016e-05, "loss": 0.5998, "step": 1124 }, { "epoch": 0.14, "grad_norm": 0.5590051944628253, "learning_rate": 1.932963712576415e-05, "loss": 0.6512, "step": 1125 }, { "epoch": 0.14, "grad_norm": 0.6534154249820116, "learning_rate": 1.932814695484975e-05, "loss": 0.652, "step": 1126 }, { "epoch": 0.14, "grad_norm": 0.5700676642211304, "learning_rate": 1.9326655187069923e-05, "loss": 0.6268, "step": 1127 }, { "epoch": 0.14, "grad_norm": 0.690595340042066, "learning_rate": 1.932516182268004e-05, "loss": 0.6212, "step": 1128 }, { "epoch": 0.14, "grad_norm": 0.5890546797848449, "learning_rate": 1.932366686193575e-05, "loss": 0.6476, "step": 1129 }, { "epoch": 0.14, "grad_norm": 0.6189899192987094, "learning_rate": 1.932217030509296e-05, "loss": 0.6159, "step": 1130 }, { "epoch": 0.14, "grad_norm": 0.5915030333462298, "learning_rate": 1.932067215240788e-05, "loss": 0.6078, "step": 1131 }, { "epoch": 0.14, "grad_norm": 0.6313399751774911, "learning_rate": 1.9319172404136963e-05, "loss": 0.6527, "step": 1132 }, { "epoch": 0.14, "grad_norm": 0.6041196373770902, "learning_rate": 1.931767106053695e-05, "loss": 0.6934, "step": 1133 }, { "epoch": 0.14, "grad_norm": 0.6270222989356997, "learning_rate": 1.931616812186486e-05, "loss": 0.6617, "step": 1134 }, { "epoch": 0.14, "grad_norm": 0.5495445852590997, "learning_rate": 1.9314663588377968e-05, "loss": 0.5968, "step": 1135 }, { "epoch": 0.15, "grad_norm": 0.7866050060962427, "learning_rate": 1.9313157460333834e-05, "loss": 0.7054, "step": 1136 }, { "epoch": 0.15, "grad_norm": 0.7427298437695232, "learning_rate": 1.9311649737990294e-05, "loss": 0.6827, "step": 1137 }, { "epoch": 0.15, "grad_norm": 0.5312139722664222, "learning_rate": 1.9310140421605445e-05, "loss": 0.6434, "step": 1138 }, { "epoch": 0.15, "grad_norm": 0.6114149163817295, "learning_rate": 1.930862951143767e-05, "loss": 0.6398, "step": 1139 }, { "epoch": 0.15, "grad_norm": 0.7069745597886873, "learning_rate": 1.930711700774561e-05, "loss": 0.6608, "step": 1140 }, { "epoch": 0.15, "grad_norm": 0.6076749773900282, "learning_rate": 1.93056029107882e-05, "loss": 0.6329, "step": 1141 }, { "epoch": 0.15, "grad_norm": 0.6467886250249628, "learning_rate": 1.9304087220824625e-05, "loss": 0.651, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.6681524084977978, "learning_rate": 1.9302569938114355e-05, "loss": 0.6581, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.5298834134827576, "learning_rate": 1.930105106291713e-05, "loss": 0.631, "step": 1144 }, { "epoch": 0.15, "grad_norm": 0.5733992938877388, "learning_rate": 1.9299530595492967e-05, "loss": 0.6085, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.628502238978412, "learning_rate": 1.9298008536102147e-05, "loss": 0.6379, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.5625337340722228, "learning_rate": 1.9296484885005228e-05, "loss": 0.6409, "step": 1147 }, { "epoch": 0.15, "grad_norm": 0.6139663109172169, "learning_rate": 1.9294959642463044e-05, "loss": 0.6565, "step": 1148 }, { "epoch": 0.15, "grad_norm": 0.6906263732795438, "learning_rate": 1.9293432808736695e-05, "loss": 0.638, "step": 1149 }, { "epoch": 0.15, "grad_norm": 0.7003992293860863, "learning_rate": 1.9291904384087556e-05, "loss": 0.633, "step": 1150 }, { "epoch": 0.15, "grad_norm": 0.6382552343342688, "learning_rate": 1.9290374368777277e-05, "loss": 0.6413, "step": 1151 }, { "epoch": 0.15, "grad_norm": 0.6891437747527036, "learning_rate": 1.9288842763067776e-05, "loss": 0.6218, "step": 1152 }, { "epoch": 0.15, "grad_norm": 0.7181538603519722, "learning_rate": 1.928730956722125e-05, "loss": 0.6187, "step": 1153 }, { "epoch": 0.15, "grad_norm": 0.5830565846662787, "learning_rate": 1.9285774781500156e-05, "loss": 0.5881, "step": 1154 }, { "epoch": 0.15, "grad_norm": 0.8063488139864146, "learning_rate": 1.9284238406167238e-05, "loss": 0.669, "step": 1155 }, { "epoch": 0.15, "grad_norm": 0.6832328754825183, "learning_rate": 1.92827004414855e-05, "loss": 0.6164, "step": 1156 }, { "epoch": 0.15, "grad_norm": 0.8161936471426284, "learning_rate": 1.928116088771822e-05, "loss": 0.6613, "step": 1157 }, { "epoch": 0.15, "grad_norm": 0.6681026133702486, "learning_rate": 1.927961974512896e-05, "loss": 0.6514, "step": 1158 }, { "epoch": 0.15, "grad_norm": 0.6875964972341962, "learning_rate": 1.927807701398154e-05, "loss": 0.6163, "step": 1159 }, { "epoch": 0.15, "grad_norm": 0.5727678384363426, "learning_rate": 1.927653269454006e-05, "loss": 0.6116, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.55211591570742, "learning_rate": 1.9274986787068882e-05, "loss": 0.6108, "step": 1161 }, { "epoch": 0.15, "grad_norm": 0.6231814541338538, "learning_rate": 1.9273439291832646e-05, "loss": 0.6957, "step": 1162 }, { "epoch": 0.15, "grad_norm": 0.5311989869639658, "learning_rate": 1.9271890209096273e-05, "loss": 0.615, "step": 1163 }, { "epoch": 0.15, "grad_norm": 0.6339136028634569, "learning_rate": 1.9270339539124942e-05, "loss": 0.6434, "step": 1164 }, { "epoch": 0.15, "grad_norm": 0.5850640120110483, "learning_rate": 1.926878728218411e-05, "loss": 0.6404, "step": 1165 }, { "epoch": 0.15, "grad_norm": 0.5340506126675338, "learning_rate": 1.9267233438539502e-05, "loss": 0.6305, "step": 1166 }, { "epoch": 0.15, "grad_norm": 0.6344879111310554, "learning_rate": 1.9265678008457123e-05, "loss": 0.6397, "step": 1167 }, { "epoch": 0.15, "grad_norm": 0.6364004704553513, "learning_rate": 1.9264120992203236e-05, "loss": 0.6514, "step": 1168 }, { "epoch": 0.15, "grad_norm": 0.5834393550750322, "learning_rate": 1.926256239004439e-05, "loss": 0.6373, "step": 1169 }, { "epoch": 0.15, "grad_norm": 0.48532743270151635, "learning_rate": 1.9261002202247398e-05, "loss": 0.5911, "step": 1170 }, { "epoch": 0.15, "grad_norm": 0.5314620788553694, "learning_rate": 1.9259440429079335e-05, "loss": 0.6215, "step": 1171 }, { "epoch": 0.15, "grad_norm": 0.6354464985321852, "learning_rate": 1.9257877070807575e-05, "loss": 0.6489, "step": 1172 }, { "epoch": 0.15, "grad_norm": 0.6636825938722244, "learning_rate": 1.925631212769973e-05, "loss": 0.6937, "step": 1173 }, { "epoch": 0.15, "grad_norm": 0.4958831159832153, "learning_rate": 1.9254745600023713e-05, "loss": 0.5898, "step": 1174 }, { "epoch": 0.15, "grad_norm": 0.6143530964221586, "learning_rate": 1.9253177488047683e-05, "loss": 0.6691, "step": 1175 }, { "epoch": 0.15, "grad_norm": 0.5381705039354795, "learning_rate": 1.9251607792040085e-05, "loss": 0.6007, "step": 1176 }, { "epoch": 0.15, "grad_norm": 0.5157410081345875, "learning_rate": 1.9250036512269635e-05, "loss": 0.6072, "step": 1177 }, { "epoch": 0.15, "grad_norm": 0.5494981733287436, "learning_rate": 1.9248463649005314e-05, "loss": 0.6257, "step": 1178 }, { "epoch": 0.15, "grad_norm": 0.5924135229672144, "learning_rate": 1.924688920251638e-05, "loss": 0.6187, "step": 1179 }, { "epoch": 0.15, "grad_norm": 0.5694897229731773, "learning_rate": 1.9245313173072354e-05, "loss": 0.6474, "step": 1180 }, { "epoch": 0.15, "grad_norm": 0.570937144044657, "learning_rate": 1.924373556094304e-05, "loss": 0.6646, "step": 1181 }, { "epoch": 0.15, "grad_norm": 0.6420875411048454, "learning_rate": 1.92421563663985e-05, "loss": 0.6666, "step": 1182 }, { "epoch": 0.15, "grad_norm": 0.6446738338568972, "learning_rate": 1.924057558970908e-05, "loss": 0.6376, "step": 1183 }, { "epoch": 0.15, "grad_norm": 0.6251854160079963, "learning_rate": 1.923899323114538e-05, "loss": 0.6434, "step": 1184 }, { "epoch": 0.15, "grad_norm": 0.5046702714435963, "learning_rate": 1.9237409290978285e-05, "loss": 0.6318, "step": 1185 }, { "epoch": 0.15, "grad_norm": 0.5406355282811351, "learning_rate": 1.923582376947895e-05, "loss": 0.6054, "step": 1186 }, { "epoch": 0.15, "grad_norm": 0.6273998787638915, "learning_rate": 1.9234236666918792e-05, "loss": 0.6417, "step": 1187 }, { "epoch": 0.15, "grad_norm": 0.5906976224047985, "learning_rate": 1.9232647983569506e-05, "loss": 0.6349, "step": 1188 }, { "epoch": 0.15, "grad_norm": 0.5440493823662442, "learning_rate": 1.9231057719703057e-05, "loss": 0.6134, "step": 1189 }, { "epoch": 0.15, "grad_norm": 0.5568969409200502, "learning_rate": 1.9229465875591677e-05, "loss": 0.6128, "step": 1190 }, { "epoch": 0.15, "grad_norm": 0.6396260474872614, "learning_rate": 1.9227872451507866e-05, "loss": 0.6512, "step": 1191 }, { "epoch": 0.15, "grad_norm": 0.6192954438965519, "learning_rate": 1.9226277447724407e-05, "loss": 0.6591, "step": 1192 }, { "epoch": 0.15, "grad_norm": 0.5430792475316704, "learning_rate": 1.922468086451434e-05, "loss": 0.6241, "step": 1193 }, { "epoch": 0.15, "grad_norm": 0.6544490399013658, "learning_rate": 1.922308270215098e-05, "loss": 0.6055, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.5968636751008105, "learning_rate": 1.9221482960907914e-05, "loss": 0.5973, "step": 1195 }, { "epoch": 0.15, "grad_norm": 0.5996637183801375, "learning_rate": 1.9219881641059004e-05, "loss": 0.614, "step": 1196 }, { "epoch": 0.15, "grad_norm": 0.5885791266685939, "learning_rate": 1.921827874287837e-05, "loss": 0.6136, "step": 1197 }, { "epoch": 0.15, "grad_norm": 0.7218889259275586, "learning_rate": 1.921667426664041e-05, "loss": 0.6483, "step": 1198 }, { "epoch": 0.15, "grad_norm": 0.5602667415528637, "learning_rate": 1.9215068212619792e-05, "loss": 0.6415, "step": 1199 }, { "epoch": 0.15, "grad_norm": 0.5721509680404762, "learning_rate": 1.9213460581091448e-05, "loss": 0.618, "step": 1200 }, { "epoch": 0.15, "grad_norm": 0.6229205413345495, "learning_rate": 1.9211851372330595e-05, "loss": 0.6617, "step": 1201 }, { "epoch": 0.15, "grad_norm": 0.6308235340051249, "learning_rate": 1.9210240586612702e-05, "loss": 0.6449, "step": 1202 }, { "epoch": 0.15, "grad_norm": 0.5763862379605528, "learning_rate": 1.920862822421352e-05, "loss": 0.6048, "step": 1203 }, { "epoch": 0.15, "grad_norm": 0.7129217900115156, "learning_rate": 1.920701428540906e-05, "loss": 0.6293, "step": 1204 }, { "epoch": 0.15, "grad_norm": 0.5978234101371629, "learning_rate": 1.920539877047561e-05, "loss": 0.6504, "step": 1205 }, { "epoch": 0.15, "grad_norm": 0.6577819560007339, "learning_rate": 1.9203781679689736e-05, "loss": 0.6313, "step": 1206 }, { "epoch": 0.15, "grad_norm": 0.6722657231472025, "learning_rate": 1.9202163013328256e-05, "loss": 0.6187, "step": 1207 }, { "epoch": 0.15, "grad_norm": 0.66285662053015, "learning_rate": 1.9200542771668264e-05, "loss": 0.6826, "step": 1208 }, { "epoch": 0.15, "grad_norm": 0.6422166829487032, "learning_rate": 1.9198920954987135e-05, "loss": 0.6208, "step": 1209 }, { "epoch": 0.15, "grad_norm": 0.6575131721100949, "learning_rate": 1.9197297563562494e-05, "loss": 0.6328, "step": 1210 }, { "epoch": 0.15, "grad_norm": 0.5634528033083085, "learning_rate": 1.919567259767225e-05, "loss": 0.5995, "step": 1211 }, { "epoch": 0.15, "grad_norm": 0.5818028773280194, "learning_rate": 1.919404605759458e-05, "loss": 0.6476, "step": 1212 }, { "epoch": 0.15, "grad_norm": 0.6380396319074655, "learning_rate": 1.9192417943607923e-05, "loss": 0.6055, "step": 1213 }, { "epoch": 0.16, "grad_norm": 0.5566921868704898, "learning_rate": 1.9190788255991e-05, "loss": 0.6224, "step": 1214 }, { "epoch": 0.16, "grad_norm": 0.7321322126076953, "learning_rate": 1.9189156995022785e-05, "loss": 0.7079, "step": 1215 }, { "epoch": 0.16, "grad_norm": 0.6073162497747022, "learning_rate": 1.9187524160982533e-05, "loss": 0.6213, "step": 1216 }, { "epoch": 0.16, "grad_norm": 0.6395216149283258, "learning_rate": 1.9185889754149766e-05, "loss": 0.6211, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.5796129433712167, "learning_rate": 1.918425377480428e-05, "loss": 0.6178, "step": 1218 }, { "epoch": 0.16, "grad_norm": 0.6478771734290938, "learning_rate": 1.9182616223226126e-05, "loss": 0.6805, "step": 1219 }, { "epoch": 0.16, "grad_norm": 0.6035740665081087, "learning_rate": 1.918097709969563e-05, "loss": 0.6467, "step": 1220 }, { "epoch": 0.16, "grad_norm": 0.6025009277637798, "learning_rate": 1.9179336404493408e-05, "loss": 0.5955, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.5223731956285387, "learning_rate": 1.917769413790031e-05, "loss": 0.6044, "step": 1222 }, { "epoch": 0.16, "grad_norm": 0.6196301847696708, "learning_rate": 1.9176050300197476e-05, "loss": 0.6641, "step": 1223 }, { "epoch": 0.16, "grad_norm": 0.6816590992421478, "learning_rate": 1.9174404891666317e-05, "loss": 0.6852, "step": 1224 }, { "epoch": 0.16, "grad_norm": 0.5489761243219325, "learning_rate": 1.91727579125885e-05, "loss": 0.6304, "step": 1225 }, { "epoch": 0.16, "grad_norm": 0.5308600351109949, "learning_rate": 1.917110936324597e-05, "loss": 0.618, "step": 1226 }, { "epoch": 0.16, "grad_norm": 0.5575764320603469, "learning_rate": 1.916945924392094e-05, "loss": 0.6287, "step": 1227 }, { "epoch": 0.16, "grad_norm": 0.5489129082268176, "learning_rate": 1.916780755489589e-05, "loss": 0.6393, "step": 1228 }, { "epoch": 0.16, "grad_norm": 0.5396406930333956, "learning_rate": 1.916615429645357e-05, "loss": 0.6497, "step": 1229 }, { "epoch": 0.16, "grad_norm": 0.5371146745316202, "learning_rate": 1.9164499468876992e-05, "loss": 0.6021, "step": 1230 }, { "epoch": 0.16, "grad_norm": 0.6001652103194776, "learning_rate": 1.916284307244945e-05, "loss": 0.6257, "step": 1231 }, { "epoch": 0.16, "grad_norm": 0.541712117838929, "learning_rate": 1.9161185107454496e-05, "loss": 0.6454, "step": 1232 }, { "epoch": 0.16, "grad_norm": 0.6792883919957651, "learning_rate": 1.915952557417595e-05, "loss": 0.6961, "step": 1233 }, { "epoch": 0.16, "grad_norm": 0.6353820702397005, "learning_rate": 1.9157864472897908e-05, "loss": 0.6144, "step": 1234 }, { "epoch": 0.16, "grad_norm": 0.6220777091390014, "learning_rate": 1.915620180390473e-05, "loss": 0.658, "step": 1235 }, { "epoch": 0.16, "grad_norm": 0.5384007468579576, "learning_rate": 1.9154537567481045e-05, "loss": 0.6319, "step": 1236 }, { "epoch": 0.16, "grad_norm": 0.6112357360374355, "learning_rate": 1.915287176391175e-05, "loss": 0.7016, "step": 1237 }, { "epoch": 0.16, "grad_norm": 0.6082624482468709, "learning_rate": 1.9151204393482008e-05, "loss": 0.6397, "step": 1238 }, { "epoch": 0.16, "grad_norm": 0.5914338108736888, "learning_rate": 1.9149535456477256e-05, "loss": 0.66, "step": 1239 }, { "epoch": 0.16, "grad_norm": 0.5101782132644557, "learning_rate": 1.914786495318319e-05, "loss": 0.61, "step": 1240 }, { "epoch": 0.16, "grad_norm": 0.6532035017993905, "learning_rate": 1.9146192883885787e-05, "loss": 0.6844, "step": 1241 }, { "epoch": 0.16, "grad_norm": 0.6379420389401056, "learning_rate": 1.914451924887128e-05, "loss": 0.6798, "step": 1242 }, { "epoch": 0.16, "grad_norm": 0.5683160995896406, "learning_rate": 1.9142844048426177e-05, "loss": 0.659, "step": 1243 }, { "epoch": 0.16, "grad_norm": 0.6263440296522573, "learning_rate": 1.9141167282837253e-05, "loss": 0.6586, "step": 1244 }, { "epoch": 0.16, "grad_norm": 0.5267517359057594, "learning_rate": 1.9139488952391546e-05, "loss": 0.6134, "step": 1245 }, { "epoch": 0.16, "grad_norm": 0.5620418968324231, "learning_rate": 1.913780905737637e-05, "loss": 0.6363, "step": 1246 }, { "epoch": 0.16, "grad_norm": 0.6542462565573496, "learning_rate": 1.91361275980793e-05, "loss": 0.6737, "step": 1247 }, { "epoch": 0.16, "grad_norm": 0.5378914640116609, "learning_rate": 1.9134444574788184e-05, "loss": 0.6179, "step": 1248 }, { "epoch": 0.16, "grad_norm": 0.6154692792970622, "learning_rate": 1.913275998779113e-05, "loss": 0.6517, "step": 1249 }, { "epoch": 0.16, "grad_norm": 0.5408539034401666, "learning_rate": 1.9131073837376524e-05, "loss": 0.6185, "step": 1250 }, { "epoch": 0.16, "grad_norm": 0.5459914791427385, "learning_rate": 1.9129386123833017e-05, "loss": 0.6045, "step": 1251 }, { "epoch": 0.16, "grad_norm": 0.5834437191573822, "learning_rate": 1.9127696847449514e-05, "loss": 0.6849, "step": 1252 }, { "epoch": 0.16, "grad_norm": 0.6571241332561059, "learning_rate": 1.912600600851521e-05, "loss": 0.6653, "step": 1253 }, { "epoch": 0.16, "grad_norm": 0.6664177681618362, "learning_rate": 1.9124313607319548e-05, "loss": 0.6577, "step": 1254 }, { "epoch": 0.16, "grad_norm": 0.6963082817244496, "learning_rate": 1.912261964415225e-05, "loss": 0.6202, "step": 1255 }, { "epoch": 0.16, "grad_norm": 0.599960522485086, "learning_rate": 1.9120924119303307e-05, "loss": 0.6623, "step": 1256 }, { "epoch": 0.16, "grad_norm": 0.6613873855349207, "learning_rate": 1.911922703306296e-05, "loss": 0.6689, "step": 1257 }, { "epoch": 0.16, "grad_norm": 0.5901271970423, "learning_rate": 1.9117528385721743e-05, "loss": 0.5967, "step": 1258 }, { "epoch": 0.16, "grad_norm": 0.5774641839910095, "learning_rate": 1.9115828177570436e-05, "loss": 0.6208, "step": 1259 }, { "epoch": 0.16, "grad_norm": 0.5615516743133336, "learning_rate": 1.911412640890009e-05, "loss": 0.6328, "step": 1260 }, { "epoch": 0.16, "grad_norm": 0.5677295075308714, "learning_rate": 1.9112423080002038e-05, "loss": 0.6482, "step": 1261 }, { "epoch": 0.16, "grad_norm": 0.6255982784957995, "learning_rate": 1.9110718191167864e-05, "loss": 0.6357, "step": 1262 }, { "epoch": 0.16, "grad_norm": 0.6101421609196684, "learning_rate": 1.9109011742689425e-05, "loss": 0.68, "step": 1263 }, { "epoch": 0.16, "grad_norm": 0.6208023951504444, "learning_rate": 1.910730373485884e-05, "loss": 0.621, "step": 1264 }, { "epoch": 0.16, "grad_norm": 0.5437682199799417, "learning_rate": 1.9105594167968503e-05, "loss": 0.6029, "step": 1265 }, { "epoch": 0.16, "grad_norm": 0.5681207119117814, "learning_rate": 1.910388304231107e-05, "loss": 0.6065, "step": 1266 }, { "epoch": 0.16, "grad_norm": 0.6432572360971094, "learning_rate": 1.910217035817947e-05, "loss": 0.6614, "step": 1267 }, { "epoch": 0.16, "grad_norm": 0.6304517302915077, "learning_rate": 1.9100456115866885e-05, "loss": 0.6684, "step": 1268 }, { "epoch": 0.16, "grad_norm": 0.6259216325374968, "learning_rate": 1.9098740315666777e-05, "loss": 0.6316, "step": 1269 }, { "epoch": 0.16, "grad_norm": 0.5944111064111487, "learning_rate": 1.909702295787287e-05, "loss": 0.6312, "step": 1270 }, { "epoch": 0.16, "grad_norm": 0.7739497400649084, "learning_rate": 1.9095304042779156e-05, "loss": 0.7059, "step": 1271 }, { "epoch": 0.16, "grad_norm": 0.667918949821286, "learning_rate": 1.9093583570679886e-05, "loss": 0.6995, "step": 1272 }, { "epoch": 0.16, "grad_norm": 0.5551203961199569, "learning_rate": 1.9091861541869595e-05, "loss": 0.601, "step": 1273 }, { "epoch": 0.16, "grad_norm": 0.679061164693476, "learning_rate": 1.9090137956643063e-05, "loss": 0.6414, "step": 1274 }, { "epoch": 0.16, "grad_norm": 0.5334245503678515, "learning_rate": 1.9088412815295356e-05, "loss": 0.6039, "step": 1275 }, { "epoch": 0.16, "grad_norm": 0.5940182760179058, "learning_rate": 1.908668611812179e-05, "loss": 0.6539, "step": 1276 }, { "epoch": 0.16, "grad_norm": 0.626950075965485, "learning_rate": 1.9084957865417957e-05, "loss": 0.6219, "step": 1277 }, { "epoch": 0.16, "grad_norm": 0.5677994230559115, "learning_rate": 1.9083228057479714e-05, "loss": 0.6614, "step": 1278 }, { "epoch": 0.16, "grad_norm": 0.6339123577401578, "learning_rate": 1.9081496694603178e-05, "loss": 0.6266, "step": 1279 }, { "epoch": 0.16, "grad_norm": 0.5417970531816083, "learning_rate": 1.9079763777084745e-05, "loss": 0.6021, "step": 1280 }, { "epoch": 0.16, "grad_norm": 0.6694653663098534, "learning_rate": 1.9078029305221064e-05, "loss": 0.6484, "step": 1281 }, { "epoch": 0.16, "grad_norm": 0.6542696348605651, "learning_rate": 1.9076293279309057e-05, "loss": 0.656, "step": 1282 }, { "epoch": 0.16, "grad_norm": 0.5639912915112729, "learning_rate": 1.907455569964591e-05, "loss": 0.6505, "step": 1283 }, { "epoch": 0.16, "grad_norm": 0.591003349736729, "learning_rate": 1.907281656652908e-05, "loss": 0.6338, "step": 1284 }, { "epoch": 0.16, "grad_norm": 0.5595436637324696, "learning_rate": 1.907107588025628e-05, "loss": 0.6379, "step": 1285 }, { "epoch": 0.16, "grad_norm": 0.6233589530834623, "learning_rate": 1.9069333641125493e-05, "loss": 0.6531, "step": 1286 }, { "epoch": 0.16, "grad_norm": 0.6239644079972699, "learning_rate": 1.9067589849434977e-05, "loss": 0.656, "step": 1287 }, { "epoch": 0.16, "grad_norm": 0.5891449947743744, "learning_rate": 1.906584450548324e-05, "loss": 0.6393, "step": 1288 }, { "epoch": 0.16, "grad_norm": 0.5835072764082262, "learning_rate": 1.9064097609569064e-05, "loss": 0.6154, "step": 1289 }, { "epoch": 0.16, "grad_norm": 0.9834993144081091, "learning_rate": 1.9062349161991503e-05, "loss": 0.6871, "step": 1290 }, { "epoch": 0.16, "grad_norm": 0.609316168228829, "learning_rate": 1.906059916304987e-05, "loss": 0.6536, "step": 1291 }, { "epoch": 0.17, "grad_norm": 0.5291839813962347, "learning_rate": 1.9058847613043734e-05, "loss": 0.6265, "step": 1292 }, { "epoch": 0.17, "grad_norm": 0.6593808649465065, "learning_rate": 1.9057094512272948e-05, "loss": 0.6284, "step": 1293 }, { "epoch": 0.17, "grad_norm": 0.508765372128406, "learning_rate": 1.9055339861037618e-05, "loss": 0.571, "step": 1294 }, { "epoch": 0.17, "grad_norm": 0.6581315347027002, "learning_rate": 1.9053583659638116e-05, "loss": 0.6661, "step": 1295 }, { "epoch": 0.17, "grad_norm": 0.6066351901894339, "learning_rate": 1.9051825908375088e-05, "loss": 0.6724, "step": 1296 }, { "epoch": 0.17, "grad_norm": 0.5352822161834218, "learning_rate": 1.9050066607549438e-05, "loss": 0.6055, "step": 1297 }, { "epoch": 0.17, "grad_norm": 0.6424176682634927, "learning_rate": 1.9048305757462336e-05, "loss": 0.6942, "step": 1298 }, { "epoch": 0.17, "grad_norm": 0.5936790292153992, "learning_rate": 1.9046543358415218e-05, "loss": 0.6294, "step": 1299 }, { "epoch": 0.17, "grad_norm": 0.6781288971491759, "learning_rate": 1.9044779410709786e-05, "loss": 0.7332, "step": 1300 }, { "epoch": 0.17, "grad_norm": 0.6196400846091568, "learning_rate": 1.9043013914648006e-05, "loss": 0.6527, "step": 1301 }, { "epoch": 0.17, "grad_norm": 0.5538881013740046, "learning_rate": 1.9041246870532107e-05, "loss": 0.5951, "step": 1302 }, { "epoch": 0.17, "grad_norm": 0.5327230847780066, "learning_rate": 1.9039478278664593e-05, "loss": 0.6292, "step": 1303 }, { "epoch": 0.17, "grad_norm": 0.5110164953975223, "learning_rate": 1.903770813934822e-05, "loss": 0.5899, "step": 1304 }, { "epoch": 0.17, "grad_norm": 0.6214684444519407, "learning_rate": 1.9035936452886013e-05, "loss": 0.6096, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.5452591347317639, "learning_rate": 1.9034163219581264e-05, "loss": 0.6358, "step": 1306 }, { "epoch": 0.17, "grad_norm": 0.5837581671833718, "learning_rate": 1.9032388439737532e-05, "loss": 0.6186, "step": 1307 }, { "epoch": 0.17, "grad_norm": 0.5586606794986344, "learning_rate": 1.9030612113658638e-05, "loss": 0.6083, "step": 1308 }, { "epoch": 0.17, "grad_norm": 0.5049961190650392, "learning_rate": 1.902883424164866e-05, "loss": 0.6051, "step": 1309 }, { "epoch": 0.17, "grad_norm": 0.4987284712818054, "learning_rate": 1.9027054824011957e-05, "loss": 0.616, "step": 1310 }, { "epoch": 0.17, "grad_norm": 0.6302040468940391, "learning_rate": 1.9025273861053137e-05, "loss": 0.5991, "step": 1311 }, { "epoch": 0.17, "grad_norm": 0.6143825867539442, "learning_rate": 1.9023491353077085e-05, "loss": 0.6425, "step": 1312 }, { "epoch": 0.17, "grad_norm": 0.5995026829766662, "learning_rate": 1.9021707300388942e-05, "loss": 0.644, "step": 1313 }, { "epoch": 0.17, "grad_norm": 0.6274370179010537, "learning_rate": 1.9019921703294114e-05, "loss": 0.6659, "step": 1314 }, { "epoch": 0.17, "grad_norm": 0.6136765589864368, "learning_rate": 1.901813456209828e-05, "loss": 0.677, "step": 1315 }, { "epoch": 0.17, "grad_norm": 0.5747880647629485, "learning_rate": 1.9016345877107366e-05, "loss": 0.6245, "step": 1316 }, { "epoch": 0.17, "grad_norm": 0.5582001015276181, "learning_rate": 1.9014555648627585e-05, "loss": 0.6251, "step": 1317 }, { "epoch": 0.17, "grad_norm": 0.5924711182018106, "learning_rate": 1.9012763876965395e-05, "loss": 0.6325, "step": 1318 }, { "epoch": 0.17, "grad_norm": 0.5762060385362174, "learning_rate": 1.901097056242753e-05, "loss": 0.6328, "step": 1319 }, { "epoch": 0.17, "grad_norm": 0.5631811224336102, "learning_rate": 1.900917570532098e-05, "loss": 0.6441, "step": 1320 }, { "epoch": 0.17, "grad_norm": 0.6175201124688242, "learning_rate": 1.9007379305953003e-05, "loss": 0.6006, "step": 1321 }, { "epoch": 0.17, "grad_norm": 0.5835441509882253, "learning_rate": 1.900558136463112e-05, "loss": 0.6328, "step": 1322 }, { "epoch": 0.17, "grad_norm": 0.6858150993241321, "learning_rate": 1.900378188166312e-05, "loss": 0.6718, "step": 1323 }, { "epoch": 0.17, "grad_norm": 0.5500648303887606, "learning_rate": 1.9001980857357053e-05, "loss": 0.6151, "step": 1324 }, { "epoch": 0.17, "grad_norm": 0.6250724798632386, "learning_rate": 1.9000178292021227e-05, "loss": 0.6525, "step": 1325 }, { "epoch": 0.17, "grad_norm": 0.6682254279317636, "learning_rate": 1.8998374185964227e-05, "loss": 0.6279, "step": 1326 }, { "epoch": 0.17, "grad_norm": 0.6219403023647976, "learning_rate": 1.8996568539494887e-05, "loss": 0.6422, "step": 1327 }, { "epoch": 0.17, "grad_norm": 0.5231665645901876, "learning_rate": 1.8994761352922315e-05, "loss": 0.6166, "step": 1328 }, { "epoch": 0.17, "grad_norm": 0.6378602116351789, "learning_rate": 1.899295262655588e-05, "loss": 0.65, "step": 1329 }, { "epoch": 0.17, "grad_norm": 0.5299839943607476, "learning_rate": 1.899114236070521e-05, "loss": 0.5979, "step": 1330 }, { "epoch": 0.17, "grad_norm": 0.6621516194495463, "learning_rate": 1.8989330555680207e-05, "loss": 0.6502, "step": 1331 }, { "epoch": 0.17, "grad_norm": 0.5862548230391581, "learning_rate": 1.8987517211791024e-05, "loss": 0.6355, "step": 1332 }, { "epoch": 0.17, "grad_norm": 0.5433471532324992, "learning_rate": 1.898570232934809e-05, "loss": 0.6169, "step": 1333 }, { "epoch": 0.17, "grad_norm": 0.6236529818298048, "learning_rate": 1.898388590866208e-05, "loss": 0.6804, "step": 1334 }, { "epoch": 0.17, "grad_norm": 0.6798447048575983, "learning_rate": 1.8982067950043953e-05, "loss": 0.6385, "step": 1335 }, { "epoch": 0.17, "grad_norm": 0.676074308746186, "learning_rate": 1.8980248453804918e-05, "loss": 0.6168, "step": 1336 }, { "epoch": 0.17, "grad_norm": 0.6292425372159096, "learning_rate": 1.8978427420256452e-05, "loss": 0.6165, "step": 1337 }, { "epoch": 0.17, "grad_norm": 0.5015623921155478, "learning_rate": 1.8976604849710295e-05, "loss": 0.6223, "step": 1338 }, { "epoch": 0.17, "grad_norm": 0.6161557111954117, "learning_rate": 1.8974780742478445e-05, "loss": 0.6405, "step": 1339 }, { "epoch": 0.17, "grad_norm": 0.585017580130654, "learning_rate": 1.8972955098873166e-05, "loss": 0.608, "step": 1340 }, { "epoch": 0.17, "grad_norm": 0.5760454868384244, "learning_rate": 1.8971127919206994e-05, "loss": 0.6491, "step": 1341 }, { "epoch": 0.17, "grad_norm": 0.5426904028167157, "learning_rate": 1.8969299203792707e-05, "loss": 0.6129, "step": 1342 }, { "epoch": 0.17, "grad_norm": 0.5938899475097459, "learning_rate": 1.8967468952943372e-05, "loss": 0.6483, "step": 1343 }, { "epoch": 0.17, "grad_norm": 0.5397111852646069, "learning_rate": 1.8965637166972303e-05, "loss": 0.645, "step": 1344 }, { "epoch": 0.17, "grad_norm": 0.5590740214549788, "learning_rate": 1.896380384619307e-05, "loss": 0.6294, "step": 1345 }, { "epoch": 0.17, "grad_norm": 0.5818053902919216, "learning_rate": 1.8961968990919527e-05, "loss": 0.6002, "step": 1346 }, { "epoch": 0.17, "grad_norm": 0.6423902874633833, "learning_rate": 1.8960132601465772e-05, "loss": 0.6772, "step": 1347 }, { "epoch": 0.17, "grad_norm": 0.6904113248733679, "learning_rate": 1.8958294678146173e-05, "loss": 0.658, "step": 1348 }, { "epoch": 0.17, "grad_norm": 0.5906799942634363, "learning_rate": 1.8956455221275364e-05, "loss": 0.6241, "step": 1349 }, { "epoch": 0.17, "grad_norm": 0.6330223809538231, "learning_rate": 1.8954614231168235e-05, "loss": 0.6335, "step": 1350 }, { "epoch": 0.17, "grad_norm": 0.6333213044330384, "learning_rate": 1.895277170813994e-05, "loss": 0.6085, "step": 1351 }, { "epoch": 0.17, "grad_norm": 2.2656783368295526, "learning_rate": 1.89509276525059e-05, "loss": 0.6428, "step": 1352 }, { "epoch": 0.17, "grad_norm": 0.5756418266011142, "learning_rate": 1.8949082064581792e-05, "loss": 0.5835, "step": 1353 }, { "epoch": 0.17, "grad_norm": 0.6068879466840402, "learning_rate": 1.8947234944683557e-05, "loss": 0.6507, "step": 1354 }, { "epoch": 0.17, "grad_norm": 0.5430938421745172, "learning_rate": 1.8945386293127405e-05, "loss": 0.6104, "step": 1355 }, { "epoch": 0.17, "grad_norm": 0.616008633045972, "learning_rate": 1.89435361102298e-05, "loss": 0.6245, "step": 1356 }, { "epoch": 0.17, "grad_norm": 0.5571257993946725, "learning_rate": 1.8941684396307466e-05, "loss": 0.6394, "step": 1357 }, { "epoch": 0.17, "grad_norm": 0.5618991885307019, "learning_rate": 1.89398311516774e-05, "loss": 0.6396, "step": 1358 }, { "epoch": 0.17, "grad_norm": 0.5483731754317355, "learning_rate": 1.8937976376656854e-05, "loss": 0.6241, "step": 1359 }, { "epoch": 0.17, "grad_norm": 0.5789597786017222, "learning_rate": 1.893612007156334e-05, "loss": 0.6745, "step": 1360 }, { "epoch": 0.17, "grad_norm": 0.5957287570058938, "learning_rate": 1.893426223671464e-05, "loss": 0.6309, "step": 1361 }, { "epoch": 0.17, "grad_norm": 0.5320283357958463, "learning_rate": 1.893240287242879e-05, "loss": 0.6553, "step": 1362 }, { "epoch": 0.17, "grad_norm": 0.5739745312862631, "learning_rate": 1.8930541979024086e-05, "loss": 0.6461, "step": 1363 }, { "epoch": 0.17, "grad_norm": 0.5983291110027171, "learning_rate": 1.8928679556819097e-05, "loss": 0.6349, "step": 1364 }, { "epoch": 0.17, "grad_norm": 0.5479422898515288, "learning_rate": 1.8926815606132644e-05, "loss": 0.6231, "step": 1365 }, { "epoch": 0.17, "grad_norm": 0.6060385500043501, "learning_rate": 1.8924950127283813e-05, "loss": 0.6192, "step": 1366 }, { "epoch": 0.17, "grad_norm": 0.6364385322621573, "learning_rate": 1.8923083120591954e-05, "loss": 0.6824, "step": 1367 }, { "epoch": 0.17, "grad_norm": 0.5751429168097587, "learning_rate": 1.892121458637667e-05, "loss": 0.6505, "step": 1368 }, { "epoch": 0.17, "grad_norm": 0.6563252966934335, "learning_rate": 1.8919344524957838e-05, "loss": 0.7109, "step": 1369 }, { "epoch": 0.18, "grad_norm": 0.6327328116634306, "learning_rate": 1.891747293665559e-05, "loss": 0.6571, "step": 1370 }, { "epoch": 0.18, "grad_norm": 0.619130010204447, "learning_rate": 1.8915599821790308e-05, "loss": 0.6035, "step": 1371 }, { "epoch": 0.18, "grad_norm": 0.4685272612446313, "learning_rate": 1.8913725180682658e-05, "loss": 0.6235, "step": 1372 }, { "epoch": 0.18, "grad_norm": 0.5989234092041031, "learning_rate": 1.891184901365355e-05, "loss": 0.6532, "step": 1373 }, { "epoch": 0.18, "grad_norm": 0.589632147289773, "learning_rate": 1.8909971321024167e-05, "loss": 0.6897, "step": 1374 }, { "epoch": 0.18, "grad_norm": 0.5199779775136317, "learning_rate": 1.8908092103115942e-05, "loss": 0.6281, "step": 1375 }, { "epoch": 0.18, "grad_norm": 0.48543964637257875, "learning_rate": 1.8906211360250572e-05, "loss": 0.5846, "step": 1376 }, { "epoch": 0.18, "grad_norm": 0.5781225012092377, "learning_rate": 1.8904329092750024e-05, "loss": 0.6429, "step": 1377 }, { "epoch": 0.18, "grad_norm": 0.5752725556501085, "learning_rate": 1.8902445300936515e-05, "loss": 0.6298, "step": 1378 }, { "epoch": 0.18, "grad_norm": 0.6623936409560285, "learning_rate": 1.890055998513253e-05, "loss": 0.6603, "step": 1379 }, { "epoch": 0.18, "grad_norm": 0.5709808900845701, "learning_rate": 1.8898673145660812e-05, "loss": 0.63, "step": 1380 }, { "epoch": 0.18, "grad_norm": 0.58060839638746, "learning_rate": 1.8896784782844358e-05, "loss": 0.6144, "step": 1381 }, { "epoch": 0.18, "grad_norm": 0.6124621485643453, "learning_rate": 1.8894894897006442e-05, "loss": 0.6345, "step": 1382 }, { "epoch": 0.18, "grad_norm": 0.5949307858329705, "learning_rate": 1.889300348847059e-05, "loss": 0.6398, "step": 1383 }, { "epoch": 0.18, "grad_norm": 0.5493309011438315, "learning_rate": 1.889111055756058e-05, "loss": 0.6531, "step": 1384 }, { "epoch": 0.18, "grad_norm": 0.592534963138269, "learning_rate": 1.888921610460046e-05, "loss": 0.6476, "step": 1385 }, { "epoch": 0.18, "grad_norm": 0.5913279446305053, "learning_rate": 1.8887320129914545e-05, "loss": 0.6602, "step": 1386 }, { "epoch": 0.18, "grad_norm": 0.5616071169298279, "learning_rate": 1.8885422633827396e-05, "loss": 0.6402, "step": 1387 }, { "epoch": 0.18, "grad_norm": 0.5540242435452389, "learning_rate": 1.8883523616663844e-05, "loss": 0.6177, "step": 1388 }, { "epoch": 0.18, "grad_norm": 0.5615538309812251, "learning_rate": 1.8881623078748977e-05, "loss": 0.6203, "step": 1389 }, { "epoch": 0.18, "grad_norm": 0.5454502355374546, "learning_rate": 1.8879721020408143e-05, "loss": 0.6529, "step": 1390 }, { "epoch": 0.18, "grad_norm": 0.5870261096421923, "learning_rate": 1.887781744196695e-05, "loss": 0.6225, "step": 1391 }, { "epoch": 0.18, "grad_norm": 0.6770965049554287, "learning_rate": 1.8875912343751275e-05, "loss": 0.6525, "step": 1392 }, { "epoch": 0.18, "grad_norm": 0.5623430988774848, "learning_rate": 1.8874005726087237e-05, "loss": 0.6118, "step": 1393 }, { "epoch": 0.18, "grad_norm": 0.5973114921668388, "learning_rate": 1.887209758930124e-05, "loss": 0.6601, "step": 1394 }, { "epoch": 0.18, "grad_norm": 0.5392708991194983, "learning_rate": 1.8870187933719914e-05, "loss": 0.6147, "step": 1395 }, { "epoch": 0.18, "grad_norm": 0.5560809490384945, "learning_rate": 1.8868276759670184e-05, "loss": 0.6666, "step": 1396 }, { "epoch": 0.18, "grad_norm": 0.5344293350920667, "learning_rate": 1.886636406747922e-05, "loss": 0.6776, "step": 1397 }, { "epoch": 0.18, "grad_norm": 0.5018698334766174, "learning_rate": 1.8864449857474444e-05, "loss": 0.6162, "step": 1398 }, { "epoch": 0.18, "grad_norm": 0.5795851536688871, "learning_rate": 1.8862534129983546e-05, "loss": 0.6493, "step": 1399 }, { "epoch": 0.18, "grad_norm": 0.5678149283053312, "learning_rate": 1.886061688533448e-05, "loss": 0.6619, "step": 1400 }, { "epoch": 0.18, "grad_norm": 0.6284947281181761, "learning_rate": 1.8858698123855455e-05, "loss": 0.6681, "step": 1401 }, { "epoch": 0.18, "grad_norm": 0.5425052460829543, "learning_rate": 1.8856777845874937e-05, "loss": 0.6211, "step": 1402 }, { "epoch": 0.18, "grad_norm": 0.643256081971896, "learning_rate": 1.8854856051721652e-05, "loss": 0.6301, "step": 1403 }, { "epoch": 0.18, "grad_norm": 0.5407060504556074, "learning_rate": 1.8852932741724595e-05, "loss": 0.6232, "step": 1404 }, { "epoch": 0.18, "grad_norm": 0.5139359466059547, "learning_rate": 1.8851007916213007e-05, "loss": 0.6008, "step": 1405 }, { "epoch": 0.18, "grad_norm": 0.6273582403750702, "learning_rate": 1.8849081575516397e-05, "loss": 0.658, "step": 1406 }, { "epoch": 0.18, "grad_norm": 0.6656760731000932, "learning_rate": 1.8847153719964533e-05, "loss": 0.6838, "step": 1407 }, { "epoch": 0.18, "grad_norm": 0.5887292704567646, "learning_rate": 1.8845224349887433e-05, "loss": 0.6137, "step": 1408 }, { "epoch": 0.18, "grad_norm": 0.5522529738878121, "learning_rate": 1.8843293465615394e-05, "loss": 0.6493, "step": 1409 }, { "epoch": 0.18, "grad_norm": 0.6119199117719988, "learning_rate": 1.884136106747895e-05, "loss": 0.6098, "step": 1410 }, { "epoch": 0.18, "grad_norm": 0.600474374735567, "learning_rate": 1.8839427155808904e-05, "loss": 0.6197, "step": 1411 }, { "epoch": 0.18, "grad_norm": 0.7051328090603655, "learning_rate": 1.8837491730936327e-05, "loss": 0.6317, "step": 1412 }, { "epoch": 0.18, "grad_norm": 0.5687854292120865, "learning_rate": 1.8835554793192532e-05, "loss": 0.6098, "step": 1413 }, { "epoch": 0.18, "grad_norm": 0.6228876364327877, "learning_rate": 1.88336163429091e-05, "loss": 0.6155, "step": 1414 }, { "epoch": 0.18, "grad_norm": 0.5467978859127939, "learning_rate": 1.8831676380417874e-05, "loss": 0.6202, "step": 1415 }, { "epoch": 0.18, "grad_norm": 0.5883573274785632, "learning_rate": 1.8829734906050947e-05, "loss": 0.6329, "step": 1416 }, { "epoch": 0.18, "grad_norm": 0.5689803693694359, "learning_rate": 1.882779192014068e-05, "loss": 0.637, "step": 1417 }, { "epoch": 0.18, "grad_norm": 0.5585932965743513, "learning_rate": 1.882584742301969e-05, "loss": 0.6083, "step": 1418 }, { "epoch": 0.18, "grad_norm": 0.7173626296940976, "learning_rate": 1.882390141502085e-05, "loss": 0.6243, "step": 1419 }, { "epoch": 0.18, "grad_norm": 0.5899965185560394, "learning_rate": 1.8821953896477288e-05, "loss": 0.6297, "step": 1420 }, { "epoch": 0.18, "grad_norm": 0.6172260883135074, "learning_rate": 1.8820004867722398e-05, "loss": 0.647, "step": 1421 }, { "epoch": 0.18, "grad_norm": 0.5126829651456497, "learning_rate": 1.8818054329089833e-05, "loss": 0.625, "step": 1422 }, { "epoch": 0.18, "grad_norm": 0.6710882758055161, "learning_rate": 1.8816102280913496e-05, "loss": 0.6804, "step": 1423 }, { "epoch": 0.18, "grad_norm": 0.5670230867179545, "learning_rate": 1.8814148723527562e-05, "loss": 0.6116, "step": 1424 }, { "epoch": 0.18, "grad_norm": 0.5218640316253814, "learning_rate": 1.8812193657266454e-05, "loss": 0.5937, "step": 1425 }, { "epoch": 0.18, "grad_norm": 0.6792832204466268, "learning_rate": 1.8810237082464847e-05, "loss": 0.6421, "step": 1426 }, { "epoch": 0.18, "grad_norm": 0.5299403911465559, "learning_rate": 1.8808278999457692e-05, "loss": 0.6086, "step": 1427 }, { "epoch": 0.18, "grad_norm": 0.5533014483228473, "learning_rate": 1.8806319408580188e-05, "loss": 0.6597, "step": 1428 }, { "epoch": 0.18, "grad_norm": 0.5238332988632545, "learning_rate": 1.880435831016779e-05, "loss": 0.6025, "step": 1429 }, { "epoch": 0.18, "grad_norm": 0.5292057150254639, "learning_rate": 1.8802395704556216e-05, "loss": 0.6306, "step": 1430 }, { "epoch": 0.18, "grad_norm": 0.5428075425381064, "learning_rate": 1.880043159208144e-05, "loss": 0.6375, "step": 1431 }, { "epoch": 0.18, "grad_norm": 0.5264194519709843, "learning_rate": 1.879846597307969e-05, "loss": 0.6087, "step": 1432 }, { "epoch": 0.18, "grad_norm": 0.6459606335181071, "learning_rate": 1.8796498847887464e-05, "loss": 0.6517, "step": 1433 }, { "epoch": 0.18, "grad_norm": 0.5655760298813819, "learning_rate": 1.879453021684151e-05, "loss": 0.6442, "step": 1434 }, { "epoch": 0.18, "grad_norm": 0.5059865243556735, "learning_rate": 1.8792560080278822e-05, "loss": 0.6163, "step": 1435 }, { "epoch": 0.18, "grad_norm": 0.5532343560860383, "learning_rate": 1.8790588438536675e-05, "loss": 0.594, "step": 1436 }, { "epoch": 0.18, "grad_norm": 0.5261745965677036, "learning_rate": 1.8788615291952585e-05, "loss": 0.6087, "step": 1437 }, { "epoch": 0.18, "grad_norm": 0.5958949155430887, "learning_rate": 1.8786640640864332e-05, "loss": 0.6212, "step": 1438 }, { "epoch": 0.18, "grad_norm": 0.535115867092364, "learning_rate": 1.8784664485609957e-05, "loss": 0.6205, "step": 1439 }, { "epoch": 0.18, "grad_norm": 0.5958396034463561, "learning_rate": 1.8782686826527744e-05, "loss": 0.6208, "step": 1440 }, { "epoch": 0.18, "grad_norm": 0.6321481317543293, "learning_rate": 1.878070766395625e-05, "loss": 0.6618, "step": 1441 }, { "epoch": 0.18, "grad_norm": 0.5859907337737282, "learning_rate": 1.8778726998234286e-05, "loss": 0.6245, "step": 1442 }, { "epoch": 0.18, "grad_norm": 0.5549204561365563, "learning_rate": 1.877674482970091e-05, "loss": 0.6518, "step": 1443 }, { "epoch": 0.18, "grad_norm": 0.5517679979523747, "learning_rate": 1.8774761158695453e-05, "loss": 0.6323, "step": 1444 }, { "epoch": 0.18, "grad_norm": 0.5881066049240788, "learning_rate": 1.8772775985557494e-05, "loss": 0.666, "step": 1445 }, { "epoch": 0.18, "grad_norm": 0.5769163275500944, "learning_rate": 1.8770789310626867e-05, "loss": 0.7141, "step": 1446 }, { "epoch": 0.18, "grad_norm": 0.5298978980427295, "learning_rate": 1.876880113424367e-05, "loss": 0.6332, "step": 1447 }, { "epoch": 0.18, "grad_norm": 0.5279929585282681, "learning_rate": 1.876681145674825e-05, "loss": 0.6216, "step": 1448 }, { "epoch": 0.19, "grad_norm": 0.5531187607862984, "learning_rate": 1.876482027848122e-05, "loss": 0.6279, "step": 1449 }, { "epoch": 0.19, "grad_norm": 0.5911391241096329, "learning_rate": 1.8762827599783447e-05, "loss": 0.6526, "step": 1450 }, { "epoch": 0.19, "grad_norm": 0.7596349763567458, "learning_rate": 1.8760833420996048e-05, "loss": 0.6762, "step": 1451 }, { "epoch": 0.19, "grad_norm": 0.5626774433119267, "learning_rate": 1.8758837742460406e-05, "loss": 0.6281, "step": 1452 }, { "epoch": 0.19, "grad_norm": 0.5784372680036001, "learning_rate": 1.8756840564518155e-05, "loss": 0.6363, "step": 1453 }, { "epoch": 0.19, "grad_norm": 0.572051778700836, "learning_rate": 1.875484188751119e-05, "loss": 0.6378, "step": 1454 }, { "epoch": 0.19, "grad_norm": 0.5869391165030541, "learning_rate": 1.8752841711781653e-05, "loss": 0.6215, "step": 1455 }, { "epoch": 0.19, "grad_norm": 0.6391136472322632, "learning_rate": 1.8750840037671963e-05, "loss": 0.6369, "step": 1456 }, { "epoch": 0.19, "grad_norm": 0.7681368159500482, "learning_rate": 1.874883686552477e-05, "loss": 0.6389, "step": 1457 }, { "epoch": 0.19, "grad_norm": 0.6400412588115973, "learning_rate": 1.8746832195682997e-05, "loss": 0.6278, "step": 1458 }, { "epoch": 0.19, "grad_norm": 0.6209359547579127, "learning_rate": 1.8744826028489823e-05, "loss": 0.6242, "step": 1459 }, { "epoch": 0.19, "grad_norm": 0.5553900852414718, "learning_rate": 1.8742818364288673e-05, "loss": 0.5742, "step": 1460 }, { "epoch": 0.19, "grad_norm": 0.5576772108826272, "learning_rate": 1.874080920342324e-05, "loss": 0.5966, "step": 1461 }, { "epoch": 0.19, "grad_norm": 0.6673919188431316, "learning_rate": 1.8738798546237465e-05, "loss": 0.6485, "step": 1462 }, { "epoch": 0.19, "grad_norm": 0.5608198909872836, "learning_rate": 1.8736786393075544e-05, "loss": 0.6272, "step": 1463 }, { "epoch": 0.19, "grad_norm": 0.6217974347154406, "learning_rate": 1.8734772744281944e-05, "loss": 0.6606, "step": 1464 }, { "epoch": 0.19, "grad_norm": 0.5604257648122488, "learning_rate": 1.8732757600201374e-05, "loss": 0.6098, "step": 1465 }, { "epoch": 0.19, "grad_norm": 0.5020980773901051, "learning_rate": 1.873074096117879e-05, "loss": 0.5987, "step": 1466 }, { "epoch": 0.19, "grad_norm": 0.5576143748677121, "learning_rate": 1.8728722827559432e-05, "loss": 0.577, "step": 1467 }, { "epoch": 0.19, "grad_norm": 0.6050054125043601, "learning_rate": 1.8726703199688773e-05, "loss": 0.635, "step": 1468 }, { "epoch": 0.19, "grad_norm": 0.5423429522547489, "learning_rate": 1.8724682077912545e-05, "loss": 0.6244, "step": 1469 }, { "epoch": 0.19, "grad_norm": 0.5662955528258817, "learning_rate": 1.8722659462576748e-05, "loss": 0.6314, "step": 1470 }, { "epoch": 0.19, "grad_norm": 0.6303104497330108, "learning_rate": 1.872063535402762e-05, "loss": 0.6109, "step": 1471 }, { "epoch": 0.19, "grad_norm": 0.5630556013956962, "learning_rate": 1.8718609752611673e-05, "loss": 0.6421, "step": 1472 }, { "epoch": 0.19, "grad_norm": 0.5966219254856424, "learning_rate": 1.871658265867566e-05, "loss": 0.6535, "step": 1473 }, { "epoch": 0.19, "grad_norm": 0.6051447937275384, "learning_rate": 1.8714554072566593e-05, "loss": 0.6503, "step": 1474 }, { "epoch": 0.19, "grad_norm": 0.6379902221853186, "learning_rate": 1.8712523994631746e-05, "loss": 0.6902, "step": 1475 }, { "epoch": 0.19, "grad_norm": 0.6033450514864707, "learning_rate": 1.8710492425218642e-05, "loss": 0.6786, "step": 1476 }, { "epoch": 0.19, "grad_norm": 0.6326493876254347, "learning_rate": 1.870845936467506e-05, "loss": 0.6669, "step": 1477 }, { "epoch": 0.19, "grad_norm": 0.5833294390329256, "learning_rate": 1.870642481334904e-05, "loss": 0.6143, "step": 1478 }, { "epoch": 0.19, "grad_norm": 0.5553594878696059, "learning_rate": 1.870438877158886e-05, "loss": 0.612, "step": 1479 }, { "epoch": 0.19, "grad_norm": 0.5679438262796195, "learning_rate": 1.870235123974308e-05, "loss": 0.6025, "step": 1480 }, { "epoch": 0.19, "grad_norm": 0.584473602610593, "learning_rate": 1.8700312218160495e-05, "loss": 0.6249, "step": 1481 }, { "epoch": 0.19, "grad_norm": 0.7057256473756168, "learning_rate": 1.869827170719016e-05, "loss": 0.6691, "step": 1482 }, { "epoch": 0.19, "grad_norm": 0.543536332719835, "learning_rate": 1.8696229707181385e-05, "loss": 0.6269, "step": 1483 }, { "epoch": 0.19, "grad_norm": 0.5667354068311921, "learning_rate": 1.8694186218483736e-05, "loss": 0.6272, "step": 1484 }, { "epoch": 0.19, "grad_norm": 0.650978917065568, "learning_rate": 1.8692141241447035e-05, "loss": 0.6558, "step": 1485 }, { "epoch": 0.19, "grad_norm": 0.5022649121870059, "learning_rate": 1.869009477642136e-05, "loss": 0.6075, "step": 1486 }, { "epoch": 0.19, "grad_norm": 0.5499179954134339, "learning_rate": 1.868804682375703e-05, "loss": 0.6041, "step": 1487 }, { "epoch": 0.19, "grad_norm": 0.6253412112543569, "learning_rate": 1.8685997383804644e-05, "loss": 0.6314, "step": 1488 }, { "epoch": 0.19, "grad_norm": 0.6086858041999634, "learning_rate": 1.8683946456915032e-05, "loss": 0.6189, "step": 1489 }, { "epoch": 0.19, "grad_norm": 0.6833278147756404, "learning_rate": 1.8681894043439287e-05, "loss": 0.6406, "step": 1490 }, { "epoch": 0.19, "grad_norm": 0.5899492267228562, "learning_rate": 1.8679840143728765e-05, "loss": 0.6383, "step": 1491 }, { "epoch": 0.19, "grad_norm": 0.6455293255220325, "learning_rate": 1.867778475813506e-05, "loss": 0.6805, "step": 1492 }, { "epoch": 0.19, "grad_norm": 0.5500712434167088, "learning_rate": 1.8675727887010038e-05, "loss": 0.6254, "step": 1493 }, { "epoch": 0.19, "grad_norm": 0.6030478242676881, "learning_rate": 1.8673669530705802e-05, "loss": 0.6687, "step": 1494 }, { "epoch": 0.19, "grad_norm": 0.5334217159679151, "learning_rate": 1.867160968957472e-05, "loss": 0.6386, "step": 1495 }, { "epoch": 0.19, "grad_norm": 0.5913807990144109, "learning_rate": 1.8669548363969413e-05, "loss": 0.6523, "step": 1496 }, { "epoch": 0.19, "grad_norm": 0.5882226856874058, "learning_rate": 1.8667485554242754e-05, "loss": 0.6167, "step": 1497 }, { "epoch": 0.19, "grad_norm": 0.5714832766888439, "learning_rate": 1.8665421260747875e-05, "loss": 0.6438, "step": 1498 }, { "epoch": 0.19, "grad_norm": 0.6112128019254157, "learning_rate": 1.8663355483838154e-05, "loss": 0.6251, "step": 1499 }, { "epoch": 0.19, "grad_norm": 0.6316037786632913, "learning_rate": 1.866128822386722e-05, "loss": 0.6423, "step": 1500 }, { "epoch": 0.19, "grad_norm": 0.5596565790480954, "learning_rate": 1.865921948118898e-05, "loss": 0.6312, "step": 1501 }, { "epoch": 0.19, "grad_norm": 0.5730154909307077, "learning_rate": 1.865714925615756e-05, "loss": 0.6479, "step": 1502 }, { "epoch": 0.19, "grad_norm": 0.6255425543837304, "learning_rate": 1.8655077549127374e-05, "loss": 0.6437, "step": 1503 }, { "epoch": 0.19, "grad_norm": 0.5779093389919638, "learning_rate": 1.8653004360453058e-05, "loss": 0.6394, "step": 1504 }, { "epoch": 0.19, "grad_norm": 0.6255252076060233, "learning_rate": 1.8650929690489526e-05, "loss": 0.6402, "step": 1505 }, { "epoch": 0.19, "grad_norm": 0.5296722475728637, "learning_rate": 1.864885353959193e-05, "loss": 0.6066, "step": 1506 }, { "epoch": 0.19, "grad_norm": 0.5867030691222801, "learning_rate": 1.8646775908115685e-05, "loss": 0.6216, "step": 1507 }, { "epoch": 0.19, "grad_norm": 0.7530973726366255, "learning_rate": 1.864469679641646e-05, "loss": 0.6438, "step": 1508 }, { "epoch": 0.19, "grad_norm": 0.6722262162182403, "learning_rate": 1.864261620485017e-05, "loss": 0.6656, "step": 1509 }, { "epoch": 0.19, "grad_norm": 0.5925800077397011, "learning_rate": 1.8640534133772982e-05, "loss": 0.6007, "step": 1510 }, { "epoch": 0.19, "grad_norm": 0.6723369906132626, "learning_rate": 1.863845058354133e-05, "loss": 0.6173, "step": 1511 }, { "epoch": 0.19, "grad_norm": 0.5161398724480621, "learning_rate": 1.863636555451189e-05, "loss": 0.6004, "step": 1512 }, { "epoch": 0.19, "grad_norm": 0.5773593218509055, "learning_rate": 1.863427904704159e-05, "loss": 0.5922, "step": 1513 }, { "epoch": 0.19, "grad_norm": 0.5375652385868924, "learning_rate": 1.8632191061487615e-05, "loss": 0.6193, "step": 1514 }, { "epoch": 0.19, "grad_norm": 0.5924629370658344, "learning_rate": 1.863010159820741e-05, "loss": 0.6453, "step": 1515 }, { "epoch": 0.19, "grad_norm": 0.561436226627274, "learning_rate": 1.8628010657558656e-05, "loss": 0.5831, "step": 1516 }, { "epoch": 0.19, "grad_norm": 0.5823700429374025, "learning_rate": 1.8625918239899304e-05, "loss": 0.6068, "step": 1517 }, { "epoch": 0.19, "grad_norm": 0.5469415501373577, "learning_rate": 1.8623824345587547e-05, "loss": 0.6407, "step": 1518 }, { "epoch": 0.19, "grad_norm": 0.5516197249538982, "learning_rate": 1.8621728974981836e-05, "loss": 0.6561, "step": 1519 }, { "epoch": 0.19, "grad_norm": 0.48745605338831177, "learning_rate": 1.8619632128440872e-05, "loss": 0.6235, "step": 1520 }, { "epoch": 0.19, "grad_norm": 0.5506395153892385, "learning_rate": 1.8617533806323608e-05, "loss": 0.6484, "step": 1521 }, { "epoch": 0.19, "grad_norm": 0.5643287747500658, "learning_rate": 1.8615434008989253e-05, "loss": 0.6071, "step": 1522 }, { "epoch": 0.19, "grad_norm": 0.6951682096568911, "learning_rate": 1.861333273679727e-05, "loss": 0.6744, "step": 1523 }, { "epoch": 0.19, "grad_norm": 0.5084256933015897, "learning_rate": 1.8611229990107364e-05, "loss": 0.6345, "step": 1524 }, { "epoch": 0.19, "grad_norm": 0.47319836285189715, "learning_rate": 1.8609125769279507e-05, "loss": 0.6126, "step": 1525 }, { "epoch": 0.19, "grad_norm": 0.6692588360615718, "learning_rate": 1.860702007467391e-05, "loss": 0.643, "step": 1526 }, { "epoch": 0.2, "grad_norm": 0.5616405286334195, "learning_rate": 1.8604912906651048e-05, "loss": 0.638, "step": 1527 }, { "epoch": 0.2, "grad_norm": 0.527604359885704, "learning_rate": 1.8602804265571634e-05, "loss": 0.6357, "step": 1528 }, { "epoch": 0.2, "grad_norm": 0.5193466675191988, "learning_rate": 1.8600694151796652e-05, "loss": 0.6214, "step": 1529 }, { "epoch": 0.2, "grad_norm": 0.548909500615067, "learning_rate": 1.859858256568732e-05, "loss": 0.6628, "step": 1530 }, { "epoch": 0.2, "grad_norm": 0.535287237343712, "learning_rate": 1.8596469507605123e-05, "loss": 0.6315, "step": 1531 }, { "epoch": 0.2, "grad_norm": 0.5650178340694517, "learning_rate": 1.8594354977911786e-05, "loss": 0.6026, "step": 1532 }, { "epoch": 0.2, "grad_norm": 0.6155029997352515, "learning_rate": 1.859223897696929e-05, "loss": 0.6649, "step": 1533 }, { "epoch": 0.2, "grad_norm": 0.593179957296339, "learning_rate": 1.859012150513987e-05, "loss": 0.6166, "step": 1534 }, { "epoch": 0.2, "grad_norm": 0.523655848183261, "learning_rate": 1.8588002562786015e-05, "loss": 0.6145, "step": 1535 }, { "epoch": 0.2, "grad_norm": 0.507813507557162, "learning_rate": 1.8585882150270457e-05, "loss": 0.5844, "step": 1536 }, { "epoch": 0.2, "grad_norm": 0.5809280679606755, "learning_rate": 1.8583760267956187e-05, "loss": 0.6488, "step": 1537 }, { "epoch": 0.2, "grad_norm": 0.5967662872078815, "learning_rate": 1.8581636916206445e-05, "loss": 0.6259, "step": 1538 }, { "epoch": 0.2, "grad_norm": 0.6422006149399221, "learning_rate": 1.8579512095384722e-05, "loss": 0.6682, "step": 1539 }, { "epoch": 0.2, "grad_norm": 0.5692706409240271, "learning_rate": 1.8577385805854767e-05, "loss": 0.6313, "step": 1540 }, { "epoch": 0.2, "grad_norm": 0.5603667673553191, "learning_rate": 1.8575258047980567e-05, "loss": 0.6471, "step": 1541 }, { "epoch": 0.2, "grad_norm": 0.5957361675109042, "learning_rate": 1.8573128822126377e-05, "loss": 0.6446, "step": 1542 }, { "epoch": 0.2, "grad_norm": 0.5607167776014788, "learning_rate": 1.8570998128656685e-05, "loss": 0.6708, "step": 1543 }, { "epoch": 0.2, "grad_norm": 0.6476613362976893, "learning_rate": 1.8568865967936243e-05, "loss": 0.6814, "step": 1544 }, { "epoch": 0.2, "grad_norm": 0.6202081704821868, "learning_rate": 1.856673234033006e-05, "loss": 0.622, "step": 1545 }, { "epoch": 0.2, "grad_norm": 0.8056658722936172, "learning_rate": 1.8564597246203374e-05, "loss": 0.7027, "step": 1546 }, { "epoch": 0.2, "grad_norm": 0.5896389625728234, "learning_rate": 1.856246068592169e-05, "loss": 0.6276, "step": 1547 }, { "epoch": 0.2, "grad_norm": 0.629693649281368, "learning_rate": 1.856032265985077e-05, "loss": 0.6199, "step": 1548 }, { "epoch": 0.2, "grad_norm": 0.5312492767978184, "learning_rate": 1.8558183168356612e-05, "loss": 0.6097, "step": 1549 }, { "epoch": 0.2, "grad_norm": 0.4901977907707367, "learning_rate": 1.855604221180547e-05, "loss": 0.6017, "step": 1550 }, { "epoch": 0.2, "grad_norm": 0.5952138503056309, "learning_rate": 1.855389979056385e-05, "loss": 0.6574, "step": 1551 }, { "epoch": 0.2, "grad_norm": 0.6589849066666373, "learning_rate": 1.8551755904998506e-05, "loss": 0.6744, "step": 1552 }, { "epoch": 0.2, "grad_norm": 0.5422625280867557, "learning_rate": 1.8549610555476455e-05, "loss": 0.6072, "step": 1553 }, { "epoch": 0.2, "grad_norm": 0.6482349708893528, "learning_rate": 1.8547463742364944e-05, "loss": 0.6933, "step": 1554 }, { "epoch": 0.2, "grad_norm": 0.5278755373097663, "learning_rate": 1.8545315466031484e-05, "loss": 0.6141, "step": 1555 }, { "epoch": 0.2, "grad_norm": 0.5055846559697182, "learning_rate": 1.854316572684384e-05, "loss": 0.6089, "step": 1556 }, { "epoch": 0.2, "grad_norm": 0.5381512104318145, "learning_rate": 1.8541014525170015e-05, "loss": 0.607, "step": 1557 }, { "epoch": 0.2, "grad_norm": 0.5978499698185092, "learning_rate": 1.853886186137827e-05, "loss": 0.6305, "step": 1558 }, { "epoch": 0.2, "grad_norm": 0.6720895963960577, "learning_rate": 1.8536707735837116e-05, "loss": 0.6685, "step": 1559 }, { "epoch": 0.2, "grad_norm": 0.5518216657708591, "learning_rate": 1.853455214891531e-05, "loss": 0.6159, "step": 1560 }, { "epoch": 0.2, "grad_norm": 0.5388518486128981, "learning_rate": 1.8532395100981867e-05, "loss": 0.6173, "step": 1561 }, { "epoch": 0.2, "grad_norm": 0.5651344182681826, "learning_rate": 1.8530236592406044e-05, "loss": 0.6663, "step": 1562 }, { "epoch": 0.2, "grad_norm": 0.47970906885115455, "learning_rate": 1.8528076623557352e-05, "loss": 0.6025, "step": 1563 }, { "epoch": 0.2, "grad_norm": 0.5337073603467952, "learning_rate": 1.852591519480555e-05, "loss": 0.5919, "step": 1564 }, { "epoch": 0.2, "grad_norm": 0.4896118254590776, "learning_rate": 1.852375230652065e-05, "loss": 0.6175, "step": 1565 }, { "epoch": 0.2, "grad_norm": 0.49813448078389244, "learning_rate": 1.8521587959072916e-05, "loss": 0.6081, "step": 1566 }, { "epoch": 0.2, "grad_norm": 0.5108294900901587, "learning_rate": 1.8519422152832853e-05, "loss": 0.5979, "step": 1567 }, { "epoch": 0.2, "grad_norm": 0.5738822213087512, "learning_rate": 1.851725488817122e-05, "loss": 0.6421, "step": 1568 }, { "epoch": 0.2, "grad_norm": 0.5422108149803198, "learning_rate": 1.851508616545903e-05, "loss": 0.6148, "step": 1569 }, { "epoch": 0.2, "grad_norm": 0.49897712165812774, "learning_rate": 1.851291598506754e-05, "loss": 0.6065, "step": 1570 }, { "epoch": 0.2, "grad_norm": 0.5610748214395007, "learning_rate": 1.8510744347368258e-05, "loss": 0.6327, "step": 1571 }, { "epoch": 0.2, "grad_norm": 0.5932633582967327, "learning_rate": 1.8508571252732946e-05, "loss": 0.6412, "step": 1572 }, { "epoch": 0.2, "grad_norm": 0.5627872733610354, "learning_rate": 1.8506396701533607e-05, "loss": 0.6538, "step": 1573 }, { "epoch": 0.2, "grad_norm": 0.5401299649147219, "learning_rate": 1.8504220694142497e-05, "loss": 0.6275, "step": 1574 }, { "epoch": 0.2, "grad_norm": 0.5198949410855882, "learning_rate": 1.8502043230932126e-05, "loss": 0.6286, "step": 1575 }, { "epoch": 0.2, "grad_norm": 0.5017486258087722, "learning_rate": 1.8499864312275244e-05, "loss": 0.6029, "step": 1576 }, { "epoch": 0.2, "grad_norm": 0.5500833071616488, "learning_rate": 1.849768393854486e-05, "loss": 0.6234, "step": 1577 }, { "epoch": 0.2, "grad_norm": 0.634206588506586, "learning_rate": 1.849550211011423e-05, "loss": 0.658, "step": 1578 }, { "epoch": 0.2, "grad_norm": 0.6273901316663308, "learning_rate": 1.849331882735685e-05, "loss": 0.6611, "step": 1579 }, { "epoch": 0.2, "grad_norm": 0.539746237409198, "learning_rate": 1.8491134090646477e-05, "loss": 0.6462, "step": 1580 }, { "epoch": 0.2, "grad_norm": 0.5917242548420788, "learning_rate": 1.8488947900357104e-05, "loss": 0.6507, "step": 1581 }, { "epoch": 0.2, "grad_norm": 0.5496556644966076, "learning_rate": 1.8486760256862986e-05, "loss": 0.6026, "step": 1582 }, { "epoch": 0.2, "grad_norm": 0.5253872216304328, "learning_rate": 1.8484571160538622e-05, "loss": 0.6042, "step": 1583 }, { "epoch": 0.2, "grad_norm": 0.5122419391026367, "learning_rate": 1.8482380611758755e-05, "loss": 0.627, "step": 1584 }, { "epoch": 0.2, "grad_norm": 0.5025026408602866, "learning_rate": 1.848018861089838e-05, "loss": 0.6156, "step": 1585 }, { "epoch": 0.2, "grad_norm": 0.6186014956353143, "learning_rate": 1.8477995158332746e-05, "loss": 0.6514, "step": 1586 }, { "epoch": 0.2, "grad_norm": 0.5720824237289147, "learning_rate": 1.847580025443734e-05, "loss": 0.6404, "step": 1587 }, { "epoch": 0.2, "grad_norm": 1.774935652863824, "learning_rate": 1.8473603899587906e-05, "loss": 0.7017, "step": 1588 }, { "epoch": 0.2, "grad_norm": 0.5208876598328619, "learning_rate": 1.847140609416043e-05, "loss": 0.6198, "step": 1589 }, { "epoch": 0.2, "grad_norm": 0.534766887999107, "learning_rate": 1.8469206838531155e-05, "loss": 0.6449, "step": 1590 }, { "epoch": 0.2, "grad_norm": 0.5235199861951187, "learning_rate": 1.8467006133076565e-05, "loss": 0.6038, "step": 1591 }, { "epoch": 0.2, "grad_norm": 0.5596630738587999, "learning_rate": 1.8464803978173388e-05, "loss": 0.611, "step": 1592 }, { "epoch": 0.2, "grad_norm": 0.4620738621760588, "learning_rate": 1.8462600374198616e-05, "loss": 0.6014, "step": 1593 }, { "epoch": 0.2, "grad_norm": 0.6863674916608539, "learning_rate": 1.8460395321529468e-05, "loss": 0.6561, "step": 1594 }, { "epoch": 0.2, "grad_norm": 0.5821837584153328, "learning_rate": 1.8458188820543435e-05, "loss": 0.6268, "step": 1595 }, { "epoch": 0.2, "grad_norm": 0.6016769272584869, "learning_rate": 1.8455980871618234e-05, "loss": 0.6542, "step": 1596 }, { "epoch": 0.2, "grad_norm": 0.6056389577793185, "learning_rate": 1.845377147513184e-05, "loss": 0.6101, "step": 1597 }, { "epoch": 0.2, "grad_norm": 0.6447142604255399, "learning_rate": 1.8451560631462478e-05, "loss": 0.6327, "step": 1598 }, { "epoch": 0.2, "grad_norm": 0.6627146720022785, "learning_rate": 1.8449348340988615e-05, "loss": 0.6496, "step": 1599 }, { "epoch": 0.2, "grad_norm": 0.6101275521279869, "learning_rate": 1.844713460408897e-05, "loss": 0.6845, "step": 1600 }, { "epoch": 0.2, "grad_norm": 0.6476372841008373, "learning_rate": 1.844491942114251e-05, "loss": 0.6506, "step": 1601 }, { "epoch": 0.2, "grad_norm": 0.6242640293777719, "learning_rate": 1.8442702792528438e-05, "loss": 0.6155, "step": 1602 }, { "epoch": 0.2, "grad_norm": 0.6041988508805006, "learning_rate": 1.8440484718626222e-05, "loss": 0.6384, "step": 1603 }, { "epoch": 0.2, "grad_norm": 0.6483779765492061, "learning_rate": 1.8438265199815574e-05, "loss": 0.6394, "step": 1604 }, { "epoch": 0.21, "grad_norm": 0.5832679114643157, "learning_rate": 1.843604423647644e-05, "loss": 0.6291, "step": 1605 }, { "epoch": 0.21, "grad_norm": 0.6089430103561113, "learning_rate": 1.8433821828989018e-05, "loss": 0.6473, "step": 1606 }, { "epoch": 0.21, "grad_norm": 0.5596956469969059, "learning_rate": 1.843159797773377e-05, "loss": 0.6048, "step": 1607 }, { "epoch": 0.21, "grad_norm": 0.6158778454351389, "learning_rate": 1.842937268309138e-05, "loss": 0.6408, "step": 1608 }, { "epoch": 0.21, "grad_norm": 0.5557682087574927, "learning_rate": 1.8427145945442802e-05, "loss": 0.6522, "step": 1609 }, { "epoch": 0.21, "grad_norm": 0.4979881314982503, "learning_rate": 1.8424917765169222e-05, "loss": 0.5851, "step": 1610 }, { "epoch": 0.21, "grad_norm": 0.5718267345961218, "learning_rate": 1.8422688142652074e-05, "loss": 0.6047, "step": 1611 }, { "epoch": 0.21, "grad_norm": 0.5501940823468954, "learning_rate": 1.8420457078273047e-05, "loss": 0.6108, "step": 1612 }, { "epoch": 0.21, "grad_norm": 0.6828402089758681, "learning_rate": 1.8418224572414073e-05, "loss": 0.6524, "step": 1613 }, { "epoch": 0.21, "grad_norm": 0.6431849459309236, "learning_rate": 1.841599062545732e-05, "loss": 0.6606, "step": 1614 }, { "epoch": 0.21, "grad_norm": 0.5586635479204848, "learning_rate": 1.8413755237785228e-05, "loss": 0.6504, "step": 1615 }, { "epoch": 0.21, "grad_norm": 0.5273540990398083, "learning_rate": 1.8411518409780456e-05, "loss": 0.653, "step": 1616 }, { "epoch": 0.21, "grad_norm": 0.538848003571195, "learning_rate": 1.8409280141825925e-05, "loss": 0.6191, "step": 1617 }, { "epoch": 0.21, "grad_norm": 0.6372032212313166, "learning_rate": 1.8407040434304805e-05, "loss": 0.6351, "step": 1618 }, { "epoch": 0.21, "grad_norm": 0.6207318056641634, "learning_rate": 1.8404799287600494e-05, "loss": 0.6553, "step": 1619 }, { "epoch": 0.21, "grad_norm": 0.568486562289631, "learning_rate": 1.840255670209666e-05, "loss": 0.6194, "step": 1620 }, { "epoch": 0.21, "grad_norm": 1.1485235200878299, "learning_rate": 1.8400312678177207e-05, "loss": 0.6411, "step": 1621 }, { "epoch": 0.21, "grad_norm": 0.5289697207835532, "learning_rate": 1.8398067216226277e-05, "loss": 0.5908, "step": 1622 }, { "epoch": 0.21, "grad_norm": 0.5106756979867081, "learning_rate": 1.8395820316628274e-05, "loss": 0.6371, "step": 1623 }, { "epoch": 0.21, "grad_norm": 0.6841922910011513, "learning_rate": 1.839357197976783e-05, "loss": 0.6495, "step": 1624 }, { "epoch": 0.21, "grad_norm": 0.5747851871083437, "learning_rate": 1.8391322206029846e-05, "loss": 0.6446, "step": 1625 }, { "epoch": 0.21, "grad_norm": 0.5304021111827104, "learning_rate": 1.8389070995799442e-05, "loss": 0.6194, "step": 1626 }, { "epoch": 0.21, "grad_norm": 0.6751578355142989, "learning_rate": 1.8386818349462007e-05, "loss": 0.6537, "step": 1627 }, { "epoch": 0.21, "grad_norm": 0.5994260524964138, "learning_rate": 1.8384564267403163e-05, "loss": 0.6047, "step": 1628 }, { "epoch": 0.21, "grad_norm": 0.6079972316864195, "learning_rate": 1.8382308750008783e-05, "loss": 0.6401, "step": 1629 }, { "epoch": 0.21, "grad_norm": 0.6079105518588159, "learning_rate": 1.8380051797664987e-05, "loss": 0.6396, "step": 1630 }, { "epoch": 0.21, "grad_norm": 0.5747847696991547, "learning_rate": 1.8377793410758128e-05, "loss": 0.6395, "step": 1631 }, { "epoch": 0.21, "grad_norm": 0.6101055134234589, "learning_rate": 1.8375533589674828e-05, "loss": 0.6717, "step": 1632 }, { "epoch": 0.21, "grad_norm": 0.5950203833781329, "learning_rate": 1.837327233480193e-05, "loss": 0.6283, "step": 1633 }, { "epoch": 0.21, "grad_norm": 0.5727234290489641, "learning_rate": 1.837100964652654e-05, "loss": 0.6715, "step": 1634 }, { "epoch": 0.21, "grad_norm": 0.6487144438940534, "learning_rate": 1.8368745525235995e-05, "loss": 0.6627, "step": 1635 }, { "epoch": 0.21, "grad_norm": 0.5225442064975814, "learning_rate": 1.8366479971317895e-05, "loss": 0.6385, "step": 1636 }, { "epoch": 0.21, "grad_norm": 0.629578046223506, "learning_rate": 1.836421298516007e-05, "loss": 0.6543, "step": 1637 }, { "epoch": 0.21, "grad_norm": 0.6029343558205166, "learning_rate": 1.8361944567150596e-05, "loss": 0.6236, "step": 1638 }, { "epoch": 0.21, "grad_norm": 0.5648284410441935, "learning_rate": 1.8359674717677808e-05, "loss": 0.6148, "step": 1639 }, { "epoch": 0.21, "grad_norm": 0.5994342353656518, "learning_rate": 1.835740343713027e-05, "loss": 0.6117, "step": 1640 }, { "epoch": 0.21, "grad_norm": 0.6537468001228051, "learning_rate": 1.83551307258968e-05, "loss": 0.6323, "step": 1641 }, { "epoch": 0.21, "grad_norm": 0.6022332275713319, "learning_rate": 1.835285658436646e-05, "loss": 0.6052, "step": 1642 }, { "epoch": 0.21, "grad_norm": 0.6237525565398535, "learning_rate": 1.8350581012928553e-05, "loss": 0.6381, "step": 1643 }, { "epoch": 0.21, "grad_norm": 0.5774724783631247, "learning_rate": 1.8348304011972627e-05, "loss": 0.5923, "step": 1644 }, { "epoch": 0.21, "grad_norm": 0.6758541715599107, "learning_rate": 1.8346025581888482e-05, "loss": 0.6828, "step": 1645 }, { "epoch": 0.21, "grad_norm": 0.609616682884901, "learning_rate": 1.8343745723066157e-05, "loss": 0.6129, "step": 1646 }, { "epoch": 0.21, "grad_norm": 0.5260650548605103, "learning_rate": 1.8341464435895933e-05, "loss": 0.6109, "step": 1647 }, { "epoch": 0.21, "grad_norm": 0.5571289170477226, "learning_rate": 1.833918172076834e-05, "loss": 0.618, "step": 1648 }, { "epoch": 0.21, "grad_norm": 0.6459453574760148, "learning_rate": 1.8336897578074148e-05, "loss": 0.5986, "step": 1649 }, { "epoch": 0.21, "grad_norm": 0.6789602845956173, "learning_rate": 1.8334612008204383e-05, "loss": 0.6421, "step": 1650 }, { "epoch": 0.21, "grad_norm": 0.543698009872309, "learning_rate": 1.83323250115503e-05, "loss": 0.6255, "step": 1651 }, { "epoch": 0.21, "grad_norm": 0.5941056681136304, "learning_rate": 1.8330036588503405e-05, "loss": 0.6283, "step": 1652 }, { "epoch": 0.21, "grad_norm": 0.5103879034106159, "learning_rate": 1.832774673945545e-05, "loss": 0.6262, "step": 1653 }, { "epoch": 0.21, "grad_norm": 0.5587918513052277, "learning_rate": 1.832545546479843e-05, "loss": 0.6134, "step": 1654 }, { "epoch": 0.21, "grad_norm": 0.5269720468605689, "learning_rate": 1.832316276492458e-05, "loss": 0.6344, "step": 1655 }, { "epoch": 0.21, "grad_norm": 0.6982760574841469, "learning_rate": 1.832086864022639e-05, "loss": 0.6527, "step": 1656 }, { "epoch": 0.21, "grad_norm": 0.588581627956681, "learning_rate": 1.8318573091096578e-05, "loss": 0.6225, "step": 1657 }, { "epoch": 0.21, "grad_norm": 0.5708428632106576, "learning_rate": 1.8316276117928115e-05, "loss": 0.6135, "step": 1658 }, { "epoch": 0.21, "grad_norm": 0.5486391686009757, "learning_rate": 1.831397772111422e-05, "loss": 0.6174, "step": 1659 }, { "epoch": 0.21, "grad_norm": 0.5565558611735126, "learning_rate": 1.8311677901048347e-05, "loss": 0.6081, "step": 1660 }, { "epoch": 0.21, "grad_norm": 0.7203000771292108, "learning_rate": 1.8309376658124196e-05, "loss": 0.6228, "step": 1661 }, { "epoch": 0.21, "grad_norm": 0.6335165612734619, "learning_rate": 1.830707399273572e-05, "loss": 0.6679, "step": 1662 }, { "epoch": 0.21, "grad_norm": 0.5339822337617066, "learning_rate": 1.8304769905277094e-05, "loss": 0.6548, "step": 1663 }, { "epoch": 0.21, "grad_norm": 0.5443872048265019, "learning_rate": 1.8302464396142762e-05, "loss": 0.6429, "step": 1664 }, { "epoch": 0.21, "grad_norm": 0.5282274563319265, "learning_rate": 1.830015746572739e-05, "loss": 0.6531, "step": 1665 }, { "epoch": 0.21, "grad_norm": 0.5046951157681387, "learning_rate": 1.82978491144259e-05, "loss": 0.6199, "step": 1666 }, { "epoch": 0.21, "grad_norm": 0.46510531716160824, "learning_rate": 1.8295539342633458e-05, "loss": 0.6081, "step": 1667 }, { "epoch": 0.21, "grad_norm": 0.5898837561351287, "learning_rate": 1.8293228150745467e-05, "loss": 0.6434, "step": 1668 }, { "epoch": 0.21, "grad_norm": 0.5491494597146959, "learning_rate": 1.8290915539157568e-05, "loss": 0.6221, "step": 1669 }, { "epoch": 0.21, "grad_norm": 0.5442055134733451, "learning_rate": 1.8288601508265656e-05, "loss": 0.6411, "step": 1670 }, { "epoch": 0.21, "grad_norm": 0.5368692818271195, "learning_rate": 1.8286286058465873e-05, "loss": 0.6184, "step": 1671 }, { "epoch": 0.21, "grad_norm": 0.5226676592997229, "learning_rate": 1.8283969190154582e-05, "loss": 0.6311, "step": 1672 }, { "epoch": 0.21, "grad_norm": 0.480717576634785, "learning_rate": 1.8281650903728415e-05, "loss": 0.6344, "step": 1673 }, { "epoch": 0.21, "grad_norm": 0.5285946957197298, "learning_rate": 1.8279331199584226e-05, "loss": 0.5849, "step": 1674 }, { "epoch": 0.21, "grad_norm": 0.6087471206951219, "learning_rate": 1.8277010078119123e-05, "loss": 0.6619, "step": 1675 }, { "epoch": 0.21, "grad_norm": 0.4768170054529353, "learning_rate": 1.8274687539730455e-05, "loss": 0.5997, "step": 1676 }, { "epoch": 0.21, "grad_norm": 0.5858722337242418, "learning_rate": 1.827236358481581e-05, "loss": 0.6424, "step": 1677 }, { "epoch": 0.21, "grad_norm": 0.6277401946762841, "learning_rate": 1.8270038213773027e-05, "loss": 0.6653, "step": 1678 }, { "epoch": 0.21, "grad_norm": 0.643059212955951, "learning_rate": 1.8267711427000172e-05, "loss": 0.6688, "step": 1679 }, { "epoch": 0.21, "grad_norm": 0.5475448976602079, "learning_rate": 1.826538322489557e-05, "loss": 0.6369, "step": 1680 }, { "epoch": 0.21, "grad_norm": 0.6003646579377926, "learning_rate": 1.8263053607857775e-05, "loss": 0.6077, "step": 1681 }, { "epoch": 0.21, "grad_norm": 0.5002548584144602, "learning_rate": 1.8260722576285594e-05, "loss": 0.5966, "step": 1682 }, { "epoch": 0.21, "grad_norm": 0.5655063530612826, "learning_rate": 1.825839013057807e-05, "loss": 0.6498, "step": 1683 }, { "epoch": 0.22, "grad_norm": 0.6357016718267096, "learning_rate": 1.8256056271134487e-05, "loss": 0.6421, "step": 1684 }, { "epoch": 0.22, "grad_norm": 0.5649816040641066, "learning_rate": 1.825372099835438e-05, "loss": 0.625, "step": 1685 }, { "epoch": 0.22, "grad_norm": 0.6084975140795676, "learning_rate": 1.825138431263751e-05, "loss": 0.6474, "step": 1686 }, { "epoch": 0.22, "grad_norm": 0.5330498191257006, "learning_rate": 1.8249046214383893e-05, "loss": 0.593, "step": 1687 }, { "epoch": 0.22, "grad_norm": 0.5031621645394293, "learning_rate": 1.8246706703993788e-05, "loss": 0.6298, "step": 1688 }, { "epoch": 0.22, "grad_norm": 0.5002164104320136, "learning_rate": 1.8244365781867682e-05, "loss": 0.6167, "step": 1689 }, { "epoch": 0.22, "grad_norm": 0.5393024963907074, "learning_rate": 1.8242023448406322e-05, "loss": 0.6195, "step": 1690 }, { "epoch": 0.22, "grad_norm": 0.6392324776115572, "learning_rate": 1.823967970401068e-05, "loss": 0.6, "step": 1691 }, { "epoch": 0.22, "grad_norm": 0.5906578776679292, "learning_rate": 1.8237334549081975e-05, "loss": 0.6544, "step": 1692 }, { "epoch": 0.22, "grad_norm": 0.6189726783196037, "learning_rate": 1.8234987984021673e-05, "loss": 0.6678, "step": 1693 }, { "epoch": 0.22, "grad_norm": 0.6179457869553616, "learning_rate": 1.8232640009231485e-05, "loss": 0.6078, "step": 1694 }, { "epoch": 0.22, "grad_norm": 0.7158530034536379, "learning_rate": 1.823029062511334e-05, "loss": 0.6848, "step": 1695 }, { "epoch": 0.22, "grad_norm": 0.7364379704674675, "learning_rate": 1.8227939832069435e-05, "loss": 0.6679, "step": 1696 }, { "epoch": 0.22, "grad_norm": 0.49383950223018586, "learning_rate": 1.8225587630502193e-05, "loss": 0.6161, "step": 1697 }, { "epoch": 0.22, "grad_norm": 0.6296344301642101, "learning_rate": 1.822323402081428e-05, "loss": 0.6368, "step": 1698 }, { "epoch": 0.22, "grad_norm": 0.5295436794099758, "learning_rate": 1.8220879003408616e-05, "loss": 0.6049, "step": 1699 }, { "epoch": 0.22, "grad_norm": 0.5072272883269056, "learning_rate": 1.821852257868834e-05, "loss": 0.6226, "step": 1700 }, { "epoch": 0.22, "grad_norm": 0.5745701589397315, "learning_rate": 1.8216164747056847e-05, "loss": 0.6238, "step": 1701 }, { "epoch": 0.22, "grad_norm": 0.5939142672418072, "learning_rate": 1.8213805508917773e-05, "loss": 0.6317, "step": 1702 }, { "epoch": 0.22, "grad_norm": 0.5315604187019456, "learning_rate": 1.8211444864674984e-05, "loss": 0.641, "step": 1703 }, { "epoch": 0.22, "grad_norm": 0.5692266682401671, "learning_rate": 1.82090828147326e-05, "loss": 0.6351, "step": 1704 }, { "epoch": 0.22, "grad_norm": 0.7002075916032096, "learning_rate": 1.8206719359494967e-05, "loss": 0.6719, "step": 1705 }, { "epoch": 0.22, "grad_norm": 0.6806107315050938, "learning_rate": 1.8204354499366687e-05, "loss": 0.7062, "step": 1706 }, { "epoch": 0.22, "grad_norm": 0.6074609340170308, "learning_rate": 1.8201988234752596e-05, "loss": 0.6231, "step": 1707 }, { "epoch": 0.22, "grad_norm": 0.5193951937082981, "learning_rate": 1.8199620566057766e-05, "loss": 0.6126, "step": 1708 }, { "epoch": 0.22, "grad_norm": 0.5796105246680183, "learning_rate": 1.8197251493687514e-05, "loss": 0.6057, "step": 1709 }, { "epoch": 0.22, "grad_norm": 0.6672333360530085, "learning_rate": 1.8194881018047398e-05, "loss": 0.7063, "step": 1710 }, { "epoch": 0.22, "grad_norm": 0.5639873810546713, "learning_rate": 1.819250913954321e-05, "loss": 0.6054, "step": 1711 }, { "epoch": 0.22, "grad_norm": 0.5583280568718185, "learning_rate": 1.8190135858580994e-05, "loss": 0.6051, "step": 1712 }, { "epoch": 0.22, "grad_norm": 0.6268021602281456, "learning_rate": 1.818776117556702e-05, "loss": 0.616, "step": 1713 }, { "epoch": 0.22, "grad_norm": 0.5773606653796658, "learning_rate": 1.818538509090781e-05, "loss": 0.6356, "step": 1714 }, { "epoch": 0.22, "grad_norm": 0.5030589389319037, "learning_rate": 1.818300760501012e-05, "loss": 0.6045, "step": 1715 }, { "epoch": 0.22, "grad_norm": 0.5299152340089437, "learning_rate": 1.818062871828094e-05, "loss": 0.634, "step": 1716 }, { "epoch": 0.22, "grad_norm": 0.5198632031486815, "learning_rate": 1.8178248431127518e-05, "loss": 0.5997, "step": 1717 }, { "epoch": 0.22, "grad_norm": 0.5313431639622628, "learning_rate": 1.8175866743957322e-05, "loss": 0.6401, "step": 1718 }, { "epoch": 0.22, "grad_norm": 0.6527728588831384, "learning_rate": 1.817348365717807e-05, "loss": 0.6759, "step": 1719 }, { "epoch": 0.22, "grad_norm": 0.5159213248478756, "learning_rate": 1.8171099171197715e-05, "loss": 0.5978, "step": 1720 }, { "epoch": 0.22, "grad_norm": 0.5766238514754876, "learning_rate": 1.816871328642446e-05, "loss": 0.6667, "step": 1721 }, { "epoch": 0.22, "grad_norm": 0.5249498186186584, "learning_rate": 1.816632600326673e-05, "loss": 0.5906, "step": 1722 }, { "epoch": 0.22, "grad_norm": 0.5165940758789814, "learning_rate": 1.8163937322133205e-05, "loss": 0.6208, "step": 1723 }, { "epoch": 0.22, "grad_norm": 0.5079142498107807, "learning_rate": 1.8161547243432794e-05, "loss": 0.6394, "step": 1724 }, { "epoch": 0.22, "grad_norm": 0.5868657905704275, "learning_rate": 1.815915576757465e-05, "loss": 0.6409, "step": 1725 }, { "epoch": 0.22, "grad_norm": 0.5093391242070887, "learning_rate": 1.8156762894968173e-05, "loss": 0.6334, "step": 1726 }, { "epoch": 0.22, "grad_norm": 0.5566173761530675, "learning_rate": 1.8154368626022982e-05, "loss": 0.66, "step": 1727 }, { "epoch": 0.22, "grad_norm": 0.5522597973272819, "learning_rate": 1.815197296114895e-05, "loss": 0.6504, "step": 1728 }, { "epoch": 0.22, "grad_norm": 0.4936520061292225, "learning_rate": 1.8149575900756193e-05, "loss": 0.6363, "step": 1729 }, { "epoch": 0.22, "grad_norm": 0.5801438271674864, "learning_rate": 1.8147177445255047e-05, "loss": 0.6098, "step": 1730 }, { "epoch": 0.22, "grad_norm": 0.5622781587368962, "learning_rate": 1.814477759505611e-05, "loss": 0.5775, "step": 1731 }, { "epoch": 0.22, "grad_norm": 1.7231301217731925, "learning_rate": 1.8142376350570198e-05, "loss": 0.6463, "step": 1732 }, { "epoch": 0.22, "grad_norm": 0.6198618425595324, "learning_rate": 1.8139973712208377e-05, "loss": 0.6389, "step": 1733 }, { "epoch": 0.22, "grad_norm": 0.5825404159331514, "learning_rate": 1.813756968038196e-05, "loss": 0.6516, "step": 1734 }, { "epoch": 0.22, "grad_norm": 0.5672592657430666, "learning_rate": 1.8135164255502468e-05, "loss": 0.6604, "step": 1735 }, { "epoch": 0.22, "grad_norm": 0.501617874576185, "learning_rate": 1.81327574379817e-05, "loss": 0.5964, "step": 1736 }, { "epoch": 0.22, "grad_norm": 0.6557161553075437, "learning_rate": 1.8130349228231658e-05, "loss": 0.6516, "step": 1737 }, { "epoch": 0.22, "grad_norm": 0.5448987298546855, "learning_rate": 1.812793962666461e-05, "loss": 0.6102, "step": 1738 }, { "epoch": 0.22, "grad_norm": 0.6606128936353627, "learning_rate": 1.8125528633693047e-05, "loss": 0.6716, "step": 1739 }, { "epoch": 0.22, "grad_norm": 0.5681845157710822, "learning_rate": 1.8123116249729697e-05, "loss": 0.6203, "step": 1740 }, { "epoch": 0.22, "grad_norm": 0.5100778096525558, "learning_rate": 1.812070247518754e-05, "loss": 0.608, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.557019079629742, "learning_rate": 1.8118287310479778e-05, "loss": 0.622, "step": 1742 }, { "epoch": 0.22, "grad_norm": 0.648603815894894, "learning_rate": 1.8115870756019855e-05, "loss": 0.6351, "step": 1743 }, { "epoch": 0.22, "grad_norm": 0.508402405379392, "learning_rate": 1.811345281222147e-05, "loss": 0.6333, "step": 1744 }, { "epoch": 0.22, "grad_norm": 0.5812771152287893, "learning_rate": 1.8111033479498525e-05, "loss": 0.6128, "step": 1745 }, { "epoch": 0.22, "grad_norm": 0.4665551813189307, "learning_rate": 1.81086127582652e-05, "loss": 0.6337, "step": 1746 }, { "epoch": 0.22, "grad_norm": 0.5701145036176798, "learning_rate": 1.8106190648935877e-05, "loss": 0.637, "step": 1747 }, { "epoch": 0.22, "grad_norm": 0.5130745884873967, "learning_rate": 1.8103767151925206e-05, "loss": 0.6192, "step": 1748 }, { "epoch": 0.22, "grad_norm": 0.5131270776799072, "learning_rate": 1.8101342267648046e-05, "loss": 0.6044, "step": 1749 }, { "epoch": 0.22, "grad_norm": 0.4816811666423447, "learning_rate": 1.8098915996519522e-05, "loss": 0.5731, "step": 1750 }, { "epoch": 0.22, "grad_norm": 0.5206048943276599, "learning_rate": 1.8096488338954968e-05, "loss": 0.6303, "step": 1751 }, { "epoch": 0.22, "grad_norm": 0.5105165405003427, "learning_rate": 1.8094059295369982e-05, "loss": 0.6109, "step": 1752 }, { "epoch": 0.22, "grad_norm": 0.6043568228421407, "learning_rate": 1.809162886618038e-05, "loss": 0.6184, "step": 1753 }, { "epoch": 0.22, "grad_norm": 0.5622965509877583, "learning_rate": 1.8089197051802222e-05, "loss": 0.6275, "step": 1754 }, { "epoch": 0.22, "grad_norm": 0.557485095574559, "learning_rate": 1.808676385265181e-05, "loss": 0.6573, "step": 1755 }, { "epoch": 0.22, "grad_norm": 0.490704000175524, "learning_rate": 1.8084329269145673e-05, "loss": 0.6134, "step": 1756 }, { "epoch": 0.22, "grad_norm": 0.5137108991869143, "learning_rate": 1.808189330170058e-05, "loss": 0.5935, "step": 1757 }, { "epoch": 0.22, "grad_norm": 0.6512597030121441, "learning_rate": 1.807945595073355e-05, "loss": 0.6554, "step": 1758 }, { "epoch": 0.22, "grad_norm": 0.6146999013987364, "learning_rate": 1.8077017216661815e-05, "loss": 0.6888, "step": 1759 }, { "epoch": 0.22, "grad_norm": 0.5384738216706417, "learning_rate": 1.8074577099902862e-05, "loss": 0.6205, "step": 1760 }, { "epoch": 0.22, "grad_norm": 0.6710665596860607, "learning_rate": 1.807213560087441e-05, "loss": 0.6384, "step": 1761 }, { "epoch": 0.23, "grad_norm": 0.5202410495102456, "learning_rate": 1.8069692719994413e-05, "loss": 0.6133, "step": 1762 }, { "epoch": 0.23, "grad_norm": 0.6213850266900439, "learning_rate": 1.8067248457681066e-05, "loss": 0.6587, "step": 1763 }, { "epoch": 0.23, "grad_norm": 0.539781433115645, "learning_rate": 1.8064802814352792e-05, "loss": 0.6044, "step": 1764 }, { "epoch": 0.23, "grad_norm": 0.5393446756857904, "learning_rate": 1.8062355790428258e-05, "loss": 0.6232, "step": 1765 }, { "epoch": 0.23, "grad_norm": 0.593252084608135, "learning_rate": 1.8059907386326366e-05, "loss": 0.65, "step": 1766 }, { "epoch": 0.23, "grad_norm": 0.5164944661644124, "learning_rate": 1.8057457602466248e-05, "loss": 0.6265, "step": 1767 }, { "epoch": 0.23, "grad_norm": 0.5462515445747084, "learning_rate": 1.805500643926728e-05, "loss": 0.6322, "step": 1768 }, { "epoch": 0.23, "grad_norm": 0.5508739526115096, "learning_rate": 1.8052553897149077e-05, "loss": 0.6327, "step": 1769 }, { "epoch": 0.23, "grad_norm": 0.4857226803161464, "learning_rate": 1.8050099976531473e-05, "loss": 0.5874, "step": 1770 }, { "epoch": 0.23, "grad_norm": 0.5285442791891795, "learning_rate": 1.8047644677834562e-05, "loss": 0.6256, "step": 1771 }, { "epoch": 0.23, "grad_norm": 0.554639166704424, "learning_rate": 1.804518800147865e-05, "loss": 0.6461, "step": 1772 }, { "epoch": 0.23, "grad_norm": 0.6580456380239877, "learning_rate": 1.8042729947884297e-05, "loss": 0.6567, "step": 1773 }, { "epoch": 0.23, "grad_norm": 0.5455381900336208, "learning_rate": 1.804027051747229e-05, "loss": 0.6368, "step": 1774 }, { "epoch": 0.23, "grad_norm": 0.5506550313728844, "learning_rate": 1.8037809710663655e-05, "loss": 0.6679, "step": 1775 }, { "epoch": 0.23, "grad_norm": 0.6411817053775558, "learning_rate": 1.803534752787965e-05, "loss": 0.6953, "step": 1776 }, { "epoch": 0.23, "grad_norm": 0.5462674444196697, "learning_rate": 1.803288396954177e-05, "loss": 0.6221, "step": 1777 }, { "epoch": 0.23, "grad_norm": 0.4906171108832708, "learning_rate": 1.803041903607175e-05, "loss": 0.5974, "step": 1778 }, { "epoch": 0.23, "grad_norm": 0.6147316802252505, "learning_rate": 1.8027952727891552e-05, "loss": 0.6437, "step": 1779 }, { "epoch": 0.23, "grad_norm": 0.562929019741507, "learning_rate": 1.8025485045423386e-05, "loss": 0.6475, "step": 1780 }, { "epoch": 0.23, "grad_norm": 0.6292346968119322, "learning_rate": 1.8023015989089677e-05, "loss": 0.6645, "step": 1781 }, { "epoch": 0.23, "grad_norm": 0.5831453093576444, "learning_rate": 1.802054555931311e-05, "loss": 0.6251, "step": 1782 }, { "epoch": 0.23, "grad_norm": 0.471624899597013, "learning_rate": 1.801807375651658e-05, "loss": 0.6182, "step": 1783 }, { "epoch": 0.23, "grad_norm": 0.5303576970841262, "learning_rate": 1.801560058112324e-05, "loss": 0.5977, "step": 1784 }, { "epoch": 0.23, "grad_norm": 0.5516548614585893, "learning_rate": 1.8013126033556467e-05, "loss": 0.6144, "step": 1785 }, { "epoch": 0.23, "grad_norm": 0.6491660919834973, "learning_rate": 1.801065011423987e-05, "loss": 0.6555, "step": 1786 }, { "epoch": 0.23, "grad_norm": 0.540092345963165, "learning_rate": 1.8008172823597295e-05, "loss": 0.6126, "step": 1787 }, { "epoch": 0.23, "grad_norm": 0.5804632832357682, "learning_rate": 1.8005694162052825e-05, "loss": 0.6542, "step": 1788 }, { "epoch": 0.23, "grad_norm": 0.5888982827120162, "learning_rate": 1.8003214130030778e-05, "loss": 0.6046, "step": 1789 }, { "epoch": 0.23, "grad_norm": 0.5606552246222749, "learning_rate": 1.8000732727955707e-05, "loss": 0.6393, "step": 1790 }, { "epoch": 0.23, "grad_norm": 0.6120833787034671, "learning_rate": 1.7998249956252395e-05, "loss": 0.6466, "step": 1791 }, { "epoch": 0.23, "grad_norm": 0.5449039655121856, "learning_rate": 1.799576581534586e-05, "loss": 0.6582, "step": 1792 }, { "epoch": 0.23, "grad_norm": 0.6102498614469934, "learning_rate": 1.7993280305661364e-05, "loss": 0.6236, "step": 1793 }, { "epoch": 0.23, "grad_norm": 0.5425984426296385, "learning_rate": 1.799079342762439e-05, "loss": 0.5961, "step": 1794 }, { "epoch": 0.23, "grad_norm": 0.5242166731420658, "learning_rate": 1.7988305181660665e-05, "loss": 0.6111, "step": 1795 }, { "epoch": 0.23, "grad_norm": 0.5173281898845589, "learning_rate": 1.7985815568196143e-05, "loss": 0.6198, "step": 1796 }, { "epoch": 0.23, "grad_norm": 0.6014915709337776, "learning_rate": 1.798332458765702e-05, "loss": 0.6183, "step": 1797 }, { "epoch": 0.23, "grad_norm": 0.5178726294032768, "learning_rate": 1.7980832240469716e-05, "loss": 0.5871, "step": 1798 }, { "epoch": 0.23, "grad_norm": 0.5247208255895541, "learning_rate": 1.7978338527060892e-05, "loss": 0.6326, "step": 1799 }, { "epoch": 0.23, "grad_norm": 0.5189377261267836, "learning_rate": 1.7975843447857445e-05, "loss": 0.6083, "step": 1800 }, { "epoch": 0.23, "grad_norm": 0.49921034323485486, "learning_rate": 1.7973347003286503e-05, "loss": 0.6124, "step": 1801 }, { "epoch": 0.23, "grad_norm": 0.5181736526665205, "learning_rate": 1.797084919377542e-05, "loss": 0.5914, "step": 1802 }, { "epoch": 0.23, "grad_norm": 0.5327900176892061, "learning_rate": 1.7968350019751797e-05, "loss": 0.6135, "step": 1803 }, { "epoch": 0.23, "grad_norm": 0.5916811076285594, "learning_rate": 1.796584948164346e-05, "loss": 0.6592, "step": 1804 }, { "epoch": 0.23, "grad_norm": 0.48664951759172215, "learning_rate": 1.796334757987847e-05, "loss": 0.6344, "step": 1805 }, { "epoch": 0.23, "grad_norm": 0.5740284200889235, "learning_rate": 1.7960844314885128e-05, "loss": 0.6448, "step": 1806 }, { "epoch": 0.23, "grad_norm": 0.45895536142564164, "learning_rate": 1.795833968709195e-05, "loss": 0.6068, "step": 1807 }, { "epoch": 0.23, "grad_norm": 0.5344575224978233, "learning_rate": 1.795583369692771e-05, "loss": 0.6161, "step": 1808 }, { "epoch": 0.23, "grad_norm": 0.5584043202742852, "learning_rate": 1.7953326344821398e-05, "loss": 0.5947, "step": 1809 }, { "epoch": 0.23, "grad_norm": 0.5271591844662529, "learning_rate": 1.795081763120224e-05, "loss": 0.585, "step": 1810 }, { "epoch": 0.23, "grad_norm": 0.5779956673997669, "learning_rate": 1.7948307556499706e-05, "loss": 0.6097, "step": 1811 }, { "epoch": 0.23, "grad_norm": 0.5567528708009007, "learning_rate": 1.7945796121143478e-05, "loss": 0.6166, "step": 1812 }, { "epoch": 0.23, "grad_norm": 0.5560155880587995, "learning_rate": 1.794328332556349e-05, "loss": 0.6117, "step": 1813 }, { "epoch": 0.23, "grad_norm": 0.4923872354859527, "learning_rate": 1.7940769170189903e-05, "loss": 0.6128, "step": 1814 }, { "epoch": 0.23, "grad_norm": 0.5828882222441099, "learning_rate": 1.793825365545311e-05, "loss": 0.5948, "step": 1815 }, { "epoch": 0.23, "grad_norm": 0.5356660310893544, "learning_rate": 1.7935736781783736e-05, "loss": 0.6294, "step": 1816 }, { "epoch": 0.23, "grad_norm": 0.6204050914398815, "learning_rate": 1.7933218549612634e-05, "loss": 0.6244, "step": 1817 }, { "epoch": 0.23, "grad_norm": 0.4889344169463688, "learning_rate": 1.79306989593709e-05, "loss": 0.6244, "step": 1818 }, { "epoch": 0.23, "grad_norm": 0.4996986095628371, "learning_rate": 1.792817801148986e-05, "loss": 0.606, "step": 1819 }, { "epoch": 0.23, "grad_norm": 0.5855187166177146, "learning_rate": 1.792565570640106e-05, "loss": 0.6105, "step": 1820 }, { "epoch": 0.23, "grad_norm": 0.667219329345179, "learning_rate": 1.7923132044536297e-05, "loss": 0.6754, "step": 1821 }, { "epoch": 0.23, "grad_norm": 0.573160815078035, "learning_rate": 1.792060702632759e-05, "loss": 0.6082, "step": 1822 }, { "epoch": 0.23, "grad_norm": 0.6064678868307072, "learning_rate": 1.791808065220719e-05, "loss": 0.6469, "step": 1823 }, { "epoch": 0.23, "grad_norm": 0.596302293081221, "learning_rate": 1.791555292260758e-05, "loss": 0.656, "step": 1824 }, { "epoch": 0.23, "grad_norm": 0.6334255234929936, "learning_rate": 1.791302383796147e-05, "loss": 0.623, "step": 1825 }, { "epoch": 0.23, "grad_norm": 0.5376452057197487, "learning_rate": 1.7910493398701825e-05, "loss": 0.6236, "step": 1826 }, { "epoch": 0.23, "grad_norm": 0.5728460178896203, "learning_rate": 1.790796160526181e-05, "loss": 0.6389, "step": 1827 }, { "epoch": 0.23, "grad_norm": 0.6439870387238738, "learning_rate": 1.790542845807485e-05, "loss": 0.6722, "step": 1828 }, { "epoch": 0.23, "grad_norm": 0.5919493683073114, "learning_rate": 1.790289395757458e-05, "loss": 0.6614, "step": 1829 }, { "epoch": 0.23, "grad_norm": 0.5896325611443174, "learning_rate": 1.7900358104194882e-05, "loss": 0.6132, "step": 1830 }, { "epoch": 0.23, "grad_norm": 0.5768968761152933, "learning_rate": 1.7897820898369857e-05, "loss": 0.6535, "step": 1831 }, { "epoch": 0.23, "grad_norm": 0.492303904487005, "learning_rate": 1.7895282340533847e-05, "loss": 0.6453, "step": 1832 }, { "epoch": 0.23, "grad_norm": 0.6214740004979973, "learning_rate": 1.7892742431121424e-05, "loss": 0.6489, "step": 1833 }, { "epoch": 0.23, "grad_norm": 0.5357709346763541, "learning_rate": 1.7890201170567383e-05, "loss": 0.6107, "step": 1834 }, { "epoch": 0.23, "grad_norm": 0.6212533829977556, "learning_rate": 1.7887658559306768e-05, "loss": 0.6596, "step": 1835 }, { "epoch": 0.23, "grad_norm": 0.5550252155834766, "learning_rate": 1.7885114597774835e-05, "loss": 0.6243, "step": 1836 }, { "epoch": 0.23, "grad_norm": 0.4995759176961249, "learning_rate": 1.788256928640708e-05, "loss": 0.5955, "step": 1837 }, { "epoch": 0.23, "grad_norm": 0.4684182524202191, "learning_rate": 1.7880022625639228e-05, "loss": 0.6036, "step": 1838 }, { "epoch": 0.23, "grad_norm": 0.5053366142989162, "learning_rate": 1.787747461590724e-05, "loss": 0.647, "step": 1839 }, { "epoch": 0.24, "grad_norm": 0.43419035384048155, "learning_rate": 1.7874925257647303e-05, "loss": 0.5834, "step": 1840 }, { "epoch": 0.24, "grad_norm": 0.5203395560661631, "learning_rate": 1.787237455129584e-05, "loss": 0.608, "step": 1841 }, { "epoch": 0.24, "grad_norm": 0.47691196860017926, "learning_rate": 1.7869822497289496e-05, "loss": 0.6373, "step": 1842 }, { "epoch": 0.24, "grad_norm": 0.6486149822679957, "learning_rate": 1.786726909606515e-05, "loss": 0.6307, "step": 1843 }, { "epoch": 0.24, "grad_norm": 0.5035333404229225, "learning_rate": 1.786471434805992e-05, "loss": 0.5961, "step": 1844 }, { "epoch": 0.24, "grad_norm": 0.5704098830143041, "learning_rate": 1.786215825371114e-05, "loss": 0.6376, "step": 1845 }, { "epoch": 0.24, "grad_norm": 0.6160222887058944, "learning_rate": 1.7859600813456386e-05, "loss": 0.6716, "step": 1846 }, { "epoch": 0.24, "grad_norm": 0.5369604953520863, "learning_rate": 1.7857042027733468e-05, "loss": 0.5803, "step": 1847 }, { "epoch": 0.24, "grad_norm": 0.5203757414886049, "learning_rate": 1.78544818969804e-05, "loss": 0.6147, "step": 1848 }, { "epoch": 0.24, "grad_norm": 0.5868429363344659, "learning_rate": 1.785192042163547e-05, "loss": 0.645, "step": 1849 }, { "epoch": 0.24, "grad_norm": 0.4889117495508485, "learning_rate": 1.7849357602137153e-05, "loss": 0.584, "step": 1850 }, { "epoch": 0.24, "grad_norm": 0.5929790780659032, "learning_rate": 1.7846793438924177e-05, "loss": 0.6393, "step": 1851 }, { "epoch": 0.24, "grad_norm": 0.6792156598988853, "learning_rate": 1.78442279324355e-05, "loss": 0.6763, "step": 1852 }, { "epoch": 0.24, "grad_norm": 0.5520363231281776, "learning_rate": 1.78416610831103e-05, "loss": 0.5851, "step": 1853 }, { "epoch": 0.24, "grad_norm": 0.5198978398954711, "learning_rate": 1.783909289138799e-05, "loss": 0.613, "step": 1854 }, { "epoch": 0.24, "grad_norm": 0.48537233170676575, "learning_rate": 1.783652335770822e-05, "loss": 0.6165, "step": 1855 }, { "epoch": 0.24, "grad_norm": 0.558947084651956, "learning_rate": 1.7833952482510863e-05, "loss": 0.6359, "step": 1856 }, { "epoch": 0.24, "grad_norm": 0.5629482625081264, "learning_rate": 1.783138026623601e-05, "loss": 0.6383, "step": 1857 }, { "epoch": 0.24, "grad_norm": 0.5142854817719335, "learning_rate": 1.7828806709324005e-05, "loss": 0.631, "step": 1858 }, { "epoch": 0.24, "grad_norm": 0.6143981794150305, "learning_rate": 1.7826231812215404e-05, "loss": 0.6174, "step": 1859 }, { "epoch": 0.24, "grad_norm": 0.5892543410375956, "learning_rate": 1.7823655575351e-05, "loss": 0.6416, "step": 1860 }, { "epoch": 0.24, "grad_norm": 0.542186392669437, "learning_rate": 1.7821077999171813e-05, "loss": 0.6099, "step": 1861 }, { "epoch": 0.24, "grad_norm": 0.5977032409804601, "learning_rate": 1.7818499084119092e-05, "loss": 0.6025, "step": 1862 }, { "epoch": 0.24, "grad_norm": 0.5750268894988311, "learning_rate": 1.781591883063432e-05, "loss": 0.6341, "step": 1863 }, { "epoch": 0.24, "grad_norm": 0.5498125109266225, "learning_rate": 1.78133372391592e-05, "loss": 0.5898, "step": 1864 }, { "epoch": 0.24, "grad_norm": 0.5355466856284208, "learning_rate": 1.781075431013567e-05, "loss": 0.5842, "step": 1865 }, { "epoch": 0.24, "grad_norm": 0.5424317370189664, "learning_rate": 1.7808170044005897e-05, "loss": 0.6391, "step": 1866 }, { "epoch": 0.24, "grad_norm": 0.7207003413156132, "learning_rate": 1.7805584441212277e-05, "loss": 0.6291, "step": 1867 }, { "epoch": 0.24, "grad_norm": 0.5269960951003296, "learning_rate": 1.780299750219743e-05, "loss": 0.6319, "step": 1868 }, { "epoch": 0.24, "grad_norm": 0.6338333415855314, "learning_rate": 1.780040922740421e-05, "loss": 0.661, "step": 1869 }, { "epoch": 0.24, "grad_norm": 0.5541416441959408, "learning_rate": 1.7797819617275704e-05, "loss": 0.6359, "step": 1870 }, { "epoch": 0.24, "grad_norm": 0.5391269529420364, "learning_rate": 1.7795228672255215e-05, "loss": 0.5968, "step": 1871 }, { "epoch": 0.24, "grad_norm": 0.5672379244001748, "learning_rate": 1.7792636392786286e-05, "loss": 0.6051, "step": 1872 }, { "epoch": 0.24, "grad_norm": 0.5958036673271127, "learning_rate": 1.7790042779312676e-05, "loss": 0.6368, "step": 1873 }, { "epoch": 0.24, "grad_norm": 0.5420003204150368, "learning_rate": 1.778744783227839e-05, "loss": 0.6155, "step": 1874 }, { "epoch": 0.24, "grad_norm": 0.5513003387460684, "learning_rate": 1.7784851552127645e-05, "loss": 0.589, "step": 1875 }, { "epoch": 0.24, "grad_norm": 0.5306359063878842, "learning_rate": 1.7782253939304897e-05, "loss": 0.6481, "step": 1876 }, { "epoch": 0.24, "grad_norm": 0.525206947843577, "learning_rate": 1.777965499425482e-05, "loss": 0.5918, "step": 1877 }, { "epoch": 0.24, "grad_norm": 0.627419548194796, "learning_rate": 1.7777054717422324e-05, "loss": 0.6216, "step": 1878 }, { "epoch": 0.24, "grad_norm": 0.542374119906655, "learning_rate": 1.7774453109252548e-05, "loss": 0.6244, "step": 1879 }, { "epoch": 0.24, "grad_norm": 0.5145629429929759, "learning_rate": 1.7771850170190852e-05, "loss": 0.6062, "step": 1880 }, { "epoch": 0.24, "grad_norm": 0.6502940011930372, "learning_rate": 1.776924590068283e-05, "loss": 0.6242, "step": 1881 }, { "epoch": 0.24, "grad_norm": 0.5372929357755639, "learning_rate": 1.77666403011743e-05, "loss": 0.6483, "step": 1882 }, { "epoch": 0.24, "grad_norm": 0.5858038452341825, "learning_rate": 1.776403337211131e-05, "loss": 0.6059, "step": 1883 }, { "epoch": 0.24, "grad_norm": 0.5469774469846883, "learning_rate": 1.7761425113940132e-05, "loss": 0.6797, "step": 1884 }, { "epoch": 0.24, "grad_norm": 0.6236924202274268, "learning_rate": 1.7758815527107273e-05, "loss": 0.642, "step": 1885 }, { "epoch": 0.24, "grad_norm": 0.5234863153997781, "learning_rate": 1.7756204612059457e-05, "loss": 0.5974, "step": 1886 }, { "epoch": 0.24, "grad_norm": 0.6263733229182185, "learning_rate": 1.7753592369243644e-05, "loss": 0.7111, "step": 1887 }, { "epoch": 0.24, "grad_norm": 0.5708462505951932, "learning_rate": 1.775097879910702e-05, "loss": 0.6422, "step": 1888 }, { "epoch": 0.24, "grad_norm": 0.54776203267033, "learning_rate": 1.774836390209699e-05, "loss": 0.6173, "step": 1889 }, { "epoch": 0.24, "grad_norm": 0.5227490191961419, "learning_rate": 1.77457476786612e-05, "loss": 0.593, "step": 1890 }, { "epoch": 0.24, "grad_norm": 0.6051914808206589, "learning_rate": 1.774313012924751e-05, "loss": 0.6491, "step": 1891 }, { "epoch": 0.24, "grad_norm": 0.5477252908142813, "learning_rate": 1.7740511254304018e-05, "loss": 0.6408, "step": 1892 }, { "epoch": 0.24, "grad_norm": 0.5387654119732308, "learning_rate": 1.7737891054279037e-05, "loss": 0.6212, "step": 1893 }, { "epoch": 0.24, "grad_norm": 0.4868231993453604, "learning_rate": 1.773526952962112e-05, "loss": 0.6135, "step": 1894 }, { "epoch": 0.24, "grad_norm": 0.510046056228274, "learning_rate": 1.7732646680779034e-05, "loss": 0.6126, "step": 1895 }, { "epoch": 0.24, "grad_norm": 0.6162157214921007, "learning_rate": 1.7730022508201786e-05, "loss": 0.6562, "step": 1896 }, { "epoch": 0.24, "grad_norm": 0.6026229873554201, "learning_rate": 1.7727397012338598e-05, "loss": 0.6798, "step": 1897 }, { "epoch": 0.24, "grad_norm": 0.7000600223943196, "learning_rate": 1.7724770193638923e-05, "loss": 0.6577, "step": 1898 }, { "epoch": 0.24, "grad_norm": 0.5760243007276925, "learning_rate": 1.772214205255244e-05, "loss": 0.6088, "step": 1899 }, { "epoch": 0.24, "grad_norm": 0.5415909430995252, "learning_rate": 1.7719512589529054e-05, "loss": 0.6384, "step": 1900 }, { "epoch": 0.24, "grad_norm": 0.6184994476662242, "learning_rate": 1.7716881805018905e-05, "loss": 0.6391, "step": 1901 }, { "epoch": 0.24, "grad_norm": 0.5448176401909841, "learning_rate": 1.771424969947234e-05, "loss": 0.6049, "step": 1902 }, { "epoch": 0.24, "grad_norm": 0.5585317649968593, "learning_rate": 1.771161627333995e-05, "loss": 0.6364, "step": 1903 }, { "epoch": 0.24, "grad_norm": 0.5370233630575401, "learning_rate": 1.7708981527072547e-05, "loss": 0.6046, "step": 1904 }, { "epoch": 0.24, "grad_norm": 0.5821652893183465, "learning_rate": 1.770634546112116e-05, "loss": 0.6284, "step": 1905 }, { "epoch": 0.24, "grad_norm": 0.594083235958051, "learning_rate": 1.770370807593706e-05, "loss": 0.6431, "step": 1906 }, { "epoch": 0.24, "grad_norm": 0.5636378359876315, "learning_rate": 1.770106937197173e-05, "loss": 0.6025, "step": 1907 }, { "epoch": 0.24, "grad_norm": 0.5348814568155454, "learning_rate": 1.7698429349676887e-05, "loss": 0.6332, "step": 1908 }, { "epoch": 0.24, "grad_norm": 0.565106269887924, "learning_rate": 1.769578800950447e-05, "loss": 0.6343, "step": 1909 }, { "epoch": 0.24, "grad_norm": 0.5589802343622724, "learning_rate": 1.7693145351906643e-05, "loss": 0.6701, "step": 1910 }, { "epoch": 0.24, "grad_norm": 0.4640304503276962, "learning_rate": 1.76905013773358e-05, "loss": 0.6062, "step": 1911 }, { "epoch": 0.24, "grad_norm": 0.5572665523109036, "learning_rate": 1.768785608624455e-05, "loss": 0.6478, "step": 1912 }, { "epoch": 0.24, "grad_norm": 0.5340076677093082, "learning_rate": 1.768520947908574e-05, "loss": 0.6347, "step": 1913 }, { "epoch": 0.24, "grad_norm": 0.5481130892309343, "learning_rate": 1.768256155631244e-05, "loss": 0.6137, "step": 1914 }, { "epoch": 0.24, "grad_norm": 0.5992295727620273, "learning_rate": 1.767991231837794e-05, "loss": 0.6244, "step": 1915 }, { "epoch": 0.24, "grad_norm": 0.5449217226395618, "learning_rate": 1.7677261765735753e-05, "loss": 0.587, "step": 1916 }, { "epoch": 0.24, "grad_norm": 0.6857618376835581, "learning_rate": 1.7674609898839623e-05, "loss": 0.6599, "step": 1917 }, { "epoch": 0.25, "grad_norm": 0.5505154220243071, "learning_rate": 1.7671956718143524e-05, "loss": 0.6756, "step": 1918 }, { "epoch": 0.25, "grad_norm": 0.8220221644552762, "learning_rate": 1.766930222410164e-05, "loss": 0.6216, "step": 1919 }, { "epoch": 0.25, "grad_norm": 0.6017704013462103, "learning_rate": 1.7666646417168392e-05, "loss": 0.6294, "step": 1920 }, { "epoch": 0.25, "grad_norm": 0.4773223016687485, "learning_rate": 1.7663989297798423e-05, "loss": 0.6151, "step": 1921 }, { "epoch": 0.25, "grad_norm": 0.47487252895363596, "learning_rate": 1.7661330866446595e-05, "loss": 0.646, "step": 1922 }, { "epoch": 0.25, "grad_norm": 0.552297286268171, "learning_rate": 1.7658671123568004e-05, "loss": 0.5676, "step": 1923 }, { "epoch": 0.25, "grad_norm": 0.6171539398996937, "learning_rate": 1.7656010069617962e-05, "loss": 0.643, "step": 1924 }, { "epoch": 0.25, "grad_norm": 0.5566750549387065, "learning_rate": 1.7653347705052015e-05, "loss": 0.631, "step": 1925 }, { "epoch": 0.25, "grad_norm": 0.6981129066847652, "learning_rate": 1.765068403032592e-05, "loss": 0.6365, "step": 1926 }, { "epoch": 0.25, "grad_norm": 0.676454810965438, "learning_rate": 1.7648019045895674e-05, "loss": 0.6589, "step": 1927 }, { "epoch": 0.25, "grad_norm": 0.6078734143465241, "learning_rate": 1.7645352752217476e-05, "loss": 0.6206, "step": 1928 }, { "epoch": 0.25, "grad_norm": 0.6233426392289227, "learning_rate": 1.764268514974778e-05, "loss": 0.6412, "step": 1929 }, { "epoch": 0.25, "grad_norm": 0.5687944529585575, "learning_rate": 1.7640016238943235e-05, "loss": 0.6054, "step": 1930 }, { "epoch": 0.25, "grad_norm": 0.6274248590483932, "learning_rate": 1.7637346020260734e-05, "loss": 0.6333, "step": 1931 }, { "epoch": 0.25, "grad_norm": 0.5612244697397567, "learning_rate": 1.7634674494157376e-05, "loss": 0.5846, "step": 1932 }, { "epoch": 0.25, "grad_norm": 0.5256350824037235, "learning_rate": 1.7632001661090505e-05, "loss": 0.6144, "step": 1933 }, { "epoch": 0.25, "grad_norm": 0.6072882896863179, "learning_rate": 1.7629327521517675e-05, "loss": 0.6698, "step": 1934 }, { "epoch": 0.25, "grad_norm": 0.6195158358534228, "learning_rate": 1.7626652075896656e-05, "loss": 0.6659, "step": 1935 }, { "epoch": 0.25, "grad_norm": 0.6136470288638709, "learning_rate": 1.7623975324685467e-05, "loss": 0.6785, "step": 1936 }, { "epoch": 0.25, "grad_norm": 0.5753654686147731, "learning_rate": 1.7621297268342327e-05, "loss": 0.6409, "step": 1937 }, { "epoch": 0.25, "grad_norm": 0.5551387289090514, "learning_rate": 1.7618617907325685e-05, "loss": 0.6518, "step": 1938 }, { "epoch": 0.25, "grad_norm": 0.4775535862445281, "learning_rate": 1.761593724209422e-05, "loss": 0.6098, "step": 1939 }, { "epoch": 0.25, "grad_norm": 0.46980217171234456, "learning_rate": 1.7613255273106827e-05, "loss": 0.5947, "step": 1940 }, { "epoch": 0.25, "grad_norm": 0.5501097697501229, "learning_rate": 1.7610572000822622e-05, "loss": 0.6134, "step": 1941 }, { "epoch": 0.25, "grad_norm": 0.5615416284028574, "learning_rate": 1.7607887425700957e-05, "loss": 0.6618, "step": 1942 }, { "epoch": 0.25, "grad_norm": 0.5051720737609424, "learning_rate": 1.7605201548201395e-05, "loss": 0.6179, "step": 1943 }, { "epoch": 0.25, "grad_norm": 0.5534016651566848, "learning_rate": 1.7602514368783727e-05, "loss": 0.6196, "step": 1944 }, { "epoch": 0.25, "grad_norm": 0.540138990891529, "learning_rate": 1.759982588790796e-05, "loss": 0.6192, "step": 1945 }, { "epoch": 0.25, "grad_norm": 0.49440484275850305, "learning_rate": 1.7597136106034335e-05, "loss": 0.6041, "step": 1946 }, { "epoch": 0.25, "grad_norm": 0.5780189981956229, "learning_rate": 1.7594445023623307e-05, "loss": 0.6437, "step": 1947 }, { "epoch": 0.25, "grad_norm": 0.5491936006336688, "learning_rate": 1.759175264113556e-05, "loss": 0.6588, "step": 1948 }, { "epoch": 0.25, "grad_norm": 0.4997808778796376, "learning_rate": 1.7589058959031996e-05, "loss": 0.6002, "step": 1949 }, { "epoch": 0.25, "grad_norm": 0.5733956187530164, "learning_rate": 1.7586363977773737e-05, "loss": 0.6249, "step": 1950 }, { "epoch": 0.25, "grad_norm": 0.5108210593855049, "learning_rate": 1.7583667697822133e-05, "loss": 0.6137, "step": 1951 }, { "epoch": 0.25, "grad_norm": 0.6169945275869113, "learning_rate": 1.7580970119638756e-05, "loss": 0.6398, "step": 1952 }, { "epoch": 0.25, "grad_norm": 0.5809397871108608, "learning_rate": 1.75782712436854e-05, "loss": 0.6754, "step": 1953 }, { "epoch": 0.25, "grad_norm": 0.48732390575480455, "learning_rate": 1.7575571070424073e-05, "loss": 0.5897, "step": 1954 }, { "epoch": 0.25, "grad_norm": 0.5894206423159044, "learning_rate": 1.7572869600317018e-05, "loss": 0.6509, "step": 1955 }, { "epoch": 0.25, "grad_norm": 0.5856038299570371, "learning_rate": 1.7570166833826694e-05, "loss": 0.6222, "step": 1956 }, { "epoch": 0.25, "grad_norm": 0.6152878403401545, "learning_rate": 1.7567462771415775e-05, "loss": 0.6384, "step": 1957 }, { "epoch": 0.25, "grad_norm": 0.5386514943625174, "learning_rate": 1.7564757413547174e-05, "loss": 0.6277, "step": 1958 }, { "epoch": 0.25, "grad_norm": 0.5195273751578331, "learning_rate": 1.7562050760684005e-05, "loss": 0.5974, "step": 1959 }, { "epoch": 0.25, "grad_norm": 0.6064639199099743, "learning_rate": 1.7559342813289623e-05, "loss": 0.639, "step": 1960 }, { "epoch": 0.25, "grad_norm": 0.5773638757520141, "learning_rate": 1.7556633571827593e-05, "loss": 0.6022, "step": 1961 }, { "epoch": 0.25, "grad_norm": 0.4792186008583918, "learning_rate": 1.75539230367617e-05, "loss": 0.6192, "step": 1962 }, { "epoch": 0.25, "grad_norm": 0.5884273698888408, "learning_rate": 1.7551211208555957e-05, "loss": 0.6629, "step": 1963 }, { "epoch": 0.25, "grad_norm": 0.5427672844531606, "learning_rate": 1.75484980876746e-05, "loss": 0.6247, "step": 1964 }, { "epoch": 0.25, "grad_norm": 0.5303131027442378, "learning_rate": 1.7545783674582075e-05, "loss": 0.6372, "step": 1965 }, { "epoch": 0.25, "grad_norm": 0.7722992511437068, "learning_rate": 1.754306796974306e-05, "loss": 0.6464, "step": 1966 }, { "epoch": 0.25, "grad_norm": 0.6211579022483177, "learning_rate": 1.7540350973622454e-05, "loss": 0.6056, "step": 1967 }, { "epoch": 0.25, "grad_norm": 0.5155700708587959, "learning_rate": 1.7537632686685372e-05, "loss": 0.6103, "step": 1968 }, { "epoch": 0.25, "grad_norm": 0.5457299214210911, "learning_rate": 1.7534913109397147e-05, "loss": 0.6238, "step": 1969 }, { "epoch": 0.25, "grad_norm": 0.6757176880599691, "learning_rate": 1.753219224222334e-05, "loss": 0.6034, "step": 1970 }, { "epoch": 0.25, "grad_norm": 0.5889746502463226, "learning_rate": 1.752947008562974e-05, "loss": 0.6368, "step": 1971 }, { "epoch": 0.25, "grad_norm": 0.5688117367593091, "learning_rate": 1.7526746640082332e-05, "loss": 0.6283, "step": 1972 }, { "epoch": 0.25, "grad_norm": 0.596574582345115, "learning_rate": 1.7524021906047343e-05, "loss": 0.6234, "step": 1973 }, { "epoch": 0.25, "grad_norm": 0.6671731125518668, "learning_rate": 1.7521295883991215e-05, "loss": 0.6075, "step": 1974 }, { "epoch": 0.25, "grad_norm": 0.619960184559358, "learning_rate": 1.7518568574380615e-05, "loss": 0.6179, "step": 1975 }, { "epoch": 0.25, "grad_norm": 0.5776372787811582, "learning_rate": 1.7515839977682415e-05, "loss": 0.6265, "step": 1976 }, { "epoch": 0.25, "grad_norm": 0.6213643258211674, "learning_rate": 1.7513110094363727e-05, "loss": 0.6368, "step": 1977 }, { "epoch": 0.25, "grad_norm": 0.5416192173892521, "learning_rate": 1.7510378924891864e-05, "loss": 0.5987, "step": 1978 }, { "epoch": 0.25, "grad_norm": 0.638967968254364, "learning_rate": 1.750764646973438e-05, "loss": 0.6753, "step": 1979 }, { "epoch": 0.25, "grad_norm": 0.6751088044819765, "learning_rate": 1.7504912729359026e-05, "loss": 0.6821, "step": 1980 }, { "epoch": 0.25, "grad_norm": 0.619127393619351, "learning_rate": 1.7502177704233798e-05, "loss": 0.6342, "step": 1981 }, { "epoch": 0.25, "grad_norm": 0.4724382821629467, "learning_rate": 1.7499441394826888e-05, "loss": 0.6478, "step": 1982 }, { "epoch": 0.25, "grad_norm": 0.5718424176055632, "learning_rate": 1.749670380160673e-05, "loss": 0.6294, "step": 1983 }, { "epoch": 0.25, "grad_norm": 0.5721799558983616, "learning_rate": 1.7493964925041955e-05, "loss": 0.6573, "step": 1984 }, { "epoch": 0.25, "grad_norm": 0.5024100213046413, "learning_rate": 1.7491224765601434e-05, "loss": 0.6022, "step": 1985 }, { "epoch": 0.25, "grad_norm": 0.6005769139564704, "learning_rate": 1.7488483323754245e-05, "loss": 0.67, "step": 1986 }, { "epoch": 0.25, "grad_norm": 0.5796668829493519, "learning_rate": 1.7485740599969686e-05, "loss": 0.6301, "step": 1987 }, { "epoch": 0.25, "grad_norm": 0.5154182881816252, "learning_rate": 1.7482996594717286e-05, "loss": 0.5916, "step": 1988 }, { "epoch": 0.25, "grad_norm": 0.6131243280115425, "learning_rate": 1.7480251308466782e-05, "loss": 0.6843, "step": 1989 }, { "epoch": 0.25, "grad_norm": 0.6391167827733172, "learning_rate": 1.7477504741688135e-05, "loss": 0.6326, "step": 1990 }, { "epoch": 0.25, "grad_norm": 0.552903064373865, "learning_rate": 1.7474756894851522e-05, "loss": 0.6051, "step": 1991 }, { "epoch": 0.25, "grad_norm": 0.5275703676849343, "learning_rate": 1.7472007768427338e-05, "loss": 0.6148, "step": 1992 }, { "epoch": 0.25, "grad_norm": 0.5261123581896752, "learning_rate": 1.7469257362886206e-05, "loss": 0.6155, "step": 1993 }, { "epoch": 0.25, "grad_norm": 0.6053983985836339, "learning_rate": 1.746650567869896e-05, "loss": 0.6282, "step": 1994 }, { "epoch": 0.25, "grad_norm": 0.9037395338794387, "learning_rate": 1.746375271633665e-05, "loss": 0.6665, "step": 1995 }, { "epoch": 0.25, "grad_norm": 0.5971934133942423, "learning_rate": 1.746099847627056e-05, "loss": 0.6084, "step": 1996 }, { "epoch": 0.26, "grad_norm": 0.6104890591253533, "learning_rate": 1.7458242958972173e-05, "loss": 0.6465, "step": 1997 }, { "epoch": 0.26, "grad_norm": 0.542336968675384, "learning_rate": 1.7455486164913203e-05, "loss": 0.6221, "step": 1998 }, { "epoch": 0.26, "grad_norm": 0.5409805023784681, "learning_rate": 1.745272809456558e-05, "loss": 0.5716, "step": 1999 }, { "epoch": 0.26, "grad_norm": 0.5319277163168228, "learning_rate": 1.7449968748401452e-05, "loss": 0.5962, "step": 2000 }, { "epoch": 0.26, "grad_norm": 0.5907229529288455, "learning_rate": 1.7447208126893186e-05, "loss": 0.6348, "step": 2001 }, { "epoch": 0.26, "grad_norm": 0.7030910209720149, "learning_rate": 1.744444623051337e-05, "loss": 0.6599, "step": 2002 }, { "epoch": 0.26, "grad_norm": 0.6038014142794851, "learning_rate": 1.7441683059734803e-05, "loss": 0.6559, "step": 2003 }, { "epoch": 0.26, "grad_norm": 0.5070414050829589, "learning_rate": 1.7438918615030504e-05, "loss": 0.6329, "step": 2004 }, { "epoch": 0.26, "grad_norm": 0.4787288004561333, "learning_rate": 1.7436152896873717e-05, "loss": 0.6293, "step": 2005 }, { "epoch": 0.26, "grad_norm": 0.507985667087478, "learning_rate": 1.7433385905737898e-05, "loss": 0.6379, "step": 2006 }, { "epoch": 0.26, "grad_norm": 0.5860991903321647, "learning_rate": 1.7430617642096723e-05, "loss": 0.6059, "step": 2007 }, { "epoch": 0.26, "grad_norm": 0.6014074920687641, "learning_rate": 1.742784810642408e-05, "loss": 0.6602, "step": 2008 }, { "epoch": 0.26, "grad_norm": 0.5177169520339077, "learning_rate": 1.7425077299194088e-05, "loss": 0.6148, "step": 2009 }, { "epoch": 0.26, "grad_norm": 0.598573941225379, "learning_rate": 1.7422305220881067e-05, "loss": 0.6192, "step": 2010 }, { "epoch": 0.26, "grad_norm": 0.5569961063748131, "learning_rate": 1.741953187195957e-05, "loss": 0.5755, "step": 2011 }, { "epoch": 0.26, "grad_norm": 0.5890541730687748, "learning_rate": 1.741675725290436e-05, "loss": 0.675, "step": 2012 }, { "epoch": 0.26, "grad_norm": 0.541716060026264, "learning_rate": 1.741398136419042e-05, "loss": 0.6009, "step": 2013 }, { "epoch": 0.26, "grad_norm": 0.5746220099409641, "learning_rate": 1.7411204206292938e-05, "loss": 0.6107, "step": 2014 }, { "epoch": 0.26, "grad_norm": 0.5520627743737315, "learning_rate": 1.7408425779687336e-05, "loss": 0.6401, "step": 2015 }, { "epoch": 0.26, "grad_norm": 0.624422671292786, "learning_rate": 1.740564608484925e-05, "loss": 0.6552, "step": 2016 }, { "epoch": 0.26, "grad_norm": 0.4919185007458202, "learning_rate": 1.7402865122254525e-05, "loss": 0.6035, "step": 2017 }, { "epoch": 0.26, "grad_norm": 0.5807734787437595, "learning_rate": 1.7400082892379236e-05, "loss": 0.6772, "step": 2018 }, { "epoch": 0.26, "grad_norm": 0.5811378088580812, "learning_rate": 1.7397299395699655e-05, "loss": 0.6192, "step": 2019 }, { "epoch": 0.26, "grad_norm": 0.5542225264220922, "learning_rate": 1.7394514632692296e-05, "loss": 0.6391, "step": 2020 }, { "epoch": 0.26, "grad_norm": 0.4833861868238149, "learning_rate": 1.7391728603833865e-05, "loss": 0.5904, "step": 2021 }, { "epoch": 0.26, "grad_norm": 0.5946046770812992, "learning_rate": 1.73889413096013e-05, "loss": 0.5969, "step": 2022 }, { "epoch": 0.26, "grad_norm": 0.565783365588745, "learning_rate": 1.7386152750471755e-05, "loss": 0.6152, "step": 2023 }, { "epoch": 0.26, "grad_norm": 0.5085926806739208, "learning_rate": 1.7383362926922594e-05, "loss": 0.6458, "step": 2024 }, { "epoch": 0.26, "grad_norm": 0.5423734894776019, "learning_rate": 1.73805718394314e-05, "loss": 0.6225, "step": 2025 }, { "epoch": 0.26, "grad_norm": 0.5830800414646758, "learning_rate": 1.737777948847598e-05, "loss": 0.6144, "step": 2026 }, { "epoch": 0.26, "grad_norm": 0.5211171245126613, "learning_rate": 1.7374985874534346e-05, "loss": 0.6381, "step": 2027 }, { "epoch": 0.26, "grad_norm": 0.5894974911647843, "learning_rate": 1.7372190998084728e-05, "loss": 0.6535, "step": 2028 }, { "epoch": 0.26, "grad_norm": 0.5088743798605107, "learning_rate": 1.7369394859605576e-05, "loss": 0.6318, "step": 2029 }, { "epoch": 0.26, "grad_norm": 0.5241500684730949, "learning_rate": 1.7366597459575563e-05, "loss": 0.6185, "step": 2030 }, { "epoch": 0.26, "grad_norm": 0.44610849140977, "learning_rate": 1.736379879847356e-05, "loss": 0.5744, "step": 2031 }, { "epoch": 0.26, "grad_norm": 0.5515754793430246, "learning_rate": 1.7360998876778667e-05, "loss": 0.6608, "step": 2032 }, { "epoch": 0.26, "grad_norm": 0.5689362078304555, "learning_rate": 1.7358197694970195e-05, "loss": 0.6607, "step": 2033 }, { "epoch": 0.26, "grad_norm": 0.5164746390449642, "learning_rate": 1.7355395253527677e-05, "loss": 0.6137, "step": 2034 }, { "epoch": 0.26, "grad_norm": 0.5129169166372288, "learning_rate": 1.7352591552930852e-05, "loss": 0.6177, "step": 2035 }, { "epoch": 0.26, "grad_norm": 0.5614613810508374, "learning_rate": 1.7349786593659686e-05, "loss": 0.6093, "step": 2036 }, { "epoch": 0.26, "grad_norm": 0.5668197401643904, "learning_rate": 1.7346980376194346e-05, "loss": 0.6312, "step": 2037 }, { "epoch": 0.26, "grad_norm": 0.6319892183137155, "learning_rate": 1.7344172901015227e-05, "loss": 0.6479, "step": 2038 }, { "epoch": 0.26, "grad_norm": 0.5961458563167602, "learning_rate": 1.7341364168602936e-05, "loss": 0.6267, "step": 2039 }, { "epoch": 0.26, "grad_norm": 0.5287296334893943, "learning_rate": 1.7338554179438286e-05, "loss": 0.6213, "step": 2040 }, { "epoch": 0.26, "grad_norm": 0.5477658061697477, "learning_rate": 1.7335742934002322e-05, "loss": 0.6709, "step": 2041 }, { "epoch": 0.26, "grad_norm": 0.4798386092246597, "learning_rate": 1.733293043277629e-05, "loss": 0.6117, "step": 2042 }, { "epoch": 0.26, "grad_norm": 0.5562705259999435, "learning_rate": 1.7330116676241658e-05, "loss": 0.603, "step": 2043 }, { "epoch": 0.26, "grad_norm": 0.489359156636726, "learning_rate": 1.7327301664880106e-05, "loss": 0.6027, "step": 2044 }, { "epoch": 0.26, "grad_norm": 0.5520742138134367, "learning_rate": 1.7324485399173528e-05, "loss": 0.6746, "step": 2045 }, { "epoch": 0.26, "grad_norm": 0.5968830195650658, "learning_rate": 1.732166787960404e-05, "loss": 0.6606, "step": 2046 }, { "epoch": 0.26, "grad_norm": 0.6032003600409983, "learning_rate": 1.7318849106653962e-05, "loss": 0.5885, "step": 2047 }, { "epoch": 0.26, "grad_norm": 0.672495814836593, "learning_rate": 1.731602908080584e-05, "loss": 0.6526, "step": 2048 }, { "epoch": 0.26, "grad_norm": 0.49964855619718573, "learning_rate": 1.7313207802542418e-05, "loss": 0.6228, "step": 2049 }, { "epoch": 0.26, "grad_norm": 0.688138102513676, "learning_rate": 1.731038527234667e-05, "loss": 0.6211, "step": 2050 }, { "epoch": 0.26, "grad_norm": 0.6361977592009597, "learning_rate": 1.7307561490701785e-05, "loss": 0.653, "step": 2051 }, { "epoch": 0.26, "grad_norm": 0.47441128191687437, "learning_rate": 1.7304736458091148e-05, "loss": 0.5849, "step": 2052 }, { "epoch": 0.26, "grad_norm": 0.6056007700023216, "learning_rate": 1.730191017499838e-05, "loss": 0.6452, "step": 2053 }, { "epoch": 0.26, "grad_norm": 0.5292966185631732, "learning_rate": 1.7299082641907303e-05, "loss": 0.6355, "step": 2054 }, { "epoch": 0.26, "grad_norm": 0.5968568699825344, "learning_rate": 1.7296253859301956e-05, "loss": 0.647, "step": 2055 }, { "epoch": 0.26, "grad_norm": 0.542627214995714, "learning_rate": 1.7293423827666588e-05, "loss": 0.6286, "step": 2056 }, { "epoch": 0.26, "grad_norm": 0.47320472775476025, "learning_rate": 1.7290592547485673e-05, "loss": 0.5899, "step": 2057 }, { "epoch": 0.26, "grad_norm": 0.5350995730444336, "learning_rate": 1.7287760019243892e-05, "loss": 0.5999, "step": 2058 }, { "epoch": 0.26, "grad_norm": 0.5345528300830138, "learning_rate": 1.728492624342613e-05, "loss": 0.6159, "step": 2059 }, { "epoch": 0.26, "grad_norm": 0.5515446939999896, "learning_rate": 1.7282091220517507e-05, "loss": 0.6395, "step": 2060 }, { "epoch": 0.26, "grad_norm": 0.49581233426134746, "learning_rate": 1.7279254951003338e-05, "loss": 0.592, "step": 2061 }, { "epoch": 0.26, "grad_norm": 0.4895664862447351, "learning_rate": 1.727641743536916e-05, "loss": 0.6193, "step": 2062 }, { "epoch": 0.26, "grad_norm": 1.1389317877191365, "learning_rate": 1.7273578674100715e-05, "loss": 0.6633, "step": 2063 }, { "epoch": 0.26, "grad_norm": 0.6328722185339493, "learning_rate": 1.7270738667683974e-05, "loss": 0.6389, "step": 2064 }, { "epoch": 0.26, "grad_norm": 0.5196491751755697, "learning_rate": 1.7267897416605104e-05, "loss": 0.6279, "step": 2065 }, { "epoch": 0.26, "grad_norm": 0.5679710174833342, "learning_rate": 1.7265054921350503e-05, "loss": 0.64, "step": 2066 }, { "epoch": 0.26, "grad_norm": 0.5318453013465224, "learning_rate": 1.7262211182406755e-05, "loss": 0.6259, "step": 2067 }, { "epoch": 0.26, "grad_norm": 0.5174989722715052, "learning_rate": 1.725936620026069e-05, "loss": 0.6304, "step": 2068 }, { "epoch": 0.26, "grad_norm": 0.5025791838220176, "learning_rate": 1.7256519975399322e-05, "loss": 0.6263, "step": 2069 }, { "epoch": 0.26, "grad_norm": 0.6024310766780734, "learning_rate": 1.7253672508309897e-05, "loss": 0.6548, "step": 2070 }, { "epoch": 0.26, "grad_norm": 0.5059540565357612, "learning_rate": 1.7250823799479868e-05, "loss": 0.6256, "step": 2071 }, { "epoch": 0.26, "grad_norm": 0.5441773963833311, "learning_rate": 1.7247973849396892e-05, "loss": 0.6049, "step": 2072 }, { "epoch": 0.26, "grad_norm": 0.5869263922550287, "learning_rate": 1.7245122658548857e-05, "loss": 0.6185, "step": 2073 }, { "epoch": 0.26, "grad_norm": 0.5541697436565358, "learning_rate": 1.724227022742384e-05, "loss": 0.6319, "step": 2074 }, { "epoch": 0.27, "grad_norm": 0.5145550959797012, "learning_rate": 1.723941655651015e-05, "loss": 0.6271, "step": 2075 }, { "epoch": 0.27, "grad_norm": 0.5114447397360322, "learning_rate": 1.72365616462963e-05, "loss": 0.6244, "step": 2076 }, { "epoch": 0.27, "grad_norm": 0.5836497099861837, "learning_rate": 1.7233705497271015e-05, "loss": 0.6374, "step": 2077 }, { "epoch": 0.27, "grad_norm": 0.5152075563938727, "learning_rate": 1.7230848109923232e-05, "loss": 0.6255, "step": 2078 }, { "epoch": 0.27, "grad_norm": 0.660024390903293, "learning_rate": 1.72279894847421e-05, "loss": 0.6749, "step": 2079 }, { "epoch": 0.27, "grad_norm": 0.5975156865180243, "learning_rate": 1.7225129622216984e-05, "loss": 0.6157, "step": 2080 }, { "epoch": 0.27, "grad_norm": 0.5596908117005405, "learning_rate": 1.7222268522837458e-05, "loss": 0.6052, "step": 2081 }, { "epoch": 0.27, "grad_norm": 0.5985891841198689, "learning_rate": 1.7219406187093304e-05, "loss": 0.6069, "step": 2082 }, { "epoch": 0.27, "grad_norm": 0.49691197314236846, "learning_rate": 1.7216542615474524e-05, "loss": 0.6284, "step": 2083 }, { "epoch": 0.27, "grad_norm": 0.670095124303904, "learning_rate": 1.7213677808471323e-05, "loss": 0.6764, "step": 2084 }, { "epoch": 0.27, "grad_norm": 0.6769511121849917, "learning_rate": 1.721081176657412e-05, "loss": 0.6776, "step": 2085 }, { "epoch": 0.27, "grad_norm": 0.46592192117268005, "learning_rate": 1.720794449027355e-05, "loss": 0.6049, "step": 2086 }, { "epoch": 0.27, "grad_norm": 0.579293187479945, "learning_rate": 1.7205075980060455e-05, "loss": 0.6044, "step": 2087 }, { "epoch": 0.27, "grad_norm": 0.6138656320240625, "learning_rate": 1.7202206236425885e-05, "loss": 0.6205, "step": 2088 }, { "epoch": 0.27, "grad_norm": 0.5438736206376213, "learning_rate": 1.7199335259861115e-05, "loss": 0.6305, "step": 2089 }, { "epoch": 0.27, "grad_norm": 0.7060043174626611, "learning_rate": 1.719646305085761e-05, "loss": 0.6257, "step": 2090 }, { "epoch": 0.27, "grad_norm": 0.5705904056252022, "learning_rate": 1.7193589609907064e-05, "loss": 0.6275, "step": 2091 }, { "epoch": 0.27, "grad_norm": 0.5487741375420286, "learning_rate": 1.7190714937501375e-05, "loss": 0.6153, "step": 2092 }, { "epoch": 0.27, "grad_norm": 0.540910029497161, "learning_rate": 1.7187839034132648e-05, "loss": 0.6221, "step": 2093 }, { "epoch": 0.27, "grad_norm": 0.5515708483019919, "learning_rate": 1.7184961900293206e-05, "loss": 0.6257, "step": 2094 }, { "epoch": 0.27, "grad_norm": 0.5653868303186855, "learning_rate": 1.7182083536475577e-05, "loss": 0.6396, "step": 2095 }, { "epoch": 0.27, "grad_norm": 0.5371787985918838, "learning_rate": 1.7179203943172503e-05, "loss": 0.5969, "step": 2096 }, { "epoch": 0.27, "grad_norm": 0.6263109452298721, "learning_rate": 1.7176323120876938e-05, "loss": 0.6667, "step": 2097 }, { "epoch": 0.27, "grad_norm": 0.4762268233946747, "learning_rate": 1.7173441070082042e-05, "loss": 0.6052, "step": 2098 }, { "epoch": 0.27, "grad_norm": 0.4880904089429343, "learning_rate": 1.7170557791281185e-05, "loss": 0.615, "step": 2099 }, { "epoch": 0.27, "grad_norm": 0.5503077081507968, "learning_rate": 1.7167673284967953e-05, "loss": 0.6384, "step": 2100 }, { "epoch": 0.27, "grad_norm": 0.4714818584828879, "learning_rate": 1.7164787551636136e-05, "loss": 0.6054, "step": 2101 }, { "epoch": 0.27, "grad_norm": 0.6672071740332374, "learning_rate": 1.7161900591779737e-05, "loss": 0.6474, "step": 2102 }, { "epoch": 0.27, "grad_norm": 0.5464850723856133, "learning_rate": 1.7159012405892966e-05, "loss": 0.6116, "step": 2103 }, { "epoch": 0.27, "grad_norm": 0.8075158072573431, "learning_rate": 1.7156122994470253e-05, "loss": 0.6482, "step": 2104 }, { "epoch": 0.27, "grad_norm": 0.5501207000802596, "learning_rate": 1.7153232358006218e-05, "loss": 0.6249, "step": 2105 }, { "epoch": 0.27, "grad_norm": 0.5450611722835191, "learning_rate": 1.7150340496995715e-05, "loss": 0.6648, "step": 2106 }, { "epoch": 0.27, "grad_norm": 0.48750369858446113, "learning_rate": 1.714744741193379e-05, "loss": 0.5854, "step": 2107 }, { "epoch": 0.27, "grad_norm": 0.5507307759052489, "learning_rate": 1.71445531033157e-05, "loss": 0.6339, "step": 2108 }, { "epoch": 0.27, "grad_norm": 0.5567140613298267, "learning_rate": 1.7141657571636923e-05, "loss": 0.6161, "step": 2109 }, { "epoch": 0.27, "grad_norm": 0.5199009139836569, "learning_rate": 1.7138760817393133e-05, "loss": 0.646, "step": 2110 }, { "epoch": 0.27, "grad_norm": 0.5727941104376241, "learning_rate": 1.7135862841080226e-05, "loss": 0.6211, "step": 2111 }, { "epoch": 0.27, "grad_norm": 0.6393063865273781, "learning_rate": 1.713296364319429e-05, "loss": 0.6715, "step": 2112 }, { "epoch": 0.27, "grad_norm": 0.5468277616645475, "learning_rate": 1.7130063224231643e-05, "loss": 0.632, "step": 2113 }, { "epoch": 0.27, "grad_norm": 0.5751470476064493, "learning_rate": 1.71271615846888e-05, "loss": 0.663, "step": 2114 }, { "epoch": 0.27, "grad_norm": 0.6278442652125719, "learning_rate": 1.7124258725062476e-05, "loss": 0.6276, "step": 2115 }, { "epoch": 0.27, "grad_norm": 0.5675644080703491, "learning_rate": 1.7121354645849618e-05, "loss": 0.6077, "step": 2116 }, { "epoch": 0.27, "grad_norm": 0.5351043103393328, "learning_rate": 1.711844934754736e-05, "loss": 0.6297, "step": 2117 }, { "epoch": 0.27, "grad_norm": 0.4880179328232256, "learning_rate": 1.711554283065306e-05, "loss": 0.6257, "step": 2118 }, { "epoch": 0.27, "grad_norm": 0.5708419354216029, "learning_rate": 1.7112635095664272e-05, "loss": 0.6167, "step": 2119 }, { "epoch": 0.27, "grad_norm": 0.5649050116926791, "learning_rate": 1.710972614307877e-05, "loss": 0.6214, "step": 2120 }, { "epoch": 0.27, "grad_norm": 0.5999748353722549, "learning_rate": 1.7106815973394533e-05, "loss": 0.6536, "step": 2121 }, { "epoch": 0.27, "grad_norm": 0.575615046083512, "learning_rate": 1.7103904587109745e-05, "loss": 0.7058, "step": 2122 }, { "epoch": 0.27, "grad_norm": 0.5513700205473893, "learning_rate": 1.7100991984722798e-05, "loss": 0.6269, "step": 2123 }, { "epoch": 0.27, "grad_norm": 0.6352639439758307, "learning_rate": 1.709807816673229e-05, "loss": 0.6606, "step": 2124 }, { "epoch": 0.27, "grad_norm": 0.5640394390787117, "learning_rate": 1.709516313363704e-05, "loss": 0.6343, "step": 2125 }, { "epoch": 0.27, "grad_norm": 0.6527905659425342, "learning_rate": 1.709224688593606e-05, "loss": 0.5888, "step": 2126 }, { "epoch": 0.27, "grad_norm": 0.5470268330893874, "learning_rate": 1.7089329424128586e-05, "loss": 0.6351, "step": 2127 }, { "epoch": 0.27, "grad_norm": 0.5532357820053284, "learning_rate": 1.7086410748714035e-05, "loss": 0.6473, "step": 2128 }, { "epoch": 0.27, "grad_norm": 0.5405673784393212, "learning_rate": 1.708349086019206e-05, "loss": 0.6528, "step": 2129 }, { "epoch": 0.27, "grad_norm": 0.5007585724872531, "learning_rate": 1.7080569759062517e-05, "loss": 0.6121, "step": 2130 }, { "epoch": 0.27, "grad_norm": 0.5687885633860142, "learning_rate": 1.7077647445825447e-05, "loss": 0.6549, "step": 2131 }, { "epoch": 0.27, "grad_norm": 0.5092939982985241, "learning_rate": 1.7074723920981123e-05, "loss": 0.6111, "step": 2132 }, { "epoch": 0.27, "grad_norm": 0.5138374905719174, "learning_rate": 1.707179918503002e-05, "loss": 0.6396, "step": 2133 }, { "epoch": 0.27, "grad_norm": 0.5921273923018128, "learning_rate": 1.706887323847281e-05, "loss": 0.6124, "step": 2134 }, { "epoch": 0.27, "grad_norm": 0.5347695581629829, "learning_rate": 1.7065946081810384e-05, "loss": 0.6465, "step": 2135 }, { "epoch": 0.27, "grad_norm": 0.550213949690101, "learning_rate": 1.7063017715543837e-05, "loss": 0.635, "step": 2136 }, { "epoch": 0.27, "grad_norm": 0.5232189845315925, "learning_rate": 1.7060088140174466e-05, "loss": 0.6138, "step": 2137 }, { "epoch": 0.27, "grad_norm": 0.5470114076433944, "learning_rate": 1.7057157356203777e-05, "loss": 0.6178, "step": 2138 }, { "epoch": 0.27, "grad_norm": 0.4937082893993453, "learning_rate": 1.7054225364133493e-05, "loss": 0.5818, "step": 2139 }, { "epoch": 0.27, "grad_norm": 0.5188175441338019, "learning_rate": 1.7051292164465526e-05, "loss": 0.6205, "step": 2140 }, { "epoch": 0.27, "grad_norm": 0.5612225354966803, "learning_rate": 1.7048357757702013e-05, "loss": 0.6616, "step": 2141 }, { "epoch": 0.27, "grad_norm": 0.5063526874384922, "learning_rate": 1.704542214434528e-05, "loss": 0.5867, "step": 2142 }, { "epoch": 0.27, "grad_norm": 0.47121089369891583, "learning_rate": 1.704248532489787e-05, "loss": 0.5907, "step": 2143 }, { "epoch": 0.27, "grad_norm": 0.49727859729968615, "learning_rate": 1.703954729986254e-05, "loss": 0.552, "step": 2144 }, { "epoch": 0.27, "grad_norm": 0.5894214461348221, "learning_rate": 1.7036608069742234e-05, "loss": 0.6164, "step": 2145 }, { "epoch": 0.27, "grad_norm": 0.5543999706269099, "learning_rate": 1.7033667635040115e-05, "loss": 0.6383, "step": 2146 }, { "epoch": 0.27, "grad_norm": 0.48672992367762874, "learning_rate": 1.7030725996259554e-05, "loss": 0.5918, "step": 2147 }, { "epoch": 0.27, "grad_norm": 0.5344950286326945, "learning_rate": 1.702778315390412e-05, "loss": 0.6189, "step": 2148 }, { "epoch": 0.27, "grad_norm": 0.5529134817250072, "learning_rate": 1.7024839108477592e-05, "loss": 0.6251, "step": 2149 }, { "epoch": 0.27, "grad_norm": 0.554117343794066, "learning_rate": 1.7021893860483954e-05, "loss": 0.6675, "step": 2150 }, { "epoch": 0.27, "grad_norm": 0.5236172821669567, "learning_rate": 1.7018947410427395e-05, "loss": 0.6286, "step": 2151 }, { "epoch": 0.27, "grad_norm": 0.532951293818748, "learning_rate": 1.701599975881232e-05, "loss": 0.6162, "step": 2152 }, { "epoch": 0.28, "grad_norm": 0.5464602757222633, "learning_rate": 1.7013050906143322e-05, "loss": 0.5992, "step": 2153 }, { "epoch": 0.28, "grad_norm": 0.613694550589608, "learning_rate": 1.701010085292521e-05, "loss": 0.6379, "step": 2154 }, { "epoch": 0.28, "grad_norm": 0.6255007229870839, "learning_rate": 1.7007149599663005e-05, "loss": 0.6673, "step": 2155 }, { "epoch": 0.28, "grad_norm": 0.5445953885371064, "learning_rate": 1.7004197146861917e-05, "loss": 0.5876, "step": 2156 }, { "epoch": 0.28, "grad_norm": 0.5813071692839253, "learning_rate": 1.7001243495027373e-05, "loss": 0.6575, "step": 2157 }, { "epoch": 0.28, "grad_norm": 0.5410521448798467, "learning_rate": 1.6998288644665006e-05, "loss": 0.611, "step": 2158 }, { "epoch": 0.28, "grad_norm": 0.4931527072938356, "learning_rate": 1.699533259628064e-05, "loss": 0.6334, "step": 2159 }, { "epoch": 0.28, "grad_norm": 0.5586920709673027, "learning_rate": 1.6992375350380327e-05, "loss": 0.6032, "step": 2160 }, { "epoch": 0.28, "grad_norm": 0.5755650267600635, "learning_rate": 1.6989416907470304e-05, "loss": 0.6075, "step": 2161 }, { "epoch": 0.28, "grad_norm": 0.5407030340339801, "learning_rate": 1.6986457268057023e-05, "loss": 0.6327, "step": 2162 }, { "epoch": 0.28, "grad_norm": 0.5283057081785942, "learning_rate": 1.698349643264714e-05, "loss": 0.6293, "step": 2163 }, { "epoch": 0.28, "grad_norm": 0.5045110535828489, "learning_rate": 1.6980534401747507e-05, "loss": 0.6242, "step": 2164 }, { "epoch": 0.28, "grad_norm": 0.5380585569861883, "learning_rate": 1.69775711758652e-05, "loss": 0.5869, "step": 2165 }, { "epoch": 0.28, "grad_norm": 0.5681472003878375, "learning_rate": 1.6974606755507475e-05, "loss": 0.6564, "step": 2166 }, { "epoch": 0.28, "grad_norm": 0.5690609174823863, "learning_rate": 1.697164114118181e-05, "loss": 0.6254, "step": 2167 }, { "epoch": 0.28, "grad_norm": 0.5414695802149793, "learning_rate": 1.696867433339588e-05, "loss": 0.673, "step": 2168 }, { "epoch": 0.28, "grad_norm": 0.5420959032955501, "learning_rate": 1.6965706332657573e-05, "loss": 0.6484, "step": 2169 }, { "epoch": 0.28, "grad_norm": 0.5138019729403539, "learning_rate": 1.6962737139474966e-05, "loss": 0.6156, "step": 2170 }, { "epoch": 0.28, "grad_norm": 0.5645381465578142, "learning_rate": 1.6959766754356355e-05, "loss": 0.6392, "step": 2171 }, { "epoch": 0.28, "grad_norm": 0.4704242409608901, "learning_rate": 1.6956795177810233e-05, "loss": 0.5917, "step": 2172 }, { "epoch": 0.28, "grad_norm": 0.5722805153055524, "learning_rate": 1.6953822410345295e-05, "loss": 0.6779, "step": 2173 }, { "epoch": 0.28, "grad_norm": 0.5940351645045666, "learning_rate": 1.695084845247044e-05, "loss": 0.6448, "step": 2174 }, { "epoch": 0.28, "grad_norm": 0.5268902499084269, "learning_rate": 1.6947873304694784e-05, "loss": 0.6123, "step": 2175 }, { "epoch": 0.28, "grad_norm": 0.5084883141235519, "learning_rate": 1.6944896967527623e-05, "loss": 0.5894, "step": 2176 }, { "epoch": 0.28, "grad_norm": 0.4833892821161022, "learning_rate": 1.694191944147848e-05, "loss": 0.6402, "step": 2177 }, { "epoch": 0.28, "grad_norm": 0.6372893415475722, "learning_rate": 1.6938940727057067e-05, "loss": 0.6467, "step": 2178 }, { "epoch": 0.28, "grad_norm": 0.5860147343466691, "learning_rate": 1.6935960824773307e-05, "loss": 0.6624, "step": 2179 }, { "epoch": 0.28, "grad_norm": 0.5222320206296782, "learning_rate": 1.6932979735137318e-05, "loss": 0.598, "step": 2180 }, { "epoch": 0.28, "grad_norm": 0.5800264527464705, "learning_rate": 1.6929997458659427e-05, "loss": 0.628, "step": 2181 }, { "epoch": 0.28, "grad_norm": 0.5533850372475759, "learning_rate": 1.692701399585017e-05, "loss": 0.6218, "step": 2182 }, { "epoch": 0.28, "grad_norm": 0.5898510809602276, "learning_rate": 1.6924029347220274e-05, "loss": 0.6388, "step": 2183 }, { "epoch": 0.28, "grad_norm": 0.5374916848591483, "learning_rate": 1.6921043513280672e-05, "loss": 0.5953, "step": 2184 }, { "epoch": 0.28, "grad_norm": 0.5325369250810211, "learning_rate": 1.691805649454251e-05, "loss": 0.6529, "step": 2185 }, { "epoch": 0.28, "grad_norm": 0.5326862528441616, "learning_rate": 1.691506829151712e-05, "loss": 0.6114, "step": 2186 }, { "epoch": 0.28, "grad_norm": 0.5037187372302796, "learning_rate": 1.6912078904716057e-05, "loss": 0.5968, "step": 2187 }, { "epoch": 0.28, "grad_norm": 0.5428137358634717, "learning_rate": 1.690908833465106e-05, "loss": 0.6399, "step": 2188 }, { "epoch": 0.28, "grad_norm": 0.5804096568761243, "learning_rate": 1.690609658183408e-05, "loss": 0.6338, "step": 2189 }, { "epoch": 0.28, "grad_norm": 0.4971206131026922, "learning_rate": 1.690310364677727e-05, "loss": 0.6151, "step": 2190 }, { "epoch": 0.28, "grad_norm": 0.5776542738227597, "learning_rate": 1.6900109529992983e-05, "loss": 0.6322, "step": 2191 }, { "epoch": 0.28, "grad_norm": 0.5200623015462296, "learning_rate": 1.6897114231993776e-05, "loss": 0.62, "step": 2192 }, { "epoch": 0.28, "grad_norm": 0.5753186513214527, "learning_rate": 1.689411775329241e-05, "loss": 0.6224, "step": 2193 }, { "epoch": 0.28, "grad_norm": 0.5087469726571346, "learning_rate": 1.6891120094401835e-05, "loss": 0.619, "step": 2194 }, { "epoch": 0.28, "grad_norm": 0.5360748015173817, "learning_rate": 1.6888121255835225e-05, "loss": 0.6586, "step": 2195 }, { "epoch": 0.28, "grad_norm": 0.575706360209097, "learning_rate": 1.6885121238105945e-05, "loss": 0.6671, "step": 2196 }, { "epoch": 0.28, "grad_norm": 0.540975128421046, "learning_rate": 1.6882120041727557e-05, "loss": 0.6587, "step": 2197 }, { "epoch": 0.28, "grad_norm": 0.549942237325279, "learning_rate": 1.6879117667213828e-05, "loss": 0.662, "step": 2198 }, { "epoch": 0.28, "grad_norm": 0.5429069190379784, "learning_rate": 1.687611411507873e-05, "loss": 0.6105, "step": 2199 }, { "epoch": 0.28, "grad_norm": 0.506820019679203, "learning_rate": 1.687310938583644e-05, "loss": 0.5972, "step": 2200 }, { "epoch": 0.28, "grad_norm": 0.5493296922534322, "learning_rate": 1.687010348000132e-05, "loss": 0.613, "step": 2201 }, { "epoch": 0.28, "grad_norm": 0.549259469467641, "learning_rate": 1.6867096398087954e-05, "loss": 0.6338, "step": 2202 }, { "epoch": 0.28, "grad_norm": 0.5622741580927316, "learning_rate": 1.6864088140611115e-05, "loss": 0.6282, "step": 2203 }, { "epoch": 0.28, "grad_norm": 0.5453487819384298, "learning_rate": 1.686107870808578e-05, "loss": 0.6225, "step": 2204 }, { "epoch": 0.28, "grad_norm": 0.5292339765258006, "learning_rate": 1.6858068101027127e-05, "loss": 0.5797, "step": 2205 }, { "epoch": 0.28, "grad_norm": 0.6404672818122775, "learning_rate": 1.6855056319950535e-05, "loss": 0.6485, "step": 2206 }, { "epoch": 0.28, "grad_norm": 0.5385989842989324, "learning_rate": 1.6852043365371583e-05, "loss": 0.6147, "step": 2207 }, { "epoch": 0.28, "grad_norm": 0.575083537892424, "learning_rate": 1.6849029237806055e-05, "loss": 0.6206, "step": 2208 }, { "epoch": 0.28, "grad_norm": 0.5339009857408844, "learning_rate": 1.6846013937769935e-05, "loss": 0.6241, "step": 2209 }, { "epoch": 0.28, "grad_norm": 0.6787743985355988, "learning_rate": 1.6842997465779402e-05, "loss": 0.6796, "step": 2210 }, { "epoch": 0.28, "grad_norm": 0.6563702734859835, "learning_rate": 1.6839979822350838e-05, "loss": 0.6574, "step": 2211 }, { "epoch": 0.28, "grad_norm": 0.6293733697813824, "learning_rate": 1.683696100800083e-05, "loss": 0.6753, "step": 2212 }, { "epoch": 0.28, "grad_norm": 0.5776714770478499, "learning_rate": 1.6833941023246162e-05, "loss": 0.63, "step": 2213 }, { "epoch": 0.28, "grad_norm": 0.5659224354861714, "learning_rate": 1.6830919868603817e-05, "loss": 0.6334, "step": 2214 }, { "epoch": 0.28, "grad_norm": 0.669457718939766, "learning_rate": 1.682789754459098e-05, "loss": 0.6112, "step": 2215 }, { "epoch": 0.28, "grad_norm": 0.5742092893140854, "learning_rate": 1.6824874051725037e-05, "loss": 0.6194, "step": 2216 }, { "epoch": 0.28, "grad_norm": 0.524413916108691, "learning_rate": 1.6821849390523578e-05, "loss": 0.6048, "step": 2217 }, { "epoch": 0.28, "grad_norm": 0.5801452903037029, "learning_rate": 1.681882356150438e-05, "loss": 0.6439, "step": 2218 }, { "epoch": 0.28, "grad_norm": 0.6032303122324185, "learning_rate": 1.6815796565185433e-05, "loss": 0.6165, "step": 2219 }, { "epoch": 0.28, "grad_norm": 0.557605269648875, "learning_rate": 1.681276840208492e-05, "loss": 0.6069, "step": 2220 }, { "epoch": 0.28, "grad_norm": 0.6423066998167971, "learning_rate": 1.6809739072721227e-05, "loss": 0.6478, "step": 2221 }, { "epoch": 0.28, "grad_norm": 0.4823956250784282, "learning_rate": 1.6806708577612937e-05, "loss": 0.5944, "step": 2222 }, { "epoch": 0.28, "grad_norm": 0.5397831359047273, "learning_rate": 1.6803676917278837e-05, "loss": 0.5818, "step": 2223 }, { "epoch": 0.28, "grad_norm": 0.54718133691037, "learning_rate": 1.6800644092237907e-05, "loss": 0.5965, "step": 2224 }, { "epoch": 0.28, "grad_norm": 0.48595979642102943, "learning_rate": 1.679761010300933e-05, "loss": 0.6077, "step": 2225 }, { "epoch": 0.28, "grad_norm": 0.5201363258312037, "learning_rate": 1.6794574950112492e-05, "loss": 0.6078, "step": 2226 }, { "epoch": 0.28, "grad_norm": 0.5051883870081508, "learning_rate": 1.679153863406697e-05, "loss": 0.6159, "step": 2227 }, { "epoch": 0.28, "grad_norm": 0.5341203649705445, "learning_rate": 1.6788501155392544e-05, "loss": 0.6566, "step": 2228 }, { "epoch": 0.28, "grad_norm": 0.523915300094377, "learning_rate": 1.6785462514609202e-05, "loss": 0.6031, "step": 2229 }, { "epoch": 0.28, "grad_norm": 0.6415432707377065, "learning_rate": 1.6782422712237107e-05, "loss": 0.6523, "step": 2230 }, { "epoch": 0.29, "grad_norm": 0.5116680432135962, "learning_rate": 1.677938174879665e-05, "loss": 0.5935, "step": 2231 }, { "epoch": 0.29, "grad_norm": 0.5413440084991145, "learning_rate": 1.6776339624808403e-05, "loss": 0.6145, "step": 2232 }, { "epoch": 0.29, "grad_norm": 0.5574628972009726, "learning_rate": 1.6773296340793135e-05, "loss": 0.6328, "step": 2233 }, { "epoch": 0.29, "grad_norm": 0.5550466669978852, "learning_rate": 1.6770251897271827e-05, "loss": 0.6151, "step": 2234 }, { "epoch": 0.29, "grad_norm": 0.6682574421962434, "learning_rate": 1.6767206294765647e-05, "loss": 0.644, "step": 2235 }, { "epoch": 0.29, "grad_norm": 0.5129959268700852, "learning_rate": 1.6764159533795968e-05, "loss": 0.6055, "step": 2236 }, { "epoch": 0.29, "grad_norm": 0.5348264620909425, "learning_rate": 1.6761111614884352e-05, "loss": 0.6037, "step": 2237 }, { "epoch": 0.29, "grad_norm": 0.6863364286825416, "learning_rate": 1.6758062538552575e-05, "loss": 0.6671, "step": 2238 }, { "epoch": 0.29, "grad_norm": 0.457587977353613, "learning_rate": 1.6755012305322595e-05, "loss": 0.6247, "step": 2239 }, { "epoch": 0.29, "grad_norm": 0.5454817195304177, "learning_rate": 1.6751960915716574e-05, "loss": 0.6227, "step": 2240 }, { "epoch": 0.29, "grad_norm": 0.497883764056223, "learning_rate": 1.6748908370256875e-05, "loss": 0.56, "step": 2241 }, { "epoch": 0.29, "grad_norm": 0.5641772555028829, "learning_rate": 1.6745854669466065e-05, "loss": 0.6352, "step": 2242 }, { "epoch": 0.29, "grad_norm": 0.523536266662608, "learning_rate": 1.6742799813866883e-05, "loss": 0.6246, "step": 2243 }, { "epoch": 0.29, "grad_norm": 0.526076878539938, "learning_rate": 1.6739743803982297e-05, "loss": 0.6149, "step": 2244 }, { "epoch": 0.29, "grad_norm": 0.6232008005021547, "learning_rate": 1.6736686640335455e-05, "loss": 0.6112, "step": 2245 }, { "epoch": 0.29, "grad_norm": 0.5241749367688986, "learning_rate": 1.6733628323449707e-05, "loss": 0.5943, "step": 2246 }, { "epoch": 0.29, "grad_norm": 0.5441649354756466, "learning_rate": 1.6730568853848596e-05, "loss": 0.6332, "step": 2247 }, { "epoch": 0.29, "grad_norm": 0.50543828636438, "learning_rate": 1.6727508232055867e-05, "loss": 0.6289, "step": 2248 }, { "epoch": 0.29, "grad_norm": 0.5669006657615963, "learning_rate": 1.672444645859547e-05, "loss": 0.6082, "step": 2249 }, { "epoch": 0.29, "grad_norm": 0.4717501868606642, "learning_rate": 1.672138353399153e-05, "loss": 0.6309, "step": 2250 }, { "epoch": 0.29, "grad_norm": 0.5700274805054147, "learning_rate": 1.671831945876839e-05, "loss": 0.6442, "step": 2251 }, { "epoch": 0.29, "grad_norm": 0.4612605456446643, "learning_rate": 1.671525423345058e-05, "loss": 0.6082, "step": 2252 }, { "epoch": 0.29, "grad_norm": 0.5559476462432588, "learning_rate": 1.6712187858562834e-05, "loss": 0.6518, "step": 2253 }, { "epoch": 0.29, "grad_norm": 0.5229512750858122, "learning_rate": 1.670912033463007e-05, "loss": 0.6092, "step": 2254 }, { "epoch": 0.29, "grad_norm": 0.5099674690530341, "learning_rate": 1.670605166217742e-05, "loss": 0.583, "step": 2255 }, { "epoch": 0.29, "grad_norm": 0.5369677937595743, "learning_rate": 1.6702981841730196e-05, "loss": 0.6418, "step": 2256 }, { "epoch": 0.29, "grad_norm": 0.46587512561140243, "learning_rate": 1.669991087381392e-05, "loss": 0.6069, "step": 2257 }, { "epoch": 0.29, "grad_norm": 0.5323026250131456, "learning_rate": 1.6696838758954297e-05, "loss": 0.6649, "step": 2258 }, { "epoch": 0.29, "grad_norm": 0.5231409435868638, "learning_rate": 1.6693765497677244e-05, "loss": 0.6159, "step": 2259 }, { "epoch": 0.29, "grad_norm": 0.5906256019202065, "learning_rate": 1.669069109050886e-05, "loss": 0.6115, "step": 2260 }, { "epoch": 0.29, "grad_norm": 0.5819280668462391, "learning_rate": 1.668761553797545e-05, "loss": 0.6522, "step": 2261 }, { "epoch": 0.29, "grad_norm": 0.5023769595801101, "learning_rate": 1.6684538840603506e-05, "loss": 0.6192, "step": 2262 }, { "epoch": 0.29, "grad_norm": 0.49609999666128307, "learning_rate": 1.6681460998919727e-05, "loss": 0.5866, "step": 2263 }, { "epoch": 0.29, "grad_norm": 0.555702012299919, "learning_rate": 1.6678382013451e-05, "loss": 0.5958, "step": 2264 }, { "epoch": 0.29, "grad_norm": 0.6458342306745813, "learning_rate": 1.6675301884724408e-05, "loss": 0.6437, "step": 2265 }, { "epoch": 0.29, "grad_norm": 0.5215118105688241, "learning_rate": 1.6672220613267233e-05, "loss": 0.6225, "step": 2266 }, { "epoch": 0.29, "grad_norm": 0.506344480789305, "learning_rate": 1.6669138199606956e-05, "loss": 0.6105, "step": 2267 }, { "epoch": 0.29, "grad_norm": 0.6266503727317836, "learning_rate": 1.6666054644271238e-05, "loss": 0.6299, "step": 2268 }, { "epoch": 0.29, "grad_norm": 0.5554141661911428, "learning_rate": 1.6662969947787955e-05, "loss": 0.6175, "step": 2269 }, { "epoch": 0.29, "grad_norm": 0.5620073275574369, "learning_rate": 1.665988411068517e-05, "loss": 0.6259, "step": 2270 }, { "epoch": 0.29, "grad_norm": 0.5240690438420537, "learning_rate": 1.6656797133491133e-05, "loss": 0.6363, "step": 2271 }, { "epoch": 0.29, "grad_norm": 0.5586759730260941, "learning_rate": 1.6653709016734306e-05, "loss": 0.6488, "step": 2272 }, { "epoch": 0.29, "grad_norm": 0.493191761859981, "learning_rate": 1.665061976094333e-05, "loss": 0.6178, "step": 2273 }, { "epoch": 0.29, "grad_norm": 0.4847449360473002, "learning_rate": 1.664752936664705e-05, "loss": 0.6019, "step": 2274 }, { "epoch": 0.29, "grad_norm": 0.47011499707508747, "learning_rate": 1.6644437834374508e-05, "loss": 0.6035, "step": 2275 }, { "epoch": 0.29, "grad_norm": 0.524430731020937, "learning_rate": 1.664134516465493e-05, "loss": 0.598, "step": 2276 }, { "epoch": 0.29, "grad_norm": 0.51960695574915, "learning_rate": 1.663825135801775e-05, "loss": 0.624, "step": 2277 }, { "epoch": 0.29, "grad_norm": 0.6960617250270107, "learning_rate": 1.663515641499259e-05, "loss": 0.6968, "step": 2278 }, { "epoch": 0.29, "grad_norm": 0.47855112623068946, "learning_rate": 1.663206033610926e-05, "loss": 0.5784, "step": 2279 }, { "epoch": 0.29, "grad_norm": 0.5140965350181204, "learning_rate": 1.6628963121897774e-05, "loss": 0.6181, "step": 2280 }, { "epoch": 0.29, "grad_norm": 0.5888494710567548, "learning_rate": 1.662586477288834e-05, "loss": 0.6489, "step": 2281 }, { "epoch": 0.29, "grad_norm": 0.5526306238962339, "learning_rate": 1.6622765289611356e-05, "loss": 0.6494, "step": 2282 }, { "epoch": 0.29, "grad_norm": 0.49793707178640056, "learning_rate": 1.661966467259742e-05, "loss": 0.6165, "step": 2283 }, { "epoch": 0.29, "grad_norm": 0.49835367928102503, "learning_rate": 1.661656292237731e-05, "loss": 0.6444, "step": 2284 }, { "epoch": 0.29, "grad_norm": 0.4816282810739686, "learning_rate": 1.661346003948202e-05, "loss": 0.6229, "step": 2285 }, { "epoch": 0.29, "grad_norm": 0.5860581343043223, "learning_rate": 1.6610356024442714e-05, "loss": 0.6157, "step": 2286 }, { "epoch": 0.29, "grad_norm": 0.5458806032467766, "learning_rate": 1.660725087779077e-05, "loss": 0.6367, "step": 2287 }, { "epoch": 0.29, "grad_norm": 0.5358158887801897, "learning_rate": 1.6604144600057746e-05, "loss": 0.614, "step": 2288 }, { "epoch": 0.29, "grad_norm": 0.46152271409492823, "learning_rate": 1.6601037191775405e-05, "loss": 0.5878, "step": 2289 }, { "epoch": 0.29, "grad_norm": 0.5184766562019937, "learning_rate": 1.6597928653475692e-05, "loss": 0.6212, "step": 2290 }, { "epoch": 0.29, "grad_norm": 0.47862834160713225, "learning_rate": 1.659481898569076e-05, "loss": 0.6122, "step": 2291 }, { "epoch": 0.29, "grad_norm": 0.556818573564729, "learning_rate": 1.659170818895293e-05, "loss": 0.622, "step": 2292 }, { "epoch": 0.29, "grad_norm": 0.5647795617526041, "learning_rate": 1.6588596263794752e-05, "loss": 0.6416, "step": 2293 }, { "epoch": 0.29, "grad_norm": 0.6119717910837262, "learning_rate": 1.6585483210748932e-05, "loss": 0.6669, "step": 2294 }, { "epoch": 0.29, "grad_norm": 0.5280034878859624, "learning_rate": 1.65823690303484e-05, "loss": 0.6215, "step": 2295 }, { "epoch": 0.29, "grad_norm": 0.49430569571112015, "learning_rate": 1.6579253723126258e-05, "loss": 0.589, "step": 2296 }, { "epoch": 0.29, "grad_norm": 0.46310934383680175, "learning_rate": 1.657613728961581e-05, "loss": 0.6129, "step": 2297 }, { "epoch": 0.29, "grad_norm": 0.49339882948250174, "learning_rate": 1.6573019730350558e-05, "loss": 0.5769, "step": 2298 }, { "epoch": 0.29, "grad_norm": 0.5785601815783461, "learning_rate": 1.6569901045864186e-05, "loss": 0.6164, "step": 2299 }, { "epoch": 0.29, "grad_norm": 0.4854125965318488, "learning_rate": 1.6566781236690574e-05, "loss": 0.5999, "step": 2300 }, { "epoch": 0.29, "grad_norm": 0.5499908376960829, "learning_rate": 1.6563660303363792e-05, "loss": 0.6417, "step": 2301 }, { "epoch": 0.29, "grad_norm": 0.515408055560331, "learning_rate": 1.656053824641811e-05, "loss": 0.6249, "step": 2302 }, { "epoch": 0.29, "grad_norm": 0.5474491807503067, "learning_rate": 1.6557415066387985e-05, "loss": 0.6448, "step": 2303 }, { "epoch": 0.29, "grad_norm": 0.5690459990456339, "learning_rate": 1.655429076380807e-05, "loss": 0.5873, "step": 2304 }, { "epoch": 0.29, "grad_norm": 0.5100736246672221, "learning_rate": 1.6551165339213208e-05, "loss": 0.6139, "step": 2305 }, { "epoch": 0.29, "grad_norm": 0.5721346933530593, "learning_rate": 1.654803879313843e-05, "loss": 0.6596, "step": 2306 }, { "epoch": 0.29, "grad_norm": 0.5563666123857263, "learning_rate": 1.6544911126118962e-05, "loss": 0.6303, "step": 2307 }, { "epoch": 0.29, "grad_norm": 0.5371700627469171, "learning_rate": 1.654178233869023e-05, "loss": 0.6161, "step": 2308 }, { "epoch": 0.29, "grad_norm": 0.5655037436100843, "learning_rate": 1.6538652431387836e-05, "loss": 0.5969, "step": 2309 }, { "epoch": 0.3, "grad_norm": 0.5383460011536592, "learning_rate": 1.6535521404747584e-05, "loss": 0.624, "step": 2310 }, { "epoch": 0.3, "grad_norm": 0.5955311277382024, "learning_rate": 1.653238925930547e-05, "loss": 0.6497, "step": 2311 }, { "epoch": 0.3, "grad_norm": 0.5715261956895256, "learning_rate": 1.652925599559768e-05, "loss": 0.6173, "step": 2312 }, { "epoch": 0.3, "grad_norm": 0.5293177361427684, "learning_rate": 1.652612161416059e-05, "loss": 0.6028, "step": 2313 }, { "epoch": 0.3, "grad_norm": 0.6755417541471987, "learning_rate": 1.6522986115530767e-05, "loss": 0.6142, "step": 2314 }, { "epoch": 0.3, "grad_norm": 0.514976676636031, "learning_rate": 1.6519849500244968e-05, "loss": 0.5889, "step": 2315 }, { "epoch": 0.3, "grad_norm": 0.5537251218014558, "learning_rate": 1.6516711768840145e-05, "loss": 0.655, "step": 2316 }, { "epoch": 0.3, "grad_norm": 0.5170173666050636, "learning_rate": 1.6513572921853445e-05, "loss": 0.6184, "step": 2317 }, { "epoch": 0.3, "grad_norm": 0.5328644814363522, "learning_rate": 1.6510432959822192e-05, "loss": 0.6111, "step": 2318 }, { "epoch": 0.3, "grad_norm": 0.5105049091613754, "learning_rate": 1.6507291883283913e-05, "loss": 0.6086, "step": 2319 }, { "epoch": 0.3, "grad_norm": 0.6072594209445937, "learning_rate": 1.6504149692776326e-05, "loss": 0.6196, "step": 2320 }, { "epoch": 0.3, "grad_norm": 0.5161619824814927, "learning_rate": 1.650100638883733e-05, "loss": 0.5984, "step": 2321 }, { "epoch": 0.3, "grad_norm": 0.6119629651529007, "learning_rate": 1.6497861972005022e-05, "loss": 0.6404, "step": 2322 }, { "epoch": 0.3, "grad_norm": 0.6057136032215771, "learning_rate": 1.6494716442817685e-05, "loss": 0.6152, "step": 2323 }, { "epoch": 0.3, "grad_norm": 0.5017187616351187, "learning_rate": 1.6491569801813804e-05, "loss": 0.5828, "step": 2324 }, { "epoch": 0.3, "grad_norm": 0.6193317791323074, "learning_rate": 1.648842204953204e-05, "loss": 0.6219, "step": 2325 }, { "epoch": 0.3, "grad_norm": 0.6152876275074294, "learning_rate": 1.6485273186511247e-05, "loss": 0.6137, "step": 2326 }, { "epoch": 0.3, "grad_norm": 0.6233251646587248, "learning_rate": 1.648212321329048e-05, "loss": 0.6378, "step": 2327 }, { "epoch": 0.3, "grad_norm": 0.5720933519736014, "learning_rate": 1.647897213040897e-05, "loss": 0.6222, "step": 2328 }, { "epoch": 0.3, "grad_norm": 0.5434475895257047, "learning_rate": 1.6475819938406146e-05, "loss": 0.6177, "step": 2329 }, { "epoch": 0.3, "grad_norm": 0.6328164997170703, "learning_rate": 1.647266663782163e-05, "loss": 0.6739, "step": 2330 }, { "epoch": 0.3, "grad_norm": 0.4585748153802035, "learning_rate": 1.6469512229195218e-05, "loss": 0.5833, "step": 2331 }, { "epoch": 0.3, "grad_norm": 0.5337319421943721, "learning_rate": 1.6466356713066913e-05, "loss": 0.6417, "step": 2332 }, { "epoch": 0.3, "grad_norm": 0.5162284468948215, "learning_rate": 1.6463200089976897e-05, "loss": 0.596, "step": 2333 }, { "epoch": 0.3, "grad_norm": 0.5203247447514779, "learning_rate": 1.6460042360465553e-05, "loss": 0.6171, "step": 2334 }, { "epoch": 0.3, "grad_norm": 0.557662389325728, "learning_rate": 1.645688352507344e-05, "loss": 0.6022, "step": 2335 }, { "epoch": 0.3, "grad_norm": 0.6444567610042592, "learning_rate": 1.6453723584341316e-05, "loss": 0.6948, "step": 2336 }, { "epoch": 0.3, "grad_norm": 0.5311967657320302, "learning_rate": 1.6450562538810117e-05, "loss": 0.6272, "step": 2337 }, { "epoch": 0.3, "grad_norm": 0.5652282407148399, "learning_rate": 1.6447400389020983e-05, "loss": 0.6079, "step": 2338 }, { "epoch": 0.3, "grad_norm": 0.5194014218522347, "learning_rate": 1.6444237135515235e-05, "loss": 0.6315, "step": 2339 }, { "epoch": 0.3, "grad_norm": 0.5946810218847203, "learning_rate": 1.6441072778834382e-05, "loss": 0.6398, "step": 2340 }, { "epoch": 0.3, "grad_norm": 0.5854997240508367, "learning_rate": 1.6437907319520123e-05, "loss": 0.6166, "step": 2341 }, { "epoch": 0.3, "grad_norm": 0.5487174918709499, "learning_rate": 1.6434740758114345e-05, "loss": 0.6116, "step": 2342 }, { "epoch": 0.3, "grad_norm": 0.564614104868581, "learning_rate": 1.643157309515913e-05, "loss": 0.6437, "step": 2343 }, { "epoch": 0.3, "grad_norm": 0.5520596808177417, "learning_rate": 1.6428404331196738e-05, "loss": 0.6247, "step": 2344 }, { "epoch": 0.3, "grad_norm": 0.5884978116541607, "learning_rate": 1.6425234466769624e-05, "loss": 0.5906, "step": 2345 }, { "epoch": 0.3, "grad_norm": 0.5359983709620303, "learning_rate": 1.6422063502420434e-05, "loss": 0.629, "step": 2346 }, { "epoch": 0.3, "grad_norm": 0.6873242847814267, "learning_rate": 1.6418891438691992e-05, "loss": 0.6911, "step": 2347 }, { "epoch": 0.3, "grad_norm": 0.5311294240834452, "learning_rate": 1.6415718276127322e-05, "loss": 0.6146, "step": 2348 }, { "epoch": 0.3, "grad_norm": 0.5468343266855415, "learning_rate": 1.641254401526963e-05, "loss": 0.6209, "step": 2349 }, { "epoch": 0.3, "grad_norm": 0.5392310694676485, "learning_rate": 1.6409368656662308e-05, "loss": 0.6293, "step": 2350 }, { "epoch": 0.3, "grad_norm": 0.4976673255882988, "learning_rate": 1.6406192200848944e-05, "loss": 0.6156, "step": 2351 }, { "epoch": 0.3, "grad_norm": 0.5321752198768023, "learning_rate": 1.64030146483733e-05, "loss": 0.6338, "step": 2352 }, { "epoch": 0.3, "grad_norm": 0.4953290044920598, "learning_rate": 1.6399835999779345e-05, "loss": 0.6345, "step": 2353 }, { "epoch": 0.3, "grad_norm": 0.4442491128143689, "learning_rate": 1.639665625561122e-05, "loss": 0.6017, "step": 2354 }, { "epoch": 0.3, "grad_norm": 0.5824819972452014, "learning_rate": 1.639347541641326e-05, "loss": 0.6164, "step": 2355 }, { "epoch": 0.3, "grad_norm": 0.5387768836206691, "learning_rate": 1.639029348272998e-05, "loss": 0.6178, "step": 2356 }, { "epoch": 0.3, "grad_norm": 0.4929916219652808, "learning_rate": 1.6387110455106095e-05, "loss": 0.6044, "step": 2357 }, { "epoch": 0.3, "grad_norm": 0.48805167323023163, "learning_rate": 1.63839263340865e-05, "loss": 0.615, "step": 2358 }, { "epoch": 0.3, "grad_norm": 0.5716084207623416, "learning_rate": 1.6380741120216275e-05, "loss": 0.5964, "step": 2359 }, { "epoch": 0.3, "grad_norm": 0.5301819306403367, "learning_rate": 1.6377554814040693e-05, "loss": 0.6064, "step": 2360 }, { "epoch": 0.3, "grad_norm": 0.6229285347560934, "learning_rate": 1.6374367416105206e-05, "loss": 0.6531, "step": 2361 }, { "epoch": 0.3, "grad_norm": 0.5414695558331561, "learning_rate": 1.6371178926955466e-05, "loss": 0.6301, "step": 2362 }, { "epoch": 0.3, "grad_norm": 0.6066926301138472, "learning_rate": 1.6367989347137293e-05, "loss": 0.625, "step": 2363 }, { "epoch": 0.3, "grad_norm": 0.6020315800520486, "learning_rate": 1.6364798677196713e-05, "loss": 0.6778, "step": 2364 }, { "epoch": 0.3, "grad_norm": 0.48391907404699025, "learning_rate": 1.6361606917679926e-05, "loss": 0.5826, "step": 2365 }, { "epoch": 0.3, "grad_norm": 0.5681692431872015, "learning_rate": 1.6358414069133326e-05, "loss": 0.648, "step": 2366 }, { "epoch": 0.3, "grad_norm": 0.4843619085372432, "learning_rate": 1.635522013210349e-05, "loss": 0.6174, "step": 2367 }, { "epoch": 0.3, "grad_norm": 0.5320838870793322, "learning_rate": 1.6352025107137173e-05, "loss": 0.6125, "step": 2368 }, { "epoch": 0.3, "grad_norm": 0.5312102113695647, "learning_rate": 1.634882899478133e-05, "loss": 0.6042, "step": 2369 }, { "epoch": 0.3, "grad_norm": 0.572571507476506, "learning_rate": 1.63456317955831e-05, "loss": 0.6314, "step": 2370 }, { "epoch": 0.3, "grad_norm": 0.5290449888068212, "learning_rate": 1.63424335100898e-05, "loss": 0.6745, "step": 2371 }, { "epoch": 0.3, "grad_norm": 0.5275799162074238, "learning_rate": 1.6339234138848935e-05, "loss": 0.6048, "step": 2372 }, { "epoch": 0.3, "grad_norm": 0.6125345481042559, "learning_rate": 1.6336033682408205e-05, "loss": 0.6708, "step": 2373 }, { "epoch": 0.3, "grad_norm": 0.5510997000851711, "learning_rate": 1.6332832141315485e-05, "loss": 0.6176, "step": 2374 }, { "epoch": 0.3, "grad_norm": 0.5992612148375973, "learning_rate": 1.6329629516118845e-05, "loss": 0.653, "step": 2375 }, { "epoch": 0.3, "grad_norm": 0.5666471305357147, "learning_rate": 1.632642580736653e-05, "loss": 0.6549, "step": 2376 }, { "epoch": 0.3, "grad_norm": 0.5169593978983071, "learning_rate": 1.632322101560698e-05, "loss": 0.6325, "step": 2377 }, { "epoch": 0.3, "grad_norm": 0.533574909629546, "learning_rate": 1.6320015141388814e-05, "loss": 0.6222, "step": 2378 }, { "epoch": 0.3, "grad_norm": 0.5168629262029125, "learning_rate": 1.631680818526084e-05, "loss": 0.5765, "step": 2379 }, { "epoch": 0.3, "grad_norm": 0.5364045447953621, "learning_rate": 1.6313600147772046e-05, "loss": 0.599, "step": 2380 }, { "epoch": 0.3, "grad_norm": 0.5743095430099174, "learning_rate": 1.6310391029471615e-05, "loss": 0.6155, "step": 2381 }, { "epoch": 0.3, "grad_norm": 0.6129899972650932, "learning_rate": 1.6307180830908907e-05, "loss": 0.6559, "step": 2382 }, { "epoch": 0.3, "grad_norm": 0.49409388379866986, "learning_rate": 1.6303969552633464e-05, "loss": 0.6218, "step": 2383 }, { "epoch": 0.3, "grad_norm": 0.6446433399864748, "learning_rate": 1.6300757195195026e-05, "loss": 0.6303, "step": 2384 }, { "epoch": 0.3, "grad_norm": 0.5947550232342159, "learning_rate": 1.6297543759143503e-05, "loss": 0.6469, "step": 2385 }, { "epoch": 0.3, "grad_norm": 0.5267357704894643, "learning_rate": 1.6294329245029e-05, "loss": 0.6113, "step": 2386 }, { "epoch": 0.3, "grad_norm": 0.5395832435145995, "learning_rate": 1.62911136534018e-05, "loss": 0.6177, "step": 2387 }, { "epoch": 0.31, "grad_norm": 0.5528506046665987, "learning_rate": 1.6287896984812376e-05, "loss": 0.6347, "step": 2388 }, { "epoch": 0.31, "grad_norm": 0.5356963787074257, "learning_rate": 1.6284679239811385e-05, "loss": 0.6398, "step": 2389 }, { "epoch": 0.31, "grad_norm": 0.5844034852700881, "learning_rate": 1.628146041894966e-05, "loss": 0.6472, "step": 2390 }, { "epoch": 0.31, "grad_norm": 0.5068194263360998, "learning_rate": 1.6278240522778224e-05, "loss": 0.6075, "step": 2391 }, { "epoch": 0.31, "grad_norm": 0.5643050788276555, "learning_rate": 1.627501955184829e-05, "loss": 0.6227, "step": 2392 }, { "epoch": 0.31, "grad_norm": 0.6472686116916954, "learning_rate": 1.627179750671124e-05, "loss": 0.6315, "step": 2393 }, { "epoch": 0.31, "grad_norm": 0.5589549170058418, "learning_rate": 1.6268574387918658e-05, "loss": 0.6298, "step": 2394 }, { "epoch": 0.31, "grad_norm": 0.5296384279129951, "learning_rate": 1.6265350196022298e-05, "loss": 0.6132, "step": 2395 }, { "epoch": 0.31, "grad_norm": 0.5269914120791137, "learning_rate": 1.6262124931574107e-05, "loss": 0.6177, "step": 2396 }, { "epoch": 0.31, "grad_norm": 0.5462343324614602, "learning_rate": 1.6258898595126205e-05, "loss": 0.6343, "step": 2397 }, { "epoch": 0.31, "grad_norm": 0.5344021228561113, "learning_rate": 1.6255671187230905e-05, "loss": 0.6024, "step": 2398 }, { "epoch": 0.31, "grad_norm": 0.5309316528772344, "learning_rate": 1.62524427084407e-05, "loss": 0.6199, "step": 2399 }, { "epoch": 0.31, "grad_norm": 0.5038629942401484, "learning_rate": 1.6249213159308265e-05, "loss": 0.6051, "step": 2400 }, { "epoch": 0.31, "grad_norm": 0.4822854488562515, "learning_rate": 1.624598254038646e-05, "loss": 0.5838, "step": 2401 }, { "epoch": 0.31, "grad_norm": 0.5857465575368495, "learning_rate": 1.624275085222833e-05, "loss": 0.6609, "step": 2402 }, { "epoch": 0.31, "grad_norm": 0.5543333261947206, "learning_rate": 1.62395180953871e-05, "loss": 0.6093, "step": 2403 }, { "epoch": 0.31, "grad_norm": 0.48503158005160135, "learning_rate": 1.623628427041617e-05, "loss": 0.598, "step": 2404 }, { "epoch": 0.31, "grad_norm": 0.5920755975326165, "learning_rate": 1.6233049377869146e-05, "loss": 0.6616, "step": 2405 }, { "epoch": 0.31, "grad_norm": 0.6044044389721821, "learning_rate": 1.6229813418299795e-05, "loss": 0.6111, "step": 2406 }, { "epoch": 0.31, "grad_norm": 0.4849257991685663, "learning_rate": 1.622657639226207e-05, "loss": 0.6251, "step": 2407 }, { "epoch": 0.31, "grad_norm": 0.5826842551657972, "learning_rate": 1.622333830031012e-05, "loss": 0.6297, "step": 2408 }, { "epoch": 0.31, "grad_norm": 0.5887119227197668, "learning_rate": 1.6220099142998262e-05, "loss": 0.6175, "step": 2409 }, { "epoch": 0.31, "grad_norm": 0.5288177631533838, "learning_rate": 1.6216858920881e-05, "loss": 0.6361, "step": 2410 }, { "epoch": 0.31, "grad_norm": 0.48648407661694903, "learning_rate": 1.621361763451301e-05, "loss": 0.571, "step": 2411 }, { "epoch": 0.31, "grad_norm": 0.5828661729311938, "learning_rate": 1.6210375284449186e-05, "loss": 0.6188, "step": 2412 }, { "epoch": 0.31, "grad_norm": 0.631839061666036, "learning_rate": 1.6207131871244558e-05, "loss": 0.6699, "step": 2413 }, { "epoch": 0.31, "grad_norm": 0.5848739438611131, "learning_rate": 1.6203887395454367e-05, "loss": 0.6121, "step": 2414 }, { "epoch": 0.31, "grad_norm": 0.7139276525230883, "learning_rate": 1.6200641857634026e-05, "loss": 0.6803, "step": 2415 }, { "epoch": 0.31, "grad_norm": 0.4717695633797493, "learning_rate": 1.6197395258339135e-05, "loss": 0.6267, "step": 2416 }, { "epoch": 0.31, "grad_norm": 0.5158884398848107, "learning_rate": 1.6194147598125468e-05, "loss": 0.6131, "step": 2417 }, { "epoch": 0.31, "grad_norm": 0.5035634047884731, "learning_rate": 1.6190898877548982e-05, "loss": 0.6129, "step": 2418 }, { "epoch": 0.31, "grad_norm": 0.4691036958683673, "learning_rate": 1.6187649097165827e-05, "loss": 0.604, "step": 2419 }, { "epoch": 0.31, "grad_norm": 0.5005005429932465, "learning_rate": 1.6184398257532322e-05, "loss": 0.6039, "step": 2420 }, { "epoch": 0.31, "grad_norm": 0.49336487232956455, "learning_rate": 1.618114635920497e-05, "loss": 0.5706, "step": 2421 }, { "epoch": 0.31, "grad_norm": 0.5620990312398872, "learning_rate": 1.6177893402740457e-05, "loss": 0.6423, "step": 2422 }, { "epoch": 0.31, "grad_norm": 0.5963522489893971, "learning_rate": 1.6174639388695653e-05, "loss": 0.6408, "step": 2423 }, { "epoch": 0.31, "grad_norm": 0.5044126085559646, "learning_rate": 1.61713843176276e-05, "loss": 0.6097, "step": 2424 }, { "epoch": 0.31, "grad_norm": 0.5251785590460109, "learning_rate": 1.616812819009353e-05, "loss": 0.6218, "step": 2425 }, { "epoch": 0.31, "grad_norm": 0.5041447313536171, "learning_rate": 1.616487100665085e-05, "loss": 0.5893, "step": 2426 }, { "epoch": 0.31, "grad_norm": 0.5911091357724128, "learning_rate": 1.6161612767857156e-05, "loss": 0.6895, "step": 2427 }, { "epoch": 0.31, "grad_norm": 0.5556678237006294, "learning_rate": 1.6158353474270216e-05, "loss": 0.6282, "step": 2428 }, { "epoch": 0.31, "grad_norm": 0.5374145259800556, "learning_rate": 1.6155093126447977e-05, "loss": 0.6058, "step": 2429 }, { "epoch": 0.31, "grad_norm": 0.4708766151803712, "learning_rate": 1.6151831724948576e-05, "loss": 0.5999, "step": 2430 }, { "epoch": 0.31, "grad_norm": 0.5517639421215399, "learning_rate": 1.6148569270330323e-05, "loss": 0.6385, "step": 2431 }, { "epoch": 0.31, "grad_norm": 0.5520024939763359, "learning_rate": 1.614530576315171e-05, "loss": 0.6131, "step": 2432 }, { "epoch": 0.31, "grad_norm": 0.6030935295679745, "learning_rate": 1.6142041203971417e-05, "loss": 0.6698, "step": 2433 }, { "epoch": 0.31, "grad_norm": 0.5432317859281127, "learning_rate": 1.6138775593348288e-05, "loss": 0.6449, "step": 2434 }, { "epoch": 0.31, "grad_norm": 0.5297321479516612, "learning_rate": 1.6135508931841356e-05, "loss": 0.5933, "step": 2435 }, { "epoch": 0.31, "grad_norm": 0.5905590998687068, "learning_rate": 1.613224122000984e-05, "loss": 0.6159, "step": 2436 }, { "epoch": 0.31, "grad_norm": 0.5380868448727981, "learning_rate": 1.6128972458413128e-05, "loss": 0.6336, "step": 2437 }, { "epoch": 0.31, "grad_norm": 0.5320387485054694, "learning_rate": 1.6125702647610792e-05, "loss": 0.6133, "step": 2438 }, { "epoch": 0.31, "grad_norm": 0.5721443015421228, "learning_rate": 1.612243178816259e-05, "loss": 0.636, "step": 2439 }, { "epoch": 0.31, "grad_norm": 0.5297870518978494, "learning_rate": 1.6119159880628443e-05, "loss": 0.6239, "step": 2440 }, { "epoch": 0.31, "grad_norm": 0.6262952935788447, "learning_rate": 1.611588692556847e-05, "loss": 0.6306, "step": 2441 }, { "epoch": 0.31, "grad_norm": 0.6113898241237021, "learning_rate": 1.6112612923542962e-05, "loss": 0.6312, "step": 2442 }, { "epoch": 0.31, "grad_norm": 0.5350657842840677, "learning_rate": 1.6109337875112384e-05, "loss": 0.6349, "step": 2443 }, { "epoch": 0.31, "grad_norm": 0.5756730833911952, "learning_rate": 1.610606178083738e-05, "loss": 0.6424, "step": 2444 }, { "epoch": 0.31, "grad_norm": 0.5813013113943656, "learning_rate": 1.610278464127879e-05, "loss": 0.6514, "step": 2445 }, { "epoch": 0.31, "grad_norm": 0.5620161971012798, "learning_rate": 1.6099506456997613e-05, "loss": 0.6455, "step": 2446 }, { "epoch": 0.31, "grad_norm": 0.526949143414107, "learning_rate": 1.609622722855503e-05, "loss": 0.6008, "step": 2447 }, { "epoch": 0.31, "grad_norm": 0.49423485618581114, "learning_rate": 1.6092946956512415e-05, "loss": 0.5564, "step": 2448 }, { "epoch": 0.31, "grad_norm": 0.5594851373133793, "learning_rate": 1.6089665641431302e-05, "loss": 0.6237, "step": 2449 }, { "epoch": 0.31, "grad_norm": 0.4699924932595956, "learning_rate": 1.608638328387342e-05, "loss": 0.6324, "step": 2450 }, { "epoch": 0.31, "grad_norm": 0.5689029416465073, "learning_rate": 1.6083099884400662e-05, "loss": 0.6863, "step": 2451 }, { "epoch": 0.31, "grad_norm": 0.6235607927060557, "learning_rate": 1.6079815443575107e-05, "loss": 0.6527, "step": 2452 }, { "epoch": 0.31, "grad_norm": 0.5712753751820473, "learning_rate": 1.6076529961959018e-05, "loss": 0.6493, "step": 2453 }, { "epoch": 0.31, "grad_norm": 0.6270796197878536, "learning_rate": 1.6073243440114824e-05, "loss": 0.6481, "step": 2454 }, { "epoch": 0.31, "grad_norm": 0.5757357093868711, "learning_rate": 1.6069955878605135e-05, "loss": 0.6312, "step": 2455 }, { "epoch": 0.31, "grad_norm": 0.5592715588017454, "learning_rate": 1.6066667277992747e-05, "loss": 0.5998, "step": 2456 }, { "epoch": 0.31, "grad_norm": 0.4913810286080838, "learning_rate": 1.6063377638840627e-05, "loss": 0.6327, "step": 2457 }, { "epoch": 0.31, "grad_norm": 0.5639232495311299, "learning_rate": 1.606008696171192e-05, "loss": 0.6468, "step": 2458 }, { "epoch": 0.31, "grad_norm": 0.5546703827042653, "learning_rate": 1.6056795247169948e-05, "loss": 0.5937, "step": 2459 }, { "epoch": 0.31, "grad_norm": 0.619119769411886, "learning_rate": 1.605350249577822e-05, "loss": 0.634, "step": 2460 }, { "epoch": 0.31, "grad_norm": 0.46407996147576575, "learning_rate": 1.6050208708100407e-05, "loss": 0.6248, "step": 2461 }, { "epoch": 0.31, "grad_norm": 0.4825035688737674, "learning_rate": 1.604691388470037e-05, "loss": 0.6184, "step": 2462 }, { "epoch": 0.31, "grad_norm": 0.5049947544046218, "learning_rate": 1.6043618026142143e-05, "loss": 0.6054, "step": 2463 }, { "epoch": 0.31, "grad_norm": 0.5039284483584724, "learning_rate": 1.6040321132989933e-05, "loss": 0.5701, "step": 2464 }, { "epoch": 0.31, "grad_norm": 0.5114719117400629, "learning_rate": 1.6037023205808127e-05, "loss": 0.5917, "step": 2465 }, { "epoch": 0.32, "grad_norm": 0.5437217937432626, "learning_rate": 1.60337242451613e-05, "loss": 0.6017, "step": 2466 }, { "epoch": 0.32, "grad_norm": 0.5634371154232506, "learning_rate": 1.6030424251614187e-05, "loss": 0.6352, "step": 2467 }, { "epoch": 0.32, "grad_norm": 0.5425357265706898, "learning_rate": 1.6027123225731703e-05, "loss": 0.6194, "step": 2468 }, { "epoch": 0.32, "grad_norm": 0.5388841316752552, "learning_rate": 1.602382116807895e-05, "loss": 0.6119, "step": 2469 }, { "epoch": 0.32, "grad_norm": 0.533758020847645, "learning_rate": 1.60205180792212e-05, "loss": 0.6278, "step": 2470 }, { "epoch": 0.32, "grad_norm": 0.4924659236878759, "learning_rate": 1.6017213959723896e-05, "loss": 0.588, "step": 2471 }, { "epoch": 0.32, "grad_norm": 0.5701992507477753, "learning_rate": 1.6013908810152666e-05, "loss": 0.6076, "step": 2472 }, { "epoch": 0.32, "grad_norm": 0.5679423725431146, "learning_rate": 1.6010602631073316e-05, "loss": 0.6356, "step": 2473 }, { "epoch": 0.32, "grad_norm": 0.5285272558905046, "learning_rate": 1.600729542305182e-05, "loss": 0.6117, "step": 2474 }, { "epoch": 0.32, "grad_norm": 0.5326763427582872, "learning_rate": 1.6003987186654327e-05, "loss": 0.6111, "step": 2475 }, { "epoch": 0.32, "grad_norm": 0.6257796866412061, "learning_rate": 1.6000677922447175e-05, "loss": 0.6342, "step": 2476 }, { "epoch": 0.32, "grad_norm": 0.5572096868795108, "learning_rate": 1.599736763099687e-05, "loss": 0.6132, "step": 2477 }, { "epoch": 0.32, "grad_norm": 0.5231191659309944, "learning_rate": 1.5994056312870084e-05, "loss": 0.6276, "step": 2478 }, { "epoch": 0.32, "grad_norm": 0.7694289262615335, "learning_rate": 1.5990743968633682e-05, "loss": 0.6997, "step": 2479 }, { "epoch": 0.32, "grad_norm": 0.5314992866726228, "learning_rate": 1.5987430598854697e-05, "loss": 0.6553, "step": 2480 }, { "epoch": 0.32, "grad_norm": 0.5355177240961927, "learning_rate": 1.5984116204100333e-05, "loss": 0.6048, "step": 2481 }, { "epoch": 0.32, "grad_norm": 0.6413187879986982, "learning_rate": 1.598080078493798e-05, "loss": 0.6511, "step": 2482 }, { "epoch": 0.32, "grad_norm": 0.5278321711851165, "learning_rate": 1.5977484341935194e-05, "loss": 0.6351, "step": 2483 }, { "epoch": 0.32, "grad_norm": 0.4767032016022338, "learning_rate": 1.5974166875659714e-05, "loss": 0.5973, "step": 2484 }, { "epoch": 0.32, "grad_norm": 0.5534013256299052, "learning_rate": 1.5970848386679442e-05, "loss": 0.6138, "step": 2485 }, { "epoch": 0.32, "grad_norm": 0.6051973072859492, "learning_rate": 1.596752887556247e-05, "loss": 0.6262, "step": 2486 }, { "epoch": 0.32, "grad_norm": 0.5410418826295887, "learning_rate": 1.5964208342877054e-05, "loss": 0.6156, "step": 2487 }, { "epoch": 0.32, "grad_norm": 0.5801003024647757, "learning_rate": 1.5960886789191632e-05, "loss": 0.5991, "step": 2488 }, { "epoch": 0.32, "grad_norm": 0.47599544123183984, "learning_rate": 1.595756421507481e-05, "loss": 0.6062, "step": 2489 }, { "epoch": 0.32, "grad_norm": 0.5060185326458714, "learning_rate": 1.5954240621095375e-05, "loss": 0.609, "step": 2490 }, { "epoch": 0.32, "grad_norm": 0.5030353871221855, "learning_rate": 1.5950916007822285e-05, "loss": 0.6276, "step": 2491 }, { "epoch": 0.32, "grad_norm": 0.516613901365016, "learning_rate": 1.594759037582468e-05, "loss": 0.6041, "step": 2492 }, { "epoch": 0.32, "grad_norm": 0.5351464847460996, "learning_rate": 1.594426372567185e-05, "loss": 0.6217, "step": 2493 }, { "epoch": 0.32, "grad_norm": 0.5562367780621209, "learning_rate": 1.5940936057933293e-05, "loss": 0.6008, "step": 2494 }, { "epoch": 0.32, "grad_norm": 0.4722720878487809, "learning_rate": 1.5937607373178665e-05, "loss": 0.6066, "step": 2495 }, { "epoch": 0.32, "grad_norm": 0.5428564818201921, "learning_rate": 1.5934277671977788e-05, "loss": 0.6165, "step": 2496 }, { "epoch": 0.32, "grad_norm": 0.49325382236467186, "learning_rate": 1.593094695490067e-05, "loss": 0.6071, "step": 2497 }, { "epoch": 0.32, "grad_norm": 0.5493903111691743, "learning_rate": 1.5927615222517494e-05, "loss": 0.6228, "step": 2498 }, { "epoch": 0.32, "grad_norm": 0.5378967750014273, "learning_rate": 1.5924282475398607e-05, "loss": 0.612, "step": 2499 }, { "epoch": 0.32, "grad_norm": 0.5231639107494029, "learning_rate": 1.592094871411453e-05, "loss": 0.6154, "step": 2500 }, { "epoch": 0.32, "grad_norm": 0.5111806663766852, "learning_rate": 1.5917613939235974e-05, "loss": 0.6263, "step": 2501 }, { "epoch": 0.32, "grad_norm": 0.5121924873663498, "learning_rate": 1.5914278151333804e-05, "loss": 0.6205, "step": 2502 }, { "epoch": 0.32, "grad_norm": 0.5450093704450596, "learning_rate": 1.591094135097907e-05, "loss": 0.6022, "step": 2503 }, { "epoch": 0.32, "grad_norm": 0.5139512299278968, "learning_rate": 1.590760353874299e-05, "loss": 0.5924, "step": 2504 }, { "epoch": 0.32, "grad_norm": 0.5240916452373449, "learning_rate": 1.590426471519696e-05, "loss": 0.5814, "step": 2505 }, { "epoch": 0.32, "grad_norm": 0.5689153159774921, "learning_rate": 1.5900924880912535e-05, "loss": 0.6961, "step": 2506 }, { "epoch": 0.32, "grad_norm": 0.5139266571804061, "learning_rate": 1.5897584036461466e-05, "loss": 0.6224, "step": 2507 }, { "epoch": 0.32, "grad_norm": 0.5493668297930275, "learning_rate": 1.5894242182415663e-05, "loss": 0.6242, "step": 2508 }, { "epoch": 0.32, "grad_norm": 0.6488213086818879, "learning_rate": 1.589089931934721e-05, "loss": 0.6244, "step": 2509 }, { "epoch": 0.32, "grad_norm": 0.5259417399950728, "learning_rate": 1.5887555447828363e-05, "loss": 0.6249, "step": 2510 }, { "epoch": 0.32, "grad_norm": 0.5486347678822044, "learning_rate": 1.588421056843155e-05, "loss": 0.6288, "step": 2511 }, { "epoch": 0.32, "grad_norm": 0.5991591389428057, "learning_rate": 1.5880864681729376e-05, "loss": 0.6399, "step": 2512 }, { "epoch": 0.32, "grad_norm": 0.5794455751821588, "learning_rate": 1.587751778829462e-05, "loss": 0.5833, "step": 2513 }, { "epoch": 0.32, "grad_norm": 0.5715647950024567, "learning_rate": 1.5874169888700222e-05, "loss": 0.6902, "step": 2514 }, { "epoch": 0.32, "grad_norm": 0.5964037254516703, "learning_rate": 1.587082098351931e-05, "loss": 0.6317, "step": 2515 }, { "epoch": 0.32, "grad_norm": 0.5661892434218592, "learning_rate": 1.586747107332517e-05, "loss": 0.6046, "step": 2516 }, { "epoch": 0.32, "grad_norm": 0.6031045918388638, "learning_rate": 1.5864120158691268e-05, "loss": 0.5833, "step": 2517 }, { "epoch": 0.32, "grad_norm": 0.4935744252060136, "learning_rate": 1.5860768240191237e-05, "loss": 0.5945, "step": 2518 }, { "epoch": 0.32, "grad_norm": 0.5118264698175019, "learning_rate": 1.5857415318398888e-05, "loss": 0.6256, "step": 2519 }, { "epoch": 0.32, "grad_norm": 0.6248543201490283, "learning_rate": 1.58540613938882e-05, "loss": 0.6236, "step": 2520 }, { "epoch": 0.32, "grad_norm": 0.6156226685600471, "learning_rate": 1.585070646723333e-05, "loss": 0.5996, "step": 2521 }, { "epoch": 0.32, "grad_norm": 0.5058531680785827, "learning_rate": 1.5847350539008582e-05, "loss": 0.6234, "step": 2522 }, { "epoch": 0.32, "grad_norm": 0.5874386809037946, "learning_rate": 1.5843993609788472e-05, "loss": 0.6115, "step": 2523 }, { "epoch": 0.32, "grad_norm": 0.5282554475536652, "learning_rate": 1.5840635680147654e-05, "loss": 0.6258, "step": 2524 }, { "epoch": 0.32, "grad_norm": 0.5333770712669043, "learning_rate": 1.5837276750660967e-05, "loss": 0.6238, "step": 2525 }, { "epoch": 0.32, "grad_norm": 0.5725669581830692, "learning_rate": 1.583391682190342e-05, "loss": 0.6501, "step": 2526 }, { "epoch": 0.32, "grad_norm": 0.5696442152812439, "learning_rate": 1.5830555894450188e-05, "loss": 0.5997, "step": 2527 }, { "epoch": 0.32, "grad_norm": 0.5887263893056813, "learning_rate": 1.5827193968876625e-05, "loss": 0.6229, "step": 2528 }, { "epoch": 0.32, "grad_norm": 0.6038368963457901, "learning_rate": 1.5823831045758252e-05, "loss": 0.6558, "step": 2529 }, { "epoch": 0.32, "grad_norm": 0.5746193638339392, "learning_rate": 1.5820467125670757e-05, "loss": 0.6319, "step": 2530 }, { "epoch": 0.32, "grad_norm": 0.495134734094537, "learning_rate": 1.5817102209190006e-05, "loss": 0.6038, "step": 2531 }, { "epoch": 0.32, "grad_norm": 0.5341336152143007, "learning_rate": 1.5813736296892027e-05, "loss": 0.6295, "step": 2532 }, { "epoch": 0.32, "grad_norm": 0.6106456911429395, "learning_rate": 1.581036938935303e-05, "loss": 0.6643, "step": 2533 }, { "epoch": 0.32, "grad_norm": 0.48658151374916114, "learning_rate": 1.5807001487149386e-05, "loss": 0.6148, "step": 2534 }, { "epoch": 0.32, "grad_norm": 0.5427653987149086, "learning_rate": 1.5803632590857634e-05, "loss": 0.6482, "step": 2535 }, { "epoch": 0.32, "grad_norm": 0.5478566643120981, "learning_rate": 1.5800262701054494e-05, "loss": 0.632, "step": 2536 }, { "epoch": 0.32, "grad_norm": 0.4885510222707787, "learning_rate": 1.579689181831685e-05, "loss": 0.615, "step": 2537 }, { "epoch": 0.32, "grad_norm": 0.44526808847543187, "learning_rate": 1.5793519943221754e-05, "loss": 0.5887, "step": 2538 }, { "epoch": 0.32, "grad_norm": 0.5173510359096213, "learning_rate": 1.579014707634643e-05, "loss": 0.6262, "step": 2539 }, { "epoch": 0.32, "grad_norm": 0.5910689188553766, "learning_rate": 1.5786773218268273e-05, "loss": 0.6162, "step": 2540 }, { "epoch": 0.32, "grad_norm": 0.5235938600135315, "learning_rate": 1.5783398369564845e-05, "loss": 0.6441, "step": 2541 }, { "epoch": 0.32, "grad_norm": 0.5329980784332325, "learning_rate": 1.5780022530813883e-05, "loss": 0.6144, "step": 2542 }, { "epoch": 0.32, "grad_norm": 0.5573040325786464, "learning_rate": 1.577664570259328e-05, "loss": 0.6221, "step": 2543 }, { "epoch": 0.32, "grad_norm": 0.4634418517207426, "learning_rate": 1.577326788548112e-05, "loss": 0.5886, "step": 2544 }, { "epoch": 0.33, "grad_norm": 0.6403877501875799, "learning_rate": 1.5769889080055638e-05, "loss": 0.6534, "step": 2545 }, { "epoch": 0.33, "grad_norm": 0.4723300933182176, "learning_rate": 1.5766509286895243e-05, "loss": 0.61, "step": 2546 }, { "epoch": 0.33, "grad_norm": 0.4810957517598309, "learning_rate": 1.576312850657852e-05, "loss": 0.5832, "step": 2547 }, { "epoch": 0.33, "grad_norm": 0.5722999279449106, "learning_rate": 1.5759746739684213e-05, "loss": 0.5963, "step": 2548 }, { "epoch": 0.33, "grad_norm": 0.5735270318043466, "learning_rate": 1.5756363986791245e-05, "loss": 0.5944, "step": 2549 }, { "epoch": 0.33, "grad_norm": 0.6342312812999259, "learning_rate": 1.575298024847869e-05, "loss": 0.6433, "step": 2550 }, { "epoch": 0.33, "grad_norm": 0.6053302502970926, "learning_rate": 1.5749595525325814e-05, "loss": 0.6328, "step": 2551 }, { "epoch": 0.33, "grad_norm": 0.5164668094572651, "learning_rate": 1.574620981791204e-05, "loss": 0.6365, "step": 2552 }, { "epoch": 0.33, "grad_norm": 0.6999800489960724, "learning_rate": 1.5742823126816954e-05, "loss": 0.6247, "step": 2553 }, { "epoch": 0.33, "grad_norm": 0.6326521251290499, "learning_rate": 1.573943545262032e-05, "loss": 0.5942, "step": 2554 }, { "epoch": 0.33, "grad_norm": 0.5011148504537221, "learning_rate": 1.573604679590207e-05, "loss": 0.6094, "step": 2555 }, { "epoch": 0.33, "grad_norm": 0.5359991727068575, "learning_rate": 1.5732657157242293e-05, "loss": 0.6155, "step": 2556 }, { "epoch": 0.33, "grad_norm": 0.5170834277450365, "learning_rate": 1.572926653722126e-05, "loss": 0.6034, "step": 2557 }, { "epoch": 0.33, "grad_norm": 0.5024024239256812, "learning_rate": 1.57258749364194e-05, "loss": 0.632, "step": 2558 }, { "epoch": 0.33, "grad_norm": 0.47565193390136457, "learning_rate": 1.572248235541732e-05, "loss": 0.5796, "step": 2559 }, { "epoch": 0.33, "grad_norm": 0.49592091178634184, "learning_rate": 1.5719088794795782e-05, "loss": 0.6053, "step": 2560 }, { "epoch": 0.33, "grad_norm": 0.49205819986769606, "learning_rate": 1.5715694255135725e-05, "loss": 0.6163, "step": 2561 }, { "epoch": 0.33, "grad_norm": 0.6465485087473951, "learning_rate": 1.5712298737018255e-05, "loss": 0.6522, "step": 2562 }, { "epoch": 0.33, "grad_norm": 0.4899204890529568, "learning_rate": 1.5708902241024638e-05, "loss": 0.5953, "step": 2563 }, { "epoch": 0.33, "grad_norm": 0.5397963671330369, "learning_rate": 1.5705504767736316e-05, "loss": 0.6127, "step": 2564 }, { "epoch": 0.33, "grad_norm": 0.45191502465942407, "learning_rate": 1.57021063177349e-05, "loss": 0.5917, "step": 2565 }, { "epoch": 0.33, "grad_norm": 0.4906251967627166, "learning_rate": 1.5698706891602156e-05, "loss": 0.5901, "step": 2566 }, { "epoch": 0.33, "grad_norm": 0.6467313756470376, "learning_rate": 1.5695306489920026e-05, "loss": 0.6198, "step": 2567 }, { "epoch": 0.33, "grad_norm": 0.4925526287967276, "learning_rate": 1.569190511327062e-05, "loss": 0.6258, "step": 2568 }, { "epoch": 0.33, "grad_norm": 0.5826704156654132, "learning_rate": 1.5688502762236207e-05, "loss": 0.6302, "step": 2569 }, { "epoch": 0.33, "grad_norm": 0.5999525601840806, "learning_rate": 1.568509943739924e-05, "loss": 0.6813, "step": 2570 }, { "epoch": 0.33, "grad_norm": 0.6425771288141681, "learning_rate": 1.5681695139342316e-05, "loss": 0.6343, "step": 2571 }, { "epoch": 0.33, "grad_norm": 0.6730454881374995, "learning_rate": 1.567828986864821e-05, "loss": 0.629, "step": 2572 }, { "epoch": 0.33, "grad_norm": 0.5771808255408177, "learning_rate": 1.5674883625899866e-05, "loss": 0.6097, "step": 2573 }, { "epoch": 0.33, "grad_norm": 0.6063696234140773, "learning_rate": 1.5671476411680393e-05, "loss": 0.6394, "step": 2574 }, { "epoch": 0.33, "grad_norm": 0.5870519081847826, "learning_rate": 1.566806822657306e-05, "loss": 0.603, "step": 2575 }, { "epoch": 0.33, "grad_norm": 0.5989459996666066, "learning_rate": 1.566465907116131e-05, "loss": 0.6119, "step": 2576 }, { "epoch": 0.33, "grad_norm": 0.4931215499034339, "learning_rate": 1.5661248946028747e-05, "loss": 0.6265, "step": 2577 }, { "epoch": 0.33, "grad_norm": 0.5175758026372226, "learning_rate": 1.565783785175915e-05, "loss": 0.5983, "step": 2578 }, { "epoch": 0.33, "grad_norm": 0.5623267536701027, "learning_rate": 1.5654425788936444e-05, "loss": 0.6017, "step": 2579 }, { "epoch": 0.33, "grad_norm": 0.49221822057385034, "learning_rate": 1.5651012758144743e-05, "loss": 0.625, "step": 2580 }, { "epoch": 0.33, "grad_norm": 0.5350658997832992, "learning_rate": 1.5647598759968318e-05, "loss": 0.6084, "step": 2581 }, { "epoch": 0.33, "grad_norm": 0.5378748452769275, "learning_rate": 1.564418379499159e-05, "loss": 0.6288, "step": 2582 }, { "epoch": 0.33, "grad_norm": 0.550148567416239, "learning_rate": 1.564076786379918e-05, "loss": 0.6223, "step": 2583 }, { "epoch": 0.33, "grad_norm": 0.5160304894880762, "learning_rate": 1.5637350966975832e-05, "loss": 0.6228, "step": 2584 }, { "epoch": 0.33, "grad_norm": 0.5688444222261035, "learning_rate": 1.5633933105106495e-05, "loss": 0.6517, "step": 2585 }, { "epoch": 0.33, "grad_norm": 0.636800811986953, "learning_rate": 1.5630514278776253e-05, "loss": 0.6602, "step": 2586 }, { "epoch": 0.33, "grad_norm": 0.5398577674112909, "learning_rate": 1.5627094488570378e-05, "loss": 0.6098, "step": 2587 }, { "epoch": 0.33, "grad_norm": 0.5117953584815055, "learning_rate": 1.562367373507429e-05, "loss": 0.604, "step": 2588 }, { "epoch": 0.33, "grad_norm": 0.5261777995907642, "learning_rate": 1.5620252018873584e-05, "loss": 0.629, "step": 2589 }, { "epoch": 0.33, "grad_norm": 0.5408448033009611, "learning_rate": 1.561682934055401e-05, "loss": 0.6359, "step": 2590 }, { "epoch": 0.33, "grad_norm": 0.4502244592363756, "learning_rate": 1.5613405700701496e-05, "loss": 0.5897, "step": 2591 }, { "epoch": 0.33, "grad_norm": 0.5334543418648134, "learning_rate": 1.560998109990212e-05, "loss": 0.6375, "step": 2592 }, { "epoch": 0.33, "grad_norm": 0.5473833205995573, "learning_rate": 1.5606555538742144e-05, "loss": 0.6289, "step": 2593 }, { "epoch": 0.33, "grad_norm": 0.5292208600510945, "learning_rate": 1.5603129017807972e-05, "loss": 0.6444, "step": 2594 }, { "epoch": 0.33, "grad_norm": 0.5191174223957132, "learning_rate": 1.5599701537686186e-05, "loss": 0.5985, "step": 2595 }, { "epoch": 0.33, "grad_norm": 0.5086773207827288, "learning_rate": 1.5596273098963526e-05, "loss": 0.6503, "step": 2596 }, { "epoch": 0.33, "grad_norm": 0.5580781874584455, "learning_rate": 1.5592843702226907e-05, "loss": 0.6215, "step": 2597 }, { "epoch": 0.33, "grad_norm": 0.5283636083211984, "learning_rate": 1.558941334806339e-05, "loss": 0.6095, "step": 2598 }, { "epoch": 0.33, "grad_norm": 0.474172205585446, "learning_rate": 1.558598203706022e-05, "loss": 0.6182, "step": 2599 }, { "epoch": 0.33, "grad_norm": 0.5647229330641549, "learning_rate": 1.5582549769804785e-05, "loss": 0.6632, "step": 2600 }, { "epoch": 0.33, "grad_norm": 0.6379119196361815, "learning_rate": 1.557911654688466e-05, "loss": 0.6704, "step": 2601 }, { "epoch": 0.33, "grad_norm": 0.6059825954321366, "learning_rate": 1.5575682368887563e-05, "loss": 0.649, "step": 2602 }, { "epoch": 0.33, "grad_norm": 0.5078539985589904, "learning_rate": 1.5572247236401378e-05, "loss": 0.6204, "step": 2603 }, { "epoch": 0.33, "grad_norm": 0.5682223900719431, "learning_rate": 1.5568811150014172e-05, "loss": 0.6571, "step": 2604 }, { "epoch": 0.33, "grad_norm": 0.5151429816093759, "learning_rate": 1.556537411031415e-05, "loss": 0.6239, "step": 2605 }, { "epoch": 0.33, "grad_norm": 0.5421037095251728, "learning_rate": 1.55619361178897e-05, "loss": 0.6557, "step": 2606 }, { "epoch": 0.33, "grad_norm": 3.237871844221997, "learning_rate": 1.5558497173329354e-05, "loss": 0.6213, "step": 2607 }, { "epoch": 0.33, "grad_norm": 0.47895608036538756, "learning_rate": 1.5555057277221827e-05, "loss": 0.6283, "step": 2608 }, { "epoch": 0.33, "grad_norm": 0.4930915306735535, "learning_rate": 1.555161643015598e-05, "loss": 0.5846, "step": 2609 }, { "epoch": 0.33, "grad_norm": 0.5156239642759985, "learning_rate": 1.5548174632720855e-05, "loss": 0.6094, "step": 2610 }, { "epoch": 0.33, "grad_norm": 0.5237760026064902, "learning_rate": 1.5544731885505636e-05, "loss": 0.6427, "step": 2611 }, { "epoch": 0.33, "grad_norm": 0.49987050641582464, "learning_rate": 1.554128818909968e-05, "loss": 0.643, "step": 2612 }, { "epoch": 0.33, "grad_norm": 0.4459346545398388, "learning_rate": 1.553784354409251e-05, "loss": 0.5979, "step": 2613 }, { "epoch": 0.33, "grad_norm": 0.4998175661825949, "learning_rate": 1.5534397951073804e-05, "loss": 0.6083, "step": 2614 }, { "epoch": 0.33, "grad_norm": 0.5347973239136796, "learning_rate": 1.553095141063341e-05, "loss": 0.5669, "step": 2615 }, { "epoch": 0.33, "grad_norm": 0.5480412748986981, "learning_rate": 1.5527503923361334e-05, "loss": 0.6257, "step": 2616 }, { "epoch": 0.33, "grad_norm": 0.529827461025817, "learning_rate": 1.5524055489847742e-05, "loss": 0.5976, "step": 2617 }, { "epoch": 0.33, "grad_norm": 0.5433943658521058, "learning_rate": 1.5520606110682956e-05, "loss": 0.6005, "step": 2618 }, { "epoch": 0.33, "grad_norm": 0.5208501440665785, "learning_rate": 1.5517155786457482e-05, "loss": 0.6029, "step": 2619 }, { "epoch": 0.33, "grad_norm": 0.6237180310711112, "learning_rate": 1.551370451776196e-05, "loss": 0.6389, "step": 2620 }, { "epoch": 0.33, "grad_norm": 0.49673499528930215, "learning_rate": 1.5510252305187216e-05, "loss": 0.6037, "step": 2621 }, { "epoch": 0.33, "grad_norm": 0.5481992655416048, "learning_rate": 1.550679914932422e-05, "loss": 0.5857, "step": 2622 }, { "epoch": 0.34, "grad_norm": 0.5598250961989024, "learning_rate": 1.550334505076412e-05, "loss": 0.6681, "step": 2623 }, { "epoch": 0.34, "grad_norm": 0.5415340388893721, "learning_rate": 1.54998900100982e-05, "loss": 0.6138, "step": 2624 }, { "epoch": 0.34, "grad_norm": 0.5974904156011533, "learning_rate": 1.5496434027917934e-05, "loss": 0.5904, "step": 2625 }, { "epoch": 0.34, "grad_norm": 0.5771182670537687, "learning_rate": 1.5492977104814943e-05, "loss": 0.6645, "step": 2626 }, { "epoch": 0.34, "grad_norm": 0.5145490634262581, "learning_rate": 1.5489519241381005e-05, "loss": 0.6028, "step": 2627 }, { "epoch": 0.34, "grad_norm": 0.5490661466663912, "learning_rate": 1.548606043820806e-05, "loss": 0.6264, "step": 2628 }, { "epoch": 0.34, "grad_norm": 0.572590402033801, "learning_rate": 1.548260069588823e-05, "loss": 0.6387, "step": 2629 }, { "epoch": 0.34, "grad_norm": 0.5223441573371485, "learning_rate": 1.5479140015013762e-05, "loss": 0.6046, "step": 2630 }, { "epoch": 0.34, "grad_norm": 0.5798885380193907, "learning_rate": 1.5475678396177098e-05, "loss": 0.6491, "step": 2631 }, { "epoch": 0.34, "grad_norm": 0.5624319235010893, "learning_rate": 1.547221583997081e-05, "loss": 0.6269, "step": 2632 }, { "epoch": 0.34, "grad_norm": 0.5747879869184945, "learning_rate": 1.546875234698766e-05, "loss": 0.6539, "step": 2633 }, { "epoch": 0.34, "grad_norm": 0.5547629738836222, "learning_rate": 1.5465287917820545e-05, "loss": 0.6621, "step": 2634 }, { "epoch": 0.34, "grad_norm": 0.4795198082774915, "learning_rate": 1.546182255306254e-05, "loss": 0.6179, "step": 2635 }, { "epoch": 0.34, "grad_norm": 0.44062851942502534, "learning_rate": 1.545835625330687e-05, "loss": 0.5854, "step": 2636 }, { "epoch": 0.34, "grad_norm": 0.5326040101125358, "learning_rate": 1.5454889019146926e-05, "loss": 0.5788, "step": 2637 }, { "epoch": 0.34, "grad_norm": 0.5628225650921865, "learning_rate": 1.5451420851176257e-05, "loss": 0.6191, "step": 2638 }, { "epoch": 0.34, "grad_norm": 0.6096432115815905, "learning_rate": 1.5447951749988563e-05, "loss": 0.6594, "step": 2639 }, { "epoch": 0.34, "grad_norm": 0.45373998888300027, "learning_rate": 1.5444481716177723e-05, "loss": 0.5921, "step": 2640 }, { "epoch": 0.34, "grad_norm": 0.6692127424857577, "learning_rate": 1.5441010750337754e-05, "loss": 0.6964, "step": 2641 }, { "epoch": 0.34, "grad_norm": 0.469388817504303, "learning_rate": 1.5437538853062856e-05, "loss": 0.5941, "step": 2642 }, { "epoch": 0.34, "grad_norm": 0.5441290032997134, "learning_rate": 1.5434066024947362e-05, "loss": 0.6473, "step": 2643 }, { "epoch": 0.34, "grad_norm": 0.5296258486205736, "learning_rate": 1.5430592266585787e-05, "loss": 0.5997, "step": 2644 }, { "epoch": 0.34, "grad_norm": 0.4708805776824803, "learning_rate": 1.5427117578572793e-05, "loss": 0.5888, "step": 2645 }, { "epoch": 0.34, "grad_norm": 0.5821792431132684, "learning_rate": 1.54236419615032e-05, "loss": 0.6494, "step": 2646 }, { "epoch": 0.34, "grad_norm": 0.5559916467054944, "learning_rate": 1.5420165415972005e-05, "loss": 0.6413, "step": 2647 }, { "epoch": 0.34, "grad_norm": 0.5587788201761915, "learning_rate": 1.541668794257434e-05, "loss": 0.6645, "step": 2648 }, { "epoch": 0.34, "grad_norm": 0.5689735069775052, "learning_rate": 1.5413209541905504e-05, "loss": 0.6112, "step": 2649 }, { "epoch": 0.34, "grad_norm": 0.579125574750751, "learning_rate": 1.5409730214560962e-05, "loss": 0.662, "step": 2650 }, { "epoch": 0.34, "grad_norm": 0.49618494392622914, "learning_rate": 1.540624996113633e-05, "loss": 0.5975, "step": 2651 }, { "epoch": 0.34, "grad_norm": 0.6034589910678091, "learning_rate": 1.5402768782227395e-05, "loss": 0.655, "step": 2652 }, { "epoch": 0.34, "grad_norm": 0.5090339984987146, "learning_rate": 1.5399286678430075e-05, "loss": 0.6112, "step": 2653 }, { "epoch": 0.34, "grad_norm": 0.5815566325677084, "learning_rate": 1.5395803650340476e-05, "loss": 0.6653, "step": 2654 }, { "epoch": 0.34, "grad_norm": 0.6127897052033057, "learning_rate": 1.5392319698554848e-05, "loss": 0.694, "step": 2655 }, { "epoch": 0.34, "grad_norm": 0.4951382070371061, "learning_rate": 1.53888348236696e-05, "loss": 0.5989, "step": 2656 }, { "epoch": 0.34, "grad_norm": 0.5797750096028167, "learning_rate": 1.53853490262813e-05, "loss": 0.6341, "step": 2657 }, { "epoch": 0.34, "grad_norm": 0.5743246949179046, "learning_rate": 1.5381862306986673e-05, "loss": 0.594, "step": 2658 }, { "epoch": 0.34, "grad_norm": 0.4774078711103616, "learning_rate": 1.5378374666382606e-05, "loss": 0.588, "step": 2659 }, { "epoch": 0.34, "grad_norm": 0.5236202218611605, "learning_rate": 1.537488610506614e-05, "loss": 0.6106, "step": 2660 }, { "epoch": 0.34, "grad_norm": 0.5857765461106589, "learning_rate": 1.537139662363447e-05, "loss": 0.6441, "step": 2661 }, { "epoch": 0.34, "grad_norm": 0.497482876737522, "learning_rate": 1.536790622268496e-05, "loss": 0.615, "step": 2662 }, { "epoch": 0.34, "grad_norm": 0.597495266157517, "learning_rate": 1.536441490281512e-05, "loss": 0.6146, "step": 2663 }, { "epoch": 0.34, "grad_norm": 0.5901681230782634, "learning_rate": 1.5360922664622624e-05, "loss": 0.6487, "step": 2664 }, { "epoch": 0.34, "grad_norm": 0.5212973331273868, "learning_rate": 1.5357429508705297e-05, "loss": 0.6134, "step": 2665 }, { "epoch": 0.34, "grad_norm": 0.5619497227248685, "learning_rate": 1.5353935435661128e-05, "loss": 0.6145, "step": 2666 }, { "epoch": 0.34, "grad_norm": 0.4650956893903827, "learning_rate": 1.5350440446088256e-05, "loss": 0.6184, "step": 2667 }, { "epoch": 0.34, "grad_norm": 0.5597865370898863, "learning_rate": 1.5346944540584986e-05, "loss": 0.5981, "step": 2668 }, { "epoch": 0.34, "grad_norm": 0.49073303336280816, "learning_rate": 1.5343447719749772e-05, "loss": 0.6229, "step": 2669 }, { "epoch": 0.34, "grad_norm": 0.5012268686477267, "learning_rate": 1.5339949984181225e-05, "loss": 0.6143, "step": 2670 }, { "epoch": 0.34, "grad_norm": 0.5336157836819566, "learning_rate": 1.533645133447812e-05, "loss": 0.5833, "step": 2671 }, { "epoch": 0.34, "grad_norm": 0.49393977417210266, "learning_rate": 1.533295177123938e-05, "loss": 0.5959, "step": 2672 }, { "epoch": 0.34, "grad_norm": 0.5129483732097035, "learning_rate": 1.5329451295064085e-05, "loss": 0.6069, "step": 2673 }, { "epoch": 0.34, "grad_norm": 0.5330533068899116, "learning_rate": 1.532594990655148e-05, "loss": 0.6217, "step": 2674 }, { "epoch": 0.34, "grad_norm": 0.5211112354481331, "learning_rate": 1.5322447606300955e-05, "loss": 0.5995, "step": 2675 }, { "epoch": 0.34, "grad_norm": 0.5335667876043798, "learning_rate": 1.531894439491207e-05, "loss": 0.6076, "step": 2676 }, { "epoch": 0.34, "grad_norm": 0.4742934986775102, "learning_rate": 1.5315440272984518e-05, "loss": 0.569, "step": 2677 }, { "epoch": 0.34, "grad_norm": 0.4747979791485002, "learning_rate": 1.531193524111817e-05, "loss": 0.5967, "step": 2678 }, { "epoch": 0.34, "grad_norm": 0.5662534593366596, "learning_rate": 1.530842929991305e-05, "loss": 0.6353, "step": 2679 }, { "epoch": 0.34, "grad_norm": 0.5184812832847523, "learning_rate": 1.5304922449969326e-05, "loss": 0.6055, "step": 2680 }, { "epoch": 0.34, "grad_norm": 0.615809961628917, "learning_rate": 1.5301414691887327e-05, "loss": 0.616, "step": 2681 }, { "epoch": 0.34, "grad_norm": 0.5121825027621257, "learning_rate": 1.5297906026267543e-05, "loss": 0.5908, "step": 2682 }, { "epoch": 0.34, "grad_norm": 0.4820086323250259, "learning_rate": 1.5294396453710613e-05, "loss": 0.6126, "step": 2683 }, { "epoch": 0.34, "grad_norm": 0.4394410972194294, "learning_rate": 1.529088597481733e-05, "loss": 0.5936, "step": 2684 }, { "epoch": 0.34, "grad_norm": 0.48886267731621247, "learning_rate": 1.5287374590188652e-05, "loss": 0.6528, "step": 2685 }, { "epoch": 0.34, "grad_norm": 0.5011365799884255, "learning_rate": 1.528386230042568e-05, "loss": 0.6415, "step": 2686 }, { "epoch": 0.34, "grad_norm": 0.6818389120149406, "learning_rate": 1.5280349106129678e-05, "loss": 0.7032, "step": 2687 }, { "epoch": 0.34, "grad_norm": 0.47421442036323475, "learning_rate": 1.527683500790206e-05, "loss": 0.5753, "step": 2688 }, { "epoch": 0.34, "grad_norm": 0.4959702702073579, "learning_rate": 1.52733200063444e-05, "loss": 0.6088, "step": 2689 }, { "epoch": 0.34, "grad_norm": 0.5062394235629272, "learning_rate": 1.526980410205842e-05, "loss": 0.6297, "step": 2690 }, { "epoch": 0.34, "grad_norm": 0.48715302567125457, "learning_rate": 1.5266287295646e-05, "loss": 0.6058, "step": 2691 }, { "epoch": 0.34, "grad_norm": 0.4524055402616698, "learning_rate": 1.526276958770918e-05, "loss": 0.6058, "step": 2692 }, { "epoch": 0.34, "grad_norm": 0.5265002371942304, "learning_rate": 1.5259250978850144e-05, "loss": 0.6213, "step": 2693 }, { "epoch": 0.34, "grad_norm": 0.6003490646182241, "learning_rate": 1.5255731469671238e-05, "loss": 0.6494, "step": 2694 }, { "epoch": 0.34, "grad_norm": 0.6174073171425604, "learning_rate": 1.5252211060774956e-05, "loss": 0.6361, "step": 2695 }, { "epoch": 0.34, "grad_norm": 0.49479751493984103, "learning_rate": 1.524868975276395e-05, "loss": 0.6444, "step": 2696 }, { "epoch": 0.34, "grad_norm": 0.4596592703664518, "learning_rate": 1.5245167546241028e-05, "loss": 0.5805, "step": 2697 }, { "epoch": 0.34, "grad_norm": 0.5207720570819974, "learning_rate": 1.5241644441809146e-05, "loss": 0.6062, "step": 2698 }, { "epoch": 0.34, "grad_norm": 0.46983463813880344, "learning_rate": 1.5238120440071416e-05, "loss": 0.6069, "step": 2699 }, { "epoch": 0.34, "grad_norm": 0.5773241580654923, "learning_rate": 1.523459554163111e-05, "loss": 0.6525, "step": 2700 }, { "epoch": 0.35, "grad_norm": 0.526201107317219, "learning_rate": 1.523106974709164e-05, "loss": 0.5967, "step": 2701 }, { "epoch": 0.35, "grad_norm": 0.5438546112617929, "learning_rate": 1.5227543057056587e-05, "loss": 0.5896, "step": 2702 }, { "epoch": 0.35, "grad_norm": 0.512455740747029, "learning_rate": 1.5224015472129668e-05, "loss": 0.6397, "step": 2703 }, { "epoch": 0.35, "grad_norm": 0.5934044272401131, "learning_rate": 1.522048699291477e-05, "loss": 0.6458, "step": 2704 }, { "epoch": 0.35, "grad_norm": 0.5968543584158524, "learning_rate": 1.5216957620015924e-05, "loss": 0.6866, "step": 2705 }, { "epoch": 0.35, "grad_norm": 0.4973024485963665, "learning_rate": 1.5213427354037315e-05, "loss": 0.5963, "step": 2706 }, { "epoch": 0.35, "grad_norm": 0.44788171572803387, "learning_rate": 1.5209896195583284e-05, "loss": 0.614, "step": 2707 }, { "epoch": 0.35, "grad_norm": 0.48408944937558707, "learning_rate": 1.520636414525832e-05, "loss": 0.6187, "step": 2708 }, { "epoch": 0.35, "grad_norm": 0.5289645446687027, "learning_rate": 1.5202831203667066e-05, "loss": 0.6187, "step": 2709 }, { "epoch": 0.35, "grad_norm": 0.5469340191658792, "learning_rate": 1.5199297371414324e-05, "loss": 0.6307, "step": 2710 }, { "epoch": 0.35, "grad_norm": 0.4650761352292489, "learning_rate": 1.5195762649105038e-05, "loss": 0.5949, "step": 2711 }, { "epoch": 0.35, "grad_norm": 0.5084017036708363, "learning_rate": 1.519222703734431e-05, "loss": 0.6046, "step": 2712 }, { "epoch": 0.35, "grad_norm": 0.5617278674450626, "learning_rate": 1.5188690536737394e-05, "loss": 0.6235, "step": 2713 }, { "epoch": 0.35, "grad_norm": 0.5597634321982542, "learning_rate": 1.5185153147889697e-05, "loss": 0.5913, "step": 2714 }, { "epoch": 0.35, "grad_norm": 0.7472873636488304, "learning_rate": 1.5181614871406778e-05, "loss": 0.6348, "step": 2715 }, { "epoch": 0.35, "grad_norm": 0.5131373353878392, "learning_rate": 1.5178075707894346e-05, "loss": 0.6484, "step": 2716 }, { "epoch": 0.35, "grad_norm": 0.5854941151183667, "learning_rate": 1.517453565795826e-05, "loss": 0.6486, "step": 2717 }, { "epoch": 0.35, "grad_norm": 0.5912016220630193, "learning_rate": 1.517099472220454e-05, "loss": 0.6464, "step": 2718 }, { "epoch": 0.35, "grad_norm": 0.5616318623969726, "learning_rate": 1.5167452901239346e-05, "loss": 0.6019, "step": 2719 }, { "epoch": 0.35, "grad_norm": 0.5190826755498513, "learning_rate": 1.5163910195668996e-05, "loss": 0.6296, "step": 2720 }, { "epoch": 0.35, "grad_norm": 0.5040564401844927, "learning_rate": 1.5160366606099955e-05, "loss": 0.6278, "step": 2721 }, { "epoch": 0.35, "grad_norm": 0.6045483803690475, "learning_rate": 1.5156822133138852e-05, "loss": 0.6458, "step": 2722 }, { "epoch": 0.35, "grad_norm": 0.6170675013751007, "learning_rate": 1.5153276777392445e-05, "loss": 0.67, "step": 2723 }, { "epoch": 0.35, "grad_norm": 0.575358541062649, "learning_rate": 1.5149730539467665e-05, "loss": 0.6317, "step": 2724 }, { "epoch": 0.35, "grad_norm": 0.4894239656730188, "learning_rate": 1.514618341997158e-05, "loss": 0.5878, "step": 2725 }, { "epoch": 0.35, "grad_norm": 0.7462708120325466, "learning_rate": 1.514263541951142e-05, "loss": 0.6857, "step": 2726 }, { "epoch": 0.35, "grad_norm": 0.5334126272098058, "learning_rate": 1.5139086538694554e-05, "loss": 0.6165, "step": 2727 }, { "epoch": 0.35, "grad_norm": 0.6193942676892301, "learning_rate": 1.5135536778128504e-05, "loss": 0.701, "step": 2728 }, { "epoch": 0.35, "grad_norm": 0.5052854179368758, "learning_rate": 1.5131986138420956e-05, "loss": 0.6234, "step": 2729 }, { "epoch": 0.35, "grad_norm": 0.5372581068064376, "learning_rate": 1.5128434620179728e-05, "loss": 0.6416, "step": 2730 }, { "epoch": 0.35, "grad_norm": 0.5254466619086111, "learning_rate": 1.5124882224012802e-05, "loss": 0.6319, "step": 2731 }, { "epoch": 0.35, "grad_norm": 0.5272598350726434, "learning_rate": 1.51213289505283e-05, "loss": 0.6342, "step": 2732 }, { "epoch": 0.35, "grad_norm": 0.5525995229860501, "learning_rate": 1.5117774800334503e-05, "loss": 0.6444, "step": 2733 }, { "epoch": 0.35, "grad_norm": 0.5311401439284894, "learning_rate": 1.5114219774039836e-05, "loss": 0.6287, "step": 2734 }, { "epoch": 0.35, "grad_norm": 0.48188625655181805, "learning_rate": 1.511066387225288e-05, "loss": 0.5997, "step": 2735 }, { "epoch": 0.35, "grad_norm": 0.5694905137633341, "learning_rate": 1.5107107095582358e-05, "loss": 0.6301, "step": 2736 }, { "epoch": 0.35, "grad_norm": 0.48496548729382083, "learning_rate": 1.5103549444637151e-05, "loss": 0.6163, "step": 2737 }, { "epoch": 0.35, "grad_norm": 0.5180566260065823, "learning_rate": 1.5099990920026281e-05, "loss": 0.6215, "step": 2738 }, { "epoch": 0.35, "grad_norm": 0.48029349408685074, "learning_rate": 1.509643152235893e-05, "loss": 0.6165, "step": 2739 }, { "epoch": 0.35, "grad_norm": 0.5201956457351694, "learning_rate": 1.5092871252244416e-05, "loss": 0.6102, "step": 2740 }, { "epoch": 0.35, "grad_norm": 0.5243039478455624, "learning_rate": 1.5089310110292221e-05, "loss": 0.6201, "step": 2741 }, { "epoch": 0.35, "grad_norm": 0.5753542249504373, "learning_rate": 1.5085748097111963e-05, "loss": 0.6438, "step": 2742 }, { "epoch": 0.35, "grad_norm": 0.5163879408571957, "learning_rate": 1.5082185213313423e-05, "loss": 0.6167, "step": 2743 }, { "epoch": 0.35, "grad_norm": 0.5175284632542786, "learning_rate": 1.5078621459506518e-05, "loss": 0.6368, "step": 2744 }, { "epoch": 0.35, "grad_norm": 0.49519537149118936, "learning_rate": 1.5075056836301318e-05, "loss": 0.6158, "step": 2745 }, { "epoch": 0.35, "grad_norm": 0.44990618345724875, "learning_rate": 1.5071491344308048e-05, "loss": 0.5914, "step": 2746 }, { "epoch": 0.35, "grad_norm": 0.5051291613051139, "learning_rate": 1.5067924984137072e-05, "loss": 0.5896, "step": 2747 }, { "epoch": 0.35, "grad_norm": 0.5849411883115548, "learning_rate": 1.5064357756398916e-05, "loss": 0.6583, "step": 2748 }, { "epoch": 0.35, "grad_norm": 0.5576266991914567, "learning_rate": 1.5060789661704234e-05, "loss": 0.611, "step": 2749 }, { "epoch": 0.35, "grad_norm": 0.4914084662614916, "learning_rate": 1.5057220700663848e-05, "loss": 0.6319, "step": 2750 }, { "epoch": 0.35, "grad_norm": 0.567273005297592, "learning_rate": 1.5053650873888721e-05, "loss": 0.6165, "step": 2751 }, { "epoch": 0.35, "grad_norm": 0.5843162795919727, "learning_rate": 1.5050080181989966e-05, "loss": 0.6399, "step": 2752 }, { "epoch": 0.35, "grad_norm": 0.5371674941566272, "learning_rate": 1.5046508625578834e-05, "loss": 0.6007, "step": 2753 }, { "epoch": 0.35, "grad_norm": 0.48192499565191776, "learning_rate": 1.5042936205266735e-05, "loss": 0.6188, "step": 2754 }, { "epoch": 0.35, "grad_norm": 0.5160890012694482, "learning_rate": 1.503936292166523e-05, "loss": 0.6336, "step": 2755 }, { "epoch": 0.35, "grad_norm": 0.5576242481215077, "learning_rate": 1.5035788775386015e-05, "loss": 0.6418, "step": 2756 }, { "epoch": 0.35, "grad_norm": 0.5332649203256865, "learning_rate": 1.5032213767040942e-05, "loss": 0.65, "step": 2757 }, { "epoch": 0.35, "grad_norm": 0.43094888182435204, "learning_rate": 1.5028637897242011e-05, "loss": 0.5779, "step": 2758 }, { "epoch": 0.35, "grad_norm": 0.5135416004596183, "learning_rate": 1.5025061166601367e-05, "loss": 0.629, "step": 2759 }, { "epoch": 0.35, "grad_norm": 0.6171869982614528, "learning_rate": 1.50214835757313e-05, "loss": 0.6068, "step": 2760 }, { "epoch": 0.35, "grad_norm": 0.5202608783006566, "learning_rate": 1.5017905125244254e-05, "loss": 0.6273, "step": 2761 }, { "epoch": 0.35, "grad_norm": 0.5024149005511473, "learning_rate": 1.5014325815752813e-05, "loss": 0.6265, "step": 2762 }, { "epoch": 0.35, "grad_norm": 0.8005918260384353, "learning_rate": 1.501074564786971e-05, "loss": 0.6088, "step": 2763 }, { "epoch": 0.35, "grad_norm": 0.6679130794535049, "learning_rate": 1.5007164622207832e-05, "loss": 0.6794, "step": 2764 }, { "epoch": 0.35, "grad_norm": 0.4891549009783221, "learning_rate": 1.5003582739380207e-05, "loss": 0.6147, "step": 2765 }, { "epoch": 0.35, "grad_norm": 0.5402387206279415, "learning_rate": 1.5000000000000002e-05, "loss": 0.6116, "step": 2766 }, { "epoch": 0.35, "grad_norm": 0.4559880372924158, "learning_rate": 1.4996416404680543e-05, "loss": 0.5797, "step": 2767 }, { "epoch": 0.35, "grad_norm": 0.5187355228069722, "learning_rate": 1.49928319540353e-05, "loss": 0.5994, "step": 2768 }, { "epoch": 0.35, "grad_norm": 0.5363123488758966, "learning_rate": 1.4989246648677887e-05, "loss": 0.626, "step": 2769 }, { "epoch": 0.35, "grad_norm": 0.5349813391862498, "learning_rate": 1.4985660489222063e-05, "loss": 0.6423, "step": 2770 }, { "epoch": 0.35, "grad_norm": 0.5238208259490359, "learning_rate": 1.4982073476281733e-05, "loss": 0.6333, "step": 2771 }, { "epoch": 0.35, "grad_norm": 0.4913042793615852, "learning_rate": 1.4978485610470953e-05, "loss": 0.6043, "step": 2772 }, { "epoch": 0.35, "grad_norm": 0.47144378163219275, "learning_rate": 1.4974896892403923e-05, "loss": 0.579, "step": 2773 }, { "epoch": 0.35, "grad_norm": 0.48261514282461976, "learning_rate": 1.4971307322694983e-05, "loss": 0.5772, "step": 2774 }, { "epoch": 0.35, "grad_norm": 0.5017174082325486, "learning_rate": 1.496771690195863e-05, "loss": 0.621, "step": 2775 }, { "epoch": 0.35, "grad_norm": 0.4651989212498093, "learning_rate": 1.4964125630809496e-05, "loss": 0.6204, "step": 2776 }, { "epoch": 0.35, "grad_norm": 0.5317496379947819, "learning_rate": 1.4960533509862367e-05, "loss": 0.6257, "step": 2777 }, { "epoch": 0.35, "grad_norm": 0.48221062746669413, "learning_rate": 1.4956940539732166e-05, "loss": 0.6276, "step": 2778 }, { "epoch": 0.36, "grad_norm": 0.45883452578706174, "learning_rate": 1.4953346721033966e-05, "loss": 0.5682, "step": 2779 }, { "epoch": 0.36, "grad_norm": 0.4614365524888773, "learning_rate": 1.4949752054382989e-05, "loss": 0.6069, "step": 2780 }, { "epoch": 0.36, "grad_norm": 0.561009522287218, "learning_rate": 1.4946156540394593e-05, "loss": 0.6275, "step": 2781 }, { "epoch": 0.36, "grad_norm": 0.5138323728218803, "learning_rate": 1.4942560179684293e-05, "loss": 0.6366, "step": 2782 }, { "epoch": 0.36, "grad_norm": 0.5195243685951648, "learning_rate": 1.4938962972867738e-05, "loss": 0.6022, "step": 2783 }, { "epoch": 0.36, "grad_norm": 0.5628196988391517, "learning_rate": 1.4935364920560727e-05, "loss": 0.6398, "step": 2784 }, { "epoch": 0.36, "grad_norm": 0.48367039809029955, "learning_rate": 1.4931766023379202e-05, "loss": 0.5932, "step": 2785 }, { "epoch": 0.36, "grad_norm": 0.48963149107824266, "learning_rate": 1.4928166281939249e-05, "loss": 0.5909, "step": 2786 }, { "epoch": 0.36, "grad_norm": 0.5355952168130962, "learning_rate": 1.4924565696857104e-05, "loss": 0.6275, "step": 2787 }, { "epoch": 0.36, "grad_norm": 0.6291627007502867, "learning_rate": 1.4920964268749141e-05, "loss": 0.6681, "step": 2788 }, { "epoch": 0.36, "grad_norm": 0.5163177595455313, "learning_rate": 1.4917361998231883e-05, "loss": 0.643, "step": 2789 }, { "epoch": 0.36, "grad_norm": 0.4827738846114343, "learning_rate": 1.4913758885921991e-05, "loss": 0.6131, "step": 2790 }, { "epoch": 0.36, "grad_norm": 0.5438942874354109, "learning_rate": 1.4910154932436278e-05, "loss": 0.6013, "step": 2791 }, { "epoch": 0.36, "grad_norm": 0.5499361469638696, "learning_rate": 1.490655013839169e-05, "loss": 0.6268, "step": 2792 }, { "epoch": 0.36, "grad_norm": 0.48405606867872225, "learning_rate": 1.4902944504405335e-05, "loss": 0.5931, "step": 2793 }, { "epoch": 0.36, "grad_norm": 0.5094925746140047, "learning_rate": 1.4899338031094448e-05, "loss": 0.6307, "step": 2794 }, { "epoch": 0.36, "grad_norm": 0.47502086052119824, "learning_rate": 1.489573071907641e-05, "loss": 0.6099, "step": 2795 }, { "epoch": 0.36, "grad_norm": 0.5325684624404627, "learning_rate": 1.4892122568968754e-05, "loss": 0.608, "step": 2796 }, { "epoch": 0.36, "grad_norm": 0.5513840869589228, "learning_rate": 1.488851358138915e-05, "loss": 0.6013, "step": 2797 }, { "epoch": 0.36, "grad_norm": 0.5731774250810164, "learning_rate": 1.4884903756955413e-05, "loss": 0.6669, "step": 2798 }, { "epoch": 0.36, "grad_norm": 0.5515902865945375, "learning_rate": 1.4881293096285498e-05, "loss": 0.6155, "step": 2799 }, { "epoch": 0.36, "grad_norm": 0.5808167308177057, "learning_rate": 1.4877681599997514e-05, "loss": 0.6326, "step": 2800 }, { "epoch": 0.36, "grad_norm": 0.5238714179091226, "learning_rate": 1.4874069268709694e-05, "loss": 0.5918, "step": 2801 }, { "epoch": 0.36, "grad_norm": 0.4455269885323906, "learning_rate": 1.4870456103040436e-05, "loss": 0.6123, "step": 2802 }, { "epoch": 0.36, "grad_norm": 0.5954450950969717, "learning_rate": 1.4866842103608265e-05, "loss": 0.6365, "step": 2803 }, { "epoch": 0.36, "grad_norm": 0.6700626481312258, "learning_rate": 1.4863227271031853e-05, "loss": 0.6514, "step": 2804 }, { "epoch": 0.36, "grad_norm": 0.5522342141379433, "learning_rate": 1.4859611605930017e-05, "loss": 0.6271, "step": 2805 }, { "epoch": 0.36, "grad_norm": 0.5681302826395738, "learning_rate": 1.4855995108921714e-05, "loss": 0.6121, "step": 2806 }, { "epoch": 0.36, "grad_norm": 0.5478780496221204, "learning_rate": 1.4852377780626048e-05, "loss": 0.6256, "step": 2807 }, { "epoch": 0.36, "grad_norm": 0.4702260663596253, "learning_rate": 1.4848759621662256e-05, "loss": 0.6022, "step": 2808 }, { "epoch": 0.36, "grad_norm": 0.5204958631866234, "learning_rate": 1.4845140632649729e-05, "loss": 0.6175, "step": 2809 }, { "epoch": 0.36, "grad_norm": 0.5446180587640811, "learning_rate": 1.4841520814207986e-05, "loss": 0.6276, "step": 2810 }, { "epoch": 0.36, "grad_norm": 0.5852352315299767, "learning_rate": 1.4837900166956703e-05, "loss": 0.6426, "step": 2811 }, { "epoch": 0.36, "grad_norm": 0.5367212663561173, "learning_rate": 1.4834278691515687e-05, "loss": 0.588, "step": 2812 }, { "epoch": 0.36, "grad_norm": 0.5908861911497824, "learning_rate": 1.4830656388504892e-05, "loss": 0.6393, "step": 2813 }, { "epoch": 0.36, "grad_norm": 0.5696713043812935, "learning_rate": 1.4827033258544412e-05, "loss": 0.6329, "step": 2814 }, { "epoch": 0.36, "grad_norm": 0.4524638661985515, "learning_rate": 1.4823409302254485e-05, "loss": 0.5884, "step": 2815 }, { "epoch": 0.36, "grad_norm": 0.6281032011242669, "learning_rate": 1.4819784520255484e-05, "loss": 0.6448, "step": 2816 }, { "epoch": 0.36, "grad_norm": 0.536204413551814, "learning_rate": 1.481615891316793e-05, "loss": 0.6061, "step": 2817 }, { "epoch": 0.36, "grad_norm": 0.4866614458404748, "learning_rate": 1.4812532481612483e-05, "loss": 0.5864, "step": 2818 }, { "epoch": 0.36, "grad_norm": 0.530824026782885, "learning_rate": 1.4808905226209947e-05, "loss": 0.6349, "step": 2819 }, { "epoch": 0.36, "grad_norm": 0.5916888205551921, "learning_rate": 1.4805277147581257e-05, "loss": 0.6278, "step": 2820 }, { "epoch": 0.36, "grad_norm": 0.5361664405653582, "learning_rate": 1.48016482463475e-05, "loss": 0.6001, "step": 2821 }, { "epoch": 0.36, "grad_norm": 0.507535258603522, "learning_rate": 1.47980185231299e-05, "loss": 0.5833, "step": 2822 }, { "epoch": 0.36, "grad_norm": 0.505585502013123, "learning_rate": 1.4794387978549823e-05, "loss": 0.5832, "step": 2823 }, { "epoch": 0.36, "grad_norm": 0.502090253078168, "learning_rate": 1.479075661322877e-05, "loss": 0.6044, "step": 2824 }, { "epoch": 0.36, "grad_norm": 0.6080032850554696, "learning_rate": 1.4787124427788391e-05, "loss": 0.6937, "step": 2825 }, { "epoch": 0.36, "grad_norm": 0.5524959971890799, "learning_rate": 1.4783491422850469e-05, "loss": 0.623, "step": 2826 }, { "epoch": 0.36, "grad_norm": 0.5159679388414933, "learning_rate": 1.4779857599036931e-05, "loss": 0.6243, "step": 2827 }, { "epoch": 0.36, "grad_norm": 0.5003822785532639, "learning_rate": 1.4776222956969844e-05, "loss": 0.6277, "step": 2828 }, { "epoch": 0.36, "grad_norm": 0.5493261470666281, "learning_rate": 1.4772587497271413e-05, "loss": 0.6317, "step": 2829 }, { "epoch": 0.36, "grad_norm": 0.4953928121672609, "learning_rate": 1.4768951220563988e-05, "loss": 0.6374, "step": 2830 }, { "epoch": 0.36, "grad_norm": 0.6857849147607428, "learning_rate": 1.4765314127470052e-05, "loss": 0.7012, "step": 2831 }, { "epoch": 0.36, "grad_norm": 0.45634417228653124, "learning_rate": 1.4761676218612233e-05, "loss": 0.6126, "step": 2832 }, { "epoch": 0.36, "grad_norm": 0.5655347151603373, "learning_rate": 1.4758037494613295e-05, "loss": 0.5978, "step": 2833 }, { "epoch": 0.36, "grad_norm": 0.48380202757042656, "learning_rate": 1.4754397956096145e-05, "loss": 0.6235, "step": 2834 }, { "epoch": 0.36, "grad_norm": 0.4855810526118355, "learning_rate": 1.4750757603683827e-05, "loss": 0.5945, "step": 2835 }, { "epoch": 0.36, "grad_norm": 0.5022157860374122, "learning_rate": 1.4747116437999526e-05, "loss": 0.5951, "step": 2836 }, { "epoch": 0.36, "grad_norm": 0.4887409419162315, "learning_rate": 1.4743474459666567e-05, "loss": 0.5766, "step": 2837 }, { "epoch": 0.36, "grad_norm": 0.5207164820532887, "learning_rate": 1.4739831669308405e-05, "loss": 0.6176, "step": 2838 }, { "epoch": 0.36, "grad_norm": 0.4963080011553241, "learning_rate": 1.4736188067548648e-05, "loss": 0.6086, "step": 2839 }, { "epoch": 0.36, "grad_norm": 0.5173019581934654, "learning_rate": 1.473254365501104e-05, "loss": 0.6397, "step": 2840 }, { "epoch": 0.36, "grad_norm": 0.5526969980538413, "learning_rate": 1.4728898432319452e-05, "loss": 0.6391, "step": 2841 }, { "epoch": 0.36, "grad_norm": 0.655917703374485, "learning_rate": 1.4725252400097903e-05, "loss": 0.6538, "step": 2842 }, { "epoch": 0.36, "grad_norm": 0.5248044388806319, "learning_rate": 1.472160555897055e-05, "loss": 0.5997, "step": 2843 }, { "epoch": 0.36, "grad_norm": 0.46952165345895946, "learning_rate": 1.4717957909561693e-05, "loss": 0.607, "step": 2844 }, { "epoch": 0.36, "grad_norm": 0.5976905945430625, "learning_rate": 1.4714309452495761e-05, "loss": 0.6717, "step": 2845 }, { "epoch": 0.36, "grad_norm": 0.5667701906589108, "learning_rate": 1.4710660188397326e-05, "loss": 0.6729, "step": 2846 }, { "epoch": 0.36, "grad_norm": 0.4756796530651046, "learning_rate": 1.4707010117891098e-05, "loss": 0.5753, "step": 2847 }, { "epoch": 0.36, "grad_norm": 0.524908328538003, "learning_rate": 1.4703359241601921e-05, "loss": 0.6043, "step": 2848 }, { "epoch": 0.36, "grad_norm": 0.5559748995487147, "learning_rate": 1.4699707560154787e-05, "loss": 0.623, "step": 2849 }, { "epoch": 0.36, "grad_norm": 0.4917090168664156, "learning_rate": 1.469605507417482e-05, "loss": 0.5947, "step": 2850 }, { "epoch": 0.36, "grad_norm": 0.5120814232872561, "learning_rate": 1.4692401784287274e-05, "loss": 0.6243, "step": 2851 }, { "epoch": 0.36, "grad_norm": 0.5957225524676397, "learning_rate": 1.468874769111755e-05, "loss": 0.6659, "step": 2852 }, { "epoch": 0.36, "grad_norm": 0.5195152643849315, "learning_rate": 1.468509279529119e-05, "loss": 0.6058, "step": 2853 }, { "epoch": 0.36, "grad_norm": 0.5806019660419917, "learning_rate": 1.4681437097433862e-05, "loss": 0.6133, "step": 2854 }, { "epoch": 0.36, "grad_norm": 0.5196105592797855, "learning_rate": 1.4677780598171378e-05, "loss": 0.626, "step": 2855 }, { "epoch": 0.36, "grad_norm": 0.5404622799426883, "learning_rate": 1.4674123298129685e-05, "loss": 0.6493, "step": 2856 }, { "epoch": 0.36, "grad_norm": 0.530741140210669, "learning_rate": 1.4670465197934875e-05, "loss": 0.6159, "step": 2857 }, { "epoch": 0.37, "grad_norm": 0.4998944051838991, "learning_rate": 1.4666806298213165e-05, "loss": 0.6018, "step": 2858 }, { "epoch": 0.37, "grad_norm": 0.48132763616052676, "learning_rate": 1.4663146599590914e-05, "loss": 0.6131, "step": 2859 }, { "epoch": 0.37, "grad_norm": 0.9972598514784381, "learning_rate": 1.4659486102694618e-05, "loss": 0.6536, "step": 2860 }, { "epoch": 0.37, "grad_norm": 0.5698218883708196, "learning_rate": 1.4655824808150913e-05, "loss": 0.6236, "step": 2861 }, { "epoch": 0.37, "grad_norm": 0.5219309013948706, "learning_rate": 1.4652162716586564e-05, "loss": 0.6233, "step": 2862 }, { "epoch": 0.37, "grad_norm": 0.4945128993962258, "learning_rate": 1.4648499828628476e-05, "loss": 0.5755, "step": 2863 }, { "epoch": 0.37, "grad_norm": 0.5522388499488508, "learning_rate": 1.4644836144903694e-05, "loss": 0.5912, "step": 2864 }, { "epoch": 0.37, "grad_norm": 0.5393299944209577, "learning_rate": 1.46411716660394e-05, "loss": 0.6069, "step": 2865 }, { "epoch": 0.37, "grad_norm": 0.4844937092441754, "learning_rate": 1.4637506392662899e-05, "loss": 0.581, "step": 2866 }, { "epoch": 0.37, "grad_norm": 0.620353512578174, "learning_rate": 1.4633840325401645e-05, "loss": 0.6499, "step": 2867 }, { "epoch": 0.37, "grad_norm": 0.6009082909886976, "learning_rate": 1.4630173464883229e-05, "loss": 0.654, "step": 2868 }, { "epoch": 0.37, "grad_norm": 0.550198850671633, "learning_rate": 1.4626505811735365e-05, "loss": 0.6025, "step": 2869 }, { "epoch": 0.37, "grad_norm": 0.4830702357093116, "learning_rate": 1.4622837366585917e-05, "loss": 0.584, "step": 2870 }, { "epoch": 0.37, "grad_norm": 0.4455048667666227, "learning_rate": 1.4619168130062874e-05, "loss": 0.6028, "step": 2871 }, { "epoch": 0.37, "grad_norm": 0.48191923957706206, "learning_rate": 1.4615498102794367e-05, "loss": 0.6166, "step": 2872 }, { "epoch": 0.37, "grad_norm": 0.5103612484321293, "learning_rate": 1.4611827285408661e-05, "loss": 0.6078, "step": 2873 }, { "epoch": 0.37, "grad_norm": 0.4878267720530818, "learning_rate": 1.4608155678534156e-05, "loss": 0.6238, "step": 2874 }, { "epoch": 0.37, "grad_norm": 0.5610062282232746, "learning_rate": 1.460448328279938e-05, "loss": 0.6211, "step": 2875 }, { "epoch": 0.37, "grad_norm": 0.5057467398484821, "learning_rate": 1.4600810098833009e-05, "loss": 0.6182, "step": 2876 }, { "epoch": 0.37, "grad_norm": 0.5629695558319834, "learning_rate": 1.4597136127263847e-05, "loss": 0.6204, "step": 2877 }, { "epoch": 0.37, "grad_norm": 0.5410459759717844, "learning_rate": 1.459346136872083e-05, "loss": 0.6312, "step": 2878 }, { "epoch": 0.37, "grad_norm": 0.5527312294992608, "learning_rate": 1.4589785823833031e-05, "loss": 0.6003, "step": 2879 }, { "epoch": 0.37, "grad_norm": 0.5789475927055477, "learning_rate": 1.4586109493229666e-05, "loss": 0.6602, "step": 2880 }, { "epoch": 0.37, "grad_norm": 0.5947429294918174, "learning_rate": 1.458243237754007e-05, "loss": 0.6671, "step": 2881 }, { "epoch": 0.37, "grad_norm": 0.49785288655506754, "learning_rate": 1.4578754477393728e-05, "loss": 0.5929, "step": 2882 }, { "epoch": 0.37, "grad_norm": 0.5362921326102787, "learning_rate": 1.4575075793420246e-05, "loss": 0.6546, "step": 2883 }, { "epoch": 0.37, "grad_norm": 0.6149773904102139, "learning_rate": 1.4571396326249371e-05, "loss": 0.6417, "step": 2884 }, { "epoch": 0.37, "grad_norm": 0.48683943475785435, "learning_rate": 1.4567716076510983e-05, "loss": 0.5822, "step": 2885 }, { "epoch": 0.37, "grad_norm": 0.5537529436452547, "learning_rate": 1.4564035044835101e-05, "loss": 0.6422, "step": 2886 }, { "epoch": 0.37, "grad_norm": 0.4901503390007699, "learning_rate": 1.4560353231851868e-05, "loss": 0.5842, "step": 2887 }, { "epoch": 0.37, "grad_norm": 0.6034024828448598, "learning_rate": 1.4556670638191562e-05, "loss": 0.6602, "step": 2888 }, { "epoch": 0.37, "grad_norm": 0.49005486321056674, "learning_rate": 1.4552987264484605e-05, "loss": 0.5929, "step": 2889 }, { "epoch": 0.37, "grad_norm": 0.5614216455208847, "learning_rate": 1.4549303111361544e-05, "loss": 0.6368, "step": 2890 }, { "epoch": 0.37, "grad_norm": 0.4841916724299915, "learning_rate": 1.4545618179453065e-05, "loss": 0.5706, "step": 2891 }, { "epoch": 0.37, "grad_norm": 0.5276542379504979, "learning_rate": 1.4541932469389974e-05, "loss": 0.6338, "step": 2892 }, { "epoch": 0.37, "grad_norm": 0.49459543457622673, "learning_rate": 1.4538245981803227e-05, "loss": 0.6232, "step": 2893 }, { "epoch": 0.37, "grad_norm": 0.5542967215899024, "learning_rate": 1.4534558717323905e-05, "loss": 0.6074, "step": 2894 }, { "epoch": 0.37, "grad_norm": 0.48804815578726013, "learning_rate": 1.4530870676583221e-05, "loss": 0.6049, "step": 2895 }, { "epoch": 0.37, "grad_norm": 0.5364879237475889, "learning_rate": 1.4527181860212524e-05, "loss": 0.6147, "step": 2896 }, { "epoch": 0.37, "grad_norm": 0.536126551847162, "learning_rate": 1.4523492268843296e-05, "loss": 0.6281, "step": 2897 }, { "epoch": 0.37, "grad_norm": 0.5324206463827192, "learning_rate": 1.4519801903107147e-05, "loss": 0.6479, "step": 2898 }, { "epoch": 0.37, "grad_norm": 0.47457394633595473, "learning_rate": 1.4516110763635827e-05, "loss": 0.5961, "step": 2899 }, { "epoch": 0.37, "grad_norm": 0.5981079350542567, "learning_rate": 1.451241885106121e-05, "loss": 0.6287, "step": 2900 }, { "epoch": 0.37, "grad_norm": 0.5262978570063764, "learning_rate": 1.4508726166015308e-05, "loss": 0.6275, "step": 2901 }, { "epoch": 0.37, "grad_norm": 0.5418579109869832, "learning_rate": 1.450503270913026e-05, "loss": 0.634, "step": 2902 }, { "epoch": 0.37, "grad_norm": 0.5288452632712999, "learning_rate": 1.4501338481038352e-05, "loss": 0.6463, "step": 2903 }, { "epoch": 0.37, "grad_norm": 0.7510526145650945, "learning_rate": 1.449764348237198e-05, "loss": 0.6589, "step": 2904 }, { "epoch": 0.37, "grad_norm": 0.44168272846444934, "learning_rate": 1.4493947713763687e-05, "loss": 0.5603, "step": 2905 }, { "epoch": 0.37, "grad_norm": 0.49145846638461277, "learning_rate": 1.4490251175846142e-05, "loss": 0.6395, "step": 2906 }, { "epoch": 0.37, "grad_norm": 0.5151050516100039, "learning_rate": 1.4486553869252154e-05, "loss": 0.6059, "step": 2907 }, { "epoch": 0.37, "grad_norm": 0.55716156005462, "learning_rate": 1.4482855794614647e-05, "loss": 0.6316, "step": 2908 }, { "epoch": 0.37, "grad_norm": 0.5041663833622743, "learning_rate": 1.4479156952566693e-05, "loss": 0.6002, "step": 2909 }, { "epoch": 0.37, "grad_norm": 0.6059549381587, "learning_rate": 1.4475457343741486e-05, "loss": 0.672, "step": 2910 }, { "epoch": 0.37, "grad_norm": 0.5612529010708395, "learning_rate": 1.4471756968772357e-05, "loss": 0.6482, "step": 2911 }, { "epoch": 0.37, "grad_norm": 0.47472830151314366, "learning_rate": 1.4468055828292765e-05, "loss": 0.5804, "step": 2912 }, { "epoch": 0.37, "grad_norm": 0.545732105779446, "learning_rate": 1.4464353922936294e-05, "loss": 0.5987, "step": 2913 }, { "epoch": 0.37, "grad_norm": 0.5199409684784619, "learning_rate": 1.4460651253336673e-05, "loss": 0.6199, "step": 2914 }, { "epoch": 0.37, "grad_norm": 0.4563115744392842, "learning_rate": 1.445694782012775e-05, "loss": 0.5831, "step": 2915 }, { "epoch": 0.37, "grad_norm": 0.5510018570909052, "learning_rate": 1.4453243623943509e-05, "loss": 0.6659, "step": 2916 }, { "epoch": 0.37, "grad_norm": 0.5384324251445075, "learning_rate": 1.4449538665418064e-05, "loss": 0.6112, "step": 2917 }, { "epoch": 0.37, "grad_norm": 0.523352024882058, "learning_rate": 1.4445832945185655e-05, "loss": 0.6471, "step": 2918 }, { "epoch": 0.37, "grad_norm": 0.6370050179112932, "learning_rate": 1.4442126463880663e-05, "loss": 0.6224, "step": 2919 }, { "epoch": 0.37, "grad_norm": 0.5126861149673505, "learning_rate": 1.4438419222137588e-05, "loss": 0.6247, "step": 2920 }, { "epoch": 0.37, "grad_norm": 0.5387584696245001, "learning_rate": 1.4434711220591065e-05, "loss": 0.6374, "step": 2921 }, { "epoch": 0.37, "grad_norm": 0.6716137466292778, "learning_rate": 1.4431002459875858e-05, "loss": 0.6552, "step": 2922 }, { "epoch": 0.37, "grad_norm": 0.47237829314497864, "learning_rate": 1.4427292940626867e-05, "loss": 0.5911, "step": 2923 }, { "epoch": 0.37, "grad_norm": 0.5801972930432556, "learning_rate": 1.442358266347911e-05, "loss": 0.6322, "step": 2924 }, { "epoch": 0.37, "grad_norm": 0.5300049326649632, "learning_rate": 1.4419871629067746e-05, "loss": 0.6216, "step": 2925 }, { "epoch": 0.37, "grad_norm": 0.5513910598723172, "learning_rate": 1.4416159838028054e-05, "loss": 0.645, "step": 2926 }, { "epoch": 0.37, "grad_norm": 0.5771069354364465, "learning_rate": 1.441244729099545e-05, "loss": 0.6226, "step": 2927 }, { "epoch": 0.37, "grad_norm": 0.5272386821704669, "learning_rate": 1.4408733988605484e-05, "loss": 0.5974, "step": 2928 }, { "epoch": 0.37, "grad_norm": 0.468705700103884, "learning_rate": 1.4405019931493818e-05, "loss": 0.6242, "step": 2929 }, { "epoch": 0.37, "grad_norm": 0.5154497378937006, "learning_rate": 1.4401305120296259e-05, "loss": 0.5958, "step": 2930 }, { "epoch": 0.37, "grad_norm": 0.5181155365748794, "learning_rate": 1.4397589555648732e-05, "loss": 0.5963, "step": 2931 }, { "epoch": 0.37, "grad_norm": 0.5279738263135157, "learning_rate": 1.4393873238187304e-05, "loss": 0.607, "step": 2932 }, { "epoch": 0.37, "grad_norm": 0.5785907731753167, "learning_rate": 1.4390156168548162e-05, "loss": 0.5969, "step": 2933 }, { "epoch": 0.37, "grad_norm": 0.6001097725207263, "learning_rate": 1.4386438347367614e-05, "loss": 0.6068, "step": 2934 }, { "epoch": 0.37, "grad_norm": 0.5083203413426577, "learning_rate": 1.4382719775282119e-05, "loss": 0.6134, "step": 2935 }, { "epoch": 0.38, "grad_norm": 0.5489958499050815, "learning_rate": 1.4379000452928242e-05, "loss": 0.6067, "step": 2936 }, { "epoch": 0.38, "grad_norm": 0.5646066827135166, "learning_rate": 1.4375280380942693e-05, "loss": 0.6504, "step": 2937 }, { "epoch": 0.38, "grad_norm": 0.5192544401191912, "learning_rate": 1.4371559559962296e-05, "loss": 0.638, "step": 2938 }, { "epoch": 0.38, "grad_norm": 0.5353259558446043, "learning_rate": 1.4367837990624013e-05, "loss": 0.6483, "step": 2939 }, { "epoch": 0.38, "grad_norm": 0.8702083713890919, "learning_rate": 1.4364115673564933e-05, "loss": 0.6314, "step": 2940 }, { "epoch": 0.38, "grad_norm": 0.5466764082629342, "learning_rate": 1.4360392609422271e-05, "loss": 0.6372, "step": 2941 }, { "epoch": 0.38, "grad_norm": 0.6496270826786931, "learning_rate": 1.435666879883337e-05, "loss": 0.6555, "step": 2942 }, { "epoch": 0.38, "grad_norm": 0.6168265678973881, "learning_rate": 1.4352944242435698e-05, "loss": 0.6214, "step": 2943 }, { "epoch": 0.38, "grad_norm": 0.5718679160494474, "learning_rate": 1.4349218940866858e-05, "loss": 0.6408, "step": 2944 }, { "epoch": 0.38, "grad_norm": 0.5572767339406629, "learning_rate": 1.4345492894764577e-05, "loss": 0.6187, "step": 2945 }, { "epoch": 0.38, "grad_norm": 0.5638743581111293, "learning_rate": 1.4341766104766706e-05, "loss": 0.623, "step": 2946 }, { "epoch": 0.38, "grad_norm": 0.520169953587298, "learning_rate": 1.4338038571511225e-05, "loss": 0.591, "step": 2947 }, { "epoch": 0.38, "grad_norm": 0.5781048409661402, "learning_rate": 1.4334310295636247e-05, "loss": 0.6191, "step": 2948 }, { "epoch": 0.38, "grad_norm": 0.581776050579677, "learning_rate": 1.4330581277780004e-05, "loss": 0.6419, "step": 2949 }, { "epoch": 0.38, "grad_norm": 0.49785139662074157, "learning_rate": 1.432685151858086e-05, "loss": 0.6188, "step": 2950 }, { "epoch": 0.38, "grad_norm": 0.5168384706233481, "learning_rate": 1.4323121018677304e-05, "loss": 0.5916, "step": 2951 }, { "epoch": 0.38, "grad_norm": 0.5083768649531504, "learning_rate": 1.431938977870795e-05, "loss": 0.6099, "step": 2952 }, { "epoch": 0.38, "grad_norm": 0.5670902523000362, "learning_rate": 1.4315657799311548e-05, "loss": 0.617, "step": 2953 }, { "epoch": 0.38, "grad_norm": 0.49863439009956007, "learning_rate": 1.431192508112696e-05, "loss": 0.5979, "step": 2954 }, { "epoch": 0.38, "grad_norm": 0.5617192831141395, "learning_rate": 1.4308191624793184e-05, "loss": 0.5988, "step": 2955 }, { "epoch": 0.38, "grad_norm": 0.5015742658636168, "learning_rate": 1.4304457430949343e-05, "loss": 0.612, "step": 2956 }, { "epoch": 0.38, "grad_norm": 0.5626821976960618, "learning_rate": 1.4300722500234687e-05, "loss": 0.6277, "step": 2957 }, { "epoch": 0.38, "grad_norm": 0.6209944457265609, "learning_rate": 1.4296986833288591e-05, "loss": 0.6485, "step": 2958 }, { "epoch": 0.38, "grad_norm": 0.48630897327113515, "learning_rate": 1.4293250430750549e-05, "loss": 0.5647, "step": 2959 }, { "epoch": 0.38, "grad_norm": 0.5469374886733979, "learning_rate": 1.4289513293260197e-05, "loss": 0.6031, "step": 2960 }, { "epoch": 0.38, "grad_norm": 0.6028893982161724, "learning_rate": 1.4285775421457284e-05, "loss": 0.63, "step": 2961 }, { "epoch": 0.38, "grad_norm": 0.5894585675717232, "learning_rate": 1.4282036815981688e-05, "loss": 0.6605, "step": 2962 }, { "epoch": 0.38, "grad_norm": 0.5569993803259272, "learning_rate": 1.4278297477473411e-05, "loss": 0.6235, "step": 2963 }, { "epoch": 0.38, "grad_norm": 0.5213049937131683, "learning_rate": 1.4274557406572585e-05, "loss": 0.6144, "step": 2964 }, { "epoch": 0.38, "grad_norm": 0.5169256593720173, "learning_rate": 1.4270816603919461e-05, "loss": 0.6019, "step": 2965 }, { "epoch": 0.38, "grad_norm": 0.507836016279403, "learning_rate": 1.4267075070154424e-05, "loss": 0.6142, "step": 2966 }, { "epoch": 0.38, "grad_norm": 0.4790145359536167, "learning_rate": 1.4263332805917975e-05, "loss": 0.573, "step": 2967 }, { "epoch": 0.38, "grad_norm": 0.49494501527550855, "learning_rate": 1.4259589811850749e-05, "loss": 0.6317, "step": 2968 }, { "epoch": 0.38, "grad_norm": 0.5536779296633558, "learning_rate": 1.4255846088593498e-05, "loss": 0.6166, "step": 2969 }, { "epoch": 0.38, "grad_norm": 0.6401891215151914, "learning_rate": 1.42521016367871e-05, "loss": 0.6292, "step": 2970 }, { "epoch": 0.38, "grad_norm": 0.5262292328602902, "learning_rate": 1.424835645707256e-05, "loss": 0.6595, "step": 2971 }, { "epoch": 0.38, "grad_norm": 0.6930045566288888, "learning_rate": 1.424461055009101e-05, "loss": 0.7124, "step": 2972 }, { "epoch": 0.38, "grad_norm": 0.581905363101545, "learning_rate": 1.4240863916483703e-05, "loss": 0.631, "step": 2973 }, { "epoch": 0.38, "grad_norm": 0.4906641592905693, "learning_rate": 1.423711655689202e-05, "loss": 0.6095, "step": 2974 }, { "epoch": 0.38, "grad_norm": 0.5953302815852715, "learning_rate": 1.4233368471957457e-05, "loss": 0.6317, "step": 2975 }, { "epoch": 0.38, "grad_norm": 0.45696501906136955, "learning_rate": 1.4229619662321645e-05, "loss": 0.6011, "step": 2976 }, { "epoch": 0.38, "grad_norm": 0.5430485540489233, "learning_rate": 1.422587012862633e-05, "loss": 0.6212, "step": 2977 }, { "epoch": 0.38, "grad_norm": 0.49311683707718634, "learning_rate": 1.4222119871513393e-05, "loss": 0.5605, "step": 2978 }, { "epoch": 0.38, "grad_norm": 0.5663347515030249, "learning_rate": 1.4218368891624833e-05, "loss": 0.6375, "step": 2979 }, { "epoch": 0.38, "grad_norm": 0.5303636089834912, "learning_rate": 1.4214617189602765e-05, "loss": 0.5953, "step": 2980 }, { "epoch": 0.38, "grad_norm": 0.6510853571788798, "learning_rate": 1.4210864766089437e-05, "loss": 0.6486, "step": 2981 }, { "epoch": 0.38, "grad_norm": 0.5495113965441777, "learning_rate": 1.4207111621727221e-05, "loss": 0.5994, "step": 2982 }, { "epoch": 0.38, "grad_norm": 0.51014783646211, "learning_rate": 1.420335775715861e-05, "loss": 0.6171, "step": 2983 }, { "epoch": 0.38, "grad_norm": 0.5477158542446335, "learning_rate": 1.4199603173026217e-05, "loss": 0.6425, "step": 2984 }, { "epoch": 0.38, "grad_norm": 0.5161217779330254, "learning_rate": 1.4195847869972786e-05, "loss": 0.6187, "step": 2985 }, { "epoch": 0.38, "grad_norm": 0.5820990784756495, "learning_rate": 1.4192091848641174e-05, "loss": 0.6435, "step": 2986 }, { "epoch": 0.38, "grad_norm": 0.5063554705107615, "learning_rate": 1.4188335109674366e-05, "loss": 0.6104, "step": 2987 }, { "epoch": 0.38, "grad_norm": 0.5656820446248538, "learning_rate": 1.4184577653715477e-05, "loss": 0.6014, "step": 2988 }, { "epoch": 0.38, "grad_norm": 0.5675722705181697, "learning_rate": 1.418081948140773e-05, "loss": 0.6408, "step": 2989 }, { "epoch": 0.38, "grad_norm": 0.5018194797655712, "learning_rate": 1.4177060593394483e-05, "loss": 0.6091, "step": 2990 }, { "epoch": 0.38, "grad_norm": 0.5607064429205223, "learning_rate": 1.4173300990319208e-05, "loss": 0.6156, "step": 2991 }, { "epoch": 0.38, "grad_norm": 0.5532981803618228, "learning_rate": 1.4169540672825509e-05, "loss": 0.6188, "step": 2992 }, { "epoch": 0.38, "grad_norm": 0.5194700571595208, "learning_rate": 1.4165779641557103e-05, "loss": 0.6168, "step": 2993 }, { "epoch": 0.38, "grad_norm": 0.4971295029033867, "learning_rate": 1.4162017897157832e-05, "loss": 0.5911, "step": 2994 }, { "epoch": 0.38, "grad_norm": 0.5394642293312797, "learning_rate": 1.4158255440271664e-05, "loss": 0.6322, "step": 2995 }, { "epoch": 0.38, "grad_norm": 0.49707237122163667, "learning_rate": 1.4154492271542684e-05, "loss": 0.6261, "step": 2996 }, { "epoch": 0.38, "grad_norm": 0.4761800039394537, "learning_rate": 1.41507283916151e-05, "loss": 0.6032, "step": 2997 }, { "epoch": 0.38, "grad_norm": 0.5166688796821116, "learning_rate": 1.4146963801133242e-05, "loss": 0.6104, "step": 2998 }, { "epoch": 0.38, "grad_norm": 0.6655223773136594, "learning_rate": 1.4143198500741567e-05, "loss": 0.5926, "step": 2999 }, { "epoch": 0.38, "grad_norm": 0.5574677844450568, "learning_rate": 1.4139432491084646e-05, "loss": 0.6441, "step": 3000 }, { "epoch": 0.38, "grad_norm": 0.5132188847126004, "learning_rate": 1.413566577280717e-05, "loss": 0.6004, "step": 3001 }, { "epoch": 0.38, "grad_norm": 0.5059391932816562, "learning_rate": 1.4131898346553961e-05, "loss": 0.6147, "step": 3002 }, { "epoch": 0.38, "grad_norm": 0.49487742367392923, "learning_rate": 1.4128130212969953e-05, "loss": 0.5745, "step": 3003 }, { "epoch": 0.38, "grad_norm": 0.5613675790155841, "learning_rate": 1.412436137270021e-05, "loss": 0.6087, "step": 3004 }, { "epoch": 0.38, "grad_norm": 0.5028702293129854, "learning_rate": 1.4120591826389904e-05, "loss": 0.5851, "step": 3005 }, { "epoch": 0.38, "grad_norm": 0.6009000898479755, "learning_rate": 1.411682157468434e-05, "loss": 0.6373, "step": 3006 }, { "epoch": 0.38, "grad_norm": 0.5074586718219073, "learning_rate": 1.411305061822894e-05, "loss": 0.6007, "step": 3007 }, { "epoch": 0.38, "grad_norm": 0.5419554730924645, "learning_rate": 1.4109278957669244e-05, "loss": 0.6364, "step": 3008 }, { "epoch": 0.38, "grad_norm": 0.5156715203969731, "learning_rate": 1.4105506593650916e-05, "loss": 0.6402, "step": 3009 }, { "epoch": 0.38, "grad_norm": 0.5863248640333576, "learning_rate": 1.4101733526819737e-05, "loss": 0.6128, "step": 3010 }, { "epoch": 0.38, "grad_norm": 0.5576659909810728, "learning_rate": 1.4097959757821612e-05, "loss": 0.6324, "step": 3011 }, { "epoch": 0.38, "grad_norm": 0.6252092863111318, "learning_rate": 1.4094185287302561e-05, "loss": 0.6225, "step": 3012 }, { "epoch": 0.38, "grad_norm": 0.49801742849475955, "learning_rate": 1.4090410115908734e-05, "loss": 0.6082, "step": 3013 }, { "epoch": 0.39, "grad_norm": 0.5855189404531642, "learning_rate": 1.4086634244286389e-05, "loss": 0.6314, "step": 3014 }, { "epoch": 0.39, "grad_norm": 0.5207131256339725, "learning_rate": 1.4082857673081912e-05, "loss": 0.634, "step": 3015 }, { "epoch": 0.39, "grad_norm": 0.5196539958634978, "learning_rate": 1.40790804029418e-05, "loss": 0.5875, "step": 3016 }, { "epoch": 0.39, "grad_norm": 0.5675176810983062, "learning_rate": 1.4075302434512685e-05, "loss": 0.6356, "step": 3017 }, { "epoch": 0.39, "grad_norm": 0.476686842728214, "learning_rate": 1.4071523768441307e-05, "loss": 0.5719, "step": 3018 }, { "epoch": 0.39, "grad_norm": 0.5140304583588254, "learning_rate": 1.4067744405374522e-05, "loss": 0.6192, "step": 3019 }, { "epoch": 0.39, "grad_norm": 0.5672769730686341, "learning_rate": 1.4063964345959314e-05, "loss": 0.6522, "step": 3020 }, { "epoch": 0.39, "grad_norm": 0.4933756419965084, "learning_rate": 1.4060183590842789e-05, "loss": 0.6109, "step": 3021 }, { "epoch": 0.39, "grad_norm": 0.5368618821245584, "learning_rate": 1.4056402140672159e-05, "loss": 0.597, "step": 3022 }, { "epoch": 0.39, "grad_norm": 0.541873213443531, "learning_rate": 1.4052619996094761e-05, "loss": 0.5937, "step": 3023 }, { "epoch": 0.39, "grad_norm": 0.5144176939518488, "learning_rate": 1.404883715775806e-05, "loss": 0.6239, "step": 3024 }, { "epoch": 0.39, "grad_norm": 0.519356107008824, "learning_rate": 1.4045053626309631e-05, "loss": 0.6221, "step": 3025 }, { "epoch": 0.39, "grad_norm": 0.5216317023109907, "learning_rate": 1.404126940239716e-05, "loss": 0.6046, "step": 3026 }, { "epoch": 0.39, "grad_norm": 0.49746925702908906, "learning_rate": 1.4037484486668467e-05, "loss": 0.5887, "step": 3027 }, { "epoch": 0.39, "grad_norm": 0.46131886981136383, "learning_rate": 1.403369887977148e-05, "loss": 0.5977, "step": 3028 }, { "epoch": 0.39, "grad_norm": 0.5029501015870882, "learning_rate": 1.4029912582354257e-05, "loss": 0.5932, "step": 3029 }, { "epoch": 0.39, "grad_norm": 0.5491241118670223, "learning_rate": 1.4026125595064955e-05, "loss": 0.6232, "step": 3030 }, { "epoch": 0.39, "grad_norm": 0.5692181980091432, "learning_rate": 1.402233791855187e-05, "loss": 0.6189, "step": 3031 }, { "epoch": 0.39, "grad_norm": 0.46956308506204864, "learning_rate": 1.4018549553463398e-05, "loss": 0.5877, "step": 3032 }, { "epoch": 0.39, "grad_norm": 0.5698868158416996, "learning_rate": 1.4014760500448066e-05, "loss": 0.6272, "step": 3033 }, { "epoch": 0.39, "grad_norm": 0.5079868426765182, "learning_rate": 1.4010970760154514e-05, "loss": 0.6074, "step": 3034 }, { "epoch": 0.39, "grad_norm": 0.5312926302616116, "learning_rate": 1.4007180333231497e-05, "loss": 0.6331, "step": 3035 }, { "epoch": 0.39, "grad_norm": 0.5828741534236472, "learning_rate": 1.400338922032789e-05, "loss": 0.6452, "step": 3036 }, { "epoch": 0.39, "grad_norm": 0.5821129358232577, "learning_rate": 1.3999597422092688e-05, "loss": 0.7066, "step": 3037 }, { "epoch": 0.39, "grad_norm": 0.6359578413854411, "learning_rate": 1.3995804939175002e-05, "loss": 0.6533, "step": 3038 }, { "epoch": 0.39, "grad_norm": 0.4941504524813065, "learning_rate": 1.3992011772224052e-05, "loss": 0.598, "step": 3039 }, { "epoch": 0.39, "grad_norm": 0.5298992139170249, "learning_rate": 1.3988217921889188e-05, "loss": 0.6349, "step": 3040 }, { "epoch": 0.39, "grad_norm": 0.4732741584290399, "learning_rate": 1.3984423388819867e-05, "loss": 0.6016, "step": 3041 }, { "epoch": 0.39, "grad_norm": 0.4970770160387025, "learning_rate": 1.3980628173665677e-05, "loss": 0.5975, "step": 3042 }, { "epoch": 0.39, "grad_norm": 0.45015442415755286, "learning_rate": 1.39768322770763e-05, "loss": 0.5802, "step": 3043 }, { "epoch": 0.39, "grad_norm": 0.5358444945994238, "learning_rate": 1.3973035699701553e-05, "loss": 0.6042, "step": 3044 }, { "epoch": 0.39, "grad_norm": 0.5078086521294831, "learning_rate": 1.3969238442191364e-05, "loss": 0.592, "step": 3045 }, { "epoch": 0.39, "grad_norm": 0.5108442105606358, "learning_rate": 1.3965440505195779e-05, "loss": 0.616, "step": 3046 }, { "epoch": 0.39, "grad_norm": 0.6092542043888775, "learning_rate": 1.3961641889364954e-05, "loss": 0.6278, "step": 3047 }, { "epoch": 0.39, "grad_norm": 0.5276307949571636, "learning_rate": 1.3957842595349167e-05, "loss": 0.6361, "step": 3048 }, { "epoch": 0.39, "grad_norm": 0.5332900170829705, "learning_rate": 1.3954042623798815e-05, "loss": 0.623, "step": 3049 }, { "epoch": 0.39, "grad_norm": 0.5427929931583247, "learning_rate": 1.3950241975364406e-05, "loss": 0.6361, "step": 3050 }, { "epoch": 0.39, "grad_norm": 0.5189975666389434, "learning_rate": 1.3946440650696562e-05, "loss": 0.6248, "step": 3051 }, { "epoch": 0.39, "grad_norm": 0.5105442371655513, "learning_rate": 1.3942638650446023e-05, "loss": 0.6014, "step": 3052 }, { "epoch": 0.39, "grad_norm": 0.6153154061645827, "learning_rate": 1.393883597526365e-05, "loss": 0.6328, "step": 3053 }, { "epoch": 0.39, "grad_norm": 0.5681477884220044, "learning_rate": 1.3935032625800409e-05, "loss": 0.6248, "step": 3054 }, { "epoch": 0.39, "grad_norm": 0.5428390733998796, "learning_rate": 1.3931228602707391e-05, "loss": 0.6183, "step": 3055 }, { "epoch": 0.39, "grad_norm": 0.5440417479123861, "learning_rate": 1.3927423906635799e-05, "loss": 0.6197, "step": 3056 }, { "epoch": 0.39, "grad_norm": 0.5158410034117114, "learning_rate": 1.392361853823695e-05, "loss": 0.5824, "step": 3057 }, { "epoch": 0.39, "grad_norm": 0.5781676243554749, "learning_rate": 1.3919812498162273e-05, "loss": 0.607, "step": 3058 }, { "epoch": 0.39, "grad_norm": 0.5435757582325069, "learning_rate": 1.3916005787063322e-05, "loss": 0.5862, "step": 3059 }, { "epoch": 0.39, "grad_norm": 0.5060660316881064, "learning_rate": 1.3912198405591757e-05, "loss": 0.6508, "step": 3060 }, { "epoch": 0.39, "grad_norm": 0.5257826624251914, "learning_rate": 1.3908390354399352e-05, "loss": 0.6067, "step": 3061 }, { "epoch": 0.39, "grad_norm": 0.6128612438602119, "learning_rate": 1.3904581634138006e-05, "loss": 0.6542, "step": 3062 }, { "epoch": 0.39, "grad_norm": 1.534500776020811, "learning_rate": 1.3900772245459719e-05, "loss": 0.6304, "step": 3063 }, { "epoch": 0.39, "grad_norm": 0.5687588211564355, "learning_rate": 1.3896962189016619e-05, "loss": 0.6168, "step": 3064 }, { "epoch": 0.39, "grad_norm": 0.5627961603576539, "learning_rate": 1.3893151465460934e-05, "loss": 0.662, "step": 3065 }, { "epoch": 0.39, "grad_norm": 0.5313199111199033, "learning_rate": 1.3889340075445015e-05, "loss": 0.5958, "step": 3066 }, { "epoch": 0.39, "grad_norm": 0.5797847756649617, "learning_rate": 1.3885528019621333e-05, "loss": 0.6331, "step": 3067 }, { "epoch": 0.39, "grad_norm": 0.743543728746483, "learning_rate": 1.3881715298642457e-05, "loss": 0.6269, "step": 3068 }, { "epoch": 0.39, "grad_norm": 0.584221206991114, "learning_rate": 1.3877901913161083e-05, "loss": 0.6874, "step": 3069 }, { "epoch": 0.39, "grad_norm": 0.7421678141305215, "learning_rate": 1.3874087863830013e-05, "loss": 0.685, "step": 3070 }, { "epoch": 0.39, "grad_norm": 0.5640377836680717, "learning_rate": 1.3870273151302171e-05, "loss": 0.6334, "step": 3071 }, { "epoch": 0.39, "grad_norm": 0.5782588014842726, "learning_rate": 1.3866457776230586e-05, "loss": 0.6261, "step": 3072 }, { "epoch": 0.39, "grad_norm": 0.516938136464604, "learning_rate": 1.38626417392684e-05, "loss": 0.6386, "step": 3073 }, { "epoch": 0.39, "grad_norm": 0.4903051969069183, "learning_rate": 1.3858825041068882e-05, "loss": 0.6184, "step": 3074 }, { "epoch": 0.39, "grad_norm": 0.6068265071597992, "learning_rate": 1.3855007682285396e-05, "loss": 0.626, "step": 3075 }, { "epoch": 0.39, "grad_norm": 0.5193499857185683, "learning_rate": 1.3851189663571432e-05, "loss": 0.6053, "step": 3076 }, { "epoch": 0.39, "grad_norm": 0.5422026172804824, "learning_rate": 1.3847370985580583e-05, "loss": 0.5779, "step": 3077 }, { "epoch": 0.39, "grad_norm": 0.5288479318791641, "learning_rate": 1.3843551648966566e-05, "loss": 0.6111, "step": 3078 }, { "epoch": 0.39, "grad_norm": 0.5011101564999376, "learning_rate": 1.3839731654383203e-05, "loss": 0.6379, "step": 3079 }, { "epoch": 0.39, "grad_norm": 0.4948865928086103, "learning_rate": 1.3835911002484432e-05, "loss": 0.603, "step": 3080 }, { "epoch": 0.39, "grad_norm": 0.48751632753099666, "learning_rate": 1.38320896939243e-05, "loss": 0.5838, "step": 3081 }, { "epoch": 0.39, "grad_norm": 0.5023748374055309, "learning_rate": 1.382826772935697e-05, "loss": 0.6173, "step": 3082 }, { "epoch": 0.39, "grad_norm": 0.9218820136387323, "learning_rate": 1.3824445109436715e-05, "loss": 0.6677, "step": 3083 }, { "epoch": 0.39, "grad_norm": 0.5413941501536486, "learning_rate": 1.382062183481792e-05, "loss": 0.6049, "step": 3084 }, { "epoch": 0.39, "grad_norm": 0.5102477245786688, "learning_rate": 1.3816797906155086e-05, "loss": 0.5912, "step": 3085 }, { "epoch": 0.39, "grad_norm": 0.5679704595065052, "learning_rate": 1.3812973324102821e-05, "loss": 0.6351, "step": 3086 }, { "epoch": 0.39, "grad_norm": 0.4975414879180645, "learning_rate": 1.3809148089315844e-05, "loss": 0.5855, "step": 3087 }, { "epoch": 0.39, "grad_norm": 0.4690945466952135, "learning_rate": 1.3805322202448999e-05, "loss": 0.5942, "step": 3088 }, { "epoch": 0.39, "grad_norm": 0.4693481093008755, "learning_rate": 1.3801495664157222e-05, "loss": 0.5922, "step": 3089 }, { "epoch": 0.39, "grad_norm": 0.5392673862185378, "learning_rate": 1.3797668475095573e-05, "loss": 0.6192, "step": 3090 }, { "epoch": 0.39, "grad_norm": 0.5415752333556108, "learning_rate": 1.3793840635919217e-05, "loss": 0.6789, "step": 3091 }, { "epoch": 0.39, "grad_norm": 0.5416191532383893, "learning_rate": 1.3790012147283442e-05, "loss": 0.6248, "step": 3092 }, { "epoch": 0.4, "grad_norm": 0.46813324753642077, "learning_rate": 1.378618300984363e-05, "loss": 0.609, "step": 3093 }, { "epoch": 0.4, "grad_norm": 0.47002014962697103, "learning_rate": 1.3782353224255283e-05, "loss": 0.6128, "step": 3094 }, { "epoch": 0.4, "grad_norm": 0.752069749718912, "learning_rate": 1.377852279117402e-05, "loss": 0.6457, "step": 3095 }, { "epoch": 0.4, "grad_norm": 0.49758318131432505, "learning_rate": 1.377469171125556e-05, "loss": 0.6485, "step": 3096 }, { "epoch": 0.4, "grad_norm": 0.4246850530123474, "learning_rate": 1.3770859985155744e-05, "loss": 0.595, "step": 3097 }, { "epoch": 0.4, "grad_norm": 0.5814713252579703, "learning_rate": 1.3767027613530503e-05, "loss": 0.6919, "step": 3098 }, { "epoch": 0.4, "grad_norm": 0.46159408941515717, "learning_rate": 1.3763194597035907e-05, "loss": 0.5729, "step": 3099 }, { "epoch": 0.4, "grad_norm": 0.44609775743394736, "learning_rate": 1.3759360936328117e-05, "loss": 0.556, "step": 3100 }, { "epoch": 0.4, "grad_norm": 0.5422108169866632, "learning_rate": 1.3755526632063405e-05, "loss": 0.6196, "step": 3101 }, { "epoch": 0.4, "grad_norm": 0.5152750745432484, "learning_rate": 1.375169168489816e-05, "loss": 0.6095, "step": 3102 }, { "epoch": 0.4, "grad_norm": 0.5879499635252835, "learning_rate": 1.3747856095488881e-05, "loss": 0.6139, "step": 3103 }, { "epoch": 0.4, "grad_norm": 0.5254034357398855, "learning_rate": 1.3744019864492171e-05, "loss": 0.606, "step": 3104 }, { "epoch": 0.4, "grad_norm": 0.5596472265620903, "learning_rate": 1.3740182992564746e-05, "loss": 0.623, "step": 3105 }, { "epoch": 0.4, "grad_norm": 0.5340995036218008, "learning_rate": 1.3736345480363435e-05, "loss": 0.6379, "step": 3106 }, { "epoch": 0.4, "grad_norm": 0.5577197855065075, "learning_rate": 1.373250732854517e-05, "loss": 0.6316, "step": 3107 }, { "epoch": 0.4, "grad_norm": 0.5116678884418491, "learning_rate": 1.3728668537766999e-05, "loss": 0.5933, "step": 3108 }, { "epoch": 0.4, "grad_norm": 0.5685453424460976, "learning_rate": 1.3724829108686076e-05, "loss": 0.6343, "step": 3109 }, { "epoch": 0.4, "grad_norm": 0.5142937382463398, "learning_rate": 1.372098904195966e-05, "loss": 0.6158, "step": 3110 }, { "epoch": 0.4, "grad_norm": 0.5055141444992848, "learning_rate": 1.371714833824513e-05, "loss": 0.5794, "step": 3111 }, { "epoch": 0.4, "grad_norm": 0.4739096253754006, "learning_rate": 1.3713306998199962e-05, "loss": 0.5667, "step": 3112 }, { "epoch": 0.4, "grad_norm": 0.5419868627947309, "learning_rate": 1.3709465022481751e-05, "loss": 0.6309, "step": 3113 }, { "epoch": 0.4, "grad_norm": 0.6071480747033896, "learning_rate": 1.37056224117482e-05, "loss": 0.6325, "step": 3114 }, { "epoch": 0.4, "grad_norm": 0.6803486197788493, "learning_rate": 1.3701779166657108e-05, "loss": 0.6521, "step": 3115 }, { "epoch": 0.4, "grad_norm": 0.5591872266141785, "learning_rate": 1.3697935287866396e-05, "loss": 0.6338, "step": 3116 }, { "epoch": 0.4, "grad_norm": 0.5303970294331458, "learning_rate": 1.369409077603409e-05, "loss": 0.6065, "step": 3117 }, { "epoch": 0.4, "grad_norm": 0.5844588122784238, "learning_rate": 1.3690245631818326e-05, "loss": 0.6291, "step": 3118 }, { "epoch": 0.4, "grad_norm": 0.5668887358901051, "learning_rate": 1.3686399855877342e-05, "loss": 0.6315, "step": 3119 }, { "epoch": 0.4, "grad_norm": 0.5012296592730573, "learning_rate": 1.368255344886949e-05, "loss": 0.5977, "step": 3120 }, { "epoch": 0.4, "grad_norm": 0.6664308648200007, "learning_rate": 1.3678706411453227e-05, "loss": 0.677, "step": 3121 }, { "epoch": 0.4, "grad_norm": 0.5001201076322875, "learning_rate": 1.3674858744287121e-05, "loss": 0.6176, "step": 3122 }, { "epoch": 0.4, "grad_norm": 0.5149331438723661, "learning_rate": 1.3671010448029845e-05, "loss": 0.6126, "step": 3123 }, { "epoch": 0.4, "grad_norm": 0.554083455239532, "learning_rate": 1.3667161523340177e-05, "loss": 0.6125, "step": 3124 }, { "epoch": 0.4, "grad_norm": 0.4985440072044938, "learning_rate": 1.3663311970877013e-05, "loss": 0.6121, "step": 3125 }, { "epoch": 0.4, "grad_norm": 0.448419905386683, "learning_rate": 1.3659461791299347e-05, "loss": 0.5767, "step": 3126 }, { "epoch": 0.4, "grad_norm": 0.5113265804572815, "learning_rate": 1.3655610985266279e-05, "loss": 0.6056, "step": 3127 }, { "epoch": 0.4, "grad_norm": 0.5791782687105244, "learning_rate": 1.3651759553437023e-05, "loss": 0.6539, "step": 3128 }, { "epoch": 0.4, "grad_norm": 0.594662971901392, "learning_rate": 1.36479074964709e-05, "loss": 0.5954, "step": 3129 }, { "epoch": 0.4, "grad_norm": 0.6364888941214009, "learning_rate": 1.3644054815027333e-05, "loss": 0.646, "step": 3130 }, { "epoch": 0.4, "grad_norm": 0.47308542530219033, "learning_rate": 1.364020150976585e-05, "loss": 0.5955, "step": 3131 }, { "epoch": 0.4, "grad_norm": 0.5316578246293803, "learning_rate": 1.3636347581346102e-05, "loss": 0.6309, "step": 3132 }, { "epoch": 0.4, "grad_norm": 0.4490593182566759, "learning_rate": 1.363249303042782e-05, "loss": 0.5705, "step": 3133 }, { "epoch": 0.4, "grad_norm": 0.519586472131258, "learning_rate": 1.3628637857670866e-05, "loss": 0.5956, "step": 3134 }, { "epoch": 0.4, "grad_norm": 0.49648675725181773, "learning_rate": 1.3624782063735195e-05, "loss": 0.6054, "step": 3135 }, { "epoch": 0.4, "grad_norm": 0.4577579688746194, "learning_rate": 1.3620925649280873e-05, "loss": 0.5793, "step": 3136 }, { "epoch": 0.4, "grad_norm": 0.5218932667031823, "learning_rate": 1.3617068614968073e-05, "loss": 0.6359, "step": 3137 }, { "epoch": 0.4, "grad_norm": 0.5869230060157244, "learning_rate": 1.361321096145707e-05, "loss": 0.6541, "step": 3138 }, { "epoch": 0.4, "grad_norm": 0.6068342388128033, "learning_rate": 1.3609352689408248e-05, "loss": 0.6556, "step": 3139 }, { "epoch": 0.4, "grad_norm": 0.6141570881223107, "learning_rate": 1.3605493799482096e-05, "loss": 0.7169, "step": 3140 }, { "epoch": 0.4, "grad_norm": 0.48015384872142963, "learning_rate": 1.360163429233921e-05, "loss": 0.6008, "step": 3141 }, { "epoch": 0.4, "grad_norm": 0.543918452708754, "learning_rate": 1.359777416864029e-05, "loss": 0.62, "step": 3142 }, { "epoch": 0.4, "grad_norm": 0.48194485840137785, "learning_rate": 1.3593913429046146e-05, "loss": 0.5925, "step": 3143 }, { "epoch": 0.4, "grad_norm": 0.5090255336957503, "learning_rate": 1.3590052074217682e-05, "loss": 0.6396, "step": 3144 }, { "epoch": 0.4, "grad_norm": 0.49852531071967326, "learning_rate": 1.3586190104815919e-05, "loss": 0.5809, "step": 3145 }, { "epoch": 0.4, "grad_norm": 0.4996637400444612, "learning_rate": 1.3582327521501981e-05, "loss": 0.6008, "step": 3146 }, { "epoch": 0.4, "grad_norm": 0.5375054658910529, "learning_rate": 1.3578464324937097e-05, "loss": 0.6164, "step": 3147 }, { "epoch": 0.4, "grad_norm": 0.5332816703447582, "learning_rate": 1.357460051578259e-05, "loss": 0.6156, "step": 3148 }, { "epoch": 0.4, "grad_norm": 0.5054086087963053, "learning_rate": 1.3570736094699909e-05, "loss": 0.5922, "step": 3149 }, { "epoch": 0.4, "grad_norm": 0.5339473981690017, "learning_rate": 1.3566871062350586e-05, "loss": 0.6309, "step": 3150 }, { "epoch": 0.4, "grad_norm": 0.4893592403678643, "learning_rate": 1.3563005419396276e-05, "loss": 0.6307, "step": 3151 }, { "epoch": 0.4, "grad_norm": 0.7570491508925394, "learning_rate": 1.3559139166498722e-05, "loss": 0.5809, "step": 3152 }, { "epoch": 0.4, "grad_norm": 0.5525846602184711, "learning_rate": 1.3555272304319787e-05, "loss": 0.5951, "step": 3153 }, { "epoch": 0.4, "grad_norm": 0.5169585142868306, "learning_rate": 1.3551404833521425e-05, "loss": 0.5857, "step": 3154 }, { "epoch": 0.4, "grad_norm": 0.570893522872078, "learning_rate": 1.3547536754765705e-05, "loss": 0.6639, "step": 3155 }, { "epoch": 0.4, "grad_norm": 0.5281169583417639, "learning_rate": 1.354366806871479e-05, "loss": 0.6326, "step": 3156 }, { "epoch": 0.4, "grad_norm": 0.5347729362105133, "learning_rate": 1.3539798776030954e-05, "loss": 0.6026, "step": 3157 }, { "epoch": 0.4, "grad_norm": 0.507431464820195, "learning_rate": 1.3535928877376575e-05, "loss": 0.5851, "step": 3158 }, { "epoch": 0.4, "grad_norm": 0.537372094831905, "learning_rate": 1.3532058373414131e-05, "loss": 0.6615, "step": 3159 }, { "epoch": 0.4, "grad_norm": 0.49793211671965143, "learning_rate": 1.3528187264806208e-05, "loss": 0.6396, "step": 3160 }, { "epoch": 0.4, "grad_norm": 0.46541044281406235, "learning_rate": 1.3524315552215486e-05, "loss": 0.5924, "step": 3161 }, { "epoch": 0.4, "grad_norm": 0.4689139023439087, "learning_rate": 1.3520443236304758e-05, "loss": 0.5787, "step": 3162 }, { "epoch": 0.4, "grad_norm": 0.5134404452298782, "learning_rate": 1.351657031773692e-05, "loss": 0.643, "step": 3163 }, { "epoch": 0.4, "grad_norm": 0.5409449588746668, "learning_rate": 1.3512696797174973e-05, "loss": 0.6565, "step": 3164 }, { "epoch": 0.4, "grad_norm": 0.6979833544073638, "learning_rate": 1.3508822675282005e-05, "loss": 0.6523, "step": 3165 }, { "epoch": 0.4, "grad_norm": 0.5446678984345215, "learning_rate": 1.3504947952721226e-05, "loss": 0.6701, "step": 3166 }, { "epoch": 0.4, "grad_norm": 0.526158893459912, "learning_rate": 1.3501072630155938e-05, "loss": 0.6453, "step": 3167 }, { "epoch": 0.4, "grad_norm": 0.5381578373864935, "learning_rate": 1.3497196708249556e-05, "loss": 0.6215, "step": 3168 }, { "epoch": 0.4, "grad_norm": 0.52084162554837, "learning_rate": 1.3493320187665582e-05, "loss": 0.6413, "step": 3169 }, { "epoch": 0.4, "grad_norm": 0.5679604969904061, "learning_rate": 1.3489443069067633e-05, "loss": 0.6343, "step": 3170 }, { "epoch": 0.41, "grad_norm": 0.5066538393701755, "learning_rate": 1.3485565353119428e-05, "loss": 0.5874, "step": 3171 }, { "epoch": 0.41, "grad_norm": 0.5412021750158836, "learning_rate": 1.348168704048478e-05, "loss": 0.6034, "step": 3172 }, { "epoch": 0.41, "grad_norm": 0.5152514585664324, "learning_rate": 1.3477808131827607e-05, "loss": 0.6284, "step": 3173 }, { "epoch": 0.41, "grad_norm": 0.5555014291788006, "learning_rate": 1.3473928627811938e-05, "loss": 0.6335, "step": 3174 }, { "epoch": 0.41, "grad_norm": 0.619601706129031, "learning_rate": 1.3470048529101893e-05, "loss": 0.6046, "step": 3175 }, { "epoch": 0.41, "grad_norm": 0.5346476423476875, "learning_rate": 1.3466167836361699e-05, "loss": 0.6196, "step": 3176 }, { "epoch": 0.41, "grad_norm": 0.5544745637446924, "learning_rate": 1.346228655025568e-05, "loss": 0.6442, "step": 3177 }, { "epoch": 0.41, "grad_norm": 0.5736412209928035, "learning_rate": 1.3458404671448273e-05, "loss": 0.6251, "step": 3178 }, { "epoch": 0.41, "grad_norm": 0.5209110260026991, "learning_rate": 1.3454522200603999e-05, "loss": 0.5962, "step": 3179 }, { "epoch": 0.41, "grad_norm": 0.4532391465533693, "learning_rate": 1.3450639138387495e-05, "loss": 0.6066, "step": 3180 }, { "epoch": 0.41, "grad_norm": 0.4820626675649429, "learning_rate": 1.3446755485463495e-05, "loss": 0.578, "step": 3181 }, { "epoch": 0.41, "grad_norm": 0.5038813455515068, "learning_rate": 1.3442871242496829e-05, "loss": 0.6121, "step": 3182 }, { "epoch": 0.41, "grad_norm": 0.5666744629669502, "learning_rate": 1.3438986410152433e-05, "loss": 0.6173, "step": 3183 }, { "epoch": 0.41, "grad_norm": 0.4840635275091642, "learning_rate": 1.3435100989095346e-05, "loss": 0.593, "step": 3184 }, { "epoch": 0.41, "grad_norm": 0.5152406676464658, "learning_rate": 1.3431214979990707e-05, "loss": 0.6316, "step": 3185 }, { "epoch": 0.41, "grad_norm": 0.519088468481307, "learning_rate": 1.3427328383503748e-05, "loss": 0.6298, "step": 3186 }, { "epoch": 0.41, "grad_norm": 0.516654723584068, "learning_rate": 1.3423441200299807e-05, "loss": 0.6222, "step": 3187 }, { "epoch": 0.41, "grad_norm": 0.5378492330175727, "learning_rate": 1.3419553431044327e-05, "loss": 0.624, "step": 3188 }, { "epoch": 0.41, "grad_norm": 0.5979709432438237, "learning_rate": 1.3415665076402846e-05, "loss": 0.6391, "step": 3189 }, { "epoch": 0.41, "grad_norm": 0.5243637713111646, "learning_rate": 1.3411776137041002e-05, "loss": 0.6251, "step": 3190 }, { "epoch": 0.41, "grad_norm": 0.4909552452849061, "learning_rate": 1.3407886613624527e-05, "loss": 0.5879, "step": 3191 }, { "epoch": 0.41, "grad_norm": 0.5169128457261751, "learning_rate": 1.3403996506819272e-05, "loss": 0.6274, "step": 3192 }, { "epoch": 0.41, "grad_norm": 0.5923033144681091, "learning_rate": 1.3400105817291174e-05, "loss": 0.6194, "step": 3193 }, { "epoch": 0.41, "grad_norm": 0.5734214961145082, "learning_rate": 1.3396214545706262e-05, "loss": 0.6324, "step": 3194 }, { "epoch": 0.41, "grad_norm": 0.5673122298173433, "learning_rate": 1.3392322692730689e-05, "loss": 0.6697, "step": 3195 }, { "epoch": 0.41, "grad_norm": 0.5255692398758616, "learning_rate": 1.338843025903068e-05, "loss": 0.6249, "step": 3196 }, { "epoch": 0.41, "grad_norm": 0.4663394773576857, "learning_rate": 1.3384537245272578e-05, "loss": 0.5957, "step": 3197 }, { "epoch": 0.41, "grad_norm": 0.5168236247160719, "learning_rate": 1.338064365212282e-05, "loss": 0.6066, "step": 3198 }, { "epoch": 0.41, "grad_norm": 0.48172733153087427, "learning_rate": 1.3376749480247941e-05, "loss": 0.5986, "step": 3199 }, { "epoch": 0.41, "grad_norm": 0.5514819929493423, "learning_rate": 1.3372854730314575e-05, "loss": 0.5961, "step": 3200 }, { "epoch": 0.41, "grad_norm": 0.5549825168674549, "learning_rate": 1.3368959402989458e-05, "loss": 0.6462, "step": 3201 }, { "epoch": 0.41, "grad_norm": 0.4593816572896031, "learning_rate": 1.3365063498939422e-05, "loss": 0.5767, "step": 3202 }, { "epoch": 0.41, "grad_norm": 0.6017218517604661, "learning_rate": 1.3361167018831396e-05, "loss": 0.673, "step": 3203 }, { "epoch": 0.41, "grad_norm": 0.5775230996034467, "learning_rate": 1.3357269963332413e-05, "loss": 0.5755, "step": 3204 }, { "epoch": 0.41, "grad_norm": 0.5132086730184137, "learning_rate": 1.33533723331096e-05, "loss": 0.614, "step": 3205 }, { "epoch": 0.41, "grad_norm": 0.5274439792801309, "learning_rate": 1.3349474128830186e-05, "loss": 0.5919, "step": 3206 }, { "epoch": 0.41, "grad_norm": 0.5174217441590632, "learning_rate": 1.3345575351161494e-05, "loss": 0.6167, "step": 3207 }, { "epoch": 0.41, "grad_norm": 0.4923701949411351, "learning_rate": 1.3341676000770946e-05, "loss": 0.6157, "step": 3208 }, { "epoch": 0.41, "grad_norm": 0.5492837843646939, "learning_rate": 1.3337776078326067e-05, "loss": 0.6151, "step": 3209 }, { "epoch": 0.41, "grad_norm": 0.5437214677198268, "learning_rate": 1.3333875584494478e-05, "loss": 0.6395, "step": 3210 }, { "epoch": 0.41, "grad_norm": 0.5189411256863229, "learning_rate": 1.3329974519943892e-05, "loss": 0.631, "step": 3211 }, { "epoch": 0.41, "grad_norm": 0.5656129609066959, "learning_rate": 1.332607288534212e-05, "loss": 0.6366, "step": 3212 }, { "epoch": 0.41, "grad_norm": 0.5313250475662578, "learning_rate": 1.3322170681357083e-05, "loss": 0.6098, "step": 3213 }, { "epoch": 0.41, "grad_norm": 0.49679799004374436, "learning_rate": 1.331826790865679e-05, "loss": 0.583, "step": 3214 }, { "epoch": 0.41, "grad_norm": 0.49984880809463283, "learning_rate": 1.3314364567909339e-05, "loss": 0.5883, "step": 3215 }, { "epoch": 0.41, "grad_norm": 0.5701880787605693, "learning_rate": 1.3310460659782946e-05, "loss": 0.6154, "step": 3216 }, { "epoch": 0.41, "grad_norm": 0.5246697880012249, "learning_rate": 1.3306556184945904e-05, "loss": 0.6248, "step": 3217 }, { "epoch": 0.41, "grad_norm": 0.5409603775961219, "learning_rate": 1.3302651144066618e-05, "loss": 0.6219, "step": 3218 }, { "epoch": 0.41, "grad_norm": 0.5391051907286808, "learning_rate": 1.3298745537813582e-05, "loss": 0.6355, "step": 3219 }, { "epoch": 0.41, "grad_norm": 0.5273325786655974, "learning_rate": 1.3294839366855386e-05, "loss": 0.6147, "step": 3220 }, { "epoch": 0.41, "grad_norm": 0.4874129343772607, "learning_rate": 1.3290932631860718e-05, "loss": 0.6183, "step": 3221 }, { "epoch": 0.41, "grad_norm": 0.5685370726474189, "learning_rate": 1.3287025333498367e-05, "loss": 0.619, "step": 3222 }, { "epoch": 0.41, "grad_norm": 0.5099996696849517, "learning_rate": 1.3283117472437215e-05, "loss": 0.5801, "step": 3223 }, { "epoch": 0.41, "grad_norm": 0.5951156840339298, "learning_rate": 1.327920904934624e-05, "loss": 0.6196, "step": 3224 }, { "epoch": 0.41, "grad_norm": 0.49373545915143385, "learning_rate": 1.3275300064894513e-05, "loss": 0.6099, "step": 3225 }, { "epoch": 0.41, "grad_norm": 0.5781122902799337, "learning_rate": 1.3271390519751206e-05, "loss": 0.6318, "step": 3226 }, { "epoch": 0.41, "grad_norm": 0.5926753043600917, "learning_rate": 1.3267480414585592e-05, "loss": 0.6573, "step": 3227 }, { "epoch": 0.41, "grad_norm": 0.5814737772255951, "learning_rate": 1.3263569750067028e-05, "loss": 0.6409, "step": 3228 }, { "epoch": 0.41, "grad_norm": 0.5617471934202496, "learning_rate": 1.3259658526864967e-05, "loss": 0.6461, "step": 3229 }, { "epoch": 0.41, "grad_norm": 0.5387380076889448, "learning_rate": 1.3255746745648972e-05, "loss": 0.6177, "step": 3230 }, { "epoch": 0.41, "grad_norm": 0.5238092222781119, "learning_rate": 1.325183440708869e-05, "loss": 0.6373, "step": 3231 }, { "epoch": 0.41, "grad_norm": 0.5638963033063413, "learning_rate": 1.3247921511853865e-05, "loss": 0.6123, "step": 3232 }, { "epoch": 0.41, "grad_norm": 0.521576878167889, "learning_rate": 1.3244008060614332e-05, "loss": 0.5989, "step": 3233 }, { "epoch": 0.41, "grad_norm": 0.5578875855906988, "learning_rate": 1.3240094054040035e-05, "loss": 0.64, "step": 3234 }, { "epoch": 0.41, "grad_norm": 0.5613405278735472, "learning_rate": 1.3236179492801005e-05, "loss": 0.6306, "step": 3235 }, { "epoch": 0.41, "grad_norm": 0.5423838165973088, "learning_rate": 1.3232264377567359e-05, "loss": 0.586, "step": 3236 }, { "epoch": 0.41, "grad_norm": 0.48274444776473924, "learning_rate": 1.3228348709009318e-05, "loss": 0.5879, "step": 3237 }, { "epoch": 0.41, "grad_norm": 0.5023216271905092, "learning_rate": 1.3224432487797203e-05, "loss": 0.5975, "step": 3238 }, { "epoch": 0.41, "grad_norm": 0.5550737769775211, "learning_rate": 1.322051571460142e-05, "loss": 0.5936, "step": 3239 }, { "epoch": 0.41, "grad_norm": 0.5329817180206002, "learning_rate": 1.3216598390092475e-05, "loss": 0.6453, "step": 3240 }, { "epoch": 0.41, "grad_norm": 0.5204770998099674, "learning_rate": 1.3212680514940965e-05, "loss": 0.623, "step": 3241 }, { "epoch": 0.41, "grad_norm": 0.5372743486645862, "learning_rate": 1.3208762089817582e-05, "loss": 0.6413, "step": 3242 }, { "epoch": 0.41, "grad_norm": 0.5311176379128149, "learning_rate": 1.3204843115393111e-05, "loss": 0.6285, "step": 3243 }, { "epoch": 0.41, "grad_norm": 0.549699258420831, "learning_rate": 1.3200923592338436e-05, "loss": 0.6047, "step": 3244 }, { "epoch": 0.41, "grad_norm": 0.5648900879694047, "learning_rate": 1.3197003521324534e-05, "loss": 0.6392, "step": 3245 }, { "epoch": 0.41, "grad_norm": 0.4739453586608133, "learning_rate": 1.3193082903022467e-05, "loss": 0.6131, "step": 3246 }, { "epoch": 0.41, "grad_norm": 0.5450907332253805, "learning_rate": 1.3189161738103401e-05, "loss": 0.6017, "step": 3247 }, { "epoch": 0.41, "grad_norm": 0.567168478922013, "learning_rate": 1.3185240027238592e-05, "loss": 0.6113, "step": 3248 }, { "epoch": 0.42, "grad_norm": 0.48423500878241454, "learning_rate": 1.318131777109939e-05, "loss": 0.5883, "step": 3249 }, { "epoch": 0.42, "grad_norm": 0.5565237416150364, "learning_rate": 1.3177394970357233e-05, "loss": 0.6276, "step": 3250 }, { "epoch": 0.42, "grad_norm": 0.5155026973137006, "learning_rate": 1.317347162568366e-05, "loss": 0.6295, "step": 3251 }, { "epoch": 0.42, "grad_norm": 0.4879670681585004, "learning_rate": 1.3169547737750303e-05, "loss": 0.5834, "step": 3252 }, { "epoch": 0.42, "grad_norm": 0.45326861586634504, "learning_rate": 1.3165623307228882e-05, "loss": 0.6145, "step": 3253 }, { "epoch": 0.42, "grad_norm": 0.6093669414517007, "learning_rate": 1.3161698334791208e-05, "loss": 0.6619, "step": 3254 }, { "epoch": 0.42, "grad_norm": 0.5290345296211504, "learning_rate": 1.3157772821109189e-05, "loss": 0.6276, "step": 3255 }, { "epoch": 0.42, "grad_norm": 0.5147684768597677, "learning_rate": 1.3153846766854831e-05, "loss": 0.5925, "step": 3256 }, { "epoch": 0.42, "grad_norm": 0.46468940714624124, "learning_rate": 1.3149920172700223e-05, "loss": 0.6051, "step": 3257 }, { "epoch": 0.42, "grad_norm": 0.5308666473017322, "learning_rate": 1.3145993039317548e-05, "loss": 0.6311, "step": 3258 }, { "epoch": 0.42, "grad_norm": 0.4881520859626484, "learning_rate": 1.3142065367379089e-05, "loss": 0.6213, "step": 3259 }, { "epoch": 0.42, "grad_norm": 0.5104724721549986, "learning_rate": 1.3138137157557213e-05, "loss": 0.6088, "step": 3260 }, { "epoch": 0.42, "grad_norm": 0.5706952074449086, "learning_rate": 1.3134208410524383e-05, "loss": 0.6216, "step": 3261 }, { "epoch": 0.42, "grad_norm": 0.49503949680498643, "learning_rate": 1.3130279126953147e-05, "loss": 0.6139, "step": 3262 }, { "epoch": 0.42, "grad_norm": 0.5360907969497405, "learning_rate": 1.3126349307516158e-05, "loss": 0.609, "step": 3263 }, { "epoch": 0.42, "grad_norm": 0.4939972842006541, "learning_rate": 1.3122418952886149e-05, "loss": 0.5874, "step": 3264 }, { "epoch": 0.42, "grad_norm": 0.503682467347296, "learning_rate": 1.311848806373595e-05, "loss": 0.646, "step": 3265 }, { "epoch": 0.42, "grad_norm": 0.6052057528352259, "learning_rate": 1.3114556640738486e-05, "loss": 0.6487, "step": 3266 }, { "epoch": 0.42, "grad_norm": 0.5116758862743613, "learning_rate": 1.311062468456676e-05, "loss": 0.6386, "step": 3267 }, { "epoch": 0.42, "grad_norm": 0.46983805530849393, "learning_rate": 1.310669219589388e-05, "loss": 0.6056, "step": 3268 }, { "epoch": 0.42, "grad_norm": 0.5666236594188814, "learning_rate": 1.3102759175393041e-05, "loss": 0.6104, "step": 3269 }, { "epoch": 0.42, "grad_norm": 0.548167600876288, "learning_rate": 1.3098825623737528e-05, "loss": 0.6012, "step": 3270 }, { "epoch": 0.42, "grad_norm": 0.5039067252125073, "learning_rate": 1.3094891541600716e-05, "loss": 0.5791, "step": 3271 }, { "epoch": 0.42, "grad_norm": 0.5508886951629618, "learning_rate": 1.3090956929656072e-05, "loss": 0.6164, "step": 3272 }, { "epoch": 0.42, "grad_norm": 0.5894699098178408, "learning_rate": 1.3087021788577156e-05, "loss": 0.6152, "step": 3273 }, { "epoch": 0.42, "grad_norm": 0.5939349920292238, "learning_rate": 1.3083086119037613e-05, "loss": 0.6561, "step": 3274 }, { "epoch": 0.42, "grad_norm": 0.5702852737466146, "learning_rate": 1.3079149921711186e-05, "loss": 0.6484, "step": 3275 }, { "epoch": 0.42, "grad_norm": 0.5408981676372517, "learning_rate": 1.3075213197271699e-05, "loss": 0.6187, "step": 3276 }, { "epoch": 0.42, "grad_norm": 0.5200165985481381, "learning_rate": 1.3071275946393076e-05, "loss": 0.6309, "step": 3277 }, { "epoch": 0.42, "grad_norm": 0.5210344903091259, "learning_rate": 1.3067338169749326e-05, "loss": 0.6275, "step": 3278 }, { "epoch": 0.42, "grad_norm": 0.6200920282027241, "learning_rate": 1.3063399868014543e-05, "loss": 0.6508, "step": 3279 }, { "epoch": 0.42, "grad_norm": 0.574350486631379, "learning_rate": 1.3059461041862922e-05, "loss": 0.6322, "step": 3280 }, { "epoch": 0.42, "grad_norm": 0.5210740833175642, "learning_rate": 1.3055521691968743e-05, "loss": 0.6129, "step": 3281 }, { "epoch": 0.42, "grad_norm": 0.5979838696045838, "learning_rate": 1.3051581819006374e-05, "loss": 0.6737, "step": 3282 }, { "epoch": 0.42, "grad_norm": 0.551645798745319, "learning_rate": 1.3047641423650267e-05, "loss": 0.6528, "step": 3283 }, { "epoch": 0.42, "grad_norm": 0.5086163977621532, "learning_rate": 1.3043700506574976e-05, "loss": 0.6254, "step": 3284 }, { "epoch": 0.42, "grad_norm": 0.484913296615348, "learning_rate": 1.3039759068455137e-05, "loss": 0.6024, "step": 3285 }, { "epoch": 0.42, "grad_norm": 0.5309592799544572, "learning_rate": 1.3035817109965483e-05, "loss": 0.6347, "step": 3286 }, { "epoch": 0.42, "grad_norm": 0.4894995337016809, "learning_rate": 1.3031874631780815e-05, "loss": 0.6208, "step": 3287 }, { "epoch": 0.42, "grad_norm": 0.5345749599502905, "learning_rate": 1.3027931634576045e-05, "loss": 0.6327, "step": 3288 }, { "epoch": 0.42, "grad_norm": 0.5421257887347981, "learning_rate": 1.3023988119026169e-05, "loss": 0.6179, "step": 3289 }, { "epoch": 0.42, "grad_norm": 0.5226675856613472, "learning_rate": 1.3020044085806265e-05, "loss": 0.5974, "step": 3290 }, { "epoch": 0.42, "grad_norm": 0.605529775986194, "learning_rate": 1.3016099535591507e-05, "loss": 0.6789, "step": 3291 }, { "epoch": 0.42, "grad_norm": 0.6419252180555045, "learning_rate": 1.301215446905715e-05, "loss": 0.5919, "step": 3292 }, { "epoch": 0.42, "grad_norm": 0.5334862778593487, "learning_rate": 1.3008208886878543e-05, "loss": 0.6052, "step": 3293 }, { "epoch": 0.42, "grad_norm": 0.5470018013598303, "learning_rate": 1.3004262789731124e-05, "loss": 0.6209, "step": 3294 }, { "epoch": 0.42, "grad_norm": 0.591527007629617, "learning_rate": 1.3000316178290414e-05, "loss": 0.6702, "step": 3295 }, { "epoch": 0.42, "grad_norm": 0.5035586995212346, "learning_rate": 1.2996369053232023e-05, "loss": 0.5933, "step": 3296 }, { "epoch": 0.42, "grad_norm": 0.5345263547637958, "learning_rate": 1.2992421415231655e-05, "loss": 0.6141, "step": 3297 }, { "epoch": 0.42, "grad_norm": 0.5021282682564292, "learning_rate": 1.29884732649651e-05, "loss": 0.6232, "step": 3298 }, { "epoch": 0.42, "grad_norm": 0.5236480539130488, "learning_rate": 1.2984524603108226e-05, "loss": 0.5814, "step": 3299 }, { "epoch": 0.42, "grad_norm": 0.5714539122534343, "learning_rate": 1.2980575430337004e-05, "loss": 0.6117, "step": 3300 }, { "epoch": 0.42, "grad_norm": 0.5986694607870958, "learning_rate": 1.2976625747327473e-05, "loss": 0.6197, "step": 3301 }, { "epoch": 0.42, "grad_norm": 0.5337536090579869, "learning_rate": 1.2972675554755782e-05, "loss": 0.6127, "step": 3302 }, { "epoch": 0.42, "grad_norm": 0.5178707785759289, "learning_rate": 1.2968724853298154e-05, "loss": 0.5961, "step": 3303 }, { "epoch": 0.42, "grad_norm": 0.6119138472646868, "learning_rate": 1.2964773643630895e-05, "loss": 0.6498, "step": 3304 }, { "epoch": 0.42, "grad_norm": 0.4849038208649443, "learning_rate": 1.296082192643041e-05, "loss": 0.5906, "step": 3305 }, { "epoch": 0.42, "grad_norm": 0.5183647787511789, "learning_rate": 1.295686970237318e-05, "loss": 0.6026, "step": 3306 }, { "epoch": 0.42, "grad_norm": 0.4846663176990391, "learning_rate": 1.2952916972135786e-05, "loss": 0.582, "step": 3307 }, { "epoch": 0.42, "grad_norm": 0.5597192693279304, "learning_rate": 1.2948963736394877e-05, "loss": 0.6499, "step": 3308 }, { "epoch": 0.42, "grad_norm": 0.531387404041624, "learning_rate": 1.2945009995827207e-05, "loss": 0.6071, "step": 3309 }, { "epoch": 0.42, "grad_norm": 0.560879002972198, "learning_rate": 1.2941055751109604e-05, "loss": 0.6595, "step": 3310 }, { "epoch": 0.42, "grad_norm": 0.4870993246182389, "learning_rate": 1.293710100291899e-05, "loss": 0.6311, "step": 3311 }, { "epoch": 0.42, "grad_norm": 0.5020845702753198, "learning_rate": 1.2933145751932365e-05, "loss": 0.6181, "step": 3312 }, { "epoch": 0.42, "grad_norm": 0.5286510377282787, "learning_rate": 1.2929189998826826e-05, "loss": 0.6332, "step": 3313 }, { "epoch": 0.42, "grad_norm": 0.5949955924391657, "learning_rate": 1.2925233744279545e-05, "loss": 0.618, "step": 3314 }, { "epoch": 0.42, "grad_norm": 0.5101547746083349, "learning_rate": 1.2921276988967785e-05, "loss": 0.585, "step": 3315 }, { "epoch": 0.42, "grad_norm": 0.5410169104733029, "learning_rate": 1.2917319733568899e-05, "loss": 0.6484, "step": 3316 }, { "epoch": 0.42, "grad_norm": 0.5078449337171428, "learning_rate": 1.2913361978760315e-05, "loss": 0.5983, "step": 3317 }, { "epoch": 0.42, "grad_norm": 0.5674714421032373, "learning_rate": 1.2909403725219555e-05, "loss": 0.6978, "step": 3318 }, { "epoch": 0.42, "grad_norm": 0.5415707472854585, "learning_rate": 1.2905444973624227e-05, "loss": 0.6148, "step": 3319 }, { "epoch": 0.42, "grad_norm": 0.5196175464021805, "learning_rate": 1.2901485724652018e-05, "loss": 0.6004, "step": 3320 }, { "epoch": 0.42, "grad_norm": 0.501999077927181, "learning_rate": 1.2897525978980702e-05, "loss": 0.611, "step": 3321 }, { "epoch": 0.42, "grad_norm": 0.4904475063390489, "learning_rate": 1.289356573728814e-05, "loss": 0.6015, "step": 3322 }, { "epoch": 0.42, "grad_norm": 1.1121342136276984, "learning_rate": 1.2889605000252283e-05, "loss": 0.6198, "step": 3323 }, { "epoch": 0.42, "grad_norm": 0.5134737782551139, "learning_rate": 1.2885643768551155e-05, "loss": 0.5973, "step": 3324 }, { "epoch": 0.42, "grad_norm": 0.6303424282031584, "learning_rate": 1.288168204286287e-05, "loss": 0.6694, "step": 3325 }, { "epoch": 0.42, "grad_norm": 0.7219188535493288, "learning_rate": 1.2877719823865631e-05, "loss": 0.7144, "step": 3326 }, { "epoch": 0.43, "grad_norm": 0.4786380776699207, "learning_rate": 1.287375711223772e-05, "loss": 0.5878, "step": 3327 }, { "epoch": 0.43, "grad_norm": 0.5483090013431373, "learning_rate": 1.2869793908657507e-05, "loss": 0.6175, "step": 3328 }, { "epoch": 0.43, "grad_norm": 0.610625345291048, "learning_rate": 1.2865830213803439e-05, "loss": 0.6137, "step": 3329 }, { "epoch": 0.43, "grad_norm": 0.5299464060585598, "learning_rate": 1.286186602835406e-05, "loss": 0.6374, "step": 3330 }, { "epoch": 0.43, "grad_norm": 0.8338079142339018, "learning_rate": 1.2857901352987983e-05, "loss": 0.5931, "step": 3331 }, { "epoch": 0.43, "grad_norm": 0.5227031447821368, "learning_rate": 1.285393618838392e-05, "loss": 0.6121, "step": 3332 }, { "epoch": 0.43, "grad_norm": 0.47035647566006894, "learning_rate": 1.284997053522065e-05, "loss": 0.5807, "step": 3333 }, { "epoch": 0.43, "grad_norm": 0.4663963976735657, "learning_rate": 1.2846004394177051e-05, "loss": 0.5931, "step": 3334 }, { "epoch": 0.43, "grad_norm": 0.5392254620293538, "learning_rate": 1.2842037765932078e-05, "loss": 0.5997, "step": 3335 }, { "epoch": 0.43, "grad_norm": 0.627843208321786, "learning_rate": 1.2838070651164767e-05, "loss": 0.6508, "step": 3336 }, { "epoch": 0.43, "grad_norm": 0.48878958220446134, "learning_rate": 1.2834103050554241e-05, "loss": 0.6168, "step": 3337 }, { "epoch": 0.43, "grad_norm": 0.5195365877258681, "learning_rate": 1.2830134964779706e-05, "loss": 0.6149, "step": 3338 }, { "epoch": 0.43, "grad_norm": 0.47246781052546893, "learning_rate": 1.2826166394520447e-05, "loss": 0.5954, "step": 3339 }, { "epoch": 0.43, "grad_norm": 0.42365959148715243, "learning_rate": 1.282219734045584e-05, "loss": 0.5723, "step": 3340 }, { "epoch": 0.43, "grad_norm": 0.46295630456327613, "learning_rate": 1.2818227803265333e-05, "loss": 0.5863, "step": 3341 }, { "epoch": 0.43, "grad_norm": 0.4774392892210411, "learning_rate": 1.2814257783628468e-05, "loss": 0.5933, "step": 3342 }, { "epoch": 0.43, "grad_norm": 0.4623825384935038, "learning_rate": 1.2810287282224859e-05, "loss": 0.595, "step": 3343 }, { "epoch": 0.43, "grad_norm": 0.5534766519969726, "learning_rate": 1.2806316299734211e-05, "loss": 0.6275, "step": 3344 }, { "epoch": 0.43, "grad_norm": 0.4625455591991474, "learning_rate": 1.2802344836836307e-05, "loss": 0.6021, "step": 3345 }, { "epoch": 0.43, "grad_norm": 0.5455385072436681, "learning_rate": 1.2798372894211015e-05, "loss": 0.6255, "step": 3346 }, { "epoch": 0.43, "grad_norm": 0.4909715959438912, "learning_rate": 1.2794400472538277e-05, "loss": 0.6006, "step": 3347 }, { "epoch": 0.43, "grad_norm": 0.5231665400812993, "learning_rate": 1.2790427572498132e-05, "loss": 0.6456, "step": 3348 }, { "epoch": 0.43, "grad_norm": 0.5233618167209968, "learning_rate": 1.278645419477069e-05, "loss": 0.6413, "step": 3349 }, { "epoch": 0.43, "grad_norm": 0.46802419322010175, "learning_rate": 1.278248034003614e-05, "loss": 0.5832, "step": 3350 }, { "epoch": 0.43, "grad_norm": 0.5206464496852212, "learning_rate": 1.2778506008974757e-05, "loss": 0.6379, "step": 3351 }, { "epoch": 0.43, "grad_norm": 0.48189839450491223, "learning_rate": 1.2774531202266908e-05, "loss": 0.6023, "step": 3352 }, { "epoch": 0.43, "grad_norm": 0.44548980715341685, "learning_rate": 1.2770555920593028e-05, "loss": 0.5842, "step": 3353 }, { "epoch": 0.43, "grad_norm": 0.5199520012315949, "learning_rate": 1.276658016463363e-05, "loss": 0.5947, "step": 3354 }, { "epoch": 0.43, "grad_norm": 0.6460752176063054, "learning_rate": 1.2762603935069317e-05, "loss": 0.6878, "step": 3355 }, { "epoch": 0.43, "grad_norm": 0.5146906001761301, "learning_rate": 1.2758627232580777e-05, "loss": 0.5941, "step": 3356 }, { "epoch": 0.43, "grad_norm": 0.5703870941717926, "learning_rate": 1.2754650057848772e-05, "loss": 0.6285, "step": 3357 }, { "epoch": 0.43, "grad_norm": 0.4852815760420083, "learning_rate": 1.2750672411554144e-05, "loss": 0.6056, "step": 3358 }, { "epoch": 0.43, "grad_norm": 0.5270786508344406, "learning_rate": 1.2746694294377815e-05, "loss": 0.6112, "step": 3359 }, { "epoch": 0.43, "grad_norm": 0.4877619351489317, "learning_rate": 1.2742715707000795e-05, "loss": 0.625, "step": 3360 }, { "epoch": 0.43, "grad_norm": 0.47113862925490146, "learning_rate": 1.2738736650104166e-05, "loss": 0.6055, "step": 3361 }, { "epoch": 0.43, "grad_norm": 0.519471808341537, "learning_rate": 1.2734757124369097e-05, "loss": 0.6482, "step": 3362 }, { "epoch": 0.43, "grad_norm": 0.6243015220274603, "learning_rate": 1.2730777130476834e-05, "loss": 0.6307, "step": 3363 }, { "epoch": 0.43, "grad_norm": 0.49427302227991193, "learning_rate": 1.2726796669108703e-05, "loss": 0.6064, "step": 3364 }, { "epoch": 0.43, "grad_norm": 0.5468746542925305, "learning_rate": 1.272281574094611e-05, "loss": 0.6481, "step": 3365 }, { "epoch": 0.43, "grad_norm": 0.5147329574456229, "learning_rate": 1.2718834346670543e-05, "loss": 0.5999, "step": 3366 }, { "epoch": 0.43, "grad_norm": 0.5676903445996362, "learning_rate": 1.2714852486963568e-05, "loss": 0.6049, "step": 3367 }, { "epoch": 0.43, "grad_norm": 0.4900827344636608, "learning_rate": 1.2710870162506827e-05, "loss": 0.5996, "step": 3368 }, { "epoch": 0.43, "grad_norm": 0.4556654849570544, "learning_rate": 1.2706887373982053e-05, "loss": 0.6033, "step": 3369 }, { "epoch": 0.43, "grad_norm": 0.5398444026860906, "learning_rate": 1.2702904122071049e-05, "loss": 0.62, "step": 3370 }, { "epoch": 0.43, "grad_norm": 0.5142575729681148, "learning_rate": 1.2698920407455697e-05, "loss": 0.5901, "step": 3371 }, { "epoch": 0.43, "grad_norm": 0.5964894666153139, "learning_rate": 1.2694936230817957e-05, "loss": 0.6186, "step": 3372 }, { "epoch": 0.43, "grad_norm": 0.5568778252334083, "learning_rate": 1.2690951592839878e-05, "loss": 0.6905, "step": 3373 }, { "epoch": 0.43, "grad_norm": 0.5320194997151773, "learning_rate": 1.2686966494203581e-05, "loss": 0.6037, "step": 3374 }, { "epoch": 0.43, "grad_norm": 0.5457895518268635, "learning_rate": 1.2682980935591262e-05, "loss": 0.6136, "step": 3375 }, { "epoch": 0.43, "grad_norm": 0.49714015014540036, "learning_rate": 1.2678994917685204e-05, "loss": 0.5905, "step": 3376 }, { "epoch": 0.43, "grad_norm": 0.466415368888753, "learning_rate": 1.2675008441167763e-05, "loss": 0.5923, "step": 3377 }, { "epoch": 0.43, "grad_norm": 0.5236012129531317, "learning_rate": 1.2671021506721377e-05, "loss": 0.6101, "step": 3378 }, { "epoch": 0.43, "grad_norm": 0.4861705655810287, "learning_rate": 1.266703411502856e-05, "loss": 0.6215, "step": 3379 }, { "epoch": 0.43, "grad_norm": 0.5388586035796368, "learning_rate": 1.2663046266771905e-05, "loss": 0.6267, "step": 3380 }, { "epoch": 0.43, "grad_norm": 0.4887479323183796, "learning_rate": 1.2659057962634084e-05, "loss": 0.5991, "step": 3381 }, { "epoch": 0.43, "grad_norm": 0.4741504893276884, "learning_rate": 1.2655069203297845e-05, "loss": 0.5807, "step": 3382 }, { "epoch": 0.43, "grad_norm": 0.49456284489097196, "learning_rate": 1.2651079989446015e-05, "loss": 0.5867, "step": 3383 }, { "epoch": 0.43, "grad_norm": 0.517363958732916, "learning_rate": 1.2647090321761502e-05, "loss": 0.6286, "step": 3384 }, { "epoch": 0.43, "grad_norm": 0.5376499451804708, "learning_rate": 1.2643100200927283e-05, "loss": 0.6229, "step": 3385 }, { "epoch": 0.43, "grad_norm": 0.48482912907940034, "learning_rate": 1.2639109627626422e-05, "loss": 0.6035, "step": 3386 }, { "epoch": 0.43, "grad_norm": 0.5261979959086225, "learning_rate": 1.2635118602542055e-05, "loss": 0.598, "step": 3387 }, { "epoch": 0.43, "grad_norm": 0.4757746595119629, "learning_rate": 1.26311271263574e-05, "loss": 0.5868, "step": 3388 }, { "epoch": 0.43, "grad_norm": 0.46042154948688346, "learning_rate": 1.2627135199755745e-05, "loss": 0.5725, "step": 3389 }, { "epoch": 0.43, "grad_norm": 0.5036613443210068, "learning_rate": 1.2623142823420462e-05, "loss": 0.615, "step": 3390 }, { "epoch": 0.43, "grad_norm": 0.5220435153713132, "learning_rate": 1.2619149998034998e-05, "loss": 0.61, "step": 3391 }, { "epoch": 0.43, "grad_norm": 0.5312627670775328, "learning_rate": 1.2615156724282876e-05, "loss": 0.5927, "step": 3392 }, { "epoch": 0.43, "grad_norm": 0.5084853656118306, "learning_rate": 1.261116300284769e-05, "loss": 0.6157, "step": 3393 }, { "epoch": 0.43, "grad_norm": 0.543758770832181, "learning_rate": 1.2607168834413126e-05, "loss": 0.6324, "step": 3394 }, { "epoch": 0.43, "grad_norm": 0.5284403152096767, "learning_rate": 1.2603174219662935e-05, "loss": 0.567, "step": 3395 }, { "epoch": 0.43, "grad_norm": 0.4574508807702379, "learning_rate": 1.2599179159280941e-05, "loss": 0.5952, "step": 3396 }, { "epoch": 0.43, "grad_norm": 0.5784821088170057, "learning_rate": 1.2595183653951048e-05, "loss": 0.6333, "step": 3397 }, { "epoch": 0.43, "grad_norm": 0.531804851344372, "learning_rate": 1.259118770435725e-05, "loss": 0.6181, "step": 3398 }, { "epoch": 0.43, "grad_norm": 0.5502420196036194, "learning_rate": 1.2587191311183597e-05, "loss": 0.5974, "step": 3399 }, { "epoch": 0.43, "grad_norm": 0.5533003224397658, "learning_rate": 1.2583194475114223e-05, "loss": 0.6328, "step": 3400 }, { "epoch": 0.43, "grad_norm": 0.6043408154143246, "learning_rate": 1.2579197196833337e-05, "loss": 0.6446, "step": 3401 }, { "epoch": 0.43, "grad_norm": 0.5751151050858946, "learning_rate": 1.2575199477025229e-05, "loss": 0.6191, "step": 3402 }, { "epoch": 0.43, "grad_norm": 0.5176329105548496, "learning_rate": 1.2571201316374255e-05, "loss": 0.6126, "step": 3403 }, { "epoch": 0.43, "grad_norm": 0.5472991139447304, "learning_rate": 1.2567202715564854e-05, "loss": 0.6331, "step": 3404 }, { "epoch": 0.43, "grad_norm": 0.5968734146240132, "learning_rate": 1.2563203675281539e-05, "loss": 0.626, "step": 3405 }, { "epoch": 0.44, "grad_norm": 0.5369649559360918, "learning_rate": 1.2559204196208896e-05, "loss": 0.6088, "step": 3406 }, { "epoch": 0.44, "grad_norm": 0.5353083567750629, "learning_rate": 1.2555204279031588e-05, "loss": 0.6327, "step": 3407 }, { "epoch": 0.44, "grad_norm": 0.5895558571321188, "learning_rate": 1.255120392443435e-05, "loss": 0.6228, "step": 3408 }, { "epoch": 0.44, "grad_norm": 0.5269759730156018, "learning_rate": 1.2547203133101996e-05, "loss": 0.6237, "step": 3409 }, { "epoch": 0.44, "grad_norm": 0.5039174345461706, "learning_rate": 1.2543201905719408e-05, "loss": 0.6173, "step": 3410 }, { "epoch": 0.44, "grad_norm": 0.5576771588037032, "learning_rate": 1.2539200242971554e-05, "loss": 0.6065, "step": 3411 }, { "epoch": 0.44, "grad_norm": 0.5039583194283278, "learning_rate": 1.253519814554347e-05, "loss": 0.5967, "step": 3412 }, { "epoch": 0.44, "grad_norm": 0.45611797302259494, "learning_rate": 1.2531195614120263e-05, "loss": 0.6054, "step": 3413 }, { "epoch": 0.44, "grad_norm": 0.6856868666577631, "learning_rate": 1.2527192649387119e-05, "loss": 0.698, "step": 3414 }, { "epoch": 0.44, "grad_norm": 0.511745510048781, "learning_rate": 1.2523189252029296e-05, "loss": 0.635, "step": 3415 }, { "epoch": 0.44, "grad_norm": 0.5551718881726205, "learning_rate": 1.2519185422732129e-05, "loss": 0.6226, "step": 3416 }, { "epoch": 0.44, "grad_norm": 0.4827484536446958, "learning_rate": 1.2515181162181025e-05, "loss": 0.5844, "step": 3417 }, { "epoch": 0.44, "grad_norm": 0.5101707151638443, "learning_rate": 1.2511176471061462e-05, "loss": 0.6123, "step": 3418 }, { "epoch": 0.44, "grad_norm": 0.5213199845474658, "learning_rate": 1.2507171350058995e-05, "loss": 0.5998, "step": 3419 }, { "epoch": 0.44, "grad_norm": 0.5240678755813732, "learning_rate": 1.2503165799859257e-05, "loss": 0.6186, "step": 3420 }, { "epoch": 0.44, "grad_norm": 0.57652813276558, "learning_rate": 1.2499159821147942e-05, "loss": 0.6626, "step": 3421 }, { "epoch": 0.44, "grad_norm": 0.5653186051621159, "learning_rate": 1.2495153414610829e-05, "loss": 0.6169, "step": 3422 }, { "epoch": 0.44, "grad_norm": 0.6139402737322087, "learning_rate": 1.2491146580933766e-05, "loss": 0.6494, "step": 3423 }, { "epoch": 0.44, "grad_norm": 0.46217077008959206, "learning_rate": 1.2487139320802673e-05, "loss": 0.5804, "step": 3424 }, { "epoch": 0.44, "grad_norm": 0.5687539429100027, "learning_rate": 1.2483131634903546e-05, "loss": 0.6006, "step": 3425 }, { "epoch": 0.44, "grad_norm": 0.5533786250644783, "learning_rate": 1.2479123523922451e-05, "loss": 0.637, "step": 3426 }, { "epoch": 0.44, "grad_norm": 0.5732178047897654, "learning_rate": 1.247511498854553e-05, "loss": 0.6045, "step": 3427 }, { "epoch": 0.44, "grad_norm": 0.4918562061679106, "learning_rate": 1.2471106029458992e-05, "loss": 0.5785, "step": 3428 }, { "epoch": 0.44, "grad_norm": 0.8657345163453462, "learning_rate": 1.2467096647349126e-05, "loss": 0.621, "step": 3429 }, { "epoch": 0.44, "grad_norm": 0.5812034549216384, "learning_rate": 1.2463086842902285e-05, "loss": 0.6513, "step": 3430 }, { "epoch": 0.44, "grad_norm": 0.5352245428479532, "learning_rate": 1.2459076616804901e-05, "loss": 0.6355, "step": 3431 }, { "epoch": 0.44, "grad_norm": 0.5370150255891168, "learning_rate": 1.2455065969743479e-05, "loss": 0.6336, "step": 3432 }, { "epoch": 0.44, "grad_norm": 0.521375859208822, "learning_rate": 1.2451054902404586e-05, "loss": 0.5985, "step": 3433 }, { "epoch": 0.44, "grad_norm": 0.5702444411512443, "learning_rate": 1.2447043415474876e-05, "loss": 0.5957, "step": 3434 }, { "epoch": 0.44, "grad_norm": 0.5378370296973651, "learning_rate": 1.2443031509641062e-05, "loss": 0.6139, "step": 3435 }, { "epoch": 0.44, "grad_norm": 0.5071909276958274, "learning_rate": 1.243901918558993e-05, "loss": 0.64, "step": 3436 }, { "epoch": 0.44, "grad_norm": 0.5812998958230213, "learning_rate": 1.2435006444008352e-05, "loss": 0.6788, "step": 3437 }, { "epoch": 0.44, "grad_norm": 0.5637725066905966, "learning_rate": 1.2430993285583253e-05, "loss": 0.6409, "step": 3438 }, { "epoch": 0.44, "grad_norm": 0.5342299587820294, "learning_rate": 1.2426979711001637e-05, "loss": 0.6225, "step": 3439 }, { "epoch": 0.44, "grad_norm": 0.5093617650328871, "learning_rate": 1.2422965720950577e-05, "loss": 0.5883, "step": 3440 }, { "epoch": 0.44, "grad_norm": 0.5197310844108912, "learning_rate": 1.241895131611723e-05, "loss": 0.6257, "step": 3441 }, { "epoch": 0.44, "grad_norm": 0.4683810958961066, "learning_rate": 1.2414936497188803e-05, "loss": 0.6036, "step": 3442 }, { "epoch": 0.44, "grad_norm": 0.6120277677288555, "learning_rate": 1.2410921264852585e-05, "loss": 0.5921, "step": 3443 }, { "epoch": 0.44, "grad_norm": 0.4850907220318917, "learning_rate": 1.2406905619795939e-05, "loss": 0.6446, "step": 3444 }, { "epoch": 0.44, "grad_norm": 0.4972534501374091, "learning_rate": 1.2402889562706294e-05, "loss": 0.5736, "step": 3445 }, { "epoch": 0.44, "grad_norm": 0.5033989329895846, "learning_rate": 1.2398873094271149e-05, "loss": 0.5786, "step": 3446 }, { "epoch": 0.44, "grad_norm": 0.4639674441623738, "learning_rate": 1.2394856215178073e-05, "loss": 0.6074, "step": 3447 }, { "epoch": 0.44, "grad_norm": 0.528913240261468, "learning_rate": 1.2390838926114709e-05, "loss": 0.601, "step": 3448 }, { "epoch": 0.44, "grad_norm": 0.5621273596444032, "learning_rate": 1.2386821227768768e-05, "loss": 0.6475, "step": 3449 }, { "epoch": 0.44, "grad_norm": 0.4636725255508809, "learning_rate": 1.2382803120828031e-05, "loss": 0.6005, "step": 3450 }, { "epoch": 0.44, "grad_norm": 0.49328324648716226, "learning_rate": 1.2378784605980349e-05, "loss": 0.5918, "step": 3451 }, { "epoch": 0.44, "grad_norm": 0.4981997169627736, "learning_rate": 1.2374765683913643e-05, "loss": 0.5959, "step": 3452 }, { "epoch": 0.44, "grad_norm": 0.5292345804735932, "learning_rate": 1.2370746355315906e-05, "loss": 0.6239, "step": 3453 }, { "epoch": 0.44, "grad_norm": 0.5360861541698356, "learning_rate": 1.2366726620875194e-05, "loss": 0.6401, "step": 3454 }, { "epoch": 0.44, "grad_norm": 0.5138859363210181, "learning_rate": 1.2362706481279636e-05, "loss": 0.6017, "step": 3455 }, { "epoch": 0.44, "grad_norm": 0.5616553358067394, "learning_rate": 1.2358685937217436e-05, "loss": 0.625, "step": 3456 }, { "epoch": 0.44, "grad_norm": 0.5829458624002202, "learning_rate": 1.2354664989376856e-05, "loss": 0.6197, "step": 3457 }, { "epoch": 0.44, "grad_norm": 0.53619356312523, "learning_rate": 1.2350643638446245e-05, "loss": 0.6118, "step": 3458 }, { "epoch": 0.44, "grad_norm": 0.5080922632641344, "learning_rate": 1.2346621885113996e-05, "loss": 0.5869, "step": 3459 }, { "epoch": 0.44, "grad_norm": 0.4881559260544242, "learning_rate": 1.2342599730068592e-05, "loss": 0.6053, "step": 3460 }, { "epoch": 0.44, "grad_norm": 0.4996902955321987, "learning_rate": 1.2338577173998571e-05, "loss": 0.5967, "step": 3461 }, { "epoch": 0.44, "grad_norm": 0.49679015564391227, "learning_rate": 1.2334554217592555e-05, "loss": 0.5943, "step": 3462 }, { "epoch": 0.44, "grad_norm": 0.5264370816373014, "learning_rate": 1.2330530861539219e-05, "loss": 0.6146, "step": 3463 }, { "epoch": 0.44, "grad_norm": 0.590895993376164, "learning_rate": 1.232650710652731e-05, "loss": 0.6695, "step": 3464 }, { "epoch": 0.44, "grad_norm": 0.5013942455614018, "learning_rate": 1.2322482953245653e-05, "loss": 0.6048, "step": 3465 }, { "epoch": 0.44, "grad_norm": 0.5500228222244821, "learning_rate": 1.2318458402383131e-05, "loss": 0.5903, "step": 3466 }, { "epoch": 0.44, "grad_norm": 0.5444903053958208, "learning_rate": 1.2314433454628697e-05, "loss": 0.6271, "step": 3467 }, { "epoch": 0.44, "grad_norm": 0.5490989118249404, "learning_rate": 1.2310408110671375e-05, "loss": 0.6278, "step": 3468 }, { "epoch": 0.44, "grad_norm": 0.5315783865535308, "learning_rate": 1.2306382371200253e-05, "loss": 0.6075, "step": 3469 }, { "epoch": 0.44, "grad_norm": 0.600987767370665, "learning_rate": 1.230235623690449e-05, "loss": 0.6541, "step": 3470 }, { "epoch": 0.44, "grad_norm": 0.5160164662558951, "learning_rate": 1.2298329708473314e-05, "loss": 0.6395, "step": 3471 }, { "epoch": 0.44, "grad_norm": 0.5524894060171495, "learning_rate": 1.229430278659601e-05, "loss": 0.6019, "step": 3472 }, { "epoch": 0.44, "grad_norm": 0.5026674349327972, "learning_rate": 1.2290275471961945e-05, "loss": 0.6077, "step": 3473 }, { "epoch": 0.44, "grad_norm": 0.5206460433849653, "learning_rate": 1.2286247765260545e-05, "loss": 0.6395, "step": 3474 }, { "epoch": 0.44, "grad_norm": 0.5008098486862047, "learning_rate": 1.2282219667181306e-05, "loss": 0.6253, "step": 3475 }, { "epoch": 0.44, "grad_norm": 0.4889898993873465, "learning_rate": 1.2278191178413783e-05, "loss": 0.5921, "step": 3476 }, { "epoch": 0.44, "grad_norm": 0.6121900957188156, "learning_rate": 1.2274162299647612e-05, "loss": 0.6602, "step": 3477 }, { "epoch": 0.44, "grad_norm": 0.5471295602525397, "learning_rate": 1.2270133031572482e-05, "loss": 0.6417, "step": 3478 }, { "epoch": 0.44, "grad_norm": 0.5394269809649839, "learning_rate": 1.226610337487816e-05, "loss": 0.5989, "step": 3479 }, { "epoch": 0.44, "grad_norm": 0.5843882776503159, "learning_rate": 1.2262073330254469e-05, "loss": 0.6893, "step": 3480 }, { "epoch": 0.44, "grad_norm": 0.48009273648632267, "learning_rate": 1.2258042898391306e-05, "loss": 0.6118, "step": 3481 }, { "epoch": 0.44, "grad_norm": 0.5135479303437134, "learning_rate": 1.2254012079978632e-05, "loss": 0.635, "step": 3482 }, { "epoch": 0.44, "grad_norm": 0.5389550836255468, "learning_rate": 1.224998087570648e-05, "loss": 0.5848, "step": 3483 }, { "epoch": 0.45, "grad_norm": 0.5592447988381001, "learning_rate": 1.2245949286264933e-05, "loss": 0.6561, "step": 3484 }, { "epoch": 0.45, "grad_norm": 0.505960482405505, "learning_rate": 1.2241917312344155e-05, "loss": 0.5666, "step": 3485 }, { "epoch": 0.45, "grad_norm": 0.4907926151746369, "learning_rate": 1.2237884954634371e-05, "loss": 0.5773, "step": 3486 }, { "epoch": 0.45, "grad_norm": 0.5167981553445511, "learning_rate": 1.2233852213825872e-05, "loss": 0.6011, "step": 3487 }, { "epoch": 0.45, "grad_norm": 0.5658268307968592, "learning_rate": 1.2229819090609016e-05, "loss": 0.6311, "step": 3488 }, { "epoch": 0.45, "grad_norm": 0.5188263036029045, "learning_rate": 1.2225785585674217e-05, "loss": 0.601, "step": 3489 }, { "epoch": 0.45, "grad_norm": 0.4696019013707691, "learning_rate": 1.222175169971197e-05, "loss": 0.6044, "step": 3490 }, { "epoch": 0.45, "grad_norm": 0.46214238707755945, "learning_rate": 1.2217717433412825e-05, "loss": 0.5987, "step": 3491 }, { "epoch": 0.45, "grad_norm": 0.5322115430719135, "learning_rate": 1.22136827874674e-05, "loss": 0.6391, "step": 3492 }, { "epoch": 0.45, "grad_norm": 0.5225486648476392, "learning_rate": 1.2209647762566374e-05, "loss": 0.6421, "step": 3493 }, { "epoch": 0.45, "grad_norm": 0.4965015456503836, "learning_rate": 1.2205612359400498e-05, "loss": 0.613, "step": 3494 }, { "epoch": 0.45, "grad_norm": 0.5259429192202002, "learning_rate": 1.2201576578660581e-05, "loss": 0.5917, "step": 3495 }, { "epoch": 0.45, "grad_norm": 0.5576109793462851, "learning_rate": 1.2197540421037505e-05, "loss": 0.6355, "step": 3496 }, { "epoch": 0.45, "grad_norm": 0.5592608350594674, "learning_rate": 1.2193503887222205e-05, "loss": 0.6334, "step": 3497 }, { "epoch": 0.45, "grad_norm": 0.6490932450655472, "learning_rate": 1.2189466977905688e-05, "loss": 0.6547, "step": 3498 }, { "epoch": 0.45, "grad_norm": 0.5256452960677687, "learning_rate": 1.2185429693779025e-05, "loss": 0.6167, "step": 3499 }, { "epoch": 0.45, "grad_norm": 0.5568395092718993, "learning_rate": 1.218139203553335e-05, "loss": 0.5913, "step": 3500 }, { "epoch": 0.45, "grad_norm": 0.6243535820530719, "learning_rate": 1.2177354003859862e-05, "loss": 0.6254, "step": 3501 }, { "epoch": 0.45, "grad_norm": 0.5377598957823502, "learning_rate": 1.2173315599449821e-05, "loss": 0.6275, "step": 3502 }, { "epoch": 0.45, "grad_norm": 0.5336137389306327, "learning_rate": 1.2169276822994554e-05, "loss": 0.6352, "step": 3503 }, { "epoch": 0.45, "grad_norm": 0.48681253643283257, "learning_rate": 1.2165237675185449e-05, "loss": 0.6007, "step": 3504 }, { "epoch": 0.45, "grad_norm": 0.4830128455352398, "learning_rate": 1.2161198156713961e-05, "loss": 0.6259, "step": 3505 }, { "epoch": 0.45, "grad_norm": 0.5272969448969185, "learning_rate": 1.2157158268271605e-05, "loss": 0.6268, "step": 3506 }, { "epoch": 0.45, "grad_norm": 0.5367697979235004, "learning_rate": 1.2153118010549958e-05, "loss": 0.6417, "step": 3507 }, { "epoch": 0.45, "grad_norm": 0.4931009306156763, "learning_rate": 1.2149077384240673e-05, "loss": 0.607, "step": 3508 }, { "epoch": 0.45, "grad_norm": 0.5041502200529838, "learning_rate": 1.2145036390035445e-05, "loss": 0.5951, "step": 3509 }, { "epoch": 0.45, "grad_norm": 0.4898088893559143, "learning_rate": 1.214099502862605e-05, "loss": 0.6063, "step": 3510 }, { "epoch": 0.45, "grad_norm": 0.5181734644740194, "learning_rate": 1.2136953300704315e-05, "loss": 0.5816, "step": 3511 }, { "epoch": 0.45, "grad_norm": 0.5126226886285257, "learning_rate": 1.2132911206962137e-05, "loss": 0.6071, "step": 3512 }, { "epoch": 0.45, "grad_norm": 0.6029564279143236, "learning_rate": 1.2128868748091478e-05, "loss": 0.6009, "step": 3513 }, { "epoch": 0.45, "grad_norm": 0.4929060432265885, "learning_rate": 1.2124825924784349e-05, "loss": 0.5894, "step": 3514 }, { "epoch": 0.45, "grad_norm": 0.5201328732976411, "learning_rate": 1.2120782737732839e-05, "loss": 0.6112, "step": 3515 }, { "epoch": 0.45, "grad_norm": 0.5094724051653776, "learning_rate": 1.211673918762909e-05, "loss": 0.6292, "step": 3516 }, { "epoch": 0.45, "grad_norm": 0.5306494264955463, "learning_rate": 1.2112695275165309e-05, "loss": 0.5969, "step": 3517 }, { "epoch": 0.45, "grad_norm": 0.5203547045598327, "learning_rate": 1.2108651001033762e-05, "loss": 0.6489, "step": 3518 }, { "epoch": 0.45, "grad_norm": 0.5967318677521445, "learning_rate": 1.2104606365926786e-05, "loss": 0.6527, "step": 3519 }, { "epoch": 0.45, "grad_norm": 0.593855559508886, "learning_rate": 1.2100561370536767e-05, "loss": 0.6238, "step": 3520 }, { "epoch": 0.45, "grad_norm": 0.522012906647677, "learning_rate": 1.209651601555616e-05, "loss": 0.6303, "step": 3521 }, { "epoch": 0.45, "grad_norm": 0.5184422402306186, "learning_rate": 1.2092470301677489e-05, "loss": 0.6649, "step": 3522 }, { "epoch": 0.45, "grad_norm": 0.5942301084868659, "learning_rate": 1.2088424229593318e-05, "loss": 0.6673, "step": 3523 }, { "epoch": 0.45, "grad_norm": 0.504442669624105, "learning_rate": 1.2084377799996294e-05, "loss": 0.5896, "step": 3524 }, { "epoch": 0.45, "grad_norm": 0.5148095683063844, "learning_rate": 1.2080331013579114e-05, "loss": 0.6063, "step": 3525 }, { "epoch": 0.45, "grad_norm": 0.484050510727943, "learning_rate": 1.2076283871034537e-05, "loss": 0.6195, "step": 3526 }, { "epoch": 0.45, "grad_norm": 0.524455815343834, "learning_rate": 1.207223637305539e-05, "loss": 0.6039, "step": 3527 }, { "epoch": 0.45, "grad_norm": 0.5074985089818306, "learning_rate": 1.206818852033455e-05, "loss": 0.6122, "step": 3528 }, { "epoch": 0.45, "grad_norm": 0.5577275858734699, "learning_rate": 1.2064140313564963e-05, "loss": 0.6145, "step": 3529 }, { "epoch": 0.45, "grad_norm": 0.5296754373038365, "learning_rate": 1.206009175343963e-05, "loss": 0.6511, "step": 3530 }, { "epoch": 0.45, "grad_norm": 0.47209694329107416, "learning_rate": 1.205604284065162e-05, "loss": 0.5975, "step": 3531 }, { "epoch": 0.45, "grad_norm": 0.5235761712237115, "learning_rate": 1.2051993575894052e-05, "loss": 0.6193, "step": 3532 }, { "epoch": 0.45, "grad_norm": 0.5350523727031288, "learning_rate": 1.2047943959860117e-05, "loss": 0.6269, "step": 3533 }, { "epoch": 0.45, "grad_norm": 0.5224868870795617, "learning_rate": 1.204389399324306e-05, "loss": 0.613, "step": 3534 }, { "epoch": 0.45, "grad_norm": 0.540279940605052, "learning_rate": 1.2039843676736177e-05, "loss": 0.6647, "step": 3535 }, { "epoch": 0.45, "grad_norm": 0.5664285433296915, "learning_rate": 1.2035793011032841e-05, "loss": 0.617, "step": 3536 }, { "epoch": 0.45, "grad_norm": 0.5282717368791672, "learning_rate": 1.2031741996826474e-05, "loss": 0.6137, "step": 3537 }, { "epoch": 0.45, "grad_norm": 0.5674901574102494, "learning_rate": 1.2027690634810565e-05, "loss": 0.5964, "step": 3538 }, { "epoch": 0.45, "grad_norm": 0.5235580370906078, "learning_rate": 1.202363892567865e-05, "loss": 0.6101, "step": 3539 }, { "epoch": 0.45, "grad_norm": 0.5189074421835881, "learning_rate": 1.2019586870124339e-05, "loss": 0.6282, "step": 3540 }, { "epoch": 0.45, "grad_norm": 0.48724185867982933, "learning_rate": 1.2015534468841293e-05, "loss": 0.5958, "step": 3541 }, { "epoch": 0.45, "grad_norm": 0.5370266129718796, "learning_rate": 1.201148172252323e-05, "loss": 0.6006, "step": 3542 }, { "epoch": 0.45, "grad_norm": 0.6573879679117133, "learning_rate": 1.2007428631863935e-05, "loss": 0.6618, "step": 3543 }, { "epoch": 0.45, "grad_norm": 0.517207940620155, "learning_rate": 1.200337519755725e-05, "loss": 0.6249, "step": 3544 }, { "epoch": 0.45, "grad_norm": 0.5134882097019282, "learning_rate": 1.1999321420297067e-05, "loss": 0.6458, "step": 3545 }, { "epoch": 0.45, "grad_norm": 0.5331358761129732, "learning_rate": 1.1995267300777349e-05, "loss": 0.6418, "step": 3546 }, { "epoch": 0.45, "grad_norm": 0.5570007601549993, "learning_rate": 1.199121283969211e-05, "loss": 0.6152, "step": 3547 }, { "epoch": 0.45, "grad_norm": 0.49070054873455826, "learning_rate": 1.1987158037735424e-05, "loss": 0.6057, "step": 3548 }, { "epoch": 0.45, "grad_norm": 0.5026981267227704, "learning_rate": 1.1983102895601426e-05, "loss": 0.6082, "step": 3549 }, { "epoch": 0.45, "grad_norm": 0.5498806174218308, "learning_rate": 1.1979047413984306e-05, "loss": 0.6152, "step": 3550 }, { "epoch": 0.45, "grad_norm": 0.5046618028582235, "learning_rate": 1.197499159357831e-05, "loss": 0.609, "step": 3551 }, { "epoch": 0.45, "grad_norm": 0.5557306308510112, "learning_rate": 1.1970935435077748e-05, "loss": 0.646, "step": 3552 }, { "epoch": 0.45, "grad_norm": 0.5064730289531925, "learning_rate": 1.1966878939176985e-05, "loss": 0.6404, "step": 3553 }, { "epoch": 0.45, "grad_norm": 0.5180025124871461, "learning_rate": 1.1962822106570445e-05, "loss": 0.5935, "step": 3554 }, { "epoch": 0.45, "grad_norm": 0.578862832698133, "learning_rate": 1.1958764937952608e-05, "loss": 0.6242, "step": 3555 }, { "epoch": 0.45, "grad_norm": 0.5775877981836878, "learning_rate": 1.1954707434018008e-05, "loss": 0.6142, "step": 3556 }, { "epoch": 0.45, "grad_norm": 0.580075122697191, "learning_rate": 1.1950649595461246e-05, "loss": 0.6687, "step": 3557 }, { "epoch": 0.45, "grad_norm": 0.5377967345206794, "learning_rate": 1.194659142297697e-05, "loss": 0.6092, "step": 3558 }, { "epoch": 0.45, "grad_norm": 0.5485258212035955, "learning_rate": 1.1942532917259897e-05, "loss": 0.6264, "step": 3559 }, { "epoch": 0.45, "grad_norm": 0.5832709545870468, "learning_rate": 1.1938474079004786e-05, "loss": 0.6549, "step": 3560 }, { "epoch": 0.45, "grad_norm": 0.4966665967382828, "learning_rate": 1.193441490890646e-05, "loss": 0.5879, "step": 3561 }, { "epoch": 0.46, "grad_norm": 0.5114160040798357, "learning_rate": 1.1930355407659805e-05, "loss": 0.5878, "step": 3562 }, { "epoch": 0.46, "grad_norm": 0.5460610924162698, "learning_rate": 1.1926295575959759e-05, "loss": 0.6161, "step": 3563 }, { "epoch": 0.46, "grad_norm": 0.583623330613743, "learning_rate": 1.1922235414501314e-05, "loss": 0.6442, "step": 3564 }, { "epoch": 0.46, "grad_norm": 0.49559554035503295, "learning_rate": 1.1918174923979517e-05, "loss": 0.5968, "step": 3565 }, { "epoch": 0.46, "grad_norm": 0.5320649971552117, "learning_rate": 1.1914114105089479e-05, "loss": 0.6374, "step": 3566 }, { "epoch": 0.46, "grad_norm": 0.4818190148097673, "learning_rate": 1.191005295852636e-05, "loss": 0.604, "step": 3567 }, { "epoch": 0.46, "grad_norm": 0.5549369320069095, "learning_rate": 1.1905991484985381e-05, "loss": 0.6199, "step": 3568 }, { "epoch": 0.46, "grad_norm": 0.5621512920313074, "learning_rate": 1.1901929685161817e-05, "loss": 0.6401, "step": 3569 }, { "epoch": 0.46, "grad_norm": 0.5434875830208507, "learning_rate": 1.1897867559750994e-05, "loss": 0.6178, "step": 3570 }, { "epoch": 0.46, "grad_norm": 0.4600271487210411, "learning_rate": 1.1893805109448308e-05, "loss": 0.586, "step": 3571 }, { "epoch": 0.46, "grad_norm": 0.4652078748893885, "learning_rate": 1.1889742334949193e-05, "loss": 0.5907, "step": 3572 }, { "epoch": 0.46, "grad_norm": 0.47668502459232215, "learning_rate": 1.1885679236949149e-05, "loss": 0.6117, "step": 3573 }, { "epoch": 0.46, "grad_norm": 0.5476913217949658, "learning_rate": 1.188161581614373e-05, "loss": 0.6483, "step": 3574 }, { "epoch": 0.46, "grad_norm": 0.45588574924036307, "learning_rate": 1.187755207322854e-05, "loss": 0.5522, "step": 3575 }, { "epoch": 0.46, "grad_norm": 0.5307048754027934, "learning_rate": 1.1873488008899251e-05, "loss": 0.6173, "step": 3576 }, { "epoch": 0.46, "grad_norm": 0.4768746971032733, "learning_rate": 1.1869423623851575e-05, "loss": 0.6013, "step": 3577 }, { "epoch": 0.46, "grad_norm": 0.5670871411430217, "learning_rate": 1.1865358918781285e-05, "loss": 0.6047, "step": 3578 }, { "epoch": 0.46, "grad_norm": 0.4622952304022858, "learning_rate": 1.1861293894384209e-05, "loss": 0.5586, "step": 3579 }, { "epoch": 0.46, "grad_norm": 0.5721921448690849, "learning_rate": 1.1857228551356236e-05, "loss": 0.6336, "step": 3580 }, { "epoch": 0.46, "grad_norm": 0.5558708814625244, "learning_rate": 1.1853162890393299e-05, "loss": 0.6063, "step": 3581 }, { "epoch": 0.46, "grad_norm": 0.5270814009429599, "learning_rate": 1.1849096912191388e-05, "loss": 0.6168, "step": 3582 }, { "epoch": 0.46, "grad_norm": 0.4748863121281357, "learning_rate": 1.1845030617446549e-05, "loss": 0.5714, "step": 3583 }, { "epoch": 0.46, "grad_norm": 0.5210450343932197, "learning_rate": 1.1840964006854887e-05, "loss": 0.6067, "step": 3584 }, { "epoch": 0.46, "grad_norm": 0.5163089444601877, "learning_rate": 1.1836897081112551e-05, "loss": 0.6303, "step": 3585 }, { "epoch": 0.46, "grad_norm": 0.565400533186939, "learning_rate": 1.1832829840915752e-05, "loss": 0.5939, "step": 3586 }, { "epoch": 0.46, "grad_norm": 0.6020875728614724, "learning_rate": 1.1828762286960753e-05, "loss": 0.5768, "step": 3587 }, { "epoch": 0.46, "grad_norm": 0.5683550608049895, "learning_rate": 1.1824694419943867e-05, "loss": 0.6375, "step": 3588 }, { "epoch": 0.46, "grad_norm": 0.5609837652795915, "learning_rate": 1.1820626240561468e-05, "loss": 0.6409, "step": 3589 }, { "epoch": 0.46, "grad_norm": 0.573794246750874, "learning_rate": 1.1816557749509974e-05, "loss": 0.6002, "step": 3590 }, { "epoch": 0.46, "grad_norm": 0.5099101839088623, "learning_rate": 1.1812488947485862e-05, "loss": 0.5952, "step": 3591 }, { "epoch": 0.46, "grad_norm": 0.6055071264731301, "learning_rate": 1.1808419835185662e-05, "loss": 0.6357, "step": 3592 }, { "epoch": 0.46, "grad_norm": 0.5099788600202164, "learning_rate": 1.180435041330596e-05, "loss": 0.6077, "step": 3593 }, { "epoch": 0.46, "grad_norm": 0.5112884083698165, "learning_rate": 1.1800280682543387e-05, "loss": 0.6222, "step": 3594 }, { "epoch": 0.46, "grad_norm": 0.605460183415489, "learning_rate": 1.1796210643594634e-05, "loss": 0.6398, "step": 3595 }, { "epoch": 0.46, "grad_norm": 0.5201885245274298, "learning_rate": 1.1792140297156442e-05, "loss": 0.5756, "step": 3596 }, { "epoch": 0.46, "grad_norm": 0.642465600858013, "learning_rate": 1.1788069643925604e-05, "loss": 0.6939, "step": 3597 }, { "epoch": 0.46, "grad_norm": 0.6552892743904825, "learning_rate": 1.1783998684598966e-05, "loss": 0.656, "step": 3598 }, { "epoch": 0.46, "grad_norm": 0.5842000724217219, "learning_rate": 1.1779927419873427e-05, "loss": 0.6437, "step": 3599 }, { "epoch": 0.46, "grad_norm": 0.7716266426402161, "learning_rate": 1.1775855850445937e-05, "loss": 0.6121, "step": 3600 }, { "epoch": 0.46, "grad_norm": 0.502838463450707, "learning_rate": 1.1771783977013504e-05, "loss": 0.6037, "step": 3601 }, { "epoch": 0.46, "grad_norm": 0.5638351825732598, "learning_rate": 1.1767711800273179e-05, "loss": 0.6306, "step": 3602 }, { "epoch": 0.46, "grad_norm": 0.4534717091971364, "learning_rate": 1.176363932092207e-05, "loss": 0.5518, "step": 3603 }, { "epoch": 0.46, "grad_norm": 0.5251618659830217, "learning_rate": 1.1759566539657334e-05, "loss": 0.6118, "step": 3604 }, { "epoch": 0.46, "grad_norm": 0.5587497983139761, "learning_rate": 1.1755493457176188e-05, "loss": 0.6187, "step": 3605 }, { "epoch": 0.46, "grad_norm": 0.4872556238091139, "learning_rate": 1.1751420074175889e-05, "loss": 0.6019, "step": 3606 }, { "epoch": 0.46, "grad_norm": 0.4980066586919965, "learning_rate": 1.1747346391353746e-05, "loss": 0.6214, "step": 3607 }, { "epoch": 0.46, "grad_norm": 0.4516410798469455, "learning_rate": 1.1743272409407136e-05, "loss": 0.5891, "step": 3608 }, { "epoch": 0.46, "grad_norm": 0.510156439624811, "learning_rate": 1.1739198129033466e-05, "loss": 0.6011, "step": 3609 }, { "epoch": 0.46, "grad_norm": 0.46238830052750224, "learning_rate": 1.173512355093021e-05, "loss": 0.6118, "step": 3610 }, { "epoch": 0.46, "grad_norm": 0.5332564415788672, "learning_rate": 1.1731048675794879e-05, "loss": 0.6325, "step": 3611 }, { "epoch": 0.46, "grad_norm": 0.541646174475972, "learning_rate": 1.1726973504325046e-05, "loss": 0.6441, "step": 3612 }, { "epoch": 0.46, "grad_norm": 0.5114803279221178, "learning_rate": 1.1722898037218332e-05, "loss": 0.6004, "step": 3613 }, { "epoch": 0.46, "grad_norm": 0.5242659993141319, "learning_rate": 1.1718822275172406e-05, "loss": 0.6028, "step": 3614 }, { "epoch": 0.46, "grad_norm": 0.4764252026766098, "learning_rate": 1.1714746218884986e-05, "loss": 0.5884, "step": 3615 }, { "epoch": 0.46, "grad_norm": 0.5872666073132266, "learning_rate": 1.171066986905385e-05, "loss": 0.5958, "step": 3616 }, { "epoch": 0.46, "grad_norm": 0.5523358423072563, "learning_rate": 1.1706593226376812e-05, "loss": 0.6651, "step": 3617 }, { "epoch": 0.46, "grad_norm": 0.49741943174055253, "learning_rate": 1.1702516291551751e-05, "loss": 0.6198, "step": 3618 }, { "epoch": 0.46, "grad_norm": 0.4929644874049676, "learning_rate": 1.1698439065276584e-05, "loss": 0.6139, "step": 3619 }, { "epoch": 0.46, "grad_norm": 0.5217865734387154, "learning_rate": 1.1694361548249283e-05, "loss": 0.5973, "step": 3620 }, { "epoch": 0.46, "grad_norm": 0.5176903773015961, "learning_rate": 1.169028374116787e-05, "loss": 0.6396, "step": 3621 }, { "epoch": 0.46, "grad_norm": 0.5667778157678474, "learning_rate": 1.1686205644730417e-05, "loss": 0.6043, "step": 3622 }, { "epoch": 0.46, "grad_norm": 0.5559230736809151, "learning_rate": 1.1682127259635043e-05, "loss": 0.6231, "step": 3623 }, { "epoch": 0.46, "grad_norm": 0.4693984804581771, "learning_rate": 1.167804858657992e-05, "loss": 0.6061, "step": 3624 }, { "epoch": 0.46, "grad_norm": 0.6091110935996972, "learning_rate": 1.1673969626263265e-05, "loss": 0.6427, "step": 3625 }, { "epoch": 0.46, "grad_norm": 0.48147537770054105, "learning_rate": 1.1669890379383349e-05, "loss": 0.611, "step": 3626 }, { "epoch": 0.46, "grad_norm": 0.5422887954047466, "learning_rate": 1.1665810846638486e-05, "loss": 0.6276, "step": 3627 }, { "epoch": 0.46, "grad_norm": 0.5172376657875247, "learning_rate": 1.1661731028727043e-05, "loss": 0.6036, "step": 3628 }, { "epoch": 0.46, "grad_norm": 0.5103773442543212, "learning_rate": 1.1657650926347439e-05, "loss": 0.6047, "step": 3629 }, { "epoch": 0.46, "grad_norm": 0.5663104066535865, "learning_rate": 1.1653570540198136e-05, "loss": 0.6256, "step": 3630 }, { "epoch": 0.46, "grad_norm": 0.47830011286747975, "learning_rate": 1.1649489870977648e-05, "loss": 0.5829, "step": 3631 }, { "epoch": 0.46, "grad_norm": 0.7518502567710689, "learning_rate": 1.1645408919384529e-05, "loss": 0.5878, "step": 3632 }, { "epoch": 0.46, "grad_norm": 0.5160701809130398, "learning_rate": 1.1641327686117396e-05, "loss": 0.6048, "step": 3633 }, { "epoch": 0.46, "grad_norm": 0.46613011434706203, "learning_rate": 1.1637246171874905e-05, "loss": 0.5975, "step": 3634 }, { "epoch": 0.46, "grad_norm": 0.5156148269557523, "learning_rate": 1.1633164377355759e-05, "loss": 0.6407, "step": 3635 }, { "epoch": 0.46, "grad_norm": 0.5061873349560816, "learning_rate": 1.1629082303258714e-05, "loss": 0.6071, "step": 3636 }, { "epoch": 0.46, "grad_norm": 0.4176339450453117, "learning_rate": 1.1624999950282572e-05, "loss": 0.5859, "step": 3637 }, { "epoch": 0.46, "grad_norm": 0.4288868856166597, "learning_rate": 1.1620917319126179e-05, "loss": 0.5646, "step": 3638 }, { "epoch": 0.46, "grad_norm": 0.5140168860307869, "learning_rate": 1.1616834410488436e-05, "loss": 0.6326, "step": 3639 }, { "epoch": 0.46, "grad_norm": 0.49237127830598715, "learning_rate": 1.1612751225068283e-05, "loss": 0.5998, "step": 3640 }, { "epoch": 0.47, "grad_norm": 0.5554121466178366, "learning_rate": 1.1608667763564716e-05, "loss": 0.6433, "step": 3641 }, { "epoch": 0.47, "grad_norm": 0.4537311496157762, "learning_rate": 1.1604584026676771e-05, "loss": 0.6014, "step": 3642 }, { "epoch": 0.47, "grad_norm": 0.46573182200126595, "learning_rate": 1.1600500015103538e-05, "loss": 0.6218, "step": 3643 }, { "epoch": 0.47, "grad_norm": 0.6101838581095744, "learning_rate": 1.1596415729544144e-05, "loss": 0.6088, "step": 3644 }, { "epoch": 0.47, "grad_norm": 0.5114399054851296, "learning_rate": 1.1592331170697773e-05, "loss": 0.6088, "step": 3645 }, { "epoch": 0.47, "grad_norm": 0.5200680022659824, "learning_rate": 1.1588246339263649e-05, "loss": 0.6294, "step": 3646 }, { "epoch": 0.47, "grad_norm": 0.5144882535124146, "learning_rate": 1.1584161235941051e-05, "loss": 0.5932, "step": 3647 }, { "epoch": 0.47, "grad_norm": 0.4698688170225877, "learning_rate": 1.1580075861429295e-05, "loss": 0.5861, "step": 3648 }, { "epoch": 0.47, "grad_norm": 0.5486326502117057, "learning_rate": 1.1575990216427748e-05, "loss": 0.6292, "step": 3649 }, { "epoch": 0.47, "grad_norm": 0.5486898870949898, "learning_rate": 1.1571904301635821e-05, "loss": 0.6541, "step": 3650 }, { "epoch": 0.47, "grad_norm": 0.4665746782006352, "learning_rate": 1.1567818117752976e-05, "loss": 0.6187, "step": 3651 }, { "epoch": 0.47, "grad_norm": 0.5081590124554783, "learning_rate": 1.1563731665478722e-05, "loss": 0.5972, "step": 3652 }, { "epoch": 0.47, "grad_norm": 0.48617555664165013, "learning_rate": 1.1559644945512598e-05, "loss": 0.6202, "step": 3653 }, { "epoch": 0.47, "grad_norm": 0.5275549516518689, "learning_rate": 1.155555795855421e-05, "loss": 0.6006, "step": 3654 }, { "epoch": 0.47, "grad_norm": 0.4773974201469514, "learning_rate": 1.15514707053032e-05, "loss": 0.6132, "step": 3655 }, { "epoch": 0.47, "grad_norm": 0.5502358350464405, "learning_rate": 1.1547383186459256e-05, "loss": 0.6361, "step": 3656 }, { "epoch": 0.47, "grad_norm": 0.494882067362054, "learning_rate": 1.1543295402722106e-05, "loss": 0.5775, "step": 3657 }, { "epoch": 0.47, "grad_norm": 0.5673171791185262, "learning_rate": 1.1539207354791534e-05, "loss": 0.6359, "step": 3658 }, { "epoch": 0.47, "grad_norm": 0.4627565920681831, "learning_rate": 1.1535119043367365e-05, "loss": 0.587, "step": 3659 }, { "epoch": 0.47, "grad_norm": 0.5685497618606077, "learning_rate": 1.1531030469149465e-05, "loss": 0.6307, "step": 3660 }, { "epoch": 0.47, "grad_norm": 0.5533929347119568, "learning_rate": 1.152694163283775e-05, "loss": 0.6356, "step": 3661 }, { "epoch": 0.47, "grad_norm": 0.48479243978700076, "learning_rate": 1.1522852535132177e-05, "loss": 0.6261, "step": 3662 }, { "epoch": 0.47, "grad_norm": 0.4346009738110372, "learning_rate": 1.1518763176732753e-05, "loss": 0.5896, "step": 3663 }, { "epoch": 0.47, "grad_norm": 0.6265397989810094, "learning_rate": 1.1514673558339524e-05, "loss": 0.6183, "step": 3664 }, { "epoch": 0.47, "grad_norm": 0.5284391995213396, "learning_rate": 1.1510583680652584e-05, "loss": 0.611, "step": 3665 }, { "epoch": 0.47, "grad_norm": 0.5211853502015926, "learning_rate": 1.1506493544372073e-05, "loss": 0.5783, "step": 3666 }, { "epoch": 0.47, "grad_norm": 0.5312299237827861, "learning_rate": 1.1502403150198164e-05, "loss": 0.5975, "step": 3667 }, { "epoch": 0.47, "grad_norm": 0.5771714490328365, "learning_rate": 1.1498312498831097e-05, "loss": 0.626, "step": 3668 }, { "epoch": 0.47, "grad_norm": 0.48434689471844805, "learning_rate": 1.1494221590971128e-05, "loss": 0.5978, "step": 3669 }, { "epoch": 0.47, "grad_norm": 0.4737490923977384, "learning_rate": 1.1490130427318578e-05, "loss": 0.5933, "step": 3670 }, { "epoch": 0.47, "grad_norm": 0.47016271181520036, "learning_rate": 1.14860390085738e-05, "loss": 0.5975, "step": 3671 }, { "epoch": 0.47, "grad_norm": 0.4902333445120168, "learning_rate": 1.1481947335437203e-05, "loss": 0.5864, "step": 3672 }, { "epoch": 0.47, "grad_norm": 0.5471892405772595, "learning_rate": 1.1477855408609225e-05, "loss": 0.6527, "step": 3673 }, { "epoch": 0.47, "grad_norm": 0.4882268810681782, "learning_rate": 1.147376322879035e-05, "loss": 0.6106, "step": 3674 }, { "epoch": 0.47, "grad_norm": 0.5164601141972894, "learning_rate": 1.1469670796681122e-05, "loss": 0.6304, "step": 3675 }, { "epoch": 0.47, "grad_norm": 0.46587246726891113, "learning_rate": 1.1465578112982107e-05, "loss": 0.5923, "step": 3676 }, { "epoch": 0.47, "grad_norm": 0.5108070125842804, "learning_rate": 1.1461485178393927e-05, "loss": 0.6369, "step": 3677 }, { "epoch": 0.47, "grad_norm": 0.5641045003247683, "learning_rate": 1.1457391993617237e-05, "loss": 0.6274, "step": 3678 }, { "epoch": 0.47, "grad_norm": 0.4959240144837011, "learning_rate": 1.1453298559352746e-05, "loss": 0.5982, "step": 3679 }, { "epoch": 0.47, "grad_norm": 0.51255153257841, "learning_rate": 1.1449204876301198e-05, "loss": 0.599, "step": 3680 }, { "epoch": 0.47, "grad_norm": 0.6070929877749462, "learning_rate": 1.1445110945163382e-05, "loss": 0.6232, "step": 3681 }, { "epoch": 0.47, "grad_norm": 0.5432053073236904, "learning_rate": 1.1441016766640127e-05, "loss": 0.5943, "step": 3682 }, { "epoch": 0.47, "grad_norm": 0.49278838905866373, "learning_rate": 1.143692234143231e-05, "loss": 0.5959, "step": 3683 }, { "epoch": 0.47, "grad_norm": 0.5283985113312056, "learning_rate": 1.1432827670240848e-05, "loss": 0.6511, "step": 3684 }, { "epoch": 0.47, "grad_norm": 0.4845789914425463, "learning_rate": 1.1428732753766694e-05, "loss": 0.5896, "step": 3685 }, { "epoch": 0.47, "grad_norm": 0.5461437075911302, "learning_rate": 1.1424637592710853e-05, "loss": 0.6173, "step": 3686 }, { "epoch": 0.47, "grad_norm": 0.476484274469607, "learning_rate": 1.1420542187774362e-05, "loss": 0.5938, "step": 3687 }, { "epoch": 0.47, "grad_norm": 0.5508532346364814, "learning_rate": 1.1416446539658308e-05, "loss": 0.6336, "step": 3688 }, { "epoch": 0.47, "grad_norm": 0.5782502584991717, "learning_rate": 1.1412350649063815e-05, "loss": 0.6038, "step": 3689 }, { "epoch": 0.47, "grad_norm": 0.4666097263487832, "learning_rate": 1.140825451669205e-05, "loss": 0.5967, "step": 3690 }, { "epoch": 0.47, "grad_norm": 0.5031246295747775, "learning_rate": 1.140415814324422e-05, "loss": 0.596, "step": 3691 }, { "epoch": 0.47, "grad_norm": 0.5128596585500969, "learning_rate": 1.1400061529421573e-05, "loss": 0.6079, "step": 3692 }, { "epoch": 0.47, "grad_norm": 0.46957508274055926, "learning_rate": 1.1395964675925408e-05, "loss": 0.5676, "step": 3693 }, { "epoch": 0.47, "grad_norm": 0.49131880886351104, "learning_rate": 1.1391867583457048e-05, "loss": 0.6185, "step": 3694 }, { "epoch": 0.47, "grad_norm": 0.4964026610500851, "learning_rate": 1.1387770252717866e-05, "loss": 0.5984, "step": 3695 }, { "epoch": 0.47, "grad_norm": 0.5217773914025063, "learning_rate": 1.1383672684409277e-05, "loss": 0.5826, "step": 3696 }, { "epoch": 0.47, "grad_norm": 0.5387225673362478, "learning_rate": 1.1379574879232736e-05, "loss": 0.6497, "step": 3697 }, { "epoch": 0.47, "grad_norm": 0.49847564164312547, "learning_rate": 1.137547683788974e-05, "loss": 0.6097, "step": 3698 }, { "epoch": 0.47, "grad_norm": 0.516301244973483, "learning_rate": 1.1371378561081815e-05, "loss": 0.5901, "step": 3699 }, { "epoch": 0.47, "grad_norm": 0.5679435090109956, "learning_rate": 1.1367280049510544e-05, "loss": 0.6033, "step": 3700 }, { "epoch": 0.47, "grad_norm": 0.5450879431402795, "learning_rate": 1.136318130387754e-05, "loss": 0.6396, "step": 3701 }, { "epoch": 0.47, "grad_norm": 0.5255733050006837, "learning_rate": 1.1359082324884459e-05, "loss": 0.5809, "step": 3702 }, { "epoch": 0.47, "grad_norm": 0.5003751482442003, "learning_rate": 1.1354983113232993e-05, "loss": 0.5991, "step": 3703 }, { "epoch": 0.47, "grad_norm": 0.5052572653940535, "learning_rate": 1.1350883669624882e-05, "loss": 0.6203, "step": 3704 }, { "epoch": 0.47, "grad_norm": 0.4536458623750883, "learning_rate": 1.1346783994761898e-05, "loss": 0.6029, "step": 3705 }, { "epoch": 0.47, "grad_norm": 0.5787059986263994, "learning_rate": 1.1342684089345859e-05, "loss": 0.6014, "step": 3706 }, { "epoch": 0.47, "grad_norm": 0.47404420489545446, "learning_rate": 1.1338583954078615e-05, "loss": 0.5933, "step": 3707 }, { "epoch": 0.47, "grad_norm": 0.5094177263406365, "learning_rate": 1.133448358966206e-05, "loss": 0.6281, "step": 3708 }, { "epoch": 0.47, "grad_norm": 0.5601453795720883, "learning_rate": 1.1330382996798128e-05, "loss": 0.6309, "step": 3709 }, { "epoch": 0.47, "grad_norm": 0.4969812382575101, "learning_rate": 1.132628217618879e-05, "loss": 0.6177, "step": 3710 }, { "epoch": 0.47, "grad_norm": 0.5866370942971628, "learning_rate": 1.1322181128536058e-05, "loss": 0.6314, "step": 3711 }, { "epoch": 0.47, "grad_norm": 0.5040320526349644, "learning_rate": 1.131807985454198e-05, "loss": 0.6261, "step": 3712 }, { "epoch": 0.47, "grad_norm": 0.5495867525047363, "learning_rate": 1.1313978354908643e-05, "loss": 0.6036, "step": 3713 }, { "epoch": 0.47, "grad_norm": 0.4604148589136291, "learning_rate": 1.1309876630338176e-05, "loss": 0.587, "step": 3714 }, { "epoch": 0.47, "grad_norm": 0.5300177496377494, "learning_rate": 1.1305774681532747e-05, "loss": 0.6289, "step": 3715 }, { "epoch": 0.47, "grad_norm": 0.5435317377850302, "learning_rate": 1.1301672509194551e-05, "loss": 0.6359, "step": 3716 }, { "epoch": 0.47, "grad_norm": 0.4488881667196246, "learning_rate": 1.1297570114025837e-05, "loss": 0.5713, "step": 3717 }, { "epoch": 0.47, "grad_norm": 0.49948548613915983, "learning_rate": 1.1293467496728886e-05, "loss": 0.5951, "step": 3718 }, { "epoch": 0.48, "grad_norm": 0.4993138888826644, "learning_rate": 1.1289364658006018e-05, "loss": 0.6074, "step": 3719 }, { "epoch": 0.48, "grad_norm": 0.46048371237627433, "learning_rate": 1.128526159855958e-05, "loss": 0.6126, "step": 3720 }, { "epoch": 0.48, "grad_norm": 0.5163662493659993, "learning_rate": 1.1281158319091971e-05, "loss": 0.5852, "step": 3721 }, { "epoch": 0.48, "grad_norm": 0.4738802232926938, "learning_rate": 1.1277054820305624e-05, "loss": 0.5887, "step": 3722 }, { "epoch": 0.48, "grad_norm": 0.5301059194962422, "learning_rate": 1.1272951102903012e-05, "loss": 0.671, "step": 3723 }, { "epoch": 0.48, "grad_norm": 0.50180559891686, "learning_rate": 1.1268847167586629e-05, "loss": 0.6369, "step": 3724 }, { "epoch": 0.48, "grad_norm": 0.5198564146044278, "learning_rate": 1.126474301505903e-05, "loss": 0.6119, "step": 3725 }, { "epoch": 0.48, "grad_norm": 0.5303903831506291, "learning_rate": 1.1260638646022792e-05, "loss": 0.6406, "step": 3726 }, { "epoch": 0.48, "grad_norm": 0.5679229312443795, "learning_rate": 1.1256534061180534e-05, "loss": 0.6485, "step": 3727 }, { "epoch": 0.48, "grad_norm": 0.5921847681633697, "learning_rate": 1.1252429261234909e-05, "loss": 0.6118, "step": 3728 }, { "epoch": 0.48, "grad_norm": 0.47595313246615845, "learning_rate": 1.1248324246888613e-05, "loss": 0.5937, "step": 3729 }, { "epoch": 0.48, "grad_norm": 0.5807120195945082, "learning_rate": 1.124421901884437e-05, "loss": 0.6384, "step": 3730 }, { "epoch": 0.48, "grad_norm": 0.5106828412079574, "learning_rate": 1.124011357780495e-05, "loss": 0.5927, "step": 3731 }, { "epoch": 0.48, "grad_norm": 0.5244260805816565, "learning_rate": 1.123600792447315e-05, "loss": 0.587, "step": 3732 }, { "epoch": 0.48, "grad_norm": 0.5483186122600171, "learning_rate": 1.1231902059551809e-05, "loss": 0.6253, "step": 3733 }, { "epoch": 0.48, "grad_norm": 0.5001487590806127, "learning_rate": 1.1227795983743803e-05, "loss": 0.6321, "step": 3734 }, { "epoch": 0.48, "grad_norm": 0.4910971929064404, "learning_rate": 1.1223689697752042e-05, "loss": 0.575, "step": 3735 }, { "epoch": 0.48, "grad_norm": 0.5002886123785767, "learning_rate": 1.1219583202279471e-05, "loss": 0.5902, "step": 3736 }, { "epoch": 0.48, "grad_norm": 0.5428971326842185, "learning_rate": 1.1215476498029075e-05, "loss": 0.6265, "step": 3737 }, { "epoch": 0.48, "grad_norm": 0.5753282913471645, "learning_rate": 1.1211369585703867e-05, "loss": 0.6715, "step": 3738 }, { "epoch": 0.48, "grad_norm": 0.5691251767841086, "learning_rate": 1.1207262466006904e-05, "loss": 0.6215, "step": 3739 }, { "epoch": 0.48, "grad_norm": 0.5234566806639551, "learning_rate": 1.1203155139641276e-05, "loss": 0.6386, "step": 3740 }, { "epoch": 0.48, "grad_norm": 0.5237311228269155, "learning_rate": 1.1199047607310103e-05, "loss": 0.6022, "step": 3741 }, { "epoch": 0.48, "grad_norm": 0.5228378602214806, "learning_rate": 1.1194939869716546e-05, "loss": 0.6155, "step": 3742 }, { "epoch": 0.48, "grad_norm": 0.5512296713025169, "learning_rate": 1.1190831927563801e-05, "loss": 0.6067, "step": 3743 }, { "epoch": 0.48, "grad_norm": 0.5630894675611141, "learning_rate": 1.1186723781555102e-05, "loss": 0.6295, "step": 3744 }, { "epoch": 0.48, "grad_norm": 0.5569022027384902, "learning_rate": 1.1182615432393707e-05, "loss": 0.615, "step": 3745 }, { "epoch": 0.48, "grad_norm": 0.4539819804510772, "learning_rate": 1.1178506880782915e-05, "loss": 0.588, "step": 3746 }, { "epoch": 0.48, "grad_norm": 0.4036896379942333, "learning_rate": 1.1174398127426067e-05, "loss": 0.5733, "step": 3747 }, { "epoch": 0.48, "grad_norm": 0.5815222889993942, "learning_rate": 1.1170289173026527e-05, "loss": 0.6204, "step": 3748 }, { "epoch": 0.48, "grad_norm": 0.5313853947455256, "learning_rate": 1.1166180018287696e-05, "loss": 0.6013, "step": 3749 }, { "epoch": 0.48, "grad_norm": 0.5066159785404787, "learning_rate": 1.1162070663913015e-05, "loss": 0.6155, "step": 3750 }, { "epoch": 0.48, "grad_norm": 0.577570348059271, "learning_rate": 1.1157961110605958e-05, "loss": 0.6293, "step": 3751 }, { "epoch": 0.48, "grad_norm": 0.4495016470386275, "learning_rate": 1.1153851359070025e-05, "loss": 0.6044, "step": 3752 }, { "epoch": 0.48, "grad_norm": 0.539643732581661, "learning_rate": 1.1149741410008756e-05, "loss": 0.6013, "step": 3753 }, { "epoch": 0.48, "grad_norm": 0.5018875817480504, "learning_rate": 1.1145631264125729e-05, "loss": 0.6267, "step": 3754 }, { "epoch": 0.48, "grad_norm": 0.4939315459804027, "learning_rate": 1.1141520922124547e-05, "loss": 0.5949, "step": 3755 }, { "epoch": 0.48, "grad_norm": 0.5061037249219462, "learning_rate": 1.1137410384708851e-05, "loss": 0.6057, "step": 3756 }, { "epoch": 0.48, "grad_norm": 0.5228041389895405, "learning_rate": 1.1133299652582317e-05, "loss": 0.6223, "step": 3757 }, { "epoch": 0.48, "grad_norm": 0.5215069945991834, "learning_rate": 1.1129188726448651e-05, "loss": 0.6203, "step": 3758 }, { "epoch": 0.48, "grad_norm": 0.6802687644502423, "learning_rate": 1.1125077607011594e-05, "loss": 0.6661, "step": 3759 }, { "epoch": 0.48, "grad_norm": 0.5227424868693219, "learning_rate": 1.1120966294974917e-05, "loss": 0.6006, "step": 3760 }, { "epoch": 0.48, "grad_norm": 0.5036381053341933, "learning_rate": 1.1116854791042432e-05, "loss": 0.6163, "step": 3761 }, { "epoch": 0.48, "grad_norm": 0.49247824246848204, "learning_rate": 1.1112743095917978e-05, "loss": 0.6009, "step": 3762 }, { "epoch": 0.48, "grad_norm": 0.49256302963419635, "learning_rate": 1.1108631210305418e-05, "loss": 0.5878, "step": 3763 }, { "epoch": 0.48, "grad_norm": 0.5129205449623729, "learning_rate": 1.110451913490867e-05, "loss": 0.6267, "step": 3764 }, { "epoch": 0.48, "grad_norm": 0.4880110037586828, "learning_rate": 1.1100406870431664e-05, "loss": 0.6098, "step": 3765 }, { "epoch": 0.48, "grad_norm": 0.44985979278497884, "learning_rate": 1.1096294417578369e-05, "loss": 0.5759, "step": 3766 }, { "epoch": 0.48, "grad_norm": 0.5513124082715986, "learning_rate": 1.1092181777052788e-05, "loss": 0.6512, "step": 3767 }, { "epoch": 0.48, "grad_norm": 0.5629640382396431, "learning_rate": 1.1088068949558956e-05, "loss": 0.6484, "step": 3768 }, { "epoch": 0.48, "grad_norm": 0.49004655103920564, "learning_rate": 1.1083955935800946e-05, "loss": 0.5739, "step": 3769 }, { "epoch": 0.48, "grad_norm": 0.44540825826694647, "learning_rate": 1.107984273648284e-05, "loss": 0.5784, "step": 3770 }, { "epoch": 0.48, "grad_norm": 0.5557279452161284, "learning_rate": 1.1075729352308781e-05, "loss": 0.6222, "step": 3771 }, { "epoch": 0.48, "grad_norm": 0.5954680293177377, "learning_rate": 1.1071615783982925e-05, "loss": 0.6358, "step": 3772 }, { "epoch": 0.48, "grad_norm": 0.4790484473768679, "learning_rate": 1.1067502032209467e-05, "loss": 0.6035, "step": 3773 }, { "epoch": 0.48, "grad_norm": 0.5224117125475172, "learning_rate": 1.1063388097692632e-05, "loss": 0.6362, "step": 3774 }, { "epoch": 0.48, "grad_norm": 0.63003897361169, "learning_rate": 1.1059273981136671e-05, "loss": 0.641, "step": 3775 }, { "epoch": 0.48, "grad_norm": 0.48098073901123517, "learning_rate": 1.1055159683245876e-05, "loss": 0.5775, "step": 3776 }, { "epoch": 0.48, "grad_norm": 0.5033089219750887, "learning_rate": 1.1051045204724563e-05, "loss": 0.5735, "step": 3777 }, { "epoch": 0.48, "grad_norm": 0.6623835934805098, "learning_rate": 1.1046930546277077e-05, "loss": 0.6958, "step": 3778 }, { "epoch": 0.48, "grad_norm": 0.47510288426487657, "learning_rate": 1.1042815708607804e-05, "loss": 0.6203, "step": 3779 }, { "epoch": 0.48, "grad_norm": 0.5568745772731665, "learning_rate": 1.103870069242115e-05, "loss": 0.6513, "step": 3780 }, { "epoch": 0.48, "grad_norm": 0.5804940529098337, "learning_rate": 1.1034585498421557e-05, "loss": 0.6214, "step": 3781 }, { "epoch": 0.48, "grad_norm": 0.5164043639246442, "learning_rate": 1.1030470127313496e-05, "loss": 0.5803, "step": 3782 }, { "epoch": 0.48, "grad_norm": 0.49609506303398215, "learning_rate": 1.1026354579801468e-05, "loss": 0.5961, "step": 3783 }, { "epoch": 0.48, "grad_norm": 0.5988818229095753, "learning_rate": 1.1022238856590006e-05, "loss": 0.6213, "step": 3784 }, { "epoch": 0.48, "grad_norm": 0.4658155327670523, "learning_rate": 1.101812295838367e-05, "loss": 0.6244, "step": 3785 }, { "epoch": 0.48, "grad_norm": 0.537129473046371, "learning_rate": 1.1014006885887056e-05, "loss": 0.6155, "step": 3786 }, { "epoch": 0.48, "grad_norm": 0.603318436109203, "learning_rate": 1.1009890639804782e-05, "loss": 0.6377, "step": 3787 }, { "epoch": 0.48, "grad_norm": 0.5652151894147138, "learning_rate": 1.1005774220841496e-05, "loss": 0.582, "step": 3788 }, { "epoch": 0.48, "grad_norm": 0.5224630552215884, "learning_rate": 1.1001657629701885e-05, "loss": 0.6125, "step": 3789 }, { "epoch": 0.48, "grad_norm": 0.508647245194714, "learning_rate": 1.099754086709066e-05, "loss": 0.6013, "step": 3790 }, { "epoch": 0.48, "grad_norm": 0.4889532430872199, "learning_rate": 1.0993423933712556e-05, "loss": 0.5813, "step": 3791 }, { "epoch": 0.48, "grad_norm": 0.5503340575642491, "learning_rate": 1.0989306830272342e-05, "loss": 0.61, "step": 3792 }, { "epoch": 0.48, "grad_norm": 0.532408470842085, "learning_rate": 1.098518955747482e-05, "loss": 0.6314, "step": 3793 }, { "epoch": 0.48, "grad_norm": 0.5881680923993953, "learning_rate": 1.0981072116024814e-05, "loss": 0.6875, "step": 3794 }, { "epoch": 0.48, "grad_norm": 0.5789231087239224, "learning_rate": 1.0976954506627181e-05, "loss": 0.6127, "step": 3795 }, { "epoch": 0.48, "grad_norm": 0.48415899670757095, "learning_rate": 1.0972836729986807e-05, "loss": 0.6034, "step": 3796 }, { "epoch": 0.49, "grad_norm": 0.5040089585252077, "learning_rate": 1.0968718786808603e-05, "loss": 0.5867, "step": 3797 }, { "epoch": 0.49, "grad_norm": 0.5954944758302431, "learning_rate": 1.0964600677797513e-05, "loss": 0.6261, "step": 3798 }, { "epoch": 0.49, "grad_norm": 0.4457438140227092, "learning_rate": 1.0960482403658505e-05, "loss": 0.6034, "step": 3799 }, { "epoch": 0.49, "grad_norm": 0.4884666472002569, "learning_rate": 1.095636396509658e-05, "loss": 0.5757, "step": 3800 }, { "epoch": 0.49, "grad_norm": 0.5675211986376327, "learning_rate": 1.0952245362816762e-05, "loss": 0.6105, "step": 3801 }, { "epoch": 0.49, "grad_norm": 0.5159994630309338, "learning_rate": 1.0948126597524108e-05, "loss": 0.5893, "step": 3802 }, { "epoch": 0.49, "grad_norm": 0.5519097904993772, "learning_rate": 1.0944007669923699e-05, "loss": 0.624, "step": 3803 }, { "epoch": 0.49, "grad_norm": 0.5040058818115783, "learning_rate": 1.0939888580720646e-05, "loss": 0.5918, "step": 3804 }, { "epoch": 0.49, "grad_norm": 0.5433773305289366, "learning_rate": 1.0935769330620085e-05, "loss": 0.613, "step": 3805 }, { "epoch": 0.49, "grad_norm": 0.4685876058254292, "learning_rate": 1.0931649920327185e-05, "loss": 0.5827, "step": 3806 }, { "epoch": 0.49, "grad_norm": 0.5505442490425262, "learning_rate": 1.092753035054714e-05, "loss": 0.6425, "step": 3807 }, { "epoch": 0.49, "grad_norm": 0.526509817231073, "learning_rate": 1.0923410621985165e-05, "loss": 0.5892, "step": 3808 }, { "epoch": 0.49, "grad_norm": 0.5366847300712737, "learning_rate": 1.091929073534651e-05, "loss": 0.6118, "step": 3809 }, { "epoch": 0.49, "grad_norm": 0.5038233070348496, "learning_rate": 1.0915170691336448e-05, "loss": 0.6147, "step": 3810 }, { "epoch": 0.49, "grad_norm": 0.4835153859860178, "learning_rate": 1.0911050490660289e-05, "loss": 0.6055, "step": 3811 }, { "epoch": 0.49, "grad_norm": 0.5076393647568075, "learning_rate": 1.0906930134023348e-05, "loss": 0.6172, "step": 3812 }, { "epoch": 0.49, "grad_norm": 0.5159016739402711, "learning_rate": 1.0902809622130988e-05, "loss": 0.5808, "step": 3813 }, { "epoch": 0.49, "grad_norm": 0.5364328519693093, "learning_rate": 1.089868895568859e-05, "loss": 0.5904, "step": 3814 }, { "epoch": 0.49, "grad_norm": 0.493565694970529, "learning_rate": 1.0894568135401562e-05, "loss": 0.5812, "step": 3815 }, { "epoch": 0.49, "grad_norm": 0.45641099435112414, "learning_rate": 1.089044716197534e-05, "loss": 0.5912, "step": 3816 }, { "epoch": 0.49, "grad_norm": 0.5765106619684902, "learning_rate": 1.0886326036115376e-05, "loss": 0.6591, "step": 3817 }, { "epoch": 0.49, "grad_norm": 0.4978514488158043, "learning_rate": 1.0882204758527164e-05, "loss": 0.582, "step": 3818 }, { "epoch": 0.49, "grad_norm": 0.5645214425968709, "learning_rate": 1.0878083329916218e-05, "loss": 0.6358, "step": 3819 }, { "epoch": 0.49, "grad_norm": 0.49252009734368674, "learning_rate": 1.0873961750988074e-05, "loss": 0.5936, "step": 3820 }, { "epoch": 0.49, "grad_norm": 0.5402695511653278, "learning_rate": 1.0869840022448294e-05, "loss": 0.6024, "step": 3821 }, { "epoch": 0.49, "grad_norm": 0.5508338850775071, "learning_rate": 1.0865718145002473e-05, "loss": 0.6234, "step": 3822 }, { "epoch": 0.49, "grad_norm": 0.5414766846806944, "learning_rate": 1.0861596119356221e-05, "loss": 0.6234, "step": 3823 }, { "epoch": 0.49, "grad_norm": 0.5332220372177374, "learning_rate": 1.0857473946215183e-05, "loss": 0.6489, "step": 3824 }, { "epoch": 0.49, "grad_norm": 0.5335295297834803, "learning_rate": 1.085335162628502e-05, "loss": 0.6109, "step": 3825 }, { "epoch": 0.49, "grad_norm": 0.5667813321370468, "learning_rate": 1.0849229160271431e-05, "loss": 0.6391, "step": 3826 }, { "epoch": 0.49, "grad_norm": 0.5519936454679139, "learning_rate": 1.0845106548880122e-05, "loss": 0.6255, "step": 3827 }, { "epoch": 0.49, "grad_norm": 0.49922902703335525, "learning_rate": 1.0840983792816844e-05, "loss": 0.6095, "step": 3828 }, { "epoch": 0.49, "grad_norm": 0.5380928397025614, "learning_rate": 1.0836860892787355e-05, "loss": 0.6292, "step": 3829 }, { "epoch": 0.49, "grad_norm": 0.5532941639277893, "learning_rate": 1.083273784949745e-05, "loss": 0.6545, "step": 3830 }, { "epoch": 0.49, "grad_norm": 0.5168170923569795, "learning_rate": 1.0828614663652938e-05, "loss": 0.6109, "step": 3831 }, { "epoch": 0.49, "grad_norm": 0.45312302976093616, "learning_rate": 1.0824491335959669e-05, "loss": 0.5943, "step": 3832 }, { "epoch": 0.49, "grad_norm": 0.4696205817171171, "learning_rate": 1.0820367867123495e-05, "loss": 0.5759, "step": 3833 }, { "epoch": 0.49, "grad_norm": 0.5274850827960939, "learning_rate": 1.0816244257850312e-05, "loss": 0.6408, "step": 3834 }, { "epoch": 0.49, "grad_norm": 0.5425218317851503, "learning_rate": 1.0812120508846026e-05, "loss": 0.6276, "step": 3835 }, { "epoch": 0.49, "grad_norm": 0.5260941605539227, "learning_rate": 1.080799662081658e-05, "loss": 0.5962, "step": 3836 }, { "epoch": 0.49, "grad_norm": 0.4404940913379684, "learning_rate": 1.0803872594467928e-05, "loss": 0.5379, "step": 3837 }, { "epoch": 0.49, "grad_norm": 0.5065153962026935, "learning_rate": 1.079974843050605e-05, "loss": 0.598, "step": 3838 }, { "epoch": 0.49, "grad_norm": 0.4892722172797314, "learning_rate": 1.0795624129636959e-05, "loss": 0.6046, "step": 3839 }, { "epoch": 0.49, "grad_norm": 0.4491209123337429, "learning_rate": 1.0791499692566685e-05, "loss": 0.5794, "step": 3840 }, { "epoch": 0.49, "grad_norm": 0.5518401765238297, "learning_rate": 1.0787375120001281e-05, "loss": 0.6394, "step": 3841 }, { "epoch": 0.49, "grad_norm": 0.5037304078231569, "learning_rate": 1.0783250412646819e-05, "loss": 0.6283, "step": 3842 }, { "epoch": 0.49, "grad_norm": 0.5290302895691078, "learning_rate": 1.0779125571209404e-05, "loss": 0.5885, "step": 3843 }, { "epoch": 0.49, "grad_norm": 0.648793439677211, "learning_rate": 1.0775000596395157e-05, "loss": 0.5988, "step": 3844 }, { "epoch": 0.49, "grad_norm": 0.5258995832147756, "learning_rate": 1.0770875488910221e-05, "loss": 0.5885, "step": 3845 }, { "epoch": 0.49, "grad_norm": 0.5598054985312432, "learning_rate": 1.0766750249460771e-05, "loss": 0.6069, "step": 3846 }, { "epoch": 0.49, "grad_norm": 0.5095228467828495, "learning_rate": 1.0762624878752993e-05, "loss": 0.5718, "step": 3847 }, { "epoch": 0.49, "grad_norm": 0.5207390235882952, "learning_rate": 1.0758499377493099e-05, "loss": 0.6185, "step": 3848 }, { "epoch": 0.49, "grad_norm": 0.5503617716257454, "learning_rate": 1.0754373746387328e-05, "loss": 0.6152, "step": 3849 }, { "epoch": 0.49, "grad_norm": 0.629427620882634, "learning_rate": 1.0750247986141935e-05, "loss": 0.6463, "step": 3850 }, { "epoch": 0.49, "grad_norm": 0.48418206473287684, "learning_rate": 1.0746122097463205e-05, "loss": 0.6036, "step": 3851 }, { "epoch": 0.49, "grad_norm": 0.5173977097781249, "learning_rate": 1.0741996081057433e-05, "loss": 0.6036, "step": 3852 }, { "epoch": 0.49, "grad_norm": 0.5636309020993839, "learning_rate": 1.0737869937630949e-05, "loss": 0.6002, "step": 3853 }, { "epoch": 0.49, "grad_norm": 0.5145705738187651, "learning_rate": 1.0733743667890098e-05, "loss": 0.5846, "step": 3854 }, { "epoch": 0.49, "grad_norm": 0.5301860939255566, "learning_rate": 1.0729617272541241e-05, "loss": 0.6122, "step": 3855 }, { "epoch": 0.49, "grad_norm": 0.563101733521406, "learning_rate": 1.0725490752290772e-05, "loss": 0.6149, "step": 3856 }, { "epoch": 0.49, "grad_norm": 0.516488889295913, "learning_rate": 1.0721364107845104e-05, "loss": 0.6116, "step": 3857 }, { "epoch": 0.49, "grad_norm": 0.506453736668589, "learning_rate": 1.0717237339910662e-05, "loss": 0.6152, "step": 3858 }, { "epoch": 0.49, "grad_norm": 0.4418165496935043, "learning_rate": 1.07131104491939e-05, "loss": 0.5916, "step": 3859 }, { "epoch": 0.49, "grad_norm": 0.6648799049697625, "learning_rate": 1.0708983436401295e-05, "loss": 0.6523, "step": 3860 }, { "epoch": 0.49, "grad_norm": 0.574077828748746, "learning_rate": 1.070485630223934e-05, "loss": 0.635, "step": 3861 }, { "epoch": 0.49, "grad_norm": 0.5594974369103024, "learning_rate": 1.0700729047414551e-05, "loss": 0.647, "step": 3862 }, { "epoch": 0.49, "grad_norm": 0.48888810389412046, "learning_rate": 1.0696601672633457e-05, "loss": 0.6129, "step": 3863 }, { "epoch": 0.49, "grad_norm": 0.5366888806890536, "learning_rate": 1.0692474178602623e-05, "loss": 0.6364, "step": 3864 }, { "epoch": 0.49, "grad_norm": 0.5480053831409147, "learning_rate": 1.0688346566028622e-05, "loss": 0.6225, "step": 3865 }, { "epoch": 0.49, "grad_norm": 0.535487985340772, "learning_rate": 1.0684218835618054e-05, "loss": 0.5956, "step": 3866 }, { "epoch": 0.49, "grad_norm": 0.48673032170797836, "learning_rate": 1.0680090988077532e-05, "loss": 0.5558, "step": 3867 }, { "epoch": 0.49, "grad_norm": 0.5049038449659993, "learning_rate": 1.0675963024113696e-05, "loss": 0.6278, "step": 3868 }, { "epoch": 0.49, "grad_norm": 0.5139674264640811, "learning_rate": 1.0671834944433201e-05, "loss": 0.6186, "step": 3869 }, { "epoch": 0.49, "grad_norm": 0.5321604165610271, "learning_rate": 1.0667706749742727e-05, "loss": 0.5912, "step": 3870 }, { "epoch": 0.49, "grad_norm": 0.5364191772468857, "learning_rate": 1.0663578440748972e-05, "loss": 0.6192, "step": 3871 }, { "epoch": 0.49, "grad_norm": 0.5187779909795647, "learning_rate": 1.0659450018158648e-05, "loss": 0.6099, "step": 3872 }, { "epoch": 0.49, "grad_norm": 0.4339030593853997, "learning_rate": 1.0655321482678491e-05, "loss": 0.5805, "step": 3873 }, { "epoch": 0.49, "grad_norm": 0.48278708583083857, "learning_rate": 1.0651192835015261e-05, "loss": 0.626, "step": 3874 }, { "epoch": 0.5, "grad_norm": 0.48504371784277084, "learning_rate": 1.0647064075875726e-05, "loss": 0.5899, "step": 3875 }, { "epoch": 0.5, "grad_norm": 0.5233696158240742, "learning_rate": 1.0642935205966685e-05, "loss": 0.6273, "step": 3876 }, { "epoch": 0.5, "grad_norm": 0.5084750794457764, "learning_rate": 1.0638806225994945e-05, "loss": 0.6148, "step": 3877 }, { "epoch": 0.5, "grad_norm": 0.47262959433912577, "learning_rate": 1.0634677136667343e-05, "loss": 0.5998, "step": 3878 }, { "epoch": 0.5, "grad_norm": 0.5572583500198292, "learning_rate": 1.0630547938690728e-05, "loss": 0.6276, "step": 3879 }, { "epoch": 0.5, "grad_norm": 0.48843025450500077, "learning_rate": 1.0626418632771962e-05, "loss": 0.6063, "step": 3880 }, { "epoch": 0.5, "grad_norm": 0.48098402200091317, "learning_rate": 1.0622289219617937e-05, "loss": 0.6014, "step": 3881 }, { "epoch": 0.5, "grad_norm": 0.5154723872469658, "learning_rate": 1.061815969993556e-05, "loss": 0.6389, "step": 3882 }, { "epoch": 0.5, "grad_norm": 0.46151624432485994, "learning_rate": 1.0614030074431754e-05, "loss": 0.5932, "step": 3883 }, { "epoch": 0.5, "grad_norm": 0.5053068740218969, "learning_rate": 1.0609900343813453e-05, "loss": 0.6322, "step": 3884 }, { "epoch": 0.5, "grad_norm": 0.4769609378455114, "learning_rate": 1.0605770508787627e-05, "loss": 0.6084, "step": 3885 }, { "epoch": 0.5, "grad_norm": 0.5697908658813186, "learning_rate": 1.060164057006125e-05, "loss": 0.6022, "step": 3886 }, { "epoch": 0.5, "grad_norm": 0.4630142447001233, "learning_rate": 1.059751052834132e-05, "loss": 0.5742, "step": 3887 }, { "epoch": 0.5, "grad_norm": 0.5798033888663638, "learning_rate": 1.0593380384334842e-05, "loss": 0.5967, "step": 3888 }, { "epoch": 0.5, "grad_norm": 0.5902900992887047, "learning_rate": 1.0589250138748852e-05, "loss": 0.6275, "step": 3889 }, { "epoch": 0.5, "grad_norm": 0.5937378601795473, "learning_rate": 1.0585119792290397e-05, "loss": 0.6078, "step": 3890 }, { "epoch": 0.5, "grad_norm": 0.47171341982520815, "learning_rate": 1.0580989345666544e-05, "loss": 0.579, "step": 3891 }, { "epoch": 0.5, "grad_norm": 0.5378725874603877, "learning_rate": 1.0576858799584373e-05, "loss": 0.6045, "step": 3892 }, { "epoch": 0.5, "grad_norm": 0.5368536385604671, "learning_rate": 1.0572728154750984e-05, "loss": 0.6322, "step": 3893 }, { "epoch": 0.5, "grad_norm": 0.5232583457826074, "learning_rate": 1.0568597411873492e-05, "loss": 0.6088, "step": 3894 }, { "epoch": 0.5, "grad_norm": 0.550991937818215, "learning_rate": 1.0564466571659032e-05, "loss": 0.6026, "step": 3895 }, { "epoch": 0.5, "grad_norm": 0.5462104477957237, "learning_rate": 1.0560335634814753e-05, "loss": 0.623, "step": 3896 }, { "epoch": 0.5, "grad_norm": 0.5117569637734976, "learning_rate": 1.0556204602047822e-05, "loss": 0.6156, "step": 3897 }, { "epoch": 0.5, "grad_norm": 0.49509242476164733, "learning_rate": 1.055207347406542e-05, "loss": 0.5914, "step": 3898 }, { "epoch": 0.5, "grad_norm": 0.5255245640080105, "learning_rate": 1.0547942251574743e-05, "loss": 0.6028, "step": 3899 }, { "epoch": 0.5, "grad_norm": 0.5404938077422691, "learning_rate": 1.0543810935283014e-05, "loss": 0.6536, "step": 3900 }, { "epoch": 0.5, "grad_norm": 0.5076882998740208, "learning_rate": 1.0539679525897457e-05, "loss": 0.6131, "step": 3901 }, { "epoch": 0.5, "grad_norm": 0.5287882630495336, "learning_rate": 1.0535548024125322e-05, "loss": 0.6027, "step": 3902 }, { "epoch": 0.5, "grad_norm": 0.4955358127182332, "learning_rate": 1.0531416430673872e-05, "loss": 0.5692, "step": 3903 }, { "epoch": 0.5, "grad_norm": 0.5359004692060383, "learning_rate": 1.052728474625039e-05, "loss": 0.664, "step": 3904 }, { "epoch": 0.5, "grad_norm": 0.6299780677706267, "learning_rate": 1.052315297156216e-05, "loss": 0.6693, "step": 3905 }, { "epoch": 0.5, "grad_norm": 0.4720156080980273, "learning_rate": 1.0519021107316499e-05, "loss": 0.606, "step": 3906 }, { "epoch": 0.5, "grad_norm": 0.510892751452944, "learning_rate": 1.0514889154220728e-05, "loss": 0.6274, "step": 3907 }, { "epoch": 0.5, "grad_norm": 0.4844009285284467, "learning_rate": 1.0510757112982195e-05, "loss": 0.6013, "step": 3908 }, { "epoch": 0.5, "grad_norm": 0.49957340113061816, "learning_rate": 1.0506624984308242e-05, "loss": 0.5824, "step": 3909 }, { "epoch": 0.5, "grad_norm": 0.5094517958830941, "learning_rate": 1.0502492768906251e-05, "loss": 0.5974, "step": 3910 }, { "epoch": 0.5, "grad_norm": 0.8066789046321203, "learning_rate": 1.0498360467483604e-05, "loss": 0.6807, "step": 3911 }, { "epoch": 0.5, "grad_norm": 0.6286842193222674, "learning_rate": 1.0494228080747698e-05, "loss": 0.6444, "step": 3912 }, { "epoch": 0.5, "grad_norm": 0.5152633805573783, "learning_rate": 1.0490095609405951e-05, "loss": 0.6007, "step": 3913 }, { "epoch": 0.5, "grad_norm": 0.47547258640954165, "learning_rate": 1.0485963054165786e-05, "loss": 0.5892, "step": 3914 }, { "epoch": 0.5, "grad_norm": 0.49173661899634347, "learning_rate": 1.0481830415734655e-05, "loss": 0.6013, "step": 3915 }, { "epoch": 0.5, "grad_norm": 0.5397576525590371, "learning_rate": 1.0477697694820008e-05, "loss": 0.6377, "step": 3916 }, { "epoch": 0.5, "grad_norm": 0.5378972443647984, "learning_rate": 1.047356489212932e-05, "loss": 0.6076, "step": 3917 }, { "epoch": 0.5, "grad_norm": 0.532080908637131, "learning_rate": 1.0469432008370076e-05, "loss": 0.6248, "step": 3918 }, { "epoch": 0.5, "grad_norm": 0.5176346804213914, "learning_rate": 1.0465299044249776e-05, "loss": 0.6134, "step": 3919 }, { "epoch": 0.5, "grad_norm": 0.5429343385668529, "learning_rate": 1.0461166000475929e-05, "loss": 0.623, "step": 3920 }, { "epoch": 0.5, "grad_norm": 0.5334226696097836, "learning_rate": 1.0457032877756068e-05, "loss": 0.6089, "step": 3921 }, { "epoch": 0.5, "grad_norm": 0.5770045395242989, "learning_rate": 1.0452899676797733e-05, "loss": 0.686, "step": 3922 }, { "epoch": 0.5, "grad_norm": 0.5665205921233333, "learning_rate": 1.044876639830847e-05, "loss": 0.6037, "step": 3923 }, { "epoch": 0.5, "grad_norm": 0.5211440219915913, "learning_rate": 1.0444633042995855e-05, "loss": 0.62, "step": 3924 }, { "epoch": 0.5, "grad_norm": 0.5128972769710067, "learning_rate": 1.044049961156747e-05, "loss": 0.5888, "step": 3925 }, { "epoch": 0.5, "grad_norm": 0.48762703394076873, "learning_rate": 1.0436366104730897e-05, "loss": 0.5898, "step": 3926 }, { "epoch": 0.5, "grad_norm": 0.5989777135563942, "learning_rate": 1.0432232523193748e-05, "loss": 0.615, "step": 3927 }, { "epoch": 0.5, "grad_norm": 0.46535657502637545, "learning_rate": 1.0428098867663645e-05, "loss": 0.6283, "step": 3928 }, { "epoch": 0.5, "grad_norm": 0.5697995990002666, "learning_rate": 1.042396513884822e-05, "loss": 0.6515, "step": 3929 }, { "epoch": 0.5, "grad_norm": 0.4951050626942574, "learning_rate": 1.041983133745511e-05, "loss": 0.6117, "step": 3930 }, { "epoch": 0.5, "grad_norm": 0.5982058453914462, "learning_rate": 1.0415697464191974e-05, "loss": 0.6537, "step": 3931 }, { "epoch": 0.5, "grad_norm": 0.4985757818070894, "learning_rate": 1.0411563519766485e-05, "loss": 0.5866, "step": 3932 }, { "epoch": 0.5, "grad_norm": 0.4796868435814525, "learning_rate": 1.0407429504886323e-05, "loss": 0.5876, "step": 3933 }, { "epoch": 0.5, "grad_norm": 0.5199705315582769, "learning_rate": 1.0403295420259177e-05, "loss": 0.6095, "step": 3934 }, { "epoch": 0.5, "grad_norm": 0.5042884119156683, "learning_rate": 1.0399161266592755e-05, "loss": 0.5957, "step": 3935 }, { "epoch": 0.5, "grad_norm": 0.5254043609046, "learning_rate": 1.0395027044594774e-05, "loss": 0.6112, "step": 3936 }, { "epoch": 0.5, "grad_norm": 0.5463651655637694, "learning_rate": 1.0390892754972964e-05, "loss": 0.6251, "step": 3937 }, { "epoch": 0.5, "grad_norm": 0.5546510031041657, "learning_rate": 1.0386758398435061e-05, "loss": 0.6147, "step": 3938 }, { "epoch": 0.5, "grad_norm": 0.5520099738862704, "learning_rate": 1.0382623975688821e-05, "loss": 0.6568, "step": 3939 }, { "epoch": 0.5, "grad_norm": 0.47379193307187983, "learning_rate": 1.0378489487442005e-05, "loss": 0.6008, "step": 3940 }, { "epoch": 0.5, "grad_norm": 0.504985427646881, "learning_rate": 1.0374354934402386e-05, "loss": 0.6003, "step": 3941 }, { "epoch": 0.5, "grad_norm": 0.5169449625032932, "learning_rate": 1.037022031727775e-05, "loss": 0.6179, "step": 3942 }, { "epoch": 0.5, "grad_norm": 0.5372382974111477, "learning_rate": 1.0366085636775896e-05, "loss": 0.6009, "step": 3943 }, { "epoch": 0.5, "grad_norm": 0.549536901175337, "learning_rate": 1.0361950893604626e-05, "loss": 0.6117, "step": 3944 }, { "epoch": 0.5, "grad_norm": 0.5819212121663919, "learning_rate": 1.035781608847176e-05, "loss": 0.6055, "step": 3945 }, { "epoch": 0.5, "grad_norm": 0.4997555631544356, "learning_rate": 1.035368122208513e-05, "loss": 0.5963, "step": 3946 }, { "epoch": 0.5, "grad_norm": 0.49330654694278153, "learning_rate": 1.034954629515257e-05, "loss": 0.5907, "step": 3947 }, { "epoch": 0.5, "grad_norm": 0.5962932837889228, "learning_rate": 1.0345411308381927e-05, "loss": 0.6561, "step": 3948 }, { "epoch": 0.5, "grad_norm": 0.509083499857228, "learning_rate": 1.034127626248107e-05, "loss": 0.5854, "step": 3949 }, { "epoch": 0.5, "grad_norm": 0.4981381762726286, "learning_rate": 1.0337141158157863e-05, "loss": 0.5965, "step": 3950 }, { "epoch": 0.5, "grad_norm": 0.5758694379720098, "learning_rate": 1.0333005996120185e-05, "loss": 0.6405, "step": 3951 }, { "epoch": 0.5, "grad_norm": 0.5703824503604126, "learning_rate": 1.0328870777075923e-05, "loss": 0.6412, "step": 3952 }, { "epoch": 0.5, "grad_norm": 0.5568723157996361, "learning_rate": 1.0324735501732984e-05, "loss": 0.6526, "step": 3953 }, { "epoch": 0.51, "grad_norm": 0.4946499308548798, "learning_rate": 1.0320600170799272e-05, "loss": 0.6035, "step": 3954 }, { "epoch": 0.51, "grad_norm": 0.6341784468015035, "learning_rate": 1.0316464784982703e-05, "loss": 0.6755, "step": 3955 }, { "epoch": 0.51, "grad_norm": 0.4818084398575927, "learning_rate": 1.0312329344991207e-05, "loss": 0.6101, "step": 3956 }, { "epoch": 0.51, "grad_norm": 0.466536048295194, "learning_rate": 1.0308193851532723e-05, "loss": 0.5855, "step": 3957 }, { "epoch": 0.51, "grad_norm": 0.48897371995815025, "learning_rate": 1.0304058305315196e-05, "loss": 0.6163, "step": 3958 }, { "epoch": 0.51, "grad_norm": 0.5453868956731304, "learning_rate": 1.0299922707046583e-05, "loss": 0.6027, "step": 3959 }, { "epoch": 0.51, "grad_norm": 0.4470059649898265, "learning_rate": 1.0295787057434844e-05, "loss": 0.5916, "step": 3960 }, { "epoch": 0.51, "grad_norm": 0.4821377589844453, "learning_rate": 1.0291651357187957e-05, "loss": 0.5928, "step": 3961 }, { "epoch": 0.51, "grad_norm": 0.5100538468928073, "learning_rate": 1.0287515607013899e-05, "loss": 0.5994, "step": 3962 }, { "epoch": 0.51, "grad_norm": 0.46057936652310694, "learning_rate": 1.0283379807620663e-05, "loss": 0.5865, "step": 3963 }, { "epoch": 0.51, "grad_norm": 0.4755463091352955, "learning_rate": 1.0279243959716245e-05, "loss": 0.606, "step": 3964 }, { "epoch": 0.51, "grad_norm": 0.4921912727152543, "learning_rate": 1.0275108064008656e-05, "loss": 0.5921, "step": 3965 }, { "epoch": 0.51, "grad_norm": 0.5388377272047804, "learning_rate": 1.0270972121205909e-05, "loss": 0.62, "step": 3966 }, { "epoch": 0.51, "grad_norm": 0.4961793846693212, "learning_rate": 1.0266836132016024e-05, "loss": 0.6148, "step": 3967 }, { "epoch": 0.51, "grad_norm": 0.5163370617724461, "learning_rate": 1.0262700097147038e-05, "loss": 0.6035, "step": 3968 }, { "epoch": 0.51, "grad_norm": 0.490768356448309, "learning_rate": 1.0258564017306987e-05, "loss": 0.5835, "step": 3969 }, { "epoch": 0.51, "grad_norm": 0.5069619927171816, "learning_rate": 1.0254427893203914e-05, "loss": 0.5855, "step": 3970 }, { "epoch": 0.51, "grad_norm": 0.5109383785265886, "learning_rate": 1.025029172554588e-05, "loss": 0.6226, "step": 3971 }, { "epoch": 0.51, "grad_norm": 0.5509047952228452, "learning_rate": 1.0246155515040944e-05, "loss": 0.6225, "step": 3972 }, { "epoch": 0.51, "grad_norm": 0.48246512785469986, "learning_rate": 1.0242019262397171e-05, "loss": 0.6022, "step": 3973 }, { "epoch": 0.51, "grad_norm": 0.5106111554352916, "learning_rate": 1.0237882968322644e-05, "loss": 0.6214, "step": 3974 }, { "epoch": 0.51, "grad_norm": 0.4694418309723568, "learning_rate": 1.0233746633525443e-05, "loss": 0.6141, "step": 3975 }, { "epoch": 0.51, "grad_norm": 0.4833337431388268, "learning_rate": 1.0229610258713656e-05, "loss": 0.5698, "step": 3976 }, { "epoch": 0.51, "grad_norm": 0.5527046578995198, "learning_rate": 1.0225473844595381e-05, "loss": 0.6145, "step": 3977 }, { "epoch": 0.51, "grad_norm": 0.5110747380657702, "learning_rate": 1.0221337391878725e-05, "loss": 0.6182, "step": 3978 }, { "epoch": 0.51, "grad_norm": 0.4949874696470057, "learning_rate": 1.0217200901271798e-05, "loss": 0.6011, "step": 3979 }, { "epoch": 0.51, "grad_norm": 0.5379473148953905, "learning_rate": 1.0213064373482716e-05, "loss": 0.5866, "step": 3980 }, { "epoch": 0.51, "grad_norm": 0.5122926174576488, "learning_rate": 1.0208927809219598e-05, "loss": 0.5844, "step": 3981 }, { "epoch": 0.51, "grad_norm": 0.5421043094688358, "learning_rate": 1.0204791209190581e-05, "loss": 0.6207, "step": 3982 }, { "epoch": 0.51, "grad_norm": 0.5660587851454555, "learning_rate": 1.0200654574103795e-05, "loss": 0.5853, "step": 3983 }, { "epoch": 0.51, "grad_norm": 0.4591985885195636, "learning_rate": 1.0196517904667384e-05, "loss": 0.5919, "step": 3984 }, { "epoch": 0.51, "grad_norm": 0.5572961314389145, "learning_rate": 1.0192381201589497e-05, "loss": 0.6597, "step": 3985 }, { "epoch": 0.51, "grad_norm": 0.47723391499219653, "learning_rate": 1.0188244465578284e-05, "loss": 0.6014, "step": 3986 }, { "epoch": 0.51, "grad_norm": 0.4984907821144709, "learning_rate": 1.0184107697341906e-05, "loss": 0.6035, "step": 3987 }, { "epoch": 0.51, "grad_norm": 0.5105892577307259, "learning_rate": 1.0179970897588528e-05, "loss": 0.5597, "step": 3988 }, { "epoch": 0.51, "grad_norm": 0.5984051451127581, "learning_rate": 1.0175834067026315e-05, "loss": 0.6484, "step": 3989 }, { "epoch": 0.51, "grad_norm": 0.5464691057437974, "learning_rate": 1.017169720636345e-05, "loss": 0.6535, "step": 3990 }, { "epoch": 0.51, "grad_norm": 0.49467968586465094, "learning_rate": 1.0167560316308104e-05, "loss": 0.6126, "step": 3991 }, { "epoch": 0.51, "grad_norm": 0.543426974661915, "learning_rate": 1.0163423397568474e-05, "loss": 0.6624, "step": 3992 }, { "epoch": 0.51, "grad_norm": 0.5070198005707719, "learning_rate": 1.015928645085274e-05, "loss": 0.6015, "step": 3993 }, { "epoch": 0.51, "grad_norm": 0.5802464430846263, "learning_rate": 1.0155149476869103e-05, "loss": 0.6224, "step": 3994 }, { "epoch": 0.51, "grad_norm": 0.504041296160587, "learning_rate": 1.0151012476325759e-05, "loss": 0.6032, "step": 3995 }, { "epoch": 0.51, "grad_norm": 0.5436119644396672, "learning_rate": 1.0146875449930916e-05, "loss": 0.6076, "step": 3996 }, { "epoch": 0.51, "grad_norm": 0.4290743918297381, "learning_rate": 1.014273839839278e-05, "loss": 0.5879, "step": 3997 }, { "epoch": 0.51, "grad_norm": 0.5051779269161719, "learning_rate": 1.0138601322419558e-05, "loss": 0.6316, "step": 3998 }, { "epoch": 0.51, "grad_norm": 0.6580640030002071, "learning_rate": 1.013446422271948e-05, "loss": 0.6259, "step": 3999 }, { "epoch": 0.51, "grad_norm": 0.5085057574280507, "learning_rate": 1.0130327100000758e-05, "loss": 0.5742, "step": 4000 }, { "epoch": 0.51, "grad_norm": 0.5021663417510049, "learning_rate": 1.0126189954971626e-05, "loss": 0.6132, "step": 4001 }, { "epoch": 0.51, "grad_norm": 0.5566649362613655, "learning_rate": 1.01220527883403e-05, "loss": 0.6003, "step": 4002 }, { "epoch": 0.51, "grad_norm": 0.5195548189368842, "learning_rate": 1.0117915600815021e-05, "loss": 0.6283, "step": 4003 }, { "epoch": 0.51, "grad_norm": 0.4600076103786346, "learning_rate": 1.0113778393104023e-05, "loss": 0.5798, "step": 4004 }, { "epoch": 0.51, "grad_norm": 0.4729167256927425, "learning_rate": 1.010964116591555e-05, "loss": 0.5721, "step": 4005 }, { "epoch": 0.51, "grad_norm": 0.4780205685392494, "learning_rate": 1.0105503919957837e-05, "loss": 0.582, "step": 4006 }, { "epoch": 0.51, "grad_norm": 0.49193429277565087, "learning_rate": 1.0101366655939138e-05, "loss": 0.5817, "step": 4007 }, { "epoch": 0.51, "grad_norm": 0.5603851759138216, "learning_rate": 1.0097229374567696e-05, "loss": 0.6238, "step": 4008 }, { "epoch": 0.51, "grad_norm": 0.48032071467012893, "learning_rate": 1.009309207655177e-05, "loss": 0.5936, "step": 4009 }, { "epoch": 0.51, "grad_norm": 0.5387160481069236, "learning_rate": 1.0088954762599609e-05, "loss": 0.6459, "step": 4010 }, { "epoch": 0.51, "grad_norm": 0.4706139843995788, "learning_rate": 1.0084817433419473e-05, "loss": 0.5911, "step": 4011 }, { "epoch": 0.51, "grad_norm": 0.5298196755178389, "learning_rate": 1.0080680089719624e-05, "loss": 0.6035, "step": 4012 }, { "epoch": 0.51, "grad_norm": 0.49932551109769324, "learning_rate": 1.0076542732208325e-05, "loss": 0.6064, "step": 4013 }, { "epoch": 0.51, "grad_norm": 0.5016039224617151, "learning_rate": 1.0072405361593838e-05, "loss": 0.6071, "step": 4014 }, { "epoch": 0.51, "grad_norm": 0.536556677661046, "learning_rate": 1.0068267978584434e-05, "loss": 0.6113, "step": 4015 }, { "epoch": 0.51, "grad_norm": 0.46506896156735594, "learning_rate": 1.0064130583888379e-05, "loss": 0.591, "step": 4016 }, { "epoch": 0.51, "grad_norm": 0.47955542452878747, "learning_rate": 1.0059993178213951e-05, "loss": 0.597, "step": 4017 }, { "epoch": 0.51, "grad_norm": 0.4364253746926834, "learning_rate": 1.0055855762269418e-05, "loss": 0.5765, "step": 4018 }, { "epoch": 0.51, "grad_norm": 0.4801546049839049, "learning_rate": 1.005171833676306e-05, "loss": 0.5974, "step": 4019 }, { "epoch": 0.51, "grad_norm": 0.5836192493676248, "learning_rate": 1.0047580902403144e-05, "loss": 0.5967, "step": 4020 }, { "epoch": 0.51, "grad_norm": 0.5026550454395864, "learning_rate": 1.0043443459897959e-05, "loss": 0.6004, "step": 4021 }, { "epoch": 0.51, "grad_norm": 0.47389610811865707, "learning_rate": 1.0039306009955785e-05, "loss": 0.5865, "step": 4022 }, { "epoch": 0.51, "grad_norm": 0.592208486424045, "learning_rate": 1.0035168553284895e-05, "loss": 0.6797, "step": 4023 }, { "epoch": 0.51, "grad_norm": 0.5211680199711545, "learning_rate": 1.0031031090593576e-05, "loss": 0.591, "step": 4024 }, { "epoch": 0.51, "grad_norm": 0.4287675181673293, "learning_rate": 1.0026893622590116e-05, "loss": 0.5414, "step": 4025 }, { "epoch": 0.51, "grad_norm": 0.5480159267881479, "learning_rate": 1.0022756149982794e-05, "loss": 0.6266, "step": 4026 }, { "epoch": 0.51, "grad_norm": 0.5249648432577714, "learning_rate": 1.001861867347989e-05, "loss": 0.6148, "step": 4027 }, { "epoch": 0.51, "grad_norm": 0.520986799223378, "learning_rate": 1.0014481193789698e-05, "loss": 0.6411, "step": 4028 }, { "epoch": 0.51, "grad_norm": 0.4810882532085323, "learning_rate": 1.0010343711620503e-05, "loss": 0.5929, "step": 4029 }, { "epoch": 0.51, "grad_norm": 0.474998141845159, "learning_rate": 1.000620622768059e-05, "loss": 0.5826, "step": 4030 }, { "epoch": 0.51, "grad_norm": 0.4778784522885456, "learning_rate": 1.0002068742678245e-05, "loss": 0.6285, "step": 4031 }, { "epoch": 0.52, "grad_norm": 0.5265292179456146, "learning_rate": 9.997931257321756e-06, "loss": 0.6012, "step": 4032 }, { "epoch": 0.52, "grad_norm": 0.45455807067982257, "learning_rate": 9.993793772319412e-06, "loss": 0.5878, "step": 4033 }, { "epoch": 0.52, "grad_norm": 0.5026448043874155, "learning_rate": 9.9896562883795e-06, "loss": 0.6155, "step": 4034 }, { "epoch": 0.52, "grad_norm": 0.4904352259060339, "learning_rate": 9.985518806210303e-06, "loss": 0.5729, "step": 4035 }, { "epoch": 0.52, "grad_norm": 0.5093057693003328, "learning_rate": 9.981381326520112e-06, "loss": 0.5966, "step": 4036 }, { "epoch": 0.52, "grad_norm": 0.5531460511059807, "learning_rate": 9.977243850017211e-06, "loss": 0.6295, "step": 4037 }, { "epoch": 0.52, "grad_norm": 0.5296350078443233, "learning_rate": 9.973106377409888e-06, "loss": 0.6201, "step": 4038 }, { "epoch": 0.52, "grad_norm": 0.5069918217405144, "learning_rate": 9.968968909406422e-06, "loss": 0.5844, "step": 4039 }, { "epoch": 0.52, "grad_norm": 0.4547540128708595, "learning_rate": 9.964831446715105e-06, "loss": 0.5932, "step": 4040 }, { "epoch": 0.52, "grad_norm": 0.6410789458568312, "learning_rate": 9.960693990044218e-06, "loss": 0.6434, "step": 4041 }, { "epoch": 0.52, "grad_norm": 0.5749947475149316, "learning_rate": 9.956556540102043e-06, "loss": 0.6204, "step": 4042 }, { "epoch": 0.52, "grad_norm": 0.516470174741019, "learning_rate": 9.952419097596859e-06, "loss": 0.6188, "step": 4043 }, { "epoch": 0.52, "grad_norm": 0.5633213826964243, "learning_rate": 9.948281663236947e-06, "loss": 0.5805, "step": 4044 }, { "epoch": 0.52, "grad_norm": 0.4697049776938414, "learning_rate": 9.944144237730587e-06, "loss": 0.6072, "step": 4045 }, { "epoch": 0.52, "grad_norm": 0.44953889627393195, "learning_rate": 9.94000682178605e-06, "loss": 0.5744, "step": 4046 }, { "epoch": 0.52, "grad_norm": 0.5049246250114618, "learning_rate": 9.935869416111621e-06, "loss": 0.619, "step": 4047 }, { "epoch": 0.52, "grad_norm": 0.5595660146726098, "learning_rate": 9.931732021415569e-06, "loss": 0.6788, "step": 4048 }, { "epoch": 0.52, "grad_norm": 0.551331055072578, "learning_rate": 9.927594638406166e-06, "loss": 0.6157, "step": 4049 }, { "epoch": 0.52, "grad_norm": 0.5085425030222999, "learning_rate": 9.923457267791679e-06, "loss": 0.606, "step": 4050 }, { "epoch": 0.52, "grad_norm": 0.6178240474543687, "learning_rate": 9.919319910280381e-06, "loss": 0.6337, "step": 4051 }, { "epoch": 0.52, "grad_norm": 0.4626080104260685, "learning_rate": 9.915182566580532e-06, "loss": 0.5857, "step": 4052 }, { "epoch": 0.52, "grad_norm": 0.45686974718519935, "learning_rate": 9.911045237400393e-06, "loss": 0.5641, "step": 4053 }, { "epoch": 0.52, "grad_norm": 0.6325736199128205, "learning_rate": 9.906907923448232e-06, "loss": 0.6519, "step": 4054 }, { "epoch": 0.52, "grad_norm": 0.5222240862160582, "learning_rate": 9.902770625432307e-06, "loss": 0.5955, "step": 4055 }, { "epoch": 0.52, "grad_norm": 0.5047934518278112, "learning_rate": 9.898633344060865e-06, "loss": 0.5828, "step": 4056 }, { "epoch": 0.52, "grad_norm": 0.5216055462839209, "learning_rate": 9.894496080042165e-06, "loss": 0.6101, "step": 4057 }, { "epoch": 0.52, "grad_norm": 0.5414865704573015, "learning_rate": 9.890358834084454e-06, "loss": 0.6088, "step": 4058 }, { "epoch": 0.52, "grad_norm": 0.5197727741867144, "learning_rate": 9.886221606895979e-06, "loss": 0.5889, "step": 4059 }, { "epoch": 0.52, "grad_norm": 0.5205672210630514, "learning_rate": 9.88208439918498e-06, "loss": 0.6015, "step": 4060 }, { "epoch": 0.52, "grad_norm": 0.5145747286259343, "learning_rate": 9.877947211659702e-06, "loss": 0.6177, "step": 4061 }, { "epoch": 0.52, "grad_norm": 0.5505015772115864, "learning_rate": 9.873810045028377e-06, "loss": 0.6138, "step": 4062 }, { "epoch": 0.52, "grad_norm": 0.5222788442668139, "learning_rate": 9.869672899999244e-06, "loss": 0.6275, "step": 4063 }, { "epoch": 0.52, "grad_norm": 0.5863239342122597, "learning_rate": 9.865535777280522e-06, "loss": 0.6088, "step": 4064 }, { "epoch": 0.52, "grad_norm": 0.6230090322887186, "learning_rate": 9.861398677580443e-06, "loss": 0.6576, "step": 4065 }, { "epoch": 0.52, "grad_norm": 0.5452715224319673, "learning_rate": 9.857261601607226e-06, "loss": 0.6234, "step": 4066 }, { "epoch": 0.52, "grad_norm": 0.4434048327986926, "learning_rate": 9.853124550069089e-06, "loss": 0.5729, "step": 4067 }, { "epoch": 0.52, "grad_norm": 0.5154350863696293, "learning_rate": 9.848987523674243e-06, "loss": 0.6044, "step": 4068 }, { "epoch": 0.52, "grad_norm": 0.49352588856412677, "learning_rate": 9.844850523130899e-06, "loss": 0.6196, "step": 4069 }, { "epoch": 0.52, "grad_norm": 0.45856289847570064, "learning_rate": 9.840713549147261e-06, "loss": 0.6065, "step": 4070 }, { "epoch": 0.52, "grad_norm": 0.599610893927169, "learning_rate": 9.836576602431527e-06, "loss": 0.5969, "step": 4071 }, { "epoch": 0.52, "grad_norm": 0.4776487190588695, "learning_rate": 9.832439683691897e-06, "loss": 0.6195, "step": 4072 }, { "epoch": 0.52, "grad_norm": 0.5143303929685137, "learning_rate": 9.828302793636555e-06, "loss": 0.5991, "step": 4073 }, { "epoch": 0.52, "grad_norm": 0.427033655715984, "learning_rate": 9.82416593297369e-06, "loss": 0.572, "step": 4074 }, { "epoch": 0.52, "grad_norm": 0.4683526147586168, "learning_rate": 9.820029102411477e-06, "loss": 0.5873, "step": 4075 }, { "epoch": 0.52, "grad_norm": 0.4941517105488219, "learning_rate": 9.815892302658098e-06, "loss": 0.6068, "step": 4076 }, { "epoch": 0.52, "grad_norm": 0.509526749972722, "learning_rate": 9.81175553442172e-06, "loss": 0.5953, "step": 4077 }, { "epoch": 0.52, "grad_norm": 0.4673088899818288, "learning_rate": 9.807618798410507e-06, "loss": 0.5828, "step": 4078 }, { "epoch": 0.52, "grad_norm": 0.4779472289070098, "learning_rate": 9.803482095332619e-06, "loss": 0.6107, "step": 4079 }, { "epoch": 0.52, "grad_norm": 0.514048130572636, "learning_rate": 9.799345425896209e-06, "loss": 0.5571, "step": 4080 }, { "epoch": 0.52, "grad_norm": 0.5812472352912299, "learning_rate": 9.795208790809425e-06, "loss": 0.6358, "step": 4081 }, { "epoch": 0.52, "grad_norm": 0.5044519805073424, "learning_rate": 9.791072190780402e-06, "loss": 0.5701, "step": 4082 }, { "epoch": 0.52, "grad_norm": 0.4974188832436912, "learning_rate": 9.786935626517286e-06, "loss": 0.6235, "step": 4083 }, { "epoch": 0.52, "grad_norm": 0.5595553244660963, "learning_rate": 9.782799098728205e-06, "loss": 0.6284, "step": 4084 }, { "epoch": 0.52, "grad_norm": 0.4833808031251326, "learning_rate": 9.778662608121278e-06, "loss": 0.5764, "step": 4085 }, { "epoch": 0.52, "grad_norm": 0.512599636363432, "learning_rate": 9.77452615540462e-06, "loss": 0.5653, "step": 4086 }, { "epoch": 0.52, "grad_norm": 0.4769586541217207, "learning_rate": 9.77038974128635e-06, "loss": 0.6028, "step": 4087 }, { "epoch": 0.52, "grad_norm": 0.5595035249727958, "learning_rate": 9.766253366474563e-06, "loss": 0.6069, "step": 4088 }, { "epoch": 0.52, "grad_norm": 0.5079226996061355, "learning_rate": 9.762117031677358e-06, "loss": 0.6028, "step": 4089 }, { "epoch": 0.52, "grad_norm": 0.47080745291875226, "learning_rate": 9.75798073760283e-06, "loss": 0.5975, "step": 4090 }, { "epoch": 0.52, "grad_norm": 0.5317801838642622, "learning_rate": 9.75384448495906e-06, "loss": 0.6109, "step": 4091 }, { "epoch": 0.52, "grad_norm": 0.5588783729352866, "learning_rate": 9.74970827445412e-06, "loss": 0.6059, "step": 4092 }, { "epoch": 0.52, "grad_norm": 0.48050528991078684, "learning_rate": 9.74557210679609e-06, "loss": 0.5872, "step": 4093 }, { "epoch": 0.52, "grad_norm": 0.5422897743737743, "learning_rate": 9.74143598269302e-06, "loss": 0.6337, "step": 4094 }, { "epoch": 0.52, "grad_norm": 0.5243405827192479, "learning_rate": 9.737299902852967e-06, "loss": 0.6083, "step": 4095 }, { "epoch": 0.52, "grad_norm": 0.6217568069508098, "learning_rate": 9.733163867983978e-06, "loss": 0.6629, "step": 4096 }, { "epoch": 0.52, "grad_norm": 0.49709497860409735, "learning_rate": 9.729027878794094e-06, "loss": 0.6007, "step": 4097 }, { "epoch": 0.52, "grad_norm": 0.458055097454336, "learning_rate": 9.724891935991347e-06, "loss": 0.5937, "step": 4098 }, { "epoch": 0.52, "grad_norm": 0.7081785499705006, "learning_rate": 9.720756040283757e-06, "loss": 0.603, "step": 4099 }, { "epoch": 0.52, "grad_norm": 0.6637191662624325, "learning_rate": 9.71662019237934e-06, "loss": 0.6441, "step": 4100 }, { "epoch": 0.52, "grad_norm": 0.536613916219038, "learning_rate": 9.712484392986103e-06, "loss": 0.6215, "step": 4101 }, { "epoch": 0.52, "grad_norm": 0.5126686128354818, "learning_rate": 9.70834864281205e-06, "loss": 0.6224, "step": 4102 }, { "epoch": 0.52, "grad_norm": 0.5672797595225999, "learning_rate": 9.704212942565156e-06, "loss": 0.6537, "step": 4103 }, { "epoch": 0.52, "grad_norm": 0.5404443336102971, "learning_rate": 9.700077292953418e-06, "loss": 0.6103, "step": 4104 }, { "epoch": 0.52, "grad_norm": 0.5447690567437727, "learning_rate": 9.695941694684805e-06, "loss": 0.6162, "step": 4105 }, { "epoch": 0.52, "grad_norm": 0.46170174751310683, "learning_rate": 9.691806148467278e-06, "loss": 0.6199, "step": 4106 }, { "epoch": 0.52, "grad_norm": 0.6013152011911781, "learning_rate": 9.687670655008796e-06, "loss": 0.6341, "step": 4107 }, { "epoch": 0.52, "grad_norm": 0.5513882960281822, "learning_rate": 9.683535215017302e-06, "loss": 0.5834, "step": 4108 }, { "epoch": 0.52, "grad_norm": 0.5359772674544031, "learning_rate": 9.679399829200733e-06, "loss": 0.6097, "step": 4109 }, { "epoch": 0.53, "grad_norm": 0.5297470473693392, "learning_rate": 9.675264498267018e-06, "loss": 0.6575, "step": 4110 }, { "epoch": 0.53, "grad_norm": 0.46298004778142554, "learning_rate": 9.671129222924076e-06, "loss": 0.5731, "step": 4111 }, { "epoch": 0.53, "grad_norm": 0.5297342580686988, "learning_rate": 9.666994003879819e-06, "loss": 0.5913, "step": 4112 }, { "epoch": 0.53, "grad_norm": 0.561462309285733, "learning_rate": 9.662858841842138e-06, "loss": 0.6302, "step": 4113 }, { "epoch": 0.53, "grad_norm": 0.5634105103012603, "learning_rate": 9.658723737518932e-06, "loss": 0.628, "step": 4114 }, { "epoch": 0.53, "grad_norm": 0.481471413151951, "learning_rate": 9.654588691618074e-06, "loss": 0.5818, "step": 4115 }, { "epoch": 0.53, "grad_norm": 0.4508315308932125, "learning_rate": 9.650453704847435e-06, "loss": 0.5998, "step": 4116 }, { "epoch": 0.53, "grad_norm": 0.533914313094741, "learning_rate": 9.646318777914874e-06, "loss": 0.6163, "step": 4117 }, { "epoch": 0.53, "grad_norm": 0.552255317414967, "learning_rate": 9.64218391152824e-06, "loss": 0.6072, "step": 4118 }, { "epoch": 0.53, "grad_norm": 0.5083907702511995, "learning_rate": 9.638049106395377e-06, "loss": 0.5987, "step": 4119 }, { "epoch": 0.53, "grad_norm": 0.44543605932195507, "learning_rate": 9.633914363224107e-06, "loss": 0.5645, "step": 4120 }, { "epoch": 0.53, "grad_norm": 0.5877647671260193, "learning_rate": 9.629779682722252e-06, "loss": 0.5929, "step": 4121 }, { "epoch": 0.53, "grad_norm": 0.5026133749839915, "learning_rate": 9.625645065597616e-06, "loss": 0.5878, "step": 4122 }, { "epoch": 0.53, "grad_norm": 0.5079083042048944, "learning_rate": 9.621510512558e-06, "loss": 0.6066, "step": 4123 }, { "epoch": 0.53, "grad_norm": 0.5508672632082828, "learning_rate": 9.617376024311184e-06, "loss": 0.6413, "step": 4124 }, { "epoch": 0.53, "grad_norm": 0.5837960318253245, "learning_rate": 9.613241601564939e-06, "loss": 0.6347, "step": 4125 }, { "epoch": 0.53, "grad_norm": 0.562976478060569, "learning_rate": 9.609107245027038e-06, "loss": 0.6307, "step": 4126 }, { "epoch": 0.53, "grad_norm": 0.5107244197517615, "learning_rate": 9.604972955405228e-06, "loss": 0.5809, "step": 4127 }, { "epoch": 0.53, "grad_norm": 0.5890792346659434, "learning_rate": 9.600838733407247e-06, "loss": 0.6546, "step": 4128 }, { "epoch": 0.53, "grad_norm": 0.5183646937504942, "learning_rate": 9.596704579740827e-06, "loss": 0.6227, "step": 4129 }, { "epoch": 0.53, "grad_norm": 0.5115739615807494, "learning_rate": 9.592570495113682e-06, "loss": 0.6489, "step": 4130 }, { "epoch": 0.53, "grad_norm": 0.48649723080130247, "learning_rate": 9.588436480233518e-06, "loss": 0.5853, "step": 4131 }, { "epoch": 0.53, "grad_norm": 0.5415338526393364, "learning_rate": 9.584302535808027e-06, "loss": 0.6585, "step": 4132 }, { "epoch": 0.53, "grad_norm": 0.4971765599754702, "learning_rate": 9.580168662544893e-06, "loss": 0.6158, "step": 4133 }, { "epoch": 0.53, "grad_norm": 0.44092893704966596, "learning_rate": 9.576034861151785e-06, "loss": 0.5756, "step": 4134 }, { "epoch": 0.53, "grad_norm": 0.500245853278705, "learning_rate": 9.571901132336357e-06, "loss": 0.5937, "step": 4135 }, { "epoch": 0.53, "grad_norm": 0.4990687143784631, "learning_rate": 9.567767476806254e-06, "loss": 0.6074, "step": 4136 }, { "epoch": 0.53, "grad_norm": 0.545707113696661, "learning_rate": 9.563633895269108e-06, "loss": 0.5952, "step": 4137 }, { "epoch": 0.53, "grad_norm": 0.48858339529223327, "learning_rate": 9.559500388432537e-06, "loss": 0.5875, "step": 4138 }, { "epoch": 0.53, "grad_norm": 0.479741196792482, "learning_rate": 9.555366957004145e-06, "loss": 0.5906, "step": 4139 }, { "epoch": 0.53, "grad_norm": 0.4586666533010618, "learning_rate": 9.551233601691529e-06, "loss": 0.6036, "step": 4140 }, { "epoch": 0.53, "grad_norm": 0.5024018905119396, "learning_rate": 9.54710032320227e-06, "loss": 0.6072, "step": 4141 }, { "epoch": 0.53, "grad_norm": 0.5062183364109515, "learning_rate": 9.542967122243935e-06, "loss": 0.6161, "step": 4142 }, { "epoch": 0.53, "grad_norm": 0.5358744672282827, "learning_rate": 9.538833999524073e-06, "loss": 0.6239, "step": 4143 }, { "epoch": 0.53, "grad_norm": 0.48629849684503407, "learning_rate": 9.534700955750231e-06, "loss": 0.6343, "step": 4144 }, { "epoch": 0.53, "grad_norm": 0.5130511814705199, "learning_rate": 9.53056799162993e-06, "loss": 0.6332, "step": 4145 }, { "epoch": 0.53, "grad_norm": 0.561450452741112, "learning_rate": 9.526435107870682e-06, "loss": 0.5971, "step": 4146 }, { "epoch": 0.53, "grad_norm": 0.5160479581231238, "learning_rate": 9.522302305179995e-06, "loss": 0.625, "step": 4147 }, { "epoch": 0.53, "grad_norm": 0.5145646844182377, "learning_rate": 9.51816958426535e-06, "loss": 0.5893, "step": 4148 }, { "epoch": 0.53, "grad_norm": 0.4510996586459437, "learning_rate": 9.514036945834215e-06, "loss": 0.6033, "step": 4149 }, { "epoch": 0.53, "grad_norm": 0.47783693981682074, "learning_rate": 9.509904390594052e-06, "loss": 0.6192, "step": 4150 }, { "epoch": 0.53, "grad_norm": 0.5382070213032029, "learning_rate": 9.505771919252305e-06, "loss": 0.608, "step": 4151 }, { "epoch": 0.53, "grad_norm": 0.5677386567577871, "learning_rate": 9.5016395325164e-06, "loss": 0.5917, "step": 4152 }, { "epoch": 0.53, "grad_norm": 0.5203833308835142, "learning_rate": 9.497507231093749e-06, "loss": 0.6095, "step": 4153 }, { "epoch": 0.53, "grad_norm": 0.5037722765915373, "learning_rate": 9.493375015691758e-06, "loss": 0.5967, "step": 4154 }, { "epoch": 0.53, "grad_norm": 0.5573414278516342, "learning_rate": 9.489242887017807e-06, "loss": 0.6449, "step": 4155 }, { "epoch": 0.53, "grad_norm": 0.4839939709268094, "learning_rate": 9.485110845779273e-06, "loss": 0.5878, "step": 4156 }, { "epoch": 0.53, "grad_norm": 0.5380350521310858, "learning_rate": 9.480978892683506e-06, "loss": 0.6315, "step": 4157 }, { "epoch": 0.53, "grad_norm": 0.4598204166170565, "learning_rate": 9.476847028437844e-06, "loss": 0.5987, "step": 4158 }, { "epoch": 0.53, "grad_norm": 0.5093231735821967, "learning_rate": 9.472715253749617e-06, "loss": 0.6061, "step": 4159 }, { "epoch": 0.53, "grad_norm": 0.5366790873728261, "learning_rate": 9.468583569326129e-06, "loss": 0.5893, "step": 4160 }, { "epoch": 0.53, "grad_norm": 0.5270982291609161, "learning_rate": 9.464451975874678e-06, "loss": 0.6405, "step": 4161 }, { "epoch": 0.53, "grad_norm": 0.5016800711166095, "learning_rate": 9.460320474102546e-06, "loss": 0.621, "step": 4162 }, { "epoch": 0.53, "grad_norm": 0.46999962417151736, "learning_rate": 9.45618906471699e-06, "loss": 0.5995, "step": 4163 }, { "epoch": 0.53, "grad_norm": 0.43731331247599975, "learning_rate": 9.452057748425259e-06, "loss": 0.5915, "step": 4164 }, { "epoch": 0.53, "grad_norm": 0.49030571135549333, "learning_rate": 9.447926525934586e-06, "loss": 0.5949, "step": 4165 }, { "epoch": 0.53, "grad_norm": 0.5604078223269804, "learning_rate": 9.443795397952185e-06, "loss": 0.6348, "step": 4166 }, { "epoch": 0.53, "grad_norm": 0.6500507161678294, "learning_rate": 9.439664365185248e-06, "loss": 0.6209, "step": 4167 }, { "epoch": 0.53, "grad_norm": 0.527760895160244, "learning_rate": 9.43553342834097e-06, "loss": 0.5986, "step": 4168 }, { "epoch": 0.53, "grad_norm": 0.48503950475753405, "learning_rate": 9.43140258812651e-06, "loss": 0.597, "step": 4169 }, { "epoch": 0.53, "grad_norm": 0.524061213086411, "learning_rate": 9.427271845249019e-06, "loss": 0.6297, "step": 4170 }, { "epoch": 0.53, "grad_norm": 0.5396032218866866, "learning_rate": 9.42314120041563e-06, "loss": 0.6143, "step": 4171 }, { "epoch": 0.53, "grad_norm": 0.6134524399051747, "learning_rate": 9.41901065433346e-06, "loss": 0.6777, "step": 4172 }, { "epoch": 0.53, "grad_norm": 0.5236056174074506, "learning_rate": 9.414880207709606e-06, "loss": 0.6419, "step": 4173 }, { "epoch": 0.53, "grad_norm": 0.5225477316587601, "learning_rate": 9.41074986125115e-06, "loss": 0.595, "step": 4174 }, { "epoch": 0.53, "grad_norm": 0.5870557181144461, "learning_rate": 9.40661961566516e-06, "loss": 0.652, "step": 4175 }, { "epoch": 0.53, "grad_norm": 0.4738187821242029, "learning_rate": 9.402489471658684e-06, "loss": 0.6011, "step": 4176 }, { "epoch": 0.53, "grad_norm": 0.5160770405611996, "learning_rate": 9.398359429938753e-06, "loss": 0.6079, "step": 4177 }, { "epoch": 0.53, "grad_norm": 0.5014397274349166, "learning_rate": 9.394229491212375e-06, "loss": 0.5922, "step": 4178 }, { "epoch": 0.53, "grad_norm": 0.44890839187684817, "learning_rate": 9.390099656186549e-06, "loss": 0.6011, "step": 4179 }, { "epoch": 0.53, "grad_norm": 0.487261599088101, "learning_rate": 9.385969925568253e-06, "loss": 0.5856, "step": 4180 }, { "epoch": 0.53, "grad_norm": 0.5105340182659712, "learning_rate": 9.381840300064442e-06, "loss": 0.6102, "step": 4181 }, { "epoch": 0.53, "grad_norm": 0.5664118194268812, "learning_rate": 9.377710780382065e-06, "loss": 0.6677, "step": 4182 }, { "epoch": 0.53, "grad_norm": 0.5733411432588819, "learning_rate": 9.37358136722804e-06, "loss": 0.6349, "step": 4183 }, { "epoch": 0.53, "grad_norm": 0.480184799817175, "learning_rate": 9.369452061309276e-06, "loss": 0.5961, "step": 4184 }, { "epoch": 0.53, "grad_norm": 0.5004186702699825, "learning_rate": 9.365322863332658e-06, "loss": 0.6184, "step": 4185 }, { "epoch": 0.53, "grad_norm": 0.4855443577813279, "learning_rate": 9.361193774005057e-06, "loss": 0.6149, "step": 4186 }, { "epoch": 0.53, "grad_norm": 0.5224108698037884, "learning_rate": 9.35706479403332e-06, "loss": 0.587, "step": 4187 }, { "epoch": 0.54, "grad_norm": 0.5216087667881218, "learning_rate": 9.352935924124278e-06, "loss": 0.6121, "step": 4188 }, { "epoch": 0.54, "grad_norm": 0.46982702052411024, "learning_rate": 9.348807164984742e-06, "loss": 0.5916, "step": 4189 }, { "epoch": 0.54, "grad_norm": 0.49980932519031435, "learning_rate": 9.34467851732151e-06, "loss": 0.5925, "step": 4190 }, { "epoch": 0.54, "grad_norm": 0.48954234674781466, "learning_rate": 9.340549981841357e-06, "loss": 0.5942, "step": 4191 }, { "epoch": 0.54, "grad_norm": 0.5396394258304442, "learning_rate": 9.336421559251032e-06, "loss": 0.6146, "step": 4192 }, { "epoch": 0.54, "grad_norm": 0.5847271454633723, "learning_rate": 9.332293250257275e-06, "loss": 0.6262, "step": 4193 }, { "epoch": 0.54, "grad_norm": 0.5075471933690823, "learning_rate": 9.328165055566802e-06, "loss": 0.578, "step": 4194 }, { "epoch": 0.54, "grad_norm": 0.5248705511445068, "learning_rate": 9.324036975886309e-06, "loss": 0.6307, "step": 4195 }, { "epoch": 0.54, "grad_norm": 0.506800608129747, "learning_rate": 9.319909011922468e-06, "loss": 0.6064, "step": 4196 }, { "epoch": 0.54, "grad_norm": 0.5125424789861948, "learning_rate": 9.315781164381947e-06, "loss": 0.6213, "step": 4197 }, { "epoch": 0.54, "grad_norm": 0.5023705815879648, "learning_rate": 9.31165343397138e-06, "loss": 0.6194, "step": 4198 }, { "epoch": 0.54, "grad_norm": 0.5290708677390319, "learning_rate": 9.30752582139738e-06, "loss": 0.6296, "step": 4199 }, { "epoch": 0.54, "grad_norm": 0.4975121616406111, "learning_rate": 9.303398327366544e-06, "loss": 0.5932, "step": 4200 }, { "epoch": 0.54, "grad_norm": 0.5173297233686922, "learning_rate": 9.299270952585454e-06, "loss": 0.6041, "step": 4201 }, { "epoch": 0.54, "grad_norm": 0.5418570878410925, "learning_rate": 9.295143697760664e-06, "loss": 0.67, "step": 4202 }, { "epoch": 0.54, "grad_norm": 0.5077508211893682, "learning_rate": 9.291016563598705e-06, "loss": 0.6273, "step": 4203 }, { "epoch": 0.54, "grad_norm": 0.496241205110346, "learning_rate": 9.2868895508061e-06, "loss": 0.6043, "step": 4204 }, { "epoch": 0.54, "grad_norm": 0.4314267954767645, "learning_rate": 9.28276266008934e-06, "loss": 0.5738, "step": 4205 }, { "epoch": 0.54, "grad_norm": 0.571053797554078, "learning_rate": 9.2786358921549e-06, "loss": 0.6194, "step": 4206 }, { "epoch": 0.54, "grad_norm": 0.4527269821457088, "learning_rate": 9.274509247709231e-06, "loss": 0.5936, "step": 4207 }, { "epoch": 0.54, "grad_norm": 0.5451507173009366, "learning_rate": 9.270382727458764e-06, "loss": 0.615, "step": 4208 }, { "epoch": 0.54, "grad_norm": 0.5842322600379438, "learning_rate": 9.266256332109909e-06, "loss": 0.6073, "step": 4209 }, { "epoch": 0.54, "grad_norm": 0.5409530789908554, "learning_rate": 9.262130062369053e-06, "loss": 0.6393, "step": 4210 }, { "epoch": 0.54, "grad_norm": 0.5109475725009615, "learning_rate": 9.258003918942568e-06, "loss": 0.5853, "step": 4211 }, { "epoch": 0.54, "grad_norm": 0.48356219113424037, "learning_rate": 9.253877902536799e-06, "loss": 0.5965, "step": 4212 }, { "epoch": 0.54, "grad_norm": 0.46431097637145463, "learning_rate": 9.249752013858067e-06, "loss": 0.5627, "step": 4213 }, { "epoch": 0.54, "grad_norm": 0.5337088276859898, "learning_rate": 9.245626253612675e-06, "loss": 0.6196, "step": 4214 }, { "epoch": 0.54, "grad_norm": 0.5344249473065441, "learning_rate": 9.241500622506906e-06, "loss": 0.6199, "step": 4215 }, { "epoch": 0.54, "grad_norm": 0.4965493774782296, "learning_rate": 9.237375121247012e-06, "loss": 0.6209, "step": 4216 }, { "epoch": 0.54, "grad_norm": 0.5216234167062508, "learning_rate": 9.233249750539229e-06, "loss": 0.6046, "step": 4217 }, { "epoch": 0.54, "grad_norm": 0.5882343953555381, "learning_rate": 9.229124511089777e-06, "loss": 0.6385, "step": 4218 }, { "epoch": 0.54, "grad_norm": 0.5022235462732868, "learning_rate": 9.224999403604846e-06, "loss": 0.6365, "step": 4219 }, { "epoch": 0.54, "grad_norm": 0.47667465446939034, "learning_rate": 9.2208744287906e-06, "loss": 0.5946, "step": 4220 }, { "epoch": 0.54, "grad_norm": 0.5309069513738547, "learning_rate": 9.216749587353185e-06, "loss": 0.5952, "step": 4221 }, { "epoch": 0.54, "grad_norm": 0.5651708354376109, "learning_rate": 9.212624879998724e-06, "loss": 0.6533, "step": 4222 }, { "epoch": 0.54, "grad_norm": 0.5897446313024749, "learning_rate": 9.208500307433317e-06, "loss": 0.6354, "step": 4223 }, { "epoch": 0.54, "grad_norm": 0.4894953832635739, "learning_rate": 9.204375870363041e-06, "loss": 0.6002, "step": 4224 }, { "epoch": 0.54, "grad_norm": 0.5725816256706661, "learning_rate": 9.20025156949395e-06, "loss": 0.6413, "step": 4225 }, { "epoch": 0.54, "grad_norm": 0.48691984220329243, "learning_rate": 9.196127405532076e-06, "loss": 0.6033, "step": 4226 }, { "epoch": 0.54, "grad_norm": 0.5230279185792763, "learning_rate": 9.192003379183424e-06, "loss": 0.6123, "step": 4227 }, { "epoch": 0.54, "grad_norm": 0.530227990759488, "learning_rate": 9.187879491153975e-06, "loss": 0.6335, "step": 4228 }, { "epoch": 0.54, "grad_norm": 0.49882245110155476, "learning_rate": 9.183755742149693e-06, "loss": 0.5927, "step": 4229 }, { "epoch": 0.54, "grad_norm": 0.5370046587763995, "learning_rate": 9.179632132876508e-06, "loss": 0.6427, "step": 4230 }, { "epoch": 0.54, "grad_norm": 0.5095450318765187, "learning_rate": 9.175508664040335e-06, "loss": 0.6068, "step": 4231 }, { "epoch": 0.54, "grad_norm": 0.5151099084248202, "learning_rate": 9.171385336347063e-06, "loss": 0.6319, "step": 4232 }, { "epoch": 0.54, "grad_norm": 0.4806090605038239, "learning_rate": 9.167262150502554e-06, "loss": 0.6004, "step": 4233 }, { "epoch": 0.54, "grad_norm": 0.47481775180713187, "learning_rate": 9.163139107212649e-06, "loss": 0.6063, "step": 4234 }, { "epoch": 0.54, "grad_norm": 0.5114469571248069, "learning_rate": 9.15901620718316e-06, "loss": 0.5994, "step": 4235 }, { "epoch": 0.54, "grad_norm": 0.540275174339047, "learning_rate": 9.154893451119882e-06, "loss": 0.6436, "step": 4236 }, { "epoch": 0.54, "grad_norm": 0.539157956907163, "learning_rate": 9.150770839728576e-06, "loss": 0.6208, "step": 4237 }, { "epoch": 0.54, "grad_norm": 0.4594977446509714, "learning_rate": 9.14664837371498e-06, "loss": 0.5729, "step": 4238 }, { "epoch": 0.54, "grad_norm": 0.47754914842960366, "learning_rate": 9.14252605378482e-06, "loss": 0.6314, "step": 4239 }, { "epoch": 0.54, "grad_norm": 0.529490852048264, "learning_rate": 9.13840388064378e-06, "loss": 0.6194, "step": 4240 }, { "epoch": 0.54, "grad_norm": 0.5099134338243795, "learning_rate": 9.134281854997532e-06, "loss": 0.6094, "step": 4241 }, { "epoch": 0.54, "grad_norm": 0.48079033639485613, "learning_rate": 9.130159977551708e-06, "loss": 0.6036, "step": 4242 }, { "epoch": 0.54, "grad_norm": 0.540575362959098, "learning_rate": 9.12603824901193e-06, "loss": 0.6278, "step": 4243 }, { "epoch": 0.54, "grad_norm": 0.5442056975792465, "learning_rate": 9.121916670083786e-06, "loss": 0.612, "step": 4244 }, { "epoch": 0.54, "grad_norm": 0.4955374607349748, "learning_rate": 9.117795241472836e-06, "loss": 0.615, "step": 4245 }, { "epoch": 0.54, "grad_norm": 0.46993694621155324, "learning_rate": 9.113673963884625e-06, "loss": 0.5722, "step": 4246 }, { "epoch": 0.54, "grad_norm": 0.523219633568497, "learning_rate": 9.109552838024664e-06, "loss": 0.5929, "step": 4247 }, { "epoch": 0.54, "grad_norm": 0.5687675084739165, "learning_rate": 9.105431864598441e-06, "loss": 0.6143, "step": 4248 }, { "epoch": 0.54, "grad_norm": 0.5917034364679987, "learning_rate": 9.101311044311413e-06, "loss": 0.6647, "step": 4249 }, { "epoch": 0.54, "grad_norm": 0.5476810084837029, "learning_rate": 9.097190377869015e-06, "loss": 0.5758, "step": 4250 }, { "epoch": 0.54, "grad_norm": 0.5748163464255214, "learning_rate": 9.093069865976656e-06, "loss": 0.6049, "step": 4251 }, { "epoch": 0.54, "grad_norm": 0.5830481550268445, "learning_rate": 9.088949509339718e-06, "loss": 0.6188, "step": 4252 }, { "epoch": 0.54, "grad_norm": 0.543796006891201, "learning_rate": 9.084829308663552e-06, "loss": 0.6326, "step": 4253 }, { "epoch": 0.54, "grad_norm": 0.5713652259777894, "learning_rate": 9.080709264653492e-06, "loss": 0.6046, "step": 4254 }, { "epoch": 0.54, "grad_norm": 0.5036058852914528, "learning_rate": 9.076589378014838e-06, "loss": 0.63, "step": 4255 }, { "epoch": 0.54, "grad_norm": 0.48851911493468386, "learning_rate": 9.072469649452865e-06, "loss": 0.5903, "step": 4256 }, { "epoch": 0.54, "grad_norm": 0.49737689707723237, "learning_rate": 9.068350079672818e-06, "loss": 0.6067, "step": 4257 }, { "epoch": 0.54, "grad_norm": 0.6192474456726276, "learning_rate": 9.064230669379919e-06, "loss": 0.6645, "step": 4258 }, { "epoch": 0.54, "grad_norm": 0.4585266769331877, "learning_rate": 9.060111419279359e-06, "loss": 0.6074, "step": 4259 }, { "epoch": 0.54, "grad_norm": 0.4984751983507183, "learning_rate": 9.055992330076304e-06, "loss": 0.5861, "step": 4260 }, { "epoch": 0.54, "grad_norm": 0.560271615104753, "learning_rate": 9.051873402475894e-06, "loss": 0.604, "step": 4261 }, { "epoch": 0.54, "grad_norm": 0.5199004172623833, "learning_rate": 9.047754637183241e-06, "loss": 0.6069, "step": 4262 }, { "epoch": 0.54, "grad_norm": 0.5234585301430585, "learning_rate": 9.043636034903422e-06, "loss": 0.6049, "step": 4263 }, { "epoch": 0.54, "grad_norm": 0.5190000011949292, "learning_rate": 9.039517596341497e-06, "loss": 0.6219, "step": 4264 }, { "epoch": 0.54, "grad_norm": 0.5777733963095026, "learning_rate": 9.035399322202489e-06, "loss": 0.6114, "step": 4265 }, { "epoch": 0.54, "grad_norm": 0.5083452443311369, "learning_rate": 9.0312812131914e-06, "loss": 0.5844, "step": 4266 }, { "epoch": 0.55, "grad_norm": 0.527348966490169, "learning_rate": 9.027163270013193e-06, "loss": 0.6115, "step": 4267 }, { "epoch": 0.55, "grad_norm": 0.5345883879965537, "learning_rate": 9.023045493372819e-06, "loss": 0.6313, "step": 4268 }, { "epoch": 0.55, "grad_norm": 0.5741820335325569, "learning_rate": 9.018927883975188e-06, "loss": 0.6346, "step": 4269 }, { "epoch": 0.55, "grad_norm": 0.5040713456151196, "learning_rate": 9.014810442525183e-06, "loss": 0.5981, "step": 4270 }, { "epoch": 0.55, "grad_norm": 0.49608428375355984, "learning_rate": 9.010693169727662e-06, "loss": 0.6067, "step": 4271 }, { "epoch": 0.55, "grad_norm": 0.5168287594848293, "learning_rate": 9.006576066287449e-06, "loss": 0.6135, "step": 4272 }, { "epoch": 0.55, "grad_norm": 0.5266164655214749, "learning_rate": 9.002459132909344e-06, "loss": 0.6128, "step": 4273 }, { "epoch": 0.55, "grad_norm": 0.4738496818990027, "learning_rate": 8.998342370298115e-06, "loss": 0.6203, "step": 4274 }, { "epoch": 0.55, "grad_norm": 0.474773870464687, "learning_rate": 8.994225779158504e-06, "loss": 0.5869, "step": 4275 }, { "epoch": 0.55, "grad_norm": 0.548356179978702, "learning_rate": 8.99010936019522e-06, "loss": 0.6222, "step": 4276 }, { "epoch": 0.55, "grad_norm": 0.4403748248228835, "learning_rate": 8.985993114112947e-06, "loss": 0.5732, "step": 4277 }, { "epoch": 0.55, "grad_norm": 0.5152839090773338, "learning_rate": 8.981877041616333e-06, "loss": 0.5935, "step": 4278 }, { "epoch": 0.55, "grad_norm": 0.5048171047197871, "learning_rate": 8.977761143409997e-06, "loss": 0.607, "step": 4279 }, { "epoch": 0.55, "grad_norm": 0.4373254320299969, "learning_rate": 8.973645420198535e-06, "loss": 0.5808, "step": 4280 }, { "epoch": 0.55, "grad_norm": 0.5715854965538022, "learning_rate": 8.969529872686506e-06, "loss": 0.627, "step": 4281 }, { "epoch": 0.55, "grad_norm": 0.5463359143808172, "learning_rate": 8.965414501578446e-06, "loss": 0.5965, "step": 4282 }, { "epoch": 0.55, "grad_norm": 0.5073548141564737, "learning_rate": 8.961299307578853e-06, "loss": 0.6074, "step": 4283 }, { "epoch": 0.55, "grad_norm": 0.5414283290294775, "learning_rate": 8.9571842913922e-06, "loss": 0.577, "step": 4284 }, { "epoch": 0.55, "grad_norm": 0.4317877821600358, "learning_rate": 8.953069453722926e-06, "loss": 0.5734, "step": 4285 }, { "epoch": 0.55, "grad_norm": 0.5895867523420163, "learning_rate": 8.948954795275442e-06, "loss": 0.665, "step": 4286 }, { "epoch": 0.55, "grad_norm": 0.44762964719402826, "learning_rate": 8.944840316754129e-06, "loss": 0.5802, "step": 4287 }, { "epoch": 0.55, "grad_norm": 0.462397319724092, "learning_rate": 8.940726018863329e-06, "loss": 0.5821, "step": 4288 }, { "epoch": 0.55, "grad_norm": 0.4916743774025602, "learning_rate": 8.93661190230737e-06, "loss": 0.599, "step": 4289 }, { "epoch": 0.55, "grad_norm": 0.5211326343992563, "learning_rate": 8.932497967790536e-06, "loss": 0.6197, "step": 4290 }, { "epoch": 0.55, "grad_norm": 0.6071696582336065, "learning_rate": 8.928384216017078e-06, "loss": 0.628, "step": 4291 }, { "epoch": 0.55, "grad_norm": 0.505484145752093, "learning_rate": 8.924270647691222e-06, "loss": 0.5861, "step": 4292 }, { "epoch": 0.55, "grad_norm": 0.5171180333009802, "learning_rate": 8.920157263517163e-06, "loss": 0.6252, "step": 4293 }, { "epoch": 0.55, "grad_norm": 0.531486654501117, "learning_rate": 8.91604406419906e-06, "loss": 0.6218, "step": 4294 }, { "epoch": 0.55, "grad_norm": 0.46227081740748005, "learning_rate": 8.911931050441042e-06, "loss": 0.5942, "step": 4295 }, { "epoch": 0.55, "grad_norm": 0.4695221464202855, "learning_rate": 8.907818222947214e-06, "loss": 0.5832, "step": 4296 }, { "epoch": 0.55, "grad_norm": 0.5056515240805146, "learning_rate": 8.903705582421633e-06, "loss": 0.5895, "step": 4297 }, { "epoch": 0.55, "grad_norm": 0.5222949657158384, "learning_rate": 8.89959312956834e-06, "loss": 0.5937, "step": 4298 }, { "epoch": 0.55, "grad_norm": 0.48298001970224536, "learning_rate": 8.895480865091336e-06, "loss": 0.5898, "step": 4299 }, { "epoch": 0.55, "grad_norm": 0.5408260352216087, "learning_rate": 8.891368789694585e-06, "loss": 0.6699, "step": 4300 }, { "epoch": 0.55, "grad_norm": 0.465886073063443, "learning_rate": 8.88725690408203e-06, "loss": 0.5723, "step": 4301 }, { "epoch": 0.55, "grad_norm": 0.47003681541450193, "learning_rate": 8.88314520895757e-06, "loss": 0.5913, "step": 4302 }, { "epoch": 0.55, "grad_norm": 0.5491872389757809, "learning_rate": 8.879033705025085e-06, "loss": 0.6238, "step": 4303 }, { "epoch": 0.55, "grad_norm": 0.5025223868117091, "learning_rate": 8.874922392988409e-06, "loss": 0.6248, "step": 4304 }, { "epoch": 0.55, "grad_norm": 0.5343663946351913, "learning_rate": 8.870811273551352e-06, "loss": 0.5934, "step": 4305 }, { "epoch": 0.55, "grad_norm": 0.5391895230803581, "learning_rate": 8.866700347417684e-06, "loss": 0.6328, "step": 4306 }, { "epoch": 0.55, "grad_norm": 0.46093094563121895, "learning_rate": 8.86258961529115e-06, "loss": 0.5952, "step": 4307 }, { "epoch": 0.55, "grad_norm": 0.477454055030976, "learning_rate": 8.858479077875458e-06, "loss": 0.6349, "step": 4308 }, { "epoch": 0.55, "grad_norm": 0.7113255772680682, "learning_rate": 8.854368735874276e-06, "loss": 0.6083, "step": 4309 }, { "epoch": 0.55, "grad_norm": 0.4594737464925036, "learning_rate": 8.850258589991242e-06, "loss": 0.59, "step": 4310 }, { "epoch": 0.55, "grad_norm": 0.49582345922628396, "learning_rate": 8.846148640929978e-06, "loss": 0.5858, "step": 4311 }, { "epoch": 0.55, "grad_norm": 0.5366670443097553, "learning_rate": 8.842038889394046e-06, "loss": 0.6257, "step": 4312 }, { "epoch": 0.55, "grad_norm": 0.5155918724818435, "learning_rate": 8.837929336086986e-06, "loss": 0.5984, "step": 4313 }, { "epoch": 0.55, "grad_norm": 0.4983165451602739, "learning_rate": 8.833819981712306e-06, "loss": 0.5977, "step": 4314 }, { "epoch": 0.55, "grad_norm": 0.47441230206449575, "learning_rate": 8.829710826973476e-06, "loss": 0.5802, "step": 4315 }, { "epoch": 0.55, "grad_norm": 0.46948408921907087, "learning_rate": 8.825601872573937e-06, "loss": 0.6172, "step": 4316 }, { "epoch": 0.55, "grad_norm": 0.4771295452275976, "learning_rate": 8.821493119217085e-06, "loss": 0.5664, "step": 4317 }, { "epoch": 0.55, "grad_norm": 0.4982341067379695, "learning_rate": 8.817384567606294e-06, "loss": 0.6275, "step": 4318 }, { "epoch": 0.55, "grad_norm": 0.47970255865536, "learning_rate": 8.813276218444901e-06, "loss": 0.5994, "step": 4319 }, { "epoch": 0.55, "grad_norm": 0.5600544972941369, "learning_rate": 8.809168072436202e-06, "loss": 0.5958, "step": 4320 }, { "epoch": 0.55, "grad_norm": 0.5402752089375993, "learning_rate": 8.805060130283458e-06, "loss": 0.6135, "step": 4321 }, { "epoch": 0.55, "grad_norm": 0.5615363408440214, "learning_rate": 8.800952392689903e-06, "loss": 0.6235, "step": 4322 }, { "epoch": 0.55, "grad_norm": 0.5091828651052728, "learning_rate": 8.796844860358731e-06, "loss": 0.5816, "step": 4323 }, { "epoch": 0.55, "grad_norm": 0.46012244275552516, "learning_rate": 8.792737533993098e-06, "loss": 0.6103, "step": 4324 }, { "epoch": 0.55, "grad_norm": 0.5344778205628044, "learning_rate": 8.788630414296135e-06, "loss": 0.5848, "step": 4325 }, { "epoch": 0.55, "grad_norm": 0.5572597212167572, "learning_rate": 8.784523501970928e-06, "loss": 0.6041, "step": 4326 }, { "epoch": 0.55, "grad_norm": 0.4560637880103133, "learning_rate": 8.78041679772053e-06, "loss": 0.5839, "step": 4327 }, { "epoch": 0.55, "grad_norm": 0.5227396288947532, "learning_rate": 8.776310302247961e-06, "loss": 0.6103, "step": 4328 }, { "epoch": 0.55, "grad_norm": 0.563677565392688, "learning_rate": 8.7722040162562e-06, "loss": 0.6284, "step": 4329 }, { "epoch": 0.55, "grad_norm": 0.5669939647471582, "learning_rate": 8.768097940448194e-06, "loss": 0.6063, "step": 4330 }, { "epoch": 0.55, "grad_norm": 0.6073752115019004, "learning_rate": 8.76399207552685e-06, "loss": 0.6401, "step": 4331 }, { "epoch": 0.55, "grad_norm": 0.4970236763331936, "learning_rate": 8.759886422195054e-06, "loss": 0.6466, "step": 4332 }, { "epoch": 0.55, "grad_norm": 0.4845069931542619, "learning_rate": 8.755780981155631e-06, "loss": 0.5861, "step": 4333 }, { "epoch": 0.55, "grad_norm": 0.48053371684695684, "learning_rate": 8.75167575311139e-06, "loss": 0.593, "step": 4334 }, { "epoch": 0.55, "grad_norm": 0.49728531759695255, "learning_rate": 8.747570738765093e-06, "loss": 0.6117, "step": 4335 }, { "epoch": 0.55, "grad_norm": 0.5419409056071787, "learning_rate": 8.74346593881947e-06, "loss": 0.605, "step": 4336 }, { "epoch": 0.55, "grad_norm": 0.555016007441618, "learning_rate": 8.739361353977211e-06, "loss": 0.6262, "step": 4337 }, { "epoch": 0.55, "grad_norm": 0.49853259268850286, "learning_rate": 8.735256984940972e-06, "loss": 0.5986, "step": 4338 }, { "epoch": 0.55, "grad_norm": 0.5315868544587032, "learning_rate": 8.731152832413373e-06, "loss": 0.5968, "step": 4339 }, { "epoch": 0.55, "grad_norm": 0.4686357877391836, "learning_rate": 8.727048897096993e-06, "loss": 0.6069, "step": 4340 }, { "epoch": 0.55, "grad_norm": 0.41800734954959107, "learning_rate": 8.722945179694378e-06, "loss": 0.5617, "step": 4341 }, { "epoch": 0.55, "grad_norm": 0.5366472459640008, "learning_rate": 8.718841680908032e-06, "loss": 0.6115, "step": 4342 }, { "epoch": 0.55, "grad_norm": 0.5777373224523978, "learning_rate": 8.714738401440425e-06, "loss": 0.6314, "step": 4343 }, { "epoch": 0.55, "grad_norm": 0.4698200040312719, "learning_rate": 8.710635341993988e-06, "loss": 0.5873, "step": 4344 }, { "epoch": 0.56, "grad_norm": 0.6195032326191082, "learning_rate": 8.706532503271113e-06, "loss": 0.6467, "step": 4345 }, { "epoch": 0.56, "grad_norm": 0.48977730004720066, "learning_rate": 8.702429885974161e-06, "loss": 0.5965, "step": 4346 }, { "epoch": 0.56, "grad_norm": 0.5121899546599552, "learning_rate": 8.69832749080545e-06, "loss": 0.6238, "step": 4347 }, { "epoch": 0.56, "grad_norm": 0.5654818820220209, "learning_rate": 8.694225318467258e-06, "loss": 0.5995, "step": 4348 }, { "epoch": 0.56, "grad_norm": 0.4874426979607686, "learning_rate": 8.690123369661825e-06, "loss": 0.5891, "step": 4349 }, { "epoch": 0.56, "grad_norm": 0.4859187650863227, "learning_rate": 8.686021645091362e-06, "loss": 0.5809, "step": 4350 }, { "epoch": 0.56, "grad_norm": 0.4681950748138308, "learning_rate": 8.681920145458024e-06, "loss": 0.593, "step": 4351 }, { "epoch": 0.56, "grad_norm": 0.4806766432497025, "learning_rate": 8.677818871463942e-06, "loss": 0.6295, "step": 4352 }, { "epoch": 0.56, "grad_norm": 0.5680825670122833, "learning_rate": 8.673717823811212e-06, "loss": 0.6258, "step": 4353 }, { "epoch": 0.56, "grad_norm": 0.5932996123587657, "learning_rate": 8.669617003201875e-06, "loss": 0.6758, "step": 4354 }, { "epoch": 0.56, "grad_norm": 0.5047165482518634, "learning_rate": 8.665516410337941e-06, "loss": 0.5911, "step": 4355 }, { "epoch": 0.56, "grad_norm": 0.5675913712717333, "learning_rate": 8.661416045921388e-06, "loss": 0.6233, "step": 4356 }, { "epoch": 0.56, "grad_norm": 0.49077051585434545, "learning_rate": 8.657315910654144e-06, "loss": 0.5738, "step": 4357 }, { "epoch": 0.56, "grad_norm": 0.5092963544515128, "learning_rate": 8.653216005238103e-06, "loss": 0.6251, "step": 4358 }, { "epoch": 0.56, "grad_norm": 0.48883030696748725, "learning_rate": 8.649116330375118e-06, "loss": 0.6038, "step": 4359 }, { "epoch": 0.56, "grad_norm": 0.45181707045847785, "learning_rate": 8.645016886767009e-06, "loss": 0.5956, "step": 4360 }, { "epoch": 0.56, "grad_norm": 0.5049224520017708, "learning_rate": 8.640917675115542e-06, "loss": 0.596, "step": 4361 }, { "epoch": 0.56, "grad_norm": 0.5264455076846638, "learning_rate": 8.636818696122465e-06, "loss": 0.5679, "step": 4362 }, { "epoch": 0.56, "grad_norm": 0.5431779790125111, "learning_rate": 8.63271995048946e-06, "loss": 0.6174, "step": 4363 }, { "epoch": 0.56, "grad_norm": 0.4870530844772562, "learning_rate": 8.628621438918187e-06, "loss": 0.5877, "step": 4364 }, { "epoch": 0.56, "grad_norm": 0.4547216359487199, "learning_rate": 8.624523162110266e-06, "loss": 0.586, "step": 4365 }, { "epoch": 0.56, "grad_norm": 0.5218973004644243, "learning_rate": 8.620425120767264e-06, "loss": 0.6116, "step": 4366 }, { "epoch": 0.56, "grad_norm": 0.573835392597298, "learning_rate": 8.616327315590723e-06, "loss": 0.6157, "step": 4367 }, { "epoch": 0.56, "grad_norm": 0.5001055867755403, "learning_rate": 8.612229747282136e-06, "loss": 0.6071, "step": 4368 }, { "epoch": 0.56, "grad_norm": 0.509879851225112, "learning_rate": 8.608132416542955e-06, "loss": 0.6157, "step": 4369 }, { "epoch": 0.56, "grad_norm": 0.4782139070225606, "learning_rate": 8.604035324074594e-06, "loss": 0.5878, "step": 4370 }, { "epoch": 0.56, "grad_norm": 0.48546802857373167, "learning_rate": 8.599938470578428e-06, "loss": 0.5905, "step": 4371 }, { "epoch": 0.56, "grad_norm": 0.47356289602798507, "learning_rate": 8.595841856755785e-06, "loss": 0.5801, "step": 4372 }, { "epoch": 0.56, "grad_norm": 0.49492744291143964, "learning_rate": 8.591745483307956e-06, "loss": 0.6047, "step": 4373 }, { "epoch": 0.56, "grad_norm": 0.5907153707753011, "learning_rate": 8.587649350936188e-06, "loss": 0.6609, "step": 4374 }, { "epoch": 0.56, "grad_norm": 0.4659196899053629, "learning_rate": 8.583553460341695e-06, "loss": 0.5909, "step": 4375 }, { "epoch": 0.56, "grad_norm": 0.45225446760144766, "learning_rate": 8.579457812225642e-06, "loss": 0.5583, "step": 4376 }, { "epoch": 0.56, "grad_norm": 0.5237551089715886, "learning_rate": 8.575362407289152e-06, "loss": 0.6047, "step": 4377 }, { "epoch": 0.56, "grad_norm": 0.5026363076753309, "learning_rate": 8.571267246233308e-06, "loss": 0.5994, "step": 4378 }, { "epoch": 0.56, "grad_norm": 0.527500257545306, "learning_rate": 8.567172329759157e-06, "loss": 0.6118, "step": 4379 }, { "epoch": 0.56, "grad_norm": 0.7434918546248428, "learning_rate": 8.563077658567693e-06, "loss": 0.6172, "step": 4380 }, { "epoch": 0.56, "grad_norm": 0.5118572029940434, "learning_rate": 8.558983233359873e-06, "loss": 0.5819, "step": 4381 }, { "epoch": 0.56, "grad_norm": 0.5169489249591638, "learning_rate": 8.55488905483662e-06, "loss": 0.612, "step": 4382 }, { "epoch": 0.56, "grad_norm": 0.5067669306721058, "learning_rate": 8.550795123698805e-06, "loss": 0.6042, "step": 4383 }, { "epoch": 0.56, "grad_norm": 0.5700884210265189, "learning_rate": 8.546701440647258e-06, "loss": 0.6561, "step": 4384 }, { "epoch": 0.56, "grad_norm": 0.5208823967388203, "learning_rate": 8.542608006382765e-06, "loss": 0.6027, "step": 4385 }, { "epoch": 0.56, "grad_norm": 0.485900137828132, "learning_rate": 8.538514821606077e-06, "loss": 0.6164, "step": 4386 }, { "epoch": 0.56, "grad_norm": 0.567231289992075, "learning_rate": 8.534421887017895e-06, "loss": 0.6385, "step": 4387 }, { "epoch": 0.56, "grad_norm": 0.5141589469672557, "learning_rate": 8.530329203318878e-06, "loss": 0.6181, "step": 4388 }, { "epoch": 0.56, "grad_norm": 0.48142954491552026, "learning_rate": 8.526236771209648e-06, "loss": 0.5995, "step": 4389 }, { "epoch": 0.56, "grad_norm": 0.47588751233132137, "learning_rate": 8.522144591390778e-06, "loss": 0.5837, "step": 4390 }, { "epoch": 0.56, "grad_norm": 0.5583434226499392, "learning_rate": 8.518052664562798e-06, "loss": 0.5956, "step": 4391 }, { "epoch": 0.56, "grad_norm": 0.56103039812304, "learning_rate": 8.513960991426202e-06, "loss": 0.581, "step": 4392 }, { "epoch": 0.56, "grad_norm": 0.47345415348419595, "learning_rate": 8.509869572681427e-06, "loss": 0.5879, "step": 4393 }, { "epoch": 0.56, "grad_norm": 0.4975318801454967, "learning_rate": 8.505778409028877e-06, "loss": 0.6226, "step": 4394 }, { "epoch": 0.56, "grad_norm": 0.5549352514340007, "learning_rate": 8.501687501168907e-06, "loss": 0.6179, "step": 4395 }, { "epoch": 0.56, "grad_norm": 0.4547646370442827, "learning_rate": 8.497596849801836e-06, "loss": 0.5891, "step": 4396 }, { "epoch": 0.56, "grad_norm": 0.5646744091622363, "learning_rate": 8.49350645562793e-06, "loss": 0.639, "step": 4397 }, { "epoch": 0.56, "grad_norm": 0.505662756335317, "learning_rate": 8.489416319347417e-06, "loss": 0.6088, "step": 4398 }, { "epoch": 0.56, "grad_norm": 0.4950802814245462, "learning_rate": 8.48532644166048e-06, "loss": 0.587, "step": 4399 }, { "epoch": 0.56, "grad_norm": 0.5356536388733738, "learning_rate": 8.481236823267252e-06, "loss": 0.608, "step": 4400 }, { "epoch": 0.56, "grad_norm": 0.5423424810725038, "learning_rate": 8.477147464867828e-06, "loss": 0.6172, "step": 4401 }, { "epoch": 0.56, "grad_norm": 0.43807071748588683, "learning_rate": 8.473058367162252e-06, "loss": 0.581, "step": 4402 }, { "epoch": 0.56, "grad_norm": 0.5340194854738509, "learning_rate": 8.468969530850537e-06, "loss": 0.62, "step": 4403 }, { "epoch": 0.56, "grad_norm": 0.47508494228575343, "learning_rate": 8.464880956632639e-06, "loss": 0.5779, "step": 4404 }, { "epoch": 0.56, "grad_norm": 0.5581983448160701, "learning_rate": 8.460792645208469e-06, "loss": 0.6051, "step": 4405 }, { "epoch": 0.56, "grad_norm": 0.525615124593692, "learning_rate": 8.456704597277897e-06, "loss": 0.602, "step": 4406 }, { "epoch": 0.56, "grad_norm": 0.4660593910748604, "learning_rate": 8.45261681354075e-06, "loss": 0.5692, "step": 4407 }, { "epoch": 0.56, "grad_norm": 0.48411059160302705, "learning_rate": 8.448529294696803e-06, "loss": 0.5806, "step": 4408 }, { "epoch": 0.56, "grad_norm": 0.495143800425852, "learning_rate": 8.444442041445791e-06, "loss": 0.6142, "step": 4409 }, { "epoch": 0.56, "grad_norm": 0.5047872227843825, "learning_rate": 8.440355054487403e-06, "loss": 0.627, "step": 4410 }, { "epoch": 0.56, "grad_norm": 0.5089252273145958, "learning_rate": 8.436268334521283e-06, "loss": 0.6096, "step": 4411 }, { "epoch": 0.56, "grad_norm": 0.5464488298509209, "learning_rate": 8.432181882247026e-06, "loss": 0.6077, "step": 4412 }, { "epoch": 0.56, "grad_norm": 0.511889195681209, "learning_rate": 8.428095698364182e-06, "loss": 0.6005, "step": 4413 }, { "epoch": 0.56, "grad_norm": 0.5114729547995784, "learning_rate": 8.424009783572259e-06, "loss": 0.6003, "step": 4414 }, { "epoch": 0.56, "grad_norm": 0.4880945278370817, "learning_rate": 8.41992413857071e-06, "loss": 0.6261, "step": 4415 }, { "epoch": 0.56, "grad_norm": 0.5196311029573322, "learning_rate": 8.415838764058952e-06, "loss": 0.6097, "step": 4416 }, { "epoch": 0.56, "grad_norm": 0.6103948503247816, "learning_rate": 8.411753660736353e-06, "loss": 0.6093, "step": 4417 }, { "epoch": 0.56, "grad_norm": 0.4503392906237626, "learning_rate": 8.407668829302232e-06, "loss": 0.6116, "step": 4418 }, { "epoch": 0.56, "grad_norm": 0.45279184099138753, "learning_rate": 8.40358427045586e-06, "loss": 0.5726, "step": 4419 }, { "epoch": 0.56, "grad_norm": 0.5038859933963246, "learning_rate": 8.399499984896467e-06, "loss": 0.5854, "step": 4420 }, { "epoch": 0.56, "grad_norm": 0.5757904189583839, "learning_rate": 8.395415973323234e-06, "loss": 0.6175, "step": 4421 }, { "epoch": 0.56, "grad_norm": 0.5258321234740893, "learning_rate": 8.391332236435288e-06, "loss": 0.6027, "step": 4422 }, { "epoch": 0.57, "grad_norm": 0.5325731123096031, "learning_rate": 8.387248774931717e-06, "loss": 0.6247, "step": 4423 }, { "epoch": 0.57, "grad_norm": 0.609231031835968, "learning_rate": 8.383165589511567e-06, "loss": 0.6802, "step": 4424 }, { "epoch": 0.57, "grad_norm": 0.49870115168188, "learning_rate": 8.379082680873823e-06, "loss": 0.5659, "step": 4425 }, { "epoch": 0.57, "grad_norm": 0.5163661317738635, "learning_rate": 8.375000049717431e-06, "loss": 0.6077, "step": 4426 }, { "epoch": 0.57, "grad_norm": 0.5398923526526738, "learning_rate": 8.370917696741287e-06, "loss": 0.6091, "step": 4427 }, { "epoch": 0.57, "grad_norm": 0.4507318865115666, "learning_rate": 8.366835622644243e-06, "loss": 0.5716, "step": 4428 }, { "epoch": 0.57, "grad_norm": 0.555472654608384, "learning_rate": 8.362753828125099e-06, "loss": 0.5716, "step": 4429 }, { "epoch": 0.57, "grad_norm": 0.6079822071865805, "learning_rate": 8.358672313882609e-06, "loss": 0.636, "step": 4430 }, { "epoch": 0.57, "grad_norm": 0.5032572957543954, "learning_rate": 8.354591080615471e-06, "loss": 0.5842, "step": 4431 }, { "epoch": 0.57, "grad_norm": 0.5829815866549233, "learning_rate": 8.350510129022355e-06, "loss": 0.6781, "step": 4432 }, { "epoch": 0.57, "grad_norm": 0.48110685126700437, "learning_rate": 8.346429459801867e-06, "loss": 0.6106, "step": 4433 }, { "epoch": 0.57, "grad_norm": 0.5203349292204312, "learning_rate": 8.342349073652563e-06, "loss": 0.6058, "step": 4434 }, { "epoch": 0.57, "grad_norm": 0.4507056873184274, "learning_rate": 8.338268971272959e-06, "loss": 0.5962, "step": 4435 }, { "epoch": 0.57, "grad_norm": 0.5055779840939174, "learning_rate": 8.334189153361518e-06, "loss": 0.5804, "step": 4436 }, { "epoch": 0.57, "grad_norm": 0.542527432342381, "learning_rate": 8.330109620616656e-06, "loss": 0.6049, "step": 4437 }, { "epoch": 0.57, "grad_norm": 0.5176536572580108, "learning_rate": 8.326030373736737e-06, "loss": 0.5992, "step": 4438 }, { "epoch": 0.57, "grad_norm": 0.4591455091799038, "learning_rate": 8.321951413420082e-06, "loss": 0.5908, "step": 4439 }, { "epoch": 0.57, "grad_norm": 0.45568451722847125, "learning_rate": 8.317872740364959e-06, "loss": 0.5738, "step": 4440 }, { "epoch": 0.57, "grad_norm": 0.45901577001474486, "learning_rate": 8.313794355269586e-06, "loss": 0.5802, "step": 4441 }, { "epoch": 0.57, "grad_norm": 0.49132240050504983, "learning_rate": 8.309716258832135e-06, "loss": 0.6049, "step": 4442 }, { "epoch": 0.57, "grad_norm": 0.510836084204218, "learning_rate": 8.305638451750722e-06, "loss": 0.6241, "step": 4443 }, { "epoch": 0.57, "grad_norm": 0.5135728517722608, "learning_rate": 8.301560934723421e-06, "loss": 0.5859, "step": 4444 }, { "epoch": 0.57, "grad_norm": 0.4909388856311013, "learning_rate": 8.297483708448252e-06, "loss": 0.5839, "step": 4445 }, { "epoch": 0.57, "grad_norm": 0.4626174831641241, "learning_rate": 8.29340677362319e-06, "loss": 0.5835, "step": 4446 }, { "epoch": 0.57, "grad_norm": 0.4870877897717343, "learning_rate": 8.289330130946153e-06, "loss": 0.5875, "step": 4447 }, { "epoch": 0.57, "grad_norm": 0.530950326851748, "learning_rate": 8.285253781115015e-06, "loss": 0.5875, "step": 4448 }, { "epoch": 0.57, "grad_norm": 0.54870295114927, "learning_rate": 8.281177724827597e-06, "loss": 0.6022, "step": 4449 }, { "epoch": 0.57, "grad_norm": 0.5782268388814362, "learning_rate": 8.27710196278167e-06, "loss": 0.6322, "step": 4450 }, { "epoch": 0.57, "grad_norm": 0.5492644235355738, "learning_rate": 8.273026495674958e-06, "loss": 0.6278, "step": 4451 }, { "epoch": 0.57, "grad_norm": 0.4862816851131577, "learning_rate": 8.268951324205123e-06, "loss": 0.5801, "step": 4452 }, { "epoch": 0.57, "grad_norm": 0.5133449305361452, "learning_rate": 8.264876449069791e-06, "loss": 0.6168, "step": 4453 }, { "epoch": 0.57, "grad_norm": 0.45311562075199446, "learning_rate": 8.260801870966536e-06, "loss": 0.5741, "step": 4454 }, { "epoch": 0.57, "grad_norm": 0.5221845444042308, "learning_rate": 8.256727590592868e-06, "loss": 0.648, "step": 4455 }, { "epoch": 0.57, "grad_norm": 0.5433495783300762, "learning_rate": 8.252653608646256e-06, "loss": 0.5946, "step": 4456 }, { "epoch": 0.57, "grad_norm": 0.5739507735368123, "learning_rate": 8.248579925824116e-06, "loss": 0.6641, "step": 4457 }, { "epoch": 0.57, "grad_norm": 0.5199532027645943, "learning_rate": 8.244506542823817e-06, "loss": 0.6051, "step": 4458 }, { "epoch": 0.57, "grad_norm": 0.5638097210334493, "learning_rate": 8.240433460342668e-06, "loss": 0.6288, "step": 4459 }, { "epoch": 0.57, "grad_norm": 0.47502658127412317, "learning_rate": 8.236360679077932e-06, "loss": 0.5741, "step": 4460 }, { "epoch": 0.57, "grad_norm": 0.5543915206154404, "learning_rate": 8.232288199726823e-06, "loss": 0.6127, "step": 4461 }, { "epoch": 0.57, "grad_norm": 0.4639116093114038, "learning_rate": 8.228216022986498e-06, "loss": 0.6025, "step": 4462 }, { "epoch": 0.57, "grad_norm": 0.5186287902911781, "learning_rate": 8.224144149554067e-06, "loss": 0.5866, "step": 4463 }, { "epoch": 0.57, "grad_norm": 0.6023960326268141, "learning_rate": 8.220072580126578e-06, "loss": 0.673, "step": 4464 }, { "epoch": 0.57, "grad_norm": 0.46307718521498914, "learning_rate": 8.21600131540104e-06, "loss": 0.5883, "step": 4465 }, { "epoch": 0.57, "grad_norm": 0.5599568457255583, "learning_rate": 8.2119303560744e-06, "loss": 0.63, "step": 4466 }, { "epoch": 0.57, "grad_norm": 0.7212174998781661, "learning_rate": 8.207859702843561e-06, "loss": 0.6783, "step": 4467 }, { "epoch": 0.57, "grad_norm": 0.5263342631880759, "learning_rate": 8.203789356405368e-06, "loss": 0.6211, "step": 4468 }, { "epoch": 0.57, "grad_norm": 0.5877936952245818, "learning_rate": 8.199719317456614e-06, "loss": 0.6452, "step": 4469 }, { "epoch": 0.57, "grad_norm": 0.5315102013956702, "learning_rate": 8.195649586694043e-06, "loss": 0.6242, "step": 4470 }, { "epoch": 0.57, "grad_norm": 0.5602239607415878, "learning_rate": 8.19158016481434e-06, "loss": 0.6227, "step": 4471 }, { "epoch": 0.57, "grad_norm": 0.5540902368507764, "learning_rate": 8.187511052514143e-06, "loss": 0.5759, "step": 4472 }, { "epoch": 0.57, "grad_norm": 0.543598135100832, "learning_rate": 8.183442250490028e-06, "loss": 0.6202, "step": 4473 }, { "epoch": 0.57, "grad_norm": 0.5490099417810068, "learning_rate": 8.179373759438534e-06, "loss": 0.6461, "step": 4474 }, { "epoch": 0.57, "grad_norm": 0.5133510237584017, "learning_rate": 8.175305580056134e-06, "loss": 0.5983, "step": 4475 }, { "epoch": 0.57, "grad_norm": 0.5673689906127625, "learning_rate": 8.171237713039249e-06, "loss": 0.6436, "step": 4476 }, { "epoch": 0.57, "grad_norm": 0.44495995272933114, "learning_rate": 8.16717015908425e-06, "loss": 0.5662, "step": 4477 }, { "epoch": 0.57, "grad_norm": 0.5392833815191391, "learning_rate": 8.163102918887452e-06, "loss": 0.5949, "step": 4478 }, { "epoch": 0.57, "grad_norm": 0.5182921191753037, "learning_rate": 8.159035993145118e-06, "loss": 0.5804, "step": 4479 }, { "epoch": 0.57, "grad_norm": 0.5322329143018716, "learning_rate": 8.154969382553453e-06, "loss": 0.6264, "step": 4480 }, { "epoch": 0.57, "grad_norm": 0.5840692597685151, "learning_rate": 8.150903087808614e-06, "loss": 0.6018, "step": 4481 }, { "epoch": 0.57, "grad_norm": 0.4863675183813366, "learning_rate": 8.146837109606705e-06, "loss": 0.6078, "step": 4482 }, { "epoch": 0.57, "grad_norm": 0.5452232959943618, "learning_rate": 8.142771448643765e-06, "loss": 0.6286, "step": 4483 }, { "epoch": 0.57, "grad_norm": 0.6134721647524972, "learning_rate": 8.138706105615793e-06, "loss": 0.643, "step": 4484 }, { "epoch": 0.57, "grad_norm": 0.5179236051061304, "learning_rate": 8.134641081218719e-06, "loss": 0.6213, "step": 4485 }, { "epoch": 0.57, "grad_norm": 0.5166766588768718, "learning_rate": 8.13057637614843e-06, "loss": 0.6121, "step": 4486 }, { "epoch": 0.57, "grad_norm": 0.5786618567652134, "learning_rate": 8.126511991100752e-06, "loss": 0.6707, "step": 4487 }, { "epoch": 0.57, "grad_norm": 0.4971725776269134, "learning_rate": 8.12244792677146e-06, "loss": 0.6005, "step": 4488 }, { "epoch": 0.57, "grad_norm": 0.5043231093247795, "learning_rate": 8.118384183856273e-06, "loss": 0.577, "step": 4489 }, { "epoch": 0.57, "grad_norm": 0.4726297601131171, "learning_rate": 8.114320763050853e-06, "loss": 0.6192, "step": 4490 }, { "epoch": 0.57, "grad_norm": 0.5147900120138423, "learning_rate": 8.11025766505081e-06, "loss": 0.6069, "step": 4491 }, { "epoch": 0.57, "grad_norm": 0.41673679416358167, "learning_rate": 8.106194890551695e-06, "loss": 0.5813, "step": 4492 }, { "epoch": 0.57, "grad_norm": 0.4827777501626188, "learning_rate": 8.102132440249009e-06, "loss": 0.5862, "step": 4493 }, { "epoch": 0.57, "grad_norm": 0.5355932742240624, "learning_rate": 8.098070314838188e-06, "loss": 0.619, "step": 4494 }, { "epoch": 0.57, "grad_norm": 0.5156414115381702, "learning_rate": 8.09400851501462e-06, "loss": 0.607, "step": 4495 }, { "epoch": 0.57, "grad_norm": 0.4772904192147835, "learning_rate": 8.089947041473642e-06, "loss": 0.5807, "step": 4496 }, { "epoch": 0.57, "grad_norm": 0.5508258826352349, "learning_rate": 8.085885894910525e-06, "loss": 0.6253, "step": 4497 }, { "epoch": 0.57, "grad_norm": 0.5535405622325252, "learning_rate": 8.081825076020487e-06, "loss": 0.6153, "step": 4498 }, { "epoch": 0.57, "grad_norm": 0.4550801327110271, "learning_rate": 8.077764585498691e-06, "loss": 0.5618, "step": 4499 }, { "epoch": 0.57, "grad_norm": 0.46388606958249007, "learning_rate": 8.073704424040244e-06, "loss": 0.5727, "step": 4500 }, { "epoch": 0.57, "grad_norm": 0.593170733798684, "learning_rate": 8.069644592340196e-06, "loss": 0.6126, "step": 4501 }, { "epoch": 0.58, "grad_norm": 0.46366570812556573, "learning_rate": 8.065585091093542e-06, "loss": 0.5972, "step": 4502 }, { "epoch": 0.58, "grad_norm": 0.505532550981573, "learning_rate": 8.061525920995218e-06, "loss": 0.6139, "step": 4503 }, { "epoch": 0.58, "grad_norm": 0.5999469512389134, "learning_rate": 8.057467082740106e-06, "loss": 0.6474, "step": 4504 }, { "epoch": 0.58, "grad_norm": 0.5994621436952001, "learning_rate": 8.053408577023032e-06, "loss": 0.656, "step": 4505 }, { "epoch": 0.58, "grad_norm": 0.4852441035772842, "learning_rate": 8.049350404538757e-06, "loss": 0.576, "step": 4506 }, { "epoch": 0.58, "grad_norm": 0.5439151405438114, "learning_rate": 8.045292565981995e-06, "loss": 0.6447, "step": 4507 }, { "epoch": 0.58, "grad_norm": 0.5104731224113029, "learning_rate": 8.041235062047397e-06, "loss": 0.5959, "step": 4508 }, { "epoch": 0.58, "grad_norm": 0.5486730413901434, "learning_rate": 8.037177893429557e-06, "loss": 0.5799, "step": 4509 }, { "epoch": 0.58, "grad_norm": 0.5141049458186041, "learning_rate": 8.033121060823015e-06, "loss": 0.6152, "step": 4510 }, { "epoch": 0.58, "grad_norm": 0.530907699940565, "learning_rate": 8.029064564922255e-06, "loss": 0.5927, "step": 4511 }, { "epoch": 0.58, "grad_norm": 0.5049534250766823, "learning_rate": 8.025008406421692e-06, "loss": 0.5838, "step": 4512 }, { "epoch": 0.58, "grad_norm": 0.5144029885919509, "learning_rate": 8.020952586015698e-06, "loss": 0.5877, "step": 4513 }, { "epoch": 0.58, "grad_norm": 0.44775684737496635, "learning_rate": 8.016897104398579e-06, "loss": 0.6008, "step": 4514 }, { "epoch": 0.58, "grad_norm": 0.5639843910773312, "learning_rate": 8.01284196226458e-06, "loss": 0.6434, "step": 4515 }, { "epoch": 0.58, "grad_norm": 0.5606679799391043, "learning_rate": 8.00878716030789e-06, "loss": 0.6112, "step": 4516 }, { "epoch": 0.58, "grad_norm": 0.5350800007398026, "learning_rate": 8.004732699222653e-06, "loss": 0.6291, "step": 4517 }, { "epoch": 0.58, "grad_norm": 0.5371784434195471, "learning_rate": 8.000678579702935e-06, "loss": 0.6068, "step": 4518 }, { "epoch": 0.58, "grad_norm": 0.5132373454488587, "learning_rate": 7.996624802442754e-06, "loss": 0.6141, "step": 4519 }, { "epoch": 0.58, "grad_norm": 0.486386993231231, "learning_rate": 7.992571368136066e-06, "loss": 0.5982, "step": 4520 }, { "epoch": 0.58, "grad_norm": 0.4829655903622703, "learning_rate": 7.988518277476773e-06, "loss": 0.6031, "step": 4521 }, { "epoch": 0.58, "grad_norm": 0.5134002021039288, "learning_rate": 7.984465531158712e-06, "loss": 0.5919, "step": 4522 }, { "epoch": 0.58, "grad_norm": 0.5270162613059396, "learning_rate": 7.980413129875661e-06, "loss": 0.5946, "step": 4523 }, { "epoch": 0.58, "grad_norm": 0.5017581612412054, "learning_rate": 7.976361074321351e-06, "loss": 0.5897, "step": 4524 }, { "epoch": 0.58, "grad_norm": 0.5022649775453656, "learning_rate": 7.972309365189437e-06, "loss": 0.5957, "step": 4525 }, { "epoch": 0.58, "grad_norm": 0.5546357389573575, "learning_rate": 7.968258003173527e-06, "loss": 0.6219, "step": 4526 }, { "epoch": 0.58, "grad_norm": 0.47337203232515773, "learning_rate": 7.964206988967162e-06, "loss": 0.5735, "step": 4527 }, { "epoch": 0.58, "grad_norm": 0.6057536529463903, "learning_rate": 7.960156323263828e-06, "loss": 0.6346, "step": 4528 }, { "epoch": 0.58, "grad_norm": 0.5363615388467059, "learning_rate": 7.956106006756946e-06, "loss": 0.6214, "step": 4529 }, { "epoch": 0.58, "grad_norm": 0.5182603982420189, "learning_rate": 7.952056040139884e-06, "loss": 0.5779, "step": 4530 }, { "epoch": 0.58, "grad_norm": 0.5047439394350651, "learning_rate": 7.948006424105948e-06, "loss": 0.6186, "step": 4531 }, { "epoch": 0.58, "grad_norm": 0.4663274147648794, "learning_rate": 7.943957159348383e-06, "loss": 0.5766, "step": 4532 }, { "epoch": 0.58, "grad_norm": 0.5084374438621435, "learning_rate": 7.939908246560371e-06, "loss": 0.6163, "step": 4533 }, { "epoch": 0.58, "grad_norm": 0.5296700527663255, "learning_rate": 7.93585968643504e-06, "loss": 0.5957, "step": 4534 }, { "epoch": 0.58, "grad_norm": 0.5080391452094031, "learning_rate": 7.931811479665455e-06, "loss": 0.5948, "step": 4535 }, { "epoch": 0.58, "grad_norm": 0.4647529523557161, "learning_rate": 7.927763626944615e-06, "loss": 0.5932, "step": 4536 }, { "epoch": 0.58, "grad_norm": 0.464262903208299, "learning_rate": 7.923716128965463e-06, "loss": 0.5968, "step": 4537 }, { "epoch": 0.58, "grad_norm": 0.5070636200188198, "learning_rate": 7.91966898642089e-06, "loss": 0.5837, "step": 4538 }, { "epoch": 0.58, "grad_norm": 0.5300009472920516, "learning_rate": 7.91562220000371e-06, "loss": 0.6183, "step": 4539 }, { "epoch": 0.58, "grad_norm": 0.5931813882943889, "learning_rate": 7.911575770406685e-06, "loss": 0.6326, "step": 4540 }, { "epoch": 0.58, "grad_norm": 0.5258118920434346, "learning_rate": 7.907529698322516e-06, "loss": 0.6264, "step": 4541 }, { "epoch": 0.58, "grad_norm": 0.4922025645170125, "learning_rate": 7.903483984443841e-06, "loss": 0.5978, "step": 4542 }, { "epoch": 0.58, "grad_norm": 0.5176339207386341, "learning_rate": 7.899438629463236e-06, "loss": 0.5869, "step": 4543 }, { "epoch": 0.58, "grad_norm": 0.5630684277024872, "learning_rate": 7.895393634073216e-06, "loss": 0.6243, "step": 4544 }, { "epoch": 0.58, "grad_norm": 0.5404814850986591, "learning_rate": 7.891348998966238e-06, "loss": 0.6321, "step": 4545 }, { "epoch": 0.58, "grad_norm": 0.5511585556420708, "learning_rate": 7.887304724834693e-06, "loss": 0.6208, "step": 4546 }, { "epoch": 0.58, "grad_norm": 0.5090587728247835, "learning_rate": 7.883260812370914e-06, "loss": 0.6075, "step": 4547 }, { "epoch": 0.58, "grad_norm": 0.4869905133219549, "learning_rate": 7.879217262267164e-06, "loss": 0.6213, "step": 4548 }, { "epoch": 0.58, "grad_norm": 0.5132855151351257, "learning_rate": 7.875174075215654e-06, "loss": 0.6103, "step": 4549 }, { "epoch": 0.58, "grad_norm": 0.5239872396684563, "learning_rate": 7.871131251908527e-06, "loss": 0.6501, "step": 4550 }, { "epoch": 0.58, "grad_norm": 0.4718651676703956, "learning_rate": 7.867088793037863e-06, "loss": 0.5822, "step": 4551 }, { "epoch": 0.58, "grad_norm": 0.4418774675066207, "learning_rate": 7.863046699295687e-06, "loss": 0.5585, "step": 4552 }, { "epoch": 0.58, "grad_norm": 0.532100109473559, "learning_rate": 7.859004971373953e-06, "loss": 0.5979, "step": 4553 }, { "epoch": 0.58, "grad_norm": 0.636110831419233, "learning_rate": 7.854963609964557e-06, "loss": 0.6138, "step": 4554 }, { "epoch": 0.58, "grad_norm": 0.505452909070726, "learning_rate": 7.85092261575933e-06, "loss": 0.6063, "step": 4555 }, { "epoch": 0.58, "grad_norm": 0.48307772552953704, "learning_rate": 7.846881989450044e-06, "loss": 0.5896, "step": 4556 }, { "epoch": 0.58, "grad_norm": 0.5517993970733757, "learning_rate": 7.842841731728401e-06, "loss": 0.5894, "step": 4557 }, { "epoch": 0.58, "grad_norm": 0.5128086574785133, "learning_rate": 7.838801843286044e-06, "loss": 0.5843, "step": 4558 }, { "epoch": 0.58, "grad_norm": 0.49136731922290283, "learning_rate": 7.834762324814553e-06, "loss": 0.6285, "step": 4559 }, { "epoch": 0.58, "grad_norm": 0.539982000885415, "learning_rate": 7.83072317700545e-06, "loss": 0.6436, "step": 4560 }, { "epoch": 0.58, "grad_norm": 0.5314266275510106, "learning_rate": 7.826684400550182e-06, "loss": 0.6047, "step": 4561 }, { "epoch": 0.58, "grad_norm": 0.545929939640043, "learning_rate": 7.822645996140141e-06, "loss": 0.6117, "step": 4562 }, { "epoch": 0.58, "grad_norm": 0.48976391626497195, "learning_rate": 7.818607964466653e-06, "loss": 0.6147, "step": 4563 }, { "epoch": 0.58, "grad_norm": 0.5317230314952827, "learning_rate": 7.81457030622098e-06, "loss": 0.6281, "step": 4564 }, { "epoch": 0.58, "grad_norm": 0.5340195389485576, "learning_rate": 7.810533022094317e-06, "loss": 0.6032, "step": 4565 }, { "epoch": 0.58, "grad_norm": 0.4921546085002043, "learning_rate": 7.806496112777796e-06, "loss": 0.5782, "step": 4566 }, { "epoch": 0.58, "grad_norm": 0.4982534473991218, "learning_rate": 7.802459578962496e-06, "loss": 0.5968, "step": 4567 }, { "epoch": 0.58, "grad_norm": 0.564051084071875, "learning_rate": 7.79842342133942e-06, "loss": 0.629, "step": 4568 }, { "epoch": 0.58, "grad_norm": 0.5058556063847334, "learning_rate": 7.794387640599506e-06, "loss": 0.5959, "step": 4569 }, { "epoch": 0.58, "grad_norm": 0.5261472786658578, "learning_rate": 7.79035223743363e-06, "loss": 0.6137, "step": 4570 }, { "epoch": 0.58, "grad_norm": 0.4881789585211204, "learning_rate": 7.786317212532603e-06, "loss": 0.5729, "step": 4571 }, { "epoch": 0.58, "grad_norm": 0.5144580124377761, "learning_rate": 7.782282566587178e-06, "loss": 0.6217, "step": 4572 }, { "epoch": 0.58, "grad_norm": 0.5496752373815705, "learning_rate": 7.77824830028803e-06, "loss": 0.6192, "step": 4573 }, { "epoch": 0.58, "grad_norm": 0.502441789669366, "learning_rate": 7.774214414325784e-06, "loss": 0.5937, "step": 4574 }, { "epoch": 0.58, "grad_norm": 0.5557618895440708, "learning_rate": 7.770180909390987e-06, "loss": 0.6255, "step": 4575 }, { "epoch": 0.58, "grad_norm": 0.5054571522319282, "learning_rate": 7.766147786174132e-06, "loss": 0.6243, "step": 4576 }, { "epoch": 0.58, "grad_norm": 0.5169297386504891, "learning_rate": 7.762115045365632e-06, "loss": 0.6145, "step": 4577 }, { "epoch": 0.58, "grad_norm": 0.49936388640607954, "learning_rate": 7.758082687655848e-06, "loss": 0.5579, "step": 4578 }, { "epoch": 0.58, "grad_norm": 0.5449325736258412, "learning_rate": 7.754050713735072e-06, "loss": 0.6114, "step": 4579 }, { "epoch": 0.59, "grad_norm": 0.4963241537137529, "learning_rate": 7.750019124293522e-06, "loss": 0.583, "step": 4580 }, { "epoch": 0.59, "grad_norm": 0.5656720071111739, "learning_rate": 7.745987920021368e-06, "loss": 0.6698, "step": 4581 }, { "epoch": 0.59, "grad_norm": 0.4654613846964812, "learning_rate": 7.741957101608696e-06, "loss": 0.5831, "step": 4582 }, { "epoch": 0.59, "grad_norm": 0.5045154073052107, "learning_rate": 7.737926669745535e-06, "loss": 0.6007, "step": 4583 }, { "epoch": 0.59, "grad_norm": 0.4690619794195035, "learning_rate": 7.733896625121845e-06, "loss": 0.5843, "step": 4584 }, { "epoch": 0.59, "grad_norm": 0.4238746127298947, "learning_rate": 7.729866968427523e-06, "loss": 0.5603, "step": 4585 }, { "epoch": 0.59, "grad_norm": 0.5221817366619218, "learning_rate": 7.725837700352395e-06, "loss": 0.6062, "step": 4586 }, { "epoch": 0.59, "grad_norm": 0.4902721277219194, "learning_rate": 7.721808821586217e-06, "loss": 0.58, "step": 4587 }, { "epoch": 0.59, "grad_norm": 0.5316904133743282, "learning_rate": 7.717780332818696e-06, "loss": 0.616, "step": 4588 }, { "epoch": 0.59, "grad_norm": 0.49546103943279507, "learning_rate": 7.713752234739456e-06, "loss": 0.5849, "step": 4589 }, { "epoch": 0.59, "grad_norm": 0.4407187531015587, "learning_rate": 7.709724528038056e-06, "loss": 0.5827, "step": 4590 }, { "epoch": 0.59, "grad_norm": 0.5378420120729123, "learning_rate": 7.705697213403991e-06, "loss": 0.6255, "step": 4591 }, { "epoch": 0.59, "grad_norm": 0.5200909244498518, "learning_rate": 7.701670291526691e-06, "loss": 0.5974, "step": 4592 }, { "epoch": 0.59, "grad_norm": 0.6035615404415796, "learning_rate": 7.697643763095513e-06, "loss": 0.6215, "step": 4593 }, { "epoch": 0.59, "grad_norm": 0.45111235777421116, "learning_rate": 7.693617628799749e-06, "loss": 0.5807, "step": 4594 }, { "epoch": 0.59, "grad_norm": 0.5119454169719306, "learning_rate": 7.689591889328626e-06, "loss": 0.6318, "step": 4595 }, { "epoch": 0.59, "grad_norm": 0.4674911905010613, "learning_rate": 7.685566545371304e-06, "loss": 0.6087, "step": 4596 }, { "epoch": 0.59, "grad_norm": 0.5360586572914812, "learning_rate": 7.681541597616872e-06, "loss": 0.6156, "step": 4597 }, { "epoch": 0.59, "grad_norm": 0.5489824047408657, "learning_rate": 7.67751704675435e-06, "loss": 0.6295, "step": 4598 }, { "epoch": 0.59, "grad_norm": 0.49951719870894135, "learning_rate": 7.67349289347269e-06, "loss": 0.5843, "step": 4599 }, { "epoch": 0.59, "grad_norm": 0.510727054117526, "learning_rate": 7.669469138460785e-06, "loss": 0.6143, "step": 4600 }, { "epoch": 0.59, "grad_norm": 0.4973480846421822, "learning_rate": 7.665445782407446e-06, "loss": 0.5608, "step": 4601 }, { "epoch": 0.59, "grad_norm": 0.451486661004134, "learning_rate": 7.661422826001429e-06, "loss": 0.5855, "step": 4602 }, { "epoch": 0.59, "grad_norm": 0.5785070489220203, "learning_rate": 7.657400269931412e-06, "loss": 0.6722, "step": 4603 }, { "epoch": 0.59, "grad_norm": 0.4519385785350109, "learning_rate": 7.653378114886006e-06, "loss": 0.6124, "step": 4604 }, { "epoch": 0.59, "grad_norm": 0.4945406161460703, "learning_rate": 7.649356361553758e-06, "loss": 0.5985, "step": 4605 }, { "epoch": 0.59, "grad_norm": 0.49437869696635733, "learning_rate": 7.645335010623145e-06, "loss": 0.5937, "step": 4606 }, { "epoch": 0.59, "grad_norm": 0.5604518574189234, "learning_rate": 7.641314062782569e-06, "loss": 0.6639, "step": 4607 }, { "epoch": 0.59, "grad_norm": 0.5604726082503746, "learning_rate": 7.637293518720363e-06, "loss": 0.6336, "step": 4608 }, { "epoch": 0.59, "grad_norm": 0.5194216504749707, "learning_rate": 7.63327337912481e-06, "loss": 0.6022, "step": 4609 }, { "epoch": 0.59, "grad_norm": 0.5198358328322651, "learning_rate": 7.6292536446840985e-06, "loss": 0.6058, "step": 4610 }, { "epoch": 0.59, "grad_norm": 0.5291538823811384, "learning_rate": 7.625234316086358e-06, "loss": 0.5934, "step": 4611 }, { "epoch": 0.59, "grad_norm": 0.5147468539433743, "learning_rate": 7.621215394019653e-06, "loss": 0.5886, "step": 4612 }, { "epoch": 0.59, "grad_norm": 0.4793645508348034, "learning_rate": 7.6171968791719705e-06, "loss": 0.6021, "step": 4613 }, { "epoch": 0.59, "grad_norm": 0.43631671825267887, "learning_rate": 7.613178772231235e-06, "loss": 0.5607, "step": 4614 }, { "epoch": 0.59, "grad_norm": 0.5097127446210753, "learning_rate": 7.6091610738852965e-06, "loss": 0.6109, "step": 4615 }, { "epoch": 0.59, "grad_norm": 0.5333791732486464, "learning_rate": 7.605143784821929e-06, "loss": 0.618, "step": 4616 }, { "epoch": 0.59, "grad_norm": 0.4845834050975592, "learning_rate": 7.601126905728854e-06, "loss": 0.6007, "step": 4617 }, { "epoch": 0.59, "grad_norm": 0.5080318850033643, "learning_rate": 7.597110437293711e-06, "loss": 0.6071, "step": 4618 }, { "epoch": 0.59, "grad_norm": 0.5127964606982419, "learning_rate": 7.593094380204064e-06, "loss": 0.5759, "step": 4619 }, { "epoch": 0.59, "grad_norm": 0.4491478085196052, "learning_rate": 7.58907873514742e-06, "loss": 0.5699, "step": 4620 }, { "epoch": 0.59, "grad_norm": 0.524226739437036, "learning_rate": 7.585063502811203e-06, "loss": 0.6308, "step": 4621 }, { "epoch": 0.59, "grad_norm": 0.5373989837790003, "learning_rate": 7.581048683882776e-06, "loss": 0.6517, "step": 4622 }, { "epoch": 0.59, "grad_norm": 0.5462569857368288, "learning_rate": 7.577034279049422e-06, "loss": 0.62, "step": 4623 }, { "epoch": 0.59, "grad_norm": 0.5083888427848562, "learning_rate": 7.5730202889983665e-06, "loss": 0.5656, "step": 4624 }, { "epoch": 0.59, "grad_norm": 0.5042701448002815, "learning_rate": 7.56900671441675e-06, "loss": 0.6162, "step": 4625 }, { "epoch": 0.59, "grad_norm": 0.45096760270324515, "learning_rate": 7.564993555991651e-06, "loss": 0.6052, "step": 4626 }, { "epoch": 0.59, "grad_norm": 0.5288585114226738, "learning_rate": 7.5609808144100726e-06, "loss": 0.6134, "step": 4627 }, { "epoch": 0.59, "grad_norm": 0.4969992139671953, "learning_rate": 7.556968490358944e-06, "loss": 0.6257, "step": 4628 }, { "epoch": 0.59, "grad_norm": 0.5498329043083008, "learning_rate": 7.5529565845251295e-06, "loss": 0.6424, "step": 4629 }, { "epoch": 0.59, "grad_norm": 0.5362075016228839, "learning_rate": 7.548945097595416e-06, "loss": 0.5921, "step": 4630 }, { "epoch": 0.59, "grad_norm": 0.5131519058339638, "learning_rate": 7.544934030256525e-06, "loss": 0.5978, "step": 4631 }, { "epoch": 0.59, "grad_norm": 0.5586481617754662, "learning_rate": 7.540923383195101e-06, "loss": 0.6063, "step": 4632 }, { "epoch": 0.59, "grad_norm": 0.47256267055869516, "learning_rate": 7.536913157097718e-06, "loss": 0.5938, "step": 4633 }, { "epoch": 0.59, "grad_norm": 0.7087480624691405, "learning_rate": 7.532903352650878e-06, "loss": 0.6451, "step": 4634 }, { "epoch": 0.59, "grad_norm": 0.4960268536922231, "learning_rate": 7.52889397054101e-06, "loss": 0.6204, "step": 4635 }, { "epoch": 0.59, "grad_norm": 0.5571646470734901, "learning_rate": 7.524885011454474e-06, "loss": 0.5952, "step": 4636 }, { "epoch": 0.59, "grad_norm": 0.5528299252786337, "learning_rate": 7.520876476077548e-06, "loss": 0.6007, "step": 4637 }, { "epoch": 0.59, "grad_norm": 0.5492586403541567, "learning_rate": 7.516868365096454e-06, "loss": 0.5771, "step": 4638 }, { "epoch": 0.59, "grad_norm": 0.5063550296066464, "learning_rate": 7.512860679197329e-06, "loss": 0.6068, "step": 4639 }, { "epoch": 0.59, "grad_norm": 0.5064852549282524, "learning_rate": 7.508853419066238e-06, "loss": 0.6209, "step": 4640 }, { "epoch": 0.59, "grad_norm": 0.4682487292379895, "learning_rate": 7.504846585389175e-06, "loss": 0.5993, "step": 4641 }, { "epoch": 0.59, "grad_norm": 0.48706310540997144, "learning_rate": 7.500840178852063e-06, "loss": 0.6247, "step": 4642 }, { "epoch": 0.59, "grad_norm": 0.7407957558630296, "learning_rate": 7.4968342001407476e-06, "loss": 0.655, "step": 4643 }, { "epoch": 0.59, "grad_norm": 0.48978739494640194, "learning_rate": 7.492828649941007e-06, "loss": 0.6133, "step": 4644 }, { "epoch": 0.59, "grad_norm": 0.502318625707548, "learning_rate": 7.4888235289385405e-06, "loss": 0.6159, "step": 4645 }, { "epoch": 0.59, "grad_norm": 0.48919403522303184, "learning_rate": 7.484818837818977e-06, "loss": 0.5761, "step": 4646 }, { "epoch": 0.59, "grad_norm": 0.4738256560812014, "learning_rate": 7.480814577267872e-06, "loss": 0.5807, "step": 4647 }, { "epoch": 0.59, "grad_norm": 0.6310554425762941, "learning_rate": 7.476810747970707e-06, "loss": 0.5903, "step": 4648 }, { "epoch": 0.59, "grad_norm": 0.5066328596323458, "learning_rate": 7.472807350612885e-06, "loss": 0.5993, "step": 4649 }, { "epoch": 0.59, "grad_norm": 0.48104785274257295, "learning_rate": 7.468804385879741e-06, "loss": 0.6077, "step": 4650 }, { "epoch": 0.59, "grad_norm": 0.5095465892877309, "learning_rate": 7.4648018544565315e-06, "loss": 0.5941, "step": 4651 }, { "epoch": 0.59, "grad_norm": 0.7999597969729281, "learning_rate": 7.460799757028447e-06, "loss": 0.607, "step": 4652 }, { "epoch": 0.59, "grad_norm": 0.5905368324742187, "learning_rate": 7.456798094280593e-06, "loss": 0.6655, "step": 4653 }, { "epoch": 0.59, "grad_norm": 0.4533965484484161, "learning_rate": 7.4527968668980085e-06, "loss": 0.5964, "step": 4654 }, { "epoch": 0.59, "grad_norm": 0.517938848788023, "learning_rate": 7.448796075565654e-06, "loss": 0.6161, "step": 4655 }, { "epoch": 0.59, "grad_norm": 0.49723794608878835, "learning_rate": 7.444795720968416e-06, "loss": 0.6278, "step": 4656 }, { "epoch": 0.59, "grad_norm": 0.5685934589957654, "learning_rate": 7.440795803791108e-06, "loss": 0.674, "step": 4657 }, { "epoch": 0.6, "grad_norm": 0.5229577983159852, "learning_rate": 7.4367963247184604e-06, "loss": 0.6191, "step": 4658 }, { "epoch": 0.6, "grad_norm": 0.5045155754000569, "learning_rate": 7.432797284435145e-06, "loss": 0.6169, "step": 4659 }, { "epoch": 0.6, "grad_norm": 0.4579278344861845, "learning_rate": 7.428798683625747e-06, "loss": 0.5771, "step": 4660 }, { "epoch": 0.6, "grad_norm": 0.40878422733722763, "learning_rate": 7.4248005229747744e-06, "loss": 0.5718, "step": 4661 }, { "epoch": 0.6, "grad_norm": 0.5068959972276589, "learning_rate": 7.420802803166664e-06, "loss": 0.621, "step": 4662 }, { "epoch": 0.6, "grad_norm": 0.4715391058876133, "learning_rate": 7.416805524885782e-06, "loss": 0.6398, "step": 4663 }, { "epoch": 0.6, "grad_norm": 0.573934204951864, "learning_rate": 7.4128086888164065e-06, "loss": 0.6435, "step": 4664 }, { "epoch": 0.6, "grad_norm": 0.4827377808639548, "learning_rate": 7.408812295642751e-06, "loss": 0.6019, "step": 4665 }, { "epoch": 0.6, "grad_norm": 0.5515125801669524, "learning_rate": 7.4048163460489505e-06, "loss": 0.5893, "step": 4666 }, { "epoch": 0.6, "grad_norm": 0.4709896345807336, "learning_rate": 7.400820840719063e-06, "loss": 0.6028, "step": 4667 }, { "epoch": 0.6, "grad_norm": 0.5853829529636567, "learning_rate": 7.396825780337069e-06, "loss": 0.6046, "step": 4668 }, { "epoch": 0.6, "grad_norm": 0.5039097391295001, "learning_rate": 7.392831165586877e-06, "loss": 0.6201, "step": 4669 }, { "epoch": 0.6, "grad_norm": 0.4933981300949997, "learning_rate": 7.388836997152312e-06, "loss": 0.6017, "step": 4670 }, { "epoch": 0.6, "grad_norm": 0.5580733012662825, "learning_rate": 7.38484327571713e-06, "loss": 0.604, "step": 4671 }, { "epoch": 0.6, "grad_norm": 0.4605103537368148, "learning_rate": 7.3808500019650044e-06, "loss": 0.5755, "step": 4672 }, { "epoch": 0.6, "grad_norm": 0.5800054614252934, "learning_rate": 7.376857176579539e-06, "loss": 0.6639, "step": 4673 }, { "epoch": 0.6, "grad_norm": 0.5010232948952906, "learning_rate": 7.372864800244257e-06, "loss": 0.6047, "step": 4674 }, { "epoch": 0.6, "grad_norm": 0.49052224297383024, "learning_rate": 7.368872873642603e-06, "loss": 0.6011, "step": 4675 }, { "epoch": 0.6, "grad_norm": 0.7513033901069746, "learning_rate": 7.364881397457948e-06, "loss": 0.5921, "step": 4676 }, { "epoch": 0.6, "grad_norm": 0.5083157901586188, "learning_rate": 7.360890372373582e-06, "loss": 0.5897, "step": 4677 }, { "epoch": 0.6, "grad_norm": 0.5481137568344346, "learning_rate": 7.356899799072723e-06, "loss": 0.6418, "step": 4678 }, { "epoch": 0.6, "grad_norm": 0.5750386681879931, "learning_rate": 7.352909678238504e-06, "loss": 0.6516, "step": 4679 }, { "epoch": 0.6, "grad_norm": 0.6017272143891125, "learning_rate": 7.3489200105539845e-06, "loss": 0.6554, "step": 4680 }, { "epoch": 0.6, "grad_norm": 0.5715838566535782, "learning_rate": 7.3449307967021565e-06, "loss": 0.6349, "step": 4681 }, { "epoch": 0.6, "grad_norm": 0.553596183463174, "learning_rate": 7.340942037365918e-06, "loss": 0.6035, "step": 4682 }, { "epoch": 0.6, "grad_norm": 0.46214002563554746, "learning_rate": 7.336953733228096e-06, "loss": 0.583, "step": 4683 }, { "epoch": 0.6, "grad_norm": 0.4923384001126493, "learning_rate": 7.332965884971441e-06, "loss": 0.6046, "step": 4684 }, { "epoch": 0.6, "grad_norm": 0.5577243636679424, "learning_rate": 7.328978493278624e-06, "loss": 0.65, "step": 4685 }, { "epoch": 0.6, "grad_norm": 0.5306651259676431, "learning_rate": 7.32499155883224e-06, "loss": 0.6102, "step": 4686 }, { "epoch": 0.6, "grad_norm": 0.5917516504017845, "learning_rate": 7.321005082314798e-06, "loss": 0.6534, "step": 4687 }, { "epoch": 0.6, "grad_norm": 0.5382057287149424, "learning_rate": 7.317019064408739e-06, "loss": 0.6243, "step": 4688 }, { "epoch": 0.6, "grad_norm": 0.4453469115119303, "learning_rate": 7.313033505796422e-06, "loss": 0.5478, "step": 4689 }, { "epoch": 0.6, "grad_norm": 0.4840816878108133, "learning_rate": 7.309048407160126e-06, "loss": 0.5995, "step": 4690 }, { "epoch": 0.6, "grad_norm": 0.6189576028353194, "learning_rate": 7.305063769182047e-06, "loss": 0.6509, "step": 4691 }, { "epoch": 0.6, "grad_norm": 0.4650338735253515, "learning_rate": 7.301079592544309e-06, "loss": 0.5993, "step": 4692 }, { "epoch": 0.6, "grad_norm": 0.4555992496547345, "learning_rate": 7.297095877928955e-06, "loss": 0.5714, "step": 4693 }, { "epoch": 0.6, "grad_norm": 0.5169780135863352, "learning_rate": 7.2931126260179465e-06, "loss": 0.6057, "step": 4694 }, { "epoch": 0.6, "grad_norm": 0.5082649149192717, "learning_rate": 7.289129837493172e-06, "loss": 0.6166, "step": 4695 }, { "epoch": 0.6, "grad_norm": 0.4893227260720826, "learning_rate": 7.285147513036435e-06, "loss": 0.6107, "step": 4696 }, { "epoch": 0.6, "grad_norm": 0.5074399560218152, "learning_rate": 7.281165653329459e-06, "loss": 0.602, "step": 4697 }, { "epoch": 0.6, "grad_norm": 0.5310880774636899, "learning_rate": 7.277184259053892e-06, "loss": 0.6324, "step": 4698 }, { "epoch": 0.6, "grad_norm": 0.5511796489196881, "learning_rate": 7.273203330891303e-06, "loss": 0.602, "step": 4699 }, { "epoch": 0.6, "grad_norm": 0.47647884244749195, "learning_rate": 7.269222869523171e-06, "loss": 0.5644, "step": 4700 }, { "epoch": 0.6, "grad_norm": 0.5328720814139271, "learning_rate": 7.265242875630903e-06, "loss": 0.6174, "step": 4701 }, { "epoch": 0.6, "grad_norm": 0.49322609875939855, "learning_rate": 7.2612633498958375e-06, "loss": 0.6152, "step": 4702 }, { "epoch": 0.6, "grad_norm": 0.5139319319725132, "learning_rate": 7.257284292999209e-06, "loss": 0.6112, "step": 4703 }, { "epoch": 0.6, "grad_norm": 0.4403385389601252, "learning_rate": 7.253305705622189e-06, "loss": 0.6011, "step": 4704 }, { "epoch": 0.6, "grad_norm": 0.48044907901749184, "learning_rate": 7.24932758844586e-06, "loss": 0.6278, "step": 4705 }, { "epoch": 0.6, "grad_norm": 0.4569614672299521, "learning_rate": 7.245349942151231e-06, "loss": 0.5984, "step": 4706 }, { "epoch": 0.6, "grad_norm": 0.5455984386793257, "learning_rate": 7.2413727674192235e-06, "loss": 0.66, "step": 4707 }, { "epoch": 0.6, "grad_norm": 0.5070499917812664, "learning_rate": 7.237396064930682e-06, "loss": 0.5718, "step": 4708 }, { "epoch": 0.6, "grad_norm": 0.5716575897934727, "learning_rate": 7.233419835366373e-06, "loss": 0.5952, "step": 4709 }, { "epoch": 0.6, "grad_norm": 0.46393340369757374, "learning_rate": 7.229444079406976e-06, "loss": 0.592, "step": 4710 }, { "epoch": 0.6, "grad_norm": 0.5054220546129711, "learning_rate": 7.225468797733094e-06, "loss": 0.5867, "step": 4711 }, { "epoch": 0.6, "grad_norm": 0.47858766440103195, "learning_rate": 7.221493991025243e-06, "loss": 0.562, "step": 4712 }, { "epoch": 0.6, "grad_norm": 0.5684133623400821, "learning_rate": 7.217519659963865e-06, "loss": 0.649, "step": 4713 }, { "epoch": 0.6, "grad_norm": 0.503493816552067, "learning_rate": 7.213545805229316e-06, "loss": 0.6024, "step": 4714 }, { "epoch": 0.6, "grad_norm": 0.6043226935720002, "learning_rate": 7.20957242750187e-06, "loss": 0.6151, "step": 4715 }, { "epoch": 0.6, "grad_norm": 0.5061321581766749, "learning_rate": 7.205599527461724e-06, "loss": 0.569, "step": 4716 }, { "epoch": 0.6, "grad_norm": 0.5163424958620947, "learning_rate": 7.2016271057889885e-06, "loss": 0.5732, "step": 4717 }, { "epoch": 0.6, "grad_norm": 0.4976851173516435, "learning_rate": 7.197655163163695e-06, "loss": 0.6066, "step": 4718 }, { "epoch": 0.6, "grad_norm": 0.5605834408279056, "learning_rate": 7.193683700265792e-06, "loss": 0.6566, "step": 4719 }, { "epoch": 0.6, "grad_norm": 0.6308659707087537, "learning_rate": 7.1897127177751455e-06, "loss": 0.7005, "step": 4720 }, { "epoch": 0.6, "grad_norm": 0.5372756447987097, "learning_rate": 7.185742216371539e-06, "loss": 0.6166, "step": 4721 }, { "epoch": 0.6, "grad_norm": 0.5307755181183648, "learning_rate": 7.181772196734668e-06, "loss": 0.6332, "step": 4722 }, { "epoch": 0.6, "grad_norm": 0.581052466421118, "learning_rate": 7.177802659544164e-06, "loss": 0.6158, "step": 4723 }, { "epoch": 0.6, "grad_norm": 0.6207543243319109, "learning_rate": 7.173833605479556e-06, "loss": 0.6434, "step": 4724 }, { "epoch": 0.6, "grad_norm": 0.5097727111882434, "learning_rate": 7.169865035220298e-06, "loss": 0.5793, "step": 4725 }, { "epoch": 0.6, "grad_norm": 0.5459048021449754, "learning_rate": 7.1658969494457605e-06, "loss": 0.63, "step": 4726 }, { "epoch": 0.6, "grad_norm": 0.5487355127580664, "learning_rate": 7.161929348835236e-06, "loss": 0.6219, "step": 4727 }, { "epoch": 0.6, "grad_norm": 0.47869357069727053, "learning_rate": 7.157962234067928e-06, "loss": 0.5966, "step": 4728 }, { "epoch": 0.6, "grad_norm": 0.553653887554062, "learning_rate": 7.15399560582295e-06, "loss": 0.6544, "step": 4729 }, { "epoch": 0.6, "grad_norm": 0.49875694367760437, "learning_rate": 7.150029464779351e-06, "loss": 0.5802, "step": 4730 }, { "epoch": 0.6, "grad_norm": 0.4751694288761689, "learning_rate": 7.146063811616083e-06, "loss": 0.6155, "step": 4731 }, { "epoch": 0.6, "grad_norm": 0.48662034588028263, "learning_rate": 7.142098647012019e-06, "loss": 0.6006, "step": 4732 }, { "epoch": 0.6, "grad_norm": 0.5336352016597641, "learning_rate": 7.138133971645944e-06, "loss": 0.6067, "step": 4733 }, { "epoch": 0.6, "grad_norm": 0.5568353524394754, "learning_rate": 7.134169786196563e-06, "loss": 0.6409, "step": 4734 }, { "epoch": 0.6, "grad_norm": 0.4930839861107672, "learning_rate": 7.130206091342498e-06, "loss": 0.6191, "step": 4735 }, { "epoch": 0.61, "grad_norm": 0.4939723906423243, "learning_rate": 7.126242887762284e-06, "loss": 0.5997, "step": 4736 }, { "epoch": 0.61, "grad_norm": 0.4982058461787992, "learning_rate": 7.12228017613437e-06, "loss": 0.6203, "step": 4737 }, { "epoch": 0.61, "grad_norm": 0.46590820643369135, "learning_rate": 7.118317957137131e-06, "loss": 0.5938, "step": 4738 }, { "epoch": 0.61, "grad_norm": 0.4901962538015545, "learning_rate": 7.114356231448848e-06, "loss": 0.5866, "step": 4739 }, { "epoch": 0.61, "grad_norm": 0.5569544936292276, "learning_rate": 7.110394999747718e-06, "loss": 0.6163, "step": 4740 }, { "epoch": 0.61, "grad_norm": 0.5373117581676917, "learning_rate": 7.106434262711862e-06, "loss": 0.5686, "step": 4741 }, { "epoch": 0.61, "grad_norm": 0.5751917775177813, "learning_rate": 7.102474021019302e-06, "loss": 0.6522, "step": 4742 }, { "epoch": 0.61, "grad_norm": 0.4895152536536468, "learning_rate": 7.0985142753479875e-06, "loss": 0.6101, "step": 4743 }, { "epoch": 0.61, "grad_norm": 0.50861875154536, "learning_rate": 7.094555026375776e-06, "loss": 0.5917, "step": 4744 }, { "epoch": 0.61, "grad_norm": 0.5125878888457376, "learning_rate": 7.090596274780446e-06, "loss": 0.6057, "step": 4745 }, { "epoch": 0.61, "grad_norm": 0.5390757979972634, "learning_rate": 7.086638021239686e-06, "loss": 0.628, "step": 4746 }, { "epoch": 0.61, "grad_norm": 0.5526869454270317, "learning_rate": 7.082680266431104e-06, "loss": 0.6398, "step": 4747 }, { "epoch": 0.61, "grad_norm": 0.5182893840590472, "learning_rate": 7.078723011032217e-06, "loss": 0.6116, "step": 4748 }, { "epoch": 0.61, "grad_norm": 0.511271061775949, "learning_rate": 7.07476625572046e-06, "loss": 0.6063, "step": 4749 }, { "epoch": 0.61, "grad_norm": 0.43671384535950414, "learning_rate": 7.07081000117318e-06, "loss": 0.5841, "step": 4750 }, { "epoch": 0.61, "grad_norm": 0.4980598356022004, "learning_rate": 7.066854248067634e-06, "loss": 0.5979, "step": 4751 }, { "epoch": 0.61, "grad_norm": 0.5899537632630033, "learning_rate": 7.062898997081011e-06, "loss": 0.653, "step": 4752 }, { "epoch": 0.61, "grad_norm": 0.48715688471528334, "learning_rate": 7.058944248890398e-06, "loss": 0.5943, "step": 4753 }, { "epoch": 0.61, "grad_norm": 0.4938865861968191, "learning_rate": 7.0549900041727955e-06, "loss": 0.6176, "step": 4754 }, { "epoch": 0.61, "grad_norm": 0.493138112370289, "learning_rate": 7.0510362636051244e-06, "loss": 0.6169, "step": 4755 }, { "epoch": 0.61, "grad_norm": 0.5746585361930161, "learning_rate": 7.047083027864219e-06, "loss": 0.6393, "step": 4756 }, { "epoch": 0.61, "grad_norm": 0.5750265122710909, "learning_rate": 7.043130297626821e-06, "loss": 0.6389, "step": 4757 }, { "epoch": 0.61, "grad_norm": 0.45866723739978055, "learning_rate": 7.039178073569592e-06, "loss": 0.5729, "step": 4758 }, { "epoch": 0.61, "grad_norm": 0.4803506685895751, "learning_rate": 7.035226356369107e-06, "loss": 0.6071, "step": 4759 }, { "epoch": 0.61, "grad_norm": 0.5089628993499122, "learning_rate": 7.031275146701849e-06, "loss": 0.6095, "step": 4760 }, { "epoch": 0.61, "grad_norm": 0.4973340314647106, "learning_rate": 7.027324445244222e-06, "loss": 0.5938, "step": 4761 }, { "epoch": 0.61, "grad_norm": 0.641798525118932, "learning_rate": 7.02337425267253e-06, "loss": 0.6143, "step": 4762 }, { "epoch": 0.61, "grad_norm": 0.4781656003559285, "learning_rate": 7.019424569663003e-06, "loss": 0.58, "step": 4763 }, { "epoch": 0.61, "grad_norm": 0.5269527620915606, "learning_rate": 7.0154753968917776e-06, "loss": 0.6039, "step": 4764 }, { "epoch": 0.61, "grad_norm": 0.5453783278743295, "learning_rate": 7.0115267350349035e-06, "loss": 0.6056, "step": 4765 }, { "epoch": 0.61, "grad_norm": 0.46861160089593007, "learning_rate": 7.007578584768345e-06, "loss": 0.564, "step": 4766 }, { "epoch": 0.61, "grad_norm": 0.6261580885643948, "learning_rate": 7.003630946767979e-06, "loss": 0.6399, "step": 4767 }, { "epoch": 0.61, "grad_norm": 0.5448040983224113, "learning_rate": 6.9996838217095906e-06, "loss": 0.6365, "step": 4768 }, { "epoch": 0.61, "grad_norm": 0.5453199228269636, "learning_rate": 6.99573721026888e-06, "loss": 0.6107, "step": 4769 }, { "epoch": 0.61, "grad_norm": 0.524314513680935, "learning_rate": 6.991791113121462e-06, "loss": 0.5771, "step": 4770 }, { "epoch": 0.61, "grad_norm": 0.5851067166145371, "learning_rate": 6.987845530942855e-06, "loss": 0.63, "step": 4771 }, { "epoch": 0.61, "grad_norm": 0.5237501704506649, "learning_rate": 6.983900464408494e-06, "loss": 0.5954, "step": 4772 }, { "epoch": 0.61, "grad_norm": 0.5038522519245887, "learning_rate": 6.979955914193734e-06, "loss": 0.593, "step": 4773 }, { "epoch": 0.61, "grad_norm": 0.5497180678149995, "learning_rate": 6.976011880973834e-06, "loss": 0.6403, "step": 4774 }, { "epoch": 0.61, "grad_norm": 0.5885675185819834, "learning_rate": 6.972068365423956e-06, "loss": 0.6364, "step": 4775 }, { "epoch": 0.61, "grad_norm": 0.5425570283476285, "learning_rate": 6.968125368219189e-06, "loss": 0.5906, "step": 4776 }, { "epoch": 0.61, "grad_norm": 0.5282331187737458, "learning_rate": 6.964182890034523e-06, "loss": 0.5777, "step": 4777 }, { "epoch": 0.61, "grad_norm": 0.46373498918308653, "learning_rate": 6.9602409315448635e-06, "loss": 0.5756, "step": 4778 }, { "epoch": 0.61, "grad_norm": 0.5444610395771892, "learning_rate": 6.956299493425024e-06, "loss": 0.6054, "step": 4779 }, { "epoch": 0.61, "grad_norm": 0.559335798003747, "learning_rate": 6.952358576349733e-06, "loss": 0.6101, "step": 4780 }, { "epoch": 0.61, "grad_norm": 0.5380021573678324, "learning_rate": 6.94841818099363e-06, "loss": 0.5873, "step": 4781 }, { "epoch": 0.61, "grad_norm": 0.5342558330981406, "learning_rate": 6.94447830803126e-06, "loss": 0.6196, "step": 4782 }, { "epoch": 0.61, "grad_norm": 0.5133233631679092, "learning_rate": 6.9405389581370795e-06, "loss": 0.6291, "step": 4783 }, { "epoch": 0.61, "grad_norm": 0.5389032354453833, "learning_rate": 6.9366001319854605e-06, "loss": 0.6551, "step": 4784 }, { "epoch": 0.61, "grad_norm": 0.4940319900834703, "learning_rate": 6.93266183025068e-06, "loss": 0.5747, "step": 4785 }, { "epoch": 0.61, "grad_norm": 0.5081794367879039, "learning_rate": 6.9287240536069255e-06, "loss": 0.6094, "step": 4786 }, { "epoch": 0.61, "grad_norm": 0.4801173560263546, "learning_rate": 6.924786802728303e-06, "loss": 0.594, "step": 4787 }, { "epoch": 0.61, "grad_norm": 0.4852875531255404, "learning_rate": 6.920850078288817e-06, "loss": 0.5912, "step": 4788 }, { "epoch": 0.61, "grad_norm": 0.4884325175911815, "learning_rate": 6.916913880962388e-06, "loss": 0.6214, "step": 4789 }, { "epoch": 0.61, "grad_norm": 0.5209257248435476, "learning_rate": 6.912978211422847e-06, "loss": 0.5778, "step": 4790 }, { "epoch": 0.61, "grad_norm": 0.445668584737088, "learning_rate": 6.909043070343933e-06, "loss": 0.5853, "step": 4791 }, { "epoch": 0.61, "grad_norm": 0.6208435683932566, "learning_rate": 6.905108458399289e-06, "loss": 0.7046, "step": 4792 }, { "epoch": 0.61, "grad_norm": 0.5729484479928597, "learning_rate": 6.901174376262473e-06, "loss": 0.6481, "step": 4793 }, { "epoch": 0.61, "grad_norm": 0.5310653089924579, "learning_rate": 6.8972408246069614e-06, "loss": 0.6134, "step": 4794 }, { "epoch": 0.61, "grad_norm": 0.4556728917604472, "learning_rate": 6.893307804106122e-06, "loss": 0.5649, "step": 4795 }, { "epoch": 0.61, "grad_norm": 0.5626822933487124, "learning_rate": 6.889375315433243e-06, "loss": 0.6172, "step": 4796 }, { "epoch": 0.61, "grad_norm": 0.5315506992663377, "learning_rate": 6.8854433592615185e-06, "loss": 0.6151, "step": 4797 }, { "epoch": 0.61, "grad_norm": 0.489608285780309, "learning_rate": 6.881511936264052e-06, "loss": 0.6012, "step": 4798 }, { "epoch": 0.61, "grad_norm": 0.4915561963829631, "learning_rate": 6.8775810471138535e-06, "loss": 0.6041, "step": 4799 }, { "epoch": 0.61, "grad_norm": 0.4627625348966946, "learning_rate": 6.8736506924838475e-06, "loss": 0.566, "step": 4800 }, { "epoch": 0.61, "grad_norm": 0.485810510038775, "learning_rate": 6.869720873046853e-06, "loss": 0.6275, "step": 4801 }, { "epoch": 0.61, "grad_norm": 0.6055802207225597, "learning_rate": 6.865791589475619e-06, "loss": 0.6509, "step": 4802 }, { "epoch": 0.61, "grad_norm": 0.483061358490362, "learning_rate": 6.86186284244279e-06, "loss": 0.6147, "step": 4803 }, { "epoch": 0.61, "grad_norm": 0.4735109995401817, "learning_rate": 6.857934632620913e-06, "loss": 0.5839, "step": 4804 }, { "epoch": 0.61, "grad_norm": 0.5280247758433118, "learning_rate": 6.854006960682453e-06, "loss": 0.5988, "step": 4805 }, { "epoch": 0.61, "grad_norm": 0.48233396389473004, "learning_rate": 6.850079827299781e-06, "loss": 0.609, "step": 4806 }, { "epoch": 0.61, "grad_norm": 0.5071084730876629, "learning_rate": 6.846153233145172e-06, "loss": 0.5823, "step": 4807 }, { "epoch": 0.61, "grad_norm": 0.4735587879754463, "learning_rate": 6.842227178890812e-06, "loss": 0.59, "step": 4808 }, { "epoch": 0.61, "grad_norm": 0.5496596086478768, "learning_rate": 6.838301665208795e-06, "loss": 0.6555, "step": 4809 }, { "epoch": 0.61, "grad_norm": 0.5357400008138907, "learning_rate": 6.834376692771122e-06, "loss": 0.6102, "step": 4810 }, { "epoch": 0.61, "grad_norm": 0.4975171315703975, "learning_rate": 6.8304522622496985e-06, "loss": 0.6049, "step": 4811 }, { "epoch": 0.61, "grad_norm": 0.5364498933278617, "learning_rate": 6.826528374316343e-06, "loss": 0.6336, "step": 4812 }, { "epoch": 0.61, "grad_norm": 0.5106354568194084, "learning_rate": 6.82260502964277e-06, "loss": 0.615, "step": 4813 }, { "epoch": 0.61, "grad_norm": 0.6355594714397955, "learning_rate": 6.818682228900615e-06, "loss": 0.633, "step": 4814 }, { "epoch": 0.62, "grad_norm": 0.5146420050800544, "learning_rate": 6.81475997276141e-06, "loss": 0.5789, "step": 4815 }, { "epoch": 0.62, "grad_norm": 0.5351295439674443, "learning_rate": 6.810838261896602e-06, "loss": 0.5793, "step": 4816 }, { "epoch": 0.62, "grad_norm": 0.504702101376047, "learning_rate": 6.8069170969775355e-06, "loss": 0.622, "step": 4817 }, { "epoch": 0.62, "grad_norm": 0.5303645144556393, "learning_rate": 6.80299647867547e-06, "loss": 0.6325, "step": 4818 }, { "epoch": 0.62, "grad_norm": 0.5393932005279166, "learning_rate": 6.799076407661564e-06, "loss": 0.6013, "step": 4819 }, { "epoch": 0.62, "grad_norm": 0.45995551181273137, "learning_rate": 6.79515688460689e-06, "loss": 0.5936, "step": 4820 }, { "epoch": 0.62, "grad_norm": 0.5639140540934362, "learning_rate": 6.791237910182425e-06, "loss": 0.6341, "step": 4821 }, { "epoch": 0.62, "grad_norm": 0.5659994749961808, "learning_rate": 6.787319485059036e-06, "loss": 0.6121, "step": 4822 }, { "epoch": 0.62, "grad_norm": 0.5420538065940778, "learning_rate": 6.783401609907525e-06, "loss": 0.6202, "step": 4823 }, { "epoch": 0.62, "grad_norm": 0.5788123330578192, "learning_rate": 6.779484285398581e-06, "loss": 0.6466, "step": 4824 }, { "epoch": 0.62, "grad_norm": 0.5328758102949006, "learning_rate": 6.775567512202799e-06, "loss": 0.6373, "step": 4825 }, { "epoch": 0.62, "grad_norm": 0.5151974089315237, "learning_rate": 6.771651290990684e-06, "loss": 0.5668, "step": 4826 }, { "epoch": 0.62, "grad_norm": 0.5081707986499886, "learning_rate": 6.767735622432646e-06, "loss": 0.5882, "step": 4827 }, { "epoch": 0.62, "grad_norm": 0.5319656022220077, "learning_rate": 6.763820507199e-06, "loss": 0.6091, "step": 4828 }, { "epoch": 0.62, "grad_norm": 0.5532151681193432, "learning_rate": 6.759905945959964e-06, "loss": 0.6351, "step": 4829 }, { "epoch": 0.62, "grad_norm": 0.491848338713878, "learning_rate": 6.755991939385667e-06, "loss": 0.5849, "step": 4830 }, { "epoch": 0.62, "grad_norm": 0.5583268200198312, "learning_rate": 6.752078488146139e-06, "loss": 0.5975, "step": 4831 }, { "epoch": 0.62, "grad_norm": 0.5215836027264391, "learning_rate": 6.748165592911312e-06, "loss": 0.6053, "step": 4832 }, { "epoch": 0.62, "grad_norm": 0.5171689570154338, "learning_rate": 6.744253254351032e-06, "loss": 0.6211, "step": 4833 }, { "epoch": 0.62, "grad_norm": 0.4664214464759565, "learning_rate": 6.740341473135037e-06, "loss": 0.5981, "step": 4834 }, { "epoch": 0.62, "grad_norm": 0.5777153596598804, "learning_rate": 6.736430249932979e-06, "loss": 0.6479, "step": 4835 }, { "epoch": 0.62, "grad_norm": 0.5115316429035945, "learning_rate": 6.7325195854144106e-06, "loss": 0.5966, "step": 4836 }, { "epoch": 0.62, "grad_norm": 0.5104119074680838, "learning_rate": 6.728609480248795e-06, "loss": 0.6024, "step": 4837 }, { "epoch": 0.62, "grad_norm": 0.46140785020277, "learning_rate": 6.72469993510549e-06, "loss": 0.5697, "step": 4838 }, { "epoch": 0.62, "grad_norm": 0.5171897241329695, "learning_rate": 6.720790950653764e-06, "loss": 0.5951, "step": 4839 }, { "epoch": 0.62, "grad_norm": 0.52327901559373, "learning_rate": 6.716882527562787e-06, "loss": 0.6314, "step": 4840 }, { "epoch": 0.62, "grad_norm": 0.47093626327326343, "learning_rate": 6.712974666501635e-06, "loss": 0.5925, "step": 4841 }, { "epoch": 0.62, "grad_norm": 0.5355971868935319, "learning_rate": 6.709067368139286e-06, "loss": 0.6525, "step": 4842 }, { "epoch": 0.62, "grad_norm": 0.5030817813034787, "learning_rate": 6.705160633144616e-06, "loss": 0.5951, "step": 4843 }, { "epoch": 0.62, "grad_norm": 0.4481661446344311, "learning_rate": 6.701254462186419e-06, "loss": 0.5714, "step": 4844 }, { "epoch": 0.62, "grad_norm": 0.4731626554826207, "learning_rate": 6.697348855933383e-06, "loss": 0.5976, "step": 4845 }, { "epoch": 0.62, "grad_norm": 0.5293891817089749, "learning_rate": 6.693443815054097e-06, "loss": 0.6016, "step": 4846 }, { "epoch": 0.62, "grad_norm": 0.5230670131650427, "learning_rate": 6.689539340217057e-06, "loss": 0.6072, "step": 4847 }, { "epoch": 0.62, "grad_norm": 0.519895499267344, "learning_rate": 6.685635432090663e-06, "loss": 0.6163, "step": 4848 }, { "epoch": 0.62, "grad_norm": 0.5782972141808795, "learning_rate": 6.681732091343216e-06, "loss": 0.5929, "step": 4849 }, { "epoch": 0.62, "grad_norm": 0.5072351646560168, "learning_rate": 6.677829318642916e-06, "loss": 0.5836, "step": 4850 }, { "epoch": 0.62, "grad_norm": 0.4810295611986939, "learning_rate": 6.67392711465788e-06, "loss": 0.5972, "step": 4851 }, { "epoch": 0.62, "grad_norm": 0.4422885405975175, "learning_rate": 6.6700254800561115e-06, "loss": 0.5974, "step": 4852 }, { "epoch": 0.62, "grad_norm": 0.5130407949518693, "learning_rate": 6.666124415505523e-06, "loss": 0.5949, "step": 4853 }, { "epoch": 0.62, "grad_norm": 0.4766838807284104, "learning_rate": 6.6622239216739335e-06, "loss": 0.5989, "step": 4854 }, { "epoch": 0.62, "grad_norm": 0.49530372515949606, "learning_rate": 6.658323999229056e-06, "loss": 0.5858, "step": 4855 }, { "epoch": 0.62, "grad_norm": 0.524135186666181, "learning_rate": 6.65442464883851e-06, "loss": 0.5899, "step": 4856 }, { "epoch": 0.62, "grad_norm": 0.5264593951373452, "learning_rate": 6.650525871169816e-06, "loss": 0.5996, "step": 4857 }, { "epoch": 0.62, "grad_norm": 0.5725530889042192, "learning_rate": 6.646627666890402e-06, "loss": 0.6553, "step": 4858 }, { "epoch": 0.62, "grad_norm": 0.5330650482842021, "learning_rate": 6.64273003666759e-06, "loss": 0.5986, "step": 4859 }, { "epoch": 0.62, "grad_norm": 0.489788145174553, "learning_rate": 6.638832981168606e-06, "loss": 0.5823, "step": 4860 }, { "epoch": 0.62, "grad_norm": 0.5531173555358448, "learning_rate": 6.6349365010605814e-06, "loss": 0.626, "step": 4861 }, { "epoch": 0.62, "grad_norm": 0.4777478164756835, "learning_rate": 6.631040597010545e-06, "loss": 0.5941, "step": 4862 }, { "epoch": 0.62, "grad_norm": 0.515270539322788, "learning_rate": 6.627145269685429e-06, "loss": 0.6059, "step": 4863 }, { "epoch": 0.62, "grad_norm": 0.4637169529620515, "learning_rate": 6.623250519752065e-06, "loss": 0.6123, "step": 4864 }, { "epoch": 0.62, "grad_norm": 0.5340434303583526, "learning_rate": 6.619356347877181e-06, "loss": 0.6142, "step": 4865 }, { "epoch": 0.62, "grad_norm": 0.5406178037016245, "learning_rate": 6.615462754727425e-06, "loss": 0.5972, "step": 4866 }, { "epoch": 0.62, "grad_norm": 0.4826958697615323, "learning_rate": 6.611569740969324e-06, "loss": 0.6102, "step": 4867 }, { "epoch": 0.62, "grad_norm": 0.45834263294431615, "learning_rate": 6.607677307269316e-06, "loss": 0.6039, "step": 4868 }, { "epoch": 0.62, "grad_norm": 0.4689825282909744, "learning_rate": 6.6037854542937385e-06, "loss": 0.5813, "step": 4869 }, { "epoch": 0.62, "grad_norm": 0.5857981468468018, "learning_rate": 6.599894182708831e-06, "loss": 0.65, "step": 4870 }, { "epoch": 0.62, "grad_norm": 0.5063637146786124, "learning_rate": 6.59600349318073e-06, "loss": 0.6291, "step": 4871 }, { "epoch": 0.62, "grad_norm": 0.5834469996614158, "learning_rate": 6.592113386375472e-06, "loss": 0.616, "step": 4872 }, { "epoch": 0.62, "grad_norm": 0.53614702663955, "learning_rate": 6.5882238629590025e-06, "loss": 0.6038, "step": 4873 }, { "epoch": 0.62, "grad_norm": 0.6207476790677744, "learning_rate": 6.584334923597155e-06, "loss": 0.6266, "step": 4874 }, { "epoch": 0.62, "grad_norm": 0.4808172735663939, "learning_rate": 6.580446568955676e-06, "loss": 0.5594, "step": 4875 }, { "epoch": 0.62, "grad_norm": 0.5543772925333188, "learning_rate": 6.576558799700195e-06, "loss": 0.6654, "step": 4876 }, { "epoch": 0.62, "grad_norm": 0.5305103721090119, "learning_rate": 6.5726716164962556e-06, "loss": 0.575, "step": 4877 }, { "epoch": 0.62, "grad_norm": 0.4794023272823103, "learning_rate": 6.568785020009296e-06, "loss": 0.5885, "step": 4878 }, { "epoch": 0.62, "grad_norm": 0.5829312001941058, "learning_rate": 6.564899010904653e-06, "loss": 0.6496, "step": 4879 }, { "epoch": 0.62, "grad_norm": 0.5269675029460783, "learning_rate": 6.561013589847567e-06, "loss": 0.6281, "step": 4880 }, { "epoch": 0.62, "grad_norm": 0.49685001069023704, "learning_rate": 6.557128757503174e-06, "loss": 0.6107, "step": 4881 }, { "epoch": 0.62, "grad_norm": 0.5250579121407165, "learning_rate": 6.553244514536509e-06, "loss": 0.5858, "step": 4882 }, { "epoch": 0.62, "grad_norm": 0.49083066590020463, "learning_rate": 6.549360861612507e-06, "loss": 0.6073, "step": 4883 }, { "epoch": 0.62, "grad_norm": 0.46485586441841686, "learning_rate": 6.545477799396005e-06, "loss": 0.5962, "step": 4884 }, { "epoch": 0.62, "grad_norm": 0.5257243720306046, "learning_rate": 6.541595328551733e-06, "loss": 0.6062, "step": 4885 }, { "epoch": 0.62, "grad_norm": 0.5479677514900182, "learning_rate": 6.537713449744318e-06, "loss": 0.6003, "step": 4886 }, { "epoch": 0.62, "grad_norm": 0.4930722975061792, "learning_rate": 6.533832163638304e-06, "loss": 0.5821, "step": 4887 }, { "epoch": 0.62, "grad_norm": 0.5002138486488595, "learning_rate": 6.5299514708981095e-06, "loss": 0.6254, "step": 4888 }, { "epoch": 0.62, "grad_norm": 0.522232574765113, "learning_rate": 6.526071372188064e-06, "loss": 0.6128, "step": 4889 }, { "epoch": 0.62, "grad_norm": 0.4633755329730746, "learning_rate": 6.522191868172394e-06, "loss": 0.5663, "step": 4890 }, { "epoch": 0.62, "grad_norm": 0.4845414877318996, "learning_rate": 6.518312959515225e-06, "loss": 0.5924, "step": 4891 }, { "epoch": 0.62, "grad_norm": 0.4552052730594476, "learning_rate": 6.514434646880577e-06, "loss": 0.5969, "step": 4892 }, { "epoch": 0.63, "grad_norm": 0.5567128994679771, "learning_rate": 6.510556930932367e-06, "loss": 0.6049, "step": 4893 }, { "epoch": 0.63, "grad_norm": 0.48047981315329313, "learning_rate": 6.506679812334419e-06, "loss": 0.5787, "step": 4894 }, { "epoch": 0.63, "grad_norm": 0.4399271537721485, "learning_rate": 6.502803291750446e-06, "loss": 0.5516, "step": 4895 }, { "epoch": 0.63, "grad_norm": 0.4750925375680419, "learning_rate": 6.4989273698440635e-06, "loss": 0.5876, "step": 4896 }, { "epoch": 0.63, "grad_norm": 0.45047192197698976, "learning_rate": 6.495052047278778e-06, "loss": 0.5734, "step": 4897 }, { "epoch": 0.63, "grad_norm": 0.6423194617640172, "learning_rate": 6.491177324717999e-06, "loss": 0.63, "step": 4898 }, { "epoch": 0.63, "grad_norm": 0.48927112395665967, "learning_rate": 6.487303202825033e-06, "loss": 0.5953, "step": 4899 }, { "epoch": 0.63, "grad_norm": 0.5186120175660037, "learning_rate": 6.483429682263079e-06, "loss": 0.6018, "step": 4900 }, { "epoch": 0.63, "grad_norm": 0.6006523643269841, "learning_rate": 6.479556763695243e-06, "loss": 0.6033, "step": 4901 }, { "epoch": 0.63, "grad_norm": 0.5523277665791116, "learning_rate": 6.475684447784516e-06, "loss": 0.5868, "step": 4902 }, { "epoch": 0.63, "grad_norm": 0.5254481919382509, "learning_rate": 6.471812735193797e-06, "loss": 0.6152, "step": 4903 }, { "epoch": 0.63, "grad_norm": 0.5330736242203962, "learning_rate": 6.467941626585869e-06, "loss": 0.6372, "step": 4904 }, { "epoch": 0.63, "grad_norm": 0.6034554990250153, "learning_rate": 6.464071122623429e-06, "loss": 0.6312, "step": 4905 }, { "epoch": 0.63, "grad_norm": 0.5502286101928646, "learning_rate": 6.460201223969049e-06, "loss": 0.6098, "step": 4906 }, { "epoch": 0.63, "grad_norm": 0.48630569744798813, "learning_rate": 6.4563319312852115e-06, "loss": 0.5783, "step": 4907 }, { "epoch": 0.63, "grad_norm": 0.5359907843745245, "learning_rate": 6.4524632452342995e-06, "loss": 0.6374, "step": 4908 }, { "epoch": 0.63, "grad_norm": 0.5776164544903177, "learning_rate": 6.448595166478577e-06, "loss": 0.6087, "step": 4909 }, { "epoch": 0.63, "grad_norm": 0.44293915831674474, "learning_rate": 6.444727695680216e-06, "loss": 0.5877, "step": 4910 }, { "epoch": 0.63, "grad_norm": 0.5351469514393882, "learning_rate": 6.44086083350128e-06, "loss": 0.6085, "step": 4911 }, { "epoch": 0.63, "grad_norm": 0.5145246465666901, "learning_rate": 6.4369945806037284e-06, "loss": 0.6064, "step": 4912 }, { "epoch": 0.63, "grad_norm": 0.4713510839668913, "learning_rate": 6.433128937649417e-06, "loss": 0.5722, "step": 4913 }, { "epoch": 0.63, "grad_norm": 0.4493454193343278, "learning_rate": 6.429263905300094e-06, "loss": 0.5595, "step": 4914 }, { "epoch": 0.63, "grad_norm": 0.5161231731204933, "learning_rate": 6.425399484217409e-06, "loss": 0.6101, "step": 4915 }, { "epoch": 0.63, "grad_norm": 0.5585935821767757, "learning_rate": 6.421535675062907e-06, "loss": 0.6319, "step": 4916 }, { "epoch": 0.63, "grad_norm": 0.4997097694825686, "learning_rate": 6.417672478498021e-06, "loss": 0.5946, "step": 4917 }, { "epoch": 0.63, "grad_norm": 0.4919407970271438, "learning_rate": 6.413809895184084e-06, "loss": 0.5878, "step": 4918 }, { "epoch": 0.63, "grad_norm": 0.48181174040025787, "learning_rate": 6.4099479257823225e-06, "loss": 0.5839, "step": 4919 }, { "epoch": 0.63, "grad_norm": 0.5078375105650056, "learning_rate": 6.40608657095386e-06, "loss": 0.6089, "step": 4920 }, { "epoch": 0.63, "grad_norm": 0.5133456935554866, "learning_rate": 6.402225831359713e-06, "loss": 0.6177, "step": 4921 }, { "epoch": 0.63, "grad_norm": 0.5410850703732368, "learning_rate": 6.398365707660792e-06, "loss": 0.6165, "step": 4922 }, { "epoch": 0.63, "grad_norm": 0.48529174000384906, "learning_rate": 6.394506200517905e-06, "loss": 0.5726, "step": 4923 }, { "epoch": 0.63, "grad_norm": 0.49805393531852216, "learning_rate": 6.3906473105917535e-06, "loss": 0.5696, "step": 4924 }, { "epoch": 0.63, "grad_norm": 0.553194898213253, "learning_rate": 6.386789038542932e-06, "loss": 0.6365, "step": 4925 }, { "epoch": 0.63, "grad_norm": 0.6074184907754567, "learning_rate": 6.382931385031932e-06, "loss": 0.6265, "step": 4926 }, { "epoch": 0.63, "grad_norm": 0.49386421532332414, "learning_rate": 6.37907435071913e-06, "loss": 0.5967, "step": 4927 }, { "epoch": 0.63, "grad_norm": 0.5306684847496066, "learning_rate": 6.375217936264809e-06, "loss": 0.619, "step": 4928 }, { "epoch": 0.63, "grad_norm": 0.467807269144757, "learning_rate": 6.3713621423291365e-06, "loss": 0.5925, "step": 4929 }, { "epoch": 0.63, "grad_norm": 0.5850758645607914, "learning_rate": 6.367506969572182e-06, "loss": 0.6751, "step": 4930 }, { "epoch": 0.63, "grad_norm": 0.6428567672984529, "learning_rate": 6.363652418653903e-06, "loss": 0.6014, "step": 4931 }, { "epoch": 0.63, "grad_norm": 0.5305544466046928, "learning_rate": 6.35979849023415e-06, "loss": 0.6475, "step": 4932 }, { "epoch": 0.63, "grad_norm": 0.5161776391913794, "learning_rate": 6.355945184972671e-06, "loss": 0.6257, "step": 4933 }, { "epoch": 0.63, "grad_norm": 0.48374064242151965, "learning_rate": 6.352092503529104e-06, "loss": 0.6025, "step": 4934 }, { "epoch": 0.63, "grad_norm": 0.5163476980008402, "learning_rate": 6.348240446562981e-06, "loss": 0.5938, "step": 4935 }, { "epoch": 0.63, "grad_norm": 0.6632338874135713, "learning_rate": 6.344389014733723e-06, "loss": 0.5938, "step": 4936 }, { "epoch": 0.63, "grad_norm": 0.5460693793248539, "learning_rate": 6.340538208700655e-06, "loss": 0.6182, "step": 4937 }, { "epoch": 0.63, "grad_norm": 0.4770938634684813, "learning_rate": 6.33668802912299e-06, "loss": 0.5527, "step": 4938 }, { "epoch": 0.63, "grad_norm": 0.5381803105960928, "learning_rate": 6.332838476659825e-06, "loss": 0.5793, "step": 4939 }, { "epoch": 0.63, "grad_norm": 0.45166999675948377, "learning_rate": 6.328989551970159e-06, "loss": 0.5987, "step": 4940 }, { "epoch": 0.63, "grad_norm": 0.5890059848212487, "learning_rate": 6.325141255712883e-06, "loss": 0.639, "step": 4941 }, { "epoch": 0.63, "grad_norm": 0.4968833973716374, "learning_rate": 6.3212935885467775e-06, "loss": 0.5721, "step": 4942 }, { "epoch": 0.63, "grad_norm": 0.5283929304325362, "learning_rate": 6.317446551130512e-06, "loss": 0.659, "step": 4943 }, { "epoch": 0.63, "grad_norm": 0.5534002072243475, "learning_rate": 6.31360014412266e-06, "loss": 0.6254, "step": 4944 }, { "epoch": 0.63, "grad_norm": 0.4532476390768686, "learning_rate": 6.3097543681816765e-06, "loss": 0.5706, "step": 4945 }, { "epoch": 0.63, "grad_norm": 0.5165102178271785, "learning_rate": 6.305909223965913e-06, "loss": 0.6128, "step": 4946 }, { "epoch": 0.63, "grad_norm": 0.5118994355517409, "learning_rate": 6.3020647121336085e-06, "loss": 0.6043, "step": 4947 }, { "epoch": 0.63, "grad_norm": 0.5367944319640111, "learning_rate": 6.298220833342898e-06, "loss": 0.6038, "step": 4948 }, { "epoch": 0.63, "grad_norm": 0.49284913242954126, "learning_rate": 6.294377588251807e-06, "loss": 0.5598, "step": 4949 }, { "epoch": 0.63, "grad_norm": 0.5225056582424281, "learning_rate": 6.29053497751825e-06, "loss": 0.5838, "step": 4950 }, { "epoch": 0.63, "grad_norm": 0.5893695142019099, "learning_rate": 6.286693001800039e-06, "loss": 0.633, "step": 4951 }, { "epoch": 0.63, "grad_norm": 0.467250387973358, "learning_rate": 6.282851661754874e-06, "loss": 0.5838, "step": 4952 }, { "epoch": 0.63, "grad_norm": 0.5259144029179907, "learning_rate": 6.279010958040342e-06, "loss": 0.6511, "step": 4953 }, { "epoch": 0.63, "grad_norm": 0.4674782504447103, "learning_rate": 6.275170891313928e-06, "loss": 0.6028, "step": 4954 }, { "epoch": 0.63, "grad_norm": 0.4919988735047482, "learning_rate": 6.271331462233006e-06, "loss": 0.5931, "step": 4955 }, { "epoch": 0.63, "grad_norm": 0.5480858387873183, "learning_rate": 6.267492671454834e-06, "loss": 0.6107, "step": 4956 }, { "epoch": 0.63, "grad_norm": 0.5250191434184994, "learning_rate": 6.263654519636566e-06, "loss": 0.5708, "step": 4957 }, { "epoch": 0.63, "grad_norm": 0.538173673984267, "learning_rate": 6.259817007435254e-06, "loss": 0.5992, "step": 4958 }, { "epoch": 0.63, "grad_norm": 0.5308427353139339, "learning_rate": 6.255980135507833e-06, "loss": 0.5961, "step": 4959 }, { "epoch": 0.63, "grad_norm": 0.44181587770443326, "learning_rate": 6.252143904511123e-06, "loss": 0.595, "step": 4960 }, { "epoch": 0.63, "grad_norm": 0.5704645272158311, "learning_rate": 6.248308315101843e-06, "loss": 0.6222, "step": 4961 }, { "epoch": 0.63, "grad_norm": 0.4921316224859126, "learning_rate": 6.244473367936599e-06, "loss": 0.5938, "step": 4962 }, { "epoch": 0.63, "grad_norm": 0.5264408755762453, "learning_rate": 6.240639063671887e-06, "loss": 0.6261, "step": 4963 }, { "epoch": 0.63, "grad_norm": 0.4445605502803397, "learning_rate": 6.236805402964092e-06, "loss": 0.5708, "step": 4964 }, { "epoch": 0.63, "grad_norm": 0.5391355943855813, "learning_rate": 6.232972386469496e-06, "loss": 0.6043, "step": 4965 }, { "epoch": 0.63, "grad_norm": 0.5161316714103071, "learning_rate": 6.22914001484426e-06, "loss": 0.6049, "step": 4966 }, { "epoch": 0.63, "grad_norm": 0.510099359536809, "learning_rate": 6.22530828874444e-06, "loss": 0.6068, "step": 4967 }, { "epoch": 0.63, "grad_norm": 0.55492938945328, "learning_rate": 6.2214772088259825e-06, "loss": 0.6564, "step": 4968 }, { "epoch": 0.63, "grad_norm": 0.5485774500735225, "learning_rate": 6.217646775744719e-06, "loss": 0.6133, "step": 4969 }, { "epoch": 0.63, "grad_norm": 0.5058709801069627, "learning_rate": 6.213816990156376e-06, "loss": 0.5916, "step": 4970 }, { "epoch": 0.64, "grad_norm": 0.5575737960717533, "learning_rate": 6.209987852716561e-06, "loss": 0.5997, "step": 4971 }, { "epoch": 0.64, "grad_norm": 0.5421806921977833, "learning_rate": 6.206159364080784e-06, "loss": 0.625, "step": 4972 }, { "epoch": 0.64, "grad_norm": 0.468225761407729, "learning_rate": 6.20233152490443e-06, "loss": 0.6049, "step": 4973 }, { "epoch": 0.64, "grad_norm": 0.5211628748180563, "learning_rate": 6.198504335842781e-06, "loss": 0.5808, "step": 4974 }, { "epoch": 0.64, "grad_norm": 0.5126992293978346, "learning_rate": 6.194677797551003e-06, "loss": 0.6074, "step": 4975 }, { "epoch": 0.64, "grad_norm": 0.4519526734365714, "learning_rate": 6.190851910684157e-06, "loss": 0.5852, "step": 4976 }, { "epoch": 0.64, "grad_norm": 0.576146310501459, "learning_rate": 6.187026675897185e-06, "loss": 0.6675, "step": 4977 }, { "epoch": 0.64, "grad_norm": 0.5548679406221753, "learning_rate": 6.183202093844916e-06, "loss": 0.6262, "step": 4978 }, { "epoch": 0.64, "grad_norm": 0.5006557657563399, "learning_rate": 6.179378165182082e-06, "loss": 0.5668, "step": 4979 }, { "epoch": 0.64, "grad_norm": 0.5447686691472345, "learning_rate": 6.175554890563289e-06, "loss": 0.5763, "step": 4980 }, { "epoch": 0.64, "grad_norm": 0.4968774402970118, "learning_rate": 6.171732270643034e-06, "loss": 0.5954, "step": 4981 }, { "epoch": 0.64, "grad_norm": 0.5005677528754434, "learning_rate": 6.167910306075702e-06, "loss": 0.5919, "step": 4982 }, { "epoch": 0.64, "grad_norm": 0.4966110933106879, "learning_rate": 6.16408899751557e-06, "loss": 0.6086, "step": 4983 }, { "epoch": 0.64, "grad_norm": 0.5067054680873265, "learning_rate": 6.1602683456167975e-06, "loss": 0.5832, "step": 4984 }, { "epoch": 0.64, "grad_norm": 0.48797528271998214, "learning_rate": 6.156448351033437e-06, "loss": 0.5907, "step": 4985 }, { "epoch": 0.64, "grad_norm": 0.5720238098920332, "learning_rate": 6.152629014419417e-06, "loss": 0.6396, "step": 4986 }, { "epoch": 0.64, "grad_norm": 0.5202306428150113, "learning_rate": 6.14881033642857e-06, "loss": 0.5935, "step": 4987 }, { "epoch": 0.64, "grad_norm": 0.5246513443238731, "learning_rate": 6.1449923177146066e-06, "loss": 0.613, "step": 4988 }, { "epoch": 0.64, "grad_norm": 0.48929752823337774, "learning_rate": 6.141174958931122e-06, "loss": 0.6109, "step": 4989 }, { "epoch": 0.64, "grad_norm": 0.5073289266897231, "learning_rate": 6.137358260731601e-06, "loss": 0.6183, "step": 4990 }, { "epoch": 0.64, "grad_norm": 0.5180580920056516, "learning_rate": 6.133542223769419e-06, "loss": 0.6063, "step": 4991 }, { "epoch": 0.64, "grad_norm": 0.525680087419349, "learning_rate": 6.129726848697832e-06, "loss": 0.6059, "step": 4992 }, { "epoch": 0.64, "grad_norm": 0.5054584548005306, "learning_rate": 6.125912136169987e-06, "loss": 0.567, "step": 4993 }, { "epoch": 0.64, "grad_norm": 0.5264346381464861, "learning_rate": 6.122098086838919e-06, "loss": 0.6133, "step": 4994 }, { "epoch": 0.64, "grad_norm": 0.4956509080148353, "learning_rate": 6.118284701357544e-06, "loss": 0.5862, "step": 4995 }, { "epoch": 0.64, "grad_norm": 0.499070837793851, "learning_rate": 6.114471980378669e-06, "loss": 0.5692, "step": 4996 }, { "epoch": 0.64, "grad_norm": 0.6476111445249967, "learning_rate": 6.110659924554987e-06, "loss": 0.6088, "step": 4997 }, { "epoch": 0.64, "grad_norm": 0.5416198299573083, "learning_rate": 6.106848534539072e-06, "loss": 0.6226, "step": 4998 }, { "epoch": 0.64, "grad_norm": 0.4432648113334902, "learning_rate": 6.103037810983388e-06, "loss": 0.6008, "step": 4999 }, { "epoch": 0.64, "grad_norm": 0.4958606358415809, "learning_rate": 6.099227754540282e-06, "loss": 0.578, "step": 5000 }, { "epoch": 0.64, "grad_norm": 0.536371508171581, "learning_rate": 6.095418365861997e-06, "loss": 0.6325, "step": 5001 }, { "epoch": 0.64, "grad_norm": 0.4975576555490357, "learning_rate": 6.091609645600649e-06, "loss": 0.6064, "step": 5002 }, { "epoch": 0.64, "grad_norm": 0.5101468510011098, "learning_rate": 6.087801594408247e-06, "loss": 0.5806, "step": 5003 }, { "epoch": 0.64, "grad_norm": 0.517771268493719, "learning_rate": 6.0839942129366815e-06, "loss": 0.6157, "step": 5004 }, { "epoch": 0.64, "grad_norm": 0.5433589285477983, "learning_rate": 6.080187501837728e-06, "loss": 0.6328, "step": 5005 }, { "epoch": 0.64, "grad_norm": 0.5279576666556042, "learning_rate": 6.076381461763056e-06, "loss": 0.5963, "step": 5006 }, { "epoch": 0.64, "grad_norm": 0.5552486068293593, "learning_rate": 6.072576093364202e-06, "loss": 0.6215, "step": 5007 }, { "epoch": 0.64, "grad_norm": 0.5230844453500486, "learning_rate": 6.068771397292609e-06, "loss": 0.6219, "step": 5008 }, { "epoch": 0.64, "grad_norm": 0.5200047821456039, "learning_rate": 6.064967374199593e-06, "loss": 0.5776, "step": 5009 }, { "epoch": 0.64, "grad_norm": 0.5184022748380755, "learning_rate": 6.061164024736354e-06, "loss": 0.61, "step": 5010 }, { "epoch": 0.64, "grad_norm": 0.4540425702443932, "learning_rate": 6.05736134955398e-06, "loss": 0.5629, "step": 5011 }, { "epoch": 0.64, "grad_norm": 0.5562049153212841, "learning_rate": 6.053559349303442e-06, "loss": 0.6435, "step": 5012 }, { "epoch": 0.64, "grad_norm": 0.5391001451366789, "learning_rate": 6.049758024635598e-06, "loss": 0.6201, "step": 5013 }, { "epoch": 0.64, "grad_norm": 0.4589846719546909, "learning_rate": 6.045957376201186e-06, "loss": 0.5629, "step": 5014 }, { "epoch": 0.64, "grad_norm": 0.48926993196209106, "learning_rate": 6.042157404650833e-06, "loss": 0.5997, "step": 5015 }, { "epoch": 0.64, "grad_norm": 0.4838712095459227, "learning_rate": 6.038358110635047e-06, "loss": 0.5858, "step": 5016 }, { "epoch": 0.64, "grad_norm": 0.506145177246683, "learning_rate": 6.034559494804224e-06, "loss": 0.5903, "step": 5017 }, { "epoch": 0.64, "grad_norm": 0.4938772585028679, "learning_rate": 6.03076155780864e-06, "loss": 0.6092, "step": 5018 }, { "epoch": 0.64, "grad_norm": 0.5104392402847681, "learning_rate": 6.02696430029845e-06, "loss": 0.5978, "step": 5019 }, { "epoch": 0.64, "grad_norm": 0.4570832238111186, "learning_rate": 6.023167722923704e-06, "loss": 0.5664, "step": 5020 }, { "epoch": 0.64, "grad_norm": 0.45608716780558345, "learning_rate": 6.019371826334326e-06, "loss": 0.56, "step": 5021 }, { "epoch": 0.64, "grad_norm": 0.48930744651111835, "learning_rate": 6.015576611180131e-06, "loss": 0.5874, "step": 5022 }, { "epoch": 0.64, "grad_norm": 0.5654201783721368, "learning_rate": 6.011782078110814e-06, "loss": 0.6275, "step": 5023 }, { "epoch": 0.64, "grad_norm": 0.49100298734559666, "learning_rate": 6.0079882277759504e-06, "loss": 0.6066, "step": 5024 }, { "epoch": 0.64, "grad_norm": 0.525181564363004, "learning_rate": 6.004195060825002e-06, "loss": 0.5892, "step": 5025 }, { "epoch": 0.64, "grad_norm": 0.5305048306363294, "learning_rate": 6.0004025779073134e-06, "loss": 0.588, "step": 5026 }, { "epoch": 0.64, "grad_norm": 0.5727364717651245, "learning_rate": 5.9966107796721144e-06, "loss": 0.6214, "step": 5027 }, { "epoch": 0.64, "grad_norm": 0.47957753193513614, "learning_rate": 5.992819666768503e-06, "loss": 0.5827, "step": 5028 }, { "epoch": 0.64, "grad_norm": 0.49079385819613014, "learning_rate": 5.9890292398454876e-06, "loss": 0.6066, "step": 5029 }, { "epoch": 0.64, "grad_norm": 0.5160256104557555, "learning_rate": 5.985239499551936e-06, "loss": 0.6153, "step": 5030 }, { "epoch": 0.64, "grad_norm": 0.5431521256589258, "learning_rate": 5.981450446536605e-06, "loss": 0.6133, "step": 5031 }, { "epoch": 0.64, "grad_norm": 0.4776632853479839, "learning_rate": 5.977662081448134e-06, "loss": 0.5896, "step": 5032 }, { "epoch": 0.64, "grad_norm": 0.5641591143411115, "learning_rate": 5.973874404935047e-06, "loss": 0.628, "step": 5033 }, { "epoch": 0.64, "grad_norm": 0.4740506866875809, "learning_rate": 5.970087417645747e-06, "loss": 0.6038, "step": 5034 }, { "epoch": 0.64, "grad_norm": 0.4555269671030887, "learning_rate": 5.966301120228519e-06, "loss": 0.5756, "step": 5035 }, { "epoch": 0.64, "grad_norm": 0.5227331551387225, "learning_rate": 5.9625155133315346e-06, "loss": 0.6386, "step": 5036 }, { "epoch": 0.64, "grad_norm": 0.5509861854856534, "learning_rate": 5.958730597602842e-06, "loss": 0.6124, "step": 5037 }, { "epoch": 0.64, "grad_norm": 0.5164754628089928, "learning_rate": 5.954946373690373e-06, "loss": 0.6131, "step": 5038 }, { "epoch": 0.64, "grad_norm": 0.5816981757490536, "learning_rate": 5.951162842241942e-06, "loss": 0.6087, "step": 5039 }, { "epoch": 0.64, "grad_norm": 0.5664003196111999, "learning_rate": 5.947380003905241e-06, "loss": 0.6145, "step": 5040 }, { "epoch": 0.64, "grad_norm": 0.5219191202328037, "learning_rate": 5.943597859327847e-06, "loss": 0.5901, "step": 5041 }, { "epoch": 0.64, "grad_norm": 0.46276306139235923, "learning_rate": 5.939816409157216e-06, "loss": 0.575, "step": 5042 }, { "epoch": 0.64, "grad_norm": 0.4941350708986133, "learning_rate": 5.936035654040686e-06, "loss": 0.6263, "step": 5043 }, { "epoch": 0.64, "grad_norm": 0.5930835660408498, "learning_rate": 5.93225559462548e-06, "loss": 0.6091, "step": 5044 }, { "epoch": 0.64, "grad_norm": 0.5333695791590343, "learning_rate": 5.928476231558698e-06, "loss": 0.6031, "step": 5045 }, { "epoch": 0.64, "grad_norm": 0.566512167683405, "learning_rate": 5.9246975654873164e-06, "loss": 0.6355, "step": 5046 }, { "epoch": 0.64, "grad_norm": 0.5292732509542188, "learning_rate": 5.9209195970582015e-06, "loss": 0.6238, "step": 5047 }, { "epoch": 0.64, "grad_norm": 0.6992418856640943, "learning_rate": 5.917142326918095e-06, "loss": 0.618, "step": 5048 }, { "epoch": 0.64, "grad_norm": 0.5149827415877658, "learning_rate": 5.913365755713616e-06, "loss": 0.5725, "step": 5049 }, { "epoch": 0.65, "grad_norm": 0.4952035996429622, "learning_rate": 5.909589884091267e-06, "loss": 0.5775, "step": 5050 }, { "epoch": 0.65, "grad_norm": 0.5620732466259923, "learning_rate": 5.90581471269744e-06, "loss": 0.5927, "step": 5051 }, { "epoch": 0.65, "grad_norm": 0.5660462201616305, "learning_rate": 5.902040242178391e-06, "loss": 0.6291, "step": 5052 }, { "epoch": 0.65, "grad_norm": 0.6204053693215332, "learning_rate": 5.898266473180265e-06, "loss": 0.6397, "step": 5053 }, { "epoch": 0.65, "grad_norm": 0.4830701388294874, "learning_rate": 5.894493406349087e-06, "loss": 0.571, "step": 5054 }, { "epoch": 0.65, "grad_norm": 0.4395337438097979, "learning_rate": 5.890721042330758e-06, "loss": 0.5777, "step": 5055 }, { "epoch": 0.65, "grad_norm": 0.5033502749619951, "learning_rate": 5.886949381771063e-06, "loss": 0.6052, "step": 5056 }, { "epoch": 0.65, "grad_norm": 0.5023616911826215, "learning_rate": 5.883178425315661e-06, "loss": 0.5845, "step": 5057 }, { "epoch": 0.65, "grad_norm": 0.5664996939466158, "learning_rate": 5.879408173610097e-06, "loss": 0.6424, "step": 5058 }, { "epoch": 0.65, "grad_norm": 0.49599465375475443, "learning_rate": 5.8756386272997935e-06, "loss": 0.6087, "step": 5059 }, { "epoch": 0.65, "grad_norm": 0.5060053790740777, "learning_rate": 5.871869787030049e-06, "loss": 0.6026, "step": 5060 }, { "epoch": 0.65, "grad_norm": 0.5367093473376741, "learning_rate": 5.868101653446043e-06, "loss": 0.6121, "step": 5061 }, { "epoch": 0.65, "grad_norm": 0.5711544504483765, "learning_rate": 5.8643342271928335e-06, "loss": 0.6375, "step": 5062 }, { "epoch": 0.65, "grad_norm": 0.5298417658189838, "learning_rate": 5.86056750891536e-06, "loss": 0.5932, "step": 5063 }, { "epoch": 0.65, "grad_norm": 0.46353683909220667, "learning_rate": 5.856801499258436e-06, "loss": 0.59, "step": 5064 }, { "epoch": 0.65, "grad_norm": 0.5494669649400571, "learning_rate": 5.853036198866759e-06, "loss": 0.6434, "step": 5065 }, { "epoch": 0.65, "grad_norm": 0.5212825730177121, "learning_rate": 5.849271608384903e-06, "loss": 0.5788, "step": 5066 }, { "epoch": 0.65, "grad_norm": 0.5432937598056609, "learning_rate": 5.84550772845732e-06, "loss": 0.6249, "step": 5067 }, { "epoch": 0.65, "grad_norm": 0.502062424578169, "learning_rate": 5.84174455972834e-06, "loss": 0.6134, "step": 5068 }, { "epoch": 0.65, "grad_norm": 0.4857097090485549, "learning_rate": 5.837982102842172e-06, "loss": 0.5803, "step": 5069 }, { "epoch": 0.65, "grad_norm": 0.5261444885779744, "learning_rate": 5.834220358442903e-06, "loss": 0.594, "step": 5070 }, { "epoch": 0.65, "grad_norm": 0.489651386876007, "learning_rate": 5.830459327174491e-06, "loss": 0.5877, "step": 5071 }, { "epoch": 0.65, "grad_norm": 0.4930101199338038, "learning_rate": 5.8266990096807915e-06, "loss": 0.6139, "step": 5072 }, { "epoch": 0.65, "grad_norm": 0.5413314268874215, "learning_rate": 5.822939406605519e-06, "loss": 0.6165, "step": 5073 }, { "epoch": 0.65, "grad_norm": 0.5437103446004765, "learning_rate": 5.819180518592271e-06, "loss": 0.6271, "step": 5074 }, { "epoch": 0.65, "grad_norm": 0.4746565211355141, "learning_rate": 5.815422346284529e-06, "loss": 0.575, "step": 5075 }, { "epoch": 0.65, "grad_norm": 0.5031433133456518, "learning_rate": 5.811664890325637e-06, "loss": 0.6204, "step": 5076 }, { "epoch": 0.65, "grad_norm": 0.5897996503378029, "learning_rate": 5.807908151358832e-06, "loss": 0.583, "step": 5077 }, { "epoch": 0.65, "grad_norm": 0.4874090271284036, "learning_rate": 5.804152130027216e-06, "loss": 0.5711, "step": 5078 }, { "epoch": 0.65, "grad_norm": 0.5776657780626088, "learning_rate": 5.800396826973782e-06, "loss": 0.6365, "step": 5079 }, { "epoch": 0.65, "grad_norm": 0.5267620333789859, "learning_rate": 5.79664224284139e-06, "loss": 0.6101, "step": 5080 }, { "epoch": 0.65, "grad_norm": 0.5503759230378318, "learning_rate": 5.7928883782727785e-06, "loss": 0.61, "step": 5081 }, { "epoch": 0.65, "grad_norm": 0.5380042409530401, "learning_rate": 5.789135233910563e-06, "loss": 0.6087, "step": 5082 }, { "epoch": 0.65, "grad_norm": 0.4910304974555153, "learning_rate": 5.785382810397238e-06, "loss": 0.5881, "step": 5083 }, { "epoch": 0.65, "grad_norm": 0.5322497551265962, "learning_rate": 5.781631108375174e-06, "loss": 0.6125, "step": 5084 }, { "epoch": 0.65, "grad_norm": 0.5060536987909405, "learning_rate": 5.777880128486607e-06, "loss": 0.6058, "step": 5085 }, { "epoch": 0.65, "grad_norm": 0.46460196479187355, "learning_rate": 5.77412987137367e-06, "loss": 0.5601, "step": 5086 }, { "epoch": 0.65, "grad_norm": 0.562628655348775, "learning_rate": 5.770380337678358e-06, "loss": 0.6284, "step": 5087 }, { "epoch": 0.65, "grad_norm": 0.519345614414718, "learning_rate": 5.766631528042546e-06, "loss": 0.622, "step": 5088 }, { "epoch": 0.65, "grad_norm": 0.43191345630014355, "learning_rate": 5.762883443107983e-06, "loss": 0.6033, "step": 5089 }, { "epoch": 0.65, "grad_norm": 0.4586765032556065, "learning_rate": 5.759136083516296e-06, "loss": 0.5708, "step": 5090 }, { "epoch": 0.65, "grad_norm": 0.5856224486977173, "learning_rate": 5.75538944990899e-06, "loss": 0.6469, "step": 5091 }, { "epoch": 0.65, "grad_norm": 0.5401854111778237, "learning_rate": 5.751643542927442e-06, "loss": 0.6211, "step": 5092 }, { "epoch": 0.65, "grad_norm": 0.49708050030612516, "learning_rate": 5.747898363212904e-06, "loss": 0.6103, "step": 5093 }, { "epoch": 0.65, "grad_norm": 0.5237576851699747, "learning_rate": 5.744153911406507e-06, "loss": 0.5786, "step": 5094 }, { "epoch": 0.65, "grad_norm": 0.4834324987114272, "learning_rate": 5.740410188149253e-06, "loss": 0.5857, "step": 5095 }, { "epoch": 0.65, "grad_norm": 0.5127917586366164, "learning_rate": 5.7366671940820265e-06, "loss": 0.5862, "step": 5096 }, { "epoch": 0.65, "grad_norm": 0.4897597790305186, "learning_rate": 5.732924929845578e-06, "loss": 0.5793, "step": 5097 }, { "epoch": 0.65, "grad_norm": 0.5209659299896622, "learning_rate": 5.729183396080541e-06, "loss": 0.624, "step": 5098 }, { "epoch": 0.65, "grad_norm": 0.5196477253659503, "learning_rate": 5.72544259342742e-06, "loss": 0.5896, "step": 5099 }, { "epoch": 0.65, "grad_norm": 0.4948702645581644, "learning_rate": 5.721702522526593e-06, "loss": 0.6059, "step": 5100 }, { "epoch": 0.65, "grad_norm": 0.6063732340386758, "learning_rate": 5.717963184018316e-06, "loss": 0.6566, "step": 5101 }, { "epoch": 0.65, "grad_norm": 0.49470369519889923, "learning_rate": 5.71422457854272e-06, "loss": 0.5855, "step": 5102 }, { "epoch": 0.65, "grad_norm": 0.5981229666410451, "learning_rate": 5.710486706739805e-06, "loss": 0.5989, "step": 5103 }, { "epoch": 0.65, "grad_norm": 0.4770038977789443, "learning_rate": 5.7067495692494525e-06, "loss": 0.5927, "step": 5104 }, { "epoch": 0.65, "grad_norm": 0.46855291501307067, "learning_rate": 5.7030131667114145e-06, "loss": 0.6227, "step": 5105 }, { "epoch": 0.65, "grad_norm": 0.5122599165933246, "learning_rate": 5.699277499765317e-06, "loss": 0.5884, "step": 5106 }, { "epoch": 0.65, "grad_norm": 0.911968316391434, "learning_rate": 5.6955425690506565e-06, "loss": 0.5874, "step": 5107 }, { "epoch": 0.65, "grad_norm": 0.6651930986424942, "learning_rate": 5.69180837520682e-06, "loss": 0.6148, "step": 5108 }, { "epoch": 0.65, "grad_norm": 0.5091943896176677, "learning_rate": 5.688074918873046e-06, "loss": 0.5833, "step": 5109 }, { "epoch": 0.65, "grad_norm": 0.8021738931218464, "learning_rate": 5.684342200688457e-06, "loss": 0.6403, "step": 5110 }, { "epoch": 0.65, "grad_norm": 0.5627890392252155, "learning_rate": 5.680610221292052e-06, "loss": 0.6298, "step": 5111 }, { "epoch": 0.65, "grad_norm": 0.5410484402154618, "learning_rate": 5.6768789813227e-06, "loss": 0.6271, "step": 5112 }, { "epoch": 0.65, "grad_norm": 0.561063154460339, "learning_rate": 5.673148481419144e-06, "loss": 0.6189, "step": 5113 }, { "epoch": 0.65, "grad_norm": 0.46482266600508493, "learning_rate": 5.669418722219996e-06, "loss": 0.5802, "step": 5114 }, { "epoch": 0.65, "grad_norm": 0.526372180822249, "learning_rate": 5.665689704363753e-06, "loss": 0.642, "step": 5115 }, { "epoch": 0.65, "grad_norm": 0.501750510297655, "learning_rate": 5.661961428488774e-06, "loss": 0.6015, "step": 5116 }, { "epoch": 0.65, "grad_norm": 0.5511736677179536, "learning_rate": 5.658233895233299e-06, "loss": 0.5983, "step": 5117 }, { "epoch": 0.65, "grad_norm": 0.4826148546162904, "learning_rate": 5.654507105235427e-06, "loss": 0.571, "step": 5118 }, { "epoch": 0.65, "grad_norm": 0.48572888865570124, "learning_rate": 5.650781059133146e-06, "loss": 0.6017, "step": 5119 }, { "epoch": 0.65, "grad_norm": 0.5714102186227049, "learning_rate": 5.647055757564306e-06, "loss": 0.646, "step": 5120 }, { "epoch": 0.65, "grad_norm": 0.5214933472958824, "learning_rate": 5.643331201166633e-06, "loss": 0.595, "step": 5121 }, { "epoch": 0.65, "grad_norm": 0.47352210760776137, "learning_rate": 5.639607390577731e-06, "loss": 0.5696, "step": 5122 }, { "epoch": 0.65, "grad_norm": 0.5272205677431121, "learning_rate": 5.635884326435068e-06, "loss": 0.6184, "step": 5123 }, { "epoch": 0.65, "grad_norm": 0.4981558488745699, "learning_rate": 5.6321620093759875e-06, "loss": 0.6038, "step": 5124 }, { "epoch": 0.65, "grad_norm": 0.5132422499496607, "learning_rate": 5.62844044003771e-06, "loss": 0.6028, "step": 5125 }, { "epoch": 0.65, "grad_norm": 0.5710162039393009, "learning_rate": 5.624719619057313e-06, "loss": 0.5985, "step": 5126 }, { "epoch": 0.65, "grad_norm": 0.5221862545372714, "learning_rate": 5.620999547071762e-06, "loss": 0.6022, "step": 5127 }, { "epoch": 0.66, "grad_norm": 0.5001608968791051, "learning_rate": 5.617280224717882e-06, "loss": 0.6281, "step": 5128 }, { "epoch": 0.66, "grad_norm": 0.5053865200667206, "learning_rate": 5.613561652632386e-06, "loss": 0.5941, "step": 5129 }, { "epoch": 0.66, "grad_norm": 0.4726068890468962, "learning_rate": 5.6098438314518424e-06, "loss": 0.6151, "step": 5130 }, { "epoch": 0.66, "grad_norm": 0.5386188585085511, "learning_rate": 5.606126761812697e-06, "loss": 0.6402, "step": 5131 }, { "epoch": 0.66, "grad_norm": 0.4833249739866661, "learning_rate": 5.602410444351268e-06, "loss": 0.5945, "step": 5132 }, { "epoch": 0.66, "grad_norm": 0.49278510162238476, "learning_rate": 5.598694879703745e-06, "loss": 0.5934, "step": 5133 }, { "epoch": 0.66, "grad_norm": 0.6251070698816867, "learning_rate": 5.5949800685061885e-06, "loss": 0.6356, "step": 5134 }, { "epoch": 0.66, "grad_norm": 0.628343661790136, "learning_rate": 5.591266011394518e-06, "loss": 0.6343, "step": 5135 }, { "epoch": 0.66, "grad_norm": 0.5039204028238378, "learning_rate": 5.5875527090045485e-06, "loss": 0.5784, "step": 5136 }, { "epoch": 0.66, "grad_norm": 0.5061595236398266, "learning_rate": 5.5838401619719475e-06, "loss": 0.6335, "step": 5137 }, { "epoch": 0.66, "grad_norm": 0.49930799956457, "learning_rate": 5.580128370932258e-06, "loss": 0.5924, "step": 5138 }, { "epoch": 0.66, "grad_norm": 0.5025196896171144, "learning_rate": 5.576417336520893e-06, "loss": 0.6136, "step": 5139 }, { "epoch": 0.66, "grad_norm": 0.5048645954629046, "learning_rate": 5.572707059373137e-06, "loss": 0.5978, "step": 5140 }, { "epoch": 0.66, "grad_norm": 0.4899668526010825, "learning_rate": 5.568997540124144e-06, "loss": 0.6074, "step": 5141 }, { "epoch": 0.66, "grad_norm": 0.5045881118158201, "learning_rate": 5.565288779408938e-06, "loss": 0.5695, "step": 5142 }, { "epoch": 0.66, "grad_norm": 0.5434317338402692, "learning_rate": 5.561580777862415e-06, "loss": 0.5913, "step": 5143 }, { "epoch": 0.66, "grad_norm": 0.5127233588352155, "learning_rate": 5.5578735361193405e-06, "loss": 0.5872, "step": 5144 }, { "epoch": 0.66, "grad_norm": 0.5309852837613822, "learning_rate": 5.554167054814347e-06, "loss": 0.5875, "step": 5145 }, { "epoch": 0.66, "grad_norm": 0.5080534094471123, "learning_rate": 5.550461334581941e-06, "loss": 0.5966, "step": 5146 }, { "epoch": 0.66, "grad_norm": 0.5162615601233663, "learning_rate": 5.546756376056495e-06, "loss": 0.5919, "step": 5147 }, { "epoch": 0.66, "grad_norm": 0.5547582562657014, "learning_rate": 5.543052179872252e-06, "loss": 0.6024, "step": 5148 }, { "epoch": 0.66, "grad_norm": 0.5363636876798964, "learning_rate": 5.539348746663327e-06, "loss": 0.6424, "step": 5149 }, { "epoch": 0.66, "grad_norm": 0.5411467018224015, "learning_rate": 5.53564607706371e-06, "loss": 0.6009, "step": 5150 }, { "epoch": 0.66, "grad_norm": 0.45494461074179593, "learning_rate": 5.531944171707241e-06, "loss": 0.5683, "step": 5151 }, { "epoch": 0.66, "grad_norm": 0.49413064377203264, "learning_rate": 5.528243031227647e-06, "loss": 0.5751, "step": 5152 }, { "epoch": 0.66, "grad_norm": 0.5189467174570103, "learning_rate": 5.524542656258517e-06, "loss": 0.5948, "step": 5153 }, { "epoch": 0.66, "grad_norm": 0.5830262904231387, "learning_rate": 5.52084304743331e-06, "loss": 0.6773, "step": 5154 }, { "epoch": 0.66, "grad_norm": 0.5837669851019782, "learning_rate": 5.517144205385356e-06, "loss": 0.6487, "step": 5155 }, { "epoch": 0.66, "grad_norm": 0.5585763003147655, "learning_rate": 5.513446130747848e-06, "loss": 0.6603, "step": 5156 }, { "epoch": 0.66, "grad_norm": 0.5022921851944038, "learning_rate": 5.509748824153857e-06, "loss": 0.6196, "step": 5157 }, { "epoch": 0.66, "grad_norm": 0.5460564079181923, "learning_rate": 5.5060522862363176e-06, "loss": 0.5952, "step": 5158 }, { "epoch": 0.66, "grad_norm": 0.556635984705336, "learning_rate": 5.502356517628025e-06, "loss": 0.657, "step": 5159 }, { "epoch": 0.66, "grad_norm": 0.6272881175916992, "learning_rate": 5.498661518961654e-06, "loss": 0.6189, "step": 5160 }, { "epoch": 0.66, "grad_norm": 0.523924363303802, "learning_rate": 5.494967290869742e-06, "loss": 0.5938, "step": 5161 }, { "epoch": 0.66, "grad_norm": 0.47817362321327117, "learning_rate": 5.491273833984698e-06, "loss": 0.5827, "step": 5162 }, { "epoch": 0.66, "grad_norm": 0.6395009454716138, "learning_rate": 5.487581148938792e-06, "loss": 0.658, "step": 5163 }, { "epoch": 0.66, "grad_norm": 0.49649573711178346, "learning_rate": 5.483889236364175e-06, "loss": 0.5998, "step": 5164 }, { "epoch": 0.66, "grad_norm": 0.47562221366523566, "learning_rate": 5.480198096892853e-06, "loss": 0.5941, "step": 5165 }, { "epoch": 0.66, "grad_norm": 0.44408640401263705, "learning_rate": 5.476507731156704e-06, "loss": 0.5562, "step": 5166 }, { "epoch": 0.66, "grad_norm": 0.4643706061358098, "learning_rate": 5.472818139787479e-06, "loss": 0.6028, "step": 5167 }, { "epoch": 0.66, "grad_norm": 0.4874768443693203, "learning_rate": 5.469129323416783e-06, "loss": 0.5884, "step": 5168 }, { "epoch": 0.66, "grad_norm": 0.5204883704452236, "learning_rate": 5.465441282676101e-06, "loss": 0.6212, "step": 5169 }, { "epoch": 0.66, "grad_norm": 0.49389009814120344, "learning_rate": 5.461754018196777e-06, "loss": 0.5779, "step": 5170 }, { "epoch": 0.66, "grad_norm": 0.4482643102335128, "learning_rate": 5.458067530610027e-06, "loss": 0.5958, "step": 5171 }, { "epoch": 0.66, "grad_norm": 0.5050349721480887, "learning_rate": 5.454381820546939e-06, "loss": 0.5823, "step": 5172 }, { "epoch": 0.66, "grad_norm": 0.5500971529715692, "learning_rate": 5.450696888638456e-06, "loss": 0.6407, "step": 5173 }, { "epoch": 0.66, "grad_norm": 0.49575500751227153, "learning_rate": 5.447012735515396e-06, "loss": 0.628, "step": 5174 }, { "epoch": 0.66, "grad_norm": 0.5057367014769093, "learning_rate": 5.4433293618084406e-06, "loss": 0.6236, "step": 5175 }, { "epoch": 0.66, "grad_norm": 0.48093258208714923, "learning_rate": 5.439646768148142e-06, "loss": 0.6063, "step": 5176 }, { "epoch": 0.66, "grad_norm": 0.5335055510996756, "learning_rate": 5.4359649551649065e-06, "loss": 0.63, "step": 5177 }, { "epoch": 0.66, "grad_norm": 0.47620311369809115, "learning_rate": 5.432283923489018e-06, "loss": 0.6197, "step": 5178 }, { "epoch": 0.66, "grad_norm": 0.5279201689416547, "learning_rate": 5.428603673750632e-06, "loss": 0.5968, "step": 5179 }, { "epoch": 0.66, "grad_norm": 0.5056242964072812, "learning_rate": 5.424924206579758e-06, "loss": 0.5875, "step": 5180 }, { "epoch": 0.66, "grad_norm": 0.4926604350247316, "learning_rate": 5.421245522606275e-06, "loss": 0.591, "step": 5181 }, { "epoch": 0.66, "grad_norm": 0.5141652772334422, "learning_rate": 5.417567622459931e-06, "loss": 0.6269, "step": 5182 }, { "epoch": 0.66, "grad_norm": 0.47751460659617856, "learning_rate": 5.413890506770336e-06, "loss": 0.582, "step": 5183 }, { "epoch": 0.66, "grad_norm": 0.4979805424622252, "learning_rate": 5.410214176166971e-06, "loss": 0.6073, "step": 5184 }, { "epoch": 0.66, "grad_norm": 0.5225488782938135, "learning_rate": 5.406538631279174e-06, "loss": 0.6424, "step": 5185 }, { "epoch": 0.66, "grad_norm": 0.5623680625301567, "learning_rate": 5.402863872736157e-06, "loss": 0.6366, "step": 5186 }, { "epoch": 0.66, "grad_norm": 0.48304332370699776, "learning_rate": 5.399189901166994e-06, "loss": 0.5929, "step": 5187 }, { "epoch": 0.66, "grad_norm": 0.5287491274340268, "learning_rate": 5.395516717200622e-06, "loss": 0.6094, "step": 5188 }, { "epoch": 0.66, "grad_norm": 0.5189974204261864, "learning_rate": 5.3918443214658486e-06, "loss": 0.6093, "step": 5189 }, { "epoch": 0.66, "grad_norm": 0.4501097744746421, "learning_rate": 5.388172714591341e-06, "loss": 0.5841, "step": 5190 }, { "epoch": 0.66, "grad_norm": 0.52195166890581, "learning_rate": 5.384501897205634e-06, "loss": 0.6018, "step": 5191 }, { "epoch": 0.66, "grad_norm": 0.5275578192261924, "learning_rate": 5.380831869937129e-06, "loss": 0.6024, "step": 5192 }, { "epoch": 0.66, "grad_norm": 0.4640306739018392, "learning_rate": 5.377162633414086e-06, "loss": 0.5839, "step": 5193 }, { "epoch": 0.66, "grad_norm": 0.4848315509510151, "learning_rate": 5.373494188264638e-06, "loss": 0.5877, "step": 5194 }, { "epoch": 0.66, "grad_norm": 0.5574258009293892, "learning_rate": 5.369826535116775e-06, "loss": 0.6458, "step": 5195 }, { "epoch": 0.66, "grad_norm": 0.5053592318335269, "learning_rate": 5.366159674598357e-06, "loss": 0.5755, "step": 5196 }, { "epoch": 0.66, "grad_norm": 0.42948599643444024, "learning_rate": 5.362493607337105e-06, "loss": 0.5833, "step": 5197 }, { "epoch": 0.66, "grad_norm": 0.5062343807354056, "learning_rate": 5.358828333960605e-06, "loss": 0.6037, "step": 5198 }, { "epoch": 0.66, "grad_norm": 0.4614986879481953, "learning_rate": 5.355163855096303e-06, "loss": 0.5756, "step": 5199 }, { "epoch": 0.66, "grad_norm": 0.524159292840353, "learning_rate": 5.351500171371527e-06, "loss": 0.6072, "step": 5200 }, { "epoch": 0.66, "grad_norm": 0.46864731998985804, "learning_rate": 5.3478372834134415e-06, "loss": 0.5731, "step": 5201 }, { "epoch": 0.66, "grad_norm": 0.479746917094519, "learning_rate": 5.344175191849093e-06, "loss": 0.5956, "step": 5202 }, { "epoch": 0.66, "grad_norm": 0.5322046319374591, "learning_rate": 5.340513897305386e-06, "loss": 0.5998, "step": 5203 }, { "epoch": 0.66, "grad_norm": 0.5108001672526856, "learning_rate": 5.33685340040909e-06, "loss": 0.6116, "step": 5204 }, { "epoch": 0.66, "grad_norm": 0.49687011051284236, "learning_rate": 5.333193701786839e-06, "loss": 0.5913, "step": 5205 }, { "epoch": 0.67, "grad_norm": 0.5560286675961291, "learning_rate": 5.329534802065125e-06, "loss": 0.5938, "step": 5206 }, { "epoch": 0.67, "grad_norm": 0.4973805531727436, "learning_rate": 5.325876701870314e-06, "loss": 0.5824, "step": 5207 }, { "epoch": 0.67, "grad_norm": 0.5069458312911693, "learning_rate": 5.3222194018286236e-06, "loss": 0.6104, "step": 5208 }, { "epoch": 0.67, "grad_norm": 0.5134215365050775, "learning_rate": 5.318562902566145e-06, "loss": 0.6297, "step": 5209 }, { "epoch": 0.67, "grad_norm": 0.5550375506458697, "learning_rate": 5.314907204708816e-06, "loss": 0.6481, "step": 5210 }, { "epoch": 0.67, "grad_norm": 0.5439578164547133, "learning_rate": 5.311252308882453e-06, "loss": 0.6223, "step": 5211 }, { "epoch": 0.67, "grad_norm": 0.4541010002941806, "learning_rate": 5.307598215712732e-06, "loss": 0.5884, "step": 5212 }, { "epoch": 0.67, "grad_norm": 0.49875440792915743, "learning_rate": 5.303944925825182e-06, "loss": 0.598, "step": 5213 }, { "epoch": 0.67, "grad_norm": 0.5225262939380932, "learning_rate": 5.300292439845212e-06, "loss": 0.6229, "step": 5214 }, { "epoch": 0.67, "grad_norm": 0.5014810441486505, "learning_rate": 5.296640758398078e-06, "loss": 0.5978, "step": 5215 }, { "epoch": 0.67, "grad_norm": 0.5074487963806967, "learning_rate": 5.292989882108903e-06, "loss": 0.6462, "step": 5216 }, { "epoch": 0.67, "grad_norm": 0.4995760765857769, "learning_rate": 5.289339811602675e-06, "loss": 0.5859, "step": 5217 }, { "epoch": 0.67, "grad_norm": 0.5343127703419125, "learning_rate": 5.285690547504243e-06, "loss": 0.6092, "step": 5218 }, { "epoch": 0.67, "grad_norm": 0.5240933787018764, "learning_rate": 5.28204209043831e-06, "loss": 0.5984, "step": 5219 }, { "epoch": 0.67, "grad_norm": 0.5034680897489952, "learning_rate": 5.278394441029449e-06, "loss": 0.5956, "step": 5220 }, { "epoch": 0.67, "grad_norm": 0.5254086035630023, "learning_rate": 5.2747475999020995e-06, "loss": 0.5886, "step": 5221 }, { "epoch": 0.67, "grad_norm": 0.49402535465956554, "learning_rate": 5.2711015676805525e-06, "loss": 0.5656, "step": 5222 }, { "epoch": 0.67, "grad_norm": 0.5085617291717793, "learning_rate": 5.2674563449889635e-06, "loss": 0.6257, "step": 5223 }, { "epoch": 0.67, "grad_norm": 0.4883868301963988, "learning_rate": 5.263811932451352e-06, "loss": 0.5932, "step": 5224 }, { "epoch": 0.67, "grad_norm": 0.5171086971117073, "learning_rate": 5.260168330691595e-06, "loss": 0.6017, "step": 5225 }, { "epoch": 0.67, "grad_norm": 0.5281879075827216, "learning_rate": 5.256525540333437e-06, "loss": 0.6214, "step": 5226 }, { "epoch": 0.67, "grad_norm": 0.49065884469975435, "learning_rate": 5.252883562000479e-06, "loss": 0.5659, "step": 5227 }, { "epoch": 0.67, "grad_norm": 0.5217028610182113, "learning_rate": 5.249242396316174e-06, "loss": 0.6228, "step": 5228 }, { "epoch": 0.67, "grad_norm": 0.511852152690778, "learning_rate": 5.245602043903857e-06, "loss": 0.6267, "step": 5229 }, { "epoch": 0.67, "grad_norm": 0.5427419956346866, "learning_rate": 5.241962505386706e-06, "loss": 0.605, "step": 5230 }, { "epoch": 0.67, "grad_norm": 0.46935275845675994, "learning_rate": 5.23832378138777e-06, "loss": 0.5789, "step": 5231 }, { "epoch": 0.67, "grad_norm": 0.48212236332057884, "learning_rate": 5.23468587252995e-06, "loss": 0.616, "step": 5232 }, { "epoch": 0.67, "grad_norm": 0.5464171000149022, "learning_rate": 5.231048779436014e-06, "loss": 0.6154, "step": 5233 }, { "epoch": 0.67, "grad_norm": 0.5142163917969462, "learning_rate": 5.227412502728588e-06, "loss": 0.598, "step": 5234 }, { "epoch": 0.67, "grad_norm": 0.43967491793985597, "learning_rate": 5.223777043030158e-06, "loss": 0.5736, "step": 5235 }, { "epoch": 0.67, "grad_norm": 0.5480159169681389, "learning_rate": 5.220142400963072e-06, "loss": 0.6486, "step": 5236 }, { "epoch": 0.67, "grad_norm": 0.5039406619785052, "learning_rate": 5.2165085771495335e-06, "loss": 0.5995, "step": 5237 }, { "epoch": 0.67, "grad_norm": 0.5678181470773307, "learning_rate": 5.212875572211612e-06, "loss": 0.6067, "step": 5238 }, { "epoch": 0.67, "grad_norm": 0.505828647169707, "learning_rate": 5.2092433867712325e-06, "loss": 0.5938, "step": 5239 }, { "epoch": 0.67, "grad_norm": 0.5299534912183783, "learning_rate": 5.2056120214501794e-06, "loss": 0.5924, "step": 5240 }, { "epoch": 0.67, "grad_norm": 0.48988176441644526, "learning_rate": 5.2019814768701015e-06, "loss": 0.6237, "step": 5241 }, { "epoch": 0.67, "grad_norm": 0.47283098966132725, "learning_rate": 5.1983517536525015e-06, "loss": 0.5919, "step": 5242 }, { "epoch": 0.67, "grad_norm": 0.5475194344454818, "learning_rate": 5.194722852418747e-06, "loss": 0.6589, "step": 5243 }, { "epoch": 0.67, "grad_norm": 0.45322213591115573, "learning_rate": 5.191094773790058e-06, "loss": 0.581, "step": 5244 }, { "epoch": 0.67, "grad_norm": 0.49673396159232897, "learning_rate": 5.187467518387519e-06, "loss": 0.6097, "step": 5245 }, { "epoch": 0.67, "grad_norm": 0.5633361169426379, "learning_rate": 5.183841086832073e-06, "loss": 0.6158, "step": 5246 }, { "epoch": 0.67, "grad_norm": 0.5523404180186037, "learning_rate": 5.180215479744519e-06, "loss": 0.6236, "step": 5247 }, { "epoch": 0.67, "grad_norm": 0.5178852690830054, "learning_rate": 5.17659069774552e-06, "loss": 0.6029, "step": 5248 }, { "epoch": 0.67, "grad_norm": 0.5347374386529932, "learning_rate": 5.1729667414555875e-06, "loss": 0.5904, "step": 5249 }, { "epoch": 0.67, "grad_norm": 0.5216395176022417, "learning_rate": 5.169343611495108e-06, "loss": 0.6001, "step": 5250 }, { "epoch": 0.67, "grad_norm": 0.4818307205606218, "learning_rate": 5.1657213084843175e-06, "loss": 0.5586, "step": 5251 }, { "epoch": 0.67, "grad_norm": 0.46830070011343133, "learning_rate": 5.162099833043302e-06, "loss": 0.587, "step": 5252 }, { "epoch": 0.67, "grad_norm": 0.5462458816399027, "learning_rate": 5.158479185792018e-06, "loss": 0.6375, "step": 5253 }, { "epoch": 0.67, "grad_norm": 0.4889761499069313, "learning_rate": 5.154859367350278e-06, "loss": 0.5964, "step": 5254 }, { "epoch": 0.67, "grad_norm": 0.47141492674679175, "learning_rate": 5.151240378337747e-06, "loss": 0.5956, "step": 5255 }, { "epoch": 0.67, "grad_norm": 0.44893007730321816, "learning_rate": 5.147622219373953e-06, "loss": 0.5757, "step": 5256 }, { "epoch": 0.67, "grad_norm": 0.48844314310830017, "learning_rate": 5.144004891078285e-06, "loss": 0.5792, "step": 5257 }, { "epoch": 0.67, "grad_norm": 0.5222878218740226, "learning_rate": 5.140388394069983e-06, "loss": 0.6155, "step": 5258 }, { "epoch": 0.67, "grad_norm": 0.5118592435380521, "learning_rate": 5.136772728968147e-06, "loss": 0.594, "step": 5259 }, { "epoch": 0.67, "grad_norm": 0.540695739343322, "learning_rate": 5.133157896391741e-06, "loss": 0.6107, "step": 5260 }, { "epoch": 0.67, "grad_norm": 0.5358303316765951, "learning_rate": 5.129543896959568e-06, "loss": 0.6143, "step": 5261 }, { "epoch": 0.67, "grad_norm": 0.5212647427399956, "learning_rate": 5.12593073129031e-06, "loss": 0.5855, "step": 5262 }, { "epoch": 0.67, "grad_norm": 0.5067970577188734, "learning_rate": 5.1223184000024885e-06, "loss": 0.6034, "step": 5263 }, { "epoch": 0.67, "grad_norm": 0.5314920885164146, "learning_rate": 5.118706903714502e-06, "loss": 0.6035, "step": 5264 }, { "epoch": 0.67, "grad_norm": 0.5102330501820922, "learning_rate": 5.115096243044588e-06, "loss": 0.5783, "step": 5265 }, { "epoch": 0.67, "grad_norm": 0.48722151022700594, "learning_rate": 5.111486418610851e-06, "loss": 0.591, "step": 5266 }, { "epoch": 0.67, "grad_norm": 0.4979345022852787, "learning_rate": 5.107877431031248e-06, "loss": 0.5815, "step": 5267 }, { "epoch": 0.67, "grad_norm": 0.4557463763741102, "learning_rate": 5.10426928092359e-06, "loss": 0.5954, "step": 5268 }, { "epoch": 0.67, "grad_norm": 0.5081001683797076, "learning_rate": 5.100661968905558e-06, "loss": 0.5781, "step": 5269 }, { "epoch": 0.67, "grad_norm": 0.498033093417759, "learning_rate": 5.097055495594666e-06, "loss": 0.592, "step": 5270 }, { "epoch": 0.67, "grad_norm": 0.508555814965877, "learning_rate": 5.093449861608309e-06, "loss": 0.5997, "step": 5271 }, { "epoch": 0.67, "grad_norm": 0.5327376844655057, "learning_rate": 5.089845067563726e-06, "loss": 0.6319, "step": 5272 }, { "epoch": 0.67, "grad_norm": 0.5029628302675719, "learning_rate": 5.086241114078012e-06, "loss": 0.6207, "step": 5273 }, { "epoch": 0.67, "grad_norm": 0.5370803790864633, "learning_rate": 5.082638001768121e-06, "loss": 0.5967, "step": 5274 }, { "epoch": 0.67, "grad_norm": 0.5497118294590784, "learning_rate": 5.079035731250861e-06, "loss": 0.6211, "step": 5275 }, { "epoch": 0.67, "grad_norm": 0.5811310480608474, "learning_rate": 5.075434303142899e-06, "loss": 0.6883, "step": 5276 }, { "epoch": 0.67, "grad_norm": 0.4718122407927375, "learning_rate": 5.071833718060753e-06, "loss": 0.5937, "step": 5277 }, { "epoch": 0.67, "grad_norm": 0.4951464013085227, "learning_rate": 5.068233976620802e-06, "loss": 0.5981, "step": 5278 }, { "epoch": 0.67, "grad_norm": 0.47108473724830885, "learning_rate": 5.064635079439276e-06, "loss": 0.5595, "step": 5279 }, { "epoch": 0.67, "grad_norm": 0.5161383684077798, "learning_rate": 5.061037027132265e-06, "loss": 0.6215, "step": 5280 }, { "epoch": 0.67, "grad_norm": 0.517587335382018, "learning_rate": 5.057439820315708e-06, "loss": 0.6044, "step": 5281 }, { "epoch": 0.67, "grad_norm": 0.4588015363192835, "learning_rate": 5.0538434596054075e-06, "loss": 0.5924, "step": 5282 }, { "epoch": 0.67, "grad_norm": 0.4336490218743648, "learning_rate": 5.050247945617015e-06, "loss": 0.5643, "step": 5283 }, { "epoch": 0.68, "grad_norm": 0.4381736540010683, "learning_rate": 5.0466532789660364e-06, "loss": 0.5713, "step": 5284 }, { "epoch": 0.68, "grad_norm": 0.5244945425895335, "learning_rate": 5.0430594602678385e-06, "loss": 0.5739, "step": 5285 }, { "epoch": 0.68, "grad_norm": 0.4429932780620721, "learning_rate": 5.039466490137637e-06, "loss": 0.594, "step": 5286 }, { "epoch": 0.68, "grad_norm": 0.5425012271127266, "learning_rate": 5.035874369190505e-06, "loss": 0.6176, "step": 5287 }, { "epoch": 0.68, "grad_norm": 0.5313662351637132, "learning_rate": 5.032283098041373e-06, "loss": 0.5975, "step": 5288 }, { "epoch": 0.68, "grad_norm": 0.48984307865073723, "learning_rate": 5.0286926773050185e-06, "loss": 0.6053, "step": 5289 }, { "epoch": 0.68, "grad_norm": 0.44247330396096674, "learning_rate": 5.025103107596081e-06, "loss": 0.5756, "step": 5290 }, { "epoch": 0.68, "grad_norm": 0.512484119615456, "learning_rate": 5.02151438952905e-06, "loss": 0.5687, "step": 5291 }, { "epoch": 0.68, "grad_norm": 0.48645665349216394, "learning_rate": 5.017926523718266e-06, "loss": 0.5963, "step": 5292 }, { "epoch": 0.68, "grad_norm": 0.48971569373766927, "learning_rate": 5.014339510777941e-06, "loss": 0.5578, "step": 5293 }, { "epoch": 0.68, "grad_norm": 0.48066059942429973, "learning_rate": 5.0107533513221176e-06, "loss": 0.5924, "step": 5294 }, { "epoch": 0.68, "grad_norm": 0.5259545371614562, "learning_rate": 5.007168045964703e-06, "loss": 0.6028, "step": 5295 }, { "epoch": 0.68, "grad_norm": 0.49070298137102214, "learning_rate": 5.003583595319459e-06, "loss": 0.6077, "step": 5296 }, { "epoch": 0.68, "grad_norm": 0.44337782929388314, "learning_rate": 5.000000000000003e-06, "loss": 0.574, "step": 5297 }, { "epoch": 0.68, "grad_norm": 0.4959561278288021, "learning_rate": 4.996417260619799e-06, "loss": 0.6202, "step": 5298 }, { "epoch": 0.68, "grad_norm": 0.47465326023137366, "learning_rate": 4.992835377792167e-06, "loss": 0.603, "step": 5299 }, { "epoch": 0.68, "grad_norm": 0.5394310714260587, "learning_rate": 4.989254352130288e-06, "loss": 0.6238, "step": 5300 }, { "epoch": 0.68, "grad_norm": 0.5275152772142372, "learning_rate": 4.985674184247189e-06, "loss": 0.5846, "step": 5301 }, { "epoch": 0.68, "grad_norm": 0.547251326182506, "learning_rate": 4.9820948747557515e-06, "loss": 0.6078, "step": 5302 }, { "epoch": 0.68, "grad_norm": 0.4724432562300856, "learning_rate": 4.978516424268704e-06, "loss": 0.5734, "step": 5303 }, { "epoch": 0.68, "grad_norm": 0.4700529861121477, "learning_rate": 4.9749388333986384e-06, "loss": 0.5842, "step": 5304 }, { "epoch": 0.68, "grad_norm": 0.4846187396398847, "learning_rate": 4.971362102757994e-06, "loss": 0.5854, "step": 5305 }, { "epoch": 0.68, "grad_norm": 0.49774604559945834, "learning_rate": 4.967786232959059e-06, "loss": 0.6007, "step": 5306 }, { "epoch": 0.68, "grad_norm": 0.5243079017168188, "learning_rate": 4.964211224613986e-06, "loss": 0.6007, "step": 5307 }, { "epoch": 0.68, "grad_norm": 0.4980528558289492, "learning_rate": 4.960637078334772e-06, "loss": 0.5935, "step": 5308 }, { "epoch": 0.68, "grad_norm": 0.5436395068351267, "learning_rate": 4.957063794733265e-06, "loss": 0.573, "step": 5309 }, { "epoch": 0.68, "grad_norm": 0.5001946755300415, "learning_rate": 4.953491374421172e-06, "loss": 0.6128, "step": 5310 }, { "epoch": 0.68, "grad_norm": 0.46730887759495726, "learning_rate": 4.94991981801004e-06, "loss": 0.5694, "step": 5311 }, { "epoch": 0.68, "grad_norm": 0.5263471083156013, "learning_rate": 4.9463491261112824e-06, "loss": 0.6041, "step": 5312 }, { "epoch": 0.68, "grad_norm": 0.49296952814120054, "learning_rate": 4.942779299336151e-06, "loss": 0.5968, "step": 5313 }, { "epoch": 0.68, "grad_norm": 0.5174923747017225, "learning_rate": 4.939210338295767e-06, "loss": 0.5877, "step": 5314 }, { "epoch": 0.68, "grad_norm": 0.49650181172164254, "learning_rate": 4.9356422436010874e-06, "loss": 0.5703, "step": 5315 }, { "epoch": 0.68, "grad_norm": 0.45341808736384465, "learning_rate": 4.932075015862929e-06, "loss": 0.5701, "step": 5316 }, { "epoch": 0.68, "grad_norm": 0.5667030785983491, "learning_rate": 4.928508655691955e-06, "loss": 0.6368, "step": 5317 }, { "epoch": 0.68, "grad_norm": 0.5174796036922142, "learning_rate": 4.9249431636986845e-06, "loss": 0.5502, "step": 5318 }, { "epoch": 0.68, "grad_norm": 0.507490930703033, "learning_rate": 4.9213785404934895e-06, "loss": 0.5956, "step": 5319 }, { "epoch": 0.68, "grad_norm": 0.5126689578594839, "learning_rate": 4.91781478668658e-06, "loss": 0.5835, "step": 5320 }, { "epoch": 0.68, "grad_norm": 0.5196372788430081, "learning_rate": 4.9142519028880386e-06, "loss": 0.58, "step": 5321 }, { "epoch": 0.68, "grad_norm": 0.46090955901437863, "learning_rate": 4.910689889707783e-06, "loss": 0.5944, "step": 5322 }, { "epoch": 0.68, "grad_norm": 0.4764289788964044, "learning_rate": 4.907128747755587e-06, "loss": 0.5616, "step": 5323 }, { "epoch": 0.68, "grad_norm": 0.5542636123373246, "learning_rate": 4.903568477641074e-06, "loss": 0.6182, "step": 5324 }, { "epoch": 0.68, "grad_norm": 0.5234811033225548, "learning_rate": 4.900009079973721e-06, "loss": 0.6006, "step": 5325 }, { "epoch": 0.68, "grad_norm": 0.4850374114383739, "learning_rate": 4.896450555362852e-06, "loss": 0.569, "step": 5326 }, { "epoch": 0.68, "grad_norm": 0.4704628021764082, "learning_rate": 4.892892904417643e-06, "loss": 0.5872, "step": 5327 }, { "epoch": 0.68, "grad_norm": 0.5272318529877261, "learning_rate": 4.889336127747123e-06, "loss": 0.5906, "step": 5328 }, { "epoch": 0.68, "grad_norm": 0.4723631376647075, "learning_rate": 4.885780225960165e-06, "loss": 0.5969, "step": 5329 }, { "epoch": 0.68, "grad_norm": 0.5776979460502133, "learning_rate": 4.8822251996655e-06, "loss": 0.6252, "step": 5330 }, { "epoch": 0.68, "grad_norm": 0.4664042485683863, "learning_rate": 4.878671049471702e-06, "loss": 0.602, "step": 5331 }, { "epoch": 0.68, "grad_norm": 0.5368738154448991, "learning_rate": 4.875117775987202e-06, "loss": 0.5895, "step": 5332 }, { "epoch": 0.68, "grad_norm": 0.457251303441715, "learning_rate": 4.871565379820274e-06, "loss": 0.567, "step": 5333 }, { "epoch": 0.68, "grad_norm": 0.47353978968250826, "learning_rate": 4.868013861579047e-06, "loss": 0.5855, "step": 5334 }, { "epoch": 0.68, "grad_norm": 0.5679896191172911, "learning_rate": 4.864463221871497e-06, "loss": 0.658, "step": 5335 }, { "epoch": 0.68, "grad_norm": 0.44230724320883796, "learning_rate": 4.860913461305451e-06, "loss": 0.5967, "step": 5336 }, { "epoch": 0.68, "grad_norm": 0.4845026827858913, "learning_rate": 4.857364580488584e-06, "loss": 0.588, "step": 5337 }, { "epoch": 0.68, "grad_norm": 0.5150557444224712, "learning_rate": 4.853816580028422e-06, "loss": 0.5845, "step": 5338 }, { "epoch": 0.68, "grad_norm": 0.4740385297965369, "learning_rate": 4.850269460532339e-06, "loss": 0.6274, "step": 5339 }, { "epoch": 0.68, "grad_norm": 0.5642464890911105, "learning_rate": 4.8467232226075575e-06, "loss": 0.6255, "step": 5340 }, { "epoch": 0.68, "grad_norm": 0.4696015927403071, "learning_rate": 4.843177866861149e-06, "loss": 0.6056, "step": 5341 }, { "epoch": 0.68, "grad_norm": 0.5427249563737634, "learning_rate": 4.8396333939000425e-06, "loss": 0.6203, "step": 5342 }, { "epoch": 0.68, "grad_norm": 0.5083755036163752, "learning_rate": 4.836089804331009e-06, "loss": 0.6007, "step": 5343 }, { "epoch": 0.68, "grad_norm": 0.4552213504165974, "learning_rate": 4.832547098760658e-06, "loss": 0.594, "step": 5344 }, { "epoch": 0.68, "grad_norm": 0.45633947530811797, "learning_rate": 4.829005277795463e-06, "loss": 0.5763, "step": 5345 }, { "epoch": 0.68, "grad_norm": 0.5933369724693868, "learning_rate": 4.825464342041742e-06, "loss": 0.6257, "step": 5346 }, { "epoch": 0.68, "grad_norm": 0.5192900411842588, "learning_rate": 4.821924292105659e-06, "loss": 0.5725, "step": 5347 }, { "epoch": 0.68, "grad_norm": 0.5507645250751263, "learning_rate": 4.818385128593223e-06, "loss": 0.5927, "step": 5348 }, { "epoch": 0.68, "grad_norm": 0.46875969806866064, "learning_rate": 4.814846852110303e-06, "loss": 0.5873, "step": 5349 }, { "epoch": 0.68, "grad_norm": 0.5231241355052673, "learning_rate": 4.811309463262607e-06, "loss": 0.6181, "step": 5350 }, { "epoch": 0.68, "grad_norm": 0.4933063640381382, "learning_rate": 4.807772962655692e-06, "loss": 0.5907, "step": 5351 }, { "epoch": 0.68, "grad_norm": 0.5150541421874046, "learning_rate": 4.804237350894969e-06, "loss": 0.6086, "step": 5352 }, { "epoch": 0.68, "grad_norm": 0.4406547226789904, "learning_rate": 4.800702628585683e-06, "loss": 0.5429, "step": 5353 }, { "epoch": 0.68, "grad_norm": 0.5032343724395615, "learning_rate": 4.7971687963329385e-06, "loss": 0.6143, "step": 5354 }, { "epoch": 0.68, "grad_norm": 0.4980692911342653, "learning_rate": 4.793635854741685e-06, "loss": 0.5563, "step": 5355 }, { "epoch": 0.68, "grad_norm": 0.4882739613016256, "learning_rate": 4.790103804416717e-06, "loss": 0.6127, "step": 5356 }, { "epoch": 0.68, "grad_norm": 0.4921849345968208, "learning_rate": 4.786572645962685e-06, "loss": 0.5828, "step": 5357 }, { "epoch": 0.68, "grad_norm": 0.5104700966532661, "learning_rate": 4.783042379984077e-06, "loss": 0.6426, "step": 5358 }, { "epoch": 0.68, "grad_norm": 0.5140772281012496, "learning_rate": 4.779513007085231e-06, "loss": 0.5847, "step": 5359 }, { "epoch": 0.68, "grad_norm": 0.45919545278225016, "learning_rate": 4.775984527870334e-06, "loss": 0.5786, "step": 5360 }, { "epoch": 0.68, "grad_norm": 0.6302108330442786, "learning_rate": 4.7724569429434206e-06, "loss": 0.6872, "step": 5361 }, { "epoch": 0.68, "grad_norm": 0.4811544436893605, "learning_rate": 4.768930252908365e-06, "loss": 0.5872, "step": 5362 }, { "epoch": 0.69, "grad_norm": 0.4568707837220755, "learning_rate": 4.765404458368893e-06, "loss": 0.5873, "step": 5363 }, { "epoch": 0.69, "grad_norm": 0.523105012393175, "learning_rate": 4.761879559928584e-06, "loss": 0.6073, "step": 5364 }, { "epoch": 0.69, "grad_norm": 0.5301520092906212, "learning_rate": 4.758355558190856e-06, "loss": 0.5807, "step": 5365 }, { "epoch": 0.69, "grad_norm": 0.5242449080322196, "learning_rate": 4.754832453758974e-06, "loss": 0.6096, "step": 5366 }, { "epoch": 0.69, "grad_norm": 0.45005176687220566, "learning_rate": 4.7513102472360504e-06, "loss": 0.5802, "step": 5367 }, { "epoch": 0.69, "grad_norm": 0.5109367780298961, "learning_rate": 4.747788939225047e-06, "loss": 0.5897, "step": 5368 }, { "epoch": 0.69, "grad_norm": 0.5312810166807448, "learning_rate": 4.744268530328764e-06, "loss": 0.5898, "step": 5369 }, { "epoch": 0.69, "grad_norm": 0.5139012488694714, "learning_rate": 4.740749021149857e-06, "loss": 0.5921, "step": 5370 }, { "epoch": 0.69, "grad_norm": 0.5515826392100655, "learning_rate": 4.737230412290822e-06, "loss": 0.6048, "step": 5371 }, { "epoch": 0.69, "grad_norm": 0.5487392650512395, "learning_rate": 4.733712704354001e-06, "loss": 0.6143, "step": 5372 }, { "epoch": 0.69, "grad_norm": 0.4735720222229113, "learning_rate": 4.730195897941583e-06, "loss": 0.5778, "step": 5373 }, { "epoch": 0.69, "grad_norm": 0.44683701119855734, "learning_rate": 4.726679993655604e-06, "loss": 0.5731, "step": 5374 }, { "epoch": 0.69, "grad_norm": 0.4992982916224773, "learning_rate": 4.7231649920979424e-06, "loss": 0.5972, "step": 5375 }, { "epoch": 0.69, "grad_norm": 0.4984672674358538, "learning_rate": 4.719650893870326e-06, "loss": 0.6017, "step": 5376 }, { "epoch": 0.69, "grad_norm": 0.4851083636823502, "learning_rate": 4.716137699574323e-06, "loss": 0.6106, "step": 5377 }, { "epoch": 0.69, "grad_norm": 0.6324037362496082, "learning_rate": 4.712625409811352e-06, "loss": 0.6705, "step": 5378 }, { "epoch": 0.69, "grad_norm": 0.48874867287309987, "learning_rate": 4.709114025182673e-06, "loss": 0.5997, "step": 5379 }, { "epoch": 0.69, "grad_norm": 0.5456670275800899, "learning_rate": 4.705603546289392e-06, "loss": 0.6173, "step": 5380 }, { "epoch": 0.69, "grad_norm": 0.5148240862054158, "learning_rate": 4.702093973732461e-06, "loss": 0.6344, "step": 5381 }, { "epoch": 0.69, "grad_norm": 0.4902713932224161, "learning_rate": 4.6985853081126756e-06, "loss": 0.604, "step": 5382 }, { "epoch": 0.69, "grad_norm": 0.5321604106841261, "learning_rate": 4.695077550030678e-06, "loss": 0.6218, "step": 5383 }, { "epoch": 0.69, "grad_norm": 0.5373942420938823, "learning_rate": 4.691570700086949e-06, "loss": 0.6057, "step": 5384 }, { "epoch": 0.69, "grad_norm": 0.5049744306086283, "learning_rate": 4.68806475888183e-06, "loss": 0.5908, "step": 5385 }, { "epoch": 0.69, "grad_norm": 0.5121061434742195, "learning_rate": 4.684559727015486e-06, "loss": 0.5851, "step": 5386 }, { "epoch": 0.69, "grad_norm": 0.4534903153424493, "learning_rate": 4.681055605087936e-06, "loss": 0.5756, "step": 5387 }, { "epoch": 0.69, "grad_norm": 0.4971035923544485, "learning_rate": 4.6775523936990464e-06, "loss": 0.5891, "step": 5388 }, { "epoch": 0.69, "grad_norm": 0.4960685821346672, "learning_rate": 4.674050093448523e-06, "loss": 0.5812, "step": 5389 }, { "epoch": 0.69, "grad_norm": 0.5321354948750145, "learning_rate": 4.670548704935918e-06, "loss": 0.6101, "step": 5390 }, { "epoch": 0.69, "grad_norm": 0.5490063934886333, "learning_rate": 4.667048228760621e-06, "loss": 0.6225, "step": 5391 }, { "epoch": 0.69, "grad_norm": 0.5596762004679109, "learning_rate": 4.663548665521881e-06, "loss": 0.5948, "step": 5392 }, { "epoch": 0.69, "grad_norm": 0.5492112175017351, "learning_rate": 4.660050015818774e-06, "loss": 0.6097, "step": 5393 }, { "epoch": 0.69, "grad_norm": 0.6083759304213352, "learning_rate": 4.656552280250233e-06, "loss": 0.6004, "step": 5394 }, { "epoch": 0.69, "grad_norm": 0.5457550388275316, "learning_rate": 4.653055459415018e-06, "loss": 0.618, "step": 5395 }, { "epoch": 0.69, "grad_norm": 0.532703445511728, "learning_rate": 4.649559553911748e-06, "loss": 0.6197, "step": 5396 }, { "epoch": 0.69, "grad_norm": 0.45854749757556446, "learning_rate": 4.646064564338877e-06, "loss": 0.5973, "step": 5397 }, { "epoch": 0.69, "grad_norm": 0.4634811483031097, "learning_rate": 4.642570491294703e-06, "loss": 0.6019, "step": 5398 }, { "epoch": 0.69, "grad_norm": 0.4922532966978632, "learning_rate": 4.6390773353773776e-06, "loss": 0.5689, "step": 5399 }, { "epoch": 0.69, "grad_norm": 0.5462452735007983, "learning_rate": 4.635585097184879e-06, "loss": 0.6076, "step": 5400 }, { "epoch": 0.69, "grad_norm": 0.5524269590284677, "learning_rate": 4.63209377731504e-06, "loss": 0.6155, "step": 5401 }, { "epoch": 0.69, "grad_norm": 0.49341163361389334, "learning_rate": 4.628603376365529e-06, "loss": 0.5875, "step": 5402 }, { "epoch": 0.69, "grad_norm": 0.5077795358273414, "learning_rate": 4.6251138949338655e-06, "loss": 0.5794, "step": 5403 }, { "epoch": 0.69, "grad_norm": 0.5791982033707064, "learning_rate": 4.621625333617399e-06, "loss": 0.591, "step": 5404 }, { "epoch": 0.69, "grad_norm": 0.4952216645413217, "learning_rate": 4.618137693013328e-06, "loss": 0.6094, "step": 5405 }, { "epoch": 0.69, "grad_norm": 0.47352660557132864, "learning_rate": 4.614650973718703e-06, "loss": 0.5905, "step": 5406 }, { "epoch": 0.69, "grad_norm": 0.5229512161301975, "learning_rate": 4.611165176330402e-06, "loss": 0.5907, "step": 5407 }, { "epoch": 0.69, "grad_norm": 0.4605836584799615, "learning_rate": 4.607680301445154e-06, "loss": 0.5729, "step": 5408 }, { "epoch": 0.69, "grad_norm": 0.5492700827410355, "learning_rate": 4.604196349659525e-06, "loss": 0.5939, "step": 5409 }, { "epoch": 0.69, "grad_norm": 0.5303355800336148, "learning_rate": 4.600713321569926e-06, "loss": 0.5751, "step": 5410 }, { "epoch": 0.69, "grad_norm": 0.5561325243378851, "learning_rate": 4.597231217772608e-06, "loss": 0.6382, "step": 5411 }, { "epoch": 0.69, "grad_norm": 0.5180041450094032, "learning_rate": 4.593750038863671e-06, "loss": 0.6293, "step": 5412 }, { "epoch": 0.69, "grad_norm": 0.5274361345781468, "learning_rate": 4.590269785439039e-06, "loss": 0.599, "step": 5413 }, { "epoch": 0.69, "grad_norm": 0.5343083289709686, "learning_rate": 4.586790458094498e-06, "loss": 0.5853, "step": 5414 }, { "epoch": 0.69, "grad_norm": 0.4633461064706324, "learning_rate": 4.583312057425664e-06, "loss": 0.5784, "step": 5415 }, { "epoch": 0.69, "grad_norm": 0.5144873629652843, "learning_rate": 4.579834584027997e-06, "loss": 0.6108, "step": 5416 }, { "epoch": 0.69, "grad_norm": 0.5592098920568539, "learning_rate": 4.576358038496799e-06, "loss": 0.6226, "step": 5417 }, { "epoch": 0.69, "grad_norm": 0.43666622588817067, "learning_rate": 4.572882421427211e-06, "loss": 0.559, "step": 5418 }, { "epoch": 0.69, "grad_norm": 0.4946492815000949, "learning_rate": 4.5694077334142176e-06, "loss": 0.612, "step": 5419 }, { "epoch": 0.69, "grad_norm": 0.4608273165654016, "learning_rate": 4.565933975052642e-06, "loss": 0.5897, "step": 5420 }, { "epoch": 0.69, "grad_norm": 0.5271404139678089, "learning_rate": 4.562461146937151e-06, "loss": 0.5984, "step": 5421 }, { "epoch": 0.69, "grad_norm": 0.507079717191861, "learning_rate": 4.558989249662248e-06, "loss": 0.6067, "step": 5422 }, { "epoch": 0.69, "grad_norm": 0.5309130017591651, "learning_rate": 4.555518283822282e-06, "loss": 0.6077, "step": 5423 }, { "epoch": 0.69, "grad_norm": 0.4829569645183211, "learning_rate": 4.552048250011439e-06, "loss": 0.5998, "step": 5424 }, { "epoch": 0.69, "grad_norm": 0.45008383538590696, "learning_rate": 4.548579148823748e-06, "loss": 0.6062, "step": 5425 }, { "epoch": 0.69, "grad_norm": 0.5197343125616841, "learning_rate": 4.545110980853077e-06, "loss": 0.5984, "step": 5426 }, { "epoch": 0.69, "grad_norm": 0.5037267261853624, "learning_rate": 4.541643746693132e-06, "loss": 0.5841, "step": 5427 }, { "epoch": 0.69, "grad_norm": 0.4752125679880441, "learning_rate": 4.538177446937462e-06, "loss": 0.5896, "step": 5428 }, { "epoch": 0.69, "grad_norm": 0.4595449829363658, "learning_rate": 4.534712082179456e-06, "loss": 0.5852, "step": 5429 }, { "epoch": 0.69, "grad_norm": 0.48049723815762824, "learning_rate": 4.531247653012344e-06, "loss": 0.5787, "step": 5430 }, { "epoch": 0.69, "grad_norm": 0.5105184794224702, "learning_rate": 4.5277841600291915e-06, "loss": 0.6113, "step": 5431 }, { "epoch": 0.69, "grad_norm": 0.49418823870066586, "learning_rate": 4.5243216038229075e-06, "loss": 0.6141, "step": 5432 }, { "epoch": 0.69, "grad_norm": 0.4893911321423946, "learning_rate": 4.52085998498624e-06, "loss": 0.5791, "step": 5433 }, { "epoch": 0.69, "grad_norm": 0.5701038651537822, "learning_rate": 4.517399304111773e-06, "loss": 0.667, "step": 5434 }, { "epoch": 0.69, "grad_norm": 0.541454291002997, "learning_rate": 4.513939561791937e-06, "loss": 0.5804, "step": 5435 }, { "epoch": 0.69, "grad_norm": 0.5015647903586765, "learning_rate": 4.510480758619002e-06, "loss": 0.5782, "step": 5436 }, { "epoch": 0.69, "grad_norm": 0.5105568226293561, "learning_rate": 4.507022895185062e-06, "loss": 0.5852, "step": 5437 }, { "epoch": 0.69, "grad_norm": 0.5334569075435962, "learning_rate": 4.503565972082069e-06, "loss": 0.5951, "step": 5438 }, { "epoch": 0.69, "grad_norm": 0.49051363414112714, "learning_rate": 4.500109989901802e-06, "loss": 0.5872, "step": 5439 }, { "epoch": 0.69, "grad_norm": 0.5477969529285225, "learning_rate": 4.496654949235887e-06, "loss": 0.6531, "step": 5440 }, { "epoch": 0.7, "grad_norm": 0.46489449474222827, "learning_rate": 4.493200850675779e-06, "loss": 0.6157, "step": 5441 }, { "epoch": 0.7, "grad_norm": 0.5586823288749333, "learning_rate": 4.489747694812785e-06, "loss": 0.6146, "step": 5442 }, { "epoch": 0.7, "grad_norm": 0.477348122642158, "learning_rate": 4.48629548223804e-06, "loss": 0.5794, "step": 5443 }, { "epoch": 0.7, "grad_norm": 0.4815442111404748, "learning_rate": 4.482844213542522e-06, "loss": 0.5849, "step": 5444 }, { "epoch": 0.7, "grad_norm": 0.4950427772010186, "learning_rate": 4.479393889317049e-06, "loss": 0.5904, "step": 5445 }, { "epoch": 0.7, "grad_norm": 0.42706144840207944, "learning_rate": 4.475944510152266e-06, "loss": 0.5531, "step": 5446 }, { "epoch": 0.7, "grad_norm": 0.5003616456069603, "learning_rate": 4.47249607663867e-06, "loss": 0.5928, "step": 5447 }, { "epoch": 0.7, "grad_norm": 0.5431308356254717, "learning_rate": 4.469048589366588e-06, "loss": 0.6296, "step": 5448 }, { "epoch": 0.7, "grad_norm": 0.4691425339416381, "learning_rate": 4.465602048926194e-06, "loss": 0.5844, "step": 5449 }, { "epoch": 0.7, "grad_norm": 0.5252378724392343, "learning_rate": 4.46215645590749e-06, "loss": 0.6017, "step": 5450 }, { "epoch": 0.7, "grad_norm": 0.5008592831752523, "learning_rate": 4.458711810900321e-06, "loss": 0.6, "step": 5451 }, { "epoch": 0.7, "grad_norm": 0.5427423200861552, "learning_rate": 4.455268114494366e-06, "loss": 0.596, "step": 5452 }, { "epoch": 0.7, "grad_norm": 0.46244589448907847, "learning_rate": 4.451825367279147e-06, "loss": 0.5767, "step": 5453 }, { "epoch": 0.7, "grad_norm": 0.5230504336231843, "learning_rate": 4.448383569844022e-06, "loss": 0.5916, "step": 5454 }, { "epoch": 0.7, "grad_norm": 0.494988421960294, "learning_rate": 4.444942722778175e-06, "loss": 0.6135, "step": 5455 }, { "epoch": 0.7, "grad_norm": 0.46474468100222677, "learning_rate": 4.441502826670647e-06, "loss": 0.5601, "step": 5456 }, { "epoch": 0.7, "grad_norm": 0.5025590393095483, "learning_rate": 4.438063882110304e-06, "loss": 0.5996, "step": 5457 }, { "epoch": 0.7, "grad_norm": 0.5814658107474905, "learning_rate": 4.434625889685851e-06, "loss": 0.6373, "step": 5458 }, { "epoch": 0.7, "grad_norm": 0.5199660163778567, "learning_rate": 4.43118884998583e-06, "loss": 0.6047, "step": 5459 }, { "epoch": 0.7, "grad_norm": 0.6058386259596327, "learning_rate": 4.427752763598622e-06, "loss": 0.6244, "step": 5460 }, { "epoch": 0.7, "grad_norm": 0.4544568133556691, "learning_rate": 4.424317631112441e-06, "loss": 0.5759, "step": 5461 }, { "epoch": 0.7, "grad_norm": 0.47746349568384516, "learning_rate": 4.4208834531153425e-06, "loss": 0.5772, "step": 5462 }, { "epoch": 0.7, "grad_norm": 0.4929547853621322, "learning_rate": 4.417450230195214e-06, "loss": 0.6183, "step": 5463 }, { "epoch": 0.7, "grad_norm": 0.44612828059664905, "learning_rate": 4.414017962939783e-06, "loss": 0.5724, "step": 5464 }, { "epoch": 0.7, "grad_norm": 0.5222369420218516, "learning_rate": 4.410586651936611e-06, "loss": 0.5858, "step": 5465 }, { "epoch": 0.7, "grad_norm": 0.4970756698431107, "learning_rate": 4.4071562977730955e-06, "loss": 0.5943, "step": 5466 }, { "epoch": 0.7, "grad_norm": 0.5144989793441072, "learning_rate": 4.403726901036475e-06, "loss": 0.5954, "step": 5467 }, { "epoch": 0.7, "grad_norm": 0.4924858778740382, "learning_rate": 4.400298462313817e-06, "loss": 0.6073, "step": 5468 }, { "epoch": 0.7, "grad_norm": 0.5064158350100717, "learning_rate": 4.396870982192031e-06, "loss": 0.5816, "step": 5469 }, { "epoch": 0.7, "grad_norm": 1.147666399550127, "learning_rate": 4.393444461257858e-06, "loss": 0.5883, "step": 5470 }, { "epoch": 0.7, "grad_norm": 0.5433235981391867, "learning_rate": 4.39001890009788e-06, "loss": 0.6059, "step": 5471 }, { "epoch": 0.7, "grad_norm": 0.5469258922910156, "learning_rate": 4.386594299298507e-06, "loss": 0.6414, "step": 5472 }, { "epoch": 0.7, "grad_norm": 0.47784817908998906, "learning_rate": 4.383170659445992e-06, "loss": 0.6008, "step": 5473 }, { "epoch": 0.7, "grad_norm": 0.4639772665487003, "learning_rate": 4.3797479811264206e-06, "loss": 0.5633, "step": 5474 }, { "epoch": 0.7, "grad_norm": 0.564255504509194, "learning_rate": 4.3763262649257125e-06, "loss": 0.631, "step": 5475 }, { "epoch": 0.7, "grad_norm": 0.5260829603224575, "learning_rate": 4.372905511429625e-06, "loss": 0.6249, "step": 5476 }, { "epoch": 0.7, "grad_norm": 0.4772388933975033, "learning_rate": 4.369485721223744e-06, "loss": 0.5774, "step": 5477 }, { "epoch": 0.7, "grad_norm": 0.5720519550209457, "learning_rate": 4.36606689489351e-06, "loss": 0.6089, "step": 5478 }, { "epoch": 0.7, "grad_norm": 0.5009926406161592, "learning_rate": 4.36264903302417e-06, "loss": 0.571, "step": 5479 }, { "epoch": 0.7, "grad_norm": 0.4632153986871332, "learning_rate": 4.3592321362008275e-06, "loss": 0.5595, "step": 5480 }, { "epoch": 0.7, "grad_norm": 0.5216194303078904, "learning_rate": 4.355816205008411e-06, "loss": 0.5828, "step": 5481 }, { "epoch": 0.7, "grad_norm": 0.5389920290826157, "learning_rate": 4.352401240031689e-06, "loss": 0.6313, "step": 5482 }, { "epoch": 0.7, "grad_norm": 0.4832132715163692, "learning_rate": 4.348987241855259e-06, "loss": 0.5776, "step": 5483 }, { "epoch": 0.7, "grad_norm": 0.437334119997498, "learning_rate": 4.345574211063555e-06, "loss": 0.5792, "step": 5484 }, { "epoch": 0.7, "grad_norm": 0.5272140625259849, "learning_rate": 4.342162148240853e-06, "loss": 0.5963, "step": 5485 }, { "epoch": 0.7, "grad_norm": 0.5224943612267655, "learning_rate": 4.338751053971252e-06, "loss": 0.6383, "step": 5486 }, { "epoch": 0.7, "grad_norm": 0.5087570646923434, "learning_rate": 4.335340928838694e-06, "loss": 0.5578, "step": 5487 }, { "epoch": 0.7, "grad_norm": 0.46685378820514345, "learning_rate": 4.331931773426945e-06, "loss": 0.5848, "step": 5488 }, { "epoch": 0.7, "grad_norm": 0.571813834759201, "learning_rate": 4.328523588319613e-06, "loss": 0.6179, "step": 5489 }, { "epoch": 0.7, "grad_norm": 0.5198938411951196, "learning_rate": 4.325116374100138e-06, "loss": 0.6079, "step": 5490 }, { "epoch": 0.7, "grad_norm": 0.46040196144206297, "learning_rate": 4.3217101313517915e-06, "loss": 0.555, "step": 5491 }, { "epoch": 0.7, "grad_norm": 0.4684280267591219, "learning_rate": 4.318304860657687e-06, "loss": 0.5901, "step": 5492 }, { "epoch": 0.7, "grad_norm": 0.49240017339984243, "learning_rate": 4.3149005626007625e-06, "loss": 0.6132, "step": 5493 }, { "epoch": 0.7, "grad_norm": 0.4593307808525848, "learning_rate": 4.311497237763791e-06, "loss": 0.571, "step": 5494 }, { "epoch": 0.7, "grad_norm": 0.6226235057420106, "learning_rate": 4.308094886729385e-06, "loss": 0.6719, "step": 5495 }, { "epoch": 0.7, "grad_norm": 0.43603529042532424, "learning_rate": 4.304693510079979e-06, "loss": 0.5616, "step": 5496 }, { "epoch": 0.7, "grad_norm": 0.5088258931643983, "learning_rate": 4.30129310839785e-06, "loss": 0.6056, "step": 5497 }, { "epoch": 0.7, "grad_norm": 0.4808139995734045, "learning_rate": 4.2978936822651015e-06, "loss": 0.6027, "step": 5498 }, { "epoch": 0.7, "grad_norm": 0.5994048977829501, "learning_rate": 4.294495232263684e-06, "loss": 0.6186, "step": 5499 }, { "epoch": 0.7, "grad_norm": 0.5209963226218516, "learning_rate": 4.2910977589753635e-06, "loss": 0.6037, "step": 5500 }, { "epoch": 0.7, "grad_norm": 0.5905456667984507, "learning_rate": 4.287701262981749e-06, "loss": 0.6284, "step": 5501 }, { "epoch": 0.7, "grad_norm": 0.5595711688397326, "learning_rate": 4.284305744864276e-06, "loss": 0.6212, "step": 5502 }, { "epoch": 0.7, "grad_norm": 0.5001568979693553, "learning_rate": 4.28091120520422e-06, "loss": 0.5835, "step": 5503 }, { "epoch": 0.7, "grad_norm": 0.5170003805425597, "learning_rate": 4.277517644582686e-06, "loss": 0.6163, "step": 5504 }, { "epoch": 0.7, "grad_norm": 0.551057694179567, "learning_rate": 4.274125063580601e-06, "loss": 0.5683, "step": 5505 }, { "epoch": 0.7, "grad_norm": 0.5077721034862273, "learning_rate": 4.270733462778742e-06, "loss": 0.5734, "step": 5506 }, { "epoch": 0.7, "grad_norm": 0.47924505256621225, "learning_rate": 4.267342842757709e-06, "loss": 0.5781, "step": 5507 }, { "epoch": 0.7, "grad_norm": 0.5821281987128822, "learning_rate": 4.263953204097933e-06, "loss": 0.6127, "step": 5508 }, { "epoch": 0.7, "grad_norm": 0.48349176914377084, "learning_rate": 4.2605645473796806e-06, "loss": 0.5727, "step": 5509 }, { "epoch": 0.7, "grad_norm": 0.5593306601333713, "learning_rate": 4.257176873183049e-06, "loss": 0.6301, "step": 5510 }, { "epoch": 0.7, "grad_norm": 0.5131985831195008, "learning_rate": 4.2537901820879635e-06, "loss": 0.6013, "step": 5511 }, { "epoch": 0.7, "grad_norm": 0.5194003169837965, "learning_rate": 4.250404474674188e-06, "loss": 0.5798, "step": 5512 }, { "epoch": 0.7, "grad_norm": 0.4857664709826061, "learning_rate": 4.247019751521313e-06, "loss": 0.5621, "step": 5513 }, { "epoch": 0.7, "grad_norm": 0.5556605003015662, "learning_rate": 4.243636013208762e-06, "loss": 0.6049, "step": 5514 }, { "epoch": 0.7, "grad_norm": 0.5685576854418263, "learning_rate": 4.24025326031579e-06, "loss": 0.65, "step": 5515 }, { "epoch": 0.7, "grad_norm": 0.5091555697393998, "learning_rate": 4.236871493421483e-06, "loss": 0.6038, "step": 5516 }, { "epoch": 0.7, "grad_norm": 0.4928720076701853, "learning_rate": 4.233490713104758e-06, "loss": 0.5771, "step": 5517 }, { "epoch": 0.7, "grad_norm": 0.5523701283333468, "learning_rate": 4.230110919944365e-06, "loss": 0.6127, "step": 5518 }, { "epoch": 0.71, "grad_norm": 0.4718185010884205, "learning_rate": 4.226732114518883e-06, "loss": 0.6024, "step": 5519 }, { "epoch": 0.71, "grad_norm": 0.4459840422620082, "learning_rate": 4.223354297406722e-06, "loss": 0.5738, "step": 5520 }, { "epoch": 0.71, "grad_norm": 0.49952114855655844, "learning_rate": 4.219977469186124e-06, "loss": 0.6049, "step": 5521 }, { "epoch": 0.71, "grad_norm": 0.5216099966143741, "learning_rate": 4.2166016304351596e-06, "loss": 0.5991, "step": 5522 }, { "epoch": 0.71, "grad_norm": 0.4979181211484655, "learning_rate": 4.213226781731732e-06, "loss": 0.6054, "step": 5523 }, { "epoch": 0.71, "grad_norm": 0.48558070563701505, "learning_rate": 4.2098529236535746e-06, "loss": 0.5755, "step": 5524 }, { "epoch": 0.71, "grad_norm": 0.5074052663221953, "learning_rate": 4.2064800567782504e-06, "loss": 0.5763, "step": 5525 }, { "epoch": 0.71, "grad_norm": 0.4422769644636347, "learning_rate": 4.203108181683149e-06, "loss": 0.573, "step": 5526 }, { "epoch": 0.71, "grad_norm": 0.4878235753741712, "learning_rate": 4.199737298945506e-06, "loss": 0.5684, "step": 5527 }, { "epoch": 0.71, "grad_norm": 0.5032445209210109, "learning_rate": 4.196367409142369e-06, "loss": 0.6266, "step": 5528 }, { "epoch": 0.71, "grad_norm": 0.5606872249901682, "learning_rate": 4.192998512850619e-06, "loss": 0.6487, "step": 5529 }, { "epoch": 0.71, "grad_norm": 0.5285518614509501, "learning_rate": 4.1896306106469735e-06, "loss": 0.6436, "step": 5530 }, { "epoch": 0.71, "grad_norm": 0.45931043671095145, "learning_rate": 4.186263703107976e-06, "loss": 0.5946, "step": 5531 }, { "epoch": 0.71, "grad_norm": 0.5026612857863616, "learning_rate": 4.182897790809999e-06, "loss": 0.6227, "step": 5532 }, { "epoch": 0.71, "grad_norm": 0.5024978190035571, "learning_rate": 4.179532874329248e-06, "loss": 0.5929, "step": 5533 }, { "epoch": 0.71, "grad_norm": 0.543652466615378, "learning_rate": 4.17616895424175e-06, "loss": 0.6132, "step": 5534 }, { "epoch": 0.71, "grad_norm": 0.5655211988783327, "learning_rate": 4.172806031123375e-06, "loss": 0.6191, "step": 5535 }, { "epoch": 0.71, "grad_norm": 0.5202866239040067, "learning_rate": 4.169444105549812e-06, "loss": 0.5986, "step": 5536 }, { "epoch": 0.71, "grad_norm": 0.5635952756613603, "learning_rate": 4.166083178096585e-06, "loss": 0.6428, "step": 5537 }, { "epoch": 0.71, "grad_norm": 0.4429151006417516, "learning_rate": 4.162723249339037e-06, "loss": 0.5753, "step": 5538 }, { "epoch": 0.71, "grad_norm": 0.4919281899837113, "learning_rate": 4.159364319852349e-06, "loss": 0.5706, "step": 5539 }, { "epoch": 0.71, "grad_norm": 0.501146629972671, "learning_rate": 4.156006390211532e-06, "loss": 0.6209, "step": 5540 }, { "epoch": 0.71, "grad_norm": 0.5016894795093108, "learning_rate": 4.152649460991416e-06, "loss": 0.6276, "step": 5541 }, { "epoch": 0.71, "grad_norm": 0.46118105877704296, "learning_rate": 4.149293532766675e-06, "loss": 0.5806, "step": 5542 }, { "epoch": 0.71, "grad_norm": 0.47471259440074026, "learning_rate": 4.1459386061118e-06, "loss": 0.5636, "step": 5543 }, { "epoch": 0.71, "grad_norm": 0.47909235690083884, "learning_rate": 4.142584681601113e-06, "loss": 0.5989, "step": 5544 }, { "epoch": 0.71, "grad_norm": 0.506148894876906, "learning_rate": 4.139231759808765e-06, "loss": 0.6001, "step": 5545 }, { "epoch": 0.71, "grad_norm": 0.5314975665938003, "learning_rate": 4.13587984130874e-06, "loss": 0.5868, "step": 5546 }, { "epoch": 0.71, "grad_norm": 0.5410502334662844, "learning_rate": 4.132528926674837e-06, "loss": 0.6148, "step": 5547 }, { "epoch": 0.71, "grad_norm": 0.5535891706866293, "learning_rate": 4.129179016480693e-06, "loss": 0.6485, "step": 5548 }, { "epoch": 0.71, "grad_norm": 0.49963087961618247, "learning_rate": 4.1258301112997785e-06, "loss": 0.5716, "step": 5549 }, { "epoch": 0.71, "grad_norm": 0.5277504046616877, "learning_rate": 4.1224822117053835e-06, "loss": 0.5928, "step": 5550 }, { "epoch": 0.71, "grad_norm": 0.5370616275007932, "learning_rate": 4.119135318270626e-06, "loss": 0.5941, "step": 5551 }, { "epoch": 0.71, "grad_norm": 0.4980786793135067, "learning_rate": 4.115789431568453e-06, "loss": 0.5837, "step": 5552 }, { "epoch": 0.71, "grad_norm": 0.5018141810856317, "learning_rate": 4.1124445521716415e-06, "loss": 0.568, "step": 5553 }, { "epoch": 0.71, "grad_norm": 0.4988127753362254, "learning_rate": 4.109100680652793e-06, "loss": 0.6291, "step": 5554 }, { "epoch": 0.71, "grad_norm": 0.4800729963880009, "learning_rate": 4.105757817584338e-06, "loss": 0.5648, "step": 5555 }, { "epoch": 0.71, "grad_norm": 0.4892114632425265, "learning_rate": 4.102415963538534e-06, "loss": 0.5702, "step": 5556 }, { "epoch": 0.71, "grad_norm": 0.5469824290551712, "learning_rate": 4.099075119087467e-06, "loss": 0.6024, "step": 5557 }, { "epoch": 0.71, "grad_norm": 0.5436850026459698, "learning_rate": 4.095735284803047e-06, "loss": 0.6143, "step": 5558 }, { "epoch": 0.71, "grad_norm": 0.5397594038724004, "learning_rate": 4.092396461257013e-06, "loss": 0.5835, "step": 5559 }, { "epoch": 0.71, "grad_norm": 0.5119626901691273, "learning_rate": 4.0890586490209325e-06, "loss": 0.603, "step": 5560 }, { "epoch": 0.71, "grad_norm": 0.48838074600044346, "learning_rate": 4.0857218486661975e-06, "loss": 0.5613, "step": 5561 }, { "epoch": 0.71, "grad_norm": 0.5270448507020776, "learning_rate": 4.082386060764029e-06, "loss": 0.5991, "step": 5562 }, { "epoch": 0.71, "grad_norm": 0.5156372994669871, "learning_rate": 4.079051285885471e-06, "loss": 0.589, "step": 5563 }, { "epoch": 0.71, "grad_norm": 0.5056680905719104, "learning_rate": 4.075717524601398e-06, "loss": 0.6113, "step": 5564 }, { "epoch": 0.71, "grad_norm": 0.5071456070247068, "learning_rate": 4.07238477748251e-06, "loss": 0.5921, "step": 5565 }, { "epoch": 0.71, "grad_norm": 0.4731260028460402, "learning_rate": 4.069053045099332e-06, "loss": 0.5958, "step": 5566 }, { "epoch": 0.71, "grad_norm": 0.5158102866005974, "learning_rate": 4.065722328022215e-06, "loss": 0.6052, "step": 5567 }, { "epoch": 0.71, "grad_norm": 0.4791528177347545, "learning_rate": 4.062392626821339e-06, "loss": 0.5575, "step": 5568 }, { "epoch": 0.71, "grad_norm": 0.541534107786303, "learning_rate": 4.059063942066704e-06, "loss": 0.5932, "step": 5569 }, { "epoch": 0.71, "grad_norm": 0.6133319174860213, "learning_rate": 4.0557362743281525e-06, "loss": 0.6421, "step": 5570 }, { "epoch": 0.71, "grad_norm": 0.4634217039565455, "learning_rate": 4.052409624175329e-06, "loss": 0.5804, "step": 5571 }, { "epoch": 0.71, "grad_norm": 0.521701191572865, "learning_rate": 4.049083992177717e-06, "loss": 0.5848, "step": 5572 }, { "epoch": 0.71, "grad_norm": 0.5870550869412406, "learning_rate": 4.045759378904628e-06, "loss": 0.6311, "step": 5573 }, { "epoch": 0.71, "grad_norm": 0.4643903298974422, "learning_rate": 4.042435784925194e-06, "loss": 0.594, "step": 5574 }, { "epoch": 0.71, "grad_norm": 0.48518587075393765, "learning_rate": 4.039113210808373e-06, "loss": 0.5785, "step": 5575 }, { "epoch": 0.71, "grad_norm": 0.5513729186643118, "learning_rate": 4.035791657122946e-06, "loss": 0.6461, "step": 5576 }, { "epoch": 0.71, "grad_norm": 0.5423538702780386, "learning_rate": 4.032471124437531e-06, "loss": 0.6471, "step": 5577 }, { "epoch": 0.71, "grad_norm": 0.4875458598810434, "learning_rate": 4.029151613320558e-06, "loss": 0.6006, "step": 5578 }, { "epoch": 0.71, "grad_norm": 0.5145473815920213, "learning_rate": 4.025833124340291e-06, "loss": 0.6038, "step": 5579 }, { "epoch": 0.71, "grad_norm": 0.5035638028418303, "learning_rate": 4.022515658064808e-06, "loss": 0.5754, "step": 5580 }, { "epoch": 0.71, "grad_norm": 0.5275196449977497, "learning_rate": 4.019199215062023e-06, "loss": 0.6245, "step": 5581 }, { "epoch": 0.71, "grad_norm": 0.4785065528550983, "learning_rate": 4.01588379589967e-06, "loss": 0.5775, "step": 5582 }, { "epoch": 0.71, "grad_norm": 0.4729592564649905, "learning_rate": 4.012569401145306e-06, "loss": 0.5791, "step": 5583 }, { "epoch": 0.71, "grad_norm": 0.5246085254804016, "learning_rate": 4.00925603136632e-06, "loss": 0.6103, "step": 5584 }, { "epoch": 0.71, "grad_norm": 0.5206784076997043, "learning_rate": 4.005943687129918e-06, "loss": 0.5807, "step": 5585 }, { "epoch": 0.71, "grad_norm": 0.4739375829915717, "learning_rate": 4.0026323690031345e-06, "loss": 0.5959, "step": 5586 }, { "epoch": 0.71, "grad_norm": 0.5831942134409506, "learning_rate": 3.999322077552825e-06, "loss": 0.6392, "step": 5587 }, { "epoch": 0.71, "grad_norm": 0.5033460108307327, "learning_rate": 3.996012813345675e-06, "loss": 0.5913, "step": 5588 }, { "epoch": 0.71, "grad_norm": 0.508592904485161, "learning_rate": 3.992704576948185e-06, "loss": 0.6043, "step": 5589 }, { "epoch": 0.71, "grad_norm": 0.5387227335249782, "learning_rate": 3.9893973689266854e-06, "loss": 0.5853, "step": 5590 }, { "epoch": 0.71, "grad_norm": 0.4454230002806385, "learning_rate": 3.986091189847333e-06, "loss": 0.566, "step": 5591 }, { "epoch": 0.71, "grad_norm": 0.4797613828655146, "learning_rate": 3.982786040276105e-06, "loss": 0.5851, "step": 5592 }, { "epoch": 0.71, "grad_norm": 0.5283591856505238, "learning_rate": 3.979481920778803e-06, "loss": 0.5968, "step": 5593 }, { "epoch": 0.71, "grad_norm": 0.44312595888402756, "learning_rate": 3.97617883192105e-06, "loss": 0.569, "step": 5594 }, { "epoch": 0.71, "grad_norm": 0.5815404350530666, "learning_rate": 3.972876774268297e-06, "loss": 0.6305, "step": 5595 }, { "epoch": 0.71, "grad_norm": 0.46287235347327677, "learning_rate": 3.969575748385816e-06, "loss": 0.5867, "step": 5596 }, { "epoch": 0.71, "grad_norm": 0.47020562130212623, "learning_rate": 3.9662757548387045e-06, "loss": 0.5892, "step": 5597 }, { "epoch": 0.72, "grad_norm": 0.5214661053341556, "learning_rate": 3.962976794191872e-06, "loss": 0.594, "step": 5598 }, { "epoch": 0.72, "grad_norm": 0.547416550370643, "learning_rate": 3.95967886701007e-06, "loss": 0.6061, "step": 5599 }, { "epoch": 0.72, "grad_norm": 0.5761420950496446, "learning_rate": 3.95638197385786e-06, "loss": 0.6437, "step": 5600 }, { "epoch": 0.72, "grad_norm": 0.5219285273448426, "learning_rate": 3.953086115299631e-06, "loss": 0.6229, "step": 5601 }, { "epoch": 0.72, "grad_norm": 0.45139105493519877, "learning_rate": 3.949791291899594e-06, "loss": 0.5805, "step": 5602 }, { "epoch": 0.72, "grad_norm": 0.4489095415571454, "learning_rate": 3.9464975042217825e-06, "loss": 0.5844, "step": 5603 }, { "epoch": 0.72, "grad_norm": 0.45256716563789956, "learning_rate": 3.943204752830052e-06, "loss": 0.6008, "step": 5604 }, { "epoch": 0.72, "grad_norm": 0.5153555529053762, "learning_rate": 3.939913038288084e-06, "loss": 0.5988, "step": 5605 }, { "epoch": 0.72, "grad_norm": 0.5357172960565256, "learning_rate": 3.936622361159377e-06, "loss": 0.6232, "step": 5606 }, { "epoch": 0.72, "grad_norm": 0.46967486843007855, "learning_rate": 3.933332722007256e-06, "loss": 0.5763, "step": 5607 }, { "epoch": 0.72, "grad_norm": 0.5519326918583112, "learning_rate": 3.930044121394868e-06, "loss": 0.5924, "step": 5608 }, { "epoch": 0.72, "grad_norm": 0.46768239651371524, "learning_rate": 3.926756559885181e-06, "loss": 0.5382, "step": 5609 }, { "epoch": 0.72, "grad_norm": 0.5743223794832651, "learning_rate": 3.923470038040986e-06, "loss": 0.5706, "step": 5610 }, { "epoch": 0.72, "grad_norm": 0.4624111021363413, "learning_rate": 3.920184556424894e-06, "loss": 0.5774, "step": 5611 }, { "epoch": 0.72, "grad_norm": 0.47365138199237783, "learning_rate": 3.916900115599342e-06, "loss": 0.6017, "step": 5612 }, { "epoch": 0.72, "grad_norm": 0.6407482940681762, "learning_rate": 3.913616716126585e-06, "loss": 0.7142, "step": 5613 }, { "epoch": 0.72, "grad_norm": 0.47986333434897077, "learning_rate": 3.910334358568701e-06, "loss": 0.5814, "step": 5614 }, { "epoch": 0.72, "grad_norm": 0.5604961061320644, "learning_rate": 3.90705304348759e-06, "loss": 0.6543, "step": 5615 }, { "epoch": 0.72, "grad_norm": 0.4822541196624783, "learning_rate": 3.903772771444972e-06, "loss": 0.5844, "step": 5616 }, { "epoch": 0.72, "grad_norm": 0.5468291899788394, "learning_rate": 3.900493543002393e-06, "loss": 0.586, "step": 5617 }, { "epoch": 0.72, "grad_norm": 0.5332694253511929, "learning_rate": 3.8972153587212136e-06, "loss": 0.6609, "step": 5618 }, { "epoch": 0.72, "grad_norm": 0.49319357102671374, "learning_rate": 3.893938219162618e-06, "loss": 0.5803, "step": 5619 }, { "epoch": 0.72, "grad_norm": 0.5454066917573812, "learning_rate": 3.890662124887619e-06, "loss": 0.64, "step": 5620 }, { "epoch": 0.72, "grad_norm": 0.533880448549957, "learning_rate": 3.887387076457042e-06, "loss": 0.5969, "step": 5621 }, { "epoch": 0.72, "grad_norm": 0.5062574412301062, "learning_rate": 3.8841130744315315e-06, "loss": 0.5871, "step": 5622 }, { "epoch": 0.72, "grad_norm": 0.5612910784091125, "learning_rate": 3.880840119371559e-06, "loss": 0.6014, "step": 5623 }, { "epoch": 0.72, "grad_norm": 0.4962142505385829, "learning_rate": 3.877568211837416e-06, "loss": 0.5767, "step": 5624 }, { "epoch": 0.72, "grad_norm": 0.5176652428587546, "learning_rate": 3.87429735238921e-06, "loss": 0.6265, "step": 5625 }, { "epoch": 0.72, "grad_norm": 0.47066021161666766, "learning_rate": 3.8710275415868724e-06, "loss": 0.5855, "step": 5626 }, { "epoch": 0.72, "grad_norm": 0.5346632830414604, "learning_rate": 3.867758779990161e-06, "loss": 0.6035, "step": 5627 }, { "epoch": 0.72, "grad_norm": 0.49814577793046544, "learning_rate": 3.864491068158644e-06, "loss": 0.6082, "step": 5628 }, { "epoch": 0.72, "grad_norm": 0.5303352736242054, "learning_rate": 3.861224406651715e-06, "loss": 0.5837, "step": 5629 }, { "epoch": 0.72, "grad_norm": 0.5141430024717639, "learning_rate": 3.857958796028589e-06, "loss": 0.5946, "step": 5630 }, { "epoch": 0.72, "grad_norm": 0.578059139806989, "learning_rate": 3.854694236848292e-06, "loss": 0.634, "step": 5631 }, { "epoch": 0.72, "grad_norm": 0.49635239753105875, "learning_rate": 3.85143072966968e-06, "loss": 0.6074, "step": 5632 }, { "epoch": 0.72, "grad_norm": 0.4642516171534901, "learning_rate": 3.848168275051425e-06, "loss": 0.5714, "step": 5633 }, { "epoch": 0.72, "grad_norm": 0.5315539697500226, "learning_rate": 3.844906873552024e-06, "loss": 0.5912, "step": 5634 }, { "epoch": 0.72, "grad_norm": 0.4637466098799487, "learning_rate": 3.8416465257297865e-06, "loss": 0.5822, "step": 5635 }, { "epoch": 0.72, "grad_norm": 0.5027851800779976, "learning_rate": 3.838387232142845e-06, "loss": 0.5926, "step": 5636 }, { "epoch": 0.72, "grad_norm": 0.5708871313328701, "learning_rate": 3.83512899334915e-06, "loss": 0.608, "step": 5637 }, { "epoch": 0.72, "grad_norm": 0.49403187343463734, "learning_rate": 3.831871809906472e-06, "loss": 0.5719, "step": 5638 }, { "epoch": 0.72, "grad_norm": 0.5143665369809736, "learning_rate": 3.8286156823724065e-06, "loss": 0.6133, "step": 5639 }, { "epoch": 0.72, "grad_norm": 0.49156034766751294, "learning_rate": 3.825360611304351e-06, "loss": 0.6217, "step": 5640 }, { "epoch": 0.72, "grad_norm": 0.5076161068790191, "learning_rate": 3.822106597259545e-06, "loss": 0.5959, "step": 5641 }, { "epoch": 0.72, "grad_norm": 0.5018445911966088, "learning_rate": 3.818853640795033e-06, "loss": 0.6032, "step": 5642 }, { "epoch": 0.72, "grad_norm": 0.44828595414094624, "learning_rate": 3.815601742467681e-06, "loss": 0.5917, "step": 5643 }, { "epoch": 0.72, "grad_norm": 0.4455299997386983, "learning_rate": 3.8123509028341755e-06, "loss": 0.5774, "step": 5644 }, { "epoch": 0.72, "grad_norm": 0.5305444672089008, "learning_rate": 3.8091011224510198e-06, "loss": 0.6073, "step": 5645 }, { "epoch": 0.72, "grad_norm": 0.5263551256137697, "learning_rate": 3.805852401874537e-06, "loss": 0.5892, "step": 5646 }, { "epoch": 0.72, "grad_norm": 0.5450781996960221, "learning_rate": 3.802604741660869e-06, "loss": 0.6624, "step": 5647 }, { "epoch": 0.72, "grad_norm": 0.5322164416788357, "learning_rate": 3.7993581423659754e-06, "loss": 0.5595, "step": 5648 }, { "epoch": 0.72, "grad_norm": 0.5038940756028316, "learning_rate": 3.7961126045456353e-06, "loss": 0.614, "step": 5649 }, { "epoch": 0.72, "grad_norm": 0.4294991850208381, "learning_rate": 3.792868128755444e-06, "loss": 0.5415, "step": 5650 }, { "epoch": 0.72, "grad_norm": 0.5148913885879445, "learning_rate": 3.789624715550817e-06, "loss": 0.6142, "step": 5651 }, { "epoch": 0.72, "grad_norm": 0.46148953949475907, "learning_rate": 3.786382365486988e-06, "loss": 0.6093, "step": 5652 }, { "epoch": 0.72, "grad_norm": 0.556889996236135, "learning_rate": 3.783141079119007e-06, "loss": 0.6417, "step": 5653 }, { "epoch": 0.72, "grad_norm": 0.5656832543373451, "learning_rate": 3.7799008570017424e-06, "loss": 0.5875, "step": 5654 }, { "epoch": 0.72, "grad_norm": 0.4514086418878498, "learning_rate": 3.776661699689883e-06, "loss": 0.5594, "step": 5655 }, { "epoch": 0.72, "grad_norm": 0.49050388743063966, "learning_rate": 3.7734236077379304e-06, "loss": 0.6094, "step": 5656 }, { "epoch": 0.72, "grad_norm": 0.5047642986659241, "learning_rate": 3.770186581700208e-06, "loss": 0.6096, "step": 5657 }, { "epoch": 0.72, "grad_norm": 0.4688491819674142, "learning_rate": 3.766950622130856e-06, "loss": 0.5705, "step": 5658 }, { "epoch": 0.72, "grad_norm": 0.5135650149014501, "learning_rate": 3.7637157295838313e-06, "loss": 0.6184, "step": 5659 }, { "epoch": 0.72, "grad_norm": 0.5329398378021795, "learning_rate": 3.760481904612906e-06, "loss": 0.5948, "step": 5660 }, { "epoch": 0.72, "grad_norm": 0.5537588569053515, "learning_rate": 3.757249147771673e-06, "loss": 0.5874, "step": 5661 }, { "epoch": 0.72, "grad_norm": 0.633848131098791, "learning_rate": 3.7540174596135393e-06, "loss": 0.6725, "step": 5662 }, { "epoch": 0.72, "grad_norm": 0.5239565806328482, "learning_rate": 3.750786840691738e-06, "loss": 0.6229, "step": 5663 }, { "epoch": 0.72, "grad_norm": 0.4836915653441037, "learning_rate": 3.747557291559304e-06, "loss": 0.6066, "step": 5664 }, { "epoch": 0.72, "grad_norm": 0.48056133170251536, "learning_rate": 3.744328812769098e-06, "loss": 0.5803, "step": 5665 }, { "epoch": 0.72, "grad_norm": 0.6656260864190199, "learning_rate": 3.741101404873798e-06, "loss": 0.5993, "step": 5666 }, { "epoch": 0.72, "grad_norm": 0.45901694906404056, "learning_rate": 3.737875068425897e-06, "loss": 0.5747, "step": 5667 }, { "epoch": 0.72, "grad_norm": 0.5287645289191963, "learning_rate": 3.7346498039777044e-06, "loss": 0.6229, "step": 5668 }, { "epoch": 0.72, "grad_norm": 0.4883797694906195, "learning_rate": 3.731425612081342e-06, "loss": 0.6083, "step": 5669 }, { "epoch": 0.72, "grad_norm": 0.48180941780782355, "learning_rate": 3.7282024932887597e-06, "loss": 0.5953, "step": 5670 }, { "epoch": 0.72, "grad_norm": 0.5220652640593739, "learning_rate": 3.7249804481517173e-06, "loss": 0.6363, "step": 5671 }, { "epoch": 0.72, "grad_norm": 0.7762742324121708, "learning_rate": 3.7217594772217803e-06, "loss": 0.591, "step": 5672 }, { "epoch": 0.72, "grad_norm": 0.4368676830145264, "learning_rate": 3.7185395810503468e-06, "loss": 0.5525, "step": 5673 }, { "epoch": 0.72, "grad_norm": 0.48367315759224144, "learning_rate": 3.715320760188621e-06, "loss": 0.5975, "step": 5674 }, { "epoch": 0.72, "grad_norm": 0.5011957512918341, "learning_rate": 3.712103015187627e-06, "loss": 0.5971, "step": 5675 }, { "epoch": 0.73, "grad_norm": 0.49059465529772367, "learning_rate": 3.7088863465981996e-06, "loss": 0.6021, "step": 5676 }, { "epoch": 0.73, "grad_norm": 0.5737692489666246, "learning_rate": 3.7056707549710012e-06, "loss": 0.6058, "step": 5677 }, { "epoch": 0.73, "grad_norm": 0.48863966761117317, "learning_rate": 3.702456240856498e-06, "loss": 0.5826, "step": 5678 }, { "epoch": 0.73, "grad_norm": 0.4863733235734726, "learning_rate": 3.699242804804977e-06, "loss": 0.6044, "step": 5679 }, { "epoch": 0.73, "grad_norm": 0.5158307777927152, "learning_rate": 3.696030447366541e-06, "loss": 0.6184, "step": 5680 }, { "epoch": 0.73, "grad_norm": 0.5311013446488521, "learning_rate": 3.6928191690910997e-06, "loss": 0.5977, "step": 5681 }, { "epoch": 0.73, "grad_norm": 0.5153899507454726, "learning_rate": 3.68960897052839e-06, "loss": 0.6136, "step": 5682 }, { "epoch": 0.73, "grad_norm": 0.5740231765661074, "learning_rate": 3.6863998522279555e-06, "loss": 0.6331, "step": 5683 }, { "epoch": 0.73, "grad_norm": 0.522261905697916, "learning_rate": 3.683191814739163e-06, "loss": 0.6087, "step": 5684 }, { "epoch": 0.73, "grad_norm": 0.4862487791595951, "learning_rate": 3.6799848586111875e-06, "loss": 0.5802, "step": 5685 }, { "epoch": 0.73, "grad_norm": 0.48176925909295765, "learning_rate": 3.6767789843930213e-06, "loss": 0.6342, "step": 5686 }, { "epoch": 0.73, "grad_norm": 0.46910416455951753, "learning_rate": 3.67357419263347e-06, "loss": 0.6064, "step": 5687 }, { "epoch": 0.73, "grad_norm": 0.5020372816380027, "learning_rate": 3.670370483881155e-06, "loss": 0.5994, "step": 5688 }, { "epoch": 0.73, "grad_norm": 0.47563117951355055, "learning_rate": 3.6671678586845173e-06, "loss": 0.5985, "step": 5689 }, { "epoch": 0.73, "grad_norm": 0.5591107167049031, "learning_rate": 3.663966317591796e-06, "loss": 0.6022, "step": 5690 }, { "epoch": 0.73, "grad_norm": 0.5444666328975054, "learning_rate": 3.6607658611510665e-06, "loss": 0.6097, "step": 5691 }, { "epoch": 0.73, "grad_norm": 0.502417558043413, "learning_rate": 3.657566489910205e-06, "loss": 0.6016, "step": 5692 }, { "epoch": 0.73, "grad_norm": 0.5070078320097537, "learning_rate": 3.6543682044169037e-06, "loss": 0.5736, "step": 5693 }, { "epoch": 0.73, "grad_norm": 0.5617043557945485, "learning_rate": 3.651171005218672e-06, "loss": 0.6249, "step": 5694 }, { "epoch": 0.73, "grad_norm": 0.5237913750570224, "learning_rate": 3.647974892862831e-06, "loss": 0.6216, "step": 5695 }, { "epoch": 0.73, "grad_norm": 0.5033374561433749, "learning_rate": 3.644779867896515e-06, "loss": 0.5822, "step": 5696 }, { "epoch": 0.73, "grad_norm": 0.513654606468409, "learning_rate": 3.6415859308666756e-06, "loss": 0.5604, "step": 5697 }, { "epoch": 0.73, "grad_norm": 0.5716745709895545, "learning_rate": 3.638393082320074e-06, "loss": 0.6137, "step": 5698 }, { "epoch": 0.73, "grad_norm": 0.4792562963415885, "learning_rate": 3.635201322803288e-06, "loss": 0.5994, "step": 5699 }, { "epoch": 0.73, "grad_norm": 0.5138643752614335, "learning_rate": 3.6320106528627088e-06, "loss": 0.593, "step": 5700 }, { "epoch": 0.73, "grad_norm": 0.5145914960194329, "learning_rate": 3.6288210730445382e-06, "loss": 0.6006, "step": 5701 }, { "epoch": 0.73, "grad_norm": 0.616084214937001, "learning_rate": 3.625632583894796e-06, "loss": 0.6672, "step": 5702 }, { "epoch": 0.73, "grad_norm": 0.4745763365571809, "learning_rate": 3.622445185959311e-06, "loss": 0.6038, "step": 5703 }, { "epoch": 0.73, "grad_norm": 0.46466751643773985, "learning_rate": 3.619258879783728e-06, "loss": 0.5943, "step": 5704 }, { "epoch": 0.73, "grad_norm": 0.6312954303626641, "learning_rate": 3.616073665913503e-06, "loss": 0.6076, "step": 5705 }, { "epoch": 0.73, "grad_norm": 0.46756940202731256, "learning_rate": 3.6128895448939073e-06, "loss": 0.5652, "step": 5706 }, { "epoch": 0.73, "grad_norm": 0.6493840010286933, "learning_rate": 3.609706517270023e-06, "loss": 0.5629, "step": 5707 }, { "epoch": 0.73, "grad_norm": 0.5347730789510996, "learning_rate": 3.6065245835867444e-06, "loss": 0.62, "step": 5708 }, { "epoch": 0.73, "grad_norm": 0.5276424161871252, "learning_rate": 3.603343744388783e-06, "loss": 0.6039, "step": 5709 }, { "epoch": 0.73, "grad_norm": 0.5231252443732008, "learning_rate": 3.6001640002206573e-06, "loss": 0.606, "step": 5710 }, { "epoch": 0.73, "grad_norm": 0.4987738395060413, "learning_rate": 3.596985351626697e-06, "loss": 0.6008, "step": 5711 }, { "epoch": 0.73, "grad_norm": 0.5144118664732246, "learning_rate": 3.593807799151058e-06, "loss": 0.5941, "step": 5712 }, { "epoch": 0.73, "grad_norm": 0.5016221885802575, "learning_rate": 3.590631343337695e-06, "loss": 0.603, "step": 5713 }, { "epoch": 0.73, "grad_norm": 0.5612778954826302, "learning_rate": 3.587455984730375e-06, "loss": 0.5954, "step": 5714 }, { "epoch": 0.73, "grad_norm": 0.532297061985889, "learning_rate": 3.584281723872681e-06, "loss": 0.5968, "step": 5715 }, { "epoch": 0.73, "grad_norm": 0.527062450640355, "learning_rate": 3.581108561308012e-06, "loss": 0.5856, "step": 5716 }, { "epoch": 0.73, "grad_norm": 0.4699709675884964, "learning_rate": 3.5779364975795715e-06, "loss": 0.6211, "step": 5717 }, { "epoch": 0.73, "grad_norm": 0.5636263560882556, "learning_rate": 3.574765533230379e-06, "loss": 0.6039, "step": 5718 }, { "epoch": 0.73, "grad_norm": 0.5310781210998009, "learning_rate": 3.5715956688032627e-06, "loss": 0.5682, "step": 5719 }, { "epoch": 0.73, "grad_norm": 0.499460292983605, "learning_rate": 3.5684269048408705e-06, "loss": 0.5857, "step": 5720 }, { "epoch": 0.73, "grad_norm": 0.49404270561357166, "learning_rate": 3.5652592418856537e-06, "loss": 0.5709, "step": 5721 }, { "epoch": 0.73, "grad_norm": 0.4659188107349315, "learning_rate": 3.5620926804798805e-06, "loss": 0.6131, "step": 5722 }, { "epoch": 0.73, "grad_norm": 0.4650920888001028, "learning_rate": 3.5589272211656224e-06, "loss": 0.5809, "step": 5723 }, { "epoch": 0.73, "grad_norm": 0.4476744278487661, "learning_rate": 3.5557628644847687e-06, "loss": 0.5667, "step": 5724 }, { "epoch": 0.73, "grad_norm": 0.4937903682540415, "learning_rate": 3.5525996109790195e-06, "loss": 0.6022, "step": 5725 }, { "epoch": 0.73, "grad_norm": 0.5005805721011902, "learning_rate": 3.549437461189884e-06, "loss": 0.5797, "step": 5726 }, { "epoch": 0.73, "grad_norm": 0.4859993080851019, "learning_rate": 3.5462764156586883e-06, "loss": 0.5636, "step": 5727 }, { "epoch": 0.73, "grad_norm": 0.5980568827757243, "learning_rate": 3.5431164749265624e-06, "loss": 0.6222, "step": 5728 }, { "epoch": 0.73, "grad_norm": 0.4874226652011355, "learning_rate": 3.539957639534449e-06, "loss": 0.5886, "step": 5729 }, { "epoch": 0.73, "grad_norm": 0.4882773659687382, "learning_rate": 3.5367999100231033e-06, "loss": 0.5789, "step": 5730 }, { "epoch": 0.73, "grad_norm": 0.5345650626427236, "learning_rate": 3.533643286933094e-06, "loss": 0.6189, "step": 5731 }, { "epoch": 0.73, "grad_norm": 0.5512257770213194, "learning_rate": 3.530487770804789e-06, "loss": 0.6301, "step": 5732 }, { "epoch": 0.73, "grad_norm": 0.519517928645011, "learning_rate": 3.5273333621783743e-06, "loss": 0.6067, "step": 5733 }, { "epoch": 0.73, "grad_norm": 0.5433556698000575, "learning_rate": 3.5241800615938536e-06, "loss": 0.6102, "step": 5734 }, { "epoch": 0.73, "grad_norm": 0.5008688134331778, "learning_rate": 3.5210278695910304e-06, "loss": 0.606, "step": 5735 }, { "epoch": 0.73, "grad_norm": 0.5409916522347504, "learning_rate": 3.517876786709521e-06, "loss": 0.5884, "step": 5736 }, { "epoch": 0.73, "grad_norm": 0.5611827720985267, "learning_rate": 3.5147268134887526e-06, "loss": 0.6136, "step": 5737 }, { "epoch": 0.73, "grad_norm": 0.5149523580662263, "learning_rate": 3.5115779504679614e-06, "loss": 0.6311, "step": 5738 }, { "epoch": 0.73, "grad_norm": 0.5577492222824285, "learning_rate": 3.508430198186198e-06, "loss": 0.5741, "step": 5739 }, { "epoch": 0.73, "grad_norm": 0.49307318312659254, "learning_rate": 3.505283557182315e-06, "loss": 0.5849, "step": 5740 }, { "epoch": 0.73, "grad_norm": 0.503900237282677, "learning_rate": 3.5021380279949824e-06, "loss": 0.6193, "step": 5741 }, { "epoch": 0.73, "grad_norm": 0.4805870452793264, "learning_rate": 3.4989936111626745e-06, "loss": 0.5736, "step": 5742 }, { "epoch": 0.73, "grad_norm": 0.4632952067667694, "learning_rate": 3.4958503072236782e-06, "loss": 0.5744, "step": 5743 }, { "epoch": 0.73, "grad_norm": 0.5217080378568114, "learning_rate": 3.4927081167160883e-06, "loss": 0.614, "step": 5744 }, { "epoch": 0.73, "grad_norm": 0.4415053781233048, "learning_rate": 3.4895670401778103e-06, "loss": 0.5694, "step": 5745 }, { "epoch": 0.73, "grad_norm": 0.5529512165303755, "learning_rate": 3.486427078146559e-06, "loss": 0.6093, "step": 5746 }, { "epoch": 0.73, "grad_norm": 0.5071629909563501, "learning_rate": 3.483288231159856e-06, "loss": 0.609, "step": 5747 }, { "epoch": 0.73, "grad_norm": 0.4706691342163516, "learning_rate": 3.480150499755035e-06, "loss": 0.5945, "step": 5748 }, { "epoch": 0.73, "grad_norm": 0.4740742879790818, "learning_rate": 3.477013884469238e-06, "loss": 0.5728, "step": 5749 }, { "epoch": 0.73, "grad_norm": 0.5100760591439178, "learning_rate": 3.473878385839413e-06, "loss": 0.5582, "step": 5750 }, { "epoch": 0.73, "grad_norm": 0.5657566234134144, "learning_rate": 3.470744004402322e-06, "loss": 0.6286, "step": 5751 }, { "epoch": 0.73, "grad_norm": 0.45722969307178046, "learning_rate": 3.4676107406945315e-06, "loss": 0.5979, "step": 5752 }, { "epoch": 0.73, "grad_norm": 0.6150529172858673, "learning_rate": 3.4644785952524184e-06, "loss": 0.637, "step": 5753 }, { "epoch": 0.74, "grad_norm": 0.5441358839864924, "learning_rate": 3.4613475686121655e-06, "loss": 0.5934, "step": 5754 }, { "epoch": 0.74, "grad_norm": 0.5520953649900362, "learning_rate": 3.458217661309775e-06, "loss": 0.6115, "step": 5755 }, { "epoch": 0.74, "grad_norm": 0.452071011969097, "learning_rate": 3.455088873881041e-06, "loss": 0.581, "step": 5756 }, { "epoch": 0.74, "grad_norm": 0.5707111501839385, "learning_rate": 3.451961206861575e-06, "loss": 0.6293, "step": 5757 }, { "epoch": 0.74, "grad_norm": 0.5167673891070753, "learning_rate": 3.448834660786796e-06, "loss": 0.6127, "step": 5758 }, { "epoch": 0.74, "grad_norm": 0.4933454887316033, "learning_rate": 3.445709236191933e-06, "loss": 0.5869, "step": 5759 }, { "epoch": 0.74, "grad_norm": 0.6485088355927701, "learning_rate": 3.4425849336120175e-06, "loss": 0.665, "step": 5760 }, { "epoch": 0.74, "grad_norm": 0.4547819657700004, "learning_rate": 3.439461753581892e-06, "loss": 0.5854, "step": 5761 }, { "epoch": 0.74, "grad_norm": 0.5641868903035109, "learning_rate": 3.4363396966362107e-06, "loss": 0.6306, "step": 5762 }, { "epoch": 0.74, "grad_norm": 0.4771849105792322, "learning_rate": 3.4332187633094304e-06, "loss": 0.5964, "step": 5763 }, { "epoch": 0.74, "grad_norm": 0.48408062382499495, "learning_rate": 3.4300989541358186e-06, "loss": 0.6144, "step": 5764 }, { "epoch": 0.74, "grad_norm": 0.5850249531541841, "learning_rate": 3.4269802696494446e-06, "loss": 0.6142, "step": 5765 }, { "epoch": 0.74, "grad_norm": 0.5170307531699688, "learning_rate": 3.4238627103841904e-06, "loss": 0.6022, "step": 5766 }, { "epoch": 0.74, "grad_norm": 0.4683329397942463, "learning_rate": 3.4207462768737453e-06, "loss": 0.5831, "step": 5767 }, { "epoch": 0.74, "grad_norm": 0.47324711413818715, "learning_rate": 3.417630969651602e-06, "loss": 0.5808, "step": 5768 }, { "epoch": 0.74, "grad_norm": 0.5795922122499795, "learning_rate": 3.4145167892510677e-06, "loss": 0.5966, "step": 5769 }, { "epoch": 0.74, "grad_norm": 0.5193607002600389, "learning_rate": 3.411403736205251e-06, "loss": 0.5633, "step": 5770 }, { "epoch": 0.74, "grad_norm": 0.4898214070063343, "learning_rate": 3.4082918110470684e-06, "loss": 0.6064, "step": 5771 }, { "epoch": 0.74, "grad_norm": 0.49236857918600496, "learning_rate": 3.4051810143092432e-06, "loss": 0.5834, "step": 5772 }, { "epoch": 0.74, "grad_norm": 0.4841231039549192, "learning_rate": 3.4020713465243105e-06, "loss": 0.5902, "step": 5773 }, { "epoch": 0.74, "grad_norm": 0.5516533607390374, "learning_rate": 3.3989628082245994e-06, "loss": 0.6226, "step": 5774 }, { "epoch": 0.74, "grad_norm": 0.47471795902697295, "learning_rate": 3.395855399942254e-06, "loss": 0.6073, "step": 5775 }, { "epoch": 0.74, "grad_norm": 0.5293191218953889, "learning_rate": 3.3927491222092335e-06, "loss": 0.6202, "step": 5776 }, { "epoch": 0.74, "grad_norm": 0.5243705624292597, "learning_rate": 3.3896439755572885e-06, "loss": 0.6013, "step": 5777 }, { "epoch": 0.74, "grad_norm": 0.4824358207915709, "learning_rate": 3.386539960517985e-06, "loss": 0.6131, "step": 5778 }, { "epoch": 0.74, "grad_norm": 0.6297962766732453, "learning_rate": 3.383437077622691e-06, "loss": 0.6487, "step": 5779 }, { "epoch": 0.74, "grad_norm": 0.5609492381138376, "learning_rate": 3.380335327402584e-06, "loss": 0.5943, "step": 5780 }, { "epoch": 0.74, "grad_norm": 0.5376943270211204, "learning_rate": 3.3772347103886437e-06, "loss": 0.6096, "step": 5781 }, { "epoch": 0.74, "grad_norm": 0.5013545604453539, "learning_rate": 3.3741352271116644e-06, "loss": 0.617, "step": 5782 }, { "epoch": 0.74, "grad_norm": 0.5102667198109467, "learning_rate": 3.3710368781022273e-06, "loss": 0.6084, "step": 5783 }, { "epoch": 0.74, "grad_norm": 0.55143461415209, "learning_rate": 3.3679396638907424e-06, "loss": 0.6074, "step": 5784 }, { "epoch": 0.74, "grad_norm": 0.5007279830919864, "learning_rate": 3.3648435850074136e-06, "loss": 0.5915, "step": 5785 }, { "epoch": 0.74, "grad_norm": 0.54420666348146, "learning_rate": 3.361748641982251e-06, "loss": 0.612, "step": 5786 }, { "epoch": 0.74, "grad_norm": 0.4710291350069079, "learning_rate": 3.3586548353450697e-06, "loss": 0.5923, "step": 5787 }, { "epoch": 0.74, "grad_norm": 0.46996790832814933, "learning_rate": 3.3555621656254944e-06, "loss": 0.5876, "step": 5788 }, { "epoch": 0.74, "grad_norm": 0.47937796736213223, "learning_rate": 3.352470633352951e-06, "loss": 0.568, "step": 5789 }, { "epoch": 0.74, "grad_norm": 0.5233031975942009, "learning_rate": 3.3493802390566733e-06, "loss": 0.6276, "step": 5790 }, { "epoch": 0.74, "grad_norm": 0.47859443995570483, "learning_rate": 3.3462909832656986e-06, "loss": 0.6101, "step": 5791 }, { "epoch": 0.74, "grad_norm": 0.4989958717131638, "learning_rate": 3.343202866508869e-06, "loss": 0.6223, "step": 5792 }, { "epoch": 0.74, "grad_norm": 0.5362957795220915, "learning_rate": 3.3401158893148345e-06, "loss": 0.6432, "step": 5793 }, { "epoch": 0.74, "grad_norm": 0.5724627265256406, "learning_rate": 3.337030052212047e-06, "loss": 0.6174, "step": 5794 }, { "epoch": 0.74, "grad_norm": 0.49365880337865314, "learning_rate": 3.3339453557287637e-06, "loss": 0.6164, "step": 5795 }, { "epoch": 0.74, "grad_norm": 0.5110179814204188, "learning_rate": 3.330861800393049e-06, "loss": 0.5936, "step": 5796 }, { "epoch": 0.74, "grad_norm": 0.4832483958634816, "learning_rate": 3.327779386732769e-06, "loss": 0.5824, "step": 5797 }, { "epoch": 0.74, "grad_norm": 0.5335620713910515, "learning_rate": 3.3246981152755955e-06, "loss": 0.6076, "step": 5798 }, { "epoch": 0.74, "grad_norm": 0.47335809819609675, "learning_rate": 3.321617986549004e-06, "loss": 0.6013, "step": 5799 }, { "epoch": 0.74, "grad_norm": 0.4860989017879574, "learning_rate": 3.3185390010802767e-06, "loss": 0.6249, "step": 5800 }, { "epoch": 0.74, "grad_norm": 0.5093803266278129, "learning_rate": 3.315461159396497e-06, "loss": 0.6082, "step": 5801 }, { "epoch": 0.74, "grad_norm": 0.4972426224215262, "learning_rate": 3.312384462024555e-06, "loss": 0.5947, "step": 5802 }, { "epoch": 0.74, "grad_norm": 0.5998922463617513, "learning_rate": 3.3093089094911434e-06, "loss": 0.6432, "step": 5803 }, { "epoch": 0.74, "grad_norm": 0.5045069652795403, "learning_rate": 3.306234502322756e-06, "loss": 0.5615, "step": 5804 }, { "epoch": 0.74, "grad_norm": 0.4470830186477097, "learning_rate": 3.3031612410457016e-06, "loss": 0.5911, "step": 5805 }, { "epoch": 0.74, "grad_norm": 0.46812998209324563, "learning_rate": 3.3000891261860844e-06, "loss": 0.5909, "step": 5806 }, { "epoch": 0.74, "grad_norm": 0.5415702775839305, "learning_rate": 3.2970181582698058e-06, "loss": 0.6131, "step": 5807 }, { "epoch": 0.74, "grad_norm": 0.49565286250963453, "learning_rate": 3.2939483378225836e-06, "loss": 0.5972, "step": 5808 }, { "epoch": 0.74, "grad_norm": 0.47907989337252965, "learning_rate": 3.2908796653699314e-06, "loss": 0.5912, "step": 5809 }, { "epoch": 0.74, "grad_norm": 0.5263181372545028, "learning_rate": 3.2878121414371698e-06, "loss": 0.6244, "step": 5810 }, { "epoch": 0.74, "grad_norm": 0.6271104108717244, "learning_rate": 3.2847457665494197e-06, "loss": 0.6171, "step": 5811 }, { "epoch": 0.74, "grad_norm": 0.6133204215179703, "learning_rate": 3.281680541231611e-06, "loss": 0.6535, "step": 5812 }, { "epoch": 0.74, "grad_norm": 0.5274983932986417, "learning_rate": 3.2786164660084717e-06, "loss": 0.6023, "step": 5813 }, { "epoch": 0.74, "grad_norm": 0.5844259193966729, "learning_rate": 3.2755535414045327e-06, "loss": 0.6853, "step": 5814 }, { "epoch": 0.74, "grad_norm": 0.613658340383667, "learning_rate": 3.272491767944135e-06, "loss": 0.6411, "step": 5815 }, { "epoch": 0.74, "grad_norm": 0.5699956842489503, "learning_rate": 3.2694311461514085e-06, "loss": 0.6136, "step": 5816 }, { "epoch": 0.74, "grad_norm": 0.5264496270210687, "learning_rate": 3.2663716765502986e-06, "loss": 0.5989, "step": 5817 }, { "epoch": 0.74, "grad_norm": 0.4962173479588005, "learning_rate": 3.263313359664545e-06, "loss": 0.5972, "step": 5818 }, { "epoch": 0.74, "grad_norm": 0.4712354990520669, "learning_rate": 3.2602561960177024e-06, "loss": 0.5795, "step": 5819 }, { "epoch": 0.74, "grad_norm": 0.5135810954828635, "learning_rate": 3.257200186133117e-06, "loss": 0.5726, "step": 5820 }, { "epoch": 0.74, "grad_norm": 0.4768874548386879, "learning_rate": 3.254145330533939e-06, "loss": 0.5619, "step": 5821 }, { "epoch": 0.74, "grad_norm": 0.5308942972173732, "learning_rate": 3.2510916297431237e-06, "loss": 0.6048, "step": 5822 }, { "epoch": 0.74, "grad_norm": 0.4640649229227748, "learning_rate": 3.2480390842834307e-06, "loss": 0.5849, "step": 5823 }, { "epoch": 0.74, "grad_norm": 0.5961910216113393, "learning_rate": 3.2449876946774115e-06, "loss": 0.6369, "step": 5824 }, { "epoch": 0.74, "grad_norm": 0.48344858295012905, "learning_rate": 3.2419374614474275e-06, "loss": 0.5974, "step": 5825 }, { "epoch": 0.74, "grad_norm": 0.6233457646317723, "learning_rate": 3.2388883851156472e-06, "loss": 0.6546, "step": 5826 }, { "epoch": 0.74, "grad_norm": 0.5106579523885484, "learning_rate": 3.235840466204034e-06, "loss": 0.5994, "step": 5827 }, { "epoch": 0.74, "grad_norm": 0.47906776523858824, "learning_rate": 3.232793705234354e-06, "loss": 0.598, "step": 5828 }, { "epoch": 0.74, "grad_norm": 0.46067674594946467, "learning_rate": 3.229748102728174e-06, "loss": 0.5892, "step": 5829 }, { "epoch": 0.74, "grad_norm": 0.4384187517703925, "learning_rate": 3.2267036592068656e-06, "loss": 0.568, "step": 5830 }, { "epoch": 0.74, "grad_norm": 0.44646392395929485, "learning_rate": 3.2236603751916006e-06, "loss": 0.584, "step": 5831 }, { "epoch": 0.75, "grad_norm": 0.47595100309230337, "learning_rate": 3.2206182512033514e-06, "loss": 0.5989, "step": 5832 }, { "epoch": 0.75, "grad_norm": 0.5559832669567442, "learning_rate": 3.217577287762894e-06, "loss": 0.6308, "step": 5833 }, { "epoch": 0.75, "grad_norm": 0.46366374563966944, "learning_rate": 3.214537485390803e-06, "loss": 0.6003, "step": 5834 }, { "epoch": 0.75, "grad_norm": 0.5624926356833317, "learning_rate": 3.211498844607457e-06, "loss": 0.6152, "step": 5835 }, { "epoch": 0.75, "grad_norm": 0.4908834097737493, "learning_rate": 3.208461365933032e-06, "loss": 0.6127, "step": 5836 }, { "epoch": 0.75, "grad_norm": 0.5345186435440081, "learning_rate": 3.2054250498875105e-06, "loss": 0.5876, "step": 5837 }, { "epoch": 0.75, "grad_norm": 0.579785001745578, "learning_rate": 3.202389896990671e-06, "loss": 0.6603, "step": 5838 }, { "epoch": 0.75, "grad_norm": 0.5273218393384871, "learning_rate": 3.199355907762095e-06, "loss": 0.5653, "step": 5839 }, { "epoch": 0.75, "grad_norm": 0.5099607763206867, "learning_rate": 3.1963230827211654e-06, "loss": 0.6128, "step": 5840 }, { "epoch": 0.75, "grad_norm": 0.5210277055706495, "learning_rate": 3.193291422387065e-06, "loss": 0.6045, "step": 5841 }, { "epoch": 0.75, "grad_norm": 0.5303458579908268, "learning_rate": 3.1902609272787755e-06, "loss": 0.6219, "step": 5842 }, { "epoch": 0.75, "grad_norm": 0.48562670852593054, "learning_rate": 3.1872315979150826e-06, "loss": 0.5977, "step": 5843 }, { "epoch": 0.75, "grad_norm": 0.4955619023610682, "learning_rate": 3.18420343481457e-06, "loss": 0.5804, "step": 5844 }, { "epoch": 0.75, "grad_norm": 0.5334051749744241, "learning_rate": 3.1811764384956223e-06, "loss": 0.6499, "step": 5845 }, { "epoch": 0.75, "grad_norm": 0.47928264148535676, "learning_rate": 3.1781506094764258e-06, "loss": 0.5563, "step": 5846 }, { "epoch": 0.75, "grad_norm": 0.5004503203569687, "learning_rate": 3.1751259482749596e-06, "loss": 0.5996, "step": 5847 }, { "epoch": 0.75, "grad_norm": 0.48329051711756743, "learning_rate": 3.172102455409022e-06, "loss": 0.622, "step": 5848 }, { "epoch": 0.75, "grad_norm": 0.5141509143529898, "learning_rate": 3.169080131396186e-06, "loss": 0.5899, "step": 5849 }, { "epoch": 0.75, "grad_norm": 0.4773943979627525, "learning_rate": 3.166058976753842e-06, "loss": 0.5865, "step": 5850 }, { "epoch": 0.75, "grad_norm": 0.4778102336704562, "learning_rate": 3.1630389919991733e-06, "loss": 0.582, "step": 5851 }, { "epoch": 0.75, "grad_norm": 0.5217171652269499, "learning_rate": 3.1600201776491657e-06, "loss": 0.595, "step": 5852 }, { "epoch": 0.75, "grad_norm": 0.49653437883417445, "learning_rate": 3.157002534220602e-06, "loss": 0.5972, "step": 5853 }, { "epoch": 0.75, "grad_norm": 0.4961075378123872, "learning_rate": 3.1539860622300646e-06, "loss": 0.5967, "step": 5854 }, { "epoch": 0.75, "grad_norm": 0.5535421446752344, "learning_rate": 3.150970762193942e-06, "loss": 0.5924, "step": 5855 }, { "epoch": 0.75, "grad_norm": 0.5515307452084097, "learning_rate": 3.1479566346284186e-06, "loss": 0.6237, "step": 5856 }, { "epoch": 0.75, "grad_norm": 0.47006086358759686, "learning_rate": 3.144943680049469e-06, "loss": 0.623, "step": 5857 }, { "epoch": 0.75, "grad_norm": 0.48330952800614224, "learning_rate": 3.1419318989728777e-06, "loss": 0.6093, "step": 5858 }, { "epoch": 0.75, "grad_norm": 0.5064762959575342, "learning_rate": 3.138921291914223e-06, "loss": 0.5615, "step": 5859 }, { "epoch": 0.75, "grad_norm": 0.7011376046644034, "learning_rate": 3.1359118593888883e-06, "loss": 0.5861, "step": 5860 }, { "epoch": 0.75, "grad_norm": 0.5433793401426531, "learning_rate": 3.132903601912045e-06, "loss": 0.6336, "step": 5861 }, { "epoch": 0.75, "grad_norm": 0.4823237474116058, "learning_rate": 3.12989651999868e-06, "loss": 0.5853, "step": 5862 }, { "epoch": 0.75, "grad_norm": 0.46964078814992044, "learning_rate": 3.1268906141635626e-06, "loss": 0.5729, "step": 5863 }, { "epoch": 0.75, "grad_norm": 0.456718734109039, "learning_rate": 3.1238858849212694e-06, "loss": 0.5824, "step": 5864 }, { "epoch": 0.75, "grad_norm": 0.534478751260527, "learning_rate": 3.120882332786177e-06, "loss": 0.5826, "step": 5865 }, { "epoch": 0.75, "grad_norm": 0.5866348267550945, "learning_rate": 3.1178799582724496e-06, "loss": 0.6371, "step": 5866 }, { "epoch": 0.75, "grad_norm": 0.5323917000166559, "learning_rate": 3.11487876189406e-06, "loss": 0.6259, "step": 5867 }, { "epoch": 0.75, "grad_norm": 0.45197547556066986, "learning_rate": 3.1118787441647748e-06, "loss": 0.6001, "step": 5868 }, { "epoch": 0.75, "grad_norm": 0.5140759484431722, "learning_rate": 3.1088799055981656e-06, "loss": 0.6137, "step": 5869 }, { "epoch": 0.75, "grad_norm": 0.4907286421775403, "learning_rate": 3.1058822467075956e-06, "loss": 0.5977, "step": 5870 }, { "epoch": 0.75, "grad_norm": 0.450055442388771, "learning_rate": 3.102885768006226e-06, "loss": 0.5624, "step": 5871 }, { "epoch": 0.75, "grad_norm": 0.5212195170147168, "learning_rate": 3.0998904700070186e-06, "loss": 0.5982, "step": 5872 }, { "epoch": 0.75, "grad_norm": 0.5781051305777528, "learning_rate": 3.0968963532227313e-06, "loss": 0.6389, "step": 5873 }, { "epoch": 0.75, "grad_norm": 0.5130317200458829, "learning_rate": 3.0939034181659232e-06, "loss": 0.6274, "step": 5874 }, { "epoch": 0.75, "grad_norm": 0.5001171892069827, "learning_rate": 3.0909116653489413e-06, "loss": 0.5983, "step": 5875 }, { "epoch": 0.75, "grad_norm": 0.5298415735635718, "learning_rate": 3.0879210952839433e-06, "loss": 0.6271, "step": 5876 }, { "epoch": 0.75, "grad_norm": 0.5222538718620631, "learning_rate": 3.084931708482879e-06, "loss": 0.5964, "step": 5877 }, { "epoch": 0.75, "grad_norm": 0.4909535592300157, "learning_rate": 3.081943505457492e-06, "loss": 0.5784, "step": 5878 }, { "epoch": 0.75, "grad_norm": 0.5633628631071135, "learning_rate": 3.0789564867193287e-06, "loss": 0.5896, "step": 5879 }, { "epoch": 0.75, "grad_norm": 0.5523826221617364, "learning_rate": 3.0759706527797295e-06, "loss": 0.6417, "step": 5880 }, { "epoch": 0.75, "grad_norm": 0.491130698344554, "learning_rate": 3.0729860041498318e-06, "loss": 0.5926, "step": 5881 }, { "epoch": 0.75, "grad_norm": 0.4906478683879836, "learning_rate": 3.0700025413405734e-06, "loss": 0.5974, "step": 5882 }, { "epoch": 0.75, "grad_norm": 0.5075433300664478, "learning_rate": 3.0670202648626847e-06, "loss": 0.6194, "step": 5883 }, { "epoch": 0.75, "grad_norm": 0.4962558010786082, "learning_rate": 3.0640391752266962e-06, "loss": 0.5796, "step": 5884 }, { "epoch": 0.75, "grad_norm": 0.5268671705494227, "learning_rate": 3.0610592729429343e-06, "loss": 0.6442, "step": 5885 }, { "epoch": 0.75, "grad_norm": 0.5127953099571553, "learning_rate": 3.0580805585215224e-06, "loss": 0.5769, "step": 5886 }, { "epoch": 0.75, "grad_norm": 0.4968790506949913, "learning_rate": 3.0551030324723786e-06, "loss": 0.5926, "step": 5887 }, { "epoch": 0.75, "grad_norm": 0.5463408540464679, "learning_rate": 3.052126695305222e-06, "loss": 0.6141, "step": 5888 }, { "epoch": 0.75, "grad_norm": 0.5566398133995851, "learning_rate": 3.049151547529563e-06, "loss": 0.6009, "step": 5889 }, { "epoch": 0.75, "grad_norm": 0.5201043972060987, "learning_rate": 3.046177589654711e-06, "loss": 0.5953, "step": 5890 }, { "epoch": 0.75, "grad_norm": 0.46372265193538753, "learning_rate": 3.0432048221897725e-06, "loss": 0.5727, "step": 5891 }, { "epoch": 0.75, "grad_norm": 0.5224714005564942, "learning_rate": 3.040233245643648e-06, "loss": 0.6195, "step": 5892 }, { "epoch": 0.75, "grad_norm": 0.4825088337052828, "learning_rate": 3.0372628605250364e-06, "loss": 0.5837, "step": 5893 }, { "epoch": 0.75, "grad_norm": 0.5348040131550184, "learning_rate": 3.0342936673424308e-06, "loss": 0.6055, "step": 5894 }, { "epoch": 0.75, "grad_norm": 0.4670941668327143, "learning_rate": 3.0313256666041215e-06, "loss": 0.5855, "step": 5895 }, { "epoch": 0.75, "grad_norm": 0.5381498263229199, "learning_rate": 3.028358858818191e-06, "loss": 0.6352, "step": 5896 }, { "epoch": 0.75, "grad_norm": 0.4709828616291155, "learning_rate": 3.025393244492526e-06, "loss": 0.5804, "step": 5897 }, { "epoch": 0.75, "grad_norm": 0.5388927879532467, "learning_rate": 3.0224288241348053e-06, "loss": 0.6031, "step": 5898 }, { "epoch": 0.75, "grad_norm": 0.45793465067498945, "learning_rate": 3.0194655982524946e-06, "loss": 0.5595, "step": 5899 }, { "epoch": 0.75, "grad_norm": 0.505203920716395, "learning_rate": 3.0165035673528644e-06, "loss": 0.6071, "step": 5900 }, { "epoch": 0.75, "grad_norm": 0.5336483468347153, "learning_rate": 3.01354273194298e-06, "loss": 0.6337, "step": 5901 }, { "epoch": 0.75, "grad_norm": 0.4562831800124901, "learning_rate": 3.0105830925296998e-06, "loss": 0.5825, "step": 5902 }, { "epoch": 0.75, "grad_norm": 0.5239376380706039, "learning_rate": 3.007624649619677e-06, "loss": 0.6011, "step": 5903 }, { "epoch": 0.75, "grad_norm": 0.5332757363814983, "learning_rate": 3.004667403719359e-06, "loss": 0.6148, "step": 5904 }, { "epoch": 0.75, "grad_norm": 0.5404732387038662, "learning_rate": 3.0017113553349975e-06, "loss": 0.613, "step": 5905 }, { "epoch": 0.75, "grad_norm": 0.4604700351001587, "learning_rate": 2.9987565049726274e-06, "loss": 0.5639, "step": 5906 }, { "epoch": 0.75, "grad_norm": 0.5334122391635787, "learning_rate": 2.995802853138088e-06, "loss": 0.5972, "step": 5907 }, { "epoch": 0.75, "grad_norm": 0.5142269849105513, "learning_rate": 2.9928504003369995e-06, "loss": 0.6078, "step": 5908 }, { "epoch": 0.75, "grad_norm": 0.5230820936866273, "learning_rate": 2.9898991470747917e-06, "loss": 0.6139, "step": 5909 }, { "epoch": 0.75, "grad_norm": 0.5408228668887006, "learning_rate": 2.9869490938566826e-06, "loss": 0.6474, "step": 5910 }, { "epoch": 0.76, "grad_norm": 0.4989681036855538, "learning_rate": 2.9840002411876822e-06, "loss": 0.5694, "step": 5911 }, { "epoch": 0.76, "grad_norm": 0.548498453589005, "learning_rate": 2.981052589572605e-06, "loss": 0.6177, "step": 5912 }, { "epoch": 0.76, "grad_norm": 0.4517051026453767, "learning_rate": 2.978106139516049e-06, "loss": 0.5659, "step": 5913 }, { "epoch": 0.76, "grad_norm": 0.48524044028910496, "learning_rate": 2.9751608915224106e-06, "loss": 0.5558, "step": 5914 }, { "epoch": 0.76, "grad_norm": 0.5568961955752595, "learning_rate": 2.972216846095882e-06, "loss": 0.6637, "step": 5915 }, { "epoch": 0.76, "grad_norm": 0.5266048196908376, "learning_rate": 2.96927400374045e-06, "loss": 0.6124, "step": 5916 }, { "epoch": 0.76, "grad_norm": 0.5617026520124615, "learning_rate": 2.9663323649598873e-06, "loss": 0.6051, "step": 5917 }, { "epoch": 0.76, "grad_norm": 0.4789583095565492, "learning_rate": 2.963391930257766e-06, "loss": 0.5846, "step": 5918 }, { "epoch": 0.76, "grad_norm": 0.4620518272789238, "learning_rate": 2.9604527001374605e-06, "loss": 0.5718, "step": 5919 }, { "epoch": 0.76, "grad_norm": 0.47709745036298695, "learning_rate": 2.9575146751021276e-06, "loss": 0.5673, "step": 5920 }, { "epoch": 0.76, "grad_norm": 0.483369608958923, "learning_rate": 2.954577855654722e-06, "loss": 0.5772, "step": 5921 }, { "epoch": 0.76, "grad_norm": 0.5481789544748784, "learning_rate": 2.951642242297991e-06, "loss": 0.5993, "step": 5922 }, { "epoch": 0.76, "grad_norm": 0.5867982817845954, "learning_rate": 2.9487078355344744e-06, "loss": 0.6421, "step": 5923 }, { "epoch": 0.76, "grad_norm": 0.47122866112061634, "learning_rate": 2.9457746358665094e-06, "loss": 0.5843, "step": 5924 }, { "epoch": 0.76, "grad_norm": 0.5127960623660124, "learning_rate": 2.942842643796223e-06, "loss": 0.6273, "step": 5925 }, { "epoch": 0.76, "grad_norm": 0.4712982978988773, "learning_rate": 2.9399118598255373e-06, "loss": 0.5843, "step": 5926 }, { "epoch": 0.76, "grad_norm": 0.4533467653676068, "learning_rate": 2.936982284456166e-06, "loss": 0.537, "step": 5927 }, { "epoch": 0.76, "grad_norm": 0.56776822476551, "learning_rate": 2.9340539181896176e-06, "loss": 0.6012, "step": 5928 }, { "epoch": 0.76, "grad_norm": 0.50687099427839, "learning_rate": 2.9311267615271922e-06, "loss": 0.6052, "step": 5929 }, { "epoch": 0.76, "grad_norm": 0.4977829698198538, "learning_rate": 2.9282008149699836e-06, "loss": 0.6033, "step": 5930 }, { "epoch": 0.76, "grad_norm": 0.48059273644753775, "learning_rate": 2.925276079018878e-06, "loss": 0.5846, "step": 5931 }, { "epoch": 0.76, "grad_norm": 0.4370297995680777, "learning_rate": 2.9223525541745555e-06, "loss": 0.5599, "step": 5932 }, { "epoch": 0.76, "grad_norm": 0.559751223556476, "learning_rate": 2.919430240937489e-06, "loss": 0.6478, "step": 5933 }, { "epoch": 0.76, "grad_norm": 0.4585947078531923, "learning_rate": 2.916509139807939e-06, "loss": 0.5961, "step": 5934 }, { "epoch": 0.76, "grad_norm": 0.5041372514073541, "learning_rate": 2.9135892512859677e-06, "loss": 0.5932, "step": 5935 }, { "epoch": 0.76, "grad_norm": 0.4775388323393156, "learning_rate": 2.9106705758714203e-06, "loss": 0.5891, "step": 5936 }, { "epoch": 0.76, "grad_norm": 0.5035402910524791, "learning_rate": 2.9077531140639416e-06, "loss": 0.5887, "step": 5937 }, { "epoch": 0.76, "grad_norm": 0.534616738563821, "learning_rate": 2.904836866362963e-06, "loss": 0.6338, "step": 5938 }, { "epoch": 0.76, "grad_norm": 0.44421075022306933, "learning_rate": 2.901921833267709e-06, "loss": 0.5684, "step": 5939 }, { "epoch": 0.76, "grad_norm": 0.5443488792191501, "learning_rate": 2.899008015277208e-06, "loss": 0.5995, "step": 5940 }, { "epoch": 0.76, "grad_norm": 0.5504206955761147, "learning_rate": 2.8960954128902596e-06, "loss": 0.5888, "step": 5941 }, { "epoch": 0.76, "grad_norm": 0.4579021723234598, "learning_rate": 2.893184026605469e-06, "loss": 0.5797, "step": 5942 }, { "epoch": 0.76, "grad_norm": 0.49894421270487943, "learning_rate": 2.8902738569212306e-06, "loss": 0.592, "step": 5943 }, { "epoch": 0.76, "grad_norm": 0.5210554757112851, "learning_rate": 2.887364904335731e-06, "loss": 0.5792, "step": 5944 }, { "epoch": 0.76, "grad_norm": 0.482962228703753, "learning_rate": 2.884457169346946e-06, "loss": 0.6114, "step": 5945 }, { "epoch": 0.76, "grad_norm": 0.5326113866928891, "learning_rate": 2.8815506524526415e-06, "loss": 0.6153, "step": 5946 }, { "epoch": 0.76, "grad_norm": 0.50237533916481, "learning_rate": 2.8786453541503846e-06, "loss": 0.5976, "step": 5947 }, { "epoch": 0.76, "grad_norm": 0.5046092840325056, "learning_rate": 2.8757412749375247e-06, "loss": 0.5717, "step": 5948 }, { "epoch": 0.76, "grad_norm": 0.5279107378159911, "learning_rate": 2.8728384153112066e-06, "loss": 0.6324, "step": 5949 }, { "epoch": 0.76, "grad_norm": 0.49334469232326134, "learning_rate": 2.869936775768359e-06, "loss": 0.5954, "step": 5950 }, { "epoch": 0.76, "grad_norm": 0.4976658736242538, "learning_rate": 2.8670363568057113e-06, "loss": 0.5954, "step": 5951 }, { "epoch": 0.76, "grad_norm": 0.5463351466457456, "learning_rate": 2.864137158919779e-06, "loss": 0.5998, "step": 5952 }, { "epoch": 0.76, "grad_norm": 0.4755791693228019, "learning_rate": 2.8612391826068664e-06, "loss": 0.5883, "step": 5953 }, { "epoch": 0.76, "grad_norm": 0.5880085782241427, "learning_rate": 2.8583424283630777e-06, "loss": 0.6461, "step": 5954 }, { "epoch": 0.76, "grad_norm": 0.4744449254530118, "learning_rate": 2.8554468966843006e-06, "loss": 0.5979, "step": 5955 }, { "epoch": 0.76, "grad_norm": 0.48692335451565255, "learning_rate": 2.852552588066213e-06, "loss": 0.5944, "step": 5956 }, { "epoch": 0.76, "grad_norm": 0.5355206346333914, "learning_rate": 2.849659503004286e-06, "loss": 0.5679, "step": 5957 }, { "epoch": 0.76, "grad_norm": 0.4740632689639393, "learning_rate": 2.846767641993785e-06, "loss": 0.6149, "step": 5958 }, { "epoch": 0.76, "grad_norm": 0.5643317039089659, "learning_rate": 2.843877005529754e-06, "loss": 0.62, "step": 5959 }, { "epoch": 0.76, "grad_norm": 0.5087666010501181, "learning_rate": 2.840987594107034e-06, "loss": 0.5945, "step": 5960 }, { "epoch": 0.76, "grad_norm": 0.49722204173644347, "learning_rate": 2.838099408220265e-06, "loss": 0.5473, "step": 5961 }, { "epoch": 0.76, "grad_norm": 0.44969215669354073, "learning_rate": 2.8352124483638653e-06, "loss": 0.5718, "step": 5962 }, { "epoch": 0.76, "grad_norm": 0.4892685252911653, "learning_rate": 2.8323267150320475e-06, "loss": 0.5687, "step": 5963 }, { "epoch": 0.76, "grad_norm": 0.4946428280896037, "learning_rate": 2.8294422087188156e-06, "loss": 0.5906, "step": 5964 }, { "epoch": 0.76, "grad_norm": 0.48996499726246934, "learning_rate": 2.82655892991796e-06, "loss": 0.578, "step": 5965 }, { "epoch": 0.76, "grad_norm": 0.5030370077828351, "learning_rate": 2.8236768791230627e-06, "loss": 0.5863, "step": 5966 }, { "epoch": 0.76, "grad_norm": 0.5496687570479383, "learning_rate": 2.8207960568275006e-06, "loss": 0.6088, "step": 5967 }, { "epoch": 0.76, "grad_norm": 0.529492087294512, "learning_rate": 2.817916463524425e-06, "loss": 0.6191, "step": 5968 }, { "epoch": 0.76, "grad_norm": 0.5573996942504723, "learning_rate": 2.815038099706798e-06, "loss": 0.6236, "step": 5969 }, { "epoch": 0.76, "grad_norm": 0.5393529325304809, "learning_rate": 2.8121609658673555e-06, "loss": 0.5919, "step": 5970 }, { "epoch": 0.76, "grad_norm": 0.45992563295419536, "learning_rate": 2.809285062498629e-06, "loss": 0.5968, "step": 5971 }, { "epoch": 0.76, "grad_norm": 0.42901030176182764, "learning_rate": 2.8064103900929383e-06, "loss": 0.5542, "step": 5972 }, { "epoch": 0.76, "grad_norm": 0.47973342150997006, "learning_rate": 2.8035369491423926e-06, "loss": 0.5732, "step": 5973 }, { "epoch": 0.76, "grad_norm": 0.494174474764595, "learning_rate": 2.8006647401388886e-06, "loss": 0.5897, "step": 5974 }, { "epoch": 0.76, "grad_norm": 0.5050202314758049, "learning_rate": 2.7977937635741148e-06, "loss": 0.5974, "step": 5975 }, { "epoch": 0.76, "grad_norm": 0.45992358349512735, "learning_rate": 2.7949240199395477e-06, "loss": 0.5984, "step": 5976 }, { "epoch": 0.76, "grad_norm": 0.46411198513009977, "learning_rate": 2.7920555097264525e-06, "loss": 0.592, "step": 5977 }, { "epoch": 0.76, "grad_norm": 0.47689830040737696, "learning_rate": 2.7891882334258813e-06, "loss": 0.5879, "step": 5978 }, { "epoch": 0.76, "grad_norm": 0.5210841735490229, "learning_rate": 2.78632219152868e-06, "loss": 0.5936, "step": 5979 }, { "epoch": 0.76, "grad_norm": 0.5457323097198122, "learning_rate": 2.7834573845254786e-06, "loss": 0.6091, "step": 5980 }, { "epoch": 0.76, "grad_norm": 0.49241116299314464, "learning_rate": 2.7805938129066968e-06, "loss": 0.6112, "step": 5981 }, { "epoch": 0.76, "grad_norm": 0.38810774160187167, "learning_rate": 2.777731477162544e-06, "loss": 0.5614, "step": 5982 }, { "epoch": 0.76, "grad_norm": 0.48068050472973156, "learning_rate": 2.774870377783018e-06, "loss": 0.5846, "step": 5983 }, { "epoch": 0.76, "grad_norm": 0.5035195731980019, "learning_rate": 2.7720105152579025e-06, "loss": 0.6357, "step": 5984 }, { "epoch": 0.76, "grad_norm": 0.50618139374146, "learning_rate": 2.7691518900767724e-06, "loss": 0.63, "step": 5985 }, { "epoch": 0.76, "grad_norm": 0.5109241624486672, "learning_rate": 2.766294502728989e-06, "loss": 0.6103, "step": 5986 }, { "epoch": 0.76, "grad_norm": 0.5129560540609771, "learning_rate": 2.7634383537037033e-06, "loss": 0.6205, "step": 5987 }, { "epoch": 0.76, "grad_norm": 0.4647574769548278, "learning_rate": 2.7605834434898528e-06, "loss": 0.5828, "step": 5988 }, { "epoch": 0.77, "grad_norm": 0.5069263563165275, "learning_rate": 2.7577297725761597e-06, "loss": 0.6162, "step": 5989 }, { "epoch": 0.77, "grad_norm": 0.49902589825608584, "learning_rate": 2.754877341451144e-06, "loss": 0.6397, "step": 5990 }, { "epoch": 0.77, "grad_norm": 0.5685652327204584, "learning_rate": 2.752026150603109e-06, "loss": 0.6197, "step": 5991 }, { "epoch": 0.77, "grad_norm": 0.49766561191303177, "learning_rate": 2.749176200520136e-06, "loss": 0.612, "step": 5992 }, { "epoch": 0.77, "grad_norm": 0.5004695313076585, "learning_rate": 2.746327491690105e-06, "loss": 0.6149, "step": 5993 }, { "epoch": 0.77, "grad_norm": 0.5062612907204038, "learning_rate": 2.743480024600681e-06, "loss": 0.6208, "step": 5994 }, { "epoch": 0.77, "grad_norm": 0.47433433409844655, "learning_rate": 2.740633799739315e-06, "loss": 0.5607, "step": 5995 }, { "epoch": 0.77, "grad_norm": 0.5284239038711709, "learning_rate": 2.7377888175932444e-06, "loss": 0.5775, "step": 5996 }, { "epoch": 0.77, "grad_norm": 0.5307149745969209, "learning_rate": 2.734945078649501e-06, "loss": 0.5858, "step": 5997 }, { "epoch": 0.77, "grad_norm": 0.4576423039565399, "learning_rate": 2.7321025833948946e-06, "loss": 0.5745, "step": 5998 }, { "epoch": 0.77, "grad_norm": 0.46944927942059983, "learning_rate": 2.7292613323160265e-06, "loss": 0.5863, "step": 5999 }, { "epoch": 0.77, "grad_norm": 0.5387551691296725, "learning_rate": 2.7264213258992877e-06, "loss": 0.617, "step": 6000 }, { "epoch": 0.77, "grad_norm": 0.4947434690086734, "learning_rate": 2.7235825646308455e-06, "loss": 0.5603, "step": 6001 }, { "epoch": 0.77, "grad_norm": 0.5756186082758342, "learning_rate": 2.720745048996667e-06, "loss": 0.6244, "step": 6002 }, { "epoch": 0.77, "grad_norm": 0.5040306300685398, "learning_rate": 2.717908779482494e-06, "loss": 0.5777, "step": 6003 }, { "epoch": 0.77, "grad_norm": 0.4996094466884814, "learning_rate": 2.715073756573869e-06, "loss": 0.5674, "step": 6004 }, { "epoch": 0.77, "grad_norm": 0.4830749382712722, "learning_rate": 2.712239980756112e-06, "loss": 0.5799, "step": 6005 }, { "epoch": 0.77, "grad_norm": 0.5144684884427319, "learning_rate": 2.7094074525143275e-06, "loss": 0.5924, "step": 6006 }, { "epoch": 0.77, "grad_norm": 0.46853091307778016, "learning_rate": 2.706576172333413e-06, "loss": 0.5693, "step": 6007 }, { "epoch": 0.77, "grad_norm": 0.5133908259405398, "learning_rate": 2.7037461406980514e-06, "loss": 0.6206, "step": 6008 }, { "epoch": 0.77, "grad_norm": 0.5446165909759815, "learning_rate": 2.700917358092703e-06, "loss": 0.6225, "step": 6009 }, { "epoch": 0.77, "grad_norm": 0.4582400493286139, "learning_rate": 2.698089825001622e-06, "loss": 0.5801, "step": 6010 }, { "epoch": 0.77, "grad_norm": 0.5284641418970284, "learning_rate": 2.6952635419088524e-06, "loss": 0.5943, "step": 6011 }, { "epoch": 0.77, "grad_norm": 0.5146973713968022, "learning_rate": 2.6924385092982188e-06, "loss": 0.5867, "step": 6012 }, { "epoch": 0.77, "grad_norm": 0.5384145808769938, "learning_rate": 2.689614727653329e-06, "loss": 0.5943, "step": 6013 }, { "epoch": 0.77, "grad_norm": 0.5331533719470726, "learning_rate": 2.686792197457584e-06, "loss": 0.5902, "step": 6014 }, { "epoch": 0.77, "grad_norm": 0.4928293304910089, "learning_rate": 2.6839709191941634e-06, "loss": 0.5755, "step": 6015 }, { "epoch": 0.77, "grad_norm": 0.4879508439175233, "learning_rate": 2.6811508933460383e-06, "loss": 0.6104, "step": 6016 }, { "epoch": 0.77, "grad_norm": 0.4649410150367061, "learning_rate": 2.678332120395961e-06, "loss": 0.5527, "step": 6017 }, { "epoch": 0.77, "grad_norm": 0.561293819244818, "learning_rate": 2.675514600826472e-06, "loss": 0.6349, "step": 6018 }, { "epoch": 0.77, "grad_norm": 0.5473704976560194, "learning_rate": 2.6726983351198965e-06, "loss": 0.5954, "step": 6019 }, { "epoch": 0.77, "grad_norm": 0.4902983124288187, "learning_rate": 2.6698833237583454e-06, "loss": 0.6085, "step": 6020 }, { "epoch": 0.77, "grad_norm": 0.4859587895481, "learning_rate": 2.6670695672237133e-06, "loss": 0.5869, "step": 6021 }, { "epoch": 0.77, "grad_norm": 0.4768877386118434, "learning_rate": 2.664257065997682e-06, "loss": 0.596, "step": 6022 }, { "epoch": 0.77, "grad_norm": 0.5966830952601663, "learning_rate": 2.6614458205617165e-06, "loss": 0.5684, "step": 6023 }, { "epoch": 0.77, "grad_norm": 0.46681045172327845, "learning_rate": 2.6586358313970695e-06, "loss": 0.5974, "step": 6024 }, { "epoch": 0.77, "grad_norm": 0.5650068428575024, "learning_rate": 2.6558270989847756e-06, "loss": 0.5779, "step": 6025 }, { "epoch": 0.77, "grad_norm": 0.5611987021892566, "learning_rate": 2.6530196238056562e-06, "loss": 0.6296, "step": 6026 }, { "epoch": 0.77, "grad_norm": 0.484586214125933, "learning_rate": 2.6502134063403173e-06, "loss": 0.5791, "step": 6027 }, { "epoch": 0.77, "grad_norm": 0.48463747348726655, "learning_rate": 2.6474084470691485e-06, "loss": 0.5761, "step": 6028 }, { "epoch": 0.77, "grad_norm": 0.5479935669461019, "learning_rate": 2.6446047464723248e-06, "loss": 0.6132, "step": 6029 }, { "epoch": 0.77, "grad_norm": 0.5588580406283044, "learning_rate": 2.641802305029806e-06, "loss": 0.5833, "step": 6030 }, { "epoch": 0.77, "grad_norm": 0.47373189697880064, "learning_rate": 2.6390011232213365e-06, "loss": 0.5584, "step": 6031 }, { "epoch": 0.77, "grad_norm": 0.5632518779454273, "learning_rate": 2.6362012015264414e-06, "loss": 0.6157, "step": 6032 }, { "epoch": 0.77, "grad_norm": 0.48470050934442793, "learning_rate": 2.6334025404244414e-06, "loss": 0.5924, "step": 6033 }, { "epoch": 0.77, "grad_norm": 0.5287624754021593, "learning_rate": 2.6306051403944246e-06, "loss": 0.6006, "step": 6034 }, { "epoch": 0.77, "grad_norm": 0.558965804684086, "learning_rate": 2.627809001915276e-06, "loss": 0.6375, "step": 6035 }, { "epoch": 0.77, "grad_norm": 0.48046973844414465, "learning_rate": 2.625014125465659e-06, "loss": 0.5975, "step": 6036 }, { "epoch": 0.77, "grad_norm": 0.5721636400262363, "learning_rate": 2.622220511524023e-06, "loss": 0.5939, "step": 6037 }, { "epoch": 0.77, "grad_norm": 0.4832758914607123, "learning_rate": 2.6194281605686013e-06, "loss": 0.5777, "step": 6038 }, { "epoch": 0.77, "grad_norm": 0.5188792963606139, "learning_rate": 2.616637073077407e-06, "loss": 0.6169, "step": 6039 }, { "epoch": 0.77, "grad_norm": 0.5045652704020089, "learning_rate": 2.6138472495282475e-06, "loss": 0.6073, "step": 6040 }, { "epoch": 0.77, "grad_norm": 0.5368889172221467, "learning_rate": 2.6110586903987045e-06, "loss": 0.581, "step": 6041 }, { "epoch": 0.77, "grad_norm": 0.5134590908854315, "learning_rate": 2.608271396166141e-06, "loss": 0.5925, "step": 6042 }, { "epoch": 0.77, "grad_norm": 0.4790737449006718, "learning_rate": 2.605485367307711e-06, "loss": 0.5806, "step": 6043 }, { "epoch": 0.77, "grad_norm": 0.4850833060391109, "learning_rate": 2.6027006043003466e-06, "loss": 0.5773, "step": 6044 }, { "epoch": 0.77, "grad_norm": 0.5255396302595582, "learning_rate": 2.5999171076207686e-06, "loss": 0.6026, "step": 6045 }, { "epoch": 0.77, "grad_norm": 0.5145549474161855, "learning_rate": 2.597134877745473e-06, "loss": 0.6045, "step": 6046 }, { "epoch": 0.77, "grad_norm": 0.46861263294525, "learning_rate": 2.59435391515075e-06, "loss": 0.6067, "step": 6047 }, { "epoch": 0.77, "grad_norm": 0.5430737990635982, "learning_rate": 2.5915742203126635e-06, "loss": 0.6179, "step": 6048 }, { "epoch": 0.77, "grad_norm": 0.500870705125375, "learning_rate": 2.588795793707064e-06, "loss": 0.5628, "step": 6049 }, { "epoch": 0.77, "grad_norm": 0.4711258092417725, "learning_rate": 2.586018635809587e-06, "loss": 0.5939, "step": 6050 }, { "epoch": 0.77, "grad_norm": 0.4807224150490628, "learning_rate": 2.583242747095642e-06, "loss": 0.5918, "step": 6051 }, { "epoch": 0.77, "grad_norm": 0.4568236388481282, "learning_rate": 2.580468128040432e-06, "loss": 0.5831, "step": 6052 }, { "epoch": 0.77, "grad_norm": 0.5615379127754674, "learning_rate": 2.577694779118932e-06, "loss": 0.6013, "step": 6053 }, { "epoch": 0.77, "grad_norm": 0.4597469220522455, "learning_rate": 2.574922700805914e-06, "loss": 0.6066, "step": 6054 }, { "epoch": 0.77, "grad_norm": 0.5096211278244478, "learning_rate": 2.5721518935759214e-06, "loss": 0.6265, "step": 6055 }, { "epoch": 0.77, "grad_norm": 0.5328602617027479, "learning_rate": 2.5693823579032808e-06, "loss": 0.6202, "step": 6056 }, { "epoch": 0.77, "grad_norm": 0.49710285877265564, "learning_rate": 2.5666140942621044e-06, "loss": 0.5659, "step": 6057 }, { "epoch": 0.77, "grad_norm": 0.46827274564365334, "learning_rate": 2.5638471031262848e-06, "loss": 0.5736, "step": 6058 }, { "epoch": 0.77, "grad_norm": 0.5239565075842305, "learning_rate": 2.5610813849695005e-06, "loss": 0.6205, "step": 6059 }, { "epoch": 0.77, "grad_norm": 0.5081652377661078, "learning_rate": 2.5583169402652e-06, "loss": 0.5804, "step": 6060 }, { "epoch": 0.77, "grad_norm": 0.42948919275486996, "learning_rate": 2.5555537694866315e-06, "loss": 0.568, "step": 6061 }, { "epoch": 0.77, "grad_norm": 0.5688382800142693, "learning_rate": 2.5527918731068134e-06, "loss": 0.6128, "step": 6062 }, { "epoch": 0.77, "grad_norm": 0.6134589854192974, "learning_rate": 2.5500312515985483e-06, "loss": 0.5976, "step": 6063 }, { "epoch": 0.77, "grad_norm": 0.49779593580046555, "learning_rate": 2.547271905434421e-06, "loss": 0.6054, "step": 6064 }, { "epoch": 0.77, "grad_norm": 0.5331878891380044, "learning_rate": 2.544513835086799e-06, "loss": 0.6349, "step": 6065 }, { "epoch": 0.77, "grad_norm": 0.5071744585657094, "learning_rate": 2.5417570410278304e-06, "loss": 0.587, "step": 6066 }, { "epoch": 0.78, "grad_norm": 0.46130698175016194, "learning_rate": 2.539001523729443e-06, "loss": 0.5744, "step": 6067 }, { "epoch": 0.78, "grad_norm": 0.5131471702490852, "learning_rate": 2.53624728366335e-06, "loss": 0.5684, "step": 6068 }, { "epoch": 0.78, "grad_norm": 0.605459019789999, "learning_rate": 2.533494321301043e-06, "loss": 0.661, "step": 6069 }, { "epoch": 0.78, "grad_norm": 0.6985990951481894, "learning_rate": 2.5307426371137965e-06, "loss": 0.5786, "step": 6070 }, { "epoch": 0.78, "grad_norm": 0.5415319976469133, "learning_rate": 2.5279922315726634e-06, "loss": 0.6234, "step": 6071 }, { "epoch": 0.78, "grad_norm": 0.5135738680750276, "learning_rate": 2.5252431051484814e-06, "loss": 0.5841, "step": 6072 }, { "epoch": 0.78, "grad_norm": 0.5433201956589749, "learning_rate": 2.5224952583118677e-06, "loss": 0.5965, "step": 6073 }, { "epoch": 0.78, "grad_norm": 0.5068832498125437, "learning_rate": 2.5197486915332194e-06, "loss": 0.5473, "step": 6074 }, { "epoch": 0.78, "grad_norm": 0.48664942431028635, "learning_rate": 2.517003405282715e-06, "loss": 0.5963, "step": 6075 }, { "epoch": 0.78, "grad_norm": 0.5488012916893853, "learning_rate": 2.5142594000303156e-06, "loss": 0.6464, "step": 6076 }, { "epoch": 0.78, "grad_norm": 0.6034383394548734, "learning_rate": 2.511516676245761e-06, "loss": 0.6045, "step": 6077 }, { "epoch": 0.78, "grad_norm": 0.49304476243204587, "learning_rate": 2.5087752343985707e-06, "loss": 0.601, "step": 6078 }, { "epoch": 0.78, "grad_norm": 0.49841772545037333, "learning_rate": 2.5060350749580487e-06, "loss": 0.5917, "step": 6079 }, { "epoch": 0.78, "grad_norm": 0.46333850423060163, "learning_rate": 2.503296198393276e-06, "loss": 0.5729, "step": 6080 }, { "epoch": 0.78, "grad_norm": 0.5051926874387767, "learning_rate": 2.5005586051731114e-06, "loss": 0.6203, "step": 6081 }, { "epoch": 0.78, "grad_norm": 0.5093712393505692, "learning_rate": 2.4978222957662037e-06, "loss": 0.5846, "step": 6082 }, { "epoch": 0.78, "grad_norm": 0.4658250578228656, "learning_rate": 2.4950872706409755e-06, "loss": 0.5854, "step": 6083 }, { "epoch": 0.78, "grad_norm": 0.4950721614673089, "learning_rate": 2.4923535302656256e-06, "loss": 0.5929, "step": 6084 }, { "epoch": 0.78, "grad_norm": 0.48513364653731184, "learning_rate": 2.4896210751081396e-06, "loss": 0.6046, "step": 6085 }, { "epoch": 0.78, "grad_norm": 0.4841411075770622, "learning_rate": 2.4868899056362784e-06, "loss": 0.5869, "step": 6086 }, { "epoch": 0.78, "grad_norm": 0.520585273194814, "learning_rate": 2.4841600223175877e-06, "loss": 0.6142, "step": 6087 }, { "epoch": 0.78, "grad_norm": 0.4729775368149344, "learning_rate": 2.4814314256193895e-06, "loss": 0.5945, "step": 6088 }, { "epoch": 0.78, "grad_norm": 0.5475756245079587, "learning_rate": 2.478704116008783e-06, "loss": 0.6155, "step": 6089 }, { "epoch": 0.78, "grad_norm": 0.49537331116238065, "learning_rate": 2.475978093952657e-06, "loss": 0.5821, "step": 6090 }, { "epoch": 0.78, "grad_norm": 0.47791170711825065, "learning_rate": 2.47325335991767e-06, "loss": 0.5388, "step": 6091 }, { "epoch": 0.78, "grad_norm": 0.5684319061156471, "learning_rate": 2.4705299143702667e-06, "loss": 0.6131, "step": 6092 }, { "epoch": 0.78, "grad_norm": 0.4696738081338294, "learning_rate": 2.4678077577766614e-06, "loss": 0.59, "step": 6093 }, { "epoch": 0.78, "grad_norm": 0.6261725641348329, "learning_rate": 2.465086890602857e-06, "loss": 0.682, "step": 6094 }, { "epoch": 0.78, "grad_norm": 0.5461414496183151, "learning_rate": 2.462367313314634e-06, "loss": 0.6002, "step": 6095 }, { "epoch": 0.78, "grad_norm": 0.51418870223014, "learning_rate": 2.4596490263775475e-06, "loss": 0.5737, "step": 6096 }, { "epoch": 0.78, "grad_norm": 0.5317568613553795, "learning_rate": 2.45693203025694e-06, "loss": 0.6141, "step": 6097 }, { "epoch": 0.78, "grad_norm": 0.5419518930808844, "learning_rate": 2.4542163254179273e-06, "loss": 0.6474, "step": 6098 }, { "epoch": 0.78, "grad_norm": 0.47268441976789255, "learning_rate": 2.4515019123254037e-06, "loss": 0.5942, "step": 6099 }, { "epoch": 0.78, "grad_norm": 0.4603706641158467, "learning_rate": 2.4487887914440446e-06, "loss": 0.547, "step": 6100 }, { "epoch": 0.78, "grad_norm": 0.5198253846465222, "learning_rate": 2.4460769632383052e-06, "loss": 0.6148, "step": 6101 }, { "epoch": 0.78, "grad_norm": 0.5525188905812433, "learning_rate": 2.4433664281724135e-06, "loss": 0.6618, "step": 6102 }, { "epoch": 0.78, "grad_norm": 0.5267466295318205, "learning_rate": 2.440657186710378e-06, "loss": 0.6079, "step": 6103 }, { "epoch": 0.78, "grad_norm": 0.46553739883543144, "learning_rate": 2.437949239315994e-06, "loss": 0.6, "step": 6104 }, { "epoch": 0.78, "grad_norm": 0.5470581919929272, "learning_rate": 2.4352425864528286e-06, "loss": 0.6002, "step": 6105 }, { "epoch": 0.78, "grad_norm": 0.54467994255346, "learning_rate": 2.4325372285842253e-06, "loss": 0.6296, "step": 6106 }, { "epoch": 0.78, "grad_norm": 0.4062192276441213, "learning_rate": 2.42983316617331e-06, "loss": 0.565, "step": 6107 }, { "epoch": 0.78, "grad_norm": 0.4884198786076868, "learning_rate": 2.427130399682984e-06, "loss": 0.5907, "step": 6108 }, { "epoch": 0.78, "grad_norm": 0.46940263524046943, "learning_rate": 2.4244289295759293e-06, "loss": 0.5667, "step": 6109 }, { "epoch": 0.78, "grad_norm": 0.577884168674644, "learning_rate": 2.4217287563146043e-06, "loss": 0.6016, "step": 6110 }, { "epoch": 0.78, "grad_norm": 0.5413070986405857, "learning_rate": 2.4190298803612456e-06, "loss": 0.6238, "step": 6111 }, { "epoch": 0.78, "grad_norm": 0.5279269295147341, "learning_rate": 2.4163323021778694e-06, "loss": 0.6139, "step": 6112 }, { "epoch": 0.78, "grad_norm": 0.506928414159099, "learning_rate": 2.4136360222262667e-06, "loss": 0.6023, "step": 6113 }, { "epoch": 0.78, "grad_norm": 0.5103597773903585, "learning_rate": 2.4109410409680078e-06, "loss": 0.5658, "step": 6114 }, { "epoch": 0.78, "grad_norm": 0.5856309676022056, "learning_rate": 2.4082473588644417e-06, "loss": 0.5959, "step": 6115 }, { "epoch": 0.78, "grad_norm": 0.5538122012039738, "learning_rate": 2.405554976376694e-06, "loss": 0.5973, "step": 6116 }, { "epoch": 0.78, "grad_norm": 0.5252195809896726, "learning_rate": 2.402863893965667e-06, "loss": 0.5915, "step": 6117 }, { "epoch": 0.78, "grad_norm": 0.4991344680047187, "learning_rate": 2.400174112092042e-06, "loss": 0.5668, "step": 6118 }, { "epoch": 0.78, "grad_norm": 0.5996661846031172, "learning_rate": 2.3974856312162776e-06, "loss": 0.6236, "step": 6119 }, { "epoch": 0.78, "grad_norm": 0.43711230845200755, "learning_rate": 2.3947984517986067e-06, "loss": 0.5658, "step": 6120 }, { "epoch": 0.78, "grad_norm": 0.5720679751597609, "learning_rate": 2.3921125742990446e-06, "loss": 0.6596, "step": 6121 }, { "epoch": 0.78, "grad_norm": 0.5379434760381742, "learning_rate": 2.3894279991773793e-06, "loss": 0.6003, "step": 6122 }, { "epoch": 0.78, "grad_norm": 0.5100862184011181, "learning_rate": 2.386744726893178e-06, "loss": 0.6178, "step": 6123 }, { "epoch": 0.78, "grad_norm": 0.5248247286848234, "learning_rate": 2.384062757905782e-06, "loss": 0.5899, "step": 6124 }, { "epoch": 0.78, "grad_norm": 0.4585587709913571, "learning_rate": 2.381382092674318e-06, "loss": 0.5791, "step": 6125 }, { "epoch": 0.78, "grad_norm": 0.5679523042609002, "learning_rate": 2.378702731657677e-06, "loss": 0.6395, "step": 6126 }, { "epoch": 0.78, "grad_norm": 0.4326094456313921, "learning_rate": 2.3760246753145356e-06, "loss": 0.534, "step": 6127 }, { "epoch": 0.78, "grad_norm": 0.42647809223269995, "learning_rate": 2.373347924103344e-06, "loss": 0.5717, "step": 6128 }, { "epoch": 0.78, "grad_norm": 0.5165689640065527, "learning_rate": 2.3706724784823297e-06, "loss": 0.6077, "step": 6129 }, { "epoch": 0.78, "grad_norm": 0.5480439024027329, "learning_rate": 2.3679983389094975e-06, "loss": 0.6338, "step": 6130 }, { "epoch": 0.78, "grad_norm": 0.49016487210874166, "learning_rate": 2.3653255058426226e-06, "loss": 0.5815, "step": 6131 }, { "epoch": 0.78, "grad_norm": 0.45464023199384307, "learning_rate": 2.362653979739269e-06, "loss": 0.5997, "step": 6132 }, { "epoch": 0.78, "grad_norm": 0.48584693577715754, "learning_rate": 2.3599837610567653e-06, "loss": 0.5624, "step": 6133 }, { "epoch": 0.78, "grad_norm": 0.7571702859615946, "learning_rate": 2.3573148502522245e-06, "loss": 0.5588, "step": 6134 }, { "epoch": 0.78, "grad_norm": 0.47167859815634383, "learning_rate": 2.3546472477825257e-06, "loss": 0.6043, "step": 6135 }, { "epoch": 0.78, "grad_norm": 0.5442656216168157, "learning_rate": 2.3519809541043327e-06, "loss": 0.6289, "step": 6136 }, { "epoch": 0.78, "grad_norm": 0.510300657862663, "learning_rate": 2.349315969674082e-06, "loss": 0.603, "step": 6137 }, { "epoch": 0.78, "grad_norm": 0.536979108001724, "learning_rate": 2.346652294947985e-06, "loss": 0.6115, "step": 6138 }, { "epoch": 0.78, "grad_norm": 0.5387355324216524, "learning_rate": 2.3439899303820356e-06, "loss": 0.6225, "step": 6139 }, { "epoch": 0.78, "grad_norm": 0.5625773011784867, "learning_rate": 2.341328876431996e-06, "loss": 0.642, "step": 6140 }, { "epoch": 0.78, "grad_norm": 0.4966461718993808, "learning_rate": 2.338669133553405e-06, "loss": 0.5971, "step": 6141 }, { "epoch": 0.78, "grad_norm": 0.5317277905949453, "learning_rate": 2.336010702201579e-06, "loss": 0.5821, "step": 6142 }, { "epoch": 0.78, "grad_norm": 0.5837274781339639, "learning_rate": 2.3333535828316114e-06, "loss": 0.6085, "step": 6143 }, { "epoch": 0.78, "grad_norm": 0.5113725869118316, "learning_rate": 2.330697775898364e-06, "loss": 0.5934, "step": 6144 }, { "epoch": 0.79, "grad_norm": 0.5789141367311716, "learning_rate": 2.3280432818564814e-06, "loss": 0.6151, "step": 6145 }, { "epoch": 0.79, "grad_norm": 0.4815854373898072, "learning_rate": 2.3253901011603774e-06, "loss": 0.6021, "step": 6146 }, { "epoch": 0.79, "grad_norm": 0.525948121516921, "learning_rate": 2.3227382342642504e-06, "loss": 0.5915, "step": 6147 }, { "epoch": 0.79, "grad_norm": 0.5481832017758439, "learning_rate": 2.320087681622064e-06, "loss": 0.5997, "step": 6148 }, { "epoch": 0.79, "grad_norm": 0.48113190149228685, "learning_rate": 2.317438443687562e-06, "loss": 0.5761, "step": 6149 }, { "epoch": 0.79, "grad_norm": 0.48452946809812875, "learning_rate": 2.314790520914261e-06, "loss": 0.5803, "step": 6150 }, { "epoch": 0.79, "grad_norm": 0.4586474055555128, "learning_rate": 2.3121439137554524e-06, "loss": 0.5814, "step": 6151 }, { "epoch": 0.79, "grad_norm": 0.46498579321144634, "learning_rate": 2.309498622664208e-06, "loss": 0.5787, "step": 6152 }, { "epoch": 0.79, "grad_norm": 0.47407474833558927, "learning_rate": 2.3068546480933597e-06, "loss": 0.5881, "step": 6153 }, { "epoch": 0.79, "grad_norm": 0.5075789694156296, "learning_rate": 2.304211990495533e-06, "loss": 0.5982, "step": 6154 }, { "epoch": 0.79, "grad_norm": 0.45218041392391006, "learning_rate": 2.301570650323114e-06, "loss": 0.5611, "step": 6155 }, { "epoch": 0.79, "grad_norm": 0.5632169619179319, "learning_rate": 2.298930628028271e-06, "loss": 0.5834, "step": 6156 }, { "epoch": 0.79, "grad_norm": 0.6692111827193229, "learning_rate": 2.296291924062941e-06, "loss": 0.6194, "step": 6157 }, { "epoch": 0.79, "grad_norm": 0.4585425195539004, "learning_rate": 2.2936545388788414e-06, "loss": 0.5912, "step": 6158 }, { "epoch": 0.79, "grad_norm": 0.5177404692278149, "learning_rate": 2.2910184729274564e-06, "loss": 0.5884, "step": 6159 }, { "epoch": 0.79, "grad_norm": 0.4548301743458929, "learning_rate": 2.2883837266600517e-06, "loss": 0.587, "step": 6160 }, { "epoch": 0.79, "grad_norm": 0.5299650338756303, "learning_rate": 2.2857503005276616e-06, "loss": 0.5966, "step": 6161 }, { "epoch": 0.79, "grad_norm": 0.5808758759541404, "learning_rate": 2.2831181949810987e-06, "loss": 0.6117, "step": 6162 }, { "epoch": 0.79, "grad_norm": 0.4825864356039101, "learning_rate": 2.2804874104709464e-06, "loss": 0.579, "step": 6163 }, { "epoch": 0.79, "grad_norm": 0.47843067374394016, "learning_rate": 2.2778579474475627e-06, "loss": 0.5967, "step": 6164 }, { "epoch": 0.79, "grad_norm": 0.47084713304171955, "learning_rate": 2.27522980636108e-06, "loss": 0.5751, "step": 6165 }, { "epoch": 0.79, "grad_norm": 0.6097396479263211, "learning_rate": 2.2726029876614044e-06, "loss": 0.6286, "step": 6166 }, { "epoch": 0.79, "grad_norm": 0.47776196553040573, "learning_rate": 2.2699774917982155e-06, "loss": 0.5827, "step": 6167 }, { "epoch": 0.79, "grad_norm": 0.47385283714699733, "learning_rate": 2.267353319220966e-06, "loss": 0.5864, "step": 6168 }, { "epoch": 0.79, "grad_norm": 0.5453977244990321, "learning_rate": 2.2647304703788832e-06, "loss": 0.605, "step": 6169 }, { "epoch": 0.79, "grad_norm": 0.4628041538066611, "learning_rate": 2.2621089457209644e-06, "loss": 0.578, "step": 6170 }, { "epoch": 0.79, "grad_norm": 0.5053391474420339, "learning_rate": 2.2594887456959857e-06, "loss": 0.5653, "step": 6171 }, { "epoch": 0.79, "grad_norm": 0.541685547580131, "learning_rate": 2.2568698707524928e-06, "loss": 0.6318, "step": 6172 }, { "epoch": 0.79, "grad_norm": 0.4629739211884928, "learning_rate": 2.2542523213388033e-06, "loss": 0.6153, "step": 6173 }, { "epoch": 0.79, "grad_norm": 0.5506922091468994, "learning_rate": 2.2516360979030093e-06, "loss": 0.6338, "step": 6174 }, { "epoch": 0.79, "grad_norm": 0.465579232265049, "learning_rate": 2.249021200892982e-06, "loss": 0.621, "step": 6175 }, { "epoch": 0.79, "grad_norm": 0.4916541530312722, "learning_rate": 2.2464076307563586e-06, "loss": 0.5931, "step": 6176 }, { "epoch": 0.79, "grad_norm": 0.5285799462506703, "learning_rate": 2.243795387940546e-06, "loss": 0.5857, "step": 6177 }, { "epoch": 0.79, "grad_norm": 0.48365171296947485, "learning_rate": 2.241184472892731e-06, "loss": 0.6028, "step": 6178 }, { "epoch": 0.79, "grad_norm": 0.4919919906834703, "learning_rate": 2.2385748860598698e-06, "loss": 0.5726, "step": 6179 }, { "epoch": 0.79, "grad_norm": 0.4919150695537373, "learning_rate": 2.235966627888694e-06, "loss": 0.5803, "step": 6180 }, { "epoch": 0.79, "grad_norm": 0.5497937179985881, "learning_rate": 2.2333596988257e-06, "loss": 0.6073, "step": 6181 }, { "epoch": 0.79, "grad_norm": 0.4786048127631353, "learning_rate": 2.2307540993171706e-06, "loss": 0.5678, "step": 6182 }, { "epoch": 0.79, "grad_norm": 0.5201063263598009, "learning_rate": 2.2281498298091487e-06, "loss": 0.6165, "step": 6183 }, { "epoch": 0.79, "grad_norm": 0.5410148949798556, "learning_rate": 2.225546890747453e-06, "loss": 0.5913, "step": 6184 }, { "epoch": 0.79, "grad_norm": 0.536454124829972, "learning_rate": 2.22294528257768e-06, "loss": 0.6633, "step": 6185 }, { "epoch": 0.79, "grad_norm": 0.4853928929487192, "learning_rate": 2.2203450057451847e-06, "loss": 0.57, "step": 6186 }, { "epoch": 0.79, "grad_norm": 0.5067098743879653, "learning_rate": 2.2177460606951084e-06, "loss": 0.608, "step": 6187 }, { "epoch": 0.79, "grad_norm": 0.47780101222142224, "learning_rate": 2.215148447872356e-06, "loss": 0.5713, "step": 6188 }, { "epoch": 0.79, "grad_norm": 0.6510736007801329, "learning_rate": 2.212552167721611e-06, "loss": 0.6065, "step": 6189 }, { "epoch": 0.79, "grad_norm": 0.5975021932993332, "learning_rate": 2.209957220687323e-06, "loss": 0.6569, "step": 6190 }, { "epoch": 0.79, "grad_norm": 0.4782876051725505, "learning_rate": 2.207363607213716e-06, "loss": 0.561, "step": 6191 }, { "epoch": 0.79, "grad_norm": 0.4842537773129591, "learning_rate": 2.204771327744786e-06, "loss": 0.5993, "step": 6192 }, { "epoch": 0.79, "grad_norm": 0.5662366946894622, "learning_rate": 2.202180382724299e-06, "loss": 0.5978, "step": 6193 }, { "epoch": 0.79, "grad_norm": 0.5697234588383645, "learning_rate": 2.199590772595792e-06, "loss": 0.5992, "step": 6194 }, { "epoch": 0.79, "grad_norm": 0.4943306314984183, "learning_rate": 2.1970024978025717e-06, "loss": 0.5674, "step": 6195 }, { "epoch": 0.79, "grad_norm": 0.5349988161653015, "learning_rate": 2.194415558787727e-06, "loss": 0.6204, "step": 6196 }, { "epoch": 0.79, "grad_norm": 0.4766014034062558, "learning_rate": 2.1918299559941057e-06, "loss": 0.5939, "step": 6197 }, { "epoch": 0.79, "grad_norm": 0.4785578004615064, "learning_rate": 2.1892456898643334e-06, "loss": 0.5956, "step": 6198 }, { "epoch": 0.79, "grad_norm": 0.48918196655418, "learning_rate": 2.186662760840803e-06, "loss": 0.5894, "step": 6199 }, { "epoch": 0.79, "grad_norm": 0.5687892994871744, "learning_rate": 2.1840811693656825e-06, "loss": 0.6069, "step": 6200 }, { "epoch": 0.79, "grad_norm": 0.541202838990243, "learning_rate": 2.1815009158809086e-06, "loss": 0.5836, "step": 6201 }, { "epoch": 0.79, "grad_norm": 0.5048072772047043, "learning_rate": 2.1789220008281888e-06, "loss": 0.5757, "step": 6202 }, { "epoch": 0.79, "grad_norm": 0.4702167737787878, "learning_rate": 2.1763444246490018e-06, "loss": 0.5428, "step": 6203 }, { "epoch": 0.79, "grad_norm": 0.4826786553116238, "learning_rate": 2.1737681877845975e-06, "loss": 0.5908, "step": 6204 }, { "epoch": 0.79, "grad_norm": 0.46749234596159417, "learning_rate": 2.1711932906759967e-06, "loss": 0.5795, "step": 6205 }, { "epoch": 0.79, "grad_norm": 0.4654850520612603, "learning_rate": 2.1686197337639913e-06, "loss": 0.5808, "step": 6206 }, { "epoch": 0.79, "grad_norm": 0.4985133877181807, "learning_rate": 2.1660475174891416e-06, "loss": 0.6109, "step": 6207 }, { "epoch": 0.79, "grad_norm": 0.5504505483848625, "learning_rate": 2.1634766422917807e-06, "loss": 0.5981, "step": 6208 }, { "epoch": 0.79, "grad_norm": 0.4856966206111263, "learning_rate": 2.1609071086120104e-06, "loss": 0.6098, "step": 6209 }, { "epoch": 0.79, "grad_norm": 0.4561928329068401, "learning_rate": 2.158338916889704e-06, "loss": 0.5611, "step": 6210 }, { "epoch": 0.79, "grad_norm": 0.5163064887590852, "learning_rate": 2.1557720675645044e-06, "loss": 0.5662, "step": 6211 }, { "epoch": 0.79, "grad_norm": 0.4980457819699713, "learning_rate": 2.1532065610758255e-06, "loss": 0.5917, "step": 6212 }, { "epoch": 0.79, "grad_norm": 0.5430883230753937, "learning_rate": 2.150642397862851e-06, "loss": 0.6128, "step": 6213 }, { "epoch": 0.79, "grad_norm": 0.5828251519861067, "learning_rate": 2.1480795783645347e-06, "loss": 0.6325, "step": 6214 }, { "epoch": 0.79, "grad_norm": 0.5597926473383493, "learning_rate": 2.1455181030195992e-06, "loss": 0.6071, "step": 6215 }, { "epoch": 0.79, "grad_norm": 0.5476476672164935, "learning_rate": 2.1429579722665385e-06, "loss": 0.6137, "step": 6216 }, { "epoch": 0.79, "grad_norm": 0.5637700290995477, "learning_rate": 2.1403991865436123e-06, "loss": 0.5673, "step": 6217 }, { "epoch": 0.79, "grad_norm": 0.4762611800572432, "learning_rate": 2.1378417462888633e-06, "loss": 0.5902, "step": 6218 }, { "epoch": 0.79, "grad_norm": 0.46294157889913456, "learning_rate": 2.1352856519400844e-06, "loss": 0.5719, "step": 6219 }, { "epoch": 0.79, "grad_norm": 0.48546360585391246, "learning_rate": 2.1327309039348532e-06, "loss": 0.5782, "step": 6220 }, { "epoch": 0.79, "grad_norm": 0.5046334490875991, "learning_rate": 2.130177502710509e-06, "loss": 0.6256, "step": 6221 }, { "epoch": 0.79, "grad_norm": 0.5338712342266757, "learning_rate": 2.1276254487041636e-06, "loss": 0.597, "step": 6222 }, { "epoch": 0.79, "grad_norm": 0.47448694005250597, "learning_rate": 2.1250747423526976e-06, "loss": 0.5761, "step": 6223 }, { "epoch": 0.8, "grad_norm": 0.5110423870293236, "learning_rate": 2.1225253840927596e-06, "loss": 0.6086, "step": 6224 }, { "epoch": 0.8, "grad_norm": 0.5118780271055604, "learning_rate": 2.119977374360772e-06, "loss": 0.568, "step": 6225 }, { "epoch": 0.8, "grad_norm": 0.5221883664773564, "learning_rate": 2.117430713592925e-06, "loss": 0.6194, "step": 6226 }, { "epoch": 0.8, "grad_norm": 0.5552414747364922, "learning_rate": 2.11488540222517e-06, "loss": 0.5812, "step": 6227 }, { "epoch": 0.8, "grad_norm": 0.4570736323237132, "learning_rate": 2.112341440693235e-06, "loss": 0.6002, "step": 6228 }, { "epoch": 0.8, "grad_norm": 0.4919753438547367, "learning_rate": 2.109798829432618e-06, "loss": 0.5696, "step": 6229 }, { "epoch": 0.8, "grad_norm": 0.4924576702446975, "learning_rate": 2.1072575688785802e-06, "loss": 0.5755, "step": 6230 }, { "epoch": 0.8, "grad_norm": 0.49726662464528254, "learning_rate": 2.1047176594661525e-06, "loss": 0.6036, "step": 6231 }, { "epoch": 0.8, "grad_norm": 0.49223638245185697, "learning_rate": 2.102179101630143e-06, "loss": 0.5732, "step": 6232 }, { "epoch": 0.8, "grad_norm": 0.47155961882627206, "learning_rate": 2.099641895805119e-06, "loss": 0.5917, "step": 6233 }, { "epoch": 0.8, "grad_norm": 0.5066872391028985, "learning_rate": 2.097106042425419e-06, "loss": 0.5968, "step": 6234 }, { "epoch": 0.8, "grad_norm": 0.5324424723313486, "learning_rate": 2.094571541925152e-06, "loss": 0.5967, "step": 6235 }, { "epoch": 0.8, "grad_norm": 0.6161092163205921, "learning_rate": 2.0920383947381907e-06, "loss": 0.6384, "step": 6236 }, { "epoch": 0.8, "grad_norm": 0.47010249087866546, "learning_rate": 2.0895066012981804e-06, "loss": 0.581, "step": 6237 }, { "epoch": 0.8, "grad_norm": 0.5005076308531208, "learning_rate": 2.0869761620385296e-06, "loss": 0.5936, "step": 6238 }, { "epoch": 0.8, "grad_norm": 0.5624555940301331, "learning_rate": 2.084447077392425e-06, "loss": 0.6127, "step": 6239 }, { "epoch": 0.8, "grad_norm": 0.5053802588703037, "learning_rate": 2.0819193477928147e-06, "loss": 0.5901, "step": 6240 }, { "epoch": 0.8, "grad_norm": 0.4597266036417923, "learning_rate": 2.0793929736724118e-06, "loss": 0.5692, "step": 6241 }, { "epoch": 0.8, "grad_norm": 0.5950465982946792, "learning_rate": 2.076867955463703e-06, "loss": 0.6655, "step": 6242 }, { "epoch": 0.8, "grad_norm": 0.5277627546227914, "learning_rate": 2.07434429359894e-06, "loss": 0.6121, "step": 6243 }, { "epoch": 0.8, "grad_norm": 0.527175094530529, "learning_rate": 2.0718219885101454e-06, "loss": 0.5908, "step": 6244 }, { "epoch": 0.8, "grad_norm": 0.44135597769044627, "learning_rate": 2.0693010406291005e-06, "loss": 0.5646, "step": 6245 }, { "epoch": 0.8, "grad_norm": 0.4580461218476512, "learning_rate": 2.0667814503873673e-06, "loss": 0.5615, "step": 6246 }, { "epoch": 0.8, "grad_norm": 0.48960366509370057, "learning_rate": 2.064263218216267e-06, "loss": 0.5988, "step": 6247 }, { "epoch": 0.8, "grad_norm": 0.48606481176542354, "learning_rate": 2.061746344546891e-06, "loss": 0.587, "step": 6248 }, { "epoch": 0.8, "grad_norm": 0.5325982756113937, "learning_rate": 2.0592308298100973e-06, "loss": 0.5963, "step": 6249 }, { "epoch": 0.8, "grad_norm": 0.5350091926259372, "learning_rate": 2.05671667443651e-06, "loss": 0.6291, "step": 6250 }, { "epoch": 0.8, "grad_norm": 0.46587314963313553, "learning_rate": 2.0542038788565244e-06, "loss": 0.5687, "step": 6251 }, { "epoch": 0.8, "grad_norm": 0.4974493322198657, "learning_rate": 2.0516924435002983e-06, "loss": 0.5903, "step": 6252 }, { "epoch": 0.8, "grad_norm": 0.6127926962662855, "learning_rate": 2.049182368797761e-06, "loss": 0.6421, "step": 6253 }, { "epoch": 0.8, "grad_norm": 0.5491863257350683, "learning_rate": 2.046673655178605e-06, "loss": 0.6394, "step": 6254 }, { "epoch": 0.8, "grad_norm": 0.5356460922667436, "learning_rate": 2.0441663030722914e-06, "loss": 0.6432, "step": 6255 }, { "epoch": 0.8, "grad_norm": 0.5483523674165083, "learning_rate": 2.04166031290805e-06, "loss": 0.6084, "step": 6256 }, { "epoch": 0.8, "grad_norm": 0.45671838533756304, "learning_rate": 2.0391556851148763e-06, "loss": 0.5772, "step": 6257 }, { "epoch": 0.8, "grad_norm": 0.45077505197164824, "learning_rate": 2.03665242012153e-06, "loss": 0.585, "step": 6258 }, { "epoch": 0.8, "grad_norm": 0.5423939650998659, "learning_rate": 2.0341505183565414e-06, "loss": 0.5807, "step": 6259 }, { "epoch": 0.8, "grad_norm": 0.4321935962954531, "learning_rate": 2.0316499802482037e-06, "loss": 0.5769, "step": 6260 }, { "epoch": 0.8, "grad_norm": 0.5095900259882238, "learning_rate": 2.029150806224581e-06, "loss": 0.5777, "step": 6261 }, { "epoch": 0.8, "grad_norm": 0.5020377885707554, "learning_rate": 2.0266529967134997e-06, "loss": 0.5795, "step": 6262 }, { "epoch": 0.8, "grad_norm": 0.46957219916914195, "learning_rate": 2.0241565521425556e-06, "loss": 0.5648, "step": 6263 }, { "epoch": 0.8, "grad_norm": 0.49184270433193644, "learning_rate": 2.0216614729391093e-06, "loss": 0.571, "step": 6264 }, { "epoch": 0.8, "grad_norm": 0.5390431302835663, "learning_rate": 2.0191677595302885e-06, "loss": 0.5697, "step": 6265 }, { "epoch": 0.8, "grad_norm": 0.5002380809581555, "learning_rate": 2.016675412342982e-06, "loss": 0.5882, "step": 6266 }, { "epoch": 0.8, "grad_norm": 0.5651189015120169, "learning_rate": 2.0141844318038585e-06, "loss": 0.6116, "step": 6267 }, { "epoch": 0.8, "grad_norm": 0.4937570257725636, "learning_rate": 2.0116948183393394e-06, "loss": 0.606, "step": 6268 }, { "epoch": 0.8, "grad_norm": 0.5910871661013197, "learning_rate": 2.0092065723756137e-06, "loss": 0.6012, "step": 6269 }, { "epoch": 0.8, "grad_norm": 0.5482810784491416, "learning_rate": 2.0067196943386402e-06, "loss": 0.5808, "step": 6270 }, { "epoch": 0.8, "grad_norm": 0.5710865541581136, "learning_rate": 2.0042341846541425e-06, "loss": 0.6242, "step": 6271 }, { "epoch": 0.8, "grad_norm": 0.4946159988277677, "learning_rate": 2.00175004374761e-06, "loss": 0.6096, "step": 6272 }, { "epoch": 0.8, "grad_norm": 0.5502135248389758, "learning_rate": 1.9992672720442972e-06, "loss": 0.6083, "step": 6273 }, { "epoch": 0.8, "grad_norm": 0.4708745523983748, "learning_rate": 1.996785869969222e-06, "loss": 0.5722, "step": 6274 }, { "epoch": 0.8, "grad_norm": 0.470604074430496, "learning_rate": 1.994305837947176e-06, "loss": 0.6021, "step": 6275 }, { "epoch": 0.8, "grad_norm": 0.5223435970910866, "learning_rate": 1.9918271764027075e-06, "loss": 0.6137, "step": 6276 }, { "epoch": 0.8, "grad_norm": 0.4562865810497055, "learning_rate": 1.9893498857601347e-06, "loss": 0.5768, "step": 6277 }, { "epoch": 0.8, "grad_norm": 0.5706573991463123, "learning_rate": 1.9868739664435367e-06, "loss": 0.6157, "step": 6278 }, { "epoch": 0.8, "grad_norm": 0.520094966537527, "learning_rate": 1.984399418876761e-06, "loss": 0.5873, "step": 6279 }, { "epoch": 0.8, "grad_norm": 0.49017571089259027, "learning_rate": 1.981926243483422e-06, "loss": 0.5888, "step": 6280 }, { "epoch": 0.8, "grad_norm": 0.4974667289614776, "learning_rate": 1.979454440686893e-06, "loss": 0.5861, "step": 6281 }, { "epoch": 0.8, "grad_norm": 0.5145817605429862, "learning_rate": 1.9769840109103234e-06, "loss": 0.6291, "step": 6282 }, { "epoch": 0.8, "grad_norm": 0.5422329547954733, "learning_rate": 1.9745149545766174e-06, "loss": 0.5958, "step": 6283 }, { "epoch": 0.8, "grad_norm": 0.5134101892644127, "learning_rate": 1.9720472721084485e-06, "loss": 0.5998, "step": 6284 }, { "epoch": 0.8, "grad_norm": 0.50834046707294, "learning_rate": 1.9695809639282514e-06, "loss": 0.5873, "step": 6285 }, { "epoch": 0.8, "grad_norm": 0.4994783667607933, "learning_rate": 1.967116030458234e-06, "loss": 0.5587, "step": 6286 }, { "epoch": 0.8, "grad_norm": 0.5416152154856652, "learning_rate": 1.9646524721203553e-06, "loss": 0.6361, "step": 6287 }, { "epoch": 0.8, "grad_norm": 0.7090057674696476, "learning_rate": 1.9621902893363465e-06, "loss": 0.621, "step": 6288 }, { "epoch": 0.8, "grad_norm": 0.5806538818548936, "learning_rate": 1.95972948252771e-06, "loss": 0.6138, "step": 6289 }, { "epoch": 0.8, "grad_norm": 0.4964899264686341, "learning_rate": 1.957270052115704e-06, "loss": 0.5846, "step": 6290 }, { "epoch": 0.8, "grad_norm": 0.5424501509340258, "learning_rate": 1.954811998521351e-06, "loss": 0.6356, "step": 6291 }, { "epoch": 0.8, "grad_norm": 0.5279958823533825, "learning_rate": 1.95235532216544e-06, "loss": 0.6167, "step": 6292 }, { "epoch": 0.8, "grad_norm": 0.4886856986039038, "learning_rate": 1.949900023468526e-06, "loss": 0.5631, "step": 6293 }, { "epoch": 0.8, "grad_norm": 0.5574634086145536, "learning_rate": 1.947446102850925e-06, "loss": 0.6461, "step": 6294 }, { "epoch": 0.8, "grad_norm": 0.5174535890025969, "learning_rate": 1.94499356073272e-06, "loss": 0.5908, "step": 6295 }, { "epoch": 0.8, "grad_norm": 0.4665586081048601, "learning_rate": 1.942542397533753e-06, "loss": 0.5562, "step": 6296 }, { "epoch": 0.8, "grad_norm": 0.5178321433331102, "learning_rate": 1.9400926136736365e-06, "loss": 0.6017, "step": 6297 }, { "epoch": 0.8, "grad_norm": 0.5000754200656665, "learning_rate": 1.937644209571743e-06, "loss": 0.5862, "step": 6298 }, { "epoch": 0.8, "grad_norm": 0.49741485503852695, "learning_rate": 1.9351971856472085e-06, "loss": 0.611, "step": 6299 }, { "epoch": 0.8, "grad_norm": 0.48658015219162426, "learning_rate": 1.932751542318936e-06, "loss": 0.5822, "step": 6300 }, { "epoch": 0.8, "grad_norm": 0.4899949995500138, "learning_rate": 1.9303072800055867e-06, "loss": 0.6099, "step": 6301 }, { "epoch": 0.81, "grad_norm": 0.5344449714526739, "learning_rate": 1.927864399125592e-06, "loss": 0.6018, "step": 6302 }, { "epoch": 0.81, "grad_norm": 0.46127946731677805, "learning_rate": 1.92542290009714e-06, "loss": 0.5719, "step": 6303 }, { "epoch": 0.81, "grad_norm": 0.5214138249636765, "learning_rate": 1.9229827833381885e-06, "loss": 0.5802, "step": 6304 }, { "epoch": 0.81, "grad_norm": 0.5305468619507138, "learning_rate": 1.920544049266455e-06, "loss": 0.6063, "step": 6305 }, { "epoch": 0.81, "grad_norm": 0.4690589534810556, "learning_rate": 1.91810669829942e-06, "loss": 0.5798, "step": 6306 }, { "epoch": 0.81, "grad_norm": 0.4519674422106028, "learning_rate": 1.915670730854331e-06, "loss": 0.5666, "step": 6307 }, { "epoch": 0.81, "grad_norm": 0.5547912030300087, "learning_rate": 1.913236147348193e-06, "loss": 0.6009, "step": 6308 }, { "epoch": 0.81, "grad_norm": 0.4764757071832361, "learning_rate": 1.9108029481977764e-06, "loss": 0.5948, "step": 6309 }, { "epoch": 0.81, "grad_norm": 0.4654576611022047, "learning_rate": 1.908371133819622e-06, "loss": 0.5573, "step": 6310 }, { "epoch": 0.81, "grad_norm": 0.43723924183909835, "learning_rate": 1.9059407046300205e-06, "loss": 0.5846, "step": 6311 }, { "epoch": 0.81, "grad_norm": 0.49632164192436634, "learning_rate": 1.9035116610450322e-06, "loss": 0.5921, "step": 6312 }, { "epoch": 0.81, "grad_norm": 0.488956105095822, "learning_rate": 1.9010840034804822e-06, "loss": 0.5844, "step": 6313 }, { "epoch": 0.81, "grad_norm": 0.5219200055972888, "learning_rate": 1.898657732351956e-06, "loss": 0.5644, "step": 6314 }, { "epoch": 0.81, "grad_norm": 0.5281172043504561, "learning_rate": 1.8962328480747992e-06, "loss": 0.5952, "step": 6315 }, { "epoch": 0.81, "grad_norm": 0.4542256881794851, "learning_rate": 1.893809351064123e-06, "loss": 0.5764, "step": 6316 }, { "epoch": 0.81, "grad_norm": 0.6185314435719075, "learning_rate": 1.8913872417348034e-06, "loss": 0.5923, "step": 6317 }, { "epoch": 0.81, "grad_norm": 0.445228201339628, "learning_rate": 1.888966520501474e-06, "loss": 0.566, "step": 6318 }, { "epoch": 0.81, "grad_norm": 0.5271242902976703, "learning_rate": 1.8865471877785368e-06, "loss": 0.6261, "step": 6319 }, { "epoch": 0.81, "grad_norm": 0.5315415383576924, "learning_rate": 1.8841292439801463e-06, "loss": 0.6159, "step": 6320 }, { "epoch": 0.81, "grad_norm": 0.4574990076955498, "learning_rate": 1.8817126895202276e-06, "loss": 0.5446, "step": 6321 }, { "epoch": 0.81, "grad_norm": 0.4586049335863515, "learning_rate": 1.8792975248124645e-06, "loss": 0.5889, "step": 6322 }, { "epoch": 0.81, "grad_norm": 0.45356228062021, "learning_rate": 1.8768837502703019e-06, "loss": 0.5843, "step": 6323 }, { "epoch": 0.81, "grad_norm": 0.560218525347291, "learning_rate": 1.8744713663069547e-06, "loss": 0.6444, "step": 6324 }, { "epoch": 0.81, "grad_norm": 0.5086033614084854, "learning_rate": 1.8720603733353916e-06, "loss": 0.6071, "step": 6325 }, { "epoch": 0.81, "grad_norm": 0.5179913663510409, "learning_rate": 1.869650771768342e-06, "loss": 0.621, "step": 6326 }, { "epoch": 0.81, "grad_norm": 0.5174108039975681, "learning_rate": 1.8672425620183043e-06, "loss": 0.6133, "step": 6327 }, { "epoch": 0.81, "grad_norm": 0.5180533651857137, "learning_rate": 1.864835744497535e-06, "loss": 0.6014, "step": 6328 }, { "epoch": 0.81, "grad_norm": 0.575595978380595, "learning_rate": 1.8624303196180483e-06, "loss": 0.6072, "step": 6329 }, { "epoch": 0.81, "grad_norm": 0.48489372245904483, "learning_rate": 1.8600262877916242e-06, "loss": 0.59, "step": 6330 }, { "epoch": 0.81, "grad_norm": 0.45555067054393084, "learning_rate": 1.857623649429804e-06, "loss": 0.5661, "step": 6331 }, { "epoch": 0.81, "grad_norm": 0.43573710301170754, "learning_rate": 1.8552224049438926e-06, "loss": 0.5661, "step": 6332 }, { "epoch": 0.81, "grad_norm": 0.44964572722689655, "learning_rate": 1.852822554744953e-06, "loss": 0.576, "step": 6333 }, { "epoch": 0.81, "grad_norm": 0.47098497778861326, "learning_rate": 1.8504240992438093e-06, "loss": 0.5893, "step": 6334 }, { "epoch": 0.81, "grad_norm": 0.5152625453834571, "learning_rate": 1.8480270388510502e-06, "loss": 0.5979, "step": 6335 }, { "epoch": 0.81, "grad_norm": 0.5303689735204326, "learning_rate": 1.8456313739770205e-06, "loss": 0.5887, "step": 6336 }, { "epoch": 0.81, "grad_norm": 0.4801953900115399, "learning_rate": 1.8432371050318332e-06, "loss": 0.578, "step": 6337 }, { "epoch": 0.81, "grad_norm": 0.48888125671307925, "learning_rate": 1.8408442324253494e-06, "loss": 0.5771, "step": 6338 }, { "epoch": 0.81, "grad_norm": 0.5702536228369648, "learning_rate": 1.8384527565672084e-06, "loss": 0.6128, "step": 6339 }, { "epoch": 0.81, "grad_norm": 0.5331740660141673, "learning_rate": 1.8360626778667977e-06, "loss": 0.6266, "step": 6340 }, { "epoch": 0.81, "grad_norm": 0.4790420779390431, "learning_rate": 1.8336739967332718e-06, "loss": 0.5859, "step": 6341 }, { "epoch": 0.81, "grad_norm": 0.49635283129009267, "learning_rate": 1.831286713575543e-06, "loss": 0.5921, "step": 6342 }, { "epoch": 0.81, "grad_norm": 0.8907019209855241, "learning_rate": 1.8289008288022846e-06, "loss": 0.6059, "step": 6343 }, { "epoch": 0.81, "grad_norm": 0.5113029431982026, "learning_rate": 1.8265163428219324e-06, "loss": 0.6029, "step": 6344 }, { "epoch": 0.81, "grad_norm": 0.4900624632786747, "learning_rate": 1.8241332560426794e-06, "loss": 0.5985, "step": 6345 }, { "epoch": 0.81, "grad_norm": 0.5256282321939, "learning_rate": 1.821751568872484e-06, "loss": 0.601, "step": 6346 }, { "epoch": 0.81, "grad_norm": 0.551347342848806, "learning_rate": 1.819371281719059e-06, "loss": 0.5597, "step": 6347 }, { "epoch": 0.81, "grad_norm": 0.4649784726289934, "learning_rate": 1.8169923949898826e-06, "loss": 0.5747, "step": 6348 }, { "epoch": 0.81, "grad_norm": 0.4862065064619502, "learning_rate": 1.8146149090921906e-06, "loss": 0.5617, "step": 6349 }, { "epoch": 0.81, "grad_norm": 0.5030940716858705, "learning_rate": 1.8122388244329804e-06, "loss": 0.6099, "step": 6350 }, { "epoch": 0.81, "grad_norm": 0.4716079019107642, "learning_rate": 1.8098641414190076e-06, "loss": 0.5758, "step": 6351 }, { "epoch": 0.81, "grad_norm": 0.5534185296896605, "learning_rate": 1.8074908604567908e-06, "loss": 0.6225, "step": 6352 }, { "epoch": 0.81, "grad_norm": 0.537425442091057, "learning_rate": 1.805118981952605e-06, "loss": 0.6364, "step": 6353 }, { "epoch": 0.81, "grad_norm": 0.5324440955420846, "learning_rate": 1.8027485063124883e-06, "loss": 0.6084, "step": 6354 }, { "epoch": 0.81, "grad_norm": 0.5644950445540864, "learning_rate": 1.8003794339422365e-06, "loss": 0.5834, "step": 6355 }, { "epoch": 0.81, "grad_norm": 0.5399132403519418, "learning_rate": 1.798011765247406e-06, "loss": 0.581, "step": 6356 }, { "epoch": 0.81, "grad_norm": 0.5803227877546289, "learning_rate": 1.795645500633314e-06, "loss": 0.6061, "step": 6357 }, { "epoch": 0.81, "grad_norm": 0.5042001041424394, "learning_rate": 1.793280640505035e-06, "loss": 0.6112, "step": 6358 }, { "epoch": 0.81, "grad_norm": 0.5315084824756621, "learning_rate": 1.7909171852674035e-06, "loss": 0.586, "step": 6359 }, { "epoch": 0.81, "grad_norm": 0.49743958933583926, "learning_rate": 1.7885551353250175e-06, "loss": 0.6063, "step": 6360 }, { "epoch": 0.81, "grad_norm": 0.5133123155677468, "learning_rate": 1.7861944910822316e-06, "loss": 0.5879, "step": 6361 }, { "epoch": 0.81, "grad_norm": 0.42514752791557425, "learning_rate": 1.7838352529431546e-06, "loss": 0.5737, "step": 6362 }, { "epoch": 0.81, "grad_norm": 0.515776239116715, "learning_rate": 1.7814774213116625e-06, "loss": 0.6203, "step": 6363 }, { "epoch": 0.81, "grad_norm": 0.5474579005620848, "learning_rate": 1.779120996591388e-06, "loss": 0.5876, "step": 6364 }, { "epoch": 0.81, "grad_norm": 0.5281094179860485, "learning_rate": 1.7767659791857194e-06, "loss": 0.6127, "step": 6365 }, { "epoch": 0.81, "grad_norm": 0.5525609294573774, "learning_rate": 1.7744123694978078e-06, "loss": 0.6013, "step": 6366 }, { "epoch": 0.81, "grad_norm": 0.49993661575894727, "learning_rate": 1.772060167930566e-06, "loss": 0.5781, "step": 6367 }, { "epoch": 0.81, "grad_norm": 0.48658768930616536, "learning_rate": 1.7697093748866613e-06, "loss": 0.6038, "step": 6368 }, { "epoch": 0.81, "grad_norm": 0.4654676732601686, "learning_rate": 1.767359990768518e-06, "loss": 0.5852, "step": 6369 }, { "epoch": 0.81, "grad_norm": 0.5456175246731966, "learning_rate": 1.7650120159783268e-06, "loss": 0.5543, "step": 6370 }, { "epoch": 0.81, "grad_norm": 0.5465105242270768, "learning_rate": 1.7626654509180273e-06, "loss": 0.5858, "step": 6371 }, { "epoch": 0.81, "grad_norm": 0.4674903044839876, "learning_rate": 1.7603202959893262e-06, "loss": 0.593, "step": 6372 }, { "epoch": 0.81, "grad_norm": 0.4850887909476392, "learning_rate": 1.7579765515936808e-06, "loss": 0.5555, "step": 6373 }, { "epoch": 0.81, "grad_norm": 0.4681695550802759, "learning_rate": 1.7556342181323182e-06, "loss": 0.575, "step": 6374 }, { "epoch": 0.81, "grad_norm": 0.4837100518468587, "learning_rate": 1.7532932960062143e-06, "loss": 0.5842, "step": 6375 }, { "epoch": 0.81, "grad_norm": 0.4834217088070526, "learning_rate": 1.7509537856161074e-06, "loss": 0.588, "step": 6376 }, { "epoch": 0.81, "grad_norm": 0.5130435561898162, "learning_rate": 1.7486156873624915e-06, "loss": 0.6077, "step": 6377 }, { "epoch": 0.81, "grad_norm": 0.5069333072944748, "learning_rate": 1.7462790016456255e-06, "loss": 0.6084, "step": 6378 }, { "epoch": 0.81, "grad_norm": 0.4890603252898973, "learning_rate": 1.7439437288655147e-06, "loss": 0.5964, "step": 6379 }, { "epoch": 0.82, "grad_norm": 0.5687457451904557, "learning_rate": 1.7416098694219307e-06, "loss": 0.6119, "step": 6380 }, { "epoch": 0.82, "grad_norm": 0.5878002667146401, "learning_rate": 1.7392774237144062e-06, "loss": 0.6595, "step": 6381 }, { "epoch": 0.82, "grad_norm": 0.587232468593751, "learning_rate": 1.736946392142226e-06, "loss": 0.6389, "step": 6382 }, { "epoch": 0.82, "grad_norm": 0.49882231578464475, "learning_rate": 1.7346167751044318e-06, "loss": 0.5506, "step": 6383 }, { "epoch": 0.82, "grad_norm": 0.5156613139664511, "learning_rate": 1.7322885729998285e-06, "loss": 0.6059, "step": 6384 }, { "epoch": 0.82, "grad_norm": 0.5069067340640243, "learning_rate": 1.7299617862269759e-06, "loss": 0.5856, "step": 6385 }, { "epoch": 0.82, "grad_norm": 0.5220470981278498, "learning_rate": 1.7276364151841895e-06, "loss": 0.6018, "step": 6386 }, { "epoch": 0.82, "grad_norm": 0.48840942010973876, "learning_rate": 1.725312460269546e-06, "loss": 0.5969, "step": 6387 }, { "epoch": 0.82, "grad_norm": 0.5473622018803034, "learning_rate": 1.7229899218808777e-06, "loss": 0.6634, "step": 6388 }, { "epoch": 0.82, "grad_norm": 0.46670849041401513, "learning_rate": 1.7206688004157757e-06, "loss": 0.5632, "step": 6389 }, { "epoch": 0.82, "grad_norm": 0.4730443408097303, "learning_rate": 1.7183490962715877e-06, "loss": 0.5824, "step": 6390 }, { "epoch": 0.82, "grad_norm": 0.5379036342606351, "learning_rate": 1.716030809845418e-06, "loss": 0.6247, "step": 6391 }, { "epoch": 0.82, "grad_norm": 0.44875852701472985, "learning_rate": 1.7137139415341308e-06, "loss": 0.5566, "step": 6392 }, { "epoch": 0.82, "grad_norm": 0.518633064159133, "learning_rate": 1.711398491734344e-06, "loss": 0.599, "step": 6393 }, { "epoch": 0.82, "grad_norm": 0.5458304550813554, "learning_rate": 1.709084460842435e-06, "loss": 0.643, "step": 6394 }, { "epoch": 0.82, "grad_norm": 0.5250919220679919, "learning_rate": 1.7067718492545382e-06, "loss": 0.5506, "step": 6395 }, { "epoch": 0.82, "grad_norm": 0.5200431818169409, "learning_rate": 1.7044606573665434e-06, "loss": 0.6008, "step": 6396 }, { "epoch": 0.82, "grad_norm": 0.479598124178908, "learning_rate": 1.7021508855740999e-06, "loss": 0.5762, "step": 6397 }, { "epoch": 0.82, "grad_norm": 0.4507995022768543, "learning_rate": 1.6998425342726133e-06, "loss": 0.5676, "step": 6398 }, { "epoch": 0.82, "grad_norm": 0.5458290595617438, "learning_rate": 1.6975356038572432e-06, "loss": 0.6448, "step": 6399 }, { "epoch": 0.82, "grad_norm": 0.5719575107639738, "learning_rate": 1.6952300947229083e-06, "loss": 0.6177, "step": 6400 }, { "epoch": 0.82, "grad_norm": 0.5135863637431751, "learning_rate": 1.692926007264286e-06, "loss": 0.6122, "step": 6401 }, { "epoch": 0.82, "grad_norm": 0.5039555199493501, "learning_rate": 1.6906233418758033e-06, "loss": 0.5829, "step": 6402 }, { "epoch": 0.82, "grad_norm": 0.4804918586982831, "learning_rate": 1.6883220989516569e-06, "loss": 0.5823, "step": 6403 }, { "epoch": 0.82, "grad_norm": 0.5278337938738501, "learning_rate": 1.6860222788857838e-06, "loss": 0.6141, "step": 6404 }, { "epoch": 0.82, "grad_norm": 0.5449277461003764, "learning_rate": 1.6837238820718882e-06, "loss": 0.6103, "step": 6405 }, { "epoch": 0.82, "grad_norm": 0.48926933120665267, "learning_rate": 1.6814269089034274e-06, "loss": 0.5903, "step": 6406 }, { "epoch": 0.82, "grad_norm": 0.5412612162604278, "learning_rate": 1.6791313597736148e-06, "loss": 0.5996, "step": 6407 }, { "epoch": 0.82, "grad_norm": 0.5500520191504313, "learning_rate": 1.6768372350754213e-06, "loss": 0.5939, "step": 6408 }, { "epoch": 0.82, "grad_norm": 0.524451860328799, "learning_rate": 1.674544535201571e-06, "loss": 0.6384, "step": 6409 }, { "epoch": 0.82, "grad_norm": 0.4631939370356525, "learning_rate": 1.6722532605445506e-06, "loss": 0.5789, "step": 6410 }, { "epoch": 0.82, "grad_norm": 0.42531520262979433, "learning_rate": 1.669963411496599e-06, "loss": 0.5615, "step": 6411 }, { "epoch": 0.82, "grad_norm": 0.48704305332299935, "learning_rate": 1.6676749884497044e-06, "loss": 0.6156, "step": 6412 }, { "epoch": 0.82, "grad_norm": 0.5545237278730071, "learning_rate": 1.6653879917956205e-06, "loss": 0.6103, "step": 6413 }, { "epoch": 0.82, "grad_norm": 0.4437795821783426, "learning_rate": 1.663102421925854e-06, "loss": 0.5838, "step": 6414 }, { "epoch": 0.82, "grad_norm": 0.4604802398530657, "learning_rate": 1.6608182792316652e-06, "loss": 0.6, "step": 6415 }, { "epoch": 0.82, "grad_norm": 0.5766252881852597, "learning_rate": 1.6585355641040702e-06, "loss": 0.611, "step": 6416 }, { "epoch": 0.82, "grad_norm": 0.47876551994952804, "learning_rate": 1.6562542769338452e-06, "loss": 0.5736, "step": 6417 }, { "epoch": 0.82, "grad_norm": 0.4741126458791376, "learning_rate": 1.6539744181115192e-06, "loss": 0.5221, "step": 6418 }, { "epoch": 0.82, "grad_norm": 0.5288141831993134, "learning_rate": 1.6516959880273743e-06, "loss": 0.6002, "step": 6419 }, { "epoch": 0.82, "grad_norm": 0.49307160553114004, "learning_rate": 1.6494189870714527e-06, "loss": 0.5865, "step": 6420 }, { "epoch": 0.82, "grad_norm": 0.4375486454679086, "learning_rate": 1.6471434156335452e-06, "loss": 0.593, "step": 6421 }, { "epoch": 0.82, "grad_norm": 0.4997764076909119, "learning_rate": 1.6448692741032036e-06, "loss": 0.5676, "step": 6422 }, { "epoch": 0.82, "grad_norm": 0.45452905161076257, "learning_rate": 1.6425965628697315e-06, "loss": 0.5555, "step": 6423 }, { "epoch": 0.82, "grad_norm": 0.54268944172341, "learning_rate": 1.6403252823221948e-06, "loss": 0.5827, "step": 6424 }, { "epoch": 0.82, "grad_norm": 0.5804632794445292, "learning_rate": 1.6380554328494058e-06, "loss": 0.6362, "step": 6425 }, { "epoch": 0.82, "grad_norm": 0.492145912758271, "learning_rate": 1.6357870148399346e-06, "loss": 0.5937, "step": 6426 }, { "epoch": 0.82, "grad_norm": 0.5271439799856104, "learning_rate": 1.6335200286821073e-06, "loss": 0.5866, "step": 6427 }, { "epoch": 0.82, "grad_norm": 0.5942833512193767, "learning_rate": 1.6312544747640057e-06, "loss": 0.6396, "step": 6428 }, { "epoch": 0.82, "grad_norm": 0.5392918018910136, "learning_rate": 1.6289903534734663e-06, "loss": 0.594, "step": 6429 }, { "epoch": 0.82, "grad_norm": 0.5111397775536877, "learning_rate": 1.6267276651980711e-06, "loss": 0.5913, "step": 6430 }, { "epoch": 0.82, "grad_norm": 0.512610834621503, "learning_rate": 1.624466410325175e-06, "loss": 0.6199, "step": 6431 }, { "epoch": 0.82, "grad_norm": 0.5229410641458289, "learning_rate": 1.6222065892418715e-06, "loss": 0.5809, "step": 6432 }, { "epoch": 0.82, "grad_norm": 0.4898476832381264, "learning_rate": 1.6199482023350167e-06, "loss": 0.5694, "step": 6433 }, { "epoch": 0.82, "grad_norm": 0.5375929023164965, "learning_rate": 1.6176912499912178e-06, "loss": 0.615, "step": 6434 }, { "epoch": 0.82, "grad_norm": 0.5212857521490651, "learning_rate": 1.6154357325968383e-06, "loss": 0.5731, "step": 6435 }, { "epoch": 0.82, "grad_norm": 0.5805389386944089, "learning_rate": 1.6131816505379949e-06, "loss": 0.6054, "step": 6436 }, { "epoch": 0.82, "grad_norm": 0.47056860103100545, "learning_rate": 1.61092900420056e-06, "loss": 0.5894, "step": 6437 }, { "epoch": 0.82, "grad_norm": 0.4945016537416457, "learning_rate": 1.608677793970158e-06, "loss": 0.5898, "step": 6438 }, { "epoch": 0.82, "grad_norm": 0.5418434123633046, "learning_rate": 1.6064280202321702e-06, "loss": 0.5762, "step": 6439 }, { "epoch": 0.82, "grad_norm": 0.5228089614076233, "learning_rate": 1.6041796833717294e-06, "loss": 0.6193, "step": 6440 }, { "epoch": 0.82, "grad_norm": 0.5159305456654175, "learning_rate": 1.6019327837737242e-06, "loss": 0.6087, "step": 6441 }, { "epoch": 0.82, "grad_norm": 0.533709138840943, "learning_rate": 1.5996873218227949e-06, "loss": 0.644, "step": 6442 }, { "epoch": 0.82, "grad_norm": 0.5449137785901297, "learning_rate": 1.5974432979033405e-06, "loss": 0.652, "step": 6443 }, { "epoch": 0.82, "grad_norm": 0.45607986233485087, "learning_rate": 1.5952007123995071e-06, "loss": 0.5931, "step": 6444 }, { "epoch": 0.82, "grad_norm": 0.5190762074415333, "learning_rate": 1.5929595656952012e-06, "loss": 0.6221, "step": 6445 }, { "epoch": 0.82, "grad_norm": 0.479779839035318, "learning_rate": 1.5907198581740768e-06, "loss": 0.5604, "step": 6446 }, { "epoch": 0.82, "grad_norm": 0.5369090143046422, "learning_rate": 1.5884815902195472e-06, "loss": 0.6398, "step": 6447 }, { "epoch": 0.82, "grad_norm": 0.5549386345947022, "learning_rate": 1.586244762214776e-06, "loss": 0.6049, "step": 6448 }, { "epoch": 0.82, "grad_norm": 0.4631063831551289, "learning_rate": 1.584009374542681e-06, "loss": 0.5616, "step": 6449 }, { "epoch": 0.82, "grad_norm": 0.5204509528204698, "learning_rate": 1.581775427585932e-06, "loss": 0.572, "step": 6450 }, { "epoch": 0.82, "grad_norm": 0.5980640933101454, "learning_rate": 1.5795429217269553e-06, "loss": 0.6377, "step": 6451 }, { "epoch": 0.82, "grad_norm": 0.46874426802364705, "learning_rate": 1.5773118573479262e-06, "loss": 0.5739, "step": 6452 }, { "epoch": 0.82, "grad_norm": 0.4967110697036346, "learning_rate": 1.5750822348307815e-06, "loss": 0.5795, "step": 6453 }, { "epoch": 0.82, "grad_norm": 0.49998741445959344, "learning_rate": 1.5728540545572003e-06, "loss": 0.6239, "step": 6454 }, { "epoch": 0.82, "grad_norm": 0.47323838451102207, "learning_rate": 1.5706273169086205e-06, "loss": 0.5931, "step": 6455 }, { "epoch": 0.82, "grad_norm": 0.4827417508587897, "learning_rate": 1.5684020222662343e-06, "loss": 0.6053, "step": 6456 }, { "epoch": 0.82, "grad_norm": 0.4910789674397882, "learning_rate": 1.5661781710109846e-06, "loss": 0.6045, "step": 6457 }, { "epoch": 0.82, "grad_norm": 0.5116038246226587, "learning_rate": 1.5639557635235669e-06, "loss": 0.5946, "step": 6458 }, { "epoch": 0.83, "grad_norm": 0.507251808160613, "learning_rate": 1.561734800184428e-06, "loss": 0.6071, "step": 6459 }, { "epoch": 0.83, "grad_norm": 0.5520881842260438, "learning_rate": 1.5595152813737756e-06, "loss": 0.6201, "step": 6460 }, { "epoch": 0.83, "grad_norm": 0.542293175346529, "learning_rate": 1.557297207471562e-06, "loss": 0.6132, "step": 6461 }, { "epoch": 0.83, "grad_norm": 0.4356626618094731, "learning_rate": 1.5550805788574964e-06, "loss": 0.5586, "step": 6462 }, { "epoch": 0.83, "grad_norm": 0.4696747882413109, "learning_rate": 1.552865395911033e-06, "loss": 0.5823, "step": 6463 }, { "epoch": 0.83, "grad_norm": 0.48922140253884905, "learning_rate": 1.5506516590113885e-06, "loss": 0.5848, "step": 6464 }, { "epoch": 0.83, "grad_norm": 0.5631249300000284, "learning_rate": 1.5484393685375254e-06, "loss": 0.6409, "step": 6465 }, { "epoch": 0.83, "grad_norm": 0.4569483103958929, "learning_rate": 1.5462285248681607e-06, "loss": 0.5815, "step": 6466 }, { "epoch": 0.83, "grad_norm": 0.45371980199207534, "learning_rate": 1.5440191283817684e-06, "loss": 0.5743, "step": 6467 }, { "epoch": 0.83, "grad_norm": 0.4791459607685973, "learning_rate": 1.541811179456567e-06, "loss": 0.5781, "step": 6468 }, { "epoch": 0.83, "grad_norm": 0.4914656540278162, "learning_rate": 1.539604678470531e-06, "loss": 0.5793, "step": 6469 }, { "epoch": 0.83, "grad_norm": 0.5087008135472111, "learning_rate": 1.5373996258013869e-06, "loss": 0.5894, "step": 6470 }, { "epoch": 0.83, "grad_norm": 0.5095222723633646, "learning_rate": 1.5351960218266148e-06, "loss": 0.5956, "step": 6471 }, { "epoch": 0.83, "grad_norm": 0.5152510918373804, "learning_rate": 1.5329938669234401e-06, "loss": 0.5913, "step": 6472 }, { "epoch": 0.83, "grad_norm": 0.5348508949837623, "learning_rate": 1.5307931614688453e-06, "loss": 0.5716, "step": 6473 }, { "epoch": 0.83, "grad_norm": 0.49229014539291743, "learning_rate": 1.5285939058395694e-06, "loss": 0.6104, "step": 6474 }, { "epoch": 0.83, "grad_norm": 0.4964183831661262, "learning_rate": 1.5263961004120952e-06, "loss": 0.5864, "step": 6475 }, { "epoch": 0.83, "grad_norm": 0.5355916115249679, "learning_rate": 1.524199745562661e-06, "loss": 0.5976, "step": 6476 }, { "epoch": 0.83, "grad_norm": 0.5320000520687596, "learning_rate": 1.5220048416672562e-06, "loss": 0.6062, "step": 6477 }, { "epoch": 0.83, "grad_norm": 0.5673354158724759, "learning_rate": 1.5198113891016208e-06, "loss": 0.6204, "step": 6478 }, { "epoch": 0.83, "grad_norm": 0.46960548761096693, "learning_rate": 1.5176193882412472e-06, "loss": 0.5993, "step": 6479 }, { "epoch": 0.83, "grad_norm": 0.49332085593636366, "learning_rate": 1.5154288394613804e-06, "loss": 0.5696, "step": 6480 }, { "epoch": 0.83, "grad_norm": 0.4586882249644384, "learning_rate": 1.513239743137015e-06, "loss": 0.5909, "step": 6481 }, { "epoch": 0.83, "grad_norm": 0.4854426760843164, "learning_rate": 1.5110520996428978e-06, "loss": 0.5959, "step": 6482 }, { "epoch": 0.83, "grad_norm": 0.5406789039968933, "learning_rate": 1.5088659093535274e-06, "loss": 0.6273, "step": 6483 }, { "epoch": 0.83, "grad_norm": 0.4842609699132563, "learning_rate": 1.506681172643152e-06, "loss": 0.5684, "step": 6484 }, { "epoch": 0.83, "grad_norm": 0.5163869754232363, "learning_rate": 1.504497889885772e-06, "loss": 0.5832, "step": 6485 }, { "epoch": 0.83, "grad_norm": 0.4926209627687408, "learning_rate": 1.502316061455139e-06, "loss": 0.6216, "step": 6486 }, { "epoch": 0.83, "grad_norm": 0.4438349517727881, "learning_rate": 1.5001356877247563e-06, "loss": 0.5829, "step": 6487 }, { "epoch": 0.83, "grad_norm": 0.46854052581508826, "learning_rate": 1.4979567690678775e-06, "loss": 0.5736, "step": 6488 }, { "epoch": 0.83, "grad_norm": 0.4487722689684917, "learning_rate": 1.4957793058575054e-06, "loss": 0.5743, "step": 6489 }, { "epoch": 0.83, "grad_norm": 0.8571862479583551, "learning_rate": 1.4936032984663974e-06, "loss": 0.592, "step": 6490 }, { "epoch": 0.83, "grad_norm": 0.5248715581269149, "learning_rate": 1.4914287472670574e-06, "loss": 0.5852, "step": 6491 }, { "epoch": 0.83, "grad_norm": 0.4978037387639517, "learning_rate": 1.4892556526317436e-06, "loss": 0.5634, "step": 6492 }, { "epoch": 0.83, "grad_norm": 0.47287975960791295, "learning_rate": 1.4870840149324617e-06, "loss": 0.5975, "step": 6493 }, { "epoch": 0.83, "grad_norm": 0.5282708739531567, "learning_rate": 1.4849138345409696e-06, "loss": 0.6206, "step": 6494 }, { "epoch": 0.83, "grad_norm": 0.5146750973570069, "learning_rate": 1.4827451118287828e-06, "loss": 0.5915, "step": 6495 }, { "epoch": 0.83, "grad_norm": 0.4965165660025011, "learning_rate": 1.4805778471671506e-06, "loss": 0.6523, "step": 6496 }, { "epoch": 0.83, "grad_norm": 0.4818948552573344, "learning_rate": 1.4784120409270875e-06, "loss": 0.5967, "step": 6497 }, { "epoch": 0.83, "grad_norm": 0.5087649731627202, "learning_rate": 1.4762476934793512e-06, "loss": 0.6029, "step": 6498 }, { "epoch": 0.83, "grad_norm": 0.5544753263440403, "learning_rate": 1.4740848051944534e-06, "loss": 0.5933, "step": 6499 }, { "epoch": 0.83, "grad_norm": 0.547673077618066, "learning_rate": 1.4719233764426532e-06, "loss": 0.6187, "step": 6500 }, { "epoch": 0.83, "grad_norm": 0.4769922241305385, "learning_rate": 1.4697634075939582e-06, "loss": 0.5936, "step": 6501 }, { "epoch": 0.83, "grad_norm": 0.4933271349720804, "learning_rate": 1.4676048990181346e-06, "loss": 0.5823, "step": 6502 }, { "epoch": 0.83, "grad_norm": 0.5421892211829342, "learning_rate": 1.4654478510846904e-06, "loss": 0.647, "step": 6503 }, { "epoch": 0.83, "grad_norm": 0.47691609358417036, "learning_rate": 1.4632922641628877e-06, "loss": 0.5993, "step": 6504 }, { "epoch": 0.83, "grad_norm": 0.5106884774743327, "learning_rate": 1.4611381386217327e-06, "loss": 0.5962, "step": 6505 }, { "epoch": 0.83, "grad_norm": 0.5056630827764785, "learning_rate": 1.458985474829988e-06, "loss": 0.5847, "step": 6506 }, { "epoch": 0.83, "grad_norm": 0.507286410832936, "learning_rate": 1.4568342731561625e-06, "loss": 0.5745, "step": 6507 }, { "epoch": 0.83, "grad_norm": 0.46561657565655756, "learning_rate": 1.4546845339685146e-06, "loss": 0.5726, "step": 6508 }, { "epoch": 0.83, "grad_norm": 0.4917764694655876, "learning_rate": 1.4525362576350576e-06, "loss": 0.5985, "step": 6509 }, { "epoch": 0.83, "grad_norm": 0.47695567952352597, "learning_rate": 1.4503894445235478e-06, "loss": 0.5868, "step": 6510 }, { "epoch": 0.83, "grad_norm": 0.5368607205322022, "learning_rate": 1.448244095001493e-06, "loss": 0.6179, "step": 6511 }, { "epoch": 0.83, "grad_norm": 0.5273085144483796, "learning_rate": 1.4461002094361532e-06, "loss": 0.6288, "step": 6512 }, { "epoch": 0.83, "grad_norm": 0.4698591548290457, "learning_rate": 1.4439577881945356e-06, "loss": 0.5872, "step": 6513 }, { "epoch": 0.83, "grad_norm": 0.5051321037687522, "learning_rate": 1.4418168316433923e-06, "loss": 0.5925, "step": 6514 }, { "epoch": 0.83, "grad_norm": 0.42053760745532814, "learning_rate": 1.4396773401492325e-06, "loss": 0.5599, "step": 6515 }, { "epoch": 0.83, "grad_norm": 0.4075277751401332, "learning_rate": 1.437539314078309e-06, "loss": 0.5805, "step": 6516 }, { "epoch": 0.83, "grad_norm": 0.5308397031661228, "learning_rate": 1.4354027537966285e-06, "loss": 0.5856, "step": 6517 }, { "epoch": 0.83, "grad_norm": 0.4532513199336814, "learning_rate": 1.4332676596699436e-06, "loss": 0.5667, "step": 6518 }, { "epoch": 0.83, "grad_norm": 0.5358995102553361, "learning_rate": 1.4311340320637567e-06, "loss": 0.5928, "step": 6519 }, { "epoch": 0.83, "grad_norm": 0.4793686919404408, "learning_rate": 1.4290018713433174e-06, "loss": 0.5855, "step": 6520 }, { "epoch": 0.83, "grad_norm": 0.5396920265870794, "learning_rate": 1.4268711778736266e-06, "loss": 0.6159, "step": 6521 }, { "epoch": 0.83, "grad_norm": 0.5625744474536152, "learning_rate": 1.424741952019436e-06, "loss": 0.6479, "step": 6522 }, { "epoch": 0.83, "grad_norm": 0.4966224554907417, "learning_rate": 1.4226141941452354e-06, "loss": 0.6023, "step": 6523 }, { "epoch": 0.83, "grad_norm": 0.5111086512059783, "learning_rate": 1.4204879046152775e-06, "loss": 0.6052, "step": 6524 }, { "epoch": 0.83, "grad_norm": 0.4885951756921298, "learning_rate": 1.4183630837935569e-06, "loss": 0.5696, "step": 6525 }, { "epoch": 0.83, "grad_norm": 0.6981818519774139, "learning_rate": 1.416239732043815e-06, "loss": 0.6557, "step": 6526 }, { "epoch": 0.83, "grad_norm": 0.6318335324715201, "learning_rate": 1.4141178497295448e-06, "loss": 0.6227, "step": 6527 }, { "epoch": 0.83, "grad_norm": 0.5104889693781977, "learning_rate": 1.4119974372139867e-06, "loss": 0.5842, "step": 6528 }, { "epoch": 0.83, "grad_norm": 0.4988128939373805, "learning_rate": 1.4098784948601308e-06, "loss": 0.5671, "step": 6529 }, { "epoch": 0.83, "grad_norm": 0.5573976282010672, "learning_rate": 1.4077610230307116e-06, "loss": 0.6109, "step": 6530 }, { "epoch": 0.83, "grad_norm": 0.4839004605646364, "learning_rate": 1.4056450220882168e-06, "loss": 0.5882, "step": 6531 }, { "epoch": 0.83, "grad_norm": 0.5218093379656122, "learning_rate": 1.403530492394879e-06, "loss": 0.6176, "step": 6532 }, { "epoch": 0.83, "grad_norm": 0.49547780200673913, "learning_rate": 1.4014174343126796e-06, "loss": 0.5838, "step": 6533 }, { "epoch": 0.83, "grad_norm": 0.47885966756191756, "learning_rate": 1.3993058482033505e-06, "loss": 0.5869, "step": 6534 }, { "epoch": 0.83, "grad_norm": 0.43346342267068777, "learning_rate": 1.3971957344283671e-06, "loss": 0.5551, "step": 6535 }, { "epoch": 0.83, "grad_norm": 0.4742970415862851, "learning_rate": 1.395087093348957e-06, "loss": 0.5995, "step": 6536 }, { "epoch": 0.84, "grad_norm": 0.4526157042055194, "learning_rate": 1.3929799253260922e-06, "loss": 0.5716, "step": 6537 }, { "epoch": 0.84, "grad_norm": 0.579198669725281, "learning_rate": 1.390874230720497e-06, "loss": 0.5808, "step": 6538 }, { "epoch": 0.84, "grad_norm": 0.4290233852076531, "learning_rate": 1.3887700098926381e-06, "loss": 0.5702, "step": 6539 }, { "epoch": 0.84, "grad_norm": 0.44501762425995545, "learning_rate": 1.3866672632027335e-06, "loss": 0.5635, "step": 6540 }, { "epoch": 0.84, "grad_norm": 0.4815644802874901, "learning_rate": 1.3845659910107478e-06, "loss": 0.6013, "step": 6541 }, { "epoch": 0.84, "grad_norm": 0.47965583758936536, "learning_rate": 1.382466193676394e-06, "loss": 0.5613, "step": 6542 }, { "epoch": 0.84, "grad_norm": 0.5285887841580278, "learning_rate": 1.3803678715591317e-06, "loss": 0.5802, "step": 6543 }, { "epoch": 0.84, "grad_norm": 0.5157762259373121, "learning_rate": 1.378271025018164e-06, "loss": 0.5938, "step": 6544 }, { "epoch": 0.84, "grad_norm": 0.5237550779053144, "learning_rate": 1.3761756544124528e-06, "loss": 0.6102, "step": 6545 }, { "epoch": 0.84, "grad_norm": 0.5334068215655334, "learning_rate": 1.3740817601006985e-06, "loss": 0.6288, "step": 6546 }, { "epoch": 0.84, "grad_norm": 0.5315683719303478, "learning_rate": 1.3719893424413455e-06, "loss": 0.6426, "step": 6547 }, { "epoch": 0.84, "grad_norm": 0.51100511113432, "learning_rate": 1.3698984017925942e-06, "loss": 0.6152, "step": 6548 }, { "epoch": 0.84, "grad_norm": 0.5314854006704584, "learning_rate": 1.3678089385123873e-06, "loss": 0.5759, "step": 6549 }, { "epoch": 0.84, "grad_norm": 0.5023912917591025, "learning_rate": 1.3657209529584147e-06, "loss": 0.5854, "step": 6550 }, { "epoch": 0.84, "grad_norm": 0.47063004731666386, "learning_rate": 1.3636344454881134e-06, "loss": 0.584, "step": 6551 }, { "epoch": 0.84, "grad_norm": 0.5639329120429903, "learning_rate": 1.3615494164586706e-06, "loss": 0.6405, "step": 6552 }, { "epoch": 0.84, "grad_norm": 0.4987202973287521, "learning_rate": 1.3594658662270187e-06, "loss": 0.6158, "step": 6553 }, { "epoch": 0.84, "grad_norm": 0.4975225371959418, "learning_rate": 1.3573837951498337e-06, "loss": 0.5659, "step": 6554 }, { "epoch": 0.84, "grad_norm": 0.4483123858238924, "learning_rate": 1.3553032035835445e-06, "loss": 0.581, "step": 6555 }, { "epoch": 0.84, "grad_norm": 0.4863494286673974, "learning_rate": 1.3532240918843175e-06, "loss": 0.5838, "step": 6556 }, { "epoch": 0.84, "grad_norm": 0.4835150969963449, "learning_rate": 1.3511464604080736e-06, "loss": 0.6031, "step": 6557 }, { "epoch": 0.84, "grad_norm": 0.4956219322270733, "learning_rate": 1.3490703095104774e-06, "loss": 0.5677, "step": 6558 }, { "epoch": 0.84, "grad_norm": 0.5099955062868783, "learning_rate": 1.346995639546943e-06, "loss": 0.6132, "step": 6559 }, { "epoch": 0.84, "grad_norm": 0.5374823507641591, "learning_rate": 1.3449224508726287e-06, "loss": 0.6135, "step": 6560 }, { "epoch": 0.84, "grad_norm": 0.509775089214009, "learning_rate": 1.3428507438424376e-06, "loss": 0.5775, "step": 6561 }, { "epoch": 0.84, "grad_norm": 0.45472205357122736, "learning_rate": 1.3407805188110224e-06, "loss": 0.5694, "step": 6562 }, { "epoch": 0.84, "grad_norm": 0.5064378326449156, "learning_rate": 1.3387117761327806e-06, "loss": 0.5982, "step": 6563 }, { "epoch": 0.84, "grad_norm": 0.5594729489619872, "learning_rate": 1.3366445161618525e-06, "loss": 0.6372, "step": 6564 }, { "epoch": 0.84, "grad_norm": 0.5593587100960136, "learning_rate": 1.334578739252127e-06, "loss": 0.5787, "step": 6565 }, { "epoch": 0.84, "grad_norm": 0.5062378420642643, "learning_rate": 1.332514445757246e-06, "loss": 0.5675, "step": 6566 }, { "epoch": 0.84, "grad_norm": 0.5361636569592243, "learning_rate": 1.3304516360305886e-06, "loss": 0.5925, "step": 6567 }, { "epoch": 0.84, "grad_norm": 0.5309920760392062, "learning_rate": 1.3283903104252815e-06, "loss": 0.5792, "step": 6568 }, { "epoch": 0.84, "grad_norm": 0.566540809153864, "learning_rate": 1.3263304692942014e-06, "loss": 0.6404, "step": 6569 }, { "epoch": 0.84, "grad_norm": 0.4771953776080576, "learning_rate": 1.324272112989965e-06, "loss": 0.5595, "step": 6570 }, { "epoch": 0.84, "grad_norm": 0.4700353836518012, "learning_rate": 1.3222152418649404e-06, "loss": 0.5714, "step": 6571 }, { "epoch": 0.84, "grad_norm": 0.5015635707682015, "learning_rate": 1.320159856271237e-06, "loss": 0.5724, "step": 6572 }, { "epoch": 0.84, "grad_norm": 0.44899378227144454, "learning_rate": 1.3181059565607124e-06, "loss": 0.5951, "step": 6573 }, { "epoch": 0.84, "grad_norm": 0.4478198420994385, "learning_rate": 1.3160535430849709e-06, "loss": 0.5548, "step": 6574 }, { "epoch": 0.84, "grad_norm": 0.48965974255675276, "learning_rate": 1.3140026161953578e-06, "loss": 0.6179, "step": 6575 }, { "epoch": 0.84, "grad_norm": 0.5007599094288707, "learning_rate": 1.3119531762429694e-06, "loss": 0.609, "step": 6576 }, { "epoch": 0.84, "grad_norm": 0.5431856097829345, "learning_rate": 1.3099052235786435e-06, "loss": 0.6178, "step": 6577 }, { "epoch": 0.84, "grad_norm": 0.5067741827941711, "learning_rate": 1.3078587585529657e-06, "loss": 0.5986, "step": 6578 }, { "epoch": 0.84, "grad_norm": 0.5232175443580542, "learning_rate": 1.3058137815162653e-06, "loss": 0.5917, "step": 6579 }, { "epoch": 0.84, "grad_norm": 0.544052518922055, "learning_rate": 1.303770292818618e-06, "loss": 0.614, "step": 6580 }, { "epoch": 0.84, "grad_norm": 0.5066331767503862, "learning_rate": 1.301728292809844e-06, "loss": 0.595, "step": 6581 }, { "epoch": 0.84, "grad_norm": 0.59104033231099, "learning_rate": 1.2996877818395082e-06, "loss": 0.6398, "step": 6582 }, { "epoch": 0.84, "grad_norm": 0.49176729395776925, "learning_rate": 1.2976487602569222e-06, "loss": 0.5944, "step": 6583 }, { "epoch": 0.84, "grad_norm": 0.6033080744762721, "learning_rate": 1.2956112284111411e-06, "loss": 0.6197, "step": 6584 }, { "epoch": 0.84, "grad_norm": 0.5109609105298285, "learning_rate": 1.293575186650966e-06, "loss": 0.5662, "step": 6585 }, { "epoch": 0.84, "grad_norm": 0.4813128779870783, "learning_rate": 1.2915406353249428e-06, "loss": 0.6015, "step": 6586 }, { "epoch": 0.84, "grad_norm": 0.5252778864948373, "learning_rate": 1.289507574781359e-06, "loss": 0.6102, "step": 6587 }, { "epoch": 0.84, "grad_norm": 0.5147116177202489, "learning_rate": 1.2874760053682567e-06, "loss": 0.6097, "step": 6588 }, { "epoch": 0.84, "grad_norm": 0.4919617701352733, "learning_rate": 1.2854459274334097e-06, "loss": 0.6087, "step": 6589 }, { "epoch": 0.84, "grad_norm": 0.525438985597571, "learning_rate": 1.2834173413243446e-06, "loss": 0.6118, "step": 6590 }, { "epoch": 0.84, "grad_norm": 0.4905490144580917, "learning_rate": 1.2813902473883312e-06, "loss": 0.5591, "step": 6591 }, { "epoch": 0.84, "grad_norm": 0.522117000778162, "learning_rate": 1.279364645972383e-06, "loss": 0.5971, "step": 6592 }, { "epoch": 0.84, "grad_norm": 0.5072568720566463, "learning_rate": 1.2773405374232573e-06, "loss": 0.5948, "step": 6593 }, { "epoch": 0.84, "grad_norm": 0.4991125211007835, "learning_rate": 1.2753179220874556e-06, "loss": 0.617, "step": 6594 }, { "epoch": 0.84, "grad_norm": 0.5120673353139897, "learning_rate": 1.27329680031123e-06, "loss": 0.607, "step": 6595 }, { "epoch": 0.84, "grad_norm": 0.5288704189767929, "learning_rate": 1.2712771724405725e-06, "loss": 0.6178, "step": 6596 }, { "epoch": 0.84, "grad_norm": 0.45698339445028185, "learning_rate": 1.2692590388212123e-06, "loss": 0.586, "step": 6597 }, { "epoch": 0.84, "grad_norm": 0.4487601627630079, "learning_rate": 1.2672423997986327e-06, "loss": 0.5639, "step": 6598 }, { "epoch": 0.84, "grad_norm": 0.47098988923110713, "learning_rate": 1.2652272557180578e-06, "loss": 0.6, "step": 6599 }, { "epoch": 0.84, "grad_norm": 0.4458791272122251, "learning_rate": 1.263213606924456e-06, "loss": 0.5758, "step": 6600 }, { "epoch": 0.84, "grad_norm": 0.5427914422468408, "learning_rate": 1.2612014537625371e-06, "loss": 0.6497, "step": 6601 }, { "epoch": 0.84, "grad_norm": 0.5221384035475022, "learning_rate": 1.2591907965767613e-06, "loss": 0.5842, "step": 6602 }, { "epoch": 0.84, "grad_norm": 0.47862948510028896, "learning_rate": 1.2571816357113277e-06, "loss": 0.5671, "step": 6603 }, { "epoch": 0.84, "grad_norm": 0.5583029721567535, "learning_rate": 1.2551739715101785e-06, "loss": 0.6438, "step": 6604 }, { "epoch": 0.84, "grad_norm": 0.5458660516357058, "learning_rate": 1.253167804317006e-06, "loss": 0.5953, "step": 6605 }, { "epoch": 0.84, "grad_norm": 0.5443472906247574, "learning_rate": 1.251163134475234e-06, "loss": 0.6298, "step": 6606 }, { "epoch": 0.84, "grad_norm": 0.6588212224946968, "learning_rate": 1.2491599623280426e-06, "loss": 0.6549, "step": 6607 }, { "epoch": 0.84, "grad_norm": 0.5111932488487062, "learning_rate": 1.2471582882183464e-06, "loss": 0.6124, "step": 6608 }, { "epoch": 0.84, "grad_norm": 0.4753004691285278, "learning_rate": 1.2451581124888145e-06, "loss": 0.5774, "step": 6609 }, { "epoch": 0.84, "grad_norm": 0.4981080807253359, "learning_rate": 1.2431594354818478e-06, "loss": 0.596, "step": 6610 }, { "epoch": 0.84, "grad_norm": 0.5098116928086956, "learning_rate": 1.2411622575395965e-06, "loss": 0.6369, "step": 6611 }, { "epoch": 0.84, "grad_norm": 0.5275739168668644, "learning_rate": 1.2391665790039543e-06, "loss": 0.5998, "step": 6612 }, { "epoch": 0.84, "grad_norm": 0.5488011306753184, "learning_rate": 1.2371724002165563e-06, "loss": 0.6316, "step": 6613 }, { "epoch": 0.84, "grad_norm": 0.524640759985224, "learning_rate": 1.2351797215187832e-06, "loss": 0.5963, "step": 6614 }, { "epoch": 0.85, "grad_norm": 0.5035626210153779, "learning_rate": 1.233188543251751e-06, "loss": 0.5904, "step": 6615 }, { "epoch": 0.85, "grad_norm": 0.5586078349149477, "learning_rate": 1.2311988657563335e-06, "loss": 0.6355, "step": 6616 }, { "epoch": 0.85, "grad_norm": 0.4647058023824334, "learning_rate": 1.2292106893731349e-06, "loss": 0.6008, "step": 6617 }, { "epoch": 0.85, "grad_norm": 0.47825372029513535, "learning_rate": 1.2272240144425086e-06, "loss": 0.5867, "step": 6618 }, { "epoch": 0.85, "grad_norm": 0.530483731153226, "learning_rate": 1.2252388413045468e-06, "loss": 0.6228, "step": 6619 }, { "epoch": 0.85, "grad_norm": 0.45122478363355994, "learning_rate": 1.2232551702990903e-06, "loss": 0.5818, "step": 6620 }, { "epoch": 0.85, "grad_norm": 0.4852485547108883, "learning_rate": 1.221273001765717e-06, "loss": 0.5871, "step": 6621 }, { "epoch": 0.85, "grad_norm": 0.806656005681559, "learning_rate": 1.2192923360437504e-06, "loss": 0.5854, "step": 6622 }, { "epoch": 0.85, "grad_norm": 0.5006608602038627, "learning_rate": 1.2173131734722577e-06, "loss": 0.5731, "step": 6623 }, { "epoch": 0.85, "grad_norm": 0.5440530689840223, "learning_rate": 1.215335514390047e-06, "loss": 0.6186, "step": 6624 }, { "epoch": 0.85, "grad_norm": 0.5218444367366372, "learning_rate": 1.2133593591356686e-06, "loss": 0.5804, "step": 6625 }, { "epoch": 0.85, "grad_norm": 0.5375421382342368, "learning_rate": 1.2113847080474162e-06, "loss": 0.6318, "step": 6626 }, { "epoch": 0.85, "grad_norm": 0.47367712048493354, "learning_rate": 1.2094115614633274e-06, "loss": 0.6019, "step": 6627 }, { "epoch": 0.85, "grad_norm": 0.4724477049933573, "learning_rate": 1.2074399197211805e-06, "loss": 0.5972, "step": 6628 }, { "epoch": 0.85, "grad_norm": 0.6870138545401604, "learning_rate": 1.205469783158496e-06, "loss": 0.6065, "step": 6629 }, { "epoch": 0.85, "grad_norm": 0.519422519334149, "learning_rate": 1.2035011521125372e-06, "loss": 0.6701, "step": 6630 }, { "epoch": 0.85, "grad_norm": 0.4809492056462927, "learning_rate": 1.2015340269203113e-06, "loss": 0.5846, "step": 6631 }, { "epoch": 0.85, "grad_norm": 0.45226399163648934, "learning_rate": 1.1995684079185642e-06, "loss": 0.5836, "step": 6632 }, { "epoch": 0.85, "grad_norm": 0.4783994150410245, "learning_rate": 1.1976042954437883e-06, "loss": 0.582, "step": 6633 }, { "epoch": 0.85, "grad_norm": 0.48525330959390567, "learning_rate": 1.1956416898322132e-06, "loss": 0.5747, "step": 6634 }, { "epoch": 0.85, "grad_norm": 0.520362555638353, "learning_rate": 1.1936805914198157e-06, "loss": 0.5709, "step": 6635 }, { "epoch": 0.85, "grad_norm": 0.5113175104138189, "learning_rate": 1.1917210005423096e-06, "loss": 0.5888, "step": 6636 }, { "epoch": 0.85, "grad_norm": 0.5577925932580543, "learning_rate": 1.1897629175351532e-06, "loss": 0.6337, "step": 6637 }, { "epoch": 0.85, "grad_norm": 0.5520343764785495, "learning_rate": 1.1878063427335507e-06, "loss": 0.6005, "step": 6638 }, { "epoch": 0.85, "grad_norm": 0.5312679669153748, "learning_rate": 1.1858512764724394e-06, "loss": 0.5906, "step": 6639 }, { "epoch": 0.85, "grad_norm": 0.5376651639075392, "learning_rate": 1.1838977190865042e-06, "loss": 0.597, "step": 6640 }, { "epoch": 0.85, "grad_norm": 0.5033649981016814, "learning_rate": 1.1819456709101707e-06, "loss": 0.6107, "step": 6641 }, { "epoch": 0.85, "grad_norm": 0.5204633354605022, "learning_rate": 1.179995132277605e-06, "loss": 0.6049, "step": 6642 }, { "epoch": 0.85, "grad_norm": 0.5145242463443738, "learning_rate": 1.1780461035227164e-06, "loss": 0.5853, "step": 6643 }, { "epoch": 0.85, "grad_norm": 0.5201521994774311, "learning_rate": 1.176098584979153e-06, "loss": 0.5869, "step": 6644 }, { "epoch": 0.85, "grad_norm": 0.5263988145964802, "learning_rate": 1.1741525769803098e-06, "loss": 0.6304, "step": 6645 }, { "epoch": 0.85, "grad_norm": 0.500963346469832, "learning_rate": 1.1722080798593183e-06, "loss": 0.6041, "step": 6646 }, { "epoch": 0.85, "grad_norm": 0.4661103583368812, "learning_rate": 1.1702650939490546e-06, "loss": 0.5898, "step": 6647 }, { "epoch": 0.85, "grad_norm": 0.4906828645503267, "learning_rate": 1.16832361958213e-06, "loss": 0.5841, "step": 6648 }, { "epoch": 0.85, "grad_norm": 0.46832097953731955, "learning_rate": 1.1663836570909037e-06, "loss": 0.5991, "step": 6649 }, { "epoch": 0.85, "grad_norm": 0.5826501954845862, "learning_rate": 1.164445206807473e-06, "loss": 0.6421, "step": 6650 }, { "epoch": 0.85, "grad_norm": 0.5002305770955321, "learning_rate": 1.1625082690636758e-06, "loss": 0.571, "step": 6651 }, { "epoch": 0.85, "grad_norm": 0.5414578414783207, "learning_rate": 1.1605728441910956e-06, "loss": 0.6254, "step": 6652 }, { "epoch": 0.85, "grad_norm": 0.5372318187884413, "learning_rate": 1.1586389325210534e-06, "loss": 0.6091, "step": 6653 }, { "epoch": 0.85, "grad_norm": 0.494886615720855, "learning_rate": 1.1567065343846085e-06, "loss": 0.5829, "step": 6654 }, { "epoch": 0.85, "grad_norm": 0.5347323464300613, "learning_rate": 1.1547756501125661e-06, "loss": 0.6268, "step": 6655 }, { "epoch": 0.85, "grad_norm": 0.51751490714716, "learning_rate": 1.1528462800354722e-06, "loss": 0.6276, "step": 6656 }, { "epoch": 0.85, "grad_norm": 0.5428952527070647, "learning_rate": 1.1509184244836058e-06, "loss": 0.5953, "step": 6657 }, { "epoch": 0.85, "grad_norm": 0.4902612095142405, "learning_rate": 1.148992083786995e-06, "loss": 0.5786, "step": 6658 }, { "epoch": 0.85, "grad_norm": 0.4389392889583733, "learning_rate": 1.1470672582754073e-06, "loss": 0.5685, "step": 6659 }, { "epoch": 0.85, "grad_norm": 0.5736481466130228, "learning_rate": 1.1451439482783478e-06, "loss": 0.5996, "step": 6660 }, { "epoch": 0.85, "grad_norm": 0.4659347365636649, "learning_rate": 1.1432221541250655e-06, "loss": 0.5764, "step": 6661 }, { "epoch": 0.85, "grad_norm": 0.5262037266812775, "learning_rate": 1.141301876144547e-06, "loss": 0.5691, "step": 6662 }, { "epoch": 0.85, "grad_norm": 0.48541313959343874, "learning_rate": 1.139383114665521e-06, "loss": 0.5932, "step": 6663 }, { "epoch": 0.85, "grad_norm": 0.4871725783342103, "learning_rate": 1.1374658700164553e-06, "loss": 0.5727, "step": 6664 }, { "epoch": 0.85, "grad_norm": 0.5274297206776244, "learning_rate": 1.13555014252556e-06, "loss": 0.5888, "step": 6665 }, { "epoch": 0.85, "grad_norm": 0.45207205422698477, "learning_rate": 1.1336359325207846e-06, "loss": 0.5864, "step": 6666 }, { "epoch": 0.85, "grad_norm": 0.47283067309329835, "learning_rate": 1.1317232403298172e-06, "loss": 0.5766, "step": 6667 }, { "epoch": 0.85, "grad_norm": 0.433466545074158, "learning_rate": 1.1298120662800883e-06, "loss": 0.5992, "step": 6668 }, { "epoch": 0.85, "grad_norm": 0.468064700992031, "learning_rate": 1.127902410698767e-06, "loss": 0.5601, "step": 6669 }, { "epoch": 0.85, "grad_norm": 0.5438471316512375, "learning_rate": 1.1259942739127637e-06, "loss": 0.6093, "step": 6670 }, { "epoch": 0.85, "grad_norm": 0.5073233880103257, "learning_rate": 1.124087656248729e-06, "loss": 0.5674, "step": 6671 }, { "epoch": 0.85, "grad_norm": 0.5387482464663587, "learning_rate": 1.1221825580330515e-06, "loss": 0.6041, "step": 6672 }, { "epoch": 0.85, "grad_norm": 0.5219357122619448, "learning_rate": 1.120278979591861e-06, "loss": 0.5977, "step": 6673 }, { "epoch": 0.85, "grad_norm": 0.567237602762615, "learning_rate": 1.1183769212510275e-06, "loss": 0.5941, "step": 6674 }, { "epoch": 0.85, "grad_norm": 0.5645991778505809, "learning_rate": 1.1164763833361592e-06, "loss": 0.6141, "step": 6675 }, { "epoch": 0.85, "grad_norm": 0.5043903139739808, "learning_rate": 1.1145773661726066e-06, "loss": 0.6125, "step": 6676 }, { "epoch": 0.85, "grad_norm": 0.5265039694482372, "learning_rate": 1.112679870085458e-06, "loss": 0.6263, "step": 6677 }, { "epoch": 0.85, "grad_norm": 0.5419343874343282, "learning_rate": 1.110783895399542e-06, "loss": 0.5791, "step": 6678 }, { "epoch": 0.85, "grad_norm": 0.4754149858245489, "learning_rate": 1.1088894424394226e-06, "loss": 0.5957, "step": 6679 }, { "epoch": 0.85, "grad_norm": 0.516110229852473, "learning_rate": 1.1069965115294145e-06, "loss": 0.6032, "step": 6680 }, { "epoch": 0.85, "grad_norm": 0.5040373826205682, "learning_rate": 1.105105102993559e-06, "loss": 0.589, "step": 6681 }, { "epoch": 0.85, "grad_norm": 0.5119668930853822, "learning_rate": 1.1032152171556433e-06, "loss": 0.604, "step": 6682 }, { "epoch": 0.85, "grad_norm": 0.5668431724882194, "learning_rate": 1.101326854339193e-06, "loss": 0.6064, "step": 6683 }, { "epoch": 0.85, "grad_norm": 0.49894608368635196, "learning_rate": 1.0994400148674722e-06, "loss": 0.5806, "step": 6684 }, { "epoch": 0.85, "grad_norm": 0.5556128715367843, "learning_rate": 1.0975546990634866e-06, "loss": 0.6381, "step": 6685 }, { "epoch": 0.85, "grad_norm": 0.4915430194704006, "learning_rate": 1.0956709072499772e-06, "loss": 0.594, "step": 6686 }, { "epoch": 0.85, "grad_norm": 0.46711470992643994, "learning_rate": 1.0937886397494278e-06, "loss": 0.5667, "step": 6687 }, { "epoch": 0.85, "grad_norm": 0.6465547547396763, "learning_rate": 1.0919078968840613e-06, "loss": 0.6913, "step": 6688 }, { "epoch": 0.85, "grad_norm": 0.4830778980545446, "learning_rate": 1.0900286789758374e-06, "loss": 0.5757, "step": 6689 }, { "epoch": 0.85, "grad_norm": 0.48390006108438555, "learning_rate": 1.0881509863464522e-06, "loss": 0.5664, "step": 6690 }, { "epoch": 0.85, "grad_norm": 0.5013150256469743, "learning_rate": 1.0862748193173467e-06, "loss": 0.6048, "step": 6691 }, { "epoch": 0.85, "grad_norm": 0.527601354428541, "learning_rate": 1.0844001782096959e-06, "loss": 0.5822, "step": 6692 }, { "epoch": 0.86, "grad_norm": 0.48499807025095226, "learning_rate": 1.082527063344415e-06, "loss": 0.5852, "step": 6693 }, { "epoch": 0.86, "grad_norm": 0.43221061362336377, "learning_rate": 1.0806554750421628e-06, "loss": 0.5566, "step": 6694 }, { "epoch": 0.86, "grad_norm": 0.5014958530173254, "learning_rate": 1.0787854136233289e-06, "loss": 0.6164, "step": 6695 }, { "epoch": 0.86, "grad_norm": 0.5375652503898063, "learning_rate": 1.0769168794080475e-06, "loss": 0.6325, "step": 6696 }, { "epoch": 0.86, "grad_norm": 0.5047513370247884, "learning_rate": 1.0750498727161862e-06, "loss": 0.5819, "step": 6697 }, { "epoch": 0.86, "grad_norm": 0.4970427669145711, "learning_rate": 1.0731843938673581e-06, "loss": 0.5761, "step": 6698 }, { "epoch": 0.86, "grad_norm": 0.45162482268156906, "learning_rate": 1.0713204431809054e-06, "loss": 0.575, "step": 6699 }, { "epoch": 0.86, "grad_norm": 0.4518783153358594, "learning_rate": 1.0694580209759165e-06, "loss": 0.5771, "step": 6700 }, { "epoch": 0.86, "grad_norm": 0.5246018862449406, "learning_rate": 1.0675971275712128e-06, "loss": 0.5967, "step": 6701 }, { "epoch": 0.86, "grad_norm": 0.5143724904943037, "learning_rate": 1.0657377632853604e-06, "loss": 0.6091, "step": 6702 }, { "epoch": 0.86, "grad_norm": 0.5003588090045413, "learning_rate": 1.0638799284366596e-06, "loss": 0.5932, "step": 6703 }, { "epoch": 0.86, "grad_norm": 0.5395232266442923, "learning_rate": 1.0620236233431468e-06, "loss": 0.6419, "step": 6704 }, { "epoch": 0.86, "grad_norm": 0.4849117130278779, "learning_rate": 1.0601688483226002e-06, "loss": 0.5839, "step": 6705 }, { "epoch": 0.86, "grad_norm": 0.49827996937203606, "learning_rate": 1.0583156036925346e-06, "loss": 0.6041, "step": 6706 }, { "epoch": 0.86, "grad_norm": 0.4317058788557176, "learning_rate": 1.056463889770205e-06, "loss": 0.5912, "step": 6707 }, { "epoch": 0.86, "grad_norm": 0.4841337077126378, "learning_rate": 1.054613706872596e-06, "loss": 0.5724, "step": 6708 }, { "epoch": 0.86, "grad_norm": 0.4617344217320599, "learning_rate": 1.0527650553164425e-06, "loss": 0.5873, "step": 6709 }, { "epoch": 0.86, "grad_norm": 0.4677487588127383, "learning_rate": 1.0509179354182097e-06, "loss": 0.5793, "step": 6710 }, { "epoch": 0.86, "grad_norm": 0.5409674422730373, "learning_rate": 1.0490723474941023e-06, "loss": 0.6072, "step": 6711 }, { "epoch": 0.86, "grad_norm": 0.48053850614174176, "learning_rate": 1.0472282918600617e-06, "loss": 0.5806, "step": 6712 }, { "epoch": 0.86, "grad_norm": 0.5882329486791272, "learning_rate": 1.045385768831767e-06, "loss": 0.5957, "step": 6713 }, { "epoch": 0.86, "grad_norm": 0.5337750291248058, "learning_rate": 1.0435447787246378e-06, "loss": 0.594, "step": 6714 }, { "epoch": 0.86, "grad_norm": 0.42936437279214645, "learning_rate": 1.0417053218538287e-06, "loss": 0.5725, "step": 6715 }, { "epoch": 0.86, "grad_norm": 0.5131426709554482, "learning_rate": 1.0398673985342312e-06, "loss": 0.6046, "step": 6716 }, { "epoch": 0.86, "grad_norm": 0.5381701851860594, "learning_rate": 1.0380310090804757e-06, "loss": 0.6104, "step": 6717 }, { "epoch": 0.86, "grad_norm": 0.7451864951453789, "learning_rate": 1.0361961538069309e-06, "loss": 0.6101, "step": 6718 }, { "epoch": 0.86, "grad_norm": 0.49614640120007053, "learning_rate": 1.0343628330277011e-06, "loss": 0.6063, "step": 6719 }, { "epoch": 0.86, "grad_norm": 0.5056717861510469, "learning_rate": 1.0325310470566285e-06, "loss": 0.5666, "step": 6720 }, { "epoch": 0.86, "grad_norm": 0.5082201234209014, "learning_rate": 1.0307007962072923e-06, "loss": 0.5913, "step": 6721 }, { "epoch": 0.86, "grad_norm": 0.5315820502401194, "learning_rate": 1.0288720807930108e-06, "loss": 0.5814, "step": 6722 }, { "epoch": 0.86, "grad_norm": 0.5247787906848912, "learning_rate": 1.0270449011268358e-06, "loss": 0.6013, "step": 6723 }, { "epoch": 0.86, "grad_norm": 0.4813502889161255, "learning_rate": 1.025219257521558e-06, "loss": 0.6029, "step": 6724 }, { "epoch": 0.86, "grad_norm": 0.45667006347836414, "learning_rate": 1.0233951502897077e-06, "loss": 0.5788, "step": 6725 }, { "epoch": 0.86, "grad_norm": 0.4840893499665355, "learning_rate": 1.0215725797435482e-06, "loss": 0.591, "step": 6726 }, { "epoch": 0.86, "grad_norm": 0.48313094519554917, "learning_rate": 1.0197515461950824e-06, "loss": 0.6033, "step": 6727 }, { "epoch": 0.86, "grad_norm": 0.49269870630956114, "learning_rate": 1.0179320499560486e-06, "loss": 0.578, "step": 6728 }, { "epoch": 0.86, "grad_norm": 0.4786874378606339, "learning_rate": 1.0161140913379196e-06, "loss": 0.6201, "step": 6729 }, { "epoch": 0.86, "grad_norm": 0.5030772521849017, "learning_rate": 1.0142976706519136e-06, "loss": 0.5792, "step": 6730 }, { "epoch": 0.86, "grad_norm": 0.4588169344620808, "learning_rate": 1.0124827882089784e-06, "loss": 0.5863, "step": 6731 }, { "epoch": 0.86, "grad_norm": 0.5185771412310747, "learning_rate": 1.0106694443197963e-06, "loss": 0.6197, "step": 6732 }, { "epoch": 0.86, "grad_norm": 0.5280789473011401, "learning_rate": 1.0088576392947924e-06, "loss": 0.5851, "step": 6733 }, { "epoch": 0.86, "grad_norm": 0.4691035941798438, "learning_rate": 1.0070473734441244e-06, "loss": 0.5843, "step": 6734 }, { "epoch": 0.86, "grad_norm": 0.4840291203865761, "learning_rate": 1.0052386470776886e-06, "loss": 0.5774, "step": 6735 }, { "epoch": 0.86, "grad_norm": 0.4559147834114507, "learning_rate": 1.0034314605051143e-06, "loss": 0.564, "step": 6736 }, { "epoch": 0.86, "grad_norm": 0.5323654189904962, "learning_rate": 1.0016258140357748e-06, "loss": 0.6035, "step": 6737 }, { "epoch": 0.86, "grad_norm": 0.5888594412552277, "learning_rate": 9.99821707978773e-07, "loss": 0.6631, "step": 6738 }, { "epoch": 0.86, "grad_norm": 0.46696795079342923, "learning_rate": 9.980191426429486e-07, "loss": 0.5896, "step": 6739 }, { "epoch": 0.86, "grad_norm": 0.4975131712522518, "learning_rate": 9.962181183368823e-07, "loss": 0.5925, "step": 6740 }, { "epoch": 0.86, "grad_norm": 0.5457773020594031, "learning_rate": 9.944186353688823e-07, "loss": 0.6024, "step": 6741 }, { "epoch": 0.86, "grad_norm": 0.5402112396453795, "learning_rate": 9.926206940470018e-07, "loss": 0.5908, "step": 6742 }, { "epoch": 0.86, "grad_norm": 0.47718109717723595, "learning_rate": 9.908242946790226e-07, "loss": 0.6102, "step": 6743 }, { "epoch": 0.86, "grad_norm": 0.4571980914902644, "learning_rate": 9.890294375724718e-07, "loss": 0.5503, "step": 6744 }, { "epoch": 0.86, "grad_norm": 0.4897639113692208, "learning_rate": 9.87236123034605e-07, "loss": 0.579, "step": 6745 }, { "epoch": 0.86, "grad_norm": 0.4672019639141513, "learning_rate": 9.854443513724154e-07, "loss": 0.6047, "step": 6746 }, { "epoch": 0.86, "grad_norm": 0.4554893628098164, "learning_rate": 9.836541228926321e-07, "loss": 0.572, "step": 6747 }, { "epoch": 0.86, "grad_norm": 0.4913234150033016, "learning_rate": 9.818654379017245e-07, "loss": 0.5753, "step": 6748 }, { "epoch": 0.86, "grad_norm": 0.5237428962705606, "learning_rate": 9.800782967058876e-07, "loss": 0.5911, "step": 6749 }, { "epoch": 0.86, "grad_norm": 0.4690435455850048, "learning_rate": 9.782926996110598e-07, "loss": 0.5809, "step": 6750 }, { "epoch": 0.86, "grad_norm": 0.5343085944318129, "learning_rate": 9.765086469229158e-07, "loss": 0.6234, "step": 6751 }, { "epoch": 0.86, "grad_norm": 0.49456138857330284, "learning_rate": 9.74726138946863e-07, "loss": 0.5992, "step": 6752 }, { "epoch": 0.86, "grad_norm": 0.4634746711876447, "learning_rate": 9.729451759880448e-07, "loss": 0.5917, "step": 6753 }, { "epoch": 0.86, "grad_norm": 0.5000855397397721, "learning_rate": 9.711657583513412e-07, "loss": 0.5967, "step": 6754 }, { "epoch": 0.86, "grad_norm": 0.5151824929738137, "learning_rate": 9.693878863413664e-07, "loss": 0.607, "step": 6755 }, { "epoch": 0.86, "grad_norm": 0.49410098865084984, "learning_rate": 9.676115602624691e-07, "loss": 0.5884, "step": 6756 }, { "epoch": 0.86, "grad_norm": 0.5639052924290399, "learning_rate": 9.658367804187385e-07, "loss": 0.6426, "step": 6757 }, { "epoch": 0.86, "grad_norm": 0.5206786533310511, "learning_rate": 9.640635471139893e-07, "loss": 0.6334, "step": 6758 }, { "epoch": 0.86, "grad_norm": 0.4811145170596888, "learning_rate": 9.622918606517827e-07, "loss": 0.5761, "step": 6759 }, { "epoch": 0.86, "grad_norm": 0.5139426964479191, "learning_rate": 9.60521721335408e-07, "loss": 0.5946, "step": 6760 }, { "epoch": 0.86, "grad_norm": 0.47386326102303516, "learning_rate": 9.58753129467892e-07, "loss": 0.5896, "step": 6761 }, { "epoch": 0.86, "grad_norm": 0.5964981991733558, "learning_rate": 9.569860853519963e-07, "loss": 0.6184, "step": 6762 }, { "epoch": 0.86, "grad_norm": 0.5104543001728191, "learning_rate": 9.552205892902155e-07, "loss": 0.5922, "step": 6763 }, { "epoch": 0.86, "grad_norm": 0.48342527182370887, "learning_rate": 9.534566415847846e-07, "loss": 0.5708, "step": 6764 }, { "epoch": 0.86, "grad_norm": 0.5050490547892429, "learning_rate": 9.516942425376663e-07, "loss": 0.5603, "step": 6765 }, { "epoch": 0.86, "grad_norm": 0.4720186867075207, "learning_rate": 9.499333924505638e-07, "loss": 0.5952, "step": 6766 }, { "epoch": 0.86, "grad_norm": 0.5471454874405115, "learning_rate": 9.481740916249138e-07, "loss": 0.6217, "step": 6767 }, { "epoch": 0.86, "grad_norm": 0.48893552430257115, "learning_rate": 9.464163403618853e-07, "loss": 0.585, "step": 6768 }, { "epoch": 0.86, "grad_norm": 0.500786886994176, "learning_rate": 9.446601389623866e-07, "loss": 0.5986, "step": 6769 }, { "epoch": 0.86, "grad_norm": 0.45366110821861894, "learning_rate": 9.42905487727055e-07, "loss": 0.5558, "step": 6770 }, { "epoch": 0.86, "grad_norm": 0.4739312904405977, "learning_rate": 9.411523869562678e-07, "loss": 0.5828, "step": 6771 }, { "epoch": 0.87, "grad_norm": 0.47200723681644835, "learning_rate": 9.394008369501317e-07, "loss": 0.5868, "step": 6772 }, { "epoch": 0.87, "grad_norm": 0.4676586037531217, "learning_rate": 9.37650838008497e-07, "loss": 0.5659, "step": 6773 }, { "epoch": 0.87, "grad_norm": 0.4836638317323355, "learning_rate": 9.359023904309362e-07, "loss": 0.5869, "step": 6774 }, { "epoch": 0.87, "grad_norm": 0.5268996873883686, "learning_rate": 9.341554945167631e-07, "loss": 0.5919, "step": 6775 }, { "epoch": 0.87, "grad_norm": 0.5011907499008335, "learning_rate": 9.324101505650273e-07, "loss": 0.6083, "step": 6776 }, { "epoch": 0.87, "grad_norm": 0.5786149492920655, "learning_rate": 9.306663588745091e-07, "loss": 0.6057, "step": 6777 }, { "epoch": 0.87, "grad_norm": 0.5552378135846018, "learning_rate": 9.289241197437238e-07, "loss": 0.6344, "step": 6778 }, { "epoch": 0.87, "grad_norm": 0.5044190432533403, "learning_rate": 9.271834334709218e-07, "loss": 0.5862, "step": 6779 }, { "epoch": 0.87, "grad_norm": 0.4841959106779858, "learning_rate": 9.254443003540891e-07, "loss": 0.5997, "step": 6780 }, { "epoch": 0.87, "grad_norm": 0.551940280404377, "learning_rate": 9.237067206909456e-07, "loss": 0.6393, "step": 6781 }, { "epoch": 0.87, "grad_norm": 0.5018208146537875, "learning_rate": 9.219706947789386e-07, "loss": 0.573, "step": 6782 }, { "epoch": 0.87, "grad_norm": 0.46559720886493056, "learning_rate": 9.202362229152584e-07, "loss": 0.5467, "step": 6783 }, { "epoch": 0.87, "grad_norm": 0.4566162492898576, "learning_rate": 9.185033053968239e-07, "loss": 0.5552, "step": 6784 }, { "epoch": 0.87, "grad_norm": 0.4950161718491182, "learning_rate": 9.167719425202915e-07, "loss": 0.5905, "step": 6785 }, { "epoch": 0.87, "grad_norm": 0.5759543873392166, "learning_rate": 9.150421345820448e-07, "loss": 0.6303, "step": 6786 }, { "epoch": 0.87, "grad_norm": 0.4963212415604598, "learning_rate": 9.133138818782117e-07, "loss": 0.5909, "step": 6787 }, { "epoch": 0.87, "grad_norm": 0.5705807179455256, "learning_rate": 9.115871847046453e-07, "loss": 0.6428, "step": 6788 }, { "epoch": 0.87, "grad_norm": 0.522978658120548, "learning_rate": 9.098620433569361e-07, "loss": 0.6035, "step": 6789 }, { "epoch": 0.87, "grad_norm": 0.5077937383394581, "learning_rate": 9.081384581304076e-07, "loss": 0.612, "step": 6790 }, { "epoch": 0.87, "grad_norm": 0.49027344081111196, "learning_rate": 9.064164293201149e-07, "loss": 0.5714, "step": 6791 }, { "epoch": 0.87, "grad_norm": 0.586025272833102, "learning_rate": 9.04695957220848e-07, "loss": 0.5846, "step": 6792 }, { "epoch": 0.87, "grad_norm": 0.5919844957885856, "learning_rate": 9.029770421271323e-07, "loss": 0.6105, "step": 6793 }, { "epoch": 0.87, "grad_norm": 0.5234886220363254, "learning_rate": 9.012596843332255e-07, "loss": 0.5868, "step": 6794 }, { "epoch": 0.87, "grad_norm": 0.5253666645057787, "learning_rate": 8.995438841331172e-07, "loss": 0.5867, "step": 6795 }, { "epoch": 0.87, "grad_norm": 0.5807062164652921, "learning_rate": 8.978296418205335e-07, "loss": 0.617, "step": 6796 }, { "epoch": 0.87, "grad_norm": 0.5105729992391784, "learning_rate": 8.961169576889306e-07, "loss": 0.6101, "step": 6797 }, { "epoch": 0.87, "grad_norm": 0.4854119172535483, "learning_rate": 8.944058320314986e-07, "loss": 0.6125, "step": 6798 }, { "epoch": 0.87, "grad_norm": 0.5959193577176609, "learning_rate": 8.926962651411641e-07, "loss": 0.6257, "step": 6799 }, { "epoch": 0.87, "grad_norm": 0.49927704077444546, "learning_rate": 8.909882573105777e-07, "loss": 0.5618, "step": 6800 }, { "epoch": 0.87, "grad_norm": 0.49269506444943206, "learning_rate": 8.892818088321375e-07, "loss": 0.5745, "step": 6801 }, { "epoch": 0.87, "grad_norm": 0.5327413519738315, "learning_rate": 8.875769199979623e-07, "loss": 0.5865, "step": 6802 }, { "epoch": 0.87, "grad_norm": 0.48970631124610126, "learning_rate": 8.858735910999084e-07, "loss": 0.5902, "step": 6803 }, { "epoch": 0.87, "grad_norm": 0.5287592112206269, "learning_rate": 8.84171822429567e-07, "loss": 0.6156, "step": 6804 }, { "epoch": 0.87, "grad_norm": 0.45073848445032516, "learning_rate": 8.824716142782597e-07, "loss": 0.5637, "step": 6805 }, { "epoch": 0.87, "grad_norm": 0.5085520180844685, "learning_rate": 8.807729669370401e-07, "loss": 0.5988, "step": 6806 }, { "epoch": 0.87, "grad_norm": 0.5341934105107183, "learning_rate": 8.79075880696697e-07, "loss": 0.6158, "step": 6807 }, { "epoch": 0.87, "grad_norm": 0.592922803550442, "learning_rate": 8.7738035584775e-07, "loss": 0.6375, "step": 6808 }, { "epoch": 0.87, "grad_norm": 0.5843443136559895, "learning_rate": 8.756863926804538e-07, "loss": 0.6606, "step": 6809 }, { "epoch": 0.87, "grad_norm": 0.5578141805083252, "learning_rate": 8.739939914847939e-07, "loss": 0.6202, "step": 6810 }, { "epoch": 0.87, "grad_norm": 0.48340744091510796, "learning_rate": 8.723031525504877e-07, "loss": 0.5985, "step": 6811 }, { "epoch": 0.87, "grad_norm": 0.4619851391627624, "learning_rate": 8.70613876166988e-07, "loss": 0.5913, "step": 6812 }, { "epoch": 0.87, "grad_norm": 0.5091701100749129, "learning_rate": 8.689261626234768e-07, "loss": 0.631, "step": 6813 }, { "epoch": 0.87, "grad_norm": 0.45019028351551965, "learning_rate": 8.672400122088709e-07, "loss": 0.5924, "step": 6814 }, { "epoch": 0.87, "grad_norm": 0.4757977766965235, "learning_rate": 8.655554252118192e-07, "loss": 0.5726, "step": 6815 }, { "epoch": 0.87, "grad_norm": 0.43869316112288215, "learning_rate": 8.638724019207023e-07, "loss": 0.5542, "step": 6816 }, { "epoch": 0.87, "grad_norm": 0.4875408357784153, "learning_rate": 8.62190942623633e-07, "loss": 0.5677, "step": 6817 }, { "epoch": 0.87, "grad_norm": 0.6440122271689306, "learning_rate": 8.605110476084566e-07, "loss": 0.6509, "step": 6818 }, { "epoch": 0.87, "grad_norm": 0.5813425522654001, "learning_rate": 8.588327171627508e-07, "loss": 0.6347, "step": 6819 }, { "epoch": 0.87, "grad_norm": 0.5540611760644322, "learning_rate": 8.571559515738248e-07, "loss": 0.6083, "step": 6820 }, { "epoch": 0.87, "grad_norm": 0.5010647700195835, "learning_rate": 8.554807511287222e-07, "loss": 0.5637, "step": 6821 }, { "epoch": 0.87, "grad_norm": 0.484886958964947, "learning_rate": 8.538071161142147e-07, "loss": 0.5807, "step": 6822 }, { "epoch": 0.87, "grad_norm": 0.500434205910857, "learning_rate": 8.521350468168121e-07, "loss": 0.6207, "step": 6823 }, { "epoch": 0.87, "grad_norm": 0.5498818001233358, "learning_rate": 8.504645435227476e-07, "loss": 0.6088, "step": 6824 }, { "epoch": 0.87, "grad_norm": 0.5612184293145728, "learning_rate": 8.487956065179947e-07, "loss": 0.6143, "step": 6825 }, { "epoch": 0.87, "grad_norm": 0.42033275805148995, "learning_rate": 8.471282360882526e-07, "loss": 0.559, "step": 6826 }, { "epoch": 0.87, "grad_norm": 0.512059206215877, "learning_rate": 8.454624325189564e-07, "loss": 0.5875, "step": 6827 }, { "epoch": 0.87, "grad_norm": 0.5476850959263371, "learning_rate": 8.437981960952713e-07, "loss": 0.652, "step": 6828 }, { "epoch": 0.87, "grad_norm": 0.6344932335989816, "learning_rate": 8.421355271020915e-07, "loss": 0.6463, "step": 6829 }, { "epoch": 0.87, "grad_norm": 0.5096386108640021, "learning_rate": 8.404744258240505e-07, "loss": 0.57, "step": 6830 }, { "epoch": 0.87, "grad_norm": 0.49423472601015345, "learning_rate": 8.388148925455064e-07, "loss": 0.563, "step": 6831 }, { "epoch": 0.87, "grad_norm": 0.5237561970776564, "learning_rate": 8.371569275505531e-07, "loss": 0.6241, "step": 6832 }, { "epoch": 0.87, "grad_norm": 0.48542030577949385, "learning_rate": 8.355005311230103e-07, "loss": 0.5889, "step": 6833 }, { "epoch": 0.87, "grad_norm": 0.5217039815726294, "learning_rate": 8.338457035464353e-07, "loss": 0.615, "step": 6834 }, { "epoch": 0.87, "grad_norm": 0.5029036218860748, "learning_rate": 8.32192445104113e-07, "loss": 0.5727, "step": 6835 }, { "epoch": 0.87, "grad_norm": 0.4921551293159528, "learning_rate": 8.305407560790613e-07, "loss": 0.5771, "step": 6836 }, { "epoch": 0.87, "grad_norm": 0.5366906606789278, "learning_rate": 8.288906367540306e-07, "loss": 0.5589, "step": 6837 }, { "epoch": 0.87, "grad_norm": 0.525795780919774, "learning_rate": 8.272420874115017e-07, "loss": 0.6273, "step": 6838 }, { "epoch": 0.87, "grad_norm": 0.5288263219955068, "learning_rate": 8.255951083336855e-07, "loss": 0.5727, "step": 6839 }, { "epoch": 0.87, "grad_norm": 0.45896289819103053, "learning_rate": 8.239496998025243e-07, "loss": 0.5419, "step": 6840 }, { "epoch": 0.87, "grad_norm": 0.5182353289643605, "learning_rate": 8.223058620996938e-07, "loss": 0.6107, "step": 6841 }, { "epoch": 0.87, "grad_norm": 0.5010685616786938, "learning_rate": 8.206635955065967e-07, "loss": 0.592, "step": 6842 }, { "epoch": 0.87, "grad_norm": 0.47988377235192564, "learning_rate": 8.190229003043671e-07, "loss": 0.5824, "step": 6843 }, { "epoch": 0.87, "grad_norm": 0.465679333405076, "learning_rate": 8.173837767738779e-07, "loss": 0.5647, "step": 6844 }, { "epoch": 0.87, "grad_norm": 0.549706849203697, "learning_rate": 8.157462251957237e-07, "loss": 0.6058, "step": 6845 }, { "epoch": 0.87, "grad_norm": 0.5191864747416276, "learning_rate": 8.141102458502336e-07, "loss": 0.6068, "step": 6846 }, { "epoch": 0.87, "grad_norm": 0.8163839942222597, "learning_rate": 8.124758390174692e-07, "loss": 0.5817, "step": 6847 }, { "epoch": 0.87, "grad_norm": 0.5101748737794063, "learning_rate": 8.108430049772175e-07, "loss": 0.6051, "step": 6848 }, { "epoch": 0.87, "grad_norm": 0.4681266894002012, "learning_rate": 8.092117440090041e-07, "loss": 0.5829, "step": 6849 }, { "epoch": 0.88, "grad_norm": 0.5859039298695177, "learning_rate": 8.075820563920777e-07, "loss": 0.6183, "step": 6850 }, { "epoch": 0.88, "grad_norm": 0.5104278645333676, "learning_rate": 8.05953942405423e-07, "loss": 0.6074, "step": 6851 }, { "epoch": 0.88, "grad_norm": 0.5444089625884867, "learning_rate": 8.043274023277514e-07, "loss": 0.6001, "step": 6852 }, { "epoch": 0.88, "grad_norm": 0.47503520967173674, "learning_rate": 8.027024364375091e-07, "loss": 0.5724, "step": 6853 }, { "epoch": 0.88, "grad_norm": 0.5216956632724975, "learning_rate": 8.010790450128691e-07, "loss": 0.596, "step": 6854 }, { "epoch": 0.88, "grad_norm": 0.5072876236112442, "learning_rate": 7.99457228331737e-07, "loss": 0.6013, "step": 6855 }, { "epoch": 0.88, "grad_norm": 0.5271757376417239, "learning_rate": 7.978369866717472e-07, "loss": 0.6121, "step": 6856 }, { "epoch": 0.88, "grad_norm": 0.5023224754602423, "learning_rate": 7.962183203102658e-07, "loss": 0.5948, "step": 6857 }, { "epoch": 0.88, "grad_norm": 0.5141764292207149, "learning_rate": 7.946012295243888e-07, "loss": 0.6211, "step": 6858 }, { "epoch": 0.88, "grad_norm": 0.48764856438570925, "learning_rate": 7.929857145909437e-07, "loss": 0.5898, "step": 6859 }, { "epoch": 0.88, "grad_norm": 0.41755094509608603, "learning_rate": 7.913717757864848e-07, "loss": 0.542, "step": 6860 }, { "epoch": 0.88, "grad_norm": 0.5314504923355732, "learning_rate": 7.89759413387301e-07, "loss": 0.6104, "step": 6861 }, { "epoch": 0.88, "grad_norm": 0.5325023155046229, "learning_rate": 7.881486276694073e-07, "loss": 0.6278, "step": 6862 }, { "epoch": 0.88, "grad_norm": 0.5202830468140984, "learning_rate": 7.865394189085517e-07, "loss": 0.5946, "step": 6863 }, { "epoch": 0.88, "grad_norm": 0.5446289391978085, "learning_rate": 7.849317873802098e-07, "loss": 0.6188, "step": 6864 }, { "epoch": 0.88, "grad_norm": 0.47935222124956356, "learning_rate": 7.833257333595923e-07, "loss": 0.5851, "step": 6865 }, { "epoch": 0.88, "grad_norm": 0.5721482292031177, "learning_rate": 7.817212571216327e-07, "loss": 0.6401, "step": 6866 }, { "epoch": 0.88, "grad_norm": 0.4653413703625468, "learning_rate": 7.801183589409978e-07, "loss": 0.5757, "step": 6867 }, { "epoch": 0.88, "grad_norm": 0.4758499477323844, "learning_rate": 7.785170390920859e-07, "loss": 0.5833, "step": 6868 }, { "epoch": 0.88, "grad_norm": 0.5695934171414946, "learning_rate": 7.769172978490225e-07, "loss": 0.6206, "step": 6869 }, { "epoch": 0.88, "grad_norm": 0.5518254608401076, "learning_rate": 7.753191354856638e-07, "loss": 0.5687, "step": 6870 }, { "epoch": 0.88, "grad_norm": 0.44002975097753055, "learning_rate": 7.737225522755954e-07, "loss": 0.5835, "step": 6871 }, { "epoch": 0.88, "grad_norm": 0.5030788462732798, "learning_rate": 7.721275484921353e-07, "loss": 0.5758, "step": 6872 }, { "epoch": 0.88, "grad_norm": 0.48230275932552724, "learning_rate": 7.705341244083264e-07, "loss": 0.5996, "step": 6873 }, { "epoch": 0.88, "grad_norm": 0.5359477350209965, "learning_rate": 7.689422802969459e-07, "loss": 0.6289, "step": 6874 }, { "epoch": 0.88, "grad_norm": 0.46593982666676154, "learning_rate": 7.67352016430496e-07, "loss": 0.5905, "step": 6875 }, { "epoch": 0.88, "grad_norm": 0.4516781077123631, "learning_rate": 7.657633330812109e-07, "loss": 0.5635, "step": 6876 }, { "epoch": 0.88, "grad_norm": 0.48460960859080987, "learning_rate": 7.641762305210532e-07, "loss": 0.5687, "step": 6877 }, { "epoch": 0.88, "grad_norm": 0.5210661290516295, "learning_rate": 7.625907090217155e-07, "loss": 0.6168, "step": 6878 }, { "epoch": 0.88, "grad_norm": 0.9228267095012448, "learning_rate": 7.61006768854623e-07, "loss": 0.6161, "step": 6879 }, { "epoch": 0.88, "grad_norm": 0.5006965728288749, "learning_rate": 7.594244102909243e-07, "loss": 0.5908, "step": 6880 }, { "epoch": 0.88, "grad_norm": 0.4843567597503653, "learning_rate": 7.578436336015005e-07, "loss": 0.585, "step": 6881 }, { "epoch": 0.88, "grad_norm": 0.5157826337384896, "learning_rate": 7.562644390569618e-07, "loss": 0.6002, "step": 6882 }, { "epoch": 0.88, "grad_norm": 0.5464902244907757, "learning_rate": 7.546868269276475e-07, "loss": 0.5884, "step": 6883 }, { "epoch": 0.88, "grad_norm": 0.5108228210582741, "learning_rate": 7.531107974836238e-07, "loss": 0.5981, "step": 6884 }, { "epoch": 0.88, "grad_norm": 0.4974377440858692, "learning_rate": 7.515363509946894e-07, "loss": 0.5786, "step": 6885 }, { "epoch": 0.88, "grad_norm": 0.4817005696254249, "learning_rate": 7.499634877303674e-07, "loss": 0.6092, "step": 6886 }, { "epoch": 0.88, "grad_norm": 0.4883895480543215, "learning_rate": 7.48392207959916e-07, "loss": 0.5879, "step": 6887 }, { "epoch": 0.88, "grad_norm": 0.6078289263493452, "learning_rate": 7.468225119523198e-07, "loss": 0.6546, "step": 6888 }, { "epoch": 0.88, "grad_norm": 0.4346416490675457, "learning_rate": 7.452543999762907e-07, "loss": 0.5761, "step": 6889 }, { "epoch": 0.88, "grad_norm": 0.49403100564601005, "learning_rate": 7.436878723002694e-07, "loss": 0.5843, "step": 6890 }, { "epoch": 0.88, "grad_norm": 0.5060308533698478, "learning_rate": 7.421229291924271e-07, "loss": 0.6047, "step": 6891 }, { "epoch": 0.88, "grad_norm": 0.4927842994538696, "learning_rate": 7.405595709206648e-07, "loss": 0.5995, "step": 6892 }, { "epoch": 0.88, "grad_norm": 0.48986369450715034, "learning_rate": 7.389977977526053e-07, "loss": 0.568, "step": 6893 }, { "epoch": 0.88, "grad_norm": 0.5322136006667814, "learning_rate": 7.374376099556113e-07, "loss": 0.6141, "step": 6894 }, { "epoch": 0.88, "grad_norm": 0.46933534572416585, "learning_rate": 7.358790077967637e-07, "loss": 0.5293, "step": 6895 }, { "epoch": 0.88, "grad_norm": 0.5326589259614187, "learning_rate": 7.343219915428789e-07, "loss": 0.6064, "step": 6896 }, { "epoch": 0.88, "grad_norm": 0.5191591804015231, "learning_rate": 7.327665614604984e-07, "loss": 0.6005, "step": 6897 }, { "epoch": 0.88, "grad_norm": 0.5760788282469311, "learning_rate": 7.312127178158912e-07, "loss": 0.6456, "step": 6898 }, { "epoch": 0.88, "grad_norm": 0.47373693531908806, "learning_rate": 7.296604608750591e-07, "loss": 0.5979, "step": 6899 }, { "epoch": 0.88, "grad_norm": 0.4810336950721404, "learning_rate": 7.281097909037283e-07, "loss": 0.5489, "step": 6900 }, { "epoch": 0.88, "grad_norm": 0.525393118985253, "learning_rate": 7.265607081673543e-07, "loss": 0.6735, "step": 6901 }, { "epoch": 0.88, "grad_norm": 0.502453543676979, "learning_rate": 7.250132129311227e-07, "loss": 0.6009, "step": 6902 }, { "epoch": 0.88, "grad_norm": 0.49095358818862356, "learning_rate": 7.234673054599439e-07, "loss": 0.5847, "step": 6903 }, { "epoch": 0.88, "grad_norm": 0.45110677032342494, "learning_rate": 7.219229860184606e-07, "loss": 0.5594, "step": 6904 }, { "epoch": 0.88, "grad_norm": 0.47894789710233093, "learning_rate": 7.203802548710392e-07, "loss": 0.5674, "step": 6905 }, { "epoch": 0.88, "grad_norm": 0.495481961748616, "learning_rate": 7.188391122817784e-07, "loss": 0.5644, "step": 6906 }, { "epoch": 0.88, "grad_norm": 0.564160300411101, "learning_rate": 7.172995585145026e-07, "loss": 0.5852, "step": 6907 }, { "epoch": 0.88, "grad_norm": 0.508832378338681, "learning_rate": 7.157615938327645e-07, "loss": 0.5989, "step": 6908 }, { "epoch": 0.88, "grad_norm": 0.4637992046697878, "learning_rate": 7.142252184998444e-07, "loss": 0.5723, "step": 6909 }, { "epoch": 0.88, "grad_norm": 0.5525277256213261, "learning_rate": 7.12690432778752e-07, "loss": 0.6324, "step": 6910 }, { "epoch": 0.88, "grad_norm": 0.5146736673465859, "learning_rate": 7.11157236932224e-07, "loss": 0.596, "step": 6911 }, { "epoch": 0.88, "grad_norm": 0.5110882895026901, "learning_rate": 7.096256312227245e-07, "loss": 0.5693, "step": 6912 }, { "epoch": 0.88, "grad_norm": 0.5317690315003463, "learning_rate": 7.080956159124464e-07, "loss": 0.5985, "step": 6913 }, { "epoch": 0.88, "grad_norm": 0.49557463711747174, "learning_rate": 7.065671912633066e-07, "loss": 0.5839, "step": 6914 }, { "epoch": 0.88, "grad_norm": 0.5140272172676701, "learning_rate": 7.050403575369568e-07, "loss": 0.6166, "step": 6915 }, { "epoch": 0.88, "grad_norm": 0.4339216509525383, "learning_rate": 7.035151149947739e-07, "loss": 0.5704, "step": 6916 }, { "epoch": 0.88, "grad_norm": 0.5761496707744079, "learning_rate": 7.019914638978553e-07, "loss": 0.6377, "step": 6917 }, { "epoch": 0.88, "grad_norm": 0.444020816594514, "learning_rate": 7.004694045070349e-07, "loss": 0.5526, "step": 6918 }, { "epoch": 0.88, "grad_norm": 0.577708281513747, "learning_rate": 6.989489370828706e-07, "loss": 0.5999, "step": 6919 }, { "epoch": 0.88, "grad_norm": 0.46097342335596886, "learning_rate": 6.974300618856466e-07, "loss": 0.5804, "step": 6920 }, { "epoch": 0.88, "grad_norm": 0.5806828812819119, "learning_rate": 6.959127791753751e-07, "loss": 0.6146, "step": 6921 }, { "epoch": 0.88, "grad_norm": 0.480661913977255, "learning_rate": 6.943970892118002e-07, "loss": 0.5828, "step": 6922 }, { "epoch": 0.88, "grad_norm": 0.45917960654349826, "learning_rate": 6.928829922543868e-07, "loss": 0.595, "step": 6923 }, { "epoch": 0.88, "grad_norm": 0.5291998735280323, "learning_rate": 6.913704885623318e-07, "loss": 0.5943, "step": 6924 }, { "epoch": 0.88, "grad_norm": 0.47604836895382063, "learning_rate": 6.898595783945572e-07, "loss": 0.6056, "step": 6925 }, { "epoch": 0.88, "grad_norm": 0.5464204499999129, "learning_rate": 6.883502620097104e-07, "loss": 0.6429, "step": 6926 }, { "epoch": 0.88, "grad_norm": 0.4473272429333381, "learning_rate": 6.868425396661682e-07, "loss": 0.5636, "step": 6927 }, { "epoch": 0.89, "grad_norm": 0.5009584779955161, "learning_rate": 6.853364116220351e-07, "loss": 0.6102, "step": 6928 }, { "epoch": 0.89, "grad_norm": 0.48251615278189686, "learning_rate": 6.838318781351428e-07, "loss": 0.5992, "step": 6929 }, { "epoch": 0.89, "grad_norm": 0.4272431903105545, "learning_rate": 6.823289394630495e-07, "loss": 0.5296, "step": 6930 }, { "epoch": 0.89, "grad_norm": 0.45527247965957335, "learning_rate": 6.808275958630383e-07, "loss": 0.5839, "step": 6931 }, { "epoch": 0.89, "grad_norm": 0.537933782259524, "learning_rate": 6.793278475921217e-07, "loss": 0.5964, "step": 6932 }, { "epoch": 0.89, "grad_norm": 0.5356908182018622, "learning_rate": 6.778296949070395e-07, "loss": 0.6428, "step": 6933 }, { "epoch": 0.89, "grad_norm": 0.5803315150385969, "learning_rate": 6.763331380642557e-07, "loss": 0.5646, "step": 6934 }, { "epoch": 0.89, "grad_norm": 0.46567931192864465, "learning_rate": 6.748381773199609e-07, "loss": 0.5662, "step": 6935 }, { "epoch": 0.89, "grad_norm": 0.5172966166986872, "learning_rate": 6.73344812930078e-07, "loss": 0.604, "step": 6936 }, { "epoch": 0.89, "grad_norm": 0.5114045381199774, "learning_rate": 6.718530451502514e-07, "loss": 0.5858, "step": 6937 }, { "epoch": 0.89, "grad_norm": 0.5034106466031447, "learning_rate": 6.703628742358536e-07, "loss": 0.59, "step": 6938 }, { "epoch": 0.89, "grad_norm": 0.4801265313008266, "learning_rate": 6.68874300441984e-07, "loss": 0.5687, "step": 6939 }, { "epoch": 0.89, "grad_norm": 0.5760548083530428, "learning_rate": 6.673873240234674e-07, "loss": 0.6108, "step": 6940 }, { "epoch": 0.89, "grad_norm": 0.4723248505722079, "learning_rate": 6.659019452348569e-07, "loss": 0.5778, "step": 6941 }, { "epoch": 0.89, "grad_norm": 0.4888642664940203, "learning_rate": 6.644181643304326e-07, "loss": 0.6044, "step": 6942 }, { "epoch": 0.89, "grad_norm": 0.47092149834948616, "learning_rate": 6.629359815641956e-07, "loss": 0.5793, "step": 6943 }, { "epoch": 0.89, "grad_norm": 0.5086126717655827, "learning_rate": 6.614553971898818e-07, "loss": 0.5787, "step": 6944 }, { "epoch": 0.89, "grad_norm": 0.45728224674526186, "learning_rate": 6.599764114609475e-07, "loss": 0.5753, "step": 6945 }, { "epoch": 0.89, "grad_norm": 0.501484613261935, "learning_rate": 6.584990246305767e-07, "loss": 0.5988, "step": 6946 }, { "epoch": 0.89, "grad_norm": 0.5271198077264585, "learning_rate": 6.570232369516805e-07, "loss": 0.5713, "step": 6947 }, { "epoch": 0.89, "grad_norm": 0.5496840047358801, "learning_rate": 6.555490486768967e-07, "loss": 0.6432, "step": 6948 }, { "epoch": 0.89, "grad_norm": 0.48438244951870746, "learning_rate": 6.540764600585881e-07, "loss": 0.5906, "step": 6949 }, { "epoch": 0.89, "grad_norm": 0.5447056174049619, "learning_rate": 6.526054713488427e-07, "loss": 0.5574, "step": 6950 }, { "epoch": 0.89, "grad_norm": 0.5634899388515008, "learning_rate": 6.511360827994773e-07, "loss": 0.6182, "step": 6951 }, { "epoch": 0.89, "grad_norm": 0.4496426953560672, "learning_rate": 6.496682946620326e-07, "loss": 0.5916, "step": 6952 }, { "epoch": 0.89, "grad_norm": 0.5870974957692666, "learning_rate": 6.482021071877764e-07, "loss": 0.625, "step": 6953 }, { "epoch": 0.89, "grad_norm": 0.5183195646011884, "learning_rate": 6.467375206277027e-07, "loss": 0.6038, "step": 6954 }, { "epoch": 0.89, "grad_norm": 0.49863207108533364, "learning_rate": 6.452745352325295e-07, "loss": 0.5974, "step": 6955 }, { "epoch": 0.89, "grad_norm": 0.49422603425240996, "learning_rate": 6.438131512527035e-07, "loss": 0.5979, "step": 6956 }, { "epoch": 0.89, "grad_norm": 0.5058156804111951, "learning_rate": 6.423533689383954e-07, "loss": 0.5582, "step": 6957 }, { "epoch": 0.89, "grad_norm": 0.6716441282980825, "learning_rate": 6.408951885395021e-07, "loss": 0.6175, "step": 6958 }, { "epoch": 0.89, "grad_norm": 0.5256850744024727, "learning_rate": 6.394386103056461e-07, "loss": 0.5928, "step": 6959 }, { "epoch": 0.89, "grad_norm": 0.5936694135881483, "learning_rate": 6.379836344861767e-07, "loss": 0.6506, "step": 6960 }, { "epoch": 0.89, "grad_norm": 0.5655266558232973, "learning_rate": 6.365302613301671e-07, "loss": 0.596, "step": 6961 }, { "epoch": 0.89, "grad_norm": 0.49631005939677353, "learning_rate": 6.350784910864183e-07, "loss": 0.6052, "step": 6962 }, { "epoch": 0.89, "grad_norm": 0.5398930211411986, "learning_rate": 6.336283240034546e-07, "loss": 0.589, "step": 6963 }, { "epoch": 0.89, "grad_norm": 0.42570717799088326, "learning_rate": 6.321797603295255e-07, "loss": 0.5574, "step": 6964 }, { "epoch": 0.89, "grad_norm": 0.6611285380637082, "learning_rate": 6.307328003126112e-07, "loss": 0.6032, "step": 6965 }, { "epoch": 0.89, "grad_norm": 0.5189938073809206, "learning_rate": 6.292874442004138e-07, "loss": 0.5732, "step": 6966 }, { "epoch": 0.89, "grad_norm": 0.4929326113154019, "learning_rate": 6.278436922403575e-07, "loss": 0.5932, "step": 6967 }, { "epoch": 0.89, "grad_norm": 0.507058370888048, "learning_rate": 6.264015446795956e-07, "loss": 0.6116, "step": 6968 }, { "epoch": 0.89, "grad_norm": 0.5483656410420731, "learning_rate": 6.249610017650076e-07, "loss": 0.625, "step": 6969 }, { "epoch": 0.89, "grad_norm": 0.5378533535786539, "learning_rate": 6.235220637431971e-07, "loss": 0.6017, "step": 6970 }, { "epoch": 0.89, "grad_norm": 0.4146829893742024, "learning_rate": 6.220847308604894e-07, "loss": 0.5595, "step": 6971 }, { "epoch": 0.89, "grad_norm": 0.48519851356468097, "learning_rate": 6.206490033629442e-07, "loss": 0.6012, "step": 6972 }, { "epoch": 0.89, "grad_norm": 0.5424227976235865, "learning_rate": 6.192148814963361e-07, "loss": 0.5813, "step": 6973 }, { "epoch": 0.89, "grad_norm": 0.5196619261719672, "learning_rate": 6.17782365506171e-07, "loss": 0.5886, "step": 6974 }, { "epoch": 0.89, "grad_norm": 0.5526942659263527, "learning_rate": 6.163514556376804e-07, "loss": 0.6163, "step": 6975 }, { "epoch": 0.89, "grad_norm": 0.49120418282575695, "learning_rate": 6.149221521358139e-07, "loss": 0.5615, "step": 6976 }, { "epoch": 0.89, "grad_norm": 0.542709854338283, "learning_rate": 6.134944552452538e-07, "loss": 0.6087, "step": 6977 }, { "epoch": 0.89, "grad_norm": 0.48647426222685386, "learning_rate": 6.12068365210402e-07, "loss": 0.5886, "step": 6978 }, { "epoch": 0.89, "grad_norm": 0.42293710839034354, "learning_rate": 6.106438822753924e-07, "loss": 0.5844, "step": 6979 }, { "epoch": 0.89, "grad_norm": 0.5109440199421763, "learning_rate": 6.092210066840753e-07, "loss": 0.5975, "step": 6980 }, { "epoch": 0.89, "grad_norm": 0.5027525209862249, "learning_rate": 6.077997386800316e-07, "loss": 0.5918, "step": 6981 }, { "epoch": 0.89, "grad_norm": 0.460426837445796, "learning_rate": 6.063800785065643e-07, "loss": 0.5776, "step": 6982 }, { "epoch": 0.89, "grad_norm": 0.4676501188652924, "learning_rate": 6.049620264067013e-07, "loss": 0.5816, "step": 6983 }, { "epoch": 0.89, "grad_norm": 0.5097983734689061, "learning_rate": 6.035455826231995e-07, "loss": 0.5934, "step": 6984 }, { "epoch": 0.89, "grad_norm": 0.46781588619264824, "learning_rate": 6.021307473985306e-07, "loss": 0.5849, "step": 6985 }, { "epoch": 0.89, "grad_norm": 0.5234437073722084, "learning_rate": 6.007175209749016e-07, "loss": 0.5989, "step": 6986 }, { "epoch": 0.89, "grad_norm": 0.5708353544119628, "learning_rate": 5.993059035942395e-07, "loss": 0.6441, "step": 6987 }, { "epoch": 0.89, "grad_norm": 0.5520120596746201, "learning_rate": 5.978958954981962e-07, "loss": 0.6347, "step": 6988 }, { "epoch": 0.89, "grad_norm": 0.5065516239463287, "learning_rate": 5.964874969281453e-07, "loss": 0.6223, "step": 6989 }, { "epoch": 0.89, "grad_norm": 0.5614398196789832, "learning_rate": 5.950807081251908e-07, "loss": 0.6319, "step": 6990 }, { "epoch": 0.89, "grad_norm": 0.48322027208951873, "learning_rate": 5.936755293301555e-07, "loss": 0.5948, "step": 6991 }, { "epoch": 0.89, "grad_norm": 0.46527763804820704, "learning_rate": 5.922719607835902e-07, "loss": 0.556, "step": 6992 }, { "epoch": 0.89, "grad_norm": 0.47068877533453995, "learning_rate": 5.908700027257674e-07, "loss": 0.603, "step": 6993 }, { "epoch": 0.89, "grad_norm": 0.4911879034755025, "learning_rate": 5.894696553966872e-07, "loss": 0.5878, "step": 6994 }, { "epoch": 0.89, "grad_norm": 0.4791718281904606, "learning_rate": 5.880709190360712e-07, "loss": 0.5847, "step": 6995 }, { "epoch": 0.89, "grad_norm": 0.5303378829845985, "learning_rate": 5.866737938833655e-07, "loss": 0.6174, "step": 6996 }, { "epoch": 0.89, "grad_norm": 0.47560269323612303, "learning_rate": 5.852782801777424e-07, "loss": 0.5607, "step": 6997 }, { "epoch": 0.89, "grad_norm": 0.4657293197968695, "learning_rate": 5.83884378158095e-07, "loss": 0.596, "step": 6998 }, { "epoch": 0.89, "grad_norm": 0.4369823476339077, "learning_rate": 5.824920880630436e-07, "loss": 0.5774, "step": 6999 }, { "epoch": 0.89, "grad_norm": 0.5768944304743097, "learning_rate": 5.811014101309309e-07, "loss": 0.6216, "step": 7000 }, { "epoch": 0.89, "grad_norm": 0.49076122514976317, "learning_rate": 5.797123445998243e-07, "loss": 0.5696, "step": 7001 }, { "epoch": 0.89, "grad_norm": 0.5439801166140452, "learning_rate": 5.783248917075157e-07, "loss": 0.5943, "step": 7002 }, { "epoch": 0.89, "grad_norm": 0.4401450803368909, "learning_rate": 5.769390516915186e-07, "loss": 0.5789, "step": 7003 }, { "epoch": 0.89, "grad_norm": 0.5112359599376348, "learning_rate": 5.75554824789073e-07, "loss": 0.6033, "step": 7004 }, { "epoch": 0.89, "grad_norm": 0.47205103965089545, "learning_rate": 5.741722112371406e-07, "loss": 0.6136, "step": 7005 }, { "epoch": 0.89, "grad_norm": 0.49723574485430555, "learning_rate": 5.727912112724099e-07, "loss": 0.5849, "step": 7006 }, { "epoch": 0.9, "grad_norm": 0.46720975854179103, "learning_rate": 5.714118251312884e-07, "loss": 0.5773, "step": 7007 }, { "epoch": 0.9, "grad_norm": 0.48191191066573874, "learning_rate": 5.70034053049916e-07, "loss": 0.5588, "step": 7008 }, { "epoch": 0.9, "grad_norm": 0.5383573168014085, "learning_rate": 5.686578952641442e-07, "loss": 0.6381, "step": 7009 }, { "epoch": 0.9, "grad_norm": 0.4909679222883882, "learning_rate": 5.672833520095577e-07, "loss": 0.5721, "step": 7010 }, { "epoch": 0.9, "grad_norm": 0.49950126945260753, "learning_rate": 5.659104235214596e-07, "loss": 0.5988, "step": 7011 }, { "epoch": 0.9, "grad_norm": 0.5373715696511536, "learning_rate": 5.645391100348818e-07, "loss": 0.6467, "step": 7012 }, { "epoch": 0.9, "grad_norm": 0.48936723253966496, "learning_rate": 5.631694117845732e-07, "loss": 0.5673, "step": 7013 }, { "epoch": 0.9, "grad_norm": 0.5607872015227269, "learning_rate": 5.618013290050095e-07, "loss": 0.6288, "step": 7014 }, { "epoch": 0.9, "grad_norm": 0.5037549589427341, "learning_rate": 5.604348619303934e-07, "loss": 0.6014, "step": 7015 }, { "epoch": 0.9, "grad_norm": 0.560411319839843, "learning_rate": 5.590700107946456e-07, "loss": 0.6564, "step": 7016 }, { "epoch": 0.9, "grad_norm": 0.6383225460456036, "learning_rate": 5.577067758314125e-07, "loss": 0.6413, "step": 7017 }, { "epoch": 0.9, "grad_norm": 0.5008012980545228, "learning_rate": 5.563451572740608e-07, "loss": 0.6007, "step": 7018 }, { "epoch": 0.9, "grad_norm": 0.4698422945558058, "learning_rate": 5.54985155355685e-07, "loss": 0.5968, "step": 7019 }, { "epoch": 0.9, "grad_norm": 0.5118802559662113, "learning_rate": 5.536267703091014e-07, "loss": 0.553, "step": 7020 }, { "epoch": 0.9, "grad_norm": 0.487662491563629, "learning_rate": 5.52270002366847e-07, "loss": 0.6181, "step": 7021 }, { "epoch": 0.9, "grad_norm": 0.5264116081560914, "learning_rate": 5.509148517611862e-07, "loss": 0.6054, "step": 7022 }, { "epoch": 0.9, "grad_norm": 0.45984909330251283, "learning_rate": 5.495613187241044e-07, "loss": 0.5851, "step": 7023 }, { "epoch": 0.9, "grad_norm": 0.47114880268267073, "learning_rate": 5.482094034873087e-07, "loss": 0.6066, "step": 7024 }, { "epoch": 0.9, "grad_norm": 0.5009423346816069, "learning_rate": 5.468591062822304e-07, "loss": 0.6005, "step": 7025 }, { "epoch": 0.9, "grad_norm": 0.4354816313040192, "learning_rate": 5.455104273400258e-07, "loss": 0.5416, "step": 7026 }, { "epoch": 0.9, "grad_norm": 0.5281095607669299, "learning_rate": 5.441633668915703e-07, "loss": 0.5795, "step": 7027 }, { "epoch": 0.9, "grad_norm": 0.5084751616767684, "learning_rate": 5.428179251674636e-07, "loss": 0.6054, "step": 7028 }, { "epoch": 0.9, "grad_norm": 0.47251899964877814, "learning_rate": 5.414741023980319e-07, "loss": 0.5641, "step": 7029 }, { "epoch": 0.9, "grad_norm": 0.48128725505944503, "learning_rate": 5.401318988133198e-07, "loss": 0.5682, "step": 7030 }, { "epoch": 0.9, "grad_norm": 0.5058227138223479, "learning_rate": 5.387913146430967e-07, "loss": 0.6243, "step": 7031 }, { "epoch": 0.9, "grad_norm": 0.48045862551298385, "learning_rate": 5.374523501168538e-07, "loss": 0.5765, "step": 7032 }, { "epoch": 0.9, "grad_norm": 0.453264079210095, "learning_rate": 5.361150054638053e-07, "loss": 0.5625, "step": 7033 }, { "epoch": 0.9, "grad_norm": 0.4774791163021619, "learning_rate": 5.347792809128893e-07, "loss": 0.5911, "step": 7034 }, { "epoch": 0.9, "grad_norm": 0.5102808263658483, "learning_rate": 5.334451766927651e-07, "loss": 0.584, "step": 7035 }, { "epoch": 0.9, "grad_norm": 0.519794275099743, "learning_rate": 5.321126930318144e-07, "loss": 0.5981, "step": 7036 }, { "epoch": 0.9, "grad_norm": 0.5453346359306855, "learning_rate": 5.307818301581435e-07, "loss": 0.6132, "step": 7037 }, { "epoch": 0.9, "grad_norm": 0.4895209568750799, "learning_rate": 5.294525882995794e-07, "loss": 0.5903, "step": 7038 }, { "epoch": 0.9, "grad_norm": 0.5347476393441368, "learning_rate": 5.28124967683672e-07, "loss": 0.6052, "step": 7039 }, { "epoch": 0.9, "grad_norm": 0.4185249418402822, "learning_rate": 5.267989685376939e-07, "loss": 0.5418, "step": 7040 }, { "epoch": 0.9, "grad_norm": 0.4932913743887567, "learning_rate": 5.254745910886406e-07, "loss": 0.6077, "step": 7041 }, { "epoch": 0.9, "grad_norm": 0.5031497523758469, "learning_rate": 5.24151835563228e-07, "loss": 0.6265, "step": 7042 }, { "epoch": 0.9, "grad_norm": 0.4905933620466232, "learning_rate": 5.228307021878975e-07, "loss": 0.5939, "step": 7043 }, { "epoch": 0.9, "grad_norm": 0.5235140119598071, "learning_rate": 5.215111911888093e-07, "loss": 0.6062, "step": 7044 }, { "epoch": 0.9, "grad_norm": 0.5214723037675066, "learning_rate": 5.201933027918493e-07, "loss": 0.6488, "step": 7045 }, { "epoch": 0.9, "grad_norm": 0.5060748237001144, "learning_rate": 5.188770372226237e-07, "loss": 0.6072, "step": 7046 }, { "epoch": 0.9, "grad_norm": 0.4894841779604237, "learning_rate": 5.175623947064611e-07, "loss": 0.5849, "step": 7047 }, { "epoch": 0.9, "grad_norm": 0.5739900723877869, "learning_rate": 5.162493754684116e-07, "loss": 0.5988, "step": 7048 }, { "epoch": 0.9, "grad_norm": 0.4921274906485756, "learning_rate": 5.149379797332465e-07, "loss": 0.5776, "step": 7049 }, { "epoch": 0.9, "grad_norm": 0.48458112550766314, "learning_rate": 5.136282077254661e-07, "loss": 0.5743, "step": 7050 }, { "epoch": 0.9, "grad_norm": 0.6304186429334411, "learning_rate": 5.123200596692835e-07, "loss": 0.6038, "step": 7051 }, { "epoch": 0.9, "grad_norm": 0.5349820623129633, "learning_rate": 5.110135357886381e-07, "loss": 0.6343, "step": 7052 }, { "epoch": 0.9, "grad_norm": 0.4805811834485083, "learning_rate": 5.097086363071913e-07, "loss": 0.5435, "step": 7053 }, { "epoch": 0.9, "grad_norm": 0.46431918226982066, "learning_rate": 5.084053614483253e-07, "loss": 0.5885, "step": 7054 }, { "epoch": 0.9, "grad_norm": 0.5526093850150665, "learning_rate": 5.071037114351462e-07, "loss": 0.6278, "step": 7055 }, { "epoch": 0.9, "grad_norm": 0.4761388572819908, "learning_rate": 5.058036864904792e-07, "loss": 0.5905, "step": 7056 }, { "epoch": 0.9, "grad_norm": 0.5318469294963922, "learning_rate": 5.045052868368749e-07, "loss": 0.6078, "step": 7057 }, { "epoch": 0.9, "grad_norm": 0.4699252620287792, "learning_rate": 5.032085126966024e-07, "loss": 0.5724, "step": 7058 }, { "epoch": 0.9, "grad_norm": 0.4601341694546858, "learning_rate": 5.019133642916551e-07, "loss": 0.5687, "step": 7059 }, { "epoch": 0.9, "grad_norm": 0.43730056337867157, "learning_rate": 5.006198418437436e-07, "loss": 0.5823, "step": 7060 }, { "epoch": 0.9, "grad_norm": 0.5106222483029264, "learning_rate": 4.993279455743049e-07, "loss": 0.5947, "step": 7061 }, { "epoch": 0.9, "grad_norm": 0.5809616940073247, "learning_rate": 4.980376757044958e-07, "loss": 0.6367, "step": 7062 }, { "epoch": 0.9, "grad_norm": 0.5105412501885614, "learning_rate": 4.967490324551949e-07, "loss": 0.6067, "step": 7063 }, { "epoch": 0.9, "grad_norm": 0.5446538293844663, "learning_rate": 4.954620160470014e-07, "loss": 0.6157, "step": 7064 }, { "epoch": 0.9, "grad_norm": 0.5071588170249238, "learning_rate": 4.94176626700239e-07, "loss": 0.5939, "step": 7065 }, { "epoch": 0.9, "grad_norm": 0.48971086721804824, "learning_rate": 4.928928646349496e-07, "loss": 0.6202, "step": 7066 }, { "epoch": 0.9, "grad_norm": 0.5284345990470791, "learning_rate": 4.916107300708961e-07, "loss": 0.5862, "step": 7067 }, { "epoch": 0.9, "grad_norm": 0.5194322431663583, "learning_rate": 4.903302232275687e-07, "loss": 0.5888, "step": 7068 }, { "epoch": 0.9, "grad_norm": 0.49523400934013306, "learning_rate": 4.890513443241706e-07, "loss": 0.6128, "step": 7069 }, { "epoch": 0.9, "grad_norm": 0.4968127755045267, "learning_rate": 4.877740935796304e-07, "loss": 0.6043, "step": 7070 }, { "epoch": 0.9, "grad_norm": 0.5086911671871391, "learning_rate": 4.864984712125986e-07, "loss": 0.5614, "step": 7071 }, { "epoch": 0.9, "grad_norm": 0.5195113883169492, "learning_rate": 4.852244774414472e-07, "loss": 0.5738, "step": 7072 }, { "epoch": 0.9, "grad_norm": 0.5144851844050821, "learning_rate": 4.839521124842683e-07, "loss": 0.5958, "step": 7073 }, { "epoch": 0.9, "grad_norm": 0.539303339861555, "learning_rate": 4.826813765588745e-07, "loss": 0.5893, "step": 7074 }, { "epoch": 0.9, "grad_norm": 0.469915467254766, "learning_rate": 4.814122698828017e-07, "loss": 0.5919, "step": 7075 }, { "epoch": 0.9, "grad_norm": 0.47369026769712785, "learning_rate": 4.801447926733039e-07, "loss": 0.5773, "step": 7076 }, { "epoch": 0.9, "grad_norm": 0.47798927067957253, "learning_rate": 4.788789451473596e-07, "loss": 0.5899, "step": 7077 }, { "epoch": 0.9, "grad_norm": 0.5246828296077037, "learning_rate": 4.776147275216624e-07, "loss": 0.5749, "step": 7078 }, { "epoch": 0.9, "grad_norm": 0.481351382466512, "learning_rate": 4.7635214001263697e-07, "loss": 0.5835, "step": 7079 }, { "epoch": 0.9, "grad_norm": 0.4427236191706465, "learning_rate": 4.750911828364191e-07, "loss": 0.5688, "step": 7080 }, { "epoch": 0.9, "grad_norm": 0.4414604863586913, "learning_rate": 4.738318562088695e-07, "loss": 0.5625, "step": 7081 }, { "epoch": 0.9, "grad_norm": 0.4629162702192052, "learning_rate": 4.7257416034557134e-07, "loss": 0.5467, "step": 7082 }, { "epoch": 0.9, "grad_norm": 0.49991246803765294, "learning_rate": 4.713180954618257e-07, "loss": 0.5865, "step": 7083 }, { "epoch": 0.9, "grad_norm": 0.5330679267074515, "learning_rate": 4.7006366177265504e-07, "loss": 0.6139, "step": 7084 }, { "epoch": 0.91, "grad_norm": 0.5131889769010264, "learning_rate": 4.688108594928054e-07, "loss": 0.59, "step": 7085 }, { "epoch": 0.91, "grad_norm": 0.46555975190915794, "learning_rate": 4.675596888367384e-07, "loss": 0.5963, "step": 7086 }, { "epoch": 0.91, "grad_norm": 0.5500953259557085, "learning_rate": 4.663101500186418e-07, "loss": 0.627, "step": 7087 }, { "epoch": 0.91, "grad_norm": 0.5276323581142357, "learning_rate": 4.650622432524199e-07, "loss": 0.582, "step": 7088 }, { "epoch": 0.91, "grad_norm": 0.5779045307772026, "learning_rate": 4.6381596875169967e-07, "loss": 0.6443, "step": 7089 }, { "epoch": 0.91, "grad_norm": 0.5086006840930166, "learning_rate": 4.625713267298271e-07, "loss": 0.6078, "step": 7090 }, { "epoch": 0.91, "grad_norm": 0.49056132007973097, "learning_rate": 4.613283173998717e-07, "loss": 0.5889, "step": 7091 }, { "epoch": 0.91, "grad_norm": 0.4726829996774439, "learning_rate": 4.6008694097462115e-07, "loss": 0.5711, "step": 7092 }, { "epoch": 0.91, "grad_norm": 0.509691509460613, "learning_rate": 4.588471976665831e-07, "loss": 0.6039, "step": 7093 }, { "epoch": 0.91, "grad_norm": 0.5572244199552252, "learning_rate": 4.5760908768798684e-07, "loss": 0.6372, "step": 7094 }, { "epoch": 0.91, "grad_norm": 0.4916745851470032, "learning_rate": 4.5637261125078157e-07, "loss": 0.582, "step": 7095 }, { "epoch": 0.91, "grad_norm": 0.443616356120641, "learning_rate": 4.5513776856663803e-07, "loss": 0.5786, "step": 7096 }, { "epoch": 0.91, "grad_norm": 0.5583128126689438, "learning_rate": 4.5390455984694494e-07, "loss": 0.6, "step": 7097 }, { "epoch": 0.91, "grad_norm": 0.5754647159276117, "learning_rate": 4.5267298530281446e-07, "loss": 0.6169, "step": 7098 }, { "epoch": 0.91, "grad_norm": 0.4981093092051295, "learning_rate": 4.514430451450735e-07, "loss": 0.5851, "step": 7099 }, { "epoch": 0.91, "grad_norm": 0.49829207993929886, "learning_rate": 4.5021473958427707e-07, "loss": 0.5775, "step": 7100 }, { "epoch": 0.91, "grad_norm": 0.5165477130649169, "learning_rate": 4.489880688306958e-07, "loss": 0.5911, "step": 7101 }, { "epoch": 0.91, "grad_norm": 0.48938605504808547, "learning_rate": 4.4776303309431836e-07, "loss": 0.5365, "step": 7102 }, { "epoch": 0.91, "grad_norm": 0.4881782930409067, "learning_rate": 4.465396325848581e-07, "loss": 0.5976, "step": 7103 }, { "epoch": 0.91, "grad_norm": 0.5039465224599533, "learning_rate": 4.4531786751174423e-07, "loss": 0.5911, "step": 7104 }, { "epoch": 0.91, "grad_norm": 0.48880416851457464, "learning_rate": 4.440977380841294e-07, "loss": 0.568, "step": 7105 }, { "epoch": 0.91, "grad_norm": 0.4936542022605423, "learning_rate": 4.428792445108832e-07, "loss": 0.5732, "step": 7106 }, { "epoch": 0.91, "grad_norm": 0.4913038623914315, "learning_rate": 4.4166238700059983e-07, "loss": 0.603, "step": 7107 }, { "epoch": 0.91, "grad_norm": 0.4770854133300316, "learning_rate": 4.404471657615894e-07, "loss": 0.5827, "step": 7108 }, { "epoch": 0.91, "grad_norm": 0.47451499673119607, "learning_rate": 4.392335810018844e-07, "loss": 0.5803, "step": 7109 }, { "epoch": 0.91, "grad_norm": 0.5228262280950398, "learning_rate": 4.3802163292923304e-07, "loss": 0.6151, "step": 7110 }, { "epoch": 0.91, "grad_norm": 0.5192091534621599, "learning_rate": 4.368113217511072e-07, "loss": 0.5913, "step": 7111 }, { "epoch": 0.91, "grad_norm": 0.5302344933820978, "learning_rate": 4.356026476746966e-07, "loss": 0.5911, "step": 7112 }, { "epoch": 0.91, "grad_norm": 0.49699173916720435, "learning_rate": 4.343956109069125e-07, "loss": 0.6184, "step": 7113 }, { "epoch": 0.91, "grad_norm": 0.5159304647601506, "learning_rate": 4.3319021165438515e-07, "loss": 0.5721, "step": 7114 }, { "epoch": 0.91, "grad_norm": 0.4931281684521271, "learning_rate": 4.31986450123465e-07, "loss": 0.5896, "step": 7115 }, { "epoch": 0.91, "grad_norm": 0.45583068354666945, "learning_rate": 4.307843265202194e-07, "loss": 0.5515, "step": 7116 }, { "epoch": 0.91, "grad_norm": 0.5560499858767192, "learning_rate": 4.2958384105043824e-07, "loss": 0.6295, "step": 7117 }, { "epoch": 0.91, "grad_norm": 0.45980563542426744, "learning_rate": 4.283849939196327e-07, "loss": 0.5843, "step": 7118 }, { "epoch": 0.91, "grad_norm": 0.44498265184371827, "learning_rate": 4.271877853330264e-07, "loss": 0.5695, "step": 7119 }, { "epoch": 0.91, "grad_norm": 0.563404070572636, "learning_rate": 4.259922154955665e-07, "loss": 0.6141, "step": 7120 }, { "epoch": 0.91, "grad_norm": 0.49102634210545215, "learning_rate": 4.247982846119247e-07, "loss": 0.596, "step": 7121 }, { "epoch": 0.91, "grad_norm": 0.4691202677695146, "learning_rate": 4.236059928864844e-07, "loss": 0.5737, "step": 7122 }, { "epoch": 0.91, "grad_norm": 0.5446381366247295, "learning_rate": 4.224153405233533e-07, "loss": 0.5975, "step": 7123 }, { "epoch": 0.91, "grad_norm": 0.6124931873138855, "learning_rate": 4.21226327726354e-07, "loss": 0.571, "step": 7124 }, { "epoch": 0.91, "grad_norm": 0.5032663418843581, "learning_rate": 4.2003895469903354e-07, "loss": 0.597, "step": 7125 }, { "epoch": 0.91, "grad_norm": 0.4883275431197625, "learning_rate": 4.188532216446539e-07, "loss": 0.612, "step": 7126 }, { "epoch": 0.91, "grad_norm": 0.4838966738328337, "learning_rate": 4.1766912876620045e-07, "loss": 0.6017, "step": 7127 }, { "epoch": 0.91, "grad_norm": 0.4983718398832078, "learning_rate": 4.1648667626637217e-07, "loss": 0.5858, "step": 7128 }, { "epoch": 0.91, "grad_norm": 0.5152124789578711, "learning_rate": 4.153058643475927e-07, "loss": 0.5687, "step": 7129 }, { "epoch": 0.91, "grad_norm": 0.4445774950411701, "learning_rate": 4.141266932120036e-07, "loss": 0.5806, "step": 7130 }, { "epoch": 0.91, "grad_norm": 0.6076697024996683, "learning_rate": 4.129491630614624e-07, "loss": 0.6458, "step": 7131 }, { "epoch": 0.91, "grad_norm": 0.47709809078442017, "learning_rate": 4.1177327409755e-07, "loss": 0.5771, "step": 7132 }, { "epoch": 0.91, "grad_norm": 0.48474344949352505, "learning_rate": 4.10599026521562e-07, "loss": 0.5632, "step": 7133 }, { "epoch": 0.91, "grad_norm": 0.5340986173391742, "learning_rate": 4.094264205345178e-07, "loss": 0.6253, "step": 7134 }, { "epoch": 0.91, "grad_norm": 0.509159343587345, "learning_rate": 4.0825545633715216e-07, "loss": 0.6011, "step": 7135 }, { "epoch": 0.91, "grad_norm": 0.5190881691244659, "learning_rate": 4.0708613412991927e-07, "loss": 0.5884, "step": 7136 }, { "epoch": 0.91, "grad_norm": 0.4767946384563321, "learning_rate": 4.059184541129935e-07, "loss": 0.5778, "step": 7137 }, { "epoch": 0.91, "grad_norm": 0.5129338580415238, "learning_rate": 4.0475241648626817e-07, "loss": 0.6171, "step": 7138 }, { "epoch": 0.91, "grad_norm": 0.5125724192613859, "learning_rate": 4.0358802144935373e-07, "loss": 0.5998, "step": 7139 }, { "epoch": 0.91, "grad_norm": 0.5589874247371445, "learning_rate": 4.0242526920158063e-07, "loss": 0.6163, "step": 7140 }, { "epoch": 0.91, "grad_norm": 0.43906844918570725, "learning_rate": 4.012641599419975e-07, "loss": 0.5793, "step": 7141 }, { "epoch": 0.91, "grad_norm": 0.5682413883985216, "learning_rate": 4.001046938693742e-07, "loss": 0.583, "step": 7142 }, { "epoch": 0.91, "grad_norm": 0.5459206531545421, "learning_rate": 3.9894687118219535e-07, "loss": 0.5993, "step": 7143 }, { "epoch": 0.91, "grad_norm": 0.5496864707484475, "learning_rate": 3.9779069207866563e-07, "loss": 0.6225, "step": 7144 }, { "epoch": 0.91, "grad_norm": 0.48769942975311087, "learning_rate": 3.966361567567112e-07, "loss": 0.5777, "step": 7145 }, { "epoch": 0.91, "grad_norm": 0.5080251853312814, "learning_rate": 3.9548326541397177e-07, "loss": 0.6041, "step": 7146 }, { "epoch": 0.91, "grad_norm": 0.5341067883853392, "learning_rate": 3.943320182478105e-07, "loss": 0.6221, "step": 7147 }, { "epoch": 0.91, "grad_norm": 0.5067481395731539, "learning_rate": 3.9318241545530546e-07, "loss": 0.5622, "step": 7148 }, { "epoch": 0.91, "grad_norm": 0.4715133714118766, "learning_rate": 3.9203445723325463e-07, "loss": 0.5992, "step": 7149 }, { "epoch": 0.91, "grad_norm": 0.5877260240594608, "learning_rate": 3.908881437781764e-07, "loss": 0.5979, "step": 7150 }, { "epoch": 0.91, "grad_norm": 0.4507051525232864, "learning_rate": 3.8974347528630497e-07, "loss": 0.5789, "step": 7151 }, { "epoch": 0.91, "grad_norm": 0.5060155029110813, "learning_rate": 3.886004519535924e-07, "loss": 0.5706, "step": 7152 }, { "epoch": 0.91, "grad_norm": 0.5090704244434002, "learning_rate": 3.8745907397571226e-07, "loss": 0.5846, "step": 7153 }, { "epoch": 0.91, "grad_norm": 0.5547188991011853, "learning_rate": 3.863193415480515e-07, "loss": 0.6225, "step": 7154 }, { "epoch": 0.91, "grad_norm": 0.5626848483523533, "learning_rate": 3.851812548657219e-07, "loss": 0.6576, "step": 7155 }, { "epoch": 0.91, "grad_norm": 0.4618635129307676, "learning_rate": 3.8404481412354646e-07, "loss": 0.5671, "step": 7156 }, { "epoch": 0.91, "grad_norm": 0.5496425269596449, "learning_rate": 3.82910019516074e-07, "loss": 0.622, "step": 7157 }, { "epoch": 0.91, "grad_norm": 0.4370109080357072, "learning_rate": 3.817768712375658e-07, "loss": 0.5535, "step": 7158 }, { "epoch": 0.91, "grad_norm": 0.48704604436993243, "learning_rate": 3.8064536948200335e-07, "loss": 0.576, "step": 7159 }, { "epoch": 0.91, "grad_norm": 0.49993939696198814, "learning_rate": 3.795155144430862e-07, "loss": 0.5857, "step": 7160 }, { "epoch": 0.91, "grad_norm": 0.44867649711169244, "learning_rate": 3.783873063142307e-07, "loss": 0.5785, "step": 7161 }, { "epoch": 0.91, "grad_norm": 0.44129605228669483, "learning_rate": 3.7726074528857235e-07, "loss": 0.5648, "step": 7162 }, { "epoch": 0.92, "grad_norm": 0.4796273376405072, "learning_rate": 3.761358315589647e-07, "loss": 0.595, "step": 7163 }, { "epoch": 0.92, "grad_norm": 0.5638835762527664, "learning_rate": 3.7501256531798147e-07, "loss": 0.609, "step": 7164 }, { "epoch": 0.92, "grad_norm": 0.49209908997833784, "learning_rate": 3.7389094675790995e-07, "loss": 0.608, "step": 7165 }, { "epoch": 0.92, "grad_norm": 0.4645119277206799, "learning_rate": 3.727709760707587e-07, "loss": 0.5686, "step": 7166 }, { "epoch": 0.92, "grad_norm": 0.4967951831533258, "learning_rate": 3.716526534482534e-07, "loss": 0.6303, "step": 7167 }, { "epoch": 0.92, "grad_norm": 0.4990440433491681, "learning_rate": 3.7053597908183525e-07, "loss": 0.5829, "step": 7168 }, { "epoch": 0.92, "grad_norm": 0.5691404476116407, "learning_rate": 3.6942095316266913e-07, "loss": 0.6294, "step": 7169 }, { "epoch": 0.92, "grad_norm": 0.489302992604274, "learning_rate": 3.6830757588162793e-07, "loss": 0.5786, "step": 7170 }, { "epoch": 0.92, "grad_norm": 0.4780437782992099, "learning_rate": 3.671958474293147e-07, "loss": 0.5976, "step": 7171 }, { "epoch": 0.92, "grad_norm": 0.45412285148172016, "learning_rate": 3.6608576799603945e-07, "loss": 0.5549, "step": 7172 }, { "epoch": 0.92, "grad_norm": 0.5347694471991005, "learning_rate": 3.6497733777183686e-07, "loss": 0.5969, "step": 7173 }, { "epoch": 0.92, "grad_norm": 0.48348369737830166, "learning_rate": 3.6387055694645624e-07, "loss": 0.5639, "step": 7174 }, { "epoch": 0.92, "grad_norm": 0.5648783358912074, "learning_rate": 3.6276542570936377e-07, "loss": 0.6252, "step": 7175 }, { "epoch": 0.92, "grad_norm": 0.5590620537677649, "learning_rate": 3.616619442497449e-07, "loss": 0.618, "step": 7176 }, { "epoch": 0.92, "grad_norm": 0.48994212189337516, "learning_rate": 3.6056011275650284e-07, "loss": 0.5904, "step": 7177 }, { "epoch": 0.92, "grad_norm": 0.4792465237815516, "learning_rate": 3.5945993141825786e-07, "loss": 0.5875, "step": 7178 }, { "epoch": 0.92, "grad_norm": 0.5608785524184682, "learning_rate": 3.583614004233471e-07, "loss": 0.5706, "step": 7179 }, { "epoch": 0.92, "grad_norm": 0.47370540832887953, "learning_rate": 3.572645199598257e-07, "loss": 0.5974, "step": 7180 }, { "epoch": 0.92, "grad_norm": 0.5587970106151574, "learning_rate": 3.5616929021546677e-07, "loss": 0.6147, "step": 7181 }, { "epoch": 0.92, "grad_norm": 0.46227222409775337, "learning_rate": 3.550757113777603e-07, "loss": 0.5704, "step": 7182 }, { "epoch": 0.92, "grad_norm": 0.6194370172065804, "learning_rate": 3.5398378363391327e-07, "loss": 0.6596, "step": 7183 }, { "epoch": 0.92, "grad_norm": 0.6098831518518864, "learning_rate": 3.5289350717085056e-07, "loss": 0.6129, "step": 7184 }, { "epoch": 0.92, "grad_norm": 0.44341178373374374, "learning_rate": 3.5180488217521405e-07, "loss": 0.574, "step": 7185 }, { "epoch": 0.92, "grad_norm": 0.4660328523981488, "learning_rate": 3.507179088333645e-07, "loss": 0.5649, "step": 7186 }, { "epoch": 0.92, "grad_norm": 0.5640559574676227, "learning_rate": 3.496325873313766e-07, "loss": 0.6289, "step": 7187 }, { "epoch": 0.92, "grad_norm": 0.4877907670789478, "learning_rate": 3.4854891785504495e-07, "loss": 0.5967, "step": 7188 }, { "epoch": 0.92, "grad_norm": 0.5462771001915552, "learning_rate": 3.474669005898812e-07, "loss": 0.6053, "step": 7189 }, { "epoch": 0.92, "grad_norm": 0.5072442782475924, "learning_rate": 3.463865357211127e-07, "loss": 0.5719, "step": 7190 }, { "epoch": 0.92, "grad_norm": 0.47933395166632337, "learning_rate": 3.4530782343368484e-07, "loss": 0.5617, "step": 7191 }, { "epoch": 0.92, "grad_norm": 0.4772606442837965, "learning_rate": 3.4423076391225885e-07, "loss": 0.5972, "step": 7192 }, { "epoch": 0.92, "grad_norm": 0.5061821312207999, "learning_rate": 3.431553573412194e-07, "loss": 0.5803, "step": 7193 }, { "epoch": 0.92, "grad_norm": 0.5311235370190535, "learning_rate": 3.4208160390465707e-07, "loss": 0.5897, "step": 7194 }, { "epoch": 0.92, "grad_norm": 0.47963997453366286, "learning_rate": 3.410095037863881e-07, "loss": 0.5702, "step": 7195 }, { "epoch": 0.92, "grad_norm": 0.5522583762937672, "learning_rate": 3.3993905716994237e-07, "loss": 0.6182, "step": 7196 }, { "epoch": 0.92, "grad_norm": 0.4796130515902988, "learning_rate": 3.3887026423856775e-07, "loss": 0.6069, "step": 7197 }, { "epoch": 0.92, "grad_norm": 0.49976650179926785, "learning_rate": 3.3780312517522896e-07, "loss": 0.5678, "step": 7198 }, { "epoch": 0.92, "grad_norm": 0.5641269488711865, "learning_rate": 3.3673764016260544e-07, "loss": 0.5967, "step": 7199 }, { "epoch": 0.92, "grad_norm": 0.5168332619711768, "learning_rate": 3.356738093830969e-07, "loss": 0.5949, "step": 7200 }, { "epoch": 0.92, "grad_norm": 0.5024171169551428, "learning_rate": 3.3461163301881874e-07, "loss": 0.5982, "step": 7201 }, { "epoch": 0.92, "grad_norm": 0.578669402675311, "learning_rate": 3.335511112516021e-07, "loss": 0.5924, "step": 7202 }, { "epoch": 0.92, "grad_norm": 0.5564265946808207, "learning_rate": 3.3249224426299406e-07, "loss": 0.597, "step": 7203 }, { "epoch": 0.92, "grad_norm": 0.5024995429877276, "learning_rate": 3.314350322342619e-07, "loss": 0.5972, "step": 7204 }, { "epoch": 0.92, "grad_norm": 0.49018377537473906, "learning_rate": 3.3037947534638515e-07, "loss": 0.5749, "step": 7205 }, { "epoch": 0.92, "grad_norm": 0.45223825413378127, "learning_rate": 3.293255737800638e-07, "loss": 0.5752, "step": 7206 }, { "epoch": 0.92, "grad_norm": 0.5214722187949318, "learning_rate": 3.282733277157124e-07, "loss": 0.6242, "step": 7207 }, { "epoch": 0.92, "grad_norm": 0.45712548043865847, "learning_rate": 3.2722273733346355e-07, "loss": 0.5726, "step": 7208 }, { "epoch": 0.92, "grad_norm": 0.47808697734360484, "learning_rate": 3.261738028131656e-07, "loss": 0.5769, "step": 7209 }, { "epoch": 0.92, "grad_norm": 0.4973404886959864, "learning_rate": 3.2512652433438266e-07, "loss": 0.602, "step": 7210 }, { "epoch": 0.92, "grad_norm": 0.5315421977618258, "learning_rate": 3.24080902076398e-07, "loss": 0.5695, "step": 7211 }, { "epoch": 0.92, "grad_norm": 0.519965646736461, "learning_rate": 3.230369362182062e-07, "loss": 0.63, "step": 7212 }, { "epoch": 0.92, "grad_norm": 0.5118932535251423, "learning_rate": 3.219946269385221e-07, "loss": 0.5681, "step": 7213 }, { "epoch": 0.92, "grad_norm": 0.4970455598839605, "learning_rate": 3.209539744157775e-07, "loss": 0.5953, "step": 7214 }, { "epoch": 0.92, "grad_norm": 0.46325750061305476, "learning_rate": 3.1991497882812083e-07, "loss": 0.5693, "step": 7215 }, { "epoch": 0.92, "grad_norm": 0.5968603276345447, "learning_rate": 3.188776403534133e-07, "loss": 0.6191, "step": 7216 }, { "epoch": 0.92, "grad_norm": 0.5301371032520704, "learning_rate": 3.17841959169235e-07, "loss": 0.6003, "step": 7217 }, { "epoch": 0.92, "grad_norm": 0.5469231738844569, "learning_rate": 3.16807935452883e-07, "loss": 0.5914, "step": 7218 }, { "epoch": 0.92, "grad_norm": 0.5197544694832669, "learning_rate": 3.15775569381368e-07, "loss": 0.6286, "step": 7219 }, { "epoch": 0.92, "grad_norm": 0.5060549581347691, "learning_rate": 3.147448611314208e-07, "loss": 0.5707, "step": 7220 }, { "epoch": 0.92, "grad_norm": 0.4462120132290171, "learning_rate": 3.1371581087948355e-07, "loss": 0.5579, "step": 7221 }, { "epoch": 0.92, "grad_norm": 0.4572301228507167, "learning_rate": 3.1268841880171987e-07, "loss": 0.5598, "step": 7222 }, { "epoch": 0.92, "grad_norm": 0.43879522183974623, "learning_rate": 3.1166268507400455e-07, "loss": 0.5784, "step": 7223 }, { "epoch": 0.92, "grad_norm": 0.519353370538581, "learning_rate": 3.106386098719305e-07, "loss": 0.5684, "step": 7224 }, { "epoch": 0.92, "grad_norm": 0.4747194825910002, "learning_rate": 3.0961619337080974e-07, "loss": 0.5928, "step": 7225 }, { "epoch": 0.92, "grad_norm": 0.5389071027066629, "learning_rate": 3.085954357456644e-07, "loss": 0.6069, "step": 7226 }, { "epoch": 0.92, "grad_norm": 0.4794823136533383, "learning_rate": 3.0757633717123815e-07, "loss": 0.5944, "step": 7227 }, { "epoch": 0.92, "grad_norm": 0.46893151419970264, "learning_rate": 3.065588978219869e-07, "loss": 0.5736, "step": 7228 }, { "epoch": 0.92, "grad_norm": 0.5122082990993814, "learning_rate": 3.055431178720847e-07, "loss": 0.5839, "step": 7229 }, { "epoch": 0.92, "grad_norm": 0.5611641935591696, "learning_rate": 3.045289974954191e-07, "loss": 0.6717, "step": 7230 }, { "epoch": 0.92, "grad_norm": 0.50410711769439, "learning_rate": 3.035165368655979e-07, "loss": 0.5645, "step": 7231 }, { "epoch": 0.92, "grad_norm": 0.7473942866014149, "learning_rate": 3.0250573615594027e-07, "loss": 0.604, "step": 7232 }, { "epoch": 0.92, "grad_norm": 0.39851131348004315, "learning_rate": 3.014965955394833e-07, "loss": 0.5375, "step": 7233 }, { "epoch": 0.92, "grad_norm": 0.4577573065164038, "learning_rate": 3.0048911518897774e-07, "loss": 0.5724, "step": 7234 }, { "epoch": 0.92, "grad_norm": 0.5166863347467602, "learning_rate": 2.994832952768967e-07, "loss": 0.6016, "step": 7235 }, { "epoch": 0.92, "grad_norm": 0.48056348102327334, "learning_rate": 2.984791359754202e-07, "loss": 0.5726, "step": 7236 }, { "epoch": 0.92, "grad_norm": 0.5981818775397317, "learning_rate": 2.974766374564497e-07, "loss": 0.6301, "step": 7237 }, { "epoch": 0.92, "grad_norm": 0.4839930467016331, "learning_rate": 2.9647579989160125e-07, "loss": 0.5992, "step": 7238 }, { "epoch": 0.92, "grad_norm": 0.5491232626505715, "learning_rate": 2.954766234522044e-07, "loss": 0.6363, "step": 7239 }, { "epoch": 0.92, "grad_norm": 0.4846471420241556, "learning_rate": 2.944791083093068e-07, "loss": 0.5997, "step": 7240 }, { "epoch": 0.93, "grad_norm": 0.45061107325153377, "learning_rate": 2.934832546336708e-07, "loss": 0.5678, "step": 7241 }, { "epoch": 0.93, "grad_norm": 0.4923038480771426, "learning_rate": 2.9248906259577437e-07, "loss": 0.6045, "step": 7242 }, { "epoch": 0.93, "grad_norm": 0.4697806650452181, "learning_rate": 2.9149653236581257e-07, "loss": 0.5682, "step": 7243 }, { "epoch": 0.93, "grad_norm": 0.49555648711516687, "learning_rate": 2.90505664113695e-07, "loss": 0.5934, "step": 7244 }, { "epoch": 0.93, "grad_norm": 0.43473110925201597, "learning_rate": 2.895164580090426e-07, "loss": 0.5778, "step": 7245 }, { "epoch": 0.93, "grad_norm": 0.48272786811676033, "learning_rate": 2.8852891422119777e-07, "loss": 0.5756, "step": 7246 }, { "epoch": 0.93, "grad_norm": 0.500785055738075, "learning_rate": 2.875430329192153e-07, "loss": 0.5906, "step": 7247 }, { "epoch": 0.93, "grad_norm": 0.5056466317859203, "learning_rate": 2.865588142718678e-07, "loss": 0.5963, "step": 7248 }, { "epoch": 0.93, "grad_norm": 0.4844188804897101, "learning_rate": 2.8557625844763846e-07, "loss": 0.5827, "step": 7249 }, { "epoch": 0.93, "grad_norm": 0.581087550581453, "learning_rate": 2.8459536561473153e-07, "loss": 0.632, "step": 7250 }, { "epoch": 0.93, "grad_norm": 0.4853590905631725, "learning_rate": 2.836161359410639e-07, "loss": 0.6131, "step": 7251 }, { "epoch": 0.93, "grad_norm": 0.4990409389733519, "learning_rate": 2.826385695942657e-07, "loss": 0.5814, "step": 7252 }, { "epoch": 0.93, "grad_norm": 0.47860919741221797, "learning_rate": 2.8166266674168776e-07, "loss": 0.5843, "step": 7253 }, { "epoch": 0.93, "grad_norm": 0.4709047346146818, "learning_rate": 2.806884275503896e-07, "loss": 0.5897, "step": 7254 }, { "epoch": 0.93, "grad_norm": 0.5174226554000666, "learning_rate": 2.7971585218715014e-07, "loss": 0.5958, "step": 7255 }, { "epoch": 0.93, "grad_norm": 0.49533613223824735, "learning_rate": 2.787449408184617e-07, "loss": 0.5714, "step": 7256 }, { "epoch": 0.93, "grad_norm": 0.4391734507954386, "learning_rate": 2.777756936105347e-07, "loss": 0.5851, "step": 7257 }, { "epoch": 0.93, "grad_norm": 0.4728758306392253, "learning_rate": 2.768081107292919e-07, "loss": 0.606, "step": 7258 }, { "epoch": 0.93, "grad_norm": 0.5374230991338845, "learning_rate": 2.758421923403698e-07, "loss": 0.5836, "step": 7259 }, { "epoch": 0.93, "grad_norm": 0.5017536061725099, "learning_rate": 2.748779386091238e-07, "loss": 0.591, "step": 7260 }, { "epoch": 0.93, "grad_norm": 0.46832022652695215, "learning_rate": 2.7391534970062195e-07, "loss": 0.5615, "step": 7261 }, { "epoch": 0.93, "grad_norm": 0.5214028552969158, "learning_rate": 2.729544257796479e-07, "loss": 0.5801, "step": 7262 }, { "epoch": 0.93, "grad_norm": 0.5322042260339076, "learning_rate": 2.7199516701069796e-07, "loss": 0.5904, "step": 7263 }, { "epoch": 0.93, "grad_norm": 0.5191420927099829, "learning_rate": 2.710375735579884e-07, "loss": 0.6037, "step": 7264 }, { "epoch": 0.93, "grad_norm": 0.5027753469405695, "learning_rate": 2.7008164558544713e-07, "loss": 0.6021, "step": 7265 }, { "epoch": 0.93, "grad_norm": 0.5121836049813056, "learning_rate": 2.691273832567176e-07, "loss": 0.6026, "step": 7266 }, { "epoch": 0.93, "grad_norm": 0.5762588481174054, "learning_rate": 2.681747867351558e-07, "loss": 0.6917, "step": 7267 }, { "epoch": 0.93, "grad_norm": 0.5386002677497803, "learning_rate": 2.6722385618383695e-07, "loss": 0.6157, "step": 7268 }, { "epoch": 0.93, "grad_norm": 0.5098669626477735, "learning_rate": 2.662745917655485e-07, "loss": 0.5849, "step": 7269 }, { "epoch": 0.93, "grad_norm": 0.4775435864351032, "learning_rate": 2.6532699364279155e-07, "loss": 0.567, "step": 7270 }, { "epoch": 0.93, "grad_norm": 0.46158828412506675, "learning_rate": 2.6438106197778426e-07, "loss": 0.5561, "step": 7271 }, { "epoch": 0.93, "grad_norm": 0.5255047239293132, "learning_rate": 2.6343679693245914e-07, "loss": 0.6101, "step": 7272 }, { "epoch": 0.93, "grad_norm": 0.545758021484668, "learning_rate": 2.6249419866846257e-07, "loss": 0.6194, "step": 7273 }, { "epoch": 0.93, "grad_norm": 0.5479016062368876, "learning_rate": 2.6155326734715545e-07, "loss": 0.5997, "step": 7274 }, { "epoch": 0.93, "grad_norm": 0.4328671656267807, "learning_rate": 2.606140031296145e-07, "loss": 0.5659, "step": 7275 }, { "epoch": 0.93, "grad_norm": 0.47877134918280423, "learning_rate": 2.596764061766299e-07, "loss": 0.5822, "step": 7276 }, { "epoch": 0.93, "grad_norm": 0.4739689866100054, "learning_rate": 2.5874047664870784e-07, "loss": 0.5863, "step": 7277 }, { "epoch": 0.93, "grad_norm": 0.52239268360339, "learning_rate": 2.578062147060656e-07, "loss": 0.5929, "step": 7278 }, { "epoch": 0.93, "grad_norm": 0.501385191210828, "learning_rate": 2.5687362050863974e-07, "loss": 0.6136, "step": 7279 }, { "epoch": 0.93, "grad_norm": 0.5188802881601409, "learning_rate": 2.559426942160781e-07, "loss": 0.587, "step": 7280 }, { "epoch": 0.93, "grad_norm": 0.5372993059973938, "learning_rate": 2.550134359877454e-07, "loss": 0.5978, "step": 7281 }, { "epoch": 0.93, "grad_norm": 0.5306667539086836, "learning_rate": 2.540858459827167e-07, "loss": 0.6144, "step": 7282 }, { "epoch": 0.93, "grad_norm": 0.4795474512119504, "learning_rate": 2.53159924359786e-07, "loss": 0.5848, "step": 7283 }, { "epoch": 0.93, "grad_norm": 0.531783559375745, "learning_rate": 2.522356712774587e-07, "loss": 0.6094, "step": 7284 }, { "epoch": 0.93, "grad_norm": 0.5127682546205325, "learning_rate": 2.513130868939584e-07, "loss": 0.582, "step": 7285 }, { "epoch": 0.93, "grad_norm": 0.5018063942704701, "learning_rate": 2.503921713672186e-07, "loss": 0.5796, "step": 7286 }, { "epoch": 0.93, "grad_norm": 0.49524246601635913, "learning_rate": 2.494729248548866e-07, "loss": 0.5908, "step": 7287 }, { "epoch": 0.93, "grad_norm": 0.4875669193433607, "learning_rate": 2.4855534751432985e-07, "loss": 0.5586, "step": 7288 }, { "epoch": 0.93, "grad_norm": 0.5994038232996466, "learning_rate": 2.4763943950262383e-07, "loss": 0.673, "step": 7289 }, { "epoch": 0.93, "grad_norm": 0.4698992262734932, "learning_rate": 2.4672520097656307e-07, "loss": 0.5725, "step": 7290 }, { "epoch": 0.93, "grad_norm": 0.5007379340261385, "learning_rate": 2.458126320926513e-07, "loss": 0.58, "step": 7291 }, { "epoch": 0.93, "grad_norm": 0.5986283800846744, "learning_rate": 2.449017330071113e-07, "loss": 0.6263, "step": 7292 }, { "epoch": 0.93, "grad_norm": 0.5259957857181268, "learning_rate": 2.4399250387587837e-07, "loss": 0.6182, "step": 7293 }, { "epoch": 0.93, "grad_norm": 0.5141725096448753, "learning_rate": 2.430849448546013e-07, "loss": 0.5853, "step": 7294 }, { "epoch": 0.93, "grad_norm": 0.5200661722320181, "learning_rate": 2.4217905609864147e-07, "loss": 0.6088, "step": 7295 }, { "epoch": 0.93, "grad_norm": 0.4429045603610417, "learning_rate": 2.412748377630769e-07, "loss": 0.5632, "step": 7296 }, { "epoch": 0.93, "grad_norm": 0.4928676400407597, "learning_rate": 2.4037229000269835e-07, "loss": 0.5708, "step": 7297 }, { "epoch": 0.93, "grad_norm": 0.5189287642812279, "learning_rate": 2.3947141297201213e-07, "loss": 0.5983, "step": 7298 }, { "epoch": 0.93, "grad_norm": 0.5069745196709422, "learning_rate": 2.385722068252372e-07, "loss": 0.5997, "step": 7299 }, { "epoch": 0.93, "grad_norm": 0.5134012562260285, "learning_rate": 2.3767467171630587e-07, "loss": 0.6206, "step": 7300 }, { "epoch": 0.93, "grad_norm": 0.4810912644903387, "learning_rate": 2.367788077988664e-07, "loss": 0.5951, "step": 7301 }, { "epoch": 0.93, "grad_norm": 0.4895215104874144, "learning_rate": 2.3588461522627948e-07, "loss": 0.5828, "step": 7302 }, { "epoch": 0.93, "grad_norm": 0.5178570282548549, "learning_rate": 2.3499209415161928e-07, "loss": 0.591, "step": 7303 }, { "epoch": 0.93, "grad_norm": 0.5119705101776268, "learning_rate": 2.341012447276758e-07, "loss": 0.6087, "step": 7304 }, { "epoch": 0.93, "grad_norm": 0.4679695570643445, "learning_rate": 2.3321206710694933e-07, "loss": 0.5761, "step": 7305 }, { "epoch": 0.93, "grad_norm": 0.5469822814071916, "learning_rate": 2.3232456144165916e-07, "loss": 0.5825, "step": 7306 }, { "epoch": 0.93, "grad_norm": 0.5068028826567113, "learning_rate": 2.3143872788373377e-07, "loss": 0.5809, "step": 7307 }, { "epoch": 0.93, "grad_norm": 0.4586147424340755, "learning_rate": 2.3055456658481746e-07, "loss": 0.5771, "step": 7308 }, { "epoch": 0.93, "grad_norm": 0.5389646402572903, "learning_rate": 2.2967207769626797e-07, "loss": 0.624, "step": 7309 }, { "epoch": 0.93, "grad_norm": 0.4631046888225266, "learning_rate": 2.2879126136915675e-07, "loss": 0.5809, "step": 7310 }, { "epoch": 0.93, "grad_norm": 0.6286444117796116, "learning_rate": 2.279121177542687e-07, "loss": 0.6552, "step": 7311 }, { "epoch": 0.93, "grad_norm": 0.5023128720711408, "learning_rate": 2.2703464700210343e-07, "loss": 0.5986, "step": 7312 }, { "epoch": 0.93, "grad_norm": 0.5401743476090969, "learning_rate": 2.2615884926286968e-07, "loss": 0.621, "step": 7313 }, { "epoch": 0.93, "grad_norm": 0.48454205666985517, "learning_rate": 2.2528472468649866e-07, "loss": 0.5706, "step": 7314 }, { "epoch": 0.93, "grad_norm": 0.4603626729232239, "learning_rate": 2.2441227342262616e-07, "loss": 0.5863, "step": 7315 }, { "epoch": 0.93, "grad_norm": 0.43941634224238035, "learning_rate": 2.2354149562060722e-07, "loss": 0.5852, "step": 7316 }, { "epoch": 0.93, "grad_norm": 0.5449422565489241, "learning_rate": 2.2267239142950702e-07, "loss": 0.6013, "step": 7317 }, { "epoch": 0.93, "grad_norm": 0.46933402825350706, "learning_rate": 2.2180496099810655e-07, "loss": 0.5735, "step": 7318 }, { "epoch": 0.93, "grad_norm": 0.4851321744186205, "learning_rate": 2.2093920447489814e-07, "loss": 0.5923, "step": 7319 }, { "epoch": 0.94, "grad_norm": 0.5004895576842934, "learning_rate": 2.20075122008091e-07, "loss": 0.6044, "step": 7320 }, { "epoch": 0.94, "grad_norm": 0.510497689388834, "learning_rate": 2.192127137456035e-07, "loss": 0.5944, "step": 7321 }, { "epoch": 0.94, "grad_norm": 0.5151772700337234, "learning_rate": 2.1835197983507083e-07, "loss": 0.6354, "step": 7322 }, { "epoch": 0.94, "grad_norm": 0.54215385107074, "learning_rate": 2.174929204238385e-07, "loss": 0.629, "step": 7323 }, { "epoch": 0.94, "grad_norm": 0.42727168480713557, "learning_rate": 2.1663553565896888e-07, "loss": 0.5594, "step": 7324 }, { "epoch": 0.94, "grad_norm": 0.5076594189859169, "learning_rate": 2.1577982568723453e-07, "loss": 0.6022, "step": 7325 }, { "epoch": 0.94, "grad_norm": 0.5024178388064193, "learning_rate": 2.1492579065512388e-07, "loss": 0.5968, "step": 7326 }, { "epoch": 0.94, "grad_norm": 0.5003293621054631, "learning_rate": 2.1407343070883546e-07, "loss": 0.5941, "step": 7327 }, { "epoch": 0.94, "grad_norm": 0.57151782411251, "learning_rate": 2.1322274599428484e-07, "loss": 0.6111, "step": 7328 }, { "epoch": 0.94, "grad_norm": 0.5147149514477318, "learning_rate": 2.123737366570977e-07, "loss": 0.6347, "step": 7329 }, { "epoch": 0.94, "grad_norm": 0.5567987921768945, "learning_rate": 2.115264028426156e-07, "loss": 0.5956, "step": 7330 }, { "epoch": 0.94, "grad_norm": 0.5616759985577198, "learning_rate": 2.1068074469588916e-07, "loss": 0.6517, "step": 7331 }, { "epoch": 0.94, "grad_norm": 0.47380031308681037, "learning_rate": 2.0983676236168705e-07, "loss": 0.564, "step": 7332 }, { "epoch": 0.94, "grad_norm": 0.5025128214157766, "learning_rate": 2.089944559844892e-07, "loss": 0.5992, "step": 7333 }, { "epoch": 0.94, "grad_norm": 0.5171708918720889, "learning_rate": 2.0815382570848475e-07, "loss": 0.604, "step": 7334 }, { "epoch": 0.94, "grad_norm": 0.5629312790054447, "learning_rate": 2.07314871677583e-07, "loss": 0.6323, "step": 7335 }, { "epoch": 0.94, "grad_norm": 0.5372422182110991, "learning_rate": 2.0647759403540247e-07, "loss": 0.6013, "step": 7336 }, { "epoch": 0.94, "grad_norm": 0.5730162865091756, "learning_rate": 2.0564199292527286e-07, "loss": 0.6156, "step": 7337 }, { "epoch": 0.94, "grad_norm": 0.4455125307133777, "learning_rate": 2.0480806849023982e-07, "loss": 0.5436, "step": 7338 }, { "epoch": 0.94, "grad_norm": 0.4932923854322894, "learning_rate": 2.0397582087306244e-07, "loss": 0.5697, "step": 7339 }, { "epoch": 0.94, "grad_norm": 0.5343131117456782, "learning_rate": 2.0314525021620901e-07, "loss": 0.6266, "step": 7340 }, { "epoch": 0.94, "grad_norm": 0.5286458827475542, "learning_rate": 2.0231635666186357e-07, "loss": 0.622, "step": 7341 }, { "epoch": 0.94, "grad_norm": 0.5236559600901785, "learning_rate": 2.014891403519248e-07, "loss": 0.5878, "step": 7342 }, { "epoch": 0.94, "grad_norm": 0.48545938230526614, "learning_rate": 2.0066360142799946e-07, "loss": 0.5935, "step": 7343 }, { "epoch": 0.94, "grad_norm": 0.49322333163059234, "learning_rate": 1.9983974003141227e-07, "loss": 0.5754, "step": 7344 }, { "epoch": 0.94, "grad_norm": 0.8632864026205561, "learning_rate": 1.9901755630319708e-07, "loss": 0.5956, "step": 7345 }, { "epoch": 0.94, "grad_norm": 0.49145708331772714, "learning_rate": 1.981970503841013e-07, "loss": 0.5869, "step": 7346 }, { "epoch": 0.94, "grad_norm": 0.8496518515695723, "learning_rate": 1.9737822241458593e-07, "loss": 0.6249, "step": 7347 }, { "epoch": 0.94, "grad_norm": 0.49484412423085716, "learning_rate": 1.9656107253482325e-07, "loss": 0.5932, "step": 7348 }, { "epoch": 0.94, "grad_norm": 0.47341146383920724, "learning_rate": 1.957456008847014e-07, "loss": 0.5959, "step": 7349 }, { "epoch": 0.94, "grad_norm": 0.4686620641629008, "learning_rate": 1.949318076038187e-07, "loss": 0.5835, "step": 7350 }, { "epoch": 0.94, "grad_norm": 0.44741443579874085, "learning_rate": 1.941196928314859e-07, "loss": 0.5888, "step": 7351 }, { "epoch": 0.94, "grad_norm": 0.48260824921658735, "learning_rate": 1.933092567067274e-07, "loss": 0.5964, "step": 7352 }, { "epoch": 0.94, "grad_norm": 0.5363602872834397, "learning_rate": 1.9250049936828108e-07, "loss": 0.6177, "step": 7353 }, { "epoch": 0.94, "grad_norm": 0.5865125272648092, "learning_rate": 1.916934209545962e-07, "loss": 0.6562, "step": 7354 }, { "epoch": 0.94, "grad_norm": 0.513314120469956, "learning_rate": 1.9088802160383224e-07, "loss": 0.6057, "step": 7355 }, { "epoch": 0.94, "grad_norm": 0.46850981104798306, "learning_rate": 1.900843014538656e-07, "loss": 0.573, "step": 7356 }, { "epoch": 0.94, "grad_norm": 0.5335377375671124, "learning_rate": 1.8928226064228505e-07, "loss": 0.6014, "step": 7357 }, { "epoch": 0.94, "grad_norm": 0.6751355285232896, "learning_rate": 1.884818993063875e-07, "loss": 0.5963, "step": 7358 }, { "epoch": 0.94, "grad_norm": 0.4443452658792282, "learning_rate": 1.8768321758318665e-07, "loss": 0.571, "step": 7359 }, { "epoch": 0.94, "grad_norm": 0.47569276930298393, "learning_rate": 1.8688621560940644e-07, "loss": 0.5875, "step": 7360 }, { "epoch": 0.94, "grad_norm": 0.4992566855066843, "learning_rate": 1.8609089352148447e-07, "loss": 0.6141, "step": 7361 }, { "epoch": 0.94, "grad_norm": 0.48178425638004413, "learning_rate": 1.8529725145556953e-07, "loss": 0.5661, "step": 7362 }, { "epoch": 0.94, "grad_norm": 0.4494407043477364, "learning_rate": 1.8450528954752412e-07, "loss": 0.5659, "step": 7363 }, { "epoch": 0.94, "grad_norm": 0.5026287322002017, "learning_rate": 1.8371500793292196e-07, "loss": 0.586, "step": 7364 }, { "epoch": 0.94, "grad_norm": 0.4530551554566217, "learning_rate": 1.8292640674705042e-07, "loss": 0.5707, "step": 7365 }, { "epoch": 0.94, "grad_norm": 0.5067334624605866, "learning_rate": 1.8213948612490706e-07, "loss": 0.6009, "step": 7366 }, { "epoch": 0.94, "grad_norm": 0.5030778279665291, "learning_rate": 1.8135424620120523e-07, "loss": 0.5689, "step": 7367 }, { "epoch": 0.94, "grad_norm": 0.5728522641861867, "learning_rate": 1.805706871103663e-07, "loss": 0.6453, "step": 7368 }, { "epoch": 0.94, "grad_norm": 0.5130932976471269, "learning_rate": 1.797888089865274e-07, "loss": 0.6036, "step": 7369 }, { "epoch": 0.94, "grad_norm": 0.49542961390208556, "learning_rate": 1.7900861196353704e-07, "loss": 0.5713, "step": 7370 }, { "epoch": 0.94, "grad_norm": 0.4682005696428052, "learning_rate": 1.782300961749539e-07, "loss": 0.5793, "step": 7371 }, { "epoch": 0.94, "grad_norm": 0.529681854777451, "learning_rate": 1.7745326175405143e-07, "loss": 0.6108, "step": 7372 }, { "epoch": 0.94, "grad_norm": 0.5318010328650015, "learning_rate": 1.766781088338132e-07, "loss": 0.5863, "step": 7373 }, { "epoch": 0.94, "grad_norm": 0.5145123851919574, "learning_rate": 1.7590463754693753e-07, "loss": 0.5876, "step": 7374 }, { "epoch": 0.94, "grad_norm": 0.46174647978770894, "learning_rate": 1.7513284802583296e-07, "loss": 0.591, "step": 7375 }, { "epoch": 0.94, "grad_norm": 0.4635634914933894, "learning_rate": 1.7436274040261934e-07, "loss": 0.6133, "step": 7376 }, { "epoch": 0.94, "grad_norm": 0.4966095325544989, "learning_rate": 1.7359431480912903e-07, "loss": 0.5748, "step": 7377 }, { "epoch": 0.94, "grad_norm": 0.5069494053900896, "learning_rate": 1.728275713769112e-07, "loss": 0.5789, "step": 7378 }, { "epoch": 0.94, "grad_norm": 0.5436828767041011, "learning_rate": 1.7206251023721975e-07, "loss": 0.5939, "step": 7379 }, { "epoch": 0.94, "grad_norm": 0.5036106871743181, "learning_rate": 1.7129913152102328e-07, "loss": 0.5734, "step": 7380 }, { "epoch": 0.94, "grad_norm": 0.5407326182141073, "learning_rate": 1.7053743535900502e-07, "loss": 0.6185, "step": 7381 }, { "epoch": 0.94, "grad_norm": 0.5674608564960965, "learning_rate": 1.697774218815573e-07, "loss": 0.6049, "step": 7382 }, { "epoch": 0.94, "grad_norm": 0.5220544985295925, "learning_rate": 1.69019091218785e-07, "loss": 0.6151, "step": 7383 }, { "epoch": 0.94, "grad_norm": 0.4965857978021453, "learning_rate": 1.6826244350050424e-07, "loss": 0.5982, "step": 7384 }, { "epoch": 0.94, "grad_norm": 0.45452254082901344, "learning_rate": 1.6750747885624587e-07, "loss": 0.5774, "step": 7385 }, { "epoch": 0.94, "grad_norm": 0.4687017887384819, "learning_rate": 1.667541974152498e-07, "loss": 0.5556, "step": 7386 }, { "epoch": 0.94, "grad_norm": 0.5389621948637842, "learning_rate": 1.6600259930646955e-07, "loss": 0.5748, "step": 7387 }, { "epoch": 0.94, "grad_norm": 0.5075105347034381, "learning_rate": 1.652526846585678e-07, "loss": 0.5875, "step": 7388 }, { "epoch": 0.94, "grad_norm": 0.44519291916261594, "learning_rate": 1.6450445359992184e-07, "loss": 0.5779, "step": 7389 }, { "epoch": 0.94, "grad_norm": 0.48466411798999764, "learning_rate": 1.637579062586192e-07, "loss": 0.5796, "step": 7390 }, { "epoch": 0.94, "grad_norm": 0.4450238866617613, "learning_rate": 1.6301304276245877e-07, "loss": 0.5515, "step": 7391 }, { "epoch": 0.94, "grad_norm": 0.6108374130647928, "learning_rate": 1.622698632389552e-07, "loss": 0.6169, "step": 7392 }, { "epoch": 0.94, "grad_norm": 0.5779151347784534, "learning_rate": 1.6152836781532898e-07, "loss": 0.5958, "step": 7393 }, { "epoch": 0.94, "grad_norm": 0.5275839846622763, "learning_rate": 1.6078855661851744e-07, "loss": 0.6023, "step": 7394 }, { "epoch": 0.94, "grad_norm": 0.44605987091030813, "learning_rate": 1.6005042977516482e-07, "loss": 0.5746, "step": 7395 }, { "epoch": 0.94, "grad_norm": 0.5293132431257097, "learning_rate": 1.5931398741163228e-07, "loss": 0.579, "step": 7396 }, { "epoch": 0.94, "grad_norm": 0.5176848591476602, "learning_rate": 1.5857922965398674e-07, "loss": 0.6044, "step": 7397 }, { "epoch": 0.95, "grad_norm": 0.5065894143583861, "learning_rate": 1.5784615662801096e-07, "loss": 0.5898, "step": 7398 }, { "epoch": 0.95, "grad_norm": 0.48440747779319326, "learning_rate": 1.571147684591978e-07, "loss": 0.6077, "step": 7399 }, { "epoch": 0.95, "grad_norm": 0.47676466153610153, "learning_rate": 1.5638506527275388e-07, "loss": 0.5794, "step": 7400 }, { "epoch": 0.95, "grad_norm": 0.5081010445198311, "learning_rate": 1.5565704719359477e-07, "loss": 0.5941, "step": 7401 }, { "epoch": 0.95, "grad_norm": 0.5074607283055007, "learning_rate": 1.5493071434634631e-07, "loss": 0.6164, "step": 7402 }, { "epoch": 0.95, "grad_norm": 0.49164330835701403, "learning_rate": 1.5420606685535023e-07, "loss": 0.5645, "step": 7403 }, { "epoch": 0.95, "grad_norm": 0.5088701895684685, "learning_rate": 1.5348310484465724e-07, "loss": 0.6205, "step": 7404 }, { "epoch": 0.95, "grad_norm": 0.4825979878840759, "learning_rate": 1.5276182843802722e-07, "loss": 0.5672, "step": 7405 }, { "epoch": 0.95, "grad_norm": 0.46387703798300955, "learning_rate": 1.52042237758937e-07, "loss": 0.597, "step": 7406 }, { "epoch": 0.95, "grad_norm": 0.5106898187320701, "learning_rate": 1.5132433293057025e-07, "loss": 0.6024, "step": 7407 }, { "epoch": 0.95, "grad_norm": 0.536586243966802, "learning_rate": 1.506081140758231e-07, "loss": 0.6122, "step": 7408 }, { "epoch": 0.95, "grad_norm": 0.4915729425234905, "learning_rate": 1.4989358131730415e-07, "loss": 0.5709, "step": 7409 }, { "epoch": 0.95, "grad_norm": 0.6030527161973919, "learning_rate": 1.491807347773333e-07, "loss": 0.6089, "step": 7410 }, { "epoch": 0.95, "grad_norm": 0.5008818799314886, "learning_rate": 1.4846957457793965e-07, "loss": 0.5987, "step": 7411 }, { "epoch": 0.95, "grad_norm": 0.4605751890617134, "learning_rate": 1.477601008408669e-07, "loss": 0.6003, "step": 7412 }, { "epoch": 0.95, "grad_norm": 0.4548471755969013, "learning_rate": 1.4705231368756678e-07, "loss": 0.6076, "step": 7413 }, { "epoch": 0.95, "grad_norm": 0.49798082147743183, "learning_rate": 1.4634621323920571e-07, "loss": 0.5711, "step": 7414 }, { "epoch": 0.95, "grad_norm": 0.45496313734922206, "learning_rate": 1.4564179961665813e-07, "loss": 0.5645, "step": 7415 }, { "epoch": 0.95, "grad_norm": 0.5541311584393438, "learning_rate": 1.4493907294051092e-07, "loss": 0.6018, "step": 7416 }, { "epoch": 0.95, "grad_norm": 0.5254587624281843, "learning_rate": 1.4423803333106334e-07, "loss": 0.6069, "step": 7417 }, { "epoch": 0.95, "grad_norm": 0.4933252081215695, "learning_rate": 1.435386809083239e-07, "loss": 0.5917, "step": 7418 }, { "epoch": 0.95, "grad_norm": 0.4349563160319453, "learning_rate": 1.4284101579201348e-07, "loss": 0.5542, "step": 7419 }, { "epoch": 0.95, "grad_norm": 0.5111986184041303, "learning_rate": 1.4214503810156543e-07, "loss": 0.6021, "step": 7420 }, { "epoch": 0.95, "grad_norm": 0.491063221778742, "learning_rate": 1.4145074795612001e-07, "loss": 0.5832, "step": 7421 }, { "epoch": 0.95, "grad_norm": 0.4987596360101816, "learning_rate": 1.4075814547453326e-07, "loss": 0.6068, "step": 7422 }, { "epoch": 0.95, "grad_norm": 0.6028580573113195, "learning_rate": 1.4006723077536922e-07, "loss": 0.5861, "step": 7423 }, { "epoch": 0.95, "grad_norm": 0.44992330642563855, "learning_rate": 1.3937800397690438e-07, "loss": 0.5772, "step": 7424 }, { "epoch": 0.95, "grad_norm": 0.5381770179330105, "learning_rate": 1.386904651971266e-07, "loss": 0.6203, "step": 7425 }, { "epoch": 0.95, "grad_norm": 0.4991233097106389, "learning_rate": 1.380046145537328e-07, "loss": 0.5759, "step": 7426 }, { "epoch": 0.95, "grad_norm": 0.4854167916437808, "learning_rate": 1.3732045216413358e-07, "loss": 0.6172, "step": 7427 }, { "epoch": 0.95, "grad_norm": 0.5413422231725112, "learning_rate": 1.3663797814544965e-07, "loss": 0.5851, "step": 7428 }, { "epoch": 0.95, "grad_norm": 0.48320643038499533, "learning_rate": 1.359571926145109e-07, "loss": 0.5789, "step": 7429 }, { "epoch": 0.95, "grad_norm": 0.49461907642294356, "learning_rate": 1.3527809568786077e-07, "loss": 0.622, "step": 7430 }, { "epoch": 0.95, "grad_norm": 0.5529287770417484, "learning_rate": 1.346006874817507e-07, "loss": 0.6177, "step": 7431 }, { "epoch": 0.95, "grad_norm": 0.5046564055386441, "learning_rate": 1.3392496811214573e-07, "loss": 0.5713, "step": 7432 }, { "epoch": 0.95, "grad_norm": 0.5068541340523973, "learning_rate": 1.332509376947211e-07, "loss": 0.6056, "step": 7433 }, { "epoch": 0.95, "grad_norm": 0.4860968112449773, "learning_rate": 1.3257859634486224e-07, "loss": 0.6048, "step": 7434 }, { "epoch": 0.95, "grad_norm": 0.4764464138935778, "learning_rate": 1.3190794417766606e-07, "loss": 0.5781, "step": 7435 }, { "epoch": 0.95, "grad_norm": 0.5589269836599497, "learning_rate": 1.3123898130793955e-07, "loss": 0.6217, "step": 7436 }, { "epoch": 0.95, "grad_norm": 0.4678627631444429, "learning_rate": 1.3057170785020112e-07, "loss": 0.6066, "step": 7437 }, { "epoch": 0.95, "grad_norm": 0.5147044784098304, "learning_rate": 1.2990612391868163e-07, "loss": 0.6178, "step": 7438 }, { "epoch": 0.95, "grad_norm": 0.48995986978900147, "learning_rate": 1.292422296273188e-07, "loss": 0.5935, "step": 7439 }, { "epoch": 0.95, "grad_norm": 0.49172529407977456, "learning_rate": 1.2858002508976396e-07, "loss": 0.5941, "step": 7440 }, { "epoch": 0.95, "grad_norm": 0.5202658921694682, "learning_rate": 1.2791951041937755e-07, "loss": 0.5559, "step": 7441 }, { "epoch": 0.95, "grad_norm": 0.488901571460743, "learning_rate": 1.2726068572923355e-07, "loss": 0.5562, "step": 7442 }, { "epoch": 0.95, "grad_norm": 0.5140806122201631, "learning_rate": 1.2660355113211398e-07, "loss": 0.5918, "step": 7443 }, { "epoch": 0.95, "grad_norm": 0.44096357809059133, "learning_rate": 1.2594810674051217e-07, "loss": 0.564, "step": 7444 }, { "epoch": 0.95, "grad_norm": 0.5396085733241124, "learning_rate": 1.252943526666317e-07, "loss": 0.5665, "step": 7445 }, { "epoch": 0.95, "grad_norm": 0.5389565745276051, "learning_rate": 1.2464228902238862e-07, "loss": 0.624, "step": 7446 }, { "epoch": 0.95, "grad_norm": 0.5175134284906224, "learning_rate": 1.2399191591940585e-07, "loss": 0.5911, "step": 7447 }, { "epoch": 0.95, "grad_norm": 0.4942009920410909, "learning_rate": 1.233432334690221e-07, "loss": 0.6122, "step": 7448 }, { "epoch": 0.95, "grad_norm": 0.5037277413324631, "learning_rate": 1.226962417822819e-07, "loss": 0.587, "step": 7449 }, { "epoch": 0.95, "grad_norm": 0.4906514651687576, "learning_rate": 1.2205094096994442e-07, "loss": 0.587, "step": 7450 }, { "epoch": 0.95, "grad_norm": 0.5702948712416342, "learning_rate": 1.214073311424757e-07, "loss": 0.5936, "step": 7451 }, { "epoch": 0.95, "grad_norm": 0.46455230722179464, "learning_rate": 1.2076541241005436e-07, "loss": 0.5754, "step": 7452 }, { "epoch": 0.95, "grad_norm": 0.46398606376433604, "learning_rate": 1.2012518488256908e-07, "loss": 0.5753, "step": 7453 }, { "epoch": 0.95, "grad_norm": 0.45134829154922035, "learning_rate": 1.1948664866962e-07, "loss": 0.5746, "step": 7454 }, { "epoch": 0.95, "grad_norm": 0.5003526711782785, "learning_rate": 1.1884980388051526e-07, "loss": 0.5837, "step": 7455 }, { "epoch": 0.95, "grad_norm": 0.5154411580751384, "learning_rate": 1.182146506242754e-07, "loss": 0.5904, "step": 7456 }, { "epoch": 0.95, "grad_norm": 0.5280222231520896, "learning_rate": 1.1758118900963122e-07, "loss": 0.6276, "step": 7457 }, { "epoch": 0.95, "grad_norm": 0.4443255305070581, "learning_rate": 1.1694941914502266e-07, "loss": 0.5877, "step": 7458 }, { "epoch": 0.95, "grad_norm": 0.5312747493806681, "learning_rate": 1.1631934113860322e-07, "loss": 0.6068, "step": 7459 }, { "epoch": 0.95, "grad_norm": 0.4868750768704931, "learning_rate": 1.1569095509823214e-07, "loss": 0.5846, "step": 7460 }, { "epoch": 0.95, "grad_norm": 0.5189984746642704, "learning_rate": 1.1506426113148339e-07, "loss": 0.6172, "step": 7461 }, { "epoch": 0.95, "grad_norm": 0.4588379179089601, "learning_rate": 1.1443925934563894e-07, "loss": 0.5523, "step": 7462 }, { "epoch": 0.95, "grad_norm": 0.5062951430964334, "learning_rate": 1.1381594984768984e-07, "loss": 0.6226, "step": 7463 }, { "epoch": 0.95, "grad_norm": 0.49706752202157817, "learning_rate": 1.1319433274434189e-07, "loss": 0.5526, "step": 7464 }, { "epoch": 0.95, "grad_norm": 0.4116158691540443, "learning_rate": 1.1257440814200548e-07, "loss": 0.5708, "step": 7465 }, { "epoch": 0.95, "grad_norm": 0.5591517880640776, "learning_rate": 1.1195617614680687e-07, "loss": 0.64, "step": 7466 }, { "epoch": 0.95, "grad_norm": 0.5016742739306083, "learning_rate": 1.1133963686457805e-07, "loss": 0.6157, "step": 7467 }, { "epoch": 0.95, "grad_norm": 0.49685669992187864, "learning_rate": 1.1072479040086348e-07, "loss": 0.573, "step": 7468 }, { "epoch": 0.95, "grad_norm": 0.4685062156026941, "learning_rate": 1.1011163686091675e-07, "loss": 0.6007, "step": 7469 }, { "epoch": 0.95, "grad_norm": 0.4581842442415088, "learning_rate": 1.09500176349705e-07, "loss": 0.5886, "step": 7470 }, { "epoch": 0.95, "grad_norm": 0.4933494829837346, "learning_rate": 1.0889040897190006e-07, "loss": 0.6079, "step": 7471 }, { "epoch": 0.95, "grad_norm": 0.5292838166621525, "learning_rate": 1.0828233483188732e-07, "loss": 0.5922, "step": 7472 }, { "epoch": 0.95, "grad_norm": 0.5242617672433306, "learning_rate": 1.0767595403376241e-07, "loss": 0.6591, "step": 7473 }, { "epoch": 0.95, "grad_norm": 0.5541010474744051, "learning_rate": 1.0707126668132894e-07, "loss": 0.5955, "step": 7474 }, { "epoch": 0.95, "grad_norm": 0.4889796187267287, "learning_rate": 1.0646827287810413e-07, "loss": 0.601, "step": 7475 }, { "epoch": 0.96, "grad_norm": 0.43106430867244977, "learning_rate": 1.0586697272731095e-07, "loss": 0.5856, "step": 7476 }, { "epoch": 0.96, "grad_norm": 0.5034584858650605, "learning_rate": 1.0526736633188595e-07, "loss": 0.6248, "step": 7477 }, { "epoch": 0.96, "grad_norm": 0.47438727611783066, "learning_rate": 1.046694537944748e-07, "loss": 0.58, "step": 7478 }, { "epoch": 0.96, "grad_norm": 0.5089521076750618, "learning_rate": 1.0407323521743229e-07, "loss": 0.6186, "step": 7479 }, { "epoch": 0.96, "grad_norm": 0.5087992511783966, "learning_rate": 1.0347871070282345e-07, "loss": 0.6128, "step": 7480 }, { "epoch": 0.96, "grad_norm": 0.496153314093277, "learning_rate": 1.0288588035242464e-07, "loss": 0.6023, "step": 7481 }, { "epoch": 0.96, "grad_norm": 0.5325097242218456, "learning_rate": 1.0229474426772024e-07, "loss": 0.5814, "step": 7482 }, { "epoch": 0.96, "grad_norm": 0.4288670609064129, "learning_rate": 1.0170530254990485e-07, "loss": 0.5882, "step": 7483 }, { "epoch": 0.96, "grad_norm": 0.5254319196500382, "learning_rate": 1.0111755529988665e-07, "loss": 0.6184, "step": 7484 }, { "epoch": 0.96, "grad_norm": 0.5139705814125962, "learning_rate": 1.0053150261827738e-07, "loss": 0.5823, "step": 7485 }, { "epoch": 0.96, "grad_norm": 0.49749115692418333, "learning_rate": 9.994714460540455e-08, "loss": 0.624, "step": 7486 }, { "epoch": 0.96, "grad_norm": 0.5117704770982752, "learning_rate": 9.93644813613026e-08, "loss": 0.607, "step": 7487 }, { "epoch": 0.96, "grad_norm": 0.48640292222557474, "learning_rate": 9.878351298571732e-08, "loss": 0.6141, "step": 7488 }, { "epoch": 0.96, "grad_norm": 0.5567593957123995, "learning_rate": 9.820423957810022e-08, "loss": 0.6211, "step": 7489 }, { "epoch": 0.96, "grad_norm": 0.5205357183652044, "learning_rate": 9.762666123761866e-08, "loss": 0.604, "step": 7490 }, { "epoch": 0.96, "grad_norm": 0.5212415193890401, "learning_rate": 9.705077806314578e-08, "loss": 0.624, "step": 7491 }, { "epoch": 0.96, "grad_norm": 0.5617672924416046, "learning_rate": 9.647659015326716e-08, "loss": 0.6169, "step": 7492 }, { "epoch": 0.96, "grad_norm": 0.5205446243738491, "learning_rate": 9.590409760627528e-08, "loss": 0.5934, "step": 7493 }, { "epoch": 0.96, "grad_norm": 0.4861507006054977, "learning_rate": 9.533330052017508e-08, "loss": 0.5721, "step": 7494 }, { "epoch": 0.96, "grad_norm": 0.5519792743392133, "learning_rate": 9.476419899267953e-08, "loss": 0.6052, "step": 7495 }, { "epoch": 0.96, "grad_norm": 0.5037682095575081, "learning_rate": 9.419679312121177e-08, "loss": 0.5987, "step": 7496 }, { "epoch": 0.96, "grad_norm": 0.4910814762315334, "learning_rate": 9.363108300290524e-08, "loss": 0.5912, "step": 7497 }, { "epoch": 0.96, "grad_norm": 0.49437436764579645, "learning_rate": 9.306706873460136e-08, "loss": 0.5631, "step": 7498 }, { "epoch": 0.96, "grad_norm": 0.5108988308039308, "learning_rate": 9.250475041285401e-08, "loss": 0.6021, "step": 7499 }, { "epoch": 0.96, "grad_norm": 0.5452919848221223, "learning_rate": 9.194412813392506e-08, "loss": 0.6105, "step": 7500 }, { "epoch": 0.96, "grad_norm": 0.4491450443405501, "learning_rate": 9.138520199378664e-08, "loss": 0.5841, "step": 7501 }, { "epoch": 0.96, "grad_norm": 0.5369854350535823, "learning_rate": 9.082797208811778e-08, "loss": 0.6087, "step": 7502 }, { "epoch": 0.96, "grad_norm": 0.505189411916218, "learning_rate": 9.027243851231216e-08, "loss": 0.5863, "step": 7503 }, { "epoch": 0.96, "grad_norm": 0.5103918155234936, "learning_rate": 8.971860136146926e-08, "loss": 0.5906, "step": 7504 }, { "epoch": 0.96, "grad_norm": 0.503641530550766, "learning_rate": 8.916646073039992e-08, "loss": 0.6074, "step": 7505 }, { "epoch": 0.96, "grad_norm": 0.7773672127487435, "learning_rate": 8.861601671362185e-08, "loss": 0.5886, "step": 7506 }, { "epoch": 0.96, "grad_norm": 0.5058143653267084, "learning_rate": 8.806726940536747e-08, "loss": 0.5945, "step": 7507 }, { "epoch": 0.96, "grad_norm": 0.5356789034378471, "learning_rate": 8.752021889957274e-08, "loss": 0.615, "step": 7508 }, { "epoch": 0.96, "grad_norm": 0.45986984030920686, "learning_rate": 8.69748652898883e-08, "loss": 0.5959, "step": 7509 }, { "epoch": 0.96, "grad_norm": 0.4961637212393773, "learning_rate": 8.643120866967059e-08, "loss": 0.6169, "step": 7510 }, { "epoch": 0.96, "grad_norm": 0.48462962070020993, "learning_rate": 8.588924913198737e-08, "loss": 0.5761, "step": 7511 }, { "epoch": 0.96, "grad_norm": 0.5426641100997771, "learning_rate": 8.534898676961556e-08, "loss": 0.6035, "step": 7512 }, { "epoch": 0.96, "grad_norm": 0.4911246360860346, "learning_rate": 8.481042167504228e-08, "loss": 0.5521, "step": 7513 }, { "epoch": 0.96, "grad_norm": 0.43157309480422673, "learning_rate": 8.427355394046266e-08, "loss": 0.5842, "step": 7514 }, { "epoch": 0.96, "grad_norm": 0.5221048514622217, "learning_rate": 8.373838365778098e-08, "loss": 0.6098, "step": 7515 }, { "epoch": 0.96, "grad_norm": 0.5128831473439819, "learning_rate": 8.320491091861393e-08, "loss": 0.5949, "step": 7516 }, { "epoch": 0.96, "grad_norm": 0.5181339279531426, "learning_rate": 8.267313581428405e-08, "loss": 0.5838, "step": 7517 }, { "epoch": 0.96, "grad_norm": 0.47374568398606387, "learning_rate": 8.214305843582627e-08, "loss": 0.5684, "step": 7518 }, { "epoch": 0.96, "grad_norm": 0.5742725143346571, "learning_rate": 8.161467887398022e-08, "loss": 0.6174, "step": 7519 }, { "epoch": 0.96, "grad_norm": 0.47918825360467265, "learning_rate": 8.108799721920246e-08, "loss": 0.5691, "step": 7520 }, { "epoch": 0.96, "grad_norm": 0.49935797935204856, "learning_rate": 8.056301356165197e-08, "loss": 0.5959, "step": 7521 }, { "epoch": 0.96, "grad_norm": 0.4840473537022811, "learning_rate": 8.003972799119908e-08, "loss": 0.6034, "step": 7522 }, { "epoch": 0.96, "grad_norm": 0.4952512609174114, "learning_rate": 7.951814059742436e-08, "loss": 0.5917, "step": 7523 }, { "epoch": 0.96, "grad_norm": 0.4544522256364023, "learning_rate": 7.89982514696186e-08, "loss": 0.5663, "step": 7524 }, { "epoch": 0.96, "grad_norm": 0.6472104138314604, "learning_rate": 7.848006069677839e-08, "loss": 0.6004, "step": 7525 }, { "epoch": 0.96, "grad_norm": 0.4800690448318727, "learning_rate": 7.796356836761277e-08, "loss": 0.598, "step": 7526 }, { "epoch": 0.96, "grad_norm": 0.47705277736749063, "learning_rate": 7.74487745705399e-08, "loss": 0.5855, "step": 7527 }, { "epoch": 0.96, "grad_norm": 0.6488262663790274, "learning_rate": 7.693567939368484e-08, "loss": 0.6879, "step": 7528 }, { "epoch": 0.96, "grad_norm": 0.5164392938784637, "learning_rate": 7.642428292488402e-08, "loss": 0.597, "step": 7529 }, { "epoch": 0.96, "grad_norm": 0.5121450641816973, "learning_rate": 7.591458525168183e-08, "loss": 0.5847, "step": 7530 }, { "epoch": 0.96, "grad_norm": 0.47814948927776557, "learning_rate": 7.540658646133292e-08, "loss": 0.5924, "step": 7531 }, { "epoch": 0.96, "grad_norm": 0.4906488026593894, "learning_rate": 7.490028664079884e-08, "loss": 0.5778, "step": 7532 }, { "epoch": 0.96, "grad_norm": 0.45119422160223516, "learning_rate": 7.439568587675361e-08, "loss": 0.5847, "step": 7533 }, { "epoch": 0.96, "grad_norm": 0.45927158625158604, "learning_rate": 7.38927842555781e-08, "loss": 0.5961, "step": 7534 }, { "epoch": 0.96, "grad_norm": 0.5647026616299679, "learning_rate": 7.339158186336459e-08, "loss": 0.6153, "step": 7535 }, { "epoch": 0.96, "grad_norm": 0.5095853243947673, "learning_rate": 7.289207878590998e-08, "loss": 0.5921, "step": 7536 }, { "epoch": 0.96, "grad_norm": 0.4886281980680844, "learning_rate": 7.239427510872476e-08, "loss": 0.5865, "step": 7537 }, { "epoch": 0.96, "grad_norm": 0.5090684931514627, "learning_rate": 7.189817091702744e-08, "loss": 0.5881, "step": 7538 }, { "epoch": 0.96, "grad_norm": 0.4971484134797898, "learning_rate": 7.140376629574452e-08, "loss": 0.6039, "step": 7539 }, { "epoch": 0.96, "grad_norm": 0.47649196215342665, "learning_rate": 7.091106132951054e-08, "loss": 0.5981, "step": 7540 }, { "epoch": 0.96, "grad_norm": 0.5056173341220754, "learning_rate": 7.042005610267245e-08, "loss": 0.6108, "step": 7541 }, { "epoch": 0.96, "grad_norm": 0.5581069597756307, "learning_rate": 6.993075069928412e-08, "loss": 0.6269, "step": 7542 }, { "epoch": 0.96, "grad_norm": 0.5857570529961926, "learning_rate": 6.944314520310747e-08, "loss": 0.656, "step": 7543 }, { "epoch": 0.96, "grad_norm": 0.4630867381623639, "learning_rate": 6.89572396976168e-08, "loss": 0.5726, "step": 7544 }, { "epoch": 0.96, "grad_norm": 0.5287394455701441, "learning_rate": 6.847303426599117e-08, "loss": 0.5803, "step": 7545 }, { "epoch": 0.96, "grad_norm": 0.504745856046949, "learning_rate": 6.799052899112201e-08, "loss": 0.5669, "step": 7546 }, { "epoch": 0.96, "grad_norm": 0.5473858995990002, "learning_rate": 6.750972395560774e-08, "loss": 0.577, "step": 7547 }, { "epoch": 0.96, "grad_norm": 0.46712921242154415, "learning_rate": 6.70306192417558e-08, "loss": 0.5733, "step": 7548 }, { "epoch": 0.96, "grad_norm": 0.5241697175799156, "learning_rate": 6.655321493158396e-08, "loss": 0.5886, "step": 7549 }, { "epoch": 0.96, "grad_norm": 0.48549722744175466, "learning_rate": 6.607751110681904e-08, "loss": 0.5973, "step": 7550 }, { "epoch": 0.96, "grad_norm": 0.46973152386608447, "learning_rate": 6.560350784889258e-08, "loss": 0.5537, "step": 7551 }, { "epoch": 0.96, "grad_norm": 0.4849632008226446, "learning_rate": 6.513120523895189e-08, "loss": 0.6023, "step": 7552 }, { "epoch": 0.96, "grad_norm": 0.495459514209492, "learning_rate": 6.466060335784674e-08, "loss": 0.5687, "step": 7553 }, { "epoch": 0.96, "grad_norm": 0.5119955957153449, "learning_rate": 6.419170228613935e-08, "loss": 0.6267, "step": 7554 }, { "epoch": 0.97, "grad_norm": 0.5378381584946869, "learning_rate": 6.372450210409997e-08, "loss": 0.5891, "step": 7555 }, { "epoch": 0.97, "grad_norm": 0.48460265415261977, "learning_rate": 6.325900289170794e-08, "loss": 0.5662, "step": 7556 }, { "epoch": 0.97, "grad_norm": 0.49939492527810064, "learning_rate": 6.279520472865064e-08, "loss": 0.5755, "step": 7557 }, { "epoch": 0.97, "grad_norm": 0.46584417834818626, "learning_rate": 6.233310769432455e-08, "loss": 0.5667, "step": 7558 }, { "epoch": 0.97, "grad_norm": 0.49666955514454986, "learning_rate": 6.187271186783528e-08, "loss": 0.6039, "step": 7559 }, { "epoch": 0.97, "grad_norm": 0.5135906277732575, "learning_rate": 6.141401732799645e-08, "loss": 0.6025, "step": 7560 }, { "epoch": 0.97, "grad_norm": 0.6465224879392838, "learning_rate": 6.095702415333193e-08, "loss": 0.677, "step": 7561 }, { "epoch": 0.97, "grad_norm": 0.5120402225444805, "learning_rate": 6.050173242207247e-08, "loss": 0.5984, "step": 7562 }, { "epoch": 0.97, "grad_norm": 0.519533954363767, "learning_rate": 6.004814221216015e-08, "loss": 0.6059, "step": 7563 }, { "epoch": 0.97, "grad_norm": 0.6391746616196206, "learning_rate": 5.959625360124177e-08, "loss": 0.5976, "step": 7564 }, { "epoch": 0.97, "grad_norm": 0.4677288437361776, "learning_rate": 5.914606666667655e-08, "loss": 0.5998, "step": 7565 }, { "epoch": 0.97, "grad_norm": 0.4429500931686939, "learning_rate": 5.869758148553173e-08, "loss": 0.5799, "step": 7566 }, { "epoch": 0.97, "grad_norm": 0.4871694959816292, "learning_rate": 5.8250798134580345e-08, "loss": 0.5876, "step": 7567 }, { "epoch": 0.97, "grad_norm": 0.49794237350714665, "learning_rate": 5.7805716690308986e-08, "loss": 0.5832, "step": 7568 }, { "epoch": 0.97, "grad_norm": 0.5338711617642573, "learning_rate": 5.7362337228906716e-08, "loss": 0.6155, "step": 7569 }, { "epoch": 0.97, "grad_norm": 0.5252779016875272, "learning_rate": 5.692065982627837e-08, "loss": 0.6161, "step": 7570 }, { "epoch": 0.97, "grad_norm": 0.5659725053759057, "learning_rate": 5.648068455803124e-08, "loss": 0.6272, "step": 7571 }, { "epoch": 0.97, "grad_norm": 0.44116661520800443, "learning_rate": 5.604241149948619e-08, "loss": 0.5676, "step": 7572 }, { "epoch": 0.97, "grad_norm": 0.4365841377189397, "learning_rate": 5.560584072566766e-08, "loss": 0.5806, "step": 7573 }, { "epoch": 0.97, "grad_norm": 0.5455278445151234, "learning_rate": 5.517097231131141e-08, "loss": 0.577, "step": 7574 }, { "epoch": 0.97, "grad_norm": 0.5727192493552828, "learning_rate": 5.473780633086345e-08, "loss": 0.6531, "step": 7575 }, { "epoch": 0.97, "grad_norm": 0.5190530402839871, "learning_rate": 5.430634285847558e-08, "loss": 0.5698, "step": 7576 }, { "epoch": 0.97, "grad_norm": 0.4807054584890005, "learning_rate": 5.387658196800871e-08, "loss": 0.5826, "step": 7577 }, { "epoch": 0.97, "grad_norm": 0.513451726177557, "learning_rate": 5.344852373303289e-08, "loss": 0.6416, "step": 7578 }, { "epoch": 0.97, "grad_norm": 0.45984180089862386, "learning_rate": 5.302216822682726e-08, "loss": 0.5543, "step": 7579 }, { "epoch": 0.97, "grad_norm": 0.6044712598578185, "learning_rate": 5.2597515522377904e-08, "loss": 0.5924, "step": 7580 }, { "epoch": 0.97, "grad_norm": 0.5289880011419706, "learning_rate": 5.217456569238111e-08, "loss": 0.5981, "step": 7581 }, { "epoch": 0.97, "grad_norm": 0.4984005467054487, "learning_rate": 5.175331880923895e-08, "loss": 0.5569, "step": 7582 }, { "epoch": 0.97, "grad_norm": 0.5219199739879093, "learning_rate": 5.1333774945065975e-08, "loss": 0.5971, "step": 7583 }, { "epoch": 0.97, "grad_norm": 0.6307906153144948, "learning_rate": 5.091593417168028e-08, "loss": 0.5811, "step": 7584 }, { "epoch": 0.97, "grad_norm": 0.5126405719439323, "learning_rate": 5.0499796560614656e-08, "loss": 0.577, "step": 7585 }, { "epoch": 0.97, "grad_norm": 0.4634265770163565, "learning_rate": 5.008536218310322e-08, "loss": 0.5506, "step": 7586 }, { "epoch": 0.97, "grad_norm": 0.5987847212386973, "learning_rate": 4.967263111009479e-08, "loss": 0.6484, "step": 7587 }, { "epoch": 0.97, "grad_norm": 0.5560338749517975, "learning_rate": 4.926160341224284e-08, "loss": 0.6265, "step": 7588 }, { "epoch": 0.97, "grad_norm": 0.5352229524468578, "learning_rate": 4.8852279159911085e-08, "loss": 0.6171, "step": 7589 }, { "epoch": 0.97, "grad_norm": 0.4677047197470512, "learning_rate": 4.8444658423169036e-08, "loss": 0.5769, "step": 7590 }, { "epoch": 0.97, "grad_norm": 0.47661082604175264, "learning_rate": 4.803874127179753e-08, "loss": 0.5692, "step": 7591 }, { "epoch": 0.97, "grad_norm": 0.47044845400980134, "learning_rate": 4.763452777528543e-08, "loss": 0.569, "step": 7592 }, { "epoch": 0.97, "grad_norm": 0.5561601056865807, "learning_rate": 4.72320180028274e-08, "loss": 0.6215, "step": 7593 }, { "epoch": 0.97, "grad_norm": 0.551014217101942, "learning_rate": 4.6831212023330516e-08, "loss": 0.6445, "step": 7594 }, { "epoch": 0.97, "grad_norm": 0.5141595976930846, "learning_rate": 4.6432109905405475e-08, "loss": 0.6101, "step": 7595 }, { "epoch": 0.97, "grad_norm": 0.49861722308105105, "learning_rate": 4.603471171737539e-08, "loss": 0.5866, "step": 7596 }, { "epoch": 0.97, "grad_norm": 0.4744037676717764, "learning_rate": 4.563901752726918e-08, "loss": 0.5901, "step": 7597 }, { "epoch": 0.97, "grad_norm": 0.5458924772500712, "learning_rate": 4.524502740282599e-08, "loss": 0.6286, "step": 7598 }, { "epoch": 0.97, "grad_norm": 0.44416760210008877, "learning_rate": 4.485274141149076e-08, "loss": 0.5928, "step": 7599 }, { "epoch": 0.97, "grad_norm": 0.47889871145162766, "learning_rate": 4.4462159620418665e-08, "loss": 0.5773, "step": 7600 }, { "epoch": 0.97, "grad_norm": 0.5416621233038622, "learning_rate": 4.407328209647177e-08, "loss": 0.5816, "step": 7601 }, { "epoch": 0.97, "grad_norm": 0.5182945052749633, "learning_rate": 4.368610890622349e-08, "loss": 0.5974, "step": 7602 }, { "epoch": 0.97, "grad_norm": 0.5293677781601176, "learning_rate": 4.3300640115950806e-08, "loss": 0.5776, "step": 7603 }, { "epoch": 0.97, "grad_norm": 0.5143697734232072, "learning_rate": 4.2916875791640946e-08, "loss": 0.6015, "step": 7604 }, { "epoch": 0.97, "grad_norm": 0.45597595099272065, "learning_rate": 4.253481599899245e-08, "loss": 0.5595, "step": 7605 }, { "epoch": 0.97, "grad_norm": 0.5045253096556533, "learning_rate": 4.215446080340746e-08, "loss": 0.6052, "step": 7606 }, { "epoch": 0.97, "grad_norm": 0.49663808727597464, "learning_rate": 4.177581026999833e-08, "loss": 0.6109, "step": 7607 }, { "epoch": 0.97, "grad_norm": 0.4618838953989105, "learning_rate": 4.139886446358543e-08, "loss": 0.57, "step": 7608 }, { "epoch": 0.97, "grad_norm": 0.5437057809810568, "learning_rate": 4.102362344869826e-08, "loss": 0.6398, "step": 7609 }, { "epoch": 0.97, "grad_norm": 0.4853968862577725, "learning_rate": 4.0650087289570986e-08, "loss": 0.6061, "step": 7610 }, { "epoch": 0.97, "grad_norm": 0.5319119858092063, "learning_rate": 4.0278256050151346e-08, "loss": 0.5988, "step": 7611 }, { "epoch": 0.97, "grad_norm": 0.48566816097235666, "learning_rate": 3.990812979409064e-08, "loss": 0.5939, "step": 7612 }, { "epoch": 0.97, "grad_norm": 0.4811210165031595, "learning_rate": 3.953970858475043e-08, "loss": 0.5993, "step": 7613 }, { "epoch": 0.97, "grad_norm": 0.4240492946538571, "learning_rate": 3.9172992485201344e-08, "loss": 0.5647, "step": 7614 }, { "epoch": 0.97, "grad_norm": 0.5138045567556009, "learning_rate": 3.880798155821763e-08, "loss": 0.574, "step": 7615 }, { "epoch": 0.97, "grad_norm": 0.4941835841910442, "learning_rate": 3.844467586628708e-08, "loss": 0.6052, "step": 7616 }, { "epoch": 0.97, "grad_norm": 0.4959378484877252, "learning_rate": 3.808307547160328e-08, "loss": 0.5932, "step": 7617 }, { "epoch": 0.97, "grad_norm": 0.47065225405139316, "learning_rate": 3.772318043606671e-08, "loss": 0.5676, "step": 7618 }, { "epoch": 0.97, "grad_norm": 0.5361895441665041, "learning_rate": 3.736499082128697e-08, "loss": 0.6302, "step": 7619 }, { "epoch": 0.97, "grad_norm": 0.5623302714421933, "learning_rate": 3.70085066885828e-08, "loss": 0.6287, "step": 7620 }, { "epoch": 0.97, "grad_norm": 0.4861423064323589, "learning_rate": 3.6653728098978716e-08, "loss": 0.6125, "step": 7621 }, { "epoch": 0.97, "grad_norm": 0.5412258682346373, "learning_rate": 3.630065511321057e-08, "loss": 0.6367, "step": 7622 }, { "epoch": 0.97, "grad_norm": 0.558974713056813, "learning_rate": 3.59492877917178e-08, "loss": 0.5998, "step": 7623 }, { "epoch": 0.97, "grad_norm": 0.5792166757077589, "learning_rate": 3.559962619465118e-08, "loss": 0.6344, "step": 7624 }, { "epoch": 0.97, "grad_norm": 0.4958749643827007, "learning_rate": 3.5251670381869496e-08, "loss": 0.6107, "step": 7625 }, { "epoch": 0.97, "grad_norm": 0.8781721275963212, "learning_rate": 3.490542041293621e-08, "loss": 0.5919, "step": 7626 }, { "epoch": 0.97, "grad_norm": 0.47795097044348583, "learning_rate": 3.456087634712724e-08, "loss": 0.6116, "step": 7627 }, { "epoch": 0.97, "grad_norm": 0.5505026640862422, "learning_rate": 3.421803824342429e-08, "loss": 0.6, "step": 7628 }, { "epoch": 0.97, "grad_norm": 0.5127941647535086, "learning_rate": 3.3876906160515975e-08, "loss": 0.57, "step": 7629 }, { "epoch": 0.97, "grad_norm": 0.48767407405310237, "learning_rate": 3.353748015680003e-08, "loss": 0.6074, "step": 7630 }, { "epoch": 0.97, "grad_norm": 0.5084859160052857, "learning_rate": 3.31997602903833e-08, "loss": 0.5986, "step": 7631 }, { "epoch": 0.97, "grad_norm": 0.553717210606296, "learning_rate": 3.286374661907732e-08, "loss": 0.5993, "step": 7632 }, { "epoch": 0.98, "grad_norm": 0.4851526642822787, "learning_rate": 3.252943920040497e-08, "loss": 0.5911, "step": 7633 }, { "epoch": 0.98, "grad_norm": 0.4840671406718824, "learning_rate": 3.2196838091596014e-08, "loss": 0.5838, "step": 7634 }, { "epoch": 0.98, "grad_norm": 0.5461362402761671, "learning_rate": 3.1865943349586036e-08, "loss": 0.6129, "step": 7635 }, { "epoch": 0.98, "grad_norm": 0.4836200215347429, "learning_rate": 3.153675503102194e-08, "loss": 0.5815, "step": 7636 }, { "epoch": 0.98, "grad_norm": 0.5579467957856392, "learning_rate": 3.120927319225642e-08, "loss": 0.6034, "step": 7637 }, { "epoch": 0.98, "grad_norm": 0.4919717980458213, "learning_rate": 3.088349788934908e-08, "loss": 0.594, "step": 7638 }, { "epoch": 0.98, "grad_norm": 0.5267073700487586, "learning_rate": 3.0559429178070867e-08, "loss": 0.5945, "step": 7639 }, { "epoch": 0.98, "grad_norm": 0.5076649662629008, "learning_rate": 3.023706711389629e-08, "loss": 0.6262, "step": 7640 }, { "epoch": 0.98, "grad_norm": 0.4629722724961795, "learning_rate": 2.9916411752011206e-08, "loss": 0.5738, "step": 7641 }, { "epoch": 0.98, "grad_norm": 0.4690035731625509, "learning_rate": 2.959746314730616e-08, "loss": 0.5535, "step": 7642 }, { "epoch": 0.98, "grad_norm": 0.5624694237188139, "learning_rate": 2.9280221354384132e-08, "loss": 0.6098, "step": 7643 }, { "epoch": 0.98, "grad_norm": 0.455341165685988, "learning_rate": 2.896468642755057e-08, "loss": 0.5575, "step": 7644 }, { "epoch": 0.98, "grad_norm": 0.4836038629769194, "learning_rate": 2.865085842082227e-08, "loss": 0.6253, "step": 7645 }, { "epoch": 0.98, "grad_norm": 0.48487732641595166, "learning_rate": 2.833873738792292e-08, "loss": 0.593, "step": 7646 }, { "epoch": 0.98, "grad_norm": 0.5069615246126634, "learning_rate": 2.8028323382283117e-08, "loss": 0.611, "step": 7647 }, { "epoch": 0.98, "grad_norm": 0.5078916839907979, "learning_rate": 2.7719616457042576e-08, "loss": 0.6094, "step": 7648 }, { "epoch": 0.98, "grad_norm": 0.480425137800022, "learning_rate": 2.7412616665047907e-08, "loss": 0.5647, "step": 7649 }, { "epoch": 0.98, "grad_norm": 0.5085994871048796, "learning_rate": 2.7107324058853745e-08, "loss": 0.6004, "step": 7650 }, { "epoch": 0.98, "grad_norm": 0.5060088757423604, "learning_rate": 2.6803738690722724e-08, "loss": 0.5988, "step": 7651 }, { "epoch": 0.98, "grad_norm": 0.5765136883590969, "learning_rate": 2.6501860612624387e-08, "loss": 0.6561, "step": 7652 }, { "epoch": 0.98, "grad_norm": 0.4954358731471952, "learning_rate": 2.620168987623739e-08, "loss": 0.5862, "step": 7653 }, { "epoch": 0.98, "grad_norm": 0.4968107236306722, "learning_rate": 2.5903226532946192e-08, "loss": 0.5987, "step": 7654 }, { "epoch": 0.98, "grad_norm": 0.46765126020192593, "learning_rate": 2.560647063384547e-08, "loss": 0.5839, "step": 7655 }, { "epoch": 0.98, "grad_norm": 0.4615406965608641, "learning_rate": 2.5311422229736815e-08, "loss": 0.5888, "step": 7656 }, { "epoch": 0.98, "grad_norm": 0.5083168277733715, "learning_rate": 2.501808137112649e-08, "loss": 0.6106, "step": 7657 }, { "epoch": 0.98, "grad_norm": 0.512900109172689, "learning_rate": 2.4726448108232103e-08, "loss": 0.6281, "step": 7658 }, { "epoch": 0.98, "grad_norm": 0.4874570066347455, "learning_rate": 2.4436522490977055e-08, "loss": 0.5851, "step": 7659 }, { "epoch": 0.98, "grad_norm": 0.5094120915938577, "learning_rate": 2.4148304568994974e-08, "loss": 0.5958, "step": 7660 }, { "epoch": 0.98, "grad_norm": 0.4998505289888279, "learning_rate": 2.3861794391623062e-08, "loss": 0.5884, "step": 7661 }, { "epoch": 0.98, "grad_norm": 0.5679483046922705, "learning_rate": 2.357699200790986e-08, "loss": 0.587, "step": 7662 }, { "epoch": 0.98, "grad_norm": 0.4716948755908058, "learning_rate": 2.3293897466609705e-08, "loss": 0.5891, "step": 7663 }, { "epoch": 0.98, "grad_norm": 0.5516900269087036, "learning_rate": 2.3012510816184942e-08, "loss": 0.5714, "step": 7664 }, { "epoch": 0.98, "grad_norm": 0.5063265117355523, "learning_rate": 2.2732832104804814e-08, "loss": 0.5873, "step": 7665 }, { "epoch": 0.98, "grad_norm": 0.455443020800473, "learning_rate": 2.245486138034769e-08, "loss": 0.5707, "step": 7666 }, { "epoch": 0.98, "grad_norm": 0.550540626886236, "learning_rate": 2.2178598690398844e-08, "loss": 0.6081, "step": 7667 }, { "epoch": 0.98, "grad_norm": 0.5127287674876846, "learning_rate": 2.1904044082250442e-08, "loss": 0.6057, "step": 7668 }, { "epoch": 0.98, "grad_norm": 0.5378117271385824, "learning_rate": 2.1631197602903775e-08, "loss": 0.5861, "step": 7669 }, { "epoch": 0.98, "grad_norm": 0.5140155069162733, "learning_rate": 2.1360059299067038e-08, "loss": 0.5627, "step": 7670 }, { "epoch": 0.98, "grad_norm": 0.47602136393337335, "learning_rate": 2.109062921715421e-08, "loss": 0.5758, "step": 7671 }, { "epoch": 0.98, "grad_norm": 0.49254144789555876, "learning_rate": 2.0822907403289516e-08, "loss": 0.5969, "step": 7672 }, { "epoch": 0.98, "grad_norm": 0.5011535012584057, "learning_rate": 2.0556893903304065e-08, "loss": 0.5906, "step": 7673 }, { "epoch": 0.98, "grad_norm": 0.5516276120042829, "learning_rate": 2.0292588762735877e-08, "loss": 0.6088, "step": 7674 }, { "epoch": 0.98, "grad_norm": 0.4511668494670884, "learning_rate": 2.002999202682987e-08, "loss": 0.59, "step": 7675 }, { "epoch": 0.98, "grad_norm": 0.47879094572284947, "learning_rate": 1.9769103740540087e-08, "loss": 0.5776, "step": 7676 }, { "epoch": 0.98, "grad_norm": 0.5263562650860227, "learning_rate": 1.9509923948527465e-08, "loss": 0.6016, "step": 7677 }, { "epoch": 0.98, "grad_norm": 0.4806055364168368, "learning_rate": 1.925245269515985e-08, "loss": 0.586, "step": 7678 }, { "epoch": 0.98, "grad_norm": 0.5097649635416798, "learning_rate": 1.899669002451532e-08, "loss": 0.5757, "step": 7679 }, { "epoch": 0.98, "grad_norm": 0.4886482140593663, "learning_rate": 1.8742635980374403e-08, "loss": 0.5714, "step": 7680 }, { "epoch": 0.98, "grad_norm": 0.5019601492782809, "learning_rate": 1.8490290606230088e-08, "loss": 0.5866, "step": 7681 }, { "epoch": 0.98, "grad_norm": 0.49535970849704475, "learning_rate": 1.8239653945280046e-08, "loss": 0.604, "step": 7682 }, { "epoch": 0.98, "grad_norm": 0.5489591595074269, "learning_rate": 1.7990726040429952e-08, "loss": 0.5667, "step": 7683 }, { "epoch": 0.98, "grad_norm": 0.49839758518599203, "learning_rate": 1.77435069342935e-08, "loss": 0.5897, "step": 7684 }, { "epoch": 0.98, "grad_norm": 0.5181032832144489, "learning_rate": 1.7497996669192386e-08, "loss": 0.5983, "step": 7685 }, { "epoch": 0.98, "grad_norm": 0.4644115749324498, "learning_rate": 1.7254195287154108e-08, "loss": 0.5738, "step": 7686 }, { "epoch": 0.98, "grad_norm": 0.44901572751425817, "learning_rate": 1.7012102829915278e-08, "loss": 0.5948, "step": 7687 }, { "epoch": 0.98, "grad_norm": 0.611019036518167, "learning_rate": 1.6771719338918303e-08, "loss": 0.6739, "step": 7688 }, { "epoch": 0.98, "grad_norm": 0.5377670343678634, "learning_rate": 1.6533044855313595e-08, "loss": 0.6475, "step": 7689 }, { "epoch": 0.98, "grad_norm": 0.48583435910588624, "learning_rate": 1.6296079419960697e-08, "loss": 0.5938, "step": 7690 }, { "epoch": 0.98, "grad_norm": 0.4827515625041817, "learning_rate": 1.6060823073424936e-08, "loss": 0.5667, "step": 7691 }, { "epoch": 0.98, "grad_norm": 0.483833587993642, "learning_rate": 1.582727585597854e-08, "loss": 0.5761, "step": 7692 }, { "epoch": 0.98, "grad_norm": 0.44780828397077405, "learning_rate": 1.5595437807601756e-08, "loss": 0.5722, "step": 7693 }, { "epoch": 0.98, "grad_norm": 0.4682790642306355, "learning_rate": 1.5365308967982828e-08, "loss": 0.5773, "step": 7694 }, { "epoch": 0.98, "grad_norm": 0.5077708437027888, "learning_rate": 1.5136889376518026e-08, "loss": 0.5825, "step": 7695 }, { "epoch": 0.98, "grad_norm": 0.5130277696234897, "learning_rate": 1.4910179072309406e-08, "loss": 0.5795, "step": 7696 }, { "epoch": 0.98, "grad_norm": 0.4577517628457763, "learning_rate": 1.468517809416592e-08, "loss": 0.5849, "step": 7697 }, { "epoch": 0.98, "grad_norm": 0.5048000589828627, "learning_rate": 1.4461886480605646e-08, "loss": 0.611, "step": 7698 }, { "epoch": 0.98, "grad_norm": 0.5447120789938652, "learning_rate": 1.4240304269854676e-08, "loss": 0.6146, "step": 7699 }, { "epoch": 0.98, "grad_norm": 0.4657777622096595, "learning_rate": 1.4020431499843778e-08, "loss": 0.5725, "step": 7700 }, { "epoch": 0.98, "grad_norm": 0.5144466578892192, "learning_rate": 1.3802268208212843e-08, "loss": 0.6227, "step": 7701 }, { "epoch": 0.98, "grad_norm": 0.47710997681734485, "learning_rate": 1.3585814432308664e-08, "loss": 0.5899, "step": 7702 }, { "epoch": 0.98, "grad_norm": 0.5363854691266409, "learning_rate": 1.3371070209184933e-08, "loss": 0.608, "step": 7703 }, { "epoch": 0.98, "grad_norm": 0.479827284884872, "learning_rate": 1.3158035575604466e-08, "loss": 0.5796, "step": 7704 }, { "epoch": 0.98, "grad_norm": 0.4835197070840931, "learning_rate": 1.2946710568035869e-08, "loss": 0.5652, "step": 7705 }, { "epoch": 0.98, "grad_norm": 0.49770171105903876, "learning_rate": 1.273709522265576e-08, "loss": 0.6273, "step": 7706 }, { "epoch": 0.98, "grad_norm": 0.5021816934948884, "learning_rate": 1.2529189575346545e-08, "loss": 0.5937, "step": 7707 }, { "epoch": 0.98, "grad_norm": 0.49710058362729886, "learning_rate": 1.2322993661699756e-08, "loss": 0.5776, "step": 7708 }, { "epoch": 0.98, "grad_norm": 0.48036901625361433, "learning_rate": 1.2118507517013822e-08, "loss": 0.5642, "step": 7709 }, { "epoch": 0.98, "grad_norm": 0.5801774736772292, "learning_rate": 1.1915731176295187e-08, "loss": 0.6234, "step": 7710 }, { "epoch": 0.99, "grad_norm": 0.49061863937403116, "learning_rate": 1.1714664674253861e-08, "loss": 0.5756, "step": 7711 }, { "epoch": 0.99, "grad_norm": 0.44681731137302416, "learning_rate": 1.1515308045312312e-08, "loss": 0.5668, "step": 7712 }, { "epoch": 0.99, "grad_norm": 0.4635692870580581, "learning_rate": 1.1317661323597683e-08, "loss": 0.603, "step": 7713 }, { "epoch": 0.99, "grad_norm": 0.4809128685741001, "learning_rate": 1.1121724542945133e-08, "loss": 0.5458, "step": 7714 }, { "epoch": 0.99, "grad_norm": 0.543783500696441, "learning_rate": 1.092749773689561e-08, "loss": 0.633, "step": 7715 }, { "epoch": 0.99, "grad_norm": 0.5089675684147639, "learning_rate": 1.073498093869807e-08, "loss": 0.6171, "step": 7716 }, { "epoch": 0.99, "grad_norm": 0.4849851172709452, "learning_rate": 1.0544174181310596e-08, "loss": 0.5974, "step": 7717 }, { "epoch": 0.99, "grad_norm": 0.48529172578642565, "learning_rate": 1.0355077497394838e-08, "loss": 0.5803, "step": 7718 }, { "epoch": 0.99, "grad_norm": 0.47565156637006606, "learning_rate": 1.0167690919324902e-08, "loss": 0.5851, "step": 7719 }, { "epoch": 0.99, "grad_norm": 0.5001974851489497, "learning_rate": 9.982014479176239e-09, "loss": 0.6285, "step": 7720 }, { "epoch": 0.99, "grad_norm": 0.5238255342078962, "learning_rate": 9.798048208735644e-09, "loss": 0.5926, "step": 7721 }, { "epoch": 0.99, "grad_norm": 0.5215769048157126, "learning_rate": 9.615792139495705e-09, "loss": 0.5873, "step": 7722 }, { "epoch": 0.99, "grad_norm": 0.44285868072818957, "learning_rate": 9.435246302655909e-09, "loss": 0.5482, "step": 7723 }, { "epoch": 0.99, "grad_norm": 0.5194339016541122, "learning_rate": 9.256410729124864e-09, "loss": 0.6243, "step": 7724 }, { "epoch": 0.99, "grad_norm": 0.47455752852821376, "learning_rate": 9.079285449515862e-09, "loss": 0.585, "step": 7725 }, { "epoch": 0.99, "grad_norm": 0.4667280528370166, "learning_rate": 8.903870494151312e-09, "loss": 0.5677, "step": 7726 }, { "epoch": 0.99, "grad_norm": 0.5260299884596572, "learning_rate": 8.730165893060526e-09, "loss": 0.6155, "step": 7727 }, { "epoch": 0.99, "grad_norm": 0.507388261530197, "learning_rate": 8.5581716759775e-09, "loss": 0.601, "step": 7728 }, { "epoch": 0.99, "grad_norm": 0.4989631974133916, "learning_rate": 8.387887872347567e-09, "loss": 0.6137, "step": 7729 }, { "epoch": 0.99, "grad_norm": 0.5512673934135139, "learning_rate": 8.219314511320743e-09, "loss": 0.6374, "step": 7730 }, { "epoch": 0.99, "grad_norm": 0.5348639134066804, "learning_rate": 8.052451621755054e-09, "loss": 0.641, "step": 7731 }, { "epoch": 0.99, "grad_norm": 0.481582393923049, "learning_rate": 7.887299232214318e-09, "loss": 0.6039, "step": 7732 }, { "epoch": 0.99, "grad_norm": 0.4680825452597562, "learning_rate": 7.723857370971477e-09, "loss": 0.5932, "step": 7733 }, { "epoch": 0.99, "grad_norm": 0.5179042003667285, "learning_rate": 7.56212606600637e-09, "loss": 0.6017, "step": 7734 }, { "epoch": 0.99, "grad_norm": 0.5375869594797333, "learning_rate": 7.4021053450035185e-09, "loss": 0.6384, "step": 7735 }, { "epoch": 0.99, "grad_norm": 0.5502510969481009, "learning_rate": 7.243795235357676e-09, "loss": 0.6006, "step": 7736 }, { "epoch": 0.99, "grad_norm": 0.557223075318479, "learning_rate": 7.087195764170496e-09, "loss": 0.6185, "step": 7737 }, { "epoch": 0.99, "grad_norm": 0.4916103976469746, "learning_rate": 6.932306958248314e-09, "loss": 0.5876, "step": 7738 }, { "epoch": 0.99, "grad_norm": 0.4525040017028575, "learning_rate": 6.779128844107696e-09, "loss": 0.5625, "step": 7739 }, { "epoch": 0.99, "grad_norm": 0.5116577035495898, "learning_rate": 6.627661447969891e-09, "loss": 0.6011, "step": 7740 }, { "epoch": 0.99, "grad_norm": 0.44917662589694485, "learning_rate": 6.477904795764156e-09, "loss": 0.5586, "step": 7741 }, { "epoch": 0.99, "grad_norm": 0.5097626184692299, "learning_rate": 6.3298589131266515e-09, "loss": 0.5852, "step": 7742 }, { "epoch": 0.99, "grad_norm": 0.5011100708649724, "learning_rate": 6.18352382540266e-09, "loss": 0.5895, "step": 7743 }, { "epoch": 0.99, "grad_norm": 0.5154967093079603, "learning_rate": 6.038899557642142e-09, "loss": 0.603, "step": 7744 }, { "epoch": 0.99, "grad_norm": 0.5297148144554703, "learning_rate": 5.895986134603071e-09, "loss": 0.6159, "step": 7745 }, { "epoch": 0.99, "grad_norm": 0.48699821069989335, "learning_rate": 5.754783580749212e-09, "loss": 0.5858, "step": 7746 }, { "epoch": 0.99, "grad_norm": 0.4925515521683665, "learning_rate": 5.615291920254562e-09, "loss": 0.6008, "step": 7747 }, { "epoch": 0.99, "grad_norm": 0.510755504023902, "learning_rate": 5.477511176997796e-09, "loss": 0.6207, "step": 7748 }, { "epoch": 0.99, "grad_norm": 0.5150917344409647, "learning_rate": 5.341441374564494e-09, "loss": 0.6123, "step": 7749 }, { "epoch": 0.99, "grad_norm": 0.47235618854588113, "learning_rate": 5.207082536249353e-09, "loss": 0.617, "step": 7750 }, { "epoch": 0.99, "grad_norm": 0.5268552825036505, "learning_rate": 5.074434685052865e-09, "loss": 0.581, "step": 7751 }, { "epoch": 0.99, "grad_norm": 0.5111624356635587, "learning_rate": 4.943497843681311e-09, "loss": 0.6147, "step": 7752 }, { "epoch": 0.99, "grad_norm": 0.5729879399449863, "learning_rate": 4.814272034551204e-09, "loss": 0.6263, "step": 7753 }, { "epoch": 0.99, "grad_norm": 0.4942589785391733, "learning_rate": 4.686757279783738e-09, "loss": 0.5933, "step": 7754 }, { "epoch": 0.99, "grad_norm": 0.488247287414191, "learning_rate": 4.5609536012070074e-09, "loss": 0.5871, "step": 7755 }, { "epoch": 0.99, "grad_norm": 0.5509833670696273, "learning_rate": 4.43686102035934e-09, "loss": 0.6089, "step": 7756 }, { "epoch": 0.99, "grad_norm": 0.4644890194853629, "learning_rate": 4.314479558481522e-09, "loss": 0.5625, "step": 7757 }, { "epoch": 0.99, "grad_norm": 0.5306381775708047, "learning_rate": 4.193809236525681e-09, "loss": 0.579, "step": 7758 }, { "epoch": 0.99, "grad_norm": 0.5096440851629784, "learning_rate": 4.0748500751475184e-09, "loss": 0.6022, "step": 7759 }, { "epoch": 0.99, "grad_norm": 0.550959597949576, "learning_rate": 3.9576020947118545e-09, "loss": 0.6165, "step": 7760 }, { "epoch": 0.99, "grad_norm": 0.6249082311171005, "learning_rate": 3.842065315290411e-09, "loss": 0.6612, "step": 7761 }, { "epoch": 0.99, "grad_norm": 0.499680691766629, "learning_rate": 3.7282397566618114e-09, "loss": 0.5969, "step": 7762 }, { "epoch": 0.99, "grad_norm": 0.4757555377863335, "learning_rate": 3.6161254383104695e-09, "loss": 0.5771, "step": 7763 }, { "epoch": 0.99, "grad_norm": 0.5575797239380036, "learning_rate": 3.5057223794310313e-09, "loss": 0.6154, "step": 7764 }, { "epoch": 0.99, "grad_norm": 0.5190821153710827, "learning_rate": 3.3970305989217134e-09, "loss": 0.6104, "step": 7765 }, { "epoch": 0.99, "grad_norm": 0.49164942714659465, "learning_rate": 3.2900501153898536e-09, "loss": 0.588, "step": 7766 }, { "epoch": 0.99, "grad_norm": 0.4427514196987971, "learning_rate": 3.1847809471485804e-09, "loss": 0.5894, "step": 7767 }, { "epoch": 0.99, "grad_norm": 0.5042555963507348, "learning_rate": 3.0812231122179238e-09, "loss": 0.5913, "step": 7768 }, { "epoch": 0.99, "grad_norm": 0.5347694936502536, "learning_rate": 2.9793766283281458e-09, "loss": 0.6077, "step": 7769 }, { "epoch": 0.99, "grad_norm": 0.457380008695075, "learning_rate": 2.879241512913078e-09, "loss": 0.5757, "step": 7770 }, { "epoch": 0.99, "grad_norm": 0.49907194777610614, "learning_rate": 2.780817783113454e-09, "loss": 0.5861, "step": 7771 }, { "epoch": 0.99, "grad_norm": 0.44278310823773964, "learning_rate": 2.684105455779129e-09, "loss": 0.5762, "step": 7772 }, { "epoch": 0.99, "grad_norm": 0.5121475391463276, "learning_rate": 2.5891045474668585e-09, "loss": 0.5795, "step": 7773 }, { "epoch": 0.99, "grad_norm": 0.5511260345623415, "learning_rate": 2.4958150744380795e-09, "loss": 0.6268, "step": 7774 }, { "epoch": 0.99, "grad_norm": 0.5023450289916153, "learning_rate": 2.40423705266446e-09, "loss": 0.5728, "step": 7775 }, { "epoch": 0.99, "grad_norm": 0.4807604570256495, "learning_rate": 2.314370497821239e-09, "loss": 0.5606, "step": 7776 }, { "epoch": 0.99, "grad_norm": 0.5063367121727509, "learning_rate": 2.2262154252938873e-09, "loss": 0.5703, "step": 7777 }, { "epoch": 0.99, "grad_norm": 0.5421981847969982, "learning_rate": 2.139771850172556e-09, "loss": 0.6076, "step": 7778 }, { "epoch": 0.99, "grad_norm": 0.4905917731334263, "learning_rate": 2.0550397872565187e-09, "loss": 0.5879, "step": 7779 }, { "epoch": 0.99, "grad_norm": 0.47750513658883514, "learning_rate": 1.9720192510497283e-09, "loss": 0.599, "step": 7780 }, { "epoch": 0.99, "grad_norm": 0.4634380326408872, "learning_rate": 1.8907102557652603e-09, "loss": 0.5758, "step": 7781 }, { "epoch": 0.99, "grad_norm": 0.5648017489105038, "learning_rate": 1.8111128153208702e-09, "loss": 0.6301, "step": 7782 }, { "epoch": 0.99, "grad_norm": 0.4776702426601703, "learning_rate": 1.7332269433445458e-09, "loss": 0.5849, "step": 7783 }, { "epoch": 0.99, "grad_norm": 0.5121459450016697, "learning_rate": 1.6570526531667352e-09, "loss": 0.5859, "step": 7784 }, { "epoch": 0.99, "grad_norm": 0.6172570154180921, "learning_rate": 1.5825899578303384e-09, "loss": 0.6095, "step": 7785 }, { "epoch": 0.99, "grad_norm": 0.4856127915164475, "learning_rate": 1.5098388700807154e-09, "loss": 0.5624, "step": 7786 }, { "epoch": 0.99, "grad_norm": 0.48211327788499814, "learning_rate": 1.4387994023734585e-09, "loss": 0.5899, "step": 7787 }, { "epoch": 0.99, "grad_norm": 0.5024260383390002, "learning_rate": 1.3694715668677305e-09, "loss": 0.5892, "step": 7788 }, { "epoch": 1.0, "grad_norm": 0.542737700445468, "learning_rate": 1.3018553754329255e-09, "loss": 0.5834, "step": 7789 }, { "epoch": 1.0, "grad_norm": 0.48520579334990566, "learning_rate": 1.2359508396442289e-09, "loss": 0.5741, "step": 7790 }, { "epoch": 1.0, "grad_norm": 0.5018941029406425, "learning_rate": 1.1717579707826166e-09, "loss": 0.5749, "step": 7791 }, { "epoch": 1.0, "grad_norm": 0.4795414642039698, "learning_rate": 1.1092767798381866e-09, "loss": 0.6139, "step": 7792 }, { "epoch": 1.0, "grad_norm": 0.4898726440265662, "learning_rate": 1.0485072775068272e-09, "loss": 0.5871, "step": 7793 }, { "epoch": 1.0, "grad_norm": 0.5279842974556178, "learning_rate": 9.894494741913286e-10, "loss": 0.6485, "step": 7794 }, { "epoch": 1.0, "grad_norm": 0.46629474297352946, "learning_rate": 9.321033800013813e-10, "loss": 0.5759, "step": 7795 }, { "epoch": 1.0, "grad_norm": 0.5379051481640914, "learning_rate": 8.764690047535773e-10, "loss": 0.638, "step": 7796 }, { "epoch": 1.0, "grad_norm": 0.4605295105189852, "learning_rate": 8.225463579725202e-10, "loss": 0.5687, "step": 7797 }, { "epoch": 1.0, "grad_norm": 0.4640285593738485, "learning_rate": 7.703354488897141e-10, "loss": 0.5764, "step": 7798 }, { "epoch": 1.0, "grad_norm": 0.4603251628946631, "learning_rate": 7.198362864424546e-10, "loss": 0.5717, "step": 7799 }, { "epoch": 1.0, "grad_norm": 0.5343268559956881, "learning_rate": 6.710488792749381e-10, "loss": 0.6064, "step": 7800 }, { "epoch": 1.0, "grad_norm": 0.4897408885044689, "learning_rate": 6.239732357393724e-10, "loss": 0.5978, "step": 7801 }, { "epoch": 1.0, "grad_norm": 0.5130916083778227, "learning_rate": 5.786093638948664e-10, "loss": 0.5916, "step": 7802 }, { "epoch": 1.0, "grad_norm": 0.4679507087581497, "learning_rate": 5.349572715074303e-10, "loss": 0.5718, "step": 7803 }, { "epoch": 1.0, "grad_norm": 0.5697831097838834, "learning_rate": 4.930169660488649e-10, "loss": 0.6193, "step": 7804 }, { "epoch": 1.0, "grad_norm": 1.0287886413409857, "learning_rate": 4.5278845469898245e-10, "loss": 0.5583, "step": 7805 }, { "epoch": 1.0, "grad_norm": 0.5566275441554632, "learning_rate": 4.142717443456068e-10, "loss": 0.6001, "step": 7806 }, { "epoch": 1.0, "grad_norm": 0.48864241487668103, "learning_rate": 3.7746684158124216e-10, "loss": 0.6079, "step": 7807 }, { "epoch": 1.0, "grad_norm": 0.5156862358547846, "learning_rate": 3.4237375270640415e-10, "loss": 0.5892, "step": 7808 }, { "epoch": 1.0, "grad_norm": 0.5204894971823397, "learning_rate": 3.0899248372850965e-10, "loss": 0.6015, "step": 7809 }, { "epoch": 1.0, "grad_norm": 0.48353776335773796, "learning_rate": 2.7732304036298673e-10, "loss": 0.6096, "step": 7810 }, { "epoch": 1.0, "grad_norm": 0.5078026582536105, "learning_rate": 2.473654280299442e-10, "loss": 0.593, "step": 7811 }, { "epoch": 1.0, "grad_norm": 0.5106923367346852, "learning_rate": 2.1911965185972273e-10, "loss": 0.5871, "step": 7812 }, { "epoch": 1.0, "grad_norm": 0.4999674524921842, "learning_rate": 1.9258571668512306e-10, "loss": 0.6055, "step": 7813 }, { "epoch": 1.0, "grad_norm": 0.5106683040855352, "learning_rate": 1.6776362705028804e-10, "loss": 0.6305, "step": 7814 }, { "epoch": 1.0, "grad_norm": 0.48572426926328577, "learning_rate": 1.446533872040412e-10, "loss": 0.5931, "step": 7815 }, { "epoch": 1.0, "grad_norm": 0.5034094382491632, "learning_rate": 1.232550011021072e-10, "loss": 0.6163, "step": 7816 }, { "epoch": 1.0, "grad_norm": 0.5925113769391236, "learning_rate": 1.0356847240822199e-10, "loss": 0.5964, "step": 7817 }, { "epoch": 1.0, "grad_norm": 0.4914554976042341, "learning_rate": 8.559380449191246e-11, "loss": 0.5773, "step": 7818 }, { "epoch": 1.0, "grad_norm": 0.4870376799357439, "learning_rate": 6.933100042960661e-11, "loss": 0.5707, "step": 7819 }, { "epoch": 1.0, "grad_norm": 0.48774985212857896, "learning_rate": 5.478006300796423e-11, "loss": 0.5848, "step": 7820 }, { "epoch": 1.0, "grad_norm": 0.4896867574203753, "learning_rate": 4.194099471499513e-11, "loss": 0.6049, "step": 7821 }, { "epoch": 1.0, "grad_norm": 0.44697079161501285, "learning_rate": 3.081379775005111e-11, "loss": 0.5527, "step": 7822 }, { "epoch": 1.0, "grad_norm": 0.4700355175165198, "learning_rate": 2.139847401716466e-11, "loss": 0.5995, "step": 7823 }, { "epoch": 1.0, "grad_norm": 0.4961279226046673, "learning_rate": 1.3695025129489836e-11, "loss": 0.5714, "step": 7824 }, { "epoch": 1.0, "grad_norm": 0.5086446719973264, "learning_rate": 7.703452404861367e-12, "loss": 0.5956, "step": 7825 }, { "epoch": 1.0, "grad_norm": 0.46831032388512833, "learning_rate": 3.4237568680151045e-12, "loss": 0.5851, "step": 7826 }, { "epoch": 1.0, "grad_norm": 0.5004729199342829, "learning_rate": 8.559392539186917e-13, "loss": 0.5879, "step": 7827 }, { "epoch": 1.0, "grad_norm": 0.470615412940254, "learning_rate": 0.0, "loss": 0.5581, "step": 7828 }, { "epoch": 1.0, "step": 7828, "total_flos": 0.0, "train_loss": 0.29682457268877666, "train_runtime": 13573.7773, "train_samples_per_second": 148.76, "train_steps_per_second": 0.577 } ], "logging_steps": 1.0, "max_steps": 7828, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }