{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1251700680272109, "eval_steps": 500, "global_step": 529, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 2.2147, "step": 1 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 2.4802, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 2.0475, "step": 3 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.6983, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 1.6212, "step": 5 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 2.0843, "step": 6 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 1.8442, "step": 7 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.3359, "step": 8 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 2.3761, "step": 9 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.6338, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.999999826891235e-05, "loss": 1.7487, "step": 11 }, { "epoch": 0.0, "learning_rate": 4.999999307564964e-05, "loss": 2.0834, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.9999984420212596e-05, "loss": 1.5794, "step": 13 }, { "epoch": 0.0, "learning_rate": 4.999997230260242e-05, "loss": 1.7222, "step": 14 }, { "epoch": 0.0, "learning_rate": 4.9999956722820765e-05, "loss": 1.4002, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.999993768086981e-05, "loss": 1.7314, "step": 16 }, { "epoch": 0.0, "learning_rate": 4.999991517675219e-05, "loss": 1.7053, "step": 17 }, { "epoch": 0.0, "learning_rate": 4.999988921047102e-05, "loss": 1.9546, "step": 18 }, { "epoch": 0.0, "learning_rate": 4.99998597820299e-05, "loss": 1.6122, "step": 19 }, { "epoch": 0.0, "learning_rate": 4.9999826891432896e-05, "loss": 1.7275, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.999979053868456e-05, "loss": 1.8279, "step": 21 }, { "epoch": 0.01, "learning_rate": 4.9999750723789936e-05, "loss": 1.6764, "step": 22 }, { "epoch": 0.01, "learning_rate": 4.9999707446754546e-05, "loss": 1.7501, "step": 23 }, { "epoch": 0.01, "learning_rate": 4.999966070758437e-05, "loss": 1.6349, "step": 24 }, { "epoch": 0.01, "learning_rate": 4.999961050628588e-05, "loss": 1.6796, "step": 25 }, { "epoch": 0.01, "learning_rate": 4.9999556842866034e-05, "loss": 1.6249, "step": 26 }, { "epoch": 0.01, "learning_rate": 4.999949971733225e-05, "loss": 2.0866, "step": 27 }, { "epoch": 0.01, "learning_rate": 4.999943912969247e-05, "loss": 1.6658, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.999937507995506e-05, "loss": 1.7794, "step": 29 }, { "epoch": 0.01, "learning_rate": 4.999930756812889e-05, "loss": 1.743, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999923659422332e-05, "loss": 2.1359, "step": 31 }, { "epoch": 0.01, "learning_rate": 4.999916215824817e-05, "loss": 1.7393, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.999908426021375e-05, "loss": 1.6387, "step": 33 }, { "epoch": 0.01, "learning_rate": 4.999900290013085e-05, "loss": 1.974, "step": 34 }, { "epoch": 0.01, "learning_rate": 4.999891807801075e-05, "loss": 1.7787, "step": 35 }, { "epoch": 0.01, "learning_rate": 4.9998829793865176e-05, "loss": 2.1738, "step": 36 }, { "epoch": 0.01, "learning_rate": 4.999873804770636e-05, "loss": 1.73, "step": 37 }, { "epoch": 0.01, "learning_rate": 4.999864283954701e-05, "loss": 1.9363, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.999854416940032e-05, "loss": 1.248, "step": 39 }, { "epoch": 0.01, "learning_rate": 4.999844203727993e-05, "loss": 2.0461, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999833644320002e-05, "loss": 2.1294, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.999822738717518e-05, "loss": 1.3963, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.9998114869220525e-05, "loss": 1.7942, "step": 43 }, { "epoch": 0.01, "learning_rate": 4.999799888935164e-05, "loss": 1.4804, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.99978794475846e-05, "loss": 1.4482, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.999775654393591e-05, "loss": 1.0604, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.9997630178422624e-05, "loss": 1.6902, "step": 47 }, { "epoch": 0.01, "learning_rate": 4.999750035106222e-05, "loss": 1.4982, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.9997367061872694e-05, "loss": 2.0546, "step": 49 }, { "epoch": 0.01, "learning_rate": 4.99972303108725e-05, "loss": 1.581, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.999709009808057e-05, "loss": 1.4683, "step": 51 }, { "epoch": 0.01, "learning_rate": 4.9996946423516326e-05, "loss": 1.9496, "step": 52 }, { "epoch": 0.01, "learning_rate": 4.9996799287199666e-05, "loss": 1.3316, "step": 53 }, { "epoch": 0.01, "learning_rate": 4.9996648689150966e-05, "loss": 2.0274, "step": 54 }, { "epoch": 0.01, "learning_rate": 4.9996494629391076e-05, "loss": 2.1621, "step": 55 }, { "epoch": 0.01, "learning_rate": 4.999633710794135e-05, "loss": 1.6314, "step": 56 }, { "epoch": 0.01, "learning_rate": 4.999617612482358e-05, "loss": 1.8074, "step": 57 }, { "epoch": 0.01, "learning_rate": 4.9996011680060064e-05, "loss": 1.8467, "step": 58 }, { "epoch": 0.01, "learning_rate": 4.999584377367359e-05, "loss": 1.2585, "step": 59 }, { "epoch": 0.01, "learning_rate": 4.99956724056874e-05, "loss": 1.8106, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.9995497576125225e-05, "loss": 1.7955, "step": 61 }, { "epoch": 0.01, "learning_rate": 4.999531928501128e-05, "loss": 1.9608, "step": 62 }, { "epoch": 0.01, "learning_rate": 4.999513753237025e-05, "loss": 2.1124, "step": 63 }, { "epoch": 0.02, "learning_rate": 4.999495231822732e-05, "loss": 1.8793, "step": 64 }, { "epoch": 0.02, "learning_rate": 4.999476364260812e-05, "loss": 1.6794, "step": 65 }, { "epoch": 0.02, "learning_rate": 4.99945715055388e-05, "loss": 1.7991, "step": 66 }, { "epoch": 0.02, "learning_rate": 4.999437590704595e-05, "loss": 2.0194, "step": 67 }, { "epoch": 0.02, "learning_rate": 4.999417684715668e-05, "loss": 1.4938, "step": 68 }, { "epoch": 0.02, "learning_rate": 4.999397432589853e-05, "loss": 1.9159, "step": 69 }, { "epoch": 0.02, "learning_rate": 4.999376834329956e-05, "loss": 1.3969, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.9993558899388305e-05, "loss": 1.727, "step": 71 }, { "epoch": 0.02, "learning_rate": 4.999334599419375e-05, "loss": 1.8063, "step": 72 }, { "epoch": 0.02, "learning_rate": 4.99931296277454e-05, "loss": 1.9437, "step": 73 }, { "epoch": 0.02, "learning_rate": 4.999290980007321e-05, "loss": 1.5978, "step": 74 }, { "epoch": 0.02, "learning_rate": 4.9992686511207614e-05, "loss": 1.9653, "step": 75 }, { "epoch": 0.02, "learning_rate": 4.9992459761179545e-05, "loss": 2.2229, "step": 76 }, { "epoch": 0.02, "learning_rate": 4.999222955002041e-05, "loss": 1.7991, "step": 77 }, { "epoch": 0.02, "learning_rate": 4.9991995877762074e-05, "loss": 2.0293, "step": 78 }, { "epoch": 0.02, "learning_rate": 4.999175874443692e-05, "loss": 1.5131, "step": 79 }, { "epoch": 0.02, "learning_rate": 4.9991518150077765e-05, "loss": 1.41, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.999127409471794e-05, "loss": 2.0329, "step": 81 }, { "epoch": 0.02, "learning_rate": 4.9991026578391245e-05, "loss": 1.7566, "step": 82 }, { "epoch": 0.02, "learning_rate": 4.999077560113196e-05, "loss": 2.1238, "step": 83 }, { "epoch": 0.02, "learning_rate": 4.9990521162974824e-05, "loss": 1.4365, "step": 84 }, { "epoch": 0.02, "learning_rate": 4.999026326395509e-05, "loss": 1.7559, "step": 85 }, { "epoch": 0.02, "learning_rate": 4.999000190410848e-05, "loss": 2.3886, "step": 86 }, { "epoch": 0.02, "learning_rate": 4.998973708347116e-05, "loss": 1.5783, "step": 87 }, { "epoch": 0.02, "learning_rate": 4.998946880207983e-05, "loss": 2.0597, "step": 88 }, { "epoch": 0.02, "learning_rate": 4.998919705997164e-05, "loss": 1.684, "step": 89 }, { "epoch": 0.02, "learning_rate": 4.998892185718422e-05, "loss": 1.5005, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.998864319375568e-05, "loss": 1.8982, "step": 91 }, { "epoch": 0.02, "learning_rate": 4.998836106972461e-05, "loss": 2.4997, "step": 92 }, { "epoch": 0.02, "learning_rate": 4.998807548513008e-05, "loss": 1.9038, "step": 93 }, { "epoch": 0.02, "learning_rate": 4.998778644001165e-05, "loss": 1.4326, "step": 94 }, { "epoch": 0.02, "learning_rate": 4.998749393440933e-05, "loss": 1.9809, "step": 95 }, { "epoch": 0.02, "learning_rate": 4.998719796836366e-05, "loss": 1.727, "step": 96 }, { "epoch": 0.02, "learning_rate": 4.9986898541915595e-05, "loss": 1.5198, "step": 97 }, { "epoch": 0.02, "learning_rate": 4.998659565510662e-05, "loss": 1.7004, "step": 98 }, { "epoch": 0.02, "learning_rate": 4.998628930797866e-05, "loss": 1.7584, "step": 99 }, { "epoch": 0.02, "learning_rate": 4.9985979500574166e-05, "loss": 1.7247, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.998566623293603e-05, "loss": 1.7967, "step": 101 }, { "epoch": 0.02, "learning_rate": 4.998534950510764e-05, "loss": 1.282, "step": 102 }, { "epoch": 0.02, "learning_rate": 4.9985029317132845e-05, "loss": 1.7519, "step": 103 }, { "epoch": 0.02, "learning_rate": 4.998470566905601e-05, "loss": 1.8924, "step": 104 }, { "epoch": 0.02, "learning_rate": 4.9984378560921937e-05, "loss": 1.411, "step": 105 }, { "epoch": 0.03, "learning_rate": 4.9984047992775926e-05, "loss": 1.7251, "step": 106 }, { "epoch": 0.03, "learning_rate": 4.9983713964663776e-05, "loss": 1.6784, "step": 107 }, { "epoch": 0.03, "learning_rate": 4.998337647663173e-05, "loss": 1.9045, "step": 108 }, { "epoch": 0.03, "learning_rate": 4.998303552872652e-05, "loss": 1.1327, "step": 109 }, { "epoch": 0.03, "learning_rate": 4.998269112099538e-05, "loss": 1.6382, "step": 110 }, { "epoch": 0.03, "learning_rate": 4.998234325348599e-05, "loss": 1.8228, "step": 111 }, { "epoch": 0.03, "learning_rate": 4.998199192624654e-05, "loss": 1.9773, "step": 112 }, { "epoch": 0.03, "learning_rate": 4.998163713932567e-05, "loss": 1.6035, "step": 113 }, { "epoch": 0.03, "learning_rate": 4.998127889277252e-05, "loss": 2.3095, "step": 114 }, { "epoch": 0.03, "learning_rate": 4.998091718663671e-05, "loss": 1.4727, "step": 115 }, { "epoch": 0.03, "learning_rate": 4.998055202096832e-05, "loss": 1.8166, "step": 116 }, { "epoch": 0.03, "learning_rate": 4.998018339581792e-05, "loss": 1.2842, "step": 117 }, { "epoch": 0.03, "learning_rate": 4.997981131123657e-05, "loss": 1.8485, "step": 118 }, { "epoch": 0.03, "learning_rate": 4.997943576727579e-05, "loss": 1.1914, "step": 119 }, { "epoch": 0.03, "learning_rate": 4.997905676398759e-05, "loss": 1.9409, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.997867430142446e-05, "loss": 2.3757, "step": 121 }, { "epoch": 0.03, "learning_rate": 4.997828837963937e-05, "loss": 1.6337, "step": 122 }, { "epoch": 0.03, "learning_rate": 4.997789899868575e-05, "loss": 1.5789, "step": 123 }, { "epoch": 0.03, "learning_rate": 4.9977506158617535e-05, "loss": 1.61, "step": 124 }, { "epoch": 0.03, "learning_rate": 4.9977109859489133e-05, "loss": 1.3976, "step": 125 }, { "epoch": 0.03, "learning_rate": 4.997671010135542e-05, "loss": 1.3944, "step": 126 }, { "epoch": 0.03, "learning_rate": 4.997630688427176e-05, "loss": 1.5922, "step": 127 }, { "epoch": 0.03, "learning_rate": 4.9975900208293984e-05, "loss": 2.0075, "step": 128 }, { "epoch": 0.03, "learning_rate": 4.997549007347842e-05, "loss": 1.4363, "step": 129 }, { "epoch": 0.03, "learning_rate": 4.9975076479881864e-05, "loss": 1.9972, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.99746594275616e-05, "loss": 1.8052, "step": 131 }, { "epoch": 0.03, "learning_rate": 4.997423891657538e-05, "loss": 1.9088, "step": 132 }, { "epoch": 0.03, "learning_rate": 4.997381494698143e-05, "loss": 1.7322, "step": 133 }, { "epoch": 0.03, "learning_rate": 4.9973387518838474e-05, "loss": 1.6391, "step": 134 }, { "epoch": 0.03, "learning_rate": 4.9972956632205704e-05, "loss": 1.8346, "step": 135 }, { "epoch": 0.03, "learning_rate": 4.997252228714279e-05, "loss": 1.9651, "step": 136 }, { "epoch": 0.03, "learning_rate": 4.997208448370988e-05, "loss": 1.5544, "step": 137 }, { "epoch": 0.03, "learning_rate": 4.997164322196762e-05, "loss": 1.4443, "step": 138 }, { "epoch": 0.03, "learning_rate": 4.99711985019771e-05, "loss": 2.1679, "step": 139 }, { "epoch": 0.03, "learning_rate": 4.997075032379992e-05, "loss": 1.6557, "step": 140 }, { "epoch": 0.03, "learning_rate": 4.997029868749813e-05, "loss": 1.5759, "step": 141 }, { "epoch": 0.03, "learning_rate": 4.99698435931343e-05, "loss": 1.8057, "step": 142 }, { "epoch": 0.03, "learning_rate": 4.9969385040771445e-05, "loss": 1.1004, "step": 143 }, { "epoch": 0.03, "learning_rate": 4.996892303047306e-05, "loss": 1.6915, "step": 144 }, { "epoch": 0.03, "learning_rate": 4.996845756230314e-05, "loss": 1.7905, "step": 145 }, { "epoch": 0.03, "learning_rate": 4.9967988636326136e-05, "loss": 2.0294, "step": 146 }, { "epoch": 0.03, "learning_rate": 4.996751625260699e-05, "loss": 1.9933, "step": 147 }, { "epoch": 0.04, "learning_rate": 4.996704041121113e-05, "loss": 1.5048, "step": 148 }, { "epoch": 0.04, "learning_rate": 4.996656111220443e-05, "loss": 2.0881, "step": 149 }, { "epoch": 0.04, "learning_rate": 4.996607835565331e-05, "loss": 1.3425, "step": 150 }, { "epoch": 0.04, "learning_rate": 4.9965592141624586e-05, "loss": 1.3072, "step": 151 }, { "epoch": 0.04, "learning_rate": 4.9965102470185596e-05, "loss": 2.5566, "step": 152 }, { "epoch": 0.04, "learning_rate": 4.9964609341404175e-05, "loss": 1.3695, "step": 153 }, { "epoch": 0.04, "learning_rate": 4.99641127553486e-05, "loss": 2.0205, "step": 154 }, { "epoch": 0.04, "learning_rate": 4.9963612712087646e-05, "loss": 2.1791, "step": 155 }, { "epoch": 0.04, "learning_rate": 4.996310921169056e-05, "loss": 1.6322, "step": 156 }, { "epoch": 0.04, "learning_rate": 4.996260225422707e-05, "loss": 1.8695, "step": 157 }, { "epoch": 0.04, "learning_rate": 4.9962091839767386e-05, "loss": 2.1406, "step": 158 }, { "epoch": 0.04, "learning_rate": 4.99615779683822e-05, "loss": 1.7668, "step": 159 }, { "epoch": 0.04, "learning_rate": 4.996106064014265e-05, "loss": 1.6439, "step": 160 }, { "epoch": 0.04, "learning_rate": 4.9960539855120415e-05, "loss": 1.5069, "step": 161 }, { "epoch": 0.04, "learning_rate": 4.996001561338759e-05, "loss": 1.9099, "step": 162 }, { "epoch": 0.04, "learning_rate": 4.995948791501679e-05, "loss": 1.5446, "step": 163 }, { "epoch": 0.04, "learning_rate": 4.9958956760081085e-05, "loss": 1.8427, "step": 164 }, { "epoch": 0.04, "learning_rate": 4.9958422148654045e-05, "loss": 1.4109, "step": 165 }, { "epoch": 0.04, "learning_rate": 4.995788408080969e-05, "loss": 1.7544, "step": 166 }, { "epoch": 0.04, "learning_rate": 4.9957342556622557e-05, "loss": 1.2727, "step": 167 }, { "epoch": 0.04, "learning_rate": 4.995679757616762e-05, "loss": 1.4266, "step": 168 }, { "epoch": 0.04, "learning_rate": 4.995624913952036e-05, "loss": 1.3209, "step": 169 }, { "epoch": 0.04, "learning_rate": 4.995569724675673e-05, "loss": 1.5993, "step": 170 }, { "epoch": 0.04, "learning_rate": 4.995514189795316e-05, "loss": 1.8296, "step": 171 }, { "epoch": 0.04, "learning_rate": 4.9954583093186544e-05, "loss": 1.5827, "step": 172 }, { "epoch": 0.04, "learning_rate": 4.9954020832534296e-05, "loss": 1.6316, "step": 173 }, { "epoch": 0.04, "learning_rate": 4.9953455116074263e-05, "loss": 1.7216, "step": 174 }, { "epoch": 0.04, "learning_rate": 4.9952885943884795e-05, "loss": 1.7878, "step": 175 }, { "epoch": 0.04, "learning_rate": 4.995231331604472e-05, "loss": 1.7959, "step": 176 }, { "epoch": 0.04, "learning_rate": 4.995173723263332e-05, "loss": 1.4636, "step": 177 }, { "epoch": 0.04, "learning_rate": 4.99511576937304e-05, "loss": 1.365, "step": 178 }, { "epoch": 0.04, "learning_rate": 4.995057469941621e-05, "loss": 1.9666, "step": 179 }, { "epoch": 0.04, "learning_rate": 4.994998824977147e-05, "loss": 1.4686, "step": 180 }, { "epoch": 0.04, "learning_rate": 4.994939834487742e-05, "loss": 1.4651, "step": 181 }, { "epoch": 0.04, "learning_rate": 4.994880498481575e-05, "loss": 1.8047, "step": 182 }, { "epoch": 0.04, "learning_rate": 4.9948208169668606e-05, "loss": 1.779, "step": 183 }, { "epoch": 0.04, "learning_rate": 4.9947607899518675e-05, "loss": 1.172, "step": 184 }, { "epoch": 0.04, "learning_rate": 4.994700417444908e-05, "loss": 1.6413, "step": 185 }, { "epoch": 0.04, "learning_rate": 4.9946396994543405e-05, "loss": 2.0329, "step": 186 }, { "epoch": 0.04, "learning_rate": 4.994578635988576e-05, "loss": 1.8365, "step": 187 }, { "epoch": 0.04, "learning_rate": 4.9945172270560695e-05, "loss": 1.8724, "step": 188 }, { "epoch": 0.04, "learning_rate": 4.994455472665327e-05, "loss": 1.7607, "step": 189 }, { "epoch": 0.04, "learning_rate": 4.9943933728248996e-05, "loss": 1.6619, "step": 190 }, { "epoch": 0.05, "learning_rate": 4.994330927543387e-05, "loss": 1.9951, "step": 191 }, { "epoch": 0.05, "learning_rate": 4.994268136829438e-05, "loss": 1.5133, "step": 192 }, { "epoch": 0.05, "learning_rate": 4.994205000691747e-05, "loss": 2.238, "step": 193 }, { "epoch": 0.05, "learning_rate": 4.994141519139058e-05, "loss": 1.6153, "step": 194 }, { "epoch": 0.05, "learning_rate": 4.994077692180164e-05, "loss": 1.2494, "step": 195 }, { "epoch": 0.05, "learning_rate": 4.9940135198239024e-05, "loss": 1.7389, "step": 196 }, { "epoch": 0.05, "learning_rate": 4.9939490020791614e-05, "loss": 1.2335, "step": 197 }, { "epoch": 0.05, "learning_rate": 4.993884138954875e-05, "loss": 1.4512, "step": 198 }, { "epoch": 0.05, "learning_rate": 4.993818930460026e-05, "loss": 1.8298, "step": 199 }, { "epoch": 0.05, "learning_rate": 4.993753376603645e-05, "loss": 1.333, "step": 200 }, { "epoch": 0.05, "learning_rate": 4.9936874773948094e-05, "loss": 1.8395, "step": 201 }, { "epoch": 0.05, "learning_rate": 4.993621232842648e-05, "loss": 1.7438, "step": 202 }, { "epoch": 0.05, "learning_rate": 4.993554642956332e-05, "loss": 1.2777, "step": 203 }, { "epoch": 0.05, "learning_rate": 4.993487707745086e-05, "loss": 1.5628, "step": 204 }, { "epoch": 0.05, "learning_rate": 4.9934204272181764e-05, "loss": 1.6458, "step": 205 }, { "epoch": 0.05, "learning_rate": 4.9933528013849235e-05, "loss": 1.3878, "step": 206 }, { "epoch": 0.05, "learning_rate": 4.993284830254691e-05, "loss": 1.6489, "step": 207 }, { "epoch": 0.05, "learning_rate": 4.993216513836893e-05, "loss": 1.4492, "step": 208 }, { "epoch": 0.05, "learning_rate": 4.9931478521409895e-05, "loss": 1.598, "step": 209 }, { "epoch": 0.05, "learning_rate": 4.99307884517649e-05, "loss": 1.9357, "step": 210 }, { "epoch": 0.05, "learning_rate": 4.9930094929529506e-05, "loss": 1.4987, "step": 211 }, { "epoch": 0.05, "learning_rate": 4.992939795479976e-05, "loss": 1.6394, "step": 212 }, { "epoch": 0.05, "learning_rate": 4.992869752767218e-05, "loss": 2.1398, "step": 213 }, { "epoch": 0.05, "learning_rate": 4.992799364824377e-05, "loss": 1.4962, "step": 214 }, { "epoch": 0.05, "learning_rate": 4.992728631661201e-05, "loss": 1.1668, "step": 215 }, { "epoch": 0.05, "learning_rate": 4.9926575532874847e-05, "loss": 1.3898, "step": 216 }, { "epoch": 0.05, "learning_rate": 4.9925861297130724e-05, "loss": 1.828, "step": 217 }, { "epoch": 0.05, "learning_rate": 4.992514360947855e-05, "loss": 1.4453, "step": 218 }, { "epoch": 0.05, "learning_rate": 4.9924422470017715e-05, "loss": 1.774, "step": 219 }, { "epoch": 0.05, "learning_rate": 4.992369787884809e-05, "loss": 2.0532, "step": 220 }, { "epoch": 0.05, "learning_rate": 4.992296983607002e-05, "loss": 1.881, "step": 221 }, { "epoch": 0.05, "learning_rate": 4.992223834178433e-05, "loss": 1.7959, "step": 222 }, { "epoch": 0.05, "learning_rate": 4.992150339609232e-05, "loss": 2.1218, "step": 223 }, { "epoch": 0.05, "learning_rate": 4.992076499909578e-05, "loss": 1.9065, "step": 224 }, { "epoch": 0.05, "learning_rate": 4.992002315089695e-05, "loss": 1.783, "step": 225 }, { "epoch": 0.05, "learning_rate": 4.9919277851598575e-05, "loss": 1.5766, "step": 226 }, { "epoch": 0.05, "learning_rate": 4.991852910130388e-05, "loss": 1.2045, "step": 227 }, { "epoch": 0.05, "learning_rate": 4.9917776900116556e-05, "loss": 1.7663, "step": 228 }, { "epoch": 0.05, "learning_rate": 4.991702124814075e-05, "loss": 1.6417, "step": 229 }, { "epoch": 0.05, "learning_rate": 4.991626214548113e-05, "loss": 1.6406, "step": 230 }, { "epoch": 0.05, "learning_rate": 4.991549959224282e-05, "loss": 1.9329, "step": 231 }, { "epoch": 0.05, "learning_rate": 4.991473358853142e-05, "loss": 2.0136, "step": 232 }, { "epoch": 0.06, "learning_rate": 4.9913964134453014e-05, "loss": 1.4504, "step": 233 }, { "epoch": 0.06, "learning_rate": 4.9913191230114156e-05, "loss": 1.9222, "step": 234 }, { "epoch": 0.06, "learning_rate": 4.991241487562189e-05, "loss": 1.9192, "step": 235 }, { "epoch": 0.06, "learning_rate": 4.991163507108373e-05, "loss": 1.5397, "step": 236 }, { "epoch": 0.06, "learning_rate": 4.991085181660766e-05, "loss": 2.1056, "step": 237 }, { "epoch": 0.06, "learning_rate": 4.9910065112302175e-05, "loss": 1.8552, "step": 238 }, { "epoch": 0.06, "learning_rate": 4.9909274958276185e-05, "loss": 1.5675, "step": 239 }, { "epoch": 0.06, "learning_rate": 4.990848135463915e-05, "loss": 1.7246, "step": 240 }, { "epoch": 0.06, "learning_rate": 4.990768430150096e-05, "loss": 1.6873, "step": 241 }, { "epoch": 0.06, "learning_rate": 4.9906883798971995e-05, "loss": 2.0968, "step": 242 }, { "epoch": 0.06, "learning_rate": 4.9906079847163115e-05, "loss": 1.662, "step": 243 }, { "epoch": 0.06, "learning_rate": 4.990527244618566e-05, "loss": 2.1308, "step": 244 }, { "epoch": 0.06, "learning_rate": 4.990446159615144e-05, "loss": 1.6716, "step": 245 }, { "epoch": 0.06, "learning_rate": 4.9903647297172764e-05, "loss": 1.833, "step": 246 }, { "epoch": 0.06, "learning_rate": 4.990282954936237e-05, "loss": 2.0275, "step": 247 }, { "epoch": 0.06, "learning_rate": 4.990200835283353e-05, "loss": 1.7828, "step": 248 }, { "epoch": 0.06, "learning_rate": 4.990118370769997e-05, "loss": 1.8333, "step": 249 }, { "epoch": 0.06, "learning_rate": 4.990035561407588e-05, "loss": 1.5876, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.989952407207594e-05, "loss": 1.6541, "step": 251 }, { "epoch": 0.06, "learning_rate": 4.989868908181532e-05, "loss": 1.9533, "step": 252 }, { "epoch": 0.06, "learning_rate": 4.989785064340965e-05, "loss": 2.1998, "step": 253 }, { "epoch": 0.06, "learning_rate": 4.989700875697503e-05, "loss": 1.2615, "step": 254 }, { "epoch": 0.06, "learning_rate": 4.9896163422628076e-05, "loss": 2.2554, "step": 255 }, { "epoch": 0.06, "learning_rate": 4.9895314640485835e-05, "loss": 1.6735, "step": 256 }, { "epoch": 0.06, "learning_rate": 4.9894462410665856e-05, "loss": 1.6169, "step": 257 }, { "epoch": 0.06, "learning_rate": 4.989360673328617e-05, "loss": 0.9541, "step": 258 }, { "epoch": 0.06, "learning_rate": 4.989274760846527e-05, "loss": 1.4256, "step": 259 }, { "epoch": 0.06, "learning_rate": 4.989188503632213e-05, "loss": 1.576, "step": 260 }, { "epoch": 0.06, "learning_rate": 4.989101901697621e-05, "loss": 1.315, "step": 261 }, { "epoch": 0.06, "learning_rate": 4.9890149550547454e-05, "loss": 1.6165, "step": 262 }, { "epoch": 0.06, "learning_rate": 4.988927663715626e-05, "loss": 1.7303, "step": 263 }, { "epoch": 0.06, "learning_rate": 4.9888400276923505e-05, "loss": 1.389, "step": 264 }, { "epoch": 0.06, "learning_rate": 4.9887520469970574e-05, "loss": 1.6816, "step": 265 }, { "epoch": 0.06, "learning_rate": 4.9886637216419295e-05, "loss": 1.7488, "step": 266 }, { "epoch": 0.06, "learning_rate": 4.988575051639199e-05, "loss": 1.3843, "step": 267 }, { "epoch": 0.06, "learning_rate": 4.9884860370011453e-05, "loss": 2.1209, "step": 268 }, { "epoch": 0.06, "learning_rate": 4.988396677740097e-05, "loss": 1.7382, "step": 269 }, { "epoch": 0.06, "learning_rate": 4.9883069738684286e-05, "loss": 1.3885, "step": 270 }, { "epoch": 0.06, "learning_rate": 4.988216925398562e-05, "loss": 1.8881, "step": 271 }, { "epoch": 0.06, "learning_rate": 4.988126532342968e-05, "loss": 2.1026, "step": 272 }, { "epoch": 0.06, "learning_rate": 4.9880357947141664e-05, "loss": 1.7111, "step": 273 }, { "epoch": 0.06, "learning_rate": 4.9879447125247215e-05, "loss": 1.3994, "step": 274 }, { "epoch": 0.07, "learning_rate": 4.9878532857872476e-05, "loss": 1.9436, "step": 275 }, { "epoch": 0.07, "learning_rate": 4.9877615145144055e-05, "loss": 1.2814, "step": 276 }, { "epoch": 0.07, "learning_rate": 4.9876693987189054e-05, "loss": 1.5255, "step": 277 }, { "epoch": 0.07, "learning_rate": 4.987576938413504e-05, "loss": 1.7781, "step": 278 }, { "epoch": 0.07, "learning_rate": 4.9874841336110044e-05, "loss": 1.834, "step": 279 }, { "epoch": 0.07, "learning_rate": 4.987390984324261e-05, "loss": 1.0663, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.9872974905661726e-05, "loss": 1.8486, "step": 281 }, { "epoch": 0.07, "learning_rate": 4.9872036523496866e-05, "loss": 1.4602, "step": 282 }, { "epoch": 0.07, "learning_rate": 4.9871094696877995e-05, "loss": 2.1106, "step": 283 }, { "epoch": 0.07, "learning_rate": 4.987014942593553e-05, "loss": 1.1824, "step": 284 }, { "epoch": 0.07, "learning_rate": 4.986920071080039e-05, "loss": 1.5234, "step": 285 }, { "epoch": 0.07, "learning_rate": 4.9868248551603945e-05, "loss": 1.2072, "step": 286 }, { "epoch": 0.07, "learning_rate": 4.986729294847807e-05, "loss": 1.961, "step": 287 }, { "epoch": 0.07, "learning_rate": 4.986633390155511e-05, "loss": 0.9717, "step": 288 }, { "epoch": 0.07, "learning_rate": 4.986537141096786e-05, "loss": 1.7839, "step": 289 }, { "epoch": 0.07, "learning_rate": 4.986440547684963e-05, "loss": 2.0075, "step": 290 }, { "epoch": 0.07, "learning_rate": 4.986343609933418e-05, "loss": 1.4374, "step": 291 }, { "epoch": 0.07, "learning_rate": 4.986246327855576e-05, "loss": 1.3924, "step": 292 }, { "epoch": 0.07, "learning_rate": 4.986148701464909e-05, "loss": 1.5884, "step": 293 }, { "epoch": 0.07, "learning_rate": 4.986050730774937e-05, "loss": 1.6415, "step": 294 }, { "epoch": 0.07, "learning_rate": 4.985952415799228e-05, "loss": 1.8095, "step": 295 }, { "epoch": 0.07, "learning_rate": 4.9858537565513976e-05, "loss": 1.9536, "step": 296 }, { "epoch": 0.07, "learning_rate": 4.985754753045107e-05, "loss": 1.1484, "step": 297 }, { "epoch": 0.07, "learning_rate": 4.9856554052940705e-05, "loss": 1.6013, "step": 298 }, { "epoch": 0.07, "learning_rate": 4.9855557133120436e-05, "loss": 1.5254, "step": 299 }, { "epoch": 0.07, "learning_rate": 4.985455677112832e-05, "loss": 1.9474, "step": 300 }, { "epoch": 0.07, "learning_rate": 4.985355296710291e-05, "loss": 1.816, "step": 301 }, { "epoch": 0.07, "learning_rate": 4.985254572118321e-05, "loss": 1.451, "step": 302 }, { "epoch": 0.07, "learning_rate": 4.985153503350872e-05, "loss": 1.353, "step": 303 }, { "epoch": 0.07, "learning_rate": 4.98505209042194e-05, "loss": 1.6463, "step": 304 }, { "epoch": 0.07, "learning_rate": 4.98495033334557e-05, "loss": 1.8436, "step": 305 }, { "epoch": 0.07, "learning_rate": 4.984848232135853e-05, "loss": 1.5973, "step": 306 }, { "epoch": 0.07, "learning_rate": 4.984745786806929e-05, "loss": 1.6524, "step": 307 }, { "epoch": 0.07, "learning_rate": 4.984642997372987e-05, "loss": 1.499, "step": 308 }, { "epoch": 0.07, "learning_rate": 4.984539863848259e-05, "loss": 1.8387, "step": 309 }, { "epoch": 0.07, "learning_rate": 4.98443638624703e-05, "loss": 2.1306, "step": 310 }, { "epoch": 0.07, "learning_rate": 4.98433256458363e-05, "loss": 1.9113, "step": 311 }, { "epoch": 0.07, "learning_rate": 4.984228398872436e-05, "loss": 1.7941, "step": 312 }, { "epoch": 0.07, "learning_rate": 4.984123889127874e-05, "loss": 1.957, "step": 313 }, { "epoch": 0.07, "learning_rate": 4.984019035364418e-05, "loss": 1.5927, "step": 314 }, { "epoch": 0.07, "learning_rate": 4.983913837596588e-05, "loss": 1.6275, "step": 315 }, { "epoch": 0.07, "learning_rate": 4.983808295838953e-05, "loss": 1.5985, "step": 316 }, { "epoch": 0.08, "learning_rate": 4.983702410106128e-05, "loss": 1.5638, "step": 317 }, { "epoch": 0.08, "learning_rate": 4.983596180412778e-05, "loss": 1.431, "step": 318 }, { "epoch": 0.08, "learning_rate": 4.983489606773615e-05, "loss": 1.6035, "step": 319 }, { "epoch": 0.08, "learning_rate": 4.9833826892033966e-05, "loss": 2.0482, "step": 320 }, { "epoch": 0.08, "learning_rate": 4.98327542771693e-05, "loss": 1.7167, "step": 321 }, { "epoch": 0.08, "learning_rate": 4.9831678223290705e-05, "loss": 1.2944, "step": 322 }, { "epoch": 0.08, "learning_rate": 4.9830598730547185e-05, "loss": 1.6265, "step": 323 }, { "epoch": 0.08, "learning_rate": 4.982951579908824e-05, "loss": 1.6205, "step": 324 }, { "epoch": 0.08, "learning_rate": 4.982842942906386e-05, "loss": 2.1644, "step": 325 }, { "epoch": 0.08, "learning_rate": 4.9827339620624466e-05, "loss": 1.4672, "step": 326 }, { "epoch": 0.08, "learning_rate": 4.9826246373920994e-05, "loss": 1.6029, "step": 327 }, { "epoch": 0.08, "learning_rate": 4.9825149689104846e-05, "loss": 1.5438, "step": 328 }, { "epoch": 0.08, "learning_rate": 4.98240495663279e-05, "loss": 1.9061, "step": 329 }, { "epoch": 0.08, "learning_rate": 4.9822946005742497e-05, "loss": 1.8342, "step": 330 }, { "epoch": 0.08, "learning_rate": 4.9821839007501484e-05, "loss": 1.8016, "step": 331 }, { "epoch": 0.08, "learning_rate": 4.9820728571758155e-05, "loss": 1.6384, "step": 332 }, { "epoch": 0.08, "learning_rate": 4.9819614698666295e-05, "loss": 1.4692, "step": 333 }, { "epoch": 0.08, "learning_rate": 4.9818497388380154e-05, "loss": 1.4886, "step": 334 }, { "epoch": 0.08, "learning_rate": 4.9817376641054466e-05, "loss": 1.4903, "step": 335 }, { "epoch": 0.08, "learning_rate": 4.981625245684445e-05, "loss": 2.093, "step": 336 }, { "epoch": 0.08, "learning_rate": 4.981512483590578e-05, "loss": 1.9255, "step": 337 }, { "epoch": 0.08, "learning_rate": 4.981399377839463e-05, "loss": 1.6346, "step": 338 }, { "epoch": 0.08, "learning_rate": 4.981285928446762e-05, "loss": 1.6704, "step": 339 }, { "epoch": 0.08, "learning_rate": 4.981172135428188e-05, "loss": 0.981, "step": 340 }, { "epoch": 0.08, "learning_rate": 4.9810579987994974e-05, "loss": 1.7046, "step": 341 }, { "epoch": 0.08, "learning_rate": 4.9809435185765e-05, "loss": 1.6879, "step": 342 }, { "epoch": 0.08, "learning_rate": 4.980828694775046e-05, "loss": 2.1196, "step": 343 }, { "epoch": 0.08, "learning_rate": 4.980713527411041e-05, "loss": 1.6099, "step": 344 }, { "epoch": 0.08, "learning_rate": 4.9805980165004304e-05, "loss": 1.6977, "step": 345 }, { "epoch": 0.08, "learning_rate": 4.980482162059213e-05, "loss": 1.8916, "step": 346 }, { "epoch": 0.08, "learning_rate": 4.980365964103434e-05, "loss": 2.0914, "step": 347 }, { "epoch": 0.08, "learning_rate": 4.980249422649183e-05, "loss": 1.268, "step": 348 }, { "epoch": 0.08, "learning_rate": 4.980132537712602e-05, "loss": 1.7568, "step": 349 }, { "epoch": 0.08, "learning_rate": 4.9800153093098756e-05, "loss": 1.3604, "step": 350 }, { "epoch": 0.08, "learning_rate": 4.9798977374572395e-05, "loss": 1.5185, "step": 351 }, { "epoch": 0.08, "learning_rate": 4.979779822170977e-05, "loss": 1.8898, "step": 352 }, { "epoch": 0.08, "learning_rate": 4.9796615634674155e-05, "loss": 2.0664, "step": 353 }, { "epoch": 0.08, "learning_rate": 4.979542961362934e-05, "loss": 1.4639, "step": 354 }, { "epoch": 0.08, "learning_rate": 4.9794240158739566e-05, "loss": 1.2679, "step": 355 }, { "epoch": 0.08, "learning_rate": 4.9793047270169566e-05, "loss": 1.6697, "step": 356 }, { "epoch": 0.08, "learning_rate": 4.979185094808453e-05, "loss": 1.3827, "step": 357 }, { "epoch": 0.08, "learning_rate": 4.979065119265013e-05, "loss": 1.471, "step": 358 }, { "epoch": 0.08, "learning_rate": 4.9789448004032533e-05, "loss": 2.0245, "step": 359 }, { "epoch": 0.09, "learning_rate": 4.978824138239835e-05, "loss": 1.6898, "step": 360 }, { "epoch": 0.09, "learning_rate": 4.978703132791469e-05, "loss": 1.8281, "step": 361 }, { "epoch": 0.09, "learning_rate": 4.978581784074913e-05, "loss": 1.8089, "step": 362 }, { "epoch": 0.09, "learning_rate": 4.978460092106971e-05, "loss": 1.6343, "step": 363 }, { "epoch": 0.09, "learning_rate": 4.9783380569044974e-05, "loss": 2.2785, "step": 364 }, { "epoch": 0.09, "learning_rate": 4.9782156784843916e-05, "loss": 1.9567, "step": 365 }, { "epoch": 0.09, "learning_rate": 4.9780929568636015e-05, "loss": 1.3461, "step": 366 }, { "epoch": 0.09, "learning_rate": 4.977969892059123e-05, "loss": 1.4954, "step": 367 }, { "epoch": 0.09, "learning_rate": 4.977846484087998e-05, "loss": 1.305, "step": 368 }, { "epoch": 0.09, "learning_rate": 4.9777227329673174e-05, "loss": 1.8285, "step": 369 }, { "epoch": 0.09, "learning_rate": 4.977598638714219e-05, "loss": 1.4243, "step": 370 }, { "epoch": 0.09, "learning_rate": 4.97747420134589e-05, "loss": 1.7667, "step": 371 }, { "epoch": 0.09, "learning_rate": 4.9773494208795604e-05, "loss": 1.9451, "step": 372 }, { "epoch": 0.09, "learning_rate": 4.9772242973325125e-05, "loss": 1.5445, "step": 373 }, { "epoch": 0.09, "learning_rate": 4.9770988307220736e-05, "loss": 1.6328, "step": 374 }, { "epoch": 0.09, "learning_rate": 4.976973021065619e-05, "loss": 1.6148, "step": 375 }, { "epoch": 0.09, "learning_rate": 4.976846868380572e-05, "loss": 1.3625, "step": 376 }, { "epoch": 0.09, "learning_rate": 4.976720372684404e-05, "loss": 1.4694, "step": 377 }, { "epoch": 0.09, "learning_rate": 4.9765935339946326e-05, "loss": 1.5375, "step": 378 }, { "epoch": 0.09, "learning_rate": 4.976466352328822e-05, "loss": 1.805, "step": 379 }, { "epoch": 0.09, "learning_rate": 4.976338827704586e-05, "loss": 1.8427, "step": 380 }, { "epoch": 0.09, "learning_rate": 4.976210960139586e-05, "loss": 1.9649, "step": 381 }, { "epoch": 0.09, "learning_rate": 4.976082749651529e-05, "loss": 1.4552, "step": 382 }, { "epoch": 0.09, "learning_rate": 4.9759541962581715e-05, "loss": 1.7111, "step": 383 }, { "epoch": 0.09, "learning_rate": 4.975825299977315e-05, "loss": 2.0492, "step": 384 }, { "epoch": 0.09, "learning_rate": 4.9756960608268104e-05, "loss": 2.4812, "step": 385 }, { "epoch": 0.09, "learning_rate": 4.975566478824556e-05, "loss": 1.3533, "step": 386 }, { "epoch": 0.09, "learning_rate": 4.975436553988498e-05, "loss": 1.7609, "step": 387 }, { "epoch": 0.09, "learning_rate": 4.9753062863366276e-05, "loss": 1.3683, "step": 388 }, { "epoch": 0.09, "learning_rate": 4.975175675886986e-05, "loss": 1.5215, "step": 389 }, { "epoch": 0.09, "learning_rate": 4.9750447226576617e-05, "loss": 1.2101, "step": 390 }, { "epoch": 0.09, "learning_rate": 4.9749134266667894e-05, "loss": 1.5135, "step": 391 }, { "epoch": 0.09, "learning_rate": 4.9747817879325514e-05, "loss": 1.9498, "step": 392 }, { "epoch": 0.09, "learning_rate": 4.9746498064731786e-05, "loss": 1.9672, "step": 393 }, { "epoch": 0.09, "learning_rate": 4.9745174823069486e-05, "loss": 2.1517, "step": 394 }, { "epoch": 0.09, "learning_rate": 4.9743848154521863e-05, "loss": 1.411, "step": 395 }, { "epoch": 0.09, "learning_rate": 4.974251805927266e-05, "loss": 1.3972, "step": 396 }, { "epoch": 0.09, "learning_rate": 4.974118453750605e-05, "loss": 1.6883, "step": 397 }, { "epoch": 0.09, "learning_rate": 4.973984758940672e-05, "loss": 1.4422, "step": 398 }, { "epoch": 0.09, "learning_rate": 4.973850721515983e-05, "loss": 1.5046, "step": 399 }, { "epoch": 0.09, "learning_rate": 4.973716341495099e-05, "loss": 1.9012, "step": 400 }, { "epoch": 0.09, "learning_rate": 4.973581618896631e-05, "loss": 1.6414, "step": 401 }, { "epoch": 0.1, "learning_rate": 4.973446553739236e-05, "loss": 1.4029, "step": 402 }, { "epoch": 0.1, "learning_rate": 4.973311146041619e-05, "loss": 1.3923, "step": 403 }, { "epoch": 0.1, "learning_rate": 4.9731753958225316e-05, "loss": 0.9104, "step": 404 }, { "epoch": 0.1, "learning_rate": 4.973039303100773e-05, "loss": 1.4314, "step": 405 }, { "epoch": 0.1, "learning_rate": 4.972902867895191e-05, "loss": 1.5725, "step": 406 }, { "epoch": 0.1, "learning_rate": 4.972766090224681e-05, "loss": 1.8915, "step": 407 }, { "epoch": 0.1, "learning_rate": 4.972628970108183e-05, "loss": 2.0525, "step": 408 }, { "epoch": 0.1, "learning_rate": 4.972491507564688e-05, "loss": 1.4409, "step": 409 }, { "epoch": 0.1, "learning_rate": 4.9723537026132315e-05, "loss": 1.4878, "step": 410 }, { "epoch": 0.1, "learning_rate": 4.972215555272899e-05, "loss": 1.6305, "step": 411 }, { "epoch": 0.1, "learning_rate": 4.972077065562821e-05, "loss": 1.6004, "step": 412 }, { "epoch": 0.1, "learning_rate": 4.971938233502178e-05, "loss": 1.6837, "step": 413 }, { "epoch": 0.1, "learning_rate": 4.971799059110195e-05, "loss": 1.6939, "step": 414 }, { "epoch": 0.1, "learning_rate": 4.971659542406145e-05, "loss": 1.6326, "step": 415 }, { "epoch": 0.1, "learning_rate": 4.971519683409352e-05, "loss": 2.0437, "step": 416 }, { "epoch": 0.1, "learning_rate": 4.9713794821391825e-05, "loss": 1.8919, "step": 417 }, { "epoch": 0.1, "learning_rate": 4.9712389386150535e-05, "loss": 1.4444, "step": 418 }, { "epoch": 0.1, "learning_rate": 4.971098052856428e-05, "loss": 1.7376, "step": 419 }, { "epoch": 0.1, "learning_rate": 4.970956824882816e-05, "loss": 1.2145, "step": 420 }, { "epoch": 0.1, "learning_rate": 4.970815254713779e-05, "loss": 1.7657, "step": 421 }, { "epoch": 0.1, "learning_rate": 4.970673342368919e-05, "loss": 1.4632, "step": 422 }, { "epoch": 0.1, "learning_rate": 4.97053108786789e-05, "loss": 2.0926, "step": 423 }, { "epoch": 0.1, "learning_rate": 4.970388491230393e-05, "loss": 1.6105, "step": 424 }, { "epoch": 0.1, "learning_rate": 4.9702455524761764e-05, "loss": 0.8295, "step": 425 }, { "epoch": 0.1, "learning_rate": 4.9701022716250346e-05, "loss": 1.5312, "step": 426 }, { "epoch": 0.1, "learning_rate": 4.9699586486968094e-05, "loss": 1.2171, "step": 427 }, { "epoch": 0.1, "learning_rate": 4.969814683711391e-05, "loss": 1.6475, "step": 428 }, { "epoch": 0.1, "learning_rate": 4.9696703766887174e-05, "loss": 1.3856, "step": 429 }, { "epoch": 0.1, "learning_rate": 4.9695257276487736e-05, "loss": 1.413, "step": 430 }, { "epoch": 0.1, "learning_rate": 4.9693807366115905e-05, "loss": 1.2724, "step": 431 }, { "epoch": 0.1, "learning_rate": 4.969235403597248e-05, "loss": 2.0263, "step": 432 }, { "epoch": 0.1, "learning_rate": 4.969089728625873e-05, "loss": 1.6963, "step": 433 }, { "epoch": 0.1, "learning_rate": 4.968943711717638e-05, "loss": 1.6354, "step": 434 }, { "epoch": 0.1, "learning_rate": 4.968797352892768e-05, "loss": 1.7197, "step": 435 }, { "epoch": 0.1, "learning_rate": 4.968650652171528e-05, "loss": 1.7091, "step": 436 }, { "epoch": 0.1, "learning_rate": 4.9685036095742365e-05, "loss": 2.0653, "step": 437 }, { "epoch": 0.1, "learning_rate": 4.968356225121256e-05, "loss": 1.8299, "step": 438 }, { "epoch": 0.1, "learning_rate": 4.968208498832997e-05, "loss": 2.0181, "step": 439 }, { "epoch": 0.1, "learning_rate": 4.968060430729918e-05, "loss": 1.7343, "step": 440 }, { "epoch": 0.1, "learning_rate": 4.967912020832526e-05, "loss": 1.9446, "step": 441 }, { "epoch": 0.1, "learning_rate": 4.967763269161372e-05, "loss": 1.4359, "step": 442 }, { "epoch": 0.1, "learning_rate": 4.967614175737056e-05, "loss": 2.211, "step": 443 }, { "epoch": 0.11, "learning_rate": 4.967464740580227e-05, "loss": 1.5243, "step": 444 }, { "epoch": 0.11, "learning_rate": 4.967314963711579e-05, "loss": 1.0646, "step": 445 }, { "epoch": 0.11, "learning_rate": 4.967164845151855e-05, "loss": 1.9905, "step": 446 }, { "epoch": 0.11, "learning_rate": 4.967014384921842e-05, "loss": 1.9194, "step": 447 }, { "epoch": 0.11, "learning_rate": 4.96686358304238e-05, "loss": 1.5094, "step": 448 }, { "epoch": 0.11, "learning_rate": 4.966712439534351e-05, "loss": 1.8683, "step": 449 }, { "epoch": 0.11, "learning_rate": 4.9665609544186867e-05, "loss": 1.422, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.966409127716367e-05, "loss": 1.8794, "step": 451 }, { "epoch": 0.11, "learning_rate": 4.966256959448416e-05, "loss": 1.8767, "step": 452 }, { "epoch": 0.11, "learning_rate": 4.966104449635909e-05, "loss": 1.4035, "step": 453 }, { "epoch": 0.11, "learning_rate": 4.965951598299965e-05, "loss": 1.9975, "step": 454 }, { "epoch": 0.11, "learning_rate": 4.9657984054617526e-05, "loss": 2.1903, "step": 455 }, { "epoch": 0.11, "learning_rate": 4.9656448711424876e-05, "loss": 1.2803, "step": 456 }, { "epoch": 0.11, "learning_rate": 4.9654909953634316e-05, "loss": 1.4093, "step": 457 }, { "epoch": 0.11, "learning_rate": 4.965336778145895e-05, "loss": 1.5228, "step": 458 }, { "epoch": 0.11, "learning_rate": 4.965182219511234e-05, "loss": 1.8209, "step": 459 }, { "epoch": 0.11, "learning_rate": 4.9650273194808546e-05, "loss": 1.7129, "step": 460 }, { "epoch": 0.11, "learning_rate": 4.964872078076207e-05, "loss": 1.9, "step": 461 }, { "epoch": 0.11, "learning_rate": 4.96471649531879e-05, "loss": 1.5979, "step": 462 }, { "epoch": 0.11, "learning_rate": 4.964560571230151e-05, "loss": 1.6498, "step": 463 }, { "epoch": 0.11, "learning_rate": 4.964404305831883e-05, "loss": 2.0043, "step": 464 }, { "epoch": 0.11, "learning_rate": 4.964247699145626e-05, "loss": 1.7263, "step": 465 }, { "epoch": 0.11, "learning_rate": 4.964090751193069e-05, "loss": 2.1116, "step": 466 }, { "epoch": 0.11, "learning_rate": 4.9639334619959464e-05, "loss": 2.2474, "step": 467 }, { "epoch": 0.11, "learning_rate": 4.963775831576041e-05, "loss": 1.6965, "step": 468 }, { "epoch": 0.11, "learning_rate": 4.963617859955183e-05, "loss": 1.7008, "step": 469 }, { "epoch": 0.11, "learning_rate": 4.963459547155249e-05, "loss": 1.7393, "step": 470 }, { "epoch": 0.11, "learning_rate": 4.963300893198164e-05, "loss": 1.9297, "step": 471 }, { "epoch": 0.11, "learning_rate": 4.9631418981058974e-05, "loss": 1.6991, "step": 472 }, { "epoch": 0.11, "learning_rate": 4.9629825619004704e-05, "loss": 1.2928, "step": 473 }, { "epoch": 0.11, "learning_rate": 4.962822884603948e-05, "loss": 1.7254, "step": 474 }, { "epoch": 0.11, "learning_rate": 4.9626628662384434e-05, "loss": 1.9312, "step": 475 }, { "epoch": 0.11, "learning_rate": 4.962502506826117e-05, "loss": 1.5214, "step": 476 }, { "epoch": 0.11, "learning_rate": 4.962341806389176e-05, "loss": 1.3667, "step": 477 }, { "epoch": 0.11, "learning_rate": 4.9621807649498764e-05, "loss": 1.7049, "step": 478 }, { "epoch": 0.11, "learning_rate": 4.962019382530521e-05, "loss": 1.7479, "step": 479 }, { "epoch": 0.11, "learning_rate": 4.961857659153456e-05, "loss": 1.5101, "step": 480 }, { "epoch": 0.11, "learning_rate": 4.961695594841082e-05, "loss": 1.3799, "step": 481 }, { "epoch": 0.11, "learning_rate": 4.961533189615839e-05, "loss": 2.4575, "step": 482 }, { "epoch": 0.11, "learning_rate": 4.961370443500221e-05, "loss": 1.5559, "step": 483 }, { "epoch": 0.11, "learning_rate": 4.961207356516765e-05, "loss": 1.5502, "step": 484 }, { "epoch": 0.11, "learning_rate": 4.961043928688056e-05, "loss": 1.3477, "step": 485 }, { "epoch": 0.11, "learning_rate": 4.960880160036728e-05, "loss": 1.7157, "step": 486 }, { "epoch": 0.12, "learning_rate": 4.960716050585459e-05, "loss": 1.4801, "step": 487 }, { "epoch": 0.12, "learning_rate": 4.960551600356977e-05, "loss": 1.3484, "step": 488 }, { "epoch": 0.12, "learning_rate": 4.960386809374057e-05, "loss": 1.8751, "step": 489 }, { "epoch": 0.12, "learning_rate": 4.960221677659519e-05, "loss": 1.6072, "step": 490 }, { "epoch": 0.12, "learning_rate": 4.9600562052362333e-05, "loss": 2.4259, "step": 491 }, { "epoch": 0.12, "learning_rate": 4.9598903921271135e-05, "loss": 1.9676, "step": 492 }, { "epoch": 0.12, "learning_rate": 4.959724238355123e-05, "loss": 2.193, "step": 493 }, { "epoch": 0.12, "learning_rate": 4.959557743943274e-05, "loss": 1.8709, "step": 494 }, { "epoch": 0.12, "learning_rate": 4.9593909089146224e-05, "loss": 1.5639, "step": 495 }, { "epoch": 0.12, "learning_rate": 4.959223733292272e-05, "loss": 1.5942, "step": 496 }, { "epoch": 0.12, "learning_rate": 4.9590562170993755e-05, "loss": 2.474, "step": 497 }, { "epoch": 0.12, "learning_rate": 4.958888360359131e-05, "loss": 1.8667, "step": 498 }, { "epoch": 0.12, "learning_rate": 4.958720163094786e-05, "loss": 1.9014, "step": 499 }, { "epoch": 0.12, "learning_rate": 4.958551625329631e-05, "loss": 1.3199, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.958382747087008e-05, "loss": 1.4193, "step": 501 }, { "epoch": 0.12, "learning_rate": 4.958213528390305e-05, "loss": 1.6693, "step": 502 }, { "epoch": 0.12, "learning_rate": 4.958043969262955e-05, "loss": 1.3332, "step": 503 }, { "epoch": 0.12, "learning_rate": 4.957874069728441e-05, "loss": 1.3463, "step": 504 }, { "epoch": 0.12, "learning_rate": 4.957703829810292e-05, "loss": 1.3705, "step": 505 }, { "epoch": 0.12, "learning_rate": 4.9575332495320826e-05, "loss": 2.1182, "step": 506 }, { "epoch": 0.12, "learning_rate": 4.957362328917437e-05, "loss": 1.7346, "step": 507 }, { "epoch": 0.12, "learning_rate": 4.9571910679900255e-05, "loss": 1.3646, "step": 508 }, { "epoch": 0.12, "learning_rate": 4.957019466773565e-05, "loss": 1.9329, "step": 509 }, { "epoch": 0.12, "learning_rate": 4.95684752529182e-05, "loss": 1.8554, "step": 510 }, { "epoch": 0.12, "learning_rate": 4.9566752435686036e-05, "loss": 1.9678, "step": 511 }, { "epoch": 0.12, "learning_rate": 4.956502621627773e-05, "loss": 1.9655, "step": 512 }, { "epoch": 0.12, "learning_rate": 4.956329659493234e-05, "loss": 1.5136, "step": 513 }, { "epoch": 0.12, "learning_rate": 4.95615635718894e-05, "loss": 1.5812, "step": 514 }, { "epoch": 0.12, "learning_rate": 4.955982714738892e-05, "loss": 2.4143, "step": 515 }, { "epoch": 0.12, "learning_rate": 4.9558087321671374e-05, "loss": 2.0219, "step": 516 }, { "epoch": 0.12, "learning_rate": 4.955634409497768e-05, "loss": 1.3464, "step": 517 }, { "epoch": 0.12, "learning_rate": 4.955459746754928e-05, "loss": 2.011, "step": 518 }, { "epoch": 0.12, "learning_rate": 4.955284743962804e-05, "loss": 1.1818, "step": 519 }, { "epoch": 0.12, "learning_rate": 4.9551094011456335e-05, "loss": 1.9715, "step": 520 }, { "epoch": 0.12, "learning_rate": 4.954933718327697e-05, "loss": 2.2617, "step": 521 }, { "epoch": 0.12, "learning_rate": 4.954757695533326e-05, "loss": 2.0038, "step": 522 }, { "epoch": 0.12, "learning_rate": 4.954581332786896e-05, "loss": 1.2001, "step": 523 }, { "epoch": 0.12, "learning_rate": 4.954404630112833e-05, "loss": 1.1356, "step": 524 }, { "epoch": 0.12, "learning_rate": 4.954227587535606e-05, "loss": 1.6941, "step": 525 }, { "epoch": 0.12, "learning_rate": 4.9540502050797335e-05, "loss": 1.3234, "step": 526 }, { "epoch": 0.12, "learning_rate": 4.9538724827697814e-05, "loss": 2.1369, "step": 527 }, { "epoch": 0.12, "learning_rate": 4.953694420630361e-05, "loss": 2.0145, "step": 528 }, { "epoch": 0.13, "learning_rate": 4.953516018686133e-05, "loss": 1.5505, "step": 529 } ], "logging_steps": 1, "max_steps": 8452, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 529, "total_flos": 3.00665305300992e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }