{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5886410669119337, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.358013336399172e-05, "grad_norm": 12.875, "learning_rate": 0.0, "loss": 1.3441, "step": 1 }, { "epoch": 0.00014716026672798344, "grad_norm": 15.0, "learning_rate": 1.6666666666666667e-06, "loss": 1.7824, "step": 2 }, { "epoch": 0.00022074040009197518, "grad_norm": 16.625, "learning_rate": 3.3333333333333333e-06, "loss": 2.041, "step": 3 }, { "epoch": 0.0002943205334559669, "grad_norm": 11.875, "learning_rate": 5e-06, "loss": 1.1798, "step": 4 }, { "epoch": 0.0003679006668199586, "grad_norm": 12.125, "learning_rate": 6.666666666666667e-06, "loss": 1.3734, "step": 5 }, { "epoch": 0.00044148080018395036, "grad_norm": 9.5, "learning_rate": 8.333333333333334e-06, "loss": 1.5999, "step": 6 }, { "epoch": 0.0005150609335479421, "grad_norm": 7.03125, "learning_rate": 1e-05, "loss": 1.5471, "step": 7 }, { "epoch": 0.0005886410669119338, "grad_norm": 4.71875, "learning_rate": 1.1666666666666668e-05, "loss": 1.0944, "step": 8 }, { "epoch": 0.0006622212002759255, "grad_norm": 3.09375, "learning_rate": 1.3333333333333333e-05, "loss": 0.8931, "step": 9 }, { "epoch": 0.0007358013336399172, "grad_norm": 2.375, "learning_rate": 1.5e-05, "loss": 1.0534, "step": 10 }, { "epoch": 0.0008093814670039089, "grad_norm": 2.640625, "learning_rate": 1.6666666666666667e-05, "loss": 0.8836, "step": 11 }, { "epoch": 0.0008829616003679007, "grad_norm": 2.15625, "learning_rate": 1.8333333333333333e-05, "loss": 1.1655, "step": 12 }, { "epoch": 0.0009565417337318924, "grad_norm": 2.265625, "learning_rate": 2e-05, "loss": 1.7158, "step": 13 }, { "epoch": 0.0010301218670958842, "grad_norm": 2.171875, "learning_rate": 2.1666666666666667e-05, "loss": 1.2426, "step": 14 }, { "epoch": 0.0011037020004598759, "grad_norm": 1.6796875, "learning_rate": 2.3333333333333336e-05, "loss": 1.048, "step": 15 }, { "epoch": 0.0011772821338238676, "grad_norm": 1.875, "learning_rate": 2.5e-05, "loss": 1.3412, "step": 16 }, { "epoch": 0.0012508622671878592, "grad_norm": 1.5078125, "learning_rate": 2.6666666666666667e-05, "loss": 1.0901, "step": 17 }, { "epoch": 0.001324442400551851, "grad_norm": 1.1875, "learning_rate": 2.8333333333333335e-05, "loss": 0.9881, "step": 18 }, { "epoch": 0.0013980225339158428, "grad_norm": 1.4609375, "learning_rate": 3e-05, "loss": 0.9804, "step": 19 }, { "epoch": 0.0014716026672798345, "grad_norm": 1.5625, "learning_rate": 3.1666666666666666e-05, "loss": 1.3544, "step": 20 }, { "epoch": 0.0015451828006438262, "grad_norm": 2.140625, "learning_rate": 3.3333333333333335e-05, "loss": 0.9491, "step": 21 }, { "epoch": 0.0016187629340078179, "grad_norm": 1.2890625, "learning_rate": 3.5e-05, "loss": 0.9426, "step": 22 }, { "epoch": 0.0016923430673718095, "grad_norm": 1.2109375, "learning_rate": 3.6666666666666666e-05, "loss": 1.0448, "step": 23 }, { "epoch": 0.0017659232007358014, "grad_norm": 1.3359375, "learning_rate": 3.8333333333333334e-05, "loss": 1.2337, "step": 24 }, { "epoch": 0.0018395033340997931, "grad_norm": 1.3046875, "learning_rate": 4e-05, "loss": 1.5146, "step": 25 }, { "epoch": 0.0019130834674637848, "grad_norm": 1.4609375, "learning_rate": 4.166666666666667e-05, "loss": 1.1482, "step": 26 }, { "epoch": 0.0019866636008277765, "grad_norm": 1.109375, "learning_rate": 4.3333333333333334e-05, "loss": 0.8161, "step": 27 }, { "epoch": 0.0020602437341917684, "grad_norm": 1.2265625, "learning_rate": 4.5e-05, "loss": 0.991, "step": 28 }, { "epoch": 0.00213382386755576, "grad_norm": 0.98828125, "learning_rate": 4.666666666666667e-05, "loss": 0.7794, "step": 29 }, { "epoch": 0.0022074040009197517, "grad_norm": 1.0703125, "learning_rate": 4.8333333333333334e-05, "loss": 0.8139, "step": 30 }, { "epoch": 0.002280984134283743, "grad_norm": 1.09375, "learning_rate": 5e-05, "loss": 0.8812, "step": 31 }, { "epoch": 0.002354564267647735, "grad_norm": 1.2109375, "learning_rate": 4.9999999329148614e-05, "loss": 0.6803, "step": 32 }, { "epoch": 0.002428144401011727, "grad_norm": 1.265625, "learning_rate": 4.999999731659447e-05, "loss": 0.9949, "step": 33 }, { "epoch": 0.0025017245343757185, "grad_norm": 1.1171875, "learning_rate": 4.9999993962337696e-05, "loss": 0.7634, "step": 34 }, { "epoch": 0.0025753046677397104, "grad_norm": 1.296875, "learning_rate": 4.9999989266378464e-05, "loss": 0.9535, "step": 35 }, { "epoch": 0.002648884801103702, "grad_norm": 1.078125, "learning_rate": 4.999998322871703e-05, "loss": 0.9741, "step": 36 }, { "epoch": 0.0027224649344676937, "grad_norm": 1.4453125, "learning_rate": 4.999997584935371e-05, "loss": 0.8228, "step": 37 }, { "epoch": 0.0027960450678316856, "grad_norm": 1.1015625, "learning_rate": 4.9999967128288905e-05, "loss": 0.8874, "step": 38 }, { "epoch": 0.002869625201195677, "grad_norm": 0.90625, "learning_rate": 4.9999957065523085e-05, "loss": 0.7428, "step": 39 }, { "epoch": 0.002943205334559669, "grad_norm": 0.9296875, "learning_rate": 4.9999945661056786e-05, "loss": 0.7836, "step": 40 }, { "epoch": 0.0030167854679236604, "grad_norm": 1.1796875, "learning_rate": 4.999993291489062e-05, "loss": 0.9396, "step": 41 }, { "epoch": 0.0030903656012876523, "grad_norm": 1.3359375, "learning_rate": 4.9999918827025275e-05, "loss": 0.9364, "step": 42 }, { "epoch": 0.0031639457346516442, "grad_norm": 1.2109375, "learning_rate": 4.99999033974615e-05, "loss": 0.8486, "step": 43 }, { "epoch": 0.0032375258680156357, "grad_norm": 1.21875, "learning_rate": 4.999988662620013e-05, "loss": 1.0065, "step": 44 }, { "epoch": 0.0033111060013796276, "grad_norm": 1.1640625, "learning_rate": 4.9999868513242065e-05, "loss": 1.1676, "step": 45 }, { "epoch": 0.003384686134743619, "grad_norm": 1.4609375, "learning_rate": 4.9999849058588276e-05, "loss": 1.1662, "step": 46 }, { "epoch": 0.003458266268107611, "grad_norm": 1.0, "learning_rate": 4.99998282622398e-05, "loss": 0.7682, "step": 47 }, { "epoch": 0.003531846401471603, "grad_norm": 1.3359375, "learning_rate": 4.999980612419777e-05, "loss": 1.3762, "step": 48 }, { "epoch": 0.0036054265348355943, "grad_norm": 1.046875, "learning_rate": 4.999978264446335e-05, "loss": 1.1862, "step": 49 }, { "epoch": 0.0036790066681995862, "grad_norm": 1.125, "learning_rate": 4.9999757823037816e-05, "loss": 1.0162, "step": 50 }, { "epoch": 0.0037525868015635777, "grad_norm": 0.96484375, "learning_rate": 4.999973165992251e-05, "loss": 0.9144, "step": 51 }, { "epoch": 0.0038261669349275696, "grad_norm": 1.3828125, "learning_rate": 4.999970415511881e-05, "loss": 1.7539, "step": 52 }, { "epoch": 0.0038997470682915615, "grad_norm": 1.03125, "learning_rate": 4.999967530862821e-05, "loss": 1.0273, "step": 53 }, { "epoch": 0.003973327201655553, "grad_norm": 1.1484375, "learning_rate": 4.999964512045226e-05, "loss": 1.1683, "step": 54 }, { "epoch": 0.004046907335019545, "grad_norm": 1.203125, "learning_rate": 4.999961359059257e-05, "loss": 1.0855, "step": 55 }, { "epoch": 0.004120487468383537, "grad_norm": 1.03125, "learning_rate": 4.999958071905082e-05, "loss": 0.9039, "step": 56 }, { "epoch": 0.004194067601747528, "grad_norm": 1.515625, "learning_rate": 4.999954650582881e-05, "loss": 0.8736, "step": 57 }, { "epoch": 0.00426764773511152, "grad_norm": 1.3515625, "learning_rate": 4.999951095092835e-05, "loss": 0.9829, "step": 58 }, { "epoch": 0.004341227868475512, "grad_norm": 1.234375, "learning_rate": 4.9999474054351357e-05, "loss": 1.087, "step": 59 }, { "epoch": 0.0044148080018395035, "grad_norm": 1.125, "learning_rate": 4.99994358160998e-05, "loss": 1.0548, "step": 60 }, { "epoch": 0.004488388135203495, "grad_norm": 1.109375, "learning_rate": 4.9999396236175754e-05, "loss": 1.2358, "step": 61 }, { "epoch": 0.004561968268567486, "grad_norm": 1.2109375, "learning_rate": 4.999935531458132e-05, "loss": 0.9955, "step": 62 }, { "epoch": 0.004635548401931478, "grad_norm": 1.03125, "learning_rate": 4.999931305131871e-05, "loss": 1.0251, "step": 63 }, { "epoch": 0.00470912853529547, "grad_norm": 0.98828125, "learning_rate": 4.999926944639018e-05, "loss": 1.0653, "step": 64 }, { "epoch": 0.004782708668659462, "grad_norm": 1.1796875, "learning_rate": 4.999922449979808e-05, "loss": 1.0879, "step": 65 }, { "epoch": 0.004856288802023454, "grad_norm": 1.0546875, "learning_rate": 4.999917821154481e-05, "loss": 1.0973, "step": 66 }, { "epoch": 0.004929868935387445, "grad_norm": 1.015625, "learning_rate": 4.999913058163287e-05, "loss": 0.893, "step": 67 }, { "epoch": 0.005003449068751437, "grad_norm": 1.0, "learning_rate": 4.99990816100648e-05, "loss": 1.3085, "step": 68 }, { "epoch": 0.005077029202115429, "grad_norm": 0.8828125, "learning_rate": 4.9999031296843244e-05, "loss": 0.9653, "step": 69 }, { "epoch": 0.005150609335479421, "grad_norm": 1.2265625, "learning_rate": 4.99989796419709e-05, "loss": 1.3415, "step": 70 }, { "epoch": 0.005224189468843413, "grad_norm": 0.96875, "learning_rate": 4.999892664545053e-05, "loss": 1.1798, "step": 71 }, { "epoch": 0.005297769602207404, "grad_norm": 1.09375, "learning_rate": 4.999887230728497e-05, "loss": 0.9253, "step": 72 }, { "epoch": 0.0053713497355713955, "grad_norm": 0.8359375, "learning_rate": 4.9998816627477166e-05, "loss": 0.7838, "step": 73 }, { "epoch": 0.0054449298689353874, "grad_norm": 1.2109375, "learning_rate": 4.999875960603008e-05, "loss": 1.5514, "step": 74 }, { "epoch": 0.005518510002299379, "grad_norm": 1.3671875, "learning_rate": 4.9998701242946785e-05, "loss": 1.3704, "step": 75 }, { "epoch": 0.005592090135663371, "grad_norm": 1.1875, "learning_rate": 4.9998641538230415e-05, "loss": 1.0745, "step": 76 }, { "epoch": 0.005665670269027362, "grad_norm": 1.1015625, "learning_rate": 4.999858049188417e-05, "loss": 1.1878, "step": 77 }, { "epoch": 0.005739250402391354, "grad_norm": 1.0390625, "learning_rate": 4.999851810391132e-05, "loss": 1.0663, "step": 78 }, { "epoch": 0.005812830535755346, "grad_norm": 1.0625, "learning_rate": 4.9998454374315216e-05, "loss": 1.0414, "step": 79 }, { "epoch": 0.005886410669119338, "grad_norm": 1.046875, "learning_rate": 4.9998389303099284e-05, "loss": 0.7892, "step": 80 }, { "epoch": 0.00595999080248333, "grad_norm": 1.140625, "learning_rate": 4.999832289026701e-05, "loss": 1.0337, "step": 81 }, { "epoch": 0.006033570935847321, "grad_norm": 0.8828125, "learning_rate": 4.999825513582197e-05, "loss": 0.7221, "step": 82 }, { "epoch": 0.006107151069211313, "grad_norm": 0.953125, "learning_rate": 4.999818603976779e-05, "loss": 0.9394, "step": 83 }, { "epoch": 0.006180731202575305, "grad_norm": 0.83984375, "learning_rate": 4.999811560210817e-05, "loss": 0.6104, "step": 84 }, { "epoch": 0.006254311335939297, "grad_norm": 0.8671875, "learning_rate": 4.999804382284692e-05, "loss": 0.7438, "step": 85 }, { "epoch": 0.0063278914693032885, "grad_norm": 1.5078125, "learning_rate": 4.9997970701987855e-05, "loss": 1.26, "step": 86 }, { "epoch": 0.0064014716026672795, "grad_norm": 0.83203125, "learning_rate": 4.999789623953493e-05, "loss": 0.8338, "step": 87 }, { "epoch": 0.006475051736031271, "grad_norm": 1.453125, "learning_rate": 4.9997820435492116e-05, "loss": 1.2148, "step": 88 }, { "epoch": 0.006548631869395263, "grad_norm": 1.234375, "learning_rate": 4.99977432898635e-05, "loss": 1.1692, "step": 89 }, { "epoch": 0.006622212002759255, "grad_norm": 0.94921875, "learning_rate": 4.999766480265321e-05, "loss": 0.9819, "step": 90 }, { "epoch": 0.006695792136123247, "grad_norm": 1.1484375, "learning_rate": 4.999758497386547e-05, "loss": 1.4166, "step": 91 }, { "epoch": 0.006769372269487238, "grad_norm": 0.9921875, "learning_rate": 4.999750380350456e-05, "loss": 0.7442, "step": 92 }, { "epoch": 0.00684295240285123, "grad_norm": 1.015625, "learning_rate": 4.999742129157483e-05, "loss": 1.0275, "step": 93 }, { "epoch": 0.006916532536215222, "grad_norm": 1.0859375, "learning_rate": 4.999733743808071e-05, "loss": 0.8956, "step": 94 }, { "epoch": 0.006990112669579214, "grad_norm": 1.015625, "learning_rate": 4.999725224302671e-05, "loss": 0.8214, "step": 95 }, { "epoch": 0.007063692802943206, "grad_norm": 1.0390625, "learning_rate": 4.9997165706417395e-05, "loss": 1.1712, "step": 96 }, { "epoch": 0.007137272936307197, "grad_norm": 0.96484375, "learning_rate": 4.99970778282574e-05, "loss": 0.8904, "step": 97 }, { "epoch": 0.007210853069671189, "grad_norm": 1.015625, "learning_rate": 4.9996988608551454e-05, "loss": 1.0042, "step": 98 }, { "epoch": 0.0072844332030351806, "grad_norm": 0.85546875, "learning_rate": 4.999689804730435e-05, "loss": 0.8162, "step": 99 }, { "epoch": 0.0073580133363991725, "grad_norm": 1.0703125, "learning_rate": 4.9996806144520936e-05, "loss": 0.8645, "step": 100 }, { "epoch": 0.007431593469763164, "grad_norm": 0.90234375, "learning_rate": 4.999671290020615e-05, "loss": 1.0243, "step": 101 }, { "epoch": 0.007505173603127155, "grad_norm": 0.859375, "learning_rate": 4.999661831436499e-05, "loss": 0.9468, "step": 102 }, { "epoch": 0.007578753736491147, "grad_norm": 1.0390625, "learning_rate": 4.999652238700253e-05, "loss": 1.154, "step": 103 }, { "epoch": 0.007652333869855139, "grad_norm": 1.296875, "learning_rate": 4.999642511812394e-05, "loss": 1.4333, "step": 104 }, { "epoch": 0.007725914003219131, "grad_norm": 0.9140625, "learning_rate": 4.999632650773442e-05, "loss": 0.7866, "step": 105 }, { "epoch": 0.007799494136583123, "grad_norm": 0.82421875, "learning_rate": 4.999622655583927e-05, "loss": 0.7542, "step": 106 }, { "epoch": 0.007873074269947114, "grad_norm": 1.0625, "learning_rate": 4.999612526244385e-05, "loss": 1.2052, "step": 107 }, { "epoch": 0.007946654403311106, "grad_norm": 1.2109375, "learning_rate": 4.99960226275536e-05, "loss": 1.369, "step": 108 }, { "epoch": 0.008020234536675098, "grad_norm": 0.859375, "learning_rate": 4.9995918651174016e-05, "loss": 0.9614, "step": 109 }, { "epoch": 0.00809381467003909, "grad_norm": 0.99609375, "learning_rate": 4.99958133333107e-05, "loss": 1.0933, "step": 110 }, { "epoch": 0.008167394803403082, "grad_norm": 0.9921875, "learning_rate": 4.999570667396929e-05, "loss": 0.9463, "step": 111 }, { "epoch": 0.008240974936767074, "grad_norm": 0.953125, "learning_rate": 4.999559867315551e-05, "loss": 1.0003, "step": 112 }, { "epoch": 0.008314555070131065, "grad_norm": 1.59375, "learning_rate": 4.999548933087516e-05, "loss": 0.7679, "step": 113 }, { "epoch": 0.008388135203495056, "grad_norm": 1.34375, "learning_rate": 4.9995378647134106e-05, "loss": 1.2124, "step": 114 }, { "epoch": 0.008461715336859047, "grad_norm": 1.0234375, "learning_rate": 4.99952666219383e-05, "loss": 0.9484, "step": 115 }, { "epoch": 0.00853529547022304, "grad_norm": 1.0546875, "learning_rate": 4.999515325529373e-05, "loss": 0.8705, "step": 116 }, { "epoch": 0.008608875603587031, "grad_norm": 1.0859375, "learning_rate": 4.99950385472065e-05, "loss": 1.4054, "step": 117 }, { "epoch": 0.008682455736951023, "grad_norm": 0.765625, "learning_rate": 4.999492249768276e-05, "loss": 0.688, "step": 118 }, { "epoch": 0.008756035870315015, "grad_norm": 1.1484375, "learning_rate": 4.999480510672874e-05, "loss": 1.208, "step": 119 }, { "epoch": 0.008829616003679007, "grad_norm": 1.1015625, "learning_rate": 4.9994686374350744e-05, "loss": 0.8497, "step": 120 }, { "epoch": 0.008903196137042999, "grad_norm": 1.046875, "learning_rate": 4.9994566300555124e-05, "loss": 0.9617, "step": 121 }, { "epoch": 0.00897677627040699, "grad_norm": 1.1171875, "learning_rate": 4.9994444885348344e-05, "loss": 1.2366, "step": 122 }, { "epoch": 0.009050356403770983, "grad_norm": 1.0234375, "learning_rate": 4.999432212873692e-05, "loss": 0.7457, "step": 123 }, { "epoch": 0.009123936537134973, "grad_norm": 1.1171875, "learning_rate": 4.999419803072743e-05, "loss": 1.1876, "step": 124 }, { "epoch": 0.009197516670498965, "grad_norm": 1.0078125, "learning_rate": 4.999407259132655e-05, "loss": 0.9813, "step": 125 }, { "epoch": 0.009271096803862957, "grad_norm": 0.8203125, "learning_rate": 4.9993945810540985e-05, "loss": 0.7469, "step": 126 }, { "epoch": 0.009344676937226949, "grad_norm": 1.171875, "learning_rate": 4.9993817688377566e-05, "loss": 1.1302, "step": 127 }, { "epoch": 0.00941825707059094, "grad_norm": 1.0859375, "learning_rate": 4.999368822484315e-05, "loss": 0.9609, "step": 128 }, { "epoch": 0.009491837203954932, "grad_norm": 1.3125, "learning_rate": 4.9993557419944696e-05, "loss": 1.1844, "step": 129 }, { "epoch": 0.009565417337318924, "grad_norm": 0.8984375, "learning_rate": 4.999342527368922e-05, "loss": 0.8395, "step": 130 }, { "epoch": 0.009638997470682916, "grad_norm": 1.0625, "learning_rate": 4.999329178608382e-05, "loss": 1.1009, "step": 131 }, { "epoch": 0.009712577604046908, "grad_norm": 1.3125, "learning_rate": 4.999315695713566e-05, "loss": 1.1639, "step": 132 }, { "epoch": 0.0097861577374109, "grad_norm": 1.1328125, "learning_rate": 4.999302078685196e-05, "loss": 1.095, "step": 133 }, { "epoch": 0.00985973787077489, "grad_norm": 1.015625, "learning_rate": 4.999288327524004e-05, "loss": 1.2376, "step": 134 }, { "epoch": 0.009933318004138882, "grad_norm": 0.85546875, "learning_rate": 4.999274442230729e-05, "loss": 0.8503, "step": 135 }, { "epoch": 0.010006898137502874, "grad_norm": 1.0234375, "learning_rate": 4.9992604228061145e-05, "loss": 1.1394, "step": 136 }, { "epoch": 0.010080478270866866, "grad_norm": 1.1953125, "learning_rate": 4.999246269250914e-05, "loss": 0.8703, "step": 137 }, { "epoch": 0.010154058404230858, "grad_norm": 1.0703125, "learning_rate": 4.999231981565886e-05, "loss": 1.0047, "step": 138 }, { "epoch": 0.01022763853759485, "grad_norm": 1.234375, "learning_rate": 4.999217559751799e-05, "loss": 1.3795, "step": 139 }, { "epoch": 0.010301218670958841, "grad_norm": 1.3828125, "learning_rate": 4.9992030038094243e-05, "loss": 1.2553, "step": 140 }, { "epoch": 0.010374798804322833, "grad_norm": 1.2265625, "learning_rate": 4.999188313739546e-05, "loss": 1.3212, "step": 141 }, { "epoch": 0.010448378937686825, "grad_norm": 1.2890625, "learning_rate": 4.999173489542951e-05, "loss": 1.5042, "step": 142 }, { "epoch": 0.010521959071050817, "grad_norm": 1.2109375, "learning_rate": 4.999158531220434e-05, "loss": 1.1514, "step": 143 }, { "epoch": 0.010595539204414807, "grad_norm": 1.0078125, "learning_rate": 4.9991434387728e-05, "loss": 0.8765, "step": 144 }, { "epoch": 0.0106691193377788, "grad_norm": 1.1171875, "learning_rate": 4.999128212200858e-05, "loss": 1.4513, "step": 145 }, { "epoch": 0.010742699471142791, "grad_norm": 0.984375, "learning_rate": 4.999112851505424e-05, "loss": 0.9521, "step": 146 }, { "epoch": 0.010816279604506783, "grad_norm": 1.03125, "learning_rate": 4.999097356687324e-05, "loss": 0.9354, "step": 147 }, { "epoch": 0.010889859737870775, "grad_norm": 1.1484375, "learning_rate": 4.999081727747389e-05, "loss": 1.1123, "step": 148 }, { "epoch": 0.010963439871234767, "grad_norm": 2.421875, "learning_rate": 4.999065964686458e-05, "loss": 0.6834, "step": 149 }, { "epoch": 0.011037020004598759, "grad_norm": 1.1015625, "learning_rate": 4.9990500675053765e-05, "loss": 1.0249, "step": 150 }, { "epoch": 0.01111060013796275, "grad_norm": 0.87890625, "learning_rate": 4.9990340362049974e-05, "loss": 0.7333, "step": 151 }, { "epoch": 0.011184180271326742, "grad_norm": 1.25, "learning_rate": 4.999017870786182e-05, "loss": 1.1691, "step": 152 }, { "epoch": 0.011257760404690733, "grad_norm": 1.0390625, "learning_rate": 4.9990015712497974e-05, "loss": 1.3583, "step": 153 }, { "epoch": 0.011331340538054725, "grad_norm": 0.88671875, "learning_rate": 4.998985137596719e-05, "loss": 1.1294, "step": 154 }, { "epoch": 0.011404920671418716, "grad_norm": 1.0703125, "learning_rate": 4.9989685698278274e-05, "loss": 1.2782, "step": 155 }, { "epoch": 0.011478500804782708, "grad_norm": 1.0234375, "learning_rate": 4.998951867944013e-05, "loss": 0.8186, "step": 156 }, { "epoch": 0.0115520809381467, "grad_norm": 0.984375, "learning_rate": 4.998935031946171e-05, "loss": 1.305, "step": 157 }, { "epoch": 0.011625661071510692, "grad_norm": 1.046875, "learning_rate": 4.998918061835207e-05, "loss": 1.0957, "step": 158 }, { "epoch": 0.011699241204874684, "grad_norm": 0.8359375, "learning_rate": 4.998900957612029e-05, "loss": 0.7775, "step": 159 }, { "epoch": 0.011772821338238676, "grad_norm": 1.109375, "learning_rate": 4.998883719277557e-05, "loss": 0.8133, "step": 160 }, { "epoch": 0.011846401471602668, "grad_norm": 0.765625, "learning_rate": 4.9988663468327156e-05, "loss": 0.6707, "step": 161 }, { "epoch": 0.01191998160496666, "grad_norm": 0.86328125, "learning_rate": 4.998848840278437e-05, "loss": 1.0147, "step": 162 }, { "epoch": 0.01199356173833065, "grad_norm": 1.546875, "learning_rate": 4.998831199615661e-05, "loss": 1.0418, "step": 163 }, { "epoch": 0.012067141871694642, "grad_norm": 1.0, "learning_rate": 4.9988134248453333e-05, "loss": 1.0659, "step": 164 }, { "epoch": 0.012140722005058634, "grad_norm": 1.2890625, "learning_rate": 4.9987955159684095e-05, "loss": 1.0881, "step": 165 }, { "epoch": 0.012214302138422626, "grad_norm": 1.09375, "learning_rate": 4.9987774729858494e-05, "loss": 0.8431, "step": 166 }, { "epoch": 0.012287882271786617, "grad_norm": 0.8671875, "learning_rate": 4.998759295898622e-05, "loss": 0.7996, "step": 167 }, { "epoch": 0.01236146240515061, "grad_norm": 1.1796875, "learning_rate": 4.9987409847077033e-05, "loss": 1.1523, "step": 168 }, { "epoch": 0.012435042538514601, "grad_norm": 0.82421875, "learning_rate": 4.9987225394140744e-05, "loss": 1.0186, "step": 169 }, { "epoch": 0.012508622671878593, "grad_norm": 1.484375, "learning_rate": 4.9987039600187266e-05, "loss": 1.361, "step": 170 }, { "epoch": 0.012582202805242585, "grad_norm": 0.8984375, "learning_rate": 4.9986852465226574e-05, "loss": 0.9546, "step": 171 }, { "epoch": 0.012655782938606577, "grad_norm": 1.1171875, "learning_rate": 4.99866639892687e-05, "loss": 1.074, "step": 172 }, { "epoch": 0.012729363071970567, "grad_norm": 0.80859375, "learning_rate": 4.998647417232375e-05, "loss": 0.7568, "step": 173 }, { "epoch": 0.012802943205334559, "grad_norm": 0.9921875, "learning_rate": 4.998628301440194e-05, "loss": 1.0731, "step": 174 }, { "epoch": 0.012876523338698551, "grad_norm": 0.95703125, "learning_rate": 4.9986090515513506e-05, "loss": 0.8271, "step": 175 }, { "epoch": 0.012950103472062543, "grad_norm": 1.15625, "learning_rate": 4.9985896675668784e-05, "loss": 1.154, "step": 176 }, { "epoch": 0.013023683605426535, "grad_norm": 1.09375, "learning_rate": 4.998570149487819e-05, "loss": 0.9304, "step": 177 }, { "epoch": 0.013097263738790527, "grad_norm": 0.8671875, "learning_rate": 4.998550497315218e-05, "loss": 1.0314, "step": 178 }, { "epoch": 0.013170843872154519, "grad_norm": 0.9375, "learning_rate": 4.99853071105013e-05, "loss": 0.7165, "step": 179 }, { "epoch": 0.01324442400551851, "grad_norm": 1.03125, "learning_rate": 4.998510790693619e-05, "loss": 0.7941, "step": 180 }, { "epoch": 0.013318004138882502, "grad_norm": 0.92578125, "learning_rate": 4.9984907362467525e-05, "loss": 0.9811, "step": 181 }, { "epoch": 0.013391584272246494, "grad_norm": 1.2421875, "learning_rate": 4.9984705477106076e-05, "loss": 0.9994, "step": 182 }, { "epoch": 0.013465164405610484, "grad_norm": 1.1171875, "learning_rate": 4.9984502250862666e-05, "loss": 0.9321, "step": 183 }, { "epoch": 0.013538744538974476, "grad_norm": 0.984375, "learning_rate": 4.998429768374822e-05, "loss": 0.8844, "step": 184 }, { "epoch": 0.013612324672338468, "grad_norm": 0.9453125, "learning_rate": 4.998409177577369e-05, "loss": 1.1324, "step": 185 }, { "epoch": 0.01368590480570246, "grad_norm": 1.0234375, "learning_rate": 4.998388452695015e-05, "loss": 0.7917, "step": 186 }, { "epoch": 0.013759484939066452, "grad_norm": 1.0078125, "learning_rate": 4.998367593728872e-05, "loss": 0.8042, "step": 187 }, { "epoch": 0.013833065072430444, "grad_norm": 0.91015625, "learning_rate": 4.998346600680059e-05, "loss": 1.1884, "step": 188 }, { "epoch": 0.013906645205794436, "grad_norm": 1.328125, "learning_rate": 4.998325473549702e-05, "loss": 1.6683, "step": 189 }, { "epoch": 0.013980225339158428, "grad_norm": 1.1171875, "learning_rate": 4.998304212338936e-05, "loss": 1.213, "step": 190 }, { "epoch": 0.01405380547252242, "grad_norm": 0.84765625, "learning_rate": 4.998282817048902e-05, "loss": 0.8588, "step": 191 }, { "epoch": 0.014127385605886411, "grad_norm": 1.09375, "learning_rate": 4.998261287680747e-05, "loss": 1.2776, "step": 192 }, { "epoch": 0.014200965739250402, "grad_norm": 0.875, "learning_rate": 4.9982396242356265e-05, "loss": 0.7973, "step": 193 }, { "epoch": 0.014274545872614394, "grad_norm": 1.03125, "learning_rate": 4.998217826714705e-05, "loss": 0.9769, "step": 194 }, { "epoch": 0.014348126005978385, "grad_norm": 1.1484375, "learning_rate": 4.9981958951191507e-05, "loss": 1.0079, "step": 195 }, { "epoch": 0.014421706139342377, "grad_norm": 0.96875, "learning_rate": 4.9981738294501416e-05, "loss": 1.121, "step": 196 }, { "epoch": 0.01449528627270637, "grad_norm": 1.171875, "learning_rate": 4.998151629708861e-05, "loss": 1.2495, "step": 197 }, { "epoch": 0.014568866406070361, "grad_norm": 0.9609375, "learning_rate": 4.998129295896502e-05, "loss": 0.8233, "step": 198 }, { "epoch": 0.014642446539434353, "grad_norm": 1.0546875, "learning_rate": 4.998106828014261e-05, "loss": 1.0031, "step": 199 }, { "epoch": 0.014716026672798345, "grad_norm": 0.84765625, "learning_rate": 4.998084226063344e-05, "loss": 0.7791, "step": 200 }, { "epoch": 0.014789606806162337, "grad_norm": 0.89453125, "learning_rate": 4.998061490044966e-05, "loss": 0.8824, "step": 201 }, { "epoch": 0.014863186939526329, "grad_norm": 1.09375, "learning_rate": 4.998038619960346e-05, "loss": 1.125, "step": 202 }, { "epoch": 0.014936767072890319, "grad_norm": 0.96484375, "learning_rate": 4.998015615810711e-05, "loss": 0.828, "step": 203 }, { "epoch": 0.01501034720625431, "grad_norm": 0.953125, "learning_rate": 4.997992477597295e-05, "loss": 0.9252, "step": 204 }, { "epoch": 0.015083927339618303, "grad_norm": 1.15625, "learning_rate": 4.9979692053213425e-05, "loss": 1.188, "step": 205 }, { "epoch": 0.015157507472982295, "grad_norm": 1.078125, "learning_rate": 4.9979457989841e-05, "loss": 0.9181, "step": 206 }, { "epoch": 0.015231087606346286, "grad_norm": 0.90234375, "learning_rate": 4.9979222585868245e-05, "loss": 0.6557, "step": 207 }, { "epoch": 0.015304667739710278, "grad_norm": 1.2265625, "learning_rate": 4.997898584130779e-05, "loss": 1.1323, "step": 208 }, { "epoch": 0.01537824787307427, "grad_norm": 0.90234375, "learning_rate": 4.9978747756172345e-05, "loss": 0.9684, "step": 209 }, { "epoch": 0.015451828006438262, "grad_norm": 1.3828125, "learning_rate": 4.9978508330474686e-05, "loss": 0.9946, "step": 210 }, { "epoch": 0.015525408139802254, "grad_norm": 0.96875, "learning_rate": 4.9978267564227666e-05, "loss": 0.995, "step": 211 }, { "epoch": 0.015598988273166246, "grad_norm": 0.93359375, "learning_rate": 4.99780254574442e-05, "loss": 0.756, "step": 212 }, { "epoch": 0.015672568406530238, "grad_norm": 0.8828125, "learning_rate": 4.997778201013729e-05, "loss": 0.9518, "step": 213 }, { "epoch": 0.015746148539894228, "grad_norm": 1.03125, "learning_rate": 4.997753722231998e-05, "loss": 1.4175, "step": 214 }, { "epoch": 0.01581972867325822, "grad_norm": 0.95703125, "learning_rate": 4.997729109400544e-05, "loss": 0.6918, "step": 215 }, { "epoch": 0.015893308806622212, "grad_norm": 1.265625, "learning_rate": 4.9977043625206853e-05, "loss": 1.2282, "step": 216 }, { "epoch": 0.015966888939986205, "grad_norm": 0.83984375, "learning_rate": 4.997679481593751e-05, "loss": 0.6038, "step": 217 }, { "epoch": 0.016040469073350196, "grad_norm": 1.265625, "learning_rate": 4.9976544666210765e-05, "loss": 1.4195, "step": 218 }, { "epoch": 0.016114049206714186, "grad_norm": 1.0390625, "learning_rate": 4.997629317604005e-05, "loss": 1.161, "step": 219 }, { "epoch": 0.01618762934007818, "grad_norm": 1.234375, "learning_rate": 4.997604034543885e-05, "loss": 1.6678, "step": 220 }, { "epoch": 0.01626120947344217, "grad_norm": 0.9765625, "learning_rate": 4.997578617442073e-05, "loss": 1.1499, "step": 221 }, { "epoch": 0.016334789606806163, "grad_norm": 1.0234375, "learning_rate": 4.9975530662999344e-05, "loss": 0.8027, "step": 222 }, { "epoch": 0.016408369740170153, "grad_norm": 1.140625, "learning_rate": 4.997527381118839e-05, "loss": 1.3696, "step": 223 }, { "epoch": 0.016481949873534147, "grad_norm": 1.203125, "learning_rate": 4.997501561900167e-05, "loss": 1.175, "step": 224 }, { "epoch": 0.016555530006898137, "grad_norm": 0.890625, "learning_rate": 4.997475608645304e-05, "loss": 1.0295, "step": 225 }, { "epoch": 0.01662911014026213, "grad_norm": 0.85546875, "learning_rate": 4.9974495213556414e-05, "loss": 0.6764, "step": 226 }, { "epoch": 0.01670269027362612, "grad_norm": 1.1015625, "learning_rate": 4.9974233000325806e-05, "loss": 0.9065, "step": 227 }, { "epoch": 0.01677627040699011, "grad_norm": 1.125, "learning_rate": 4.9973969446775275e-05, "loss": 1.1367, "step": 228 }, { "epoch": 0.016849850540354105, "grad_norm": 1.15625, "learning_rate": 4.9973704552918974e-05, "loss": 1.4926, "step": 229 }, { "epoch": 0.016923430673718095, "grad_norm": 1.3359375, "learning_rate": 4.997343831877112e-05, "loss": 1.0596, "step": 230 }, { "epoch": 0.01699701080708209, "grad_norm": 1.3203125, "learning_rate": 4.9973170744346e-05, "loss": 1.6111, "step": 231 }, { "epoch": 0.01707059094044608, "grad_norm": 0.96875, "learning_rate": 4.997290182965797e-05, "loss": 0.8489, "step": 232 }, { "epoch": 0.017144171073810072, "grad_norm": 0.8359375, "learning_rate": 4.997263157472147e-05, "loss": 0.8698, "step": 233 }, { "epoch": 0.017217751207174063, "grad_norm": 1.2265625, "learning_rate": 4.9972359979551e-05, "loss": 1.535, "step": 234 }, { "epoch": 0.017291331340538056, "grad_norm": 1.140625, "learning_rate": 4.997208704416113e-05, "loss": 0.9235, "step": 235 }, { "epoch": 0.017364911473902046, "grad_norm": 0.890625, "learning_rate": 4.9971812768566527e-05, "loss": 0.9751, "step": 236 }, { "epoch": 0.017438491607266036, "grad_norm": 1.03125, "learning_rate": 4.997153715278189e-05, "loss": 1.4104, "step": 237 }, { "epoch": 0.01751207174063003, "grad_norm": 1.3828125, "learning_rate": 4.9971260196822015e-05, "loss": 1.7236, "step": 238 }, { "epoch": 0.01758565187399402, "grad_norm": 1.0703125, "learning_rate": 4.9970981900701776e-05, "loss": 1.0166, "step": 239 }, { "epoch": 0.017659232007358014, "grad_norm": 1.1015625, "learning_rate": 4.9970702264436095e-05, "loss": 0.9333, "step": 240 }, { "epoch": 0.017732812140722004, "grad_norm": 1.140625, "learning_rate": 4.9970421288039994e-05, "loss": 1.1386, "step": 241 }, { "epoch": 0.017806392274085998, "grad_norm": 1.1875, "learning_rate": 4.997013897152854e-05, "loss": 1.3691, "step": 242 }, { "epoch": 0.017879972407449988, "grad_norm": 0.9609375, "learning_rate": 4.996985531491688e-05, "loss": 0.8539, "step": 243 }, { "epoch": 0.01795355254081398, "grad_norm": 0.890625, "learning_rate": 4.996957031822026e-05, "loss": 0.7394, "step": 244 }, { "epoch": 0.01802713267417797, "grad_norm": 1.0078125, "learning_rate": 4.996928398145396e-05, "loss": 0.9116, "step": 245 }, { "epoch": 0.018100712807541965, "grad_norm": 1.109375, "learning_rate": 4.996899630463335e-05, "loss": 1.235, "step": 246 }, { "epoch": 0.018174292940905955, "grad_norm": 0.99609375, "learning_rate": 4.996870728777386e-05, "loss": 0.906, "step": 247 }, { "epoch": 0.018247873074269946, "grad_norm": 0.984375, "learning_rate": 4.9968416930891016e-05, "loss": 0.5867, "step": 248 }, { "epoch": 0.01832145320763394, "grad_norm": 1.375, "learning_rate": 4.9968125234000396e-05, "loss": 0.7491, "step": 249 }, { "epoch": 0.01839503334099793, "grad_norm": 1.375, "learning_rate": 4.9967832197117645e-05, "loss": 1.0552, "step": 250 }, { "epoch": 0.018468613474361923, "grad_norm": 0.87890625, "learning_rate": 4.99675378202585e-05, "loss": 1.0264, "step": 251 }, { "epoch": 0.018542193607725913, "grad_norm": 1.2734375, "learning_rate": 4.996724210343876e-05, "loss": 1.0846, "step": 252 }, { "epoch": 0.018615773741089907, "grad_norm": 1.0546875, "learning_rate": 4.9966945046674294e-05, "loss": 1.0058, "step": 253 }, { "epoch": 0.018689353874453897, "grad_norm": 1.28125, "learning_rate": 4.996664664998104e-05, "loss": 1.09, "step": 254 }, { "epoch": 0.01876293400781789, "grad_norm": 0.97265625, "learning_rate": 4.9966346913375016e-05, "loss": 0.8106, "step": 255 }, { "epoch": 0.01883651414118188, "grad_norm": 1.03125, "learning_rate": 4.996604583687231e-05, "loss": 1.338, "step": 256 }, { "epoch": 0.01891009427454587, "grad_norm": 0.88671875, "learning_rate": 4.9965743420489076e-05, "loss": 0.845, "step": 257 }, { "epoch": 0.018983674407909865, "grad_norm": 1.15625, "learning_rate": 4.996543966424155e-05, "loss": 1.1945, "step": 258 }, { "epoch": 0.019057254541273855, "grad_norm": 0.91796875, "learning_rate": 4.996513456814602e-05, "loss": 0.8249, "step": 259 }, { "epoch": 0.01913083467463785, "grad_norm": 1.171875, "learning_rate": 4.996482813221888e-05, "loss": 1.2986, "step": 260 }, { "epoch": 0.01920441480800184, "grad_norm": 0.9296875, "learning_rate": 4.996452035647656e-05, "loss": 0.6122, "step": 261 }, { "epoch": 0.019277994941365832, "grad_norm": 0.984375, "learning_rate": 4.996421124093559e-05, "loss": 0.8912, "step": 262 }, { "epoch": 0.019351575074729822, "grad_norm": 0.9765625, "learning_rate": 4.9963900785612546e-05, "loss": 1.1519, "step": 263 }, { "epoch": 0.019425155208093816, "grad_norm": 1.046875, "learning_rate": 4.99635889905241e-05, "loss": 0.8285, "step": 264 }, { "epoch": 0.019498735341457806, "grad_norm": 0.90234375, "learning_rate": 4.996327585568699e-05, "loss": 1.0632, "step": 265 }, { "epoch": 0.0195723154748218, "grad_norm": 0.90234375, "learning_rate": 4.9962961381118005e-05, "loss": 1.0233, "step": 266 }, { "epoch": 0.01964589560818579, "grad_norm": 0.953125, "learning_rate": 4.996264556683403e-05, "loss": 0.9842, "step": 267 }, { "epoch": 0.01971947574154978, "grad_norm": 1.171875, "learning_rate": 4.996232841285202e-05, "loss": 1.4804, "step": 268 }, { "epoch": 0.019793055874913774, "grad_norm": 1.296875, "learning_rate": 4.996200991918899e-05, "loss": 1.5742, "step": 269 }, { "epoch": 0.019866636008277764, "grad_norm": 0.91796875, "learning_rate": 4.9961690085862035e-05, "loss": 0.7653, "step": 270 }, { "epoch": 0.019940216141641758, "grad_norm": 1.0234375, "learning_rate": 4.996136891288832e-05, "loss": 0.8671, "step": 271 }, { "epoch": 0.020013796275005748, "grad_norm": 0.94921875, "learning_rate": 4.9961046400285075e-05, "loss": 1.0324, "step": 272 }, { "epoch": 0.02008737640836974, "grad_norm": 1.0625, "learning_rate": 4.9960722548069624e-05, "loss": 0.728, "step": 273 }, { "epoch": 0.02016095654173373, "grad_norm": 0.8671875, "learning_rate": 4.996039735625932e-05, "loss": 0.7373, "step": 274 }, { "epoch": 0.020234536675097725, "grad_norm": 0.98828125, "learning_rate": 4.996007082487165e-05, "loss": 0.9377, "step": 275 }, { "epoch": 0.020308116808461715, "grad_norm": 1.1484375, "learning_rate": 4.9959742953924125e-05, "loss": 1.3766, "step": 276 }, { "epoch": 0.020381696941825705, "grad_norm": 1.0, "learning_rate": 4.995941374343432e-05, "loss": 0.9286, "step": 277 }, { "epoch": 0.0204552770751897, "grad_norm": 1.140625, "learning_rate": 4.9959083193419934e-05, "loss": 1.1085, "step": 278 }, { "epoch": 0.02052885720855369, "grad_norm": 0.859375, "learning_rate": 4.995875130389869e-05, "loss": 0.7682, "step": 279 }, { "epoch": 0.020602437341917683, "grad_norm": 1.03125, "learning_rate": 4.995841807488841e-05, "loss": 0.9412, "step": 280 }, { "epoch": 0.020676017475281673, "grad_norm": 0.9765625, "learning_rate": 4.995808350640697e-05, "loss": 1.086, "step": 281 }, { "epoch": 0.020749597608645667, "grad_norm": 0.98046875, "learning_rate": 4.995774759847232e-05, "loss": 1.0259, "step": 282 }, { "epoch": 0.020823177742009657, "grad_norm": 1.2890625, "learning_rate": 4.995741035110249e-05, "loss": 1.4607, "step": 283 }, { "epoch": 0.02089675787537365, "grad_norm": 0.9375, "learning_rate": 4.9957071764315596e-05, "loss": 0.8889, "step": 284 }, { "epoch": 0.02097033800873764, "grad_norm": 1.0625, "learning_rate": 4.99567318381298e-05, "loss": 1.0494, "step": 285 }, { "epoch": 0.021043918142101634, "grad_norm": 1.1171875, "learning_rate": 4.995639057256334e-05, "loss": 1.1224, "step": 286 }, { "epoch": 0.021117498275465624, "grad_norm": 1.1015625, "learning_rate": 4.995604796763453e-05, "loss": 1.0876, "step": 287 }, { "epoch": 0.021191078408829615, "grad_norm": 0.9921875, "learning_rate": 4.995570402336176e-05, "loss": 0.8649, "step": 288 }, { "epoch": 0.021264658542193608, "grad_norm": 0.796875, "learning_rate": 4.995535873976349e-05, "loss": 0.5695, "step": 289 }, { "epoch": 0.0213382386755576, "grad_norm": 0.94921875, "learning_rate": 4.9955012116858256e-05, "loss": 0.8223, "step": 290 }, { "epoch": 0.021411818808921592, "grad_norm": 1.125, "learning_rate": 4.995466415466465e-05, "loss": 1.0022, "step": 291 }, { "epoch": 0.021485398942285582, "grad_norm": 1.0390625, "learning_rate": 4.9954314853201355e-05, "loss": 0.894, "step": 292 }, { "epoch": 0.021558979075649576, "grad_norm": 0.93359375, "learning_rate": 4.995396421248712e-05, "loss": 0.9251, "step": 293 }, { "epoch": 0.021632559209013566, "grad_norm": 0.796875, "learning_rate": 4.9953612232540734e-05, "loss": 0.7351, "step": 294 }, { "epoch": 0.02170613934237756, "grad_norm": 1.015625, "learning_rate": 4.9953258913381126e-05, "loss": 1.2922, "step": 295 }, { "epoch": 0.02177971947574155, "grad_norm": 0.68359375, "learning_rate": 4.9952904255027246e-05, "loss": 0.5528, "step": 296 }, { "epoch": 0.02185329960910554, "grad_norm": 1.1328125, "learning_rate": 4.995254825749812e-05, "loss": 0.9348, "step": 297 }, { "epoch": 0.021926879742469534, "grad_norm": 1.09375, "learning_rate": 4.9952190920812856e-05, "loss": 0.7843, "step": 298 }, { "epoch": 0.022000459875833524, "grad_norm": 1.015625, "learning_rate": 4.995183224499064e-05, "loss": 1.1061, "step": 299 }, { "epoch": 0.022074040009197517, "grad_norm": 0.97265625, "learning_rate": 4.99514722300507e-05, "loss": 0.8966, "step": 300 }, { "epoch": 0.022147620142561508, "grad_norm": 0.96484375, "learning_rate": 4.995111087601239e-05, "loss": 0.7591, "step": 301 }, { "epoch": 0.0222212002759255, "grad_norm": 0.9375, "learning_rate": 4.995074818289507e-05, "loss": 0.8156, "step": 302 }, { "epoch": 0.02229478040928949, "grad_norm": 1.03125, "learning_rate": 4.995038415071823e-05, "loss": 0.7609, "step": 303 }, { "epoch": 0.022368360542653485, "grad_norm": 1.5234375, "learning_rate": 4.995001877950139e-05, "loss": 1.1763, "step": 304 }, { "epoch": 0.022441940676017475, "grad_norm": 0.859375, "learning_rate": 4.994965206926417e-05, "loss": 1.0027, "step": 305 }, { "epoch": 0.022515520809381465, "grad_norm": 1.1953125, "learning_rate": 4.994928402002625e-05, "loss": 1.2505, "step": 306 }, { "epoch": 0.02258910094274546, "grad_norm": 1.03125, "learning_rate": 4.994891463180737e-05, "loss": 1.0475, "step": 307 }, { "epoch": 0.02266268107610945, "grad_norm": 1.0625, "learning_rate": 4.994854390462737e-05, "loss": 1.2729, "step": 308 }, { "epoch": 0.022736261209473443, "grad_norm": 0.8828125, "learning_rate": 4.994817183850614e-05, "loss": 0.6231, "step": 309 }, { "epoch": 0.022809841342837433, "grad_norm": 1.2578125, "learning_rate": 4.994779843346365e-05, "loss": 1.3261, "step": 310 }, { "epoch": 0.022883421476201427, "grad_norm": 0.98046875, "learning_rate": 4.994742368951993e-05, "loss": 1.1003, "step": 311 }, { "epoch": 0.022957001609565417, "grad_norm": 1.1640625, "learning_rate": 4.9947047606695104e-05, "loss": 1.0592, "step": 312 }, { "epoch": 0.02303058174292941, "grad_norm": 0.95703125, "learning_rate": 4.994667018500935e-05, "loss": 0.8715, "step": 313 }, { "epoch": 0.0231041618762934, "grad_norm": 1.453125, "learning_rate": 4.9946291424482927e-05, "loss": 1.2742, "step": 314 }, { "epoch": 0.023177742009657394, "grad_norm": 1.0078125, "learning_rate": 4.994591132513616e-05, "loss": 1.0977, "step": 315 }, { "epoch": 0.023251322143021384, "grad_norm": 0.8515625, "learning_rate": 4.994552988698945e-05, "loss": 0.8418, "step": 316 }, { "epoch": 0.023324902276385374, "grad_norm": 1.0390625, "learning_rate": 4.9945147110063264e-05, "loss": 1.0759, "step": 317 }, { "epoch": 0.023398482409749368, "grad_norm": 0.7734375, "learning_rate": 4.994476299437814e-05, "loss": 0.8482, "step": 318 }, { "epoch": 0.023472062543113358, "grad_norm": 0.91796875, "learning_rate": 4.994437753995471e-05, "loss": 0.8143, "step": 319 }, { "epoch": 0.023545642676477352, "grad_norm": 1.0234375, "learning_rate": 4.994399074681364e-05, "loss": 0.8498, "step": 320 }, { "epoch": 0.023619222809841342, "grad_norm": 1.109375, "learning_rate": 4.9943602614975714e-05, "loss": 0.9454, "step": 321 }, { "epoch": 0.023692802943205336, "grad_norm": 1.015625, "learning_rate": 4.994321314446174e-05, "loss": 0.7395, "step": 322 }, { "epoch": 0.023766383076569326, "grad_norm": 0.94921875, "learning_rate": 4.9942822335292624e-05, "loss": 0.9949, "step": 323 }, { "epoch": 0.02383996320993332, "grad_norm": 0.80859375, "learning_rate": 4.9942430187489354e-05, "loss": 0.7036, "step": 324 }, { "epoch": 0.02391354334329731, "grad_norm": 1.1484375, "learning_rate": 4.994203670107295e-05, "loss": 1.0528, "step": 325 }, { "epoch": 0.0239871234766613, "grad_norm": 0.96484375, "learning_rate": 4.994164187606456e-05, "loss": 0.9755, "step": 326 }, { "epoch": 0.024060703610025293, "grad_norm": 1.109375, "learning_rate": 4.9941245712485354e-05, "loss": 0.9171, "step": 327 }, { "epoch": 0.024134283743389284, "grad_norm": 1.0390625, "learning_rate": 4.994084821035659e-05, "loss": 1.0351, "step": 328 }, { "epoch": 0.024207863876753277, "grad_norm": 0.953125, "learning_rate": 4.994044936969961e-05, "loss": 0.8692, "step": 329 }, { "epoch": 0.024281444010117267, "grad_norm": 1.15625, "learning_rate": 4.9940049190535833e-05, "loss": 1.2068, "step": 330 }, { "epoch": 0.02435502414348126, "grad_norm": 0.80078125, "learning_rate": 4.9939647672886714e-05, "loss": 0.896, "step": 331 }, { "epoch": 0.02442860427684525, "grad_norm": 1.3046875, "learning_rate": 4.99392448167738e-05, "loss": 1.3046, "step": 332 }, { "epoch": 0.024502184410209245, "grad_norm": 0.875, "learning_rate": 4.993884062221873e-05, "loss": 0.6141, "step": 333 }, { "epoch": 0.024575764543573235, "grad_norm": 0.79296875, "learning_rate": 4.9938435089243187e-05, "loss": 0.8297, "step": 334 }, { "epoch": 0.02464934467693723, "grad_norm": 1.15625, "learning_rate": 4.993802821786893e-05, "loss": 1.2474, "step": 335 }, { "epoch": 0.02472292481030122, "grad_norm": 1.171875, "learning_rate": 4.99376200081178e-05, "loss": 1.4081, "step": 336 }, { "epoch": 0.02479650494366521, "grad_norm": 0.8671875, "learning_rate": 4.99372104600117e-05, "loss": 0.8162, "step": 337 }, { "epoch": 0.024870085077029203, "grad_norm": 1.2109375, "learning_rate": 4.9936799573572626e-05, "loss": 0.7549, "step": 338 }, { "epoch": 0.024943665210393193, "grad_norm": 1.21875, "learning_rate": 4.9936387348822604e-05, "loss": 0.8515, "step": 339 }, { "epoch": 0.025017245343757186, "grad_norm": 0.7734375, "learning_rate": 4.993597378578378e-05, "loss": 0.4967, "step": 340 }, { "epoch": 0.025090825477121177, "grad_norm": 0.953125, "learning_rate": 4.993555888447834e-05, "loss": 0.7248, "step": 341 }, { "epoch": 0.02516440561048517, "grad_norm": 0.84765625, "learning_rate": 4.9935142644928545e-05, "loss": 0.8738, "step": 342 }, { "epoch": 0.02523798574384916, "grad_norm": 1.1953125, "learning_rate": 4.993472506715675e-05, "loss": 1.2474, "step": 343 }, { "epoch": 0.025311565877213154, "grad_norm": 0.9609375, "learning_rate": 4.993430615118535e-05, "loss": 0.834, "step": 344 }, { "epoch": 0.025385146010577144, "grad_norm": 1.0625, "learning_rate": 4.993388589703684e-05, "loss": 1.1228, "step": 345 }, { "epoch": 0.025458726143941134, "grad_norm": 0.96875, "learning_rate": 4.993346430473376e-05, "loss": 0.8792, "step": 346 }, { "epoch": 0.025532306277305128, "grad_norm": 0.93359375, "learning_rate": 4.993304137429874e-05, "loss": 0.9142, "step": 347 }, { "epoch": 0.025605886410669118, "grad_norm": 0.89453125, "learning_rate": 4.9932617105754486e-05, "loss": 1.0518, "step": 348 }, { "epoch": 0.02567946654403311, "grad_norm": 0.9921875, "learning_rate": 4.993219149912376e-05, "loss": 1.2483, "step": 349 }, { "epoch": 0.025753046677397102, "grad_norm": 0.90234375, "learning_rate": 4.993176455442941e-05, "loss": 1.0214, "step": 350 }, { "epoch": 0.025826626810761096, "grad_norm": 1.09375, "learning_rate": 4.993133627169435e-05, "loss": 1.2017, "step": 351 }, { "epoch": 0.025900206944125086, "grad_norm": 0.8203125, "learning_rate": 4.993090665094156e-05, "loss": 0.7452, "step": 352 }, { "epoch": 0.02597378707748908, "grad_norm": 1.0234375, "learning_rate": 4.993047569219408e-05, "loss": 1.2199, "step": 353 }, { "epoch": 0.02604736721085307, "grad_norm": 0.8984375, "learning_rate": 4.993004339547508e-05, "loss": 0.9061, "step": 354 }, { "epoch": 0.026120947344217063, "grad_norm": 1.1328125, "learning_rate": 4.992960976080772e-05, "loss": 0.9567, "step": 355 }, { "epoch": 0.026194527477581053, "grad_norm": 0.8828125, "learning_rate": 4.9929174788215296e-05, "loss": 0.8923, "step": 356 }, { "epoch": 0.026268107610945043, "grad_norm": 1.1875, "learning_rate": 4.992873847772115e-05, "loss": 0.8817, "step": 357 }, { "epoch": 0.026341687744309037, "grad_norm": 0.69921875, "learning_rate": 4.992830082934869e-05, "loss": 0.6113, "step": 358 }, { "epoch": 0.026415267877673027, "grad_norm": 1.0546875, "learning_rate": 4.9927861843121403e-05, "loss": 1.0466, "step": 359 }, { "epoch": 0.02648884801103702, "grad_norm": 1.28125, "learning_rate": 4.9927421519062864e-05, "loss": 1.1488, "step": 360 }, { "epoch": 0.02656242814440101, "grad_norm": 0.94140625, "learning_rate": 4.9926979857196686e-05, "loss": 0.8077, "step": 361 }, { "epoch": 0.026636008277765005, "grad_norm": 1.265625, "learning_rate": 4.992653685754658e-05, "loss": 1.7968, "step": 362 }, { "epoch": 0.026709588411128995, "grad_norm": 1.0, "learning_rate": 4.992609252013632e-05, "loss": 1.1667, "step": 363 }, { "epoch": 0.02678316854449299, "grad_norm": 0.88671875, "learning_rate": 4.9925646844989756e-05, "loss": 0.9725, "step": 364 }, { "epoch": 0.02685674867785698, "grad_norm": 0.9921875, "learning_rate": 4.99251998321308e-05, "loss": 1.125, "step": 365 }, { "epoch": 0.02693032881122097, "grad_norm": 1.078125, "learning_rate": 4.992475148158344e-05, "loss": 1.1913, "step": 366 }, { "epoch": 0.027003908944584962, "grad_norm": 1.0078125, "learning_rate": 4.992430179337176e-05, "loss": 1.0423, "step": 367 }, { "epoch": 0.027077489077948953, "grad_norm": 0.890625, "learning_rate": 4.9923850767519865e-05, "loss": 0.9187, "step": 368 }, { "epoch": 0.027151069211312946, "grad_norm": 1.296875, "learning_rate": 4.992339840405198e-05, "loss": 1.6005, "step": 369 }, { "epoch": 0.027224649344676936, "grad_norm": 0.83984375, "learning_rate": 4.9922944702992375e-05, "loss": 0.6701, "step": 370 }, { "epoch": 0.02729822947804093, "grad_norm": 1.078125, "learning_rate": 4.9922489664365405e-05, "loss": 1.3002, "step": 371 }, { "epoch": 0.02737180961140492, "grad_norm": 1.125, "learning_rate": 4.992203328819548e-05, "loss": 0.8571, "step": 372 }, { "epoch": 0.027445389744768914, "grad_norm": 1.0234375, "learning_rate": 4.9921575574507095e-05, "loss": 0.9139, "step": 373 }, { "epoch": 0.027518969878132904, "grad_norm": 1.0625, "learning_rate": 4.992111652332483e-05, "loss": 1.1773, "step": 374 }, { "epoch": 0.027592550011496894, "grad_norm": 1.1796875, "learning_rate": 4.99206561346733e-05, "loss": 1.0986, "step": 375 }, { "epoch": 0.027666130144860888, "grad_norm": 1.2421875, "learning_rate": 4.992019440857724e-05, "loss": 1.3481, "step": 376 }, { "epoch": 0.027739710278224878, "grad_norm": 1.1171875, "learning_rate": 4.9919731345061396e-05, "loss": 1.1635, "step": 377 }, { "epoch": 0.02781329041158887, "grad_norm": 1.0078125, "learning_rate": 4.9919266944150643e-05, "loss": 1.1497, "step": 378 }, { "epoch": 0.02788687054495286, "grad_norm": 1.0390625, "learning_rate": 4.9918801205869904e-05, "loss": 1.1652, "step": 379 }, { "epoch": 0.027960450678316855, "grad_norm": 0.84765625, "learning_rate": 4.991833413024416e-05, "loss": 0.95, "step": 380 }, { "epoch": 0.028034030811680846, "grad_norm": 0.95703125, "learning_rate": 4.99178657172985e-05, "loss": 0.8521, "step": 381 }, { "epoch": 0.02810761094504484, "grad_norm": 0.83203125, "learning_rate": 4.991739596705804e-05, "loss": 1.1388, "step": 382 }, { "epoch": 0.02818119107840883, "grad_norm": 0.85546875, "learning_rate": 4.9916924879548e-05, "loss": 0.9586, "step": 383 }, { "epoch": 0.028254771211772823, "grad_norm": 0.9453125, "learning_rate": 4.991645245479367e-05, "loss": 0.8291, "step": 384 }, { "epoch": 0.028328351345136813, "grad_norm": 1.1875, "learning_rate": 4.9915978692820395e-05, "loss": 1.2366, "step": 385 }, { "epoch": 0.028401931478500803, "grad_norm": 0.94921875, "learning_rate": 4.99155035936536e-05, "loss": 0.766, "step": 386 }, { "epoch": 0.028475511611864797, "grad_norm": 0.8984375, "learning_rate": 4.991502715731879e-05, "loss": 1.0383, "step": 387 }, { "epoch": 0.028549091745228787, "grad_norm": 0.9296875, "learning_rate": 4.991454938384153e-05, "loss": 0.8598, "step": 388 }, { "epoch": 0.02862267187859278, "grad_norm": 0.9921875, "learning_rate": 4.991407027324746e-05, "loss": 0.9006, "step": 389 }, { "epoch": 0.02869625201195677, "grad_norm": 0.85546875, "learning_rate": 4.9913589825562294e-05, "loss": 0.7649, "step": 390 }, { "epoch": 0.028769832145320764, "grad_norm": 1.1484375, "learning_rate": 4.991310804081182e-05, "loss": 1.4923, "step": 391 }, { "epoch": 0.028843412278684755, "grad_norm": 0.9921875, "learning_rate": 4.991262491902189e-05, "loss": 1.223, "step": 392 }, { "epoch": 0.02891699241204875, "grad_norm": 0.84765625, "learning_rate": 4.991214046021843e-05, "loss": 0.8991, "step": 393 }, { "epoch": 0.02899057254541274, "grad_norm": 1.046875, "learning_rate": 4.991165466442744e-05, "loss": 0.9163, "step": 394 }, { "epoch": 0.02906415267877673, "grad_norm": 0.90234375, "learning_rate": 4.9911167531675006e-05, "loss": 0.7929, "step": 395 }, { "epoch": 0.029137732812140722, "grad_norm": 0.92578125, "learning_rate": 4.9910679061987266e-05, "loss": 1.0313, "step": 396 }, { "epoch": 0.029211312945504712, "grad_norm": 1.28125, "learning_rate": 4.9910189255390414e-05, "loss": 0.9564, "step": 397 }, { "epoch": 0.029284893078868706, "grad_norm": 0.8125, "learning_rate": 4.990969811191076e-05, "loss": 0.674, "step": 398 }, { "epoch": 0.029358473212232696, "grad_norm": 0.94140625, "learning_rate": 4.990920563157466e-05, "loss": 0.7931, "step": 399 }, { "epoch": 0.02943205334559669, "grad_norm": 0.9609375, "learning_rate": 4.990871181440854e-05, "loss": 0.898, "step": 400 }, { "epoch": 0.02950563347896068, "grad_norm": 1.1171875, "learning_rate": 4.99082166604389e-05, "loss": 1.0065, "step": 401 }, { "epoch": 0.029579213612324674, "grad_norm": 1.1328125, "learning_rate": 4.9907720169692315e-05, "loss": 1.0143, "step": 402 }, { "epoch": 0.029652793745688664, "grad_norm": 0.98828125, "learning_rate": 4.990722234219544e-05, "loss": 1.067, "step": 403 }, { "epoch": 0.029726373879052657, "grad_norm": 0.87890625, "learning_rate": 4.9906723177974976e-05, "loss": 0.8685, "step": 404 }, { "epoch": 0.029799954012416648, "grad_norm": 0.97265625, "learning_rate": 4.990622267705772e-05, "loss": 0.858, "step": 405 }, { "epoch": 0.029873534145780638, "grad_norm": 1.0234375, "learning_rate": 4.990572083947054e-05, "loss": 0.9264, "step": 406 }, { "epoch": 0.02994711427914463, "grad_norm": 0.97265625, "learning_rate": 4.990521766524037e-05, "loss": 1.0102, "step": 407 }, { "epoch": 0.03002069441250862, "grad_norm": 1.203125, "learning_rate": 4.9904713154394197e-05, "loss": 1.2105, "step": 408 }, { "epoch": 0.030094274545872615, "grad_norm": 1.1796875, "learning_rate": 4.9904207306959104e-05, "loss": 1.1406, "step": 409 }, { "epoch": 0.030167854679236605, "grad_norm": 1.0703125, "learning_rate": 4.990370012296225e-05, "loss": 0.9657, "step": 410 }, { "epoch": 0.0302414348126006, "grad_norm": 1.0, "learning_rate": 4.990319160243084e-05, "loss": 1.0093, "step": 411 }, { "epoch": 0.03031501494596459, "grad_norm": 1.1640625, "learning_rate": 4.9902681745392186e-05, "loss": 0.8834, "step": 412 }, { "epoch": 0.030388595079328583, "grad_norm": 1.0234375, "learning_rate": 4.990217055187362e-05, "loss": 1.045, "step": 413 }, { "epoch": 0.030462175212692573, "grad_norm": 1.03125, "learning_rate": 4.990165802190261e-05, "loss": 1.1901, "step": 414 }, { "epoch": 0.030535755346056563, "grad_norm": 0.97265625, "learning_rate": 4.990114415550663e-05, "loss": 1.0724, "step": 415 }, { "epoch": 0.030609335479420557, "grad_norm": 1.4140625, "learning_rate": 4.990062895271329e-05, "loss": 0.7789, "step": 416 }, { "epoch": 0.030682915612784547, "grad_norm": 1.140625, "learning_rate": 4.9900112413550216e-05, "loss": 1.0514, "step": 417 }, { "epoch": 0.03075649574614854, "grad_norm": 1.1328125, "learning_rate": 4.9899594538045136e-05, "loss": 0.9318, "step": 418 }, { "epoch": 0.03083007587951253, "grad_norm": 0.86328125, "learning_rate": 4.989907532622585e-05, "loss": 0.7865, "step": 419 }, { "epoch": 0.030903656012876524, "grad_norm": 0.96484375, "learning_rate": 4.989855477812022e-05, "loss": 1.2269, "step": 420 }, { "epoch": 0.030977236146240514, "grad_norm": 1.140625, "learning_rate": 4.989803289375618e-05, "loss": 1.0722, "step": 421 }, { "epoch": 0.031050816279604508, "grad_norm": 1.0546875, "learning_rate": 4.989750967316174e-05, "loss": 0.9473, "step": 422 }, { "epoch": 0.0311243964129685, "grad_norm": 1.1328125, "learning_rate": 4.989698511636498e-05, "loss": 1.2507, "step": 423 }, { "epoch": 0.031197976546332492, "grad_norm": 1.15625, "learning_rate": 4.989645922339406e-05, "loss": 0.9462, "step": 424 }, { "epoch": 0.03127155667969648, "grad_norm": 1.03125, "learning_rate": 4.9895931994277187e-05, "loss": 1.3416, "step": 425 }, { "epoch": 0.031345136813060476, "grad_norm": 0.79296875, "learning_rate": 4.989540342904267e-05, "loss": 0.8029, "step": 426 }, { "epoch": 0.031418716946424466, "grad_norm": 0.8984375, "learning_rate": 4.989487352771887e-05, "loss": 1.3004, "step": 427 }, { "epoch": 0.031492297079788456, "grad_norm": 0.8359375, "learning_rate": 4.9894342290334227e-05, "loss": 0.7786, "step": 428 }, { "epoch": 0.031565877213152446, "grad_norm": 0.88671875, "learning_rate": 4.989380971691725e-05, "loss": 0.834, "step": 429 }, { "epoch": 0.03163945734651644, "grad_norm": 0.76953125, "learning_rate": 4.989327580749653e-05, "loss": 0.6176, "step": 430 }, { "epoch": 0.031713037479880433, "grad_norm": 1.0703125, "learning_rate": 4.989274056210071e-05, "loss": 1.259, "step": 431 }, { "epoch": 0.031786617613244424, "grad_norm": 1.03125, "learning_rate": 4.989220398075852e-05, "loss": 0.9209, "step": 432 }, { "epoch": 0.031860197746608414, "grad_norm": 1.0859375, "learning_rate": 4.9891666063498756e-05, "loss": 0.8342, "step": 433 }, { "epoch": 0.03193377787997241, "grad_norm": 0.87109375, "learning_rate": 4.989112681035029e-05, "loss": 0.747, "step": 434 }, { "epoch": 0.0320073580133364, "grad_norm": 0.94140625, "learning_rate": 4.9890586221342064e-05, "loss": 1.0977, "step": 435 }, { "epoch": 0.03208093814670039, "grad_norm": 0.890625, "learning_rate": 4.989004429650308e-05, "loss": 0.8333, "step": 436 }, { "epoch": 0.03215451828006438, "grad_norm": 1.1484375, "learning_rate": 4.9889501035862426e-05, "loss": 1.3032, "step": 437 }, { "epoch": 0.03222809841342837, "grad_norm": 0.80859375, "learning_rate": 4.9888956439449274e-05, "loss": 0.6674, "step": 438 }, { "epoch": 0.03230167854679237, "grad_norm": 0.96484375, "learning_rate": 4.9888410507292825e-05, "loss": 0.7147, "step": 439 }, { "epoch": 0.03237525868015636, "grad_norm": 0.89453125, "learning_rate": 4.988786323942241e-05, "loss": 0.9657, "step": 440 }, { "epoch": 0.03244883881352035, "grad_norm": 1.0625, "learning_rate": 4.988731463586737e-05, "loss": 1.1865, "step": 441 }, { "epoch": 0.03252241894688434, "grad_norm": 0.96484375, "learning_rate": 4.988676469665715e-05, "loss": 0.7875, "step": 442 }, { "epoch": 0.032595999080248336, "grad_norm": 1.1015625, "learning_rate": 4.988621342182128e-05, "loss": 1.2024, "step": 443 }, { "epoch": 0.032669579213612326, "grad_norm": 1.2734375, "learning_rate": 4.9885660811389347e-05, "loss": 1.3675, "step": 444 }, { "epoch": 0.03274315934697632, "grad_norm": 0.97265625, "learning_rate": 4.988510686539099e-05, "loss": 1.0933, "step": 445 }, { "epoch": 0.03281673948034031, "grad_norm": 0.96484375, "learning_rate": 4.9884551583855954e-05, "loss": 0.7227, "step": 446 }, { "epoch": 0.0328903196137043, "grad_norm": 0.8828125, "learning_rate": 4.9883994966814034e-05, "loss": 0.6531, "step": 447 }, { "epoch": 0.032963899747068294, "grad_norm": 0.91796875, "learning_rate": 4.98834370142951e-05, "loss": 0.8958, "step": 448 }, { "epoch": 0.033037479880432284, "grad_norm": 0.91796875, "learning_rate": 4.9882877726329106e-05, "loss": 0.6407, "step": 449 }, { "epoch": 0.033111060013796274, "grad_norm": 0.83203125, "learning_rate": 4.988231710294606e-05, "loss": 0.8751, "step": 450 }, { "epoch": 0.033184640147160265, "grad_norm": 0.890625, "learning_rate": 4.9881755144176044e-05, "loss": 0.7451, "step": 451 }, { "epoch": 0.03325822028052426, "grad_norm": 1.234375, "learning_rate": 4.988119185004923e-05, "loss": 0.927, "step": 452 }, { "epoch": 0.03333180041388825, "grad_norm": 1.171875, "learning_rate": 4.988062722059585e-05, "loss": 1.1373, "step": 453 }, { "epoch": 0.03340538054725224, "grad_norm": 0.98046875, "learning_rate": 4.988006125584619e-05, "loss": 1.0013, "step": 454 }, { "epoch": 0.03347896068061623, "grad_norm": 1.078125, "learning_rate": 4.987949395583064e-05, "loss": 0.8468, "step": 455 }, { "epoch": 0.03355254081398022, "grad_norm": 0.96875, "learning_rate": 4.987892532057964e-05, "loss": 0.9814, "step": 456 }, { "epoch": 0.03362612094734422, "grad_norm": 0.9765625, "learning_rate": 4.98783553501237e-05, "loss": 1.2678, "step": 457 }, { "epoch": 0.03369970108070821, "grad_norm": 0.90234375, "learning_rate": 4.987778404449342e-05, "loss": 0.8017, "step": 458 }, { "epoch": 0.0337732812140722, "grad_norm": 1.078125, "learning_rate": 4.987721140371946e-05, "loss": 1.1828, "step": 459 }, { "epoch": 0.03384686134743619, "grad_norm": 1.1015625, "learning_rate": 4.987663742783255e-05, "loss": 1.1213, "step": 460 }, { "epoch": 0.03392044148080019, "grad_norm": 0.87109375, "learning_rate": 4.98760621168635e-05, "loss": 0.8114, "step": 461 }, { "epoch": 0.03399402161416418, "grad_norm": 1.03125, "learning_rate": 4.987548547084317e-05, "loss": 0.9586, "step": 462 }, { "epoch": 0.03406760174752817, "grad_norm": 0.94921875, "learning_rate": 4.9874907489802526e-05, "loss": 1.0378, "step": 463 }, { "epoch": 0.03414118188089216, "grad_norm": 0.99609375, "learning_rate": 4.987432817377258e-05, "loss": 1.0166, "step": 464 }, { "epoch": 0.03421476201425615, "grad_norm": 1.03125, "learning_rate": 4.987374752278441e-05, "loss": 1.1897, "step": 465 }, { "epoch": 0.034288342147620145, "grad_norm": 1.0390625, "learning_rate": 4.987316553686921e-05, "loss": 0.9125, "step": 466 }, { "epoch": 0.034361922280984135, "grad_norm": 1.1171875, "learning_rate": 4.9872582216058174e-05, "loss": 0.8171, "step": 467 }, { "epoch": 0.034435502414348125, "grad_norm": 1.203125, "learning_rate": 4.987199756038263e-05, "loss": 1.4177, "step": 468 }, { "epoch": 0.034509082547712115, "grad_norm": 0.9609375, "learning_rate": 4.987141156987396e-05, "loss": 1.0405, "step": 469 }, { "epoch": 0.03458266268107611, "grad_norm": 1.109375, "learning_rate": 4.987082424456361e-05, "loss": 1.4172, "step": 470 }, { "epoch": 0.0346562428144401, "grad_norm": 0.84765625, "learning_rate": 4.9870235584483096e-05, "loss": 0.5733, "step": 471 }, { "epoch": 0.03472982294780409, "grad_norm": 1.0078125, "learning_rate": 4.9869645589664e-05, "loss": 1.2018, "step": 472 }, { "epoch": 0.03480340308116808, "grad_norm": 0.921875, "learning_rate": 4.9869054260138015e-05, "loss": 0.7661, "step": 473 }, { "epoch": 0.03487698321453207, "grad_norm": 1.03125, "learning_rate": 4.986846159593685e-05, "loss": 0.9603, "step": 474 }, { "epoch": 0.03495056334789607, "grad_norm": 0.9609375, "learning_rate": 4.986786759709232e-05, "loss": 1.0224, "step": 475 }, { "epoch": 0.03502414348126006, "grad_norm": 1.1484375, "learning_rate": 4.986727226363631e-05, "loss": 1.3507, "step": 476 }, { "epoch": 0.03509772361462405, "grad_norm": 1.265625, "learning_rate": 4.986667559560075e-05, "loss": 1.1848, "step": 477 }, { "epoch": 0.03517130374798804, "grad_norm": 0.921875, "learning_rate": 4.98660775930177e-05, "loss": 0.8361, "step": 478 }, { "epoch": 0.03524488388135204, "grad_norm": 1.21875, "learning_rate": 4.986547825591922e-05, "loss": 1.3683, "step": 479 }, { "epoch": 0.03531846401471603, "grad_norm": 0.7890625, "learning_rate": 4.986487758433748e-05, "loss": 0.673, "step": 480 }, { "epoch": 0.03539204414808002, "grad_norm": 0.9375, "learning_rate": 4.986427557830473e-05, "loss": 0.9089, "step": 481 }, { "epoch": 0.03546562428144401, "grad_norm": 0.8984375, "learning_rate": 4.9863672237853274e-05, "loss": 0.6803, "step": 482 }, { "epoch": 0.035539204414808005, "grad_norm": 0.98046875, "learning_rate": 4.986306756301548e-05, "loss": 0.9858, "step": 483 }, { "epoch": 0.035612784548171995, "grad_norm": 1.0390625, "learning_rate": 4.986246155382381e-05, "loss": 0.9134, "step": 484 }, { "epoch": 0.035686364681535986, "grad_norm": 0.9453125, "learning_rate": 4.98618542103108e-05, "loss": 1.0081, "step": 485 }, { "epoch": 0.035759944814899976, "grad_norm": 0.953125, "learning_rate": 4.986124553250903e-05, "loss": 1.3159, "step": 486 }, { "epoch": 0.035833524948263966, "grad_norm": 1.03125, "learning_rate": 4.986063552045116e-05, "loss": 1.2957, "step": 487 }, { "epoch": 0.03590710508162796, "grad_norm": 0.9609375, "learning_rate": 4.9860024174169936e-05, "loss": 0.8686, "step": 488 }, { "epoch": 0.03598068521499195, "grad_norm": 1.1640625, "learning_rate": 4.985941149369817e-05, "loss": 0.875, "step": 489 }, { "epoch": 0.03605426534835594, "grad_norm": 1.0390625, "learning_rate": 4.9858797479068746e-05, "loss": 0.9396, "step": 490 }, { "epoch": 0.036127845481719933, "grad_norm": 0.90234375, "learning_rate": 4.985818213031461e-05, "loss": 0.9777, "step": 491 }, { "epoch": 0.03620142561508393, "grad_norm": 0.9921875, "learning_rate": 4.985756544746879e-05, "loss": 1.4404, "step": 492 }, { "epoch": 0.03627500574844792, "grad_norm": 0.8046875, "learning_rate": 4.985694743056438e-05, "loss": 0.8215, "step": 493 }, { "epoch": 0.03634858588181191, "grad_norm": 1.1015625, "learning_rate": 4.985632807963456e-05, "loss": 1.0838, "step": 494 }, { "epoch": 0.0364221660151759, "grad_norm": 0.83203125, "learning_rate": 4.9855707394712546e-05, "loss": 0.9544, "step": 495 }, { "epoch": 0.03649574614853989, "grad_norm": 1.3828125, "learning_rate": 4.9855085375831665e-05, "loss": 1.1742, "step": 496 }, { "epoch": 0.03656932628190389, "grad_norm": 1.015625, "learning_rate": 4.98544620230253e-05, "loss": 1.3149, "step": 497 }, { "epoch": 0.03664290641526788, "grad_norm": 1.09375, "learning_rate": 4.98538373363269e-05, "loss": 0.9338, "step": 498 }, { "epoch": 0.03671648654863187, "grad_norm": 1.015625, "learning_rate": 4.985321131576999e-05, "loss": 1.1112, "step": 499 }, { "epoch": 0.03679006668199586, "grad_norm": 0.87890625, "learning_rate": 4.985258396138817e-05, "loss": 1.1511, "step": 500 }, { "epoch": 0.036863646815359856, "grad_norm": 0.96484375, "learning_rate": 4.985195527321511e-05, "loss": 0.9077, "step": 501 }, { "epoch": 0.036937226948723846, "grad_norm": 0.9453125, "learning_rate": 4.985132525128455e-05, "loss": 0.801, "step": 502 }, { "epoch": 0.037010807082087836, "grad_norm": 1.140625, "learning_rate": 4.9850693895630305e-05, "loss": 1.2966, "step": 503 }, { "epoch": 0.037084387215451826, "grad_norm": 0.78125, "learning_rate": 4.985006120628625e-05, "loss": 0.6195, "step": 504 }, { "epoch": 0.03715796734881582, "grad_norm": 0.8828125, "learning_rate": 4.984942718328635e-05, "loss": 1.0418, "step": 505 }, { "epoch": 0.037231547482179814, "grad_norm": 1.0234375, "learning_rate": 4.984879182666462e-05, "loss": 1.0322, "step": 506 }, { "epoch": 0.037305127615543804, "grad_norm": 0.7890625, "learning_rate": 4.984815513645516e-05, "loss": 1.0583, "step": 507 }, { "epoch": 0.037378707748907794, "grad_norm": 0.99609375, "learning_rate": 4.984751711269216e-05, "loss": 0.9566, "step": 508 }, { "epoch": 0.037452287882271784, "grad_norm": 1.3359375, "learning_rate": 4.9846877755409836e-05, "loss": 1.5125, "step": 509 }, { "epoch": 0.03752586801563578, "grad_norm": 1.0234375, "learning_rate": 4.984623706464252e-05, "loss": 0.9958, "step": 510 }, { "epoch": 0.03759944814899977, "grad_norm": 0.90625, "learning_rate": 4.984559504042459e-05, "loss": 0.9692, "step": 511 }, { "epoch": 0.03767302828236376, "grad_norm": 0.88671875, "learning_rate": 4.984495168279049e-05, "loss": 0.8629, "step": 512 }, { "epoch": 0.03774660841572775, "grad_norm": 0.828125, "learning_rate": 4.984430699177477e-05, "loss": 0.7285, "step": 513 }, { "epoch": 0.03782018854909174, "grad_norm": 0.875, "learning_rate": 4.984366096741201e-05, "loss": 0.6445, "step": 514 }, { "epoch": 0.03789376868245574, "grad_norm": 0.9296875, "learning_rate": 4.984301360973689e-05, "loss": 0.963, "step": 515 }, { "epoch": 0.03796734881581973, "grad_norm": 1.0625, "learning_rate": 4.984236491878415e-05, "loss": 0.9509, "step": 516 }, { "epoch": 0.03804092894918372, "grad_norm": 1.1328125, "learning_rate": 4.984171489458861e-05, "loss": 0.7679, "step": 517 }, { "epoch": 0.03811450908254771, "grad_norm": 0.890625, "learning_rate": 4.984106353718515e-05, "loss": 1.0823, "step": 518 }, { "epoch": 0.03818808921591171, "grad_norm": 0.8203125, "learning_rate": 4.984041084660872e-05, "loss": 0.7272, "step": 519 }, { "epoch": 0.0382616693492757, "grad_norm": 0.97265625, "learning_rate": 4.983975682289437e-05, "loss": 0.9714, "step": 520 }, { "epoch": 0.03833524948263969, "grad_norm": 1.0625, "learning_rate": 4.9839101466077173e-05, "loss": 1.0269, "step": 521 }, { "epoch": 0.03840882961600368, "grad_norm": 0.98828125, "learning_rate": 4.983844477619233e-05, "loss": 0.883, "step": 522 }, { "epoch": 0.03848240974936767, "grad_norm": 0.78125, "learning_rate": 4.983778675327506e-05, "loss": 0.7904, "step": 523 }, { "epoch": 0.038555989882731664, "grad_norm": 1.078125, "learning_rate": 4.9837127397360684e-05, "loss": 1.0432, "step": 524 }, { "epoch": 0.038629570016095655, "grad_norm": 0.93359375, "learning_rate": 4.98364667084846e-05, "loss": 1.0571, "step": 525 }, { "epoch": 0.038703150149459645, "grad_norm": 0.953125, "learning_rate": 4.983580468668225e-05, "loss": 0.7834, "step": 526 }, { "epoch": 0.038776730282823635, "grad_norm": 0.95703125, "learning_rate": 4.9835141331989175e-05, "loss": 0.9083, "step": 527 }, { "epoch": 0.03885031041618763, "grad_norm": 1.0546875, "learning_rate": 4.983447664444097e-05, "loss": 1.0536, "step": 528 }, { "epoch": 0.03892389054955162, "grad_norm": 1.03125, "learning_rate": 4.983381062407331e-05, "loss": 1.0827, "step": 529 }, { "epoch": 0.03899747068291561, "grad_norm": 0.8984375, "learning_rate": 4.9833143270921936e-05, "loss": 0.891, "step": 530 }, { "epoch": 0.0390710508162796, "grad_norm": 1.15625, "learning_rate": 4.983247458502267e-05, "loss": 0.8906, "step": 531 }, { "epoch": 0.0391446309496436, "grad_norm": 1.03125, "learning_rate": 4.983180456641139e-05, "loss": 0.7901, "step": 532 }, { "epoch": 0.03921821108300759, "grad_norm": 0.95703125, "learning_rate": 4.983113321512406e-05, "loss": 0.8367, "step": 533 }, { "epoch": 0.03929179121637158, "grad_norm": 1.1953125, "learning_rate": 4.983046053119671e-05, "loss": 1.4232, "step": 534 }, { "epoch": 0.03936537134973557, "grad_norm": 0.97265625, "learning_rate": 4.982978651466544e-05, "loss": 0.9988, "step": 535 }, { "epoch": 0.03943895148309956, "grad_norm": 1.109375, "learning_rate": 4.982911116556643e-05, "loss": 1.3246, "step": 536 }, { "epoch": 0.03951253161646356, "grad_norm": 0.87109375, "learning_rate": 4.9828434483935915e-05, "loss": 0.6563, "step": 537 }, { "epoch": 0.03958611174982755, "grad_norm": 1.2265625, "learning_rate": 4.9827756469810216e-05, "loss": 1.0454, "step": 538 }, { "epoch": 0.03965969188319154, "grad_norm": 0.92578125, "learning_rate": 4.982707712322572e-05, "loss": 0.6676, "step": 539 }, { "epoch": 0.03973327201655553, "grad_norm": 1.0546875, "learning_rate": 4.982639644421889e-05, "loss": 0.8891, "step": 540 }, { "epoch": 0.039806852149919525, "grad_norm": 1.1796875, "learning_rate": 4.982571443282625e-05, "loss": 0.9849, "step": 541 }, { "epoch": 0.039880432283283515, "grad_norm": 0.99609375, "learning_rate": 4.982503108908441e-05, "loss": 0.9774, "step": 542 }, { "epoch": 0.039954012416647505, "grad_norm": 0.9765625, "learning_rate": 4.982434641303003e-05, "loss": 0.8891, "step": 543 }, { "epoch": 0.040027592550011495, "grad_norm": 0.984375, "learning_rate": 4.982366040469988e-05, "loss": 0.7822, "step": 544 }, { "epoch": 0.040101172683375486, "grad_norm": 1.1171875, "learning_rate": 4.982297306413075e-05, "loss": 1.4514, "step": 545 }, { "epoch": 0.04017475281673948, "grad_norm": 0.8359375, "learning_rate": 4.982228439135954e-05, "loss": 0.6972, "step": 546 }, { "epoch": 0.04024833295010347, "grad_norm": 1.125, "learning_rate": 4.982159438642321e-05, "loss": 0.8759, "step": 547 }, { "epoch": 0.04032191308346746, "grad_norm": 0.80078125, "learning_rate": 4.982090304935879e-05, "loss": 0.6461, "step": 548 }, { "epoch": 0.04039549321683145, "grad_norm": 0.796875, "learning_rate": 4.982021038020338e-05, "loss": 0.6901, "step": 549 }, { "epoch": 0.04046907335019545, "grad_norm": 0.92578125, "learning_rate": 4.981951637899417e-05, "loss": 1.034, "step": 550 }, { "epoch": 0.04054265348355944, "grad_norm": 1.34375, "learning_rate": 4.9818821045768384e-05, "loss": 1.7883, "step": 551 }, { "epoch": 0.04061623361692343, "grad_norm": 0.83203125, "learning_rate": 4.981812438056335e-05, "loss": 0.6118, "step": 552 }, { "epoch": 0.04068981375028742, "grad_norm": 1.0390625, "learning_rate": 4.9817426383416456e-05, "loss": 1.4013, "step": 553 }, { "epoch": 0.04076339388365141, "grad_norm": 0.90625, "learning_rate": 4.981672705436516e-05, "loss": 0.8169, "step": 554 }, { "epoch": 0.04083697401701541, "grad_norm": 1.3046875, "learning_rate": 4.981602639344699e-05, "loss": 1.2825, "step": 555 }, { "epoch": 0.0409105541503794, "grad_norm": 1.0546875, "learning_rate": 4.9815324400699564e-05, "loss": 0.9921, "step": 556 }, { "epoch": 0.04098413428374339, "grad_norm": 1.0546875, "learning_rate": 4.981462107616054e-05, "loss": 0.7899, "step": 557 }, { "epoch": 0.04105771441710738, "grad_norm": 0.890625, "learning_rate": 4.981391641986768e-05, "loss": 0.8169, "step": 558 }, { "epoch": 0.041131294550471376, "grad_norm": 0.89453125, "learning_rate": 4.981321043185878e-05, "loss": 0.7177, "step": 559 }, { "epoch": 0.041204874683835366, "grad_norm": 0.7421875, "learning_rate": 4.9812503112171746e-05, "loss": 0.6038, "step": 560 }, { "epoch": 0.041278454817199356, "grad_norm": 0.8125, "learning_rate": 4.981179446084454e-05, "loss": 0.6604, "step": 561 }, { "epoch": 0.041352034950563346, "grad_norm": 0.80078125, "learning_rate": 4.9811084477915184e-05, "loss": 0.7858, "step": 562 }, { "epoch": 0.041425615083927336, "grad_norm": 0.78125, "learning_rate": 4.9810373163421786e-05, "loss": 0.6443, "step": 563 }, { "epoch": 0.04149919521729133, "grad_norm": 0.7265625, "learning_rate": 4.980966051740252e-05, "loss": 0.6786, "step": 564 }, { "epoch": 0.041572775350655324, "grad_norm": 1.078125, "learning_rate": 4.980894653989563e-05, "loss": 1.1585, "step": 565 }, { "epoch": 0.041646355484019314, "grad_norm": 1.0, "learning_rate": 4.980823123093945e-05, "loss": 0.679, "step": 566 }, { "epoch": 0.041719935617383304, "grad_norm": 1.1875, "learning_rate": 4.980751459057234e-05, "loss": 1.1809, "step": 567 }, { "epoch": 0.0417935157507473, "grad_norm": 0.85546875, "learning_rate": 4.9806796618832786e-05, "loss": 0.8619, "step": 568 }, { "epoch": 0.04186709588411129, "grad_norm": 1.1015625, "learning_rate": 4.98060773157593e-05, "loss": 0.9854, "step": 569 }, { "epoch": 0.04194067601747528, "grad_norm": 0.87109375, "learning_rate": 4.980535668139051e-05, "loss": 0.9795, "step": 570 }, { "epoch": 0.04201425615083927, "grad_norm": 1.15625, "learning_rate": 4.980463471576507e-05, "loss": 1.1124, "step": 571 }, { "epoch": 0.04208783628420327, "grad_norm": 1.015625, "learning_rate": 4.9803911418921736e-05, "loss": 0.9321, "step": 572 }, { "epoch": 0.04216141641756726, "grad_norm": 0.98046875, "learning_rate": 4.980318679089933e-05, "loss": 0.8999, "step": 573 }, { "epoch": 0.04223499655093125, "grad_norm": 0.921875, "learning_rate": 4.980246083173672e-05, "loss": 0.7831, "step": 574 }, { "epoch": 0.04230857668429524, "grad_norm": 0.890625, "learning_rate": 4.98017335414729e-05, "loss": 0.9732, "step": 575 }, { "epoch": 0.04238215681765923, "grad_norm": 0.921875, "learning_rate": 4.980100492014687e-05, "loss": 0.7804, "step": 576 }, { "epoch": 0.042455736951023226, "grad_norm": 1.03125, "learning_rate": 4.9800274967797755e-05, "loss": 1.0128, "step": 577 }, { "epoch": 0.042529317084387216, "grad_norm": 0.953125, "learning_rate": 4.979954368446472e-05, "loss": 1.0641, "step": 578 }, { "epoch": 0.04260289721775121, "grad_norm": 1.0546875, "learning_rate": 4.9798811070187025e-05, "loss": 1.2838, "step": 579 }, { "epoch": 0.0426764773511152, "grad_norm": 0.84765625, "learning_rate": 4.979807712500397e-05, "loss": 0.8511, "step": 580 }, { "epoch": 0.042750057484479194, "grad_norm": 0.8671875, "learning_rate": 4.979734184895495e-05, "loss": 0.8227, "step": 581 }, { "epoch": 0.042823637617843184, "grad_norm": 0.76953125, "learning_rate": 4.979660524207943e-05, "loss": 0.6334, "step": 582 }, { "epoch": 0.042897217751207174, "grad_norm": 0.90625, "learning_rate": 4.9795867304416945e-05, "loss": 1.0858, "step": 583 }, { "epoch": 0.042970797884571164, "grad_norm": 1.0, "learning_rate": 4.9795128036007096e-05, "loss": 0.9474, "step": 584 }, { "epoch": 0.043044378017935155, "grad_norm": 0.75, "learning_rate": 4.9794387436889555e-05, "loss": 0.5507, "step": 585 }, { "epoch": 0.04311795815129915, "grad_norm": 0.8046875, "learning_rate": 4.979364550710407e-05, "loss": 0.7823, "step": 586 }, { "epoch": 0.04319153828466314, "grad_norm": 0.734375, "learning_rate": 4.979290224669045e-05, "loss": 0.8691, "step": 587 }, { "epoch": 0.04326511841802713, "grad_norm": 0.796875, "learning_rate": 4.97921576556886e-05, "loss": 0.795, "step": 588 }, { "epoch": 0.04333869855139112, "grad_norm": 1.015625, "learning_rate": 4.979141173413848e-05, "loss": 0.9836, "step": 589 }, { "epoch": 0.04341227868475512, "grad_norm": 1.109375, "learning_rate": 4.9790664482080114e-05, "loss": 1.2645, "step": 590 }, { "epoch": 0.04348585881811911, "grad_norm": 0.84375, "learning_rate": 4.978991589955361e-05, "loss": 0.7881, "step": 591 }, { "epoch": 0.0435594389514831, "grad_norm": 1.046875, "learning_rate": 4.978916598659913e-05, "loss": 1.2378, "step": 592 }, { "epoch": 0.04363301908484709, "grad_norm": 0.890625, "learning_rate": 4.978841474325694e-05, "loss": 1.2396, "step": 593 }, { "epoch": 0.04370659921821108, "grad_norm": 1.140625, "learning_rate": 4.978766216956735e-05, "loss": 0.9337, "step": 594 }, { "epoch": 0.04378017935157508, "grad_norm": 0.86328125, "learning_rate": 4.9786908265570746e-05, "loss": 0.7447, "step": 595 }, { "epoch": 0.04385375948493907, "grad_norm": 0.8984375, "learning_rate": 4.978615303130759e-05, "loss": 0.862, "step": 596 }, { "epoch": 0.04392733961830306, "grad_norm": 0.98046875, "learning_rate": 4.9785396466818426e-05, "loss": 1.2706, "step": 597 }, { "epoch": 0.04400091975166705, "grad_norm": 1.046875, "learning_rate": 4.9784638572143835e-05, "loss": 1.3343, "step": 598 }, { "epoch": 0.044074499885031045, "grad_norm": 1.0546875, "learning_rate": 4.9783879347324503e-05, "loss": 1.4976, "step": 599 }, { "epoch": 0.044148080018395035, "grad_norm": 0.875, "learning_rate": 4.978311879240118e-05, "loss": 0.8082, "step": 600 }, { "epoch": 0.044221660151759025, "grad_norm": 0.9609375, "learning_rate": 4.9782356907414686e-05, "loss": 0.9675, "step": 601 }, { "epoch": 0.044295240285123015, "grad_norm": 0.93359375, "learning_rate": 4.9781593692405896e-05, "loss": 0.8128, "step": 602 }, { "epoch": 0.044368820418487005, "grad_norm": 1.125, "learning_rate": 4.978082914741577e-05, "loss": 1.0967, "step": 603 }, { "epoch": 0.044442400551851, "grad_norm": 1.0546875, "learning_rate": 4.978006327248537e-05, "loss": 0.8955, "step": 604 }, { "epoch": 0.04451598068521499, "grad_norm": 2.078125, "learning_rate": 4.977929606765576e-05, "loss": 1.1202, "step": 605 }, { "epoch": 0.04458956081857898, "grad_norm": 0.8984375, "learning_rate": 4.977852753296814e-05, "loss": 0.8542, "step": 606 }, { "epoch": 0.04466314095194297, "grad_norm": 0.82421875, "learning_rate": 4.9777757668463744e-05, "loss": 0.7782, "step": 607 }, { "epoch": 0.04473672108530697, "grad_norm": 1.1015625, "learning_rate": 4.9776986474183894e-05, "loss": 0.8192, "step": 608 }, { "epoch": 0.04481030121867096, "grad_norm": 1.203125, "learning_rate": 4.977621395016998e-05, "loss": 1.1688, "step": 609 }, { "epoch": 0.04488388135203495, "grad_norm": 1.0234375, "learning_rate": 4.9775440096463447e-05, "loss": 0.7741, "step": 610 }, { "epoch": 0.04495746148539894, "grad_norm": 1.1640625, "learning_rate": 4.977466491310585e-05, "loss": 1.3333, "step": 611 }, { "epoch": 0.04503104161876293, "grad_norm": 0.83984375, "learning_rate": 4.977388840013877e-05, "loss": 0.7828, "step": 612 }, { "epoch": 0.04510462175212693, "grad_norm": 0.82421875, "learning_rate": 4.97731105576039e-05, "loss": 0.9337, "step": 613 }, { "epoch": 0.04517820188549092, "grad_norm": 0.97265625, "learning_rate": 4.977233138554297e-05, "loss": 0.9152, "step": 614 }, { "epoch": 0.04525178201885491, "grad_norm": 0.9375, "learning_rate": 4.977155088399781e-05, "loss": 0.6864, "step": 615 }, { "epoch": 0.0453253621522189, "grad_norm": 1.046875, "learning_rate": 4.977076905301029e-05, "loss": 0.883, "step": 616 }, { "epoch": 0.045398942285582895, "grad_norm": 1.0, "learning_rate": 4.9769985892622393e-05, "loss": 0.9834, "step": 617 }, { "epoch": 0.045472522418946885, "grad_norm": 0.87890625, "learning_rate": 4.976920140287613e-05, "loss": 0.9779, "step": 618 }, { "epoch": 0.045546102552310876, "grad_norm": 0.9921875, "learning_rate": 4.976841558381361e-05, "loss": 0.8692, "step": 619 }, { "epoch": 0.045619682685674866, "grad_norm": 1.15625, "learning_rate": 4.976762843547701e-05, "loss": 1.6504, "step": 620 }, { "epoch": 0.04569326281903886, "grad_norm": 0.859375, "learning_rate": 4.976683995790856e-05, "loss": 0.8912, "step": 621 }, { "epoch": 0.04576684295240285, "grad_norm": 1.09375, "learning_rate": 4.97660501511506e-05, "loss": 1.1648, "step": 622 }, { "epoch": 0.04584042308576684, "grad_norm": 0.96484375, "learning_rate": 4.97652590152455e-05, "loss": 0.9922, "step": 623 }, { "epoch": 0.04591400321913083, "grad_norm": 0.8828125, "learning_rate": 4.976446655023572e-05, "loss": 0.9775, "step": 624 }, { "epoch": 0.045987583352494824, "grad_norm": 1.046875, "learning_rate": 4.9763672756163804e-05, "loss": 1.387, "step": 625 }, { "epoch": 0.04606116348585882, "grad_norm": 0.90234375, "learning_rate": 4.976287763307234e-05, "loss": 0.9029, "step": 626 }, { "epoch": 0.04613474361922281, "grad_norm": 0.76171875, "learning_rate": 4.976208118100399e-05, "loss": 0.7147, "step": 627 }, { "epoch": 0.0462083237525868, "grad_norm": 1.140625, "learning_rate": 4.976128340000153e-05, "loss": 1.1444, "step": 628 }, { "epoch": 0.04628190388595079, "grad_norm": 1.0390625, "learning_rate": 4.976048429010775e-05, "loss": 0.9068, "step": 629 }, { "epoch": 0.04635548401931479, "grad_norm": 0.98046875, "learning_rate": 4.9759683851365545e-05, "loss": 0.7561, "step": 630 }, { "epoch": 0.04642906415267878, "grad_norm": 0.859375, "learning_rate": 4.975888208381787e-05, "loss": 0.6867, "step": 631 }, { "epoch": 0.04650264428604277, "grad_norm": 1.375, "learning_rate": 4.975807898750776e-05, "loss": 1.1213, "step": 632 }, { "epoch": 0.04657622441940676, "grad_norm": 1.0390625, "learning_rate": 4.975727456247831e-05, "loss": 0.7334, "step": 633 }, { "epoch": 0.04664980455277075, "grad_norm": 1.0625, "learning_rate": 4.9756468808772696e-05, "loss": 0.6628, "step": 634 }, { "epoch": 0.046723384686134746, "grad_norm": 0.99609375, "learning_rate": 4.975566172643415e-05, "loss": 0.7996, "step": 635 }, { "epoch": 0.046796964819498736, "grad_norm": 0.7734375, "learning_rate": 4.975485331550601e-05, "loss": 0.6378, "step": 636 }, { "epoch": 0.046870544952862726, "grad_norm": 0.8515625, "learning_rate": 4.9754043576031636e-05, "loss": 0.9027, "step": 637 }, { "epoch": 0.046944125086226716, "grad_norm": 1.2421875, "learning_rate": 4.9753232508054506e-05, "loss": 1.0751, "step": 638 }, { "epoch": 0.047017705219590714, "grad_norm": 0.9140625, "learning_rate": 4.9752420111618136e-05, "loss": 0.7651, "step": 639 }, { "epoch": 0.047091285352954704, "grad_norm": 0.9296875, "learning_rate": 4.975160638676612e-05, "loss": 1.1114, "step": 640 }, { "epoch": 0.047164865486318694, "grad_norm": 1.0625, "learning_rate": 4.9750791333542154e-05, "loss": 1.0256, "step": 641 }, { "epoch": 0.047238445619682684, "grad_norm": 0.828125, "learning_rate": 4.974997495198996e-05, "loss": 0.6675, "step": 642 }, { "epoch": 0.047312025753046674, "grad_norm": 1.0390625, "learning_rate": 4.9749157242153354e-05, "loss": 1.1, "step": 643 }, { "epoch": 0.04738560588641067, "grad_norm": 1.3828125, "learning_rate": 4.974833820407622e-05, "loss": 1.2962, "step": 644 }, { "epoch": 0.04745918601977466, "grad_norm": 1.0859375, "learning_rate": 4.974751783780253e-05, "loss": 1.1137, "step": 645 }, { "epoch": 0.04753276615313865, "grad_norm": 0.93359375, "learning_rate": 4.974669614337628e-05, "loss": 0.8745, "step": 646 }, { "epoch": 0.04760634628650264, "grad_norm": 1.046875, "learning_rate": 4.9745873120841603e-05, "loss": 1.1793, "step": 647 }, { "epoch": 0.04767992641986664, "grad_norm": 1.109375, "learning_rate": 4.9745048770242645e-05, "loss": 1.2452, "step": 648 }, { "epoch": 0.04775350655323063, "grad_norm": 0.94921875, "learning_rate": 4.974422309162366e-05, "loss": 1.0531, "step": 649 }, { "epoch": 0.04782708668659462, "grad_norm": 0.8203125, "learning_rate": 4.974339608502896e-05, "loss": 0.6883, "step": 650 }, { "epoch": 0.04790066681995861, "grad_norm": 1.1484375, "learning_rate": 4.974256775050292e-05, "loss": 1.2653, "step": 651 }, { "epoch": 0.0479742469533226, "grad_norm": 1.125, "learning_rate": 4.9741738088090006e-05, "loss": 1.1366, "step": 652 }, { "epoch": 0.0480478270866866, "grad_norm": 1.078125, "learning_rate": 4.974090709783474e-05, "loss": 1.3409, "step": 653 }, { "epoch": 0.04812140722005059, "grad_norm": 0.96875, "learning_rate": 4.974007477978171e-05, "loss": 1.1575, "step": 654 }, { "epoch": 0.04819498735341458, "grad_norm": 1.234375, "learning_rate": 4.97392411339756e-05, "loss": 1.6028, "step": 655 }, { "epoch": 0.04826856748677857, "grad_norm": 0.96875, "learning_rate": 4.973840616046115e-05, "loss": 0.7437, "step": 656 }, { "epoch": 0.048342147620142564, "grad_norm": 1.0390625, "learning_rate": 4.973756985928316e-05, "loss": 0.8768, "step": 657 }, { "epoch": 0.048415727753506554, "grad_norm": 0.9375, "learning_rate": 4.973673223048652e-05, "loss": 0.885, "step": 658 }, { "epoch": 0.048489307886870545, "grad_norm": 1.234375, "learning_rate": 4.973589327411617e-05, "loss": 1.4515, "step": 659 }, { "epoch": 0.048562888020234535, "grad_norm": 1.1796875, "learning_rate": 4.9735052990217165e-05, "loss": 1.393, "step": 660 }, { "epoch": 0.048636468153598525, "grad_norm": 1.1640625, "learning_rate": 4.973421137883458e-05, "loss": 1.3093, "step": 661 }, { "epoch": 0.04871004828696252, "grad_norm": 0.80859375, "learning_rate": 4.9733368440013585e-05, "loss": 0.8184, "step": 662 }, { "epoch": 0.04878362842032651, "grad_norm": 0.8984375, "learning_rate": 4.973252417379941e-05, "loss": 0.8445, "step": 663 }, { "epoch": 0.0488572085536905, "grad_norm": 0.77734375, "learning_rate": 4.9731678580237386e-05, "loss": 0.6878, "step": 664 }, { "epoch": 0.04893078868705449, "grad_norm": 0.91015625, "learning_rate": 4.9730831659372886e-05, "loss": 0.7854, "step": 665 }, { "epoch": 0.04900436882041849, "grad_norm": 0.85546875, "learning_rate": 4.9729983411251355e-05, "loss": 0.8962, "step": 666 }, { "epoch": 0.04907794895378248, "grad_norm": 0.8984375, "learning_rate": 4.972913383591833e-05, "loss": 1.0992, "step": 667 }, { "epoch": 0.04915152908714647, "grad_norm": 0.796875, "learning_rate": 4.972828293341939e-05, "loss": 0.7466, "step": 668 }, { "epoch": 0.04922510922051046, "grad_norm": 1.2265625, "learning_rate": 4.9727430703800214e-05, "loss": 1.155, "step": 669 }, { "epoch": 0.04929868935387446, "grad_norm": 0.96875, "learning_rate": 4.972657714710653e-05, "loss": 0.9673, "step": 670 }, { "epoch": 0.04937226948723845, "grad_norm": 1.3046875, "learning_rate": 4.972572226338416e-05, "loss": 1.392, "step": 671 }, { "epoch": 0.04944584962060244, "grad_norm": 1.015625, "learning_rate": 4.9724866052678974e-05, "loss": 1.004, "step": 672 }, { "epoch": 0.04951942975396643, "grad_norm": 0.73828125, "learning_rate": 4.972400851503693e-05, "loss": 0.6966, "step": 673 }, { "epoch": 0.04959300988733042, "grad_norm": 0.98046875, "learning_rate": 4.972314965050404e-05, "loss": 1.0663, "step": 674 }, { "epoch": 0.049666590020694415, "grad_norm": 0.83203125, "learning_rate": 4.97222894591264e-05, "loss": 0.9235, "step": 675 }, { "epoch": 0.049740170154058405, "grad_norm": 0.92578125, "learning_rate": 4.972142794095019e-05, "loss": 0.9954, "step": 676 }, { "epoch": 0.049813750287422395, "grad_norm": 1.03125, "learning_rate": 4.972056509602163e-05, "loss": 1.1419, "step": 677 }, { "epoch": 0.049887330420786385, "grad_norm": 1.125, "learning_rate": 4.971970092438702e-05, "loss": 0.7516, "step": 678 }, { "epoch": 0.04996091055415038, "grad_norm": 0.9375, "learning_rate": 4.9718835426092766e-05, "loss": 1.015, "step": 679 }, { "epoch": 0.05003449068751437, "grad_norm": 0.76171875, "learning_rate": 4.97179686011853e-05, "loss": 0.5791, "step": 680 }, { "epoch": 0.05010807082087836, "grad_norm": 1.03125, "learning_rate": 4.971710044971114e-05, "loss": 1.0074, "step": 681 }, { "epoch": 0.05018165095424235, "grad_norm": 1.265625, "learning_rate": 4.971623097171688e-05, "loss": 0.9841, "step": 682 }, { "epoch": 0.05025523108760634, "grad_norm": 1.125, "learning_rate": 4.971536016724919e-05, "loss": 0.8017, "step": 683 }, { "epoch": 0.05032881122097034, "grad_norm": 1.0703125, "learning_rate": 4.9714488036354803e-05, "loss": 1.0498, "step": 684 }, { "epoch": 0.05040239135433433, "grad_norm": 1.046875, "learning_rate": 4.971361457908053e-05, "loss": 1.3636, "step": 685 }, { "epoch": 0.05047597148769832, "grad_norm": 1.15625, "learning_rate": 4.971273979547322e-05, "loss": 0.8792, "step": 686 }, { "epoch": 0.05054955162106231, "grad_norm": 1.0390625, "learning_rate": 4.9711863685579855e-05, "loss": 0.9422, "step": 687 }, { "epoch": 0.05062313175442631, "grad_norm": 1.015625, "learning_rate": 4.9710986249447436e-05, "loss": 0.8948, "step": 688 }, { "epoch": 0.0506967118877903, "grad_norm": 0.83203125, "learning_rate": 4.9710107487123054e-05, "loss": 0.8737, "step": 689 }, { "epoch": 0.05077029202115429, "grad_norm": 1.21875, "learning_rate": 4.970922739865388e-05, "loss": 1.5617, "step": 690 }, { "epoch": 0.05084387215451828, "grad_norm": 0.7734375, "learning_rate": 4.9708345984087137e-05, "loss": 0.7197, "step": 691 }, { "epoch": 0.05091745228788227, "grad_norm": 0.94140625, "learning_rate": 4.970746324347013e-05, "loss": 1.0125, "step": 692 }, { "epoch": 0.050991032421246266, "grad_norm": 0.84375, "learning_rate": 4.9706579176850246e-05, "loss": 0.742, "step": 693 }, { "epoch": 0.051064612554610256, "grad_norm": 0.73046875, "learning_rate": 4.970569378427491e-05, "loss": 0.6562, "step": 694 }, { "epoch": 0.051138192687974246, "grad_norm": 1.140625, "learning_rate": 4.9704807065791656e-05, "loss": 1.3955, "step": 695 }, { "epoch": 0.051211772821338236, "grad_norm": 0.984375, "learning_rate": 4.970391902144806e-05, "loss": 0.8316, "step": 696 }, { "epoch": 0.05128535295470223, "grad_norm": 0.828125, "learning_rate": 4.9703029651291806e-05, "loss": 0.9463, "step": 697 }, { "epoch": 0.05135893308806622, "grad_norm": 1.03125, "learning_rate": 4.97021389553706e-05, "loss": 1.069, "step": 698 }, { "epoch": 0.051432513221430214, "grad_norm": 0.9375, "learning_rate": 4.970124693373225e-05, "loss": 0.939, "step": 699 }, { "epoch": 0.051506093354794204, "grad_norm": 0.921875, "learning_rate": 4.9700353586424634e-05, "loss": 0.7969, "step": 700 }, { "epoch": 0.051579673488158194, "grad_norm": 1.25, "learning_rate": 4.969945891349569e-05, "loss": 1.1327, "step": 701 }, { "epoch": 0.05165325362152219, "grad_norm": 1.1015625, "learning_rate": 4.969856291499344e-05, "loss": 1.6125, "step": 702 }, { "epoch": 0.05172683375488618, "grad_norm": 0.94140625, "learning_rate": 4.9697665590965964e-05, "loss": 0.6791, "step": 703 }, { "epoch": 0.05180041388825017, "grad_norm": 0.84765625, "learning_rate": 4.969676694146143e-05, "loss": 0.8961, "step": 704 }, { "epoch": 0.05187399402161416, "grad_norm": 0.88671875, "learning_rate": 4.9695866966528046e-05, "loss": 0.89, "step": 705 }, { "epoch": 0.05194757415497816, "grad_norm": 1.1328125, "learning_rate": 4.9694965666214136e-05, "loss": 1.1385, "step": 706 }, { "epoch": 0.05202115428834215, "grad_norm": 0.89453125, "learning_rate": 4.969406304056806e-05, "loss": 0.8156, "step": 707 }, { "epoch": 0.05209473442170614, "grad_norm": 0.98046875, "learning_rate": 4.969315908963826e-05, "loss": 0.7964, "step": 708 }, { "epoch": 0.05216831455507013, "grad_norm": 1.0, "learning_rate": 4.969225381347325e-05, "loss": 1.0184, "step": 709 }, { "epoch": 0.052241894688434126, "grad_norm": 1.015625, "learning_rate": 4.969134721212161e-05, "loss": 0.986, "step": 710 }, { "epoch": 0.052315474821798116, "grad_norm": 1.1328125, "learning_rate": 4.9690439285632006e-05, "loss": 1.3072, "step": 711 }, { "epoch": 0.052389054955162107, "grad_norm": 0.98046875, "learning_rate": 4.9689530034053165e-05, "loss": 1.2645, "step": 712 }, { "epoch": 0.0524626350885261, "grad_norm": 1.203125, "learning_rate": 4.968861945743387e-05, "loss": 1.2966, "step": 713 }, { "epoch": 0.05253621522189009, "grad_norm": 0.88671875, "learning_rate": 4.968770755582301e-05, "loss": 0.8053, "step": 714 }, { "epoch": 0.052609795355254084, "grad_norm": 0.96875, "learning_rate": 4.96867943292695e-05, "loss": 0.9755, "step": 715 }, { "epoch": 0.052683375488618074, "grad_norm": 0.9609375, "learning_rate": 4.968587977782237e-05, "loss": 0.8545, "step": 716 }, { "epoch": 0.052756955621982064, "grad_norm": 0.93359375, "learning_rate": 4.96849639015307e-05, "loss": 0.998, "step": 717 }, { "epoch": 0.052830535755346054, "grad_norm": 0.85546875, "learning_rate": 4.9684046700443636e-05, "loss": 1.0482, "step": 718 }, { "epoch": 0.05290411588871005, "grad_norm": 1.0546875, "learning_rate": 4.968312817461041e-05, "loss": 1.1994, "step": 719 }, { "epoch": 0.05297769602207404, "grad_norm": 0.93359375, "learning_rate": 4.968220832408031e-05, "loss": 0.9737, "step": 720 }, { "epoch": 0.05305127615543803, "grad_norm": 1.15625, "learning_rate": 4.968128714890272e-05, "loss": 1.1198, "step": 721 }, { "epoch": 0.05312485628880202, "grad_norm": 0.9296875, "learning_rate": 4.9680364649127054e-05, "loss": 0.7691, "step": 722 }, { "epoch": 0.05319843642216601, "grad_norm": 1.1015625, "learning_rate": 4.967944082480284e-05, "loss": 1.0819, "step": 723 }, { "epoch": 0.05327201655553001, "grad_norm": 1.421875, "learning_rate": 4.967851567597964e-05, "loss": 1.0186, "step": 724 }, { "epoch": 0.053345596688894, "grad_norm": 0.99609375, "learning_rate": 4.967758920270712e-05, "loss": 1.0298, "step": 725 }, { "epoch": 0.05341917682225799, "grad_norm": 1.03125, "learning_rate": 4.9676661405035e-05, "loss": 0.9255, "step": 726 }, { "epoch": 0.05349275695562198, "grad_norm": 1.0234375, "learning_rate": 4.9675732283013064e-05, "loss": 1.2375, "step": 727 }, { "epoch": 0.05356633708898598, "grad_norm": 0.95703125, "learning_rate": 4.967480183669118e-05, "loss": 1.0639, "step": 728 }, { "epoch": 0.05363991722234997, "grad_norm": 1.1484375, "learning_rate": 4.9673870066119294e-05, "loss": 1.2414, "step": 729 }, { "epoch": 0.05371349735571396, "grad_norm": 1.0625, "learning_rate": 4.9672936971347394e-05, "loss": 1.0286, "step": 730 }, { "epoch": 0.05378707748907795, "grad_norm": 0.95703125, "learning_rate": 4.967200255242558e-05, "loss": 1.2123, "step": 731 }, { "epoch": 0.05386065762244194, "grad_norm": 1.03125, "learning_rate": 4.9671066809403976e-05, "loss": 0.9672, "step": 732 }, { "epoch": 0.053934237755805935, "grad_norm": 0.9921875, "learning_rate": 4.967012974233282e-05, "loss": 0.9823, "step": 733 }, { "epoch": 0.054007817889169925, "grad_norm": 1.015625, "learning_rate": 4.966919135126239e-05, "loss": 0.7568, "step": 734 }, { "epoch": 0.054081398022533915, "grad_norm": 1.03125, "learning_rate": 4.9668251636243065e-05, "loss": 1.4507, "step": 735 }, { "epoch": 0.054154978155897905, "grad_norm": 1.0390625, "learning_rate": 4.9667310597325255e-05, "loss": 1.4083, "step": 736 }, { "epoch": 0.0542285582892619, "grad_norm": 0.921875, "learning_rate": 4.966636823455948e-05, "loss": 0.9924, "step": 737 }, { "epoch": 0.05430213842262589, "grad_norm": 0.96484375, "learning_rate": 4.9665424547996306e-05, "loss": 0.9729, "step": 738 }, { "epoch": 0.05437571855598988, "grad_norm": 0.9140625, "learning_rate": 4.966447953768639e-05, "loss": 0.9148, "step": 739 }, { "epoch": 0.05444929868935387, "grad_norm": 1.1484375, "learning_rate": 4.966353320368044e-05, "loss": 1.2248, "step": 740 }, { "epoch": 0.05452287882271786, "grad_norm": 1.4375, "learning_rate": 4.9662585546029246e-05, "loss": 1.2476, "step": 741 }, { "epoch": 0.05459645895608186, "grad_norm": 0.77734375, "learning_rate": 4.9661636564783664e-05, "loss": 0.703, "step": 742 }, { "epoch": 0.05467003908944585, "grad_norm": 0.9140625, "learning_rate": 4.966068625999463e-05, "loss": 0.9597, "step": 743 }, { "epoch": 0.05474361922280984, "grad_norm": 1.0703125, "learning_rate": 4.965973463171314e-05, "loss": 0.8979, "step": 744 }, { "epoch": 0.05481719935617383, "grad_norm": 0.875, "learning_rate": 4.9658781679990265e-05, "loss": 0.8228, "step": 745 }, { "epoch": 0.05489077948953783, "grad_norm": 0.81640625, "learning_rate": 4.965782740487715e-05, "loss": 0.7687, "step": 746 }, { "epoch": 0.05496435962290182, "grad_norm": 1.0234375, "learning_rate": 4.965687180642501e-05, "loss": 0.897, "step": 747 }, { "epoch": 0.05503793975626581, "grad_norm": 1.1953125, "learning_rate": 4.965591488468514e-05, "loss": 0.9762, "step": 748 }, { "epoch": 0.0551115198896298, "grad_norm": 1.0234375, "learning_rate": 4.9654956639708885e-05, "loss": 1.0854, "step": 749 }, { "epoch": 0.05518510002299379, "grad_norm": 1.078125, "learning_rate": 4.965399707154766e-05, "loss": 0.8447, "step": 750 }, { "epoch": 0.055258680156357785, "grad_norm": 0.87890625, "learning_rate": 4.965303618025299e-05, "loss": 1.0321, "step": 751 }, { "epoch": 0.055332260289721776, "grad_norm": 0.94921875, "learning_rate": 4.9652073965876425e-05, "loss": 0.9771, "step": 752 }, { "epoch": 0.055405840423085766, "grad_norm": 0.8828125, "learning_rate": 4.9651110428469614e-05, "loss": 0.6425, "step": 753 }, { "epoch": 0.055479420556449756, "grad_norm": 0.921875, "learning_rate": 4.9650145568084266e-05, "loss": 0.84, "step": 754 }, { "epoch": 0.05555300068981375, "grad_norm": 1.0, "learning_rate": 4.964917938477216e-05, "loss": 1.1714, "step": 755 }, { "epoch": 0.05562658082317774, "grad_norm": 0.91015625, "learning_rate": 4.964821187858516e-05, "loss": 0.7757, "step": 756 }, { "epoch": 0.05570016095654173, "grad_norm": 1.015625, "learning_rate": 4.964724304957518e-05, "loss": 0.9814, "step": 757 }, { "epoch": 0.05577374108990572, "grad_norm": 0.9765625, "learning_rate": 4.964627289779421e-05, "loss": 0.7757, "step": 758 }, { "epoch": 0.05584732122326972, "grad_norm": 0.71875, "learning_rate": 4.964530142329433e-05, "loss": 0.6509, "step": 759 }, { "epoch": 0.05592090135663371, "grad_norm": 0.92578125, "learning_rate": 4.964432862612767e-05, "loss": 0.8458, "step": 760 }, { "epoch": 0.0559944814899977, "grad_norm": 1.171875, "learning_rate": 4.964335450634644e-05, "loss": 1.7253, "step": 761 }, { "epoch": 0.05606806162336169, "grad_norm": 1.3203125, "learning_rate": 4.964237906400291e-05, "loss": 1.1215, "step": 762 }, { "epoch": 0.05614164175672568, "grad_norm": 0.84765625, "learning_rate": 4.964140229914944e-05, "loss": 0.8251, "step": 763 }, { "epoch": 0.05621522189008968, "grad_norm": 1.0703125, "learning_rate": 4.964042421183846e-05, "loss": 1.0398, "step": 764 }, { "epoch": 0.05628880202345367, "grad_norm": 1.046875, "learning_rate": 4.9639444802122445e-05, "loss": 0.9634, "step": 765 }, { "epoch": 0.05636238215681766, "grad_norm": 0.87109375, "learning_rate": 4.963846407005397e-05, "loss": 1.187, "step": 766 }, { "epoch": 0.05643596229018165, "grad_norm": 0.8359375, "learning_rate": 4.963748201568565e-05, "loss": 0.861, "step": 767 }, { "epoch": 0.056509542423545646, "grad_norm": 0.93359375, "learning_rate": 4.963649863907022e-05, "loss": 0.7786, "step": 768 }, { "epoch": 0.056583122556909636, "grad_norm": 1.1015625, "learning_rate": 4.9635513940260434e-05, "loss": 1.0423, "step": 769 }, { "epoch": 0.056656702690273626, "grad_norm": 0.97265625, "learning_rate": 4.9634527919309145e-05, "loss": 1.2678, "step": 770 }, { "epoch": 0.056730282823637616, "grad_norm": 0.93359375, "learning_rate": 4.9633540576269265e-05, "loss": 0.9998, "step": 771 }, { "epoch": 0.05680386295700161, "grad_norm": 0.69140625, "learning_rate": 4.96325519111938e-05, "loss": 0.6784, "step": 772 }, { "epoch": 0.056877443090365604, "grad_norm": 0.98046875, "learning_rate": 4.963156192413579e-05, "loss": 0.9524, "step": 773 }, { "epoch": 0.056951023223729594, "grad_norm": 0.90234375, "learning_rate": 4.963057061514838e-05, "loss": 1.2812, "step": 774 }, { "epoch": 0.057024603357093584, "grad_norm": 0.98828125, "learning_rate": 4.962957798428475e-05, "loss": 0.8375, "step": 775 }, { "epoch": 0.057098183490457574, "grad_norm": 0.74609375, "learning_rate": 4.96285840315982e-05, "loss": 0.6929, "step": 776 }, { "epoch": 0.05717176362382157, "grad_norm": 1.15625, "learning_rate": 4.962758875714206e-05, "loss": 0.6131, "step": 777 }, { "epoch": 0.05724534375718556, "grad_norm": 1.078125, "learning_rate": 4.962659216096976e-05, "loss": 1.079, "step": 778 }, { "epoch": 0.05731892389054955, "grad_norm": 0.78515625, "learning_rate": 4.9625594243134755e-05, "loss": 0.8023, "step": 779 }, { "epoch": 0.05739250402391354, "grad_norm": 0.76953125, "learning_rate": 4.962459500369062e-05, "loss": 0.6319, "step": 780 }, { "epoch": 0.05746608415727753, "grad_norm": 1.1796875, "learning_rate": 4.962359444269098e-05, "loss": 0.9587, "step": 781 }, { "epoch": 0.05753966429064153, "grad_norm": 1.1015625, "learning_rate": 4.962259256018954e-05, "loss": 1.3477, "step": 782 }, { "epoch": 0.05761324442400552, "grad_norm": 1.03125, "learning_rate": 4.962158935624006e-05, "loss": 1.0746, "step": 783 }, { "epoch": 0.05768682455736951, "grad_norm": 0.9765625, "learning_rate": 4.962058483089638e-05, "loss": 0.9957, "step": 784 }, { "epoch": 0.0577604046907335, "grad_norm": 1.40625, "learning_rate": 4.9619578984212415e-05, "loss": 1.3374, "step": 785 }, { "epoch": 0.0578339848240975, "grad_norm": 1.1640625, "learning_rate": 4.961857181624214e-05, "loss": 1.3233, "step": 786 }, { "epoch": 0.05790756495746149, "grad_norm": 1.2421875, "learning_rate": 4.961756332703962e-05, "loss": 1.5237, "step": 787 }, { "epoch": 0.05798114509082548, "grad_norm": 0.84375, "learning_rate": 4.9616553516658974e-05, "loss": 1.0261, "step": 788 }, { "epoch": 0.05805472522418947, "grad_norm": 0.8359375, "learning_rate": 4.961554238515439e-05, "loss": 0.7153, "step": 789 }, { "epoch": 0.05812830535755346, "grad_norm": 1.046875, "learning_rate": 4.961452993258015e-05, "loss": 1.025, "step": 790 }, { "epoch": 0.058201885490917454, "grad_norm": 1.109375, "learning_rate": 4.961351615899057e-05, "loss": 1.0908, "step": 791 }, { "epoch": 0.058275465624281444, "grad_norm": 1.046875, "learning_rate": 4.961250106444007e-05, "loss": 1.0561, "step": 792 }, { "epoch": 0.058349045757645435, "grad_norm": 1.109375, "learning_rate": 4.9611484648983106e-05, "loss": 1.1467, "step": 793 }, { "epoch": 0.058422625891009425, "grad_norm": 1.109375, "learning_rate": 4.961046691267427e-05, "loss": 1.1308, "step": 794 }, { "epoch": 0.05849620602437342, "grad_norm": 0.94921875, "learning_rate": 4.960944785556814e-05, "loss": 1.2358, "step": 795 }, { "epoch": 0.05856978615773741, "grad_norm": 1.1328125, "learning_rate": 4.960842747771943e-05, "loss": 0.8898, "step": 796 }, { "epoch": 0.0586433662911014, "grad_norm": 0.83984375, "learning_rate": 4.96074057791829e-05, "loss": 1.0806, "step": 797 }, { "epoch": 0.05871694642446539, "grad_norm": 1.4375, "learning_rate": 4.9606382760013374e-05, "loss": 0.9684, "step": 798 }, { "epoch": 0.05879052655782939, "grad_norm": 1.2109375, "learning_rate": 4.960535842026577e-05, "loss": 1.5033, "step": 799 }, { "epoch": 0.05886410669119338, "grad_norm": 0.87109375, "learning_rate": 4.960433275999504e-05, "loss": 0.816, "step": 800 }, { "epoch": 0.05893768682455737, "grad_norm": 1.0390625, "learning_rate": 4.960330577925625e-05, "loss": 1.0746, "step": 801 }, { "epoch": 0.05901126695792136, "grad_norm": 2.765625, "learning_rate": 4.9602277478104496e-05, "loss": 1.2213, "step": 802 }, { "epoch": 0.05908484709128535, "grad_norm": 1.0, "learning_rate": 4.960124785659499e-05, "loss": 1.298, "step": 803 }, { "epoch": 0.05915842722464935, "grad_norm": 0.828125, "learning_rate": 4.960021691478297e-05, "loss": 0.7752, "step": 804 }, { "epoch": 0.05923200735801334, "grad_norm": 0.89453125, "learning_rate": 4.959918465272377e-05, "loss": 0.9438, "step": 805 }, { "epoch": 0.05930558749137733, "grad_norm": 0.9140625, "learning_rate": 4.95981510704728e-05, "loss": 0.7657, "step": 806 }, { "epoch": 0.05937916762474132, "grad_norm": 1.0078125, "learning_rate": 4.959711616808551e-05, "loss": 1.2213, "step": 807 }, { "epoch": 0.059452747758105315, "grad_norm": 1.1796875, "learning_rate": 4.959607994561746e-05, "loss": 1.065, "step": 808 }, { "epoch": 0.059526327891469305, "grad_norm": 0.84765625, "learning_rate": 4.9595042403124254e-05, "loss": 0.8997, "step": 809 }, { "epoch": 0.059599908024833295, "grad_norm": 0.85546875, "learning_rate": 4.959400354066157e-05, "loss": 0.8904, "step": 810 }, { "epoch": 0.059673488158197285, "grad_norm": 0.83984375, "learning_rate": 4.959296335828517e-05, "loss": 0.7214, "step": 811 }, { "epoch": 0.059747068291561276, "grad_norm": 0.94140625, "learning_rate": 4.959192185605088e-05, "loss": 0.8603, "step": 812 }, { "epoch": 0.05982064842492527, "grad_norm": 0.94921875, "learning_rate": 4.959087903401459e-05, "loss": 1.0214, "step": 813 }, { "epoch": 0.05989422855828926, "grad_norm": 1.0390625, "learning_rate": 4.958983489223227e-05, "loss": 1.1437, "step": 814 }, { "epoch": 0.05996780869165325, "grad_norm": 1.0546875, "learning_rate": 4.958878943075995e-05, "loss": 1.3009, "step": 815 }, { "epoch": 0.06004138882501724, "grad_norm": 0.9609375, "learning_rate": 4.9587742649653746e-05, "loss": 1.1063, "step": 816 }, { "epoch": 0.06011496895838124, "grad_norm": 1.8125, "learning_rate": 4.958669454896983e-05, "loss": 0.7659, "step": 817 }, { "epoch": 0.06018854909174523, "grad_norm": 0.92578125, "learning_rate": 4.9585645128764456e-05, "loss": 1.0277, "step": 818 }, { "epoch": 0.06026212922510922, "grad_norm": 0.7890625, "learning_rate": 4.958459438909395e-05, "loss": 0.6037, "step": 819 }, { "epoch": 0.06033570935847321, "grad_norm": 8.4375, "learning_rate": 4.9583542330014694e-05, "loss": 0.9885, "step": 820 }, { "epoch": 0.0604092894918372, "grad_norm": 1.09375, "learning_rate": 4.958248895158315e-05, "loss": 1.1207, "step": 821 }, { "epoch": 0.0604828696252012, "grad_norm": 0.97265625, "learning_rate": 4.9581434253855855e-05, "loss": 0.9179, "step": 822 }, { "epoch": 0.06055644975856519, "grad_norm": 0.90234375, "learning_rate": 4.958037823688941e-05, "loss": 1.0005, "step": 823 }, { "epoch": 0.06063002989192918, "grad_norm": 1.1171875, "learning_rate": 4.95793209007405e-05, "loss": 1.3325, "step": 824 }, { "epoch": 0.06070361002529317, "grad_norm": 0.92578125, "learning_rate": 4.957826224546585e-05, "loss": 1.1585, "step": 825 }, { "epoch": 0.060777190158657166, "grad_norm": 0.7734375, "learning_rate": 4.9577202271122286e-05, "loss": 0.8979, "step": 826 }, { "epoch": 0.060850770292021156, "grad_norm": 0.98828125, "learning_rate": 4.95761409777667e-05, "loss": 0.8202, "step": 827 }, { "epoch": 0.060924350425385146, "grad_norm": 0.98828125, "learning_rate": 4.9575078365456043e-05, "loss": 1.1022, "step": 828 }, { "epoch": 0.060997930558749136, "grad_norm": 0.90234375, "learning_rate": 4.957401443424735e-05, "loss": 0.763, "step": 829 }, { "epoch": 0.061071510692113126, "grad_norm": 1.21875, "learning_rate": 4.957294918419772e-05, "loss": 1.3115, "step": 830 }, { "epoch": 0.06114509082547712, "grad_norm": 0.90625, "learning_rate": 4.957188261536431e-05, "loss": 0.6945, "step": 831 }, { "epoch": 0.061218670958841113, "grad_norm": 0.85546875, "learning_rate": 4.9570814727804374e-05, "loss": 1.0005, "step": 832 }, { "epoch": 0.061292251092205104, "grad_norm": 0.8515625, "learning_rate": 4.956974552157522e-05, "loss": 0.7747, "step": 833 }, { "epoch": 0.061365831225569094, "grad_norm": 1.03125, "learning_rate": 4.9568674996734224e-05, "loss": 0.9603, "step": 834 }, { "epoch": 0.06143941135893309, "grad_norm": 1.109375, "learning_rate": 4.9567603153338847e-05, "loss": 1.3527, "step": 835 }, { "epoch": 0.06151299149229708, "grad_norm": 0.80859375, "learning_rate": 4.956652999144661e-05, "loss": 0.7923, "step": 836 }, { "epoch": 0.06158657162566107, "grad_norm": 1.0, "learning_rate": 4.956545551111511e-05, "loss": 1.0774, "step": 837 }, { "epoch": 0.06166015175902506, "grad_norm": 0.8671875, "learning_rate": 4.956437971240201e-05, "loss": 0.9355, "step": 838 }, { "epoch": 0.06173373189238905, "grad_norm": 0.87109375, "learning_rate": 4.9563302595365045e-05, "loss": 0.8825, "step": 839 }, { "epoch": 0.06180731202575305, "grad_norm": 0.7734375, "learning_rate": 4.9562224160062024e-05, "loss": 0.7766, "step": 840 }, { "epoch": 0.06188089215911704, "grad_norm": 0.8828125, "learning_rate": 4.956114440655083e-05, "loss": 0.9358, "step": 841 }, { "epoch": 0.06195447229248103, "grad_norm": 0.84765625, "learning_rate": 4.9560063334889396e-05, "loss": 0.8375, "step": 842 }, { "epoch": 0.06202805242584502, "grad_norm": 1.0625, "learning_rate": 4.955898094513576e-05, "loss": 1.0116, "step": 843 }, { "epoch": 0.062101632559209016, "grad_norm": 0.93359375, "learning_rate": 4.955789723734799e-05, "loss": 0.9917, "step": 844 }, { "epoch": 0.062175212692573006, "grad_norm": 1.0234375, "learning_rate": 4.955681221158426e-05, "loss": 1.3232, "step": 845 }, { "epoch": 0.062248792825937, "grad_norm": 0.9609375, "learning_rate": 4.955572586790281e-05, "loss": 0.8649, "step": 846 }, { "epoch": 0.06232237295930099, "grad_norm": 0.82421875, "learning_rate": 4.9554638206361924e-05, "loss": 0.8878, "step": 847 }, { "epoch": 0.062395953092664984, "grad_norm": 1.0546875, "learning_rate": 4.955354922701998e-05, "loss": 1.0983, "step": 848 }, { "epoch": 0.062469533226028974, "grad_norm": 0.9453125, "learning_rate": 4.955245892993543e-05, "loss": 1.2533, "step": 849 }, { "epoch": 0.06254311335939296, "grad_norm": 0.98046875, "learning_rate": 4.955136731516679e-05, "loss": 0.9845, "step": 850 }, { "epoch": 0.06261669349275696, "grad_norm": 1.28125, "learning_rate": 4.955027438277262e-05, "loss": 1.934, "step": 851 }, { "epoch": 0.06269027362612095, "grad_norm": 0.91015625, "learning_rate": 4.9549180132811603e-05, "loss": 0.8889, "step": 852 }, { "epoch": 0.06276385375948494, "grad_norm": 0.8828125, "learning_rate": 4.954808456534245e-05, "loss": 0.9598, "step": 853 }, { "epoch": 0.06283743389284893, "grad_norm": 1.0078125, "learning_rate": 4.954698768042396e-05, "loss": 1.1668, "step": 854 }, { "epoch": 0.06291101402621292, "grad_norm": 0.84765625, "learning_rate": 4.954588947811502e-05, "loss": 0.716, "step": 855 }, { "epoch": 0.06298459415957691, "grad_norm": 0.98046875, "learning_rate": 4.9544789958474535e-05, "loss": 0.8584, "step": 856 }, { "epoch": 0.0630581742929409, "grad_norm": 0.78515625, "learning_rate": 4.954368912156154e-05, "loss": 0.6127, "step": 857 }, { "epoch": 0.06313175442630489, "grad_norm": 1.046875, "learning_rate": 4.954258696743511e-05, "loss": 1.0402, "step": 858 }, { "epoch": 0.06320533455966888, "grad_norm": 0.90625, "learning_rate": 4.954148349615439e-05, "loss": 0.9507, "step": 859 }, { "epoch": 0.06327891469303289, "grad_norm": 1.1328125, "learning_rate": 4.954037870777859e-05, "loss": 1.3577, "step": 860 }, { "epoch": 0.06335249482639688, "grad_norm": 1.2578125, "learning_rate": 4.9539272602367025e-05, "loss": 1.1217, "step": 861 }, { "epoch": 0.06342607495976087, "grad_norm": 0.9765625, "learning_rate": 4.953816517997905e-05, "loss": 0.8124, "step": 862 }, { "epoch": 0.06349965509312486, "grad_norm": 0.890625, "learning_rate": 4.953705644067409e-05, "loss": 0.9313, "step": 863 }, { "epoch": 0.06357323522648885, "grad_norm": 1.25, "learning_rate": 4.953594638451166e-05, "loss": 0.9996, "step": 864 }, { "epoch": 0.06364681535985284, "grad_norm": 3.390625, "learning_rate": 4.953483501155133e-05, "loss": 0.6392, "step": 865 }, { "epoch": 0.06372039549321683, "grad_norm": 1.203125, "learning_rate": 4.9533722321852737e-05, "loss": 1.9765, "step": 866 }, { "epoch": 0.06379397562658082, "grad_norm": 1.1875, "learning_rate": 4.953260831547561e-05, "loss": 1.0975, "step": 867 }, { "epoch": 0.06386755575994482, "grad_norm": 1.0078125, "learning_rate": 4.953149299247973e-05, "loss": 0.8411, "step": 868 }, { "epoch": 0.06394113589330881, "grad_norm": 0.84765625, "learning_rate": 4.953037635292495e-05, "loss": 0.6156, "step": 869 }, { "epoch": 0.0640147160266728, "grad_norm": 0.83984375, "learning_rate": 4.952925839687121e-05, "loss": 0.6623, "step": 870 }, { "epoch": 0.06408829616003679, "grad_norm": 1.109375, "learning_rate": 4.952813912437849e-05, "loss": 1.2113, "step": 871 }, { "epoch": 0.06416187629340078, "grad_norm": 1.3046875, "learning_rate": 4.9527018535506885e-05, "loss": 0.7784, "step": 872 }, { "epoch": 0.06423545642676477, "grad_norm": 1.0546875, "learning_rate": 4.952589663031651e-05, "loss": 0.9496, "step": 873 }, { "epoch": 0.06430903656012876, "grad_norm": 0.83984375, "learning_rate": 4.9524773408867584e-05, "loss": 0.7851, "step": 874 }, { "epoch": 0.06438261669349275, "grad_norm": 0.78125, "learning_rate": 4.95236488712204e-05, "loss": 0.578, "step": 875 }, { "epoch": 0.06445619682685674, "grad_norm": 0.89453125, "learning_rate": 4.9522523017435294e-05, "loss": 1.0817, "step": 876 }, { "epoch": 0.06452977696022075, "grad_norm": 1.140625, "learning_rate": 4.952139584757269e-05, "loss": 1.3044, "step": 877 }, { "epoch": 0.06460335709358474, "grad_norm": 1.046875, "learning_rate": 4.9520267361693095e-05, "loss": 1.0066, "step": 878 }, { "epoch": 0.06467693722694873, "grad_norm": 1.109375, "learning_rate": 4.951913755985705e-05, "loss": 1.0606, "step": 879 }, { "epoch": 0.06475051736031272, "grad_norm": 1.125, "learning_rate": 4.951800644212522e-05, "loss": 1.3807, "step": 880 }, { "epoch": 0.06482409749367671, "grad_norm": 0.88671875, "learning_rate": 4.951687400855828e-05, "loss": 0.8976, "step": 881 }, { "epoch": 0.0648976776270407, "grad_norm": 0.953125, "learning_rate": 4.951574025921701e-05, "loss": 1.037, "step": 882 }, { "epoch": 0.06497125776040469, "grad_norm": 0.83203125, "learning_rate": 4.9514605194162275e-05, "loss": 0.7599, "step": 883 }, { "epoch": 0.06504483789376868, "grad_norm": 0.78125, "learning_rate": 4.951346881345498e-05, "loss": 0.6583, "step": 884 }, { "epoch": 0.06511841802713267, "grad_norm": 0.9453125, "learning_rate": 4.951233111715611e-05, "loss": 0.9101, "step": 885 }, { "epoch": 0.06519199816049667, "grad_norm": 0.85546875, "learning_rate": 4.951119210532673e-05, "loss": 0.8401, "step": 886 }, { "epoch": 0.06526557829386066, "grad_norm": 0.80078125, "learning_rate": 4.951005177802796e-05, "loss": 0.8865, "step": 887 }, { "epoch": 0.06533915842722465, "grad_norm": 1.109375, "learning_rate": 4.950891013532101e-05, "loss": 0.9345, "step": 888 }, { "epoch": 0.06541273856058864, "grad_norm": 1.09375, "learning_rate": 4.950776717726715e-05, "loss": 1.1009, "step": 889 }, { "epoch": 0.06548631869395263, "grad_norm": 1.0703125, "learning_rate": 4.95066229039277e-05, "loss": 1.0859, "step": 890 }, { "epoch": 0.06555989882731662, "grad_norm": 0.99609375, "learning_rate": 4.950547731536409e-05, "loss": 0.9128, "step": 891 }, { "epoch": 0.06563347896068061, "grad_norm": 0.96875, "learning_rate": 4.9504330411637794e-05, "loss": 0.8637, "step": 892 }, { "epoch": 0.0657070590940446, "grad_norm": 0.87109375, "learning_rate": 4.950318219281037e-05, "loss": 1.0806, "step": 893 }, { "epoch": 0.0657806392274086, "grad_norm": 0.90234375, "learning_rate": 4.950203265894344e-05, "loss": 0.6772, "step": 894 }, { "epoch": 0.0658542193607726, "grad_norm": 0.7578125, "learning_rate": 4.950088181009869e-05, "loss": 0.7715, "step": 895 }, { "epoch": 0.06592779949413659, "grad_norm": 1.0390625, "learning_rate": 4.949972964633789e-05, "loss": 1.4542, "step": 896 }, { "epoch": 0.06600137962750058, "grad_norm": 1.234375, "learning_rate": 4.949857616772288e-05, "loss": 1.1663, "step": 897 }, { "epoch": 0.06607495976086457, "grad_norm": 0.95703125, "learning_rate": 4.949742137431555e-05, "loss": 0.7811, "step": 898 }, { "epoch": 0.06614853989422856, "grad_norm": 1.078125, "learning_rate": 4.949626526617789e-05, "loss": 0.9057, "step": 899 }, { "epoch": 0.06622212002759255, "grad_norm": 0.984375, "learning_rate": 4.9495107843371934e-05, "loss": 0.9346, "step": 900 }, { "epoch": 0.06629570016095654, "grad_norm": 0.98828125, "learning_rate": 4.949394910595981e-05, "loss": 0.9646, "step": 901 }, { "epoch": 0.06636928029432053, "grad_norm": 0.953125, "learning_rate": 4.9492789054003695e-05, "loss": 0.945, "step": 902 }, { "epoch": 0.06644286042768452, "grad_norm": 0.78125, "learning_rate": 4.9491627687565855e-05, "loss": 0.7129, "step": 903 }, { "epoch": 0.06651644056104852, "grad_norm": 0.89453125, "learning_rate": 4.949046500670862e-05, "loss": 0.8891, "step": 904 }, { "epoch": 0.06659002069441251, "grad_norm": 0.98046875, "learning_rate": 4.948930101149437e-05, "loss": 0.7404, "step": 905 }, { "epoch": 0.0666636008277765, "grad_norm": 0.9375, "learning_rate": 4.94881357019856e-05, "loss": 1.1495, "step": 906 }, { "epoch": 0.0667371809611405, "grad_norm": 1.234375, "learning_rate": 4.9486969078244835e-05, "loss": 1.0328, "step": 907 }, { "epoch": 0.06681076109450448, "grad_norm": 1.0078125, "learning_rate": 4.9485801140334687e-05, "loss": 0.9204, "step": 908 }, { "epoch": 0.06688434122786847, "grad_norm": 0.93359375, "learning_rate": 4.948463188831784e-05, "loss": 0.9084, "step": 909 }, { "epoch": 0.06695792136123246, "grad_norm": 1.15625, "learning_rate": 4.9483461322257054e-05, "loss": 1.5526, "step": 910 }, { "epoch": 0.06703150149459645, "grad_norm": 0.90625, "learning_rate": 4.948228944221513e-05, "loss": 0.904, "step": 911 }, { "epoch": 0.06710508162796044, "grad_norm": 1.296875, "learning_rate": 4.9481116248254976e-05, "loss": 1.0075, "step": 912 }, { "epoch": 0.06717866176132445, "grad_norm": 0.87109375, "learning_rate": 4.947994174043955e-05, "loss": 0.7878, "step": 913 }, { "epoch": 0.06725224189468844, "grad_norm": 1.1328125, "learning_rate": 4.947876591883189e-05, "loss": 1.3461, "step": 914 }, { "epoch": 0.06732582202805243, "grad_norm": 0.8515625, "learning_rate": 4.947758878349509e-05, "loss": 0.9798, "step": 915 }, { "epoch": 0.06739940216141642, "grad_norm": 0.82421875, "learning_rate": 4.947641033449234e-05, "loss": 0.7173, "step": 916 }, { "epoch": 0.06747298229478041, "grad_norm": 0.890625, "learning_rate": 4.947523057188686e-05, "loss": 0.8946, "step": 917 }, { "epoch": 0.0675465624281444, "grad_norm": 1.1484375, "learning_rate": 4.9474049495742006e-05, "loss": 1.2353, "step": 918 }, { "epoch": 0.06762014256150839, "grad_norm": 0.875, "learning_rate": 4.947286710612112e-05, "loss": 0.956, "step": 919 }, { "epoch": 0.06769372269487238, "grad_norm": 1.171875, "learning_rate": 4.9471683403087695e-05, "loss": 1.2927, "step": 920 }, { "epoch": 0.06776730282823637, "grad_norm": 1.0390625, "learning_rate": 4.9470498386705236e-05, "loss": 1.1632, "step": 921 }, { "epoch": 0.06784088296160037, "grad_norm": 0.9609375, "learning_rate": 4.9469312057037346e-05, "loss": 1.243, "step": 922 }, { "epoch": 0.06791446309496436, "grad_norm": 1.078125, "learning_rate": 4.94681244141477e-05, "loss": 0.7535, "step": 923 }, { "epoch": 0.06798804322832835, "grad_norm": 1.0, "learning_rate": 4.946693545810002e-05, "loss": 1.3773, "step": 924 }, { "epoch": 0.06806162336169234, "grad_norm": 1.1328125, "learning_rate": 4.946574518895813e-05, "loss": 1.0478, "step": 925 }, { "epoch": 0.06813520349505633, "grad_norm": 1.390625, "learning_rate": 4.94645536067859e-05, "loss": 2.1322, "step": 926 }, { "epoch": 0.06820878362842032, "grad_norm": 1.2890625, "learning_rate": 4.94633607116473e-05, "loss": 1.1893, "step": 927 }, { "epoch": 0.06828236376178431, "grad_norm": 1.171875, "learning_rate": 4.946216650360632e-05, "loss": 1.2635, "step": 928 }, { "epoch": 0.0683559438951483, "grad_norm": 1.03125, "learning_rate": 4.9460970982727074e-05, "loss": 1.1346, "step": 929 }, { "epoch": 0.0684295240285123, "grad_norm": 0.8359375, "learning_rate": 4.945977414907371e-05, "loss": 1.0327, "step": 930 }, { "epoch": 0.0685031041618763, "grad_norm": 1.28125, "learning_rate": 4.9458576002710474e-05, "loss": 0.8676, "step": 931 }, { "epoch": 0.06857668429524029, "grad_norm": 0.828125, "learning_rate": 4.945737654370165e-05, "loss": 0.7823, "step": 932 }, { "epoch": 0.06865026442860428, "grad_norm": 1.0625, "learning_rate": 4.9456175772111624e-05, "loss": 1.0325, "step": 933 }, { "epoch": 0.06872384456196827, "grad_norm": 1.0546875, "learning_rate": 4.9454973688004835e-05, "loss": 1.1964, "step": 934 }, { "epoch": 0.06879742469533226, "grad_norm": 1.0, "learning_rate": 4.94537702914458e-05, "loss": 0.9395, "step": 935 }, { "epoch": 0.06887100482869625, "grad_norm": 0.84375, "learning_rate": 4.94525655824991e-05, "loss": 1.1721, "step": 936 }, { "epoch": 0.06894458496206024, "grad_norm": 0.84765625, "learning_rate": 4.945135956122938e-05, "loss": 0.8203, "step": 937 }, { "epoch": 0.06901816509542423, "grad_norm": 1.0390625, "learning_rate": 4.945015222770139e-05, "loss": 0.9418, "step": 938 }, { "epoch": 0.06909174522878822, "grad_norm": 0.9140625, "learning_rate": 4.94489435819799e-05, "loss": 0.9211, "step": 939 }, { "epoch": 0.06916532536215222, "grad_norm": 0.9609375, "learning_rate": 4.944773362412979e-05, "loss": 0.974, "step": 940 }, { "epoch": 0.06923890549551621, "grad_norm": 0.890625, "learning_rate": 4.944652235421599e-05, "loss": 1.1142, "step": 941 }, { "epoch": 0.0693124856288802, "grad_norm": 0.9921875, "learning_rate": 4.94453097723035e-05, "loss": 1.0195, "step": 942 }, { "epoch": 0.0693860657622442, "grad_norm": 0.95703125, "learning_rate": 4.944409587845741e-05, "loss": 1.0767, "step": 943 }, { "epoch": 0.06945964589560819, "grad_norm": 0.88671875, "learning_rate": 4.9442880672742866e-05, "loss": 1.1185, "step": 944 }, { "epoch": 0.06953322602897218, "grad_norm": 0.95703125, "learning_rate": 4.944166415522509e-05, "loss": 1.3641, "step": 945 }, { "epoch": 0.06960680616233617, "grad_norm": 1.0078125, "learning_rate": 4.944044632596935e-05, "loss": 0.9978, "step": 946 }, { "epoch": 0.06968038629570016, "grad_norm": 1.03125, "learning_rate": 4.9439227185041015e-05, "loss": 1.0883, "step": 947 }, { "epoch": 0.06975396642906415, "grad_norm": 1.2890625, "learning_rate": 4.943800673250553e-05, "loss": 1.2315, "step": 948 }, { "epoch": 0.06982754656242815, "grad_norm": 0.90625, "learning_rate": 4.943678496842837e-05, "loss": 0.934, "step": 949 }, { "epoch": 0.06990112669579214, "grad_norm": 0.6484375, "learning_rate": 4.9435561892875114e-05, "loss": 0.6817, "step": 950 }, { "epoch": 0.06997470682915613, "grad_norm": 0.90625, "learning_rate": 4.943433750591141e-05, "loss": 0.9523, "step": 951 }, { "epoch": 0.07004828696252012, "grad_norm": 0.73046875, "learning_rate": 4.9433111807602957e-05, "loss": 0.6998, "step": 952 }, { "epoch": 0.07012186709588411, "grad_norm": 0.94140625, "learning_rate": 4.943188479801554e-05, "loss": 1.1778, "step": 953 }, { "epoch": 0.0701954472292481, "grad_norm": 0.96484375, "learning_rate": 4.9430656477215016e-05, "loss": 0.8764, "step": 954 }, { "epoch": 0.07026902736261209, "grad_norm": 0.99609375, "learning_rate": 4.94294268452673e-05, "loss": 1.3627, "step": 955 }, { "epoch": 0.07034260749597608, "grad_norm": 1.1875, "learning_rate": 4.942819590223838e-05, "loss": 0.9452, "step": 956 }, { "epoch": 0.07041618762934007, "grad_norm": 1.2109375, "learning_rate": 4.942696364819433e-05, "loss": 1.3631, "step": 957 }, { "epoch": 0.07048976776270408, "grad_norm": 1.0859375, "learning_rate": 4.942573008320128e-05, "loss": 1.4654, "step": 958 }, { "epoch": 0.07056334789606807, "grad_norm": 0.91015625, "learning_rate": 4.942449520732543e-05, "loss": 0.801, "step": 959 }, { "epoch": 0.07063692802943206, "grad_norm": 0.8203125, "learning_rate": 4.942325902063305e-05, "loss": 0.868, "step": 960 }, { "epoch": 0.07071050816279605, "grad_norm": 1.0859375, "learning_rate": 4.9422021523190486e-05, "loss": 0.9592, "step": 961 }, { "epoch": 0.07078408829616004, "grad_norm": 0.96875, "learning_rate": 4.9420782715064154e-05, "loss": 0.941, "step": 962 }, { "epoch": 0.07085766842952403, "grad_norm": 1.15625, "learning_rate": 4.9419542596320544e-05, "loss": 1.2685, "step": 963 }, { "epoch": 0.07093124856288802, "grad_norm": 0.8828125, "learning_rate": 4.94183011670262e-05, "loss": 0.8669, "step": 964 }, { "epoch": 0.071004828696252, "grad_norm": 0.86328125, "learning_rate": 4.941705842724775e-05, "loss": 0.8033, "step": 965 }, { "epoch": 0.07107840882961601, "grad_norm": 0.859375, "learning_rate": 4.941581437705189e-05, "loss": 0.7802, "step": 966 }, { "epoch": 0.07115198896298, "grad_norm": 0.83203125, "learning_rate": 4.9414569016505394e-05, "loss": 0.8719, "step": 967 }, { "epoch": 0.07122556909634399, "grad_norm": 1.109375, "learning_rate": 4.941332234567509e-05, "loss": 1.311, "step": 968 }, { "epoch": 0.07129914922970798, "grad_norm": 0.875, "learning_rate": 4.941207436462788e-05, "loss": 1.0031, "step": 969 }, { "epoch": 0.07137272936307197, "grad_norm": 0.90234375, "learning_rate": 4.9410825073430756e-05, "loss": 1.2397, "step": 970 }, { "epoch": 0.07144630949643596, "grad_norm": 1.0390625, "learning_rate": 4.940957447215075e-05, "loss": 1.4965, "step": 971 }, { "epoch": 0.07151988962979995, "grad_norm": 1.046875, "learning_rate": 4.9408322560854985e-05, "loss": 1.0148, "step": 972 }, { "epoch": 0.07159346976316394, "grad_norm": 0.8125, "learning_rate": 4.940706933961065e-05, "loss": 0.6665, "step": 973 }, { "epoch": 0.07166704989652793, "grad_norm": 0.90234375, "learning_rate": 4.9405814808485e-05, "loss": 0.7528, "step": 974 }, { "epoch": 0.07174063002989194, "grad_norm": 1.0859375, "learning_rate": 4.940455896754537e-05, "loss": 0.9434, "step": 975 }, { "epoch": 0.07181421016325593, "grad_norm": 0.89453125, "learning_rate": 4.940330181685915e-05, "loss": 0.7931, "step": 976 }, { "epoch": 0.07188779029661992, "grad_norm": 0.9765625, "learning_rate": 4.9402043356493816e-05, "loss": 1.1085, "step": 977 }, { "epoch": 0.0719613704299839, "grad_norm": 0.8359375, "learning_rate": 4.9400783586516895e-05, "loss": 0.6421, "step": 978 }, { "epoch": 0.0720349505633479, "grad_norm": 0.8515625, "learning_rate": 4.939952250699602e-05, "loss": 0.7732, "step": 979 }, { "epoch": 0.07210853069671189, "grad_norm": 0.703125, "learning_rate": 4.939826011799885e-05, "loss": 0.713, "step": 980 }, { "epoch": 0.07218211083007588, "grad_norm": 0.78125, "learning_rate": 4.9396996419593136e-05, "loss": 0.6547, "step": 981 }, { "epoch": 0.07225569096343987, "grad_norm": 1.0234375, "learning_rate": 4.939573141184671e-05, "loss": 0.7736, "step": 982 }, { "epoch": 0.07232927109680386, "grad_norm": 1.171875, "learning_rate": 4.9394465094827446e-05, "loss": 1.1406, "step": 983 }, { "epoch": 0.07240285123016786, "grad_norm": 1.296875, "learning_rate": 4.9393197468603324e-05, "loss": 1.4124, "step": 984 }, { "epoch": 0.07247643136353185, "grad_norm": 1.203125, "learning_rate": 4.939192853324236e-05, "loss": 1.4157, "step": 985 }, { "epoch": 0.07255001149689584, "grad_norm": 0.9453125, "learning_rate": 4.9390658288812675e-05, "loss": 0.9396, "step": 986 }, { "epoch": 0.07262359163025983, "grad_norm": 1.046875, "learning_rate": 4.938938673538242e-05, "loss": 0.6932, "step": 987 }, { "epoch": 0.07269717176362382, "grad_norm": 1.0078125, "learning_rate": 4.9388113873019835e-05, "loss": 1.0153, "step": 988 }, { "epoch": 0.07277075189698781, "grad_norm": 0.91015625, "learning_rate": 4.938683970179325e-05, "loss": 1.0883, "step": 989 }, { "epoch": 0.0728443320303518, "grad_norm": 0.92578125, "learning_rate": 4.938556422177104e-05, "loss": 0.9006, "step": 990 }, { "epoch": 0.07291791216371579, "grad_norm": 0.828125, "learning_rate": 4.9384287433021646e-05, "loss": 0.8341, "step": 991 }, { "epoch": 0.07299149229707978, "grad_norm": 1.1171875, "learning_rate": 4.9383009335613606e-05, "loss": 1.3068, "step": 992 }, { "epoch": 0.07306507243044379, "grad_norm": 1.0078125, "learning_rate": 4.938172992961551e-05, "loss": 1.0953, "step": 993 }, { "epoch": 0.07313865256380778, "grad_norm": 0.92578125, "learning_rate": 4.938044921509602e-05, "loss": 0.9872, "step": 994 }, { "epoch": 0.07321223269717177, "grad_norm": 0.953125, "learning_rate": 4.9379167192123864e-05, "loss": 1.1377, "step": 995 }, { "epoch": 0.07328581283053576, "grad_norm": 0.8203125, "learning_rate": 4.937788386076786e-05, "loss": 0.9147, "step": 996 }, { "epoch": 0.07335939296389975, "grad_norm": 1.3828125, "learning_rate": 4.9376599221096865e-05, "loss": 1.6023, "step": 997 }, { "epoch": 0.07343297309726374, "grad_norm": 0.8671875, "learning_rate": 4.937531327317983e-05, "loss": 1.0771, "step": 998 }, { "epoch": 0.07350655323062773, "grad_norm": 0.890625, "learning_rate": 4.9374026017085776e-05, "loss": 0.7791, "step": 999 }, { "epoch": 0.07358013336399172, "grad_norm": 0.84375, "learning_rate": 4.9372737452883774e-05, "loss": 1.045, "step": 1000 }, { "epoch": 0.07365371349735571, "grad_norm": 0.8515625, "learning_rate": 4.937144758064299e-05, "loss": 0.5552, "step": 1001 }, { "epoch": 0.07372729363071971, "grad_norm": 0.79296875, "learning_rate": 4.937015640043264e-05, "loss": 0.6764, "step": 1002 }, { "epoch": 0.0738008737640837, "grad_norm": 0.98046875, "learning_rate": 4.9368863912322036e-05, "loss": 1.0907, "step": 1003 }, { "epoch": 0.07387445389744769, "grad_norm": 0.83984375, "learning_rate": 4.936757011638052e-05, "loss": 0.7189, "step": 1004 }, { "epoch": 0.07394803403081168, "grad_norm": 0.98828125, "learning_rate": 4.936627501267755e-05, "loss": 0.6947, "step": 1005 }, { "epoch": 0.07402161416417567, "grad_norm": 0.890625, "learning_rate": 4.936497860128262e-05, "loss": 0.9126, "step": 1006 }, { "epoch": 0.07409519429753966, "grad_norm": 0.75, "learning_rate": 4.93636808822653e-05, "loss": 0.8022, "step": 1007 }, { "epoch": 0.07416877443090365, "grad_norm": 0.9296875, "learning_rate": 4.936238185569525e-05, "loss": 0.8789, "step": 1008 }, { "epoch": 0.07424235456426764, "grad_norm": 0.8671875, "learning_rate": 4.9361081521642176e-05, "loss": 1.1579, "step": 1009 }, { "epoch": 0.07431593469763163, "grad_norm": 0.9453125, "learning_rate": 4.935977988017587e-05, "loss": 1.1439, "step": 1010 }, { "epoch": 0.07438951483099564, "grad_norm": 0.95703125, "learning_rate": 4.935847693136619e-05, "loss": 1.4934, "step": 1011 }, { "epoch": 0.07446309496435963, "grad_norm": 0.765625, "learning_rate": 4.935717267528305e-05, "loss": 0.7336, "step": 1012 }, { "epoch": 0.07453667509772362, "grad_norm": 0.69140625, "learning_rate": 4.935586711199647e-05, "loss": 0.7628, "step": 1013 }, { "epoch": 0.07461025523108761, "grad_norm": 1.0625, "learning_rate": 4.9354560241576494e-05, "loss": 0.9268, "step": 1014 }, { "epoch": 0.0746838353644516, "grad_norm": 0.81640625, "learning_rate": 4.935325206409327e-05, "loss": 0.8797, "step": 1015 }, { "epoch": 0.07475741549781559, "grad_norm": 0.7421875, "learning_rate": 4.935194257961701e-05, "loss": 0.6649, "step": 1016 }, { "epoch": 0.07483099563117958, "grad_norm": 0.8984375, "learning_rate": 4.935063178821798e-05, "loss": 1.018, "step": 1017 }, { "epoch": 0.07490457576454357, "grad_norm": 0.91015625, "learning_rate": 4.934931968996654e-05, "loss": 1.0185, "step": 1018 }, { "epoch": 0.07497815589790756, "grad_norm": 1.1171875, "learning_rate": 4.9348006284933094e-05, "loss": 1.1034, "step": 1019 }, { "epoch": 0.07505173603127156, "grad_norm": 1.0546875, "learning_rate": 4.934669157318814e-05, "loss": 1.3453, "step": 1020 }, { "epoch": 0.07512531616463555, "grad_norm": 0.98046875, "learning_rate": 4.9345375554802246e-05, "loss": 1.0863, "step": 1021 }, { "epoch": 0.07519889629799954, "grad_norm": 1.328125, "learning_rate": 4.9344058229846015e-05, "loss": 1.431, "step": 1022 }, { "epoch": 0.07527247643136353, "grad_norm": 0.82421875, "learning_rate": 4.934273959839015e-05, "loss": 1.0453, "step": 1023 }, { "epoch": 0.07534605656472752, "grad_norm": 1.1015625, "learning_rate": 4.9341419660505444e-05, "loss": 1.4968, "step": 1024 }, { "epoch": 0.07541963669809151, "grad_norm": 1.1953125, "learning_rate": 4.9340098416262714e-05, "loss": 1.0828, "step": 1025 }, { "epoch": 0.0754932168314555, "grad_norm": 0.875, "learning_rate": 4.9338775865732874e-05, "loss": 0.8269, "step": 1026 }, { "epoch": 0.0755667969648195, "grad_norm": 1.171875, "learning_rate": 4.93374520089869e-05, "loss": 1.1307, "step": 1027 }, { "epoch": 0.07564037709818348, "grad_norm": 0.91796875, "learning_rate": 4.9336126846095846e-05, "loss": 0.7401, "step": 1028 }, { "epoch": 0.07571395723154749, "grad_norm": 1.0625, "learning_rate": 4.933480037713083e-05, "loss": 1.185, "step": 1029 }, { "epoch": 0.07578753736491148, "grad_norm": 0.76171875, "learning_rate": 4.9333472602163035e-05, "loss": 0.6656, "step": 1030 }, { "epoch": 0.07586111749827547, "grad_norm": 0.94921875, "learning_rate": 4.933214352126373e-05, "loss": 1.0686, "step": 1031 }, { "epoch": 0.07593469763163946, "grad_norm": 0.8359375, "learning_rate": 4.933081313450423e-05, "loss": 0.6938, "step": 1032 }, { "epoch": 0.07600827776500345, "grad_norm": 0.8984375, "learning_rate": 4.932948144195596e-05, "loss": 0.8716, "step": 1033 }, { "epoch": 0.07608185789836744, "grad_norm": 1.0234375, "learning_rate": 4.9328148443690356e-05, "loss": 1.202, "step": 1034 }, { "epoch": 0.07615543803173143, "grad_norm": 1.0078125, "learning_rate": 4.9326814139778985e-05, "loss": 1.1178, "step": 1035 }, { "epoch": 0.07622901816509542, "grad_norm": 0.79296875, "learning_rate": 4.932547853029344e-05, "loss": 0.6975, "step": 1036 }, { "epoch": 0.07630259829845941, "grad_norm": 0.90234375, "learning_rate": 4.932414161530541e-05, "loss": 0.7133, "step": 1037 }, { "epoch": 0.07637617843182341, "grad_norm": 1.203125, "learning_rate": 4.932280339488664e-05, "loss": 1.4606, "step": 1038 }, { "epoch": 0.0764497585651874, "grad_norm": 0.83984375, "learning_rate": 4.9321463869108954e-05, "loss": 0.9715, "step": 1039 }, { "epoch": 0.0765233386985514, "grad_norm": 0.90625, "learning_rate": 4.932012303804423e-05, "loss": 1.0028, "step": 1040 }, { "epoch": 0.07659691883191538, "grad_norm": 0.875, "learning_rate": 4.931878090176445e-05, "loss": 0.9707, "step": 1041 }, { "epoch": 0.07667049896527937, "grad_norm": 0.9140625, "learning_rate": 4.931743746034162e-05, "loss": 0.8805, "step": 1042 }, { "epoch": 0.07674407909864336, "grad_norm": 0.7578125, "learning_rate": 4.931609271384785e-05, "loss": 0.7408, "step": 1043 }, { "epoch": 0.07681765923200735, "grad_norm": 1.046875, "learning_rate": 4.931474666235531e-05, "loss": 1.489, "step": 1044 }, { "epoch": 0.07689123936537134, "grad_norm": 0.78515625, "learning_rate": 4.931339930593625e-05, "loss": 0.636, "step": 1045 }, { "epoch": 0.07696481949873533, "grad_norm": 0.91796875, "learning_rate": 4.931205064466297e-05, "loss": 1.081, "step": 1046 }, { "epoch": 0.07703839963209934, "grad_norm": 0.890625, "learning_rate": 4.931070067860785e-05, "loss": 0.7789, "step": 1047 }, { "epoch": 0.07711197976546333, "grad_norm": 0.82421875, "learning_rate": 4.930934940784333e-05, "loss": 0.7309, "step": 1048 }, { "epoch": 0.07718555989882732, "grad_norm": 1.0078125, "learning_rate": 4.930799683244195e-05, "loss": 1.0883, "step": 1049 }, { "epoch": 0.07725914003219131, "grad_norm": 1.1171875, "learning_rate": 4.930664295247629e-05, "loss": 1.576, "step": 1050 }, { "epoch": 0.0773327201655553, "grad_norm": 1.0625, "learning_rate": 4.930528776801901e-05, "loss": 0.8766, "step": 1051 }, { "epoch": 0.07740630029891929, "grad_norm": 0.6796875, "learning_rate": 4.9303931279142844e-05, "loss": 0.705, "step": 1052 }, { "epoch": 0.07747988043228328, "grad_norm": 1.6953125, "learning_rate": 4.9302573485920587e-05, "loss": 0.6259, "step": 1053 }, { "epoch": 0.07755346056564727, "grad_norm": 1.171875, "learning_rate": 4.930121438842512e-05, "loss": 1.1955, "step": 1054 }, { "epoch": 0.07762704069901127, "grad_norm": 0.83984375, "learning_rate": 4.9299853986729365e-05, "loss": 0.6949, "step": 1055 }, { "epoch": 0.07770062083237526, "grad_norm": 0.9609375, "learning_rate": 4.929849228090635e-05, "loss": 0.8881, "step": 1056 }, { "epoch": 0.07777420096573925, "grad_norm": 0.89453125, "learning_rate": 4.929712927102914e-05, "loss": 0.8625, "step": 1057 }, { "epoch": 0.07784778109910324, "grad_norm": 0.9296875, "learning_rate": 4.92957649571709e-05, "loss": 1.0278, "step": 1058 }, { "epoch": 0.07792136123246723, "grad_norm": 0.83984375, "learning_rate": 4.929439933940484e-05, "loss": 0.8735, "step": 1059 }, { "epoch": 0.07799494136583122, "grad_norm": 0.875, "learning_rate": 4.929303241780425e-05, "loss": 0.8721, "step": 1060 }, { "epoch": 0.07806852149919521, "grad_norm": 0.80078125, "learning_rate": 4.92916641924425e-05, "loss": 0.7614, "step": 1061 }, { "epoch": 0.0781421016325592, "grad_norm": 1.0078125, "learning_rate": 4.929029466339301e-05, "loss": 1.2894, "step": 1062 }, { "epoch": 0.0782156817659232, "grad_norm": 1.0078125, "learning_rate": 4.928892383072928e-05, "loss": 0.9365, "step": 1063 }, { "epoch": 0.0782892618992872, "grad_norm": 1.0703125, "learning_rate": 4.9287551694524894e-05, "loss": 1.2815, "step": 1064 }, { "epoch": 0.07836284203265119, "grad_norm": 1.0078125, "learning_rate": 4.928617825485347e-05, "loss": 0.7414, "step": 1065 }, { "epoch": 0.07843642216601518, "grad_norm": 1.2578125, "learning_rate": 4.928480351178873e-05, "loss": 0.8891, "step": 1066 }, { "epoch": 0.07851000229937917, "grad_norm": 0.8515625, "learning_rate": 4.928342746540446e-05, "loss": 0.8198, "step": 1067 }, { "epoch": 0.07858358243274316, "grad_norm": 0.7578125, "learning_rate": 4.92820501157745e-05, "loss": 0.7424, "step": 1068 }, { "epoch": 0.07865716256610715, "grad_norm": 1.0625, "learning_rate": 4.928067146297277e-05, "loss": 1.1136, "step": 1069 }, { "epoch": 0.07873074269947114, "grad_norm": 0.98046875, "learning_rate": 4.927929150707326e-05, "loss": 1.0194, "step": 1070 }, { "epoch": 0.07880432283283513, "grad_norm": 0.87109375, "learning_rate": 4.927791024815004e-05, "loss": 0.828, "step": 1071 }, { "epoch": 0.07887790296619912, "grad_norm": 0.9296875, "learning_rate": 4.927652768627722e-05, "loss": 1.1475, "step": 1072 }, { "epoch": 0.07895148309956312, "grad_norm": 1.0703125, "learning_rate": 4.9275143821529025e-05, "loss": 1.2907, "step": 1073 }, { "epoch": 0.07902506323292711, "grad_norm": 0.87109375, "learning_rate": 4.927375865397969e-05, "loss": 0.7493, "step": 1074 }, { "epoch": 0.0790986433662911, "grad_norm": 0.90234375, "learning_rate": 4.9272372183703594e-05, "loss": 0.8228, "step": 1075 }, { "epoch": 0.0791722234996551, "grad_norm": 0.7890625, "learning_rate": 4.9270984410775125e-05, "loss": 0.7071, "step": 1076 }, { "epoch": 0.07924580363301909, "grad_norm": 0.87109375, "learning_rate": 4.926959533526876e-05, "loss": 0.98, "step": 1077 }, { "epoch": 0.07931938376638308, "grad_norm": 0.91796875, "learning_rate": 4.926820495725905e-05, "loss": 0.968, "step": 1078 }, { "epoch": 0.07939296389974707, "grad_norm": 1.09375, "learning_rate": 4.926681327682061e-05, "loss": 1.0755, "step": 1079 }, { "epoch": 0.07946654403311106, "grad_norm": 0.94140625, "learning_rate": 4.926542029402815e-05, "loss": 1.4966, "step": 1080 }, { "epoch": 0.07954012416647505, "grad_norm": 1.0625, "learning_rate": 4.9264026008956403e-05, "loss": 0.8471, "step": 1081 }, { "epoch": 0.07961370429983905, "grad_norm": 0.6875, "learning_rate": 4.926263042168021e-05, "loss": 0.5744, "step": 1082 }, { "epoch": 0.07968728443320304, "grad_norm": 0.92578125, "learning_rate": 4.926123353227447e-05, "loss": 1.1622, "step": 1083 }, { "epoch": 0.07976086456656703, "grad_norm": 0.84765625, "learning_rate": 4.925983534081416e-05, "loss": 0.9022, "step": 1084 }, { "epoch": 0.07983444469993102, "grad_norm": 0.90625, "learning_rate": 4.9258435847374286e-05, "loss": 0.7833, "step": 1085 }, { "epoch": 0.07990802483329501, "grad_norm": 1.140625, "learning_rate": 4.925703505202999e-05, "loss": 0.8629, "step": 1086 }, { "epoch": 0.079981604966659, "grad_norm": 1.2421875, "learning_rate": 4.925563295485644e-05, "loss": 1.7629, "step": 1087 }, { "epoch": 0.08005518510002299, "grad_norm": 1.046875, "learning_rate": 4.925422955592887e-05, "loss": 1.1017, "step": 1088 }, { "epoch": 0.08012876523338698, "grad_norm": 1.109375, "learning_rate": 4.9252824855322624e-05, "loss": 1.2518, "step": 1089 }, { "epoch": 0.08020234536675097, "grad_norm": 0.84765625, "learning_rate": 4.925141885311306e-05, "loss": 1.0385, "step": 1090 }, { "epoch": 0.08027592550011498, "grad_norm": 0.94921875, "learning_rate": 4.9250011549375664e-05, "loss": 0.9763, "step": 1091 }, { "epoch": 0.08034950563347897, "grad_norm": 0.94921875, "learning_rate": 4.924860294418594e-05, "loss": 1.0588, "step": 1092 }, { "epoch": 0.08042308576684296, "grad_norm": 0.92578125, "learning_rate": 4.92471930376195e-05, "loss": 0.6802, "step": 1093 }, { "epoch": 0.08049666590020695, "grad_norm": 1.078125, "learning_rate": 4.924578182975201e-05, "loss": 0.8913, "step": 1094 }, { "epoch": 0.08057024603357094, "grad_norm": 2.5, "learning_rate": 4.9244369320659204e-05, "loss": 1.4806, "step": 1095 }, { "epoch": 0.08064382616693493, "grad_norm": 1.2578125, "learning_rate": 4.9242955510416877e-05, "loss": 1.084, "step": 1096 }, { "epoch": 0.08071740630029892, "grad_norm": 1.1328125, "learning_rate": 4.924154039910092e-05, "loss": 0.9423, "step": 1097 }, { "epoch": 0.0807909864336629, "grad_norm": 1.078125, "learning_rate": 4.924012398678728e-05, "loss": 1.2512, "step": 1098 }, { "epoch": 0.0808645665670269, "grad_norm": 0.8515625, "learning_rate": 4.923870627355196e-05, "loss": 0.8654, "step": 1099 }, { "epoch": 0.0809381467003909, "grad_norm": 0.8984375, "learning_rate": 4.923728725947106e-05, "loss": 0.7415, "step": 1100 }, { "epoch": 0.08101172683375489, "grad_norm": 0.88671875, "learning_rate": 4.923586694462073e-05, "loss": 0.9244, "step": 1101 }, { "epoch": 0.08108530696711888, "grad_norm": 0.99609375, "learning_rate": 4.9234445329077207e-05, "loss": 0.7599, "step": 1102 }, { "epoch": 0.08115888710048287, "grad_norm": 0.88671875, "learning_rate": 4.9233022412916766e-05, "loss": 0.9252, "step": 1103 }, { "epoch": 0.08123246723384686, "grad_norm": 0.8828125, "learning_rate": 4.923159819621578e-05, "loss": 0.7523, "step": 1104 }, { "epoch": 0.08130604736721085, "grad_norm": 0.875, "learning_rate": 4.9230172679050686e-05, "loss": 1.0046, "step": 1105 }, { "epoch": 0.08137962750057484, "grad_norm": 0.90234375, "learning_rate": 4.922874586149799e-05, "loss": 0.7878, "step": 1106 }, { "epoch": 0.08145320763393883, "grad_norm": 0.8828125, "learning_rate": 4.922731774363426e-05, "loss": 0.9076, "step": 1107 }, { "epoch": 0.08152678776730282, "grad_norm": 1.203125, "learning_rate": 4.9225888325536154e-05, "loss": 1.5259, "step": 1108 }, { "epoch": 0.08160036790066683, "grad_norm": 0.75, "learning_rate": 4.922445760728037e-05, "loss": 0.7926, "step": 1109 }, { "epoch": 0.08167394803403082, "grad_norm": 1.0, "learning_rate": 4.9223025588943704e-05, "loss": 0.9363, "step": 1110 }, { "epoch": 0.0817475281673948, "grad_norm": 0.91796875, "learning_rate": 4.922159227060301e-05, "loss": 0.8192, "step": 1111 }, { "epoch": 0.0818211083007588, "grad_norm": 0.99609375, "learning_rate": 4.92201576523352e-05, "loss": 1.3752, "step": 1112 }, { "epoch": 0.08189468843412279, "grad_norm": 0.8671875, "learning_rate": 4.9218721734217274e-05, "loss": 0.7666, "step": 1113 }, { "epoch": 0.08196826856748678, "grad_norm": 1.0546875, "learning_rate": 4.9217284516326304e-05, "loss": 1.1508, "step": 1114 }, { "epoch": 0.08204184870085077, "grad_norm": 3.3125, "learning_rate": 4.921584599873941e-05, "loss": 0.8317, "step": 1115 }, { "epoch": 0.08211542883421476, "grad_norm": 1.03125, "learning_rate": 4.9214406181533795e-05, "loss": 1.1477, "step": 1116 }, { "epoch": 0.08218900896757875, "grad_norm": 0.859375, "learning_rate": 4.921296506478674e-05, "loss": 0.7427, "step": 1117 }, { "epoch": 0.08226258910094275, "grad_norm": 0.8828125, "learning_rate": 4.921152264857557e-05, "loss": 0.7214, "step": 1118 }, { "epoch": 0.08233616923430674, "grad_norm": 0.890625, "learning_rate": 4.921007893297772e-05, "loss": 0.8582, "step": 1119 }, { "epoch": 0.08240974936767073, "grad_norm": 0.74609375, "learning_rate": 4.920863391807066e-05, "loss": 0.7496, "step": 1120 }, { "epoch": 0.08248332950103472, "grad_norm": 0.7578125, "learning_rate": 4.920718760393194e-05, "loss": 0.6417, "step": 1121 }, { "epoch": 0.08255690963439871, "grad_norm": 1.203125, "learning_rate": 4.920573999063918e-05, "loss": 1.1525, "step": 1122 }, { "epoch": 0.0826304897677627, "grad_norm": 1.28125, "learning_rate": 4.920429107827007e-05, "loss": 1.1709, "step": 1123 }, { "epoch": 0.08270406990112669, "grad_norm": 1.015625, "learning_rate": 4.9202840866902374e-05, "loss": 0.9924, "step": 1124 }, { "epoch": 0.08277765003449068, "grad_norm": 0.95703125, "learning_rate": 4.9201389356613925e-05, "loss": 1.2457, "step": 1125 }, { "epoch": 0.08285123016785467, "grad_norm": 0.87109375, "learning_rate": 4.919993654748262e-05, "loss": 0.7907, "step": 1126 }, { "epoch": 0.08292481030121868, "grad_norm": 0.9296875, "learning_rate": 4.919848243958642e-05, "loss": 0.8108, "step": 1127 }, { "epoch": 0.08299839043458267, "grad_norm": 1.109375, "learning_rate": 4.9197027033003376e-05, "loss": 0.9923, "step": 1128 }, { "epoch": 0.08307197056794666, "grad_norm": 0.85546875, "learning_rate": 4.9195570327811595e-05, "loss": 0.747, "step": 1129 }, { "epoch": 0.08314555070131065, "grad_norm": 0.73828125, "learning_rate": 4.9194112324089256e-05, "loss": 0.7146, "step": 1130 }, { "epoch": 0.08321913083467464, "grad_norm": 1.5546875, "learning_rate": 4.91926530219146e-05, "loss": 0.8503, "step": 1131 }, { "epoch": 0.08329271096803863, "grad_norm": 0.921875, "learning_rate": 4.919119242136595e-05, "loss": 0.8781, "step": 1132 }, { "epoch": 0.08336629110140262, "grad_norm": 0.93359375, "learning_rate": 4.918973052252169e-05, "loss": 0.7988, "step": 1133 }, { "epoch": 0.08343987123476661, "grad_norm": 0.8203125, "learning_rate": 4.918826732546029e-05, "loss": 0.923, "step": 1134 }, { "epoch": 0.0835134513681306, "grad_norm": 0.7421875, "learning_rate": 4.918680283026026e-05, "loss": 0.6523, "step": 1135 }, { "epoch": 0.0835870315014946, "grad_norm": 0.99609375, "learning_rate": 4.9185337037000215e-05, "loss": 1.0275, "step": 1136 }, { "epoch": 0.08366061163485859, "grad_norm": 1.25, "learning_rate": 4.9183869945758795e-05, "loss": 1.8992, "step": 1137 }, { "epoch": 0.08373419176822258, "grad_norm": 0.80859375, "learning_rate": 4.918240155661477e-05, "loss": 1.0567, "step": 1138 }, { "epoch": 0.08380777190158657, "grad_norm": 0.984375, "learning_rate": 4.9180931869646904e-05, "loss": 0.8504, "step": 1139 }, { "epoch": 0.08388135203495056, "grad_norm": 1.296875, "learning_rate": 4.917946088493412e-05, "loss": 1.3163, "step": 1140 }, { "epoch": 0.08395493216831455, "grad_norm": 0.875, "learning_rate": 4.917798860255533e-05, "loss": 0.9608, "step": 1141 }, { "epoch": 0.08402851230167854, "grad_norm": 0.82421875, "learning_rate": 4.917651502258955e-05, "loss": 0.8979, "step": 1142 }, { "epoch": 0.08410209243504253, "grad_norm": 0.8671875, "learning_rate": 4.917504014511587e-05, "loss": 0.7785, "step": 1143 }, { "epoch": 0.08417567256840654, "grad_norm": 0.91796875, "learning_rate": 4.917356397021346e-05, "loss": 0.8761, "step": 1144 }, { "epoch": 0.08424925270177053, "grad_norm": 0.99609375, "learning_rate": 4.917208649796152e-05, "loss": 0.8724, "step": 1145 }, { "epoch": 0.08432283283513452, "grad_norm": 0.984375, "learning_rate": 4.9170607728439355e-05, "loss": 0.8965, "step": 1146 }, { "epoch": 0.08439641296849851, "grad_norm": 0.875, "learning_rate": 4.916912766172632e-05, "loss": 0.82, "step": 1147 }, { "epoch": 0.0844699931018625, "grad_norm": 0.83203125, "learning_rate": 4.916764629790186e-05, "loss": 0.9645, "step": 1148 }, { "epoch": 0.08454357323522649, "grad_norm": 0.984375, "learning_rate": 4.916616363704547e-05, "loss": 0.9855, "step": 1149 }, { "epoch": 0.08461715336859048, "grad_norm": 0.921875, "learning_rate": 4.916467967923671e-05, "loss": 0.7346, "step": 1150 }, { "epoch": 0.08469073350195447, "grad_norm": 0.80859375, "learning_rate": 4.916319442455524e-05, "loss": 0.8516, "step": 1151 }, { "epoch": 0.08476431363531846, "grad_norm": 1.015625, "learning_rate": 4.916170787308076e-05, "loss": 0.9306, "step": 1152 }, { "epoch": 0.08483789376868246, "grad_norm": 0.91796875, "learning_rate": 4.9160220024893064e-05, "loss": 1.1613, "step": 1153 }, { "epoch": 0.08491147390204645, "grad_norm": 0.76171875, "learning_rate": 4.915873088007198e-05, "loss": 0.7963, "step": 1154 }, { "epoch": 0.08498505403541044, "grad_norm": 1.1171875, "learning_rate": 4.9157240438697446e-05, "loss": 0.9718, "step": 1155 }, { "epoch": 0.08505863416877443, "grad_norm": 1.0078125, "learning_rate": 4.915574870084944e-05, "loss": 1.3683, "step": 1156 }, { "epoch": 0.08513221430213842, "grad_norm": 0.85546875, "learning_rate": 4.9154255666608026e-05, "loss": 0.5786, "step": 1157 }, { "epoch": 0.08520579443550241, "grad_norm": 1.1875, "learning_rate": 4.915276133605333e-05, "loss": 0.793, "step": 1158 }, { "epoch": 0.0852793745688664, "grad_norm": 1.0703125, "learning_rate": 4.915126570926555e-05, "loss": 1.0918, "step": 1159 }, { "epoch": 0.0853529547022304, "grad_norm": 3.828125, "learning_rate": 4.914976878632496e-05, "loss": 1.3251, "step": 1160 }, { "epoch": 0.08542653483559438, "grad_norm": 0.98828125, "learning_rate": 4.914827056731188e-05, "loss": 0.9405, "step": 1161 }, { "epoch": 0.08550011496895839, "grad_norm": 0.73828125, "learning_rate": 4.914677105230674e-05, "loss": 0.6488, "step": 1162 }, { "epoch": 0.08557369510232238, "grad_norm": 1.1171875, "learning_rate": 4.914527024139e-05, "loss": 1.388, "step": 1163 }, { "epoch": 0.08564727523568637, "grad_norm": 0.98828125, "learning_rate": 4.914376813464221e-05, "loss": 0.9075, "step": 1164 }, { "epoch": 0.08572085536905036, "grad_norm": 0.75, "learning_rate": 4.914226473214399e-05, "loss": 0.6648, "step": 1165 }, { "epoch": 0.08579443550241435, "grad_norm": 1.1640625, "learning_rate": 4.9140760033976016e-05, "loss": 1.1211, "step": 1166 }, { "epoch": 0.08586801563577834, "grad_norm": 0.765625, "learning_rate": 4.913925404021905e-05, "loss": 0.6689, "step": 1167 }, { "epoch": 0.08594159576914233, "grad_norm": 0.84765625, "learning_rate": 4.9137746750953907e-05, "loss": 0.8045, "step": 1168 }, { "epoch": 0.08601517590250632, "grad_norm": 0.984375, "learning_rate": 4.9136238166261485e-05, "loss": 0.9566, "step": 1169 }, { "epoch": 0.08608875603587031, "grad_norm": 0.9609375, "learning_rate": 4.913472828622275e-05, "loss": 1.16, "step": 1170 }, { "epoch": 0.08616233616923431, "grad_norm": 1.046875, "learning_rate": 4.9133217110918726e-05, "loss": 1.3695, "step": 1171 }, { "epoch": 0.0862359163025983, "grad_norm": 0.92578125, "learning_rate": 4.913170464043053e-05, "loss": 0.8043, "step": 1172 }, { "epoch": 0.0863094964359623, "grad_norm": 1.1484375, "learning_rate": 4.913019087483932e-05, "loss": 1.3014, "step": 1173 }, { "epoch": 0.08638307656932628, "grad_norm": 1.0234375, "learning_rate": 4.9128675814226346e-05, "loss": 1.0793, "step": 1174 }, { "epoch": 0.08645665670269027, "grad_norm": 0.90234375, "learning_rate": 4.912715945867291e-05, "loss": 1.1457, "step": 1175 }, { "epoch": 0.08653023683605426, "grad_norm": 0.80078125, "learning_rate": 4.9125641808260395e-05, "loss": 0.7501, "step": 1176 }, { "epoch": 0.08660381696941825, "grad_norm": 1.0390625, "learning_rate": 4.9124122863070255e-05, "loss": 0.9969, "step": 1177 }, { "epoch": 0.08667739710278224, "grad_norm": 0.97265625, "learning_rate": 4.9122602623184e-05, "loss": 0.8683, "step": 1178 }, { "epoch": 0.08675097723614623, "grad_norm": 0.828125, "learning_rate": 4.912108108868322e-05, "loss": 0.7663, "step": 1179 }, { "epoch": 0.08682455736951024, "grad_norm": 0.96484375, "learning_rate": 4.911955825964958e-05, "loss": 0.8899, "step": 1180 }, { "epoch": 0.08689813750287423, "grad_norm": 0.71484375, "learning_rate": 4.91180341361648e-05, "loss": 0.7079, "step": 1181 }, { "epoch": 0.08697171763623822, "grad_norm": 0.95703125, "learning_rate": 4.91165087183107e-05, "loss": 0.8011, "step": 1182 }, { "epoch": 0.08704529776960221, "grad_norm": 1.0546875, "learning_rate": 4.911498200616911e-05, "loss": 1.0859, "step": 1183 }, { "epoch": 0.0871188779029662, "grad_norm": 0.9609375, "learning_rate": 4.911345399982198e-05, "loss": 1.1429, "step": 1184 }, { "epoch": 0.08719245803633019, "grad_norm": 0.8671875, "learning_rate": 4.911192469935132e-05, "loss": 0.8551, "step": 1185 }, { "epoch": 0.08726603816969418, "grad_norm": 0.78515625, "learning_rate": 4.9110394104839206e-05, "loss": 0.9391, "step": 1186 }, { "epoch": 0.08733961830305817, "grad_norm": 1.046875, "learning_rate": 4.910886221636777e-05, "loss": 1.4532, "step": 1187 }, { "epoch": 0.08741319843642216, "grad_norm": 0.94140625, "learning_rate": 4.9107329034019245e-05, "loss": 0.7465, "step": 1188 }, { "epoch": 0.08748677856978616, "grad_norm": 1.015625, "learning_rate": 4.910579455787589e-05, "loss": 1.4572, "step": 1189 }, { "epoch": 0.08756035870315015, "grad_norm": 1.0078125, "learning_rate": 4.910425878802008e-05, "loss": 0.8336, "step": 1190 }, { "epoch": 0.08763393883651414, "grad_norm": 1.1015625, "learning_rate": 4.9102721724534233e-05, "loss": 0.9518, "step": 1191 }, { "epoch": 0.08770751896987813, "grad_norm": 0.8515625, "learning_rate": 4.910118336750083e-05, "loss": 1.0194, "step": 1192 }, { "epoch": 0.08778109910324212, "grad_norm": 1.1328125, "learning_rate": 4.909964371700243e-05, "loss": 1.04, "step": 1193 }, { "epoch": 0.08785467923660611, "grad_norm": 1.3125, "learning_rate": 4.909810277312168e-05, "loss": 1.8463, "step": 1194 }, { "epoch": 0.0879282593699701, "grad_norm": 0.76953125, "learning_rate": 4.9096560535941264e-05, "loss": 0.7999, "step": 1195 }, { "epoch": 0.0880018395033341, "grad_norm": 1.171875, "learning_rate": 4.9095017005543956e-05, "loss": 1.28, "step": 1196 }, { "epoch": 0.08807541963669809, "grad_norm": 1.109375, "learning_rate": 4.909347218201259e-05, "loss": 1.0784, "step": 1197 }, { "epoch": 0.08814899977006209, "grad_norm": 0.921875, "learning_rate": 4.9091926065430084e-05, "loss": 0.7425, "step": 1198 }, { "epoch": 0.08822257990342608, "grad_norm": 0.91796875, "learning_rate": 4.90903786558794e-05, "loss": 0.8728, "step": 1199 }, { "epoch": 0.08829616003679007, "grad_norm": 1.109375, "learning_rate": 4.9088829953443605e-05, "loss": 1.4336, "step": 1200 }, { "epoch": 0.08836974017015406, "grad_norm": 0.921875, "learning_rate": 4.9087279958205794e-05, "loss": 0.9031, "step": 1201 }, { "epoch": 0.08844332030351805, "grad_norm": 0.75, "learning_rate": 4.908572867024917e-05, "loss": 0.7032, "step": 1202 }, { "epoch": 0.08851690043688204, "grad_norm": 0.95703125, "learning_rate": 4.9084176089656975e-05, "loss": 0.9688, "step": 1203 }, { "epoch": 0.08859048057024603, "grad_norm": 1.09375, "learning_rate": 4.908262221651254e-05, "loss": 1.2545, "step": 1204 }, { "epoch": 0.08866406070361002, "grad_norm": 1.0, "learning_rate": 4.9081067050899257e-05, "loss": 1.2364, "step": 1205 }, { "epoch": 0.08873764083697401, "grad_norm": 1.1640625, "learning_rate": 4.9079510592900583e-05, "loss": 1.2256, "step": 1206 }, { "epoch": 0.08881122097033801, "grad_norm": 1.0078125, "learning_rate": 4.907795284260006e-05, "loss": 1.3518, "step": 1207 }, { "epoch": 0.088884801103702, "grad_norm": 1.0234375, "learning_rate": 4.907639380008129e-05, "loss": 1.0418, "step": 1208 }, { "epoch": 0.088958381237066, "grad_norm": 1.0703125, "learning_rate": 4.9074833465427925e-05, "loss": 0.8984, "step": 1209 }, { "epoch": 0.08903196137042999, "grad_norm": 0.87890625, "learning_rate": 4.9073271838723734e-05, "loss": 0.8314, "step": 1210 }, { "epoch": 0.08910554150379398, "grad_norm": 1.0859375, "learning_rate": 4.90717089200525e-05, "loss": 0.9828, "step": 1211 }, { "epoch": 0.08917912163715797, "grad_norm": 0.921875, "learning_rate": 4.907014470949812e-05, "loss": 0.8529, "step": 1212 }, { "epoch": 0.08925270177052196, "grad_norm": 1.0546875, "learning_rate": 4.9068579207144525e-05, "loss": 1.2085, "step": 1213 }, { "epoch": 0.08932628190388595, "grad_norm": 1.0625, "learning_rate": 4.906701241307575e-05, "loss": 1.207, "step": 1214 }, { "epoch": 0.08939986203724994, "grad_norm": 0.96484375, "learning_rate": 4.9065444327375876e-05, "loss": 1.1959, "step": 1215 }, { "epoch": 0.08947344217061394, "grad_norm": 0.9140625, "learning_rate": 4.9063874950129066e-05, "loss": 1.0856, "step": 1216 }, { "epoch": 0.08954702230397793, "grad_norm": 0.8828125, "learning_rate": 4.9062304281419525e-05, "loss": 0.9216, "step": 1217 }, { "epoch": 0.08962060243734192, "grad_norm": 1.078125, "learning_rate": 4.906073232133157e-05, "loss": 1.2706, "step": 1218 }, { "epoch": 0.08969418257070591, "grad_norm": 0.91015625, "learning_rate": 4.905915906994955e-05, "loss": 0.9168, "step": 1219 }, { "epoch": 0.0897677627040699, "grad_norm": 1.1328125, "learning_rate": 4.9057584527357894e-05, "loss": 1.1207, "step": 1220 }, { "epoch": 0.08984134283743389, "grad_norm": 0.7890625, "learning_rate": 4.905600869364113e-05, "loss": 0.8928, "step": 1221 }, { "epoch": 0.08991492297079788, "grad_norm": 1.0546875, "learning_rate": 4.905443156888381e-05, "loss": 1.1832, "step": 1222 }, { "epoch": 0.08998850310416187, "grad_norm": 0.91796875, "learning_rate": 4.905285315317058e-05, "loss": 0.9636, "step": 1223 }, { "epoch": 0.09006208323752586, "grad_norm": 0.81640625, "learning_rate": 4.905127344658615e-05, "loss": 0.7163, "step": 1224 }, { "epoch": 0.09013566337088987, "grad_norm": 1.3046875, "learning_rate": 4.90496924492153e-05, "loss": 1.7359, "step": 1225 }, { "epoch": 0.09020924350425386, "grad_norm": 0.984375, "learning_rate": 4.904811016114288e-05, "loss": 1.1708, "step": 1226 }, { "epoch": 0.09028282363761785, "grad_norm": 1.0859375, "learning_rate": 4.9046526582453814e-05, "loss": 0.8912, "step": 1227 }, { "epoch": 0.09035640377098184, "grad_norm": 0.85546875, "learning_rate": 4.904494171323307e-05, "loss": 0.7695, "step": 1228 }, { "epoch": 0.09042998390434583, "grad_norm": 1.0390625, "learning_rate": 4.904335555356573e-05, "loss": 0.9219, "step": 1229 }, { "epoch": 0.09050356403770982, "grad_norm": 0.8984375, "learning_rate": 4.9041768103536904e-05, "loss": 0.8511, "step": 1230 }, { "epoch": 0.0905771441710738, "grad_norm": 0.92578125, "learning_rate": 4.90401793632318e-05, "loss": 0.8241, "step": 1231 }, { "epoch": 0.0906507243044378, "grad_norm": 0.99609375, "learning_rate": 4.903858933273566e-05, "loss": 1.0765, "step": 1232 }, { "epoch": 0.0907243044378018, "grad_norm": 1.0234375, "learning_rate": 4.903699801213385e-05, "loss": 1.3338, "step": 1233 }, { "epoch": 0.09079788457116579, "grad_norm": 0.89453125, "learning_rate": 4.9035405401511745e-05, "loss": 1.0775, "step": 1234 }, { "epoch": 0.09087146470452978, "grad_norm": 1.0703125, "learning_rate": 4.9033811500954836e-05, "loss": 0.9106, "step": 1235 }, { "epoch": 0.09094504483789377, "grad_norm": 0.7109375, "learning_rate": 4.903221631054865e-05, "loss": 0.6733, "step": 1236 }, { "epoch": 0.09101862497125776, "grad_norm": 0.87109375, "learning_rate": 4.903061983037881e-05, "loss": 0.9866, "step": 1237 }, { "epoch": 0.09109220510462175, "grad_norm": 0.9140625, "learning_rate": 4.902902206053099e-05, "loss": 1.0376, "step": 1238 }, { "epoch": 0.09116578523798574, "grad_norm": 0.859375, "learning_rate": 4.902742300109094e-05, "loss": 0.9408, "step": 1239 }, { "epoch": 0.09123936537134973, "grad_norm": 1.0234375, "learning_rate": 4.902582265214448e-05, "loss": 1.5036, "step": 1240 }, { "epoch": 0.09131294550471372, "grad_norm": 0.87109375, "learning_rate": 4.9024221013777494e-05, "loss": 1.1578, "step": 1241 }, { "epoch": 0.09138652563807773, "grad_norm": 0.859375, "learning_rate": 4.902261808607594e-05, "loss": 0.8581, "step": 1242 }, { "epoch": 0.09146010577144172, "grad_norm": 0.87109375, "learning_rate": 4.902101386912585e-05, "loss": 0.941, "step": 1243 }, { "epoch": 0.0915336859048057, "grad_norm": 0.9921875, "learning_rate": 4.9019408363013305e-05, "loss": 0.9933, "step": 1244 }, { "epoch": 0.0916072660381697, "grad_norm": 0.90625, "learning_rate": 4.9017801567824494e-05, "loss": 0.9652, "step": 1245 }, { "epoch": 0.09168084617153369, "grad_norm": 1.0, "learning_rate": 4.901619348364563e-05, "loss": 1.0716, "step": 1246 }, { "epoch": 0.09175442630489768, "grad_norm": 1.1171875, "learning_rate": 4.901458411056302e-05, "loss": 1.1948, "step": 1247 }, { "epoch": 0.09182800643826167, "grad_norm": 0.9453125, "learning_rate": 4.901297344866304e-05, "loss": 0.9243, "step": 1248 }, { "epoch": 0.09190158657162566, "grad_norm": 1.015625, "learning_rate": 4.901136149803213e-05, "loss": 1.1386, "step": 1249 }, { "epoch": 0.09197516670498965, "grad_norm": 1.0390625, "learning_rate": 4.900974825875679e-05, "loss": 1.0933, "step": 1250 }, { "epoch": 0.09204874683835365, "grad_norm": 0.71484375, "learning_rate": 4.900813373092362e-05, "loss": 0.8543, "step": 1251 }, { "epoch": 0.09212232697171764, "grad_norm": 0.97265625, "learning_rate": 4.900651791461926e-05, "loss": 0.9306, "step": 1252 }, { "epoch": 0.09219590710508163, "grad_norm": 0.82421875, "learning_rate": 4.900490080993042e-05, "loss": 0.8872, "step": 1253 }, { "epoch": 0.09226948723844562, "grad_norm": 1.0546875, "learning_rate": 4.900328241694389e-05, "loss": 1.4837, "step": 1254 }, { "epoch": 0.09234306737180961, "grad_norm": 0.75, "learning_rate": 4.900166273574653e-05, "loss": 0.508, "step": 1255 }, { "epoch": 0.0924166475051736, "grad_norm": 0.87890625, "learning_rate": 4.9000041766425264e-05, "loss": 0.8884, "step": 1256 }, { "epoch": 0.09249022763853759, "grad_norm": 0.80859375, "learning_rate": 4.8998419509067086e-05, "loss": 0.776, "step": 1257 }, { "epoch": 0.09256380777190158, "grad_norm": 1.0078125, "learning_rate": 4.899679596375906e-05, "loss": 0.9784, "step": 1258 }, { "epoch": 0.09263738790526557, "grad_norm": 1.1015625, "learning_rate": 4.899517113058831e-05, "loss": 1.8613, "step": 1259 }, { "epoch": 0.09271096803862958, "grad_norm": 1.109375, "learning_rate": 4.899354500964206e-05, "loss": 1.2688, "step": 1260 }, { "epoch": 0.09278454817199357, "grad_norm": 0.78515625, "learning_rate": 4.8991917601007566e-05, "loss": 0.7221, "step": 1261 }, { "epoch": 0.09285812830535756, "grad_norm": 1.9296875, "learning_rate": 4.899028890477216e-05, "loss": 0.9086, "step": 1262 }, { "epoch": 0.09293170843872155, "grad_norm": 0.83203125, "learning_rate": 4.898865892102326e-05, "loss": 0.8697, "step": 1263 }, { "epoch": 0.09300528857208554, "grad_norm": 1.0625, "learning_rate": 4.898702764984835e-05, "loss": 1.2328, "step": 1264 }, { "epoch": 0.09307886870544953, "grad_norm": 0.9375, "learning_rate": 4.898539509133496e-05, "loss": 1.3106, "step": 1265 }, { "epoch": 0.09315244883881352, "grad_norm": 0.828125, "learning_rate": 4.898376124557073e-05, "loss": 0.9223, "step": 1266 }, { "epoch": 0.09322602897217751, "grad_norm": 1.0625, "learning_rate": 4.898212611264333e-05, "loss": 1.0953, "step": 1267 }, { "epoch": 0.0932996091055415, "grad_norm": 0.80859375, "learning_rate": 4.898048969264051e-05, "loss": 0.8959, "step": 1268 }, { "epoch": 0.0933731892389055, "grad_norm": 0.9296875, "learning_rate": 4.897885198565011e-05, "loss": 0.7999, "step": 1269 }, { "epoch": 0.09344676937226949, "grad_norm": 0.96875, "learning_rate": 4.8977212991760014e-05, "loss": 0.9548, "step": 1270 }, { "epoch": 0.09352034950563348, "grad_norm": 0.671875, "learning_rate": 4.897557271105817e-05, "loss": 0.6872, "step": 1271 }, { "epoch": 0.09359392963899747, "grad_norm": 0.9140625, "learning_rate": 4.897393114363264e-05, "loss": 0.8207, "step": 1272 }, { "epoch": 0.09366750977236146, "grad_norm": 1.2265625, "learning_rate": 4.8972288289571494e-05, "loss": 1.1634, "step": 1273 }, { "epoch": 0.09374108990572545, "grad_norm": 0.78515625, "learning_rate": 4.8970644148962916e-05, "loss": 0.9048, "step": 1274 }, { "epoch": 0.09381467003908944, "grad_norm": 0.8046875, "learning_rate": 4.8968998721895145e-05, "loss": 1.0288, "step": 1275 }, { "epoch": 0.09388825017245343, "grad_norm": 0.83984375, "learning_rate": 4.896735200845647e-05, "loss": 0.8646, "step": 1276 }, { "epoch": 0.09396183030581742, "grad_norm": 0.94140625, "learning_rate": 4.896570400873529e-05, "loss": 0.87, "step": 1277 }, { "epoch": 0.09403541043918143, "grad_norm": 0.83203125, "learning_rate": 4.896405472282004e-05, "loss": 1.0347, "step": 1278 }, { "epoch": 0.09410899057254542, "grad_norm": 0.7109375, "learning_rate": 4.8962404150799236e-05, "loss": 0.841, "step": 1279 }, { "epoch": 0.09418257070590941, "grad_norm": 0.73828125, "learning_rate": 4.896075229276146e-05, "loss": 0.8673, "step": 1280 }, { "epoch": 0.0942561508392734, "grad_norm": 0.9921875, "learning_rate": 4.8959099148795365e-05, "loss": 0.9358, "step": 1281 }, { "epoch": 0.09432973097263739, "grad_norm": 0.84765625, "learning_rate": 4.895744471898967e-05, "loss": 0.6897, "step": 1282 }, { "epoch": 0.09440331110600138, "grad_norm": 0.88671875, "learning_rate": 4.895578900343316e-05, "loss": 0.806, "step": 1283 }, { "epoch": 0.09447689123936537, "grad_norm": 1.203125, "learning_rate": 4.89541320022147e-05, "loss": 0.9246, "step": 1284 }, { "epoch": 0.09455047137272936, "grad_norm": 1.140625, "learning_rate": 4.895247371542323e-05, "loss": 1.5952, "step": 1285 }, { "epoch": 0.09462405150609335, "grad_norm": 1.09375, "learning_rate": 4.8950814143147725e-05, "loss": 1.1463, "step": 1286 }, { "epoch": 0.09469763163945735, "grad_norm": 0.765625, "learning_rate": 4.894915328547727e-05, "loss": 0.7581, "step": 1287 }, { "epoch": 0.09477121177282134, "grad_norm": 0.9296875, "learning_rate": 4.894749114250098e-05, "loss": 0.9762, "step": 1288 }, { "epoch": 0.09484479190618533, "grad_norm": 0.89453125, "learning_rate": 4.894582771430808e-05, "loss": 0.6978, "step": 1289 }, { "epoch": 0.09491837203954932, "grad_norm": 0.9375, "learning_rate": 4.8944163000987834e-05, "loss": 1.0474, "step": 1290 }, { "epoch": 0.09499195217291331, "grad_norm": 0.94140625, "learning_rate": 4.894249700262958e-05, "loss": 0.8466, "step": 1291 }, { "epoch": 0.0950655323062773, "grad_norm": 1.1015625, "learning_rate": 4.894082971932274e-05, "loss": 1.1338, "step": 1292 }, { "epoch": 0.0951391124396413, "grad_norm": 1.0703125, "learning_rate": 4.893916115115678e-05, "loss": 1.1831, "step": 1293 }, { "epoch": 0.09521269257300528, "grad_norm": 0.8125, "learning_rate": 4.893749129822125e-05, "loss": 1.0496, "step": 1294 }, { "epoch": 0.09528627270636927, "grad_norm": 0.7578125, "learning_rate": 4.893582016060578e-05, "loss": 0.7504, "step": 1295 }, { "epoch": 0.09535985283973328, "grad_norm": 0.75, "learning_rate": 4.893414773840005e-05, "loss": 0.673, "step": 1296 }, { "epoch": 0.09543343297309727, "grad_norm": 1.0078125, "learning_rate": 4.893247403169382e-05, "loss": 1.3115, "step": 1297 }, { "epoch": 0.09550701310646126, "grad_norm": 1.015625, "learning_rate": 4.89307990405769e-05, "loss": 0.915, "step": 1298 }, { "epoch": 0.09558059323982525, "grad_norm": 0.83984375, "learning_rate": 4.8929122765139206e-05, "loss": 0.7835, "step": 1299 }, { "epoch": 0.09565417337318924, "grad_norm": 1.0390625, "learning_rate": 4.892744520547069e-05, "loss": 1.058, "step": 1300 }, { "epoch": 0.09572775350655323, "grad_norm": 0.86328125, "learning_rate": 4.8925766361661376e-05, "loss": 1.0108, "step": 1301 }, { "epoch": 0.09580133363991722, "grad_norm": 1.09375, "learning_rate": 4.892408623380137e-05, "loss": 0.8173, "step": 1302 }, { "epoch": 0.09587491377328121, "grad_norm": 1.09375, "learning_rate": 4.892240482198084e-05, "loss": 0.7766, "step": 1303 }, { "epoch": 0.0959484939066452, "grad_norm": 0.7265625, "learning_rate": 4.892072212629003e-05, "loss": 0.6871, "step": 1304 }, { "epoch": 0.0960220740400092, "grad_norm": 0.8359375, "learning_rate": 4.8919038146819244e-05, "loss": 1.2853, "step": 1305 }, { "epoch": 0.0960956541733732, "grad_norm": 1.09375, "learning_rate": 4.891735288365886e-05, "loss": 1.4199, "step": 1306 }, { "epoch": 0.09616923430673718, "grad_norm": 1.0546875, "learning_rate": 4.8915666336899315e-05, "loss": 1.5016, "step": 1307 }, { "epoch": 0.09624281444010117, "grad_norm": 0.86328125, "learning_rate": 4.8913978506631134e-05, "loss": 0.9186, "step": 1308 }, { "epoch": 0.09631639457346516, "grad_norm": 0.85546875, "learning_rate": 4.8912289392944885e-05, "loss": 0.9134, "step": 1309 }, { "epoch": 0.09638997470682915, "grad_norm": 0.91015625, "learning_rate": 4.8910598995931236e-05, "loss": 0.7541, "step": 1310 }, { "epoch": 0.09646355484019314, "grad_norm": 0.96484375, "learning_rate": 4.890890731568089e-05, "loss": 1.0797, "step": 1311 }, { "epoch": 0.09653713497355713, "grad_norm": 0.94140625, "learning_rate": 4.890721435228465e-05, "loss": 1.0187, "step": 1312 }, { "epoch": 0.09661071510692112, "grad_norm": 1.09375, "learning_rate": 4.8905520105833375e-05, "loss": 1.4667, "step": 1313 }, { "epoch": 0.09668429524028513, "grad_norm": 0.9765625, "learning_rate": 4.8903824576417986e-05, "loss": 0.9393, "step": 1314 }, { "epoch": 0.09675787537364912, "grad_norm": 1.171875, "learning_rate": 4.890212776412948e-05, "loss": 1.2802, "step": 1315 }, { "epoch": 0.09683145550701311, "grad_norm": 0.93359375, "learning_rate": 4.8900429669058914e-05, "loss": 0.9362, "step": 1316 }, { "epoch": 0.0969050356403771, "grad_norm": 1.171875, "learning_rate": 4.889873029129743e-05, "loss": 1.23, "step": 1317 }, { "epoch": 0.09697861577374109, "grad_norm": 0.890625, "learning_rate": 4.8897029630936244e-05, "loss": 0.7691, "step": 1318 }, { "epoch": 0.09705219590710508, "grad_norm": 0.890625, "learning_rate": 4.88953276880666e-05, "loss": 0.739, "step": 1319 }, { "epoch": 0.09712577604046907, "grad_norm": 1.015625, "learning_rate": 4.8893624462779854e-05, "loss": 0.9665, "step": 1320 }, { "epoch": 0.09719935617383306, "grad_norm": 1.0234375, "learning_rate": 4.889191995516741e-05, "loss": 1.2012, "step": 1321 }, { "epoch": 0.09727293630719705, "grad_norm": 0.9765625, "learning_rate": 4.8890214165320746e-05, "loss": 0.9751, "step": 1322 }, { "epoch": 0.09734651644056105, "grad_norm": 1.0859375, "learning_rate": 4.888850709333141e-05, "loss": 1.0713, "step": 1323 }, { "epoch": 0.09742009657392504, "grad_norm": 1.015625, "learning_rate": 4.888679873929103e-05, "loss": 0.8512, "step": 1324 }, { "epoch": 0.09749367670728903, "grad_norm": 1.015625, "learning_rate": 4.888508910329126e-05, "loss": 1.0594, "step": 1325 }, { "epoch": 0.09756725684065302, "grad_norm": 1.2265625, "learning_rate": 4.888337818542388e-05, "loss": 1.2818, "step": 1326 }, { "epoch": 0.09764083697401701, "grad_norm": 1.078125, "learning_rate": 4.88816659857807e-05, "loss": 1.1116, "step": 1327 }, { "epoch": 0.097714417107381, "grad_norm": 0.71484375, "learning_rate": 4.887995250445361e-05, "loss": 0.6879, "step": 1328 }, { "epoch": 0.097787997240745, "grad_norm": 1.09375, "learning_rate": 4.8878237741534584e-05, "loss": 1.156, "step": 1329 }, { "epoch": 0.09786157737410899, "grad_norm": 0.9140625, "learning_rate": 4.8876521697115627e-05, "loss": 0.971, "step": 1330 }, { "epoch": 0.09793515750747299, "grad_norm": 0.78515625, "learning_rate": 4.887480437128885e-05, "loss": 0.8463, "step": 1331 }, { "epoch": 0.09800873764083698, "grad_norm": 1.0078125, "learning_rate": 4.8873085764146406e-05, "loss": 1.2374, "step": 1332 }, { "epoch": 0.09808231777420097, "grad_norm": 1.015625, "learning_rate": 4.887136587578055e-05, "loss": 1.104, "step": 1333 }, { "epoch": 0.09815589790756496, "grad_norm": 1.0, "learning_rate": 4.886964470628357e-05, "loss": 0.9857, "step": 1334 }, { "epoch": 0.09822947804092895, "grad_norm": 0.84765625, "learning_rate": 4.886792225574784e-05, "loss": 0.6217, "step": 1335 }, { "epoch": 0.09830305817429294, "grad_norm": 0.8046875, "learning_rate": 4.88661985242658e-05, "loss": 0.7217, "step": 1336 }, { "epoch": 0.09837663830765693, "grad_norm": 1.2421875, "learning_rate": 4.8864473511929975e-05, "loss": 1.8299, "step": 1337 }, { "epoch": 0.09845021844102092, "grad_norm": 0.88671875, "learning_rate": 4.8862747218832924e-05, "loss": 0.8521, "step": 1338 }, { "epoch": 0.09852379857438491, "grad_norm": 0.98046875, "learning_rate": 4.8861019645067296e-05, "loss": 0.7501, "step": 1339 }, { "epoch": 0.09859737870774891, "grad_norm": 0.94140625, "learning_rate": 4.8859290790725816e-05, "loss": 0.9327, "step": 1340 }, { "epoch": 0.0986709588411129, "grad_norm": 1.171875, "learning_rate": 4.885756065590126e-05, "loss": 1.3556, "step": 1341 }, { "epoch": 0.0987445389744769, "grad_norm": 0.91015625, "learning_rate": 4.8855829240686486e-05, "loss": 0.8993, "step": 1342 }, { "epoch": 0.09881811910784088, "grad_norm": 1.0, "learning_rate": 4.885409654517441e-05, "loss": 0.963, "step": 1343 }, { "epoch": 0.09889169924120488, "grad_norm": 1.2734375, "learning_rate": 4.885236256945803e-05, "loss": 1.1411, "step": 1344 }, { "epoch": 0.09896527937456887, "grad_norm": 0.9296875, "learning_rate": 4.8850627313630405e-05, "loss": 0.8609, "step": 1345 }, { "epoch": 0.09903885950793286, "grad_norm": 1.03125, "learning_rate": 4.884889077778465e-05, "loss": 0.8643, "step": 1346 }, { "epoch": 0.09911243964129685, "grad_norm": 1.0078125, "learning_rate": 4.8847152962013974e-05, "loss": 0.8406, "step": 1347 }, { "epoch": 0.09918601977466084, "grad_norm": 1.0859375, "learning_rate": 4.884541386641165e-05, "loss": 0.9565, "step": 1348 }, { "epoch": 0.09925959990802484, "grad_norm": 0.9140625, "learning_rate": 4.8843673491070984e-05, "loss": 0.8559, "step": 1349 }, { "epoch": 0.09933318004138883, "grad_norm": 0.90234375, "learning_rate": 4.884193183608541e-05, "loss": 1.0358, "step": 1350 }, { "epoch": 0.09940676017475282, "grad_norm": 1.0234375, "learning_rate": 4.8840188901548375e-05, "loss": 1.3706, "step": 1351 }, { "epoch": 0.09948034030811681, "grad_norm": 1.0390625, "learning_rate": 4.883844468755344e-05, "loss": 0.9408, "step": 1352 }, { "epoch": 0.0995539204414808, "grad_norm": 0.7109375, "learning_rate": 4.88366991941942e-05, "loss": 0.655, "step": 1353 }, { "epoch": 0.09962750057484479, "grad_norm": 0.984375, "learning_rate": 4.883495242156433e-05, "loss": 0.9964, "step": 1354 }, { "epoch": 0.09970108070820878, "grad_norm": 1.6640625, "learning_rate": 4.8833204369757586e-05, "loss": 1.0287, "step": 1355 }, { "epoch": 0.09977466084157277, "grad_norm": 0.8515625, "learning_rate": 4.883145503886778e-05, "loss": 1.0197, "step": 1356 }, { "epoch": 0.09984824097493676, "grad_norm": 1.0859375, "learning_rate": 4.882970442898879e-05, "loss": 1.3703, "step": 1357 }, { "epoch": 0.09992182110830077, "grad_norm": 0.85546875, "learning_rate": 4.8827952540214564e-05, "loss": 0.9072, "step": 1358 }, { "epoch": 0.09999540124166476, "grad_norm": 0.67578125, "learning_rate": 4.8826199372639136e-05, "loss": 0.6855, "step": 1359 }, { "epoch": 0.10006898137502875, "grad_norm": 0.9609375, "learning_rate": 4.8824444926356593e-05, "loss": 0.9797, "step": 1360 }, { "epoch": 0.10014256150839274, "grad_norm": 0.66015625, "learning_rate": 4.882268920146109e-05, "loss": 0.7094, "step": 1361 }, { "epoch": 0.10021614164175673, "grad_norm": 1.15625, "learning_rate": 4.882093219804684e-05, "loss": 1.0087, "step": 1362 }, { "epoch": 0.10028972177512072, "grad_norm": 0.83984375, "learning_rate": 4.881917391620816e-05, "loss": 0.6034, "step": 1363 }, { "epoch": 0.1003633019084847, "grad_norm": 1.0859375, "learning_rate": 4.8817414356039406e-05, "loss": 0.937, "step": 1364 }, { "epoch": 0.1004368820418487, "grad_norm": 0.86328125, "learning_rate": 4.8815653517635e-05, "loss": 0.7939, "step": 1365 }, { "epoch": 0.10051046217521269, "grad_norm": 1.09375, "learning_rate": 4.881389140108946e-05, "loss": 0.91, "step": 1366 }, { "epoch": 0.10058404230857669, "grad_norm": 0.8203125, "learning_rate": 4.8812128006497335e-05, "loss": 0.9494, "step": 1367 }, { "epoch": 0.10065762244194068, "grad_norm": 0.96484375, "learning_rate": 4.881036333395329e-05, "loss": 1.3114, "step": 1368 }, { "epoch": 0.10073120257530467, "grad_norm": 0.83984375, "learning_rate": 4.8808597383552e-05, "loss": 0.831, "step": 1369 }, { "epoch": 0.10080478270866866, "grad_norm": 1.078125, "learning_rate": 4.880683015538827e-05, "loss": 1.3189, "step": 1370 }, { "epoch": 0.10087836284203265, "grad_norm": 1.3125, "learning_rate": 4.8805061649556924e-05, "loss": 1.0953, "step": 1371 }, { "epoch": 0.10095194297539664, "grad_norm": 1.078125, "learning_rate": 4.8803291866152876e-05, "loss": 1.2109, "step": 1372 }, { "epoch": 0.10102552310876063, "grad_norm": 0.859375, "learning_rate": 4.880152080527112e-05, "loss": 0.7837, "step": 1373 }, { "epoch": 0.10109910324212462, "grad_norm": 0.96484375, "learning_rate": 4.8799748467006694e-05, "loss": 1.2422, "step": 1374 }, { "epoch": 0.10117268337548861, "grad_norm": 1.3203125, "learning_rate": 4.879797485145472e-05, "loss": 1.377, "step": 1375 }, { "epoch": 0.10124626350885262, "grad_norm": 1.0703125, "learning_rate": 4.879619995871038e-05, "loss": 1.1502, "step": 1376 }, { "epoch": 0.1013198436422166, "grad_norm": 0.9453125, "learning_rate": 4.8794423788868934e-05, "loss": 1.0014, "step": 1377 }, { "epoch": 0.1013934237755806, "grad_norm": 0.7578125, "learning_rate": 4.879264634202571e-05, "loss": 0.7864, "step": 1378 }, { "epoch": 0.10146700390894459, "grad_norm": 1.046875, "learning_rate": 4.879086761827609e-05, "loss": 0.9298, "step": 1379 }, { "epoch": 0.10154058404230858, "grad_norm": 1.0234375, "learning_rate": 4.878908761771554e-05, "loss": 0.9901, "step": 1380 }, { "epoch": 0.10161416417567257, "grad_norm": 1.140625, "learning_rate": 4.8787306340439587e-05, "loss": 1.1022, "step": 1381 }, { "epoch": 0.10168774430903656, "grad_norm": 1.0859375, "learning_rate": 4.8785523786543836e-05, "loss": 1.1433, "step": 1382 }, { "epoch": 0.10176132444240055, "grad_norm": 0.8203125, "learning_rate": 4.878373995612394e-05, "loss": 0.9623, "step": 1383 }, { "epoch": 0.10183490457576454, "grad_norm": 1.015625, "learning_rate": 4.878195484927565e-05, "loss": 0.8162, "step": 1384 }, { "epoch": 0.10190848470912854, "grad_norm": 0.92578125, "learning_rate": 4.8780168466094757e-05, "loss": 1.0115, "step": 1385 }, { "epoch": 0.10198206484249253, "grad_norm": 1.5390625, "learning_rate": 4.877838080667714e-05, "loss": 0.7371, "step": 1386 }, { "epoch": 0.10205564497585652, "grad_norm": 0.7109375, "learning_rate": 4.877659187111873e-05, "loss": 0.659, "step": 1387 }, { "epoch": 0.10212922510922051, "grad_norm": 0.96875, "learning_rate": 4.877480165951555e-05, "loss": 1.0697, "step": 1388 }, { "epoch": 0.1022028052425845, "grad_norm": 0.81640625, "learning_rate": 4.877301017196366e-05, "loss": 0.9541, "step": 1389 }, { "epoch": 0.10227638537594849, "grad_norm": 0.7734375, "learning_rate": 4.877121740855922e-05, "loss": 0.7799, "step": 1390 }, { "epoch": 0.10234996550931248, "grad_norm": 0.890625, "learning_rate": 4.876942336939844e-05, "loss": 0.7024, "step": 1391 }, { "epoch": 0.10242354564267647, "grad_norm": 0.7578125, "learning_rate": 4.87676280545776e-05, "loss": 0.9268, "step": 1392 }, { "epoch": 0.10249712577604046, "grad_norm": 0.88671875, "learning_rate": 4.876583146419305e-05, "loss": 0.7755, "step": 1393 }, { "epoch": 0.10257070590940447, "grad_norm": 0.72265625, "learning_rate": 4.8764033598341214e-05, "loss": 0.6314, "step": 1394 }, { "epoch": 0.10264428604276846, "grad_norm": 1.09375, "learning_rate": 4.876223445711858e-05, "loss": 1.1173, "step": 1395 }, { "epoch": 0.10271786617613245, "grad_norm": 0.75390625, "learning_rate": 4.87604340406217e-05, "loss": 0.6412, "step": 1396 }, { "epoch": 0.10279144630949644, "grad_norm": 0.87890625, "learning_rate": 4.875863234894721e-05, "loss": 0.816, "step": 1397 }, { "epoch": 0.10286502644286043, "grad_norm": 0.77734375, "learning_rate": 4.8756829382191796e-05, "loss": 0.7737, "step": 1398 }, { "epoch": 0.10293860657622442, "grad_norm": 0.92578125, "learning_rate": 4.87550251404522e-05, "loss": 0.8287, "step": 1399 }, { "epoch": 0.10301218670958841, "grad_norm": 0.81640625, "learning_rate": 4.8753219623825296e-05, "loss": 0.8514, "step": 1400 }, { "epoch": 0.1030857668429524, "grad_norm": 1.2265625, "learning_rate": 4.875141283240794e-05, "loss": 1.509, "step": 1401 }, { "epoch": 0.10315934697631639, "grad_norm": 0.921875, "learning_rate": 4.874960476629713e-05, "loss": 1.1237, "step": 1402 }, { "epoch": 0.10323292710968039, "grad_norm": 0.9375, "learning_rate": 4.874779542558988e-05, "loss": 0.9198, "step": 1403 }, { "epoch": 0.10330650724304438, "grad_norm": 0.9375, "learning_rate": 4.8745984810383316e-05, "loss": 0.9819, "step": 1404 }, { "epoch": 0.10338008737640837, "grad_norm": 0.8515625, "learning_rate": 4.874417292077458e-05, "loss": 0.886, "step": 1405 }, { "epoch": 0.10345366750977236, "grad_norm": 0.90625, "learning_rate": 4.874235975686095e-05, "loss": 1.3282, "step": 1406 }, { "epoch": 0.10352724764313635, "grad_norm": 0.86328125, "learning_rate": 4.874054531873969e-05, "loss": 1.0641, "step": 1407 }, { "epoch": 0.10360082777650034, "grad_norm": 0.88671875, "learning_rate": 4.873872960650822e-05, "loss": 0.8754, "step": 1408 }, { "epoch": 0.10367440790986433, "grad_norm": 0.90234375, "learning_rate": 4.873691262026396e-05, "loss": 1.068, "step": 1409 }, { "epoch": 0.10374798804322832, "grad_norm": 0.79296875, "learning_rate": 4.873509436010444e-05, "loss": 1.0116, "step": 1410 }, { "epoch": 0.10382156817659231, "grad_norm": 1.0234375, "learning_rate": 4.873327482612723e-05, "loss": 0.9322, "step": 1411 }, { "epoch": 0.10389514830995632, "grad_norm": 0.953125, "learning_rate": 4.873145401842999e-05, "loss": 1.0501, "step": 1412 }, { "epoch": 0.10396872844332031, "grad_norm": 0.79296875, "learning_rate": 4.872963193711042e-05, "loss": 0.9781, "step": 1413 }, { "epoch": 0.1040423085766843, "grad_norm": 0.91796875, "learning_rate": 4.872780858226634e-05, "loss": 0.9769, "step": 1414 }, { "epoch": 0.10411588871004829, "grad_norm": 0.97265625, "learning_rate": 4.872598395399558e-05, "loss": 0.9307, "step": 1415 }, { "epoch": 0.10418946884341228, "grad_norm": 1.1015625, "learning_rate": 4.872415805239607e-05, "loss": 0.9257, "step": 1416 }, { "epoch": 0.10426304897677627, "grad_norm": 1.1015625, "learning_rate": 4.872233087756582e-05, "loss": 1.0647, "step": 1417 }, { "epoch": 0.10433662911014026, "grad_norm": 0.921875, "learning_rate": 4.872050242960287e-05, "loss": 1.3918, "step": 1418 }, { "epoch": 0.10441020924350425, "grad_norm": 1.0234375, "learning_rate": 4.871867270860535e-05, "loss": 0.8001, "step": 1419 }, { "epoch": 0.10448378937686825, "grad_norm": 0.71875, "learning_rate": 4.8716841714671465e-05, "loss": 0.587, "step": 1420 }, { "epoch": 0.10455736951023224, "grad_norm": 0.77734375, "learning_rate": 4.871500944789949e-05, "loss": 0.7017, "step": 1421 }, { "epoch": 0.10463094964359623, "grad_norm": 0.859375, "learning_rate": 4.871317590838774e-05, "loss": 0.853, "step": 1422 }, { "epoch": 0.10470452977696022, "grad_norm": 1.03125, "learning_rate": 4.871134109623463e-05, "loss": 1.0274, "step": 1423 }, { "epoch": 0.10477810991032421, "grad_norm": 1.078125, "learning_rate": 4.8709505011538634e-05, "loss": 0.9413, "step": 1424 }, { "epoch": 0.1048516900436882, "grad_norm": 0.7109375, "learning_rate": 4.870766765439827e-05, "loss": 0.7312, "step": 1425 }, { "epoch": 0.1049252701770522, "grad_norm": 1.0234375, "learning_rate": 4.870582902491218e-05, "loss": 0.8642, "step": 1426 }, { "epoch": 0.10499885031041618, "grad_norm": 0.734375, "learning_rate": 4.8703989123179004e-05, "loss": 0.7064, "step": 1427 }, { "epoch": 0.10507243044378017, "grad_norm": 0.9453125, "learning_rate": 4.87021479492975e-05, "loss": 1.2552, "step": 1428 }, { "epoch": 0.10514601057714418, "grad_norm": 1.0625, "learning_rate": 4.87003055033665e-05, "loss": 1.1853, "step": 1429 }, { "epoch": 0.10521959071050817, "grad_norm": 0.953125, "learning_rate": 4.869846178548486e-05, "loss": 1.0046, "step": 1430 }, { "epoch": 0.10529317084387216, "grad_norm": 1.0625, "learning_rate": 4.869661679575153e-05, "loss": 0.7922, "step": 1431 }, { "epoch": 0.10536675097723615, "grad_norm": 0.80078125, "learning_rate": 4.8694770534265535e-05, "loss": 0.9383, "step": 1432 }, { "epoch": 0.10544033111060014, "grad_norm": 0.9140625, "learning_rate": 4.8692923001125956e-05, "loss": 1.0083, "step": 1433 }, { "epoch": 0.10551391124396413, "grad_norm": 1.0546875, "learning_rate": 4.8691074196431956e-05, "loss": 1.2046, "step": 1434 }, { "epoch": 0.10558749137732812, "grad_norm": 0.9140625, "learning_rate": 4.868922412028275e-05, "loss": 0.8876, "step": 1435 }, { "epoch": 0.10566107151069211, "grad_norm": 0.91796875, "learning_rate": 4.868737277277762e-05, "loss": 0.9315, "step": 1436 }, { "epoch": 0.1057346516440561, "grad_norm": 0.91015625, "learning_rate": 4.868552015401594e-05, "loss": 0.8065, "step": 1437 }, { "epoch": 0.1058082317774201, "grad_norm": 0.8046875, "learning_rate": 4.868366626409713e-05, "loss": 0.7333, "step": 1438 }, { "epoch": 0.1058818119107841, "grad_norm": 1.015625, "learning_rate": 4.868181110312068e-05, "loss": 1.2215, "step": 1439 }, { "epoch": 0.10595539204414808, "grad_norm": 0.90625, "learning_rate": 4.867995467118616e-05, "loss": 1.3123, "step": 1440 }, { "epoch": 0.10602897217751207, "grad_norm": 1.0703125, "learning_rate": 4.867809696839319e-05, "loss": 1.0686, "step": 1441 }, { "epoch": 0.10610255231087606, "grad_norm": 0.984375, "learning_rate": 4.867623799484148e-05, "loss": 1.177, "step": 1442 }, { "epoch": 0.10617613244424005, "grad_norm": 0.76171875, "learning_rate": 4.867437775063079e-05, "loss": 0.6957, "step": 1443 }, { "epoch": 0.10624971257760404, "grad_norm": 0.8046875, "learning_rate": 4.8672516235860975e-05, "loss": 0.8737, "step": 1444 }, { "epoch": 0.10632329271096803, "grad_norm": 0.91796875, "learning_rate": 4.867065345063192e-05, "loss": 1.1496, "step": 1445 }, { "epoch": 0.10639687284433202, "grad_norm": 0.79296875, "learning_rate": 4.86687893950436e-05, "loss": 1.2304, "step": 1446 }, { "epoch": 0.10647045297769603, "grad_norm": 1.0546875, "learning_rate": 4.866692406919605e-05, "loss": 1.3171, "step": 1447 }, { "epoch": 0.10654403311106002, "grad_norm": 0.8984375, "learning_rate": 4.866505747318939e-05, "loss": 1.1928, "step": 1448 }, { "epoch": 0.10661761324442401, "grad_norm": 0.8671875, "learning_rate": 4.8663189607123796e-05, "loss": 1.0115, "step": 1449 }, { "epoch": 0.106691193377788, "grad_norm": 0.90234375, "learning_rate": 4.866132047109951e-05, "loss": 0.7637, "step": 1450 }, { "epoch": 0.10676477351115199, "grad_norm": 0.88671875, "learning_rate": 4.865945006521684e-05, "loss": 0.7481, "step": 1451 }, { "epoch": 0.10683835364451598, "grad_norm": 0.78515625, "learning_rate": 4.8657578389576175e-05, "loss": 0.8649, "step": 1452 }, { "epoch": 0.10691193377787997, "grad_norm": 0.9609375, "learning_rate": 4.865570544427795e-05, "loss": 1.1992, "step": 1453 }, { "epoch": 0.10698551391124396, "grad_norm": 0.828125, "learning_rate": 4.8653831229422705e-05, "loss": 0.7719, "step": 1454 }, { "epoch": 0.10705909404460795, "grad_norm": 1.0703125, "learning_rate": 4.865195574511101e-05, "loss": 1.3225, "step": 1455 }, { "epoch": 0.10713267417797195, "grad_norm": 1.0234375, "learning_rate": 4.8650078991443523e-05, "loss": 0.9803, "step": 1456 }, { "epoch": 0.10720625431133594, "grad_norm": 1.046875, "learning_rate": 4.8648200968520965e-05, "loss": 1.5307, "step": 1457 }, { "epoch": 0.10727983444469993, "grad_norm": 1.125, "learning_rate": 4.864632167644413e-05, "loss": 1.355, "step": 1458 }, { "epoch": 0.10735341457806392, "grad_norm": 0.96875, "learning_rate": 4.864444111531386e-05, "loss": 1.348, "step": 1459 }, { "epoch": 0.10742699471142791, "grad_norm": 0.97265625, "learning_rate": 4.86425592852311e-05, "loss": 1.061, "step": 1460 }, { "epoch": 0.1075005748447919, "grad_norm": 0.92578125, "learning_rate": 4.8640676186296844e-05, "loss": 1.1889, "step": 1461 }, { "epoch": 0.1075741549781559, "grad_norm": 0.921875, "learning_rate": 4.863879181861215e-05, "loss": 1.0199, "step": 1462 }, { "epoch": 0.10764773511151988, "grad_norm": 1.0859375, "learning_rate": 4.8636906182278134e-05, "loss": 1.0298, "step": 1463 }, { "epoch": 0.10772131524488388, "grad_norm": 1.203125, "learning_rate": 4.863501927739601e-05, "loss": 1.0112, "step": 1464 }, { "epoch": 0.10779489537824788, "grad_norm": 1.0390625, "learning_rate": 4.863313110406704e-05, "loss": 1.2276, "step": 1465 }, { "epoch": 0.10786847551161187, "grad_norm": 0.8046875, "learning_rate": 4.863124166239257e-05, "loss": 0.9624, "step": 1466 }, { "epoch": 0.10794205564497586, "grad_norm": 1.0546875, "learning_rate": 4.862935095247398e-05, "loss": 1.678, "step": 1467 }, { "epoch": 0.10801563577833985, "grad_norm": 0.80078125, "learning_rate": 4.862745897441277e-05, "loss": 0.8602, "step": 1468 }, { "epoch": 0.10808921591170384, "grad_norm": 0.8515625, "learning_rate": 4.862556572831045e-05, "loss": 0.8751, "step": 1469 }, { "epoch": 0.10816279604506783, "grad_norm": 0.9375, "learning_rate": 4.862367121426865e-05, "loss": 0.8047, "step": 1470 }, { "epoch": 0.10823637617843182, "grad_norm": 0.9296875, "learning_rate": 4.862177543238903e-05, "loss": 0.9325, "step": 1471 }, { "epoch": 0.10830995631179581, "grad_norm": 1.1953125, "learning_rate": 4.861987838277333e-05, "loss": 1.3588, "step": 1472 }, { "epoch": 0.1083835364451598, "grad_norm": 1.109375, "learning_rate": 4.861798006552338e-05, "loss": 1.3279, "step": 1473 }, { "epoch": 0.1084571165785238, "grad_norm": 0.94140625, "learning_rate": 4.861608048074104e-05, "loss": 0.7374, "step": 1474 }, { "epoch": 0.1085306967118878, "grad_norm": 1.34375, "learning_rate": 4.8614179628528265e-05, "loss": 1.157, "step": 1475 }, { "epoch": 0.10860427684525178, "grad_norm": 0.859375, "learning_rate": 4.861227750898708e-05, "loss": 0.8502, "step": 1476 }, { "epoch": 0.10867785697861577, "grad_norm": 0.88671875, "learning_rate": 4.861037412221955e-05, "loss": 0.9861, "step": 1477 }, { "epoch": 0.10875143711197977, "grad_norm": 0.7578125, "learning_rate": 4.860846946832783e-05, "loss": 0.9201, "step": 1478 }, { "epoch": 0.10882501724534376, "grad_norm": 0.97265625, "learning_rate": 4.860656354741415e-05, "loss": 1.1797, "step": 1479 }, { "epoch": 0.10889859737870775, "grad_norm": 1.1953125, "learning_rate": 4.860465635958079e-05, "loss": 1.3831, "step": 1480 }, { "epoch": 0.10897217751207174, "grad_norm": 1.296875, "learning_rate": 4.860274790493011e-05, "loss": 1.5445, "step": 1481 }, { "epoch": 0.10904575764543573, "grad_norm": 1.046875, "learning_rate": 4.860083818356452e-05, "loss": 0.8569, "step": 1482 }, { "epoch": 0.10911933777879973, "grad_norm": 0.83203125, "learning_rate": 4.859892719558653e-05, "loss": 1.044, "step": 1483 }, { "epoch": 0.10919291791216372, "grad_norm": 0.6953125, "learning_rate": 4.859701494109868e-05, "loss": 0.5431, "step": 1484 }, { "epoch": 0.10926649804552771, "grad_norm": 0.7265625, "learning_rate": 4.859510142020362e-05, "loss": 0.6719, "step": 1485 }, { "epoch": 0.1093400781788917, "grad_norm": 0.76953125, "learning_rate": 4.859318663300402e-05, "loss": 0.8481, "step": 1486 }, { "epoch": 0.10941365831225569, "grad_norm": 0.75, "learning_rate": 4.859127057960266e-05, "loss": 0.7634, "step": 1487 }, { "epoch": 0.10948723844561968, "grad_norm": 1.171875, "learning_rate": 4.858935326010237e-05, "loss": 1.5947, "step": 1488 }, { "epoch": 0.10956081857898367, "grad_norm": 0.84765625, "learning_rate": 4.8587434674606036e-05, "loss": 0.7334, "step": 1489 }, { "epoch": 0.10963439871234766, "grad_norm": 0.9765625, "learning_rate": 4.858551482321663e-05, "loss": 1.0087, "step": 1490 }, { "epoch": 0.10970797884571165, "grad_norm": 0.96875, "learning_rate": 4.85835937060372e-05, "loss": 1.0311, "step": 1491 }, { "epoch": 0.10978155897907566, "grad_norm": 0.99609375, "learning_rate": 4.858167132317083e-05, "loss": 0.7996, "step": 1492 }, { "epoch": 0.10985513911243965, "grad_norm": 0.7734375, "learning_rate": 4.857974767472071e-05, "loss": 0.6179, "step": 1493 }, { "epoch": 0.10992871924580364, "grad_norm": 0.99609375, "learning_rate": 4.857782276079006e-05, "loss": 1.0871, "step": 1494 }, { "epoch": 0.11000229937916763, "grad_norm": 0.91796875, "learning_rate": 4.8575896581482195e-05, "loss": 0.9696, "step": 1495 }, { "epoch": 0.11007587951253162, "grad_norm": 0.82421875, "learning_rate": 4.8573969136900495e-05, "loss": 0.7804, "step": 1496 }, { "epoch": 0.1101494596458956, "grad_norm": 0.87890625, "learning_rate": 4.857204042714839e-05, "loss": 0.8615, "step": 1497 }, { "epoch": 0.1102230397792596, "grad_norm": 0.8203125, "learning_rate": 4.8570110452329395e-05, "loss": 1.0255, "step": 1498 }, { "epoch": 0.11029661991262359, "grad_norm": 0.8203125, "learning_rate": 4.85681792125471e-05, "loss": 0.7892, "step": 1499 }, { "epoch": 0.11037020004598758, "grad_norm": 0.72265625, "learning_rate": 4.856624670790513e-05, "loss": 0.7113, "step": 1500 }, { "epoch": 0.11044378017935158, "grad_norm": 0.875, "learning_rate": 4.856431293850722e-05, "loss": 0.9847, "step": 1501 }, { "epoch": 0.11051736031271557, "grad_norm": 0.8359375, "learning_rate": 4.856237790445714e-05, "loss": 0.6973, "step": 1502 }, { "epoch": 0.11059094044607956, "grad_norm": 0.84375, "learning_rate": 4.856044160585872e-05, "loss": 0.7668, "step": 1503 }, { "epoch": 0.11066452057944355, "grad_norm": 0.90234375, "learning_rate": 4.855850404281592e-05, "loss": 0.6934, "step": 1504 }, { "epoch": 0.11073810071280754, "grad_norm": 0.765625, "learning_rate": 4.85565652154327e-05, "loss": 0.8594, "step": 1505 }, { "epoch": 0.11081168084617153, "grad_norm": 0.76171875, "learning_rate": 4.8554625123813116e-05, "loss": 0.8429, "step": 1506 }, { "epoch": 0.11088526097953552, "grad_norm": 0.890625, "learning_rate": 4.855268376806129e-05, "loss": 0.9555, "step": 1507 }, { "epoch": 0.11095884111289951, "grad_norm": 1.046875, "learning_rate": 4.8550741148281406e-05, "loss": 1.3677, "step": 1508 }, { "epoch": 0.11103242124626352, "grad_norm": 0.91796875, "learning_rate": 4.854879726457773e-05, "loss": 1.0277, "step": 1509 }, { "epoch": 0.1111060013796275, "grad_norm": 0.90625, "learning_rate": 4.854685211705458e-05, "loss": 0.9829, "step": 1510 }, { "epoch": 0.1111795815129915, "grad_norm": 1.1171875, "learning_rate": 4.854490570581635e-05, "loss": 1.2195, "step": 1511 }, { "epoch": 0.11125316164635549, "grad_norm": 0.8125, "learning_rate": 4.854295803096751e-05, "loss": 0.6009, "step": 1512 }, { "epoch": 0.11132674177971948, "grad_norm": 0.9453125, "learning_rate": 4.854100909261256e-05, "loss": 1.1862, "step": 1513 }, { "epoch": 0.11140032191308347, "grad_norm": 0.97265625, "learning_rate": 4.853905889085613e-05, "loss": 1.0422, "step": 1514 }, { "epoch": 0.11147390204644746, "grad_norm": 0.73046875, "learning_rate": 4.8537107425802865e-05, "loss": 0.7051, "step": 1515 }, { "epoch": 0.11154748217981145, "grad_norm": 0.73828125, "learning_rate": 4.85351546975575e-05, "loss": 0.6874, "step": 1516 }, { "epoch": 0.11162106231317544, "grad_norm": 0.8046875, "learning_rate": 4.853320070622484e-05, "loss": 1.0476, "step": 1517 }, { "epoch": 0.11169464244653944, "grad_norm": 0.74609375, "learning_rate": 4.853124545190973e-05, "loss": 0.7274, "step": 1518 }, { "epoch": 0.11176822257990343, "grad_norm": 0.83203125, "learning_rate": 4.852928893471713e-05, "loss": 0.8882, "step": 1519 }, { "epoch": 0.11184180271326742, "grad_norm": 0.9296875, "learning_rate": 4.8527331154752035e-05, "loss": 1.0539, "step": 1520 }, { "epoch": 0.11191538284663141, "grad_norm": 0.8984375, "learning_rate": 4.852537211211951e-05, "loss": 0.9637, "step": 1521 }, { "epoch": 0.1119889629799954, "grad_norm": 1.015625, "learning_rate": 4.8523411806924704e-05, "loss": 1.4332, "step": 1522 }, { "epoch": 0.11206254311335939, "grad_norm": 0.84375, "learning_rate": 4.852145023927281e-05, "loss": 0.7953, "step": 1523 }, { "epoch": 0.11213612324672338, "grad_norm": 0.81640625, "learning_rate": 4.85194874092691e-05, "loss": 0.8669, "step": 1524 }, { "epoch": 0.11220970338008737, "grad_norm": 0.84765625, "learning_rate": 4.851752331701894e-05, "loss": 0.9211, "step": 1525 }, { "epoch": 0.11228328351345136, "grad_norm": 1.1015625, "learning_rate": 4.851555796262771e-05, "loss": 1.3201, "step": 1526 }, { "epoch": 0.11235686364681537, "grad_norm": 0.86328125, "learning_rate": 4.85135913462009e-05, "loss": 0.8635, "step": 1527 }, { "epoch": 0.11243044378017936, "grad_norm": 1.09375, "learning_rate": 4.851162346784406e-05, "loss": 0.9325, "step": 1528 }, { "epoch": 0.11250402391354335, "grad_norm": 0.8828125, "learning_rate": 4.850965432766279e-05, "loss": 1.066, "step": 1529 }, { "epoch": 0.11257760404690734, "grad_norm": 1.1953125, "learning_rate": 4.850768392576277e-05, "loss": 1.2734, "step": 1530 }, { "epoch": 0.11265118418027133, "grad_norm": 0.89453125, "learning_rate": 4.850571226224976e-05, "loss": 0.8032, "step": 1531 }, { "epoch": 0.11272476431363532, "grad_norm": 0.80078125, "learning_rate": 4.850373933722957e-05, "loss": 0.7884, "step": 1532 }, { "epoch": 0.11279834444699931, "grad_norm": 0.921875, "learning_rate": 4.8501765150808085e-05, "loss": 0.8609, "step": 1533 }, { "epoch": 0.1128719245803633, "grad_norm": 0.75390625, "learning_rate": 4.849978970309125e-05, "loss": 0.841, "step": 1534 }, { "epoch": 0.11294550471372729, "grad_norm": 0.77734375, "learning_rate": 4.8497812994185075e-05, "loss": 0.8201, "step": 1535 }, { "epoch": 0.11301908484709129, "grad_norm": 0.7421875, "learning_rate": 4.8495835024195665e-05, "loss": 0.8334, "step": 1536 }, { "epoch": 0.11309266498045528, "grad_norm": 0.86328125, "learning_rate": 4.8493855793229174e-05, "loss": 1.2785, "step": 1537 }, { "epoch": 0.11316624511381927, "grad_norm": 0.80859375, "learning_rate": 4.8491875301391806e-05, "loss": 0.6987, "step": 1538 }, { "epoch": 0.11323982524718326, "grad_norm": 0.98046875, "learning_rate": 4.8489893548789874e-05, "loss": 1.2081, "step": 1539 }, { "epoch": 0.11331340538054725, "grad_norm": 0.8359375, "learning_rate": 4.848791053552971e-05, "loss": 0.9546, "step": 1540 }, { "epoch": 0.11338698551391124, "grad_norm": 0.80859375, "learning_rate": 4.848592626171775e-05, "loss": 0.7666, "step": 1541 }, { "epoch": 0.11346056564727523, "grad_norm": 0.9453125, "learning_rate": 4.84839407274605e-05, "loss": 0.8671, "step": 1542 }, { "epoch": 0.11353414578063922, "grad_norm": 1.015625, "learning_rate": 4.848195393286449e-05, "loss": 1.3558, "step": 1543 }, { "epoch": 0.11360772591400321, "grad_norm": 0.84375, "learning_rate": 4.8479965878036373e-05, "loss": 0.8777, "step": 1544 }, { "epoch": 0.11368130604736722, "grad_norm": 1.0546875, "learning_rate": 4.8477976563082824e-05, "loss": 0.9407, "step": 1545 }, { "epoch": 0.11375488618073121, "grad_norm": 0.9609375, "learning_rate": 4.847598598811062e-05, "loss": 1.3234, "step": 1546 }, { "epoch": 0.1138284663140952, "grad_norm": 0.953125, "learning_rate": 4.8473994153226594e-05, "loss": 1.0208, "step": 1547 }, { "epoch": 0.11390204644745919, "grad_norm": 0.84375, "learning_rate": 4.847200105853763e-05, "loss": 1.0776, "step": 1548 }, { "epoch": 0.11397562658082318, "grad_norm": 0.9375, "learning_rate": 4.8470006704150705e-05, "loss": 1.1107, "step": 1549 }, { "epoch": 0.11404920671418717, "grad_norm": 1.0546875, "learning_rate": 4.846801109017285e-05, "loss": 0.8734, "step": 1550 }, { "epoch": 0.11412278684755116, "grad_norm": 1.046875, "learning_rate": 4.846601421671116e-05, "loss": 1.2276, "step": 1551 }, { "epoch": 0.11419636698091515, "grad_norm": 0.85546875, "learning_rate": 4.846401608387282e-05, "loss": 1.2157, "step": 1552 }, { "epoch": 0.11426994711427914, "grad_norm": 0.8984375, "learning_rate": 4.846201669176505e-05, "loss": 0.8714, "step": 1553 }, { "epoch": 0.11434352724764314, "grad_norm": 1.3828125, "learning_rate": 4.846001604049515e-05, "loss": 1.3024, "step": 1554 }, { "epoch": 0.11441710738100713, "grad_norm": 1.5546875, "learning_rate": 4.8458014130170506e-05, "loss": 0.8927, "step": 1555 }, { "epoch": 0.11449068751437112, "grad_norm": 0.91015625, "learning_rate": 4.845601096089855e-05, "loss": 1.1201, "step": 1556 }, { "epoch": 0.11456426764773511, "grad_norm": 0.9140625, "learning_rate": 4.845400653278678e-05, "loss": 1.013, "step": 1557 }, { "epoch": 0.1146378477810991, "grad_norm": 0.8046875, "learning_rate": 4.845200084594278e-05, "loss": 0.7813, "step": 1558 }, { "epoch": 0.1147114279144631, "grad_norm": 1.1328125, "learning_rate": 4.8449993900474187e-05, "loss": 1.1068, "step": 1559 }, { "epoch": 0.11478500804782708, "grad_norm": 1.0, "learning_rate": 4.844798569648872e-05, "loss": 1.1226, "step": 1560 }, { "epoch": 0.11485858818119107, "grad_norm": 0.9296875, "learning_rate": 4.844597623409414e-05, "loss": 0.816, "step": 1561 }, { "epoch": 0.11493216831455506, "grad_norm": 0.9609375, "learning_rate": 4.84439655133983e-05, "loss": 0.908, "step": 1562 }, { "epoch": 0.11500574844791907, "grad_norm": 0.91015625, "learning_rate": 4.8441953534509116e-05, "loss": 1.1246, "step": 1563 }, { "epoch": 0.11507932858128306, "grad_norm": 1.140625, "learning_rate": 4.843994029753456e-05, "loss": 1.2163, "step": 1564 }, { "epoch": 0.11515290871464705, "grad_norm": 0.85546875, "learning_rate": 4.843792580258267e-05, "loss": 1.0191, "step": 1565 }, { "epoch": 0.11522648884801104, "grad_norm": 1.0078125, "learning_rate": 4.843591004976158e-05, "loss": 0.8801, "step": 1566 }, { "epoch": 0.11530006898137503, "grad_norm": 1.1875, "learning_rate": 4.843389303917946e-05, "loss": 1.2238, "step": 1567 }, { "epoch": 0.11537364911473902, "grad_norm": 0.921875, "learning_rate": 4.843187477094456e-05, "loss": 0.8479, "step": 1568 }, { "epoch": 0.11544722924810301, "grad_norm": 0.94921875, "learning_rate": 4.84298552451652e-05, "loss": 1.1632, "step": 1569 }, { "epoch": 0.115520809381467, "grad_norm": 1.0703125, "learning_rate": 4.8427834461949764e-05, "loss": 1.2675, "step": 1570 }, { "epoch": 0.11559438951483099, "grad_norm": 0.70703125, "learning_rate": 4.842581242140669e-05, "loss": 0.6874, "step": 1571 }, { "epoch": 0.115667969648195, "grad_norm": 1.0234375, "learning_rate": 4.842378912364452e-05, "loss": 1.2296, "step": 1572 }, { "epoch": 0.11574154978155898, "grad_norm": 1.03125, "learning_rate": 4.842176456877182e-05, "loss": 1.1198, "step": 1573 }, { "epoch": 0.11581512991492297, "grad_norm": 1.5, "learning_rate": 4.841973875689726e-05, "loss": 0.8049, "step": 1574 }, { "epoch": 0.11588871004828696, "grad_norm": 1.0390625, "learning_rate": 4.841771168812955e-05, "loss": 0.8588, "step": 1575 }, { "epoch": 0.11596229018165095, "grad_norm": 0.85546875, "learning_rate": 4.841568336257749e-05, "loss": 0.7734, "step": 1576 }, { "epoch": 0.11603587031501494, "grad_norm": 1.0859375, "learning_rate": 4.8413653780349925e-05, "loss": 0.997, "step": 1577 }, { "epoch": 0.11610945044837893, "grad_norm": 0.875, "learning_rate": 4.841162294155578e-05, "loss": 0.8062, "step": 1578 }, { "epoch": 0.11618303058174292, "grad_norm": 1.015625, "learning_rate": 4.840959084630406e-05, "loss": 0.8249, "step": 1579 }, { "epoch": 0.11625661071510691, "grad_norm": 0.80078125, "learning_rate": 4.84075574947038e-05, "loss": 0.7494, "step": 1580 }, { "epoch": 0.11633019084847092, "grad_norm": 0.703125, "learning_rate": 4.840552288686415e-05, "loss": 0.6936, "step": 1581 }, { "epoch": 0.11640377098183491, "grad_norm": 0.80078125, "learning_rate": 4.8403487022894294e-05, "loss": 0.7006, "step": 1582 }, { "epoch": 0.1164773511151989, "grad_norm": 0.79296875, "learning_rate": 4.840144990290349e-05, "loss": 0.9448, "step": 1583 }, { "epoch": 0.11655093124856289, "grad_norm": 0.765625, "learning_rate": 4.839941152700107e-05, "loss": 1.0342, "step": 1584 }, { "epoch": 0.11662451138192688, "grad_norm": 0.87890625, "learning_rate": 4.839737189529643e-05, "loss": 0.959, "step": 1585 }, { "epoch": 0.11669809151529087, "grad_norm": 1.0546875, "learning_rate": 4.839533100789903e-05, "loss": 0.8676, "step": 1586 }, { "epoch": 0.11677167164865486, "grad_norm": 0.94921875, "learning_rate": 4.83932888649184e-05, "loss": 0.7941, "step": 1587 }, { "epoch": 0.11684525178201885, "grad_norm": 0.96875, "learning_rate": 4.839124546646414e-05, "loss": 0.6636, "step": 1588 }, { "epoch": 0.11691883191538284, "grad_norm": 1.0390625, "learning_rate": 4.838920081264592e-05, "loss": 1.021, "step": 1589 }, { "epoch": 0.11699241204874684, "grad_norm": 0.9765625, "learning_rate": 4.8387154903573464e-05, "loss": 1.5042, "step": 1590 }, { "epoch": 0.11706599218211083, "grad_norm": 1.03125, "learning_rate": 4.838510773935657e-05, "loss": 1.0714, "step": 1591 }, { "epoch": 0.11713957231547482, "grad_norm": 0.76953125, "learning_rate": 4.8383059320105124e-05, "loss": 0.6976, "step": 1592 }, { "epoch": 0.11721315244883881, "grad_norm": 0.8359375, "learning_rate": 4.8381009645929044e-05, "loss": 0.8119, "step": 1593 }, { "epoch": 0.1172867325822028, "grad_norm": 0.71875, "learning_rate": 4.837895871693834e-05, "loss": 0.7397, "step": 1594 }, { "epoch": 0.1173603127155668, "grad_norm": 1.0234375, "learning_rate": 4.837690653324307e-05, "loss": 1.279, "step": 1595 }, { "epoch": 0.11743389284893078, "grad_norm": 0.90234375, "learning_rate": 4.8374853094953384e-05, "loss": 0.7486, "step": 1596 }, { "epoch": 0.11750747298229477, "grad_norm": 0.6796875, "learning_rate": 4.837279840217948e-05, "loss": 0.7038, "step": 1597 }, { "epoch": 0.11758105311565878, "grad_norm": 0.74609375, "learning_rate": 4.837074245503164e-05, "loss": 0.9153, "step": 1598 }, { "epoch": 0.11765463324902277, "grad_norm": 0.8984375, "learning_rate": 4.836868525362018e-05, "loss": 1.0461, "step": 1599 }, { "epoch": 0.11772821338238676, "grad_norm": 0.7578125, "learning_rate": 4.836662679805553e-05, "loss": 0.798, "step": 1600 }, { "epoch": 0.11780179351575075, "grad_norm": 0.8828125, "learning_rate": 4.836456708844814e-05, "loss": 1.1684, "step": 1601 }, { "epoch": 0.11787537364911474, "grad_norm": 0.75390625, "learning_rate": 4.8362506124908574e-05, "loss": 0.8785, "step": 1602 }, { "epoch": 0.11794895378247873, "grad_norm": 1.0, "learning_rate": 4.8360443907547423e-05, "loss": 1.3117, "step": 1603 }, { "epoch": 0.11802253391584272, "grad_norm": 0.9453125, "learning_rate": 4.835838043647538e-05, "loss": 0.9618, "step": 1604 }, { "epoch": 0.11809611404920671, "grad_norm": 0.86328125, "learning_rate": 4.8356315711803166e-05, "loss": 0.9472, "step": 1605 }, { "epoch": 0.1181696941825707, "grad_norm": 0.7734375, "learning_rate": 4.835424973364161e-05, "loss": 0.805, "step": 1606 }, { "epoch": 0.1182432743159347, "grad_norm": 0.8828125, "learning_rate": 4.835218250210157e-05, "loss": 0.8101, "step": 1607 }, { "epoch": 0.1183168544492987, "grad_norm": 1.1953125, "learning_rate": 4.8350114017294015e-05, "loss": 1.0567, "step": 1608 }, { "epoch": 0.11839043458266268, "grad_norm": 0.91796875, "learning_rate": 4.834804427932993e-05, "loss": 0.9547, "step": 1609 }, { "epoch": 0.11846401471602667, "grad_norm": 0.71484375, "learning_rate": 4.8345973288320414e-05, "loss": 0.9241, "step": 1610 }, { "epoch": 0.11853759484939067, "grad_norm": 0.9375, "learning_rate": 4.8343901044376604e-05, "loss": 1.2618, "step": 1611 }, { "epoch": 0.11861117498275466, "grad_norm": 1.078125, "learning_rate": 4.834182754760972e-05, "loss": 1.0232, "step": 1612 }, { "epoch": 0.11868475511611865, "grad_norm": 0.98828125, "learning_rate": 4.8339752798131034e-05, "loss": 1.1661, "step": 1613 }, { "epoch": 0.11875833524948264, "grad_norm": 0.75390625, "learning_rate": 4.8337676796051895e-05, "loss": 0.6603, "step": 1614 }, { "epoch": 0.11883191538284663, "grad_norm": 0.89453125, "learning_rate": 4.8335599541483724e-05, "loss": 0.8218, "step": 1615 }, { "epoch": 0.11890549551621063, "grad_norm": 0.77734375, "learning_rate": 4.8333521034538e-05, "loss": 0.8218, "step": 1616 }, { "epoch": 0.11897907564957462, "grad_norm": 0.7421875, "learning_rate": 4.833144127532627e-05, "loss": 0.8746, "step": 1617 }, { "epoch": 0.11905265578293861, "grad_norm": 1.03125, "learning_rate": 4.8329360263960155e-05, "loss": 1.3788, "step": 1618 }, { "epoch": 0.1191262359163026, "grad_norm": 0.6953125, "learning_rate": 4.832727800055134e-05, "loss": 0.7577, "step": 1619 }, { "epoch": 0.11919981604966659, "grad_norm": 0.88671875, "learning_rate": 4.8325194485211575e-05, "loss": 1.0625, "step": 1620 }, { "epoch": 0.11927339618303058, "grad_norm": 0.8828125, "learning_rate": 4.8323109718052675e-05, "loss": 1.0207, "step": 1621 }, { "epoch": 0.11934697631639457, "grad_norm": 0.85546875, "learning_rate": 4.832102369918652e-05, "loss": 1.2307, "step": 1622 }, { "epoch": 0.11942055644975856, "grad_norm": 0.83203125, "learning_rate": 4.8318936428725083e-05, "loss": 0.7842, "step": 1623 }, { "epoch": 0.11949413658312255, "grad_norm": 0.99609375, "learning_rate": 4.831684790678036e-05, "loss": 1.0629, "step": 1624 }, { "epoch": 0.11956771671648656, "grad_norm": 0.83203125, "learning_rate": 4.831475813346445e-05, "loss": 0.7109, "step": 1625 }, { "epoch": 0.11964129684985055, "grad_norm": 0.71875, "learning_rate": 4.831266710888952e-05, "loss": 0.6695, "step": 1626 }, { "epoch": 0.11971487698321454, "grad_norm": 0.78125, "learning_rate": 4.8310574833167763e-05, "loss": 1.0373, "step": 1627 }, { "epoch": 0.11978845711657853, "grad_norm": 0.703125, "learning_rate": 4.8308481306411493e-05, "loss": 0.4827, "step": 1628 }, { "epoch": 0.11986203724994252, "grad_norm": 0.8828125, "learning_rate": 4.830638652873305e-05, "loss": 0.8619, "step": 1629 }, { "epoch": 0.1199356173833065, "grad_norm": 0.79296875, "learning_rate": 4.8304290500244855e-05, "loss": 0.8509, "step": 1630 }, { "epoch": 0.1200091975166705, "grad_norm": 0.82421875, "learning_rate": 4.830219322105941e-05, "loss": 0.8503, "step": 1631 }, { "epoch": 0.12008277765003449, "grad_norm": 1.15625, "learning_rate": 4.830009469128927e-05, "loss": 1.3459, "step": 1632 }, { "epoch": 0.12015635778339848, "grad_norm": 0.83984375, "learning_rate": 4.829799491104705e-05, "loss": 0.8609, "step": 1633 }, { "epoch": 0.12022993791676248, "grad_norm": 0.90625, "learning_rate": 4.829589388044545e-05, "loss": 1.1231, "step": 1634 }, { "epoch": 0.12030351805012647, "grad_norm": 1.0546875, "learning_rate": 4.829379159959722e-05, "loss": 0.9082, "step": 1635 }, { "epoch": 0.12037709818349046, "grad_norm": 0.9375, "learning_rate": 4.8291688068615196e-05, "loss": 1.1797, "step": 1636 }, { "epoch": 0.12045067831685445, "grad_norm": 1.0078125, "learning_rate": 4.828958328761226e-05, "loss": 0.9844, "step": 1637 }, { "epoch": 0.12052425845021844, "grad_norm": 0.80859375, "learning_rate": 4.828747725670138e-05, "loss": 0.9034, "step": 1638 }, { "epoch": 0.12059783858358243, "grad_norm": 1.0, "learning_rate": 4.828536997599559e-05, "loss": 1.5587, "step": 1639 }, { "epoch": 0.12067141871694642, "grad_norm": 1.0390625, "learning_rate": 4.828326144560795e-05, "loss": 1.0669, "step": 1640 }, { "epoch": 0.12074499885031041, "grad_norm": 0.8984375, "learning_rate": 4.8281151665651664e-05, "loss": 0.9971, "step": 1641 }, { "epoch": 0.1208185789836744, "grad_norm": 0.78125, "learning_rate": 4.8279040636239934e-05, "loss": 0.8416, "step": 1642 }, { "epoch": 0.1208921591170384, "grad_norm": 0.7734375, "learning_rate": 4.8276928357486056e-05, "loss": 0.9918, "step": 1643 }, { "epoch": 0.1209657392504024, "grad_norm": 0.90625, "learning_rate": 4.82748148295034e-05, "loss": 0.9947, "step": 1644 }, { "epoch": 0.12103931938376639, "grad_norm": 0.87109375, "learning_rate": 4.8272700052405396e-05, "loss": 0.8783, "step": 1645 }, { "epoch": 0.12111289951713038, "grad_norm": 0.90625, "learning_rate": 4.827058402630553e-05, "loss": 1.3422, "step": 1646 }, { "epoch": 0.12118647965049437, "grad_norm": 0.83984375, "learning_rate": 4.826846675131738e-05, "loss": 0.8693, "step": 1647 }, { "epoch": 0.12126005978385836, "grad_norm": 0.90625, "learning_rate": 4.826634822755456e-05, "loss": 1.0177, "step": 1648 }, { "epoch": 0.12133363991722235, "grad_norm": 1.0546875, "learning_rate": 4.826422845513077e-05, "loss": 1.4612, "step": 1649 }, { "epoch": 0.12140722005058634, "grad_norm": 0.8046875, "learning_rate": 4.826210743415979e-05, "loss": 1.0388, "step": 1650 }, { "epoch": 0.12148080018395033, "grad_norm": 1.0, "learning_rate": 4.825998516475543e-05, "loss": 1.1511, "step": 1651 }, { "epoch": 0.12155438031731433, "grad_norm": 1.015625, "learning_rate": 4.8257861647031607e-05, "loss": 1.3444, "step": 1652 }, { "epoch": 0.12162796045067832, "grad_norm": 1.0234375, "learning_rate": 4.825573688110227e-05, "loss": 1.3203, "step": 1653 }, { "epoch": 0.12170154058404231, "grad_norm": 1.0078125, "learning_rate": 4.825361086708146e-05, "loss": 0.9841, "step": 1654 }, { "epoch": 0.1217751207174063, "grad_norm": 0.8359375, "learning_rate": 4.825148360508328e-05, "loss": 0.7331, "step": 1655 }, { "epoch": 0.12184870085077029, "grad_norm": 0.84375, "learning_rate": 4.824935509522188e-05, "loss": 0.7633, "step": 1656 }, { "epoch": 0.12192228098413428, "grad_norm": 0.9453125, "learning_rate": 4.824722533761151e-05, "loss": 0.944, "step": 1657 }, { "epoch": 0.12199586111749827, "grad_norm": 1.0546875, "learning_rate": 4.824509433236647e-05, "loss": 1.0866, "step": 1658 }, { "epoch": 0.12206944125086226, "grad_norm": 0.9453125, "learning_rate": 4.8242962079601115e-05, "loss": 1.2952, "step": 1659 }, { "epoch": 0.12214302138422625, "grad_norm": 1.03125, "learning_rate": 4.8240828579429886e-05, "loss": 1.3609, "step": 1660 }, { "epoch": 0.12221660151759026, "grad_norm": 0.796875, "learning_rate": 4.823869383196728e-05, "loss": 0.7113, "step": 1661 }, { "epoch": 0.12229018165095425, "grad_norm": 0.95703125, "learning_rate": 4.823655783732787e-05, "loss": 1.1002, "step": 1662 }, { "epoch": 0.12236376178431824, "grad_norm": 0.88671875, "learning_rate": 4.8234420595626286e-05, "loss": 0.9645, "step": 1663 }, { "epoch": 0.12243734191768223, "grad_norm": 1.046875, "learning_rate": 4.823228210697723e-05, "loss": 1.3537, "step": 1664 }, { "epoch": 0.12251092205104622, "grad_norm": 0.96875, "learning_rate": 4.823014237149548e-05, "loss": 1.0574, "step": 1665 }, { "epoch": 0.12258450218441021, "grad_norm": 1.0234375, "learning_rate": 4.8228001389295864e-05, "loss": 0.9434, "step": 1666 }, { "epoch": 0.1226580823177742, "grad_norm": 0.859375, "learning_rate": 4.822585916049328e-05, "loss": 0.8898, "step": 1667 }, { "epoch": 0.12273166245113819, "grad_norm": 0.97265625, "learning_rate": 4.8223715685202706e-05, "loss": 1.2811, "step": 1668 }, { "epoch": 0.12280524258450218, "grad_norm": 0.80078125, "learning_rate": 4.8221570963539175e-05, "loss": 0.6959, "step": 1669 }, { "epoch": 0.12287882271786618, "grad_norm": 0.91015625, "learning_rate": 4.821942499561779e-05, "loss": 0.7751, "step": 1670 }, { "epoch": 0.12295240285123017, "grad_norm": 0.921875, "learning_rate": 4.8217277781553716e-05, "loss": 0.8852, "step": 1671 }, { "epoch": 0.12302598298459416, "grad_norm": 1.0625, "learning_rate": 4.82151293214622e-05, "loss": 1.1222, "step": 1672 }, { "epoch": 0.12309956311795815, "grad_norm": 0.9609375, "learning_rate": 4.8212979615458534e-05, "loss": 0.9906, "step": 1673 }, { "epoch": 0.12317314325132214, "grad_norm": 0.84375, "learning_rate": 4.8210828663658106e-05, "loss": 0.9114, "step": 1674 }, { "epoch": 0.12324672338468613, "grad_norm": 0.8203125, "learning_rate": 4.8208676466176325e-05, "loss": 0.7124, "step": 1675 }, { "epoch": 0.12332030351805012, "grad_norm": 0.93359375, "learning_rate": 4.820652302312873e-05, "loss": 0.8773, "step": 1676 }, { "epoch": 0.12339388365141411, "grad_norm": 0.96484375, "learning_rate": 4.820436833463087e-05, "loss": 0.712, "step": 1677 }, { "epoch": 0.1234674637847781, "grad_norm": 0.87109375, "learning_rate": 4.820221240079838e-05, "loss": 0.8774, "step": 1678 }, { "epoch": 0.12354104391814211, "grad_norm": 0.75, "learning_rate": 4.820005522174699e-05, "loss": 0.8815, "step": 1679 }, { "epoch": 0.1236146240515061, "grad_norm": 1.0625, "learning_rate": 4.8197896797592444e-05, "loss": 1.206, "step": 1680 }, { "epoch": 0.12368820418487009, "grad_norm": 1.1015625, "learning_rate": 4.819573712845059e-05, "loss": 1.4705, "step": 1681 }, { "epoch": 0.12376178431823408, "grad_norm": 0.796875, "learning_rate": 4.819357621443734e-05, "loss": 0.8625, "step": 1682 }, { "epoch": 0.12383536445159807, "grad_norm": 0.85546875, "learning_rate": 4.819141405566866e-05, "loss": 0.8718, "step": 1683 }, { "epoch": 0.12390894458496206, "grad_norm": 0.8359375, "learning_rate": 4.818925065226059e-05, "loss": 0.7995, "step": 1684 }, { "epoch": 0.12398252471832605, "grad_norm": 0.8125, "learning_rate": 4.818708600432923e-05, "loss": 0.8796, "step": 1685 }, { "epoch": 0.12405610485169004, "grad_norm": 1.1875, "learning_rate": 4.818492011199076e-05, "loss": 1.2425, "step": 1686 }, { "epoch": 0.12412968498505403, "grad_norm": 0.88671875, "learning_rate": 4.8182752975361425e-05, "loss": 0.6448, "step": 1687 }, { "epoch": 0.12420326511841803, "grad_norm": 1.0, "learning_rate": 4.818058459455752e-05, "loss": 1.1012, "step": 1688 }, { "epoch": 0.12427684525178202, "grad_norm": 0.84765625, "learning_rate": 4.817841496969542e-05, "loss": 1.0217, "step": 1689 }, { "epoch": 0.12435042538514601, "grad_norm": 0.921875, "learning_rate": 4.8176244100891566e-05, "loss": 0.9944, "step": 1690 }, { "epoch": 0.12442400551851, "grad_norm": 0.91796875, "learning_rate": 4.817407198826247e-05, "loss": 0.8699, "step": 1691 }, { "epoch": 0.124497585651874, "grad_norm": 0.6953125, "learning_rate": 4.8171898631924695e-05, "loss": 0.7512, "step": 1692 }, { "epoch": 0.12457116578523798, "grad_norm": 0.78515625, "learning_rate": 4.816972403199489e-05, "loss": 0.8817, "step": 1693 }, { "epoch": 0.12464474591860197, "grad_norm": 1.0, "learning_rate": 4.816754818858975e-05, "loss": 1.0115, "step": 1694 }, { "epoch": 0.12471832605196596, "grad_norm": 0.83203125, "learning_rate": 4.8165371101826064e-05, "loss": 0.8853, "step": 1695 }, { "epoch": 0.12479190618532997, "grad_norm": 1.0859375, "learning_rate": 4.8163192771820655e-05, "loss": 1.1001, "step": 1696 }, { "epoch": 0.12486548631869396, "grad_norm": 0.94921875, "learning_rate": 4.816101319869045e-05, "loss": 0.6952, "step": 1697 }, { "epoch": 0.12493906645205795, "grad_norm": 0.8359375, "learning_rate": 4.8158832382552406e-05, "loss": 0.6278, "step": 1698 }, { "epoch": 0.12501264658542194, "grad_norm": 0.76171875, "learning_rate": 4.8156650323523566e-05, "loss": 0.7689, "step": 1699 }, { "epoch": 0.12508622671878591, "grad_norm": 1.0078125, "learning_rate": 4.8154467021721046e-05, "loss": 0.9663, "step": 1700 }, { "epoch": 0.12515980685214992, "grad_norm": 0.9140625, "learning_rate": 4.815228247726201e-05, "loss": 1.3419, "step": 1701 }, { "epoch": 0.12523338698551392, "grad_norm": 0.8984375, "learning_rate": 4.8150096690263704e-05, "loss": 1.0555, "step": 1702 }, { "epoch": 0.1253069671188779, "grad_norm": 0.90234375, "learning_rate": 4.814790966084343e-05, "loss": 0.8885, "step": 1703 }, { "epoch": 0.1253805472522419, "grad_norm": 0.7734375, "learning_rate": 4.814572138911857e-05, "loss": 0.733, "step": 1704 }, { "epoch": 0.12545412738560588, "grad_norm": 1.0625, "learning_rate": 4.8143531875206546e-05, "loss": 1.1117, "step": 1705 }, { "epoch": 0.12552770751896988, "grad_norm": 0.8984375, "learning_rate": 4.814134111922489e-05, "loss": 0.9932, "step": 1706 }, { "epoch": 0.12560128765233386, "grad_norm": 0.9375, "learning_rate": 4.813914912129116e-05, "loss": 1.3604, "step": 1707 }, { "epoch": 0.12567486778569786, "grad_norm": 0.8828125, "learning_rate": 4.8136955881523004e-05, "loss": 0.8963, "step": 1708 }, { "epoch": 0.12574844791906184, "grad_norm": 0.86328125, "learning_rate": 4.813476140003812e-05, "loss": 0.7431, "step": 1709 }, { "epoch": 0.12582202805242584, "grad_norm": 0.8671875, "learning_rate": 4.813256567695429e-05, "loss": 1.0601, "step": 1710 }, { "epoch": 0.12589560818578985, "grad_norm": 0.79296875, "learning_rate": 4.813036871238935e-05, "loss": 0.9191, "step": 1711 }, { "epoch": 0.12596918831915382, "grad_norm": 0.89453125, "learning_rate": 4.8128170506461215e-05, "loss": 1.2271, "step": 1712 }, { "epoch": 0.12604276845251783, "grad_norm": 1.1171875, "learning_rate": 4.812597105928784e-05, "loss": 1.1664, "step": 1713 }, { "epoch": 0.1261163485858818, "grad_norm": 0.96875, "learning_rate": 4.8123770370987284e-05, "loss": 1.2332, "step": 1714 }, { "epoch": 0.1261899287192458, "grad_norm": 0.80078125, "learning_rate": 4.8121568441677656e-05, "loss": 0.8885, "step": 1715 }, { "epoch": 0.12626350885260978, "grad_norm": 0.89453125, "learning_rate": 4.8119365271477105e-05, "loss": 0.9694, "step": 1716 }, { "epoch": 0.1263370889859738, "grad_norm": 0.8984375, "learning_rate": 4.811716086050389e-05, "loss": 1.0888, "step": 1717 }, { "epoch": 0.12641066911933777, "grad_norm": 1.1640625, "learning_rate": 4.8114955208876325e-05, "loss": 0.9996, "step": 1718 }, { "epoch": 0.12648424925270177, "grad_norm": 0.94921875, "learning_rate": 4.811274831671275e-05, "loss": 1.2635, "step": 1719 }, { "epoch": 0.12655782938606577, "grad_norm": 0.8203125, "learning_rate": 4.811054018413165e-05, "loss": 0.9658, "step": 1720 }, { "epoch": 0.12663140951942975, "grad_norm": 1.0, "learning_rate": 4.810833081125149e-05, "loss": 1.4049, "step": 1721 }, { "epoch": 0.12670498965279375, "grad_norm": 0.7890625, "learning_rate": 4.810612019819087e-05, "loss": 0.8666, "step": 1722 }, { "epoch": 0.12677856978615773, "grad_norm": 0.7734375, "learning_rate": 4.8103908345068416e-05, "loss": 0.773, "step": 1723 }, { "epoch": 0.12685214991952173, "grad_norm": 0.765625, "learning_rate": 4.8101695252002846e-05, "loss": 0.7949, "step": 1724 }, { "epoch": 0.1269257300528857, "grad_norm": 1.4375, "learning_rate": 4.809948091911292e-05, "loss": 0.8907, "step": 1725 }, { "epoch": 0.12699931018624971, "grad_norm": 1.046875, "learning_rate": 4.8097265346517474e-05, "loss": 0.8715, "step": 1726 }, { "epoch": 0.1270728903196137, "grad_norm": 1.03125, "learning_rate": 4.809504853433543e-05, "loss": 1.3757, "step": 1727 }, { "epoch": 0.1271464704529777, "grad_norm": 1.0859375, "learning_rate": 4.809283048268575e-05, "loss": 1.327, "step": 1728 }, { "epoch": 0.1272200505863417, "grad_norm": 0.65234375, "learning_rate": 4.809061119168747e-05, "loss": 0.8474, "step": 1729 }, { "epoch": 0.12729363071970567, "grad_norm": 0.75, "learning_rate": 4.80883906614597e-05, "loss": 0.6874, "step": 1730 }, { "epoch": 0.12736721085306968, "grad_norm": 1.015625, "learning_rate": 4.808616889212162e-05, "loss": 0.9943, "step": 1731 }, { "epoch": 0.12744079098643366, "grad_norm": 1.1171875, "learning_rate": 4.808394588379245e-05, "loss": 1.1231, "step": 1732 }, { "epoch": 0.12751437111979766, "grad_norm": 1.078125, "learning_rate": 4.80817216365915e-05, "loss": 1.0542, "step": 1733 }, { "epoch": 0.12758795125316164, "grad_norm": 1.0, "learning_rate": 4.807949615063816e-05, "loss": 0.9884, "step": 1734 }, { "epoch": 0.12766153138652564, "grad_norm": 0.8359375, "learning_rate": 4.807726942605184e-05, "loss": 0.8242, "step": 1735 }, { "epoch": 0.12773511151988964, "grad_norm": 0.953125, "learning_rate": 4.8075041462952066e-05, "loss": 0.9871, "step": 1736 }, { "epoch": 0.12780869165325362, "grad_norm": 0.890625, "learning_rate": 4.807281226145839e-05, "loss": 0.5976, "step": 1737 }, { "epoch": 0.12788227178661762, "grad_norm": 1.0234375, "learning_rate": 4.807058182169046e-05, "loss": 0.8272, "step": 1738 }, { "epoch": 0.1279558519199816, "grad_norm": 1.09375, "learning_rate": 4.8068350143767985e-05, "loss": 1.5913, "step": 1739 }, { "epoch": 0.1280294320533456, "grad_norm": 0.94140625, "learning_rate": 4.806611722781073e-05, "loss": 0.8145, "step": 1740 }, { "epoch": 0.12810301218670958, "grad_norm": 1.3828125, "learning_rate": 4.8063883073938515e-05, "loss": 1.3143, "step": 1741 }, { "epoch": 0.12817659232007358, "grad_norm": 1.3359375, "learning_rate": 4.8061647682271266e-05, "loss": 0.7281, "step": 1742 }, { "epoch": 0.12825017245343756, "grad_norm": 1.1171875, "learning_rate": 4.805941105292894e-05, "loss": 1.1643, "step": 1743 }, { "epoch": 0.12832375258680156, "grad_norm": 1.0, "learning_rate": 4.805717318603158e-05, "loss": 1.0296, "step": 1744 }, { "epoch": 0.12839733272016557, "grad_norm": 0.8359375, "learning_rate": 4.8054934081699275e-05, "loss": 0.8484, "step": 1745 }, { "epoch": 0.12847091285352955, "grad_norm": 1.0546875, "learning_rate": 4.8052693740052214e-05, "loss": 1.1198, "step": 1746 }, { "epoch": 0.12854449298689355, "grad_norm": 0.85546875, "learning_rate": 4.8050452161210616e-05, "loss": 0.9856, "step": 1747 }, { "epoch": 0.12861807312025753, "grad_norm": 0.6796875, "learning_rate": 4.804820934529478e-05, "loss": 0.7506, "step": 1748 }, { "epoch": 0.12869165325362153, "grad_norm": 1.0859375, "learning_rate": 4.8045965292425085e-05, "loss": 1.194, "step": 1749 }, { "epoch": 0.1287652333869855, "grad_norm": 1.0078125, "learning_rate": 4.804372000272196e-05, "loss": 0.8777, "step": 1750 }, { "epoch": 0.1288388135203495, "grad_norm": 0.890625, "learning_rate": 4.804147347630591e-05, "loss": 0.9836, "step": 1751 }, { "epoch": 0.1289123936537135, "grad_norm": 0.9296875, "learning_rate": 4.80392257132975e-05, "loss": 0.7982, "step": 1752 }, { "epoch": 0.1289859737870775, "grad_norm": 1.125, "learning_rate": 4.803697671381735e-05, "loss": 1.4899, "step": 1753 }, { "epoch": 0.1290595539204415, "grad_norm": 0.81640625, "learning_rate": 4.8034726477986175e-05, "loss": 0.7501, "step": 1754 }, { "epoch": 0.12913313405380547, "grad_norm": 0.99609375, "learning_rate": 4.8032475005924734e-05, "loss": 1.5302, "step": 1755 }, { "epoch": 0.12920671418716947, "grad_norm": 0.72265625, "learning_rate": 4.8030222297753867e-05, "loss": 0.7802, "step": 1756 }, { "epoch": 0.12928029432053345, "grad_norm": 0.90625, "learning_rate": 4.802796835359447e-05, "loss": 0.7931, "step": 1757 }, { "epoch": 0.12935387445389745, "grad_norm": 0.875, "learning_rate": 4.80257131735675e-05, "loss": 0.7262, "step": 1758 }, { "epoch": 0.12942745458726143, "grad_norm": 1.0859375, "learning_rate": 4.8023456757793986e-05, "loss": 1.3814, "step": 1759 }, { "epoch": 0.12950103472062544, "grad_norm": 0.76171875, "learning_rate": 4.802119910639504e-05, "loss": 0.7002, "step": 1760 }, { "epoch": 0.1295746148539894, "grad_norm": 1.046875, "learning_rate": 4.801894021949183e-05, "loss": 1.6328, "step": 1761 }, { "epoch": 0.12964819498735342, "grad_norm": 0.96484375, "learning_rate": 4.801668009720556e-05, "loss": 0.9086, "step": 1762 }, { "epoch": 0.12972177512071742, "grad_norm": 0.734375, "learning_rate": 4.801441873965754e-05, "loss": 0.9095, "step": 1763 }, { "epoch": 0.1297953552540814, "grad_norm": 0.9140625, "learning_rate": 4.801215614696915e-05, "loss": 0.8858, "step": 1764 }, { "epoch": 0.1298689353874454, "grad_norm": 0.9765625, "learning_rate": 4.800989231926178e-05, "loss": 0.9846, "step": 1765 }, { "epoch": 0.12994251552080938, "grad_norm": 0.859375, "learning_rate": 4.8007627256656965e-05, "loss": 1.1344, "step": 1766 }, { "epoch": 0.13001609565417338, "grad_norm": 0.87890625, "learning_rate": 4.8005360959276255e-05, "loss": 0.721, "step": 1767 }, { "epoch": 0.13008967578753736, "grad_norm": 0.90625, "learning_rate": 4.8003093427241266e-05, "loss": 0.9318, "step": 1768 }, { "epoch": 0.13016325592090136, "grad_norm": 1.15625, "learning_rate": 4.800082466067369e-05, "loss": 1.6782, "step": 1769 }, { "epoch": 0.13023683605426534, "grad_norm": 0.9296875, "learning_rate": 4.799855465969531e-05, "loss": 0.799, "step": 1770 }, { "epoch": 0.13031041618762934, "grad_norm": 0.97265625, "learning_rate": 4.7996283424427935e-05, "loss": 1.383, "step": 1771 }, { "epoch": 0.13038399632099335, "grad_norm": 0.94921875, "learning_rate": 4.7994010954993465e-05, "loss": 1.0181, "step": 1772 }, { "epoch": 0.13045757645435732, "grad_norm": 0.890625, "learning_rate": 4.7991737251513855e-05, "loss": 0.9318, "step": 1773 }, { "epoch": 0.13053115658772133, "grad_norm": 1.0390625, "learning_rate": 4.798946231411113e-05, "loss": 1.2828, "step": 1774 }, { "epoch": 0.1306047367210853, "grad_norm": 0.80078125, "learning_rate": 4.798718614290739e-05, "loss": 0.8909, "step": 1775 }, { "epoch": 0.1306783168544493, "grad_norm": 1.015625, "learning_rate": 4.798490873802478e-05, "loss": 0.9724, "step": 1776 }, { "epoch": 0.13075189698781328, "grad_norm": 0.76171875, "learning_rate": 4.798263009958553e-05, "loss": 0.7469, "step": 1777 }, { "epoch": 0.13082547712117729, "grad_norm": 0.859375, "learning_rate": 4.7980350227711935e-05, "loss": 0.9373, "step": 1778 }, { "epoch": 0.13089905725454126, "grad_norm": 0.90234375, "learning_rate": 4.7978069122526334e-05, "loss": 0.8338, "step": 1779 }, { "epoch": 0.13097263738790527, "grad_norm": 0.82421875, "learning_rate": 4.797578678415118e-05, "loss": 0.6374, "step": 1780 }, { "epoch": 0.13104621752126927, "grad_norm": 0.8984375, "learning_rate": 4.797350321270894e-05, "loss": 0.9759, "step": 1781 }, { "epoch": 0.13111979765463325, "grad_norm": 1.2578125, "learning_rate": 4.7971218408322175e-05, "loss": 1.3983, "step": 1782 }, { "epoch": 0.13119337778799725, "grad_norm": 0.96875, "learning_rate": 4.7968932371113495e-05, "loss": 1.0449, "step": 1783 }, { "epoch": 0.13126695792136123, "grad_norm": 0.8828125, "learning_rate": 4.7966645101205596e-05, "loss": 0.7516, "step": 1784 }, { "epoch": 0.13134053805472523, "grad_norm": 0.8046875, "learning_rate": 4.7964356598721245e-05, "loss": 0.7642, "step": 1785 }, { "epoch": 0.1314141181880892, "grad_norm": 0.8125, "learning_rate": 4.796206686378324e-05, "loss": 0.8153, "step": 1786 }, { "epoch": 0.1314876983214532, "grad_norm": 0.953125, "learning_rate": 4.7959775896514484e-05, "loss": 1.0266, "step": 1787 }, { "epoch": 0.1315612784548172, "grad_norm": 0.828125, "learning_rate": 4.795748369703791e-05, "loss": 0.9504, "step": 1788 }, { "epoch": 0.1316348585881812, "grad_norm": 0.80859375, "learning_rate": 4.795519026547655e-05, "loss": 0.7396, "step": 1789 }, { "epoch": 0.1317084387215452, "grad_norm": 0.87890625, "learning_rate": 4.7952895601953484e-05, "loss": 0.885, "step": 1790 }, { "epoch": 0.13178201885490917, "grad_norm": 1.03125, "learning_rate": 4.795059970659187e-05, "loss": 0.8995, "step": 1791 }, { "epoch": 0.13185559898827318, "grad_norm": 0.91796875, "learning_rate": 4.7948302579514906e-05, "loss": 0.7972, "step": 1792 }, { "epoch": 0.13192917912163715, "grad_norm": 0.74609375, "learning_rate": 4.794600422084589e-05, "loss": 0.7332, "step": 1793 }, { "epoch": 0.13200275925500116, "grad_norm": 1.0625, "learning_rate": 4.7943704630708163e-05, "loss": 1.256, "step": 1794 }, { "epoch": 0.13207633938836513, "grad_norm": 0.81640625, "learning_rate": 4.794140380922515e-05, "loss": 1.0226, "step": 1795 }, { "epoch": 0.13214991952172914, "grad_norm": 1.015625, "learning_rate": 4.793910175652032e-05, "loss": 0.8569, "step": 1796 }, { "epoch": 0.1322234996550931, "grad_norm": 0.97265625, "learning_rate": 4.793679847271723e-05, "loss": 1.2505, "step": 1797 }, { "epoch": 0.13229707978845712, "grad_norm": 0.671875, "learning_rate": 4.7934493957939485e-05, "loss": 0.8825, "step": 1798 }, { "epoch": 0.13237065992182112, "grad_norm": 0.88671875, "learning_rate": 4.793218821231077e-05, "loss": 0.9187, "step": 1799 }, { "epoch": 0.1324442400551851, "grad_norm": 0.9375, "learning_rate": 4.792988123595482e-05, "loss": 1.1352, "step": 1800 }, { "epoch": 0.1325178201885491, "grad_norm": 1.0, "learning_rate": 4.7927573028995453e-05, "loss": 1.1833, "step": 1801 }, { "epoch": 0.13259140032191308, "grad_norm": 0.84375, "learning_rate": 4.792526359155655e-05, "loss": 0.7917, "step": 1802 }, { "epoch": 0.13266498045527708, "grad_norm": 0.796875, "learning_rate": 4.792295292376204e-05, "loss": 0.9438, "step": 1803 }, { "epoch": 0.13273856058864106, "grad_norm": 0.75390625, "learning_rate": 4.792064102573595e-05, "loss": 0.6914, "step": 1804 }, { "epoch": 0.13281214072200506, "grad_norm": 0.79296875, "learning_rate": 4.7918327897602344e-05, "loss": 0.9107, "step": 1805 }, { "epoch": 0.13288572085536904, "grad_norm": 0.76953125, "learning_rate": 4.791601353948537e-05, "loss": 1.0726, "step": 1806 }, { "epoch": 0.13295930098873304, "grad_norm": 0.75390625, "learning_rate": 4.791369795150923e-05, "loss": 0.9249, "step": 1807 }, { "epoch": 0.13303288112209705, "grad_norm": 0.890625, "learning_rate": 4.7911381133798197e-05, "loss": 0.9071, "step": 1808 }, { "epoch": 0.13310646125546102, "grad_norm": 0.98046875, "learning_rate": 4.790906308647661e-05, "loss": 1.2928, "step": 1809 }, { "epoch": 0.13318004138882503, "grad_norm": 0.84375, "learning_rate": 4.7906743809668885e-05, "loss": 1.1867, "step": 1810 }, { "epoch": 0.133253621522189, "grad_norm": 0.734375, "learning_rate": 4.790442330349948e-05, "loss": 0.6382, "step": 1811 }, { "epoch": 0.133327201655553, "grad_norm": 0.86328125, "learning_rate": 4.7902101568092935e-05, "loss": 1.0395, "step": 1812 }, { "epoch": 0.13340078178891698, "grad_norm": 0.83203125, "learning_rate": 4.789977860357385e-05, "loss": 1.1669, "step": 1813 }, { "epoch": 0.133474361922281, "grad_norm": 1.09375, "learning_rate": 4.789745441006691e-05, "loss": 1.3609, "step": 1814 }, { "epoch": 0.13354794205564496, "grad_norm": 1.1328125, "learning_rate": 4.7895128987696834e-05, "loss": 0.9618, "step": 1815 }, { "epoch": 0.13362152218900897, "grad_norm": 0.7578125, "learning_rate": 4.7892802336588425e-05, "loss": 0.74, "step": 1816 }, { "epoch": 0.13369510232237297, "grad_norm": 0.91796875, "learning_rate": 4.789047445686656e-05, "loss": 0.9331, "step": 1817 }, { "epoch": 0.13376868245573695, "grad_norm": 0.98828125, "learning_rate": 4.788814534865615e-05, "loss": 0.9501, "step": 1818 }, { "epoch": 0.13384226258910095, "grad_norm": 1.171875, "learning_rate": 4.788581501208222e-05, "loss": 1.0475, "step": 1819 }, { "epoch": 0.13391584272246493, "grad_norm": 0.87890625, "learning_rate": 4.788348344726983e-05, "loss": 0.8627, "step": 1820 }, { "epoch": 0.13398942285582893, "grad_norm": 0.91796875, "learning_rate": 4.7881150654344093e-05, "loss": 0.8419, "step": 1821 }, { "epoch": 0.1340630029891929, "grad_norm": 1.0234375, "learning_rate": 4.787881663343022e-05, "loss": 0.8597, "step": 1822 }, { "epoch": 0.1341365831225569, "grad_norm": 0.93359375, "learning_rate": 4.787648138465347e-05, "loss": 1.136, "step": 1823 }, { "epoch": 0.1342101632559209, "grad_norm": 0.78515625, "learning_rate": 4.7874144908139175e-05, "loss": 0.7524, "step": 1824 }, { "epoch": 0.1342837433892849, "grad_norm": 0.80859375, "learning_rate": 4.787180720401272e-05, "loss": 0.9548, "step": 1825 }, { "epoch": 0.1343573235226489, "grad_norm": 0.9609375, "learning_rate": 4.7869468272399574e-05, "loss": 0.8754, "step": 1826 }, { "epoch": 0.13443090365601287, "grad_norm": 0.8046875, "learning_rate": 4.7867128113425265e-05, "loss": 0.8539, "step": 1827 }, { "epoch": 0.13450448378937688, "grad_norm": 0.75390625, "learning_rate": 4.786478672721537e-05, "loss": 0.7618, "step": 1828 }, { "epoch": 0.13457806392274085, "grad_norm": 1.0, "learning_rate": 4.7862444113895565e-05, "loss": 0.9876, "step": 1829 }, { "epoch": 0.13465164405610486, "grad_norm": 0.75, "learning_rate": 4.786010027359156e-05, "loss": 0.8211, "step": 1830 }, { "epoch": 0.13472522418946883, "grad_norm": 0.98046875, "learning_rate": 4.785775520642916e-05, "loss": 1.0766, "step": 1831 }, { "epoch": 0.13479880432283284, "grad_norm": 1.1484375, "learning_rate": 4.785540891253419e-05, "loss": 1.0317, "step": 1832 }, { "epoch": 0.13487238445619681, "grad_norm": 0.84765625, "learning_rate": 4.78530613920326e-05, "loss": 1.0577, "step": 1833 }, { "epoch": 0.13494596458956082, "grad_norm": 0.91796875, "learning_rate": 4.785071264505038e-05, "loss": 0.6501, "step": 1834 }, { "epoch": 0.13501954472292482, "grad_norm": 0.8046875, "learning_rate": 4.784836267171356e-05, "loss": 0.8578, "step": 1835 }, { "epoch": 0.1350931248562888, "grad_norm": 1.1328125, "learning_rate": 4.784601147214828e-05, "loss": 1.4335, "step": 1836 }, { "epoch": 0.1351667049896528, "grad_norm": 0.7578125, "learning_rate": 4.78436590464807e-05, "loss": 0.7884, "step": 1837 }, { "epoch": 0.13524028512301678, "grad_norm": 0.890625, "learning_rate": 4.7841305394837096e-05, "loss": 0.9455, "step": 1838 }, { "epoch": 0.13531386525638078, "grad_norm": 0.97265625, "learning_rate": 4.7838950517343774e-05, "loss": 0.9894, "step": 1839 }, { "epoch": 0.13538744538974476, "grad_norm": 0.72265625, "learning_rate": 4.78365944141271e-05, "loss": 0.7096, "step": 1840 }, { "epoch": 0.13546102552310876, "grad_norm": 0.890625, "learning_rate": 4.783423708531355e-05, "loss": 0.9349, "step": 1841 }, { "epoch": 0.13553460565647274, "grad_norm": 0.71484375, "learning_rate": 4.783187853102962e-05, "loss": 0.9142, "step": 1842 }, { "epoch": 0.13560818578983674, "grad_norm": 0.984375, "learning_rate": 4.7829518751401893e-05, "loss": 0.7758, "step": 1843 }, { "epoch": 0.13568176592320075, "grad_norm": 1.0546875, "learning_rate": 4.7827157746557e-05, "loss": 1.3119, "step": 1844 }, { "epoch": 0.13575534605656472, "grad_norm": 0.8828125, "learning_rate": 4.782479551662168e-05, "loss": 1.2775, "step": 1845 }, { "epoch": 0.13582892618992873, "grad_norm": 0.86328125, "learning_rate": 4.7822432061722693e-05, "loss": 0.9439, "step": 1846 }, { "epoch": 0.1359025063232927, "grad_norm": 0.9765625, "learning_rate": 4.7820067381986885e-05, "loss": 1.2515, "step": 1847 }, { "epoch": 0.1359760864566567, "grad_norm": 0.9453125, "learning_rate": 4.7817701477541154e-05, "loss": 0.8253, "step": 1848 }, { "epoch": 0.13604966659002068, "grad_norm": 0.92578125, "learning_rate": 4.781533434851249e-05, "loss": 0.6957, "step": 1849 }, { "epoch": 0.1361232467233847, "grad_norm": 0.99609375, "learning_rate": 4.7812965995027915e-05, "loss": 1.0547, "step": 1850 }, { "epoch": 0.13619682685674867, "grad_norm": 1.3359375, "learning_rate": 4.7810596417214543e-05, "loss": 1.3689, "step": 1851 }, { "epoch": 0.13627040699011267, "grad_norm": 0.8671875, "learning_rate": 4.780822561519955e-05, "loss": 1.0853, "step": 1852 }, { "epoch": 0.13634398712347667, "grad_norm": 0.84765625, "learning_rate": 4.780585358911016e-05, "loss": 0.9258, "step": 1853 }, { "epoch": 0.13641756725684065, "grad_norm": 0.734375, "learning_rate": 4.780348033907369e-05, "loss": 0.8984, "step": 1854 }, { "epoch": 0.13649114739020465, "grad_norm": 0.93359375, "learning_rate": 4.78011058652175e-05, "loss": 1.2386, "step": 1855 }, { "epoch": 0.13656472752356863, "grad_norm": 0.828125, "learning_rate": 4.7798730167669016e-05, "loss": 0.9408, "step": 1856 }, { "epoch": 0.13663830765693263, "grad_norm": 0.72265625, "learning_rate": 4.7796353246555746e-05, "loss": 0.6505, "step": 1857 }, { "epoch": 0.1367118877902966, "grad_norm": 1.25, "learning_rate": 4.779397510200525e-05, "loss": 1.2214, "step": 1858 }, { "epoch": 0.13678546792366061, "grad_norm": 0.96484375, "learning_rate": 4.7791595734145164e-05, "loss": 0.976, "step": 1859 }, { "epoch": 0.1368590480570246, "grad_norm": 0.9296875, "learning_rate": 4.7789215143103195e-05, "loss": 0.8904, "step": 1860 }, { "epoch": 0.1369326281903886, "grad_norm": 0.98828125, "learning_rate": 4.778683332900708e-05, "loss": 1.0398, "step": 1861 }, { "epoch": 0.1370062083237526, "grad_norm": 0.765625, "learning_rate": 4.7784450291984664e-05, "loss": 0.9263, "step": 1862 }, { "epoch": 0.13707978845711657, "grad_norm": 0.88671875, "learning_rate": 4.778206603216383e-05, "loss": 0.9906, "step": 1863 }, { "epoch": 0.13715336859048058, "grad_norm": 0.8125, "learning_rate": 4.777968054967254e-05, "loss": 0.9936, "step": 1864 }, { "epoch": 0.13722694872384456, "grad_norm": 0.82421875, "learning_rate": 4.777729384463882e-05, "loss": 0.7839, "step": 1865 }, { "epoch": 0.13730052885720856, "grad_norm": 0.9375, "learning_rate": 4.777490591719076e-05, "loss": 0.9634, "step": 1866 }, { "epoch": 0.13737410899057254, "grad_norm": 0.98828125, "learning_rate": 4.777251676745652e-05, "loss": 0.9631, "step": 1867 }, { "epoch": 0.13744768912393654, "grad_norm": 0.66796875, "learning_rate": 4.7770126395564315e-05, "loss": 0.595, "step": 1868 }, { "epoch": 0.13752126925730052, "grad_norm": 0.83984375, "learning_rate": 4.776773480164243e-05, "loss": 0.869, "step": 1869 }, { "epoch": 0.13759484939066452, "grad_norm": 0.8125, "learning_rate": 4.776534198581922e-05, "loss": 0.8312, "step": 1870 }, { "epoch": 0.13766842952402852, "grad_norm": 0.98828125, "learning_rate": 4.77629479482231e-05, "loss": 0.9036, "step": 1871 }, { "epoch": 0.1377420096573925, "grad_norm": 0.98828125, "learning_rate": 4.776055268898256e-05, "loss": 1.1004, "step": 1872 }, { "epoch": 0.1378155897907565, "grad_norm": 0.84765625, "learning_rate": 4.7758156208226156e-05, "loss": 0.7739, "step": 1873 }, { "epoch": 0.13788916992412048, "grad_norm": 1.1328125, "learning_rate": 4.775575850608248e-05, "loss": 0.9294, "step": 1874 }, { "epoch": 0.13796275005748448, "grad_norm": 0.8671875, "learning_rate": 4.775335958268023e-05, "loss": 0.7431, "step": 1875 }, { "epoch": 0.13803633019084846, "grad_norm": 0.9140625, "learning_rate": 4.7750959438148146e-05, "loss": 0.9908, "step": 1876 }, { "epoch": 0.13810991032421246, "grad_norm": 0.83203125, "learning_rate": 4.7748558072615034e-05, "loss": 0.9226, "step": 1877 }, { "epoch": 0.13818349045757644, "grad_norm": 0.7890625, "learning_rate": 4.7746155486209784e-05, "loss": 1.1121, "step": 1878 }, { "epoch": 0.13825707059094045, "grad_norm": 0.96484375, "learning_rate": 4.774375167906132e-05, "loss": 1.1865, "step": 1879 }, { "epoch": 0.13833065072430445, "grad_norm": 0.73828125, "learning_rate": 4.7741346651298665e-05, "loss": 0.825, "step": 1880 }, { "epoch": 0.13840423085766843, "grad_norm": 0.7890625, "learning_rate": 4.773894040305089e-05, "loss": 0.6942, "step": 1881 }, { "epoch": 0.13847781099103243, "grad_norm": 0.92578125, "learning_rate": 4.7736532934447134e-05, "loss": 0.8538, "step": 1882 }, { "epoch": 0.1385513911243964, "grad_norm": 0.6171875, "learning_rate": 4.77341242456166e-05, "loss": 0.5544, "step": 1883 }, { "epoch": 0.1386249712577604, "grad_norm": 1.140625, "learning_rate": 4.773171433668855e-05, "loss": 0.9721, "step": 1884 }, { "epoch": 0.1386985513911244, "grad_norm": 0.7109375, "learning_rate": 4.772930320779232e-05, "loss": 0.7255, "step": 1885 }, { "epoch": 0.1387721315244884, "grad_norm": 0.796875, "learning_rate": 4.772689085905733e-05, "loss": 0.6076, "step": 1886 }, { "epoch": 0.13884571165785237, "grad_norm": 0.98046875, "learning_rate": 4.7724477290613024e-05, "loss": 1.1983, "step": 1887 }, { "epoch": 0.13891929179121637, "grad_norm": 0.92578125, "learning_rate": 4.772206250258894e-05, "loss": 1.039, "step": 1888 }, { "epoch": 0.13899287192458037, "grad_norm": 0.71875, "learning_rate": 4.771964649511469e-05, "loss": 0.7667, "step": 1889 }, { "epoch": 0.13906645205794435, "grad_norm": 0.69140625, "learning_rate": 4.771722926831991e-05, "loss": 0.7998, "step": 1890 }, { "epoch": 0.13914003219130835, "grad_norm": 1.0546875, "learning_rate": 4.771481082233434e-05, "loss": 1.4976, "step": 1891 }, { "epoch": 0.13921361232467233, "grad_norm": 0.828125, "learning_rate": 4.771239115728779e-05, "loss": 0.8863, "step": 1892 }, { "epoch": 0.13928719245803634, "grad_norm": 0.796875, "learning_rate": 4.7709970273310095e-05, "loss": 0.7846, "step": 1893 }, { "epoch": 0.1393607725914003, "grad_norm": 0.84765625, "learning_rate": 4.770754817053119e-05, "loss": 0.9603, "step": 1894 }, { "epoch": 0.13943435272476432, "grad_norm": 1.0859375, "learning_rate": 4.7705124849081063e-05, "loss": 1.2508, "step": 1895 }, { "epoch": 0.1395079328581283, "grad_norm": 0.87890625, "learning_rate": 4.7702700309089776e-05, "loss": 0.6949, "step": 1896 }, { "epoch": 0.1395815129914923, "grad_norm": 0.9921875, "learning_rate": 4.770027455068743e-05, "loss": 1.0184, "step": 1897 }, { "epoch": 0.1396550931248563, "grad_norm": 0.92578125, "learning_rate": 4.7697847574004234e-05, "loss": 1.1851, "step": 1898 }, { "epoch": 0.13972867325822028, "grad_norm": 0.984375, "learning_rate": 4.7695419379170426e-05, "loss": 1.2688, "step": 1899 }, { "epoch": 0.13980225339158428, "grad_norm": 0.98046875, "learning_rate": 4.7692989966316324e-05, "loss": 0.8647, "step": 1900 }, { "epoch": 0.13987583352494826, "grad_norm": 0.90234375, "learning_rate": 4.769055933557231e-05, "loss": 1.0615, "step": 1901 }, { "epoch": 0.13994941365831226, "grad_norm": 0.7890625, "learning_rate": 4.7688127487068836e-05, "loss": 1.0232, "step": 1902 }, { "epoch": 0.14002299379167624, "grad_norm": 0.9296875, "learning_rate": 4.768569442093641e-05, "loss": 1.0361, "step": 1903 }, { "epoch": 0.14009657392504024, "grad_norm": 0.8515625, "learning_rate": 4.7683260137305615e-05, "loss": 0.9985, "step": 1904 }, { "epoch": 0.14017015405840422, "grad_norm": 1.0703125, "learning_rate": 4.7680824636307086e-05, "loss": 1.0015, "step": 1905 }, { "epoch": 0.14024373419176822, "grad_norm": 0.86328125, "learning_rate": 4.767838791807154e-05, "loss": 0.8507, "step": 1906 }, { "epoch": 0.14031731432513223, "grad_norm": 0.8828125, "learning_rate": 4.767594998272974e-05, "loss": 0.9166, "step": 1907 }, { "epoch": 0.1403908944584962, "grad_norm": 0.875, "learning_rate": 4.7673510830412546e-05, "loss": 1.0552, "step": 1908 }, { "epoch": 0.1404644745918602, "grad_norm": 0.8046875, "learning_rate": 4.7671070461250846e-05, "loss": 1.0193, "step": 1909 }, { "epoch": 0.14053805472522418, "grad_norm": 1.0859375, "learning_rate": 4.766862887537561e-05, "loss": 1.2958, "step": 1910 }, { "epoch": 0.14061163485858819, "grad_norm": 0.8671875, "learning_rate": 4.766618607291787e-05, "loss": 0.8358, "step": 1911 }, { "epoch": 0.14068521499195216, "grad_norm": 0.89453125, "learning_rate": 4.766374205400875e-05, "loss": 1.1922, "step": 1912 }, { "epoch": 0.14075879512531617, "grad_norm": 1.171875, "learning_rate": 4.766129681877939e-05, "loss": 1.3769, "step": 1913 }, { "epoch": 0.14083237525868014, "grad_norm": 0.98828125, "learning_rate": 4.765885036736104e-05, "loss": 0.8358, "step": 1914 }, { "epoch": 0.14090595539204415, "grad_norm": 1.046875, "learning_rate": 4.765640269988497e-05, "loss": 1.398, "step": 1915 }, { "epoch": 0.14097953552540815, "grad_norm": 0.85546875, "learning_rate": 4.7653953816482576e-05, "loss": 1.0017, "step": 1916 }, { "epoch": 0.14105311565877213, "grad_norm": 0.86328125, "learning_rate": 4.7651503717285265e-05, "loss": 0.8333, "step": 1917 }, { "epoch": 0.14112669579213613, "grad_norm": 0.92578125, "learning_rate": 4.764905240242452e-05, "loss": 0.8153, "step": 1918 }, { "epoch": 0.1412002759255001, "grad_norm": 0.92578125, "learning_rate": 4.764659987203192e-05, "loss": 1.147, "step": 1919 }, { "epoch": 0.1412738560588641, "grad_norm": 0.84765625, "learning_rate": 4.764414612623907e-05, "loss": 0.7301, "step": 1920 }, { "epoch": 0.1413474361922281, "grad_norm": 0.9375, "learning_rate": 4.764169116517768e-05, "loss": 0.8859, "step": 1921 }, { "epoch": 0.1414210163255921, "grad_norm": 0.70703125, "learning_rate": 4.763923498897948e-05, "loss": 0.6014, "step": 1922 }, { "epoch": 0.1414945964589561, "grad_norm": 1.3125, "learning_rate": 4.7636777597776306e-05, "loss": 1.2059, "step": 1923 }, { "epoch": 0.14156817659232007, "grad_norm": 0.875, "learning_rate": 4.763431899170002e-05, "loss": 0.791, "step": 1924 }, { "epoch": 0.14164175672568408, "grad_norm": 0.80859375, "learning_rate": 4.76318591708826e-05, "loss": 0.7207, "step": 1925 }, { "epoch": 0.14171533685904805, "grad_norm": 0.80078125, "learning_rate": 4.7629398135456035e-05, "loss": 0.8129, "step": 1926 }, { "epoch": 0.14178891699241206, "grad_norm": 0.890625, "learning_rate": 4.7626935885552406e-05, "loss": 0.8824, "step": 1927 }, { "epoch": 0.14186249712577603, "grad_norm": 1.25, "learning_rate": 4.7624472421303876e-05, "loss": 1.1402, "step": 1928 }, { "epoch": 0.14193607725914004, "grad_norm": 0.77734375, "learning_rate": 4.7622007742842635e-05, "loss": 0.9439, "step": 1929 }, { "epoch": 0.142009657392504, "grad_norm": 0.97265625, "learning_rate": 4.7619541850300976e-05, "loss": 0.9133, "step": 1930 }, { "epoch": 0.14208323752586802, "grad_norm": 1.1875, "learning_rate": 4.761707474381122e-05, "loss": 1.0942, "step": 1931 }, { "epoch": 0.14215681765923202, "grad_norm": 1.0625, "learning_rate": 4.7614606423505773e-05, "loss": 1.0403, "step": 1932 }, { "epoch": 0.142230397792596, "grad_norm": 0.8671875, "learning_rate": 4.761213688951712e-05, "loss": 0.9698, "step": 1933 }, { "epoch": 0.14230397792596, "grad_norm": 0.8125, "learning_rate": 4.760966614197779e-05, "loss": 0.8362, "step": 1934 }, { "epoch": 0.14237755805932398, "grad_norm": 1.0625, "learning_rate": 4.760719418102038e-05, "loss": 1.0727, "step": 1935 }, { "epoch": 0.14245113819268798, "grad_norm": 0.79296875, "learning_rate": 4.760472100677755e-05, "loss": 0.9647, "step": 1936 }, { "epoch": 0.14252471832605196, "grad_norm": 0.9453125, "learning_rate": 4.760224661938205e-05, "loss": 0.9848, "step": 1937 }, { "epoch": 0.14259829845941596, "grad_norm": 0.9765625, "learning_rate": 4.759977101896666e-05, "loss": 1.0689, "step": 1938 }, { "epoch": 0.14267187859277994, "grad_norm": 0.93359375, "learning_rate": 4.7597294205664244e-05, "loss": 1.0485, "step": 1939 }, { "epoch": 0.14274545872614394, "grad_norm": 0.91015625, "learning_rate": 4.759481617960772e-05, "loss": 1.0691, "step": 1940 }, { "epoch": 0.14281903885950795, "grad_norm": 0.80859375, "learning_rate": 4.75923369409301e-05, "loss": 0.9305, "step": 1941 }, { "epoch": 0.14289261899287192, "grad_norm": 1.1328125, "learning_rate": 4.7589856489764414e-05, "loss": 1.23, "step": 1942 }, { "epoch": 0.14296619912623593, "grad_norm": 1.125, "learning_rate": 4.758737482624381e-05, "loss": 1.4511, "step": 1943 }, { "epoch": 0.1430397792595999, "grad_norm": 1.046875, "learning_rate": 4.758489195050145e-05, "loss": 0.85, "step": 1944 }, { "epoch": 0.1431133593929639, "grad_norm": 0.86328125, "learning_rate": 4.758240786267061e-05, "loss": 0.8774, "step": 1945 }, { "epoch": 0.14318693952632788, "grad_norm": 0.84375, "learning_rate": 4.757992256288458e-05, "loss": 0.9928, "step": 1946 }, { "epoch": 0.1432605196596919, "grad_norm": 0.828125, "learning_rate": 4.7577436051276754e-05, "loss": 1.0241, "step": 1947 }, { "epoch": 0.14333409979305586, "grad_norm": 1.1640625, "learning_rate": 4.757494832798057e-05, "loss": 0.9499, "step": 1948 }, { "epoch": 0.14340767992641987, "grad_norm": 0.9921875, "learning_rate": 4.7572459393129567e-05, "loss": 1.0906, "step": 1949 }, { "epoch": 0.14348126005978387, "grad_norm": 0.921875, "learning_rate": 4.7569969246857285e-05, "loss": 0.8805, "step": 1950 }, { "epoch": 0.14355484019314785, "grad_norm": 0.78515625, "learning_rate": 4.756747788929739e-05, "loss": 0.9583, "step": 1951 }, { "epoch": 0.14362842032651185, "grad_norm": 1.0, "learning_rate": 4.7564985320583575e-05, "loss": 1.0505, "step": 1952 }, { "epoch": 0.14370200045987583, "grad_norm": 0.76171875, "learning_rate": 4.756249154084963e-05, "loss": 0.9484, "step": 1953 }, { "epoch": 0.14377558059323983, "grad_norm": 0.91015625, "learning_rate": 4.755999655022937e-05, "loss": 1.1815, "step": 1954 }, { "epoch": 0.1438491607266038, "grad_norm": 2.46875, "learning_rate": 4.75575003488567e-05, "loss": 0.9117, "step": 1955 }, { "epoch": 0.1439227408599678, "grad_norm": 0.703125, "learning_rate": 4.75550029368656e-05, "loss": 0.7588, "step": 1956 }, { "epoch": 0.1439963209933318, "grad_norm": 0.828125, "learning_rate": 4.755250431439009e-05, "loss": 0.6512, "step": 1957 }, { "epoch": 0.1440699011266958, "grad_norm": 0.96875, "learning_rate": 4.7550004481564266e-05, "loss": 1.0559, "step": 1958 }, { "epoch": 0.1441434812600598, "grad_norm": 0.9375, "learning_rate": 4.754750343852229e-05, "loss": 0.932, "step": 1959 }, { "epoch": 0.14421706139342377, "grad_norm": 0.8515625, "learning_rate": 4.75450011853984e-05, "loss": 0.8623, "step": 1960 }, { "epoch": 0.14429064152678778, "grad_norm": 0.9140625, "learning_rate": 4.7542497722326874e-05, "loss": 0.6626, "step": 1961 }, { "epoch": 0.14436422166015175, "grad_norm": 1.0546875, "learning_rate": 4.753999304944207e-05, "loss": 1.1838, "step": 1962 }, { "epoch": 0.14443780179351576, "grad_norm": 1.0546875, "learning_rate": 4.753748716687841e-05, "loss": 0.9884, "step": 1963 }, { "epoch": 0.14451138192687973, "grad_norm": 0.75, "learning_rate": 4.753498007477038e-05, "loss": 0.8274, "step": 1964 }, { "epoch": 0.14458496206024374, "grad_norm": 0.98046875, "learning_rate": 4.7532471773252535e-05, "loss": 1.5581, "step": 1965 }, { "epoch": 0.14465854219360771, "grad_norm": 0.8125, "learning_rate": 4.7529962262459484e-05, "loss": 0.856, "step": 1966 }, { "epoch": 0.14473212232697172, "grad_norm": 0.82421875, "learning_rate": 4.752745154252591e-05, "loss": 0.8135, "step": 1967 }, { "epoch": 0.14480570246033572, "grad_norm": 1.1171875, "learning_rate": 4.752493961358657e-05, "loss": 1.2011, "step": 1968 }, { "epoch": 0.1448792825936997, "grad_norm": 0.90234375, "learning_rate": 4.752242647577626e-05, "loss": 0.7709, "step": 1969 }, { "epoch": 0.1449528627270637, "grad_norm": 0.9140625, "learning_rate": 4.751991212922986e-05, "loss": 0.8145, "step": 1970 }, { "epoch": 0.14502644286042768, "grad_norm": 0.99609375, "learning_rate": 4.751739657408231e-05, "loss": 1.091, "step": 1971 }, { "epoch": 0.14510002299379168, "grad_norm": 0.9296875, "learning_rate": 4.751487981046861e-05, "loss": 1.2284, "step": 1972 }, { "epoch": 0.14517360312715566, "grad_norm": 2.59375, "learning_rate": 4.751236183852385e-05, "loss": 1.9465, "step": 1973 }, { "epoch": 0.14524718326051966, "grad_norm": 0.8125, "learning_rate": 4.750984265838313e-05, "loss": 0.9955, "step": 1974 }, { "epoch": 0.14532076339388364, "grad_norm": 0.828125, "learning_rate": 4.750732227018168e-05, "loss": 1.0679, "step": 1975 }, { "epoch": 0.14539434352724764, "grad_norm": 1.0078125, "learning_rate": 4.750480067405476e-05, "loss": 1.0622, "step": 1976 }, { "epoch": 0.14546792366061165, "grad_norm": 1.0234375, "learning_rate": 4.750227787013768e-05, "loss": 1.7516, "step": 1977 }, { "epoch": 0.14554150379397562, "grad_norm": 0.8046875, "learning_rate": 4.749975385856586e-05, "loss": 0.8107, "step": 1978 }, { "epoch": 0.14561508392733963, "grad_norm": 0.76953125, "learning_rate": 4.749722863947473e-05, "loss": 0.5357, "step": 1979 }, { "epoch": 0.1456886640607036, "grad_norm": 0.7578125, "learning_rate": 4.749470221299984e-05, "loss": 0.8121, "step": 1980 }, { "epoch": 0.1457622441940676, "grad_norm": 0.8984375, "learning_rate": 4.7492174579276774e-05, "loss": 0.9148, "step": 1981 }, { "epoch": 0.14583582432743158, "grad_norm": 1.0859375, "learning_rate": 4.748964573844118e-05, "loss": 1.3749, "step": 1982 }, { "epoch": 0.1459094044607956, "grad_norm": 0.82421875, "learning_rate": 4.748711569062877e-05, "loss": 0.7866, "step": 1983 }, { "epoch": 0.14598298459415956, "grad_norm": 0.828125, "learning_rate": 4.748458443597533e-05, "loss": 0.9804, "step": 1984 }, { "epoch": 0.14605656472752357, "grad_norm": 0.8671875, "learning_rate": 4.748205197461671e-05, "loss": 0.8009, "step": 1985 }, { "epoch": 0.14613014486088757, "grad_norm": 0.72265625, "learning_rate": 4.747951830668884e-05, "loss": 1.013, "step": 1986 }, { "epoch": 0.14620372499425155, "grad_norm": 0.92578125, "learning_rate": 4.747698343232766e-05, "loss": 1.1268, "step": 1987 }, { "epoch": 0.14627730512761555, "grad_norm": 0.8671875, "learning_rate": 4.7474447351669244e-05, "loss": 1.0835, "step": 1988 }, { "epoch": 0.14635088526097953, "grad_norm": 0.94921875, "learning_rate": 4.7471910064849685e-05, "loss": 1.0355, "step": 1989 }, { "epoch": 0.14642446539434353, "grad_norm": 1.0546875, "learning_rate": 4.746937157200515e-05, "loss": 1.3579, "step": 1990 }, { "epoch": 0.1464980455277075, "grad_norm": 0.9296875, "learning_rate": 4.746683187327189e-05, "loss": 1.1034, "step": 1991 }, { "epoch": 0.14657162566107151, "grad_norm": 0.76953125, "learning_rate": 4.746429096878619e-05, "loss": 0.7161, "step": 1992 }, { "epoch": 0.1466452057944355, "grad_norm": 0.8828125, "learning_rate": 4.746174885868443e-05, "loss": 1.0823, "step": 1993 }, { "epoch": 0.1467187859277995, "grad_norm": 0.7578125, "learning_rate": 4.7459205543103026e-05, "loss": 0.7984, "step": 1994 }, { "epoch": 0.1467923660611635, "grad_norm": 0.8671875, "learning_rate": 4.745666102217848e-05, "loss": 0.8845, "step": 1995 }, { "epoch": 0.14686594619452747, "grad_norm": 1.046875, "learning_rate": 4.745411529604736e-05, "loss": 1.076, "step": 1996 }, { "epoch": 0.14693952632789148, "grad_norm": 0.96484375, "learning_rate": 4.745156836484627e-05, "loss": 1.0053, "step": 1997 }, { "epoch": 0.14701310646125546, "grad_norm": 0.8203125, "learning_rate": 4.744902022871192e-05, "loss": 0.7948, "step": 1998 }, { "epoch": 0.14708668659461946, "grad_norm": 0.90234375, "learning_rate": 4.744647088778105e-05, "loss": 1.0067, "step": 1999 }, { "epoch": 0.14716026672798344, "grad_norm": 0.859375, "learning_rate": 4.7443920342190485e-05, "loss": 1.0513, "step": 2000 }, { "epoch": 0.14723384686134744, "grad_norm": 0.80078125, "learning_rate": 4.744136859207711e-05, "loss": 0.7567, "step": 2001 }, { "epoch": 0.14730742699471142, "grad_norm": 0.96484375, "learning_rate": 4.743881563757786e-05, "loss": 0.9063, "step": 2002 }, { "epoch": 0.14738100712807542, "grad_norm": 1.0390625, "learning_rate": 4.7436261478829756e-05, "loss": 1.4177, "step": 2003 }, { "epoch": 0.14745458726143942, "grad_norm": 0.72265625, "learning_rate": 4.743370611596988e-05, "loss": 0.6505, "step": 2004 }, { "epoch": 0.1475281673948034, "grad_norm": 0.8984375, "learning_rate": 4.7431149549135364e-05, "loss": 0.6567, "step": 2005 }, { "epoch": 0.1476017475281674, "grad_norm": 0.80078125, "learning_rate": 4.742859177846342e-05, "loss": 0.9314, "step": 2006 }, { "epoch": 0.14767532766153138, "grad_norm": 0.80859375, "learning_rate": 4.7426032804091315e-05, "loss": 0.8187, "step": 2007 }, { "epoch": 0.14774890779489538, "grad_norm": 0.90234375, "learning_rate": 4.742347262615639e-05, "loss": 1.0127, "step": 2008 }, { "epoch": 0.14782248792825936, "grad_norm": 1.5390625, "learning_rate": 4.742091124479604e-05, "loss": 0.562, "step": 2009 }, { "epoch": 0.14789606806162336, "grad_norm": 0.99609375, "learning_rate": 4.741834866014773e-05, "loss": 1.4887, "step": 2010 }, { "epoch": 0.14796964819498734, "grad_norm": 0.83984375, "learning_rate": 4.741578487234899e-05, "loss": 1.1488, "step": 2011 }, { "epoch": 0.14804322832835135, "grad_norm": 1.109375, "learning_rate": 4.7413219881537404e-05, "loss": 1.2171, "step": 2012 }, { "epoch": 0.14811680846171535, "grad_norm": 1.0078125, "learning_rate": 4.741065368785066e-05, "loss": 1.8913, "step": 2013 }, { "epoch": 0.14819038859507933, "grad_norm": 0.78515625, "learning_rate": 4.740808629142645e-05, "loss": 0.7099, "step": 2014 }, { "epoch": 0.14826396872844333, "grad_norm": 0.8515625, "learning_rate": 4.740551769240256e-05, "loss": 1.0766, "step": 2015 }, { "epoch": 0.1483375488618073, "grad_norm": 0.734375, "learning_rate": 4.7402947890916865e-05, "loss": 0.7768, "step": 2016 }, { "epoch": 0.1484111289951713, "grad_norm": 1.2421875, "learning_rate": 4.740037688710727e-05, "loss": 1.2981, "step": 2017 }, { "epoch": 0.14848470912853529, "grad_norm": 0.81640625, "learning_rate": 4.739780468111175e-05, "loss": 1.0663, "step": 2018 }, { "epoch": 0.1485582892618993, "grad_norm": 0.96875, "learning_rate": 4.739523127306837e-05, "loss": 0.8822, "step": 2019 }, { "epoch": 0.14863186939526327, "grad_norm": 0.7265625, "learning_rate": 4.739265666311521e-05, "loss": 0.7141, "step": 2020 }, { "epoch": 0.14870544952862727, "grad_norm": 2.453125, "learning_rate": 4.739008085139046e-05, "loss": 1.0347, "step": 2021 }, { "epoch": 0.14877902966199127, "grad_norm": 0.796875, "learning_rate": 4.738750383803237e-05, "loss": 1.0854, "step": 2022 }, { "epoch": 0.14885260979535525, "grad_norm": 0.81640625, "learning_rate": 4.738492562317923e-05, "loss": 0.8448, "step": 2023 }, { "epoch": 0.14892618992871925, "grad_norm": 0.8125, "learning_rate": 4.7382346206969405e-05, "loss": 1.1947, "step": 2024 }, { "epoch": 0.14899977006208323, "grad_norm": 0.828125, "learning_rate": 4.737976558954135e-05, "loss": 0.8409, "step": 2025 }, { "epoch": 0.14907335019544724, "grad_norm": 0.84765625, "learning_rate": 4.737718377103353e-05, "loss": 0.8744, "step": 2026 }, { "epoch": 0.1491469303288112, "grad_norm": 2.09375, "learning_rate": 4.737460075158452e-05, "loss": 0.8834, "step": 2027 }, { "epoch": 0.14922051046217522, "grad_norm": 0.796875, "learning_rate": 4.737201653133295e-05, "loss": 0.7441, "step": 2028 }, { "epoch": 0.1492940905955392, "grad_norm": 1.078125, "learning_rate": 4.736943111041752e-05, "loss": 1.0531, "step": 2029 }, { "epoch": 0.1493676707289032, "grad_norm": 0.91015625, "learning_rate": 4.736684448897696e-05, "loss": 1.1605, "step": 2030 }, { "epoch": 0.1494412508622672, "grad_norm": 0.8828125, "learning_rate": 4.7364256667150095e-05, "loss": 0.8815, "step": 2031 }, { "epoch": 0.14951483099563118, "grad_norm": 1.0859375, "learning_rate": 4.736166764507583e-05, "loss": 1.0073, "step": 2032 }, { "epoch": 0.14958841112899518, "grad_norm": 0.84765625, "learning_rate": 4.7359077422893094e-05, "loss": 0.9168, "step": 2033 }, { "epoch": 0.14966199126235916, "grad_norm": 0.92578125, "learning_rate": 4.73564860007409e-05, "loss": 1.0928, "step": 2034 }, { "epoch": 0.14973557139572316, "grad_norm": 0.796875, "learning_rate": 4.7353893378758326e-05, "loss": 0.9621, "step": 2035 }, { "epoch": 0.14980915152908714, "grad_norm": 0.984375, "learning_rate": 4.7351299557084515e-05, "loss": 1.1565, "step": 2036 }, { "epoch": 0.14988273166245114, "grad_norm": 2.28125, "learning_rate": 4.734870453585867e-05, "loss": 0.9596, "step": 2037 }, { "epoch": 0.14995631179581512, "grad_norm": 0.8359375, "learning_rate": 4.734610831522007e-05, "loss": 0.8283, "step": 2038 }, { "epoch": 0.15002989192917912, "grad_norm": 0.67578125, "learning_rate": 4.7343510895308044e-05, "loss": 0.6312, "step": 2039 }, { "epoch": 0.15010347206254313, "grad_norm": 1.0078125, "learning_rate": 4.734091227626198e-05, "loss": 1.0671, "step": 2040 }, { "epoch": 0.1501770521959071, "grad_norm": 1.015625, "learning_rate": 4.733831245822136e-05, "loss": 1.1031, "step": 2041 }, { "epoch": 0.1502506323292711, "grad_norm": 0.8203125, "learning_rate": 4.733571144132569e-05, "loss": 0.8168, "step": 2042 }, { "epoch": 0.15032421246263508, "grad_norm": 0.90234375, "learning_rate": 4.733310922571458e-05, "loss": 1.0221, "step": 2043 }, { "epoch": 0.15039779259599909, "grad_norm": 0.80859375, "learning_rate": 4.733050581152768e-05, "loss": 0.8938, "step": 2044 }, { "epoch": 0.15047137272936306, "grad_norm": 0.96484375, "learning_rate": 4.732790119890471e-05, "loss": 1.0355, "step": 2045 }, { "epoch": 0.15054495286272707, "grad_norm": 0.78125, "learning_rate": 4.732529538798545e-05, "loss": 0.6696, "step": 2046 }, { "epoch": 0.15061853299609104, "grad_norm": 0.87109375, "learning_rate": 4.7322688378909754e-05, "loss": 0.8844, "step": 2047 }, { "epoch": 0.15069211312945505, "grad_norm": 0.796875, "learning_rate": 4.7320080171817536e-05, "loss": 0.9323, "step": 2048 }, { "epoch": 0.15076569326281905, "grad_norm": 0.90625, "learning_rate": 4.731747076684877e-05, "loss": 1.1546, "step": 2049 }, { "epoch": 0.15083927339618303, "grad_norm": 2.0625, "learning_rate": 4.731486016414351e-05, "loss": 0.8309, "step": 2050 }, { "epoch": 0.15091285352954703, "grad_norm": 1.0703125, "learning_rate": 4.731224836384184e-05, "loss": 1.1214, "step": 2051 }, { "epoch": 0.150986433662911, "grad_norm": 0.82421875, "learning_rate": 4.730963536608394e-05, "loss": 1.1951, "step": 2052 }, { "epoch": 0.151060013796275, "grad_norm": 0.98046875, "learning_rate": 4.7307021171010054e-05, "loss": 0.8857, "step": 2053 }, { "epoch": 0.151133593929639, "grad_norm": 1.046875, "learning_rate": 4.7304405778760466e-05, "loss": 1.3764, "step": 2054 }, { "epoch": 0.151207174063003, "grad_norm": 0.9453125, "learning_rate": 4.7301789189475556e-05, "loss": 1.0744, "step": 2055 }, { "epoch": 0.15128075419636697, "grad_norm": 1.140625, "learning_rate": 4.729917140329574e-05, "loss": 1.1497, "step": 2056 }, { "epoch": 0.15135433432973097, "grad_norm": 0.95703125, "learning_rate": 4.7296552420361505e-05, "loss": 1.4336, "step": 2057 }, { "epoch": 0.15142791446309498, "grad_norm": 0.9296875, "learning_rate": 4.7293932240813424e-05, "loss": 0.8087, "step": 2058 }, { "epoch": 0.15150149459645895, "grad_norm": 0.75390625, "learning_rate": 4.7291310864792104e-05, "loss": 0.7935, "step": 2059 }, { "epoch": 0.15157507472982296, "grad_norm": 0.79296875, "learning_rate": 4.728868829243823e-05, "loss": 0.5489, "step": 2060 }, { "epoch": 0.15164865486318693, "grad_norm": 1.0625, "learning_rate": 4.728606452389255e-05, "loss": 0.9969, "step": 2061 }, { "epoch": 0.15172223499655094, "grad_norm": 0.84375, "learning_rate": 4.7283439559295884e-05, "loss": 0.8286, "step": 2062 }, { "epoch": 0.1517958151299149, "grad_norm": 0.9140625, "learning_rate": 4.72808133987891e-05, "loss": 0.9153, "step": 2063 }, { "epoch": 0.15186939526327892, "grad_norm": 0.8671875, "learning_rate": 4.727818604251315e-05, "loss": 0.955, "step": 2064 }, { "epoch": 0.1519429753966429, "grad_norm": 0.9375, "learning_rate": 4.7275557490609026e-05, "loss": 0.8011, "step": 2065 }, { "epoch": 0.1520165555300069, "grad_norm": 0.859375, "learning_rate": 4.727292774321781e-05, "loss": 1.0804, "step": 2066 }, { "epoch": 0.1520901356633709, "grad_norm": 0.79296875, "learning_rate": 4.727029680048063e-05, "loss": 0.7592, "step": 2067 }, { "epoch": 0.15216371579673488, "grad_norm": 0.828125, "learning_rate": 4.726766466253867e-05, "loss": 1.0333, "step": 2068 }, { "epoch": 0.15223729593009888, "grad_norm": 1.0234375, "learning_rate": 4.7265031329533215e-05, "loss": 1.2509, "step": 2069 }, { "epoch": 0.15231087606346286, "grad_norm": 0.8984375, "learning_rate": 4.726239680160559e-05, "loss": 1.1099, "step": 2070 }, { "epoch": 0.15238445619682686, "grad_norm": 0.875, "learning_rate": 4.7259761078897166e-05, "loss": 0.9803, "step": 2071 }, { "epoch": 0.15245803633019084, "grad_norm": 0.8671875, "learning_rate": 4.72571241615494e-05, "loss": 0.9686, "step": 2072 }, { "epoch": 0.15253161646355484, "grad_norm": 0.8125, "learning_rate": 4.7254486049703834e-05, "loss": 0.9403, "step": 2073 }, { "epoch": 0.15260519659691882, "grad_norm": 0.80078125, "learning_rate": 4.725184674350203e-05, "loss": 1.0394, "step": 2074 }, { "epoch": 0.15267877673028282, "grad_norm": 0.90625, "learning_rate": 4.724920624308563e-05, "loss": 0.6761, "step": 2075 }, { "epoch": 0.15275235686364683, "grad_norm": 0.8046875, "learning_rate": 4.724656454859636e-05, "loss": 1.0326, "step": 2076 }, { "epoch": 0.1528259369970108, "grad_norm": 0.7734375, "learning_rate": 4.7243921660175996e-05, "loss": 0.7864, "step": 2077 }, { "epoch": 0.1528995171303748, "grad_norm": 1.0625, "learning_rate": 4.724127757796636e-05, "loss": 0.9623, "step": 2078 }, { "epoch": 0.15297309726373878, "grad_norm": 0.99609375, "learning_rate": 4.7238632302109364e-05, "loss": 1.1846, "step": 2079 }, { "epoch": 0.1530466773971028, "grad_norm": 0.7421875, "learning_rate": 4.7235985832746976e-05, "loss": 0.8224, "step": 2080 }, { "epoch": 0.15312025753046676, "grad_norm": 0.953125, "learning_rate": 4.723333817002123e-05, "loss": 0.9363, "step": 2081 }, { "epoch": 0.15319383766383077, "grad_norm": 0.84375, "learning_rate": 4.723068931407422e-05, "loss": 0.9264, "step": 2082 }, { "epoch": 0.15326741779719474, "grad_norm": 0.81640625, "learning_rate": 4.72280392650481e-05, "loss": 1.0241, "step": 2083 }, { "epoch": 0.15334099793055875, "grad_norm": 1.1640625, "learning_rate": 4.722538802308508e-05, "loss": 1.3347, "step": 2084 }, { "epoch": 0.15341457806392275, "grad_norm": 0.99609375, "learning_rate": 4.722273558832748e-05, "loss": 1.1008, "step": 2085 }, { "epoch": 0.15348815819728673, "grad_norm": 0.79296875, "learning_rate": 4.7220081960917625e-05, "loss": 0.9284, "step": 2086 }, { "epoch": 0.15356173833065073, "grad_norm": 1.0859375, "learning_rate": 4.721742714099795e-05, "loss": 1.1102, "step": 2087 }, { "epoch": 0.1536353184640147, "grad_norm": 1.5859375, "learning_rate": 4.721477112871091e-05, "loss": 1.1702, "step": 2088 }, { "epoch": 0.1537088985973787, "grad_norm": 0.86328125, "learning_rate": 4.721211392419907e-05, "loss": 0.9287, "step": 2089 }, { "epoch": 0.1537824787307427, "grad_norm": 0.8828125, "learning_rate": 4.720945552760503e-05, "loss": 1.1566, "step": 2090 }, { "epoch": 0.1538560588641067, "grad_norm": 1.25, "learning_rate": 4.720679593907145e-05, "loss": 1.5028, "step": 2091 }, { "epoch": 0.15392963899747067, "grad_norm": 0.7890625, "learning_rate": 4.720413515874108e-05, "loss": 0.6543, "step": 2092 }, { "epoch": 0.15400321913083467, "grad_norm": 0.94921875, "learning_rate": 4.7201473186756716e-05, "loss": 0.9032, "step": 2093 }, { "epoch": 0.15407679926419868, "grad_norm": 0.93359375, "learning_rate": 4.719881002326121e-05, "loss": 0.7149, "step": 2094 }, { "epoch": 0.15415037939756265, "grad_norm": 0.77734375, "learning_rate": 4.7196145668397504e-05, "loss": 0.7797, "step": 2095 }, { "epoch": 0.15422395953092666, "grad_norm": 0.77734375, "learning_rate": 4.719348012230859e-05, "loss": 0.6621, "step": 2096 }, { "epoch": 0.15429753966429063, "grad_norm": 0.890625, "learning_rate": 4.7190813385137503e-05, "loss": 1.0652, "step": 2097 }, { "epoch": 0.15437111979765464, "grad_norm": 0.97265625, "learning_rate": 4.718814545702738e-05, "loss": 0.9207, "step": 2098 }, { "epoch": 0.15444469993101861, "grad_norm": 0.8046875, "learning_rate": 4.7185476338121395e-05, "loss": 1.0707, "step": 2099 }, { "epoch": 0.15451828006438262, "grad_norm": 0.8828125, "learning_rate": 4.71828060285628e-05, "loss": 0.8185, "step": 2100 }, { "epoch": 0.15459186019774662, "grad_norm": 1.03125, "learning_rate": 4.7180134528494903e-05, "loss": 1.5909, "step": 2101 }, { "epoch": 0.1546654403311106, "grad_norm": 0.90625, "learning_rate": 4.717746183806108e-05, "loss": 0.7151, "step": 2102 }, { "epoch": 0.1547390204644746, "grad_norm": 0.85546875, "learning_rate": 4.717478795740477e-05, "loss": 0.6298, "step": 2103 }, { "epoch": 0.15481260059783858, "grad_norm": 0.9921875, "learning_rate": 4.717211288666946e-05, "loss": 1.4797, "step": 2104 }, { "epoch": 0.15488618073120258, "grad_norm": 1.5078125, "learning_rate": 4.716943662599873e-05, "loss": 0.8167, "step": 2105 }, { "epoch": 0.15495976086456656, "grad_norm": 0.79296875, "learning_rate": 4.716675917553622e-05, "loss": 0.8225, "step": 2106 }, { "epoch": 0.15503334099793056, "grad_norm": 0.7421875, "learning_rate": 4.71640805354256e-05, "loss": 0.7856, "step": 2107 }, { "epoch": 0.15510692113129454, "grad_norm": 0.7578125, "learning_rate": 4.7161400705810646e-05, "loss": 0.7681, "step": 2108 }, { "epoch": 0.15518050126465854, "grad_norm": 0.80078125, "learning_rate": 4.7158719686835176e-05, "loss": 1.1255, "step": 2109 }, { "epoch": 0.15525408139802255, "grad_norm": 0.75390625, "learning_rate": 4.715603747864307e-05, "loss": 0.6637, "step": 2110 }, { "epoch": 0.15532766153138652, "grad_norm": 0.83203125, "learning_rate": 4.715335408137827e-05, "loss": 0.8472, "step": 2111 }, { "epoch": 0.15540124166475053, "grad_norm": 0.92578125, "learning_rate": 4.715066949518481e-05, "loss": 0.8604, "step": 2112 }, { "epoch": 0.1554748217981145, "grad_norm": 0.9140625, "learning_rate": 4.7147983720206755e-05, "loss": 0.9236, "step": 2113 }, { "epoch": 0.1555484019314785, "grad_norm": 1.0625, "learning_rate": 4.714529675658824e-05, "loss": 1.0879, "step": 2114 }, { "epoch": 0.15562198206484248, "grad_norm": 0.765625, "learning_rate": 4.714260860447348e-05, "loss": 0.7562, "step": 2115 }, { "epoch": 0.1556955621982065, "grad_norm": 0.875, "learning_rate": 4.713991926400673e-05, "loss": 1.0826, "step": 2116 }, { "epoch": 0.15576914233157046, "grad_norm": 0.90234375, "learning_rate": 4.713722873533234e-05, "loss": 1.4371, "step": 2117 }, { "epoch": 0.15584272246493447, "grad_norm": 1.015625, "learning_rate": 4.713453701859468e-05, "loss": 1.0721, "step": 2118 }, { "epoch": 0.15591630259829847, "grad_norm": 1.0546875, "learning_rate": 4.713184411393824e-05, "loss": 0.9858, "step": 2119 }, { "epoch": 0.15598988273166245, "grad_norm": 0.94921875, "learning_rate": 4.712915002150752e-05, "loss": 1.2844, "step": 2120 }, { "epoch": 0.15606346286502645, "grad_norm": 0.92578125, "learning_rate": 4.712645474144711e-05, "loss": 0.9151, "step": 2121 }, { "epoch": 0.15613704299839043, "grad_norm": 0.9609375, "learning_rate": 4.712375827390167e-05, "loss": 1.0682, "step": 2122 }, { "epoch": 0.15621062313175443, "grad_norm": 1.140625, "learning_rate": 4.712106061901591e-05, "loss": 1.1453, "step": 2123 }, { "epoch": 0.1562842032651184, "grad_norm": 0.83984375, "learning_rate": 4.711836177693461e-05, "loss": 0.9225, "step": 2124 }, { "epoch": 0.15635778339848241, "grad_norm": 0.85546875, "learning_rate": 4.7115661747802604e-05, "loss": 1.0966, "step": 2125 }, { "epoch": 0.1564313635318464, "grad_norm": 0.79296875, "learning_rate": 4.71129605317648e-05, "loss": 0.9089, "step": 2126 }, { "epoch": 0.1565049436652104, "grad_norm": 0.70703125, "learning_rate": 4.711025812896618e-05, "loss": 0.6613, "step": 2127 }, { "epoch": 0.1565785237985744, "grad_norm": 0.73046875, "learning_rate": 4.710755453955176e-05, "loss": 0.9499, "step": 2128 }, { "epoch": 0.15665210393193837, "grad_norm": 0.86328125, "learning_rate": 4.710484976366664e-05, "loss": 0.948, "step": 2129 }, { "epoch": 0.15672568406530238, "grad_norm": 0.71875, "learning_rate": 4.710214380145599e-05, "loss": 0.7208, "step": 2130 }, { "epoch": 0.15679926419866635, "grad_norm": 0.73828125, "learning_rate": 4.709943665306502e-05, "loss": 0.6862, "step": 2131 }, { "epoch": 0.15687284433203036, "grad_norm": 0.86328125, "learning_rate": 4.7096728318639025e-05, "loss": 1.1438, "step": 2132 }, { "epoch": 0.15694642446539434, "grad_norm": 1.3828125, "learning_rate": 4.7094018798323365e-05, "loss": 1.1915, "step": 2133 }, { "epoch": 0.15702000459875834, "grad_norm": 0.859375, "learning_rate": 4.709130809226344e-05, "loss": 1.2771, "step": 2134 }, { "epoch": 0.15709358473212232, "grad_norm": 0.73828125, "learning_rate": 4.7088596200604735e-05, "loss": 0.7715, "step": 2135 }, { "epoch": 0.15716716486548632, "grad_norm": 1.015625, "learning_rate": 4.708588312349279e-05, "loss": 1.2178, "step": 2136 }, { "epoch": 0.15724074499885032, "grad_norm": 0.8671875, "learning_rate": 4.708316886107321e-05, "loss": 0.8211, "step": 2137 }, { "epoch": 0.1573143251322143, "grad_norm": 0.859375, "learning_rate": 4.708045341349168e-05, "loss": 0.8783, "step": 2138 }, { "epoch": 0.1573879052655783, "grad_norm": 0.79296875, "learning_rate": 4.70777367808939e-05, "loss": 0.951, "step": 2139 }, { "epoch": 0.15746148539894228, "grad_norm": 0.73046875, "learning_rate": 4.70750189634257e-05, "loss": 0.792, "step": 2140 }, { "epoch": 0.15753506553230628, "grad_norm": 0.83984375, "learning_rate": 4.7072299961232915e-05, "loss": 0.6977, "step": 2141 }, { "epoch": 0.15760864566567026, "grad_norm": 0.78125, "learning_rate": 4.7069579774461485e-05, "loss": 0.7599, "step": 2142 }, { "epoch": 0.15768222579903426, "grad_norm": 1.140625, "learning_rate": 4.70668584032574e-05, "loss": 1.1872, "step": 2143 }, { "epoch": 0.15775580593239824, "grad_norm": 1.0859375, "learning_rate": 4.7064135847766686e-05, "loss": 0.9709, "step": 2144 }, { "epoch": 0.15782938606576224, "grad_norm": 0.84765625, "learning_rate": 4.706141210813549e-05, "loss": 0.9034, "step": 2145 }, { "epoch": 0.15790296619912625, "grad_norm": 0.828125, "learning_rate": 4.705868718450996e-05, "loss": 0.9864, "step": 2146 }, { "epoch": 0.15797654633249023, "grad_norm": 1.234375, "learning_rate": 4.7055961077036364e-05, "loss": 1.5484, "step": 2147 }, { "epoch": 0.15805012646585423, "grad_norm": 0.77734375, "learning_rate": 4.7053233785860996e-05, "loss": 0.9305, "step": 2148 }, { "epoch": 0.1581237065992182, "grad_norm": 0.859375, "learning_rate": 4.705050531113021e-05, "loss": 0.8664, "step": 2149 }, { "epoch": 0.1581972867325822, "grad_norm": 6.03125, "learning_rate": 4.7047775652990464e-05, "loss": 1.0787, "step": 2150 }, { "epoch": 0.15827086686594619, "grad_norm": 0.984375, "learning_rate": 4.704504481158823e-05, "loss": 0.9074, "step": 2151 }, { "epoch": 0.1583444469993102, "grad_norm": 0.83984375, "learning_rate": 4.7042312787070084e-05, "loss": 0.9109, "step": 2152 }, { "epoch": 0.15841802713267417, "grad_norm": 1.2734375, "learning_rate": 4.703957957958265e-05, "loss": 1.3144, "step": 2153 }, { "epoch": 0.15849160726603817, "grad_norm": 0.80078125, "learning_rate": 4.70368451892726e-05, "loss": 0.7123, "step": 2154 }, { "epoch": 0.15856518739940217, "grad_norm": 0.77734375, "learning_rate": 4.7034109616286694e-05, "loss": 0.9552, "step": 2155 }, { "epoch": 0.15863876753276615, "grad_norm": 0.77734375, "learning_rate": 4.7031372860771735e-05, "loss": 0.937, "step": 2156 }, { "epoch": 0.15871234766613015, "grad_norm": 0.75390625, "learning_rate": 4.70286349228746e-05, "loss": 0.739, "step": 2157 }, { "epoch": 0.15878592779949413, "grad_norm": 0.95703125, "learning_rate": 4.702589580274225e-05, "loss": 1.2628, "step": 2158 }, { "epoch": 0.15885950793285813, "grad_norm": 1.109375, "learning_rate": 4.7023155500521666e-05, "loss": 1.3584, "step": 2159 }, { "epoch": 0.1589330880662221, "grad_norm": 0.84375, "learning_rate": 4.7020414016359926e-05, "loss": 0.9563, "step": 2160 }, { "epoch": 0.15900666819958612, "grad_norm": 0.84765625, "learning_rate": 4.701767135040414e-05, "loss": 0.9553, "step": 2161 }, { "epoch": 0.1590802483329501, "grad_norm": 1.1015625, "learning_rate": 4.701492750280154e-05, "loss": 1.3369, "step": 2162 }, { "epoch": 0.1591538284663141, "grad_norm": 0.9921875, "learning_rate": 4.701218247369935e-05, "loss": 1.0824, "step": 2163 }, { "epoch": 0.1592274085996781, "grad_norm": 0.921875, "learning_rate": 4.7009436263244914e-05, "loss": 1.254, "step": 2164 }, { "epoch": 0.15930098873304208, "grad_norm": 0.8671875, "learning_rate": 4.700668887158559e-05, "loss": 1.0145, "step": 2165 }, { "epoch": 0.15937456886640608, "grad_norm": 0.9375, "learning_rate": 4.700394029886884e-05, "loss": 0.7849, "step": 2166 }, { "epoch": 0.15944814899977006, "grad_norm": 0.91796875, "learning_rate": 4.700119054524218e-05, "loss": 0.9497, "step": 2167 }, { "epoch": 0.15952172913313406, "grad_norm": 0.8046875, "learning_rate": 4.699843961085317e-05, "loss": 0.784, "step": 2168 }, { "epoch": 0.15959530926649804, "grad_norm": 0.9375, "learning_rate": 4.699568749584946e-05, "loss": 0.8025, "step": 2169 }, { "epoch": 0.15966888939986204, "grad_norm": 0.89453125, "learning_rate": 4.6992934200378744e-05, "loss": 0.8684, "step": 2170 }, { "epoch": 0.15974246953322602, "grad_norm": 0.80859375, "learning_rate": 4.69901797245888e-05, "loss": 1.0372, "step": 2171 }, { "epoch": 0.15981604966659002, "grad_norm": 0.671875, "learning_rate": 4.6987424068627425e-05, "loss": 0.7471, "step": 2172 }, { "epoch": 0.15988962979995403, "grad_norm": 1.0546875, "learning_rate": 4.6984667232642546e-05, "loss": 1.122, "step": 2173 }, { "epoch": 0.159963209933318, "grad_norm": 0.828125, "learning_rate": 4.698190921678208e-05, "loss": 1.0807, "step": 2174 }, { "epoch": 0.160036790066682, "grad_norm": 1.0703125, "learning_rate": 4.697915002119408e-05, "loss": 1.258, "step": 2175 }, { "epoch": 0.16011037020004598, "grad_norm": 1.015625, "learning_rate": 4.697638964602661e-05, "loss": 1.2759, "step": 2176 }, { "epoch": 0.16018395033340999, "grad_norm": 0.75390625, "learning_rate": 4.6973628091427805e-05, "loss": 0.8642, "step": 2177 }, { "epoch": 0.16025753046677396, "grad_norm": 0.875, "learning_rate": 4.697086535754589e-05, "loss": 0.838, "step": 2178 }, { "epoch": 0.16033111060013797, "grad_norm": 1.0234375, "learning_rate": 4.6968101444529136e-05, "loss": 1.2733, "step": 2179 }, { "epoch": 0.16040469073350194, "grad_norm": 0.9765625, "learning_rate": 4.696533635252586e-05, "loss": 0.7795, "step": 2180 }, { "epoch": 0.16047827086686595, "grad_norm": 0.84765625, "learning_rate": 4.6962570081684464e-05, "loss": 0.9203, "step": 2181 }, { "epoch": 0.16055185100022995, "grad_norm": 0.85546875, "learning_rate": 4.695980263215342e-05, "loss": 0.853, "step": 2182 }, { "epoch": 0.16062543113359393, "grad_norm": 0.98828125, "learning_rate": 4.695703400408124e-05, "loss": 1.5444, "step": 2183 }, { "epoch": 0.16069901126695793, "grad_norm": 0.81640625, "learning_rate": 4.695426419761652e-05, "loss": 1.1323, "step": 2184 }, { "epoch": 0.1607725914003219, "grad_norm": 0.9296875, "learning_rate": 4.6951493212907905e-05, "loss": 1.099, "step": 2185 }, { "epoch": 0.1608461715336859, "grad_norm": 0.8671875, "learning_rate": 4.694872105010412e-05, "loss": 1.1622, "step": 2186 }, { "epoch": 0.1609197516670499, "grad_norm": 1.140625, "learning_rate": 4.694594770935391e-05, "loss": 1.0416, "step": 2187 }, { "epoch": 0.1609933318004139, "grad_norm": 1.21875, "learning_rate": 4.694317319080615e-05, "loss": 1.1845, "step": 2188 }, { "epoch": 0.16106691193377787, "grad_norm": 0.859375, "learning_rate": 4.694039749460973e-05, "loss": 0.9836, "step": 2189 }, { "epoch": 0.16114049206714187, "grad_norm": 0.68359375, "learning_rate": 4.6937620620913617e-05, "loss": 0.7475, "step": 2190 }, { "epoch": 0.16121407220050588, "grad_norm": 1.03125, "learning_rate": 4.693484256986683e-05, "loss": 1.038, "step": 2191 }, { "epoch": 0.16128765233386985, "grad_norm": 0.9375, "learning_rate": 4.693206334161848e-05, "loss": 1.0436, "step": 2192 }, { "epoch": 0.16136123246723386, "grad_norm": 0.94140625, "learning_rate": 4.692928293631772e-05, "loss": 0.791, "step": 2193 }, { "epoch": 0.16143481260059783, "grad_norm": 0.97265625, "learning_rate": 4.692650135411375e-05, "loss": 1.2259, "step": 2194 }, { "epoch": 0.16150839273396184, "grad_norm": 0.83984375, "learning_rate": 4.692371859515587e-05, "loss": 0.9201, "step": 2195 }, { "epoch": 0.1615819728673258, "grad_norm": 0.98046875, "learning_rate": 4.692093465959342e-05, "loss": 1.0345, "step": 2196 }, { "epoch": 0.16165555300068982, "grad_norm": 0.73828125, "learning_rate": 4.691814954757582e-05, "loss": 0.6074, "step": 2197 }, { "epoch": 0.1617291331340538, "grad_norm": 0.94921875, "learning_rate": 4.691536325925252e-05, "loss": 1.0246, "step": 2198 }, { "epoch": 0.1618027132674178, "grad_norm": 0.9296875, "learning_rate": 4.691257579477306e-05, "loss": 1.0069, "step": 2199 }, { "epoch": 0.1618762934007818, "grad_norm": 1.0234375, "learning_rate": 4.690978715428705e-05, "loss": 1.6378, "step": 2200 }, { "epoch": 0.16194987353414578, "grad_norm": 0.86328125, "learning_rate": 4.690699733794416e-05, "loss": 0.8153, "step": 2201 }, { "epoch": 0.16202345366750978, "grad_norm": 1.046875, "learning_rate": 4.690420634589408e-05, "loss": 0.9973, "step": 2202 }, { "epoch": 0.16209703380087376, "grad_norm": 1.15625, "learning_rate": 4.690141417828663e-05, "loss": 0.9972, "step": 2203 }, { "epoch": 0.16217061393423776, "grad_norm": 0.80078125, "learning_rate": 4.689862083527164e-05, "loss": 0.7335, "step": 2204 }, { "epoch": 0.16224419406760174, "grad_norm": 0.91015625, "learning_rate": 4.689582631699903e-05, "loss": 0.6909, "step": 2205 }, { "epoch": 0.16231777420096574, "grad_norm": 0.703125, "learning_rate": 4.689303062361878e-05, "loss": 0.6839, "step": 2206 }, { "epoch": 0.16239135433432972, "grad_norm": 0.921875, "learning_rate": 4.689023375528092e-05, "loss": 0.9808, "step": 2207 }, { "epoch": 0.16246493446769372, "grad_norm": 1.015625, "learning_rate": 4.688743571213557e-05, "loss": 1.6107, "step": 2208 }, { "epoch": 0.16253851460105773, "grad_norm": 0.97265625, "learning_rate": 4.688463649433288e-05, "loss": 1.2718, "step": 2209 }, { "epoch": 0.1626120947344217, "grad_norm": 0.77734375, "learning_rate": 4.688183610202308e-05, "loss": 0.9009, "step": 2210 }, { "epoch": 0.1626856748677857, "grad_norm": 0.765625, "learning_rate": 4.687903453535647e-05, "loss": 1.0113, "step": 2211 }, { "epoch": 0.16275925500114968, "grad_norm": 1.1953125, "learning_rate": 4.687623179448339e-05, "loss": 1.2858, "step": 2212 }, { "epoch": 0.1628328351345137, "grad_norm": 0.953125, "learning_rate": 4.6873427879554274e-05, "loss": 1.2891, "step": 2213 }, { "epoch": 0.16290641526787766, "grad_norm": 0.859375, "learning_rate": 4.687062279071961e-05, "loss": 1.1064, "step": 2214 }, { "epoch": 0.16297999540124167, "grad_norm": 1.046875, "learning_rate": 4.686781652812992e-05, "loss": 1.3416, "step": 2215 }, { "epoch": 0.16305357553460564, "grad_norm": 0.77734375, "learning_rate": 4.686500909193581e-05, "loss": 0.8285, "step": 2216 }, { "epoch": 0.16312715566796965, "grad_norm": 0.87890625, "learning_rate": 4.686220048228796e-05, "loss": 1.0324, "step": 2217 }, { "epoch": 0.16320073580133365, "grad_norm": 1.015625, "learning_rate": 4.685939069933711e-05, "loss": 1.0747, "step": 2218 }, { "epoch": 0.16327431593469763, "grad_norm": 1.0, "learning_rate": 4.6856579743234044e-05, "loss": 1.0746, "step": 2219 }, { "epoch": 0.16334789606806163, "grad_norm": 1.015625, "learning_rate": 4.685376761412963e-05, "loss": 1.3434, "step": 2220 }, { "epoch": 0.1634214762014256, "grad_norm": 0.7421875, "learning_rate": 4.6850954312174775e-05, "loss": 0.602, "step": 2221 }, { "epoch": 0.1634950563347896, "grad_norm": 0.8046875, "learning_rate": 4.684813983752048e-05, "loss": 0.9007, "step": 2222 }, { "epoch": 0.1635686364681536, "grad_norm": 0.8359375, "learning_rate": 4.684532419031778e-05, "loss": 0.6229, "step": 2223 }, { "epoch": 0.1636422166015176, "grad_norm": 0.88671875, "learning_rate": 4.684250737071779e-05, "loss": 0.7356, "step": 2224 }, { "epoch": 0.16371579673488157, "grad_norm": 0.734375, "learning_rate": 4.683968937887169e-05, "loss": 0.75, "step": 2225 }, { "epoch": 0.16378937686824557, "grad_norm": 1.265625, "learning_rate": 4.6836870214930704e-05, "loss": 1.3397, "step": 2226 }, { "epoch": 0.16386295700160958, "grad_norm": 0.984375, "learning_rate": 4.683404987904615e-05, "loss": 1.1249, "step": 2227 }, { "epoch": 0.16393653713497355, "grad_norm": 1.046875, "learning_rate": 4.683122837136937e-05, "loss": 1.0647, "step": 2228 }, { "epoch": 0.16401011726833756, "grad_norm": 0.859375, "learning_rate": 4.682840569205179e-05, "loss": 0.8404, "step": 2229 }, { "epoch": 0.16408369740170153, "grad_norm": 1.078125, "learning_rate": 4.6825581841244916e-05, "loss": 1.2112, "step": 2230 }, { "epoch": 0.16415727753506554, "grad_norm": 0.78125, "learning_rate": 4.6822756819100275e-05, "loss": 1.0361, "step": 2231 }, { "epoch": 0.16423085766842951, "grad_norm": 1.0234375, "learning_rate": 4.68199306257695e-05, "loss": 1.0725, "step": 2232 }, { "epoch": 0.16430443780179352, "grad_norm": 0.84375, "learning_rate": 4.681710326140426e-05, "loss": 1.0729, "step": 2233 }, { "epoch": 0.1643780179351575, "grad_norm": 0.94140625, "learning_rate": 4.6814274726156296e-05, "loss": 0.9688, "step": 2234 }, { "epoch": 0.1644515980685215, "grad_norm": 0.80859375, "learning_rate": 4.68114450201774e-05, "loss": 1.3399, "step": 2235 }, { "epoch": 0.1645251782018855, "grad_norm": 0.8984375, "learning_rate": 4.680861414361945e-05, "loss": 1.3039, "step": 2236 }, { "epoch": 0.16459875833524948, "grad_norm": 0.953125, "learning_rate": 4.680578209663438e-05, "loss": 1.2941, "step": 2237 }, { "epoch": 0.16467233846861348, "grad_norm": 0.84765625, "learning_rate": 4.680294887937416e-05, "loss": 0.8394, "step": 2238 }, { "epoch": 0.16474591860197746, "grad_norm": 0.89453125, "learning_rate": 4.680011449199085e-05, "loss": 1.136, "step": 2239 }, { "epoch": 0.16481949873534146, "grad_norm": 0.84375, "learning_rate": 4.679727893463658e-05, "loss": 1.0401, "step": 2240 }, { "epoch": 0.16489307886870544, "grad_norm": 0.67578125, "learning_rate": 4.679444220746352e-05, "loss": 0.6675, "step": 2241 }, { "epoch": 0.16496665900206944, "grad_norm": 0.90625, "learning_rate": 4.679160431062391e-05, "loss": 0.9519, "step": 2242 }, { "epoch": 0.16504023913543342, "grad_norm": 0.83984375, "learning_rate": 4.678876524427004e-05, "loss": 0.9513, "step": 2243 }, { "epoch": 0.16511381926879742, "grad_norm": 0.92578125, "learning_rate": 4.6785925008554305e-05, "loss": 0.7941, "step": 2244 }, { "epoch": 0.16518739940216143, "grad_norm": 0.84375, "learning_rate": 4.678308360362912e-05, "loss": 1.0033, "step": 2245 }, { "epoch": 0.1652609795355254, "grad_norm": 1.1328125, "learning_rate": 4.6780241029646975e-05, "loss": 1.5162, "step": 2246 }, { "epoch": 0.1653345596688894, "grad_norm": 1.3359375, "learning_rate": 4.677739728676044e-05, "loss": 1.1994, "step": 2247 }, { "epoch": 0.16540813980225338, "grad_norm": 0.9140625, "learning_rate": 4.677455237512212e-05, "loss": 1.0858, "step": 2248 }, { "epoch": 0.1654817199356174, "grad_norm": 0.921875, "learning_rate": 4.6771706294884696e-05, "loss": 0.7822, "step": 2249 }, { "epoch": 0.16555530006898136, "grad_norm": 0.859375, "learning_rate": 4.6768859046200924e-05, "loss": 1.2687, "step": 2250 }, { "epoch": 0.16562888020234537, "grad_norm": 0.875, "learning_rate": 4.67660106292236e-05, "loss": 1.0894, "step": 2251 }, { "epoch": 0.16570246033570935, "grad_norm": 0.7265625, "learning_rate": 4.6763161044105595e-05, "loss": 0.7779, "step": 2252 }, { "epoch": 0.16577604046907335, "grad_norm": 0.890625, "learning_rate": 4.6760310290999844e-05, "loss": 1.0385, "step": 2253 }, { "epoch": 0.16584962060243735, "grad_norm": 0.95703125, "learning_rate": 4.6757458370059336e-05, "loss": 1.1762, "step": 2254 }, { "epoch": 0.16592320073580133, "grad_norm": 0.8515625, "learning_rate": 4.6754605281437134e-05, "loss": 0.7289, "step": 2255 }, { "epoch": 0.16599678086916533, "grad_norm": 0.78125, "learning_rate": 4.675175102528635e-05, "loss": 0.7508, "step": 2256 }, { "epoch": 0.1660703610025293, "grad_norm": 1.125, "learning_rate": 4.674889560176018e-05, "loss": 1.3737, "step": 2257 }, { "epoch": 0.1661439411358933, "grad_norm": 0.828125, "learning_rate": 4.674603901101186e-05, "loss": 0.8059, "step": 2258 }, { "epoch": 0.1662175212692573, "grad_norm": 0.9375, "learning_rate": 4.674318125319469e-05, "loss": 0.723, "step": 2259 }, { "epoch": 0.1662911014026213, "grad_norm": 0.9453125, "learning_rate": 4.674032232846205e-05, "loss": 0.841, "step": 2260 }, { "epoch": 0.16636468153598527, "grad_norm": 0.94921875, "learning_rate": 4.6737462236967374e-05, "loss": 0.9784, "step": 2261 }, { "epoch": 0.16643826166934927, "grad_norm": 0.87109375, "learning_rate": 4.6734600978864164e-05, "loss": 0.8706, "step": 2262 }, { "epoch": 0.16651184180271328, "grad_norm": 0.9296875, "learning_rate": 4.673173855430596e-05, "loss": 0.9619, "step": 2263 }, { "epoch": 0.16658542193607725, "grad_norm": 0.80078125, "learning_rate": 4.67288749634464e-05, "loss": 0.6256, "step": 2264 }, { "epoch": 0.16665900206944126, "grad_norm": 0.72265625, "learning_rate": 4.6726010206439155e-05, "loss": 0.7546, "step": 2265 }, { "epoch": 0.16673258220280524, "grad_norm": 2.53125, "learning_rate": 4.672314428343798e-05, "loss": 0.8527, "step": 2266 }, { "epoch": 0.16680616233616924, "grad_norm": 0.98046875, "learning_rate": 4.672027719459668e-05, "loss": 0.833, "step": 2267 }, { "epoch": 0.16687974246953322, "grad_norm": 0.875, "learning_rate": 4.671740894006913e-05, "loss": 1.0085, "step": 2268 }, { "epoch": 0.16695332260289722, "grad_norm": 0.93359375, "learning_rate": 4.671453952000926e-05, "loss": 1.1527, "step": 2269 }, { "epoch": 0.1670269027362612, "grad_norm": 0.78125, "learning_rate": 4.671166893457106e-05, "loss": 0.944, "step": 2270 }, { "epoch": 0.1671004828696252, "grad_norm": 1.203125, "learning_rate": 4.67087971839086e-05, "loss": 1.3125, "step": 2271 }, { "epoch": 0.1671740630029892, "grad_norm": 0.90234375, "learning_rate": 4.6705924268176e-05, "loss": 0.7686, "step": 2272 }, { "epoch": 0.16724764313635318, "grad_norm": 0.79296875, "learning_rate": 4.670305018752744e-05, "loss": 0.7776, "step": 2273 }, { "epoch": 0.16732122326971718, "grad_norm": 1.03125, "learning_rate": 4.6700174942117165e-05, "loss": 1.1477, "step": 2274 }, { "epoch": 0.16739480340308116, "grad_norm": 0.95703125, "learning_rate": 4.669729853209949e-05, "loss": 1.0077, "step": 2275 }, { "epoch": 0.16746838353644516, "grad_norm": 0.7890625, "learning_rate": 4.6694420957628785e-05, "loss": 0.7071, "step": 2276 }, { "epoch": 0.16754196366980914, "grad_norm": 0.76953125, "learning_rate": 4.6691542218859476e-05, "loss": 1.012, "step": 2277 }, { "epoch": 0.16761554380317314, "grad_norm": 0.796875, "learning_rate": 4.668866231594606e-05, "loss": 0.8759, "step": 2278 }, { "epoch": 0.16768912393653712, "grad_norm": 0.78125, "learning_rate": 4.668578124904312e-05, "loss": 0.9433, "step": 2279 }, { "epoch": 0.16776270406990113, "grad_norm": 0.9609375, "learning_rate": 4.668289901830524e-05, "loss": 0.8655, "step": 2280 }, { "epoch": 0.16783628420326513, "grad_norm": 0.8984375, "learning_rate": 4.668001562388713e-05, "loss": 0.7792, "step": 2281 }, { "epoch": 0.1679098643366291, "grad_norm": 0.984375, "learning_rate": 4.667713106594353e-05, "loss": 1.2321, "step": 2282 }, { "epoch": 0.1679834444699931, "grad_norm": 1.015625, "learning_rate": 4.667424534462925e-05, "loss": 1.2005, "step": 2283 }, { "epoch": 0.16805702460335709, "grad_norm": 1.015625, "learning_rate": 4.667135846009916e-05, "loss": 1.1626, "step": 2284 }, { "epoch": 0.1681306047367211, "grad_norm": 0.9609375, "learning_rate": 4.666847041250819e-05, "loss": 1.0837, "step": 2285 }, { "epoch": 0.16820418487008507, "grad_norm": 1.1015625, "learning_rate": 4.6665581202011345e-05, "loss": 1.2828, "step": 2286 }, { "epoch": 0.16827776500344907, "grad_norm": 0.8359375, "learning_rate": 4.666269082876367e-05, "loss": 0.7669, "step": 2287 }, { "epoch": 0.16835134513681307, "grad_norm": 0.859375, "learning_rate": 4.665979929292029e-05, "loss": 1.0711, "step": 2288 }, { "epoch": 0.16842492527017705, "grad_norm": 0.94140625, "learning_rate": 4.665690659463641e-05, "loss": 0.7772, "step": 2289 }, { "epoch": 0.16849850540354105, "grad_norm": 0.8046875, "learning_rate": 4.6654012734067236e-05, "loss": 0.7368, "step": 2290 }, { "epoch": 0.16857208553690503, "grad_norm": 0.8515625, "learning_rate": 4.665111771136811e-05, "loss": 1.0117, "step": 2291 }, { "epoch": 0.16864566567026903, "grad_norm": 0.89453125, "learning_rate": 4.664822152669438e-05, "loss": 0.8384, "step": 2292 }, { "epoch": 0.168719245803633, "grad_norm": 0.796875, "learning_rate": 4.6645324180201494e-05, "loss": 0.7722, "step": 2293 }, { "epoch": 0.16879282593699702, "grad_norm": 0.703125, "learning_rate": 4.664242567204494e-05, "loss": 0.7028, "step": 2294 }, { "epoch": 0.168866406070361, "grad_norm": 0.84765625, "learning_rate": 4.6639526002380275e-05, "loss": 1.0704, "step": 2295 }, { "epoch": 0.168939986203725, "grad_norm": 1.203125, "learning_rate": 4.663662517136312e-05, "loss": 1.3884, "step": 2296 }, { "epoch": 0.169013566337089, "grad_norm": 0.71484375, "learning_rate": 4.6633723179149166e-05, "loss": 0.7911, "step": 2297 }, { "epoch": 0.16908714647045298, "grad_norm": 1.03125, "learning_rate": 4.6630820025894145e-05, "loss": 1.1098, "step": 2298 }, { "epoch": 0.16916072660381698, "grad_norm": 0.921875, "learning_rate": 4.6627915711753866e-05, "loss": 0.8922, "step": 2299 }, { "epoch": 0.16923430673718096, "grad_norm": 1.0234375, "learning_rate": 4.6625010236884204e-05, "loss": 1.4102, "step": 2300 }, { "epoch": 0.16930788687054496, "grad_norm": 0.89453125, "learning_rate": 4.662210360144108e-05, "loss": 0.8983, "step": 2301 }, { "epoch": 0.16938146700390894, "grad_norm": 0.859375, "learning_rate": 4.66191958055805e-05, "loss": 0.8754, "step": 2302 }, { "epoch": 0.16945504713727294, "grad_norm": 0.78125, "learning_rate": 4.6616286849458515e-05, "loss": 1.2154, "step": 2303 }, { "epoch": 0.16952862727063692, "grad_norm": 0.84375, "learning_rate": 4.6613376733231236e-05, "loss": 0.8395, "step": 2304 }, { "epoch": 0.16960220740400092, "grad_norm": 0.91015625, "learning_rate": 4.661046545705485e-05, "loss": 0.8515, "step": 2305 }, { "epoch": 0.16967578753736492, "grad_norm": 1.0546875, "learning_rate": 4.660755302108561e-05, "loss": 0.8633, "step": 2306 }, { "epoch": 0.1697493676707289, "grad_norm": 0.87890625, "learning_rate": 4.66046394254798e-05, "loss": 0.9357, "step": 2307 }, { "epoch": 0.1698229478040929, "grad_norm": 0.91796875, "learning_rate": 4.66017246703938e-05, "loss": 1.0117, "step": 2308 }, { "epoch": 0.16989652793745688, "grad_norm": 0.88671875, "learning_rate": 4.6598808755984034e-05, "loss": 1.0174, "step": 2309 }, { "epoch": 0.16997010807082089, "grad_norm": 0.93359375, "learning_rate": 4.6595891682407e-05, "loss": 1.234, "step": 2310 }, { "epoch": 0.17004368820418486, "grad_norm": 0.73046875, "learning_rate": 4.6592973449819244e-05, "loss": 0.5915, "step": 2311 }, { "epoch": 0.17011726833754887, "grad_norm": 0.97265625, "learning_rate": 4.659005405837739e-05, "loss": 0.9665, "step": 2312 }, { "epoch": 0.17019084847091284, "grad_norm": 1.046875, "learning_rate": 4.6587133508238115e-05, "loss": 1.6637, "step": 2313 }, { "epoch": 0.17026442860427685, "grad_norm": 0.7578125, "learning_rate": 4.658421179955815e-05, "loss": 0.8542, "step": 2314 }, { "epoch": 0.17033800873764085, "grad_norm": 0.80078125, "learning_rate": 4.6581288932494304e-05, "loss": 0.7991, "step": 2315 }, { "epoch": 0.17041158887100483, "grad_norm": 1.109375, "learning_rate": 4.657836490720345e-05, "loss": 1.3094, "step": 2316 }, { "epoch": 0.17048516900436883, "grad_norm": 0.984375, "learning_rate": 4.657543972384251e-05, "loss": 0.86, "step": 2317 }, { "epoch": 0.1705587491377328, "grad_norm": 0.81640625, "learning_rate": 4.657251338256846e-05, "loss": 0.7694, "step": 2318 }, { "epoch": 0.1706323292710968, "grad_norm": 0.95703125, "learning_rate": 4.656958588353836e-05, "loss": 0.9706, "step": 2319 }, { "epoch": 0.1707059094044608, "grad_norm": 0.76171875, "learning_rate": 4.656665722690933e-05, "loss": 0.9386, "step": 2320 }, { "epoch": 0.1707794895378248, "grad_norm": 0.92578125, "learning_rate": 4.656372741283854e-05, "loss": 1.0143, "step": 2321 }, { "epoch": 0.17085306967118877, "grad_norm": 1.03125, "learning_rate": 4.6560796441483234e-05, "loss": 1.4127, "step": 2322 }, { "epoch": 0.17092664980455277, "grad_norm": 0.83984375, "learning_rate": 4.6557864313000695e-05, "loss": 0.7166, "step": 2323 }, { "epoch": 0.17100022993791678, "grad_norm": 0.796875, "learning_rate": 4.65549310275483e-05, "loss": 0.9397, "step": 2324 }, { "epoch": 0.17107381007128075, "grad_norm": 0.8203125, "learning_rate": 4.6551996585283476e-05, "loss": 1.0492, "step": 2325 }, { "epoch": 0.17114739020464476, "grad_norm": 0.8828125, "learning_rate": 4.654906098636369e-05, "loss": 0.6942, "step": 2326 }, { "epoch": 0.17122097033800873, "grad_norm": 0.89453125, "learning_rate": 4.6546124230946505e-05, "loss": 0.9402, "step": 2327 }, { "epoch": 0.17129455047137274, "grad_norm": 0.8359375, "learning_rate": 4.6543186319189526e-05, "loss": 0.7261, "step": 2328 }, { "epoch": 0.1713681306047367, "grad_norm": 0.87109375, "learning_rate": 4.6540247251250424e-05, "loss": 0.8986, "step": 2329 }, { "epoch": 0.17144171073810072, "grad_norm": 0.71875, "learning_rate": 4.653730702728694e-05, "loss": 0.7865, "step": 2330 }, { "epoch": 0.1715152908714647, "grad_norm": 1.59375, "learning_rate": 4.653436564745687e-05, "loss": 1.1759, "step": 2331 }, { "epoch": 0.1715888710048287, "grad_norm": 0.8984375, "learning_rate": 4.653142311191806e-05, "loss": 0.978, "step": 2332 }, { "epoch": 0.1716624511381927, "grad_norm": 1.125, "learning_rate": 4.652847942082844e-05, "loss": 1.0943, "step": 2333 }, { "epoch": 0.17173603127155668, "grad_norm": 0.8515625, "learning_rate": 4.6525534574346e-05, "loss": 0.942, "step": 2334 }, { "epoch": 0.17180961140492068, "grad_norm": 1.1484375, "learning_rate": 4.6522588572628765e-05, "loss": 1.0099, "step": 2335 }, { "epoch": 0.17188319153828466, "grad_norm": 0.78515625, "learning_rate": 4.651964141583486e-05, "loss": 0.7833, "step": 2336 }, { "epoch": 0.17195677167164866, "grad_norm": 1.0234375, "learning_rate": 4.6516693104122435e-05, "loss": 1.1503, "step": 2337 }, { "epoch": 0.17203035180501264, "grad_norm": 0.87109375, "learning_rate": 4.6513743637649736e-05, "loss": 0.6584, "step": 2338 }, { "epoch": 0.17210393193837664, "grad_norm": 0.80078125, "learning_rate": 4.651079301657505e-05, "loss": 0.8908, "step": 2339 }, { "epoch": 0.17217751207174062, "grad_norm": 0.7890625, "learning_rate": 4.6507841241056735e-05, "loss": 0.8444, "step": 2340 }, { "epoch": 0.17225109220510462, "grad_norm": 0.765625, "learning_rate": 4.6504888311253196e-05, "loss": 0.8007, "step": 2341 }, { "epoch": 0.17232467233846863, "grad_norm": 0.78125, "learning_rate": 4.650193422732292e-05, "loss": 0.7184, "step": 2342 }, { "epoch": 0.1723982524718326, "grad_norm": 0.8515625, "learning_rate": 4.649897898942445e-05, "loss": 0.903, "step": 2343 }, { "epoch": 0.1724718326051966, "grad_norm": 0.73046875, "learning_rate": 4.649602259771638e-05, "loss": 0.7455, "step": 2344 }, { "epoch": 0.17254541273856058, "grad_norm": 0.765625, "learning_rate": 4.649306505235738e-05, "loss": 0.7128, "step": 2345 }, { "epoch": 0.1726189928719246, "grad_norm": 0.84375, "learning_rate": 4.649010635350617e-05, "loss": 0.9328, "step": 2346 }, { "epoch": 0.17269257300528856, "grad_norm": 0.87109375, "learning_rate": 4.6487146501321535e-05, "loss": 0.7971, "step": 2347 }, { "epoch": 0.17276615313865257, "grad_norm": 0.91015625, "learning_rate": 4.648418549596234e-05, "loss": 0.9742, "step": 2348 }, { "epoch": 0.17283973327201654, "grad_norm": 0.8125, "learning_rate": 4.648122333758749e-05, "loss": 0.9682, "step": 2349 }, { "epoch": 0.17291331340538055, "grad_norm": 1.03125, "learning_rate": 4.647826002635595e-05, "loss": 0.7815, "step": 2350 }, { "epoch": 0.17298689353874455, "grad_norm": 0.84375, "learning_rate": 4.647529556242676e-05, "loss": 0.9898, "step": 2351 }, { "epoch": 0.17306047367210853, "grad_norm": 0.8515625, "learning_rate": 4.6472329945959014e-05, "loss": 1.1301, "step": 2352 }, { "epoch": 0.17313405380547253, "grad_norm": 0.79296875, "learning_rate": 4.646936317711188e-05, "loss": 0.8775, "step": 2353 }, { "epoch": 0.1732076339388365, "grad_norm": 0.9375, "learning_rate": 4.6466395256044574e-05, "loss": 1.4642, "step": 2354 }, { "epoch": 0.1732812140722005, "grad_norm": 0.96484375, "learning_rate": 4.646342618291638e-05, "loss": 0.7355, "step": 2355 }, { "epoch": 0.1733547942055645, "grad_norm": 0.59765625, "learning_rate": 4.6460455957886646e-05, "loss": 0.6268, "step": 2356 }, { "epoch": 0.1734283743389285, "grad_norm": 0.89453125, "learning_rate": 4.645748458111476e-05, "loss": 1.1092, "step": 2357 }, { "epoch": 0.17350195447229247, "grad_norm": 0.9765625, "learning_rate": 4.6454512052760225e-05, "loss": 1.0355, "step": 2358 }, { "epoch": 0.17357553460565647, "grad_norm": 0.91015625, "learning_rate": 4.6451538372982527e-05, "loss": 1.0261, "step": 2359 }, { "epoch": 0.17364911473902048, "grad_norm": 0.7734375, "learning_rate": 4.6448563541941295e-05, "loss": 1.1197, "step": 2360 }, { "epoch": 0.17372269487238445, "grad_norm": 0.94140625, "learning_rate": 4.6445587559796166e-05, "loss": 0.8631, "step": 2361 }, { "epoch": 0.17379627500574846, "grad_norm": 0.91796875, "learning_rate": 4.6442610426706856e-05, "loss": 1.0614, "step": 2362 }, { "epoch": 0.17386985513911243, "grad_norm": 0.76953125, "learning_rate": 4.643963214283314e-05, "loss": 0.8628, "step": 2363 }, { "epoch": 0.17394343527247644, "grad_norm": 0.91015625, "learning_rate": 4.643665270833487e-05, "loss": 0.8224, "step": 2364 }, { "epoch": 0.17401701540584041, "grad_norm": 0.71875, "learning_rate": 4.643367212337193e-05, "loss": 0.8321, "step": 2365 }, { "epoch": 0.17409059553920442, "grad_norm": 0.76171875, "learning_rate": 4.643069038810429e-05, "loss": 0.9729, "step": 2366 }, { "epoch": 0.1741641756725684, "grad_norm": 0.76171875, "learning_rate": 4.642770750269198e-05, "loss": 0.7919, "step": 2367 }, { "epoch": 0.1742377558059324, "grad_norm": 1.0078125, "learning_rate": 4.642472346729507e-05, "loss": 1.251, "step": 2368 }, { "epoch": 0.1743113359392964, "grad_norm": 0.98828125, "learning_rate": 4.642173828207372e-05, "loss": 1.0962, "step": 2369 }, { "epoch": 0.17438491607266038, "grad_norm": 1.015625, "learning_rate": 4.6418751947188145e-05, "loss": 1.1477, "step": 2370 }, { "epoch": 0.17445849620602438, "grad_norm": 0.80078125, "learning_rate": 4.641576446279861e-05, "loss": 0.7813, "step": 2371 }, { "epoch": 0.17453207633938836, "grad_norm": 1.1171875, "learning_rate": 4.641277582906542e-05, "loss": 1.1445, "step": 2372 }, { "epoch": 0.17460565647275236, "grad_norm": 0.765625, "learning_rate": 4.640978604614902e-05, "loss": 0.9483, "step": 2373 }, { "epoch": 0.17467923660611634, "grad_norm": 0.85546875, "learning_rate": 4.640679511420983e-05, "loss": 0.9605, "step": 2374 }, { "epoch": 0.17475281673948034, "grad_norm": 0.8046875, "learning_rate": 4.6403803033408375e-05, "loss": 0.8761, "step": 2375 }, { "epoch": 0.17482639687284432, "grad_norm": 0.69140625, "learning_rate": 4.6400809803905244e-05, "loss": 0.6966, "step": 2376 }, { "epoch": 0.17489997700620832, "grad_norm": 0.79296875, "learning_rate": 4.639781542586106e-05, "loss": 0.7238, "step": 2377 }, { "epoch": 0.17497355713957233, "grad_norm": 0.83203125, "learning_rate": 4.639481989943655e-05, "loss": 0.8654, "step": 2378 }, { "epoch": 0.1750471372729363, "grad_norm": 0.93359375, "learning_rate": 4.6391823224792456e-05, "loss": 0.9046, "step": 2379 }, { "epoch": 0.1751207174063003, "grad_norm": 0.83984375, "learning_rate": 4.638882540208962e-05, "loss": 0.817, "step": 2380 }, { "epoch": 0.17519429753966428, "grad_norm": 1.0078125, "learning_rate": 4.6385826431488914e-05, "loss": 1.4932, "step": 2381 }, { "epoch": 0.1752678776730283, "grad_norm": 0.75390625, "learning_rate": 4.63828263131513e-05, "loss": 0.8208, "step": 2382 }, { "epoch": 0.17534145780639226, "grad_norm": 0.7734375, "learning_rate": 4.637982504723779e-05, "loss": 0.847, "step": 2383 }, { "epoch": 0.17541503793975627, "grad_norm": 0.82421875, "learning_rate": 4.637682263390944e-05, "loss": 0.6693, "step": 2384 }, { "epoch": 0.17548861807312024, "grad_norm": 0.7578125, "learning_rate": 4.6373819073327403e-05, "loss": 0.7124, "step": 2385 }, { "epoch": 0.17556219820648425, "grad_norm": 1.015625, "learning_rate": 4.637081436565286e-05, "loss": 1.0774, "step": 2386 }, { "epoch": 0.17563577833984825, "grad_norm": 0.98828125, "learning_rate": 4.636780851104707e-05, "loss": 0.9835, "step": 2387 }, { "epoch": 0.17570935847321223, "grad_norm": 0.79296875, "learning_rate": 4.636480150967136e-05, "loss": 0.9992, "step": 2388 }, { "epoch": 0.17578293860657623, "grad_norm": 0.859375, "learning_rate": 4.636179336168711e-05, "loss": 1.0624, "step": 2389 }, { "epoch": 0.1758565187399402, "grad_norm": 0.81640625, "learning_rate": 4.6358784067255755e-05, "loss": 0.9743, "step": 2390 }, { "epoch": 0.1759300988733042, "grad_norm": 0.91015625, "learning_rate": 4.63557736265388e-05, "loss": 0.8359, "step": 2391 }, { "epoch": 0.1760036790066682, "grad_norm": 0.82421875, "learning_rate": 4.635276203969781e-05, "loss": 0.8541, "step": 2392 }, { "epoch": 0.1760772591400322, "grad_norm": 0.859375, "learning_rate": 4.634974930689441e-05, "loss": 0.8042, "step": 2393 }, { "epoch": 0.17615083927339617, "grad_norm": 0.8828125, "learning_rate": 4.634673542829029e-05, "loss": 0.9784, "step": 2394 }, { "epoch": 0.17622441940676017, "grad_norm": 0.890625, "learning_rate": 4.634372040404719e-05, "loss": 0.9491, "step": 2395 }, { "epoch": 0.17629799954012418, "grad_norm": 0.88671875, "learning_rate": 4.6340704234326934e-05, "loss": 0.9291, "step": 2396 }, { "epoch": 0.17637157967348815, "grad_norm": 1.234375, "learning_rate": 4.633768691929139e-05, "loss": 1.245, "step": 2397 }, { "epoch": 0.17644515980685216, "grad_norm": 0.75, "learning_rate": 4.6334668459102484e-05, "loss": 0.9182, "step": 2398 }, { "epoch": 0.17651873994021614, "grad_norm": 0.984375, "learning_rate": 4.6331648853922225e-05, "loss": 1.4692, "step": 2399 }, { "epoch": 0.17659232007358014, "grad_norm": 0.9609375, "learning_rate": 4.6328628103912666e-05, "loss": 0.989, "step": 2400 }, { "epoch": 0.17666590020694412, "grad_norm": 0.953125, "learning_rate": 4.632560620923591e-05, "loss": 0.8725, "step": 2401 }, { "epoch": 0.17673948034030812, "grad_norm": 0.91796875, "learning_rate": 4.6322583170054146e-05, "loss": 1.1465, "step": 2402 }, { "epoch": 0.1768130604736721, "grad_norm": 0.76171875, "learning_rate": 4.631955898652962e-05, "loss": 0.6109, "step": 2403 }, { "epoch": 0.1768866406070361, "grad_norm": 0.82421875, "learning_rate": 4.631653365882463e-05, "loss": 1.066, "step": 2404 }, { "epoch": 0.1769602207404001, "grad_norm": 0.76171875, "learning_rate": 4.6313507187101544e-05, "loss": 0.8509, "step": 2405 }, { "epoch": 0.17703380087376408, "grad_norm": 0.89453125, "learning_rate": 4.631047957152278e-05, "loss": 0.7434, "step": 2406 }, { "epoch": 0.17710738100712808, "grad_norm": 0.83984375, "learning_rate": 4.630745081225083e-05, "loss": 1.0909, "step": 2407 }, { "epoch": 0.17718096114049206, "grad_norm": 0.78515625, "learning_rate": 4.6304420909448235e-05, "loss": 1.0208, "step": 2408 }, { "epoch": 0.17725454127385606, "grad_norm": 0.84375, "learning_rate": 4.63013898632776e-05, "loss": 1.0963, "step": 2409 }, { "epoch": 0.17732812140722004, "grad_norm": 0.78125, "learning_rate": 4.6298357673901615e-05, "loss": 0.8698, "step": 2410 }, { "epoch": 0.17740170154058404, "grad_norm": 1.015625, "learning_rate": 4.6295324341483e-05, "loss": 0.945, "step": 2411 }, { "epoch": 0.17747528167394802, "grad_norm": 0.8046875, "learning_rate": 4.6292289866184546e-05, "loss": 0.9404, "step": 2412 }, { "epoch": 0.17754886180731203, "grad_norm": 0.7734375, "learning_rate": 4.628925424816911e-05, "loss": 0.7535, "step": 2413 }, { "epoch": 0.17762244194067603, "grad_norm": 0.828125, "learning_rate": 4.6286217487599616e-05, "loss": 0.991, "step": 2414 }, { "epoch": 0.17769602207404, "grad_norm": 1.125, "learning_rate": 4.628317958463902e-05, "loss": 1.5002, "step": 2415 }, { "epoch": 0.177769602207404, "grad_norm": 0.84375, "learning_rate": 4.628014053945038e-05, "loss": 0.8467, "step": 2416 }, { "epoch": 0.17784318234076799, "grad_norm": 1.03125, "learning_rate": 4.627710035219679e-05, "loss": 0.8584, "step": 2417 }, { "epoch": 0.177916762474132, "grad_norm": 0.76171875, "learning_rate": 4.627405902304141e-05, "loss": 0.6925, "step": 2418 }, { "epoch": 0.17799034260749597, "grad_norm": 0.85546875, "learning_rate": 4.627101655214746e-05, "loss": 1.1767, "step": 2419 }, { "epoch": 0.17806392274085997, "grad_norm": 0.84765625, "learning_rate": 4.626797293967824e-05, "loss": 1.1363, "step": 2420 }, { "epoch": 0.17813750287422395, "grad_norm": 0.828125, "learning_rate": 4.626492818579707e-05, "loss": 0.9227, "step": 2421 }, { "epoch": 0.17821108300758795, "grad_norm": 0.8359375, "learning_rate": 4.626188229066737e-05, "loss": 0.6957, "step": 2422 }, { "epoch": 0.17828466314095195, "grad_norm": 0.7734375, "learning_rate": 4.62588352544526e-05, "loss": 0.9482, "step": 2423 }, { "epoch": 0.17835824327431593, "grad_norm": 0.7890625, "learning_rate": 4.62557870773163e-05, "loss": 0.9393, "step": 2424 }, { "epoch": 0.17843182340767993, "grad_norm": 0.984375, "learning_rate": 4.625273775942206e-05, "loss": 1.0312, "step": 2425 }, { "epoch": 0.1785054035410439, "grad_norm": 0.99609375, "learning_rate": 4.6249687300933516e-05, "loss": 1.1365, "step": 2426 }, { "epoch": 0.17857898367440792, "grad_norm": 0.83203125, "learning_rate": 4.6246635702014396e-05, "loss": 0.6551, "step": 2427 }, { "epoch": 0.1786525638077719, "grad_norm": 0.95703125, "learning_rate": 4.6243582962828466e-05, "loss": 1.3892, "step": 2428 }, { "epoch": 0.1787261439411359, "grad_norm": 0.81640625, "learning_rate": 4.6240529083539564e-05, "loss": 0.9388, "step": 2429 }, { "epoch": 0.17879972407449987, "grad_norm": 1.1015625, "learning_rate": 4.6237474064311574e-05, "loss": 1.5013, "step": 2430 }, { "epoch": 0.17887330420786388, "grad_norm": 0.77734375, "learning_rate": 4.623441790530847e-05, "loss": 0.9194, "step": 2431 }, { "epoch": 0.17894688434122788, "grad_norm": 0.84375, "learning_rate": 4.6231360606694263e-05, "loss": 1.0206, "step": 2432 }, { "epoch": 0.17902046447459186, "grad_norm": 0.84765625, "learning_rate": 4.622830216863303e-05, "loss": 0.9787, "step": 2433 }, { "epoch": 0.17909404460795586, "grad_norm": 0.78125, "learning_rate": 4.6225242591288914e-05, "loss": 0.692, "step": 2434 }, { "epoch": 0.17916762474131984, "grad_norm": 1.03125, "learning_rate": 4.622218187482612e-05, "loss": 1.0808, "step": 2435 }, { "epoch": 0.17924120487468384, "grad_norm": 0.73046875, "learning_rate": 4.62191200194089e-05, "loss": 0.9005, "step": 2436 }, { "epoch": 0.17931478500804782, "grad_norm": 0.87890625, "learning_rate": 4.62160570252016e-05, "loss": 0.9334, "step": 2437 }, { "epoch": 0.17938836514141182, "grad_norm": 0.78125, "learning_rate": 4.621299289236858e-05, "loss": 0.9676, "step": 2438 }, { "epoch": 0.1794619452747758, "grad_norm": 1.1875, "learning_rate": 4.62099276210743e-05, "loss": 1.203, "step": 2439 }, { "epoch": 0.1795355254081398, "grad_norm": 0.90625, "learning_rate": 4.620686121148326e-05, "loss": 1.0788, "step": 2440 }, { "epoch": 0.1796091055415038, "grad_norm": 0.7421875, "learning_rate": 4.620379366376004e-05, "loss": 0.7248, "step": 2441 }, { "epoch": 0.17968268567486778, "grad_norm": 1.0546875, "learning_rate": 4.620072497806926e-05, "loss": 1.0184, "step": 2442 }, { "epoch": 0.17975626580823179, "grad_norm": 0.8984375, "learning_rate": 4.6197655154575615e-05, "loss": 0.9649, "step": 2443 }, { "epoch": 0.17982984594159576, "grad_norm": 0.87109375, "learning_rate": 4.619458419344385e-05, "loss": 0.9972, "step": 2444 }, { "epoch": 0.17990342607495977, "grad_norm": 0.81640625, "learning_rate": 4.619151209483878e-05, "loss": 1.0228, "step": 2445 }, { "epoch": 0.17997700620832374, "grad_norm": 0.80078125, "learning_rate": 4.618843885892529e-05, "loss": 0.9985, "step": 2446 }, { "epoch": 0.18005058634168775, "grad_norm": 0.98828125, "learning_rate": 4.61853644858683e-05, "loss": 1.6175, "step": 2447 }, { "epoch": 0.18012416647505172, "grad_norm": 1.25, "learning_rate": 4.618228897583281e-05, "loss": 1.115, "step": 2448 }, { "epoch": 0.18019774660841573, "grad_norm": 0.95703125, "learning_rate": 4.617921232898388e-05, "loss": 1.3514, "step": 2449 }, { "epoch": 0.18027132674177973, "grad_norm": 0.81640625, "learning_rate": 4.617613454548663e-05, "loss": 0.9386, "step": 2450 }, { "epoch": 0.1803449068751437, "grad_norm": 0.890625, "learning_rate": 4.6173055625506236e-05, "loss": 0.8495, "step": 2451 }, { "epoch": 0.1804184870085077, "grad_norm": 0.78515625, "learning_rate": 4.616997556920793e-05, "loss": 0.7815, "step": 2452 }, { "epoch": 0.1804920671418717, "grad_norm": 0.9765625, "learning_rate": 4.616689437675702e-05, "loss": 0.9896, "step": 2453 }, { "epoch": 0.1805656472752357, "grad_norm": 0.8671875, "learning_rate": 4.616381204831887e-05, "loss": 0.9494, "step": 2454 }, { "epoch": 0.18063922740859967, "grad_norm": 0.76953125, "learning_rate": 4.61607285840589e-05, "loss": 0.6253, "step": 2455 }, { "epoch": 0.18071280754196367, "grad_norm": 0.828125, "learning_rate": 4.6157643984142595e-05, "loss": 0.9087, "step": 2456 }, { "epoch": 0.18078638767532765, "grad_norm": 1.0390625, "learning_rate": 4.61545582487355e-05, "loss": 1.4828, "step": 2457 }, { "epoch": 0.18085996780869165, "grad_norm": 0.9765625, "learning_rate": 4.615147137800321e-05, "loss": 1.1915, "step": 2458 }, { "epoch": 0.18093354794205566, "grad_norm": 0.953125, "learning_rate": 4.6148383372111406e-05, "loss": 1.0188, "step": 2459 }, { "epoch": 0.18100712807541963, "grad_norm": 0.96875, "learning_rate": 4.6145294231225816e-05, "loss": 1.111, "step": 2460 }, { "epoch": 0.18108070820878364, "grad_norm": 0.81640625, "learning_rate": 4.614220395551222e-05, "loss": 1.0745, "step": 2461 }, { "epoch": 0.1811542883421476, "grad_norm": 0.81640625, "learning_rate": 4.6139112545136466e-05, "loss": 0.8347, "step": 2462 }, { "epoch": 0.18122786847551162, "grad_norm": 0.81640625, "learning_rate": 4.6136020000264466e-05, "loss": 0.7947, "step": 2463 }, { "epoch": 0.1813014486088756, "grad_norm": 1.0703125, "learning_rate": 4.61329263210622e-05, "loss": 0.8838, "step": 2464 }, { "epoch": 0.1813750287422396, "grad_norm": 0.69921875, "learning_rate": 4.6129831507695684e-05, "loss": 0.7092, "step": 2465 }, { "epoch": 0.1814486088756036, "grad_norm": 0.82421875, "learning_rate": 4.612673556033103e-05, "loss": 0.6203, "step": 2466 }, { "epoch": 0.18152218900896758, "grad_norm": 1.09375, "learning_rate": 4.612363847913437e-05, "loss": 1.364, "step": 2467 }, { "epoch": 0.18159576914233158, "grad_norm": 1.0234375, "learning_rate": 4.612054026427193e-05, "loss": 0.9108, "step": 2468 }, { "epoch": 0.18166934927569556, "grad_norm": 1.4921875, "learning_rate": 4.611744091590999e-05, "loss": 0.9871, "step": 2469 }, { "epoch": 0.18174292940905956, "grad_norm": 1.078125, "learning_rate": 4.611434043421489e-05, "loss": 1.2519, "step": 2470 }, { "epoch": 0.18181650954242354, "grad_norm": 0.93359375, "learning_rate": 4.6111238819353005e-05, "loss": 0.969, "step": 2471 }, { "epoch": 0.18189008967578754, "grad_norm": 0.82421875, "learning_rate": 4.610813607149081e-05, "loss": 0.8348, "step": 2472 }, { "epoch": 0.18196366980915152, "grad_norm": 0.87890625, "learning_rate": 4.6105032190794816e-05, "loss": 1.1528, "step": 2473 }, { "epoch": 0.18203724994251552, "grad_norm": 0.94140625, "learning_rate": 4.610192717743162e-05, "loss": 1.0308, "step": 2474 }, { "epoch": 0.18211083007587953, "grad_norm": 0.9453125, "learning_rate": 4.609882103156783e-05, "loss": 1.0408, "step": 2475 }, { "epoch": 0.1821844102092435, "grad_norm": 0.703125, "learning_rate": 4.6095713753370174e-05, "loss": 0.7387, "step": 2476 }, { "epoch": 0.1822579903426075, "grad_norm": 1.3515625, "learning_rate": 4.609260534300541e-05, "loss": 0.9955, "step": 2477 }, { "epoch": 0.18233157047597148, "grad_norm": 0.7578125, "learning_rate": 4.608949580064035e-05, "loss": 0.6883, "step": 2478 }, { "epoch": 0.1824051506093355, "grad_norm": 0.859375, "learning_rate": 4.608638512644188e-05, "loss": 0.7779, "step": 2479 }, { "epoch": 0.18247873074269946, "grad_norm": 0.8671875, "learning_rate": 4.6083273320576945e-05, "loss": 1.2412, "step": 2480 }, { "epoch": 0.18255231087606347, "grad_norm": 1.09375, "learning_rate": 4.6080160383212556e-05, "loss": 1.2351, "step": 2481 }, { "epoch": 0.18262589100942744, "grad_norm": 0.8359375, "learning_rate": 4.607704631451578e-05, "loss": 0.96, "step": 2482 }, { "epoch": 0.18269947114279145, "grad_norm": 0.96875, "learning_rate": 4.607393111465373e-05, "loss": 0.9822, "step": 2483 }, { "epoch": 0.18277305127615545, "grad_norm": 1.1171875, "learning_rate": 4.60708147837936e-05, "loss": 1.2662, "step": 2484 }, { "epoch": 0.18284663140951943, "grad_norm": 0.87890625, "learning_rate": 4.6067697322102646e-05, "loss": 1.0083, "step": 2485 }, { "epoch": 0.18292021154288343, "grad_norm": 0.89453125, "learning_rate": 4.606457872974816e-05, "loss": 0.9201, "step": 2486 }, { "epoch": 0.1829937916762474, "grad_norm": 0.89453125, "learning_rate": 4.606145900689751e-05, "loss": 1.021, "step": 2487 }, { "epoch": 0.1830673718096114, "grad_norm": 0.88671875, "learning_rate": 4.605833815371815e-05, "loss": 0.9086, "step": 2488 }, { "epoch": 0.1831409519429754, "grad_norm": 0.703125, "learning_rate": 4.605521617037755e-05, "loss": 0.643, "step": 2489 }, { "epoch": 0.1832145320763394, "grad_norm": 0.92578125, "learning_rate": 4.6052093057043264e-05, "loss": 1.0791, "step": 2490 }, { "epoch": 0.18328811220970337, "grad_norm": 0.83984375, "learning_rate": 4.604896881388291e-05, "loss": 0.8522, "step": 2491 }, { "epoch": 0.18336169234306737, "grad_norm": 0.9375, "learning_rate": 4.6045843441064153e-05, "loss": 0.7927, "step": 2492 }, { "epoch": 0.18343527247643138, "grad_norm": 1.0625, "learning_rate": 4.6042716938754726e-05, "loss": 0.9329, "step": 2493 }, { "epoch": 0.18350885260979535, "grad_norm": 0.796875, "learning_rate": 4.603958930712242e-05, "loss": 0.8862, "step": 2494 }, { "epoch": 0.18358243274315936, "grad_norm": 1.0546875, "learning_rate": 4.60364605463351e-05, "loss": 1.2713, "step": 2495 }, { "epoch": 0.18365601287652333, "grad_norm": 0.921875, "learning_rate": 4.603333065656068e-05, "loss": 1.0224, "step": 2496 }, { "epoch": 0.18372959300988734, "grad_norm": 0.84375, "learning_rate": 4.6030199637967126e-05, "loss": 1.1144, "step": 2497 }, { "epoch": 0.1838031731432513, "grad_norm": 0.73046875, "learning_rate": 4.6027067490722475e-05, "loss": 0.5707, "step": 2498 }, { "epoch": 0.18387675327661532, "grad_norm": 0.82421875, "learning_rate": 4.602393421499483e-05, "loss": 0.7865, "step": 2499 }, { "epoch": 0.1839503334099793, "grad_norm": 0.71484375, "learning_rate": 4.602079981095234e-05, "loss": 0.8931, "step": 2500 }, { "epoch": 0.1840239135433433, "grad_norm": 1.0234375, "learning_rate": 4.6017664278763225e-05, "loss": 1.0626, "step": 2501 }, { "epoch": 0.1840974936767073, "grad_norm": 0.828125, "learning_rate": 4.6014527618595776e-05, "loss": 0.8422, "step": 2502 }, { "epoch": 0.18417107381007128, "grad_norm": 0.89453125, "learning_rate": 4.601138983061831e-05, "loss": 0.9965, "step": 2503 }, { "epoch": 0.18424465394343528, "grad_norm": 1.2578125, "learning_rate": 4.600825091499924e-05, "loss": 1.3074, "step": 2504 }, { "epoch": 0.18431823407679926, "grad_norm": 0.91796875, "learning_rate": 4.6005110871907024e-05, "loss": 0.6125, "step": 2505 }, { "epoch": 0.18439181421016326, "grad_norm": 0.93359375, "learning_rate": 4.6001969701510186e-05, "loss": 1.3469, "step": 2506 }, { "epoch": 0.18446539434352724, "grad_norm": 1.09375, "learning_rate": 4.599882740397729e-05, "loss": 1.126, "step": 2507 }, { "epoch": 0.18453897447689124, "grad_norm": 0.98046875, "learning_rate": 4.5995683979476995e-05, "loss": 0.9961, "step": 2508 }, { "epoch": 0.18461255461025522, "grad_norm": 0.84375, "learning_rate": 4.599253942817799e-05, "loss": 0.789, "step": 2509 }, { "epoch": 0.18468613474361922, "grad_norm": 0.78125, "learning_rate": 4.598939375024905e-05, "loss": 0.7734, "step": 2510 }, { "epoch": 0.18475971487698323, "grad_norm": 1.1484375, "learning_rate": 4.598624694585899e-05, "loss": 1.2937, "step": 2511 }, { "epoch": 0.1848332950103472, "grad_norm": 0.83203125, "learning_rate": 4.598309901517669e-05, "loss": 0.9011, "step": 2512 }, { "epoch": 0.1849068751437112, "grad_norm": 1.1484375, "learning_rate": 4.59799499583711e-05, "loss": 1.3486, "step": 2513 }, { "epoch": 0.18498045527707518, "grad_norm": 1.0234375, "learning_rate": 4.597679977561122e-05, "loss": 0.8625, "step": 2514 }, { "epoch": 0.1850540354104392, "grad_norm": 0.8984375, "learning_rate": 4.597364846706612e-05, "loss": 0.6354, "step": 2515 }, { "epoch": 0.18512761554380316, "grad_norm": 0.73046875, "learning_rate": 4.597049603290491e-05, "loss": 0.649, "step": 2516 }, { "epoch": 0.18520119567716717, "grad_norm": 0.77734375, "learning_rate": 4.5967342473296794e-05, "loss": 0.984, "step": 2517 }, { "epoch": 0.18527477581053114, "grad_norm": 0.88671875, "learning_rate": 4.5964187788411004e-05, "loss": 0.7508, "step": 2518 }, { "epoch": 0.18534835594389515, "grad_norm": 0.71875, "learning_rate": 4.596103197841686e-05, "loss": 0.7298, "step": 2519 }, { "epoch": 0.18542193607725915, "grad_norm": 0.8828125, "learning_rate": 4.595787504348371e-05, "loss": 0.8501, "step": 2520 }, { "epoch": 0.18549551621062313, "grad_norm": 1.7421875, "learning_rate": 4.5954716983780995e-05, "loss": 1.4432, "step": 2521 }, { "epoch": 0.18556909634398713, "grad_norm": 0.921875, "learning_rate": 4.59515577994782e-05, "loss": 0.9085, "step": 2522 }, { "epoch": 0.1856426764773511, "grad_norm": 0.8359375, "learning_rate": 4.594839749074486e-05, "loss": 1.2709, "step": 2523 }, { "epoch": 0.1857162566107151, "grad_norm": 0.83984375, "learning_rate": 4.59452360577506e-05, "loss": 1.0379, "step": 2524 }, { "epoch": 0.1857898367440791, "grad_norm": 0.80078125, "learning_rate": 4.5942073500665076e-05, "loss": 1.0317, "step": 2525 }, { "epoch": 0.1858634168774431, "grad_norm": 0.9921875, "learning_rate": 4.593890981965803e-05, "loss": 0.8333, "step": 2526 }, { "epoch": 0.18593699701080707, "grad_norm": 0.82421875, "learning_rate": 4.593574501489923e-05, "loss": 1.1169, "step": 2527 }, { "epoch": 0.18601057714417107, "grad_norm": 0.87890625, "learning_rate": 4.5932579086558545e-05, "loss": 0.9457, "step": 2528 }, { "epoch": 0.18608415727753508, "grad_norm": 0.86328125, "learning_rate": 4.592941203480587e-05, "loss": 1.0812, "step": 2529 }, { "epoch": 0.18615773741089905, "grad_norm": 0.73046875, "learning_rate": 4.592624385981119e-05, "loss": 0.7941, "step": 2530 }, { "epoch": 0.18623131754426306, "grad_norm": 0.91796875, "learning_rate": 4.592307456174452e-05, "loss": 1.1419, "step": 2531 }, { "epoch": 0.18630489767762703, "grad_norm": 0.8359375, "learning_rate": 4.591990414077596e-05, "loss": 0.9335, "step": 2532 }, { "epoch": 0.18637847781099104, "grad_norm": 0.80078125, "learning_rate": 4.5916732597075653e-05, "loss": 0.694, "step": 2533 }, { "epoch": 0.18645205794435502, "grad_norm": 0.87890625, "learning_rate": 4.591355993081382e-05, "loss": 1.0429, "step": 2534 }, { "epoch": 0.18652563807771902, "grad_norm": 1.0546875, "learning_rate": 4.591038614216072e-05, "loss": 1.0098, "step": 2535 }, { "epoch": 0.186599218211083, "grad_norm": 0.84765625, "learning_rate": 4.590721123128669e-05, "loss": 0.8707, "step": 2536 }, { "epoch": 0.186672798344447, "grad_norm": 0.90234375, "learning_rate": 4.590403519836212e-05, "loss": 0.9409, "step": 2537 }, { "epoch": 0.186746378477811, "grad_norm": 0.9921875, "learning_rate": 4.590085804355747e-05, "loss": 1.2149, "step": 2538 }, { "epoch": 0.18681995861117498, "grad_norm": 0.87109375, "learning_rate": 4.5897679767043244e-05, "loss": 0.6591, "step": 2539 }, { "epoch": 0.18689353874453898, "grad_norm": 0.8984375, "learning_rate": 4.589450036899001e-05, "loss": 1.385, "step": 2540 }, { "epoch": 0.18696711887790296, "grad_norm": 1.015625, "learning_rate": 4.5891319849568406e-05, "loss": 1.0407, "step": 2541 }, { "epoch": 0.18704069901126696, "grad_norm": 0.87109375, "learning_rate": 4.5888138208949126e-05, "loss": 0.8642, "step": 2542 }, { "epoch": 0.18711427914463094, "grad_norm": 0.890625, "learning_rate": 4.5884955447302916e-05, "loss": 0.9204, "step": 2543 }, { "epoch": 0.18718785927799494, "grad_norm": 0.76171875, "learning_rate": 4.58817715648006e-05, "loss": 0.8527, "step": 2544 }, { "epoch": 0.18726143941135892, "grad_norm": 0.91015625, "learning_rate": 4.587858656161303e-05, "loss": 1.162, "step": 2545 }, { "epoch": 0.18733501954472292, "grad_norm": 1.0546875, "learning_rate": 4.587540043791117e-05, "loss": 1.3641, "step": 2546 }, { "epoch": 0.18740859967808693, "grad_norm": 0.75390625, "learning_rate": 4.587221319386599e-05, "loss": 0.6579, "step": 2547 }, { "epoch": 0.1874821798114509, "grad_norm": 1.0625, "learning_rate": 4.5869024829648555e-05, "loss": 1.2344, "step": 2548 }, { "epoch": 0.1875557599448149, "grad_norm": 0.734375, "learning_rate": 4.586583534542996e-05, "loss": 0.7285, "step": 2549 }, { "epoch": 0.18762934007817889, "grad_norm": 1.03125, "learning_rate": 4.586264474138141e-05, "loss": 1.0602, "step": 2550 }, { "epoch": 0.1877029202115429, "grad_norm": 0.8828125, "learning_rate": 4.58594530176741e-05, "loss": 0.9655, "step": 2551 }, { "epoch": 0.18777650034490687, "grad_norm": 0.79296875, "learning_rate": 4.585626017447936e-05, "loss": 0.7523, "step": 2552 }, { "epoch": 0.18785008047827087, "grad_norm": 1.0078125, "learning_rate": 4.585306621196853e-05, "loss": 1.1229, "step": 2553 }, { "epoch": 0.18792366061163485, "grad_norm": 1.3203125, "learning_rate": 4.584987113031301e-05, "loss": 1.4735, "step": 2554 }, { "epoch": 0.18799724074499885, "grad_norm": 0.87890625, "learning_rate": 4.58466749296843e-05, "loss": 0.7991, "step": 2555 }, { "epoch": 0.18807082087836285, "grad_norm": 0.89453125, "learning_rate": 4.5843477610253906e-05, "loss": 0.8562, "step": 2556 }, { "epoch": 0.18814440101172683, "grad_norm": 1.0078125, "learning_rate": 4.584027917219345e-05, "loss": 1.2277, "step": 2557 }, { "epoch": 0.18821798114509083, "grad_norm": 0.671875, "learning_rate": 4.583707961567456e-05, "loss": 0.7215, "step": 2558 }, { "epoch": 0.1882915612784548, "grad_norm": 0.82421875, "learning_rate": 4.5833878940868966e-05, "loss": 0.7097, "step": 2559 }, { "epoch": 0.18836514141181881, "grad_norm": 0.91015625, "learning_rate": 4.5830677147948445e-05, "loss": 0.9554, "step": 2560 }, { "epoch": 0.1884387215451828, "grad_norm": 0.67578125, "learning_rate": 4.5827474237084824e-05, "loss": 0.7611, "step": 2561 }, { "epoch": 0.1885123016785468, "grad_norm": 0.98046875, "learning_rate": 4.5824270208449996e-05, "loss": 1.4304, "step": 2562 }, { "epoch": 0.18858588181191077, "grad_norm": 0.76953125, "learning_rate": 4.582106506221591e-05, "loss": 0.7863, "step": 2563 }, { "epoch": 0.18865946194527478, "grad_norm": 0.859375, "learning_rate": 4.5817858798554594e-05, "loss": 0.9078, "step": 2564 }, { "epoch": 0.18873304207863878, "grad_norm": 1.1171875, "learning_rate": 4.581465141763812e-05, "loss": 1.2897, "step": 2565 }, { "epoch": 0.18880662221200276, "grad_norm": 0.79296875, "learning_rate": 4.5811442919638614e-05, "loss": 0.8086, "step": 2566 }, { "epoch": 0.18888020234536676, "grad_norm": 0.80859375, "learning_rate": 4.580823330472827e-05, "loss": 0.9277, "step": 2567 }, { "epoch": 0.18895378247873074, "grad_norm": 0.8125, "learning_rate": 4.580502257307935e-05, "loss": 0.6848, "step": 2568 }, { "epoch": 0.18902736261209474, "grad_norm": 0.921875, "learning_rate": 4.5801810724864165e-05, "loss": 0.9872, "step": 2569 }, { "epoch": 0.18910094274545872, "grad_norm": 0.8515625, "learning_rate": 4.5798597760255076e-05, "loss": 1.2729, "step": 2570 }, { "epoch": 0.18917452287882272, "grad_norm": 0.6796875, "learning_rate": 4.579538367942454e-05, "loss": 0.6431, "step": 2571 }, { "epoch": 0.1892481030121867, "grad_norm": 1.015625, "learning_rate": 4.579216848254504e-05, "loss": 1.1885, "step": 2572 }, { "epoch": 0.1893216831455507, "grad_norm": 0.8125, "learning_rate": 4.578895216978912e-05, "loss": 0.9254, "step": 2573 }, { "epoch": 0.1893952632789147, "grad_norm": 0.875, "learning_rate": 4.578573474132941e-05, "loss": 1.0611, "step": 2574 }, { "epoch": 0.18946884341227868, "grad_norm": 0.953125, "learning_rate": 4.578251619733858e-05, "loss": 0.9559, "step": 2575 }, { "epoch": 0.18954242354564269, "grad_norm": 0.9921875, "learning_rate": 4.5779296537989344e-05, "loss": 1.5196, "step": 2576 }, { "epoch": 0.18961600367900666, "grad_norm": 0.8984375, "learning_rate": 4.577607576345452e-05, "loss": 1.1808, "step": 2577 }, { "epoch": 0.18968958381237067, "grad_norm": 0.8671875, "learning_rate": 4.577285387390694e-05, "loss": 0.8348, "step": 2578 }, { "epoch": 0.18976316394573464, "grad_norm": 1.203125, "learning_rate": 4.576963086951953e-05, "loss": 1.0998, "step": 2579 }, { "epoch": 0.18983674407909865, "grad_norm": 0.8359375, "learning_rate": 4.576640675046526e-05, "loss": 1.0317, "step": 2580 }, { "epoch": 0.18991032421246262, "grad_norm": 0.703125, "learning_rate": 4.576318151691716e-05, "loss": 0.882, "step": 2581 }, { "epoch": 0.18998390434582663, "grad_norm": 0.86328125, "learning_rate": 4.5759955169048334e-05, "loss": 0.9327, "step": 2582 }, { "epoch": 0.19005748447919063, "grad_norm": 0.90625, "learning_rate": 4.575672770703192e-05, "loss": 1.2367, "step": 2583 }, { "epoch": 0.1901310646125546, "grad_norm": 0.87890625, "learning_rate": 4.575349913104113e-05, "loss": 0.913, "step": 2584 }, { "epoch": 0.1902046447459186, "grad_norm": 1.0390625, "learning_rate": 4.575026944124924e-05, "loss": 0.8268, "step": 2585 }, { "epoch": 0.1902782248792826, "grad_norm": 1.1015625, "learning_rate": 4.5747038637829584e-05, "loss": 1.0053, "step": 2586 }, { "epoch": 0.1903518050126466, "grad_norm": 0.8359375, "learning_rate": 4.5743806720955546e-05, "loss": 1.0264, "step": 2587 }, { "epoch": 0.19042538514601057, "grad_norm": 0.9140625, "learning_rate": 4.574057369080058e-05, "loss": 0.8716, "step": 2588 }, { "epoch": 0.19049896527937457, "grad_norm": 0.81640625, "learning_rate": 4.5737339547538204e-05, "loss": 0.9254, "step": 2589 }, { "epoch": 0.19057254541273855, "grad_norm": 0.953125, "learning_rate": 4.5734104291341974e-05, "loss": 0.8532, "step": 2590 }, { "epoch": 0.19064612554610255, "grad_norm": 0.84765625, "learning_rate": 4.5730867922385536e-05, "loss": 1.2998, "step": 2591 }, { "epoch": 0.19071970567946656, "grad_norm": 0.859375, "learning_rate": 4.572763044084256e-05, "loss": 0.8121, "step": 2592 }, { "epoch": 0.19079328581283053, "grad_norm": 0.71875, "learning_rate": 4.572439184688682e-05, "loss": 0.9667, "step": 2593 }, { "epoch": 0.19086686594619454, "grad_norm": 0.921875, "learning_rate": 4.5721152140692105e-05, "loss": 1.044, "step": 2594 }, { "epoch": 0.1909404460795585, "grad_norm": 0.9375, "learning_rate": 4.5717911322432295e-05, "loss": 0.8499, "step": 2595 }, { "epoch": 0.19101402621292252, "grad_norm": 0.84765625, "learning_rate": 4.571466939228131e-05, "loss": 1.106, "step": 2596 }, { "epoch": 0.1910876063462865, "grad_norm": 0.89453125, "learning_rate": 4.571142635041314e-05, "loss": 0.5618, "step": 2597 }, { "epoch": 0.1911611864796505, "grad_norm": 0.93359375, "learning_rate": 4.570818219700185e-05, "loss": 0.9936, "step": 2598 }, { "epoch": 0.19123476661301447, "grad_norm": 0.671875, "learning_rate": 4.570493693222152e-05, "loss": 0.603, "step": 2599 }, { "epoch": 0.19130834674637848, "grad_norm": 1.03125, "learning_rate": 4.570169055624634e-05, "loss": 0.8996, "step": 2600 }, { "epoch": 0.19138192687974248, "grad_norm": 0.89453125, "learning_rate": 4.569844306925052e-05, "loss": 1.1775, "step": 2601 }, { "epoch": 0.19145550701310646, "grad_norm": 0.98828125, "learning_rate": 4.5695194471408366e-05, "loss": 0.8291, "step": 2602 }, { "epoch": 0.19152908714647046, "grad_norm": 0.9921875, "learning_rate": 4.56919447628942e-05, "loss": 0.9419, "step": 2603 }, { "epoch": 0.19160266727983444, "grad_norm": 0.875, "learning_rate": 4.5688693943882446e-05, "loss": 1.0143, "step": 2604 }, { "epoch": 0.19167624741319844, "grad_norm": 1.03125, "learning_rate": 4.5685442014547563e-05, "loss": 1.0611, "step": 2605 }, { "epoch": 0.19174982754656242, "grad_norm": 1.1484375, "learning_rate": 4.568218897506408e-05, "loss": 0.9734, "step": 2606 }, { "epoch": 0.19182340767992642, "grad_norm": 0.76953125, "learning_rate": 4.567893482560657e-05, "loss": 0.8655, "step": 2607 }, { "epoch": 0.1918969878132904, "grad_norm": 0.94140625, "learning_rate": 4.567567956634969e-05, "loss": 1.0493, "step": 2608 }, { "epoch": 0.1919705679466544, "grad_norm": 1.046875, "learning_rate": 4.567242319746814e-05, "loss": 1.328, "step": 2609 }, { "epoch": 0.1920441480800184, "grad_norm": 0.9453125, "learning_rate": 4.5669165719136675e-05, "loss": 1.341, "step": 2610 }, { "epoch": 0.19211772821338238, "grad_norm": 1.0859375, "learning_rate": 4.566590713153013e-05, "loss": 0.9836, "step": 2611 }, { "epoch": 0.1921913083467464, "grad_norm": 1.3203125, "learning_rate": 4.566264743482338e-05, "loss": 0.8758, "step": 2612 }, { "epoch": 0.19226488848011036, "grad_norm": 0.89453125, "learning_rate": 4.565938662919137e-05, "loss": 0.8905, "step": 2613 }, { "epoch": 0.19233846861347437, "grad_norm": 0.86328125, "learning_rate": 4.5656124714809096e-05, "loss": 0.7487, "step": 2614 }, { "epoch": 0.19241204874683834, "grad_norm": 0.8125, "learning_rate": 4.5652861691851624e-05, "loss": 0.6465, "step": 2615 }, { "epoch": 0.19248562888020235, "grad_norm": 1.15625, "learning_rate": 4.564959756049407e-05, "loss": 1.4017, "step": 2616 }, { "epoch": 0.19255920901356632, "grad_norm": 0.88671875, "learning_rate": 4.564633232091162e-05, "loss": 0.9095, "step": 2617 }, { "epoch": 0.19263278914693033, "grad_norm": 0.9140625, "learning_rate": 4.564306597327951e-05, "loss": 0.6735, "step": 2618 }, { "epoch": 0.19270636928029433, "grad_norm": 0.90625, "learning_rate": 4.563979851777304e-05, "loss": 1.0966, "step": 2619 }, { "epoch": 0.1927799494136583, "grad_norm": 0.99609375, "learning_rate": 4.563652995456756e-05, "loss": 0.9686, "step": 2620 }, { "epoch": 0.1928535295470223, "grad_norm": 0.92578125, "learning_rate": 4.5633260283838504e-05, "loss": 0.862, "step": 2621 }, { "epoch": 0.1929271096803863, "grad_norm": 0.80859375, "learning_rate": 4.5629989505761325e-05, "loss": 1.1745, "step": 2622 }, { "epoch": 0.1930006898137503, "grad_norm": 0.8984375, "learning_rate": 4.562671762051159e-05, "loss": 0.8492, "step": 2623 }, { "epoch": 0.19307426994711427, "grad_norm": 0.7890625, "learning_rate": 4.5623444628264864e-05, "loss": 1.0222, "step": 2624 }, { "epoch": 0.19314785008047827, "grad_norm": 0.796875, "learning_rate": 4.562017052919683e-05, "loss": 1.0419, "step": 2625 }, { "epoch": 0.19322143021384225, "grad_norm": 1.0078125, "learning_rate": 4.5616895323483184e-05, "loss": 1.004, "step": 2626 }, { "epoch": 0.19329501034720625, "grad_norm": 0.9375, "learning_rate": 4.56136190112997e-05, "loss": 0.8283, "step": 2627 }, { "epoch": 0.19336859048057026, "grad_norm": 0.796875, "learning_rate": 4.5610341592822224e-05, "loss": 0.984, "step": 2628 }, { "epoch": 0.19344217061393423, "grad_norm": 0.81640625, "learning_rate": 4.560706306822664e-05, "loss": 1.3144, "step": 2629 }, { "epoch": 0.19351575074729824, "grad_norm": 0.9765625, "learning_rate": 4.5603783437688906e-05, "loss": 1.0334, "step": 2630 }, { "epoch": 0.1935893308806622, "grad_norm": 0.625, "learning_rate": 4.5600502701385026e-05, "loss": 0.6328, "step": 2631 }, { "epoch": 0.19366291101402622, "grad_norm": 0.80078125, "learning_rate": 4.559722085949107e-05, "loss": 0.8362, "step": 2632 }, { "epoch": 0.1937364911473902, "grad_norm": 0.71484375, "learning_rate": 4.559393791218318e-05, "loss": 0.7109, "step": 2633 }, { "epoch": 0.1938100712807542, "grad_norm": 0.8515625, "learning_rate": 4.5590653859637545e-05, "loss": 0.7784, "step": 2634 }, { "epoch": 0.19388365141411817, "grad_norm": 1.0625, "learning_rate": 4.5587368702030396e-05, "loss": 1.3132, "step": 2635 }, { "epoch": 0.19395723154748218, "grad_norm": 0.84375, "learning_rate": 4.5584082439538055e-05, "loss": 0.6926, "step": 2636 }, { "epoch": 0.19403081168084618, "grad_norm": 0.953125, "learning_rate": 4.5580795072336894e-05, "loss": 0.7574, "step": 2637 }, { "epoch": 0.19410439181421016, "grad_norm": 0.78125, "learning_rate": 4.5577506600603326e-05, "loss": 1.2969, "step": 2638 }, { "epoch": 0.19417797194757416, "grad_norm": 0.76953125, "learning_rate": 4.5574217024513846e-05, "loss": 0.6515, "step": 2639 }, { "epoch": 0.19425155208093814, "grad_norm": 0.99609375, "learning_rate": 4.5570926344245003e-05, "loss": 0.9621, "step": 2640 }, { "epoch": 0.19432513221430214, "grad_norm": 0.7578125, "learning_rate": 4.5567634559973394e-05, "loss": 0.7323, "step": 2641 }, { "epoch": 0.19439871234766612, "grad_norm": 0.953125, "learning_rate": 4.5564341671875674e-05, "loss": 1.1237, "step": 2642 }, { "epoch": 0.19447229248103012, "grad_norm": 1.0625, "learning_rate": 4.556104768012859e-05, "loss": 1.3252, "step": 2643 }, { "epoch": 0.1945458726143941, "grad_norm": 1.125, "learning_rate": 4.5557752584908906e-05, "loss": 1.3102, "step": 2644 }, { "epoch": 0.1946194527477581, "grad_norm": 0.9453125, "learning_rate": 4.555445638639347e-05, "loss": 0.8272, "step": 2645 }, { "epoch": 0.1946930328811221, "grad_norm": 0.74609375, "learning_rate": 4.555115908475918e-05, "loss": 0.8254, "step": 2646 }, { "epoch": 0.19476661301448608, "grad_norm": 0.8046875, "learning_rate": 4.5547860680183e-05, "loss": 0.9639, "step": 2647 }, { "epoch": 0.1948401931478501, "grad_norm": 2.0, "learning_rate": 4.5544561172841936e-05, "loss": 0.7421, "step": 2648 }, { "epoch": 0.19491377328121406, "grad_norm": 0.984375, "learning_rate": 4.5541260562913093e-05, "loss": 0.7926, "step": 2649 }, { "epoch": 0.19498735341457807, "grad_norm": 0.73828125, "learning_rate": 4.553795885057358e-05, "loss": 0.8785, "step": 2650 }, { "epoch": 0.19506093354794204, "grad_norm": 1.0703125, "learning_rate": 4.553465603600062e-05, "loss": 1.2337, "step": 2651 }, { "epoch": 0.19513451368130605, "grad_norm": 0.87109375, "learning_rate": 4.553135211937144e-05, "loss": 0.7467, "step": 2652 }, { "epoch": 0.19520809381467005, "grad_norm": 2.625, "learning_rate": 4.552804710086338e-05, "loss": 1.179, "step": 2653 }, { "epoch": 0.19528167394803403, "grad_norm": 0.73046875, "learning_rate": 4.55247409806538e-05, "loss": 0.5965, "step": 2654 }, { "epoch": 0.19535525408139803, "grad_norm": 0.79296875, "learning_rate": 4.552143375892014e-05, "loss": 1.0021, "step": 2655 }, { "epoch": 0.195428834214762, "grad_norm": 1.0234375, "learning_rate": 4.5518125435839884e-05, "loss": 0.9996, "step": 2656 }, { "epoch": 0.195502414348126, "grad_norm": 0.84765625, "learning_rate": 4.551481601159059e-05, "loss": 0.8108, "step": 2657 }, { "epoch": 0.19557599448149, "grad_norm": 0.85546875, "learning_rate": 4.551150548634987e-05, "loss": 1.0242, "step": 2658 }, { "epoch": 0.195649574614854, "grad_norm": 0.92578125, "learning_rate": 4.5508193860295396e-05, "loss": 0.9833, "step": 2659 }, { "epoch": 0.19572315474821797, "grad_norm": 1.0625, "learning_rate": 4.5504881133604885e-05, "loss": 1.6644, "step": 2660 }, { "epoch": 0.19579673488158197, "grad_norm": 1.0390625, "learning_rate": 4.550156730645614e-05, "loss": 1.1838, "step": 2661 }, { "epoch": 0.19587031501494598, "grad_norm": 0.9921875, "learning_rate": 4.549825237902699e-05, "loss": 1.1663, "step": 2662 }, { "epoch": 0.19594389514830995, "grad_norm": 1.0, "learning_rate": 4.549493635149535e-05, "loss": 1.432, "step": 2663 }, { "epoch": 0.19601747528167396, "grad_norm": 0.73828125, "learning_rate": 4.549161922403919e-05, "loss": 1.0644, "step": 2664 }, { "epoch": 0.19609105541503793, "grad_norm": 0.80078125, "learning_rate": 4.5488300996836534e-05, "loss": 0.8674, "step": 2665 }, { "epoch": 0.19616463554840194, "grad_norm": 1.109375, "learning_rate": 4.5484981670065454e-05, "loss": 1.1175, "step": 2666 }, { "epoch": 0.19623821568176592, "grad_norm": 1.3984375, "learning_rate": 4.548166124390411e-05, "loss": 0.6985, "step": 2667 }, { "epoch": 0.19631179581512992, "grad_norm": 0.859375, "learning_rate": 4.547833971853067e-05, "loss": 1.034, "step": 2668 }, { "epoch": 0.1963853759484939, "grad_norm": 0.796875, "learning_rate": 4.547501709412343e-05, "loss": 0.8328, "step": 2669 }, { "epoch": 0.1964589560818579, "grad_norm": 0.9296875, "learning_rate": 4.547169337086069e-05, "loss": 1.0259, "step": 2670 }, { "epoch": 0.1965325362152219, "grad_norm": 1.03125, "learning_rate": 4.546836854892084e-05, "loss": 0.8838, "step": 2671 }, { "epoch": 0.19660611634858588, "grad_norm": 0.734375, "learning_rate": 4.54650426284823e-05, "loss": 0.8251, "step": 2672 }, { "epoch": 0.19667969648194988, "grad_norm": 0.6796875, "learning_rate": 4.5461715609723575e-05, "loss": 0.7196, "step": 2673 }, { "epoch": 0.19675327661531386, "grad_norm": 0.81640625, "learning_rate": 4.5458387492823226e-05, "loss": 1.1062, "step": 2674 }, { "epoch": 0.19682685674867786, "grad_norm": 0.7578125, "learning_rate": 4.545505827795986e-05, "loss": 0.8082, "step": 2675 }, { "epoch": 0.19690043688204184, "grad_norm": 0.74609375, "learning_rate": 4.545172796531214e-05, "loss": 0.6577, "step": 2676 }, { "epoch": 0.19697401701540584, "grad_norm": 0.84375, "learning_rate": 4.544839655505882e-05, "loss": 0.8486, "step": 2677 }, { "epoch": 0.19704759714876982, "grad_norm": 1.328125, "learning_rate": 4.544506404737868e-05, "loss": 1.3335, "step": 2678 }, { "epoch": 0.19712117728213382, "grad_norm": 1.2109375, "learning_rate": 4.544173044245056e-05, "loss": 1.4339, "step": 2679 }, { "epoch": 0.19719475741549783, "grad_norm": 0.9453125, "learning_rate": 4.5438395740453375e-05, "loss": 0.8955, "step": 2680 }, { "epoch": 0.1972683375488618, "grad_norm": 0.6796875, "learning_rate": 4.54350599415661e-05, "loss": 0.7659, "step": 2681 }, { "epoch": 0.1973419176822258, "grad_norm": 0.94921875, "learning_rate": 4.543172304596774e-05, "loss": 0.847, "step": 2682 }, { "epoch": 0.19741549781558979, "grad_norm": 0.98046875, "learning_rate": 4.542838505383741e-05, "loss": 1.1848, "step": 2683 }, { "epoch": 0.1974890779489538, "grad_norm": 0.83984375, "learning_rate": 4.542504596535424e-05, "loss": 0.8251, "step": 2684 }, { "epoch": 0.19756265808231777, "grad_norm": 0.8515625, "learning_rate": 4.542170578069742e-05, "loss": 0.772, "step": 2685 }, { "epoch": 0.19763623821568177, "grad_norm": 0.8515625, "learning_rate": 4.5418364500046226e-05, "loss": 0.7522, "step": 2686 }, { "epoch": 0.19770981834904575, "grad_norm": 0.96484375, "learning_rate": 4.541502212357998e-05, "loss": 0.8216, "step": 2687 }, { "epoch": 0.19778339848240975, "grad_norm": 0.90625, "learning_rate": 4.541167865147804e-05, "loss": 0.805, "step": 2688 }, { "epoch": 0.19785697861577375, "grad_norm": 0.8359375, "learning_rate": 4.540833408391987e-05, "loss": 0.8098, "step": 2689 }, { "epoch": 0.19793055874913773, "grad_norm": 0.84765625, "learning_rate": 4.540498842108495e-05, "loss": 0.7311, "step": 2690 }, { "epoch": 0.19800413888250173, "grad_norm": 1.4453125, "learning_rate": 4.540164166315284e-05, "loss": 0.8703, "step": 2691 }, { "epoch": 0.1980777190158657, "grad_norm": 0.88671875, "learning_rate": 4.539829381030316e-05, "loss": 1.0887, "step": 2692 }, { "epoch": 0.19815129914922971, "grad_norm": 1.15625, "learning_rate": 4.539494486271558e-05, "loss": 0.9607, "step": 2693 }, { "epoch": 0.1982248792825937, "grad_norm": 0.90234375, "learning_rate": 4.5391594820569827e-05, "loss": 0.8763, "step": 2694 }, { "epoch": 0.1982984594159577, "grad_norm": 1.0546875, "learning_rate": 4.538824368404569e-05, "loss": 1.3082, "step": 2695 }, { "epoch": 0.19837203954932167, "grad_norm": 0.8046875, "learning_rate": 4.538489145332303e-05, "loss": 0.8263, "step": 2696 }, { "epoch": 0.19844561968268568, "grad_norm": 0.91015625, "learning_rate": 4.5381538128581746e-05, "loss": 0.9347, "step": 2697 }, { "epoch": 0.19851919981604968, "grad_norm": 0.84375, "learning_rate": 4.5378183710001806e-05, "loss": 0.7509, "step": 2698 }, { "epoch": 0.19859277994941366, "grad_norm": 0.9921875, "learning_rate": 4.537482819776323e-05, "loss": 1.0527, "step": 2699 }, { "epoch": 0.19866636008277766, "grad_norm": 0.828125, "learning_rate": 4.537147159204611e-05, "loss": 0.7255, "step": 2700 }, { "epoch": 0.19873994021614164, "grad_norm": 0.9453125, "learning_rate": 4.536811389303058e-05, "loss": 0.7585, "step": 2701 }, { "epoch": 0.19881352034950564, "grad_norm": 0.96875, "learning_rate": 4.5364755100896864e-05, "loss": 0.7841, "step": 2702 }, { "epoch": 0.19888710048286962, "grad_norm": 0.8671875, "learning_rate": 4.536139521582519e-05, "loss": 0.8066, "step": 2703 }, { "epoch": 0.19896068061623362, "grad_norm": 1.0703125, "learning_rate": 4.53580342379959e-05, "loss": 1.1482, "step": 2704 }, { "epoch": 0.1990342607495976, "grad_norm": 0.796875, "learning_rate": 4.535467216758936e-05, "loss": 0.6796, "step": 2705 }, { "epoch": 0.1991078408829616, "grad_norm": 0.8046875, "learning_rate": 4.5351309004786e-05, "loss": 0.7754, "step": 2706 }, { "epoch": 0.1991814210163256, "grad_norm": 0.9140625, "learning_rate": 4.534794474976634e-05, "loss": 1.2938, "step": 2707 }, { "epoch": 0.19925500114968958, "grad_norm": 1.015625, "learning_rate": 4.534457940271091e-05, "loss": 1.0347, "step": 2708 }, { "epoch": 0.19932858128305359, "grad_norm": 0.6640625, "learning_rate": 4.534121296380033e-05, "loss": 0.6704, "step": 2709 }, { "epoch": 0.19940216141641756, "grad_norm": 0.90625, "learning_rate": 4.5337845433215266e-05, "loss": 0.8379, "step": 2710 }, { "epoch": 0.19947574154978157, "grad_norm": 0.85546875, "learning_rate": 4.5334476811136455e-05, "loss": 0.8135, "step": 2711 }, { "epoch": 0.19954932168314554, "grad_norm": 0.97265625, "learning_rate": 4.533110709774468e-05, "loss": 1.1644, "step": 2712 }, { "epoch": 0.19962290181650955, "grad_norm": 0.8828125, "learning_rate": 4.532773629322078e-05, "loss": 0.8303, "step": 2713 }, { "epoch": 0.19969648194987352, "grad_norm": 0.9453125, "learning_rate": 4.532436439774567e-05, "loss": 0.8286, "step": 2714 }, { "epoch": 0.19977006208323753, "grad_norm": 0.85546875, "learning_rate": 4.532099141150031e-05, "loss": 0.8525, "step": 2715 }, { "epoch": 0.19984364221660153, "grad_norm": 0.8046875, "learning_rate": 4.531761733466573e-05, "loss": 0.8361, "step": 2716 }, { "epoch": 0.1999172223499655, "grad_norm": 0.82421875, "learning_rate": 4.5314242167422996e-05, "loss": 0.7961, "step": 2717 }, { "epoch": 0.1999908024833295, "grad_norm": 1.0625, "learning_rate": 4.531086590995325e-05, "loss": 1.0901, "step": 2718 }, { "epoch": 0.2000643826166935, "grad_norm": 0.85546875, "learning_rate": 4.53074885624377e-05, "loss": 0.9963, "step": 2719 }, { "epoch": 0.2001379627500575, "grad_norm": 0.8984375, "learning_rate": 4.5304110125057584e-05, "loss": 1.4383, "step": 2720 }, { "epoch": 0.20021154288342147, "grad_norm": 0.91015625, "learning_rate": 4.5300730597994236e-05, "loss": 0.8235, "step": 2721 }, { "epoch": 0.20028512301678547, "grad_norm": 0.6875, "learning_rate": 4.5297349981429014e-05, "loss": 0.6616, "step": 2722 }, { "epoch": 0.20035870315014945, "grad_norm": 1.03125, "learning_rate": 4.529396827554335e-05, "loss": 1.1743, "step": 2723 }, { "epoch": 0.20043228328351345, "grad_norm": 0.95703125, "learning_rate": 4.529058548051875e-05, "loss": 1.0245, "step": 2724 }, { "epoch": 0.20050586341687746, "grad_norm": 1.28125, "learning_rate": 4.528720159653674e-05, "loss": 1.3024, "step": 2725 }, { "epoch": 0.20057944355024143, "grad_norm": 0.6953125, "learning_rate": 4.528381662377895e-05, "loss": 0.6621, "step": 2726 }, { "epoch": 0.20065302368360544, "grad_norm": 0.85546875, "learning_rate": 4.528043056242702e-05, "loss": 1.2441, "step": 2727 }, { "epoch": 0.2007266038169694, "grad_norm": 0.83203125, "learning_rate": 4.527704341266269e-05, "loss": 1.2101, "step": 2728 }, { "epoch": 0.20080018395033342, "grad_norm": 0.90234375, "learning_rate": 4.5273655174667745e-05, "loss": 0.9185, "step": 2729 }, { "epoch": 0.2008737640836974, "grad_norm": 0.7578125, "learning_rate": 4.5270265848624004e-05, "loss": 0.8128, "step": 2730 }, { "epoch": 0.2009473442170614, "grad_norm": 0.91015625, "learning_rate": 4.52668754347134e-05, "loss": 1.1705, "step": 2731 }, { "epoch": 0.20102092435042537, "grad_norm": 0.72265625, "learning_rate": 4.526348393311786e-05, "loss": 0.8057, "step": 2732 }, { "epoch": 0.20109450448378938, "grad_norm": 0.92578125, "learning_rate": 4.52600913440194e-05, "loss": 1.1357, "step": 2733 }, { "epoch": 0.20116808461715338, "grad_norm": 0.75, "learning_rate": 4.525669766760011e-05, "loss": 0.6603, "step": 2734 }, { "epoch": 0.20124166475051736, "grad_norm": 0.80859375, "learning_rate": 4.525330290404212e-05, "loss": 0.6845, "step": 2735 }, { "epoch": 0.20131524488388136, "grad_norm": 1.2265625, "learning_rate": 4.524990705352761e-05, "loss": 1.2872, "step": 2736 }, { "epoch": 0.20138882501724534, "grad_norm": 0.94921875, "learning_rate": 4.524651011623884e-05, "loss": 0.9801, "step": 2737 }, { "epoch": 0.20146240515060934, "grad_norm": 0.9296875, "learning_rate": 4.524311209235811e-05, "loss": 0.8503, "step": 2738 }, { "epoch": 0.20153598528397332, "grad_norm": 0.81640625, "learning_rate": 4.523971298206779e-05, "loss": 0.925, "step": 2739 }, { "epoch": 0.20160956541733732, "grad_norm": 0.99609375, "learning_rate": 4.52363127855503e-05, "loss": 1.5058, "step": 2740 }, { "epoch": 0.2016831455507013, "grad_norm": 0.75390625, "learning_rate": 4.523291150298813e-05, "loss": 0.7355, "step": 2741 }, { "epoch": 0.2017567256840653, "grad_norm": 0.91796875, "learning_rate": 4.522950913456381e-05, "loss": 0.9563, "step": 2742 }, { "epoch": 0.2018303058174293, "grad_norm": 0.84765625, "learning_rate": 4.522610568045994e-05, "loss": 0.7932, "step": 2743 }, { "epoch": 0.20190388595079328, "grad_norm": 0.8203125, "learning_rate": 4.522270114085917e-05, "loss": 0.8767, "step": 2744 }, { "epoch": 0.2019774660841573, "grad_norm": 1.0859375, "learning_rate": 4.5219295515944244e-05, "loss": 0.9709, "step": 2745 }, { "epoch": 0.20205104621752126, "grad_norm": 0.890625, "learning_rate": 4.5215888805897906e-05, "loss": 0.8323, "step": 2746 }, { "epoch": 0.20212462635088527, "grad_norm": 0.92578125, "learning_rate": 4.5212481010903e-05, "loss": 0.9393, "step": 2747 }, { "epoch": 0.20219820648424924, "grad_norm": 0.90625, "learning_rate": 4.5209072131142415e-05, "loss": 1.0489, "step": 2748 }, { "epoch": 0.20227178661761325, "grad_norm": 0.84375, "learning_rate": 4.52056621667991e-05, "loss": 0.7525, "step": 2749 }, { "epoch": 0.20234536675097722, "grad_norm": 0.84765625, "learning_rate": 4.5202251118056056e-05, "loss": 1.0862, "step": 2750 }, { "epoch": 0.20241894688434123, "grad_norm": 1.2109375, "learning_rate": 4.519883898509635e-05, "loss": 1.1756, "step": 2751 }, { "epoch": 0.20249252701770523, "grad_norm": 0.89453125, "learning_rate": 4.519542576810311e-05, "loss": 0.9582, "step": 2752 }, { "epoch": 0.2025661071510692, "grad_norm": 1.0625, "learning_rate": 4.519201146725951e-05, "loss": 1.0155, "step": 2753 }, { "epoch": 0.2026396872844332, "grad_norm": 1.3046875, "learning_rate": 4.518859608274879e-05, "loss": 1.1281, "step": 2754 }, { "epoch": 0.2027132674177972, "grad_norm": 1.0859375, "learning_rate": 4.518517961475426e-05, "loss": 1.1438, "step": 2755 }, { "epoch": 0.2027868475511612, "grad_norm": 1.171875, "learning_rate": 4.518176206345925e-05, "loss": 1.2908, "step": 2756 }, { "epoch": 0.20286042768452517, "grad_norm": 1.0390625, "learning_rate": 4.51783434290472e-05, "loss": 0.9362, "step": 2757 }, { "epoch": 0.20293400781788917, "grad_norm": 0.91796875, "learning_rate": 4.517492371170156e-05, "loss": 1.2026, "step": 2758 }, { "epoch": 0.20300758795125315, "grad_norm": 0.71875, "learning_rate": 4.517150291160588e-05, "loss": 0.8406, "step": 2759 }, { "epoch": 0.20308116808461715, "grad_norm": 0.78125, "learning_rate": 4.5168081028943726e-05, "loss": 0.7653, "step": 2760 }, { "epoch": 0.20315474821798116, "grad_norm": 0.77734375, "learning_rate": 4.516465806389876e-05, "loss": 0.8063, "step": 2761 }, { "epoch": 0.20322832835134513, "grad_norm": 0.90234375, "learning_rate": 4.5161234016654684e-05, "loss": 0.928, "step": 2762 }, { "epoch": 0.20330190848470914, "grad_norm": 1.0, "learning_rate": 4.515780888739525e-05, "loss": 0.7376, "step": 2763 }, { "epoch": 0.2033754886180731, "grad_norm": 1.3125, "learning_rate": 4.5154382676304295e-05, "loss": 1.3377, "step": 2764 }, { "epoch": 0.20344906875143712, "grad_norm": 0.7890625, "learning_rate": 4.515095538356568e-05, "loss": 1.0057, "step": 2765 }, { "epoch": 0.2035226488848011, "grad_norm": 0.9609375, "learning_rate": 4.5147527009363354e-05, "loss": 0.9021, "step": 2766 }, { "epoch": 0.2035962290181651, "grad_norm": 0.79296875, "learning_rate": 4.51440975538813e-05, "loss": 0.7933, "step": 2767 }, { "epoch": 0.20366980915152907, "grad_norm": 0.90234375, "learning_rate": 4.51406670173036e-05, "loss": 0.995, "step": 2768 }, { "epoch": 0.20374338928489308, "grad_norm": 0.82421875, "learning_rate": 4.513723539981432e-05, "loss": 1.0368, "step": 2769 }, { "epoch": 0.20381696941825708, "grad_norm": 0.79296875, "learning_rate": 4.513380270159765e-05, "loss": 0.7137, "step": 2770 }, { "epoch": 0.20389054955162106, "grad_norm": 0.98828125, "learning_rate": 4.513036892283782e-05, "loss": 0.912, "step": 2771 }, { "epoch": 0.20396412968498506, "grad_norm": 1.015625, "learning_rate": 4.5126934063719113e-05, "loss": 1.8423, "step": 2772 }, { "epoch": 0.20403770981834904, "grad_norm": 0.92578125, "learning_rate": 4.5123498124425864e-05, "loss": 0.9329, "step": 2773 }, { "epoch": 0.20411128995171304, "grad_norm": 0.85546875, "learning_rate": 4.5120061105142486e-05, "loss": 0.9621, "step": 2774 }, { "epoch": 0.20418487008507702, "grad_norm": 0.99609375, "learning_rate": 4.511662300605343e-05, "loss": 1.0549, "step": 2775 }, { "epoch": 0.20425845021844102, "grad_norm": 1.1015625, "learning_rate": 4.5113183827343206e-05, "loss": 1.184, "step": 2776 }, { "epoch": 0.204332030351805, "grad_norm": 0.93359375, "learning_rate": 4.510974356919639e-05, "loss": 1.4337, "step": 2777 }, { "epoch": 0.204405610485169, "grad_norm": 0.8984375, "learning_rate": 4.5106302231797624e-05, "loss": 1.0132, "step": 2778 }, { "epoch": 0.204479190618533, "grad_norm": 0.765625, "learning_rate": 4.51028598153316e-05, "loss": 0.9231, "step": 2779 }, { "epoch": 0.20455277075189698, "grad_norm": 0.859375, "learning_rate": 4.509941631998305e-05, "loss": 0.8495, "step": 2780 }, { "epoch": 0.204626350885261, "grad_norm": 0.9609375, "learning_rate": 4.509597174593679e-05, "loss": 0.8408, "step": 2781 }, { "epoch": 0.20469993101862496, "grad_norm": 0.66015625, "learning_rate": 4.5092526093377685e-05, "loss": 0.7082, "step": 2782 }, { "epoch": 0.20477351115198897, "grad_norm": 0.93359375, "learning_rate": 4.508907936249065e-05, "loss": 1.2779, "step": 2783 }, { "epoch": 0.20484709128535294, "grad_norm": 0.76953125, "learning_rate": 4.508563155346067e-05, "loss": 0.6749, "step": 2784 }, { "epoch": 0.20492067141871695, "grad_norm": 1.0078125, "learning_rate": 4.508218266647278e-05, "loss": 1.1346, "step": 2785 }, { "epoch": 0.20499425155208092, "grad_norm": 1.109375, "learning_rate": 4.507873270171208e-05, "loss": 1.1635, "step": 2786 }, { "epoch": 0.20506783168544493, "grad_norm": 1.1171875, "learning_rate": 4.507528165936372e-05, "loss": 1.3338, "step": 2787 }, { "epoch": 0.20514141181880893, "grad_norm": 0.8515625, "learning_rate": 4.50718295396129e-05, "loss": 1.0394, "step": 2788 }, { "epoch": 0.2052149919521729, "grad_norm": 0.83984375, "learning_rate": 4.506837634264492e-05, "loss": 1.0295, "step": 2789 }, { "epoch": 0.2052885720855369, "grad_norm": 1.1015625, "learning_rate": 4.5064922068645064e-05, "loss": 1.2822, "step": 2790 }, { "epoch": 0.2053621522189009, "grad_norm": 0.765625, "learning_rate": 4.506146671779874e-05, "loss": 0.6555, "step": 2791 }, { "epoch": 0.2054357323522649, "grad_norm": 0.6953125, "learning_rate": 4.50580102902914e-05, "loss": 0.721, "step": 2792 }, { "epoch": 0.20550931248562887, "grad_norm": 0.89453125, "learning_rate": 4.505455278630852e-05, "loss": 0.8942, "step": 2793 }, { "epoch": 0.20558289261899287, "grad_norm": 1.03125, "learning_rate": 4.505109420603568e-05, "loss": 0.8244, "step": 2794 }, { "epoch": 0.20565647275235685, "grad_norm": 0.8984375, "learning_rate": 4.5047634549658475e-05, "loss": 0.8666, "step": 2795 }, { "epoch": 0.20573005288572085, "grad_norm": 2.109375, "learning_rate": 4.5044173817362594e-05, "loss": 1.1106, "step": 2796 }, { "epoch": 0.20580363301908486, "grad_norm": 0.7421875, "learning_rate": 4.5040712009333764e-05, "loss": 0.9823, "step": 2797 }, { "epoch": 0.20587721315244883, "grad_norm": 0.8828125, "learning_rate": 4.5037249125757766e-05, "loss": 1.3864, "step": 2798 }, { "epoch": 0.20595079328581284, "grad_norm": 0.75390625, "learning_rate": 4.503378516682046e-05, "loss": 1.0485, "step": 2799 }, { "epoch": 0.20602437341917682, "grad_norm": 0.8671875, "learning_rate": 4.503032013270774e-05, "loss": 0.8076, "step": 2800 }, { "epoch": 0.20609795355254082, "grad_norm": 0.8203125, "learning_rate": 4.502685402360556e-05, "loss": 0.8173, "step": 2801 }, { "epoch": 0.2061715336859048, "grad_norm": 0.71875, "learning_rate": 4.502338683969997e-05, "loss": 1.0205, "step": 2802 }, { "epoch": 0.2062451138192688, "grad_norm": 1.0078125, "learning_rate": 4.5019918581177015e-05, "loss": 0.8801, "step": 2803 }, { "epoch": 0.20631869395263278, "grad_norm": 0.921875, "learning_rate": 4.5016449248222835e-05, "loss": 0.8997, "step": 2804 }, { "epoch": 0.20639227408599678, "grad_norm": 1.0859375, "learning_rate": 4.501297884102363e-05, "loss": 0.9744, "step": 2805 }, { "epoch": 0.20646585421936078, "grad_norm": 0.98828125, "learning_rate": 4.5009507359765666e-05, "loss": 0.967, "step": 2806 }, { "epoch": 0.20653943435272476, "grad_norm": 0.796875, "learning_rate": 4.500603480463523e-05, "loss": 0.8408, "step": 2807 }, { "epoch": 0.20661301448608876, "grad_norm": 0.98828125, "learning_rate": 4.500256117581868e-05, "loss": 1.3726, "step": 2808 }, { "epoch": 0.20668659461945274, "grad_norm": 0.97265625, "learning_rate": 4.499908647350246e-05, "loss": 1.1918, "step": 2809 }, { "epoch": 0.20676017475281674, "grad_norm": 1.0703125, "learning_rate": 4.499561069787305e-05, "loss": 1.2052, "step": 2810 }, { "epoch": 0.20683375488618072, "grad_norm": 0.7734375, "learning_rate": 4.499213384911696e-05, "loss": 0.7237, "step": 2811 }, { "epoch": 0.20690733501954472, "grad_norm": 0.8984375, "learning_rate": 4.498865592742082e-05, "loss": 0.76, "step": 2812 }, { "epoch": 0.2069809151529087, "grad_norm": 0.89453125, "learning_rate": 4.498517693297127e-05, "loss": 0.9004, "step": 2813 }, { "epoch": 0.2070544952862727, "grad_norm": 1.015625, "learning_rate": 4.498169686595501e-05, "loss": 0.9867, "step": 2814 }, { "epoch": 0.2071280754196367, "grad_norm": 0.73828125, "learning_rate": 4.497821572655883e-05, "loss": 0.9329, "step": 2815 }, { "epoch": 0.20720165555300069, "grad_norm": 0.8046875, "learning_rate": 4.497473351496955e-05, "loss": 0.8902, "step": 2816 }, { "epoch": 0.2072752356863647, "grad_norm": 0.71484375, "learning_rate": 4.497125023137403e-05, "loss": 0.6786, "step": 2817 }, { "epoch": 0.20734881581972867, "grad_norm": 0.7421875, "learning_rate": 4.496776587595924e-05, "loss": 0.7063, "step": 2818 }, { "epoch": 0.20742239595309267, "grad_norm": 0.828125, "learning_rate": 4.496428044891218e-05, "loss": 1.05, "step": 2819 }, { "epoch": 0.20749597608645665, "grad_norm": 0.96875, "learning_rate": 4.4960793950419884e-05, "loss": 1.2781, "step": 2820 }, { "epoch": 0.20756955621982065, "grad_norm": 0.7265625, "learning_rate": 4.4957306380669475e-05, "loss": 0.8069, "step": 2821 }, { "epoch": 0.20764313635318463, "grad_norm": 0.78515625, "learning_rate": 4.4953817739848134e-05, "loss": 0.6579, "step": 2822 }, { "epoch": 0.20771671648654863, "grad_norm": 0.71484375, "learning_rate": 4.495032802814308e-05, "loss": 1.2859, "step": 2823 }, { "epoch": 0.20779029661991263, "grad_norm": 1.0703125, "learning_rate": 4.49468372457416e-05, "loss": 1.3279, "step": 2824 }, { "epoch": 0.2078638767532766, "grad_norm": 0.72265625, "learning_rate": 4.494334539283104e-05, "loss": 0.6983, "step": 2825 }, { "epoch": 0.20793745688664061, "grad_norm": 1.21875, "learning_rate": 4.49398524695988e-05, "loss": 0.9231, "step": 2826 }, { "epoch": 0.2080110370200046, "grad_norm": 0.93359375, "learning_rate": 4.4936358476232346e-05, "loss": 0.8578, "step": 2827 }, { "epoch": 0.2080846171533686, "grad_norm": 0.9921875, "learning_rate": 4.493286341291918e-05, "loss": 1.246, "step": 2828 }, { "epoch": 0.20815819728673257, "grad_norm": 0.93359375, "learning_rate": 4.492936727984688e-05, "loss": 0.9413, "step": 2829 }, { "epoch": 0.20823177742009658, "grad_norm": 0.90234375, "learning_rate": 4.492587007720308e-05, "loss": 0.9834, "step": 2830 }, { "epoch": 0.20830535755346058, "grad_norm": 1.0546875, "learning_rate": 4.4922371805175475e-05, "loss": 1.1971, "step": 2831 }, { "epoch": 0.20837893768682456, "grad_norm": 1.015625, "learning_rate": 4.491887246395179e-05, "loss": 1.4261, "step": 2832 }, { "epoch": 0.20845251782018856, "grad_norm": 0.90625, "learning_rate": 4.4915372053719856e-05, "loss": 1.1543, "step": 2833 }, { "epoch": 0.20852609795355254, "grad_norm": 0.921875, "learning_rate": 4.4911870574667515e-05, "loss": 0.7451, "step": 2834 }, { "epoch": 0.20859967808691654, "grad_norm": 0.93359375, "learning_rate": 4.4908368026982686e-05, "loss": 0.7919, "step": 2835 }, { "epoch": 0.20867325822028052, "grad_norm": 1.28125, "learning_rate": 4.4904864410853344e-05, "loss": 1.1645, "step": 2836 }, { "epoch": 0.20874683835364452, "grad_norm": 1.109375, "learning_rate": 4.490135972646752e-05, "loss": 1.2171, "step": 2837 }, { "epoch": 0.2088204184870085, "grad_norm": 0.86328125, "learning_rate": 4.489785397401332e-05, "loss": 0.8273, "step": 2838 }, { "epoch": 0.2088939986203725, "grad_norm": 0.98828125, "learning_rate": 4.489434715367887e-05, "loss": 1.4749, "step": 2839 }, { "epoch": 0.2089675787537365, "grad_norm": 0.90625, "learning_rate": 4.489083926565238e-05, "loss": 1.0795, "step": 2840 }, { "epoch": 0.20904115888710048, "grad_norm": 0.859375, "learning_rate": 4.488733031012213e-05, "loss": 0.9256, "step": 2841 }, { "epoch": 0.20911473902046449, "grad_norm": 0.87890625, "learning_rate": 4.4883820287276415e-05, "loss": 0.688, "step": 2842 }, { "epoch": 0.20918831915382846, "grad_norm": 0.99609375, "learning_rate": 4.4880309197303615e-05, "loss": 0.8873, "step": 2843 }, { "epoch": 0.20926189928719247, "grad_norm": 0.8515625, "learning_rate": 4.4876797040392185e-05, "loss": 0.98, "step": 2844 }, { "epoch": 0.20933547942055644, "grad_norm": 1.0078125, "learning_rate": 4.4873283816730584e-05, "loss": 0.9738, "step": 2845 }, { "epoch": 0.20940905955392045, "grad_norm": 1.1171875, "learning_rate": 4.4869769526507376e-05, "loss": 1.3381, "step": 2846 }, { "epoch": 0.20948263968728442, "grad_norm": 0.98828125, "learning_rate": 4.486625416991118e-05, "loss": 1.1353, "step": 2847 }, { "epoch": 0.20955621982064843, "grad_norm": 0.89453125, "learning_rate": 4.486273774713064e-05, "loss": 1.043, "step": 2848 }, { "epoch": 0.20962979995401243, "grad_norm": 0.74609375, "learning_rate": 4.4859220258354475e-05, "loss": 0.7113, "step": 2849 }, { "epoch": 0.2097033800873764, "grad_norm": 0.83203125, "learning_rate": 4.485570170377146e-05, "loss": 0.8909, "step": 2850 }, { "epoch": 0.2097769602207404, "grad_norm": 0.953125, "learning_rate": 4.485218208357045e-05, "loss": 0.8449, "step": 2851 }, { "epoch": 0.2098505403541044, "grad_norm": 0.8046875, "learning_rate": 4.484866139794032e-05, "loss": 1.045, "step": 2852 }, { "epoch": 0.2099241204874684, "grad_norm": 0.8984375, "learning_rate": 4.484513964707002e-05, "loss": 0.9923, "step": 2853 }, { "epoch": 0.20999770062083237, "grad_norm": 0.88671875, "learning_rate": 4.484161683114856e-05, "loss": 1.2173, "step": 2854 }, { "epoch": 0.21007128075419637, "grad_norm": 0.83984375, "learning_rate": 4.4838092950364995e-05, "loss": 0.6753, "step": 2855 }, { "epoch": 0.21014486088756035, "grad_norm": 1.0390625, "learning_rate": 4.483456800490845e-05, "loss": 0.8739, "step": 2856 }, { "epoch": 0.21021844102092435, "grad_norm": 0.80859375, "learning_rate": 4.483104199496811e-05, "loss": 0.8887, "step": 2857 }, { "epoch": 0.21029202115428836, "grad_norm": 0.8671875, "learning_rate": 4.482751492073319e-05, "loss": 0.9067, "step": 2858 }, { "epoch": 0.21036560128765233, "grad_norm": 1.046875, "learning_rate": 4.4823986782393e-05, "loss": 1.1571, "step": 2859 }, { "epoch": 0.21043918142101634, "grad_norm": 1.1015625, "learning_rate": 4.482045758013689e-05, "loss": 1.4446, "step": 2860 }, { "epoch": 0.2105127615543803, "grad_norm": 0.8359375, "learning_rate": 4.481692731415424e-05, "loss": 1.0334, "step": 2861 }, { "epoch": 0.21058634168774432, "grad_norm": 0.74609375, "learning_rate": 4.481339598463454e-05, "loss": 0.8166, "step": 2862 }, { "epoch": 0.2106599218211083, "grad_norm": 0.90625, "learning_rate": 4.48098635917673e-05, "loss": 0.9122, "step": 2863 }, { "epoch": 0.2107335019544723, "grad_norm": 0.96875, "learning_rate": 4.4806330135742106e-05, "loss": 1.5158, "step": 2864 }, { "epoch": 0.21080708208783627, "grad_norm": 0.7109375, "learning_rate": 4.480279561674856e-05, "loss": 0.8485, "step": 2865 }, { "epoch": 0.21088066222120028, "grad_norm": 0.875, "learning_rate": 4.479926003497639e-05, "loss": 0.7725, "step": 2866 }, { "epoch": 0.21095424235456428, "grad_norm": 0.86328125, "learning_rate": 4.479572339061533e-05, "loss": 0.9324, "step": 2867 }, { "epoch": 0.21102782248792826, "grad_norm": 0.875, "learning_rate": 4.479218568385518e-05, "loss": 1.1622, "step": 2868 }, { "epoch": 0.21110140262129226, "grad_norm": 1.0390625, "learning_rate": 4.47886469148858e-05, "loss": 1.1396, "step": 2869 }, { "epoch": 0.21117498275465624, "grad_norm": 0.84375, "learning_rate": 4.478510708389713e-05, "loss": 0.841, "step": 2870 }, { "epoch": 0.21124856288802024, "grad_norm": 0.7734375, "learning_rate": 4.478156619107912e-05, "loss": 0.8657, "step": 2871 }, { "epoch": 0.21132214302138422, "grad_norm": 1.0, "learning_rate": 4.477802423662182e-05, "loss": 1.0363, "step": 2872 }, { "epoch": 0.21139572315474822, "grad_norm": 0.94921875, "learning_rate": 4.4774481220715317e-05, "loss": 1.0469, "step": 2873 }, { "epoch": 0.2114693032881122, "grad_norm": 0.77734375, "learning_rate": 4.477093714354975e-05, "loss": 0.857, "step": 2874 }, { "epoch": 0.2115428834214762, "grad_norm": 0.8125, "learning_rate": 4.4767392005315334e-05, "loss": 0.7133, "step": 2875 }, { "epoch": 0.2116164635548402, "grad_norm": 0.78515625, "learning_rate": 4.4763845806202323e-05, "loss": 0.9766, "step": 2876 }, { "epoch": 0.21169004368820418, "grad_norm": 0.87890625, "learning_rate": 4.4760298546401026e-05, "loss": 1.3768, "step": 2877 }, { "epoch": 0.2117636238215682, "grad_norm": 0.66015625, "learning_rate": 4.475675022610184e-05, "loss": 0.6088, "step": 2878 }, { "epoch": 0.21183720395493216, "grad_norm": 0.8046875, "learning_rate": 4.475320084549518e-05, "loss": 0.9634, "step": 2879 }, { "epoch": 0.21191078408829617, "grad_norm": 0.8046875, "learning_rate": 4.474965040477154e-05, "loss": 0.8918, "step": 2880 }, { "epoch": 0.21198436422166014, "grad_norm": 0.8046875, "learning_rate": 4.4746098904121467e-05, "loss": 1.0243, "step": 2881 }, { "epoch": 0.21205794435502415, "grad_norm": 11.0625, "learning_rate": 4.474254634373556e-05, "loss": 1.5296, "step": 2882 }, { "epoch": 0.21213152448838812, "grad_norm": 0.98828125, "learning_rate": 4.473899272380447e-05, "loss": 1.1332, "step": 2883 }, { "epoch": 0.21220510462175213, "grad_norm": 1.03125, "learning_rate": 4.473543804451893e-05, "loss": 1.2872, "step": 2884 }, { "epoch": 0.21227868475511613, "grad_norm": 0.96875, "learning_rate": 4.4731882306069706e-05, "loss": 0.9887, "step": 2885 }, { "epoch": 0.2123522648884801, "grad_norm": 0.66796875, "learning_rate": 4.472832550864763e-05, "loss": 0.6461, "step": 2886 }, { "epoch": 0.2124258450218441, "grad_norm": 0.765625, "learning_rate": 4.472476765244358e-05, "loss": 0.9668, "step": 2887 }, { "epoch": 0.2124994251552081, "grad_norm": 0.953125, "learning_rate": 4.47212087376485e-05, "loss": 0.9317, "step": 2888 }, { "epoch": 0.2125730052885721, "grad_norm": 0.796875, "learning_rate": 4.47176487644534e-05, "loss": 0.6655, "step": 2889 }, { "epoch": 0.21264658542193607, "grad_norm": 0.70703125, "learning_rate": 4.471408773304934e-05, "loss": 0.7572, "step": 2890 }, { "epoch": 0.21272016555530007, "grad_norm": 0.75, "learning_rate": 4.471052564362742e-05, "loss": 0.7705, "step": 2891 }, { "epoch": 0.21279374568866405, "grad_norm": 0.8359375, "learning_rate": 4.470696249637881e-05, "loss": 0.7093, "step": 2892 }, { "epoch": 0.21286732582202805, "grad_norm": 0.66796875, "learning_rate": 4.4703398291494745e-05, "loss": 0.6706, "step": 2893 }, { "epoch": 0.21294090595539206, "grad_norm": 0.98046875, "learning_rate": 4.4699833029166514e-05, "loss": 1.0228, "step": 2894 }, { "epoch": 0.21301448608875603, "grad_norm": 0.8984375, "learning_rate": 4.4696266709585454e-05, "loss": 0.9742, "step": 2895 }, { "epoch": 0.21308806622212004, "grad_norm": 1.046875, "learning_rate": 4.469269933294296e-05, "loss": 0.7757, "step": 2896 }, { "epoch": 0.213161646355484, "grad_norm": 0.9296875, "learning_rate": 4.468913089943049e-05, "loss": 0.9448, "step": 2897 }, { "epoch": 0.21323522648884802, "grad_norm": 0.796875, "learning_rate": 4.468556140923954e-05, "loss": 0.964, "step": 2898 }, { "epoch": 0.213308806622212, "grad_norm": 1.1171875, "learning_rate": 4.468199086256169e-05, "loss": 0.8315, "step": 2899 }, { "epoch": 0.213382386755576, "grad_norm": 0.921875, "learning_rate": 4.4678419259588576e-05, "loss": 0.873, "step": 2900 }, { "epoch": 0.21345596688893997, "grad_norm": 1.03125, "learning_rate": 4.467484660051187e-05, "loss": 0.943, "step": 2901 }, { "epoch": 0.21352954702230398, "grad_norm": 0.8125, "learning_rate": 4.4671272885523294e-05, "loss": 0.8535, "step": 2902 }, { "epoch": 0.21360312715566798, "grad_norm": 0.79296875, "learning_rate": 4.466769811481466e-05, "loss": 1.138, "step": 2903 }, { "epoch": 0.21367670728903196, "grad_norm": 0.83984375, "learning_rate": 4.466412228857782e-05, "loss": 1.1713, "step": 2904 }, { "epoch": 0.21375028742239596, "grad_norm": 0.9609375, "learning_rate": 4.466054540700467e-05, "loss": 0.9006, "step": 2905 }, { "epoch": 0.21382386755575994, "grad_norm": 0.8671875, "learning_rate": 4.465696747028719e-05, "loss": 1.2365, "step": 2906 }, { "epoch": 0.21389744768912394, "grad_norm": 0.73046875, "learning_rate": 4.4653388478617385e-05, "loss": 0.6855, "step": 2907 }, { "epoch": 0.21397102782248792, "grad_norm": 0.84765625, "learning_rate": 4.464980843218734e-05, "loss": 0.9935, "step": 2908 }, { "epoch": 0.21404460795585192, "grad_norm": 0.85546875, "learning_rate": 4.46462273311892e-05, "loss": 1.0645, "step": 2909 }, { "epoch": 0.2141181880892159, "grad_norm": 0.75, "learning_rate": 4.464264517581514e-05, "loss": 0.875, "step": 2910 }, { "epoch": 0.2141917682225799, "grad_norm": 1.0234375, "learning_rate": 4.463906196625741e-05, "loss": 1.2615, "step": 2911 }, { "epoch": 0.2142653483559439, "grad_norm": 0.9453125, "learning_rate": 4.463547770270832e-05, "loss": 1.1156, "step": 2912 }, { "epoch": 0.21433892848930788, "grad_norm": 0.83984375, "learning_rate": 4.463189238536023e-05, "loss": 0.681, "step": 2913 }, { "epoch": 0.2144125086226719, "grad_norm": 0.7890625, "learning_rate": 4.4628306014405554e-05, "loss": 0.8803, "step": 2914 }, { "epoch": 0.21448608875603586, "grad_norm": 0.87890625, "learning_rate": 4.4624718590036763e-05, "loss": 0.7558, "step": 2915 }, { "epoch": 0.21455966888939987, "grad_norm": 0.8359375, "learning_rate": 4.462113011244639e-05, "loss": 0.7771, "step": 2916 }, { "epoch": 0.21463324902276384, "grad_norm": 0.83203125, "learning_rate": 4.461754058182703e-05, "loss": 0.7916, "step": 2917 }, { "epoch": 0.21470682915612785, "grad_norm": 0.671875, "learning_rate": 4.461394999837131e-05, "loss": 0.5213, "step": 2918 }, { "epoch": 0.21478040928949182, "grad_norm": 0.87109375, "learning_rate": 4.461035836227194e-05, "loss": 1.3971, "step": 2919 }, { "epoch": 0.21485398942285583, "grad_norm": 0.66015625, "learning_rate": 4.4606765673721684e-05, "loss": 0.6099, "step": 2920 }, { "epoch": 0.21492756955621983, "grad_norm": 0.890625, "learning_rate": 4.460317193291335e-05, "loss": 0.868, "step": 2921 }, { "epoch": 0.2150011496895838, "grad_norm": 0.8359375, "learning_rate": 4.459957714003979e-05, "loss": 1.1265, "step": 2922 }, { "epoch": 0.2150747298229478, "grad_norm": 0.63671875, "learning_rate": 4.459598129529395e-05, "loss": 0.8591, "step": 2923 }, { "epoch": 0.2151483099563118, "grad_norm": 0.81640625, "learning_rate": 4.4592384398868804e-05, "loss": 1.0894, "step": 2924 }, { "epoch": 0.2152218900896758, "grad_norm": 0.87890625, "learning_rate": 4.4588786450957384e-05, "loss": 1.0027, "step": 2925 }, { "epoch": 0.21529547022303977, "grad_norm": 0.80078125, "learning_rate": 4.458518745175281e-05, "loss": 0.9333, "step": 2926 }, { "epoch": 0.21536905035640377, "grad_norm": 0.92578125, "learning_rate": 4.458158740144821e-05, "loss": 1.0506, "step": 2927 }, { "epoch": 0.21544263048976775, "grad_norm": 0.77734375, "learning_rate": 4.45779863002368e-05, "loss": 0.8345, "step": 2928 }, { "epoch": 0.21551621062313175, "grad_norm": 1.125, "learning_rate": 4.457438414831183e-05, "loss": 1.1072, "step": 2929 }, { "epoch": 0.21558979075649576, "grad_norm": 0.96875, "learning_rate": 4.4570780945866644e-05, "loss": 0.861, "step": 2930 }, { "epoch": 0.21566337088985973, "grad_norm": 1.1171875, "learning_rate": 4.4567176693094606e-05, "loss": 1.2429, "step": 2931 }, { "epoch": 0.21573695102322374, "grad_norm": 0.93359375, "learning_rate": 4.456357139018915e-05, "loss": 0.7611, "step": 2932 }, { "epoch": 0.21581053115658771, "grad_norm": 1.0859375, "learning_rate": 4.4559965037343776e-05, "loss": 0.9303, "step": 2933 }, { "epoch": 0.21588411128995172, "grad_norm": 0.9921875, "learning_rate": 4.455635763475202e-05, "loss": 0.8904, "step": 2934 }, { "epoch": 0.2159576914233157, "grad_norm": 0.890625, "learning_rate": 4.455274918260748e-05, "loss": 0.9982, "step": 2935 }, { "epoch": 0.2160312715566797, "grad_norm": 1.2890625, "learning_rate": 4.4549139681103825e-05, "loss": 1.307, "step": 2936 }, { "epoch": 0.21610485169004368, "grad_norm": 0.89453125, "learning_rate": 4.454552913043477e-05, "loss": 0.918, "step": 2937 }, { "epoch": 0.21617843182340768, "grad_norm": 0.87109375, "learning_rate": 4.454191753079408e-05, "loss": 0.8802, "step": 2938 }, { "epoch": 0.21625201195677168, "grad_norm": 0.9140625, "learning_rate": 4.4538304882375584e-05, "loss": 1.3401, "step": 2939 }, { "epoch": 0.21632559209013566, "grad_norm": 0.8046875, "learning_rate": 4.453469118537317e-05, "loss": 0.817, "step": 2940 }, { "epoch": 0.21639917222349966, "grad_norm": 0.99609375, "learning_rate": 4.453107643998077e-05, "loss": 1.4482, "step": 2941 }, { "epoch": 0.21647275235686364, "grad_norm": 0.97265625, "learning_rate": 4.452746064639239e-05, "loss": 0.9268, "step": 2942 }, { "epoch": 0.21654633249022764, "grad_norm": 0.95703125, "learning_rate": 4.452384380480208e-05, "loss": 1.0061, "step": 2943 }, { "epoch": 0.21661991262359162, "grad_norm": 1.15625, "learning_rate": 4.4520225915403945e-05, "loss": 1.6814, "step": 2944 }, { "epoch": 0.21669349275695562, "grad_norm": 0.734375, "learning_rate": 4.451660697839216e-05, "loss": 0.5645, "step": 2945 }, { "epoch": 0.2167670728903196, "grad_norm": 1.03125, "learning_rate": 4.4512986993960936e-05, "loss": 1.4558, "step": 2946 }, { "epoch": 0.2168406530236836, "grad_norm": 0.95703125, "learning_rate": 4.450936596230456e-05, "loss": 0.9441, "step": 2947 }, { "epoch": 0.2169142331570476, "grad_norm": 1.0, "learning_rate": 4.450574388361735e-05, "loss": 1.1417, "step": 2948 }, { "epoch": 0.21698781329041159, "grad_norm": 0.80859375, "learning_rate": 4.450212075809371e-05, "loss": 1.1416, "step": 2949 }, { "epoch": 0.2170613934237756, "grad_norm": 0.92578125, "learning_rate": 4.449849658592809e-05, "loss": 1.0104, "step": 2950 }, { "epoch": 0.21713497355713957, "grad_norm": 0.96484375, "learning_rate": 4.4494871367314984e-05, "loss": 1.3781, "step": 2951 }, { "epoch": 0.21720855369050357, "grad_norm": 0.828125, "learning_rate": 4.4491245102448955e-05, "loss": 0.6403, "step": 2952 }, { "epoch": 0.21728213382386755, "grad_norm": 0.80859375, "learning_rate": 4.4487617791524605e-05, "loss": 1.4903, "step": 2953 }, { "epoch": 0.21735571395723155, "grad_norm": 0.77734375, "learning_rate": 4.4483989434736624e-05, "loss": 0.6208, "step": 2954 }, { "epoch": 0.21742929409059553, "grad_norm": 0.8203125, "learning_rate": 4.448036003227972e-05, "loss": 0.8564, "step": 2955 }, { "epoch": 0.21750287422395953, "grad_norm": 0.703125, "learning_rate": 4.44767295843487e-05, "loss": 0.7602, "step": 2956 }, { "epoch": 0.21757645435732353, "grad_norm": 0.7890625, "learning_rate": 4.4473098091138374e-05, "loss": 0.9807, "step": 2957 }, { "epoch": 0.2176500344906875, "grad_norm": 0.7421875, "learning_rate": 4.446946555284366e-05, "loss": 0.705, "step": 2958 }, { "epoch": 0.21772361462405151, "grad_norm": 0.9140625, "learning_rate": 4.446583196965951e-05, "loss": 1.282, "step": 2959 }, { "epoch": 0.2177971947574155, "grad_norm": 0.890625, "learning_rate": 4.446219734178092e-05, "loss": 0.9672, "step": 2960 }, { "epoch": 0.2178707748907795, "grad_norm": 1.1015625, "learning_rate": 4.445856166940296e-05, "loss": 0.8693, "step": 2961 }, { "epoch": 0.21794435502414347, "grad_norm": 0.9921875, "learning_rate": 4.445492495272073e-05, "loss": 1.3718, "step": 2962 }, { "epoch": 0.21801793515750748, "grad_norm": 0.83984375, "learning_rate": 4.4451287191929436e-05, "loss": 1.0069, "step": 2963 }, { "epoch": 0.21809151529087145, "grad_norm": 0.93359375, "learning_rate": 4.444764838722429e-05, "loss": 1.0451, "step": 2964 }, { "epoch": 0.21816509542423546, "grad_norm": 0.7734375, "learning_rate": 4.4444008538800604e-05, "loss": 1.0163, "step": 2965 }, { "epoch": 0.21823867555759946, "grad_norm": 0.86328125, "learning_rate": 4.444036764685368e-05, "loss": 1.4415, "step": 2966 }, { "epoch": 0.21831225569096344, "grad_norm": 0.79296875, "learning_rate": 4.4436725711578965e-05, "loss": 1.0115, "step": 2967 }, { "epoch": 0.21838583582432744, "grad_norm": 0.8125, "learning_rate": 4.443308273317188e-05, "loss": 0.8376, "step": 2968 }, { "epoch": 0.21845941595769142, "grad_norm": 1.171875, "learning_rate": 4.442943871182795e-05, "loss": 1.134, "step": 2969 }, { "epoch": 0.21853299609105542, "grad_norm": 0.90625, "learning_rate": 4.442579364774274e-05, "loss": 1.2769, "step": 2970 }, { "epoch": 0.2186065762244194, "grad_norm": 0.7421875, "learning_rate": 4.442214754111188e-05, "loss": 1.0365, "step": 2971 }, { "epoch": 0.2186801563577834, "grad_norm": 0.9140625, "learning_rate": 4.441850039213104e-05, "loss": 1.0726, "step": 2972 }, { "epoch": 0.21875373649114738, "grad_norm": 1.046875, "learning_rate": 4.441485220099596e-05, "loss": 1.5218, "step": 2973 }, { "epoch": 0.21882731662451138, "grad_norm": 0.7734375, "learning_rate": 4.441120296790243e-05, "loss": 0.7332, "step": 2974 }, { "epoch": 0.21890089675787539, "grad_norm": 0.8828125, "learning_rate": 4.440755269304631e-05, "loss": 0.9733, "step": 2975 }, { "epoch": 0.21897447689123936, "grad_norm": 0.984375, "learning_rate": 4.440390137662348e-05, "loss": 1.2675, "step": 2976 }, { "epoch": 0.21904805702460337, "grad_norm": 1.03125, "learning_rate": 4.440024901882992e-05, "loss": 1.324, "step": 2977 }, { "epoch": 0.21912163715796734, "grad_norm": 1.578125, "learning_rate": 4.439659561986164e-05, "loss": 0.7713, "step": 2978 }, { "epoch": 0.21919521729133135, "grad_norm": 0.98828125, "learning_rate": 4.4392941179914696e-05, "loss": 1.0236, "step": 2979 }, { "epoch": 0.21926879742469532, "grad_norm": 0.91015625, "learning_rate": 4.4389285699185235e-05, "loss": 1.0041, "step": 2980 }, { "epoch": 0.21934237755805933, "grad_norm": 0.86328125, "learning_rate": 4.438562917786943e-05, "loss": 0.9832, "step": 2981 }, { "epoch": 0.2194159576914233, "grad_norm": 0.97265625, "learning_rate": 4.438197161616352e-05, "loss": 1.1787, "step": 2982 }, { "epoch": 0.2194895378247873, "grad_norm": 0.69921875, "learning_rate": 4.43783130142638e-05, "loss": 0.8349, "step": 2983 }, { "epoch": 0.2195631179581513, "grad_norm": 0.87890625, "learning_rate": 4.437465337236662e-05, "loss": 0.8361, "step": 2984 }, { "epoch": 0.2196366980915153, "grad_norm": 0.8046875, "learning_rate": 4.437099269066839e-05, "loss": 0.7906, "step": 2985 }, { "epoch": 0.2197102782248793, "grad_norm": 1.015625, "learning_rate": 4.436733096936557e-05, "loss": 1.1253, "step": 2986 }, { "epoch": 0.21978385835824327, "grad_norm": 0.8203125, "learning_rate": 4.436366820865468e-05, "loss": 0.9108, "step": 2987 }, { "epoch": 0.21985743849160727, "grad_norm": 0.8203125, "learning_rate": 4.436000440873228e-05, "loss": 0.8993, "step": 2988 }, { "epoch": 0.21993101862497125, "grad_norm": 0.8203125, "learning_rate": 4.435633956979501e-05, "loss": 0.9804, "step": 2989 }, { "epoch": 0.22000459875833525, "grad_norm": 0.734375, "learning_rate": 4.4352673692039564e-05, "loss": 0.8479, "step": 2990 }, { "epoch": 0.22007817889169923, "grad_norm": 1.09375, "learning_rate": 4.4349006775662664e-05, "loss": 1.5212, "step": 2991 }, { "epoch": 0.22015175902506323, "grad_norm": 0.86328125, "learning_rate": 4.434533882086112e-05, "loss": 0.8776, "step": 2992 }, { "epoch": 0.22022533915842724, "grad_norm": 0.9453125, "learning_rate": 4.434166982783178e-05, "loss": 0.7635, "step": 2993 }, { "epoch": 0.2202989192917912, "grad_norm": 0.7890625, "learning_rate": 4.433799979677155e-05, "loss": 0.8248, "step": 2994 }, { "epoch": 0.22037249942515522, "grad_norm": 0.73046875, "learning_rate": 4.43343287278774e-05, "loss": 0.987, "step": 2995 }, { "epoch": 0.2204460795585192, "grad_norm": 0.859375, "learning_rate": 4.4330656621346336e-05, "loss": 0.9917, "step": 2996 }, { "epoch": 0.2205196596918832, "grad_norm": 0.77734375, "learning_rate": 4.432698347737545e-05, "loss": 0.8746, "step": 2997 }, { "epoch": 0.22059323982524717, "grad_norm": 0.74609375, "learning_rate": 4.432330929616185e-05, "loss": 0.8718, "step": 2998 }, { "epoch": 0.22066681995861118, "grad_norm": 1.0, "learning_rate": 4.4319634077902746e-05, "loss": 1.2857, "step": 2999 }, { "epoch": 0.22074040009197515, "grad_norm": 1.015625, "learning_rate": 4.4315957822795374e-05, "loss": 0.9954, "step": 3000 }, { "epoch": 0.22081398022533916, "grad_norm": 0.61328125, "learning_rate": 4.4312280531037025e-05, "loss": 0.7495, "step": 3001 }, { "epoch": 0.22088756035870316, "grad_norm": 0.78515625, "learning_rate": 4.430860220282506e-05, "loss": 0.7469, "step": 3002 }, { "epoch": 0.22096114049206714, "grad_norm": 0.88671875, "learning_rate": 4.430492283835688e-05, "loss": 0.7839, "step": 3003 }, { "epoch": 0.22103472062543114, "grad_norm": 1.1953125, "learning_rate": 4.430124243782995e-05, "loss": 1.5534, "step": 3004 }, { "epoch": 0.22110830075879512, "grad_norm": 0.8359375, "learning_rate": 4.429756100144179e-05, "loss": 0.8677, "step": 3005 }, { "epoch": 0.22118188089215912, "grad_norm": 0.71875, "learning_rate": 4.429387852938999e-05, "loss": 0.7379, "step": 3006 }, { "epoch": 0.2212554610255231, "grad_norm": 0.80078125, "learning_rate": 4.4290195021872164e-05, "loss": 0.9089, "step": 3007 }, { "epoch": 0.2213290411588871, "grad_norm": 0.92578125, "learning_rate": 4.428651047908601e-05, "loss": 1.0036, "step": 3008 }, { "epoch": 0.22140262129225108, "grad_norm": 0.73828125, "learning_rate": 4.428282490122926e-05, "loss": 0.7464, "step": 3009 }, { "epoch": 0.22147620142561508, "grad_norm": 0.8828125, "learning_rate": 4.4279138288499725e-05, "loss": 0.7255, "step": 3010 }, { "epoch": 0.2215497815589791, "grad_norm": 0.66015625, "learning_rate": 4.427545064109524e-05, "loss": 0.6455, "step": 3011 }, { "epoch": 0.22162336169234306, "grad_norm": 0.62890625, "learning_rate": 4.427176195921373e-05, "loss": 0.6021, "step": 3012 }, { "epoch": 0.22169694182570707, "grad_norm": 0.8046875, "learning_rate": 4.426807224305316e-05, "loss": 1.094, "step": 3013 }, { "epoch": 0.22177052195907104, "grad_norm": 0.8203125, "learning_rate": 4.426438149281154e-05, "loss": 1.2464, "step": 3014 }, { "epoch": 0.22184410209243505, "grad_norm": 0.875, "learning_rate": 4.4260689708686945e-05, "loss": 1.2478, "step": 3015 }, { "epoch": 0.22191768222579902, "grad_norm": 0.95703125, "learning_rate": 4.425699689087752e-05, "loss": 0.8136, "step": 3016 }, { "epoch": 0.22199126235916303, "grad_norm": 0.8125, "learning_rate": 4.4253303039581436e-05, "loss": 1.1139, "step": 3017 }, { "epoch": 0.22206484249252703, "grad_norm": 0.765625, "learning_rate": 4.424960815499695e-05, "loss": 0.7272, "step": 3018 }, { "epoch": 0.222138422625891, "grad_norm": 0.66015625, "learning_rate": 4.424591223732235e-05, "loss": 0.525, "step": 3019 }, { "epoch": 0.222212002759255, "grad_norm": 0.67578125, "learning_rate": 4.4242215286755987e-05, "loss": 0.6138, "step": 3020 }, { "epoch": 0.222285582892619, "grad_norm": 0.9921875, "learning_rate": 4.4238517303496276e-05, "loss": 0.9417, "step": 3021 }, { "epoch": 0.222359163025983, "grad_norm": 1.2265625, "learning_rate": 4.423481828774168e-05, "loss": 1.2537, "step": 3022 }, { "epoch": 0.22243274315934697, "grad_norm": 0.80078125, "learning_rate": 4.4231118239690714e-05, "loss": 0.7667, "step": 3023 }, { "epoch": 0.22250632329271097, "grad_norm": 0.76953125, "learning_rate": 4.422741715954195e-05, "loss": 0.8733, "step": 3024 }, { "epoch": 0.22257990342607495, "grad_norm": 0.88671875, "learning_rate": 4.422371504749403e-05, "loss": 0.9773, "step": 3025 }, { "epoch": 0.22265348355943895, "grad_norm": 1.171875, "learning_rate": 4.422001190374563e-05, "loss": 1.087, "step": 3026 }, { "epoch": 0.22272706369280296, "grad_norm": 0.68359375, "learning_rate": 4.421630772849549e-05, "loss": 0.5611, "step": 3027 }, { "epoch": 0.22280064382616693, "grad_norm": 0.875, "learning_rate": 4.42126025219424e-05, "loss": 1.0163, "step": 3028 }, { "epoch": 0.22287422395953094, "grad_norm": 0.90234375, "learning_rate": 4.4208896284285235e-05, "loss": 1.0159, "step": 3029 }, { "epoch": 0.2229478040928949, "grad_norm": 0.8671875, "learning_rate": 4.420518901572288e-05, "loss": 1.0498, "step": 3030 }, { "epoch": 0.22302138422625892, "grad_norm": 1.09375, "learning_rate": 4.42014807164543e-05, "loss": 1.3386, "step": 3031 }, { "epoch": 0.2230949643596229, "grad_norm": 0.59765625, "learning_rate": 4.419777138667853e-05, "loss": 0.7489, "step": 3032 }, { "epoch": 0.2231685444929869, "grad_norm": 0.88671875, "learning_rate": 4.4194061026594616e-05, "loss": 1.1325, "step": 3033 }, { "epoch": 0.22324212462635087, "grad_norm": 0.98828125, "learning_rate": 4.419034963640171e-05, "loss": 1.0251, "step": 3034 }, { "epoch": 0.22331570475971488, "grad_norm": 1.125, "learning_rate": 4.418663721629897e-05, "loss": 1.1174, "step": 3035 }, { "epoch": 0.22338928489307888, "grad_norm": 0.8359375, "learning_rate": 4.418292376648566e-05, "loss": 0.9524, "step": 3036 }, { "epoch": 0.22346286502644286, "grad_norm": 0.97265625, "learning_rate": 4.417920928716106e-05, "loss": 1.2242, "step": 3037 }, { "epoch": 0.22353644515980686, "grad_norm": 0.8515625, "learning_rate": 4.417549377852452e-05, "loss": 0.919, "step": 3038 }, { "epoch": 0.22361002529317084, "grad_norm": 0.92578125, "learning_rate": 4.4171777240775444e-05, "loss": 1.1967, "step": 3039 }, { "epoch": 0.22368360542653484, "grad_norm": 0.84765625, "learning_rate": 4.41680596741133e-05, "loss": 0.9151, "step": 3040 }, { "epoch": 0.22375718555989882, "grad_norm": 0.84765625, "learning_rate": 4.41643410787376e-05, "loss": 0.7744, "step": 3041 }, { "epoch": 0.22383076569326282, "grad_norm": 0.73828125, "learning_rate": 4.416062145484791e-05, "loss": 0.6924, "step": 3042 }, { "epoch": 0.2239043458266268, "grad_norm": 1.1328125, "learning_rate": 4.4156900802643844e-05, "loss": 1.6845, "step": 3043 }, { "epoch": 0.2239779259599908, "grad_norm": 0.76171875, "learning_rate": 4.41531791223251e-05, "loss": 0.8964, "step": 3044 }, { "epoch": 0.2240515060933548, "grad_norm": 0.72265625, "learning_rate": 4.4149456414091404e-05, "loss": 1.0024, "step": 3045 }, { "epoch": 0.22412508622671878, "grad_norm": 0.6875, "learning_rate": 4.414573267814256e-05, "loss": 0.8576, "step": 3046 }, { "epoch": 0.2241986663600828, "grad_norm": 0.86328125, "learning_rate": 4.41420079146784e-05, "loss": 1.1391, "step": 3047 }, { "epoch": 0.22427224649344676, "grad_norm": 0.71875, "learning_rate": 4.413828212389883e-05, "loss": 0.7299, "step": 3048 }, { "epoch": 0.22434582662681077, "grad_norm": 0.84375, "learning_rate": 4.4134555306003804e-05, "loss": 0.9945, "step": 3049 }, { "epoch": 0.22441940676017474, "grad_norm": 1.1015625, "learning_rate": 4.4130827461193334e-05, "loss": 1.0733, "step": 3050 }, { "epoch": 0.22449298689353875, "grad_norm": 0.77734375, "learning_rate": 4.412709858966749e-05, "loss": 0.7298, "step": 3051 }, { "epoch": 0.22456656702690272, "grad_norm": 1.0078125, "learning_rate": 4.412336869162639e-05, "loss": 0.8285, "step": 3052 }, { "epoch": 0.22464014716026673, "grad_norm": 0.90234375, "learning_rate": 4.4119637767270204e-05, "loss": 0.9029, "step": 3053 }, { "epoch": 0.22471372729363073, "grad_norm": 1.109375, "learning_rate": 4.4115905816799186e-05, "loss": 1.1086, "step": 3054 }, { "epoch": 0.2247873074269947, "grad_norm": 0.88671875, "learning_rate": 4.411217284041359e-05, "loss": 0.9996, "step": 3055 }, { "epoch": 0.2248608875603587, "grad_norm": 0.82421875, "learning_rate": 4.410843883831379e-05, "loss": 0.8473, "step": 3056 }, { "epoch": 0.2249344676937227, "grad_norm": 0.76953125, "learning_rate": 4.4104703810700167e-05, "loss": 0.9594, "step": 3057 }, { "epoch": 0.2250080478270867, "grad_norm": 0.84765625, "learning_rate": 4.410096775777316e-05, "loss": 0.7604, "step": 3058 }, { "epoch": 0.22508162796045067, "grad_norm": 0.953125, "learning_rate": 4.4097230679733305e-05, "loss": 0.959, "step": 3059 }, { "epoch": 0.22515520809381467, "grad_norm": 0.765625, "learning_rate": 4.409349257678114e-05, "loss": 0.6229, "step": 3060 }, { "epoch": 0.22522878822717865, "grad_norm": 1.0, "learning_rate": 4.4089753449117296e-05, "loss": 0.9904, "step": 3061 }, { "epoch": 0.22530236836054265, "grad_norm": 0.765625, "learning_rate": 4.408601329694244e-05, "loss": 0.8742, "step": 3062 }, { "epoch": 0.22537594849390666, "grad_norm": 0.8359375, "learning_rate": 4.4082272120457294e-05, "loss": 0.9492, "step": 3063 }, { "epoch": 0.22544952862727063, "grad_norm": 0.98046875, "learning_rate": 4.407852991986265e-05, "loss": 1.6065, "step": 3064 }, { "epoch": 0.22552310876063464, "grad_norm": 0.7734375, "learning_rate": 4.407478669535934e-05, "loss": 0.7719, "step": 3065 }, { "epoch": 0.22559668889399861, "grad_norm": 0.8828125, "learning_rate": 4.4071042447148246e-05, "loss": 1.1843, "step": 3066 }, { "epoch": 0.22567026902736262, "grad_norm": 0.828125, "learning_rate": 4.406729717543033e-05, "loss": 0.9189, "step": 3067 }, { "epoch": 0.2257438491607266, "grad_norm": 0.9140625, "learning_rate": 4.406355088040659e-05, "loss": 1.0585, "step": 3068 }, { "epoch": 0.2258174292940906, "grad_norm": 0.85546875, "learning_rate": 4.405980356227808e-05, "loss": 1.1171, "step": 3069 }, { "epoch": 0.22589100942745458, "grad_norm": 0.875, "learning_rate": 4.4056055221245904e-05, "loss": 1.3748, "step": 3070 }, { "epoch": 0.22596458956081858, "grad_norm": 0.93359375, "learning_rate": 4.4052305857511245e-05, "loss": 0.9345, "step": 3071 }, { "epoch": 0.22603816969418258, "grad_norm": 1.046875, "learning_rate": 4.404855547127531e-05, "loss": 1.442, "step": 3072 }, { "epoch": 0.22611174982754656, "grad_norm": 1.09375, "learning_rate": 4.4044804062739385e-05, "loss": 1.0865, "step": 3073 }, { "epoch": 0.22618532996091056, "grad_norm": 0.73046875, "learning_rate": 4.4041051632104795e-05, "loss": 0.6178, "step": 3074 }, { "epoch": 0.22625891009427454, "grad_norm": 0.99609375, "learning_rate": 4.403729817957293e-05, "loss": 0.952, "step": 3075 }, { "epoch": 0.22633249022763854, "grad_norm": 0.921875, "learning_rate": 4.403354370534522e-05, "loss": 1.2966, "step": 3076 }, { "epoch": 0.22640607036100252, "grad_norm": 0.88671875, "learning_rate": 4.402978820962317e-05, "loss": 1.0168, "step": 3077 }, { "epoch": 0.22647965049436652, "grad_norm": 0.890625, "learning_rate": 4.402603169260834e-05, "loss": 1.0222, "step": 3078 }, { "epoch": 0.2265532306277305, "grad_norm": 0.86328125, "learning_rate": 4.402227415450231e-05, "loss": 1.119, "step": 3079 }, { "epoch": 0.2266268107610945, "grad_norm": 0.9296875, "learning_rate": 4.401851559550676e-05, "loss": 0.9735, "step": 3080 }, { "epoch": 0.2267003908944585, "grad_norm": 1.1640625, "learning_rate": 4.40147560158234e-05, "loss": 1.0884, "step": 3081 }, { "epoch": 0.22677397102782249, "grad_norm": 0.921875, "learning_rate": 4.4010995415654e-05, "loss": 1.2867, "step": 3082 }, { "epoch": 0.2268475511611865, "grad_norm": 1.171875, "learning_rate": 4.400723379520038e-05, "loss": 1.1063, "step": 3083 }, { "epoch": 0.22692113129455047, "grad_norm": 1.1640625, "learning_rate": 4.400347115466442e-05, "loss": 1.4118, "step": 3084 }, { "epoch": 0.22699471142791447, "grad_norm": 0.81640625, "learning_rate": 4.399970749424805e-05, "loss": 0.6913, "step": 3085 }, { "epoch": 0.22706829156127845, "grad_norm": 0.90625, "learning_rate": 4.399594281415328e-05, "loss": 1.0776, "step": 3086 }, { "epoch": 0.22714187169464245, "grad_norm": 1.0078125, "learning_rate": 4.3992177114582124e-05, "loss": 1.0784, "step": 3087 }, { "epoch": 0.22721545182800643, "grad_norm": 0.859375, "learning_rate": 4.39884103957367e-05, "loss": 0.9732, "step": 3088 }, { "epoch": 0.22728903196137043, "grad_norm": 0.8828125, "learning_rate": 4.398464265781915e-05, "loss": 0.836, "step": 3089 }, { "epoch": 0.22736261209473443, "grad_norm": 0.828125, "learning_rate": 4.3980873901031695e-05, "loss": 0.8301, "step": 3090 }, { "epoch": 0.2274361922280984, "grad_norm": 0.66796875, "learning_rate": 4.397710412557657e-05, "loss": 0.7434, "step": 3091 }, { "epoch": 0.22750977236146241, "grad_norm": 0.75390625, "learning_rate": 4.397333333165613e-05, "loss": 1.0613, "step": 3092 }, { "epoch": 0.2275833524948264, "grad_norm": 0.81640625, "learning_rate": 4.3969561519472716e-05, "loss": 0.7699, "step": 3093 }, { "epoch": 0.2276569326281904, "grad_norm": 0.73828125, "learning_rate": 4.3965788689228757e-05, "loss": 0.7306, "step": 3094 }, { "epoch": 0.22773051276155437, "grad_norm": 0.9140625, "learning_rate": 4.3962014841126744e-05, "loss": 1.2517, "step": 3095 }, { "epoch": 0.22780409289491838, "grad_norm": 1.0546875, "learning_rate": 4.3958239975369215e-05, "loss": 1.1137, "step": 3096 }, { "epoch": 0.22787767302828235, "grad_norm": 0.76171875, "learning_rate": 4.3954464092158745e-05, "loss": 0.8204, "step": 3097 }, { "epoch": 0.22795125316164636, "grad_norm": 0.96484375, "learning_rate": 4.395068719169799e-05, "loss": 0.8983, "step": 3098 }, { "epoch": 0.22802483329501036, "grad_norm": 0.8203125, "learning_rate": 4.394690927418965e-05, "loss": 0.8394, "step": 3099 }, { "epoch": 0.22809841342837434, "grad_norm": 0.99609375, "learning_rate": 4.394313033983648e-05, "loss": 1.228, "step": 3100 }, { "epoch": 0.22817199356173834, "grad_norm": 1.859375, "learning_rate": 4.3939350388841274e-05, "loss": 1.1662, "step": 3101 }, { "epoch": 0.22824557369510232, "grad_norm": 0.8984375, "learning_rate": 4.39355694214069e-05, "loss": 0.8935, "step": 3102 }, { "epoch": 0.22831915382846632, "grad_norm": 0.75390625, "learning_rate": 4.393178743773629e-05, "loss": 0.7445, "step": 3103 }, { "epoch": 0.2283927339618303, "grad_norm": 0.9140625, "learning_rate": 4.39280044380324e-05, "loss": 1.2452, "step": 3104 }, { "epoch": 0.2284663140951943, "grad_norm": 0.7578125, "learning_rate": 4.392422042249826e-05, "loss": 0.8325, "step": 3105 }, { "epoch": 0.22853989422855828, "grad_norm": 0.953125, "learning_rate": 4.3920435391336956e-05, "loss": 1.4204, "step": 3106 }, { "epoch": 0.22861347436192228, "grad_norm": 0.80859375, "learning_rate": 4.391664934475162e-05, "loss": 0.7727, "step": 3107 }, { "epoch": 0.22868705449528628, "grad_norm": 0.91796875, "learning_rate": 4.391286228294544e-05, "loss": 1.6442, "step": 3108 }, { "epoch": 0.22876063462865026, "grad_norm": 0.75, "learning_rate": 4.390907420612166e-05, "loss": 0.7756, "step": 3109 }, { "epoch": 0.22883421476201427, "grad_norm": 1.03125, "learning_rate": 4.3905285114483584e-05, "loss": 1.0006, "step": 3110 }, { "epoch": 0.22890779489537824, "grad_norm": 0.84765625, "learning_rate": 4.390149500823457e-05, "loss": 0.6596, "step": 3111 }, { "epoch": 0.22898137502874225, "grad_norm": 0.9375, "learning_rate": 4.3897703887578015e-05, "loss": 0.9563, "step": 3112 }, { "epoch": 0.22905495516210622, "grad_norm": 0.8046875, "learning_rate": 4.3893911752717374e-05, "loss": 0.9856, "step": 3113 }, { "epoch": 0.22912853529547023, "grad_norm": 0.93359375, "learning_rate": 4.389011860385619e-05, "loss": 1.0432, "step": 3114 }, { "epoch": 0.2292021154288342, "grad_norm": 1.078125, "learning_rate": 4.3886324441198015e-05, "loss": 1.5668, "step": 3115 }, { "epoch": 0.2292756955621982, "grad_norm": 0.86328125, "learning_rate": 4.388252926494647e-05, "loss": 0.9112, "step": 3116 }, { "epoch": 0.2293492756955622, "grad_norm": 0.88671875, "learning_rate": 4.387873307530525e-05, "loss": 0.948, "step": 3117 }, { "epoch": 0.2294228558289262, "grad_norm": 0.89453125, "learning_rate": 4.3874935872478086e-05, "loss": 1.1635, "step": 3118 }, { "epoch": 0.2294964359622902, "grad_norm": 0.85546875, "learning_rate": 4.387113765666876e-05, "loss": 0.953, "step": 3119 }, { "epoch": 0.22957001609565417, "grad_norm": 1.1796875, "learning_rate": 4.386733842808112e-05, "loss": 1.118, "step": 3120 }, { "epoch": 0.22964359622901817, "grad_norm": 1.2109375, "learning_rate": 4.386353818691906e-05, "loss": 0.6992, "step": 3121 }, { "epoch": 0.22971717636238215, "grad_norm": 0.85546875, "learning_rate": 4.385973693338653e-05, "loss": 1.0678, "step": 3122 }, { "epoch": 0.22979075649574615, "grad_norm": 1.046875, "learning_rate": 4.3855934667687545e-05, "loss": 1.6797, "step": 3123 }, { "epoch": 0.22986433662911013, "grad_norm": 0.75, "learning_rate": 4.385213139002616e-05, "loss": 0.8481, "step": 3124 }, { "epoch": 0.22993791676247413, "grad_norm": 0.80078125, "learning_rate": 4.3848327100606486e-05, "loss": 0.7289, "step": 3125 }, { "epoch": 0.23001149689583814, "grad_norm": 1.0, "learning_rate": 4.38445217996327e-05, "loss": 1.4812, "step": 3126 }, { "epoch": 0.2300850770292021, "grad_norm": 0.8671875, "learning_rate": 4.3840715487309015e-05, "loss": 1.2339, "step": 3127 }, { "epoch": 0.23015865716256612, "grad_norm": 0.7109375, "learning_rate": 4.3836908163839715e-05, "loss": 0.7561, "step": 3128 }, { "epoch": 0.2302322372959301, "grad_norm": 0.7109375, "learning_rate": 4.383309982942914e-05, "loss": 0.6729, "step": 3129 }, { "epoch": 0.2303058174292941, "grad_norm": 0.9921875, "learning_rate": 4.382929048428167e-05, "loss": 0.9063, "step": 3130 }, { "epoch": 0.23037939756265807, "grad_norm": 1.09375, "learning_rate": 4.3825480128601734e-05, "loss": 1.3051, "step": 3131 }, { "epoch": 0.23045297769602208, "grad_norm": 0.79296875, "learning_rate": 4.382166876259384e-05, "loss": 1.0217, "step": 3132 }, { "epoch": 0.23052655782938605, "grad_norm": 1.0, "learning_rate": 4.381785638646253e-05, "loss": 1.2486, "step": 3133 }, { "epoch": 0.23060013796275006, "grad_norm": 0.703125, "learning_rate": 4.3814043000412405e-05, "loss": 0.6965, "step": 3134 }, { "epoch": 0.23067371809611406, "grad_norm": 0.73046875, "learning_rate": 4.381022860464814e-05, "loss": 0.8494, "step": 3135 }, { "epoch": 0.23074729822947804, "grad_norm": 0.75390625, "learning_rate": 4.380641319937442e-05, "loss": 0.9846, "step": 3136 }, { "epoch": 0.23082087836284204, "grad_norm": 0.72265625, "learning_rate": 4.380259678479604e-05, "loss": 0.9081, "step": 3137 }, { "epoch": 0.23089445849620602, "grad_norm": 0.88671875, "learning_rate": 4.379877936111779e-05, "loss": 0.839, "step": 3138 }, { "epoch": 0.23096803862957002, "grad_norm": 0.87109375, "learning_rate": 4.379496092854456e-05, "loss": 1.0511, "step": 3139 }, { "epoch": 0.231041618762934, "grad_norm": 0.8515625, "learning_rate": 4.379114148728128e-05, "loss": 0.9411, "step": 3140 }, { "epoch": 0.231115198896298, "grad_norm": 1.125, "learning_rate": 4.378732103753292e-05, "loss": 1.3649, "step": 3141 }, { "epoch": 0.23118877902966198, "grad_norm": 0.9140625, "learning_rate": 4.378349957950453e-05, "loss": 0.7724, "step": 3142 }, { "epoch": 0.23126235916302598, "grad_norm": 0.9375, "learning_rate": 4.37796771134012e-05, "loss": 1.2308, "step": 3143 }, { "epoch": 0.23133593929639, "grad_norm": 2.546875, "learning_rate": 4.377585363942805e-05, "loss": 1.2, "step": 3144 }, { "epoch": 0.23140951942975396, "grad_norm": 0.83984375, "learning_rate": 4.377202915779032e-05, "loss": 0.7315, "step": 3145 }, { "epoch": 0.23148309956311797, "grad_norm": 0.83984375, "learning_rate": 4.376820366869323e-05, "loss": 0.65, "step": 3146 }, { "epoch": 0.23155667969648194, "grad_norm": 0.76953125, "learning_rate": 4.3764377172342095e-05, "loss": 0.7376, "step": 3147 }, { "epoch": 0.23163025982984595, "grad_norm": 1.0078125, "learning_rate": 4.376054966894229e-05, "loss": 1.3158, "step": 3148 }, { "epoch": 0.23170383996320992, "grad_norm": 0.953125, "learning_rate": 4.37567211586992e-05, "loss": 0.972, "step": 3149 }, { "epoch": 0.23177742009657393, "grad_norm": 0.9375, "learning_rate": 4.375289164181832e-05, "loss": 0.8627, "step": 3150 }, { "epoch": 0.2318510002299379, "grad_norm": 0.90625, "learning_rate": 4.374906111850517e-05, "loss": 0.9473, "step": 3151 }, { "epoch": 0.2319245803633019, "grad_norm": 0.703125, "learning_rate": 4.374522958896532e-05, "loss": 0.7579, "step": 3152 }, { "epoch": 0.2319981604966659, "grad_norm": 0.87109375, "learning_rate": 4.37413970534044e-05, "loss": 0.9142, "step": 3153 }, { "epoch": 0.2320717406300299, "grad_norm": 0.77734375, "learning_rate": 4.373756351202809e-05, "loss": 0.8533, "step": 3154 }, { "epoch": 0.2321453207633939, "grad_norm": 1.0, "learning_rate": 4.373372896504215e-05, "loss": 1.1974, "step": 3155 }, { "epoch": 0.23221890089675787, "grad_norm": 0.828125, "learning_rate": 4.372989341265235e-05, "loss": 0.7317, "step": 3156 }, { "epoch": 0.23229248103012187, "grad_norm": 0.75, "learning_rate": 4.372605685506455e-05, "loss": 0.8871, "step": 3157 }, { "epoch": 0.23236606116348585, "grad_norm": 5.6875, "learning_rate": 4.372221929248464e-05, "loss": 1.2479, "step": 3158 }, { "epoch": 0.23243964129684985, "grad_norm": 0.7109375, "learning_rate": 4.37183807251186e-05, "loss": 0.7314, "step": 3159 }, { "epoch": 0.23251322143021383, "grad_norm": 0.8515625, "learning_rate": 4.3714541153172405e-05, "loss": 0.7643, "step": 3160 }, { "epoch": 0.23258680156357783, "grad_norm": 0.9296875, "learning_rate": 4.371070057685214e-05, "loss": 0.7784, "step": 3161 }, { "epoch": 0.23266038169694184, "grad_norm": 0.81640625, "learning_rate": 4.3706858996363906e-05, "loss": 0.7401, "step": 3162 }, { "epoch": 0.2327339618303058, "grad_norm": 0.9375, "learning_rate": 4.3703016411913886e-05, "loss": 0.9218, "step": 3163 }, { "epoch": 0.23280754196366982, "grad_norm": 1.046875, "learning_rate": 4.369917282370831e-05, "loss": 0.7555, "step": 3164 }, { "epoch": 0.2328811220970338, "grad_norm": 0.859375, "learning_rate": 4.369532823195344e-05, "loss": 1.0403, "step": 3165 }, { "epoch": 0.2329547022303978, "grad_norm": 0.6484375, "learning_rate": 4.369148263685561e-05, "loss": 0.6592, "step": 3166 }, { "epoch": 0.23302828236376177, "grad_norm": 0.8125, "learning_rate": 4.36876360386212e-05, "loss": 1.0878, "step": 3167 }, { "epoch": 0.23310186249712578, "grad_norm": 0.60546875, "learning_rate": 4.368378843745668e-05, "loss": 0.5583, "step": 3168 }, { "epoch": 0.23317544263048975, "grad_norm": 0.8515625, "learning_rate": 4.367993983356852e-05, "loss": 0.8004, "step": 3169 }, { "epoch": 0.23324902276385376, "grad_norm": 1.0078125, "learning_rate": 4.367609022716326e-05, "loss": 1.3917, "step": 3170 }, { "epoch": 0.23332260289721776, "grad_norm": 1.125, "learning_rate": 4.367223961844752e-05, "loss": 1.025, "step": 3171 }, { "epoch": 0.23339618303058174, "grad_norm": 0.75, "learning_rate": 4.366838800762795e-05, "loss": 0.8707, "step": 3172 }, { "epoch": 0.23346976316394574, "grad_norm": 1.2421875, "learning_rate": 4.366453539491124e-05, "loss": 1.419, "step": 3173 }, { "epoch": 0.23354334329730972, "grad_norm": 1.15625, "learning_rate": 4.3660681780504176e-05, "loss": 1.1744, "step": 3174 }, { "epoch": 0.23361692343067372, "grad_norm": 0.86328125, "learning_rate": 4.365682716461357e-05, "loss": 0.6458, "step": 3175 }, { "epoch": 0.2336905035640377, "grad_norm": 0.85546875, "learning_rate": 4.365297154744629e-05, "loss": 0.9822, "step": 3176 }, { "epoch": 0.2337640836974017, "grad_norm": 0.85546875, "learning_rate": 4.3649114929209245e-05, "loss": 0.9373, "step": 3177 }, { "epoch": 0.23383766383076568, "grad_norm": 0.92578125, "learning_rate": 4.364525731010943e-05, "loss": 1.223, "step": 3178 }, { "epoch": 0.23391124396412968, "grad_norm": 0.80078125, "learning_rate": 4.3641398690353875e-05, "loss": 0.9826, "step": 3179 }, { "epoch": 0.2339848240974937, "grad_norm": 0.7578125, "learning_rate": 4.363753907014966e-05, "loss": 0.9722, "step": 3180 }, { "epoch": 0.23405840423085766, "grad_norm": 0.796875, "learning_rate": 4.363367844970392e-05, "loss": 0.8164, "step": 3181 }, { "epoch": 0.23413198436422167, "grad_norm": 0.81640625, "learning_rate": 4.362981682922386e-05, "loss": 1.3447, "step": 3182 }, { "epoch": 0.23420556449758564, "grad_norm": 0.82421875, "learning_rate": 4.3625954208916705e-05, "loss": 0.7872, "step": 3183 }, { "epoch": 0.23427914463094965, "grad_norm": 0.8671875, "learning_rate": 4.3622090588989784e-05, "loss": 1.0747, "step": 3184 }, { "epoch": 0.23435272476431362, "grad_norm": 0.953125, "learning_rate": 4.3618225969650416e-05, "loss": 1.0023, "step": 3185 }, { "epoch": 0.23442630489767763, "grad_norm": 1.140625, "learning_rate": 4.361436035110604e-05, "loss": 1.1984, "step": 3186 }, { "epoch": 0.2344998850310416, "grad_norm": 1.0703125, "learning_rate": 4.3610493733564095e-05, "loss": 1.2764, "step": 3187 }, { "epoch": 0.2345734651644056, "grad_norm": 0.90234375, "learning_rate": 4.36066261172321e-05, "loss": 1.021, "step": 3188 }, { "epoch": 0.2346470452977696, "grad_norm": 0.9296875, "learning_rate": 4.360275750231763e-05, "loss": 0.7916, "step": 3189 }, { "epoch": 0.2347206254311336, "grad_norm": 1.0546875, "learning_rate": 4.359888788902828e-05, "loss": 1.236, "step": 3190 }, { "epoch": 0.2347942055644976, "grad_norm": 0.86328125, "learning_rate": 4.3595017277571774e-05, "loss": 0.7047, "step": 3191 }, { "epoch": 0.23486778569786157, "grad_norm": 0.7109375, "learning_rate": 4.359114566815579e-05, "loss": 0.4961, "step": 3192 }, { "epoch": 0.23494136583122557, "grad_norm": 0.78515625, "learning_rate": 4.358727306098814e-05, "loss": 0.7679, "step": 3193 }, { "epoch": 0.23501494596458955, "grad_norm": 1.0390625, "learning_rate": 4.358339945627665e-05, "loss": 1.1169, "step": 3194 }, { "epoch": 0.23508852609795355, "grad_norm": 0.74609375, "learning_rate": 4.3579524854229215e-05, "loss": 0.956, "step": 3195 }, { "epoch": 0.23516210623131756, "grad_norm": 1.0234375, "learning_rate": 4.3575649255053774e-05, "loss": 0.903, "step": 3196 }, { "epoch": 0.23523568636468153, "grad_norm": 0.83203125, "learning_rate": 4.357177265895831e-05, "loss": 0.9522, "step": 3197 }, { "epoch": 0.23530926649804554, "grad_norm": 0.85546875, "learning_rate": 4.3567895066150894e-05, "loss": 0.8687, "step": 3198 }, { "epoch": 0.23538284663140951, "grad_norm": 0.70703125, "learning_rate": 4.356401647683962e-05, "loss": 0.624, "step": 3199 }, { "epoch": 0.23545642676477352, "grad_norm": 0.76171875, "learning_rate": 4.356013689123264e-05, "loss": 1.0651, "step": 3200 }, { "epoch": 0.2355300068981375, "grad_norm": 0.68359375, "learning_rate": 4.355625630953817e-05, "loss": 0.7108, "step": 3201 }, { "epoch": 0.2356035870315015, "grad_norm": 0.8828125, "learning_rate": 4.355237473196447e-05, "loss": 0.7632, "step": 3202 }, { "epoch": 0.23567716716486548, "grad_norm": 0.765625, "learning_rate": 4.354849215871986e-05, "loss": 0.704, "step": 3203 }, { "epoch": 0.23575074729822948, "grad_norm": 0.86328125, "learning_rate": 4.354460859001272e-05, "loss": 1.0499, "step": 3204 }, { "epoch": 0.23582432743159348, "grad_norm": 0.8046875, "learning_rate": 4.354072402605144e-05, "loss": 0.992, "step": 3205 }, { "epoch": 0.23589790756495746, "grad_norm": 0.9609375, "learning_rate": 4.353683846704453e-05, "loss": 0.788, "step": 3206 }, { "epoch": 0.23597148769832146, "grad_norm": 0.65234375, "learning_rate": 4.3532951913200516e-05, "loss": 0.6209, "step": 3207 }, { "epoch": 0.23604506783168544, "grad_norm": 0.90234375, "learning_rate": 4.3529064364727974e-05, "loss": 1.0249, "step": 3208 }, { "epoch": 0.23611864796504944, "grad_norm": 0.83984375, "learning_rate": 4.352517582183554e-05, "loss": 1.0625, "step": 3209 }, { "epoch": 0.23619222809841342, "grad_norm": 0.96875, "learning_rate": 4.352128628473191e-05, "loss": 0.8386, "step": 3210 }, { "epoch": 0.23626580823177742, "grad_norm": 0.83203125, "learning_rate": 4.351739575362583e-05, "loss": 0.976, "step": 3211 }, { "epoch": 0.2363393883651414, "grad_norm": 0.875, "learning_rate": 4.3513504228726096e-05, "loss": 1.0203, "step": 3212 }, { "epoch": 0.2364129684985054, "grad_norm": 0.98828125, "learning_rate": 4.350961171024155e-05, "loss": 0.9551, "step": 3213 }, { "epoch": 0.2364865486318694, "grad_norm": 1.015625, "learning_rate": 4.350571819838111e-05, "loss": 1.2153, "step": 3214 }, { "epoch": 0.23656012876523339, "grad_norm": 0.71875, "learning_rate": 4.350182369335372e-05, "loss": 0.7942, "step": 3215 }, { "epoch": 0.2366337088985974, "grad_norm": 0.8828125, "learning_rate": 4.3497928195368406e-05, "loss": 1.0052, "step": 3216 }, { "epoch": 0.23670728903196137, "grad_norm": 0.76171875, "learning_rate": 4.349403170463421e-05, "loss": 0.6689, "step": 3217 }, { "epoch": 0.23678086916532537, "grad_norm": 0.9921875, "learning_rate": 4.349013422136028e-05, "loss": 1.0577, "step": 3218 }, { "epoch": 0.23685444929868935, "grad_norm": 0.66015625, "learning_rate": 4.348623574575575e-05, "loss": 0.651, "step": 3219 }, { "epoch": 0.23692802943205335, "grad_norm": 0.85546875, "learning_rate": 4.348233627802988e-05, "loss": 0.8211, "step": 3220 }, { "epoch": 0.23700160956541733, "grad_norm": 0.7890625, "learning_rate": 4.347843581839193e-05, "loss": 1.2313, "step": 3221 }, { "epoch": 0.23707518969878133, "grad_norm": 0.72265625, "learning_rate": 4.3474534367051216e-05, "loss": 0.632, "step": 3222 }, { "epoch": 0.23714876983214533, "grad_norm": 0.984375, "learning_rate": 4.347063192421714e-05, "loss": 1.0363, "step": 3223 }, { "epoch": 0.2372223499655093, "grad_norm": 0.8984375, "learning_rate": 4.3466728490099143e-05, "loss": 1.0407, "step": 3224 }, { "epoch": 0.23729593009887331, "grad_norm": 0.78515625, "learning_rate": 4.3462824064906704e-05, "loss": 0.8869, "step": 3225 }, { "epoch": 0.2373695102322373, "grad_norm": 1.0546875, "learning_rate": 4.345891864884937e-05, "loss": 1.3454, "step": 3226 }, { "epoch": 0.2374430903656013, "grad_norm": 0.77734375, "learning_rate": 4.345501224213673e-05, "loss": 0.8759, "step": 3227 }, { "epoch": 0.23751667049896527, "grad_norm": 0.91015625, "learning_rate": 4.3451104844978444e-05, "loss": 1.0211, "step": 3228 }, { "epoch": 0.23759025063232928, "grad_norm": 0.890625, "learning_rate": 4.34471964575842e-05, "loss": 0.7893, "step": 3229 }, { "epoch": 0.23766383076569325, "grad_norm": 0.88671875, "learning_rate": 4.3443287080163777e-05, "loss": 1.0153, "step": 3230 }, { "epoch": 0.23773741089905726, "grad_norm": 0.828125, "learning_rate": 4.343937671292697e-05, "loss": 1.2079, "step": 3231 }, { "epoch": 0.23781099103242126, "grad_norm": 1.1796875, "learning_rate": 4.343546535608364e-05, "loss": 1.1658, "step": 3232 }, { "epoch": 0.23788457116578524, "grad_norm": 0.765625, "learning_rate": 4.343155300984369e-05, "loss": 0.6722, "step": 3233 }, { "epoch": 0.23795815129914924, "grad_norm": 1.03125, "learning_rate": 4.342763967441712e-05, "loss": 1.0483, "step": 3234 }, { "epoch": 0.23803173143251322, "grad_norm": 0.90625, "learning_rate": 4.342372535001393e-05, "loss": 0.8137, "step": 3235 }, { "epoch": 0.23810531156587722, "grad_norm": 0.875, "learning_rate": 4.34198100368442e-05, "loss": 1.3357, "step": 3236 }, { "epoch": 0.2381788916992412, "grad_norm": 0.8359375, "learning_rate": 4.341589373511805e-05, "loss": 0.7589, "step": 3237 }, { "epoch": 0.2382524718326052, "grad_norm": 0.87890625, "learning_rate": 4.3411976445045664e-05, "loss": 0.9209, "step": 3238 }, { "epoch": 0.23832605196596918, "grad_norm": 0.94140625, "learning_rate": 4.340805816683728e-05, "loss": 1.4987, "step": 3239 }, { "epoch": 0.23839963209933318, "grad_norm": 0.73046875, "learning_rate": 4.340413890070318e-05, "loss": 0.5365, "step": 3240 }, { "epoch": 0.23847321223269718, "grad_norm": 1.0703125, "learning_rate": 4.340021864685371e-05, "loss": 1.1462, "step": 3241 }, { "epoch": 0.23854679236606116, "grad_norm": 0.8515625, "learning_rate": 4.339629740549926e-05, "loss": 0.7768, "step": 3242 }, { "epoch": 0.23862037249942517, "grad_norm": 0.6875, "learning_rate": 4.339237517685027e-05, "loss": 0.7225, "step": 3243 }, { "epoch": 0.23869395263278914, "grad_norm": 0.96484375, "learning_rate": 4.3388451961117246e-05, "loss": 0.7952, "step": 3244 }, { "epoch": 0.23876753276615315, "grad_norm": 0.91015625, "learning_rate": 4.338452775851073e-05, "loss": 0.8929, "step": 3245 }, { "epoch": 0.23884111289951712, "grad_norm": 0.83203125, "learning_rate": 4.338060256924134e-05, "loss": 0.9899, "step": 3246 }, { "epoch": 0.23891469303288113, "grad_norm": 0.8125, "learning_rate": 4.337667639351972e-05, "loss": 1.0291, "step": 3247 }, { "epoch": 0.2389882731662451, "grad_norm": 0.76953125, "learning_rate": 4.337274923155659e-05, "loss": 0.6663, "step": 3248 }, { "epoch": 0.2390618532996091, "grad_norm": 0.90234375, "learning_rate": 4.33688210835627e-05, "loss": 1.2437, "step": 3249 }, { "epoch": 0.2391354334329731, "grad_norm": 0.78125, "learning_rate": 4.336489194974889e-05, "loss": 0.7705, "step": 3250 }, { "epoch": 0.2392090135663371, "grad_norm": 0.94921875, "learning_rate": 4.336096183032601e-05, "loss": 1.0608, "step": 3251 }, { "epoch": 0.2392825936997011, "grad_norm": 0.7734375, "learning_rate": 4.335703072550499e-05, "loss": 0.6806, "step": 3252 }, { "epoch": 0.23935617383306507, "grad_norm": 0.65625, "learning_rate": 4.33530986354968e-05, "loss": 0.68, "step": 3253 }, { "epoch": 0.23942975396642907, "grad_norm": 1.015625, "learning_rate": 4.334916556051247e-05, "loss": 1.1363, "step": 3254 }, { "epoch": 0.23950333409979305, "grad_norm": 0.69921875, "learning_rate": 4.3345231500763084e-05, "loss": 0.7675, "step": 3255 }, { "epoch": 0.23957691423315705, "grad_norm": 0.94140625, "learning_rate": 4.334129645645977e-05, "loss": 1.0675, "step": 3256 }, { "epoch": 0.23965049436652103, "grad_norm": 0.7265625, "learning_rate": 4.333736042781371e-05, "loss": 0.6276, "step": 3257 }, { "epoch": 0.23972407449988503, "grad_norm": 0.96484375, "learning_rate": 4.333342341503616e-05, "loss": 0.7948, "step": 3258 }, { "epoch": 0.23979765463324904, "grad_norm": 0.99609375, "learning_rate": 4.3329485418338403e-05, "loss": 0.8094, "step": 3259 }, { "epoch": 0.239871234766613, "grad_norm": 0.92578125, "learning_rate": 4.332554643793177e-05, "loss": 0.9658, "step": 3260 }, { "epoch": 0.23994481489997702, "grad_norm": 0.92578125, "learning_rate": 4.332160647402769e-05, "loss": 0.9935, "step": 3261 }, { "epoch": 0.240018395033341, "grad_norm": 0.86328125, "learning_rate": 4.331766552683758e-05, "loss": 1.2022, "step": 3262 }, { "epoch": 0.240091975166705, "grad_norm": 0.7890625, "learning_rate": 4.331372359657297e-05, "loss": 0.731, "step": 3263 }, { "epoch": 0.24016555530006897, "grad_norm": 0.8515625, "learning_rate": 4.33097806834454e-05, "loss": 0.7802, "step": 3264 }, { "epoch": 0.24023913543343298, "grad_norm": 0.76171875, "learning_rate": 4.3305836787666475e-05, "loss": 0.6186, "step": 3265 }, { "epoch": 0.24031271556679695, "grad_norm": 0.9375, "learning_rate": 4.330189190944788e-05, "loss": 1.4881, "step": 3266 }, { "epoch": 0.24038629570016096, "grad_norm": 1.015625, "learning_rate": 4.329794604900131e-05, "loss": 1.1482, "step": 3267 }, { "epoch": 0.24045987583352496, "grad_norm": 0.84375, "learning_rate": 4.329399920653852e-05, "loss": 0.8388, "step": 3268 }, { "epoch": 0.24053345596688894, "grad_norm": 0.93359375, "learning_rate": 4.3290051382271356e-05, "loss": 1.34, "step": 3269 }, { "epoch": 0.24060703610025294, "grad_norm": 1.015625, "learning_rate": 4.328610257641168e-05, "loss": 1.4536, "step": 3270 }, { "epoch": 0.24068061623361692, "grad_norm": 0.94140625, "learning_rate": 4.3282152789171414e-05, "loss": 0.9708, "step": 3271 }, { "epoch": 0.24075419636698092, "grad_norm": 0.7109375, "learning_rate": 4.327820202076254e-05, "loss": 0.7082, "step": 3272 }, { "epoch": 0.2408277765003449, "grad_norm": 0.8515625, "learning_rate": 4.3274250271397085e-05, "loss": 0.8295, "step": 3273 }, { "epoch": 0.2409013566337089, "grad_norm": 0.76953125, "learning_rate": 4.327029754128714e-05, "loss": 0.7234, "step": 3274 }, { "epoch": 0.24097493676707288, "grad_norm": 1.0390625, "learning_rate": 4.326634383064482e-05, "loss": 1.2526, "step": 3275 }, { "epoch": 0.24104851690043688, "grad_norm": 0.99609375, "learning_rate": 4.326238913968234e-05, "loss": 1.2503, "step": 3276 }, { "epoch": 0.2411220970338009, "grad_norm": 0.87109375, "learning_rate": 4.3258433468611914e-05, "loss": 0.6983, "step": 3277 }, { "epoch": 0.24119567716716486, "grad_norm": 0.8515625, "learning_rate": 4.325447681764586e-05, "loss": 0.8075, "step": 3278 }, { "epoch": 0.24126925730052887, "grad_norm": 0.875, "learning_rate": 4.325051918699651e-05, "loss": 0.8359, "step": 3279 }, { "epoch": 0.24134283743389284, "grad_norm": 0.9765625, "learning_rate": 4.324656057687626e-05, "loss": 1.1151, "step": 3280 }, { "epoch": 0.24141641756725685, "grad_norm": 0.8046875, "learning_rate": 4.324260098749757e-05, "loss": 0.6284, "step": 3281 }, { "epoch": 0.24148999770062082, "grad_norm": 0.94140625, "learning_rate": 4.3238640419072945e-05, "loss": 1.3291, "step": 3282 }, { "epoch": 0.24156357783398483, "grad_norm": 0.75, "learning_rate": 4.323467887181494e-05, "loss": 0.7874, "step": 3283 }, { "epoch": 0.2416371579673488, "grad_norm": 0.84375, "learning_rate": 4.323071634593615e-05, "loss": 1.1782, "step": 3284 }, { "epoch": 0.2417107381007128, "grad_norm": 0.84765625, "learning_rate": 4.322675284164925e-05, "loss": 0.6891, "step": 3285 }, { "epoch": 0.2417843182340768, "grad_norm": 0.83984375, "learning_rate": 4.3222788359166957e-05, "loss": 0.7173, "step": 3286 }, { "epoch": 0.2418578983674408, "grad_norm": 0.78515625, "learning_rate": 4.321882289870202e-05, "loss": 1.1282, "step": 3287 }, { "epoch": 0.2419314785008048, "grad_norm": 0.69140625, "learning_rate": 4.321485646046727e-05, "loss": 0.969, "step": 3288 }, { "epoch": 0.24200505863416877, "grad_norm": 0.82421875, "learning_rate": 4.321088904467559e-05, "loss": 0.9398, "step": 3289 }, { "epoch": 0.24207863876753277, "grad_norm": 0.9375, "learning_rate": 4.3206920651539873e-05, "loss": 0.8897, "step": 3290 }, { "epoch": 0.24215221890089675, "grad_norm": 0.82421875, "learning_rate": 4.320295128127312e-05, "loss": 0.8354, "step": 3291 }, { "epoch": 0.24222579903426075, "grad_norm": 0.765625, "learning_rate": 4.3198980934088354e-05, "loss": 0.7275, "step": 3292 }, { "epoch": 0.24229937916762473, "grad_norm": 0.81640625, "learning_rate": 4.319500961019865e-05, "loss": 1.0167, "step": 3293 }, { "epoch": 0.24237295930098873, "grad_norm": 0.71484375, "learning_rate": 4.319103730981715e-05, "loss": 0.815, "step": 3294 }, { "epoch": 0.24244653943435274, "grad_norm": 0.84375, "learning_rate": 4.318706403315703e-05, "loss": 0.8111, "step": 3295 }, { "epoch": 0.2425201195677167, "grad_norm": 1.0625, "learning_rate": 4.318308978043154e-05, "loss": 1.174, "step": 3296 }, { "epoch": 0.24259369970108072, "grad_norm": 0.9375, "learning_rate": 4.317911455185396e-05, "loss": 1.1623, "step": 3297 }, { "epoch": 0.2426672798344447, "grad_norm": 0.8359375, "learning_rate": 4.3175138347637646e-05, "loss": 0.6876, "step": 3298 }, { "epoch": 0.2427408599678087, "grad_norm": 1.03125, "learning_rate": 4.317116116799598e-05, "loss": 1.4496, "step": 3299 }, { "epoch": 0.24281444010117267, "grad_norm": 0.80078125, "learning_rate": 4.316718301314241e-05, "loss": 1.355, "step": 3300 }, { "epoch": 0.24288802023453668, "grad_norm": 0.7890625, "learning_rate": 4.316320388329044e-05, "loss": 0.7542, "step": 3301 }, { "epoch": 0.24296160036790065, "grad_norm": 0.77734375, "learning_rate": 4.315922377865363e-05, "loss": 0.9499, "step": 3302 }, { "epoch": 0.24303518050126466, "grad_norm": 0.65234375, "learning_rate": 4.315524269944557e-05, "loss": 0.5671, "step": 3303 }, { "epoch": 0.24310876063462866, "grad_norm": 0.8046875, "learning_rate": 4.3151260645879934e-05, "loss": 0.6535, "step": 3304 }, { "epoch": 0.24318234076799264, "grad_norm": 0.91796875, "learning_rate": 4.3147277618170415e-05, "loss": 1.1626, "step": 3305 }, { "epoch": 0.24325592090135664, "grad_norm": 0.96484375, "learning_rate": 4.3143293616530784e-05, "loss": 0.9798, "step": 3306 }, { "epoch": 0.24332950103472062, "grad_norm": 0.82421875, "learning_rate": 4.3139308641174855e-05, "loss": 0.9162, "step": 3307 }, { "epoch": 0.24340308116808462, "grad_norm": 0.75, "learning_rate": 4.3135322692316484e-05, "loss": 0.7868, "step": 3308 }, { "epoch": 0.2434766613014486, "grad_norm": 1.1484375, "learning_rate": 4.31313357701696e-05, "loss": 1.0084, "step": 3309 }, { "epoch": 0.2435502414348126, "grad_norm": 0.98046875, "learning_rate": 4.3127347874948177e-05, "loss": 1.2432, "step": 3310 }, { "epoch": 0.24362382156817658, "grad_norm": 0.7890625, "learning_rate": 4.312335900686623e-05, "loss": 0.8526, "step": 3311 }, { "epoch": 0.24369740170154058, "grad_norm": 0.8203125, "learning_rate": 4.311936916613782e-05, "loss": 0.7488, "step": 3312 }, { "epoch": 0.2437709818349046, "grad_norm": 0.9140625, "learning_rate": 4.311537835297711e-05, "loss": 0.8447, "step": 3313 }, { "epoch": 0.24384456196826856, "grad_norm": 1.046875, "learning_rate": 4.3111386567598245e-05, "loss": 1.2009, "step": 3314 }, { "epoch": 0.24391814210163257, "grad_norm": 0.796875, "learning_rate": 4.3107393810215475e-05, "loss": 0.912, "step": 3315 }, { "epoch": 0.24399172223499654, "grad_norm": 1.015625, "learning_rate": 4.310340008104308e-05, "loss": 1.3959, "step": 3316 }, { "epoch": 0.24406530236836055, "grad_norm": 0.9921875, "learning_rate": 4.30994053802954e-05, "loss": 1.2222, "step": 3317 }, { "epoch": 0.24413888250172452, "grad_norm": 0.96875, "learning_rate": 4.3095409708186804e-05, "loss": 1.1964, "step": 3318 }, { "epoch": 0.24421246263508853, "grad_norm": 1.015625, "learning_rate": 4.309141306493176e-05, "loss": 1.406, "step": 3319 }, { "epoch": 0.2442860427684525, "grad_norm": 0.7578125, "learning_rate": 4.308741545074474e-05, "loss": 0.6776, "step": 3320 }, { "epoch": 0.2443596229018165, "grad_norm": 0.89453125, "learning_rate": 4.3083416865840296e-05, "loss": 0.68, "step": 3321 }, { "epoch": 0.2444332030351805, "grad_norm": 0.7890625, "learning_rate": 4.307941731043302e-05, "loss": 0.8754, "step": 3322 }, { "epoch": 0.2445067831685445, "grad_norm": 0.83984375, "learning_rate": 4.307541678473758e-05, "loss": 0.9996, "step": 3323 }, { "epoch": 0.2445803633019085, "grad_norm": 0.81640625, "learning_rate": 4.307141528896864e-05, "loss": 0.7086, "step": 3324 }, { "epoch": 0.24465394343527247, "grad_norm": 0.828125, "learning_rate": 4.306741282334099e-05, "loss": 0.9456, "step": 3325 }, { "epoch": 0.24472752356863647, "grad_norm": 0.84375, "learning_rate": 4.306340938806941e-05, "loss": 1.3534, "step": 3326 }, { "epoch": 0.24480110370200045, "grad_norm": 0.80859375, "learning_rate": 4.305940498336876e-05, "loss": 0.7491, "step": 3327 }, { "epoch": 0.24487468383536445, "grad_norm": 0.8203125, "learning_rate": 4.305539960945396e-05, "loss": 0.9569, "step": 3328 }, { "epoch": 0.24494826396872843, "grad_norm": 0.76953125, "learning_rate": 4.3051393266539964e-05, "loss": 0.8684, "step": 3329 }, { "epoch": 0.24502184410209243, "grad_norm": 0.7421875, "learning_rate": 4.304738595484179e-05, "loss": 0.9582, "step": 3330 }, { "epoch": 0.24509542423545644, "grad_norm": 0.875, "learning_rate": 4.304337767457449e-05, "loss": 0.9837, "step": 3331 }, { "epoch": 0.24516900436882041, "grad_norm": 0.8203125, "learning_rate": 4.30393684259532e-05, "loss": 1.0825, "step": 3332 }, { "epoch": 0.24524258450218442, "grad_norm": 0.93359375, "learning_rate": 4.3035358209193065e-05, "loss": 1.397, "step": 3333 }, { "epoch": 0.2453161646355484, "grad_norm": 0.796875, "learning_rate": 4.3031347024509336e-05, "loss": 0.8771, "step": 3334 }, { "epoch": 0.2453897447689124, "grad_norm": 0.9375, "learning_rate": 4.302733487211725e-05, "loss": 0.9097, "step": 3335 }, { "epoch": 0.24546332490227638, "grad_norm": 0.80078125, "learning_rate": 4.302332175223216e-05, "loss": 0.9695, "step": 3336 }, { "epoch": 0.24553690503564038, "grad_norm": 0.92578125, "learning_rate": 4.301930766506943e-05, "loss": 1.063, "step": 3337 }, { "epoch": 0.24561048516900436, "grad_norm": 0.73828125, "learning_rate": 4.301529261084449e-05, "loss": 0.7604, "step": 3338 }, { "epoch": 0.24568406530236836, "grad_norm": 0.8828125, "learning_rate": 4.301127658977283e-05, "loss": 0.822, "step": 3339 }, { "epoch": 0.24575764543573236, "grad_norm": 0.78515625, "learning_rate": 4.3007259602069974e-05, "loss": 0.8355, "step": 3340 }, { "epoch": 0.24583122556909634, "grad_norm": 0.765625, "learning_rate": 4.3003241647951494e-05, "loss": 0.7786, "step": 3341 }, { "epoch": 0.24590480570246034, "grad_norm": 0.9453125, "learning_rate": 4.299922272763305e-05, "loss": 1.0491, "step": 3342 }, { "epoch": 0.24597838583582432, "grad_norm": 0.953125, "learning_rate": 4.2995202841330305e-05, "loss": 0.8744, "step": 3343 }, { "epoch": 0.24605196596918832, "grad_norm": 0.92578125, "learning_rate": 4.299118198925902e-05, "loss": 1.2905, "step": 3344 }, { "epoch": 0.2461255461025523, "grad_norm": 0.73828125, "learning_rate": 4.2987160171634975e-05, "loss": 0.8037, "step": 3345 }, { "epoch": 0.2461991262359163, "grad_norm": 0.703125, "learning_rate": 4.2983137388674024e-05, "loss": 0.855, "step": 3346 }, { "epoch": 0.24627270636928028, "grad_norm": 0.87890625, "learning_rate": 4.297911364059205e-05, "loss": 0.9314, "step": 3347 }, { "epoch": 0.24634628650264428, "grad_norm": 0.92578125, "learning_rate": 4.2975088927605e-05, "loss": 0.9432, "step": 3348 }, { "epoch": 0.2464198666360083, "grad_norm": 0.9609375, "learning_rate": 4.297106324992888e-05, "loss": 1.0491, "step": 3349 }, { "epoch": 0.24649344676937227, "grad_norm": 0.796875, "learning_rate": 4.2967036607779745e-05, "loss": 1.1294, "step": 3350 }, { "epoch": 0.24656702690273627, "grad_norm": 0.87109375, "learning_rate": 4.2963009001373684e-05, "loss": 1.3044, "step": 3351 }, { "epoch": 0.24664060703610025, "grad_norm": 0.85546875, "learning_rate": 4.295898043092685e-05, "loss": 0.7408, "step": 3352 }, { "epoch": 0.24671418716946425, "grad_norm": 0.734375, "learning_rate": 4.2954950896655466e-05, "loss": 0.8605, "step": 3353 }, { "epoch": 0.24678776730282823, "grad_norm": 0.6953125, "learning_rate": 4.2950920398775783e-05, "loss": 0.7389, "step": 3354 }, { "epoch": 0.24686134743619223, "grad_norm": 1.1328125, "learning_rate": 4.29468889375041e-05, "loss": 1.1959, "step": 3355 }, { "epoch": 0.2469349275695562, "grad_norm": 0.86328125, "learning_rate": 4.2942856513056785e-05, "loss": 1.1662, "step": 3356 }, { "epoch": 0.2470085077029202, "grad_norm": 0.90234375, "learning_rate": 4.293882312565025e-05, "loss": 0.8959, "step": 3357 }, { "epoch": 0.24708208783628421, "grad_norm": 1.015625, "learning_rate": 4.293478877550096e-05, "loss": 0.9975, "step": 3358 }, { "epoch": 0.2471556679696482, "grad_norm": 1.0078125, "learning_rate": 4.293075346282543e-05, "loss": 1.4542, "step": 3359 }, { "epoch": 0.2472292481030122, "grad_norm": 0.875, "learning_rate": 4.2926717187840225e-05, "loss": 0.6137, "step": 3360 }, { "epoch": 0.24730282823637617, "grad_norm": 0.84765625, "learning_rate": 4.2922679950761975e-05, "loss": 0.9547, "step": 3361 }, { "epoch": 0.24737640836974017, "grad_norm": 0.87890625, "learning_rate": 4.291864175180734e-05, "loss": 0.9009, "step": 3362 }, { "epoch": 0.24744998850310415, "grad_norm": 0.83984375, "learning_rate": 4.2914602591193045e-05, "loss": 0.9671, "step": 3363 }, { "epoch": 0.24752356863646816, "grad_norm": 0.94921875, "learning_rate": 4.2910562469135864e-05, "loss": 0.9419, "step": 3364 }, { "epoch": 0.24759714876983213, "grad_norm": 0.82421875, "learning_rate": 4.290652138585262e-05, "loss": 0.8009, "step": 3365 }, { "epoch": 0.24767072890319614, "grad_norm": 0.9609375, "learning_rate": 4.2902479341560195e-05, "loss": 1.1007, "step": 3366 }, { "epoch": 0.24774430903656014, "grad_norm": 1.0625, "learning_rate": 4.2898436336475525e-05, "loss": 0.8986, "step": 3367 }, { "epoch": 0.24781788916992412, "grad_norm": 0.76953125, "learning_rate": 4.289439237081557e-05, "loss": 0.6785, "step": 3368 }, { "epoch": 0.24789146930328812, "grad_norm": 1.0390625, "learning_rate": 4.2890347444797384e-05, "loss": 1.1315, "step": 3369 }, { "epoch": 0.2479650494366521, "grad_norm": 0.79296875, "learning_rate": 4.2886301558638035e-05, "loss": 0.7375, "step": 3370 }, { "epoch": 0.2480386295700161, "grad_norm": 0.953125, "learning_rate": 4.288225471255467e-05, "loss": 0.9889, "step": 3371 }, { "epoch": 0.24811220970338008, "grad_norm": 0.83984375, "learning_rate": 4.2878206906764466e-05, "loss": 0.8403, "step": 3372 }, { "epoch": 0.24818578983674408, "grad_norm": 0.7265625, "learning_rate": 4.2874158141484665e-05, "loss": 0.6619, "step": 3373 }, { "epoch": 0.24825936997010806, "grad_norm": 0.77734375, "learning_rate": 4.287010841693255e-05, "loss": 0.9374, "step": 3374 }, { "epoch": 0.24833295010347206, "grad_norm": 0.6953125, "learning_rate": 4.286605773332548e-05, "loss": 0.7605, "step": 3375 }, { "epoch": 0.24840653023683607, "grad_norm": 0.765625, "learning_rate": 4.286200609088082e-05, "loss": 0.8482, "step": 3376 }, { "epoch": 0.24848011037020004, "grad_norm": 0.97265625, "learning_rate": 4.2857953489816046e-05, "loss": 1.5558, "step": 3377 }, { "epoch": 0.24855369050356405, "grad_norm": 0.84765625, "learning_rate": 4.285389993034863e-05, "loss": 1.1048, "step": 3378 }, { "epoch": 0.24862727063692802, "grad_norm": 0.890625, "learning_rate": 4.284984541269612e-05, "loss": 1.2074, "step": 3379 }, { "epoch": 0.24870085077029203, "grad_norm": 0.80859375, "learning_rate": 4.284578993707613e-05, "loss": 0.8701, "step": 3380 }, { "epoch": 0.248774430903656, "grad_norm": 0.7109375, "learning_rate": 4.284173350370629e-05, "loss": 0.6837, "step": 3381 }, { "epoch": 0.24884801103702, "grad_norm": 1.125, "learning_rate": 4.2837676112804323e-05, "loss": 1.0353, "step": 3382 }, { "epoch": 0.248921591170384, "grad_norm": 0.8515625, "learning_rate": 4.283361776458796e-05, "loss": 0.8353, "step": 3383 }, { "epoch": 0.248995171303748, "grad_norm": 0.9140625, "learning_rate": 4.282955845927502e-05, "loss": 1.0915, "step": 3384 }, { "epoch": 0.249068751437112, "grad_norm": 0.8984375, "learning_rate": 4.282549819708335e-05, "loss": 0.8539, "step": 3385 }, { "epoch": 0.24914233157047597, "grad_norm": 0.86328125, "learning_rate": 4.282143697823086e-05, "loss": 1.1669, "step": 3386 }, { "epoch": 0.24921591170383997, "grad_norm": 0.88671875, "learning_rate": 4.281737480293551e-05, "loss": 0.9369, "step": 3387 }, { "epoch": 0.24928949183720395, "grad_norm": 0.75390625, "learning_rate": 4.2813311671415305e-05, "loss": 0.7198, "step": 3388 }, { "epoch": 0.24936307197056795, "grad_norm": 0.89453125, "learning_rate": 4.28092475838883e-05, "loss": 0.7981, "step": 3389 }, { "epoch": 0.24943665210393193, "grad_norm": 0.91015625, "learning_rate": 4.280518254057262e-05, "loss": 1.0883, "step": 3390 }, { "epoch": 0.24951023223729593, "grad_norm": 1.1796875, "learning_rate": 4.280111654168642e-05, "loss": 1.2146, "step": 3391 }, { "epoch": 0.24958381237065994, "grad_norm": 0.85546875, "learning_rate": 4.279704958744792e-05, "loss": 0.9971, "step": 3392 }, { "epoch": 0.2496573925040239, "grad_norm": 0.98828125, "learning_rate": 4.279298167807538e-05, "loss": 1.0907, "step": 3393 }, { "epoch": 0.24973097263738792, "grad_norm": 0.80078125, "learning_rate": 4.278891281378713e-05, "loss": 0.9807, "step": 3394 }, { "epoch": 0.2498045527707519, "grad_norm": 0.984375, "learning_rate": 4.2784842994801517e-05, "loss": 1.0418, "step": 3395 }, { "epoch": 0.2498781329041159, "grad_norm": 0.76953125, "learning_rate": 4.278077222133697e-05, "loss": 0.665, "step": 3396 }, { "epoch": 0.24995171303747987, "grad_norm": 1.0234375, "learning_rate": 4.277670049361197e-05, "loss": 1.1062, "step": 3397 }, { "epoch": 0.2500252931708439, "grad_norm": 0.765625, "learning_rate": 4.277262781184502e-05, "loss": 0.8011, "step": 3398 }, { "epoch": 0.2500988733042079, "grad_norm": 0.78125, "learning_rate": 4.2768554176254705e-05, "loss": 0.8366, "step": 3399 }, { "epoch": 0.25017245343757183, "grad_norm": 1.0078125, "learning_rate": 4.2764479587059646e-05, "loss": 0.9901, "step": 3400 }, { "epoch": 0.25024603357093583, "grad_norm": 0.9375, "learning_rate": 4.276040404447853e-05, "loss": 0.9289, "step": 3401 }, { "epoch": 0.25031961370429984, "grad_norm": 0.83984375, "learning_rate": 4.275632754873007e-05, "loss": 1.0131, "step": 3402 }, { "epoch": 0.25039319383766384, "grad_norm": 0.9296875, "learning_rate": 4.275225010003304e-05, "loss": 0.8746, "step": 3403 }, { "epoch": 0.25046677397102785, "grad_norm": 0.91015625, "learning_rate": 4.274817169860628e-05, "loss": 1.025, "step": 3404 }, { "epoch": 0.2505403541043918, "grad_norm": 0.796875, "learning_rate": 4.274409234466867e-05, "loss": 0.9384, "step": 3405 }, { "epoch": 0.2506139342377558, "grad_norm": 0.8046875, "learning_rate": 4.2740012038439135e-05, "loss": 0.8604, "step": 3406 }, { "epoch": 0.2506875143711198, "grad_norm": 0.828125, "learning_rate": 4.273593078013666e-05, "loss": 0.8112, "step": 3407 }, { "epoch": 0.2507610945044838, "grad_norm": 0.90234375, "learning_rate": 4.2731848569980275e-05, "loss": 0.8045, "step": 3408 }, { "epoch": 0.25083467463784775, "grad_norm": 0.73828125, "learning_rate": 4.272776540818907e-05, "loss": 0.821, "step": 3409 }, { "epoch": 0.25090825477121176, "grad_norm": 0.7890625, "learning_rate": 4.272368129498218e-05, "loss": 0.8682, "step": 3410 }, { "epoch": 0.25098183490457576, "grad_norm": 0.70703125, "learning_rate": 4.271959623057879e-05, "loss": 0.5253, "step": 3411 }, { "epoch": 0.25105541503793977, "grad_norm": 1.0078125, "learning_rate": 4.271551021519814e-05, "loss": 1.1089, "step": 3412 }, { "epoch": 0.25112899517130377, "grad_norm": 0.87109375, "learning_rate": 4.271142324905951e-05, "loss": 0.8853, "step": 3413 }, { "epoch": 0.2512025753046677, "grad_norm": 0.8359375, "learning_rate": 4.270733533238226e-05, "loss": 0.9163, "step": 3414 }, { "epoch": 0.2512761554380317, "grad_norm": 0.8828125, "learning_rate": 4.270324646538576e-05, "loss": 0.8709, "step": 3415 }, { "epoch": 0.2513497355713957, "grad_norm": 0.76171875, "learning_rate": 4.269915664828945e-05, "loss": 1.1787, "step": 3416 }, { "epoch": 0.25142331570475973, "grad_norm": 0.8203125, "learning_rate": 4.269506588131284e-05, "loss": 0.8657, "step": 3417 }, { "epoch": 0.2514968958381237, "grad_norm": 0.78515625, "learning_rate": 4.269097416467547e-05, "loss": 0.9562, "step": 3418 }, { "epoch": 0.2515704759714877, "grad_norm": 0.69140625, "learning_rate": 4.268688149859692e-05, "loss": 0.8756, "step": 3419 }, { "epoch": 0.2516440561048517, "grad_norm": 0.875, "learning_rate": 4.268278788329685e-05, "loss": 0.7495, "step": 3420 }, { "epoch": 0.2517176362382157, "grad_norm": 0.8828125, "learning_rate": 4.267869331899495e-05, "loss": 0.7501, "step": 3421 }, { "epoch": 0.2517912163715797, "grad_norm": 0.91796875, "learning_rate": 4.267459780591097e-05, "loss": 1.1245, "step": 3422 }, { "epoch": 0.25186479650494364, "grad_norm": 1.1796875, "learning_rate": 4.267050134426471e-05, "loss": 1.3597, "step": 3423 }, { "epoch": 0.25193837663830765, "grad_norm": 0.92578125, "learning_rate": 4.266640393427602e-05, "loss": 1.1856, "step": 3424 }, { "epoch": 0.25201195677167165, "grad_norm": 1.0546875, "learning_rate": 4.2662305576164794e-05, "loss": 1.0716, "step": 3425 }, { "epoch": 0.25208553690503566, "grad_norm": 0.80859375, "learning_rate": 4.265820627015099e-05, "loss": 1.0572, "step": 3426 }, { "epoch": 0.2521591170383996, "grad_norm": 0.7734375, "learning_rate": 4.265410601645461e-05, "loss": 0.7815, "step": 3427 }, { "epoch": 0.2522326971717636, "grad_norm": 0.953125, "learning_rate": 4.26500048152957e-05, "loss": 1.116, "step": 3428 }, { "epoch": 0.2523062773051276, "grad_norm": 1.046875, "learning_rate": 4.2645902666894366e-05, "loss": 1.1073, "step": 3429 }, { "epoch": 0.2523798574384916, "grad_norm": 0.88671875, "learning_rate": 4.264179957147077e-05, "loss": 1.2016, "step": 3430 }, { "epoch": 0.2524534375718556, "grad_norm": 0.96875, "learning_rate": 4.2637695529245104e-05, "loss": 0.8755, "step": 3431 }, { "epoch": 0.25252701770521957, "grad_norm": 0.70703125, "learning_rate": 4.263359054043763e-05, "loss": 0.6902, "step": 3432 }, { "epoch": 0.2526005978385836, "grad_norm": 0.828125, "learning_rate": 4.262948460526867e-05, "loss": 0.8044, "step": 3433 }, { "epoch": 0.2526741779719476, "grad_norm": 0.80859375, "learning_rate": 4.262537772395856e-05, "loss": 0.8182, "step": 3434 }, { "epoch": 0.2527477581053116, "grad_norm": 0.80859375, "learning_rate": 4.262126989672772e-05, "loss": 0.6779, "step": 3435 }, { "epoch": 0.25282133823867553, "grad_norm": 1.2578125, "learning_rate": 4.26171611237966e-05, "loss": 0.9907, "step": 3436 }, { "epoch": 0.25289491837203953, "grad_norm": 1.078125, "learning_rate": 4.261305140538572e-05, "loss": 1.2386, "step": 3437 }, { "epoch": 0.25296849850540354, "grad_norm": 0.9921875, "learning_rate": 4.260894074171564e-05, "loss": 1.3068, "step": 3438 }, { "epoch": 0.25304207863876754, "grad_norm": 0.87890625, "learning_rate": 4.260482913300697e-05, "loss": 0.7777, "step": 3439 }, { "epoch": 0.25311565877213155, "grad_norm": 0.81640625, "learning_rate": 4.260071657948036e-05, "loss": 1.0844, "step": 3440 }, { "epoch": 0.2531892389054955, "grad_norm": 0.796875, "learning_rate": 4.259660308135655e-05, "loss": 0.8623, "step": 3441 }, { "epoch": 0.2532628190388595, "grad_norm": 0.8828125, "learning_rate": 4.2592488638856274e-05, "loss": 1.1151, "step": 3442 }, { "epoch": 0.2533363991722235, "grad_norm": 0.75390625, "learning_rate": 4.2588373252200374e-05, "loss": 0.8084, "step": 3443 }, { "epoch": 0.2534099793055875, "grad_norm": 0.76953125, "learning_rate": 4.258425692160969e-05, "loss": 0.9513, "step": 3444 }, { "epoch": 0.25348355943895146, "grad_norm": 0.85546875, "learning_rate": 4.258013964730515e-05, "loss": 0.8542, "step": 3445 }, { "epoch": 0.25355713957231546, "grad_norm": 0.84375, "learning_rate": 4.257602142950773e-05, "loss": 1.0133, "step": 3446 }, { "epoch": 0.25363071970567946, "grad_norm": 0.7734375, "learning_rate": 4.2571902268438426e-05, "loss": 0.8895, "step": 3447 }, { "epoch": 0.25370429983904347, "grad_norm": 0.80078125, "learning_rate": 4.256778216431832e-05, "loss": 1.0018, "step": 3448 }, { "epoch": 0.25377787997240747, "grad_norm": 0.76171875, "learning_rate": 4.2563661117368524e-05, "loss": 0.938, "step": 3449 }, { "epoch": 0.2538514601057714, "grad_norm": 1.1875, "learning_rate": 4.255953912781021e-05, "loss": 1.0347, "step": 3450 }, { "epoch": 0.2539250402391354, "grad_norm": 0.7890625, "learning_rate": 4.255541619586459e-05, "loss": 0.8427, "step": 3451 }, { "epoch": 0.25399862037249943, "grad_norm": 0.953125, "learning_rate": 4.255129232175295e-05, "loss": 1.2341, "step": 3452 }, { "epoch": 0.25407220050586343, "grad_norm": 0.8203125, "learning_rate": 4.25471675056966e-05, "loss": 0.7688, "step": 3453 }, { "epoch": 0.2541457806392274, "grad_norm": 0.9921875, "learning_rate": 4.254304174791691e-05, "loss": 1.219, "step": 3454 }, { "epoch": 0.2542193607725914, "grad_norm": 0.7109375, "learning_rate": 4.25389150486353e-05, "loss": 0.6127, "step": 3455 }, { "epoch": 0.2542929409059554, "grad_norm": 0.91796875, "learning_rate": 4.253478740807325e-05, "loss": 1.0402, "step": 3456 }, { "epoch": 0.2543665210393194, "grad_norm": 0.9609375, "learning_rate": 4.253065882645228e-05, "loss": 0.9826, "step": 3457 }, { "epoch": 0.2544401011726834, "grad_norm": 0.84765625, "learning_rate": 4.252652930399395e-05, "loss": 0.9286, "step": 3458 }, { "epoch": 0.25451368130604735, "grad_norm": 0.7421875, "learning_rate": 4.252239884091991e-05, "loss": 0.6667, "step": 3459 }, { "epoch": 0.25458726143941135, "grad_norm": 0.68359375, "learning_rate": 4.251826743745181e-05, "loss": 0.6704, "step": 3460 }, { "epoch": 0.25466084157277535, "grad_norm": 1.03125, "learning_rate": 4.251413509381138e-05, "loss": 1.0261, "step": 3461 }, { "epoch": 0.25473442170613936, "grad_norm": 0.7109375, "learning_rate": 4.2510001810220415e-05, "loss": 0.7498, "step": 3462 }, { "epoch": 0.2548080018395033, "grad_norm": 0.8515625, "learning_rate": 4.2505867586900705e-05, "loss": 0.8054, "step": 3463 }, { "epoch": 0.2548815819728673, "grad_norm": 0.5234375, "learning_rate": 4.250173242407416e-05, "loss": 0.4933, "step": 3464 }, { "epoch": 0.2549551621062313, "grad_norm": 0.97265625, "learning_rate": 4.2497596321962685e-05, "loss": 1.0394, "step": 3465 }, { "epoch": 0.2550287422395953, "grad_norm": 0.99609375, "learning_rate": 4.249345928078827e-05, "loss": 0.8976, "step": 3466 }, { "epoch": 0.2551023223729593, "grad_norm": 0.93359375, "learning_rate": 4.2489321300772927e-05, "loss": 1.02, "step": 3467 }, { "epoch": 0.25517590250632327, "grad_norm": 0.8984375, "learning_rate": 4.248518238213875e-05, "loss": 1.0248, "step": 3468 }, { "epoch": 0.2552494826396873, "grad_norm": 1.09375, "learning_rate": 4.2481042525107854e-05, "loss": 1.3725, "step": 3469 }, { "epoch": 0.2553230627730513, "grad_norm": 0.97265625, "learning_rate": 4.247690172990242e-05, "loss": 1.2737, "step": 3470 }, { "epoch": 0.2553966429064153, "grad_norm": 0.9140625, "learning_rate": 4.247275999674469e-05, "loss": 0.8972, "step": 3471 }, { "epoch": 0.2554702230397793, "grad_norm": 0.890625, "learning_rate": 4.2468617325856924e-05, "loss": 0.8607, "step": 3472 }, { "epoch": 0.25554380317314324, "grad_norm": 0.77734375, "learning_rate": 4.246447371746147e-05, "loss": 0.7903, "step": 3473 }, { "epoch": 0.25561738330650724, "grad_norm": 0.765625, "learning_rate": 4.2460329171780684e-05, "loss": 0.7285, "step": 3474 }, { "epoch": 0.25569096343987124, "grad_norm": 1.0703125, "learning_rate": 4.245618368903702e-05, "loss": 1.1474, "step": 3475 }, { "epoch": 0.25576454357323525, "grad_norm": 0.81640625, "learning_rate": 4.245203726945294e-05, "loss": 1.3079, "step": 3476 }, { "epoch": 0.2558381237065992, "grad_norm": 1.0, "learning_rate": 4.244788991325098e-05, "loss": 1.0751, "step": 3477 }, { "epoch": 0.2559117038399632, "grad_norm": 0.92578125, "learning_rate": 4.244374162065373e-05, "loss": 1.1544, "step": 3478 }, { "epoch": 0.2559852839733272, "grad_norm": 0.82421875, "learning_rate": 4.243959239188381e-05, "loss": 1.2168, "step": 3479 }, { "epoch": 0.2560588641066912, "grad_norm": 0.703125, "learning_rate": 4.243544222716391e-05, "loss": 0.9321, "step": 3480 }, { "epoch": 0.2561324442400552, "grad_norm": 0.9296875, "learning_rate": 4.243129112671674e-05, "loss": 0.6829, "step": 3481 }, { "epoch": 0.25620602437341916, "grad_norm": 0.83984375, "learning_rate": 4.242713909076512e-05, "loss": 0.7281, "step": 3482 }, { "epoch": 0.25627960450678317, "grad_norm": 1.0078125, "learning_rate": 4.242298611953185e-05, "loss": 1.1156, "step": 3483 }, { "epoch": 0.25635318464014717, "grad_norm": 0.95703125, "learning_rate": 4.241883221323982e-05, "loss": 0.9548, "step": 3484 }, { "epoch": 0.2564267647735112, "grad_norm": 0.90234375, "learning_rate": 4.241467737211197e-05, "loss": 1.0641, "step": 3485 }, { "epoch": 0.2565003449068751, "grad_norm": 0.70703125, "learning_rate": 4.241052159637128e-05, "loss": 0.8736, "step": 3486 }, { "epoch": 0.2565739250402391, "grad_norm": 0.796875, "learning_rate": 4.240636488624077e-05, "loss": 0.9449, "step": 3487 }, { "epoch": 0.25664750517360313, "grad_norm": 0.80078125, "learning_rate": 4.2402207241943534e-05, "loss": 0.5731, "step": 3488 }, { "epoch": 0.25672108530696713, "grad_norm": 0.86328125, "learning_rate": 4.239804866370271e-05, "loss": 0.7721, "step": 3489 }, { "epoch": 0.25679466544033114, "grad_norm": 0.90234375, "learning_rate": 4.239388915174147e-05, "loss": 1.068, "step": 3490 }, { "epoch": 0.2568682455736951, "grad_norm": 0.8515625, "learning_rate": 4.2389728706283056e-05, "loss": 0.6954, "step": 3491 }, { "epoch": 0.2569418257070591, "grad_norm": 0.71484375, "learning_rate": 4.2385567327550743e-05, "loss": 0.7014, "step": 3492 }, { "epoch": 0.2570154058404231, "grad_norm": 0.7734375, "learning_rate": 4.238140501576786e-05, "loss": 0.9295, "step": 3493 }, { "epoch": 0.2570889859737871, "grad_norm": 0.77734375, "learning_rate": 4.237724177115781e-05, "loss": 0.8264, "step": 3494 }, { "epoch": 0.25716256610715105, "grad_norm": 0.8203125, "learning_rate": 4.237307759394401e-05, "loss": 1.0209, "step": 3495 }, { "epoch": 0.25723614624051505, "grad_norm": 1.0390625, "learning_rate": 4.236891248434995e-05, "loss": 1.5645, "step": 3496 }, { "epoch": 0.25730972637387906, "grad_norm": 0.8046875, "learning_rate": 4.236474644259917e-05, "loss": 1.1092, "step": 3497 }, { "epoch": 0.25738330650724306, "grad_norm": 0.63671875, "learning_rate": 4.236057946891524e-05, "loss": 0.7523, "step": 3498 }, { "epoch": 0.25745688664060706, "grad_norm": 0.6875, "learning_rate": 4.2356411563521794e-05, "loss": 0.7251, "step": 3499 }, { "epoch": 0.257530466773971, "grad_norm": 0.8671875, "learning_rate": 4.235224272664253e-05, "loss": 0.7612, "step": 3500 }, { "epoch": 0.257604046907335, "grad_norm": 0.7578125, "learning_rate": 4.234807295850117e-05, "loss": 0.7898, "step": 3501 }, { "epoch": 0.257677627040699, "grad_norm": 1.1796875, "learning_rate": 4.234390225932149e-05, "loss": 1.2333, "step": 3502 }, { "epoch": 0.257751207174063, "grad_norm": 0.78515625, "learning_rate": 4.2339730629327346e-05, "loss": 0.9314, "step": 3503 }, { "epoch": 0.257824787307427, "grad_norm": 0.98828125, "learning_rate": 4.233555806874261e-05, "loss": 1.0388, "step": 3504 }, { "epoch": 0.257898367440791, "grad_norm": 0.9375, "learning_rate": 4.233138457779121e-05, "loss": 0.9291, "step": 3505 }, { "epoch": 0.257971947574155, "grad_norm": 0.890625, "learning_rate": 4.232721015669713e-05, "loss": 0.8482, "step": 3506 }, { "epoch": 0.258045527707519, "grad_norm": 1.171875, "learning_rate": 4.232303480568441e-05, "loss": 0.7691, "step": 3507 }, { "epoch": 0.258119107840883, "grad_norm": 0.74609375, "learning_rate": 4.2318858524977134e-05, "loss": 0.6183, "step": 3508 }, { "epoch": 0.25819268797424694, "grad_norm": 0.890625, "learning_rate": 4.231468131479943e-05, "loss": 0.8489, "step": 3509 }, { "epoch": 0.25826626810761094, "grad_norm": 1.3203125, "learning_rate": 4.2310503175375484e-05, "loss": 1.2199, "step": 3510 }, { "epoch": 0.25833984824097495, "grad_norm": 0.69140625, "learning_rate": 4.230632410692952e-05, "loss": 0.6541, "step": 3511 }, { "epoch": 0.25841342837433895, "grad_norm": 2.375, "learning_rate": 4.230214410968584e-05, "loss": 1.0912, "step": 3512 }, { "epoch": 0.2584870085077029, "grad_norm": 0.7109375, "learning_rate": 4.229796318386875e-05, "loss": 0.8423, "step": 3513 }, { "epoch": 0.2585605886410669, "grad_norm": 0.9375, "learning_rate": 4.229378132970266e-05, "loss": 1.0736, "step": 3514 }, { "epoch": 0.2586341687744309, "grad_norm": 0.8125, "learning_rate": 4.228959854741198e-05, "loss": 0.8272, "step": 3515 }, { "epoch": 0.2587077489077949, "grad_norm": 0.78515625, "learning_rate": 4.228541483722121e-05, "loss": 0.9831, "step": 3516 }, { "epoch": 0.2587813290411589, "grad_norm": 0.91015625, "learning_rate": 4.228123019935487e-05, "loss": 1.0159, "step": 3517 }, { "epoch": 0.25885490917452286, "grad_norm": 0.79296875, "learning_rate": 4.227704463403754e-05, "loss": 1.1013, "step": 3518 }, { "epoch": 0.25892848930788687, "grad_norm": 0.8203125, "learning_rate": 4.2272858141493864e-05, "loss": 0.8541, "step": 3519 }, { "epoch": 0.25900206944125087, "grad_norm": 0.73046875, "learning_rate": 4.226867072194851e-05, "loss": 0.6593, "step": 3520 }, { "epoch": 0.2590756495746149, "grad_norm": 0.94140625, "learning_rate": 4.226448237562621e-05, "loss": 0.8965, "step": 3521 }, { "epoch": 0.2591492297079788, "grad_norm": 0.74609375, "learning_rate": 4.226029310275176e-05, "loss": 0.6424, "step": 3522 }, { "epoch": 0.2592228098413428, "grad_norm": 1.125, "learning_rate": 4.2256102903549976e-05, "loss": 1.1124, "step": 3523 }, { "epoch": 0.25929638997470683, "grad_norm": 0.9140625, "learning_rate": 4.225191177824574e-05, "loss": 1.2578, "step": 3524 }, { "epoch": 0.25936997010807084, "grad_norm": 0.89453125, "learning_rate": 4.224771972706398e-05, "loss": 0.7997, "step": 3525 }, { "epoch": 0.25944355024143484, "grad_norm": 0.7265625, "learning_rate": 4.224352675022968e-05, "loss": 0.7046, "step": 3526 }, { "epoch": 0.2595171303747988, "grad_norm": 0.88671875, "learning_rate": 4.223933284796787e-05, "loss": 0.974, "step": 3527 }, { "epoch": 0.2595907105081628, "grad_norm": 1.0859375, "learning_rate": 4.223513802050363e-05, "loss": 0.9775, "step": 3528 }, { "epoch": 0.2596642906415268, "grad_norm": 0.84765625, "learning_rate": 4.223094226806207e-05, "loss": 1.0559, "step": 3529 }, { "epoch": 0.2597378707748908, "grad_norm": 0.83203125, "learning_rate": 4.22267455908684e-05, "loss": 0.9653, "step": 3530 }, { "epoch": 0.25981145090825475, "grad_norm": 0.859375, "learning_rate": 4.222254798914782e-05, "loss": 1.349, "step": 3531 }, { "epoch": 0.25988503104161875, "grad_norm": 0.87890625, "learning_rate": 4.221834946312563e-05, "loss": 0.8148, "step": 3532 }, { "epoch": 0.25995861117498276, "grad_norm": 0.75390625, "learning_rate": 4.221415001302713e-05, "loss": 0.6854, "step": 3533 }, { "epoch": 0.26003219130834676, "grad_norm": 0.70703125, "learning_rate": 4.220994963907772e-05, "loss": 0.6915, "step": 3534 }, { "epoch": 0.26010577144171076, "grad_norm": 0.9765625, "learning_rate": 4.220574834150281e-05, "loss": 1.1557, "step": 3535 }, { "epoch": 0.2601793515750747, "grad_norm": 0.89453125, "learning_rate": 4.22015461205279e-05, "loss": 0.8733, "step": 3536 }, { "epoch": 0.2602529317084387, "grad_norm": 0.96484375, "learning_rate": 4.219734297637849e-05, "loss": 1.2008, "step": 3537 }, { "epoch": 0.2603265118418027, "grad_norm": 0.9296875, "learning_rate": 4.2193138909280163e-05, "loss": 1.0873, "step": 3538 }, { "epoch": 0.2604000919751667, "grad_norm": 1.921875, "learning_rate": 4.218893391945854e-05, "loss": 0.6881, "step": 3539 }, { "epoch": 0.2604736721085307, "grad_norm": 0.7265625, "learning_rate": 4.218472800713931e-05, "loss": 0.7883, "step": 3540 }, { "epoch": 0.2605472522418947, "grad_norm": 0.8046875, "learning_rate": 4.218052117254817e-05, "loss": 0.8724, "step": 3541 }, { "epoch": 0.2606208323752587, "grad_norm": 0.9140625, "learning_rate": 4.217631341591092e-05, "loss": 0.7856, "step": 3542 }, { "epoch": 0.2606944125086227, "grad_norm": 0.80078125, "learning_rate": 4.2172104737453365e-05, "loss": 0.8145, "step": 3543 }, { "epoch": 0.2607679926419867, "grad_norm": 0.84765625, "learning_rate": 4.216789513740139e-05, "loss": 1.2071, "step": 3544 }, { "epoch": 0.26084157277535064, "grad_norm": 1.078125, "learning_rate": 4.21636846159809e-05, "loss": 1.4071, "step": 3545 }, { "epoch": 0.26091515290871464, "grad_norm": 0.7578125, "learning_rate": 4.2159473173417875e-05, "loss": 0.6581, "step": 3546 }, { "epoch": 0.26098873304207865, "grad_norm": 0.71875, "learning_rate": 4.2155260809938335e-05, "loss": 0.8168, "step": 3547 }, { "epoch": 0.26106231317544265, "grad_norm": 1.234375, "learning_rate": 4.215104752576835e-05, "loss": 0.8958, "step": 3548 }, { "epoch": 0.2611358933088066, "grad_norm": 0.79296875, "learning_rate": 4.214683332113404e-05, "loss": 0.8052, "step": 3549 }, { "epoch": 0.2612094734421706, "grad_norm": 0.90234375, "learning_rate": 4.214261819626156e-05, "loss": 0.6614, "step": 3550 }, { "epoch": 0.2612830535755346, "grad_norm": 0.890625, "learning_rate": 4.2138402151377156e-05, "loss": 0.9995, "step": 3551 }, { "epoch": 0.2613566337088986, "grad_norm": 0.7265625, "learning_rate": 4.213418518670707e-05, "loss": 0.6394, "step": 3552 }, { "epoch": 0.2614302138422626, "grad_norm": 0.78515625, "learning_rate": 4.212996730247763e-05, "loss": 0.6211, "step": 3553 }, { "epoch": 0.26150379397562656, "grad_norm": 1.1015625, "learning_rate": 4.212574849891519e-05, "loss": 0.799, "step": 3554 }, { "epoch": 0.26157737410899057, "grad_norm": 0.734375, "learning_rate": 4.212152877624619e-05, "loss": 0.8535, "step": 3555 }, { "epoch": 0.26165095424235457, "grad_norm": 0.91796875, "learning_rate": 4.211730813469706e-05, "loss": 1.134, "step": 3556 }, { "epoch": 0.2617245343757186, "grad_norm": 0.8828125, "learning_rate": 4.211308657449434e-05, "loss": 1.0679, "step": 3557 }, { "epoch": 0.2617981145090825, "grad_norm": 0.65234375, "learning_rate": 4.2108864095864595e-05, "loss": 0.6499, "step": 3558 }, { "epoch": 0.26187169464244653, "grad_norm": 0.69921875, "learning_rate": 4.210464069903442e-05, "loss": 0.9486, "step": 3559 }, { "epoch": 0.26194527477581053, "grad_norm": 0.72265625, "learning_rate": 4.210041638423049e-05, "loss": 0.9701, "step": 3560 }, { "epoch": 0.26201885490917454, "grad_norm": 0.9140625, "learning_rate": 4.20961911516795e-05, "loss": 1.1911, "step": 3561 }, { "epoch": 0.26209243504253854, "grad_norm": 0.84765625, "learning_rate": 4.209196500160823e-05, "loss": 0.8494, "step": 3562 }, { "epoch": 0.2621660151759025, "grad_norm": 0.9921875, "learning_rate": 4.2087737934243475e-05, "loss": 0.8744, "step": 3563 }, { "epoch": 0.2622395953092665, "grad_norm": 0.87890625, "learning_rate": 4.2083509949812104e-05, "loss": 0.8249, "step": 3564 }, { "epoch": 0.2623131754426305, "grad_norm": 0.828125, "learning_rate": 4.2079281048541016e-05, "loss": 0.9578, "step": 3565 }, { "epoch": 0.2623867555759945, "grad_norm": 0.8671875, "learning_rate": 4.207505123065717e-05, "loss": 1.1456, "step": 3566 }, { "epoch": 0.26246033570935845, "grad_norm": 0.953125, "learning_rate": 4.2070820496387584e-05, "loss": 0.8844, "step": 3567 }, { "epoch": 0.26253391584272245, "grad_norm": 0.8359375, "learning_rate": 4.20665888459593e-05, "loss": 1.0938, "step": 3568 }, { "epoch": 0.26260749597608646, "grad_norm": 0.75390625, "learning_rate": 4.206235627959942e-05, "loss": 0.656, "step": 3569 }, { "epoch": 0.26268107610945046, "grad_norm": 0.72265625, "learning_rate": 4.2058122797535117e-05, "loss": 0.7738, "step": 3570 }, { "epoch": 0.26275465624281447, "grad_norm": 0.79296875, "learning_rate": 4.205388839999358e-05, "loss": 1.1333, "step": 3571 }, { "epoch": 0.2628282363761784, "grad_norm": 1.0703125, "learning_rate": 4.2049653087202045e-05, "loss": 1.2255, "step": 3572 }, { "epoch": 0.2629018165095424, "grad_norm": 0.98828125, "learning_rate": 4.2045416859387844e-05, "loss": 1.0472, "step": 3573 }, { "epoch": 0.2629753966429064, "grad_norm": 0.91015625, "learning_rate": 4.2041179716778324e-05, "loss": 1.0815, "step": 3574 }, { "epoch": 0.2630489767762704, "grad_norm": 0.80078125, "learning_rate": 4.2036941659600856e-05, "loss": 0.9453, "step": 3575 }, { "epoch": 0.2631225569096344, "grad_norm": 0.7421875, "learning_rate": 4.203270268808292e-05, "loss": 0.9314, "step": 3576 }, { "epoch": 0.2631961370429984, "grad_norm": 0.87890625, "learning_rate": 4.202846280245199e-05, "loss": 0.8123, "step": 3577 }, { "epoch": 0.2632697171763624, "grad_norm": 0.96875, "learning_rate": 4.2024222002935644e-05, "loss": 1.0372, "step": 3578 }, { "epoch": 0.2633432973097264, "grad_norm": 1.203125, "learning_rate": 4.201998028976144e-05, "loss": 1.0443, "step": 3579 }, { "epoch": 0.2634168774430904, "grad_norm": 0.7109375, "learning_rate": 4.201573766315704e-05, "loss": 0.843, "step": 3580 }, { "epoch": 0.26349045757645434, "grad_norm": 0.80859375, "learning_rate": 4.201149412335015e-05, "loss": 1.1496, "step": 3581 }, { "epoch": 0.26356403770981834, "grad_norm": 0.73828125, "learning_rate": 4.200724967056848e-05, "loss": 0.6851, "step": 3582 }, { "epoch": 0.26363761784318235, "grad_norm": 0.9453125, "learning_rate": 4.200300430503986e-05, "loss": 1.3379, "step": 3583 }, { "epoch": 0.26371119797654635, "grad_norm": 1.1171875, "learning_rate": 4.199875802699211e-05, "loss": 1.044, "step": 3584 }, { "epoch": 0.2637847781099103, "grad_norm": 0.92578125, "learning_rate": 4.199451083665312e-05, "loss": 1.3473, "step": 3585 }, { "epoch": 0.2638583582432743, "grad_norm": 1.0078125, "learning_rate": 4.199026273425083e-05, "loss": 1.007, "step": 3586 }, { "epoch": 0.2639319383766383, "grad_norm": 0.84765625, "learning_rate": 4.198601372001323e-05, "loss": 0.936, "step": 3587 }, { "epoch": 0.2640055185100023, "grad_norm": 0.9609375, "learning_rate": 4.198176379416836e-05, "loss": 0.9573, "step": 3588 }, { "epoch": 0.2640790986433663, "grad_norm": 0.6484375, "learning_rate": 4.19775129569443e-05, "loss": 0.6779, "step": 3589 }, { "epoch": 0.26415267877673027, "grad_norm": 0.9140625, "learning_rate": 4.197326120856918e-05, "loss": 0.9555, "step": 3590 }, { "epoch": 0.26422625891009427, "grad_norm": 0.65625, "learning_rate": 4.196900854927119e-05, "loss": 0.6246, "step": 3591 }, { "epoch": 0.2642998390434583, "grad_norm": 0.71875, "learning_rate": 4.1964754979278564e-05, "loss": 0.7344, "step": 3592 }, { "epoch": 0.2643734191768223, "grad_norm": 0.98046875, "learning_rate": 4.1960500498819586e-05, "loss": 0.6947, "step": 3593 }, { "epoch": 0.2644469993101862, "grad_norm": 0.84375, "learning_rate": 4.195624510812257e-05, "loss": 0.9936, "step": 3594 }, { "epoch": 0.26452057944355023, "grad_norm": 0.73828125, "learning_rate": 4.195198880741591e-05, "loss": 0.8097, "step": 3595 }, { "epoch": 0.26459415957691423, "grad_norm": 0.71875, "learning_rate": 4.194773159692803e-05, "loss": 0.9053, "step": 3596 }, { "epoch": 0.26466773971027824, "grad_norm": 0.87890625, "learning_rate": 4.19434734768874e-05, "loss": 1.2433, "step": 3597 }, { "epoch": 0.26474131984364224, "grad_norm": 0.953125, "learning_rate": 4.193921444752255e-05, "loss": 1.0103, "step": 3598 }, { "epoch": 0.2648148999770062, "grad_norm": 0.75390625, "learning_rate": 4.1934954509062056e-05, "loss": 0.9681, "step": 3599 }, { "epoch": 0.2648884801103702, "grad_norm": 0.8203125, "learning_rate": 4.1930693661734545e-05, "loss": 0.8749, "step": 3600 }, { "epoch": 0.2649620602437342, "grad_norm": 0.87890625, "learning_rate": 4.1926431905768675e-05, "loss": 1.3861, "step": 3601 }, { "epoch": 0.2650356403770982, "grad_norm": 0.796875, "learning_rate": 4.1922169241393174e-05, "loss": 1.0495, "step": 3602 }, { "epoch": 0.26510922051046215, "grad_norm": 0.73828125, "learning_rate": 4.191790566883681e-05, "loss": 0.8725, "step": 3603 }, { "epoch": 0.26518280064382616, "grad_norm": 1.09375, "learning_rate": 4.1913641188328415e-05, "loss": 1.3962, "step": 3604 }, { "epoch": 0.26525638077719016, "grad_norm": 0.98046875, "learning_rate": 4.1909375800096826e-05, "loss": 0.8629, "step": 3605 }, { "epoch": 0.26532996091055416, "grad_norm": 0.87109375, "learning_rate": 4.190510950437099e-05, "loss": 0.7154, "step": 3606 }, { "epoch": 0.26540354104391817, "grad_norm": 0.74609375, "learning_rate": 4.190084230137984e-05, "loss": 0.84, "step": 3607 }, { "epoch": 0.2654771211772821, "grad_norm": 1.0390625, "learning_rate": 4.1896574191352424e-05, "loss": 1.0756, "step": 3608 }, { "epoch": 0.2655507013106461, "grad_norm": 0.86328125, "learning_rate": 4.189230517451777e-05, "loss": 0.8495, "step": 3609 }, { "epoch": 0.2656242814440101, "grad_norm": 0.91015625, "learning_rate": 4.188803525110501e-05, "loss": 1.0097, "step": 3610 }, { "epoch": 0.26569786157737413, "grad_norm": 0.88671875, "learning_rate": 4.188376442134329e-05, "loss": 0.9431, "step": 3611 }, { "epoch": 0.2657714417107381, "grad_norm": 0.64453125, "learning_rate": 4.187949268546182e-05, "loss": 0.7255, "step": 3612 }, { "epoch": 0.2658450218441021, "grad_norm": 0.69921875, "learning_rate": 4.187522004368986e-05, "loss": 0.6103, "step": 3613 }, { "epoch": 0.2659186019774661, "grad_norm": 0.9765625, "learning_rate": 4.1870946496256714e-05, "loss": 0.8542, "step": 3614 }, { "epoch": 0.2659921821108301, "grad_norm": 0.9765625, "learning_rate": 4.186667204339174e-05, "loss": 1.4835, "step": 3615 }, { "epoch": 0.2660657622441941, "grad_norm": 0.99609375, "learning_rate": 4.1862396685324325e-05, "loss": 0.961, "step": 3616 }, { "epoch": 0.26613934237755804, "grad_norm": 0.828125, "learning_rate": 4.185812042228393e-05, "loss": 0.8702, "step": 3617 }, { "epoch": 0.26621292251092205, "grad_norm": 1.1484375, "learning_rate": 4.185384325450006e-05, "loss": 1.7418, "step": 3618 }, { "epoch": 0.26628650264428605, "grad_norm": 0.84765625, "learning_rate": 4.1849565182202246e-05, "loss": 0.9595, "step": 3619 }, { "epoch": 0.26636008277765005, "grad_norm": 0.8125, "learning_rate": 4.18452862056201e-05, "loss": 0.7958, "step": 3620 }, { "epoch": 0.266433662911014, "grad_norm": 0.8984375, "learning_rate": 4.1841006324983256e-05, "loss": 0.9107, "step": 3621 }, { "epoch": 0.266507243044378, "grad_norm": 0.91796875, "learning_rate": 4.183672554052142e-05, "loss": 0.8926, "step": 3622 }, { "epoch": 0.266580823177742, "grad_norm": 1.109375, "learning_rate": 4.1832443852464306e-05, "loss": 1.1984, "step": 3623 }, { "epoch": 0.266654403311106, "grad_norm": 0.87109375, "learning_rate": 4.182816126104173e-05, "loss": 1.0964, "step": 3624 }, { "epoch": 0.26672798344447, "grad_norm": 1.0390625, "learning_rate": 4.1823877766483524e-05, "loss": 1.3117, "step": 3625 }, { "epoch": 0.26680156357783397, "grad_norm": 1.0078125, "learning_rate": 4.181959336901958e-05, "loss": 1.7506, "step": 3626 }, { "epoch": 0.26687514371119797, "grad_norm": 0.80078125, "learning_rate": 4.181530806887982e-05, "loss": 0.7347, "step": 3627 }, { "epoch": 0.266948723844562, "grad_norm": 1.125, "learning_rate": 4.181102186629424e-05, "loss": 0.8751, "step": 3628 }, { "epoch": 0.267022303977926, "grad_norm": 1.0, "learning_rate": 4.1806734761492866e-05, "loss": 1.3935, "step": 3629 }, { "epoch": 0.2670958841112899, "grad_norm": 1.0390625, "learning_rate": 4.1802446754705784e-05, "loss": 1.4557, "step": 3630 }, { "epoch": 0.26716946424465393, "grad_norm": 0.83203125, "learning_rate": 4.179815784616311e-05, "loss": 0.8388, "step": 3631 }, { "epoch": 0.26724304437801794, "grad_norm": 0.76953125, "learning_rate": 4.179386803609504e-05, "loss": 0.806, "step": 3632 }, { "epoch": 0.26731662451138194, "grad_norm": 0.82421875, "learning_rate": 4.1789577324731786e-05, "loss": 0.8701, "step": 3633 }, { "epoch": 0.26739020464474594, "grad_norm": 1.1640625, "learning_rate": 4.1785285712303636e-05, "loss": 1.1736, "step": 3634 }, { "epoch": 0.2674637847781099, "grad_norm": 0.84375, "learning_rate": 4.178099319904091e-05, "loss": 0.6696, "step": 3635 }, { "epoch": 0.2675373649114739, "grad_norm": 1.0625, "learning_rate": 4.1776699785173955e-05, "loss": 1.1624, "step": 3636 }, { "epoch": 0.2676109450448379, "grad_norm": 0.87890625, "learning_rate": 4.177240547093322e-05, "loss": 1.2793, "step": 3637 }, { "epoch": 0.2676845251782019, "grad_norm": 1.0, "learning_rate": 4.176811025654917e-05, "loss": 1.1772, "step": 3638 }, { "epoch": 0.26775810531156585, "grad_norm": 1.0859375, "learning_rate": 4.176381414225229e-05, "loss": 0.9055, "step": 3639 }, { "epoch": 0.26783168544492986, "grad_norm": 0.90234375, "learning_rate": 4.1759517128273184e-05, "loss": 0.8579, "step": 3640 }, { "epoch": 0.26790526557829386, "grad_norm": 0.77734375, "learning_rate": 4.175521921484245e-05, "loss": 0.6146, "step": 3641 }, { "epoch": 0.26797884571165786, "grad_norm": 0.6953125, "learning_rate": 4.175092040219074e-05, "loss": 0.6374, "step": 3642 }, { "epoch": 0.26805242584502187, "grad_norm": 1.0234375, "learning_rate": 4.174662069054877e-05, "loss": 1.7015, "step": 3643 }, { "epoch": 0.2681260059783858, "grad_norm": 0.7578125, "learning_rate": 4.1742320080147304e-05, "loss": 0.7913, "step": 3644 }, { "epoch": 0.2681995861117498, "grad_norm": 0.96875, "learning_rate": 4.173801857121713e-05, "loss": 0.8472, "step": 3645 }, { "epoch": 0.2682731662451138, "grad_norm": 0.75390625, "learning_rate": 4.173371616398912e-05, "loss": 0.9246, "step": 3646 }, { "epoch": 0.26834674637847783, "grad_norm": 0.984375, "learning_rate": 4.172941285869417e-05, "loss": 1.0593, "step": 3647 }, { "epoch": 0.2684203265118418, "grad_norm": 0.7265625, "learning_rate": 4.172510865556323e-05, "loss": 0.7386, "step": 3648 }, { "epoch": 0.2684939066452058, "grad_norm": 0.87890625, "learning_rate": 4.172080355482729e-05, "loss": 1.0615, "step": 3649 }, { "epoch": 0.2685674867785698, "grad_norm": 0.7265625, "learning_rate": 4.171649755671741e-05, "loss": 0.6225, "step": 3650 }, { "epoch": 0.2686410669119338, "grad_norm": 0.81640625, "learning_rate": 4.171219066146468e-05, "loss": 0.8728, "step": 3651 }, { "epoch": 0.2687146470452978, "grad_norm": 0.90234375, "learning_rate": 4.170788286930024e-05, "loss": 0.8478, "step": 3652 }, { "epoch": 0.26878822717866174, "grad_norm": 0.921875, "learning_rate": 4.170357418045529e-05, "loss": 0.8094, "step": 3653 }, { "epoch": 0.26886180731202575, "grad_norm": 0.86328125, "learning_rate": 4.169926459516105e-05, "loss": 0.8884, "step": 3654 }, { "epoch": 0.26893538744538975, "grad_norm": 0.80859375, "learning_rate": 4.1694954113648823e-05, "loss": 0.6956, "step": 3655 }, { "epoch": 0.26900896757875375, "grad_norm": 0.66015625, "learning_rate": 4.169064273614995e-05, "loss": 0.5339, "step": 3656 }, { "epoch": 0.2690825477121177, "grad_norm": 1.1015625, "learning_rate": 4.16863304628958e-05, "loss": 1.36, "step": 3657 }, { "epoch": 0.2691561278454817, "grad_norm": 0.89453125, "learning_rate": 4.16820172941178e-05, "loss": 1.2954, "step": 3658 }, { "epoch": 0.2692297079788457, "grad_norm": 0.84375, "learning_rate": 4.167770323004746e-05, "loss": 1.2144, "step": 3659 }, { "epoch": 0.2693032881122097, "grad_norm": 0.9609375, "learning_rate": 4.167338827091627e-05, "loss": 1.0776, "step": 3660 }, { "epoch": 0.2693768682455737, "grad_norm": 0.8984375, "learning_rate": 4.166907241695583e-05, "loss": 1.0608, "step": 3661 }, { "epoch": 0.26945044837893767, "grad_norm": 0.82421875, "learning_rate": 4.1664755668397756e-05, "loss": 1.0712, "step": 3662 }, { "epoch": 0.26952402851230167, "grad_norm": 0.93359375, "learning_rate": 4.1660438025473725e-05, "loss": 0.9114, "step": 3663 }, { "epoch": 0.2695976086456657, "grad_norm": 0.9765625, "learning_rate": 4.165611948841545e-05, "loss": 0.8202, "step": 3664 }, { "epoch": 0.2696711887790297, "grad_norm": 0.859375, "learning_rate": 4.165180005745469e-05, "loss": 0.7816, "step": 3665 }, { "epoch": 0.26974476891239363, "grad_norm": 0.92578125, "learning_rate": 4.164747973282328e-05, "loss": 1.0676, "step": 3666 }, { "epoch": 0.26981834904575763, "grad_norm": 0.91796875, "learning_rate": 4.1643158514753076e-05, "loss": 1.0146, "step": 3667 }, { "epoch": 0.26989192917912164, "grad_norm": 0.8046875, "learning_rate": 4.163883640347599e-05, "loss": 0.9597, "step": 3668 }, { "epoch": 0.26996550931248564, "grad_norm": 0.95703125, "learning_rate": 4.163451339922399e-05, "loss": 1.0143, "step": 3669 }, { "epoch": 0.27003908944584964, "grad_norm": 1.0703125, "learning_rate": 4.163018950222905e-05, "loss": 1.0431, "step": 3670 }, { "epoch": 0.2701126695792136, "grad_norm": 0.78125, "learning_rate": 4.162586471272327e-05, "loss": 0.7312, "step": 3671 }, { "epoch": 0.2701862497125776, "grad_norm": 0.80859375, "learning_rate": 4.162153903093873e-05, "loss": 1.2639, "step": 3672 }, { "epoch": 0.2702598298459416, "grad_norm": 0.78515625, "learning_rate": 4.1617212457107576e-05, "loss": 0.7579, "step": 3673 }, { "epoch": 0.2703334099793056, "grad_norm": 0.9140625, "learning_rate": 4.161288499146202e-05, "loss": 0.9605, "step": 3674 }, { "epoch": 0.27040699011266955, "grad_norm": 0.859375, "learning_rate": 4.1608556634234296e-05, "loss": 0.8284, "step": 3675 }, { "epoch": 0.27048057024603356, "grad_norm": 1.03125, "learning_rate": 4.1604227385656714e-05, "loss": 0.9931, "step": 3676 }, { "epoch": 0.27055415037939756, "grad_norm": 0.98046875, "learning_rate": 4.15998972459616e-05, "loss": 0.9074, "step": 3677 }, { "epoch": 0.27062773051276157, "grad_norm": 0.73046875, "learning_rate": 4.159556621538137e-05, "loss": 0.9218, "step": 3678 }, { "epoch": 0.27070131064612557, "grad_norm": 0.8203125, "learning_rate": 4.159123429414844e-05, "loss": 0.8277, "step": 3679 }, { "epoch": 0.2707748907794895, "grad_norm": 0.74609375, "learning_rate": 4.158690148249529e-05, "loss": 0.5017, "step": 3680 }, { "epoch": 0.2708484709128535, "grad_norm": 0.890625, "learning_rate": 4.158256778065448e-05, "loss": 1.0813, "step": 3681 }, { "epoch": 0.2709220510462175, "grad_norm": 0.9609375, "learning_rate": 4.157823318885857e-05, "loss": 1.0314, "step": 3682 }, { "epoch": 0.27099563117958153, "grad_norm": 0.83203125, "learning_rate": 4.15738977073402e-05, "loss": 0.676, "step": 3683 }, { "epoch": 0.2710692113129455, "grad_norm": 0.8125, "learning_rate": 4.156956133633204e-05, "loss": 0.9024, "step": 3684 }, { "epoch": 0.2711427914463095, "grad_norm": 0.890625, "learning_rate": 4.1565224076066824e-05, "loss": 1.1291, "step": 3685 }, { "epoch": 0.2712163715796735, "grad_norm": 0.98046875, "learning_rate": 4.1560885926777314e-05, "loss": 0.7727, "step": 3686 }, { "epoch": 0.2712899517130375, "grad_norm": 0.93359375, "learning_rate": 4.1556546888696336e-05, "loss": 1.0155, "step": 3687 }, { "epoch": 0.2713635318464015, "grad_norm": 1.0546875, "learning_rate": 4.1552206962056764e-05, "loss": 1.2204, "step": 3688 }, { "epoch": 0.27143711197976544, "grad_norm": 0.765625, "learning_rate": 4.1547866147091496e-05, "loss": 0.7764, "step": 3689 }, { "epoch": 0.27151069211312945, "grad_norm": 0.73828125, "learning_rate": 4.154352444403351e-05, "loss": 0.7791, "step": 3690 }, { "epoch": 0.27158427224649345, "grad_norm": 0.76171875, "learning_rate": 4.1539181853115814e-05, "loss": 0.8875, "step": 3691 }, { "epoch": 0.27165785237985746, "grad_norm": 0.875, "learning_rate": 4.153483837457147e-05, "loss": 1.0873, "step": 3692 }, { "epoch": 0.2717314325132214, "grad_norm": 1.0, "learning_rate": 4.153049400863358e-05, "loss": 1.1457, "step": 3693 }, { "epoch": 0.2718050126465854, "grad_norm": 0.9140625, "learning_rate": 4.1526148755535296e-05, "loss": 0.8534, "step": 3694 }, { "epoch": 0.2718785927799494, "grad_norm": 0.8203125, "learning_rate": 4.152180261550982e-05, "loss": 0.8841, "step": 3695 }, { "epoch": 0.2719521729133134, "grad_norm": 0.7109375, "learning_rate": 4.1517455588790397e-05, "loss": 0.9285, "step": 3696 }, { "epoch": 0.2720257530466774, "grad_norm": 0.875, "learning_rate": 4.151310767561034e-05, "loss": 0.7892, "step": 3697 }, { "epoch": 0.27209933318004137, "grad_norm": 0.87890625, "learning_rate": 4.150875887620298e-05, "loss": 0.9136, "step": 3698 }, { "epoch": 0.2721729133134054, "grad_norm": 1.1875, "learning_rate": 4.150440919080172e-05, "loss": 1.0957, "step": 3699 }, { "epoch": 0.2722464934467694, "grad_norm": 0.75, "learning_rate": 4.1500058619639984e-05, "loss": 0.8967, "step": 3700 }, { "epoch": 0.2723200735801334, "grad_norm": 0.80078125, "learning_rate": 4.149570716295126e-05, "loss": 1.0409, "step": 3701 }, { "epoch": 0.27239365371349733, "grad_norm": 0.98828125, "learning_rate": 4.1491354820969094e-05, "loss": 1.4121, "step": 3702 }, { "epoch": 0.27246723384686133, "grad_norm": 0.953125, "learning_rate": 4.1487001593927055e-05, "loss": 0.7186, "step": 3703 }, { "epoch": 0.27254081398022534, "grad_norm": 0.859375, "learning_rate": 4.1482647482058787e-05, "loss": 1.3317, "step": 3704 }, { "epoch": 0.27261439411358934, "grad_norm": 0.8203125, "learning_rate": 4.147829248559796e-05, "loss": 0.6391, "step": 3705 }, { "epoch": 0.27268797424695335, "grad_norm": 1.109375, "learning_rate": 4.147393660477831e-05, "loss": 1.6144, "step": 3706 }, { "epoch": 0.2727615543803173, "grad_norm": 0.8203125, "learning_rate": 4.1469579839833577e-05, "loss": 1.0032, "step": 3707 }, { "epoch": 0.2728351345136813, "grad_norm": 0.9375, "learning_rate": 4.146522219099761e-05, "loss": 0.8316, "step": 3708 }, { "epoch": 0.2729087146470453, "grad_norm": 1.0546875, "learning_rate": 4.146086365850427e-05, "loss": 1.172, "step": 3709 }, { "epoch": 0.2729822947804093, "grad_norm": 0.9921875, "learning_rate": 4.1456504242587454e-05, "loss": 1.3697, "step": 3710 }, { "epoch": 0.27305587491377326, "grad_norm": 0.7734375, "learning_rate": 4.145214394348115e-05, "loss": 1.021, "step": 3711 }, { "epoch": 0.27312945504713726, "grad_norm": 0.90625, "learning_rate": 4.1447782761419354e-05, "loss": 1.0258, "step": 3712 }, { "epoch": 0.27320303518050126, "grad_norm": 0.796875, "learning_rate": 4.144342069663611e-05, "loss": 0.861, "step": 3713 }, { "epoch": 0.27327661531386527, "grad_norm": 0.75390625, "learning_rate": 4.143905774936555e-05, "loss": 0.8782, "step": 3714 }, { "epoch": 0.27335019544722927, "grad_norm": 0.921875, "learning_rate": 4.1434693919841805e-05, "loss": 1.148, "step": 3715 }, { "epoch": 0.2734237755805932, "grad_norm": 0.84765625, "learning_rate": 4.143032920829907e-05, "loss": 1.0507, "step": 3716 }, { "epoch": 0.2734973557139572, "grad_norm": 0.8984375, "learning_rate": 4.142596361497161e-05, "loss": 0.9985, "step": 3717 }, { "epoch": 0.27357093584732123, "grad_norm": 0.9609375, "learning_rate": 4.14215971400937e-05, "loss": 1.3125, "step": 3718 }, { "epoch": 0.27364451598068523, "grad_norm": 0.921875, "learning_rate": 4.141722978389969e-05, "loss": 1.0741, "step": 3719 }, { "epoch": 0.2737180961140492, "grad_norm": 0.98828125, "learning_rate": 4.141286154662397e-05, "loss": 1.418, "step": 3720 }, { "epoch": 0.2737916762474132, "grad_norm": 0.8203125, "learning_rate": 4.140849242850096e-05, "loss": 0.7892, "step": 3721 }, { "epoch": 0.2738652563807772, "grad_norm": 0.76171875, "learning_rate": 4.140412242976516e-05, "loss": 1.0057, "step": 3722 }, { "epoch": 0.2739388365141412, "grad_norm": 0.83203125, "learning_rate": 4.1399751550651087e-05, "loss": 0.7086, "step": 3723 }, { "epoch": 0.2740124166475052, "grad_norm": 0.8203125, "learning_rate": 4.139537979139333e-05, "loss": 0.9284, "step": 3724 }, { "epoch": 0.27408599678086915, "grad_norm": 1.1015625, "learning_rate": 4.1391007152226494e-05, "loss": 0.8726, "step": 3725 }, { "epoch": 0.27415957691423315, "grad_norm": 0.77734375, "learning_rate": 4.1386633633385276e-05, "loss": 0.9388, "step": 3726 }, { "epoch": 0.27423315704759715, "grad_norm": 0.90625, "learning_rate": 4.138225923510437e-05, "loss": 1.3097, "step": 3727 }, { "epoch": 0.27430673718096116, "grad_norm": 0.76953125, "learning_rate": 4.1377883957618556e-05, "loss": 0.6689, "step": 3728 }, { "epoch": 0.2743803173143251, "grad_norm": 0.91015625, "learning_rate": 4.137350780116265e-05, "loss": 1.0086, "step": 3729 }, { "epoch": 0.2744538974476891, "grad_norm": 1.0078125, "learning_rate": 4.13691307659715e-05, "loss": 1.1734, "step": 3730 }, { "epoch": 0.2745274775810531, "grad_norm": 0.79296875, "learning_rate": 4.136475285228002e-05, "loss": 0.7806, "step": 3731 }, { "epoch": 0.2746010577144171, "grad_norm": 0.734375, "learning_rate": 4.136037406032316e-05, "loss": 0.6985, "step": 3732 }, { "epoch": 0.2746746378477811, "grad_norm": 0.87109375, "learning_rate": 4.135599439033593e-05, "loss": 1.0879, "step": 3733 }, { "epoch": 0.27474821798114507, "grad_norm": 0.87890625, "learning_rate": 4.135161384255337e-05, "loss": 0.9441, "step": 3734 }, { "epoch": 0.2748217981145091, "grad_norm": 1.03125, "learning_rate": 4.1347232417210575e-05, "loss": 1.4543, "step": 3735 }, { "epoch": 0.2748953782478731, "grad_norm": 0.86328125, "learning_rate": 4.13428501145427e-05, "loss": 0.8849, "step": 3736 }, { "epoch": 0.2749689583812371, "grad_norm": 0.78515625, "learning_rate": 4.133846693478492e-05, "loss": 0.9882, "step": 3737 }, { "epoch": 0.27504253851460103, "grad_norm": 0.8125, "learning_rate": 4.133408287817248e-05, "loss": 0.9064, "step": 3738 }, { "epoch": 0.27511611864796504, "grad_norm": 0.94921875, "learning_rate": 4.132969794494066e-05, "loss": 0.6894, "step": 3739 }, { "epoch": 0.27518969878132904, "grad_norm": 0.91796875, "learning_rate": 4.13253121353248e-05, "loss": 0.8499, "step": 3740 }, { "epoch": 0.27526327891469304, "grad_norm": 0.9140625, "learning_rate": 4.1320925449560265e-05, "loss": 0.7634, "step": 3741 }, { "epoch": 0.27533685904805705, "grad_norm": 0.77734375, "learning_rate": 4.131653788788249e-05, "loss": 1.2907, "step": 3742 }, { "epoch": 0.275410439181421, "grad_norm": 0.8984375, "learning_rate": 4.1312149450526946e-05, "loss": 1.1168, "step": 3743 }, { "epoch": 0.275484019314785, "grad_norm": 0.97265625, "learning_rate": 4.130776013772916e-05, "loss": 1.072, "step": 3744 }, { "epoch": 0.275557599448149, "grad_norm": 0.890625, "learning_rate": 4.130336994972467e-05, "loss": 1.2104, "step": 3745 }, { "epoch": 0.275631179581513, "grad_norm": 0.9453125, "learning_rate": 4.1298978886749115e-05, "loss": 0.7803, "step": 3746 }, { "epoch": 0.27570475971487696, "grad_norm": 1.0546875, "learning_rate": 4.129458694903815e-05, "loss": 1.5481, "step": 3747 }, { "epoch": 0.27577833984824096, "grad_norm": 0.7734375, "learning_rate": 4.1290194136827476e-05, "loss": 0.9913, "step": 3748 }, { "epoch": 0.27585191998160496, "grad_norm": 0.76953125, "learning_rate": 4.128580045035285e-05, "loss": 1.2736, "step": 3749 }, { "epoch": 0.27592550011496897, "grad_norm": 0.85546875, "learning_rate": 4.128140588985008e-05, "loss": 1.0355, "step": 3750 }, { "epoch": 0.275999080248333, "grad_norm": 1.421875, "learning_rate": 4.1277010455555e-05, "loss": 1.2461, "step": 3751 }, { "epoch": 0.2760726603816969, "grad_norm": 0.9296875, "learning_rate": 4.127261414770351e-05, "loss": 0.7185, "step": 3752 }, { "epoch": 0.2761462405150609, "grad_norm": 0.8125, "learning_rate": 4.126821696653156e-05, "loss": 0.9304, "step": 3753 }, { "epoch": 0.27621982064842493, "grad_norm": 0.8515625, "learning_rate": 4.126381891227513e-05, "loss": 0.9105, "step": 3754 }, { "epoch": 0.27629340078178893, "grad_norm": 0.96484375, "learning_rate": 4.125941998517026e-05, "loss": 0.9678, "step": 3755 }, { "epoch": 0.2763669809151529, "grad_norm": 0.94140625, "learning_rate": 4.125502018545302e-05, "loss": 1.1382, "step": 3756 }, { "epoch": 0.2764405610485169, "grad_norm": 0.87890625, "learning_rate": 4.125061951335956e-05, "loss": 0.811, "step": 3757 }, { "epoch": 0.2765141411818809, "grad_norm": 0.88671875, "learning_rate": 4.1246217969126036e-05, "loss": 1.2773, "step": 3758 }, { "epoch": 0.2765877213152449, "grad_norm": 1.0390625, "learning_rate": 4.1241815552988675e-05, "loss": 1.1922, "step": 3759 }, { "epoch": 0.2766613014486089, "grad_norm": 0.80078125, "learning_rate": 4.123741226518375e-05, "loss": 0.8484, "step": 3760 }, { "epoch": 0.27673488158197285, "grad_norm": 0.8671875, "learning_rate": 4.1233008105947584e-05, "loss": 1.1286, "step": 3761 }, { "epoch": 0.27680846171533685, "grad_norm": 0.76953125, "learning_rate": 4.1228603075516526e-05, "loss": 0.9195, "step": 3762 }, { "epoch": 0.27688204184870086, "grad_norm": 1.2109375, "learning_rate": 4.1224197174127e-05, "loss": 0.9709, "step": 3763 }, { "epoch": 0.27695562198206486, "grad_norm": 0.9609375, "learning_rate": 4.1219790402015444e-05, "loss": 1.0277, "step": 3764 }, { "epoch": 0.2770292021154288, "grad_norm": 0.890625, "learning_rate": 4.121538275941839e-05, "loss": 0.7648, "step": 3765 }, { "epoch": 0.2771027822487928, "grad_norm": 0.8125, "learning_rate": 4.121097424657235e-05, "loss": 0.6587, "step": 3766 }, { "epoch": 0.2771763623821568, "grad_norm": 0.89453125, "learning_rate": 4.120656486371395e-05, "loss": 0.8048, "step": 3767 }, { "epoch": 0.2772499425155208, "grad_norm": 0.7890625, "learning_rate": 4.120215461107982e-05, "loss": 0.8197, "step": 3768 }, { "epoch": 0.2773235226488848, "grad_norm": 0.71484375, "learning_rate": 4.1197743488906656e-05, "loss": 0.5523, "step": 3769 }, { "epoch": 0.2773971027822488, "grad_norm": 0.98828125, "learning_rate": 4.11933314974312e-05, "loss": 0.8875, "step": 3770 }, { "epoch": 0.2774706829156128, "grad_norm": 1.109375, "learning_rate": 4.1188918636890216e-05, "loss": 0.9472, "step": 3771 }, { "epoch": 0.2775442630489768, "grad_norm": 0.71484375, "learning_rate": 4.118450490752055e-05, "loss": 0.6643, "step": 3772 }, { "epoch": 0.2776178431823408, "grad_norm": 1.03125, "learning_rate": 4.1180090309559075e-05, "loss": 1.3515, "step": 3773 }, { "epoch": 0.27769142331570473, "grad_norm": 0.96484375, "learning_rate": 4.117567484324271e-05, "loss": 1.155, "step": 3774 }, { "epoch": 0.27776500344906874, "grad_norm": 0.921875, "learning_rate": 4.117125850880842e-05, "loss": 0.8974, "step": 3775 }, { "epoch": 0.27783858358243274, "grad_norm": 0.72265625, "learning_rate": 4.116684130649324e-05, "loss": 0.8932, "step": 3776 }, { "epoch": 0.27791216371579675, "grad_norm": 0.80859375, "learning_rate": 4.116242323653422e-05, "loss": 1.1929, "step": 3777 }, { "epoch": 0.27798574384916075, "grad_norm": 0.890625, "learning_rate": 4.1158004299168465e-05, "loss": 0.9176, "step": 3778 }, { "epoch": 0.2780593239825247, "grad_norm": 0.73828125, "learning_rate": 4.1153584494633144e-05, "loss": 0.672, "step": 3779 }, { "epoch": 0.2781329041158887, "grad_norm": 0.99609375, "learning_rate": 4.114916382316546e-05, "loss": 1.2671, "step": 3780 }, { "epoch": 0.2782064842492527, "grad_norm": 0.90234375, "learning_rate": 4.114474228500264e-05, "loss": 1.0137, "step": 3781 }, { "epoch": 0.2782800643826167, "grad_norm": 0.64453125, "learning_rate": 4.1140319880382e-05, "loss": 0.913, "step": 3782 }, { "epoch": 0.27835364451598066, "grad_norm": 0.796875, "learning_rate": 4.113589660954088e-05, "loss": 0.8609, "step": 3783 }, { "epoch": 0.27842722464934466, "grad_norm": 0.859375, "learning_rate": 4.113147247271667e-05, "loss": 0.8475, "step": 3784 }, { "epoch": 0.27850080478270867, "grad_norm": 0.8125, "learning_rate": 4.1127047470146786e-05, "loss": 1.2032, "step": 3785 }, { "epoch": 0.27857438491607267, "grad_norm": 1.0078125, "learning_rate": 4.112262160206873e-05, "loss": 1.2808, "step": 3786 }, { "epoch": 0.2786479650494367, "grad_norm": 0.859375, "learning_rate": 4.1118194868720025e-05, "loss": 0.7413, "step": 3787 }, { "epoch": 0.2787215451828006, "grad_norm": 0.8046875, "learning_rate": 4.111376727033825e-05, "loss": 0.7744, "step": 3788 }, { "epoch": 0.2787951253161646, "grad_norm": 1.0234375, "learning_rate": 4.110933880716101e-05, "loss": 1.0279, "step": 3789 }, { "epoch": 0.27886870544952863, "grad_norm": 0.6953125, "learning_rate": 4.110490947942599e-05, "loss": 0.8174, "step": 3790 }, { "epoch": 0.27894228558289264, "grad_norm": 0.89453125, "learning_rate": 4.1100479287370896e-05, "loss": 0.9783, "step": 3791 }, { "epoch": 0.2790158657162566, "grad_norm": 0.91015625, "learning_rate": 4.109604823123349e-05, "loss": 1.1988, "step": 3792 }, { "epoch": 0.2790894458496206, "grad_norm": 0.9609375, "learning_rate": 4.109161631125157e-05, "loss": 1.1926, "step": 3793 }, { "epoch": 0.2791630259829846, "grad_norm": 0.64453125, "learning_rate": 4.1087183527663e-05, "loss": 0.7411, "step": 3794 }, { "epoch": 0.2792366061163486, "grad_norm": 0.734375, "learning_rate": 4.108274988070567e-05, "loss": 0.967, "step": 3795 }, { "epoch": 0.2793101862497126, "grad_norm": 0.8203125, "learning_rate": 4.107831537061753e-05, "loss": 1.1322, "step": 3796 }, { "epoch": 0.27938376638307655, "grad_norm": 1.078125, "learning_rate": 4.107387999763658e-05, "loss": 0.9204, "step": 3797 }, { "epoch": 0.27945734651644055, "grad_norm": 0.76171875, "learning_rate": 4.1069443762000835e-05, "loss": 0.7403, "step": 3798 }, { "epoch": 0.27953092664980456, "grad_norm": 0.77734375, "learning_rate": 4.1065006663948406e-05, "loss": 0.9051, "step": 3799 }, { "epoch": 0.27960450678316856, "grad_norm": 1.1796875, "learning_rate": 4.106056870371741e-05, "loss": 1.509, "step": 3800 }, { "epoch": 0.2796780869165325, "grad_norm": 0.73828125, "learning_rate": 4.1056129881546024e-05, "loss": 0.878, "step": 3801 }, { "epoch": 0.2797516670498965, "grad_norm": 0.9609375, "learning_rate": 4.105169019767248e-05, "loss": 0.8086, "step": 3802 }, { "epoch": 0.2798252471832605, "grad_norm": 1.1796875, "learning_rate": 4.104724965233503e-05, "loss": 0.8789, "step": 3803 }, { "epoch": 0.2798988273166245, "grad_norm": 0.76953125, "learning_rate": 4.1042808245772005e-05, "loss": 1.0014, "step": 3804 }, { "epoch": 0.2799724074499885, "grad_norm": 0.83984375, "learning_rate": 4.103836597822176e-05, "loss": 0.8212, "step": 3805 }, { "epoch": 0.2800459875833525, "grad_norm": 1.109375, "learning_rate": 4.1033922849922706e-05, "loss": 1.792, "step": 3806 }, { "epoch": 0.2801195677167165, "grad_norm": 0.86328125, "learning_rate": 4.1029478861113295e-05, "loss": 0.8233, "step": 3807 }, { "epoch": 0.2801931478500805, "grad_norm": 0.66015625, "learning_rate": 4.102503401203203e-05, "loss": 0.8307, "step": 3808 }, { "epoch": 0.2802667279834445, "grad_norm": 0.62890625, "learning_rate": 4.102058830291746e-05, "loss": 0.7511, "step": 3809 }, { "epoch": 0.28034030811680843, "grad_norm": 0.74609375, "learning_rate": 4.1016141734008165e-05, "loss": 1.0038, "step": 3810 }, { "epoch": 0.28041388825017244, "grad_norm": 0.83984375, "learning_rate": 4.1011694305542805e-05, "loss": 0.8683, "step": 3811 }, { "epoch": 0.28048746838353644, "grad_norm": 1.0078125, "learning_rate": 4.1007246017760047e-05, "loss": 1.0173, "step": 3812 }, { "epoch": 0.28056104851690045, "grad_norm": 1.1328125, "learning_rate": 4.100279687089863e-05, "loss": 1.0265, "step": 3813 }, { "epoch": 0.28063462865026445, "grad_norm": 0.80078125, "learning_rate": 4.099834686519733e-05, "loss": 0.9441, "step": 3814 }, { "epoch": 0.2807082087836284, "grad_norm": 0.703125, "learning_rate": 4.099389600089497e-05, "loss": 0.7323, "step": 3815 }, { "epoch": 0.2807817889169924, "grad_norm": 0.90234375, "learning_rate": 4.098944427823041e-05, "loss": 1.2657, "step": 3816 }, { "epoch": 0.2808553690503564, "grad_norm": 0.96484375, "learning_rate": 4.0984991697442596e-05, "loss": 0.7879, "step": 3817 }, { "epoch": 0.2809289491837204, "grad_norm": 0.80859375, "learning_rate": 4.098053825877046e-05, "loss": 0.7809, "step": 3818 }, { "epoch": 0.28100252931708436, "grad_norm": 0.765625, "learning_rate": 4.097608396245301e-05, "loss": 0.8438, "step": 3819 }, { "epoch": 0.28107610945044836, "grad_norm": 0.7421875, "learning_rate": 4.097162880872932e-05, "loss": 0.6625, "step": 3820 }, { "epoch": 0.28114968958381237, "grad_norm": 0.7890625, "learning_rate": 4.096717279783847e-05, "loss": 0.8132, "step": 3821 }, { "epoch": 0.28122326971717637, "grad_norm": 1.0234375, "learning_rate": 4.096271593001962e-05, "loss": 1.2908, "step": 3822 }, { "epoch": 0.2812968498505404, "grad_norm": 1.3359375, "learning_rate": 4.095825820551195e-05, "loss": 1.0392, "step": 3823 }, { "epoch": 0.2813704299839043, "grad_norm": 0.85546875, "learning_rate": 4.09537996245547e-05, "loss": 0.8572, "step": 3824 }, { "epoch": 0.28144401011726833, "grad_norm": 0.796875, "learning_rate": 4.094934018738716e-05, "loss": 0.856, "step": 3825 }, { "epoch": 0.28151759025063233, "grad_norm": 0.71875, "learning_rate": 4.094487989424866e-05, "loss": 0.7391, "step": 3826 }, { "epoch": 0.28159117038399634, "grad_norm": 1.0, "learning_rate": 4.094041874537857e-05, "loss": 1.0149, "step": 3827 }, { "epoch": 0.2816647505173603, "grad_norm": 1.2421875, "learning_rate": 4.0935956741016313e-05, "loss": 1.3908, "step": 3828 }, { "epoch": 0.2817383306507243, "grad_norm": 1.0546875, "learning_rate": 4.0931493881401364e-05, "loss": 0.9702, "step": 3829 }, { "epoch": 0.2818119107840883, "grad_norm": 0.90234375, "learning_rate": 4.0927030166773217e-05, "loss": 1.2533, "step": 3830 }, { "epoch": 0.2818854909174523, "grad_norm": 1.0, "learning_rate": 4.092256559737144e-05, "loss": 0.9352, "step": 3831 }, { "epoch": 0.2819590710508163, "grad_norm": 1.0078125, "learning_rate": 4.091810017343565e-05, "loss": 0.9035, "step": 3832 }, { "epoch": 0.28203265118418025, "grad_norm": 0.82421875, "learning_rate": 4.0913633895205484e-05, "loss": 0.8236, "step": 3833 }, { "epoch": 0.28210623131754425, "grad_norm": 0.92578125, "learning_rate": 4.090916676292065e-05, "loss": 0.9557, "step": 3834 }, { "epoch": 0.28217981145090826, "grad_norm": 0.80859375, "learning_rate": 4.0904698776820885e-05, "loss": 0.9953, "step": 3835 }, { "epoch": 0.28225339158427226, "grad_norm": 1.2421875, "learning_rate": 4.090022993714596e-05, "loss": 1.0776, "step": 3836 }, { "epoch": 0.28232697171763627, "grad_norm": 1.0703125, "learning_rate": 4.089576024413574e-05, "loss": 1.0095, "step": 3837 }, { "epoch": 0.2824005518510002, "grad_norm": 0.85546875, "learning_rate": 4.089128969803009e-05, "loss": 0.8806, "step": 3838 }, { "epoch": 0.2824741319843642, "grad_norm": 0.93359375, "learning_rate": 4.088681829906893e-05, "loss": 0.944, "step": 3839 }, { "epoch": 0.2825477121177282, "grad_norm": 0.83203125, "learning_rate": 4.0882346047492235e-05, "loss": 0.8527, "step": 3840 }, { "epoch": 0.2826212922510922, "grad_norm": 0.875, "learning_rate": 4.087787294354004e-05, "loss": 1.0146, "step": 3841 }, { "epoch": 0.2826948723844562, "grad_norm": 0.96484375, "learning_rate": 4.0873398987452384e-05, "loss": 1.078, "step": 3842 }, { "epoch": 0.2827684525178202, "grad_norm": 0.7578125, "learning_rate": 4.086892417946938e-05, "loss": 0.8147, "step": 3843 }, { "epoch": 0.2828420326511842, "grad_norm": 1.0, "learning_rate": 4.086444851983119e-05, "loss": 1.2531, "step": 3844 }, { "epoch": 0.2829156127845482, "grad_norm": 0.80078125, "learning_rate": 4.085997200877803e-05, "loss": 1.0398, "step": 3845 }, { "epoch": 0.2829891929179122, "grad_norm": 0.90625, "learning_rate": 4.0855494646550094e-05, "loss": 0.8554, "step": 3846 }, { "epoch": 0.28306277305127614, "grad_norm": 0.90625, "learning_rate": 4.085101643338774e-05, "loss": 0.8474, "step": 3847 }, { "epoch": 0.28313635318464014, "grad_norm": 0.95703125, "learning_rate": 4.084653736953125e-05, "loss": 0.7307, "step": 3848 }, { "epoch": 0.28320993331800415, "grad_norm": 1.1015625, "learning_rate": 4.084205745522104e-05, "loss": 1.3031, "step": 3849 }, { "epoch": 0.28328351345136815, "grad_norm": 0.9140625, "learning_rate": 4.0837576690697524e-05, "loss": 1.1648, "step": 3850 }, { "epoch": 0.2833570935847321, "grad_norm": 0.71484375, "learning_rate": 4.083309507620118e-05, "loss": 0.7618, "step": 3851 }, { "epoch": 0.2834306737180961, "grad_norm": 0.75390625, "learning_rate": 4.0828612611972526e-05, "loss": 0.686, "step": 3852 }, { "epoch": 0.2835042538514601, "grad_norm": 0.69140625, "learning_rate": 4.0824129298252126e-05, "loss": 0.6035, "step": 3853 }, { "epoch": 0.2835778339848241, "grad_norm": 0.88671875, "learning_rate": 4.08196451352806e-05, "loss": 1.3134, "step": 3854 }, { "epoch": 0.2836514141181881, "grad_norm": 0.6875, "learning_rate": 4.08151601232986e-05, "loss": 0.8152, "step": 3855 }, { "epoch": 0.28372499425155207, "grad_norm": 0.8671875, "learning_rate": 4.081067426254682e-05, "loss": 1.1926, "step": 3856 }, { "epoch": 0.28379857438491607, "grad_norm": 0.875, "learning_rate": 4.0806187553266024e-05, "loss": 1.1827, "step": 3857 }, { "epoch": 0.2838721545182801, "grad_norm": 1.1796875, "learning_rate": 4.080169999569699e-05, "loss": 0.921, "step": 3858 }, { "epoch": 0.2839457346516441, "grad_norm": 0.7421875, "learning_rate": 4.079721159008056e-05, "loss": 0.6629, "step": 3859 }, { "epoch": 0.284019314785008, "grad_norm": 0.796875, "learning_rate": 4.079272233665763e-05, "loss": 0.7299, "step": 3860 }, { "epoch": 0.28409289491837203, "grad_norm": 0.8984375, "learning_rate": 4.078823223566911e-05, "loss": 0.9084, "step": 3861 }, { "epoch": 0.28416647505173603, "grad_norm": 1.0390625, "learning_rate": 4.0783741287355994e-05, "loss": 1.5359, "step": 3862 }, { "epoch": 0.28424005518510004, "grad_norm": 0.7109375, "learning_rate": 4.077924949195929e-05, "loss": 0.8932, "step": 3863 }, { "epoch": 0.28431363531846404, "grad_norm": 0.6796875, "learning_rate": 4.077475684972006e-05, "loss": 0.8656, "step": 3864 }, { "epoch": 0.284387215451828, "grad_norm": 0.73828125, "learning_rate": 4.0770263360879435e-05, "loss": 0.7003, "step": 3865 }, { "epoch": 0.284460795585192, "grad_norm": 0.828125, "learning_rate": 4.076576902567856e-05, "loss": 1.0429, "step": 3866 }, { "epoch": 0.284534375718556, "grad_norm": 1.0859375, "learning_rate": 4.076127384435864e-05, "loss": 1.1955, "step": 3867 }, { "epoch": 0.28460795585192, "grad_norm": 0.75, "learning_rate": 4.075677781716092e-05, "loss": 0.8434, "step": 3868 }, { "epoch": 0.28468153598528395, "grad_norm": 1.1328125, "learning_rate": 4.0752280944326694e-05, "loss": 0.8366, "step": 3869 }, { "epoch": 0.28475511611864796, "grad_norm": 0.8984375, "learning_rate": 4.07477832260973e-05, "loss": 0.9159, "step": 3870 }, { "epoch": 0.28482869625201196, "grad_norm": 0.890625, "learning_rate": 4.074328466271413e-05, "loss": 1.3194, "step": 3871 }, { "epoch": 0.28490227638537596, "grad_norm": 0.8828125, "learning_rate": 4.07387852544186e-05, "loss": 1.0448, "step": 3872 }, { "epoch": 0.28497585651873997, "grad_norm": 1.0625, "learning_rate": 4.0734285001452194e-05, "loss": 1.4761, "step": 3873 }, { "epoch": 0.2850494366521039, "grad_norm": 1.0390625, "learning_rate": 4.072978390405643e-05, "loss": 0.9652, "step": 3874 }, { "epoch": 0.2851230167854679, "grad_norm": 1.203125, "learning_rate": 4.0725281962472875e-05, "loss": 1.1459, "step": 3875 }, { "epoch": 0.2851965969188319, "grad_norm": 0.90234375, "learning_rate": 4.072077917694314e-05, "loss": 0.8921, "step": 3876 }, { "epoch": 0.28527017705219593, "grad_norm": 0.70703125, "learning_rate": 4.071627554770887e-05, "loss": 0.8015, "step": 3877 }, { "epoch": 0.2853437571855599, "grad_norm": 0.69921875, "learning_rate": 4.071177107501178e-05, "loss": 0.803, "step": 3878 }, { "epoch": 0.2854173373189239, "grad_norm": 0.8125, "learning_rate": 4.070726575909361e-05, "loss": 1.129, "step": 3879 }, { "epoch": 0.2854909174522879, "grad_norm": 0.64453125, "learning_rate": 4.0702759600196156e-05, "loss": 0.7708, "step": 3880 }, { "epoch": 0.2855644975856519, "grad_norm": 0.80078125, "learning_rate": 4.069825259856125e-05, "loss": 0.656, "step": 3881 }, { "epoch": 0.2856380777190159, "grad_norm": 0.79296875, "learning_rate": 4.0693744754430786e-05, "loss": 0.7339, "step": 3882 }, { "epoch": 0.28571165785237984, "grad_norm": 0.95703125, "learning_rate": 4.0689236068046665e-05, "loss": 1.2382, "step": 3883 }, { "epoch": 0.28578523798574385, "grad_norm": 0.98046875, "learning_rate": 4.068472653965089e-05, "loss": 0.8877, "step": 3884 }, { "epoch": 0.28585881811910785, "grad_norm": 0.7890625, "learning_rate": 4.0680216169485466e-05, "loss": 1.1071, "step": 3885 }, { "epoch": 0.28593239825247185, "grad_norm": 1.0234375, "learning_rate": 4.067570495779245e-05, "loss": 1.5672, "step": 3886 }, { "epoch": 0.2860059783858358, "grad_norm": 0.73046875, "learning_rate": 4.067119290481396e-05, "loss": 0.9924, "step": 3887 }, { "epoch": 0.2860795585191998, "grad_norm": 0.8125, "learning_rate": 4.066668001079214e-05, "loss": 1.0488, "step": 3888 }, { "epoch": 0.2861531386525638, "grad_norm": 0.76953125, "learning_rate": 4.066216627596921e-05, "loss": 0.7105, "step": 3889 }, { "epoch": 0.2862267187859278, "grad_norm": 1.1484375, "learning_rate": 4.0657651700587376e-05, "loss": 1.1021, "step": 3890 }, { "epoch": 0.2863002989192918, "grad_norm": 0.75390625, "learning_rate": 4.065313628488896e-05, "loss": 0.8837, "step": 3891 }, { "epoch": 0.28637387905265577, "grad_norm": 0.89453125, "learning_rate": 4.064862002911628e-05, "loss": 1.0584, "step": 3892 }, { "epoch": 0.28644745918601977, "grad_norm": 0.80859375, "learning_rate": 4.064410293351172e-05, "loss": 0.8034, "step": 3893 }, { "epoch": 0.2865210393193838, "grad_norm": 1.0078125, "learning_rate": 4.0639584998317706e-05, "loss": 1.5604, "step": 3894 }, { "epoch": 0.2865946194527478, "grad_norm": 0.7421875, "learning_rate": 4.0635066223776706e-05, "loss": 0.8334, "step": 3895 }, { "epoch": 0.2866681995861117, "grad_norm": 0.98828125, "learning_rate": 4.0630546610131224e-05, "loss": 1.2412, "step": 3896 }, { "epoch": 0.28674177971947573, "grad_norm": 0.8046875, "learning_rate": 4.062602615762384e-05, "loss": 0.7904, "step": 3897 }, { "epoch": 0.28681535985283974, "grad_norm": 0.87890625, "learning_rate": 4.0621504866497136e-05, "loss": 0.9662, "step": 3898 }, { "epoch": 0.28688893998620374, "grad_norm": 0.94921875, "learning_rate": 4.061698273699377e-05, "loss": 1.0871, "step": 3899 }, { "epoch": 0.28696252011956774, "grad_norm": 1.0625, "learning_rate": 4.0612459769356434e-05, "loss": 1.1765, "step": 3900 }, { "epoch": 0.2870361002529317, "grad_norm": 0.93359375, "learning_rate": 4.060793596382788e-05, "loss": 0.6849, "step": 3901 }, { "epoch": 0.2871096803862957, "grad_norm": 0.84765625, "learning_rate": 4.060341132065088e-05, "loss": 1.3514, "step": 3902 }, { "epoch": 0.2871832605196597, "grad_norm": 1.03125, "learning_rate": 4.0598885840068264e-05, "loss": 1.0643, "step": 3903 }, { "epoch": 0.2872568406530237, "grad_norm": 0.80078125, "learning_rate": 4.05943595223229e-05, "loss": 0.9268, "step": 3904 }, { "epoch": 0.28733042078638765, "grad_norm": 0.796875, "learning_rate": 4.0589832367657724e-05, "loss": 1.1582, "step": 3905 }, { "epoch": 0.28740400091975166, "grad_norm": 0.77734375, "learning_rate": 4.058530437631568e-05, "loss": 0.8383, "step": 3906 }, { "epoch": 0.28747758105311566, "grad_norm": 1.0, "learning_rate": 4.05807755485398e-05, "loss": 1.1832, "step": 3907 }, { "epoch": 0.28755116118647966, "grad_norm": 0.99609375, "learning_rate": 4.0576245884573105e-05, "loss": 1.1852, "step": 3908 }, { "epoch": 0.28762474131984367, "grad_norm": 1.09375, "learning_rate": 4.057171538465873e-05, "loss": 0.9573, "step": 3909 }, { "epoch": 0.2876983214532076, "grad_norm": 0.83203125, "learning_rate": 4.0567184049039796e-05, "loss": 0.8536, "step": 3910 }, { "epoch": 0.2877719015865716, "grad_norm": 1.09375, "learning_rate": 4.056265187795949e-05, "loss": 1.3186, "step": 3911 }, { "epoch": 0.2878454817199356, "grad_norm": 0.84765625, "learning_rate": 4.055811887166106e-05, "loss": 1.015, "step": 3912 }, { "epoch": 0.28791906185329963, "grad_norm": 0.67578125, "learning_rate": 4.055358503038777e-05, "loss": 0.789, "step": 3913 }, { "epoch": 0.2879926419866636, "grad_norm": 0.8203125, "learning_rate": 4.054905035438295e-05, "loss": 0.8662, "step": 3914 }, { "epoch": 0.2880662221200276, "grad_norm": 0.75, "learning_rate": 4.054451484388996e-05, "loss": 0.5611, "step": 3915 }, { "epoch": 0.2881398022533916, "grad_norm": 0.984375, "learning_rate": 4.0539978499152235e-05, "loss": 1.1378, "step": 3916 }, { "epoch": 0.2882133823867556, "grad_norm": 0.734375, "learning_rate": 4.0535441320413194e-05, "loss": 0.9651, "step": 3917 }, { "epoch": 0.2882869625201196, "grad_norm": 0.80078125, "learning_rate": 4.053090330791637e-05, "loss": 1.051, "step": 3918 }, { "epoch": 0.28836054265348354, "grad_norm": 0.94921875, "learning_rate": 4.0526364461905295e-05, "loss": 1.1585, "step": 3919 }, { "epoch": 0.28843412278684755, "grad_norm": 0.86328125, "learning_rate": 4.052182478262357e-05, "loss": 1.3894, "step": 3920 }, { "epoch": 0.28850770292021155, "grad_norm": 0.703125, "learning_rate": 4.0517284270314826e-05, "loss": 0.6976, "step": 3921 }, { "epoch": 0.28858128305357555, "grad_norm": 0.96484375, "learning_rate": 4.051274292522273e-05, "loss": 0.9478, "step": 3922 }, { "epoch": 0.2886548631869395, "grad_norm": 0.78125, "learning_rate": 4.050820074759104e-05, "loss": 1.0256, "step": 3923 }, { "epoch": 0.2887284433203035, "grad_norm": 0.91796875, "learning_rate": 4.050365773766349e-05, "loss": 0.939, "step": 3924 }, { "epoch": 0.2888020234536675, "grad_norm": 0.7890625, "learning_rate": 4.0499113895683927e-05, "loss": 0.9522, "step": 3925 }, { "epoch": 0.2888756035870315, "grad_norm": 0.85546875, "learning_rate": 4.049456922189618e-05, "loss": 0.9204, "step": 3926 }, { "epoch": 0.2889491837203955, "grad_norm": 0.9609375, "learning_rate": 4.049002371654418e-05, "loss": 0.7246, "step": 3927 }, { "epoch": 0.28902276385375947, "grad_norm": 1.0546875, "learning_rate": 4.048547737987185e-05, "loss": 0.8078, "step": 3928 }, { "epoch": 0.28909634398712347, "grad_norm": 1.0859375, "learning_rate": 4.04809302121232e-05, "loss": 0.992, "step": 3929 }, { "epoch": 0.2891699241204875, "grad_norm": 0.84375, "learning_rate": 4.047638221354228e-05, "loss": 0.9145, "step": 3930 }, { "epoch": 0.2892435042538515, "grad_norm": 0.91796875, "learning_rate": 4.047183338437314e-05, "loss": 0.995, "step": 3931 }, { "epoch": 0.28931708438721543, "grad_norm": 1.0859375, "learning_rate": 4.046728372485994e-05, "loss": 1.2221, "step": 3932 }, { "epoch": 0.28939066452057943, "grad_norm": 0.87890625, "learning_rate": 4.046273323524682e-05, "loss": 0.8771, "step": 3933 }, { "epoch": 0.28946424465394344, "grad_norm": 0.78125, "learning_rate": 4.0458181915778026e-05, "loss": 1.0416, "step": 3934 }, { "epoch": 0.28953782478730744, "grad_norm": 0.84375, "learning_rate": 4.0453629766697796e-05, "loss": 0.9703, "step": 3935 }, { "epoch": 0.28961140492067144, "grad_norm": 0.78125, "learning_rate": 4.0449076788250446e-05, "loss": 0.7855, "step": 3936 }, { "epoch": 0.2896849850540354, "grad_norm": 1.125, "learning_rate": 4.044452298068033e-05, "loss": 1.1892, "step": 3937 }, { "epoch": 0.2897585651873994, "grad_norm": 0.73828125, "learning_rate": 4.043996834423183e-05, "loss": 1.0772, "step": 3938 }, { "epoch": 0.2898321453207634, "grad_norm": 0.6796875, "learning_rate": 4.043541287914939e-05, "loss": 0.6697, "step": 3939 }, { "epoch": 0.2899057254541274, "grad_norm": 0.83984375, "learning_rate": 4.043085658567749e-05, "loss": 0.8478, "step": 3940 }, { "epoch": 0.28997930558749135, "grad_norm": 1.0390625, "learning_rate": 4.042629946406067e-05, "loss": 1.2608, "step": 3941 }, { "epoch": 0.29005288572085536, "grad_norm": 0.80078125, "learning_rate": 4.042174151454349e-05, "loss": 0.935, "step": 3942 }, { "epoch": 0.29012646585421936, "grad_norm": 0.7890625, "learning_rate": 4.0417182737370574e-05, "loss": 0.989, "step": 3943 }, { "epoch": 0.29020004598758337, "grad_norm": 1.15625, "learning_rate": 4.041262313278657e-05, "loss": 1.1673, "step": 3944 }, { "epoch": 0.29027362612094737, "grad_norm": 1.09375, "learning_rate": 4.040806270103621e-05, "loss": 1.3329, "step": 3945 }, { "epoch": 0.2903472062543113, "grad_norm": 0.95703125, "learning_rate": 4.0403501442364213e-05, "loss": 0.9676, "step": 3946 }, { "epoch": 0.2904207863876753, "grad_norm": 0.87890625, "learning_rate": 4.039893935701539e-05, "loss": 0.8828, "step": 3947 }, { "epoch": 0.2904943665210393, "grad_norm": 0.9609375, "learning_rate": 4.039437644523458e-05, "loss": 0.9777, "step": 3948 }, { "epoch": 0.29056794665440333, "grad_norm": 0.859375, "learning_rate": 4.038981270726666e-05, "loss": 0.8939, "step": 3949 }, { "epoch": 0.2906415267877673, "grad_norm": 0.79296875, "learning_rate": 4.038524814335656e-05, "loss": 0.8317, "step": 3950 }, { "epoch": 0.2907151069211313, "grad_norm": 0.875, "learning_rate": 4.0380682753749245e-05, "loss": 1.1056, "step": 3951 }, { "epoch": 0.2907886870544953, "grad_norm": 0.7109375, "learning_rate": 4.037611653868974e-05, "loss": 0.8329, "step": 3952 }, { "epoch": 0.2908622671878593, "grad_norm": 0.84765625, "learning_rate": 4.03715494984231e-05, "loss": 1.1901, "step": 3953 }, { "epoch": 0.2909358473212233, "grad_norm": 0.78125, "learning_rate": 4.0366981633194434e-05, "loss": 0.8132, "step": 3954 }, { "epoch": 0.29100942745458724, "grad_norm": 1.046875, "learning_rate": 4.036241294324889e-05, "loss": 1.1858, "step": 3955 }, { "epoch": 0.29108300758795125, "grad_norm": 0.73828125, "learning_rate": 4.035784342883165e-05, "loss": 0.6956, "step": 3956 }, { "epoch": 0.29115658772131525, "grad_norm": 0.73828125, "learning_rate": 4.0353273090187974e-05, "loss": 1.0991, "step": 3957 }, { "epoch": 0.29123016785467926, "grad_norm": 0.890625, "learning_rate": 4.034870192756311e-05, "loss": 1.155, "step": 3958 }, { "epoch": 0.2913037479880432, "grad_norm": 1.2421875, "learning_rate": 4.034412994120242e-05, "loss": 1.4814, "step": 3959 }, { "epoch": 0.2913773281214072, "grad_norm": 0.77734375, "learning_rate": 4.033955713135126e-05, "loss": 0.8542, "step": 3960 }, { "epoch": 0.2914509082547712, "grad_norm": 0.75, "learning_rate": 4.033498349825502e-05, "loss": 0.8671, "step": 3961 }, { "epoch": 0.2915244883881352, "grad_norm": 0.8125, "learning_rate": 4.03304090421592e-05, "loss": 0.8192, "step": 3962 }, { "epoch": 0.2915980685214992, "grad_norm": 0.7421875, "learning_rate": 4.032583376330927e-05, "loss": 0.7636, "step": 3963 }, { "epoch": 0.29167164865486317, "grad_norm": 0.796875, "learning_rate": 4.032125766195079e-05, "loss": 0.7492, "step": 3964 }, { "epoch": 0.2917452287882272, "grad_norm": 0.9921875, "learning_rate": 4.031668073832935e-05, "loss": 0.8939, "step": 3965 }, { "epoch": 0.2918188089215912, "grad_norm": 0.8671875, "learning_rate": 4.031210299269059e-05, "loss": 1.0382, "step": 3966 }, { "epoch": 0.2918923890549552, "grad_norm": 0.75390625, "learning_rate": 4.030752442528017e-05, "loss": 0.8662, "step": 3967 }, { "epoch": 0.29196596918831913, "grad_norm": 0.8125, "learning_rate": 4.030294503634384e-05, "loss": 1.2106, "step": 3968 }, { "epoch": 0.29203954932168313, "grad_norm": 0.765625, "learning_rate": 4.029836482612734e-05, "loss": 0.7495, "step": 3969 }, { "epoch": 0.29211312945504714, "grad_norm": 0.71875, "learning_rate": 4.0293783794876504e-05, "loss": 0.4663, "step": 3970 }, { "epoch": 0.29218670958841114, "grad_norm": 0.72265625, "learning_rate": 4.0289201942837174e-05, "loss": 0.9168, "step": 3971 }, { "epoch": 0.29226028972177515, "grad_norm": 0.83203125, "learning_rate": 4.028461927025525e-05, "loss": 0.6534, "step": 3972 }, { "epoch": 0.2923338698551391, "grad_norm": 1.046875, "learning_rate": 4.028003577737669e-05, "loss": 0.7976, "step": 3973 }, { "epoch": 0.2924074499885031, "grad_norm": 1.0546875, "learning_rate": 4.0275451464447454e-05, "loss": 0.9889, "step": 3974 }, { "epoch": 0.2924810301218671, "grad_norm": 1.015625, "learning_rate": 4.02708663317136e-05, "loss": 1.248, "step": 3975 }, { "epoch": 0.2925546102552311, "grad_norm": 1.0625, "learning_rate": 4.0266280379421195e-05, "loss": 0.9817, "step": 3976 }, { "epoch": 0.29262819038859506, "grad_norm": 0.7734375, "learning_rate": 4.0261693607816344e-05, "loss": 0.7109, "step": 3977 }, { "epoch": 0.29270177052195906, "grad_norm": 1.046875, "learning_rate": 4.025710601714523e-05, "loss": 1.7039, "step": 3978 }, { "epoch": 0.29277535065532306, "grad_norm": 0.796875, "learning_rate": 4.025251760765405e-05, "loss": 0.9992, "step": 3979 }, { "epoch": 0.29284893078868707, "grad_norm": 0.75390625, "learning_rate": 4.024792837958906e-05, "loss": 0.6932, "step": 3980 }, { "epoch": 0.29292251092205107, "grad_norm": 0.87890625, "learning_rate": 4.0243338333196556e-05, "loss": 0.812, "step": 3981 }, { "epoch": 0.292996091055415, "grad_norm": 0.6953125, "learning_rate": 4.023874746872287e-05, "loss": 0.5889, "step": 3982 }, { "epoch": 0.293069671188779, "grad_norm": 1.8671875, "learning_rate": 4.023415578641438e-05, "loss": 1.2658, "step": 3983 }, { "epoch": 0.29314325132214303, "grad_norm": 0.85546875, "learning_rate": 4.022956328651754e-05, "loss": 0.8574, "step": 3984 }, { "epoch": 0.29321683145550703, "grad_norm": 0.78515625, "learning_rate": 4.022496996927879e-05, "loss": 0.7555, "step": 3985 }, { "epoch": 0.293290411588871, "grad_norm": 0.8984375, "learning_rate": 4.022037583494466e-05, "loss": 0.921, "step": 3986 }, { "epoch": 0.293363991722235, "grad_norm": 0.76171875, "learning_rate": 4.021578088376171e-05, "loss": 0.7318, "step": 3987 }, { "epoch": 0.293437571855599, "grad_norm": 0.875, "learning_rate": 4.021118511597654e-05, "loss": 0.6764, "step": 3988 }, { "epoch": 0.293511151988963, "grad_norm": 0.8984375, "learning_rate": 4.0206588531835795e-05, "loss": 1.1966, "step": 3989 }, { "epoch": 0.293584732122327, "grad_norm": 0.70703125, "learning_rate": 4.0201991131586156e-05, "loss": 0.663, "step": 3990 }, { "epoch": 0.29365831225569095, "grad_norm": 0.7734375, "learning_rate": 4.0197392915474375e-05, "loss": 0.6867, "step": 3991 }, { "epoch": 0.29373189238905495, "grad_norm": 0.796875, "learning_rate": 4.019279388374722e-05, "loss": 0.9117, "step": 3992 }, { "epoch": 0.29380547252241895, "grad_norm": 0.9609375, "learning_rate": 4.01881940366515e-05, "loss": 1.0109, "step": 3993 }, { "epoch": 0.29387905265578296, "grad_norm": 0.94140625, "learning_rate": 4.0183593374434106e-05, "loss": 0.8342, "step": 3994 }, { "epoch": 0.2939526327891469, "grad_norm": 0.8515625, "learning_rate": 4.0178991897341925e-05, "loss": 1.0372, "step": 3995 }, { "epoch": 0.2940262129225109, "grad_norm": 0.6953125, "learning_rate": 4.017438960562192e-05, "loss": 0.7008, "step": 3996 }, { "epoch": 0.2940997930558749, "grad_norm": 0.71484375, "learning_rate": 4.0169786499521083e-05, "loss": 0.7199, "step": 3997 }, { "epoch": 0.2941733731892389, "grad_norm": 0.953125, "learning_rate": 4.0165182579286467e-05, "loss": 1.1108, "step": 3998 }, { "epoch": 0.2942469533226029, "grad_norm": 0.6640625, "learning_rate": 4.016057784516513e-05, "loss": 0.6917, "step": 3999 }, { "epoch": 0.29432053345596687, "grad_norm": 0.80859375, "learning_rate": 4.015597229740422e-05, "loss": 0.8002, "step": 4000 }, { "epoch": 0.2943941135893309, "grad_norm": 0.9609375, "learning_rate": 4.015136593625091e-05, "loss": 1.214, "step": 4001 }, { "epoch": 0.2944676937226949, "grad_norm": 0.74609375, "learning_rate": 4.0146758761952396e-05, "loss": 0.6247, "step": 4002 }, { "epoch": 0.2945412738560589, "grad_norm": 1.1484375, "learning_rate": 4.014215077475596e-05, "loss": 1.5746, "step": 4003 }, { "epoch": 0.29461485398942283, "grad_norm": 1.171875, "learning_rate": 4.013754197490888e-05, "loss": 1.0741, "step": 4004 }, { "epoch": 0.29468843412278684, "grad_norm": 0.89453125, "learning_rate": 4.0132932362658516e-05, "loss": 1.278, "step": 4005 }, { "epoch": 0.29476201425615084, "grad_norm": 0.9375, "learning_rate": 4.0128321938252254e-05, "loss": 1.0738, "step": 4006 }, { "epoch": 0.29483559438951484, "grad_norm": 1.828125, "learning_rate": 4.012371070193753e-05, "loss": 0.8013, "step": 4007 }, { "epoch": 0.29490917452287885, "grad_norm": 0.82421875, "learning_rate": 4.011909865396181e-05, "loss": 1.0525, "step": 4008 }, { "epoch": 0.2949827546562428, "grad_norm": 0.7890625, "learning_rate": 4.011448579457263e-05, "loss": 0.8665, "step": 4009 }, { "epoch": 0.2950563347896068, "grad_norm": 1.1328125, "learning_rate": 4.010987212401754e-05, "loss": 1.0984, "step": 4010 }, { "epoch": 0.2951299149229708, "grad_norm": 0.953125, "learning_rate": 4.010525764254415e-05, "loss": 0.7722, "step": 4011 }, { "epoch": 0.2952034950563348, "grad_norm": 0.82421875, "learning_rate": 4.010064235040012e-05, "loss": 1.1803, "step": 4012 }, { "epoch": 0.29527707518969876, "grad_norm": 1.125, "learning_rate": 4.009602624783312e-05, "loss": 1.4804, "step": 4013 }, { "epoch": 0.29535065532306276, "grad_norm": 0.953125, "learning_rate": 4.009140933509092e-05, "loss": 1.0147, "step": 4014 }, { "epoch": 0.29542423545642676, "grad_norm": 0.80859375, "learning_rate": 4.008679161242128e-05, "loss": 0.826, "step": 4015 }, { "epoch": 0.29549781558979077, "grad_norm": 0.67578125, "learning_rate": 4.008217308007203e-05, "loss": 0.8106, "step": 4016 }, { "epoch": 0.2955713957231548, "grad_norm": 0.796875, "learning_rate": 4.007755373829103e-05, "loss": 1.3554, "step": 4017 }, { "epoch": 0.2956449758565187, "grad_norm": 0.9375, "learning_rate": 4.00729335873262e-05, "loss": 1.0787, "step": 4018 }, { "epoch": 0.2957185559898827, "grad_norm": 0.94140625, "learning_rate": 4.00683126274255e-05, "loss": 1.0574, "step": 4019 }, { "epoch": 0.29579213612324673, "grad_norm": 1.1953125, "learning_rate": 4.0063690858836914e-05, "loss": 0.8312, "step": 4020 }, { "epoch": 0.29586571625661073, "grad_norm": 0.859375, "learning_rate": 4.0059068281808495e-05, "loss": 0.9378, "step": 4021 }, { "epoch": 0.2959392963899747, "grad_norm": 0.82421875, "learning_rate": 4.0054444896588326e-05, "loss": 0.832, "step": 4022 }, { "epoch": 0.2960128765233387, "grad_norm": 0.953125, "learning_rate": 4.0049820703424534e-05, "loss": 1.4243, "step": 4023 }, { "epoch": 0.2960864566567027, "grad_norm": 1.125, "learning_rate": 4.0045195702565285e-05, "loss": 0.8421, "step": 4024 }, { "epoch": 0.2961600367900667, "grad_norm": 0.95703125, "learning_rate": 4.0040569894258794e-05, "loss": 1.042, "step": 4025 }, { "epoch": 0.2962336169234307, "grad_norm": 0.83984375, "learning_rate": 4.003594327875334e-05, "loss": 0.8849, "step": 4026 }, { "epoch": 0.29630719705679465, "grad_norm": 0.92578125, "learning_rate": 4.003131585629719e-05, "loss": 1.2332, "step": 4027 }, { "epoch": 0.29638077719015865, "grad_norm": 0.7421875, "learning_rate": 4.002668762713873e-05, "loss": 0.7748, "step": 4028 }, { "epoch": 0.29645435732352265, "grad_norm": 1.0, "learning_rate": 4.002205859152631e-05, "loss": 1.0201, "step": 4029 }, { "epoch": 0.29652793745688666, "grad_norm": 0.7421875, "learning_rate": 4.0017428749708385e-05, "loss": 0.6225, "step": 4030 }, { "epoch": 0.2966015175902506, "grad_norm": 0.98828125, "learning_rate": 4.0012798101933414e-05, "loss": 1.0481, "step": 4031 }, { "epoch": 0.2966750977236146, "grad_norm": 0.875, "learning_rate": 4.000816664844993e-05, "loss": 0.7896, "step": 4032 }, { "epoch": 0.2967486778569786, "grad_norm": 0.8046875, "learning_rate": 4.000353438950649e-05, "loss": 0.963, "step": 4033 }, { "epoch": 0.2968222579903426, "grad_norm": 0.96484375, "learning_rate": 3.9998901325351694e-05, "loss": 1.1171, "step": 4034 }, { "epoch": 0.2968958381237066, "grad_norm": 0.8828125, "learning_rate": 3.99942674562342e-05, "loss": 0.9836, "step": 4035 }, { "epoch": 0.29696941825707057, "grad_norm": 0.89453125, "learning_rate": 3.998963278240268e-05, "loss": 1.0004, "step": 4036 }, { "epoch": 0.2970429983904346, "grad_norm": 1.0390625, "learning_rate": 3.9984997304105885e-05, "loss": 1.484, "step": 4037 }, { "epoch": 0.2971165785237986, "grad_norm": 1.1171875, "learning_rate": 3.998036102159259e-05, "loss": 0.897, "step": 4038 }, { "epoch": 0.2971901586571626, "grad_norm": 0.8828125, "learning_rate": 3.9975723935111614e-05, "loss": 0.9784, "step": 4039 }, { "epoch": 0.29726373879052653, "grad_norm": 0.83984375, "learning_rate": 3.997108604491182e-05, "loss": 1.2529, "step": 4040 }, { "epoch": 0.29733731892389054, "grad_norm": 0.890625, "learning_rate": 3.9966447351242106e-05, "loss": 0.8156, "step": 4041 }, { "epoch": 0.29741089905725454, "grad_norm": 0.98828125, "learning_rate": 3.996180785435144e-05, "loss": 1.016, "step": 4042 }, { "epoch": 0.29748447919061854, "grad_norm": 0.8984375, "learning_rate": 3.9957167554488795e-05, "loss": 1.1219, "step": 4043 }, { "epoch": 0.29755805932398255, "grad_norm": 1.0625, "learning_rate": 3.995252645190323e-05, "loss": 1.1444, "step": 4044 }, { "epoch": 0.2976316394573465, "grad_norm": 0.90625, "learning_rate": 3.99478845468438e-05, "loss": 0.906, "step": 4045 }, { "epoch": 0.2977052195907105, "grad_norm": 0.8046875, "learning_rate": 3.994324183955964e-05, "loss": 0.8373, "step": 4046 }, { "epoch": 0.2977787997240745, "grad_norm": 1.0625, "learning_rate": 3.993859833029993e-05, "loss": 1.2315, "step": 4047 }, { "epoch": 0.2978523798574385, "grad_norm": 0.92578125, "learning_rate": 3.9933954019313844e-05, "loss": 0.8823, "step": 4048 }, { "epoch": 0.29792595999080246, "grad_norm": 0.69140625, "learning_rate": 3.992930890685066e-05, "loss": 0.5843, "step": 4049 }, { "epoch": 0.29799954012416646, "grad_norm": 0.828125, "learning_rate": 3.992466299315965e-05, "loss": 0.8507, "step": 4050 }, { "epoch": 0.29807312025753047, "grad_norm": 0.80859375, "learning_rate": 3.992001627849019e-05, "loss": 0.9282, "step": 4051 }, { "epoch": 0.29814670039089447, "grad_norm": 0.9765625, "learning_rate": 3.991536876309162e-05, "loss": 0.8898, "step": 4052 }, { "epoch": 0.2982202805242585, "grad_norm": 0.91796875, "learning_rate": 3.991072044721339e-05, "loss": 0.8895, "step": 4053 }, { "epoch": 0.2982938606576224, "grad_norm": 0.79296875, "learning_rate": 3.990607133110495e-05, "loss": 0.909, "step": 4054 }, { "epoch": 0.2983674407909864, "grad_norm": 0.953125, "learning_rate": 3.9901421415015815e-05, "loss": 1.1622, "step": 4055 }, { "epoch": 0.29844102092435043, "grad_norm": 0.85546875, "learning_rate": 3.989677069919554e-05, "loss": 0.8779, "step": 4056 }, { "epoch": 0.29851460105771443, "grad_norm": 0.921875, "learning_rate": 3.9892119183893715e-05, "loss": 0.9261, "step": 4057 }, { "epoch": 0.2985881811910784, "grad_norm": 0.87109375, "learning_rate": 3.988746686935998e-05, "loss": 0.8476, "step": 4058 }, { "epoch": 0.2986617613244424, "grad_norm": 0.91015625, "learning_rate": 3.9882813755844015e-05, "loss": 1.0539, "step": 4059 }, { "epoch": 0.2987353414578064, "grad_norm": 0.984375, "learning_rate": 3.9878159843595554e-05, "loss": 1.2995, "step": 4060 }, { "epoch": 0.2988089215911704, "grad_norm": 0.9375, "learning_rate": 3.987350513286435e-05, "loss": 0.9445, "step": 4061 }, { "epoch": 0.2988825017245344, "grad_norm": 0.83984375, "learning_rate": 3.986884962390022e-05, "loss": 1.2249, "step": 4062 }, { "epoch": 0.29895608185789835, "grad_norm": 1.0546875, "learning_rate": 3.986419331695301e-05, "loss": 0.9489, "step": 4063 }, { "epoch": 0.29902966199126235, "grad_norm": 1.3515625, "learning_rate": 3.985953621227262e-05, "loss": 0.9554, "step": 4064 }, { "epoch": 0.29910324212462636, "grad_norm": 1.0390625, "learning_rate": 3.985487831010899e-05, "loss": 1.1644, "step": 4065 }, { "epoch": 0.29917682225799036, "grad_norm": 0.875, "learning_rate": 3.985021961071209e-05, "loss": 0.7879, "step": 4066 }, { "epoch": 0.2992504023913543, "grad_norm": 1.578125, "learning_rate": 3.984556011433196e-05, "loss": 1.0147, "step": 4067 }, { "epoch": 0.2993239825247183, "grad_norm": 0.80859375, "learning_rate": 3.984089982121865e-05, "loss": 0.8522, "step": 4068 }, { "epoch": 0.2993975626580823, "grad_norm": 0.69921875, "learning_rate": 3.983623873162229e-05, "loss": 0.751, "step": 4069 }, { "epoch": 0.2994711427914463, "grad_norm": 0.74609375, "learning_rate": 3.983157684579301e-05, "loss": 0.8797, "step": 4070 }, { "epoch": 0.2995447229248103, "grad_norm": 0.75, "learning_rate": 3.982691416398101e-05, "loss": 0.7336, "step": 4071 }, { "epoch": 0.2996183030581743, "grad_norm": 1.109375, "learning_rate": 3.9822250686436534e-05, "loss": 1.3268, "step": 4072 }, { "epoch": 0.2996918831915383, "grad_norm": 0.765625, "learning_rate": 3.981758641340986e-05, "loss": 0.9972, "step": 4073 }, { "epoch": 0.2997654633249023, "grad_norm": 1.1015625, "learning_rate": 3.981292134515131e-05, "loss": 1.0735, "step": 4074 }, { "epoch": 0.2998390434582663, "grad_norm": 0.91796875, "learning_rate": 3.980825548191125e-05, "loss": 1.1444, "step": 4075 }, { "epoch": 0.29991262359163023, "grad_norm": 0.9140625, "learning_rate": 3.9803588823940085e-05, "loss": 0.849, "step": 4076 }, { "epoch": 0.29998620372499424, "grad_norm": 0.83203125, "learning_rate": 3.979892137148827e-05, "loss": 0.8298, "step": 4077 }, { "epoch": 0.30005978385835824, "grad_norm": 0.8515625, "learning_rate": 3.97942531248063e-05, "loss": 1.0243, "step": 4078 }, { "epoch": 0.30013336399172225, "grad_norm": 1.0, "learning_rate": 3.97895840841447e-05, "loss": 1.2506, "step": 4079 }, { "epoch": 0.30020694412508625, "grad_norm": 0.78515625, "learning_rate": 3.978491424975406e-05, "loss": 0.9184, "step": 4080 }, { "epoch": 0.3002805242584502, "grad_norm": 0.9296875, "learning_rate": 3.9780243621884997e-05, "loss": 1.4987, "step": 4081 }, { "epoch": 0.3003541043918142, "grad_norm": 0.9453125, "learning_rate": 3.977557220078817e-05, "loss": 1.0628, "step": 4082 }, { "epoch": 0.3004276845251782, "grad_norm": 0.79296875, "learning_rate": 3.977089998671429e-05, "loss": 0.6733, "step": 4083 }, { "epoch": 0.3005012646585422, "grad_norm": 0.70703125, "learning_rate": 3.9766226979914104e-05, "loss": 0.7021, "step": 4084 }, { "epoch": 0.30057484479190616, "grad_norm": 0.67578125, "learning_rate": 3.976155318063841e-05, "loss": 0.6074, "step": 4085 }, { "epoch": 0.30064842492527016, "grad_norm": 0.74609375, "learning_rate": 3.9756878589138044e-05, "loss": 0.7702, "step": 4086 }, { "epoch": 0.30072200505863417, "grad_norm": 0.66796875, "learning_rate": 3.9752203205663865e-05, "loss": 0.6078, "step": 4087 }, { "epoch": 0.30079558519199817, "grad_norm": 0.703125, "learning_rate": 3.9747527030466805e-05, "loss": 0.6987, "step": 4088 }, { "epoch": 0.3008691653253622, "grad_norm": 0.96875, "learning_rate": 3.974285006379783e-05, "loss": 1.3702, "step": 4089 }, { "epoch": 0.3009427454587261, "grad_norm": 1.0078125, "learning_rate": 3.9738172305907936e-05, "loss": 1.4945, "step": 4090 }, { "epoch": 0.30101632559209013, "grad_norm": 1.078125, "learning_rate": 3.973349375704816e-05, "loss": 1.2491, "step": 4091 }, { "epoch": 0.30108990572545413, "grad_norm": 0.8046875, "learning_rate": 3.972881441746962e-05, "loss": 0.9268, "step": 4092 }, { "epoch": 0.30116348585881814, "grad_norm": 0.89453125, "learning_rate": 3.9724134287423406e-05, "loss": 0.7378, "step": 4093 }, { "epoch": 0.3012370659921821, "grad_norm": 1.1171875, "learning_rate": 3.971945336716074e-05, "loss": 0.9854, "step": 4094 }, { "epoch": 0.3013106461255461, "grad_norm": 0.71875, "learning_rate": 3.971477165693279e-05, "loss": 0.8025, "step": 4095 }, { "epoch": 0.3013842262589101, "grad_norm": 0.99609375, "learning_rate": 3.9710089156990856e-05, "loss": 1.0144, "step": 4096 }, { "epoch": 0.3014578063922741, "grad_norm": 0.7578125, "learning_rate": 3.970540586758621e-05, "loss": 0.9574, "step": 4097 }, { "epoch": 0.3015313865256381, "grad_norm": 0.7109375, "learning_rate": 3.970072178897021e-05, "loss": 0.7508, "step": 4098 }, { "epoch": 0.30160496665900205, "grad_norm": 0.78125, "learning_rate": 3.969603692139423e-05, "loss": 1.0062, "step": 4099 }, { "epoch": 0.30167854679236605, "grad_norm": 0.99609375, "learning_rate": 3.969135126510971e-05, "loss": 0.9119, "step": 4100 }, { "epoch": 0.30175212692573006, "grad_norm": 0.89453125, "learning_rate": 3.968666482036812e-05, "loss": 1.28, "step": 4101 }, { "epoch": 0.30182570705909406, "grad_norm": 0.9453125, "learning_rate": 3.968197758742096e-05, "loss": 1.1114, "step": 4102 }, { "epoch": 0.301899287192458, "grad_norm": 0.7734375, "learning_rate": 3.9677289566519796e-05, "loss": 0.7357, "step": 4103 }, { "epoch": 0.301972867325822, "grad_norm": 0.828125, "learning_rate": 3.967260075791622e-05, "loss": 0.5691, "step": 4104 }, { "epoch": 0.302046447459186, "grad_norm": 0.85546875, "learning_rate": 3.966791116186188e-05, "loss": 0.8774, "step": 4105 }, { "epoch": 0.30212002759255, "grad_norm": 0.82421875, "learning_rate": 3.966322077860846e-05, "loss": 1.015, "step": 4106 }, { "epoch": 0.302193607725914, "grad_norm": 0.84375, "learning_rate": 3.965852960840766e-05, "loss": 0.8266, "step": 4107 }, { "epoch": 0.302267187859278, "grad_norm": 0.875, "learning_rate": 3.9653837651511266e-05, "loss": 1.1217, "step": 4108 }, { "epoch": 0.302340767992642, "grad_norm": 0.99609375, "learning_rate": 3.964914490817108e-05, "loss": 0.9724, "step": 4109 }, { "epoch": 0.302414348126006, "grad_norm": 0.8984375, "learning_rate": 3.9644451378638956e-05, "loss": 0.853, "step": 4110 }, { "epoch": 0.30248792825937, "grad_norm": 0.8515625, "learning_rate": 3.963975706316679e-05, "loss": 0.8187, "step": 4111 }, { "epoch": 0.30256150839273394, "grad_norm": 0.73046875, "learning_rate": 3.963506196200651e-05, "loss": 0.979, "step": 4112 }, { "epoch": 0.30263508852609794, "grad_norm": 0.98828125, "learning_rate": 3.96303660754101e-05, "loss": 1.1585, "step": 4113 }, { "epoch": 0.30270866865946194, "grad_norm": 0.921875, "learning_rate": 3.9625669403629574e-05, "loss": 0.923, "step": 4114 }, { "epoch": 0.30278224879282595, "grad_norm": 0.84765625, "learning_rate": 3.9620971946916996e-05, "loss": 0.8744, "step": 4115 }, { "epoch": 0.30285582892618995, "grad_norm": 0.99609375, "learning_rate": 3.961627370552447e-05, "loss": 1.3116, "step": 4116 }, { "epoch": 0.3029294090595539, "grad_norm": 0.69140625, "learning_rate": 3.961157467970413e-05, "loss": 0.8754, "step": 4117 }, { "epoch": 0.3030029891929179, "grad_norm": 0.85546875, "learning_rate": 3.9606874869708186e-05, "loss": 0.8331, "step": 4118 }, { "epoch": 0.3030765693262819, "grad_norm": 1.1875, "learning_rate": 3.960217427578885e-05, "loss": 1.3008, "step": 4119 }, { "epoch": 0.3031501494596459, "grad_norm": 0.7421875, "learning_rate": 3.9597472898198404e-05, "loss": 0.9466, "step": 4120 }, { "epoch": 0.30322372959300986, "grad_norm": 0.8671875, "learning_rate": 3.9592770737189155e-05, "loss": 0.8472, "step": 4121 }, { "epoch": 0.30329730972637386, "grad_norm": 0.91015625, "learning_rate": 3.9588067793013466e-05, "loss": 0.9979, "step": 4122 }, { "epoch": 0.30337088985973787, "grad_norm": 0.859375, "learning_rate": 3.9583364065923727e-05, "loss": 1.0135, "step": 4123 }, { "epoch": 0.3034444699931019, "grad_norm": 0.9375, "learning_rate": 3.9578659556172386e-05, "loss": 1.0402, "step": 4124 }, { "epoch": 0.3035180501264659, "grad_norm": 0.65234375, "learning_rate": 3.957395426401192e-05, "loss": 0.8235, "step": 4125 }, { "epoch": 0.3035916302598298, "grad_norm": 1.0859375, "learning_rate": 3.9569248189694863e-05, "loss": 1.3151, "step": 4126 }, { "epoch": 0.30366521039319383, "grad_norm": 1.0390625, "learning_rate": 3.956454133347376e-05, "loss": 1.1963, "step": 4127 }, { "epoch": 0.30373879052655783, "grad_norm": 0.8828125, "learning_rate": 3.955983369560124e-05, "loss": 1.0767, "step": 4128 }, { "epoch": 0.30381237065992184, "grad_norm": 0.7578125, "learning_rate": 3.955512527632994e-05, "loss": 0.7796, "step": 4129 }, { "epoch": 0.3038859507932858, "grad_norm": 0.7421875, "learning_rate": 3.955041607591256e-05, "loss": 1.0109, "step": 4130 }, { "epoch": 0.3039595309266498, "grad_norm": 0.9921875, "learning_rate": 3.954570609460183e-05, "loss": 1.0324, "step": 4131 }, { "epoch": 0.3040331110600138, "grad_norm": 0.91015625, "learning_rate": 3.954099533265053e-05, "loss": 1.2995, "step": 4132 }, { "epoch": 0.3041066911933778, "grad_norm": 0.77734375, "learning_rate": 3.953628379031147e-05, "loss": 0.7554, "step": 4133 }, { "epoch": 0.3041802713267418, "grad_norm": 0.87109375, "learning_rate": 3.953157146783751e-05, "loss": 0.8913, "step": 4134 }, { "epoch": 0.30425385146010575, "grad_norm": 0.80859375, "learning_rate": 3.952685836548157e-05, "loss": 0.813, "step": 4135 }, { "epoch": 0.30432743159346975, "grad_norm": 0.90234375, "learning_rate": 3.952214448349657e-05, "loss": 1.051, "step": 4136 }, { "epoch": 0.30440101172683376, "grad_norm": 5.65625, "learning_rate": 3.951742982213551e-05, "loss": 0.946, "step": 4137 }, { "epoch": 0.30447459186019776, "grad_norm": 1.078125, "learning_rate": 3.9512714381651406e-05, "loss": 1.1763, "step": 4138 }, { "epoch": 0.3045481719935617, "grad_norm": 0.65234375, "learning_rate": 3.950799816229733e-05, "loss": 0.7493, "step": 4139 }, { "epoch": 0.3046217521269257, "grad_norm": 0.984375, "learning_rate": 3.95032811643264e-05, "loss": 1.0026, "step": 4140 }, { "epoch": 0.3046953322602897, "grad_norm": 0.8203125, "learning_rate": 3.949856338799175e-05, "loss": 1.0317, "step": 4141 }, { "epoch": 0.3047689123936537, "grad_norm": 4.25, "learning_rate": 3.94938448335466e-05, "loss": 0.8519, "step": 4142 }, { "epoch": 0.3048424925270177, "grad_norm": 0.81640625, "learning_rate": 3.948912550124417e-05, "loss": 1.1186, "step": 4143 }, { "epoch": 0.3049160726603817, "grad_norm": 0.6875, "learning_rate": 3.9484405391337744e-05, "loss": 0.6545, "step": 4144 }, { "epoch": 0.3049896527937457, "grad_norm": 0.9609375, "learning_rate": 3.947968450408063e-05, "loss": 1.0666, "step": 4145 }, { "epoch": 0.3050632329271097, "grad_norm": 1.1796875, "learning_rate": 3.947496283972619e-05, "loss": 0.9816, "step": 4146 }, { "epoch": 0.3051368130604737, "grad_norm": 0.84765625, "learning_rate": 3.9470240398527846e-05, "loss": 1.1742, "step": 4147 }, { "epoch": 0.30521039319383764, "grad_norm": 0.7265625, "learning_rate": 3.946551718073903e-05, "loss": 0.8515, "step": 4148 }, { "epoch": 0.30528397332720164, "grad_norm": 0.9609375, "learning_rate": 3.946079318661323e-05, "loss": 1.1281, "step": 4149 }, { "epoch": 0.30535755346056564, "grad_norm": 0.6875, "learning_rate": 3.945606841640397e-05, "loss": 0.5859, "step": 4150 }, { "epoch": 0.30543113359392965, "grad_norm": 0.83984375, "learning_rate": 3.9451342870364816e-05, "loss": 0.6277, "step": 4151 }, { "epoch": 0.30550471372729365, "grad_norm": 1.1953125, "learning_rate": 3.944661654874939e-05, "loss": 1.4611, "step": 4152 }, { "epoch": 0.3055782938606576, "grad_norm": 1.0234375, "learning_rate": 3.944188945181134e-05, "loss": 1.3086, "step": 4153 }, { "epoch": 0.3056518739940216, "grad_norm": 0.93359375, "learning_rate": 3.9437161579804355e-05, "loss": 0.9333, "step": 4154 }, { "epoch": 0.3057254541273856, "grad_norm": 0.78515625, "learning_rate": 3.943243293298218e-05, "loss": 0.8708, "step": 4155 }, { "epoch": 0.3057990342607496, "grad_norm": 0.796875, "learning_rate": 3.942770351159859e-05, "loss": 0.7147, "step": 4156 }, { "epoch": 0.30587261439411356, "grad_norm": 0.88671875, "learning_rate": 3.942297331590739e-05, "loss": 0.7497, "step": 4157 }, { "epoch": 0.30594619452747757, "grad_norm": 1.0859375, "learning_rate": 3.9418242346162474e-05, "loss": 1.3933, "step": 4158 }, { "epoch": 0.30601977466084157, "grad_norm": 0.859375, "learning_rate": 3.9413510602617706e-05, "loss": 1.102, "step": 4159 }, { "epoch": 0.3060933547942056, "grad_norm": 1.0546875, "learning_rate": 3.940877808552706e-05, "loss": 1.2623, "step": 4160 }, { "epoch": 0.3061669349275696, "grad_norm": 0.9140625, "learning_rate": 3.94040447951445e-05, "loss": 1.2528, "step": 4161 }, { "epoch": 0.3062405150609335, "grad_norm": 0.8203125, "learning_rate": 3.939931073172406e-05, "loss": 0.9589, "step": 4162 }, { "epoch": 0.30631409519429753, "grad_norm": 1.015625, "learning_rate": 3.939457589551982e-05, "loss": 1.0826, "step": 4163 }, { "epoch": 0.30638767532766154, "grad_norm": 0.95703125, "learning_rate": 3.938984028678587e-05, "loss": 0.7509, "step": 4164 }, { "epoch": 0.30646125546102554, "grad_norm": 0.87890625, "learning_rate": 3.9385103905776374e-05, "loss": 0.8302, "step": 4165 }, { "epoch": 0.3065348355943895, "grad_norm": 0.86328125, "learning_rate": 3.938036675274552e-05, "loss": 0.9071, "step": 4166 }, { "epoch": 0.3066084157277535, "grad_norm": 0.6484375, "learning_rate": 3.937562882794754e-05, "loss": 0.6937, "step": 4167 }, { "epoch": 0.3066819958611175, "grad_norm": 0.98828125, "learning_rate": 3.937089013163672e-05, "loss": 0.9938, "step": 4168 }, { "epoch": 0.3067555759944815, "grad_norm": 0.671875, "learning_rate": 3.936615066406737e-05, "loss": 0.6215, "step": 4169 }, { "epoch": 0.3068291561278455, "grad_norm": 3.578125, "learning_rate": 3.936141042549384e-05, "loss": 0.8972, "step": 4170 }, { "epoch": 0.30690273626120945, "grad_norm": 1.0546875, "learning_rate": 3.935666941617054e-05, "loss": 0.8855, "step": 4171 }, { "epoch": 0.30697631639457346, "grad_norm": 1.59375, "learning_rate": 3.9351927636351905e-05, "loss": 1.0294, "step": 4172 }, { "epoch": 0.30704989652793746, "grad_norm": 0.94921875, "learning_rate": 3.9347185086292424e-05, "loss": 1.1058, "step": 4173 }, { "epoch": 0.30712347666130146, "grad_norm": 1.0, "learning_rate": 3.934244176624662e-05, "loss": 0.7408, "step": 4174 }, { "epoch": 0.3071970567946654, "grad_norm": 0.86328125, "learning_rate": 3.9337697676469046e-05, "loss": 1.0399, "step": 4175 }, { "epoch": 0.3072706369280294, "grad_norm": 0.83984375, "learning_rate": 3.933295281721433e-05, "loss": 0.7997, "step": 4176 }, { "epoch": 0.3073442170613934, "grad_norm": 0.74609375, "learning_rate": 3.93282071887371e-05, "loss": 0.8517, "step": 4177 }, { "epoch": 0.3074177971947574, "grad_norm": 0.98046875, "learning_rate": 3.9323460791292055e-05, "loss": 1.04, "step": 4178 }, { "epoch": 0.30749137732812143, "grad_norm": 0.91796875, "learning_rate": 3.9318713625133926e-05, "loss": 1.0115, "step": 4179 }, { "epoch": 0.3075649574614854, "grad_norm": 0.85546875, "learning_rate": 3.9313965690517475e-05, "loss": 0.7472, "step": 4180 }, { "epoch": 0.3076385375948494, "grad_norm": 0.8671875, "learning_rate": 3.930921698769752e-05, "loss": 0.9818, "step": 4181 }, { "epoch": 0.3077121177282134, "grad_norm": 0.93359375, "learning_rate": 3.930446751692892e-05, "loss": 1.0383, "step": 4182 }, { "epoch": 0.3077856978615774, "grad_norm": 1.046875, "learning_rate": 3.9299717278466566e-05, "loss": 1.16, "step": 4183 }, { "epoch": 0.30785927799494134, "grad_norm": 0.8671875, "learning_rate": 3.929496627256539e-05, "loss": 0.9647, "step": 4184 }, { "epoch": 0.30793285812830534, "grad_norm": 0.87109375, "learning_rate": 3.929021449948037e-05, "loss": 0.8962, "step": 4185 }, { "epoch": 0.30800643826166935, "grad_norm": 0.76171875, "learning_rate": 3.928546195946654e-05, "loss": 1.3702, "step": 4186 }, { "epoch": 0.30808001839503335, "grad_norm": 0.9140625, "learning_rate": 3.928070865277894e-05, "loss": 0.9198, "step": 4187 }, { "epoch": 0.30815359852839735, "grad_norm": 0.82421875, "learning_rate": 3.927595457967268e-05, "loss": 0.8546, "step": 4188 }, { "epoch": 0.3082271786617613, "grad_norm": 0.9921875, "learning_rate": 3.92711997404029e-05, "loss": 1.02, "step": 4189 }, { "epoch": 0.3083007587951253, "grad_norm": 0.640625, "learning_rate": 3.9266444135224786e-05, "loss": 0.5583, "step": 4190 }, { "epoch": 0.3083743389284893, "grad_norm": 0.7734375, "learning_rate": 3.926168776439356e-05, "loss": 0.9121, "step": 4191 }, { "epoch": 0.3084479190618533, "grad_norm": 0.89453125, "learning_rate": 3.925693062816449e-05, "loss": 0.7983, "step": 4192 }, { "epoch": 0.30852149919521726, "grad_norm": 0.91015625, "learning_rate": 3.925217272679288e-05, "loss": 1.0658, "step": 4193 }, { "epoch": 0.30859507932858127, "grad_norm": 0.90625, "learning_rate": 3.924741406053407e-05, "loss": 1.1073, "step": 4194 }, { "epoch": 0.30866865946194527, "grad_norm": 0.96484375, "learning_rate": 3.924265462964347e-05, "loss": 1.1864, "step": 4195 }, { "epoch": 0.3087422395953093, "grad_norm": 0.96484375, "learning_rate": 3.923789443437649e-05, "loss": 0.9088, "step": 4196 }, { "epoch": 0.3088158197286733, "grad_norm": 0.765625, "learning_rate": 3.923313347498861e-05, "loss": 0.7344, "step": 4197 }, { "epoch": 0.30888939986203723, "grad_norm": 0.7109375, "learning_rate": 3.9228371751735336e-05, "loss": 0.8802, "step": 4198 }, { "epoch": 0.30896297999540123, "grad_norm": 0.7890625, "learning_rate": 3.922360926487223e-05, "loss": 0.6581, "step": 4199 }, { "epoch": 0.30903656012876524, "grad_norm": 0.95703125, "learning_rate": 3.921884601465487e-05, "loss": 1.1206, "step": 4200 }, { "epoch": 0.30911014026212924, "grad_norm": 1.0234375, "learning_rate": 3.921408200133891e-05, "loss": 1.3348, "step": 4201 }, { "epoch": 0.30918372039549324, "grad_norm": 0.890625, "learning_rate": 3.9209317225180006e-05, "loss": 0.9765, "step": 4202 }, { "epoch": 0.3092573005288572, "grad_norm": 0.87890625, "learning_rate": 3.920455168643389e-05, "loss": 1.094, "step": 4203 }, { "epoch": 0.3093308806622212, "grad_norm": 0.703125, "learning_rate": 3.9199785385356314e-05, "loss": 0.9887, "step": 4204 }, { "epoch": 0.3094044607955852, "grad_norm": 0.81640625, "learning_rate": 3.919501832220307e-05, "loss": 1.0445, "step": 4205 }, { "epoch": 0.3094780409289492, "grad_norm": 0.91015625, "learning_rate": 3.919025049723001e-05, "loss": 0.7977, "step": 4206 }, { "epoch": 0.30955162106231315, "grad_norm": 1.046875, "learning_rate": 3.9185481910693004e-05, "loss": 1.2536, "step": 4207 }, { "epoch": 0.30962520119567716, "grad_norm": 0.98046875, "learning_rate": 3.9180712562847974e-05, "loss": 1.1189, "step": 4208 }, { "epoch": 0.30969878132904116, "grad_norm": 1.0703125, "learning_rate": 3.917594245395089e-05, "loss": 1.6528, "step": 4209 }, { "epoch": 0.30977236146240517, "grad_norm": 1.0, "learning_rate": 3.917117158425774e-05, "loss": 1.353, "step": 4210 }, { "epoch": 0.30984594159576917, "grad_norm": 1.03125, "learning_rate": 3.916639995402459e-05, "loss": 0.9726, "step": 4211 }, { "epoch": 0.3099195217291331, "grad_norm": 0.85546875, "learning_rate": 3.9161627563507494e-05, "loss": 0.8628, "step": 4212 }, { "epoch": 0.3099931018624971, "grad_norm": 0.83203125, "learning_rate": 3.915685441296261e-05, "loss": 0.8632, "step": 4213 }, { "epoch": 0.3100666819958611, "grad_norm": 0.83203125, "learning_rate": 3.915208050264608e-05, "loss": 0.9593, "step": 4214 }, { "epoch": 0.31014026212922513, "grad_norm": 0.69140625, "learning_rate": 3.914730583281412e-05, "loss": 0.7832, "step": 4215 }, { "epoch": 0.3102138422625891, "grad_norm": 0.8671875, "learning_rate": 3.9142530403722976e-05, "loss": 1.0504, "step": 4216 }, { "epoch": 0.3102874223959531, "grad_norm": 1.0625, "learning_rate": 3.9137754215628944e-05, "loss": 0.9269, "step": 4217 }, { "epoch": 0.3103610025293171, "grad_norm": 0.859375, "learning_rate": 3.913297726878834e-05, "loss": 0.8606, "step": 4218 }, { "epoch": 0.3104345826626811, "grad_norm": 0.71484375, "learning_rate": 3.912819956345754e-05, "loss": 0.852, "step": 4219 }, { "epoch": 0.3105081627960451, "grad_norm": 0.859375, "learning_rate": 3.912342109989296e-05, "loss": 0.9616, "step": 4220 }, { "epoch": 0.31058174292940904, "grad_norm": 1.0078125, "learning_rate": 3.911864187835103e-05, "loss": 1.1499, "step": 4221 }, { "epoch": 0.31065532306277305, "grad_norm": 0.9375, "learning_rate": 3.911386189908826e-05, "loss": 0.7484, "step": 4222 }, { "epoch": 0.31072890319613705, "grad_norm": 0.796875, "learning_rate": 3.910908116236118e-05, "loss": 0.7645, "step": 4223 }, { "epoch": 0.31080248332950106, "grad_norm": 0.95703125, "learning_rate": 3.9104299668426375e-05, "loss": 0.8799, "step": 4224 }, { "epoch": 0.310876063462865, "grad_norm": 1.234375, "learning_rate": 3.909951741754043e-05, "loss": 1.3231, "step": 4225 }, { "epoch": 0.310949643596229, "grad_norm": 0.83203125, "learning_rate": 3.9094734409960026e-05, "loss": 0.8432, "step": 4226 }, { "epoch": 0.311023223729593, "grad_norm": 1.3828125, "learning_rate": 3.908995064594185e-05, "loss": 1.1038, "step": 4227 }, { "epoch": 0.311096803862957, "grad_norm": 0.9609375, "learning_rate": 3.908516612574262e-05, "loss": 0.8895, "step": 4228 }, { "epoch": 0.311170383996321, "grad_norm": 0.87109375, "learning_rate": 3.908038084961914e-05, "loss": 0.944, "step": 4229 }, { "epoch": 0.31124396412968497, "grad_norm": 0.74609375, "learning_rate": 3.907559481782821e-05, "loss": 0.7459, "step": 4230 }, { "epoch": 0.311317544263049, "grad_norm": 0.80078125, "learning_rate": 3.907080803062669e-05, "loss": 0.8815, "step": 4231 }, { "epoch": 0.311391124396413, "grad_norm": 0.9765625, "learning_rate": 3.906602048827148e-05, "loss": 1.0102, "step": 4232 }, { "epoch": 0.311464704529777, "grad_norm": 0.703125, "learning_rate": 3.906123219101952e-05, "loss": 0.9443, "step": 4233 }, { "epoch": 0.31153828466314093, "grad_norm": 0.84765625, "learning_rate": 3.905644313912778e-05, "loss": 0.8356, "step": 4234 }, { "epoch": 0.31161186479650493, "grad_norm": 1.015625, "learning_rate": 3.905165333285329e-05, "loss": 1.1552, "step": 4235 }, { "epoch": 0.31168544492986894, "grad_norm": 1.0234375, "learning_rate": 3.904686277245311e-05, "loss": 1.1431, "step": 4236 }, { "epoch": 0.31175902506323294, "grad_norm": 0.81640625, "learning_rate": 3.9042071458184323e-05, "loss": 0.6391, "step": 4237 }, { "epoch": 0.31183260519659695, "grad_norm": 0.82421875, "learning_rate": 3.903727939030409e-05, "loss": 0.7324, "step": 4238 }, { "epoch": 0.3119061853299609, "grad_norm": 0.6484375, "learning_rate": 3.903248656906958e-05, "loss": 0.7205, "step": 4239 }, { "epoch": 0.3119797654633249, "grad_norm": 0.94921875, "learning_rate": 3.902769299473803e-05, "loss": 0.7757, "step": 4240 }, { "epoch": 0.3120533455966889, "grad_norm": 0.68359375, "learning_rate": 3.9022898667566686e-05, "loss": 0.7457, "step": 4241 }, { "epoch": 0.3121269257300529, "grad_norm": 0.8671875, "learning_rate": 3.901810358781286e-05, "loss": 1.1793, "step": 4242 }, { "epoch": 0.31220050586341686, "grad_norm": 0.9921875, "learning_rate": 3.901330775573389e-05, "loss": 0.9182, "step": 4243 }, { "epoch": 0.31227408599678086, "grad_norm": 0.93359375, "learning_rate": 3.9008511171587145e-05, "loss": 1.2152, "step": 4244 }, { "epoch": 0.31234766613014486, "grad_norm": 0.94140625, "learning_rate": 3.900371383563008e-05, "loss": 0.9771, "step": 4245 }, { "epoch": 0.31242124626350887, "grad_norm": 0.8828125, "learning_rate": 3.899891574812014e-05, "loss": 0.7187, "step": 4246 }, { "epoch": 0.31249482639687287, "grad_norm": 0.921875, "learning_rate": 3.899411690931482e-05, "loss": 1.0377, "step": 4247 }, { "epoch": 0.3125684065302368, "grad_norm": 0.8125, "learning_rate": 3.89893173194717e-05, "loss": 1.0612, "step": 4248 }, { "epoch": 0.3126419866636008, "grad_norm": 1.78125, "learning_rate": 3.8984516978848326e-05, "loss": 1.1835, "step": 4249 }, { "epoch": 0.31271556679696483, "grad_norm": 0.8828125, "learning_rate": 3.8979715887702336e-05, "loss": 1.229, "step": 4250 }, { "epoch": 0.31278914693032883, "grad_norm": 0.96875, "learning_rate": 3.89749140462914e-05, "loss": 0.6719, "step": 4251 }, { "epoch": 0.3128627270636928, "grad_norm": 0.83984375, "learning_rate": 3.8970111454873225e-05, "loss": 0.9952, "step": 4252 }, { "epoch": 0.3129363071970568, "grad_norm": 0.9296875, "learning_rate": 3.8965308113705553e-05, "loss": 0.7742, "step": 4253 }, { "epoch": 0.3130098873304208, "grad_norm": 0.74609375, "learning_rate": 3.896050402304618e-05, "loss": 0.9227, "step": 4254 }, { "epoch": 0.3130834674637848, "grad_norm": 0.80859375, "learning_rate": 3.895569918315292e-05, "loss": 1.1841, "step": 4255 }, { "epoch": 0.3131570475971488, "grad_norm": 0.890625, "learning_rate": 3.8950893594283636e-05, "loss": 1.2208, "step": 4256 }, { "epoch": 0.31323062773051275, "grad_norm": 0.7578125, "learning_rate": 3.894608725669624e-05, "loss": 0.7798, "step": 4257 }, { "epoch": 0.31330420786387675, "grad_norm": 0.78125, "learning_rate": 3.894128017064869e-05, "loss": 0.8113, "step": 4258 }, { "epoch": 0.31337778799724075, "grad_norm": 0.8515625, "learning_rate": 3.8936472336398965e-05, "loss": 1.0558, "step": 4259 }, { "epoch": 0.31345136813060476, "grad_norm": 0.8359375, "learning_rate": 3.8931663754205086e-05, "loss": 0.8551, "step": 4260 }, { "epoch": 0.3135249482639687, "grad_norm": 1.4375, "learning_rate": 3.8926854424325135e-05, "loss": 0.9985, "step": 4261 }, { "epoch": 0.3135985283973327, "grad_norm": 0.72265625, "learning_rate": 3.89220443470172e-05, "loss": 0.6664, "step": 4262 }, { "epoch": 0.3136721085306967, "grad_norm": 0.76171875, "learning_rate": 3.891723352253944e-05, "loss": 0.9043, "step": 4263 }, { "epoch": 0.3137456886640607, "grad_norm": 0.87890625, "learning_rate": 3.8912421951150055e-05, "loss": 1.2252, "step": 4264 }, { "epoch": 0.3138192687974247, "grad_norm": 0.73828125, "learning_rate": 3.890760963310725e-05, "loss": 0.7947, "step": 4265 }, { "epoch": 0.31389284893078867, "grad_norm": 1.0, "learning_rate": 3.890279656866931e-05, "loss": 0.916, "step": 4266 }, { "epoch": 0.3139664290641527, "grad_norm": 0.71484375, "learning_rate": 3.889798275809453e-05, "loss": 0.6326, "step": 4267 }, { "epoch": 0.3140400091975167, "grad_norm": 0.7578125, "learning_rate": 3.889316820164127e-05, "loss": 0.8687, "step": 4268 }, { "epoch": 0.3141135893308807, "grad_norm": 0.92578125, "learning_rate": 3.888835289956792e-05, "loss": 0.9091, "step": 4269 }, { "epoch": 0.31418716946424463, "grad_norm": 0.890625, "learning_rate": 3.888353685213289e-05, "loss": 1.1087, "step": 4270 }, { "epoch": 0.31426074959760864, "grad_norm": 0.8359375, "learning_rate": 3.887872005959466e-05, "loss": 1.0186, "step": 4271 }, { "epoch": 0.31433432973097264, "grad_norm": 0.76171875, "learning_rate": 3.887390252221174e-05, "loss": 0.7486, "step": 4272 }, { "epoch": 0.31440790986433664, "grad_norm": 0.8828125, "learning_rate": 3.886908424024268e-05, "loss": 1.0472, "step": 4273 }, { "epoch": 0.31448148999770065, "grad_norm": 0.71875, "learning_rate": 3.886426521394606e-05, "loss": 0.5371, "step": 4274 }, { "epoch": 0.3145550701310646, "grad_norm": 0.96875, "learning_rate": 3.885944544358051e-05, "loss": 0.7701, "step": 4275 }, { "epoch": 0.3146286502644286, "grad_norm": 0.8984375, "learning_rate": 3.8854624929404704e-05, "loss": 1.0232, "step": 4276 }, { "epoch": 0.3147022303977926, "grad_norm": 0.86328125, "learning_rate": 3.8849803671677344e-05, "loss": 1.0513, "step": 4277 }, { "epoch": 0.3147758105311566, "grad_norm": 0.95703125, "learning_rate": 3.8844981670657174e-05, "loss": 0.8778, "step": 4278 }, { "epoch": 0.31484939066452056, "grad_norm": 0.89453125, "learning_rate": 3.8840158926603e-05, "loss": 0.7227, "step": 4279 }, { "epoch": 0.31492297079788456, "grad_norm": 1.3046875, "learning_rate": 3.8835335439773624e-05, "loss": 1.1575, "step": 4280 }, { "epoch": 0.31499655093124856, "grad_norm": 0.92578125, "learning_rate": 3.883051121042793e-05, "loss": 1.0151, "step": 4281 }, { "epoch": 0.31507013106461257, "grad_norm": 0.6875, "learning_rate": 3.882568623882482e-05, "loss": 0.4754, "step": 4282 }, { "epoch": 0.3151437111979766, "grad_norm": 0.90625, "learning_rate": 3.882086052522325e-05, "loss": 1.3109, "step": 4283 }, { "epoch": 0.3152172913313405, "grad_norm": 0.7265625, "learning_rate": 3.881603406988219e-05, "loss": 0.7013, "step": 4284 }, { "epoch": 0.3152908714647045, "grad_norm": 0.890625, "learning_rate": 3.881120687306068e-05, "loss": 1.0863, "step": 4285 }, { "epoch": 0.31536445159806853, "grad_norm": 0.99609375, "learning_rate": 3.8806378935017785e-05, "loss": 0.7815, "step": 4286 }, { "epoch": 0.31543803173143253, "grad_norm": 0.94140625, "learning_rate": 3.8801550256012606e-05, "loss": 0.9918, "step": 4287 }, { "epoch": 0.3155116118647965, "grad_norm": 0.734375, "learning_rate": 3.87967208363043e-05, "loss": 0.7851, "step": 4288 }, { "epoch": 0.3155851919981605, "grad_norm": 1.8359375, "learning_rate": 3.8791890676152036e-05, "loss": 1.0747, "step": 4289 }, { "epoch": 0.3156587721315245, "grad_norm": 0.78515625, "learning_rate": 3.8787059775815055e-05, "loss": 1.0735, "step": 4290 }, { "epoch": 0.3157323522648885, "grad_norm": 0.734375, "learning_rate": 3.878222813555261e-05, "loss": 0.8061, "step": 4291 }, { "epoch": 0.3158059323982525, "grad_norm": 1.109375, "learning_rate": 3.877739575562401e-05, "loss": 1.1215, "step": 4292 }, { "epoch": 0.31587951253161645, "grad_norm": 0.79296875, "learning_rate": 3.8772562636288614e-05, "loss": 0.868, "step": 4293 }, { "epoch": 0.31595309266498045, "grad_norm": 0.94921875, "learning_rate": 3.876772877780578e-05, "loss": 1.1162, "step": 4294 }, { "epoch": 0.31602667279834445, "grad_norm": 0.703125, "learning_rate": 3.8762894180434956e-05, "loss": 0.6902, "step": 4295 }, { "epoch": 0.31610025293170846, "grad_norm": 0.8203125, "learning_rate": 3.875805884443559e-05, "loss": 0.8879, "step": 4296 }, { "epoch": 0.3161738330650724, "grad_norm": 0.76171875, "learning_rate": 3.875322277006719e-05, "loss": 0.8738, "step": 4297 }, { "epoch": 0.3162474131984364, "grad_norm": 0.796875, "learning_rate": 3.874838595758931e-05, "loss": 0.9271, "step": 4298 }, { "epoch": 0.3163209933318004, "grad_norm": 0.78125, "learning_rate": 3.874354840726151e-05, "loss": 0.8894, "step": 4299 }, { "epoch": 0.3163945734651644, "grad_norm": 0.7734375, "learning_rate": 3.8738710119343435e-05, "loss": 0.9597, "step": 4300 }, { "epoch": 0.3164681535985284, "grad_norm": 1.125, "learning_rate": 3.873387109409473e-05, "loss": 0.971, "step": 4301 }, { "epoch": 0.31654173373189237, "grad_norm": 0.875, "learning_rate": 3.872903133177511e-05, "loss": 0.867, "step": 4302 }, { "epoch": 0.3166153138652564, "grad_norm": 1.1015625, "learning_rate": 3.8724190832644294e-05, "loss": 1.3698, "step": 4303 }, { "epoch": 0.3166888939986204, "grad_norm": 1.0390625, "learning_rate": 3.87193495969621e-05, "loss": 1.3375, "step": 4304 }, { "epoch": 0.3167624741319844, "grad_norm": 1.046875, "learning_rate": 3.8714507624988304e-05, "loss": 1.3526, "step": 4305 }, { "epoch": 0.31683605426534833, "grad_norm": 1.09375, "learning_rate": 3.87096649169828e-05, "loss": 1.6838, "step": 4306 }, { "epoch": 0.31690963439871234, "grad_norm": 0.6328125, "learning_rate": 3.8704821473205466e-05, "loss": 0.5086, "step": 4307 }, { "epoch": 0.31698321453207634, "grad_norm": 0.83984375, "learning_rate": 3.869997729391625e-05, "loss": 1.0475, "step": 4308 }, { "epoch": 0.31705679466544034, "grad_norm": 0.87890625, "learning_rate": 3.869513237937513e-05, "loss": 0.8904, "step": 4309 }, { "epoch": 0.31713037479880435, "grad_norm": 0.67578125, "learning_rate": 3.869028672984212e-05, "loss": 0.7615, "step": 4310 }, { "epoch": 0.3172039549321683, "grad_norm": 0.96875, "learning_rate": 3.868544034557728e-05, "loss": 1.0894, "step": 4311 }, { "epoch": 0.3172775350655323, "grad_norm": 0.91015625, "learning_rate": 3.86805932268407e-05, "loss": 0.9805, "step": 4312 }, { "epoch": 0.3173511151988963, "grad_norm": 0.984375, "learning_rate": 3.867574537389253e-05, "loss": 0.9955, "step": 4313 }, { "epoch": 0.3174246953322603, "grad_norm": 0.8828125, "learning_rate": 3.8670896786992926e-05, "loss": 1.1447, "step": 4314 }, { "epoch": 0.31749827546562426, "grad_norm": 0.85546875, "learning_rate": 3.8666047466402125e-05, "loss": 1.008, "step": 4315 }, { "epoch": 0.31757185559898826, "grad_norm": 0.87109375, "learning_rate": 3.8661197412380356e-05, "loss": 1.0325, "step": 4316 }, { "epoch": 0.31764543573235227, "grad_norm": 0.92578125, "learning_rate": 3.865634662518792e-05, "loss": 0.8053, "step": 4317 }, { "epoch": 0.31771901586571627, "grad_norm": 0.84765625, "learning_rate": 3.8651495105085164e-05, "loss": 1.0136, "step": 4318 }, { "epoch": 0.3177925959990803, "grad_norm": 0.91015625, "learning_rate": 3.864664285233245e-05, "loss": 0.8909, "step": 4319 }, { "epoch": 0.3178661761324442, "grad_norm": 0.79296875, "learning_rate": 3.864178986719019e-05, "loss": 0.923, "step": 4320 }, { "epoch": 0.3179397562658082, "grad_norm": 0.9375, "learning_rate": 3.863693614991883e-05, "loss": 1.127, "step": 4321 }, { "epoch": 0.31801333639917223, "grad_norm": 0.78125, "learning_rate": 3.8632081700778865e-05, "loss": 0.8364, "step": 4322 }, { "epoch": 0.31808691653253623, "grad_norm": 1.109375, "learning_rate": 3.8627226520030816e-05, "loss": 1.1214, "step": 4323 }, { "epoch": 0.3181604966659002, "grad_norm": 0.80859375, "learning_rate": 3.862237060793526e-05, "loss": 0.765, "step": 4324 }, { "epoch": 0.3182340767992642, "grad_norm": 0.78125, "learning_rate": 3.861751396475281e-05, "loss": 0.951, "step": 4325 }, { "epoch": 0.3183076569326282, "grad_norm": 1.0078125, "learning_rate": 3.86126565907441e-05, "loss": 0.9109, "step": 4326 }, { "epoch": 0.3183812370659922, "grad_norm": 0.95703125, "learning_rate": 3.860779848616982e-05, "loss": 0.7895, "step": 4327 }, { "epoch": 0.3184548171993562, "grad_norm": 0.8046875, "learning_rate": 3.86029396512907e-05, "loss": 0.8631, "step": 4328 }, { "epoch": 0.31852839733272015, "grad_norm": 0.78515625, "learning_rate": 3.85980800863675e-05, "loss": 0.6359, "step": 4329 }, { "epoch": 0.31860197746608415, "grad_norm": 1.03125, "learning_rate": 3.859321979166102e-05, "loss": 1.3715, "step": 4330 }, { "epoch": 0.31867555759944816, "grad_norm": 0.90625, "learning_rate": 3.858835876743211e-05, "loss": 0.9113, "step": 4331 }, { "epoch": 0.31874913773281216, "grad_norm": 0.73828125, "learning_rate": 3.858349701394166e-05, "loss": 1.0455, "step": 4332 }, { "epoch": 0.3188227178661761, "grad_norm": 0.82421875, "learning_rate": 3.857863453145057e-05, "loss": 0.9149, "step": 4333 }, { "epoch": 0.3188962979995401, "grad_norm": 0.84375, "learning_rate": 3.857377132021982e-05, "loss": 0.84, "step": 4334 }, { "epoch": 0.3189698781329041, "grad_norm": 0.69140625, "learning_rate": 3.856890738051039e-05, "loss": 0.6142, "step": 4335 }, { "epoch": 0.3190434582662681, "grad_norm": 0.93359375, "learning_rate": 3.856404271258334e-05, "loss": 0.9173, "step": 4336 }, { "epoch": 0.3191170383996321, "grad_norm": 0.7578125, "learning_rate": 3.855917731669972e-05, "loss": 0.9181, "step": 4337 }, { "epoch": 0.3191906185329961, "grad_norm": 0.86328125, "learning_rate": 3.8554311193120674e-05, "loss": 0.9264, "step": 4338 }, { "epoch": 0.3192641986663601, "grad_norm": 1.0, "learning_rate": 3.8549444342107345e-05, "loss": 1.0953, "step": 4339 }, { "epoch": 0.3193377787997241, "grad_norm": 0.9765625, "learning_rate": 3.8544576763920934e-05, "loss": 0.9389, "step": 4340 }, { "epoch": 0.3194113589330881, "grad_norm": 0.8046875, "learning_rate": 3.8539708458822665e-05, "loss": 0.9894, "step": 4341 }, { "epoch": 0.31948493906645203, "grad_norm": 0.96875, "learning_rate": 3.853483942707382e-05, "loss": 0.7656, "step": 4342 }, { "epoch": 0.31955851919981604, "grad_norm": 0.76953125, "learning_rate": 3.85299696689357e-05, "loss": 0.6863, "step": 4343 }, { "epoch": 0.31963209933318004, "grad_norm": 0.99609375, "learning_rate": 3.852509918466967e-05, "loss": 1.1532, "step": 4344 }, { "epoch": 0.31970567946654405, "grad_norm": 0.84765625, "learning_rate": 3.8520227974537106e-05, "loss": 0.8935, "step": 4345 }, { "epoch": 0.31977925959990805, "grad_norm": 0.81640625, "learning_rate": 3.851535603879944e-05, "loss": 0.7089, "step": 4346 }, { "epoch": 0.319852839733272, "grad_norm": 0.86328125, "learning_rate": 3.851048337771815e-05, "loss": 0.975, "step": 4347 }, { "epoch": 0.319926419866636, "grad_norm": 1.5546875, "learning_rate": 3.850560999155473e-05, "loss": 1.3401, "step": 4348 }, { "epoch": 0.32, "grad_norm": 0.99609375, "learning_rate": 3.8500735880570734e-05, "loss": 1.1325, "step": 4349 }, { "epoch": 0.320073580133364, "grad_norm": 0.7109375, "learning_rate": 3.849586104502774e-05, "loss": 0.852, "step": 4350 }, { "epoch": 0.32014716026672796, "grad_norm": 0.87109375, "learning_rate": 3.849098548518738e-05, "loss": 0.7993, "step": 4351 }, { "epoch": 0.32022074040009196, "grad_norm": 0.7578125, "learning_rate": 3.84861092013113e-05, "loss": 0.8838, "step": 4352 }, { "epoch": 0.32029432053345597, "grad_norm": 1.0, "learning_rate": 3.848123219366121e-05, "loss": 0.8858, "step": 4353 }, { "epoch": 0.32036790066681997, "grad_norm": 0.91015625, "learning_rate": 3.847635446249886e-05, "loss": 0.8729, "step": 4354 }, { "epoch": 0.320441480800184, "grad_norm": 0.86328125, "learning_rate": 3.8471476008086014e-05, "loss": 0.689, "step": 4355 }, { "epoch": 0.3205150609335479, "grad_norm": 1.0, "learning_rate": 3.84665968306845e-05, "loss": 1.2473, "step": 4356 }, { "epoch": 0.32058864106691193, "grad_norm": 0.765625, "learning_rate": 3.8461716930556164e-05, "loss": 0.8909, "step": 4357 }, { "epoch": 0.32066222120027593, "grad_norm": 1.0234375, "learning_rate": 3.845683630796291e-05, "loss": 1.0895, "step": 4358 }, { "epoch": 0.32073580133363994, "grad_norm": 0.91796875, "learning_rate": 3.845195496316666e-05, "loss": 1.2606, "step": 4359 }, { "epoch": 0.3208093814670039, "grad_norm": 0.7890625, "learning_rate": 3.844707289642939e-05, "loss": 0.7466, "step": 4360 }, { "epoch": 0.3208829616003679, "grad_norm": 1.0390625, "learning_rate": 3.8442190108013135e-05, "loss": 1.5815, "step": 4361 }, { "epoch": 0.3209565417337319, "grad_norm": 0.734375, "learning_rate": 3.843730659817991e-05, "loss": 0.8769, "step": 4362 }, { "epoch": 0.3210301218670959, "grad_norm": 0.83203125, "learning_rate": 3.843242236719182e-05, "loss": 0.7549, "step": 4363 }, { "epoch": 0.3211037020004599, "grad_norm": 0.74609375, "learning_rate": 3.8427537415311e-05, "loss": 0.7548, "step": 4364 }, { "epoch": 0.32117728213382385, "grad_norm": 0.7421875, "learning_rate": 3.84226517427996e-05, "loss": 0.6908, "step": 4365 }, { "epoch": 0.32125086226718785, "grad_norm": 0.90234375, "learning_rate": 3.8417765349919835e-05, "loss": 0.8573, "step": 4366 }, { "epoch": 0.32132444240055186, "grad_norm": 0.828125, "learning_rate": 3.8412878236933946e-05, "loss": 0.8099, "step": 4367 }, { "epoch": 0.32139802253391586, "grad_norm": 0.7890625, "learning_rate": 3.840799040410422e-05, "loss": 0.7745, "step": 4368 }, { "epoch": 0.3214716026672798, "grad_norm": 0.90234375, "learning_rate": 3.840310185169297e-05, "loss": 0.8537, "step": 4369 }, { "epoch": 0.3215451828006438, "grad_norm": 0.703125, "learning_rate": 3.839821257996256e-05, "loss": 0.6836, "step": 4370 }, { "epoch": 0.3216187629340078, "grad_norm": 0.71875, "learning_rate": 3.839332258917539e-05, "loss": 0.7052, "step": 4371 }, { "epoch": 0.3216923430673718, "grad_norm": 1.2578125, "learning_rate": 3.838843187959388e-05, "loss": 1.4513, "step": 4372 }, { "epoch": 0.3217659232007358, "grad_norm": 0.88671875, "learning_rate": 3.8383540451480527e-05, "loss": 0.8022, "step": 4373 }, { "epoch": 0.3218395033340998, "grad_norm": 0.89453125, "learning_rate": 3.837864830509783e-05, "loss": 1.1341, "step": 4374 }, { "epoch": 0.3219130834674638, "grad_norm": 0.875, "learning_rate": 3.837375544070836e-05, "loss": 0.7469, "step": 4375 }, { "epoch": 0.3219866636008278, "grad_norm": 0.7890625, "learning_rate": 3.8368861858574684e-05, "loss": 0.8379, "step": 4376 }, { "epoch": 0.3220602437341918, "grad_norm": 0.828125, "learning_rate": 3.8363967558959444e-05, "loss": 1.1046, "step": 4377 }, { "epoch": 0.32213382386755574, "grad_norm": 0.69140625, "learning_rate": 3.8359072542125305e-05, "loss": 0.7402, "step": 4378 }, { "epoch": 0.32220740400091974, "grad_norm": 1.0390625, "learning_rate": 3.835417680833499e-05, "loss": 1.3135, "step": 4379 }, { "epoch": 0.32228098413428374, "grad_norm": 1.140625, "learning_rate": 3.83492803578512e-05, "loss": 1.6153, "step": 4380 }, { "epoch": 0.32235456426764775, "grad_norm": 2.71875, "learning_rate": 3.834438319093676e-05, "loss": 0.6938, "step": 4381 }, { "epoch": 0.32242814440101175, "grad_norm": 0.80859375, "learning_rate": 3.8339485307854484e-05, "loss": 1.081, "step": 4382 }, { "epoch": 0.3225017245343757, "grad_norm": 0.921875, "learning_rate": 3.833458670886723e-05, "loss": 0.8331, "step": 4383 }, { "epoch": 0.3225753046677397, "grad_norm": 0.70703125, "learning_rate": 3.8329687394237876e-05, "loss": 0.7388, "step": 4384 }, { "epoch": 0.3226488848011037, "grad_norm": 1.0546875, "learning_rate": 3.8324787364229386e-05, "loss": 1.7329, "step": 4385 }, { "epoch": 0.3227224649344677, "grad_norm": 1.1484375, "learning_rate": 3.831988661910473e-05, "loss": 1.2951, "step": 4386 }, { "epoch": 0.32279604506783166, "grad_norm": 0.8671875, "learning_rate": 3.83149851591269e-05, "loss": 0.8811, "step": 4387 }, { "epoch": 0.32286962520119566, "grad_norm": 0.71875, "learning_rate": 3.8310082984558973e-05, "loss": 0.7405, "step": 4388 }, { "epoch": 0.32294320533455967, "grad_norm": 0.8046875, "learning_rate": 3.830518009566404e-05, "loss": 0.7229, "step": 4389 }, { "epoch": 0.3230167854679237, "grad_norm": 1.0234375, "learning_rate": 3.830027649270521e-05, "loss": 1.0959, "step": 4390 }, { "epoch": 0.3230903656012877, "grad_norm": 1.0546875, "learning_rate": 3.829537217594567e-05, "loss": 1.6317, "step": 4391 }, { "epoch": 0.3231639457346516, "grad_norm": 0.95703125, "learning_rate": 3.8290467145648615e-05, "loss": 0.97, "step": 4392 }, { "epoch": 0.32323752586801563, "grad_norm": 0.859375, "learning_rate": 3.828556140207729e-05, "loss": 1.2009, "step": 4393 }, { "epoch": 0.32331110600137963, "grad_norm": 0.88671875, "learning_rate": 3.828065494549497e-05, "loss": 1.0788, "step": 4394 }, { "epoch": 0.32338468613474364, "grad_norm": 0.9296875, "learning_rate": 3.827574777616499e-05, "loss": 0.824, "step": 4395 }, { "epoch": 0.3234582662681076, "grad_norm": 0.69140625, "learning_rate": 3.8270839894350694e-05, "loss": 0.7157, "step": 4396 }, { "epoch": 0.3235318464014716, "grad_norm": 0.8359375, "learning_rate": 3.826593130031549e-05, "loss": 0.9199, "step": 4397 }, { "epoch": 0.3236054265348356, "grad_norm": 0.95703125, "learning_rate": 3.826102199432282e-05, "loss": 1.3482, "step": 4398 }, { "epoch": 0.3236790066681996, "grad_norm": 0.80078125, "learning_rate": 3.8256111976636135e-05, "loss": 0.672, "step": 4399 }, { "epoch": 0.3237525868015636, "grad_norm": 0.93359375, "learning_rate": 3.825120124751895e-05, "loss": 1.0889, "step": 4400 }, { "epoch": 0.32382616693492755, "grad_norm": 0.7734375, "learning_rate": 3.8246289807234837e-05, "loss": 0.9362, "step": 4401 }, { "epoch": 0.32389974706829155, "grad_norm": 0.73828125, "learning_rate": 3.824137765604736e-05, "loss": 0.558, "step": 4402 }, { "epoch": 0.32397332720165556, "grad_norm": 0.6875, "learning_rate": 3.823646479422016e-05, "loss": 0.6618, "step": 4403 }, { "epoch": 0.32404690733501956, "grad_norm": 0.78125, "learning_rate": 3.8231551222016884e-05, "loss": 0.7082, "step": 4404 }, { "epoch": 0.3241204874683835, "grad_norm": 0.7890625, "learning_rate": 3.822663693970126e-05, "loss": 0.8813, "step": 4405 }, { "epoch": 0.3241940676017475, "grad_norm": 0.953125, "learning_rate": 3.8221721947537e-05, "loss": 0.7983, "step": 4406 }, { "epoch": 0.3242676477351115, "grad_norm": 0.921875, "learning_rate": 3.82168062457879e-05, "loss": 0.8671, "step": 4407 }, { "epoch": 0.3243412278684755, "grad_norm": 0.70703125, "learning_rate": 3.8211889834717775e-05, "loss": 0.64, "step": 4408 }, { "epoch": 0.3244148080018395, "grad_norm": 0.890625, "learning_rate": 3.8206972714590474e-05, "loss": 0.9115, "step": 4409 }, { "epoch": 0.3244883881352035, "grad_norm": 1.046875, "learning_rate": 3.820205488566989e-05, "loss": 1.4206, "step": 4410 }, { "epoch": 0.3245619682685675, "grad_norm": 0.83984375, "learning_rate": 3.819713634821995e-05, "loss": 0.6944, "step": 4411 }, { "epoch": 0.3246355484019315, "grad_norm": 0.88671875, "learning_rate": 3.819221710250464e-05, "loss": 0.614, "step": 4412 }, { "epoch": 0.3247091285352955, "grad_norm": 1.3984375, "learning_rate": 3.818729714878795e-05, "loss": 0.764, "step": 4413 }, { "epoch": 0.32478270866865944, "grad_norm": 1.2109375, "learning_rate": 3.8182376487333925e-05, "loss": 1.1189, "step": 4414 }, { "epoch": 0.32485628880202344, "grad_norm": 0.74609375, "learning_rate": 3.817745511840665e-05, "loss": 0.6934, "step": 4415 }, { "epoch": 0.32492986893538744, "grad_norm": 0.87890625, "learning_rate": 3.8172533042270255e-05, "loss": 1.1779, "step": 4416 }, { "epoch": 0.32500344906875145, "grad_norm": 1.0703125, "learning_rate": 3.816761025918889e-05, "loss": 1.0069, "step": 4417 }, { "epoch": 0.32507702920211545, "grad_norm": 1.140625, "learning_rate": 3.816268676942676e-05, "loss": 1.2744, "step": 4418 }, { "epoch": 0.3251506093354794, "grad_norm": 0.79296875, "learning_rate": 3.815776257324808e-05, "loss": 0.8949, "step": 4419 }, { "epoch": 0.3252241894688434, "grad_norm": 0.83984375, "learning_rate": 3.815283767091713e-05, "loss": 0.8184, "step": 4420 }, { "epoch": 0.3252977696022074, "grad_norm": 0.83203125, "learning_rate": 3.814791206269823e-05, "loss": 0.9131, "step": 4421 }, { "epoch": 0.3253713497355714, "grad_norm": 0.95703125, "learning_rate": 3.8142985748855726e-05, "loss": 1.1168, "step": 4422 }, { "epoch": 0.32544492986893536, "grad_norm": 0.94921875, "learning_rate": 3.8138058729654e-05, "loss": 0.8302, "step": 4423 }, { "epoch": 0.32551851000229937, "grad_norm": 0.76171875, "learning_rate": 3.813313100535747e-05, "loss": 0.8193, "step": 4424 }, { "epoch": 0.32559209013566337, "grad_norm": 0.99609375, "learning_rate": 3.8128202576230616e-05, "loss": 1.2543, "step": 4425 }, { "epoch": 0.3256656702690274, "grad_norm": 0.859375, "learning_rate": 3.8123273442537914e-05, "loss": 1.0079, "step": 4426 }, { "epoch": 0.3257392504023914, "grad_norm": 0.8984375, "learning_rate": 3.8118343604543916e-05, "loss": 0.7694, "step": 4427 }, { "epoch": 0.3258128305357553, "grad_norm": 1.1875, "learning_rate": 3.8113413062513195e-05, "loss": 0.9604, "step": 4428 }, { "epoch": 0.32588641066911933, "grad_norm": 1.0546875, "learning_rate": 3.8108481816710365e-05, "loss": 1.205, "step": 4429 }, { "epoch": 0.32595999080248333, "grad_norm": 0.92578125, "learning_rate": 3.810354986740007e-05, "loss": 1.4123, "step": 4430 }, { "epoch": 0.32603357093584734, "grad_norm": 0.890625, "learning_rate": 3.8098617214847e-05, "loss": 1.187, "step": 4431 }, { "epoch": 0.3261071510692113, "grad_norm": 0.75, "learning_rate": 3.809368385931589e-05, "loss": 0.8348, "step": 4432 }, { "epoch": 0.3261807312025753, "grad_norm": 0.94140625, "learning_rate": 3.80887498010715e-05, "loss": 1.1723, "step": 4433 }, { "epoch": 0.3262543113359393, "grad_norm": 0.69921875, "learning_rate": 3.808381504037862e-05, "loss": 0.6599, "step": 4434 }, { "epoch": 0.3263278914693033, "grad_norm": 0.8125, "learning_rate": 3.8078879577502105e-05, "loss": 0.7088, "step": 4435 }, { "epoch": 0.3264014716026673, "grad_norm": 0.80859375, "learning_rate": 3.807394341270682e-05, "loss": 0.7271, "step": 4436 }, { "epoch": 0.32647505173603125, "grad_norm": 0.82421875, "learning_rate": 3.806900654625769e-05, "loss": 0.7788, "step": 4437 }, { "epoch": 0.32654863186939526, "grad_norm": 0.8203125, "learning_rate": 3.806406897841966e-05, "loss": 1.0432, "step": 4438 }, { "epoch": 0.32662221200275926, "grad_norm": 0.921875, "learning_rate": 3.805913070945773e-05, "loss": 1.1296, "step": 4439 }, { "epoch": 0.32669579213612326, "grad_norm": 0.75, "learning_rate": 3.8054191739636916e-05, "loss": 0.849, "step": 4440 }, { "epoch": 0.3267693722694872, "grad_norm": 0.7578125, "learning_rate": 3.804925206922229e-05, "loss": 0.8604, "step": 4441 }, { "epoch": 0.3268429524028512, "grad_norm": 0.83984375, "learning_rate": 3.804431169847894e-05, "loss": 0.853, "step": 4442 }, { "epoch": 0.3269165325362152, "grad_norm": 0.91015625, "learning_rate": 3.8039370627672026e-05, "loss": 0.747, "step": 4443 }, { "epoch": 0.3269901126695792, "grad_norm": 1.71875, "learning_rate": 3.8034428857066716e-05, "loss": 0.8982, "step": 4444 }, { "epoch": 0.32706369280294323, "grad_norm": 0.8046875, "learning_rate": 3.802948638692823e-05, "loss": 0.8946, "step": 4445 }, { "epoch": 0.3271372729363072, "grad_norm": 1.1796875, "learning_rate": 3.802454321752182e-05, "loss": 1.32, "step": 4446 }, { "epoch": 0.3272108530696712, "grad_norm": 0.81640625, "learning_rate": 3.801959934911277e-05, "loss": 0.8762, "step": 4447 }, { "epoch": 0.3272844332030352, "grad_norm": 0.87109375, "learning_rate": 3.801465478196642e-05, "loss": 1.0046, "step": 4448 }, { "epoch": 0.3273580133363992, "grad_norm": 0.73828125, "learning_rate": 3.800970951634812e-05, "loss": 0.6777, "step": 4449 }, { "epoch": 0.32743159346976314, "grad_norm": 0.75, "learning_rate": 3.8004763552523296e-05, "loss": 0.9015, "step": 4450 }, { "epoch": 0.32750517360312714, "grad_norm": 0.90234375, "learning_rate": 3.799981689075737e-05, "loss": 0.7905, "step": 4451 }, { "epoch": 0.32757875373649115, "grad_norm": 0.81640625, "learning_rate": 3.799486953131582e-05, "loss": 1.1968, "step": 4452 }, { "epoch": 0.32765233386985515, "grad_norm": 0.78515625, "learning_rate": 3.798992147446417e-05, "loss": 0.8914, "step": 4453 }, { "epoch": 0.32772591400321915, "grad_norm": 0.8203125, "learning_rate": 3.798497272046796e-05, "loss": 0.8838, "step": 4454 }, { "epoch": 0.3277994941365831, "grad_norm": 0.71875, "learning_rate": 3.79800232695928e-05, "loss": 0.7554, "step": 4455 }, { "epoch": 0.3278730742699471, "grad_norm": 0.8203125, "learning_rate": 3.79750731221043e-05, "loss": 0.6986, "step": 4456 }, { "epoch": 0.3279466544033111, "grad_norm": 0.87109375, "learning_rate": 3.7970122278268136e-05, "loss": 1.3797, "step": 4457 }, { "epoch": 0.3280202345366751, "grad_norm": 1.0703125, "learning_rate": 3.7965170738350006e-05, "loss": 0.8397, "step": 4458 }, { "epoch": 0.32809381467003906, "grad_norm": 0.66015625, "learning_rate": 3.7960218502615655e-05, "loss": 0.8418, "step": 4459 }, { "epoch": 0.32816739480340307, "grad_norm": 0.8125, "learning_rate": 3.795526557133085e-05, "loss": 0.946, "step": 4460 }, { "epoch": 0.32824097493676707, "grad_norm": 0.96484375, "learning_rate": 3.7950311944761405e-05, "loss": 1.3836, "step": 4461 }, { "epoch": 0.3283145550701311, "grad_norm": 0.80078125, "learning_rate": 3.7945357623173185e-05, "loss": 0.8702, "step": 4462 }, { "epoch": 0.3283881352034951, "grad_norm": 0.79296875, "learning_rate": 3.794040260683207e-05, "loss": 0.7486, "step": 4463 }, { "epoch": 0.32846171533685903, "grad_norm": 0.81640625, "learning_rate": 3.793544689600399e-05, "loss": 0.8161, "step": 4464 }, { "epoch": 0.32853529547022303, "grad_norm": 1.0625, "learning_rate": 3.7930490490954904e-05, "loss": 1.0262, "step": 4465 }, { "epoch": 0.32860887560358704, "grad_norm": 0.73046875, "learning_rate": 3.792553339195082e-05, "loss": 0.7622, "step": 4466 }, { "epoch": 0.32868245573695104, "grad_norm": 0.80078125, "learning_rate": 3.7920575599257754e-05, "loss": 0.7319, "step": 4467 }, { "epoch": 0.328756035870315, "grad_norm": 0.84765625, "learning_rate": 3.791561711314182e-05, "loss": 0.994, "step": 4468 }, { "epoch": 0.328829616003679, "grad_norm": 0.91796875, "learning_rate": 3.79106579338691e-05, "loss": 0.9103, "step": 4469 }, { "epoch": 0.328903196137043, "grad_norm": 0.6796875, "learning_rate": 3.790569806170576e-05, "loss": 0.8773, "step": 4470 }, { "epoch": 0.328976776270407, "grad_norm": 0.92578125, "learning_rate": 3.790073749691798e-05, "loss": 1.2341, "step": 4471 }, { "epoch": 0.329050356403771, "grad_norm": 0.75, "learning_rate": 3.789577623977198e-05, "loss": 0.6619, "step": 4472 }, { "epoch": 0.32912393653713495, "grad_norm": 0.7421875, "learning_rate": 3.789081429053403e-05, "loss": 0.7464, "step": 4473 }, { "epoch": 0.32919751667049896, "grad_norm": 0.828125, "learning_rate": 3.7885851649470415e-05, "loss": 0.8021, "step": 4474 }, { "epoch": 0.32927109680386296, "grad_norm": 0.88671875, "learning_rate": 3.788088831684749e-05, "loss": 0.8931, "step": 4475 }, { "epoch": 0.32934467693722697, "grad_norm": 0.78515625, "learning_rate": 3.787592429293161e-05, "loss": 0.9185, "step": 4476 }, { "epoch": 0.3294182570705909, "grad_norm": 0.8671875, "learning_rate": 3.787095957798919e-05, "loss": 0.9234, "step": 4477 }, { "epoch": 0.3294918372039549, "grad_norm": 1.078125, "learning_rate": 3.78659941722867e-05, "loss": 0.9418, "step": 4478 }, { "epoch": 0.3295654173373189, "grad_norm": 0.77734375, "learning_rate": 3.786102807609059e-05, "loss": 1.0555, "step": 4479 }, { "epoch": 0.3296389974706829, "grad_norm": 0.875, "learning_rate": 3.785606128966739e-05, "loss": 0.8578, "step": 4480 }, { "epoch": 0.32971257760404693, "grad_norm": 0.69921875, "learning_rate": 3.785109381328365e-05, "loss": 0.7296, "step": 4481 }, { "epoch": 0.3297861577374109, "grad_norm": 0.828125, "learning_rate": 3.7846125647206e-05, "loss": 0.9887, "step": 4482 }, { "epoch": 0.3298597378707749, "grad_norm": 1.0390625, "learning_rate": 3.784115679170105e-05, "loss": 1.4146, "step": 4483 }, { "epoch": 0.3299333180041389, "grad_norm": 1.453125, "learning_rate": 3.783618724703546e-05, "loss": 0.9495, "step": 4484 }, { "epoch": 0.3300068981375029, "grad_norm": 0.85546875, "learning_rate": 3.783121701347594e-05, "loss": 1.0103, "step": 4485 }, { "epoch": 0.33008047827086684, "grad_norm": 1.234375, "learning_rate": 3.782624609128924e-05, "loss": 1.0129, "step": 4486 }, { "epoch": 0.33015405840423084, "grad_norm": 0.7734375, "learning_rate": 3.782127448074214e-05, "loss": 0.658, "step": 4487 }, { "epoch": 0.33022763853759485, "grad_norm": 0.98828125, "learning_rate": 3.7816302182101456e-05, "loss": 1.1676, "step": 4488 }, { "epoch": 0.33030121867095885, "grad_norm": 0.71484375, "learning_rate": 3.7811329195634044e-05, "loss": 0.7587, "step": 4489 }, { "epoch": 0.33037479880432286, "grad_norm": 1.09375, "learning_rate": 3.7806355521606787e-05, "loss": 1.4104, "step": 4490 }, { "epoch": 0.3304483789376868, "grad_norm": 0.9609375, "learning_rate": 3.780138116028662e-05, "loss": 1.2279, "step": 4491 }, { "epoch": 0.3305219590710508, "grad_norm": 0.8984375, "learning_rate": 3.77964061119405e-05, "loss": 1.0594, "step": 4492 }, { "epoch": 0.3305955392044148, "grad_norm": 0.703125, "learning_rate": 3.7791430376835425e-05, "loss": 0.8484, "step": 4493 }, { "epoch": 0.3306691193377788, "grad_norm": 1.0390625, "learning_rate": 3.778645395523845e-05, "loss": 1.0097, "step": 4494 }, { "epoch": 0.33074269947114276, "grad_norm": 0.92578125, "learning_rate": 3.7781476847416645e-05, "loss": 0.8819, "step": 4495 }, { "epoch": 0.33081627960450677, "grad_norm": 0.859375, "learning_rate": 3.777649905363712e-05, "loss": 1.1707, "step": 4496 }, { "epoch": 0.3308898597378708, "grad_norm": 0.76171875, "learning_rate": 3.7771520574167006e-05, "loss": 0.679, "step": 4497 }, { "epoch": 0.3309634398712348, "grad_norm": 0.88671875, "learning_rate": 3.7766541409273524e-05, "loss": 0.758, "step": 4498 }, { "epoch": 0.3310370200045988, "grad_norm": 0.89453125, "learning_rate": 3.7761561559223865e-05, "loss": 0.8395, "step": 4499 }, { "epoch": 0.33111060013796273, "grad_norm": 0.8671875, "learning_rate": 3.77565810242853e-05, "loss": 0.7264, "step": 4500 }, { "epoch": 0.33118418027132673, "grad_norm": 0.87109375, "learning_rate": 3.775159980472513e-05, "loss": 1.4052, "step": 4501 }, { "epoch": 0.33125776040469074, "grad_norm": 6.90625, "learning_rate": 3.7746617900810685e-05, "loss": 0.8163, "step": 4502 }, { "epoch": 0.33133134053805474, "grad_norm": 0.8359375, "learning_rate": 3.774163531280933e-05, "loss": 0.9579, "step": 4503 }, { "epoch": 0.3314049206714187, "grad_norm": 0.921875, "learning_rate": 3.7736652040988476e-05, "loss": 0.6861, "step": 4504 }, { "epoch": 0.3314785008047827, "grad_norm": 0.703125, "learning_rate": 3.773166808561556e-05, "loss": 0.6254, "step": 4505 }, { "epoch": 0.3315520809381467, "grad_norm": 0.75, "learning_rate": 3.772668344695806e-05, "loss": 0.9836, "step": 4506 }, { "epoch": 0.3316256610715107, "grad_norm": 0.8046875, "learning_rate": 3.77216981252835e-05, "loss": 0.708, "step": 4507 }, { "epoch": 0.3316992412048747, "grad_norm": 0.953125, "learning_rate": 3.7716712120859435e-05, "loss": 0.9957, "step": 4508 }, { "epoch": 0.33177282133823865, "grad_norm": 0.74609375, "learning_rate": 3.771172543395344e-05, "loss": 0.7541, "step": 4509 }, { "epoch": 0.33184640147160266, "grad_norm": 1.078125, "learning_rate": 3.7706738064833155e-05, "loss": 1.4388, "step": 4510 }, { "epoch": 0.33191998160496666, "grad_norm": 0.66015625, "learning_rate": 3.770175001376623e-05, "loss": 0.6819, "step": 4511 }, { "epoch": 0.33199356173833067, "grad_norm": 0.92578125, "learning_rate": 3.7696761281020387e-05, "loss": 1.098, "step": 4512 }, { "epoch": 0.3320671418716946, "grad_norm": 0.953125, "learning_rate": 3.769177186686334e-05, "loss": 0.9928, "step": 4513 }, { "epoch": 0.3321407220050586, "grad_norm": 0.76953125, "learning_rate": 3.768678177156286e-05, "loss": 0.759, "step": 4514 }, { "epoch": 0.3322143021384226, "grad_norm": 0.75390625, "learning_rate": 3.768179099538678e-05, "loss": 0.8186, "step": 4515 }, { "epoch": 0.3322878822717866, "grad_norm": 0.7578125, "learning_rate": 3.767679953860292e-05, "loss": 0.7773, "step": 4516 }, { "epoch": 0.33236146240515063, "grad_norm": 0.8359375, "learning_rate": 3.767180740147918e-05, "loss": 0.9745, "step": 4517 }, { "epoch": 0.3324350425385146, "grad_norm": 0.68359375, "learning_rate": 3.766681458428346e-05, "loss": 0.7881, "step": 4518 }, { "epoch": 0.3325086226718786, "grad_norm": 0.76953125, "learning_rate": 3.7661821087283726e-05, "loss": 0.8318, "step": 4519 }, { "epoch": 0.3325822028052426, "grad_norm": 1.0234375, "learning_rate": 3.765682691074797e-05, "loss": 0.9919, "step": 4520 }, { "epoch": 0.3326557829386066, "grad_norm": 0.90234375, "learning_rate": 3.765183205494422e-05, "loss": 0.9731, "step": 4521 }, { "epoch": 0.33272936307197054, "grad_norm": 1.125, "learning_rate": 3.7646836520140536e-05, "loss": 1.3843, "step": 4522 }, { "epoch": 0.33280294320533454, "grad_norm": 0.82421875, "learning_rate": 3.7641840306605034e-05, "loss": 0.79, "step": 4523 }, { "epoch": 0.33287652333869855, "grad_norm": 0.796875, "learning_rate": 3.763684341460583e-05, "loss": 0.6936, "step": 4524 }, { "epoch": 0.33295010347206255, "grad_norm": 0.83984375, "learning_rate": 3.763184584441111e-05, "loss": 0.9758, "step": 4525 }, { "epoch": 0.33302368360542656, "grad_norm": 1.0078125, "learning_rate": 3.762684759628908e-05, "loss": 0.7863, "step": 4526 }, { "epoch": 0.3330972637387905, "grad_norm": 0.59375, "learning_rate": 3.7621848670508e-05, "loss": 0.6345, "step": 4527 }, { "epoch": 0.3331708438721545, "grad_norm": 0.83984375, "learning_rate": 3.761684906733613e-05, "loss": 1.0841, "step": 4528 }, { "epoch": 0.3332444240055185, "grad_norm": 0.87890625, "learning_rate": 3.7611848787041794e-05, "loss": 1.062, "step": 4529 }, { "epoch": 0.3333180041388825, "grad_norm": 0.9375, "learning_rate": 3.760684782989337e-05, "loss": 1.0346, "step": 4530 }, { "epoch": 0.33339158427224647, "grad_norm": 0.98828125, "learning_rate": 3.760184619615922e-05, "loss": 1.297, "step": 4531 }, { "epoch": 0.33346516440561047, "grad_norm": 0.94140625, "learning_rate": 3.759684388610779e-05, "loss": 0.8569, "step": 4532 }, { "epoch": 0.3335387445389745, "grad_norm": 1.1328125, "learning_rate": 3.7591840900007544e-05, "loss": 1.1531, "step": 4533 }, { "epoch": 0.3336123246723385, "grad_norm": 0.98046875, "learning_rate": 3.758683723812698e-05, "loss": 0.9587, "step": 4534 }, { "epoch": 0.3336859048057025, "grad_norm": 0.97265625, "learning_rate": 3.758183290073463e-05, "loss": 1.0634, "step": 4535 }, { "epoch": 0.33375948493906643, "grad_norm": 1.03125, "learning_rate": 3.757682788809907e-05, "loss": 0.9081, "step": 4536 }, { "epoch": 0.33383306507243043, "grad_norm": 0.8125, "learning_rate": 3.757182220048893e-05, "loss": 0.7661, "step": 4537 }, { "epoch": 0.33390664520579444, "grad_norm": 0.92578125, "learning_rate": 3.7566815838172815e-05, "loss": 0.9866, "step": 4538 }, { "epoch": 0.33398022533915844, "grad_norm": 0.6015625, "learning_rate": 3.756180880141944e-05, "loss": 0.526, "step": 4539 }, { "epoch": 0.3340538054725224, "grad_norm": 1.0234375, "learning_rate": 3.7556801090497504e-05, "loss": 1.2827, "step": 4540 }, { "epoch": 0.3341273856058864, "grad_norm": 0.87109375, "learning_rate": 3.755179270567578e-05, "loss": 0.9763, "step": 4541 }, { "epoch": 0.3342009657392504, "grad_norm": 1.15625, "learning_rate": 3.754678364722304e-05, "loss": 1.3209, "step": 4542 }, { "epoch": 0.3342745458726144, "grad_norm": 0.87890625, "learning_rate": 3.754177391540812e-05, "loss": 1.079, "step": 4543 }, { "epoch": 0.3343481260059784, "grad_norm": 0.78515625, "learning_rate": 3.7536763510499895e-05, "loss": 1.1404, "step": 4544 }, { "epoch": 0.33442170613934236, "grad_norm": 1.0, "learning_rate": 3.753175243276724e-05, "loss": 1.2987, "step": 4545 }, { "epoch": 0.33449528627270636, "grad_norm": 0.88671875, "learning_rate": 3.752674068247911e-05, "loss": 1.0372, "step": 4546 }, { "epoch": 0.33456886640607036, "grad_norm": 0.7734375, "learning_rate": 3.752172825990446e-05, "loss": 0.9713, "step": 4547 }, { "epoch": 0.33464244653943437, "grad_norm": 0.63671875, "learning_rate": 3.75167151653123e-05, "loss": 0.6626, "step": 4548 }, { "epoch": 0.3347160266727983, "grad_norm": 0.75390625, "learning_rate": 3.751170139897168e-05, "loss": 0.8907, "step": 4549 }, { "epoch": 0.3347896068061623, "grad_norm": 1.15625, "learning_rate": 3.750668696115167e-05, "loss": 0.7901, "step": 4550 }, { "epoch": 0.3348631869395263, "grad_norm": 0.92578125, "learning_rate": 3.7501671852121414e-05, "loss": 1.1131, "step": 4551 }, { "epoch": 0.33493676707289033, "grad_norm": 1.3828125, "learning_rate": 3.7496656072150025e-05, "loss": 0.8705, "step": 4552 }, { "epoch": 0.33501034720625433, "grad_norm": 0.953125, "learning_rate": 3.749163962150671e-05, "loss": 1.1594, "step": 4553 }, { "epoch": 0.3350839273396183, "grad_norm": 0.85546875, "learning_rate": 3.74866225004607e-05, "loss": 0.7878, "step": 4554 }, { "epoch": 0.3351575074729823, "grad_norm": 0.7890625, "learning_rate": 3.7481604709281234e-05, "loss": 0.806, "step": 4555 }, { "epoch": 0.3352310876063463, "grad_norm": 0.92578125, "learning_rate": 3.7476586248237616e-05, "loss": 1.0637, "step": 4556 }, { "epoch": 0.3353046677397103, "grad_norm": 1.015625, "learning_rate": 3.747156711759918e-05, "loss": 1.2394, "step": 4557 }, { "epoch": 0.33537824787307424, "grad_norm": 0.65625, "learning_rate": 3.74665473176353e-05, "loss": 0.7652, "step": 4558 }, { "epoch": 0.33545182800643825, "grad_norm": 1.0390625, "learning_rate": 3.746152684861537e-05, "loss": 1.0927, "step": 4559 }, { "epoch": 0.33552540813980225, "grad_norm": 0.796875, "learning_rate": 3.745650571080882e-05, "loss": 1.2981, "step": 4560 }, { "epoch": 0.33559898827316625, "grad_norm": 1.03125, "learning_rate": 3.745148390448515e-05, "loss": 0.8137, "step": 4561 }, { "epoch": 0.33567256840653026, "grad_norm": 1.1953125, "learning_rate": 3.744646142991385e-05, "loss": 1.33, "step": 4562 }, { "epoch": 0.3357461485398942, "grad_norm": 1.0, "learning_rate": 3.7441438287364466e-05, "loss": 1.2443, "step": 4563 }, { "epoch": 0.3358197286732582, "grad_norm": 0.828125, "learning_rate": 3.74364144771066e-05, "loss": 0.9518, "step": 4564 }, { "epoch": 0.3358933088066222, "grad_norm": 0.73828125, "learning_rate": 3.743138999940985e-05, "loss": 0.7083, "step": 4565 }, { "epoch": 0.3359668889399862, "grad_norm": 0.97265625, "learning_rate": 3.742636485454388e-05, "loss": 0.8364, "step": 4566 }, { "epoch": 0.3360404690733502, "grad_norm": 1.015625, "learning_rate": 3.742133904277838e-05, "loss": 1.3439, "step": 4567 }, { "epoch": 0.33611404920671417, "grad_norm": 0.76171875, "learning_rate": 3.741631256438307e-05, "loss": 0.9982, "step": 4568 }, { "epoch": 0.3361876293400782, "grad_norm": 0.671875, "learning_rate": 3.7411285419627716e-05, "loss": 0.6144, "step": 4569 }, { "epoch": 0.3362612094734422, "grad_norm": 1.0703125, "learning_rate": 3.740625760878212e-05, "loss": 0.8794, "step": 4570 }, { "epoch": 0.3363347896068062, "grad_norm": 0.8828125, "learning_rate": 3.740122913211611e-05, "loss": 1.2987, "step": 4571 }, { "epoch": 0.33640836974017013, "grad_norm": 1.1171875, "learning_rate": 3.739619998989955e-05, "loss": 1.0957, "step": 4572 }, { "epoch": 0.33648194987353414, "grad_norm": 0.765625, "learning_rate": 3.739117018240235e-05, "loss": 0.7344, "step": 4573 }, { "epoch": 0.33655553000689814, "grad_norm": 0.9609375, "learning_rate": 3.7386139709894455e-05, "loss": 1.4638, "step": 4574 }, { "epoch": 0.33662911014026214, "grad_norm": 0.8515625, "learning_rate": 3.7381108572645835e-05, "loss": 0.7897, "step": 4575 }, { "epoch": 0.33670269027362615, "grad_norm": 0.87890625, "learning_rate": 3.73760767709265e-05, "loss": 1.1372, "step": 4576 }, { "epoch": 0.3367762704069901, "grad_norm": 0.921875, "learning_rate": 3.73710443050065e-05, "loss": 0.927, "step": 4577 }, { "epoch": 0.3368498505403541, "grad_norm": 0.97265625, "learning_rate": 3.736601117515592e-05, "loss": 1.03, "step": 4578 }, { "epoch": 0.3369234306737181, "grad_norm": 0.76171875, "learning_rate": 3.736097738164487e-05, "loss": 0.7169, "step": 4579 }, { "epoch": 0.3369970108070821, "grad_norm": 0.75390625, "learning_rate": 3.7355942924743525e-05, "loss": 0.9262, "step": 4580 }, { "epoch": 0.33707059094044606, "grad_norm": 1.1328125, "learning_rate": 3.7350907804722047e-05, "loss": 0.9447, "step": 4581 }, { "epoch": 0.33714417107381006, "grad_norm": 0.80078125, "learning_rate": 3.734587202185067e-05, "loss": 1.1006, "step": 4582 }, { "epoch": 0.33721775120717407, "grad_norm": 1.0625, "learning_rate": 3.7340835576399675e-05, "loss": 1.0276, "step": 4583 }, { "epoch": 0.33729133134053807, "grad_norm": 0.91796875, "learning_rate": 3.733579846863933e-05, "loss": 0.8182, "step": 4584 }, { "epoch": 0.3373649114739021, "grad_norm": 0.73828125, "learning_rate": 3.7330760698839995e-05, "loss": 0.806, "step": 4585 }, { "epoch": 0.337438491607266, "grad_norm": 0.97265625, "learning_rate": 3.732572226727201e-05, "loss": 0.7503, "step": 4586 }, { "epoch": 0.33751207174063, "grad_norm": 0.78125, "learning_rate": 3.7320683174205794e-05, "loss": 1.0204, "step": 4587 }, { "epoch": 0.33758565187399403, "grad_norm": 1.0, "learning_rate": 3.731564341991178e-05, "loss": 0.897, "step": 4588 }, { "epoch": 0.33765923200735803, "grad_norm": 0.8359375, "learning_rate": 3.7310603004660446e-05, "loss": 0.8043, "step": 4589 }, { "epoch": 0.337732812140722, "grad_norm": 0.7890625, "learning_rate": 3.7305561928722306e-05, "loss": 0.848, "step": 4590 }, { "epoch": 0.337806392274086, "grad_norm": 0.8671875, "learning_rate": 3.730052019236789e-05, "loss": 0.8005, "step": 4591 }, { "epoch": 0.33787997240745, "grad_norm": 0.9140625, "learning_rate": 3.72954777958678e-05, "loss": 0.8389, "step": 4592 }, { "epoch": 0.337953552540814, "grad_norm": 0.91796875, "learning_rate": 3.729043473949263e-05, "loss": 1.0695, "step": 4593 }, { "epoch": 0.338027132674178, "grad_norm": 1.0390625, "learning_rate": 3.728539102351305e-05, "loss": 1.1062, "step": 4594 }, { "epoch": 0.33810071280754195, "grad_norm": 0.765625, "learning_rate": 3.7280346648199724e-05, "loss": 0.7073, "step": 4595 }, { "epoch": 0.33817429294090595, "grad_norm": 0.9296875, "learning_rate": 3.72753016138234e-05, "loss": 1.2053, "step": 4596 }, { "epoch": 0.33824787307426996, "grad_norm": 0.81640625, "learning_rate": 3.727025592065481e-05, "loss": 0.8578, "step": 4597 }, { "epoch": 0.33832145320763396, "grad_norm": 0.75, "learning_rate": 3.726520956896477e-05, "loss": 0.8075, "step": 4598 }, { "epoch": 0.3383950333409979, "grad_norm": 1.1015625, "learning_rate": 3.7260162559024095e-05, "loss": 0.9552, "step": 4599 }, { "epoch": 0.3384686134743619, "grad_norm": 0.78515625, "learning_rate": 3.725511489110365e-05, "loss": 0.6871, "step": 4600 }, { "epoch": 0.3385421936077259, "grad_norm": 1.109375, "learning_rate": 3.725006656547435e-05, "loss": 1.2754, "step": 4601 }, { "epoch": 0.3386157737410899, "grad_norm": 0.73046875, "learning_rate": 3.7245017582407095e-05, "loss": 0.7249, "step": 4602 }, { "epoch": 0.3386893538744539, "grad_norm": 0.82421875, "learning_rate": 3.723996794217288e-05, "loss": 0.9319, "step": 4603 }, { "epoch": 0.3387629340078179, "grad_norm": 1.0, "learning_rate": 3.7234917645042706e-05, "loss": 0.8907, "step": 4604 }, { "epoch": 0.3388365141411819, "grad_norm": 0.7578125, "learning_rate": 3.722986669128761e-05, "loss": 0.9709, "step": 4605 }, { "epoch": 0.3389100942745459, "grad_norm": 1.046875, "learning_rate": 3.7224815081178666e-05, "loss": 1.0093, "step": 4606 }, { "epoch": 0.3389836744079099, "grad_norm": 0.94140625, "learning_rate": 3.721976281498699e-05, "loss": 1.0445, "step": 4607 }, { "epoch": 0.33905725454127383, "grad_norm": 0.91015625, "learning_rate": 3.7214709892983725e-05, "loss": 0.9872, "step": 4608 }, { "epoch": 0.33913083467463784, "grad_norm": 1.1171875, "learning_rate": 3.720965631544004e-05, "loss": 1.0319, "step": 4609 }, { "epoch": 0.33920441480800184, "grad_norm": 0.80859375, "learning_rate": 3.720460208262717e-05, "loss": 0.9197, "step": 4610 }, { "epoch": 0.33927799494136585, "grad_norm": 1.171875, "learning_rate": 3.719954719481635e-05, "loss": 0.7862, "step": 4611 }, { "epoch": 0.33935157507472985, "grad_norm": 0.890625, "learning_rate": 3.7194491652278876e-05, "loss": 0.9574, "step": 4612 }, { "epoch": 0.3394251552080938, "grad_norm": 0.796875, "learning_rate": 3.718943545528607e-05, "loss": 0.8347, "step": 4613 }, { "epoch": 0.3394987353414578, "grad_norm": 1.046875, "learning_rate": 3.718437860410929e-05, "loss": 0.9657, "step": 4614 }, { "epoch": 0.3395723154748218, "grad_norm": 0.859375, "learning_rate": 3.7179321099019916e-05, "loss": 0.8613, "step": 4615 }, { "epoch": 0.3396458956081858, "grad_norm": 0.72265625, "learning_rate": 3.717426294028938e-05, "loss": 0.8471, "step": 4616 }, { "epoch": 0.33971947574154976, "grad_norm": 0.87109375, "learning_rate": 3.7169204128189154e-05, "loss": 0.7595, "step": 4617 }, { "epoch": 0.33979305587491376, "grad_norm": 0.88671875, "learning_rate": 3.716414466299072e-05, "loss": 0.6496, "step": 4618 }, { "epoch": 0.33986663600827777, "grad_norm": 1.03125, "learning_rate": 3.715908454496563e-05, "loss": 1.1779, "step": 4619 }, { "epoch": 0.33994021614164177, "grad_norm": 0.859375, "learning_rate": 3.715402377438542e-05, "loss": 1.117, "step": 4620 }, { "epoch": 0.3400137962750058, "grad_norm": 1.0234375, "learning_rate": 3.714896235152172e-05, "loss": 1.1262, "step": 4621 }, { "epoch": 0.3400873764083697, "grad_norm": 0.89453125, "learning_rate": 3.714390027664615e-05, "loss": 0.8273, "step": 4622 }, { "epoch": 0.3401609565417337, "grad_norm": 0.78125, "learning_rate": 3.713883755003039e-05, "loss": 0.925, "step": 4623 }, { "epoch": 0.34023453667509773, "grad_norm": 0.78125, "learning_rate": 3.713377417194616e-05, "loss": 0.9008, "step": 4624 }, { "epoch": 0.34030811680846174, "grad_norm": 0.78125, "learning_rate": 3.7128710142665166e-05, "loss": 0.7594, "step": 4625 }, { "epoch": 0.3403816969418257, "grad_norm": 0.8046875, "learning_rate": 3.712364546245922e-05, "loss": 0.8971, "step": 4626 }, { "epoch": 0.3404552770751897, "grad_norm": 0.69140625, "learning_rate": 3.711858013160012e-05, "loss": 1.0136, "step": 4627 }, { "epoch": 0.3405288572085537, "grad_norm": 0.921875, "learning_rate": 3.711351415035971e-05, "loss": 1.2684, "step": 4628 }, { "epoch": 0.3406024373419177, "grad_norm": 0.83984375, "learning_rate": 3.710844751900988e-05, "loss": 0.8905, "step": 4629 }, { "epoch": 0.3406760174752817, "grad_norm": 1.28125, "learning_rate": 3.7103380237822525e-05, "loss": 0.9414, "step": 4630 }, { "epoch": 0.34074959760864565, "grad_norm": 1.140625, "learning_rate": 3.7098312307069626e-05, "loss": 1.092, "step": 4631 }, { "epoch": 0.34082317774200965, "grad_norm": 0.89453125, "learning_rate": 3.7093243727023154e-05, "loss": 0.9318, "step": 4632 }, { "epoch": 0.34089675787537366, "grad_norm": 0.63671875, "learning_rate": 3.7088174497955136e-05, "loss": 0.6058, "step": 4633 }, { "epoch": 0.34097033800873766, "grad_norm": 0.87890625, "learning_rate": 3.7083104620137624e-05, "loss": 0.9116, "step": 4634 }, { "epoch": 0.3410439181421016, "grad_norm": 1.0, "learning_rate": 3.70780340938427e-05, "loss": 1.1056, "step": 4635 }, { "epoch": 0.3411174982754656, "grad_norm": 0.83203125, "learning_rate": 3.70729629193425e-05, "loss": 1.045, "step": 4636 }, { "epoch": 0.3411910784088296, "grad_norm": 1.0859375, "learning_rate": 3.706789109690919e-05, "loss": 1.3281, "step": 4637 }, { "epoch": 0.3412646585421936, "grad_norm": 0.61328125, "learning_rate": 3.706281862681495e-05, "loss": 0.6027, "step": 4638 }, { "epoch": 0.3413382386755576, "grad_norm": 0.91796875, "learning_rate": 3.705774550933202e-05, "loss": 1.3672, "step": 4639 }, { "epoch": 0.3414118188089216, "grad_norm": 1.0390625, "learning_rate": 3.705267174473267e-05, "loss": 1.2316, "step": 4640 }, { "epoch": 0.3414853989422856, "grad_norm": 0.77734375, "learning_rate": 3.704759733328918e-05, "loss": 1.1305, "step": 4641 }, { "epoch": 0.3415589790756496, "grad_norm": 0.87109375, "learning_rate": 3.704252227527391e-05, "loss": 1.2627, "step": 4642 }, { "epoch": 0.3416325592090136, "grad_norm": 0.87890625, "learning_rate": 3.703744657095919e-05, "loss": 0.7576, "step": 4643 }, { "epoch": 0.34170613934237754, "grad_norm": 0.9140625, "learning_rate": 3.7032370220617476e-05, "loss": 1.2629, "step": 4644 }, { "epoch": 0.34177971947574154, "grad_norm": 0.81640625, "learning_rate": 3.702729322452116e-05, "loss": 0.898, "step": 4645 }, { "epoch": 0.34185329960910554, "grad_norm": 1.046875, "learning_rate": 3.702221558294274e-05, "loss": 1.3024, "step": 4646 }, { "epoch": 0.34192687974246955, "grad_norm": 0.78125, "learning_rate": 3.701713729615471e-05, "loss": 1.4048, "step": 4647 }, { "epoch": 0.34200045987583355, "grad_norm": 0.87890625, "learning_rate": 3.701205836442963e-05, "loss": 0.8842, "step": 4648 }, { "epoch": 0.3420740400091975, "grad_norm": 0.85546875, "learning_rate": 3.700697878804006e-05, "loss": 0.9754, "step": 4649 }, { "epoch": 0.3421476201425615, "grad_norm": 0.9140625, "learning_rate": 3.7001898567258605e-05, "loss": 1.0291, "step": 4650 }, { "epoch": 0.3422212002759255, "grad_norm": 0.8046875, "learning_rate": 3.699681770235794e-05, "loss": 0.9822, "step": 4651 }, { "epoch": 0.3422947804092895, "grad_norm": 0.984375, "learning_rate": 3.6991736193610724e-05, "loss": 1.2103, "step": 4652 }, { "epoch": 0.34236836054265346, "grad_norm": 0.97265625, "learning_rate": 3.698665404128967e-05, "loss": 1.2218, "step": 4653 }, { "epoch": 0.34244194067601746, "grad_norm": 1.1015625, "learning_rate": 3.698157124566753e-05, "loss": 1.1661, "step": 4654 }, { "epoch": 0.34251552080938147, "grad_norm": 1.0546875, "learning_rate": 3.6976487807017104e-05, "loss": 0.911, "step": 4655 }, { "epoch": 0.3425891009427455, "grad_norm": 0.7421875, "learning_rate": 3.6971403725611186e-05, "loss": 0.7984, "step": 4656 }, { "epoch": 0.3426626810761095, "grad_norm": 0.90234375, "learning_rate": 3.6966319001722646e-05, "loss": 0.6459, "step": 4657 }, { "epoch": 0.3427362612094734, "grad_norm": 1.015625, "learning_rate": 3.6961233635624364e-05, "loss": 1.0434, "step": 4658 }, { "epoch": 0.34280984134283743, "grad_norm": 1.0234375, "learning_rate": 3.695614762758927e-05, "loss": 1.3347, "step": 4659 }, { "epoch": 0.34288342147620143, "grad_norm": 0.8515625, "learning_rate": 3.6951060977890305e-05, "loss": 0.828, "step": 4660 }, { "epoch": 0.34295700160956544, "grad_norm": 1.0546875, "learning_rate": 3.694597368680048e-05, "loss": 0.766, "step": 4661 }, { "epoch": 0.3430305817429294, "grad_norm": 1.0546875, "learning_rate": 3.69408857545928e-05, "loss": 1.3484, "step": 4662 }, { "epoch": 0.3431041618762934, "grad_norm": 0.890625, "learning_rate": 3.693579718154034e-05, "loss": 0.6742, "step": 4663 }, { "epoch": 0.3431777420096574, "grad_norm": 0.8984375, "learning_rate": 3.693070796791619e-05, "loss": 0.9636, "step": 4664 }, { "epoch": 0.3432513221430214, "grad_norm": 0.82421875, "learning_rate": 3.692561811399348e-05, "loss": 0.7635, "step": 4665 }, { "epoch": 0.3433249022763854, "grad_norm": 0.99609375, "learning_rate": 3.692052762004536e-05, "loss": 1.2835, "step": 4666 }, { "epoch": 0.34339848240974935, "grad_norm": 0.90625, "learning_rate": 3.691543648634505e-05, "loss": 0.7873, "step": 4667 }, { "epoch": 0.34347206254311335, "grad_norm": 1.0625, "learning_rate": 3.691034471316576e-05, "loss": 1.097, "step": 4668 }, { "epoch": 0.34354564267647736, "grad_norm": 1.0625, "learning_rate": 3.6905252300780765e-05, "loss": 0.7454, "step": 4669 }, { "epoch": 0.34361922280984136, "grad_norm": 0.91796875, "learning_rate": 3.6900159249463364e-05, "loss": 0.9617, "step": 4670 }, { "epoch": 0.3436928029432053, "grad_norm": 0.7578125, "learning_rate": 3.6895065559486894e-05, "loss": 0.8249, "step": 4671 }, { "epoch": 0.3437663830765693, "grad_norm": 1.0078125, "learning_rate": 3.6889971231124725e-05, "loss": 1.4166, "step": 4672 }, { "epoch": 0.3438399632099333, "grad_norm": 0.703125, "learning_rate": 3.688487626465025e-05, "loss": 0.6122, "step": 4673 }, { "epoch": 0.3439135433432973, "grad_norm": 0.8046875, "learning_rate": 3.687978066033693e-05, "loss": 0.8879, "step": 4674 }, { "epoch": 0.3439871234766613, "grad_norm": 0.78125, "learning_rate": 3.68746844184582e-05, "loss": 0.8693, "step": 4675 }, { "epoch": 0.3440607036100253, "grad_norm": 0.93359375, "learning_rate": 3.686958753928759e-05, "loss": 1.2325, "step": 4676 }, { "epoch": 0.3441342837433893, "grad_norm": 0.8828125, "learning_rate": 3.686449002309864e-05, "loss": 0.9057, "step": 4677 }, { "epoch": 0.3442078638767533, "grad_norm": 0.90234375, "learning_rate": 3.685939187016492e-05, "loss": 1.0158, "step": 4678 }, { "epoch": 0.3442814440101173, "grad_norm": 0.8046875, "learning_rate": 3.6854293080760036e-05, "loss": 0.5932, "step": 4679 }, { "epoch": 0.34435502414348124, "grad_norm": 0.8125, "learning_rate": 3.684919365515762e-05, "loss": 0.7456, "step": 4680 }, { "epoch": 0.34442860427684524, "grad_norm": 0.609375, "learning_rate": 3.684409359363138e-05, "loss": 0.5749, "step": 4681 }, { "epoch": 0.34450218441020924, "grad_norm": 0.66796875, "learning_rate": 3.6838992896455e-05, "loss": 0.7502, "step": 4682 }, { "epoch": 0.34457576454357325, "grad_norm": 0.95703125, "learning_rate": 3.6833891563902225e-05, "loss": 0.8746, "step": 4683 }, { "epoch": 0.34464934467693725, "grad_norm": 0.78125, "learning_rate": 3.6828789596246846e-05, "loss": 0.9033, "step": 4684 }, { "epoch": 0.3447229248103012, "grad_norm": 0.98828125, "learning_rate": 3.682368699376268e-05, "loss": 1.0363, "step": 4685 }, { "epoch": 0.3447965049436652, "grad_norm": 1.015625, "learning_rate": 3.681858375672355e-05, "loss": 1.1239, "step": 4686 }, { "epoch": 0.3448700850770292, "grad_norm": 0.79296875, "learning_rate": 3.6813479885403355e-05, "loss": 0.8014, "step": 4687 }, { "epoch": 0.3449436652103932, "grad_norm": 0.83984375, "learning_rate": 3.680837538007601e-05, "loss": 1.2095, "step": 4688 }, { "epoch": 0.34501724534375716, "grad_norm": 0.7578125, "learning_rate": 3.6803270241015465e-05, "loss": 0.6501, "step": 4689 }, { "epoch": 0.34509082547712117, "grad_norm": 0.859375, "learning_rate": 3.6798164468495696e-05, "loss": 1.0622, "step": 4690 }, { "epoch": 0.34516440561048517, "grad_norm": 0.87890625, "learning_rate": 3.679305806279072e-05, "loss": 0.8261, "step": 4691 }, { "epoch": 0.3452379857438492, "grad_norm": 0.87890625, "learning_rate": 3.67879510241746e-05, "loss": 0.7577, "step": 4692 }, { "epoch": 0.3453115658772132, "grad_norm": 1.03125, "learning_rate": 3.67828433529214e-05, "loss": 1.22, "step": 4693 }, { "epoch": 0.3453851460105771, "grad_norm": 0.92578125, "learning_rate": 3.677773504930526e-05, "loss": 0.9988, "step": 4694 }, { "epoch": 0.34545872614394113, "grad_norm": 1.0625, "learning_rate": 3.677262611360033e-05, "loss": 1.5305, "step": 4695 }, { "epoch": 0.34553230627730513, "grad_norm": 0.7109375, "learning_rate": 3.6767516546080786e-05, "loss": 0.6164, "step": 4696 }, { "epoch": 0.34560588641066914, "grad_norm": 1.0078125, "learning_rate": 3.6762406347020856e-05, "loss": 1.3206, "step": 4697 }, { "epoch": 0.3456794665440331, "grad_norm": 0.83984375, "learning_rate": 3.675729551669479e-05, "loss": 0.7833, "step": 4698 }, { "epoch": 0.3457530466773971, "grad_norm": 0.69921875, "learning_rate": 3.6752184055376886e-05, "loss": 0.6527, "step": 4699 }, { "epoch": 0.3458266268107611, "grad_norm": 0.91015625, "learning_rate": 3.6747071963341454e-05, "loss": 1.1474, "step": 4700 }, { "epoch": 0.3459002069441251, "grad_norm": 0.71875, "learning_rate": 3.674195924086287e-05, "loss": 0.599, "step": 4701 }, { "epoch": 0.3459737870774891, "grad_norm": 0.96484375, "learning_rate": 3.673684588821549e-05, "loss": 0.8061, "step": 4702 }, { "epoch": 0.34604736721085305, "grad_norm": 0.87890625, "learning_rate": 3.6731731905673774e-05, "loss": 1.018, "step": 4703 }, { "epoch": 0.34612094734421706, "grad_norm": 0.7578125, "learning_rate": 3.672661729351216e-05, "loss": 0.7041, "step": 4704 }, { "epoch": 0.34619452747758106, "grad_norm": 0.953125, "learning_rate": 3.672150205200514e-05, "loss": 0.9247, "step": 4705 }, { "epoch": 0.34626810761094506, "grad_norm": 0.98828125, "learning_rate": 3.671638618142725e-05, "loss": 0.936, "step": 4706 }, { "epoch": 0.346341687744309, "grad_norm": 0.6953125, "learning_rate": 3.671126968205304e-05, "loss": 0.629, "step": 4707 }, { "epoch": 0.346415267877673, "grad_norm": 1.1640625, "learning_rate": 3.670615255415711e-05, "loss": 1.2083, "step": 4708 }, { "epoch": 0.346488848011037, "grad_norm": 0.69140625, "learning_rate": 3.670103479801408e-05, "loss": 0.7944, "step": 4709 }, { "epoch": 0.346562428144401, "grad_norm": 0.8515625, "learning_rate": 3.6695916413898603e-05, "loss": 0.7856, "step": 4710 }, { "epoch": 0.34663600827776503, "grad_norm": 0.796875, "learning_rate": 3.669079740208539e-05, "loss": 0.9476, "step": 4711 }, { "epoch": 0.346709588411129, "grad_norm": 0.62109375, "learning_rate": 3.6685677762849165e-05, "loss": 0.6409, "step": 4712 }, { "epoch": 0.346783168544493, "grad_norm": 0.97265625, "learning_rate": 3.6680557496464684e-05, "loss": 1.1041, "step": 4713 }, { "epoch": 0.346856748677857, "grad_norm": 0.7734375, "learning_rate": 3.667543660320674e-05, "loss": 0.7868, "step": 4714 }, { "epoch": 0.346930328811221, "grad_norm": 0.87109375, "learning_rate": 3.667031508335017e-05, "loss": 1.2149, "step": 4715 }, { "epoch": 0.34700390894458494, "grad_norm": 0.84765625, "learning_rate": 3.666519293716983e-05, "loss": 0.9748, "step": 4716 }, { "epoch": 0.34707748907794894, "grad_norm": 0.75390625, "learning_rate": 3.6660070164940615e-05, "loss": 0.925, "step": 4717 }, { "epoch": 0.34715106921131295, "grad_norm": 0.75, "learning_rate": 3.665494676693745e-05, "loss": 0.7965, "step": 4718 }, { "epoch": 0.34722464934467695, "grad_norm": 0.91015625, "learning_rate": 3.664982274343531e-05, "loss": 0.7948, "step": 4719 }, { "epoch": 0.34729822947804095, "grad_norm": 0.85546875, "learning_rate": 3.66446980947092e-05, "loss": 0.8549, "step": 4720 }, { "epoch": 0.3473718096114049, "grad_norm": 0.73046875, "learning_rate": 3.663957282103412e-05, "loss": 0.9215, "step": 4721 }, { "epoch": 0.3474453897447689, "grad_norm": 0.65234375, "learning_rate": 3.663444692268517e-05, "loss": 0.7448, "step": 4722 }, { "epoch": 0.3475189698781329, "grad_norm": 0.7734375, "learning_rate": 3.6629320399937414e-05, "loss": 0.9834, "step": 4723 }, { "epoch": 0.3475925500114969, "grad_norm": 0.734375, "learning_rate": 3.6624193253065996e-05, "loss": 1.0663, "step": 4724 }, { "epoch": 0.34766613014486086, "grad_norm": 0.79296875, "learning_rate": 3.6619065482346084e-05, "loss": 0.5999, "step": 4725 }, { "epoch": 0.34773971027822487, "grad_norm": 0.96875, "learning_rate": 3.6613937088052876e-05, "loss": 0.9717, "step": 4726 }, { "epoch": 0.34781329041158887, "grad_norm": 0.8359375, "learning_rate": 3.660880807046159e-05, "loss": 0.9176, "step": 4727 }, { "epoch": 0.3478868705449529, "grad_norm": 0.7734375, "learning_rate": 3.660367842984751e-05, "loss": 1.0512, "step": 4728 }, { "epoch": 0.3479604506783169, "grad_norm": 3.578125, "learning_rate": 3.659854816648592e-05, "loss": 1.1632, "step": 4729 }, { "epoch": 0.34803403081168083, "grad_norm": 0.9140625, "learning_rate": 3.6593417280652164e-05, "loss": 0.7006, "step": 4730 }, { "epoch": 0.34810761094504483, "grad_norm": 0.90234375, "learning_rate": 3.6588285772621586e-05, "loss": 0.9917, "step": 4731 }, { "epoch": 0.34818119107840884, "grad_norm": 0.98828125, "learning_rate": 3.658315364266961e-05, "loss": 1.096, "step": 4732 }, { "epoch": 0.34825477121177284, "grad_norm": 0.85546875, "learning_rate": 3.657802089107165e-05, "loss": 0.9812, "step": 4733 }, { "epoch": 0.3483283513451368, "grad_norm": 0.62109375, "learning_rate": 3.657288751810318e-05, "loss": 0.6361, "step": 4734 }, { "epoch": 0.3484019314785008, "grad_norm": 0.73046875, "learning_rate": 3.656775352403969e-05, "loss": 0.6014, "step": 4735 }, { "epoch": 0.3484755116118648, "grad_norm": 0.86328125, "learning_rate": 3.6562618909156726e-05, "loss": 0.8307, "step": 4736 }, { "epoch": 0.3485490917452288, "grad_norm": 0.89453125, "learning_rate": 3.6557483673729834e-05, "loss": 1.0536, "step": 4737 }, { "epoch": 0.3486226718785928, "grad_norm": 0.87890625, "learning_rate": 3.6552347818034626e-05, "loss": 1.0115, "step": 4738 }, { "epoch": 0.34869625201195675, "grad_norm": 1.0, "learning_rate": 3.654721134234673e-05, "loss": 1.311, "step": 4739 }, { "epoch": 0.34876983214532076, "grad_norm": 0.78515625, "learning_rate": 3.6542074246941816e-05, "loss": 0.9436, "step": 4740 }, { "epoch": 0.34884341227868476, "grad_norm": 0.8515625, "learning_rate": 3.6536936532095565e-05, "loss": 0.9542, "step": 4741 }, { "epoch": 0.34891699241204877, "grad_norm": 0.91015625, "learning_rate": 3.653179819808373e-05, "loss": 1.1838, "step": 4742 }, { "epoch": 0.3489905725454127, "grad_norm": 0.953125, "learning_rate": 3.652665924518206e-05, "loss": 0.938, "step": 4743 }, { "epoch": 0.3490641526787767, "grad_norm": 0.8515625, "learning_rate": 3.652151967366637e-05, "loss": 0.9013, "step": 4744 }, { "epoch": 0.3491377328121407, "grad_norm": 5.28125, "learning_rate": 3.651637948381247e-05, "loss": 1.3578, "step": 4745 }, { "epoch": 0.3492113129455047, "grad_norm": 0.80078125, "learning_rate": 3.651123867589623e-05, "loss": 0.8332, "step": 4746 }, { "epoch": 0.34928489307886873, "grad_norm": 0.984375, "learning_rate": 3.650609725019356e-05, "loss": 0.8459, "step": 4747 }, { "epoch": 0.3493584732122327, "grad_norm": 0.79296875, "learning_rate": 3.650095520698038e-05, "loss": 0.8967, "step": 4748 }, { "epoch": 0.3494320533455967, "grad_norm": 0.765625, "learning_rate": 3.6495812546532664e-05, "loss": 0.8182, "step": 4749 }, { "epoch": 0.3495056334789607, "grad_norm": 0.7890625, "learning_rate": 3.649066926912639e-05, "loss": 0.9408, "step": 4750 }, { "epoch": 0.3495792136123247, "grad_norm": 0.80859375, "learning_rate": 3.64855253750376e-05, "loss": 0.6891, "step": 4751 }, { "epoch": 0.34965279374568864, "grad_norm": 0.6953125, "learning_rate": 3.648038086454236e-05, "loss": 0.6999, "step": 4752 }, { "epoch": 0.34972637387905264, "grad_norm": 0.84375, "learning_rate": 3.647523573791677e-05, "loss": 0.8032, "step": 4753 }, { "epoch": 0.34979995401241665, "grad_norm": 0.87109375, "learning_rate": 3.647008999543694e-05, "loss": 1.1803, "step": 4754 }, { "epoch": 0.34987353414578065, "grad_norm": 1.109375, "learning_rate": 3.6464943637379053e-05, "loss": 1.5012, "step": 4755 }, { "epoch": 0.34994711427914466, "grad_norm": 0.890625, "learning_rate": 3.645979666401929e-05, "loss": 1.2367, "step": 4756 }, { "epoch": 0.3500206944125086, "grad_norm": 1.09375, "learning_rate": 3.6454649075633885e-05, "loss": 1.372, "step": 4757 }, { "epoch": 0.3500942745458726, "grad_norm": 0.7890625, "learning_rate": 3.64495008724991e-05, "loss": 0.8756, "step": 4758 }, { "epoch": 0.3501678546792366, "grad_norm": 0.98828125, "learning_rate": 3.644435205489122e-05, "loss": 1.0225, "step": 4759 }, { "epoch": 0.3502414348126006, "grad_norm": 0.7890625, "learning_rate": 3.643920262308659e-05, "loss": 0.677, "step": 4760 }, { "epoch": 0.35031501494596456, "grad_norm": 1.203125, "learning_rate": 3.6434052577361556e-05, "loss": 1.448, "step": 4761 }, { "epoch": 0.35038859507932857, "grad_norm": 0.74609375, "learning_rate": 3.642890191799252e-05, "loss": 0.6937, "step": 4762 }, { "epoch": 0.3504621752126926, "grad_norm": 0.609375, "learning_rate": 3.642375064525591e-05, "loss": 0.6267, "step": 4763 }, { "epoch": 0.3505357553460566, "grad_norm": 0.87109375, "learning_rate": 3.641859875942816e-05, "loss": 1.0058, "step": 4764 }, { "epoch": 0.3506093354794206, "grad_norm": 1.3046875, "learning_rate": 3.6413446260785797e-05, "loss": 1.1573, "step": 4765 }, { "epoch": 0.35068291561278453, "grad_norm": 0.73046875, "learning_rate": 3.640829314960532e-05, "loss": 0.7338, "step": 4766 }, { "epoch": 0.35075649574614853, "grad_norm": 0.8671875, "learning_rate": 3.64031394261633e-05, "loss": 1.1617, "step": 4767 }, { "epoch": 0.35083007587951254, "grad_norm": 0.9140625, "learning_rate": 3.6397985090736336e-05, "loss": 0.8588, "step": 4768 }, { "epoch": 0.35090365601287654, "grad_norm": 0.76953125, "learning_rate": 3.639283014360103e-05, "loss": 0.822, "step": 4769 }, { "epoch": 0.3509772361462405, "grad_norm": 0.8671875, "learning_rate": 3.638767458503405e-05, "loss": 0.8289, "step": 4770 }, { "epoch": 0.3510508162796045, "grad_norm": 4.53125, "learning_rate": 3.638251841531208e-05, "loss": 0.7602, "step": 4771 }, { "epoch": 0.3511243964129685, "grad_norm": 0.984375, "learning_rate": 3.637736163471185e-05, "loss": 1.2078, "step": 4772 }, { "epoch": 0.3511979765463325, "grad_norm": 0.94140625, "learning_rate": 3.63722042435101e-05, "loss": 0.9631, "step": 4773 }, { "epoch": 0.3512715566796965, "grad_norm": 0.6328125, "learning_rate": 3.6367046241983644e-05, "loss": 0.6661, "step": 4774 }, { "epoch": 0.35134513681306045, "grad_norm": 0.74609375, "learning_rate": 3.636188763040928e-05, "loss": 0.9251, "step": 4775 }, { "epoch": 0.35141871694642446, "grad_norm": 0.93359375, "learning_rate": 3.6356728409063856e-05, "loss": 1.3103, "step": 4776 }, { "epoch": 0.35149229707978846, "grad_norm": 0.88671875, "learning_rate": 3.6351568578224275e-05, "loss": 1.4042, "step": 4777 }, { "epoch": 0.35156587721315247, "grad_norm": 0.85546875, "learning_rate": 3.6346408138167455e-05, "loss": 0.6177, "step": 4778 }, { "epoch": 0.3516394573465164, "grad_norm": 0.7421875, "learning_rate": 3.634124708917033e-05, "loss": 0.868, "step": 4779 }, { "epoch": 0.3517130374798804, "grad_norm": 0.90234375, "learning_rate": 3.6336085431509895e-05, "loss": 0.7677, "step": 4780 }, { "epoch": 0.3517866176132444, "grad_norm": 0.7578125, "learning_rate": 3.6330923165463174e-05, "loss": 0.8498, "step": 4781 }, { "epoch": 0.3518601977466084, "grad_norm": 0.7890625, "learning_rate": 3.63257602913072e-05, "loss": 0.8257, "step": 4782 }, { "epoch": 0.35193377787997243, "grad_norm": 0.87890625, "learning_rate": 3.6320596809319064e-05, "loss": 0.7374, "step": 4783 }, { "epoch": 0.3520073580133364, "grad_norm": 1.1640625, "learning_rate": 3.6315432719775885e-05, "loss": 1.3143, "step": 4784 }, { "epoch": 0.3520809381467004, "grad_norm": 0.9296875, "learning_rate": 3.63102680229548e-05, "loss": 1.0398, "step": 4785 }, { "epoch": 0.3521545182800644, "grad_norm": 0.95703125, "learning_rate": 3.630510271913298e-05, "loss": 0.9623, "step": 4786 }, { "epoch": 0.3522280984134284, "grad_norm": 0.87109375, "learning_rate": 3.6299936808587666e-05, "loss": 0.877, "step": 4787 }, { "epoch": 0.35230167854679234, "grad_norm": 0.74609375, "learning_rate": 3.629477029159608e-05, "loss": 0.7758, "step": 4788 }, { "epoch": 0.35237525868015634, "grad_norm": 1.0234375, "learning_rate": 3.6289603168435504e-05, "loss": 1.0819, "step": 4789 }, { "epoch": 0.35244883881352035, "grad_norm": 0.7109375, "learning_rate": 3.6284435439383254e-05, "loss": 0.7601, "step": 4790 }, { "epoch": 0.35252241894688435, "grad_norm": 0.9765625, "learning_rate": 3.6279267104716664e-05, "loss": 1.6886, "step": 4791 }, { "epoch": 0.35259599908024836, "grad_norm": 0.796875, "learning_rate": 3.627409816471311e-05, "loss": 0.8755, "step": 4792 }, { "epoch": 0.3526695792136123, "grad_norm": 1.015625, "learning_rate": 3.626892861965e-05, "loss": 1.0923, "step": 4793 }, { "epoch": 0.3527431593469763, "grad_norm": 0.890625, "learning_rate": 3.626375846980477e-05, "loss": 0.8383, "step": 4794 }, { "epoch": 0.3528167394803403, "grad_norm": 0.7421875, "learning_rate": 3.625858771545492e-05, "loss": 0.8668, "step": 4795 }, { "epoch": 0.3528903196137043, "grad_norm": 0.84375, "learning_rate": 3.625341635687791e-05, "loss": 1.0451, "step": 4796 }, { "epoch": 0.35296389974706827, "grad_norm": 0.77734375, "learning_rate": 3.624824439435132e-05, "loss": 0.8534, "step": 4797 }, { "epoch": 0.35303747988043227, "grad_norm": 0.7265625, "learning_rate": 3.624307182815268e-05, "loss": 0.8749, "step": 4798 }, { "epoch": 0.3531110600137963, "grad_norm": 1.171875, "learning_rate": 3.623789865855961e-05, "loss": 0.9576, "step": 4799 }, { "epoch": 0.3531846401471603, "grad_norm": 0.921875, "learning_rate": 3.6232724885849745e-05, "loss": 0.9915, "step": 4800 }, { "epoch": 0.3532582202805243, "grad_norm": 1.1796875, "learning_rate": 3.6227550510300754e-05, "loss": 1.1721, "step": 4801 }, { "epoch": 0.35333180041388823, "grad_norm": 0.69140625, "learning_rate": 3.622237553219034e-05, "loss": 0.8471, "step": 4802 }, { "epoch": 0.35340538054725223, "grad_norm": 0.9140625, "learning_rate": 3.621719995179622e-05, "loss": 1.0099, "step": 4803 }, { "epoch": 0.35347896068061624, "grad_norm": 0.78125, "learning_rate": 3.6212023769396165e-05, "loss": 0.7313, "step": 4804 }, { "epoch": 0.35355254081398024, "grad_norm": 0.83203125, "learning_rate": 3.620684698526797e-05, "loss": 0.7327, "step": 4805 }, { "epoch": 0.3536261209473442, "grad_norm": 0.9140625, "learning_rate": 3.6201669599689465e-05, "loss": 0.7027, "step": 4806 }, { "epoch": 0.3536997010807082, "grad_norm": 0.92578125, "learning_rate": 3.619649161293851e-05, "loss": 1.1363, "step": 4807 }, { "epoch": 0.3537732812140722, "grad_norm": 0.69140625, "learning_rate": 3.6191313025292996e-05, "loss": 0.7394, "step": 4808 }, { "epoch": 0.3538468613474362, "grad_norm": 0.80859375, "learning_rate": 3.6186133837030856e-05, "loss": 0.8749, "step": 4809 }, { "epoch": 0.3539204414808002, "grad_norm": 0.82421875, "learning_rate": 3.618095404843003e-05, "loss": 0.9417, "step": 4810 }, { "epoch": 0.35399402161416416, "grad_norm": 0.8984375, "learning_rate": 3.617577365976853e-05, "loss": 0.8377, "step": 4811 }, { "epoch": 0.35406760174752816, "grad_norm": 0.79296875, "learning_rate": 3.617059267132435e-05, "loss": 0.9462, "step": 4812 }, { "epoch": 0.35414118188089216, "grad_norm": 0.91015625, "learning_rate": 3.6165411083375575e-05, "loss": 0.8838, "step": 4813 }, { "epoch": 0.35421476201425617, "grad_norm": 0.87890625, "learning_rate": 3.6160228896200265e-05, "loss": 1.2216, "step": 4814 }, { "epoch": 0.3542883421476201, "grad_norm": 0.9453125, "learning_rate": 3.6155046110076554e-05, "loss": 0.9618, "step": 4815 }, { "epoch": 0.3543619222809841, "grad_norm": 0.80859375, "learning_rate": 3.6149862725282587e-05, "loss": 0.8811, "step": 4816 }, { "epoch": 0.3544355024143481, "grad_norm": 0.7421875, "learning_rate": 3.614467874209654e-05, "loss": 1.0577, "step": 4817 }, { "epoch": 0.35450908254771213, "grad_norm": 0.734375, "learning_rate": 3.613949416079665e-05, "loss": 0.5258, "step": 4818 }, { "epoch": 0.35458266268107613, "grad_norm": 0.91796875, "learning_rate": 3.613430898166113e-05, "loss": 1.047, "step": 4819 }, { "epoch": 0.3546562428144401, "grad_norm": 0.87109375, "learning_rate": 3.612912320496829e-05, "loss": 0.9518, "step": 4820 }, { "epoch": 0.3547298229478041, "grad_norm": 0.9921875, "learning_rate": 3.612393683099642e-05, "loss": 1.0906, "step": 4821 }, { "epoch": 0.3548034030811681, "grad_norm": 0.828125, "learning_rate": 3.611874986002387e-05, "loss": 0.935, "step": 4822 }, { "epoch": 0.3548769832145321, "grad_norm": 0.66796875, "learning_rate": 3.6113562292329016e-05, "loss": 0.6102, "step": 4823 }, { "epoch": 0.35495056334789604, "grad_norm": 1.0078125, "learning_rate": 3.610837412819027e-05, "loss": 1.1, "step": 4824 }, { "epoch": 0.35502414348126005, "grad_norm": 0.96484375, "learning_rate": 3.610318536788606e-05, "loss": 1.2297, "step": 4825 }, { "epoch": 0.35509772361462405, "grad_norm": 0.734375, "learning_rate": 3.609799601169486e-05, "loss": 0.7288, "step": 4826 }, { "epoch": 0.35517130374798805, "grad_norm": 0.8515625, "learning_rate": 3.609280605989518e-05, "loss": 0.8733, "step": 4827 }, { "epoch": 0.35524488388135206, "grad_norm": 0.98046875, "learning_rate": 3.608761551276555e-05, "loss": 1.189, "step": 4828 }, { "epoch": 0.355318464014716, "grad_norm": 0.6875, "learning_rate": 3.608242437058454e-05, "loss": 0.6602, "step": 4829 }, { "epoch": 0.35539204414808, "grad_norm": 0.859375, "learning_rate": 3.607723263363074e-05, "loss": 0.7282, "step": 4830 }, { "epoch": 0.355465624281444, "grad_norm": 0.75, "learning_rate": 3.6072040302182784e-05, "loss": 0.9348, "step": 4831 }, { "epoch": 0.355539204414808, "grad_norm": 0.83984375, "learning_rate": 3.6066847376519345e-05, "loss": 1.2901, "step": 4832 }, { "epoch": 0.35561278454817197, "grad_norm": 0.98046875, "learning_rate": 3.60616538569191e-05, "loss": 1.0461, "step": 4833 }, { "epoch": 0.35568636468153597, "grad_norm": 0.75390625, "learning_rate": 3.60564597436608e-05, "loss": 0.6251, "step": 4834 }, { "epoch": 0.3557599448149, "grad_norm": 0.87109375, "learning_rate": 3.6051265037023176e-05, "loss": 0.9186, "step": 4835 }, { "epoch": 0.355833524948264, "grad_norm": 0.83203125, "learning_rate": 3.604606973728504e-05, "loss": 0.984, "step": 4836 }, { "epoch": 0.355907105081628, "grad_norm": 1.046875, "learning_rate": 3.604087384472519e-05, "loss": 1.1697, "step": 4837 }, { "epoch": 0.35598068521499193, "grad_norm": 0.7890625, "learning_rate": 3.603567735962251e-05, "loss": 0.9941, "step": 4838 }, { "epoch": 0.35605426534835594, "grad_norm": 0.80078125, "learning_rate": 3.603048028225585e-05, "loss": 1.1957, "step": 4839 }, { "epoch": 0.35612784548171994, "grad_norm": 0.92578125, "learning_rate": 3.6025282612904157e-05, "loss": 1.4154, "step": 4840 }, { "epoch": 0.35620142561508394, "grad_norm": 2.328125, "learning_rate": 3.602008435184638e-05, "loss": 1.1748, "step": 4841 }, { "epoch": 0.3562750057484479, "grad_norm": 0.76953125, "learning_rate": 3.6014885499361476e-05, "loss": 1.0554, "step": 4842 }, { "epoch": 0.3563485858818119, "grad_norm": 0.796875, "learning_rate": 3.600968605572848e-05, "loss": 0.9385, "step": 4843 }, { "epoch": 0.3564221660151759, "grad_norm": 0.7578125, "learning_rate": 3.600448602122643e-05, "loss": 0.729, "step": 4844 }, { "epoch": 0.3564957461485399, "grad_norm": 0.796875, "learning_rate": 3.599928539613439e-05, "loss": 0.7915, "step": 4845 }, { "epoch": 0.3565693262819039, "grad_norm": 1.015625, "learning_rate": 3.599408418073147e-05, "loss": 0.8664, "step": 4846 }, { "epoch": 0.35664290641526786, "grad_norm": 0.796875, "learning_rate": 3.598888237529684e-05, "loss": 0.7553, "step": 4847 }, { "epoch": 0.35671648654863186, "grad_norm": 0.859375, "learning_rate": 3.598367998010963e-05, "loss": 0.9498, "step": 4848 }, { "epoch": 0.35679006668199587, "grad_norm": 0.9296875, "learning_rate": 3.5978476995449066e-05, "loss": 0.9862, "step": 4849 }, { "epoch": 0.35686364681535987, "grad_norm": 0.83203125, "learning_rate": 3.597327342159438e-05, "loss": 0.981, "step": 4850 }, { "epoch": 0.3569372269487238, "grad_norm": 0.828125, "learning_rate": 3.596806925882483e-05, "loss": 0.9306, "step": 4851 }, { "epoch": 0.3570108070820878, "grad_norm": 0.86328125, "learning_rate": 3.596286450741973e-05, "loss": 0.6954, "step": 4852 }, { "epoch": 0.3570843872154518, "grad_norm": 0.73828125, "learning_rate": 3.595765916765838e-05, "loss": 0.77, "step": 4853 }, { "epoch": 0.35715796734881583, "grad_norm": 1.0625, "learning_rate": 3.595245323982017e-05, "loss": 0.8674, "step": 4854 }, { "epoch": 0.35723154748217983, "grad_norm": 0.828125, "learning_rate": 3.594724672418448e-05, "loss": 1.17, "step": 4855 }, { "epoch": 0.3573051276155438, "grad_norm": 1.234375, "learning_rate": 3.594203962103073e-05, "loss": 0.8265, "step": 4856 }, { "epoch": 0.3573787077489078, "grad_norm": 0.8984375, "learning_rate": 3.5936831930638395e-05, "loss": 0.9776, "step": 4857 }, { "epoch": 0.3574522878822718, "grad_norm": 0.98828125, "learning_rate": 3.593162365328693e-05, "loss": 1.1234, "step": 4858 }, { "epoch": 0.3575258680156358, "grad_norm": 0.828125, "learning_rate": 3.5926414789255875e-05, "loss": 0.9733, "step": 4859 }, { "epoch": 0.35759944814899974, "grad_norm": 0.9453125, "learning_rate": 3.592120533882477e-05, "loss": 1.1785, "step": 4860 }, { "epoch": 0.35767302828236375, "grad_norm": 0.83203125, "learning_rate": 3.5915995302273214e-05, "loss": 1.1252, "step": 4861 }, { "epoch": 0.35774660841572775, "grad_norm": 1.046875, "learning_rate": 3.5910784679880805e-05, "loss": 1.2581, "step": 4862 }, { "epoch": 0.35782018854909176, "grad_norm": 0.9765625, "learning_rate": 3.590557347192719e-05, "loss": 1.158, "step": 4863 }, { "epoch": 0.35789376868245576, "grad_norm": 0.78125, "learning_rate": 3.590036167869203e-05, "loss": 0.921, "step": 4864 }, { "epoch": 0.3579673488158197, "grad_norm": 0.83203125, "learning_rate": 3.5895149300455063e-05, "loss": 0.9467, "step": 4865 }, { "epoch": 0.3580409289491837, "grad_norm": 1.0546875, "learning_rate": 3.5889936337496e-05, "loss": 1.5529, "step": 4866 }, { "epoch": 0.3581145090825477, "grad_norm": 0.84375, "learning_rate": 3.5884722790094626e-05, "loss": 1.187, "step": 4867 }, { "epoch": 0.3581880892159117, "grad_norm": 0.87890625, "learning_rate": 3.587950865853075e-05, "loss": 1.0382, "step": 4868 }, { "epoch": 0.35826166934927567, "grad_norm": 1.09375, "learning_rate": 3.587429394308418e-05, "loss": 1.2916, "step": 4869 }, { "epoch": 0.3583352494826397, "grad_norm": 0.90625, "learning_rate": 3.586907864403479e-05, "loss": 0.7197, "step": 4870 }, { "epoch": 0.3584088296160037, "grad_norm": 0.88671875, "learning_rate": 3.5863862761662485e-05, "loss": 0.9155, "step": 4871 }, { "epoch": 0.3584824097493677, "grad_norm": 0.8515625, "learning_rate": 3.5858646296247186e-05, "loss": 1.0277, "step": 4872 }, { "epoch": 0.3585559898827317, "grad_norm": 0.8359375, "learning_rate": 3.585342924806884e-05, "loss": 1.0115, "step": 4873 }, { "epoch": 0.35862957001609563, "grad_norm": 0.78125, "learning_rate": 3.584821161740745e-05, "loss": 0.9582, "step": 4874 }, { "epoch": 0.35870315014945964, "grad_norm": 0.875, "learning_rate": 3.5842993404543034e-05, "loss": 1.0438, "step": 4875 }, { "epoch": 0.35877673028282364, "grad_norm": 0.75390625, "learning_rate": 3.583777460975564e-05, "loss": 0.9466, "step": 4876 }, { "epoch": 0.35885031041618765, "grad_norm": 1.0234375, "learning_rate": 3.583255523332536e-05, "loss": 1.8376, "step": 4877 }, { "epoch": 0.3589238905495516, "grad_norm": 0.96484375, "learning_rate": 3.5827335275532293e-05, "loss": 1.1748, "step": 4878 }, { "epoch": 0.3589974706829156, "grad_norm": 0.859375, "learning_rate": 3.582211473665659e-05, "loss": 0.6493, "step": 4879 }, { "epoch": 0.3590710508162796, "grad_norm": 0.76171875, "learning_rate": 3.581689361697843e-05, "loss": 1.1134, "step": 4880 }, { "epoch": 0.3591446309496436, "grad_norm": 0.91015625, "learning_rate": 3.581167191677802e-05, "loss": 0.9733, "step": 4881 }, { "epoch": 0.3592182110830076, "grad_norm": 0.80859375, "learning_rate": 3.5806449636335606e-05, "loss": 0.8222, "step": 4882 }, { "epoch": 0.35929179121637156, "grad_norm": 0.94140625, "learning_rate": 3.580122677593144e-05, "loss": 1.1154, "step": 4883 }, { "epoch": 0.35936537134973556, "grad_norm": 0.84375, "learning_rate": 3.579600333584584e-05, "loss": 0.85, "step": 4884 }, { "epoch": 0.35943895148309957, "grad_norm": 0.96484375, "learning_rate": 3.579077931635913e-05, "loss": 0.74, "step": 4885 }, { "epoch": 0.35951253161646357, "grad_norm": 1.0234375, "learning_rate": 3.578555471775167e-05, "loss": 0.8637, "step": 4886 }, { "epoch": 0.3595861117498275, "grad_norm": 0.89453125, "learning_rate": 3.5780329540303865e-05, "loss": 0.859, "step": 4887 }, { "epoch": 0.3596596918831915, "grad_norm": 0.8125, "learning_rate": 3.5775103784296135e-05, "loss": 0.8033, "step": 4888 }, { "epoch": 0.3597332720165555, "grad_norm": 0.81640625, "learning_rate": 3.5769877450008935e-05, "loss": 0.6638, "step": 4889 }, { "epoch": 0.35980685214991953, "grad_norm": 1.015625, "learning_rate": 3.576465053772275e-05, "loss": 1.4388, "step": 4890 }, { "epoch": 0.35988043228328354, "grad_norm": 0.90234375, "learning_rate": 3.575942304771811e-05, "loss": 1.4897, "step": 4891 }, { "epoch": 0.3599540124166475, "grad_norm": 0.90234375, "learning_rate": 3.5754194980275554e-05, "loss": 1.3071, "step": 4892 }, { "epoch": 0.3600275925500115, "grad_norm": 0.8359375, "learning_rate": 3.574896633567566e-05, "loss": 0.7576, "step": 4893 }, { "epoch": 0.3601011726833755, "grad_norm": 1.0390625, "learning_rate": 3.5743737114199045e-05, "loss": 1.513, "step": 4894 }, { "epoch": 0.3601747528167395, "grad_norm": 0.73046875, "learning_rate": 3.573850731612636e-05, "loss": 0.8965, "step": 4895 }, { "epoch": 0.36024833295010344, "grad_norm": 0.7890625, "learning_rate": 3.573327694173826e-05, "loss": 0.7815, "step": 4896 }, { "epoch": 0.36032191308346745, "grad_norm": 0.8984375, "learning_rate": 3.572804599131546e-05, "loss": 0.9349, "step": 4897 }, { "epoch": 0.36039549321683145, "grad_norm": 0.90234375, "learning_rate": 3.57228144651387e-05, "loss": 0.8704, "step": 4898 }, { "epoch": 0.36046907335019546, "grad_norm": 0.9140625, "learning_rate": 3.5717582363488735e-05, "loss": 1.1795, "step": 4899 }, { "epoch": 0.36054265348355946, "grad_norm": 0.83984375, "learning_rate": 3.5712349686646365e-05, "loss": 0.9342, "step": 4900 }, { "epoch": 0.3606162336169234, "grad_norm": 1.0, "learning_rate": 3.5707116434892424e-05, "loss": 1.1258, "step": 4901 }, { "epoch": 0.3606898137502874, "grad_norm": 0.8515625, "learning_rate": 3.570188260850777e-05, "loss": 0.8858, "step": 4902 }, { "epoch": 0.3607633938836514, "grad_norm": 0.8046875, "learning_rate": 3.569664820777329e-05, "loss": 0.8702, "step": 4903 }, { "epoch": 0.3608369740170154, "grad_norm": 1.015625, "learning_rate": 3.56914132329699e-05, "loss": 1.3725, "step": 4904 }, { "epoch": 0.36091055415037937, "grad_norm": 0.734375, "learning_rate": 3.568617768437855e-05, "loss": 0.6883, "step": 4905 }, { "epoch": 0.3609841342837434, "grad_norm": 0.79296875, "learning_rate": 3.5680941562280235e-05, "loss": 0.8077, "step": 4906 }, { "epoch": 0.3610577144171074, "grad_norm": 1.0078125, "learning_rate": 3.5675704866955954e-05, "loss": 1.11, "step": 4907 }, { "epoch": 0.3611312945504714, "grad_norm": 0.98828125, "learning_rate": 3.5670467598686756e-05, "loss": 1.0383, "step": 4908 }, { "epoch": 0.3612048746838354, "grad_norm": 0.7578125, "learning_rate": 3.566522975775373e-05, "loss": 0.7719, "step": 4909 }, { "epoch": 0.36127845481719933, "grad_norm": 0.90625, "learning_rate": 3.565999134443795e-05, "loss": 1.0769, "step": 4910 }, { "epoch": 0.36135203495056334, "grad_norm": 1.0390625, "learning_rate": 3.565475235902058e-05, "loss": 1.0033, "step": 4911 }, { "epoch": 0.36142561508392734, "grad_norm": 0.94921875, "learning_rate": 3.5649512801782764e-05, "loss": 1.0606, "step": 4912 }, { "epoch": 0.36149919521729135, "grad_norm": 0.91796875, "learning_rate": 3.5644272673005715e-05, "loss": 0.9322, "step": 4913 }, { "epoch": 0.3615727753506553, "grad_norm": 0.87890625, "learning_rate": 3.5639031972970646e-05, "loss": 0.9531, "step": 4914 }, { "epoch": 0.3616463554840193, "grad_norm": 0.59765625, "learning_rate": 3.563379070195883e-05, "loss": 0.8194, "step": 4915 }, { "epoch": 0.3617199356173833, "grad_norm": 0.8828125, "learning_rate": 3.5628548860251565e-05, "loss": 0.8207, "step": 4916 }, { "epoch": 0.3617935157507473, "grad_norm": 0.828125, "learning_rate": 3.562330644813015e-05, "loss": 1.0512, "step": 4917 }, { "epoch": 0.3618670958841113, "grad_norm": 0.86328125, "learning_rate": 3.561806346587594e-05, "loss": 0.8682, "step": 4918 }, { "epoch": 0.36194067601747526, "grad_norm": 0.91796875, "learning_rate": 3.5612819913770325e-05, "loss": 0.864, "step": 4919 }, { "epoch": 0.36201425615083926, "grad_norm": 0.81640625, "learning_rate": 3.56075757920947e-05, "loss": 0.788, "step": 4920 }, { "epoch": 0.36208783628420327, "grad_norm": 1.4765625, "learning_rate": 3.560233110113052e-05, "loss": 1.4926, "step": 4921 }, { "epoch": 0.3621614164175673, "grad_norm": 0.78125, "learning_rate": 3.559708584115925e-05, "loss": 0.6587, "step": 4922 }, { "epoch": 0.3622349965509312, "grad_norm": 1.0625, "learning_rate": 3.559184001246241e-05, "loss": 0.9058, "step": 4923 }, { "epoch": 0.3623085766842952, "grad_norm": 0.8046875, "learning_rate": 3.558659361532152e-05, "loss": 0.9397, "step": 4924 }, { "epoch": 0.36238215681765923, "grad_norm": 0.86328125, "learning_rate": 3.558134665001814e-05, "loss": 1.0313, "step": 4925 }, { "epoch": 0.36245573695102323, "grad_norm": 0.88671875, "learning_rate": 3.5576099116833874e-05, "loss": 0.9287, "step": 4926 }, { "epoch": 0.36252931708438724, "grad_norm": 0.91015625, "learning_rate": 3.557085101605034e-05, "loss": 0.8493, "step": 4927 }, { "epoch": 0.3626028972177512, "grad_norm": 0.72265625, "learning_rate": 3.55656023479492e-05, "loss": 0.6525, "step": 4928 }, { "epoch": 0.3626764773511152, "grad_norm": 0.91015625, "learning_rate": 3.556035311281213e-05, "loss": 1.2842, "step": 4929 }, { "epoch": 0.3627500574844792, "grad_norm": 0.8125, "learning_rate": 3.555510331092087e-05, "loss": 1.1034, "step": 4930 }, { "epoch": 0.3628236376178432, "grad_norm": 1.6171875, "learning_rate": 3.554985294255714e-05, "loss": 1.0617, "step": 4931 }, { "epoch": 0.3628972177512072, "grad_norm": 0.984375, "learning_rate": 3.554460200800273e-05, "loss": 0.876, "step": 4932 }, { "epoch": 0.36297079788457115, "grad_norm": 0.66796875, "learning_rate": 3.553935050753945e-05, "loss": 0.7704, "step": 4933 }, { "epoch": 0.36304437801793515, "grad_norm": 0.76953125, "learning_rate": 3.553409844144913e-05, "loss": 0.6025, "step": 4934 }, { "epoch": 0.36311795815129916, "grad_norm": 1.171875, "learning_rate": 3.552884581001364e-05, "loss": 0.7969, "step": 4935 }, { "epoch": 0.36319153828466316, "grad_norm": 0.7421875, "learning_rate": 3.5523592613514886e-05, "loss": 0.7456, "step": 4936 }, { "epoch": 0.3632651184180271, "grad_norm": 0.98046875, "learning_rate": 3.551833885223479e-05, "loss": 1.182, "step": 4937 }, { "epoch": 0.3633386985513911, "grad_norm": 0.94140625, "learning_rate": 3.551308452645532e-05, "loss": 1.0166, "step": 4938 }, { "epoch": 0.3634122786847551, "grad_norm": 1.0, "learning_rate": 3.5507829636458454e-05, "loss": 1.0311, "step": 4939 }, { "epoch": 0.3634858588181191, "grad_norm": 0.84375, "learning_rate": 3.550257418252622e-05, "loss": 0.6928, "step": 4940 }, { "epoch": 0.3635594389514831, "grad_norm": 0.83984375, "learning_rate": 3.5497318164940665e-05, "loss": 1.1217, "step": 4941 }, { "epoch": 0.3636330190848471, "grad_norm": 0.93359375, "learning_rate": 3.549206158398387e-05, "loss": 1.3708, "step": 4942 }, { "epoch": 0.3637065992182111, "grad_norm": 0.8515625, "learning_rate": 3.5486804439937954e-05, "loss": 0.8204, "step": 4943 }, { "epoch": 0.3637801793515751, "grad_norm": 0.74609375, "learning_rate": 3.548154673308504e-05, "loss": 0.6268, "step": 4944 }, { "epoch": 0.3638537594849391, "grad_norm": 1.21875, "learning_rate": 3.547628846370731e-05, "loss": 1.4384, "step": 4945 }, { "epoch": 0.36392733961830304, "grad_norm": 0.73046875, "learning_rate": 3.547102963208698e-05, "loss": 0.9204, "step": 4946 }, { "epoch": 0.36400091975166704, "grad_norm": 0.7265625, "learning_rate": 3.546577023850625e-05, "loss": 0.8621, "step": 4947 }, { "epoch": 0.36407449988503104, "grad_norm": 0.78515625, "learning_rate": 3.546051028324741e-05, "loss": 0.9103, "step": 4948 }, { "epoch": 0.36414808001839505, "grad_norm": 0.71484375, "learning_rate": 3.545524976659273e-05, "loss": 0.9211, "step": 4949 }, { "epoch": 0.36422166015175905, "grad_norm": 1.1953125, "learning_rate": 3.544998868882455e-05, "loss": 1.1029, "step": 4950 }, { "epoch": 0.364295240285123, "grad_norm": 0.83203125, "learning_rate": 3.544472705022521e-05, "loss": 0.831, "step": 4951 }, { "epoch": 0.364368820418487, "grad_norm": 0.734375, "learning_rate": 3.5439464851077096e-05, "loss": 0.7651, "step": 4952 }, { "epoch": 0.364442400551851, "grad_norm": 0.79296875, "learning_rate": 3.5434202091662625e-05, "loss": 1.0167, "step": 4953 }, { "epoch": 0.364515980685215, "grad_norm": 0.9140625, "learning_rate": 3.542893877226423e-05, "loss": 1.185, "step": 4954 }, { "epoch": 0.36458956081857896, "grad_norm": 0.82421875, "learning_rate": 3.542367489316439e-05, "loss": 0.7352, "step": 4955 }, { "epoch": 0.36466314095194297, "grad_norm": 0.8515625, "learning_rate": 3.5418410454645606e-05, "loss": 0.7912, "step": 4956 }, { "epoch": 0.36473672108530697, "grad_norm": 0.98828125, "learning_rate": 3.5413145456990414e-05, "loss": 0.9649, "step": 4957 }, { "epoch": 0.364810301218671, "grad_norm": 0.7421875, "learning_rate": 3.540787990048137e-05, "loss": 0.7054, "step": 4958 }, { "epoch": 0.364883881352035, "grad_norm": 0.89453125, "learning_rate": 3.5402613785401076e-05, "loss": 0.8387, "step": 4959 }, { "epoch": 0.3649574614853989, "grad_norm": 0.8203125, "learning_rate": 3.539734711203214e-05, "loss": 0.9519, "step": 4960 }, { "epoch": 0.36503104161876293, "grad_norm": 0.8515625, "learning_rate": 3.539207988065722e-05, "loss": 0.8418, "step": 4961 }, { "epoch": 0.36510462175212693, "grad_norm": 0.7734375, "learning_rate": 3.5386812091559e-05, "loss": 0.8378, "step": 4962 }, { "epoch": 0.36517820188549094, "grad_norm": 0.828125, "learning_rate": 3.53815437450202e-05, "loss": 1.0, "step": 4963 }, { "epoch": 0.3652517820188549, "grad_norm": 1.125, "learning_rate": 3.5376274841323557e-05, "loss": 1.3167, "step": 4964 }, { "epoch": 0.3653253621522189, "grad_norm": 0.87890625, "learning_rate": 3.537100538075183e-05, "loss": 1.0696, "step": 4965 }, { "epoch": 0.3653989422855829, "grad_norm": 1.109375, "learning_rate": 3.536573536358785e-05, "loss": 1.2404, "step": 4966 }, { "epoch": 0.3654725224189469, "grad_norm": 0.98046875, "learning_rate": 3.5360464790114416e-05, "loss": 0.9548, "step": 4967 }, { "epoch": 0.3655461025523109, "grad_norm": 0.8359375, "learning_rate": 3.535519366061441e-05, "loss": 1.042, "step": 4968 }, { "epoch": 0.36561968268567485, "grad_norm": 1.0, "learning_rate": 3.534992197537071e-05, "loss": 1.0249, "step": 4969 }, { "epoch": 0.36569326281903886, "grad_norm": 0.78515625, "learning_rate": 3.5344649734666256e-05, "loss": 0.7701, "step": 4970 }, { "epoch": 0.36576684295240286, "grad_norm": 1.09375, "learning_rate": 3.533937693878399e-05, "loss": 0.9024, "step": 4971 }, { "epoch": 0.36584042308576686, "grad_norm": 0.95703125, "learning_rate": 3.533410358800689e-05, "loss": 1.2592, "step": 4972 }, { "epoch": 0.3659140032191308, "grad_norm": 0.70703125, "learning_rate": 3.532882968261797e-05, "loss": 0.8673, "step": 4973 }, { "epoch": 0.3659875833524948, "grad_norm": 1.2578125, "learning_rate": 3.532355522290026e-05, "loss": 1.1793, "step": 4974 }, { "epoch": 0.3660611634858588, "grad_norm": 0.88671875, "learning_rate": 3.531828020913685e-05, "loss": 0.9704, "step": 4975 }, { "epoch": 0.3661347436192228, "grad_norm": 0.9296875, "learning_rate": 3.5313004641610825e-05, "loss": 0.7921, "step": 4976 }, { "epoch": 0.36620832375258683, "grad_norm": 0.66015625, "learning_rate": 3.530772852060532e-05, "loss": 0.616, "step": 4977 }, { "epoch": 0.3662819038859508, "grad_norm": 0.86328125, "learning_rate": 3.5302451846403496e-05, "loss": 1.0702, "step": 4978 }, { "epoch": 0.3663554840193148, "grad_norm": 0.8203125, "learning_rate": 3.5297174619288536e-05, "loss": 0.9164, "step": 4979 }, { "epoch": 0.3664290641526788, "grad_norm": 0.78515625, "learning_rate": 3.529189683954367e-05, "loss": 0.931, "step": 4980 }, { "epoch": 0.3665026442860428, "grad_norm": 1.1171875, "learning_rate": 3.528661850745213e-05, "loss": 0.9794, "step": 4981 }, { "epoch": 0.36657622441940674, "grad_norm": 0.890625, "learning_rate": 3.528133962329721e-05, "loss": 0.8532, "step": 4982 }, { "epoch": 0.36664980455277074, "grad_norm": 0.921875, "learning_rate": 3.52760601873622e-05, "loss": 1.0545, "step": 4983 }, { "epoch": 0.36672338468613475, "grad_norm": 0.92578125, "learning_rate": 3.5270780199930465e-05, "loss": 1.0776, "step": 4984 }, { "epoch": 0.36679696481949875, "grad_norm": 0.89453125, "learning_rate": 3.526549966128535e-05, "loss": 1.0756, "step": 4985 }, { "epoch": 0.36687054495286275, "grad_norm": 0.85546875, "learning_rate": 3.526021857171025e-05, "loss": 1.2265, "step": 4986 }, { "epoch": 0.3669441250862267, "grad_norm": 0.8984375, "learning_rate": 3.525493693148861e-05, "loss": 1.0376, "step": 4987 }, { "epoch": 0.3670177052195907, "grad_norm": 0.82421875, "learning_rate": 3.524965474090386e-05, "loss": 0.8881, "step": 4988 }, { "epoch": 0.3670912853529547, "grad_norm": 0.7890625, "learning_rate": 3.524437200023951e-05, "loss": 0.7254, "step": 4989 }, { "epoch": 0.3671648654863187, "grad_norm": 0.86328125, "learning_rate": 3.523908870977906e-05, "loss": 0.8913, "step": 4990 }, { "epoch": 0.36723844561968266, "grad_norm": 0.78515625, "learning_rate": 3.523380486980605e-05, "loss": 0.9651, "step": 4991 }, { "epoch": 0.36731202575304667, "grad_norm": 0.828125, "learning_rate": 3.522852048060407e-05, "loss": 0.7655, "step": 4992 }, { "epoch": 0.36738560588641067, "grad_norm": 0.89453125, "learning_rate": 3.522323554245671e-05, "loss": 1.1908, "step": 4993 }, { "epoch": 0.3674591860197747, "grad_norm": 0.99609375, "learning_rate": 3.5217950055647616e-05, "loss": 1.0507, "step": 4994 }, { "epoch": 0.3675327661531387, "grad_norm": 0.859375, "learning_rate": 3.521266402046044e-05, "loss": 0.756, "step": 4995 }, { "epoch": 0.3676063462865026, "grad_norm": 0.73046875, "learning_rate": 3.520737743717886e-05, "loss": 0.7442, "step": 4996 }, { "epoch": 0.36767992641986663, "grad_norm": 0.9609375, "learning_rate": 3.520209030608662e-05, "loss": 0.8751, "step": 4997 }, { "epoch": 0.36775350655323064, "grad_norm": 0.96484375, "learning_rate": 3.519680262746747e-05, "loss": 1.0225, "step": 4998 }, { "epoch": 0.36782708668659464, "grad_norm": 0.9140625, "learning_rate": 3.5191514401605176e-05, "loss": 0.9934, "step": 4999 }, { "epoch": 0.3679006668199586, "grad_norm": 0.84765625, "learning_rate": 3.518622562878356e-05, "loss": 1.2016, "step": 5000 }, { "epoch": 0.3679742469533226, "grad_norm": 0.86328125, "learning_rate": 3.518093630928644e-05, "loss": 0.695, "step": 5001 }, { "epoch": 0.3680478270866866, "grad_norm": 0.76953125, "learning_rate": 3.51756464433977e-05, "loss": 1.1051, "step": 5002 }, { "epoch": 0.3681214072200506, "grad_norm": 0.76171875, "learning_rate": 3.517035603140125e-05, "loss": 0.8496, "step": 5003 }, { "epoch": 0.3681949873534146, "grad_norm": 0.7578125, "learning_rate": 3.516506507358099e-05, "loss": 0.8501, "step": 5004 }, { "epoch": 0.36826856748677855, "grad_norm": 0.73828125, "learning_rate": 3.515977357022089e-05, "loss": 0.8302, "step": 5005 }, { "epoch": 0.36834214762014256, "grad_norm": 0.828125, "learning_rate": 3.515448152160492e-05, "loss": 0.8431, "step": 5006 }, { "epoch": 0.36841572775350656, "grad_norm": 0.9375, "learning_rate": 3.514918892801712e-05, "loss": 1.1764, "step": 5007 }, { "epoch": 0.36848930788687057, "grad_norm": 0.83203125, "learning_rate": 3.5143895789741516e-05, "loss": 1.0617, "step": 5008 }, { "epoch": 0.3685628880202345, "grad_norm": 0.9296875, "learning_rate": 3.5138602107062174e-05, "loss": 0.7635, "step": 5009 }, { "epoch": 0.3686364681535985, "grad_norm": 0.9140625, "learning_rate": 3.513330788026322e-05, "loss": 0.8319, "step": 5010 }, { "epoch": 0.3687100482869625, "grad_norm": 1.1484375, "learning_rate": 3.512801310962876e-05, "loss": 0.9136, "step": 5011 }, { "epoch": 0.3687836284203265, "grad_norm": 0.859375, "learning_rate": 3.512271779544297e-05, "loss": 0.8254, "step": 5012 }, { "epoch": 0.36885720855369053, "grad_norm": 0.99609375, "learning_rate": 3.5117421937990036e-05, "loss": 1.3523, "step": 5013 }, { "epoch": 0.3689307886870545, "grad_norm": 0.9765625, "learning_rate": 3.511212553755418e-05, "loss": 1.2158, "step": 5014 }, { "epoch": 0.3690043688204185, "grad_norm": 0.76171875, "learning_rate": 3.510682859441964e-05, "loss": 1.1216, "step": 5015 }, { "epoch": 0.3690779489537825, "grad_norm": 0.90625, "learning_rate": 3.51015311088707e-05, "loss": 1.0272, "step": 5016 }, { "epoch": 0.3691515290871465, "grad_norm": 1.078125, "learning_rate": 3.509623308119166e-05, "loss": 1.136, "step": 5017 }, { "epoch": 0.36922510922051044, "grad_norm": 0.94140625, "learning_rate": 3.509093451166686e-05, "loss": 1.0315, "step": 5018 }, { "epoch": 0.36929868935387444, "grad_norm": 1.1640625, "learning_rate": 3.5085635400580675e-05, "loss": 1.2436, "step": 5019 }, { "epoch": 0.36937226948723845, "grad_norm": 0.91015625, "learning_rate": 3.508033574821747e-05, "loss": 1.3433, "step": 5020 }, { "epoch": 0.36944584962060245, "grad_norm": 0.64453125, "learning_rate": 3.50750355548617e-05, "loss": 0.6279, "step": 5021 }, { "epoch": 0.36951942975396646, "grad_norm": 0.76171875, "learning_rate": 3.506973482079779e-05, "loss": 0.6971, "step": 5022 }, { "epoch": 0.3695930098873304, "grad_norm": 0.78515625, "learning_rate": 3.5064433546310236e-05, "loss": 0.6801, "step": 5023 }, { "epoch": 0.3696665900206944, "grad_norm": 0.87890625, "learning_rate": 3.505913173168354e-05, "loss": 0.8268, "step": 5024 }, { "epoch": 0.3697401701540584, "grad_norm": 0.88671875, "learning_rate": 3.505382937720225e-05, "loss": 0.9737, "step": 5025 }, { "epoch": 0.3698137502874224, "grad_norm": 0.7734375, "learning_rate": 3.504852648315092e-05, "loss": 0.8916, "step": 5026 }, { "epoch": 0.36988733042078636, "grad_norm": 0.74609375, "learning_rate": 3.5043223049814156e-05, "loss": 0.9102, "step": 5027 }, { "epoch": 0.36996091055415037, "grad_norm": 0.83984375, "learning_rate": 3.5037919077476576e-05, "loss": 1.4119, "step": 5028 }, { "epoch": 0.3700344906875144, "grad_norm": 0.73046875, "learning_rate": 3.503261456642284e-05, "loss": 0.8671, "step": 5029 }, { "epoch": 0.3701080708208784, "grad_norm": 0.83203125, "learning_rate": 3.502730951693763e-05, "loss": 1.2578, "step": 5030 }, { "epoch": 0.3701816509542424, "grad_norm": 0.875, "learning_rate": 3.502200392930565e-05, "loss": 0.7428, "step": 5031 }, { "epoch": 0.37025523108760633, "grad_norm": 0.7890625, "learning_rate": 3.501669780381166e-05, "loss": 0.8569, "step": 5032 }, { "epoch": 0.37032881122097033, "grad_norm": 0.96875, "learning_rate": 3.501139114074041e-05, "loss": 0.7413, "step": 5033 }, { "epoch": 0.37040239135433434, "grad_norm": 0.77734375, "learning_rate": 3.5006083940376714e-05, "loss": 0.9027, "step": 5034 }, { "epoch": 0.37047597148769834, "grad_norm": 1.1796875, "learning_rate": 3.5000776203005384e-05, "loss": 1.1972, "step": 5035 }, { "epoch": 0.3705495516210623, "grad_norm": 0.66015625, "learning_rate": 3.499546792891128e-05, "loss": 0.7057, "step": 5036 }, { "epoch": 0.3706231317544263, "grad_norm": 0.796875, "learning_rate": 3.49901591183793e-05, "loss": 0.7118, "step": 5037 }, { "epoch": 0.3706967118877903, "grad_norm": 0.8203125, "learning_rate": 3.4984849771694345e-05, "loss": 0.8855, "step": 5038 }, { "epoch": 0.3707702920211543, "grad_norm": 0.9765625, "learning_rate": 3.497953988914137e-05, "loss": 0.9269, "step": 5039 }, { "epoch": 0.3708438721545183, "grad_norm": 0.9296875, "learning_rate": 3.497422947100533e-05, "loss": 1.2386, "step": 5040 }, { "epoch": 0.37091745228788225, "grad_norm": 0.66796875, "learning_rate": 3.496891851757123e-05, "loss": 0.5296, "step": 5041 }, { "epoch": 0.37099103242124626, "grad_norm": 0.76953125, "learning_rate": 3.4963607029124094e-05, "loss": 0.9789, "step": 5042 }, { "epoch": 0.37106461255461026, "grad_norm": 0.69140625, "learning_rate": 3.4958295005949e-05, "loss": 0.7716, "step": 5043 }, { "epoch": 0.37113819268797427, "grad_norm": 0.8984375, "learning_rate": 3.495298244833102e-05, "loss": 0.9389, "step": 5044 }, { "epoch": 0.3712117728213382, "grad_norm": 0.64453125, "learning_rate": 3.494766935655527e-05, "loss": 0.5788, "step": 5045 }, { "epoch": 0.3712853529547022, "grad_norm": 1.1328125, "learning_rate": 3.49423557309069e-05, "loss": 0.6275, "step": 5046 }, { "epoch": 0.3713589330880662, "grad_norm": 0.859375, "learning_rate": 3.493704157167107e-05, "loss": 0.8667, "step": 5047 }, { "epoch": 0.3714325132214302, "grad_norm": 0.9296875, "learning_rate": 3.493172687913299e-05, "loss": 0.875, "step": 5048 }, { "epoch": 0.37150609335479423, "grad_norm": 0.89453125, "learning_rate": 3.492641165357788e-05, "loss": 0.8389, "step": 5049 }, { "epoch": 0.3715796734881582, "grad_norm": 0.91015625, "learning_rate": 3.492109589529101e-05, "loss": 1.293, "step": 5050 }, { "epoch": 0.3716532536215222, "grad_norm": 0.93359375, "learning_rate": 3.491577960455766e-05, "loss": 0.9775, "step": 5051 }, { "epoch": 0.3717268337548862, "grad_norm": 0.6640625, "learning_rate": 3.4910462781663146e-05, "loss": 0.8613, "step": 5052 }, { "epoch": 0.3718004138882502, "grad_norm": 0.98828125, "learning_rate": 3.4905145426892824e-05, "loss": 1.4802, "step": 5053 }, { "epoch": 0.37187399402161414, "grad_norm": 0.7734375, "learning_rate": 3.489982754053204e-05, "loss": 0.7911, "step": 5054 }, { "epoch": 0.37194757415497814, "grad_norm": 0.65234375, "learning_rate": 3.4894509122866216e-05, "loss": 0.7443, "step": 5055 }, { "epoch": 0.37202115428834215, "grad_norm": 0.7421875, "learning_rate": 3.4889190174180776e-05, "loss": 0.6426, "step": 5056 }, { "epoch": 0.37209473442170615, "grad_norm": 1.0, "learning_rate": 3.4883870694761175e-05, "loss": 1.1715, "step": 5057 }, { "epoch": 0.37216831455507016, "grad_norm": 0.7734375, "learning_rate": 3.48785506848929e-05, "loss": 0.7959, "step": 5058 }, { "epoch": 0.3722418946884341, "grad_norm": 0.9140625, "learning_rate": 3.487323014486147e-05, "loss": 0.7008, "step": 5059 }, { "epoch": 0.3723154748217981, "grad_norm": 0.71875, "learning_rate": 3.486790907495243e-05, "loss": 0.5899, "step": 5060 }, { "epoch": 0.3723890549551621, "grad_norm": 0.73828125, "learning_rate": 3.486258747545135e-05, "loss": 0.7655, "step": 5061 }, { "epoch": 0.3724626350885261, "grad_norm": 0.83984375, "learning_rate": 3.485726534664382e-05, "loss": 0.7658, "step": 5062 }, { "epoch": 0.37253621522189007, "grad_norm": 0.81640625, "learning_rate": 3.485194268881547e-05, "loss": 1.0227, "step": 5063 }, { "epoch": 0.37260979535525407, "grad_norm": 0.94921875, "learning_rate": 3.484661950225198e-05, "loss": 1.016, "step": 5064 }, { "epoch": 0.3726833754886181, "grad_norm": 0.984375, "learning_rate": 3.4841295787239015e-05, "loss": 1.0784, "step": 5065 }, { "epoch": 0.3727569556219821, "grad_norm": 0.78515625, "learning_rate": 3.483597154406228e-05, "loss": 0.9107, "step": 5066 }, { "epoch": 0.3728305357553461, "grad_norm": 0.9453125, "learning_rate": 3.4830646773007544e-05, "loss": 1.178, "step": 5067 }, { "epoch": 0.37290411588871003, "grad_norm": 0.80078125, "learning_rate": 3.482532147436056e-05, "loss": 1.0666, "step": 5068 }, { "epoch": 0.37297769602207403, "grad_norm": 0.890625, "learning_rate": 3.4819995648407125e-05, "loss": 1.0449, "step": 5069 }, { "epoch": 0.37305127615543804, "grad_norm": 0.8359375, "learning_rate": 3.481466929543308e-05, "loss": 0.8709, "step": 5070 }, { "epoch": 0.37312485628880204, "grad_norm": 0.875, "learning_rate": 3.4809342415724266e-05, "loss": 0.8263, "step": 5071 }, { "epoch": 0.373198436422166, "grad_norm": 0.67578125, "learning_rate": 3.480401500956657e-05, "loss": 0.7905, "step": 5072 }, { "epoch": 0.37327201655553, "grad_norm": 0.89453125, "learning_rate": 3.479868707724591e-05, "loss": 1.0048, "step": 5073 }, { "epoch": 0.373345596688894, "grad_norm": 0.9609375, "learning_rate": 3.479335861904822e-05, "loss": 0.8132, "step": 5074 }, { "epoch": 0.373419176822258, "grad_norm": 0.9140625, "learning_rate": 3.478802963525947e-05, "loss": 0.8332, "step": 5075 }, { "epoch": 0.373492756955622, "grad_norm": 0.65234375, "learning_rate": 3.478270012616565e-05, "loss": 0.8352, "step": 5076 }, { "epoch": 0.37356633708898596, "grad_norm": 0.90625, "learning_rate": 3.477737009205279e-05, "loss": 0.9812, "step": 5077 }, { "epoch": 0.37363991722234996, "grad_norm": 1.046875, "learning_rate": 3.4772039533206954e-05, "loss": 0.9344, "step": 5078 }, { "epoch": 0.37371349735571396, "grad_norm": 0.82421875, "learning_rate": 3.476670844991421e-05, "loss": 0.9901, "step": 5079 }, { "epoch": 0.37378707748907797, "grad_norm": 0.890625, "learning_rate": 3.476137684246067e-05, "loss": 1.0108, "step": 5080 }, { "epoch": 0.3738606576224419, "grad_norm": 1.0078125, "learning_rate": 3.475604471113247e-05, "loss": 1.5467, "step": 5081 }, { "epoch": 0.3739342377558059, "grad_norm": 0.87109375, "learning_rate": 3.475071205621578e-05, "loss": 1.1166, "step": 5082 }, { "epoch": 0.3740078178891699, "grad_norm": 0.94140625, "learning_rate": 3.474537887799678e-05, "loss": 0.8274, "step": 5083 }, { "epoch": 0.37408139802253393, "grad_norm": 0.78515625, "learning_rate": 3.474004517676171e-05, "loss": 0.6075, "step": 5084 }, { "epoch": 0.37415497815589793, "grad_norm": 0.7734375, "learning_rate": 3.473471095279682e-05, "loss": 0.9428, "step": 5085 }, { "epoch": 0.3742285582892619, "grad_norm": 0.89453125, "learning_rate": 3.472937620638837e-05, "loss": 0.9936, "step": 5086 }, { "epoch": 0.3743021384226259, "grad_norm": 0.890625, "learning_rate": 3.472404093782268e-05, "loss": 0.9604, "step": 5087 }, { "epoch": 0.3743757185559899, "grad_norm": 0.765625, "learning_rate": 3.4718705147386075e-05, "loss": 0.8169, "step": 5088 }, { "epoch": 0.3744492986893539, "grad_norm": 0.78125, "learning_rate": 3.471336883536491e-05, "loss": 0.7736, "step": 5089 }, { "epoch": 0.37452287882271784, "grad_norm": 1.09375, "learning_rate": 3.47080320020456e-05, "loss": 0.9711, "step": 5090 }, { "epoch": 0.37459645895608185, "grad_norm": 0.83984375, "learning_rate": 3.470269464771454e-05, "loss": 0.9966, "step": 5091 }, { "epoch": 0.37467003908944585, "grad_norm": 0.7421875, "learning_rate": 3.469735677265819e-05, "loss": 0.7787, "step": 5092 }, { "epoch": 0.37474361922280985, "grad_norm": 0.9140625, "learning_rate": 3.4692018377163024e-05, "loss": 1.1409, "step": 5093 }, { "epoch": 0.37481719935617386, "grad_norm": 1.0703125, "learning_rate": 3.4686679461515534e-05, "loss": 1.6977, "step": 5094 }, { "epoch": 0.3748907794895378, "grad_norm": 4.625, "learning_rate": 3.4681340026002243e-05, "loss": 0.7545, "step": 5095 }, { "epoch": 0.3749643596229018, "grad_norm": 0.87890625, "learning_rate": 3.467600007090972e-05, "loss": 0.9121, "step": 5096 }, { "epoch": 0.3750379397562658, "grad_norm": 0.79296875, "learning_rate": 3.467065959652456e-05, "loss": 1.0055, "step": 5097 }, { "epoch": 0.3751115198896298, "grad_norm": 0.83984375, "learning_rate": 3.466531860313335e-05, "loss": 0.9226, "step": 5098 }, { "epoch": 0.37518510002299377, "grad_norm": 1.09375, "learning_rate": 3.465997709102276e-05, "loss": 1.2116, "step": 5099 }, { "epoch": 0.37525868015635777, "grad_norm": 0.8984375, "learning_rate": 3.4654635060479434e-05, "loss": 0.8605, "step": 5100 }, { "epoch": 0.3753322602897218, "grad_norm": 0.7109375, "learning_rate": 3.4649292511790085e-05, "loss": 0.8016, "step": 5101 }, { "epoch": 0.3754058404230858, "grad_norm": 0.92578125, "learning_rate": 3.464394944524143e-05, "loss": 0.6412, "step": 5102 }, { "epoch": 0.3754794205564498, "grad_norm": 0.91796875, "learning_rate": 3.463860586112022e-05, "loss": 1.0554, "step": 5103 }, { "epoch": 0.37555300068981373, "grad_norm": 0.83203125, "learning_rate": 3.463326175971324e-05, "loss": 0.8955, "step": 5104 }, { "epoch": 0.37562658082317774, "grad_norm": 0.85546875, "learning_rate": 3.4627917141307295e-05, "loss": 0.8602, "step": 5105 }, { "epoch": 0.37570016095654174, "grad_norm": 0.89453125, "learning_rate": 3.462257200618923e-05, "loss": 0.9276, "step": 5106 }, { "epoch": 0.37577374108990574, "grad_norm": 0.6640625, "learning_rate": 3.4617226354645894e-05, "loss": 0.6035, "step": 5107 }, { "epoch": 0.3758473212232697, "grad_norm": 0.94140625, "learning_rate": 3.4611880186964185e-05, "loss": 1.0858, "step": 5108 }, { "epoch": 0.3759209013566337, "grad_norm": 0.8203125, "learning_rate": 3.4606533503431015e-05, "loss": 0.9035, "step": 5109 }, { "epoch": 0.3759944814899977, "grad_norm": 0.80078125, "learning_rate": 3.460118630433334e-05, "loss": 0.7965, "step": 5110 }, { "epoch": 0.3760680616233617, "grad_norm": 0.80078125, "learning_rate": 3.459583858995813e-05, "loss": 0.9757, "step": 5111 }, { "epoch": 0.3761416417567257, "grad_norm": 0.82421875, "learning_rate": 3.459049036059239e-05, "loss": 0.8417, "step": 5112 }, { "epoch": 0.37621522189008966, "grad_norm": 0.8515625, "learning_rate": 3.458514161652314e-05, "loss": 1.2755, "step": 5113 }, { "epoch": 0.37628880202345366, "grad_norm": 1.015625, "learning_rate": 3.457979235803744e-05, "loss": 1.1431, "step": 5114 }, { "epoch": 0.37636238215681767, "grad_norm": 0.9921875, "learning_rate": 3.45744425854224e-05, "loss": 1.1099, "step": 5115 }, { "epoch": 0.37643596229018167, "grad_norm": 0.7578125, "learning_rate": 3.456909229896509e-05, "loss": 0.772, "step": 5116 }, { "epoch": 0.3765095424235456, "grad_norm": 0.91015625, "learning_rate": 3.4563741498952674e-05, "loss": 0.8679, "step": 5117 }, { "epoch": 0.3765831225569096, "grad_norm": 0.84765625, "learning_rate": 3.455839018567231e-05, "loss": 0.796, "step": 5118 }, { "epoch": 0.3766567026902736, "grad_norm": 0.95703125, "learning_rate": 3.455303835941121e-05, "loss": 0.7404, "step": 5119 }, { "epoch": 0.37673028282363763, "grad_norm": 0.609375, "learning_rate": 3.4547686020456574e-05, "loss": 0.604, "step": 5120 }, { "epoch": 0.37680386295700163, "grad_norm": 0.625, "learning_rate": 3.454233316909567e-05, "loss": 0.6039, "step": 5121 }, { "epoch": 0.3768774430903656, "grad_norm": 0.859375, "learning_rate": 3.453697980561576e-05, "loss": 0.8094, "step": 5122 }, { "epoch": 0.3769510232237296, "grad_norm": 0.875, "learning_rate": 3.4531625930304155e-05, "loss": 0.9612, "step": 5123 }, { "epoch": 0.3770246033570936, "grad_norm": 0.7265625, "learning_rate": 3.4526271543448196e-05, "loss": 0.601, "step": 5124 }, { "epoch": 0.3770981834904576, "grad_norm": 0.8671875, "learning_rate": 3.452091664533523e-05, "loss": 0.9024, "step": 5125 }, { "epoch": 0.37717176362382154, "grad_norm": 0.86328125, "learning_rate": 3.451556123625266e-05, "loss": 0.7551, "step": 5126 }, { "epoch": 0.37724534375718555, "grad_norm": 0.953125, "learning_rate": 3.4510205316487885e-05, "loss": 0.9634, "step": 5127 }, { "epoch": 0.37731892389054955, "grad_norm": 0.984375, "learning_rate": 3.450484888632836e-05, "loss": 1.0097, "step": 5128 }, { "epoch": 0.37739250402391356, "grad_norm": 0.93359375, "learning_rate": 3.4499491946061534e-05, "loss": 0.6972, "step": 5129 }, { "epoch": 0.37746608415727756, "grad_norm": 0.875, "learning_rate": 3.449413449597492e-05, "loss": 0.7327, "step": 5130 }, { "epoch": 0.3775396642906415, "grad_norm": 0.83984375, "learning_rate": 3.448877653635604e-05, "loss": 1.0995, "step": 5131 }, { "epoch": 0.3776132444240055, "grad_norm": 1.03125, "learning_rate": 3.448341806749245e-05, "loss": 1.0709, "step": 5132 }, { "epoch": 0.3776868245573695, "grad_norm": 0.80859375, "learning_rate": 3.4478059089671725e-05, "loss": 0.7534, "step": 5133 }, { "epoch": 0.3777604046907335, "grad_norm": 2.859375, "learning_rate": 3.447269960318147e-05, "loss": 1.3757, "step": 5134 }, { "epoch": 0.37783398482409747, "grad_norm": 0.70703125, "learning_rate": 3.4467339608309316e-05, "loss": 0.6253, "step": 5135 }, { "epoch": 0.3779075649574615, "grad_norm": 0.87109375, "learning_rate": 3.4461979105342925e-05, "loss": 0.7474, "step": 5136 }, { "epoch": 0.3779811450908255, "grad_norm": 1.140625, "learning_rate": 3.445661809456999e-05, "loss": 1.2046, "step": 5137 }, { "epoch": 0.3780547252241895, "grad_norm": 0.97265625, "learning_rate": 3.4451256576278215e-05, "loss": 0.9753, "step": 5138 }, { "epoch": 0.3781283053575535, "grad_norm": 0.94140625, "learning_rate": 3.4445894550755356e-05, "loss": 0.7771, "step": 5139 }, { "epoch": 0.37820188549091743, "grad_norm": 1.078125, "learning_rate": 3.444053201828918e-05, "loss": 1.2612, "step": 5140 }, { "epoch": 0.37827546562428144, "grad_norm": 1.109375, "learning_rate": 3.4435168979167476e-05, "loss": 1.3281, "step": 5141 }, { "epoch": 0.37834904575764544, "grad_norm": 0.98828125, "learning_rate": 3.442980543367808e-05, "loss": 1.2024, "step": 5142 }, { "epoch": 0.37842262589100945, "grad_norm": 1.0625, "learning_rate": 3.442444138210883e-05, "loss": 1.2625, "step": 5143 }, { "epoch": 0.3784962060243734, "grad_norm": 0.91796875, "learning_rate": 3.441907682474762e-05, "loss": 0.888, "step": 5144 }, { "epoch": 0.3785697861577374, "grad_norm": 0.75, "learning_rate": 3.441371176188233e-05, "loss": 0.782, "step": 5145 }, { "epoch": 0.3786433662911014, "grad_norm": 1.0, "learning_rate": 3.440834619380092e-05, "loss": 0.8574, "step": 5146 }, { "epoch": 0.3787169464244654, "grad_norm": 1.0234375, "learning_rate": 3.4402980120791345e-05, "loss": 1.3946, "step": 5147 }, { "epoch": 0.3787905265578294, "grad_norm": 0.84765625, "learning_rate": 3.439761354314158e-05, "loss": 1.2674, "step": 5148 }, { "epoch": 0.37886410669119336, "grad_norm": 0.79296875, "learning_rate": 3.4392246461139656e-05, "loss": 0.7175, "step": 5149 }, { "epoch": 0.37893768682455736, "grad_norm": 0.82421875, "learning_rate": 3.4386878875073594e-05, "loss": 0.9561, "step": 5150 }, { "epoch": 0.37901126695792137, "grad_norm": 0.85546875, "learning_rate": 3.438151078523147e-05, "loss": 0.6987, "step": 5151 }, { "epoch": 0.37908484709128537, "grad_norm": 0.8828125, "learning_rate": 3.4376142191901385e-05, "loss": 0.8381, "step": 5152 }, { "epoch": 0.3791584272246493, "grad_norm": 0.9375, "learning_rate": 3.437077309537146e-05, "loss": 1.326, "step": 5153 }, { "epoch": 0.3792320073580133, "grad_norm": 0.7734375, "learning_rate": 3.436540349592984e-05, "loss": 0.779, "step": 5154 }, { "epoch": 0.3793055874913773, "grad_norm": 0.94921875, "learning_rate": 3.4360033393864696e-05, "loss": 0.8868, "step": 5155 }, { "epoch": 0.37937916762474133, "grad_norm": 0.79296875, "learning_rate": 3.4354662789464246e-05, "loss": 0.8471, "step": 5156 }, { "epoch": 0.37945274775810534, "grad_norm": 0.78515625, "learning_rate": 3.4349291683016715e-05, "loss": 0.8737, "step": 5157 }, { "epoch": 0.3795263278914693, "grad_norm": 0.7578125, "learning_rate": 3.434392007481035e-05, "loss": 0.7175, "step": 5158 }, { "epoch": 0.3795999080248333, "grad_norm": 1.0, "learning_rate": 3.433854796513344e-05, "loss": 0.786, "step": 5159 }, { "epoch": 0.3796734881581973, "grad_norm": 1.0, "learning_rate": 3.433317535427432e-05, "loss": 0.9675, "step": 5160 }, { "epoch": 0.3797470682915613, "grad_norm": 0.78125, "learning_rate": 3.432780224252129e-05, "loss": 0.8816, "step": 5161 }, { "epoch": 0.37982064842492524, "grad_norm": 0.87890625, "learning_rate": 3.432242863016273e-05, "loss": 1.2071, "step": 5162 }, { "epoch": 0.37989422855828925, "grad_norm": 0.86328125, "learning_rate": 3.431705451748703e-05, "loss": 1.0243, "step": 5163 }, { "epoch": 0.37996780869165325, "grad_norm": 0.84375, "learning_rate": 3.4311679904782625e-05, "loss": 1.092, "step": 5164 }, { "epoch": 0.38004138882501726, "grad_norm": 0.7734375, "learning_rate": 3.4306304792337934e-05, "loss": 0.7073, "step": 5165 }, { "epoch": 0.38011496895838126, "grad_norm": 0.91796875, "learning_rate": 3.430092918044145e-05, "loss": 0.805, "step": 5166 }, { "epoch": 0.3801885490917452, "grad_norm": 0.671875, "learning_rate": 3.4295553069381664e-05, "loss": 0.7357, "step": 5167 }, { "epoch": 0.3802621292251092, "grad_norm": 0.796875, "learning_rate": 3.4290176459447096e-05, "loss": 0.7566, "step": 5168 }, { "epoch": 0.3803357093584732, "grad_norm": 0.71875, "learning_rate": 3.428479935092631e-05, "loss": 0.8424, "step": 5169 }, { "epoch": 0.3804092894918372, "grad_norm": 0.78515625, "learning_rate": 3.427942174410787e-05, "loss": 0.8054, "step": 5170 }, { "epoch": 0.38048286962520117, "grad_norm": 0.921875, "learning_rate": 3.42740436392804e-05, "loss": 1.303, "step": 5171 }, { "epoch": 0.3805564497585652, "grad_norm": 0.83203125, "learning_rate": 3.426866503673252e-05, "loss": 0.9888, "step": 5172 }, { "epoch": 0.3806300298919292, "grad_norm": 0.84375, "learning_rate": 3.42632859367529e-05, "loss": 1.2661, "step": 5173 }, { "epoch": 0.3807036100252932, "grad_norm": 0.91796875, "learning_rate": 3.425790633963021e-05, "loss": 0.8517, "step": 5174 }, { "epoch": 0.3807771901586572, "grad_norm": 0.8359375, "learning_rate": 3.4252526245653184e-05, "loss": 0.8681, "step": 5175 }, { "epoch": 0.38085077029202113, "grad_norm": 0.9375, "learning_rate": 3.4247145655110544e-05, "loss": 1.0429, "step": 5176 }, { "epoch": 0.38092435042538514, "grad_norm": 0.98828125, "learning_rate": 3.4241764568291057e-05, "loss": 1.257, "step": 5177 }, { "epoch": 0.38099793055874914, "grad_norm": 0.7265625, "learning_rate": 3.423638298548352e-05, "loss": 0.7095, "step": 5178 }, { "epoch": 0.38107151069211315, "grad_norm": 1.0625, "learning_rate": 3.423100090697676e-05, "loss": 1.2795, "step": 5179 }, { "epoch": 0.3811450908254771, "grad_norm": 0.80078125, "learning_rate": 3.422561833305962e-05, "loss": 1.0184, "step": 5180 }, { "epoch": 0.3812186709588411, "grad_norm": 0.9296875, "learning_rate": 3.422023526402096e-05, "loss": 1.1854, "step": 5181 }, { "epoch": 0.3812922510922051, "grad_norm": 0.91015625, "learning_rate": 3.421485170014969e-05, "loss": 1.0371, "step": 5182 }, { "epoch": 0.3813658312255691, "grad_norm": 0.8515625, "learning_rate": 3.420946764173474e-05, "loss": 0.8685, "step": 5183 }, { "epoch": 0.3814394113589331, "grad_norm": 0.98046875, "learning_rate": 3.4204083089065054e-05, "loss": 1.1983, "step": 5184 }, { "epoch": 0.38151299149229706, "grad_norm": 0.99609375, "learning_rate": 3.4198698042429604e-05, "loss": 1.0159, "step": 5185 }, { "epoch": 0.38158657162566106, "grad_norm": 1.03125, "learning_rate": 3.419331250211741e-05, "loss": 0.9349, "step": 5186 }, { "epoch": 0.38166015175902507, "grad_norm": 0.92578125, "learning_rate": 3.418792646841749e-05, "loss": 0.7611, "step": 5187 }, { "epoch": 0.38173373189238907, "grad_norm": 0.92578125, "learning_rate": 3.418253994161892e-05, "loss": 1.2321, "step": 5188 }, { "epoch": 0.381807312025753, "grad_norm": 0.86328125, "learning_rate": 3.4177152922010776e-05, "loss": 0.8143, "step": 5189 }, { "epoch": 0.381880892159117, "grad_norm": 0.8359375, "learning_rate": 3.4171765409882165e-05, "loss": 0.8956, "step": 5190 }, { "epoch": 0.38195447229248103, "grad_norm": 0.7109375, "learning_rate": 3.416637740552222e-05, "loss": 0.9225, "step": 5191 }, { "epoch": 0.38202805242584503, "grad_norm": 0.95703125, "learning_rate": 3.416098890922012e-05, "loss": 1.4415, "step": 5192 }, { "epoch": 0.38210163255920904, "grad_norm": 0.8203125, "learning_rate": 3.4155599921265044e-05, "loss": 0.7804, "step": 5193 }, { "epoch": 0.382175212692573, "grad_norm": 0.73828125, "learning_rate": 3.415021044194622e-05, "loss": 0.8919, "step": 5194 }, { "epoch": 0.382248792825937, "grad_norm": 0.73046875, "learning_rate": 3.414482047155288e-05, "loss": 0.8574, "step": 5195 }, { "epoch": 0.382322372959301, "grad_norm": 0.953125, "learning_rate": 3.413943001037429e-05, "loss": 0.9972, "step": 5196 }, { "epoch": 0.382395953092665, "grad_norm": 1.0078125, "learning_rate": 3.4134039058699765e-05, "loss": 1.4238, "step": 5197 }, { "epoch": 0.38246953322602895, "grad_norm": 0.92578125, "learning_rate": 3.412864761681861e-05, "loss": 1.0294, "step": 5198 }, { "epoch": 0.38254311335939295, "grad_norm": 0.82421875, "learning_rate": 3.412325568502018e-05, "loss": 0.786, "step": 5199 }, { "epoch": 0.38261669349275695, "grad_norm": 1.1328125, "learning_rate": 3.411786326359384e-05, "loss": 1.2168, "step": 5200 }, { "epoch": 0.38269027362612096, "grad_norm": 0.75, "learning_rate": 3.411247035282902e-05, "loss": 0.588, "step": 5201 }, { "epoch": 0.38276385375948496, "grad_norm": 0.64453125, "learning_rate": 3.410707695301511e-05, "loss": 0.7805, "step": 5202 }, { "epoch": 0.3828374338928489, "grad_norm": 0.7578125, "learning_rate": 3.410168306444158e-05, "loss": 0.7293, "step": 5203 }, { "epoch": 0.3829110140262129, "grad_norm": 0.75390625, "learning_rate": 3.409628868739793e-05, "loss": 0.9363, "step": 5204 }, { "epoch": 0.3829845941595769, "grad_norm": 0.85546875, "learning_rate": 3.409089382217362e-05, "loss": 0.8522, "step": 5205 }, { "epoch": 0.3830581742929409, "grad_norm": 1.171875, "learning_rate": 3.408549846905822e-05, "loss": 0.8536, "step": 5206 }, { "epoch": 0.38313175442630487, "grad_norm": 1.0390625, "learning_rate": 3.408010262834128e-05, "loss": 0.9996, "step": 5207 }, { "epoch": 0.3832053345596689, "grad_norm": 0.71875, "learning_rate": 3.4074706300312385e-05, "loss": 0.6353, "step": 5208 }, { "epoch": 0.3832789146930329, "grad_norm": 0.859375, "learning_rate": 3.406930948526114e-05, "loss": 0.91, "step": 5209 }, { "epoch": 0.3833524948263969, "grad_norm": 0.93359375, "learning_rate": 3.4063912183477186e-05, "loss": 1.0982, "step": 5210 }, { "epoch": 0.3834260749597609, "grad_norm": 0.9375, "learning_rate": 3.405851439525018e-05, "loss": 1.0828, "step": 5211 }, { "epoch": 0.38349965509312484, "grad_norm": 0.95703125, "learning_rate": 3.405311612086981e-05, "loss": 1.0176, "step": 5212 }, { "epoch": 0.38357323522648884, "grad_norm": 0.859375, "learning_rate": 3.4047717360625804e-05, "loss": 0.9045, "step": 5213 }, { "epoch": 0.38364681535985284, "grad_norm": 0.71875, "learning_rate": 3.404231811480789e-05, "loss": 0.507, "step": 5214 }, { "epoch": 0.38372039549321685, "grad_norm": 0.92578125, "learning_rate": 3.403691838370585e-05, "loss": 0.8231, "step": 5215 }, { "epoch": 0.3837939756265808, "grad_norm": 1.0078125, "learning_rate": 3.403151816760947e-05, "loss": 1.22, "step": 5216 }, { "epoch": 0.3838675557599448, "grad_norm": 0.71484375, "learning_rate": 3.402611746680857e-05, "loss": 0.5894, "step": 5217 }, { "epoch": 0.3839411358933088, "grad_norm": 0.78125, "learning_rate": 3.4020716281592974e-05, "loss": 0.8356, "step": 5218 }, { "epoch": 0.3840147160266728, "grad_norm": 0.84375, "learning_rate": 3.401531461225258e-05, "loss": 0.8682, "step": 5219 }, { "epoch": 0.3840882961600368, "grad_norm": 0.8984375, "learning_rate": 3.400991245907729e-05, "loss": 1.2747, "step": 5220 }, { "epoch": 0.38416187629340076, "grad_norm": 0.88671875, "learning_rate": 3.400450982235701e-05, "loss": 1.047, "step": 5221 }, { "epoch": 0.38423545642676477, "grad_norm": 1.0390625, "learning_rate": 3.39991067023817e-05, "loss": 1.0059, "step": 5222 }, { "epoch": 0.38430903656012877, "grad_norm": 0.859375, "learning_rate": 3.3993703099441323e-05, "loss": 0.7447, "step": 5223 }, { "epoch": 0.3843826166934928, "grad_norm": 0.9609375, "learning_rate": 3.3988299013825894e-05, "loss": 1.1768, "step": 5224 }, { "epoch": 0.3844561968268567, "grad_norm": 0.828125, "learning_rate": 3.398289444582542e-05, "loss": 0.8489, "step": 5225 }, { "epoch": 0.3845297769602207, "grad_norm": 0.796875, "learning_rate": 3.397748939572998e-05, "loss": 0.9217, "step": 5226 }, { "epoch": 0.38460335709358473, "grad_norm": 0.921875, "learning_rate": 3.397208386382963e-05, "loss": 1.3784, "step": 5227 }, { "epoch": 0.38467693722694873, "grad_norm": 0.99609375, "learning_rate": 3.396667785041449e-05, "loss": 0.9415, "step": 5228 }, { "epoch": 0.38475051736031274, "grad_norm": 0.6796875, "learning_rate": 3.396127135577469e-05, "loss": 0.7007, "step": 5229 }, { "epoch": 0.3848240974936767, "grad_norm": 0.8984375, "learning_rate": 3.3955864380200374e-05, "loss": 1.1998, "step": 5230 }, { "epoch": 0.3848976776270407, "grad_norm": 0.80078125, "learning_rate": 3.3950456923981736e-05, "loss": 1.1884, "step": 5231 }, { "epoch": 0.3849712577604047, "grad_norm": 0.87109375, "learning_rate": 3.394504898740898e-05, "loss": 0.924, "step": 5232 }, { "epoch": 0.3850448378937687, "grad_norm": 0.91796875, "learning_rate": 3.3939640570772344e-05, "loss": 1.4184, "step": 5233 }, { "epoch": 0.38511841802713265, "grad_norm": 0.84765625, "learning_rate": 3.393423167436208e-05, "loss": 0.9991, "step": 5234 }, { "epoch": 0.38519199816049665, "grad_norm": 0.64453125, "learning_rate": 3.392882229846847e-05, "loss": 0.7082, "step": 5235 }, { "epoch": 0.38526557829386066, "grad_norm": 0.83984375, "learning_rate": 3.392341244338184e-05, "loss": 0.6199, "step": 5236 }, { "epoch": 0.38533915842722466, "grad_norm": 0.97265625, "learning_rate": 3.391800210939251e-05, "loss": 1.1754, "step": 5237 }, { "epoch": 0.38541273856058866, "grad_norm": 0.79296875, "learning_rate": 3.391259129679086e-05, "loss": 1.2509, "step": 5238 }, { "epoch": 0.3854863186939526, "grad_norm": 0.78515625, "learning_rate": 3.390718000586725e-05, "loss": 0.9425, "step": 5239 }, { "epoch": 0.3855598988273166, "grad_norm": 0.95703125, "learning_rate": 3.390176823691214e-05, "loss": 0.893, "step": 5240 }, { "epoch": 0.3856334789606806, "grad_norm": 0.75390625, "learning_rate": 3.3896355990215925e-05, "loss": 0.9134, "step": 5241 }, { "epoch": 0.3857070590940446, "grad_norm": 0.8203125, "learning_rate": 3.389094326606909e-05, "loss": 0.7703, "step": 5242 }, { "epoch": 0.3857806392274086, "grad_norm": 1.078125, "learning_rate": 3.388553006476212e-05, "loss": 1.2572, "step": 5243 }, { "epoch": 0.3858542193607726, "grad_norm": 0.703125, "learning_rate": 3.388011638658554e-05, "loss": 0.9101, "step": 5244 }, { "epoch": 0.3859277994941366, "grad_norm": 0.8515625, "learning_rate": 3.3874702231829883e-05, "loss": 0.7354, "step": 5245 }, { "epoch": 0.3860013796275006, "grad_norm": 0.87109375, "learning_rate": 3.386928760078571e-05, "loss": 1.211, "step": 5246 }, { "epoch": 0.3860749597608646, "grad_norm": 0.8515625, "learning_rate": 3.386387249374364e-05, "loss": 1.1345, "step": 5247 }, { "epoch": 0.38614853989422854, "grad_norm": 0.69140625, "learning_rate": 3.385845691099426e-05, "loss": 0.6798, "step": 5248 }, { "epoch": 0.38622212002759254, "grad_norm": 0.94140625, "learning_rate": 3.385304085282824e-05, "loss": 0.9198, "step": 5249 }, { "epoch": 0.38629570016095655, "grad_norm": 0.83203125, "learning_rate": 3.384762431953623e-05, "loss": 0.7693, "step": 5250 }, { "epoch": 0.38636928029432055, "grad_norm": 0.7734375, "learning_rate": 3.384220731140894e-05, "loss": 0.7477, "step": 5251 }, { "epoch": 0.3864428604276845, "grad_norm": 0.73046875, "learning_rate": 3.3836789828737074e-05, "loss": 0.8239, "step": 5252 }, { "epoch": 0.3865164405610485, "grad_norm": 0.77734375, "learning_rate": 3.383137187181139e-05, "loss": 0.8524, "step": 5253 }, { "epoch": 0.3865900206944125, "grad_norm": 0.86328125, "learning_rate": 3.382595344092267e-05, "loss": 1.1263, "step": 5254 }, { "epoch": 0.3866636008277765, "grad_norm": 0.94140625, "learning_rate": 3.382053453636169e-05, "loss": 1.233, "step": 5255 }, { "epoch": 0.3867371809611405, "grad_norm": 0.953125, "learning_rate": 3.3815115158419287e-05, "loss": 1.3229, "step": 5256 }, { "epoch": 0.38681076109450446, "grad_norm": 0.83203125, "learning_rate": 3.3809695307386294e-05, "loss": 0.9049, "step": 5257 }, { "epoch": 0.38688434122786847, "grad_norm": 0.7578125, "learning_rate": 3.38042749835536e-05, "loss": 0.7716, "step": 5258 }, { "epoch": 0.38695792136123247, "grad_norm": 0.96484375, "learning_rate": 3.379885418721209e-05, "loss": 1.0446, "step": 5259 }, { "epoch": 0.3870315014945965, "grad_norm": 1.0625, "learning_rate": 3.3793432918652695e-05, "loss": 1.0852, "step": 5260 }, { "epoch": 0.3871050816279604, "grad_norm": 0.88671875, "learning_rate": 3.378801117816637e-05, "loss": 0.8312, "step": 5261 }, { "epoch": 0.3871786617613244, "grad_norm": 0.8203125, "learning_rate": 3.378258896604408e-05, "loss": 0.795, "step": 5262 }, { "epoch": 0.38725224189468843, "grad_norm": 1.1796875, "learning_rate": 3.377716628257683e-05, "loss": 1.0622, "step": 5263 }, { "epoch": 0.38732582202805244, "grad_norm": 0.91015625, "learning_rate": 3.3771743128055645e-05, "loss": 0.953, "step": 5264 }, { "epoch": 0.38739940216141644, "grad_norm": 1.0078125, "learning_rate": 3.3766319502771565e-05, "loss": 0.901, "step": 5265 }, { "epoch": 0.3874729822947804, "grad_norm": 0.8046875, "learning_rate": 3.376089540701568e-05, "loss": 0.9593, "step": 5266 }, { "epoch": 0.3875465624281444, "grad_norm": 0.625, "learning_rate": 3.375547084107908e-05, "loss": 0.7164, "step": 5267 }, { "epoch": 0.3876201425615084, "grad_norm": 1.0703125, "learning_rate": 3.375004580525291e-05, "loss": 1.3671, "step": 5268 }, { "epoch": 0.3876937226948724, "grad_norm": 0.9609375, "learning_rate": 3.37446202998283e-05, "loss": 1.0811, "step": 5269 }, { "epoch": 0.38776730282823635, "grad_norm": 1.0078125, "learning_rate": 3.3739194325096436e-05, "loss": 1.0696, "step": 5270 }, { "epoch": 0.38784088296160035, "grad_norm": 0.9609375, "learning_rate": 3.373376788134852e-05, "loss": 1.0669, "step": 5271 }, { "epoch": 0.38791446309496436, "grad_norm": 0.74609375, "learning_rate": 3.3728340968875773e-05, "loss": 0.7129, "step": 5272 }, { "epoch": 0.38798804322832836, "grad_norm": 0.78125, "learning_rate": 3.372291358796945e-05, "loss": 0.8409, "step": 5273 }, { "epoch": 0.38806162336169236, "grad_norm": 0.96875, "learning_rate": 3.371748573892084e-05, "loss": 1.2108, "step": 5274 }, { "epoch": 0.3881352034950563, "grad_norm": 0.7578125, "learning_rate": 3.3712057422021224e-05, "loss": 0.7328, "step": 5275 }, { "epoch": 0.3882087836284203, "grad_norm": 0.92578125, "learning_rate": 3.370662863756194e-05, "loss": 0.97, "step": 5276 }, { "epoch": 0.3882823637617843, "grad_norm": 0.703125, "learning_rate": 3.370119938583436e-05, "loss": 0.8496, "step": 5277 }, { "epoch": 0.3883559438951483, "grad_norm": 1.078125, "learning_rate": 3.369576966712982e-05, "loss": 0.8405, "step": 5278 }, { "epoch": 0.3884295240285123, "grad_norm": 0.87109375, "learning_rate": 3.369033948173976e-05, "loss": 0.9884, "step": 5279 }, { "epoch": 0.3885031041618763, "grad_norm": 0.875, "learning_rate": 3.3684908829955586e-05, "loss": 0.7494, "step": 5280 }, { "epoch": 0.3885766842952403, "grad_norm": 0.92578125, "learning_rate": 3.367947771206877e-05, "loss": 0.7816, "step": 5281 }, { "epoch": 0.3886502644286043, "grad_norm": 0.875, "learning_rate": 3.3674046128370766e-05, "loss": 0.6771, "step": 5282 }, { "epoch": 0.3887238445619683, "grad_norm": 0.72265625, "learning_rate": 3.366861407915309e-05, "loss": 0.9264, "step": 5283 }, { "epoch": 0.38879742469533224, "grad_norm": 0.83984375, "learning_rate": 3.3663181564707286e-05, "loss": 0.8988, "step": 5284 }, { "epoch": 0.38887100482869624, "grad_norm": 0.80078125, "learning_rate": 3.365774858532487e-05, "loss": 1.3125, "step": 5285 }, { "epoch": 0.38894458496206025, "grad_norm": 0.79296875, "learning_rate": 3.365231514129745e-05, "loss": 1.2386, "step": 5286 }, { "epoch": 0.38901816509542425, "grad_norm": 0.953125, "learning_rate": 3.364688123291662e-05, "loss": 0.934, "step": 5287 }, { "epoch": 0.3890917452287882, "grad_norm": 0.93359375, "learning_rate": 3.3641446860474e-05, "loss": 0.8637, "step": 5288 }, { "epoch": 0.3891653253621522, "grad_norm": 0.79296875, "learning_rate": 3.363601202426124e-05, "loss": 0.9003, "step": 5289 }, { "epoch": 0.3892389054955162, "grad_norm": 0.8359375, "learning_rate": 3.3630576724570046e-05, "loss": 0.9079, "step": 5290 }, { "epoch": 0.3893124856288802, "grad_norm": 0.59765625, "learning_rate": 3.3625140961692084e-05, "loss": 0.6688, "step": 5291 }, { "epoch": 0.3893860657622442, "grad_norm": 0.84765625, "learning_rate": 3.361970473591911e-05, "loss": 0.7819, "step": 5292 }, { "epoch": 0.38945964589560816, "grad_norm": 0.703125, "learning_rate": 3.361426804754285e-05, "loss": 0.6088, "step": 5293 }, { "epoch": 0.38953322602897217, "grad_norm": 0.95703125, "learning_rate": 3.36088308968551e-05, "loss": 1.0231, "step": 5294 }, { "epoch": 0.38960680616233617, "grad_norm": 1.0703125, "learning_rate": 3.3603393284147656e-05, "loss": 1.3235, "step": 5295 }, { "epoch": 0.3896803862957002, "grad_norm": 0.96875, "learning_rate": 3.3597955209712337e-05, "loss": 0.8804, "step": 5296 }, { "epoch": 0.3897539664290642, "grad_norm": 0.84765625, "learning_rate": 3.359251667384101e-05, "loss": 1.1223, "step": 5297 }, { "epoch": 0.38982754656242813, "grad_norm": 0.7109375, "learning_rate": 3.358707767682554e-05, "loss": 0.8199, "step": 5298 }, { "epoch": 0.38990112669579213, "grad_norm": 0.85546875, "learning_rate": 3.358163821895783e-05, "loss": 0.661, "step": 5299 }, { "epoch": 0.38997470682915614, "grad_norm": 1.296875, "learning_rate": 3.3576198300529804e-05, "loss": 1.1993, "step": 5300 }, { "epoch": 0.39004828696252014, "grad_norm": 0.8203125, "learning_rate": 3.357075792183341e-05, "loss": 1.0676, "step": 5301 }, { "epoch": 0.3901218670958841, "grad_norm": 0.8671875, "learning_rate": 3.356531708316063e-05, "loss": 1.09, "step": 5302 }, { "epoch": 0.3901954472292481, "grad_norm": 0.7109375, "learning_rate": 3.3559875784803465e-05, "loss": 0.92, "step": 5303 }, { "epoch": 0.3902690273626121, "grad_norm": 0.9140625, "learning_rate": 3.3554434027053926e-05, "loss": 1.0002, "step": 5304 }, { "epoch": 0.3903426074959761, "grad_norm": 0.92578125, "learning_rate": 3.354899181020407e-05, "loss": 1.2513, "step": 5305 }, { "epoch": 0.3904161876293401, "grad_norm": 0.6796875, "learning_rate": 3.3543549134545975e-05, "loss": 0.6741, "step": 5306 }, { "epoch": 0.39048976776270405, "grad_norm": 0.7734375, "learning_rate": 3.353810600037173e-05, "loss": 1.0504, "step": 5307 }, { "epoch": 0.39056334789606806, "grad_norm": 0.62890625, "learning_rate": 3.353266240797346e-05, "loss": 0.5393, "step": 5308 }, { "epoch": 0.39063692802943206, "grad_norm": 0.75, "learning_rate": 3.352721835764333e-05, "loss": 1.1147, "step": 5309 }, { "epoch": 0.39071050816279607, "grad_norm": 1.0625, "learning_rate": 3.3521773849673475e-05, "loss": 0.9932, "step": 5310 }, { "epoch": 0.39078408829616, "grad_norm": 0.8203125, "learning_rate": 3.351632888435613e-05, "loss": 0.7278, "step": 5311 }, { "epoch": 0.390857668429524, "grad_norm": 0.68359375, "learning_rate": 3.351088346198349e-05, "loss": 0.7127, "step": 5312 }, { "epoch": 0.390931248562888, "grad_norm": 0.7890625, "learning_rate": 3.350543758284781e-05, "loss": 0.9187, "step": 5313 }, { "epoch": 0.391004828696252, "grad_norm": 0.7734375, "learning_rate": 3.349999124724136e-05, "loss": 0.9182, "step": 5314 }, { "epoch": 0.39107840882961603, "grad_norm": 0.65234375, "learning_rate": 3.349454445545644e-05, "loss": 0.7731, "step": 5315 }, { "epoch": 0.39115198896298, "grad_norm": 1.640625, "learning_rate": 3.348909720778535e-05, "loss": 0.8143, "step": 5316 }, { "epoch": 0.391225569096344, "grad_norm": 0.890625, "learning_rate": 3.348364950452046e-05, "loss": 1.1468, "step": 5317 }, { "epoch": 0.391299149229708, "grad_norm": 0.94140625, "learning_rate": 3.347820134595412e-05, "loss": 0.9937, "step": 5318 }, { "epoch": 0.391372729363072, "grad_norm": 0.86328125, "learning_rate": 3.347275273237872e-05, "loss": 1.2644, "step": 5319 }, { "epoch": 0.39144630949643594, "grad_norm": 0.90234375, "learning_rate": 3.346730366408669e-05, "loss": 0.9775, "step": 5320 }, { "epoch": 0.39151988962979994, "grad_norm": 0.91796875, "learning_rate": 3.346185414137046e-05, "loss": 0.8551, "step": 5321 }, { "epoch": 0.39159346976316395, "grad_norm": 0.9609375, "learning_rate": 3.345640416452251e-05, "loss": 1.1624, "step": 5322 }, { "epoch": 0.39166704989652795, "grad_norm": 0.828125, "learning_rate": 3.345095373383531e-05, "loss": 0.8431, "step": 5323 }, { "epoch": 0.39174063002989196, "grad_norm": 0.69140625, "learning_rate": 3.344550284960139e-05, "loss": 0.5652, "step": 5324 }, { "epoch": 0.3918142101632559, "grad_norm": 0.859375, "learning_rate": 3.3440051512113285e-05, "loss": 0.7254, "step": 5325 }, { "epoch": 0.3918877902966199, "grad_norm": 0.87109375, "learning_rate": 3.343459972166355e-05, "loss": 0.899, "step": 5326 }, { "epoch": 0.3919613704299839, "grad_norm": 0.91796875, "learning_rate": 3.342914747854478e-05, "loss": 1.1617, "step": 5327 }, { "epoch": 0.3920349505633479, "grad_norm": 0.859375, "learning_rate": 3.342369478304958e-05, "loss": 1.0367, "step": 5328 }, { "epoch": 0.39210853069671187, "grad_norm": 1.0078125, "learning_rate": 3.341824163547061e-05, "loss": 1.0766, "step": 5329 }, { "epoch": 0.39218211083007587, "grad_norm": 0.8046875, "learning_rate": 3.3412788036100504e-05, "loss": 0.8493, "step": 5330 }, { "epoch": 0.3922556909634399, "grad_norm": 0.87109375, "learning_rate": 3.340733398523195e-05, "loss": 1.1773, "step": 5331 }, { "epoch": 0.3923292710968039, "grad_norm": 0.8125, "learning_rate": 3.3401879483157655e-05, "loss": 0.8706, "step": 5332 }, { "epoch": 0.3924028512301679, "grad_norm": 1.015625, "learning_rate": 3.339642453017036e-05, "loss": 1.0878, "step": 5333 }, { "epoch": 0.39247643136353183, "grad_norm": 0.81640625, "learning_rate": 3.3390969126562824e-05, "loss": 0.688, "step": 5334 }, { "epoch": 0.39255001149689583, "grad_norm": 0.828125, "learning_rate": 3.3385513272627816e-05, "loss": 0.8408, "step": 5335 }, { "epoch": 0.39262359163025984, "grad_norm": 1.1015625, "learning_rate": 3.338005696865816e-05, "loss": 1.3848, "step": 5336 }, { "epoch": 0.39269717176362384, "grad_norm": 0.76953125, "learning_rate": 3.337460021494666e-05, "loss": 0.8371, "step": 5337 }, { "epoch": 0.3927707518969878, "grad_norm": 0.9453125, "learning_rate": 3.3369143011786195e-05, "loss": 0.8894, "step": 5338 }, { "epoch": 0.3928443320303518, "grad_norm": 0.87109375, "learning_rate": 3.336368535946963e-05, "loss": 0.9933, "step": 5339 }, { "epoch": 0.3929179121637158, "grad_norm": 0.8203125, "learning_rate": 3.3358227258289865e-05, "loss": 0.8759, "step": 5340 }, { "epoch": 0.3929914922970798, "grad_norm": 0.859375, "learning_rate": 3.335276870853983e-05, "loss": 1.1022, "step": 5341 }, { "epoch": 0.3930650724304438, "grad_norm": 0.77734375, "learning_rate": 3.334730971051247e-05, "loss": 0.9794, "step": 5342 }, { "epoch": 0.39313865256380776, "grad_norm": 0.76171875, "learning_rate": 3.334185026450077e-05, "loss": 0.8649, "step": 5343 }, { "epoch": 0.39321223269717176, "grad_norm": 0.76171875, "learning_rate": 3.333639037079772e-05, "loss": 0.7994, "step": 5344 }, { "epoch": 0.39328581283053576, "grad_norm": 0.859375, "learning_rate": 3.333093002969635e-05, "loss": 0.9591, "step": 5345 }, { "epoch": 0.39335939296389977, "grad_norm": 0.78125, "learning_rate": 3.3325469241489696e-05, "loss": 0.8415, "step": 5346 }, { "epoch": 0.3934329730972637, "grad_norm": 1.0703125, "learning_rate": 3.3320008006470825e-05, "loss": 1.1434, "step": 5347 }, { "epoch": 0.3935065532306277, "grad_norm": 0.9296875, "learning_rate": 3.331454632493284e-05, "loss": 1.3751, "step": 5348 }, { "epoch": 0.3935801333639917, "grad_norm": 0.6484375, "learning_rate": 3.330908419716886e-05, "loss": 0.9702, "step": 5349 }, { "epoch": 0.39365371349735573, "grad_norm": 1.09375, "learning_rate": 3.330362162347204e-05, "loss": 1.0524, "step": 5350 }, { "epoch": 0.39372729363071973, "grad_norm": 1.03125, "learning_rate": 3.329815860413551e-05, "loss": 0.8381, "step": 5351 }, { "epoch": 0.3938008737640837, "grad_norm": 0.875, "learning_rate": 3.329269513945249e-05, "loss": 0.9965, "step": 5352 }, { "epoch": 0.3938744538974477, "grad_norm": 0.84765625, "learning_rate": 3.328723122971619e-05, "loss": 1.1901, "step": 5353 }, { "epoch": 0.3939480340308117, "grad_norm": 0.75390625, "learning_rate": 3.328176687521983e-05, "loss": 0.9843, "step": 5354 }, { "epoch": 0.3940216141641757, "grad_norm": 0.79296875, "learning_rate": 3.327630207625668e-05, "loss": 0.827, "step": 5355 }, { "epoch": 0.39409519429753964, "grad_norm": 0.89453125, "learning_rate": 3.327083683312004e-05, "loss": 0.8483, "step": 5356 }, { "epoch": 0.39416877443090365, "grad_norm": 0.7578125, "learning_rate": 3.326537114610321e-05, "loss": 1.094, "step": 5357 }, { "epoch": 0.39424235456426765, "grad_norm": 0.8828125, "learning_rate": 3.325990501549952e-05, "loss": 0.8335, "step": 5358 }, { "epoch": 0.39431593469763165, "grad_norm": 0.81640625, "learning_rate": 3.325443844160233e-05, "loss": 0.8375, "step": 5359 }, { "epoch": 0.39438951483099566, "grad_norm": 0.875, "learning_rate": 3.324897142470502e-05, "loss": 0.8763, "step": 5360 }, { "epoch": 0.3944630949643596, "grad_norm": 1.390625, "learning_rate": 3.324350396510099e-05, "loss": 1.296, "step": 5361 }, { "epoch": 0.3945366750977236, "grad_norm": 0.96484375, "learning_rate": 3.323803606308367e-05, "loss": 1.3584, "step": 5362 }, { "epoch": 0.3946102552310876, "grad_norm": 0.75, "learning_rate": 3.3232567718946516e-05, "loss": 0.7868, "step": 5363 }, { "epoch": 0.3946838353644516, "grad_norm": 0.84765625, "learning_rate": 3.3227098932983e-05, "loss": 0.9081, "step": 5364 }, { "epoch": 0.39475741549781557, "grad_norm": 0.7265625, "learning_rate": 3.3221629705486627e-05, "loss": 0.8263, "step": 5365 }, { "epoch": 0.39483099563117957, "grad_norm": 0.8671875, "learning_rate": 3.321616003675091e-05, "loss": 1.1382, "step": 5366 }, { "epoch": 0.3949045757645436, "grad_norm": 1.078125, "learning_rate": 3.32106899270694e-05, "loss": 1.0956, "step": 5367 }, { "epoch": 0.3949781558979076, "grad_norm": 0.80078125, "learning_rate": 3.320521937673568e-05, "loss": 0.9621, "step": 5368 }, { "epoch": 0.3950517360312716, "grad_norm": 0.80859375, "learning_rate": 3.319974838604333e-05, "loss": 0.6868, "step": 5369 }, { "epoch": 0.39512531616463553, "grad_norm": 0.6875, "learning_rate": 3.3194276955285976e-05, "loss": 1.1196, "step": 5370 }, { "epoch": 0.39519889629799954, "grad_norm": 0.796875, "learning_rate": 3.3188805084757244e-05, "loss": 0.6754, "step": 5371 }, { "epoch": 0.39527247643136354, "grad_norm": 0.703125, "learning_rate": 3.318333277475081e-05, "loss": 0.7185, "step": 5372 }, { "epoch": 0.39534605656472754, "grad_norm": 0.8671875, "learning_rate": 3.317786002556037e-05, "loss": 0.7248, "step": 5373 }, { "epoch": 0.3954196366980915, "grad_norm": 0.7578125, "learning_rate": 3.3172386837479625e-05, "loss": 1.2999, "step": 5374 }, { "epoch": 0.3954932168314555, "grad_norm": 0.76953125, "learning_rate": 3.316691321080231e-05, "loss": 0.7879, "step": 5375 }, { "epoch": 0.3955667969648195, "grad_norm": 0.90625, "learning_rate": 3.3161439145822194e-05, "loss": 0.7453, "step": 5376 }, { "epoch": 0.3956403770981835, "grad_norm": 0.94140625, "learning_rate": 3.315596464283306e-05, "loss": 0.8616, "step": 5377 }, { "epoch": 0.3957139572315475, "grad_norm": 1.078125, "learning_rate": 3.315048970212869e-05, "loss": 0.9999, "step": 5378 }, { "epoch": 0.39578753736491146, "grad_norm": 1.0078125, "learning_rate": 3.3145014324002944e-05, "loss": 1.459, "step": 5379 }, { "epoch": 0.39586111749827546, "grad_norm": 0.93359375, "learning_rate": 3.313953850874966e-05, "loss": 0.9809, "step": 5380 }, { "epoch": 0.39593469763163947, "grad_norm": 1.1171875, "learning_rate": 3.3134062256662714e-05, "loss": 0.9415, "step": 5381 }, { "epoch": 0.39600827776500347, "grad_norm": 0.86328125, "learning_rate": 3.3128585568036014e-05, "loss": 1.1217, "step": 5382 }, { "epoch": 0.3960818578983674, "grad_norm": 0.84375, "learning_rate": 3.3123108443163474e-05, "loss": 0.6739, "step": 5383 }, { "epoch": 0.3961554380317314, "grad_norm": 0.9921875, "learning_rate": 3.3117630882339054e-05, "loss": 1.3538, "step": 5384 }, { "epoch": 0.3962290181650954, "grad_norm": 0.96875, "learning_rate": 3.3112152885856716e-05, "loss": 0.6795, "step": 5385 }, { "epoch": 0.39630259829845943, "grad_norm": 1.0390625, "learning_rate": 3.3106674454010454e-05, "loss": 1.1909, "step": 5386 }, { "epoch": 0.39637617843182343, "grad_norm": 0.8046875, "learning_rate": 3.310119558709428e-05, "loss": 0.9258, "step": 5387 }, { "epoch": 0.3964497585651874, "grad_norm": 1.3515625, "learning_rate": 3.309571628540224e-05, "loss": 1.1506, "step": 5388 }, { "epoch": 0.3965233386985514, "grad_norm": 0.890625, "learning_rate": 3.30902365492284e-05, "loss": 1.0797, "step": 5389 }, { "epoch": 0.3965969188319154, "grad_norm": 0.80078125, "learning_rate": 3.3084756378866845e-05, "loss": 0.7262, "step": 5390 }, { "epoch": 0.3966704989652794, "grad_norm": 0.65234375, "learning_rate": 3.307927577461169e-05, "loss": 0.5526, "step": 5391 }, { "epoch": 0.39674407909864334, "grad_norm": 0.8515625, "learning_rate": 3.3073794736757056e-05, "loss": 0.8195, "step": 5392 }, { "epoch": 0.39681765923200735, "grad_norm": 0.8828125, "learning_rate": 3.306831326559712e-05, "loss": 0.8256, "step": 5393 }, { "epoch": 0.39689123936537135, "grad_norm": 0.89453125, "learning_rate": 3.306283136142603e-05, "loss": 1.4114, "step": 5394 }, { "epoch": 0.39696481949873536, "grad_norm": 0.74609375, "learning_rate": 3.305734902453802e-05, "loss": 0.7751, "step": 5395 }, { "epoch": 0.39703839963209936, "grad_norm": 0.88671875, "learning_rate": 3.305186625522731e-05, "loss": 0.7799, "step": 5396 }, { "epoch": 0.3971119797654633, "grad_norm": 0.7734375, "learning_rate": 3.304638305378814e-05, "loss": 0.937, "step": 5397 }, { "epoch": 0.3971855598988273, "grad_norm": 0.75390625, "learning_rate": 3.3040899420514793e-05, "loss": 0.6851, "step": 5398 }, { "epoch": 0.3972591400321913, "grad_norm": 0.8828125, "learning_rate": 3.303541535570156e-05, "loss": 1.0003, "step": 5399 }, { "epoch": 0.3973327201655553, "grad_norm": 0.83984375, "learning_rate": 3.302993085964276e-05, "loss": 1.0351, "step": 5400 }, { "epoch": 0.39740630029891927, "grad_norm": 1.0078125, "learning_rate": 3.302444593263273e-05, "loss": 0.8263, "step": 5401 }, { "epoch": 0.3974798804322833, "grad_norm": 0.84375, "learning_rate": 3.3018960574965864e-05, "loss": 0.7955, "step": 5402 }, { "epoch": 0.3975534605656473, "grad_norm": 0.79296875, "learning_rate": 3.301347478693651e-05, "loss": 0.8923, "step": 5403 }, { "epoch": 0.3976270406990113, "grad_norm": 0.91015625, "learning_rate": 3.3007988568839104e-05, "loss": 0.7069, "step": 5404 }, { "epoch": 0.3977006208323753, "grad_norm": 0.83203125, "learning_rate": 3.300250192096808e-05, "loss": 0.979, "step": 5405 }, { "epoch": 0.39777420096573923, "grad_norm": 0.87890625, "learning_rate": 3.299701484361789e-05, "loss": 0.9844, "step": 5406 }, { "epoch": 0.39784778109910324, "grad_norm": 0.79296875, "learning_rate": 3.299152733708303e-05, "loss": 0.7956, "step": 5407 }, { "epoch": 0.39792136123246724, "grad_norm": 1.140625, "learning_rate": 3.298603940165797e-05, "loss": 1.2359, "step": 5408 }, { "epoch": 0.39799494136583125, "grad_norm": 1.140625, "learning_rate": 3.298055103763727e-05, "loss": 1.1349, "step": 5409 }, { "epoch": 0.3980685214991952, "grad_norm": 0.8359375, "learning_rate": 3.297506224531547e-05, "loss": 0.852, "step": 5410 }, { "epoch": 0.3981421016325592, "grad_norm": 0.75390625, "learning_rate": 3.2969573024987136e-05, "loss": 0.706, "step": 5411 }, { "epoch": 0.3982156817659232, "grad_norm": 0.73828125, "learning_rate": 3.296408337694687e-05, "loss": 0.7747, "step": 5412 }, { "epoch": 0.3982892618992872, "grad_norm": 0.7265625, "learning_rate": 3.2958593301489296e-05, "loss": 0.7966, "step": 5413 }, { "epoch": 0.3983628420326512, "grad_norm": 1.0546875, "learning_rate": 3.2953102798909055e-05, "loss": 1.4578, "step": 5414 }, { "epoch": 0.39843642216601516, "grad_norm": 0.859375, "learning_rate": 3.29476118695008e-05, "loss": 0.6992, "step": 5415 }, { "epoch": 0.39851000229937916, "grad_norm": 1.0234375, "learning_rate": 3.294212051355923e-05, "loss": 1.0596, "step": 5416 }, { "epoch": 0.39858358243274317, "grad_norm": 1.1484375, "learning_rate": 3.293662873137906e-05, "loss": 1.3676, "step": 5417 }, { "epoch": 0.39865716256610717, "grad_norm": 0.671875, "learning_rate": 3.293113652325501e-05, "loss": 0.8509, "step": 5418 }, { "epoch": 0.3987307426994711, "grad_norm": 0.90234375, "learning_rate": 3.292564388948184e-05, "loss": 0.7004, "step": 5419 }, { "epoch": 0.3988043228328351, "grad_norm": 0.80078125, "learning_rate": 3.292015083035433e-05, "loss": 0.8331, "step": 5420 }, { "epoch": 0.3988779029661991, "grad_norm": 0.81640625, "learning_rate": 3.291465734616729e-05, "loss": 0.736, "step": 5421 }, { "epoch": 0.39895148309956313, "grad_norm": 1.0546875, "learning_rate": 3.290916343721553e-05, "loss": 1.1495, "step": 5422 }, { "epoch": 0.39902506323292714, "grad_norm": 0.79296875, "learning_rate": 3.2903669103793916e-05, "loss": 0.5683, "step": 5423 }, { "epoch": 0.3990986433662911, "grad_norm": 0.8515625, "learning_rate": 3.2898174346197306e-05, "loss": 1.2228, "step": 5424 }, { "epoch": 0.3991722234996551, "grad_norm": 1.3359375, "learning_rate": 3.289267916472059e-05, "loss": 1.1784, "step": 5425 }, { "epoch": 0.3992458036330191, "grad_norm": 0.9453125, "learning_rate": 3.288718355965871e-05, "loss": 0.9894, "step": 5426 }, { "epoch": 0.3993193837663831, "grad_norm": 0.75390625, "learning_rate": 3.288168753130657e-05, "loss": 0.9713, "step": 5427 }, { "epoch": 0.39939296389974704, "grad_norm": 0.6875, "learning_rate": 3.287619107995914e-05, "loss": 0.9314, "step": 5428 }, { "epoch": 0.39946654403311105, "grad_norm": 0.9140625, "learning_rate": 3.287069420591142e-05, "loss": 1.343, "step": 5429 }, { "epoch": 0.39954012416647505, "grad_norm": 0.65625, "learning_rate": 3.286519690945841e-05, "loss": 0.7085, "step": 5430 }, { "epoch": 0.39961370429983906, "grad_norm": 0.9296875, "learning_rate": 3.2859699190895135e-05, "loss": 0.9537, "step": 5431 }, { "epoch": 0.39968728443320306, "grad_norm": 0.84375, "learning_rate": 3.285420105051665e-05, "loss": 0.7191, "step": 5432 }, { "epoch": 0.399760864566567, "grad_norm": 0.625, "learning_rate": 3.2848702488618025e-05, "loss": 0.4963, "step": 5433 }, { "epoch": 0.399834444699931, "grad_norm": 0.76953125, "learning_rate": 3.284320350549436e-05, "loss": 0.707, "step": 5434 }, { "epoch": 0.399908024833295, "grad_norm": 1.078125, "learning_rate": 3.283770410144078e-05, "loss": 1.1303, "step": 5435 }, { "epoch": 0.399981604966659, "grad_norm": 0.9921875, "learning_rate": 3.2832204276752435e-05, "loss": 1.1261, "step": 5436 }, { "epoch": 0.40005518510002297, "grad_norm": 0.82421875, "learning_rate": 3.282670403172447e-05, "loss": 1.2074, "step": 5437 }, { "epoch": 0.400128765233387, "grad_norm": 0.859375, "learning_rate": 3.282120336665207e-05, "loss": 0.7685, "step": 5438 }, { "epoch": 0.400202345366751, "grad_norm": 0.9765625, "learning_rate": 3.281570228183047e-05, "loss": 1.1608, "step": 5439 }, { "epoch": 0.400275925500115, "grad_norm": 0.84765625, "learning_rate": 3.2810200777554887e-05, "loss": 0.7845, "step": 5440 }, { "epoch": 0.400349505633479, "grad_norm": 0.96484375, "learning_rate": 3.280469885412058e-05, "loss": 0.9856, "step": 5441 }, { "epoch": 0.40042308576684293, "grad_norm": 0.875, "learning_rate": 3.2799196511822835e-05, "loss": 0.8395, "step": 5442 }, { "epoch": 0.40049666590020694, "grad_norm": 0.87109375, "learning_rate": 3.279369375095694e-05, "loss": 0.8938, "step": 5443 }, { "epoch": 0.40057024603357094, "grad_norm": 0.87109375, "learning_rate": 3.278819057181821e-05, "loss": 1.1752, "step": 5444 }, { "epoch": 0.40064382616693495, "grad_norm": 1.0078125, "learning_rate": 3.278268697470202e-05, "loss": 1.0623, "step": 5445 }, { "epoch": 0.4007174063002989, "grad_norm": 0.890625, "learning_rate": 3.2777182959903705e-05, "loss": 0.7103, "step": 5446 }, { "epoch": 0.4007909864336629, "grad_norm": 0.859375, "learning_rate": 3.277167852771867e-05, "loss": 0.6163, "step": 5447 }, { "epoch": 0.4008645665670269, "grad_norm": 0.8671875, "learning_rate": 3.276617367844233e-05, "loss": 0.9038, "step": 5448 }, { "epoch": 0.4009381467003909, "grad_norm": 0.6953125, "learning_rate": 3.276066841237012e-05, "loss": 0.7005, "step": 5449 }, { "epoch": 0.4010117268337549, "grad_norm": 0.8984375, "learning_rate": 3.275516272979749e-05, "loss": 1.1644, "step": 5450 }, { "epoch": 0.40108530696711886, "grad_norm": 0.90234375, "learning_rate": 3.274965663101993e-05, "loss": 1.0017, "step": 5451 }, { "epoch": 0.40115888710048286, "grad_norm": 0.984375, "learning_rate": 3.274415011633294e-05, "loss": 1.1346, "step": 5452 }, { "epoch": 0.40123246723384687, "grad_norm": 0.890625, "learning_rate": 3.2738643186032026e-05, "loss": 0.9945, "step": 5453 }, { "epoch": 0.40130604736721087, "grad_norm": 0.8671875, "learning_rate": 3.273313584041276e-05, "loss": 0.8145, "step": 5454 }, { "epoch": 0.4013796275005748, "grad_norm": 1.046875, "learning_rate": 3.272762807977068e-05, "loss": 1.1186, "step": 5455 }, { "epoch": 0.4014532076339388, "grad_norm": 0.91796875, "learning_rate": 3.272211990440141e-05, "loss": 1.1701, "step": 5456 }, { "epoch": 0.40152678776730283, "grad_norm": 0.8671875, "learning_rate": 3.2716611314600554e-05, "loss": 0.8724, "step": 5457 }, { "epoch": 0.40160036790066683, "grad_norm": 0.765625, "learning_rate": 3.271110231066374e-05, "loss": 0.9826, "step": 5458 }, { "epoch": 0.40167394803403084, "grad_norm": 1.09375, "learning_rate": 3.270559289288663e-05, "loss": 0.9174, "step": 5459 }, { "epoch": 0.4017475281673948, "grad_norm": 0.7265625, "learning_rate": 3.27000830615649e-05, "loss": 1.2029, "step": 5460 }, { "epoch": 0.4018211083007588, "grad_norm": 0.796875, "learning_rate": 3.269457281699426e-05, "loss": 0.7479, "step": 5461 }, { "epoch": 0.4018946884341228, "grad_norm": 0.70703125, "learning_rate": 3.268906215947042e-05, "loss": 0.7568, "step": 5462 }, { "epoch": 0.4019682685674868, "grad_norm": 0.8359375, "learning_rate": 3.2683551089289144e-05, "loss": 0.8404, "step": 5463 }, { "epoch": 0.40204184870085075, "grad_norm": 1.015625, "learning_rate": 3.267803960674619e-05, "loss": 1.1092, "step": 5464 }, { "epoch": 0.40211542883421475, "grad_norm": 0.9765625, "learning_rate": 3.2672527712137356e-05, "loss": 1.0652, "step": 5465 }, { "epoch": 0.40218900896757875, "grad_norm": 0.8828125, "learning_rate": 3.2667015405758453e-05, "loss": 1.0759, "step": 5466 }, { "epoch": 0.40226258910094276, "grad_norm": 0.8125, "learning_rate": 3.266150268790531e-05, "loss": 1.0637, "step": 5467 }, { "epoch": 0.40233616923430676, "grad_norm": 1.046875, "learning_rate": 3.265598955887379e-05, "loss": 1.2914, "step": 5468 }, { "epoch": 0.4024097493676707, "grad_norm": 0.8828125, "learning_rate": 3.2650476018959765e-05, "loss": 0.7047, "step": 5469 }, { "epoch": 0.4024833295010347, "grad_norm": 1.0234375, "learning_rate": 3.264496206845915e-05, "loss": 1.0054, "step": 5470 }, { "epoch": 0.4025569096343987, "grad_norm": 0.921875, "learning_rate": 3.263944770766785e-05, "loss": 1.0365, "step": 5471 }, { "epoch": 0.4026304897677627, "grad_norm": 0.98046875, "learning_rate": 3.2633932936881825e-05, "loss": 1.5661, "step": 5472 }, { "epoch": 0.40270406990112667, "grad_norm": 0.90234375, "learning_rate": 3.262841775639705e-05, "loss": 1.0951, "step": 5473 }, { "epoch": 0.4027776500344907, "grad_norm": 0.78515625, "learning_rate": 3.262290216650948e-05, "loss": 0.7258, "step": 5474 }, { "epoch": 0.4028512301678547, "grad_norm": 0.765625, "learning_rate": 3.2617386167515164e-05, "loss": 0.8595, "step": 5475 }, { "epoch": 0.4029248103012187, "grad_norm": 0.7578125, "learning_rate": 3.261186975971011e-05, "loss": 0.6583, "step": 5476 }, { "epoch": 0.4029983904345827, "grad_norm": 0.828125, "learning_rate": 3.260635294339039e-05, "loss": 0.9518, "step": 5477 }, { "epoch": 0.40307197056794664, "grad_norm": 1.0, "learning_rate": 3.260083571885207e-05, "loss": 1.1576, "step": 5478 }, { "epoch": 0.40314555070131064, "grad_norm": 0.8828125, "learning_rate": 3.259531808639126e-05, "loss": 0.9954, "step": 5479 }, { "epoch": 0.40321913083467464, "grad_norm": 0.83984375, "learning_rate": 3.258980004630407e-05, "loss": 1.269, "step": 5480 }, { "epoch": 0.40329271096803865, "grad_norm": 1.1953125, "learning_rate": 3.258428159888664e-05, "loss": 1.5506, "step": 5481 }, { "epoch": 0.4033662911014026, "grad_norm": 1.0625, "learning_rate": 3.257876274443515e-05, "loss": 1.0325, "step": 5482 }, { "epoch": 0.4034398712347666, "grad_norm": 0.8359375, "learning_rate": 3.2573243483245774e-05, "loss": 0.778, "step": 5483 }, { "epoch": 0.4035134513681306, "grad_norm": 0.8515625, "learning_rate": 3.2567723815614726e-05, "loss": 0.7232, "step": 5484 }, { "epoch": 0.4035870315014946, "grad_norm": 1.0, "learning_rate": 3.256220374183823e-05, "loss": 1.0646, "step": 5485 }, { "epoch": 0.4036606116348586, "grad_norm": 0.796875, "learning_rate": 3.255668326221255e-05, "loss": 1.1485, "step": 5486 }, { "epoch": 0.40373419176822256, "grad_norm": 0.91796875, "learning_rate": 3.2551162377033945e-05, "loss": 1.0045, "step": 5487 }, { "epoch": 0.40380777190158657, "grad_norm": 0.73046875, "learning_rate": 3.254564108659872e-05, "loss": 0.6102, "step": 5488 }, { "epoch": 0.40388135203495057, "grad_norm": 0.9609375, "learning_rate": 3.2540119391203186e-05, "loss": 1.3953, "step": 5489 }, { "epoch": 0.4039549321683146, "grad_norm": 0.84765625, "learning_rate": 3.253459729114368e-05, "loss": 0.9125, "step": 5490 }, { "epoch": 0.4040285123016785, "grad_norm": 1.0078125, "learning_rate": 3.252907478671659e-05, "loss": 1.0289, "step": 5491 }, { "epoch": 0.4041020924350425, "grad_norm": 0.7734375, "learning_rate": 3.252355187821825e-05, "loss": 0.8511, "step": 5492 }, { "epoch": 0.40417567256840653, "grad_norm": 1.015625, "learning_rate": 3.2518028565945104e-05, "loss": 1.0297, "step": 5493 }, { "epoch": 0.40424925270177053, "grad_norm": 0.76953125, "learning_rate": 3.251250485019357e-05, "loss": 0.7107, "step": 5494 }, { "epoch": 0.40432283283513454, "grad_norm": 0.72265625, "learning_rate": 3.250698073126008e-05, "loss": 0.6151, "step": 5495 }, { "epoch": 0.4043964129684985, "grad_norm": 0.859375, "learning_rate": 3.2501456209441114e-05, "loss": 0.717, "step": 5496 }, { "epoch": 0.4044699931018625, "grad_norm": 0.8359375, "learning_rate": 3.2495931285033166e-05, "loss": 0.842, "step": 5497 }, { "epoch": 0.4045435732352265, "grad_norm": 0.91015625, "learning_rate": 3.249040595833274e-05, "loss": 0.8624, "step": 5498 }, { "epoch": 0.4046171533685905, "grad_norm": 0.7734375, "learning_rate": 3.2484880229636375e-05, "loss": 0.7128, "step": 5499 }, { "epoch": 0.40469073350195445, "grad_norm": 0.85546875, "learning_rate": 3.247935409924063e-05, "loss": 0.6546, "step": 5500 }, { "epoch": 0.40476431363531845, "grad_norm": 0.7421875, "learning_rate": 3.247382756744207e-05, "loss": 0.7038, "step": 5501 }, { "epoch": 0.40483789376868246, "grad_norm": 1.1171875, "learning_rate": 3.24683006345373e-05, "loss": 1.0853, "step": 5502 }, { "epoch": 0.40491147390204646, "grad_norm": 0.984375, "learning_rate": 3.246277330082295e-05, "loss": 0.7791, "step": 5503 }, { "epoch": 0.40498505403541046, "grad_norm": 0.67578125, "learning_rate": 3.245724556659564e-05, "loss": 0.7281, "step": 5504 }, { "epoch": 0.4050586341687744, "grad_norm": 1.0234375, "learning_rate": 3.245171743215205e-05, "loss": 0.8079, "step": 5505 }, { "epoch": 0.4051322143021384, "grad_norm": 0.96875, "learning_rate": 3.244618889778886e-05, "loss": 0.9249, "step": 5506 }, { "epoch": 0.4052057944355024, "grad_norm": 0.625, "learning_rate": 3.2440659963802785e-05, "loss": 0.7506, "step": 5507 }, { "epoch": 0.4052793745688664, "grad_norm": 0.84375, "learning_rate": 3.243513063049053e-05, "loss": 0.8894, "step": 5508 }, { "epoch": 0.4053529547022304, "grad_norm": 0.84375, "learning_rate": 3.242960089814886e-05, "loss": 1.2063, "step": 5509 }, { "epoch": 0.4054265348355944, "grad_norm": 0.87890625, "learning_rate": 3.2424070767074546e-05, "loss": 1.0831, "step": 5510 }, { "epoch": 0.4055001149689584, "grad_norm": 0.89453125, "learning_rate": 3.241854023756437e-05, "loss": 0.9957, "step": 5511 }, { "epoch": 0.4055736951023224, "grad_norm": 0.8359375, "learning_rate": 3.241300930991515e-05, "loss": 0.7813, "step": 5512 }, { "epoch": 0.4056472752356864, "grad_norm": 0.73046875, "learning_rate": 3.2407477984423726e-05, "loss": 0.9488, "step": 5513 }, { "epoch": 0.40572085536905034, "grad_norm": 1.109375, "learning_rate": 3.240194626138696e-05, "loss": 1.2827, "step": 5514 }, { "epoch": 0.40579443550241434, "grad_norm": 0.90625, "learning_rate": 3.239641414110171e-05, "loss": 1.1106, "step": 5515 }, { "epoch": 0.40586801563577835, "grad_norm": 0.96875, "learning_rate": 3.239088162386487e-05, "loss": 1.4845, "step": 5516 }, { "epoch": 0.40594159576914235, "grad_norm": 0.80078125, "learning_rate": 3.238534870997338e-05, "loss": 0.9536, "step": 5517 }, { "epoch": 0.4060151759025063, "grad_norm": 0.828125, "learning_rate": 3.237981539972418e-05, "loss": 1.1186, "step": 5518 }, { "epoch": 0.4060887560358703, "grad_norm": 0.890625, "learning_rate": 3.2374281693414226e-05, "loss": 0.8825, "step": 5519 }, { "epoch": 0.4061623361692343, "grad_norm": 0.79296875, "learning_rate": 3.2368747591340496e-05, "loss": 0.6731, "step": 5520 }, { "epoch": 0.4062359163025983, "grad_norm": 0.84375, "learning_rate": 3.2363213093800006e-05, "loss": 1.0177, "step": 5521 }, { "epoch": 0.4063094964359623, "grad_norm": 1.0625, "learning_rate": 3.235767820108977e-05, "loss": 0.8926, "step": 5522 }, { "epoch": 0.40638307656932626, "grad_norm": 0.7421875, "learning_rate": 3.2352142913506846e-05, "loss": 0.6149, "step": 5523 }, { "epoch": 0.40645665670269027, "grad_norm": 0.93359375, "learning_rate": 3.23466072313483e-05, "loss": 1.2331, "step": 5524 }, { "epoch": 0.40653023683605427, "grad_norm": 0.859375, "learning_rate": 3.2341071154911215e-05, "loss": 1.1672, "step": 5525 }, { "epoch": 0.4066038169694183, "grad_norm": 0.734375, "learning_rate": 3.233553468449271e-05, "loss": 0.7292, "step": 5526 }, { "epoch": 0.4066773971027822, "grad_norm": 0.8125, "learning_rate": 3.232999782038991e-05, "loss": 0.6549, "step": 5527 }, { "epoch": 0.4067509772361462, "grad_norm": 0.94921875, "learning_rate": 3.232446056289997e-05, "loss": 1.1656, "step": 5528 }, { "epoch": 0.40682455736951023, "grad_norm": 1.046875, "learning_rate": 3.231892291232007e-05, "loss": 1.3971, "step": 5529 }, { "epoch": 0.40689813750287424, "grad_norm": 0.86328125, "learning_rate": 3.23133848689474e-05, "loss": 1.2544, "step": 5530 }, { "epoch": 0.40697171763623824, "grad_norm": 0.87109375, "learning_rate": 3.230784643307917e-05, "loss": 0.8279, "step": 5531 }, { "epoch": 0.4070452977696022, "grad_norm": 0.73046875, "learning_rate": 3.230230760501264e-05, "loss": 0.7329, "step": 5532 }, { "epoch": 0.4071188779029662, "grad_norm": 0.95703125, "learning_rate": 3.229676838504504e-05, "loss": 1.2462, "step": 5533 }, { "epoch": 0.4071924580363302, "grad_norm": 0.8515625, "learning_rate": 3.229122877347366e-05, "loss": 1.0062, "step": 5534 }, { "epoch": 0.4072660381696942, "grad_norm": 0.80078125, "learning_rate": 3.2285688770595815e-05, "loss": 0.9326, "step": 5535 }, { "epoch": 0.40733961830305815, "grad_norm": 0.94140625, "learning_rate": 3.228014837670881e-05, "loss": 0.763, "step": 5536 }, { "epoch": 0.40741319843642215, "grad_norm": 0.73828125, "learning_rate": 3.227460759210999e-05, "loss": 0.7049, "step": 5537 }, { "epoch": 0.40748677856978616, "grad_norm": 0.86328125, "learning_rate": 3.2269066417096715e-05, "loss": 0.78, "step": 5538 }, { "epoch": 0.40756035870315016, "grad_norm": 0.91015625, "learning_rate": 3.2263524851966385e-05, "loss": 0.9355, "step": 5539 }, { "epoch": 0.40763393883651416, "grad_norm": 0.85546875, "learning_rate": 3.2257982897016395e-05, "loss": 0.9373, "step": 5540 }, { "epoch": 0.4077075189698781, "grad_norm": 0.71484375, "learning_rate": 3.225244055254416e-05, "loss": 0.722, "step": 5541 }, { "epoch": 0.4077810991032421, "grad_norm": 0.83984375, "learning_rate": 3.224689781884715e-05, "loss": 0.9293, "step": 5542 }, { "epoch": 0.4078546792366061, "grad_norm": 0.8515625, "learning_rate": 3.224135469622282e-05, "loss": 1.1588, "step": 5543 }, { "epoch": 0.4079282593699701, "grad_norm": 0.7265625, "learning_rate": 3.223581118496865e-05, "loss": 0.8876, "step": 5544 }, { "epoch": 0.4080018395033341, "grad_norm": 0.953125, "learning_rate": 3.223026728538216e-05, "loss": 1.0028, "step": 5545 }, { "epoch": 0.4080754196366981, "grad_norm": 0.859375, "learning_rate": 3.22247229977609e-05, "loss": 1.0211, "step": 5546 }, { "epoch": 0.4081489997700621, "grad_norm": 0.70703125, "learning_rate": 3.221917832240239e-05, "loss": 0.8526, "step": 5547 }, { "epoch": 0.4082225799034261, "grad_norm": 0.86328125, "learning_rate": 3.221363325960422e-05, "loss": 0.8572, "step": 5548 }, { "epoch": 0.4082961600367901, "grad_norm": 0.6875, "learning_rate": 3.2208087809663966e-05, "loss": 0.5536, "step": 5549 }, { "epoch": 0.40836974017015404, "grad_norm": 0.8359375, "learning_rate": 3.2202541972879256e-05, "loss": 0.7928, "step": 5550 }, { "epoch": 0.40844332030351804, "grad_norm": 0.70703125, "learning_rate": 3.2196995749547725e-05, "loss": 0.8852, "step": 5551 }, { "epoch": 0.40851690043688205, "grad_norm": 0.91015625, "learning_rate": 3.219144913996702e-05, "loss": 0.8989, "step": 5552 }, { "epoch": 0.40859048057024605, "grad_norm": 0.78125, "learning_rate": 3.218590214443484e-05, "loss": 0.8728, "step": 5553 }, { "epoch": 0.40866406070361, "grad_norm": 1.0, "learning_rate": 3.218035476324885e-05, "loss": 1.4778, "step": 5554 }, { "epoch": 0.408737640836974, "grad_norm": 0.91796875, "learning_rate": 3.2174806996706785e-05, "loss": 1.132, "step": 5555 }, { "epoch": 0.408811220970338, "grad_norm": 0.6875, "learning_rate": 3.216925884510638e-05, "loss": 0.6525, "step": 5556 }, { "epoch": 0.408884801103702, "grad_norm": 1.0546875, "learning_rate": 3.2163710308745385e-05, "loss": 1.2691, "step": 5557 }, { "epoch": 0.408958381237066, "grad_norm": 1.0078125, "learning_rate": 3.21581613879216e-05, "loss": 1.1924, "step": 5558 }, { "epoch": 0.40903196137042996, "grad_norm": 0.8203125, "learning_rate": 3.215261208293281e-05, "loss": 1.179, "step": 5559 }, { "epoch": 0.40910554150379397, "grad_norm": 0.78515625, "learning_rate": 3.214706239407684e-05, "loss": 0.7966, "step": 5560 }, { "epoch": 0.40917912163715797, "grad_norm": 0.7734375, "learning_rate": 3.214151232165152e-05, "loss": 0.7218, "step": 5561 }, { "epoch": 0.409252701770522, "grad_norm": 1.15625, "learning_rate": 3.213596186595474e-05, "loss": 1.6786, "step": 5562 }, { "epoch": 0.4093262819038859, "grad_norm": 0.78515625, "learning_rate": 3.213041102728435e-05, "loss": 0.7659, "step": 5563 }, { "epoch": 0.40939986203724993, "grad_norm": 0.97265625, "learning_rate": 3.212485980593827e-05, "loss": 0.9232, "step": 5564 }, { "epoch": 0.40947344217061393, "grad_norm": 1.0859375, "learning_rate": 3.211930820221443e-05, "loss": 1.1072, "step": 5565 }, { "epoch": 0.40954702230397794, "grad_norm": 0.73046875, "learning_rate": 3.211375621641075e-05, "loss": 0.8378, "step": 5566 }, { "epoch": 0.40962060243734194, "grad_norm": 0.86328125, "learning_rate": 3.210820384882522e-05, "loss": 0.8205, "step": 5567 }, { "epoch": 0.4096941825707059, "grad_norm": 0.87890625, "learning_rate": 3.2102651099755815e-05, "loss": 1.1259, "step": 5568 }, { "epoch": 0.4097677627040699, "grad_norm": 0.8125, "learning_rate": 3.209709796950054e-05, "loss": 1.0587, "step": 5569 }, { "epoch": 0.4098413428374339, "grad_norm": 0.7421875, "learning_rate": 3.209154445835742e-05, "loss": 0.7716, "step": 5570 }, { "epoch": 0.4099149229707979, "grad_norm": 1.03125, "learning_rate": 3.20859905666245e-05, "loss": 1.0481, "step": 5571 }, { "epoch": 0.40998850310416185, "grad_norm": 0.6484375, "learning_rate": 3.208043629459986e-05, "loss": 0.5851, "step": 5572 }, { "epoch": 0.41006208323752585, "grad_norm": 0.86328125, "learning_rate": 3.2074881642581564e-05, "loss": 1.0272, "step": 5573 }, { "epoch": 0.41013566337088986, "grad_norm": 0.7421875, "learning_rate": 3.206932661086775e-05, "loss": 0.9631, "step": 5574 }, { "epoch": 0.41020924350425386, "grad_norm": 0.71875, "learning_rate": 3.206377119975651e-05, "loss": 0.612, "step": 5575 }, { "epoch": 0.41028282363761787, "grad_norm": 0.84765625, "learning_rate": 3.205821540954602e-05, "loss": 0.769, "step": 5576 }, { "epoch": 0.4103564037709818, "grad_norm": 0.734375, "learning_rate": 3.205265924053443e-05, "loss": 0.9553, "step": 5577 }, { "epoch": 0.4104299839043458, "grad_norm": 0.87890625, "learning_rate": 3.204710269301996e-05, "loss": 0.867, "step": 5578 }, { "epoch": 0.4105035640377098, "grad_norm": 0.7890625, "learning_rate": 3.2041545767300785e-05, "loss": 0.8351, "step": 5579 }, { "epoch": 0.4105771441710738, "grad_norm": 0.70703125, "learning_rate": 3.203598846367515e-05, "loss": 0.8639, "step": 5580 }, { "epoch": 0.4106507243044378, "grad_norm": 0.87890625, "learning_rate": 3.20304307824413e-05, "loss": 0.6448, "step": 5581 }, { "epoch": 0.4107243044378018, "grad_norm": 0.78125, "learning_rate": 3.2024872723897514e-05, "loss": 0.5816, "step": 5582 }, { "epoch": 0.4107978845711658, "grad_norm": 0.828125, "learning_rate": 3.2019314288342075e-05, "loss": 0.8489, "step": 5583 }, { "epoch": 0.4108714647045298, "grad_norm": 1.1015625, "learning_rate": 3.2013755476073294e-05, "loss": 0.9564, "step": 5584 }, { "epoch": 0.4109450448378938, "grad_norm": 0.87890625, "learning_rate": 3.200819628738951e-05, "loss": 1.0322, "step": 5585 }, { "epoch": 0.41101862497125774, "grad_norm": 0.828125, "learning_rate": 3.200263672258906e-05, "loss": 0.9438, "step": 5586 }, { "epoch": 0.41109220510462174, "grad_norm": 0.8671875, "learning_rate": 3.199707678197033e-05, "loss": 0.9212, "step": 5587 }, { "epoch": 0.41116578523798575, "grad_norm": 0.96875, "learning_rate": 3.1991516465831696e-05, "loss": 1.0347, "step": 5588 }, { "epoch": 0.41123936537134975, "grad_norm": 0.8359375, "learning_rate": 3.198595577447159e-05, "loss": 1.0681, "step": 5589 }, { "epoch": 0.4113129455047137, "grad_norm": 0.8515625, "learning_rate": 3.1980394708188425e-05, "loss": 0.8626, "step": 5590 }, { "epoch": 0.4113865256380777, "grad_norm": 4.09375, "learning_rate": 3.197483326728065e-05, "loss": 0.9472, "step": 5591 }, { "epoch": 0.4114601057714417, "grad_norm": 0.8203125, "learning_rate": 3.196927145204676e-05, "loss": 0.9151, "step": 5592 }, { "epoch": 0.4115336859048057, "grad_norm": 1.125, "learning_rate": 3.1963709262785234e-05, "loss": 1.0061, "step": 5593 }, { "epoch": 0.4116072660381697, "grad_norm": 0.6953125, "learning_rate": 3.1958146699794586e-05, "loss": 0.7624, "step": 5594 }, { "epoch": 0.41168084617153367, "grad_norm": 0.98046875, "learning_rate": 3.195258376337334e-05, "loss": 1.3725, "step": 5595 }, { "epoch": 0.41175442630489767, "grad_norm": 0.78125, "learning_rate": 3.1947020453820066e-05, "loss": 0.8691, "step": 5596 }, { "epoch": 0.4118280064382617, "grad_norm": 0.95703125, "learning_rate": 3.194145677143331e-05, "loss": 1.0105, "step": 5597 }, { "epoch": 0.4119015865716257, "grad_norm": 0.75390625, "learning_rate": 3.193589271651169e-05, "loss": 0.9358, "step": 5598 }, { "epoch": 0.4119751667049896, "grad_norm": 0.890625, "learning_rate": 3.1930328289353805e-05, "loss": 1.1158, "step": 5599 }, { "epoch": 0.41204874683835363, "grad_norm": 0.890625, "learning_rate": 3.192476349025829e-05, "loss": 0.7623, "step": 5600 }, { "epoch": 0.41212232697171763, "grad_norm": 0.86328125, "learning_rate": 3.1919198319523805e-05, "loss": 0.918, "step": 5601 }, { "epoch": 0.41219590710508164, "grad_norm": 0.8359375, "learning_rate": 3.1913632777449e-05, "loss": 0.7094, "step": 5602 }, { "epoch": 0.41226948723844564, "grad_norm": 1.03125, "learning_rate": 3.190806686433259e-05, "loss": 0.9955, "step": 5603 }, { "epoch": 0.4123430673718096, "grad_norm": 1.0859375, "learning_rate": 3.190250058047328e-05, "loss": 1.1954, "step": 5604 }, { "epoch": 0.4124166475051736, "grad_norm": 1.03125, "learning_rate": 3.18969339261698e-05, "loss": 1.2955, "step": 5605 }, { "epoch": 0.4124902276385376, "grad_norm": 0.94140625, "learning_rate": 3.1891366901720896e-05, "loss": 1.2679, "step": 5606 }, { "epoch": 0.4125638077719016, "grad_norm": 0.85546875, "learning_rate": 3.188579950742535e-05, "loss": 1.3088, "step": 5607 }, { "epoch": 0.41263738790526555, "grad_norm": 0.95703125, "learning_rate": 3.188023174358196e-05, "loss": 1.1992, "step": 5608 }, { "epoch": 0.41271096803862956, "grad_norm": 0.89453125, "learning_rate": 3.187466361048952e-05, "loss": 1.1695, "step": 5609 }, { "epoch": 0.41278454817199356, "grad_norm": 1.0078125, "learning_rate": 3.1869095108446864e-05, "loss": 1.1898, "step": 5610 }, { "epoch": 0.41285812830535756, "grad_norm": 0.78125, "learning_rate": 3.186352623775285e-05, "loss": 0.981, "step": 5611 }, { "epoch": 0.41293170843872157, "grad_norm": 0.73046875, "learning_rate": 3.185795699870635e-05, "loss": 0.73, "step": 5612 }, { "epoch": 0.4130052885720855, "grad_norm": 0.921875, "learning_rate": 3.185238739160624e-05, "loss": 0.8553, "step": 5613 }, { "epoch": 0.4130788687054495, "grad_norm": 0.88671875, "learning_rate": 3.184681741675145e-05, "loss": 0.7428, "step": 5614 }, { "epoch": 0.4131524488388135, "grad_norm": 1.1171875, "learning_rate": 3.1841247074440906e-05, "loss": 0.9858, "step": 5615 }, { "epoch": 0.41322602897217753, "grad_norm": 0.96484375, "learning_rate": 3.1835676364973546e-05, "loss": 0.9968, "step": 5616 }, { "epoch": 0.4132996091055415, "grad_norm": 1.0234375, "learning_rate": 3.183010528864835e-05, "loss": 0.9381, "step": 5617 }, { "epoch": 0.4133731892389055, "grad_norm": 0.77734375, "learning_rate": 3.182453384576429e-05, "loss": 0.7072, "step": 5618 }, { "epoch": 0.4134467693722695, "grad_norm": 0.78515625, "learning_rate": 3.18189620366204e-05, "loss": 0.8605, "step": 5619 }, { "epoch": 0.4135203495056335, "grad_norm": 0.6484375, "learning_rate": 3.1813389861515706e-05, "loss": 0.9305, "step": 5620 }, { "epoch": 0.4135939296389975, "grad_norm": 0.74609375, "learning_rate": 3.1807817320749236e-05, "loss": 0.6096, "step": 5621 }, { "epoch": 0.41366750977236144, "grad_norm": 0.85546875, "learning_rate": 3.1802244414620065e-05, "loss": 1.0141, "step": 5622 }, { "epoch": 0.41374108990572545, "grad_norm": 1.078125, "learning_rate": 3.179667114342729e-05, "loss": 0.7475, "step": 5623 }, { "epoch": 0.41381467003908945, "grad_norm": 0.7890625, "learning_rate": 3.179109750747e-05, "loss": 1.0894, "step": 5624 }, { "epoch": 0.41388825017245345, "grad_norm": 0.94921875, "learning_rate": 3.1785523507047345e-05, "loss": 1.0363, "step": 5625 }, { "epoch": 0.4139618303058174, "grad_norm": 0.90625, "learning_rate": 3.177994914245846e-05, "loss": 0.9267, "step": 5626 }, { "epoch": 0.4140354104391814, "grad_norm": 0.8359375, "learning_rate": 3.1774374414002505e-05, "loss": 0.8263, "step": 5627 }, { "epoch": 0.4141089905725454, "grad_norm": 0.7265625, "learning_rate": 3.1768799321978674e-05, "loss": 0.5761, "step": 5628 }, { "epoch": 0.4141825707059094, "grad_norm": 0.73828125, "learning_rate": 3.176322386668617e-05, "loss": 0.7389, "step": 5629 }, { "epoch": 0.4142561508392734, "grad_norm": 0.8046875, "learning_rate": 3.17576480484242e-05, "loss": 0.6863, "step": 5630 }, { "epoch": 0.41432973097263737, "grad_norm": 0.8046875, "learning_rate": 3.1752071867492037e-05, "loss": 0.8422, "step": 5631 }, { "epoch": 0.41440331110600137, "grad_norm": 0.8828125, "learning_rate": 3.174649532418893e-05, "loss": 1.3692, "step": 5632 }, { "epoch": 0.4144768912393654, "grad_norm": 1.03125, "learning_rate": 3.1740918418814156e-05, "loss": 1.0136, "step": 5633 }, { "epoch": 0.4145504713727294, "grad_norm": 1.1640625, "learning_rate": 3.173534115166702e-05, "loss": 1.1615, "step": 5634 }, { "epoch": 0.4146240515060933, "grad_norm": 0.80078125, "learning_rate": 3.1729763523046864e-05, "loss": 0.5981, "step": 5635 }, { "epoch": 0.41469763163945733, "grad_norm": 0.96484375, "learning_rate": 3.172418553325299e-05, "loss": 1.1503, "step": 5636 }, { "epoch": 0.41477121177282134, "grad_norm": 0.86328125, "learning_rate": 3.171860718258478e-05, "loss": 0.7839, "step": 5637 }, { "epoch": 0.41484479190618534, "grad_norm": 0.93359375, "learning_rate": 3.1713028471341624e-05, "loss": 1.2209, "step": 5638 }, { "epoch": 0.41491837203954934, "grad_norm": 0.6640625, "learning_rate": 3.17074493998229e-05, "loss": 0.5049, "step": 5639 }, { "epoch": 0.4149919521729133, "grad_norm": 0.8359375, "learning_rate": 3.1701869968328036e-05, "loss": 0.8211, "step": 5640 }, { "epoch": 0.4150655323062773, "grad_norm": 0.9296875, "learning_rate": 3.169629017715647e-05, "loss": 0.8303, "step": 5641 }, { "epoch": 0.4151391124396413, "grad_norm": 0.78515625, "learning_rate": 3.169071002660766e-05, "loss": 0.6896, "step": 5642 }, { "epoch": 0.4152126925730053, "grad_norm": 1.078125, "learning_rate": 3.1685129516981076e-05, "loss": 1.0469, "step": 5643 }, { "epoch": 0.41528627270636925, "grad_norm": 0.83203125, "learning_rate": 3.1679548648576216e-05, "loss": 0.9086, "step": 5644 }, { "epoch": 0.41535985283973326, "grad_norm": 0.9921875, "learning_rate": 3.167396742169259e-05, "loss": 1.2287, "step": 5645 }, { "epoch": 0.41543343297309726, "grad_norm": 0.7421875, "learning_rate": 3.166838583662975e-05, "loss": 0.5987, "step": 5646 }, { "epoch": 0.41550701310646126, "grad_norm": 0.91015625, "learning_rate": 3.1662803893687244e-05, "loss": 1.229, "step": 5647 }, { "epoch": 0.41558059323982527, "grad_norm": 0.9296875, "learning_rate": 3.165722159316462e-05, "loss": 0.9456, "step": 5648 }, { "epoch": 0.4156541733731892, "grad_norm": 0.83984375, "learning_rate": 3.16516389353615e-05, "loss": 0.9693, "step": 5649 }, { "epoch": 0.4157277535065532, "grad_norm": 0.79296875, "learning_rate": 3.164605592057747e-05, "loss": 0.6719, "step": 5650 }, { "epoch": 0.4158013336399172, "grad_norm": 0.765625, "learning_rate": 3.1640472549112174e-05, "loss": 0.6725, "step": 5651 }, { "epoch": 0.41587491377328123, "grad_norm": 0.80859375, "learning_rate": 3.163488882126526e-05, "loss": 0.6966, "step": 5652 }, { "epoch": 0.4159484939066452, "grad_norm": 1.0859375, "learning_rate": 3.1629304737336404e-05, "loss": 1.3396, "step": 5653 }, { "epoch": 0.4160220740400092, "grad_norm": 0.69921875, "learning_rate": 3.162372029762527e-05, "loss": 0.647, "step": 5654 }, { "epoch": 0.4160956541733732, "grad_norm": 0.80859375, "learning_rate": 3.161813550243158e-05, "loss": 0.958, "step": 5655 }, { "epoch": 0.4161692343067372, "grad_norm": 0.93359375, "learning_rate": 3.161255035205507e-05, "loss": 1.1894, "step": 5656 }, { "epoch": 0.4162428144401012, "grad_norm": 0.859375, "learning_rate": 3.160696484679546e-05, "loss": 0.9956, "step": 5657 }, { "epoch": 0.41631639457346514, "grad_norm": 0.83984375, "learning_rate": 3.160137898695252e-05, "loss": 0.8505, "step": 5658 }, { "epoch": 0.41638997470682915, "grad_norm": 1.03125, "learning_rate": 3.159579277282605e-05, "loss": 0.9437, "step": 5659 }, { "epoch": 0.41646355484019315, "grad_norm": 0.9296875, "learning_rate": 3.159020620471584e-05, "loss": 0.9145, "step": 5660 }, { "epoch": 0.41653713497355715, "grad_norm": 0.91015625, "learning_rate": 3.1584619282921704e-05, "loss": 1.3915, "step": 5661 }, { "epoch": 0.41661071510692116, "grad_norm": 0.8515625, "learning_rate": 3.1579032007743485e-05, "loss": 0.8081, "step": 5662 }, { "epoch": 0.4166842952402851, "grad_norm": 0.77734375, "learning_rate": 3.157344437948105e-05, "loss": 0.8382, "step": 5663 }, { "epoch": 0.4167578753736491, "grad_norm": 0.90625, "learning_rate": 3.156785639843427e-05, "loss": 0.824, "step": 5664 }, { "epoch": 0.4168314555070131, "grad_norm": 0.890625, "learning_rate": 3.156226806490304e-05, "loss": 1.2242, "step": 5665 }, { "epoch": 0.4169050356403771, "grad_norm": 0.75, "learning_rate": 3.1556679379187274e-05, "loss": 1.0485, "step": 5666 }, { "epoch": 0.41697861577374107, "grad_norm": 1.0234375, "learning_rate": 3.1551090341586917e-05, "loss": 1.1262, "step": 5667 }, { "epoch": 0.41705219590710507, "grad_norm": 0.87890625, "learning_rate": 3.154550095240191e-05, "loss": 0.8812, "step": 5668 }, { "epoch": 0.4171257760404691, "grad_norm": 0.703125, "learning_rate": 3.153991121193224e-05, "loss": 0.686, "step": 5669 }, { "epoch": 0.4171993561738331, "grad_norm": 1.0546875, "learning_rate": 3.153432112047787e-05, "loss": 0.8766, "step": 5670 }, { "epoch": 0.4172729363071971, "grad_norm": 0.9921875, "learning_rate": 3.152873067833884e-05, "loss": 0.9772, "step": 5671 }, { "epoch": 0.41734651644056103, "grad_norm": 1.046875, "learning_rate": 3.1523139885815154e-05, "loss": 1.5448, "step": 5672 }, { "epoch": 0.41742009657392504, "grad_norm": 3.03125, "learning_rate": 3.151754874320688e-05, "loss": 1.0638, "step": 5673 }, { "epoch": 0.41749367670728904, "grad_norm": 1.0234375, "learning_rate": 3.151195725081408e-05, "loss": 1.1067, "step": 5674 }, { "epoch": 0.41756725684065304, "grad_norm": 0.97265625, "learning_rate": 3.150636540893682e-05, "loss": 1.1005, "step": 5675 }, { "epoch": 0.417640836974017, "grad_norm": 0.80859375, "learning_rate": 3.150077321787523e-05, "loss": 0.9598, "step": 5676 }, { "epoch": 0.417714417107381, "grad_norm": 0.890625, "learning_rate": 3.149518067792941e-05, "loss": 1.0019, "step": 5677 }, { "epoch": 0.417787997240745, "grad_norm": 0.8203125, "learning_rate": 3.148958778939951e-05, "loss": 0.9, "step": 5678 }, { "epoch": 0.417861577374109, "grad_norm": 0.92578125, "learning_rate": 3.1483994552585696e-05, "loss": 1.0215, "step": 5679 }, { "epoch": 0.417935157507473, "grad_norm": 0.9296875, "learning_rate": 3.147840096778813e-05, "loss": 0.9514, "step": 5680 }, { "epoch": 0.41800873764083696, "grad_norm": 0.99609375, "learning_rate": 3.147280703530704e-05, "loss": 0.9191, "step": 5681 }, { "epoch": 0.41808231777420096, "grad_norm": 0.7734375, "learning_rate": 3.146721275544261e-05, "loss": 0.6177, "step": 5682 }, { "epoch": 0.41815589790756497, "grad_norm": 0.85546875, "learning_rate": 3.146161812849509e-05, "loss": 0.9278, "step": 5683 }, { "epoch": 0.41822947804092897, "grad_norm": 0.9453125, "learning_rate": 3.1456023154764725e-05, "loss": 1.3063, "step": 5684 }, { "epoch": 0.4183030581742929, "grad_norm": 0.62890625, "learning_rate": 3.1450427834551795e-05, "loss": 0.8765, "step": 5685 }, { "epoch": 0.4183766383076569, "grad_norm": 1.234375, "learning_rate": 3.144483216815658e-05, "loss": 1.439, "step": 5686 }, { "epoch": 0.4184502184410209, "grad_norm": 1.0078125, "learning_rate": 3.14392361558794e-05, "loss": 1.2136, "step": 5687 }, { "epoch": 0.41852379857438493, "grad_norm": 0.890625, "learning_rate": 3.1433639798020575e-05, "loss": 0.8733, "step": 5688 }, { "epoch": 0.41859737870774893, "grad_norm": 0.9296875, "learning_rate": 3.142804309488045e-05, "loss": 0.8334, "step": 5689 }, { "epoch": 0.4186709588411129, "grad_norm": 0.73046875, "learning_rate": 3.142244604675941e-05, "loss": 0.7069, "step": 5690 }, { "epoch": 0.4187445389744769, "grad_norm": 0.89453125, "learning_rate": 3.14168486539578e-05, "loss": 0.8814, "step": 5691 }, { "epoch": 0.4188181191078409, "grad_norm": 0.8046875, "learning_rate": 3.141125091677605e-05, "loss": 0.6196, "step": 5692 }, { "epoch": 0.4188916992412049, "grad_norm": 0.86328125, "learning_rate": 3.140565283551458e-05, "loss": 0.9069, "step": 5693 }, { "epoch": 0.41896527937456884, "grad_norm": 0.7890625, "learning_rate": 3.140005441047381e-05, "loss": 1.2138, "step": 5694 }, { "epoch": 0.41903885950793285, "grad_norm": 1.015625, "learning_rate": 3.139445564195421e-05, "loss": 1.1058, "step": 5695 }, { "epoch": 0.41911243964129685, "grad_norm": 0.8828125, "learning_rate": 3.138885653025626e-05, "loss": 0.955, "step": 5696 }, { "epoch": 0.41918601977466086, "grad_norm": 0.8046875, "learning_rate": 3.1383257075680446e-05, "loss": 0.722, "step": 5697 }, { "epoch": 0.41925959990802486, "grad_norm": 0.921875, "learning_rate": 3.1377657278527274e-05, "loss": 1.0323, "step": 5698 }, { "epoch": 0.4193331800413888, "grad_norm": 0.7578125, "learning_rate": 3.1372057139097284e-05, "loss": 0.8831, "step": 5699 }, { "epoch": 0.4194067601747528, "grad_norm": 0.84765625, "learning_rate": 3.136645665769102e-05, "loss": 1.2314, "step": 5700 }, { "epoch": 0.4194803403081168, "grad_norm": 0.890625, "learning_rate": 3.136085583460906e-05, "loss": 1.176, "step": 5701 }, { "epoch": 0.4195539204414808, "grad_norm": 0.78515625, "learning_rate": 3.135525467015197e-05, "loss": 0.8508, "step": 5702 }, { "epoch": 0.41962750057484477, "grad_norm": 0.91015625, "learning_rate": 3.134965316462037e-05, "loss": 0.9366, "step": 5703 }, { "epoch": 0.4197010807082088, "grad_norm": 0.76953125, "learning_rate": 3.134405131831489e-05, "loss": 0.623, "step": 5704 }, { "epoch": 0.4197746608415728, "grad_norm": 0.8203125, "learning_rate": 3.133844913153614e-05, "loss": 0.7714, "step": 5705 }, { "epoch": 0.4198482409749368, "grad_norm": 0.84765625, "learning_rate": 3.133284660458481e-05, "loss": 1.3795, "step": 5706 }, { "epoch": 0.4199218211083008, "grad_norm": 0.93359375, "learning_rate": 3.1327243737761555e-05, "loss": 0.888, "step": 5707 }, { "epoch": 0.41999540124166473, "grad_norm": 0.92578125, "learning_rate": 3.132164053136708e-05, "loss": 0.8543, "step": 5708 }, { "epoch": 0.42006898137502874, "grad_norm": 0.8671875, "learning_rate": 3.13160369857021e-05, "loss": 0.7844, "step": 5709 }, { "epoch": 0.42014256150839274, "grad_norm": 0.953125, "learning_rate": 3.131043310106735e-05, "loss": 1.0513, "step": 5710 }, { "epoch": 0.42021614164175675, "grad_norm": 0.92578125, "learning_rate": 3.130482887776356e-05, "loss": 1.0161, "step": 5711 }, { "epoch": 0.4202897217751207, "grad_norm": 0.9765625, "learning_rate": 3.129922431609152e-05, "loss": 0.8555, "step": 5712 }, { "epoch": 0.4203633019084847, "grad_norm": 0.80078125, "learning_rate": 3.1293619416352005e-05, "loss": 0.7085, "step": 5713 }, { "epoch": 0.4204368820418487, "grad_norm": 0.875, "learning_rate": 3.1288014178845824e-05, "loss": 0.7576, "step": 5714 }, { "epoch": 0.4205104621752127, "grad_norm": 0.63671875, "learning_rate": 3.128240860387381e-05, "loss": 0.6418, "step": 5715 }, { "epoch": 0.4205840423085767, "grad_norm": 0.69140625, "learning_rate": 3.127680269173678e-05, "loss": 0.6519, "step": 5716 }, { "epoch": 0.42065762244194066, "grad_norm": 1.1796875, "learning_rate": 3.1271196442735606e-05, "loss": 1.6772, "step": 5717 }, { "epoch": 0.42073120257530466, "grad_norm": 0.78125, "learning_rate": 3.126558985717117e-05, "loss": 0.9215, "step": 5718 }, { "epoch": 0.42080478270866867, "grad_norm": 0.859375, "learning_rate": 3.125998293534435e-05, "loss": 0.8935, "step": 5719 }, { "epoch": 0.42087836284203267, "grad_norm": 1.1328125, "learning_rate": 3.1254375677556076e-05, "loss": 1.3714, "step": 5720 }, { "epoch": 0.4209519429753966, "grad_norm": 0.76171875, "learning_rate": 3.1248768084107265e-05, "loss": 0.8136, "step": 5721 }, { "epoch": 0.4210255231087606, "grad_norm": 0.953125, "learning_rate": 3.1243160155298886e-05, "loss": 1.1025, "step": 5722 }, { "epoch": 0.42109910324212463, "grad_norm": 1.0859375, "learning_rate": 3.123755189143188e-05, "loss": 1.3903, "step": 5723 }, { "epoch": 0.42117268337548863, "grad_norm": 0.7890625, "learning_rate": 3.123194329280726e-05, "loss": 1.1325, "step": 5724 }, { "epoch": 0.42124626350885264, "grad_norm": 0.98828125, "learning_rate": 3.1226334359726e-05, "loss": 1.3851, "step": 5725 }, { "epoch": 0.4213198436422166, "grad_norm": 0.89453125, "learning_rate": 3.122072509248914e-05, "loss": 0.7743, "step": 5726 }, { "epoch": 0.4213934237755806, "grad_norm": 0.8828125, "learning_rate": 3.1215115491397714e-05, "loss": 1.2776, "step": 5727 }, { "epoch": 0.4214670039089446, "grad_norm": 0.83203125, "learning_rate": 3.120950555675277e-05, "loss": 1.1721, "step": 5728 }, { "epoch": 0.4215405840423086, "grad_norm": 0.8671875, "learning_rate": 3.120389528885541e-05, "loss": 1.3343, "step": 5729 }, { "epoch": 0.42161416417567255, "grad_norm": 0.8359375, "learning_rate": 3.119828468800669e-05, "loss": 0.9707, "step": 5730 }, { "epoch": 0.42168774430903655, "grad_norm": 1.109375, "learning_rate": 3.1192673754507754e-05, "loss": 1.2327, "step": 5731 }, { "epoch": 0.42176132444240055, "grad_norm": 0.93359375, "learning_rate": 3.11870624886597e-05, "loss": 0.8943, "step": 5732 }, { "epoch": 0.42183490457576456, "grad_norm": 0.93359375, "learning_rate": 3.118145089076369e-05, "loss": 1.0463, "step": 5733 }, { "epoch": 0.42190848470912856, "grad_norm": 1.0, "learning_rate": 3.117583896112088e-05, "loss": 0.8832, "step": 5734 }, { "epoch": 0.4219820648424925, "grad_norm": 1.1015625, "learning_rate": 3.117022670003246e-05, "loss": 1.1644, "step": 5735 }, { "epoch": 0.4220556449758565, "grad_norm": 0.86328125, "learning_rate": 3.116461410779964e-05, "loss": 0.916, "step": 5736 }, { "epoch": 0.4221292251092205, "grad_norm": 0.953125, "learning_rate": 3.1159001184723615e-05, "loss": 1.1364, "step": 5737 }, { "epoch": 0.4222028052425845, "grad_norm": 1.1171875, "learning_rate": 3.115338793110563e-05, "loss": 1.3442, "step": 5738 }, { "epoch": 0.42227638537594847, "grad_norm": 0.6953125, "learning_rate": 3.114777434724694e-05, "loss": 0.7471, "step": 5739 }, { "epoch": 0.4223499655093125, "grad_norm": 1.0078125, "learning_rate": 3.11421604334488e-05, "loss": 1.0077, "step": 5740 }, { "epoch": 0.4224235456426765, "grad_norm": 0.8359375, "learning_rate": 3.1136546190012524e-05, "loss": 0.7534, "step": 5741 }, { "epoch": 0.4224971257760405, "grad_norm": 0.9453125, "learning_rate": 3.11309316172394e-05, "loss": 0.9952, "step": 5742 }, { "epoch": 0.4225707059094045, "grad_norm": 1.234375, "learning_rate": 3.1125316715430755e-05, "loss": 1.0639, "step": 5743 }, { "epoch": 0.42264428604276844, "grad_norm": 0.859375, "learning_rate": 3.111970148488793e-05, "loss": 0.8631, "step": 5744 }, { "epoch": 0.42271786617613244, "grad_norm": 1.03125, "learning_rate": 3.111408592591229e-05, "loss": 1.0575, "step": 5745 }, { "epoch": 0.42279144630949644, "grad_norm": 0.76953125, "learning_rate": 3.11084700388052e-05, "loss": 0.7257, "step": 5746 }, { "epoch": 0.42286502644286045, "grad_norm": 0.73828125, "learning_rate": 3.110285382386806e-05, "loss": 0.6819, "step": 5747 }, { "epoch": 0.4229386065762244, "grad_norm": 0.78125, "learning_rate": 3.109723728140229e-05, "loss": 0.7694, "step": 5748 }, { "epoch": 0.4230121867095884, "grad_norm": 0.83984375, "learning_rate": 3.10916204117093e-05, "loss": 1.0799, "step": 5749 }, { "epoch": 0.4230857668429524, "grad_norm": 1.0703125, "learning_rate": 3.108600321509056e-05, "loss": 0.9085, "step": 5750 }, { "epoch": 0.4231593469763164, "grad_norm": 0.78515625, "learning_rate": 3.108038569184751e-05, "loss": 1.1044, "step": 5751 }, { "epoch": 0.4232329271096804, "grad_norm": 0.9921875, "learning_rate": 3.1074767842281654e-05, "loss": 1.2157, "step": 5752 }, { "epoch": 0.42330650724304436, "grad_norm": 0.7265625, "learning_rate": 3.1069149666694474e-05, "loss": 1.0037, "step": 5753 }, { "epoch": 0.42338008737640836, "grad_norm": 0.9375, "learning_rate": 3.10635311653875e-05, "loss": 0.9024, "step": 5754 }, { "epoch": 0.42345366750977237, "grad_norm": 1.03125, "learning_rate": 3.105791233866226e-05, "loss": 1.5471, "step": 5755 }, { "epoch": 0.4235272476431364, "grad_norm": 0.89453125, "learning_rate": 3.10522931868203e-05, "loss": 1.5793, "step": 5756 }, { "epoch": 0.4236008277765003, "grad_norm": 0.8359375, "learning_rate": 3.1046673710163206e-05, "loss": 0.7626, "step": 5757 }, { "epoch": 0.4236744079098643, "grad_norm": 0.91015625, "learning_rate": 3.104105390899255e-05, "loss": 0.947, "step": 5758 }, { "epoch": 0.42374798804322833, "grad_norm": 0.9140625, "learning_rate": 3.103543378360994e-05, "loss": 0.9181, "step": 5759 }, { "epoch": 0.42382156817659233, "grad_norm": 0.875, "learning_rate": 3.1029813334316994e-05, "loss": 0.784, "step": 5760 }, { "epoch": 0.42389514830995634, "grad_norm": 0.8984375, "learning_rate": 3.102419256141536e-05, "loss": 0.951, "step": 5761 }, { "epoch": 0.4239687284433203, "grad_norm": 0.81640625, "learning_rate": 3.101857146520669e-05, "loss": 0.9759, "step": 5762 }, { "epoch": 0.4240423085766843, "grad_norm": 0.84765625, "learning_rate": 3.101295004599266e-05, "loss": 1.1313, "step": 5763 }, { "epoch": 0.4241158887100483, "grad_norm": 1.5703125, "learning_rate": 3.100732830407495e-05, "loss": 1.0304, "step": 5764 }, { "epoch": 0.4241894688434123, "grad_norm": 0.9140625, "learning_rate": 3.1001706239755286e-05, "loss": 0.9478, "step": 5765 }, { "epoch": 0.42426304897677625, "grad_norm": 0.81640625, "learning_rate": 3.0996083853335374e-05, "loss": 0.8076, "step": 5766 }, { "epoch": 0.42433662911014025, "grad_norm": 0.84765625, "learning_rate": 3.099046114511696e-05, "loss": 0.5262, "step": 5767 }, { "epoch": 0.42441020924350426, "grad_norm": 1.1640625, "learning_rate": 3.098483811540183e-05, "loss": 1.2619, "step": 5768 }, { "epoch": 0.42448378937686826, "grad_norm": 0.828125, "learning_rate": 3.097921476449173e-05, "loss": 0.7225, "step": 5769 }, { "epoch": 0.42455736951023226, "grad_norm": 1.046875, "learning_rate": 3.097359109268847e-05, "loss": 1.2377, "step": 5770 }, { "epoch": 0.4246309496435962, "grad_norm": 0.953125, "learning_rate": 3.096796710029386e-05, "loss": 1.4803, "step": 5771 }, { "epoch": 0.4247045297769602, "grad_norm": 0.8203125, "learning_rate": 3.0962342787609725e-05, "loss": 1.1074, "step": 5772 }, { "epoch": 0.4247781099103242, "grad_norm": 0.81640625, "learning_rate": 3.0956718154937915e-05, "loss": 1.1368, "step": 5773 }, { "epoch": 0.4248516900436882, "grad_norm": 1.0859375, "learning_rate": 3.095109320258029e-05, "loss": 1.1014, "step": 5774 }, { "epoch": 0.4249252701770522, "grad_norm": 1.015625, "learning_rate": 3.094546793083873e-05, "loss": 1.9882, "step": 5775 }, { "epoch": 0.4249988503104162, "grad_norm": 0.82421875, "learning_rate": 3.0939842340015145e-05, "loss": 1.282, "step": 5776 }, { "epoch": 0.4250724304437802, "grad_norm": 0.74609375, "learning_rate": 3.093421643041144e-05, "loss": 0.8593, "step": 5777 }, { "epoch": 0.4251460105771442, "grad_norm": 1.0, "learning_rate": 3.092859020232954e-05, "loss": 0.9462, "step": 5778 }, { "epoch": 0.4252195907105082, "grad_norm": 0.84765625, "learning_rate": 3.092296365607141e-05, "loss": 0.8246, "step": 5779 }, { "epoch": 0.42529317084387214, "grad_norm": 0.94140625, "learning_rate": 3.091733679193899e-05, "loss": 0.9693, "step": 5780 }, { "epoch": 0.42536675097723614, "grad_norm": 0.79296875, "learning_rate": 3.09117096102343e-05, "loss": 0.7689, "step": 5781 }, { "epoch": 0.42544033111060015, "grad_norm": 0.91015625, "learning_rate": 3.090608211125931e-05, "loss": 0.9306, "step": 5782 }, { "epoch": 0.42551391124396415, "grad_norm": 0.96484375, "learning_rate": 3.090045429531605e-05, "loss": 1.2965, "step": 5783 }, { "epoch": 0.4255874913773281, "grad_norm": 0.85546875, "learning_rate": 3.089482616270656e-05, "loss": 0.8974, "step": 5784 }, { "epoch": 0.4256610715106921, "grad_norm": 0.98828125, "learning_rate": 3.088919771373287e-05, "loss": 1.1757, "step": 5785 }, { "epoch": 0.4257346516440561, "grad_norm": 1.0546875, "learning_rate": 3.0883568948697077e-05, "loss": 1.1472, "step": 5786 }, { "epoch": 0.4258082317774201, "grad_norm": 0.80859375, "learning_rate": 3.0877939867901235e-05, "loss": 0.823, "step": 5787 }, { "epoch": 0.4258818119107841, "grad_norm": 0.8515625, "learning_rate": 3.0872310471647474e-05, "loss": 1.0921, "step": 5788 }, { "epoch": 0.42595539204414806, "grad_norm": 0.80859375, "learning_rate": 3.08666807602379e-05, "loss": 0.9897, "step": 5789 }, { "epoch": 0.42602897217751207, "grad_norm": 0.76171875, "learning_rate": 3.086105073397465e-05, "loss": 0.7113, "step": 5790 }, { "epoch": 0.42610255231087607, "grad_norm": 0.68359375, "learning_rate": 3.0855420393159874e-05, "loss": 0.6338, "step": 5791 }, { "epoch": 0.4261761324442401, "grad_norm": 0.828125, "learning_rate": 3.084978973809574e-05, "loss": 1.2566, "step": 5792 }, { "epoch": 0.426249712577604, "grad_norm": 0.81640625, "learning_rate": 3.084415876908444e-05, "loss": 0.7628, "step": 5793 }, { "epoch": 0.426323292710968, "grad_norm": 0.86328125, "learning_rate": 3.083852748642818e-05, "loss": 0.7678, "step": 5794 }, { "epoch": 0.42639687284433203, "grad_norm": 0.81640625, "learning_rate": 3.083289589042918e-05, "loss": 0.9107, "step": 5795 }, { "epoch": 0.42647045297769604, "grad_norm": 0.95703125, "learning_rate": 3.082726398138968e-05, "loss": 0.8132, "step": 5796 }, { "epoch": 0.42654403311106004, "grad_norm": 0.671875, "learning_rate": 3.0821631759611925e-05, "loss": 0.616, "step": 5797 }, { "epoch": 0.426617613244424, "grad_norm": 0.80078125, "learning_rate": 3.0815999225398186e-05, "loss": 1.4227, "step": 5798 }, { "epoch": 0.426691193377788, "grad_norm": 1.0546875, "learning_rate": 3.081036637905075e-05, "loss": 1.1151, "step": 5799 }, { "epoch": 0.426764773511152, "grad_norm": 0.79296875, "learning_rate": 3.080473322087193e-05, "loss": 1.221, "step": 5800 }, { "epoch": 0.426838353644516, "grad_norm": 0.75, "learning_rate": 3.0799099751164036e-05, "loss": 0.8428, "step": 5801 }, { "epoch": 0.42691193377787995, "grad_norm": 0.77734375, "learning_rate": 3.079346597022942e-05, "loss": 0.8761, "step": 5802 }, { "epoch": 0.42698551391124395, "grad_norm": 0.88671875, "learning_rate": 3.078783187837042e-05, "loss": 1.2068, "step": 5803 }, { "epoch": 0.42705909404460796, "grad_norm": 0.84375, "learning_rate": 3.078219747588942e-05, "loss": 0.7164, "step": 5804 }, { "epoch": 0.42713267417797196, "grad_norm": 0.79296875, "learning_rate": 3.077656276308879e-05, "loss": 0.8161, "step": 5805 }, { "epoch": 0.42720625431133596, "grad_norm": 0.9140625, "learning_rate": 3.077092774027096e-05, "loss": 1.4555, "step": 5806 }, { "epoch": 0.4272798344446999, "grad_norm": 1.03125, "learning_rate": 3.076529240773833e-05, "loss": 0.9777, "step": 5807 }, { "epoch": 0.4273534145780639, "grad_norm": 0.87109375, "learning_rate": 3.0759656765793356e-05, "loss": 1.1901, "step": 5808 }, { "epoch": 0.4274269947114279, "grad_norm": 0.9765625, "learning_rate": 3.075402081473848e-05, "loss": 1.0205, "step": 5809 }, { "epoch": 0.4275005748447919, "grad_norm": 0.82421875, "learning_rate": 3.074838455487616e-05, "loss": 1.1187, "step": 5810 }, { "epoch": 0.4275741549781559, "grad_norm": 0.7890625, "learning_rate": 3.074274798650892e-05, "loss": 0.909, "step": 5811 }, { "epoch": 0.4276477351115199, "grad_norm": 0.80859375, "learning_rate": 3.073711110993923e-05, "loss": 0.9265, "step": 5812 }, { "epoch": 0.4277213152448839, "grad_norm": 0.7265625, "learning_rate": 3.0731473925469616e-05, "loss": 0.868, "step": 5813 }, { "epoch": 0.4277948953782479, "grad_norm": 0.96875, "learning_rate": 3.072583643340263e-05, "loss": 1.1088, "step": 5814 }, { "epoch": 0.4278684755116119, "grad_norm": 1.1484375, "learning_rate": 3.072019863404082e-05, "loss": 1.3737, "step": 5815 }, { "epoch": 0.42794205564497584, "grad_norm": 0.984375, "learning_rate": 3.071456052768675e-05, "loss": 1.4669, "step": 5816 }, { "epoch": 0.42801563577833984, "grad_norm": 1.203125, "learning_rate": 3.0708922114643e-05, "loss": 1.3207, "step": 5817 }, { "epoch": 0.42808921591170385, "grad_norm": 1.3046875, "learning_rate": 3.07032833952122e-05, "loss": 1.0223, "step": 5818 }, { "epoch": 0.42816279604506785, "grad_norm": 0.8203125, "learning_rate": 3.0697644369696945e-05, "loss": 0.7622, "step": 5819 }, { "epoch": 0.4282363761784318, "grad_norm": 0.8828125, "learning_rate": 3.069200503839988e-05, "loss": 0.7641, "step": 5820 }, { "epoch": 0.4283099563117958, "grad_norm": 0.80078125, "learning_rate": 3.068636540162364e-05, "loss": 0.8504, "step": 5821 }, { "epoch": 0.4283835364451598, "grad_norm": 1.078125, "learning_rate": 3.068072545967093e-05, "loss": 1.165, "step": 5822 }, { "epoch": 0.4284571165785238, "grad_norm": 0.98828125, "learning_rate": 3.0675085212844403e-05, "loss": 1.3198, "step": 5823 }, { "epoch": 0.4285306967118878, "grad_norm": 1.015625, "learning_rate": 3.066944466144677e-05, "loss": 0.7301, "step": 5824 }, { "epoch": 0.42860427684525176, "grad_norm": 0.81640625, "learning_rate": 3.066380380578075e-05, "loss": 0.8298, "step": 5825 }, { "epoch": 0.42867785697861577, "grad_norm": 0.890625, "learning_rate": 3.065816264614909e-05, "loss": 0.9894, "step": 5826 }, { "epoch": 0.42875143711197977, "grad_norm": 0.890625, "learning_rate": 3.065252118285451e-05, "loss": 0.8816, "step": 5827 }, { "epoch": 0.4288250172453438, "grad_norm": 0.80859375, "learning_rate": 3.0646879416199805e-05, "loss": 1.0246, "step": 5828 }, { "epoch": 0.4288985973787077, "grad_norm": 0.8359375, "learning_rate": 3.0641237346487747e-05, "loss": 0.8419, "step": 5829 }, { "epoch": 0.42897217751207173, "grad_norm": 0.9375, "learning_rate": 3.063559497402113e-05, "loss": 0.8907, "step": 5830 }, { "epoch": 0.42904575764543573, "grad_norm": 1.0703125, "learning_rate": 3.062995229910278e-05, "loss": 0.8183, "step": 5831 }, { "epoch": 0.42911933777879974, "grad_norm": 0.796875, "learning_rate": 3.062430932203552e-05, "loss": 0.9873, "step": 5832 }, { "epoch": 0.42919291791216374, "grad_norm": 0.89453125, "learning_rate": 3.06186660431222e-05, "loss": 1.1113, "step": 5833 }, { "epoch": 0.4292664980455277, "grad_norm": 0.90625, "learning_rate": 3.061302246266569e-05, "loss": 1.0394, "step": 5834 }, { "epoch": 0.4293400781788917, "grad_norm": 0.76171875, "learning_rate": 3.060737858096886e-05, "loss": 0.9237, "step": 5835 }, { "epoch": 0.4294136583122557, "grad_norm": 0.859375, "learning_rate": 3.0601734398334626e-05, "loss": 0.8831, "step": 5836 }, { "epoch": 0.4294872384456197, "grad_norm": 0.6953125, "learning_rate": 3.0596089915065875e-05, "loss": 0.5404, "step": 5837 }, { "epoch": 0.42956081857898365, "grad_norm": 0.75, "learning_rate": 3.059044513146555e-05, "loss": 0.7114, "step": 5838 }, { "epoch": 0.42963439871234765, "grad_norm": 0.7421875, "learning_rate": 3.058480004783659e-05, "loss": 0.7721, "step": 5839 }, { "epoch": 0.42970797884571166, "grad_norm": 0.81640625, "learning_rate": 3.0579154664481966e-05, "loss": 0.8697, "step": 5840 }, { "epoch": 0.42978155897907566, "grad_norm": 0.8125, "learning_rate": 3.057350898170464e-05, "loss": 0.8042, "step": 5841 }, { "epoch": 0.42985513911243967, "grad_norm": 0.82421875, "learning_rate": 3.056786299980763e-05, "loss": 1.1312, "step": 5842 }, { "epoch": 0.4299287192458036, "grad_norm": 1.0390625, "learning_rate": 3.0562216719093913e-05, "loss": 1.0338, "step": 5843 }, { "epoch": 0.4300022993791676, "grad_norm": 0.77734375, "learning_rate": 3.055657013986654e-05, "loss": 1.1038, "step": 5844 }, { "epoch": 0.4300758795125316, "grad_norm": 0.92578125, "learning_rate": 3.055092326242854e-05, "loss": 0.9665, "step": 5845 }, { "epoch": 0.4301494596458956, "grad_norm": 1.7578125, "learning_rate": 3.054527608708298e-05, "loss": 1.4164, "step": 5846 }, { "epoch": 0.4302230397792596, "grad_norm": 1.09375, "learning_rate": 3.053962861413291e-05, "loss": 1.3889, "step": 5847 }, { "epoch": 0.4302966199126236, "grad_norm": 0.87890625, "learning_rate": 3.053398084388144e-05, "loss": 1.0581, "step": 5848 }, { "epoch": 0.4303702000459876, "grad_norm": 0.82421875, "learning_rate": 3.052833277663167e-05, "loss": 0.7859, "step": 5849 }, { "epoch": 0.4304437801793516, "grad_norm": 0.8125, "learning_rate": 3.052268441268673e-05, "loss": 0.7511, "step": 5850 }, { "epoch": 0.4305173603127156, "grad_norm": 1.1796875, "learning_rate": 3.0517035752349743e-05, "loss": 1.367, "step": 5851 }, { "epoch": 0.43059094044607954, "grad_norm": 1.09375, "learning_rate": 3.051138679592387e-05, "loss": 0.9445, "step": 5852 }, { "epoch": 0.43066452057944354, "grad_norm": 1.0859375, "learning_rate": 3.0505737543712275e-05, "loss": 1.543, "step": 5853 }, { "epoch": 0.43073810071280755, "grad_norm": 0.6796875, "learning_rate": 3.050008799601814e-05, "loss": 0.5615, "step": 5854 }, { "epoch": 0.43081168084617155, "grad_norm": 1.0, "learning_rate": 3.0494438153144676e-05, "loss": 0.6474, "step": 5855 }, { "epoch": 0.4308852609795355, "grad_norm": 1.03125, "learning_rate": 3.0488788015395087e-05, "loss": 0.8403, "step": 5856 }, { "epoch": 0.4309588411128995, "grad_norm": 0.828125, "learning_rate": 3.0483137583072623e-05, "loss": 0.8642, "step": 5857 }, { "epoch": 0.4310324212462635, "grad_norm": 0.91015625, "learning_rate": 3.0477486856480515e-05, "loss": 0.9637, "step": 5858 }, { "epoch": 0.4311060013796275, "grad_norm": 0.734375, "learning_rate": 3.0471835835922034e-05, "loss": 0.6809, "step": 5859 }, { "epoch": 0.4311795815129915, "grad_norm": 0.9296875, "learning_rate": 3.046618452170046e-05, "loss": 0.9921, "step": 5860 }, { "epoch": 0.43125316164635547, "grad_norm": 0.9453125, "learning_rate": 3.0460532914119083e-05, "loss": 1.0586, "step": 5861 }, { "epoch": 0.43132674177971947, "grad_norm": 1.078125, "learning_rate": 3.0454881013481217e-05, "loss": 1.4062, "step": 5862 }, { "epoch": 0.4314003219130835, "grad_norm": 1.015625, "learning_rate": 3.0449228820090198e-05, "loss": 1.2889, "step": 5863 }, { "epoch": 0.4314739020464475, "grad_norm": 0.73828125, "learning_rate": 3.0443576334249357e-05, "loss": 1.1536, "step": 5864 }, { "epoch": 0.4315474821798114, "grad_norm": 0.6484375, "learning_rate": 3.043792355626205e-05, "loss": 0.5844, "step": 5865 }, { "epoch": 0.43162106231317543, "grad_norm": 0.98828125, "learning_rate": 3.0432270486431663e-05, "loss": 0.7336, "step": 5866 }, { "epoch": 0.43169464244653943, "grad_norm": 1.2421875, "learning_rate": 3.042661712506158e-05, "loss": 1.3385, "step": 5867 }, { "epoch": 0.43176822257990344, "grad_norm": 0.90625, "learning_rate": 3.04209634724552e-05, "loss": 0.7551, "step": 5868 }, { "epoch": 0.43184180271326744, "grad_norm": 0.8828125, "learning_rate": 3.0415309528915946e-05, "loss": 0.8322, "step": 5869 }, { "epoch": 0.4319153828466314, "grad_norm": 0.875, "learning_rate": 3.040965529474727e-05, "loss": 0.7568, "step": 5870 }, { "epoch": 0.4319889629799954, "grad_norm": 0.93359375, "learning_rate": 3.04040007702526e-05, "loss": 0.9458, "step": 5871 }, { "epoch": 0.4320625431133594, "grad_norm": 0.84375, "learning_rate": 3.039834595573542e-05, "loss": 0.9895, "step": 5872 }, { "epoch": 0.4321361232467234, "grad_norm": 0.953125, "learning_rate": 3.039269085149921e-05, "loss": 1.1591, "step": 5873 }, { "epoch": 0.43220970338008735, "grad_norm": 0.73828125, "learning_rate": 3.038703545784747e-05, "loss": 0.8226, "step": 5874 }, { "epoch": 0.43228328351345136, "grad_norm": 0.765625, "learning_rate": 3.03813797750837e-05, "loss": 0.9298, "step": 5875 }, { "epoch": 0.43235686364681536, "grad_norm": 0.78125, "learning_rate": 3.0375723803511447e-05, "loss": 0.5958, "step": 5876 }, { "epoch": 0.43243044378017936, "grad_norm": 0.765625, "learning_rate": 3.0370067543434254e-05, "loss": 0.8827, "step": 5877 }, { "epoch": 0.43250402391354337, "grad_norm": 1.03125, "learning_rate": 3.036441099515568e-05, "loss": 1.133, "step": 5878 }, { "epoch": 0.4325776040469073, "grad_norm": 0.859375, "learning_rate": 3.0358754158979298e-05, "loss": 0.796, "step": 5879 }, { "epoch": 0.4326511841802713, "grad_norm": 0.8359375, "learning_rate": 3.03530970352087e-05, "loss": 0.7298, "step": 5880 }, { "epoch": 0.4327247643136353, "grad_norm": 0.76171875, "learning_rate": 3.0347439624147493e-05, "loss": 0.6449, "step": 5881 }, { "epoch": 0.43279834444699933, "grad_norm": 0.8125, "learning_rate": 3.0341781926099305e-05, "loss": 0.9, "step": 5882 }, { "epoch": 0.4328719245803633, "grad_norm": 0.8828125, "learning_rate": 3.0336123941367768e-05, "loss": 1.0585, "step": 5883 }, { "epoch": 0.4329455047137273, "grad_norm": 0.78125, "learning_rate": 3.0330465670256542e-05, "loss": 0.7246, "step": 5884 }, { "epoch": 0.4330190848470913, "grad_norm": 0.87109375, "learning_rate": 3.032480711306928e-05, "loss": 1.0527, "step": 5885 }, { "epoch": 0.4330926649804553, "grad_norm": 1.015625, "learning_rate": 3.0319148270109693e-05, "loss": 1.1721, "step": 5886 }, { "epoch": 0.4331662451138193, "grad_norm": 1.1953125, "learning_rate": 3.0313489141681456e-05, "loss": 1.5106, "step": 5887 }, { "epoch": 0.43323982524718324, "grad_norm": 0.734375, "learning_rate": 3.030782972808829e-05, "loss": 0.7848, "step": 5888 }, { "epoch": 0.43331340538054725, "grad_norm": 0.90234375, "learning_rate": 3.030217002963393e-05, "loss": 1.0025, "step": 5889 }, { "epoch": 0.43338698551391125, "grad_norm": 0.6875, "learning_rate": 3.0296510046622118e-05, "loss": 0.6883, "step": 5890 }, { "epoch": 0.43346056564727525, "grad_norm": 0.8984375, "learning_rate": 3.0290849779356623e-05, "loss": 1.1494, "step": 5891 }, { "epoch": 0.4335341457806392, "grad_norm": 0.73828125, "learning_rate": 3.0285189228141202e-05, "loss": 0.8172, "step": 5892 }, { "epoch": 0.4336077259140032, "grad_norm": 0.734375, "learning_rate": 3.0279528393279664e-05, "loss": 0.7093, "step": 5893 }, { "epoch": 0.4336813060473672, "grad_norm": 0.6171875, "learning_rate": 3.0273867275075808e-05, "loss": 0.5568, "step": 5894 }, { "epoch": 0.4337548861807312, "grad_norm": 0.984375, "learning_rate": 3.0268205873833456e-05, "loss": 0.8622, "step": 5895 }, { "epoch": 0.4338284663140952, "grad_norm": 0.8515625, "learning_rate": 3.0262544189856446e-05, "loss": 1.0844, "step": 5896 }, { "epoch": 0.43390204644745917, "grad_norm": 1.046875, "learning_rate": 3.0256882223448625e-05, "loss": 1.2497, "step": 5897 }, { "epoch": 0.43397562658082317, "grad_norm": 0.796875, "learning_rate": 3.025121997491387e-05, "loss": 0.7019, "step": 5898 }, { "epoch": 0.4340492067141872, "grad_norm": 0.85546875, "learning_rate": 3.024555744455605e-05, "loss": 1.0185, "step": 5899 }, { "epoch": 0.4341227868475512, "grad_norm": 0.7890625, "learning_rate": 3.0239894632679078e-05, "loss": 0.9628, "step": 5900 }, { "epoch": 0.4341963669809151, "grad_norm": 0.80859375, "learning_rate": 3.023423153958685e-05, "loss": 0.612, "step": 5901 }, { "epoch": 0.43426994711427913, "grad_norm": 0.81640625, "learning_rate": 3.0228568165583303e-05, "loss": 0.999, "step": 5902 }, { "epoch": 0.43434352724764314, "grad_norm": 1.0078125, "learning_rate": 3.0222904510972377e-05, "loss": 1.065, "step": 5903 }, { "epoch": 0.43441710738100714, "grad_norm": 0.796875, "learning_rate": 3.0217240576058033e-05, "loss": 0.9642, "step": 5904 }, { "epoch": 0.43449068751437114, "grad_norm": 1.0078125, "learning_rate": 3.0211576361144245e-05, "loss": 1.1554, "step": 5905 }, { "epoch": 0.4345642676477351, "grad_norm": 0.9296875, "learning_rate": 3.020591186653499e-05, "loss": 1.2682, "step": 5906 }, { "epoch": 0.4346378477810991, "grad_norm": 0.921875, "learning_rate": 3.0200247092534285e-05, "loss": 0.7066, "step": 5907 }, { "epoch": 0.4347114279144631, "grad_norm": 0.671875, "learning_rate": 3.0194582039446136e-05, "loss": 0.7009, "step": 5908 }, { "epoch": 0.4347850080478271, "grad_norm": 0.84765625, "learning_rate": 3.018891670757458e-05, "loss": 0.9667, "step": 5909 }, { "epoch": 0.43485858818119105, "grad_norm": 0.7890625, "learning_rate": 3.0183251097223664e-05, "loss": 0.8592, "step": 5910 }, { "epoch": 0.43493216831455506, "grad_norm": 0.828125, "learning_rate": 3.0177585208697456e-05, "loss": 0.9382, "step": 5911 }, { "epoch": 0.43500574844791906, "grad_norm": 0.796875, "learning_rate": 3.0171919042300023e-05, "loss": 1.0079, "step": 5912 }, { "epoch": 0.43507932858128306, "grad_norm": 0.7265625, "learning_rate": 3.0166252598335466e-05, "loss": 0.8211, "step": 5913 }, { "epoch": 0.43515290871464707, "grad_norm": 0.9296875, "learning_rate": 3.016058587710789e-05, "loss": 1.1583, "step": 5914 }, { "epoch": 0.435226488848011, "grad_norm": 0.984375, "learning_rate": 3.015491887892141e-05, "loss": 1.0118, "step": 5915 }, { "epoch": 0.435300068981375, "grad_norm": 0.7890625, "learning_rate": 3.014925160408018e-05, "loss": 1.1678, "step": 5916 }, { "epoch": 0.435373649114739, "grad_norm": 0.98046875, "learning_rate": 3.0143584052888334e-05, "loss": 0.9329, "step": 5917 }, { "epoch": 0.43544722924810303, "grad_norm": 0.73046875, "learning_rate": 3.0137916225650054e-05, "loss": 0.5739, "step": 5918 }, { "epoch": 0.435520809381467, "grad_norm": 0.79296875, "learning_rate": 3.0132248122669514e-05, "loss": 0.8449, "step": 5919 }, { "epoch": 0.435594389514831, "grad_norm": 1.0, "learning_rate": 3.012657974425091e-05, "loss": 0.9105, "step": 5920 }, { "epoch": 0.435667969648195, "grad_norm": 0.875, "learning_rate": 3.012091109069845e-05, "loss": 0.821, "step": 5921 }, { "epoch": 0.435741549781559, "grad_norm": 1.2890625, "learning_rate": 3.0115242162316365e-05, "loss": 1.7447, "step": 5922 }, { "epoch": 0.435815129914923, "grad_norm": 0.92578125, "learning_rate": 3.0109572959408894e-05, "loss": 1.1237, "step": 5923 }, { "epoch": 0.43588871004828694, "grad_norm": 0.8203125, "learning_rate": 3.0103903482280292e-05, "loss": 1.156, "step": 5924 }, { "epoch": 0.43596229018165095, "grad_norm": 0.875, "learning_rate": 3.009823373123484e-05, "loss": 0.682, "step": 5925 }, { "epoch": 0.43603587031501495, "grad_norm": 0.85546875, "learning_rate": 3.0092563706576798e-05, "loss": 0.8481, "step": 5926 }, { "epoch": 0.43610945044837895, "grad_norm": 0.828125, "learning_rate": 3.0086893408610495e-05, "loss": 0.7081, "step": 5927 }, { "epoch": 0.4361830305817429, "grad_norm": 0.890625, "learning_rate": 3.0081222837640215e-05, "loss": 1.0804, "step": 5928 }, { "epoch": 0.4362566107151069, "grad_norm": 0.890625, "learning_rate": 3.0075551993970314e-05, "loss": 0.9245, "step": 5929 }, { "epoch": 0.4363301908484709, "grad_norm": 0.75, "learning_rate": 3.0069880877905116e-05, "loss": 0.5928, "step": 5930 }, { "epoch": 0.4364037709818349, "grad_norm": 1.0859375, "learning_rate": 3.006420948974899e-05, "loss": 0.9989, "step": 5931 }, { "epoch": 0.4364773511151989, "grad_norm": 1.1640625, "learning_rate": 3.005853782980631e-05, "loss": 0.9612, "step": 5932 }, { "epoch": 0.43655093124856287, "grad_norm": 0.87890625, "learning_rate": 3.0052865898381456e-05, "loss": 1.058, "step": 5933 }, { "epoch": 0.43662451138192687, "grad_norm": 0.9296875, "learning_rate": 3.0047193695778836e-05, "loss": 1.1295, "step": 5934 }, { "epoch": 0.4366980915152909, "grad_norm": 0.86328125, "learning_rate": 3.0041521222302853e-05, "loss": 0.966, "step": 5935 }, { "epoch": 0.4367716716486549, "grad_norm": 0.83203125, "learning_rate": 3.0035848478257956e-05, "loss": 0.882, "step": 5936 }, { "epoch": 0.43684525178201883, "grad_norm": 1.0390625, "learning_rate": 3.003017546394858e-05, "loss": 0.7737, "step": 5937 }, { "epoch": 0.43691883191538283, "grad_norm": 1.0390625, "learning_rate": 3.0024502179679187e-05, "loss": 1.3886, "step": 5938 }, { "epoch": 0.43699241204874684, "grad_norm": 0.93359375, "learning_rate": 3.0018828625754258e-05, "loss": 1.2207, "step": 5939 }, { "epoch": 0.43706599218211084, "grad_norm": 0.875, "learning_rate": 3.0013154802478266e-05, "loss": 0.8199, "step": 5940 }, { "epoch": 0.43713957231547484, "grad_norm": 0.78515625, "learning_rate": 3.0007480710155732e-05, "loss": 0.7809, "step": 5941 }, { "epoch": 0.4372131524488388, "grad_norm": 0.84765625, "learning_rate": 3.0001806349091166e-05, "loss": 0.9857, "step": 5942 }, { "epoch": 0.4372867325822028, "grad_norm": 1.2109375, "learning_rate": 2.9996131719589092e-05, "loss": 1.1154, "step": 5943 }, { "epoch": 0.4373603127155668, "grad_norm": 1.1953125, "learning_rate": 2.999045682195407e-05, "loss": 1.4869, "step": 5944 }, { "epoch": 0.4374338928489308, "grad_norm": 0.8125, "learning_rate": 2.9984781656490656e-05, "loss": 0.6608, "step": 5945 }, { "epoch": 0.43750747298229475, "grad_norm": 0.76953125, "learning_rate": 2.9979106223503435e-05, "loss": 0.9823, "step": 5946 }, { "epoch": 0.43758105311565876, "grad_norm": 1.25, "learning_rate": 2.9973430523296974e-05, "loss": 1.1349, "step": 5947 }, { "epoch": 0.43765463324902276, "grad_norm": 1.03125, "learning_rate": 2.9967754556175896e-05, "loss": 1.3051, "step": 5948 }, { "epoch": 0.43772821338238677, "grad_norm": 1.0, "learning_rate": 2.9962078322444815e-05, "loss": 1.2846, "step": 5949 }, { "epoch": 0.43780179351575077, "grad_norm": 0.80078125, "learning_rate": 2.9956401822408357e-05, "loss": 0.8564, "step": 5950 }, { "epoch": 0.4378753736491147, "grad_norm": 1.015625, "learning_rate": 2.9950725056371177e-05, "loss": 1.1169, "step": 5951 }, { "epoch": 0.4379489537824787, "grad_norm": 0.9609375, "learning_rate": 2.9945048024637935e-05, "loss": 0.9506, "step": 5952 }, { "epoch": 0.4380225339158427, "grad_norm": 0.74609375, "learning_rate": 2.9939370727513306e-05, "loss": 0.671, "step": 5953 }, { "epoch": 0.43809611404920673, "grad_norm": 0.9140625, "learning_rate": 2.9933693165301975e-05, "loss": 1.1273, "step": 5954 }, { "epoch": 0.4381696941825707, "grad_norm": 0.87109375, "learning_rate": 2.9928015338308655e-05, "loss": 0.9897, "step": 5955 }, { "epoch": 0.4382432743159347, "grad_norm": 0.75390625, "learning_rate": 2.992233724683805e-05, "loss": 0.7975, "step": 5956 }, { "epoch": 0.4383168544492987, "grad_norm": 0.9140625, "learning_rate": 2.9916658891194917e-05, "loss": 0.6962, "step": 5957 }, { "epoch": 0.4383904345826627, "grad_norm": 0.8671875, "learning_rate": 2.9910980271683975e-05, "loss": 0.7968, "step": 5958 }, { "epoch": 0.4384640147160267, "grad_norm": 0.8203125, "learning_rate": 2.9905301388610008e-05, "loss": 1.1579, "step": 5959 }, { "epoch": 0.43853759484939064, "grad_norm": 0.80859375, "learning_rate": 2.9899622242277773e-05, "loss": 0.7132, "step": 5960 }, { "epoch": 0.43861117498275465, "grad_norm": 0.78125, "learning_rate": 2.989394283299207e-05, "loss": 0.9599, "step": 5961 }, { "epoch": 0.43868475511611865, "grad_norm": 0.8203125, "learning_rate": 2.9888263161057696e-05, "loss": 0.7488, "step": 5962 }, { "epoch": 0.43875833524948266, "grad_norm": 0.66796875, "learning_rate": 2.9882583226779466e-05, "loss": 0.7828, "step": 5963 }, { "epoch": 0.4388319153828466, "grad_norm": 0.86328125, "learning_rate": 2.9876903030462222e-05, "loss": 0.9261, "step": 5964 }, { "epoch": 0.4389054955162106, "grad_norm": 0.765625, "learning_rate": 2.9871222572410805e-05, "loss": 0.8842, "step": 5965 }, { "epoch": 0.4389790756495746, "grad_norm": 1.1015625, "learning_rate": 2.9865541852930075e-05, "loss": 1.0741, "step": 5966 }, { "epoch": 0.4390526557829386, "grad_norm": 1.015625, "learning_rate": 2.9859860872324896e-05, "loss": 0.8826, "step": 5967 }, { "epoch": 0.4391262359163026, "grad_norm": 0.734375, "learning_rate": 2.985417963090017e-05, "loss": 0.7303, "step": 5968 }, { "epoch": 0.43919981604966657, "grad_norm": 0.83984375, "learning_rate": 2.9848498128960785e-05, "loss": 0.7794, "step": 5969 }, { "epoch": 0.4392733961830306, "grad_norm": 1.2109375, "learning_rate": 2.984281636681166e-05, "loss": 1.0307, "step": 5970 }, { "epoch": 0.4393469763163946, "grad_norm": 1.140625, "learning_rate": 2.983713434475774e-05, "loss": 1.1013, "step": 5971 }, { "epoch": 0.4394205564497586, "grad_norm": 0.90625, "learning_rate": 2.9831452063103944e-05, "loss": 1.058, "step": 5972 }, { "epoch": 0.43949413658312253, "grad_norm": 0.80859375, "learning_rate": 2.982576952215525e-05, "loss": 0.942, "step": 5973 }, { "epoch": 0.43956771671648653, "grad_norm": 0.9296875, "learning_rate": 2.9820086722216616e-05, "loss": 0.873, "step": 5974 }, { "epoch": 0.43964129684985054, "grad_norm": 0.83203125, "learning_rate": 2.9814403663593036e-05, "loss": 0.8835, "step": 5975 }, { "epoch": 0.43971487698321454, "grad_norm": 0.74609375, "learning_rate": 2.9808720346589493e-05, "loss": 0.5536, "step": 5976 }, { "epoch": 0.43978845711657855, "grad_norm": 0.90625, "learning_rate": 2.9803036771511016e-05, "loss": 0.9971, "step": 5977 }, { "epoch": 0.4398620372499425, "grad_norm": 1.359375, "learning_rate": 2.9797352938662633e-05, "loss": 0.7169, "step": 5978 }, { "epoch": 0.4399356173833065, "grad_norm": 0.72265625, "learning_rate": 2.9791668848349376e-05, "loss": 0.7012, "step": 5979 }, { "epoch": 0.4400091975166705, "grad_norm": 0.7578125, "learning_rate": 2.9785984500876303e-05, "loss": 0.7088, "step": 5980 }, { "epoch": 0.4400827776500345, "grad_norm": 0.87890625, "learning_rate": 2.978029989654848e-05, "loss": 0.962, "step": 5981 }, { "epoch": 0.44015635778339846, "grad_norm": 1.0234375, "learning_rate": 2.9774615035670996e-05, "loss": 1.1626, "step": 5982 }, { "epoch": 0.44022993791676246, "grad_norm": 0.7421875, "learning_rate": 2.9768929918548926e-05, "loss": 0.7757, "step": 5983 }, { "epoch": 0.44030351805012646, "grad_norm": 0.6875, "learning_rate": 2.9763244545487406e-05, "loss": 0.5121, "step": 5984 }, { "epoch": 0.44037709818349047, "grad_norm": 1.078125, "learning_rate": 2.9757558916791545e-05, "loss": 0.9457, "step": 5985 }, { "epoch": 0.44045067831685447, "grad_norm": 0.69140625, "learning_rate": 2.9751873032766482e-05, "loss": 0.961, "step": 5986 }, { "epoch": 0.4405242584502184, "grad_norm": 0.86328125, "learning_rate": 2.974618689371737e-05, "loss": 0.9199, "step": 5987 }, { "epoch": 0.4405978385835824, "grad_norm": 0.83203125, "learning_rate": 2.974050049994937e-05, "loss": 0.6986, "step": 5988 }, { "epoch": 0.44067141871694643, "grad_norm": 0.74609375, "learning_rate": 2.9734813851767662e-05, "loss": 0.6249, "step": 5989 }, { "epoch": 0.44074499885031043, "grad_norm": 0.9609375, "learning_rate": 2.9729126949477436e-05, "loss": 1.158, "step": 5990 }, { "epoch": 0.4408185789836744, "grad_norm": 0.79296875, "learning_rate": 2.9723439793383907e-05, "loss": 0.6793, "step": 5991 }, { "epoch": 0.4408921591170384, "grad_norm": 0.9140625, "learning_rate": 2.9717752383792276e-05, "loss": 0.9961, "step": 5992 }, { "epoch": 0.4409657392504024, "grad_norm": 0.92578125, "learning_rate": 2.971206472100779e-05, "loss": 0.7426, "step": 5993 }, { "epoch": 0.4410393193837664, "grad_norm": 1.03125, "learning_rate": 2.970637680533569e-05, "loss": 0.8417, "step": 5994 }, { "epoch": 0.4411128995171304, "grad_norm": 1.21875, "learning_rate": 2.970068863708123e-05, "loss": 1.532, "step": 5995 }, { "epoch": 0.44118647965049435, "grad_norm": 0.81640625, "learning_rate": 2.9695000216549695e-05, "loss": 1.146, "step": 5996 }, { "epoch": 0.44126005978385835, "grad_norm": 0.63671875, "learning_rate": 2.9689311544046362e-05, "loss": 0.5832, "step": 5997 }, { "epoch": 0.44133363991722235, "grad_norm": 1.015625, "learning_rate": 2.9683622619876544e-05, "loss": 0.8079, "step": 5998 }, { "epoch": 0.44140722005058636, "grad_norm": 0.734375, "learning_rate": 2.9677933444345535e-05, "loss": 0.5841, "step": 5999 }, { "epoch": 0.4414808001839503, "grad_norm": 0.89453125, "learning_rate": 2.9672244017758683e-05, "loss": 0.9227, "step": 6000 }, { "epoch": 0.4415543803173143, "grad_norm": 0.921875, "learning_rate": 2.9666554340421314e-05, "loss": 0.7703, "step": 6001 }, { "epoch": 0.4416279604506783, "grad_norm": 0.73046875, "learning_rate": 2.966086441263879e-05, "loss": 0.6359, "step": 6002 }, { "epoch": 0.4417015405840423, "grad_norm": 0.8203125, "learning_rate": 2.965517423471647e-05, "loss": 0.5752, "step": 6003 }, { "epoch": 0.4417751207174063, "grad_norm": 0.7421875, "learning_rate": 2.9649483806959742e-05, "loss": 0.8808, "step": 6004 }, { "epoch": 0.44184870085077027, "grad_norm": 0.80078125, "learning_rate": 2.964379312967401e-05, "loss": 0.8262, "step": 6005 }, { "epoch": 0.4419222809841343, "grad_norm": 0.80859375, "learning_rate": 2.9638102203164663e-05, "loss": 1.0234, "step": 6006 }, { "epoch": 0.4419958611174983, "grad_norm": 1.2734375, "learning_rate": 2.9632411027737135e-05, "loss": 1.6336, "step": 6007 }, { "epoch": 0.4420694412508623, "grad_norm": 0.9296875, "learning_rate": 2.9626719603696852e-05, "loss": 1.2063, "step": 6008 }, { "epoch": 0.44214302138422623, "grad_norm": 0.72265625, "learning_rate": 2.9621027931349267e-05, "loss": 1.0634, "step": 6009 }, { "epoch": 0.44221660151759024, "grad_norm": 0.79296875, "learning_rate": 2.9615336010999844e-05, "loss": 1.0263, "step": 6010 }, { "epoch": 0.44229018165095424, "grad_norm": 0.8359375, "learning_rate": 2.960964384295405e-05, "loss": 1.041, "step": 6011 }, { "epoch": 0.44236376178431824, "grad_norm": 0.9453125, "learning_rate": 2.9603951427517385e-05, "loss": 0.983, "step": 6012 }, { "epoch": 0.44243734191768225, "grad_norm": 1.0390625, "learning_rate": 2.9598258764995336e-05, "loss": 0.6677, "step": 6013 }, { "epoch": 0.4425109220510462, "grad_norm": 0.96484375, "learning_rate": 2.959256585569343e-05, "loss": 1.0042, "step": 6014 }, { "epoch": 0.4425845021844102, "grad_norm": 0.9765625, "learning_rate": 2.9586872699917178e-05, "loss": 0.783, "step": 6015 }, { "epoch": 0.4426580823177742, "grad_norm": 0.88671875, "learning_rate": 2.958117929797214e-05, "loss": 0.7919, "step": 6016 }, { "epoch": 0.4427316624511382, "grad_norm": 0.68359375, "learning_rate": 2.957548565016386e-05, "loss": 1.0166, "step": 6017 }, { "epoch": 0.44280524258450216, "grad_norm": 0.9453125, "learning_rate": 2.9569791756797905e-05, "loss": 0.9252, "step": 6018 }, { "epoch": 0.44287882271786616, "grad_norm": 1.03125, "learning_rate": 2.956409761817986e-05, "loss": 1.3885, "step": 6019 }, { "epoch": 0.44295240285123016, "grad_norm": 1.109375, "learning_rate": 2.955840323461532e-05, "loss": 1.5966, "step": 6020 }, { "epoch": 0.44302598298459417, "grad_norm": 0.9453125, "learning_rate": 2.9552708606409885e-05, "loss": 1.6081, "step": 6021 }, { "epoch": 0.4430995631179582, "grad_norm": 0.88671875, "learning_rate": 2.9547013733869172e-05, "loss": 0.6657, "step": 6022 }, { "epoch": 0.4431731432513221, "grad_norm": 1.171875, "learning_rate": 2.9541318617298825e-05, "loss": 0.9565, "step": 6023 }, { "epoch": 0.4432467233846861, "grad_norm": 0.8984375, "learning_rate": 2.953562325700448e-05, "loss": 1.0535, "step": 6024 }, { "epoch": 0.44332030351805013, "grad_norm": 0.8984375, "learning_rate": 2.9529927653291806e-05, "loss": 0.9568, "step": 6025 }, { "epoch": 0.44339388365141413, "grad_norm": 1.1640625, "learning_rate": 2.9524231806466475e-05, "loss": 1.438, "step": 6026 }, { "epoch": 0.44346746378477814, "grad_norm": 1.0078125, "learning_rate": 2.9518535716834162e-05, "loss": 0.9918, "step": 6027 }, { "epoch": 0.4435410439181421, "grad_norm": 0.96484375, "learning_rate": 2.9512839384700575e-05, "loss": 0.8983, "step": 6028 }, { "epoch": 0.4436146240515061, "grad_norm": 0.9921875, "learning_rate": 2.950714281037142e-05, "loss": 1.0855, "step": 6029 }, { "epoch": 0.4436882041848701, "grad_norm": 1.0078125, "learning_rate": 2.950144599415242e-05, "loss": 1.0439, "step": 6030 }, { "epoch": 0.4437617843182341, "grad_norm": 0.97265625, "learning_rate": 2.9495748936349316e-05, "loss": 1.113, "step": 6031 }, { "epoch": 0.44383536445159805, "grad_norm": 0.9765625, "learning_rate": 2.9490051637267864e-05, "loss": 1.3181, "step": 6032 }, { "epoch": 0.44390894458496205, "grad_norm": 0.7578125, "learning_rate": 2.9484354097213818e-05, "loss": 0.8374, "step": 6033 }, { "epoch": 0.44398252471832605, "grad_norm": 0.81640625, "learning_rate": 2.9478656316492952e-05, "loss": 0.9774, "step": 6034 }, { "epoch": 0.44405610485169006, "grad_norm": 0.75, "learning_rate": 2.9472958295411067e-05, "loss": 0.9186, "step": 6035 }, { "epoch": 0.44412968498505406, "grad_norm": 0.8671875, "learning_rate": 2.9467260034273957e-05, "loss": 0.7875, "step": 6036 }, { "epoch": 0.444203265118418, "grad_norm": 1.0859375, "learning_rate": 2.9461561533387437e-05, "loss": 1.146, "step": 6037 }, { "epoch": 0.444276845251782, "grad_norm": 0.86328125, "learning_rate": 2.9455862793057338e-05, "loss": 1.0432, "step": 6038 }, { "epoch": 0.444350425385146, "grad_norm": 0.7109375, "learning_rate": 2.9450163813589498e-05, "loss": 0.966, "step": 6039 }, { "epoch": 0.44442400551851, "grad_norm": 0.8515625, "learning_rate": 2.9444464595289772e-05, "loss": 0.8573, "step": 6040 }, { "epoch": 0.44449758565187397, "grad_norm": 1.1328125, "learning_rate": 2.943876513846403e-05, "loss": 1.5034, "step": 6041 }, { "epoch": 0.444571165785238, "grad_norm": 0.765625, "learning_rate": 2.9433065443418138e-05, "loss": 0.7055, "step": 6042 }, { "epoch": 0.444644745918602, "grad_norm": 0.89453125, "learning_rate": 2.9427365510458e-05, "loss": 0.6595, "step": 6043 }, { "epoch": 0.444718326051966, "grad_norm": 0.76171875, "learning_rate": 2.9421665339889515e-05, "loss": 0.8481, "step": 6044 }, { "epoch": 0.44479190618533, "grad_norm": 0.6875, "learning_rate": 2.94159649320186e-05, "loss": 0.5326, "step": 6045 }, { "epoch": 0.44486548631869394, "grad_norm": 0.796875, "learning_rate": 2.94102642871512e-05, "loss": 0.8251, "step": 6046 }, { "epoch": 0.44493906645205794, "grad_norm": 0.6875, "learning_rate": 2.9404563405593235e-05, "loss": 0.6753, "step": 6047 }, { "epoch": 0.44501264658542194, "grad_norm": 0.7890625, "learning_rate": 2.939886228765068e-05, "loss": 0.8832, "step": 6048 }, { "epoch": 0.44508622671878595, "grad_norm": 0.95703125, "learning_rate": 2.939316093362948e-05, "loss": 0.9699, "step": 6049 }, { "epoch": 0.4451598068521499, "grad_norm": 0.703125, "learning_rate": 2.9387459343835644e-05, "loss": 0.6979, "step": 6050 }, { "epoch": 0.4452333869855139, "grad_norm": 0.75390625, "learning_rate": 2.9381757518575142e-05, "loss": 0.8925, "step": 6051 }, { "epoch": 0.4453069671188779, "grad_norm": 0.80859375, "learning_rate": 2.9376055458153994e-05, "loss": 0.821, "step": 6052 }, { "epoch": 0.4453805472522419, "grad_norm": 0.82421875, "learning_rate": 2.9370353162878216e-05, "loss": 0.8584, "step": 6053 }, { "epoch": 0.4454541273856059, "grad_norm": 0.83984375, "learning_rate": 2.9364650633053838e-05, "loss": 1.0352, "step": 6054 }, { "epoch": 0.44552770751896986, "grad_norm": 0.921875, "learning_rate": 2.935894786898691e-05, "loss": 0.9812, "step": 6055 }, { "epoch": 0.44560128765233387, "grad_norm": 0.66796875, "learning_rate": 2.935324487098347e-05, "loss": 0.9025, "step": 6056 }, { "epoch": 0.44567486778569787, "grad_norm": 0.796875, "learning_rate": 2.934754163934961e-05, "loss": 1.0327, "step": 6057 }, { "epoch": 0.4457484479190619, "grad_norm": 0.90625, "learning_rate": 2.934183817439139e-05, "loss": 0.8906, "step": 6058 }, { "epoch": 0.4458220280524258, "grad_norm": 0.7890625, "learning_rate": 2.9336134476414923e-05, "loss": 0.7753, "step": 6059 }, { "epoch": 0.4458956081857898, "grad_norm": 0.93359375, "learning_rate": 2.933043054572631e-05, "loss": 0.884, "step": 6060 }, { "epoch": 0.44596918831915383, "grad_norm": 0.78515625, "learning_rate": 2.932472638263167e-05, "loss": 0.8998, "step": 6061 }, { "epoch": 0.44604276845251783, "grad_norm": 0.9140625, "learning_rate": 2.9319021987437134e-05, "loss": 1.1624, "step": 6062 }, { "epoch": 0.44611634858588184, "grad_norm": 0.99609375, "learning_rate": 2.931331736044884e-05, "loss": 1.1614, "step": 6063 }, { "epoch": 0.4461899287192458, "grad_norm": 0.7421875, "learning_rate": 2.9307612501972948e-05, "loss": 0.7683, "step": 6064 }, { "epoch": 0.4462635088526098, "grad_norm": 1.03125, "learning_rate": 2.9301907412315632e-05, "loss": 1.1088, "step": 6065 }, { "epoch": 0.4463370889859738, "grad_norm": 0.8671875, "learning_rate": 2.9296202091783072e-05, "loss": 0.9251, "step": 6066 }, { "epoch": 0.4464106691193378, "grad_norm": 0.859375, "learning_rate": 2.929049654068146e-05, "loss": 0.7168, "step": 6067 }, { "epoch": 0.44648424925270175, "grad_norm": 0.90234375, "learning_rate": 2.9284790759317004e-05, "loss": 1.1251, "step": 6068 }, { "epoch": 0.44655782938606575, "grad_norm": 1.046875, "learning_rate": 2.9279084747995922e-05, "loss": 1.3019, "step": 6069 }, { "epoch": 0.44663140951942976, "grad_norm": 0.80078125, "learning_rate": 2.9273378507024435e-05, "loss": 0.6461, "step": 6070 }, { "epoch": 0.44670498965279376, "grad_norm": 1.0703125, "learning_rate": 2.92676720367088e-05, "loss": 1.0662, "step": 6071 }, { "epoch": 0.44677856978615776, "grad_norm": 0.921875, "learning_rate": 2.926196533735527e-05, "loss": 1.0884, "step": 6072 }, { "epoch": 0.4468521499195217, "grad_norm": 0.98046875, "learning_rate": 2.925625840927011e-05, "loss": 1.2211, "step": 6073 }, { "epoch": 0.4469257300528857, "grad_norm": 0.87890625, "learning_rate": 2.9250551252759595e-05, "loss": 1.1065, "step": 6074 }, { "epoch": 0.4469993101862497, "grad_norm": 0.8515625, "learning_rate": 2.9244843868130023e-05, "loss": 0.94, "step": 6075 }, { "epoch": 0.4470728903196137, "grad_norm": 0.9453125, "learning_rate": 2.9239136255687698e-05, "loss": 0.7705, "step": 6076 }, { "epoch": 0.4471464704529777, "grad_norm": 0.84765625, "learning_rate": 2.923342841573894e-05, "loss": 1.1884, "step": 6077 }, { "epoch": 0.4472200505863417, "grad_norm": 0.734375, "learning_rate": 2.922772034859007e-05, "loss": 0.9155, "step": 6078 }, { "epoch": 0.4472936307197057, "grad_norm": 0.85546875, "learning_rate": 2.9222012054547436e-05, "loss": 0.9214, "step": 6079 }, { "epoch": 0.4473672108530697, "grad_norm": 0.8671875, "learning_rate": 2.9216303533917394e-05, "loss": 1.0133, "step": 6080 }, { "epoch": 0.4474407909864337, "grad_norm": 0.72265625, "learning_rate": 2.9210594787006302e-05, "loss": 0.9676, "step": 6081 }, { "epoch": 0.44751437111979764, "grad_norm": 1.0703125, "learning_rate": 2.9204885814120535e-05, "loss": 1.2342, "step": 6082 }, { "epoch": 0.44758795125316164, "grad_norm": 1.0703125, "learning_rate": 2.9199176615566498e-05, "loss": 1.4221, "step": 6083 }, { "epoch": 0.44766153138652565, "grad_norm": 0.62890625, "learning_rate": 2.9193467191650577e-05, "loss": 0.6799, "step": 6084 }, { "epoch": 0.44773511151988965, "grad_norm": 0.64453125, "learning_rate": 2.918775754267919e-05, "loss": 0.8441, "step": 6085 }, { "epoch": 0.4478086916532536, "grad_norm": 0.90234375, "learning_rate": 2.9182047668958766e-05, "loss": 0.7616, "step": 6086 }, { "epoch": 0.4478822717866176, "grad_norm": 0.86328125, "learning_rate": 2.9176337570795752e-05, "loss": 0.8768, "step": 6087 }, { "epoch": 0.4479558519199816, "grad_norm": 0.74609375, "learning_rate": 2.917062724849658e-05, "loss": 0.6916, "step": 6088 }, { "epoch": 0.4480294320533456, "grad_norm": 0.89453125, "learning_rate": 2.9164916702367722e-05, "loss": 0.6812, "step": 6089 }, { "epoch": 0.4481030121867096, "grad_norm": 0.6953125, "learning_rate": 2.915920593271565e-05, "loss": 1.0242, "step": 6090 }, { "epoch": 0.44817659232007356, "grad_norm": 0.90625, "learning_rate": 2.9153494939846855e-05, "loss": 0.8894, "step": 6091 }, { "epoch": 0.44825017245343757, "grad_norm": 1.0546875, "learning_rate": 2.914778372406782e-05, "loss": 1.1701, "step": 6092 }, { "epoch": 0.44832375258680157, "grad_norm": 0.9296875, "learning_rate": 2.914207228568508e-05, "loss": 1.1768, "step": 6093 }, { "epoch": 0.4483973327201656, "grad_norm": 1.0625, "learning_rate": 2.9136360625005142e-05, "loss": 0.9539, "step": 6094 }, { "epoch": 0.4484709128535295, "grad_norm": 0.82421875, "learning_rate": 2.913064874233454e-05, "loss": 1.1154, "step": 6095 }, { "epoch": 0.44854449298689353, "grad_norm": 1.03125, "learning_rate": 2.912493663797982e-05, "loss": 1.4624, "step": 6096 }, { "epoch": 0.44861807312025753, "grad_norm": 0.97265625, "learning_rate": 2.911922431224754e-05, "loss": 1.0469, "step": 6097 }, { "epoch": 0.44869165325362154, "grad_norm": 0.83203125, "learning_rate": 2.9113511765444272e-05, "loss": 0.5635, "step": 6098 }, { "epoch": 0.44876523338698554, "grad_norm": 1.2734375, "learning_rate": 2.91077989978766e-05, "loss": 1.336, "step": 6099 }, { "epoch": 0.4488388135203495, "grad_norm": 0.96484375, "learning_rate": 2.910208600985111e-05, "loss": 1.1571, "step": 6100 }, { "epoch": 0.4489123936537135, "grad_norm": 0.91796875, "learning_rate": 2.9096372801674416e-05, "loss": 0.7791, "step": 6101 }, { "epoch": 0.4489859737870775, "grad_norm": 0.765625, "learning_rate": 2.909065937365313e-05, "loss": 0.8365, "step": 6102 }, { "epoch": 0.4490595539204415, "grad_norm": 1.0234375, "learning_rate": 2.908494572609388e-05, "loss": 0.9837, "step": 6103 }, { "epoch": 0.44913313405380545, "grad_norm": 0.86328125, "learning_rate": 2.9079231859303306e-05, "loss": 1.0402, "step": 6104 }, { "epoch": 0.44920671418716945, "grad_norm": 0.90625, "learning_rate": 2.9073517773588067e-05, "loss": 1.1519, "step": 6105 }, { "epoch": 0.44928029432053346, "grad_norm": 0.7421875, "learning_rate": 2.9067803469254818e-05, "loss": 1.0381, "step": 6106 }, { "epoch": 0.44935387445389746, "grad_norm": 0.82421875, "learning_rate": 2.9062088946610244e-05, "loss": 0.8927, "step": 6107 }, { "epoch": 0.44942745458726147, "grad_norm": 0.71484375, "learning_rate": 2.905637420596103e-05, "loss": 0.5416, "step": 6108 }, { "epoch": 0.4495010347206254, "grad_norm": 1.1640625, "learning_rate": 2.905065924761387e-05, "loss": 1.0362, "step": 6109 }, { "epoch": 0.4495746148539894, "grad_norm": 0.74609375, "learning_rate": 2.904494407187548e-05, "loss": 0.749, "step": 6110 }, { "epoch": 0.4496481949873534, "grad_norm": 0.99609375, "learning_rate": 2.9039228679052583e-05, "loss": 0.9613, "step": 6111 }, { "epoch": 0.4497217751207174, "grad_norm": 0.9765625, "learning_rate": 2.9033513069451905e-05, "loss": 1.0949, "step": 6112 }, { "epoch": 0.4497953552540814, "grad_norm": 0.76953125, "learning_rate": 2.9027797243380205e-05, "loss": 0.7375, "step": 6113 }, { "epoch": 0.4498689353874454, "grad_norm": 0.734375, "learning_rate": 2.902208120114423e-05, "loss": 0.6392, "step": 6114 }, { "epoch": 0.4499425155208094, "grad_norm": 0.8671875, "learning_rate": 2.9016364943050763e-05, "loss": 1.0716, "step": 6115 }, { "epoch": 0.4500160956541734, "grad_norm": 0.703125, "learning_rate": 2.9010648469406565e-05, "loss": 1.2349, "step": 6116 }, { "epoch": 0.4500896757875374, "grad_norm": 0.65625, "learning_rate": 2.900493178051845e-05, "loss": 0.6422, "step": 6117 }, { "epoch": 0.45016325592090134, "grad_norm": 0.984375, "learning_rate": 2.8999214876693205e-05, "loss": 1.2383, "step": 6118 }, { "epoch": 0.45023683605426534, "grad_norm": 0.90234375, "learning_rate": 2.899349775823764e-05, "loss": 0.9533, "step": 6119 }, { "epoch": 0.45031041618762935, "grad_norm": 1.2265625, "learning_rate": 2.8987780425458604e-05, "loss": 1.0481, "step": 6120 }, { "epoch": 0.45038399632099335, "grad_norm": 0.8984375, "learning_rate": 2.898206287866293e-05, "loss": 1.1189, "step": 6121 }, { "epoch": 0.4504575764543573, "grad_norm": 0.9921875, "learning_rate": 2.897634511815745e-05, "loss": 0.6942, "step": 6122 }, { "epoch": 0.4505311565877213, "grad_norm": 0.91015625, "learning_rate": 2.897062714424904e-05, "loss": 1.0282, "step": 6123 }, { "epoch": 0.4506047367210853, "grad_norm": 0.81640625, "learning_rate": 2.896490895724458e-05, "loss": 1.0714, "step": 6124 }, { "epoch": 0.4506783168544493, "grad_norm": 0.84375, "learning_rate": 2.8959190557450937e-05, "loss": 0.9996, "step": 6125 }, { "epoch": 0.4507518969878133, "grad_norm": 0.9453125, "learning_rate": 2.8953471945175013e-05, "loss": 1.0714, "step": 6126 }, { "epoch": 0.45082547712117726, "grad_norm": 0.91796875, "learning_rate": 2.894775312072372e-05, "loss": 1.0969, "step": 6127 }, { "epoch": 0.45089905725454127, "grad_norm": 1.2890625, "learning_rate": 2.894203408440398e-05, "loss": 1.3571, "step": 6128 }, { "epoch": 0.4509726373879053, "grad_norm": 0.96875, "learning_rate": 2.8936314836522706e-05, "loss": 0.7812, "step": 6129 }, { "epoch": 0.4510462175212693, "grad_norm": 0.94140625, "learning_rate": 2.893059537738685e-05, "loss": 0.8675, "step": 6130 }, { "epoch": 0.4511197976546332, "grad_norm": 0.8125, "learning_rate": 2.8924875707303366e-05, "loss": 1.1733, "step": 6131 }, { "epoch": 0.45119337778799723, "grad_norm": 1.046875, "learning_rate": 2.8919155826579215e-05, "loss": 1.1205, "step": 6132 }, { "epoch": 0.45126695792136123, "grad_norm": 0.89453125, "learning_rate": 2.8913435735521377e-05, "loss": 0.9608, "step": 6133 }, { "epoch": 0.45134053805472524, "grad_norm": 0.89453125, "learning_rate": 2.8907715434436834e-05, "loss": 0.8731, "step": 6134 }, { "epoch": 0.45141411818808924, "grad_norm": 0.84375, "learning_rate": 2.8901994923632582e-05, "loss": 1.1364, "step": 6135 }, { "epoch": 0.4514876983214532, "grad_norm": 0.84375, "learning_rate": 2.889627420341563e-05, "loss": 0.8074, "step": 6136 }, { "epoch": 0.4515612784548172, "grad_norm": 0.828125, "learning_rate": 2.8890553274093006e-05, "loss": 0.909, "step": 6137 }, { "epoch": 0.4516348585881812, "grad_norm": 0.69921875, "learning_rate": 2.8884832135971735e-05, "loss": 0.6877, "step": 6138 }, { "epoch": 0.4517084387215452, "grad_norm": 1.0390625, "learning_rate": 2.887911078935885e-05, "loss": 0.9929, "step": 6139 }, { "epoch": 0.45178201885490915, "grad_norm": 0.859375, "learning_rate": 2.8873389234561428e-05, "loss": 0.6979, "step": 6140 }, { "epoch": 0.45185559898827315, "grad_norm": 0.82421875, "learning_rate": 2.8867667471886518e-05, "loss": 1.1729, "step": 6141 }, { "epoch": 0.45192917912163716, "grad_norm": 0.86328125, "learning_rate": 2.88619455016412e-05, "loss": 0.9276, "step": 6142 }, { "epoch": 0.45200275925500116, "grad_norm": 1.0, "learning_rate": 2.885622332413256e-05, "loss": 1.0578, "step": 6143 }, { "epoch": 0.45207633938836517, "grad_norm": 1.0, "learning_rate": 2.88505009396677e-05, "loss": 1.0555, "step": 6144 }, { "epoch": 0.4521499195217291, "grad_norm": 0.8125, "learning_rate": 2.8844778348553724e-05, "loss": 1.0048, "step": 6145 }, { "epoch": 0.4522234996550931, "grad_norm": 0.84765625, "learning_rate": 2.8839055551097755e-05, "loss": 0.837, "step": 6146 }, { "epoch": 0.4522970797884571, "grad_norm": 0.73046875, "learning_rate": 2.8833332547606927e-05, "loss": 0.6533, "step": 6147 }, { "epoch": 0.4523706599218211, "grad_norm": 1.0078125, "learning_rate": 2.8827609338388385e-05, "loss": 1.1388, "step": 6148 }, { "epoch": 0.4524442400551851, "grad_norm": 0.66796875, "learning_rate": 2.8821885923749275e-05, "loss": 0.8837, "step": 6149 }, { "epoch": 0.4525178201885491, "grad_norm": 0.73046875, "learning_rate": 2.881616230399677e-05, "loss": 0.9821, "step": 6150 }, { "epoch": 0.4525914003219131, "grad_norm": 1.015625, "learning_rate": 2.881043847943804e-05, "loss": 1.0957, "step": 6151 }, { "epoch": 0.4526649804552771, "grad_norm": 0.8125, "learning_rate": 2.8804714450380266e-05, "loss": 0.7325, "step": 6152 }, { "epoch": 0.4527385605886411, "grad_norm": 1.0, "learning_rate": 2.879899021713066e-05, "loss": 0.6788, "step": 6153 }, { "epoch": 0.45281214072200504, "grad_norm": 0.890625, "learning_rate": 2.8793265779996427e-05, "loss": 0.6262, "step": 6154 }, { "epoch": 0.45288572085536904, "grad_norm": 0.640625, "learning_rate": 2.878754113928478e-05, "loss": 0.8315, "step": 6155 }, { "epoch": 0.45295930098873305, "grad_norm": 0.99609375, "learning_rate": 2.878181629530296e-05, "loss": 0.8807, "step": 6156 }, { "epoch": 0.45303288112209705, "grad_norm": 0.71875, "learning_rate": 2.8776091248358194e-05, "loss": 0.78, "step": 6157 }, { "epoch": 0.453106461255461, "grad_norm": 0.83203125, "learning_rate": 2.8770365998757754e-05, "loss": 0.7171, "step": 6158 }, { "epoch": 0.453180041388825, "grad_norm": 0.87109375, "learning_rate": 2.8764640546808873e-05, "loss": 0.9987, "step": 6159 }, { "epoch": 0.453253621522189, "grad_norm": 0.76171875, "learning_rate": 2.875891489281886e-05, "loss": 0.6616, "step": 6160 }, { "epoch": 0.453327201655553, "grad_norm": 0.84375, "learning_rate": 2.875318903709498e-05, "loss": 0.9825, "step": 6161 }, { "epoch": 0.453400781788917, "grad_norm": 0.84375, "learning_rate": 2.8747462979944534e-05, "loss": 0.7909, "step": 6162 }, { "epoch": 0.45347436192228097, "grad_norm": 0.8515625, "learning_rate": 2.8741736721674827e-05, "loss": 0.7882, "step": 6163 }, { "epoch": 0.45354794205564497, "grad_norm": 0.76953125, "learning_rate": 2.8736010262593178e-05, "loss": 0.8349, "step": 6164 }, { "epoch": 0.453621522189009, "grad_norm": 0.76953125, "learning_rate": 2.8730283603006907e-05, "loss": 0.9046, "step": 6165 }, { "epoch": 0.453695102322373, "grad_norm": 0.9140625, "learning_rate": 2.872455674322337e-05, "loss": 1.1353, "step": 6166 }, { "epoch": 0.4537686824557369, "grad_norm": 0.81640625, "learning_rate": 2.871882968354991e-05, "loss": 0.9633, "step": 6167 }, { "epoch": 0.45384226258910093, "grad_norm": 0.62109375, "learning_rate": 2.8713102424293874e-05, "loss": 0.6544, "step": 6168 }, { "epoch": 0.45391584272246494, "grad_norm": 1.0625, "learning_rate": 2.870737496576265e-05, "loss": 1.5257, "step": 6169 }, { "epoch": 0.45398942285582894, "grad_norm": 1.0078125, "learning_rate": 2.8701647308263613e-05, "loss": 0.6725, "step": 6170 }, { "epoch": 0.45406300298919294, "grad_norm": 0.92578125, "learning_rate": 2.8695919452104157e-05, "loss": 1.0626, "step": 6171 }, { "epoch": 0.4541365831225569, "grad_norm": 1.0078125, "learning_rate": 2.8690191397591682e-05, "loss": 0.9497, "step": 6172 }, { "epoch": 0.4542101632559209, "grad_norm": 0.6953125, "learning_rate": 2.86844631450336e-05, "loss": 0.7885, "step": 6173 }, { "epoch": 0.4542837433892849, "grad_norm": 0.83203125, "learning_rate": 2.8678734694737352e-05, "loss": 0.8002, "step": 6174 }, { "epoch": 0.4543573235226489, "grad_norm": 0.90625, "learning_rate": 2.8673006047010353e-05, "loss": 0.8922, "step": 6175 }, { "epoch": 0.45443090365601285, "grad_norm": 1.0078125, "learning_rate": 2.8667277202160066e-05, "loss": 1.1074, "step": 6176 }, { "epoch": 0.45450448378937686, "grad_norm": 0.84765625, "learning_rate": 2.8661548160493927e-05, "loss": 0.7483, "step": 6177 }, { "epoch": 0.45457806392274086, "grad_norm": 0.74609375, "learning_rate": 2.865581892231942e-05, "loss": 0.8306, "step": 6178 }, { "epoch": 0.45465164405610486, "grad_norm": 0.90234375, "learning_rate": 2.865008948794401e-05, "loss": 1.1476, "step": 6179 }, { "epoch": 0.45472522418946887, "grad_norm": 0.66796875, "learning_rate": 2.8644359857675195e-05, "loss": 0.7028, "step": 6180 }, { "epoch": 0.4547988043228328, "grad_norm": 0.9140625, "learning_rate": 2.8638630031820472e-05, "loss": 1.1273, "step": 6181 }, { "epoch": 0.4548723844561968, "grad_norm": 0.83203125, "learning_rate": 2.863290001068735e-05, "loss": 1.0302, "step": 6182 }, { "epoch": 0.4549459645895608, "grad_norm": 0.828125, "learning_rate": 2.862716979458334e-05, "loss": 1.0111, "step": 6183 }, { "epoch": 0.45501954472292483, "grad_norm": 0.80859375, "learning_rate": 2.8621439383815985e-05, "loss": 0.8821, "step": 6184 }, { "epoch": 0.4550931248562888, "grad_norm": 0.93359375, "learning_rate": 2.8615708778692807e-05, "loss": 0.9798, "step": 6185 }, { "epoch": 0.4551667049896528, "grad_norm": 0.9375, "learning_rate": 2.8609977979521375e-05, "loss": 1.1728, "step": 6186 }, { "epoch": 0.4552402851230168, "grad_norm": 0.6953125, "learning_rate": 2.860424698660924e-05, "loss": 0.7231, "step": 6187 }, { "epoch": 0.4553138652563808, "grad_norm": 0.7890625, "learning_rate": 2.859851580026398e-05, "loss": 0.9579, "step": 6188 }, { "epoch": 0.4553874453897448, "grad_norm": 0.765625, "learning_rate": 2.8592784420793173e-05, "loss": 0.9078, "step": 6189 }, { "epoch": 0.45546102552310874, "grad_norm": 1.03125, "learning_rate": 2.8587052848504414e-05, "loss": 1.1242, "step": 6190 }, { "epoch": 0.45553460565647275, "grad_norm": 0.8125, "learning_rate": 2.8581321083705298e-05, "loss": 1.0109, "step": 6191 }, { "epoch": 0.45560818578983675, "grad_norm": 0.72265625, "learning_rate": 2.8575589126703452e-05, "loss": 0.6212, "step": 6192 }, { "epoch": 0.45568176592320075, "grad_norm": 0.88671875, "learning_rate": 2.856985697780648e-05, "loss": 0.9691, "step": 6193 }, { "epoch": 0.4557553460565647, "grad_norm": 0.890625, "learning_rate": 2.856412463732204e-05, "loss": 0.9252, "step": 6194 }, { "epoch": 0.4558289261899287, "grad_norm": 0.80859375, "learning_rate": 2.8558392105557746e-05, "loss": 0.978, "step": 6195 }, { "epoch": 0.4559025063232927, "grad_norm": 0.75390625, "learning_rate": 2.8552659382821277e-05, "loss": 0.6545, "step": 6196 }, { "epoch": 0.4559760864566567, "grad_norm": 0.93359375, "learning_rate": 2.8546926469420293e-05, "loss": 1.3891, "step": 6197 }, { "epoch": 0.4560496665900207, "grad_norm": 0.9765625, "learning_rate": 2.854119336566246e-05, "loss": 0.9865, "step": 6198 }, { "epoch": 0.45612324672338467, "grad_norm": 1.1015625, "learning_rate": 2.8535460071855464e-05, "loss": 1.1086, "step": 6199 }, { "epoch": 0.45619682685674867, "grad_norm": 0.85546875, "learning_rate": 2.8529726588307005e-05, "loss": 1.3484, "step": 6200 }, { "epoch": 0.4562704069901127, "grad_norm": 0.83984375, "learning_rate": 2.8523992915324794e-05, "loss": 0.8123, "step": 6201 }, { "epoch": 0.4563439871234767, "grad_norm": 0.69921875, "learning_rate": 2.8518259053216534e-05, "loss": 0.9677, "step": 6202 }, { "epoch": 0.45641756725684063, "grad_norm": 0.85546875, "learning_rate": 2.8512525002289954e-05, "loss": 1.1213, "step": 6203 }, { "epoch": 0.45649114739020463, "grad_norm": 1.109375, "learning_rate": 2.8506790762852796e-05, "loss": 1.123, "step": 6204 }, { "epoch": 0.45656472752356864, "grad_norm": 0.8984375, "learning_rate": 2.8501056335212794e-05, "loss": 1.2317, "step": 6205 }, { "epoch": 0.45663830765693264, "grad_norm": 0.91015625, "learning_rate": 2.8495321719677714e-05, "loss": 1.1782, "step": 6206 }, { "epoch": 0.45671188779029664, "grad_norm": 0.8984375, "learning_rate": 2.8489586916555322e-05, "loss": 0.6275, "step": 6207 }, { "epoch": 0.4567854679236606, "grad_norm": 0.7109375, "learning_rate": 2.8483851926153393e-05, "loss": 0.6487, "step": 6208 }, { "epoch": 0.4568590480570246, "grad_norm": 0.84375, "learning_rate": 2.8478116748779705e-05, "loss": 0.946, "step": 6209 }, { "epoch": 0.4569326281903886, "grad_norm": 0.8359375, "learning_rate": 2.847238138474207e-05, "loss": 1.0903, "step": 6210 }, { "epoch": 0.4570062083237526, "grad_norm": 0.73046875, "learning_rate": 2.8466645834348272e-05, "loss": 0.7167, "step": 6211 }, { "epoch": 0.45707978845711655, "grad_norm": 1.1015625, "learning_rate": 2.8460910097906148e-05, "loss": 1.0082, "step": 6212 }, { "epoch": 0.45715336859048056, "grad_norm": 0.86328125, "learning_rate": 2.8455174175723508e-05, "loss": 0.9808, "step": 6213 }, { "epoch": 0.45722694872384456, "grad_norm": 0.8828125, "learning_rate": 2.8449438068108198e-05, "loss": 0.6252, "step": 6214 }, { "epoch": 0.45730052885720857, "grad_norm": 0.8671875, "learning_rate": 2.844370177536807e-05, "loss": 0.8817, "step": 6215 }, { "epoch": 0.45737410899057257, "grad_norm": 1.0078125, "learning_rate": 2.8437965297810966e-05, "loss": 1.0954, "step": 6216 }, { "epoch": 0.4574476891239365, "grad_norm": 1.015625, "learning_rate": 2.843222863574476e-05, "loss": 1.0867, "step": 6217 }, { "epoch": 0.4575212692573005, "grad_norm": 0.8515625, "learning_rate": 2.842649178947732e-05, "loss": 1.1105, "step": 6218 }, { "epoch": 0.4575948493906645, "grad_norm": 0.8203125, "learning_rate": 2.842075475931654e-05, "loss": 0.9596, "step": 6219 }, { "epoch": 0.45766842952402853, "grad_norm": 0.7734375, "learning_rate": 2.8415017545570318e-05, "loss": 1.0159, "step": 6220 }, { "epoch": 0.4577420096573925, "grad_norm": 0.86328125, "learning_rate": 2.8409280148546548e-05, "loss": 1.4714, "step": 6221 }, { "epoch": 0.4578155897907565, "grad_norm": 0.87109375, "learning_rate": 2.840354256855316e-05, "loss": 0.8535, "step": 6222 }, { "epoch": 0.4578891699241205, "grad_norm": 0.9609375, "learning_rate": 2.8397804805898058e-05, "loss": 1.2393, "step": 6223 }, { "epoch": 0.4579627500574845, "grad_norm": 0.7578125, "learning_rate": 2.8392066860889206e-05, "loss": 0.7248, "step": 6224 }, { "epoch": 0.4580363301908485, "grad_norm": 0.87890625, "learning_rate": 2.8386328733834522e-05, "loss": 0.9474, "step": 6225 }, { "epoch": 0.45810991032421244, "grad_norm": 0.953125, "learning_rate": 2.838059042504197e-05, "loss": 0.9051, "step": 6226 }, { "epoch": 0.45818349045757645, "grad_norm": 0.8828125, "learning_rate": 2.8374851934819514e-05, "loss": 1.2669, "step": 6227 }, { "epoch": 0.45825707059094045, "grad_norm": 0.67578125, "learning_rate": 2.836911326347513e-05, "loss": 0.6519, "step": 6228 }, { "epoch": 0.45833065072430446, "grad_norm": 0.87890625, "learning_rate": 2.8363374411316806e-05, "loss": 1.4039, "step": 6229 }, { "epoch": 0.4584042308576684, "grad_norm": 0.953125, "learning_rate": 2.8357635378652527e-05, "loss": 0.9145, "step": 6230 }, { "epoch": 0.4584778109910324, "grad_norm": 0.8828125, "learning_rate": 2.83518961657903e-05, "loss": 1.0376, "step": 6231 }, { "epoch": 0.4585513911243964, "grad_norm": 0.796875, "learning_rate": 2.834615677303813e-05, "loss": 0.8081, "step": 6232 }, { "epoch": 0.4586249712577604, "grad_norm": 0.7890625, "learning_rate": 2.834041720070405e-05, "loss": 0.8228, "step": 6233 }, { "epoch": 0.4586985513911244, "grad_norm": 0.67578125, "learning_rate": 2.8334677449096088e-05, "loss": 0.5794, "step": 6234 }, { "epoch": 0.45877213152448837, "grad_norm": 0.99609375, "learning_rate": 2.832893751852228e-05, "loss": 0.7654, "step": 6235 }, { "epoch": 0.4588457116578524, "grad_norm": 1.21875, "learning_rate": 2.8323197409290696e-05, "loss": 1.2892, "step": 6236 }, { "epoch": 0.4589192917912164, "grad_norm": 0.98046875, "learning_rate": 2.831745712170937e-05, "loss": 1.0228, "step": 6237 }, { "epoch": 0.4589928719245804, "grad_norm": 0.84765625, "learning_rate": 2.83117166560864e-05, "loss": 0.7601, "step": 6238 }, { "epoch": 0.45906645205794433, "grad_norm": 0.7890625, "learning_rate": 2.8305976012729845e-05, "loss": 0.65, "step": 6239 }, { "epoch": 0.45914003219130833, "grad_norm": 0.9296875, "learning_rate": 2.83002351919478e-05, "loss": 1.0663, "step": 6240 }, { "epoch": 0.45921361232467234, "grad_norm": 0.86328125, "learning_rate": 2.8294494194048365e-05, "loss": 1.2407, "step": 6241 }, { "epoch": 0.45928719245803634, "grad_norm": 0.8984375, "learning_rate": 2.8288753019339653e-05, "loss": 1.0828, "step": 6242 }, { "epoch": 0.45936077259140035, "grad_norm": 0.86328125, "learning_rate": 2.8283011668129777e-05, "loss": 0.9263, "step": 6243 }, { "epoch": 0.4594343527247643, "grad_norm": 0.73828125, "learning_rate": 2.8277270140726863e-05, "loss": 0.6277, "step": 6244 }, { "epoch": 0.4595079328581283, "grad_norm": 0.84765625, "learning_rate": 2.8271528437439054e-05, "loss": 0.8775, "step": 6245 }, { "epoch": 0.4595815129914923, "grad_norm": 0.87109375, "learning_rate": 2.8265786558574486e-05, "loss": 1.0949, "step": 6246 }, { "epoch": 0.4596550931248563, "grad_norm": 0.96875, "learning_rate": 2.826004450444133e-05, "loss": 1.1995, "step": 6247 }, { "epoch": 0.45972867325822026, "grad_norm": 1.15625, "learning_rate": 2.825430227534773e-05, "loss": 1.6563, "step": 6248 }, { "epoch": 0.45980225339158426, "grad_norm": 0.890625, "learning_rate": 2.8248559871601887e-05, "loss": 0.9782, "step": 6249 }, { "epoch": 0.45987583352494826, "grad_norm": 0.77734375, "learning_rate": 2.824281729351197e-05, "loss": 0.7899, "step": 6250 }, { "epoch": 0.45994941365831227, "grad_norm": 0.7890625, "learning_rate": 2.8237074541386167e-05, "loss": 0.9182, "step": 6251 }, { "epoch": 0.46002299379167627, "grad_norm": 0.68359375, "learning_rate": 2.8231331615532697e-05, "loss": 0.6859, "step": 6252 }, { "epoch": 0.4600965739250402, "grad_norm": 0.85546875, "learning_rate": 2.8225588516259748e-05, "loss": 1.0853, "step": 6253 }, { "epoch": 0.4601701540584042, "grad_norm": 1.03125, "learning_rate": 2.8219845243875565e-05, "loss": 1.3617, "step": 6254 }, { "epoch": 0.46024373419176823, "grad_norm": 0.89453125, "learning_rate": 2.8214101798688363e-05, "loss": 1.1836, "step": 6255 }, { "epoch": 0.46031731432513223, "grad_norm": 0.9375, "learning_rate": 2.82083581810064e-05, "loss": 0.8941, "step": 6256 }, { "epoch": 0.4603908944584962, "grad_norm": 1.0390625, "learning_rate": 2.8202614391137906e-05, "loss": 0.7217, "step": 6257 }, { "epoch": 0.4604644745918602, "grad_norm": 0.73046875, "learning_rate": 2.8196870429391147e-05, "loss": 0.6514, "step": 6258 }, { "epoch": 0.4605380547252242, "grad_norm": 0.984375, "learning_rate": 2.819112629607439e-05, "loss": 0.9014, "step": 6259 }, { "epoch": 0.4606116348585882, "grad_norm": 0.75390625, "learning_rate": 2.8185381991495908e-05, "loss": 0.708, "step": 6260 }, { "epoch": 0.4606852149919522, "grad_norm": 0.8984375, "learning_rate": 2.8179637515963996e-05, "loss": 1.1131, "step": 6261 }, { "epoch": 0.46075879512531615, "grad_norm": 1.078125, "learning_rate": 2.817389286978694e-05, "loss": 1.6942, "step": 6262 }, { "epoch": 0.46083237525868015, "grad_norm": 1.25, "learning_rate": 2.8168148053273053e-05, "loss": 1.1356, "step": 6263 }, { "epoch": 0.46090595539204415, "grad_norm": 0.80078125, "learning_rate": 2.8162403066730643e-05, "loss": 1.2119, "step": 6264 }, { "epoch": 0.46097953552540816, "grad_norm": 0.875, "learning_rate": 2.815665791046803e-05, "loss": 1.3804, "step": 6265 }, { "epoch": 0.4610531156587721, "grad_norm": 0.99609375, "learning_rate": 2.8150912584793554e-05, "loss": 1.095, "step": 6266 }, { "epoch": 0.4611266957921361, "grad_norm": 1.03125, "learning_rate": 2.8145167090015546e-05, "loss": 1.2664, "step": 6267 }, { "epoch": 0.4612002759255001, "grad_norm": 0.80859375, "learning_rate": 2.8139421426442357e-05, "loss": 0.993, "step": 6268 }, { "epoch": 0.4612738560588641, "grad_norm": 0.78515625, "learning_rate": 2.813367559438235e-05, "loss": 0.9281, "step": 6269 }, { "epoch": 0.4613474361922281, "grad_norm": 1.1328125, "learning_rate": 2.8127929594143903e-05, "loss": 1.0383, "step": 6270 }, { "epoch": 0.46142101632559207, "grad_norm": 0.7421875, "learning_rate": 2.8122183426035377e-05, "loss": 0.9486, "step": 6271 }, { "epoch": 0.4614945964589561, "grad_norm": 0.97265625, "learning_rate": 2.8116437090365166e-05, "loss": 1.5287, "step": 6272 }, { "epoch": 0.4615681765923201, "grad_norm": 1.1171875, "learning_rate": 2.8110690587441656e-05, "loss": 1.0122, "step": 6273 }, { "epoch": 0.4616417567256841, "grad_norm": 0.80859375, "learning_rate": 2.8104943917573262e-05, "loss": 1.0152, "step": 6274 }, { "epoch": 0.46171533685904803, "grad_norm": 0.9765625, "learning_rate": 2.809919708106839e-05, "loss": 1.0841, "step": 6275 }, { "epoch": 0.46178891699241204, "grad_norm": 0.8828125, "learning_rate": 2.809345007823546e-05, "loss": 1.2076, "step": 6276 }, { "epoch": 0.46186249712577604, "grad_norm": 1.0234375, "learning_rate": 2.8087702909382918e-05, "loss": 1.175, "step": 6277 }, { "epoch": 0.46193607725914004, "grad_norm": 0.89453125, "learning_rate": 2.8081955574819184e-05, "loss": 0.932, "step": 6278 }, { "epoch": 0.46200965739250405, "grad_norm": 0.75390625, "learning_rate": 2.8076208074852728e-05, "loss": 0.9306, "step": 6279 }, { "epoch": 0.462083237525868, "grad_norm": 1.1484375, "learning_rate": 2.807046040979198e-05, "loss": 1.2228, "step": 6280 }, { "epoch": 0.462156817659232, "grad_norm": 0.82421875, "learning_rate": 2.806471257994543e-05, "loss": 0.7424, "step": 6281 }, { "epoch": 0.462230397792596, "grad_norm": 0.68359375, "learning_rate": 2.805896458562154e-05, "loss": 0.6086, "step": 6282 }, { "epoch": 0.46230397792596, "grad_norm": 0.69921875, "learning_rate": 2.8053216427128796e-05, "loss": 0.768, "step": 6283 }, { "epoch": 0.46237755805932396, "grad_norm": 0.80859375, "learning_rate": 2.8047468104775697e-05, "loss": 0.8747, "step": 6284 }, { "epoch": 0.46245113819268796, "grad_norm": 0.84375, "learning_rate": 2.8041719618870737e-05, "loss": 0.739, "step": 6285 }, { "epoch": 0.46252471832605196, "grad_norm": 0.96484375, "learning_rate": 2.8035970969722436e-05, "loss": 0.9525, "step": 6286 }, { "epoch": 0.46259829845941597, "grad_norm": 0.93359375, "learning_rate": 2.8030222157639308e-05, "loss": 1.1422, "step": 6287 }, { "epoch": 0.46267187859278, "grad_norm": 0.8203125, "learning_rate": 2.8024473182929872e-05, "loss": 0.64, "step": 6288 }, { "epoch": 0.4627454587261439, "grad_norm": 0.84375, "learning_rate": 2.8018724045902673e-05, "loss": 0.8409, "step": 6289 }, { "epoch": 0.4628190388595079, "grad_norm": 0.78515625, "learning_rate": 2.8012974746866266e-05, "loss": 0.823, "step": 6290 }, { "epoch": 0.46289261899287193, "grad_norm": 0.77734375, "learning_rate": 2.8007225286129187e-05, "loss": 0.7293, "step": 6291 }, { "epoch": 0.46296619912623593, "grad_norm": 0.875, "learning_rate": 2.8001475664000004e-05, "loss": 0.8793, "step": 6292 }, { "epoch": 0.4630397792595999, "grad_norm": 0.8671875, "learning_rate": 2.79957258807873e-05, "loss": 1.0189, "step": 6293 }, { "epoch": 0.4631133593929639, "grad_norm": 0.9765625, "learning_rate": 2.798997593679964e-05, "loss": 0.839, "step": 6294 }, { "epoch": 0.4631869395263279, "grad_norm": 0.7109375, "learning_rate": 2.7984225832345624e-05, "loss": 0.7649, "step": 6295 }, { "epoch": 0.4632605196596919, "grad_norm": 0.98828125, "learning_rate": 2.7978475567733836e-05, "loss": 1.1753, "step": 6296 }, { "epoch": 0.4633340997930559, "grad_norm": 0.84765625, "learning_rate": 2.7972725143272898e-05, "loss": 0.9838, "step": 6297 }, { "epoch": 0.46340767992641985, "grad_norm": 1.0, "learning_rate": 2.7966974559271407e-05, "loss": 1.024, "step": 6298 }, { "epoch": 0.46348126005978385, "grad_norm": 0.72265625, "learning_rate": 2.7961223816038008e-05, "loss": 0.7331, "step": 6299 }, { "epoch": 0.46355484019314785, "grad_norm": 0.79296875, "learning_rate": 2.795547291388131e-05, "loss": 0.9146, "step": 6300 }, { "epoch": 0.46362842032651186, "grad_norm": 0.6875, "learning_rate": 2.794972185310996e-05, "loss": 0.5683, "step": 6301 }, { "epoch": 0.4637020004598758, "grad_norm": 0.9453125, "learning_rate": 2.7943970634032618e-05, "loss": 1.18, "step": 6302 }, { "epoch": 0.4637755805932398, "grad_norm": 0.98828125, "learning_rate": 2.7938219256957926e-05, "loss": 0.6956, "step": 6303 }, { "epoch": 0.4638491607266038, "grad_norm": 0.8828125, "learning_rate": 2.7932467722194562e-05, "loss": 0.8688, "step": 6304 }, { "epoch": 0.4639227408599678, "grad_norm": 1.0625, "learning_rate": 2.7926716030051193e-05, "loss": 1.1108, "step": 6305 }, { "epoch": 0.4639963209933318, "grad_norm": 0.55859375, "learning_rate": 2.79209641808365e-05, "loss": 0.4767, "step": 6306 }, { "epoch": 0.46406990112669577, "grad_norm": 0.75, "learning_rate": 2.7915212174859177e-05, "loss": 0.7869, "step": 6307 }, { "epoch": 0.4641434812600598, "grad_norm": 0.76171875, "learning_rate": 2.7909460012427923e-05, "loss": 1.1705, "step": 6308 }, { "epoch": 0.4642170613934238, "grad_norm": 0.7265625, "learning_rate": 2.7903707693851444e-05, "loss": 0.7204, "step": 6309 }, { "epoch": 0.4642906415267878, "grad_norm": 0.625, "learning_rate": 2.7897955219438454e-05, "loss": 0.7186, "step": 6310 }, { "epoch": 0.46436422166015173, "grad_norm": 0.84765625, "learning_rate": 2.789220258949769e-05, "loss": 0.8165, "step": 6311 }, { "epoch": 0.46443780179351574, "grad_norm": 0.734375, "learning_rate": 2.7886449804337867e-05, "loss": 0.8139, "step": 6312 }, { "epoch": 0.46451138192687974, "grad_norm": 0.8125, "learning_rate": 2.788069686426774e-05, "loss": 1.2423, "step": 6313 }, { "epoch": 0.46458496206024374, "grad_norm": 0.66796875, "learning_rate": 2.787494376959605e-05, "loss": 0.674, "step": 6314 }, { "epoch": 0.46465854219360775, "grad_norm": 0.8828125, "learning_rate": 2.786919052063156e-05, "loss": 1.146, "step": 6315 }, { "epoch": 0.4647321223269717, "grad_norm": 0.8671875, "learning_rate": 2.7863437117683034e-05, "loss": 1.2663, "step": 6316 }, { "epoch": 0.4648057024603357, "grad_norm": 0.75390625, "learning_rate": 2.7857683561059245e-05, "loss": 0.5894, "step": 6317 }, { "epoch": 0.4648792825936997, "grad_norm": 1.0546875, "learning_rate": 2.785192985106898e-05, "loss": 1.1378, "step": 6318 }, { "epoch": 0.4649528627270637, "grad_norm": 1.0, "learning_rate": 2.784617598802102e-05, "loss": 0.7876, "step": 6319 }, { "epoch": 0.46502644286042766, "grad_norm": 0.83984375, "learning_rate": 2.784042197222418e-05, "loss": 0.8514, "step": 6320 }, { "epoch": 0.46510002299379166, "grad_norm": 1.0703125, "learning_rate": 2.7834667803987247e-05, "loss": 1.0082, "step": 6321 }, { "epoch": 0.46517360312715567, "grad_norm": 0.94140625, "learning_rate": 2.782891348361905e-05, "loss": 1.0139, "step": 6322 }, { "epoch": 0.46524718326051967, "grad_norm": 0.8359375, "learning_rate": 2.7823159011428412e-05, "loss": 0.634, "step": 6323 }, { "epoch": 0.4653207633938837, "grad_norm": 0.734375, "learning_rate": 2.7817404387724154e-05, "loss": 0.6427, "step": 6324 }, { "epoch": 0.4653943435272476, "grad_norm": 0.76171875, "learning_rate": 2.7811649612815138e-05, "loss": 0.7685, "step": 6325 }, { "epoch": 0.4654679236606116, "grad_norm": 1.03125, "learning_rate": 2.7805894687010186e-05, "loss": 0.8788, "step": 6326 }, { "epoch": 0.46554150379397563, "grad_norm": 0.6171875, "learning_rate": 2.7800139610618176e-05, "loss": 0.6293, "step": 6327 }, { "epoch": 0.46561508392733963, "grad_norm": 0.66796875, "learning_rate": 2.7794384383947945e-05, "loss": 0.7029, "step": 6328 }, { "epoch": 0.4656886640607036, "grad_norm": 0.828125, "learning_rate": 2.7788629007308396e-05, "loss": 1.1446, "step": 6329 }, { "epoch": 0.4657622441940676, "grad_norm": 0.68359375, "learning_rate": 2.77828734810084e-05, "loss": 0.9256, "step": 6330 }, { "epoch": 0.4658358243274316, "grad_norm": 1.0078125, "learning_rate": 2.7777117805356834e-05, "loss": 1.0913, "step": 6331 }, { "epoch": 0.4659094044607956, "grad_norm": 1.046875, "learning_rate": 2.7771361980662596e-05, "loss": 1.1589, "step": 6332 }, { "epoch": 0.4659829845941596, "grad_norm": 0.765625, "learning_rate": 2.7765606007234597e-05, "loss": 0.7919, "step": 6333 }, { "epoch": 0.46605656472752355, "grad_norm": 0.94140625, "learning_rate": 2.775984988538175e-05, "loss": 0.9905, "step": 6334 }, { "epoch": 0.46613014486088755, "grad_norm": 0.87109375, "learning_rate": 2.7754093615412963e-05, "loss": 0.6927, "step": 6335 }, { "epoch": 0.46620372499425156, "grad_norm": 0.7890625, "learning_rate": 2.7748337197637186e-05, "loss": 0.8133, "step": 6336 }, { "epoch": 0.46627730512761556, "grad_norm": 0.76171875, "learning_rate": 2.7742580632363336e-05, "loss": 0.9428, "step": 6337 }, { "epoch": 0.4663508852609795, "grad_norm": 0.98828125, "learning_rate": 2.7736823919900367e-05, "loss": 0.9724, "step": 6338 }, { "epoch": 0.4664244653943435, "grad_norm": 0.984375, "learning_rate": 2.7731067060557224e-05, "loss": 0.9285, "step": 6339 }, { "epoch": 0.4664980455277075, "grad_norm": 0.859375, "learning_rate": 2.7725310054642866e-05, "loss": 1.1349, "step": 6340 }, { "epoch": 0.4665716256610715, "grad_norm": 0.76171875, "learning_rate": 2.7719552902466273e-05, "loss": 0.9095, "step": 6341 }, { "epoch": 0.4666452057944355, "grad_norm": 1.0, "learning_rate": 2.7713795604336407e-05, "loss": 1.5698, "step": 6342 }, { "epoch": 0.4667187859277995, "grad_norm": 0.83984375, "learning_rate": 2.770803816056226e-05, "loss": 0.9831, "step": 6343 }, { "epoch": 0.4667923660611635, "grad_norm": 0.99609375, "learning_rate": 2.7702280571452817e-05, "loss": 0.7587, "step": 6344 }, { "epoch": 0.4668659461945275, "grad_norm": 0.87890625, "learning_rate": 2.7696522837317085e-05, "loss": 0.9243, "step": 6345 }, { "epoch": 0.4669395263278915, "grad_norm": 1.015625, "learning_rate": 2.769076495846406e-05, "loss": 0.9187, "step": 6346 }, { "epoch": 0.46701310646125543, "grad_norm": 0.8828125, "learning_rate": 2.7685006935202768e-05, "loss": 0.8834, "step": 6347 }, { "epoch": 0.46708668659461944, "grad_norm": 0.83203125, "learning_rate": 2.767924876784221e-05, "loss": 0.8859, "step": 6348 }, { "epoch": 0.46716026672798344, "grad_norm": 0.84375, "learning_rate": 2.7673490456691442e-05, "loss": 1.0394, "step": 6349 }, { "epoch": 0.46723384686134745, "grad_norm": 0.8671875, "learning_rate": 2.7667732002059493e-05, "loss": 0.8982, "step": 6350 }, { "epoch": 0.46730742699471145, "grad_norm": 0.8359375, "learning_rate": 2.76619734042554e-05, "loss": 0.9029, "step": 6351 }, { "epoch": 0.4673810071280754, "grad_norm": 0.9296875, "learning_rate": 2.7656214663588226e-05, "loss": 0.7857, "step": 6352 }, { "epoch": 0.4674545872614394, "grad_norm": 0.94921875, "learning_rate": 2.765045578036703e-05, "loss": 1.067, "step": 6353 }, { "epoch": 0.4675281673948034, "grad_norm": 0.828125, "learning_rate": 2.7644696754900878e-05, "loss": 0.9241, "step": 6354 }, { "epoch": 0.4676017475281674, "grad_norm": 0.765625, "learning_rate": 2.7638937587498835e-05, "loss": 0.723, "step": 6355 }, { "epoch": 0.46767532766153136, "grad_norm": 0.8125, "learning_rate": 2.763317827847e-05, "loss": 1.4426, "step": 6356 }, { "epoch": 0.46774890779489536, "grad_norm": 0.8515625, "learning_rate": 2.762741882812347e-05, "loss": 1.2153, "step": 6357 }, { "epoch": 0.46782248792825937, "grad_norm": 0.8046875, "learning_rate": 2.7621659236768326e-05, "loss": 0.7467, "step": 6358 }, { "epoch": 0.46789606806162337, "grad_norm": 1.0, "learning_rate": 2.7615899504713687e-05, "loss": 1.0205, "step": 6359 }, { "epoch": 0.4679696481949874, "grad_norm": 0.70703125, "learning_rate": 2.761013963226866e-05, "loss": 0.689, "step": 6360 }, { "epoch": 0.4680432283283513, "grad_norm": 0.79296875, "learning_rate": 2.7604379619742378e-05, "loss": 0.7143, "step": 6361 }, { "epoch": 0.46811680846171533, "grad_norm": 0.7109375, "learning_rate": 2.7598619467443943e-05, "loss": 0.7523, "step": 6362 }, { "epoch": 0.46819038859507933, "grad_norm": 0.796875, "learning_rate": 2.7592859175682517e-05, "loss": 0.6483, "step": 6363 }, { "epoch": 0.46826396872844334, "grad_norm": 0.6875, "learning_rate": 2.7587098744767238e-05, "loss": 0.57, "step": 6364 }, { "epoch": 0.4683375488618073, "grad_norm": 0.91015625, "learning_rate": 2.7581338175007253e-05, "loss": 0.8446, "step": 6365 }, { "epoch": 0.4684111289951713, "grad_norm": 0.95703125, "learning_rate": 2.7575577466711733e-05, "loss": 1.0267, "step": 6366 }, { "epoch": 0.4684847091285353, "grad_norm": 1.078125, "learning_rate": 2.7569816620189825e-05, "loss": 0.8569, "step": 6367 }, { "epoch": 0.4685582892618993, "grad_norm": 0.8359375, "learning_rate": 2.7564055635750712e-05, "loss": 0.9914, "step": 6368 }, { "epoch": 0.4686318693952633, "grad_norm": 0.734375, "learning_rate": 2.7558294513703575e-05, "loss": 0.8706, "step": 6369 }, { "epoch": 0.46870544952862725, "grad_norm": 0.828125, "learning_rate": 2.7552533254357615e-05, "loss": 1.0821, "step": 6370 }, { "epoch": 0.46877902966199125, "grad_norm": 0.765625, "learning_rate": 2.7546771858022006e-05, "loss": 0.7812, "step": 6371 }, { "epoch": 0.46885260979535526, "grad_norm": 0.67578125, "learning_rate": 2.7541010325005963e-05, "loss": 0.8116, "step": 6372 }, { "epoch": 0.46892618992871926, "grad_norm": 0.9765625, "learning_rate": 2.7535248655618702e-05, "loss": 0.9588, "step": 6373 }, { "epoch": 0.4689997700620832, "grad_norm": 0.84375, "learning_rate": 2.7529486850169422e-05, "loss": 0.974, "step": 6374 }, { "epoch": 0.4690733501954472, "grad_norm": 0.98828125, "learning_rate": 2.7523724908967367e-05, "loss": 1.1405, "step": 6375 }, { "epoch": 0.4691469303288112, "grad_norm": 0.9140625, "learning_rate": 2.7517962832321763e-05, "loss": 1.1151, "step": 6376 }, { "epoch": 0.4692205104621752, "grad_norm": 1.0078125, "learning_rate": 2.751220062054185e-05, "loss": 0.965, "step": 6377 }, { "epoch": 0.4692940905955392, "grad_norm": 0.8828125, "learning_rate": 2.7506438273936874e-05, "loss": 0.6209, "step": 6378 }, { "epoch": 0.4693676707289032, "grad_norm": 0.96484375, "learning_rate": 2.7500675792816094e-05, "loss": 1.389, "step": 6379 }, { "epoch": 0.4694412508622672, "grad_norm": 1.0390625, "learning_rate": 2.7494913177488756e-05, "loss": 1.2503, "step": 6380 }, { "epoch": 0.4695148309956312, "grad_norm": 0.85546875, "learning_rate": 2.748915042826415e-05, "loss": 1.0505, "step": 6381 }, { "epoch": 0.4695884111289952, "grad_norm": 0.796875, "learning_rate": 2.7483387545451535e-05, "loss": 0.9022, "step": 6382 }, { "epoch": 0.46966199126235914, "grad_norm": 0.83203125, "learning_rate": 2.74776245293602e-05, "loss": 0.6629, "step": 6383 }, { "epoch": 0.46973557139572314, "grad_norm": 0.6953125, "learning_rate": 2.7471861380299447e-05, "loss": 0.7861, "step": 6384 }, { "epoch": 0.46980915152908714, "grad_norm": 0.9296875, "learning_rate": 2.7466098098578557e-05, "loss": 1.0548, "step": 6385 }, { "epoch": 0.46988273166245115, "grad_norm": 0.9140625, "learning_rate": 2.746033468450684e-05, "loss": 0.7586, "step": 6386 }, { "epoch": 0.46995631179581515, "grad_norm": 0.86328125, "learning_rate": 2.7454571138393603e-05, "loss": 1.3519, "step": 6387 }, { "epoch": 0.4700298919291791, "grad_norm": 0.94921875, "learning_rate": 2.7448807460548174e-05, "loss": 0.9955, "step": 6388 }, { "epoch": 0.4701034720625431, "grad_norm": 0.85546875, "learning_rate": 2.744304365127987e-05, "loss": 1.0774, "step": 6389 }, { "epoch": 0.4701770521959071, "grad_norm": 0.84375, "learning_rate": 2.7437279710898027e-05, "loss": 0.6787, "step": 6390 }, { "epoch": 0.4702506323292711, "grad_norm": 0.8359375, "learning_rate": 2.7431515639711992e-05, "loss": 1.358, "step": 6391 }, { "epoch": 0.4703242124626351, "grad_norm": 0.828125, "learning_rate": 2.7425751438031098e-05, "loss": 1.093, "step": 6392 }, { "epoch": 0.47039779259599906, "grad_norm": 0.7109375, "learning_rate": 2.7419987106164714e-05, "loss": 0.5847, "step": 6393 }, { "epoch": 0.47047137272936307, "grad_norm": 0.81640625, "learning_rate": 2.741422264442218e-05, "loss": 0.9338, "step": 6394 }, { "epoch": 0.4705449528627271, "grad_norm": 1.0, "learning_rate": 2.740845805311289e-05, "loss": 1.2731, "step": 6395 }, { "epoch": 0.4706185329960911, "grad_norm": 0.8828125, "learning_rate": 2.7402693332546198e-05, "loss": 1.114, "step": 6396 }, { "epoch": 0.470692113129455, "grad_norm": 0.98828125, "learning_rate": 2.739692848303149e-05, "loss": 1.4891, "step": 6397 }, { "epoch": 0.47076569326281903, "grad_norm": 1.0078125, "learning_rate": 2.7391163504878164e-05, "loss": 1.2744, "step": 6398 }, { "epoch": 0.47083927339618303, "grad_norm": 0.984375, "learning_rate": 2.7385398398395606e-05, "loss": 0.7885, "step": 6399 }, { "epoch": 0.47091285352954704, "grad_norm": 0.73046875, "learning_rate": 2.7379633163893226e-05, "loss": 0.6159, "step": 6400 }, { "epoch": 0.47098643366291104, "grad_norm": 0.90234375, "learning_rate": 2.737386780168042e-05, "loss": 1.0149, "step": 6401 }, { "epoch": 0.471060013796275, "grad_norm": 0.81640625, "learning_rate": 2.7368102312066618e-05, "loss": 0.9604, "step": 6402 }, { "epoch": 0.471133593929639, "grad_norm": 0.734375, "learning_rate": 2.7362336695361235e-05, "loss": 0.6421, "step": 6403 }, { "epoch": 0.471207174063003, "grad_norm": 1.0234375, "learning_rate": 2.735657095187371e-05, "loss": 1.1529, "step": 6404 }, { "epoch": 0.471280754196367, "grad_norm": 0.79296875, "learning_rate": 2.7350805081913466e-05, "loss": 0.7504, "step": 6405 }, { "epoch": 0.47135433432973095, "grad_norm": 0.9140625, "learning_rate": 2.7345039085789953e-05, "loss": 0.8606, "step": 6406 }, { "epoch": 0.47142791446309495, "grad_norm": 0.8984375, "learning_rate": 2.733927296381263e-05, "loss": 0.7918, "step": 6407 }, { "epoch": 0.47150149459645896, "grad_norm": 0.73828125, "learning_rate": 2.7333506716290937e-05, "loss": 0.9462, "step": 6408 }, { "epoch": 0.47157507472982296, "grad_norm": 0.94921875, "learning_rate": 2.7327740343534346e-05, "loss": 1.1555, "step": 6409 }, { "epoch": 0.47164865486318697, "grad_norm": 0.77734375, "learning_rate": 2.732197384585233e-05, "loss": 0.964, "step": 6410 }, { "epoch": 0.4717222349965509, "grad_norm": 0.82421875, "learning_rate": 2.7316207223554364e-05, "loss": 0.6545, "step": 6411 }, { "epoch": 0.4717958151299149, "grad_norm": 0.984375, "learning_rate": 2.731044047694993e-05, "loss": 1.1812, "step": 6412 }, { "epoch": 0.4718693952632789, "grad_norm": 1.015625, "learning_rate": 2.7304673606348514e-05, "loss": 1.0098, "step": 6413 }, { "epoch": 0.4719429753966429, "grad_norm": 0.828125, "learning_rate": 2.7298906612059628e-05, "loss": 0.5711, "step": 6414 }, { "epoch": 0.4720165555300069, "grad_norm": 0.8359375, "learning_rate": 2.729313949439276e-05, "loss": 0.9497, "step": 6415 }, { "epoch": 0.4720901356633709, "grad_norm": 0.83984375, "learning_rate": 2.7287372253657422e-05, "loss": 0.9081, "step": 6416 }, { "epoch": 0.4721637157967349, "grad_norm": 0.875, "learning_rate": 2.7281604890163144e-05, "loss": 0.9944, "step": 6417 }, { "epoch": 0.4722372959300989, "grad_norm": 0.9375, "learning_rate": 2.727583740421944e-05, "loss": 1.0096, "step": 6418 }, { "epoch": 0.4723108760634629, "grad_norm": 0.84765625, "learning_rate": 2.727006979613584e-05, "loss": 1.1038, "step": 6419 }, { "epoch": 0.47238445619682684, "grad_norm": 0.84765625, "learning_rate": 2.726430206622188e-05, "loss": 0.9111, "step": 6420 }, { "epoch": 0.47245803633019084, "grad_norm": 0.71875, "learning_rate": 2.7258534214787108e-05, "loss": 0.6363, "step": 6421 }, { "epoch": 0.47253161646355485, "grad_norm": 0.77734375, "learning_rate": 2.7252766242141066e-05, "loss": 0.745, "step": 6422 }, { "epoch": 0.47260519659691885, "grad_norm": 0.96484375, "learning_rate": 2.7246998148593313e-05, "loss": 0.906, "step": 6423 }, { "epoch": 0.4726787767302828, "grad_norm": 0.8359375, "learning_rate": 2.7241229934453417e-05, "loss": 0.6845, "step": 6424 }, { "epoch": 0.4727523568636468, "grad_norm": 0.9921875, "learning_rate": 2.7235461600030947e-05, "loss": 0.7243, "step": 6425 }, { "epoch": 0.4728259369970108, "grad_norm": 0.7109375, "learning_rate": 2.7229693145635472e-05, "loss": 0.8047, "step": 6426 }, { "epoch": 0.4728995171303748, "grad_norm": 1.1015625, "learning_rate": 2.7223924571576577e-05, "loss": 1.1103, "step": 6427 }, { "epoch": 0.4729730972637388, "grad_norm": 0.7578125, "learning_rate": 2.7218155878163847e-05, "loss": 0.7126, "step": 6428 }, { "epoch": 0.47304667739710277, "grad_norm": 0.8828125, "learning_rate": 2.7212387065706885e-05, "loss": 0.7595, "step": 6429 }, { "epoch": 0.47312025753046677, "grad_norm": 1.0078125, "learning_rate": 2.7206618134515284e-05, "loss": 1.0391, "step": 6430 }, { "epoch": 0.4731938376638308, "grad_norm": 0.7734375, "learning_rate": 2.7200849084898654e-05, "loss": 1.1466, "step": 6431 }, { "epoch": 0.4732674177971948, "grad_norm": 0.71875, "learning_rate": 2.719507991716662e-05, "loss": 0.9107, "step": 6432 }, { "epoch": 0.4733409979305587, "grad_norm": 0.85546875, "learning_rate": 2.718931063162879e-05, "loss": 0.9272, "step": 6433 }, { "epoch": 0.47341457806392273, "grad_norm": 0.80859375, "learning_rate": 2.7183541228594796e-05, "loss": 0.8903, "step": 6434 }, { "epoch": 0.47348815819728673, "grad_norm": 0.97265625, "learning_rate": 2.7177771708374257e-05, "loss": 1.2076, "step": 6435 }, { "epoch": 0.47356173833065074, "grad_norm": 0.8203125, "learning_rate": 2.7172002071276832e-05, "loss": 0.684, "step": 6436 }, { "epoch": 0.47363531846401474, "grad_norm": 0.765625, "learning_rate": 2.716623231761215e-05, "loss": 1.1711, "step": 6437 }, { "epoch": 0.4737088985973787, "grad_norm": 0.80078125, "learning_rate": 2.7160462447689877e-05, "loss": 0.6916, "step": 6438 }, { "epoch": 0.4737824787307427, "grad_norm": 0.91796875, "learning_rate": 2.715469246181967e-05, "loss": 0.7315, "step": 6439 }, { "epoch": 0.4738560588641067, "grad_norm": 0.8046875, "learning_rate": 2.714892236031118e-05, "loss": 1.0714, "step": 6440 }, { "epoch": 0.4739296389974707, "grad_norm": 0.75390625, "learning_rate": 2.7143152143474093e-05, "loss": 0.6242, "step": 6441 }, { "epoch": 0.47400321913083465, "grad_norm": 1.0390625, "learning_rate": 2.7137381811618073e-05, "loss": 1.0698, "step": 6442 }, { "epoch": 0.47407679926419866, "grad_norm": 0.75, "learning_rate": 2.7131611365052806e-05, "loss": 0.6787, "step": 6443 }, { "epoch": 0.47415037939756266, "grad_norm": 0.7578125, "learning_rate": 2.7125840804087987e-05, "loss": 0.756, "step": 6444 }, { "epoch": 0.47422395953092666, "grad_norm": 0.93359375, "learning_rate": 2.7120070129033302e-05, "loss": 0.7813, "step": 6445 }, { "epoch": 0.47429753966429067, "grad_norm": 1.0546875, "learning_rate": 2.7114299340198467e-05, "loss": 1.0591, "step": 6446 }, { "epoch": 0.4743711197976546, "grad_norm": 0.81640625, "learning_rate": 2.710852843789317e-05, "loss": 0.7675, "step": 6447 }, { "epoch": 0.4744446999310186, "grad_norm": 0.78125, "learning_rate": 2.7102757422427145e-05, "loss": 0.7435, "step": 6448 }, { "epoch": 0.4745182800643826, "grad_norm": 0.71875, "learning_rate": 2.7096986294110094e-05, "loss": 0.8249, "step": 6449 }, { "epoch": 0.47459186019774663, "grad_norm": 0.95703125, "learning_rate": 2.7091215053251745e-05, "loss": 0.8246, "step": 6450 }, { "epoch": 0.4746654403311106, "grad_norm": 0.7890625, "learning_rate": 2.708544370016184e-05, "loss": 0.9574, "step": 6451 }, { "epoch": 0.4747390204644746, "grad_norm": 0.7265625, "learning_rate": 2.707967223515011e-05, "loss": 0.989, "step": 6452 }, { "epoch": 0.4748126005978386, "grad_norm": 0.76953125, "learning_rate": 2.7073900658526295e-05, "loss": 1.0891, "step": 6453 }, { "epoch": 0.4748861807312026, "grad_norm": 0.8515625, "learning_rate": 2.7068128970600152e-05, "loss": 1.2637, "step": 6454 }, { "epoch": 0.4749597608645666, "grad_norm": 0.69921875, "learning_rate": 2.7062357171681435e-05, "loss": 0.8857, "step": 6455 }, { "epoch": 0.47503334099793054, "grad_norm": 0.96484375, "learning_rate": 2.7056585262079902e-05, "loss": 1.1733, "step": 6456 }, { "epoch": 0.47510692113129455, "grad_norm": 1.03125, "learning_rate": 2.705081324210532e-05, "loss": 1.2251, "step": 6457 }, { "epoch": 0.47518050126465855, "grad_norm": 1.1328125, "learning_rate": 2.7045041112067465e-05, "loss": 1.0351, "step": 6458 }, { "epoch": 0.47525408139802255, "grad_norm": 0.80859375, "learning_rate": 2.7039268872276124e-05, "loss": 0.6846, "step": 6459 }, { "epoch": 0.4753276615313865, "grad_norm": 0.8203125, "learning_rate": 2.703349652304107e-05, "loss": 0.6974, "step": 6460 }, { "epoch": 0.4754012416647505, "grad_norm": 0.8046875, "learning_rate": 2.7027724064672088e-05, "loss": 0.8048, "step": 6461 }, { "epoch": 0.4754748217981145, "grad_norm": 0.73828125, "learning_rate": 2.7021951497479003e-05, "loss": 0.6476, "step": 6462 }, { "epoch": 0.4755484019314785, "grad_norm": 0.78125, "learning_rate": 2.7016178821771586e-05, "loss": 1.1329, "step": 6463 }, { "epoch": 0.4756219820648425, "grad_norm": 0.703125, "learning_rate": 2.7010406037859663e-05, "loss": 0.6575, "step": 6464 }, { "epoch": 0.47569556219820647, "grad_norm": 0.859375, "learning_rate": 2.7004633146053043e-05, "loss": 0.8041, "step": 6465 }, { "epoch": 0.47576914233157047, "grad_norm": 0.87890625, "learning_rate": 2.699886014666156e-05, "loss": 1.2123, "step": 6466 }, { "epoch": 0.4758427224649345, "grad_norm": 0.77734375, "learning_rate": 2.6993087039995017e-05, "loss": 0.9338, "step": 6467 }, { "epoch": 0.4759163025982985, "grad_norm": 1.109375, "learning_rate": 2.6987313826363264e-05, "loss": 1.7897, "step": 6468 }, { "epoch": 0.47598988273166243, "grad_norm": 1.0, "learning_rate": 2.6981540506076126e-05, "loss": 1.1717, "step": 6469 }, { "epoch": 0.47606346286502643, "grad_norm": 0.88671875, "learning_rate": 2.6975767079443454e-05, "loss": 1.0878, "step": 6470 }, { "epoch": 0.47613704299839044, "grad_norm": 0.80078125, "learning_rate": 2.696999354677509e-05, "loss": 0.8289, "step": 6471 }, { "epoch": 0.47621062313175444, "grad_norm": 0.8984375, "learning_rate": 2.6964219908380895e-05, "loss": 0.9862, "step": 6472 }, { "epoch": 0.47628420326511844, "grad_norm": 0.70703125, "learning_rate": 2.6958446164570734e-05, "loss": 0.7615, "step": 6473 }, { "epoch": 0.4763577833984824, "grad_norm": 1.2421875, "learning_rate": 2.6952672315654458e-05, "loss": 1.434, "step": 6474 }, { "epoch": 0.4764313635318464, "grad_norm": 1.0703125, "learning_rate": 2.694689836194195e-05, "loss": 1.4117, "step": 6475 }, { "epoch": 0.4765049436652104, "grad_norm": 0.80859375, "learning_rate": 2.694112430374309e-05, "loss": 0.7745, "step": 6476 }, { "epoch": 0.4765785237985744, "grad_norm": 0.83203125, "learning_rate": 2.6935350141367742e-05, "loss": 0.9303, "step": 6477 }, { "epoch": 0.47665210393193835, "grad_norm": 0.88671875, "learning_rate": 2.6929575875125817e-05, "loss": 0.9707, "step": 6478 }, { "epoch": 0.47672568406530236, "grad_norm": 0.8984375, "learning_rate": 2.6923801505327196e-05, "loss": 0.8115, "step": 6479 }, { "epoch": 0.47679926419866636, "grad_norm": 0.72265625, "learning_rate": 2.6918027032281784e-05, "loss": 0.6961, "step": 6480 }, { "epoch": 0.47687284433203037, "grad_norm": 0.87109375, "learning_rate": 2.691225245629948e-05, "loss": 0.7492, "step": 6481 }, { "epoch": 0.47694642446539437, "grad_norm": 0.90625, "learning_rate": 2.690647777769021e-05, "loss": 1.1451, "step": 6482 }, { "epoch": 0.4770200045987583, "grad_norm": 0.984375, "learning_rate": 2.6900702996763866e-05, "loss": 0.9732, "step": 6483 }, { "epoch": 0.4770935847321223, "grad_norm": 1.125, "learning_rate": 2.689492811383038e-05, "loss": 1.2119, "step": 6484 }, { "epoch": 0.4771671648654863, "grad_norm": 0.7109375, "learning_rate": 2.688915312919969e-05, "loss": 0.5543, "step": 6485 }, { "epoch": 0.47724074499885033, "grad_norm": 0.828125, "learning_rate": 2.6883378043181713e-05, "loss": 1.4937, "step": 6486 }, { "epoch": 0.4773143251322143, "grad_norm": 1.078125, "learning_rate": 2.68776028560864e-05, "loss": 1.0516, "step": 6487 }, { "epoch": 0.4773879052655783, "grad_norm": 0.9453125, "learning_rate": 2.6871827568223684e-05, "loss": 1.3265, "step": 6488 }, { "epoch": 0.4774614853989423, "grad_norm": 0.6875, "learning_rate": 2.6866052179903522e-05, "loss": 1.0175, "step": 6489 }, { "epoch": 0.4775350655323063, "grad_norm": 0.9375, "learning_rate": 2.686027669143586e-05, "loss": 1.1962, "step": 6490 }, { "epoch": 0.4776086456656703, "grad_norm": 0.859375, "learning_rate": 2.6854501103130657e-05, "loss": 0.9129, "step": 6491 }, { "epoch": 0.47768222579903424, "grad_norm": 0.828125, "learning_rate": 2.6848725415297887e-05, "loss": 0.8448, "step": 6492 }, { "epoch": 0.47775580593239825, "grad_norm": 0.94140625, "learning_rate": 2.6842949628247516e-05, "loss": 1.237, "step": 6493 }, { "epoch": 0.47782938606576225, "grad_norm": 0.72265625, "learning_rate": 2.6837173742289524e-05, "loss": 0.9473, "step": 6494 }, { "epoch": 0.47790296619912626, "grad_norm": 0.765625, "learning_rate": 2.683139775773388e-05, "loss": 0.8712, "step": 6495 }, { "epoch": 0.4779765463324902, "grad_norm": 0.9140625, "learning_rate": 2.6825621674890584e-05, "loss": 0.7254, "step": 6496 }, { "epoch": 0.4780501264658542, "grad_norm": 0.984375, "learning_rate": 2.681984549406962e-05, "loss": 1.2051, "step": 6497 }, { "epoch": 0.4781237065992182, "grad_norm": 0.62890625, "learning_rate": 2.6814069215580978e-05, "loss": 0.8268, "step": 6498 }, { "epoch": 0.4781972867325822, "grad_norm": 0.89453125, "learning_rate": 2.680829283973467e-05, "loss": 0.9537, "step": 6499 }, { "epoch": 0.4782708668659462, "grad_norm": 0.859375, "learning_rate": 2.6802516366840706e-05, "loss": 0.7822, "step": 6500 }, { "epoch": 0.47834444699931017, "grad_norm": 0.73046875, "learning_rate": 2.6796739797209087e-05, "loss": 0.791, "step": 6501 }, { "epoch": 0.4784180271326742, "grad_norm": 0.828125, "learning_rate": 2.679096313114984e-05, "loss": 1.1303, "step": 6502 }, { "epoch": 0.4784916072660382, "grad_norm": 0.9140625, "learning_rate": 2.6785186368972987e-05, "loss": 1.2051, "step": 6503 }, { "epoch": 0.4785651873994022, "grad_norm": 0.71484375, "learning_rate": 2.677940951098855e-05, "loss": 1.0447, "step": 6504 }, { "epoch": 0.47863876753276613, "grad_norm": 0.9140625, "learning_rate": 2.677363255750656e-05, "loss": 0.9259, "step": 6505 }, { "epoch": 0.47871234766613013, "grad_norm": 0.9765625, "learning_rate": 2.676785550883707e-05, "loss": 0.8799, "step": 6506 }, { "epoch": 0.47878592779949414, "grad_norm": 0.83203125, "learning_rate": 2.676207836529011e-05, "loss": 1.212, "step": 6507 }, { "epoch": 0.47885950793285814, "grad_norm": 0.796875, "learning_rate": 2.6756301127175732e-05, "loss": 0.8355, "step": 6508 }, { "epoch": 0.47893308806622215, "grad_norm": 0.6875, "learning_rate": 2.6750523794803988e-05, "loss": 0.687, "step": 6509 }, { "epoch": 0.4790066681995861, "grad_norm": 0.81640625, "learning_rate": 2.674474636848494e-05, "loss": 0.7383, "step": 6510 }, { "epoch": 0.4790802483329501, "grad_norm": 0.6640625, "learning_rate": 2.6738968848528647e-05, "loss": 0.742, "step": 6511 }, { "epoch": 0.4791538284663141, "grad_norm": 0.7265625, "learning_rate": 2.6733191235245185e-05, "loss": 0.6763, "step": 6512 }, { "epoch": 0.4792274085996781, "grad_norm": 0.9609375, "learning_rate": 2.672741352894462e-05, "loss": 0.8088, "step": 6513 }, { "epoch": 0.47930098873304205, "grad_norm": 0.859375, "learning_rate": 2.672163572993704e-05, "loss": 1.2608, "step": 6514 }, { "epoch": 0.47937456886640606, "grad_norm": 1.21875, "learning_rate": 2.6715857838532516e-05, "loss": 1.5293, "step": 6515 }, { "epoch": 0.47944814899977006, "grad_norm": 0.7578125, "learning_rate": 2.6710079855041142e-05, "loss": 0.7698, "step": 6516 }, { "epoch": 0.47952172913313407, "grad_norm": 0.69140625, "learning_rate": 2.6704301779773016e-05, "loss": 0.8382, "step": 6517 }, { "epoch": 0.47959530926649807, "grad_norm": 0.9140625, "learning_rate": 2.6698523613038223e-05, "loss": 1.1378, "step": 6518 }, { "epoch": 0.479668889399862, "grad_norm": 0.9609375, "learning_rate": 2.6692745355146887e-05, "loss": 1.0447, "step": 6519 }, { "epoch": 0.479742469533226, "grad_norm": 1.078125, "learning_rate": 2.66869670064091e-05, "loss": 0.9409, "step": 6520 }, { "epoch": 0.47981604966659, "grad_norm": 0.73828125, "learning_rate": 2.668118856713498e-05, "loss": 0.7465, "step": 6521 }, { "epoch": 0.47988962979995403, "grad_norm": 0.6328125, "learning_rate": 2.6675410037634646e-05, "loss": 0.6655, "step": 6522 }, { "epoch": 0.479963209933318, "grad_norm": 0.72265625, "learning_rate": 2.6669631418218223e-05, "loss": 0.7218, "step": 6523 }, { "epoch": 0.480036790066682, "grad_norm": 0.80859375, "learning_rate": 2.666385270919583e-05, "loss": 0.81, "step": 6524 }, { "epoch": 0.480110370200046, "grad_norm": 0.80078125, "learning_rate": 2.6658073910877603e-05, "loss": 0.8634, "step": 6525 }, { "epoch": 0.48018395033341, "grad_norm": 0.7890625, "learning_rate": 2.665229502357368e-05, "loss": 0.8237, "step": 6526 }, { "epoch": 0.480257530466774, "grad_norm": 0.86328125, "learning_rate": 2.6646516047594206e-05, "loss": 0.9938, "step": 6527 }, { "epoch": 0.48033111060013794, "grad_norm": 0.80078125, "learning_rate": 2.6640736983249332e-05, "loss": 0.7124, "step": 6528 }, { "epoch": 0.48040469073350195, "grad_norm": 0.8203125, "learning_rate": 2.66349578308492e-05, "loss": 0.9326, "step": 6529 }, { "epoch": 0.48047827086686595, "grad_norm": 0.98828125, "learning_rate": 2.6629178590703968e-05, "loss": 1.2848, "step": 6530 }, { "epoch": 0.48055185100022996, "grad_norm": 0.96484375, "learning_rate": 2.6623399263123792e-05, "loss": 1.0676, "step": 6531 }, { "epoch": 0.4806254311335939, "grad_norm": 0.84375, "learning_rate": 2.6617619848418852e-05, "loss": 0.6388, "step": 6532 }, { "epoch": 0.4806990112669579, "grad_norm": 0.86328125, "learning_rate": 2.661184034689931e-05, "loss": 0.8049, "step": 6533 }, { "epoch": 0.4807725914003219, "grad_norm": 0.953125, "learning_rate": 2.6606060758875333e-05, "loss": 1.0493, "step": 6534 }, { "epoch": 0.4808461715336859, "grad_norm": 0.77734375, "learning_rate": 2.660028108465712e-05, "loss": 0.8408, "step": 6535 }, { "epoch": 0.4809197516670499, "grad_norm": 1.09375, "learning_rate": 2.6594501324554833e-05, "loss": 1.2574, "step": 6536 }, { "epoch": 0.48099333180041387, "grad_norm": 0.9921875, "learning_rate": 2.6588721478878682e-05, "loss": 1.0273, "step": 6537 }, { "epoch": 0.4810669119337779, "grad_norm": 0.9765625, "learning_rate": 2.6582941547938832e-05, "loss": 0.9924, "step": 6538 }, { "epoch": 0.4811404920671419, "grad_norm": 0.9921875, "learning_rate": 2.6577161532045515e-05, "loss": 0.9943, "step": 6539 }, { "epoch": 0.4812140722005059, "grad_norm": 0.90234375, "learning_rate": 2.6571381431508913e-05, "loss": 1.3333, "step": 6540 }, { "epoch": 0.48128765233386983, "grad_norm": 0.8203125, "learning_rate": 2.6565601246639245e-05, "loss": 0.9713, "step": 6541 }, { "epoch": 0.48136123246723383, "grad_norm": 0.77734375, "learning_rate": 2.6559820977746703e-05, "loss": 0.7901, "step": 6542 }, { "epoch": 0.48143481260059784, "grad_norm": 0.76953125, "learning_rate": 2.655404062514152e-05, "loss": 0.9806, "step": 6543 }, { "epoch": 0.48150839273396184, "grad_norm": 0.6796875, "learning_rate": 2.6548260189133904e-05, "loss": 0.6986, "step": 6544 }, { "epoch": 0.48158197286732585, "grad_norm": 1.015625, "learning_rate": 2.6542479670034098e-05, "loss": 1.0739, "step": 6545 }, { "epoch": 0.4816555530006898, "grad_norm": 0.703125, "learning_rate": 2.6536699068152322e-05, "loss": 0.6641, "step": 6546 }, { "epoch": 0.4817291331340538, "grad_norm": 0.83984375, "learning_rate": 2.6530918383798804e-05, "loss": 1.1925, "step": 6547 }, { "epoch": 0.4818027132674178, "grad_norm": 0.82421875, "learning_rate": 2.6525137617283797e-05, "loss": 0.7034, "step": 6548 }, { "epoch": 0.4818762934007818, "grad_norm": 0.65234375, "learning_rate": 2.6519356768917524e-05, "loss": 0.6368, "step": 6549 }, { "epoch": 0.48194987353414576, "grad_norm": 0.73046875, "learning_rate": 2.6513575839010246e-05, "loss": 0.8171, "step": 6550 }, { "epoch": 0.48202345366750976, "grad_norm": 0.8125, "learning_rate": 2.6507794827872212e-05, "loss": 0.9612, "step": 6551 }, { "epoch": 0.48209703380087376, "grad_norm": 0.8359375, "learning_rate": 2.6502013735813676e-05, "loss": 0.8156, "step": 6552 }, { "epoch": 0.48217061393423777, "grad_norm": 0.7578125, "learning_rate": 2.64962325631449e-05, "loss": 0.9645, "step": 6553 }, { "epoch": 0.4822441940676018, "grad_norm": 0.91796875, "learning_rate": 2.649045131017615e-05, "loss": 1.0374, "step": 6554 }, { "epoch": 0.4823177742009657, "grad_norm": 0.7578125, "learning_rate": 2.6484669977217696e-05, "loss": 0.697, "step": 6555 }, { "epoch": 0.4823913543343297, "grad_norm": 0.71875, "learning_rate": 2.6478888564579808e-05, "loss": 0.7014, "step": 6556 }, { "epoch": 0.48246493446769373, "grad_norm": 1.0234375, "learning_rate": 2.647310707257276e-05, "loss": 1.2796, "step": 6557 }, { "epoch": 0.48253851460105773, "grad_norm": 1.015625, "learning_rate": 2.6467325501506834e-05, "loss": 0.9344, "step": 6558 }, { "epoch": 0.4826120947344217, "grad_norm": 0.734375, "learning_rate": 2.646154385169232e-05, "loss": 0.6295, "step": 6559 }, { "epoch": 0.4826856748677857, "grad_norm": 0.85546875, "learning_rate": 2.6455762123439522e-05, "loss": 0.8593, "step": 6560 }, { "epoch": 0.4827592550011497, "grad_norm": 0.86328125, "learning_rate": 2.6449980317058708e-05, "loss": 0.743, "step": 6561 }, { "epoch": 0.4828328351345137, "grad_norm": 0.734375, "learning_rate": 2.6444198432860197e-05, "loss": 1.0652, "step": 6562 }, { "epoch": 0.4829064152678777, "grad_norm": 0.890625, "learning_rate": 2.6438416471154275e-05, "loss": 0.6952, "step": 6563 }, { "epoch": 0.48297999540124165, "grad_norm": 0.7890625, "learning_rate": 2.643263443225126e-05, "loss": 1.2082, "step": 6564 }, { "epoch": 0.48305357553460565, "grad_norm": 0.94921875, "learning_rate": 2.6426852316461465e-05, "loss": 0.8351, "step": 6565 }, { "epoch": 0.48312715566796965, "grad_norm": 0.8359375, "learning_rate": 2.6421070124095194e-05, "loss": 0.7706, "step": 6566 }, { "epoch": 0.48320073580133366, "grad_norm": 0.76171875, "learning_rate": 2.6415287855462784e-05, "loss": 0.6269, "step": 6567 }, { "epoch": 0.4832743159346976, "grad_norm": 1.1484375, "learning_rate": 2.6409505510874538e-05, "loss": 1.1887, "step": 6568 }, { "epoch": 0.4833478960680616, "grad_norm": 0.85546875, "learning_rate": 2.64037230906408e-05, "loss": 0.6661, "step": 6569 }, { "epoch": 0.4834214762014256, "grad_norm": 0.875, "learning_rate": 2.639794059507189e-05, "loss": 0.8258, "step": 6570 }, { "epoch": 0.4834950563347896, "grad_norm": 0.9609375, "learning_rate": 2.6392158024478154e-05, "loss": 1.0068, "step": 6571 }, { "epoch": 0.4835686364681536, "grad_norm": 0.9609375, "learning_rate": 2.6386375379169918e-05, "loss": 1.1187, "step": 6572 }, { "epoch": 0.48364221660151757, "grad_norm": 0.8359375, "learning_rate": 2.6380592659457543e-05, "loss": 0.9549, "step": 6573 }, { "epoch": 0.4837157967348816, "grad_norm": 0.828125, "learning_rate": 2.6374809865651363e-05, "loss": 1.1353, "step": 6574 }, { "epoch": 0.4837893768682456, "grad_norm": 0.94140625, "learning_rate": 2.6369026998061736e-05, "loss": 1.303, "step": 6575 }, { "epoch": 0.4838629570016096, "grad_norm": 0.93359375, "learning_rate": 2.6363244056999014e-05, "loss": 1.2945, "step": 6576 }, { "epoch": 0.48393653713497353, "grad_norm": 0.921875, "learning_rate": 2.635746104277356e-05, "loss": 1.0926, "step": 6577 }, { "epoch": 0.48401011726833754, "grad_norm": 0.875, "learning_rate": 2.6351677955695732e-05, "loss": 0.8453, "step": 6578 }, { "epoch": 0.48408369740170154, "grad_norm": 0.9765625, "learning_rate": 2.6345894796075904e-05, "loss": 0.7784, "step": 6579 }, { "epoch": 0.48415727753506554, "grad_norm": 0.8203125, "learning_rate": 2.6340111564224447e-05, "loss": 1.159, "step": 6580 }, { "epoch": 0.48423085766842955, "grad_norm": 0.8515625, "learning_rate": 2.6334328260451728e-05, "loss": 0.9067, "step": 6581 }, { "epoch": 0.4843044378017935, "grad_norm": 0.8046875, "learning_rate": 2.6328544885068136e-05, "loss": 0.8354, "step": 6582 }, { "epoch": 0.4843780179351575, "grad_norm": 0.71875, "learning_rate": 2.6322761438384048e-05, "loss": 0.6787, "step": 6583 }, { "epoch": 0.4844515980685215, "grad_norm": 0.8359375, "learning_rate": 2.631697792070985e-05, "loss": 1.0695, "step": 6584 }, { "epoch": 0.4845251782018855, "grad_norm": 0.8359375, "learning_rate": 2.631119433235593e-05, "loss": 0.9824, "step": 6585 }, { "epoch": 0.48459875833524946, "grad_norm": 1.0, "learning_rate": 2.630541067363269e-05, "loss": 1.0727, "step": 6586 }, { "epoch": 0.48467233846861346, "grad_norm": 0.859375, "learning_rate": 2.6299626944850537e-05, "loss": 0.7621, "step": 6587 }, { "epoch": 0.48474591860197747, "grad_norm": 0.9140625, "learning_rate": 2.6293843146319846e-05, "loss": 0.7323, "step": 6588 }, { "epoch": 0.48481949873534147, "grad_norm": 0.9609375, "learning_rate": 2.6288059278351047e-05, "loss": 0.9427, "step": 6589 }, { "epoch": 0.4848930788687055, "grad_norm": 0.8671875, "learning_rate": 2.628227534125453e-05, "loss": 1.0747, "step": 6590 }, { "epoch": 0.4849666590020694, "grad_norm": 0.64453125, "learning_rate": 2.627649133534072e-05, "loss": 0.6269, "step": 6591 }, { "epoch": 0.4850402391354334, "grad_norm": 0.9609375, "learning_rate": 2.627070726092003e-05, "loss": 1.057, "step": 6592 }, { "epoch": 0.48511381926879743, "grad_norm": 0.88671875, "learning_rate": 2.6264923118302882e-05, "loss": 0.838, "step": 6593 }, { "epoch": 0.48518739940216143, "grad_norm": 0.80078125, "learning_rate": 2.625913890779971e-05, "loss": 0.9063, "step": 6594 }, { "epoch": 0.4852609795355254, "grad_norm": 0.90234375, "learning_rate": 2.6253354629720918e-05, "loss": 0.9571, "step": 6595 }, { "epoch": 0.4853345596688894, "grad_norm": 0.78515625, "learning_rate": 2.6247570284376954e-05, "loss": 0.859, "step": 6596 }, { "epoch": 0.4854081398022534, "grad_norm": 0.99609375, "learning_rate": 2.624178587207825e-05, "loss": 1.0043, "step": 6597 }, { "epoch": 0.4854817199356174, "grad_norm": 0.640625, "learning_rate": 2.623600139313524e-05, "loss": 0.5801, "step": 6598 }, { "epoch": 0.4855553000689814, "grad_norm": 0.828125, "learning_rate": 2.623021684785837e-05, "loss": 0.8346, "step": 6599 }, { "epoch": 0.48562888020234535, "grad_norm": 1.5234375, "learning_rate": 2.6224432236558088e-05, "loss": 0.663, "step": 6600 }, { "epoch": 0.48570246033570935, "grad_norm": 0.7734375, "learning_rate": 2.6218647559544844e-05, "loss": 1.0242, "step": 6601 }, { "epoch": 0.48577604046907336, "grad_norm": 0.93359375, "learning_rate": 2.621286281712908e-05, "loss": 0.8787, "step": 6602 }, { "epoch": 0.48584962060243736, "grad_norm": 0.83203125, "learning_rate": 2.6207078009621266e-05, "loss": 0.6553, "step": 6603 }, { "epoch": 0.4859232007358013, "grad_norm": 0.6484375, "learning_rate": 2.620129313733185e-05, "loss": 0.7066, "step": 6604 }, { "epoch": 0.4859967808691653, "grad_norm": 0.7890625, "learning_rate": 2.6195508200571305e-05, "loss": 0.9585, "step": 6605 }, { "epoch": 0.4860703610025293, "grad_norm": 0.9375, "learning_rate": 2.6189723199650084e-05, "loss": 1.4974, "step": 6606 }, { "epoch": 0.4861439411358933, "grad_norm": 1.171875, "learning_rate": 2.6183938134878673e-05, "loss": 1.0687, "step": 6607 }, { "epoch": 0.4862175212692573, "grad_norm": 0.66015625, "learning_rate": 2.6178153006567542e-05, "loss": 0.5521, "step": 6608 }, { "epoch": 0.4862911014026213, "grad_norm": 0.8515625, "learning_rate": 2.617236781502716e-05, "loss": 1.0084, "step": 6609 }, { "epoch": 0.4863646815359853, "grad_norm": 0.7578125, "learning_rate": 2.6166582560568016e-05, "loss": 0.8275, "step": 6610 }, { "epoch": 0.4864382616693493, "grad_norm": 0.7265625, "learning_rate": 2.616079724350058e-05, "loss": 0.702, "step": 6611 }, { "epoch": 0.4865118418027133, "grad_norm": 0.7421875, "learning_rate": 2.6155011864135355e-05, "loss": 0.8587, "step": 6612 }, { "epoch": 0.48658542193607723, "grad_norm": 0.63671875, "learning_rate": 2.614922642278282e-05, "loss": 0.635, "step": 6613 }, { "epoch": 0.48665900206944124, "grad_norm": 0.80078125, "learning_rate": 2.614344091975347e-05, "loss": 1.03, "step": 6614 }, { "epoch": 0.48673258220280524, "grad_norm": 1.0625, "learning_rate": 2.6137655355357822e-05, "loss": 1.0933, "step": 6615 }, { "epoch": 0.48680616233616925, "grad_norm": 0.85546875, "learning_rate": 2.6131869729906344e-05, "loss": 0.9497, "step": 6616 }, { "epoch": 0.48687974246953325, "grad_norm": 0.828125, "learning_rate": 2.6126084043709563e-05, "loss": 0.6989, "step": 6617 }, { "epoch": 0.4869533226028972, "grad_norm": 0.92578125, "learning_rate": 2.6120298297077978e-05, "loss": 0.7913, "step": 6618 }, { "epoch": 0.4870269027362612, "grad_norm": 0.73828125, "learning_rate": 2.6114512490322096e-05, "loss": 0.6741, "step": 6619 }, { "epoch": 0.4871004828696252, "grad_norm": 0.9140625, "learning_rate": 2.6108726623752434e-05, "loss": 0.7712, "step": 6620 }, { "epoch": 0.4871740630029892, "grad_norm": 0.7890625, "learning_rate": 2.6102940697679514e-05, "loss": 0.8339, "step": 6621 }, { "epoch": 0.48724764313635316, "grad_norm": 0.703125, "learning_rate": 2.609715471241384e-05, "loss": 0.8066, "step": 6622 }, { "epoch": 0.48732122326971716, "grad_norm": 1.0703125, "learning_rate": 2.6091368668265954e-05, "loss": 1.4414, "step": 6623 }, { "epoch": 0.48739480340308117, "grad_norm": 0.71875, "learning_rate": 2.608558256554637e-05, "loss": 1.1509, "step": 6624 }, { "epoch": 0.48746838353644517, "grad_norm": 0.92578125, "learning_rate": 2.6079796404565622e-05, "loss": 1.0638, "step": 6625 }, { "epoch": 0.4875419636698092, "grad_norm": 0.94140625, "learning_rate": 2.6074010185634235e-05, "loss": 1.0043, "step": 6626 }, { "epoch": 0.4876155438031731, "grad_norm": 0.8984375, "learning_rate": 2.606822390906275e-05, "loss": 1.0775, "step": 6627 }, { "epoch": 0.48768912393653713, "grad_norm": 0.99609375, "learning_rate": 2.6062437575161708e-05, "loss": 1.2867, "step": 6628 }, { "epoch": 0.48776270406990113, "grad_norm": 1.078125, "learning_rate": 2.605665118424165e-05, "loss": 1.2731, "step": 6629 }, { "epoch": 0.48783628420326514, "grad_norm": 0.72265625, "learning_rate": 2.6050864736613112e-05, "loss": 0.72, "step": 6630 }, { "epoch": 0.4879098643366291, "grad_norm": 0.87109375, "learning_rate": 2.6045078232586657e-05, "loss": 0.9182, "step": 6631 }, { "epoch": 0.4879834444699931, "grad_norm": 0.97265625, "learning_rate": 2.603929167247282e-05, "loss": 1.1477, "step": 6632 }, { "epoch": 0.4880570246033571, "grad_norm": 0.74609375, "learning_rate": 2.6033505056582164e-05, "loss": 1.0072, "step": 6633 }, { "epoch": 0.4881306047367211, "grad_norm": 0.828125, "learning_rate": 2.6027718385225245e-05, "loss": 0.8941, "step": 6634 }, { "epoch": 0.4882041848700851, "grad_norm": 0.671875, "learning_rate": 2.6021931658712624e-05, "loss": 0.7858, "step": 6635 }, { "epoch": 0.48827776500344905, "grad_norm": 0.609375, "learning_rate": 2.601614487735486e-05, "loss": 0.5726, "step": 6636 }, { "epoch": 0.48835134513681305, "grad_norm": 0.90234375, "learning_rate": 2.601035804146252e-05, "loss": 0.9406, "step": 6637 }, { "epoch": 0.48842492527017706, "grad_norm": 0.94921875, "learning_rate": 2.600457115134617e-05, "loss": 1.0871, "step": 6638 }, { "epoch": 0.48849850540354106, "grad_norm": 0.99609375, "learning_rate": 2.5998784207316378e-05, "loss": 0.9156, "step": 6639 }, { "epoch": 0.488572085536905, "grad_norm": 0.9140625, "learning_rate": 2.599299720968373e-05, "loss": 0.835, "step": 6640 }, { "epoch": 0.488645665670269, "grad_norm": 0.87890625, "learning_rate": 2.5987210158758797e-05, "loss": 0.8283, "step": 6641 }, { "epoch": 0.488719245803633, "grad_norm": 0.73828125, "learning_rate": 2.598142305485216e-05, "loss": 0.8809, "step": 6642 }, { "epoch": 0.488792825936997, "grad_norm": 0.78515625, "learning_rate": 2.5975635898274397e-05, "loss": 0.689, "step": 6643 }, { "epoch": 0.488866406070361, "grad_norm": 0.796875, "learning_rate": 2.596984868933611e-05, "loss": 0.8381, "step": 6644 }, { "epoch": 0.488939986203725, "grad_norm": 0.78125, "learning_rate": 2.5964061428347862e-05, "loss": 0.8655, "step": 6645 }, { "epoch": 0.489013566337089, "grad_norm": 1.0390625, "learning_rate": 2.5958274115620262e-05, "loss": 1.0838, "step": 6646 }, { "epoch": 0.489087146470453, "grad_norm": 0.9453125, "learning_rate": 2.59524867514639e-05, "loss": 1.3738, "step": 6647 }, { "epoch": 0.489160726603817, "grad_norm": 0.796875, "learning_rate": 2.5946699336189373e-05, "loss": 1.0237, "step": 6648 }, { "epoch": 0.48923430673718094, "grad_norm": 1.078125, "learning_rate": 2.5940911870107288e-05, "loss": 0.9828, "step": 6649 }, { "epoch": 0.48930788687054494, "grad_norm": 0.921875, "learning_rate": 2.593512435352823e-05, "loss": 1.157, "step": 6650 }, { "epoch": 0.48938146700390894, "grad_norm": 0.8671875, "learning_rate": 2.592933678676282e-05, "loss": 0.7981, "step": 6651 }, { "epoch": 0.48945504713727295, "grad_norm": 0.8515625, "learning_rate": 2.592354917012166e-05, "loss": 1.1765, "step": 6652 }, { "epoch": 0.48952862727063695, "grad_norm": 0.8125, "learning_rate": 2.5917761503915355e-05, "loss": 1.0321, "step": 6653 }, { "epoch": 0.4896022074040009, "grad_norm": 0.76171875, "learning_rate": 2.5911973788454525e-05, "loss": 0.7883, "step": 6654 }, { "epoch": 0.4896757875373649, "grad_norm": 0.91015625, "learning_rate": 2.5906186024049784e-05, "loss": 0.9707, "step": 6655 }, { "epoch": 0.4897493676707289, "grad_norm": 0.7109375, "learning_rate": 2.5900398211011755e-05, "loss": 0.7421, "step": 6656 }, { "epoch": 0.4898229478040929, "grad_norm": 1.09375, "learning_rate": 2.5894610349651055e-05, "loss": 0.833, "step": 6657 }, { "epoch": 0.48989652793745686, "grad_norm": 0.65234375, "learning_rate": 2.588882244027831e-05, "loss": 0.6903, "step": 6658 }, { "epoch": 0.48997010807082086, "grad_norm": 1.09375, "learning_rate": 2.5883034483204133e-05, "loss": 0.9731, "step": 6659 }, { "epoch": 0.49004368820418487, "grad_norm": 0.91015625, "learning_rate": 2.5877246478739165e-05, "loss": 1.0695, "step": 6660 }, { "epoch": 0.4901172683375489, "grad_norm": 0.78515625, "learning_rate": 2.587145842719404e-05, "loss": 0.5633, "step": 6661 }, { "epoch": 0.4901908484709129, "grad_norm": 0.734375, "learning_rate": 2.5865670328879387e-05, "loss": 0.8886, "step": 6662 }, { "epoch": 0.4902644286042768, "grad_norm": 0.9296875, "learning_rate": 2.5859882184105844e-05, "loss": 1.2564, "step": 6663 }, { "epoch": 0.49033800873764083, "grad_norm": 0.89453125, "learning_rate": 2.585409399318404e-05, "loss": 0.7831, "step": 6664 }, { "epoch": 0.49041158887100483, "grad_norm": 1.0390625, "learning_rate": 2.5848305756424635e-05, "loss": 1.1321, "step": 6665 }, { "epoch": 0.49048516900436884, "grad_norm": 1.0859375, "learning_rate": 2.5842517474138254e-05, "loss": 1.2362, "step": 6666 }, { "epoch": 0.4905587491377328, "grad_norm": 0.77734375, "learning_rate": 2.583672914663555e-05, "loss": 0.9461, "step": 6667 }, { "epoch": 0.4906323292710968, "grad_norm": 1.078125, "learning_rate": 2.583094077422718e-05, "loss": 1.2423, "step": 6668 }, { "epoch": 0.4907059094044608, "grad_norm": 0.86328125, "learning_rate": 2.5825152357223792e-05, "loss": 0.6962, "step": 6669 }, { "epoch": 0.4907794895378248, "grad_norm": 0.72265625, "learning_rate": 2.5819363895936026e-05, "loss": 0.946, "step": 6670 }, { "epoch": 0.4908530696711888, "grad_norm": 0.83203125, "learning_rate": 2.5813575390674548e-05, "loss": 0.9372, "step": 6671 }, { "epoch": 0.49092664980455275, "grad_norm": 0.875, "learning_rate": 2.5807786841750025e-05, "loss": 0.9871, "step": 6672 }, { "epoch": 0.49100022993791675, "grad_norm": 0.78125, "learning_rate": 2.5801998249473096e-05, "loss": 0.7371, "step": 6673 }, { "epoch": 0.49107381007128076, "grad_norm": 1.0390625, "learning_rate": 2.5796209614154442e-05, "loss": 1.0865, "step": 6674 }, { "epoch": 0.49114739020464476, "grad_norm": 0.8984375, "learning_rate": 2.5790420936104714e-05, "loss": 0.8629, "step": 6675 }, { "epoch": 0.4912209703380087, "grad_norm": 0.70703125, "learning_rate": 2.5784632215634603e-05, "loss": 0.7611, "step": 6676 }, { "epoch": 0.4912945504713727, "grad_norm": 0.921875, "learning_rate": 2.577884345305475e-05, "loss": 1.1233, "step": 6677 }, { "epoch": 0.4913681306047367, "grad_norm": 0.80859375, "learning_rate": 2.577305464867585e-05, "loss": 0.9199, "step": 6678 }, { "epoch": 0.4914417107381007, "grad_norm": 0.81640625, "learning_rate": 2.5767265802808555e-05, "loss": 0.9293, "step": 6679 }, { "epoch": 0.4915152908714647, "grad_norm": 0.95703125, "learning_rate": 2.576147691576356e-05, "loss": 1.1749, "step": 6680 }, { "epoch": 0.4915888710048287, "grad_norm": 1.09375, "learning_rate": 2.5755687987851534e-05, "loss": 0.9712, "step": 6681 }, { "epoch": 0.4916624511381927, "grad_norm": 1.1171875, "learning_rate": 2.5749899019383163e-05, "loss": 1.1325, "step": 6682 }, { "epoch": 0.4917360312715567, "grad_norm": 0.8515625, "learning_rate": 2.5744110010669133e-05, "loss": 1.0317, "step": 6683 }, { "epoch": 0.4918096114049207, "grad_norm": 0.94140625, "learning_rate": 2.573832096202011e-05, "loss": 0.7013, "step": 6684 }, { "epoch": 0.49188319153828464, "grad_norm": 0.796875, "learning_rate": 2.573253187374681e-05, "loss": 0.9334, "step": 6685 }, { "epoch": 0.49195677167164864, "grad_norm": 1.03125, "learning_rate": 2.5726742746159906e-05, "loss": 1.036, "step": 6686 }, { "epoch": 0.49203035180501264, "grad_norm": 0.84375, "learning_rate": 2.5720953579570078e-05, "loss": 0.9906, "step": 6687 }, { "epoch": 0.49210393193837665, "grad_norm": 0.73828125, "learning_rate": 2.571516437428805e-05, "loss": 0.7805, "step": 6688 }, { "epoch": 0.49217751207174065, "grad_norm": 0.859375, "learning_rate": 2.570937513062449e-05, "loss": 0.7876, "step": 6689 }, { "epoch": 0.4922510922051046, "grad_norm": 0.62890625, "learning_rate": 2.5703585848890116e-05, "loss": 0.5749, "step": 6690 }, { "epoch": 0.4923246723384686, "grad_norm": 0.93359375, "learning_rate": 2.569779652939561e-05, "loss": 0.7909, "step": 6691 }, { "epoch": 0.4923982524718326, "grad_norm": 1.0078125, "learning_rate": 2.5692007172451693e-05, "loss": 0.8743, "step": 6692 }, { "epoch": 0.4924718326051966, "grad_norm": 0.83984375, "learning_rate": 2.568621777836905e-05, "loss": 0.7506, "step": 6693 }, { "epoch": 0.49254541273856056, "grad_norm": 0.8515625, "learning_rate": 2.568042834745839e-05, "loss": 0.89, "step": 6694 }, { "epoch": 0.49261899287192457, "grad_norm": 0.953125, "learning_rate": 2.5674638880030427e-05, "loss": 0.841, "step": 6695 }, { "epoch": 0.49269257300528857, "grad_norm": 1.03125, "learning_rate": 2.5668849376395876e-05, "loss": 1.1024, "step": 6696 }, { "epoch": 0.4927661531386526, "grad_norm": 1.015625, "learning_rate": 2.5663059836865444e-05, "loss": 0.8662, "step": 6697 }, { "epoch": 0.4928397332720166, "grad_norm": 0.875, "learning_rate": 2.5657270261749834e-05, "loss": 0.7646, "step": 6698 }, { "epoch": 0.4929133134053805, "grad_norm": 0.8828125, "learning_rate": 2.5651480651359777e-05, "loss": 1.05, "step": 6699 }, { "epoch": 0.49298689353874453, "grad_norm": 0.828125, "learning_rate": 2.5645691006005985e-05, "loss": 1.0519, "step": 6700 }, { "epoch": 0.49306047367210853, "grad_norm": 1.015625, "learning_rate": 2.563990132599917e-05, "loss": 1.1284, "step": 6701 }, { "epoch": 0.49313405380547254, "grad_norm": 0.765625, "learning_rate": 2.5634111611650063e-05, "loss": 0.6174, "step": 6702 }, { "epoch": 0.4932076339388365, "grad_norm": 1.1953125, "learning_rate": 2.5628321863269377e-05, "loss": 1.2426, "step": 6703 }, { "epoch": 0.4932812140722005, "grad_norm": 0.890625, "learning_rate": 2.5622532081167855e-05, "loss": 1.1306, "step": 6704 }, { "epoch": 0.4933547942055645, "grad_norm": 0.78125, "learning_rate": 2.5616742265656207e-05, "loss": 0.9979, "step": 6705 }, { "epoch": 0.4934283743389285, "grad_norm": 0.796875, "learning_rate": 2.561095241704517e-05, "loss": 0.8668, "step": 6706 }, { "epoch": 0.4935019544722925, "grad_norm": 0.99609375, "learning_rate": 2.560516253564546e-05, "loss": 0.6973, "step": 6707 }, { "epoch": 0.49357553460565645, "grad_norm": 1.1796875, "learning_rate": 2.5599372621767833e-05, "loss": 1.0704, "step": 6708 }, { "epoch": 0.49364911473902046, "grad_norm": 0.94140625, "learning_rate": 2.5593582675723e-05, "loss": 0.9875, "step": 6709 }, { "epoch": 0.49372269487238446, "grad_norm": 0.8984375, "learning_rate": 2.558779269782171e-05, "loss": 0.9234, "step": 6710 }, { "epoch": 0.49379627500574846, "grad_norm": 0.93359375, "learning_rate": 2.558200268837469e-05, "loss": 0.888, "step": 6711 }, { "epoch": 0.4938698551391124, "grad_norm": 0.63671875, "learning_rate": 2.5576212647692688e-05, "loss": 0.6125, "step": 6712 }, { "epoch": 0.4939434352724764, "grad_norm": 0.70703125, "learning_rate": 2.557042257608645e-05, "loss": 0.7742, "step": 6713 }, { "epoch": 0.4940170154058404, "grad_norm": 0.94921875, "learning_rate": 2.5564632473866694e-05, "loss": 1.027, "step": 6714 }, { "epoch": 0.4940905955392044, "grad_norm": 0.875, "learning_rate": 2.5558842341344193e-05, "loss": 1.3594, "step": 6715 }, { "epoch": 0.49416417567256843, "grad_norm": 1.171875, "learning_rate": 2.555305217882967e-05, "loss": 0.7544, "step": 6716 }, { "epoch": 0.4942377558059324, "grad_norm": 0.81640625, "learning_rate": 2.5547261986633892e-05, "loss": 0.7817, "step": 6717 }, { "epoch": 0.4943113359392964, "grad_norm": 0.94921875, "learning_rate": 2.554147176506759e-05, "loss": 1.3649, "step": 6718 }, { "epoch": 0.4943849160726604, "grad_norm": 0.734375, "learning_rate": 2.553568151444152e-05, "loss": 0.8064, "step": 6719 }, { "epoch": 0.4944584962060244, "grad_norm": 0.890625, "learning_rate": 2.552989123506644e-05, "loss": 1.2102, "step": 6720 }, { "epoch": 0.49453207633938834, "grad_norm": 0.9921875, "learning_rate": 2.5524100927253085e-05, "loss": 1.3482, "step": 6721 }, { "epoch": 0.49460565647275234, "grad_norm": 0.75, "learning_rate": 2.551831059131224e-05, "loss": 0.8116, "step": 6722 }, { "epoch": 0.49467923660611635, "grad_norm": 0.6953125, "learning_rate": 2.5512520227554637e-05, "loss": 0.573, "step": 6723 }, { "epoch": 0.49475281673948035, "grad_norm": 0.88671875, "learning_rate": 2.5506729836291045e-05, "loss": 0.8246, "step": 6724 }, { "epoch": 0.49482639687284435, "grad_norm": 0.82421875, "learning_rate": 2.5500939417832213e-05, "loss": 0.8664, "step": 6725 }, { "epoch": 0.4948999770062083, "grad_norm": 0.86328125, "learning_rate": 2.549514897248892e-05, "loss": 0.8978, "step": 6726 }, { "epoch": 0.4949735571395723, "grad_norm": 0.8671875, "learning_rate": 2.5489358500571896e-05, "loss": 0.9654, "step": 6727 }, { "epoch": 0.4950471372729363, "grad_norm": 0.9140625, "learning_rate": 2.548356800239194e-05, "loss": 1.2929, "step": 6728 }, { "epoch": 0.4951207174063003, "grad_norm": 0.9296875, "learning_rate": 2.5477777478259805e-05, "loss": 0.8073, "step": 6729 }, { "epoch": 0.49519429753966426, "grad_norm": 0.88671875, "learning_rate": 2.5471986928486245e-05, "loss": 0.9441, "step": 6730 }, { "epoch": 0.49526787767302827, "grad_norm": 0.82421875, "learning_rate": 2.5466196353382053e-05, "loss": 1.1509, "step": 6731 }, { "epoch": 0.49534145780639227, "grad_norm": 0.98046875, "learning_rate": 2.5460405753257972e-05, "loss": 1.2072, "step": 6732 }, { "epoch": 0.4954150379397563, "grad_norm": 0.87109375, "learning_rate": 2.545461512842479e-05, "loss": 0.8782, "step": 6733 }, { "epoch": 0.4954886180731203, "grad_norm": 0.95703125, "learning_rate": 2.5448824479193262e-05, "loss": 1.0232, "step": 6734 }, { "epoch": 0.49556219820648423, "grad_norm": 0.84765625, "learning_rate": 2.5443033805874177e-05, "loss": 1.0602, "step": 6735 }, { "epoch": 0.49563577833984823, "grad_norm": 0.74609375, "learning_rate": 2.5437243108778307e-05, "loss": 0.7239, "step": 6736 }, { "epoch": 0.49570935847321224, "grad_norm": 0.65625, "learning_rate": 2.5431452388216427e-05, "loss": 0.6611, "step": 6737 }, { "epoch": 0.49578293860657624, "grad_norm": 1.0078125, "learning_rate": 2.5425661644499315e-05, "loss": 0.9489, "step": 6738 }, { "epoch": 0.4958565187399402, "grad_norm": 0.75, "learning_rate": 2.541987087793774e-05, "loss": 0.5325, "step": 6739 }, { "epoch": 0.4959300988733042, "grad_norm": 1.0390625, "learning_rate": 2.5414080088842484e-05, "loss": 1.1207, "step": 6740 }, { "epoch": 0.4960036790066682, "grad_norm": 0.98828125, "learning_rate": 2.540828927752434e-05, "loss": 1.1418, "step": 6741 }, { "epoch": 0.4960772591400322, "grad_norm": 0.92578125, "learning_rate": 2.540249844429408e-05, "loss": 0.8199, "step": 6742 }, { "epoch": 0.4961508392733962, "grad_norm": 0.71875, "learning_rate": 2.5396707589462486e-05, "loss": 0.7246, "step": 6743 }, { "epoch": 0.49622441940676015, "grad_norm": 0.921875, "learning_rate": 2.5390916713340345e-05, "loss": 0.943, "step": 6744 }, { "epoch": 0.49629799954012416, "grad_norm": 0.78125, "learning_rate": 2.5385125816238448e-05, "loss": 1.1701, "step": 6745 }, { "epoch": 0.49637157967348816, "grad_norm": 0.9140625, "learning_rate": 2.5379334898467565e-05, "loss": 0.8514, "step": 6746 }, { "epoch": 0.49644515980685217, "grad_norm": 1.1875, "learning_rate": 2.5373543960338503e-05, "loss": 1.1307, "step": 6747 }, { "epoch": 0.4965187399402161, "grad_norm": 0.796875, "learning_rate": 2.5367753002162037e-05, "loss": 0.6164, "step": 6748 }, { "epoch": 0.4965923200735801, "grad_norm": 0.8046875, "learning_rate": 2.536196202424897e-05, "loss": 0.7547, "step": 6749 }, { "epoch": 0.4966659002069441, "grad_norm": 0.80078125, "learning_rate": 2.5356171026910074e-05, "loss": 0.7357, "step": 6750 }, { "epoch": 0.4967394803403081, "grad_norm": 0.98046875, "learning_rate": 2.535038001045616e-05, "loss": 1.8553, "step": 6751 }, { "epoch": 0.49681306047367213, "grad_norm": 0.80859375, "learning_rate": 2.5344588975198012e-05, "loss": 0.8724, "step": 6752 }, { "epoch": 0.4968866406070361, "grad_norm": 0.9296875, "learning_rate": 2.5338797921446422e-05, "loss": 1.2423, "step": 6753 }, { "epoch": 0.4969602207404001, "grad_norm": 0.98046875, "learning_rate": 2.5333006849512185e-05, "loss": 1.4631, "step": 6754 }, { "epoch": 0.4970338008737641, "grad_norm": 1.046875, "learning_rate": 2.53272157597061e-05, "loss": 1.0206, "step": 6755 }, { "epoch": 0.4971073810071281, "grad_norm": 0.79296875, "learning_rate": 2.5321424652338972e-05, "loss": 0.6425, "step": 6756 }, { "epoch": 0.4971809611404921, "grad_norm": 0.9296875, "learning_rate": 2.5315633527721583e-05, "loss": 0.9942, "step": 6757 }, { "epoch": 0.49725454127385604, "grad_norm": 0.7265625, "learning_rate": 2.5309842386164744e-05, "loss": 0.7313, "step": 6758 }, { "epoch": 0.49732812140722005, "grad_norm": 0.99609375, "learning_rate": 2.5304051227979243e-05, "loss": 1.2216, "step": 6759 }, { "epoch": 0.49740170154058405, "grad_norm": 0.96484375, "learning_rate": 2.5298260053475885e-05, "loss": 0.9072, "step": 6760 }, { "epoch": 0.49747528167394806, "grad_norm": 0.9140625, "learning_rate": 2.5292468862965472e-05, "loss": 1.1626, "step": 6761 }, { "epoch": 0.497548861807312, "grad_norm": 0.7421875, "learning_rate": 2.5286677656758817e-05, "loss": 0.6964, "step": 6762 }, { "epoch": 0.497622441940676, "grad_norm": 0.86328125, "learning_rate": 2.5280886435166713e-05, "loss": 0.8919, "step": 6763 }, { "epoch": 0.49769602207404, "grad_norm": 0.9453125, "learning_rate": 2.5275095198499953e-05, "loss": 1.1505, "step": 6764 }, { "epoch": 0.497769602207404, "grad_norm": 1.0859375, "learning_rate": 2.5269303947069368e-05, "loss": 1.2343, "step": 6765 }, { "epoch": 0.497843182340768, "grad_norm": 0.99609375, "learning_rate": 2.5263512681185742e-05, "loss": 1.6502, "step": 6766 }, { "epoch": 0.49791676247413197, "grad_norm": 0.90234375, "learning_rate": 2.5257721401159884e-05, "loss": 1.0583, "step": 6767 }, { "epoch": 0.497990342607496, "grad_norm": 0.77734375, "learning_rate": 2.5251930107302608e-05, "loss": 0.9987, "step": 6768 }, { "epoch": 0.49806392274086, "grad_norm": 0.828125, "learning_rate": 2.5246138799924718e-05, "loss": 0.9529, "step": 6769 }, { "epoch": 0.498137502874224, "grad_norm": 0.82421875, "learning_rate": 2.5240347479337027e-05, "loss": 0.8805, "step": 6770 }, { "epoch": 0.49821108300758793, "grad_norm": 0.7890625, "learning_rate": 2.5234556145850343e-05, "loss": 0.9518, "step": 6771 }, { "epoch": 0.49828466314095193, "grad_norm": 0.7734375, "learning_rate": 2.5228764799775468e-05, "loss": 1.0451, "step": 6772 }, { "epoch": 0.49835824327431594, "grad_norm": 0.82421875, "learning_rate": 2.522297344142322e-05, "loss": 0.9713, "step": 6773 }, { "epoch": 0.49843182340767994, "grad_norm": 0.76171875, "learning_rate": 2.5217182071104405e-05, "loss": 0.8702, "step": 6774 }, { "epoch": 0.49850540354104395, "grad_norm": 0.95703125, "learning_rate": 2.5211390689129844e-05, "loss": 1.041, "step": 6775 }, { "epoch": 0.4985789836744079, "grad_norm": 0.71875, "learning_rate": 2.5205599295810338e-05, "loss": 0.898, "step": 6776 }, { "epoch": 0.4986525638077719, "grad_norm": 0.8046875, "learning_rate": 2.5199807891456716e-05, "loss": 0.6358, "step": 6777 }, { "epoch": 0.4987261439411359, "grad_norm": 0.82421875, "learning_rate": 2.5194016476379773e-05, "loss": 0.7588, "step": 6778 }, { "epoch": 0.4987997240744999, "grad_norm": 0.703125, "learning_rate": 2.518822505089034e-05, "loss": 0.5863, "step": 6779 }, { "epoch": 0.49887330420786385, "grad_norm": 0.92578125, "learning_rate": 2.5182433615299215e-05, "loss": 1.0457, "step": 6780 }, { "epoch": 0.49894688434122786, "grad_norm": 0.921875, "learning_rate": 2.5176642169917225e-05, "loss": 0.8337, "step": 6781 }, { "epoch": 0.49902046447459186, "grad_norm": 0.8125, "learning_rate": 2.517085071505518e-05, "loss": 1.1624, "step": 6782 }, { "epoch": 0.49909404460795587, "grad_norm": 0.70703125, "learning_rate": 2.5165059251023915e-05, "loss": 0.7193, "step": 6783 }, { "epoch": 0.49916762474131987, "grad_norm": 1.0390625, "learning_rate": 2.515926777813422e-05, "loss": 1.4729, "step": 6784 }, { "epoch": 0.4992412048746838, "grad_norm": 0.8125, "learning_rate": 2.5153476296696922e-05, "loss": 0.6558, "step": 6785 }, { "epoch": 0.4993147850080478, "grad_norm": 0.70703125, "learning_rate": 2.5147684807022847e-05, "loss": 0.6489, "step": 6786 }, { "epoch": 0.4993883651414118, "grad_norm": 0.86328125, "learning_rate": 2.5141893309422804e-05, "loss": 1.0101, "step": 6787 }, { "epoch": 0.49946194527477583, "grad_norm": 0.859375, "learning_rate": 2.513610180420762e-05, "loss": 1.3912, "step": 6788 }, { "epoch": 0.4995355254081398, "grad_norm": 1.015625, "learning_rate": 2.5130310291688097e-05, "loss": 0.9714, "step": 6789 }, { "epoch": 0.4996091055415038, "grad_norm": 0.8203125, "learning_rate": 2.512451877217508e-05, "loss": 0.7537, "step": 6790 }, { "epoch": 0.4996826856748678, "grad_norm": 1.1484375, "learning_rate": 2.5118727245979372e-05, "loss": 0.8053, "step": 6791 }, { "epoch": 0.4997562658082318, "grad_norm": 0.89453125, "learning_rate": 2.5112935713411796e-05, "loss": 1.1749, "step": 6792 }, { "epoch": 0.4998298459415958, "grad_norm": 0.83203125, "learning_rate": 2.5107144174783174e-05, "loss": 0.5942, "step": 6793 }, { "epoch": 0.49990342607495974, "grad_norm": 0.89453125, "learning_rate": 2.5101352630404324e-05, "loss": 0.7524, "step": 6794 }, { "epoch": 0.49997700620832375, "grad_norm": 0.671875, "learning_rate": 2.509556108058607e-05, "loss": 0.8138, "step": 6795 }, { "epoch": 0.5000505863416878, "grad_norm": 0.73828125, "learning_rate": 2.5089769525639235e-05, "loss": 0.6325, "step": 6796 }, { "epoch": 0.5001241664750518, "grad_norm": 0.78515625, "learning_rate": 2.5083977965874637e-05, "loss": 0.8426, "step": 6797 }, { "epoch": 0.5001977466084158, "grad_norm": 0.65625, "learning_rate": 2.5078186401603103e-05, "loss": 0.6433, "step": 6798 }, { "epoch": 0.5002713267417798, "grad_norm": 0.74609375, "learning_rate": 2.507239483313546e-05, "loss": 0.7694, "step": 6799 }, { "epoch": 0.5003449068751437, "grad_norm": 0.609375, "learning_rate": 2.506660326078251e-05, "loss": 0.6423, "step": 6800 }, { "epoch": 0.5004184870085077, "grad_norm": 0.75, "learning_rate": 2.506081168485509e-05, "loss": 0.7475, "step": 6801 }, { "epoch": 0.5004920671418717, "grad_norm": 0.84765625, "learning_rate": 2.5055020105664024e-05, "loss": 0.7871, "step": 6802 }, { "epoch": 0.5005656472752357, "grad_norm": 0.85546875, "learning_rate": 2.5049228523520135e-05, "loss": 1.1883, "step": 6803 }, { "epoch": 0.5006392274085997, "grad_norm": 0.875, "learning_rate": 2.5043436938734245e-05, "loss": 1.0403, "step": 6804 }, { "epoch": 0.5007128075419637, "grad_norm": 0.8515625, "learning_rate": 2.503764535161717e-05, "loss": 1.0646, "step": 6805 }, { "epoch": 0.5007863876753277, "grad_norm": 1.03125, "learning_rate": 2.503185376247975e-05, "loss": 0.8878, "step": 6806 }, { "epoch": 0.5008599678086917, "grad_norm": 0.7578125, "learning_rate": 2.5026062171632796e-05, "loss": 0.9242, "step": 6807 }, { "epoch": 0.5009335479420557, "grad_norm": 0.76953125, "learning_rate": 2.5020270579387127e-05, "loss": 0.9098, "step": 6808 }, { "epoch": 0.5010071280754196, "grad_norm": 0.9453125, "learning_rate": 2.501447898605358e-05, "loss": 0.9524, "step": 6809 }, { "epoch": 0.5010807082087836, "grad_norm": 0.81640625, "learning_rate": 2.500868739194297e-05, "loss": 0.8059, "step": 6810 }, { "epoch": 0.5011542883421476, "grad_norm": 1.046875, "learning_rate": 2.5002895797366134e-05, "loss": 0.758, "step": 6811 }, { "epoch": 0.5012278684755116, "grad_norm": 0.734375, "learning_rate": 2.4997104202633875e-05, "loss": 0.7122, "step": 6812 }, { "epoch": 0.5013014486088756, "grad_norm": 0.80859375, "learning_rate": 2.499131260805703e-05, "loss": 0.7517, "step": 6813 }, { "epoch": 0.5013750287422396, "grad_norm": 1.0625, "learning_rate": 2.4985521013946425e-05, "loss": 1.0842, "step": 6814 }, { "epoch": 0.5014486088756036, "grad_norm": 0.984375, "learning_rate": 2.4979729420612875e-05, "loss": 1.4459, "step": 6815 }, { "epoch": 0.5015221890089676, "grad_norm": 0.87109375, "learning_rate": 2.497393782836721e-05, "loss": 1.1544, "step": 6816 }, { "epoch": 0.5015957691423316, "grad_norm": 0.9375, "learning_rate": 2.496814623752025e-05, "loss": 1.3312, "step": 6817 }, { "epoch": 0.5016693492756955, "grad_norm": 1.078125, "learning_rate": 2.4962354648382827e-05, "loss": 1.2026, "step": 6818 }, { "epoch": 0.5017429294090595, "grad_norm": 0.8359375, "learning_rate": 2.4956563061265764e-05, "loss": 0.5985, "step": 6819 }, { "epoch": 0.5018165095424235, "grad_norm": 0.7421875, "learning_rate": 2.495077147647987e-05, "loss": 0.8615, "step": 6820 }, { "epoch": 0.5018900896757875, "grad_norm": 1.140625, "learning_rate": 2.494497989433598e-05, "loss": 1.1456, "step": 6821 }, { "epoch": 0.5019636698091515, "grad_norm": 0.8515625, "learning_rate": 2.4939188315144914e-05, "loss": 0.897, "step": 6822 }, { "epoch": 0.5020372499425155, "grad_norm": 0.92578125, "learning_rate": 2.49333967392175e-05, "loss": 1.0049, "step": 6823 }, { "epoch": 0.5021108300758795, "grad_norm": 0.88671875, "learning_rate": 2.4927605166864554e-05, "loss": 0.7346, "step": 6824 }, { "epoch": 0.5021844102092435, "grad_norm": 1.015625, "learning_rate": 2.49218135983969e-05, "loss": 1.0402, "step": 6825 }, { "epoch": 0.5022579903426075, "grad_norm": 0.890625, "learning_rate": 2.4916022034125365e-05, "loss": 1.0197, "step": 6826 }, { "epoch": 0.5023315704759714, "grad_norm": 0.75390625, "learning_rate": 2.4910230474360764e-05, "loss": 0.7358, "step": 6827 }, { "epoch": 0.5024051506093354, "grad_norm": 0.95703125, "learning_rate": 2.4904438919413938e-05, "loss": 1.1908, "step": 6828 }, { "epoch": 0.5024787307426994, "grad_norm": 0.828125, "learning_rate": 2.4898647369595682e-05, "loss": 0.6124, "step": 6829 }, { "epoch": 0.5025523108760634, "grad_norm": 0.76953125, "learning_rate": 2.489285582521683e-05, "loss": 0.9041, "step": 6830 }, { "epoch": 0.5026258910094275, "grad_norm": 0.8125, "learning_rate": 2.4887064286588206e-05, "loss": 0.8017, "step": 6831 }, { "epoch": 0.5026994711427915, "grad_norm": 0.87109375, "learning_rate": 2.488127275402063e-05, "loss": 0.9575, "step": 6832 }, { "epoch": 0.5027730512761555, "grad_norm": 0.94921875, "learning_rate": 2.4875481227824928e-05, "loss": 0.889, "step": 6833 }, { "epoch": 0.5028466314095195, "grad_norm": 1.0390625, "learning_rate": 2.4869689708311905e-05, "loss": 0.9494, "step": 6834 }, { "epoch": 0.5029202115428835, "grad_norm": 0.765625, "learning_rate": 2.486389819579239e-05, "loss": 0.8909, "step": 6835 }, { "epoch": 0.5029937916762474, "grad_norm": 1.0625, "learning_rate": 2.4858106690577198e-05, "loss": 1.0806, "step": 6836 }, { "epoch": 0.5030673718096114, "grad_norm": 0.83984375, "learning_rate": 2.4852315192977162e-05, "loss": 1.1335, "step": 6837 }, { "epoch": 0.5031409519429754, "grad_norm": 0.81640625, "learning_rate": 2.484652370330308e-05, "loss": 1.1262, "step": 6838 }, { "epoch": 0.5032145320763394, "grad_norm": 0.83203125, "learning_rate": 2.4840732221865788e-05, "loss": 0.7607, "step": 6839 }, { "epoch": 0.5032881122097034, "grad_norm": 0.9375, "learning_rate": 2.4834940748976095e-05, "loss": 0.8848, "step": 6840 }, { "epoch": 0.5033616923430674, "grad_norm": 1.0703125, "learning_rate": 2.4829149284944814e-05, "loss": 1.009, "step": 6841 }, { "epoch": 0.5034352724764314, "grad_norm": 0.92578125, "learning_rate": 2.482335783008278e-05, "loss": 0.9775, "step": 6842 }, { "epoch": 0.5035088526097954, "grad_norm": 0.90625, "learning_rate": 2.4817566384700787e-05, "loss": 0.6439, "step": 6843 }, { "epoch": 0.5035824327431594, "grad_norm": 1.0078125, "learning_rate": 2.4811774949109668e-05, "loss": 1.2899, "step": 6844 }, { "epoch": 0.5036560128765233, "grad_norm": 0.98046875, "learning_rate": 2.4805983523620226e-05, "loss": 1.7501, "step": 6845 }, { "epoch": 0.5037295930098873, "grad_norm": 0.92578125, "learning_rate": 2.4800192108543296e-05, "loss": 0.9806, "step": 6846 }, { "epoch": 0.5038031731432513, "grad_norm": 1.0546875, "learning_rate": 2.4794400704189664e-05, "loss": 0.9335, "step": 6847 }, { "epoch": 0.5038767532766153, "grad_norm": 0.8046875, "learning_rate": 2.478860931087016e-05, "loss": 0.9179, "step": 6848 }, { "epoch": 0.5039503334099793, "grad_norm": 0.79296875, "learning_rate": 2.4782817928895598e-05, "loss": 0.6442, "step": 6849 }, { "epoch": 0.5040239135433433, "grad_norm": 0.9296875, "learning_rate": 2.477702655857678e-05, "loss": 0.9782, "step": 6850 }, { "epoch": 0.5040974936767073, "grad_norm": 0.81640625, "learning_rate": 2.477123520022454e-05, "loss": 0.677, "step": 6851 }, { "epoch": 0.5041710738100713, "grad_norm": 0.86328125, "learning_rate": 2.4765443854149667e-05, "loss": 1.0652, "step": 6852 }, { "epoch": 0.5042446539434353, "grad_norm": 0.75, "learning_rate": 2.4759652520662975e-05, "loss": 0.7857, "step": 6853 }, { "epoch": 0.5043182340767992, "grad_norm": 0.7734375, "learning_rate": 2.475386120007528e-05, "loss": 0.6471, "step": 6854 }, { "epoch": 0.5043918142101632, "grad_norm": 0.84765625, "learning_rate": 2.4748069892697398e-05, "loss": 0.799, "step": 6855 }, { "epoch": 0.5044653943435272, "grad_norm": 0.90234375, "learning_rate": 2.4742278598840122e-05, "loss": 1.0061, "step": 6856 }, { "epoch": 0.5045389744768912, "grad_norm": 0.94140625, "learning_rate": 2.4736487318814267e-05, "loss": 0.7325, "step": 6857 }, { "epoch": 0.5046125546102552, "grad_norm": 0.83203125, "learning_rate": 2.4730696052930638e-05, "loss": 0.8355, "step": 6858 }, { "epoch": 0.5046861347436192, "grad_norm": 0.93359375, "learning_rate": 2.4724904801500043e-05, "loss": 0.8914, "step": 6859 }, { "epoch": 0.5047597148769832, "grad_norm": 0.984375, "learning_rate": 2.4719113564833303e-05, "loss": 1.6786, "step": 6860 }, { "epoch": 0.5048332950103472, "grad_norm": 1.0625, "learning_rate": 2.4713322343241192e-05, "loss": 1.0828, "step": 6861 }, { "epoch": 0.5049068751437112, "grad_norm": 0.796875, "learning_rate": 2.470753113703453e-05, "loss": 0.7747, "step": 6862 }, { "epoch": 0.5049804552770751, "grad_norm": 1.0234375, "learning_rate": 2.470173994652412e-05, "loss": 1.0917, "step": 6863 }, { "epoch": 0.5050540354104391, "grad_norm": 0.94140625, "learning_rate": 2.4695948772020756e-05, "loss": 1.1608, "step": 6864 }, { "epoch": 0.5051276155438031, "grad_norm": 0.6640625, "learning_rate": 2.469015761383527e-05, "loss": 0.7306, "step": 6865 }, { "epoch": 0.5052011956771671, "grad_norm": 0.76953125, "learning_rate": 2.4684366472278423e-05, "loss": 0.706, "step": 6866 }, { "epoch": 0.5052747758105312, "grad_norm": 0.80859375, "learning_rate": 2.4678575347661034e-05, "loss": 0.7526, "step": 6867 }, { "epoch": 0.5053483559438952, "grad_norm": 0.9921875, "learning_rate": 2.4672784240293897e-05, "loss": 0.9766, "step": 6868 }, { "epoch": 0.5054219360772592, "grad_norm": 0.90234375, "learning_rate": 2.4666993150487818e-05, "loss": 1.0136, "step": 6869 }, { "epoch": 0.5054955162106232, "grad_norm": 1.0234375, "learning_rate": 2.4661202078553584e-05, "loss": 1.1905, "step": 6870 }, { "epoch": 0.5055690963439872, "grad_norm": 0.69921875, "learning_rate": 2.465541102480199e-05, "loss": 0.7406, "step": 6871 }, { "epoch": 0.5056426764773511, "grad_norm": 0.80859375, "learning_rate": 2.464961998954384e-05, "loss": 0.6446, "step": 6872 }, { "epoch": 0.5057162566107151, "grad_norm": 1.0703125, "learning_rate": 2.464382897308992e-05, "loss": 0.8695, "step": 6873 }, { "epoch": 0.5057898367440791, "grad_norm": 0.84765625, "learning_rate": 2.463803797575104e-05, "loss": 1.2651, "step": 6874 }, { "epoch": 0.5058634168774431, "grad_norm": 0.94921875, "learning_rate": 2.463224699783797e-05, "loss": 0.7668, "step": 6875 }, { "epoch": 0.5059369970108071, "grad_norm": 0.90234375, "learning_rate": 2.4626456039661503e-05, "loss": 0.8345, "step": 6876 }, { "epoch": 0.5060105771441711, "grad_norm": 0.88671875, "learning_rate": 2.462066510153243e-05, "loss": 1.0072, "step": 6877 }, { "epoch": 0.5060841572775351, "grad_norm": 0.921875, "learning_rate": 2.4614874183761564e-05, "loss": 1.4951, "step": 6878 }, { "epoch": 0.5061577374108991, "grad_norm": 0.9375, "learning_rate": 2.460908328665966e-05, "loss": 1.0984, "step": 6879 }, { "epoch": 0.5062313175442631, "grad_norm": 1.0078125, "learning_rate": 2.460329241053752e-05, "loss": 1.1923, "step": 6880 }, { "epoch": 0.506304897677627, "grad_norm": 0.828125, "learning_rate": 2.4597501555705925e-05, "loss": 0.8413, "step": 6881 }, { "epoch": 0.506378477810991, "grad_norm": 1.0546875, "learning_rate": 2.4591710722475662e-05, "loss": 1.3315, "step": 6882 }, { "epoch": 0.506452057944355, "grad_norm": 0.7109375, "learning_rate": 2.4585919911157522e-05, "loss": 0.7327, "step": 6883 }, { "epoch": 0.506525638077719, "grad_norm": 0.8515625, "learning_rate": 2.4580129122062267e-05, "loss": 0.7946, "step": 6884 }, { "epoch": 0.506599218211083, "grad_norm": 0.859375, "learning_rate": 2.4574338355500694e-05, "loss": 0.6864, "step": 6885 }, { "epoch": 0.506672798344447, "grad_norm": 0.84765625, "learning_rate": 2.4568547611783575e-05, "loss": 0.7982, "step": 6886 }, { "epoch": 0.506746378477811, "grad_norm": 1.1015625, "learning_rate": 2.4562756891221698e-05, "loss": 1.0393, "step": 6887 }, { "epoch": 0.506819958611175, "grad_norm": 0.859375, "learning_rate": 2.4556966194125826e-05, "loss": 1.0597, "step": 6888 }, { "epoch": 0.506893538744539, "grad_norm": 1.046875, "learning_rate": 2.4551175520806744e-05, "loss": 1.2202, "step": 6889 }, { "epoch": 0.5069671188779029, "grad_norm": 0.63671875, "learning_rate": 2.4545384871575215e-05, "loss": 0.5673, "step": 6890 }, { "epoch": 0.5070406990112669, "grad_norm": 0.67578125, "learning_rate": 2.4539594246742027e-05, "loss": 0.6118, "step": 6891 }, { "epoch": 0.5071142791446309, "grad_norm": 0.72265625, "learning_rate": 2.453380364661796e-05, "loss": 0.9154, "step": 6892 }, { "epoch": 0.5071878592779949, "grad_norm": 0.76171875, "learning_rate": 2.4528013071513757e-05, "loss": 0.7346, "step": 6893 }, { "epoch": 0.5072614394113589, "grad_norm": 0.765625, "learning_rate": 2.45222225217402e-05, "loss": 0.7266, "step": 6894 }, { "epoch": 0.5073350195447229, "grad_norm": 0.73046875, "learning_rate": 2.4516431997608062e-05, "loss": 0.6065, "step": 6895 }, { "epoch": 0.5074085996780869, "grad_norm": 0.6328125, "learning_rate": 2.451064149942811e-05, "loss": 0.6672, "step": 6896 }, { "epoch": 0.5074821798114509, "grad_norm": 0.90625, "learning_rate": 2.4504851027511094e-05, "loss": 0.7494, "step": 6897 }, { "epoch": 0.5075557599448149, "grad_norm": 0.828125, "learning_rate": 2.449906058216779e-05, "loss": 1.0767, "step": 6898 }, { "epoch": 0.5076293400781788, "grad_norm": 1.125, "learning_rate": 2.449327016370896e-05, "loss": 1.2438, "step": 6899 }, { "epoch": 0.5077029202115428, "grad_norm": 0.7890625, "learning_rate": 2.4487479772445365e-05, "loss": 0.752, "step": 6900 }, { "epoch": 0.5077765003449068, "grad_norm": 0.89453125, "learning_rate": 2.4481689408687772e-05, "loss": 0.9818, "step": 6901 }, { "epoch": 0.5078500804782708, "grad_norm": 0.90625, "learning_rate": 2.4475899072746918e-05, "loss": 0.7712, "step": 6902 }, { "epoch": 0.5079236606116349, "grad_norm": 0.74609375, "learning_rate": 2.4470108764933564e-05, "loss": 0.8489, "step": 6903 }, { "epoch": 0.5079972407449989, "grad_norm": 1.109375, "learning_rate": 2.446431848555848e-05, "loss": 0.9714, "step": 6904 }, { "epoch": 0.5080708208783629, "grad_norm": 0.8984375, "learning_rate": 2.445852823493241e-05, "loss": 0.9575, "step": 6905 }, { "epoch": 0.5081444010117269, "grad_norm": 1.125, "learning_rate": 2.4452738013366117e-05, "loss": 1.476, "step": 6906 }, { "epoch": 0.5082179811450909, "grad_norm": 1.0625, "learning_rate": 2.444694782117033e-05, "loss": 0.7326, "step": 6907 }, { "epoch": 0.5082915612784548, "grad_norm": 1.0, "learning_rate": 2.444115765865581e-05, "loss": 0.8245, "step": 6908 }, { "epoch": 0.5083651414118188, "grad_norm": 0.734375, "learning_rate": 2.4435367526133305e-05, "loss": 0.7986, "step": 6909 }, { "epoch": 0.5084387215451828, "grad_norm": 0.7734375, "learning_rate": 2.442957742391356e-05, "loss": 0.6466, "step": 6910 }, { "epoch": 0.5085123016785468, "grad_norm": 0.91796875, "learning_rate": 2.4423787352307314e-05, "loss": 1.061, "step": 6911 }, { "epoch": 0.5085858818119108, "grad_norm": 0.84765625, "learning_rate": 2.4417997311625313e-05, "loss": 0.6697, "step": 6912 }, { "epoch": 0.5086594619452748, "grad_norm": 0.87109375, "learning_rate": 2.4412207302178298e-05, "loss": 0.9173, "step": 6913 }, { "epoch": 0.5087330420786388, "grad_norm": 0.8671875, "learning_rate": 2.4406417324277003e-05, "loss": 1.1273, "step": 6914 }, { "epoch": 0.5088066222120028, "grad_norm": 1.0390625, "learning_rate": 2.4400627378232183e-05, "loss": 1.1021, "step": 6915 }, { "epoch": 0.5088802023453668, "grad_norm": 0.85546875, "learning_rate": 2.4394837464354548e-05, "loss": 0.8225, "step": 6916 }, { "epoch": 0.5089537824787307, "grad_norm": 1.015625, "learning_rate": 2.438904758295484e-05, "loss": 1.2826, "step": 6917 }, { "epoch": 0.5090273626120947, "grad_norm": 0.6796875, "learning_rate": 2.4383257734343796e-05, "loss": 0.6253, "step": 6918 }, { "epoch": 0.5091009427454587, "grad_norm": 1.25, "learning_rate": 2.4377467918832157e-05, "loss": 1.2076, "step": 6919 }, { "epoch": 0.5091745228788227, "grad_norm": 1.0, "learning_rate": 2.437167813673063e-05, "loss": 1.6609, "step": 6920 }, { "epoch": 0.5092481030121867, "grad_norm": 0.83984375, "learning_rate": 2.4365888388349943e-05, "loss": 1.054, "step": 6921 }, { "epoch": 0.5093216831455507, "grad_norm": 0.89453125, "learning_rate": 2.4360098674000835e-05, "loss": 1.1305, "step": 6922 }, { "epoch": 0.5093952632789147, "grad_norm": 0.81640625, "learning_rate": 2.435430899399402e-05, "loss": 0.6593, "step": 6923 }, { "epoch": 0.5094688434122787, "grad_norm": 0.7421875, "learning_rate": 2.434851934864023e-05, "loss": 0.8338, "step": 6924 }, { "epoch": 0.5095424235456427, "grad_norm": 0.984375, "learning_rate": 2.434272973825017e-05, "loss": 1.2494, "step": 6925 }, { "epoch": 0.5096160036790066, "grad_norm": 0.8828125, "learning_rate": 2.4336940163134565e-05, "loss": 1.015, "step": 6926 }, { "epoch": 0.5096895838123706, "grad_norm": 0.85546875, "learning_rate": 2.4331150623604126e-05, "loss": 1.0062, "step": 6927 }, { "epoch": 0.5097631639457346, "grad_norm": 0.9609375, "learning_rate": 2.4325361119969582e-05, "loss": 0.9374, "step": 6928 }, { "epoch": 0.5098367440790986, "grad_norm": 0.87890625, "learning_rate": 2.431957165254162e-05, "loss": 0.6762, "step": 6929 }, { "epoch": 0.5099103242124626, "grad_norm": 0.984375, "learning_rate": 2.431378222163096e-05, "loss": 0.8244, "step": 6930 }, { "epoch": 0.5099839043458266, "grad_norm": 0.87890625, "learning_rate": 2.4307992827548316e-05, "loss": 0.8764, "step": 6931 }, { "epoch": 0.5100574844791906, "grad_norm": 1.1953125, "learning_rate": 2.4302203470604388e-05, "loss": 1.1631, "step": 6932 }, { "epoch": 0.5101310646125546, "grad_norm": 0.6484375, "learning_rate": 2.4296414151109893e-05, "loss": 0.732, "step": 6933 }, { "epoch": 0.5102046447459186, "grad_norm": 0.98828125, "learning_rate": 2.4290624869375515e-05, "loss": 0.9829, "step": 6934 }, { "epoch": 0.5102782248792826, "grad_norm": 0.84375, "learning_rate": 2.4284835625711957e-05, "loss": 0.95, "step": 6935 }, { "epoch": 0.5103518050126465, "grad_norm": 1.109375, "learning_rate": 2.427904642042992e-05, "loss": 1.4681, "step": 6936 }, { "epoch": 0.5104253851460105, "grad_norm": 0.828125, "learning_rate": 2.42732572538401e-05, "loss": 0.8922, "step": 6937 }, { "epoch": 0.5104989652793746, "grad_norm": 0.765625, "learning_rate": 2.42674681262532e-05, "loss": 0.966, "step": 6938 }, { "epoch": 0.5105725454127386, "grad_norm": 0.99609375, "learning_rate": 2.426167903797989e-05, "loss": 1.0419, "step": 6939 }, { "epoch": 0.5106461255461026, "grad_norm": 0.8828125, "learning_rate": 2.4255889989330877e-05, "loss": 0.7437, "step": 6940 }, { "epoch": 0.5107197056794666, "grad_norm": 0.7421875, "learning_rate": 2.425010098061684e-05, "loss": 0.7891, "step": 6941 }, { "epoch": 0.5107932858128306, "grad_norm": 0.84375, "learning_rate": 2.424431201214848e-05, "loss": 0.8571, "step": 6942 }, { "epoch": 0.5108668659461946, "grad_norm": 0.69140625, "learning_rate": 2.423852308423645e-05, "loss": 0.398, "step": 6943 }, { "epoch": 0.5109404460795586, "grad_norm": 0.89453125, "learning_rate": 2.4232734197191448e-05, "loss": 0.8547, "step": 6944 }, { "epoch": 0.5110140262129225, "grad_norm": 1.1171875, "learning_rate": 2.422694535132416e-05, "loss": 1.027, "step": 6945 }, { "epoch": 0.5110876063462865, "grad_norm": 0.81640625, "learning_rate": 2.422115654694525e-05, "loss": 0.8691, "step": 6946 }, { "epoch": 0.5111611864796505, "grad_norm": 0.83984375, "learning_rate": 2.4215367784365413e-05, "loss": 0.9444, "step": 6947 }, { "epoch": 0.5112347666130145, "grad_norm": 0.7421875, "learning_rate": 2.420957906389529e-05, "loss": 0.8475, "step": 6948 }, { "epoch": 0.5113083467463785, "grad_norm": 0.8046875, "learning_rate": 2.4203790385845564e-05, "loss": 0.78, "step": 6949 }, { "epoch": 0.5113819268797425, "grad_norm": 1.2734375, "learning_rate": 2.419800175052691e-05, "loss": 1.2925, "step": 6950 }, { "epoch": 0.5114555070131065, "grad_norm": 0.8203125, "learning_rate": 2.4192213158249987e-05, "loss": 0.9363, "step": 6951 }, { "epoch": 0.5115290871464705, "grad_norm": 0.75390625, "learning_rate": 2.4186424609325455e-05, "loss": 0.8718, "step": 6952 }, { "epoch": 0.5116026672798345, "grad_norm": 0.69140625, "learning_rate": 2.418063610406398e-05, "loss": 0.7089, "step": 6953 }, { "epoch": 0.5116762474131984, "grad_norm": 0.70703125, "learning_rate": 2.4174847642776217e-05, "loss": 0.8138, "step": 6954 }, { "epoch": 0.5117498275465624, "grad_norm": 0.91015625, "learning_rate": 2.416905922577282e-05, "loss": 0.8168, "step": 6955 }, { "epoch": 0.5118234076799264, "grad_norm": 1.0546875, "learning_rate": 2.416327085336445e-05, "loss": 0.9951, "step": 6956 }, { "epoch": 0.5118969878132904, "grad_norm": 0.79296875, "learning_rate": 2.4157482525861748e-05, "loss": 0.9895, "step": 6957 }, { "epoch": 0.5119705679466544, "grad_norm": 1.0078125, "learning_rate": 2.415169424357537e-05, "loss": 0.7844, "step": 6958 }, { "epoch": 0.5120441480800184, "grad_norm": 0.79296875, "learning_rate": 2.414590600681596e-05, "loss": 0.6938, "step": 6959 }, { "epoch": 0.5121177282133824, "grad_norm": 1.03125, "learning_rate": 2.414011781589417e-05, "loss": 1.1322, "step": 6960 }, { "epoch": 0.5121913083467464, "grad_norm": 0.94140625, "learning_rate": 2.4134329671120623e-05, "loss": 1.162, "step": 6961 }, { "epoch": 0.5122648884801104, "grad_norm": 0.73828125, "learning_rate": 2.4128541572805966e-05, "loss": 0.6704, "step": 6962 }, { "epoch": 0.5123384686134743, "grad_norm": 0.72265625, "learning_rate": 2.4122753521260837e-05, "loss": 0.6643, "step": 6963 }, { "epoch": 0.5124120487468383, "grad_norm": 0.90234375, "learning_rate": 2.4116965516795873e-05, "loss": 1.1199, "step": 6964 }, { "epoch": 0.5124856288802023, "grad_norm": 0.71484375, "learning_rate": 2.4111177559721703e-05, "loss": 0.664, "step": 6965 }, { "epoch": 0.5125592090135663, "grad_norm": 0.88671875, "learning_rate": 2.4105389650348954e-05, "loss": 0.8739, "step": 6966 }, { "epoch": 0.5126327891469303, "grad_norm": 1.1484375, "learning_rate": 2.4099601788988247e-05, "loss": 1.1971, "step": 6967 }, { "epoch": 0.5127063692802943, "grad_norm": 1.0546875, "learning_rate": 2.4093813975950215e-05, "loss": 0.9507, "step": 6968 }, { "epoch": 0.5127799494136583, "grad_norm": 0.75390625, "learning_rate": 2.4088026211545474e-05, "loss": 0.8032, "step": 6969 }, { "epoch": 0.5128535295470223, "grad_norm": 0.75, "learning_rate": 2.408223849608465e-05, "loss": 1.1861, "step": 6970 }, { "epoch": 0.5129271096803864, "grad_norm": 0.94921875, "learning_rate": 2.4076450829878347e-05, "loss": 1.0445, "step": 6971 }, { "epoch": 0.5130006898137502, "grad_norm": 1.171875, "learning_rate": 2.4070663213237185e-05, "loss": 1.5042, "step": 6972 }, { "epoch": 0.5130742699471142, "grad_norm": 0.734375, "learning_rate": 2.406487564647177e-05, "loss": 0.7042, "step": 6973 }, { "epoch": 0.5131478500804783, "grad_norm": 1.078125, "learning_rate": 2.4059088129892725e-05, "loss": 1.1904, "step": 6974 }, { "epoch": 0.5132214302138423, "grad_norm": 0.87890625, "learning_rate": 2.4053300663810633e-05, "loss": 0.8076, "step": 6975 }, { "epoch": 0.5132950103472063, "grad_norm": 0.9453125, "learning_rate": 2.4047513248536104e-05, "loss": 0.7586, "step": 6976 }, { "epoch": 0.5133685904805703, "grad_norm": 0.75390625, "learning_rate": 2.4041725884379744e-05, "loss": 0.5449, "step": 6977 }, { "epoch": 0.5134421706139343, "grad_norm": 0.75, "learning_rate": 2.4035938571652143e-05, "loss": 0.7126, "step": 6978 }, { "epoch": 0.5135157507472983, "grad_norm": 0.8515625, "learning_rate": 2.4030151310663904e-05, "loss": 0.8298, "step": 6979 }, { "epoch": 0.5135893308806623, "grad_norm": 0.69921875, "learning_rate": 2.402436410172561e-05, "loss": 0.5451, "step": 6980 }, { "epoch": 0.5136629110140262, "grad_norm": 0.94921875, "learning_rate": 2.401857694514785e-05, "loss": 0.7817, "step": 6981 }, { "epoch": 0.5137364911473902, "grad_norm": 0.66015625, "learning_rate": 2.4012789841241205e-05, "loss": 0.6687, "step": 6982 }, { "epoch": 0.5138100712807542, "grad_norm": 1.1015625, "learning_rate": 2.4007002790316276e-05, "loss": 1.3465, "step": 6983 }, { "epoch": 0.5138836514141182, "grad_norm": 0.91796875, "learning_rate": 2.4001215792683625e-05, "loss": 1.1978, "step": 6984 }, { "epoch": 0.5139572315474822, "grad_norm": 1.015625, "learning_rate": 2.399542884865384e-05, "loss": 1.0106, "step": 6985 }, { "epoch": 0.5140308116808462, "grad_norm": 0.83984375, "learning_rate": 2.3989641958537486e-05, "loss": 0.8311, "step": 6986 }, { "epoch": 0.5141043918142102, "grad_norm": 0.7265625, "learning_rate": 2.3983855122645142e-05, "loss": 0.6216, "step": 6987 }, { "epoch": 0.5141779719475742, "grad_norm": 0.875, "learning_rate": 2.3978068341287385e-05, "loss": 1.3475, "step": 6988 }, { "epoch": 0.5142515520809382, "grad_norm": 1.078125, "learning_rate": 2.397228161477476e-05, "loss": 0.9304, "step": 6989 }, { "epoch": 0.5143251322143021, "grad_norm": 0.86328125, "learning_rate": 2.3966494943417838e-05, "loss": 0.944, "step": 6990 }, { "epoch": 0.5143987123476661, "grad_norm": 0.79296875, "learning_rate": 2.3960708327527184e-05, "loss": 0.7263, "step": 6991 }, { "epoch": 0.5144722924810301, "grad_norm": 0.91015625, "learning_rate": 2.3954921767413352e-05, "loss": 0.9915, "step": 6992 }, { "epoch": 0.5145458726143941, "grad_norm": 0.98828125, "learning_rate": 2.3949135263386894e-05, "loss": 0.864, "step": 6993 }, { "epoch": 0.5146194527477581, "grad_norm": 0.97265625, "learning_rate": 2.3943348815758356e-05, "loss": 1.1575, "step": 6994 }, { "epoch": 0.5146930328811221, "grad_norm": 0.9296875, "learning_rate": 2.3937562424838294e-05, "loss": 0.9029, "step": 6995 }, { "epoch": 0.5147666130144861, "grad_norm": 0.83203125, "learning_rate": 2.3931776090937252e-05, "loss": 0.859, "step": 6996 }, { "epoch": 0.5148401931478501, "grad_norm": 0.90625, "learning_rate": 2.3925989814365774e-05, "loss": 0.8189, "step": 6997 }, { "epoch": 0.5149137732812141, "grad_norm": 0.81640625, "learning_rate": 2.3920203595434387e-05, "loss": 0.7732, "step": 6998 }, { "epoch": 0.514987353414578, "grad_norm": 0.81640625, "learning_rate": 2.3914417434453633e-05, "loss": 0.9874, "step": 6999 }, { "epoch": 0.515060933547942, "grad_norm": 0.921875, "learning_rate": 2.390863133173405e-05, "loss": 1.3188, "step": 7000 }, { "epoch": 0.515134513681306, "grad_norm": 0.88671875, "learning_rate": 2.390284528758617e-05, "loss": 1.1229, "step": 7001 }, { "epoch": 0.51520809381467, "grad_norm": 0.83984375, "learning_rate": 2.38970593023205e-05, "loss": 1.0909, "step": 7002 }, { "epoch": 0.515281673948034, "grad_norm": 1.015625, "learning_rate": 2.3891273376247572e-05, "loss": 1.0579, "step": 7003 }, { "epoch": 0.515355254081398, "grad_norm": 1.0078125, "learning_rate": 2.388548750967791e-05, "loss": 0.8992, "step": 7004 }, { "epoch": 0.515428834214762, "grad_norm": 0.8046875, "learning_rate": 2.3879701702922028e-05, "loss": 0.8122, "step": 7005 }, { "epoch": 0.515502414348126, "grad_norm": 0.71875, "learning_rate": 2.3873915956290446e-05, "loss": 0.649, "step": 7006 }, { "epoch": 0.51557599448149, "grad_norm": 0.90625, "learning_rate": 2.386813027009366e-05, "loss": 0.831, "step": 7007 }, { "epoch": 0.515649574614854, "grad_norm": 0.69921875, "learning_rate": 2.3862344644642187e-05, "loss": 0.6527, "step": 7008 }, { "epoch": 0.515723154748218, "grad_norm": 0.8125, "learning_rate": 2.385655908024653e-05, "loss": 0.7749, "step": 7009 }, { "epoch": 0.515796734881582, "grad_norm": 0.94140625, "learning_rate": 2.385077357721718e-05, "loss": 0.7606, "step": 7010 }, { "epoch": 0.515870315014946, "grad_norm": 0.734375, "learning_rate": 2.3844988135864654e-05, "loss": 0.8744, "step": 7011 }, { "epoch": 0.51594389514831, "grad_norm": 0.71875, "learning_rate": 2.3839202756499426e-05, "loss": 0.6514, "step": 7012 }, { "epoch": 0.516017475281674, "grad_norm": 1.125, "learning_rate": 2.3833417439431993e-05, "loss": 1.3973, "step": 7013 }, { "epoch": 0.516091055415038, "grad_norm": 1.015625, "learning_rate": 2.3827632184972842e-05, "loss": 0.8511, "step": 7014 }, { "epoch": 0.516164635548402, "grad_norm": 0.9375, "learning_rate": 2.382184699343247e-05, "loss": 0.8448, "step": 7015 }, { "epoch": 0.516238215681766, "grad_norm": 0.88671875, "learning_rate": 2.3816061865121333e-05, "loss": 0.9146, "step": 7016 }, { "epoch": 0.5163117958151299, "grad_norm": 0.703125, "learning_rate": 2.3810276800349918e-05, "loss": 0.6113, "step": 7017 }, { "epoch": 0.5163853759484939, "grad_norm": 0.8203125, "learning_rate": 2.38044917994287e-05, "loss": 0.7426, "step": 7018 }, { "epoch": 0.5164589560818579, "grad_norm": 0.734375, "learning_rate": 2.379870686266815e-05, "loss": 0.8616, "step": 7019 }, { "epoch": 0.5165325362152219, "grad_norm": 1.0703125, "learning_rate": 2.379292199037874e-05, "loss": 1.7812, "step": 7020 }, { "epoch": 0.5166061163485859, "grad_norm": 0.84765625, "learning_rate": 2.3787137182870926e-05, "loss": 0.8274, "step": 7021 }, { "epoch": 0.5166796964819499, "grad_norm": 0.9765625, "learning_rate": 2.3781352440455162e-05, "loss": 1.1996, "step": 7022 }, { "epoch": 0.5167532766153139, "grad_norm": 2.90625, "learning_rate": 2.3775567763441915e-05, "loss": 0.9538, "step": 7023 }, { "epoch": 0.5168268567486779, "grad_norm": 0.75390625, "learning_rate": 2.3769783152141633e-05, "loss": 0.7558, "step": 7024 }, { "epoch": 0.5169004368820419, "grad_norm": 1.078125, "learning_rate": 2.3763998606864764e-05, "loss": 1.3663, "step": 7025 }, { "epoch": 0.5169740170154058, "grad_norm": 1.09375, "learning_rate": 2.375821412792176e-05, "loss": 1.1262, "step": 7026 }, { "epoch": 0.5170475971487698, "grad_norm": 0.99609375, "learning_rate": 2.3752429715623052e-05, "loss": 1.0163, "step": 7027 }, { "epoch": 0.5171211772821338, "grad_norm": 1.09375, "learning_rate": 2.3746645370279084e-05, "loss": 1.0332, "step": 7028 }, { "epoch": 0.5171947574154978, "grad_norm": 0.859375, "learning_rate": 2.3740861092200307e-05, "loss": 1.1006, "step": 7029 }, { "epoch": 0.5172683375488618, "grad_norm": 0.80859375, "learning_rate": 2.3735076881697124e-05, "loss": 1.0731, "step": 7030 }, { "epoch": 0.5173419176822258, "grad_norm": 0.8125, "learning_rate": 2.3729292739079975e-05, "loss": 0.7434, "step": 7031 }, { "epoch": 0.5174154978155898, "grad_norm": 0.609375, "learning_rate": 2.3723508664659286e-05, "loss": 0.6844, "step": 7032 }, { "epoch": 0.5174890779489538, "grad_norm": 1.125, "learning_rate": 2.371772465874548e-05, "loss": 1.2202, "step": 7033 }, { "epoch": 0.5175626580823178, "grad_norm": 0.7421875, "learning_rate": 2.3711940721648966e-05, "loss": 0.7353, "step": 7034 }, { "epoch": 0.5176362382156817, "grad_norm": 0.8046875, "learning_rate": 2.370615685368016e-05, "loss": 0.8891, "step": 7035 }, { "epoch": 0.5177098183490457, "grad_norm": 0.890625, "learning_rate": 2.3700373055149472e-05, "loss": 1.232, "step": 7036 }, { "epoch": 0.5177833984824097, "grad_norm": 0.984375, "learning_rate": 2.3694589326367305e-05, "loss": 0.8903, "step": 7037 }, { "epoch": 0.5178569786157737, "grad_norm": 0.89453125, "learning_rate": 2.3688805667644074e-05, "loss": 1.089, "step": 7038 }, { "epoch": 0.5179305587491377, "grad_norm": 0.9921875, "learning_rate": 2.3683022079290158e-05, "loss": 0.9619, "step": 7039 }, { "epoch": 0.5180041388825017, "grad_norm": 0.76171875, "learning_rate": 2.3677238561615958e-05, "loss": 1.0581, "step": 7040 }, { "epoch": 0.5180777190158657, "grad_norm": 1.0, "learning_rate": 2.3671455114931867e-05, "loss": 1.1741, "step": 7041 }, { "epoch": 0.5181512991492297, "grad_norm": 0.95703125, "learning_rate": 2.366567173954827e-05, "loss": 0.871, "step": 7042 }, { "epoch": 0.5182248792825938, "grad_norm": 0.99609375, "learning_rate": 2.3659888435775565e-05, "loss": 1.5283, "step": 7043 }, { "epoch": 0.5182984594159576, "grad_norm": 0.7109375, "learning_rate": 2.3654105203924105e-05, "loss": 0.7814, "step": 7044 }, { "epoch": 0.5183720395493217, "grad_norm": 0.80078125, "learning_rate": 2.364832204430427e-05, "loss": 0.8017, "step": 7045 }, { "epoch": 0.5184456196826857, "grad_norm": 0.9765625, "learning_rate": 2.364253895722644e-05, "loss": 1.2968, "step": 7046 }, { "epoch": 0.5185191998160497, "grad_norm": 2.703125, "learning_rate": 2.3636755943000995e-05, "loss": 0.6544, "step": 7047 }, { "epoch": 0.5185927799494137, "grad_norm": 1.0234375, "learning_rate": 2.3630973001938273e-05, "loss": 1.4115, "step": 7048 }, { "epoch": 0.5186663600827777, "grad_norm": 0.60546875, "learning_rate": 2.362519013434864e-05, "loss": 0.6817, "step": 7049 }, { "epoch": 0.5187399402161417, "grad_norm": 0.921875, "learning_rate": 2.361940734054246e-05, "loss": 1.2427, "step": 7050 }, { "epoch": 0.5188135203495057, "grad_norm": 1.0234375, "learning_rate": 2.361362462083008e-05, "loss": 0.8087, "step": 7051 }, { "epoch": 0.5188871004828697, "grad_norm": 0.8515625, "learning_rate": 2.3607841975521852e-05, "loss": 1.005, "step": 7052 }, { "epoch": 0.5189606806162336, "grad_norm": 0.9609375, "learning_rate": 2.3602059404928112e-05, "loss": 0.801, "step": 7053 }, { "epoch": 0.5190342607495976, "grad_norm": 0.94140625, "learning_rate": 2.35962769093592e-05, "loss": 1.3282, "step": 7054 }, { "epoch": 0.5191078408829616, "grad_norm": 0.69921875, "learning_rate": 2.359049448912546e-05, "loss": 0.7246, "step": 7055 }, { "epoch": 0.5191814210163256, "grad_norm": 1.0625, "learning_rate": 2.358471214453723e-05, "loss": 1.1222, "step": 7056 }, { "epoch": 0.5192550011496896, "grad_norm": 0.76953125, "learning_rate": 2.3578929875904812e-05, "loss": 0.9401, "step": 7057 }, { "epoch": 0.5193285812830536, "grad_norm": 0.77734375, "learning_rate": 2.3573147683538544e-05, "loss": 0.7846, "step": 7058 }, { "epoch": 0.5194021614164176, "grad_norm": 0.84765625, "learning_rate": 2.3567365567748744e-05, "loss": 0.6962, "step": 7059 }, { "epoch": 0.5194757415497816, "grad_norm": 0.8828125, "learning_rate": 2.3561583528845724e-05, "loss": 0.8994, "step": 7060 }, { "epoch": 0.5195493216831456, "grad_norm": 0.86328125, "learning_rate": 2.3555801567139816e-05, "loss": 0.9372, "step": 7061 }, { "epoch": 0.5196229018165095, "grad_norm": 0.8125, "learning_rate": 2.3550019682941295e-05, "loss": 1.2208, "step": 7062 }, { "epoch": 0.5196964819498735, "grad_norm": 0.83203125, "learning_rate": 2.3544237876560484e-05, "loss": 0.938, "step": 7063 }, { "epoch": 0.5197700620832375, "grad_norm": 1.34375, "learning_rate": 2.3538456148307674e-05, "loss": 1.3258, "step": 7064 }, { "epoch": 0.5198436422166015, "grad_norm": 1.109375, "learning_rate": 2.3532674498493172e-05, "loss": 1.2045, "step": 7065 }, { "epoch": 0.5199172223499655, "grad_norm": 0.97265625, "learning_rate": 2.3526892927427248e-05, "loss": 0.8768, "step": 7066 }, { "epoch": 0.5199908024833295, "grad_norm": 0.890625, "learning_rate": 2.3521111435420198e-05, "loss": 0.959, "step": 7067 }, { "epoch": 0.5200643826166935, "grad_norm": 0.84375, "learning_rate": 2.3515330022782307e-05, "loss": 0.958, "step": 7068 }, { "epoch": 0.5201379627500575, "grad_norm": 0.83984375, "learning_rate": 2.350954868982385e-05, "loss": 0.7206, "step": 7069 }, { "epoch": 0.5202115428834215, "grad_norm": 0.890625, "learning_rate": 2.3503767436855108e-05, "loss": 0.8993, "step": 7070 }, { "epoch": 0.5202851230167854, "grad_norm": 1.0078125, "learning_rate": 2.349798626418633e-05, "loss": 1.1485, "step": 7071 }, { "epoch": 0.5203587031501494, "grad_norm": 0.91796875, "learning_rate": 2.3492205172127797e-05, "loss": 0.8782, "step": 7072 }, { "epoch": 0.5204322832835134, "grad_norm": 0.99609375, "learning_rate": 2.3486424160989753e-05, "loss": 0.9922, "step": 7073 }, { "epoch": 0.5205058634168774, "grad_norm": 0.734375, "learning_rate": 2.3480643231082475e-05, "loss": 0.6067, "step": 7074 }, { "epoch": 0.5205794435502414, "grad_norm": 0.8203125, "learning_rate": 2.347486238271622e-05, "loss": 0.9247, "step": 7075 }, { "epoch": 0.5206530236836054, "grad_norm": 0.7421875, "learning_rate": 2.34690816162012e-05, "loss": 0.7634, "step": 7076 }, { "epoch": 0.5207266038169694, "grad_norm": 0.73828125, "learning_rate": 2.3463300931847684e-05, "loss": 0.9113, "step": 7077 }, { "epoch": 0.5208001839503335, "grad_norm": 0.94921875, "learning_rate": 2.34575203299659e-05, "loss": 0.9231, "step": 7078 }, { "epoch": 0.5208737640836975, "grad_norm": 0.77734375, "learning_rate": 2.34517398108661e-05, "loss": 0.4933, "step": 7079 }, { "epoch": 0.5209473442170613, "grad_norm": 0.84765625, "learning_rate": 2.344595937485849e-05, "loss": 0.8535, "step": 7080 }, { "epoch": 0.5210209243504254, "grad_norm": 0.96484375, "learning_rate": 2.3440179022253303e-05, "loss": 0.8081, "step": 7081 }, { "epoch": 0.5210945044837894, "grad_norm": 0.796875, "learning_rate": 2.3434398753360765e-05, "loss": 1.1482, "step": 7082 }, { "epoch": 0.5211680846171534, "grad_norm": 1.078125, "learning_rate": 2.3428618568491086e-05, "loss": 1.1784, "step": 7083 }, { "epoch": 0.5212416647505174, "grad_norm": 0.72265625, "learning_rate": 2.3422838467954495e-05, "loss": 0.8263, "step": 7084 }, { "epoch": 0.5213152448838814, "grad_norm": 0.87109375, "learning_rate": 2.341705845206117e-05, "loss": 0.8077, "step": 7085 }, { "epoch": 0.5213888250172454, "grad_norm": 1.1796875, "learning_rate": 2.3411278521121327e-05, "loss": 1.7572, "step": 7086 }, { "epoch": 0.5214624051506094, "grad_norm": 0.8203125, "learning_rate": 2.3405498675445166e-05, "loss": 0.8529, "step": 7087 }, { "epoch": 0.5215359852839734, "grad_norm": 0.8984375, "learning_rate": 2.3399718915342893e-05, "loss": 1.0166, "step": 7088 }, { "epoch": 0.5216095654173373, "grad_norm": 0.75390625, "learning_rate": 2.3393939241124672e-05, "loss": 0.741, "step": 7089 }, { "epoch": 0.5216831455507013, "grad_norm": 1.0703125, "learning_rate": 2.33881596531007e-05, "loss": 0.9169, "step": 7090 }, { "epoch": 0.5217567256840653, "grad_norm": 0.87109375, "learning_rate": 2.3382380151581154e-05, "loss": 0.9867, "step": 7091 }, { "epoch": 0.5218303058174293, "grad_norm": 0.8515625, "learning_rate": 2.337660073687621e-05, "loss": 1.0336, "step": 7092 }, { "epoch": 0.5219038859507933, "grad_norm": 0.734375, "learning_rate": 2.337082140929604e-05, "loss": 1.063, "step": 7093 }, { "epoch": 0.5219774660841573, "grad_norm": 1.125, "learning_rate": 2.3365042169150808e-05, "loss": 1.2778, "step": 7094 }, { "epoch": 0.5220510462175213, "grad_norm": 0.9453125, "learning_rate": 2.3359263016750673e-05, "loss": 0.8718, "step": 7095 }, { "epoch": 0.5221246263508853, "grad_norm": 0.78125, "learning_rate": 2.335348395240579e-05, "loss": 0.4805, "step": 7096 }, { "epoch": 0.5221982064842493, "grad_norm": 0.8203125, "learning_rate": 2.3347704976426328e-05, "loss": 0.8076, "step": 7097 }, { "epoch": 0.5222717866176132, "grad_norm": 0.83984375, "learning_rate": 2.334192608912241e-05, "loss": 0.7714, "step": 7098 }, { "epoch": 0.5223453667509772, "grad_norm": 0.90234375, "learning_rate": 2.333614729080418e-05, "loss": 0.9829, "step": 7099 }, { "epoch": 0.5224189468843412, "grad_norm": 0.8515625, "learning_rate": 2.3330368581781783e-05, "loss": 1.0025, "step": 7100 }, { "epoch": 0.5224925270177052, "grad_norm": 0.89453125, "learning_rate": 2.3324589962365357e-05, "loss": 0.8973, "step": 7101 }, { "epoch": 0.5225661071510692, "grad_norm": 0.984375, "learning_rate": 2.3318811432865032e-05, "loss": 1.0828, "step": 7102 }, { "epoch": 0.5226396872844332, "grad_norm": 0.9140625, "learning_rate": 2.3313032993590907e-05, "loss": 0.9739, "step": 7103 }, { "epoch": 0.5227132674177972, "grad_norm": 0.90234375, "learning_rate": 2.3307254644853122e-05, "loss": 0.9812, "step": 7104 }, { "epoch": 0.5227868475511612, "grad_norm": 0.859375, "learning_rate": 2.330147638696178e-05, "loss": 0.8089, "step": 7105 }, { "epoch": 0.5228604276845252, "grad_norm": 0.91015625, "learning_rate": 2.329569822022699e-05, "loss": 0.848, "step": 7106 }, { "epoch": 0.5229340078178891, "grad_norm": 1.1171875, "learning_rate": 2.3289920144958864e-05, "loss": 1.4171, "step": 7107 }, { "epoch": 0.5230075879512531, "grad_norm": 0.9765625, "learning_rate": 2.3284142161467493e-05, "loss": 0.9272, "step": 7108 }, { "epoch": 0.5230811680846171, "grad_norm": 0.8359375, "learning_rate": 2.3278364270062966e-05, "loss": 1.0096, "step": 7109 }, { "epoch": 0.5231547482179811, "grad_norm": 0.84765625, "learning_rate": 2.327258647105538e-05, "loss": 0.9844, "step": 7110 }, { "epoch": 0.5232283283513451, "grad_norm": 0.93359375, "learning_rate": 2.3266808764754824e-05, "loss": 1.5004, "step": 7111 }, { "epoch": 0.5233019084847091, "grad_norm": 0.9453125, "learning_rate": 2.326103115147136e-05, "loss": 0.9452, "step": 7112 }, { "epoch": 0.5233754886180731, "grad_norm": 0.8046875, "learning_rate": 2.3255253631515062e-05, "loss": 1.2341, "step": 7113 }, { "epoch": 0.5234490687514372, "grad_norm": 0.8359375, "learning_rate": 2.3249476205196014e-05, "loss": 0.9148, "step": 7114 }, { "epoch": 0.5235226488848012, "grad_norm": 0.90625, "learning_rate": 2.3243698872824267e-05, "loss": 1.3095, "step": 7115 }, { "epoch": 0.523596229018165, "grad_norm": 0.6875, "learning_rate": 2.32379216347099e-05, "loss": 0.7405, "step": 7116 }, { "epoch": 0.523669809151529, "grad_norm": 0.921875, "learning_rate": 2.3232144491162938e-05, "loss": 1.0501, "step": 7117 }, { "epoch": 0.5237433892848931, "grad_norm": 1.03125, "learning_rate": 2.3226367442493442e-05, "loss": 0.9641, "step": 7118 }, { "epoch": 0.5238169694182571, "grad_norm": 0.75, "learning_rate": 2.3220590489011455e-05, "loss": 0.7268, "step": 7119 }, { "epoch": 0.5238905495516211, "grad_norm": 0.90625, "learning_rate": 2.3214813631027025e-05, "loss": 1.2105, "step": 7120 }, { "epoch": 0.5239641296849851, "grad_norm": 0.80078125, "learning_rate": 2.3209036868850164e-05, "loss": 0.9473, "step": 7121 }, { "epoch": 0.5240377098183491, "grad_norm": 1.0625, "learning_rate": 2.3203260202790915e-05, "loss": 1.2614, "step": 7122 }, { "epoch": 0.5241112899517131, "grad_norm": 0.90234375, "learning_rate": 2.31974836331593e-05, "loss": 1.2449, "step": 7123 }, { "epoch": 0.5241848700850771, "grad_norm": 0.984375, "learning_rate": 2.319170716026533e-05, "loss": 1.1326, "step": 7124 }, { "epoch": 0.524258450218441, "grad_norm": 1.0625, "learning_rate": 2.318593078441903e-05, "loss": 1.1696, "step": 7125 }, { "epoch": 0.524332030351805, "grad_norm": 0.9765625, "learning_rate": 2.318015450593039e-05, "loss": 1.0247, "step": 7126 }, { "epoch": 0.524405610485169, "grad_norm": 0.8984375, "learning_rate": 2.317437832510942e-05, "loss": 0.9742, "step": 7127 }, { "epoch": 0.524479190618533, "grad_norm": 1.046875, "learning_rate": 2.316860224226612e-05, "loss": 0.8823, "step": 7128 }, { "epoch": 0.524552770751897, "grad_norm": 0.83203125, "learning_rate": 2.3162826257710488e-05, "loss": 0.9043, "step": 7129 }, { "epoch": 0.524626350885261, "grad_norm": 0.76953125, "learning_rate": 2.315705037175249e-05, "loss": 0.7274, "step": 7130 }, { "epoch": 0.524699931018625, "grad_norm": 0.6796875, "learning_rate": 2.315127458470212e-05, "loss": 0.5809, "step": 7131 }, { "epoch": 0.524773511151989, "grad_norm": 0.73046875, "learning_rate": 2.3145498896869345e-05, "loss": 1.0719, "step": 7132 }, { "epoch": 0.524847091285353, "grad_norm": 1.078125, "learning_rate": 2.3139723308564146e-05, "loss": 1.2744, "step": 7133 }, { "epoch": 0.5249206714187169, "grad_norm": 0.69921875, "learning_rate": 2.3133947820096487e-05, "loss": 0.7277, "step": 7134 }, { "epoch": 0.5249942515520809, "grad_norm": 0.80078125, "learning_rate": 2.3128172431776322e-05, "loss": 0.8872, "step": 7135 }, { "epoch": 0.5250678316854449, "grad_norm": 0.99609375, "learning_rate": 2.3122397143913604e-05, "loss": 1.0752, "step": 7136 }, { "epoch": 0.5251414118188089, "grad_norm": 0.8984375, "learning_rate": 2.311662195681829e-05, "loss": 0.7656, "step": 7137 }, { "epoch": 0.5252149919521729, "grad_norm": 0.97265625, "learning_rate": 2.3110846870800324e-05, "loss": 1.03, "step": 7138 }, { "epoch": 0.5252885720855369, "grad_norm": 0.84375, "learning_rate": 2.310507188616962e-05, "loss": 0.8522, "step": 7139 }, { "epoch": 0.5253621522189009, "grad_norm": 0.97265625, "learning_rate": 2.309929700323614e-05, "loss": 0.817, "step": 7140 }, { "epoch": 0.5254357323522649, "grad_norm": 0.9296875, "learning_rate": 2.30935222223098e-05, "loss": 1.248, "step": 7141 }, { "epoch": 0.5255093124856289, "grad_norm": 0.8359375, "learning_rate": 2.3087747543700516e-05, "loss": 0.8816, "step": 7142 }, { "epoch": 0.5255828926189928, "grad_norm": 0.8671875, "learning_rate": 2.3081972967718226e-05, "loss": 0.814, "step": 7143 }, { "epoch": 0.5256564727523568, "grad_norm": 0.93359375, "learning_rate": 2.307619849467281e-05, "loss": 0.8109, "step": 7144 }, { "epoch": 0.5257300528857208, "grad_norm": 0.85546875, "learning_rate": 2.307042412487419e-05, "loss": 0.9709, "step": 7145 }, { "epoch": 0.5258036330190848, "grad_norm": 0.73828125, "learning_rate": 2.306464985863226e-05, "loss": 0.5595, "step": 7146 }, { "epoch": 0.5258772131524488, "grad_norm": 0.75, "learning_rate": 2.3058875696256917e-05, "loss": 0.7491, "step": 7147 }, { "epoch": 0.5259507932858128, "grad_norm": 0.80078125, "learning_rate": 2.3053101638058055e-05, "loss": 1.3381, "step": 7148 }, { "epoch": 0.5260243734191768, "grad_norm": 0.765625, "learning_rate": 2.3047327684345548e-05, "loss": 0.8883, "step": 7149 }, { "epoch": 0.5260979535525409, "grad_norm": 0.8984375, "learning_rate": 2.304155383542927e-05, "loss": 1.0357, "step": 7150 }, { "epoch": 0.5261715336859049, "grad_norm": 1.0546875, "learning_rate": 2.3035780091619104e-05, "loss": 1.2783, "step": 7151 }, { "epoch": 0.5262451138192688, "grad_norm": 0.71875, "learning_rate": 2.303000645322492e-05, "loss": 0.7235, "step": 7152 }, { "epoch": 0.5263186939526328, "grad_norm": 0.7734375, "learning_rate": 2.3024232920556555e-05, "loss": 1.0723, "step": 7153 }, { "epoch": 0.5263922740859968, "grad_norm": 0.9140625, "learning_rate": 2.301845949392388e-05, "loss": 0.7592, "step": 7154 }, { "epoch": 0.5264658542193608, "grad_norm": 0.984375, "learning_rate": 2.3012686173636742e-05, "loss": 0.8355, "step": 7155 }, { "epoch": 0.5265394343527248, "grad_norm": 1.0859375, "learning_rate": 2.3006912960004985e-05, "loss": 1.0347, "step": 7156 }, { "epoch": 0.5266130144860888, "grad_norm": 0.8984375, "learning_rate": 2.3001139853338453e-05, "loss": 1.2346, "step": 7157 }, { "epoch": 0.5266865946194528, "grad_norm": 0.7109375, "learning_rate": 2.299536685394696e-05, "loss": 0.8342, "step": 7158 }, { "epoch": 0.5267601747528168, "grad_norm": 0.94921875, "learning_rate": 2.298959396214034e-05, "loss": 1.0139, "step": 7159 }, { "epoch": 0.5268337548861808, "grad_norm": 0.8984375, "learning_rate": 2.2983821178228416e-05, "loss": 0.7412, "step": 7160 }, { "epoch": 0.5269073350195447, "grad_norm": 0.9296875, "learning_rate": 2.297804850252101e-05, "loss": 1.248, "step": 7161 }, { "epoch": 0.5269809151529087, "grad_norm": 0.8203125, "learning_rate": 2.2972275935327914e-05, "loss": 0.7006, "step": 7162 }, { "epoch": 0.5270544952862727, "grad_norm": 0.85546875, "learning_rate": 2.296650347695894e-05, "loss": 0.9661, "step": 7163 }, { "epoch": 0.5271280754196367, "grad_norm": 0.890625, "learning_rate": 2.2960731127723885e-05, "loss": 0.8884, "step": 7164 }, { "epoch": 0.5272016555530007, "grad_norm": 0.890625, "learning_rate": 2.2954958887932534e-05, "loss": 1.1262, "step": 7165 }, { "epoch": 0.5272752356863647, "grad_norm": 0.859375, "learning_rate": 2.2949186757894685e-05, "loss": 0.7143, "step": 7166 }, { "epoch": 0.5273488158197287, "grad_norm": 0.7734375, "learning_rate": 2.29434147379201e-05, "loss": 0.6592, "step": 7167 }, { "epoch": 0.5274223959530927, "grad_norm": 0.96875, "learning_rate": 2.2937642828318568e-05, "loss": 1.0319, "step": 7168 }, { "epoch": 0.5274959760864567, "grad_norm": 0.8671875, "learning_rate": 2.293187102939985e-05, "loss": 1.0607, "step": 7169 }, { "epoch": 0.5275695562198206, "grad_norm": 0.87890625, "learning_rate": 2.2926099341473714e-05, "loss": 1.3403, "step": 7170 }, { "epoch": 0.5276431363531846, "grad_norm": 0.79296875, "learning_rate": 2.29203277648499e-05, "loss": 0.7048, "step": 7171 }, { "epoch": 0.5277167164865486, "grad_norm": 0.9765625, "learning_rate": 2.2914556299838166e-05, "loss": 1.1061, "step": 7172 }, { "epoch": 0.5277902966199126, "grad_norm": 0.68359375, "learning_rate": 2.290878494674826e-05, "loss": 0.6776, "step": 7173 }, { "epoch": 0.5278638767532766, "grad_norm": 0.80859375, "learning_rate": 2.2903013705889916e-05, "loss": 0.7855, "step": 7174 }, { "epoch": 0.5279374568866406, "grad_norm": 0.7734375, "learning_rate": 2.2897242577572868e-05, "loss": 0.9804, "step": 7175 }, { "epoch": 0.5280110370200046, "grad_norm": 0.9296875, "learning_rate": 2.2891471562106832e-05, "loss": 1.1, "step": 7176 }, { "epoch": 0.5280846171533686, "grad_norm": 0.95703125, "learning_rate": 2.288570065980154e-05, "loss": 1.1266, "step": 7177 }, { "epoch": 0.5281581972867326, "grad_norm": 0.796875, "learning_rate": 2.28799298709667e-05, "loss": 0.9623, "step": 7178 }, { "epoch": 0.5282317774200965, "grad_norm": 0.75390625, "learning_rate": 2.2874159195912015e-05, "loss": 0.8287, "step": 7179 }, { "epoch": 0.5283053575534605, "grad_norm": 0.8359375, "learning_rate": 2.28683886349472e-05, "loss": 0.9611, "step": 7180 }, { "epoch": 0.5283789376868245, "grad_norm": 0.7265625, "learning_rate": 2.2862618188381936e-05, "loss": 0.9517, "step": 7181 }, { "epoch": 0.5284525178201885, "grad_norm": 0.86328125, "learning_rate": 2.2856847856525916e-05, "loss": 0.797, "step": 7182 }, { "epoch": 0.5285260979535525, "grad_norm": 0.84375, "learning_rate": 2.285107763968882e-05, "loss": 1.0026, "step": 7183 }, { "epoch": 0.5285996780869165, "grad_norm": 0.9921875, "learning_rate": 2.2845307538180342e-05, "loss": 0.999, "step": 7184 }, { "epoch": 0.5286732582202806, "grad_norm": 0.8125, "learning_rate": 2.283953755231013e-05, "loss": 1.0142, "step": 7185 }, { "epoch": 0.5287468383536446, "grad_norm": 1.0078125, "learning_rate": 2.2833767682387856e-05, "loss": 0.991, "step": 7186 }, { "epoch": 0.5288204184870086, "grad_norm": 0.76171875, "learning_rate": 2.2827997928723177e-05, "loss": 0.8853, "step": 7187 }, { "epoch": 0.5288939986203725, "grad_norm": 1.015625, "learning_rate": 2.2822228291625746e-05, "loss": 1.3182, "step": 7188 }, { "epoch": 0.5289675787537365, "grad_norm": 0.6640625, "learning_rate": 2.281645877140522e-05, "loss": 0.722, "step": 7189 }, { "epoch": 0.5290411588871005, "grad_norm": 0.828125, "learning_rate": 2.281068936837122e-05, "loss": 0.8684, "step": 7190 }, { "epoch": 0.5291147390204645, "grad_norm": 0.8828125, "learning_rate": 2.2804920082833385e-05, "loss": 0.7389, "step": 7191 }, { "epoch": 0.5291883191538285, "grad_norm": 0.87109375, "learning_rate": 2.279915091510134e-05, "loss": 1.354, "step": 7192 }, { "epoch": 0.5292618992871925, "grad_norm": 0.796875, "learning_rate": 2.279338186548472e-05, "loss": 0.796, "step": 7193 }, { "epoch": 0.5293354794205565, "grad_norm": 0.7578125, "learning_rate": 2.278761293429312e-05, "loss": 0.7321, "step": 7194 }, { "epoch": 0.5294090595539205, "grad_norm": 0.828125, "learning_rate": 2.2781844121836155e-05, "loss": 0.8823, "step": 7195 }, { "epoch": 0.5294826396872845, "grad_norm": 0.8046875, "learning_rate": 2.2776075428423426e-05, "loss": 0.7476, "step": 7196 }, { "epoch": 0.5295562198206484, "grad_norm": 0.9453125, "learning_rate": 2.277030685436453e-05, "loss": 0.8428, "step": 7197 }, { "epoch": 0.5296297999540124, "grad_norm": 0.79296875, "learning_rate": 2.2764538399969065e-05, "loss": 0.8999, "step": 7198 }, { "epoch": 0.5297033800873764, "grad_norm": 0.78515625, "learning_rate": 2.275877006554659e-05, "loss": 0.8448, "step": 7199 }, { "epoch": 0.5297769602207404, "grad_norm": 0.84765625, "learning_rate": 2.275300185140669e-05, "loss": 0.9239, "step": 7200 }, { "epoch": 0.5298505403541044, "grad_norm": 0.91796875, "learning_rate": 2.274723375785894e-05, "loss": 0.9951, "step": 7201 }, { "epoch": 0.5299241204874684, "grad_norm": 0.9609375, "learning_rate": 2.2741465785212905e-05, "loss": 0.832, "step": 7202 }, { "epoch": 0.5299977006208324, "grad_norm": 0.82421875, "learning_rate": 2.273569793377813e-05, "loss": 0.6314, "step": 7203 }, { "epoch": 0.5300712807541964, "grad_norm": 0.78515625, "learning_rate": 2.2729930203864167e-05, "loss": 0.9671, "step": 7204 }, { "epoch": 0.5301448608875604, "grad_norm": 0.8046875, "learning_rate": 2.2724162595780564e-05, "loss": 0.9649, "step": 7205 }, { "epoch": 0.5302184410209243, "grad_norm": 0.8125, "learning_rate": 2.271839510983686e-05, "loss": 0.7653, "step": 7206 }, { "epoch": 0.5302920211542883, "grad_norm": 0.7890625, "learning_rate": 2.271262774634258e-05, "loss": 0.6205, "step": 7207 }, { "epoch": 0.5303656012876523, "grad_norm": 0.93359375, "learning_rate": 2.2706860505607246e-05, "loss": 1.0973, "step": 7208 }, { "epoch": 0.5304391814210163, "grad_norm": 0.73046875, "learning_rate": 2.2701093387940378e-05, "loss": 0.7639, "step": 7209 }, { "epoch": 0.5305127615543803, "grad_norm": 0.859375, "learning_rate": 2.2695326393651485e-05, "loss": 0.7974, "step": 7210 }, { "epoch": 0.5305863416877443, "grad_norm": 0.9296875, "learning_rate": 2.2689559523050073e-05, "loss": 1.028, "step": 7211 }, { "epoch": 0.5306599218211083, "grad_norm": 0.79296875, "learning_rate": 2.268379277644565e-05, "loss": 0.7399, "step": 7212 }, { "epoch": 0.5307335019544723, "grad_norm": 1.015625, "learning_rate": 2.267802615414768e-05, "loss": 1.0821, "step": 7213 }, { "epoch": 0.5308070820878363, "grad_norm": 1.1953125, "learning_rate": 2.267225965646566e-05, "loss": 1.3199, "step": 7214 }, { "epoch": 0.5308806622212002, "grad_norm": 1.296875, "learning_rate": 2.266649328370907e-05, "loss": 1.1325, "step": 7215 }, { "epoch": 0.5309542423545642, "grad_norm": 0.9453125, "learning_rate": 2.2660727036187384e-05, "loss": 1.1914, "step": 7216 }, { "epoch": 0.5310278224879282, "grad_norm": 0.94140625, "learning_rate": 2.2654960914210053e-05, "loss": 1.0181, "step": 7217 }, { "epoch": 0.5311014026212922, "grad_norm": 0.69140625, "learning_rate": 2.264919491808654e-05, "loss": 0.9841, "step": 7218 }, { "epoch": 0.5311749827546562, "grad_norm": 0.6171875, "learning_rate": 2.2643429048126298e-05, "loss": 0.6514, "step": 7219 }, { "epoch": 0.5312485628880202, "grad_norm": 1.2578125, "learning_rate": 2.2637663304638764e-05, "loss": 1.4098, "step": 7220 }, { "epoch": 0.5313221430213843, "grad_norm": 0.77734375, "learning_rate": 2.2631897687933388e-05, "loss": 1.1333, "step": 7221 }, { "epoch": 0.5313957231547483, "grad_norm": 0.890625, "learning_rate": 2.2626132198319582e-05, "loss": 1.2393, "step": 7222 }, { "epoch": 0.5314693032881123, "grad_norm": 0.7578125, "learning_rate": 2.262036683610678e-05, "loss": 0.8314, "step": 7223 }, { "epoch": 0.5315428834214762, "grad_norm": 0.7578125, "learning_rate": 2.2614601601604393e-05, "loss": 0.611, "step": 7224 }, { "epoch": 0.5316164635548402, "grad_norm": 0.75390625, "learning_rate": 2.2608836495121845e-05, "loss": 0.7074, "step": 7225 }, { "epoch": 0.5316900436882042, "grad_norm": 0.7578125, "learning_rate": 2.2603071516968515e-05, "loss": 1.0695, "step": 7226 }, { "epoch": 0.5317636238215682, "grad_norm": 0.84375, "learning_rate": 2.259730666745381e-05, "loss": 0.7008, "step": 7227 }, { "epoch": 0.5318372039549322, "grad_norm": 0.84765625, "learning_rate": 2.2591541946887118e-05, "loss": 0.6894, "step": 7228 }, { "epoch": 0.5319107840882962, "grad_norm": 0.7890625, "learning_rate": 2.2585777355577814e-05, "loss": 0.7755, "step": 7229 }, { "epoch": 0.5319843642216602, "grad_norm": 0.8125, "learning_rate": 2.25800128938353e-05, "loss": 0.986, "step": 7230 }, { "epoch": 0.5320579443550242, "grad_norm": 0.76953125, "learning_rate": 2.2574248561968904e-05, "loss": 0.8011, "step": 7231 }, { "epoch": 0.5321315244883882, "grad_norm": 0.875, "learning_rate": 2.2568484360288014e-05, "loss": 1.0675, "step": 7232 }, { "epoch": 0.5322051046217521, "grad_norm": 0.765625, "learning_rate": 2.2562720289101975e-05, "loss": 0.7311, "step": 7233 }, { "epoch": 0.5322786847551161, "grad_norm": 0.62890625, "learning_rate": 2.2556956348720138e-05, "loss": 0.6406, "step": 7234 }, { "epoch": 0.5323522648884801, "grad_norm": 0.796875, "learning_rate": 2.255119253945183e-05, "loss": 0.6657, "step": 7235 }, { "epoch": 0.5324258450218441, "grad_norm": 0.7734375, "learning_rate": 2.25454288616064e-05, "loss": 0.8719, "step": 7236 }, { "epoch": 0.5324994251552081, "grad_norm": 0.76953125, "learning_rate": 2.2539665315493164e-05, "loss": 0.777, "step": 7237 }, { "epoch": 0.5325730052885721, "grad_norm": 0.87890625, "learning_rate": 2.2533901901421445e-05, "loss": 1.1499, "step": 7238 }, { "epoch": 0.5326465854219361, "grad_norm": 0.98828125, "learning_rate": 2.2528138619700562e-05, "loss": 0.8541, "step": 7239 }, { "epoch": 0.5327201655553001, "grad_norm": 1.125, "learning_rate": 2.25223754706398e-05, "loss": 0.9575, "step": 7240 }, { "epoch": 0.5327937456886641, "grad_norm": 0.76171875, "learning_rate": 2.2516612454548468e-05, "loss": 0.8939, "step": 7241 }, { "epoch": 0.532867325822028, "grad_norm": 0.71484375, "learning_rate": 2.2510849571735857e-05, "loss": 0.7688, "step": 7242 }, { "epoch": 0.532940905955392, "grad_norm": 0.92578125, "learning_rate": 2.250508682251125e-05, "loss": 0.7608, "step": 7243 }, { "epoch": 0.533014486088756, "grad_norm": 0.96484375, "learning_rate": 2.249932420718392e-05, "loss": 1.3086, "step": 7244 }, { "epoch": 0.53308806622212, "grad_norm": 0.76171875, "learning_rate": 2.2493561726063135e-05, "loss": 0.7568, "step": 7245 }, { "epoch": 0.533161646355484, "grad_norm": 0.8828125, "learning_rate": 2.2487799379458156e-05, "loss": 0.693, "step": 7246 }, { "epoch": 0.533235226488848, "grad_norm": 0.80078125, "learning_rate": 2.248203716767824e-05, "loss": 0.8562, "step": 7247 }, { "epoch": 0.533308806622212, "grad_norm": 0.86328125, "learning_rate": 2.247627509103264e-05, "loss": 0.9862, "step": 7248 }, { "epoch": 0.533382386755576, "grad_norm": 0.77734375, "learning_rate": 2.247051314983058e-05, "loss": 0.9593, "step": 7249 }, { "epoch": 0.53345596688894, "grad_norm": 0.73046875, "learning_rate": 2.2464751344381307e-05, "loss": 1.0049, "step": 7250 }, { "epoch": 0.5335295470223039, "grad_norm": 0.859375, "learning_rate": 2.245898967499404e-05, "loss": 1.0317, "step": 7251 }, { "epoch": 0.5336031271556679, "grad_norm": 0.73046875, "learning_rate": 2.2453228141977996e-05, "loss": 0.8445, "step": 7252 }, { "epoch": 0.5336767072890319, "grad_norm": 0.92578125, "learning_rate": 2.2447466745642397e-05, "loss": 1.3378, "step": 7253 }, { "epoch": 0.5337502874223959, "grad_norm": 0.76171875, "learning_rate": 2.2441705486296427e-05, "loss": 0.7162, "step": 7254 }, { "epoch": 0.53382386755576, "grad_norm": 0.9140625, "learning_rate": 2.2435944364249294e-05, "loss": 1.0402, "step": 7255 }, { "epoch": 0.533897447689124, "grad_norm": 0.8046875, "learning_rate": 2.2430183379810178e-05, "loss": 1.0057, "step": 7256 }, { "epoch": 0.533971027822488, "grad_norm": 0.875, "learning_rate": 2.242442253328828e-05, "loss": 1.0033, "step": 7257 }, { "epoch": 0.534044607955852, "grad_norm": 0.78515625, "learning_rate": 2.241866182499275e-05, "loss": 0.6219, "step": 7258 }, { "epoch": 0.534118188089216, "grad_norm": 1.0078125, "learning_rate": 2.2412901255232765e-05, "loss": 1.0824, "step": 7259 }, { "epoch": 0.5341917682225799, "grad_norm": 0.69921875, "learning_rate": 2.2407140824317486e-05, "loss": 0.7379, "step": 7260 }, { "epoch": 0.5342653483559439, "grad_norm": 0.7421875, "learning_rate": 2.240138053255606e-05, "loss": 0.7896, "step": 7261 }, { "epoch": 0.5343389284893079, "grad_norm": 0.92578125, "learning_rate": 2.2395620380257638e-05, "loss": 1.1455, "step": 7262 }, { "epoch": 0.5344125086226719, "grad_norm": 1.0234375, "learning_rate": 2.2389860367731346e-05, "loss": 1.0093, "step": 7263 }, { "epoch": 0.5344860887560359, "grad_norm": 0.984375, "learning_rate": 2.2384100495286315e-05, "loss": 1.1271, "step": 7264 }, { "epoch": 0.5345596688893999, "grad_norm": 0.97265625, "learning_rate": 2.2378340763231673e-05, "loss": 1.311, "step": 7265 }, { "epoch": 0.5346332490227639, "grad_norm": 0.828125, "learning_rate": 2.237258117187654e-05, "loss": 1.0288, "step": 7266 }, { "epoch": 0.5347068291561279, "grad_norm": 1.0859375, "learning_rate": 2.2366821721530005e-05, "loss": 1.013, "step": 7267 }, { "epoch": 0.5347804092894919, "grad_norm": 1.015625, "learning_rate": 2.236106241250117e-05, "loss": 1.0508, "step": 7268 }, { "epoch": 0.5348539894228558, "grad_norm": 1.015625, "learning_rate": 2.2355303245099128e-05, "loss": 1.031, "step": 7269 }, { "epoch": 0.5349275695562198, "grad_norm": 1.1796875, "learning_rate": 2.2349544219632974e-05, "loss": 1.4136, "step": 7270 }, { "epoch": 0.5350011496895838, "grad_norm": 0.83203125, "learning_rate": 2.2343785336411783e-05, "loss": 0.6587, "step": 7271 }, { "epoch": 0.5350747298229478, "grad_norm": 0.79296875, "learning_rate": 2.2338026595744604e-05, "loss": 0.6274, "step": 7272 }, { "epoch": 0.5351483099563118, "grad_norm": 0.90625, "learning_rate": 2.2332267997940516e-05, "loss": 1.0917, "step": 7273 }, { "epoch": 0.5352218900896758, "grad_norm": 0.91796875, "learning_rate": 2.232650954330856e-05, "loss": 1.4517, "step": 7274 }, { "epoch": 0.5352954702230398, "grad_norm": 0.76953125, "learning_rate": 2.2320751232157793e-05, "loss": 0.8355, "step": 7275 }, { "epoch": 0.5353690503564038, "grad_norm": 0.9453125, "learning_rate": 2.2314993064797244e-05, "loss": 1.3266, "step": 7276 }, { "epoch": 0.5354426304897678, "grad_norm": 1.171875, "learning_rate": 2.2309235041535947e-05, "loss": 1.5163, "step": 7277 }, { "epoch": 0.5355162106231317, "grad_norm": 0.8984375, "learning_rate": 2.230347716268292e-05, "loss": 1.0089, "step": 7278 }, { "epoch": 0.5355897907564957, "grad_norm": 0.98046875, "learning_rate": 2.2297719428547186e-05, "loss": 1.3617, "step": 7279 }, { "epoch": 0.5356633708898597, "grad_norm": 0.828125, "learning_rate": 2.229196183943775e-05, "loss": 1.0673, "step": 7280 }, { "epoch": 0.5357369510232237, "grad_norm": 0.73828125, "learning_rate": 2.2286204395663602e-05, "loss": 0.8204, "step": 7281 }, { "epoch": 0.5358105311565877, "grad_norm": 0.91015625, "learning_rate": 2.228044709753373e-05, "loss": 1.0077, "step": 7282 }, { "epoch": 0.5358841112899517, "grad_norm": 1.0546875, "learning_rate": 2.2274689945357133e-05, "loss": 0.9992, "step": 7283 }, { "epoch": 0.5359576914233157, "grad_norm": 1.1171875, "learning_rate": 2.226893293944278e-05, "loss": 1.1013, "step": 7284 }, { "epoch": 0.5360312715566797, "grad_norm": 0.85546875, "learning_rate": 2.2263176080099645e-05, "loss": 1.0745, "step": 7285 }, { "epoch": 0.5361048516900437, "grad_norm": 0.79296875, "learning_rate": 2.2257419367636673e-05, "loss": 0.9617, "step": 7286 }, { "epoch": 0.5361784318234076, "grad_norm": 0.92578125, "learning_rate": 2.225166280236282e-05, "loss": 0.7743, "step": 7287 }, { "epoch": 0.5362520119567716, "grad_norm": 0.8828125, "learning_rate": 2.2245906384587036e-05, "loss": 1.2785, "step": 7288 }, { "epoch": 0.5363255920901356, "grad_norm": 0.78125, "learning_rate": 2.224015011461826e-05, "loss": 0.8162, "step": 7289 }, { "epoch": 0.5363991722234996, "grad_norm": 0.78515625, "learning_rate": 2.223439399276541e-05, "loss": 0.7667, "step": 7290 }, { "epoch": 0.5364727523568636, "grad_norm": 1.15625, "learning_rate": 2.222863801933741e-05, "loss": 1.3415, "step": 7291 }, { "epoch": 0.5365463324902277, "grad_norm": 0.96875, "learning_rate": 2.2222882194643175e-05, "loss": 0.9342, "step": 7292 }, { "epoch": 0.5366199126235917, "grad_norm": 0.80078125, "learning_rate": 2.2217126518991604e-05, "loss": 0.8758, "step": 7293 }, { "epoch": 0.5366934927569557, "grad_norm": 0.98046875, "learning_rate": 2.221137099269161e-05, "loss": 0.9747, "step": 7294 }, { "epoch": 0.5367670728903197, "grad_norm": 0.91015625, "learning_rate": 2.2205615616052057e-05, "loss": 0.8359, "step": 7295 }, { "epoch": 0.5368406530236836, "grad_norm": 0.94921875, "learning_rate": 2.219986038938183e-05, "loss": 1.2206, "step": 7296 }, { "epoch": 0.5369142331570476, "grad_norm": 0.61328125, "learning_rate": 2.2194105312989813e-05, "loss": 0.6821, "step": 7297 }, { "epoch": 0.5369878132904116, "grad_norm": 0.921875, "learning_rate": 2.218835038718487e-05, "loss": 0.8949, "step": 7298 }, { "epoch": 0.5370613934237756, "grad_norm": 0.87890625, "learning_rate": 2.2182595612275848e-05, "loss": 0.7176, "step": 7299 }, { "epoch": 0.5371349735571396, "grad_norm": 0.86328125, "learning_rate": 2.2176840988571594e-05, "loss": 1.0669, "step": 7300 }, { "epoch": 0.5372085536905036, "grad_norm": 0.9921875, "learning_rate": 2.2171086516380955e-05, "loss": 0.9216, "step": 7301 }, { "epoch": 0.5372821338238676, "grad_norm": 0.7578125, "learning_rate": 2.216533219601276e-05, "loss": 0.6807, "step": 7302 }, { "epoch": 0.5373557139572316, "grad_norm": 0.8671875, "learning_rate": 2.2159578027775833e-05, "loss": 0.9525, "step": 7303 }, { "epoch": 0.5374292940905956, "grad_norm": 0.83984375, "learning_rate": 2.2153824011978984e-05, "loss": 0.631, "step": 7304 }, { "epoch": 0.5375028742239595, "grad_norm": 0.921875, "learning_rate": 2.214807014893103e-05, "loss": 1.2324, "step": 7305 }, { "epoch": 0.5375764543573235, "grad_norm": 1.046875, "learning_rate": 2.2142316438940757e-05, "loss": 1.3342, "step": 7306 }, { "epoch": 0.5376500344906875, "grad_norm": 0.8359375, "learning_rate": 2.213656288231698e-05, "loss": 1.0739, "step": 7307 }, { "epoch": 0.5377236146240515, "grad_norm": 0.96484375, "learning_rate": 2.213080947936845e-05, "loss": 0.8572, "step": 7308 }, { "epoch": 0.5377971947574155, "grad_norm": 0.921875, "learning_rate": 2.212505623040395e-05, "loss": 0.905, "step": 7309 }, { "epoch": 0.5378707748907795, "grad_norm": 0.75, "learning_rate": 2.211930313573226e-05, "loss": 0.7273, "step": 7310 }, { "epoch": 0.5379443550241435, "grad_norm": 0.83984375, "learning_rate": 2.2113550195662132e-05, "loss": 0.9139, "step": 7311 }, { "epoch": 0.5380179351575075, "grad_norm": 0.72265625, "learning_rate": 2.2107797410502324e-05, "loss": 1.1933, "step": 7312 }, { "epoch": 0.5380915152908715, "grad_norm": 0.92578125, "learning_rate": 2.2102044780561552e-05, "loss": 0.8918, "step": 7313 }, { "epoch": 0.5381650954242354, "grad_norm": 0.83203125, "learning_rate": 2.2096292306148565e-05, "loss": 0.9323, "step": 7314 }, { "epoch": 0.5382386755575994, "grad_norm": 0.73828125, "learning_rate": 2.2090539987572083e-05, "loss": 0.8173, "step": 7315 }, { "epoch": 0.5383122556909634, "grad_norm": 0.8671875, "learning_rate": 2.208478782514083e-05, "loss": 1.0322, "step": 7316 }, { "epoch": 0.5383858358243274, "grad_norm": 0.890625, "learning_rate": 2.207903581916351e-05, "loss": 0.5873, "step": 7317 }, { "epoch": 0.5384594159576914, "grad_norm": 0.83203125, "learning_rate": 2.2073283969948816e-05, "loss": 1.1847, "step": 7318 }, { "epoch": 0.5385329960910554, "grad_norm": 0.91015625, "learning_rate": 2.206753227780544e-05, "loss": 1.084, "step": 7319 }, { "epoch": 0.5386065762244194, "grad_norm": 0.89453125, "learning_rate": 2.2061780743042073e-05, "loss": 1.1231, "step": 7320 }, { "epoch": 0.5386801563577834, "grad_norm": 1.0546875, "learning_rate": 2.205602936596739e-05, "loss": 0.9485, "step": 7321 }, { "epoch": 0.5387537364911474, "grad_norm": 0.76171875, "learning_rate": 2.2050278146890042e-05, "loss": 0.7016, "step": 7322 }, { "epoch": 0.5388273166245113, "grad_norm": 0.859375, "learning_rate": 2.2044527086118693e-05, "loss": 1.0751, "step": 7323 }, { "epoch": 0.5389008967578753, "grad_norm": 1.0390625, "learning_rate": 2.2038776183961998e-05, "loss": 0.8132, "step": 7324 }, { "epoch": 0.5389744768912393, "grad_norm": 0.71484375, "learning_rate": 2.203302544072859e-05, "loss": 0.8207, "step": 7325 }, { "epoch": 0.5390480570246033, "grad_norm": 0.87109375, "learning_rate": 2.2027274856727115e-05, "loss": 1.4968, "step": 7326 }, { "epoch": 0.5391216371579673, "grad_norm": 1.171875, "learning_rate": 2.2021524432266173e-05, "loss": 1.3769, "step": 7327 }, { "epoch": 0.5391952172913314, "grad_norm": 0.8359375, "learning_rate": 2.2015774167654386e-05, "loss": 1.0462, "step": 7328 }, { "epoch": 0.5392687974246954, "grad_norm": 0.61328125, "learning_rate": 2.2010024063200364e-05, "loss": 0.5404, "step": 7329 }, { "epoch": 0.5393423775580594, "grad_norm": 0.9453125, "learning_rate": 2.200427411921271e-05, "loss": 1.2398, "step": 7330 }, { "epoch": 0.5394159576914234, "grad_norm": 1.1484375, "learning_rate": 2.1998524335999998e-05, "loss": 1.2768, "step": 7331 }, { "epoch": 0.5394895378247873, "grad_norm": 0.8671875, "learning_rate": 2.199277471387082e-05, "loss": 0.8722, "step": 7332 }, { "epoch": 0.5395631179581513, "grad_norm": 0.7890625, "learning_rate": 2.1987025253133743e-05, "loss": 1.1583, "step": 7333 }, { "epoch": 0.5396366980915153, "grad_norm": 0.89453125, "learning_rate": 2.1981275954097323e-05, "loss": 0.8982, "step": 7334 }, { "epoch": 0.5397102782248793, "grad_norm": 0.83203125, "learning_rate": 2.1975526817070137e-05, "loss": 1.0524, "step": 7335 }, { "epoch": 0.5397838583582433, "grad_norm": 1.0625, "learning_rate": 2.19697778423607e-05, "loss": 0.9947, "step": 7336 }, { "epoch": 0.5398574384916073, "grad_norm": 0.79296875, "learning_rate": 2.1964029030277567e-05, "loss": 1.2866, "step": 7337 }, { "epoch": 0.5399310186249713, "grad_norm": 0.68359375, "learning_rate": 2.1958280381129263e-05, "loss": 0.707, "step": 7338 }, { "epoch": 0.5400045987583353, "grad_norm": 0.671875, "learning_rate": 2.1952531895224313e-05, "loss": 0.5779, "step": 7339 }, { "epoch": 0.5400781788916993, "grad_norm": 0.78515625, "learning_rate": 2.194678357287121e-05, "loss": 0.7908, "step": 7340 }, { "epoch": 0.5401517590250632, "grad_norm": 0.86328125, "learning_rate": 2.194103541437847e-05, "loss": 1.3806, "step": 7341 }, { "epoch": 0.5402253391584272, "grad_norm": 0.703125, "learning_rate": 2.193528742005458e-05, "loss": 0.8333, "step": 7342 }, { "epoch": 0.5402989192917912, "grad_norm": 0.9609375, "learning_rate": 2.192953959020802e-05, "loss": 1.1764, "step": 7343 }, { "epoch": 0.5403724994251552, "grad_norm": 0.8046875, "learning_rate": 2.1923791925147285e-05, "loss": 0.8591, "step": 7344 }, { "epoch": 0.5404460795585192, "grad_norm": 0.79296875, "learning_rate": 2.191804442518082e-05, "loss": 0.6764, "step": 7345 }, { "epoch": 0.5405196596918832, "grad_norm": 0.6484375, "learning_rate": 2.1912297090617084e-05, "loss": 0.5337, "step": 7346 }, { "epoch": 0.5405932398252472, "grad_norm": 0.8671875, "learning_rate": 2.1906549921764535e-05, "loss": 0.9451, "step": 7347 }, { "epoch": 0.5406668199586112, "grad_norm": 0.62109375, "learning_rate": 2.190080291893161e-05, "loss": 0.6899, "step": 7348 }, { "epoch": 0.5407404000919752, "grad_norm": 0.84765625, "learning_rate": 2.1895056082426744e-05, "loss": 0.7317, "step": 7349 }, { "epoch": 0.5408139802253391, "grad_norm": 0.93359375, "learning_rate": 2.1889309412558346e-05, "loss": 0.7372, "step": 7350 }, { "epoch": 0.5408875603587031, "grad_norm": 0.7421875, "learning_rate": 2.188356290963484e-05, "loss": 1.036, "step": 7351 }, { "epoch": 0.5409611404920671, "grad_norm": 0.91015625, "learning_rate": 2.1877816573964626e-05, "loss": 0.9117, "step": 7352 }, { "epoch": 0.5410347206254311, "grad_norm": 1.0234375, "learning_rate": 2.1872070405856106e-05, "loss": 0.9015, "step": 7353 }, { "epoch": 0.5411083007587951, "grad_norm": 0.953125, "learning_rate": 2.186632440561765e-05, "loss": 0.8015, "step": 7354 }, { "epoch": 0.5411818808921591, "grad_norm": 1.125, "learning_rate": 2.186057857355765e-05, "loss": 1.2927, "step": 7355 }, { "epoch": 0.5412554610255231, "grad_norm": 0.66796875, "learning_rate": 2.1854832909984463e-05, "loss": 0.5719, "step": 7356 }, { "epoch": 0.5413290411588871, "grad_norm": 0.8359375, "learning_rate": 2.1849087415206455e-05, "loss": 1.0546, "step": 7357 }, { "epoch": 0.5414026212922511, "grad_norm": 0.76171875, "learning_rate": 2.184334208953198e-05, "loss": 0.7692, "step": 7358 }, { "epoch": 0.541476201425615, "grad_norm": 0.7890625, "learning_rate": 2.1837596933269366e-05, "loss": 1.1095, "step": 7359 }, { "epoch": 0.541549781558979, "grad_norm": 0.63671875, "learning_rate": 2.1831851946726953e-05, "loss": 0.5641, "step": 7360 }, { "epoch": 0.541623361692343, "grad_norm": 0.875, "learning_rate": 2.182610713021306e-05, "loss": 1.0079, "step": 7361 }, { "epoch": 0.541696941825707, "grad_norm": 0.89453125, "learning_rate": 2.182036248403601e-05, "loss": 0.9787, "step": 7362 }, { "epoch": 0.541770521959071, "grad_norm": 0.76171875, "learning_rate": 2.1814618008504094e-05, "loss": 0.9466, "step": 7363 }, { "epoch": 0.541844102092435, "grad_norm": 0.8359375, "learning_rate": 2.1808873703925616e-05, "loss": 0.6536, "step": 7364 }, { "epoch": 0.5419176822257991, "grad_norm": 0.73046875, "learning_rate": 2.1803129570608858e-05, "loss": 0.8832, "step": 7365 }, { "epoch": 0.5419912623591631, "grad_norm": 0.7265625, "learning_rate": 2.1797385608862093e-05, "loss": 0.6238, "step": 7366 }, { "epoch": 0.5420648424925271, "grad_norm": 0.98828125, "learning_rate": 2.179164181899361e-05, "loss": 1.0085, "step": 7367 }, { "epoch": 0.542138422625891, "grad_norm": 0.73828125, "learning_rate": 2.178589820131164e-05, "loss": 0.8215, "step": 7368 }, { "epoch": 0.542212002759255, "grad_norm": 0.78515625, "learning_rate": 2.178015475612444e-05, "loss": 0.6883, "step": 7369 }, { "epoch": 0.542285582892619, "grad_norm": 0.95703125, "learning_rate": 2.1774411483740255e-05, "loss": 0.9847, "step": 7370 }, { "epoch": 0.542359163025983, "grad_norm": 0.98828125, "learning_rate": 2.1768668384467316e-05, "loss": 1.1023, "step": 7371 }, { "epoch": 0.542432743159347, "grad_norm": 0.81640625, "learning_rate": 2.176292545861384e-05, "loss": 0.6608, "step": 7372 }, { "epoch": 0.542506323292711, "grad_norm": 1.1015625, "learning_rate": 2.175718270648804e-05, "loss": 0.9281, "step": 7373 }, { "epoch": 0.542579903426075, "grad_norm": 0.8046875, "learning_rate": 2.1751440128398115e-05, "loss": 0.8566, "step": 7374 }, { "epoch": 0.542653483559439, "grad_norm": 0.859375, "learning_rate": 2.1745697724652268e-05, "loss": 1.1914, "step": 7375 }, { "epoch": 0.542727063692803, "grad_norm": 1.0546875, "learning_rate": 2.1739955495558678e-05, "loss": 0.9672, "step": 7376 }, { "epoch": 0.5428006438261669, "grad_norm": 0.87109375, "learning_rate": 2.1734213441425516e-05, "loss": 0.8312, "step": 7377 }, { "epoch": 0.5428742239595309, "grad_norm": 0.796875, "learning_rate": 2.1728471562560955e-05, "loss": 1.1319, "step": 7378 }, { "epoch": 0.5429478040928949, "grad_norm": 0.703125, "learning_rate": 2.172272985927314e-05, "loss": 0.6857, "step": 7379 }, { "epoch": 0.5430213842262589, "grad_norm": 0.96875, "learning_rate": 2.1716988331870236e-05, "loss": 0.8395, "step": 7380 }, { "epoch": 0.5430949643596229, "grad_norm": 0.8359375, "learning_rate": 2.171124698066036e-05, "loss": 0.778, "step": 7381 }, { "epoch": 0.5431685444929869, "grad_norm": 0.91796875, "learning_rate": 2.1705505805951644e-05, "loss": 0.9363, "step": 7382 }, { "epoch": 0.5432421246263509, "grad_norm": 0.9765625, "learning_rate": 2.1699764808052206e-05, "loss": 1.06, "step": 7383 }, { "epoch": 0.5433157047597149, "grad_norm": 1.0234375, "learning_rate": 2.169402398727016e-05, "loss": 1.117, "step": 7384 }, { "epoch": 0.5433892848930789, "grad_norm": 0.7734375, "learning_rate": 2.168828334391361e-05, "loss": 0.7153, "step": 7385 }, { "epoch": 0.5434628650264428, "grad_norm": 0.79296875, "learning_rate": 2.168254287829063e-05, "loss": 0.736, "step": 7386 }, { "epoch": 0.5435364451598068, "grad_norm": 1.359375, "learning_rate": 2.167680259070931e-05, "loss": 0.6422, "step": 7387 }, { "epoch": 0.5436100252931708, "grad_norm": 0.71484375, "learning_rate": 2.1671062481477718e-05, "loss": 0.7089, "step": 7388 }, { "epoch": 0.5436836054265348, "grad_norm": 0.9765625, "learning_rate": 2.1665322550903914e-05, "loss": 0.9998, "step": 7389 }, { "epoch": 0.5437571855598988, "grad_norm": 0.8203125, "learning_rate": 2.1659582799295955e-05, "loss": 0.9858, "step": 7390 }, { "epoch": 0.5438307656932628, "grad_norm": 1.0078125, "learning_rate": 2.1653843226961872e-05, "loss": 1.0957, "step": 7391 }, { "epoch": 0.5439043458266268, "grad_norm": 0.8828125, "learning_rate": 2.1648103834209707e-05, "loss": 0.9069, "step": 7392 }, { "epoch": 0.5439779259599908, "grad_norm": 1.03125, "learning_rate": 2.1642364621347476e-05, "loss": 1.0202, "step": 7393 }, { "epoch": 0.5440515060933548, "grad_norm": 0.67578125, "learning_rate": 2.1636625588683206e-05, "loss": 0.7816, "step": 7394 }, { "epoch": 0.5441250862267187, "grad_norm": 0.8515625, "learning_rate": 2.1630886736524873e-05, "loss": 0.9869, "step": 7395 }, { "epoch": 0.5441986663600827, "grad_norm": 0.96875, "learning_rate": 2.1625148065180492e-05, "loss": 1.2544, "step": 7396 }, { "epoch": 0.5442722464934467, "grad_norm": 0.66796875, "learning_rate": 2.161940957495804e-05, "loss": 0.7264, "step": 7397 }, { "epoch": 0.5443458266268107, "grad_norm": 0.78515625, "learning_rate": 2.1613671266165487e-05, "loss": 0.8085, "step": 7398 }, { "epoch": 0.5444194067601748, "grad_norm": 1.015625, "learning_rate": 2.1607933139110807e-05, "loss": 1.1389, "step": 7399 }, { "epoch": 0.5444929868935388, "grad_norm": 1.390625, "learning_rate": 2.1602195194101944e-05, "loss": 0.8613, "step": 7400 }, { "epoch": 0.5445665670269028, "grad_norm": 0.8125, "learning_rate": 2.1596457431446848e-05, "loss": 0.7618, "step": 7401 }, { "epoch": 0.5446401471602668, "grad_norm": 0.8984375, "learning_rate": 2.1590719851453455e-05, "loss": 0.6643, "step": 7402 }, { "epoch": 0.5447137272936308, "grad_norm": 0.98828125, "learning_rate": 2.1584982454429688e-05, "loss": 0.8465, "step": 7403 }, { "epoch": 0.5447873074269947, "grad_norm": 0.91015625, "learning_rate": 2.157924524068346e-05, "loss": 1.0482, "step": 7404 }, { "epoch": 0.5448608875603587, "grad_norm": 0.765625, "learning_rate": 2.157350821052268e-05, "loss": 0.7691, "step": 7405 }, { "epoch": 0.5449344676937227, "grad_norm": 0.6953125, "learning_rate": 2.1567771364255244e-05, "loss": 0.7051, "step": 7406 }, { "epoch": 0.5450080478270867, "grad_norm": 0.75, "learning_rate": 2.1562034702189033e-05, "loss": 0.696, "step": 7407 }, { "epoch": 0.5450816279604507, "grad_norm": 0.74609375, "learning_rate": 2.1556298224631942e-05, "loss": 0.6731, "step": 7408 }, { "epoch": 0.5451552080938147, "grad_norm": 0.83984375, "learning_rate": 2.1550561931891805e-05, "loss": 0.7866, "step": 7409 }, { "epoch": 0.5452287882271787, "grad_norm": 0.65234375, "learning_rate": 2.1544825824276498e-05, "loss": 0.5782, "step": 7410 }, { "epoch": 0.5453023683605427, "grad_norm": 0.87109375, "learning_rate": 2.153908990209386e-05, "loss": 0.9274, "step": 7411 }, { "epoch": 0.5453759484939067, "grad_norm": 0.6953125, "learning_rate": 2.1533354165651737e-05, "loss": 0.7583, "step": 7412 }, { "epoch": 0.5454495286272706, "grad_norm": 1.015625, "learning_rate": 2.1527618615257943e-05, "loss": 1.0156, "step": 7413 }, { "epoch": 0.5455231087606346, "grad_norm": 0.9453125, "learning_rate": 2.1521883251220297e-05, "loss": 1.203, "step": 7414 }, { "epoch": 0.5455966888939986, "grad_norm": 0.69921875, "learning_rate": 2.1516148073846613e-05, "loss": 0.7928, "step": 7415 }, { "epoch": 0.5456702690273626, "grad_norm": 0.91796875, "learning_rate": 2.151041308344468e-05, "loss": 0.9713, "step": 7416 }, { "epoch": 0.5457438491607266, "grad_norm": 0.921875, "learning_rate": 2.150467828032229e-05, "loss": 0.876, "step": 7417 }, { "epoch": 0.5458174292940906, "grad_norm": 0.85546875, "learning_rate": 2.1498943664787208e-05, "loss": 1.079, "step": 7418 }, { "epoch": 0.5458910094274546, "grad_norm": 0.90625, "learning_rate": 2.149320923714721e-05, "loss": 1.0431, "step": 7419 }, { "epoch": 0.5459645895608186, "grad_norm": 0.98828125, "learning_rate": 2.148747499771005e-05, "loss": 0.6573, "step": 7420 }, { "epoch": 0.5460381696941826, "grad_norm": 1.140625, "learning_rate": 2.1481740946783468e-05, "loss": 1.3769, "step": 7421 }, { "epoch": 0.5461117498275465, "grad_norm": 0.9140625, "learning_rate": 2.147600708467522e-05, "loss": 1.1254, "step": 7422 }, { "epoch": 0.5461853299609105, "grad_norm": 0.78125, "learning_rate": 2.1470273411692998e-05, "loss": 0.5865, "step": 7423 }, { "epoch": 0.5462589100942745, "grad_norm": 0.796875, "learning_rate": 2.146453992814454e-05, "loss": 1.0619, "step": 7424 }, { "epoch": 0.5463324902276385, "grad_norm": 0.796875, "learning_rate": 2.145880663433754e-05, "loss": 0.8634, "step": 7425 }, { "epoch": 0.5464060703610025, "grad_norm": 0.70703125, "learning_rate": 2.1453073530579716e-05, "loss": 0.5685, "step": 7426 }, { "epoch": 0.5464796504943665, "grad_norm": 0.71875, "learning_rate": 2.144734061717873e-05, "loss": 0.991, "step": 7427 }, { "epoch": 0.5465532306277305, "grad_norm": 0.8984375, "learning_rate": 2.144160789444226e-05, "loss": 0.9245, "step": 7428 }, { "epoch": 0.5466268107610945, "grad_norm": 0.86328125, "learning_rate": 2.143587536267797e-05, "loss": 0.9547, "step": 7429 }, { "epoch": 0.5467003908944585, "grad_norm": 0.76953125, "learning_rate": 2.143014302219352e-05, "loss": 1.04, "step": 7430 }, { "epoch": 0.5467739710278224, "grad_norm": 0.91015625, "learning_rate": 2.1424410873296557e-05, "loss": 1.016, "step": 7431 }, { "epoch": 0.5468475511611864, "grad_norm": 1.171875, "learning_rate": 2.1418678916294705e-05, "loss": 1.1255, "step": 7432 }, { "epoch": 0.5469211312945504, "grad_norm": 1.046875, "learning_rate": 2.141294715149559e-05, "loss": 1.1142, "step": 7433 }, { "epoch": 0.5469947114279144, "grad_norm": 0.86328125, "learning_rate": 2.1407215579206826e-05, "loss": 0.9451, "step": 7434 }, { "epoch": 0.5470682915612785, "grad_norm": 0.75390625, "learning_rate": 2.140148419973603e-05, "loss": 0.8057, "step": 7435 }, { "epoch": 0.5471418716946425, "grad_norm": 0.8359375, "learning_rate": 2.1395753013390765e-05, "loss": 0.7773, "step": 7436 }, { "epoch": 0.5472154518280065, "grad_norm": 0.88671875, "learning_rate": 2.139002202047863e-05, "loss": 0.9999, "step": 7437 }, { "epoch": 0.5472890319613705, "grad_norm": 0.8671875, "learning_rate": 2.1384291221307195e-05, "loss": 1.0387, "step": 7438 }, { "epoch": 0.5473626120947345, "grad_norm": 0.77734375, "learning_rate": 2.1378560616184017e-05, "loss": 0.7955, "step": 7439 }, { "epoch": 0.5474361922280984, "grad_norm": 1.078125, "learning_rate": 2.1372830205416666e-05, "loss": 1.2673, "step": 7440 }, { "epoch": 0.5475097723614624, "grad_norm": 0.90234375, "learning_rate": 2.1367099989312657e-05, "loss": 0.7981, "step": 7441 }, { "epoch": 0.5475833524948264, "grad_norm": 0.7890625, "learning_rate": 2.136136996817953e-05, "loss": 1.0126, "step": 7442 }, { "epoch": 0.5476569326281904, "grad_norm": 0.765625, "learning_rate": 2.1355640142324804e-05, "loss": 0.8105, "step": 7443 }, { "epoch": 0.5477305127615544, "grad_norm": 0.80078125, "learning_rate": 2.1349910512055992e-05, "loss": 0.7491, "step": 7444 }, { "epoch": 0.5478040928949184, "grad_norm": 0.7734375, "learning_rate": 2.1344181077680585e-05, "loss": 0.8243, "step": 7445 }, { "epoch": 0.5478776730282824, "grad_norm": 0.984375, "learning_rate": 2.1338451839506075e-05, "loss": 0.9097, "step": 7446 }, { "epoch": 0.5479512531616464, "grad_norm": 0.828125, "learning_rate": 2.1332722797839937e-05, "loss": 0.8273, "step": 7447 }, { "epoch": 0.5480248332950104, "grad_norm": 1.0625, "learning_rate": 2.1326993952989642e-05, "loss": 1.2072, "step": 7448 }, { "epoch": 0.5480984134283743, "grad_norm": 0.84765625, "learning_rate": 2.1321265305262654e-05, "loss": 0.629, "step": 7449 }, { "epoch": 0.5481719935617383, "grad_norm": 0.828125, "learning_rate": 2.13155368549664e-05, "loss": 1.0074, "step": 7450 }, { "epoch": 0.5482455736951023, "grad_norm": 0.86328125, "learning_rate": 2.1309808602408323e-05, "loss": 0.7728, "step": 7451 }, { "epoch": 0.5483191538284663, "grad_norm": 0.76953125, "learning_rate": 2.1304080547895845e-05, "loss": 1.2414, "step": 7452 }, { "epoch": 0.5483927339618303, "grad_norm": 0.828125, "learning_rate": 2.1298352691736386e-05, "loss": 0.8012, "step": 7453 }, { "epoch": 0.5484663140951943, "grad_norm": 0.66796875, "learning_rate": 2.1292625034237358e-05, "loss": 0.4832, "step": 7454 }, { "epoch": 0.5485398942285583, "grad_norm": 1.2734375, "learning_rate": 2.1286897575706132e-05, "loss": 0.9493, "step": 7455 }, { "epoch": 0.5486134743619223, "grad_norm": 0.76953125, "learning_rate": 2.12811703164501e-05, "loss": 1.1735, "step": 7456 }, { "epoch": 0.5486870544952863, "grad_norm": 0.99609375, "learning_rate": 2.1275443256776632e-05, "loss": 1.3607, "step": 7457 }, { "epoch": 0.5487606346286502, "grad_norm": 0.78515625, "learning_rate": 2.1269716396993095e-05, "loss": 0.9992, "step": 7458 }, { "epoch": 0.5488342147620142, "grad_norm": 0.83984375, "learning_rate": 2.126398973740683e-05, "loss": 0.6699, "step": 7459 }, { "epoch": 0.5489077948953782, "grad_norm": 0.93359375, "learning_rate": 2.125826327832518e-05, "loss": 0.9599, "step": 7460 }, { "epoch": 0.5489813750287422, "grad_norm": 0.65234375, "learning_rate": 2.1252537020055468e-05, "loss": 0.617, "step": 7461 }, { "epoch": 0.5490549551621062, "grad_norm": 0.7890625, "learning_rate": 2.1246810962905024e-05, "loss": 1.0797, "step": 7462 }, { "epoch": 0.5491285352954702, "grad_norm": 0.8359375, "learning_rate": 2.1241085107181148e-05, "loss": 0.6466, "step": 7463 }, { "epoch": 0.5492021154288342, "grad_norm": 1.0390625, "learning_rate": 2.1235359453191132e-05, "loss": 1.1459, "step": 7464 }, { "epoch": 0.5492756955621982, "grad_norm": 0.984375, "learning_rate": 2.1229634001242256e-05, "loss": 1.2396, "step": 7465 }, { "epoch": 0.5493492756955622, "grad_norm": 0.8359375, "learning_rate": 2.1223908751641805e-05, "loss": 0.8712, "step": 7466 }, { "epoch": 0.5494228558289261, "grad_norm": 0.7109375, "learning_rate": 2.1218183704697053e-05, "loss": 0.7721, "step": 7467 }, { "epoch": 0.5494964359622901, "grad_norm": 0.96484375, "learning_rate": 2.1212458860715225e-05, "loss": 1.315, "step": 7468 }, { "epoch": 0.5495700160956541, "grad_norm": 0.7109375, "learning_rate": 2.1206734220003575e-05, "loss": 0.9055, "step": 7469 }, { "epoch": 0.5496435962290181, "grad_norm": 0.9296875, "learning_rate": 2.120100978286934e-05, "loss": 1.1254, "step": 7470 }, { "epoch": 0.5497171763623822, "grad_norm": 0.9765625, "learning_rate": 2.1195285549619733e-05, "loss": 1.2472, "step": 7471 }, { "epoch": 0.5497907564957462, "grad_norm": 0.921875, "learning_rate": 2.118956152056197e-05, "loss": 0.7265, "step": 7472 }, { "epoch": 0.5498643366291102, "grad_norm": 0.9140625, "learning_rate": 2.118383769600324e-05, "loss": 1.5356, "step": 7473 }, { "epoch": 0.5499379167624742, "grad_norm": 0.7890625, "learning_rate": 2.1178114076250727e-05, "loss": 0.8452, "step": 7474 }, { "epoch": 0.5500114968958382, "grad_norm": 0.88671875, "learning_rate": 2.1172390661611618e-05, "loss": 0.8157, "step": 7475 }, { "epoch": 0.5500850770292021, "grad_norm": 0.95703125, "learning_rate": 2.1166667452393082e-05, "loss": 0.8626, "step": 7476 }, { "epoch": 0.5501586571625661, "grad_norm": 0.94140625, "learning_rate": 2.1160944448902254e-05, "loss": 1.1232, "step": 7477 }, { "epoch": 0.5502322372959301, "grad_norm": 1.0234375, "learning_rate": 2.1155221651446278e-05, "loss": 1.1201, "step": 7478 }, { "epoch": 0.5503058174292941, "grad_norm": 0.66015625, "learning_rate": 2.1149499060332302e-05, "loss": 0.8872, "step": 7479 }, { "epoch": 0.5503793975626581, "grad_norm": 0.87109375, "learning_rate": 2.114377667586744e-05, "loss": 1.3235, "step": 7480 }, { "epoch": 0.5504529776960221, "grad_norm": 0.796875, "learning_rate": 2.1138054498358808e-05, "loss": 0.6922, "step": 7481 }, { "epoch": 0.5505265578293861, "grad_norm": 0.703125, "learning_rate": 2.113233252811349e-05, "loss": 0.5839, "step": 7482 }, { "epoch": 0.5506001379627501, "grad_norm": 0.6875, "learning_rate": 2.1126610765438574e-05, "loss": 0.7273, "step": 7483 }, { "epoch": 0.5506737180961141, "grad_norm": 0.72265625, "learning_rate": 2.1120889210641152e-05, "loss": 0.626, "step": 7484 }, { "epoch": 0.550747298229478, "grad_norm": 0.8203125, "learning_rate": 2.1115167864028278e-05, "loss": 0.7516, "step": 7485 }, { "epoch": 0.550820878362842, "grad_norm": 0.76171875, "learning_rate": 2.1109446725907003e-05, "loss": 1.0033, "step": 7486 }, { "epoch": 0.550894458496206, "grad_norm": 0.92578125, "learning_rate": 2.1103725796584374e-05, "loss": 1.243, "step": 7487 }, { "epoch": 0.55096803862957, "grad_norm": 0.91015625, "learning_rate": 2.109800507636742e-05, "loss": 1.1234, "step": 7488 }, { "epoch": 0.551041618762934, "grad_norm": 0.73046875, "learning_rate": 2.1092284565563168e-05, "loss": 0.795, "step": 7489 }, { "epoch": 0.551115198896298, "grad_norm": 1.015625, "learning_rate": 2.1086564264478635e-05, "loss": 0.94, "step": 7490 }, { "epoch": 0.551188779029662, "grad_norm": 1.0703125, "learning_rate": 2.108084417342079e-05, "loss": 1.2097, "step": 7491 }, { "epoch": 0.551262359163026, "grad_norm": 0.88671875, "learning_rate": 2.1075124292696636e-05, "loss": 0.8736, "step": 7492 }, { "epoch": 0.55133593929639, "grad_norm": 1.3125, "learning_rate": 2.106940462261315e-05, "loss": 1.0572, "step": 7493 }, { "epoch": 0.5514095194297539, "grad_norm": 0.8203125, "learning_rate": 2.1063685163477296e-05, "loss": 0.6779, "step": 7494 }, { "epoch": 0.5514830995631179, "grad_norm": 0.7421875, "learning_rate": 2.1057965915596034e-05, "loss": 0.703, "step": 7495 }, { "epoch": 0.5515566796964819, "grad_norm": 0.69140625, "learning_rate": 2.1052246879276287e-05, "loss": 0.6112, "step": 7496 }, { "epoch": 0.5516302598298459, "grad_norm": 0.69921875, "learning_rate": 2.104652805482499e-05, "loss": 0.9929, "step": 7497 }, { "epoch": 0.5517038399632099, "grad_norm": 0.86328125, "learning_rate": 2.1040809442549068e-05, "loss": 0.7051, "step": 7498 }, { "epoch": 0.5517774200965739, "grad_norm": 0.81640625, "learning_rate": 2.103509104275543e-05, "loss": 0.8016, "step": 7499 }, { "epoch": 0.5518510002299379, "grad_norm": 0.8671875, "learning_rate": 2.1029372855750962e-05, "loss": 0.8743, "step": 7500 }, { "epoch": 0.5519245803633019, "grad_norm": 0.89453125, "learning_rate": 2.1023654881842553e-05, "loss": 1.0809, "step": 7501 }, { "epoch": 0.551998160496666, "grad_norm": 0.98046875, "learning_rate": 2.101793712133708e-05, "loss": 1.3755, "step": 7502 }, { "epoch": 0.5520717406300298, "grad_norm": 0.76171875, "learning_rate": 2.1012219574541395e-05, "loss": 0.5926, "step": 7503 }, { "epoch": 0.5521453207633938, "grad_norm": 0.7421875, "learning_rate": 2.1006502241762365e-05, "loss": 0.9977, "step": 7504 }, { "epoch": 0.5522189008967578, "grad_norm": 0.7734375, "learning_rate": 2.1000785123306804e-05, "loss": 0.7903, "step": 7505 }, { "epoch": 0.5522924810301219, "grad_norm": 0.84375, "learning_rate": 2.0995068219481555e-05, "loss": 0.812, "step": 7506 }, { "epoch": 0.5523660611634859, "grad_norm": 0.8828125, "learning_rate": 2.098935153059343e-05, "loss": 0.8522, "step": 7507 }, { "epoch": 0.5524396412968499, "grad_norm": 1.0078125, "learning_rate": 2.0983635056949246e-05, "loss": 0.7858, "step": 7508 }, { "epoch": 0.5525132214302139, "grad_norm": 0.91015625, "learning_rate": 2.0977918798855774e-05, "loss": 0.7539, "step": 7509 }, { "epoch": 0.5525868015635779, "grad_norm": 0.93359375, "learning_rate": 2.0972202756619798e-05, "loss": 1.0636, "step": 7510 }, { "epoch": 0.5526603816969419, "grad_norm": 0.8203125, "learning_rate": 2.0966486930548097e-05, "loss": 0.6219, "step": 7511 }, { "epoch": 0.5527339618303058, "grad_norm": 0.82421875, "learning_rate": 2.0960771320947423e-05, "loss": 0.9379, "step": 7512 }, { "epoch": 0.5528075419636698, "grad_norm": 0.7421875, "learning_rate": 2.095505592812453e-05, "loss": 0.8834, "step": 7513 }, { "epoch": 0.5528811220970338, "grad_norm": 0.8359375, "learning_rate": 2.094934075238614e-05, "loss": 0.8552, "step": 7514 }, { "epoch": 0.5529547022303978, "grad_norm": 0.8359375, "learning_rate": 2.0943625794038977e-05, "loss": 0.9106, "step": 7515 }, { "epoch": 0.5530282823637618, "grad_norm": 0.8828125, "learning_rate": 2.093791105338976e-05, "loss": 0.6977, "step": 7516 }, { "epoch": 0.5531018624971258, "grad_norm": 1.109375, "learning_rate": 2.093219653074519e-05, "loss": 1.0213, "step": 7517 }, { "epoch": 0.5531754426304898, "grad_norm": 0.88671875, "learning_rate": 2.092648222641194e-05, "loss": 1.0649, "step": 7518 }, { "epoch": 0.5532490227638538, "grad_norm": 0.7421875, "learning_rate": 2.0920768140696696e-05, "loss": 0.9968, "step": 7519 }, { "epoch": 0.5533226028972178, "grad_norm": 0.81640625, "learning_rate": 2.0915054273906125e-05, "loss": 0.6715, "step": 7520 }, { "epoch": 0.5533961830305817, "grad_norm": 0.72265625, "learning_rate": 2.0909340626346874e-05, "loss": 0.9704, "step": 7521 }, { "epoch": 0.5534697631639457, "grad_norm": 0.8125, "learning_rate": 2.0903627198325593e-05, "loss": 0.6492, "step": 7522 }, { "epoch": 0.5535433432973097, "grad_norm": 0.7578125, "learning_rate": 2.0897913990148895e-05, "loss": 0.8551, "step": 7523 }, { "epoch": 0.5536169234306737, "grad_norm": 0.95703125, "learning_rate": 2.0892201002123406e-05, "loss": 0.9271, "step": 7524 }, { "epoch": 0.5536905035640377, "grad_norm": 0.890625, "learning_rate": 2.088648823455573e-05, "loss": 0.826, "step": 7525 }, { "epoch": 0.5537640836974017, "grad_norm": 0.77734375, "learning_rate": 2.0880775687752464e-05, "loss": 0.797, "step": 7526 }, { "epoch": 0.5538376638307657, "grad_norm": 0.84375, "learning_rate": 2.087506336202019e-05, "loss": 0.8945, "step": 7527 }, { "epoch": 0.5539112439641297, "grad_norm": 0.625, "learning_rate": 2.0869351257665467e-05, "loss": 0.7587, "step": 7528 }, { "epoch": 0.5539848240974937, "grad_norm": 0.87890625, "learning_rate": 2.0863639374994863e-05, "loss": 0.7524, "step": 7529 }, { "epoch": 0.5540584042308576, "grad_norm": 0.7578125, "learning_rate": 2.0857927714314922e-05, "loss": 0.7819, "step": 7530 }, { "epoch": 0.5541319843642216, "grad_norm": 1.09375, "learning_rate": 2.085221627593218e-05, "loss": 1.0946, "step": 7531 }, { "epoch": 0.5542055644975856, "grad_norm": 0.80859375, "learning_rate": 2.084650506015315e-05, "loss": 0.83, "step": 7532 }, { "epoch": 0.5542791446309496, "grad_norm": 0.8046875, "learning_rate": 2.0840794067284353e-05, "loss": 0.9074, "step": 7533 }, { "epoch": 0.5543527247643136, "grad_norm": 0.78125, "learning_rate": 2.083508329763228e-05, "loss": 0.7968, "step": 7534 }, { "epoch": 0.5544263048976776, "grad_norm": 0.84375, "learning_rate": 2.082937275150342e-05, "loss": 0.8298, "step": 7535 }, { "epoch": 0.5544998850310416, "grad_norm": 0.84765625, "learning_rate": 2.082366242920426e-05, "loss": 1.2276, "step": 7536 }, { "epoch": 0.5545734651644056, "grad_norm": 0.77734375, "learning_rate": 2.0817952331041236e-05, "loss": 0.7956, "step": 7537 }, { "epoch": 0.5546470452977696, "grad_norm": 1.125, "learning_rate": 2.0812242457320812e-05, "loss": 1.2686, "step": 7538 }, { "epoch": 0.5547206254311335, "grad_norm": 0.98828125, "learning_rate": 2.0806532808349426e-05, "loss": 0.809, "step": 7539 }, { "epoch": 0.5547942055644975, "grad_norm": 0.89453125, "learning_rate": 2.080082338443351e-05, "loss": 0.732, "step": 7540 }, { "epoch": 0.5548677856978615, "grad_norm": 0.70703125, "learning_rate": 2.0795114185879468e-05, "loss": 0.8356, "step": 7541 }, { "epoch": 0.5549413658312256, "grad_norm": 0.84765625, "learning_rate": 2.0789405212993704e-05, "loss": 0.9492, "step": 7542 }, { "epoch": 0.5550149459645896, "grad_norm": 0.76953125, "learning_rate": 2.078369646608261e-05, "loss": 0.8767, "step": 7543 }, { "epoch": 0.5550885260979536, "grad_norm": 0.9296875, "learning_rate": 2.0777987945452563e-05, "loss": 0.8245, "step": 7544 }, { "epoch": 0.5551621062313176, "grad_norm": 0.796875, "learning_rate": 2.0772279651409933e-05, "loss": 0.8415, "step": 7545 }, { "epoch": 0.5552356863646816, "grad_norm": 0.80078125, "learning_rate": 2.0766571584261066e-05, "loss": 0.7504, "step": 7546 }, { "epoch": 0.5553092664980456, "grad_norm": 0.91015625, "learning_rate": 2.0760863744312305e-05, "loss": 0.8587, "step": 7547 }, { "epoch": 0.5553828466314095, "grad_norm": 0.75, "learning_rate": 2.075515613186998e-05, "loss": 0.5417, "step": 7548 }, { "epoch": 0.5554564267647735, "grad_norm": 0.88671875, "learning_rate": 2.0749448747240417e-05, "loss": 0.8205, "step": 7549 }, { "epoch": 0.5555300068981375, "grad_norm": 0.8984375, "learning_rate": 2.0743741590729903e-05, "loss": 0.7322, "step": 7550 }, { "epoch": 0.5556035870315015, "grad_norm": 0.7421875, "learning_rate": 2.073803466264474e-05, "loss": 0.7334, "step": 7551 }, { "epoch": 0.5556771671648655, "grad_norm": 0.875, "learning_rate": 2.0732327963291203e-05, "loss": 0.9157, "step": 7552 }, { "epoch": 0.5557507472982295, "grad_norm": 0.81640625, "learning_rate": 2.0726621492975567e-05, "loss": 0.8135, "step": 7553 }, { "epoch": 0.5558243274315935, "grad_norm": 0.8046875, "learning_rate": 2.072091525200409e-05, "loss": 0.6913, "step": 7554 }, { "epoch": 0.5558979075649575, "grad_norm": 0.76171875, "learning_rate": 2.0715209240683005e-05, "loss": 0.5635, "step": 7555 }, { "epoch": 0.5559714876983215, "grad_norm": 1.109375, "learning_rate": 2.0709503459318544e-05, "loss": 1.2376, "step": 7556 }, { "epoch": 0.5560450678316854, "grad_norm": 1.015625, "learning_rate": 2.070379790821693e-05, "loss": 0.9284, "step": 7557 }, { "epoch": 0.5561186479650494, "grad_norm": 0.6796875, "learning_rate": 2.0698092587684367e-05, "loss": 1.0626, "step": 7558 }, { "epoch": 0.5561922280984134, "grad_norm": 0.84375, "learning_rate": 2.0692387498027055e-05, "loss": 0.8049, "step": 7559 }, { "epoch": 0.5562658082317774, "grad_norm": 1.015625, "learning_rate": 2.0686682639551167e-05, "loss": 1.0139, "step": 7560 }, { "epoch": 0.5563393883651414, "grad_norm": 0.89453125, "learning_rate": 2.0680978012562875e-05, "loss": 0.9984, "step": 7561 }, { "epoch": 0.5564129684985054, "grad_norm": 0.83203125, "learning_rate": 2.0675273617368334e-05, "loss": 0.9501, "step": 7562 }, { "epoch": 0.5564865486318694, "grad_norm": 0.65625, "learning_rate": 2.0669569454273698e-05, "loss": 0.7705, "step": 7563 }, { "epoch": 0.5565601287652334, "grad_norm": 0.97265625, "learning_rate": 2.0663865523585083e-05, "loss": 0.9158, "step": 7564 }, { "epoch": 0.5566337088985974, "grad_norm": 0.66796875, "learning_rate": 2.0658161825608612e-05, "loss": 0.8112, "step": 7565 }, { "epoch": 0.5567072890319613, "grad_norm": 0.75, "learning_rate": 2.06524583606504e-05, "loss": 0.8144, "step": 7566 }, { "epoch": 0.5567808691653253, "grad_norm": 0.9765625, "learning_rate": 2.0646755129016535e-05, "loss": 1.0407, "step": 7567 }, { "epoch": 0.5568544492986893, "grad_norm": 0.9296875, "learning_rate": 2.0641052131013107e-05, "loss": 0.9335, "step": 7568 }, { "epoch": 0.5569280294320533, "grad_norm": 0.7578125, "learning_rate": 2.0635349366946168e-05, "loss": 0.7903, "step": 7569 }, { "epoch": 0.5570016095654173, "grad_norm": 0.921875, "learning_rate": 2.0629646837121787e-05, "loss": 1.2399, "step": 7570 }, { "epoch": 0.5570751896987813, "grad_norm": 0.875, "learning_rate": 2.062394454184601e-05, "loss": 1.1816, "step": 7571 }, { "epoch": 0.5571487698321453, "grad_norm": 0.9453125, "learning_rate": 2.0618242481424864e-05, "loss": 0.9437, "step": 7572 }, { "epoch": 0.5572223499655093, "grad_norm": 0.83203125, "learning_rate": 2.0612540656164365e-05, "loss": 0.9093, "step": 7573 }, { "epoch": 0.5572959300988733, "grad_norm": 0.9296875, "learning_rate": 2.060683906637052e-05, "loss": 0.9084, "step": 7574 }, { "epoch": 0.5573695102322372, "grad_norm": 0.765625, "learning_rate": 2.0601137712349328e-05, "loss": 1.0847, "step": 7575 }, { "epoch": 0.5574430903656012, "grad_norm": 0.83203125, "learning_rate": 2.0595436594406764e-05, "loss": 0.9812, "step": 7576 }, { "epoch": 0.5575166704989652, "grad_norm": 0.921875, "learning_rate": 2.0589735712848813e-05, "loss": 0.9761, "step": 7577 }, { "epoch": 0.5575902506323293, "grad_norm": 0.83984375, "learning_rate": 2.05840350679814e-05, "loss": 1.1286, "step": 7578 }, { "epoch": 0.5576638307656933, "grad_norm": 0.98046875, "learning_rate": 2.057833466011049e-05, "loss": 0.7901, "step": 7579 }, { "epoch": 0.5577374108990573, "grad_norm": 0.79296875, "learning_rate": 2.0572634489542007e-05, "loss": 0.9269, "step": 7580 }, { "epoch": 0.5578109910324213, "grad_norm": 0.828125, "learning_rate": 2.0566934556581874e-05, "loss": 0.588, "step": 7581 }, { "epoch": 0.5578845711657853, "grad_norm": 1.0546875, "learning_rate": 2.0561234861535983e-05, "loss": 1.3045, "step": 7582 }, { "epoch": 0.5579581512991493, "grad_norm": 0.8515625, "learning_rate": 2.0555535404710237e-05, "loss": 0.7659, "step": 7583 }, { "epoch": 0.5580317314325132, "grad_norm": 0.97265625, "learning_rate": 2.0549836186410508e-05, "loss": 0.8927, "step": 7584 }, { "epoch": 0.5581053115658772, "grad_norm": 1.1015625, "learning_rate": 2.0544137206942665e-05, "loss": 1.1027, "step": 7585 }, { "epoch": 0.5581788916992412, "grad_norm": 0.91015625, "learning_rate": 2.053843846661257e-05, "loss": 1.3398, "step": 7586 }, { "epoch": 0.5582524718326052, "grad_norm": 0.890625, "learning_rate": 2.053273996572605e-05, "loss": 0.9436, "step": 7587 }, { "epoch": 0.5583260519659692, "grad_norm": 0.94140625, "learning_rate": 2.052704170458894e-05, "loss": 1.2582, "step": 7588 }, { "epoch": 0.5583996320993332, "grad_norm": 0.9453125, "learning_rate": 2.0521343683507047e-05, "loss": 0.6782, "step": 7589 }, { "epoch": 0.5584732122326972, "grad_norm": 0.7578125, "learning_rate": 2.0515645902786184e-05, "loss": 0.8031, "step": 7590 }, { "epoch": 0.5585467923660612, "grad_norm": 0.73828125, "learning_rate": 2.050994836273215e-05, "loss": 0.6921, "step": 7591 }, { "epoch": 0.5586203724994252, "grad_norm": 0.9296875, "learning_rate": 2.050425106365069e-05, "loss": 0.9654, "step": 7592 }, { "epoch": 0.5586939526327891, "grad_norm": 0.90625, "learning_rate": 2.0498554005847588e-05, "loss": 0.7598, "step": 7593 }, { "epoch": 0.5587675327661531, "grad_norm": 1.015625, "learning_rate": 2.049285718962859e-05, "loss": 0.8186, "step": 7594 }, { "epoch": 0.5588411128995171, "grad_norm": 1.0390625, "learning_rate": 2.0487160615299437e-05, "loss": 0.9807, "step": 7595 }, { "epoch": 0.5589146930328811, "grad_norm": 1.0390625, "learning_rate": 2.0481464283165847e-05, "loss": 1.0518, "step": 7596 }, { "epoch": 0.5589882731662451, "grad_norm": 1.0703125, "learning_rate": 2.047576819353353e-05, "loss": 1.0099, "step": 7597 }, { "epoch": 0.5590618532996091, "grad_norm": 0.875, "learning_rate": 2.0470072346708196e-05, "loss": 1.2192, "step": 7598 }, { "epoch": 0.5591354334329731, "grad_norm": 0.9921875, "learning_rate": 2.046437674299552e-05, "loss": 1.0461, "step": 7599 }, { "epoch": 0.5592090135663371, "grad_norm": 0.85546875, "learning_rate": 2.0458681382701184e-05, "loss": 1.4193, "step": 7600 }, { "epoch": 0.5592825936997011, "grad_norm": 0.78515625, "learning_rate": 2.0452986266130834e-05, "loss": 1.0968, "step": 7601 }, { "epoch": 0.559356173833065, "grad_norm": 1.0546875, "learning_rate": 2.0447291393590124e-05, "loss": 1.3195, "step": 7602 }, { "epoch": 0.559429753966429, "grad_norm": 0.828125, "learning_rate": 2.0441596765384684e-05, "loss": 0.8513, "step": 7603 }, { "epoch": 0.559503334099793, "grad_norm": 0.86328125, "learning_rate": 2.043590238182015e-05, "loss": 1.076, "step": 7604 }, { "epoch": 0.559576914233157, "grad_norm": 0.9765625, "learning_rate": 2.0430208243202104e-05, "loss": 0.9032, "step": 7605 }, { "epoch": 0.559650494366521, "grad_norm": 0.9296875, "learning_rate": 2.042451434983615e-05, "loss": 0.9369, "step": 7606 }, { "epoch": 0.559724074499885, "grad_norm": 1.09375, "learning_rate": 2.0418820702027866e-05, "loss": 1.3684, "step": 7607 }, { "epoch": 0.559797654633249, "grad_norm": 0.97265625, "learning_rate": 2.0413127300082818e-05, "loss": 0.9994, "step": 7608 }, { "epoch": 0.559871234766613, "grad_norm": 0.56640625, "learning_rate": 2.040743414430658e-05, "loss": 0.7899, "step": 7609 }, { "epoch": 0.559944814899977, "grad_norm": 1.0859375, "learning_rate": 2.040174123500467e-05, "loss": 1.2626, "step": 7610 }, { "epoch": 0.5600183950333409, "grad_norm": 0.7265625, "learning_rate": 2.039604857248262e-05, "loss": 0.8315, "step": 7611 }, { "epoch": 0.560091975166705, "grad_norm": 0.86328125, "learning_rate": 2.039035615704595e-05, "loss": 0.7149, "step": 7612 }, { "epoch": 0.560165555300069, "grad_norm": 1.0234375, "learning_rate": 2.0384663989000162e-05, "loss": 1.2585, "step": 7613 }, { "epoch": 0.560239135433433, "grad_norm": 0.7734375, "learning_rate": 2.0378972068650736e-05, "loss": 0.9356, "step": 7614 }, { "epoch": 0.560312715566797, "grad_norm": 0.9609375, "learning_rate": 2.037328039630315e-05, "loss": 1.1026, "step": 7615 }, { "epoch": 0.560386295700161, "grad_norm": 0.98046875, "learning_rate": 2.0367588972262867e-05, "loss": 0.9621, "step": 7616 }, { "epoch": 0.560459875833525, "grad_norm": 1.0078125, "learning_rate": 2.036189779683534e-05, "loss": 1.2336, "step": 7617 }, { "epoch": 0.560533455966889, "grad_norm": 0.89453125, "learning_rate": 2.0356206870326004e-05, "loss": 0.8108, "step": 7618 }, { "epoch": 0.560607036100253, "grad_norm": 0.91015625, "learning_rate": 2.035051619304026e-05, "loss": 0.8924, "step": 7619 }, { "epoch": 0.5606806162336169, "grad_norm": 0.92578125, "learning_rate": 2.0344825765283535e-05, "loss": 0.7975, "step": 7620 }, { "epoch": 0.5607541963669809, "grad_norm": 0.8125, "learning_rate": 2.0339135587361213e-05, "loss": 1.0401, "step": 7621 }, { "epoch": 0.5608277765003449, "grad_norm": 0.640625, "learning_rate": 2.0333445659578695e-05, "loss": 0.5405, "step": 7622 }, { "epoch": 0.5609013566337089, "grad_norm": 0.94921875, "learning_rate": 2.0327755982241326e-05, "loss": 0.8101, "step": 7623 }, { "epoch": 0.5609749367670729, "grad_norm": 0.7109375, "learning_rate": 2.0322066555654467e-05, "loss": 0.6919, "step": 7624 }, { "epoch": 0.5610485169004369, "grad_norm": 0.99609375, "learning_rate": 2.0316377380123465e-05, "loss": 1.1004, "step": 7625 }, { "epoch": 0.5611220970338009, "grad_norm": 0.62890625, "learning_rate": 2.0310688455953637e-05, "loss": 0.5486, "step": 7626 }, { "epoch": 0.5611956771671649, "grad_norm": 0.93359375, "learning_rate": 2.030499978345031e-05, "loss": 1.0957, "step": 7627 }, { "epoch": 0.5612692573005289, "grad_norm": 1.078125, "learning_rate": 2.0299311362918775e-05, "loss": 1.3608, "step": 7628 }, { "epoch": 0.5613428374338928, "grad_norm": 0.8046875, "learning_rate": 2.0293623194664317e-05, "loss": 0.8691, "step": 7629 }, { "epoch": 0.5614164175672568, "grad_norm": 0.890625, "learning_rate": 2.0287935278992214e-05, "loss": 0.9023, "step": 7630 }, { "epoch": 0.5614899977006208, "grad_norm": 0.88671875, "learning_rate": 2.0282247616207727e-05, "loss": 0.7869, "step": 7631 }, { "epoch": 0.5615635778339848, "grad_norm": 0.9765625, "learning_rate": 2.027656020661611e-05, "loss": 1.2842, "step": 7632 }, { "epoch": 0.5616371579673488, "grad_norm": 0.8515625, "learning_rate": 2.0270873050522566e-05, "loss": 0.7527, "step": 7633 }, { "epoch": 0.5617107381007128, "grad_norm": 0.96875, "learning_rate": 2.0265186148232343e-05, "loss": 1.1703, "step": 7634 }, { "epoch": 0.5617843182340768, "grad_norm": 0.984375, "learning_rate": 2.025949950005063e-05, "loss": 1.009, "step": 7635 }, { "epoch": 0.5618578983674408, "grad_norm": 0.8046875, "learning_rate": 2.025381310628264e-05, "loss": 1.2368, "step": 7636 }, { "epoch": 0.5619314785008048, "grad_norm": 1.0625, "learning_rate": 2.0248126967233524e-05, "loss": 1.1802, "step": 7637 }, { "epoch": 0.5620050586341687, "grad_norm": 0.65234375, "learning_rate": 2.024244108320846e-05, "loss": 0.6024, "step": 7638 }, { "epoch": 0.5620786387675327, "grad_norm": 0.83203125, "learning_rate": 2.02367554545126e-05, "loss": 0.7543, "step": 7639 }, { "epoch": 0.5621522189008967, "grad_norm": 0.890625, "learning_rate": 2.0231070081451076e-05, "loss": 0.6373, "step": 7640 }, { "epoch": 0.5622257990342607, "grad_norm": 0.74609375, "learning_rate": 2.022538496432902e-05, "loss": 0.7917, "step": 7641 }, { "epoch": 0.5622993791676247, "grad_norm": 0.953125, "learning_rate": 2.0219700103451528e-05, "loss": 0.7983, "step": 7642 }, { "epoch": 0.5623729593009887, "grad_norm": 1.0546875, "learning_rate": 2.02140154991237e-05, "loss": 1.0568, "step": 7643 }, { "epoch": 0.5624465394343527, "grad_norm": 0.7109375, "learning_rate": 2.0208331151650626e-05, "loss": 0.6334, "step": 7644 }, { "epoch": 0.5625201195677167, "grad_norm": 0.890625, "learning_rate": 2.0202647061337376e-05, "loss": 0.8103, "step": 7645 }, { "epoch": 0.5625936997010808, "grad_norm": 0.703125, "learning_rate": 2.019696322848899e-05, "loss": 0.8565, "step": 7646 }, { "epoch": 0.5626672798344446, "grad_norm": 0.84765625, "learning_rate": 2.0191279653410513e-05, "loss": 1.0922, "step": 7647 }, { "epoch": 0.5627408599678086, "grad_norm": 0.984375, "learning_rate": 2.018559633640697e-05, "loss": 0.8309, "step": 7648 }, { "epoch": 0.5628144401011727, "grad_norm": 0.8671875, "learning_rate": 2.0179913277783386e-05, "loss": 0.7959, "step": 7649 }, { "epoch": 0.5628880202345367, "grad_norm": 0.8515625, "learning_rate": 2.017423047784476e-05, "loss": 1.374, "step": 7650 }, { "epoch": 0.5629616003679007, "grad_norm": 0.828125, "learning_rate": 2.016854793689606e-05, "loss": 0.7029, "step": 7651 }, { "epoch": 0.5630351805012647, "grad_norm": 0.73828125, "learning_rate": 2.016286565524227e-05, "loss": 0.7069, "step": 7652 }, { "epoch": 0.5631087606346287, "grad_norm": 0.6640625, "learning_rate": 2.015718363318834e-05, "loss": 0.4827, "step": 7653 }, { "epoch": 0.5631823407679927, "grad_norm": 0.671875, "learning_rate": 2.0151501871039224e-05, "loss": 0.6133, "step": 7654 }, { "epoch": 0.5632559209013567, "grad_norm": 0.8984375, "learning_rate": 2.014582036909984e-05, "loss": 0.8943, "step": 7655 }, { "epoch": 0.5633295010347206, "grad_norm": 0.80859375, "learning_rate": 2.014013912767511e-05, "loss": 0.678, "step": 7656 }, { "epoch": 0.5634030811680846, "grad_norm": 0.71875, "learning_rate": 2.013445814706993e-05, "loss": 0.5697, "step": 7657 }, { "epoch": 0.5634766613014486, "grad_norm": 0.71484375, "learning_rate": 2.0128777427589198e-05, "loss": 0.6967, "step": 7658 }, { "epoch": 0.5635502414348126, "grad_norm": 0.8125, "learning_rate": 2.0123096969537787e-05, "loss": 1.3641, "step": 7659 }, { "epoch": 0.5636238215681766, "grad_norm": 0.640625, "learning_rate": 2.011741677322054e-05, "loss": 0.666, "step": 7660 }, { "epoch": 0.5636974017015406, "grad_norm": 0.6875, "learning_rate": 2.0111736838942306e-05, "loss": 0.5978, "step": 7661 }, { "epoch": 0.5637709818349046, "grad_norm": 0.87890625, "learning_rate": 2.0106057167007934e-05, "loss": 0.6678, "step": 7662 }, { "epoch": 0.5638445619682686, "grad_norm": 0.8671875, "learning_rate": 2.010037775772223e-05, "loss": 1.0527, "step": 7663 }, { "epoch": 0.5639181421016326, "grad_norm": 0.9140625, "learning_rate": 2.0094698611390005e-05, "loss": 0.9668, "step": 7664 }, { "epoch": 0.5639917222349966, "grad_norm": 0.91015625, "learning_rate": 2.0089019728316027e-05, "loss": 0.9403, "step": 7665 }, { "epoch": 0.5640653023683605, "grad_norm": 0.9453125, "learning_rate": 2.008334110880509e-05, "loss": 1.039, "step": 7666 }, { "epoch": 0.5641388825017245, "grad_norm": 1.0390625, "learning_rate": 2.0077662753161946e-05, "loss": 0.8635, "step": 7667 }, { "epoch": 0.5642124626350885, "grad_norm": 0.94140625, "learning_rate": 2.0071984661691354e-05, "loss": 1.0429, "step": 7668 }, { "epoch": 0.5642860427684525, "grad_norm": 0.8359375, "learning_rate": 2.006630683469803e-05, "loss": 0.7902, "step": 7669 }, { "epoch": 0.5643596229018165, "grad_norm": 1.0625, "learning_rate": 2.00606292724867e-05, "loss": 1.0544, "step": 7670 }, { "epoch": 0.5644332030351805, "grad_norm": 1.078125, "learning_rate": 2.0054951975362067e-05, "loss": 1.218, "step": 7671 }, { "epoch": 0.5645067831685445, "grad_norm": 1.015625, "learning_rate": 2.0049274943628822e-05, "loss": 1.234, "step": 7672 }, { "epoch": 0.5645803633019085, "grad_norm": 0.83984375, "learning_rate": 2.0043598177591655e-05, "loss": 0.7262, "step": 7673 }, { "epoch": 0.5646539434352725, "grad_norm": 1.0625, "learning_rate": 2.0037921677555194e-05, "loss": 1.0449, "step": 7674 }, { "epoch": 0.5647275235686364, "grad_norm": 0.7890625, "learning_rate": 2.0032245443824106e-05, "loss": 0.5502, "step": 7675 }, { "epoch": 0.5648011037020004, "grad_norm": 1.0390625, "learning_rate": 2.002656947670303e-05, "loss": 1.3229, "step": 7676 }, { "epoch": 0.5648746838353644, "grad_norm": 0.97265625, "learning_rate": 2.002089377649658e-05, "loss": 1.1109, "step": 7677 }, { "epoch": 0.5649482639687284, "grad_norm": 0.9765625, "learning_rate": 2.0015218343509347e-05, "loss": 1.4111, "step": 7678 }, { "epoch": 0.5650218441020924, "grad_norm": 0.81640625, "learning_rate": 2.0009543178045932e-05, "loss": 0.9055, "step": 7679 }, { "epoch": 0.5650954242354564, "grad_norm": 0.78125, "learning_rate": 2.000386828041091e-05, "loss": 0.5753, "step": 7680 }, { "epoch": 0.5651690043688204, "grad_norm": 0.73828125, "learning_rate": 1.9998193650908843e-05, "loss": 1.1224, "step": 7681 }, { "epoch": 0.5652425845021845, "grad_norm": 1.015625, "learning_rate": 1.9992519289844274e-05, "loss": 1.0986, "step": 7682 }, { "epoch": 0.5653161646355485, "grad_norm": 0.69921875, "learning_rate": 1.9986845197521737e-05, "loss": 0.6512, "step": 7683 }, { "epoch": 0.5653897447689124, "grad_norm": 1.1015625, "learning_rate": 1.9981171374245748e-05, "loss": 1.4244, "step": 7684 }, { "epoch": 0.5654633249022764, "grad_norm": 0.8203125, "learning_rate": 1.9975497820320815e-05, "loss": 0.7566, "step": 7685 }, { "epoch": 0.5655369050356404, "grad_norm": 0.80859375, "learning_rate": 1.9969824536051433e-05, "loss": 0.7247, "step": 7686 }, { "epoch": 0.5656104851690044, "grad_norm": 0.77734375, "learning_rate": 1.9964151521742057e-05, "loss": 0.6758, "step": 7687 }, { "epoch": 0.5656840653023684, "grad_norm": 0.8984375, "learning_rate": 1.995847877769715e-05, "loss": 0.663, "step": 7688 }, { "epoch": 0.5657576454357324, "grad_norm": 0.69140625, "learning_rate": 1.9952806304221173e-05, "loss": 0.6042, "step": 7689 }, { "epoch": 0.5658312255690964, "grad_norm": 0.796875, "learning_rate": 1.9947134101618547e-05, "loss": 0.8123, "step": 7690 }, { "epoch": 0.5659048057024604, "grad_norm": 1.0859375, "learning_rate": 1.99414621701937e-05, "loss": 1.0737, "step": 7691 }, { "epoch": 0.5659783858358244, "grad_norm": 0.81640625, "learning_rate": 1.9935790510251013e-05, "loss": 1.0011, "step": 7692 }, { "epoch": 0.5660519659691883, "grad_norm": 0.9609375, "learning_rate": 1.993011912209489e-05, "loss": 0.7549, "step": 7693 }, { "epoch": 0.5661255461025523, "grad_norm": 1.0, "learning_rate": 1.9924448006029695e-05, "loss": 1.2663, "step": 7694 }, { "epoch": 0.5661991262359163, "grad_norm": 0.77734375, "learning_rate": 1.9918777162359787e-05, "loss": 1.3989, "step": 7695 }, { "epoch": 0.5662727063692803, "grad_norm": 0.86328125, "learning_rate": 1.9913106591389517e-05, "loss": 0.7846, "step": 7696 }, { "epoch": 0.5663462865026443, "grad_norm": 0.85546875, "learning_rate": 1.9907436293423208e-05, "loss": 0.7626, "step": 7697 }, { "epoch": 0.5664198666360083, "grad_norm": 0.84765625, "learning_rate": 1.990176626876517e-05, "loss": 1.2104, "step": 7698 }, { "epoch": 0.5664934467693723, "grad_norm": 0.93359375, "learning_rate": 1.989609651771971e-05, "loss": 1.087, "step": 7699 }, { "epoch": 0.5665670269027363, "grad_norm": 0.7578125, "learning_rate": 1.9890427040591116e-05, "loss": 0.8319, "step": 7700 }, { "epoch": 0.5666406070361003, "grad_norm": 0.734375, "learning_rate": 1.988475783768364e-05, "loss": 0.7973, "step": 7701 }, { "epoch": 0.5667141871694642, "grad_norm": 1.109375, "learning_rate": 1.9879088909301556e-05, "loss": 1.1152, "step": 7702 }, { "epoch": 0.5667877673028282, "grad_norm": 0.8984375, "learning_rate": 1.9873420255749096e-05, "loss": 1.093, "step": 7703 }, { "epoch": 0.5668613474361922, "grad_norm": 0.8671875, "learning_rate": 1.9867751877330488e-05, "loss": 0.951, "step": 7704 }, { "epoch": 0.5669349275695562, "grad_norm": 1.09375, "learning_rate": 1.9862083774349956e-05, "loss": 1.2658, "step": 7705 }, { "epoch": 0.5670085077029202, "grad_norm": 0.90234375, "learning_rate": 1.985641594711167e-05, "loss": 0.9686, "step": 7706 }, { "epoch": 0.5670820878362842, "grad_norm": 0.921875, "learning_rate": 1.9850748395919826e-05, "loss": 1.0193, "step": 7707 }, { "epoch": 0.5671556679696482, "grad_norm": 0.80078125, "learning_rate": 1.984508112107859e-05, "loss": 0.7656, "step": 7708 }, { "epoch": 0.5672292481030122, "grad_norm": 0.8515625, "learning_rate": 1.983941412289212e-05, "loss": 0.8086, "step": 7709 }, { "epoch": 0.5673028282363762, "grad_norm": 0.875, "learning_rate": 1.9833747401664543e-05, "loss": 0.7632, "step": 7710 }, { "epoch": 0.5673764083697401, "grad_norm": 0.69921875, "learning_rate": 1.9828080957699987e-05, "loss": 0.4955, "step": 7711 }, { "epoch": 0.5674499885031041, "grad_norm": 0.82421875, "learning_rate": 1.982241479130255e-05, "loss": 0.9354, "step": 7712 }, { "epoch": 0.5675235686364681, "grad_norm": 0.92578125, "learning_rate": 1.981674890277634e-05, "loss": 1.072, "step": 7713 }, { "epoch": 0.5675971487698321, "grad_norm": 0.7578125, "learning_rate": 1.9811083292425427e-05, "loss": 0.7631, "step": 7714 }, { "epoch": 0.5676707289031961, "grad_norm": 1.0234375, "learning_rate": 1.980541796055387e-05, "loss": 0.9107, "step": 7715 }, { "epoch": 0.5677443090365601, "grad_norm": 0.8984375, "learning_rate": 1.9799752907465717e-05, "loss": 0.9741, "step": 7716 }, { "epoch": 0.5678178891699242, "grad_norm": 0.87890625, "learning_rate": 1.9794088133465008e-05, "loss": 0.9064, "step": 7717 }, { "epoch": 0.5678914693032882, "grad_norm": 0.9296875, "learning_rate": 1.9788423638855767e-05, "loss": 0.9792, "step": 7718 }, { "epoch": 0.5679650494366522, "grad_norm": 0.734375, "learning_rate": 1.978275942394197e-05, "loss": 0.5625, "step": 7719 }, { "epoch": 0.568038629570016, "grad_norm": 0.7734375, "learning_rate": 1.977709548902763e-05, "loss": 0.7593, "step": 7720 }, { "epoch": 0.5681122097033801, "grad_norm": 0.83984375, "learning_rate": 1.97714318344167e-05, "loss": 0.6402, "step": 7721 }, { "epoch": 0.5681857898367441, "grad_norm": 0.98046875, "learning_rate": 1.9765768460413153e-05, "loss": 1.0625, "step": 7722 }, { "epoch": 0.5682593699701081, "grad_norm": 0.96875, "learning_rate": 1.9760105367320934e-05, "loss": 1.1693, "step": 7723 }, { "epoch": 0.5683329501034721, "grad_norm": 0.96484375, "learning_rate": 1.9754442555443956e-05, "loss": 1.3715, "step": 7724 }, { "epoch": 0.5684065302368361, "grad_norm": 0.890625, "learning_rate": 1.9748780025086136e-05, "loss": 0.9845, "step": 7725 }, { "epoch": 0.5684801103702001, "grad_norm": 0.80859375, "learning_rate": 1.9743117776551377e-05, "loss": 0.7554, "step": 7726 }, { "epoch": 0.5685536905035641, "grad_norm": 0.8359375, "learning_rate": 1.9737455810143564e-05, "loss": 0.791, "step": 7727 }, { "epoch": 0.5686272706369281, "grad_norm": 0.9375, "learning_rate": 1.973179412616655e-05, "loss": 1.1941, "step": 7728 }, { "epoch": 0.568700850770292, "grad_norm": 0.875, "learning_rate": 1.9726132724924195e-05, "loss": 0.678, "step": 7729 }, { "epoch": 0.568774430903656, "grad_norm": 0.87109375, "learning_rate": 1.9720471606720338e-05, "loss": 0.8929, "step": 7730 }, { "epoch": 0.56884801103702, "grad_norm": 0.6796875, "learning_rate": 1.9714810771858797e-05, "loss": 0.554, "step": 7731 }, { "epoch": 0.568921591170384, "grad_norm": 0.78125, "learning_rate": 1.970915022064339e-05, "loss": 0.8933, "step": 7732 }, { "epoch": 0.568995171303748, "grad_norm": 0.82421875, "learning_rate": 1.9703489953377888e-05, "loss": 1.0544, "step": 7733 }, { "epoch": 0.569068751437112, "grad_norm": 0.92578125, "learning_rate": 1.9697829970366076e-05, "loss": 0.9059, "step": 7734 }, { "epoch": 0.569142331570476, "grad_norm": 1.375, "learning_rate": 1.9692170271911717e-05, "loss": 1.1534, "step": 7735 }, { "epoch": 0.56921591170384, "grad_norm": 0.98828125, "learning_rate": 1.9686510858318553e-05, "loss": 1.1078, "step": 7736 }, { "epoch": 0.569289491837204, "grad_norm": 1.46875, "learning_rate": 1.968085172989032e-05, "loss": 1.294, "step": 7737 }, { "epoch": 0.5693630719705679, "grad_norm": 0.8828125, "learning_rate": 1.9675192886930722e-05, "loss": 0.9051, "step": 7738 }, { "epoch": 0.5694366521039319, "grad_norm": 0.89453125, "learning_rate": 1.9669534329743467e-05, "loss": 0.8163, "step": 7739 }, { "epoch": 0.5695102322372959, "grad_norm": 0.9140625, "learning_rate": 1.9663876058632235e-05, "loss": 1.6457, "step": 7740 }, { "epoch": 0.5695838123706599, "grad_norm": 0.7265625, "learning_rate": 1.9658218073900704e-05, "loss": 0.618, "step": 7741 }, { "epoch": 0.5696573925040239, "grad_norm": 0.96875, "learning_rate": 1.965256037585251e-05, "loss": 0.9302, "step": 7742 }, { "epoch": 0.5697309726373879, "grad_norm": 0.80859375, "learning_rate": 1.9646902964791305e-05, "loss": 1.4923, "step": 7743 }, { "epoch": 0.5698045527707519, "grad_norm": 0.87890625, "learning_rate": 1.9641245841020705e-05, "loss": 0.9057, "step": 7744 }, { "epoch": 0.5698781329041159, "grad_norm": 1.09375, "learning_rate": 1.9635589004844322e-05, "loss": 1.0035, "step": 7745 }, { "epoch": 0.5699517130374799, "grad_norm": 0.87890625, "learning_rate": 1.9629932456565752e-05, "loss": 0.849, "step": 7746 }, { "epoch": 0.5700252931708438, "grad_norm": 0.9453125, "learning_rate": 1.9624276196488556e-05, "loss": 0.8605, "step": 7747 }, { "epoch": 0.5700988733042078, "grad_norm": 0.84375, "learning_rate": 1.9618620224916304e-05, "loss": 0.7747, "step": 7748 }, { "epoch": 0.5701724534375718, "grad_norm": 0.87109375, "learning_rate": 1.961296454215254e-05, "loss": 1.1588, "step": 7749 }, { "epoch": 0.5702460335709358, "grad_norm": 0.765625, "learning_rate": 1.96073091485008e-05, "loss": 0.5469, "step": 7750 }, { "epoch": 0.5703196137042998, "grad_norm": 0.875, "learning_rate": 1.9601654044264586e-05, "loss": 0.9935, "step": 7751 }, { "epoch": 0.5703931938376638, "grad_norm": 0.9140625, "learning_rate": 1.9595999229747405e-05, "loss": 0.9887, "step": 7752 }, { "epoch": 0.5704667739710279, "grad_norm": 1.0625, "learning_rate": 1.959034470525274e-05, "loss": 1.0697, "step": 7753 }, { "epoch": 0.5705403541043919, "grad_norm": 0.97265625, "learning_rate": 1.9584690471084053e-05, "loss": 1.1769, "step": 7754 }, { "epoch": 0.5706139342377559, "grad_norm": 0.71875, "learning_rate": 1.957903652754481e-05, "loss": 1.0359, "step": 7755 }, { "epoch": 0.5706875143711198, "grad_norm": 0.80859375, "learning_rate": 1.957338287493843e-05, "loss": 0.9603, "step": 7756 }, { "epoch": 0.5707610945044838, "grad_norm": 1.34375, "learning_rate": 1.956772951356834e-05, "loss": 0.9707, "step": 7757 }, { "epoch": 0.5708346746378478, "grad_norm": 0.6953125, "learning_rate": 1.9562076443737947e-05, "loss": 0.5688, "step": 7758 }, { "epoch": 0.5709082547712118, "grad_norm": 0.765625, "learning_rate": 1.9556423665750655e-05, "loss": 0.6643, "step": 7759 }, { "epoch": 0.5709818349045758, "grad_norm": 0.859375, "learning_rate": 1.955077117990981e-05, "loss": 0.8678, "step": 7760 }, { "epoch": 0.5710554150379398, "grad_norm": 0.7890625, "learning_rate": 1.9545118986518785e-05, "loss": 0.7246, "step": 7761 }, { "epoch": 0.5711289951713038, "grad_norm": 0.8125, "learning_rate": 1.953946708588092e-05, "loss": 0.9106, "step": 7762 }, { "epoch": 0.5712025753046678, "grad_norm": 0.87890625, "learning_rate": 1.9533815478299543e-05, "loss": 0.7549, "step": 7763 }, { "epoch": 0.5712761554380318, "grad_norm": 0.96484375, "learning_rate": 1.952816416407797e-05, "loss": 1.1657, "step": 7764 }, { "epoch": 0.5713497355713957, "grad_norm": 0.77734375, "learning_rate": 1.9522513143519488e-05, "loss": 0.8916, "step": 7765 }, { "epoch": 0.5714233157047597, "grad_norm": 1.03125, "learning_rate": 1.9516862416927383e-05, "loss": 1.3224, "step": 7766 }, { "epoch": 0.5714968958381237, "grad_norm": 0.84375, "learning_rate": 1.951121198460491e-05, "loss": 0.8834, "step": 7767 }, { "epoch": 0.5715704759714877, "grad_norm": 0.828125, "learning_rate": 1.9505561846855326e-05, "loss": 0.8804, "step": 7768 }, { "epoch": 0.5716440561048517, "grad_norm": 1.0078125, "learning_rate": 1.9499912003981864e-05, "loss": 1.0037, "step": 7769 }, { "epoch": 0.5717176362382157, "grad_norm": 0.93359375, "learning_rate": 1.949426245628773e-05, "loss": 1.2265, "step": 7770 }, { "epoch": 0.5717912163715797, "grad_norm": 0.76171875, "learning_rate": 1.9488613204076133e-05, "loss": 0.714, "step": 7771 }, { "epoch": 0.5718647965049437, "grad_norm": 0.765625, "learning_rate": 1.948296424765026e-05, "loss": 0.9577, "step": 7772 }, { "epoch": 0.5719383766383077, "grad_norm": 0.8203125, "learning_rate": 1.947731558731328e-05, "loss": 0.8312, "step": 7773 }, { "epoch": 0.5720119567716716, "grad_norm": 0.8984375, "learning_rate": 1.9471667223368333e-05, "loss": 0.8835, "step": 7774 }, { "epoch": 0.5720855369050356, "grad_norm": 0.81640625, "learning_rate": 1.9466019156118565e-05, "loss": 1.032, "step": 7775 }, { "epoch": 0.5721591170383996, "grad_norm": 0.796875, "learning_rate": 1.9460371385867097e-05, "loss": 0.89, "step": 7776 }, { "epoch": 0.5722326971717636, "grad_norm": 0.8515625, "learning_rate": 1.9454723912917035e-05, "loss": 0.712, "step": 7777 }, { "epoch": 0.5723062773051276, "grad_norm": 0.9296875, "learning_rate": 1.9449076737571467e-05, "loss": 1.0629, "step": 7778 }, { "epoch": 0.5723798574384916, "grad_norm": 1.03125, "learning_rate": 1.9443429860133467e-05, "loss": 1.1659, "step": 7779 }, { "epoch": 0.5724534375718556, "grad_norm": 0.77734375, "learning_rate": 1.9437783280906086e-05, "loss": 0.6964, "step": 7780 }, { "epoch": 0.5725270177052196, "grad_norm": 0.75390625, "learning_rate": 1.9432137000192376e-05, "loss": 0.9, "step": 7781 }, { "epoch": 0.5726005978385836, "grad_norm": 0.97265625, "learning_rate": 1.942649101829536e-05, "loss": 1.2242, "step": 7782 }, { "epoch": 0.5726741779719475, "grad_norm": 1.0390625, "learning_rate": 1.9420845335518036e-05, "loss": 0.907, "step": 7783 }, { "epoch": 0.5727477581053115, "grad_norm": 0.67578125, "learning_rate": 1.941519995216341e-05, "loss": 0.5625, "step": 7784 }, { "epoch": 0.5728213382386755, "grad_norm": 0.9140625, "learning_rate": 1.940955486853445e-05, "loss": 0.8627, "step": 7785 }, { "epoch": 0.5728949183720395, "grad_norm": 0.74609375, "learning_rate": 1.9403910084934128e-05, "loss": 0.6583, "step": 7786 }, { "epoch": 0.5729684985054035, "grad_norm": 0.66015625, "learning_rate": 1.939826560166539e-05, "loss": 0.5249, "step": 7787 }, { "epoch": 0.5730420786387675, "grad_norm": 0.74609375, "learning_rate": 1.939262141903114e-05, "loss": 0.691, "step": 7788 }, { "epoch": 0.5731156587721316, "grad_norm": 0.984375, "learning_rate": 1.9386977537334316e-05, "loss": 1.3678, "step": 7789 }, { "epoch": 0.5731892389054956, "grad_norm": 0.94140625, "learning_rate": 1.93813339568778e-05, "loss": 0.896, "step": 7790 }, { "epoch": 0.5732628190388596, "grad_norm": 0.96484375, "learning_rate": 1.937569067796449e-05, "loss": 0.9596, "step": 7791 }, { "epoch": 0.5733363991722235, "grad_norm": 0.79296875, "learning_rate": 1.9370047700897227e-05, "loss": 0.849, "step": 7792 }, { "epoch": 0.5734099793055875, "grad_norm": 0.85546875, "learning_rate": 1.9364405025978877e-05, "loss": 1.0411, "step": 7793 }, { "epoch": 0.5734835594389515, "grad_norm": 0.84765625, "learning_rate": 1.935876265351226e-05, "loss": 0.7349, "step": 7794 }, { "epoch": 0.5735571395723155, "grad_norm": 0.8359375, "learning_rate": 1.9353120583800197e-05, "loss": 0.716, "step": 7795 }, { "epoch": 0.5736307197056795, "grad_norm": 0.86328125, "learning_rate": 1.9347478817145492e-05, "loss": 0.9886, "step": 7796 }, { "epoch": 0.5737042998390435, "grad_norm": 0.74609375, "learning_rate": 1.934183735385092e-05, "loss": 0.8516, "step": 7797 }, { "epoch": 0.5737778799724075, "grad_norm": 0.734375, "learning_rate": 1.933619619421925e-05, "loss": 1.1078, "step": 7798 }, { "epoch": 0.5738514601057715, "grad_norm": 0.875, "learning_rate": 1.933055533855323e-05, "loss": 1.1994, "step": 7799 }, { "epoch": 0.5739250402391355, "grad_norm": 0.921875, "learning_rate": 1.93249147871556e-05, "loss": 1.1573, "step": 7800 }, { "epoch": 0.5739986203724994, "grad_norm": 0.7734375, "learning_rate": 1.9319274540329085e-05, "loss": 0.9439, "step": 7801 }, { "epoch": 0.5740722005058634, "grad_norm": 0.90625, "learning_rate": 1.9313634598376363e-05, "loss": 0.8465, "step": 7802 }, { "epoch": 0.5741457806392274, "grad_norm": 0.7890625, "learning_rate": 1.9307994961600136e-05, "loss": 1.1433, "step": 7803 }, { "epoch": 0.5742193607725914, "grad_norm": 0.95703125, "learning_rate": 1.930235563030306e-05, "loss": 1.1513, "step": 7804 }, { "epoch": 0.5742929409059554, "grad_norm": 0.85546875, "learning_rate": 1.929671660478781e-05, "loss": 1.366, "step": 7805 }, { "epoch": 0.5743665210393194, "grad_norm": 1.015625, "learning_rate": 1.9291077885357e-05, "loss": 0.9652, "step": 7806 }, { "epoch": 0.5744401011726834, "grad_norm": 0.77734375, "learning_rate": 1.928543947231326e-05, "loss": 0.8515, "step": 7807 }, { "epoch": 0.5745136813060474, "grad_norm": 1.0078125, "learning_rate": 1.927980136595919e-05, "loss": 1.1187, "step": 7808 }, { "epoch": 0.5745872614394114, "grad_norm": 0.89453125, "learning_rate": 1.9274163566597372e-05, "loss": 1.021, "step": 7809 }, { "epoch": 0.5746608415727753, "grad_norm": 0.76171875, "learning_rate": 1.9268526074530386e-05, "loss": 0.9821, "step": 7810 }, { "epoch": 0.5747344217061393, "grad_norm": 0.7578125, "learning_rate": 1.926288889006078e-05, "loss": 0.8988, "step": 7811 }, { "epoch": 0.5748080018395033, "grad_norm": 0.9609375, "learning_rate": 1.9257252013491088e-05, "loss": 0.7945, "step": 7812 }, { "epoch": 0.5748815819728673, "grad_norm": 0.98046875, "learning_rate": 1.9251615445123835e-05, "loss": 0.8642, "step": 7813 }, { "epoch": 0.5749551621062313, "grad_norm": 0.8046875, "learning_rate": 1.9245979185261536e-05, "loss": 0.7619, "step": 7814 }, { "epoch": 0.5750287422395953, "grad_norm": 0.7890625, "learning_rate": 1.9240343234206653e-05, "loss": 0.549, "step": 7815 }, { "epoch": 0.5751023223729593, "grad_norm": 0.828125, "learning_rate": 1.9234707592261672e-05, "loss": 0.6836, "step": 7816 }, { "epoch": 0.5751759025063233, "grad_norm": 0.765625, "learning_rate": 1.9229072259729045e-05, "loss": 0.6723, "step": 7817 }, { "epoch": 0.5752494826396873, "grad_norm": 0.7421875, "learning_rate": 1.9223437236911203e-05, "loss": 0.5329, "step": 7818 }, { "epoch": 0.5753230627730512, "grad_norm": 1.0625, "learning_rate": 1.9217802524110594e-05, "loss": 1.3878, "step": 7819 }, { "epoch": 0.5753966429064152, "grad_norm": 0.7734375, "learning_rate": 1.921216812162959e-05, "loss": 0.7419, "step": 7820 }, { "epoch": 0.5754702230397792, "grad_norm": 0.98828125, "learning_rate": 1.920653402977059e-05, "loss": 0.7552, "step": 7821 }, { "epoch": 0.5755438031731432, "grad_norm": 0.77734375, "learning_rate": 1.9200900248835967e-05, "loss": 0.662, "step": 7822 }, { "epoch": 0.5756173833065072, "grad_norm": 0.875, "learning_rate": 1.919526677912808e-05, "loss": 1.0735, "step": 7823 }, { "epoch": 0.5756909634398713, "grad_norm": 0.9609375, "learning_rate": 1.9189633620949256e-05, "loss": 1.2137, "step": 7824 }, { "epoch": 0.5757645435732353, "grad_norm": 0.6953125, "learning_rate": 1.918400077460182e-05, "loss": 0.5444, "step": 7825 }, { "epoch": 0.5758381237065993, "grad_norm": 0.74609375, "learning_rate": 1.917836824038808e-05, "loss": 0.6851, "step": 7826 }, { "epoch": 0.5759117038399633, "grad_norm": 0.82421875, "learning_rate": 1.9172736018610322e-05, "loss": 0.9634, "step": 7827 }, { "epoch": 0.5759852839733272, "grad_norm": 0.9296875, "learning_rate": 1.9167104109570826e-05, "loss": 0.8805, "step": 7828 }, { "epoch": 0.5760588641066912, "grad_norm": 1.0625, "learning_rate": 1.916147251357182e-05, "loss": 0.9175, "step": 7829 }, { "epoch": 0.5761324442400552, "grad_norm": 0.921875, "learning_rate": 1.915584123091556e-05, "loss": 0.9094, "step": 7830 }, { "epoch": 0.5762060243734192, "grad_norm": 1.09375, "learning_rate": 1.9150210261904257e-05, "loss": 0.9626, "step": 7831 }, { "epoch": 0.5762796045067832, "grad_norm": 1.03125, "learning_rate": 1.9144579606840142e-05, "loss": 0.6984, "step": 7832 }, { "epoch": 0.5763531846401472, "grad_norm": 0.94921875, "learning_rate": 1.9138949266025362e-05, "loss": 1.0027, "step": 7833 }, { "epoch": 0.5764267647735112, "grad_norm": 0.74609375, "learning_rate": 1.913331923976211e-05, "loss": 0.8469, "step": 7834 }, { "epoch": 0.5765003449068752, "grad_norm": 0.82421875, "learning_rate": 1.9127689528352532e-05, "loss": 0.7408, "step": 7835 }, { "epoch": 0.5765739250402392, "grad_norm": 0.90625, "learning_rate": 1.9122060132098764e-05, "loss": 0.8928, "step": 7836 }, { "epoch": 0.5766475051736031, "grad_norm": 0.89453125, "learning_rate": 1.9116431051302936e-05, "loss": 0.9892, "step": 7837 }, { "epoch": 0.5767210853069671, "grad_norm": 0.73828125, "learning_rate": 1.9110802286267133e-05, "loss": 0.6941, "step": 7838 }, { "epoch": 0.5767946654403311, "grad_norm": 0.8828125, "learning_rate": 1.9105173837293448e-05, "loss": 0.8589, "step": 7839 }, { "epoch": 0.5768682455736951, "grad_norm": 0.8984375, "learning_rate": 1.909954570468395e-05, "loss": 1.0987, "step": 7840 }, { "epoch": 0.5769418257070591, "grad_norm": 0.83203125, "learning_rate": 1.909391788874069e-05, "loss": 0.7726, "step": 7841 }, { "epoch": 0.5770154058404231, "grad_norm": 0.7890625, "learning_rate": 1.908829038976571e-05, "loss": 0.7741, "step": 7842 }, { "epoch": 0.5770889859737871, "grad_norm": 1.1328125, "learning_rate": 1.9082663208061014e-05, "loss": 0.9955, "step": 7843 }, { "epoch": 0.5771625661071511, "grad_norm": 1.046875, "learning_rate": 1.9077036343928596e-05, "loss": 1.2145, "step": 7844 }, { "epoch": 0.5772361462405151, "grad_norm": 0.765625, "learning_rate": 1.9071409797670462e-05, "loss": 0.6294, "step": 7845 }, { "epoch": 0.577309726373879, "grad_norm": 0.76171875, "learning_rate": 1.9065783569588576e-05, "loss": 0.8148, "step": 7846 }, { "epoch": 0.577383306507243, "grad_norm": 0.84375, "learning_rate": 1.906015765998486e-05, "loss": 0.9866, "step": 7847 }, { "epoch": 0.577456886640607, "grad_norm": 0.828125, "learning_rate": 1.905453206916127e-05, "loss": 0.8646, "step": 7848 }, { "epoch": 0.577530466773971, "grad_norm": 0.98046875, "learning_rate": 1.9048906797419713e-05, "loss": 0.8968, "step": 7849 }, { "epoch": 0.577604046907335, "grad_norm": 0.90234375, "learning_rate": 1.9043281845062087e-05, "loss": 1.5815, "step": 7850 }, { "epoch": 0.577677627040699, "grad_norm": 0.88671875, "learning_rate": 1.903765721239028e-05, "loss": 0.7399, "step": 7851 }, { "epoch": 0.577751207174063, "grad_norm": 0.7890625, "learning_rate": 1.903203289970615e-05, "loss": 0.694, "step": 7852 }, { "epoch": 0.577824787307427, "grad_norm": 0.69921875, "learning_rate": 1.9026408907311532e-05, "loss": 0.7211, "step": 7853 }, { "epoch": 0.577898367440791, "grad_norm": 0.92578125, "learning_rate": 1.902078523550827e-05, "loss": 0.979, "step": 7854 }, { "epoch": 0.5779719475741549, "grad_norm": 1.0546875, "learning_rate": 1.901516188459818e-05, "loss": 1.4331, "step": 7855 }, { "epoch": 0.5780455277075189, "grad_norm": 1.015625, "learning_rate": 1.900953885488304e-05, "loss": 1.0824, "step": 7856 }, { "epoch": 0.5781191078408829, "grad_norm": 0.7734375, "learning_rate": 1.900391614666463e-05, "loss": 0.7317, "step": 7857 }, { "epoch": 0.5781926879742469, "grad_norm": 0.94921875, "learning_rate": 1.899829376024472e-05, "loss": 1.0449, "step": 7858 }, { "epoch": 0.578266268107611, "grad_norm": 0.93359375, "learning_rate": 1.899267169592505e-05, "loss": 0.7986, "step": 7859 }, { "epoch": 0.578339848240975, "grad_norm": 1.0, "learning_rate": 1.898704995400735e-05, "loss": 1.1066, "step": 7860 }, { "epoch": 0.578413428374339, "grad_norm": 0.80078125, "learning_rate": 1.8981428534793317e-05, "loss": 0.6905, "step": 7861 }, { "epoch": 0.578487008507703, "grad_norm": 0.640625, "learning_rate": 1.8975807438584642e-05, "loss": 0.7546, "step": 7862 }, { "epoch": 0.578560588641067, "grad_norm": 0.73828125, "learning_rate": 1.8970186665683005e-05, "loss": 0.7572, "step": 7863 }, { "epoch": 0.5786341687744309, "grad_norm": 1.1484375, "learning_rate": 1.896456621639007e-05, "loss": 1.2453, "step": 7864 }, { "epoch": 0.5787077489077949, "grad_norm": 0.77734375, "learning_rate": 1.8958946091007458e-05, "loss": 0.6999, "step": 7865 }, { "epoch": 0.5787813290411589, "grad_norm": 0.890625, "learning_rate": 1.89533262898368e-05, "loss": 0.8552, "step": 7866 }, { "epoch": 0.5788549091745229, "grad_norm": 0.78515625, "learning_rate": 1.89477068131797e-05, "loss": 0.6997, "step": 7867 }, { "epoch": 0.5789284893078869, "grad_norm": 0.87109375, "learning_rate": 1.8942087661337742e-05, "loss": 0.8844, "step": 7868 }, { "epoch": 0.5790020694412509, "grad_norm": 0.85546875, "learning_rate": 1.893646883461251e-05, "loss": 0.9771, "step": 7869 }, { "epoch": 0.5790756495746149, "grad_norm": 1.03125, "learning_rate": 1.8930850333305532e-05, "loss": 1.0053, "step": 7870 }, { "epoch": 0.5791492297079789, "grad_norm": 0.90625, "learning_rate": 1.8925232157718352e-05, "loss": 0.8334, "step": 7871 }, { "epoch": 0.5792228098413429, "grad_norm": 0.89453125, "learning_rate": 1.891961430815249e-05, "loss": 0.9882, "step": 7872 }, { "epoch": 0.5792963899747068, "grad_norm": 0.83984375, "learning_rate": 1.8913996784909445e-05, "loss": 0.9642, "step": 7873 }, { "epoch": 0.5793699701080708, "grad_norm": 0.99609375, "learning_rate": 1.8908379588290707e-05, "loss": 0.8087, "step": 7874 }, { "epoch": 0.5794435502414348, "grad_norm": 0.71484375, "learning_rate": 1.890276271859772e-05, "loss": 0.7891, "step": 7875 }, { "epoch": 0.5795171303747988, "grad_norm": 0.94921875, "learning_rate": 1.8897146176131945e-05, "loss": 0.8852, "step": 7876 }, { "epoch": 0.5795907105081628, "grad_norm": 0.85546875, "learning_rate": 1.8891529961194804e-05, "loss": 0.8946, "step": 7877 }, { "epoch": 0.5796642906415268, "grad_norm": 0.70703125, "learning_rate": 1.8885914074087722e-05, "loss": 0.7218, "step": 7878 }, { "epoch": 0.5797378707748908, "grad_norm": 1.0, "learning_rate": 1.8880298515112073e-05, "loss": 1.0425, "step": 7879 }, { "epoch": 0.5798114509082548, "grad_norm": 0.67578125, "learning_rate": 1.887468328456925e-05, "loss": 0.5877, "step": 7880 }, { "epoch": 0.5798850310416188, "grad_norm": 0.953125, "learning_rate": 1.8869068382760604e-05, "loss": 0.9595, "step": 7881 }, { "epoch": 0.5799586111749827, "grad_norm": 0.7890625, "learning_rate": 1.8863453809987478e-05, "loss": 0.7985, "step": 7882 }, { "epoch": 0.5800321913083467, "grad_norm": 0.79296875, "learning_rate": 1.8857839566551205e-05, "loss": 0.734, "step": 7883 }, { "epoch": 0.5801057714417107, "grad_norm": 0.69140625, "learning_rate": 1.885222565275307e-05, "loss": 0.6292, "step": 7884 }, { "epoch": 0.5801793515750747, "grad_norm": 0.87109375, "learning_rate": 1.8846612068894373e-05, "loss": 0.7832, "step": 7885 }, { "epoch": 0.5802529317084387, "grad_norm": 0.9765625, "learning_rate": 1.8840998815276387e-05, "loss": 1.0154, "step": 7886 }, { "epoch": 0.5803265118418027, "grad_norm": 1.03125, "learning_rate": 1.883538589220037e-05, "loss": 0.9185, "step": 7887 }, { "epoch": 0.5804000919751667, "grad_norm": 0.84375, "learning_rate": 1.882977329996754e-05, "loss": 1.2152, "step": 7888 }, { "epoch": 0.5804736721085307, "grad_norm": 0.828125, "learning_rate": 1.8824161038879122e-05, "loss": 0.8345, "step": 7889 }, { "epoch": 0.5805472522418947, "grad_norm": 1.078125, "learning_rate": 1.881854910923632e-05, "loss": 0.6835, "step": 7890 }, { "epoch": 0.5806208323752586, "grad_norm": 0.98828125, "learning_rate": 1.8812937511340307e-05, "loss": 1.1624, "step": 7891 }, { "epoch": 0.5806944125086226, "grad_norm": 0.8828125, "learning_rate": 1.8807326245492262e-05, "loss": 0.9678, "step": 7892 }, { "epoch": 0.5807679926419866, "grad_norm": 0.78515625, "learning_rate": 1.8801715311993315e-05, "loss": 1.1112, "step": 7893 }, { "epoch": 0.5808415727753506, "grad_norm": 1.0859375, "learning_rate": 1.87961047111446e-05, "loss": 1.2048, "step": 7894 }, { "epoch": 0.5809151529087146, "grad_norm": 0.78515625, "learning_rate": 1.8790494443247225e-05, "loss": 0.6695, "step": 7895 }, { "epoch": 0.5809887330420787, "grad_norm": 0.703125, "learning_rate": 1.87848845086023e-05, "loss": 0.6305, "step": 7896 }, { "epoch": 0.5810623131754427, "grad_norm": 0.84765625, "learning_rate": 1.8779274907510866e-05, "loss": 0.7055, "step": 7897 }, { "epoch": 0.5811358933088067, "grad_norm": 0.94140625, "learning_rate": 1.8773665640274004e-05, "loss": 0.9516, "step": 7898 }, { "epoch": 0.5812094734421707, "grad_norm": 0.74609375, "learning_rate": 1.8768056707192748e-05, "loss": 0.7438, "step": 7899 }, { "epoch": 0.5812830535755346, "grad_norm": 0.796875, "learning_rate": 1.876244810856812e-05, "loss": 0.8965, "step": 7900 }, { "epoch": 0.5813566337088986, "grad_norm": 0.69921875, "learning_rate": 1.8756839844701126e-05, "loss": 0.9464, "step": 7901 }, { "epoch": 0.5814302138422626, "grad_norm": 0.8125, "learning_rate": 1.875123191589274e-05, "loss": 0.8169, "step": 7902 }, { "epoch": 0.5815037939756266, "grad_norm": 0.71875, "learning_rate": 1.8745624322443933e-05, "loss": 0.7822, "step": 7903 }, { "epoch": 0.5815773741089906, "grad_norm": 0.90234375, "learning_rate": 1.8740017064655655e-05, "loss": 0.6579, "step": 7904 }, { "epoch": 0.5816509542423546, "grad_norm": 0.85546875, "learning_rate": 1.873441014282884e-05, "loss": 0.6201, "step": 7905 }, { "epoch": 0.5817245343757186, "grad_norm": 0.8828125, "learning_rate": 1.8728803557264403e-05, "loss": 0.8544, "step": 7906 }, { "epoch": 0.5817981145090826, "grad_norm": 0.8984375, "learning_rate": 1.8723197308263227e-05, "loss": 0.6245, "step": 7907 }, { "epoch": 0.5818716946424466, "grad_norm": 1.0859375, "learning_rate": 1.87175913961262e-05, "loss": 1.4219, "step": 7908 }, { "epoch": 0.5819452747758105, "grad_norm": 0.85546875, "learning_rate": 1.8711985821154172e-05, "loss": 0.8163, "step": 7909 }, { "epoch": 0.5820188549091745, "grad_norm": 0.8125, "learning_rate": 1.8706380583647998e-05, "loss": 0.9422, "step": 7910 }, { "epoch": 0.5820924350425385, "grad_norm": 0.94921875, "learning_rate": 1.8700775683908483e-05, "loss": 0.9302, "step": 7911 }, { "epoch": 0.5821660151759025, "grad_norm": 0.9375, "learning_rate": 1.8695171122236444e-05, "loss": 0.9279, "step": 7912 }, { "epoch": 0.5822395953092665, "grad_norm": 0.99609375, "learning_rate": 1.868956689893266e-05, "loss": 1.2156, "step": 7913 }, { "epoch": 0.5823131754426305, "grad_norm": 1.0390625, "learning_rate": 1.86839630142979e-05, "loss": 1.0219, "step": 7914 }, { "epoch": 0.5823867555759945, "grad_norm": 0.8984375, "learning_rate": 1.8678359468632926e-05, "loss": 0.8994, "step": 7915 }, { "epoch": 0.5824603357093585, "grad_norm": 0.9453125, "learning_rate": 1.8672756262238454e-05, "loss": 1.2641, "step": 7916 }, { "epoch": 0.5825339158427225, "grad_norm": 0.88671875, "learning_rate": 1.8667153395415198e-05, "loss": 0.9052, "step": 7917 }, { "epoch": 0.5826074959760864, "grad_norm": 0.875, "learning_rate": 1.866155086846386e-05, "loss": 0.8305, "step": 7918 }, { "epoch": 0.5826810761094504, "grad_norm": 0.78125, "learning_rate": 1.8655948681685123e-05, "loss": 0.7444, "step": 7919 }, { "epoch": 0.5827546562428144, "grad_norm": 0.91015625, "learning_rate": 1.865034683537963e-05, "loss": 0.7477, "step": 7920 }, { "epoch": 0.5828282363761784, "grad_norm": 0.76953125, "learning_rate": 1.8644745329848027e-05, "loss": 0.6366, "step": 7921 }, { "epoch": 0.5829018165095424, "grad_norm": 0.73828125, "learning_rate": 1.8639144165390945e-05, "loss": 0.8134, "step": 7922 }, { "epoch": 0.5829753966429064, "grad_norm": 1.0546875, "learning_rate": 1.863354334230898e-05, "loss": 1.0882, "step": 7923 }, { "epoch": 0.5830489767762704, "grad_norm": 0.80859375, "learning_rate": 1.862794286090272e-05, "loss": 0.7208, "step": 7924 }, { "epoch": 0.5831225569096344, "grad_norm": 0.76953125, "learning_rate": 1.8622342721472728e-05, "loss": 0.5987, "step": 7925 }, { "epoch": 0.5831961370429984, "grad_norm": 1.109375, "learning_rate": 1.861674292431956e-05, "loss": 1.1597, "step": 7926 }, { "epoch": 0.5832697171763623, "grad_norm": 0.625, "learning_rate": 1.861114346974374e-05, "loss": 0.7863, "step": 7927 }, { "epoch": 0.5833432973097263, "grad_norm": 1.03125, "learning_rate": 1.8605544358045794e-05, "loss": 0.9326, "step": 7928 }, { "epoch": 0.5834168774430903, "grad_norm": 0.71484375, "learning_rate": 1.8599945589526198e-05, "loss": 0.6481, "step": 7929 }, { "epoch": 0.5834904575764543, "grad_norm": 0.75, "learning_rate": 1.8594347164485427e-05, "loss": 0.803, "step": 7930 }, { "epoch": 0.5835640377098184, "grad_norm": 0.80078125, "learning_rate": 1.858874908322395e-05, "loss": 0.806, "step": 7931 }, { "epoch": 0.5836376178431824, "grad_norm": 0.796875, "learning_rate": 1.8583151346042203e-05, "loss": 0.9222, "step": 7932 }, { "epoch": 0.5837111979765464, "grad_norm": 0.8125, "learning_rate": 1.8577553953240604e-05, "loss": 0.9362, "step": 7933 }, { "epoch": 0.5837847781099104, "grad_norm": 0.87109375, "learning_rate": 1.857195690511955e-05, "loss": 0.8713, "step": 7934 }, { "epoch": 0.5838583582432744, "grad_norm": 0.8203125, "learning_rate": 1.8566360201979427e-05, "loss": 0.8874, "step": 7935 }, { "epoch": 0.5839319383766383, "grad_norm": 1.015625, "learning_rate": 1.8560763844120603e-05, "loss": 0.9917, "step": 7936 }, { "epoch": 0.5840055185100023, "grad_norm": 1.0234375, "learning_rate": 1.8555167831843422e-05, "loss": 1.3873, "step": 7937 }, { "epoch": 0.5840790986433663, "grad_norm": 0.91015625, "learning_rate": 1.8549572165448214e-05, "loss": 0.8306, "step": 7938 }, { "epoch": 0.5841526787767303, "grad_norm": 0.9140625, "learning_rate": 1.8543976845235277e-05, "loss": 1.0889, "step": 7939 }, { "epoch": 0.5842262589100943, "grad_norm": 0.80078125, "learning_rate": 1.8538381871504915e-05, "loss": 0.8001, "step": 7940 }, { "epoch": 0.5842998390434583, "grad_norm": 1.3515625, "learning_rate": 1.8532787244557393e-05, "loss": 1.2419, "step": 7941 }, { "epoch": 0.5843734191768223, "grad_norm": 0.72265625, "learning_rate": 1.852719296469297e-05, "loss": 0.8252, "step": 7942 }, { "epoch": 0.5844469993101863, "grad_norm": 0.765625, "learning_rate": 1.8521599032211866e-05, "loss": 0.9006, "step": 7943 }, { "epoch": 0.5845205794435503, "grad_norm": 0.890625, "learning_rate": 1.851600544741431e-05, "loss": 1.0111, "step": 7944 }, { "epoch": 0.5845941595769142, "grad_norm": 0.97265625, "learning_rate": 1.8510412210600493e-05, "loss": 1.1289, "step": 7945 }, { "epoch": 0.5846677397102782, "grad_norm": 0.8828125, "learning_rate": 1.8504819322070595e-05, "loss": 0.6984, "step": 7946 }, { "epoch": 0.5847413198436422, "grad_norm": 0.7265625, "learning_rate": 1.849922678212478e-05, "loss": 0.8641, "step": 7947 }, { "epoch": 0.5848148999770062, "grad_norm": 0.60546875, "learning_rate": 1.8493634591063187e-05, "loss": 0.5647, "step": 7948 }, { "epoch": 0.5848884801103702, "grad_norm": 1.0, "learning_rate": 1.848804274918593e-05, "loss": 1.2141, "step": 7949 }, { "epoch": 0.5849620602437342, "grad_norm": 0.86328125, "learning_rate": 1.848245125679312e-05, "loss": 0.7978, "step": 7950 }, { "epoch": 0.5850356403770982, "grad_norm": 1.0078125, "learning_rate": 1.8476860114184845e-05, "loss": 0.869, "step": 7951 }, { "epoch": 0.5851092205104622, "grad_norm": 0.71484375, "learning_rate": 1.8471269321661167e-05, "loss": 0.775, "step": 7952 }, { "epoch": 0.5851828006438262, "grad_norm": 0.78125, "learning_rate": 1.846567887952213e-05, "loss": 0.596, "step": 7953 }, { "epoch": 0.5852563807771901, "grad_norm": 1.09375, "learning_rate": 1.846008878806777e-05, "loss": 1.1225, "step": 7954 }, { "epoch": 0.5853299609105541, "grad_norm": 0.95703125, "learning_rate": 1.845449904759809e-05, "loss": 1.0799, "step": 7955 }, { "epoch": 0.5854035410439181, "grad_norm": 0.8125, "learning_rate": 1.8448909658413093e-05, "loss": 1.0973, "step": 7956 }, { "epoch": 0.5854771211772821, "grad_norm": 0.91796875, "learning_rate": 1.844332062081273e-05, "loss": 1.0958, "step": 7957 }, { "epoch": 0.5855507013106461, "grad_norm": 0.9765625, "learning_rate": 1.8437731935096967e-05, "loss": 0.9853, "step": 7958 }, { "epoch": 0.5856242814440101, "grad_norm": 0.984375, "learning_rate": 1.8432143601565737e-05, "loss": 0.988, "step": 7959 }, { "epoch": 0.5856978615773741, "grad_norm": 0.7578125, "learning_rate": 1.842655562051896e-05, "loss": 0.6313, "step": 7960 }, { "epoch": 0.5857714417107381, "grad_norm": 0.78125, "learning_rate": 1.842096799225652e-05, "loss": 0.8302, "step": 7961 }, { "epoch": 0.5858450218441021, "grad_norm": 0.76171875, "learning_rate": 1.8415380717078305e-05, "loss": 0.7189, "step": 7962 }, { "epoch": 0.585918601977466, "grad_norm": 0.67578125, "learning_rate": 1.840979379528417e-05, "loss": 0.6009, "step": 7963 }, { "epoch": 0.58599218211083, "grad_norm": 0.921875, "learning_rate": 1.8404207227173953e-05, "loss": 0.8535, "step": 7964 }, { "epoch": 0.586065762244194, "grad_norm": 0.796875, "learning_rate": 1.8398621013047483e-05, "loss": 1.0447, "step": 7965 }, { "epoch": 0.586139342377558, "grad_norm": 0.80859375, "learning_rate": 1.8393035153204547e-05, "loss": 1.0278, "step": 7966 }, { "epoch": 0.586212922510922, "grad_norm": 0.90234375, "learning_rate": 1.8387449647944938e-05, "loss": 0.924, "step": 7967 }, { "epoch": 0.5862865026442861, "grad_norm": 0.65625, "learning_rate": 1.838186449756842e-05, "loss": 0.7968, "step": 7968 }, { "epoch": 0.5863600827776501, "grad_norm": 0.87890625, "learning_rate": 1.837627970237474e-05, "loss": 1.2992, "step": 7969 }, { "epoch": 0.5864336629110141, "grad_norm": 0.83203125, "learning_rate": 1.837069526266361e-05, "loss": 0.9364, "step": 7970 }, { "epoch": 0.5865072430443781, "grad_norm": 0.91015625, "learning_rate": 1.8365111178734745e-05, "loss": 0.7273, "step": 7971 }, { "epoch": 0.586580823177742, "grad_norm": 0.890625, "learning_rate": 1.8359527450887828e-05, "loss": 0.7213, "step": 7972 }, { "epoch": 0.586654403311106, "grad_norm": 0.77734375, "learning_rate": 1.8353944079422533e-05, "loss": 0.725, "step": 7973 }, { "epoch": 0.58672798344447, "grad_norm": 0.859375, "learning_rate": 1.8348361064638513e-05, "loss": 1.2185, "step": 7974 }, { "epoch": 0.586801563577834, "grad_norm": 0.71875, "learning_rate": 1.8342778406835383e-05, "loss": 0.766, "step": 7975 }, { "epoch": 0.586875143711198, "grad_norm": 0.98828125, "learning_rate": 1.8337196106312766e-05, "loss": 1.5889, "step": 7976 }, { "epoch": 0.586948723844562, "grad_norm": 0.9296875, "learning_rate": 1.8331614163370247e-05, "loss": 0.6474, "step": 7977 }, { "epoch": 0.587022303977926, "grad_norm": 0.7734375, "learning_rate": 1.83260325783074e-05, "loss": 0.7061, "step": 7978 }, { "epoch": 0.58709588411129, "grad_norm": 0.73828125, "learning_rate": 1.832045135142379e-05, "loss": 0.9112, "step": 7979 }, { "epoch": 0.587169464244654, "grad_norm": 0.96875, "learning_rate": 1.831487048301893e-05, "loss": 0.6902, "step": 7980 }, { "epoch": 0.5872430443780179, "grad_norm": 0.9765625, "learning_rate": 1.8309289973392347e-05, "loss": 0.797, "step": 7981 }, { "epoch": 0.5873166245113819, "grad_norm": 0.875, "learning_rate": 1.8303709822843533e-05, "loss": 0.8609, "step": 7982 }, { "epoch": 0.5873902046447459, "grad_norm": 0.83984375, "learning_rate": 1.8298130031671974e-05, "loss": 1.0372, "step": 7983 }, { "epoch": 0.5874637847781099, "grad_norm": 0.9921875, "learning_rate": 1.8292550600177112e-05, "loss": 1.3363, "step": 7984 }, { "epoch": 0.5875373649114739, "grad_norm": 0.6875, "learning_rate": 1.8286971528658386e-05, "loss": 0.8451, "step": 7985 }, { "epoch": 0.5876109450448379, "grad_norm": 0.98828125, "learning_rate": 1.8281392817415223e-05, "loss": 1.6296, "step": 7986 }, { "epoch": 0.5876845251782019, "grad_norm": 1.3046875, "learning_rate": 1.827581446674701e-05, "loss": 1.386, "step": 7987 }, { "epoch": 0.5877581053115659, "grad_norm": 1.1796875, "learning_rate": 1.827023647695315e-05, "loss": 1.2393, "step": 7988 }, { "epoch": 0.5878316854449299, "grad_norm": 0.859375, "learning_rate": 1.8264658848332977e-05, "loss": 1.1769, "step": 7989 }, { "epoch": 0.5879052655782938, "grad_norm": 0.82421875, "learning_rate": 1.8259081581185843e-05, "loss": 0.8126, "step": 7990 }, { "epoch": 0.5879788457116578, "grad_norm": 0.9921875, "learning_rate": 1.8253504675811073e-05, "loss": 0.9025, "step": 7991 }, { "epoch": 0.5880524258450218, "grad_norm": 1.2109375, "learning_rate": 1.8247928132507962e-05, "loss": 1.0028, "step": 7992 }, { "epoch": 0.5881260059783858, "grad_norm": 0.78125, "learning_rate": 1.82423519515758e-05, "loss": 0.5055, "step": 7993 }, { "epoch": 0.5881995861117498, "grad_norm": 0.90625, "learning_rate": 1.8236776133313837e-05, "loss": 0.7578, "step": 7994 }, { "epoch": 0.5882731662451138, "grad_norm": 0.9140625, "learning_rate": 1.8231200678021325e-05, "loss": 0.7794, "step": 7995 }, { "epoch": 0.5883467463784778, "grad_norm": 0.9140625, "learning_rate": 1.8225625585997494e-05, "loss": 1.2828, "step": 7996 }, { "epoch": 0.5884203265118418, "grad_norm": 0.77734375, "learning_rate": 1.8220050857541548e-05, "loss": 0.993, "step": 7997 }, { "epoch": 0.5884939066452058, "grad_norm": 0.98046875, "learning_rate": 1.8214476492952658e-05, "loss": 0.8535, "step": 7998 }, { "epoch": 0.5885674867785697, "grad_norm": 0.8515625, "learning_rate": 1.820890249253e-05, "loss": 0.9465, "step": 7999 }, { "epoch": 0.5886410669119337, "grad_norm": 0.6640625, "learning_rate": 1.8203328856572716e-05, "loss": 0.6409, "step": 8000 } ], "logging_steps": 1, "max_steps": 13591, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0646749664124574e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }