{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 9.993420504705588, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00033313900224868825, "grad_norm": 3.234375, "learning_rate": 0.0, "loss": 4.5, "step": 1 }, { "epoch": 0.0006662780044973765, "grad_norm": 3.75, "learning_rate": 1.664816870144284e-08, "loss": 4.7339, "step": 2 }, { "epoch": 0.0009994170067460647, "grad_norm": 3.578125, "learning_rate": 3.329633740288568e-08, "loss": 4.6078, "step": 3 }, { "epoch": 0.001332556008994753, "grad_norm": 3.390625, "learning_rate": 4.994450610432852e-08, "loss": 4.6254, "step": 4 }, { "epoch": 0.0016656950112434413, "grad_norm": 3.421875, "learning_rate": 6.659267480577137e-08, "loss": 4.5743, "step": 5 }, { "epoch": 0.0019988340134921294, "grad_norm": 3.9375, "learning_rate": 8.32408435072142e-08, "loss": 4.6471, "step": 6 }, { "epoch": 0.0023319730157408177, "grad_norm": 3.640625, "learning_rate": 9.988901220865704e-08, "loss": 4.6109, "step": 7 }, { "epoch": 0.002665112017989506, "grad_norm": 3.703125, "learning_rate": 1.165371809100999e-07, "loss": 4.6294, "step": 8 }, { "epoch": 0.0029982510202381943, "grad_norm": 3.3125, "learning_rate": 1.3318534961154273e-07, "loss": 4.5852, "step": 9 }, { "epoch": 0.0033313900224868826, "grad_norm": 3.515625, "learning_rate": 1.498335183129856e-07, "loss": 4.6198, "step": 10 }, { "epoch": 0.003664529024735571, "grad_norm": 3.78125, "learning_rate": 1.664816870144284e-07, "loss": 4.6809, "step": 11 }, { "epoch": 0.003997668026984259, "grad_norm": 3.515625, "learning_rate": 1.8312985571587127e-07, "loss": 4.5738, "step": 12 }, { "epoch": 0.0043308070292329475, "grad_norm": 3.640625, "learning_rate": 1.9977802441731409e-07, "loss": 4.6422, "step": 13 }, { "epoch": 0.004663946031481635, "grad_norm": 3.328125, "learning_rate": 2.1642619311875695e-07, "loss": 4.5652, "step": 14 }, { "epoch": 0.004997085033730324, "grad_norm": 3.0625, "learning_rate": 2.330743618201998e-07, "loss": 4.5031, "step": 15 }, { "epoch": 0.005330224035979012, "grad_norm": 3.6875, "learning_rate": 2.4972253052164265e-07, "loss": 4.6404, "step": 16 }, { "epoch": 0.005663363038227701, "grad_norm": 3.515625, "learning_rate": 2.6637069922308547e-07, "loss": 4.5984, "step": 17 }, { "epoch": 0.005996502040476389, "grad_norm": 3.515625, "learning_rate": 2.830188679245283e-07, "loss": 4.5901, "step": 18 }, { "epoch": 0.006329641042725077, "grad_norm": 3.5, "learning_rate": 2.996670366259712e-07, "loss": 4.5709, "step": 19 }, { "epoch": 0.006662780044973765, "grad_norm": 3.546875, "learning_rate": 3.16315205327414e-07, "loss": 4.5739, "step": 20 }, { "epoch": 0.006995919047222454, "grad_norm": 3.6875, "learning_rate": 3.329633740288568e-07, "loss": 4.6062, "step": 21 }, { "epoch": 0.007329058049471142, "grad_norm": 3.890625, "learning_rate": 3.4961154273029963e-07, "loss": 4.6463, "step": 22 }, { "epoch": 0.00766219705171983, "grad_norm": 3.921875, "learning_rate": 3.6625971143174255e-07, "loss": 4.668, "step": 23 }, { "epoch": 0.007995336053968518, "grad_norm": 3.203125, "learning_rate": 3.8290788013318536e-07, "loss": 4.5462, "step": 24 }, { "epoch": 0.008328475056217206, "grad_norm": 3.609375, "learning_rate": 3.9955604883462817e-07, "loss": 4.6369, "step": 25 }, { "epoch": 0.008661614058465895, "grad_norm": 3.796875, "learning_rate": 4.1620421753607104e-07, "loss": 4.6612, "step": 26 }, { "epoch": 0.008994753060714584, "grad_norm": 3.734375, "learning_rate": 4.328523862375139e-07, "loss": 4.5768, "step": 27 }, { "epoch": 0.00932789206296327, "grad_norm": 3.4375, "learning_rate": 4.495005549389567e-07, "loss": 4.609, "step": 28 }, { "epoch": 0.00966103106521196, "grad_norm": 3.515625, "learning_rate": 4.661487236403996e-07, "loss": 4.5621, "step": 29 }, { "epoch": 0.009994170067460648, "grad_norm": 3.703125, "learning_rate": 4.827968923418424e-07, "loss": 4.6267, "step": 30 }, { "epoch": 0.010327309069709337, "grad_norm": 3.578125, "learning_rate": 4.994450610432853e-07, "loss": 4.5638, "step": 31 }, { "epoch": 0.010660448071958024, "grad_norm": 3.171875, "learning_rate": 5.160932297447281e-07, "loss": 4.4998, "step": 32 }, { "epoch": 0.010993587074206713, "grad_norm": 3.28125, "learning_rate": 5.327413984461709e-07, "loss": 4.5656, "step": 33 }, { "epoch": 0.011326726076455402, "grad_norm": 3.59375, "learning_rate": 5.493895671476137e-07, "loss": 4.6295, "step": 34 }, { "epoch": 0.011659865078704089, "grad_norm": 3.1875, "learning_rate": 5.660377358490566e-07, "loss": 4.5675, "step": 35 }, { "epoch": 0.011993004080952777, "grad_norm": 3.3125, "learning_rate": 5.826859045504995e-07, "loss": 4.5796, "step": 36 }, { "epoch": 0.012326143083201466, "grad_norm": 3.4375, "learning_rate": 5.993340732519424e-07, "loss": 4.5581, "step": 37 }, { "epoch": 0.012659282085450155, "grad_norm": 3.609375, "learning_rate": 6.159822419533852e-07, "loss": 4.6121, "step": 38 }, { "epoch": 0.012992421087698842, "grad_norm": 3.25, "learning_rate": 6.32630410654828e-07, "loss": 4.5035, "step": 39 }, { "epoch": 0.01332556008994753, "grad_norm": 3.828125, "learning_rate": 6.492785793562708e-07, "loss": 4.6283, "step": 40 }, { "epoch": 0.01365869909219622, "grad_norm": 3.125, "learning_rate": 6.659267480577136e-07, "loss": 4.499, "step": 41 }, { "epoch": 0.013991838094444908, "grad_norm": 3.40625, "learning_rate": 6.825749167591565e-07, "loss": 4.6149, "step": 42 }, { "epoch": 0.014324977096693595, "grad_norm": 3.78125, "learning_rate": 6.992230854605993e-07, "loss": 4.6687, "step": 43 }, { "epoch": 0.014658116098942284, "grad_norm": 3.84375, "learning_rate": 7.158712541620423e-07, "loss": 4.6319, "step": 44 }, { "epoch": 0.014991255101190972, "grad_norm": 3.5625, "learning_rate": 7.325194228634851e-07, "loss": 4.6135, "step": 45 }, { "epoch": 0.01532439410343966, "grad_norm": 3.8125, "learning_rate": 7.491675915649279e-07, "loss": 4.6109, "step": 46 }, { "epoch": 0.015657533105688348, "grad_norm": 3.453125, "learning_rate": 7.658157602663707e-07, "loss": 4.5668, "step": 47 }, { "epoch": 0.015990672107937035, "grad_norm": 3.609375, "learning_rate": 7.824639289678135e-07, "loss": 4.6447, "step": 48 }, { "epoch": 0.016323811110185726, "grad_norm": 3.1875, "learning_rate": 7.991120976692563e-07, "loss": 4.5511, "step": 49 }, { "epoch": 0.016656950112434413, "grad_norm": 3.640625, "learning_rate": 8.157602663706993e-07, "loss": 4.6341, "step": 50 }, { "epoch": 0.016990089114683103, "grad_norm": 3.515625, "learning_rate": 8.324084350721421e-07, "loss": 4.6087, "step": 51 }, { "epoch": 0.01732322811693179, "grad_norm": 3.40625, "learning_rate": 8.49056603773585e-07, "loss": 4.5658, "step": 52 }, { "epoch": 0.017656367119180477, "grad_norm": 3.578125, "learning_rate": 8.657047724750278e-07, "loss": 4.6102, "step": 53 }, { "epoch": 0.017989506121429168, "grad_norm": 3.53125, "learning_rate": 8.823529411764706e-07, "loss": 4.5918, "step": 54 }, { "epoch": 0.018322645123677855, "grad_norm": 3.546875, "learning_rate": 8.990011098779134e-07, "loss": 4.5924, "step": 55 }, { "epoch": 0.01865578412592654, "grad_norm": 3.484375, "learning_rate": 9.156492785793562e-07, "loss": 4.6195, "step": 56 }, { "epoch": 0.018988923128175232, "grad_norm": 3.75, "learning_rate": 9.322974472807992e-07, "loss": 4.6216, "step": 57 }, { "epoch": 0.01932206213042392, "grad_norm": 3.421875, "learning_rate": 9.489456159822419e-07, "loss": 4.5662, "step": 58 }, { "epoch": 0.019655201132672606, "grad_norm": 3.40625, "learning_rate": 9.655937846836848e-07, "loss": 4.5889, "step": 59 }, { "epoch": 0.019988340134921297, "grad_norm": 3.265625, "learning_rate": 9.822419533851278e-07, "loss": 4.6052, "step": 60 }, { "epoch": 0.020321479137169984, "grad_norm": 3.921875, "learning_rate": 9.988901220865706e-07, "loss": 4.6308, "step": 61 }, { "epoch": 0.020654618139418674, "grad_norm": 3.859375, "learning_rate": 1.0155382907880134e-06, "loss": 4.643, "step": 62 }, { "epoch": 0.02098775714166736, "grad_norm": 3.796875, "learning_rate": 1.0321864594894562e-06, "loss": 4.6407, "step": 63 }, { "epoch": 0.021320896143916048, "grad_norm": 3.5, "learning_rate": 1.048834628190899e-06, "loss": 4.6135, "step": 64 }, { "epoch": 0.02165403514616474, "grad_norm": 3.640625, "learning_rate": 1.0654827968923419e-06, "loss": 4.6086, "step": 65 }, { "epoch": 0.021987174148413426, "grad_norm": 3.296875, "learning_rate": 1.0821309655937847e-06, "loss": 4.589, "step": 66 }, { "epoch": 0.022320313150662113, "grad_norm": 3.390625, "learning_rate": 1.0987791342952275e-06, "loss": 4.5962, "step": 67 }, { "epoch": 0.022653452152910803, "grad_norm": 3.140625, "learning_rate": 1.1154273029966703e-06, "loss": 4.5433, "step": 68 }, { "epoch": 0.02298659115515949, "grad_norm": 3.625, "learning_rate": 1.1320754716981131e-06, "loss": 4.5927, "step": 69 }, { "epoch": 0.023319730157408177, "grad_norm": 3.28125, "learning_rate": 1.148723640399556e-06, "loss": 4.5541, "step": 70 }, { "epoch": 0.023652869159656868, "grad_norm": 3.625, "learning_rate": 1.165371809100999e-06, "loss": 4.6167, "step": 71 }, { "epoch": 0.023986008161905555, "grad_norm": 3.296875, "learning_rate": 1.1820199778024418e-06, "loss": 4.5702, "step": 72 }, { "epoch": 0.024319147164154245, "grad_norm": 3.546875, "learning_rate": 1.1986681465038848e-06, "loss": 4.6017, "step": 73 }, { "epoch": 0.024652286166402932, "grad_norm": 3.4375, "learning_rate": 1.2153163152053276e-06, "loss": 4.5756, "step": 74 }, { "epoch": 0.02498542516865162, "grad_norm": 3.515625, "learning_rate": 1.2319644839067704e-06, "loss": 4.6517, "step": 75 }, { "epoch": 0.02531856417090031, "grad_norm": 3.40625, "learning_rate": 1.2486126526082132e-06, "loss": 4.5742, "step": 76 }, { "epoch": 0.025651703173148997, "grad_norm": 3.53125, "learning_rate": 1.265260821309656e-06, "loss": 4.6328, "step": 77 }, { "epoch": 0.025984842175397684, "grad_norm": 3.484375, "learning_rate": 1.2819089900110988e-06, "loss": 4.6183, "step": 78 }, { "epoch": 0.026317981177646374, "grad_norm": 3.5625, "learning_rate": 1.2985571587125417e-06, "loss": 4.6041, "step": 79 }, { "epoch": 0.02665112017989506, "grad_norm": 3.390625, "learning_rate": 1.3152053274139845e-06, "loss": 4.5881, "step": 80 }, { "epoch": 0.026984259182143748, "grad_norm": 3.625, "learning_rate": 1.3318534961154273e-06, "loss": 4.5878, "step": 81 }, { "epoch": 0.02731739818439244, "grad_norm": 3.40625, "learning_rate": 1.34850166481687e-06, "loss": 4.5924, "step": 82 }, { "epoch": 0.027650537186641126, "grad_norm": 3.578125, "learning_rate": 1.365149833518313e-06, "loss": 4.6446, "step": 83 }, { "epoch": 0.027983676188889816, "grad_norm": 3.484375, "learning_rate": 1.3817980022197557e-06, "loss": 4.5876, "step": 84 }, { "epoch": 0.028316815191138503, "grad_norm": 3.15625, "learning_rate": 1.3984461709211985e-06, "loss": 4.577, "step": 85 }, { "epoch": 0.02864995419338719, "grad_norm": 3.5, "learning_rate": 1.4150943396226415e-06, "loss": 4.5488, "step": 86 }, { "epoch": 0.02898309319563588, "grad_norm": 3.4375, "learning_rate": 1.4317425083240846e-06, "loss": 4.5988, "step": 87 }, { "epoch": 0.029316232197884567, "grad_norm": 3.0625, "learning_rate": 1.4483906770255274e-06, "loss": 4.5409, "step": 88 }, { "epoch": 0.029649371200133254, "grad_norm": 3.484375, "learning_rate": 1.4650388457269702e-06, "loss": 4.5992, "step": 89 }, { "epoch": 0.029982510202381945, "grad_norm": 3.734375, "learning_rate": 1.481687014428413e-06, "loss": 4.6404, "step": 90 }, { "epoch": 0.030315649204630632, "grad_norm": 3.78125, "learning_rate": 1.4983351831298558e-06, "loss": 4.6248, "step": 91 }, { "epoch": 0.03064878820687932, "grad_norm": 3.0, "learning_rate": 1.5149833518312986e-06, "loss": 4.4543, "step": 92 }, { "epoch": 0.03098192720912801, "grad_norm": 3.359375, "learning_rate": 1.5316315205327414e-06, "loss": 4.5336, "step": 93 }, { "epoch": 0.031315066211376696, "grad_norm": 3.1875, "learning_rate": 1.5482796892341843e-06, "loss": 4.5782, "step": 94 }, { "epoch": 0.03164820521362539, "grad_norm": 3.65625, "learning_rate": 1.564927857935627e-06, "loss": 4.6116, "step": 95 }, { "epoch": 0.03198134421587407, "grad_norm": 3.0, "learning_rate": 1.5815760266370699e-06, "loss": 4.5363, "step": 96 }, { "epoch": 0.03231448321812276, "grad_norm": 3.46875, "learning_rate": 1.5982241953385127e-06, "loss": 4.6024, "step": 97 }, { "epoch": 0.03264762222037145, "grad_norm": 3.21875, "learning_rate": 1.6148723640399555e-06, "loss": 4.5431, "step": 98 }, { "epoch": 0.032980761222620135, "grad_norm": 3.515625, "learning_rate": 1.6315205327413985e-06, "loss": 4.6089, "step": 99 }, { "epoch": 0.033313900224868825, "grad_norm": 3.3125, "learning_rate": 1.6481687014428413e-06, "loss": 4.624, "step": 100 }, { "epoch": 0.033647039227117516, "grad_norm": 3.71875, "learning_rate": 1.6648168701442842e-06, "loss": 4.6291, "step": 101 }, { "epoch": 0.033980178229366206, "grad_norm": 3.453125, "learning_rate": 1.681465038845727e-06, "loss": 4.6392, "step": 102 }, { "epoch": 0.03431331723161489, "grad_norm": 3.125, "learning_rate": 1.69811320754717e-06, "loss": 4.5399, "step": 103 }, { "epoch": 0.03464645623386358, "grad_norm": 3.078125, "learning_rate": 1.7147613762486128e-06, "loss": 4.5428, "step": 104 }, { "epoch": 0.03497959523611227, "grad_norm": 3.328125, "learning_rate": 1.7314095449500556e-06, "loss": 4.5579, "step": 105 }, { "epoch": 0.035312734238360954, "grad_norm": 3.3125, "learning_rate": 1.7480577136514984e-06, "loss": 4.57, "step": 106 }, { "epoch": 0.035645873240609645, "grad_norm": 3.203125, "learning_rate": 1.7647058823529412e-06, "loss": 4.5252, "step": 107 }, { "epoch": 0.035979012242858335, "grad_norm": 3.203125, "learning_rate": 1.781354051054384e-06, "loss": 4.5246, "step": 108 }, { "epoch": 0.03631215124510702, "grad_norm": 3.40625, "learning_rate": 1.7980022197558269e-06, "loss": 4.6314, "step": 109 }, { "epoch": 0.03664529024735571, "grad_norm": 3.171875, "learning_rate": 1.8146503884572697e-06, "loss": 4.598, "step": 110 }, { "epoch": 0.0369784292496044, "grad_norm": 3.375, "learning_rate": 1.8312985571587125e-06, "loss": 4.562, "step": 111 }, { "epoch": 0.03731156825185308, "grad_norm": 3.140625, "learning_rate": 1.8479467258601555e-06, "loss": 4.5918, "step": 112 }, { "epoch": 0.037644707254101774, "grad_norm": 3.1875, "learning_rate": 1.8645948945615983e-06, "loss": 4.5492, "step": 113 }, { "epoch": 0.037977846256350464, "grad_norm": 2.921875, "learning_rate": 1.8812430632630411e-06, "loss": 4.5311, "step": 114 }, { "epoch": 0.03831098525859915, "grad_norm": 3.21875, "learning_rate": 1.8978912319644837e-06, "loss": 4.6063, "step": 115 }, { "epoch": 0.03864412426084784, "grad_norm": 3.515625, "learning_rate": 1.914539400665927e-06, "loss": 4.6008, "step": 116 }, { "epoch": 0.03897726326309653, "grad_norm": 3.15625, "learning_rate": 1.9311875693673696e-06, "loss": 4.5904, "step": 117 }, { "epoch": 0.03931040226534521, "grad_norm": 3.328125, "learning_rate": 1.9478357380688126e-06, "loss": 4.5772, "step": 118 }, { "epoch": 0.0396435412675939, "grad_norm": 3.265625, "learning_rate": 1.9644839067702556e-06, "loss": 4.624, "step": 119 }, { "epoch": 0.03997668026984259, "grad_norm": 3.328125, "learning_rate": 1.981132075471698e-06, "loss": 4.5803, "step": 120 }, { "epoch": 0.04030981927209128, "grad_norm": 3.078125, "learning_rate": 1.9977802441731412e-06, "loss": 4.5815, "step": 121 }, { "epoch": 0.04064295827433997, "grad_norm": 3.34375, "learning_rate": 2.014428412874584e-06, "loss": 4.5763, "step": 122 }, { "epoch": 0.04097609727658866, "grad_norm": 3.3125, "learning_rate": 2.031076581576027e-06, "loss": 4.5875, "step": 123 }, { "epoch": 0.04130923627883735, "grad_norm": 2.96875, "learning_rate": 2.0477247502774695e-06, "loss": 4.5794, "step": 124 }, { "epoch": 0.04164237528108603, "grad_norm": 3.171875, "learning_rate": 2.0643729189789125e-06, "loss": 4.6124, "step": 125 }, { "epoch": 0.04197551428333472, "grad_norm": 3.171875, "learning_rate": 2.081021087680355e-06, "loss": 4.538, "step": 126 }, { "epoch": 0.04230865328558341, "grad_norm": 3.390625, "learning_rate": 2.097669256381798e-06, "loss": 4.6125, "step": 127 }, { "epoch": 0.042641792287832096, "grad_norm": 3.25, "learning_rate": 2.1143174250832407e-06, "loss": 4.5706, "step": 128 }, { "epoch": 0.04297493129008079, "grad_norm": 2.984375, "learning_rate": 2.1309655937846837e-06, "loss": 4.5273, "step": 129 }, { "epoch": 0.04330807029232948, "grad_norm": 3.1875, "learning_rate": 2.1476137624861263e-06, "loss": 4.5415, "step": 130 }, { "epoch": 0.04364120929457816, "grad_norm": 2.984375, "learning_rate": 2.1642619311875694e-06, "loss": 4.5285, "step": 131 }, { "epoch": 0.04397434829682685, "grad_norm": 3.21875, "learning_rate": 2.1809100998890124e-06, "loss": 4.5642, "step": 132 }, { "epoch": 0.04430748729907554, "grad_norm": 2.96875, "learning_rate": 2.197558268590455e-06, "loss": 4.5586, "step": 133 }, { "epoch": 0.044640626301324225, "grad_norm": 3.03125, "learning_rate": 2.214206437291898e-06, "loss": 4.5767, "step": 134 }, { "epoch": 0.044973765303572916, "grad_norm": 2.859375, "learning_rate": 2.2308546059933406e-06, "loss": 4.5467, "step": 135 }, { "epoch": 0.045306904305821606, "grad_norm": 3.328125, "learning_rate": 2.2475027746947836e-06, "loss": 4.6296, "step": 136 }, { "epoch": 0.04564004330807029, "grad_norm": 3.375, "learning_rate": 2.2641509433962262e-06, "loss": 4.6068, "step": 137 }, { "epoch": 0.04597318231031898, "grad_norm": 3.046875, "learning_rate": 2.2807991120976692e-06, "loss": 4.5471, "step": 138 }, { "epoch": 0.04630632131256767, "grad_norm": 2.84375, "learning_rate": 2.297447280799112e-06, "loss": 4.5494, "step": 139 }, { "epoch": 0.046639460314816354, "grad_norm": 2.765625, "learning_rate": 2.314095449500555e-06, "loss": 4.5315, "step": 140 }, { "epoch": 0.046972599317065045, "grad_norm": 3.125, "learning_rate": 2.330743618201998e-06, "loss": 4.5672, "step": 141 }, { "epoch": 0.047305738319313735, "grad_norm": 3.0625, "learning_rate": 2.347391786903441e-06, "loss": 4.5668, "step": 142 }, { "epoch": 0.04763887732156242, "grad_norm": 3.09375, "learning_rate": 2.3640399556048835e-06, "loss": 4.6051, "step": 143 }, { "epoch": 0.04797201632381111, "grad_norm": 3.171875, "learning_rate": 2.3806881243063265e-06, "loss": 4.5727, "step": 144 }, { "epoch": 0.0483051553260598, "grad_norm": 3.078125, "learning_rate": 2.3973362930077696e-06, "loss": 4.4996, "step": 145 }, { "epoch": 0.04863829432830849, "grad_norm": 2.984375, "learning_rate": 2.413984461709212e-06, "loss": 4.572, "step": 146 }, { "epoch": 0.048971433330557174, "grad_norm": 2.96875, "learning_rate": 2.430632630410655e-06, "loss": 4.5535, "step": 147 }, { "epoch": 0.049304572332805864, "grad_norm": 2.984375, "learning_rate": 2.447280799112098e-06, "loss": 4.5441, "step": 148 }, { "epoch": 0.049637711335054555, "grad_norm": 2.828125, "learning_rate": 2.463928967813541e-06, "loss": 4.5152, "step": 149 }, { "epoch": 0.04997085033730324, "grad_norm": 2.65625, "learning_rate": 2.4805771365149834e-06, "loss": 4.5401, "step": 150 }, { "epoch": 0.05030398933955193, "grad_norm": 2.5, "learning_rate": 2.4972253052164264e-06, "loss": 4.5328, "step": 151 }, { "epoch": 0.05063712834180062, "grad_norm": 2.65625, "learning_rate": 2.513873473917869e-06, "loss": 4.5302, "step": 152 }, { "epoch": 0.0509702673440493, "grad_norm": 2.953125, "learning_rate": 2.530521642619312e-06, "loss": 4.5393, "step": 153 }, { "epoch": 0.05130340634629799, "grad_norm": 2.875, "learning_rate": 2.5471698113207547e-06, "loss": 4.5804, "step": 154 }, { "epoch": 0.051636545348546684, "grad_norm": 2.84375, "learning_rate": 2.5638179800221977e-06, "loss": 4.4796, "step": 155 }, { "epoch": 0.05196968435079537, "grad_norm": 2.90625, "learning_rate": 2.5804661487236403e-06, "loss": 4.5478, "step": 156 }, { "epoch": 0.05230282335304406, "grad_norm": 2.984375, "learning_rate": 2.5971143174250833e-06, "loss": 4.5555, "step": 157 }, { "epoch": 0.05263596235529275, "grad_norm": 2.921875, "learning_rate": 2.6137624861265263e-06, "loss": 4.526, "step": 158 }, { "epoch": 0.05296910135754143, "grad_norm": 3.140625, "learning_rate": 2.630410654827969e-06, "loss": 4.5869, "step": 159 }, { "epoch": 0.05330224035979012, "grad_norm": 2.890625, "learning_rate": 2.647058823529412e-06, "loss": 4.5903, "step": 160 }, { "epoch": 0.05363537936203881, "grad_norm": 2.875, "learning_rate": 2.6637069922308546e-06, "loss": 4.5722, "step": 161 }, { "epoch": 0.053968518364287496, "grad_norm": 2.84375, "learning_rate": 2.6803551609322976e-06, "loss": 4.5642, "step": 162 }, { "epoch": 0.054301657366536187, "grad_norm": 3.15625, "learning_rate": 2.69700332963374e-06, "loss": 4.631, "step": 163 }, { "epoch": 0.05463479636878488, "grad_norm": 2.71875, "learning_rate": 2.713651498335183e-06, "loss": 4.5555, "step": 164 }, { "epoch": 0.05496793537103356, "grad_norm": 2.421875, "learning_rate": 2.730299667036626e-06, "loss": 4.5418, "step": 165 }, { "epoch": 0.05530107437328225, "grad_norm": 2.5625, "learning_rate": 2.746947835738069e-06, "loss": 4.523, "step": 166 }, { "epoch": 0.05563421337553094, "grad_norm": 2.625, "learning_rate": 2.7635960044395114e-06, "loss": 4.5253, "step": 167 }, { "epoch": 0.05596735237777963, "grad_norm": 2.78125, "learning_rate": 2.7802441731409545e-06, "loss": 4.5662, "step": 168 }, { "epoch": 0.056300491380028315, "grad_norm": 2.453125, "learning_rate": 2.796892341842397e-06, "loss": 4.4993, "step": 169 }, { "epoch": 0.056633630382277006, "grad_norm": 2.8125, "learning_rate": 2.81354051054384e-06, "loss": 4.5708, "step": 170 }, { "epoch": 0.056966769384525696, "grad_norm": 2.609375, "learning_rate": 2.830188679245283e-06, "loss": 4.5596, "step": 171 }, { "epoch": 0.05729990838677438, "grad_norm": 2.484375, "learning_rate": 2.846836847946726e-06, "loss": 4.5274, "step": 172 }, { "epoch": 0.05763304738902307, "grad_norm": 2.46875, "learning_rate": 2.863485016648169e-06, "loss": 4.5139, "step": 173 }, { "epoch": 0.05796618639127176, "grad_norm": 2.515625, "learning_rate": 2.8801331853496117e-06, "loss": 4.4979, "step": 174 }, { "epoch": 0.058299325393520444, "grad_norm": 2.5625, "learning_rate": 2.8967813540510548e-06, "loss": 4.5038, "step": 175 }, { "epoch": 0.058632464395769135, "grad_norm": 2.453125, "learning_rate": 2.9134295227524974e-06, "loss": 4.5297, "step": 176 }, { "epoch": 0.058965603398017825, "grad_norm": 2.46875, "learning_rate": 2.9300776914539404e-06, "loss": 4.5524, "step": 177 }, { "epoch": 0.05929874240026651, "grad_norm": 2.640625, "learning_rate": 2.946725860155383e-06, "loss": 4.5556, "step": 178 }, { "epoch": 0.0596318814025152, "grad_norm": 2.4375, "learning_rate": 2.963374028856826e-06, "loss": 4.4693, "step": 179 }, { "epoch": 0.05996502040476389, "grad_norm": 2.328125, "learning_rate": 2.9800221975582686e-06, "loss": 4.5125, "step": 180 }, { "epoch": 0.06029815940701257, "grad_norm": 2.390625, "learning_rate": 2.9966703662597116e-06, "loss": 4.481, "step": 181 }, { "epoch": 0.060631298409261264, "grad_norm": 2.46875, "learning_rate": 3.0133185349611542e-06, "loss": 4.4706, "step": 182 }, { "epoch": 0.060964437411509954, "grad_norm": 2.453125, "learning_rate": 3.0299667036625973e-06, "loss": 4.5165, "step": 183 }, { "epoch": 0.06129757641375864, "grad_norm": 2.53125, "learning_rate": 3.0466148723640403e-06, "loss": 4.5224, "step": 184 }, { "epoch": 0.06163071541600733, "grad_norm": 2.4375, "learning_rate": 3.063263041065483e-06, "loss": 4.505, "step": 185 }, { "epoch": 0.06196385441825602, "grad_norm": 2.5, "learning_rate": 3.079911209766926e-06, "loss": 4.5555, "step": 186 }, { "epoch": 0.0622969934205047, "grad_norm": 2.421875, "learning_rate": 3.0965593784683685e-06, "loss": 4.5328, "step": 187 }, { "epoch": 0.06263013242275339, "grad_norm": 2.484375, "learning_rate": 3.1132075471698115e-06, "loss": 4.5751, "step": 188 }, { "epoch": 0.06296327142500208, "grad_norm": 2.15625, "learning_rate": 3.129855715871254e-06, "loss": 4.4598, "step": 189 }, { "epoch": 0.06329641042725077, "grad_norm": 2.296875, "learning_rate": 3.146503884572697e-06, "loss": 4.4877, "step": 190 }, { "epoch": 0.06362954942949946, "grad_norm": 2.28125, "learning_rate": 3.1631520532741398e-06, "loss": 4.5359, "step": 191 }, { "epoch": 0.06396268843174814, "grad_norm": 2.453125, "learning_rate": 3.1798002219755828e-06, "loss": 4.5484, "step": 192 }, { "epoch": 0.06429582743399684, "grad_norm": 2.296875, "learning_rate": 3.1964483906770254e-06, "loss": 4.5013, "step": 193 }, { "epoch": 0.06462896643624552, "grad_norm": 2.09375, "learning_rate": 3.2130965593784684e-06, "loss": 4.4485, "step": 194 }, { "epoch": 0.0649621054384942, "grad_norm": 2.171875, "learning_rate": 3.229744728079911e-06, "loss": 4.4665, "step": 195 }, { "epoch": 0.0652952444407429, "grad_norm": 2.09375, "learning_rate": 3.246392896781354e-06, "loss": 4.5124, "step": 196 }, { "epoch": 0.06562838344299159, "grad_norm": 2.46875, "learning_rate": 3.263041065482797e-06, "loss": 4.5488, "step": 197 }, { "epoch": 0.06596152244524027, "grad_norm": 2.171875, "learning_rate": 3.2796892341842397e-06, "loss": 4.4755, "step": 198 }, { "epoch": 0.06629466144748897, "grad_norm": 2.203125, "learning_rate": 3.2963374028856827e-06, "loss": 4.5116, "step": 199 }, { "epoch": 0.06662780044973765, "grad_norm": 2.3125, "learning_rate": 3.3129855715871253e-06, "loss": 4.5005, "step": 200 }, { "epoch": 0.06696093945198635, "grad_norm": 2.359375, "learning_rate": 3.3296337402885683e-06, "loss": 4.5216, "step": 201 }, { "epoch": 0.06729407845423503, "grad_norm": 2.1875, "learning_rate": 3.346281908990011e-06, "loss": 4.5322, "step": 202 }, { "epoch": 0.06762721745648372, "grad_norm": 2.046875, "learning_rate": 3.362930077691454e-06, "loss": 4.4826, "step": 203 }, { "epoch": 0.06796035645873241, "grad_norm": 2.15625, "learning_rate": 3.379578246392897e-06, "loss": 4.4809, "step": 204 }, { "epoch": 0.0682934954609811, "grad_norm": 2.03125, "learning_rate": 3.39622641509434e-06, "loss": 4.4458, "step": 205 }, { "epoch": 0.06862663446322978, "grad_norm": 2.1875, "learning_rate": 3.4128745837957826e-06, "loss": 4.5055, "step": 206 }, { "epoch": 0.06895977346547848, "grad_norm": 1.921875, "learning_rate": 3.4295227524972256e-06, "loss": 4.4985, "step": 207 }, { "epoch": 0.06929291246772716, "grad_norm": 2.1875, "learning_rate": 3.446170921198668e-06, "loss": 4.4973, "step": 208 }, { "epoch": 0.06962605146997584, "grad_norm": 1.9453125, "learning_rate": 3.4628190899001112e-06, "loss": 4.5216, "step": 209 }, { "epoch": 0.06995919047222454, "grad_norm": 2.03125, "learning_rate": 3.4794672586015542e-06, "loss": 4.5123, "step": 210 }, { "epoch": 0.07029232947447323, "grad_norm": 1.9453125, "learning_rate": 3.496115427302997e-06, "loss": 4.482, "step": 211 }, { "epoch": 0.07062546847672191, "grad_norm": 1.8046875, "learning_rate": 3.51276359600444e-06, "loss": 4.4299, "step": 212 }, { "epoch": 0.0709586074789706, "grad_norm": 2.03125, "learning_rate": 3.5294117647058825e-06, "loss": 4.5146, "step": 213 }, { "epoch": 0.07129174648121929, "grad_norm": 1.7265625, "learning_rate": 3.5460599334073255e-06, "loss": 4.4577, "step": 214 }, { "epoch": 0.07162488548346797, "grad_norm": 1.8359375, "learning_rate": 3.562708102108768e-06, "loss": 4.4829, "step": 215 }, { "epoch": 0.07195802448571667, "grad_norm": 1.6796875, "learning_rate": 3.579356270810211e-06, "loss": 4.477, "step": 216 }, { "epoch": 0.07229116348796535, "grad_norm": 1.6171875, "learning_rate": 3.5960044395116537e-06, "loss": 4.4413, "step": 217 }, { "epoch": 0.07262430249021404, "grad_norm": 1.7734375, "learning_rate": 3.6126526082130967e-06, "loss": 4.5123, "step": 218 }, { "epoch": 0.07295744149246274, "grad_norm": 1.78125, "learning_rate": 3.6293007769145393e-06, "loss": 4.4721, "step": 219 }, { "epoch": 0.07329058049471142, "grad_norm": 1.5, "learning_rate": 3.6459489456159824e-06, "loss": 4.4681, "step": 220 }, { "epoch": 0.0736237194969601, "grad_norm": 1.515625, "learning_rate": 3.662597114317425e-06, "loss": 4.3836, "step": 221 }, { "epoch": 0.0739568584992088, "grad_norm": 1.46875, "learning_rate": 3.679245283018868e-06, "loss": 4.4924, "step": 222 }, { "epoch": 0.07428999750145748, "grad_norm": 1.4453125, "learning_rate": 3.695893451720311e-06, "loss": 4.4879, "step": 223 }, { "epoch": 0.07462313650370617, "grad_norm": 1.484375, "learning_rate": 3.7125416204217536e-06, "loss": 4.427, "step": 224 }, { "epoch": 0.07495627550595486, "grad_norm": 1.296875, "learning_rate": 3.7291897891231966e-06, "loss": 4.4476, "step": 225 }, { "epoch": 0.07528941450820355, "grad_norm": 1.328125, "learning_rate": 3.7458379578246392e-06, "loss": 4.492, "step": 226 }, { "epoch": 0.07562255351045223, "grad_norm": 1.203125, "learning_rate": 3.7624861265260823e-06, "loss": 4.47, "step": 227 }, { "epoch": 0.07595569251270093, "grad_norm": 1.203125, "learning_rate": 3.7791342952275253e-06, "loss": 4.3994, "step": 228 }, { "epoch": 0.07628883151494961, "grad_norm": 1.3046875, "learning_rate": 3.7957824639289675e-06, "loss": 4.4774, "step": 229 }, { "epoch": 0.0766219705171983, "grad_norm": 1.265625, "learning_rate": 3.8124306326304105e-06, "loss": 4.4852, "step": 230 }, { "epoch": 0.076955109519447, "grad_norm": 1.1484375, "learning_rate": 3.829078801331854e-06, "loss": 4.4871, "step": 231 }, { "epoch": 0.07728824852169568, "grad_norm": 1.1484375, "learning_rate": 3.8457269700332965e-06, "loss": 4.4622, "step": 232 }, { "epoch": 0.07762138752394436, "grad_norm": 1.0546875, "learning_rate": 3.862375138734739e-06, "loss": 4.4855, "step": 233 }, { "epoch": 0.07795452652619306, "grad_norm": 1.046875, "learning_rate": 3.879023307436182e-06, "loss": 4.4074, "step": 234 }, { "epoch": 0.07828766552844174, "grad_norm": 1.0234375, "learning_rate": 3.895671476137625e-06, "loss": 4.3602, "step": 235 }, { "epoch": 0.07862080453069042, "grad_norm": 1.015625, "learning_rate": 3.912319644839068e-06, "loss": 4.4271, "step": 236 }, { "epoch": 0.07895394353293912, "grad_norm": 0.9453125, "learning_rate": 3.928967813540511e-06, "loss": 4.4449, "step": 237 }, { "epoch": 0.0792870825351878, "grad_norm": 0.95703125, "learning_rate": 3.945615982241953e-06, "loss": 4.5088, "step": 238 }, { "epoch": 0.07962022153743649, "grad_norm": 0.9140625, "learning_rate": 3.962264150943396e-06, "loss": 4.4237, "step": 239 }, { "epoch": 0.07995336053968519, "grad_norm": 0.96875, "learning_rate": 3.978912319644839e-06, "loss": 4.469, "step": 240 }, { "epoch": 0.08028649954193387, "grad_norm": 0.859375, "learning_rate": 3.9955604883462825e-06, "loss": 4.4688, "step": 241 }, { "epoch": 0.08061963854418255, "grad_norm": 0.9921875, "learning_rate": 4.012208657047724e-06, "loss": 4.4179, "step": 242 }, { "epoch": 0.08095277754643125, "grad_norm": 0.94140625, "learning_rate": 4.028856825749168e-06, "loss": 4.4564, "step": 243 }, { "epoch": 0.08128591654867993, "grad_norm": 0.9140625, "learning_rate": 4.04550499445061e-06, "loss": 4.3748, "step": 244 }, { "epoch": 0.08161905555092862, "grad_norm": 0.92578125, "learning_rate": 4.062153163152054e-06, "loss": 4.4275, "step": 245 }, { "epoch": 0.08195219455317732, "grad_norm": 0.92578125, "learning_rate": 4.0788013318534955e-06, "loss": 4.4558, "step": 246 }, { "epoch": 0.082285333555426, "grad_norm": 0.859375, "learning_rate": 4.095449500554939e-06, "loss": 4.3504, "step": 247 }, { "epoch": 0.0826184725576747, "grad_norm": 0.8359375, "learning_rate": 4.1120976692563815e-06, "loss": 4.3627, "step": 248 }, { "epoch": 0.08295161155992338, "grad_norm": 0.8125, "learning_rate": 4.128745837957825e-06, "loss": 4.3588, "step": 249 }, { "epoch": 0.08328475056217206, "grad_norm": 0.8203125, "learning_rate": 4.1453940066592676e-06, "loss": 4.4611, "step": 250 }, { "epoch": 0.08361788956442076, "grad_norm": 0.7734375, "learning_rate": 4.16204217536071e-06, "loss": 4.4035, "step": 251 }, { "epoch": 0.08395102856666944, "grad_norm": 0.81640625, "learning_rate": 4.178690344062154e-06, "loss": 4.4088, "step": 252 }, { "epoch": 0.08428416756891813, "grad_norm": 0.8203125, "learning_rate": 4.195338512763596e-06, "loss": 4.4225, "step": 253 }, { "epoch": 0.08461730657116683, "grad_norm": 0.7890625, "learning_rate": 4.21198668146504e-06, "loss": 4.4267, "step": 254 }, { "epoch": 0.08495044557341551, "grad_norm": 0.81640625, "learning_rate": 4.228634850166481e-06, "loss": 4.3805, "step": 255 }, { "epoch": 0.08528358457566419, "grad_norm": 0.75, "learning_rate": 4.245283018867925e-06, "loss": 4.4107, "step": 256 }, { "epoch": 0.08561672357791289, "grad_norm": 0.77734375, "learning_rate": 4.2619311875693675e-06, "loss": 4.4316, "step": 257 }, { "epoch": 0.08594986258016157, "grad_norm": 0.78125, "learning_rate": 4.278579356270811e-06, "loss": 4.3948, "step": 258 }, { "epoch": 0.08628300158241026, "grad_norm": 0.76171875, "learning_rate": 4.295227524972253e-06, "loss": 4.4038, "step": 259 }, { "epoch": 0.08661614058465895, "grad_norm": 0.7578125, "learning_rate": 4.311875693673696e-06, "loss": 4.3819, "step": 260 }, { "epoch": 0.08694927958690764, "grad_norm": 0.74609375, "learning_rate": 4.328523862375139e-06, "loss": 4.4372, "step": 261 }, { "epoch": 0.08728241858915632, "grad_norm": 0.76171875, "learning_rate": 4.345172031076582e-06, "loss": 4.3749, "step": 262 }, { "epoch": 0.08761555759140502, "grad_norm": 0.7109375, "learning_rate": 4.361820199778025e-06, "loss": 4.3946, "step": 263 }, { "epoch": 0.0879486965936537, "grad_norm": 0.72265625, "learning_rate": 4.378468368479467e-06, "loss": 4.4464, "step": 264 }, { "epoch": 0.08828183559590239, "grad_norm": 0.75, "learning_rate": 4.39511653718091e-06, "loss": 4.3642, "step": 265 }, { "epoch": 0.08861497459815108, "grad_norm": 0.734375, "learning_rate": 4.411764705882353e-06, "loss": 4.4273, "step": 266 }, { "epoch": 0.08894811360039977, "grad_norm": 0.66796875, "learning_rate": 4.428412874583796e-06, "loss": 4.3867, "step": 267 }, { "epoch": 0.08928125260264845, "grad_norm": 0.73046875, "learning_rate": 4.445061043285239e-06, "loss": 4.4388, "step": 268 }, { "epoch": 0.08961439160489715, "grad_norm": 0.68359375, "learning_rate": 4.461709211986681e-06, "loss": 4.4179, "step": 269 }, { "epoch": 0.08994753060714583, "grad_norm": 0.64453125, "learning_rate": 4.478357380688125e-06, "loss": 4.3221, "step": 270 }, { "epoch": 0.09028066960939451, "grad_norm": 0.66796875, "learning_rate": 4.495005549389567e-06, "loss": 4.3842, "step": 271 }, { "epoch": 0.09061380861164321, "grad_norm": 0.66015625, "learning_rate": 4.51165371809101e-06, "loss": 4.3478, "step": 272 }, { "epoch": 0.0909469476138919, "grad_norm": 0.703125, "learning_rate": 4.5283018867924524e-06, "loss": 4.4957, "step": 273 }, { "epoch": 0.09128008661614058, "grad_norm": 0.62890625, "learning_rate": 4.544950055493896e-06, "loss": 4.3879, "step": 274 }, { "epoch": 0.09161322561838928, "grad_norm": 0.66796875, "learning_rate": 4.5615982241953385e-06, "loss": 4.3881, "step": 275 }, { "epoch": 0.09194636462063796, "grad_norm": 0.6328125, "learning_rate": 4.578246392896782e-06, "loss": 4.4328, "step": 276 }, { "epoch": 0.09227950362288664, "grad_norm": 0.6796875, "learning_rate": 4.594894561598224e-06, "loss": 4.3012, "step": 277 }, { "epoch": 0.09261264262513534, "grad_norm": 0.66796875, "learning_rate": 4.611542730299667e-06, "loss": 4.3339, "step": 278 }, { "epoch": 0.09294578162738402, "grad_norm": 0.671875, "learning_rate": 4.62819089900111e-06, "loss": 4.3669, "step": 279 }, { "epoch": 0.09327892062963271, "grad_norm": 0.640625, "learning_rate": 4.644839067702553e-06, "loss": 4.3207, "step": 280 }, { "epoch": 0.0936120596318814, "grad_norm": 0.63671875, "learning_rate": 4.661487236403996e-06, "loss": 4.3239, "step": 281 }, { "epoch": 0.09394519863413009, "grad_norm": 0.62890625, "learning_rate": 4.678135405105438e-06, "loss": 4.4349, "step": 282 }, { "epoch": 0.09427833763637877, "grad_norm": 0.6328125, "learning_rate": 4.694783573806882e-06, "loss": 4.3343, "step": 283 }, { "epoch": 0.09461147663862747, "grad_norm": 0.63671875, "learning_rate": 4.7114317425083244e-06, "loss": 4.3816, "step": 284 }, { "epoch": 0.09494461564087615, "grad_norm": 0.60546875, "learning_rate": 4.728079911209767e-06, "loss": 4.4511, "step": 285 }, { "epoch": 0.09527775464312484, "grad_norm": 0.65234375, "learning_rate": 4.74472807991121e-06, "loss": 4.4086, "step": 286 }, { "epoch": 0.09561089364537353, "grad_norm": 0.62890625, "learning_rate": 4.761376248612653e-06, "loss": 4.3877, "step": 287 }, { "epoch": 0.09594403264762222, "grad_norm": 0.62109375, "learning_rate": 4.778024417314096e-06, "loss": 4.4086, "step": 288 }, { "epoch": 0.0962771716498709, "grad_norm": 0.6171875, "learning_rate": 4.794672586015539e-06, "loss": 4.3441, "step": 289 }, { "epoch": 0.0966103106521196, "grad_norm": 0.58984375, "learning_rate": 4.811320754716981e-06, "loss": 4.4052, "step": 290 }, { "epoch": 0.09694344965436828, "grad_norm": 0.640625, "learning_rate": 4.827968923418424e-06, "loss": 4.4251, "step": 291 }, { "epoch": 0.09727658865661698, "grad_norm": 0.59375, "learning_rate": 4.844617092119867e-06, "loss": 4.396, "step": 292 }, { "epoch": 0.09760972765886566, "grad_norm": 0.6328125, "learning_rate": 4.86126526082131e-06, "loss": 4.3681, "step": 293 }, { "epoch": 0.09794286666111435, "grad_norm": 0.59765625, "learning_rate": 4.877913429522752e-06, "loss": 4.4412, "step": 294 }, { "epoch": 0.09827600566336304, "grad_norm": 0.60546875, "learning_rate": 4.894561598224196e-06, "loss": 4.4217, "step": 295 }, { "epoch": 0.09860914466561173, "grad_norm": 0.59375, "learning_rate": 4.911209766925638e-06, "loss": 4.4341, "step": 296 }, { "epoch": 0.09894228366786041, "grad_norm": 0.61328125, "learning_rate": 4.927857935627082e-06, "loss": 4.3777, "step": 297 }, { "epoch": 0.09927542267010911, "grad_norm": 0.6015625, "learning_rate": 4.944506104328523e-06, "loss": 4.4743, "step": 298 }, { "epoch": 0.09960856167235779, "grad_norm": 0.57421875, "learning_rate": 4.961154273029967e-06, "loss": 4.3363, "step": 299 }, { "epoch": 0.09994170067460648, "grad_norm": 0.6015625, "learning_rate": 4.9778024417314094e-06, "loss": 4.4027, "step": 300 }, { "epoch": 0.10027483967685517, "grad_norm": 0.609375, "learning_rate": 4.994450610432853e-06, "loss": 4.4022, "step": 301 }, { "epoch": 0.10060797867910386, "grad_norm": 0.58203125, "learning_rate": 5.0110987791342955e-06, "loss": 4.4228, "step": 302 }, { "epoch": 0.10094111768135254, "grad_norm": 0.6171875, "learning_rate": 5.027746947835738e-06, "loss": 4.4059, "step": 303 }, { "epoch": 0.10127425668360124, "grad_norm": 0.578125, "learning_rate": 5.044395116537181e-06, "loss": 4.3918, "step": 304 }, { "epoch": 0.10160739568584992, "grad_norm": 0.59375, "learning_rate": 5.061043285238624e-06, "loss": 4.4012, "step": 305 }, { "epoch": 0.1019405346880986, "grad_norm": 0.59375, "learning_rate": 5.077691453940067e-06, "loss": 4.4395, "step": 306 }, { "epoch": 0.1022736736903473, "grad_norm": 0.60546875, "learning_rate": 5.094339622641509e-06, "loss": 4.3832, "step": 307 }, { "epoch": 0.10260681269259599, "grad_norm": 0.5625, "learning_rate": 5.110987791342952e-06, "loss": 4.3791, "step": 308 }, { "epoch": 0.10293995169484467, "grad_norm": 0.55859375, "learning_rate": 5.127635960044395e-06, "loss": 4.4372, "step": 309 }, { "epoch": 0.10327309069709337, "grad_norm": 0.578125, "learning_rate": 5.144284128745838e-06, "loss": 4.3851, "step": 310 }, { "epoch": 0.10360622969934205, "grad_norm": 0.5703125, "learning_rate": 5.1609322974472806e-06, "loss": 4.3675, "step": 311 }, { "epoch": 0.10393936870159073, "grad_norm": 0.56640625, "learning_rate": 5.177580466148724e-06, "loss": 4.4399, "step": 312 }, { "epoch": 0.10427250770383943, "grad_norm": 0.56640625, "learning_rate": 5.194228634850167e-06, "loss": 4.4224, "step": 313 }, { "epoch": 0.10460564670608812, "grad_norm": 0.55859375, "learning_rate": 5.21087680355161e-06, "loss": 4.3635, "step": 314 }, { "epoch": 0.1049387857083368, "grad_norm": 0.609375, "learning_rate": 5.227524972253053e-06, "loss": 4.4088, "step": 315 }, { "epoch": 0.1052719247105855, "grad_norm": 0.5703125, "learning_rate": 5.244173140954495e-06, "loss": 4.4839, "step": 316 }, { "epoch": 0.10560506371283418, "grad_norm": 0.5390625, "learning_rate": 5.260821309655938e-06, "loss": 4.413, "step": 317 }, { "epoch": 0.10593820271508286, "grad_norm": 0.5546875, "learning_rate": 5.277469478357381e-06, "loss": 4.3671, "step": 318 }, { "epoch": 0.10627134171733156, "grad_norm": 0.52734375, "learning_rate": 5.294117647058824e-06, "loss": 4.3682, "step": 319 }, { "epoch": 0.10660448071958024, "grad_norm": 0.5390625, "learning_rate": 5.3107658157602665e-06, "loss": 4.3574, "step": 320 }, { "epoch": 0.10693761972182893, "grad_norm": 0.56640625, "learning_rate": 5.327413984461709e-06, "loss": 4.3687, "step": 321 }, { "epoch": 0.10727075872407763, "grad_norm": 0.54296875, "learning_rate": 5.3440621531631526e-06, "loss": 4.3224, "step": 322 }, { "epoch": 0.10760389772632631, "grad_norm": 0.52734375, "learning_rate": 5.360710321864595e-06, "loss": 4.3597, "step": 323 }, { "epoch": 0.10793703672857499, "grad_norm": 0.5234375, "learning_rate": 5.377358490566038e-06, "loss": 4.3597, "step": 324 }, { "epoch": 0.10827017573082369, "grad_norm": 0.52734375, "learning_rate": 5.39400665926748e-06, "loss": 4.3989, "step": 325 }, { "epoch": 0.10860331473307237, "grad_norm": 0.51953125, "learning_rate": 5.410654827968924e-06, "loss": 4.3244, "step": 326 }, { "epoch": 0.10893645373532106, "grad_norm": 0.53515625, "learning_rate": 5.427302996670366e-06, "loss": 4.3804, "step": 327 }, { "epoch": 0.10926959273756975, "grad_norm": 0.5546875, "learning_rate": 5.44395116537181e-06, "loss": 4.3372, "step": 328 }, { "epoch": 0.10960273173981844, "grad_norm": 0.5390625, "learning_rate": 5.460599334073252e-06, "loss": 4.3105, "step": 329 }, { "epoch": 0.10993587074206712, "grad_norm": 0.51171875, "learning_rate": 5.477247502774695e-06, "loss": 4.3768, "step": 330 }, { "epoch": 0.11026900974431582, "grad_norm": 0.51953125, "learning_rate": 5.493895671476138e-06, "loss": 4.4238, "step": 331 }, { "epoch": 0.1106021487465645, "grad_norm": 0.5390625, "learning_rate": 5.510543840177581e-06, "loss": 4.3851, "step": 332 }, { "epoch": 0.11093528774881319, "grad_norm": 0.52734375, "learning_rate": 5.527192008879023e-06, "loss": 4.3375, "step": 333 }, { "epoch": 0.11126842675106188, "grad_norm": 0.52734375, "learning_rate": 5.543840177580466e-06, "loss": 4.3829, "step": 334 }, { "epoch": 0.11160156575331057, "grad_norm": 0.5390625, "learning_rate": 5.560488346281909e-06, "loss": 4.4402, "step": 335 }, { "epoch": 0.11193470475555926, "grad_norm": 0.5234375, "learning_rate": 5.577136514983352e-06, "loss": 4.3321, "step": 336 }, { "epoch": 0.11226784375780795, "grad_norm": 0.54296875, "learning_rate": 5.593784683684794e-06, "loss": 4.3709, "step": 337 }, { "epoch": 0.11260098276005663, "grad_norm": 0.51171875, "learning_rate": 5.6104328523862375e-06, "loss": 4.4497, "step": 338 }, { "epoch": 0.11293412176230533, "grad_norm": 0.5234375, "learning_rate": 5.62708102108768e-06, "loss": 4.3112, "step": 339 }, { "epoch": 0.11326726076455401, "grad_norm": 0.53125, "learning_rate": 5.643729189789124e-06, "loss": 4.3598, "step": 340 }, { "epoch": 0.1136003997668027, "grad_norm": 0.53515625, "learning_rate": 5.660377358490566e-06, "loss": 4.374, "step": 341 }, { "epoch": 0.11393353876905139, "grad_norm": 0.53125, "learning_rate": 5.677025527192009e-06, "loss": 4.3962, "step": 342 }, { "epoch": 0.11426667777130008, "grad_norm": 0.51171875, "learning_rate": 5.693673695893452e-06, "loss": 4.3415, "step": 343 }, { "epoch": 0.11459981677354876, "grad_norm": 0.5234375, "learning_rate": 5.710321864594895e-06, "loss": 4.3803, "step": 344 }, { "epoch": 0.11493295577579746, "grad_norm": 0.5, "learning_rate": 5.726970033296338e-06, "loss": 4.3205, "step": 345 }, { "epoch": 0.11526609477804614, "grad_norm": 0.51171875, "learning_rate": 5.74361820199778e-06, "loss": 4.3359, "step": 346 }, { "epoch": 0.11559923378029482, "grad_norm": 0.546875, "learning_rate": 5.7602663706992235e-06, "loss": 4.401, "step": 347 }, { "epoch": 0.11593237278254352, "grad_norm": 0.51953125, "learning_rate": 5.776914539400666e-06, "loss": 4.4225, "step": 348 }, { "epoch": 0.1162655117847922, "grad_norm": 0.51171875, "learning_rate": 5.7935627081021095e-06, "loss": 4.365, "step": 349 }, { "epoch": 0.11659865078704089, "grad_norm": 0.515625, "learning_rate": 5.810210876803551e-06, "loss": 4.373, "step": 350 }, { "epoch": 0.11693178978928959, "grad_norm": 0.5078125, "learning_rate": 5.826859045504995e-06, "loss": 4.4226, "step": 351 }, { "epoch": 0.11726492879153827, "grad_norm": 0.50390625, "learning_rate": 5.843507214206437e-06, "loss": 4.3843, "step": 352 }, { "epoch": 0.11759806779378695, "grad_norm": 0.515625, "learning_rate": 5.860155382907881e-06, "loss": 4.414, "step": 353 }, { "epoch": 0.11793120679603565, "grad_norm": 0.52734375, "learning_rate": 5.876803551609323e-06, "loss": 4.3388, "step": 354 }, { "epoch": 0.11826434579828433, "grad_norm": 0.51953125, "learning_rate": 5.893451720310766e-06, "loss": 4.3819, "step": 355 }, { "epoch": 0.11859748480053302, "grad_norm": 0.52734375, "learning_rate": 5.910099889012209e-06, "loss": 4.3314, "step": 356 }, { "epoch": 0.11893062380278172, "grad_norm": 0.54296875, "learning_rate": 5.926748057713652e-06, "loss": 4.3576, "step": 357 }, { "epoch": 0.1192637628050304, "grad_norm": 0.53515625, "learning_rate": 5.943396226415095e-06, "loss": 4.338, "step": 358 }, { "epoch": 0.11959690180727908, "grad_norm": 0.51953125, "learning_rate": 5.960044395116537e-06, "loss": 4.3822, "step": 359 }, { "epoch": 0.11993004080952778, "grad_norm": 0.4921875, "learning_rate": 5.97669256381798e-06, "loss": 4.4149, "step": 360 }, { "epoch": 0.12026317981177646, "grad_norm": 0.51953125, "learning_rate": 5.993340732519423e-06, "loss": 4.3847, "step": 361 }, { "epoch": 0.12059631881402515, "grad_norm": 0.51171875, "learning_rate": 6.009988901220866e-06, "loss": 4.3314, "step": 362 }, { "epoch": 0.12092945781627384, "grad_norm": 0.5, "learning_rate": 6.0266370699223085e-06, "loss": 4.3912, "step": 363 }, { "epoch": 0.12126259681852253, "grad_norm": 0.50390625, "learning_rate": 6.043285238623751e-06, "loss": 4.3894, "step": 364 }, { "epoch": 0.12159573582077121, "grad_norm": 0.50390625, "learning_rate": 6.0599334073251945e-06, "loss": 4.3151, "step": 365 }, { "epoch": 0.12192887482301991, "grad_norm": 0.482421875, "learning_rate": 6.076581576026637e-06, "loss": 4.3513, "step": 366 }, { "epoch": 0.12226201382526859, "grad_norm": 0.51953125, "learning_rate": 6.0932297447280806e-06, "loss": 4.3069, "step": 367 }, { "epoch": 0.12259515282751728, "grad_norm": 0.51171875, "learning_rate": 6.109877913429522e-06, "loss": 4.4066, "step": 368 }, { "epoch": 0.12292829182976597, "grad_norm": 0.51953125, "learning_rate": 6.126526082130966e-06, "loss": 4.3958, "step": 369 }, { "epoch": 0.12326143083201466, "grad_norm": 0.52734375, "learning_rate": 6.143174250832408e-06, "loss": 4.3872, "step": 370 }, { "epoch": 0.12359456983426334, "grad_norm": 0.49609375, "learning_rate": 6.159822419533852e-06, "loss": 4.3558, "step": 371 }, { "epoch": 0.12392770883651204, "grad_norm": 0.490234375, "learning_rate": 6.176470588235294e-06, "loss": 4.3822, "step": 372 }, { "epoch": 0.12426084783876072, "grad_norm": 0.5078125, "learning_rate": 6.193118756936737e-06, "loss": 4.3543, "step": 373 }, { "epoch": 0.1245939868410094, "grad_norm": 0.51953125, "learning_rate": 6.2097669256381805e-06, "loss": 4.338, "step": 374 }, { "epoch": 0.1249271258432581, "grad_norm": 0.486328125, "learning_rate": 6.226415094339623e-06, "loss": 4.3607, "step": 375 }, { "epoch": 0.12526026484550679, "grad_norm": 0.5, "learning_rate": 6.243063263041066e-06, "loss": 4.3536, "step": 376 }, { "epoch": 0.12559340384775547, "grad_norm": 0.50390625, "learning_rate": 6.259711431742508e-06, "loss": 4.3698, "step": 377 }, { "epoch": 0.12592654285000415, "grad_norm": 0.5, "learning_rate": 6.276359600443952e-06, "loss": 4.3924, "step": 378 }, { "epoch": 0.12625968185225286, "grad_norm": 0.498046875, "learning_rate": 6.293007769145394e-06, "loss": 4.3569, "step": 379 }, { "epoch": 0.12659282085450155, "grad_norm": 0.486328125, "learning_rate": 6.309655937846838e-06, "loss": 4.3757, "step": 380 }, { "epoch": 0.12692595985675023, "grad_norm": 0.484375, "learning_rate": 6.3263041065482795e-06, "loss": 4.3607, "step": 381 }, { "epoch": 0.12725909885899891, "grad_norm": 0.51171875, "learning_rate": 6.342952275249723e-06, "loss": 4.3274, "step": 382 }, { "epoch": 0.1275922378612476, "grad_norm": 0.515625, "learning_rate": 6.3596004439511656e-06, "loss": 4.3267, "step": 383 }, { "epoch": 0.12792537686349628, "grad_norm": 0.51171875, "learning_rate": 6.376248612652609e-06, "loss": 4.3573, "step": 384 }, { "epoch": 0.128258515865745, "grad_norm": 0.4921875, "learning_rate": 6.392896781354051e-06, "loss": 4.3535, "step": 385 }, { "epoch": 0.12859165486799368, "grad_norm": 0.515625, "learning_rate": 6.409544950055494e-06, "loss": 4.3642, "step": 386 }, { "epoch": 0.12892479387024236, "grad_norm": 0.49609375, "learning_rate": 6.426193118756937e-06, "loss": 4.3742, "step": 387 }, { "epoch": 0.12925793287249104, "grad_norm": 0.51171875, "learning_rate": 6.44284128745838e-06, "loss": 4.3207, "step": 388 }, { "epoch": 0.12959107187473973, "grad_norm": 0.50390625, "learning_rate": 6.459489456159822e-06, "loss": 4.3606, "step": 389 }, { "epoch": 0.1299242108769884, "grad_norm": 0.490234375, "learning_rate": 6.4761376248612655e-06, "loss": 4.3854, "step": 390 }, { "epoch": 0.13025734987923712, "grad_norm": 0.48046875, "learning_rate": 6.492785793562708e-06, "loss": 4.4051, "step": 391 }, { "epoch": 0.1305904888814858, "grad_norm": 0.5078125, "learning_rate": 6.5094339622641515e-06, "loss": 4.41, "step": 392 }, { "epoch": 0.1309236278837345, "grad_norm": 0.5078125, "learning_rate": 6.526082130965594e-06, "loss": 4.3389, "step": 393 }, { "epoch": 0.13125676688598317, "grad_norm": 0.48046875, "learning_rate": 6.542730299667037e-06, "loss": 4.3397, "step": 394 }, { "epoch": 0.13158990588823186, "grad_norm": 0.51171875, "learning_rate": 6.559378468368479e-06, "loss": 4.3919, "step": 395 }, { "epoch": 0.13192304489048054, "grad_norm": 0.498046875, "learning_rate": 6.576026637069923e-06, "loss": 4.3369, "step": 396 }, { "epoch": 0.13225618389272925, "grad_norm": 0.490234375, "learning_rate": 6.592674805771365e-06, "loss": 4.3615, "step": 397 }, { "epoch": 0.13258932289497793, "grad_norm": 0.484375, "learning_rate": 6.609322974472808e-06, "loss": 4.3975, "step": 398 }, { "epoch": 0.13292246189722662, "grad_norm": 0.50390625, "learning_rate": 6.6259711431742506e-06, "loss": 4.3691, "step": 399 }, { "epoch": 0.1332556008994753, "grad_norm": 0.48828125, "learning_rate": 6.642619311875694e-06, "loss": 4.3621, "step": 400 }, { "epoch": 0.13358873990172399, "grad_norm": 0.482421875, "learning_rate": 6.659267480577137e-06, "loss": 4.4149, "step": 401 }, { "epoch": 0.1339218789039727, "grad_norm": 0.486328125, "learning_rate": 6.675915649278579e-06, "loss": 4.3448, "step": 402 }, { "epoch": 0.13425501790622138, "grad_norm": 0.4765625, "learning_rate": 6.692563817980022e-06, "loss": 4.3991, "step": 403 }, { "epoch": 0.13458815690847006, "grad_norm": 0.5, "learning_rate": 6.709211986681465e-06, "loss": 4.2902, "step": 404 }, { "epoch": 0.13492129591071875, "grad_norm": 0.486328125, "learning_rate": 6.725860155382908e-06, "loss": 4.3865, "step": 405 }, { "epoch": 0.13525443491296743, "grad_norm": 0.46875, "learning_rate": 6.742508324084351e-06, "loss": 4.3541, "step": 406 }, { "epoch": 0.13558757391521611, "grad_norm": 0.46484375, "learning_rate": 6.759156492785794e-06, "loss": 4.2847, "step": 407 }, { "epoch": 0.13592071291746483, "grad_norm": 0.470703125, "learning_rate": 6.7758046614872365e-06, "loss": 4.3582, "step": 408 }, { "epoch": 0.1362538519197135, "grad_norm": 0.486328125, "learning_rate": 6.79245283018868e-06, "loss": 4.3519, "step": 409 }, { "epoch": 0.1365869909219622, "grad_norm": 0.484375, "learning_rate": 6.8091009988901225e-06, "loss": 4.3447, "step": 410 }, { "epoch": 0.13692012992421088, "grad_norm": 0.49609375, "learning_rate": 6.825749167591565e-06, "loss": 4.3249, "step": 411 }, { "epoch": 0.13725326892645956, "grad_norm": 0.48046875, "learning_rate": 6.842397336293008e-06, "loss": 4.3842, "step": 412 }, { "epoch": 0.13758640792870824, "grad_norm": 0.515625, "learning_rate": 6.859045504994451e-06, "loss": 4.2749, "step": 413 }, { "epoch": 0.13791954693095695, "grad_norm": 0.48828125, "learning_rate": 6.875693673695894e-06, "loss": 4.3922, "step": 414 }, { "epoch": 0.13825268593320564, "grad_norm": 0.490234375, "learning_rate": 6.892341842397336e-06, "loss": 4.3126, "step": 415 }, { "epoch": 0.13858582493545432, "grad_norm": 0.486328125, "learning_rate": 6.908990011098779e-06, "loss": 4.396, "step": 416 }, { "epoch": 0.138918963937703, "grad_norm": 0.48828125, "learning_rate": 6.9256381798002224e-06, "loss": 4.3498, "step": 417 }, { "epoch": 0.1392521029399517, "grad_norm": 0.48828125, "learning_rate": 6.942286348501665e-06, "loss": 4.3672, "step": 418 }, { "epoch": 0.13958524194220037, "grad_norm": 0.48046875, "learning_rate": 6.9589345172031085e-06, "loss": 4.3576, "step": 419 }, { "epoch": 0.13991838094444908, "grad_norm": 0.4609375, "learning_rate": 6.97558268590455e-06, "loss": 4.3712, "step": 420 }, { "epoch": 0.14025151994669777, "grad_norm": 0.48828125, "learning_rate": 6.992230854605994e-06, "loss": 4.3049, "step": 421 }, { "epoch": 0.14058465894894645, "grad_norm": 0.494140625, "learning_rate": 7.008879023307436e-06, "loss": 4.3117, "step": 422 }, { "epoch": 0.14091779795119513, "grad_norm": 0.4609375, "learning_rate": 7.02552719200888e-06, "loss": 4.4106, "step": 423 }, { "epoch": 0.14125093695344382, "grad_norm": 0.474609375, "learning_rate": 7.0421753607103215e-06, "loss": 4.4236, "step": 424 }, { "epoch": 0.1415840759556925, "grad_norm": 0.50390625, "learning_rate": 7.058823529411765e-06, "loss": 4.3841, "step": 425 }, { "epoch": 0.1419172149579412, "grad_norm": 0.49609375, "learning_rate": 7.0754716981132075e-06, "loss": 4.3168, "step": 426 }, { "epoch": 0.1422503539601899, "grad_norm": 0.4609375, "learning_rate": 7.092119866814651e-06, "loss": 4.3901, "step": 427 }, { "epoch": 0.14258349296243858, "grad_norm": 0.498046875, "learning_rate": 7.108768035516093e-06, "loss": 4.3226, "step": 428 }, { "epoch": 0.14291663196468726, "grad_norm": 0.5078125, "learning_rate": 7.125416204217536e-06, "loss": 4.3555, "step": 429 }, { "epoch": 0.14324977096693595, "grad_norm": 0.490234375, "learning_rate": 7.142064372918979e-06, "loss": 4.347, "step": 430 }, { "epoch": 0.14358290996918463, "grad_norm": 0.490234375, "learning_rate": 7.158712541620422e-06, "loss": 4.3023, "step": 431 }, { "epoch": 0.14391604897143334, "grad_norm": 0.48828125, "learning_rate": 7.175360710321865e-06, "loss": 4.3972, "step": 432 }, { "epoch": 0.14424918797368202, "grad_norm": 0.470703125, "learning_rate": 7.1920088790233074e-06, "loss": 4.3563, "step": 433 }, { "epoch": 0.1445823269759307, "grad_norm": 0.52734375, "learning_rate": 7.20865704772475e-06, "loss": 4.3333, "step": 434 }, { "epoch": 0.1449154659781794, "grad_norm": 0.515625, "learning_rate": 7.2253052164261935e-06, "loss": 4.2961, "step": 435 }, { "epoch": 0.14524860498042808, "grad_norm": 0.48046875, "learning_rate": 7.241953385127636e-06, "loss": 4.3746, "step": 436 }, { "epoch": 0.14558174398267676, "grad_norm": 0.498046875, "learning_rate": 7.258601553829079e-06, "loss": 4.3311, "step": 437 }, { "epoch": 0.14591488298492547, "grad_norm": 0.48828125, "learning_rate": 7.275249722530522e-06, "loss": 4.3131, "step": 438 }, { "epoch": 0.14624802198717415, "grad_norm": 0.4765625, "learning_rate": 7.291897891231965e-06, "loss": 4.369, "step": 439 }, { "epoch": 0.14658116098942284, "grad_norm": 0.490234375, "learning_rate": 7.308546059933408e-06, "loss": 4.3666, "step": 440 }, { "epoch": 0.14691429999167152, "grad_norm": 0.482421875, "learning_rate": 7.32519422863485e-06, "loss": 4.3439, "step": 441 }, { "epoch": 0.1472474389939202, "grad_norm": 0.466796875, "learning_rate": 7.341842397336293e-06, "loss": 4.3284, "step": 442 }, { "epoch": 0.1475805779961689, "grad_norm": 0.4921875, "learning_rate": 7.358490566037736e-06, "loss": 4.2902, "step": 443 }, { "epoch": 0.1479137169984176, "grad_norm": 0.5, "learning_rate": 7.375138734739179e-06, "loss": 4.3149, "step": 444 }, { "epoch": 0.14824685600066628, "grad_norm": 0.4765625, "learning_rate": 7.391786903440622e-06, "loss": 4.3492, "step": 445 }, { "epoch": 0.14857999500291497, "grad_norm": 0.4921875, "learning_rate": 7.408435072142065e-06, "loss": 4.377, "step": 446 }, { "epoch": 0.14891313400516365, "grad_norm": 0.478515625, "learning_rate": 7.425083240843507e-06, "loss": 4.3245, "step": 447 }, { "epoch": 0.14924627300741233, "grad_norm": 0.5, "learning_rate": 7.441731409544951e-06, "loss": 4.459, "step": 448 }, { "epoch": 0.14957941200966104, "grad_norm": 0.5078125, "learning_rate": 7.458379578246393e-06, "loss": 4.3769, "step": 449 }, { "epoch": 0.14991255101190973, "grad_norm": 0.484375, "learning_rate": 7.475027746947836e-06, "loss": 4.2935, "step": 450 }, { "epoch": 0.1502456900141584, "grad_norm": 0.466796875, "learning_rate": 7.4916759156492785e-06, "loss": 4.3541, "step": 451 }, { "epoch": 0.1505788290164071, "grad_norm": 0.466796875, "learning_rate": 7.508324084350722e-06, "loss": 4.3057, "step": 452 }, { "epoch": 0.15091196801865578, "grad_norm": 0.470703125, "learning_rate": 7.5249722530521645e-06, "loss": 4.3743, "step": 453 }, { "epoch": 0.15124510702090446, "grad_norm": 0.46484375, "learning_rate": 7.541620421753608e-06, "loss": 4.3596, "step": 454 }, { "epoch": 0.15157824602315317, "grad_norm": 0.47265625, "learning_rate": 7.5582685904550506e-06, "loss": 4.4039, "step": 455 }, { "epoch": 0.15191138502540186, "grad_norm": 0.46484375, "learning_rate": 7.574916759156492e-06, "loss": 4.2903, "step": 456 }, { "epoch": 0.15224452402765054, "grad_norm": 0.4921875, "learning_rate": 7.591564927857935e-06, "loss": 4.3208, "step": 457 }, { "epoch": 0.15257766302989922, "grad_norm": 0.4921875, "learning_rate": 7.608213096559378e-06, "loss": 4.3021, "step": 458 }, { "epoch": 0.1529108020321479, "grad_norm": 0.490234375, "learning_rate": 7.624861265260821e-06, "loss": 4.3669, "step": 459 }, { "epoch": 0.1532439410343966, "grad_norm": 0.478515625, "learning_rate": 7.641509433962264e-06, "loss": 4.3779, "step": 460 }, { "epoch": 0.1535770800366453, "grad_norm": 0.49609375, "learning_rate": 7.658157602663708e-06, "loss": 4.2954, "step": 461 }, { "epoch": 0.153910219038894, "grad_norm": 0.47265625, "learning_rate": 7.67480577136515e-06, "loss": 4.3413, "step": 462 }, { "epoch": 0.15424335804114267, "grad_norm": 0.482421875, "learning_rate": 7.691453940066593e-06, "loss": 4.3813, "step": 463 }, { "epoch": 0.15457649704339135, "grad_norm": 0.5078125, "learning_rate": 7.708102108768037e-06, "loss": 4.291, "step": 464 }, { "epoch": 0.15490963604564004, "grad_norm": 0.50390625, "learning_rate": 7.724750277469478e-06, "loss": 4.2823, "step": 465 }, { "epoch": 0.15524277504788872, "grad_norm": 0.46484375, "learning_rate": 7.74139844617092e-06, "loss": 4.3115, "step": 466 }, { "epoch": 0.15557591405013743, "grad_norm": 0.48828125, "learning_rate": 7.758046614872363e-06, "loss": 4.3182, "step": 467 }, { "epoch": 0.15590905305238612, "grad_norm": 0.4765625, "learning_rate": 7.774694783573807e-06, "loss": 4.305, "step": 468 }, { "epoch": 0.1562421920546348, "grad_norm": 0.50390625, "learning_rate": 7.79134295227525e-06, "loss": 4.2697, "step": 469 }, { "epoch": 0.15657533105688348, "grad_norm": 0.486328125, "learning_rate": 7.807991120976694e-06, "loss": 4.3578, "step": 470 }, { "epoch": 0.15690847005913217, "grad_norm": 0.48046875, "learning_rate": 7.824639289678136e-06, "loss": 4.3004, "step": 471 }, { "epoch": 0.15724160906138085, "grad_norm": 0.486328125, "learning_rate": 7.841287458379579e-06, "loss": 4.3983, "step": 472 }, { "epoch": 0.15757474806362956, "grad_norm": 0.462890625, "learning_rate": 7.857935627081022e-06, "loss": 4.2835, "step": 473 }, { "epoch": 0.15790788706587824, "grad_norm": 0.474609375, "learning_rate": 7.874583795782464e-06, "loss": 4.3508, "step": 474 }, { "epoch": 0.15824102606812693, "grad_norm": 0.482421875, "learning_rate": 7.891231964483906e-06, "loss": 4.3408, "step": 475 }, { "epoch": 0.1585741650703756, "grad_norm": 0.462890625, "learning_rate": 7.90788013318535e-06, "loss": 4.3245, "step": 476 }, { "epoch": 0.1589073040726243, "grad_norm": 0.462890625, "learning_rate": 7.924528301886793e-06, "loss": 4.3347, "step": 477 }, { "epoch": 0.15924044307487298, "grad_norm": 0.474609375, "learning_rate": 7.941176470588236e-06, "loss": 4.4049, "step": 478 }, { "epoch": 0.1595735820771217, "grad_norm": 0.470703125, "learning_rate": 7.957824639289678e-06, "loss": 4.2875, "step": 479 }, { "epoch": 0.15990672107937037, "grad_norm": 0.455078125, "learning_rate": 7.974472807991121e-06, "loss": 4.3217, "step": 480 }, { "epoch": 0.16023986008161906, "grad_norm": 0.466796875, "learning_rate": 7.991120976692565e-06, "loss": 4.3422, "step": 481 }, { "epoch": 0.16057299908386774, "grad_norm": 0.482421875, "learning_rate": 8.007769145394007e-06, "loss": 4.3129, "step": 482 }, { "epoch": 0.16090613808611642, "grad_norm": 0.47265625, "learning_rate": 8.024417314095448e-06, "loss": 4.3534, "step": 483 }, { "epoch": 0.1612392770883651, "grad_norm": 0.494140625, "learning_rate": 8.041065482796892e-06, "loss": 4.3422, "step": 484 }, { "epoch": 0.16157241609061382, "grad_norm": 0.451171875, "learning_rate": 8.057713651498335e-06, "loss": 4.3847, "step": 485 }, { "epoch": 0.1619055550928625, "grad_norm": 0.484375, "learning_rate": 8.074361820199779e-06, "loss": 4.3587, "step": 486 }, { "epoch": 0.16223869409511119, "grad_norm": 0.5078125, "learning_rate": 8.09100998890122e-06, "loss": 4.3284, "step": 487 }, { "epoch": 0.16257183309735987, "grad_norm": 0.4609375, "learning_rate": 8.107658157602664e-06, "loss": 4.4283, "step": 488 }, { "epoch": 0.16290497209960855, "grad_norm": 0.4609375, "learning_rate": 8.124306326304107e-06, "loss": 4.3741, "step": 489 }, { "epoch": 0.16323811110185724, "grad_norm": 0.478515625, "learning_rate": 8.140954495005551e-06, "loss": 4.3844, "step": 490 }, { "epoch": 0.16357125010410595, "grad_norm": 0.482421875, "learning_rate": 8.157602663706991e-06, "loss": 4.3543, "step": 491 }, { "epoch": 0.16390438910635463, "grad_norm": 0.474609375, "learning_rate": 8.174250832408434e-06, "loss": 4.3007, "step": 492 }, { "epoch": 0.16423752810860331, "grad_norm": 0.455078125, "learning_rate": 8.190899001109878e-06, "loss": 4.3214, "step": 493 }, { "epoch": 0.164570667110852, "grad_norm": 0.482421875, "learning_rate": 8.207547169811321e-06, "loss": 4.3222, "step": 494 }, { "epoch": 0.16490380611310068, "grad_norm": 0.46484375, "learning_rate": 8.224195338512763e-06, "loss": 4.3784, "step": 495 }, { "epoch": 0.1652369451153494, "grad_norm": 0.470703125, "learning_rate": 8.240843507214206e-06, "loss": 4.3064, "step": 496 }, { "epoch": 0.16557008411759808, "grad_norm": 0.47265625, "learning_rate": 8.25749167591565e-06, "loss": 4.3362, "step": 497 }, { "epoch": 0.16590322311984676, "grad_norm": 0.466796875, "learning_rate": 8.274139844617093e-06, "loss": 4.359, "step": 498 }, { "epoch": 0.16623636212209544, "grad_norm": 0.466796875, "learning_rate": 8.290788013318535e-06, "loss": 4.3883, "step": 499 }, { "epoch": 0.16656950112434413, "grad_norm": 0.498046875, "learning_rate": 8.307436182019977e-06, "loss": 4.2878, "step": 500 }, { "epoch": 0.1669026401265928, "grad_norm": 0.47265625, "learning_rate": 8.32408435072142e-06, "loss": 4.3301, "step": 501 }, { "epoch": 0.16723577912884152, "grad_norm": 0.4609375, "learning_rate": 8.340732519422864e-06, "loss": 4.3663, "step": 502 }, { "epoch": 0.1675689181310902, "grad_norm": 0.4921875, "learning_rate": 8.357380688124307e-06, "loss": 4.235, "step": 503 }, { "epoch": 0.1679020571333389, "grad_norm": 0.447265625, "learning_rate": 8.374028856825749e-06, "loss": 4.3386, "step": 504 }, { "epoch": 0.16823519613558757, "grad_norm": 0.470703125, "learning_rate": 8.390677025527192e-06, "loss": 4.3265, "step": 505 }, { "epoch": 0.16856833513783626, "grad_norm": 0.443359375, "learning_rate": 8.407325194228636e-06, "loss": 4.2973, "step": 506 }, { "epoch": 0.16890147414008494, "grad_norm": 0.4453125, "learning_rate": 8.42397336293008e-06, "loss": 4.3403, "step": 507 }, { "epoch": 0.16923461314233365, "grad_norm": 0.46484375, "learning_rate": 8.44062153163152e-06, "loss": 4.3014, "step": 508 }, { "epoch": 0.16956775214458233, "grad_norm": 0.484375, "learning_rate": 8.457269700332963e-06, "loss": 4.2459, "step": 509 }, { "epoch": 0.16990089114683102, "grad_norm": 0.498046875, "learning_rate": 8.473917869034406e-06, "loss": 4.3029, "step": 510 }, { "epoch": 0.1702340301490797, "grad_norm": 0.48828125, "learning_rate": 8.49056603773585e-06, "loss": 4.318, "step": 511 }, { "epoch": 0.17056716915132838, "grad_norm": 0.458984375, "learning_rate": 8.507214206437291e-06, "loss": 4.3135, "step": 512 }, { "epoch": 0.17090030815357707, "grad_norm": 0.4609375, "learning_rate": 8.523862375138735e-06, "loss": 4.3183, "step": 513 }, { "epoch": 0.17123344715582578, "grad_norm": 0.455078125, "learning_rate": 8.540510543840178e-06, "loss": 4.3263, "step": 514 }, { "epoch": 0.17156658615807446, "grad_norm": 0.48046875, "learning_rate": 8.557158712541622e-06, "loss": 4.3398, "step": 515 }, { "epoch": 0.17189972516032315, "grad_norm": 0.47265625, "learning_rate": 8.573806881243064e-06, "loss": 4.2892, "step": 516 }, { "epoch": 0.17223286416257183, "grad_norm": 0.482421875, "learning_rate": 8.590455049944505e-06, "loss": 4.3781, "step": 517 }, { "epoch": 0.17256600316482051, "grad_norm": 0.4375, "learning_rate": 8.607103218645949e-06, "loss": 4.3685, "step": 518 }, { "epoch": 0.1728991421670692, "grad_norm": 0.48828125, "learning_rate": 8.623751387347392e-06, "loss": 4.3289, "step": 519 }, { "epoch": 0.1732322811693179, "grad_norm": 0.46484375, "learning_rate": 8.640399556048834e-06, "loss": 4.2929, "step": 520 }, { "epoch": 0.1735654201715666, "grad_norm": 0.466796875, "learning_rate": 8.657047724750277e-06, "loss": 4.3083, "step": 521 }, { "epoch": 0.17389855917381528, "grad_norm": 0.466796875, "learning_rate": 8.673695893451721e-06, "loss": 4.3563, "step": 522 }, { "epoch": 0.17423169817606396, "grad_norm": 0.4453125, "learning_rate": 8.690344062153164e-06, "loss": 4.3242, "step": 523 }, { "epoch": 0.17456483717831264, "grad_norm": 0.44921875, "learning_rate": 8.706992230854606e-06, "loss": 4.4032, "step": 524 }, { "epoch": 0.17489797618056133, "grad_norm": 0.451171875, "learning_rate": 8.72364039955605e-06, "loss": 4.3383, "step": 525 }, { "epoch": 0.17523111518281004, "grad_norm": 0.462890625, "learning_rate": 8.740288568257491e-06, "loss": 4.392, "step": 526 }, { "epoch": 0.17556425418505872, "grad_norm": 0.453125, "learning_rate": 8.756936736958935e-06, "loss": 4.3403, "step": 527 }, { "epoch": 0.1758973931873074, "grad_norm": 0.466796875, "learning_rate": 8.773584905660376e-06, "loss": 4.3797, "step": 528 }, { "epoch": 0.1762305321895561, "grad_norm": 0.46484375, "learning_rate": 8.79023307436182e-06, "loss": 4.3739, "step": 529 }, { "epoch": 0.17656367119180477, "grad_norm": 0.478515625, "learning_rate": 8.806881243063263e-06, "loss": 4.3534, "step": 530 }, { "epoch": 0.17689681019405346, "grad_norm": 0.48828125, "learning_rate": 8.823529411764707e-06, "loss": 4.3437, "step": 531 }, { "epoch": 0.17722994919630217, "grad_norm": 0.453125, "learning_rate": 8.84017758046615e-06, "loss": 4.3106, "step": 532 }, { "epoch": 0.17756308819855085, "grad_norm": 0.4453125, "learning_rate": 8.856825749167592e-06, "loss": 4.4213, "step": 533 }, { "epoch": 0.17789622720079953, "grad_norm": 0.466796875, "learning_rate": 8.873473917869034e-06, "loss": 4.3225, "step": 534 }, { "epoch": 0.17822936620304822, "grad_norm": 0.470703125, "learning_rate": 8.890122086570477e-06, "loss": 4.2805, "step": 535 }, { "epoch": 0.1785625052052969, "grad_norm": 0.45703125, "learning_rate": 8.90677025527192e-06, "loss": 4.2771, "step": 536 }, { "epoch": 0.1788956442075456, "grad_norm": 0.462890625, "learning_rate": 8.923418423973362e-06, "loss": 4.3145, "step": 537 }, { "epoch": 0.1792287832097943, "grad_norm": 0.50390625, "learning_rate": 8.940066592674806e-06, "loss": 4.3795, "step": 538 }, { "epoch": 0.17956192221204298, "grad_norm": 0.46484375, "learning_rate": 8.95671476137625e-06, "loss": 4.33, "step": 539 }, { "epoch": 0.17989506121429166, "grad_norm": 0.515625, "learning_rate": 8.973362930077693e-06, "loss": 4.3137, "step": 540 }, { "epoch": 0.18022820021654035, "grad_norm": 0.49609375, "learning_rate": 8.990011098779134e-06, "loss": 4.3353, "step": 541 }, { "epoch": 0.18056133921878903, "grad_norm": 0.44921875, "learning_rate": 9.006659267480578e-06, "loss": 4.3248, "step": 542 }, { "epoch": 0.18089447822103774, "grad_norm": 0.466796875, "learning_rate": 9.02330743618202e-06, "loss": 4.2735, "step": 543 }, { "epoch": 0.18122761722328642, "grad_norm": 0.462890625, "learning_rate": 9.039955604883463e-06, "loss": 4.3604, "step": 544 }, { "epoch": 0.1815607562255351, "grad_norm": 0.47265625, "learning_rate": 9.056603773584905e-06, "loss": 4.325, "step": 545 }, { "epoch": 0.1818938952277838, "grad_norm": 0.470703125, "learning_rate": 9.073251942286348e-06, "loss": 4.3713, "step": 546 }, { "epoch": 0.18222703423003248, "grad_norm": 0.474609375, "learning_rate": 9.089900110987792e-06, "loss": 4.3177, "step": 547 }, { "epoch": 0.18256017323228116, "grad_norm": 0.486328125, "learning_rate": 9.106548279689235e-06, "loss": 4.3482, "step": 548 }, { "epoch": 0.18289331223452987, "grad_norm": 0.49609375, "learning_rate": 9.123196448390677e-06, "loss": 4.3421, "step": 549 }, { "epoch": 0.18322645123677855, "grad_norm": 0.5, "learning_rate": 9.13984461709212e-06, "loss": 4.3723, "step": 550 }, { "epoch": 0.18355959023902724, "grad_norm": 0.5, "learning_rate": 9.156492785793564e-06, "loss": 4.315, "step": 551 }, { "epoch": 0.18389272924127592, "grad_norm": 0.484375, "learning_rate": 9.173140954495006e-06, "loss": 4.3069, "step": 552 }, { "epoch": 0.1842258682435246, "grad_norm": 0.466796875, "learning_rate": 9.189789123196447e-06, "loss": 4.3511, "step": 553 }, { "epoch": 0.1845590072457733, "grad_norm": 0.49609375, "learning_rate": 9.20643729189789e-06, "loss": 4.2977, "step": 554 }, { "epoch": 0.184892146248022, "grad_norm": 0.5078125, "learning_rate": 9.223085460599334e-06, "loss": 4.2736, "step": 555 }, { "epoch": 0.18522528525027068, "grad_norm": 0.45703125, "learning_rate": 9.239733629300778e-06, "loss": 4.3188, "step": 556 }, { "epoch": 0.18555842425251937, "grad_norm": 0.546875, "learning_rate": 9.25638179800222e-06, "loss": 4.3112, "step": 557 }, { "epoch": 0.18589156325476805, "grad_norm": 0.49609375, "learning_rate": 9.273029966703663e-06, "loss": 4.3134, "step": 558 }, { "epoch": 0.18622470225701673, "grad_norm": 0.49609375, "learning_rate": 9.289678135405106e-06, "loss": 4.3039, "step": 559 }, { "epoch": 0.18655784125926542, "grad_norm": 0.458984375, "learning_rate": 9.306326304106548e-06, "loss": 4.3157, "step": 560 }, { "epoch": 0.18689098026151413, "grad_norm": 0.49609375, "learning_rate": 9.322974472807992e-06, "loss": 4.2728, "step": 561 }, { "epoch": 0.1872241192637628, "grad_norm": 0.46875, "learning_rate": 9.339622641509433e-06, "loss": 4.2984, "step": 562 }, { "epoch": 0.1875572582660115, "grad_norm": 0.447265625, "learning_rate": 9.356270810210877e-06, "loss": 4.378, "step": 563 }, { "epoch": 0.18789039726826018, "grad_norm": 0.46484375, "learning_rate": 9.37291897891232e-06, "loss": 4.3042, "step": 564 }, { "epoch": 0.18822353627050886, "grad_norm": 0.462890625, "learning_rate": 9.389567147613764e-06, "loss": 4.3235, "step": 565 }, { "epoch": 0.18855667527275755, "grad_norm": 0.453125, "learning_rate": 9.406215316315205e-06, "loss": 4.3235, "step": 566 }, { "epoch": 0.18888981427500626, "grad_norm": 0.455078125, "learning_rate": 9.422863485016649e-06, "loss": 4.3726, "step": 567 }, { "epoch": 0.18922295327725494, "grad_norm": 0.474609375, "learning_rate": 9.439511653718092e-06, "loss": 4.3404, "step": 568 }, { "epoch": 0.18955609227950362, "grad_norm": 0.4609375, "learning_rate": 9.456159822419534e-06, "loss": 4.2801, "step": 569 }, { "epoch": 0.1898892312817523, "grad_norm": 0.466796875, "learning_rate": 9.472807991120976e-06, "loss": 4.379, "step": 570 }, { "epoch": 0.190222370284001, "grad_norm": 0.474609375, "learning_rate": 9.48945615982242e-06, "loss": 4.3101, "step": 571 }, { "epoch": 0.19055550928624967, "grad_norm": 0.458984375, "learning_rate": 9.506104328523863e-06, "loss": 4.2984, "step": 572 }, { "epoch": 0.19088864828849839, "grad_norm": 0.484375, "learning_rate": 9.522752497225306e-06, "loss": 4.3468, "step": 573 }, { "epoch": 0.19122178729074707, "grad_norm": 0.458984375, "learning_rate": 9.539400665926748e-06, "loss": 4.3672, "step": 574 }, { "epoch": 0.19155492629299575, "grad_norm": 0.45703125, "learning_rate": 9.556048834628191e-06, "loss": 4.3233, "step": 575 }, { "epoch": 0.19188806529524444, "grad_norm": 0.443359375, "learning_rate": 9.572697003329635e-06, "loss": 4.333, "step": 576 }, { "epoch": 0.19222120429749312, "grad_norm": 0.48046875, "learning_rate": 9.589345172031078e-06, "loss": 4.3603, "step": 577 }, { "epoch": 0.1925543432997418, "grad_norm": 0.482421875, "learning_rate": 9.605993340732518e-06, "loss": 4.2194, "step": 578 }, { "epoch": 0.19288748230199051, "grad_norm": 0.46875, "learning_rate": 9.622641509433962e-06, "loss": 4.3135, "step": 579 }, { "epoch": 0.1932206213042392, "grad_norm": 0.462890625, "learning_rate": 9.639289678135405e-06, "loss": 4.3485, "step": 580 }, { "epoch": 0.19355376030648788, "grad_norm": 0.4453125, "learning_rate": 9.655937846836849e-06, "loss": 4.285, "step": 581 }, { "epoch": 0.19388689930873657, "grad_norm": 0.46484375, "learning_rate": 9.67258601553829e-06, "loss": 4.3298, "step": 582 }, { "epoch": 0.19422003831098525, "grad_norm": 0.45703125, "learning_rate": 9.689234184239734e-06, "loss": 4.2928, "step": 583 }, { "epoch": 0.19455317731323396, "grad_norm": 0.47265625, "learning_rate": 9.705882352941177e-06, "loss": 4.32, "step": 584 }, { "epoch": 0.19488631631548264, "grad_norm": 0.48046875, "learning_rate": 9.72253052164262e-06, "loss": 4.318, "step": 585 }, { "epoch": 0.19521945531773133, "grad_norm": 0.4765625, "learning_rate": 9.73917869034406e-06, "loss": 4.3136, "step": 586 }, { "epoch": 0.19555259431998, "grad_norm": 0.47265625, "learning_rate": 9.755826859045504e-06, "loss": 4.2782, "step": 587 }, { "epoch": 0.1958857333222287, "grad_norm": 0.47265625, "learning_rate": 9.772475027746948e-06, "loss": 4.3498, "step": 588 }, { "epoch": 0.19621887232447738, "grad_norm": 0.470703125, "learning_rate": 9.789123196448391e-06, "loss": 4.4063, "step": 589 }, { "epoch": 0.1965520113267261, "grad_norm": 0.462890625, "learning_rate": 9.805771365149833e-06, "loss": 4.2643, "step": 590 }, { "epoch": 0.19688515032897477, "grad_norm": 0.474609375, "learning_rate": 9.822419533851276e-06, "loss": 4.2769, "step": 591 }, { "epoch": 0.19721828933122346, "grad_norm": 0.46484375, "learning_rate": 9.83906770255272e-06, "loss": 4.2931, "step": 592 }, { "epoch": 0.19755142833347214, "grad_norm": 0.447265625, "learning_rate": 9.855715871254163e-06, "loss": 4.28, "step": 593 }, { "epoch": 0.19788456733572082, "grad_norm": 0.46484375, "learning_rate": 9.872364039955607e-06, "loss": 4.3185, "step": 594 }, { "epoch": 0.1982177063379695, "grad_norm": 0.44140625, "learning_rate": 9.889012208657047e-06, "loss": 4.2708, "step": 595 }, { "epoch": 0.19855084534021822, "grad_norm": 0.47265625, "learning_rate": 9.90566037735849e-06, "loss": 4.3013, "step": 596 }, { "epoch": 0.1988839843424669, "grad_norm": 0.48046875, "learning_rate": 9.922308546059934e-06, "loss": 4.3291, "step": 597 }, { "epoch": 0.19921712334471559, "grad_norm": 0.453125, "learning_rate": 9.938956714761377e-06, "loss": 4.3203, "step": 598 }, { "epoch": 0.19955026234696427, "grad_norm": 0.4453125, "learning_rate": 9.955604883462819e-06, "loss": 4.3103, "step": 599 }, { "epoch": 0.19988340134921295, "grad_norm": 0.447265625, "learning_rate": 9.972253052164262e-06, "loss": 4.3287, "step": 600 }, { "epoch": 0.20021654035146164, "grad_norm": 0.458984375, "learning_rate": 9.988901220865706e-06, "loss": 4.2613, "step": 601 }, { "epoch": 0.20054967935371035, "grad_norm": 0.4453125, "learning_rate": 1.000554938956715e-05, "loss": 4.3627, "step": 602 }, { "epoch": 0.20088281835595903, "grad_norm": 0.470703125, "learning_rate": 1.0022197558268591e-05, "loss": 4.2492, "step": 603 }, { "epoch": 0.20121595735820771, "grad_norm": 0.453125, "learning_rate": 1.0038845726970033e-05, "loss": 4.299, "step": 604 }, { "epoch": 0.2015490963604564, "grad_norm": 0.4765625, "learning_rate": 1.0055493895671476e-05, "loss": 4.2437, "step": 605 }, { "epoch": 0.20188223536270508, "grad_norm": 0.458984375, "learning_rate": 1.007214206437292e-05, "loss": 4.337, "step": 606 }, { "epoch": 0.20221537436495376, "grad_norm": 0.498046875, "learning_rate": 1.0088790233074361e-05, "loss": 4.2883, "step": 607 }, { "epoch": 0.20254851336720248, "grad_norm": 0.447265625, "learning_rate": 1.0105438401775805e-05, "loss": 4.4138, "step": 608 }, { "epoch": 0.20288165236945116, "grad_norm": 0.470703125, "learning_rate": 1.0122086570477248e-05, "loss": 4.2951, "step": 609 }, { "epoch": 0.20321479137169984, "grad_norm": 0.453125, "learning_rate": 1.0138734739178692e-05, "loss": 4.3309, "step": 610 }, { "epoch": 0.20354793037394853, "grad_norm": 0.46484375, "learning_rate": 1.0155382907880133e-05, "loss": 4.3309, "step": 611 }, { "epoch": 0.2038810693761972, "grad_norm": 0.4453125, "learning_rate": 1.0172031076581575e-05, "loss": 4.3639, "step": 612 }, { "epoch": 0.2042142083784459, "grad_norm": 0.49609375, "learning_rate": 1.0188679245283019e-05, "loss": 4.3259, "step": 613 }, { "epoch": 0.2045473473806946, "grad_norm": 0.462890625, "learning_rate": 1.0205327413984462e-05, "loss": 4.2875, "step": 614 }, { "epoch": 0.2048804863829433, "grad_norm": 0.4765625, "learning_rate": 1.0221975582685904e-05, "loss": 4.292, "step": 615 }, { "epoch": 0.20521362538519197, "grad_norm": 0.474609375, "learning_rate": 1.0238623751387347e-05, "loss": 4.2797, "step": 616 }, { "epoch": 0.20554676438744066, "grad_norm": 0.46484375, "learning_rate": 1.025527192008879e-05, "loss": 4.3019, "step": 617 }, { "epoch": 0.20587990338968934, "grad_norm": 0.4453125, "learning_rate": 1.0271920088790234e-05, "loss": 4.2542, "step": 618 }, { "epoch": 0.20621304239193802, "grad_norm": 0.44140625, "learning_rate": 1.0288568257491676e-05, "loss": 4.3123, "step": 619 }, { "epoch": 0.20654618139418673, "grad_norm": 0.46875, "learning_rate": 1.030521642619312e-05, "loss": 4.3455, "step": 620 }, { "epoch": 0.20687932039643542, "grad_norm": 0.447265625, "learning_rate": 1.0321864594894561e-05, "loss": 4.2922, "step": 621 }, { "epoch": 0.2072124593986841, "grad_norm": 0.44140625, "learning_rate": 1.0338512763596005e-05, "loss": 4.3117, "step": 622 }, { "epoch": 0.20754559840093278, "grad_norm": 0.4609375, "learning_rate": 1.0355160932297448e-05, "loss": 4.3168, "step": 623 }, { "epoch": 0.20787873740318147, "grad_norm": 0.46484375, "learning_rate": 1.037180910099889e-05, "loss": 4.2836, "step": 624 }, { "epoch": 0.20821187640543018, "grad_norm": 0.45703125, "learning_rate": 1.0388457269700333e-05, "loss": 4.2849, "step": 625 }, { "epoch": 0.20854501540767886, "grad_norm": 0.462890625, "learning_rate": 1.0405105438401777e-05, "loss": 4.3204, "step": 626 }, { "epoch": 0.20887815440992755, "grad_norm": 0.474609375, "learning_rate": 1.042175360710322e-05, "loss": 4.3424, "step": 627 }, { "epoch": 0.20921129341217623, "grad_norm": 0.4609375, "learning_rate": 1.0438401775804662e-05, "loss": 4.2606, "step": 628 }, { "epoch": 0.2095444324144249, "grad_norm": 0.46484375, "learning_rate": 1.0455049944506105e-05, "loss": 4.3559, "step": 629 }, { "epoch": 0.2098775714166736, "grad_norm": 0.4765625, "learning_rate": 1.0471698113207547e-05, "loss": 4.2803, "step": 630 }, { "epoch": 0.2102107104189223, "grad_norm": 0.45703125, "learning_rate": 1.048834628190899e-05, "loss": 4.3022, "step": 631 }, { "epoch": 0.210543849421171, "grad_norm": 0.470703125, "learning_rate": 1.0504994450610432e-05, "loss": 4.3031, "step": 632 }, { "epoch": 0.21087698842341968, "grad_norm": 0.4765625, "learning_rate": 1.0521642619311876e-05, "loss": 4.3135, "step": 633 }, { "epoch": 0.21121012742566836, "grad_norm": 0.44921875, "learning_rate": 1.053829078801332e-05, "loss": 4.2794, "step": 634 }, { "epoch": 0.21154326642791704, "grad_norm": 0.478515625, "learning_rate": 1.0554938956714763e-05, "loss": 4.2774, "step": 635 }, { "epoch": 0.21187640543016573, "grad_norm": 0.447265625, "learning_rate": 1.0571587125416204e-05, "loss": 4.3552, "step": 636 }, { "epoch": 0.21220954443241444, "grad_norm": 0.46484375, "learning_rate": 1.0588235294117648e-05, "loss": 4.2966, "step": 637 }, { "epoch": 0.21254268343466312, "grad_norm": 0.443359375, "learning_rate": 1.060488346281909e-05, "loss": 4.3446, "step": 638 }, { "epoch": 0.2128758224369118, "grad_norm": 0.47265625, "learning_rate": 1.0621531631520533e-05, "loss": 4.296, "step": 639 }, { "epoch": 0.2132089614391605, "grad_norm": 0.470703125, "learning_rate": 1.0638179800221975e-05, "loss": 4.2905, "step": 640 }, { "epoch": 0.21354210044140917, "grad_norm": 0.470703125, "learning_rate": 1.0654827968923418e-05, "loss": 4.3728, "step": 641 }, { "epoch": 0.21387523944365786, "grad_norm": 0.478515625, "learning_rate": 1.0671476137624862e-05, "loss": 4.3491, "step": 642 }, { "epoch": 0.21420837844590657, "grad_norm": 0.4765625, "learning_rate": 1.0688124306326305e-05, "loss": 4.3507, "step": 643 }, { "epoch": 0.21454151744815525, "grad_norm": 0.4765625, "learning_rate": 1.0704772475027747e-05, "loss": 4.258, "step": 644 }, { "epoch": 0.21487465645040393, "grad_norm": 0.455078125, "learning_rate": 1.072142064372919e-05, "loss": 4.3008, "step": 645 }, { "epoch": 0.21520779545265262, "grad_norm": 0.453125, "learning_rate": 1.0738068812430634e-05, "loss": 4.249, "step": 646 }, { "epoch": 0.2155409344549013, "grad_norm": 0.455078125, "learning_rate": 1.0754716981132076e-05, "loss": 4.3396, "step": 647 }, { "epoch": 0.21587407345714998, "grad_norm": 0.4921875, "learning_rate": 1.0771365149833517e-05, "loss": 4.3481, "step": 648 }, { "epoch": 0.2162072124593987, "grad_norm": 0.47265625, "learning_rate": 1.078801331853496e-05, "loss": 4.3293, "step": 649 }, { "epoch": 0.21654035146164738, "grad_norm": 0.48828125, "learning_rate": 1.0804661487236404e-05, "loss": 4.2852, "step": 650 }, { "epoch": 0.21687349046389606, "grad_norm": 0.4453125, "learning_rate": 1.0821309655937848e-05, "loss": 4.3281, "step": 651 }, { "epoch": 0.21720662946614475, "grad_norm": 0.474609375, "learning_rate": 1.083795782463929e-05, "loss": 4.3284, "step": 652 }, { "epoch": 0.21753976846839343, "grad_norm": 0.451171875, "learning_rate": 1.0854605993340733e-05, "loss": 4.3233, "step": 653 }, { "epoch": 0.2178729074706421, "grad_norm": 0.455078125, "learning_rate": 1.0871254162042176e-05, "loss": 4.3126, "step": 654 }, { "epoch": 0.21820604647289082, "grad_norm": 0.4609375, "learning_rate": 1.088790233074362e-05, "loss": 4.2802, "step": 655 }, { "epoch": 0.2185391854751395, "grad_norm": 0.458984375, "learning_rate": 1.0904550499445061e-05, "loss": 4.2708, "step": 656 }, { "epoch": 0.2188723244773882, "grad_norm": 0.474609375, "learning_rate": 1.0921198668146503e-05, "loss": 4.3091, "step": 657 }, { "epoch": 0.21920546347963688, "grad_norm": 0.458984375, "learning_rate": 1.0937846836847947e-05, "loss": 4.2848, "step": 658 }, { "epoch": 0.21953860248188556, "grad_norm": 0.470703125, "learning_rate": 1.095449500554939e-05, "loss": 4.298, "step": 659 }, { "epoch": 0.21987174148413424, "grad_norm": 0.462890625, "learning_rate": 1.0971143174250834e-05, "loss": 4.2992, "step": 660 }, { "epoch": 0.22020488048638295, "grad_norm": 0.458984375, "learning_rate": 1.0987791342952275e-05, "loss": 4.361, "step": 661 }, { "epoch": 0.22053801948863164, "grad_norm": 0.486328125, "learning_rate": 1.1004439511653719e-05, "loss": 4.2537, "step": 662 }, { "epoch": 0.22087115849088032, "grad_norm": 0.453125, "learning_rate": 1.1021087680355162e-05, "loss": 4.3398, "step": 663 }, { "epoch": 0.221204297493129, "grad_norm": 0.47265625, "learning_rate": 1.1037735849056604e-05, "loss": 4.3206, "step": 664 }, { "epoch": 0.2215374364953777, "grad_norm": 0.458984375, "learning_rate": 1.1054384017758046e-05, "loss": 4.379, "step": 665 }, { "epoch": 0.22187057549762637, "grad_norm": 0.455078125, "learning_rate": 1.107103218645949e-05, "loss": 4.2965, "step": 666 }, { "epoch": 0.22220371449987508, "grad_norm": 0.4609375, "learning_rate": 1.1087680355160933e-05, "loss": 4.2539, "step": 667 }, { "epoch": 0.22253685350212377, "grad_norm": 0.45703125, "learning_rate": 1.1104328523862376e-05, "loss": 4.2366, "step": 668 }, { "epoch": 0.22286999250437245, "grad_norm": 0.470703125, "learning_rate": 1.1120976692563818e-05, "loss": 4.249, "step": 669 }, { "epoch": 0.22320313150662113, "grad_norm": 0.4609375, "learning_rate": 1.1137624861265261e-05, "loss": 4.3573, "step": 670 }, { "epoch": 0.22353627050886982, "grad_norm": 0.482421875, "learning_rate": 1.1154273029966705e-05, "loss": 4.2792, "step": 671 }, { "epoch": 0.22386940951111853, "grad_norm": 0.462890625, "learning_rate": 1.1170921198668148e-05, "loss": 4.3156, "step": 672 }, { "epoch": 0.2242025485133672, "grad_norm": 0.45703125, "learning_rate": 1.1187569367369588e-05, "loss": 4.3156, "step": 673 }, { "epoch": 0.2245356875156159, "grad_norm": 0.482421875, "learning_rate": 1.1204217536071032e-05, "loss": 4.2933, "step": 674 }, { "epoch": 0.22486882651786458, "grad_norm": 0.453125, "learning_rate": 1.1220865704772475e-05, "loss": 4.255, "step": 675 }, { "epoch": 0.22520196552011326, "grad_norm": 0.4375, "learning_rate": 1.1237513873473919e-05, "loss": 4.308, "step": 676 }, { "epoch": 0.22553510452236195, "grad_norm": 0.4609375, "learning_rate": 1.125416204217536e-05, "loss": 4.3225, "step": 677 }, { "epoch": 0.22586824352461066, "grad_norm": 0.53515625, "learning_rate": 1.1270810210876804e-05, "loss": 4.2301, "step": 678 }, { "epoch": 0.22620138252685934, "grad_norm": 0.46484375, "learning_rate": 1.1287458379578247e-05, "loss": 4.3049, "step": 679 }, { "epoch": 0.22653452152910802, "grad_norm": 0.474609375, "learning_rate": 1.130410654827969e-05, "loss": 4.3355, "step": 680 }, { "epoch": 0.2268676605313567, "grad_norm": 0.447265625, "learning_rate": 1.1320754716981132e-05, "loss": 4.3397, "step": 681 }, { "epoch": 0.2272007995336054, "grad_norm": 0.47265625, "learning_rate": 1.1337402885682574e-05, "loss": 4.327, "step": 682 }, { "epoch": 0.22753393853585407, "grad_norm": 0.466796875, "learning_rate": 1.1354051054384018e-05, "loss": 4.2542, "step": 683 }, { "epoch": 0.22786707753810279, "grad_norm": 0.466796875, "learning_rate": 1.1370699223085461e-05, "loss": 4.2699, "step": 684 }, { "epoch": 0.22820021654035147, "grad_norm": 0.45703125, "learning_rate": 1.1387347391786904e-05, "loss": 4.2652, "step": 685 }, { "epoch": 0.22853335554260015, "grad_norm": 0.46484375, "learning_rate": 1.1403995560488346e-05, "loss": 4.2609, "step": 686 }, { "epoch": 0.22886649454484884, "grad_norm": 0.466796875, "learning_rate": 1.142064372918979e-05, "loss": 4.3202, "step": 687 }, { "epoch": 0.22919963354709752, "grad_norm": 0.478515625, "learning_rate": 1.1437291897891233e-05, "loss": 4.2149, "step": 688 }, { "epoch": 0.2295327725493462, "grad_norm": 0.44921875, "learning_rate": 1.1453940066592677e-05, "loss": 4.314, "step": 689 }, { "epoch": 0.22986591155159491, "grad_norm": 0.47265625, "learning_rate": 1.1470588235294117e-05, "loss": 4.2357, "step": 690 }, { "epoch": 0.2301990505538436, "grad_norm": 0.486328125, "learning_rate": 1.148723640399556e-05, "loss": 4.2774, "step": 691 }, { "epoch": 0.23053218955609228, "grad_norm": 0.4609375, "learning_rate": 1.1503884572697004e-05, "loss": 4.2845, "step": 692 }, { "epoch": 0.23086532855834097, "grad_norm": 0.462890625, "learning_rate": 1.1520532741398447e-05, "loss": 4.3475, "step": 693 }, { "epoch": 0.23119846756058965, "grad_norm": 0.439453125, "learning_rate": 1.1537180910099889e-05, "loss": 4.3074, "step": 694 }, { "epoch": 0.23153160656283833, "grad_norm": 0.466796875, "learning_rate": 1.1553829078801332e-05, "loss": 4.2972, "step": 695 }, { "epoch": 0.23186474556508704, "grad_norm": 0.45703125, "learning_rate": 1.1570477247502776e-05, "loss": 4.2919, "step": 696 }, { "epoch": 0.23219788456733573, "grad_norm": 0.470703125, "learning_rate": 1.1587125416204219e-05, "loss": 4.2282, "step": 697 }, { "epoch": 0.2325310235695844, "grad_norm": 0.4453125, "learning_rate": 1.160377358490566e-05, "loss": 4.286, "step": 698 }, { "epoch": 0.2328641625718331, "grad_norm": 0.44921875, "learning_rate": 1.1620421753607103e-05, "loss": 4.3308, "step": 699 }, { "epoch": 0.23319730157408178, "grad_norm": 0.484375, "learning_rate": 1.1637069922308546e-05, "loss": 4.2591, "step": 700 }, { "epoch": 0.23353044057633046, "grad_norm": 0.482421875, "learning_rate": 1.165371809100999e-05, "loss": 4.3092, "step": 701 }, { "epoch": 0.23386357957857917, "grad_norm": 0.46484375, "learning_rate": 1.1670366259711431e-05, "loss": 4.3105, "step": 702 }, { "epoch": 0.23419671858082786, "grad_norm": 0.470703125, "learning_rate": 1.1687014428412875e-05, "loss": 4.3469, "step": 703 }, { "epoch": 0.23452985758307654, "grad_norm": 0.455078125, "learning_rate": 1.1703662597114318e-05, "loss": 4.3056, "step": 704 }, { "epoch": 0.23486299658532522, "grad_norm": 0.470703125, "learning_rate": 1.1720310765815762e-05, "loss": 4.3081, "step": 705 }, { "epoch": 0.2351961355875739, "grad_norm": 0.44921875, "learning_rate": 1.1736958934517203e-05, "loss": 4.31, "step": 706 }, { "epoch": 0.2355292745898226, "grad_norm": 0.453125, "learning_rate": 1.1753607103218647e-05, "loss": 4.3288, "step": 707 }, { "epoch": 0.2358624135920713, "grad_norm": 0.470703125, "learning_rate": 1.1770255271920089e-05, "loss": 4.2564, "step": 708 }, { "epoch": 0.23619555259431999, "grad_norm": 0.466796875, "learning_rate": 1.1786903440621532e-05, "loss": 4.3367, "step": 709 }, { "epoch": 0.23652869159656867, "grad_norm": 0.45703125, "learning_rate": 1.1803551609322974e-05, "loss": 4.2946, "step": 710 }, { "epoch": 0.23686183059881735, "grad_norm": 0.44921875, "learning_rate": 1.1820199778024417e-05, "loss": 4.3208, "step": 711 }, { "epoch": 0.23719496960106604, "grad_norm": 0.46484375, "learning_rate": 1.183684794672586e-05, "loss": 4.2593, "step": 712 }, { "epoch": 0.23752810860331472, "grad_norm": 0.47265625, "learning_rate": 1.1853496115427304e-05, "loss": 4.2805, "step": 713 }, { "epoch": 0.23786124760556343, "grad_norm": 0.4453125, "learning_rate": 1.1870144284128746e-05, "loss": 4.3088, "step": 714 }, { "epoch": 0.23819438660781211, "grad_norm": 0.458984375, "learning_rate": 1.188679245283019e-05, "loss": 4.2624, "step": 715 }, { "epoch": 0.2385275256100608, "grad_norm": 0.46484375, "learning_rate": 1.1903440621531631e-05, "loss": 4.244, "step": 716 }, { "epoch": 0.23886066461230948, "grad_norm": 0.466796875, "learning_rate": 1.1920088790233074e-05, "loss": 4.3341, "step": 717 }, { "epoch": 0.23919380361455816, "grad_norm": 0.44921875, "learning_rate": 1.1936736958934518e-05, "loss": 4.2741, "step": 718 }, { "epoch": 0.23952694261680688, "grad_norm": 0.458984375, "learning_rate": 1.195338512763596e-05, "loss": 4.3124, "step": 719 }, { "epoch": 0.23986008161905556, "grad_norm": 0.451171875, "learning_rate": 1.1970033296337403e-05, "loss": 4.2711, "step": 720 }, { "epoch": 0.24019322062130424, "grad_norm": 0.4765625, "learning_rate": 1.1986681465038847e-05, "loss": 4.2762, "step": 721 }, { "epoch": 0.24052635962355293, "grad_norm": 0.474609375, "learning_rate": 1.200332963374029e-05, "loss": 4.271, "step": 722 }, { "epoch": 0.2408594986258016, "grad_norm": 0.48046875, "learning_rate": 1.2019977802441732e-05, "loss": 4.2715, "step": 723 }, { "epoch": 0.2411926376280503, "grad_norm": 0.451171875, "learning_rate": 1.2036625971143175e-05, "loss": 4.255, "step": 724 }, { "epoch": 0.241525776630299, "grad_norm": 0.4765625, "learning_rate": 1.2053274139844617e-05, "loss": 4.1805, "step": 725 }, { "epoch": 0.2418589156325477, "grad_norm": 0.46484375, "learning_rate": 1.206992230854606e-05, "loss": 4.2455, "step": 726 }, { "epoch": 0.24219205463479637, "grad_norm": 0.455078125, "learning_rate": 1.2086570477247502e-05, "loss": 4.25, "step": 727 }, { "epoch": 0.24252519363704506, "grad_norm": 0.48828125, "learning_rate": 1.2103218645948946e-05, "loss": 4.3398, "step": 728 }, { "epoch": 0.24285833263929374, "grad_norm": 0.45703125, "learning_rate": 1.2119866814650389e-05, "loss": 4.2831, "step": 729 }, { "epoch": 0.24319147164154242, "grad_norm": 0.48046875, "learning_rate": 1.2136514983351833e-05, "loss": 4.3203, "step": 730 }, { "epoch": 0.24352461064379113, "grad_norm": 0.474609375, "learning_rate": 1.2153163152053274e-05, "loss": 4.2918, "step": 731 }, { "epoch": 0.24385774964603982, "grad_norm": 0.453125, "learning_rate": 1.2169811320754718e-05, "loss": 4.2942, "step": 732 }, { "epoch": 0.2441908886482885, "grad_norm": 0.474609375, "learning_rate": 1.2186459489456161e-05, "loss": 4.251, "step": 733 }, { "epoch": 0.24452402765053718, "grad_norm": 0.46484375, "learning_rate": 1.2203107658157603e-05, "loss": 4.3196, "step": 734 }, { "epoch": 0.24485716665278587, "grad_norm": 0.486328125, "learning_rate": 1.2219755826859045e-05, "loss": 4.3119, "step": 735 }, { "epoch": 0.24519030565503455, "grad_norm": 0.47265625, "learning_rate": 1.2236403995560488e-05, "loss": 4.3002, "step": 736 }, { "epoch": 0.24552344465728326, "grad_norm": 0.453125, "learning_rate": 1.2253052164261932e-05, "loss": 4.3001, "step": 737 }, { "epoch": 0.24585658365953195, "grad_norm": 0.474609375, "learning_rate": 1.2269700332963375e-05, "loss": 4.3254, "step": 738 }, { "epoch": 0.24618972266178063, "grad_norm": 0.470703125, "learning_rate": 1.2286348501664817e-05, "loss": 4.2471, "step": 739 }, { "epoch": 0.2465228616640293, "grad_norm": 0.4609375, "learning_rate": 1.230299667036626e-05, "loss": 4.3072, "step": 740 }, { "epoch": 0.246856000666278, "grad_norm": 0.486328125, "learning_rate": 1.2319644839067704e-05, "loss": 4.2622, "step": 741 }, { "epoch": 0.24718913966852668, "grad_norm": 0.484375, "learning_rate": 1.2336293007769145e-05, "loss": 4.249, "step": 742 }, { "epoch": 0.2475222786707754, "grad_norm": 0.462890625, "learning_rate": 1.2352941176470587e-05, "loss": 4.3098, "step": 743 }, { "epoch": 0.24785541767302408, "grad_norm": 0.47265625, "learning_rate": 1.236958934517203e-05, "loss": 4.3334, "step": 744 }, { "epoch": 0.24818855667527276, "grad_norm": 0.47265625, "learning_rate": 1.2386237513873474e-05, "loss": 4.3383, "step": 745 }, { "epoch": 0.24852169567752144, "grad_norm": 0.48046875, "learning_rate": 1.2402885682574917e-05, "loss": 4.2447, "step": 746 }, { "epoch": 0.24885483467977013, "grad_norm": 0.46875, "learning_rate": 1.2419533851276361e-05, "loss": 4.2421, "step": 747 }, { "epoch": 0.2491879736820188, "grad_norm": 0.51171875, "learning_rate": 1.2436182019977803e-05, "loss": 4.1842, "step": 748 }, { "epoch": 0.24952111268426752, "grad_norm": 0.47265625, "learning_rate": 1.2452830188679246e-05, "loss": 4.2252, "step": 749 }, { "epoch": 0.2498542516865162, "grad_norm": 0.50390625, "learning_rate": 1.246947835738069e-05, "loss": 4.2485, "step": 750 }, { "epoch": 0.2501873906887649, "grad_norm": 0.5546875, "learning_rate": 1.2486126526082131e-05, "loss": 4.2651, "step": 751 }, { "epoch": 0.25052052969101357, "grad_norm": 0.515625, "learning_rate": 1.2502774694783573e-05, "loss": 4.238, "step": 752 }, { "epoch": 0.25085366869326226, "grad_norm": 0.462890625, "learning_rate": 1.2519422863485017e-05, "loss": 4.2577, "step": 753 }, { "epoch": 0.25118680769551094, "grad_norm": 0.462890625, "learning_rate": 1.253607103218646e-05, "loss": 4.2316, "step": 754 }, { "epoch": 0.2515199466977596, "grad_norm": 0.51953125, "learning_rate": 1.2552719200887903e-05, "loss": 4.2626, "step": 755 }, { "epoch": 0.2518530857000083, "grad_norm": 0.486328125, "learning_rate": 1.2569367369589345e-05, "loss": 4.2481, "step": 756 }, { "epoch": 0.252186224702257, "grad_norm": 0.50390625, "learning_rate": 1.2586015538290789e-05, "loss": 4.2087, "step": 757 }, { "epoch": 0.25251936370450573, "grad_norm": 0.5, "learning_rate": 1.2602663706992232e-05, "loss": 4.4053, "step": 758 }, { "epoch": 0.2528525027067544, "grad_norm": 0.4609375, "learning_rate": 1.2619311875693676e-05, "loss": 4.2627, "step": 759 }, { "epoch": 0.2531856417090031, "grad_norm": 0.46875, "learning_rate": 1.2635960044395116e-05, "loss": 4.2653, "step": 760 }, { "epoch": 0.2535187807112518, "grad_norm": 0.490234375, "learning_rate": 1.2652608213096559e-05, "loss": 4.2138, "step": 761 }, { "epoch": 0.25385191971350046, "grad_norm": 0.490234375, "learning_rate": 1.2669256381798002e-05, "loss": 4.2513, "step": 762 }, { "epoch": 0.25418505871574915, "grad_norm": 0.47265625, "learning_rate": 1.2685904550499446e-05, "loss": 4.3307, "step": 763 }, { "epoch": 0.25451819771799783, "grad_norm": 0.5703125, "learning_rate": 1.2702552719200888e-05, "loss": 4.2748, "step": 764 }, { "epoch": 0.2548513367202465, "grad_norm": 0.5859375, "learning_rate": 1.2719200887902331e-05, "loss": 4.3053, "step": 765 }, { "epoch": 0.2551844757224952, "grad_norm": 0.51171875, "learning_rate": 1.2735849056603775e-05, "loss": 4.2753, "step": 766 }, { "epoch": 0.2555176147247439, "grad_norm": 0.45703125, "learning_rate": 1.2752497225305218e-05, "loss": 4.2636, "step": 767 }, { "epoch": 0.25585075372699256, "grad_norm": 0.46875, "learning_rate": 1.2769145394006658e-05, "loss": 4.2303, "step": 768 }, { "epoch": 0.2561838927292413, "grad_norm": 0.482421875, "learning_rate": 1.2785793562708102e-05, "loss": 4.2913, "step": 769 }, { "epoch": 0.25651703173149, "grad_norm": 0.498046875, "learning_rate": 1.2802441731409545e-05, "loss": 4.3275, "step": 770 }, { "epoch": 0.25685017073373867, "grad_norm": 0.5234375, "learning_rate": 1.2819089900110988e-05, "loss": 4.336, "step": 771 }, { "epoch": 0.25718330973598735, "grad_norm": 0.5078125, "learning_rate": 1.283573806881243e-05, "loss": 4.2375, "step": 772 }, { "epoch": 0.25751644873823604, "grad_norm": 0.49609375, "learning_rate": 1.2852386237513874e-05, "loss": 4.2774, "step": 773 }, { "epoch": 0.2578495877404847, "grad_norm": 0.48046875, "learning_rate": 1.2869034406215317e-05, "loss": 4.2393, "step": 774 }, { "epoch": 0.2581827267427334, "grad_norm": 0.44921875, "learning_rate": 1.288568257491676e-05, "loss": 4.2706, "step": 775 }, { "epoch": 0.2585158657449821, "grad_norm": 0.474609375, "learning_rate": 1.2902330743618202e-05, "loss": 4.2189, "step": 776 }, { "epoch": 0.25884900474723077, "grad_norm": 0.48828125, "learning_rate": 1.2918978912319644e-05, "loss": 4.3303, "step": 777 }, { "epoch": 0.25918214374947945, "grad_norm": 0.4921875, "learning_rate": 1.2935627081021087e-05, "loss": 4.3059, "step": 778 }, { "epoch": 0.25951528275172814, "grad_norm": 0.498046875, "learning_rate": 1.2952275249722531e-05, "loss": 4.2558, "step": 779 }, { "epoch": 0.2598484217539768, "grad_norm": 0.494140625, "learning_rate": 1.2968923418423974e-05, "loss": 4.3225, "step": 780 }, { "epoch": 0.26018156075622556, "grad_norm": 0.48046875, "learning_rate": 1.2985571587125416e-05, "loss": 4.1904, "step": 781 }, { "epoch": 0.26051469975847424, "grad_norm": 0.474609375, "learning_rate": 1.300221975582686e-05, "loss": 4.2846, "step": 782 }, { "epoch": 0.2608478387607229, "grad_norm": 0.46875, "learning_rate": 1.3018867924528303e-05, "loss": 4.3042, "step": 783 }, { "epoch": 0.2611809777629716, "grad_norm": 0.470703125, "learning_rate": 1.3035516093229746e-05, "loss": 4.3187, "step": 784 }, { "epoch": 0.2615141167652203, "grad_norm": 0.46875, "learning_rate": 1.3052164261931188e-05, "loss": 4.2597, "step": 785 }, { "epoch": 0.261847255767469, "grad_norm": 0.486328125, "learning_rate": 1.306881243063263e-05, "loss": 4.2928, "step": 786 }, { "epoch": 0.26218039476971766, "grad_norm": 0.474609375, "learning_rate": 1.3085460599334073e-05, "loss": 4.2819, "step": 787 }, { "epoch": 0.26251353377196635, "grad_norm": 0.51171875, "learning_rate": 1.3102108768035517e-05, "loss": 4.213, "step": 788 }, { "epoch": 0.26284667277421503, "grad_norm": 0.4765625, "learning_rate": 1.3118756936736959e-05, "loss": 4.1928, "step": 789 }, { "epoch": 0.2631798117764637, "grad_norm": 0.439453125, "learning_rate": 1.3135405105438402e-05, "loss": 4.2486, "step": 790 }, { "epoch": 0.2635129507787124, "grad_norm": 0.458984375, "learning_rate": 1.3152053274139846e-05, "loss": 4.3149, "step": 791 }, { "epoch": 0.2638460897809611, "grad_norm": 0.494140625, "learning_rate": 1.3168701442841289e-05, "loss": 4.2867, "step": 792 }, { "epoch": 0.2641792287832098, "grad_norm": 0.4921875, "learning_rate": 1.318534961154273e-05, "loss": 4.2727, "step": 793 }, { "epoch": 0.2645123677854585, "grad_norm": 0.4765625, "learning_rate": 1.3201997780244172e-05, "loss": 4.2488, "step": 794 }, { "epoch": 0.2648455067877072, "grad_norm": 0.455078125, "learning_rate": 1.3218645948945616e-05, "loss": 4.2927, "step": 795 }, { "epoch": 0.26517864578995587, "grad_norm": 0.4609375, "learning_rate": 1.323529411764706e-05, "loss": 4.234, "step": 796 }, { "epoch": 0.26551178479220455, "grad_norm": 0.455078125, "learning_rate": 1.3251942286348501e-05, "loss": 4.3002, "step": 797 }, { "epoch": 0.26584492379445324, "grad_norm": 0.494140625, "learning_rate": 1.3268590455049945e-05, "loss": 4.2122, "step": 798 }, { "epoch": 0.2661780627967019, "grad_norm": 0.48828125, "learning_rate": 1.3285238623751388e-05, "loss": 4.2267, "step": 799 }, { "epoch": 0.2665112017989506, "grad_norm": 0.44921875, "learning_rate": 1.3301886792452831e-05, "loss": 4.2038, "step": 800 }, { "epoch": 0.2668443408011993, "grad_norm": 0.462890625, "learning_rate": 1.3318534961154273e-05, "loss": 4.2488, "step": 801 }, { "epoch": 0.26717747980344797, "grad_norm": 0.466796875, "learning_rate": 1.3335183129855717e-05, "loss": 4.2515, "step": 802 }, { "epoch": 0.26751061880569665, "grad_norm": 0.455078125, "learning_rate": 1.3351831298557158e-05, "loss": 4.2823, "step": 803 }, { "epoch": 0.2678437578079454, "grad_norm": 0.474609375, "learning_rate": 1.3368479467258602e-05, "loss": 4.2951, "step": 804 }, { "epoch": 0.2681768968101941, "grad_norm": 0.5, "learning_rate": 1.3385127635960044e-05, "loss": 4.1978, "step": 805 }, { "epoch": 0.26851003581244276, "grad_norm": 0.490234375, "learning_rate": 1.3401775804661487e-05, "loss": 4.2163, "step": 806 }, { "epoch": 0.26884317481469144, "grad_norm": 0.48046875, "learning_rate": 1.341842397336293e-05, "loss": 4.1877, "step": 807 }, { "epoch": 0.2691763138169401, "grad_norm": 0.470703125, "learning_rate": 1.3435072142064374e-05, "loss": 4.1991, "step": 808 }, { "epoch": 0.2695094528191888, "grad_norm": 0.462890625, "learning_rate": 1.3451720310765816e-05, "loss": 4.2658, "step": 809 }, { "epoch": 0.2698425918214375, "grad_norm": 0.5, "learning_rate": 1.3468368479467259e-05, "loss": 4.2571, "step": 810 }, { "epoch": 0.2701757308236862, "grad_norm": 0.4609375, "learning_rate": 1.3485016648168703e-05, "loss": 4.2552, "step": 811 }, { "epoch": 0.27050886982593486, "grad_norm": 0.498046875, "learning_rate": 1.3501664816870144e-05, "loss": 4.3024, "step": 812 }, { "epoch": 0.27084200882818354, "grad_norm": 0.47265625, "learning_rate": 1.3518312985571588e-05, "loss": 4.2807, "step": 813 }, { "epoch": 0.27117514783043223, "grad_norm": 0.50390625, "learning_rate": 1.353496115427303e-05, "loss": 4.254, "step": 814 }, { "epoch": 0.2715082868326809, "grad_norm": 0.50390625, "learning_rate": 1.3551609322974473e-05, "loss": 4.2737, "step": 815 }, { "epoch": 0.27184142583492965, "grad_norm": 0.482421875, "learning_rate": 1.3568257491675916e-05, "loss": 4.2047, "step": 816 }, { "epoch": 0.27217456483717833, "grad_norm": 0.482421875, "learning_rate": 1.358490566037736e-05, "loss": 4.1694, "step": 817 }, { "epoch": 0.272507703839427, "grad_norm": 0.515625, "learning_rate": 1.3601553829078802e-05, "loss": 4.2317, "step": 818 }, { "epoch": 0.2728408428416757, "grad_norm": 0.48046875, "learning_rate": 1.3618201997780245e-05, "loss": 4.2337, "step": 819 }, { "epoch": 0.2731739818439244, "grad_norm": 0.484375, "learning_rate": 1.3634850166481687e-05, "loss": 4.2568, "step": 820 }, { "epoch": 0.27350712084617307, "grad_norm": 0.474609375, "learning_rate": 1.365149833518313e-05, "loss": 4.241, "step": 821 }, { "epoch": 0.27384025984842175, "grad_norm": 0.5, "learning_rate": 1.3668146503884572e-05, "loss": 4.2715, "step": 822 }, { "epoch": 0.27417339885067044, "grad_norm": 0.486328125, "learning_rate": 1.3684794672586015e-05, "loss": 4.2467, "step": 823 }, { "epoch": 0.2745065378529191, "grad_norm": 0.486328125, "learning_rate": 1.3701442841287459e-05, "loss": 4.2926, "step": 824 }, { "epoch": 0.2748396768551678, "grad_norm": 0.55078125, "learning_rate": 1.3718091009988902e-05, "loss": 4.2957, "step": 825 }, { "epoch": 0.2751728158574165, "grad_norm": 0.48046875, "learning_rate": 1.3734739178690344e-05, "loss": 4.26, "step": 826 }, { "epoch": 0.27550595485966517, "grad_norm": 0.47265625, "learning_rate": 1.3751387347391788e-05, "loss": 4.3068, "step": 827 }, { "epoch": 0.2758390938619139, "grad_norm": 0.46484375, "learning_rate": 1.3768035516093231e-05, "loss": 4.2947, "step": 828 }, { "epoch": 0.2761722328641626, "grad_norm": 0.478515625, "learning_rate": 1.3784683684794673e-05, "loss": 4.2168, "step": 829 }, { "epoch": 0.2765053718664113, "grad_norm": 0.546875, "learning_rate": 1.3801331853496115e-05, "loss": 4.2304, "step": 830 }, { "epoch": 0.27683851086865996, "grad_norm": 0.484375, "learning_rate": 1.3817980022197558e-05, "loss": 4.2424, "step": 831 }, { "epoch": 0.27717164987090864, "grad_norm": 0.47265625, "learning_rate": 1.3834628190899001e-05, "loss": 4.3289, "step": 832 }, { "epoch": 0.2775047888731573, "grad_norm": 0.458984375, "learning_rate": 1.3851276359600445e-05, "loss": 4.2212, "step": 833 }, { "epoch": 0.277837927875406, "grad_norm": 0.515625, "learning_rate": 1.3867924528301887e-05, "loss": 4.2794, "step": 834 }, { "epoch": 0.2781710668776547, "grad_norm": 0.4765625, "learning_rate": 1.388457269700333e-05, "loss": 4.2621, "step": 835 }, { "epoch": 0.2785042058799034, "grad_norm": 0.5078125, "learning_rate": 1.3901220865704774e-05, "loss": 4.2633, "step": 836 }, { "epoch": 0.27883734488215206, "grad_norm": 0.484375, "learning_rate": 1.3917869034406217e-05, "loss": 4.322, "step": 837 }, { "epoch": 0.27917048388440074, "grad_norm": 0.5078125, "learning_rate": 1.3934517203107657e-05, "loss": 4.2133, "step": 838 }, { "epoch": 0.2795036228866494, "grad_norm": 0.52734375, "learning_rate": 1.39511653718091e-05, "loss": 4.2302, "step": 839 }, { "epoch": 0.27983676188889817, "grad_norm": 0.5078125, "learning_rate": 1.3967813540510544e-05, "loss": 4.2026, "step": 840 }, { "epoch": 0.28016990089114685, "grad_norm": 0.4765625, "learning_rate": 1.3984461709211987e-05, "loss": 4.292, "step": 841 }, { "epoch": 0.28050303989339553, "grad_norm": 0.474609375, "learning_rate": 1.400110987791343e-05, "loss": 4.3179, "step": 842 }, { "epoch": 0.2808361788956442, "grad_norm": 0.5390625, "learning_rate": 1.4017758046614873e-05, "loss": 4.2161, "step": 843 }, { "epoch": 0.2811693178978929, "grad_norm": 0.515625, "learning_rate": 1.4034406215316316e-05, "loss": 4.2767, "step": 844 }, { "epoch": 0.2815024569001416, "grad_norm": 0.48828125, "learning_rate": 1.405105438401776e-05, "loss": 4.2493, "step": 845 }, { "epoch": 0.28183559590239027, "grad_norm": 0.478515625, "learning_rate": 1.4067702552719201e-05, "loss": 4.3152, "step": 846 }, { "epoch": 0.28216873490463895, "grad_norm": 0.5078125, "learning_rate": 1.4084350721420643e-05, "loss": 4.2267, "step": 847 }, { "epoch": 0.28250187390688764, "grad_norm": 0.453125, "learning_rate": 1.4100998890122086e-05, "loss": 4.2986, "step": 848 }, { "epoch": 0.2828350129091363, "grad_norm": 0.484375, "learning_rate": 1.411764705882353e-05, "loss": 4.2179, "step": 849 }, { "epoch": 0.283168151911385, "grad_norm": 0.5, "learning_rate": 1.4134295227524973e-05, "loss": 4.3005, "step": 850 }, { "epoch": 0.28350129091363374, "grad_norm": 0.5078125, "learning_rate": 1.4150943396226415e-05, "loss": 4.2448, "step": 851 }, { "epoch": 0.2838344299158824, "grad_norm": 0.47265625, "learning_rate": 1.4167591564927859e-05, "loss": 4.3343, "step": 852 }, { "epoch": 0.2841675689181311, "grad_norm": 0.5078125, "learning_rate": 1.4184239733629302e-05, "loss": 4.3058, "step": 853 }, { "epoch": 0.2845007079203798, "grad_norm": 0.5390625, "learning_rate": 1.4200887902330745e-05, "loss": 4.2189, "step": 854 }, { "epoch": 0.2848338469226285, "grad_norm": 0.466796875, "learning_rate": 1.4217536071032185e-05, "loss": 4.2325, "step": 855 }, { "epoch": 0.28516698592487716, "grad_norm": 0.46875, "learning_rate": 1.4234184239733629e-05, "loss": 4.2507, "step": 856 }, { "epoch": 0.28550012492712584, "grad_norm": 0.474609375, "learning_rate": 1.4250832408435072e-05, "loss": 4.2436, "step": 857 }, { "epoch": 0.2858332639293745, "grad_norm": 0.4609375, "learning_rate": 1.4267480577136516e-05, "loss": 4.1575, "step": 858 }, { "epoch": 0.2861664029316232, "grad_norm": 0.51171875, "learning_rate": 1.4284128745837958e-05, "loss": 4.1896, "step": 859 }, { "epoch": 0.2864995419338719, "grad_norm": 0.48828125, "learning_rate": 1.4300776914539401e-05, "loss": 4.2643, "step": 860 }, { "epoch": 0.2868326809361206, "grad_norm": 0.4609375, "learning_rate": 1.4317425083240844e-05, "loss": 4.2572, "step": 861 }, { "epoch": 0.28716581993836926, "grad_norm": 0.52734375, "learning_rate": 1.4334073251942288e-05, "loss": 4.2729, "step": 862 }, { "epoch": 0.287498958940618, "grad_norm": 0.498046875, "learning_rate": 1.435072142064373e-05, "loss": 4.2201, "step": 863 }, { "epoch": 0.2878320979428667, "grad_norm": 0.470703125, "learning_rate": 1.4367369589345171e-05, "loss": 4.2461, "step": 864 }, { "epoch": 0.28816523694511537, "grad_norm": 0.50390625, "learning_rate": 1.4384017758046615e-05, "loss": 4.2184, "step": 865 }, { "epoch": 0.28849837594736405, "grad_norm": 0.51953125, "learning_rate": 1.4400665926748058e-05, "loss": 4.2328, "step": 866 }, { "epoch": 0.28883151494961273, "grad_norm": 0.48046875, "learning_rate": 1.44173140954495e-05, "loss": 4.2004, "step": 867 }, { "epoch": 0.2891646539518614, "grad_norm": 0.5234375, "learning_rate": 1.4433962264150944e-05, "loss": 4.2975, "step": 868 }, { "epoch": 0.2894977929541101, "grad_norm": 0.478515625, "learning_rate": 1.4450610432852387e-05, "loss": 4.2318, "step": 869 }, { "epoch": 0.2898309319563588, "grad_norm": 0.484375, "learning_rate": 1.446725860155383e-05, "loss": 4.2484, "step": 870 }, { "epoch": 0.29016407095860747, "grad_norm": 0.470703125, "learning_rate": 1.4483906770255272e-05, "loss": 4.2208, "step": 871 }, { "epoch": 0.29049720996085615, "grad_norm": 0.5546875, "learning_rate": 1.4500554938956714e-05, "loss": 4.2228, "step": 872 }, { "epoch": 0.29083034896310483, "grad_norm": 0.4765625, "learning_rate": 1.4517203107658157e-05, "loss": 4.1794, "step": 873 }, { "epoch": 0.2911634879653535, "grad_norm": 0.4921875, "learning_rate": 1.45338512763596e-05, "loss": 4.2492, "step": 874 }, { "epoch": 0.29149662696760226, "grad_norm": 0.515625, "learning_rate": 1.4550499445061044e-05, "loss": 4.2618, "step": 875 }, { "epoch": 0.29182976596985094, "grad_norm": 0.515625, "learning_rate": 1.4567147613762486e-05, "loss": 4.2997, "step": 876 }, { "epoch": 0.2921629049720996, "grad_norm": 0.470703125, "learning_rate": 1.458379578246393e-05, "loss": 4.3445, "step": 877 }, { "epoch": 0.2924960439743483, "grad_norm": 0.515625, "learning_rate": 1.4600443951165373e-05, "loss": 4.2703, "step": 878 }, { "epoch": 0.292829182976597, "grad_norm": 0.50390625, "learning_rate": 1.4617092119866816e-05, "loss": 4.2158, "step": 879 }, { "epoch": 0.2931623219788457, "grad_norm": 0.53515625, "learning_rate": 1.4633740288568258e-05, "loss": 4.2098, "step": 880 }, { "epoch": 0.29349546098109436, "grad_norm": 0.56640625, "learning_rate": 1.46503884572697e-05, "loss": 4.2321, "step": 881 }, { "epoch": 0.29382859998334304, "grad_norm": 0.5625, "learning_rate": 1.4667036625971143e-05, "loss": 4.269, "step": 882 }, { "epoch": 0.2941617389855917, "grad_norm": 0.494140625, "learning_rate": 1.4683684794672587e-05, "loss": 4.2655, "step": 883 }, { "epoch": 0.2944948779878404, "grad_norm": 0.49609375, "learning_rate": 1.4700332963374028e-05, "loss": 4.2179, "step": 884 }, { "epoch": 0.2948280169900891, "grad_norm": 0.49609375, "learning_rate": 1.4716981132075472e-05, "loss": 4.2109, "step": 885 }, { "epoch": 0.2951611559923378, "grad_norm": 0.50390625, "learning_rate": 1.4733629300776915e-05, "loss": 4.2293, "step": 886 }, { "epoch": 0.2954942949945865, "grad_norm": 0.515625, "learning_rate": 1.4750277469478359e-05, "loss": 4.1776, "step": 887 }, { "epoch": 0.2958274339968352, "grad_norm": 0.4765625, "learning_rate": 1.47669256381798e-05, "loss": 4.2574, "step": 888 }, { "epoch": 0.2961605729990839, "grad_norm": 0.51953125, "learning_rate": 1.4783573806881244e-05, "loss": 4.2539, "step": 889 }, { "epoch": 0.29649371200133257, "grad_norm": 0.5703125, "learning_rate": 1.4800221975582686e-05, "loss": 4.2618, "step": 890 }, { "epoch": 0.29682685100358125, "grad_norm": 0.51953125, "learning_rate": 1.481687014428413e-05, "loss": 4.2833, "step": 891 }, { "epoch": 0.29715999000582993, "grad_norm": 0.5, "learning_rate": 1.4833518312985571e-05, "loss": 4.2936, "step": 892 }, { "epoch": 0.2974931290080786, "grad_norm": 0.5078125, "learning_rate": 1.4850166481687014e-05, "loss": 4.2638, "step": 893 }, { "epoch": 0.2978262680103273, "grad_norm": 0.50390625, "learning_rate": 1.4866814650388458e-05, "loss": 4.2456, "step": 894 }, { "epoch": 0.298159407012576, "grad_norm": 0.515625, "learning_rate": 1.4883462819089901e-05, "loss": 4.2456, "step": 895 }, { "epoch": 0.29849254601482467, "grad_norm": 0.498046875, "learning_rate": 1.4900110987791343e-05, "loss": 4.2594, "step": 896 }, { "epoch": 0.29882568501707335, "grad_norm": 0.5, "learning_rate": 1.4916759156492787e-05, "loss": 4.2377, "step": 897 }, { "epoch": 0.2991588240193221, "grad_norm": 0.498046875, "learning_rate": 1.4933407325194228e-05, "loss": 4.2304, "step": 898 }, { "epoch": 0.2994919630215708, "grad_norm": 0.5078125, "learning_rate": 1.4950055493895672e-05, "loss": 4.2574, "step": 899 }, { "epoch": 0.29982510202381946, "grad_norm": 0.49609375, "learning_rate": 1.4966703662597113e-05, "loss": 4.2417, "step": 900 }, { "epoch": 0.30015824102606814, "grad_norm": 0.48046875, "learning_rate": 1.4983351831298557e-05, "loss": 4.2311, "step": 901 }, { "epoch": 0.3004913800283168, "grad_norm": 0.5390625, "learning_rate": 1.5e-05, "loss": 4.2702, "step": 902 }, { "epoch": 0.3008245190305655, "grad_norm": 0.51953125, "learning_rate": 1.4999999956320624e-05, "loss": 4.2645, "step": 903 }, { "epoch": 0.3011576580328142, "grad_norm": 0.486328125, "learning_rate": 1.499999982528249e-05, "loss": 4.2511, "step": 904 }, { "epoch": 0.3014907970350629, "grad_norm": 0.47265625, "learning_rate": 1.4999999606885606e-05, "loss": 4.2858, "step": 905 }, { "epoch": 0.30182393603731156, "grad_norm": 0.5078125, "learning_rate": 1.499999930112997e-05, "loss": 4.2568, "step": 906 }, { "epoch": 0.30215707503956024, "grad_norm": 0.515625, "learning_rate": 1.4999998908015587e-05, "loss": 4.2183, "step": 907 }, { "epoch": 0.3024902140418089, "grad_norm": 0.49609375, "learning_rate": 1.4999998427542463e-05, "loss": 4.2426, "step": 908 }, { "epoch": 0.3028233530440576, "grad_norm": 0.453125, "learning_rate": 1.4999997859710602e-05, "loss": 4.2361, "step": 909 }, { "epoch": 0.30315649204630635, "grad_norm": 0.498046875, "learning_rate": 1.4999997204520009e-05, "loss": 4.3047, "step": 910 }, { "epoch": 0.30348963104855503, "grad_norm": 0.50390625, "learning_rate": 1.4999996461970698e-05, "loss": 4.2258, "step": 911 }, { "epoch": 0.3038227700508037, "grad_norm": 0.50390625, "learning_rate": 1.4999995632062668e-05, "loss": 4.2083, "step": 912 }, { "epoch": 0.3041559090530524, "grad_norm": 0.51171875, "learning_rate": 1.4999994714795936e-05, "loss": 4.2945, "step": 913 }, { "epoch": 0.3044890480553011, "grad_norm": 0.4921875, "learning_rate": 1.4999993710170512e-05, "loss": 4.2414, "step": 914 }, { "epoch": 0.30482218705754976, "grad_norm": 0.45703125, "learning_rate": 1.4999992618186404e-05, "loss": 4.3402, "step": 915 }, { "epoch": 0.30515532605979845, "grad_norm": 0.490234375, "learning_rate": 1.4999991438843629e-05, "loss": 4.2658, "step": 916 }, { "epoch": 0.30548846506204713, "grad_norm": 0.51953125, "learning_rate": 1.4999990172142196e-05, "loss": 4.3405, "step": 917 }, { "epoch": 0.3058216040642958, "grad_norm": 0.53125, "learning_rate": 1.4999988818082126e-05, "loss": 4.208, "step": 918 }, { "epoch": 0.3061547430665445, "grad_norm": 0.49609375, "learning_rate": 1.4999987376663427e-05, "loss": 4.231, "step": 919 }, { "epoch": 0.3064878820687932, "grad_norm": 0.490234375, "learning_rate": 1.4999985847886122e-05, "loss": 4.2198, "step": 920 }, { "epoch": 0.30682102107104187, "grad_norm": 0.486328125, "learning_rate": 1.4999984231750229e-05, "loss": 4.3031, "step": 921 }, { "epoch": 0.3071541600732906, "grad_norm": 0.53125, "learning_rate": 1.4999982528255763e-05, "loss": 4.1944, "step": 922 }, { "epoch": 0.3074872990755393, "grad_norm": 0.51953125, "learning_rate": 1.4999980737402742e-05, "loss": 4.222, "step": 923 }, { "epoch": 0.307820438077788, "grad_norm": 0.5703125, "learning_rate": 1.4999978859191196e-05, "loss": 4.183, "step": 924 }, { "epoch": 0.30815357708003666, "grad_norm": 0.515625, "learning_rate": 1.4999976893621136e-05, "loss": 4.2128, "step": 925 }, { "epoch": 0.30848671608228534, "grad_norm": 0.515625, "learning_rate": 1.4999974840692595e-05, "loss": 4.2225, "step": 926 }, { "epoch": 0.308819855084534, "grad_norm": 0.5, "learning_rate": 1.4999972700405588e-05, "loss": 4.2285, "step": 927 }, { "epoch": 0.3091529940867827, "grad_norm": 0.515625, "learning_rate": 1.4999970472760147e-05, "loss": 4.2769, "step": 928 }, { "epoch": 0.3094861330890314, "grad_norm": 0.490234375, "learning_rate": 1.4999968157756293e-05, "loss": 4.3052, "step": 929 }, { "epoch": 0.3098192720912801, "grad_norm": 0.47265625, "learning_rate": 1.4999965755394057e-05, "loss": 4.2903, "step": 930 }, { "epoch": 0.31015241109352876, "grad_norm": 0.46875, "learning_rate": 1.4999963265673464e-05, "loss": 4.2898, "step": 931 }, { "epoch": 0.31048555009577744, "grad_norm": 0.4765625, "learning_rate": 1.4999960688594543e-05, "loss": 4.225, "step": 932 }, { "epoch": 0.3108186890980261, "grad_norm": 0.49609375, "learning_rate": 1.4999958024157326e-05, "loss": 4.218, "step": 933 }, { "epoch": 0.31115182810027486, "grad_norm": 0.48046875, "learning_rate": 1.499995527236184e-05, "loss": 4.2313, "step": 934 }, { "epoch": 0.31148496710252355, "grad_norm": 0.48046875, "learning_rate": 1.4999952433208122e-05, "loss": 4.337, "step": 935 }, { "epoch": 0.31181810610477223, "grad_norm": 0.45703125, "learning_rate": 1.4999949506696203e-05, "loss": 4.2811, "step": 936 }, { "epoch": 0.3121512451070209, "grad_norm": 0.50390625, "learning_rate": 1.4999946492826118e-05, "loss": 4.124, "step": 937 }, { "epoch": 0.3124843841092696, "grad_norm": 0.474609375, "learning_rate": 1.49999433915979e-05, "loss": 4.1859, "step": 938 }, { "epoch": 0.3128175231115183, "grad_norm": 0.5078125, "learning_rate": 1.4999940203011586e-05, "loss": 4.3103, "step": 939 }, { "epoch": 0.31315066211376696, "grad_norm": 0.48828125, "learning_rate": 1.4999936927067214e-05, "loss": 4.2378, "step": 940 }, { "epoch": 0.31348380111601565, "grad_norm": 0.462890625, "learning_rate": 1.4999933563764822e-05, "loss": 4.2724, "step": 941 }, { "epoch": 0.31381694011826433, "grad_norm": 0.48046875, "learning_rate": 1.4999930113104449e-05, "loss": 4.2799, "step": 942 }, { "epoch": 0.314150079120513, "grad_norm": 0.4921875, "learning_rate": 1.4999926575086134e-05, "loss": 4.1502, "step": 943 }, { "epoch": 0.3144832181227617, "grad_norm": 0.46484375, "learning_rate": 1.4999922949709919e-05, "loss": 4.207, "step": 944 }, { "epoch": 0.31481635712501044, "grad_norm": 0.482421875, "learning_rate": 1.4999919236975848e-05, "loss": 4.2744, "step": 945 }, { "epoch": 0.3151494961272591, "grad_norm": 0.484375, "learning_rate": 1.4999915436883961e-05, "loss": 4.2541, "step": 946 }, { "epoch": 0.3154826351295078, "grad_norm": 0.46875, "learning_rate": 1.4999911549434305e-05, "loss": 4.2641, "step": 947 }, { "epoch": 0.3158157741317565, "grad_norm": 0.4921875, "learning_rate": 1.4999907574626925e-05, "loss": 4.1703, "step": 948 }, { "epoch": 0.31614891313400517, "grad_norm": 0.458984375, "learning_rate": 1.4999903512461865e-05, "loss": 4.2674, "step": 949 }, { "epoch": 0.31648205213625386, "grad_norm": 0.48828125, "learning_rate": 1.4999899362939175e-05, "loss": 4.2284, "step": 950 }, { "epoch": 0.31681519113850254, "grad_norm": 0.4609375, "learning_rate": 1.4999895126058902e-05, "loss": 4.2486, "step": 951 }, { "epoch": 0.3171483301407512, "grad_norm": 0.494140625, "learning_rate": 1.4999890801821094e-05, "loss": 4.2751, "step": 952 }, { "epoch": 0.3174814691429999, "grad_norm": 0.46875, "learning_rate": 1.4999886390225806e-05, "loss": 4.2829, "step": 953 }, { "epoch": 0.3178146081452486, "grad_norm": 0.48046875, "learning_rate": 1.4999881891273083e-05, "loss": 4.2313, "step": 954 }, { "epoch": 0.3181477471474973, "grad_norm": 0.484375, "learning_rate": 1.4999877304962983e-05, "loss": 4.2289, "step": 955 }, { "epoch": 0.31848088614974596, "grad_norm": 0.455078125, "learning_rate": 1.4999872631295555e-05, "loss": 4.2191, "step": 956 }, { "epoch": 0.3188140251519947, "grad_norm": 0.47265625, "learning_rate": 1.4999867870270857e-05, "loss": 4.3129, "step": 957 }, { "epoch": 0.3191471641542434, "grad_norm": 0.478515625, "learning_rate": 1.4999863021888943e-05, "loss": 4.2274, "step": 958 }, { "epoch": 0.31948030315649206, "grad_norm": 0.470703125, "learning_rate": 1.499985808614987e-05, "loss": 4.2014, "step": 959 }, { "epoch": 0.31981344215874075, "grad_norm": 0.494140625, "learning_rate": 1.4999853063053693e-05, "loss": 4.1731, "step": 960 }, { "epoch": 0.32014658116098943, "grad_norm": 0.474609375, "learning_rate": 1.4999847952600473e-05, "loss": 4.2103, "step": 961 }, { "epoch": 0.3204797201632381, "grad_norm": 0.486328125, "learning_rate": 1.4999842754790271e-05, "loss": 4.1554, "step": 962 }, { "epoch": 0.3208128591654868, "grad_norm": 0.47265625, "learning_rate": 1.4999837469623143e-05, "loss": 4.2136, "step": 963 }, { "epoch": 0.3211459981677355, "grad_norm": 0.443359375, "learning_rate": 1.4999832097099153e-05, "loss": 4.2433, "step": 964 }, { "epoch": 0.32147913716998416, "grad_norm": 0.50390625, "learning_rate": 1.4999826637218368e-05, "loss": 4.2013, "step": 965 }, { "epoch": 0.32181227617223285, "grad_norm": 0.50390625, "learning_rate": 1.4999821089980843e-05, "loss": 4.3, "step": 966 }, { "epoch": 0.32214541517448153, "grad_norm": 0.474609375, "learning_rate": 1.4999815455386648e-05, "loss": 4.1153, "step": 967 }, { "epoch": 0.3224785541767302, "grad_norm": 0.482421875, "learning_rate": 1.4999809733435847e-05, "loss": 4.1802, "step": 968 }, { "epoch": 0.32281169317897895, "grad_norm": 0.470703125, "learning_rate": 1.4999803924128509e-05, "loss": 4.2231, "step": 969 }, { "epoch": 0.32314483218122764, "grad_norm": 0.49609375, "learning_rate": 1.4999798027464697e-05, "loss": 4.257, "step": 970 }, { "epoch": 0.3234779711834763, "grad_norm": 0.48828125, "learning_rate": 1.4999792043444484e-05, "loss": 4.1606, "step": 971 }, { "epoch": 0.323811110185725, "grad_norm": 0.474609375, "learning_rate": 1.499978597206794e-05, "loss": 4.2199, "step": 972 }, { "epoch": 0.3241442491879737, "grad_norm": 0.4765625, "learning_rate": 1.4999779813335131e-05, "loss": 4.18, "step": 973 }, { "epoch": 0.32447738819022237, "grad_norm": 0.478515625, "learning_rate": 1.4999773567246133e-05, "loss": 4.2176, "step": 974 }, { "epoch": 0.32481052719247105, "grad_norm": 0.458984375, "learning_rate": 1.4999767233801019e-05, "loss": 4.2719, "step": 975 }, { "epoch": 0.32514366619471974, "grad_norm": 0.45703125, "learning_rate": 1.4999760812999857e-05, "loss": 4.2526, "step": 976 }, { "epoch": 0.3254768051969684, "grad_norm": 0.451171875, "learning_rate": 1.4999754304842729e-05, "loss": 4.2411, "step": 977 }, { "epoch": 0.3258099441992171, "grad_norm": 0.48046875, "learning_rate": 1.4999747709329708e-05, "loss": 4.2316, "step": 978 }, { "epoch": 0.3261430832014658, "grad_norm": 0.47265625, "learning_rate": 1.499974102646087e-05, "loss": 4.2534, "step": 979 }, { "epoch": 0.3264762222037145, "grad_norm": 0.48828125, "learning_rate": 1.4999734256236292e-05, "loss": 4.2509, "step": 980 }, { "epoch": 0.3268093612059632, "grad_norm": 0.48828125, "learning_rate": 1.4999727398656055e-05, "loss": 4.233, "step": 981 }, { "epoch": 0.3271425002082119, "grad_norm": 0.482421875, "learning_rate": 1.4999720453720242e-05, "loss": 4.258, "step": 982 }, { "epoch": 0.3274756392104606, "grad_norm": 0.46875, "learning_rate": 1.4999713421428927e-05, "loss": 4.266, "step": 983 }, { "epoch": 0.32780877821270926, "grad_norm": 0.474609375, "learning_rate": 1.4999706301782194e-05, "loss": 4.2252, "step": 984 }, { "epoch": 0.32814191721495795, "grad_norm": 0.474609375, "learning_rate": 1.4999699094780128e-05, "loss": 4.1287, "step": 985 }, { "epoch": 0.32847505621720663, "grad_norm": 0.462890625, "learning_rate": 1.4999691800422811e-05, "loss": 4.2573, "step": 986 }, { "epoch": 0.3288081952194553, "grad_norm": 0.470703125, "learning_rate": 1.499968441871033e-05, "loss": 4.2373, "step": 987 }, { "epoch": 0.329141334221704, "grad_norm": 0.46484375, "learning_rate": 1.4999676949642768e-05, "loss": 4.2208, "step": 988 }, { "epoch": 0.3294744732239527, "grad_norm": 0.46875, "learning_rate": 1.4999669393220216e-05, "loss": 4.3049, "step": 989 }, { "epoch": 0.32980761222620136, "grad_norm": 0.46484375, "learning_rate": 1.499966174944276e-05, "loss": 4.1773, "step": 990 }, { "epoch": 0.33014075122845005, "grad_norm": 0.484375, "learning_rate": 1.4999654018310486e-05, "loss": 4.2567, "step": 991 }, { "epoch": 0.3304738902306988, "grad_norm": 0.482421875, "learning_rate": 1.499964619982349e-05, "loss": 4.1692, "step": 992 }, { "epoch": 0.33080702923294747, "grad_norm": 0.486328125, "learning_rate": 1.4999638293981858e-05, "loss": 4.2435, "step": 993 }, { "epoch": 0.33114016823519615, "grad_norm": 0.54296875, "learning_rate": 1.4999630300785685e-05, "loss": 4.1501, "step": 994 }, { "epoch": 0.33147330723744484, "grad_norm": 0.50390625, "learning_rate": 1.4999622220235063e-05, "loss": 4.2239, "step": 995 }, { "epoch": 0.3318064462396935, "grad_norm": 0.466796875, "learning_rate": 1.4999614052330088e-05, "loss": 4.2267, "step": 996 }, { "epoch": 0.3321395852419422, "grad_norm": 0.474609375, "learning_rate": 1.4999605797070851e-05, "loss": 4.1769, "step": 997 }, { "epoch": 0.3324727242441909, "grad_norm": 0.48828125, "learning_rate": 1.4999597454457452e-05, "loss": 4.1738, "step": 998 }, { "epoch": 0.33280586324643957, "grad_norm": 0.474609375, "learning_rate": 1.4999589024489983e-05, "loss": 4.1648, "step": 999 }, { "epoch": 0.33313900224868825, "grad_norm": 0.48046875, "learning_rate": 1.4999580507168551e-05, "loss": 4.2219, "step": 1000 }, { "epoch": 0.33347214125093694, "grad_norm": 0.490234375, "learning_rate": 1.4999571902493248e-05, "loss": 4.188, "step": 1001 }, { "epoch": 0.3338052802531856, "grad_norm": 0.45703125, "learning_rate": 1.4999563210464177e-05, "loss": 4.2435, "step": 1002 }, { "epoch": 0.3341384192554343, "grad_norm": 0.451171875, "learning_rate": 1.4999554431081437e-05, "loss": 4.2512, "step": 1003 }, { "epoch": 0.33447155825768304, "grad_norm": 0.48828125, "learning_rate": 1.4999545564345134e-05, "loss": 4.2208, "step": 1004 }, { "epoch": 0.3348046972599317, "grad_norm": 0.490234375, "learning_rate": 1.4999536610255367e-05, "loss": 4.2319, "step": 1005 }, { "epoch": 0.3351378362621804, "grad_norm": 0.470703125, "learning_rate": 1.4999527568812243e-05, "loss": 4.2001, "step": 1006 }, { "epoch": 0.3354709752644291, "grad_norm": 0.48828125, "learning_rate": 1.4999518440015868e-05, "loss": 4.1937, "step": 1007 }, { "epoch": 0.3358041142666778, "grad_norm": 0.51953125, "learning_rate": 1.4999509223866348e-05, "loss": 4.2385, "step": 1008 }, { "epoch": 0.33613725326892646, "grad_norm": 0.5, "learning_rate": 1.4999499920363786e-05, "loss": 4.1849, "step": 1009 }, { "epoch": 0.33647039227117514, "grad_norm": 0.470703125, "learning_rate": 1.4999490529508297e-05, "loss": 4.195, "step": 1010 }, { "epoch": 0.33680353127342383, "grad_norm": 0.47265625, "learning_rate": 1.4999481051299987e-05, "loss": 4.2281, "step": 1011 }, { "epoch": 0.3371366702756725, "grad_norm": 0.462890625, "learning_rate": 1.4999471485738965e-05, "loss": 4.2712, "step": 1012 }, { "epoch": 0.3374698092779212, "grad_norm": 0.48828125, "learning_rate": 1.4999461832825344e-05, "loss": 4.284, "step": 1013 }, { "epoch": 0.3378029482801699, "grad_norm": 0.498046875, "learning_rate": 1.4999452092559237e-05, "loss": 4.2475, "step": 1014 }, { "epoch": 0.33813608728241856, "grad_norm": 0.5, "learning_rate": 1.4999442264940756e-05, "loss": 4.2213, "step": 1015 }, { "epoch": 0.3384692262846673, "grad_norm": 0.44921875, "learning_rate": 1.4999432349970019e-05, "loss": 4.2459, "step": 1016 }, { "epoch": 0.338802365286916, "grad_norm": 0.4921875, "learning_rate": 1.4999422347647135e-05, "loss": 4.2155, "step": 1017 }, { "epoch": 0.33913550428916467, "grad_norm": 0.486328125, "learning_rate": 1.499941225797223e-05, "loss": 4.217, "step": 1018 }, { "epoch": 0.33946864329141335, "grad_norm": 0.48828125, "learning_rate": 1.4999402080945412e-05, "loss": 4.2145, "step": 1019 }, { "epoch": 0.33980178229366204, "grad_norm": 0.490234375, "learning_rate": 1.4999391816566801e-05, "loss": 4.1889, "step": 1020 }, { "epoch": 0.3401349212959107, "grad_norm": 0.4609375, "learning_rate": 1.4999381464836523e-05, "loss": 4.271, "step": 1021 }, { "epoch": 0.3404680602981594, "grad_norm": 0.4609375, "learning_rate": 1.4999371025754694e-05, "loss": 4.2309, "step": 1022 }, { "epoch": 0.3408011993004081, "grad_norm": 0.4453125, "learning_rate": 1.4999360499321434e-05, "loss": 4.2785, "step": 1023 }, { "epoch": 0.34113433830265677, "grad_norm": 0.51171875, "learning_rate": 1.499934988553687e-05, "loss": 4.2208, "step": 1024 }, { "epoch": 0.34146747730490545, "grad_norm": 0.474609375, "learning_rate": 1.4999339184401125e-05, "loss": 4.2947, "step": 1025 }, { "epoch": 0.34180061630715414, "grad_norm": 0.50390625, "learning_rate": 1.499932839591432e-05, "loss": 4.1945, "step": 1026 }, { "epoch": 0.3421337553094029, "grad_norm": 0.45703125, "learning_rate": 1.4999317520076582e-05, "loss": 4.2963, "step": 1027 }, { "epoch": 0.34246689431165156, "grad_norm": 0.4921875, "learning_rate": 1.4999306556888039e-05, "loss": 4.2378, "step": 1028 }, { "epoch": 0.34280003331390024, "grad_norm": 0.498046875, "learning_rate": 1.4999295506348818e-05, "loss": 4.2062, "step": 1029 }, { "epoch": 0.3431331723161489, "grad_norm": 0.462890625, "learning_rate": 1.4999284368459048e-05, "loss": 4.2572, "step": 1030 }, { "epoch": 0.3434663113183976, "grad_norm": 0.4765625, "learning_rate": 1.4999273143218858e-05, "loss": 4.2259, "step": 1031 }, { "epoch": 0.3437994503206463, "grad_norm": 0.482421875, "learning_rate": 1.499926183062838e-05, "loss": 4.1854, "step": 1032 }, { "epoch": 0.344132589322895, "grad_norm": 0.498046875, "learning_rate": 1.4999250430687745e-05, "loss": 4.2061, "step": 1033 }, { "epoch": 0.34446572832514366, "grad_norm": 0.48046875, "learning_rate": 1.4999238943397086e-05, "loss": 4.186, "step": 1034 }, { "epoch": 0.34479886732739234, "grad_norm": 0.474609375, "learning_rate": 1.4999227368756536e-05, "loss": 4.1929, "step": 1035 }, { "epoch": 0.34513200632964103, "grad_norm": 0.47265625, "learning_rate": 1.4999215706766229e-05, "loss": 4.2145, "step": 1036 }, { "epoch": 0.3454651453318897, "grad_norm": 0.46484375, "learning_rate": 1.4999203957426306e-05, "loss": 4.2759, "step": 1037 }, { "epoch": 0.3457982843341384, "grad_norm": 0.5078125, "learning_rate": 1.4999192120736896e-05, "loss": 4.2102, "step": 1038 }, { "epoch": 0.34613142333638713, "grad_norm": 0.49609375, "learning_rate": 1.4999180196698144e-05, "loss": 4.197, "step": 1039 }, { "epoch": 0.3464645623386358, "grad_norm": 0.494140625, "learning_rate": 1.4999168185310186e-05, "loss": 4.1466, "step": 1040 }, { "epoch": 0.3467977013408845, "grad_norm": 0.50390625, "learning_rate": 1.499915608657316e-05, "loss": 4.2471, "step": 1041 }, { "epoch": 0.3471308403431332, "grad_norm": 0.49609375, "learning_rate": 1.499914390048721e-05, "loss": 4.2297, "step": 1042 }, { "epoch": 0.34746397934538187, "grad_norm": 0.5, "learning_rate": 1.4999131627052475e-05, "loss": 4.2363, "step": 1043 }, { "epoch": 0.34779711834763055, "grad_norm": 0.498046875, "learning_rate": 1.4999119266269102e-05, "loss": 4.2198, "step": 1044 }, { "epoch": 0.34813025734987924, "grad_norm": 0.52734375, "learning_rate": 1.4999106818137231e-05, "loss": 4.2372, "step": 1045 }, { "epoch": 0.3484633963521279, "grad_norm": 0.5078125, "learning_rate": 1.4999094282657009e-05, "loss": 4.2776, "step": 1046 }, { "epoch": 0.3487965353543766, "grad_norm": 0.5, "learning_rate": 1.4999081659828581e-05, "loss": 4.2655, "step": 1047 }, { "epoch": 0.3491296743566253, "grad_norm": 0.50390625, "learning_rate": 1.4999068949652094e-05, "loss": 4.192, "step": 1048 }, { "epoch": 0.34946281335887397, "grad_norm": 0.498046875, "learning_rate": 1.49990561521277e-05, "loss": 4.271, "step": 1049 }, { "epoch": 0.34979595236112265, "grad_norm": 0.48046875, "learning_rate": 1.4999043267255542e-05, "loss": 4.234, "step": 1050 }, { "epoch": 0.3501290913633714, "grad_norm": 0.5, "learning_rate": 1.4999030295035774e-05, "loss": 4.2234, "step": 1051 }, { "epoch": 0.3504622303656201, "grad_norm": 0.466796875, "learning_rate": 1.4999017235468545e-05, "loss": 4.1731, "step": 1052 }, { "epoch": 0.35079536936786876, "grad_norm": 0.46875, "learning_rate": 1.4999004088554009e-05, "loss": 4.2459, "step": 1053 }, { "epoch": 0.35112850837011744, "grad_norm": 0.46484375, "learning_rate": 1.4998990854292319e-05, "loss": 4.239, "step": 1054 }, { "epoch": 0.3514616473723661, "grad_norm": 0.470703125, "learning_rate": 1.4998977532683627e-05, "loss": 4.2385, "step": 1055 }, { "epoch": 0.3517947863746148, "grad_norm": 0.498046875, "learning_rate": 1.499896412372809e-05, "loss": 4.2022, "step": 1056 }, { "epoch": 0.3521279253768635, "grad_norm": 0.50390625, "learning_rate": 1.499895062742586e-05, "loss": 4.218, "step": 1057 }, { "epoch": 0.3524610643791122, "grad_norm": 0.54296875, "learning_rate": 1.4998937043777105e-05, "loss": 4.2131, "step": 1058 }, { "epoch": 0.35279420338136086, "grad_norm": 0.49609375, "learning_rate": 1.4998923372781969e-05, "loss": 4.2319, "step": 1059 }, { "epoch": 0.35312734238360954, "grad_norm": 0.455078125, "learning_rate": 1.4998909614440623e-05, "loss": 4.2645, "step": 1060 }, { "epoch": 0.3534604813858582, "grad_norm": 0.50390625, "learning_rate": 1.4998895768753221e-05, "loss": 4.1576, "step": 1061 }, { "epoch": 0.3537936203881069, "grad_norm": 0.5, "learning_rate": 1.4998881835719927e-05, "loss": 4.1958, "step": 1062 }, { "epoch": 0.35412675939035565, "grad_norm": 0.5, "learning_rate": 1.4998867815340902e-05, "loss": 4.1425, "step": 1063 }, { "epoch": 0.35445989839260433, "grad_norm": 0.486328125, "learning_rate": 1.499885370761631e-05, "loss": 4.2318, "step": 1064 }, { "epoch": 0.354793037394853, "grad_norm": 0.47265625, "learning_rate": 1.499883951254631e-05, "loss": 4.1758, "step": 1065 }, { "epoch": 0.3551261763971017, "grad_norm": 0.474609375, "learning_rate": 1.4998825230131077e-05, "loss": 4.2366, "step": 1066 }, { "epoch": 0.3554593153993504, "grad_norm": 0.5078125, "learning_rate": 1.4998810860370772e-05, "loss": 4.1909, "step": 1067 }, { "epoch": 0.35579245440159907, "grad_norm": 0.498046875, "learning_rate": 1.4998796403265563e-05, "loss": 4.2319, "step": 1068 }, { "epoch": 0.35612559340384775, "grad_norm": 0.484375, "learning_rate": 1.4998781858815617e-05, "loss": 4.2055, "step": 1069 }, { "epoch": 0.35645873240609643, "grad_norm": 0.474609375, "learning_rate": 1.4998767227021106e-05, "loss": 4.1713, "step": 1070 }, { "epoch": 0.3567918714083451, "grad_norm": 0.482421875, "learning_rate": 1.4998752507882197e-05, "loss": 4.2074, "step": 1071 }, { "epoch": 0.3571250104105938, "grad_norm": 0.5078125, "learning_rate": 1.4998737701399065e-05, "loss": 4.2052, "step": 1072 }, { "epoch": 0.3574581494128425, "grad_norm": 0.458984375, "learning_rate": 1.4998722807571879e-05, "loss": 4.2172, "step": 1073 }, { "epoch": 0.3577912884150912, "grad_norm": 0.48046875, "learning_rate": 1.4998707826400815e-05, "loss": 4.2505, "step": 1074 }, { "epoch": 0.3581244274173399, "grad_norm": 0.46875, "learning_rate": 1.499869275788605e-05, "loss": 4.2247, "step": 1075 }, { "epoch": 0.3584575664195886, "grad_norm": 0.484375, "learning_rate": 1.4998677602027756e-05, "loss": 4.1898, "step": 1076 }, { "epoch": 0.3587907054218373, "grad_norm": 0.51171875, "learning_rate": 1.4998662358826108e-05, "loss": 4.2053, "step": 1077 }, { "epoch": 0.35912384442408596, "grad_norm": 0.46484375, "learning_rate": 1.4998647028281288e-05, "loss": 4.1799, "step": 1078 }, { "epoch": 0.35945698342633464, "grad_norm": 0.46484375, "learning_rate": 1.4998631610393467e-05, "loss": 4.2156, "step": 1079 }, { "epoch": 0.3597901224285833, "grad_norm": 0.486328125, "learning_rate": 1.4998616105162835e-05, "loss": 4.2345, "step": 1080 }, { "epoch": 0.360123261430832, "grad_norm": 0.490234375, "learning_rate": 1.4998600512589565e-05, "loss": 4.2325, "step": 1081 }, { "epoch": 0.3604564004330807, "grad_norm": 0.482421875, "learning_rate": 1.4998584832673843e-05, "loss": 4.2507, "step": 1082 }, { "epoch": 0.3607895394353294, "grad_norm": 0.486328125, "learning_rate": 1.4998569065415847e-05, "loss": 4.1397, "step": 1083 }, { "epoch": 0.36112267843757806, "grad_norm": 0.46875, "learning_rate": 1.4998553210815764e-05, "loss": 4.2195, "step": 1084 }, { "epoch": 0.36145581743982674, "grad_norm": 0.486328125, "learning_rate": 1.4998537268873779e-05, "loss": 4.1462, "step": 1085 }, { "epoch": 0.3617889564420755, "grad_norm": 0.462890625, "learning_rate": 1.4998521239590075e-05, "loss": 4.2086, "step": 1086 }, { "epoch": 0.36212209544432417, "grad_norm": 0.498046875, "learning_rate": 1.4998505122964843e-05, "loss": 4.1922, "step": 1087 }, { "epoch": 0.36245523444657285, "grad_norm": 0.4921875, "learning_rate": 1.4998488918998267e-05, "loss": 4.1549, "step": 1088 }, { "epoch": 0.36278837344882153, "grad_norm": 0.4921875, "learning_rate": 1.4998472627690536e-05, "loss": 4.2492, "step": 1089 }, { "epoch": 0.3631215124510702, "grad_norm": 0.458984375, "learning_rate": 1.499845624904184e-05, "loss": 4.1977, "step": 1090 }, { "epoch": 0.3634546514533189, "grad_norm": 0.462890625, "learning_rate": 1.4998439783052371e-05, "loss": 4.3306, "step": 1091 }, { "epoch": 0.3637877904555676, "grad_norm": 0.5, "learning_rate": 1.499842322972232e-05, "loss": 4.194, "step": 1092 }, { "epoch": 0.36412092945781627, "grad_norm": 0.51171875, "learning_rate": 1.499840658905188e-05, "loss": 4.2357, "step": 1093 }, { "epoch": 0.36445406846006495, "grad_norm": 0.52734375, "learning_rate": 1.4998389861041244e-05, "loss": 4.209, "step": 1094 }, { "epoch": 0.36478720746231363, "grad_norm": 0.46484375, "learning_rate": 1.4998373045690609e-05, "loss": 4.224, "step": 1095 }, { "epoch": 0.3651203464645623, "grad_norm": 0.466796875, "learning_rate": 1.499835614300017e-05, "loss": 4.2185, "step": 1096 }, { "epoch": 0.365453485466811, "grad_norm": 0.47265625, "learning_rate": 1.4998339152970122e-05, "loss": 4.28, "step": 1097 }, { "epoch": 0.36578662446905974, "grad_norm": 0.51953125, "learning_rate": 1.4998322075600664e-05, "loss": 4.1903, "step": 1098 }, { "epoch": 0.3661197634713084, "grad_norm": 0.4765625, "learning_rate": 1.4998304910891995e-05, "loss": 4.2376, "step": 1099 }, { "epoch": 0.3664529024735571, "grad_norm": 0.494140625, "learning_rate": 1.4998287658844316e-05, "loss": 4.2292, "step": 1100 }, { "epoch": 0.3667860414758058, "grad_norm": 0.5078125, "learning_rate": 1.4998270319457829e-05, "loss": 4.2246, "step": 1101 }, { "epoch": 0.3671191804780545, "grad_norm": 0.462890625, "learning_rate": 1.499825289273273e-05, "loss": 4.2304, "step": 1102 }, { "epoch": 0.36745231948030316, "grad_norm": 0.484375, "learning_rate": 1.4998235378669229e-05, "loss": 4.2533, "step": 1103 }, { "epoch": 0.36778545848255184, "grad_norm": 0.470703125, "learning_rate": 1.4998217777267525e-05, "loss": 4.1744, "step": 1104 }, { "epoch": 0.3681185974848005, "grad_norm": 0.5, "learning_rate": 1.4998200088527826e-05, "loss": 4.1544, "step": 1105 }, { "epoch": 0.3684517364870492, "grad_norm": 0.482421875, "learning_rate": 1.4998182312450338e-05, "loss": 4.1953, "step": 1106 }, { "epoch": 0.3687848754892979, "grad_norm": 0.4765625, "learning_rate": 1.4998164449035265e-05, "loss": 4.2176, "step": 1107 }, { "epoch": 0.3691180144915466, "grad_norm": 0.484375, "learning_rate": 1.4998146498282819e-05, "loss": 4.2447, "step": 1108 }, { "epoch": 0.36945115349379526, "grad_norm": 0.462890625, "learning_rate": 1.4998128460193205e-05, "loss": 4.2979, "step": 1109 }, { "epoch": 0.369784292496044, "grad_norm": 0.462890625, "learning_rate": 1.4998110334766637e-05, "loss": 4.2485, "step": 1110 }, { "epoch": 0.3701174314982927, "grad_norm": 0.484375, "learning_rate": 1.4998092122003325e-05, "loss": 4.2254, "step": 1111 }, { "epoch": 0.37045057050054137, "grad_norm": 0.4609375, "learning_rate": 1.499807382190348e-05, "loss": 4.2725, "step": 1112 }, { "epoch": 0.37078370950279005, "grad_norm": 0.47265625, "learning_rate": 1.4998055434467314e-05, "loss": 4.1829, "step": 1113 }, { "epoch": 0.37111684850503873, "grad_norm": 0.470703125, "learning_rate": 1.4998036959695046e-05, "loss": 4.2044, "step": 1114 }, { "epoch": 0.3714499875072874, "grad_norm": 0.482421875, "learning_rate": 1.4998018397586886e-05, "loss": 4.1831, "step": 1115 }, { "epoch": 0.3717831265095361, "grad_norm": 0.46875, "learning_rate": 1.4997999748143052e-05, "loss": 4.2676, "step": 1116 }, { "epoch": 0.3721162655117848, "grad_norm": 0.46875, "learning_rate": 1.4997981011363764e-05, "loss": 4.2536, "step": 1117 }, { "epoch": 0.37244940451403347, "grad_norm": 0.47265625, "learning_rate": 1.4997962187249237e-05, "loss": 4.1811, "step": 1118 }, { "epoch": 0.37278254351628215, "grad_norm": 0.48046875, "learning_rate": 1.4997943275799691e-05, "loss": 4.2328, "step": 1119 }, { "epoch": 0.37311568251853083, "grad_norm": 0.482421875, "learning_rate": 1.4997924277015344e-05, "loss": 4.2224, "step": 1120 }, { "epoch": 0.3734488215207796, "grad_norm": 0.447265625, "learning_rate": 1.4997905190896423e-05, "loss": 4.2196, "step": 1121 }, { "epoch": 0.37378196052302826, "grad_norm": 0.490234375, "learning_rate": 1.4997886017443144e-05, "loss": 4.1187, "step": 1122 }, { "epoch": 0.37411509952527694, "grad_norm": 0.474609375, "learning_rate": 1.4997866756655736e-05, "loss": 4.215, "step": 1123 }, { "epoch": 0.3744482385275256, "grad_norm": 0.5, "learning_rate": 1.4997847408534418e-05, "loss": 4.2277, "step": 1124 }, { "epoch": 0.3747813775297743, "grad_norm": 0.48046875, "learning_rate": 1.499782797307942e-05, "loss": 4.2302, "step": 1125 }, { "epoch": 0.375114516532023, "grad_norm": 0.466796875, "learning_rate": 1.4997808450290966e-05, "loss": 4.2371, "step": 1126 }, { "epoch": 0.3754476555342717, "grad_norm": 0.482421875, "learning_rate": 1.4997788840169283e-05, "loss": 4.2009, "step": 1127 }, { "epoch": 0.37578079453652036, "grad_norm": 0.490234375, "learning_rate": 1.4997769142714602e-05, "loss": 4.1782, "step": 1128 }, { "epoch": 0.37611393353876904, "grad_norm": 0.44921875, "learning_rate": 1.4997749357927148e-05, "loss": 4.206, "step": 1129 }, { "epoch": 0.3764470725410177, "grad_norm": 0.478515625, "learning_rate": 1.4997729485807156e-05, "loss": 4.1941, "step": 1130 }, { "epoch": 0.3767802115432664, "grad_norm": 0.4765625, "learning_rate": 1.4997709526354856e-05, "loss": 4.2471, "step": 1131 }, { "epoch": 0.3771133505455151, "grad_norm": 0.470703125, "learning_rate": 1.4997689479570478e-05, "loss": 4.2314, "step": 1132 }, { "epoch": 0.37744648954776383, "grad_norm": 0.484375, "learning_rate": 1.4997669345454258e-05, "loss": 4.1759, "step": 1133 }, { "epoch": 0.3777796285500125, "grad_norm": 0.490234375, "learning_rate": 1.499764912400643e-05, "loss": 4.1717, "step": 1134 }, { "epoch": 0.3781127675522612, "grad_norm": 0.46484375, "learning_rate": 1.4997628815227233e-05, "loss": 4.2073, "step": 1135 }, { "epoch": 0.3784459065545099, "grad_norm": 0.466796875, "learning_rate": 1.4997608419116895e-05, "loss": 4.2783, "step": 1136 }, { "epoch": 0.37877904555675856, "grad_norm": 0.458984375, "learning_rate": 1.4997587935675661e-05, "loss": 4.2004, "step": 1137 }, { "epoch": 0.37911218455900725, "grad_norm": 0.484375, "learning_rate": 1.4997567364903769e-05, "loss": 4.1848, "step": 1138 }, { "epoch": 0.37944532356125593, "grad_norm": 0.5, "learning_rate": 1.4997546706801453e-05, "loss": 4.1358, "step": 1139 }, { "epoch": 0.3797784625635046, "grad_norm": 0.4765625, "learning_rate": 1.4997525961368961e-05, "loss": 4.1995, "step": 1140 }, { "epoch": 0.3801116015657533, "grad_norm": 0.484375, "learning_rate": 1.499750512860653e-05, "loss": 4.1596, "step": 1141 }, { "epoch": 0.380444740568002, "grad_norm": 0.4609375, "learning_rate": 1.4997484208514405e-05, "loss": 4.2426, "step": 1142 }, { "epoch": 0.38077787957025067, "grad_norm": 0.4921875, "learning_rate": 1.4997463201092825e-05, "loss": 4.2508, "step": 1143 }, { "epoch": 0.38111101857249935, "grad_norm": 0.48828125, "learning_rate": 1.4997442106342044e-05, "loss": 4.1926, "step": 1144 }, { "epoch": 0.3814441575747481, "grad_norm": 0.474609375, "learning_rate": 1.4997420924262295e-05, "loss": 4.176, "step": 1145 }, { "epoch": 0.38177729657699677, "grad_norm": 0.4765625, "learning_rate": 1.4997399654853837e-05, "loss": 4.2373, "step": 1146 }, { "epoch": 0.38211043557924546, "grad_norm": 0.45703125, "learning_rate": 1.499737829811691e-05, "loss": 4.2306, "step": 1147 }, { "epoch": 0.38244357458149414, "grad_norm": 0.5390625, "learning_rate": 1.4997356854051767e-05, "loss": 4.1961, "step": 1148 }, { "epoch": 0.3827767135837428, "grad_norm": 0.4765625, "learning_rate": 1.4997335322658652e-05, "loss": 4.195, "step": 1149 }, { "epoch": 0.3831098525859915, "grad_norm": 0.498046875, "learning_rate": 1.4997313703937824e-05, "loss": 4.221, "step": 1150 }, { "epoch": 0.3834429915882402, "grad_norm": 0.48828125, "learning_rate": 1.4997291997889529e-05, "loss": 4.1994, "step": 1151 }, { "epoch": 0.3837761305904889, "grad_norm": 0.474609375, "learning_rate": 1.4997270204514019e-05, "loss": 4.1338, "step": 1152 }, { "epoch": 0.38410926959273756, "grad_norm": 0.466796875, "learning_rate": 1.4997248323811552e-05, "loss": 4.1963, "step": 1153 }, { "epoch": 0.38444240859498624, "grad_norm": 0.478515625, "learning_rate": 1.4997226355782382e-05, "loss": 4.1181, "step": 1154 }, { "epoch": 0.3847755475972349, "grad_norm": 0.484375, "learning_rate": 1.4997204300426764e-05, "loss": 4.189, "step": 1155 }, { "epoch": 0.3851086865994836, "grad_norm": 0.5, "learning_rate": 1.4997182157744952e-05, "loss": 4.1763, "step": 1156 }, { "epoch": 0.38544182560173235, "grad_norm": 0.494140625, "learning_rate": 1.4997159927737209e-05, "loss": 4.1872, "step": 1157 }, { "epoch": 0.38577496460398103, "grad_norm": 0.5, "learning_rate": 1.499713761040379e-05, "loss": 4.1972, "step": 1158 }, { "epoch": 0.3861081036062297, "grad_norm": 0.494140625, "learning_rate": 1.4997115205744959e-05, "loss": 4.179, "step": 1159 }, { "epoch": 0.3864412426084784, "grad_norm": 0.5, "learning_rate": 1.4997092713760972e-05, "loss": 4.1739, "step": 1160 }, { "epoch": 0.3867743816107271, "grad_norm": 0.455078125, "learning_rate": 1.4997070134452096e-05, "loss": 4.1903, "step": 1161 }, { "epoch": 0.38710752061297576, "grad_norm": 0.458984375, "learning_rate": 1.499704746781859e-05, "loss": 4.2709, "step": 1162 }, { "epoch": 0.38744065961522445, "grad_norm": 0.494140625, "learning_rate": 1.4997024713860723e-05, "loss": 4.2006, "step": 1163 }, { "epoch": 0.38777379861747313, "grad_norm": 0.515625, "learning_rate": 1.4997001872578753e-05, "loss": 4.2067, "step": 1164 }, { "epoch": 0.3881069376197218, "grad_norm": 0.470703125, "learning_rate": 1.499697894397295e-05, "loss": 4.1712, "step": 1165 }, { "epoch": 0.3884400766219705, "grad_norm": 0.5234375, "learning_rate": 1.4996955928043579e-05, "loss": 4.193, "step": 1166 }, { "epoch": 0.3887732156242192, "grad_norm": 0.50390625, "learning_rate": 1.4996932824790912e-05, "loss": 4.2624, "step": 1167 }, { "epoch": 0.3891063546264679, "grad_norm": 0.466796875, "learning_rate": 1.4996909634215218e-05, "loss": 4.1637, "step": 1168 }, { "epoch": 0.3894394936287166, "grad_norm": 0.474609375, "learning_rate": 1.4996886356316761e-05, "loss": 4.1511, "step": 1169 }, { "epoch": 0.3897726326309653, "grad_norm": 0.48828125, "learning_rate": 1.499686299109582e-05, "loss": 4.1631, "step": 1170 }, { "epoch": 0.39010577163321397, "grad_norm": 0.53515625, "learning_rate": 1.4996839538552662e-05, "loss": 4.183, "step": 1171 }, { "epoch": 0.39043891063546265, "grad_norm": 0.490234375, "learning_rate": 1.4996815998687559e-05, "loss": 4.1785, "step": 1172 }, { "epoch": 0.39077204963771134, "grad_norm": 0.484375, "learning_rate": 1.499679237150079e-05, "loss": 4.1305, "step": 1173 }, { "epoch": 0.39110518863996, "grad_norm": 0.474609375, "learning_rate": 1.4996768656992626e-05, "loss": 4.1828, "step": 1174 }, { "epoch": 0.3914383276422087, "grad_norm": 0.498046875, "learning_rate": 1.4996744855163349e-05, "loss": 4.2753, "step": 1175 }, { "epoch": 0.3917714666444574, "grad_norm": 0.49609375, "learning_rate": 1.4996720966013229e-05, "loss": 4.2147, "step": 1176 }, { "epoch": 0.3921046056467061, "grad_norm": 0.4921875, "learning_rate": 1.4996696989542548e-05, "loss": 4.1286, "step": 1177 }, { "epoch": 0.39243774464895476, "grad_norm": 0.498046875, "learning_rate": 1.4996672925751585e-05, "loss": 4.1938, "step": 1178 }, { "epoch": 0.39277088365120344, "grad_norm": 0.486328125, "learning_rate": 1.4996648774640622e-05, "loss": 4.2006, "step": 1179 }, { "epoch": 0.3931040226534522, "grad_norm": 0.5078125, "learning_rate": 1.4996624536209937e-05, "loss": 4.1445, "step": 1180 }, { "epoch": 0.39343716165570086, "grad_norm": 0.478515625, "learning_rate": 1.4996600210459815e-05, "loss": 4.2031, "step": 1181 }, { "epoch": 0.39377030065794955, "grad_norm": 0.470703125, "learning_rate": 1.4996575797390536e-05, "loss": 4.1755, "step": 1182 }, { "epoch": 0.39410343966019823, "grad_norm": 0.48046875, "learning_rate": 1.4996551297002388e-05, "loss": 4.2311, "step": 1183 }, { "epoch": 0.3944365786624469, "grad_norm": 0.484375, "learning_rate": 1.4996526709295655e-05, "loss": 4.1549, "step": 1184 }, { "epoch": 0.3947697176646956, "grad_norm": 0.49609375, "learning_rate": 1.4996502034270623e-05, "loss": 4.2201, "step": 1185 }, { "epoch": 0.3951028566669443, "grad_norm": 0.484375, "learning_rate": 1.499647727192758e-05, "loss": 4.1962, "step": 1186 }, { "epoch": 0.39543599566919296, "grad_norm": 0.466796875, "learning_rate": 1.4996452422266812e-05, "loss": 4.2489, "step": 1187 }, { "epoch": 0.39576913467144165, "grad_norm": 0.498046875, "learning_rate": 1.4996427485288613e-05, "loss": 4.1961, "step": 1188 }, { "epoch": 0.39610227367369033, "grad_norm": 0.49609375, "learning_rate": 1.499640246099327e-05, "loss": 4.1657, "step": 1189 }, { "epoch": 0.396435412675939, "grad_norm": 0.474609375, "learning_rate": 1.4996377349381077e-05, "loss": 4.1838, "step": 1190 }, { "epoch": 0.3967685516781877, "grad_norm": 0.4609375, "learning_rate": 1.4996352150452323e-05, "loss": 4.2396, "step": 1191 }, { "epoch": 0.39710169068043644, "grad_norm": 0.466796875, "learning_rate": 1.4996326864207304e-05, "loss": 4.179, "step": 1192 }, { "epoch": 0.3974348296826851, "grad_norm": 0.46484375, "learning_rate": 1.4996301490646315e-05, "loss": 4.2225, "step": 1193 }, { "epoch": 0.3977679686849338, "grad_norm": 0.46484375, "learning_rate": 1.4996276029769649e-05, "loss": 4.2294, "step": 1194 }, { "epoch": 0.3981011076871825, "grad_norm": 0.466796875, "learning_rate": 1.4996250481577605e-05, "loss": 4.1785, "step": 1195 }, { "epoch": 0.39843424668943117, "grad_norm": 0.49609375, "learning_rate": 1.499622484607048e-05, "loss": 4.1903, "step": 1196 }, { "epoch": 0.39876738569167985, "grad_norm": 0.462890625, "learning_rate": 1.4996199123248572e-05, "loss": 4.2665, "step": 1197 }, { "epoch": 0.39910052469392854, "grad_norm": 0.478515625, "learning_rate": 1.4996173313112183e-05, "loss": 4.1762, "step": 1198 }, { "epoch": 0.3994336636961772, "grad_norm": 0.46484375, "learning_rate": 1.4996147415661609e-05, "loss": 4.1756, "step": 1199 }, { "epoch": 0.3997668026984259, "grad_norm": 0.46875, "learning_rate": 1.4996121430897156e-05, "loss": 4.1691, "step": 1200 }, { "epoch": 0.4000999417006746, "grad_norm": 0.4921875, "learning_rate": 1.4996095358819121e-05, "loss": 4.238, "step": 1201 }, { "epoch": 0.40043308070292327, "grad_norm": 0.482421875, "learning_rate": 1.4996069199427816e-05, "loss": 4.219, "step": 1202 }, { "epoch": 0.40076621970517196, "grad_norm": 0.4921875, "learning_rate": 1.4996042952723538e-05, "loss": 4.1907, "step": 1203 }, { "epoch": 0.4010993587074207, "grad_norm": 0.482421875, "learning_rate": 1.4996016618706599e-05, "loss": 4.1565, "step": 1204 }, { "epoch": 0.4014324977096694, "grad_norm": 0.4453125, "learning_rate": 1.49959901973773e-05, "loss": 4.2773, "step": 1205 }, { "epoch": 0.40176563671191806, "grad_norm": 0.4765625, "learning_rate": 1.4995963688735956e-05, "loss": 4.2032, "step": 1206 }, { "epoch": 0.40209877571416675, "grad_norm": 0.494140625, "learning_rate": 1.4995937092782867e-05, "loss": 4.2699, "step": 1207 }, { "epoch": 0.40243191471641543, "grad_norm": 0.48046875, "learning_rate": 1.4995910409518347e-05, "loss": 4.1618, "step": 1208 }, { "epoch": 0.4027650537186641, "grad_norm": 0.484375, "learning_rate": 1.4995883638942709e-05, "loss": 4.2215, "step": 1209 }, { "epoch": 0.4030981927209128, "grad_norm": 0.46875, "learning_rate": 1.4995856781056263e-05, "loss": 4.2469, "step": 1210 }, { "epoch": 0.4034313317231615, "grad_norm": 0.494140625, "learning_rate": 1.499582983585932e-05, "loss": 4.1672, "step": 1211 }, { "epoch": 0.40376447072541016, "grad_norm": 0.47265625, "learning_rate": 1.4995802803352195e-05, "loss": 4.2303, "step": 1212 }, { "epoch": 0.40409760972765885, "grad_norm": 0.46875, "learning_rate": 1.4995775683535205e-05, "loss": 4.2313, "step": 1213 }, { "epoch": 0.40443074872990753, "grad_norm": 0.494140625, "learning_rate": 1.4995748476408662e-05, "loss": 4.1651, "step": 1214 }, { "epoch": 0.40476388773215627, "grad_norm": 0.482421875, "learning_rate": 1.4995721181972884e-05, "loss": 4.2153, "step": 1215 }, { "epoch": 0.40509702673440495, "grad_norm": 0.4765625, "learning_rate": 1.4995693800228192e-05, "loss": 4.1852, "step": 1216 }, { "epoch": 0.40543016573665364, "grad_norm": 0.478515625, "learning_rate": 1.4995666331174905e-05, "loss": 4.2051, "step": 1217 }, { "epoch": 0.4057633047389023, "grad_norm": 0.4765625, "learning_rate": 1.4995638774813339e-05, "loss": 4.2406, "step": 1218 }, { "epoch": 0.406096443741151, "grad_norm": 0.466796875, "learning_rate": 1.4995611131143815e-05, "loss": 4.2132, "step": 1219 }, { "epoch": 0.4064295827433997, "grad_norm": 0.474609375, "learning_rate": 1.4995583400166658e-05, "loss": 4.2276, "step": 1220 }, { "epoch": 0.40676272174564837, "grad_norm": 0.482421875, "learning_rate": 1.4995555581882191e-05, "loss": 4.1431, "step": 1221 }, { "epoch": 0.40709586074789705, "grad_norm": 0.486328125, "learning_rate": 1.4995527676290735e-05, "loss": 4.2674, "step": 1222 }, { "epoch": 0.40742899975014574, "grad_norm": 0.484375, "learning_rate": 1.4995499683392617e-05, "loss": 4.1541, "step": 1223 }, { "epoch": 0.4077621387523944, "grad_norm": 0.458984375, "learning_rate": 1.4995471603188165e-05, "loss": 4.1564, "step": 1224 }, { "epoch": 0.4080952777546431, "grad_norm": 0.482421875, "learning_rate": 1.4995443435677701e-05, "loss": 4.1561, "step": 1225 }, { "epoch": 0.4084284167568918, "grad_norm": 0.46875, "learning_rate": 1.4995415180861558e-05, "loss": 4.1964, "step": 1226 }, { "epoch": 0.4087615557591405, "grad_norm": 0.474609375, "learning_rate": 1.4995386838740065e-05, "loss": 4.1521, "step": 1227 }, { "epoch": 0.4090946947613892, "grad_norm": 0.46484375, "learning_rate": 1.4995358409313548e-05, "loss": 4.1402, "step": 1228 }, { "epoch": 0.4094278337636379, "grad_norm": 0.486328125, "learning_rate": 1.4995329892582338e-05, "loss": 4.1419, "step": 1229 }, { "epoch": 0.4097609727658866, "grad_norm": 0.458984375, "learning_rate": 1.4995301288546774e-05, "loss": 4.232, "step": 1230 }, { "epoch": 0.41009411176813526, "grad_norm": 0.474609375, "learning_rate": 1.4995272597207183e-05, "loss": 4.1034, "step": 1231 }, { "epoch": 0.41042725077038394, "grad_norm": 0.4765625, "learning_rate": 1.49952438185639e-05, "loss": 4.1916, "step": 1232 }, { "epoch": 0.41076038977263263, "grad_norm": 0.474609375, "learning_rate": 1.4995214952617263e-05, "loss": 4.1669, "step": 1233 }, { "epoch": 0.4110935287748813, "grad_norm": 0.48046875, "learning_rate": 1.4995185999367605e-05, "loss": 4.1474, "step": 1234 }, { "epoch": 0.41142666777713, "grad_norm": 0.482421875, "learning_rate": 1.4995156958815267e-05, "loss": 4.205, "step": 1235 }, { "epoch": 0.4117598067793787, "grad_norm": 0.484375, "learning_rate": 1.499512783096058e-05, "loss": 4.1668, "step": 1236 }, { "epoch": 0.41209294578162736, "grad_norm": 0.470703125, "learning_rate": 1.4995098615803894e-05, "loss": 4.2017, "step": 1237 }, { "epoch": 0.41242608478387605, "grad_norm": 0.49609375, "learning_rate": 1.4995069313345541e-05, "loss": 4.173, "step": 1238 }, { "epoch": 0.4127592237861248, "grad_norm": 0.50390625, "learning_rate": 1.4995039923585864e-05, "loss": 4.1185, "step": 1239 }, { "epoch": 0.41309236278837347, "grad_norm": 0.490234375, "learning_rate": 1.4995010446525206e-05, "loss": 4.1423, "step": 1240 }, { "epoch": 0.41342550179062215, "grad_norm": 0.494140625, "learning_rate": 1.499498088216391e-05, "loss": 4.1382, "step": 1241 }, { "epoch": 0.41375864079287084, "grad_norm": 0.458984375, "learning_rate": 1.4994951230502324e-05, "loss": 4.218, "step": 1242 }, { "epoch": 0.4140917797951195, "grad_norm": 0.46484375, "learning_rate": 1.4994921491540787e-05, "loss": 4.2507, "step": 1243 }, { "epoch": 0.4144249187973682, "grad_norm": 0.478515625, "learning_rate": 1.499489166527965e-05, "loss": 4.1552, "step": 1244 }, { "epoch": 0.4147580577996169, "grad_norm": 0.451171875, "learning_rate": 1.499486175171926e-05, "loss": 4.2678, "step": 1245 }, { "epoch": 0.41509119680186557, "grad_norm": 0.47265625, "learning_rate": 1.4994831750859963e-05, "loss": 4.1853, "step": 1246 }, { "epoch": 0.41542433580411425, "grad_norm": 0.494140625, "learning_rate": 1.4994801662702108e-05, "loss": 4.1073, "step": 1247 }, { "epoch": 0.41575747480636294, "grad_norm": 0.47265625, "learning_rate": 1.4994771487246052e-05, "loss": 4.2121, "step": 1248 }, { "epoch": 0.4160906138086116, "grad_norm": 0.4609375, "learning_rate": 1.4994741224492138e-05, "loss": 4.2903, "step": 1249 }, { "epoch": 0.41642375281086036, "grad_norm": 0.46484375, "learning_rate": 1.4994710874440724e-05, "loss": 4.2139, "step": 1250 }, { "epoch": 0.41675689181310904, "grad_norm": 0.484375, "learning_rate": 1.4994680437092161e-05, "loss": 4.1682, "step": 1251 }, { "epoch": 0.4170900308153577, "grad_norm": 0.515625, "learning_rate": 1.4994649912446804e-05, "loss": 4.1803, "step": 1252 }, { "epoch": 0.4174231698176064, "grad_norm": 0.466796875, "learning_rate": 1.499461930050501e-05, "loss": 4.1512, "step": 1253 }, { "epoch": 0.4177563088198551, "grad_norm": 0.5, "learning_rate": 1.4994588601267135e-05, "loss": 4.1606, "step": 1254 }, { "epoch": 0.4180894478221038, "grad_norm": 0.474609375, "learning_rate": 1.4994557814733536e-05, "loss": 4.1943, "step": 1255 }, { "epoch": 0.41842258682435246, "grad_norm": 0.482421875, "learning_rate": 1.499452694090457e-05, "loss": 4.1726, "step": 1256 }, { "epoch": 0.41875572582660114, "grad_norm": 0.4609375, "learning_rate": 1.4994495979780601e-05, "loss": 4.2104, "step": 1257 }, { "epoch": 0.4190888648288498, "grad_norm": 0.466796875, "learning_rate": 1.4994464931361984e-05, "loss": 4.2136, "step": 1258 }, { "epoch": 0.4194220038310985, "grad_norm": 0.46875, "learning_rate": 1.4994433795649083e-05, "loss": 4.2498, "step": 1259 }, { "epoch": 0.4197551428333472, "grad_norm": 0.47265625, "learning_rate": 1.4994402572642262e-05, "loss": 4.2441, "step": 1260 }, { "epoch": 0.4200882818355959, "grad_norm": 0.51953125, "learning_rate": 1.4994371262341886e-05, "loss": 4.2183, "step": 1261 }, { "epoch": 0.4204214208378446, "grad_norm": 0.494140625, "learning_rate": 1.4994339864748316e-05, "loss": 4.2301, "step": 1262 }, { "epoch": 0.4207545598400933, "grad_norm": 0.5078125, "learning_rate": 1.499430837986192e-05, "loss": 4.1628, "step": 1263 }, { "epoch": 0.421087698842342, "grad_norm": 0.466796875, "learning_rate": 1.4994276807683063e-05, "loss": 4.1896, "step": 1264 }, { "epoch": 0.42142083784459067, "grad_norm": 0.4921875, "learning_rate": 1.4994245148212115e-05, "loss": 4.23, "step": 1265 }, { "epoch": 0.42175397684683935, "grad_norm": 0.4765625, "learning_rate": 1.4994213401449443e-05, "loss": 4.2908, "step": 1266 }, { "epoch": 0.42208711584908803, "grad_norm": 0.5078125, "learning_rate": 1.4994181567395417e-05, "loss": 4.1781, "step": 1267 }, { "epoch": 0.4224202548513367, "grad_norm": 0.49609375, "learning_rate": 1.4994149646050408e-05, "loss": 4.2266, "step": 1268 }, { "epoch": 0.4227533938535854, "grad_norm": 0.51953125, "learning_rate": 1.4994117637414787e-05, "loss": 4.1432, "step": 1269 }, { "epoch": 0.4230865328558341, "grad_norm": 0.51171875, "learning_rate": 1.499408554148893e-05, "loss": 4.233, "step": 1270 }, { "epoch": 0.42341967185808277, "grad_norm": 0.482421875, "learning_rate": 1.4994053358273208e-05, "loss": 4.166, "step": 1271 }, { "epoch": 0.42375281086033145, "grad_norm": 0.474609375, "learning_rate": 1.4994021087767997e-05, "loss": 4.2207, "step": 1272 }, { "epoch": 0.42408594986258014, "grad_norm": 0.494140625, "learning_rate": 1.499398872997367e-05, "loss": 4.2362, "step": 1273 }, { "epoch": 0.4244190888648289, "grad_norm": 0.50390625, "learning_rate": 1.4993956284890606e-05, "loss": 4.2302, "step": 1274 }, { "epoch": 0.42475222786707756, "grad_norm": 0.515625, "learning_rate": 1.4993923752519185e-05, "loss": 4.1501, "step": 1275 }, { "epoch": 0.42508536686932624, "grad_norm": 0.498046875, "learning_rate": 1.4993891132859785e-05, "loss": 4.1888, "step": 1276 }, { "epoch": 0.4254185058715749, "grad_norm": 0.4765625, "learning_rate": 1.4993858425912785e-05, "loss": 4.278, "step": 1277 }, { "epoch": 0.4257516448738236, "grad_norm": 0.48828125, "learning_rate": 1.4993825631678563e-05, "loss": 4.184, "step": 1278 }, { "epoch": 0.4260847838760723, "grad_norm": 0.51171875, "learning_rate": 1.4993792750157506e-05, "loss": 4.2569, "step": 1279 }, { "epoch": 0.426417922878321, "grad_norm": 0.5, "learning_rate": 1.4993759781349996e-05, "loss": 4.2181, "step": 1280 }, { "epoch": 0.42675106188056966, "grad_norm": 0.4921875, "learning_rate": 1.4993726725256414e-05, "loss": 4.2144, "step": 1281 }, { "epoch": 0.42708420088281834, "grad_norm": 0.486328125, "learning_rate": 1.4993693581877148e-05, "loss": 4.1705, "step": 1282 }, { "epoch": 0.427417339885067, "grad_norm": 0.5078125, "learning_rate": 1.4993660351212583e-05, "loss": 4.1912, "step": 1283 }, { "epoch": 0.4277504788873157, "grad_norm": 0.46484375, "learning_rate": 1.4993627033263106e-05, "loss": 4.2455, "step": 1284 }, { "epoch": 0.4280836178895644, "grad_norm": 0.482421875, "learning_rate": 1.4993593628029105e-05, "loss": 4.2258, "step": 1285 }, { "epoch": 0.42841675689181313, "grad_norm": 0.46484375, "learning_rate": 1.4993560135510969e-05, "loss": 4.2136, "step": 1286 }, { "epoch": 0.4287498958940618, "grad_norm": 0.49609375, "learning_rate": 1.499352655570909e-05, "loss": 4.1422, "step": 1287 }, { "epoch": 0.4290830348963105, "grad_norm": 0.4765625, "learning_rate": 1.4993492888623857e-05, "loss": 4.1999, "step": 1288 }, { "epoch": 0.4294161738985592, "grad_norm": 0.48828125, "learning_rate": 1.4993459134255663e-05, "loss": 4.2196, "step": 1289 }, { "epoch": 0.42974931290080787, "grad_norm": 0.478515625, "learning_rate": 1.4993425292604899e-05, "loss": 4.1741, "step": 1290 }, { "epoch": 0.43008245190305655, "grad_norm": 0.46484375, "learning_rate": 1.4993391363671963e-05, "loss": 4.1885, "step": 1291 }, { "epoch": 0.43041559090530523, "grad_norm": 0.478515625, "learning_rate": 1.4993357347457249e-05, "loss": 4.1781, "step": 1292 }, { "epoch": 0.4307487299075539, "grad_norm": 0.5078125, "learning_rate": 1.499332324396115e-05, "loss": 4.2074, "step": 1293 }, { "epoch": 0.4310818689098026, "grad_norm": 0.46875, "learning_rate": 1.4993289053184066e-05, "loss": 4.1649, "step": 1294 }, { "epoch": 0.4314150079120513, "grad_norm": 0.51953125, "learning_rate": 1.4993254775126398e-05, "loss": 4.1315, "step": 1295 }, { "epoch": 0.43174814691429997, "grad_norm": 0.49609375, "learning_rate": 1.4993220409788538e-05, "loss": 4.1255, "step": 1296 }, { "epoch": 0.4320812859165487, "grad_norm": 0.484375, "learning_rate": 1.4993185957170893e-05, "loss": 4.1654, "step": 1297 }, { "epoch": 0.4324144249187974, "grad_norm": 0.482421875, "learning_rate": 1.4993151417273861e-05, "loss": 4.2235, "step": 1298 }, { "epoch": 0.4327475639210461, "grad_norm": 0.48046875, "learning_rate": 1.4993116790097846e-05, "loss": 4.1728, "step": 1299 }, { "epoch": 0.43308070292329476, "grad_norm": 0.49609375, "learning_rate": 1.499308207564325e-05, "loss": 4.2245, "step": 1300 }, { "epoch": 0.43341384192554344, "grad_norm": 0.4765625, "learning_rate": 1.4993047273910478e-05, "loss": 4.2282, "step": 1301 }, { "epoch": 0.4337469809277921, "grad_norm": 0.4765625, "learning_rate": 1.4993012384899935e-05, "loss": 4.1916, "step": 1302 }, { "epoch": 0.4340801199300408, "grad_norm": 0.50390625, "learning_rate": 1.4992977408612027e-05, "loss": 4.2057, "step": 1303 }, { "epoch": 0.4344132589322895, "grad_norm": 0.4921875, "learning_rate": 1.4992942345047162e-05, "loss": 4.1271, "step": 1304 }, { "epoch": 0.4347463979345382, "grad_norm": 0.498046875, "learning_rate": 1.499290719420575e-05, "loss": 4.1685, "step": 1305 }, { "epoch": 0.43507953693678686, "grad_norm": 0.458984375, "learning_rate": 1.4992871956088198e-05, "loss": 4.1543, "step": 1306 }, { "epoch": 0.43541267593903554, "grad_norm": 0.484375, "learning_rate": 1.4992836630694917e-05, "loss": 4.1744, "step": 1307 }, { "epoch": 0.4357458149412842, "grad_norm": 0.44921875, "learning_rate": 1.4992801218026319e-05, "loss": 4.2003, "step": 1308 }, { "epoch": 0.43607895394353297, "grad_norm": 0.4921875, "learning_rate": 1.4992765718082815e-05, "loss": 4.1791, "step": 1309 }, { "epoch": 0.43641209294578165, "grad_norm": 0.47265625, "learning_rate": 1.4992730130864818e-05, "loss": 4.1548, "step": 1310 }, { "epoch": 0.43674523194803033, "grad_norm": 0.46875, "learning_rate": 1.499269445637275e-05, "loss": 4.14, "step": 1311 }, { "epoch": 0.437078370950279, "grad_norm": 0.46875, "learning_rate": 1.4992658694607017e-05, "loss": 4.2122, "step": 1312 }, { "epoch": 0.4374115099525277, "grad_norm": 0.5078125, "learning_rate": 1.4992622845568039e-05, "loss": 4.1654, "step": 1313 }, { "epoch": 0.4377446489547764, "grad_norm": 0.48828125, "learning_rate": 1.4992586909256234e-05, "loss": 4.1999, "step": 1314 }, { "epoch": 0.43807778795702507, "grad_norm": 0.50390625, "learning_rate": 1.4992550885672021e-05, "loss": 4.1519, "step": 1315 }, { "epoch": 0.43841092695927375, "grad_norm": 0.48046875, "learning_rate": 1.4992514774815819e-05, "loss": 4.1015, "step": 1316 }, { "epoch": 0.43874406596152243, "grad_norm": 0.494140625, "learning_rate": 1.499247857668805e-05, "loss": 4.1914, "step": 1317 }, { "epoch": 0.4390772049637711, "grad_norm": 0.474609375, "learning_rate": 1.499244229128913e-05, "loss": 4.1704, "step": 1318 }, { "epoch": 0.4394103439660198, "grad_norm": 0.494140625, "learning_rate": 1.4992405918619492e-05, "loss": 4.1565, "step": 1319 }, { "epoch": 0.4397434829682685, "grad_norm": 0.494140625, "learning_rate": 1.4992369458679548e-05, "loss": 4.1844, "step": 1320 }, { "epoch": 0.4400766219705172, "grad_norm": 0.462890625, "learning_rate": 1.4992332911469733e-05, "loss": 4.2251, "step": 1321 }, { "epoch": 0.4404097609727659, "grad_norm": 0.482421875, "learning_rate": 1.4992296276990464e-05, "loss": 4.2182, "step": 1322 }, { "epoch": 0.4407428999750146, "grad_norm": 0.4765625, "learning_rate": 1.4992259555242174e-05, "loss": 4.1548, "step": 1323 }, { "epoch": 0.4410760389772633, "grad_norm": 0.462890625, "learning_rate": 1.4992222746225287e-05, "loss": 4.2032, "step": 1324 }, { "epoch": 0.44140917797951196, "grad_norm": 0.484375, "learning_rate": 1.4992185849940233e-05, "loss": 4.213, "step": 1325 }, { "epoch": 0.44174231698176064, "grad_norm": 0.50390625, "learning_rate": 1.4992148866387443e-05, "loss": 4.189, "step": 1326 }, { "epoch": 0.4420754559840093, "grad_norm": 0.48828125, "learning_rate": 1.4992111795567346e-05, "loss": 4.1542, "step": 1327 }, { "epoch": 0.442408594986258, "grad_norm": 0.48046875, "learning_rate": 1.4992074637480376e-05, "loss": 4.196, "step": 1328 }, { "epoch": 0.4427417339885067, "grad_norm": 0.478515625, "learning_rate": 1.4992037392126963e-05, "loss": 4.2515, "step": 1329 }, { "epoch": 0.4430748729907554, "grad_norm": 0.478515625, "learning_rate": 1.4992000059507542e-05, "loss": 4.284, "step": 1330 }, { "epoch": 0.44340801199300406, "grad_norm": 0.46875, "learning_rate": 1.4991962639622548e-05, "loss": 4.2723, "step": 1331 }, { "epoch": 0.44374115099525274, "grad_norm": 0.4921875, "learning_rate": 1.4991925132472417e-05, "loss": 4.2079, "step": 1332 }, { "epoch": 0.4440742899975015, "grad_norm": 0.486328125, "learning_rate": 1.4991887538057585e-05, "loss": 4.1866, "step": 1333 }, { "epoch": 0.44440742899975016, "grad_norm": 0.4765625, "learning_rate": 1.4991849856378492e-05, "loss": 4.2018, "step": 1334 }, { "epoch": 0.44474056800199885, "grad_norm": 0.453125, "learning_rate": 1.4991812087435575e-05, "loss": 4.212, "step": 1335 }, { "epoch": 0.44507370700424753, "grad_norm": 0.48046875, "learning_rate": 1.4991774231229273e-05, "loss": 4.2192, "step": 1336 }, { "epoch": 0.4454068460064962, "grad_norm": 0.4921875, "learning_rate": 1.4991736287760029e-05, "loss": 4.2224, "step": 1337 }, { "epoch": 0.4457399850087449, "grad_norm": 0.49609375, "learning_rate": 1.4991698257028286e-05, "loss": 4.1873, "step": 1338 }, { "epoch": 0.4460731240109936, "grad_norm": 0.478515625, "learning_rate": 1.4991660139034485e-05, "loss": 4.1606, "step": 1339 }, { "epoch": 0.44640626301324227, "grad_norm": 0.50390625, "learning_rate": 1.4991621933779068e-05, "loss": 4.1721, "step": 1340 }, { "epoch": 0.44673940201549095, "grad_norm": 0.47265625, "learning_rate": 1.4991583641262484e-05, "loss": 4.1706, "step": 1341 }, { "epoch": 0.44707254101773963, "grad_norm": 0.515625, "learning_rate": 1.4991545261485178e-05, "loss": 4.1979, "step": 1342 }, { "epoch": 0.4474056800199883, "grad_norm": 0.474609375, "learning_rate": 1.4991506794447594e-05, "loss": 4.2339, "step": 1343 }, { "epoch": 0.44773881902223706, "grad_norm": 0.50390625, "learning_rate": 1.4991468240150184e-05, "loss": 4.1682, "step": 1344 }, { "epoch": 0.44807195802448574, "grad_norm": 0.4921875, "learning_rate": 1.4991429598593396e-05, "loss": 4.1512, "step": 1345 }, { "epoch": 0.4484050970267344, "grad_norm": 0.515625, "learning_rate": 1.4991390869777679e-05, "loss": 4.1958, "step": 1346 }, { "epoch": 0.4487382360289831, "grad_norm": 0.48046875, "learning_rate": 1.4991352053703483e-05, "loss": 4.1905, "step": 1347 }, { "epoch": 0.4490713750312318, "grad_norm": 0.46484375, "learning_rate": 1.4991313150371266e-05, "loss": 4.2016, "step": 1348 }, { "epoch": 0.4494045140334805, "grad_norm": 0.482421875, "learning_rate": 1.4991274159781472e-05, "loss": 4.2606, "step": 1349 }, { "epoch": 0.44973765303572916, "grad_norm": 0.4765625, "learning_rate": 1.4991235081934563e-05, "loss": 4.2191, "step": 1350 }, { "epoch": 0.45007079203797784, "grad_norm": 0.53515625, "learning_rate": 1.4991195916830989e-05, "loss": 4.088, "step": 1351 }, { "epoch": 0.4504039310402265, "grad_norm": 0.50390625, "learning_rate": 1.4991156664471212e-05, "loss": 4.105, "step": 1352 }, { "epoch": 0.4507370700424752, "grad_norm": 0.498046875, "learning_rate": 1.4991117324855683e-05, "loss": 4.1698, "step": 1353 }, { "epoch": 0.4510702090447239, "grad_norm": 0.478515625, "learning_rate": 1.4991077897984864e-05, "loss": 4.1724, "step": 1354 }, { "epoch": 0.4514033480469726, "grad_norm": 0.5, "learning_rate": 1.4991038383859213e-05, "loss": 4.2211, "step": 1355 }, { "epoch": 0.4517364870492213, "grad_norm": 0.48828125, "learning_rate": 1.4990998782479191e-05, "loss": 4.1711, "step": 1356 }, { "epoch": 0.45206962605147, "grad_norm": 0.474609375, "learning_rate": 1.4990959093845257e-05, "loss": 4.236, "step": 1357 }, { "epoch": 0.4524027650537187, "grad_norm": 0.49609375, "learning_rate": 1.4990919317957877e-05, "loss": 4.123, "step": 1358 }, { "epoch": 0.45273590405596736, "grad_norm": 0.51171875, "learning_rate": 1.4990879454817512e-05, "loss": 4.1394, "step": 1359 }, { "epoch": 0.45306904305821605, "grad_norm": 0.474609375, "learning_rate": 1.4990839504424624e-05, "loss": 4.1933, "step": 1360 }, { "epoch": 0.45340218206046473, "grad_norm": 0.484375, "learning_rate": 1.4990799466779682e-05, "loss": 4.2202, "step": 1361 }, { "epoch": 0.4537353210627134, "grad_norm": 0.4609375, "learning_rate": 1.4990759341883153e-05, "loss": 4.1672, "step": 1362 }, { "epoch": 0.4540684600649621, "grad_norm": 0.474609375, "learning_rate": 1.4990719129735502e-05, "loss": 4.2002, "step": 1363 }, { "epoch": 0.4544015990672108, "grad_norm": 0.48046875, "learning_rate": 1.4990678830337196e-05, "loss": 4.2332, "step": 1364 }, { "epoch": 0.45473473806945947, "grad_norm": 0.478515625, "learning_rate": 1.499063844368871e-05, "loss": 4.1652, "step": 1365 }, { "epoch": 0.45506787707170815, "grad_norm": 0.490234375, "learning_rate": 1.4990597969790509e-05, "loss": 4.198, "step": 1366 }, { "epoch": 0.45540101607395683, "grad_norm": 0.5, "learning_rate": 1.4990557408643064e-05, "loss": 4.1452, "step": 1367 }, { "epoch": 0.45573415507620557, "grad_norm": 0.49609375, "learning_rate": 1.4990516760246851e-05, "loss": 4.2025, "step": 1368 }, { "epoch": 0.45606729407845426, "grad_norm": 0.48046875, "learning_rate": 1.4990476024602344e-05, "loss": 4.1876, "step": 1369 }, { "epoch": 0.45640043308070294, "grad_norm": 0.484375, "learning_rate": 1.4990435201710014e-05, "loss": 4.1323, "step": 1370 }, { "epoch": 0.4567335720829516, "grad_norm": 0.482421875, "learning_rate": 1.499039429157034e-05, "loss": 4.18, "step": 1371 }, { "epoch": 0.4570667110852003, "grad_norm": 0.482421875, "learning_rate": 1.4990353294183797e-05, "loss": 4.1347, "step": 1372 }, { "epoch": 0.457399850087449, "grad_norm": 0.47265625, "learning_rate": 1.499031220955086e-05, "loss": 4.1747, "step": 1373 }, { "epoch": 0.4577329890896977, "grad_norm": 0.44921875, "learning_rate": 1.4990271037672009e-05, "loss": 4.2342, "step": 1374 }, { "epoch": 0.45806612809194636, "grad_norm": 0.5, "learning_rate": 1.4990229778547727e-05, "loss": 4.116, "step": 1375 }, { "epoch": 0.45839926709419504, "grad_norm": 0.494140625, "learning_rate": 1.4990188432178488e-05, "loss": 4.2179, "step": 1376 }, { "epoch": 0.4587324060964437, "grad_norm": 0.50390625, "learning_rate": 1.4990146998564782e-05, "loss": 4.0881, "step": 1377 }, { "epoch": 0.4590655450986924, "grad_norm": 0.474609375, "learning_rate": 1.4990105477707083e-05, "loss": 4.2145, "step": 1378 }, { "epoch": 0.4593986841009411, "grad_norm": 0.44921875, "learning_rate": 1.4990063869605882e-05, "loss": 4.2181, "step": 1379 }, { "epoch": 0.45973182310318983, "grad_norm": 0.46875, "learning_rate": 1.499002217426166e-05, "loss": 4.1655, "step": 1380 }, { "epoch": 0.4600649621054385, "grad_norm": 0.46875, "learning_rate": 1.4989980391674902e-05, "loss": 4.159, "step": 1381 }, { "epoch": 0.4603981011076872, "grad_norm": 0.470703125, "learning_rate": 1.4989938521846097e-05, "loss": 4.1614, "step": 1382 }, { "epoch": 0.4607312401099359, "grad_norm": 0.46875, "learning_rate": 1.4989896564775729e-05, "loss": 4.1576, "step": 1383 }, { "epoch": 0.46106437911218456, "grad_norm": 0.4921875, "learning_rate": 1.498985452046429e-05, "loss": 4.1716, "step": 1384 }, { "epoch": 0.46139751811443325, "grad_norm": 0.478515625, "learning_rate": 1.498981238891227e-05, "loss": 4.2118, "step": 1385 }, { "epoch": 0.46173065711668193, "grad_norm": 0.5, "learning_rate": 1.4989770170120157e-05, "loss": 4.2103, "step": 1386 }, { "epoch": 0.4620637961189306, "grad_norm": 0.5234375, "learning_rate": 1.4989727864088446e-05, "loss": 4.1554, "step": 1387 }, { "epoch": 0.4623969351211793, "grad_norm": 0.44921875, "learning_rate": 1.4989685470817628e-05, "loss": 4.2206, "step": 1388 }, { "epoch": 0.462730074123428, "grad_norm": 0.5078125, "learning_rate": 1.4989642990308195e-05, "loss": 4.1379, "step": 1389 }, { "epoch": 0.46306321312567666, "grad_norm": 0.5078125, "learning_rate": 1.4989600422560646e-05, "loss": 4.1553, "step": 1390 }, { "epoch": 0.4633963521279254, "grad_norm": 0.5, "learning_rate": 1.4989557767575474e-05, "loss": 4.2336, "step": 1391 }, { "epoch": 0.4637294911301741, "grad_norm": 0.5, "learning_rate": 1.4989515025353175e-05, "loss": 4.1741, "step": 1392 }, { "epoch": 0.46406263013242277, "grad_norm": 0.494140625, "learning_rate": 1.498947219589425e-05, "loss": 4.3039, "step": 1393 }, { "epoch": 0.46439576913467145, "grad_norm": 0.494140625, "learning_rate": 1.4989429279199195e-05, "loss": 4.1976, "step": 1394 }, { "epoch": 0.46472890813692014, "grad_norm": 0.4921875, "learning_rate": 1.4989386275268509e-05, "loss": 4.1466, "step": 1395 }, { "epoch": 0.4650620471391688, "grad_norm": 0.494140625, "learning_rate": 1.4989343184102697e-05, "loss": 4.1342, "step": 1396 }, { "epoch": 0.4653951861414175, "grad_norm": 0.453125, "learning_rate": 1.4989300005702259e-05, "loss": 4.2243, "step": 1397 }, { "epoch": 0.4657283251436662, "grad_norm": 0.51171875, "learning_rate": 1.4989256740067699e-05, "loss": 4.1887, "step": 1398 }, { "epoch": 0.46606146414591487, "grad_norm": 0.48828125, "learning_rate": 1.4989213387199519e-05, "loss": 4.2252, "step": 1399 }, { "epoch": 0.46639460314816356, "grad_norm": 0.486328125, "learning_rate": 1.4989169947098222e-05, "loss": 4.2015, "step": 1400 }, { "epoch": 0.46672774215041224, "grad_norm": 0.478515625, "learning_rate": 1.4989126419764319e-05, "loss": 4.2728, "step": 1401 }, { "epoch": 0.4670608811526609, "grad_norm": 0.484375, "learning_rate": 1.4989082805198313e-05, "loss": 4.2296, "step": 1402 }, { "epoch": 0.46739402015490966, "grad_norm": 0.5234375, "learning_rate": 1.4989039103400715e-05, "loss": 4.2124, "step": 1403 }, { "epoch": 0.46772715915715835, "grad_norm": 0.490234375, "learning_rate": 1.4988995314372033e-05, "loss": 4.1783, "step": 1404 }, { "epoch": 0.46806029815940703, "grad_norm": 0.482421875, "learning_rate": 1.4988951438112777e-05, "loss": 4.2243, "step": 1405 }, { "epoch": 0.4683934371616557, "grad_norm": 0.484375, "learning_rate": 1.4988907474623456e-05, "loss": 4.1699, "step": 1406 }, { "epoch": 0.4687265761639044, "grad_norm": 0.478515625, "learning_rate": 1.4988863423904586e-05, "loss": 4.1211, "step": 1407 }, { "epoch": 0.4690597151661531, "grad_norm": 0.478515625, "learning_rate": 1.4988819285956675e-05, "loss": 4.2092, "step": 1408 }, { "epoch": 0.46939285416840176, "grad_norm": 0.4921875, "learning_rate": 1.4988775060780243e-05, "loss": 4.1308, "step": 1409 }, { "epoch": 0.46972599317065045, "grad_norm": 0.490234375, "learning_rate": 1.4988730748375803e-05, "loss": 4.1475, "step": 1410 }, { "epoch": 0.47005913217289913, "grad_norm": 0.490234375, "learning_rate": 1.4988686348743865e-05, "loss": 4.1793, "step": 1411 }, { "epoch": 0.4703922711751478, "grad_norm": 0.48828125, "learning_rate": 1.4988641861884956e-05, "loss": 4.1875, "step": 1412 }, { "epoch": 0.4707254101773965, "grad_norm": 0.5, "learning_rate": 1.4988597287799589e-05, "loss": 4.2461, "step": 1413 }, { "epoch": 0.4710585491796452, "grad_norm": 0.458984375, "learning_rate": 1.4988552626488283e-05, "loss": 4.2182, "step": 1414 }, { "epoch": 0.4713916881818939, "grad_norm": 0.4765625, "learning_rate": 1.4988507877951559e-05, "loss": 4.1659, "step": 1415 }, { "epoch": 0.4717248271841426, "grad_norm": 0.474609375, "learning_rate": 1.498846304218994e-05, "loss": 4.1939, "step": 1416 }, { "epoch": 0.4720579661863913, "grad_norm": 0.474609375, "learning_rate": 1.4988418119203944e-05, "loss": 4.1848, "step": 1417 }, { "epoch": 0.47239110518863997, "grad_norm": 0.5, "learning_rate": 1.4988373108994099e-05, "loss": 4.1365, "step": 1418 }, { "epoch": 0.47272424419088865, "grad_norm": 0.49609375, "learning_rate": 1.4988328011560926e-05, "loss": 4.1385, "step": 1419 }, { "epoch": 0.47305738319313734, "grad_norm": 0.53515625, "learning_rate": 1.4988282826904952e-05, "loss": 4.1767, "step": 1420 }, { "epoch": 0.473390522195386, "grad_norm": 0.486328125, "learning_rate": 1.4988237555026702e-05, "loss": 4.1744, "step": 1421 }, { "epoch": 0.4737236611976347, "grad_norm": 0.482421875, "learning_rate": 1.4988192195926705e-05, "loss": 4.1847, "step": 1422 }, { "epoch": 0.4740568001998834, "grad_norm": 0.48828125, "learning_rate": 1.4988146749605485e-05, "loss": 4.201, "step": 1423 }, { "epoch": 0.47438993920213207, "grad_norm": 0.5078125, "learning_rate": 1.4988101216063579e-05, "loss": 4.0896, "step": 1424 }, { "epoch": 0.47472307820438076, "grad_norm": 0.486328125, "learning_rate": 1.4988055595301511e-05, "loss": 4.1321, "step": 1425 }, { "epoch": 0.47505621720662944, "grad_norm": 0.4765625, "learning_rate": 1.4988009887319815e-05, "loss": 4.1805, "step": 1426 }, { "epoch": 0.4753893562088782, "grad_norm": 0.51171875, "learning_rate": 1.4987964092119023e-05, "loss": 4.1325, "step": 1427 }, { "epoch": 0.47572249521112686, "grad_norm": 0.466796875, "learning_rate": 1.4987918209699666e-05, "loss": 4.1655, "step": 1428 }, { "epoch": 0.47605563421337554, "grad_norm": 0.4765625, "learning_rate": 1.4987872240062283e-05, "loss": 4.1598, "step": 1429 }, { "epoch": 0.47638877321562423, "grad_norm": 0.48046875, "learning_rate": 1.4987826183207406e-05, "loss": 4.1626, "step": 1430 }, { "epoch": 0.4767219122178729, "grad_norm": 0.482421875, "learning_rate": 1.4987780039135574e-05, "loss": 4.1859, "step": 1431 }, { "epoch": 0.4770550512201216, "grad_norm": 0.4921875, "learning_rate": 1.4987733807847322e-05, "loss": 4.2235, "step": 1432 }, { "epoch": 0.4773881902223703, "grad_norm": 0.48828125, "learning_rate": 1.4987687489343188e-05, "loss": 4.17, "step": 1433 }, { "epoch": 0.47772132922461896, "grad_norm": 0.46875, "learning_rate": 1.4987641083623715e-05, "loss": 4.2119, "step": 1434 }, { "epoch": 0.47805446822686765, "grad_norm": 0.4609375, "learning_rate": 1.4987594590689443e-05, "loss": 4.2258, "step": 1435 }, { "epoch": 0.47838760722911633, "grad_norm": 0.4765625, "learning_rate": 1.498754801054091e-05, "loss": 4.1816, "step": 1436 }, { "epoch": 0.478720746231365, "grad_norm": 0.5234375, "learning_rate": 1.498750134317866e-05, "loss": 4.1745, "step": 1437 }, { "epoch": 0.47905388523361375, "grad_norm": 0.498046875, "learning_rate": 1.4987454588603238e-05, "loss": 4.1544, "step": 1438 }, { "epoch": 0.47938702423586244, "grad_norm": 0.48828125, "learning_rate": 1.4987407746815191e-05, "loss": 4.1585, "step": 1439 }, { "epoch": 0.4797201632381111, "grad_norm": 0.470703125, "learning_rate": 1.4987360817815058e-05, "loss": 4.1732, "step": 1440 }, { "epoch": 0.4800533022403598, "grad_norm": 0.51171875, "learning_rate": 1.4987313801603389e-05, "loss": 4.268, "step": 1441 }, { "epoch": 0.4803864412426085, "grad_norm": 0.494140625, "learning_rate": 1.4987266698180732e-05, "loss": 4.2112, "step": 1442 }, { "epoch": 0.48071958024485717, "grad_norm": 0.515625, "learning_rate": 1.4987219507547637e-05, "loss": 4.1376, "step": 1443 }, { "epoch": 0.48105271924710585, "grad_norm": 0.5, "learning_rate": 1.498717222970465e-05, "loss": 4.1988, "step": 1444 }, { "epoch": 0.48138585824935454, "grad_norm": 0.478515625, "learning_rate": 1.4987124864652323e-05, "loss": 4.1837, "step": 1445 }, { "epoch": 0.4817189972516032, "grad_norm": 0.4765625, "learning_rate": 1.498707741239121e-05, "loss": 4.2076, "step": 1446 }, { "epoch": 0.4820521362538519, "grad_norm": 0.486328125, "learning_rate": 1.4987029872921864e-05, "loss": 4.1929, "step": 1447 }, { "epoch": 0.4823852752561006, "grad_norm": 0.46875, "learning_rate": 1.4986982246244834e-05, "loss": 4.1426, "step": 1448 }, { "epoch": 0.48271841425834927, "grad_norm": 0.474609375, "learning_rate": 1.4986934532360678e-05, "loss": 4.2304, "step": 1449 }, { "epoch": 0.483051553260598, "grad_norm": 0.47265625, "learning_rate": 1.4986886731269953e-05, "loss": 4.1358, "step": 1450 }, { "epoch": 0.4833846922628467, "grad_norm": 0.4765625, "learning_rate": 1.4986838842973212e-05, "loss": 4.1517, "step": 1451 }, { "epoch": 0.4837178312650954, "grad_norm": 0.47265625, "learning_rate": 1.4986790867471017e-05, "loss": 4.1856, "step": 1452 }, { "epoch": 0.48405097026734406, "grad_norm": 0.455078125, "learning_rate": 1.4986742804763923e-05, "loss": 4.2042, "step": 1453 }, { "epoch": 0.48438410926959274, "grad_norm": 0.51953125, "learning_rate": 1.4986694654852492e-05, "loss": 4.1505, "step": 1454 }, { "epoch": 0.4847172482718414, "grad_norm": 0.4609375, "learning_rate": 1.4986646417737285e-05, "loss": 4.1589, "step": 1455 }, { "epoch": 0.4850503872740901, "grad_norm": 0.478515625, "learning_rate": 1.4986598093418863e-05, "loss": 4.2037, "step": 1456 }, { "epoch": 0.4853835262763388, "grad_norm": 0.474609375, "learning_rate": 1.498654968189779e-05, "loss": 4.2122, "step": 1457 }, { "epoch": 0.4857166652785875, "grad_norm": 0.52734375, "learning_rate": 1.4986501183174626e-05, "loss": 4.1396, "step": 1458 }, { "epoch": 0.48604980428083616, "grad_norm": 0.482421875, "learning_rate": 1.4986452597249943e-05, "loss": 4.1428, "step": 1459 }, { "epoch": 0.48638294328308485, "grad_norm": 0.50390625, "learning_rate": 1.49864039241243e-05, "loss": 4.1606, "step": 1460 }, { "epoch": 0.48671608228533353, "grad_norm": 0.51171875, "learning_rate": 1.4986355163798269e-05, "loss": 4.1961, "step": 1461 }, { "epoch": 0.48704922128758227, "grad_norm": 0.478515625, "learning_rate": 1.4986306316272415e-05, "loss": 4.2113, "step": 1462 }, { "epoch": 0.48738236028983095, "grad_norm": 0.49609375, "learning_rate": 1.4986257381547307e-05, "loss": 4.1709, "step": 1463 }, { "epoch": 0.48771549929207964, "grad_norm": 0.50390625, "learning_rate": 1.4986208359623515e-05, "loss": 4.1243, "step": 1464 }, { "epoch": 0.4880486382943283, "grad_norm": 0.478515625, "learning_rate": 1.498615925050161e-05, "loss": 4.1277, "step": 1465 }, { "epoch": 0.488381777296577, "grad_norm": 0.5, "learning_rate": 1.498611005418217e-05, "loss": 4.217, "step": 1466 }, { "epoch": 0.4887149162988257, "grad_norm": 0.5, "learning_rate": 1.4986060770665759e-05, "loss": 4.193, "step": 1467 }, { "epoch": 0.48904805530107437, "grad_norm": 0.474609375, "learning_rate": 1.4986011399952954e-05, "loss": 4.2231, "step": 1468 }, { "epoch": 0.48938119430332305, "grad_norm": 0.484375, "learning_rate": 1.4985961942044332e-05, "loss": 4.1814, "step": 1469 }, { "epoch": 0.48971433330557174, "grad_norm": 0.44921875, "learning_rate": 1.4985912396940467e-05, "loss": 4.2213, "step": 1470 }, { "epoch": 0.4900474723078204, "grad_norm": 0.5078125, "learning_rate": 1.4985862764641938e-05, "loss": 4.1924, "step": 1471 }, { "epoch": 0.4903806113100691, "grad_norm": 0.498046875, "learning_rate": 1.4985813045149321e-05, "loss": 4.1935, "step": 1472 }, { "epoch": 0.49071375031231784, "grad_norm": 0.48828125, "learning_rate": 1.4985763238463199e-05, "loss": 4.1756, "step": 1473 }, { "epoch": 0.4910468893145665, "grad_norm": 0.4765625, "learning_rate": 1.4985713344584148e-05, "loss": 4.121, "step": 1474 }, { "epoch": 0.4913800283168152, "grad_norm": 0.5078125, "learning_rate": 1.498566336351275e-05, "loss": 4.168, "step": 1475 }, { "epoch": 0.4917131673190639, "grad_norm": 0.46484375, "learning_rate": 1.4985613295249587e-05, "loss": 4.1453, "step": 1476 }, { "epoch": 0.4920463063213126, "grad_norm": 0.482421875, "learning_rate": 1.4985563139795244e-05, "loss": 4.1728, "step": 1477 }, { "epoch": 0.49237944532356126, "grad_norm": 0.515625, "learning_rate": 1.4985512897150305e-05, "loss": 4.188, "step": 1478 }, { "epoch": 0.49271258432580994, "grad_norm": 0.484375, "learning_rate": 1.4985462567315353e-05, "loss": 4.1604, "step": 1479 }, { "epoch": 0.4930457233280586, "grad_norm": 0.4921875, "learning_rate": 1.4985412150290975e-05, "loss": 4.2298, "step": 1480 }, { "epoch": 0.4933788623303073, "grad_norm": 0.482421875, "learning_rate": 1.4985361646077759e-05, "loss": 4.0945, "step": 1481 }, { "epoch": 0.493712001332556, "grad_norm": 0.490234375, "learning_rate": 1.4985311054676296e-05, "loss": 4.1705, "step": 1482 }, { "epoch": 0.4940451403348047, "grad_norm": 0.470703125, "learning_rate": 1.498526037608717e-05, "loss": 4.1927, "step": 1483 }, { "epoch": 0.49437827933705336, "grad_norm": 0.52734375, "learning_rate": 1.4985209610310974e-05, "loss": 4.1933, "step": 1484 }, { "epoch": 0.4947114183393021, "grad_norm": 0.50390625, "learning_rate": 1.49851587573483e-05, "loss": 4.1703, "step": 1485 }, { "epoch": 0.4950445573415508, "grad_norm": 0.474609375, "learning_rate": 1.4985107817199736e-05, "loss": 4.1732, "step": 1486 }, { "epoch": 0.49537769634379947, "grad_norm": 0.51953125, "learning_rate": 1.4985056789865882e-05, "loss": 4.1376, "step": 1487 }, { "epoch": 0.49571083534604815, "grad_norm": 0.50390625, "learning_rate": 1.4985005675347328e-05, "loss": 4.1962, "step": 1488 }, { "epoch": 0.49604397434829683, "grad_norm": 0.5, "learning_rate": 1.498495447364467e-05, "loss": 4.1833, "step": 1489 }, { "epoch": 0.4963771133505455, "grad_norm": 0.51171875, "learning_rate": 1.4984903184758505e-05, "loss": 4.1554, "step": 1490 }, { "epoch": 0.4967102523527942, "grad_norm": 0.52734375, "learning_rate": 1.498485180868943e-05, "loss": 4.1059, "step": 1491 }, { "epoch": 0.4970433913550429, "grad_norm": 0.466796875, "learning_rate": 1.4984800345438043e-05, "loss": 4.0961, "step": 1492 }, { "epoch": 0.49737653035729157, "grad_norm": 0.482421875, "learning_rate": 1.4984748795004945e-05, "loss": 4.1822, "step": 1493 }, { "epoch": 0.49770966935954025, "grad_norm": 0.46484375, "learning_rate": 1.4984697157390735e-05, "loss": 4.1593, "step": 1494 }, { "epoch": 0.49804280836178894, "grad_norm": 0.478515625, "learning_rate": 1.4984645432596015e-05, "loss": 4.1932, "step": 1495 }, { "epoch": 0.4983759473640376, "grad_norm": 0.51171875, "learning_rate": 1.4984593620621387e-05, "loss": 4.1476, "step": 1496 }, { "epoch": 0.49870908636628636, "grad_norm": 0.490234375, "learning_rate": 1.4984541721467456e-05, "loss": 4.2524, "step": 1497 }, { "epoch": 0.49904222536853504, "grad_norm": 0.484375, "learning_rate": 1.4984489735134826e-05, "loss": 4.2272, "step": 1498 }, { "epoch": 0.4993753643707837, "grad_norm": 0.466796875, "learning_rate": 1.49844376616241e-05, "loss": 4.1511, "step": 1499 }, { "epoch": 0.4997085033730324, "grad_norm": 0.466796875, "learning_rate": 1.4984385500935889e-05, "loss": 4.2039, "step": 1500 }, { "epoch": 0.500041642375281, "grad_norm": 0.498046875, "learning_rate": 1.4984333253070797e-05, "loss": 4.0985, "step": 1501 }, { "epoch": 0.5003747813775298, "grad_norm": 0.46484375, "learning_rate": 1.4984280918029431e-05, "loss": 4.1658, "step": 1502 }, { "epoch": 0.5007079203797785, "grad_norm": 0.453125, "learning_rate": 1.4984228495812407e-05, "loss": 4.244, "step": 1503 }, { "epoch": 0.5010410593820271, "grad_norm": 0.48828125, "learning_rate": 1.498417598642033e-05, "loss": 4.2055, "step": 1504 }, { "epoch": 0.5013741983842759, "grad_norm": 0.51171875, "learning_rate": 1.4984123389853813e-05, "loss": 4.2133, "step": 1505 }, { "epoch": 0.5017073373865245, "grad_norm": 0.4765625, "learning_rate": 1.4984070706113469e-05, "loss": 4.1713, "step": 1506 }, { "epoch": 0.5020404763887732, "grad_norm": 0.486328125, "learning_rate": 1.4984017935199915e-05, "loss": 4.1268, "step": 1507 }, { "epoch": 0.5023736153910219, "grad_norm": 0.478515625, "learning_rate": 1.498396507711376e-05, "loss": 4.1768, "step": 1508 }, { "epoch": 0.5027067543932706, "grad_norm": 0.48828125, "learning_rate": 1.498391213185562e-05, "loss": 4.2286, "step": 1509 }, { "epoch": 0.5030398933955192, "grad_norm": 0.482421875, "learning_rate": 1.4983859099426117e-05, "loss": 4.1882, "step": 1510 }, { "epoch": 0.503373032397768, "grad_norm": 0.46875, "learning_rate": 1.4983805979825864e-05, "loss": 4.1483, "step": 1511 }, { "epoch": 0.5037061714000166, "grad_norm": 0.478515625, "learning_rate": 1.498375277305548e-05, "loss": 4.201, "step": 1512 }, { "epoch": 0.5040393104022654, "grad_norm": 0.447265625, "learning_rate": 1.4983699479115588e-05, "loss": 4.1608, "step": 1513 }, { "epoch": 0.504372449404514, "grad_norm": 0.4765625, "learning_rate": 1.4983646098006809e-05, "loss": 4.1984, "step": 1514 }, { "epoch": 0.5047055884067627, "grad_norm": 0.486328125, "learning_rate": 1.4983592629729758e-05, "loss": 4.1975, "step": 1515 }, { "epoch": 0.5050387274090115, "grad_norm": 0.47265625, "learning_rate": 1.4983539074285064e-05, "loss": 4.2094, "step": 1516 }, { "epoch": 0.5053718664112601, "grad_norm": 0.478515625, "learning_rate": 1.4983485431673349e-05, "loss": 4.2316, "step": 1517 }, { "epoch": 0.5057050054135088, "grad_norm": 0.45703125, "learning_rate": 1.4983431701895237e-05, "loss": 4.1291, "step": 1518 }, { "epoch": 0.5060381444157575, "grad_norm": 0.474609375, "learning_rate": 1.4983377884951356e-05, "loss": 4.2047, "step": 1519 }, { "epoch": 0.5063712834180062, "grad_norm": 0.4765625, "learning_rate": 1.4983323980842332e-05, "loss": 4.1619, "step": 1520 }, { "epoch": 0.5067044224202548, "grad_norm": 0.48828125, "learning_rate": 1.4983269989568792e-05, "loss": 4.152, "step": 1521 }, { "epoch": 0.5070375614225036, "grad_norm": 0.484375, "learning_rate": 1.4983215911131366e-05, "loss": 4.1567, "step": 1522 }, { "epoch": 0.5073707004247522, "grad_norm": 0.46875, "learning_rate": 1.4983161745530681e-05, "loss": 4.2462, "step": 1523 }, { "epoch": 0.5077038394270009, "grad_norm": 0.50390625, "learning_rate": 1.4983107492767373e-05, "loss": 4.0974, "step": 1524 }, { "epoch": 0.5080369784292496, "grad_norm": 0.4765625, "learning_rate": 1.498305315284207e-05, "loss": 4.0914, "step": 1525 }, { "epoch": 0.5083701174314983, "grad_norm": 0.484375, "learning_rate": 1.4982998725755405e-05, "loss": 4.1892, "step": 1526 }, { "epoch": 0.508703256433747, "grad_norm": 0.482421875, "learning_rate": 1.4982944211508014e-05, "loss": 4.1457, "step": 1527 }, { "epoch": 0.5090363954359957, "grad_norm": 0.470703125, "learning_rate": 1.498288961010053e-05, "loss": 4.2175, "step": 1528 }, { "epoch": 0.5093695344382444, "grad_norm": 0.490234375, "learning_rate": 1.4982834921533593e-05, "loss": 4.2262, "step": 1529 }, { "epoch": 0.509702673440493, "grad_norm": 0.484375, "learning_rate": 1.4982780145807833e-05, "loss": 4.1854, "step": 1530 }, { "epoch": 0.5100358124427418, "grad_norm": 0.486328125, "learning_rate": 1.4982725282923895e-05, "loss": 4.1546, "step": 1531 }, { "epoch": 0.5103689514449904, "grad_norm": 0.482421875, "learning_rate": 1.4982670332882413e-05, "loss": 4.232, "step": 1532 }, { "epoch": 0.5107020904472391, "grad_norm": 0.4765625, "learning_rate": 1.4982615295684029e-05, "loss": 4.2562, "step": 1533 }, { "epoch": 0.5110352294494878, "grad_norm": 0.466796875, "learning_rate": 1.4982560171329385e-05, "loss": 4.1693, "step": 1534 }, { "epoch": 0.5113683684517365, "grad_norm": 0.484375, "learning_rate": 1.4982504959819123e-05, "loss": 4.155, "step": 1535 }, { "epoch": 0.5117015074539851, "grad_norm": 0.484375, "learning_rate": 1.4982449661153884e-05, "loss": 4.1542, "step": 1536 }, { "epoch": 0.5120346464562339, "grad_norm": 0.478515625, "learning_rate": 1.4982394275334313e-05, "loss": 4.1214, "step": 1537 }, { "epoch": 0.5123677854584826, "grad_norm": 0.46484375, "learning_rate": 1.4982338802361058e-05, "loss": 4.1198, "step": 1538 }, { "epoch": 0.5127009244607312, "grad_norm": 0.484375, "learning_rate": 1.4982283242234759e-05, "loss": 4.0995, "step": 1539 }, { "epoch": 0.51303406346298, "grad_norm": 0.494140625, "learning_rate": 1.498222759495607e-05, "loss": 4.1492, "step": 1540 }, { "epoch": 0.5133672024652286, "grad_norm": 0.55078125, "learning_rate": 1.4982171860525634e-05, "loss": 4.2543, "step": 1541 }, { "epoch": 0.5137003414674773, "grad_norm": 0.478515625, "learning_rate": 1.4982116038944102e-05, "loss": 4.2121, "step": 1542 }, { "epoch": 0.514033480469726, "grad_norm": 0.47265625, "learning_rate": 1.4982060130212128e-05, "loss": 4.1872, "step": 1543 }, { "epoch": 0.5143666194719747, "grad_norm": 0.466796875, "learning_rate": 1.4982004134330356e-05, "loss": 4.1785, "step": 1544 }, { "epoch": 0.5146997584742233, "grad_norm": 0.484375, "learning_rate": 1.4981948051299444e-05, "loss": 4.1115, "step": 1545 }, { "epoch": 0.5150328974764721, "grad_norm": 0.482421875, "learning_rate": 1.498189188112004e-05, "loss": 4.1632, "step": 1546 }, { "epoch": 0.5153660364787207, "grad_norm": 0.490234375, "learning_rate": 1.4981835623792803e-05, "loss": 4.2198, "step": 1547 }, { "epoch": 0.5156991754809694, "grad_norm": 0.50390625, "learning_rate": 1.4981779279318388e-05, "loss": 4.1077, "step": 1548 }, { "epoch": 0.5160323144832181, "grad_norm": 0.474609375, "learning_rate": 1.4981722847697447e-05, "loss": 4.1356, "step": 1549 }, { "epoch": 0.5163654534854668, "grad_norm": 0.486328125, "learning_rate": 1.4981666328930643e-05, "loss": 4.1589, "step": 1550 }, { "epoch": 0.5166985924877155, "grad_norm": 0.48046875, "learning_rate": 1.498160972301863e-05, "loss": 4.195, "step": 1551 }, { "epoch": 0.5170317314899642, "grad_norm": 0.462890625, "learning_rate": 1.4981553029962071e-05, "loss": 4.1847, "step": 1552 }, { "epoch": 0.5173648704922129, "grad_norm": 0.48828125, "learning_rate": 1.4981496249761623e-05, "loss": 4.2033, "step": 1553 }, { "epoch": 0.5176980094944615, "grad_norm": 0.53515625, "learning_rate": 1.4981439382417947e-05, "loss": 4.1754, "step": 1554 }, { "epoch": 0.5180311484967103, "grad_norm": 0.482421875, "learning_rate": 1.4981382427931708e-05, "loss": 4.1419, "step": 1555 }, { "epoch": 0.5183642874989589, "grad_norm": 0.5, "learning_rate": 1.4981325386303567e-05, "loss": 4.2031, "step": 1556 }, { "epoch": 0.5186974265012076, "grad_norm": 0.482421875, "learning_rate": 1.4981268257534192e-05, "loss": 4.2526, "step": 1557 }, { "epoch": 0.5190305655034563, "grad_norm": 0.4765625, "learning_rate": 1.4981211041624246e-05, "loss": 4.1763, "step": 1558 }, { "epoch": 0.519363704505705, "grad_norm": 0.478515625, "learning_rate": 1.4981153738574395e-05, "loss": 4.0979, "step": 1559 }, { "epoch": 0.5196968435079536, "grad_norm": 0.462890625, "learning_rate": 1.4981096348385305e-05, "loss": 4.1362, "step": 1560 }, { "epoch": 0.5200299825102024, "grad_norm": 0.51953125, "learning_rate": 1.4981038871057651e-05, "loss": 4.1406, "step": 1561 }, { "epoch": 0.5203631215124511, "grad_norm": 0.486328125, "learning_rate": 1.4980981306592095e-05, "loss": 4.165, "step": 1562 }, { "epoch": 0.5206962605146997, "grad_norm": 0.455078125, "learning_rate": 1.4980923654989313e-05, "loss": 4.171, "step": 1563 }, { "epoch": 0.5210293995169485, "grad_norm": 0.486328125, "learning_rate": 1.4980865916249971e-05, "loss": 4.1512, "step": 1564 }, { "epoch": 0.5213625385191971, "grad_norm": 0.5234375, "learning_rate": 1.4980808090374747e-05, "loss": 4.127, "step": 1565 }, { "epoch": 0.5216956775214459, "grad_norm": 0.484375, "learning_rate": 1.498075017736431e-05, "loss": 4.222, "step": 1566 }, { "epoch": 0.5220288165236945, "grad_norm": 0.498046875, "learning_rate": 1.4980692177219337e-05, "loss": 4.0942, "step": 1567 }, { "epoch": 0.5223619555259432, "grad_norm": 0.47265625, "learning_rate": 1.4980634089940506e-05, "loss": 4.1363, "step": 1568 }, { "epoch": 0.5226950945281919, "grad_norm": 0.470703125, "learning_rate": 1.4980575915528489e-05, "loss": 4.1984, "step": 1569 }, { "epoch": 0.5230282335304406, "grad_norm": 0.46875, "learning_rate": 1.4980517653983966e-05, "loss": 4.2324, "step": 1570 }, { "epoch": 0.5233613725326892, "grad_norm": 0.4921875, "learning_rate": 1.4980459305307613e-05, "loss": 4.1273, "step": 1571 }, { "epoch": 0.523694511534938, "grad_norm": 0.515625, "learning_rate": 1.4980400869500113e-05, "loss": 4.126, "step": 1572 }, { "epoch": 0.5240276505371867, "grad_norm": 0.482421875, "learning_rate": 1.4980342346562147e-05, "loss": 4.1736, "step": 1573 }, { "epoch": 0.5243607895394353, "grad_norm": 0.49609375, "learning_rate": 1.4980283736494393e-05, "loss": 4.1379, "step": 1574 }, { "epoch": 0.5246939285416841, "grad_norm": 0.49609375, "learning_rate": 1.4980225039297537e-05, "loss": 4.1417, "step": 1575 }, { "epoch": 0.5250270675439327, "grad_norm": 0.484375, "learning_rate": 1.4980166254972262e-05, "loss": 4.1465, "step": 1576 }, { "epoch": 0.5253602065461814, "grad_norm": 0.50390625, "learning_rate": 1.498010738351925e-05, "loss": 4.2173, "step": 1577 }, { "epoch": 0.5256933455484301, "grad_norm": 0.484375, "learning_rate": 1.4980048424939191e-05, "loss": 4.1527, "step": 1578 }, { "epoch": 0.5260264845506788, "grad_norm": 0.474609375, "learning_rate": 1.4979989379232768e-05, "loss": 4.1457, "step": 1579 }, { "epoch": 0.5263596235529274, "grad_norm": 0.45703125, "learning_rate": 1.497993024640067e-05, "loss": 4.1642, "step": 1580 }, { "epoch": 0.5266927625551762, "grad_norm": 0.462890625, "learning_rate": 1.4979871026443589e-05, "loss": 4.1416, "step": 1581 }, { "epoch": 0.5270259015574248, "grad_norm": 0.494140625, "learning_rate": 1.4979811719362209e-05, "loss": 4.1232, "step": 1582 }, { "epoch": 0.5273590405596735, "grad_norm": 0.46484375, "learning_rate": 1.4979752325157224e-05, "loss": 4.2901, "step": 1583 }, { "epoch": 0.5276921795619222, "grad_norm": 0.45703125, "learning_rate": 1.4979692843829328e-05, "loss": 4.1278, "step": 1584 }, { "epoch": 0.5280253185641709, "grad_norm": 0.4921875, "learning_rate": 1.4979633275379208e-05, "loss": 4.2352, "step": 1585 }, { "epoch": 0.5283584575664196, "grad_norm": 0.5, "learning_rate": 1.4979573619807563e-05, "loss": 4.1564, "step": 1586 }, { "epoch": 0.5286915965686683, "grad_norm": 0.45703125, "learning_rate": 1.4979513877115087e-05, "loss": 4.2385, "step": 1587 }, { "epoch": 0.529024735570917, "grad_norm": 0.482421875, "learning_rate": 1.4979454047302474e-05, "loss": 4.1933, "step": 1588 }, { "epoch": 0.5293578745731656, "grad_norm": 0.458984375, "learning_rate": 1.4979394130370422e-05, "loss": 4.1145, "step": 1589 }, { "epoch": 0.5296910135754144, "grad_norm": 0.46875, "learning_rate": 1.4979334126319628e-05, "loss": 4.178, "step": 1590 }, { "epoch": 0.530024152577663, "grad_norm": 0.490234375, "learning_rate": 1.4979274035150791e-05, "loss": 4.1568, "step": 1591 }, { "epoch": 0.5303572915799117, "grad_norm": 0.46875, "learning_rate": 1.4979213856864614e-05, "loss": 4.144, "step": 1592 }, { "epoch": 0.5306904305821604, "grad_norm": 0.53125, "learning_rate": 1.4979153591461793e-05, "loss": 4.1454, "step": 1593 }, { "epoch": 0.5310235695844091, "grad_norm": 0.48828125, "learning_rate": 1.4979093238943034e-05, "loss": 4.1558, "step": 1594 }, { "epoch": 0.5313567085866577, "grad_norm": 0.53125, "learning_rate": 1.4979032799309038e-05, "loss": 4.1603, "step": 1595 }, { "epoch": 0.5316898475889065, "grad_norm": 0.494140625, "learning_rate": 1.497897227256051e-05, "loss": 4.1964, "step": 1596 }, { "epoch": 0.5320229865911552, "grad_norm": 0.46484375, "learning_rate": 1.4978911658698153e-05, "loss": 4.1776, "step": 1597 }, { "epoch": 0.5323561255934038, "grad_norm": 0.478515625, "learning_rate": 1.4978850957722676e-05, "loss": 4.1594, "step": 1598 }, { "epoch": 0.5326892645956526, "grad_norm": 0.48046875, "learning_rate": 1.4978790169634782e-05, "loss": 4.1552, "step": 1599 }, { "epoch": 0.5330224035979012, "grad_norm": 0.47265625, "learning_rate": 1.4978729294435185e-05, "loss": 4.254, "step": 1600 }, { "epoch": 0.53335554260015, "grad_norm": 0.490234375, "learning_rate": 1.4978668332124589e-05, "loss": 4.1569, "step": 1601 }, { "epoch": 0.5336886816023986, "grad_norm": 0.51171875, "learning_rate": 1.4978607282703707e-05, "loss": 4.1635, "step": 1602 }, { "epoch": 0.5340218206046473, "grad_norm": 0.51171875, "learning_rate": 1.4978546146173246e-05, "loss": 4.1817, "step": 1603 }, { "epoch": 0.5343549596068959, "grad_norm": 0.490234375, "learning_rate": 1.4978484922533924e-05, "loss": 4.2004, "step": 1604 }, { "epoch": 0.5346880986091447, "grad_norm": 0.484375, "learning_rate": 1.4978423611786448e-05, "loss": 4.126, "step": 1605 }, { "epoch": 0.5350212376113933, "grad_norm": 0.490234375, "learning_rate": 1.4978362213931539e-05, "loss": 4.1872, "step": 1606 }, { "epoch": 0.535354376613642, "grad_norm": 0.48046875, "learning_rate": 1.4978300728969904e-05, "loss": 4.2417, "step": 1607 }, { "epoch": 0.5356875156158908, "grad_norm": 0.478515625, "learning_rate": 1.4978239156902266e-05, "loss": 4.1827, "step": 1608 }, { "epoch": 0.5360206546181394, "grad_norm": 0.5546875, "learning_rate": 1.4978177497729339e-05, "loss": 4.1555, "step": 1609 }, { "epoch": 0.5363537936203882, "grad_norm": 0.50390625, "learning_rate": 1.4978115751451843e-05, "loss": 4.1539, "step": 1610 }, { "epoch": 0.5366869326226368, "grad_norm": 0.5, "learning_rate": 1.4978053918070495e-05, "loss": 4.1878, "step": 1611 }, { "epoch": 0.5370200716248855, "grad_norm": 0.466796875, "learning_rate": 1.4977991997586015e-05, "loss": 4.1548, "step": 1612 }, { "epoch": 0.5373532106271341, "grad_norm": 0.48046875, "learning_rate": 1.4977929989999126e-05, "loss": 4.1673, "step": 1613 }, { "epoch": 0.5376863496293829, "grad_norm": 0.49609375, "learning_rate": 1.4977867895310551e-05, "loss": 4.0679, "step": 1614 }, { "epoch": 0.5380194886316315, "grad_norm": 0.474609375, "learning_rate": 1.4977805713521013e-05, "loss": 4.1023, "step": 1615 }, { "epoch": 0.5383526276338803, "grad_norm": 0.46484375, "learning_rate": 1.4977743444631233e-05, "loss": 4.1689, "step": 1616 }, { "epoch": 0.5386857666361289, "grad_norm": 0.486328125, "learning_rate": 1.497768108864194e-05, "loss": 4.1649, "step": 1617 }, { "epoch": 0.5390189056383776, "grad_norm": 0.4921875, "learning_rate": 1.4977618645553858e-05, "loss": 4.2005, "step": 1618 }, { "epoch": 0.5393520446406262, "grad_norm": 0.498046875, "learning_rate": 1.4977556115367717e-05, "loss": 4.1459, "step": 1619 }, { "epoch": 0.539685183642875, "grad_norm": 0.4921875, "learning_rate": 1.4977493498084243e-05, "loss": 4.1364, "step": 1620 }, { "epoch": 0.5400183226451237, "grad_norm": 0.482421875, "learning_rate": 1.4977430793704165e-05, "loss": 4.182, "step": 1621 }, { "epoch": 0.5403514616473724, "grad_norm": 0.48828125, "learning_rate": 1.4977368002228216e-05, "loss": 4.1234, "step": 1622 }, { "epoch": 0.5406846006496211, "grad_norm": 0.466796875, "learning_rate": 1.4977305123657124e-05, "loss": 4.2084, "step": 1623 }, { "epoch": 0.5410177396518697, "grad_norm": 0.4921875, "learning_rate": 1.4977242157991622e-05, "loss": 4.1198, "step": 1624 }, { "epoch": 0.5413508786541185, "grad_norm": 0.478515625, "learning_rate": 1.4977179105232448e-05, "loss": 4.142, "step": 1625 }, { "epoch": 0.5416840176563671, "grad_norm": 0.49609375, "learning_rate": 1.4977115965380332e-05, "loss": 4.1631, "step": 1626 }, { "epoch": 0.5420171566586158, "grad_norm": 0.490234375, "learning_rate": 1.4977052738436008e-05, "loss": 4.2257, "step": 1627 }, { "epoch": 0.5423502956608645, "grad_norm": 0.49609375, "learning_rate": 1.4976989424400218e-05, "loss": 4.1776, "step": 1628 }, { "epoch": 0.5426834346631132, "grad_norm": 0.4765625, "learning_rate": 1.4976926023273694e-05, "loss": 4.1549, "step": 1629 }, { "epoch": 0.5430165736653618, "grad_norm": 0.455078125, "learning_rate": 1.4976862535057177e-05, "loss": 4.2348, "step": 1630 }, { "epoch": 0.5433497126676106, "grad_norm": 0.4765625, "learning_rate": 1.4976798959751408e-05, "loss": 4.2091, "step": 1631 }, { "epoch": 0.5436828516698593, "grad_norm": 0.5234375, "learning_rate": 1.4976735297357124e-05, "loss": 4.2057, "step": 1632 }, { "epoch": 0.5440159906721079, "grad_norm": 0.52734375, "learning_rate": 1.4976671547875068e-05, "loss": 4.0656, "step": 1633 }, { "epoch": 0.5443491296743567, "grad_norm": 0.466796875, "learning_rate": 1.4976607711305984e-05, "loss": 4.1341, "step": 1634 }, { "epoch": 0.5446822686766053, "grad_norm": 0.474609375, "learning_rate": 1.4976543787650616e-05, "loss": 4.1323, "step": 1635 }, { "epoch": 0.545015407678854, "grad_norm": 0.4765625, "learning_rate": 1.4976479776909704e-05, "loss": 4.1081, "step": 1636 }, { "epoch": 0.5453485466811027, "grad_norm": 0.47265625, "learning_rate": 1.4976415679084e-05, "loss": 4.0942, "step": 1637 }, { "epoch": 0.5456816856833514, "grad_norm": 0.494140625, "learning_rate": 1.4976351494174245e-05, "loss": 4.1465, "step": 1638 }, { "epoch": 0.5460148246856, "grad_norm": 0.4765625, "learning_rate": 1.497628722218119e-05, "loss": 4.1755, "step": 1639 }, { "epoch": 0.5463479636878488, "grad_norm": 0.50390625, "learning_rate": 1.497622286310558e-05, "loss": 4.2143, "step": 1640 }, { "epoch": 0.5466811026900974, "grad_norm": 0.466796875, "learning_rate": 1.497615841694817e-05, "loss": 4.217, "step": 1641 }, { "epoch": 0.5470142416923461, "grad_norm": 0.49609375, "learning_rate": 1.4976093883709706e-05, "loss": 4.1784, "step": 1642 }, { "epoch": 0.5473473806945948, "grad_norm": 0.490234375, "learning_rate": 1.4976029263390944e-05, "loss": 4.0739, "step": 1643 }, { "epoch": 0.5476805196968435, "grad_norm": 0.48046875, "learning_rate": 1.497596455599263e-05, "loss": 4.1526, "step": 1644 }, { "epoch": 0.5480136586990922, "grad_norm": 0.49609375, "learning_rate": 1.4975899761515524e-05, "loss": 4.1193, "step": 1645 }, { "epoch": 0.5483467977013409, "grad_norm": 0.474609375, "learning_rate": 1.4975834879960378e-05, "loss": 4.1499, "step": 1646 }, { "epoch": 0.5486799367035896, "grad_norm": 0.46875, "learning_rate": 1.497576991132795e-05, "loss": 4.1546, "step": 1647 }, { "epoch": 0.5490130757058382, "grad_norm": 0.48046875, "learning_rate": 1.4975704855618994e-05, "loss": 4.2808, "step": 1648 }, { "epoch": 0.549346214708087, "grad_norm": 0.515625, "learning_rate": 1.4975639712834268e-05, "loss": 4.1445, "step": 1649 }, { "epoch": 0.5496793537103356, "grad_norm": 0.46875, "learning_rate": 1.4975574482974532e-05, "loss": 4.1734, "step": 1650 }, { "epoch": 0.5500124927125843, "grad_norm": 0.484375, "learning_rate": 1.4975509166040547e-05, "loss": 4.2497, "step": 1651 }, { "epoch": 0.550345631714833, "grad_norm": 0.474609375, "learning_rate": 1.4975443762033069e-05, "loss": 4.1813, "step": 1652 }, { "epoch": 0.5506787707170817, "grad_norm": 0.494140625, "learning_rate": 1.4975378270952865e-05, "loss": 4.0787, "step": 1653 }, { "epoch": 0.5510119097193303, "grad_norm": 0.484375, "learning_rate": 1.4975312692800695e-05, "loss": 4.1334, "step": 1654 }, { "epoch": 0.5513450487215791, "grad_norm": 0.462890625, "learning_rate": 1.4975247027577325e-05, "loss": 4.158, "step": 1655 }, { "epoch": 0.5516781877238278, "grad_norm": 0.474609375, "learning_rate": 1.4975181275283517e-05, "loss": 4.1637, "step": 1656 }, { "epoch": 0.5520113267260764, "grad_norm": 0.462890625, "learning_rate": 1.4975115435920039e-05, "loss": 4.1908, "step": 1657 }, { "epoch": 0.5523444657283252, "grad_norm": 0.474609375, "learning_rate": 1.4975049509487659e-05, "loss": 4.1412, "step": 1658 }, { "epoch": 0.5526776047305738, "grad_norm": 0.5078125, "learning_rate": 1.497498349598714e-05, "loss": 4.0751, "step": 1659 }, { "epoch": 0.5530107437328226, "grad_norm": 0.4609375, "learning_rate": 1.4974917395419255e-05, "loss": 4.223, "step": 1660 }, { "epoch": 0.5533438827350712, "grad_norm": 0.4765625, "learning_rate": 1.4974851207784774e-05, "loss": 4.1534, "step": 1661 }, { "epoch": 0.5536770217373199, "grad_norm": 0.45703125, "learning_rate": 1.4974784933084467e-05, "loss": 4.2139, "step": 1662 }, { "epoch": 0.5540101607395685, "grad_norm": 0.447265625, "learning_rate": 1.4974718571319103e-05, "loss": 4.1558, "step": 1663 }, { "epoch": 0.5543432997418173, "grad_norm": 0.466796875, "learning_rate": 1.4974652122489461e-05, "loss": 4.2077, "step": 1664 }, { "epoch": 0.5546764387440659, "grad_norm": 0.443359375, "learning_rate": 1.4974585586596311e-05, "loss": 4.2204, "step": 1665 }, { "epoch": 0.5550095777463147, "grad_norm": 0.478515625, "learning_rate": 1.4974518963640429e-05, "loss": 4.1225, "step": 1666 }, { "epoch": 0.5553427167485634, "grad_norm": 0.470703125, "learning_rate": 1.4974452253622592e-05, "loss": 4.1961, "step": 1667 }, { "epoch": 0.555675855750812, "grad_norm": 0.484375, "learning_rate": 1.4974385456543573e-05, "loss": 4.166, "step": 1668 }, { "epoch": 0.5560089947530608, "grad_norm": 0.4609375, "learning_rate": 1.4974318572404155e-05, "loss": 4.1536, "step": 1669 }, { "epoch": 0.5563421337553094, "grad_norm": 0.4765625, "learning_rate": 1.4974251601205113e-05, "loss": 4.1719, "step": 1670 }, { "epoch": 0.5566752727575581, "grad_norm": 0.46484375, "learning_rate": 1.4974184542947232e-05, "loss": 4.1457, "step": 1671 }, { "epoch": 0.5570084117598068, "grad_norm": 0.498046875, "learning_rate": 1.497411739763129e-05, "loss": 4.1303, "step": 1672 }, { "epoch": 0.5573415507620555, "grad_norm": 0.466796875, "learning_rate": 1.4974050165258067e-05, "loss": 4.1917, "step": 1673 }, { "epoch": 0.5576746897643041, "grad_norm": 0.486328125, "learning_rate": 1.4973982845828348e-05, "loss": 4.1175, "step": 1674 }, { "epoch": 0.5580078287665529, "grad_norm": 0.478515625, "learning_rate": 1.497391543934292e-05, "loss": 4.1745, "step": 1675 }, { "epoch": 0.5583409677688015, "grad_norm": 0.486328125, "learning_rate": 1.4973847945802563e-05, "loss": 4.1691, "step": 1676 }, { "epoch": 0.5586741067710502, "grad_norm": 0.515625, "learning_rate": 1.4973780365208066e-05, "loss": 4.1484, "step": 1677 }, { "epoch": 0.5590072457732989, "grad_norm": 0.46875, "learning_rate": 1.4973712697560218e-05, "loss": 4.1812, "step": 1678 }, { "epoch": 0.5593403847755476, "grad_norm": 0.4765625, "learning_rate": 1.4973644942859804e-05, "loss": 4.1967, "step": 1679 }, { "epoch": 0.5596735237777963, "grad_norm": 0.484375, "learning_rate": 1.4973577101107615e-05, "loss": 4.1352, "step": 1680 }, { "epoch": 0.560006662780045, "grad_norm": 0.482421875, "learning_rate": 1.497350917230444e-05, "loss": 4.1822, "step": 1681 }, { "epoch": 0.5603398017822937, "grad_norm": 0.47265625, "learning_rate": 1.497344115645107e-05, "loss": 4.1551, "step": 1682 }, { "epoch": 0.5606729407845423, "grad_norm": 0.474609375, "learning_rate": 1.4973373053548299e-05, "loss": 4.1466, "step": 1683 }, { "epoch": 0.5610060797867911, "grad_norm": 0.46875, "learning_rate": 1.4973304863596917e-05, "loss": 4.1296, "step": 1684 }, { "epoch": 0.5613392187890397, "grad_norm": 0.482421875, "learning_rate": 1.4973236586597722e-05, "loss": 4.1032, "step": 1685 }, { "epoch": 0.5616723577912884, "grad_norm": 0.48046875, "learning_rate": 1.4973168222551508e-05, "loss": 4.1485, "step": 1686 }, { "epoch": 0.5620054967935371, "grad_norm": 0.482421875, "learning_rate": 1.4973099771459071e-05, "loss": 4.2283, "step": 1687 }, { "epoch": 0.5623386357957858, "grad_norm": 0.46875, "learning_rate": 1.4973031233321209e-05, "loss": 4.1937, "step": 1688 }, { "epoch": 0.5626717747980344, "grad_norm": 0.494140625, "learning_rate": 1.4972962608138719e-05, "loss": 4.1201, "step": 1689 }, { "epoch": 0.5630049138002832, "grad_norm": 0.474609375, "learning_rate": 1.4972893895912403e-05, "loss": 4.1668, "step": 1690 }, { "epoch": 0.5633380528025319, "grad_norm": 0.46875, "learning_rate": 1.4972825096643056e-05, "loss": 4.2038, "step": 1691 }, { "epoch": 0.5636711918047805, "grad_norm": 0.486328125, "learning_rate": 1.4972756210331484e-05, "loss": 4.1176, "step": 1692 }, { "epoch": 0.5640043308070293, "grad_norm": 0.458984375, "learning_rate": 1.4972687236978488e-05, "loss": 4.2143, "step": 1693 }, { "epoch": 0.5643374698092779, "grad_norm": 0.484375, "learning_rate": 1.4972618176584872e-05, "loss": 4.2311, "step": 1694 }, { "epoch": 0.5646706088115266, "grad_norm": 0.494140625, "learning_rate": 1.4972549029151442e-05, "loss": 4.1753, "step": 1695 }, { "epoch": 0.5650037478137753, "grad_norm": 0.48828125, "learning_rate": 1.4972479794678996e-05, "loss": 4.1338, "step": 1696 }, { "epoch": 0.565336886816024, "grad_norm": 0.484375, "learning_rate": 1.4972410473168352e-05, "loss": 4.1525, "step": 1697 }, { "epoch": 0.5656700258182726, "grad_norm": 0.46875, "learning_rate": 1.4972341064620306e-05, "loss": 4.2103, "step": 1698 }, { "epoch": 0.5660031648205214, "grad_norm": 0.46484375, "learning_rate": 1.4972271569035676e-05, "loss": 4.1675, "step": 1699 }, { "epoch": 0.56633630382277, "grad_norm": 0.455078125, "learning_rate": 1.4972201986415264e-05, "loss": 4.2307, "step": 1700 }, { "epoch": 0.5666694428250187, "grad_norm": 0.462890625, "learning_rate": 1.4972132316759884e-05, "loss": 4.1609, "step": 1701 }, { "epoch": 0.5670025818272675, "grad_norm": 0.46875, "learning_rate": 1.4972062560070348e-05, "loss": 4.1629, "step": 1702 }, { "epoch": 0.5673357208295161, "grad_norm": 0.48046875, "learning_rate": 1.4971992716347468e-05, "loss": 4.1683, "step": 1703 }, { "epoch": 0.5676688598317648, "grad_norm": 0.490234375, "learning_rate": 1.4971922785592055e-05, "loss": 4.0815, "step": 1704 }, { "epoch": 0.5680019988340135, "grad_norm": 0.4765625, "learning_rate": 1.4971852767804928e-05, "loss": 4.1392, "step": 1705 }, { "epoch": 0.5683351378362622, "grad_norm": 0.5, "learning_rate": 1.4971782662986898e-05, "loss": 4.1906, "step": 1706 }, { "epoch": 0.5686682768385108, "grad_norm": 0.486328125, "learning_rate": 1.4971712471138786e-05, "loss": 4.1458, "step": 1707 }, { "epoch": 0.5690014158407596, "grad_norm": 0.470703125, "learning_rate": 1.4971642192261408e-05, "loss": 4.1765, "step": 1708 }, { "epoch": 0.5693345548430082, "grad_norm": 0.478515625, "learning_rate": 1.497157182635558e-05, "loss": 4.1527, "step": 1709 }, { "epoch": 0.569667693845257, "grad_norm": 0.47265625, "learning_rate": 1.4971501373422123e-05, "loss": 4.1607, "step": 1710 }, { "epoch": 0.5700008328475056, "grad_norm": 0.5234375, "learning_rate": 1.497143083346186e-05, "loss": 4.1938, "step": 1711 }, { "epoch": 0.5703339718497543, "grad_norm": 0.50390625, "learning_rate": 1.497136020647561e-05, "loss": 4.1336, "step": 1712 }, { "epoch": 0.570667110852003, "grad_norm": 0.5234375, "learning_rate": 1.4971289492464197e-05, "loss": 4.1317, "step": 1713 }, { "epoch": 0.5710002498542517, "grad_norm": 0.486328125, "learning_rate": 1.4971218691428445e-05, "loss": 4.1655, "step": 1714 }, { "epoch": 0.5713333888565004, "grad_norm": 0.474609375, "learning_rate": 1.4971147803369178e-05, "loss": 4.1811, "step": 1715 }, { "epoch": 0.571666527858749, "grad_norm": 0.50390625, "learning_rate": 1.497107682828722e-05, "loss": 4.1051, "step": 1716 }, { "epoch": 0.5719996668609978, "grad_norm": 0.490234375, "learning_rate": 1.49710057661834e-05, "loss": 4.1277, "step": 1717 }, { "epoch": 0.5723328058632464, "grad_norm": 0.484375, "learning_rate": 1.4970934617058544e-05, "loss": 4.1108, "step": 1718 }, { "epoch": 0.5726659448654952, "grad_norm": 0.4921875, "learning_rate": 1.4970863380913483e-05, "loss": 4.0942, "step": 1719 }, { "epoch": 0.5729990838677438, "grad_norm": 0.53515625, "learning_rate": 1.4970792057749045e-05, "loss": 4.1486, "step": 1720 }, { "epoch": 0.5733322228699925, "grad_norm": 0.5078125, "learning_rate": 1.4970720647566062e-05, "loss": 4.1637, "step": 1721 }, { "epoch": 0.5736653618722412, "grad_norm": 0.470703125, "learning_rate": 1.4970649150365363e-05, "loss": 4.1817, "step": 1722 }, { "epoch": 0.5739985008744899, "grad_norm": 0.49609375, "learning_rate": 1.4970577566147785e-05, "loss": 4.1446, "step": 1723 }, { "epoch": 0.5743316398767385, "grad_norm": 0.46484375, "learning_rate": 1.4970505894914157e-05, "loss": 4.199, "step": 1724 }, { "epoch": 0.5746647788789873, "grad_norm": 0.470703125, "learning_rate": 1.497043413666532e-05, "loss": 4.1287, "step": 1725 }, { "epoch": 0.574997917881236, "grad_norm": 0.5, "learning_rate": 1.4970362291402101e-05, "loss": 4.2315, "step": 1726 }, { "epoch": 0.5753310568834846, "grad_norm": 0.4921875, "learning_rate": 1.4970290359125346e-05, "loss": 4.1755, "step": 1727 }, { "epoch": 0.5756641958857334, "grad_norm": 0.50390625, "learning_rate": 1.497021833983589e-05, "loss": 4.1169, "step": 1728 }, { "epoch": 0.575997334887982, "grad_norm": 0.470703125, "learning_rate": 1.4970146233534568e-05, "loss": 4.1817, "step": 1729 }, { "epoch": 0.5763304738902307, "grad_norm": 0.4921875, "learning_rate": 1.4970074040222225e-05, "loss": 4.1094, "step": 1730 }, { "epoch": 0.5766636128924794, "grad_norm": 0.484375, "learning_rate": 1.49700017598997e-05, "loss": 4.1319, "step": 1731 }, { "epoch": 0.5769967518947281, "grad_norm": 0.49609375, "learning_rate": 1.4969929392567831e-05, "loss": 4.1314, "step": 1732 }, { "epoch": 0.5773298908969767, "grad_norm": 0.50390625, "learning_rate": 1.4969856938227467e-05, "loss": 4.2536, "step": 1733 }, { "epoch": 0.5776630298992255, "grad_norm": 0.5078125, "learning_rate": 1.4969784396879449e-05, "loss": 4.1762, "step": 1734 }, { "epoch": 0.5779961689014741, "grad_norm": 0.46484375, "learning_rate": 1.4969711768524621e-05, "loss": 4.2577, "step": 1735 }, { "epoch": 0.5783293079037228, "grad_norm": 0.498046875, "learning_rate": 1.4969639053163831e-05, "loss": 4.1319, "step": 1736 }, { "epoch": 0.5786624469059715, "grad_norm": 0.490234375, "learning_rate": 1.4969566250797926e-05, "loss": 4.143, "step": 1737 }, { "epoch": 0.5789955859082202, "grad_norm": 0.494140625, "learning_rate": 1.4969493361427751e-05, "loss": 4.2142, "step": 1738 }, { "epoch": 0.5793287249104689, "grad_norm": 0.515625, "learning_rate": 1.4969420385054162e-05, "loss": 4.1793, "step": 1739 }, { "epoch": 0.5796618639127176, "grad_norm": 0.484375, "learning_rate": 1.4969347321678e-05, "loss": 4.1122, "step": 1740 }, { "epoch": 0.5799950029149663, "grad_norm": 0.490234375, "learning_rate": 1.4969274171300121e-05, "loss": 4.1645, "step": 1741 }, { "epoch": 0.5803281419172149, "grad_norm": 0.46484375, "learning_rate": 1.496920093392138e-05, "loss": 4.2121, "step": 1742 }, { "epoch": 0.5806612809194637, "grad_norm": 0.484375, "learning_rate": 1.496912760954262e-05, "loss": 4.1387, "step": 1743 }, { "epoch": 0.5809944199217123, "grad_norm": 0.48828125, "learning_rate": 1.4969054198164706e-05, "loss": 4.1436, "step": 1744 }, { "epoch": 0.581327558923961, "grad_norm": 0.5234375, "learning_rate": 1.4968980699788486e-05, "loss": 4.1537, "step": 1745 }, { "epoch": 0.5816606979262097, "grad_norm": 0.48046875, "learning_rate": 1.4968907114414821e-05, "loss": 4.1049, "step": 1746 }, { "epoch": 0.5819938369284584, "grad_norm": 0.4921875, "learning_rate": 1.4968833442044564e-05, "loss": 4.1727, "step": 1747 }, { "epoch": 0.582326975930707, "grad_norm": 0.4765625, "learning_rate": 1.4968759682678575e-05, "loss": 4.0813, "step": 1748 }, { "epoch": 0.5826601149329558, "grad_norm": 0.455078125, "learning_rate": 1.4968685836317715e-05, "loss": 4.1963, "step": 1749 }, { "epoch": 0.5829932539352045, "grad_norm": 0.4765625, "learning_rate": 1.496861190296284e-05, "loss": 4.1339, "step": 1750 }, { "epoch": 0.5833263929374531, "grad_norm": 0.46484375, "learning_rate": 1.4968537882614815e-05, "loss": 4.2041, "step": 1751 }, { "epoch": 0.5836595319397019, "grad_norm": 0.52734375, "learning_rate": 1.4968463775274497e-05, "loss": 4.1311, "step": 1752 }, { "epoch": 0.5839926709419505, "grad_norm": 0.490234375, "learning_rate": 1.4968389580942757e-05, "loss": 4.1328, "step": 1753 }, { "epoch": 0.5843258099441992, "grad_norm": 0.47265625, "learning_rate": 1.4968315299620453e-05, "loss": 4.0655, "step": 1754 }, { "epoch": 0.5846589489464479, "grad_norm": 0.482421875, "learning_rate": 1.4968240931308451e-05, "loss": 4.1587, "step": 1755 }, { "epoch": 0.5849920879486966, "grad_norm": 0.486328125, "learning_rate": 1.4968166476007619e-05, "loss": 4.1861, "step": 1756 }, { "epoch": 0.5853252269509452, "grad_norm": 0.470703125, "learning_rate": 1.4968091933718825e-05, "loss": 4.1494, "step": 1757 }, { "epoch": 0.585658365953194, "grad_norm": 0.48828125, "learning_rate": 1.4968017304442934e-05, "loss": 4.1553, "step": 1758 }, { "epoch": 0.5859915049554426, "grad_norm": 0.474609375, "learning_rate": 1.4967942588180818e-05, "loss": 4.0898, "step": 1759 }, { "epoch": 0.5863246439576913, "grad_norm": 0.4765625, "learning_rate": 1.4967867784933346e-05, "loss": 4.0976, "step": 1760 }, { "epoch": 0.5866577829599401, "grad_norm": 0.490234375, "learning_rate": 1.4967792894701389e-05, "loss": 4.1332, "step": 1761 }, { "epoch": 0.5869909219621887, "grad_norm": 0.51171875, "learning_rate": 1.4967717917485823e-05, "loss": 4.0485, "step": 1762 }, { "epoch": 0.5873240609644375, "grad_norm": 0.47265625, "learning_rate": 1.4967642853287517e-05, "loss": 4.1694, "step": 1763 }, { "epoch": 0.5876571999666861, "grad_norm": 0.4921875, "learning_rate": 1.4967567702107345e-05, "loss": 4.1066, "step": 1764 }, { "epoch": 0.5879903389689348, "grad_norm": 0.482421875, "learning_rate": 1.4967492463946185e-05, "loss": 4.1444, "step": 1765 }, { "epoch": 0.5883234779711835, "grad_norm": 0.49609375, "learning_rate": 1.4967417138804912e-05, "loss": 4.2032, "step": 1766 }, { "epoch": 0.5886566169734322, "grad_norm": 0.47265625, "learning_rate": 1.4967341726684404e-05, "loss": 4.1981, "step": 1767 }, { "epoch": 0.5889897559756808, "grad_norm": 0.466796875, "learning_rate": 1.4967266227585539e-05, "loss": 4.155, "step": 1768 }, { "epoch": 0.5893228949779296, "grad_norm": 0.462890625, "learning_rate": 1.4967190641509197e-05, "loss": 4.0996, "step": 1769 }, { "epoch": 0.5896560339801782, "grad_norm": 0.484375, "learning_rate": 1.4967114968456258e-05, "loss": 4.1467, "step": 1770 }, { "epoch": 0.5899891729824269, "grad_norm": 0.48046875, "learning_rate": 1.4967039208427603e-05, "loss": 4.1782, "step": 1771 }, { "epoch": 0.5903223119846756, "grad_norm": 0.49609375, "learning_rate": 1.4966963361424115e-05, "loss": 4.1295, "step": 1772 }, { "epoch": 0.5906554509869243, "grad_norm": 0.48046875, "learning_rate": 1.4966887427446676e-05, "loss": 4.1847, "step": 1773 }, { "epoch": 0.590988589989173, "grad_norm": 0.48046875, "learning_rate": 1.4966811406496174e-05, "loss": 4.1288, "step": 1774 }, { "epoch": 0.5913217289914217, "grad_norm": 0.48828125, "learning_rate": 1.4966735298573489e-05, "loss": 4.1333, "step": 1775 }, { "epoch": 0.5916548679936704, "grad_norm": 0.4921875, "learning_rate": 1.4966659103679514e-05, "loss": 4.1453, "step": 1776 }, { "epoch": 0.591988006995919, "grad_norm": 0.49609375, "learning_rate": 1.4966582821815132e-05, "loss": 4.0928, "step": 1777 }, { "epoch": 0.5923211459981678, "grad_norm": 0.48046875, "learning_rate": 1.4966506452981234e-05, "loss": 4.1747, "step": 1778 }, { "epoch": 0.5926542850004164, "grad_norm": 0.474609375, "learning_rate": 1.4966429997178704e-05, "loss": 4.1569, "step": 1779 }, { "epoch": 0.5929874240026651, "grad_norm": 0.5, "learning_rate": 1.496635345440844e-05, "loss": 4.0889, "step": 1780 }, { "epoch": 0.5933205630049138, "grad_norm": 0.451171875, "learning_rate": 1.496627682467133e-05, "loss": 4.2064, "step": 1781 }, { "epoch": 0.5936537020071625, "grad_norm": 0.5078125, "learning_rate": 1.4966200107968267e-05, "loss": 4.1679, "step": 1782 }, { "epoch": 0.5939868410094111, "grad_norm": 0.46875, "learning_rate": 1.4966123304300144e-05, "loss": 4.1603, "step": 1783 }, { "epoch": 0.5943199800116599, "grad_norm": 0.486328125, "learning_rate": 1.4966046413667857e-05, "loss": 4.0866, "step": 1784 }, { "epoch": 0.5946531190139086, "grad_norm": 0.498046875, "learning_rate": 1.4965969436072301e-05, "loss": 4.0901, "step": 1785 }, { "epoch": 0.5949862580161572, "grad_norm": 0.474609375, "learning_rate": 1.496589237151437e-05, "loss": 4.2229, "step": 1786 }, { "epoch": 0.595319397018406, "grad_norm": 0.48046875, "learning_rate": 1.4965815219994967e-05, "loss": 4.1669, "step": 1787 }, { "epoch": 0.5956525360206546, "grad_norm": 0.470703125, "learning_rate": 1.4965737981514985e-05, "loss": 4.1312, "step": 1788 }, { "epoch": 0.5959856750229033, "grad_norm": 0.470703125, "learning_rate": 1.4965660656075328e-05, "loss": 4.1315, "step": 1789 }, { "epoch": 0.596318814025152, "grad_norm": 0.515625, "learning_rate": 1.4965583243676894e-05, "loss": 4.0738, "step": 1790 }, { "epoch": 0.5966519530274007, "grad_norm": 0.515625, "learning_rate": 1.4965505744320587e-05, "loss": 4.1927, "step": 1791 }, { "epoch": 0.5969850920296493, "grad_norm": 0.52734375, "learning_rate": 1.4965428158007305e-05, "loss": 4.2121, "step": 1792 }, { "epoch": 0.5973182310318981, "grad_norm": 0.494140625, "learning_rate": 1.4965350484737959e-05, "loss": 4.1572, "step": 1793 }, { "epoch": 0.5976513700341467, "grad_norm": 0.49609375, "learning_rate": 1.4965272724513449e-05, "loss": 4.1416, "step": 1794 }, { "epoch": 0.5979845090363954, "grad_norm": 0.498046875, "learning_rate": 1.496519487733468e-05, "loss": 4.2083, "step": 1795 }, { "epoch": 0.5983176480386442, "grad_norm": 0.51171875, "learning_rate": 1.4965116943202561e-05, "loss": 4.1609, "step": 1796 }, { "epoch": 0.5986507870408928, "grad_norm": 0.51171875, "learning_rate": 1.4965038922117999e-05, "loss": 4.1444, "step": 1797 }, { "epoch": 0.5989839260431415, "grad_norm": 0.478515625, "learning_rate": 1.4964960814081902e-05, "loss": 4.1247, "step": 1798 }, { "epoch": 0.5993170650453902, "grad_norm": 0.494140625, "learning_rate": 1.4964882619095182e-05, "loss": 4.1025, "step": 1799 }, { "epoch": 0.5996502040476389, "grad_norm": 0.46875, "learning_rate": 1.4964804337158748e-05, "loss": 4.1699, "step": 1800 }, { "epoch": 0.5999833430498875, "grad_norm": 0.5078125, "learning_rate": 1.496472596827351e-05, "loss": 4.1365, "step": 1801 }, { "epoch": 0.6003164820521363, "grad_norm": 0.49609375, "learning_rate": 1.4964647512440387e-05, "loss": 4.1306, "step": 1802 }, { "epoch": 0.6006496210543849, "grad_norm": 0.48046875, "learning_rate": 1.4964568969660287e-05, "loss": 4.1549, "step": 1803 }, { "epoch": 0.6009827600566336, "grad_norm": 0.5, "learning_rate": 1.4964490339934127e-05, "loss": 4.1093, "step": 1804 }, { "epoch": 0.6013158990588823, "grad_norm": 0.486328125, "learning_rate": 1.4964411623262821e-05, "loss": 4.1131, "step": 1805 }, { "epoch": 0.601649038061131, "grad_norm": 0.50390625, "learning_rate": 1.4964332819647289e-05, "loss": 4.0777, "step": 1806 }, { "epoch": 0.6019821770633796, "grad_norm": 0.474609375, "learning_rate": 1.4964253929088447e-05, "loss": 4.1436, "step": 1807 }, { "epoch": 0.6023153160656284, "grad_norm": 0.494140625, "learning_rate": 1.4964174951587215e-05, "loss": 4.1526, "step": 1808 }, { "epoch": 0.6026484550678771, "grad_norm": 0.498046875, "learning_rate": 1.496409588714451e-05, "loss": 4.0756, "step": 1809 }, { "epoch": 0.6029815940701257, "grad_norm": 0.4765625, "learning_rate": 1.4964016735761258e-05, "loss": 4.1552, "step": 1810 }, { "epoch": 0.6033147330723745, "grad_norm": 0.474609375, "learning_rate": 1.4963937497438374e-05, "loss": 4.1083, "step": 1811 }, { "epoch": 0.6036478720746231, "grad_norm": 0.474609375, "learning_rate": 1.4963858172176788e-05, "loss": 4.2253, "step": 1812 }, { "epoch": 0.6039810110768719, "grad_norm": 0.486328125, "learning_rate": 1.4963778759977422e-05, "loss": 4.155, "step": 1813 }, { "epoch": 0.6043141500791205, "grad_norm": 0.515625, "learning_rate": 1.4963699260841198e-05, "loss": 4.126, "step": 1814 }, { "epoch": 0.6046472890813692, "grad_norm": 0.50390625, "learning_rate": 1.4963619674769045e-05, "loss": 4.1446, "step": 1815 }, { "epoch": 0.6049804280836178, "grad_norm": 0.48828125, "learning_rate": 1.4963540001761889e-05, "loss": 4.1009, "step": 1816 }, { "epoch": 0.6053135670858666, "grad_norm": 0.48046875, "learning_rate": 1.4963460241820658e-05, "loss": 4.1503, "step": 1817 }, { "epoch": 0.6056467060881152, "grad_norm": 0.490234375, "learning_rate": 1.496338039494628e-05, "loss": 4.1572, "step": 1818 }, { "epoch": 0.605979845090364, "grad_norm": 0.478515625, "learning_rate": 1.4963300461139686e-05, "loss": 4.1718, "step": 1819 }, { "epoch": 0.6063129840926127, "grad_norm": 0.48046875, "learning_rate": 1.4963220440401808e-05, "loss": 4.1381, "step": 1820 }, { "epoch": 0.6066461230948613, "grad_norm": 0.53125, "learning_rate": 1.4963140332733578e-05, "loss": 4.2361, "step": 1821 }, { "epoch": 0.6069792620971101, "grad_norm": 0.455078125, "learning_rate": 1.4963060138135925e-05, "loss": 4.2292, "step": 1822 }, { "epoch": 0.6073124010993587, "grad_norm": 0.482421875, "learning_rate": 1.496297985660979e-05, "loss": 4.1047, "step": 1823 }, { "epoch": 0.6076455401016074, "grad_norm": 0.4609375, "learning_rate": 1.4962899488156103e-05, "loss": 4.13, "step": 1824 }, { "epoch": 0.6079786791038561, "grad_norm": 0.490234375, "learning_rate": 1.4962819032775801e-05, "loss": 4.0558, "step": 1825 }, { "epoch": 0.6083118181061048, "grad_norm": 0.490234375, "learning_rate": 1.4962738490469824e-05, "loss": 4.2179, "step": 1826 }, { "epoch": 0.6086449571083534, "grad_norm": 0.48046875, "learning_rate": 1.4962657861239105e-05, "loss": 4.1356, "step": 1827 }, { "epoch": 0.6089780961106022, "grad_norm": 0.474609375, "learning_rate": 1.4962577145084589e-05, "loss": 4.1791, "step": 1828 }, { "epoch": 0.6093112351128508, "grad_norm": 0.46875, "learning_rate": 1.496249634200721e-05, "loss": 4.0993, "step": 1829 }, { "epoch": 0.6096443741150995, "grad_norm": 0.474609375, "learning_rate": 1.4962415452007914e-05, "loss": 4.1119, "step": 1830 }, { "epoch": 0.6099775131173483, "grad_norm": 0.5, "learning_rate": 1.4962334475087641e-05, "loss": 4.1604, "step": 1831 }, { "epoch": 0.6103106521195969, "grad_norm": 0.47265625, "learning_rate": 1.4962253411247334e-05, "loss": 4.1764, "step": 1832 }, { "epoch": 0.6106437911218456, "grad_norm": 0.48828125, "learning_rate": 1.4962172260487938e-05, "loss": 4.1294, "step": 1833 }, { "epoch": 0.6109769301240943, "grad_norm": 0.474609375, "learning_rate": 1.49620910228104e-05, "loss": 4.0896, "step": 1834 }, { "epoch": 0.611310069126343, "grad_norm": 0.490234375, "learning_rate": 1.4962009698215665e-05, "loss": 4.1113, "step": 1835 }, { "epoch": 0.6116432081285916, "grad_norm": 0.462890625, "learning_rate": 1.4961928286704677e-05, "loss": 4.1207, "step": 1836 }, { "epoch": 0.6119763471308404, "grad_norm": 0.48046875, "learning_rate": 1.4961846788278389e-05, "loss": 4.1402, "step": 1837 }, { "epoch": 0.612309486133089, "grad_norm": 0.5078125, "learning_rate": 1.4961765202937746e-05, "loss": 4.1869, "step": 1838 }, { "epoch": 0.6126426251353377, "grad_norm": 0.48828125, "learning_rate": 1.4961683530683702e-05, "loss": 4.1396, "step": 1839 }, { "epoch": 0.6129757641375864, "grad_norm": 0.474609375, "learning_rate": 1.4961601771517207e-05, "loss": 4.2027, "step": 1840 }, { "epoch": 0.6133089031398351, "grad_norm": 0.47265625, "learning_rate": 1.4961519925439214e-05, "loss": 4.232, "step": 1841 }, { "epoch": 0.6136420421420837, "grad_norm": 0.494140625, "learning_rate": 1.4961437992450674e-05, "loss": 4.0741, "step": 1842 }, { "epoch": 0.6139751811443325, "grad_norm": 0.5078125, "learning_rate": 1.4961355972552543e-05, "loss": 4.1475, "step": 1843 }, { "epoch": 0.6143083201465812, "grad_norm": 0.51171875, "learning_rate": 1.4961273865745778e-05, "loss": 4.1496, "step": 1844 }, { "epoch": 0.6146414591488298, "grad_norm": 0.49609375, "learning_rate": 1.4961191672031331e-05, "loss": 4.1126, "step": 1845 }, { "epoch": 0.6149745981510786, "grad_norm": 0.498046875, "learning_rate": 1.4961109391410165e-05, "loss": 4.1743, "step": 1846 }, { "epoch": 0.6153077371533272, "grad_norm": 0.47265625, "learning_rate": 1.4961027023883232e-05, "loss": 4.1522, "step": 1847 }, { "epoch": 0.615640876155576, "grad_norm": 0.48828125, "learning_rate": 1.4960944569451497e-05, "loss": 4.1815, "step": 1848 }, { "epoch": 0.6159740151578246, "grad_norm": 0.482421875, "learning_rate": 1.4960862028115917e-05, "loss": 4.1376, "step": 1849 }, { "epoch": 0.6163071541600733, "grad_norm": 0.470703125, "learning_rate": 1.4960779399877454e-05, "loss": 4.1194, "step": 1850 }, { "epoch": 0.6166402931623219, "grad_norm": 0.470703125, "learning_rate": 1.4960696684737072e-05, "loss": 4.0731, "step": 1851 }, { "epoch": 0.6169734321645707, "grad_norm": 0.5078125, "learning_rate": 1.4960613882695734e-05, "loss": 4.1664, "step": 1852 }, { "epoch": 0.6173065711668193, "grad_norm": 0.46484375, "learning_rate": 1.4960530993754403e-05, "loss": 4.1729, "step": 1853 }, { "epoch": 0.617639710169068, "grad_norm": 0.47265625, "learning_rate": 1.4960448017914045e-05, "loss": 4.0861, "step": 1854 }, { "epoch": 0.6179728491713168, "grad_norm": 0.458984375, "learning_rate": 1.4960364955175629e-05, "loss": 4.1304, "step": 1855 }, { "epoch": 0.6183059881735654, "grad_norm": 0.46875, "learning_rate": 1.4960281805540118e-05, "loss": 4.1312, "step": 1856 }, { "epoch": 0.6186391271758142, "grad_norm": 0.48046875, "learning_rate": 1.4960198569008484e-05, "loss": 4.1323, "step": 1857 }, { "epoch": 0.6189722661780628, "grad_norm": 0.455078125, "learning_rate": 1.4960115245581696e-05, "loss": 4.1336, "step": 1858 }, { "epoch": 0.6193054051803115, "grad_norm": 0.478515625, "learning_rate": 1.4960031835260723e-05, "loss": 4.1638, "step": 1859 }, { "epoch": 0.6196385441825601, "grad_norm": 0.478515625, "learning_rate": 1.4959948338046536e-05, "loss": 4.1371, "step": 1860 }, { "epoch": 0.6199716831848089, "grad_norm": 0.478515625, "learning_rate": 1.4959864753940112e-05, "loss": 4.2156, "step": 1861 }, { "epoch": 0.6203048221870575, "grad_norm": 0.486328125, "learning_rate": 1.495978108294242e-05, "loss": 4.1361, "step": 1862 }, { "epoch": 0.6206379611893063, "grad_norm": 0.466796875, "learning_rate": 1.4959697325054438e-05, "loss": 4.1655, "step": 1863 }, { "epoch": 0.6209711001915549, "grad_norm": 0.49609375, "learning_rate": 1.4959613480277136e-05, "loss": 4.1798, "step": 1864 }, { "epoch": 0.6213042391938036, "grad_norm": 0.478515625, "learning_rate": 1.4959529548611499e-05, "loss": 4.1448, "step": 1865 }, { "epoch": 0.6216373781960522, "grad_norm": 0.51953125, "learning_rate": 1.4959445530058498e-05, "loss": 4.0979, "step": 1866 }, { "epoch": 0.621970517198301, "grad_norm": 0.48046875, "learning_rate": 1.4959361424619115e-05, "loss": 4.1069, "step": 1867 }, { "epoch": 0.6223036562005497, "grad_norm": 0.5234375, "learning_rate": 1.4959277232294327e-05, "loss": 4.1472, "step": 1868 }, { "epoch": 0.6226367952027984, "grad_norm": 0.494140625, "learning_rate": 1.4959192953085115e-05, "loss": 4.1396, "step": 1869 }, { "epoch": 0.6229699342050471, "grad_norm": 0.470703125, "learning_rate": 1.4959108586992463e-05, "loss": 4.0922, "step": 1870 }, { "epoch": 0.6233030732072957, "grad_norm": 0.484375, "learning_rate": 1.4959024134017352e-05, "loss": 4.1549, "step": 1871 }, { "epoch": 0.6236362122095445, "grad_norm": 0.515625, "learning_rate": 1.4958939594160768e-05, "loss": 4.1142, "step": 1872 }, { "epoch": 0.6239693512117931, "grad_norm": 0.48828125, "learning_rate": 1.4958854967423689e-05, "loss": 4.1758, "step": 1873 }, { "epoch": 0.6243024902140418, "grad_norm": 0.474609375, "learning_rate": 1.4958770253807112e-05, "loss": 4.1733, "step": 1874 }, { "epoch": 0.6246356292162905, "grad_norm": 0.4921875, "learning_rate": 1.4958685453312011e-05, "loss": 4.1367, "step": 1875 }, { "epoch": 0.6249687682185392, "grad_norm": 0.498046875, "learning_rate": 1.4958600565939385e-05, "loss": 4.1599, "step": 1876 }, { "epoch": 0.6253019072207878, "grad_norm": 0.46484375, "learning_rate": 1.4958515591690214e-05, "loss": 4.2039, "step": 1877 }, { "epoch": 0.6256350462230366, "grad_norm": 0.5, "learning_rate": 1.4958430530565493e-05, "loss": 4.142, "step": 1878 }, { "epoch": 0.6259681852252853, "grad_norm": 0.48046875, "learning_rate": 1.4958345382566212e-05, "loss": 4.1153, "step": 1879 }, { "epoch": 0.6263013242275339, "grad_norm": 0.49609375, "learning_rate": 1.495826014769336e-05, "loss": 4.1508, "step": 1880 }, { "epoch": 0.6266344632297827, "grad_norm": 0.486328125, "learning_rate": 1.4958174825947933e-05, "loss": 4.1715, "step": 1881 }, { "epoch": 0.6269676022320313, "grad_norm": 0.50390625, "learning_rate": 1.4958089417330924e-05, "loss": 4.1874, "step": 1882 }, { "epoch": 0.62730074123428, "grad_norm": 0.50390625, "learning_rate": 1.4958003921843329e-05, "loss": 4.2044, "step": 1883 }, { "epoch": 0.6276338802365287, "grad_norm": 0.478515625, "learning_rate": 1.4957918339486139e-05, "loss": 4.1505, "step": 1884 }, { "epoch": 0.6279670192387774, "grad_norm": 0.50390625, "learning_rate": 1.4957832670260356e-05, "loss": 4.1724, "step": 1885 }, { "epoch": 0.628300158241026, "grad_norm": 0.5234375, "learning_rate": 1.4957746914166974e-05, "loss": 4.0984, "step": 1886 }, { "epoch": 0.6286332972432748, "grad_norm": 0.5078125, "learning_rate": 1.4957661071206998e-05, "loss": 4.0926, "step": 1887 }, { "epoch": 0.6289664362455234, "grad_norm": 0.4921875, "learning_rate": 1.495757514138142e-05, "loss": 4.1492, "step": 1888 }, { "epoch": 0.6292995752477721, "grad_norm": 0.48046875, "learning_rate": 1.4957489124691246e-05, "loss": 4.1687, "step": 1889 }, { "epoch": 0.6296327142500209, "grad_norm": 0.498046875, "learning_rate": 1.4957403021137479e-05, "loss": 4.153, "step": 1890 }, { "epoch": 0.6299658532522695, "grad_norm": 0.482421875, "learning_rate": 1.4957316830721114e-05, "loss": 4.1857, "step": 1891 }, { "epoch": 0.6302989922545182, "grad_norm": 0.4765625, "learning_rate": 1.4957230553443166e-05, "loss": 4.2204, "step": 1892 }, { "epoch": 0.6306321312567669, "grad_norm": 0.484375, "learning_rate": 1.4957144189304632e-05, "loss": 4.144, "step": 1893 }, { "epoch": 0.6309652702590156, "grad_norm": 0.498046875, "learning_rate": 1.4957057738306518e-05, "loss": 4.1156, "step": 1894 }, { "epoch": 0.6312984092612642, "grad_norm": 0.5078125, "learning_rate": 1.4956971200449837e-05, "loss": 4.0958, "step": 1895 }, { "epoch": 0.631631548263513, "grad_norm": 0.451171875, "learning_rate": 1.4956884575735593e-05, "loss": 4.1861, "step": 1896 }, { "epoch": 0.6319646872657616, "grad_norm": 0.498046875, "learning_rate": 1.4956797864164792e-05, "loss": 4.1618, "step": 1897 }, { "epoch": 0.6322978262680103, "grad_norm": 0.482421875, "learning_rate": 1.4956711065738451e-05, "loss": 4.1173, "step": 1898 }, { "epoch": 0.632630965270259, "grad_norm": 0.53125, "learning_rate": 1.4956624180457575e-05, "loss": 4.199, "step": 1899 }, { "epoch": 0.6329641042725077, "grad_norm": 0.490234375, "learning_rate": 1.4956537208323179e-05, "loss": 4.1831, "step": 1900 }, { "epoch": 0.6332972432747563, "grad_norm": 0.486328125, "learning_rate": 1.4956450149336276e-05, "loss": 4.1487, "step": 1901 }, { "epoch": 0.6336303822770051, "grad_norm": 0.498046875, "learning_rate": 1.4956363003497879e-05, "loss": 4.1188, "step": 1902 }, { "epoch": 0.6339635212792538, "grad_norm": 0.515625, "learning_rate": 1.4956275770809004e-05, "loss": 4.187, "step": 1903 }, { "epoch": 0.6342966602815024, "grad_norm": 0.4921875, "learning_rate": 1.4956188451270663e-05, "loss": 4.2235, "step": 1904 }, { "epoch": 0.6346297992837512, "grad_norm": 0.48046875, "learning_rate": 1.4956101044883881e-05, "loss": 4.1584, "step": 1905 }, { "epoch": 0.6349629382859998, "grad_norm": 0.458984375, "learning_rate": 1.4956013551649671e-05, "loss": 4.1671, "step": 1906 }, { "epoch": 0.6352960772882486, "grad_norm": 0.470703125, "learning_rate": 1.495592597156905e-05, "loss": 4.1728, "step": 1907 }, { "epoch": 0.6356292162904972, "grad_norm": 0.484375, "learning_rate": 1.4955838304643045e-05, "loss": 4.0892, "step": 1908 }, { "epoch": 0.6359623552927459, "grad_norm": 0.50390625, "learning_rate": 1.4955750550872671e-05, "loss": 4.1368, "step": 1909 }, { "epoch": 0.6362954942949945, "grad_norm": 0.46875, "learning_rate": 1.4955662710258952e-05, "loss": 4.1826, "step": 1910 }, { "epoch": 0.6366286332972433, "grad_norm": 0.470703125, "learning_rate": 1.4955574782802912e-05, "loss": 4.2105, "step": 1911 }, { "epoch": 0.6369617722994919, "grad_norm": 0.474609375, "learning_rate": 1.4955486768505574e-05, "loss": 4.1823, "step": 1912 }, { "epoch": 0.6372949113017407, "grad_norm": 0.4609375, "learning_rate": 1.4955398667367962e-05, "loss": 4.1517, "step": 1913 }, { "epoch": 0.6376280503039894, "grad_norm": 0.5, "learning_rate": 1.4955310479391107e-05, "loss": 4.1575, "step": 1914 }, { "epoch": 0.637961189306238, "grad_norm": 0.470703125, "learning_rate": 1.4955222204576032e-05, "loss": 4.1279, "step": 1915 }, { "epoch": 0.6382943283084868, "grad_norm": 0.50390625, "learning_rate": 1.4955133842923766e-05, "loss": 4.035, "step": 1916 }, { "epoch": 0.6386274673107354, "grad_norm": 0.48828125, "learning_rate": 1.4955045394435339e-05, "loss": 4.1208, "step": 1917 }, { "epoch": 0.6389606063129841, "grad_norm": 0.474609375, "learning_rate": 1.495495685911178e-05, "loss": 4.106, "step": 1918 }, { "epoch": 0.6392937453152328, "grad_norm": 0.482421875, "learning_rate": 1.4954868236954122e-05, "loss": 4.1714, "step": 1919 }, { "epoch": 0.6396268843174815, "grad_norm": 0.484375, "learning_rate": 1.4954779527963395e-05, "loss": 4.118, "step": 1920 }, { "epoch": 0.6399600233197301, "grad_norm": 0.462890625, "learning_rate": 1.4954690732140633e-05, "loss": 4.0899, "step": 1921 }, { "epoch": 0.6402931623219789, "grad_norm": 0.462890625, "learning_rate": 1.4954601849486873e-05, "loss": 4.1617, "step": 1922 }, { "epoch": 0.6406263013242275, "grad_norm": 0.490234375, "learning_rate": 1.4954512880003147e-05, "loss": 4.037, "step": 1923 }, { "epoch": 0.6409594403264762, "grad_norm": 0.49609375, "learning_rate": 1.4954423823690493e-05, "loss": 4.124, "step": 1924 }, { "epoch": 0.641292579328725, "grad_norm": 0.4609375, "learning_rate": 1.4954334680549948e-05, "loss": 4.1173, "step": 1925 }, { "epoch": 0.6416257183309736, "grad_norm": 0.5078125, "learning_rate": 1.4954245450582547e-05, "loss": 4.1003, "step": 1926 }, { "epoch": 0.6419588573332223, "grad_norm": 0.4609375, "learning_rate": 1.4954156133789336e-05, "loss": 4.1236, "step": 1927 }, { "epoch": 0.642291996335471, "grad_norm": 0.49609375, "learning_rate": 1.495406673017135e-05, "loss": 4.1658, "step": 1928 }, { "epoch": 0.6426251353377197, "grad_norm": 0.494140625, "learning_rate": 1.4953977239729633e-05, "loss": 4.1136, "step": 1929 }, { "epoch": 0.6429582743399683, "grad_norm": 0.46484375, "learning_rate": 1.4953887662465226e-05, "loss": 4.1407, "step": 1930 }, { "epoch": 0.6432914133422171, "grad_norm": 0.482421875, "learning_rate": 1.4953797998379172e-05, "loss": 4.1399, "step": 1931 }, { "epoch": 0.6436245523444657, "grad_norm": 0.4765625, "learning_rate": 1.4953708247472518e-05, "loss": 4.2032, "step": 1932 }, { "epoch": 0.6439576913467144, "grad_norm": 0.4921875, "learning_rate": 1.4953618409746308e-05, "loss": 4.1219, "step": 1933 }, { "epoch": 0.6442908303489631, "grad_norm": 0.484375, "learning_rate": 1.4953528485201585e-05, "loss": 4.1853, "step": 1934 }, { "epoch": 0.6446239693512118, "grad_norm": 0.490234375, "learning_rate": 1.49534384738394e-05, "loss": 4.1438, "step": 1935 }, { "epoch": 0.6449571083534604, "grad_norm": 0.470703125, "learning_rate": 1.4953348375660802e-05, "loss": 4.1019, "step": 1936 }, { "epoch": 0.6452902473557092, "grad_norm": 0.48828125, "learning_rate": 1.4953258190666838e-05, "loss": 4.1843, "step": 1937 }, { "epoch": 0.6456233863579579, "grad_norm": 0.4609375, "learning_rate": 1.4953167918858564e-05, "loss": 4.1677, "step": 1938 }, { "epoch": 0.6459565253602065, "grad_norm": 0.478515625, "learning_rate": 1.4953077560237022e-05, "loss": 4.1481, "step": 1939 }, { "epoch": 0.6462896643624553, "grad_norm": 0.484375, "learning_rate": 1.4952987114803273e-05, "loss": 4.1062, "step": 1940 }, { "epoch": 0.6466228033647039, "grad_norm": 0.4921875, "learning_rate": 1.4952896582558366e-05, "loss": 4.1314, "step": 1941 }, { "epoch": 0.6469559423669526, "grad_norm": 0.484375, "learning_rate": 1.4952805963503358e-05, "loss": 4.1297, "step": 1942 }, { "epoch": 0.6472890813692013, "grad_norm": 0.482421875, "learning_rate": 1.4952715257639302e-05, "loss": 4.1777, "step": 1943 }, { "epoch": 0.64762222037145, "grad_norm": 0.48828125, "learning_rate": 1.4952624464967254e-05, "loss": 4.121, "step": 1944 }, { "epoch": 0.6479553593736986, "grad_norm": 0.466796875, "learning_rate": 1.4952533585488277e-05, "loss": 4.1781, "step": 1945 }, { "epoch": 0.6482884983759474, "grad_norm": 0.47265625, "learning_rate": 1.4952442619203425e-05, "loss": 4.1165, "step": 1946 }, { "epoch": 0.648621637378196, "grad_norm": 0.494140625, "learning_rate": 1.4952351566113759e-05, "loss": 4.1064, "step": 1947 }, { "epoch": 0.6489547763804447, "grad_norm": 0.5234375, "learning_rate": 1.4952260426220338e-05, "loss": 4.0973, "step": 1948 }, { "epoch": 0.6492879153826935, "grad_norm": 0.4765625, "learning_rate": 1.4952169199524224e-05, "loss": 4.1544, "step": 1949 }, { "epoch": 0.6496210543849421, "grad_norm": 0.478515625, "learning_rate": 1.495207788602648e-05, "loss": 4.176, "step": 1950 }, { "epoch": 0.6499541933871908, "grad_norm": 0.498046875, "learning_rate": 1.495198648572817e-05, "loss": 4.1389, "step": 1951 }, { "epoch": 0.6502873323894395, "grad_norm": 0.48828125, "learning_rate": 1.495189499863036e-05, "loss": 4.1352, "step": 1952 }, { "epoch": 0.6506204713916882, "grad_norm": 0.482421875, "learning_rate": 1.4951803424734112e-05, "loss": 4.2093, "step": 1953 }, { "epoch": 0.6509536103939368, "grad_norm": 0.46875, "learning_rate": 1.4951711764040496e-05, "loss": 4.1182, "step": 1954 }, { "epoch": 0.6512867493961856, "grad_norm": 0.46875, "learning_rate": 1.4951620016550579e-05, "loss": 4.0785, "step": 1955 }, { "epoch": 0.6516198883984342, "grad_norm": 0.494140625, "learning_rate": 1.4951528182265428e-05, "loss": 4.1556, "step": 1956 }, { "epoch": 0.651953027400683, "grad_norm": 0.5234375, "learning_rate": 1.4951436261186112e-05, "loss": 4.1586, "step": 1957 }, { "epoch": 0.6522861664029316, "grad_norm": 0.48828125, "learning_rate": 1.4951344253313704e-05, "loss": 4.167, "step": 1958 }, { "epoch": 0.6526193054051803, "grad_norm": 0.46484375, "learning_rate": 1.4951252158649277e-05, "loss": 4.1517, "step": 1959 }, { "epoch": 0.652952444407429, "grad_norm": 0.48046875, "learning_rate": 1.4951159977193901e-05, "loss": 4.1213, "step": 1960 }, { "epoch": 0.6532855834096777, "grad_norm": 0.47265625, "learning_rate": 1.495106770894865e-05, "loss": 4.1472, "step": 1961 }, { "epoch": 0.6536187224119264, "grad_norm": 0.486328125, "learning_rate": 1.49509753539146e-05, "loss": 4.1795, "step": 1962 }, { "epoch": 0.653951861414175, "grad_norm": 0.48828125, "learning_rate": 1.4950882912092826e-05, "loss": 4.1266, "step": 1963 }, { "epoch": 0.6542850004164238, "grad_norm": 0.45703125, "learning_rate": 1.4950790383484404e-05, "loss": 4.1278, "step": 1964 }, { "epoch": 0.6546181394186724, "grad_norm": 0.5, "learning_rate": 1.495069776809041e-05, "loss": 4.1645, "step": 1965 }, { "epoch": 0.6549512784209212, "grad_norm": 0.50390625, "learning_rate": 1.495060506591193e-05, "loss": 4.1176, "step": 1966 }, { "epoch": 0.6552844174231698, "grad_norm": 0.474609375, "learning_rate": 1.4950512276950036e-05, "loss": 4.1934, "step": 1967 }, { "epoch": 0.6556175564254185, "grad_norm": 0.47265625, "learning_rate": 1.4950419401205812e-05, "loss": 4.1908, "step": 1968 }, { "epoch": 0.6559506954276672, "grad_norm": 0.48828125, "learning_rate": 1.495032643868034e-05, "loss": 4.1435, "step": 1969 }, { "epoch": 0.6562838344299159, "grad_norm": 0.5, "learning_rate": 1.4950233389374701e-05, "loss": 4.2383, "step": 1970 }, { "epoch": 0.6566169734321645, "grad_norm": 0.498046875, "learning_rate": 1.4950140253289982e-05, "loss": 4.1636, "step": 1971 }, { "epoch": 0.6569501124344133, "grad_norm": 0.482421875, "learning_rate": 1.4950047030427266e-05, "loss": 4.1149, "step": 1972 }, { "epoch": 0.657283251436662, "grad_norm": 0.494140625, "learning_rate": 1.4949953720787637e-05, "loss": 4.1212, "step": 1973 }, { "epoch": 0.6576163904389106, "grad_norm": 0.50390625, "learning_rate": 1.4949860324372185e-05, "loss": 4.1572, "step": 1974 }, { "epoch": 0.6579495294411594, "grad_norm": 0.462890625, "learning_rate": 1.4949766841181997e-05, "loss": 4.129, "step": 1975 }, { "epoch": 0.658282668443408, "grad_norm": 0.48828125, "learning_rate": 1.4949673271218162e-05, "loss": 4.1363, "step": 1976 }, { "epoch": 0.6586158074456567, "grad_norm": 0.4921875, "learning_rate": 1.4949579614481769e-05, "loss": 4.2062, "step": 1977 }, { "epoch": 0.6589489464479054, "grad_norm": 0.48828125, "learning_rate": 1.4949485870973907e-05, "loss": 4.1361, "step": 1978 }, { "epoch": 0.6592820854501541, "grad_norm": 0.515625, "learning_rate": 1.4949392040695674e-05, "loss": 4.114, "step": 1979 }, { "epoch": 0.6596152244524027, "grad_norm": 0.486328125, "learning_rate": 1.4949298123648156e-05, "loss": 4.1217, "step": 1980 }, { "epoch": 0.6599483634546515, "grad_norm": 0.51953125, "learning_rate": 1.4949204119832451e-05, "loss": 4.2041, "step": 1981 }, { "epoch": 0.6602815024569001, "grad_norm": 0.48046875, "learning_rate": 1.4949110029249653e-05, "loss": 4.1453, "step": 1982 }, { "epoch": 0.6606146414591488, "grad_norm": 0.474609375, "learning_rate": 1.4949015851900857e-05, "loss": 4.1571, "step": 1983 }, { "epoch": 0.6609477804613976, "grad_norm": 0.48828125, "learning_rate": 1.4948921587787163e-05, "loss": 4.1204, "step": 1984 }, { "epoch": 0.6612809194636462, "grad_norm": 0.47265625, "learning_rate": 1.4948827236909666e-05, "loss": 4.1208, "step": 1985 }, { "epoch": 0.6616140584658949, "grad_norm": 0.5234375, "learning_rate": 1.4948732799269465e-05, "loss": 4.1424, "step": 1986 }, { "epoch": 0.6619471974681436, "grad_norm": 0.50390625, "learning_rate": 1.494863827486766e-05, "loss": 4.1365, "step": 1987 }, { "epoch": 0.6622803364703923, "grad_norm": 0.484375, "learning_rate": 1.4948543663705355e-05, "loss": 4.1703, "step": 1988 }, { "epoch": 0.6626134754726409, "grad_norm": 0.47265625, "learning_rate": 1.4948448965783648e-05, "loss": 4.1339, "step": 1989 }, { "epoch": 0.6629466144748897, "grad_norm": 0.5078125, "learning_rate": 1.4948354181103645e-05, "loss": 4.0968, "step": 1990 }, { "epoch": 0.6632797534771383, "grad_norm": 0.486328125, "learning_rate": 1.4948259309666448e-05, "loss": 4.1087, "step": 1991 }, { "epoch": 0.663612892479387, "grad_norm": 0.486328125, "learning_rate": 1.4948164351473164e-05, "loss": 4.1549, "step": 1992 }, { "epoch": 0.6639460314816357, "grad_norm": 0.49609375, "learning_rate": 1.4948069306524896e-05, "loss": 4.1352, "step": 1993 }, { "epoch": 0.6642791704838844, "grad_norm": 0.484375, "learning_rate": 1.4947974174822756e-05, "loss": 4.1585, "step": 1994 }, { "epoch": 0.664612309486133, "grad_norm": 0.48046875, "learning_rate": 1.4947878956367848e-05, "loss": 4.1703, "step": 1995 }, { "epoch": 0.6649454484883818, "grad_norm": 0.478515625, "learning_rate": 1.4947783651161282e-05, "loss": 4.1002, "step": 1996 }, { "epoch": 0.6652785874906305, "grad_norm": 0.474609375, "learning_rate": 1.4947688259204169e-05, "loss": 4.1649, "step": 1997 }, { "epoch": 0.6656117264928791, "grad_norm": 0.44921875, "learning_rate": 1.4947592780497617e-05, "loss": 4.1574, "step": 1998 }, { "epoch": 0.6659448654951279, "grad_norm": 0.484375, "learning_rate": 1.4947497215042743e-05, "loss": 4.1089, "step": 1999 }, { "epoch": 0.6662780044973765, "grad_norm": 0.45703125, "learning_rate": 1.4947401562840655e-05, "loss": 4.1726, "step": 2000 }, { "epoch": 0.6666111434996252, "grad_norm": 0.470703125, "learning_rate": 1.4947305823892473e-05, "loss": 4.1527, "step": 2001 }, { "epoch": 0.6669442825018739, "grad_norm": 0.46875, "learning_rate": 1.494720999819931e-05, "loss": 4.1199, "step": 2002 }, { "epoch": 0.6672774215041226, "grad_norm": 0.494140625, "learning_rate": 1.4947114085762277e-05, "loss": 4.1291, "step": 2003 }, { "epoch": 0.6676105605063712, "grad_norm": 0.48046875, "learning_rate": 1.4947018086582499e-05, "loss": 4.1388, "step": 2004 }, { "epoch": 0.66794369950862, "grad_norm": 0.4765625, "learning_rate": 1.4946922000661088e-05, "loss": 4.1853, "step": 2005 }, { "epoch": 0.6682768385108686, "grad_norm": 0.46484375, "learning_rate": 1.4946825827999166e-05, "loss": 4.1825, "step": 2006 }, { "epoch": 0.6686099775131173, "grad_norm": 0.47265625, "learning_rate": 1.4946729568597854e-05, "loss": 4.2175, "step": 2007 }, { "epoch": 0.6689431165153661, "grad_norm": 0.5, "learning_rate": 1.4946633222458271e-05, "loss": 4.133, "step": 2008 }, { "epoch": 0.6692762555176147, "grad_norm": 0.490234375, "learning_rate": 1.494653678958154e-05, "loss": 4.1696, "step": 2009 }, { "epoch": 0.6696093945198635, "grad_norm": 0.45703125, "learning_rate": 1.4946440269968785e-05, "loss": 4.263, "step": 2010 }, { "epoch": 0.6699425335221121, "grad_norm": 0.482421875, "learning_rate": 1.4946343663621133e-05, "loss": 4.1636, "step": 2011 }, { "epoch": 0.6702756725243608, "grad_norm": 0.486328125, "learning_rate": 1.4946246970539702e-05, "loss": 4.1589, "step": 2012 }, { "epoch": 0.6706088115266094, "grad_norm": 0.478515625, "learning_rate": 1.4946150190725622e-05, "loss": 4.1553, "step": 2013 }, { "epoch": 0.6709419505288582, "grad_norm": 0.48046875, "learning_rate": 1.4946053324180025e-05, "loss": 4.2472, "step": 2014 }, { "epoch": 0.6712750895311068, "grad_norm": 0.4765625, "learning_rate": 1.494595637090403e-05, "loss": 4.1539, "step": 2015 }, { "epoch": 0.6716082285333556, "grad_norm": 0.47265625, "learning_rate": 1.4945859330898776e-05, "loss": 4.1881, "step": 2016 }, { "epoch": 0.6719413675356042, "grad_norm": 0.466796875, "learning_rate": 1.4945762204165384e-05, "loss": 4.1904, "step": 2017 }, { "epoch": 0.6722745065378529, "grad_norm": 0.47265625, "learning_rate": 1.4945664990704992e-05, "loss": 4.1791, "step": 2018 }, { "epoch": 0.6726076455401017, "grad_norm": 0.484375, "learning_rate": 1.494556769051873e-05, "loss": 4.1618, "step": 2019 }, { "epoch": 0.6729407845423503, "grad_norm": 0.482421875, "learning_rate": 1.494547030360773e-05, "loss": 4.1299, "step": 2020 }, { "epoch": 0.673273923544599, "grad_norm": 0.486328125, "learning_rate": 1.494537282997313e-05, "loss": 4.1492, "step": 2021 }, { "epoch": 0.6736070625468477, "grad_norm": 0.478515625, "learning_rate": 1.4945275269616063e-05, "loss": 4.1846, "step": 2022 }, { "epoch": 0.6739402015490964, "grad_norm": 0.4765625, "learning_rate": 1.4945177622537665e-05, "loss": 4.1937, "step": 2023 }, { "epoch": 0.674273340551345, "grad_norm": 0.466796875, "learning_rate": 1.4945079888739074e-05, "loss": 4.1408, "step": 2024 }, { "epoch": 0.6746064795535938, "grad_norm": 0.49609375, "learning_rate": 1.4944982068221427e-05, "loss": 4.1058, "step": 2025 }, { "epoch": 0.6749396185558424, "grad_norm": 0.48828125, "learning_rate": 1.4944884160985865e-05, "loss": 4.1312, "step": 2026 }, { "epoch": 0.6752727575580911, "grad_norm": 0.478515625, "learning_rate": 1.494478616703353e-05, "loss": 4.1731, "step": 2027 }, { "epoch": 0.6756058965603398, "grad_norm": 0.5078125, "learning_rate": 1.4944688086365561e-05, "loss": 4.069, "step": 2028 }, { "epoch": 0.6759390355625885, "grad_norm": 0.466796875, "learning_rate": 1.49445899189831e-05, "loss": 4.186, "step": 2029 }, { "epoch": 0.6762721745648371, "grad_norm": 0.46875, "learning_rate": 1.4944491664887293e-05, "loss": 4.1667, "step": 2030 }, { "epoch": 0.6766053135670859, "grad_norm": 0.5078125, "learning_rate": 1.494439332407928e-05, "loss": 4.1646, "step": 2031 }, { "epoch": 0.6769384525693346, "grad_norm": 0.50390625, "learning_rate": 1.4944294896560211e-05, "loss": 4.1297, "step": 2032 }, { "epoch": 0.6772715915715832, "grad_norm": 0.478515625, "learning_rate": 1.4944196382331232e-05, "loss": 4.1575, "step": 2033 }, { "epoch": 0.677604730573832, "grad_norm": 0.462890625, "learning_rate": 1.4944097781393488e-05, "loss": 4.1983, "step": 2034 }, { "epoch": 0.6779378695760806, "grad_norm": 0.484375, "learning_rate": 1.4943999093748128e-05, "loss": 4.1118, "step": 2035 }, { "epoch": 0.6782710085783293, "grad_norm": 0.515625, "learning_rate": 1.4943900319396301e-05, "loss": 4.0634, "step": 2036 }, { "epoch": 0.678604147580578, "grad_norm": 0.498046875, "learning_rate": 1.4943801458339164e-05, "loss": 4.151, "step": 2037 }, { "epoch": 0.6789372865828267, "grad_norm": 0.484375, "learning_rate": 1.4943702510577858e-05, "loss": 4.1872, "step": 2038 }, { "epoch": 0.6792704255850753, "grad_norm": 0.490234375, "learning_rate": 1.4943603476113544e-05, "loss": 4.059, "step": 2039 }, { "epoch": 0.6796035645873241, "grad_norm": 0.4921875, "learning_rate": 1.494350435494737e-05, "loss": 4.1361, "step": 2040 }, { "epoch": 0.6799367035895727, "grad_norm": 0.490234375, "learning_rate": 1.4943405147080494e-05, "loss": 4.2294, "step": 2041 }, { "epoch": 0.6802698425918214, "grad_norm": 0.51171875, "learning_rate": 1.4943305852514072e-05, "loss": 4.1503, "step": 2042 }, { "epoch": 0.6806029815940702, "grad_norm": 0.484375, "learning_rate": 1.4943206471249256e-05, "loss": 4.1817, "step": 2043 }, { "epoch": 0.6809361205963188, "grad_norm": 0.47265625, "learning_rate": 1.4943107003287208e-05, "loss": 4.1539, "step": 2044 }, { "epoch": 0.6812692595985675, "grad_norm": 0.52734375, "learning_rate": 1.4943007448629084e-05, "loss": 4.1214, "step": 2045 }, { "epoch": 0.6816023986008162, "grad_norm": 0.48828125, "learning_rate": 1.4942907807276048e-05, "loss": 4.1839, "step": 2046 }, { "epoch": 0.6819355376030649, "grad_norm": 0.498046875, "learning_rate": 1.4942808079229255e-05, "loss": 4.0918, "step": 2047 }, { "epoch": 0.6822686766053135, "grad_norm": 0.494140625, "learning_rate": 1.4942708264489868e-05, "loss": 4.1275, "step": 2048 }, { "epoch": 0.6826018156075623, "grad_norm": 0.51953125, "learning_rate": 1.4942608363059053e-05, "loss": 4.168, "step": 2049 }, { "epoch": 0.6829349546098109, "grad_norm": 0.470703125, "learning_rate": 1.494250837493797e-05, "loss": 4.1648, "step": 2050 }, { "epoch": 0.6832680936120596, "grad_norm": 0.48046875, "learning_rate": 1.4942408300127785e-05, "loss": 4.1722, "step": 2051 }, { "epoch": 0.6836012326143083, "grad_norm": 0.494140625, "learning_rate": 1.4942308138629662e-05, "loss": 4.2071, "step": 2052 }, { "epoch": 0.683934371616557, "grad_norm": 0.494140625, "learning_rate": 1.4942207890444773e-05, "loss": 4.1534, "step": 2053 }, { "epoch": 0.6842675106188058, "grad_norm": 0.5, "learning_rate": 1.4942107555574277e-05, "loss": 4.1171, "step": 2054 }, { "epoch": 0.6846006496210544, "grad_norm": 0.48046875, "learning_rate": 1.494200713401935e-05, "loss": 4.1329, "step": 2055 }, { "epoch": 0.6849337886233031, "grad_norm": 0.50390625, "learning_rate": 1.494190662578116e-05, "loss": 4.1521, "step": 2056 }, { "epoch": 0.6852669276255517, "grad_norm": 0.462890625, "learning_rate": 1.4941806030860876e-05, "loss": 4.1704, "step": 2057 }, { "epoch": 0.6856000666278005, "grad_norm": 0.474609375, "learning_rate": 1.494170534925967e-05, "loss": 4.1647, "step": 2058 }, { "epoch": 0.6859332056300491, "grad_norm": 0.48828125, "learning_rate": 1.4941604580978716e-05, "loss": 4.1457, "step": 2059 }, { "epoch": 0.6862663446322979, "grad_norm": 0.486328125, "learning_rate": 1.494150372601919e-05, "loss": 4.2115, "step": 2060 }, { "epoch": 0.6865994836345465, "grad_norm": 0.486328125, "learning_rate": 1.4941402784382261e-05, "loss": 4.1613, "step": 2061 }, { "epoch": 0.6869326226367952, "grad_norm": 0.45703125, "learning_rate": 1.4941301756069106e-05, "loss": 4.2211, "step": 2062 }, { "epoch": 0.6872657616390438, "grad_norm": 0.48828125, "learning_rate": 1.4941200641080906e-05, "loss": 4.1221, "step": 2063 }, { "epoch": 0.6875989006412926, "grad_norm": 0.48828125, "learning_rate": 1.4941099439418837e-05, "loss": 4.0906, "step": 2064 }, { "epoch": 0.6879320396435412, "grad_norm": 0.5, "learning_rate": 1.4940998151084073e-05, "loss": 4.1078, "step": 2065 }, { "epoch": 0.68826517864579, "grad_norm": 0.5078125, "learning_rate": 1.49408967760778e-05, "loss": 4.0987, "step": 2066 }, { "epoch": 0.6885983176480387, "grad_norm": 0.50390625, "learning_rate": 1.4940795314401197e-05, "loss": 4.1038, "step": 2067 }, { "epoch": 0.6889314566502873, "grad_norm": 0.5078125, "learning_rate": 1.4940693766055444e-05, "loss": 4.13, "step": 2068 }, { "epoch": 0.6892645956525361, "grad_norm": 0.46875, "learning_rate": 1.4940592131041728e-05, "loss": 4.2013, "step": 2069 }, { "epoch": 0.6895977346547847, "grad_norm": 0.4921875, "learning_rate": 1.4940490409361226e-05, "loss": 4.1734, "step": 2070 }, { "epoch": 0.6899308736570334, "grad_norm": 0.484375, "learning_rate": 1.494038860101513e-05, "loss": 4.1483, "step": 2071 }, { "epoch": 0.6902640126592821, "grad_norm": 0.478515625, "learning_rate": 1.4940286706004621e-05, "loss": 4.1802, "step": 2072 }, { "epoch": 0.6905971516615308, "grad_norm": 0.47265625, "learning_rate": 1.4940184724330888e-05, "loss": 4.1355, "step": 2073 }, { "epoch": 0.6909302906637794, "grad_norm": 0.5234375, "learning_rate": 1.4940082655995119e-05, "loss": 4.1159, "step": 2074 }, { "epoch": 0.6912634296660282, "grad_norm": 0.48828125, "learning_rate": 1.4939980500998502e-05, "loss": 4.1948, "step": 2075 }, { "epoch": 0.6915965686682768, "grad_norm": 0.466796875, "learning_rate": 1.4939878259342226e-05, "loss": 4.2032, "step": 2076 }, { "epoch": 0.6919297076705255, "grad_norm": 0.494140625, "learning_rate": 1.4939775931027484e-05, "loss": 4.1515, "step": 2077 }, { "epoch": 0.6922628466727743, "grad_norm": 0.515625, "learning_rate": 1.4939673516055468e-05, "loss": 4.1777, "step": 2078 }, { "epoch": 0.6925959856750229, "grad_norm": 0.484375, "learning_rate": 1.4939571014427367e-05, "loss": 4.0783, "step": 2079 }, { "epoch": 0.6929291246772716, "grad_norm": 0.48828125, "learning_rate": 1.493946842614438e-05, "loss": 4.1179, "step": 2080 }, { "epoch": 0.6932622636795203, "grad_norm": 0.498046875, "learning_rate": 1.4939365751207697e-05, "loss": 4.1561, "step": 2081 }, { "epoch": 0.693595402681769, "grad_norm": 0.490234375, "learning_rate": 1.493926298961852e-05, "loss": 4.181, "step": 2082 }, { "epoch": 0.6939285416840176, "grad_norm": 0.49609375, "learning_rate": 1.4939160141378042e-05, "loss": 4.1409, "step": 2083 }, { "epoch": 0.6942616806862664, "grad_norm": 0.51171875, "learning_rate": 1.493905720648746e-05, "loss": 4.0859, "step": 2084 }, { "epoch": 0.694594819688515, "grad_norm": 0.482421875, "learning_rate": 1.4938954184947975e-05, "loss": 4.1686, "step": 2085 }, { "epoch": 0.6949279586907637, "grad_norm": 0.48046875, "learning_rate": 1.4938851076760787e-05, "loss": 4.143, "step": 2086 }, { "epoch": 0.6952610976930124, "grad_norm": 0.498046875, "learning_rate": 1.4938747881927096e-05, "loss": 4.0653, "step": 2087 }, { "epoch": 0.6955942366952611, "grad_norm": 0.50390625, "learning_rate": 1.4938644600448105e-05, "loss": 4.0826, "step": 2088 }, { "epoch": 0.6959273756975097, "grad_norm": 0.482421875, "learning_rate": 1.4938541232325017e-05, "loss": 4.1071, "step": 2089 }, { "epoch": 0.6962605146997585, "grad_norm": 0.47265625, "learning_rate": 1.4938437777559034e-05, "loss": 4.1566, "step": 2090 }, { "epoch": 0.6965936537020072, "grad_norm": 0.474609375, "learning_rate": 1.4938334236151363e-05, "loss": 4.0711, "step": 2091 }, { "epoch": 0.6969267927042558, "grad_norm": 0.482421875, "learning_rate": 1.493823060810321e-05, "loss": 4.0579, "step": 2092 }, { "epoch": 0.6972599317065046, "grad_norm": 0.4765625, "learning_rate": 1.4938126893415782e-05, "loss": 4.1085, "step": 2093 }, { "epoch": 0.6975930707087532, "grad_norm": 0.478515625, "learning_rate": 1.4938023092090286e-05, "loss": 4.1827, "step": 2094 }, { "epoch": 0.6979262097110019, "grad_norm": 0.48046875, "learning_rate": 1.493791920412793e-05, "loss": 4.1243, "step": 2095 }, { "epoch": 0.6982593487132506, "grad_norm": 0.515625, "learning_rate": 1.493781522952993e-05, "loss": 4.1258, "step": 2096 }, { "epoch": 0.6985924877154993, "grad_norm": 0.490234375, "learning_rate": 1.4937711168297489e-05, "loss": 4.1568, "step": 2097 }, { "epoch": 0.6989256267177479, "grad_norm": 0.49609375, "learning_rate": 1.4937607020431824e-05, "loss": 4.1027, "step": 2098 }, { "epoch": 0.6992587657199967, "grad_norm": 0.494140625, "learning_rate": 1.4937502785934148e-05, "loss": 4.1469, "step": 2099 }, { "epoch": 0.6995919047222453, "grad_norm": 0.48828125, "learning_rate": 1.4937398464805674e-05, "loss": 4.1797, "step": 2100 }, { "epoch": 0.699925043724494, "grad_norm": 0.494140625, "learning_rate": 1.4937294057047615e-05, "loss": 4.0861, "step": 2101 }, { "epoch": 0.7002581827267428, "grad_norm": 0.51171875, "learning_rate": 1.4937189562661191e-05, "loss": 4.1434, "step": 2102 }, { "epoch": 0.7005913217289914, "grad_norm": 0.48828125, "learning_rate": 1.493708498164762e-05, "loss": 4.184, "step": 2103 }, { "epoch": 0.7009244607312402, "grad_norm": 0.482421875, "learning_rate": 1.4936980314008114e-05, "loss": 4.096, "step": 2104 }, { "epoch": 0.7012575997334888, "grad_norm": 0.494140625, "learning_rate": 1.4936875559743897e-05, "loss": 4.17, "step": 2105 }, { "epoch": 0.7015907387357375, "grad_norm": 0.484375, "learning_rate": 1.493677071885619e-05, "loss": 4.154, "step": 2106 }, { "epoch": 0.7019238777379861, "grad_norm": 0.498046875, "learning_rate": 1.493666579134621e-05, "loss": 4.1379, "step": 2107 }, { "epoch": 0.7022570167402349, "grad_norm": 0.498046875, "learning_rate": 1.4936560777215182e-05, "loss": 4.1189, "step": 2108 }, { "epoch": 0.7025901557424835, "grad_norm": 0.474609375, "learning_rate": 1.4936455676464325e-05, "loss": 4.1788, "step": 2109 }, { "epoch": 0.7029232947447323, "grad_norm": 0.490234375, "learning_rate": 1.493635048909487e-05, "loss": 4.0864, "step": 2110 }, { "epoch": 0.7032564337469809, "grad_norm": 0.51953125, "learning_rate": 1.493624521510804e-05, "loss": 4.1013, "step": 2111 }, { "epoch": 0.7035895727492296, "grad_norm": 0.50390625, "learning_rate": 1.4936139854505058e-05, "loss": 4.0968, "step": 2112 }, { "epoch": 0.7039227117514784, "grad_norm": 0.474609375, "learning_rate": 1.4936034407287155e-05, "loss": 4.1557, "step": 2113 }, { "epoch": 0.704255850753727, "grad_norm": 0.486328125, "learning_rate": 1.4935928873455556e-05, "loss": 4.1132, "step": 2114 }, { "epoch": 0.7045889897559757, "grad_norm": 0.486328125, "learning_rate": 1.4935823253011492e-05, "loss": 4.1866, "step": 2115 }, { "epoch": 0.7049221287582244, "grad_norm": 0.462890625, "learning_rate": 1.4935717545956196e-05, "loss": 4.1936, "step": 2116 }, { "epoch": 0.7052552677604731, "grad_norm": 0.482421875, "learning_rate": 1.4935611752290892e-05, "loss": 4.1417, "step": 2117 }, { "epoch": 0.7055884067627217, "grad_norm": 0.486328125, "learning_rate": 1.4935505872016821e-05, "loss": 4.1256, "step": 2118 }, { "epoch": 0.7059215457649705, "grad_norm": 0.466796875, "learning_rate": 1.4935399905135208e-05, "loss": 4.1697, "step": 2119 }, { "epoch": 0.7062546847672191, "grad_norm": 0.4765625, "learning_rate": 1.4935293851647293e-05, "loss": 4.1368, "step": 2120 }, { "epoch": 0.7065878237694678, "grad_norm": 0.4921875, "learning_rate": 1.4935187711554308e-05, "loss": 4.1218, "step": 2121 }, { "epoch": 0.7069209627717165, "grad_norm": 0.494140625, "learning_rate": 1.4935081484857492e-05, "loss": 4.1269, "step": 2122 }, { "epoch": 0.7072541017739652, "grad_norm": 0.474609375, "learning_rate": 1.4934975171558082e-05, "loss": 4.1931, "step": 2123 }, { "epoch": 0.7075872407762138, "grad_norm": 0.50390625, "learning_rate": 1.4934868771657314e-05, "loss": 4.0467, "step": 2124 }, { "epoch": 0.7079203797784626, "grad_norm": 0.494140625, "learning_rate": 1.4934762285156428e-05, "loss": 4.1221, "step": 2125 }, { "epoch": 0.7082535187807113, "grad_norm": 0.5078125, "learning_rate": 1.4934655712056666e-05, "loss": 4.1542, "step": 2126 }, { "epoch": 0.7085866577829599, "grad_norm": 0.49609375, "learning_rate": 1.4934549052359269e-05, "loss": 4.0646, "step": 2127 }, { "epoch": 0.7089197967852087, "grad_norm": 0.5234375, "learning_rate": 1.4934442306065478e-05, "loss": 4.2687, "step": 2128 }, { "epoch": 0.7092529357874573, "grad_norm": 0.50390625, "learning_rate": 1.4934335473176537e-05, "loss": 4.0917, "step": 2129 }, { "epoch": 0.709586074789706, "grad_norm": 0.46875, "learning_rate": 1.4934228553693691e-05, "loss": 4.1102, "step": 2130 }, { "epoch": 0.7099192137919547, "grad_norm": 0.52734375, "learning_rate": 1.4934121547618185e-05, "loss": 4.0988, "step": 2131 }, { "epoch": 0.7102523527942034, "grad_norm": 0.478515625, "learning_rate": 1.4934014454951264e-05, "loss": 4.1295, "step": 2132 }, { "epoch": 0.710585491796452, "grad_norm": 0.5, "learning_rate": 1.4933907275694178e-05, "loss": 4.0974, "step": 2133 }, { "epoch": 0.7109186307987008, "grad_norm": 0.48828125, "learning_rate": 1.4933800009848173e-05, "loss": 4.1523, "step": 2134 }, { "epoch": 0.7112517698009494, "grad_norm": 0.494140625, "learning_rate": 1.4933692657414499e-05, "loss": 4.17, "step": 2135 }, { "epoch": 0.7115849088031981, "grad_norm": 0.50390625, "learning_rate": 1.4933585218394409e-05, "loss": 4.1755, "step": 2136 }, { "epoch": 0.7119180478054469, "grad_norm": 0.48828125, "learning_rate": 1.4933477692789153e-05, "loss": 4.1673, "step": 2137 }, { "epoch": 0.7122511868076955, "grad_norm": 0.48046875, "learning_rate": 1.4933370080599979e-05, "loss": 4.0108, "step": 2138 }, { "epoch": 0.7125843258099442, "grad_norm": 0.494140625, "learning_rate": 1.4933262381828147e-05, "loss": 4.0926, "step": 2139 }, { "epoch": 0.7129174648121929, "grad_norm": 0.478515625, "learning_rate": 1.4933154596474908e-05, "loss": 4.1204, "step": 2140 }, { "epoch": 0.7132506038144416, "grad_norm": 0.515625, "learning_rate": 1.4933046724541517e-05, "loss": 4.1256, "step": 2141 }, { "epoch": 0.7135837428166902, "grad_norm": 0.49609375, "learning_rate": 1.4932938766029235e-05, "loss": 4.1379, "step": 2142 }, { "epoch": 0.713916881818939, "grad_norm": 0.5078125, "learning_rate": 1.4932830720939312e-05, "loss": 4.1743, "step": 2143 }, { "epoch": 0.7142500208211876, "grad_norm": 0.484375, "learning_rate": 1.4932722589273012e-05, "loss": 4.1606, "step": 2144 }, { "epoch": 0.7145831598234363, "grad_norm": 0.486328125, "learning_rate": 1.4932614371031593e-05, "loss": 4.1307, "step": 2145 }, { "epoch": 0.714916298825685, "grad_norm": 0.490234375, "learning_rate": 1.4932506066216317e-05, "loss": 4.0363, "step": 2146 }, { "epoch": 0.7152494378279337, "grad_norm": 0.498046875, "learning_rate": 1.4932397674828442e-05, "loss": 4.1502, "step": 2147 }, { "epoch": 0.7155825768301824, "grad_norm": 0.498046875, "learning_rate": 1.4932289196869235e-05, "loss": 4.0824, "step": 2148 }, { "epoch": 0.7159157158324311, "grad_norm": 0.5078125, "learning_rate": 1.4932180632339958e-05, "loss": 4.0433, "step": 2149 }, { "epoch": 0.7162488548346798, "grad_norm": 0.48046875, "learning_rate": 1.493207198124187e-05, "loss": 4.0875, "step": 2150 }, { "epoch": 0.7165819938369284, "grad_norm": 0.46484375, "learning_rate": 1.4931963243576243e-05, "loss": 4.1631, "step": 2151 }, { "epoch": 0.7169151328391772, "grad_norm": 0.4609375, "learning_rate": 1.4931854419344344e-05, "loss": 4.1739, "step": 2152 }, { "epoch": 0.7172482718414258, "grad_norm": 0.49609375, "learning_rate": 1.4931745508547436e-05, "loss": 4.0741, "step": 2153 }, { "epoch": 0.7175814108436745, "grad_norm": 0.48828125, "learning_rate": 1.4931636511186792e-05, "loss": 4.1425, "step": 2154 }, { "epoch": 0.7179145498459232, "grad_norm": 0.482421875, "learning_rate": 1.4931527427263678e-05, "loss": 4.1103, "step": 2155 }, { "epoch": 0.7182476888481719, "grad_norm": 0.4765625, "learning_rate": 1.4931418256779368e-05, "loss": 4.1265, "step": 2156 }, { "epoch": 0.7185808278504205, "grad_norm": 0.47265625, "learning_rate": 1.4931308999735131e-05, "loss": 4.0952, "step": 2157 }, { "epoch": 0.7189139668526693, "grad_norm": 0.466796875, "learning_rate": 1.4931199656132239e-05, "loss": 4.0823, "step": 2158 }, { "epoch": 0.7192471058549179, "grad_norm": 0.486328125, "learning_rate": 1.4931090225971969e-05, "loss": 4.0969, "step": 2159 }, { "epoch": 0.7195802448571667, "grad_norm": 0.458984375, "learning_rate": 1.493098070925559e-05, "loss": 4.1384, "step": 2160 }, { "epoch": 0.7199133838594154, "grad_norm": 0.46875, "learning_rate": 1.4930871105984385e-05, "loss": 4.1653, "step": 2161 }, { "epoch": 0.720246522861664, "grad_norm": 0.484375, "learning_rate": 1.4930761416159624e-05, "loss": 4.1752, "step": 2162 }, { "epoch": 0.7205796618639128, "grad_norm": 0.484375, "learning_rate": 1.493065163978259e-05, "loss": 4.157, "step": 2163 }, { "epoch": 0.7209128008661614, "grad_norm": 0.484375, "learning_rate": 1.4930541776854558e-05, "loss": 4.0991, "step": 2164 }, { "epoch": 0.7212459398684101, "grad_norm": 0.498046875, "learning_rate": 1.4930431827376807e-05, "loss": 4.1464, "step": 2165 }, { "epoch": 0.7215790788706588, "grad_norm": 0.498046875, "learning_rate": 1.4930321791350622e-05, "loss": 4.1248, "step": 2166 }, { "epoch": 0.7219122178729075, "grad_norm": 0.486328125, "learning_rate": 1.4930211668777283e-05, "loss": 4.0816, "step": 2167 }, { "epoch": 0.7222453568751561, "grad_norm": 0.484375, "learning_rate": 1.4930101459658069e-05, "loss": 4.1054, "step": 2168 }, { "epoch": 0.7225784958774049, "grad_norm": 0.478515625, "learning_rate": 1.4929991163994267e-05, "loss": 4.1608, "step": 2169 }, { "epoch": 0.7229116348796535, "grad_norm": 0.46484375, "learning_rate": 1.4929880781787163e-05, "loss": 4.1613, "step": 2170 }, { "epoch": 0.7232447738819022, "grad_norm": 0.474609375, "learning_rate": 1.4929770313038041e-05, "loss": 4.1501, "step": 2171 }, { "epoch": 0.723577912884151, "grad_norm": 0.47265625, "learning_rate": 1.4929659757748187e-05, "loss": 4.1277, "step": 2172 }, { "epoch": 0.7239110518863996, "grad_norm": 0.482421875, "learning_rate": 1.492954911591889e-05, "loss": 4.1218, "step": 2173 }, { "epoch": 0.7242441908886483, "grad_norm": 0.50390625, "learning_rate": 1.4929438387551438e-05, "loss": 4.2086, "step": 2174 }, { "epoch": 0.724577329890897, "grad_norm": 0.474609375, "learning_rate": 1.4929327572647119e-05, "loss": 4.153, "step": 2175 }, { "epoch": 0.7249104688931457, "grad_norm": 0.484375, "learning_rate": 1.4929216671207227e-05, "loss": 4.2006, "step": 2176 }, { "epoch": 0.7252436078953943, "grad_norm": 0.482421875, "learning_rate": 1.4929105683233053e-05, "loss": 4.1372, "step": 2177 }, { "epoch": 0.7255767468976431, "grad_norm": 0.494140625, "learning_rate": 1.492899460872589e-05, "loss": 4.1333, "step": 2178 }, { "epoch": 0.7259098858998917, "grad_norm": 0.484375, "learning_rate": 1.4928883447687029e-05, "loss": 4.2472, "step": 2179 }, { "epoch": 0.7262430249021404, "grad_norm": 0.474609375, "learning_rate": 1.4928772200117768e-05, "loss": 4.1331, "step": 2180 }, { "epoch": 0.7265761639043891, "grad_norm": 0.4921875, "learning_rate": 1.49286608660194e-05, "loss": 4.1151, "step": 2181 }, { "epoch": 0.7269093029066378, "grad_norm": 0.50390625, "learning_rate": 1.4928549445393224e-05, "loss": 4.0472, "step": 2182 }, { "epoch": 0.7272424419088864, "grad_norm": 0.51171875, "learning_rate": 1.492843793824054e-05, "loss": 4.089, "step": 2183 }, { "epoch": 0.7275755809111352, "grad_norm": 0.470703125, "learning_rate": 1.4928326344562642e-05, "loss": 4.1445, "step": 2184 }, { "epoch": 0.7279087199133839, "grad_norm": 0.478515625, "learning_rate": 1.492821466436083e-05, "loss": 4.1977, "step": 2185 }, { "epoch": 0.7282418589156325, "grad_norm": 0.490234375, "learning_rate": 1.4928102897636407e-05, "loss": 4.1316, "step": 2186 }, { "epoch": 0.7285749979178813, "grad_norm": 0.494140625, "learning_rate": 1.4927991044390678e-05, "loss": 4.1532, "step": 2187 }, { "epoch": 0.7289081369201299, "grad_norm": 0.4765625, "learning_rate": 1.492787910462494e-05, "loss": 4.1619, "step": 2188 }, { "epoch": 0.7292412759223786, "grad_norm": 0.47265625, "learning_rate": 1.4927767078340498e-05, "loss": 4.09, "step": 2189 }, { "epoch": 0.7295744149246273, "grad_norm": 0.51171875, "learning_rate": 1.4927654965538659e-05, "loss": 4.113, "step": 2190 }, { "epoch": 0.729907553926876, "grad_norm": 0.46875, "learning_rate": 1.492754276622073e-05, "loss": 4.1462, "step": 2191 }, { "epoch": 0.7302406929291246, "grad_norm": 0.48828125, "learning_rate": 1.4927430480388011e-05, "loss": 4.1564, "step": 2192 }, { "epoch": 0.7305738319313734, "grad_norm": 0.48828125, "learning_rate": 1.492731810804182e-05, "loss": 4.0903, "step": 2193 }, { "epoch": 0.730906970933622, "grad_norm": 0.4921875, "learning_rate": 1.4927205649183458e-05, "loss": 4.1379, "step": 2194 }, { "epoch": 0.7312401099358707, "grad_norm": 0.48828125, "learning_rate": 1.4927093103814237e-05, "loss": 4.06, "step": 2195 }, { "epoch": 0.7315732489381195, "grad_norm": 0.45703125, "learning_rate": 1.492698047193547e-05, "loss": 4.2154, "step": 2196 }, { "epoch": 0.7319063879403681, "grad_norm": 0.48828125, "learning_rate": 1.4926867753548466e-05, "loss": 4.1021, "step": 2197 }, { "epoch": 0.7322395269426168, "grad_norm": 0.46875, "learning_rate": 1.492675494865454e-05, "loss": 4.1375, "step": 2198 }, { "epoch": 0.7325726659448655, "grad_norm": 0.5, "learning_rate": 1.4926642057255004e-05, "loss": 4.2296, "step": 2199 }, { "epoch": 0.7329058049471142, "grad_norm": 0.48046875, "learning_rate": 1.4926529079351176e-05, "loss": 4.2018, "step": 2200 }, { "epoch": 0.7332389439493628, "grad_norm": 0.494140625, "learning_rate": 1.4926416014944369e-05, "loss": 4.1265, "step": 2201 }, { "epoch": 0.7335720829516116, "grad_norm": 0.52734375, "learning_rate": 1.4926302864035899e-05, "loss": 4.175, "step": 2202 }, { "epoch": 0.7339052219538602, "grad_norm": 0.482421875, "learning_rate": 1.492618962662709e-05, "loss": 4.1792, "step": 2203 }, { "epoch": 0.734238360956109, "grad_norm": 0.5078125, "learning_rate": 1.4926076302719255e-05, "loss": 4.088, "step": 2204 }, { "epoch": 0.7345714999583576, "grad_norm": 0.494140625, "learning_rate": 1.4925962892313714e-05, "loss": 4.1122, "step": 2205 }, { "epoch": 0.7349046389606063, "grad_norm": 0.48828125, "learning_rate": 1.4925849395411791e-05, "loss": 4.0991, "step": 2206 }, { "epoch": 0.735237777962855, "grad_norm": 0.470703125, "learning_rate": 1.492573581201481e-05, "loss": 4.1121, "step": 2207 }, { "epoch": 0.7355709169651037, "grad_norm": 0.5078125, "learning_rate": 1.4925622142124087e-05, "loss": 4.1958, "step": 2208 }, { "epoch": 0.7359040559673524, "grad_norm": 0.48046875, "learning_rate": 1.4925508385740952e-05, "loss": 4.1737, "step": 2209 }, { "epoch": 0.736237194969601, "grad_norm": 0.51953125, "learning_rate": 1.4925394542866727e-05, "loss": 4.0183, "step": 2210 }, { "epoch": 0.7365703339718498, "grad_norm": 0.466796875, "learning_rate": 1.492528061350274e-05, "loss": 4.175, "step": 2211 }, { "epoch": 0.7369034729740984, "grad_norm": 0.4921875, "learning_rate": 1.4925166597650318e-05, "loss": 4.1114, "step": 2212 }, { "epoch": 0.7372366119763472, "grad_norm": 0.47265625, "learning_rate": 1.4925052495310785e-05, "loss": 4.0965, "step": 2213 }, { "epoch": 0.7375697509785958, "grad_norm": 0.486328125, "learning_rate": 1.4924938306485474e-05, "loss": 4.1566, "step": 2214 }, { "epoch": 0.7379028899808445, "grad_norm": 0.484375, "learning_rate": 1.4924824031175716e-05, "loss": 4.0931, "step": 2215 }, { "epoch": 0.7382360289830932, "grad_norm": 0.51171875, "learning_rate": 1.4924709669382838e-05, "loss": 4.1083, "step": 2216 }, { "epoch": 0.7385691679853419, "grad_norm": 0.474609375, "learning_rate": 1.4924595221108173e-05, "loss": 4.1264, "step": 2217 }, { "epoch": 0.7389023069875905, "grad_norm": 0.5, "learning_rate": 1.492448068635306e-05, "loss": 4.1505, "step": 2218 }, { "epoch": 0.7392354459898393, "grad_norm": 0.5078125, "learning_rate": 1.4924366065118825e-05, "loss": 4.1374, "step": 2219 }, { "epoch": 0.739568584992088, "grad_norm": 0.52734375, "learning_rate": 1.4924251357406807e-05, "loss": 4.1236, "step": 2220 }, { "epoch": 0.7399017239943366, "grad_norm": 0.49609375, "learning_rate": 1.4924136563218343e-05, "loss": 4.1696, "step": 2221 }, { "epoch": 0.7402348629965854, "grad_norm": 0.48828125, "learning_rate": 1.4924021682554767e-05, "loss": 4.1472, "step": 2222 }, { "epoch": 0.740568001998834, "grad_norm": 0.498046875, "learning_rate": 1.4923906715417418e-05, "loss": 4.1412, "step": 2223 }, { "epoch": 0.7409011410010827, "grad_norm": 0.50390625, "learning_rate": 1.4923791661807638e-05, "loss": 4.0972, "step": 2224 }, { "epoch": 0.7412342800033314, "grad_norm": 0.4921875, "learning_rate": 1.4923676521726764e-05, "loss": 4.159, "step": 2225 }, { "epoch": 0.7415674190055801, "grad_norm": 0.470703125, "learning_rate": 1.492356129517614e-05, "loss": 4.1223, "step": 2226 }, { "epoch": 0.7419005580078287, "grad_norm": 0.50390625, "learning_rate": 1.4923445982157103e-05, "loss": 4.1343, "step": 2227 }, { "epoch": 0.7422336970100775, "grad_norm": 0.494140625, "learning_rate": 1.4923330582671002e-05, "loss": 4.1946, "step": 2228 }, { "epoch": 0.7425668360123261, "grad_norm": 0.5, "learning_rate": 1.4923215096719178e-05, "loss": 4.1353, "step": 2229 }, { "epoch": 0.7428999750145748, "grad_norm": 0.490234375, "learning_rate": 1.4923099524302976e-05, "loss": 4.1105, "step": 2230 }, { "epoch": 0.7432331140168236, "grad_norm": 0.484375, "learning_rate": 1.4922983865423745e-05, "loss": 4.1331, "step": 2231 }, { "epoch": 0.7435662530190722, "grad_norm": 0.5078125, "learning_rate": 1.4922868120082827e-05, "loss": 4.0975, "step": 2232 }, { "epoch": 0.7438993920213209, "grad_norm": 0.48828125, "learning_rate": 1.4922752288281574e-05, "loss": 4.149, "step": 2233 }, { "epoch": 0.7442325310235696, "grad_norm": 0.51953125, "learning_rate": 1.4922636370021337e-05, "loss": 4.1053, "step": 2234 }, { "epoch": 0.7445656700258183, "grad_norm": 0.494140625, "learning_rate": 1.4922520365303463e-05, "loss": 4.1434, "step": 2235 }, { "epoch": 0.7448988090280669, "grad_norm": 0.4921875, "learning_rate": 1.4922404274129302e-05, "loss": 4.0387, "step": 2236 }, { "epoch": 0.7452319480303157, "grad_norm": 0.484375, "learning_rate": 1.4922288096500209e-05, "loss": 4.1151, "step": 2237 }, { "epoch": 0.7455650870325643, "grad_norm": 0.466796875, "learning_rate": 1.4922171832417538e-05, "loss": 4.1925, "step": 2238 }, { "epoch": 0.745898226034813, "grad_norm": 0.498046875, "learning_rate": 1.4922055481882638e-05, "loss": 4.1143, "step": 2239 }, { "epoch": 0.7462313650370617, "grad_norm": 0.50390625, "learning_rate": 1.4921939044896872e-05, "loss": 4.1328, "step": 2240 }, { "epoch": 0.7465645040393104, "grad_norm": 0.474609375, "learning_rate": 1.492182252146159e-05, "loss": 4.1641, "step": 2241 }, { "epoch": 0.7468976430415591, "grad_norm": 0.50390625, "learning_rate": 1.492170591157815e-05, "loss": 4.1261, "step": 2242 }, { "epoch": 0.7472307820438078, "grad_norm": 0.482421875, "learning_rate": 1.4921589215247912e-05, "loss": 4.1077, "step": 2243 }, { "epoch": 0.7475639210460565, "grad_norm": 0.4765625, "learning_rate": 1.4921472432472235e-05, "loss": 4.157, "step": 2244 }, { "epoch": 0.7478970600483051, "grad_norm": 0.4921875, "learning_rate": 1.4921355563252481e-05, "loss": 4.0985, "step": 2245 }, { "epoch": 0.7482301990505539, "grad_norm": 0.486328125, "learning_rate": 1.4921238607590007e-05, "loss": 4.1344, "step": 2246 }, { "epoch": 0.7485633380528025, "grad_norm": 0.5, "learning_rate": 1.492112156548618e-05, "loss": 4.0903, "step": 2247 }, { "epoch": 0.7488964770550512, "grad_norm": 0.470703125, "learning_rate": 1.492100443694236e-05, "loss": 4.0956, "step": 2248 }, { "epoch": 0.7492296160572999, "grad_norm": 0.494140625, "learning_rate": 1.492088722195991e-05, "loss": 4.1397, "step": 2249 }, { "epoch": 0.7495627550595486, "grad_norm": 0.50390625, "learning_rate": 1.4920769920540197e-05, "loss": 4.0707, "step": 2250 }, { "epoch": 0.7498958940617972, "grad_norm": 0.486328125, "learning_rate": 1.4920652532684592e-05, "loss": 4.1405, "step": 2251 }, { "epoch": 0.750229033064046, "grad_norm": 0.48046875, "learning_rate": 1.4920535058394454e-05, "loss": 4.1936, "step": 2252 }, { "epoch": 0.7505621720662946, "grad_norm": 0.470703125, "learning_rate": 1.4920417497671157e-05, "loss": 4.097, "step": 2253 }, { "epoch": 0.7508953110685433, "grad_norm": 0.478515625, "learning_rate": 1.492029985051607e-05, "loss": 4.226, "step": 2254 }, { "epoch": 0.7512284500707921, "grad_norm": 0.48828125, "learning_rate": 1.4920182116930561e-05, "loss": 4.0878, "step": 2255 }, { "epoch": 0.7515615890730407, "grad_norm": 0.48828125, "learning_rate": 1.4920064296916003e-05, "loss": 4.1598, "step": 2256 }, { "epoch": 0.7518947280752895, "grad_norm": 0.4921875, "learning_rate": 1.4919946390473768e-05, "loss": 4.1056, "step": 2257 }, { "epoch": 0.7522278670775381, "grad_norm": 0.466796875, "learning_rate": 1.4919828397605228e-05, "loss": 4.1658, "step": 2258 }, { "epoch": 0.7525610060797868, "grad_norm": 0.462890625, "learning_rate": 1.4919710318311759e-05, "loss": 4.2094, "step": 2259 }, { "epoch": 0.7528941450820354, "grad_norm": 0.466796875, "learning_rate": 1.4919592152594734e-05, "loss": 4.1457, "step": 2260 }, { "epoch": 0.7532272840842842, "grad_norm": 0.478515625, "learning_rate": 1.4919473900455533e-05, "loss": 4.0851, "step": 2261 }, { "epoch": 0.7535604230865328, "grad_norm": 0.4921875, "learning_rate": 1.4919355561895533e-05, "loss": 4.1325, "step": 2262 }, { "epoch": 0.7538935620887816, "grad_norm": 0.51953125, "learning_rate": 1.4919237136916109e-05, "loss": 4.1322, "step": 2263 }, { "epoch": 0.7542267010910302, "grad_norm": 0.49609375, "learning_rate": 1.4919118625518644e-05, "loss": 4.0667, "step": 2264 }, { "epoch": 0.7545598400932789, "grad_norm": 0.515625, "learning_rate": 1.4919000027704515e-05, "loss": 4.1137, "step": 2265 }, { "epoch": 0.7548929790955277, "grad_norm": 0.4609375, "learning_rate": 1.4918881343475108e-05, "loss": 4.1601, "step": 2266 }, { "epoch": 0.7552261180977763, "grad_norm": 0.48046875, "learning_rate": 1.49187625728318e-05, "loss": 4.1234, "step": 2267 }, { "epoch": 0.755559257100025, "grad_norm": 0.484375, "learning_rate": 1.4918643715775975e-05, "loss": 4.1373, "step": 2268 }, { "epoch": 0.7558923961022737, "grad_norm": 0.498046875, "learning_rate": 1.4918524772309024e-05, "loss": 4.1505, "step": 2269 }, { "epoch": 0.7562255351045224, "grad_norm": 0.486328125, "learning_rate": 1.4918405742432325e-05, "loss": 4.1242, "step": 2270 }, { "epoch": 0.756558674106771, "grad_norm": 0.4921875, "learning_rate": 1.4918286626147269e-05, "loss": 4.1805, "step": 2271 }, { "epoch": 0.7568918131090198, "grad_norm": 0.5, "learning_rate": 1.4918167423455242e-05, "loss": 4.1209, "step": 2272 }, { "epoch": 0.7572249521112684, "grad_norm": 0.50390625, "learning_rate": 1.4918048134357631e-05, "loss": 4.1307, "step": 2273 }, { "epoch": 0.7575580911135171, "grad_norm": 0.4765625, "learning_rate": 1.4917928758855828e-05, "loss": 4.1283, "step": 2274 }, { "epoch": 0.7578912301157658, "grad_norm": 0.51953125, "learning_rate": 1.4917809296951221e-05, "loss": 4.138, "step": 2275 }, { "epoch": 0.7582243691180145, "grad_norm": 0.498046875, "learning_rate": 1.4917689748645203e-05, "loss": 4.1113, "step": 2276 }, { "epoch": 0.7585575081202632, "grad_norm": 0.46484375, "learning_rate": 1.4917570113939164e-05, "loss": 4.1204, "step": 2277 }, { "epoch": 0.7588906471225119, "grad_norm": 0.5078125, "learning_rate": 1.4917450392834505e-05, "loss": 4.1459, "step": 2278 }, { "epoch": 0.7592237861247606, "grad_norm": 0.494140625, "learning_rate": 1.491733058533261e-05, "loss": 4.1262, "step": 2279 }, { "epoch": 0.7595569251270092, "grad_norm": 0.5, "learning_rate": 1.4917210691434881e-05, "loss": 4.0932, "step": 2280 }, { "epoch": 0.759890064129258, "grad_norm": 0.482421875, "learning_rate": 1.4917090711142714e-05, "loss": 4.1322, "step": 2281 }, { "epoch": 0.7602232031315066, "grad_norm": 0.46875, "learning_rate": 1.4916970644457504e-05, "loss": 4.1276, "step": 2282 }, { "epoch": 0.7605563421337553, "grad_norm": 0.5078125, "learning_rate": 1.4916850491380651e-05, "loss": 4.0843, "step": 2283 }, { "epoch": 0.760889481136004, "grad_norm": 0.478515625, "learning_rate": 1.4916730251913558e-05, "loss": 4.154, "step": 2284 }, { "epoch": 0.7612226201382527, "grad_norm": 0.51171875, "learning_rate": 1.491660992605762e-05, "loss": 4.0947, "step": 2285 }, { "epoch": 0.7615557591405013, "grad_norm": 0.50390625, "learning_rate": 1.491648951381424e-05, "loss": 4.1446, "step": 2286 }, { "epoch": 0.7618888981427501, "grad_norm": 0.474609375, "learning_rate": 1.4916369015184823e-05, "loss": 4.1158, "step": 2287 }, { "epoch": 0.7622220371449987, "grad_norm": 0.49609375, "learning_rate": 1.4916248430170769e-05, "loss": 4.0894, "step": 2288 }, { "epoch": 0.7625551761472474, "grad_norm": 0.498046875, "learning_rate": 1.4916127758773483e-05, "loss": 4.1174, "step": 2289 }, { "epoch": 0.7628883151494962, "grad_norm": 0.490234375, "learning_rate": 1.4916007000994376e-05, "loss": 4.1876, "step": 2290 }, { "epoch": 0.7632214541517448, "grad_norm": 0.515625, "learning_rate": 1.4915886156834847e-05, "loss": 4.1045, "step": 2291 }, { "epoch": 0.7635545931539935, "grad_norm": 0.47265625, "learning_rate": 1.491576522629631e-05, "loss": 4.18, "step": 2292 }, { "epoch": 0.7638877321562422, "grad_norm": 0.5078125, "learning_rate": 1.4915644209380168e-05, "loss": 4.0768, "step": 2293 }, { "epoch": 0.7642208711584909, "grad_norm": 0.474609375, "learning_rate": 1.4915523106087837e-05, "loss": 4.1827, "step": 2294 }, { "epoch": 0.7645540101607395, "grad_norm": 0.48046875, "learning_rate": 1.491540191642072e-05, "loss": 4.1508, "step": 2295 }, { "epoch": 0.7648871491629883, "grad_norm": 0.4609375, "learning_rate": 1.4915280640380233e-05, "loss": 4.1806, "step": 2296 }, { "epoch": 0.7652202881652369, "grad_norm": 0.494140625, "learning_rate": 1.4915159277967789e-05, "loss": 4.1117, "step": 2297 }, { "epoch": 0.7655534271674856, "grad_norm": 0.5234375, "learning_rate": 1.49150378291848e-05, "loss": 4.1574, "step": 2298 }, { "epoch": 0.7658865661697343, "grad_norm": 0.49609375, "learning_rate": 1.4914916294032682e-05, "loss": 4.1426, "step": 2299 }, { "epoch": 0.766219705171983, "grad_norm": 0.4765625, "learning_rate": 1.4914794672512849e-05, "loss": 4.1688, "step": 2300 }, { "epoch": 0.7665528441742318, "grad_norm": 0.486328125, "learning_rate": 1.4914672964626717e-05, "loss": 4.1239, "step": 2301 }, { "epoch": 0.7668859831764804, "grad_norm": 0.498046875, "learning_rate": 1.4914551170375709e-05, "loss": 4.1398, "step": 2302 }, { "epoch": 0.7672191221787291, "grad_norm": 0.484375, "learning_rate": 1.4914429289761237e-05, "loss": 4.1212, "step": 2303 }, { "epoch": 0.7675522611809777, "grad_norm": 0.4921875, "learning_rate": 1.4914307322784723e-05, "loss": 4.1308, "step": 2304 }, { "epoch": 0.7678854001832265, "grad_norm": 0.48046875, "learning_rate": 1.491418526944759e-05, "loss": 4.1284, "step": 2305 }, { "epoch": 0.7682185391854751, "grad_norm": 0.490234375, "learning_rate": 1.4914063129751257e-05, "loss": 4.1357, "step": 2306 }, { "epoch": 0.7685516781877239, "grad_norm": 0.4921875, "learning_rate": 1.4913940903697148e-05, "loss": 4.1378, "step": 2307 }, { "epoch": 0.7688848171899725, "grad_norm": 0.47265625, "learning_rate": 1.4913818591286686e-05, "loss": 4.138, "step": 2308 }, { "epoch": 0.7692179561922212, "grad_norm": 0.49609375, "learning_rate": 1.4913696192521293e-05, "loss": 4.1649, "step": 2309 }, { "epoch": 0.7695510951944698, "grad_norm": 0.52734375, "learning_rate": 1.4913573707402402e-05, "loss": 4.0135, "step": 2310 }, { "epoch": 0.7698842341967186, "grad_norm": 0.515625, "learning_rate": 1.4913451135931432e-05, "loss": 4.1439, "step": 2311 }, { "epoch": 0.7702173731989672, "grad_norm": 0.5078125, "learning_rate": 1.4913328478109812e-05, "loss": 4.1535, "step": 2312 }, { "epoch": 0.770550512201216, "grad_norm": 0.51953125, "learning_rate": 1.4913205733938975e-05, "loss": 4.0836, "step": 2313 }, { "epoch": 0.7708836512034647, "grad_norm": 0.5078125, "learning_rate": 1.491308290342035e-05, "loss": 4.1581, "step": 2314 }, { "epoch": 0.7712167902057133, "grad_norm": 0.48828125, "learning_rate": 1.4912959986555363e-05, "loss": 4.2056, "step": 2315 }, { "epoch": 0.7715499292079621, "grad_norm": 0.515625, "learning_rate": 1.4912836983345449e-05, "loss": 4.1611, "step": 2316 }, { "epoch": 0.7718830682102107, "grad_norm": 0.490234375, "learning_rate": 1.4912713893792042e-05, "loss": 4.0784, "step": 2317 }, { "epoch": 0.7722162072124594, "grad_norm": 0.484375, "learning_rate": 1.4912590717896572e-05, "loss": 4.101, "step": 2318 }, { "epoch": 0.772549346214708, "grad_norm": 0.49609375, "learning_rate": 1.4912467455660477e-05, "loss": 4.1003, "step": 2319 }, { "epoch": 0.7728824852169568, "grad_norm": 0.49609375, "learning_rate": 1.491234410708519e-05, "loss": 4.1411, "step": 2320 }, { "epoch": 0.7732156242192054, "grad_norm": 0.490234375, "learning_rate": 1.491222067217215e-05, "loss": 4.1192, "step": 2321 }, { "epoch": 0.7735487632214542, "grad_norm": 0.53125, "learning_rate": 1.4912097150922795e-05, "loss": 4.1874, "step": 2322 }, { "epoch": 0.7738819022237028, "grad_norm": 0.515625, "learning_rate": 1.4911973543338562e-05, "loss": 4.035, "step": 2323 }, { "epoch": 0.7742150412259515, "grad_norm": 0.50390625, "learning_rate": 1.4911849849420892e-05, "loss": 4.0812, "step": 2324 }, { "epoch": 0.7745481802282003, "grad_norm": 0.5078125, "learning_rate": 1.4911726069171224e-05, "loss": 4.1378, "step": 2325 }, { "epoch": 0.7748813192304489, "grad_norm": 0.498046875, "learning_rate": 1.4911602202591003e-05, "loss": 4.0916, "step": 2326 }, { "epoch": 0.7752144582326976, "grad_norm": 0.47265625, "learning_rate": 1.4911478249681666e-05, "loss": 4.1709, "step": 2327 }, { "epoch": 0.7755475972349463, "grad_norm": 0.5, "learning_rate": 1.4911354210444665e-05, "loss": 4.1144, "step": 2328 }, { "epoch": 0.775880736237195, "grad_norm": 0.498046875, "learning_rate": 1.4911230084881439e-05, "loss": 4.1806, "step": 2329 }, { "epoch": 0.7762138752394436, "grad_norm": 0.478515625, "learning_rate": 1.4911105872993435e-05, "loss": 4.2139, "step": 2330 }, { "epoch": 0.7765470142416924, "grad_norm": 0.49609375, "learning_rate": 1.49109815747821e-05, "loss": 4.2023, "step": 2331 }, { "epoch": 0.776880153243941, "grad_norm": 0.466796875, "learning_rate": 1.4910857190248883e-05, "loss": 4.1935, "step": 2332 }, { "epoch": 0.7772132922461897, "grad_norm": 0.48046875, "learning_rate": 1.491073271939523e-05, "loss": 4.1345, "step": 2333 }, { "epoch": 0.7775464312484384, "grad_norm": 0.4765625, "learning_rate": 1.4910608162222592e-05, "loss": 4.1455, "step": 2334 }, { "epoch": 0.7778795702506871, "grad_norm": 0.48046875, "learning_rate": 1.4910483518732422e-05, "loss": 4.047, "step": 2335 }, { "epoch": 0.7782127092529358, "grad_norm": 0.515625, "learning_rate": 1.4910358788926169e-05, "loss": 4.0714, "step": 2336 }, { "epoch": 0.7785458482551845, "grad_norm": 0.484375, "learning_rate": 1.4910233972805287e-05, "loss": 4.1637, "step": 2337 }, { "epoch": 0.7788789872574332, "grad_norm": 0.484375, "learning_rate": 1.4910109070371232e-05, "loss": 4.1282, "step": 2338 }, { "epoch": 0.7792121262596818, "grad_norm": 0.4921875, "learning_rate": 1.4909984081625454e-05, "loss": 4.2214, "step": 2339 }, { "epoch": 0.7795452652619306, "grad_norm": 0.5234375, "learning_rate": 1.4909859006569412e-05, "loss": 4.0808, "step": 2340 }, { "epoch": 0.7798784042641792, "grad_norm": 0.474609375, "learning_rate": 1.4909733845204561e-05, "loss": 4.1394, "step": 2341 }, { "epoch": 0.7802115432664279, "grad_norm": 0.50390625, "learning_rate": 1.4909608597532363e-05, "loss": 4.1328, "step": 2342 }, { "epoch": 0.7805446822686766, "grad_norm": 0.48046875, "learning_rate": 1.4909483263554271e-05, "loss": 4.1754, "step": 2343 }, { "epoch": 0.7808778212709253, "grad_norm": 0.5, "learning_rate": 1.490935784327175e-05, "loss": 4.1547, "step": 2344 }, { "epoch": 0.7812109602731739, "grad_norm": 0.50390625, "learning_rate": 1.490923233668626e-05, "loss": 4.1494, "step": 2345 }, { "epoch": 0.7815440992754227, "grad_norm": 0.48828125, "learning_rate": 1.4909106743799259e-05, "loss": 4.1219, "step": 2346 }, { "epoch": 0.7818772382776713, "grad_norm": 0.484375, "learning_rate": 1.4908981064612212e-05, "loss": 4.1502, "step": 2347 }, { "epoch": 0.78221037727992, "grad_norm": 0.49609375, "learning_rate": 1.4908855299126586e-05, "loss": 4.0927, "step": 2348 }, { "epoch": 0.7825435162821688, "grad_norm": 0.48828125, "learning_rate": 1.4908729447343842e-05, "loss": 4.1353, "step": 2349 }, { "epoch": 0.7828766552844174, "grad_norm": 0.515625, "learning_rate": 1.4908603509265446e-05, "loss": 4.0886, "step": 2350 }, { "epoch": 0.7832097942866661, "grad_norm": 0.466796875, "learning_rate": 1.4908477484892869e-05, "loss": 4.1106, "step": 2351 }, { "epoch": 0.7835429332889148, "grad_norm": 0.49609375, "learning_rate": 1.4908351374227574e-05, "loss": 4.134, "step": 2352 }, { "epoch": 0.7838760722911635, "grad_norm": 0.478515625, "learning_rate": 1.4908225177271034e-05, "loss": 4.1139, "step": 2353 }, { "epoch": 0.7842092112934121, "grad_norm": 0.5, "learning_rate": 1.4908098894024714e-05, "loss": 4.147, "step": 2354 }, { "epoch": 0.7845423502956609, "grad_norm": 0.51953125, "learning_rate": 1.4907972524490089e-05, "loss": 4.0959, "step": 2355 }, { "epoch": 0.7848754892979095, "grad_norm": 0.474609375, "learning_rate": 1.490784606866863e-05, "loss": 4.1697, "step": 2356 }, { "epoch": 0.7852086283001583, "grad_norm": 0.48828125, "learning_rate": 1.490771952656181e-05, "loss": 4.1728, "step": 2357 }, { "epoch": 0.7855417673024069, "grad_norm": 0.4765625, "learning_rate": 1.4907592898171102e-05, "loss": 4.1611, "step": 2358 }, { "epoch": 0.7858749063046556, "grad_norm": 0.478515625, "learning_rate": 1.4907466183497982e-05, "loss": 4.1048, "step": 2359 }, { "epoch": 0.7862080453069044, "grad_norm": 0.4921875, "learning_rate": 1.4907339382543925e-05, "loss": 4.142, "step": 2360 }, { "epoch": 0.786541184309153, "grad_norm": 0.49609375, "learning_rate": 1.4907212495310408e-05, "loss": 4.0858, "step": 2361 }, { "epoch": 0.7868743233114017, "grad_norm": 0.49609375, "learning_rate": 1.490708552179891e-05, "loss": 4.1114, "step": 2362 }, { "epoch": 0.7872074623136504, "grad_norm": 0.474609375, "learning_rate": 1.4906958462010909e-05, "loss": 4.1296, "step": 2363 }, { "epoch": 0.7875406013158991, "grad_norm": 0.478515625, "learning_rate": 1.4906831315947886e-05, "loss": 4.0794, "step": 2364 }, { "epoch": 0.7878737403181477, "grad_norm": 0.486328125, "learning_rate": 1.4906704083611322e-05, "loss": 4.133, "step": 2365 }, { "epoch": 0.7882068793203965, "grad_norm": 0.4765625, "learning_rate": 1.4906576765002697e-05, "loss": 4.1848, "step": 2366 }, { "epoch": 0.7885400183226451, "grad_norm": 0.490234375, "learning_rate": 1.4906449360123495e-05, "loss": 4.1656, "step": 2367 }, { "epoch": 0.7888731573248938, "grad_norm": 0.5, "learning_rate": 1.4906321868975203e-05, "loss": 4.1435, "step": 2368 }, { "epoch": 0.7892062963271425, "grad_norm": 0.4921875, "learning_rate": 1.49061942915593e-05, "loss": 4.1684, "step": 2369 }, { "epoch": 0.7895394353293912, "grad_norm": 0.515625, "learning_rate": 1.4906066627877277e-05, "loss": 4.1329, "step": 2370 }, { "epoch": 0.7898725743316399, "grad_norm": 0.478515625, "learning_rate": 1.4905938877930618e-05, "loss": 4.155, "step": 2371 }, { "epoch": 0.7902057133338886, "grad_norm": 0.5078125, "learning_rate": 1.4905811041720813e-05, "loss": 4.1565, "step": 2372 }, { "epoch": 0.7905388523361373, "grad_norm": 0.482421875, "learning_rate": 1.4905683119249351e-05, "loss": 4.1033, "step": 2373 }, { "epoch": 0.7908719913383859, "grad_norm": 0.47265625, "learning_rate": 1.4905555110517722e-05, "loss": 4.1079, "step": 2374 }, { "epoch": 0.7912051303406347, "grad_norm": 0.494140625, "learning_rate": 1.4905427015527415e-05, "loss": 4.1726, "step": 2375 }, { "epoch": 0.7915382693428833, "grad_norm": 0.515625, "learning_rate": 1.4905298834279923e-05, "loss": 4.0637, "step": 2376 }, { "epoch": 0.791871408345132, "grad_norm": 0.478515625, "learning_rate": 1.490517056677674e-05, "loss": 4.0805, "step": 2377 }, { "epoch": 0.7922045473473807, "grad_norm": 0.490234375, "learning_rate": 1.490504221301936e-05, "loss": 4.0646, "step": 2378 }, { "epoch": 0.7925376863496294, "grad_norm": 0.484375, "learning_rate": 1.4904913773009277e-05, "loss": 4.1322, "step": 2379 }, { "epoch": 0.792870825351878, "grad_norm": 0.50390625, "learning_rate": 1.490478524674799e-05, "loss": 4.0576, "step": 2380 }, { "epoch": 0.7932039643541268, "grad_norm": 0.482421875, "learning_rate": 1.490465663423699e-05, "loss": 4.1244, "step": 2381 }, { "epoch": 0.7935371033563754, "grad_norm": 0.490234375, "learning_rate": 1.4904527935477778e-05, "loss": 4.1472, "step": 2382 }, { "epoch": 0.7938702423586241, "grad_norm": 0.486328125, "learning_rate": 1.4904399150471857e-05, "loss": 4.1189, "step": 2383 }, { "epoch": 0.7942033813608729, "grad_norm": 0.490234375, "learning_rate": 1.4904270279220722e-05, "loss": 4.1379, "step": 2384 }, { "epoch": 0.7945365203631215, "grad_norm": 0.50390625, "learning_rate": 1.4904141321725877e-05, "loss": 4.0717, "step": 2385 }, { "epoch": 0.7948696593653702, "grad_norm": 0.494140625, "learning_rate": 1.4904012277988823e-05, "loss": 4.1286, "step": 2386 }, { "epoch": 0.7952027983676189, "grad_norm": 0.490234375, "learning_rate": 1.4903883148011062e-05, "loss": 4.0673, "step": 2387 }, { "epoch": 0.7955359373698676, "grad_norm": 0.482421875, "learning_rate": 1.49037539317941e-05, "loss": 4.1388, "step": 2388 }, { "epoch": 0.7958690763721162, "grad_norm": 0.478515625, "learning_rate": 1.490362462933944e-05, "loss": 4.1361, "step": 2389 }, { "epoch": 0.796202215374365, "grad_norm": 0.51171875, "learning_rate": 1.490349524064859e-05, "loss": 4.1281, "step": 2390 }, { "epoch": 0.7965353543766136, "grad_norm": 0.5234375, "learning_rate": 1.4903365765723058e-05, "loss": 4.0816, "step": 2391 }, { "epoch": 0.7968684933788623, "grad_norm": 0.48828125, "learning_rate": 1.4903236204564347e-05, "loss": 4.1213, "step": 2392 }, { "epoch": 0.797201632381111, "grad_norm": 0.5, "learning_rate": 1.4903106557173973e-05, "loss": 4.0999, "step": 2393 }, { "epoch": 0.7975347713833597, "grad_norm": 0.48046875, "learning_rate": 1.4902976823553438e-05, "loss": 4.0871, "step": 2394 }, { "epoch": 0.7978679103856084, "grad_norm": 0.494140625, "learning_rate": 1.4902847003704262e-05, "loss": 4.1857, "step": 2395 }, { "epoch": 0.7982010493878571, "grad_norm": 0.498046875, "learning_rate": 1.4902717097627952e-05, "loss": 4.1115, "step": 2396 }, { "epoch": 0.7985341883901058, "grad_norm": 0.48046875, "learning_rate": 1.4902587105326023e-05, "loss": 4.1759, "step": 2397 }, { "epoch": 0.7988673273923544, "grad_norm": 0.470703125, "learning_rate": 1.4902457026799986e-05, "loss": 4.1263, "step": 2398 }, { "epoch": 0.7992004663946032, "grad_norm": 0.47265625, "learning_rate": 1.490232686205136e-05, "loss": 4.1054, "step": 2399 }, { "epoch": 0.7995336053968518, "grad_norm": 0.515625, "learning_rate": 1.490219661108166e-05, "loss": 4.1171, "step": 2400 }, { "epoch": 0.7998667443991005, "grad_norm": 0.484375, "learning_rate": 1.4902066273892403e-05, "loss": 4.1766, "step": 2401 }, { "epoch": 0.8001998834013492, "grad_norm": 0.52734375, "learning_rate": 1.4901935850485104e-05, "loss": 4.0852, "step": 2402 }, { "epoch": 0.8005330224035979, "grad_norm": 0.50390625, "learning_rate": 1.4901805340861286e-05, "loss": 4.1624, "step": 2403 }, { "epoch": 0.8008661614058465, "grad_norm": 0.49609375, "learning_rate": 1.4901674745022468e-05, "loss": 4.0215, "step": 2404 }, { "epoch": 0.8011993004080953, "grad_norm": 0.486328125, "learning_rate": 1.490154406297017e-05, "loss": 4.0962, "step": 2405 }, { "epoch": 0.8015324394103439, "grad_norm": 0.5078125, "learning_rate": 1.4901413294705918e-05, "loss": 4.186, "step": 2406 }, { "epoch": 0.8018655784125927, "grad_norm": 0.49609375, "learning_rate": 1.490128244023123e-05, "loss": 4.1817, "step": 2407 }, { "epoch": 0.8021987174148414, "grad_norm": 0.51171875, "learning_rate": 1.4901151499547637e-05, "loss": 4.0154, "step": 2408 }, { "epoch": 0.80253185641709, "grad_norm": 0.50390625, "learning_rate": 1.4901020472656656e-05, "loss": 4.1443, "step": 2409 }, { "epoch": 0.8028649954193388, "grad_norm": 0.484375, "learning_rate": 1.4900889359559818e-05, "loss": 4.1296, "step": 2410 }, { "epoch": 0.8031981344215874, "grad_norm": 0.4921875, "learning_rate": 1.490075816025865e-05, "loss": 4.1113, "step": 2411 }, { "epoch": 0.8035312734238361, "grad_norm": 0.4921875, "learning_rate": 1.490062687475468e-05, "loss": 4.0956, "step": 2412 }, { "epoch": 0.8038644124260848, "grad_norm": 0.46484375, "learning_rate": 1.4900495503049434e-05, "loss": 4.1072, "step": 2413 }, { "epoch": 0.8041975514283335, "grad_norm": 0.50390625, "learning_rate": 1.4900364045144448e-05, "loss": 4.085, "step": 2414 }, { "epoch": 0.8045306904305821, "grad_norm": 0.5078125, "learning_rate": 1.4900232501041248e-05, "loss": 4.2106, "step": 2415 }, { "epoch": 0.8048638294328309, "grad_norm": 0.494140625, "learning_rate": 1.4900100870741367e-05, "loss": 4.1427, "step": 2416 }, { "epoch": 0.8051969684350795, "grad_norm": 0.46875, "learning_rate": 1.4899969154246341e-05, "loss": 4.1564, "step": 2417 }, { "epoch": 0.8055301074373282, "grad_norm": 0.49609375, "learning_rate": 1.4899837351557703e-05, "loss": 4.0843, "step": 2418 }, { "epoch": 0.805863246439577, "grad_norm": 0.47265625, "learning_rate": 1.4899705462676988e-05, "loss": 4.1569, "step": 2419 }, { "epoch": 0.8061963854418256, "grad_norm": 0.5, "learning_rate": 1.4899573487605732e-05, "loss": 4.1096, "step": 2420 }, { "epoch": 0.8065295244440743, "grad_norm": 0.474609375, "learning_rate": 1.4899441426345471e-05, "loss": 4.1655, "step": 2421 }, { "epoch": 0.806862663446323, "grad_norm": 0.498046875, "learning_rate": 1.4899309278897747e-05, "loss": 4.1019, "step": 2422 }, { "epoch": 0.8071958024485717, "grad_norm": 0.486328125, "learning_rate": 1.4899177045264096e-05, "loss": 4.0804, "step": 2423 }, { "epoch": 0.8075289414508203, "grad_norm": 0.498046875, "learning_rate": 1.4899044725446058e-05, "loss": 4.1589, "step": 2424 }, { "epoch": 0.8078620804530691, "grad_norm": 0.48046875, "learning_rate": 1.4898912319445174e-05, "loss": 4.1322, "step": 2425 }, { "epoch": 0.8081952194553177, "grad_norm": 0.48828125, "learning_rate": 1.489877982726299e-05, "loss": 4.1393, "step": 2426 }, { "epoch": 0.8085283584575664, "grad_norm": 0.515625, "learning_rate": 1.4898647248901047e-05, "loss": 4.1478, "step": 2427 }, { "epoch": 0.8088614974598151, "grad_norm": 0.4921875, "learning_rate": 1.4898514584360888e-05, "loss": 4.1514, "step": 2428 }, { "epoch": 0.8091946364620638, "grad_norm": 0.47265625, "learning_rate": 1.4898381833644058e-05, "loss": 4.1772, "step": 2429 }, { "epoch": 0.8095277754643125, "grad_norm": 0.470703125, "learning_rate": 1.4898248996752104e-05, "loss": 4.1768, "step": 2430 }, { "epoch": 0.8098609144665612, "grad_norm": 0.47265625, "learning_rate": 1.4898116073686577e-05, "loss": 4.1117, "step": 2431 }, { "epoch": 0.8101940534688099, "grad_norm": 0.47265625, "learning_rate": 1.489798306444902e-05, "loss": 4.1828, "step": 2432 }, { "epoch": 0.8105271924710585, "grad_norm": 0.50390625, "learning_rate": 1.4897849969040985e-05, "loss": 4.1177, "step": 2433 }, { "epoch": 0.8108603314733073, "grad_norm": 0.498046875, "learning_rate": 1.4897716787464022e-05, "loss": 4.0874, "step": 2434 }, { "epoch": 0.8111934704755559, "grad_norm": 0.478515625, "learning_rate": 1.4897583519719682e-05, "loss": 4.1668, "step": 2435 }, { "epoch": 0.8115266094778046, "grad_norm": 0.49609375, "learning_rate": 1.4897450165809516e-05, "loss": 4.1236, "step": 2436 }, { "epoch": 0.8118597484800533, "grad_norm": 0.478515625, "learning_rate": 1.4897316725735077e-05, "loss": 4.1049, "step": 2437 }, { "epoch": 0.812192887482302, "grad_norm": 0.46875, "learning_rate": 1.4897183199497923e-05, "loss": 4.1483, "step": 2438 }, { "epoch": 0.8125260264845506, "grad_norm": 0.486328125, "learning_rate": 1.4897049587099609e-05, "loss": 4.1831, "step": 2439 }, { "epoch": 0.8128591654867994, "grad_norm": 0.53125, "learning_rate": 1.4896915888541686e-05, "loss": 4.1518, "step": 2440 }, { "epoch": 0.813192304489048, "grad_norm": 0.47265625, "learning_rate": 1.4896782103825714e-05, "loss": 4.1672, "step": 2441 }, { "epoch": 0.8135254434912967, "grad_norm": 0.478515625, "learning_rate": 1.4896648232953255e-05, "loss": 4.1545, "step": 2442 }, { "epoch": 0.8138585824935455, "grad_norm": 0.455078125, "learning_rate": 1.4896514275925862e-05, "loss": 4.1552, "step": 2443 }, { "epoch": 0.8141917214957941, "grad_norm": 0.478515625, "learning_rate": 1.48963802327451e-05, "loss": 4.1453, "step": 2444 }, { "epoch": 0.8145248604980428, "grad_norm": 0.490234375, "learning_rate": 1.4896246103412527e-05, "loss": 4.0516, "step": 2445 }, { "epoch": 0.8148579995002915, "grad_norm": 0.478515625, "learning_rate": 1.489611188792971e-05, "loss": 4.0958, "step": 2446 }, { "epoch": 0.8151911385025402, "grad_norm": 0.49609375, "learning_rate": 1.4895977586298208e-05, "loss": 4.1061, "step": 2447 }, { "epoch": 0.8155242775047888, "grad_norm": 0.486328125, "learning_rate": 1.489584319851959e-05, "loss": 4.1466, "step": 2448 }, { "epoch": 0.8158574165070376, "grad_norm": 0.486328125, "learning_rate": 1.4895708724595413e-05, "loss": 4.2073, "step": 2449 }, { "epoch": 0.8161905555092862, "grad_norm": 0.470703125, "learning_rate": 1.4895574164527252e-05, "loss": 4.1253, "step": 2450 }, { "epoch": 0.816523694511535, "grad_norm": 0.486328125, "learning_rate": 1.4895439518316667e-05, "loss": 4.0657, "step": 2451 }, { "epoch": 0.8168568335137836, "grad_norm": 0.466796875, "learning_rate": 1.4895304785965235e-05, "loss": 4.1541, "step": 2452 }, { "epoch": 0.8171899725160323, "grad_norm": 0.49609375, "learning_rate": 1.4895169967474516e-05, "loss": 4.0629, "step": 2453 }, { "epoch": 0.817523111518281, "grad_norm": 0.52734375, "learning_rate": 1.4895035062846089e-05, "loss": 4.0408, "step": 2454 }, { "epoch": 0.8178562505205297, "grad_norm": 0.486328125, "learning_rate": 1.4894900072081516e-05, "loss": 4.1092, "step": 2455 }, { "epoch": 0.8181893895227784, "grad_norm": 0.484375, "learning_rate": 1.489476499518238e-05, "loss": 4.178, "step": 2456 }, { "epoch": 0.818522528525027, "grad_norm": 0.4921875, "learning_rate": 1.4894629832150245e-05, "loss": 4.1276, "step": 2457 }, { "epoch": 0.8188556675272758, "grad_norm": 0.5234375, "learning_rate": 1.489449458298669e-05, "loss": 4.1039, "step": 2458 }, { "epoch": 0.8191888065295244, "grad_norm": 0.470703125, "learning_rate": 1.489435924769329e-05, "loss": 4.1077, "step": 2459 }, { "epoch": 0.8195219455317732, "grad_norm": 0.490234375, "learning_rate": 1.4894223826271623e-05, "loss": 4.1394, "step": 2460 }, { "epoch": 0.8198550845340218, "grad_norm": 0.4765625, "learning_rate": 1.489408831872326e-05, "loss": 4.1296, "step": 2461 }, { "epoch": 0.8201882235362705, "grad_norm": 0.478515625, "learning_rate": 1.4893952725049788e-05, "loss": 4.115, "step": 2462 }, { "epoch": 0.8205213625385192, "grad_norm": 0.48828125, "learning_rate": 1.4893817045252779e-05, "loss": 4.108, "step": 2463 }, { "epoch": 0.8208545015407679, "grad_norm": 0.49609375, "learning_rate": 1.4893681279333816e-05, "loss": 4.1404, "step": 2464 }, { "epoch": 0.8211876405430166, "grad_norm": 0.484375, "learning_rate": 1.4893545427294482e-05, "loss": 4.107, "step": 2465 }, { "epoch": 0.8215207795452653, "grad_norm": 0.47265625, "learning_rate": 1.4893409489136358e-05, "loss": 4.1895, "step": 2466 }, { "epoch": 0.821853918547514, "grad_norm": 0.466796875, "learning_rate": 1.4893273464861027e-05, "loss": 4.141, "step": 2467 }, { "epoch": 0.8221870575497626, "grad_norm": 0.5078125, "learning_rate": 1.4893137354470077e-05, "loss": 4.1439, "step": 2468 }, { "epoch": 0.8225201965520114, "grad_norm": 0.515625, "learning_rate": 1.4893001157965088e-05, "loss": 4.1187, "step": 2469 }, { "epoch": 0.82285333555426, "grad_norm": 0.49609375, "learning_rate": 1.4892864875347648e-05, "loss": 4.0827, "step": 2470 }, { "epoch": 0.8231864745565087, "grad_norm": 0.45703125, "learning_rate": 1.4892728506619346e-05, "loss": 4.1774, "step": 2471 }, { "epoch": 0.8235196135587574, "grad_norm": 0.5078125, "learning_rate": 1.4892592051781769e-05, "loss": 4.1172, "step": 2472 }, { "epoch": 0.8238527525610061, "grad_norm": 0.5, "learning_rate": 1.489245551083651e-05, "loss": 4.1121, "step": 2473 }, { "epoch": 0.8241858915632547, "grad_norm": 0.484375, "learning_rate": 1.4892318883785152e-05, "loss": 4.1387, "step": 2474 }, { "epoch": 0.8245190305655035, "grad_norm": 0.470703125, "learning_rate": 1.4892182170629292e-05, "loss": 4.1734, "step": 2475 }, { "epoch": 0.8248521695677521, "grad_norm": 0.5234375, "learning_rate": 1.489204537137052e-05, "loss": 4.1518, "step": 2476 }, { "epoch": 0.8251853085700008, "grad_norm": 0.49609375, "learning_rate": 1.4891908486010433e-05, "loss": 4.1248, "step": 2477 }, { "epoch": 0.8255184475722496, "grad_norm": 0.50390625, "learning_rate": 1.4891771514550622e-05, "loss": 4.0913, "step": 2478 }, { "epoch": 0.8258515865744982, "grad_norm": 0.50390625, "learning_rate": 1.4891634456992686e-05, "loss": 4.0865, "step": 2479 }, { "epoch": 0.8261847255767469, "grad_norm": 0.49609375, "learning_rate": 1.4891497313338214e-05, "loss": 4.1717, "step": 2480 }, { "epoch": 0.8265178645789956, "grad_norm": 0.48046875, "learning_rate": 1.4891360083588811e-05, "loss": 4.0723, "step": 2481 }, { "epoch": 0.8268510035812443, "grad_norm": 0.482421875, "learning_rate": 1.4891222767746072e-05, "loss": 4.1146, "step": 2482 }, { "epoch": 0.8271841425834929, "grad_norm": 0.515625, "learning_rate": 1.4891085365811598e-05, "loss": 4.0892, "step": 2483 }, { "epoch": 0.8275172815857417, "grad_norm": 0.50390625, "learning_rate": 1.4890947877786988e-05, "loss": 4.1166, "step": 2484 }, { "epoch": 0.8278504205879903, "grad_norm": 0.5, "learning_rate": 1.4890810303673843e-05, "loss": 4.1402, "step": 2485 }, { "epoch": 0.828183559590239, "grad_norm": 0.50390625, "learning_rate": 1.4890672643473769e-05, "loss": 4.1261, "step": 2486 }, { "epoch": 0.8285166985924877, "grad_norm": 0.482421875, "learning_rate": 1.4890534897188366e-05, "loss": 4.1135, "step": 2487 }, { "epoch": 0.8288498375947364, "grad_norm": 0.46484375, "learning_rate": 1.4890397064819236e-05, "loss": 4.1032, "step": 2488 }, { "epoch": 0.8291829765969851, "grad_norm": 0.474609375, "learning_rate": 1.4890259146367991e-05, "loss": 4.0842, "step": 2489 }, { "epoch": 0.8295161155992338, "grad_norm": 0.49609375, "learning_rate": 1.4890121141836235e-05, "loss": 4.0916, "step": 2490 }, { "epoch": 0.8298492546014825, "grad_norm": 0.498046875, "learning_rate": 1.4889983051225573e-05, "loss": 4.1177, "step": 2491 }, { "epoch": 0.8301823936037311, "grad_norm": 0.5, "learning_rate": 1.4889844874537614e-05, "loss": 4.0643, "step": 2492 }, { "epoch": 0.8305155326059799, "grad_norm": 0.494140625, "learning_rate": 1.488970661177397e-05, "loss": 4.1099, "step": 2493 }, { "epoch": 0.8308486716082285, "grad_norm": 0.48828125, "learning_rate": 1.488956826293625e-05, "loss": 4.1253, "step": 2494 }, { "epoch": 0.8311818106104772, "grad_norm": 0.4765625, "learning_rate": 1.4889429828026066e-05, "loss": 4.0755, "step": 2495 }, { "epoch": 0.8315149496127259, "grad_norm": 0.4921875, "learning_rate": 1.4889291307045029e-05, "loss": 4.0559, "step": 2496 }, { "epoch": 0.8318480886149746, "grad_norm": 0.48828125, "learning_rate": 1.4889152699994755e-05, "loss": 4.0851, "step": 2497 }, { "epoch": 0.8321812276172232, "grad_norm": 0.490234375, "learning_rate": 1.4889014006876855e-05, "loss": 4.1421, "step": 2498 }, { "epoch": 0.832514366619472, "grad_norm": 0.4921875, "learning_rate": 1.4888875227692948e-05, "loss": 4.1255, "step": 2499 }, { "epoch": 0.8328475056217207, "grad_norm": 0.466796875, "learning_rate": 1.4888736362444648e-05, "loss": 4.1271, "step": 2500 }, { "epoch": 0.8331806446239693, "grad_norm": 0.49609375, "learning_rate": 1.4888597411133574e-05, "loss": 4.1205, "step": 2501 }, { "epoch": 0.8335137836262181, "grad_norm": 0.4921875, "learning_rate": 1.4888458373761343e-05, "loss": 4.1222, "step": 2502 }, { "epoch": 0.8338469226284667, "grad_norm": 0.51171875, "learning_rate": 1.4888319250329576e-05, "loss": 4.1746, "step": 2503 }, { "epoch": 0.8341800616307155, "grad_norm": 0.5, "learning_rate": 1.4888180040839893e-05, "loss": 4.1833, "step": 2504 }, { "epoch": 0.8345132006329641, "grad_norm": 0.4765625, "learning_rate": 1.4888040745293916e-05, "loss": 4.0928, "step": 2505 }, { "epoch": 0.8348463396352128, "grad_norm": 0.515625, "learning_rate": 1.4887901363693264e-05, "loss": 4.0957, "step": 2506 }, { "epoch": 0.8351794786374614, "grad_norm": 0.494140625, "learning_rate": 1.4887761896039565e-05, "loss": 4.0543, "step": 2507 }, { "epoch": 0.8355126176397102, "grad_norm": 0.470703125, "learning_rate": 1.488762234233444e-05, "loss": 4.0957, "step": 2508 }, { "epoch": 0.8358457566419588, "grad_norm": 0.486328125, "learning_rate": 1.4887482702579518e-05, "loss": 4.168, "step": 2509 }, { "epoch": 0.8361788956442076, "grad_norm": 0.5078125, "learning_rate": 1.4887342976776425e-05, "loss": 3.9818, "step": 2510 }, { "epoch": 0.8365120346464562, "grad_norm": 0.478515625, "learning_rate": 1.4887203164926785e-05, "loss": 4.1029, "step": 2511 }, { "epoch": 0.8368451736487049, "grad_norm": 0.490234375, "learning_rate": 1.488706326703223e-05, "loss": 4.0514, "step": 2512 }, { "epoch": 0.8371783126509537, "grad_norm": 0.4765625, "learning_rate": 1.4886923283094389e-05, "loss": 4.1624, "step": 2513 }, { "epoch": 0.8375114516532023, "grad_norm": 0.48828125, "learning_rate": 1.488678321311489e-05, "loss": 4.1547, "step": 2514 }, { "epoch": 0.837844590655451, "grad_norm": 0.5, "learning_rate": 1.4886643057095365e-05, "loss": 4.0465, "step": 2515 }, { "epoch": 0.8381777296576997, "grad_norm": 0.51171875, "learning_rate": 1.4886502815037453e-05, "loss": 4.0322, "step": 2516 }, { "epoch": 0.8385108686599484, "grad_norm": 0.54296875, "learning_rate": 1.4886362486942778e-05, "loss": 4.1156, "step": 2517 }, { "epoch": 0.838844007662197, "grad_norm": 0.50390625, "learning_rate": 1.4886222072812982e-05, "loss": 4.0757, "step": 2518 }, { "epoch": 0.8391771466644458, "grad_norm": 0.51171875, "learning_rate": 1.4886081572649695e-05, "loss": 4.1065, "step": 2519 }, { "epoch": 0.8395102856666944, "grad_norm": 0.466796875, "learning_rate": 1.4885940986454555e-05, "loss": 4.1184, "step": 2520 }, { "epoch": 0.8398434246689431, "grad_norm": 0.50390625, "learning_rate": 1.4885800314229202e-05, "loss": 4.0103, "step": 2521 }, { "epoch": 0.8401765636711918, "grad_norm": 0.51953125, "learning_rate": 1.4885659555975274e-05, "loss": 4.0815, "step": 2522 }, { "epoch": 0.8405097026734405, "grad_norm": 0.56640625, "learning_rate": 1.4885518711694407e-05, "loss": 4.0429, "step": 2523 }, { "epoch": 0.8408428416756892, "grad_norm": 0.5, "learning_rate": 1.4885377781388245e-05, "loss": 4.156, "step": 2524 }, { "epoch": 0.8411759806779379, "grad_norm": 0.48828125, "learning_rate": 1.4885236765058428e-05, "loss": 4.1207, "step": 2525 }, { "epoch": 0.8415091196801866, "grad_norm": 0.484375, "learning_rate": 1.4885095662706603e-05, "loss": 4.0879, "step": 2526 }, { "epoch": 0.8418422586824352, "grad_norm": 0.4921875, "learning_rate": 1.4884954474334407e-05, "loss": 4.1256, "step": 2527 }, { "epoch": 0.842175397684684, "grad_norm": 0.4765625, "learning_rate": 1.4884813199943485e-05, "loss": 4.1444, "step": 2528 }, { "epoch": 0.8425085366869326, "grad_norm": 0.52734375, "learning_rate": 1.4884671839535488e-05, "loss": 4.1771, "step": 2529 }, { "epoch": 0.8428416756891813, "grad_norm": 0.474609375, "learning_rate": 1.4884530393112057e-05, "loss": 4.1608, "step": 2530 }, { "epoch": 0.84317481469143, "grad_norm": 0.5, "learning_rate": 1.4884388860674841e-05, "loss": 4.0544, "step": 2531 }, { "epoch": 0.8435079536936787, "grad_norm": 0.478515625, "learning_rate": 1.4884247242225491e-05, "loss": 4.0977, "step": 2532 }, { "epoch": 0.8438410926959273, "grad_norm": 0.49609375, "learning_rate": 1.4884105537765656e-05, "loss": 4.1247, "step": 2533 }, { "epoch": 0.8441742316981761, "grad_norm": 0.50390625, "learning_rate": 1.4883963747296983e-05, "loss": 4.1018, "step": 2534 }, { "epoch": 0.8445073707004247, "grad_norm": 0.486328125, "learning_rate": 1.4883821870821128e-05, "loss": 4.1341, "step": 2535 }, { "epoch": 0.8448405097026734, "grad_norm": 0.5078125, "learning_rate": 1.4883679908339739e-05, "loss": 4.1434, "step": 2536 }, { "epoch": 0.8451736487049222, "grad_norm": 0.4921875, "learning_rate": 1.4883537859854475e-05, "loss": 4.0913, "step": 2537 }, { "epoch": 0.8455067877071708, "grad_norm": 0.4921875, "learning_rate": 1.4883395725366985e-05, "loss": 4.0866, "step": 2538 }, { "epoch": 0.8458399267094195, "grad_norm": 0.515625, "learning_rate": 1.4883253504878928e-05, "loss": 4.1909, "step": 2539 }, { "epoch": 0.8461730657116682, "grad_norm": 0.486328125, "learning_rate": 1.488311119839196e-05, "loss": 4.1226, "step": 2540 }, { "epoch": 0.8465062047139169, "grad_norm": 0.5234375, "learning_rate": 1.4882968805907741e-05, "loss": 4.1557, "step": 2541 }, { "epoch": 0.8468393437161655, "grad_norm": 0.478515625, "learning_rate": 1.4882826327427925e-05, "loss": 4.1073, "step": 2542 }, { "epoch": 0.8471724827184143, "grad_norm": 0.48828125, "learning_rate": 1.4882683762954174e-05, "loss": 4.0594, "step": 2543 }, { "epoch": 0.8475056217206629, "grad_norm": 0.50390625, "learning_rate": 1.4882541112488146e-05, "loss": 4.1437, "step": 2544 }, { "epoch": 0.8478387607229116, "grad_norm": 0.50390625, "learning_rate": 1.4882398376031506e-05, "loss": 4.1153, "step": 2545 }, { "epoch": 0.8481718997251603, "grad_norm": 0.494140625, "learning_rate": 1.4882255553585913e-05, "loss": 4.1104, "step": 2546 }, { "epoch": 0.848505038727409, "grad_norm": 0.455078125, "learning_rate": 1.4882112645153036e-05, "loss": 4.1301, "step": 2547 }, { "epoch": 0.8488381777296578, "grad_norm": 0.48828125, "learning_rate": 1.4881969650734535e-05, "loss": 4.1018, "step": 2548 }, { "epoch": 0.8491713167319064, "grad_norm": 0.482421875, "learning_rate": 1.4881826570332076e-05, "loss": 4.1728, "step": 2549 }, { "epoch": 0.8495044557341551, "grad_norm": 0.482421875, "learning_rate": 1.4881683403947328e-05, "loss": 4.187, "step": 2550 }, { "epoch": 0.8498375947364037, "grad_norm": 0.482421875, "learning_rate": 1.4881540151581956e-05, "loss": 4.0935, "step": 2551 }, { "epoch": 0.8501707337386525, "grad_norm": 0.478515625, "learning_rate": 1.4881396813237629e-05, "loss": 4.1424, "step": 2552 }, { "epoch": 0.8505038727409011, "grad_norm": 0.47265625, "learning_rate": 1.488125338891602e-05, "loss": 4.1715, "step": 2553 }, { "epoch": 0.8508370117431499, "grad_norm": 0.5078125, "learning_rate": 1.4881109878618794e-05, "loss": 4.1158, "step": 2554 }, { "epoch": 0.8511701507453985, "grad_norm": 0.47265625, "learning_rate": 1.4880966282347627e-05, "loss": 4.156, "step": 2555 }, { "epoch": 0.8515032897476472, "grad_norm": 0.50390625, "learning_rate": 1.4880822600104188e-05, "loss": 4.1382, "step": 2556 }, { "epoch": 0.8518364287498958, "grad_norm": 0.5078125, "learning_rate": 1.4880678831890153e-05, "loss": 4.0809, "step": 2557 }, { "epoch": 0.8521695677521446, "grad_norm": 0.490234375, "learning_rate": 1.4880534977707197e-05, "loss": 4.1339, "step": 2558 }, { "epoch": 0.8525027067543933, "grad_norm": 0.48828125, "learning_rate": 1.4880391037556995e-05, "loss": 4.1467, "step": 2559 }, { "epoch": 0.852835845756642, "grad_norm": 0.49609375, "learning_rate": 1.4880247011441224e-05, "loss": 4.0561, "step": 2560 }, { "epoch": 0.8531689847588907, "grad_norm": 0.47265625, "learning_rate": 1.4880102899361559e-05, "loss": 4.1233, "step": 2561 }, { "epoch": 0.8535021237611393, "grad_norm": 0.478515625, "learning_rate": 1.4879958701319682e-05, "loss": 4.1195, "step": 2562 }, { "epoch": 0.8538352627633881, "grad_norm": 0.5234375, "learning_rate": 1.487981441731727e-05, "loss": 4.1046, "step": 2563 }, { "epoch": 0.8541684017656367, "grad_norm": 0.54296875, "learning_rate": 1.4879670047356007e-05, "loss": 4.0946, "step": 2564 }, { "epoch": 0.8545015407678854, "grad_norm": 0.486328125, "learning_rate": 1.487952559143757e-05, "loss": 4.1477, "step": 2565 }, { "epoch": 0.854834679770134, "grad_norm": 0.482421875, "learning_rate": 1.4879381049563643e-05, "loss": 4.1733, "step": 2566 }, { "epoch": 0.8551678187723828, "grad_norm": 0.5234375, "learning_rate": 1.4879236421735912e-05, "loss": 4.1003, "step": 2567 }, { "epoch": 0.8555009577746314, "grad_norm": 0.51171875, "learning_rate": 1.487909170795606e-05, "loss": 4.1499, "step": 2568 }, { "epoch": 0.8558340967768802, "grad_norm": 0.48828125, "learning_rate": 1.4878946908225774e-05, "loss": 4.1247, "step": 2569 }, { "epoch": 0.8561672357791288, "grad_norm": 0.494140625, "learning_rate": 1.4878802022546736e-05, "loss": 4.1196, "step": 2570 }, { "epoch": 0.8565003747813775, "grad_norm": 0.482421875, "learning_rate": 1.4878657050920639e-05, "loss": 4.1336, "step": 2571 }, { "epoch": 0.8568335137836263, "grad_norm": 0.5, "learning_rate": 1.487851199334917e-05, "loss": 4.0892, "step": 2572 }, { "epoch": 0.8571666527858749, "grad_norm": 0.5, "learning_rate": 1.4878366849834017e-05, "loss": 4.153, "step": 2573 }, { "epoch": 0.8574997917881236, "grad_norm": 0.5078125, "learning_rate": 1.4878221620376871e-05, "loss": 4.0981, "step": 2574 }, { "epoch": 0.8578329307903723, "grad_norm": 0.50390625, "learning_rate": 1.4878076304979425e-05, "loss": 4.041, "step": 2575 }, { "epoch": 0.858166069792621, "grad_norm": 0.482421875, "learning_rate": 1.4877930903643372e-05, "loss": 4.0746, "step": 2576 }, { "epoch": 0.8584992087948696, "grad_norm": 0.482421875, "learning_rate": 1.4877785416370405e-05, "loss": 4.1217, "step": 2577 }, { "epoch": 0.8588323477971184, "grad_norm": 0.494140625, "learning_rate": 1.4877639843162218e-05, "loss": 4.0913, "step": 2578 }, { "epoch": 0.859165486799367, "grad_norm": 0.515625, "learning_rate": 1.4877494184020504e-05, "loss": 4.1177, "step": 2579 }, { "epoch": 0.8594986258016157, "grad_norm": 0.5, "learning_rate": 1.4877348438946965e-05, "loss": 4.0962, "step": 2580 }, { "epoch": 0.8598317648038644, "grad_norm": 0.515625, "learning_rate": 1.4877202607943293e-05, "loss": 4.1576, "step": 2581 }, { "epoch": 0.8601649038061131, "grad_norm": 0.51171875, "learning_rate": 1.4877056691011193e-05, "loss": 4.1055, "step": 2582 }, { "epoch": 0.8604980428083618, "grad_norm": 0.474609375, "learning_rate": 1.487691068815236e-05, "loss": 4.1288, "step": 2583 }, { "epoch": 0.8608311818106105, "grad_norm": 0.4765625, "learning_rate": 1.4876764599368497e-05, "loss": 4.1915, "step": 2584 }, { "epoch": 0.8611643208128592, "grad_norm": 0.51171875, "learning_rate": 1.4876618424661301e-05, "loss": 4.1334, "step": 2585 }, { "epoch": 0.8614974598151078, "grad_norm": 0.482421875, "learning_rate": 1.4876472164032483e-05, "loss": 4.162, "step": 2586 }, { "epoch": 0.8618305988173566, "grad_norm": 0.498046875, "learning_rate": 1.4876325817483737e-05, "loss": 4.1705, "step": 2587 }, { "epoch": 0.8621637378196052, "grad_norm": 0.5, "learning_rate": 1.4876179385016776e-05, "loss": 4.0705, "step": 2588 }, { "epoch": 0.8624968768218539, "grad_norm": 0.486328125, "learning_rate": 1.4876032866633299e-05, "loss": 4.0812, "step": 2589 }, { "epoch": 0.8628300158241026, "grad_norm": 0.484375, "learning_rate": 1.4875886262335017e-05, "loss": 4.1176, "step": 2590 }, { "epoch": 0.8631631548263513, "grad_norm": 0.5, "learning_rate": 1.4875739572123636e-05, "loss": 4.203, "step": 2591 }, { "epoch": 0.8634962938285999, "grad_norm": 0.458984375, "learning_rate": 1.4875592796000866e-05, "loss": 4.2376, "step": 2592 }, { "epoch": 0.8638294328308487, "grad_norm": 0.50390625, "learning_rate": 1.4875445933968414e-05, "loss": 4.1264, "step": 2593 }, { "epoch": 0.8641625718330974, "grad_norm": 0.515625, "learning_rate": 1.4875298986027994e-05, "loss": 4.0686, "step": 2594 }, { "epoch": 0.864495710835346, "grad_norm": 0.498046875, "learning_rate": 1.4875151952181312e-05, "loss": 4.1124, "step": 2595 }, { "epoch": 0.8648288498375948, "grad_norm": 0.482421875, "learning_rate": 1.4875004832430087e-05, "loss": 4.078, "step": 2596 }, { "epoch": 0.8651619888398434, "grad_norm": 0.4921875, "learning_rate": 1.487485762677603e-05, "loss": 4.0845, "step": 2597 }, { "epoch": 0.8654951278420921, "grad_norm": 0.4921875, "learning_rate": 1.4874710335220855e-05, "loss": 4.1262, "step": 2598 }, { "epoch": 0.8658282668443408, "grad_norm": 0.48828125, "learning_rate": 1.4874562957766279e-05, "loss": 4.1667, "step": 2599 }, { "epoch": 0.8661614058465895, "grad_norm": 0.484375, "learning_rate": 1.4874415494414016e-05, "loss": 4.1627, "step": 2600 }, { "epoch": 0.8664945448488381, "grad_norm": 0.47265625, "learning_rate": 1.4874267945165786e-05, "loss": 4.1807, "step": 2601 }, { "epoch": 0.8668276838510869, "grad_norm": 0.484375, "learning_rate": 1.4874120310023308e-05, "loss": 4.1419, "step": 2602 }, { "epoch": 0.8671608228533355, "grad_norm": 0.4921875, "learning_rate": 1.48739725889883e-05, "loss": 4.19, "step": 2603 }, { "epoch": 0.8674939618555843, "grad_norm": 0.484375, "learning_rate": 1.4873824782062483e-05, "loss": 4.1133, "step": 2604 }, { "epoch": 0.8678271008578329, "grad_norm": 0.5, "learning_rate": 1.4873676889247581e-05, "loss": 4.173, "step": 2605 }, { "epoch": 0.8681602398600816, "grad_norm": 0.5078125, "learning_rate": 1.4873528910545311e-05, "loss": 4.0969, "step": 2606 }, { "epoch": 0.8684933788623304, "grad_norm": 0.490234375, "learning_rate": 1.4873380845957403e-05, "loss": 4.0976, "step": 2607 }, { "epoch": 0.868826517864579, "grad_norm": 0.51953125, "learning_rate": 1.4873232695485578e-05, "loss": 4.1212, "step": 2608 }, { "epoch": 0.8691596568668277, "grad_norm": 0.49609375, "learning_rate": 1.4873084459131562e-05, "loss": 4.0863, "step": 2609 }, { "epoch": 0.8694927958690764, "grad_norm": 0.515625, "learning_rate": 1.4872936136897083e-05, "loss": 4.1489, "step": 2610 }, { "epoch": 0.8698259348713251, "grad_norm": 0.50390625, "learning_rate": 1.4872787728783869e-05, "loss": 4.1042, "step": 2611 }, { "epoch": 0.8701590738735737, "grad_norm": 0.48046875, "learning_rate": 1.4872639234793646e-05, "loss": 4.1004, "step": 2612 }, { "epoch": 0.8704922128758225, "grad_norm": 0.48828125, "learning_rate": 1.4872490654928144e-05, "loss": 4.2187, "step": 2613 }, { "epoch": 0.8708253518780711, "grad_norm": 0.5078125, "learning_rate": 1.4872341989189095e-05, "loss": 4.1155, "step": 2614 }, { "epoch": 0.8711584908803198, "grad_norm": 0.50390625, "learning_rate": 1.4872193237578231e-05, "loss": 4.1039, "step": 2615 }, { "epoch": 0.8714916298825685, "grad_norm": 0.51171875, "learning_rate": 1.4872044400097283e-05, "loss": 4.1138, "step": 2616 }, { "epoch": 0.8718247688848172, "grad_norm": 0.4765625, "learning_rate": 1.4871895476747987e-05, "loss": 4.2016, "step": 2617 }, { "epoch": 0.8721579078870659, "grad_norm": 0.484375, "learning_rate": 1.4871746467532076e-05, "loss": 4.1054, "step": 2618 }, { "epoch": 0.8724910468893146, "grad_norm": 0.5, "learning_rate": 1.4871597372451287e-05, "loss": 4.061, "step": 2619 }, { "epoch": 0.8728241858915633, "grad_norm": 0.48046875, "learning_rate": 1.4871448191507352e-05, "loss": 4.1421, "step": 2620 }, { "epoch": 0.8731573248938119, "grad_norm": 0.50390625, "learning_rate": 1.4871298924702013e-05, "loss": 4.1254, "step": 2621 }, { "epoch": 0.8734904638960607, "grad_norm": 0.486328125, "learning_rate": 1.487114957203701e-05, "loss": 4.1388, "step": 2622 }, { "epoch": 0.8738236028983093, "grad_norm": 0.478515625, "learning_rate": 1.4871000133514079e-05, "loss": 4.1259, "step": 2623 }, { "epoch": 0.874156741900558, "grad_norm": 0.478515625, "learning_rate": 1.4870850609134962e-05, "loss": 4.1455, "step": 2624 }, { "epoch": 0.8744898809028067, "grad_norm": 0.470703125, "learning_rate": 1.48707009989014e-05, "loss": 4.1581, "step": 2625 }, { "epoch": 0.8748230199050554, "grad_norm": 0.474609375, "learning_rate": 1.4870551302815138e-05, "loss": 4.0669, "step": 2626 }, { "epoch": 0.875156158907304, "grad_norm": 0.47265625, "learning_rate": 1.4870401520877917e-05, "loss": 4.0874, "step": 2627 }, { "epoch": 0.8754892979095528, "grad_norm": 0.48046875, "learning_rate": 1.4870251653091483e-05, "loss": 4.1379, "step": 2628 }, { "epoch": 0.8758224369118014, "grad_norm": 0.498046875, "learning_rate": 1.487010169945758e-05, "loss": 4.1098, "step": 2629 }, { "epoch": 0.8761555759140501, "grad_norm": 0.5, "learning_rate": 1.4869951659977956e-05, "loss": 4.1138, "step": 2630 }, { "epoch": 0.8764887149162989, "grad_norm": 0.46484375, "learning_rate": 1.4869801534654361e-05, "loss": 4.1202, "step": 2631 }, { "epoch": 0.8768218539185475, "grad_norm": 0.484375, "learning_rate": 1.4869651323488537e-05, "loss": 4.0922, "step": 2632 }, { "epoch": 0.8771549929207962, "grad_norm": 0.4765625, "learning_rate": 1.4869501026482241e-05, "loss": 4.0567, "step": 2633 }, { "epoch": 0.8774881319230449, "grad_norm": 0.470703125, "learning_rate": 1.4869350643637218e-05, "loss": 4.1506, "step": 2634 }, { "epoch": 0.8778212709252936, "grad_norm": 0.51171875, "learning_rate": 1.4869200174955223e-05, "loss": 4.1368, "step": 2635 }, { "epoch": 0.8781544099275422, "grad_norm": 0.48046875, "learning_rate": 1.4869049620438009e-05, "loss": 4.1004, "step": 2636 }, { "epoch": 0.878487548929791, "grad_norm": 0.46484375, "learning_rate": 1.4868898980087326e-05, "loss": 4.0919, "step": 2637 }, { "epoch": 0.8788206879320396, "grad_norm": 0.50390625, "learning_rate": 1.4868748253904931e-05, "loss": 4.1036, "step": 2638 }, { "epoch": 0.8791538269342883, "grad_norm": 0.482421875, "learning_rate": 1.4868597441892581e-05, "loss": 4.2159, "step": 2639 }, { "epoch": 0.879486965936537, "grad_norm": 0.498046875, "learning_rate": 1.486844654405203e-05, "loss": 4.0775, "step": 2640 }, { "epoch": 0.8798201049387857, "grad_norm": 0.478515625, "learning_rate": 1.4868295560385039e-05, "loss": 4.1483, "step": 2641 }, { "epoch": 0.8801532439410344, "grad_norm": 0.46484375, "learning_rate": 1.4868144490893362e-05, "loss": 4.0102, "step": 2642 }, { "epoch": 0.8804863829432831, "grad_norm": 0.466796875, "learning_rate": 1.486799333557876e-05, "loss": 4.1611, "step": 2643 }, { "epoch": 0.8808195219455318, "grad_norm": 0.46875, "learning_rate": 1.4867842094442998e-05, "loss": 4.0866, "step": 2644 }, { "epoch": 0.8811526609477804, "grad_norm": 0.4765625, "learning_rate": 1.4867690767487832e-05, "loss": 4.1604, "step": 2645 }, { "epoch": 0.8814857999500292, "grad_norm": 0.466796875, "learning_rate": 1.4867539354715028e-05, "loss": 4.1221, "step": 2646 }, { "epoch": 0.8818189389522778, "grad_norm": 0.482421875, "learning_rate": 1.4867387856126348e-05, "loss": 4.2228, "step": 2647 }, { "epoch": 0.8821520779545265, "grad_norm": 0.478515625, "learning_rate": 1.4867236271723557e-05, "loss": 4.1065, "step": 2648 }, { "epoch": 0.8824852169567752, "grad_norm": 0.4921875, "learning_rate": 1.4867084601508423e-05, "loss": 4.0772, "step": 2649 }, { "epoch": 0.8828183559590239, "grad_norm": 0.478515625, "learning_rate": 1.4866932845482708e-05, "loss": 4.2158, "step": 2650 }, { "epoch": 0.8831514949612725, "grad_norm": 0.484375, "learning_rate": 1.4866781003648182e-05, "loss": 4.0551, "step": 2651 }, { "epoch": 0.8834846339635213, "grad_norm": 0.482421875, "learning_rate": 1.4866629076006615e-05, "loss": 4.1082, "step": 2652 }, { "epoch": 0.88381777296577, "grad_norm": 0.5, "learning_rate": 1.4866477062559776e-05, "loss": 4.1277, "step": 2653 }, { "epoch": 0.8841509119680186, "grad_norm": 0.494140625, "learning_rate": 1.4866324963309433e-05, "loss": 4.0683, "step": 2654 }, { "epoch": 0.8844840509702674, "grad_norm": 0.490234375, "learning_rate": 1.4866172778257362e-05, "loss": 4.1343, "step": 2655 }, { "epoch": 0.884817189972516, "grad_norm": 0.494140625, "learning_rate": 1.4866020507405332e-05, "loss": 4.1126, "step": 2656 }, { "epoch": 0.8851503289747648, "grad_norm": 0.484375, "learning_rate": 1.4865868150755116e-05, "loss": 4.1505, "step": 2657 }, { "epoch": 0.8854834679770134, "grad_norm": 0.490234375, "learning_rate": 1.4865715708308493e-05, "loss": 4.1194, "step": 2658 }, { "epoch": 0.8858166069792621, "grad_norm": 0.486328125, "learning_rate": 1.4865563180067237e-05, "loss": 4.1553, "step": 2659 }, { "epoch": 0.8861497459815108, "grad_norm": 0.478515625, "learning_rate": 1.486541056603312e-05, "loss": 4.1347, "step": 2660 }, { "epoch": 0.8864828849837595, "grad_norm": 0.49609375, "learning_rate": 1.4865257866207925e-05, "loss": 4.0577, "step": 2661 }, { "epoch": 0.8868160239860081, "grad_norm": 0.486328125, "learning_rate": 1.486510508059343e-05, "loss": 4.2055, "step": 2662 }, { "epoch": 0.8871491629882569, "grad_norm": 0.478515625, "learning_rate": 1.4864952209191413e-05, "loss": 4.1601, "step": 2663 }, { "epoch": 0.8874823019905055, "grad_norm": 0.484375, "learning_rate": 1.4864799252003656e-05, "loss": 4.1549, "step": 2664 }, { "epoch": 0.8878154409927542, "grad_norm": 0.515625, "learning_rate": 1.4864646209031938e-05, "loss": 4.096, "step": 2665 }, { "epoch": 0.888148579995003, "grad_norm": 0.486328125, "learning_rate": 1.4864493080278045e-05, "loss": 4.1076, "step": 2666 }, { "epoch": 0.8884817189972516, "grad_norm": 0.49609375, "learning_rate": 1.4864339865743759e-05, "loss": 4.0848, "step": 2667 }, { "epoch": 0.8888148579995003, "grad_norm": 0.48046875, "learning_rate": 1.4864186565430863e-05, "loss": 4.1916, "step": 2668 }, { "epoch": 0.889147997001749, "grad_norm": 0.48828125, "learning_rate": 1.4864033179341146e-05, "loss": 4.1244, "step": 2669 }, { "epoch": 0.8894811360039977, "grad_norm": 0.515625, "learning_rate": 1.4863879707476393e-05, "loss": 4.0491, "step": 2670 }, { "epoch": 0.8898142750062463, "grad_norm": 0.50390625, "learning_rate": 1.4863726149838391e-05, "loss": 4.0656, "step": 2671 }, { "epoch": 0.8901474140084951, "grad_norm": 0.478515625, "learning_rate": 1.486357250642893e-05, "loss": 4.087, "step": 2672 }, { "epoch": 0.8904805530107437, "grad_norm": 0.494140625, "learning_rate": 1.4863418777249797e-05, "loss": 4.1232, "step": 2673 }, { "epoch": 0.8908136920129924, "grad_norm": 0.515625, "learning_rate": 1.4863264962302787e-05, "loss": 4.0359, "step": 2674 }, { "epoch": 0.8911468310152411, "grad_norm": 0.4765625, "learning_rate": 1.4863111061589687e-05, "loss": 4.068, "step": 2675 }, { "epoch": 0.8914799700174898, "grad_norm": 0.5, "learning_rate": 1.486295707511229e-05, "loss": 4.1653, "step": 2676 }, { "epoch": 0.8918131090197385, "grad_norm": 0.470703125, "learning_rate": 1.4862803002872395e-05, "loss": 4.0666, "step": 2677 }, { "epoch": 0.8921462480219872, "grad_norm": 0.48046875, "learning_rate": 1.486264884487179e-05, "loss": 4.0646, "step": 2678 }, { "epoch": 0.8924793870242359, "grad_norm": 0.482421875, "learning_rate": 1.4862494601112275e-05, "loss": 4.1813, "step": 2679 }, { "epoch": 0.8928125260264845, "grad_norm": 0.515625, "learning_rate": 1.4862340271595645e-05, "loss": 4.1853, "step": 2680 }, { "epoch": 0.8931456650287333, "grad_norm": 0.494140625, "learning_rate": 1.4862185856323696e-05, "loss": 4.0765, "step": 2681 }, { "epoch": 0.8934788040309819, "grad_norm": 0.494140625, "learning_rate": 1.486203135529823e-05, "loss": 4.108, "step": 2682 }, { "epoch": 0.8938119430332306, "grad_norm": 0.4921875, "learning_rate": 1.4861876768521045e-05, "loss": 4.1674, "step": 2683 }, { "epoch": 0.8941450820354793, "grad_norm": 0.474609375, "learning_rate": 1.486172209599394e-05, "loss": 4.128, "step": 2684 }, { "epoch": 0.894478221037728, "grad_norm": 0.50390625, "learning_rate": 1.4861567337718719e-05, "loss": 4.1163, "step": 2685 }, { "epoch": 0.8948113600399766, "grad_norm": 0.490234375, "learning_rate": 1.4861412493697181e-05, "loss": 4.0932, "step": 2686 }, { "epoch": 0.8951444990422254, "grad_norm": 0.470703125, "learning_rate": 1.4861257563931135e-05, "loss": 4.1036, "step": 2687 }, { "epoch": 0.8954776380444741, "grad_norm": 0.484375, "learning_rate": 1.4861102548422383e-05, "loss": 4.1367, "step": 2688 }, { "epoch": 0.8958107770467227, "grad_norm": 0.490234375, "learning_rate": 1.486094744717273e-05, "loss": 4.0655, "step": 2689 }, { "epoch": 0.8961439160489715, "grad_norm": 0.49609375, "learning_rate": 1.4860792260183983e-05, "loss": 4.1118, "step": 2690 }, { "epoch": 0.8964770550512201, "grad_norm": 0.4921875, "learning_rate": 1.486063698745795e-05, "loss": 4.1461, "step": 2691 }, { "epoch": 0.8968101940534688, "grad_norm": 0.48046875, "learning_rate": 1.4860481628996439e-05, "loss": 4.1871, "step": 2692 }, { "epoch": 0.8971433330557175, "grad_norm": 0.515625, "learning_rate": 1.4860326184801258e-05, "loss": 4.0963, "step": 2693 }, { "epoch": 0.8974764720579662, "grad_norm": 0.5078125, "learning_rate": 1.4860170654874223e-05, "loss": 4.2009, "step": 2694 }, { "epoch": 0.8978096110602148, "grad_norm": 0.4921875, "learning_rate": 1.4860015039217139e-05, "loss": 4.0889, "step": 2695 }, { "epoch": 0.8981427500624636, "grad_norm": 0.49609375, "learning_rate": 1.4859859337831823e-05, "loss": 4.0824, "step": 2696 }, { "epoch": 0.8984758890647122, "grad_norm": 0.486328125, "learning_rate": 1.4859703550720084e-05, "loss": 4.0317, "step": 2697 }, { "epoch": 0.898809028066961, "grad_norm": 0.462890625, "learning_rate": 1.4859547677883743e-05, "loss": 4.23, "step": 2698 }, { "epoch": 0.8991421670692096, "grad_norm": 0.494140625, "learning_rate": 1.4859391719324613e-05, "loss": 4.084, "step": 2699 }, { "epoch": 0.8994753060714583, "grad_norm": 0.484375, "learning_rate": 1.4859235675044509e-05, "loss": 4.0756, "step": 2700 }, { "epoch": 0.899808445073707, "grad_norm": 0.482421875, "learning_rate": 1.485907954504525e-05, "loss": 4.109, "step": 2701 }, { "epoch": 0.9001415840759557, "grad_norm": 0.51171875, "learning_rate": 1.485892332932865e-05, "loss": 4.119, "step": 2702 }, { "epoch": 0.9004747230782044, "grad_norm": 0.5234375, "learning_rate": 1.4858767027896538e-05, "loss": 4.1358, "step": 2703 }, { "epoch": 0.900807862080453, "grad_norm": 0.478515625, "learning_rate": 1.4858610640750727e-05, "loss": 4.1356, "step": 2704 }, { "epoch": 0.9011410010827018, "grad_norm": 0.48046875, "learning_rate": 1.4858454167893039e-05, "loss": 4.1299, "step": 2705 }, { "epoch": 0.9014741400849504, "grad_norm": 0.53515625, "learning_rate": 1.48582976093253e-05, "loss": 4.0804, "step": 2706 }, { "epoch": 0.9018072790871992, "grad_norm": 0.4921875, "learning_rate": 1.4858140965049333e-05, "loss": 4.1065, "step": 2707 }, { "epoch": 0.9021404180894478, "grad_norm": 0.486328125, "learning_rate": 1.4857984235066961e-05, "loss": 4.1002, "step": 2708 }, { "epoch": 0.9024735570916965, "grad_norm": 0.50390625, "learning_rate": 1.4857827419380008e-05, "loss": 4.0487, "step": 2709 }, { "epoch": 0.9028066960939451, "grad_norm": 0.52734375, "learning_rate": 1.4857670517990303e-05, "loss": 4.1297, "step": 2710 }, { "epoch": 0.9031398350961939, "grad_norm": 0.5078125, "learning_rate": 1.4857513530899675e-05, "loss": 4.1607, "step": 2711 }, { "epoch": 0.9034729740984426, "grad_norm": 0.5, "learning_rate": 1.4857356458109949e-05, "loss": 4.0794, "step": 2712 }, { "epoch": 0.9038061131006913, "grad_norm": 0.470703125, "learning_rate": 1.4857199299622958e-05, "loss": 4.1485, "step": 2713 }, { "epoch": 0.90413925210294, "grad_norm": 0.4765625, "learning_rate": 1.4857042055440529e-05, "loss": 4.1632, "step": 2714 }, { "epoch": 0.9044723911051886, "grad_norm": 0.494140625, "learning_rate": 1.4856884725564495e-05, "loss": 4.1065, "step": 2715 }, { "epoch": 0.9048055301074374, "grad_norm": 0.5234375, "learning_rate": 1.4856727309996691e-05, "loss": 4.0555, "step": 2716 }, { "epoch": 0.905138669109686, "grad_norm": 0.5078125, "learning_rate": 1.485656980873895e-05, "loss": 4.1864, "step": 2717 }, { "epoch": 0.9054718081119347, "grad_norm": 0.5078125, "learning_rate": 1.48564122217931e-05, "loss": 4.1567, "step": 2718 }, { "epoch": 0.9058049471141834, "grad_norm": 0.48828125, "learning_rate": 1.4856254549160985e-05, "loss": 4.1473, "step": 2719 }, { "epoch": 0.9061380861164321, "grad_norm": 0.4921875, "learning_rate": 1.485609679084444e-05, "loss": 4.1313, "step": 2720 }, { "epoch": 0.9064712251186807, "grad_norm": 0.478515625, "learning_rate": 1.4855938946845296e-05, "loss": 4.1185, "step": 2721 }, { "epoch": 0.9068043641209295, "grad_norm": 0.51171875, "learning_rate": 1.4855781017165398e-05, "loss": 4.1207, "step": 2722 }, { "epoch": 0.9071375031231782, "grad_norm": 0.5078125, "learning_rate": 1.4855623001806582e-05, "loss": 4.1324, "step": 2723 }, { "epoch": 0.9074706421254268, "grad_norm": 0.50390625, "learning_rate": 1.4855464900770693e-05, "loss": 4.1298, "step": 2724 }, { "epoch": 0.9078037811276756, "grad_norm": 0.494140625, "learning_rate": 1.485530671405957e-05, "loss": 4.1831, "step": 2725 }, { "epoch": 0.9081369201299242, "grad_norm": 0.484375, "learning_rate": 1.4855148441675055e-05, "loss": 4.0695, "step": 2726 }, { "epoch": 0.9084700591321729, "grad_norm": 0.4921875, "learning_rate": 1.485499008361899e-05, "loss": 4.1259, "step": 2727 }, { "epoch": 0.9088031981344216, "grad_norm": 0.5078125, "learning_rate": 1.4854831639893224e-05, "loss": 4.0494, "step": 2728 }, { "epoch": 0.9091363371366703, "grad_norm": 0.5078125, "learning_rate": 1.4854673110499598e-05, "loss": 4.1752, "step": 2729 }, { "epoch": 0.9094694761389189, "grad_norm": 0.515625, "learning_rate": 1.4854514495439962e-05, "loss": 4.0885, "step": 2730 }, { "epoch": 0.9098026151411677, "grad_norm": 0.486328125, "learning_rate": 1.485435579471616e-05, "loss": 4.1474, "step": 2731 }, { "epoch": 0.9101357541434163, "grad_norm": 0.5, "learning_rate": 1.4854197008330045e-05, "loss": 4.1172, "step": 2732 }, { "epoch": 0.910468893145665, "grad_norm": 0.49609375, "learning_rate": 1.485403813628346e-05, "loss": 4.1283, "step": 2733 }, { "epoch": 0.9108020321479137, "grad_norm": 0.498046875, "learning_rate": 1.4853879178578263e-05, "loss": 4.1068, "step": 2734 }, { "epoch": 0.9111351711501624, "grad_norm": 0.470703125, "learning_rate": 1.4853720135216302e-05, "loss": 4.084, "step": 2735 }, { "epoch": 0.9114683101524111, "grad_norm": 0.494140625, "learning_rate": 1.4853561006199429e-05, "loss": 4.2211, "step": 2736 }, { "epoch": 0.9118014491546598, "grad_norm": 0.5078125, "learning_rate": 1.4853401791529497e-05, "loss": 4.0933, "step": 2737 }, { "epoch": 0.9121345881569085, "grad_norm": 0.46875, "learning_rate": 1.4853242491208364e-05, "loss": 4.1306, "step": 2738 }, { "epoch": 0.9124677271591571, "grad_norm": 0.5078125, "learning_rate": 1.4853083105237881e-05, "loss": 4.1673, "step": 2739 }, { "epoch": 0.9128008661614059, "grad_norm": 0.474609375, "learning_rate": 1.4852923633619907e-05, "loss": 4.0847, "step": 2740 }, { "epoch": 0.9131340051636545, "grad_norm": 0.51171875, "learning_rate": 1.48527640763563e-05, "loss": 4.0967, "step": 2741 }, { "epoch": 0.9134671441659032, "grad_norm": 0.47265625, "learning_rate": 1.4852604433448916e-05, "loss": 4.1028, "step": 2742 }, { "epoch": 0.9138002831681519, "grad_norm": 0.50390625, "learning_rate": 1.4852444704899617e-05, "loss": 4.0386, "step": 2743 }, { "epoch": 0.9141334221704006, "grad_norm": 0.51171875, "learning_rate": 1.4852284890710262e-05, "loss": 4.1753, "step": 2744 }, { "epoch": 0.9144665611726492, "grad_norm": 0.52734375, "learning_rate": 1.4852124990882713e-05, "loss": 4.098, "step": 2745 }, { "epoch": 0.914799700174898, "grad_norm": 0.515625, "learning_rate": 1.4851965005418834e-05, "loss": 4.0822, "step": 2746 }, { "epoch": 0.9151328391771467, "grad_norm": 0.49609375, "learning_rate": 1.4851804934320484e-05, "loss": 4.1305, "step": 2747 }, { "epoch": 0.9154659781793953, "grad_norm": 0.4765625, "learning_rate": 1.4851644777589534e-05, "loss": 4.143, "step": 2748 }, { "epoch": 0.9157991171816441, "grad_norm": 0.5078125, "learning_rate": 1.4851484535227844e-05, "loss": 4.0834, "step": 2749 }, { "epoch": 0.9161322561838927, "grad_norm": 0.515625, "learning_rate": 1.4851324207237283e-05, "loss": 4.0906, "step": 2750 }, { "epoch": 0.9164653951861415, "grad_norm": 0.5078125, "learning_rate": 1.4851163793619717e-05, "loss": 4.0469, "step": 2751 }, { "epoch": 0.9167985341883901, "grad_norm": 0.48046875, "learning_rate": 1.4851003294377016e-05, "loss": 4.0616, "step": 2752 }, { "epoch": 0.9171316731906388, "grad_norm": 0.50390625, "learning_rate": 1.485084270951105e-05, "loss": 4.1436, "step": 2753 }, { "epoch": 0.9174648121928874, "grad_norm": 0.482421875, "learning_rate": 1.4850682039023686e-05, "loss": 4.1089, "step": 2754 }, { "epoch": 0.9177979511951362, "grad_norm": 0.494140625, "learning_rate": 1.4850521282916799e-05, "loss": 4.1482, "step": 2755 }, { "epoch": 0.9181310901973848, "grad_norm": 0.5078125, "learning_rate": 1.485036044119226e-05, "loss": 4.0482, "step": 2756 }, { "epoch": 0.9184642291996336, "grad_norm": 0.482421875, "learning_rate": 1.4850199513851942e-05, "loss": 4.0843, "step": 2757 }, { "epoch": 0.9187973682018822, "grad_norm": 0.546875, "learning_rate": 1.4850038500897723e-05, "loss": 4.086, "step": 2758 }, { "epoch": 0.9191305072041309, "grad_norm": 0.50390625, "learning_rate": 1.4849877402331474e-05, "loss": 4.0741, "step": 2759 }, { "epoch": 0.9194636462063797, "grad_norm": 0.51171875, "learning_rate": 1.4849716218155074e-05, "loss": 4.1392, "step": 2760 }, { "epoch": 0.9197967852086283, "grad_norm": 0.466796875, "learning_rate": 1.4849554948370397e-05, "loss": 4.1772, "step": 2761 }, { "epoch": 0.920129924210877, "grad_norm": 0.51171875, "learning_rate": 1.4849393592979326e-05, "loss": 4.0607, "step": 2762 }, { "epoch": 0.9204630632131257, "grad_norm": 0.4921875, "learning_rate": 1.4849232151983738e-05, "loss": 4.1567, "step": 2763 }, { "epoch": 0.9207962022153744, "grad_norm": 0.5078125, "learning_rate": 1.4849070625385514e-05, "loss": 4.0856, "step": 2764 }, { "epoch": 0.921129341217623, "grad_norm": 0.5078125, "learning_rate": 1.4848909013186535e-05, "loss": 4.0978, "step": 2765 }, { "epoch": 0.9214624802198718, "grad_norm": 0.515625, "learning_rate": 1.4848747315388684e-05, "loss": 4.1036, "step": 2766 }, { "epoch": 0.9217956192221204, "grad_norm": 0.50390625, "learning_rate": 1.4848585531993843e-05, "loss": 4.1194, "step": 2767 }, { "epoch": 0.9221287582243691, "grad_norm": 0.474609375, "learning_rate": 1.4848423663003899e-05, "loss": 4.1573, "step": 2768 }, { "epoch": 0.9224618972266178, "grad_norm": 0.515625, "learning_rate": 1.4848261708420735e-05, "loss": 4.1049, "step": 2769 }, { "epoch": 0.9227950362288665, "grad_norm": 0.50390625, "learning_rate": 1.484809966824624e-05, "loss": 4.1471, "step": 2770 }, { "epoch": 0.9231281752311152, "grad_norm": 0.5, "learning_rate": 1.4847937542482297e-05, "loss": 4.0633, "step": 2771 }, { "epoch": 0.9234613142333639, "grad_norm": 0.5234375, "learning_rate": 1.4847775331130799e-05, "loss": 4.136, "step": 2772 }, { "epoch": 0.9237944532356126, "grad_norm": 0.51953125, "learning_rate": 1.4847613034193635e-05, "loss": 4.0949, "step": 2773 }, { "epoch": 0.9241275922378612, "grad_norm": 0.478515625, "learning_rate": 1.484745065167269e-05, "loss": 4.1179, "step": 2774 }, { "epoch": 0.92446073124011, "grad_norm": 0.47265625, "learning_rate": 1.4847288183569863e-05, "loss": 4.1161, "step": 2775 }, { "epoch": 0.9247938702423586, "grad_norm": 0.498046875, "learning_rate": 1.4847125629887043e-05, "loss": 4.0637, "step": 2776 }, { "epoch": 0.9251270092446073, "grad_norm": 0.47265625, "learning_rate": 1.4846962990626121e-05, "loss": 4.1521, "step": 2777 }, { "epoch": 0.925460148246856, "grad_norm": 0.494140625, "learning_rate": 1.4846800265788995e-05, "loss": 4.1304, "step": 2778 }, { "epoch": 0.9257932872491047, "grad_norm": 0.490234375, "learning_rate": 1.484663745537756e-05, "loss": 4.1014, "step": 2779 }, { "epoch": 0.9261264262513533, "grad_norm": 0.4921875, "learning_rate": 1.484647455939371e-05, "loss": 4.132, "step": 2780 }, { "epoch": 0.9264595652536021, "grad_norm": 0.486328125, "learning_rate": 1.4846311577839343e-05, "loss": 4.1559, "step": 2781 }, { "epoch": 0.9267927042558508, "grad_norm": 0.5078125, "learning_rate": 1.4846148510716359e-05, "loss": 4.082, "step": 2782 }, { "epoch": 0.9271258432580994, "grad_norm": 0.494140625, "learning_rate": 1.4845985358026656e-05, "loss": 4.1153, "step": 2783 }, { "epoch": 0.9274589822603482, "grad_norm": 0.49609375, "learning_rate": 1.4845822119772137e-05, "loss": 4.1554, "step": 2784 }, { "epoch": 0.9277921212625968, "grad_norm": 0.486328125, "learning_rate": 1.48456587959547e-05, "loss": 4.1019, "step": 2785 }, { "epoch": 0.9281252602648455, "grad_norm": 0.48828125, "learning_rate": 1.4845495386576249e-05, "loss": 4.0806, "step": 2786 }, { "epoch": 0.9284583992670942, "grad_norm": 0.4765625, "learning_rate": 1.4845331891638686e-05, "loss": 4.1783, "step": 2787 }, { "epoch": 0.9287915382693429, "grad_norm": 0.50390625, "learning_rate": 1.4845168311143918e-05, "loss": 4.1081, "step": 2788 }, { "epoch": 0.9291246772715915, "grad_norm": 0.470703125, "learning_rate": 1.4845004645093849e-05, "loss": 4.1217, "step": 2789 }, { "epoch": 0.9294578162738403, "grad_norm": 0.498046875, "learning_rate": 1.4844840893490384e-05, "loss": 4.1513, "step": 2790 }, { "epoch": 0.9297909552760889, "grad_norm": 0.486328125, "learning_rate": 1.484467705633543e-05, "loss": 4.0789, "step": 2791 }, { "epoch": 0.9301240942783376, "grad_norm": 0.4921875, "learning_rate": 1.48445131336309e-05, "loss": 4.1876, "step": 2792 }, { "epoch": 0.9304572332805863, "grad_norm": 0.4921875, "learning_rate": 1.4844349125378698e-05, "loss": 4.0893, "step": 2793 }, { "epoch": 0.930790372282835, "grad_norm": 0.486328125, "learning_rate": 1.4844185031580737e-05, "loss": 4.0639, "step": 2794 }, { "epoch": 0.9311235112850837, "grad_norm": 0.5, "learning_rate": 1.484402085223893e-05, "loss": 4.1299, "step": 2795 }, { "epoch": 0.9314566502873324, "grad_norm": 0.490234375, "learning_rate": 1.4843856587355185e-05, "loss": 4.1222, "step": 2796 }, { "epoch": 0.9317897892895811, "grad_norm": 0.51953125, "learning_rate": 1.4843692236931417e-05, "loss": 4.1207, "step": 2797 }, { "epoch": 0.9321229282918297, "grad_norm": 0.5, "learning_rate": 1.4843527800969542e-05, "loss": 4.1233, "step": 2798 }, { "epoch": 0.9324560672940785, "grad_norm": 0.49609375, "learning_rate": 1.4843363279471473e-05, "loss": 4.1157, "step": 2799 }, { "epoch": 0.9327892062963271, "grad_norm": 0.515625, "learning_rate": 1.484319867243913e-05, "loss": 4.0529, "step": 2800 }, { "epoch": 0.9331223452985759, "grad_norm": 0.474609375, "learning_rate": 1.4843033979874426e-05, "loss": 4.1503, "step": 2801 }, { "epoch": 0.9334554843008245, "grad_norm": 0.51953125, "learning_rate": 1.484286920177928e-05, "loss": 4.1114, "step": 2802 }, { "epoch": 0.9337886233030732, "grad_norm": 0.484375, "learning_rate": 1.4842704338155614e-05, "loss": 4.0602, "step": 2803 }, { "epoch": 0.9341217623053218, "grad_norm": 0.484375, "learning_rate": 1.4842539389005347e-05, "loss": 4.0928, "step": 2804 }, { "epoch": 0.9344549013075706, "grad_norm": 0.52734375, "learning_rate": 1.48423743543304e-05, "loss": 4.0454, "step": 2805 }, { "epoch": 0.9347880403098193, "grad_norm": 0.484375, "learning_rate": 1.4842209234132695e-05, "loss": 4.1374, "step": 2806 }, { "epoch": 0.935121179312068, "grad_norm": 0.4765625, "learning_rate": 1.4842044028414155e-05, "loss": 4.1374, "step": 2807 }, { "epoch": 0.9354543183143167, "grad_norm": 0.47265625, "learning_rate": 1.4841878737176705e-05, "loss": 4.1021, "step": 2808 }, { "epoch": 0.9357874573165653, "grad_norm": 0.51171875, "learning_rate": 1.4841713360422271e-05, "loss": 4.1682, "step": 2809 }, { "epoch": 0.9361205963188141, "grad_norm": 0.482421875, "learning_rate": 1.4841547898152779e-05, "loss": 4.1052, "step": 2810 }, { "epoch": 0.9364537353210627, "grad_norm": 0.498046875, "learning_rate": 1.4841382350370153e-05, "loss": 4.1, "step": 2811 }, { "epoch": 0.9367868743233114, "grad_norm": 0.4765625, "learning_rate": 1.4841216717076326e-05, "loss": 4.148, "step": 2812 }, { "epoch": 0.93712001332556, "grad_norm": 0.482421875, "learning_rate": 1.4841050998273225e-05, "loss": 4.0947, "step": 2813 }, { "epoch": 0.9374531523278088, "grad_norm": 0.5, "learning_rate": 1.484088519396278e-05, "loss": 4.1133, "step": 2814 }, { "epoch": 0.9377862913300574, "grad_norm": 0.4765625, "learning_rate": 1.4840719304146923e-05, "loss": 4.1561, "step": 2815 }, { "epoch": 0.9381194303323062, "grad_norm": 0.486328125, "learning_rate": 1.4840553328827588e-05, "loss": 4.1227, "step": 2816 }, { "epoch": 0.9384525693345549, "grad_norm": 0.50390625, "learning_rate": 1.4840387268006704e-05, "loss": 4.1697, "step": 2817 }, { "epoch": 0.9387857083368035, "grad_norm": 0.45703125, "learning_rate": 1.484022112168621e-05, "loss": 4.1651, "step": 2818 }, { "epoch": 0.9391188473390523, "grad_norm": 0.4765625, "learning_rate": 1.4840054889868035e-05, "loss": 4.1643, "step": 2819 }, { "epoch": 0.9394519863413009, "grad_norm": 0.515625, "learning_rate": 1.4839888572554123e-05, "loss": 4.0076, "step": 2820 }, { "epoch": 0.9397851253435496, "grad_norm": 0.53125, "learning_rate": 1.4839722169746405e-05, "loss": 4.0203, "step": 2821 }, { "epoch": 0.9401182643457983, "grad_norm": 0.494140625, "learning_rate": 1.4839555681446823e-05, "loss": 4.132, "step": 2822 }, { "epoch": 0.940451403348047, "grad_norm": 0.50390625, "learning_rate": 1.4839389107657312e-05, "loss": 4.1042, "step": 2823 }, { "epoch": 0.9407845423502956, "grad_norm": 0.486328125, "learning_rate": 1.4839222448379818e-05, "loss": 4.1972, "step": 2824 }, { "epoch": 0.9411176813525444, "grad_norm": 0.498046875, "learning_rate": 1.4839055703616278e-05, "loss": 4.1487, "step": 2825 }, { "epoch": 0.941450820354793, "grad_norm": 0.5078125, "learning_rate": 1.4838888873368636e-05, "loss": 4.1319, "step": 2826 }, { "epoch": 0.9417839593570417, "grad_norm": 0.5, "learning_rate": 1.4838721957638835e-05, "loss": 4.0805, "step": 2827 }, { "epoch": 0.9421170983592904, "grad_norm": 0.51171875, "learning_rate": 1.4838554956428817e-05, "loss": 4.0437, "step": 2828 }, { "epoch": 0.9424502373615391, "grad_norm": 0.478515625, "learning_rate": 1.4838387869740531e-05, "loss": 4.1722, "step": 2829 }, { "epoch": 0.9427833763637878, "grad_norm": 0.484375, "learning_rate": 1.483822069757592e-05, "loss": 4.1448, "step": 2830 }, { "epoch": 0.9431165153660365, "grad_norm": 0.5, "learning_rate": 1.4838053439936933e-05, "loss": 4.1742, "step": 2831 }, { "epoch": 0.9434496543682852, "grad_norm": 0.474609375, "learning_rate": 1.4837886096825518e-05, "loss": 4.022, "step": 2832 }, { "epoch": 0.9437827933705338, "grad_norm": 0.48828125, "learning_rate": 1.4837718668243623e-05, "loss": 4.0639, "step": 2833 }, { "epoch": 0.9441159323727826, "grad_norm": 0.51171875, "learning_rate": 1.48375511541932e-05, "loss": 4.1616, "step": 2834 }, { "epoch": 0.9444490713750312, "grad_norm": 0.482421875, "learning_rate": 1.4837383554676198e-05, "loss": 4.1484, "step": 2835 }, { "epoch": 0.9447822103772799, "grad_norm": 0.48046875, "learning_rate": 1.483721586969457e-05, "loss": 4.142, "step": 2836 }, { "epoch": 0.9451153493795286, "grad_norm": 0.5, "learning_rate": 1.483704809925027e-05, "loss": 4.1034, "step": 2837 }, { "epoch": 0.9454484883817773, "grad_norm": 0.5, "learning_rate": 1.4836880243345253e-05, "loss": 4.1631, "step": 2838 }, { "epoch": 0.9457816273840259, "grad_norm": 0.484375, "learning_rate": 1.483671230198147e-05, "loss": 4.1663, "step": 2839 }, { "epoch": 0.9461147663862747, "grad_norm": 0.5, "learning_rate": 1.4836544275160882e-05, "loss": 4.0898, "step": 2840 }, { "epoch": 0.9464479053885234, "grad_norm": 0.484375, "learning_rate": 1.4836376162885444e-05, "loss": 4.0682, "step": 2841 }, { "epoch": 0.946781044390772, "grad_norm": 0.5, "learning_rate": 1.4836207965157114e-05, "loss": 4.1679, "step": 2842 }, { "epoch": 0.9471141833930208, "grad_norm": 0.515625, "learning_rate": 1.4836039681977852e-05, "loss": 4.0908, "step": 2843 }, { "epoch": 0.9474473223952694, "grad_norm": 0.466796875, "learning_rate": 1.4835871313349616e-05, "loss": 4.1528, "step": 2844 }, { "epoch": 0.9477804613975181, "grad_norm": 0.5, "learning_rate": 1.483570285927437e-05, "loss": 4.1074, "step": 2845 }, { "epoch": 0.9481136003997668, "grad_norm": 0.494140625, "learning_rate": 1.4835534319754074e-05, "loss": 4.1589, "step": 2846 }, { "epoch": 0.9484467394020155, "grad_norm": 0.5078125, "learning_rate": 1.4835365694790692e-05, "loss": 4.0747, "step": 2847 }, { "epoch": 0.9487798784042641, "grad_norm": 0.50390625, "learning_rate": 1.4835196984386188e-05, "loss": 4.0375, "step": 2848 }, { "epoch": 0.9491130174065129, "grad_norm": 0.4921875, "learning_rate": 1.4835028188542528e-05, "loss": 4.1706, "step": 2849 }, { "epoch": 0.9494461564087615, "grad_norm": 0.5078125, "learning_rate": 1.4834859307261678e-05, "loss": 4.0766, "step": 2850 }, { "epoch": 0.9497792954110102, "grad_norm": 0.5, "learning_rate": 1.4834690340545602e-05, "loss": 4.1464, "step": 2851 }, { "epoch": 0.9501124344132589, "grad_norm": 0.5078125, "learning_rate": 1.4834521288396271e-05, "loss": 4.0707, "step": 2852 }, { "epoch": 0.9504455734155076, "grad_norm": 0.49609375, "learning_rate": 1.4834352150815655e-05, "loss": 4.163, "step": 2853 }, { "epoch": 0.9507787124177564, "grad_norm": 0.5, "learning_rate": 1.4834182927805721e-05, "loss": 4.122, "step": 2854 }, { "epoch": 0.951111851420005, "grad_norm": 0.50390625, "learning_rate": 1.4834013619368443e-05, "loss": 4.1092, "step": 2855 }, { "epoch": 0.9514449904222537, "grad_norm": 0.50390625, "learning_rate": 1.4833844225505792e-05, "loss": 4.0924, "step": 2856 }, { "epoch": 0.9517781294245024, "grad_norm": 0.49609375, "learning_rate": 1.483367474621974e-05, "loss": 4.0933, "step": 2857 }, { "epoch": 0.9521112684267511, "grad_norm": 0.46875, "learning_rate": 1.483350518151226e-05, "loss": 4.072, "step": 2858 }, { "epoch": 0.9524444074289997, "grad_norm": 0.478515625, "learning_rate": 1.4833335531385332e-05, "loss": 4.1627, "step": 2859 }, { "epoch": 0.9527775464312485, "grad_norm": 0.5, "learning_rate": 1.4833165795840929e-05, "loss": 4.0502, "step": 2860 }, { "epoch": 0.9531106854334971, "grad_norm": 0.484375, "learning_rate": 1.4832995974881027e-05, "loss": 4.1081, "step": 2861 }, { "epoch": 0.9534438244357458, "grad_norm": 0.494140625, "learning_rate": 1.4832826068507605e-05, "loss": 4.0768, "step": 2862 }, { "epoch": 0.9537769634379945, "grad_norm": 0.47265625, "learning_rate": 1.4832656076722641e-05, "loss": 4.0791, "step": 2863 }, { "epoch": 0.9541101024402432, "grad_norm": 0.5078125, "learning_rate": 1.4832485999528119e-05, "loss": 4.0796, "step": 2864 }, { "epoch": 0.9544432414424919, "grad_norm": 0.5078125, "learning_rate": 1.4832315836926014e-05, "loss": 4.1512, "step": 2865 }, { "epoch": 0.9547763804447406, "grad_norm": 0.51171875, "learning_rate": 1.4832145588918315e-05, "loss": 4.1584, "step": 2866 }, { "epoch": 0.9551095194469893, "grad_norm": 0.5078125, "learning_rate": 1.4831975255506999e-05, "loss": 4.012, "step": 2867 }, { "epoch": 0.9554426584492379, "grad_norm": 0.486328125, "learning_rate": 1.4831804836694053e-05, "loss": 4.1505, "step": 2868 }, { "epoch": 0.9557757974514867, "grad_norm": 0.494140625, "learning_rate": 1.483163433248146e-05, "loss": 4.1156, "step": 2869 }, { "epoch": 0.9561089364537353, "grad_norm": 0.482421875, "learning_rate": 1.4831463742871209e-05, "loss": 4.1287, "step": 2870 }, { "epoch": 0.956442075455984, "grad_norm": 0.46875, "learning_rate": 1.4831293067865285e-05, "loss": 4.1435, "step": 2871 }, { "epoch": 0.9567752144582327, "grad_norm": 0.466796875, "learning_rate": 1.4831122307465677e-05, "loss": 4.0709, "step": 2872 }, { "epoch": 0.9571083534604814, "grad_norm": 0.51953125, "learning_rate": 1.483095146167437e-05, "loss": 4.1593, "step": 2873 }, { "epoch": 0.95744149246273, "grad_norm": 0.51953125, "learning_rate": 1.4830780530493361e-05, "loss": 4.0871, "step": 2874 }, { "epoch": 0.9577746314649788, "grad_norm": 0.5078125, "learning_rate": 1.4830609513924635e-05, "loss": 4.128, "step": 2875 }, { "epoch": 0.9581077704672275, "grad_norm": 0.50390625, "learning_rate": 1.4830438411970188e-05, "loss": 4.0849, "step": 2876 }, { "epoch": 0.9584409094694761, "grad_norm": 0.5078125, "learning_rate": 1.483026722463201e-05, "loss": 4.0718, "step": 2877 }, { "epoch": 0.9587740484717249, "grad_norm": 0.482421875, "learning_rate": 1.4830095951912096e-05, "loss": 4.0589, "step": 2878 }, { "epoch": 0.9591071874739735, "grad_norm": 0.515625, "learning_rate": 1.4829924593812442e-05, "loss": 4.0095, "step": 2879 }, { "epoch": 0.9594403264762222, "grad_norm": 0.4921875, "learning_rate": 1.4829753150335044e-05, "loss": 4.0909, "step": 2880 }, { "epoch": 0.9597734654784709, "grad_norm": 0.53125, "learning_rate": 1.4829581621481896e-05, "loss": 4.0991, "step": 2881 }, { "epoch": 0.9601066044807196, "grad_norm": 0.486328125, "learning_rate": 1.4829410007254998e-05, "loss": 4.1549, "step": 2882 }, { "epoch": 0.9604397434829682, "grad_norm": 0.46484375, "learning_rate": 1.4829238307656348e-05, "loss": 4.1034, "step": 2883 }, { "epoch": 0.960772882485217, "grad_norm": 0.49609375, "learning_rate": 1.482906652268795e-05, "loss": 4.1245, "step": 2884 }, { "epoch": 0.9611060214874656, "grad_norm": 0.5, "learning_rate": 1.48288946523518e-05, "loss": 4.1294, "step": 2885 }, { "epoch": 0.9614391604897143, "grad_norm": 0.498046875, "learning_rate": 1.4828722696649902e-05, "loss": 4.1415, "step": 2886 }, { "epoch": 0.961772299491963, "grad_norm": 0.490234375, "learning_rate": 1.4828550655584259e-05, "loss": 4.1293, "step": 2887 }, { "epoch": 0.9621054384942117, "grad_norm": 0.5, "learning_rate": 1.4828378529156871e-05, "loss": 4.1152, "step": 2888 }, { "epoch": 0.9624385774964604, "grad_norm": 0.515625, "learning_rate": 1.4828206317369752e-05, "loss": 4.0769, "step": 2889 }, { "epoch": 0.9627717164987091, "grad_norm": 0.4921875, "learning_rate": 1.4828034020224898e-05, "loss": 4.1269, "step": 2890 }, { "epoch": 0.9631048555009578, "grad_norm": 0.498046875, "learning_rate": 1.4827861637724322e-05, "loss": 4.117, "step": 2891 }, { "epoch": 0.9634379945032064, "grad_norm": 0.490234375, "learning_rate": 1.482768916987003e-05, "loss": 4.1157, "step": 2892 }, { "epoch": 0.9637711335054552, "grad_norm": 0.48828125, "learning_rate": 1.4827516616664032e-05, "loss": 4.1703, "step": 2893 }, { "epoch": 0.9641042725077038, "grad_norm": 0.490234375, "learning_rate": 1.4827343978108333e-05, "loss": 4.1717, "step": 2894 }, { "epoch": 0.9644374115099525, "grad_norm": 0.494140625, "learning_rate": 1.482717125420495e-05, "loss": 4.1656, "step": 2895 }, { "epoch": 0.9647705505122012, "grad_norm": 0.49609375, "learning_rate": 1.4826998444955892e-05, "loss": 4.1135, "step": 2896 }, { "epoch": 0.9651036895144499, "grad_norm": 0.5078125, "learning_rate": 1.4826825550363173e-05, "loss": 4.044, "step": 2897 }, { "epoch": 0.9654368285166985, "grad_norm": 0.484375, "learning_rate": 1.4826652570428807e-05, "loss": 4.069, "step": 2898 }, { "epoch": 0.9657699675189473, "grad_norm": 0.494140625, "learning_rate": 1.4826479505154807e-05, "loss": 4.1253, "step": 2899 }, { "epoch": 0.966103106521196, "grad_norm": 0.49609375, "learning_rate": 1.482630635454319e-05, "loss": 4.1244, "step": 2900 }, { "epoch": 0.9664362455234446, "grad_norm": 0.494140625, "learning_rate": 1.4826133118595972e-05, "loss": 4.0567, "step": 2901 }, { "epoch": 0.9667693845256934, "grad_norm": 0.5, "learning_rate": 1.4825959797315174e-05, "loss": 4.0931, "step": 2902 }, { "epoch": 0.967102523527942, "grad_norm": 0.5078125, "learning_rate": 1.482578639070281e-05, "loss": 4.1003, "step": 2903 }, { "epoch": 0.9674356625301908, "grad_norm": 0.486328125, "learning_rate": 1.4825612898760902e-05, "loss": 4.0664, "step": 2904 }, { "epoch": 0.9677688015324394, "grad_norm": 0.498046875, "learning_rate": 1.4825439321491472e-05, "loss": 4.154, "step": 2905 }, { "epoch": 0.9681019405346881, "grad_norm": 0.51171875, "learning_rate": 1.4825265658896541e-05, "loss": 4.0788, "step": 2906 }, { "epoch": 0.9684350795369367, "grad_norm": 0.515625, "learning_rate": 1.4825091910978131e-05, "loss": 4.1592, "step": 2907 }, { "epoch": 0.9687682185391855, "grad_norm": 0.5234375, "learning_rate": 1.4824918077738267e-05, "loss": 4.0399, "step": 2908 }, { "epoch": 0.9691013575414341, "grad_norm": 0.478515625, "learning_rate": 1.4824744159178972e-05, "loss": 4.1639, "step": 2909 }, { "epoch": 0.9694344965436829, "grad_norm": 0.4921875, "learning_rate": 1.4824570155302274e-05, "loss": 4.0942, "step": 2910 }, { "epoch": 0.9697676355459316, "grad_norm": 0.494140625, "learning_rate": 1.4824396066110199e-05, "loss": 4.1236, "step": 2911 }, { "epoch": 0.9701007745481802, "grad_norm": 0.51171875, "learning_rate": 1.4824221891604773e-05, "loss": 4.1629, "step": 2912 }, { "epoch": 0.970433913550429, "grad_norm": 0.4921875, "learning_rate": 1.4824047631788027e-05, "loss": 4.1397, "step": 2913 }, { "epoch": 0.9707670525526776, "grad_norm": 0.490234375, "learning_rate": 1.482387328666199e-05, "loss": 4.1416, "step": 2914 }, { "epoch": 0.9711001915549263, "grad_norm": 0.490234375, "learning_rate": 1.4823698856228694e-05, "loss": 4.0909, "step": 2915 }, { "epoch": 0.971433330557175, "grad_norm": 0.486328125, "learning_rate": 1.4823524340490167e-05, "loss": 4.1135, "step": 2916 }, { "epoch": 0.9717664695594237, "grad_norm": 0.478515625, "learning_rate": 1.4823349739448445e-05, "loss": 4.1598, "step": 2917 }, { "epoch": 0.9720996085616723, "grad_norm": 0.494140625, "learning_rate": 1.4823175053105561e-05, "loss": 4.1522, "step": 2918 }, { "epoch": 0.9724327475639211, "grad_norm": 0.494140625, "learning_rate": 1.482300028146355e-05, "loss": 4.1009, "step": 2919 }, { "epoch": 0.9727658865661697, "grad_norm": 0.47265625, "learning_rate": 1.4822825424524448e-05, "loss": 4.0955, "step": 2920 }, { "epoch": 0.9730990255684184, "grad_norm": 0.50390625, "learning_rate": 1.4822650482290288e-05, "loss": 4.0827, "step": 2921 }, { "epoch": 0.9734321645706671, "grad_norm": 0.55859375, "learning_rate": 1.4822475454763113e-05, "loss": 4.0912, "step": 2922 }, { "epoch": 0.9737653035729158, "grad_norm": 0.50390625, "learning_rate": 1.482230034194496e-05, "loss": 4.0838, "step": 2923 }, { "epoch": 0.9740984425751645, "grad_norm": 0.50390625, "learning_rate": 1.4822125143837867e-05, "loss": 4.1489, "step": 2924 }, { "epoch": 0.9744315815774132, "grad_norm": 0.50390625, "learning_rate": 1.4821949860443876e-05, "loss": 4.1766, "step": 2925 }, { "epoch": 0.9747647205796619, "grad_norm": 0.484375, "learning_rate": 1.4821774491765028e-05, "loss": 4.118, "step": 2926 }, { "epoch": 0.9750978595819105, "grad_norm": 0.5, "learning_rate": 1.4821599037803366e-05, "loss": 4.1067, "step": 2927 }, { "epoch": 0.9754309985841593, "grad_norm": 0.51171875, "learning_rate": 1.4821423498560934e-05, "loss": 4.133, "step": 2928 }, { "epoch": 0.9757641375864079, "grad_norm": 0.498046875, "learning_rate": 1.4821247874039778e-05, "loss": 4.0814, "step": 2929 }, { "epoch": 0.9760972765886566, "grad_norm": 0.5078125, "learning_rate": 1.4821072164241941e-05, "loss": 4.1822, "step": 2930 }, { "epoch": 0.9764304155909053, "grad_norm": 0.51171875, "learning_rate": 1.482089636916947e-05, "loss": 4.0298, "step": 2931 }, { "epoch": 0.976763554593154, "grad_norm": 0.5, "learning_rate": 1.4820720488824413e-05, "loss": 4.0866, "step": 2932 }, { "epoch": 0.9770966935954026, "grad_norm": 0.4765625, "learning_rate": 1.482054452320882e-05, "loss": 4.0955, "step": 2933 }, { "epoch": 0.9774298325976514, "grad_norm": 0.5234375, "learning_rate": 1.4820368472324738e-05, "loss": 4.1604, "step": 2934 }, { "epoch": 0.9777629715999001, "grad_norm": 0.5078125, "learning_rate": 1.4820192336174223e-05, "loss": 4.1269, "step": 2935 }, { "epoch": 0.9780961106021487, "grad_norm": 0.490234375, "learning_rate": 1.4820016114759321e-05, "loss": 4.0998, "step": 2936 }, { "epoch": 0.9784292496043975, "grad_norm": 0.5234375, "learning_rate": 1.4819839808082085e-05, "loss": 4.1266, "step": 2937 }, { "epoch": 0.9787623886066461, "grad_norm": 0.50390625, "learning_rate": 1.481966341614457e-05, "loss": 4.2134, "step": 2938 }, { "epoch": 0.9790955276088948, "grad_norm": 0.494140625, "learning_rate": 1.4819486938948832e-05, "loss": 4.0277, "step": 2939 }, { "epoch": 0.9794286666111435, "grad_norm": 0.490234375, "learning_rate": 1.4819310376496924e-05, "loss": 4.1917, "step": 2940 }, { "epoch": 0.9797618056133922, "grad_norm": 0.5234375, "learning_rate": 1.4819133728790904e-05, "loss": 4.0762, "step": 2941 }, { "epoch": 0.9800949446156408, "grad_norm": 0.5078125, "learning_rate": 1.4818956995832829e-05, "loss": 4.0135, "step": 2942 }, { "epoch": 0.9804280836178896, "grad_norm": 0.48828125, "learning_rate": 1.4818780177624757e-05, "loss": 4.1249, "step": 2943 }, { "epoch": 0.9807612226201382, "grad_norm": 0.515625, "learning_rate": 1.481860327416875e-05, "loss": 4.0427, "step": 2944 }, { "epoch": 0.981094361622387, "grad_norm": 0.5078125, "learning_rate": 1.4818426285466867e-05, "loss": 4.1962, "step": 2945 }, { "epoch": 0.9814275006246357, "grad_norm": 0.515625, "learning_rate": 1.481824921152117e-05, "loss": 4.1108, "step": 2946 }, { "epoch": 0.9817606396268843, "grad_norm": 0.515625, "learning_rate": 1.481807205233372e-05, "loss": 4.139, "step": 2947 }, { "epoch": 0.982093778629133, "grad_norm": 0.494140625, "learning_rate": 1.481789480790658e-05, "loss": 4.0645, "step": 2948 }, { "epoch": 0.9824269176313817, "grad_norm": 0.486328125, "learning_rate": 1.4817717478241819e-05, "loss": 4.1266, "step": 2949 }, { "epoch": 0.9827600566336304, "grad_norm": 0.50390625, "learning_rate": 1.4817540063341498e-05, "loss": 4.0431, "step": 2950 }, { "epoch": 0.983093195635879, "grad_norm": 0.484375, "learning_rate": 1.4817362563207684e-05, "loss": 4.0723, "step": 2951 }, { "epoch": 0.9834263346381278, "grad_norm": 0.53515625, "learning_rate": 1.4817184977842446e-05, "loss": 4.1325, "step": 2952 }, { "epoch": 0.9837594736403764, "grad_norm": 0.490234375, "learning_rate": 1.4817007307247854e-05, "loss": 4.1251, "step": 2953 }, { "epoch": 0.9840926126426252, "grad_norm": 0.490234375, "learning_rate": 1.4816829551425973e-05, "loss": 4.1919, "step": 2954 }, { "epoch": 0.9844257516448738, "grad_norm": 0.490234375, "learning_rate": 1.4816651710378878e-05, "loss": 4.2024, "step": 2955 }, { "epoch": 0.9847588906471225, "grad_norm": 0.55859375, "learning_rate": 1.4816473784108637e-05, "loss": 4.0651, "step": 2956 }, { "epoch": 0.9850920296493711, "grad_norm": 0.484375, "learning_rate": 1.4816295772617325e-05, "loss": 4.0948, "step": 2957 }, { "epoch": 0.9854251686516199, "grad_norm": 0.5078125, "learning_rate": 1.4816117675907013e-05, "loss": 4.0889, "step": 2958 }, { "epoch": 0.9857583076538686, "grad_norm": 0.494140625, "learning_rate": 1.4815939493979779e-05, "loss": 4.0829, "step": 2959 }, { "epoch": 0.9860914466561173, "grad_norm": 0.5078125, "learning_rate": 1.4815761226837693e-05, "loss": 4.127, "step": 2960 }, { "epoch": 0.986424585658366, "grad_norm": 0.50390625, "learning_rate": 1.4815582874482837e-05, "loss": 4.1503, "step": 2961 }, { "epoch": 0.9867577246606146, "grad_norm": 0.51953125, "learning_rate": 1.4815404436917285e-05, "loss": 4.0887, "step": 2962 }, { "epoch": 0.9870908636628634, "grad_norm": 0.48046875, "learning_rate": 1.4815225914143117e-05, "loss": 4.1616, "step": 2963 }, { "epoch": 0.987424002665112, "grad_norm": 0.52734375, "learning_rate": 1.4815047306162412e-05, "loss": 4.1197, "step": 2964 }, { "epoch": 0.9877571416673607, "grad_norm": 0.494140625, "learning_rate": 1.481486861297725e-05, "loss": 4.0127, "step": 2965 }, { "epoch": 0.9880902806696094, "grad_norm": 0.48828125, "learning_rate": 1.4814689834589712e-05, "loss": 4.0941, "step": 2966 }, { "epoch": 0.9884234196718581, "grad_norm": 0.49609375, "learning_rate": 1.4814510971001882e-05, "loss": 4.069, "step": 2967 }, { "epoch": 0.9887565586741067, "grad_norm": 0.50390625, "learning_rate": 1.4814332022215842e-05, "loss": 4.2104, "step": 2968 }, { "epoch": 0.9890896976763555, "grad_norm": 0.51171875, "learning_rate": 1.4814152988233675e-05, "loss": 4.0751, "step": 2969 }, { "epoch": 0.9894228366786042, "grad_norm": 0.50390625, "learning_rate": 1.4813973869057471e-05, "loss": 4.0797, "step": 2970 }, { "epoch": 0.9897559756808528, "grad_norm": 0.48828125, "learning_rate": 1.481379466468931e-05, "loss": 4.1081, "step": 2971 }, { "epoch": 0.9900891146831016, "grad_norm": 0.478515625, "learning_rate": 1.4813615375131286e-05, "loss": 4.099, "step": 2972 }, { "epoch": 0.9904222536853502, "grad_norm": 0.5, "learning_rate": 1.4813436000385483e-05, "loss": 4.0782, "step": 2973 }, { "epoch": 0.9907553926875989, "grad_norm": 0.494140625, "learning_rate": 1.4813256540453992e-05, "loss": 4.052, "step": 2974 }, { "epoch": 0.9910885316898476, "grad_norm": 0.54296875, "learning_rate": 1.4813076995338902e-05, "loss": 3.9826, "step": 2975 }, { "epoch": 0.9914216706920963, "grad_norm": 0.5078125, "learning_rate": 1.4812897365042306e-05, "loss": 4.04, "step": 2976 }, { "epoch": 0.9917548096943449, "grad_norm": 0.5, "learning_rate": 1.4812717649566294e-05, "loss": 4.0936, "step": 2977 }, { "epoch": 0.9920879486965937, "grad_norm": 0.478515625, "learning_rate": 1.4812537848912963e-05, "loss": 4.0212, "step": 2978 }, { "epoch": 0.9924210876988423, "grad_norm": 0.53125, "learning_rate": 1.4812357963084404e-05, "loss": 4.0371, "step": 2979 }, { "epoch": 0.992754226701091, "grad_norm": 0.486328125, "learning_rate": 1.4812177992082712e-05, "loss": 4.1316, "step": 2980 }, { "epoch": 0.9930873657033397, "grad_norm": 0.49609375, "learning_rate": 1.4811997935909985e-05, "loss": 4.1163, "step": 2981 }, { "epoch": 0.9934205047055884, "grad_norm": 0.51171875, "learning_rate": 1.4811817794568322e-05, "loss": 4.1414, "step": 2982 }, { "epoch": 0.9937536437078371, "grad_norm": 0.4921875, "learning_rate": 1.4811637568059817e-05, "loss": 4.1104, "step": 2983 }, { "epoch": 0.9940867827100858, "grad_norm": 0.515625, "learning_rate": 1.4811457256386571e-05, "loss": 4.1232, "step": 2984 }, { "epoch": 0.9944199217123345, "grad_norm": 0.5078125, "learning_rate": 1.4811276859550686e-05, "loss": 4.1236, "step": 2985 }, { "epoch": 0.9947530607145831, "grad_norm": 0.494140625, "learning_rate": 1.481109637755426e-05, "loss": 4.0837, "step": 2986 }, { "epoch": 0.9950861997168319, "grad_norm": 0.490234375, "learning_rate": 1.48109158103994e-05, "loss": 4.056, "step": 2987 }, { "epoch": 0.9954193387190805, "grad_norm": 0.48828125, "learning_rate": 1.4810735158088203e-05, "loss": 4.079, "step": 2988 }, { "epoch": 0.9957524777213292, "grad_norm": 0.51171875, "learning_rate": 1.4810554420622776e-05, "loss": 4.0946, "step": 2989 }, { "epoch": 0.9960856167235779, "grad_norm": 0.5078125, "learning_rate": 1.4810373598005226e-05, "loss": 4.1057, "step": 2990 }, { "epoch": 0.9964187557258266, "grad_norm": 0.498046875, "learning_rate": 1.4810192690237661e-05, "loss": 4.1002, "step": 2991 }, { "epoch": 0.9967518947280752, "grad_norm": 0.48046875, "learning_rate": 1.481001169732218e-05, "loss": 4.14, "step": 2992 }, { "epoch": 0.997085033730324, "grad_norm": 0.47265625, "learning_rate": 1.4809830619260899e-05, "loss": 4.1159, "step": 2993 }, { "epoch": 0.9974181727325727, "grad_norm": 0.486328125, "learning_rate": 1.4809649456055922e-05, "loss": 4.1585, "step": 2994 }, { "epoch": 0.9977513117348213, "grad_norm": 0.482421875, "learning_rate": 1.4809468207709365e-05, "loss": 4.0656, "step": 2995 }, { "epoch": 0.9980844507370701, "grad_norm": 0.486328125, "learning_rate": 1.4809286874223334e-05, "loss": 4.1353, "step": 2996 }, { "epoch": 0.9984175897393187, "grad_norm": 0.478515625, "learning_rate": 1.4809105455599943e-05, "loss": 4.1236, "step": 2997 }, { "epoch": 0.9987507287415675, "grad_norm": 0.4921875, "learning_rate": 1.4808923951841305e-05, "loss": 4.1329, "step": 2998 }, { "epoch": 0.9990838677438161, "grad_norm": 0.486328125, "learning_rate": 1.4808742362949531e-05, "loss": 4.1497, "step": 2999 }, { "epoch": 0.9994170067460648, "grad_norm": 0.490234375, "learning_rate": 1.4808560688926743e-05, "loss": 4.087, "step": 3000 }, { "epoch": 0.9997501457483134, "grad_norm": 0.4921875, "learning_rate": 1.4808378929775053e-05, "loss": 4.1048, "step": 3001 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 1.4808197085496578e-05, "loss": 4.1002, "step": 3002 }, { "epoch": 1.0003331390022487, "grad_norm": 0.490234375, "learning_rate": 1.4808015156093434e-05, "loss": 4.1909, "step": 3003 }, { "epoch": 1.0006662780044975, "grad_norm": 0.5078125, "learning_rate": 1.4807833141567745e-05, "loss": 4.1207, "step": 3004 }, { "epoch": 1.000999417006746, "grad_norm": 0.5, "learning_rate": 1.4807651041921629e-05, "loss": 4.0664, "step": 3005 }, { "epoch": 1.0013325560089947, "grad_norm": 0.474609375, "learning_rate": 1.4807468857157206e-05, "loss": 4.097, "step": 3006 }, { "epoch": 1.0016656950112435, "grad_norm": 0.5078125, "learning_rate": 1.4807286587276599e-05, "loss": 4.1063, "step": 3007 }, { "epoch": 1.0019988340134922, "grad_norm": 0.46875, "learning_rate": 1.4807104232281928e-05, "loss": 4.178, "step": 3008 }, { "epoch": 1.0023319730157407, "grad_norm": 0.5234375, "learning_rate": 1.4806921792175324e-05, "loss": 4.0877, "step": 3009 }, { "epoch": 1.0026651120179895, "grad_norm": 0.5078125, "learning_rate": 1.4806739266958906e-05, "loss": 4.025, "step": 3010 }, { "epoch": 1.0029982510202382, "grad_norm": 0.498046875, "learning_rate": 1.4806556656634802e-05, "loss": 4.0826, "step": 3011 }, { "epoch": 1.003331390022487, "grad_norm": 0.4921875, "learning_rate": 1.4806373961205139e-05, "loss": 4.1509, "step": 3012 }, { "epoch": 1.0036645290247355, "grad_norm": 0.5078125, "learning_rate": 1.4806191180672046e-05, "loss": 4.0115, "step": 3013 }, { "epoch": 1.0039976680269842, "grad_norm": 0.494140625, "learning_rate": 1.4806008315037649e-05, "loss": 4.0691, "step": 3014 }, { "epoch": 1.004330807029233, "grad_norm": 0.52734375, "learning_rate": 1.4805825364304081e-05, "loss": 4.1758, "step": 3015 }, { "epoch": 1.0046639460314817, "grad_norm": 0.482421875, "learning_rate": 1.4805642328473472e-05, "loss": 4.1298, "step": 3016 }, { "epoch": 1.0049970850337304, "grad_norm": 0.5078125, "learning_rate": 1.4805459207547951e-05, "loss": 4.0999, "step": 3017 }, { "epoch": 1.005330224035979, "grad_norm": 0.50390625, "learning_rate": 1.4805276001529658e-05, "loss": 4.1414, "step": 3018 }, { "epoch": 1.0056633630382277, "grad_norm": 0.498046875, "learning_rate": 1.4805092710420722e-05, "loss": 4.102, "step": 3019 }, { "epoch": 1.0059965020404764, "grad_norm": 0.54296875, "learning_rate": 1.4804909334223278e-05, "loss": 4.0635, "step": 3020 }, { "epoch": 1.0063296410427252, "grad_norm": 0.498046875, "learning_rate": 1.4804725872939461e-05, "loss": 4.0594, "step": 3021 }, { "epoch": 1.0066627800449737, "grad_norm": 0.51171875, "learning_rate": 1.4804542326571413e-05, "loss": 4.1163, "step": 3022 }, { "epoch": 1.0069959190472224, "grad_norm": 0.4921875, "learning_rate": 1.4804358695121266e-05, "loss": 4.0443, "step": 3023 }, { "epoch": 1.0073290580494711, "grad_norm": 0.50390625, "learning_rate": 1.4804174978591162e-05, "loss": 4.1354, "step": 3024 }, { "epoch": 1.0076621970517199, "grad_norm": 0.5, "learning_rate": 1.4803991176983241e-05, "loss": 4.0831, "step": 3025 }, { "epoch": 1.0079953360539686, "grad_norm": 0.51953125, "learning_rate": 1.4803807290299641e-05, "loss": 4.1288, "step": 3026 }, { "epoch": 1.0083284750562171, "grad_norm": 0.515625, "learning_rate": 1.480362331854251e-05, "loss": 4.1095, "step": 3027 }, { "epoch": 1.0086616140584659, "grad_norm": 0.486328125, "learning_rate": 1.4803439261713983e-05, "loss": 4.1132, "step": 3028 }, { "epoch": 1.0089947530607146, "grad_norm": 0.4921875, "learning_rate": 1.480325511981621e-05, "loss": 4.0805, "step": 3029 }, { "epoch": 1.0093278920629634, "grad_norm": 0.5078125, "learning_rate": 1.4803070892851333e-05, "loss": 4.0803, "step": 3030 }, { "epoch": 1.0096610310652119, "grad_norm": 0.51171875, "learning_rate": 1.48028865808215e-05, "loss": 4.0867, "step": 3031 }, { "epoch": 1.0099941700674606, "grad_norm": 0.52734375, "learning_rate": 1.4802702183728855e-05, "loss": 4.0994, "step": 3032 }, { "epoch": 1.0103273090697094, "grad_norm": 0.5234375, "learning_rate": 1.4802517701575546e-05, "loss": 4.1414, "step": 3033 }, { "epoch": 1.010660448071958, "grad_norm": 0.50390625, "learning_rate": 1.4802333134363724e-05, "loss": 4.1002, "step": 3034 }, { "epoch": 1.0109935870742066, "grad_norm": 0.50390625, "learning_rate": 1.4802148482095537e-05, "loss": 4.0699, "step": 3035 }, { "epoch": 1.0113267260764554, "grad_norm": 0.5078125, "learning_rate": 1.4801963744773138e-05, "loss": 4.1034, "step": 3036 }, { "epoch": 1.011659865078704, "grad_norm": 0.51171875, "learning_rate": 1.4801778922398679e-05, "loss": 4.0992, "step": 3037 }, { "epoch": 1.0119930040809528, "grad_norm": 0.486328125, "learning_rate": 1.4801594014974309e-05, "loss": 4.0779, "step": 3038 }, { "epoch": 1.0123261430832016, "grad_norm": 0.546875, "learning_rate": 1.4801409022502184e-05, "loss": 4.0662, "step": 3039 }, { "epoch": 1.01265928208545, "grad_norm": 0.470703125, "learning_rate": 1.4801223944984458e-05, "loss": 4.1388, "step": 3040 }, { "epoch": 1.0129924210876988, "grad_norm": 0.498046875, "learning_rate": 1.4801038782423288e-05, "loss": 4.0561, "step": 3041 }, { "epoch": 1.0133255600899476, "grad_norm": 0.490234375, "learning_rate": 1.480085353482083e-05, "loss": 4.1224, "step": 3042 }, { "epoch": 1.0136586990921963, "grad_norm": 0.48046875, "learning_rate": 1.4800668202179244e-05, "loss": 4.1099, "step": 3043 }, { "epoch": 1.0139918380944448, "grad_norm": 0.51171875, "learning_rate": 1.4800482784500686e-05, "loss": 4.1124, "step": 3044 }, { "epoch": 1.0143249770966936, "grad_norm": 0.498046875, "learning_rate": 1.4800297281787314e-05, "loss": 4.047, "step": 3045 }, { "epoch": 1.0146581160989423, "grad_norm": 0.51171875, "learning_rate": 1.4800111694041293e-05, "loss": 4.0953, "step": 3046 }, { "epoch": 1.014991255101191, "grad_norm": 0.50390625, "learning_rate": 1.4799926021264782e-05, "loss": 4.1063, "step": 3047 }, { "epoch": 1.0153243941034396, "grad_norm": 0.478515625, "learning_rate": 1.4799740263459947e-05, "loss": 4.1209, "step": 3048 }, { "epoch": 1.0156575331056883, "grad_norm": 0.4921875, "learning_rate": 1.4799554420628946e-05, "loss": 4.1791, "step": 3049 }, { "epoch": 1.015990672107937, "grad_norm": 0.51171875, "learning_rate": 1.4799368492773946e-05, "loss": 4.0766, "step": 3050 }, { "epoch": 1.0163238111101858, "grad_norm": 0.5390625, "learning_rate": 1.4799182479897117e-05, "loss": 4.0758, "step": 3051 }, { "epoch": 1.0166569501124345, "grad_norm": 0.515625, "learning_rate": 1.4798996382000622e-05, "loss": 4.0538, "step": 3052 }, { "epoch": 1.016990089114683, "grad_norm": 0.494140625, "learning_rate": 1.4798810199086626e-05, "loss": 4.1195, "step": 3053 }, { "epoch": 1.0173232281169318, "grad_norm": 0.4921875, "learning_rate": 1.47986239311573e-05, "loss": 4.142, "step": 3054 }, { "epoch": 1.0176563671191805, "grad_norm": 0.48046875, "learning_rate": 1.4798437578214817e-05, "loss": 4.1253, "step": 3055 }, { "epoch": 1.0179895061214292, "grad_norm": 0.53125, "learning_rate": 1.4798251140261342e-05, "loss": 4.155, "step": 3056 }, { "epoch": 1.0183226451236778, "grad_norm": 0.498046875, "learning_rate": 1.4798064617299051e-05, "loss": 4.1057, "step": 3057 }, { "epoch": 1.0186557841259265, "grad_norm": 0.51171875, "learning_rate": 1.4797878009330113e-05, "loss": 4.0914, "step": 3058 }, { "epoch": 1.0189889231281752, "grad_norm": 0.51171875, "learning_rate": 1.4797691316356707e-05, "loss": 4.0279, "step": 3059 }, { "epoch": 1.019322062130424, "grad_norm": 0.48046875, "learning_rate": 1.4797504538381e-05, "loss": 4.0996, "step": 3060 }, { "epoch": 1.0196552011326725, "grad_norm": 0.5078125, "learning_rate": 1.4797317675405174e-05, "loss": 4.1277, "step": 3061 }, { "epoch": 1.0199883401349212, "grad_norm": 0.53125, "learning_rate": 1.4797130727431402e-05, "loss": 4.1319, "step": 3062 }, { "epoch": 1.02032147913717, "grad_norm": 0.48828125, "learning_rate": 1.4796943694461861e-05, "loss": 4.0537, "step": 3063 }, { "epoch": 1.0206546181394187, "grad_norm": 0.49609375, "learning_rate": 1.4796756576498733e-05, "loss": 4.0692, "step": 3064 }, { "epoch": 1.0209877571416675, "grad_norm": 0.515625, "learning_rate": 1.4796569373544195e-05, "loss": 4.1102, "step": 3065 }, { "epoch": 1.021320896143916, "grad_norm": 0.5390625, "learning_rate": 1.4796382085600428e-05, "loss": 4.0329, "step": 3066 }, { "epoch": 1.0216540351461647, "grad_norm": 0.5, "learning_rate": 1.4796194712669616e-05, "loss": 4.0563, "step": 3067 }, { "epoch": 1.0219871741484134, "grad_norm": 0.49609375, "learning_rate": 1.4796007254753937e-05, "loss": 4.0571, "step": 3068 }, { "epoch": 1.0223203131506622, "grad_norm": 0.515625, "learning_rate": 1.4795819711855576e-05, "loss": 4.1201, "step": 3069 }, { "epoch": 1.0226534521529107, "grad_norm": 0.498046875, "learning_rate": 1.4795632083976719e-05, "loss": 4.0675, "step": 3070 }, { "epoch": 1.0229865911551594, "grad_norm": 0.515625, "learning_rate": 1.479544437111955e-05, "loss": 4.0661, "step": 3071 }, { "epoch": 1.0233197301574082, "grad_norm": 0.490234375, "learning_rate": 1.4795256573286256e-05, "loss": 4.1208, "step": 3072 }, { "epoch": 1.023652869159657, "grad_norm": 0.5, "learning_rate": 1.4795068690479027e-05, "loss": 4.1541, "step": 3073 }, { "epoch": 1.0239860081619057, "grad_norm": 0.50390625, "learning_rate": 1.4794880722700045e-05, "loss": 4.0806, "step": 3074 }, { "epoch": 1.0243191471641542, "grad_norm": 0.53125, "learning_rate": 1.4794692669951506e-05, "loss": 4.1313, "step": 3075 }, { "epoch": 1.024652286166403, "grad_norm": 0.51953125, "learning_rate": 1.4794504532235597e-05, "loss": 4.1211, "step": 3076 }, { "epoch": 1.0249854251686517, "grad_norm": 0.50390625, "learning_rate": 1.4794316309554508e-05, "loss": 4.0833, "step": 3077 }, { "epoch": 1.0253185641709004, "grad_norm": 0.5078125, "learning_rate": 1.4794128001910438e-05, "loss": 4.0916, "step": 3078 }, { "epoch": 1.025651703173149, "grad_norm": 0.515625, "learning_rate": 1.479393960930557e-05, "loss": 4.0073, "step": 3079 }, { "epoch": 1.0259848421753976, "grad_norm": 0.50390625, "learning_rate": 1.4793751131742109e-05, "loss": 4.0376, "step": 3080 }, { "epoch": 1.0263179811776464, "grad_norm": 0.50390625, "learning_rate": 1.4793562569222244e-05, "loss": 4.1227, "step": 3081 }, { "epoch": 1.0266511201798951, "grad_norm": 0.5234375, "learning_rate": 1.4793373921748171e-05, "loss": 4.1166, "step": 3082 }, { "epoch": 1.0269842591821436, "grad_norm": 0.48828125, "learning_rate": 1.4793185189322092e-05, "loss": 4.0865, "step": 3083 }, { "epoch": 1.0273173981843924, "grad_norm": 0.5078125, "learning_rate": 1.4792996371946202e-05, "loss": 4.0623, "step": 3084 }, { "epoch": 1.0276505371866411, "grad_norm": 0.50390625, "learning_rate": 1.4792807469622699e-05, "loss": 4.0, "step": 3085 }, { "epoch": 1.0279836761888899, "grad_norm": 0.48046875, "learning_rate": 1.4792618482353788e-05, "loss": 4.0891, "step": 3086 }, { "epoch": 1.0283168151911386, "grad_norm": 0.51171875, "learning_rate": 1.4792429410141664e-05, "loss": 4.0985, "step": 3087 }, { "epoch": 1.0286499541933871, "grad_norm": 0.5078125, "learning_rate": 1.4792240252988538e-05, "loss": 4.0408, "step": 3088 }, { "epoch": 1.0289830931956359, "grad_norm": 0.49609375, "learning_rate": 1.4792051010896604e-05, "loss": 4.0361, "step": 3089 }, { "epoch": 1.0293162321978846, "grad_norm": 0.4921875, "learning_rate": 1.4791861683868069e-05, "loss": 4.09, "step": 3090 }, { "epoch": 1.0296493712001333, "grad_norm": 0.51171875, "learning_rate": 1.4791672271905143e-05, "loss": 4.1039, "step": 3091 }, { "epoch": 1.0299825102023819, "grad_norm": 0.50390625, "learning_rate": 1.4791482775010025e-05, "loss": 4.1478, "step": 3092 }, { "epoch": 1.0303156492046306, "grad_norm": 0.5078125, "learning_rate": 1.479129319318493e-05, "loss": 4.1337, "step": 3093 }, { "epoch": 1.0306487882068793, "grad_norm": 0.5078125, "learning_rate": 1.479110352643206e-05, "loss": 4.0666, "step": 3094 }, { "epoch": 1.030981927209128, "grad_norm": 0.51171875, "learning_rate": 1.4790913774753627e-05, "loss": 4.0844, "step": 3095 }, { "epoch": 1.0313150662113766, "grad_norm": 0.4921875, "learning_rate": 1.4790723938151842e-05, "loss": 4.1686, "step": 3096 }, { "epoch": 1.0316482052136253, "grad_norm": 0.51953125, "learning_rate": 1.4790534016628912e-05, "loss": 4.1134, "step": 3097 }, { "epoch": 1.031981344215874, "grad_norm": 0.5625, "learning_rate": 1.4790344010187054e-05, "loss": 4.0642, "step": 3098 }, { "epoch": 1.0323144832181228, "grad_norm": 0.50390625, "learning_rate": 1.4790153918828479e-05, "loss": 4.0572, "step": 3099 }, { "epoch": 1.0326476222203715, "grad_norm": 0.470703125, "learning_rate": 1.47899637425554e-05, "loss": 4.1511, "step": 3100 }, { "epoch": 1.03298076122262, "grad_norm": 0.52734375, "learning_rate": 1.4789773481370033e-05, "loss": 4.1233, "step": 3101 }, { "epoch": 1.0333139002248688, "grad_norm": 0.51171875, "learning_rate": 1.4789583135274597e-05, "loss": 4.1232, "step": 3102 }, { "epoch": 1.0336470392271175, "grad_norm": 0.515625, "learning_rate": 1.4789392704271305e-05, "loss": 4.0818, "step": 3103 }, { "epoch": 1.0339801782293663, "grad_norm": 0.53125, "learning_rate": 1.4789202188362378e-05, "loss": 4.0165, "step": 3104 }, { "epoch": 1.0343133172316148, "grad_norm": 0.5, "learning_rate": 1.4789011587550034e-05, "loss": 4.1412, "step": 3105 }, { "epoch": 1.0346464562338635, "grad_norm": 0.4921875, "learning_rate": 1.478882090183649e-05, "loss": 4.1324, "step": 3106 }, { "epoch": 1.0349795952361123, "grad_norm": 0.490234375, "learning_rate": 1.4788630131223974e-05, "loss": 4.0858, "step": 3107 }, { "epoch": 1.035312734238361, "grad_norm": 0.482421875, "learning_rate": 1.4788439275714702e-05, "loss": 4.0955, "step": 3108 }, { "epoch": 1.0356458732406097, "grad_norm": 0.515625, "learning_rate": 1.4788248335310899e-05, "loss": 4.1139, "step": 3109 }, { "epoch": 1.0359790122428583, "grad_norm": 0.50390625, "learning_rate": 1.478805731001479e-05, "loss": 4.0765, "step": 3110 }, { "epoch": 1.036312151245107, "grad_norm": 0.4921875, "learning_rate": 1.4787866199828597e-05, "loss": 4.1045, "step": 3111 }, { "epoch": 1.0366452902473557, "grad_norm": 0.5078125, "learning_rate": 1.4787675004754552e-05, "loss": 4.0653, "step": 3112 }, { "epoch": 1.0369784292496045, "grad_norm": 0.5078125, "learning_rate": 1.4787483724794877e-05, "loss": 4.0888, "step": 3113 }, { "epoch": 1.037311568251853, "grad_norm": 0.48046875, "learning_rate": 1.47872923599518e-05, "loss": 4.0456, "step": 3114 }, { "epoch": 1.0376447072541017, "grad_norm": 0.5078125, "learning_rate": 1.478710091022755e-05, "loss": 4.0639, "step": 3115 }, { "epoch": 1.0379778462563505, "grad_norm": 0.51171875, "learning_rate": 1.4786909375624362e-05, "loss": 4.1282, "step": 3116 }, { "epoch": 1.0383109852585992, "grad_norm": 0.50390625, "learning_rate": 1.4786717756144462e-05, "loss": 4.0771, "step": 3117 }, { "epoch": 1.0386441242608477, "grad_norm": 0.494140625, "learning_rate": 1.4786526051790082e-05, "loss": 4.0971, "step": 3118 }, { "epoch": 1.0389772632630965, "grad_norm": 0.486328125, "learning_rate": 1.4786334262563455e-05, "loss": 4.0415, "step": 3119 }, { "epoch": 1.0393104022653452, "grad_norm": 0.48828125, "learning_rate": 1.478614238846682e-05, "loss": 4.1131, "step": 3120 }, { "epoch": 1.039643541267594, "grad_norm": 0.53125, "learning_rate": 1.4785950429502403e-05, "loss": 4.0176, "step": 3121 }, { "epoch": 1.0399766802698427, "grad_norm": 0.498046875, "learning_rate": 1.478575838567245e-05, "loss": 4.0896, "step": 3122 }, { "epoch": 1.0403098192720912, "grad_norm": 0.5, "learning_rate": 1.4785566256979191e-05, "loss": 4.0694, "step": 3123 }, { "epoch": 1.04064295827434, "grad_norm": 0.5, "learning_rate": 1.4785374043424865e-05, "loss": 4.1005, "step": 3124 }, { "epoch": 1.0409760972765887, "grad_norm": 0.486328125, "learning_rate": 1.478518174501171e-05, "loss": 4.085, "step": 3125 }, { "epoch": 1.0413092362788374, "grad_norm": 0.50390625, "learning_rate": 1.4784989361741968e-05, "loss": 4.0191, "step": 3126 }, { "epoch": 1.041642375281086, "grad_norm": 0.498046875, "learning_rate": 1.4784796893617879e-05, "loss": 4.1194, "step": 3127 }, { "epoch": 1.0419755142833347, "grad_norm": 0.5, "learning_rate": 1.4784604340641688e-05, "loss": 4.1051, "step": 3128 }, { "epoch": 1.0423086532855834, "grad_norm": 0.52734375, "learning_rate": 1.4784411702815632e-05, "loss": 4.0474, "step": 3129 }, { "epoch": 1.0426417922878322, "grad_norm": 0.51953125, "learning_rate": 1.4784218980141956e-05, "loss": 4.1222, "step": 3130 }, { "epoch": 1.0429749312900807, "grad_norm": 0.482421875, "learning_rate": 1.478402617262291e-05, "loss": 4.1118, "step": 3131 }, { "epoch": 1.0433080702923294, "grad_norm": 0.51953125, "learning_rate": 1.4783833280260736e-05, "loss": 4.0595, "step": 3132 }, { "epoch": 1.0436412092945782, "grad_norm": 0.48828125, "learning_rate": 1.478364030305768e-05, "loss": 4.1248, "step": 3133 }, { "epoch": 1.043974348296827, "grad_norm": 0.48828125, "learning_rate": 1.4783447241015991e-05, "loss": 4.0565, "step": 3134 }, { "epoch": 1.0443074872990756, "grad_norm": 0.484375, "learning_rate": 1.4783254094137917e-05, "loss": 4.0859, "step": 3135 }, { "epoch": 1.0446406263013241, "grad_norm": 0.490234375, "learning_rate": 1.478306086242571e-05, "loss": 4.0993, "step": 3136 }, { "epoch": 1.0449737653035729, "grad_norm": 0.51171875, "learning_rate": 1.4782867545881618e-05, "loss": 4.0516, "step": 3137 }, { "epoch": 1.0453069043058216, "grad_norm": 0.5, "learning_rate": 1.4782674144507892e-05, "loss": 4.0824, "step": 3138 }, { "epoch": 1.0456400433080704, "grad_norm": 0.490234375, "learning_rate": 1.478248065830679e-05, "loss": 4.019, "step": 3139 }, { "epoch": 1.0459731823103189, "grad_norm": 0.52734375, "learning_rate": 1.4782287087280559e-05, "loss": 4.1384, "step": 3140 }, { "epoch": 1.0463063213125676, "grad_norm": 0.50390625, "learning_rate": 1.4782093431431459e-05, "loss": 4.1691, "step": 3141 }, { "epoch": 1.0466394603148164, "grad_norm": 0.51171875, "learning_rate": 1.4781899690761742e-05, "loss": 4.0971, "step": 3142 }, { "epoch": 1.046972599317065, "grad_norm": 0.494140625, "learning_rate": 1.4781705865273666e-05, "loss": 4.0755, "step": 3143 }, { "epoch": 1.0473057383193138, "grad_norm": 0.4921875, "learning_rate": 1.4781511954969489e-05, "loss": 4.1381, "step": 3144 }, { "epoch": 1.0476388773215624, "grad_norm": 0.515625, "learning_rate": 1.4781317959851471e-05, "loss": 4.119, "step": 3145 }, { "epoch": 1.047972016323811, "grad_norm": 0.498046875, "learning_rate": 1.4781123879921869e-05, "loss": 4.0208, "step": 3146 }, { "epoch": 1.0483051553260598, "grad_norm": 0.50390625, "learning_rate": 1.4780929715182945e-05, "loss": 4.1352, "step": 3147 }, { "epoch": 1.0486382943283086, "grad_norm": 0.5078125, "learning_rate": 1.478073546563696e-05, "loss": 4.0465, "step": 3148 }, { "epoch": 1.048971433330557, "grad_norm": 0.5, "learning_rate": 1.4780541131286177e-05, "loss": 4.0831, "step": 3149 }, { "epoch": 1.0493045723328058, "grad_norm": 0.49609375, "learning_rate": 1.4780346712132858e-05, "loss": 4.0416, "step": 3150 }, { "epoch": 1.0496377113350546, "grad_norm": 0.53515625, "learning_rate": 1.4780152208179272e-05, "loss": 4.066, "step": 3151 }, { "epoch": 1.0499708503373033, "grad_norm": 0.48046875, "learning_rate": 1.477995761942768e-05, "loss": 4.0966, "step": 3152 }, { "epoch": 1.0503039893395518, "grad_norm": 0.48828125, "learning_rate": 1.477976294588035e-05, "loss": 4.1311, "step": 3153 }, { "epoch": 1.0506371283418006, "grad_norm": 0.48828125, "learning_rate": 1.477956818753955e-05, "loss": 4.0595, "step": 3154 }, { "epoch": 1.0509702673440493, "grad_norm": 0.5, "learning_rate": 1.4779373344407548e-05, "loss": 4.1798, "step": 3155 }, { "epoch": 1.051303406346298, "grad_norm": 0.52734375, "learning_rate": 1.4779178416486612e-05, "loss": 4.0811, "step": 3156 }, { "epoch": 1.0516365453485468, "grad_norm": 0.51171875, "learning_rate": 1.4778983403779017e-05, "loss": 4.0462, "step": 3157 }, { "epoch": 1.0519696843507953, "grad_norm": 0.484375, "learning_rate": 1.477878830628703e-05, "loss": 4.1076, "step": 3158 }, { "epoch": 1.052302823353044, "grad_norm": 0.484375, "learning_rate": 1.4778593124012925e-05, "loss": 4.0976, "step": 3159 }, { "epoch": 1.0526359623552928, "grad_norm": 0.51171875, "learning_rate": 1.4778397856958976e-05, "loss": 4.1051, "step": 3160 }, { "epoch": 1.0529691013575415, "grad_norm": 0.50390625, "learning_rate": 1.4778202505127455e-05, "loss": 4.0402, "step": 3161 }, { "epoch": 1.05330224035979, "grad_norm": 0.478515625, "learning_rate": 1.4778007068520643e-05, "loss": 4.0965, "step": 3162 }, { "epoch": 1.0536353793620388, "grad_norm": 0.5078125, "learning_rate": 1.477781154714081e-05, "loss": 4.1162, "step": 3163 }, { "epoch": 1.0539685183642875, "grad_norm": 0.51171875, "learning_rate": 1.4777615940990236e-05, "loss": 4.0356, "step": 3164 }, { "epoch": 1.0543016573665362, "grad_norm": 0.494140625, "learning_rate": 1.47774202500712e-05, "loss": 4.0927, "step": 3165 }, { "epoch": 1.0546347963687848, "grad_norm": 0.51953125, "learning_rate": 1.4777224474385985e-05, "loss": 4.1403, "step": 3166 }, { "epoch": 1.0549679353710335, "grad_norm": 0.5390625, "learning_rate": 1.4777028613936862e-05, "loss": 4.0937, "step": 3167 }, { "epoch": 1.0553010743732822, "grad_norm": 0.486328125, "learning_rate": 1.477683266872612e-05, "loss": 3.9925, "step": 3168 }, { "epoch": 1.055634213375531, "grad_norm": 0.50390625, "learning_rate": 1.4776636638756038e-05, "loss": 4.1467, "step": 3169 }, { "epoch": 1.0559673523777797, "grad_norm": 0.5234375, "learning_rate": 1.47764405240289e-05, "loss": 4.0766, "step": 3170 }, { "epoch": 1.0563004913800282, "grad_norm": 0.52734375, "learning_rate": 1.4776244324546992e-05, "loss": 4.0786, "step": 3171 }, { "epoch": 1.056633630382277, "grad_norm": 0.490234375, "learning_rate": 1.4776048040312599e-05, "loss": 4.1283, "step": 3172 }, { "epoch": 1.0569667693845257, "grad_norm": 0.50390625, "learning_rate": 1.4775851671328003e-05, "loss": 4.0019, "step": 3173 }, { "epoch": 1.0572999083867745, "grad_norm": 0.5078125, "learning_rate": 1.4775655217595497e-05, "loss": 4.1092, "step": 3174 }, { "epoch": 1.057633047389023, "grad_norm": 0.515625, "learning_rate": 1.4775458679117366e-05, "loss": 4.0172, "step": 3175 }, { "epoch": 1.0579661863912717, "grad_norm": 0.498046875, "learning_rate": 1.47752620558959e-05, "loss": 4.0558, "step": 3176 }, { "epoch": 1.0582993253935205, "grad_norm": 0.498046875, "learning_rate": 1.477506534793339e-05, "loss": 4.0472, "step": 3177 }, { "epoch": 1.0586324643957692, "grad_norm": 0.51171875, "learning_rate": 1.4774868555232125e-05, "loss": 4.0445, "step": 3178 }, { "epoch": 1.058965603398018, "grad_norm": 0.515625, "learning_rate": 1.4774671677794399e-05, "loss": 4.1103, "step": 3179 }, { "epoch": 1.0592987424002664, "grad_norm": 0.48828125, "learning_rate": 1.4774474715622505e-05, "loss": 4.1399, "step": 3180 }, { "epoch": 1.0596318814025152, "grad_norm": 0.5078125, "learning_rate": 1.4774277668718736e-05, "loss": 4.1231, "step": 3181 }, { "epoch": 1.059965020404764, "grad_norm": 0.50390625, "learning_rate": 1.477408053708539e-05, "loss": 4.0541, "step": 3182 }, { "epoch": 1.0602981594070127, "grad_norm": 0.51953125, "learning_rate": 1.4773883320724763e-05, "loss": 4.0859, "step": 3183 }, { "epoch": 1.0606312984092612, "grad_norm": 0.51953125, "learning_rate": 1.4773686019639147e-05, "loss": 4.0941, "step": 3184 }, { "epoch": 1.06096443741151, "grad_norm": 0.470703125, "learning_rate": 1.4773488633830847e-05, "loss": 4.1067, "step": 3185 }, { "epoch": 1.0612975764137587, "grad_norm": 0.5234375, "learning_rate": 1.4773291163302153e-05, "loss": 4.1179, "step": 3186 }, { "epoch": 1.0616307154160074, "grad_norm": 0.52734375, "learning_rate": 1.4773093608055376e-05, "loss": 4.0529, "step": 3187 }, { "epoch": 1.061963854418256, "grad_norm": 0.515625, "learning_rate": 1.4772895968092812e-05, "loss": 4.0987, "step": 3188 }, { "epoch": 1.0622969934205047, "grad_norm": 0.515625, "learning_rate": 1.4772698243416762e-05, "loss": 4.0957, "step": 3189 }, { "epoch": 1.0626301324227534, "grad_norm": 0.515625, "learning_rate": 1.4772500434029528e-05, "loss": 4.0406, "step": 3190 }, { "epoch": 1.0629632714250021, "grad_norm": 0.48046875, "learning_rate": 1.4772302539933418e-05, "loss": 4.1337, "step": 3191 }, { "epoch": 1.0632964104272509, "grad_norm": 0.48828125, "learning_rate": 1.4772104561130736e-05, "loss": 4.1281, "step": 3192 }, { "epoch": 1.0636295494294994, "grad_norm": 0.50390625, "learning_rate": 1.4771906497623786e-05, "loss": 4.0798, "step": 3193 }, { "epoch": 1.0639626884317481, "grad_norm": 0.484375, "learning_rate": 1.4771708349414877e-05, "loss": 4.1541, "step": 3194 }, { "epoch": 1.0642958274339969, "grad_norm": 0.5078125, "learning_rate": 1.4771510116506315e-05, "loss": 4.0007, "step": 3195 }, { "epoch": 1.0646289664362456, "grad_norm": 0.5234375, "learning_rate": 1.477131179890041e-05, "loss": 4.0617, "step": 3196 }, { "epoch": 1.0649621054384941, "grad_norm": 0.498046875, "learning_rate": 1.4771113396599475e-05, "loss": 4.1624, "step": 3197 }, { "epoch": 1.0652952444407429, "grad_norm": 0.4921875, "learning_rate": 1.4770914909605815e-05, "loss": 4.0521, "step": 3198 }, { "epoch": 1.0656283834429916, "grad_norm": 0.53125, "learning_rate": 1.4770716337921746e-05, "loss": 4.0673, "step": 3199 }, { "epoch": 1.0659615224452403, "grad_norm": 0.498046875, "learning_rate": 1.477051768154958e-05, "loss": 4.11, "step": 3200 }, { "epoch": 1.0662946614474889, "grad_norm": 0.51953125, "learning_rate": 1.4770318940491633e-05, "loss": 4.1376, "step": 3201 }, { "epoch": 1.0666278004497376, "grad_norm": 0.49609375, "learning_rate": 1.4770120114750216e-05, "loss": 4.0363, "step": 3202 }, { "epoch": 1.0669609394519863, "grad_norm": 0.494140625, "learning_rate": 1.4769921204327645e-05, "loss": 4.106, "step": 3203 }, { "epoch": 1.067294078454235, "grad_norm": 0.494140625, "learning_rate": 1.4769722209226242e-05, "loss": 4.1514, "step": 3204 }, { "epoch": 1.0676272174564838, "grad_norm": 0.5078125, "learning_rate": 1.476952312944832e-05, "loss": 4.0843, "step": 3205 }, { "epoch": 1.0679603564587323, "grad_norm": 0.478515625, "learning_rate": 1.4769323964996199e-05, "loss": 4.088, "step": 3206 }, { "epoch": 1.068293495460981, "grad_norm": 0.5, "learning_rate": 1.4769124715872199e-05, "loss": 4.0447, "step": 3207 }, { "epoch": 1.0686266344632298, "grad_norm": 0.47265625, "learning_rate": 1.4768925382078641e-05, "loss": 4.0609, "step": 3208 }, { "epoch": 1.0689597734654785, "grad_norm": 0.515625, "learning_rate": 1.4768725963617849e-05, "loss": 4.0954, "step": 3209 }, { "epoch": 1.069292912467727, "grad_norm": 0.48828125, "learning_rate": 1.476852646049214e-05, "loss": 4.1106, "step": 3210 }, { "epoch": 1.0696260514699758, "grad_norm": 0.51171875, "learning_rate": 1.4768326872703843e-05, "loss": 4.0875, "step": 3211 }, { "epoch": 1.0699591904722245, "grad_norm": 0.46875, "learning_rate": 1.4768127200255281e-05, "loss": 4.2019, "step": 3212 }, { "epoch": 1.0702923294744733, "grad_norm": 0.5078125, "learning_rate": 1.4767927443148778e-05, "loss": 4.0392, "step": 3213 }, { "epoch": 1.070625468476722, "grad_norm": 0.5078125, "learning_rate": 1.4767727601386667e-05, "loss": 4.1026, "step": 3214 }, { "epoch": 1.0709586074789705, "grad_norm": 0.5, "learning_rate": 1.4767527674971268e-05, "loss": 4.0669, "step": 3215 }, { "epoch": 1.0712917464812193, "grad_norm": 0.5, "learning_rate": 1.4767327663904914e-05, "loss": 4.0671, "step": 3216 }, { "epoch": 1.071624885483468, "grad_norm": 0.515625, "learning_rate": 1.4767127568189935e-05, "loss": 4.0709, "step": 3217 }, { "epoch": 1.0719580244857168, "grad_norm": 0.50390625, "learning_rate": 1.4766927387828658e-05, "loss": 4.0659, "step": 3218 }, { "epoch": 1.0722911634879653, "grad_norm": 0.51171875, "learning_rate": 1.4766727122823417e-05, "loss": 4.0816, "step": 3219 }, { "epoch": 1.072624302490214, "grad_norm": 0.51171875, "learning_rate": 1.4766526773176546e-05, "loss": 4.1413, "step": 3220 }, { "epoch": 1.0729574414924627, "grad_norm": 0.5234375, "learning_rate": 1.4766326338890376e-05, "loss": 4.0692, "step": 3221 }, { "epoch": 1.0732905804947115, "grad_norm": 0.486328125, "learning_rate": 1.4766125819967245e-05, "loss": 4.21, "step": 3222 }, { "epoch": 1.07362371949696, "grad_norm": 0.51953125, "learning_rate": 1.4765925216409487e-05, "loss": 4.0895, "step": 3223 }, { "epoch": 1.0739568584992087, "grad_norm": 0.498046875, "learning_rate": 1.4765724528219437e-05, "loss": 4.1692, "step": 3224 }, { "epoch": 1.0742899975014575, "grad_norm": 0.51171875, "learning_rate": 1.4765523755399433e-05, "loss": 4.024, "step": 3225 }, { "epoch": 1.0746231365037062, "grad_norm": 0.51171875, "learning_rate": 1.4765322897951814e-05, "loss": 4.0111, "step": 3226 }, { "epoch": 1.074956275505955, "grad_norm": 0.52734375, "learning_rate": 1.4765121955878922e-05, "loss": 4.0823, "step": 3227 }, { "epoch": 1.0752894145082035, "grad_norm": 0.5234375, "learning_rate": 1.4764920929183095e-05, "loss": 4.0717, "step": 3228 }, { "epoch": 1.0756225535104522, "grad_norm": 0.5, "learning_rate": 1.4764719817866674e-05, "loss": 4.0979, "step": 3229 }, { "epoch": 1.075955692512701, "grad_norm": 0.5078125, "learning_rate": 1.4764518621932003e-05, "loss": 4.158, "step": 3230 }, { "epoch": 1.0762888315149497, "grad_norm": 0.515625, "learning_rate": 1.4764317341381424e-05, "loss": 4.0972, "step": 3231 }, { "epoch": 1.0766219705171982, "grad_norm": 0.515625, "learning_rate": 1.4764115976217284e-05, "loss": 4.0654, "step": 3232 }, { "epoch": 1.076955109519447, "grad_norm": 0.46875, "learning_rate": 1.4763914526441925e-05, "loss": 4.0989, "step": 3233 }, { "epoch": 1.0772882485216957, "grad_norm": 0.48828125, "learning_rate": 1.4763712992057697e-05, "loss": 4.2128, "step": 3234 }, { "epoch": 1.0776213875239444, "grad_norm": 0.52734375, "learning_rate": 1.4763511373066944e-05, "loss": 4.0364, "step": 3235 }, { "epoch": 1.077954526526193, "grad_norm": 0.51171875, "learning_rate": 1.4763309669472018e-05, "loss": 4.1239, "step": 3236 }, { "epoch": 1.0782876655284417, "grad_norm": 0.51953125, "learning_rate": 1.4763107881275266e-05, "loss": 4.0734, "step": 3237 }, { "epoch": 1.0786208045306904, "grad_norm": 0.494140625, "learning_rate": 1.476290600847904e-05, "loss": 4.0998, "step": 3238 }, { "epoch": 1.0789539435329392, "grad_norm": 0.494140625, "learning_rate": 1.476270405108569e-05, "loss": 4.0565, "step": 3239 }, { "epoch": 1.079287082535188, "grad_norm": 0.4921875, "learning_rate": 1.4762502009097568e-05, "loss": 4.1124, "step": 3240 }, { "epoch": 1.0796202215374364, "grad_norm": 0.484375, "learning_rate": 1.476229988251703e-05, "loss": 4.1089, "step": 3241 }, { "epoch": 1.0799533605396852, "grad_norm": 0.52734375, "learning_rate": 1.4762097671346427e-05, "loss": 4.0418, "step": 3242 }, { "epoch": 1.080286499541934, "grad_norm": 0.482421875, "learning_rate": 1.4761895375588115e-05, "loss": 4.1236, "step": 3243 }, { "epoch": 1.0806196385441826, "grad_norm": 0.51171875, "learning_rate": 1.4761692995244453e-05, "loss": 4.0925, "step": 3244 }, { "epoch": 1.0809527775464312, "grad_norm": 0.51171875, "learning_rate": 1.4761490530317794e-05, "loss": 4.1341, "step": 3245 }, { "epoch": 1.08128591654868, "grad_norm": 0.490234375, "learning_rate": 1.4761287980810502e-05, "loss": 4.1138, "step": 3246 }, { "epoch": 1.0816190555509286, "grad_norm": 0.50390625, "learning_rate": 1.4761085346724931e-05, "loss": 4.0962, "step": 3247 }, { "epoch": 1.0819521945531774, "grad_norm": 0.51953125, "learning_rate": 1.4760882628063443e-05, "loss": 4.0587, "step": 3248 }, { "epoch": 1.082285333555426, "grad_norm": 0.494140625, "learning_rate": 1.47606798248284e-05, "loss": 4.0976, "step": 3249 }, { "epoch": 1.0826184725576746, "grad_norm": 0.49609375, "learning_rate": 1.4760476937022163e-05, "loss": 4.1307, "step": 3250 }, { "epoch": 1.0829516115599234, "grad_norm": 0.48046875, "learning_rate": 1.4760273964647095e-05, "loss": 4.1273, "step": 3251 }, { "epoch": 1.083284750562172, "grad_norm": 0.5078125, "learning_rate": 1.4760070907705564e-05, "loss": 4.1668, "step": 3252 }, { "epoch": 1.0836178895644208, "grad_norm": 0.53515625, "learning_rate": 1.4759867766199931e-05, "loss": 4.0109, "step": 3253 }, { "epoch": 1.0839510285666694, "grad_norm": 0.498046875, "learning_rate": 1.4759664540132563e-05, "loss": 4.1233, "step": 3254 }, { "epoch": 1.084284167568918, "grad_norm": 0.51953125, "learning_rate": 1.4759461229505825e-05, "loss": 4.1425, "step": 3255 }, { "epoch": 1.0846173065711668, "grad_norm": 0.494140625, "learning_rate": 1.4759257834322091e-05, "loss": 4.1328, "step": 3256 }, { "epoch": 1.0849504455734156, "grad_norm": 0.51171875, "learning_rate": 1.4759054354583726e-05, "loss": 4.0923, "step": 3257 }, { "epoch": 1.085283584575664, "grad_norm": 0.515625, "learning_rate": 1.47588507902931e-05, "loss": 4.0607, "step": 3258 }, { "epoch": 1.0856167235779128, "grad_norm": 0.51953125, "learning_rate": 1.4758647141452586e-05, "loss": 4.1291, "step": 3259 }, { "epoch": 1.0859498625801616, "grad_norm": 0.49609375, "learning_rate": 1.4758443408064557e-05, "loss": 4.0885, "step": 3260 }, { "epoch": 1.0862830015824103, "grad_norm": 0.50390625, "learning_rate": 1.4758239590131378e-05, "loss": 4.0066, "step": 3261 }, { "epoch": 1.086616140584659, "grad_norm": 0.49609375, "learning_rate": 1.4758035687655435e-05, "loss": 4.1384, "step": 3262 }, { "epoch": 1.0869492795869076, "grad_norm": 0.515625, "learning_rate": 1.4757831700639092e-05, "loss": 4.0796, "step": 3263 }, { "epoch": 1.0872824185891563, "grad_norm": 0.4921875, "learning_rate": 1.4757627629084734e-05, "loss": 4.033, "step": 3264 }, { "epoch": 1.087615557591405, "grad_norm": 0.50390625, "learning_rate": 1.4757423472994734e-05, "loss": 4.0891, "step": 3265 }, { "epoch": 1.0879486965936538, "grad_norm": 0.51171875, "learning_rate": 1.4757219232371468e-05, "loss": 4.1514, "step": 3266 }, { "epoch": 1.0882818355959023, "grad_norm": 0.5078125, "learning_rate": 1.475701490721732e-05, "loss": 4.1072, "step": 3267 }, { "epoch": 1.088614974598151, "grad_norm": 0.5, "learning_rate": 1.4756810497534664e-05, "loss": 4.163, "step": 3268 }, { "epoch": 1.0889481136003998, "grad_norm": 0.51171875, "learning_rate": 1.4756606003325887e-05, "loss": 4.0966, "step": 3269 }, { "epoch": 1.0892812526026485, "grad_norm": 0.4921875, "learning_rate": 1.4756401424593365e-05, "loss": 4.0545, "step": 3270 }, { "epoch": 1.089614391604897, "grad_norm": 0.5, "learning_rate": 1.4756196761339485e-05, "loss": 4.0804, "step": 3271 }, { "epoch": 1.0899475306071458, "grad_norm": 0.50390625, "learning_rate": 1.4755992013566629e-05, "loss": 4.1148, "step": 3272 }, { "epoch": 1.0902806696093945, "grad_norm": 0.5, "learning_rate": 1.4755787181277183e-05, "loss": 4.1142, "step": 3273 }, { "epoch": 1.0906138086116433, "grad_norm": 0.478515625, "learning_rate": 1.4755582264473533e-05, "loss": 4.073, "step": 3274 }, { "epoch": 1.090946947613892, "grad_norm": 0.53515625, "learning_rate": 1.4755377263158064e-05, "loss": 4.1056, "step": 3275 }, { "epoch": 1.0912800866161405, "grad_norm": 0.51171875, "learning_rate": 1.4755172177333165e-05, "loss": 4.0847, "step": 3276 }, { "epoch": 1.0916132256183892, "grad_norm": 0.50390625, "learning_rate": 1.4754967007001224e-05, "loss": 4.1575, "step": 3277 }, { "epoch": 1.091946364620638, "grad_norm": 0.5390625, "learning_rate": 1.4754761752164633e-05, "loss": 4.1214, "step": 3278 }, { "epoch": 1.0922795036228867, "grad_norm": 0.515625, "learning_rate": 1.4754556412825782e-05, "loss": 4.0688, "step": 3279 }, { "epoch": 1.0926126426251352, "grad_norm": 0.5, "learning_rate": 1.4754350988987062e-05, "loss": 4.1348, "step": 3280 }, { "epoch": 1.092945781627384, "grad_norm": 0.51953125, "learning_rate": 1.4754145480650865e-05, "loss": 4.024, "step": 3281 }, { "epoch": 1.0932789206296327, "grad_norm": 0.5078125, "learning_rate": 1.4753939887819586e-05, "loss": 4.1126, "step": 3282 }, { "epoch": 1.0936120596318815, "grad_norm": 0.51171875, "learning_rate": 1.4753734210495618e-05, "loss": 4.0574, "step": 3283 }, { "epoch": 1.0939451986341302, "grad_norm": 0.494140625, "learning_rate": 1.475352844868136e-05, "loss": 4.0473, "step": 3284 }, { "epoch": 1.0942783376363787, "grad_norm": 0.49609375, "learning_rate": 1.4753322602379207e-05, "loss": 4.147, "step": 3285 }, { "epoch": 1.0946114766386275, "grad_norm": 0.50390625, "learning_rate": 1.4753116671591555e-05, "loss": 4.1162, "step": 3286 }, { "epoch": 1.0949446156408762, "grad_norm": 0.5, "learning_rate": 1.4752910656320802e-05, "loss": 4.0288, "step": 3287 }, { "epoch": 1.095277754643125, "grad_norm": 0.486328125, "learning_rate": 1.4752704556569354e-05, "loss": 4.1561, "step": 3288 }, { "epoch": 1.0956108936453735, "grad_norm": 0.4765625, "learning_rate": 1.4752498372339605e-05, "loss": 4.1624, "step": 3289 }, { "epoch": 1.0959440326476222, "grad_norm": 0.5, "learning_rate": 1.4752292103633958e-05, "loss": 4.0668, "step": 3290 }, { "epoch": 1.096277171649871, "grad_norm": 0.51171875, "learning_rate": 1.4752085750454818e-05, "loss": 4.1194, "step": 3291 }, { "epoch": 1.0966103106521197, "grad_norm": 0.5, "learning_rate": 1.4751879312804586e-05, "loss": 4.0828, "step": 3292 }, { "epoch": 1.0969434496543682, "grad_norm": 0.5390625, "learning_rate": 1.4751672790685668e-05, "loss": 4.0553, "step": 3293 }, { "epoch": 1.097276588656617, "grad_norm": 0.49609375, "learning_rate": 1.4751466184100468e-05, "loss": 4.0351, "step": 3294 }, { "epoch": 1.0976097276588657, "grad_norm": 0.5, "learning_rate": 1.4751259493051394e-05, "loss": 4.0083, "step": 3295 }, { "epoch": 1.0979428666611144, "grad_norm": 0.48828125, "learning_rate": 1.4751052717540853e-05, "loss": 3.9908, "step": 3296 }, { "epoch": 1.0982760056633631, "grad_norm": 0.48828125, "learning_rate": 1.4750845857571256e-05, "loss": 4.1889, "step": 3297 }, { "epoch": 1.0986091446656117, "grad_norm": 0.490234375, "learning_rate": 1.4750638913145009e-05, "loss": 4.1112, "step": 3298 }, { "epoch": 1.0989422836678604, "grad_norm": 0.4921875, "learning_rate": 1.4750431884264523e-05, "loss": 4.0873, "step": 3299 }, { "epoch": 1.0992754226701091, "grad_norm": 0.5078125, "learning_rate": 1.4750224770932208e-05, "loss": 4.0434, "step": 3300 }, { "epoch": 1.0996085616723579, "grad_norm": 0.46484375, "learning_rate": 1.475001757315048e-05, "loss": 4.1521, "step": 3301 }, { "epoch": 1.0999417006746064, "grad_norm": 0.5, "learning_rate": 1.4749810290921751e-05, "loss": 4.0945, "step": 3302 }, { "epoch": 1.1002748396768551, "grad_norm": 0.50390625, "learning_rate": 1.4749602924248436e-05, "loss": 4.031, "step": 3303 }, { "epoch": 1.1006079786791039, "grad_norm": 0.51953125, "learning_rate": 1.474939547313295e-05, "loss": 4.081, "step": 3304 }, { "epoch": 1.1009411176813526, "grad_norm": 0.49609375, "learning_rate": 1.4749187937577707e-05, "loss": 4.1075, "step": 3305 }, { "epoch": 1.1012742566836011, "grad_norm": 0.482421875, "learning_rate": 1.4748980317585126e-05, "loss": 4.1231, "step": 3306 }, { "epoch": 1.1016073956858499, "grad_norm": 0.5, "learning_rate": 1.4748772613157627e-05, "loss": 4.1405, "step": 3307 }, { "epoch": 1.1019405346880986, "grad_norm": 0.51171875, "learning_rate": 1.4748564824297627e-05, "loss": 4.0661, "step": 3308 }, { "epoch": 1.1022736736903473, "grad_norm": 0.498046875, "learning_rate": 1.4748356951007547e-05, "loss": 4.1663, "step": 3309 }, { "epoch": 1.102606812692596, "grad_norm": 0.5390625, "learning_rate": 1.4748148993289811e-05, "loss": 4.0856, "step": 3310 }, { "epoch": 1.1029399516948446, "grad_norm": 0.515625, "learning_rate": 1.4747940951146835e-05, "loss": 4.1032, "step": 3311 }, { "epoch": 1.1032730906970933, "grad_norm": 0.5, "learning_rate": 1.4747732824581048e-05, "loss": 4.0952, "step": 3312 }, { "epoch": 1.103606229699342, "grad_norm": 0.5078125, "learning_rate": 1.4747524613594871e-05, "loss": 4.0793, "step": 3313 }, { "epoch": 1.1039393687015908, "grad_norm": 0.55078125, "learning_rate": 1.4747316318190733e-05, "loss": 4.1144, "step": 3314 }, { "epoch": 1.1042725077038393, "grad_norm": 0.498046875, "learning_rate": 1.4747107938371055e-05, "loss": 4.1418, "step": 3315 }, { "epoch": 1.104605646706088, "grad_norm": 0.53515625, "learning_rate": 1.4746899474138266e-05, "loss": 4.0412, "step": 3316 }, { "epoch": 1.1049387857083368, "grad_norm": 0.52734375, "learning_rate": 1.4746690925494795e-05, "loss": 4.0826, "step": 3317 }, { "epoch": 1.1052719247105856, "grad_norm": 0.5078125, "learning_rate": 1.4746482292443074e-05, "loss": 4.1564, "step": 3318 }, { "epoch": 1.1056050637128343, "grad_norm": 0.498046875, "learning_rate": 1.4746273574985527e-05, "loss": 4.0419, "step": 3319 }, { "epoch": 1.1059382027150828, "grad_norm": 0.49609375, "learning_rate": 1.474606477312459e-05, "loss": 4.085, "step": 3320 }, { "epoch": 1.1062713417173315, "grad_norm": 0.5234375, "learning_rate": 1.4745855886862695e-05, "loss": 4.0344, "step": 3321 }, { "epoch": 1.1066044807195803, "grad_norm": 0.51171875, "learning_rate": 1.474564691620227e-05, "loss": 4.1319, "step": 3322 }, { "epoch": 1.106937619721829, "grad_norm": 0.5, "learning_rate": 1.4745437861145754e-05, "loss": 4.0791, "step": 3323 }, { "epoch": 1.1072707587240775, "grad_norm": 0.5234375, "learning_rate": 1.4745228721695582e-05, "loss": 4.0992, "step": 3324 }, { "epoch": 1.1076038977263263, "grad_norm": 0.51171875, "learning_rate": 1.4745019497854186e-05, "loss": 4.0458, "step": 3325 }, { "epoch": 1.107937036728575, "grad_norm": 0.5078125, "learning_rate": 1.4744810189624004e-05, "loss": 4.1329, "step": 3326 }, { "epoch": 1.1082701757308238, "grad_norm": 0.5078125, "learning_rate": 1.4744600797007479e-05, "loss": 4.0938, "step": 3327 }, { "epoch": 1.1086033147330723, "grad_norm": 0.5078125, "learning_rate": 1.4744391320007046e-05, "loss": 4.1475, "step": 3328 }, { "epoch": 1.108936453735321, "grad_norm": 0.5234375, "learning_rate": 1.4744181758625145e-05, "loss": 4.1074, "step": 3329 }, { "epoch": 1.1092695927375698, "grad_norm": 0.48828125, "learning_rate": 1.4743972112864218e-05, "loss": 4.1858, "step": 3330 }, { "epoch": 1.1096027317398185, "grad_norm": 0.498046875, "learning_rate": 1.4743762382726705e-05, "loss": 4.1303, "step": 3331 }, { "epoch": 1.109935870742067, "grad_norm": 0.49609375, "learning_rate": 1.474355256821505e-05, "loss": 4.0859, "step": 3332 }, { "epoch": 1.1102690097443157, "grad_norm": 0.5, "learning_rate": 1.47433426693317e-05, "loss": 4.1065, "step": 3333 }, { "epoch": 1.1106021487465645, "grad_norm": 0.52734375, "learning_rate": 1.4743132686079093e-05, "loss": 4.0909, "step": 3334 }, { "epoch": 1.1109352877488132, "grad_norm": 0.498046875, "learning_rate": 1.4742922618459681e-05, "loss": 4.1176, "step": 3335 }, { "epoch": 1.111268426751062, "grad_norm": 0.515625, "learning_rate": 1.4742712466475908e-05, "loss": 4.0814, "step": 3336 }, { "epoch": 1.1116015657533105, "grad_norm": 0.494140625, "learning_rate": 1.4742502230130223e-05, "loss": 4.0886, "step": 3337 }, { "epoch": 1.1119347047555592, "grad_norm": 0.51171875, "learning_rate": 1.4742291909425074e-05, "loss": 4.1241, "step": 3338 }, { "epoch": 1.112267843757808, "grad_norm": 0.50390625, "learning_rate": 1.4742081504362911e-05, "loss": 4.1081, "step": 3339 }, { "epoch": 1.1126009827600567, "grad_norm": 0.515625, "learning_rate": 1.4741871014946184e-05, "loss": 4.0444, "step": 3340 }, { "epoch": 1.1129341217623052, "grad_norm": 0.50390625, "learning_rate": 1.4741660441177348e-05, "loss": 4.0624, "step": 3341 }, { "epoch": 1.113267260764554, "grad_norm": 0.51953125, "learning_rate": 1.4741449783058851e-05, "loss": 4.121, "step": 3342 }, { "epoch": 1.1136003997668027, "grad_norm": 0.5, "learning_rate": 1.4741239040593148e-05, "loss": 4.0657, "step": 3343 }, { "epoch": 1.1139335387690514, "grad_norm": 0.5, "learning_rate": 1.47410282137827e-05, "loss": 4.1256, "step": 3344 }, { "epoch": 1.1142666777713002, "grad_norm": 0.51171875, "learning_rate": 1.4740817302629952e-05, "loss": 4.1414, "step": 3345 }, { "epoch": 1.1145998167735487, "grad_norm": 0.515625, "learning_rate": 1.4740606307137368e-05, "loss": 4.1141, "step": 3346 }, { "epoch": 1.1149329557757974, "grad_norm": 0.52734375, "learning_rate": 1.4740395227307404e-05, "loss": 4.1515, "step": 3347 }, { "epoch": 1.1152660947780462, "grad_norm": 0.515625, "learning_rate": 1.4740184063142516e-05, "loss": 4.0164, "step": 3348 }, { "epoch": 1.115599233780295, "grad_norm": 0.515625, "learning_rate": 1.4739972814645168e-05, "loss": 4.0994, "step": 3349 }, { "epoch": 1.1159323727825434, "grad_norm": 0.53515625, "learning_rate": 1.4739761481817816e-05, "loss": 4.0363, "step": 3350 }, { "epoch": 1.1162655117847922, "grad_norm": 0.486328125, "learning_rate": 1.4739550064662927e-05, "loss": 4.0887, "step": 3351 }, { "epoch": 1.116598650787041, "grad_norm": 0.5, "learning_rate": 1.4739338563182959e-05, "loss": 4.1675, "step": 3352 }, { "epoch": 1.1169317897892896, "grad_norm": 0.50390625, "learning_rate": 1.473912697738038e-05, "loss": 4.0895, "step": 3353 }, { "epoch": 1.1172649287915384, "grad_norm": 0.50390625, "learning_rate": 1.4738915307257649e-05, "loss": 4.147, "step": 3354 }, { "epoch": 1.117598067793787, "grad_norm": 0.515625, "learning_rate": 1.4738703552817234e-05, "loss": 4.1174, "step": 3355 }, { "epoch": 1.1179312067960356, "grad_norm": 0.49609375, "learning_rate": 1.47384917140616e-05, "loss": 4.0994, "step": 3356 }, { "epoch": 1.1182643457982844, "grad_norm": 0.5078125, "learning_rate": 1.473827979099322e-05, "loss": 4.058, "step": 3357 }, { "epoch": 1.1185974848005331, "grad_norm": 0.51171875, "learning_rate": 1.4738067783614557e-05, "loss": 4.0517, "step": 3358 }, { "epoch": 1.1189306238027816, "grad_norm": 0.5, "learning_rate": 1.473785569192808e-05, "loss": 4.0887, "step": 3359 }, { "epoch": 1.1192637628050304, "grad_norm": 0.51171875, "learning_rate": 1.4737643515936264e-05, "loss": 4.1044, "step": 3360 }, { "epoch": 1.119596901807279, "grad_norm": 0.51953125, "learning_rate": 1.4737431255641578e-05, "loss": 4.0738, "step": 3361 }, { "epoch": 1.1199300408095278, "grad_norm": 0.5, "learning_rate": 1.4737218911046491e-05, "loss": 4.1272, "step": 3362 }, { "epoch": 1.1202631798117764, "grad_norm": 0.5, "learning_rate": 1.4737006482153482e-05, "loss": 4.0956, "step": 3363 }, { "epoch": 1.120596318814025, "grad_norm": 0.515625, "learning_rate": 1.4736793968965022e-05, "loss": 4.0805, "step": 3364 }, { "epoch": 1.1209294578162738, "grad_norm": 0.5, "learning_rate": 1.4736581371483587e-05, "loss": 4.0584, "step": 3365 }, { "epoch": 1.1212625968185226, "grad_norm": 0.5, "learning_rate": 1.4736368689711654e-05, "loss": 4.059, "step": 3366 }, { "epoch": 1.121595735820771, "grad_norm": 0.4765625, "learning_rate": 1.47361559236517e-05, "loss": 4.0697, "step": 3367 }, { "epoch": 1.1219288748230198, "grad_norm": 0.490234375, "learning_rate": 1.47359430733062e-05, "loss": 4.0596, "step": 3368 }, { "epoch": 1.1222620138252686, "grad_norm": 0.48828125, "learning_rate": 1.473573013867764e-05, "loss": 4.1229, "step": 3369 }, { "epoch": 1.1225951528275173, "grad_norm": 0.5, "learning_rate": 1.4735517119768495e-05, "loss": 4.0813, "step": 3370 }, { "epoch": 1.122928291829766, "grad_norm": 0.498046875, "learning_rate": 1.4735304016581248e-05, "loss": 4.0488, "step": 3371 }, { "epoch": 1.1232614308320146, "grad_norm": 0.50390625, "learning_rate": 1.473509082911838e-05, "loss": 4.0882, "step": 3372 }, { "epoch": 1.1235945698342633, "grad_norm": 0.51171875, "learning_rate": 1.4734877557382374e-05, "loss": 4.0612, "step": 3373 }, { "epoch": 1.123927708836512, "grad_norm": 0.5, "learning_rate": 1.473466420137572e-05, "loss": 4.0291, "step": 3374 }, { "epoch": 1.1242608478387608, "grad_norm": 0.498046875, "learning_rate": 1.4734450761100893e-05, "loss": 4.1303, "step": 3375 }, { "epoch": 1.1245939868410093, "grad_norm": 0.51953125, "learning_rate": 1.4734237236560386e-05, "loss": 4.1141, "step": 3376 }, { "epoch": 1.124927125843258, "grad_norm": 0.490234375, "learning_rate": 1.4734023627756686e-05, "loss": 4.0595, "step": 3377 }, { "epoch": 1.1252602648455068, "grad_norm": 0.48828125, "learning_rate": 1.473380993469228e-05, "loss": 4.1339, "step": 3378 }, { "epoch": 1.1255934038477555, "grad_norm": 0.5078125, "learning_rate": 1.4733596157369653e-05, "loss": 4.0859, "step": 3379 }, { "epoch": 1.1259265428500043, "grad_norm": 0.5078125, "learning_rate": 1.4733382295791303e-05, "loss": 4.1001, "step": 3380 }, { "epoch": 1.1262596818522528, "grad_norm": 0.498046875, "learning_rate": 1.4733168349959715e-05, "loss": 4.1328, "step": 3381 }, { "epoch": 1.1265928208545015, "grad_norm": 0.4765625, "learning_rate": 1.4732954319877382e-05, "loss": 4.0696, "step": 3382 }, { "epoch": 1.1269259598567503, "grad_norm": 0.50390625, "learning_rate": 1.4732740205546797e-05, "loss": 4.0933, "step": 3383 }, { "epoch": 1.127259098858999, "grad_norm": 0.52734375, "learning_rate": 1.4732526006970457e-05, "loss": 4.1128, "step": 3384 }, { "epoch": 1.1275922378612475, "grad_norm": 0.5078125, "learning_rate": 1.4732311724150852e-05, "loss": 4.0582, "step": 3385 }, { "epoch": 1.1279253768634963, "grad_norm": 0.51171875, "learning_rate": 1.4732097357090482e-05, "loss": 4.0729, "step": 3386 }, { "epoch": 1.128258515865745, "grad_norm": 0.50390625, "learning_rate": 1.4731882905791844e-05, "loss": 4.0179, "step": 3387 }, { "epoch": 1.1285916548679937, "grad_norm": 0.498046875, "learning_rate": 1.4731668370257433e-05, "loss": 4.0674, "step": 3388 }, { "epoch": 1.1289247938702425, "grad_norm": 0.52734375, "learning_rate": 1.4731453750489749e-05, "loss": 4.0397, "step": 3389 }, { "epoch": 1.129257932872491, "grad_norm": 0.5078125, "learning_rate": 1.4731239046491291e-05, "loss": 4.1384, "step": 3390 }, { "epoch": 1.1295910718747397, "grad_norm": 0.490234375, "learning_rate": 1.4731024258264565e-05, "loss": 4.1394, "step": 3391 }, { "epoch": 1.1299242108769885, "grad_norm": 0.49609375, "learning_rate": 1.4730809385812065e-05, "loss": 4.139, "step": 3392 }, { "epoch": 1.1302573498792372, "grad_norm": 0.4921875, "learning_rate": 1.4730594429136298e-05, "loss": 4.1273, "step": 3393 }, { "epoch": 1.1305904888814857, "grad_norm": 0.498046875, "learning_rate": 1.4730379388239767e-05, "loss": 4.0667, "step": 3394 }, { "epoch": 1.1309236278837345, "grad_norm": 0.515625, "learning_rate": 1.473016426312498e-05, "loss": 3.9924, "step": 3395 }, { "epoch": 1.1312567668859832, "grad_norm": 0.5, "learning_rate": 1.4729949053794436e-05, "loss": 4.0258, "step": 3396 }, { "epoch": 1.131589905888232, "grad_norm": 0.52734375, "learning_rate": 1.4729733760250648e-05, "loss": 4.0468, "step": 3397 }, { "epoch": 1.1319230448904805, "grad_norm": 0.50390625, "learning_rate": 1.4729518382496123e-05, "loss": 4.1631, "step": 3398 }, { "epoch": 1.1322561838927292, "grad_norm": 0.5, "learning_rate": 1.4729302920533364e-05, "loss": 4.1285, "step": 3399 }, { "epoch": 1.132589322894978, "grad_norm": 0.515625, "learning_rate": 1.4729087374364887e-05, "loss": 4.1326, "step": 3400 }, { "epoch": 1.1329224618972267, "grad_norm": 0.48828125, "learning_rate": 1.4728871743993198e-05, "loss": 4.0971, "step": 3401 }, { "epoch": 1.1332556008994752, "grad_norm": 0.48046875, "learning_rate": 1.4728656029420813e-05, "loss": 4.0445, "step": 3402 }, { "epoch": 1.133588739901724, "grad_norm": 0.51171875, "learning_rate": 1.4728440230650242e-05, "loss": 4.0727, "step": 3403 }, { "epoch": 1.1339218789039727, "grad_norm": 0.5078125, "learning_rate": 1.4728224347683999e-05, "loss": 4.0692, "step": 3404 }, { "epoch": 1.1342550179062214, "grad_norm": 0.5, "learning_rate": 1.4728008380524599e-05, "loss": 4.097, "step": 3405 }, { "epoch": 1.1345881569084701, "grad_norm": 0.53125, "learning_rate": 1.4727792329174557e-05, "loss": 4.0837, "step": 3406 }, { "epoch": 1.1349212959107187, "grad_norm": 0.5, "learning_rate": 1.4727576193636391e-05, "loss": 4.1452, "step": 3407 }, { "epoch": 1.1352544349129674, "grad_norm": 0.5, "learning_rate": 1.4727359973912615e-05, "loss": 4.185, "step": 3408 }, { "epoch": 1.1355875739152161, "grad_norm": 0.52734375, "learning_rate": 1.4727143670005749e-05, "loss": 3.999, "step": 3409 }, { "epoch": 1.1359207129174649, "grad_norm": 0.53515625, "learning_rate": 1.4726927281918316e-05, "loss": 4.0723, "step": 3410 }, { "epoch": 1.1362538519197134, "grad_norm": 0.494140625, "learning_rate": 1.472671080965283e-05, "loss": 4.0774, "step": 3411 }, { "epoch": 1.1365869909219621, "grad_norm": 0.5078125, "learning_rate": 1.4726494253211822e-05, "loss": 4.0904, "step": 3412 }, { "epoch": 1.1369201299242109, "grad_norm": 0.49609375, "learning_rate": 1.4726277612597804e-05, "loss": 4.1142, "step": 3413 }, { "epoch": 1.1372532689264596, "grad_norm": 0.515625, "learning_rate": 1.4726060887813305e-05, "loss": 3.9639, "step": 3414 }, { "epoch": 1.1375864079287084, "grad_norm": 0.50390625, "learning_rate": 1.4725844078860849e-05, "loss": 4.1281, "step": 3415 }, { "epoch": 1.1379195469309569, "grad_norm": 0.47265625, "learning_rate": 1.472562718574296e-05, "loss": 4.1127, "step": 3416 }, { "epoch": 1.1382526859332056, "grad_norm": 0.48828125, "learning_rate": 1.4725410208462166e-05, "loss": 4.0693, "step": 3417 }, { "epoch": 1.1385858249354543, "grad_norm": 0.50390625, "learning_rate": 1.4725193147020991e-05, "loss": 4.0741, "step": 3418 }, { "epoch": 1.138918963937703, "grad_norm": 0.5, "learning_rate": 1.4724976001421967e-05, "loss": 4.1735, "step": 3419 }, { "epoch": 1.1392521029399516, "grad_norm": 0.498046875, "learning_rate": 1.4724758771667622e-05, "loss": 4.0564, "step": 3420 }, { "epoch": 1.1395852419422003, "grad_norm": 0.5, "learning_rate": 1.4724541457760486e-05, "loss": 4.0963, "step": 3421 }, { "epoch": 1.139918380944449, "grad_norm": 0.4921875, "learning_rate": 1.472432405970309e-05, "loss": 4.0501, "step": 3422 }, { "epoch": 1.1402515199466978, "grad_norm": 0.52734375, "learning_rate": 1.4724106577497965e-05, "loss": 4.0459, "step": 3423 }, { "epoch": 1.1405846589489466, "grad_norm": 0.5078125, "learning_rate": 1.472388901114765e-05, "loss": 4.1244, "step": 3424 }, { "epoch": 1.140917797951195, "grad_norm": 0.5078125, "learning_rate": 1.472367136065467e-05, "loss": 4.0691, "step": 3425 }, { "epoch": 1.1412509369534438, "grad_norm": 0.5078125, "learning_rate": 1.4723453626021567e-05, "loss": 4.1206, "step": 3426 }, { "epoch": 1.1415840759556926, "grad_norm": 0.5, "learning_rate": 1.4723235807250877e-05, "loss": 4.0743, "step": 3427 }, { "epoch": 1.1419172149579413, "grad_norm": 0.5, "learning_rate": 1.4723017904345134e-05, "loss": 4.0906, "step": 3428 }, { "epoch": 1.1422503539601898, "grad_norm": 0.51953125, "learning_rate": 1.4722799917306878e-05, "loss": 4.0098, "step": 3429 }, { "epoch": 1.1425834929624386, "grad_norm": 0.52734375, "learning_rate": 1.4722581846138646e-05, "loss": 4.1532, "step": 3430 }, { "epoch": 1.1429166319646873, "grad_norm": 0.51953125, "learning_rate": 1.472236369084298e-05, "loss": 4.0614, "step": 3431 }, { "epoch": 1.143249770966936, "grad_norm": 0.5078125, "learning_rate": 1.472214545142242e-05, "loss": 4.0349, "step": 3432 }, { "epoch": 1.1435829099691845, "grad_norm": 0.48046875, "learning_rate": 1.4721927127879511e-05, "loss": 4.113, "step": 3433 }, { "epoch": 1.1439160489714333, "grad_norm": 0.5234375, "learning_rate": 1.4721708720216792e-05, "loss": 4.0584, "step": 3434 }, { "epoch": 1.144249187973682, "grad_norm": 0.51171875, "learning_rate": 1.4721490228436809e-05, "loss": 4.1272, "step": 3435 }, { "epoch": 1.1445823269759308, "grad_norm": 0.494140625, "learning_rate": 1.4721271652542109e-05, "loss": 4.0865, "step": 3436 }, { "epoch": 1.1449154659781793, "grad_norm": 0.515625, "learning_rate": 1.4721052992535231e-05, "loss": 4.1336, "step": 3437 }, { "epoch": 1.145248604980428, "grad_norm": 0.51953125, "learning_rate": 1.4720834248418728e-05, "loss": 4.0261, "step": 3438 }, { "epoch": 1.1455817439826768, "grad_norm": 0.51953125, "learning_rate": 1.472061542019515e-05, "loss": 4.0632, "step": 3439 }, { "epoch": 1.1459148829849255, "grad_norm": 0.484375, "learning_rate": 1.4720396507867038e-05, "loss": 4.0643, "step": 3440 }, { "epoch": 1.1462480219871742, "grad_norm": 0.51953125, "learning_rate": 1.4720177511436947e-05, "loss": 4.0953, "step": 3441 }, { "epoch": 1.1465811609894228, "grad_norm": 0.515625, "learning_rate": 1.4719958430907427e-05, "loss": 4.0934, "step": 3442 }, { "epoch": 1.1469142999916715, "grad_norm": 0.51171875, "learning_rate": 1.471973926628103e-05, "loss": 4.0721, "step": 3443 }, { "epoch": 1.1472474389939202, "grad_norm": 0.48828125, "learning_rate": 1.4719520017560308e-05, "loss": 4.085, "step": 3444 }, { "epoch": 1.147580577996169, "grad_norm": 0.490234375, "learning_rate": 1.4719300684747816e-05, "loss": 4.0335, "step": 3445 }, { "epoch": 1.1479137169984175, "grad_norm": 0.53515625, "learning_rate": 1.4719081267846106e-05, "loss": 4.1147, "step": 3446 }, { "epoch": 1.1482468560006662, "grad_norm": 0.5078125, "learning_rate": 1.4718861766857737e-05, "loss": 4.1404, "step": 3447 }, { "epoch": 1.148579995002915, "grad_norm": 0.50390625, "learning_rate": 1.4718642181785263e-05, "loss": 4.0723, "step": 3448 }, { "epoch": 1.1489131340051637, "grad_norm": 0.51171875, "learning_rate": 1.4718422512631246e-05, "loss": 4.0762, "step": 3449 }, { "epoch": 1.1492462730074124, "grad_norm": 0.494140625, "learning_rate": 1.4718202759398239e-05, "loss": 4.1321, "step": 3450 }, { "epoch": 1.149579412009661, "grad_norm": 0.5078125, "learning_rate": 1.4717982922088806e-05, "loss": 4.1537, "step": 3451 }, { "epoch": 1.1499125510119097, "grad_norm": 0.5078125, "learning_rate": 1.4717763000705508e-05, "loss": 4.0537, "step": 3452 }, { "epoch": 1.1502456900141584, "grad_norm": 0.50390625, "learning_rate": 1.4717542995250902e-05, "loss": 4.0664, "step": 3453 }, { "epoch": 1.1505788290164072, "grad_norm": 0.478515625, "learning_rate": 1.4717322905727556e-05, "loss": 4.1481, "step": 3454 }, { "epoch": 1.1509119680186557, "grad_norm": 0.51953125, "learning_rate": 1.4717102732138026e-05, "loss": 4.1096, "step": 3455 }, { "epoch": 1.1512451070209044, "grad_norm": 0.5390625, "learning_rate": 1.4716882474484886e-05, "loss": 4.0933, "step": 3456 }, { "epoch": 1.1515782460231532, "grad_norm": 0.515625, "learning_rate": 1.4716662132770694e-05, "loss": 3.9841, "step": 3457 }, { "epoch": 1.151911385025402, "grad_norm": 0.53125, "learning_rate": 1.4716441706998019e-05, "loss": 4.0243, "step": 3458 }, { "epoch": 1.1522445240276507, "grad_norm": 0.51171875, "learning_rate": 1.4716221197169433e-05, "loss": 4.0453, "step": 3459 }, { "epoch": 1.1525776630298992, "grad_norm": 0.50390625, "learning_rate": 1.4716000603287497e-05, "loss": 4.1275, "step": 3460 }, { "epoch": 1.152910802032148, "grad_norm": 0.48828125, "learning_rate": 1.4715779925354784e-05, "loss": 4.0776, "step": 3461 }, { "epoch": 1.1532439410343966, "grad_norm": 0.5078125, "learning_rate": 1.4715559163373865e-05, "loss": 4.0733, "step": 3462 }, { "epoch": 1.1535770800366454, "grad_norm": 0.52734375, "learning_rate": 1.4715338317347311e-05, "loss": 4.0726, "step": 3463 }, { "epoch": 1.153910219038894, "grad_norm": 0.53515625, "learning_rate": 1.4715117387277693e-05, "loss": 3.9996, "step": 3464 }, { "epoch": 1.1542433580411426, "grad_norm": 0.51171875, "learning_rate": 1.4714896373167586e-05, "loss": 4.0548, "step": 3465 }, { "epoch": 1.1545764970433914, "grad_norm": 0.53515625, "learning_rate": 1.4714675275019563e-05, "loss": 4.0906, "step": 3466 }, { "epoch": 1.1549096360456401, "grad_norm": 0.51171875, "learning_rate": 1.4714454092836202e-05, "loss": 4.1139, "step": 3467 }, { "epoch": 1.1552427750478886, "grad_norm": 0.51953125, "learning_rate": 1.4714232826620075e-05, "loss": 4.0437, "step": 3468 }, { "epoch": 1.1555759140501374, "grad_norm": 0.5, "learning_rate": 1.4714011476373762e-05, "loss": 4.0863, "step": 3469 }, { "epoch": 1.1559090530523861, "grad_norm": 0.5078125, "learning_rate": 1.471379004209984e-05, "loss": 4.131, "step": 3470 }, { "epoch": 1.1562421920546349, "grad_norm": 0.53125, "learning_rate": 1.4713568523800892e-05, "loss": 4.0795, "step": 3471 }, { "epoch": 1.1565753310568834, "grad_norm": 0.51953125, "learning_rate": 1.4713346921479492e-05, "loss": 4.1322, "step": 3472 }, { "epoch": 1.156908470059132, "grad_norm": 0.51171875, "learning_rate": 1.4713125235138226e-05, "loss": 4.1177, "step": 3473 }, { "epoch": 1.1572416090613808, "grad_norm": 0.5, "learning_rate": 1.4712903464779674e-05, "loss": 4.0686, "step": 3474 }, { "epoch": 1.1575747480636296, "grad_norm": 0.52734375, "learning_rate": 1.4712681610406421e-05, "loss": 4.0573, "step": 3475 }, { "epoch": 1.1579078870658783, "grad_norm": 0.515625, "learning_rate": 1.471245967202105e-05, "loss": 4.0874, "step": 3476 }, { "epoch": 1.1582410260681268, "grad_norm": 0.5078125, "learning_rate": 1.4712237649626145e-05, "loss": 4.1799, "step": 3477 }, { "epoch": 1.1585741650703756, "grad_norm": 0.53515625, "learning_rate": 1.4712015543224293e-05, "loss": 4.0486, "step": 3478 }, { "epoch": 1.1589073040726243, "grad_norm": 0.478515625, "learning_rate": 1.4711793352818082e-05, "loss": 4.1596, "step": 3479 }, { "epoch": 1.159240443074873, "grad_norm": 0.515625, "learning_rate": 1.4711571078410098e-05, "loss": 4.1535, "step": 3480 }, { "epoch": 1.1595735820771216, "grad_norm": 0.515625, "learning_rate": 1.4711348720002932e-05, "loss": 4.0593, "step": 3481 }, { "epoch": 1.1599067210793703, "grad_norm": 0.5078125, "learning_rate": 1.4711126277599173e-05, "loss": 4.1011, "step": 3482 }, { "epoch": 1.160239860081619, "grad_norm": 0.498046875, "learning_rate": 1.4710903751201412e-05, "loss": 4.1563, "step": 3483 }, { "epoch": 1.1605729990838678, "grad_norm": 0.494140625, "learning_rate": 1.471068114081224e-05, "loss": 4.0777, "step": 3484 }, { "epoch": 1.1609061380861165, "grad_norm": 0.5546875, "learning_rate": 1.4710458446434254e-05, "loss": 4.0861, "step": 3485 }, { "epoch": 1.161239277088365, "grad_norm": 0.5234375, "learning_rate": 1.4710235668070042e-05, "loss": 4.0157, "step": 3486 }, { "epoch": 1.1615724160906138, "grad_norm": 0.5234375, "learning_rate": 1.4710012805722201e-05, "loss": 4.0457, "step": 3487 }, { "epoch": 1.1619055550928625, "grad_norm": 0.54296875, "learning_rate": 1.4709789859393332e-05, "loss": 4.0134, "step": 3488 }, { "epoch": 1.1622386940951113, "grad_norm": 0.546875, "learning_rate": 1.4709566829086024e-05, "loss": 3.998, "step": 3489 }, { "epoch": 1.1625718330973598, "grad_norm": 0.515625, "learning_rate": 1.4709343714802879e-05, "loss": 4.1241, "step": 3490 }, { "epoch": 1.1629049720996085, "grad_norm": 0.51953125, "learning_rate": 1.4709120516546496e-05, "loss": 4.1238, "step": 3491 }, { "epoch": 1.1632381111018573, "grad_norm": 0.5390625, "learning_rate": 1.4708897234319474e-05, "loss": 4.0753, "step": 3492 }, { "epoch": 1.163571250104106, "grad_norm": 0.5, "learning_rate": 1.4708673868124413e-05, "loss": 4.0604, "step": 3493 }, { "epoch": 1.1639043891063547, "grad_norm": 0.5078125, "learning_rate": 1.4708450417963915e-05, "loss": 4.0588, "step": 3494 }, { "epoch": 1.1642375281086033, "grad_norm": 0.50390625, "learning_rate": 1.4708226883840585e-05, "loss": 4.1615, "step": 3495 }, { "epoch": 1.164570667110852, "grad_norm": 0.5078125, "learning_rate": 1.4708003265757023e-05, "loss": 4.1117, "step": 3496 }, { "epoch": 1.1649038061131007, "grad_norm": 0.51953125, "learning_rate": 1.4707779563715837e-05, "loss": 4.0422, "step": 3497 }, { "epoch": 1.1652369451153495, "grad_norm": 0.50390625, "learning_rate": 1.470755577771963e-05, "loss": 4.1318, "step": 3498 }, { "epoch": 1.165570084117598, "grad_norm": 0.5078125, "learning_rate": 1.4707331907771007e-05, "loss": 4.1151, "step": 3499 }, { "epoch": 1.1659032231198467, "grad_norm": 0.515625, "learning_rate": 1.470710795387258e-05, "loss": 4.1064, "step": 3500 }, { "epoch": 1.1662363621220955, "grad_norm": 0.48046875, "learning_rate": 1.4706883916026959e-05, "loss": 4.1519, "step": 3501 }, { "epoch": 1.1665695011243442, "grad_norm": 0.4921875, "learning_rate": 1.4706659794236749e-05, "loss": 4.126, "step": 3502 }, { "epoch": 1.1669026401265927, "grad_norm": 0.515625, "learning_rate": 1.4706435588504561e-05, "loss": 4.0718, "step": 3503 }, { "epoch": 1.1672357791288415, "grad_norm": 0.51171875, "learning_rate": 1.4706211298833007e-05, "loss": 4.0484, "step": 3504 }, { "epoch": 1.1675689181310902, "grad_norm": 0.5234375, "learning_rate": 1.4705986925224701e-05, "loss": 4.1176, "step": 3505 }, { "epoch": 1.167902057133339, "grad_norm": 0.48046875, "learning_rate": 1.4705762467682256e-05, "loss": 4.127, "step": 3506 }, { "epoch": 1.1682351961355875, "grad_norm": 0.51953125, "learning_rate": 1.4705537926208284e-05, "loss": 4.1501, "step": 3507 }, { "epoch": 1.1685683351378362, "grad_norm": 0.51171875, "learning_rate": 1.4705313300805405e-05, "loss": 4.0765, "step": 3508 }, { "epoch": 1.168901474140085, "grad_norm": 0.50390625, "learning_rate": 1.470508859147623e-05, "loss": 4.0783, "step": 3509 }, { "epoch": 1.1692346131423337, "grad_norm": 0.51953125, "learning_rate": 1.4704863798223383e-05, "loss": 4.0219, "step": 3510 }, { "epoch": 1.1695677521445824, "grad_norm": 0.5078125, "learning_rate": 1.4704638921049473e-05, "loss": 4.1074, "step": 3511 }, { "epoch": 1.169900891146831, "grad_norm": 0.50390625, "learning_rate": 1.470441395995713e-05, "loss": 4.0674, "step": 3512 }, { "epoch": 1.1702340301490797, "grad_norm": 0.486328125, "learning_rate": 1.4704188914948965e-05, "loss": 4.1417, "step": 3513 }, { "epoch": 1.1705671691513284, "grad_norm": 0.515625, "learning_rate": 1.4703963786027606e-05, "loss": 4.1449, "step": 3514 }, { "epoch": 1.1709003081535772, "grad_norm": 0.5234375, "learning_rate": 1.4703738573195673e-05, "loss": 4.0756, "step": 3515 }, { "epoch": 1.1712334471558257, "grad_norm": 0.50390625, "learning_rate": 1.4703513276455787e-05, "loss": 4.0835, "step": 3516 }, { "epoch": 1.1715665861580744, "grad_norm": 0.4921875, "learning_rate": 1.4703287895810574e-05, "loss": 4.095, "step": 3517 }, { "epoch": 1.1718997251603231, "grad_norm": 0.50390625, "learning_rate": 1.4703062431262659e-05, "loss": 4.1304, "step": 3518 }, { "epoch": 1.1722328641625719, "grad_norm": 0.50390625, "learning_rate": 1.4702836882814671e-05, "loss": 4.0472, "step": 3519 }, { "epoch": 1.1725660031648206, "grad_norm": 0.5078125, "learning_rate": 1.4702611250469234e-05, "loss": 4.0819, "step": 3520 }, { "epoch": 1.1728991421670691, "grad_norm": 0.5078125, "learning_rate": 1.4702385534228974e-05, "loss": 4.056, "step": 3521 }, { "epoch": 1.1732322811693179, "grad_norm": 0.486328125, "learning_rate": 1.4702159734096526e-05, "loss": 4.1144, "step": 3522 }, { "epoch": 1.1735654201715666, "grad_norm": 0.515625, "learning_rate": 1.4701933850074516e-05, "loss": 4.1152, "step": 3523 }, { "epoch": 1.1738985591738154, "grad_norm": 0.490234375, "learning_rate": 1.4701707882165574e-05, "loss": 4.1695, "step": 3524 }, { "epoch": 1.1742316981760639, "grad_norm": 0.490234375, "learning_rate": 1.4701481830372337e-05, "loss": 4.0956, "step": 3525 }, { "epoch": 1.1745648371783126, "grad_norm": 0.5078125, "learning_rate": 1.4701255694697436e-05, "loss": 4.1145, "step": 3526 }, { "epoch": 1.1748979761805614, "grad_norm": 0.5, "learning_rate": 1.47010294751435e-05, "loss": 4.0478, "step": 3527 }, { "epoch": 1.17523111518281, "grad_norm": 0.515625, "learning_rate": 1.470080317171317e-05, "loss": 4.1059, "step": 3528 }, { "epoch": 1.1755642541850588, "grad_norm": 0.53125, "learning_rate": 1.4700576784409081e-05, "loss": 4.072, "step": 3529 }, { "epoch": 1.1758973931873073, "grad_norm": 0.5234375, "learning_rate": 1.4700350313233869e-05, "loss": 4.1013, "step": 3530 }, { "epoch": 1.176230532189556, "grad_norm": 0.486328125, "learning_rate": 1.470012375819017e-05, "loss": 4.0632, "step": 3531 }, { "epoch": 1.1765636711918048, "grad_norm": 0.50390625, "learning_rate": 1.4699897119280624e-05, "loss": 4.0724, "step": 3532 }, { "epoch": 1.1768968101940533, "grad_norm": 0.53125, "learning_rate": 1.4699670396507873e-05, "loss": 4.0557, "step": 3533 }, { "epoch": 1.177229949196302, "grad_norm": 0.51953125, "learning_rate": 1.4699443589874555e-05, "loss": 4.1407, "step": 3534 }, { "epoch": 1.1775630881985508, "grad_norm": 0.515625, "learning_rate": 1.4699216699383316e-05, "loss": 4.0922, "step": 3535 }, { "epoch": 1.1778962272007996, "grad_norm": 0.51171875, "learning_rate": 1.4698989725036794e-05, "loss": 4.0935, "step": 3536 }, { "epoch": 1.1782293662030483, "grad_norm": 0.515625, "learning_rate": 1.4698762666837635e-05, "loss": 4.116, "step": 3537 }, { "epoch": 1.1785625052052968, "grad_norm": 0.5078125, "learning_rate": 1.4698535524788484e-05, "loss": 4.0525, "step": 3538 }, { "epoch": 1.1788956442075456, "grad_norm": 0.515625, "learning_rate": 1.4698308298891986e-05, "loss": 4.0555, "step": 3539 }, { "epoch": 1.1792287832097943, "grad_norm": 0.52734375, "learning_rate": 1.4698080989150786e-05, "loss": 4.0418, "step": 3540 }, { "epoch": 1.179561922212043, "grad_norm": 0.51953125, "learning_rate": 1.4697853595567534e-05, "loss": 4.0107, "step": 3541 }, { "epoch": 1.1798950612142916, "grad_norm": 0.49609375, "learning_rate": 1.469762611814488e-05, "loss": 4.1443, "step": 3542 }, { "epoch": 1.1802282002165403, "grad_norm": 0.52734375, "learning_rate": 1.4697398556885473e-05, "loss": 4.1519, "step": 3543 }, { "epoch": 1.180561339218789, "grad_norm": 0.515625, "learning_rate": 1.4697170911791959e-05, "loss": 4.0894, "step": 3544 }, { "epoch": 1.1808944782210378, "grad_norm": 0.484375, "learning_rate": 1.4696943182866992e-05, "loss": 4.1268, "step": 3545 }, { "epoch": 1.1812276172232865, "grad_norm": 0.498046875, "learning_rate": 1.4696715370113227e-05, "loss": 4.1123, "step": 3546 }, { "epoch": 1.181560756225535, "grad_norm": 0.5, "learning_rate": 1.4696487473533317e-05, "loss": 4.1641, "step": 3547 }, { "epoch": 1.1818938952277838, "grad_norm": 0.5078125, "learning_rate": 1.4696259493129915e-05, "loss": 4.0615, "step": 3548 }, { "epoch": 1.1822270342300325, "grad_norm": 0.490234375, "learning_rate": 1.4696031428905678e-05, "loss": 4.0497, "step": 3549 }, { "epoch": 1.1825601732322812, "grad_norm": 0.5078125, "learning_rate": 1.469580328086326e-05, "loss": 4.0965, "step": 3550 }, { "epoch": 1.1828933122345298, "grad_norm": 0.51953125, "learning_rate": 1.469557504900532e-05, "loss": 4.0863, "step": 3551 }, { "epoch": 1.1832264512367785, "grad_norm": 0.50390625, "learning_rate": 1.4695346733334516e-05, "loss": 4.0837, "step": 3552 }, { "epoch": 1.1835595902390272, "grad_norm": 0.5078125, "learning_rate": 1.4695118333853508e-05, "loss": 4.1061, "step": 3553 }, { "epoch": 1.183892729241276, "grad_norm": 0.53515625, "learning_rate": 1.4694889850564958e-05, "loss": 4.0655, "step": 3554 }, { "epoch": 1.1842258682435247, "grad_norm": 0.48828125, "learning_rate": 1.4694661283471522e-05, "loss": 4.0847, "step": 3555 }, { "epoch": 1.1845590072457732, "grad_norm": 0.5078125, "learning_rate": 1.4694432632575869e-05, "loss": 4.0852, "step": 3556 }, { "epoch": 1.184892146248022, "grad_norm": 0.498046875, "learning_rate": 1.4694203897880656e-05, "loss": 4.1197, "step": 3557 }, { "epoch": 1.1852252852502707, "grad_norm": 0.51171875, "learning_rate": 1.469397507938855e-05, "loss": 4.1446, "step": 3558 }, { "epoch": 1.1855584242525194, "grad_norm": 0.51171875, "learning_rate": 1.4693746177102218e-05, "loss": 4.1106, "step": 3559 }, { "epoch": 1.185891563254768, "grad_norm": 0.546875, "learning_rate": 1.4693517191024324e-05, "loss": 4.1148, "step": 3560 }, { "epoch": 1.1862247022570167, "grad_norm": 0.5078125, "learning_rate": 1.4693288121157537e-05, "loss": 4.0672, "step": 3561 }, { "epoch": 1.1865578412592654, "grad_norm": 0.486328125, "learning_rate": 1.4693058967504524e-05, "loss": 4.1215, "step": 3562 }, { "epoch": 1.1868909802615142, "grad_norm": 0.50390625, "learning_rate": 1.4692829730067953e-05, "loss": 4.1914, "step": 3563 }, { "epoch": 1.187224119263763, "grad_norm": 0.498046875, "learning_rate": 1.4692600408850495e-05, "loss": 4.059, "step": 3564 }, { "epoch": 1.1875572582660114, "grad_norm": 0.52734375, "learning_rate": 1.4692371003854822e-05, "loss": 4.0926, "step": 3565 }, { "epoch": 1.1878903972682602, "grad_norm": 0.50390625, "learning_rate": 1.4692141515083607e-05, "loss": 4.0961, "step": 3566 }, { "epoch": 1.188223536270509, "grad_norm": 0.515625, "learning_rate": 1.4691911942539521e-05, "loss": 4.0507, "step": 3567 }, { "epoch": 1.1885566752727574, "grad_norm": 0.52734375, "learning_rate": 1.4691682286225236e-05, "loss": 4.0021, "step": 3568 }, { "epoch": 1.1888898142750062, "grad_norm": 0.515625, "learning_rate": 1.4691452546143433e-05, "loss": 4.1078, "step": 3569 }, { "epoch": 1.189222953277255, "grad_norm": 0.53515625, "learning_rate": 1.4691222722296781e-05, "loss": 4.0073, "step": 3570 }, { "epoch": 1.1895560922795037, "grad_norm": 0.5078125, "learning_rate": 1.4690992814687963e-05, "loss": 4.1105, "step": 3571 }, { "epoch": 1.1898892312817524, "grad_norm": 0.51953125, "learning_rate": 1.4690762823319653e-05, "loss": 4.134, "step": 3572 }, { "epoch": 1.190222370284001, "grad_norm": 0.51171875, "learning_rate": 1.4690532748194531e-05, "loss": 4.0828, "step": 3573 }, { "epoch": 1.1905555092862496, "grad_norm": 0.498046875, "learning_rate": 1.4690302589315278e-05, "loss": 4.0759, "step": 3574 }, { "epoch": 1.1908886482884984, "grad_norm": 0.5234375, "learning_rate": 1.4690072346684575e-05, "loss": 4.0667, "step": 3575 }, { "epoch": 1.1912217872907471, "grad_norm": 0.5, "learning_rate": 1.4689842020305102e-05, "loss": 4.0598, "step": 3576 }, { "epoch": 1.1915549262929956, "grad_norm": 0.486328125, "learning_rate": 1.468961161017954e-05, "loss": 4.0578, "step": 3577 }, { "epoch": 1.1918880652952444, "grad_norm": 0.53515625, "learning_rate": 1.4689381116310578e-05, "loss": 4.0978, "step": 3578 }, { "epoch": 1.1922212042974931, "grad_norm": 0.5078125, "learning_rate": 1.46891505387009e-05, "loss": 4.1158, "step": 3579 }, { "epoch": 1.1925543432997419, "grad_norm": 0.51953125, "learning_rate": 1.4688919877353187e-05, "loss": 4.0767, "step": 3580 }, { "epoch": 1.1928874823019906, "grad_norm": 0.5078125, "learning_rate": 1.468868913227013e-05, "loss": 4.1252, "step": 3581 }, { "epoch": 1.1932206213042391, "grad_norm": 0.51171875, "learning_rate": 1.4688458303454415e-05, "loss": 4.1182, "step": 3582 }, { "epoch": 1.1935537603064879, "grad_norm": 0.51171875, "learning_rate": 1.468822739090873e-05, "loss": 4.0569, "step": 3583 }, { "epoch": 1.1938868993087366, "grad_norm": 0.50390625, "learning_rate": 1.4687996394635765e-05, "loss": 4.1392, "step": 3584 }, { "epoch": 1.1942200383109853, "grad_norm": 0.53125, "learning_rate": 1.4687765314638214e-05, "loss": 4.1022, "step": 3585 }, { "epoch": 1.1945531773132338, "grad_norm": 0.52734375, "learning_rate": 1.4687534150918765e-05, "loss": 4.0765, "step": 3586 }, { "epoch": 1.1948863163154826, "grad_norm": 0.53515625, "learning_rate": 1.468730290348011e-05, "loss": 4.0407, "step": 3587 }, { "epoch": 1.1952194553177313, "grad_norm": 0.49609375, "learning_rate": 1.4687071572324946e-05, "loss": 4.1119, "step": 3588 }, { "epoch": 1.19555259431998, "grad_norm": 0.51953125, "learning_rate": 1.468684015745596e-05, "loss": 4.0598, "step": 3589 }, { "epoch": 1.1958857333222288, "grad_norm": 0.49609375, "learning_rate": 1.4686608658875858e-05, "loss": 4.0906, "step": 3590 }, { "epoch": 1.1962188723244773, "grad_norm": 0.478515625, "learning_rate": 1.468637707658733e-05, "loss": 4.2157, "step": 3591 }, { "epoch": 1.196552011326726, "grad_norm": 0.515625, "learning_rate": 1.4686145410593075e-05, "loss": 4.0945, "step": 3592 }, { "epoch": 1.1968851503289748, "grad_norm": 0.515625, "learning_rate": 1.4685913660895788e-05, "loss": 4.0593, "step": 3593 }, { "epoch": 1.1972182893312235, "grad_norm": 0.51171875, "learning_rate": 1.4685681827498176e-05, "loss": 4.0926, "step": 3594 }, { "epoch": 1.197551428333472, "grad_norm": 0.5078125, "learning_rate": 1.4685449910402934e-05, "loss": 4.0832, "step": 3595 }, { "epoch": 1.1978845673357208, "grad_norm": 0.4921875, "learning_rate": 1.4685217909612762e-05, "loss": 4.1337, "step": 3596 }, { "epoch": 1.1982177063379695, "grad_norm": 0.50390625, "learning_rate": 1.4684985825130367e-05, "loss": 4.0158, "step": 3597 }, { "epoch": 1.1985508453402183, "grad_norm": 0.5078125, "learning_rate": 1.4684753656958447e-05, "loss": 4.1143, "step": 3598 }, { "epoch": 1.198883984342467, "grad_norm": 0.5, "learning_rate": 1.4684521405099711e-05, "loss": 4.1034, "step": 3599 }, { "epoch": 1.1992171233447155, "grad_norm": 0.5078125, "learning_rate": 1.4684289069556863e-05, "loss": 4.1191, "step": 3600 }, { "epoch": 1.1995502623469643, "grad_norm": 0.5234375, "learning_rate": 1.4684056650332607e-05, "loss": 4.0601, "step": 3601 }, { "epoch": 1.199883401349213, "grad_norm": 0.5234375, "learning_rate": 1.4683824147429653e-05, "loss": 4.1126, "step": 3602 }, { "epoch": 1.2002165403514615, "grad_norm": 0.484375, "learning_rate": 1.4683591560850706e-05, "loss": 4.1484, "step": 3603 }, { "epoch": 1.2005496793537103, "grad_norm": 0.5, "learning_rate": 1.4683358890598478e-05, "loss": 4.1271, "step": 3604 }, { "epoch": 1.200882818355959, "grad_norm": 0.5, "learning_rate": 1.4683126136675676e-05, "loss": 4.078, "step": 3605 }, { "epoch": 1.2012159573582077, "grad_norm": 0.51171875, "learning_rate": 1.4682893299085017e-05, "loss": 4.0964, "step": 3606 }, { "epoch": 1.2015490963604565, "grad_norm": 0.5234375, "learning_rate": 1.4682660377829205e-05, "loss": 4.1047, "step": 3607 }, { "epoch": 1.201882235362705, "grad_norm": 0.49609375, "learning_rate": 1.468242737291096e-05, "loss": 4.0563, "step": 3608 }, { "epoch": 1.2022153743649537, "grad_norm": 0.494140625, "learning_rate": 1.4682194284332994e-05, "loss": 4.1437, "step": 3609 }, { "epoch": 1.2025485133672025, "grad_norm": 0.51953125, "learning_rate": 1.468196111209802e-05, "loss": 4.0349, "step": 3610 }, { "epoch": 1.2028816523694512, "grad_norm": 0.5234375, "learning_rate": 1.4681727856208754e-05, "loss": 4.0785, "step": 3611 }, { "epoch": 1.2032147913716997, "grad_norm": 0.490234375, "learning_rate": 1.4681494516667916e-05, "loss": 4.1114, "step": 3612 }, { "epoch": 1.2035479303739485, "grad_norm": 0.51953125, "learning_rate": 1.468126109347822e-05, "loss": 4.0266, "step": 3613 }, { "epoch": 1.2038810693761972, "grad_norm": 0.49609375, "learning_rate": 1.468102758664239e-05, "loss": 4.1031, "step": 3614 }, { "epoch": 1.204214208378446, "grad_norm": 0.50390625, "learning_rate": 1.468079399616314e-05, "loss": 4.0221, "step": 3615 }, { "epoch": 1.2045473473806947, "grad_norm": 0.5390625, "learning_rate": 1.4680560322043192e-05, "loss": 4.0541, "step": 3616 }, { "epoch": 1.2048804863829432, "grad_norm": 0.515625, "learning_rate": 1.4680326564285272e-05, "loss": 4.0382, "step": 3617 }, { "epoch": 1.205213625385192, "grad_norm": 0.5, "learning_rate": 1.46800927228921e-05, "loss": 4.1087, "step": 3618 }, { "epoch": 1.2055467643874407, "grad_norm": 0.5234375, "learning_rate": 1.46798587978664e-05, "loss": 4.0925, "step": 3619 }, { "epoch": 1.2058799033896894, "grad_norm": 0.498046875, "learning_rate": 1.4679624789210895e-05, "loss": 4.0762, "step": 3620 }, { "epoch": 1.206213042391938, "grad_norm": 0.515625, "learning_rate": 1.4679390696928312e-05, "loss": 4.0399, "step": 3621 }, { "epoch": 1.2065461813941867, "grad_norm": 0.5234375, "learning_rate": 1.467915652102138e-05, "loss": 4.0383, "step": 3622 }, { "epoch": 1.2068793203964354, "grad_norm": 0.52734375, "learning_rate": 1.4678922261492824e-05, "loss": 4.1383, "step": 3623 }, { "epoch": 1.2072124593986842, "grad_norm": 0.51953125, "learning_rate": 1.4678687918345373e-05, "loss": 4.1879, "step": 3624 }, { "epoch": 1.207545598400933, "grad_norm": 0.51171875, "learning_rate": 1.4678453491581756e-05, "loss": 4.0193, "step": 3625 }, { "epoch": 1.2078787374031814, "grad_norm": 0.482421875, "learning_rate": 1.4678218981204705e-05, "loss": 4.135, "step": 3626 }, { "epoch": 1.2082118764054302, "grad_norm": 0.52734375, "learning_rate": 1.467798438721695e-05, "loss": 3.998, "step": 3627 }, { "epoch": 1.208545015407679, "grad_norm": 0.5078125, "learning_rate": 1.4677749709621226e-05, "loss": 4.1303, "step": 3628 }, { "epoch": 1.2088781544099276, "grad_norm": 0.5234375, "learning_rate": 1.4677514948420265e-05, "loss": 4.0986, "step": 3629 }, { "epoch": 1.2092112934121761, "grad_norm": 0.53125, "learning_rate": 1.4677280103616798e-05, "loss": 4.1235, "step": 3630 }, { "epoch": 1.2095444324144249, "grad_norm": 0.515625, "learning_rate": 1.4677045175213566e-05, "loss": 4.0214, "step": 3631 }, { "epoch": 1.2098775714166736, "grad_norm": 0.50390625, "learning_rate": 1.4676810163213304e-05, "loss": 4.1801, "step": 3632 }, { "epoch": 1.2102107104189224, "grad_norm": 0.51171875, "learning_rate": 1.4676575067618748e-05, "loss": 4.0448, "step": 3633 }, { "epoch": 1.210543849421171, "grad_norm": 0.515625, "learning_rate": 1.4676339888432637e-05, "loss": 4.0767, "step": 3634 }, { "epoch": 1.2108769884234196, "grad_norm": 0.515625, "learning_rate": 1.4676104625657711e-05, "loss": 4.0426, "step": 3635 }, { "epoch": 1.2112101274256684, "grad_norm": 0.4921875, "learning_rate": 1.4675869279296707e-05, "loss": 4.0554, "step": 3636 }, { "epoch": 1.211543266427917, "grad_norm": 0.5078125, "learning_rate": 1.467563384935237e-05, "loss": 4.0789, "step": 3637 }, { "epoch": 1.2118764054301656, "grad_norm": 0.55078125, "learning_rate": 1.4675398335827442e-05, "loss": 4.0957, "step": 3638 }, { "epoch": 1.2122095444324144, "grad_norm": 0.484375, "learning_rate": 1.4675162738724664e-05, "loss": 4.0672, "step": 3639 }, { "epoch": 1.212542683434663, "grad_norm": 0.484375, "learning_rate": 1.4674927058046781e-05, "loss": 4.0856, "step": 3640 }, { "epoch": 1.2128758224369118, "grad_norm": 0.5234375, "learning_rate": 1.467469129379654e-05, "loss": 4.0793, "step": 3641 }, { "epoch": 1.2132089614391606, "grad_norm": 0.51953125, "learning_rate": 1.4674455445976686e-05, "loss": 4.1432, "step": 3642 }, { "epoch": 1.213542100441409, "grad_norm": 0.515625, "learning_rate": 1.4674219514589966e-05, "loss": 4.0586, "step": 3643 }, { "epoch": 1.2138752394436578, "grad_norm": 0.5, "learning_rate": 1.4673983499639125e-05, "loss": 4.1228, "step": 3644 }, { "epoch": 1.2142083784459066, "grad_norm": 0.51953125, "learning_rate": 1.4673747401126918e-05, "loss": 4.1116, "step": 3645 }, { "epoch": 1.2145415174481553, "grad_norm": 0.494140625, "learning_rate": 1.4673511219056093e-05, "loss": 4.1134, "step": 3646 }, { "epoch": 1.2148746564504038, "grad_norm": 0.55078125, "learning_rate": 1.4673274953429397e-05, "loss": 4.0804, "step": 3647 }, { "epoch": 1.2152077954526526, "grad_norm": 0.515625, "learning_rate": 1.4673038604249587e-05, "loss": 4.1132, "step": 3648 }, { "epoch": 1.2155409344549013, "grad_norm": 0.515625, "learning_rate": 1.4672802171519413e-05, "loss": 4.0928, "step": 3649 }, { "epoch": 1.21587407345715, "grad_norm": 0.515625, "learning_rate": 1.4672565655241632e-05, "loss": 4.044, "step": 3650 }, { "epoch": 1.2162072124593988, "grad_norm": 0.51171875, "learning_rate": 1.4672329055418995e-05, "loss": 4.123, "step": 3651 }, { "epoch": 1.2165403514616473, "grad_norm": 0.466796875, "learning_rate": 1.467209237205426e-05, "loss": 4.1294, "step": 3652 }, { "epoch": 1.216873490463896, "grad_norm": 0.5078125, "learning_rate": 1.4671855605150183e-05, "loss": 4.0835, "step": 3653 }, { "epoch": 1.2172066294661448, "grad_norm": 0.53125, "learning_rate": 1.4671618754709525e-05, "loss": 4.0388, "step": 3654 }, { "epoch": 1.2175397684683935, "grad_norm": 0.5078125, "learning_rate": 1.4671381820735039e-05, "loss": 4.0258, "step": 3655 }, { "epoch": 1.217872907470642, "grad_norm": 0.50390625, "learning_rate": 1.467114480322949e-05, "loss": 4.0923, "step": 3656 }, { "epoch": 1.2182060464728908, "grad_norm": 0.51171875, "learning_rate": 1.4670907702195637e-05, "loss": 4.0821, "step": 3657 }, { "epoch": 1.2185391854751395, "grad_norm": 0.49609375, "learning_rate": 1.467067051763624e-05, "loss": 4.1297, "step": 3658 }, { "epoch": 1.2188723244773882, "grad_norm": 0.51953125, "learning_rate": 1.4670433249554063e-05, "loss": 4.0552, "step": 3659 }, { "epoch": 1.219205463479637, "grad_norm": 0.5, "learning_rate": 1.467019589795187e-05, "loss": 4.152, "step": 3660 }, { "epoch": 1.2195386024818855, "grad_norm": 0.5, "learning_rate": 1.4669958462832428e-05, "loss": 4.101, "step": 3661 }, { "epoch": 1.2198717414841342, "grad_norm": 0.515625, "learning_rate": 1.4669720944198496e-05, "loss": 4.107, "step": 3662 }, { "epoch": 1.220204880486383, "grad_norm": 0.54296875, "learning_rate": 1.4669483342052847e-05, "loss": 4.0283, "step": 3663 }, { "epoch": 1.2205380194886317, "grad_norm": 0.50390625, "learning_rate": 1.4669245656398247e-05, "loss": 4.0999, "step": 3664 }, { "epoch": 1.2208711584908802, "grad_norm": 0.51953125, "learning_rate": 1.466900788723746e-05, "loss": 4.1515, "step": 3665 }, { "epoch": 1.221204297493129, "grad_norm": 0.515625, "learning_rate": 1.4668770034573263e-05, "loss": 4.0639, "step": 3666 }, { "epoch": 1.2215374364953777, "grad_norm": 0.50390625, "learning_rate": 1.4668532098408421e-05, "loss": 4.181, "step": 3667 }, { "epoch": 1.2218705754976265, "grad_norm": 0.53515625, "learning_rate": 1.4668294078745707e-05, "loss": 4.1321, "step": 3668 }, { "epoch": 1.2222037144998752, "grad_norm": 0.494140625, "learning_rate": 1.4668055975587893e-05, "loss": 4.1284, "step": 3669 }, { "epoch": 1.2225368535021237, "grad_norm": 0.515625, "learning_rate": 1.4667817788937753e-05, "loss": 4.023, "step": 3670 }, { "epoch": 1.2228699925043724, "grad_norm": 0.5078125, "learning_rate": 1.4667579518798064e-05, "loss": 4.1193, "step": 3671 }, { "epoch": 1.2232031315066212, "grad_norm": 0.4921875, "learning_rate": 1.4667341165171594e-05, "loss": 4.1411, "step": 3672 }, { "epoch": 1.2235362705088697, "grad_norm": 0.53515625, "learning_rate": 1.4667102728061128e-05, "loss": 4.0795, "step": 3673 }, { "epoch": 1.2238694095111184, "grad_norm": 0.51953125, "learning_rate": 1.4666864207469436e-05, "loss": 4.0546, "step": 3674 }, { "epoch": 1.2242025485133672, "grad_norm": 0.515625, "learning_rate": 1.4666625603399302e-05, "loss": 4.0649, "step": 3675 }, { "epoch": 1.224535687515616, "grad_norm": 0.53125, "learning_rate": 1.46663869158535e-05, "loss": 4.0874, "step": 3676 }, { "epoch": 1.2248688265178647, "grad_norm": 0.484375, "learning_rate": 1.4666148144834814e-05, "loss": 4.136, "step": 3677 }, { "epoch": 1.2252019655201132, "grad_norm": 0.4921875, "learning_rate": 1.4665909290346024e-05, "loss": 4.1102, "step": 3678 }, { "epoch": 1.225535104522362, "grad_norm": 0.53515625, "learning_rate": 1.4665670352389911e-05, "loss": 4.1435, "step": 3679 }, { "epoch": 1.2258682435246107, "grad_norm": 0.46875, "learning_rate": 1.4665431330969263e-05, "loss": 4.0676, "step": 3680 }, { "epoch": 1.2262013825268594, "grad_norm": 0.49609375, "learning_rate": 1.4665192226086856e-05, "loss": 4.1415, "step": 3681 }, { "epoch": 1.226534521529108, "grad_norm": 0.51171875, "learning_rate": 1.4664953037745481e-05, "loss": 4.0695, "step": 3682 }, { "epoch": 1.2268676605313567, "grad_norm": 0.51171875, "learning_rate": 1.4664713765947924e-05, "loss": 4.1169, "step": 3683 }, { "epoch": 1.2272007995336054, "grad_norm": 0.5078125, "learning_rate": 1.4664474410696968e-05, "loss": 4.1436, "step": 3684 }, { "epoch": 1.2275339385358541, "grad_norm": 0.50390625, "learning_rate": 1.4664234971995404e-05, "loss": 4.0843, "step": 3685 }, { "epoch": 1.2278670775381029, "grad_norm": 0.482421875, "learning_rate": 1.466399544984602e-05, "loss": 3.9959, "step": 3686 }, { "epoch": 1.2282002165403514, "grad_norm": 0.5234375, "learning_rate": 1.4663755844251605e-05, "loss": 4.0715, "step": 3687 }, { "epoch": 1.2285333555426001, "grad_norm": 0.5078125, "learning_rate": 1.4663516155214954e-05, "loss": 4.1077, "step": 3688 }, { "epoch": 1.2288664945448489, "grad_norm": 0.515625, "learning_rate": 1.4663276382738854e-05, "loss": 4.0627, "step": 3689 }, { "epoch": 1.2291996335470976, "grad_norm": 0.49609375, "learning_rate": 1.46630365268261e-05, "loss": 4.1385, "step": 3690 }, { "epoch": 1.2295327725493461, "grad_norm": 0.51953125, "learning_rate": 1.4662796587479486e-05, "loss": 4.0632, "step": 3691 }, { "epoch": 1.2298659115515949, "grad_norm": 0.5, "learning_rate": 1.4662556564701806e-05, "loss": 4.0912, "step": 3692 }, { "epoch": 1.2301990505538436, "grad_norm": 0.490234375, "learning_rate": 1.4662316458495855e-05, "loss": 4.1512, "step": 3693 }, { "epoch": 1.2305321895560923, "grad_norm": 0.51171875, "learning_rate": 1.4662076268864435e-05, "loss": 4.1313, "step": 3694 }, { "epoch": 1.230865328558341, "grad_norm": 0.515625, "learning_rate": 1.4661835995810335e-05, "loss": 4.0929, "step": 3695 }, { "epoch": 1.2311984675605896, "grad_norm": 0.5234375, "learning_rate": 1.4661595639336362e-05, "loss": 4.0504, "step": 3696 }, { "epoch": 1.2315316065628383, "grad_norm": 0.51171875, "learning_rate": 1.4661355199445307e-05, "loss": 4.0555, "step": 3697 }, { "epoch": 1.231864745565087, "grad_norm": 0.515625, "learning_rate": 1.4661114676139979e-05, "loss": 4.1119, "step": 3698 }, { "epoch": 1.2321978845673358, "grad_norm": 0.478515625, "learning_rate": 1.4660874069423175e-05, "loss": 4.125, "step": 3699 }, { "epoch": 1.2325310235695843, "grad_norm": 0.546875, "learning_rate": 1.4660633379297698e-05, "loss": 4.0267, "step": 3700 }, { "epoch": 1.232864162571833, "grad_norm": 0.51953125, "learning_rate": 1.4660392605766354e-05, "loss": 4.1484, "step": 3701 }, { "epoch": 1.2331973015740818, "grad_norm": 0.51171875, "learning_rate": 1.4660151748831944e-05, "loss": 4.0798, "step": 3702 }, { "epoch": 1.2335304405763305, "grad_norm": 0.490234375, "learning_rate": 1.4659910808497274e-05, "loss": 4.0617, "step": 3703 }, { "epoch": 1.2338635795785793, "grad_norm": 0.50390625, "learning_rate": 1.4659669784765151e-05, "loss": 4.0823, "step": 3704 }, { "epoch": 1.2341967185808278, "grad_norm": 0.51953125, "learning_rate": 1.4659428677638386e-05, "loss": 4.0724, "step": 3705 }, { "epoch": 1.2345298575830765, "grad_norm": 0.515625, "learning_rate": 1.4659187487119783e-05, "loss": 4.0876, "step": 3706 }, { "epoch": 1.2348629965853253, "grad_norm": 0.494140625, "learning_rate": 1.465894621321215e-05, "loss": 4.1721, "step": 3707 }, { "epoch": 1.2351961355875738, "grad_norm": 0.50390625, "learning_rate": 1.4658704855918304e-05, "loss": 4.0086, "step": 3708 }, { "epoch": 1.2355292745898225, "grad_norm": 0.50390625, "learning_rate": 1.4658463415241049e-05, "loss": 4.0842, "step": 3709 }, { "epoch": 1.2358624135920713, "grad_norm": 0.49609375, "learning_rate": 1.4658221891183202e-05, "loss": 4.1419, "step": 3710 }, { "epoch": 1.23619555259432, "grad_norm": 0.51171875, "learning_rate": 1.4657980283747573e-05, "loss": 4.0818, "step": 3711 }, { "epoch": 1.2365286915965688, "grad_norm": 0.494140625, "learning_rate": 1.4657738592936979e-05, "loss": 4.0955, "step": 3712 }, { "epoch": 1.2368618305988173, "grad_norm": 0.51171875, "learning_rate": 1.4657496818754235e-05, "loss": 4.083, "step": 3713 }, { "epoch": 1.237194969601066, "grad_norm": 0.48828125, "learning_rate": 1.4657254961202155e-05, "loss": 4.1258, "step": 3714 }, { "epoch": 1.2375281086033147, "grad_norm": 0.52734375, "learning_rate": 1.4657013020283558e-05, "loss": 4.0979, "step": 3715 }, { "epoch": 1.2378612476055635, "grad_norm": 0.515625, "learning_rate": 1.4656770996001261e-05, "loss": 4.1242, "step": 3716 }, { "epoch": 1.238194386607812, "grad_norm": 0.5078125, "learning_rate": 1.4656528888358083e-05, "loss": 4.1112, "step": 3717 }, { "epoch": 1.2385275256100607, "grad_norm": 0.5078125, "learning_rate": 1.4656286697356844e-05, "loss": 4.0352, "step": 3718 }, { "epoch": 1.2388606646123095, "grad_norm": 0.51171875, "learning_rate": 1.4656044423000367e-05, "loss": 4.1064, "step": 3719 }, { "epoch": 1.2391938036145582, "grad_norm": 0.51171875, "learning_rate": 1.4655802065291471e-05, "loss": 4.1237, "step": 3720 }, { "epoch": 1.239526942616807, "grad_norm": 0.52734375, "learning_rate": 1.4655559624232982e-05, "loss": 4.0456, "step": 3721 }, { "epoch": 1.2398600816190555, "grad_norm": 0.5078125, "learning_rate": 1.465531709982772e-05, "loss": 4.1516, "step": 3722 }, { "epoch": 1.2401932206213042, "grad_norm": 0.51171875, "learning_rate": 1.4655074492078514e-05, "loss": 4.1451, "step": 3723 }, { "epoch": 1.240526359623553, "grad_norm": 0.515625, "learning_rate": 1.4654831800988188e-05, "loss": 4.0852, "step": 3724 }, { "epoch": 1.2408594986258017, "grad_norm": 0.50390625, "learning_rate": 1.4654589026559569e-05, "loss": 4.1212, "step": 3725 }, { "epoch": 1.2411926376280502, "grad_norm": 0.5390625, "learning_rate": 1.4654346168795484e-05, "loss": 4.051, "step": 3726 }, { "epoch": 1.241525776630299, "grad_norm": 0.5546875, "learning_rate": 1.465410322769876e-05, "loss": 4.0679, "step": 3727 }, { "epoch": 1.2418589156325477, "grad_norm": 0.51171875, "learning_rate": 1.4653860203272235e-05, "loss": 4.0396, "step": 3728 }, { "epoch": 1.2421920546347964, "grad_norm": 0.5078125, "learning_rate": 1.465361709551873e-05, "loss": 4.1033, "step": 3729 }, { "epoch": 1.2425251936370452, "grad_norm": 0.5, "learning_rate": 1.465337390444108e-05, "loss": 4.0937, "step": 3730 }, { "epoch": 1.2428583326392937, "grad_norm": 0.49609375, "learning_rate": 1.465313063004212e-05, "loss": 4.0524, "step": 3731 }, { "epoch": 1.2431914716415424, "grad_norm": 0.5546875, "learning_rate": 1.4652887272324682e-05, "loss": 4.1516, "step": 3732 }, { "epoch": 1.2435246106437912, "grad_norm": 0.51953125, "learning_rate": 1.4652643831291599e-05, "loss": 4.1318, "step": 3733 }, { "epoch": 1.24385774964604, "grad_norm": 0.5234375, "learning_rate": 1.4652400306945706e-05, "loss": 4.1209, "step": 3734 }, { "epoch": 1.2441908886482884, "grad_norm": 0.53515625, "learning_rate": 1.4652156699289845e-05, "loss": 3.9989, "step": 3735 }, { "epoch": 1.2445240276505372, "grad_norm": 0.515625, "learning_rate": 1.465191300832685e-05, "loss": 4.0352, "step": 3736 }, { "epoch": 1.244857166652786, "grad_norm": 0.54296875, "learning_rate": 1.4651669234059558e-05, "loss": 4.0417, "step": 3737 }, { "epoch": 1.2451903056550346, "grad_norm": 0.5390625, "learning_rate": 1.465142537649081e-05, "loss": 3.9937, "step": 3738 }, { "epoch": 1.2455234446572834, "grad_norm": 0.5, "learning_rate": 1.4651181435623447e-05, "loss": 4.0854, "step": 3739 }, { "epoch": 1.245856583659532, "grad_norm": 0.494140625, "learning_rate": 1.4650937411460308e-05, "loss": 4.1873, "step": 3740 }, { "epoch": 1.2461897226617806, "grad_norm": 0.52734375, "learning_rate": 1.4650693304004239e-05, "loss": 4.0285, "step": 3741 }, { "epoch": 1.2465228616640294, "grad_norm": 0.50390625, "learning_rate": 1.4650449113258082e-05, "loss": 4.1039, "step": 3742 }, { "epoch": 1.2468560006662779, "grad_norm": 0.5234375, "learning_rate": 1.4650204839224677e-05, "loss": 4.1221, "step": 3743 }, { "epoch": 1.2471891396685266, "grad_norm": 0.48046875, "learning_rate": 1.4649960481906877e-05, "loss": 4.0397, "step": 3744 }, { "epoch": 1.2475222786707754, "grad_norm": 0.5234375, "learning_rate": 1.4649716041307522e-05, "loss": 4.0624, "step": 3745 }, { "epoch": 1.247855417673024, "grad_norm": 0.51953125, "learning_rate": 1.4649471517429464e-05, "loss": 4.0496, "step": 3746 }, { "epoch": 1.2481885566752728, "grad_norm": 0.50390625, "learning_rate": 1.4649226910275546e-05, "loss": 4.1252, "step": 3747 }, { "epoch": 1.2485216956775214, "grad_norm": 0.51171875, "learning_rate": 1.464898221984862e-05, "loss": 4.071, "step": 3748 }, { "epoch": 1.24885483467977, "grad_norm": 0.51953125, "learning_rate": 1.4648737446151537e-05, "loss": 4.0329, "step": 3749 }, { "epoch": 1.2491879736820188, "grad_norm": 0.53125, "learning_rate": 1.4648492589187145e-05, "loss": 4.036, "step": 3750 }, { "epoch": 1.2495211126842676, "grad_norm": 0.53125, "learning_rate": 1.4648247648958297e-05, "loss": 4.0556, "step": 3751 }, { "epoch": 1.249854251686516, "grad_norm": 0.49609375, "learning_rate": 1.464800262546785e-05, "loss": 4.1137, "step": 3752 }, { "epoch": 1.2501873906887648, "grad_norm": 0.53125, "learning_rate": 1.4647757518718653e-05, "loss": 3.9813, "step": 3753 }, { "epoch": 1.2505205296910136, "grad_norm": 0.515625, "learning_rate": 1.4647512328713564e-05, "loss": 4.0749, "step": 3754 }, { "epoch": 1.2508536686932623, "grad_norm": 0.53125, "learning_rate": 1.464726705545544e-05, "loss": 4.0882, "step": 3755 }, { "epoch": 1.251186807695511, "grad_norm": 0.49609375, "learning_rate": 1.4647021698947132e-05, "loss": 4.0815, "step": 3756 }, { "epoch": 1.2515199466977596, "grad_norm": 0.515625, "learning_rate": 1.4646776259191502e-05, "loss": 4.07, "step": 3757 }, { "epoch": 1.2518530857000083, "grad_norm": 0.490234375, "learning_rate": 1.464653073619141e-05, "loss": 4.0936, "step": 3758 }, { "epoch": 1.252186224702257, "grad_norm": 0.5234375, "learning_rate": 1.4646285129949714e-05, "loss": 4.0798, "step": 3759 }, { "epoch": 1.2525193637045058, "grad_norm": 0.546875, "learning_rate": 1.4646039440469274e-05, "loss": 4.0816, "step": 3760 }, { "epoch": 1.2528525027067543, "grad_norm": 0.515625, "learning_rate": 1.4645793667752954e-05, "loss": 4.1006, "step": 3761 }, { "epoch": 1.253185641709003, "grad_norm": 0.53125, "learning_rate": 1.4645547811803616e-05, "loss": 4.1009, "step": 3762 }, { "epoch": 1.2535187807112518, "grad_norm": 0.515625, "learning_rate": 1.4645301872624121e-05, "loss": 4.1614, "step": 3763 }, { "epoch": 1.2538519197135005, "grad_norm": 0.5234375, "learning_rate": 1.4645055850217337e-05, "loss": 4.0554, "step": 3764 }, { "epoch": 1.2541850587157493, "grad_norm": 0.50390625, "learning_rate": 1.4644809744586129e-05, "loss": 4.0402, "step": 3765 }, { "epoch": 1.2545181977179978, "grad_norm": 0.55859375, "learning_rate": 1.4644563555733362e-05, "loss": 4.1025, "step": 3766 }, { "epoch": 1.2548513367202465, "grad_norm": 0.515625, "learning_rate": 1.4644317283661906e-05, "loss": 4.083, "step": 3767 }, { "epoch": 1.2551844757224953, "grad_norm": 0.51171875, "learning_rate": 1.464407092837463e-05, "loss": 4.0755, "step": 3768 }, { "epoch": 1.2555176147247438, "grad_norm": 0.50390625, "learning_rate": 1.4643824489874398e-05, "loss": 4.1311, "step": 3769 }, { "epoch": 1.2558507537269925, "grad_norm": 0.515625, "learning_rate": 1.4643577968164085e-05, "loss": 4.1317, "step": 3770 }, { "epoch": 1.2561838927292412, "grad_norm": 0.486328125, "learning_rate": 1.4643331363246563e-05, "loss": 4.1492, "step": 3771 }, { "epoch": 1.25651703173149, "grad_norm": 0.5234375, "learning_rate": 1.4643084675124702e-05, "loss": 4.0783, "step": 3772 }, { "epoch": 1.2568501707337387, "grad_norm": 0.52734375, "learning_rate": 1.4642837903801376e-05, "loss": 4.05, "step": 3773 }, { "epoch": 1.2571833097359875, "grad_norm": 0.52734375, "learning_rate": 1.4642591049279462e-05, "loss": 3.9849, "step": 3774 }, { "epoch": 1.257516448738236, "grad_norm": 0.51171875, "learning_rate": 1.4642344111561831e-05, "loss": 4.0912, "step": 3775 }, { "epoch": 1.2578495877404847, "grad_norm": 0.52734375, "learning_rate": 1.4642097090651362e-05, "loss": 4.1203, "step": 3776 }, { "epoch": 1.2581827267427335, "grad_norm": 0.51953125, "learning_rate": 1.4641849986550932e-05, "loss": 4.0422, "step": 3777 }, { "epoch": 1.258515865744982, "grad_norm": 0.486328125, "learning_rate": 1.4641602799263419e-05, "loss": 4.1679, "step": 3778 }, { "epoch": 1.2588490047472307, "grad_norm": 0.486328125, "learning_rate": 1.46413555287917e-05, "loss": 4.063, "step": 3779 }, { "epoch": 1.2591821437494795, "grad_norm": 0.51953125, "learning_rate": 1.464110817513866e-05, "loss": 4.0652, "step": 3780 }, { "epoch": 1.2595152827517282, "grad_norm": 0.53125, "learning_rate": 1.4640860738307176e-05, "loss": 4.0564, "step": 3781 }, { "epoch": 1.259848421753977, "grad_norm": 0.515625, "learning_rate": 1.4640613218300132e-05, "loss": 4.0493, "step": 3782 }, { "epoch": 1.2601815607562257, "grad_norm": 0.51171875, "learning_rate": 1.464036561512041e-05, "loss": 4.011, "step": 3783 }, { "epoch": 1.2605146997584742, "grad_norm": 0.52734375, "learning_rate": 1.4640117928770895e-05, "loss": 4.092, "step": 3784 }, { "epoch": 1.260847838760723, "grad_norm": 0.515625, "learning_rate": 1.463987015925447e-05, "loss": 4.0391, "step": 3785 }, { "epoch": 1.2611809777629717, "grad_norm": 0.515625, "learning_rate": 1.4639622306574026e-05, "loss": 4.0186, "step": 3786 }, { "epoch": 1.2615141167652202, "grad_norm": 0.5390625, "learning_rate": 1.4639374370732445e-05, "loss": 4.0643, "step": 3787 }, { "epoch": 1.261847255767469, "grad_norm": 0.48828125, "learning_rate": 1.4639126351732615e-05, "loss": 4.1221, "step": 3788 }, { "epoch": 1.2621803947697177, "grad_norm": 0.4921875, "learning_rate": 1.4638878249577426e-05, "loss": 4.0669, "step": 3789 }, { "epoch": 1.2625135337719664, "grad_norm": 0.494140625, "learning_rate": 1.4638630064269771e-05, "loss": 4.1264, "step": 3790 }, { "epoch": 1.2628466727742151, "grad_norm": 0.52734375, "learning_rate": 1.4638381795812536e-05, "loss": 4.1002, "step": 3791 }, { "epoch": 1.2631798117764637, "grad_norm": 0.515625, "learning_rate": 1.4638133444208616e-05, "loss": 4.1164, "step": 3792 }, { "epoch": 1.2635129507787124, "grad_norm": 0.51953125, "learning_rate": 1.4637885009460902e-05, "loss": 4.0403, "step": 3793 }, { "epoch": 1.2638460897809611, "grad_norm": 0.5078125, "learning_rate": 1.4637636491572288e-05, "loss": 4.0073, "step": 3794 }, { "epoch": 1.2641792287832099, "grad_norm": 0.5078125, "learning_rate": 1.4637387890545671e-05, "loss": 4.1351, "step": 3795 }, { "epoch": 1.2645123677854584, "grad_norm": 0.51171875, "learning_rate": 1.463713920638394e-05, "loss": 4.111, "step": 3796 }, { "epoch": 1.2648455067877071, "grad_norm": 0.51953125, "learning_rate": 1.4636890439089999e-05, "loss": 4.0454, "step": 3797 }, { "epoch": 1.2651786457899559, "grad_norm": 0.53125, "learning_rate": 1.4636641588666745e-05, "loss": 4.0633, "step": 3798 }, { "epoch": 1.2655117847922046, "grad_norm": 0.51171875, "learning_rate": 1.4636392655117071e-05, "loss": 4.1082, "step": 3799 }, { "epoch": 1.2658449237944533, "grad_norm": 0.5234375, "learning_rate": 1.463614363844388e-05, "loss": 4.1385, "step": 3800 }, { "epoch": 1.2661780627967019, "grad_norm": 0.50390625, "learning_rate": 1.4635894538650074e-05, "loss": 4.0552, "step": 3801 }, { "epoch": 1.2665112017989506, "grad_norm": 0.5234375, "learning_rate": 1.4635645355738553e-05, "loss": 4.0825, "step": 3802 }, { "epoch": 1.2668443408011993, "grad_norm": 0.51171875, "learning_rate": 1.463539608971222e-05, "loss": 4.0265, "step": 3803 }, { "epoch": 1.2671774798034479, "grad_norm": 0.51171875, "learning_rate": 1.4635146740573976e-05, "loss": 4.0719, "step": 3804 }, { "epoch": 1.2675106188056966, "grad_norm": 0.482421875, "learning_rate": 1.463489730832673e-05, "loss": 4.0772, "step": 3805 }, { "epoch": 1.2678437578079453, "grad_norm": 0.490234375, "learning_rate": 1.463464779297338e-05, "loss": 4.1438, "step": 3806 }, { "epoch": 1.268176896810194, "grad_norm": 0.53125, "learning_rate": 1.463439819451684e-05, "loss": 4.0623, "step": 3807 }, { "epoch": 1.2685100358124428, "grad_norm": 0.51171875, "learning_rate": 1.4634148512960014e-05, "loss": 4.1218, "step": 3808 }, { "epoch": 1.2688431748146916, "grad_norm": 0.5, "learning_rate": 1.463389874830581e-05, "loss": 4.0773, "step": 3809 }, { "epoch": 1.26917631381694, "grad_norm": 0.5078125, "learning_rate": 1.4633648900557138e-05, "loss": 4.1103, "step": 3810 }, { "epoch": 1.2695094528191888, "grad_norm": 0.49609375, "learning_rate": 1.4633398969716907e-05, "loss": 4.0904, "step": 3811 }, { "epoch": 1.2698425918214375, "grad_norm": 0.51171875, "learning_rate": 1.4633148955788029e-05, "loss": 4.0259, "step": 3812 }, { "epoch": 1.270175730823686, "grad_norm": 0.498046875, "learning_rate": 1.4632898858773416e-05, "loss": 3.9669, "step": 3813 }, { "epoch": 1.2705088698259348, "grad_norm": 0.5234375, "learning_rate": 1.4632648678675981e-05, "loss": 4.1072, "step": 3814 }, { "epoch": 1.2708420088281835, "grad_norm": 0.484375, "learning_rate": 1.4632398415498638e-05, "loss": 4.1338, "step": 3815 }, { "epoch": 1.2711751478304323, "grad_norm": 0.498046875, "learning_rate": 1.4632148069244303e-05, "loss": 4.1579, "step": 3816 }, { "epoch": 1.271508286832681, "grad_norm": 0.5078125, "learning_rate": 1.4631897639915892e-05, "loss": 4.0417, "step": 3817 }, { "epoch": 1.2718414258349298, "grad_norm": 0.52734375, "learning_rate": 1.463164712751632e-05, "loss": 4.0571, "step": 3818 }, { "epoch": 1.2721745648371783, "grad_norm": 0.5390625, "learning_rate": 1.4631396532048504e-05, "loss": 4.0652, "step": 3819 }, { "epoch": 1.272507703839427, "grad_norm": 0.50390625, "learning_rate": 1.463114585351537e-05, "loss": 4.0881, "step": 3820 }, { "epoch": 1.2728408428416758, "grad_norm": 0.5234375, "learning_rate": 1.4630895091919828e-05, "loss": 3.9922, "step": 3821 }, { "epoch": 1.2731739818439243, "grad_norm": 0.5078125, "learning_rate": 1.4630644247264806e-05, "loss": 4.0777, "step": 3822 }, { "epoch": 1.273507120846173, "grad_norm": 0.5, "learning_rate": 1.4630393319553222e-05, "loss": 4.0639, "step": 3823 }, { "epoch": 1.2738402598484218, "grad_norm": 0.54296875, "learning_rate": 1.4630142308788001e-05, "loss": 4.1021, "step": 3824 }, { "epoch": 1.2741733988506705, "grad_norm": 0.515625, "learning_rate": 1.4629891214972069e-05, "loss": 4.0703, "step": 3825 }, { "epoch": 1.2745065378529192, "grad_norm": 0.51953125, "learning_rate": 1.4629640038108343e-05, "loss": 4.0894, "step": 3826 }, { "epoch": 1.2748396768551677, "grad_norm": 0.5390625, "learning_rate": 1.4629388778199756e-05, "loss": 4.0485, "step": 3827 }, { "epoch": 1.2751728158574165, "grad_norm": 0.5234375, "learning_rate": 1.4629137435249232e-05, "loss": 4.0484, "step": 3828 }, { "epoch": 1.2755059548596652, "grad_norm": 0.51953125, "learning_rate": 1.4628886009259698e-05, "loss": 4.0474, "step": 3829 }, { "epoch": 1.275839093861914, "grad_norm": 0.546875, "learning_rate": 1.4628634500234081e-05, "loss": 4.1232, "step": 3830 }, { "epoch": 1.2761722328641625, "grad_norm": 0.55859375, "learning_rate": 1.4628382908175315e-05, "loss": 4.1079, "step": 3831 }, { "epoch": 1.2765053718664112, "grad_norm": 0.55078125, "learning_rate": 1.462813123308633e-05, "loss": 4.0354, "step": 3832 }, { "epoch": 1.27683851086866, "grad_norm": 0.53515625, "learning_rate": 1.4627879474970051e-05, "loss": 4.0726, "step": 3833 }, { "epoch": 1.2771716498709087, "grad_norm": 0.490234375, "learning_rate": 1.4627627633829419e-05, "loss": 4.1081, "step": 3834 }, { "epoch": 1.2775047888731574, "grad_norm": 0.515625, "learning_rate": 1.4627375709667363e-05, "loss": 4.0942, "step": 3835 }, { "epoch": 1.277837927875406, "grad_norm": 0.48046875, "learning_rate": 1.4627123702486816e-05, "loss": 4.0872, "step": 3836 }, { "epoch": 1.2781710668776547, "grad_norm": 0.53125, "learning_rate": 1.4626871612290715e-05, "loss": 4.0943, "step": 3837 }, { "epoch": 1.2785042058799034, "grad_norm": 0.54296875, "learning_rate": 1.4626619439081998e-05, "loss": 4.0736, "step": 3838 }, { "epoch": 1.278837344882152, "grad_norm": 0.52734375, "learning_rate": 1.46263671828636e-05, "loss": 4.1064, "step": 3839 }, { "epoch": 1.2791704838844007, "grad_norm": 0.5390625, "learning_rate": 1.4626114843638461e-05, "loss": 4.0683, "step": 3840 }, { "epoch": 1.2795036228866494, "grad_norm": 0.50390625, "learning_rate": 1.4625862421409518e-05, "loss": 4.0632, "step": 3841 }, { "epoch": 1.2798367618888982, "grad_norm": 0.5234375, "learning_rate": 1.4625609916179713e-05, "loss": 4.0387, "step": 3842 }, { "epoch": 1.280169900891147, "grad_norm": 0.494140625, "learning_rate": 1.4625357327951985e-05, "loss": 4.0641, "step": 3843 }, { "epoch": 1.2805030398933956, "grad_norm": 0.515625, "learning_rate": 1.4625104656729282e-05, "loss": 4.102, "step": 3844 }, { "epoch": 1.2808361788956442, "grad_norm": 0.51953125, "learning_rate": 1.4624851902514538e-05, "loss": 4.1395, "step": 3845 }, { "epoch": 1.281169317897893, "grad_norm": 0.5390625, "learning_rate": 1.4624599065310704e-05, "loss": 4.1239, "step": 3846 }, { "epoch": 1.2815024569001416, "grad_norm": 0.5390625, "learning_rate": 1.4624346145120722e-05, "loss": 4.0665, "step": 3847 }, { "epoch": 1.2818355959023902, "grad_norm": 0.5546875, "learning_rate": 1.462409314194754e-05, "loss": 4.0233, "step": 3848 }, { "epoch": 1.282168734904639, "grad_norm": 0.515625, "learning_rate": 1.4623840055794102e-05, "loss": 4.127, "step": 3849 }, { "epoch": 1.2825018739068876, "grad_norm": 0.51953125, "learning_rate": 1.4623586886663357e-05, "loss": 4.1084, "step": 3850 }, { "epoch": 1.2828350129091364, "grad_norm": 0.515625, "learning_rate": 1.4623333634558256e-05, "loss": 4.1056, "step": 3851 }, { "epoch": 1.2831681519113851, "grad_norm": 0.515625, "learning_rate": 1.4623080299481747e-05, "loss": 4.1006, "step": 3852 }, { "epoch": 1.2835012909136339, "grad_norm": 0.52734375, "learning_rate": 1.462282688143678e-05, "loss": 4.141, "step": 3853 }, { "epoch": 1.2838344299158824, "grad_norm": 0.515625, "learning_rate": 1.462257338042631e-05, "loss": 4.0921, "step": 3854 }, { "epoch": 1.284167568918131, "grad_norm": 0.51171875, "learning_rate": 1.4622319796453287e-05, "loss": 4.1195, "step": 3855 }, { "epoch": 1.2845007079203798, "grad_norm": 0.5390625, "learning_rate": 1.4622066129520664e-05, "loss": 4.1207, "step": 3856 }, { "epoch": 1.2848338469226284, "grad_norm": 0.50390625, "learning_rate": 1.4621812379631397e-05, "loss": 4.0883, "step": 3857 }, { "epoch": 1.285166985924877, "grad_norm": 0.5078125, "learning_rate": 1.4621558546788442e-05, "loss": 4.0766, "step": 3858 }, { "epoch": 1.2855001249271258, "grad_norm": 0.5390625, "learning_rate": 1.4621304630994757e-05, "loss": 4.0515, "step": 3859 }, { "epoch": 1.2858332639293746, "grad_norm": 0.51171875, "learning_rate": 1.4621050632253296e-05, "loss": 4.1036, "step": 3860 }, { "epoch": 1.2861664029316233, "grad_norm": 0.5234375, "learning_rate": 1.4620796550567018e-05, "loss": 4.0783, "step": 3861 }, { "epoch": 1.2864995419338718, "grad_norm": 0.5, "learning_rate": 1.4620542385938887e-05, "loss": 4.1056, "step": 3862 }, { "epoch": 1.2868326809361206, "grad_norm": 0.51171875, "learning_rate": 1.4620288138371858e-05, "loss": 4.0769, "step": 3863 }, { "epoch": 1.2871658199383693, "grad_norm": 0.52734375, "learning_rate": 1.4620033807868894e-05, "loss": 4.0759, "step": 3864 }, { "epoch": 1.287498958940618, "grad_norm": 0.546875, "learning_rate": 1.4619779394432962e-05, "loss": 4.0074, "step": 3865 }, { "epoch": 1.2878320979428666, "grad_norm": 0.53515625, "learning_rate": 1.4619524898067016e-05, "loss": 4.0704, "step": 3866 }, { "epoch": 1.2881652369451153, "grad_norm": 0.53125, "learning_rate": 1.4619270318774031e-05, "loss": 4.101, "step": 3867 }, { "epoch": 1.288498375947364, "grad_norm": 0.5546875, "learning_rate": 1.4619015656556963e-05, "loss": 4.0417, "step": 3868 }, { "epoch": 1.2888315149496128, "grad_norm": 0.5234375, "learning_rate": 1.4618760911418787e-05, "loss": 4.0445, "step": 3869 }, { "epoch": 1.2891646539518615, "grad_norm": 0.498046875, "learning_rate": 1.461850608336246e-05, "loss": 4.1342, "step": 3870 }, { "epoch": 1.28949779295411, "grad_norm": 0.50390625, "learning_rate": 1.4618251172390959e-05, "loss": 4.1371, "step": 3871 }, { "epoch": 1.2898309319563588, "grad_norm": 0.52734375, "learning_rate": 1.461799617850725e-05, "loss": 4.0853, "step": 3872 }, { "epoch": 1.2901640709586075, "grad_norm": 0.51171875, "learning_rate": 1.4617741101714303e-05, "loss": 4.0702, "step": 3873 }, { "epoch": 1.290497209960856, "grad_norm": 0.53125, "learning_rate": 1.4617485942015088e-05, "loss": 4.1167, "step": 3874 }, { "epoch": 1.2908303489631048, "grad_norm": 0.478515625, "learning_rate": 1.4617230699412579e-05, "loss": 4.0881, "step": 3875 }, { "epoch": 1.2911634879653535, "grad_norm": 0.515625, "learning_rate": 1.461697537390975e-05, "loss": 4.0638, "step": 3876 }, { "epoch": 1.2914966269676023, "grad_norm": 0.53125, "learning_rate": 1.4616719965509569e-05, "loss": 4.0679, "step": 3877 }, { "epoch": 1.291829765969851, "grad_norm": 0.515625, "learning_rate": 1.4616464474215019e-05, "loss": 4.0504, "step": 3878 }, { "epoch": 1.2921629049720997, "grad_norm": 0.51953125, "learning_rate": 1.461620890002907e-05, "loss": 4.1102, "step": 3879 }, { "epoch": 1.2924960439743483, "grad_norm": 0.498046875, "learning_rate": 1.4615953242954703e-05, "loss": 4.014, "step": 3880 }, { "epoch": 1.292829182976597, "grad_norm": 0.52734375, "learning_rate": 1.4615697502994891e-05, "loss": 4.1112, "step": 3881 }, { "epoch": 1.2931623219788457, "grad_norm": 0.515625, "learning_rate": 1.4615441680152618e-05, "loss": 4.1334, "step": 3882 }, { "epoch": 1.2934954609810942, "grad_norm": 0.515625, "learning_rate": 1.461518577443086e-05, "loss": 4.1275, "step": 3883 }, { "epoch": 1.293828599983343, "grad_norm": 0.51953125, "learning_rate": 1.46149297858326e-05, "loss": 4.0989, "step": 3884 }, { "epoch": 1.2941617389855917, "grad_norm": 0.50390625, "learning_rate": 1.4614673714360818e-05, "loss": 4.0821, "step": 3885 }, { "epoch": 1.2944948779878405, "grad_norm": 0.53125, "learning_rate": 1.46144175600185e-05, "loss": 4.0398, "step": 3886 }, { "epoch": 1.2948280169900892, "grad_norm": 0.5078125, "learning_rate": 1.4614161322808625e-05, "loss": 4.1194, "step": 3887 }, { "epoch": 1.2951611559923377, "grad_norm": 0.5390625, "learning_rate": 1.4613905002734181e-05, "loss": 4.0156, "step": 3888 }, { "epoch": 1.2954942949945865, "grad_norm": 0.5390625, "learning_rate": 1.4613648599798151e-05, "loss": 4.0869, "step": 3889 }, { "epoch": 1.2958274339968352, "grad_norm": 0.51953125, "learning_rate": 1.4613392114003523e-05, "loss": 4.1074, "step": 3890 }, { "epoch": 1.296160572999084, "grad_norm": 0.498046875, "learning_rate": 1.4613135545353286e-05, "loss": 4.0429, "step": 3891 }, { "epoch": 1.2964937120013325, "grad_norm": 0.51171875, "learning_rate": 1.4612878893850427e-05, "loss": 4.0623, "step": 3892 }, { "epoch": 1.2968268510035812, "grad_norm": 0.50390625, "learning_rate": 1.4612622159497933e-05, "loss": 4.0794, "step": 3893 }, { "epoch": 1.29715999000583, "grad_norm": 0.5078125, "learning_rate": 1.4612365342298801e-05, "loss": 4.0765, "step": 3894 }, { "epoch": 1.2974931290080787, "grad_norm": 0.5234375, "learning_rate": 1.4612108442256014e-05, "loss": 4.0121, "step": 3895 }, { "epoch": 1.2978262680103274, "grad_norm": 0.546875, "learning_rate": 1.461185145937257e-05, "loss": 4.0151, "step": 3896 }, { "epoch": 1.298159407012576, "grad_norm": 0.51953125, "learning_rate": 1.461159439365146e-05, "loss": 4.0463, "step": 3897 }, { "epoch": 1.2984925460148247, "grad_norm": 0.546875, "learning_rate": 1.461133724509568e-05, "loss": 4.0256, "step": 3898 }, { "epoch": 1.2988256850170734, "grad_norm": 0.4921875, "learning_rate": 1.4611080013708224e-05, "loss": 4.148, "step": 3899 }, { "epoch": 1.2991588240193221, "grad_norm": 0.52734375, "learning_rate": 1.4610822699492087e-05, "loss": 4.0966, "step": 3900 }, { "epoch": 1.2994919630215707, "grad_norm": 0.5078125, "learning_rate": 1.4610565302450271e-05, "loss": 4.1866, "step": 3901 }, { "epoch": 1.2998251020238194, "grad_norm": 0.5078125, "learning_rate": 1.4610307822585768e-05, "loss": 4.0601, "step": 3902 }, { "epoch": 1.3001582410260681, "grad_norm": 0.546875, "learning_rate": 1.4610050259901581e-05, "loss": 4.1166, "step": 3903 }, { "epoch": 1.3004913800283169, "grad_norm": 0.51953125, "learning_rate": 1.4609792614400707e-05, "loss": 4.0728, "step": 3904 }, { "epoch": 1.3008245190305656, "grad_norm": 0.5390625, "learning_rate": 1.460953488608615e-05, "loss": 4.1181, "step": 3905 }, { "epoch": 1.3011576580328141, "grad_norm": 0.51171875, "learning_rate": 1.460927707496091e-05, "loss": 4.0599, "step": 3906 }, { "epoch": 1.3014907970350629, "grad_norm": 0.49609375, "learning_rate": 1.4609019181027992e-05, "loss": 4.1005, "step": 3907 }, { "epoch": 1.3018239360373116, "grad_norm": 0.52734375, "learning_rate": 1.4608761204290398e-05, "loss": 4.0411, "step": 3908 }, { "epoch": 1.3021570750395601, "grad_norm": 0.50390625, "learning_rate": 1.4608503144751133e-05, "loss": 4.0779, "step": 3909 }, { "epoch": 1.3024902140418089, "grad_norm": 0.5234375, "learning_rate": 1.4608245002413204e-05, "loss": 4.0661, "step": 3910 }, { "epoch": 1.3028233530440576, "grad_norm": 0.5078125, "learning_rate": 1.4607986777279618e-05, "loss": 4.044, "step": 3911 }, { "epoch": 1.3031564920463063, "grad_norm": 0.52734375, "learning_rate": 1.460772846935338e-05, "loss": 4.0888, "step": 3912 }, { "epoch": 1.303489631048555, "grad_norm": 0.5, "learning_rate": 1.4607470078637502e-05, "loss": 4.0943, "step": 3913 }, { "epoch": 1.3038227700508038, "grad_norm": 0.53515625, "learning_rate": 1.4607211605134991e-05, "loss": 4.0395, "step": 3914 }, { "epoch": 1.3041559090530523, "grad_norm": 0.515625, "learning_rate": 1.4606953048848862e-05, "loss": 4.0045, "step": 3915 }, { "epoch": 1.304489048055301, "grad_norm": 0.515625, "learning_rate": 1.4606694409782121e-05, "loss": 4.0755, "step": 3916 }, { "epoch": 1.3048221870575498, "grad_norm": 0.51171875, "learning_rate": 1.4606435687937785e-05, "loss": 4.1065, "step": 3917 }, { "epoch": 1.3051553260597983, "grad_norm": 0.5078125, "learning_rate": 1.4606176883318863e-05, "loss": 4.0663, "step": 3918 }, { "epoch": 1.305488465062047, "grad_norm": 0.5234375, "learning_rate": 1.4605917995928375e-05, "loss": 4.0712, "step": 3919 }, { "epoch": 1.3058216040642958, "grad_norm": 0.51953125, "learning_rate": 1.4605659025769332e-05, "loss": 4.048, "step": 3920 }, { "epoch": 1.3061547430665446, "grad_norm": 0.515625, "learning_rate": 1.4605399972844751e-05, "loss": 4.1355, "step": 3921 }, { "epoch": 1.3064878820687933, "grad_norm": 0.51171875, "learning_rate": 1.4605140837157654e-05, "loss": 4.0281, "step": 3922 }, { "epoch": 1.3068210210710418, "grad_norm": 0.546875, "learning_rate": 1.4604881618711055e-05, "loss": 4.1607, "step": 3923 }, { "epoch": 1.3071541600732905, "grad_norm": 0.515625, "learning_rate": 1.4604622317507974e-05, "loss": 4.0787, "step": 3924 }, { "epoch": 1.3074872990755393, "grad_norm": 0.53125, "learning_rate": 1.460436293355143e-05, "loss": 4.0331, "step": 3925 }, { "epoch": 1.307820438077788, "grad_norm": 0.5078125, "learning_rate": 1.4604103466844448e-05, "loss": 4.0722, "step": 3926 }, { "epoch": 1.3081535770800365, "grad_norm": 0.5234375, "learning_rate": 1.4603843917390047e-05, "loss": 4.0534, "step": 3927 }, { "epoch": 1.3084867160822853, "grad_norm": 0.5390625, "learning_rate": 1.460358428519125e-05, "loss": 3.9995, "step": 3928 }, { "epoch": 1.308819855084534, "grad_norm": 0.53515625, "learning_rate": 1.4603324570251085e-05, "loss": 4.0706, "step": 3929 }, { "epoch": 1.3091529940867828, "grad_norm": 0.51953125, "learning_rate": 1.4603064772572574e-05, "loss": 4.0753, "step": 3930 }, { "epoch": 1.3094861330890315, "grad_norm": 0.546875, "learning_rate": 1.460280489215874e-05, "loss": 4.0959, "step": 3931 }, { "epoch": 1.30981927209128, "grad_norm": 0.546875, "learning_rate": 1.4602544929012618e-05, "loss": 4.0851, "step": 3932 }, { "epoch": 1.3101524110935288, "grad_norm": 0.5234375, "learning_rate": 1.460228488313723e-05, "loss": 3.9853, "step": 3933 }, { "epoch": 1.3104855500957775, "grad_norm": 0.51953125, "learning_rate": 1.4602024754535609e-05, "loss": 4.1095, "step": 3934 }, { "epoch": 1.310818689098026, "grad_norm": 0.490234375, "learning_rate": 1.460176454321078e-05, "loss": 4.1018, "step": 3935 }, { "epoch": 1.3111518281002748, "grad_norm": 0.494140625, "learning_rate": 1.4601504249165777e-05, "loss": 4.1011, "step": 3936 }, { "epoch": 1.3114849671025235, "grad_norm": 0.486328125, "learning_rate": 1.4601243872403631e-05, "loss": 4.0693, "step": 3937 }, { "epoch": 1.3118181061047722, "grad_norm": 0.50390625, "learning_rate": 1.4600983412927376e-05, "loss": 4.0882, "step": 3938 }, { "epoch": 1.312151245107021, "grad_norm": 0.5234375, "learning_rate": 1.4600722870740043e-05, "loss": 4.0792, "step": 3939 }, { "epoch": 1.3124843841092697, "grad_norm": 0.5, "learning_rate": 1.460046224584467e-05, "loss": 4.0985, "step": 3940 }, { "epoch": 1.3128175231115182, "grad_norm": 0.50390625, "learning_rate": 1.4600201538244292e-05, "loss": 4.1341, "step": 3941 }, { "epoch": 1.313150662113767, "grad_norm": 0.5390625, "learning_rate": 1.4599940747941944e-05, "loss": 4.0038, "step": 3942 }, { "epoch": 1.3134838011160157, "grad_norm": 0.5234375, "learning_rate": 1.4599679874940666e-05, "loss": 4.0646, "step": 3943 }, { "epoch": 1.3138169401182642, "grad_norm": 0.53125, "learning_rate": 1.4599418919243496e-05, "loss": 4.0185, "step": 3944 }, { "epoch": 1.314150079120513, "grad_norm": 0.5390625, "learning_rate": 1.459915788085347e-05, "loss": 4.08, "step": 3945 }, { "epoch": 1.3144832181227617, "grad_norm": 0.51171875, "learning_rate": 1.4598896759773633e-05, "loss": 3.9989, "step": 3946 }, { "epoch": 1.3148163571250104, "grad_norm": 0.53515625, "learning_rate": 1.4598635556007025e-05, "loss": 4.0855, "step": 3947 }, { "epoch": 1.3151494961272592, "grad_norm": 0.51953125, "learning_rate": 1.4598374269556688e-05, "loss": 4.1122, "step": 3948 }, { "epoch": 1.315482635129508, "grad_norm": 0.5234375, "learning_rate": 1.4598112900425667e-05, "loss": 4.0517, "step": 3949 }, { "epoch": 1.3158157741317564, "grad_norm": 0.50390625, "learning_rate": 1.4597851448617003e-05, "loss": 4.1005, "step": 3950 }, { "epoch": 1.3161489131340052, "grad_norm": 0.51171875, "learning_rate": 1.4597589914133746e-05, "loss": 4.106, "step": 3951 }, { "epoch": 1.316482052136254, "grad_norm": 0.55078125, "learning_rate": 1.4597328296978938e-05, "loss": 4.0556, "step": 3952 }, { "epoch": 1.3168151911385024, "grad_norm": 0.53515625, "learning_rate": 1.4597066597155629e-05, "loss": 4.092, "step": 3953 }, { "epoch": 1.3171483301407512, "grad_norm": 0.515625, "learning_rate": 1.4596804814666866e-05, "loss": 4.1358, "step": 3954 }, { "epoch": 1.317481469143, "grad_norm": 0.53125, "learning_rate": 1.4596542949515699e-05, "loss": 4.0652, "step": 3955 }, { "epoch": 1.3178146081452486, "grad_norm": 0.5234375, "learning_rate": 1.4596281001705178e-05, "loss": 4.1294, "step": 3956 }, { "epoch": 1.3181477471474974, "grad_norm": 0.5, "learning_rate": 1.4596018971238353e-05, "loss": 4.1326, "step": 3957 }, { "epoch": 1.318480886149746, "grad_norm": 0.52734375, "learning_rate": 1.4595756858118277e-05, "loss": 4.0338, "step": 3958 }, { "epoch": 1.3188140251519946, "grad_norm": 0.50390625, "learning_rate": 1.4595494662348004e-05, "loss": 4.1285, "step": 3959 }, { "epoch": 1.3191471641542434, "grad_norm": 0.5234375, "learning_rate": 1.4595232383930587e-05, "loss": 4.0626, "step": 3960 }, { "epoch": 1.3194803031564921, "grad_norm": 0.50390625, "learning_rate": 1.459497002286908e-05, "loss": 4.1002, "step": 3961 }, { "epoch": 1.3198134421587406, "grad_norm": 0.5390625, "learning_rate": 1.4594707579166538e-05, "loss": 4.0942, "step": 3962 }, { "epoch": 1.3201465811609894, "grad_norm": 0.5078125, "learning_rate": 1.4594445052826025e-05, "loss": 4.0797, "step": 3963 }, { "epoch": 1.3204797201632381, "grad_norm": 0.546875, "learning_rate": 1.459418244385059e-05, "loss": 4.0834, "step": 3964 }, { "epoch": 1.3208128591654869, "grad_norm": 0.515625, "learning_rate": 1.4593919752243296e-05, "loss": 4.1424, "step": 3965 }, { "epoch": 1.3211459981677356, "grad_norm": 0.51171875, "learning_rate": 1.45936569780072e-05, "loss": 4.0562, "step": 3966 }, { "epoch": 1.321479137169984, "grad_norm": 0.53125, "learning_rate": 1.4593394121145368e-05, "loss": 4.0622, "step": 3967 }, { "epoch": 1.3218122761722328, "grad_norm": 0.51171875, "learning_rate": 1.459313118166086e-05, "loss": 4.0628, "step": 3968 }, { "epoch": 1.3221454151744816, "grad_norm": 0.53125, "learning_rate": 1.4592868159556736e-05, "loss": 4.1065, "step": 3969 }, { "epoch": 1.32247855417673, "grad_norm": 0.55859375, "learning_rate": 1.459260505483606e-05, "loss": 4.0315, "step": 3970 }, { "epoch": 1.3228116931789788, "grad_norm": 0.5390625, "learning_rate": 1.45923418675019e-05, "loss": 4.0962, "step": 3971 }, { "epoch": 1.3231448321812276, "grad_norm": 0.498046875, "learning_rate": 1.4592078597557316e-05, "loss": 4.1184, "step": 3972 }, { "epoch": 1.3234779711834763, "grad_norm": 0.53515625, "learning_rate": 1.4591815245005379e-05, "loss": 4.0718, "step": 3973 }, { "epoch": 1.323811110185725, "grad_norm": 0.5, "learning_rate": 1.4591551809849157e-05, "loss": 4.0496, "step": 3974 }, { "epoch": 1.3241442491879738, "grad_norm": 0.51171875, "learning_rate": 1.4591288292091715e-05, "loss": 4.1182, "step": 3975 }, { "epoch": 1.3244773881902223, "grad_norm": 0.51953125, "learning_rate": 1.4591024691736125e-05, "loss": 4.055, "step": 3976 }, { "epoch": 1.324810527192471, "grad_norm": 0.5, "learning_rate": 1.4590761008785454e-05, "loss": 4.0482, "step": 3977 }, { "epoch": 1.3251436661947198, "grad_norm": 0.52734375, "learning_rate": 1.4590497243242777e-05, "loss": 4.0265, "step": 3978 }, { "epoch": 1.3254768051969683, "grad_norm": 0.52734375, "learning_rate": 1.4590233395111166e-05, "loss": 4.0878, "step": 3979 }, { "epoch": 1.325809944199217, "grad_norm": 0.52734375, "learning_rate": 1.4589969464393695e-05, "loss": 4.0631, "step": 3980 }, { "epoch": 1.3261430832014658, "grad_norm": 0.51953125, "learning_rate": 1.4589705451093434e-05, "loss": 4.0452, "step": 3981 }, { "epoch": 1.3264762222037145, "grad_norm": 0.51953125, "learning_rate": 1.4589441355213461e-05, "loss": 4.1195, "step": 3982 }, { "epoch": 1.3268093612059633, "grad_norm": 0.5234375, "learning_rate": 1.4589177176756853e-05, "loss": 4.1003, "step": 3983 }, { "epoch": 1.327142500208212, "grad_norm": 0.5234375, "learning_rate": 1.4588912915726687e-05, "loss": 4.0179, "step": 3984 }, { "epoch": 1.3274756392104605, "grad_norm": 0.53125, "learning_rate": 1.4588648572126038e-05, "loss": 4.0559, "step": 3985 }, { "epoch": 1.3278087782127093, "grad_norm": 0.54296875, "learning_rate": 1.4588384145957985e-05, "loss": 4.036, "step": 3986 }, { "epoch": 1.328141917214958, "grad_norm": 0.53125, "learning_rate": 1.4588119637225616e-05, "loss": 4.1078, "step": 3987 }, { "epoch": 1.3284750562172065, "grad_norm": 0.5, "learning_rate": 1.4587855045932e-05, "loss": 4.068, "step": 3988 }, { "epoch": 1.3288081952194553, "grad_norm": 0.5234375, "learning_rate": 1.4587590372080229e-05, "loss": 4.0909, "step": 3989 }, { "epoch": 1.329141334221704, "grad_norm": 0.53125, "learning_rate": 1.458732561567338e-05, "loss": 4.04, "step": 3990 }, { "epoch": 1.3294744732239527, "grad_norm": 0.51953125, "learning_rate": 1.458706077671454e-05, "loss": 4.0243, "step": 3991 }, { "epoch": 1.3298076122262015, "grad_norm": 0.51953125, "learning_rate": 1.4586795855206792e-05, "loss": 4.0229, "step": 3992 }, { "epoch": 1.33014075122845, "grad_norm": 0.53515625, "learning_rate": 1.4586530851153223e-05, "loss": 4.0645, "step": 3993 }, { "epoch": 1.3304738902306987, "grad_norm": 0.50390625, "learning_rate": 1.4586265764556918e-05, "loss": 4.0638, "step": 3994 }, { "epoch": 1.3308070292329475, "grad_norm": 0.53125, "learning_rate": 1.4586000595420966e-05, "loss": 4.0635, "step": 3995 }, { "epoch": 1.3311401682351962, "grad_norm": 0.48828125, "learning_rate": 1.4585735343748456e-05, "loss": 4.1071, "step": 3996 }, { "epoch": 1.3314733072374447, "grad_norm": 0.51171875, "learning_rate": 1.4585470009542476e-05, "loss": 4.1153, "step": 3997 }, { "epoch": 1.3318064462396935, "grad_norm": 0.51953125, "learning_rate": 1.4585204592806116e-05, "loss": 4.0725, "step": 3998 }, { "epoch": 1.3321395852419422, "grad_norm": 0.53125, "learning_rate": 1.4584939093542471e-05, "loss": 4.1153, "step": 3999 }, { "epoch": 1.332472724244191, "grad_norm": 0.52734375, "learning_rate": 1.458467351175463e-05, "loss": 4.1249, "step": 4000 }, { "epoch": 1.3328058632464397, "grad_norm": 0.498046875, "learning_rate": 1.458440784744569e-05, "loss": 4.0622, "step": 4001 }, { "epoch": 1.3331390022486882, "grad_norm": 0.5, "learning_rate": 1.4584142100618744e-05, "loss": 4.179, "step": 4002 }, { "epoch": 1.333472141250937, "grad_norm": 0.51171875, "learning_rate": 1.4583876271276884e-05, "loss": 4.0944, "step": 4003 }, { "epoch": 1.3338052802531857, "grad_norm": 0.515625, "learning_rate": 1.458361035942321e-05, "loss": 4.1169, "step": 4004 }, { "epoch": 1.3341384192554342, "grad_norm": 0.515625, "learning_rate": 1.4583344365060821e-05, "loss": 4.073, "step": 4005 }, { "epoch": 1.334471558257683, "grad_norm": 0.5078125, "learning_rate": 1.4583078288192809e-05, "loss": 4.1087, "step": 4006 }, { "epoch": 1.3348046972599317, "grad_norm": 0.52734375, "learning_rate": 1.4582812128822278e-05, "loss": 4.0006, "step": 4007 }, { "epoch": 1.3351378362621804, "grad_norm": 0.5390625, "learning_rate": 1.4582545886952327e-05, "loss": 4.0122, "step": 4008 }, { "epoch": 1.3354709752644291, "grad_norm": 0.55078125, "learning_rate": 1.4582279562586056e-05, "loss": 3.9971, "step": 4009 }, { "epoch": 1.3358041142666779, "grad_norm": 0.51171875, "learning_rate": 1.4582013155726571e-05, "loss": 4.1407, "step": 4010 }, { "epoch": 1.3361372532689264, "grad_norm": 0.50390625, "learning_rate": 1.4581746666376971e-05, "loss": 4.0637, "step": 4011 }, { "epoch": 1.3364703922711751, "grad_norm": 0.51171875, "learning_rate": 1.4581480094540362e-05, "loss": 4.1433, "step": 4012 }, { "epoch": 1.3368035312734239, "grad_norm": 0.50390625, "learning_rate": 1.4581213440219849e-05, "loss": 4.0926, "step": 4013 }, { "epoch": 1.3371366702756724, "grad_norm": 0.484375, "learning_rate": 1.4580946703418535e-05, "loss": 4.0555, "step": 4014 }, { "epoch": 1.3374698092779211, "grad_norm": 0.51171875, "learning_rate": 1.4580679884139531e-05, "loss": 4.1295, "step": 4015 }, { "epoch": 1.3378029482801699, "grad_norm": 0.490234375, "learning_rate": 1.4580412982385942e-05, "loss": 4.0428, "step": 4016 }, { "epoch": 1.3381360872824186, "grad_norm": 0.53515625, "learning_rate": 1.4580145998160878e-05, "loss": 4.0737, "step": 4017 }, { "epoch": 1.3384692262846674, "grad_norm": 0.49609375, "learning_rate": 1.4579878931467447e-05, "loss": 4.1055, "step": 4018 }, { "epoch": 1.338802365286916, "grad_norm": 0.52734375, "learning_rate": 1.4579611782308766e-05, "loss": 4.0913, "step": 4019 }, { "epoch": 1.3391355042891646, "grad_norm": 0.52734375, "learning_rate": 1.4579344550687937e-05, "loss": 4.0552, "step": 4020 }, { "epoch": 1.3394686432914134, "grad_norm": 0.498046875, "learning_rate": 1.4579077236608082e-05, "loss": 4.0905, "step": 4021 }, { "epoch": 1.339801782293662, "grad_norm": 0.515625, "learning_rate": 1.457880984007231e-05, "loss": 4.046, "step": 4022 }, { "epoch": 1.3401349212959106, "grad_norm": 0.53125, "learning_rate": 1.4578542361083734e-05, "loss": 4.0906, "step": 4023 }, { "epoch": 1.3404680602981593, "grad_norm": 0.53515625, "learning_rate": 1.4578274799645472e-05, "loss": 4.0722, "step": 4024 }, { "epoch": 1.340801199300408, "grad_norm": 0.52734375, "learning_rate": 1.457800715576064e-05, "loss": 4.0963, "step": 4025 }, { "epoch": 1.3411343383026568, "grad_norm": 0.515625, "learning_rate": 1.4577739429432358e-05, "loss": 4.1593, "step": 4026 }, { "epoch": 1.3414674773049056, "grad_norm": 0.50390625, "learning_rate": 1.4577471620663739e-05, "loss": 4.1034, "step": 4027 }, { "epoch": 1.341800616307154, "grad_norm": 0.52734375, "learning_rate": 1.4577203729457908e-05, "loss": 4.1086, "step": 4028 }, { "epoch": 1.3421337553094028, "grad_norm": 0.52734375, "learning_rate": 1.457693575581798e-05, "loss": 4.0279, "step": 4029 }, { "epoch": 1.3424668943116516, "grad_norm": 0.51171875, "learning_rate": 1.457666769974708e-05, "loss": 4.086, "step": 4030 }, { "epoch": 1.3428000333139003, "grad_norm": 0.53515625, "learning_rate": 1.457639956124833e-05, "loss": 4.0762, "step": 4031 }, { "epoch": 1.3431331723161488, "grad_norm": 0.51953125, "learning_rate": 1.4576131340324852e-05, "loss": 4.1037, "step": 4032 }, { "epoch": 1.3434663113183976, "grad_norm": 0.5078125, "learning_rate": 1.4575863036979772e-05, "loss": 4.1203, "step": 4033 }, { "epoch": 1.3437994503206463, "grad_norm": 0.5234375, "learning_rate": 1.4575594651216211e-05, "loss": 4.0135, "step": 4034 }, { "epoch": 1.344132589322895, "grad_norm": 0.5, "learning_rate": 1.4575326183037301e-05, "loss": 4.0909, "step": 4035 }, { "epoch": 1.3444657283251438, "grad_norm": 0.5234375, "learning_rate": 1.4575057632446165e-05, "loss": 4.0214, "step": 4036 }, { "epoch": 1.3447988673273923, "grad_norm": 0.498046875, "learning_rate": 1.4574788999445932e-05, "loss": 4.1097, "step": 4037 }, { "epoch": 1.345132006329641, "grad_norm": 0.515625, "learning_rate": 1.4574520284039731e-05, "loss": 4.054, "step": 4038 }, { "epoch": 1.3454651453318898, "grad_norm": 0.5234375, "learning_rate": 1.4574251486230694e-05, "loss": 4.0887, "step": 4039 }, { "epoch": 1.3457982843341383, "grad_norm": 0.52734375, "learning_rate": 1.4573982606021947e-05, "loss": 4.1199, "step": 4040 }, { "epoch": 1.346131423336387, "grad_norm": 0.54296875, "learning_rate": 1.4573713643416626e-05, "loss": 4.0158, "step": 4041 }, { "epoch": 1.3464645623386358, "grad_norm": 0.546875, "learning_rate": 1.4573444598417865e-05, "loss": 4.0383, "step": 4042 }, { "epoch": 1.3467977013408845, "grad_norm": 0.515625, "learning_rate": 1.457317547102879e-05, "loss": 4.0294, "step": 4043 }, { "epoch": 1.3471308403431332, "grad_norm": 0.5078125, "learning_rate": 1.4572906261252546e-05, "loss": 4.0454, "step": 4044 }, { "epoch": 1.347463979345382, "grad_norm": 0.5078125, "learning_rate": 1.4572636969092262e-05, "loss": 4.0836, "step": 4045 }, { "epoch": 1.3477971183476305, "grad_norm": 0.490234375, "learning_rate": 1.457236759455108e-05, "loss": 4.0403, "step": 4046 }, { "epoch": 1.3481302573498792, "grad_norm": 0.53125, "learning_rate": 1.457209813763213e-05, "loss": 4.0343, "step": 4047 }, { "epoch": 1.348463396352128, "grad_norm": 0.51953125, "learning_rate": 1.4571828598338556e-05, "loss": 4.0499, "step": 4048 }, { "epoch": 1.3487965353543765, "grad_norm": 0.52734375, "learning_rate": 1.4571558976673496e-05, "loss": 4.0233, "step": 4049 }, { "epoch": 1.3491296743566252, "grad_norm": 0.515625, "learning_rate": 1.4571289272640092e-05, "loss": 4.1205, "step": 4050 }, { "epoch": 1.349462813358874, "grad_norm": 0.5390625, "learning_rate": 1.4571019486241485e-05, "loss": 4.0256, "step": 4051 }, { "epoch": 1.3497959523611227, "grad_norm": 0.5078125, "learning_rate": 1.4570749617480817e-05, "loss": 4.1454, "step": 4052 }, { "epoch": 1.3501290913633714, "grad_norm": 0.5, "learning_rate": 1.4570479666361228e-05, "loss": 4.09, "step": 4053 }, { "epoch": 1.3504622303656202, "grad_norm": 0.51953125, "learning_rate": 1.4570209632885869e-05, "loss": 4.0994, "step": 4054 }, { "epoch": 1.3507953693678687, "grad_norm": 0.53515625, "learning_rate": 1.456993951705788e-05, "loss": 4.1035, "step": 4055 }, { "epoch": 1.3511285083701174, "grad_norm": 0.50390625, "learning_rate": 1.4569669318880407e-05, "loss": 4.0769, "step": 4056 }, { "epoch": 1.3514616473723662, "grad_norm": 0.51171875, "learning_rate": 1.4569399038356602e-05, "loss": 4.1112, "step": 4057 }, { "epoch": 1.3517947863746147, "grad_norm": 0.50390625, "learning_rate": 1.4569128675489611e-05, "loss": 4.0649, "step": 4058 }, { "epoch": 1.3521279253768634, "grad_norm": 0.515625, "learning_rate": 1.4568858230282582e-05, "loss": 4.0827, "step": 4059 }, { "epoch": 1.3524610643791122, "grad_norm": 0.51953125, "learning_rate": 1.4568587702738666e-05, "loss": 4.0819, "step": 4060 }, { "epoch": 1.352794203381361, "grad_norm": 0.53125, "learning_rate": 1.4568317092861014e-05, "loss": 4.0325, "step": 4061 }, { "epoch": 1.3531273423836097, "grad_norm": 0.515625, "learning_rate": 1.4568046400652775e-05, "loss": 4.1338, "step": 4062 }, { "epoch": 1.3534604813858582, "grad_norm": 0.515625, "learning_rate": 1.4567775626117108e-05, "loss": 4.0407, "step": 4063 }, { "epoch": 1.353793620388107, "grad_norm": 0.5, "learning_rate": 1.4567504769257162e-05, "loss": 4.1509, "step": 4064 }, { "epoch": 1.3541267593903556, "grad_norm": 0.51953125, "learning_rate": 1.4567233830076095e-05, "loss": 4.0876, "step": 4065 }, { "epoch": 1.3544598983926044, "grad_norm": 0.51953125, "learning_rate": 1.4566962808577061e-05, "loss": 4.0962, "step": 4066 }, { "epoch": 1.354793037394853, "grad_norm": 0.5234375, "learning_rate": 1.4566691704763216e-05, "loss": 4.05, "step": 4067 }, { "epoch": 1.3551261763971016, "grad_norm": 0.486328125, "learning_rate": 1.456642051863772e-05, "loss": 4.1926, "step": 4068 }, { "epoch": 1.3554593153993504, "grad_norm": 0.515625, "learning_rate": 1.456614925020373e-05, "loss": 4.165, "step": 4069 }, { "epoch": 1.3557924544015991, "grad_norm": 0.51953125, "learning_rate": 1.456587789946441e-05, "loss": 4.1406, "step": 4070 }, { "epoch": 1.3561255934038479, "grad_norm": 0.51953125, "learning_rate": 1.4565606466422912e-05, "loss": 4.0479, "step": 4071 }, { "epoch": 1.3564587324060964, "grad_norm": 0.52734375, "learning_rate": 1.4565334951082407e-05, "loss": 4.0392, "step": 4072 }, { "epoch": 1.3567918714083451, "grad_norm": 0.5, "learning_rate": 1.456506335344605e-05, "loss": 4.0691, "step": 4073 }, { "epoch": 1.3571250104105939, "grad_norm": 0.52734375, "learning_rate": 1.456479167351701e-05, "loss": 4.0269, "step": 4074 }, { "epoch": 1.3574581494128424, "grad_norm": 0.52734375, "learning_rate": 1.456451991129845e-05, "loss": 4.0379, "step": 4075 }, { "epoch": 1.3577912884150911, "grad_norm": 0.5234375, "learning_rate": 1.4564248066793533e-05, "loss": 4.0635, "step": 4076 }, { "epoch": 1.3581244274173399, "grad_norm": 0.51171875, "learning_rate": 1.4563976140005428e-05, "loss": 4.1291, "step": 4077 }, { "epoch": 1.3584575664195886, "grad_norm": 0.50390625, "learning_rate": 1.4563704130937302e-05, "loss": 4.0957, "step": 4078 }, { "epoch": 1.3587907054218373, "grad_norm": 0.515625, "learning_rate": 1.456343203959232e-05, "loss": 4.1368, "step": 4079 }, { "epoch": 1.359123844424086, "grad_norm": 0.546875, "learning_rate": 1.4563159865973657e-05, "loss": 4.1147, "step": 4080 }, { "epoch": 1.3594569834263346, "grad_norm": 0.50390625, "learning_rate": 1.4562887610084481e-05, "loss": 4.0613, "step": 4081 }, { "epoch": 1.3597901224285833, "grad_norm": 0.51953125, "learning_rate": 1.456261527192796e-05, "loss": 4.0709, "step": 4082 }, { "epoch": 1.360123261430832, "grad_norm": 0.53125, "learning_rate": 1.456234285150727e-05, "loss": 4.0596, "step": 4083 }, { "epoch": 1.3604564004330806, "grad_norm": 0.52734375, "learning_rate": 1.4562070348825581e-05, "loss": 4.0495, "step": 4084 }, { "epoch": 1.3607895394353293, "grad_norm": 0.53125, "learning_rate": 1.4561797763886072e-05, "loss": 4.0268, "step": 4085 }, { "epoch": 1.361122678437578, "grad_norm": 0.50390625, "learning_rate": 1.4561525096691914e-05, "loss": 4.1372, "step": 4086 }, { "epoch": 1.3614558174398268, "grad_norm": 0.515625, "learning_rate": 1.456125234724628e-05, "loss": 4.1381, "step": 4087 }, { "epoch": 1.3617889564420755, "grad_norm": 0.474609375, "learning_rate": 1.4560979515552356e-05, "loss": 4.1005, "step": 4088 }, { "epoch": 1.3621220954443243, "grad_norm": 0.52734375, "learning_rate": 1.4560706601613314e-05, "loss": 4.0287, "step": 4089 }, { "epoch": 1.3624552344465728, "grad_norm": 0.50390625, "learning_rate": 1.4560433605432331e-05, "loss": 4.0834, "step": 4090 }, { "epoch": 1.3627883734488215, "grad_norm": 0.4921875, "learning_rate": 1.4560160527012589e-05, "loss": 4.032, "step": 4091 }, { "epoch": 1.3631215124510703, "grad_norm": 0.490234375, "learning_rate": 1.4559887366357273e-05, "loss": 4.1444, "step": 4092 }, { "epoch": 1.3634546514533188, "grad_norm": 0.5078125, "learning_rate": 1.4559614123469557e-05, "loss": 4.1209, "step": 4093 }, { "epoch": 1.3637877904555675, "grad_norm": 0.50390625, "learning_rate": 1.455934079835263e-05, "loss": 4.1094, "step": 4094 }, { "epoch": 1.3641209294578163, "grad_norm": 0.52734375, "learning_rate": 1.4559067391009672e-05, "loss": 4.0968, "step": 4095 }, { "epoch": 1.364454068460065, "grad_norm": 0.5234375, "learning_rate": 1.4558793901443868e-05, "loss": 4.0802, "step": 4096 }, { "epoch": 1.3647872074623137, "grad_norm": 0.5, "learning_rate": 1.4558520329658405e-05, "loss": 4.0863, "step": 4097 }, { "epoch": 1.3651203464645623, "grad_norm": 0.5234375, "learning_rate": 1.455824667565647e-05, "loss": 4.0542, "step": 4098 }, { "epoch": 1.365453485466811, "grad_norm": 0.5234375, "learning_rate": 1.4557972939441249e-05, "loss": 4.1048, "step": 4099 }, { "epoch": 1.3657866244690597, "grad_norm": 0.5234375, "learning_rate": 1.4557699121015931e-05, "loss": 4.132, "step": 4100 }, { "epoch": 1.3661197634713085, "grad_norm": 0.53125, "learning_rate": 1.4557425220383704e-05, "loss": 4.0487, "step": 4101 }, { "epoch": 1.366452902473557, "grad_norm": 0.494140625, "learning_rate": 1.455715123754776e-05, "loss": 4.1202, "step": 4102 }, { "epoch": 1.3667860414758057, "grad_norm": 0.52734375, "learning_rate": 1.455687717251129e-05, "loss": 4.1122, "step": 4103 }, { "epoch": 1.3671191804780545, "grad_norm": 0.53515625, "learning_rate": 1.4556603025277486e-05, "loss": 4.0498, "step": 4104 }, { "epoch": 1.3674523194803032, "grad_norm": 0.52734375, "learning_rate": 1.4556328795849543e-05, "loss": 4.0561, "step": 4105 }, { "epoch": 1.367785458482552, "grad_norm": 0.50390625, "learning_rate": 1.4556054484230652e-05, "loss": 4.0614, "step": 4106 }, { "epoch": 1.3681185974848005, "grad_norm": 0.51171875, "learning_rate": 1.455578009042401e-05, "loss": 4.0942, "step": 4107 }, { "epoch": 1.3684517364870492, "grad_norm": 0.52734375, "learning_rate": 1.4555505614432811e-05, "loss": 4.0675, "step": 4108 }, { "epoch": 1.368784875489298, "grad_norm": 0.5, "learning_rate": 1.4555231056260256e-05, "loss": 4.0952, "step": 4109 }, { "epoch": 1.3691180144915465, "grad_norm": 0.51171875, "learning_rate": 1.455495641590954e-05, "loss": 4.0721, "step": 4110 }, { "epoch": 1.3694511534937952, "grad_norm": 0.53515625, "learning_rate": 1.4554681693383864e-05, "loss": 4.1041, "step": 4111 }, { "epoch": 1.369784292496044, "grad_norm": 0.515625, "learning_rate": 1.4554406888686423e-05, "loss": 4.0674, "step": 4112 }, { "epoch": 1.3701174314982927, "grad_norm": 0.515625, "learning_rate": 1.4554132001820426e-05, "loss": 4.0392, "step": 4113 }, { "epoch": 1.3704505705005414, "grad_norm": 0.5078125, "learning_rate": 1.4553857032789068e-05, "loss": 4.0857, "step": 4114 }, { "epoch": 1.3707837095027902, "grad_norm": 0.5078125, "learning_rate": 1.4553581981595554e-05, "loss": 4.0987, "step": 4115 }, { "epoch": 1.3711168485050387, "grad_norm": 0.494140625, "learning_rate": 1.455330684824309e-05, "loss": 4.026, "step": 4116 }, { "epoch": 1.3714499875072874, "grad_norm": 0.53515625, "learning_rate": 1.4553031632734876e-05, "loss": 4.0501, "step": 4117 }, { "epoch": 1.3717831265095362, "grad_norm": 0.50390625, "learning_rate": 1.4552756335074123e-05, "loss": 4.0381, "step": 4118 }, { "epoch": 1.3721162655117847, "grad_norm": 0.53125, "learning_rate": 1.4552480955264033e-05, "loss": 4.0763, "step": 4119 }, { "epoch": 1.3724494045140334, "grad_norm": 0.51171875, "learning_rate": 1.4552205493307816e-05, "loss": 4.0878, "step": 4120 }, { "epoch": 1.3727825435162821, "grad_norm": 0.515625, "learning_rate": 1.4551929949208678e-05, "loss": 4.0623, "step": 4121 }, { "epoch": 1.3731156825185309, "grad_norm": 0.51171875, "learning_rate": 1.4551654322969835e-05, "loss": 4.0989, "step": 4122 }, { "epoch": 1.3734488215207796, "grad_norm": 0.50390625, "learning_rate": 1.455137861459449e-05, "loss": 4.1636, "step": 4123 }, { "epoch": 1.3737819605230284, "grad_norm": 0.52734375, "learning_rate": 1.4551102824085858e-05, "loss": 4.0206, "step": 4124 }, { "epoch": 1.3741150995252769, "grad_norm": 0.515625, "learning_rate": 1.455082695144715e-05, "loss": 4.1219, "step": 4125 }, { "epoch": 1.3744482385275256, "grad_norm": 0.51953125, "learning_rate": 1.4550550996681581e-05, "loss": 4.0517, "step": 4126 }, { "epoch": 1.3747813775297744, "grad_norm": 0.51953125, "learning_rate": 1.4550274959792364e-05, "loss": 3.9654, "step": 4127 }, { "epoch": 1.3751145165320229, "grad_norm": 0.4921875, "learning_rate": 1.4549998840782716e-05, "loss": 4.0228, "step": 4128 }, { "epoch": 1.3754476555342716, "grad_norm": 0.53125, "learning_rate": 1.4549722639655852e-05, "loss": 4.0783, "step": 4129 }, { "epoch": 1.3757807945365204, "grad_norm": 0.494140625, "learning_rate": 1.4549446356414985e-05, "loss": 4.0543, "step": 4130 }, { "epoch": 1.376113933538769, "grad_norm": 0.53515625, "learning_rate": 1.454916999106334e-05, "loss": 4.0108, "step": 4131 }, { "epoch": 1.3764470725410178, "grad_norm": 0.53515625, "learning_rate": 1.4548893543604132e-05, "loss": 4.0512, "step": 4132 }, { "epoch": 1.3767802115432664, "grad_norm": 0.5, "learning_rate": 1.4548617014040584e-05, "loss": 4.0573, "step": 4133 }, { "epoch": 1.377113350545515, "grad_norm": 0.515625, "learning_rate": 1.4548340402375914e-05, "loss": 4.095, "step": 4134 }, { "epoch": 1.3774464895477638, "grad_norm": 0.53515625, "learning_rate": 1.4548063708613345e-05, "loss": 4.1028, "step": 4135 }, { "epoch": 1.3777796285500126, "grad_norm": 0.53515625, "learning_rate": 1.4547786932756098e-05, "loss": 4.1025, "step": 4136 }, { "epoch": 1.378112767552261, "grad_norm": 0.5078125, "learning_rate": 1.4547510074807402e-05, "loss": 4.0835, "step": 4137 }, { "epoch": 1.3784459065545098, "grad_norm": 0.53125, "learning_rate": 1.4547233134770476e-05, "loss": 4.0891, "step": 4138 }, { "epoch": 1.3787790455567586, "grad_norm": 0.515625, "learning_rate": 1.4546956112648551e-05, "loss": 4.1146, "step": 4139 }, { "epoch": 1.3791121845590073, "grad_norm": 0.55859375, "learning_rate": 1.4546679008444847e-05, "loss": 4.0428, "step": 4140 }, { "epoch": 1.379445323561256, "grad_norm": 0.546875, "learning_rate": 1.45464018221626e-05, "loss": 4.0335, "step": 4141 }, { "epoch": 1.3797784625635046, "grad_norm": 0.51171875, "learning_rate": 1.454612455380503e-05, "loss": 4.0853, "step": 4142 }, { "epoch": 1.3801116015657533, "grad_norm": 0.51171875, "learning_rate": 1.4545847203375374e-05, "loss": 4.0961, "step": 4143 }, { "epoch": 1.380444740568002, "grad_norm": 0.5390625, "learning_rate": 1.4545569770876856e-05, "loss": 4.03, "step": 4144 }, { "epoch": 1.3807778795702506, "grad_norm": 0.52734375, "learning_rate": 1.4545292256312715e-05, "loss": 4.1069, "step": 4145 }, { "epoch": 1.3811110185724993, "grad_norm": 0.53125, "learning_rate": 1.4545014659686177e-05, "loss": 4.116, "step": 4146 }, { "epoch": 1.381444157574748, "grad_norm": 0.53125, "learning_rate": 1.4544736981000478e-05, "loss": 4.0731, "step": 4147 }, { "epoch": 1.3817772965769968, "grad_norm": 0.54296875, "learning_rate": 1.4544459220258852e-05, "loss": 3.9874, "step": 4148 }, { "epoch": 1.3821104355792455, "grad_norm": 0.51171875, "learning_rate": 1.4544181377464534e-05, "loss": 4.1183, "step": 4149 }, { "epoch": 1.3824435745814942, "grad_norm": 0.5078125, "learning_rate": 1.4543903452620763e-05, "loss": 4.0893, "step": 4150 }, { "epoch": 1.3827767135837428, "grad_norm": 0.51953125, "learning_rate": 1.454362544573077e-05, "loss": 4.099, "step": 4151 }, { "epoch": 1.3831098525859915, "grad_norm": 0.54296875, "learning_rate": 1.45433473567978e-05, "loss": 4.0495, "step": 4152 }, { "epoch": 1.3834429915882402, "grad_norm": 0.51171875, "learning_rate": 1.4543069185825086e-05, "loss": 4.0654, "step": 4153 }, { "epoch": 1.3837761305904888, "grad_norm": 0.50390625, "learning_rate": 1.4542790932815875e-05, "loss": 4.128, "step": 4154 }, { "epoch": 1.3841092695927375, "grad_norm": 0.50390625, "learning_rate": 1.4542512597773402e-05, "loss": 4.1149, "step": 4155 }, { "epoch": 1.3844424085949862, "grad_norm": 0.52734375, "learning_rate": 1.4542234180700912e-05, "loss": 4.02, "step": 4156 }, { "epoch": 1.384775547597235, "grad_norm": 0.5625, "learning_rate": 1.4541955681601647e-05, "loss": 4.036, "step": 4157 }, { "epoch": 1.3851086865994837, "grad_norm": 0.5234375, "learning_rate": 1.4541677100478854e-05, "loss": 4.0475, "step": 4158 }, { "epoch": 1.3854418256017325, "grad_norm": 0.5078125, "learning_rate": 1.4541398437335771e-05, "loss": 4.0754, "step": 4159 }, { "epoch": 1.385774964603981, "grad_norm": 0.51953125, "learning_rate": 1.4541119692175649e-05, "loss": 4.1126, "step": 4160 }, { "epoch": 1.3861081036062297, "grad_norm": 0.5234375, "learning_rate": 1.4540840865001732e-05, "loss": 3.9967, "step": 4161 }, { "epoch": 1.3864412426084785, "grad_norm": 0.52734375, "learning_rate": 1.4540561955817273e-05, "loss": 4.1354, "step": 4162 }, { "epoch": 1.386774381610727, "grad_norm": 0.56640625, "learning_rate": 1.4540282964625516e-05, "loss": 4.1082, "step": 4163 }, { "epoch": 1.3871075206129757, "grad_norm": 0.5234375, "learning_rate": 1.454000389142971e-05, "loss": 4.0926, "step": 4164 }, { "epoch": 1.3874406596152244, "grad_norm": 0.53515625, "learning_rate": 1.4539724736233108e-05, "loss": 4.0393, "step": 4165 }, { "epoch": 1.3877737986174732, "grad_norm": 0.51171875, "learning_rate": 1.4539445499038961e-05, "loss": 4.0323, "step": 4166 }, { "epoch": 1.388106937619722, "grad_norm": 0.515625, "learning_rate": 1.4539166179850521e-05, "loss": 4.1362, "step": 4167 }, { "epoch": 1.3884400766219704, "grad_norm": 0.51953125, "learning_rate": 1.4538886778671041e-05, "loss": 4.0874, "step": 4168 }, { "epoch": 1.3887732156242192, "grad_norm": 0.51953125, "learning_rate": 1.453860729550378e-05, "loss": 4.0921, "step": 4169 }, { "epoch": 1.389106354626468, "grad_norm": 0.5, "learning_rate": 1.4538327730351987e-05, "loss": 4.0944, "step": 4170 }, { "epoch": 1.3894394936287167, "grad_norm": 0.50390625, "learning_rate": 1.4538048083218919e-05, "loss": 4.0873, "step": 4171 }, { "epoch": 1.3897726326309652, "grad_norm": 0.5078125, "learning_rate": 1.4537768354107838e-05, "loss": 4.1131, "step": 4172 }, { "epoch": 1.390105771633214, "grad_norm": 0.5078125, "learning_rate": 1.4537488543021999e-05, "loss": 4.0945, "step": 4173 }, { "epoch": 1.3904389106354627, "grad_norm": 0.51953125, "learning_rate": 1.4537208649964661e-05, "loss": 4.1627, "step": 4174 }, { "epoch": 1.3907720496377114, "grad_norm": 0.51953125, "learning_rate": 1.4536928674939086e-05, "loss": 4.0983, "step": 4175 }, { "epoch": 1.3911051886399601, "grad_norm": 0.5078125, "learning_rate": 1.4536648617948533e-05, "loss": 4.0925, "step": 4176 }, { "epoch": 1.3914383276422086, "grad_norm": 0.50390625, "learning_rate": 1.4536368478996264e-05, "loss": 4.1183, "step": 4177 }, { "epoch": 1.3917714666444574, "grad_norm": 0.53125, "learning_rate": 1.4536088258085545e-05, "loss": 4.0656, "step": 4178 }, { "epoch": 1.3921046056467061, "grad_norm": 0.5078125, "learning_rate": 1.4535807955219637e-05, "loss": 4.1373, "step": 4179 }, { "epoch": 1.3924377446489546, "grad_norm": 0.51171875, "learning_rate": 1.4535527570401804e-05, "loss": 4.1391, "step": 4180 }, { "epoch": 1.3927708836512034, "grad_norm": 0.51171875, "learning_rate": 1.4535247103635316e-05, "loss": 4.1039, "step": 4181 }, { "epoch": 1.3931040226534521, "grad_norm": 0.515625, "learning_rate": 1.4534966554923436e-05, "loss": 4.1461, "step": 4182 }, { "epoch": 1.3934371616557009, "grad_norm": 0.53125, "learning_rate": 1.4534685924269435e-05, "loss": 4.084, "step": 4183 }, { "epoch": 1.3937703006579496, "grad_norm": 0.5078125, "learning_rate": 1.4534405211676579e-05, "loss": 4.0589, "step": 4184 }, { "epoch": 1.3941034396601983, "grad_norm": 0.52734375, "learning_rate": 1.4534124417148139e-05, "loss": 4.0231, "step": 4185 }, { "epoch": 1.3944365786624469, "grad_norm": 0.4921875, "learning_rate": 1.4533843540687386e-05, "loss": 4.1162, "step": 4186 }, { "epoch": 1.3947697176646956, "grad_norm": 0.50390625, "learning_rate": 1.4533562582297592e-05, "loss": 4.0063, "step": 4187 }, { "epoch": 1.3951028566669443, "grad_norm": 0.52734375, "learning_rate": 1.4533281541982027e-05, "loss": 4.064, "step": 4188 }, { "epoch": 1.3954359956691929, "grad_norm": 0.51953125, "learning_rate": 1.4533000419743966e-05, "loss": 4.0472, "step": 4189 }, { "epoch": 1.3957691346714416, "grad_norm": 0.5234375, "learning_rate": 1.4532719215586684e-05, "loss": 4.112, "step": 4190 }, { "epoch": 1.3961022736736903, "grad_norm": 0.54296875, "learning_rate": 1.4532437929513456e-05, "loss": 4.0758, "step": 4191 }, { "epoch": 1.396435412675939, "grad_norm": 0.51171875, "learning_rate": 1.4532156561527557e-05, "loss": 4.1319, "step": 4192 }, { "epoch": 1.3967685516781878, "grad_norm": 0.53125, "learning_rate": 1.4531875111632268e-05, "loss": 4.1138, "step": 4193 }, { "epoch": 1.3971016906804365, "grad_norm": 0.55078125, "learning_rate": 1.4531593579830864e-05, "loss": 4.0488, "step": 4194 }, { "epoch": 1.397434829682685, "grad_norm": 0.5234375, "learning_rate": 1.4531311966126626e-05, "loss": 4.0999, "step": 4195 }, { "epoch": 1.3977679686849338, "grad_norm": 0.50390625, "learning_rate": 1.4531030270522832e-05, "loss": 4.0642, "step": 4196 }, { "epoch": 1.3981011076871825, "grad_norm": 0.5078125, "learning_rate": 1.4530748493022763e-05, "loss": 4.1035, "step": 4197 }, { "epoch": 1.398434246689431, "grad_norm": 0.5078125, "learning_rate": 1.4530466633629706e-05, "loss": 4.0428, "step": 4198 }, { "epoch": 1.3987673856916798, "grad_norm": 0.51953125, "learning_rate": 1.453018469234694e-05, "loss": 4.0498, "step": 4199 }, { "epoch": 1.3991005246939285, "grad_norm": 0.5, "learning_rate": 1.4529902669177748e-05, "loss": 4.045, "step": 4200 }, { "epoch": 1.3994336636961773, "grad_norm": 0.52734375, "learning_rate": 1.4529620564125419e-05, "loss": 4.0903, "step": 4201 }, { "epoch": 1.399766802698426, "grad_norm": 0.5234375, "learning_rate": 1.4529338377193235e-05, "loss": 4.108, "step": 4202 }, { "epoch": 1.4000999417006745, "grad_norm": 0.5546875, "learning_rate": 1.4529056108384483e-05, "loss": 4.0923, "step": 4203 }, { "epoch": 1.4004330807029233, "grad_norm": 0.54296875, "learning_rate": 1.4528773757702456e-05, "loss": 4.015, "step": 4204 }, { "epoch": 1.400766219705172, "grad_norm": 0.5234375, "learning_rate": 1.4528491325150438e-05, "loss": 4.0733, "step": 4205 }, { "epoch": 1.4010993587074208, "grad_norm": 0.5234375, "learning_rate": 1.4528208810731718e-05, "loss": 4.0404, "step": 4206 }, { "epoch": 1.4014324977096693, "grad_norm": 0.54296875, "learning_rate": 1.4527926214449589e-05, "loss": 4.051, "step": 4207 }, { "epoch": 1.401765636711918, "grad_norm": 0.55859375, "learning_rate": 1.4527643536307343e-05, "loss": 4.0109, "step": 4208 }, { "epoch": 1.4020987757141667, "grad_norm": 0.51953125, "learning_rate": 1.4527360776308271e-05, "loss": 4.0263, "step": 4209 }, { "epoch": 1.4024319147164155, "grad_norm": 0.55078125, "learning_rate": 1.4527077934455666e-05, "loss": 4.1415, "step": 4210 }, { "epoch": 1.4027650537186642, "grad_norm": 0.5390625, "learning_rate": 1.4526795010752823e-05, "loss": 4.1109, "step": 4211 }, { "epoch": 1.4030981927209127, "grad_norm": 0.5234375, "learning_rate": 1.452651200520304e-05, "loss": 4.1111, "step": 4212 }, { "epoch": 1.4034313317231615, "grad_norm": 0.49609375, "learning_rate": 1.4526228917809612e-05, "loss": 4.0982, "step": 4213 }, { "epoch": 1.4037644707254102, "grad_norm": 0.5234375, "learning_rate": 1.4525945748575836e-05, "loss": 4.0804, "step": 4214 }, { "epoch": 1.4040976097276587, "grad_norm": 0.52734375, "learning_rate": 1.452566249750501e-05, "loss": 4.0675, "step": 4215 }, { "epoch": 1.4044307487299075, "grad_norm": 0.53125, "learning_rate": 1.4525379164600434e-05, "loss": 4.062, "step": 4216 }, { "epoch": 1.4047638877321562, "grad_norm": 0.546875, "learning_rate": 1.4525095749865406e-05, "loss": 4.0312, "step": 4217 }, { "epoch": 1.405097026734405, "grad_norm": 0.5390625, "learning_rate": 1.4524812253303229e-05, "loss": 4.0357, "step": 4218 }, { "epoch": 1.4054301657366537, "grad_norm": 0.51171875, "learning_rate": 1.4524528674917205e-05, "loss": 4.0982, "step": 4219 }, { "epoch": 1.4057633047389024, "grad_norm": 0.51953125, "learning_rate": 1.4524245014710637e-05, "loss": 4.1811, "step": 4220 }, { "epoch": 1.406096443741151, "grad_norm": 0.54296875, "learning_rate": 1.452396127268683e-05, "loss": 4.052, "step": 4221 }, { "epoch": 1.4064295827433997, "grad_norm": 0.55859375, "learning_rate": 1.4523677448849088e-05, "loss": 4.1184, "step": 4222 }, { "epoch": 1.4067627217456484, "grad_norm": 0.5, "learning_rate": 1.4523393543200718e-05, "loss": 4.1501, "step": 4223 }, { "epoch": 1.407095860747897, "grad_norm": 0.5078125, "learning_rate": 1.4523109555745023e-05, "loss": 4.0053, "step": 4224 }, { "epoch": 1.4074289997501457, "grad_norm": 0.51171875, "learning_rate": 1.4522825486485314e-05, "loss": 4.196, "step": 4225 }, { "epoch": 1.4077621387523944, "grad_norm": 0.51953125, "learning_rate": 1.4522541335424902e-05, "loss": 4.0504, "step": 4226 }, { "epoch": 1.4080952777546432, "grad_norm": 0.53125, "learning_rate": 1.4522257102567093e-05, "loss": 4.0822, "step": 4227 }, { "epoch": 1.408428416756892, "grad_norm": 0.53125, "learning_rate": 1.4521972787915197e-05, "loss": 4.0717, "step": 4228 }, { "epoch": 1.4087615557591406, "grad_norm": 0.5234375, "learning_rate": 1.4521688391472532e-05, "loss": 4.0811, "step": 4229 }, { "epoch": 1.4090946947613892, "grad_norm": 0.50390625, "learning_rate": 1.4521403913242402e-05, "loss": 4.0788, "step": 4230 }, { "epoch": 1.409427833763638, "grad_norm": 0.5234375, "learning_rate": 1.4521119353228127e-05, "loss": 4.0981, "step": 4231 }, { "epoch": 1.4097609727658866, "grad_norm": 0.5234375, "learning_rate": 1.452083471143302e-05, "loss": 4.0945, "step": 4232 }, { "epoch": 1.4100941117681351, "grad_norm": 0.498046875, "learning_rate": 1.4520549987860393e-05, "loss": 4.1236, "step": 4233 }, { "epoch": 1.410427250770384, "grad_norm": 0.53125, "learning_rate": 1.4520265182513567e-05, "loss": 4.0516, "step": 4234 }, { "epoch": 1.4107603897726326, "grad_norm": 0.50390625, "learning_rate": 1.4519980295395856e-05, "loss": 4.0831, "step": 4235 }, { "epoch": 1.4110935287748814, "grad_norm": 0.53515625, "learning_rate": 1.4519695326510583e-05, "loss": 4.0971, "step": 4236 }, { "epoch": 1.41142666777713, "grad_norm": 0.53125, "learning_rate": 1.4519410275861061e-05, "loss": 3.9959, "step": 4237 }, { "epoch": 1.4117598067793786, "grad_norm": 0.56640625, "learning_rate": 1.4519125143450613e-05, "loss": 4.104, "step": 4238 }, { "epoch": 1.4120929457816274, "grad_norm": 0.5390625, "learning_rate": 1.4518839929282563e-05, "loss": 4.087, "step": 4239 }, { "epoch": 1.412426084783876, "grad_norm": 0.52734375, "learning_rate": 1.4518554633360229e-05, "loss": 4.017, "step": 4240 }, { "epoch": 1.4127592237861248, "grad_norm": 0.55859375, "learning_rate": 1.4518269255686936e-05, "loss": 4.0979, "step": 4241 }, { "epoch": 1.4130923627883734, "grad_norm": 0.53515625, "learning_rate": 1.4517983796266008e-05, "loss": 4.0754, "step": 4242 }, { "epoch": 1.413425501790622, "grad_norm": 0.53515625, "learning_rate": 1.4517698255100768e-05, "loss": 4.0496, "step": 4243 }, { "epoch": 1.4137586407928708, "grad_norm": 0.5, "learning_rate": 1.4517412632194545e-05, "loss": 4.0915, "step": 4244 }, { "epoch": 1.4140917797951196, "grad_norm": 0.498046875, "learning_rate": 1.4517126927550664e-05, "loss": 4.1012, "step": 4245 }, { "epoch": 1.4144249187973683, "grad_norm": 0.5234375, "learning_rate": 1.4516841141172454e-05, "loss": 4.1273, "step": 4246 }, { "epoch": 1.4147580577996168, "grad_norm": 0.51953125, "learning_rate": 1.4516555273063244e-05, "loss": 3.9827, "step": 4247 }, { "epoch": 1.4150911968018656, "grad_norm": 0.498046875, "learning_rate": 1.451626932322636e-05, "loss": 4.0817, "step": 4248 }, { "epoch": 1.4154243358041143, "grad_norm": 0.5234375, "learning_rate": 1.4515983291665138e-05, "loss": 4.0916, "step": 4249 }, { "epoch": 1.4157574748063628, "grad_norm": 0.53515625, "learning_rate": 1.4515697178382908e-05, "loss": 4.1394, "step": 4250 }, { "epoch": 1.4160906138086116, "grad_norm": 0.5625, "learning_rate": 1.4515410983383e-05, "loss": 3.962, "step": 4251 }, { "epoch": 1.4164237528108603, "grad_norm": 0.51171875, "learning_rate": 1.4515124706668751e-05, "loss": 4.1304, "step": 4252 }, { "epoch": 1.416756891813109, "grad_norm": 0.53125, "learning_rate": 1.4514838348243492e-05, "loss": 4.05, "step": 4253 }, { "epoch": 1.4170900308153578, "grad_norm": 0.51953125, "learning_rate": 1.4514551908110563e-05, "loss": 4.0669, "step": 4254 }, { "epoch": 1.4174231698176065, "grad_norm": 0.546875, "learning_rate": 1.4514265386273294e-05, "loss": 4.0668, "step": 4255 }, { "epoch": 1.417756308819855, "grad_norm": 0.5, "learning_rate": 1.4513978782735029e-05, "loss": 4.0611, "step": 4256 }, { "epoch": 1.4180894478221038, "grad_norm": 0.55078125, "learning_rate": 1.4513692097499102e-05, "loss": 4.0144, "step": 4257 }, { "epoch": 1.4184225868243525, "grad_norm": 0.546875, "learning_rate": 1.4513405330568856e-05, "loss": 3.9996, "step": 4258 }, { "epoch": 1.418755725826601, "grad_norm": 0.515625, "learning_rate": 1.4513118481947626e-05, "loss": 4.1016, "step": 4259 }, { "epoch": 1.4190888648288498, "grad_norm": 0.546875, "learning_rate": 1.4512831551638758e-05, "loss": 4.0036, "step": 4260 }, { "epoch": 1.4194220038310985, "grad_norm": 0.53515625, "learning_rate": 1.4512544539645591e-05, "loss": 3.9981, "step": 4261 }, { "epoch": 1.4197551428333473, "grad_norm": 0.51953125, "learning_rate": 1.4512257445971468e-05, "loss": 4.0509, "step": 4262 }, { "epoch": 1.420088281835596, "grad_norm": 0.53515625, "learning_rate": 1.4511970270619736e-05, "loss": 4.1234, "step": 4263 }, { "epoch": 1.4204214208378447, "grad_norm": 0.51953125, "learning_rate": 1.4511683013593741e-05, "loss": 4.0986, "step": 4264 }, { "epoch": 1.4207545598400932, "grad_norm": 0.515625, "learning_rate": 1.4511395674896822e-05, "loss": 4.1266, "step": 4265 }, { "epoch": 1.421087698842342, "grad_norm": 0.54296875, "learning_rate": 1.4511108254532333e-05, "loss": 4.0903, "step": 4266 }, { "epoch": 1.4214208378445907, "grad_norm": 0.5234375, "learning_rate": 1.4510820752503616e-05, "loss": 4.0788, "step": 4267 }, { "epoch": 1.4217539768468392, "grad_norm": 0.53125, "learning_rate": 1.4510533168814025e-05, "loss": 4.0038, "step": 4268 }, { "epoch": 1.422087115849088, "grad_norm": 0.54296875, "learning_rate": 1.4510245503466907e-05, "loss": 3.9908, "step": 4269 }, { "epoch": 1.4224202548513367, "grad_norm": 0.51171875, "learning_rate": 1.450995775646561e-05, "loss": 4.0505, "step": 4270 }, { "epoch": 1.4227533938535855, "grad_norm": 0.53125, "learning_rate": 1.4509669927813491e-05, "loss": 4.0868, "step": 4271 }, { "epoch": 1.4230865328558342, "grad_norm": 0.515625, "learning_rate": 1.4509382017513901e-05, "loss": 4.1292, "step": 4272 }, { "epoch": 1.4234196718580827, "grad_norm": 0.51171875, "learning_rate": 1.4509094025570193e-05, "loss": 4.1389, "step": 4273 }, { "epoch": 1.4237528108603315, "grad_norm": 0.5078125, "learning_rate": 1.4508805951985721e-05, "loss": 4.067, "step": 4274 }, { "epoch": 1.4240859498625802, "grad_norm": 0.53125, "learning_rate": 1.450851779676384e-05, "loss": 4.1368, "step": 4275 }, { "epoch": 1.424419088864829, "grad_norm": 0.51953125, "learning_rate": 1.4508229559907908e-05, "loss": 4.0979, "step": 4276 }, { "epoch": 1.4247522278670774, "grad_norm": 0.53515625, "learning_rate": 1.4507941241421279e-05, "loss": 4.0363, "step": 4277 }, { "epoch": 1.4250853668693262, "grad_norm": 0.5078125, "learning_rate": 1.4507652841307315e-05, "loss": 4.0837, "step": 4278 }, { "epoch": 1.425418505871575, "grad_norm": 0.51171875, "learning_rate": 1.4507364359569375e-05, "loss": 4.0821, "step": 4279 }, { "epoch": 1.4257516448738237, "grad_norm": 0.53515625, "learning_rate": 1.4507075796210818e-05, "loss": 4.089, "step": 4280 }, { "epoch": 1.4260847838760724, "grad_norm": 0.53125, "learning_rate": 1.4506787151235004e-05, "loss": 4.0576, "step": 4281 }, { "epoch": 1.426417922878321, "grad_norm": 0.49609375, "learning_rate": 1.4506498424645298e-05, "loss": 4.0667, "step": 4282 }, { "epoch": 1.4267510618805697, "grad_norm": 0.5390625, "learning_rate": 1.450620961644506e-05, "loss": 4.1315, "step": 4283 }, { "epoch": 1.4270842008828184, "grad_norm": 0.55859375, "learning_rate": 1.4505920726637657e-05, "loss": 4.0684, "step": 4284 }, { "epoch": 1.427417339885067, "grad_norm": 0.5625, "learning_rate": 1.450563175522645e-05, "loss": 4.0362, "step": 4285 }, { "epoch": 1.4277504788873157, "grad_norm": 0.515625, "learning_rate": 1.4505342702214808e-05, "loss": 4.0873, "step": 4286 }, { "epoch": 1.4280836178895644, "grad_norm": 0.51953125, "learning_rate": 1.45050535676061e-05, "loss": 4.1167, "step": 4287 }, { "epoch": 1.4284167568918131, "grad_norm": 0.515625, "learning_rate": 1.4504764351403685e-05, "loss": 4.0851, "step": 4288 }, { "epoch": 1.4287498958940619, "grad_norm": 0.50390625, "learning_rate": 1.4504475053610943e-05, "loss": 4.0762, "step": 4289 }, { "epoch": 1.4290830348963106, "grad_norm": 0.5234375, "learning_rate": 1.4504185674231234e-05, "loss": 4.0054, "step": 4290 }, { "epoch": 1.4294161738985591, "grad_norm": 0.52734375, "learning_rate": 1.4503896213267937e-05, "loss": 4.1235, "step": 4291 }, { "epoch": 1.4297493129008079, "grad_norm": 0.51953125, "learning_rate": 1.450360667072442e-05, "loss": 4.0854, "step": 4292 }, { "epoch": 1.4300824519030566, "grad_norm": 0.5390625, "learning_rate": 1.4503317046604051e-05, "loss": 4.1227, "step": 4293 }, { "epoch": 1.4304155909053051, "grad_norm": 0.51953125, "learning_rate": 1.450302734091021e-05, "loss": 4.1423, "step": 4294 }, { "epoch": 1.4307487299075539, "grad_norm": 0.53125, "learning_rate": 1.4502737553646268e-05, "loss": 4.0147, "step": 4295 }, { "epoch": 1.4310818689098026, "grad_norm": 0.5390625, "learning_rate": 1.4502447684815601e-05, "loss": 4.1443, "step": 4296 }, { "epoch": 1.4314150079120513, "grad_norm": 0.52734375, "learning_rate": 1.4502157734421586e-05, "loss": 4.0848, "step": 4297 }, { "epoch": 1.4317481469143, "grad_norm": 0.5546875, "learning_rate": 1.45018677024676e-05, "loss": 4.0518, "step": 4298 }, { "epoch": 1.4320812859165488, "grad_norm": 0.51953125, "learning_rate": 1.4501577588957025e-05, "loss": 4.1129, "step": 4299 }, { "epoch": 1.4324144249187973, "grad_norm": 0.498046875, "learning_rate": 1.4501287393893232e-05, "loss": 4.159, "step": 4300 }, { "epoch": 1.432747563921046, "grad_norm": 0.53125, "learning_rate": 1.4500997117279607e-05, "loss": 3.9369, "step": 4301 }, { "epoch": 1.4330807029232948, "grad_norm": 0.52734375, "learning_rate": 1.450070675911953e-05, "loss": 4.0963, "step": 4302 }, { "epoch": 1.4334138419255433, "grad_norm": 0.5078125, "learning_rate": 1.4500416319416383e-05, "loss": 4.1638, "step": 4303 }, { "epoch": 1.433746980927792, "grad_norm": 0.546875, "learning_rate": 1.4500125798173546e-05, "loss": 4.1198, "step": 4304 }, { "epoch": 1.4340801199300408, "grad_norm": 0.53515625, "learning_rate": 1.449983519539441e-05, "loss": 4.0528, "step": 4305 }, { "epoch": 1.4344132589322895, "grad_norm": 0.546875, "learning_rate": 1.4499544511082353e-05, "loss": 3.9865, "step": 4306 }, { "epoch": 1.4347463979345383, "grad_norm": 0.5078125, "learning_rate": 1.4499253745240765e-05, "loss": 4.0507, "step": 4307 }, { "epoch": 1.4350795369367868, "grad_norm": 0.53125, "learning_rate": 1.4498962897873028e-05, "loss": 4.0712, "step": 4308 }, { "epoch": 1.4354126759390355, "grad_norm": 0.53125, "learning_rate": 1.4498671968982537e-05, "loss": 4.0848, "step": 4309 }, { "epoch": 1.4357458149412843, "grad_norm": 0.52734375, "learning_rate": 1.4498380958572674e-05, "loss": 4.0796, "step": 4310 }, { "epoch": 1.436078953943533, "grad_norm": 0.50390625, "learning_rate": 1.4498089866646831e-05, "loss": 4.0972, "step": 4311 }, { "epoch": 1.4364120929457815, "grad_norm": 0.52734375, "learning_rate": 1.44977986932084e-05, "loss": 4.0931, "step": 4312 }, { "epoch": 1.4367452319480303, "grad_norm": 0.5390625, "learning_rate": 1.4497507438260773e-05, "loss": 4.0521, "step": 4313 }, { "epoch": 1.437078370950279, "grad_norm": 0.52734375, "learning_rate": 1.4497216101807337e-05, "loss": 4.0208, "step": 4314 }, { "epoch": 1.4374115099525278, "grad_norm": 0.484375, "learning_rate": 1.4496924683851493e-05, "loss": 4.1055, "step": 4315 }, { "epoch": 1.4377446489547765, "grad_norm": 0.49609375, "learning_rate": 1.4496633184396631e-05, "loss": 4.0748, "step": 4316 }, { "epoch": 1.438077787957025, "grad_norm": 0.51953125, "learning_rate": 1.4496341603446146e-05, "loss": 4.0331, "step": 4317 }, { "epoch": 1.4384109269592738, "grad_norm": 0.53125, "learning_rate": 1.4496049941003435e-05, "loss": 4.0568, "step": 4318 }, { "epoch": 1.4387440659615225, "grad_norm": 0.53125, "learning_rate": 1.4495758197071898e-05, "loss": 4.0617, "step": 4319 }, { "epoch": 1.439077204963771, "grad_norm": 0.51953125, "learning_rate": 1.4495466371654929e-05, "loss": 4.0755, "step": 4320 }, { "epoch": 1.4394103439660197, "grad_norm": 0.515625, "learning_rate": 1.4495174464755928e-05, "loss": 4.0496, "step": 4321 }, { "epoch": 1.4397434829682685, "grad_norm": 0.53125, "learning_rate": 1.4494882476378299e-05, "loss": 4.1235, "step": 4322 }, { "epoch": 1.4400766219705172, "grad_norm": 0.53125, "learning_rate": 1.449459040652544e-05, "loss": 4.1288, "step": 4323 }, { "epoch": 1.440409760972766, "grad_norm": 0.53515625, "learning_rate": 1.449429825520075e-05, "loss": 4.0299, "step": 4324 }, { "epoch": 1.4407428999750147, "grad_norm": 0.50390625, "learning_rate": 1.4494006022407635e-05, "loss": 4.0487, "step": 4325 }, { "epoch": 1.4410760389772632, "grad_norm": 0.5234375, "learning_rate": 1.44937137081495e-05, "loss": 4.0904, "step": 4326 }, { "epoch": 1.441409177979512, "grad_norm": 0.54296875, "learning_rate": 1.449342131242975e-05, "loss": 4.0922, "step": 4327 }, { "epoch": 1.4417423169817607, "grad_norm": 0.5546875, "learning_rate": 1.4493128835251786e-05, "loss": 4.0948, "step": 4328 }, { "epoch": 1.4420754559840092, "grad_norm": 0.5390625, "learning_rate": 1.449283627661902e-05, "loss": 4.075, "step": 4329 }, { "epoch": 1.442408594986258, "grad_norm": 0.498046875, "learning_rate": 1.4492543636534858e-05, "loss": 4.0689, "step": 4330 }, { "epoch": 1.4427417339885067, "grad_norm": 0.52734375, "learning_rate": 1.449225091500271e-05, "loss": 4.1219, "step": 4331 }, { "epoch": 1.4430748729907554, "grad_norm": 0.51171875, "learning_rate": 1.4491958112025984e-05, "loss": 4.0769, "step": 4332 }, { "epoch": 1.4434080119930042, "grad_norm": 0.51171875, "learning_rate": 1.4491665227608088e-05, "loss": 4.0915, "step": 4333 }, { "epoch": 1.4437411509952527, "grad_norm": 0.51953125, "learning_rate": 1.4491372261752437e-05, "loss": 4.1082, "step": 4334 }, { "epoch": 1.4440742899975014, "grad_norm": 0.52734375, "learning_rate": 1.4491079214462442e-05, "loss": 4.0893, "step": 4335 }, { "epoch": 1.4444074289997502, "grad_norm": 0.52734375, "learning_rate": 1.4490786085741519e-05, "loss": 4.0977, "step": 4336 }, { "epoch": 1.444740568001999, "grad_norm": 0.51171875, "learning_rate": 1.4490492875593078e-05, "loss": 4.1042, "step": 4337 }, { "epoch": 1.4450737070042474, "grad_norm": 0.5546875, "learning_rate": 1.4490199584020536e-05, "loss": 4.0965, "step": 4338 }, { "epoch": 1.4454068460064962, "grad_norm": 0.52734375, "learning_rate": 1.4489906211027311e-05, "loss": 4.0553, "step": 4339 }, { "epoch": 1.445739985008745, "grad_norm": 0.50390625, "learning_rate": 1.448961275661682e-05, "loss": 4.111, "step": 4340 }, { "epoch": 1.4460731240109936, "grad_norm": 0.515625, "learning_rate": 1.4489319220792476e-05, "loss": 4.0578, "step": 4341 }, { "epoch": 1.4464062630132424, "grad_norm": 0.51171875, "learning_rate": 1.4489025603557705e-05, "loss": 4.054, "step": 4342 }, { "epoch": 1.446739402015491, "grad_norm": 0.53515625, "learning_rate": 1.4488731904915923e-05, "loss": 4.1152, "step": 4343 }, { "epoch": 1.4470725410177396, "grad_norm": 0.5234375, "learning_rate": 1.4488438124870553e-05, "loss": 4.0195, "step": 4344 }, { "epoch": 1.4474056800199884, "grad_norm": 0.51953125, "learning_rate": 1.4488144263425014e-05, "loss": 4.0453, "step": 4345 }, { "epoch": 1.4477388190222371, "grad_norm": 0.51953125, "learning_rate": 1.4487850320582732e-05, "loss": 4.1219, "step": 4346 }, { "epoch": 1.4480719580244856, "grad_norm": 0.52734375, "learning_rate": 1.4487556296347128e-05, "loss": 4.0757, "step": 4347 }, { "epoch": 1.4484050970267344, "grad_norm": 0.5234375, "learning_rate": 1.4487262190721628e-05, "loss": 4.0675, "step": 4348 }, { "epoch": 1.448738236028983, "grad_norm": 0.486328125, "learning_rate": 1.448696800370966e-05, "loss": 4.1337, "step": 4349 }, { "epoch": 1.4490713750312318, "grad_norm": 0.51953125, "learning_rate": 1.4486673735314647e-05, "loss": 4.1326, "step": 4350 }, { "epoch": 1.4494045140334806, "grad_norm": 0.5234375, "learning_rate": 1.4486379385540017e-05, "loss": 4.066, "step": 4351 }, { "epoch": 1.449737653035729, "grad_norm": 0.515625, "learning_rate": 1.4486084954389203e-05, "loss": 4.0466, "step": 4352 }, { "epoch": 1.4500707920379778, "grad_norm": 0.54296875, "learning_rate": 1.4485790441865629e-05, "loss": 3.9986, "step": 4353 }, { "epoch": 1.4504039310402266, "grad_norm": 0.515625, "learning_rate": 1.4485495847972727e-05, "loss": 4.1262, "step": 4354 }, { "epoch": 1.450737070042475, "grad_norm": 0.51953125, "learning_rate": 1.4485201172713929e-05, "loss": 4.1059, "step": 4355 }, { "epoch": 1.4510702090447238, "grad_norm": 0.53125, "learning_rate": 1.448490641609267e-05, "loss": 4.0592, "step": 4356 }, { "epoch": 1.4514033480469726, "grad_norm": 0.5234375, "learning_rate": 1.4484611578112377e-05, "loss": 4.0441, "step": 4357 }, { "epoch": 1.4517364870492213, "grad_norm": 0.50390625, "learning_rate": 1.448431665877649e-05, "loss": 4.0567, "step": 4358 }, { "epoch": 1.45206962605147, "grad_norm": 0.50390625, "learning_rate": 1.448402165808844e-05, "loss": 4.1431, "step": 4359 }, { "epoch": 1.4524027650537188, "grad_norm": 0.52734375, "learning_rate": 1.4483726576051667e-05, "loss": 4.0356, "step": 4360 }, { "epoch": 1.4527359040559673, "grad_norm": 0.494140625, "learning_rate": 1.4483431412669606e-05, "loss": 4.1699, "step": 4361 }, { "epoch": 1.453069043058216, "grad_norm": 0.5078125, "learning_rate": 1.4483136167945694e-05, "loss": 4.0751, "step": 4362 }, { "epoch": 1.4534021820604648, "grad_norm": 0.53125, "learning_rate": 1.4482840841883369e-05, "loss": 4.0316, "step": 4363 }, { "epoch": 1.4537353210627133, "grad_norm": 0.51171875, "learning_rate": 1.4482545434486078e-05, "loss": 4.0628, "step": 4364 }, { "epoch": 1.454068460064962, "grad_norm": 0.515625, "learning_rate": 1.4482249945757252e-05, "loss": 3.9932, "step": 4365 }, { "epoch": 1.4544015990672108, "grad_norm": 0.51953125, "learning_rate": 1.4481954375700341e-05, "loss": 4.0224, "step": 4366 }, { "epoch": 1.4547347380694595, "grad_norm": 0.50390625, "learning_rate": 1.4481658724318782e-05, "loss": 4.1069, "step": 4367 }, { "epoch": 1.4550678770717083, "grad_norm": 0.5546875, "learning_rate": 1.4481362991616023e-05, "loss": 4.0675, "step": 4368 }, { "epoch": 1.4554010160739568, "grad_norm": 0.51953125, "learning_rate": 1.4481067177595506e-05, "loss": 4.015, "step": 4369 }, { "epoch": 1.4557341550762055, "grad_norm": 0.515625, "learning_rate": 1.448077128226068e-05, "loss": 4.0883, "step": 4370 }, { "epoch": 1.4560672940784543, "grad_norm": 0.5078125, "learning_rate": 1.4480475305614984e-05, "loss": 4.0937, "step": 4371 }, { "epoch": 1.456400433080703, "grad_norm": 0.53515625, "learning_rate": 1.4480179247661875e-05, "loss": 3.99, "step": 4372 }, { "epoch": 1.4567335720829515, "grad_norm": 0.5390625, "learning_rate": 1.4479883108404795e-05, "loss": 4.0418, "step": 4373 }, { "epoch": 1.4570667110852003, "grad_norm": 0.51953125, "learning_rate": 1.4479586887847194e-05, "loss": 4.0635, "step": 4374 }, { "epoch": 1.457399850087449, "grad_norm": 0.5, "learning_rate": 1.4479290585992522e-05, "loss": 4.085, "step": 4375 }, { "epoch": 1.4577329890896977, "grad_norm": 0.5078125, "learning_rate": 1.4478994202844235e-05, "loss": 4.0927, "step": 4376 }, { "epoch": 1.4580661280919465, "grad_norm": 0.5234375, "learning_rate": 1.4478697738405782e-05, "loss": 4.0424, "step": 4377 }, { "epoch": 1.458399267094195, "grad_norm": 0.52734375, "learning_rate": 1.4478401192680615e-05, "loss": 4.0476, "step": 4378 }, { "epoch": 1.4587324060964437, "grad_norm": 0.53515625, "learning_rate": 1.447810456567219e-05, "loss": 4.0916, "step": 4379 }, { "epoch": 1.4590655450986925, "grad_norm": 0.51171875, "learning_rate": 1.447780785738396e-05, "loss": 4.0351, "step": 4380 }, { "epoch": 1.459398684100941, "grad_norm": 0.53515625, "learning_rate": 1.4477511067819385e-05, "loss": 4.0761, "step": 4381 }, { "epoch": 1.4597318231031897, "grad_norm": 0.52734375, "learning_rate": 1.4477214196981916e-05, "loss": 4.0602, "step": 4382 }, { "epoch": 1.4600649621054385, "grad_norm": 0.5234375, "learning_rate": 1.4476917244875018e-05, "loss": 4.1068, "step": 4383 }, { "epoch": 1.4603981011076872, "grad_norm": 0.5546875, "learning_rate": 1.4476620211502144e-05, "loss": 4.0427, "step": 4384 }, { "epoch": 1.460731240109936, "grad_norm": 0.52734375, "learning_rate": 1.4476323096866756e-05, "loss": 4.0758, "step": 4385 }, { "epoch": 1.4610643791121847, "grad_norm": 0.52734375, "learning_rate": 1.4476025900972314e-05, "loss": 4.0932, "step": 4386 }, { "epoch": 1.4613975181144332, "grad_norm": 0.54296875, "learning_rate": 1.447572862382228e-05, "loss": 4.0653, "step": 4387 }, { "epoch": 1.461730657116682, "grad_norm": 0.5390625, "learning_rate": 1.4475431265420117e-05, "loss": 4.0603, "step": 4388 }, { "epoch": 1.4620637961189307, "grad_norm": 0.53515625, "learning_rate": 1.4475133825769291e-05, "loss": 4.1213, "step": 4389 }, { "epoch": 1.4623969351211792, "grad_norm": 0.53515625, "learning_rate": 1.447483630487326e-05, "loss": 4.0518, "step": 4390 }, { "epoch": 1.462730074123428, "grad_norm": 0.546875, "learning_rate": 1.4474538702735498e-05, "loss": 4.0117, "step": 4391 }, { "epoch": 1.4630632131256767, "grad_norm": 0.51171875, "learning_rate": 1.4474241019359463e-05, "loss": 4.0482, "step": 4392 }, { "epoch": 1.4633963521279254, "grad_norm": 0.51953125, "learning_rate": 1.4473943254748628e-05, "loss": 4.0388, "step": 4393 }, { "epoch": 1.4637294911301741, "grad_norm": 0.5546875, "learning_rate": 1.4473645408906461e-05, "loss": 4.0319, "step": 4394 }, { "epoch": 1.4640626301324229, "grad_norm": 0.53515625, "learning_rate": 1.4473347481836428e-05, "loss": 4.0603, "step": 4395 }, { "epoch": 1.4643957691346714, "grad_norm": 0.51171875, "learning_rate": 1.4473049473542002e-05, "loss": 4.0681, "step": 4396 }, { "epoch": 1.4647289081369201, "grad_norm": 0.52734375, "learning_rate": 1.4472751384026653e-05, "loss": 4.0504, "step": 4397 }, { "epoch": 1.4650620471391689, "grad_norm": 0.52734375, "learning_rate": 1.4472453213293852e-05, "loss": 4.1157, "step": 4398 }, { "epoch": 1.4653951861414174, "grad_norm": 0.5078125, "learning_rate": 1.4472154961347077e-05, "loss": 4.0757, "step": 4399 }, { "epoch": 1.4657283251436661, "grad_norm": 0.51171875, "learning_rate": 1.4471856628189794e-05, "loss": 4.0414, "step": 4400 }, { "epoch": 1.4660614641459149, "grad_norm": 0.515625, "learning_rate": 1.4471558213825486e-05, "loss": 3.9802, "step": 4401 }, { "epoch": 1.4663946031481636, "grad_norm": 0.51953125, "learning_rate": 1.4471259718257623e-05, "loss": 4.0822, "step": 4402 }, { "epoch": 1.4667277421504124, "grad_norm": 0.55859375, "learning_rate": 1.4470961141489682e-05, "loss": 4.0992, "step": 4403 }, { "epoch": 1.4670608811526609, "grad_norm": 0.51171875, "learning_rate": 1.4470662483525147e-05, "loss": 4.0903, "step": 4404 }, { "epoch": 1.4673940201549096, "grad_norm": 0.51953125, "learning_rate": 1.447036374436749e-05, "loss": 4.0576, "step": 4405 }, { "epoch": 1.4677271591571583, "grad_norm": 0.52734375, "learning_rate": 1.4470064924020196e-05, "loss": 4.0734, "step": 4406 }, { "epoch": 1.468060298159407, "grad_norm": 0.546875, "learning_rate": 1.4469766022486738e-05, "loss": 4.0639, "step": 4407 }, { "epoch": 1.4683934371616556, "grad_norm": 0.53125, "learning_rate": 1.4469467039770607e-05, "loss": 4.0272, "step": 4408 }, { "epoch": 1.4687265761639043, "grad_norm": 0.515625, "learning_rate": 1.4469167975875278e-05, "loss": 4.0867, "step": 4409 }, { "epoch": 1.469059715166153, "grad_norm": 0.53515625, "learning_rate": 1.4468868830804237e-05, "loss": 4.0457, "step": 4410 }, { "epoch": 1.4693928541684018, "grad_norm": 0.51953125, "learning_rate": 1.446856960456097e-05, "loss": 4.1299, "step": 4411 }, { "epoch": 1.4697259931706506, "grad_norm": 0.515625, "learning_rate": 1.4468270297148961e-05, "loss": 4.0895, "step": 4412 }, { "epoch": 1.470059132172899, "grad_norm": 0.546875, "learning_rate": 1.4467970908571696e-05, "loss": 4.0375, "step": 4413 }, { "epoch": 1.4703922711751478, "grad_norm": 0.56640625, "learning_rate": 1.4467671438832662e-05, "loss": 4.0036, "step": 4414 }, { "epoch": 1.4707254101773966, "grad_norm": 0.5625, "learning_rate": 1.4467371887935349e-05, "loss": 4.0376, "step": 4415 }, { "epoch": 1.471058549179645, "grad_norm": 0.494140625, "learning_rate": 1.4467072255883243e-05, "loss": 4.0587, "step": 4416 }, { "epoch": 1.4713916881818938, "grad_norm": 0.51953125, "learning_rate": 1.4466772542679837e-05, "loss": 4.0884, "step": 4417 }, { "epoch": 1.4717248271841425, "grad_norm": 0.53515625, "learning_rate": 1.4466472748328621e-05, "loss": 4.0582, "step": 4418 }, { "epoch": 1.4720579661863913, "grad_norm": 0.5234375, "learning_rate": 1.4466172872833089e-05, "loss": 4.1196, "step": 4419 }, { "epoch": 1.47239110518864, "grad_norm": 0.53515625, "learning_rate": 1.4465872916196728e-05, "loss": 4.0149, "step": 4420 }, { "epoch": 1.4727242441908888, "grad_norm": 0.546875, "learning_rate": 1.4465572878423035e-05, "loss": 4.0744, "step": 4421 }, { "epoch": 1.4730573831931373, "grad_norm": 0.52734375, "learning_rate": 1.4465272759515508e-05, "loss": 4.0029, "step": 4422 }, { "epoch": 1.473390522195386, "grad_norm": 0.51953125, "learning_rate": 1.446497255947764e-05, "loss": 4.0122, "step": 4423 }, { "epoch": 1.4737236611976348, "grad_norm": 0.53515625, "learning_rate": 1.4464672278312928e-05, "loss": 4.124, "step": 4424 }, { "epoch": 1.4740568001998833, "grad_norm": 0.51953125, "learning_rate": 1.4464371916024867e-05, "loss": 3.9881, "step": 4425 }, { "epoch": 1.474389939202132, "grad_norm": 0.53515625, "learning_rate": 1.4464071472616961e-05, "loss": 4.0688, "step": 4426 }, { "epoch": 1.4747230782043808, "grad_norm": 0.5546875, "learning_rate": 1.4463770948092706e-05, "loss": 4.097, "step": 4427 }, { "epoch": 1.4750562172066295, "grad_norm": 0.5, "learning_rate": 1.44634703424556e-05, "loss": 4.0863, "step": 4428 }, { "epoch": 1.4753893562088782, "grad_norm": 0.52734375, "learning_rate": 1.446316965570915e-05, "loss": 4.0271, "step": 4429 }, { "epoch": 1.475722495211127, "grad_norm": 0.515625, "learning_rate": 1.4462868887856853e-05, "loss": 4.1002, "step": 4430 }, { "epoch": 1.4760556342133755, "grad_norm": 0.51953125, "learning_rate": 1.446256803890222e-05, "loss": 4.1715, "step": 4431 }, { "epoch": 1.4763887732156242, "grad_norm": 0.5078125, "learning_rate": 1.4462267108848745e-05, "loss": 4.155, "step": 4432 }, { "epoch": 1.476721912217873, "grad_norm": 0.515625, "learning_rate": 1.4461966097699943e-05, "loss": 4.079, "step": 4433 }, { "epoch": 1.4770550512201215, "grad_norm": 0.5, "learning_rate": 1.4461665005459312e-05, "loss": 4.177, "step": 4434 }, { "epoch": 1.4773881902223702, "grad_norm": 0.5, "learning_rate": 1.4461363832130365e-05, "loss": 4.1554, "step": 4435 }, { "epoch": 1.477721329224619, "grad_norm": 0.53515625, "learning_rate": 1.4461062577716605e-05, "loss": 4.0801, "step": 4436 }, { "epoch": 1.4780544682268677, "grad_norm": 0.52734375, "learning_rate": 1.4460761242221547e-05, "loss": 4.0786, "step": 4437 }, { "epoch": 1.4783876072291164, "grad_norm": 0.51171875, "learning_rate": 1.4460459825648694e-05, "loss": 4.0634, "step": 4438 }, { "epoch": 1.478720746231365, "grad_norm": 0.49609375, "learning_rate": 1.4460158328001563e-05, "loss": 4.0931, "step": 4439 }, { "epoch": 1.4790538852336137, "grad_norm": 0.53515625, "learning_rate": 1.4459856749283663e-05, "loss": 4.1005, "step": 4440 }, { "epoch": 1.4793870242358624, "grad_norm": 0.51953125, "learning_rate": 1.4459555089498506e-05, "loss": 4.0904, "step": 4441 }, { "epoch": 1.4797201632381112, "grad_norm": 0.53125, "learning_rate": 1.4459253348649605e-05, "loss": 4.1075, "step": 4442 }, { "epoch": 1.4800533022403597, "grad_norm": 0.490234375, "learning_rate": 1.445895152674048e-05, "loss": 4.0959, "step": 4443 }, { "epoch": 1.4803864412426084, "grad_norm": 0.5390625, "learning_rate": 1.445864962377464e-05, "loss": 4.0599, "step": 4444 }, { "epoch": 1.4807195802448572, "grad_norm": 0.5078125, "learning_rate": 1.4458347639755607e-05, "loss": 4.0873, "step": 4445 }, { "epoch": 1.481052719247106, "grad_norm": 0.50390625, "learning_rate": 1.4458045574686892e-05, "loss": 4.0638, "step": 4446 }, { "epoch": 1.4813858582493546, "grad_norm": 0.54296875, "learning_rate": 1.4457743428572018e-05, "loss": 4.0798, "step": 4447 }, { "epoch": 1.4817189972516032, "grad_norm": 0.5078125, "learning_rate": 1.4457441201414505e-05, "loss": 4.0699, "step": 4448 }, { "epoch": 1.482052136253852, "grad_norm": 0.50390625, "learning_rate": 1.445713889321787e-05, "loss": 4.1263, "step": 4449 }, { "epoch": 1.4823852752561006, "grad_norm": 0.51171875, "learning_rate": 1.4456836503985637e-05, "loss": 4.0822, "step": 4450 }, { "epoch": 1.4827184142583492, "grad_norm": 0.54296875, "learning_rate": 1.4456534033721328e-05, "loss": 4.0183, "step": 4451 }, { "epoch": 1.483051553260598, "grad_norm": 0.5234375, "learning_rate": 1.4456231482428466e-05, "loss": 4.1444, "step": 4452 }, { "epoch": 1.4833846922628466, "grad_norm": 0.5234375, "learning_rate": 1.4455928850110574e-05, "loss": 3.9914, "step": 4453 }, { "epoch": 1.4837178312650954, "grad_norm": 0.51953125, "learning_rate": 1.4455626136771174e-05, "loss": 4.0297, "step": 4454 }, { "epoch": 1.4840509702673441, "grad_norm": 0.51171875, "learning_rate": 1.4455323342413799e-05, "loss": 4.0984, "step": 4455 }, { "epoch": 1.4843841092695929, "grad_norm": 0.5078125, "learning_rate": 1.4455020467041973e-05, "loss": 4.0207, "step": 4456 }, { "epoch": 1.4847172482718414, "grad_norm": 0.5234375, "learning_rate": 1.445471751065922e-05, "loss": 4.0678, "step": 4457 }, { "epoch": 1.4850503872740901, "grad_norm": 0.53515625, "learning_rate": 1.4454414473269073e-05, "loss": 4.034, "step": 4458 }, { "epoch": 1.4853835262763389, "grad_norm": 0.53125, "learning_rate": 1.4454111354875061e-05, "loss": 4.0206, "step": 4459 }, { "epoch": 1.4857166652785874, "grad_norm": 0.546875, "learning_rate": 1.4453808155480713e-05, "loss": 4.07, "step": 4460 }, { "epoch": 1.486049804280836, "grad_norm": 0.51953125, "learning_rate": 1.4453504875089564e-05, "loss": 4.0734, "step": 4461 }, { "epoch": 1.4863829432830848, "grad_norm": 0.55859375, "learning_rate": 1.4453201513705144e-05, "loss": 4.1058, "step": 4462 }, { "epoch": 1.4867160822853336, "grad_norm": 0.53125, "learning_rate": 1.4452898071330985e-05, "loss": 4.091, "step": 4463 }, { "epoch": 1.4870492212875823, "grad_norm": 0.5234375, "learning_rate": 1.4452594547970626e-05, "loss": 4.0607, "step": 4464 }, { "epoch": 1.487382360289831, "grad_norm": 0.515625, "learning_rate": 1.4452290943627598e-05, "loss": 4.1111, "step": 4465 }, { "epoch": 1.4877154992920796, "grad_norm": 0.51953125, "learning_rate": 1.4451987258305441e-05, "loss": 4.1316, "step": 4466 }, { "epoch": 1.4880486382943283, "grad_norm": 0.5234375, "learning_rate": 1.445168349200769e-05, "loss": 4.079, "step": 4467 }, { "epoch": 1.488381777296577, "grad_norm": 0.51171875, "learning_rate": 1.4451379644737882e-05, "loss": 4.0772, "step": 4468 }, { "epoch": 1.4887149162988256, "grad_norm": 0.609375, "learning_rate": 1.4451075716499559e-05, "loss": 4.0823, "step": 4469 }, { "epoch": 1.4890480553010743, "grad_norm": 0.50390625, "learning_rate": 1.445077170729626e-05, "loss": 4.0446, "step": 4470 }, { "epoch": 1.489381194303323, "grad_norm": 0.51953125, "learning_rate": 1.4450467617131525e-05, "loss": 4.0238, "step": 4471 }, { "epoch": 1.4897143333055718, "grad_norm": 0.546875, "learning_rate": 1.4450163446008898e-05, "loss": 4.0377, "step": 4472 }, { "epoch": 1.4900474723078205, "grad_norm": 0.50390625, "learning_rate": 1.444985919393192e-05, "loss": 4.1024, "step": 4473 }, { "epoch": 1.490380611310069, "grad_norm": 0.52734375, "learning_rate": 1.4449554860904137e-05, "loss": 4.1212, "step": 4474 }, { "epoch": 1.4907137503123178, "grad_norm": 0.51953125, "learning_rate": 1.4449250446929092e-05, "loss": 4.0857, "step": 4475 }, { "epoch": 1.4910468893145665, "grad_norm": 0.51953125, "learning_rate": 1.4448945952010331e-05, "loss": 4.0772, "step": 4476 }, { "epoch": 1.4913800283168153, "grad_norm": 0.51171875, "learning_rate": 1.44486413761514e-05, "loss": 4.1227, "step": 4477 }, { "epoch": 1.4917131673190638, "grad_norm": 0.5390625, "learning_rate": 1.444833671935585e-05, "loss": 4.041, "step": 4478 }, { "epoch": 1.4920463063213125, "grad_norm": 0.546875, "learning_rate": 1.4448031981627226e-05, "loss": 4.0678, "step": 4479 }, { "epoch": 1.4923794453235613, "grad_norm": 0.5234375, "learning_rate": 1.444772716296908e-05, "loss": 4.1197, "step": 4480 }, { "epoch": 1.49271258432581, "grad_norm": 0.54296875, "learning_rate": 1.444742226338496e-05, "loss": 4.0228, "step": 4481 }, { "epoch": 1.4930457233280587, "grad_norm": 0.52734375, "learning_rate": 1.4447117282878419e-05, "loss": 4.0454, "step": 4482 }, { "epoch": 1.4933788623303073, "grad_norm": 0.515625, "learning_rate": 1.4446812221453008e-05, "loss": 4.0901, "step": 4483 }, { "epoch": 1.493712001332556, "grad_norm": 0.53125, "learning_rate": 1.4446507079112283e-05, "loss": 4.0447, "step": 4484 }, { "epoch": 1.4940451403348047, "grad_norm": 0.5234375, "learning_rate": 1.4446201855859795e-05, "loss": 4.1213, "step": 4485 }, { "epoch": 1.4943782793370533, "grad_norm": 0.54296875, "learning_rate": 1.4445896551699102e-05, "loss": 4.048, "step": 4486 }, { "epoch": 1.494711418339302, "grad_norm": 0.52734375, "learning_rate": 1.444559116663376e-05, "loss": 4.0352, "step": 4487 }, { "epoch": 1.4950445573415507, "grad_norm": 0.51171875, "learning_rate": 1.4445285700667324e-05, "loss": 4.0632, "step": 4488 }, { "epoch": 1.4953776963437995, "grad_norm": 0.50390625, "learning_rate": 1.4444980153803352e-05, "loss": 4.0841, "step": 4489 }, { "epoch": 1.4957108353460482, "grad_norm": 0.515625, "learning_rate": 1.4444674526045407e-05, "loss": 4.0829, "step": 4490 }, { "epoch": 1.496043974348297, "grad_norm": 0.546875, "learning_rate": 1.4444368817397042e-05, "loss": 3.9778, "step": 4491 }, { "epoch": 1.4963771133505455, "grad_norm": 0.51171875, "learning_rate": 1.4444063027861824e-05, "loss": 4.082, "step": 4492 }, { "epoch": 1.4967102523527942, "grad_norm": 0.53515625, "learning_rate": 1.4443757157443312e-05, "loss": 4.1064, "step": 4493 }, { "epoch": 1.497043391355043, "grad_norm": 0.52734375, "learning_rate": 1.444345120614507e-05, "loss": 3.9823, "step": 4494 }, { "epoch": 1.4973765303572915, "grad_norm": 0.53515625, "learning_rate": 1.444314517397066e-05, "loss": 4.1052, "step": 4495 }, { "epoch": 1.4977096693595402, "grad_norm": 0.51171875, "learning_rate": 1.444283906092365e-05, "loss": 4.0853, "step": 4496 }, { "epoch": 1.498042808361789, "grad_norm": 0.5703125, "learning_rate": 1.44425328670076e-05, "loss": 4.1142, "step": 4497 }, { "epoch": 1.4983759473640377, "grad_norm": 0.51171875, "learning_rate": 1.444222659222608e-05, "loss": 4.1205, "step": 4498 }, { "epoch": 1.4987090863662864, "grad_norm": 0.5390625, "learning_rate": 1.4441920236582658e-05, "loss": 4.0545, "step": 4499 }, { "epoch": 1.4990422253685352, "grad_norm": 0.50390625, "learning_rate": 1.4441613800080901e-05, "loss": 4.1142, "step": 4500 }, { "epoch": 1.4993753643707837, "grad_norm": 0.52734375, "learning_rate": 1.444130728272438e-05, "loss": 4.0077, "step": 4501 }, { "epoch": 1.4997085033730324, "grad_norm": 0.50390625, "learning_rate": 1.4441000684516662e-05, "loss": 4.1479, "step": 4502 }, { "epoch": 1.500041642375281, "grad_norm": 0.54296875, "learning_rate": 1.4440694005461322e-05, "loss": 4.0246, "step": 4503 }, { "epoch": 1.5003747813775297, "grad_norm": 0.52734375, "learning_rate": 1.4440387245561928e-05, "loss": 4.0734, "step": 4504 }, { "epoch": 1.5007079203797784, "grad_norm": 0.515625, "learning_rate": 1.4440080404822058e-05, "loss": 4.0423, "step": 4505 }, { "epoch": 1.5010410593820271, "grad_norm": 0.53125, "learning_rate": 1.4439773483245283e-05, "loss": 4.0678, "step": 4506 }, { "epoch": 1.5013741983842759, "grad_norm": 0.5078125, "learning_rate": 1.4439466480835176e-05, "loss": 4.0682, "step": 4507 }, { "epoch": 1.5017073373865246, "grad_norm": 0.5078125, "learning_rate": 1.4439159397595319e-05, "loss": 3.9871, "step": 4508 }, { "epoch": 1.5020404763887734, "grad_norm": 0.5234375, "learning_rate": 1.4438852233529283e-05, "loss": 4.0627, "step": 4509 }, { "epoch": 1.5023736153910219, "grad_norm": 0.546875, "learning_rate": 1.4438544988640648e-05, "loss": 3.9827, "step": 4510 }, { "epoch": 1.5027067543932706, "grad_norm": 0.55078125, "learning_rate": 1.4438237662932992e-05, "loss": 4.0926, "step": 4511 }, { "epoch": 1.5030398933955191, "grad_norm": 0.5234375, "learning_rate": 1.4437930256409897e-05, "loss": 4.1083, "step": 4512 }, { "epoch": 1.5033730323977679, "grad_norm": 0.5390625, "learning_rate": 1.4437622769074942e-05, "loss": 4.0528, "step": 4513 }, { "epoch": 1.5037061714000166, "grad_norm": 0.53515625, "learning_rate": 1.4437315200931708e-05, "loss": 4.0478, "step": 4514 }, { "epoch": 1.5040393104022654, "grad_norm": 0.51171875, "learning_rate": 1.4437007551983779e-05, "loss": 4.032, "step": 4515 }, { "epoch": 1.504372449404514, "grad_norm": 0.56640625, "learning_rate": 1.4436699822234737e-05, "loss": 3.9624, "step": 4516 }, { "epoch": 1.5047055884067628, "grad_norm": 0.5234375, "learning_rate": 1.4436392011688166e-05, "loss": 4.1143, "step": 4517 }, { "epoch": 1.5050387274090116, "grad_norm": 0.51171875, "learning_rate": 1.4436084120347654e-05, "loss": 4.1408, "step": 4518 }, { "epoch": 1.50537186641126, "grad_norm": 0.5234375, "learning_rate": 1.4435776148216784e-05, "loss": 4.1337, "step": 4519 }, { "epoch": 1.5057050054135088, "grad_norm": 0.53515625, "learning_rate": 1.4435468095299145e-05, "loss": 4.0173, "step": 4520 }, { "epoch": 1.5060381444157573, "grad_norm": 0.49609375, "learning_rate": 1.4435159961598327e-05, "loss": 4.105, "step": 4521 }, { "epoch": 1.506371283418006, "grad_norm": 0.51953125, "learning_rate": 1.4434851747117916e-05, "loss": 4.1064, "step": 4522 }, { "epoch": 1.5067044224202548, "grad_norm": 0.490234375, "learning_rate": 1.4434543451861501e-05, "loss": 4.1058, "step": 4523 }, { "epoch": 1.5070375614225036, "grad_norm": 0.51171875, "learning_rate": 1.4434235075832675e-05, "loss": 4.1619, "step": 4524 }, { "epoch": 1.5073707004247523, "grad_norm": 0.53515625, "learning_rate": 1.443392661903503e-05, "loss": 4.0392, "step": 4525 }, { "epoch": 1.507703839427001, "grad_norm": 0.51953125, "learning_rate": 1.4433618081472162e-05, "loss": 4.0742, "step": 4526 }, { "epoch": 1.5080369784292496, "grad_norm": 0.5078125, "learning_rate": 1.443330946314766e-05, "loss": 4.1086, "step": 4527 }, { "epoch": 1.5083701174314983, "grad_norm": 0.52734375, "learning_rate": 1.4433000764065119e-05, "loss": 4.1183, "step": 4528 }, { "epoch": 1.508703256433747, "grad_norm": 0.5390625, "learning_rate": 1.4432691984228136e-05, "loss": 4.0568, "step": 4529 }, { "epoch": 1.5090363954359955, "grad_norm": 0.52734375, "learning_rate": 1.443238312364031e-05, "loss": 4.1031, "step": 4530 }, { "epoch": 1.5093695344382443, "grad_norm": 0.51171875, "learning_rate": 1.4432074182305233e-05, "loss": 4.1355, "step": 4531 }, { "epoch": 1.509702673440493, "grad_norm": 0.54296875, "learning_rate": 1.443176516022651e-05, "loss": 4.0862, "step": 4532 }, { "epoch": 1.5100358124427418, "grad_norm": 0.5, "learning_rate": 1.4431456057407733e-05, "loss": 4.0525, "step": 4533 }, { "epoch": 1.5103689514449905, "grad_norm": 0.51953125, "learning_rate": 1.4431146873852508e-05, "loss": 4.1136, "step": 4534 }, { "epoch": 1.5107020904472392, "grad_norm": 0.53515625, "learning_rate": 1.4430837609564435e-05, "loss": 4.0875, "step": 4535 }, { "epoch": 1.5110352294494878, "grad_norm": 0.5078125, "learning_rate": 1.4430528264547116e-05, "loss": 4.0329, "step": 4536 }, { "epoch": 1.5113683684517365, "grad_norm": 0.515625, "learning_rate": 1.4430218838804155e-05, "loss": 4.0974, "step": 4537 }, { "epoch": 1.511701507453985, "grad_norm": 0.53125, "learning_rate": 1.4429909332339154e-05, "loss": 4.0816, "step": 4538 }, { "epoch": 1.5120346464562338, "grad_norm": 0.51171875, "learning_rate": 1.4429599745155721e-05, "loss": 4.0311, "step": 4539 }, { "epoch": 1.5123677854584825, "grad_norm": 0.5234375, "learning_rate": 1.442929007725746e-05, "loss": 4.0334, "step": 4540 }, { "epoch": 1.5127009244607312, "grad_norm": 0.5, "learning_rate": 1.442898032864798e-05, "loss": 4.1174, "step": 4541 }, { "epoch": 1.51303406346298, "grad_norm": 0.5, "learning_rate": 1.4428670499330884e-05, "loss": 4.108, "step": 4542 }, { "epoch": 1.5133672024652287, "grad_norm": 0.5390625, "learning_rate": 1.4428360589309784e-05, "loss": 4.0509, "step": 4543 }, { "epoch": 1.5137003414674775, "grad_norm": 0.53125, "learning_rate": 1.4428050598588292e-05, "loss": 4.0388, "step": 4544 }, { "epoch": 1.514033480469726, "grad_norm": 0.51953125, "learning_rate": 1.4427740527170018e-05, "loss": 4.0207, "step": 4545 }, { "epoch": 1.5143666194719747, "grad_norm": 0.54296875, "learning_rate": 1.442743037505857e-05, "loss": 4.1134, "step": 4546 }, { "epoch": 1.5146997584742232, "grad_norm": 0.51953125, "learning_rate": 1.4427120142257563e-05, "loss": 4.0365, "step": 4547 }, { "epoch": 1.515032897476472, "grad_norm": 0.53515625, "learning_rate": 1.4426809828770612e-05, "loss": 4.0635, "step": 4548 }, { "epoch": 1.5153660364787207, "grad_norm": 0.515625, "learning_rate": 1.4426499434601327e-05, "loss": 4.1075, "step": 4549 }, { "epoch": 1.5156991754809694, "grad_norm": 0.515625, "learning_rate": 1.4426188959753329e-05, "loss": 4.1049, "step": 4550 }, { "epoch": 1.5160323144832182, "grad_norm": 0.5234375, "learning_rate": 1.442587840423023e-05, "loss": 4.0784, "step": 4551 }, { "epoch": 1.516365453485467, "grad_norm": 0.5234375, "learning_rate": 1.442556776803565e-05, "loss": 4.0664, "step": 4552 }, { "epoch": 1.5166985924877157, "grad_norm": 0.55078125, "learning_rate": 1.4425257051173207e-05, "loss": 3.9766, "step": 4553 }, { "epoch": 1.5170317314899642, "grad_norm": 0.5390625, "learning_rate": 1.4424946253646519e-05, "loss": 4.086, "step": 4554 }, { "epoch": 1.517364870492213, "grad_norm": 0.546875, "learning_rate": 1.4424635375459207e-05, "loss": 4.1066, "step": 4555 }, { "epoch": 1.5176980094944614, "grad_norm": 0.5390625, "learning_rate": 1.4424324416614889e-05, "loss": 3.9943, "step": 4556 }, { "epoch": 1.5180311484967102, "grad_norm": 0.53515625, "learning_rate": 1.4424013377117191e-05, "loss": 4.081, "step": 4557 }, { "epoch": 1.518364287498959, "grad_norm": 0.5078125, "learning_rate": 1.4423702256969736e-05, "loss": 4.0524, "step": 4558 }, { "epoch": 1.5186974265012076, "grad_norm": 0.50390625, "learning_rate": 1.4423391056176145e-05, "loss": 4.1166, "step": 4559 }, { "epoch": 1.5190305655034564, "grad_norm": 0.546875, "learning_rate": 1.4423079774740044e-05, "loss": 4.0155, "step": 4560 }, { "epoch": 1.5193637045057051, "grad_norm": 0.5390625, "learning_rate": 1.4422768412665061e-05, "loss": 4.0295, "step": 4561 }, { "epoch": 1.5196968435079536, "grad_norm": 0.5625, "learning_rate": 1.4422456969954818e-05, "loss": 4.1422, "step": 4562 }, { "epoch": 1.5200299825102024, "grad_norm": 0.53125, "learning_rate": 1.4422145446612948e-05, "loss": 4.061, "step": 4563 }, { "epoch": 1.5203631215124511, "grad_norm": 0.515625, "learning_rate": 1.4421833842643074e-05, "loss": 4.0762, "step": 4564 }, { "epoch": 1.5206962605146996, "grad_norm": 0.5546875, "learning_rate": 1.442152215804883e-05, "loss": 4.0096, "step": 4565 }, { "epoch": 1.5210293995169484, "grad_norm": 0.57421875, "learning_rate": 1.4421210392833846e-05, "loss": 4.0465, "step": 4566 }, { "epoch": 1.5213625385191971, "grad_norm": 0.53125, "learning_rate": 1.442089854700175e-05, "loss": 4.0894, "step": 4567 }, { "epoch": 1.5216956775214459, "grad_norm": 0.5234375, "learning_rate": 1.4420586620556179e-05, "loss": 4.0577, "step": 4568 }, { "epoch": 1.5220288165236946, "grad_norm": 0.53515625, "learning_rate": 1.442027461350076e-05, "loss": 4.079, "step": 4569 }, { "epoch": 1.5223619555259433, "grad_norm": 0.515625, "learning_rate": 1.4419962525839135e-05, "loss": 4.0983, "step": 4570 }, { "epoch": 1.5226950945281919, "grad_norm": 0.53125, "learning_rate": 1.4419650357574933e-05, "loss": 4.0559, "step": 4571 }, { "epoch": 1.5230282335304406, "grad_norm": 0.53515625, "learning_rate": 1.4419338108711793e-05, "loss": 4.139, "step": 4572 }, { "epoch": 1.523361372532689, "grad_norm": 0.5234375, "learning_rate": 1.4419025779253351e-05, "loss": 4.1274, "step": 4573 }, { "epoch": 1.5236945115349378, "grad_norm": 0.52734375, "learning_rate": 1.4418713369203246e-05, "loss": 4.0157, "step": 4574 }, { "epoch": 1.5240276505371866, "grad_norm": 0.54296875, "learning_rate": 1.4418400878565115e-05, "loss": 4.0751, "step": 4575 }, { "epoch": 1.5243607895394353, "grad_norm": 0.5234375, "learning_rate": 1.4418088307342599e-05, "loss": 3.9948, "step": 4576 }, { "epoch": 1.524693928541684, "grad_norm": 0.51953125, "learning_rate": 1.4417775655539341e-05, "loss": 4.0292, "step": 4577 }, { "epoch": 1.5250270675439328, "grad_norm": 0.5234375, "learning_rate": 1.4417462923158978e-05, "loss": 4.0739, "step": 4578 }, { "epoch": 1.5253602065461815, "grad_norm": 0.5078125, "learning_rate": 1.4417150110205158e-05, "loss": 4.0711, "step": 4579 }, { "epoch": 1.52569334554843, "grad_norm": 0.51953125, "learning_rate": 1.4416837216681518e-05, "loss": 4.0059, "step": 4580 }, { "epoch": 1.5260264845506788, "grad_norm": 0.53125, "learning_rate": 1.4416524242591709e-05, "loss": 4.032, "step": 4581 }, { "epoch": 1.5263596235529273, "grad_norm": 0.51953125, "learning_rate": 1.4416211187939372e-05, "loss": 4.0308, "step": 4582 }, { "epoch": 1.526692762555176, "grad_norm": 0.53515625, "learning_rate": 1.4415898052728155e-05, "loss": 3.9591, "step": 4583 }, { "epoch": 1.5270259015574248, "grad_norm": 0.51953125, "learning_rate": 1.4415584836961708e-05, "loss": 4.0536, "step": 4584 }, { "epoch": 1.5273590405596735, "grad_norm": 0.51953125, "learning_rate": 1.4415271540643674e-05, "loss": 4.0915, "step": 4585 }, { "epoch": 1.5276921795619223, "grad_norm": 0.52734375, "learning_rate": 1.4414958163777707e-05, "loss": 4.0095, "step": 4586 }, { "epoch": 1.528025318564171, "grad_norm": 0.5625, "learning_rate": 1.4414644706367454e-05, "loss": 4.0964, "step": 4587 }, { "epoch": 1.5283584575664197, "grad_norm": 0.5390625, "learning_rate": 1.4414331168416567e-05, "loss": 4.0064, "step": 4588 }, { "epoch": 1.5286915965686683, "grad_norm": 0.55078125, "learning_rate": 1.4414017549928698e-05, "loss": 4.0124, "step": 4589 }, { "epoch": 1.529024735570917, "grad_norm": 0.5546875, "learning_rate": 1.4413703850907502e-05, "loss": 4.0261, "step": 4590 }, { "epoch": 1.5293578745731655, "grad_norm": 0.53515625, "learning_rate": 1.441339007135663e-05, "loss": 4.0233, "step": 4591 }, { "epoch": 1.5296910135754143, "grad_norm": 0.5546875, "learning_rate": 1.4413076211279738e-05, "loss": 4.0057, "step": 4592 }, { "epoch": 1.530024152577663, "grad_norm": 0.53125, "learning_rate": 1.4412762270680484e-05, "loss": 4.0968, "step": 4593 }, { "epoch": 1.5303572915799117, "grad_norm": 0.5390625, "learning_rate": 1.4412448249562519e-05, "loss": 4.0671, "step": 4594 }, { "epoch": 1.5306904305821605, "grad_norm": 0.5234375, "learning_rate": 1.4412134147929507e-05, "loss": 4.0758, "step": 4595 }, { "epoch": 1.5310235695844092, "grad_norm": 0.54296875, "learning_rate": 1.4411819965785103e-05, "loss": 3.9836, "step": 4596 }, { "epoch": 1.5313567085866577, "grad_norm": 0.53125, "learning_rate": 1.4411505703132967e-05, "loss": 3.9877, "step": 4597 }, { "epoch": 1.5316898475889065, "grad_norm": 0.55078125, "learning_rate": 1.441119135997676e-05, "loss": 4.0389, "step": 4598 }, { "epoch": 1.5320229865911552, "grad_norm": 0.51953125, "learning_rate": 1.4410876936320146e-05, "loss": 4.0313, "step": 4599 }, { "epoch": 1.5323561255934037, "grad_norm": 0.49609375, "learning_rate": 1.441056243216678e-05, "loss": 4.1089, "step": 4600 }, { "epoch": 1.5326892645956525, "grad_norm": 0.51953125, "learning_rate": 1.4410247847520333e-05, "loss": 4.0442, "step": 4601 }, { "epoch": 1.5330224035979012, "grad_norm": 0.54296875, "learning_rate": 1.4409933182384465e-05, "loss": 4.028, "step": 4602 }, { "epoch": 1.53335554260015, "grad_norm": 0.515625, "learning_rate": 1.4409618436762842e-05, "loss": 4.0595, "step": 4603 }, { "epoch": 1.5336886816023987, "grad_norm": 0.5234375, "learning_rate": 1.4409303610659131e-05, "loss": 4.0524, "step": 4604 }, { "epoch": 1.5340218206046474, "grad_norm": 0.546875, "learning_rate": 1.4408988704076999e-05, "loss": 4.0768, "step": 4605 }, { "epoch": 1.534354959606896, "grad_norm": 0.55078125, "learning_rate": 1.4408673717020111e-05, "loss": 4.0246, "step": 4606 }, { "epoch": 1.5346880986091447, "grad_norm": 0.53125, "learning_rate": 1.440835864949214e-05, "loss": 4.0567, "step": 4607 }, { "epoch": 1.5350212376113932, "grad_norm": 0.53515625, "learning_rate": 1.4408043501496754e-05, "loss": 4.043, "step": 4608 }, { "epoch": 1.535354376613642, "grad_norm": 0.51171875, "learning_rate": 1.4407728273037624e-05, "loss": 4.0997, "step": 4609 }, { "epoch": 1.5356875156158907, "grad_norm": 0.53125, "learning_rate": 1.4407412964118423e-05, "loss": 4.0625, "step": 4610 }, { "epoch": 1.5360206546181394, "grad_norm": 0.53125, "learning_rate": 1.4407097574742822e-05, "loss": 4.0904, "step": 4611 }, { "epoch": 1.5363537936203882, "grad_norm": 0.52734375, "learning_rate": 1.4406782104914495e-05, "loss": 4.0324, "step": 4612 }, { "epoch": 1.536686932622637, "grad_norm": 0.52734375, "learning_rate": 1.4406466554637115e-05, "loss": 4.1304, "step": 4613 }, { "epoch": 1.5370200716248856, "grad_norm": 0.51171875, "learning_rate": 1.440615092391436e-05, "loss": 4.0588, "step": 4614 }, { "epoch": 1.5373532106271341, "grad_norm": 0.55859375, "learning_rate": 1.4405835212749905e-05, "loss": 3.991, "step": 4615 }, { "epoch": 1.5376863496293829, "grad_norm": 0.52734375, "learning_rate": 1.4405519421147429e-05, "loss": 4.029, "step": 4616 }, { "epoch": 1.5380194886316314, "grad_norm": 0.5078125, "learning_rate": 1.440520354911061e-05, "loss": 4.1363, "step": 4617 }, { "epoch": 1.5383526276338801, "grad_norm": 0.53125, "learning_rate": 1.4404887596643124e-05, "loss": 4.0418, "step": 4618 }, { "epoch": 1.5386857666361289, "grad_norm": 0.5, "learning_rate": 1.4404571563748653e-05, "loss": 4.0987, "step": 4619 }, { "epoch": 1.5390189056383776, "grad_norm": 0.55078125, "learning_rate": 1.440425545043088e-05, "loss": 4.1455, "step": 4620 }, { "epoch": 1.5393520446406264, "grad_norm": 0.5390625, "learning_rate": 1.4403939256693485e-05, "loss": 4.0755, "step": 4621 }, { "epoch": 1.539685183642875, "grad_norm": 0.54296875, "learning_rate": 1.440362298254015e-05, "loss": 4.0676, "step": 4622 }, { "epoch": 1.5400183226451238, "grad_norm": 0.515625, "learning_rate": 1.4403306627974563e-05, "loss": 4.017, "step": 4623 }, { "epoch": 1.5403514616473724, "grad_norm": 0.515625, "learning_rate": 1.4402990193000408e-05, "loss": 3.9791, "step": 4624 }, { "epoch": 1.540684600649621, "grad_norm": 0.5234375, "learning_rate": 1.4402673677621363e-05, "loss": 4.0578, "step": 4625 }, { "epoch": 1.5410177396518696, "grad_norm": 0.55859375, "learning_rate": 1.4402357081841125e-05, "loss": 4.0269, "step": 4626 }, { "epoch": 1.5413508786541184, "grad_norm": 0.51171875, "learning_rate": 1.4402040405663379e-05, "loss": 4.1466, "step": 4627 }, { "epoch": 1.541684017656367, "grad_norm": 0.50390625, "learning_rate": 1.440172364909181e-05, "loss": 4.0272, "step": 4628 }, { "epoch": 1.5420171566586158, "grad_norm": 0.55859375, "learning_rate": 1.440140681213011e-05, "loss": 4.0766, "step": 4629 }, { "epoch": 1.5423502956608646, "grad_norm": 0.515625, "learning_rate": 1.4401089894781968e-05, "loss": 4.0725, "step": 4630 }, { "epoch": 1.5426834346631133, "grad_norm": 0.5078125, "learning_rate": 1.4400772897051076e-05, "loss": 4.0329, "step": 4631 }, { "epoch": 1.5430165736653618, "grad_norm": 0.5078125, "learning_rate": 1.4400455818941129e-05, "loss": 4.0831, "step": 4632 }, { "epoch": 1.5433497126676106, "grad_norm": 0.546875, "learning_rate": 1.4400138660455818e-05, "loss": 4.0644, "step": 4633 }, { "epoch": 1.5436828516698593, "grad_norm": 0.53515625, "learning_rate": 1.4399821421598836e-05, "loss": 4.09, "step": 4634 }, { "epoch": 1.5440159906721078, "grad_norm": 0.546875, "learning_rate": 1.4399504102373878e-05, "loss": 4.0402, "step": 4635 }, { "epoch": 1.5443491296743566, "grad_norm": 0.51953125, "learning_rate": 1.4399186702784643e-05, "loss": 4.0693, "step": 4636 }, { "epoch": 1.5446822686766053, "grad_norm": 0.54296875, "learning_rate": 1.439886922283483e-05, "loss": 4.0746, "step": 4637 }, { "epoch": 1.545015407678854, "grad_norm": 0.5234375, "learning_rate": 1.4398551662528128e-05, "loss": 4.001, "step": 4638 }, { "epoch": 1.5453485466811028, "grad_norm": 0.5234375, "learning_rate": 1.4398234021868244e-05, "loss": 4.0601, "step": 4639 }, { "epoch": 1.5456816856833515, "grad_norm": 0.546875, "learning_rate": 1.4397916300858876e-05, "loss": 3.9937, "step": 4640 }, { "epoch": 1.5460148246856, "grad_norm": 0.51171875, "learning_rate": 1.4397598499503722e-05, "loss": 4.1136, "step": 4641 }, { "epoch": 1.5463479636878488, "grad_norm": 0.5390625, "learning_rate": 1.4397280617806486e-05, "loss": 4.0512, "step": 4642 }, { "epoch": 1.5466811026900973, "grad_norm": 0.546875, "learning_rate": 1.439696265577087e-05, "loss": 4.0674, "step": 4643 }, { "epoch": 1.547014241692346, "grad_norm": 0.50390625, "learning_rate": 1.439664461340058e-05, "loss": 4.0777, "step": 4644 }, { "epoch": 1.5473473806945948, "grad_norm": 0.546875, "learning_rate": 1.4396326490699316e-05, "loss": 4.0772, "step": 4645 }, { "epoch": 1.5476805196968435, "grad_norm": 0.5234375, "learning_rate": 1.4396008287670787e-05, "loss": 4.1108, "step": 4646 }, { "epoch": 1.5480136586990922, "grad_norm": 0.53515625, "learning_rate": 1.4395690004318699e-05, "loss": 4.1017, "step": 4647 }, { "epoch": 1.548346797701341, "grad_norm": 0.55859375, "learning_rate": 1.4395371640646758e-05, "loss": 4.1409, "step": 4648 }, { "epoch": 1.5486799367035897, "grad_norm": 0.5390625, "learning_rate": 1.4395053196658672e-05, "loss": 4.1066, "step": 4649 }, { "epoch": 1.5490130757058382, "grad_norm": 0.55078125, "learning_rate": 1.439473467235815e-05, "loss": 4.0961, "step": 4650 }, { "epoch": 1.549346214708087, "grad_norm": 0.53125, "learning_rate": 1.4394416067748905e-05, "loss": 4.0976, "step": 4651 }, { "epoch": 1.5496793537103355, "grad_norm": 0.5, "learning_rate": 1.4394097382834643e-05, "loss": 4.1592, "step": 4652 }, { "epoch": 1.5500124927125842, "grad_norm": 0.53125, "learning_rate": 1.4393778617619084e-05, "loss": 3.9994, "step": 4653 }, { "epoch": 1.550345631714833, "grad_norm": 0.53515625, "learning_rate": 1.4393459772105932e-05, "loss": 4.0989, "step": 4654 }, { "epoch": 1.5506787707170817, "grad_norm": 0.56640625, "learning_rate": 1.4393140846298906e-05, "loss": 4.0926, "step": 4655 }, { "epoch": 1.5510119097193305, "grad_norm": 0.5390625, "learning_rate": 1.439282184020172e-05, "loss": 4.0747, "step": 4656 }, { "epoch": 1.5513450487215792, "grad_norm": 0.5703125, "learning_rate": 1.439250275381809e-05, "loss": 4.0837, "step": 4657 }, { "epoch": 1.551678187723828, "grad_norm": 0.5234375, "learning_rate": 1.439218358715173e-05, "loss": 3.9995, "step": 4658 }, { "epoch": 1.5520113267260764, "grad_norm": 0.5390625, "learning_rate": 1.4391864340206362e-05, "loss": 3.9998, "step": 4659 }, { "epoch": 1.5523444657283252, "grad_norm": 0.54296875, "learning_rate": 1.43915450129857e-05, "loss": 4.1258, "step": 4660 }, { "epoch": 1.5526776047305737, "grad_norm": 0.546875, "learning_rate": 1.4391225605493468e-05, "loss": 4.0313, "step": 4661 }, { "epoch": 1.5530107437328224, "grad_norm": 0.55078125, "learning_rate": 1.4390906117733381e-05, "loss": 4.034, "step": 4662 }, { "epoch": 1.5533438827350712, "grad_norm": 0.5078125, "learning_rate": 1.4390586549709167e-05, "loss": 4.1111, "step": 4663 }, { "epoch": 1.55367702173732, "grad_norm": 0.53125, "learning_rate": 1.4390266901424543e-05, "loss": 4.0987, "step": 4664 }, { "epoch": 1.5540101607395687, "grad_norm": 0.5, "learning_rate": 1.4389947172883234e-05, "loss": 4.0928, "step": 4665 }, { "epoch": 1.5543432997418174, "grad_norm": 0.5078125, "learning_rate": 1.4389627364088963e-05, "loss": 4.0447, "step": 4666 }, { "epoch": 1.554676438744066, "grad_norm": 0.57421875, "learning_rate": 1.4389307475045456e-05, "loss": 4.0215, "step": 4667 }, { "epoch": 1.5550095777463147, "grad_norm": 0.5546875, "learning_rate": 1.438898750575644e-05, "loss": 4.0171, "step": 4668 }, { "epoch": 1.5553427167485634, "grad_norm": 0.5390625, "learning_rate": 1.4388667456225642e-05, "loss": 4.1214, "step": 4669 }, { "epoch": 1.555675855750812, "grad_norm": 0.52734375, "learning_rate": 1.4388347326456787e-05, "loss": 4.048, "step": 4670 }, { "epoch": 1.5560089947530606, "grad_norm": 0.5546875, "learning_rate": 1.4388027116453607e-05, "loss": 4.094, "step": 4671 }, { "epoch": 1.5563421337553094, "grad_norm": 0.546875, "learning_rate": 1.4387706826219832e-05, "loss": 4.0922, "step": 4672 }, { "epoch": 1.5566752727575581, "grad_norm": 0.5078125, "learning_rate": 1.4387386455759188e-05, "loss": 4.0619, "step": 4673 }, { "epoch": 1.5570084117598069, "grad_norm": 0.5390625, "learning_rate": 1.4387066005075413e-05, "loss": 4.0739, "step": 4674 }, { "epoch": 1.5573415507620556, "grad_norm": 0.5625, "learning_rate": 1.4386745474172234e-05, "loss": 4.0121, "step": 4675 }, { "epoch": 1.5576746897643041, "grad_norm": 0.5703125, "learning_rate": 1.4386424863053389e-05, "loss": 4.074, "step": 4676 }, { "epoch": 1.5580078287665529, "grad_norm": 0.5, "learning_rate": 1.438610417172261e-05, "loss": 4.1376, "step": 4677 }, { "epoch": 1.5583409677688014, "grad_norm": 0.52734375, "learning_rate": 1.438578340018363e-05, "loss": 4.0261, "step": 4678 }, { "epoch": 1.5586741067710501, "grad_norm": 0.53515625, "learning_rate": 1.438546254844019e-05, "loss": 4.0237, "step": 4679 }, { "epoch": 1.5590072457732989, "grad_norm": 0.55859375, "learning_rate": 1.4385141616496025e-05, "loss": 4.0774, "step": 4680 }, { "epoch": 1.5593403847755476, "grad_norm": 0.51171875, "learning_rate": 1.4384820604354872e-05, "loss": 4.0952, "step": 4681 }, { "epoch": 1.5596735237777963, "grad_norm": 0.50390625, "learning_rate": 1.4384499512020474e-05, "loss": 4.1591, "step": 4682 }, { "epoch": 1.560006662780045, "grad_norm": 0.54296875, "learning_rate": 1.4384178339496567e-05, "loss": 4.0276, "step": 4683 }, { "epoch": 1.5603398017822938, "grad_norm": 0.5078125, "learning_rate": 1.4383857086786894e-05, "loss": 4.0524, "step": 4684 }, { "epoch": 1.5606729407845423, "grad_norm": 0.5234375, "learning_rate": 1.4383535753895196e-05, "loss": 4.0864, "step": 4685 }, { "epoch": 1.561006079786791, "grad_norm": 0.5390625, "learning_rate": 1.4383214340825217e-05, "loss": 4.0713, "step": 4686 }, { "epoch": 1.5613392187890396, "grad_norm": 0.53515625, "learning_rate": 1.4382892847580697e-05, "loss": 4.0396, "step": 4687 }, { "epoch": 1.5616723577912883, "grad_norm": 0.53515625, "learning_rate": 1.4382571274165387e-05, "loss": 4.0515, "step": 4688 }, { "epoch": 1.562005496793537, "grad_norm": 0.494140625, "learning_rate": 1.4382249620583029e-05, "loss": 4.1525, "step": 4689 }, { "epoch": 1.5623386357957858, "grad_norm": 0.5234375, "learning_rate": 1.438192788683737e-05, "loss": 4.0389, "step": 4690 }, { "epoch": 1.5626717747980345, "grad_norm": 0.53515625, "learning_rate": 1.4381606072932155e-05, "loss": 4.1209, "step": 4691 }, { "epoch": 1.5630049138002833, "grad_norm": 0.5234375, "learning_rate": 1.4381284178871138e-05, "loss": 3.9836, "step": 4692 }, { "epoch": 1.563338052802532, "grad_norm": 0.5234375, "learning_rate": 1.438096220465806e-05, "loss": 4.0147, "step": 4693 }, { "epoch": 1.5636711918047805, "grad_norm": 0.53515625, "learning_rate": 1.438064015029668e-05, "loss": 4.0797, "step": 4694 }, { "epoch": 1.5640043308070293, "grad_norm": 0.5390625, "learning_rate": 1.4380318015790747e-05, "loss": 4.0426, "step": 4695 }, { "epoch": 1.5643374698092778, "grad_norm": 0.51953125, "learning_rate": 1.437999580114401e-05, "loss": 4.0295, "step": 4696 }, { "epoch": 1.5646706088115265, "grad_norm": 0.5234375, "learning_rate": 1.4379673506360224e-05, "loss": 4.0823, "step": 4697 }, { "epoch": 1.5650037478137753, "grad_norm": 0.52734375, "learning_rate": 1.4379351131443143e-05, "loss": 4.0694, "step": 4698 }, { "epoch": 1.565336886816024, "grad_norm": 0.53125, "learning_rate": 1.4379028676396523e-05, "loss": 3.9677, "step": 4699 }, { "epoch": 1.5656700258182727, "grad_norm": 0.5078125, "learning_rate": 1.4378706141224118e-05, "loss": 4.0768, "step": 4700 }, { "epoch": 1.5660031648205215, "grad_norm": 0.546875, "learning_rate": 1.4378383525929685e-05, "loss": 4.0351, "step": 4701 }, { "epoch": 1.56633630382277, "grad_norm": 0.5390625, "learning_rate": 1.4378060830516983e-05, "loss": 4.0687, "step": 4702 }, { "epoch": 1.5666694428250187, "grad_norm": 0.5546875, "learning_rate": 1.437773805498977e-05, "loss": 3.9964, "step": 4703 }, { "epoch": 1.5670025818272675, "grad_norm": 0.53125, "learning_rate": 1.4377415199351808e-05, "loss": 4.0284, "step": 4704 }, { "epoch": 1.567335720829516, "grad_norm": 0.515625, "learning_rate": 1.4377092263606853e-05, "loss": 4.1052, "step": 4705 }, { "epoch": 1.5676688598317647, "grad_norm": 0.53125, "learning_rate": 1.437676924775867e-05, "loss": 4.0353, "step": 4706 }, { "epoch": 1.5680019988340135, "grad_norm": 0.5234375, "learning_rate": 1.4376446151811017e-05, "loss": 4.0969, "step": 4707 }, { "epoch": 1.5683351378362622, "grad_norm": 0.5546875, "learning_rate": 1.4376122975767665e-05, "loss": 4.0541, "step": 4708 }, { "epoch": 1.568668276838511, "grad_norm": 0.53125, "learning_rate": 1.4375799719632373e-05, "loss": 4.0457, "step": 4709 }, { "epoch": 1.5690014158407597, "grad_norm": 0.51171875, "learning_rate": 1.4375476383408907e-05, "loss": 4.0605, "step": 4710 }, { "epoch": 1.5693345548430082, "grad_norm": 0.5234375, "learning_rate": 1.4375152967101033e-05, "loss": 4.0327, "step": 4711 }, { "epoch": 1.569667693845257, "grad_norm": 0.55078125, "learning_rate": 1.4374829470712519e-05, "loss": 4.0534, "step": 4712 }, { "epoch": 1.5700008328475055, "grad_norm": 0.51171875, "learning_rate": 1.4374505894247133e-05, "loss": 4.117, "step": 4713 }, { "epoch": 1.5703339718497542, "grad_norm": 0.515625, "learning_rate": 1.437418223770864e-05, "loss": 4.1159, "step": 4714 }, { "epoch": 1.570667110852003, "grad_norm": 0.5078125, "learning_rate": 1.4373858501100819e-05, "loss": 4.1065, "step": 4715 }, { "epoch": 1.5710002498542517, "grad_norm": 0.53515625, "learning_rate": 1.4373534684427431e-05, "loss": 4.0145, "step": 4716 }, { "epoch": 1.5713333888565004, "grad_norm": 0.52734375, "learning_rate": 1.4373210787692254e-05, "loss": 4.0922, "step": 4717 }, { "epoch": 1.5716665278587492, "grad_norm": 0.53515625, "learning_rate": 1.4372886810899058e-05, "loss": 4.0771, "step": 4718 }, { "epoch": 1.571999666860998, "grad_norm": 0.51953125, "learning_rate": 1.4372562754051616e-05, "loss": 4.0586, "step": 4719 }, { "epoch": 1.5723328058632464, "grad_norm": 0.5078125, "learning_rate": 1.4372238617153706e-05, "loss": 4.0498, "step": 4720 }, { "epoch": 1.5726659448654952, "grad_norm": 0.54296875, "learning_rate": 1.4371914400209099e-05, "loss": 4.0464, "step": 4721 }, { "epoch": 1.5729990838677437, "grad_norm": 0.51953125, "learning_rate": 1.4371590103221575e-05, "loss": 4.043, "step": 4722 }, { "epoch": 1.5733322228699924, "grad_norm": 0.5078125, "learning_rate": 1.437126572619491e-05, "loss": 4.0474, "step": 4723 }, { "epoch": 1.5736653618722412, "grad_norm": 0.53515625, "learning_rate": 1.4370941269132883e-05, "loss": 4.0647, "step": 4724 }, { "epoch": 1.57399850087449, "grad_norm": 0.51953125, "learning_rate": 1.4370616732039273e-05, "loss": 4.0521, "step": 4725 }, { "epoch": 1.5743316398767386, "grad_norm": 0.51953125, "learning_rate": 1.437029211491786e-05, "loss": 3.9908, "step": 4726 }, { "epoch": 1.5746647788789874, "grad_norm": 0.515625, "learning_rate": 1.4369967417772424e-05, "loss": 4.1383, "step": 4727 }, { "epoch": 1.574997917881236, "grad_norm": 0.5625, "learning_rate": 1.4369642640606748e-05, "loss": 4.0029, "step": 4728 }, { "epoch": 1.5753310568834846, "grad_norm": 0.52734375, "learning_rate": 1.4369317783424615e-05, "loss": 4.0417, "step": 4729 }, { "epoch": 1.5756641958857334, "grad_norm": 0.5234375, "learning_rate": 1.4368992846229806e-05, "loss": 4.1249, "step": 4730 }, { "epoch": 1.5759973348879819, "grad_norm": 0.5234375, "learning_rate": 1.4368667829026113e-05, "loss": 4.1032, "step": 4731 }, { "epoch": 1.5763304738902306, "grad_norm": 0.53125, "learning_rate": 1.4368342731817316e-05, "loss": 4.1035, "step": 4732 }, { "epoch": 1.5766636128924794, "grad_norm": 0.54296875, "learning_rate": 1.4368017554607201e-05, "loss": 4.0766, "step": 4733 }, { "epoch": 1.576996751894728, "grad_norm": 0.52734375, "learning_rate": 1.436769229739956e-05, "loss": 4.0275, "step": 4734 }, { "epoch": 1.5773298908969768, "grad_norm": 0.5390625, "learning_rate": 1.4367366960198178e-05, "loss": 4.1076, "step": 4735 }, { "epoch": 1.5776630298992256, "grad_norm": 0.51171875, "learning_rate": 1.4367041543006844e-05, "loss": 4.1063, "step": 4736 }, { "epoch": 1.577996168901474, "grad_norm": 0.54296875, "learning_rate": 1.4366716045829354e-05, "loss": 4.053, "step": 4737 }, { "epoch": 1.5783293079037228, "grad_norm": 0.51953125, "learning_rate": 1.4366390468669492e-05, "loss": 4.068, "step": 4738 }, { "epoch": 1.5786624469059714, "grad_norm": 0.5234375, "learning_rate": 1.4366064811531055e-05, "loss": 4.0415, "step": 4739 }, { "epoch": 1.57899558590822, "grad_norm": 0.53515625, "learning_rate": 1.4365739074417835e-05, "loss": 4.0756, "step": 4740 }, { "epoch": 1.5793287249104688, "grad_norm": 0.515625, "learning_rate": 1.4365413257333626e-05, "loss": 4.0663, "step": 4741 }, { "epoch": 1.5796618639127176, "grad_norm": 0.53125, "learning_rate": 1.4365087360282222e-05, "loss": 4.1487, "step": 4742 }, { "epoch": 1.5799950029149663, "grad_norm": 0.52734375, "learning_rate": 1.4364761383267421e-05, "loss": 4.0367, "step": 4743 }, { "epoch": 1.580328141917215, "grad_norm": 0.515625, "learning_rate": 1.4364435326293017e-05, "loss": 3.9885, "step": 4744 }, { "epoch": 1.5806612809194638, "grad_norm": 0.5078125, "learning_rate": 1.4364109189362813e-05, "loss": 4.1158, "step": 4745 }, { "epoch": 1.5809944199217123, "grad_norm": 0.5234375, "learning_rate": 1.43637829724806e-05, "loss": 4.091, "step": 4746 }, { "epoch": 1.581327558923961, "grad_norm": 0.546875, "learning_rate": 1.4363456675650186e-05, "loss": 4.0797, "step": 4747 }, { "epoch": 1.5816606979262096, "grad_norm": 0.546875, "learning_rate": 1.4363130298875369e-05, "loss": 4.0474, "step": 4748 }, { "epoch": 1.5819938369284583, "grad_norm": 0.546875, "learning_rate": 1.4362803842159945e-05, "loss": 4.1079, "step": 4749 }, { "epoch": 1.582326975930707, "grad_norm": 0.54296875, "learning_rate": 1.4362477305507725e-05, "loss": 4.0506, "step": 4750 }, { "epoch": 1.5826601149329558, "grad_norm": 0.515625, "learning_rate": 1.4362150688922507e-05, "loss": 4.0225, "step": 4751 }, { "epoch": 1.5829932539352045, "grad_norm": 0.546875, "learning_rate": 1.4361823992408097e-05, "loss": 4.0783, "step": 4752 }, { "epoch": 1.5833263929374533, "grad_norm": 0.52734375, "learning_rate": 1.43614972159683e-05, "loss": 4.0278, "step": 4753 }, { "epoch": 1.583659531939702, "grad_norm": 0.56640625, "learning_rate": 1.4361170359606923e-05, "loss": 4.1143, "step": 4754 }, { "epoch": 1.5839926709419505, "grad_norm": 0.53125, "learning_rate": 1.4360843423327772e-05, "loss": 4.0468, "step": 4755 }, { "epoch": 1.5843258099441992, "grad_norm": 0.5078125, "learning_rate": 1.4360516407134656e-05, "loss": 4.066, "step": 4756 }, { "epoch": 1.5846589489464478, "grad_norm": 0.53125, "learning_rate": 1.4360189311031386e-05, "loss": 4.1049, "step": 4757 }, { "epoch": 1.5849920879486965, "grad_norm": 0.55078125, "learning_rate": 1.4359862135021765e-05, "loss": 4.1267, "step": 4758 }, { "epoch": 1.5853252269509452, "grad_norm": 0.54296875, "learning_rate": 1.4359534879109612e-05, "loss": 4.0651, "step": 4759 }, { "epoch": 1.585658365953194, "grad_norm": 0.5703125, "learning_rate": 1.4359207543298734e-05, "loss": 3.9751, "step": 4760 }, { "epoch": 1.5859915049554427, "grad_norm": 0.52734375, "learning_rate": 1.4358880127592945e-05, "loss": 4.0525, "step": 4761 }, { "epoch": 1.5863246439576915, "grad_norm": 0.53125, "learning_rate": 1.435855263199606e-05, "loss": 4.1135, "step": 4762 }, { "epoch": 1.5866577829599402, "grad_norm": 0.53515625, "learning_rate": 1.4358225056511891e-05, "loss": 4.0726, "step": 4763 }, { "epoch": 1.5869909219621887, "grad_norm": 0.515625, "learning_rate": 1.4357897401144255e-05, "loss": 4.1216, "step": 4764 }, { "epoch": 1.5873240609644375, "grad_norm": 0.52734375, "learning_rate": 1.4357569665896969e-05, "loss": 4.1381, "step": 4765 }, { "epoch": 1.587657199966686, "grad_norm": 0.5390625, "learning_rate": 1.4357241850773849e-05, "loss": 4.057, "step": 4766 }, { "epoch": 1.5879903389689347, "grad_norm": 0.53515625, "learning_rate": 1.4356913955778717e-05, "loss": 4.0195, "step": 4767 }, { "epoch": 1.5883234779711835, "grad_norm": 0.5234375, "learning_rate": 1.4356585980915386e-05, "loss": 4.0235, "step": 4768 }, { "epoch": 1.5886566169734322, "grad_norm": 0.51953125, "learning_rate": 1.4356257926187682e-05, "loss": 4.1086, "step": 4769 }, { "epoch": 1.588989755975681, "grad_norm": 0.55078125, "learning_rate": 1.4355929791599423e-05, "loss": 4.0192, "step": 4770 }, { "epoch": 1.5893228949779297, "grad_norm": 0.53125, "learning_rate": 1.4355601577154433e-05, "loss": 4.0831, "step": 4771 }, { "epoch": 1.5896560339801782, "grad_norm": 0.51953125, "learning_rate": 1.4355273282856533e-05, "loss": 4.0868, "step": 4772 }, { "epoch": 1.589989172982427, "grad_norm": 0.515625, "learning_rate": 1.4354944908709545e-05, "loss": 4.0629, "step": 4773 }, { "epoch": 1.5903223119846754, "grad_norm": 0.54296875, "learning_rate": 1.43546164547173e-05, "loss": 4.0134, "step": 4774 }, { "epoch": 1.5906554509869242, "grad_norm": 0.5390625, "learning_rate": 1.4354287920883619e-05, "loss": 4.0396, "step": 4775 }, { "epoch": 1.590988589989173, "grad_norm": 0.53125, "learning_rate": 1.4353959307212333e-05, "loss": 4.0971, "step": 4776 }, { "epoch": 1.5913217289914217, "grad_norm": 0.5234375, "learning_rate": 1.435363061370726e-05, "loss": 4.0598, "step": 4777 }, { "epoch": 1.5916548679936704, "grad_norm": 0.54296875, "learning_rate": 1.4353301840372242e-05, "loss": 4.0678, "step": 4778 }, { "epoch": 1.5919880069959191, "grad_norm": 0.51953125, "learning_rate": 1.43529729872111e-05, "loss": 4.0534, "step": 4779 }, { "epoch": 1.5923211459981679, "grad_norm": 0.54296875, "learning_rate": 1.4352644054227665e-05, "loss": 4.1043, "step": 4780 }, { "epoch": 1.5926542850004164, "grad_norm": 0.515625, "learning_rate": 1.4352315041425768e-05, "loss": 4.0904, "step": 4781 }, { "epoch": 1.5929874240026651, "grad_norm": 0.51953125, "learning_rate": 1.4351985948809248e-05, "loss": 4.0559, "step": 4782 }, { "epoch": 1.5933205630049136, "grad_norm": 0.5234375, "learning_rate": 1.4351656776381929e-05, "loss": 4.059, "step": 4783 }, { "epoch": 1.5936537020071624, "grad_norm": 0.5234375, "learning_rate": 1.435132752414765e-05, "loss": 4.1166, "step": 4784 }, { "epoch": 1.5939868410094111, "grad_norm": 0.54296875, "learning_rate": 1.4350998192110245e-05, "loss": 4.0953, "step": 4785 }, { "epoch": 1.5943199800116599, "grad_norm": 0.55078125, "learning_rate": 1.4350668780273553e-05, "loss": 4.0489, "step": 4786 }, { "epoch": 1.5946531190139086, "grad_norm": 0.55078125, "learning_rate": 1.4350339288641404e-05, "loss": 3.9979, "step": 4787 }, { "epoch": 1.5949862580161573, "grad_norm": 0.53515625, "learning_rate": 1.4350009717217643e-05, "loss": 4.0791, "step": 4788 }, { "epoch": 1.595319397018406, "grad_norm": 0.53515625, "learning_rate": 1.4349680066006104e-05, "loss": 4.0921, "step": 4789 }, { "epoch": 1.5956525360206546, "grad_norm": 0.52734375, "learning_rate": 1.434935033501063e-05, "loss": 4.1097, "step": 4790 }, { "epoch": 1.5959856750229033, "grad_norm": 0.53125, "learning_rate": 1.434902052423506e-05, "loss": 4.0459, "step": 4791 }, { "epoch": 1.5963188140251519, "grad_norm": 0.5078125, "learning_rate": 1.4348690633683237e-05, "loss": 4.0678, "step": 4792 }, { "epoch": 1.5966519530274006, "grad_norm": 0.55859375, "learning_rate": 1.4348360663359e-05, "loss": 4.1262, "step": 4793 }, { "epoch": 1.5969850920296493, "grad_norm": 0.51953125, "learning_rate": 1.4348030613266198e-05, "loss": 4.0993, "step": 4794 }, { "epoch": 1.597318231031898, "grad_norm": 0.5234375, "learning_rate": 1.4347700483408669e-05, "loss": 4.1178, "step": 4795 }, { "epoch": 1.5976513700341468, "grad_norm": 0.53125, "learning_rate": 1.4347370273790262e-05, "loss": 3.9939, "step": 4796 }, { "epoch": 1.5979845090363956, "grad_norm": 0.54296875, "learning_rate": 1.4347039984414824e-05, "loss": 4.001, "step": 4797 }, { "epoch": 1.5983176480386443, "grad_norm": 0.55859375, "learning_rate": 1.4346709615286199e-05, "loss": 4.0381, "step": 4798 }, { "epoch": 1.5986507870408928, "grad_norm": 0.51953125, "learning_rate": 1.4346379166408238e-05, "loss": 4.052, "step": 4799 }, { "epoch": 1.5989839260431415, "grad_norm": 0.5234375, "learning_rate": 1.4346048637784788e-05, "loss": 4.0633, "step": 4800 }, { "epoch": 1.59931706504539, "grad_norm": 0.51953125, "learning_rate": 1.43457180294197e-05, "loss": 4.0892, "step": 4801 }, { "epoch": 1.5996502040476388, "grad_norm": 0.54296875, "learning_rate": 1.4345387341316826e-05, "loss": 3.9889, "step": 4802 }, { "epoch": 1.5999833430498875, "grad_norm": 0.5625, "learning_rate": 1.4345056573480019e-05, "loss": 4.1038, "step": 4803 }, { "epoch": 1.6003164820521363, "grad_norm": 0.51953125, "learning_rate": 1.4344725725913126e-05, "loss": 4.039, "step": 4804 }, { "epoch": 1.600649621054385, "grad_norm": 0.5390625, "learning_rate": 1.4344394798620003e-05, "loss": 4.1095, "step": 4805 }, { "epoch": 1.6009827600566338, "grad_norm": 0.53515625, "learning_rate": 1.4344063791604508e-05, "loss": 4.0751, "step": 4806 }, { "epoch": 1.6013158990588823, "grad_norm": 0.51953125, "learning_rate": 1.4343732704870492e-05, "loss": 4.184, "step": 4807 }, { "epoch": 1.601649038061131, "grad_norm": 0.5234375, "learning_rate": 1.4343401538421816e-05, "loss": 4.0949, "step": 4808 }, { "epoch": 1.6019821770633795, "grad_norm": 0.52734375, "learning_rate": 1.4343070292262333e-05, "loss": 4.0469, "step": 4809 }, { "epoch": 1.6023153160656283, "grad_norm": 0.5234375, "learning_rate": 1.4342738966395905e-05, "loss": 4.0443, "step": 4810 }, { "epoch": 1.602648455067877, "grad_norm": 0.5390625, "learning_rate": 1.4342407560826387e-05, "loss": 4.053, "step": 4811 }, { "epoch": 1.6029815940701257, "grad_norm": 0.53125, "learning_rate": 1.4342076075557642e-05, "loss": 4.0939, "step": 4812 }, { "epoch": 1.6033147330723745, "grad_norm": 0.52734375, "learning_rate": 1.434174451059353e-05, "loss": 4.0564, "step": 4813 }, { "epoch": 1.6036478720746232, "grad_norm": 0.52734375, "learning_rate": 1.4341412865937914e-05, "loss": 4.0721, "step": 4814 }, { "epoch": 1.603981011076872, "grad_norm": 0.515625, "learning_rate": 1.434108114159466e-05, "loss": 4.0346, "step": 4815 }, { "epoch": 1.6043141500791205, "grad_norm": 0.515625, "learning_rate": 1.4340749337567624e-05, "loss": 4.0667, "step": 4816 }, { "epoch": 1.6046472890813692, "grad_norm": 0.51953125, "learning_rate": 1.4340417453860677e-05, "loss": 4.0745, "step": 4817 }, { "epoch": 1.6049804280836177, "grad_norm": 0.546875, "learning_rate": 1.4340085490477683e-05, "loss": 4.0671, "step": 4818 }, { "epoch": 1.6053135670858665, "grad_norm": 0.5546875, "learning_rate": 1.4339753447422509e-05, "loss": 4.0825, "step": 4819 }, { "epoch": 1.6056467060881152, "grad_norm": 0.4921875, "learning_rate": 1.4339421324699023e-05, "loss": 4.0647, "step": 4820 }, { "epoch": 1.605979845090364, "grad_norm": 0.5234375, "learning_rate": 1.4339089122311091e-05, "loss": 4.1139, "step": 4821 }, { "epoch": 1.6063129840926127, "grad_norm": 0.53125, "learning_rate": 1.4338756840262584e-05, "loss": 4.0346, "step": 4822 }, { "epoch": 1.6066461230948614, "grad_norm": 0.546875, "learning_rate": 1.4338424478557373e-05, "loss": 4.0782, "step": 4823 }, { "epoch": 1.6069792620971102, "grad_norm": 0.5234375, "learning_rate": 1.4338092037199328e-05, "loss": 4.0952, "step": 4824 }, { "epoch": 1.6073124010993587, "grad_norm": 0.5546875, "learning_rate": 1.4337759516192323e-05, "loss": 4.0459, "step": 4825 }, { "epoch": 1.6076455401016074, "grad_norm": 0.52734375, "learning_rate": 1.4337426915540232e-05, "loss": 4.0339, "step": 4826 }, { "epoch": 1.607978679103856, "grad_norm": 0.515625, "learning_rate": 1.4337094235246925e-05, "loss": 4.0644, "step": 4827 }, { "epoch": 1.6083118181061047, "grad_norm": 0.5390625, "learning_rate": 1.433676147531628e-05, "loss": 4.1699, "step": 4828 }, { "epoch": 1.6086449571083534, "grad_norm": 0.51953125, "learning_rate": 1.4336428635752173e-05, "loss": 4.0676, "step": 4829 }, { "epoch": 1.6089780961106022, "grad_norm": 0.51953125, "learning_rate": 1.433609571655848e-05, "loss": 4.1009, "step": 4830 }, { "epoch": 1.609311235112851, "grad_norm": 0.51171875, "learning_rate": 1.4335762717739077e-05, "loss": 4.0683, "step": 4831 }, { "epoch": 1.6096443741150996, "grad_norm": 0.55078125, "learning_rate": 1.4335429639297845e-05, "loss": 4.053, "step": 4832 }, { "epoch": 1.6099775131173484, "grad_norm": 0.546875, "learning_rate": 1.4335096481238666e-05, "loss": 4.1004, "step": 4833 }, { "epoch": 1.610310652119597, "grad_norm": 0.55078125, "learning_rate": 1.4334763243565412e-05, "loss": 4.0425, "step": 4834 }, { "epoch": 1.6106437911218456, "grad_norm": 0.54296875, "learning_rate": 1.4334429926281975e-05, "loss": 4.1051, "step": 4835 }, { "epoch": 1.6109769301240942, "grad_norm": 0.52734375, "learning_rate": 1.4334096529392234e-05, "loss": 4.0798, "step": 4836 }, { "epoch": 1.611310069126343, "grad_norm": 0.515625, "learning_rate": 1.4333763052900066e-05, "loss": 4.0899, "step": 4837 }, { "epoch": 1.6116432081285916, "grad_norm": 0.5390625, "learning_rate": 1.4333429496809366e-05, "loss": 4.0176, "step": 4838 }, { "epoch": 1.6119763471308404, "grad_norm": 0.55859375, "learning_rate": 1.433309586112401e-05, "loss": 4.1028, "step": 4839 }, { "epoch": 1.612309486133089, "grad_norm": 0.51953125, "learning_rate": 1.4332762145847886e-05, "loss": 4.0012, "step": 4840 }, { "epoch": 1.6126426251353378, "grad_norm": 0.50390625, "learning_rate": 1.4332428350984887e-05, "loss": 4.1104, "step": 4841 }, { "epoch": 1.6129757641375864, "grad_norm": 0.53515625, "learning_rate": 1.4332094476538893e-05, "loss": 4.1511, "step": 4842 }, { "epoch": 1.613308903139835, "grad_norm": 0.51953125, "learning_rate": 1.43317605225138e-05, "loss": 4.1559, "step": 4843 }, { "epoch": 1.6136420421420836, "grad_norm": 0.53125, "learning_rate": 1.4331426488913493e-05, "loss": 4.1341, "step": 4844 }, { "epoch": 1.6139751811443324, "grad_norm": 0.55078125, "learning_rate": 1.4331092375741863e-05, "loss": 4.1041, "step": 4845 }, { "epoch": 1.614308320146581, "grad_norm": 0.55859375, "learning_rate": 1.4330758183002805e-05, "loss": 4.0939, "step": 4846 }, { "epoch": 1.6146414591488298, "grad_norm": 0.515625, "learning_rate": 1.4330423910700208e-05, "loss": 4.1409, "step": 4847 }, { "epoch": 1.6149745981510786, "grad_norm": 0.515625, "learning_rate": 1.4330089558837968e-05, "loss": 4.0389, "step": 4848 }, { "epoch": 1.6153077371533273, "grad_norm": 0.6015625, "learning_rate": 1.4329755127419978e-05, "loss": 4.0898, "step": 4849 }, { "epoch": 1.615640876155576, "grad_norm": 0.51171875, "learning_rate": 1.4329420616450133e-05, "loss": 4.0519, "step": 4850 }, { "epoch": 1.6159740151578246, "grad_norm": 0.51953125, "learning_rate": 1.4329086025932332e-05, "loss": 4.0264, "step": 4851 }, { "epoch": 1.6163071541600733, "grad_norm": 0.546875, "learning_rate": 1.4328751355870473e-05, "loss": 4.0979, "step": 4852 }, { "epoch": 1.6166402931623218, "grad_norm": 0.53125, "learning_rate": 1.4328416606268447e-05, "loss": 4.1312, "step": 4853 }, { "epoch": 1.6169734321645706, "grad_norm": 0.5390625, "learning_rate": 1.4328081777130158e-05, "loss": 4.0748, "step": 4854 }, { "epoch": 1.6173065711668193, "grad_norm": 0.54296875, "learning_rate": 1.4327746868459509e-05, "loss": 4.0303, "step": 4855 }, { "epoch": 1.617639710169068, "grad_norm": 0.515625, "learning_rate": 1.4327411880260398e-05, "loss": 4.0756, "step": 4856 }, { "epoch": 1.6179728491713168, "grad_norm": 0.52734375, "learning_rate": 1.4327076812536726e-05, "loss": 4.1146, "step": 4857 }, { "epoch": 1.6183059881735655, "grad_norm": 0.52734375, "learning_rate": 1.4326741665292396e-05, "loss": 4.008, "step": 4858 }, { "epoch": 1.6186391271758143, "grad_norm": 0.53515625, "learning_rate": 1.432640643853131e-05, "loss": 4.053, "step": 4859 }, { "epoch": 1.6189722661780628, "grad_norm": 0.52734375, "learning_rate": 1.4326071132257378e-05, "loss": 4.0585, "step": 4860 }, { "epoch": 1.6193054051803115, "grad_norm": 0.5078125, "learning_rate": 1.4325735746474502e-05, "loss": 4.1201, "step": 4861 }, { "epoch": 1.61963854418256, "grad_norm": 0.52734375, "learning_rate": 1.4325400281186588e-05, "loss": 4.1024, "step": 4862 }, { "epoch": 1.6199716831848088, "grad_norm": 0.55078125, "learning_rate": 1.4325064736397545e-05, "loss": 4.0394, "step": 4863 }, { "epoch": 1.6203048221870575, "grad_norm": 0.5546875, "learning_rate": 1.4324729112111281e-05, "loss": 4.0093, "step": 4864 }, { "epoch": 1.6206379611893063, "grad_norm": 0.55859375, "learning_rate": 1.4324393408331706e-05, "loss": 4.114, "step": 4865 }, { "epoch": 1.620971100191555, "grad_norm": 0.51171875, "learning_rate": 1.432405762506273e-05, "loss": 4.0825, "step": 4866 }, { "epoch": 1.6213042391938037, "grad_norm": 0.5625, "learning_rate": 1.4323721762308262e-05, "loss": 4.0602, "step": 4867 }, { "epoch": 1.6216373781960522, "grad_norm": 0.53515625, "learning_rate": 1.4323385820072215e-05, "loss": 4.1348, "step": 4868 }, { "epoch": 1.621970517198301, "grad_norm": 0.5234375, "learning_rate": 1.4323049798358503e-05, "loss": 4.112, "step": 4869 }, { "epoch": 1.6223036562005497, "grad_norm": 0.50390625, "learning_rate": 1.432271369717104e-05, "loss": 4.0212, "step": 4870 }, { "epoch": 1.6226367952027982, "grad_norm": 0.53125, "learning_rate": 1.4322377516513741e-05, "loss": 4.0608, "step": 4871 }, { "epoch": 1.622969934205047, "grad_norm": 0.5234375, "learning_rate": 1.432204125639052e-05, "loss": 4.0655, "step": 4872 }, { "epoch": 1.6233030732072957, "grad_norm": 0.5390625, "learning_rate": 1.4321704916805295e-05, "loss": 4.0056, "step": 4873 }, { "epoch": 1.6236362122095445, "grad_norm": 0.54296875, "learning_rate": 1.4321368497761987e-05, "loss": 4.1159, "step": 4874 }, { "epoch": 1.6239693512117932, "grad_norm": 0.52734375, "learning_rate": 1.4321031999264506e-05, "loss": 3.9966, "step": 4875 }, { "epoch": 1.624302490214042, "grad_norm": 0.53515625, "learning_rate": 1.432069542131678e-05, "loss": 4.0146, "step": 4876 }, { "epoch": 1.6246356292162905, "grad_norm": 0.53125, "learning_rate": 1.4320358763922724e-05, "loss": 4.106, "step": 4877 }, { "epoch": 1.6249687682185392, "grad_norm": 0.54296875, "learning_rate": 1.4320022027086262e-05, "loss": 4.0045, "step": 4878 }, { "epoch": 1.6253019072207877, "grad_norm": 0.5234375, "learning_rate": 1.4319685210811317e-05, "loss": 4.0487, "step": 4879 }, { "epoch": 1.6256350462230365, "grad_norm": 0.53515625, "learning_rate": 1.4319348315101806e-05, "loss": 4.1227, "step": 4880 }, { "epoch": 1.6259681852252852, "grad_norm": 0.55859375, "learning_rate": 1.4319011339961662e-05, "loss": 4.1057, "step": 4881 }, { "epoch": 1.626301324227534, "grad_norm": 0.49609375, "learning_rate": 1.4318674285394805e-05, "loss": 4.1261, "step": 4882 }, { "epoch": 1.6266344632297827, "grad_norm": 0.55078125, "learning_rate": 1.4318337151405162e-05, "loss": 4.1609, "step": 4883 }, { "epoch": 1.6269676022320314, "grad_norm": 0.5234375, "learning_rate": 1.431799993799666e-05, "loss": 4.0126, "step": 4884 }, { "epoch": 1.6273007412342801, "grad_norm": 0.515625, "learning_rate": 1.4317662645173225e-05, "loss": 4.0059, "step": 4885 }, { "epoch": 1.6276338802365287, "grad_norm": 0.53125, "learning_rate": 1.4317325272938787e-05, "loss": 4.0547, "step": 4886 }, { "epoch": 1.6279670192387774, "grad_norm": 0.546875, "learning_rate": 1.4316987821297277e-05, "loss": 4.0451, "step": 4887 }, { "epoch": 1.628300158241026, "grad_norm": 0.5234375, "learning_rate": 1.4316650290252625e-05, "loss": 4.052, "step": 4888 }, { "epoch": 1.6286332972432747, "grad_norm": 0.546875, "learning_rate": 1.431631267980876e-05, "loss": 4.0268, "step": 4889 }, { "epoch": 1.6289664362455234, "grad_norm": 0.5390625, "learning_rate": 1.4315974989969619e-05, "loss": 4.0774, "step": 4890 }, { "epoch": 1.6292995752477721, "grad_norm": 0.54296875, "learning_rate": 1.431563722073913e-05, "loss": 3.9675, "step": 4891 }, { "epoch": 1.6296327142500209, "grad_norm": 0.55859375, "learning_rate": 1.4315299372121232e-05, "loss": 4.0491, "step": 4892 }, { "epoch": 1.6299658532522696, "grad_norm": 0.52734375, "learning_rate": 1.4314961444119856e-05, "loss": 4.0577, "step": 4893 }, { "epoch": 1.6302989922545184, "grad_norm": 0.52734375, "learning_rate": 1.4314623436738942e-05, "loss": 4.0927, "step": 4894 }, { "epoch": 1.6306321312567669, "grad_norm": 0.53515625, "learning_rate": 1.4314285349982428e-05, "loss": 4.0907, "step": 4895 }, { "epoch": 1.6309652702590156, "grad_norm": 0.51953125, "learning_rate": 1.4313947183854246e-05, "loss": 4.1148, "step": 4896 }, { "epoch": 1.6312984092612641, "grad_norm": 0.5390625, "learning_rate": 1.431360893835834e-05, "loss": 4.0106, "step": 4897 }, { "epoch": 1.6316315482635129, "grad_norm": 0.5234375, "learning_rate": 1.4313270613498646e-05, "loss": 3.9503, "step": 4898 }, { "epoch": 1.6319646872657616, "grad_norm": 0.55078125, "learning_rate": 1.431293220927911e-05, "loss": 4.1096, "step": 4899 }, { "epoch": 1.6322978262680103, "grad_norm": 0.55078125, "learning_rate": 1.4312593725703668e-05, "loss": 4.0419, "step": 4900 }, { "epoch": 1.632630965270259, "grad_norm": 0.5078125, "learning_rate": 1.4312255162776265e-05, "loss": 4.0205, "step": 4901 }, { "epoch": 1.6329641042725078, "grad_norm": 0.52734375, "learning_rate": 1.4311916520500846e-05, "loss": 4.0341, "step": 4902 }, { "epoch": 1.6332972432747563, "grad_norm": 0.53125, "learning_rate": 1.4311577798881353e-05, "loss": 4.0204, "step": 4903 }, { "epoch": 1.633630382277005, "grad_norm": 0.55078125, "learning_rate": 1.4311238997921733e-05, "loss": 4.054, "step": 4904 }, { "epoch": 1.6339635212792538, "grad_norm": 0.515625, "learning_rate": 1.4310900117625931e-05, "loss": 4.0847, "step": 4905 }, { "epoch": 1.6342966602815023, "grad_norm": 0.53125, "learning_rate": 1.4310561157997897e-05, "loss": 4.1271, "step": 4906 }, { "epoch": 1.634629799283751, "grad_norm": 0.53125, "learning_rate": 1.4310222119041576e-05, "loss": 4.0933, "step": 4907 }, { "epoch": 1.6349629382859998, "grad_norm": 0.55078125, "learning_rate": 1.4309883000760919e-05, "loss": 4.0467, "step": 4908 }, { "epoch": 1.6352960772882486, "grad_norm": 0.52734375, "learning_rate": 1.4309543803159875e-05, "loss": 4.0914, "step": 4909 }, { "epoch": 1.6356292162904973, "grad_norm": 0.546875, "learning_rate": 1.4309204526242393e-05, "loss": 4.0661, "step": 4910 }, { "epoch": 1.635962355292746, "grad_norm": 0.578125, "learning_rate": 1.4308865170012429e-05, "loss": 4.0513, "step": 4911 }, { "epoch": 1.6362954942949945, "grad_norm": 0.5234375, "learning_rate": 1.4308525734473933e-05, "loss": 4.0195, "step": 4912 }, { "epoch": 1.6366286332972433, "grad_norm": 0.498046875, "learning_rate": 1.430818621963086e-05, "loss": 4.1611, "step": 4913 }, { "epoch": 1.6369617722994918, "grad_norm": 0.51171875, "learning_rate": 1.4307846625487163e-05, "loss": 4.1575, "step": 4914 }, { "epoch": 1.6372949113017405, "grad_norm": 0.5234375, "learning_rate": 1.4307506952046802e-05, "loss": 4.0852, "step": 4915 }, { "epoch": 1.6376280503039893, "grad_norm": 0.55859375, "learning_rate": 1.4307167199313726e-05, "loss": 4.0255, "step": 4916 }, { "epoch": 1.637961189306238, "grad_norm": 0.5390625, "learning_rate": 1.4306827367291898e-05, "loss": 4.0917, "step": 4917 }, { "epoch": 1.6382943283084868, "grad_norm": 0.5234375, "learning_rate": 1.4306487455985275e-05, "loss": 4.0671, "step": 4918 }, { "epoch": 1.6386274673107355, "grad_norm": 0.51953125, "learning_rate": 1.4306147465397815e-05, "loss": 4.0971, "step": 4919 }, { "epoch": 1.6389606063129842, "grad_norm": 0.52734375, "learning_rate": 1.430580739553348e-05, "loss": 4.0778, "step": 4920 }, { "epoch": 1.6392937453152328, "grad_norm": 0.51171875, "learning_rate": 1.430546724639623e-05, "loss": 4.0754, "step": 4921 }, { "epoch": 1.6396268843174815, "grad_norm": 0.53515625, "learning_rate": 1.4305127017990026e-05, "loss": 4.0588, "step": 4922 }, { "epoch": 1.63996002331973, "grad_norm": 0.5390625, "learning_rate": 1.4304786710318833e-05, "loss": 4.1285, "step": 4923 }, { "epoch": 1.6402931623219787, "grad_norm": 0.546875, "learning_rate": 1.4304446323386616e-05, "loss": 4.0689, "step": 4924 }, { "epoch": 1.6406263013242275, "grad_norm": 0.51171875, "learning_rate": 1.4304105857197335e-05, "loss": 4.0393, "step": 4925 }, { "epoch": 1.6409594403264762, "grad_norm": 0.546875, "learning_rate": 1.430376531175496e-05, "loss": 4.038, "step": 4926 }, { "epoch": 1.641292579328725, "grad_norm": 0.5546875, "learning_rate": 1.4303424687063455e-05, "loss": 4.0032, "step": 4927 }, { "epoch": 1.6416257183309737, "grad_norm": 0.53125, "learning_rate": 1.4303083983126788e-05, "loss": 4.0243, "step": 4928 }, { "epoch": 1.6419588573332224, "grad_norm": 0.53515625, "learning_rate": 1.430274319994893e-05, "loss": 4.0382, "step": 4929 }, { "epoch": 1.642291996335471, "grad_norm": 0.53515625, "learning_rate": 1.4302402337533847e-05, "loss": 4.0549, "step": 4930 }, { "epoch": 1.6426251353377197, "grad_norm": 0.52734375, "learning_rate": 1.4302061395885513e-05, "loss": 4.0406, "step": 4931 }, { "epoch": 1.6429582743399682, "grad_norm": 0.53125, "learning_rate": 1.4301720375007893e-05, "loss": 4.053, "step": 4932 }, { "epoch": 1.643291413342217, "grad_norm": 0.53515625, "learning_rate": 1.4301379274904966e-05, "loss": 4.0569, "step": 4933 }, { "epoch": 1.6436245523444657, "grad_norm": 0.55078125, "learning_rate": 1.43010380955807e-05, "loss": 4.033, "step": 4934 }, { "epoch": 1.6439576913467144, "grad_norm": 0.55859375, "learning_rate": 1.4300696837039076e-05, "loss": 4.0639, "step": 4935 }, { "epoch": 1.6442908303489632, "grad_norm": 0.5078125, "learning_rate": 1.430035549928406e-05, "loss": 4.1291, "step": 4936 }, { "epoch": 1.644623969351212, "grad_norm": 0.52734375, "learning_rate": 1.430001408231963e-05, "loss": 4.1412, "step": 4937 }, { "epoch": 1.6449571083534604, "grad_norm": 0.52734375, "learning_rate": 1.4299672586149769e-05, "loss": 4.0602, "step": 4938 }, { "epoch": 1.6452902473557092, "grad_norm": 0.5546875, "learning_rate": 1.4299331010778452e-05, "loss": 4.0693, "step": 4939 }, { "epoch": 1.645623386357958, "grad_norm": 0.53515625, "learning_rate": 1.4298989356209651e-05, "loss": 4.0638, "step": 4940 }, { "epoch": 1.6459565253602064, "grad_norm": 0.5234375, "learning_rate": 1.4298647622447354e-05, "loss": 4.1015, "step": 4941 }, { "epoch": 1.6462896643624552, "grad_norm": 0.5078125, "learning_rate": 1.4298305809495536e-05, "loss": 4.1031, "step": 4942 }, { "epoch": 1.646622803364704, "grad_norm": 0.515625, "learning_rate": 1.429796391735818e-05, "loss": 4.1142, "step": 4943 }, { "epoch": 1.6469559423669526, "grad_norm": 0.5390625, "learning_rate": 1.4297621946039272e-05, "loss": 4.0247, "step": 4944 }, { "epoch": 1.6472890813692014, "grad_norm": 0.5234375, "learning_rate": 1.4297279895542789e-05, "loss": 4.0528, "step": 4945 }, { "epoch": 1.6476222203714501, "grad_norm": 0.546875, "learning_rate": 1.429693776587272e-05, "loss": 4.0767, "step": 4946 }, { "epoch": 1.6479553593736986, "grad_norm": 0.50390625, "learning_rate": 1.4296595557033046e-05, "loss": 4.1142, "step": 4947 }, { "epoch": 1.6482884983759474, "grad_norm": 0.5390625, "learning_rate": 1.4296253269027756e-05, "loss": 4.0377, "step": 4948 }, { "epoch": 1.648621637378196, "grad_norm": 0.50390625, "learning_rate": 1.4295910901860836e-05, "loss": 4.1185, "step": 4949 }, { "epoch": 1.6489547763804446, "grad_norm": 0.53125, "learning_rate": 1.4295568455536274e-05, "loss": 4.0596, "step": 4950 }, { "epoch": 1.6492879153826934, "grad_norm": 0.53515625, "learning_rate": 1.429522593005806e-05, "loss": 4.1033, "step": 4951 }, { "epoch": 1.649621054384942, "grad_norm": 0.5234375, "learning_rate": 1.4294883325430181e-05, "loss": 4.1122, "step": 4952 }, { "epoch": 1.6499541933871908, "grad_norm": 0.546875, "learning_rate": 1.4294540641656629e-05, "loss": 4.0375, "step": 4953 }, { "epoch": 1.6502873323894396, "grad_norm": 0.5078125, "learning_rate": 1.4294197878741397e-05, "loss": 4.0878, "step": 4954 }, { "epoch": 1.6506204713916883, "grad_norm": 0.515625, "learning_rate": 1.4293855036688474e-05, "loss": 4.1005, "step": 4955 }, { "epoch": 1.6509536103939368, "grad_norm": 0.5625, "learning_rate": 1.4293512115501857e-05, "loss": 4.0517, "step": 4956 }, { "epoch": 1.6512867493961856, "grad_norm": 0.52734375, "learning_rate": 1.4293169115185536e-05, "loss": 4.0366, "step": 4957 }, { "epoch": 1.651619888398434, "grad_norm": 0.52734375, "learning_rate": 1.4292826035743512e-05, "loss": 4.0578, "step": 4958 }, { "epoch": 1.6519530274006828, "grad_norm": 0.5234375, "learning_rate": 1.4292482877179775e-05, "loss": 4.0031, "step": 4959 }, { "epoch": 1.6522861664029316, "grad_norm": 0.55078125, "learning_rate": 1.4292139639498327e-05, "loss": 4.0064, "step": 4960 }, { "epoch": 1.6526193054051803, "grad_norm": 0.5390625, "learning_rate": 1.4291796322703163e-05, "loss": 4.0774, "step": 4961 }, { "epoch": 1.652952444407429, "grad_norm": 0.52734375, "learning_rate": 1.4291452926798281e-05, "loss": 4.0608, "step": 4962 }, { "epoch": 1.6532855834096778, "grad_norm": 0.51953125, "learning_rate": 1.4291109451787685e-05, "loss": 4.1552, "step": 4963 }, { "epoch": 1.6536187224119265, "grad_norm": 0.515625, "learning_rate": 1.4290765897675373e-05, "loss": 4.0183, "step": 4964 }, { "epoch": 1.653951861414175, "grad_norm": 0.53515625, "learning_rate": 1.429042226446535e-05, "loss": 4.0651, "step": 4965 }, { "epoch": 1.6542850004164238, "grad_norm": 0.61328125, "learning_rate": 1.429007855216161e-05, "loss": 4.1281, "step": 4966 }, { "epoch": 1.6546181394186723, "grad_norm": 0.53125, "learning_rate": 1.4289734760768165e-05, "loss": 4.0474, "step": 4967 }, { "epoch": 1.654951278420921, "grad_norm": 0.50390625, "learning_rate": 1.4289390890289018e-05, "loss": 4.0787, "step": 4968 }, { "epoch": 1.6552844174231698, "grad_norm": 0.52734375, "learning_rate": 1.428904694072817e-05, "loss": 4.064, "step": 4969 }, { "epoch": 1.6556175564254185, "grad_norm": 0.53125, "learning_rate": 1.4288702912089633e-05, "loss": 4.0744, "step": 4970 }, { "epoch": 1.6559506954276673, "grad_norm": 0.5703125, "learning_rate": 1.428835880437741e-05, "loss": 4.0421, "step": 4971 }, { "epoch": 1.656283834429916, "grad_norm": 0.53125, "learning_rate": 1.428801461759551e-05, "loss": 4.1024, "step": 4972 }, { "epoch": 1.6566169734321645, "grad_norm": 0.5234375, "learning_rate": 1.4287670351747943e-05, "loss": 4.138, "step": 4973 }, { "epoch": 1.6569501124344133, "grad_norm": 0.5234375, "learning_rate": 1.4287326006838718e-05, "loss": 4.068, "step": 4974 }, { "epoch": 1.657283251436662, "grad_norm": 0.52734375, "learning_rate": 1.4286981582871847e-05, "loss": 4.0452, "step": 4975 }, { "epoch": 1.6576163904389105, "grad_norm": 0.5234375, "learning_rate": 1.4286637079851342e-05, "loss": 4.0674, "step": 4976 }, { "epoch": 1.6579495294411593, "grad_norm": 0.5234375, "learning_rate": 1.4286292497781213e-05, "loss": 4.0813, "step": 4977 }, { "epoch": 1.658282668443408, "grad_norm": 0.56640625, "learning_rate": 1.4285947836665478e-05, "loss": 4.0746, "step": 4978 }, { "epoch": 1.6586158074456567, "grad_norm": 0.5234375, "learning_rate": 1.4285603096508146e-05, "loss": 4.0827, "step": 4979 }, { "epoch": 1.6589489464479055, "grad_norm": 0.52734375, "learning_rate": 1.4285258277313236e-05, "loss": 4.0753, "step": 4980 }, { "epoch": 1.6592820854501542, "grad_norm": 0.53125, "learning_rate": 1.4284913379084766e-05, "loss": 4.1377, "step": 4981 }, { "epoch": 1.6596152244524027, "grad_norm": 0.5390625, "learning_rate": 1.4284568401826748e-05, "loss": 4.0338, "step": 4982 }, { "epoch": 1.6599483634546515, "grad_norm": 0.51953125, "learning_rate": 1.4284223345543205e-05, "loss": 4.0045, "step": 4983 }, { "epoch": 1.6602815024569, "grad_norm": 0.53515625, "learning_rate": 1.4283878210238157e-05, "loss": 4.0744, "step": 4984 }, { "epoch": 1.6606146414591487, "grad_norm": 0.52734375, "learning_rate": 1.4283532995915619e-05, "loss": 4.1041, "step": 4985 }, { "epoch": 1.6609477804613975, "grad_norm": 0.54296875, "learning_rate": 1.4283187702579615e-05, "loss": 4.0308, "step": 4986 }, { "epoch": 1.6612809194636462, "grad_norm": 0.53515625, "learning_rate": 1.428284233023417e-05, "loss": 4.0991, "step": 4987 }, { "epoch": 1.661614058465895, "grad_norm": 0.53125, "learning_rate": 1.4282496878883299e-05, "loss": 4.1198, "step": 4988 }, { "epoch": 1.6619471974681437, "grad_norm": 0.546875, "learning_rate": 1.4282151348531032e-05, "loss": 4.0427, "step": 4989 }, { "epoch": 1.6622803364703924, "grad_norm": 0.51953125, "learning_rate": 1.4281805739181392e-05, "loss": 4.0623, "step": 4990 }, { "epoch": 1.662613475472641, "grad_norm": 0.51953125, "learning_rate": 1.4281460050838403e-05, "loss": 4.0897, "step": 4991 }, { "epoch": 1.6629466144748897, "grad_norm": 0.515625, "learning_rate": 1.4281114283506094e-05, "loss": 4.0362, "step": 4992 }, { "epoch": 1.6632797534771382, "grad_norm": 0.5234375, "learning_rate": 1.4280768437188493e-05, "loss": 4.1005, "step": 4993 }, { "epoch": 1.663612892479387, "grad_norm": 0.51171875, "learning_rate": 1.4280422511889625e-05, "loss": 3.9707, "step": 4994 }, { "epoch": 1.6639460314816357, "grad_norm": 0.5390625, "learning_rate": 1.4280076507613523e-05, "loss": 4.0215, "step": 4995 }, { "epoch": 1.6642791704838844, "grad_norm": 0.53515625, "learning_rate": 1.4279730424364212e-05, "loss": 4.0421, "step": 4996 }, { "epoch": 1.6646123094861331, "grad_norm": 0.55078125, "learning_rate": 1.4279384262145729e-05, "loss": 4.0527, "step": 4997 }, { "epoch": 1.6649454484883819, "grad_norm": 0.5625, "learning_rate": 1.4279038020962104e-05, "loss": 4.0593, "step": 4998 }, { "epoch": 1.6652785874906306, "grad_norm": 0.55859375, "learning_rate": 1.4278691700817367e-05, "loss": 4.0564, "step": 4999 }, { "epoch": 1.6656117264928791, "grad_norm": 0.52734375, "learning_rate": 1.4278345301715558e-05, "loss": 4.0804, "step": 5000 }, { "epoch": 1.6659448654951279, "grad_norm": 0.54296875, "learning_rate": 1.4277998823660703e-05, "loss": 4.1086, "step": 5001 }, { "epoch": 1.6662780044973764, "grad_norm": 0.5078125, "learning_rate": 1.4277652266656847e-05, "loss": 4.0737, "step": 5002 }, { "epoch": 1.6666111434996251, "grad_norm": 0.5390625, "learning_rate": 1.4277305630708022e-05, "loss": 4.1317, "step": 5003 }, { "epoch": 1.6669442825018739, "grad_norm": 0.53515625, "learning_rate": 1.4276958915818263e-05, "loss": 4.0891, "step": 5004 }, { "epoch": 1.6672774215041226, "grad_norm": 0.546875, "learning_rate": 1.4276612121991614e-05, "loss": 3.9847, "step": 5005 }, { "epoch": 1.6676105605063714, "grad_norm": 0.54296875, "learning_rate": 1.4276265249232113e-05, "loss": 4.0707, "step": 5006 }, { "epoch": 1.66794369950862, "grad_norm": 0.53515625, "learning_rate": 1.4275918297543797e-05, "loss": 4.0728, "step": 5007 }, { "epoch": 1.6682768385108686, "grad_norm": 0.5390625, "learning_rate": 1.427557126693071e-05, "loss": 4.0982, "step": 5008 }, { "epoch": 1.6686099775131173, "grad_norm": 0.5625, "learning_rate": 1.4275224157396895e-05, "loss": 3.9951, "step": 5009 }, { "epoch": 1.668943116515366, "grad_norm": 0.5234375, "learning_rate": 1.427487696894639e-05, "loss": 4.089, "step": 5010 }, { "epoch": 1.6692762555176146, "grad_norm": 0.5390625, "learning_rate": 1.4274529701583246e-05, "loss": 4.047, "step": 5011 }, { "epoch": 1.6696093945198633, "grad_norm": 0.55859375, "learning_rate": 1.4274182355311502e-05, "loss": 4.0517, "step": 5012 }, { "epoch": 1.669942533522112, "grad_norm": 0.5390625, "learning_rate": 1.427383493013521e-05, "loss": 4.0152, "step": 5013 }, { "epoch": 1.6702756725243608, "grad_norm": 0.546875, "learning_rate": 1.427348742605841e-05, "loss": 4.0864, "step": 5014 }, { "epoch": 1.6706088115266096, "grad_norm": 0.5234375, "learning_rate": 1.4273139843085156e-05, "loss": 4.0618, "step": 5015 }, { "epoch": 1.6709419505288583, "grad_norm": 0.53515625, "learning_rate": 1.427279218121949e-05, "loss": 4.0473, "step": 5016 }, { "epoch": 1.6712750895311068, "grad_norm": 0.52734375, "learning_rate": 1.4272444440465467e-05, "loss": 4.12, "step": 5017 }, { "epoch": 1.6716082285333556, "grad_norm": 0.5390625, "learning_rate": 1.4272096620827133e-05, "loss": 4.1041, "step": 5018 }, { "epoch": 1.671941367535604, "grad_norm": 0.56640625, "learning_rate": 1.4271748722308544e-05, "loss": 3.9679, "step": 5019 }, { "epoch": 1.6722745065378528, "grad_norm": 0.5078125, "learning_rate": 1.427140074491375e-05, "loss": 4.0347, "step": 5020 }, { "epoch": 1.6726076455401016, "grad_norm": 0.53125, "learning_rate": 1.4271052688646804e-05, "loss": 4.0375, "step": 5021 }, { "epoch": 1.6729407845423503, "grad_norm": 0.53515625, "learning_rate": 1.427070455351176e-05, "loss": 4.0608, "step": 5022 }, { "epoch": 1.673273923544599, "grad_norm": 0.54296875, "learning_rate": 1.4270356339512673e-05, "loss": 4.0862, "step": 5023 }, { "epoch": 1.6736070625468478, "grad_norm": 0.5390625, "learning_rate": 1.42700080466536e-05, "loss": 4.0613, "step": 5024 }, { "epoch": 1.6739402015490965, "grad_norm": 0.5078125, "learning_rate": 1.4269659674938595e-05, "loss": 4.0475, "step": 5025 }, { "epoch": 1.674273340551345, "grad_norm": 0.53515625, "learning_rate": 1.426931122437172e-05, "loss": 4.0566, "step": 5026 }, { "epoch": 1.6746064795535938, "grad_norm": 0.5546875, "learning_rate": 1.4268962694957029e-05, "loss": 4.0809, "step": 5027 }, { "epoch": 1.6749396185558423, "grad_norm": 0.53515625, "learning_rate": 1.4268614086698587e-05, "loss": 4.0166, "step": 5028 }, { "epoch": 1.675272757558091, "grad_norm": 0.5546875, "learning_rate": 1.426826539960045e-05, "loss": 4.0597, "step": 5029 }, { "epoch": 1.6756058965603398, "grad_norm": 0.5546875, "learning_rate": 1.4267916633666683e-05, "loss": 4.0839, "step": 5030 }, { "epoch": 1.6759390355625885, "grad_norm": 0.55078125, "learning_rate": 1.4267567788901346e-05, "loss": 4.0649, "step": 5031 }, { "epoch": 1.6762721745648372, "grad_norm": 0.55078125, "learning_rate": 1.4267218865308502e-05, "loss": 4.0951, "step": 5032 }, { "epoch": 1.676605313567086, "grad_norm": 0.51171875, "learning_rate": 1.4266869862892215e-05, "loss": 4.0568, "step": 5033 }, { "epoch": 1.6769384525693347, "grad_norm": 0.55078125, "learning_rate": 1.4266520781656553e-05, "loss": 4.0531, "step": 5034 }, { "epoch": 1.6772715915715832, "grad_norm": 0.53125, "learning_rate": 1.4266171621605579e-05, "loss": 4.0967, "step": 5035 }, { "epoch": 1.677604730573832, "grad_norm": 0.54296875, "learning_rate": 1.4265822382743362e-05, "loss": 4.1244, "step": 5036 }, { "epoch": 1.6779378695760805, "grad_norm": 0.55859375, "learning_rate": 1.4265473065073969e-05, "loss": 4.0598, "step": 5037 }, { "epoch": 1.6782710085783292, "grad_norm": 0.5390625, "learning_rate": 1.4265123668601467e-05, "loss": 4.0321, "step": 5038 }, { "epoch": 1.678604147580578, "grad_norm": 0.53125, "learning_rate": 1.4264774193329929e-05, "loss": 4.0725, "step": 5039 }, { "epoch": 1.6789372865828267, "grad_norm": 0.546875, "learning_rate": 1.4264424639263423e-05, "loss": 4.0332, "step": 5040 }, { "epoch": 1.6792704255850754, "grad_norm": 0.53125, "learning_rate": 1.4264075006406023e-05, "loss": 4.0558, "step": 5041 }, { "epoch": 1.6796035645873242, "grad_norm": 0.5390625, "learning_rate": 1.4263725294761798e-05, "loss": 4.1357, "step": 5042 }, { "epoch": 1.6799367035895727, "grad_norm": 0.5546875, "learning_rate": 1.4263375504334826e-05, "loss": 4.0438, "step": 5043 }, { "epoch": 1.6802698425918214, "grad_norm": 0.53515625, "learning_rate": 1.4263025635129177e-05, "loss": 4.1091, "step": 5044 }, { "epoch": 1.6806029815940702, "grad_norm": 0.53515625, "learning_rate": 1.426267568714893e-05, "loss": 4.005, "step": 5045 }, { "epoch": 1.6809361205963187, "grad_norm": 0.55078125, "learning_rate": 1.4262325660398159e-05, "loss": 4.0899, "step": 5046 }, { "epoch": 1.6812692595985674, "grad_norm": 0.546875, "learning_rate": 1.426197555488094e-05, "loss": 4.0964, "step": 5047 }, { "epoch": 1.6816023986008162, "grad_norm": 0.54296875, "learning_rate": 1.4261625370601352e-05, "loss": 4.0777, "step": 5048 }, { "epoch": 1.681935537603065, "grad_norm": 0.53125, "learning_rate": 1.4261275107563476e-05, "loss": 4.0265, "step": 5049 }, { "epoch": 1.6822686766053137, "grad_norm": 0.53125, "learning_rate": 1.4260924765771388e-05, "loss": 4.0456, "step": 5050 }, { "epoch": 1.6826018156075624, "grad_norm": 0.52734375, "learning_rate": 1.4260574345229171e-05, "loss": 4.0648, "step": 5051 }, { "epoch": 1.682934954609811, "grad_norm": 0.5234375, "learning_rate": 1.4260223845940906e-05, "loss": 4.0913, "step": 5052 }, { "epoch": 1.6832680936120596, "grad_norm": 0.515625, "learning_rate": 1.4259873267910677e-05, "loss": 4.1142, "step": 5053 }, { "epoch": 1.6836012326143082, "grad_norm": 0.52734375, "learning_rate": 1.4259522611142565e-05, "loss": 4.0076, "step": 5054 }, { "epoch": 1.683934371616557, "grad_norm": 0.5390625, "learning_rate": 1.4259171875640655e-05, "loss": 4.0425, "step": 5055 }, { "epoch": 1.6842675106188056, "grad_norm": 0.5390625, "learning_rate": 1.4258821061409034e-05, "loss": 4.0453, "step": 5056 }, { "epoch": 1.6846006496210544, "grad_norm": 0.54296875, "learning_rate": 1.4258470168451789e-05, "loss": 4.0368, "step": 5057 }, { "epoch": 1.6849337886233031, "grad_norm": 0.53125, "learning_rate": 1.4258119196773004e-05, "loss": 4.0567, "step": 5058 }, { "epoch": 1.6852669276255519, "grad_norm": 0.51953125, "learning_rate": 1.4257768146376768e-05, "loss": 4.0387, "step": 5059 }, { "epoch": 1.6856000666278006, "grad_norm": 0.53125, "learning_rate": 1.425741701726717e-05, "loss": 3.9841, "step": 5060 }, { "epoch": 1.6859332056300491, "grad_norm": 0.5078125, "learning_rate": 1.4257065809448302e-05, "loss": 4.1496, "step": 5061 }, { "epoch": 1.6862663446322979, "grad_norm": 0.5390625, "learning_rate": 1.4256714522924252e-05, "loss": 4.002, "step": 5062 }, { "epoch": 1.6865994836345464, "grad_norm": 0.5390625, "learning_rate": 1.4256363157699112e-05, "loss": 4.0748, "step": 5063 }, { "epoch": 1.686932622636795, "grad_norm": 0.54296875, "learning_rate": 1.4256011713776977e-05, "loss": 4.0865, "step": 5064 }, { "epoch": 1.6872657616390438, "grad_norm": 0.5078125, "learning_rate": 1.4255660191161938e-05, "loss": 4.1069, "step": 5065 }, { "epoch": 1.6875989006412926, "grad_norm": 0.55078125, "learning_rate": 1.425530858985809e-05, "loss": 4.0677, "step": 5066 }, { "epoch": 1.6879320396435413, "grad_norm": 0.54296875, "learning_rate": 1.425495690986953e-05, "loss": 4.0488, "step": 5067 }, { "epoch": 1.68826517864579, "grad_norm": 0.5390625, "learning_rate": 1.4254605151200353e-05, "loss": 4.0925, "step": 5068 }, { "epoch": 1.6885983176480388, "grad_norm": 0.53125, "learning_rate": 1.4254253313854657e-05, "loss": 4.0315, "step": 5069 }, { "epoch": 1.6889314566502873, "grad_norm": 0.546875, "learning_rate": 1.4253901397836539e-05, "loss": 4.0292, "step": 5070 }, { "epoch": 1.689264595652536, "grad_norm": 0.51171875, "learning_rate": 1.4253549403150099e-05, "loss": 4.0303, "step": 5071 }, { "epoch": 1.6895977346547846, "grad_norm": 0.55078125, "learning_rate": 1.4253197329799437e-05, "loss": 4.0552, "step": 5072 }, { "epoch": 1.6899308736570333, "grad_norm": 0.54296875, "learning_rate": 1.4252845177788651e-05, "loss": 4.0213, "step": 5073 }, { "epoch": 1.690264012659282, "grad_norm": 0.54296875, "learning_rate": 1.4252492947121848e-05, "loss": 4.0397, "step": 5074 }, { "epoch": 1.6905971516615308, "grad_norm": 0.53515625, "learning_rate": 1.4252140637803126e-05, "loss": 4.0897, "step": 5075 }, { "epoch": 1.6909302906637795, "grad_norm": 0.51953125, "learning_rate": 1.4251788249836593e-05, "loss": 4.0262, "step": 5076 }, { "epoch": 1.6912634296660283, "grad_norm": 0.5546875, "learning_rate": 1.425143578322635e-05, "loss": 4.0609, "step": 5077 }, { "epoch": 1.6915965686682768, "grad_norm": 0.50390625, "learning_rate": 1.4251083237976505e-05, "loss": 4.0282, "step": 5078 }, { "epoch": 1.6919297076705255, "grad_norm": 0.51171875, "learning_rate": 1.425073061409116e-05, "loss": 4.0883, "step": 5079 }, { "epoch": 1.6922628466727743, "grad_norm": 0.52734375, "learning_rate": 1.4250377911574427e-05, "loss": 4.0188, "step": 5080 }, { "epoch": 1.6925959856750228, "grad_norm": 0.54296875, "learning_rate": 1.4250025130430413e-05, "loss": 4.0776, "step": 5081 }, { "epoch": 1.6929291246772715, "grad_norm": 0.53515625, "learning_rate": 1.4249672270663227e-05, "loss": 4.0132, "step": 5082 }, { "epoch": 1.6932622636795203, "grad_norm": 0.52734375, "learning_rate": 1.4249319332276978e-05, "loss": 4.0535, "step": 5083 }, { "epoch": 1.693595402681769, "grad_norm": 0.53515625, "learning_rate": 1.4248966315275779e-05, "loss": 4.0059, "step": 5084 }, { "epoch": 1.6939285416840177, "grad_norm": 0.54296875, "learning_rate": 1.424861321966374e-05, "loss": 4.0597, "step": 5085 }, { "epoch": 1.6942616806862665, "grad_norm": 0.55078125, "learning_rate": 1.4248260045444974e-05, "loss": 4.0357, "step": 5086 }, { "epoch": 1.694594819688515, "grad_norm": 0.5390625, "learning_rate": 1.4247906792623595e-05, "loss": 4.0415, "step": 5087 }, { "epoch": 1.6949279586907637, "grad_norm": 0.5390625, "learning_rate": 1.424755346120372e-05, "loss": 4.0197, "step": 5088 }, { "epoch": 1.6952610976930123, "grad_norm": 0.5546875, "learning_rate": 1.4247200051189458e-05, "loss": 4.017, "step": 5089 }, { "epoch": 1.695594236695261, "grad_norm": 0.52734375, "learning_rate": 1.4246846562584933e-05, "loss": 4.0762, "step": 5090 }, { "epoch": 1.6959273756975097, "grad_norm": 0.55078125, "learning_rate": 1.4246492995394259e-05, "loss": 4.119, "step": 5091 }, { "epoch": 1.6962605146997585, "grad_norm": 0.53515625, "learning_rate": 1.4246139349621556e-05, "loss": 4.1396, "step": 5092 }, { "epoch": 1.6965936537020072, "grad_norm": 0.50390625, "learning_rate": 1.4245785625270939e-05, "loss": 4.145, "step": 5093 }, { "epoch": 1.696926792704256, "grad_norm": 0.52734375, "learning_rate": 1.424543182234653e-05, "loss": 4.0934, "step": 5094 }, { "epoch": 1.6972599317065047, "grad_norm": 0.51953125, "learning_rate": 1.4245077940852453e-05, "loss": 4.0117, "step": 5095 }, { "epoch": 1.6975930707087532, "grad_norm": 0.53125, "learning_rate": 1.4244723980792828e-05, "loss": 3.9926, "step": 5096 }, { "epoch": 1.697926209711002, "grad_norm": 0.54296875, "learning_rate": 1.4244369942171777e-05, "loss": 4.0467, "step": 5097 }, { "epoch": 1.6982593487132505, "grad_norm": 0.546875, "learning_rate": 1.4244015824993425e-05, "loss": 4.0621, "step": 5098 }, { "epoch": 1.6985924877154992, "grad_norm": 0.5078125, "learning_rate": 1.4243661629261895e-05, "loss": 4.0291, "step": 5099 }, { "epoch": 1.698925626717748, "grad_norm": 0.515625, "learning_rate": 1.4243307354981315e-05, "loss": 4.0752, "step": 5100 }, { "epoch": 1.6992587657199967, "grad_norm": 0.53125, "learning_rate": 1.4242953002155811e-05, "loss": 4.0713, "step": 5101 }, { "epoch": 1.6995919047222454, "grad_norm": 0.54296875, "learning_rate": 1.4242598570789509e-05, "loss": 4.0317, "step": 5102 }, { "epoch": 1.6999250437244942, "grad_norm": 0.51953125, "learning_rate": 1.4242244060886535e-05, "loss": 3.9877, "step": 5103 }, { "epoch": 1.700258182726743, "grad_norm": 0.52734375, "learning_rate": 1.4241889472451024e-05, "loss": 4.072, "step": 5104 }, { "epoch": 1.7005913217289914, "grad_norm": 0.5390625, "learning_rate": 1.4241534805487103e-05, "loss": 4.0541, "step": 5105 }, { "epoch": 1.7009244607312402, "grad_norm": 0.5, "learning_rate": 1.4241180059998905e-05, "loss": 4.1237, "step": 5106 }, { "epoch": 1.7012575997334887, "grad_norm": 0.55078125, "learning_rate": 1.424082523599056e-05, "loss": 4.0552, "step": 5107 }, { "epoch": 1.7015907387357374, "grad_norm": 0.5234375, "learning_rate": 1.4240470333466202e-05, "loss": 4.13, "step": 5108 }, { "epoch": 1.7019238777379861, "grad_norm": 0.5390625, "learning_rate": 1.4240115352429962e-05, "loss": 4.0463, "step": 5109 }, { "epoch": 1.7022570167402349, "grad_norm": 0.515625, "learning_rate": 1.423976029288598e-05, "loss": 3.9905, "step": 5110 }, { "epoch": 1.7025901557424836, "grad_norm": 0.546875, "learning_rate": 1.4239405154838387e-05, "loss": 4.0737, "step": 5111 }, { "epoch": 1.7029232947447324, "grad_norm": 0.53125, "learning_rate": 1.4239049938291323e-05, "loss": 4.1095, "step": 5112 }, { "epoch": 1.7032564337469809, "grad_norm": 0.53515625, "learning_rate": 1.4238694643248923e-05, "loss": 4.0815, "step": 5113 }, { "epoch": 1.7035895727492296, "grad_norm": 0.56640625, "learning_rate": 1.4238339269715327e-05, "loss": 4.0458, "step": 5114 }, { "epoch": 1.7039227117514784, "grad_norm": 0.546875, "learning_rate": 1.4237983817694674e-05, "loss": 4.0356, "step": 5115 }, { "epoch": 1.7042558507537269, "grad_norm": 0.53515625, "learning_rate": 1.4237628287191102e-05, "loss": 4.0007, "step": 5116 }, { "epoch": 1.7045889897559756, "grad_norm": 0.55078125, "learning_rate": 1.4237272678208757e-05, "loss": 4.0119, "step": 5117 }, { "epoch": 1.7049221287582244, "grad_norm": 0.51953125, "learning_rate": 1.4236916990751776e-05, "loss": 4.0435, "step": 5118 }, { "epoch": 1.705255267760473, "grad_norm": 0.5234375, "learning_rate": 1.4236561224824305e-05, "loss": 4.1025, "step": 5119 }, { "epoch": 1.7055884067627218, "grad_norm": 0.53125, "learning_rate": 1.4236205380430487e-05, "loss": 4.03, "step": 5120 }, { "epoch": 1.7059215457649706, "grad_norm": 0.54296875, "learning_rate": 1.4235849457574468e-05, "loss": 4.0213, "step": 5121 }, { "epoch": 1.706254684767219, "grad_norm": 0.53515625, "learning_rate": 1.4235493456260391e-05, "loss": 4.1056, "step": 5122 }, { "epoch": 1.7065878237694678, "grad_norm": 0.546875, "learning_rate": 1.4235137376492404e-05, "loss": 4.0739, "step": 5123 }, { "epoch": 1.7069209627717163, "grad_norm": 0.54296875, "learning_rate": 1.4234781218274657e-05, "loss": 4.0477, "step": 5124 }, { "epoch": 1.707254101773965, "grad_norm": 0.55078125, "learning_rate": 1.4234424981611293e-05, "loss": 4.0981, "step": 5125 }, { "epoch": 1.7075872407762138, "grad_norm": 0.5390625, "learning_rate": 1.423406866650647e-05, "loss": 4.0339, "step": 5126 }, { "epoch": 1.7079203797784626, "grad_norm": 0.5234375, "learning_rate": 1.4233712272964328e-05, "loss": 4.0652, "step": 5127 }, { "epoch": 1.7082535187807113, "grad_norm": 0.52734375, "learning_rate": 1.4233355800989026e-05, "loss": 3.9631, "step": 5128 }, { "epoch": 1.70858665778296, "grad_norm": 0.546875, "learning_rate": 1.4232999250584712e-05, "loss": 4.1088, "step": 5129 }, { "epoch": 1.7089197967852088, "grad_norm": 0.5390625, "learning_rate": 1.4232642621755541e-05, "loss": 4.0719, "step": 5130 }, { "epoch": 1.7092529357874573, "grad_norm": 0.546875, "learning_rate": 1.4232285914505666e-05, "loss": 4.0903, "step": 5131 }, { "epoch": 1.709586074789706, "grad_norm": 0.53125, "learning_rate": 1.4231929128839242e-05, "loss": 4.0703, "step": 5132 }, { "epoch": 1.7099192137919546, "grad_norm": 0.53125, "learning_rate": 1.4231572264760425e-05, "loss": 4.0531, "step": 5133 }, { "epoch": 1.7102523527942033, "grad_norm": 0.5234375, "learning_rate": 1.4231215322273373e-05, "loss": 4.168, "step": 5134 }, { "epoch": 1.710585491796452, "grad_norm": 0.5234375, "learning_rate": 1.423085830138224e-05, "loss": 4.0382, "step": 5135 }, { "epoch": 1.7109186307987008, "grad_norm": 0.515625, "learning_rate": 1.4230501202091188e-05, "loss": 4.1102, "step": 5136 }, { "epoch": 1.7112517698009495, "grad_norm": 0.56640625, "learning_rate": 1.4230144024404376e-05, "loss": 4.0825, "step": 5137 }, { "epoch": 1.7115849088031982, "grad_norm": 0.5234375, "learning_rate": 1.4229786768325962e-05, "loss": 4.0849, "step": 5138 }, { "epoch": 1.711918047805447, "grad_norm": 0.52734375, "learning_rate": 1.422942943386011e-05, "loss": 4.0722, "step": 5139 }, { "epoch": 1.7122511868076955, "grad_norm": 0.54296875, "learning_rate": 1.422907202101098e-05, "loss": 3.9855, "step": 5140 }, { "epoch": 1.7125843258099442, "grad_norm": 0.515625, "learning_rate": 1.4228714529782737e-05, "loss": 4.0655, "step": 5141 }, { "epoch": 1.7129174648121928, "grad_norm": 0.546875, "learning_rate": 1.4228356960179544e-05, "loss": 4.1127, "step": 5142 }, { "epoch": 1.7132506038144415, "grad_norm": 0.50390625, "learning_rate": 1.4227999312205567e-05, "loss": 4.0016, "step": 5143 }, { "epoch": 1.7135837428166902, "grad_norm": 0.52734375, "learning_rate": 1.422764158586497e-05, "loss": 4.0625, "step": 5144 }, { "epoch": 1.713916881818939, "grad_norm": 0.53125, "learning_rate": 1.422728378116192e-05, "loss": 4.0609, "step": 5145 }, { "epoch": 1.7142500208211877, "grad_norm": 0.49609375, "learning_rate": 1.4226925898100585e-05, "loss": 4.0434, "step": 5146 }, { "epoch": 1.7145831598234365, "grad_norm": 0.54296875, "learning_rate": 1.4226567936685135e-05, "loss": 4.0634, "step": 5147 }, { "epoch": 1.714916298825685, "grad_norm": 0.52734375, "learning_rate": 1.4226209896919738e-05, "loss": 4.0718, "step": 5148 }, { "epoch": 1.7152494378279337, "grad_norm": 0.53125, "learning_rate": 1.4225851778808563e-05, "loss": 4.1419, "step": 5149 }, { "epoch": 1.7155825768301824, "grad_norm": 0.53125, "learning_rate": 1.4225493582355785e-05, "loss": 4.034, "step": 5150 }, { "epoch": 1.715915715832431, "grad_norm": 0.5234375, "learning_rate": 1.4225135307565574e-05, "loss": 4.0927, "step": 5151 }, { "epoch": 1.7162488548346797, "grad_norm": 0.5234375, "learning_rate": 1.4224776954442103e-05, "loss": 4.0138, "step": 5152 }, { "epoch": 1.7165819938369284, "grad_norm": 0.5546875, "learning_rate": 1.4224418522989544e-05, "loss": 3.9963, "step": 5153 }, { "epoch": 1.7169151328391772, "grad_norm": 0.5546875, "learning_rate": 1.4224060013212076e-05, "loss": 4.0691, "step": 5154 }, { "epoch": 1.717248271841426, "grad_norm": 0.5703125, "learning_rate": 1.4223701425113875e-05, "loss": 4.0994, "step": 5155 }, { "epoch": 1.7175814108436747, "grad_norm": 0.5390625, "learning_rate": 1.4223342758699113e-05, "loss": 4.132, "step": 5156 }, { "epoch": 1.7179145498459232, "grad_norm": 0.515625, "learning_rate": 1.4222984013971973e-05, "loss": 4.1178, "step": 5157 }, { "epoch": 1.718247688848172, "grad_norm": 0.52734375, "learning_rate": 1.422262519093663e-05, "loss": 4.0791, "step": 5158 }, { "epoch": 1.7185808278504204, "grad_norm": 0.51171875, "learning_rate": 1.4222266289597266e-05, "loss": 4.0646, "step": 5159 }, { "epoch": 1.7189139668526692, "grad_norm": 0.546875, "learning_rate": 1.4221907309958057e-05, "loss": 4.0625, "step": 5160 }, { "epoch": 1.719247105854918, "grad_norm": 0.55859375, "learning_rate": 1.422154825202319e-05, "loss": 4.0905, "step": 5161 }, { "epoch": 1.7195802448571667, "grad_norm": 0.515625, "learning_rate": 1.4221189115796844e-05, "loss": 4.1187, "step": 5162 }, { "epoch": 1.7199133838594154, "grad_norm": 0.55078125, "learning_rate": 1.4220829901283202e-05, "loss": 4.0324, "step": 5163 }, { "epoch": 1.7202465228616641, "grad_norm": 0.53515625, "learning_rate": 1.422047060848645e-05, "loss": 4.0736, "step": 5164 }, { "epoch": 1.7205796618639129, "grad_norm": 0.5390625, "learning_rate": 1.4220111237410772e-05, "loss": 4.1225, "step": 5165 }, { "epoch": 1.7209128008661614, "grad_norm": 0.55078125, "learning_rate": 1.4219751788060355e-05, "loss": 3.9686, "step": 5166 }, { "epoch": 1.7212459398684101, "grad_norm": 0.546875, "learning_rate": 1.4219392260439383e-05, "loss": 4.0879, "step": 5167 }, { "epoch": 1.7215790788706586, "grad_norm": 0.55078125, "learning_rate": 1.4219032654552045e-05, "loss": 4.0306, "step": 5168 }, { "epoch": 1.7219122178729074, "grad_norm": 0.55859375, "learning_rate": 1.421867297040253e-05, "loss": 4.0199, "step": 5169 }, { "epoch": 1.7222453568751561, "grad_norm": 0.546875, "learning_rate": 1.421831320799503e-05, "loss": 4.084, "step": 5170 }, { "epoch": 1.7225784958774049, "grad_norm": 0.54296875, "learning_rate": 1.421795336733373e-05, "loss": 4.0602, "step": 5171 }, { "epoch": 1.7229116348796536, "grad_norm": 0.5625, "learning_rate": 1.4217593448422825e-05, "loss": 4.1488, "step": 5172 }, { "epoch": 1.7232447738819023, "grad_norm": 0.51953125, "learning_rate": 1.4217233451266508e-05, "loss": 4.012, "step": 5173 }, { "epoch": 1.723577912884151, "grad_norm": 0.5234375, "learning_rate": 1.421687337586897e-05, "loss": 4.0673, "step": 5174 }, { "epoch": 1.7239110518863996, "grad_norm": 0.52734375, "learning_rate": 1.4216513222234404e-05, "loss": 4.0703, "step": 5175 }, { "epoch": 1.7242441908886483, "grad_norm": 0.54296875, "learning_rate": 1.4216152990367009e-05, "loss": 4.0347, "step": 5176 }, { "epoch": 1.7245773298908968, "grad_norm": 0.52734375, "learning_rate": 1.421579268027098e-05, "loss": 4.0309, "step": 5177 }, { "epoch": 1.7249104688931456, "grad_norm": 0.53125, "learning_rate": 1.421543229195051e-05, "loss": 4.1252, "step": 5178 }, { "epoch": 1.7252436078953943, "grad_norm": 0.5546875, "learning_rate": 1.42150718254098e-05, "loss": 3.9899, "step": 5179 }, { "epoch": 1.725576746897643, "grad_norm": 0.5234375, "learning_rate": 1.421471128065305e-05, "loss": 3.9693, "step": 5180 }, { "epoch": 1.7259098858998918, "grad_norm": 0.53125, "learning_rate": 1.4214350657684457e-05, "loss": 4.0392, "step": 5181 }, { "epoch": 1.7262430249021405, "grad_norm": 0.546875, "learning_rate": 1.4213989956508218e-05, "loss": 4.1016, "step": 5182 }, { "epoch": 1.726576163904389, "grad_norm": 0.5546875, "learning_rate": 1.4213629177128545e-05, "loss": 3.9825, "step": 5183 }, { "epoch": 1.7269093029066378, "grad_norm": 0.55859375, "learning_rate": 1.421326831954963e-05, "loss": 3.9802, "step": 5184 }, { "epoch": 1.7272424419088863, "grad_norm": 0.5546875, "learning_rate": 1.421290738377568e-05, "loss": 4.0688, "step": 5185 }, { "epoch": 1.727575580911135, "grad_norm": 0.53125, "learning_rate": 1.4212546369810898e-05, "loss": 4.1324, "step": 5186 }, { "epoch": 1.7279087199133838, "grad_norm": 0.53125, "learning_rate": 1.4212185277659492e-05, "loss": 4.0281, "step": 5187 }, { "epoch": 1.7282418589156325, "grad_norm": 0.53515625, "learning_rate": 1.4211824107325667e-05, "loss": 4.0683, "step": 5188 }, { "epoch": 1.7285749979178813, "grad_norm": 0.5390625, "learning_rate": 1.4211462858813628e-05, "loss": 4.0909, "step": 5189 }, { "epoch": 1.72890813692013, "grad_norm": 0.5546875, "learning_rate": 1.4211101532127583e-05, "loss": 4.0598, "step": 5190 }, { "epoch": 1.7292412759223788, "grad_norm": 0.5625, "learning_rate": 1.4210740127271742e-05, "loss": 4.088, "step": 5191 }, { "epoch": 1.7295744149246273, "grad_norm": 0.52734375, "learning_rate": 1.4210378644250316e-05, "loss": 4.0857, "step": 5192 }, { "epoch": 1.729907553926876, "grad_norm": 0.5390625, "learning_rate": 1.4210017083067509e-05, "loss": 4.0735, "step": 5193 }, { "epoch": 1.7302406929291245, "grad_norm": 0.5234375, "learning_rate": 1.4209655443727541e-05, "loss": 4.0604, "step": 5194 }, { "epoch": 1.7305738319313733, "grad_norm": 0.57421875, "learning_rate": 1.4209293726234616e-05, "loss": 4.0293, "step": 5195 }, { "epoch": 1.730906970933622, "grad_norm": 0.546875, "learning_rate": 1.4208931930592954e-05, "loss": 4.0421, "step": 5196 }, { "epoch": 1.7312401099358707, "grad_norm": 0.53515625, "learning_rate": 1.4208570056806767e-05, "loss": 4.0122, "step": 5197 }, { "epoch": 1.7315732489381195, "grad_norm": 0.55859375, "learning_rate": 1.420820810488027e-05, "loss": 4.0327, "step": 5198 }, { "epoch": 1.7319063879403682, "grad_norm": 0.546875, "learning_rate": 1.4207846074817675e-05, "loss": 4.0101, "step": 5199 }, { "epoch": 1.732239526942617, "grad_norm": 0.52734375, "learning_rate": 1.4207483966623204e-05, "loss": 4.166, "step": 5200 }, { "epoch": 1.7325726659448655, "grad_norm": 0.55078125, "learning_rate": 1.4207121780301077e-05, "loss": 4.0045, "step": 5201 }, { "epoch": 1.7329058049471142, "grad_norm": 0.55078125, "learning_rate": 1.4206759515855505e-05, "loss": 4.012, "step": 5202 }, { "epoch": 1.7332389439493627, "grad_norm": 0.5625, "learning_rate": 1.4206397173290712e-05, "loss": 4.053, "step": 5203 }, { "epoch": 1.7335720829516115, "grad_norm": 0.5625, "learning_rate": 1.4206034752610917e-05, "loss": 4.0526, "step": 5204 }, { "epoch": 1.7339052219538602, "grad_norm": 0.5703125, "learning_rate": 1.4205672253820345e-05, "loss": 4.0373, "step": 5205 }, { "epoch": 1.734238360956109, "grad_norm": 0.53125, "learning_rate": 1.4205309676923213e-05, "loss": 4.0245, "step": 5206 }, { "epoch": 1.7345714999583577, "grad_norm": 0.52734375, "learning_rate": 1.4204947021923749e-05, "loss": 4.0874, "step": 5207 }, { "epoch": 1.7349046389606064, "grad_norm": 0.53515625, "learning_rate": 1.4204584288826174e-05, "loss": 3.9691, "step": 5208 }, { "epoch": 1.7352377779628552, "grad_norm": 0.5625, "learning_rate": 1.4204221477634716e-05, "loss": 4.0648, "step": 5209 }, { "epoch": 1.7355709169651037, "grad_norm": 0.55859375, "learning_rate": 1.4203858588353596e-05, "loss": 4.086, "step": 5210 }, { "epoch": 1.7359040559673524, "grad_norm": 0.546875, "learning_rate": 1.4203495620987045e-05, "loss": 4.078, "step": 5211 }, { "epoch": 1.736237194969601, "grad_norm": 0.51953125, "learning_rate": 1.4203132575539291e-05, "loss": 4.096, "step": 5212 }, { "epoch": 1.7365703339718497, "grad_norm": 0.53125, "learning_rate": 1.4202769452014563e-05, "loss": 4.1125, "step": 5213 }, { "epoch": 1.7369034729740984, "grad_norm": 0.55078125, "learning_rate": 1.4202406250417084e-05, "loss": 4.0401, "step": 5214 }, { "epoch": 1.7372366119763472, "grad_norm": 0.56640625, "learning_rate": 1.4202042970751092e-05, "loss": 4.0072, "step": 5215 }, { "epoch": 1.737569750978596, "grad_norm": 0.55859375, "learning_rate": 1.4201679613020818e-05, "loss": 4.0274, "step": 5216 }, { "epoch": 1.7379028899808446, "grad_norm": 0.54296875, "learning_rate": 1.420131617723049e-05, "loss": 4.0578, "step": 5217 }, { "epoch": 1.7382360289830932, "grad_norm": 0.55078125, "learning_rate": 1.4200952663384345e-05, "loss": 4.068, "step": 5218 }, { "epoch": 1.738569167985342, "grad_norm": 0.53125, "learning_rate": 1.4200589071486615e-05, "loss": 4.1031, "step": 5219 }, { "epoch": 1.7389023069875904, "grad_norm": 0.53515625, "learning_rate": 1.4200225401541537e-05, "loss": 4.1058, "step": 5220 }, { "epoch": 1.7392354459898391, "grad_norm": 0.546875, "learning_rate": 1.4199861653553344e-05, "loss": 4.0471, "step": 5221 }, { "epoch": 1.7395685849920879, "grad_norm": 0.53125, "learning_rate": 1.4199497827526275e-05, "loss": 4.0142, "step": 5222 }, { "epoch": 1.7399017239943366, "grad_norm": 0.53125, "learning_rate": 1.4199133923464567e-05, "loss": 4.1057, "step": 5223 }, { "epoch": 1.7402348629965854, "grad_norm": 0.53125, "learning_rate": 1.419876994137246e-05, "loss": 4.0609, "step": 5224 }, { "epoch": 1.740568001998834, "grad_norm": 0.56640625, "learning_rate": 1.4198405881254191e-05, "loss": 4.0859, "step": 5225 }, { "epoch": 1.7409011410010828, "grad_norm": 0.54296875, "learning_rate": 1.4198041743114004e-05, "loss": 4.0861, "step": 5226 }, { "epoch": 1.7412342800033314, "grad_norm": 0.5390625, "learning_rate": 1.4197677526956137e-05, "loss": 4.0396, "step": 5227 }, { "epoch": 1.74156741900558, "grad_norm": 0.5390625, "learning_rate": 1.4197313232784835e-05, "loss": 4.1233, "step": 5228 }, { "epoch": 1.7419005580078286, "grad_norm": 0.546875, "learning_rate": 1.419694886060434e-05, "loss": 4.0818, "step": 5229 }, { "epoch": 1.7422336970100774, "grad_norm": 0.51953125, "learning_rate": 1.4196584410418898e-05, "loss": 4.0436, "step": 5230 }, { "epoch": 1.742566836012326, "grad_norm": 0.546875, "learning_rate": 1.419621988223275e-05, "loss": 4.0674, "step": 5231 }, { "epoch": 1.7428999750145748, "grad_norm": 0.55859375, "learning_rate": 1.4195855276050146e-05, "loss": 4.0386, "step": 5232 }, { "epoch": 1.7432331140168236, "grad_norm": 0.53125, "learning_rate": 1.4195490591875332e-05, "loss": 4.1282, "step": 5233 }, { "epoch": 1.7435662530190723, "grad_norm": 0.515625, "learning_rate": 1.4195125829712552e-05, "loss": 4.1226, "step": 5234 }, { "epoch": 1.743899392021321, "grad_norm": 0.53125, "learning_rate": 1.4194760989566062e-05, "loss": 4.0915, "step": 5235 }, { "epoch": 1.7442325310235696, "grad_norm": 0.5546875, "learning_rate": 1.4194396071440102e-05, "loss": 4.0354, "step": 5236 }, { "epoch": 1.7445656700258183, "grad_norm": 0.5390625, "learning_rate": 1.4194031075338932e-05, "loss": 4.0249, "step": 5237 }, { "epoch": 1.7448988090280668, "grad_norm": 0.53515625, "learning_rate": 1.4193666001266797e-05, "loss": 4.0913, "step": 5238 }, { "epoch": 1.7452319480303156, "grad_norm": 0.55859375, "learning_rate": 1.4193300849227952e-05, "loss": 4.0633, "step": 5239 }, { "epoch": 1.7455650870325643, "grad_norm": 0.56640625, "learning_rate": 1.419293561922665e-05, "loss": 4.0813, "step": 5240 }, { "epoch": 1.745898226034813, "grad_norm": 0.54296875, "learning_rate": 1.4192570311267144e-05, "loss": 4.0828, "step": 5241 }, { "epoch": 1.7462313650370618, "grad_norm": 0.52734375, "learning_rate": 1.4192204925353692e-05, "loss": 4.0719, "step": 5242 }, { "epoch": 1.7465645040393105, "grad_norm": 0.53125, "learning_rate": 1.4191839461490547e-05, "loss": 4.0424, "step": 5243 }, { "epoch": 1.7468976430415593, "grad_norm": 0.5625, "learning_rate": 1.4191473919681966e-05, "loss": 4.046, "step": 5244 }, { "epoch": 1.7472307820438078, "grad_norm": 0.5234375, "learning_rate": 1.4191108299932209e-05, "loss": 4.0512, "step": 5245 }, { "epoch": 1.7475639210460565, "grad_norm": 0.53515625, "learning_rate": 1.4190742602245533e-05, "loss": 4.0864, "step": 5246 }, { "epoch": 1.747897060048305, "grad_norm": 0.53125, "learning_rate": 1.4190376826626195e-05, "loss": 4.0635, "step": 5247 }, { "epoch": 1.7482301990505538, "grad_norm": 0.53515625, "learning_rate": 1.419001097307846e-05, "loss": 4.0875, "step": 5248 }, { "epoch": 1.7485633380528025, "grad_norm": 0.53125, "learning_rate": 1.4189645041606589e-05, "loss": 4.0635, "step": 5249 }, { "epoch": 1.7488964770550512, "grad_norm": 0.5703125, "learning_rate": 1.4189279032214842e-05, "loss": 3.9965, "step": 5250 }, { "epoch": 1.7492296160573, "grad_norm": 0.5390625, "learning_rate": 1.4188912944907484e-05, "loss": 4.0266, "step": 5251 }, { "epoch": 1.7495627550595487, "grad_norm": 0.55859375, "learning_rate": 1.4188546779688777e-05, "loss": 4.0948, "step": 5252 }, { "epoch": 1.7498958940617972, "grad_norm": 0.5390625, "learning_rate": 1.418818053656299e-05, "loss": 4.0293, "step": 5253 }, { "epoch": 1.750229033064046, "grad_norm": 0.5234375, "learning_rate": 1.418781421553438e-05, "loss": 4.0777, "step": 5254 }, { "epoch": 1.7505621720662945, "grad_norm": 0.5546875, "learning_rate": 1.4187447816607227e-05, "loss": 4.1062, "step": 5255 }, { "epoch": 1.7508953110685432, "grad_norm": 0.5390625, "learning_rate": 1.418708133978579e-05, "loss": 4.097, "step": 5256 }, { "epoch": 1.751228450070792, "grad_norm": 0.546875, "learning_rate": 1.418671478507434e-05, "loss": 4.0727, "step": 5257 }, { "epoch": 1.7515615890730407, "grad_norm": 0.57421875, "learning_rate": 1.4186348152477145e-05, "loss": 4.0541, "step": 5258 }, { "epoch": 1.7518947280752895, "grad_norm": 0.53125, "learning_rate": 1.4185981441998476e-05, "loss": 4.0798, "step": 5259 }, { "epoch": 1.7522278670775382, "grad_norm": 0.49609375, "learning_rate": 1.4185614653642606e-05, "loss": 4.0199, "step": 5260 }, { "epoch": 1.752561006079787, "grad_norm": 0.5390625, "learning_rate": 1.4185247787413806e-05, "loss": 4.109, "step": 5261 }, { "epoch": 1.7528941450820354, "grad_norm": 0.546875, "learning_rate": 1.418488084331635e-05, "loss": 4.0527, "step": 5262 }, { "epoch": 1.7532272840842842, "grad_norm": 0.55078125, "learning_rate": 1.4184513821354512e-05, "loss": 4.0528, "step": 5263 }, { "epoch": 1.7535604230865327, "grad_norm": 0.546875, "learning_rate": 1.4184146721532564e-05, "loss": 4.0334, "step": 5264 }, { "epoch": 1.7538935620887814, "grad_norm": 0.53515625, "learning_rate": 1.4183779543854787e-05, "loss": 4.1274, "step": 5265 }, { "epoch": 1.7542267010910302, "grad_norm": 0.5546875, "learning_rate": 1.4183412288325453e-05, "loss": 4.0856, "step": 5266 }, { "epoch": 1.754559840093279, "grad_norm": 0.55078125, "learning_rate": 1.4183044954948846e-05, "loss": 4.0311, "step": 5267 }, { "epoch": 1.7548929790955277, "grad_norm": 0.5625, "learning_rate": 1.4182677543729239e-05, "loss": 4.0526, "step": 5268 }, { "epoch": 1.7552261180977764, "grad_norm": 0.55859375, "learning_rate": 1.4182310054670912e-05, "loss": 4.0359, "step": 5269 }, { "epoch": 1.7555592571000251, "grad_norm": 0.546875, "learning_rate": 1.4181942487778148e-05, "loss": 4.0735, "step": 5270 }, { "epoch": 1.7558923961022737, "grad_norm": 0.55859375, "learning_rate": 1.4181574843055227e-05, "loss": 4.0772, "step": 5271 }, { "epoch": 1.7562255351045224, "grad_norm": 0.55078125, "learning_rate": 1.418120712050643e-05, "loss": 4.1192, "step": 5272 }, { "epoch": 1.756558674106771, "grad_norm": 0.8046875, "learning_rate": 1.4180839320136043e-05, "loss": 4.0554, "step": 5273 }, { "epoch": 1.7568918131090197, "grad_norm": 0.57421875, "learning_rate": 1.4180471441948347e-05, "loss": 4.0493, "step": 5274 }, { "epoch": 1.7572249521112684, "grad_norm": 0.5390625, "learning_rate": 1.4180103485947631e-05, "loss": 4.0604, "step": 5275 }, { "epoch": 1.7575580911135171, "grad_norm": 0.53515625, "learning_rate": 1.4179735452138178e-05, "loss": 4.1117, "step": 5276 }, { "epoch": 1.7578912301157659, "grad_norm": 0.53125, "learning_rate": 1.4179367340524273e-05, "loss": 4.0894, "step": 5277 }, { "epoch": 1.7582243691180146, "grad_norm": 0.53515625, "learning_rate": 1.4178999151110208e-05, "loss": 4.0004, "step": 5278 }, { "epoch": 1.7585575081202633, "grad_norm": 0.53515625, "learning_rate": 1.4178630883900269e-05, "loss": 4.0459, "step": 5279 }, { "epoch": 1.7588906471225119, "grad_norm": 0.53125, "learning_rate": 1.4178262538898745e-05, "loss": 4.015, "step": 5280 }, { "epoch": 1.7592237861247606, "grad_norm": 0.53515625, "learning_rate": 1.4177894116109928e-05, "loss": 4.1065, "step": 5281 }, { "epoch": 1.7595569251270091, "grad_norm": 0.5390625, "learning_rate": 1.4177525615538112e-05, "loss": 4.0754, "step": 5282 }, { "epoch": 1.7598900641292579, "grad_norm": 0.5625, "learning_rate": 1.4177157037187582e-05, "loss": 4.0747, "step": 5283 }, { "epoch": 1.7602232031315066, "grad_norm": 0.5234375, "learning_rate": 1.4176788381062637e-05, "loss": 4.0415, "step": 5284 }, { "epoch": 1.7605563421337553, "grad_norm": 0.54296875, "learning_rate": 1.417641964716757e-05, "loss": 4.1396, "step": 5285 }, { "epoch": 1.760889481136004, "grad_norm": 0.5546875, "learning_rate": 1.4176050835506673e-05, "loss": 4.0281, "step": 5286 }, { "epoch": 1.7612226201382528, "grad_norm": 0.53125, "learning_rate": 1.4175681946084247e-05, "loss": 4.0667, "step": 5287 }, { "epoch": 1.7615557591405013, "grad_norm": 0.54296875, "learning_rate": 1.4175312978904582e-05, "loss": 4.0093, "step": 5288 }, { "epoch": 1.76188889814275, "grad_norm": 0.53515625, "learning_rate": 1.4174943933971982e-05, "loss": 4.1118, "step": 5289 }, { "epoch": 1.7622220371449986, "grad_norm": 0.54296875, "learning_rate": 1.4174574811290744e-05, "loss": 4.0922, "step": 5290 }, { "epoch": 1.7625551761472473, "grad_norm": 0.53515625, "learning_rate": 1.4174205610865165e-05, "loss": 4.0839, "step": 5291 }, { "epoch": 1.762888315149496, "grad_norm": 0.53125, "learning_rate": 1.4173836332699546e-05, "loss": 4.0835, "step": 5292 }, { "epoch": 1.7632214541517448, "grad_norm": 0.51953125, "learning_rate": 1.4173466976798191e-05, "loss": 4.095, "step": 5293 }, { "epoch": 1.7635545931539935, "grad_norm": 0.5390625, "learning_rate": 1.41730975431654e-05, "loss": 4.0541, "step": 5294 }, { "epoch": 1.7638877321562423, "grad_norm": 0.51953125, "learning_rate": 1.4172728031805476e-05, "loss": 4.0572, "step": 5295 }, { "epoch": 1.764220871158491, "grad_norm": 0.5234375, "learning_rate": 1.4172358442722726e-05, "loss": 4.0212, "step": 5296 }, { "epoch": 1.7645540101607395, "grad_norm": 0.53125, "learning_rate": 1.4171988775921448e-05, "loss": 4.0687, "step": 5297 }, { "epoch": 1.7648871491629883, "grad_norm": 0.546875, "learning_rate": 1.4171619031405956e-05, "loss": 4.0569, "step": 5298 }, { "epoch": 1.7652202881652368, "grad_norm": 0.515625, "learning_rate": 1.417124920918055e-05, "loss": 4.0858, "step": 5299 }, { "epoch": 1.7655534271674855, "grad_norm": 0.546875, "learning_rate": 1.4170879309249541e-05, "loss": 4.0332, "step": 5300 }, { "epoch": 1.7658865661697343, "grad_norm": 0.57421875, "learning_rate": 1.4170509331617242e-05, "loss": 4.0379, "step": 5301 }, { "epoch": 1.766219705171983, "grad_norm": 0.5234375, "learning_rate": 1.4170139276287951e-05, "loss": 4.0623, "step": 5302 }, { "epoch": 1.7665528441742318, "grad_norm": 0.53515625, "learning_rate": 1.4169769143265988e-05, "loss": 4.0464, "step": 5303 }, { "epoch": 1.7668859831764805, "grad_norm": 0.54296875, "learning_rate": 1.4169398932555661e-05, "loss": 3.9836, "step": 5304 }, { "epoch": 1.7672191221787292, "grad_norm": 0.51953125, "learning_rate": 1.4169028644161283e-05, "loss": 4.0522, "step": 5305 }, { "epoch": 1.7675522611809777, "grad_norm": 0.55078125, "learning_rate": 1.4168658278087165e-05, "loss": 4.0905, "step": 5306 }, { "epoch": 1.7678854001832265, "grad_norm": 0.55078125, "learning_rate": 1.4168287834337623e-05, "loss": 4.1194, "step": 5307 }, { "epoch": 1.768218539185475, "grad_norm": 0.53125, "learning_rate": 1.4167917312916971e-05, "loss": 4.0205, "step": 5308 }, { "epoch": 1.7685516781877237, "grad_norm": 0.53515625, "learning_rate": 1.4167546713829527e-05, "loss": 4.0932, "step": 5309 }, { "epoch": 1.7688848171899725, "grad_norm": 0.54296875, "learning_rate": 1.4167176037079604e-05, "loss": 4.0865, "step": 5310 }, { "epoch": 1.7692179561922212, "grad_norm": 0.55859375, "learning_rate": 1.4166805282671522e-05, "loss": 4.0281, "step": 5311 }, { "epoch": 1.76955109519447, "grad_norm": 0.5390625, "learning_rate": 1.4166434450609597e-05, "loss": 3.9557, "step": 5312 }, { "epoch": 1.7698842341967187, "grad_norm": 0.546875, "learning_rate": 1.4166063540898152e-05, "loss": 4.0998, "step": 5313 }, { "epoch": 1.7702173731989672, "grad_norm": 0.51171875, "learning_rate": 1.4165692553541503e-05, "loss": 4.0903, "step": 5314 }, { "epoch": 1.770550512201216, "grad_norm": 0.5390625, "learning_rate": 1.4165321488543974e-05, "loss": 4.0669, "step": 5315 }, { "epoch": 1.7708836512034647, "grad_norm": 0.5625, "learning_rate": 1.416495034590989e-05, "loss": 4.0282, "step": 5316 }, { "epoch": 1.7712167902057132, "grad_norm": 0.5625, "learning_rate": 1.4164579125643567e-05, "loss": 4.0913, "step": 5317 }, { "epoch": 1.771549929207962, "grad_norm": 0.55078125, "learning_rate": 1.4164207827749336e-05, "loss": 4.0455, "step": 5318 }, { "epoch": 1.7718830682102107, "grad_norm": 0.51171875, "learning_rate": 1.4163836452231516e-05, "loss": 4.043, "step": 5319 }, { "epoch": 1.7722162072124594, "grad_norm": 0.53125, "learning_rate": 1.4163464999094435e-05, "loss": 4.0152, "step": 5320 }, { "epoch": 1.7725493462147082, "grad_norm": 0.53125, "learning_rate": 1.4163093468342422e-05, "loss": 4.0902, "step": 5321 }, { "epoch": 1.772882485216957, "grad_norm": 0.546875, "learning_rate": 1.41627218599798e-05, "loss": 4.0796, "step": 5322 }, { "epoch": 1.7732156242192054, "grad_norm": 0.515625, "learning_rate": 1.4162350174010901e-05, "loss": 4.0887, "step": 5323 }, { "epoch": 1.7735487632214542, "grad_norm": 0.54296875, "learning_rate": 1.4161978410440053e-05, "loss": 4.0734, "step": 5324 }, { "epoch": 1.7738819022237027, "grad_norm": 0.53515625, "learning_rate": 1.4161606569271587e-05, "loss": 4.0766, "step": 5325 }, { "epoch": 1.7742150412259514, "grad_norm": 0.5703125, "learning_rate": 1.4161234650509832e-05, "loss": 3.9623, "step": 5326 }, { "epoch": 1.7745481802282002, "grad_norm": 0.55078125, "learning_rate": 1.4160862654159122e-05, "loss": 4.0411, "step": 5327 }, { "epoch": 1.774881319230449, "grad_norm": 0.5546875, "learning_rate": 1.416049058022379e-05, "loss": 4.1419, "step": 5328 }, { "epoch": 1.7752144582326976, "grad_norm": 0.53125, "learning_rate": 1.4160118428708168e-05, "loss": 4.1146, "step": 5329 }, { "epoch": 1.7755475972349464, "grad_norm": 0.5703125, "learning_rate": 1.4159746199616594e-05, "loss": 4.0521, "step": 5330 }, { "epoch": 1.7758807362371951, "grad_norm": 0.5546875, "learning_rate": 1.4159373892953402e-05, "loss": 4.1006, "step": 5331 }, { "epoch": 1.7762138752394436, "grad_norm": 0.5546875, "learning_rate": 1.4159001508722927e-05, "loss": 4.004, "step": 5332 }, { "epoch": 1.7765470142416924, "grad_norm": 0.546875, "learning_rate": 1.4158629046929506e-05, "loss": 4.0893, "step": 5333 }, { "epoch": 1.7768801532439409, "grad_norm": 0.55078125, "learning_rate": 1.4158256507577481e-05, "loss": 4.037, "step": 5334 }, { "epoch": 1.7772132922461896, "grad_norm": 0.55078125, "learning_rate": 1.415788389067119e-05, "loss": 4.0734, "step": 5335 }, { "epoch": 1.7775464312484384, "grad_norm": 0.53125, "learning_rate": 1.4157511196214972e-05, "loss": 4.0396, "step": 5336 }, { "epoch": 1.777879570250687, "grad_norm": 0.5625, "learning_rate": 1.4157138424213168e-05, "loss": 4.1741, "step": 5337 }, { "epoch": 1.7782127092529358, "grad_norm": 0.5390625, "learning_rate": 1.4156765574670124e-05, "loss": 4.0316, "step": 5338 }, { "epoch": 1.7785458482551846, "grad_norm": 0.52734375, "learning_rate": 1.4156392647590177e-05, "loss": 4.1354, "step": 5339 }, { "epoch": 1.7788789872574333, "grad_norm": 0.5703125, "learning_rate": 1.4156019642977672e-05, "loss": 4.0391, "step": 5340 }, { "epoch": 1.7792121262596818, "grad_norm": 0.546875, "learning_rate": 1.4155646560836957e-05, "loss": 4.0531, "step": 5341 }, { "epoch": 1.7795452652619306, "grad_norm": 0.5546875, "learning_rate": 1.4155273401172376e-05, "loss": 4.0892, "step": 5342 }, { "epoch": 1.779878404264179, "grad_norm": 0.51953125, "learning_rate": 1.4154900163988276e-05, "loss": 4.1139, "step": 5343 }, { "epoch": 1.7802115432664278, "grad_norm": 0.5234375, "learning_rate": 1.4154526849289e-05, "loss": 4.1272, "step": 5344 }, { "epoch": 1.7805446822686766, "grad_norm": 0.51953125, "learning_rate": 1.4154153457078903e-05, "loss": 4.0546, "step": 5345 }, { "epoch": 1.7808778212709253, "grad_norm": 0.53125, "learning_rate": 1.415377998736233e-05, "loss": 4.0958, "step": 5346 }, { "epoch": 1.781210960273174, "grad_norm": 0.546875, "learning_rate": 1.4153406440143631e-05, "loss": 4.0796, "step": 5347 }, { "epoch": 1.7815440992754228, "grad_norm": 0.52734375, "learning_rate": 1.415303281542716e-05, "loss": 4.0288, "step": 5348 }, { "epoch": 1.7818772382776713, "grad_norm": 0.5546875, "learning_rate": 1.4152659113217267e-05, "loss": 4.0306, "step": 5349 }, { "epoch": 1.78221037727992, "grad_norm": 0.53125, "learning_rate": 1.4152285333518305e-05, "loss": 4.0753, "step": 5350 }, { "epoch": 1.7825435162821688, "grad_norm": 0.5546875, "learning_rate": 1.4151911476334627e-05, "loss": 4.0687, "step": 5351 }, { "epoch": 1.7828766552844173, "grad_norm": 0.5390625, "learning_rate": 1.415153754167059e-05, "loss": 4.0966, "step": 5352 }, { "epoch": 1.783209794286666, "grad_norm": 0.55078125, "learning_rate": 1.4151163529530546e-05, "loss": 4.0487, "step": 5353 }, { "epoch": 1.7835429332889148, "grad_norm": 0.5625, "learning_rate": 1.4150789439918853e-05, "loss": 4.0329, "step": 5354 }, { "epoch": 1.7838760722911635, "grad_norm": 0.5625, "learning_rate": 1.415041527283987e-05, "loss": 4.0862, "step": 5355 }, { "epoch": 1.7842092112934123, "grad_norm": 0.546875, "learning_rate": 1.4150041028297953e-05, "loss": 4.0492, "step": 5356 }, { "epoch": 1.784542350295661, "grad_norm": 0.53125, "learning_rate": 1.4149666706297462e-05, "loss": 4.0571, "step": 5357 }, { "epoch": 1.7848754892979095, "grad_norm": 0.54296875, "learning_rate": 1.4149292306842759e-05, "loss": 4.0507, "step": 5358 }, { "epoch": 1.7852086283001583, "grad_norm": 0.53125, "learning_rate": 1.4148917829938199e-05, "loss": 4.0356, "step": 5359 }, { "epoch": 1.7855417673024068, "grad_norm": 0.5625, "learning_rate": 1.414854327558815e-05, "loss": 4.0219, "step": 5360 }, { "epoch": 1.7858749063046555, "grad_norm": 0.53125, "learning_rate": 1.4148168643796973e-05, "loss": 4.0893, "step": 5361 }, { "epoch": 1.7862080453069042, "grad_norm": 0.5546875, "learning_rate": 1.414779393456903e-05, "loss": 4.0799, "step": 5362 }, { "epoch": 1.786541184309153, "grad_norm": 0.5390625, "learning_rate": 1.414741914790869e-05, "loss": 4.0259, "step": 5363 }, { "epoch": 1.7868743233114017, "grad_norm": 0.51171875, "learning_rate": 1.4147044283820312e-05, "loss": 4.0471, "step": 5364 }, { "epoch": 1.7872074623136505, "grad_norm": 0.54296875, "learning_rate": 1.4146669342308265e-05, "loss": 4.0986, "step": 5365 }, { "epoch": 1.7875406013158992, "grad_norm": 0.54296875, "learning_rate": 1.4146294323376919e-05, "loss": 4.0636, "step": 5366 }, { "epoch": 1.7878737403181477, "grad_norm": 0.52734375, "learning_rate": 1.4145919227030639e-05, "loss": 4.0458, "step": 5367 }, { "epoch": 1.7882068793203965, "grad_norm": 0.5234375, "learning_rate": 1.4145544053273796e-05, "loss": 4.0992, "step": 5368 }, { "epoch": 1.788540018322645, "grad_norm": 0.546875, "learning_rate": 1.4145168802110757e-05, "loss": 4.0744, "step": 5369 }, { "epoch": 1.7888731573248937, "grad_norm": 0.5546875, "learning_rate": 1.4144793473545897e-05, "loss": 4.0761, "step": 5370 }, { "epoch": 1.7892062963271425, "grad_norm": 0.5390625, "learning_rate": 1.4144418067583584e-05, "loss": 4.0514, "step": 5371 }, { "epoch": 1.7895394353293912, "grad_norm": 0.55859375, "learning_rate": 1.4144042584228192e-05, "loss": 4.0822, "step": 5372 }, { "epoch": 1.78987257433164, "grad_norm": 0.5546875, "learning_rate": 1.4143667023484096e-05, "loss": 4.07, "step": 5373 }, { "epoch": 1.7902057133338887, "grad_norm": 0.53125, "learning_rate": 1.4143291385355668e-05, "loss": 4.031, "step": 5374 }, { "epoch": 1.7905388523361374, "grad_norm": 0.51953125, "learning_rate": 1.4142915669847288e-05, "loss": 4.0569, "step": 5375 }, { "epoch": 1.790871991338386, "grad_norm": 0.53515625, "learning_rate": 1.4142539876963326e-05, "loss": 4.1096, "step": 5376 }, { "epoch": 1.7912051303406347, "grad_norm": 0.55078125, "learning_rate": 1.4142164006708162e-05, "loss": 4.0016, "step": 5377 }, { "epoch": 1.7915382693428832, "grad_norm": 0.52734375, "learning_rate": 1.4141788059086175e-05, "loss": 4.0404, "step": 5378 }, { "epoch": 1.791871408345132, "grad_norm": 0.51953125, "learning_rate": 1.4141412034101746e-05, "loss": 4.0149, "step": 5379 }, { "epoch": 1.7922045473473807, "grad_norm": 0.56640625, "learning_rate": 1.414103593175925e-05, "loss": 3.9948, "step": 5380 }, { "epoch": 1.7925376863496294, "grad_norm": 0.55859375, "learning_rate": 1.4140659752063069e-05, "loss": 4.0118, "step": 5381 }, { "epoch": 1.7928708253518781, "grad_norm": 0.53515625, "learning_rate": 1.4140283495017587e-05, "loss": 4.1047, "step": 5382 }, { "epoch": 1.7932039643541269, "grad_norm": 0.52734375, "learning_rate": 1.4139907160627182e-05, "loss": 4.0459, "step": 5383 }, { "epoch": 1.7935371033563754, "grad_norm": 0.58203125, "learning_rate": 1.4139530748896243e-05, "loss": 4.0626, "step": 5384 }, { "epoch": 1.7938702423586241, "grad_norm": 0.5625, "learning_rate": 1.4139154259829152e-05, "loss": 4.129, "step": 5385 }, { "epoch": 1.7942033813608729, "grad_norm": 0.546875, "learning_rate": 1.4138777693430293e-05, "loss": 4.0617, "step": 5386 }, { "epoch": 1.7945365203631214, "grad_norm": 0.5390625, "learning_rate": 1.4138401049704055e-05, "loss": 4.086, "step": 5387 }, { "epoch": 1.7948696593653701, "grad_norm": 0.55859375, "learning_rate": 1.4138024328654823e-05, "loss": 4.0641, "step": 5388 }, { "epoch": 1.7952027983676189, "grad_norm": 0.5390625, "learning_rate": 1.4137647530286986e-05, "loss": 3.9941, "step": 5389 }, { "epoch": 1.7955359373698676, "grad_norm": 0.56640625, "learning_rate": 1.413727065460493e-05, "loss": 4.0377, "step": 5390 }, { "epoch": 1.7958690763721163, "grad_norm": 0.5546875, "learning_rate": 1.413689370161305e-05, "loss": 4.056, "step": 5391 }, { "epoch": 1.796202215374365, "grad_norm": 0.55078125, "learning_rate": 1.4136516671315732e-05, "loss": 4.0939, "step": 5392 }, { "epoch": 1.7965353543766136, "grad_norm": 0.54296875, "learning_rate": 1.413613956371737e-05, "loss": 4.1025, "step": 5393 }, { "epoch": 1.7968684933788623, "grad_norm": 0.55078125, "learning_rate": 1.4135762378822355e-05, "loss": 4.0785, "step": 5394 }, { "epoch": 1.7972016323811109, "grad_norm": 0.53125, "learning_rate": 1.4135385116635084e-05, "loss": 4.0039, "step": 5395 }, { "epoch": 1.7975347713833596, "grad_norm": 0.53515625, "learning_rate": 1.4135007777159946e-05, "loss": 4.0343, "step": 5396 }, { "epoch": 1.7978679103856083, "grad_norm": 0.55078125, "learning_rate": 1.413463036040134e-05, "loss": 4.1145, "step": 5397 }, { "epoch": 1.798201049387857, "grad_norm": 0.5625, "learning_rate": 1.413425286636366e-05, "loss": 4.0455, "step": 5398 }, { "epoch": 1.7985341883901058, "grad_norm": 0.56640625, "learning_rate": 1.4133875295051305e-05, "loss": 4.0454, "step": 5399 }, { "epoch": 1.7988673273923546, "grad_norm": 0.53125, "learning_rate": 1.4133497646468672e-05, "loss": 4.1063, "step": 5400 }, { "epoch": 1.7992004663946033, "grad_norm": 0.57421875, "learning_rate": 1.413311992062016e-05, "loss": 4.0751, "step": 5401 }, { "epoch": 1.7995336053968518, "grad_norm": 0.5625, "learning_rate": 1.4132742117510168e-05, "loss": 4.0587, "step": 5402 }, { "epoch": 1.7998667443991005, "grad_norm": 0.53125, "learning_rate": 1.4132364237143095e-05, "loss": 4.1019, "step": 5403 }, { "epoch": 1.800199883401349, "grad_norm": 0.5234375, "learning_rate": 1.4131986279523344e-05, "loss": 4.1145, "step": 5404 }, { "epoch": 1.8005330224035978, "grad_norm": 0.5390625, "learning_rate": 1.413160824465532e-05, "loss": 4.0966, "step": 5405 }, { "epoch": 1.8008661614058465, "grad_norm": 0.5390625, "learning_rate": 1.4131230132543424e-05, "loss": 4.0188, "step": 5406 }, { "epoch": 1.8011993004080953, "grad_norm": 0.54296875, "learning_rate": 1.413085194319206e-05, "loss": 4.0927, "step": 5407 }, { "epoch": 1.801532439410344, "grad_norm": 0.58203125, "learning_rate": 1.4130473676605631e-05, "loss": 4.0225, "step": 5408 }, { "epoch": 1.8018655784125928, "grad_norm": 0.52734375, "learning_rate": 1.4130095332788547e-05, "loss": 4.0803, "step": 5409 }, { "epoch": 1.8021987174148415, "grad_norm": 0.53125, "learning_rate": 1.4129716911745212e-05, "loss": 4.0231, "step": 5410 }, { "epoch": 1.80253185641709, "grad_norm": 0.5390625, "learning_rate": 1.4129338413480035e-05, "loss": 4.1328, "step": 5411 }, { "epoch": 1.8028649954193388, "grad_norm": 0.53125, "learning_rate": 1.4128959837997426e-05, "loss": 4.1151, "step": 5412 }, { "epoch": 1.8031981344215873, "grad_norm": 0.5390625, "learning_rate": 1.4128581185301795e-05, "loss": 4.0751, "step": 5413 }, { "epoch": 1.803531273423836, "grad_norm": 0.5546875, "learning_rate": 1.4128202455397547e-05, "loss": 3.9791, "step": 5414 }, { "epoch": 1.8038644124260848, "grad_norm": 0.55078125, "learning_rate": 1.4127823648289099e-05, "loss": 4.1084, "step": 5415 }, { "epoch": 1.8041975514283335, "grad_norm": 0.5390625, "learning_rate": 1.412744476398086e-05, "loss": 4.1111, "step": 5416 }, { "epoch": 1.8045306904305822, "grad_norm": 0.55078125, "learning_rate": 1.4127065802477244e-05, "loss": 4.0919, "step": 5417 }, { "epoch": 1.804863829432831, "grad_norm": 0.5546875, "learning_rate": 1.4126686763782668e-05, "loss": 4.0343, "step": 5418 }, { "epoch": 1.8051969684350795, "grad_norm": 0.5390625, "learning_rate": 1.4126307647901542e-05, "loss": 4.0328, "step": 5419 }, { "epoch": 1.8055301074373282, "grad_norm": 0.55078125, "learning_rate": 1.4125928454838286e-05, "loss": 4.0032, "step": 5420 }, { "epoch": 1.805863246439577, "grad_norm": 0.55078125, "learning_rate": 1.4125549184597315e-05, "loss": 4.0694, "step": 5421 }, { "epoch": 1.8061963854418255, "grad_norm": 0.53515625, "learning_rate": 1.4125169837183047e-05, "loss": 4.0758, "step": 5422 }, { "epoch": 1.8065295244440742, "grad_norm": 0.55859375, "learning_rate": 1.4124790412599901e-05, "loss": 4.0412, "step": 5423 }, { "epoch": 1.806862663446323, "grad_norm": 0.53125, "learning_rate": 1.4124410910852295e-05, "loss": 4.106, "step": 5424 }, { "epoch": 1.8071958024485717, "grad_norm": 0.58984375, "learning_rate": 1.412403133194465e-05, "loss": 4.0435, "step": 5425 }, { "epoch": 1.8075289414508204, "grad_norm": 0.5625, "learning_rate": 1.4123651675881388e-05, "loss": 4.1359, "step": 5426 }, { "epoch": 1.8078620804530692, "grad_norm": 0.546875, "learning_rate": 1.4123271942666933e-05, "loss": 4.0838, "step": 5427 }, { "epoch": 1.8081952194553177, "grad_norm": 0.53125, "learning_rate": 1.4122892132305704e-05, "loss": 4.0936, "step": 5428 }, { "epoch": 1.8085283584575664, "grad_norm": 0.5546875, "learning_rate": 1.4122512244802126e-05, "loss": 4.0718, "step": 5429 }, { "epoch": 1.808861497459815, "grad_norm": 0.5078125, "learning_rate": 1.4122132280160625e-05, "loss": 4.0794, "step": 5430 }, { "epoch": 1.8091946364620637, "grad_norm": 0.546875, "learning_rate": 1.4121752238385628e-05, "loss": 4.119, "step": 5431 }, { "epoch": 1.8095277754643124, "grad_norm": 0.56640625, "learning_rate": 1.412137211948156e-05, "loss": 4.0195, "step": 5432 }, { "epoch": 1.8098609144665612, "grad_norm": 0.515625, "learning_rate": 1.4120991923452845e-05, "loss": 4.0551, "step": 5433 }, { "epoch": 1.81019405346881, "grad_norm": 0.57421875, "learning_rate": 1.412061165030392e-05, "loss": 4.0739, "step": 5434 }, { "epoch": 1.8105271924710586, "grad_norm": 0.55078125, "learning_rate": 1.4120231300039206e-05, "loss": 4.019, "step": 5435 }, { "epoch": 1.8108603314733074, "grad_norm": 0.546875, "learning_rate": 1.4119850872663138e-05, "loss": 4.0388, "step": 5436 }, { "epoch": 1.811193470475556, "grad_norm": 0.54296875, "learning_rate": 1.4119470368180145e-05, "loss": 4.0831, "step": 5437 }, { "epoch": 1.8115266094778046, "grad_norm": 0.54296875, "learning_rate": 1.4119089786594663e-05, "loss": 4.0345, "step": 5438 }, { "epoch": 1.8118597484800532, "grad_norm": 0.55859375, "learning_rate": 1.4118709127911118e-05, "loss": 4.0636, "step": 5439 }, { "epoch": 1.812192887482302, "grad_norm": 0.5703125, "learning_rate": 1.4118328392133952e-05, "loss": 4.1413, "step": 5440 }, { "epoch": 1.8125260264845506, "grad_norm": 0.546875, "learning_rate": 1.4117947579267592e-05, "loss": 3.9882, "step": 5441 }, { "epoch": 1.8128591654867994, "grad_norm": 0.5234375, "learning_rate": 1.411756668931648e-05, "loss": 4.0728, "step": 5442 }, { "epoch": 1.8131923044890481, "grad_norm": 0.53515625, "learning_rate": 1.4117185722285047e-05, "loss": 4.0439, "step": 5443 }, { "epoch": 1.8135254434912969, "grad_norm": 0.54296875, "learning_rate": 1.4116804678177735e-05, "loss": 4.1405, "step": 5444 }, { "epoch": 1.8138585824935456, "grad_norm": 0.5390625, "learning_rate": 1.4116423556998978e-05, "loss": 4.0902, "step": 5445 }, { "epoch": 1.814191721495794, "grad_norm": 0.5390625, "learning_rate": 1.4116042358753219e-05, "loss": 4.1307, "step": 5446 }, { "epoch": 1.8145248604980428, "grad_norm": 0.5625, "learning_rate": 1.4115661083444898e-05, "loss": 4.0759, "step": 5447 }, { "epoch": 1.8148579995002914, "grad_norm": 0.5390625, "learning_rate": 1.4115279731078456e-05, "loss": 4.0873, "step": 5448 }, { "epoch": 1.81519113850254, "grad_norm": 0.54296875, "learning_rate": 1.411489830165833e-05, "loss": 4.017, "step": 5449 }, { "epoch": 1.8155242775047888, "grad_norm": 0.55859375, "learning_rate": 1.4114516795188968e-05, "loss": 4.024, "step": 5450 }, { "epoch": 1.8158574165070376, "grad_norm": 0.52734375, "learning_rate": 1.4114135211674814e-05, "loss": 4.0988, "step": 5451 }, { "epoch": 1.8161905555092863, "grad_norm": 0.55859375, "learning_rate": 1.4113753551120308e-05, "loss": 4.1173, "step": 5452 }, { "epoch": 1.816523694511535, "grad_norm": 0.5390625, "learning_rate": 1.4113371813529903e-05, "loss": 4.0638, "step": 5453 }, { "epoch": 1.8168568335137836, "grad_norm": 0.53125, "learning_rate": 1.4112989998908037e-05, "loss": 4.0127, "step": 5454 }, { "epoch": 1.8171899725160323, "grad_norm": 0.5234375, "learning_rate": 1.4112608107259163e-05, "loss": 4.0814, "step": 5455 }, { "epoch": 1.817523111518281, "grad_norm": 0.53125, "learning_rate": 1.4112226138587727e-05, "loss": 4.0245, "step": 5456 }, { "epoch": 1.8178562505205296, "grad_norm": 0.54296875, "learning_rate": 1.4111844092898177e-05, "loss": 4.0217, "step": 5457 }, { "epoch": 1.8181893895227783, "grad_norm": 0.53515625, "learning_rate": 1.4111461970194967e-05, "loss": 4.0681, "step": 5458 }, { "epoch": 1.818522528525027, "grad_norm": 0.55859375, "learning_rate": 1.4111079770482545e-05, "loss": 4.0879, "step": 5459 }, { "epoch": 1.8188556675272758, "grad_norm": 0.546875, "learning_rate": 1.4110697493765362e-05, "loss": 4.0329, "step": 5460 }, { "epoch": 1.8191888065295245, "grad_norm": 0.51171875, "learning_rate": 1.4110315140047872e-05, "loss": 4.1098, "step": 5461 }, { "epoch": 1.8195219455317733, "grad_norm": 0.5546875, "learning_rate": 1.4109932709334529e-05, "loss": 4.0806, "step": 5462 }, { "epoch": 1.8198550845340218, "grad_norm": 0.53125, "learning_rate": 1.4109550201629789e-05, "loss": 4.139, "step": 5463 }, { "epoch": 1.8201882235362705, "grad_norm": 0.53515625, "learning_rate": 1.41091676169381e-05, "loss": 4.0667, "step": 5464 }, { "epoch": 1.820521362538519, "grad_norm": 0.53125, "learning_rate": 1.410878495526393e-05, "loss": 4.0954, "step": 5465 }, { "epoch": 1.8208545015407678, "grad_norm": 0.5390625, "learning_rate": 1.4108402216611728e-05, "loss": 4.1109, "step": 5466 }, { "epoch": 1.8211876405430165, "grad_norm": 0.546875, "learning_rate": 1.4108019400985953e-05, "loss": 4.0629, "step": 5467 }, { "epoch": 1.8215207795452653, "grad_norm": 0.546875, "learning_rate": 1.4107636508391063e-05, "loss": 4.0423, "step": 5468 }, { "epoch": 1.821853918547514, "grad_norm": 0.54296875, "learning_rate": 1.4107253538831523e-05, "loss": 4.0628, "step": 5469 }, { "epoch": 1.8221870575497627, "grad_norm": 0.54296875, "learning_rate": 1.4106870492311788e-05, "loss": 4.0471, "step": 5470 }, { "epoch": 1.8225201965520115, "grad_norm": 0.5703125, "learning_rate": 1.4106487368836325e-05, "loss": 4.0351, "step": 5471 }, { "epoch": 1.82285333555426, "grad_norm": 0.5546875, "learning_rate": 1.4106104168409592e-05, "loss": 4.0126, "step": 5472 }, { "epoch": 1.8231864745565087, "grad_norm": 0.57421875, "learning_rate": 1.4105720891036054e-05, "loss": 3.9431, "step": 5473 }, { "epoch": 1.8235196135587572, "grad_norm": 0.55078125, "learning_rate": 1.4105337536720177e-05, "loss": 4.0537, "step": 5474 }, { "epoch": 1.823852752561006, "grad_norm": 0.5234375, "learning_rate": 1.4104954105466426e-05, "loss": 4.0806, "step": 5475 }, { "epoch": 1.8241858915632547, "grad_norm": 0.5546875, "learning_rate": 1.4104570597279262e-05, "loss": 4.0505, "step": 5476 }, { "epoch": 1.8245190305655035, "grad_norm": 0.53515625, "learning_rate": 1.4104187012163158e-05, "loss": 4.0661, "step": 5477 }, { "epoch": 1.8248521695677522, "grad_norm": 0.52734375, "learning_rate": 1.410380335012258e-05, "loss": 4.0359, "step": 5478 }, { "epoch": 1.825185308570001, "grad_norm": 0.54296875, "learning_rate": 1.4103419611161996e-05, "loss": 4.0135, "step": 5479 }, { "epoch": 1.8255184475722497, "grad_norm": 0.54296875, "learning_rate": 1.4103035795285878e-05, "loss": 4.0531, "step": 5480 }, { "epoch": 1.8258515865744982, "grad_norm": 0.5625, "learning_rate": 1.4102651902498695e-05, "loss": 3.9931, "step": 5481 }, { "epoch": 1.826184725576747, "grad_norm": 0.54296875, "learning_rate": 1.4102267932804918e-05, "loss": 4.0642, "step": 5482 }, { "epoch": 1.8265178645789955, "grad_norm": 0.56640625, "learning_rate": 1.410188388620902e-05, "loss": 4.0519, "step": 5483 }, { "epoch": 1.8268510035812442, "grad_norm": 0.55859375, "learning_rate": 1.4101499762715474e-05, "loss": 4.1183, "step": 5484 }, { "epoch": 1.827184142583493, "grad_norm": 0.5234375, "learning_rate": 1.4101115562328755e-05, "loss": 4.0377, "step": 5485 }, { "epoch": 1.8275172815857417, "grad_norm": 0.55859375, "learning_rate": 1.4100731285053338e-05, "loss": 4.0151, "step": 5486 }, { "epoch": 1.8278504205879904, "grad_norm": 0.53125, "learning_rate": 1.4100346930893699e-05, "loss": 4.0601, "step": 5487 }, { "epoch": 1.8281835595902391, "grad_norm": 0.53125, "learning_rate": 1.4099962499854315e-05, "loss": 4.1067, "step": 5488 }, { "epoch": 1.8285166985924877, "grad_norm": 0.546875, "learning_rate": 1.4099577991939663e-05, "loss": 4.0931, "step": 5489 }, { "epoch": 1.8288498375947364, "grad_norm": 0.5703125, "learning_rate": 1.4099193407154221e-05, "loss": 4.0773, "step": 5490 }, { "epoch": 1.8291829765969851, "grad_norm": 0.54296875, "learning_rate": 1.409880874550247e-05, "loss": 4.0775, "step": 5491 }, { "epoch": 1.8295161155992337, "grad_norm": 0.51953125, "learning_rate": 1.4098424006988889e-05, "loss": 4.0722, "step": 5492 }, { "epoch": 1.8298492546014824, "grad_norm": 0.5390625, "learning_rate": 1.4098039191617965e-05, "loss": 4.0291, "step": 5493 }, { "epoch": 1.8301823936037311, "grad_norm": 0.53515625, "learning_rate": 1.4097654299394171e-05, "loss": 4.0422, "step": 5494 }, { "epoch": 1.8305155326059799, "grad_norm": 0.54296875, "learning_rate": 1.4097269330321995e-05, "loss": 4.0513, "step": 5495 }, { "epoch": 1.8308486716082286, "grad_norm": 0.55078125, "learning_rate": 1.4096884284405923e-05, "loss": 4.0944, "step": 5496 }, { "epoch": 1.8311818106104774, "grad_norm": 0.5390625, "learning_rate": 1.4096499161650436e-05, "loss": 4.0356, "step": 5497 }, { "epoch": 1.8315149496127259, "grad_norm": 0.56640625, "learning_rate": 1.4096113962060026e-05, "loss": 4.0751, "step": 5498 }, { "epoch": 1.8318480886149746, "grad_norm": 0.54296875, "learning_rate": 1.4095728685639172e-05, "loss": 4.0706, "step": 5499 }, { "epoch": 1.8321812276172231, "grad_norm": 0.5703125, "learning_rate": 1.4095343332392366e-05, "loss": 4.0242, "step": 5500 }, { "epoch": 1.8325143666194719, "grad_norm": 0.5625, "learning_rate": 1.4094957902324096e-05, "loss": 4.0073, "step": 5501 }, { "epoch": 1.8328475056217206, "grad_norm": 0.546875, "learning_rate": 1.4094572395438848e-05, "loss": 4.0564, "step": 5502 }, { "epoch": 1.8331806446239693, "grad_norm": 0.5546875, "learning_rate": 1.409418681174112e-05, "loss": 4.0359, "step": 5503 }, { "epoch": 1.833513783626218, "grad_norm": 0.5390625, "learning_rate": 1.4093801151235396e-05, "loss": 4.0849, "step": 5504 }, { "epoch": 1.8338469226284668, "grad_norm": 0.55078125, "learning_rate": 1.4093415413926172e-05, "loss": 4.0247, "step": 5505 }, { "epoch": 1.8341800616307156, "grad_norm": 0.57421875, "learning_rate": 1.4093029599817936e-05, "loss": 4.0601, "step": 5506 }, { "epoch": 1.834513200632964, "grad_norm": 0.55859375, "learning_rate": 1.409264370891519e-05, "loss": 4.0029, "step": 5507 }, { "epoch": 1.8348463396352128, "grad_norm": 0.5390625, "learning_rate": 1.4092257741222423e-05, "loss": 4.0424, "step": 5508 }, { "epoch": 1.8351794786374613, "grad_norm": 0.546875, "learning_rate": 1.4091871696744132e-05, "loss": 4.0446, "step": 5509 }, { "epoch": 1.83551261763971, "grad_norm": 0.53515625, "learning_rate": 1.4091485575484814e-05, "loss": 4.0853, "step": 5510 }, { "epoch": 1.8358457566419588, "grad_norm": 0.515625, "learning_rate": 1.4091099377448968e-05, "loss": 4.066, "step": 5511 }, { "epoch": 1.8361788956442076, "grad_norm": 0.55859375, "learning_rate": 1.4090713102641087e-05, "loss": 3.9973, "step": 5512 }, { "epoch": 1.8365120346464563, "grad_norm": 0.53515625, "learning_rate": 1.4090326751065675e-05, "loss": 4.1717, "step": 5513 }, { "epoch": 1.836845173648705, "grad_norm": 0.55078125, "learning_rate": 1.4089940322727232e-05, "loss": 4.0795, "step": 5514 }, { "epoch": 1.8371783126509538, "grad_norm": 0.53515625, "learning_rate": 1.4089553817630256e-05, "loss": 4.0628, "step": 5515 }, { "epoch": 1.8375114516532023, "grad_norm": 0.546875, "learning_rate": 1.4089167235779255e-05, "loss": 4.0524, "step": 5516 }, { "epoch": 1.837844590655451, "grad_norm": 0.5625, "learning_rate": 1.4088780577178725e-05, "loss": 4.1316, "step": 5517 }, { "epoch": 1.8381777296576995, "grad_norm": 0.55859375, "learning_rate": 1.4088393841833173e-05, "loss": 4.0941, "step": 5518 }, { "epoch": 1.8385108686599483, "grad_norm": 0.5234375, "learning_rate": 1.4088007029747101e-05, "loss": 4.1295, "step": 5519 }, { "epoch": 1.838844007662197, "grad_norm": 0.53125, "learning_rate": 1.4087620140925019e-05, "loss": 4.0795, "step": 5520 }, { "epoch": 1.8391771466644458, "grad_norm": 0.53125, "learning_rate": 1.4087233175371433e-05, "loss": 4.0939, "step": 5521 }, { "epoch": 1.8395102856666945, "grad_norm": 0.52734375, "learning_rate": 1.4086846133090844e-05, "loss": 4.0226, "step": 5522 }, { "epoch": 1.8398434246689432, "grad_norm": 0.53515625, "learning_rate": 1.4086459014087767e-05, "loss": 4.1162, "step": 5523 }, { "epoch": 1.8401765636711918, "grad_norm": 0.54296875, "learning_rate": 1.4086071818366707e-05, "loss": 4.0944, "step": 5524 }, { "epoch": 1.8405097026734405, "grad_norm": 0.5546875, "learning_rate": 1.408568454593218e-05, "loss": 3.9984, "step": 5525 }, { "epoch": 1.8408428416756892, "grad_norm": 0.52734375, "learning_rate": 1.408529719678869e-05, "loss": 4.0505, "step": 5526 }, { "epoch": 1.8411759806779378, "grad_norm": 0.55078125, "learning_rate": 1.408490977094075e-05, "loss": 4.0578, "step": 5527 }, { "epoch": 1.8415091196801865, "grad_norm": 0.515625, "learning_rate": 1.4084522268392875e-05, "loss": 4.1264, "step": 5528 }, { "epoch": 1.8418422586824352, "grad_norm": 0.5625, "learning_rate": 1.4084134689149578e-05, "loss": 4.065, "step": 5529 }, { "epoch": 1.842175397684684, "grad_norm": 0.5390625, "learning_rate": 1.4083747033215373e-05, "loss": 4.0469, "step": 5530 }, { "epoch": 1.8425085366869327, "grad_norm": 0.54296875, "learning_rate": 1.4083359300594775e-05, "loss": 4.0223, "step": 5531 }, { "epoch": 1.8428416756891814, "grad_norm": 0.5546875, "learning_rate": 1.4082971491292304e-05, "loss": 4.0168, "step": 5532 }, { "epoch": 1.84317481469143, "grad_norm": 0.55078125, "learning_rate": 1.408258360531247e-05, "loss": 4.0428, "step": 5533 }, { "epoch": 1.8435079536936787, "grad_norm": 0.53125, "learning_rate": 1.4082195642659794e-05, "loss": 4.0694, "step": 5534 }, { "epoch": 1.8438410926959272, "grad_norm": 0.55078125, "learning_rate": 1.4081807603338799e-05, "loss": 4.0817, "step": 5535 }, { "epoch": 1.844174231698176, "grad_norm": 0.54296875, "learning_rate": 1.4081419487354e-05, "loss": 4.0668, "step": 5536 }, { "epoch": 1.8445073707004247, "grad_norm": 0.5859375, "learning_rate": 1.408103129470992e-05, "loss": 3.9758, "step": 5537 }, { "epoch": 1.8448405097026734, "grad_norm": 0.55859375, "learning_rate": 1.408064302541108e-05, "loss": 4.0412, "step": 5538 }, { "epoch": 1.8451736487049222, "grad_norm": 0.5546875, "learning_rate": 1.4080254679462e-05, "loss": 4.0365, "step": 5539 }, { "epoch": 1.845506787707171, "grad_norm": 0.55859375, "learning_rate": 1.4079866256867208e-05, "loss": 4.0089, "step": 5540 }, { "epoch": 1.8458399267094197, "grad_norm": 0.5390625, "learning_rate": 1.4079477757631225e-05, "loss": 4.1454, "step": 5541 }, { "epoch": 1.8461730657116682, "grad_norm": 0.55078125, "learning_rate": 1.407908918175858e-05, "loss": 4.0331, "step": 5542 }, { "epoch": 1.846506204713917, "grad_norm": 0.57421875, "learning_rate": 1.4078700529253793e-05, "loss": 4.0689, "step": 5543 }, { "epoch": 1.8468393437161654, "grad_norm": 0.53125, "learning_rate": 1.4078311800121395e-05, "loss": 4.1348, "step": 5544 }, { "epoch": 1.8471724827184142, "grad_norm": 0.5625, "learning_rate": 1.4077922994365914e-05, "loss": 4.0357, "step": 5545 }, { "epoch": 1.847505621720663, "grad_norm": 0.58984375, "learning_rate": 1.4077534111991879e-05, "loss": 4.037, "step": 5546 }, { "epoch": 1.8478387607229116, "grad_norm": 0.546875, "learning_rate": 1.4077145153003816e-05, "loss": 4.0113, "step": 5547 }, { "epoch": 1.8481718997251604, "grad_norm": 0.546875, "learning_rate": 1.407675611740626e-05, "loss": 4.0857, "step": 5548 }, { "epoch": 1.8485050387274091, "grad_norm": 0.5546875, "learning_rate": 1.407636700520374e-05, "loss": 4.0601, "step": 5549 }, { "epoch": 1.8488381777296579, "grad_norm": 0.67578125, "learning_rate": 1.4075977816400788e-05, "loss": 4.0344, "step": 5550 }, { "epoch": 1.8491713167319064, "grad_norm": 0.56640625, "learning_rate": 1.4075588551001937e-05, "loss": 3.9895, "step": 5551 }, { "epoch": 1.8495044557341551, "grad_norm": 0.55859375, "learning_rate": 1.4075199209011723e-05, "loss": 4.0981, "step": 5552 }, { "epoch": 1.8498375947364036, "grad_norm": 0.53125, "learning_rate": 1.407480979043468e-05, "loss": 4.0504, "step": 5553 }, { "epoch": 1.8501707337386524, "grad_norm": 0.5859375, "learning_rate": 1.4074420295275345e-05, "loss": 4.06, "step": 5554 }, { "epoch": 1.8505038727409011, "grad_norm": 0.55078125, "learning_rate": 1.4074030723538252e-05, "loss": 4.0814, "step": 5555 }, { "epoch": 1.8508370117431499, "grad_norm": 0.5625, "learning_rate": 1.4073641075227944e-05, "loss": 4.0231, "step": 5556 }, { "epoch": 1.8511701507453986, "grad_norm": 0.5546875, "learning_rate": 1.4073251350348953e-05, "loss": 4.028, "step": 5557 }, { "epoch": 1.8515032897476473, "grad_norm": 0.57421875, "learning_rate": 1.4072861548905822e-05, "loss": 3.978, "step": 5558 }, { "epoch": 1.8518364287498958, "grad_norm": 0.5390625, "learning_rate": 1.4072471670903092e-05, "loss": 4.0558, "step": 5559 }, { "epoch": 1.8521695677521446, "grad_norm": 0.546875, "learning_rate": 1.40720817163453e-05, "loss": 4.0534, "step": 5560 }, { "epoch": 1.8525027067543933, "grad_norm": 0.55859375, "learning_rate": 1.4071691685236993e-05, "loss": 4.1526, "step": 5561 }, { "epoch": 1.8528358457566418, "grad_norm": 0.546875, "learning_rate": 1.4071301577582713e-05, "loss": 4.1072, "step": 5562 }, { "epoch": 1.8531689847588906, "grad_norm": 0.57421875, "learning_rate": 1.4070911393387e-05, "loss": 4.0877, "step": 5563 }, { "epoch": 1.8535021237611393, "grad_norm": 0.5234375, "learning_rate": 1.4070521132654403e-05, "loss": 4.1148, "step": 5564 }, { "epoch": 1.853835262763388, "grad_norm": 0.54296875, "learning_rate": 1.4070130795389468e-05, "loss": 4.1271, "step": 5565 }, { "epoch": 1.8541684017656368, "grad_norm": 0.5625, "learning_rate": 1.4069740381596741e-05, "loss": 4.0792, "step": 5566 }, { "epoch": 1.8545015407678855, "grad_norm": 0.55078125, "learning_rate": 1.4069349891280764e-05, "loss": 4.0909, "step": 5567 }, { "epoch": 1.854834679770134, "grad_norm": 0.5390625, "learning_rate": 1.4068959324446094e-05, "loss": 4.1098, "step": 5568 }, { "epoch": 1.8551678187723828, "grad_norm": 0.5625, "learning_rate": 1.4068568681097275e-05, "loss": 3.9909, "step": 5569 }, { "epoch": 1.8555009577746313, "grad_norm": 0.5625, "learning_rate": 1.406817796123886e-05, "loss": 4.0616, "step": 5570 }, { "epoch": 1.85583409677688, "grad_norm": 0.5390625, "learning_rate": 1.4067787164875397e-05, "loss": 4.0012, "step": 5571 }, { "epoch": 1.8561672357791288, "grad_norm": 0.53515625, "learning_rate": 1.406739629201144e-05, "loss": 4.0066, "step": 5572 }, { "epoch": 1.8565003747813775, "grad_norm": 0.55078125, "learning_rate": 1.4067005342651539e-05, "loss": 4.0725, "step": 5573 }, { "epoch": 1.8568335137836263, "grad_norm": 0.54296875, "learning_rate": 1.4066614316800253e-05, "loss": 4.0386, "step": 5574 }, { "epoch": 1.857166652785875, "grad_norm": 0.59375, "learning_rate": 1.406622321446213e-05, "loss": 4.0599, "step": 5575 }, { "epoch": 1.8574997917881237, "grad_norm": 0.546875, "learning_rate": 1.4065832035641731e-05, "loss": 4.0381, "step": 5576 }, { "epoch": 1.8578329307903723, "grad_norm": 0.59765625, "learning_rate": 1.406544078034361e-05, "loss": 4.0436, "step": 5577 }, { "epoch": 1.858166069792621, "grad_norm": 0.546875, "learning_rate": 1.4065049448572324e-05, "loss": 3.9525, "step": 5578 }, { "epoch": 1.8584992087948695, "grad_norm": 0.5390625, "learning_rate": 1.4064658040332434e-05, "loss": 4.0619, "step": 5579 }, { "epoch": 1.8588323477971183, "grad_norm": 0.56640625, "learning_rate": 1.4064266555628493e-05, "loss": 4.0016, "step": 5580 }, { "epoch": 1.859165486799367, "grad_norm": 0.59765625, "learning_rate": 1.406387499446507e-05, "loss": 4.0299, "step": 5581 }, { "epoch": 1.8594986258016157, "grad_norm": 0.55078125, "learning_rate": 1.4063483356846716e-05, "loss": 4.0784, "step": 5582 }, { "epoch": 1.8598317648038645, "grad_norm": 0.5625, "learning_rate": 1.4063091642778e-05, "loss": 4.0505, "step": 5583 }, { "epoch": 1.8601649038061132, "grad_norm": 0.546875, "learning_rate": 1.4062699852263481e-05, "loss": 4.13, "step": 5584 }, { "epoch": 1.860498042808362, "grad_norm": 0.56640625, "learning_rate": 1.4062307985307723e-05, "loss": 4.0714, "step": 5585 }, { "epoch": 1.8608311818106105, "grad_norm": 0.54296875, "learning_rate": 1.4061916041915292e-05, "loss": 4.096, "step": 5586 }, { "epoch": 1.8611643208128592, "grad_norm": 0.55859375, "learning_rate": 1.4061524022090753e-05, "loss": 4.082, "step": 5587 }, { "epoch": 1.8614974598151077, "grad_norm": 0.5390625, "learning_rate": 1.4061131925838668e-05, "loss": 4.111, "step": 5588 }, { "epoch": 1.8618305988173565, "grad_norm": 0.5625, "learning_rate": 1.406073975316361e-05, "loss": 4.0005, "step": 5589 }, { "epoch": 1.8621637378196052, "grad_norm": 0.515625, "learning_rate": 1.4060347504070146e-05, "loss": 4.1323, "step": 5590 }, { "epoch": 1.862496876821854, "grad_norm": 0.53515625, "learning_rate": 1.4059955178562839e-05, "loss": 4.0372, "step": 5591 }, { "epoch": 1.8628300158241027, "grad_norm": 0.5390625, "learning_rate": 1.4059562776646265e-05, "loss": 4.0229, "step": 5592 }, { "epoch": 1.8631631548263514, "grad_norm": 0.54296875, "learning_rate": 1.4059170298324994e-05, "loss": 4.0667, "step": 5593 }, { "epoch": 1.8634962938286, "grad_norm": 0.54296875, "learning_rate": 1.4058777743603598e-05, "loss": 4.0902, "step": 5594 }, { "epoch": 1.8638294328308487, "grad_norm": 0.54296875, "learning_rate": 1.4058385112486641e-05, "loss": 4.1338, "step": 5595 }, { "epoch": 1.8641625718330974, "grad_norm": 0.5625, "learning_rate": 1.4057992404978707e-05, "loss": 4.0415, "step": 5596 }, { "epoch": 1.864495710835346, "grad_norm": 0.54296875, "learning_rate": 1.4057599621084365e-05, "loss": 4.1341, "step": 5597 }, { "epoch": 1.8648288498375947, "grad_norm": 0.53515625, "learning_rate": 1.4057206760808193e-05, "loss": 4.1402, "step": 5598 }, { "epoch": 1.8651619888398434, "grad_norm": 0.53515625, "learning_rate": 1.4056813824154763e-05, "loss": 4.0951, "step": 5599 }, { "epoch": 1.8654951278420921, "grad_norm": 0.58203125, "learning_rate": 1.4056420811128655e-05, "loss": 3.9784, "step": 5600 }, { "epoch": 1.8658282668443409, "grad_norm": 0.55078125, "learning_rate": 1.4056027721734446e-05, "loss": 4.0783, "step": 5601 }, { "epoch": 1.8661614058465896, "grad_norm": 0.5390625, "learning_rate": 1.4055634555976715e-05, "loss": 4.0874, "step": 5602 }, { "epoch": 1.8664945448488381, "grad_norm": 0.56640625, "learning_rate": 1.4055241313860038e-05, "loss": 3.9824, "step": 5603 }, { "epoch": 1.8668276838510869, "grad_norm": 0.546875, "learning_rate": 1.4054847995389001e-05, "loss": 4.0881, "step": 5604 }, { "epoch": 1.8671608228533354, "grad_norm": 0.55078125, "learning_rate": 1.4054454600568182e-05, "loss": 3.9564, "step": 5605 }, { "epoch": 1.8674939618555841, "grad_norm": 0.53125, "learning_rate": 1.4054061129402165e-05, "loss": 4.0738, "step": 5606 }, { "epoch": 1.8678271008578329, "grad_norm": 0.5625, "learning_rate": 1.405366758189553e-05, "loss": 4.0762, "step": 5607 }, { "epoch": 1.8681602398600816, "grad_norm": 0.515625, "learning_rate": 1.4053273958052864e-05, "loss": 4.0803, "step": 5608 }, { "epoch": 1.8684933788623304, "grad_norm": 0.5390625, "learning_rate": 1.405288025787875e-05, "loss": 4.0855, "step": 5609 }, { "epoch": 1.868826517864579, "grad_norm": 0.50390625, "learning_rate": 1.4052486481377773e-05, "loss": 4.0925, "step": 5610 }, { "epoch": 1.8691596568668278, "grad_norm": 0.51953125, "learning_rate": 1.4052092628554524e-05, "loss": 4.0439, "step": 5611 }, { "epoch": 1.8694927958690764, "grad_norm": 0.54296875, "learning_rate": 1.4051698699413588e-05, "loss": 4.0891, "step": 5612 }, { "epoch": 1.869825934871325, "grad_norm": 0.5546875, "learning_rate": 1.4051304693959552e-05, "loss": 4.0509, "step": 5613 }, { "epoch": 1.8701590738735736, "grad_norm": 0.54296875, "learning_rate": 1.4050910612197005e-05, "loss": 4.1034, "step": 5614 }, { "epoch": 1.8704922128758223, "grad_norm": 0.5546875, "learning_rate": 1.4050516454130539e-05, "loss": 4.0213, "step": 5615 }, { "epoch": 1.870825351878071, "grad_norm": 0.546875, "learning_rate": 1.4050122219764746e-05, "loss": 3.9674, "step": 5616 }, { "epoch": 1.8711584908803198, "grad_norm": 0.55078125, "learning_rate": 1.4049727909104215e-05, "loss": 4.0429, "step": 5617 }, { "epoch": 1.8714916298825686, "grad_norm": 0.5546875, "learning_rate": 1.4049333522153543e-05, "loss": 3.9818, "step": 5618 }, { "epoch": 1.8718247688848173, "grad_norm": 0.546875, "learning_rate": 1.4048939058917318e-05, "loss": 4.0402, "step": 5619 }, { "epoch": 1.872157907887066, "grad_norm": 0.51953125, "learning_rate": 1.4048544519400142e-05, "loss": 4.0975, "step": 5620 }, { "epoch": 1.8724910468893146, "grad_norm": 0.55078125, "learning_rate": 1.4048149903606602e-05, "loss": 4.1317, "step": 5621 }, { "epoch": 1.8728241858915633, "grad_norm": 0.57421875, "learning_rate": 1.4047755211541301e-05, "loss": 4.0927, "step": 5622 }, { "epoch": 1.8731573248938118, "grad_norm": 0.55078125, "learning_rate": 1.4047360443208835e-05, "loss": 4.0835, "step": 5623 }, { "epoch": 1.8734904638960606, "grad_norm": 0.54296875, "learning_rate": 1.4046965598613802e-05, "loss": 4.0335, "step": 5624 }, { "epoch": 1.8738236028983093, "grad_norm": 0.5546875, "learning_rate": 1.4046570677760799e-05, "loss": 4.1039, "step": 5625 }, { "epoch": 1.874156741900558, "grad_norm": 0.5625, "learning_rate": 1.4046175680654429e-05, "loss": 4.0371, "step": 5626 }, { "epoch": 1.8744898809028068, "grad_norm": 0.5390625, "learning_rate": 1.404578060729929e-05, "loss": 4.031, "step": 5627 }, { "epoch": 1.8748230199050555, "grad_norm": 0.52734375, "learning_rate": 1.4045385457699984e-05, "loss": 4.0811, "step": 5628 }, { "epoch": 1.875156158907304, "grad_norm": 0.51953125, "learning_rate": 1.4044990231861118e-05, "loss": 4.0892, "step": 5629 }, { "epoch": 1.8754892979095528, "grad_norm": 0.546875, "learning_rate": 1.404459492978729e-05, "loss": 4.0909, "step": 5630 }, { "epoch": 1.8758224369118013, "grad_norm": 0.56640625, "learning_rate": 1.4044199551483107e-05, "loss": 3.994, "step": 5631 }, { "epoch": 1.87615557591405, "grad_norm": 0.53515625, "learning_rate": 1.4043804096953172e-05, "loss": 4.0396, "step": 5632 }, { "epoch": 1.8764887149162988, "grad_norm": 0.5234375, "learning_rate": 1.4043408566202096e-05, "loss": 4.0812, "step": 5633 }, { "epoch": 1.8768218539185475, "grad_norm": 0.5703125, "learning_rate": 1.4043012959234481e-05, "loss": 4.0365, "step": 5634 }, { "epoch": 1.8771549929207962, "grad_norm": 0.55859375, "learning_rate": 1.4042617276054939e-05, "loss": 4.0679, "step": 5635 }, { "epoch": 1.877488131923045, "grad_norm": 0.5625, "learning_rate": 1.4042221516668075e-05, "loss": 4.0756, "step": 5636 }, { "epoch": 1.8778212709252937, "grad_norm": 0.5390625, "learning_rate": 1.4041825681078502e-05, "loss": 4.0121, "step": 5637 }, { "epoch": 1.8781544099275422, "grad_norm": 0.5625, "learning_rate": 1.404142976929083e-05, "loss": 4.0674, "step": 5638 }, { "epoch": 1.878487548929791, "grad_norm": 0.546875, "learning_rate": 1.4041033781309669e-05, "loss": 4.008, "step": 5639 }, { "epoch": 1.8788206879320395, "grad_norm": 0.5390625, "learning_rate": 1.4040637717139634e-05, "loss": 4.0674, "step": 5640 }, { "epoch": 1.8791538269342882, "grad_norm": 0.55078125, "learning_rate": 1.4040241576785334e-05, "loss": 4.0281, "step": 5641 }, { "epoch": 1.879486965936537, "grad_norm": 0.53125, "learning_rate": 1.4039845360251387e-05, "loss": 4.0803, "step": 5642 }, { "epoch": 1.8798201049387857, "grad_norm": 0.5546875, "learning_rate": 1.4039449067542406e-05, "loss": 4.0234, "step": 5643 }, { "epoch": 1.8801532439410344, "grad_norm": 0.5234375, "learning_rate": 1.403905269866301e-05, "loss": 4.1043, "step": 5644 }, { "epoch": 1.8804863829432832, "grad_norm": 0.53515625, "learning_rate": 1.403865625361781e-05, "loss": 4.0005, "step": 5645 }, { "epoch": 1.880819521945532, "grad_norm": 0.59375, "learning_rate": 1.4038259732411431e-05, "loss": 3.9798, "step": 5646 }, { "epoch": 1.8811526609477804, "grad_norm": 0.53125, "learning_rate": 1.4037863135048486e-05, "loss": 4.0948, "step": 5647 }, { "epoch": 1.8814857999500292, "grad_norm": 0.5390625, "learning_rate": 1.4037466461533597e-05, "loss": 4.0276, "step": 5648 }, { "epoch": 1.8818189389522777, "grad_norm": 0.5390625, "learning_rate": 1.4037069711871383e-05, "loss": 4.0575, "step": 5649 }, { "epoch": 1.8821520779545264, "grad_norm": 0.55078125, "learning_rate": 1.4036672886066467e-05, "loss": 4.05, "step": 5650 }, { "epoch": 1.8824852169567752, "grad_norm": 0.546875, "learning_rate": 1.4036275984123471e-05, "loss": 4.0806, "step": 5651 }, { "epoch": 1.882818355959024, "grad_norm": 0.5546875, "learning_rate": 1.4035879006047017e-05, "loss": 4.0401, "step": 5652 }, { "epoch": 1.8831514949612727, "grad_norm": 0.5703125, "learning_rate": 1.4035481951841729e-05, "loss": 3.9971, "step": 5653 }, { "epoch": 1.8834846339635214, "grad_norm": 0.53125, "learning_rate": 1.4035084821512231e-05, "loss": 4.0955, "step": 5654 }, { "epoch": 1.8838177729657701, "grad_norm": 0.53515625, "learning_rate": 1.4034687615063152e-05, "loss": 4.0767, "step": 5655 }, { "epoch": 1.8841509119680186, "grad_norm": 0.5859375, "learning_rate": 1.4034290332499114e-05, "loss": 4.0122, "step": 5656 }, { "epoch": 1.8844840509702674, "grad_norm": 0.54296875, "learning_rate": 1.4033892973824748e-05, "loss": 4.0575, "step": 5657 }, { "epoch": 1.884817189972516, "grad_norm": 0.54296875, "learning_rate": 1.4033495539044681e-05, "loss": 4.1, "step": 5658 }, { "epoch": 1.8851503289747646, "grad_norm": 0.5390625, "learning_rate": 1.4033098028163543e-05, "loss": 4.0856, "step": 5659 }, { "epoch": 1.8854834679770134, "grad_norm": 0.55859375, "learning_rate": 1.4032700441185962e-05, "loss": 4.0465, "step": 5660 }, { "epoch": 1.8858166069792621, "grad_norm": 0.53515625, "learning_rate": 1.4032302778116573e-05, "loss": 4.1062, "step": 5661 }, { "epoch": 1.8861497459815109, "grad_norm": 0.54296875, "learning_rate": 1.4031905038960006e-05, "loss": 4.0469, "step": 5662 }, { "epoch": 1.8864828849837596, "grad_norm": 0.5546875, "learning_rate": 1.4031507223720892e-05, "loss": 4.1248, "step": 5663 }, { "epoch": 1.8868160239860081, "grad_norm": 0.5625, "learning_rate": 1.4031109332403863e-05, "loss": 4.0534, "step": 5664 }, { "epoch": 1.8871491629882569, "grad_norm": 0.5546875, "learning_rate": 1.4030711365013562e-05, "loss": 4.031, "step": 5665 }, { "epoch": 1.8874823019905054, "grad_norm": 0.57421875, "learning_rate": 1.4030313321554616e-05, "loss": 3.9922, "step": 5666 }, { "epoch": 1.8878154409927541, "grad_norm": 0.54296875, "learning_rate": 1.4029915202031666e-05, "loss": 4.0163, "step": 5667 }, { "epoch": 1.8881485799950029, "grad_norm": 0.5703125, "learning_rate": 1.4029517006449346e-05, "loss": 4.0178, "step": 5668 }, { "epoch": 1.8884817189972516, "grad_norm": 0.5703125, "learning_rate": 1.4029118734812297e-05, "loss": 4.1121, "step": 5669 }, { "epoch": 1.8888148579995003, "grad_norm": 0.57421875, "learning_rate": 1.4028720387125156e-05, "loss": 4.0328, "step": 5670 }, { "epoch": 1.889147997001749, "grad_norm": 0.5546875, "learning_rate": 1.4028321963392564e-05, "loss": 4.0423, "step": 5671 }, { "epoch": 1.8894811360039978, "grad_norm": 0.55859375, "learning_rate": 1.4027923463619162e-05, "loss": 4.1154, "step": 5672 }, { "epoch": 1.8898142750062463, "grad_norm": 0.578125, "learning_rate": 1.402752488780959e-05, "loss": 4.0615, "step": 5673 }, { "epoch": 1.890147414008495, "grad_norm": 0.57421875, "learning_rate": 1.4027126235968493e-05, "loss": 4.0469, "step": 5674 }, { "epoch": 1.8904805530107436, "grad_norm": 0.54296875, "learning_rate": 1.4026727508100512e-05, "loss": 4.1065, "step": 5675 }, { "epoch": 1.8908136920129923, "grad_norm": 0.57421875, "learning_rate": 1.4026328704210294e-05, "loss": 4.054, "step": 5676 }, { "epoch": 1.891146831015241, "grad_norm": 0.5546875, "learning_rate": 1.4025929824302481e-05, "loss": 4.0096, "step": 5677 }, { "epoch": 1.8914799700174898, "grad_norm": 0.53515625, "learning_rate": 1.4025530868381722e-05, "loss": 4.0491, "step": 5678 }, { "epoch": 1.8918131090197385, "grad_norm": 0.59375, "learning_rate": 1.402513183645266e-05, "loss": 4.0318, "step": 5679 }, { "epoch": 1.8921462480219873, "grad_norm": 0.5625, "learning_rate": 1.4024732728519949e-05, "loss": 4.0843, "step": 5680 }, { "epoch": 1.892479387024236, "grad_norm": 0.53515625, "learning_rate": 1.4024333544588232e-05, "loss": 4.0937, "step": 5681 }, { "epoch": 1.8928125260264845, "grad_norm": 0.55078125, "learning_rate": 1.4023934284662162e-05, "loss": 4.0445, "step": 5682 }, { "epoch": 1.8931456650287333, "grad_norm": 0.5859375, "learning_rate": 1.4023534948746388e-05, "loss": 4.0153, "step": 5683 }, { "epoch": 1.8934788040309818, "grad_norm": 0.5546875, "learning_rate": 1.4023135536845562e-05, "loss": 4.0625, "step": 5684 }, { "epoch": 1.8938119430332305, "grad_norm": 0.5390625, "learning_rate": 1.4022736048964334e-05, "loss": 4.0961, "step": 5685 }, { "epoch": 1.8941450820354793, "grad_norm": 0.5390625, "learning_rate": 1.4022336485107363e-05, "loss": 4.0891, "step": 5686 }, { "epoch": 1.894478221037728, "grad_norm": 0.53125, "learning_rate": 1.4021936845279295e-05, "loss": 4.0976, "step": 5687 }, { "epoch": 1.8948113600399767, "grad_norm": 0.55078125, "learning_rate": 1.4021537129484792e-05, "loss": 4.0809, "step": 5688 }, { "epoch": 1.8951444990422255, "grad_norm": 0.54296875, "learning_rate": 1.4021137337728507e-05, "loss": 4.0812, "step": 5689 }, { "epoch": 1.8954776380444742, "grad_norm": 0.53515625, "learning_rate": 1.4020737470015097e-05, "loss": 4.0928, "step": 5690 }, { "epoch": 1.8958107770467227, "grad_norm": 0.5546875, "learning_rate": 1.402033752634922e-05, "loss": 4.0991, "step": 5691 }, { "epoch": 1.8961439160489715, "grad_norm": 0.5625, "learning_rate": 1.401993750673553e-05, "loss": 4.0346, "step": 5692 }, { "epoch": 1.89647705505122, "grad_norm": 0.5546875, "learning_rate": 1.4019537411178693e-05, "loss": 3.9792, "step": 5693 }, { "epoch": 1.8968101940534687, "grad_norm": 0.5546875, "learning_rate": 1.4019137239683367e-05, "loss": 3.9657, "step": 5694 }, { "epoch": 1.8971433330557175, "grad_norm": 0.5625, "learning_rate": 1.4018736992254212e-05, "loss": 4.0187, "step": 5695 }, { "epoch": 1.8974764720579662, "grad_norm": 0.55078125, "learning_rate": 1.401833666889589e-05, "loss": 4.0536, "step": 5696 }, { "epoch": 1.897809611060215, "grad_norm": 0.55078125, "learning_rate": 1.4017936269613064e-05, "loss": 4.0347, "step": 5697 }, { "epoch": 1.8981427500624637, "grad_norm": 0.5703125, "learning_rate": 1.4017535794410399e-05, "loss": 4.0932, "step": 5698 }, { "epoch": 1.8984758890647122, "grad_norm": 0.55078125, "learning_rate": 1.4017135243292561e-05, "loss": 4.1081, "step": 5699 }, { "epoch": 1.898809028066961, "grad_norm": 0.5234375, "learning_rate": 1.4016734616264212e-05, "loss": 4.0811, "step": 5700 }, { "epoch": 1.8991421670692095, "grad_norm": 0.55078125, "learning_rate": 1.4016333913330018e-05, "loss": 4.1209, "step": 5701 }, { "epoch": 1.8994753060714582, "grad_norm": 0.546875, "learning_rate": 1.401593313449465e-05, "loss": 4.1046, "step": 5702 }, { "epoch": 1.899808445073707, "grad_norm": 0.54296875, "learning_rate": 1.4015532279762774e-05, "loss": 4.1042, "step": 5703 }, { "epoch": 1.9001415840759557, "grad_norm": 0.546875, "learning_rate": 1.4015131349139062e-05, "loss": 4.1172, "step": 5704 }, { "epoch": 1.9004747230782044, "grad_norm": 0.546875, "learning_rate": 1.4014730342628177e-05, "loss": 4.0433, "step": 5705 }, { "epoch": 1.9008078620804532, "grad_norm": 0.58203125, "learning_rate": 1.4014329260234798e-05, "loss": 4.0262, "step": 5706 }, { "epoch": 1.901141001082702, "grad_norm": 0.52734375, "learning_rate": 1.4013928101963591e-05, "loss": 4.0189, "step": 5707 }, { "epoch": 1.9014741400849504, "grad_norm": 0.546875, "learning_rate": 1.4013526867819232e-05, "loss": 4.0079, "step": 5708 }, { "epoch": 1.9018072790871992, "grad_norm": 0.546875, "learning_rate": 1.4013125557806395e-05, "loss": 4.0933, "step": 5709 }, { "epoch": 1.9021404180894477, "grad_norm": 0.55078125, "learning_rate": 1.4012724171929748e-05, "loss": 4.0941, "step": 5710 }, { "epoch": 1.9024735570916964, "grad_norm": 0.5390625, "learning_rate": 1.4012322710193975e-05, "loss": 4.068, "step": 5711 }, { "epoch": 1.9028066960939451, "grad_norm": 0.54296875, "learning_rate": 1.4011921172603745e-05, "loss": 4.0665, "step": 5712 }, { "epoch": 1.9031398350961939, "grad_norm": 0.6015625, "learning_rate": 1.4011519559163742e-05, "loss": 3.9809, "step": 5713 }, { "epoch": 1.9034729740984426, "grad_norm": 0.55078125, "learning_rate": 1.4011117869878637e-05, "loss": 4.0866, "step": 5714 }, { "epoch": 1.9038061131006914, "grad_norm": 0.5546875, "learning_rate": 1.4010716104753112e-05, "loss": 4.1179, "step": 5715 }, { "epoch": 1.90413925210294, "grad_norm": 0.54296875, "learning_rate": 1.4010314263791849e-05, "loss": 4.015, "step": 5716 }, { "epoch": 1.9044723911051886, "grad_norm": 0.54296875, "learning_rate": 1.4009912346999523e-05, "loss": 4.1034, "step": 5717 }, { "epoch": 1.9048055301074374, "grad_norm": 0.5625, "learning_rate": 1.400951035438082e-05, "loss": 4.0945, "step": 5718 }, { "epoch": 1.9051386691096859, "grad_norm": 0.53515625, "learning_rate": 1.4009108285940421e-05, "loss": 4.0499, "step": 5719 }, { "epoch": 1.9054718081119346, "grad_norm": 0.57421875, "learning_rate": 1.400870614168301e-05, "loss": 3.9598, "step": 5720 }, { "epoch": 1.9058049471141834, "grad_norm": 0.5703125, "learning_rate": 1.4008303921613269e-05, "loss": 4.1101, "step": 5721 }, { "epoch": 1.906138086116432, "grad_norm": 0.57421875, "learning_rate": 1.4007901625735886e-05, "loss": 4.1013, "step": 5722 }, { "epoch": 1.9064712251186808, "grad_norm": 0.53125, "learning_rate": 1.4007499254055544e-05, "loss": 4.0499, "step": 5723 }, { "epoch": 1.9068043641209296, "grad_norm": 0.5625, "learning_rate": 1.4007096806576932e-05, "loss": 4.0065, "step": 5724 }, { "epoch": 1.9071375031231783, "grad_norm": 0.51953125, "learning_rate": 1.4006694283304734e-05, "loss": 4.0662, "step": 5725 }, { "epoch": 1.9074706421254268, "grad_norm": 0.53515625, "learning_rate": 1.4006291684243642e-05, "loss": 4.0968, "step": 5726 }, { "epoch": 1.9078037811276756, "grad_norm": 0.53125, "learning_rate": 1.4005889009398345e-05, "loss": 4.0458, "step": 5727 }, { "epoch": 1.908136920129924, "grad_norm": 0.515625, "learning_rate": 1.4005486258773534e-05, "loss": 4.1482, "step": 5728 }, { "epoch": 1.9084700591321728, "grad_norm": 0.55078125, "learning_rate": 1.4005083432373896e-05, "loss": 4.1191, "step": 5729 }, { "epoch": 1.9088031981344216, "grad_norm": 0.53125, "learning_rate": 1.4004680530204128e-05, "loss": 4.1189, "step": 5730 }, { "epoch": 1.9091363371366703, "grad_norm": 0.546875, "learning_rate": 1.400427755226892e-05, "loss": 4.0577, "step": 5731 }, { "epoch": 1.909469476138919, "grad_norm": 0.55859375, "learning_rate": 1.4003874498572966e-05, "loss": 4.0366, "step": 5732 }, { "epoch": 1.9098026151411678, "grad_norm": 0.5546875, "learning_rate": 1.4003471369120964e-05, "loss": 4.0692, "step": 5733 }, { "epoch": 1.9101357541434163, "grad_norm": 0.5703125, "learning_rate": 1.4003068163917607e-05, "loss": 4.0259, "step": 5734 }, { "epoch": 1.910468893145665, "grad_norm": 0.56640625, "learning_rate": 1.400266488296759e-05, "loss": 4.0725, "step": 5735 }, { "epoch": 1.9108020321479136, "grad_norm": 0.5390625, "learning_rate": 1.4002261526275612e-05, "loss": 4.0886, "step": 5736 }, { "epoch": 1.9111351711501623, "grad_norm": 0.55078125, "learning_rate": 1.400185809384637e-05, "loss": 3.9652, "step": 5737 }, { "epoch": 1.911468310152411, "grad_norm": 0.53515625, "learning_rate": 1.4001454585684566e-05, "loss": 4.0777, "step": 5738 }, { "epoch": 1.9118014491546598, "grad_norm": 0.5546875, "learning_rate": 1.4001051001794898e-05, "loss": 4.0185, "step": 5739 }, { "epoch": 1.9121345881569085, "grad_norm": 0.5390625, "learning_rate": 1.4000647342182067e-05, "loss": 4.0387, "step": 5740 }, { "epoch": 1.9124677271591572, "grad_norm": 0.55078125, "learning_rate": 1.4000243606850776e-05, "loss": 4.0272, "step": 5741 }, { "epoch": 1.912800866161406, "grad_norm": 0.54296875, "learning_rate": 1.3999839795805724e-05, "loss": 4.0314, "step": 5742 }, { "epoch": 1.9131340051636545, "grad_norm": 0.546875, "learning_rate": 1.3999435909051618e-05, "loss": 4.0512, "step": 5743 }, { "epoch": 1.9134671441659032, "grad_norm": 0.546875, "learning_rate": 1.3999031946593162e-05, "loss": 3.9844, "step": 5744 }, { "epoch": 1.9138002831681518, "grad_norm": 0.546875, "learning_rate": 1.399862790843506e-05, "loss": 4.0361, "step": 5745 }, { "epoch": 1.9141334221704005, "grad_norm": 0.54296875, "learning_rate": 1.3998223794582018e-05, "loss": 4.0655, "step": 5746 }, { "epoch": 1.9144665611726492, "grad_norm": 0.5625, "learning_rate": 1.3997819605038745e-05, "loss": 4.026, "step": 5747 }, { "epoch": 1.914799700174898, "grad_norm": 0.5546875, "learning_rate": 1.3997415339809949e-05, "loss": 4.0819, "step": 5748 }, { "epoch": 1.9151328391771467, "grad_norm": 0.55859375, "learning_rate": 1.3997010998900336e-05, "loss": 4.0393, "step": 5749 }, { "epoch": 1.9154659781793955, "grad_norm": 0.5625, "learning_rate": 1.3996606582314618e-05, "loss": 3.9985, "step": 5750 }, { "epoch": 1.9157991171816442, "grad_norm": 0.55859375, "learning_rate": 1.3996202090057506e-05, "loss": 4.0023, "step": 5751 }, { "epoch": 1.9161322561838927, "grad_norm": 0.53125, "learning_rate": 1.3995797522133708e-05, "loss": 4.0735, "step": 5752 }, { "epoch": 1.9164653951861415, "grad_norm": 0.5625, "learning_rate": 1.3995392878547941e-05, "loss": 4.0498, "step": 5753 }, { "epoch": 1.91679853418839, "grad_norm": 0.5390625, "learning_rate": 1.3994988159304913e-05, "loss": 4.0225, "step": 5754 }, { "epoch": 1.9171316731906387, "grad_norm": 0.57421875, "learning_rate": 1.3994583364409344e-05, "loss": 3.9816, "step": 5755 }, { "epoch": 1.9174648121928874, "grad_norm": 0.5546875, "learning_rate": 1.3994178493865945e-05, "loss": 4.1062, "step": 5756 }, { "epoch": 1.9177979511951362, "grad_norm": 0.55078125, "learning_rate": 1.3993773547679433e-05, "loss": 4.0373, "step": 5757 }, { "epoch": 1.918131090197385, "grad_norm": 0.5546875, "learning_rate": 1.3993368525854526e-05, "loss": 4.034, "step": 5758 }, { "epoch": 1.9184642291996337, "grad_norm": 0.5703125, "learning_rate": 1.399296342839594e-05, "loss": 4.073, "step": 5759 }, { "epoch": 1.9187973682018822, "grad_norm": 0.57421875, "learning_rate": 1.3992558255308392e-05, "loss": 4.1311, "step": 5760 }, { "epoch": 1.919130507204131, "grad_norm": 0.53125, "learning_rate": 1.3992153006596604e-05, "loss": 4.0288, "step": 5761 }, { "epoch": 1.9194636462063797, "grad_norm": 0.55859375, "learning_rate": 1.3991747682265295e-05, "loss": 4.0505, "step": 5762 }, { "epoch": 1.9197967852086282, "grad_norm": 0.5546875, "learning_rate": 1.3991342282319186e-05, "loss": 4.0673, "step": 5763 }, { "epoch": 1.920129924210877, "grad_norm": 0.55078125, "learning_rate": 1.3990936806763002e-05, "loss": 4.0612, "step": 5764 }, { "epoch": 1.9204630632131257, "grad_norm": 0.5625, "learning_rate": 1.3990531255601463e-05, "loss": 4.1034, "step": 5765 }, { "epoch": 1.9207962022153744, "grad_norm": 0.5703125, "learning_rate": 1.399012562883929e-05, "loss": 4.0291, "step": 5766 }, { "epoch": 1.9211293412176231, "grad_norm": 0.55859375, "learning_rate": 1.3989719926481215e-05, "loss": 4.0363, "step": 5767 }, { "epoch": 1.9214624802198719, "grad_norm": 0.546875, "learning_rate": 1.3989314148531957e-05, "loss": 4.1229, "step": 5768 }, { "epoch": 1.9217956192221204, "grad_norm": 0.5546875, "learning_rate": 1.3988908294996247e-05, "loss": 4.0996, "step": 5769 }, { "epoch": 1.9221287582243691, "grad_norm": 0.5546875, "learning_rate": 1.3988502365878808e-05, "loss": 4.0416, "step": 5770 }, { "epoch": 1.9224618972266176, "grad_norm": 0.546875, "learning_rate": 1.3988096361184374e-05, "loss": 4.087, "step": 5771 }, { "epoch": 1.9227950362288664, "grad_norm": 0.546875, "learning_rate": 1.3987690280917668e-05, "loss": 4.0616, "step": 5772 }, { "epoch": 1.9231281752311151, "grad_norm": 0.5390625, "learning_rate": 1.3987284125083422e-05, "loss": 4.0808, "step": 5773 }, { "epoch": 1.9234613142333639, "grad_norm": 0.54296875, "learning_rate": 1.3986877893686369e-05, "loss": 4.0771, "step": 5774 }, { "epoch": 1.9237944532356126, "grad_norm": 0.5546875, "learning_rate": 1.3986471586731238e-05, "loss": 4.0424, "step": 5775 }, { "epoch": 1.9241275922378613, "grad_norm": 0.55859375, "learning_rate": 1.3986065204222763e-05, "loss": 3.9869, "step": 5776 }, { "epoch": 1.92446073124011, "grad_norm": 0.5625, "learning_rate": 1.3985658746165678e-05, "loss": 4.0205, "step": 5777 }, { "epoch": 1.9247938702423586, "grad_norm": 0.5390625, "learning_rate": 1.3985252212564716e-05, "loss": 4.02, "step": 5778 }, { "epoch": 1.9251270092446073, "grad_norm": 0.53125, "learning_rate": 1.3984845603424613e-05, "loss": 4.0454, "step": 5779 }, { "epoch": 1.9254601482468559, "grad_norm": 0.56640625, "learning_rate": 1.3984438918750104e-05, "loss": 4.0444, "step": 5780 }, { "epoch": 1.9257932872491046, "grad_norm": 0.56640625, "learning_rate": 1.3984032158545929e-05, "loss": 4.0756, "step": 5781 }, { "epoch": 1.9261264262513533, "grad_norm": 0.56640625, "learning_rate": 1.398362532281682e-05, "loss": 3.9911, "step": 5782 }, { "epoch": 1.926459565253602, "grad_norm": 0.55078125, "learning_rate": 1.3983218411567523e-05, "loss": 4.0219, "step": 5783 }, { "epoch": 1.9267927042558508, "grad_norm": 0.55078125, "learning_rate": 1.3982811424802772e-05, "loss": 4.096, "step": 5784 }, { "epoch": 1.9271258432580995, "grad_norm": 0.5546875, "learning_rate": 1.398240436252731e-05, "loss": 4.0712, "step": 5785 }, { "epoch": 1.9274589822603483, "grad_norm": 0.5546875, "learning_rate": 1.3981997224745879e-05, "loss": 4.0536, "step": 5786 }, { "epoch": 1.9277921212625968, "grad_norm": 0.546875, "learning_rate": 1.3981590011463218e-05, "loss": 4.1112, "step": 5787 }, { "epoch": 1.9281252602648455, "grad_norm": 0.546875, "learning_rate": 1.3981182722684076e-05, "loss": 3.9719, "step": 5788 }, { "epoch": 1.928458399267094, "grad_norm": 0.57421875, "learning_rate": 1.3980775358413193e-05, "loss": 4.0735, "step": 5789 }, { "epoch": 1.9287915382693428, "grad_norm": 0.5234375, "learning_rate": 1.3980367918655311e-05, "loss": 4.0682, "step": 5790 }, { "epoch": 1.9291246772715915, "grad_norm": 0.546875, "learning_rate": 1.3979960403415181e-05, "loss": 4.1193, "step": 5791 }, { "epoch": 1.9294578162738403, "grad_norm": 0.5703125, "learning_rate": 1.397955281269755e-05, "loss": 4.1416, "step": 5792 }, { "epoch": 1.929790955276089, "grad_norm": 0.5625, "learning_rate": 1.3979145146507162e-05, "loss": 4.0217, "step": 5793 }, { "epoch": 1.9301240942783378, "grad_norm": 0.5546875, "learning_rate": 1.3978737404848765e-05, "loss": 3.9998, "step": 5794 }, { "epoch": 1.9304572332805863, "grad_norm": 0.5390625, "learning_rate": 1.3978329587727113e-05, "loss": 4.062, "step": 5795 }, { "epoch": 1.930790372282835, "grad_norm": 0.53515625, "learning_rate": 1.3977921695146951e-05, "loss": 4.0606, "step": 5796 }, { "epoch": 1.9311235112850837, "grad_norm": 0.5546875, "learning_rate": 1.3977513727113033e-05, "loss": 4.0822, "step": 5797 }, { "epoch": 1.9314566502873323, "grad_norm": 0.5703125, "learning_rate": 1.3977105683630111e-05, "loss": 4.0347, "step": 5798 }, { "epoch": 1.931789789289581, "grad_norm": 0.5546875, "learning_rate": 1.3976697564702937e-05, "loss": 4.0933, "step": 5799 }, { "epoch": 1.9321229282918297, "grad_norm": 0.54296875, "learning_rate": 1.3976289370336266e-05, "loss": 4.0979, "step": 5800 }, { "epoch": 1.9324560672940785, "grad_norm": 0.5703125, "learning_rate": 1.397588110053485e-05, "loss": 4.0642, "step": 5801 }, { "epoch": 1.9327892062963272, "grad_norm": 0.53125, "learning_rate": 1.3975472755303445e-05, "loss": 4.0676, "step": 5802 }, { "epoch": 1.933122345298576, "grad_norm": 0.5625, "learning_rate": 1.397506433464681e-05, "loss": 4.0104, "step": 5803 }, { "epoch": 1.9334554843008245, "grad_norm": 0.58203125, "learning_rate": 1.39746558385697e-05, "loss": 4.0226, "step": 5804 }, { "epoch": 1.9337886233030732, "grad_norm": 0.5390625, "learning_rate": 1.3974247267076874e-05, "loss": 4.0909, "step": 5805 }, { "epoch": 1.9341217623053217, "grad_norm": 0.5546875, "learning_rate": 1.3973838620173088e-05, "loss": 3.9562, "step": 5806 }, { "epoch": 1.9344549013075705, "grad_norm": 0.56640625, "learning_rate": 1.3973429897863108e-05, "loss": 4.1163, "step": 5807 }, { "epoch": 1.9347880403098192, "grad_norm": 0.5625, "learning_rate": 1.3973021100151689e-05, "loss": 4.0889, "step": 5808 }, { "epoch": 1.935121179312068, "grad_norm": 0.55859375, "learning_rate": 1.3972612227043595e-05, "loss": 4.08, "step": 5809 }, { "epoch": 1.9354543183143167, "grad_norm": 0.55859375, "learning_rate": 1.3972203278543588e-05, "loss": 4.0922, "step": 5810 }, { "epoch": 1.9357874573165654, "grad_norm": 0.5625, "learning_rate": 1.3971794254656431e-05, "loss": 4.0455, "step": 5811 }, { "epoch": 1.9361205963188142, "grad_norm": 0.55859375, "learning_rate": 1.397138515538689e-05, "loss": 4.0801, "step": 5812 }, { "epoch": 1.9364537353210627, "grad_norm": 0.546875, "learning_rate": 1.397097598073973e-05, "loss": 3.9488, "step": 5813 }, { "epoch": 1.9367868743233114, "grad_norm": 0.52734375, "learning_rate": 1.3970566730719715e-05, "loss": 4.0235, "step": 5814 }, { "epoch": 1.93712001332556, "grad_norm": 0.5234375, "learning_rate": 1.3970157405331613e-05, "loss": 4.0796, "step": 5815 }, { "epoch": 1.9374531523278087, "grad_norm": 0.5234375, "learning_rate": 1.3969748004580189e-05, "loss": 4.1094, "step": 5816 }, { "epoch": 1.9377862913300574, "grad_norm": 0.55078125, "learning_rate": 1.3969338528470216e-05, "loss": 4.0656, "step": 5817 }, { "epoch": 1.9381194303323062, "grad_norm": 0.54296875, "learning_rate": 1.3968928977006464e-05, "loss": 4.1034, "step": 5818 }, { "epoch": 1.938452569334555, "grad_norm": 0.54296875, "learning_rate": 1.3968519350193699e-05, "loss": 4.0751, "step": 5819 }, { "epoch": 1.9387857083368036, "grad_norm": 0.57421875, "learning_rate": 1.3968109648036694e-05, "loss": 4.0316, "step": 5820 }, { "epoch": 1.9391188473390524, "grad_norm": 0.5625, "learning_rate": 1.3967699870540223e-05, "loss": 4.0944, "step": 5821 }, { "epoch": 1.939451986341301, "grad_norm": 0.5625, "learning_rate": 1.3967290017709055e-05, "loss": 4.0878, "step": 5822 }, { "epoch": 1.9397851253435496, "grad_norm": 0.546875, "learning_rate": 1.3966880089547968e-05, "loss": 4.0405, "step": 5823 }, { "epoch": 1.9401182643457981, "grad_norm": 0.56640625, "learning_rate": 1.3966470086061734e-05, "loss": 4.0856, "step": 5824 }, { "epoch": 1.9404514033480469, "grad_norm": 0.5703125, "learning_rate": 1.3966060007255133e-05, "loss": 4.0834, "step": 5825 }, { "epoch": 1.9407845423502956, "grad_norm": 0.5703125, "learning_rate": 1.3965649853132936e-05, "loss": 3.9952, "step": 5826 }, { "epoch": 1.9411176813525444, "grad_norm": 0.53515625, "learning_rate": 1.3965239623699924e-05, "loss": 4.0726, "step": 5827 }, { "epoch": 1.941450820354793, "grad_norm": 0.5625, "learning_rate": 1.3964829318960873e-05, "loss": 4.0222, "step": 5828 }, { "epoch": 1.9417839593570418, "grad_norm": 0.56640625, "learning_rate": 1.3964418938920563e-05, "loss": 3.9724, "step": 5829 }, { "epoch": 1.9421170983592904, "grad_norm": 0.5546875, "learning_rate": 1.3964008483583773e-05, "loss": 4.0563, "step": 5830 }, { "epoch": 1.942450237361539, "grad_norm": 0.5546875, "learning_rate": 1.3963597952955288e-05, "loss": 4.1464, "step": 5831 }, { "epoch": 1.9427833763637878, "grad_norm": 0.60546875, "learning_rate": 1.3963187347039886e-05, "loss": 4.0146, "step": 5832 }, { "epoch": 1.9431165153660364, "grad_norm": 0.5390625, "learning_rate": 1.3962776665842351e-05, "loss": 4.0661, "step": 5833 }, { "epoch": 1.943449654368285, "grad_norm": 0.56640625, "learning_rate": 1.3962365909367463e-05, "loss": 4.051, "step": 5834 }, { "epoch": 1.9437827933705338, "grad_norm": 0.5546875, "learning_rate": 1.3961955077620014e-05, "loss": 4.1298, "step": 5835 }, { "epoch": 1.9441159323727826, "grad_norm": 0.57421875, "learning_rate": 1.3961544170604782e-05, "loss": 4.11, "step": 5836 }, { "epoch": 1.9444490713750313, "grad_norm": 0.5625, "learning_rate": 1.3961133188326559e-05, "loss": 4.0336, "step": 5837 }, { "epoch": 1.94478221037728, "grad_norm": 0.55859375, "learning_rate": 1.3960722130790127e-05, "loss": 4.1047, "step": 5838 }, { "epoch": 1.9451153493795286, "grad_norm": 0.5625, "learning_rate": 1.3960310998000276e-05, "loss": 4.0999, "step": 5839 }, { "epoch": 1.9454484883817773, "grad_norm": 0.55078125, "learning_rate": 1.3959899789961794e-05, "loss": 3.9989, "step": 5840 }, { "epoch": 1.9457816273840258, "grad_norm": 0.55078125, "learning_rate": 1.3959488506679474e-05, "loss": 4.0944, "step": 5841 }, { "epoch": 1.9461147663862746, "grad_norm": 0.5625, "learning_rate": 1.3959077148158104e-05, "loss": 4.0821, "step": 5842 }, { "epoch": 1.9464479053885233, "grad_norm": 0.55078125, "learning_rate": 1.3958665714402473e-05, "loss": 4.0895, "step": 5843 }, { "epoch": 1.946781044390772, "grad_norm": 0.5625, "learning_rate": 1.3958254205417378e-05, "loss": 4.0327, "step": 5844 }, { "epoch": 1.9471141833930208, "grad_norm": 0.55859375, "learning_rate": 1.3957842621207611e-05, "loss": 4.0779, "step": 5845 }, { "epoch": 1.9474473223952695, "grad_norm": 0.5703125, "learning_rate": 1.3957430961777963e-05, "loss": 4.1367, "step": 5846 }, { "epoch": 1.9477804613975183, "grad_norm": 0.52734375, "learning_rate": 1.395701922713323e-05, "loss": 4.0723, "step": 5847 }, { "epoch": 1.9481136003997668, "grad_norm": 0.55859375, "learning_rate": 1.3956607417278212e-05, "loss": 4.0476, "step": 5848 }, { "epoch": 1.9484467394020155, "grad_norm": 0.546875, "learning_rate": 1.39561955322177e-05, "loss": 4.0571, "step": 5849 }, { "epoch": 1.948779878404264, "grad_norm": 0.5703125, "learning_rate": 1.3955783571956497e-05, "loss": 4.078, "step": 5850 }, { "epoch": 1.9491130174065128, "grad_norm": 0.5546875, "learning_rate": 1.3955371536499397e-05, "loss": 3.9794, "step": 5851 }, { "epoch": 1.9494461564087615, "grad_norm": 0.51953125, "learning_rate": 1.3954959425851202e-05, "loss": 4.0067, "step": 5852 }, { "epoch": 1.9497792954110102, "grad_norm": 0.5859375, "learning_rate": 1.395454724001671e-05, "loss": 4.0082, "step": 5853 }, { "epoch": 1.950112434413259, "grad_norm": 0.54296875, "learning_rate": 1.3954134979000722e-05, "loss": 4.0378, "step": 5854 }, { "epoch": 1.9504455734155077, "grad_norm": 0.5859375, "learning_rate": 1.3953722642808042e-05, "loss": 4.0598, "step": 5855 }, { "epoch": 1.9507787124177565, "grad_norm": 0.53125, "learning_rate": 1.3953310231443472e-05, "loss": 4.0482, "step": 5856 }, { "epoch": 1.951111851420005, "grad_norm": 0.53515625, "learning_rate": 1.3952897744911816e-05, "loss": 4.0771, "step": 5857 }, { "epoch": 1.9514449904222537, "grad_norm": 0.5390625, "learning_rate": 1.3952485183217878e-05, "loss": 4.0956, "step": 5858 }, { "epoch": 1.9517781294245022, "grad_norm": 0.57421875, "learning_rate": 1.3952072546366463e-05, "loss": 4.0317, "step": 5859 }, { "epoch": 1.952111268426751, "grad_norm": 0.55859375, "learning_rate": 1.395165983436238e-05, "loss": 4.0683, "step": 5860 }, { "epoch": 1.9524444074289997, "grad_norm": 0.5546875, "learning_rate": 1.3951247047210433e-05, "loss": 4.0857, "step": 5861 }, { "epoch": 1.9527775464312485, "grad_norm": 0.53125, "learning_rate": 1.395083418491543e-05, "loss": 4.0404, "step": 5862 }, { "epoch": 1.9531106854334972, "grad_norm": 0.54296875, "learning_rate": 1.3950421247482184e-05, "loss": 4.1058, "step": 5863 }, { "epoch": 1.953443824435746, "grad_norm": 0.5546875, "learning_rate": 1.39500082349155e-05, "loss": 4.1022, "step": 5864 }, { "epoch": 1.9537769634379945, "grad_norm": 0.56640625, "learning_rate": 1.3949595147220192e-05, "loss": 4.0693, "step": 5865 }, { "epoch": 1.9541101024402432, "grad_norm": 0.55859375, "learning_rate": 1.3949181984401068e-05, "loss": 4.1259, "step": 5866 }, { "epoch": 1.954443241442492, "grad_norm": 0.56640625, "learning_rate": 1.3948768746462947e-05, "loss": 4.0722, "step": 5867 }, { "epoch": 1.9547763804447404, "grad_norm": 0.5546875, "learning_rate": 1.3948355433410634e-05, "loss": 4.0845, "step": 5868 }, { "epoch": 1.9551095194469892, "grad_norm": 0.5859375, "learning_rate": 1.3947942045248948e-05, "loss": 4.0768, "step": 5869 }, { "epoch": 1.955442658449238, "grad_norm": 0.55078125, "learning_rate": 1.3947528581982706e-05, "loss": 4.0149, "step": 5870 }, { "epoch": 1.9557757974514867, "grad_norm": 0.54296875, "learning_rate": 1.394711504361672e-05, "loss": 4.0193, "step": 5871 }, { "epoch": 1.9561089364537354, "grad_norm": 0.546875, "learning_rate": 1.3946701430155807e-05, "loss": 4.1521, "step": 5872 }, { "epoch": 1.9564420754559841, "grad_norm": 0.55078125, "learning_rate": 1.3946287741604786e-05, "loss": 4.0485, "step": 5873 }, { "epoch": 1.9567752144582327, "grad_norm": 0.55078125, "learning_rate": 1.3945873977968477e-05, "loss": 4.1158, "step": 5874 }, { "epoch": 1.9571083534604814, "grad_norm": 0.55859375, "learning_rate": 1.3945460139251696e-05, "loss": 3.9887, "step": 5875 }, { "epoch": 1.95744149246273, "grad_norm": 0.5546875, "learning_rate": 1.3945046225459268e-05, "loss": 4.022, "step": 5876 }, { "epoch": 1.9577746314649787, "grad_norm": 0.55859375, "learning_rate": 1.3944632236596009e-05, "loss": 4.0556, "step": 5877 }, { "epoch": 1.9581077704672274, "grad_norm": 0.5625, "learning_rate": 1.3944218172666743e-05, "loss": 4.0084, "step": 5878 }, { "epoch": 1.9584409094694761, "grad_norm": 0.54296875, "learning_rate": 1.3943804033676295e-05, "loss": 4.0539, "step": 5879 }, { "epoch": 1.9587740484717249, "grad_norm": 0.5625, "learning_rate": 1.3943389819629486e-05, "loss": 4.0255, "step": 5880 }, { "epoch": 1.9591071874739736, "grad_norm": 0.53515625, "learning_rate": 1.3942975530531143e-05, "loss": 4.1226, "step": 5881 }, { "epoch": 1.9594403264762223, "grad_norm": 0.546875, "learning_rate": 1.394256116638609e-05, "loss": 4.029, "step": 5882 }, { "epoch": 1.9597734654784709, "grad_norm": 0.5390625, "learning_rate": 1.3942146727199155e-05, "loss": 4.0957, "step": 5883 }, { "epoch": 1.9601066044807196, "grad_norm": 0.5625, "learning_rate": 1.3941732212975164e-05, "loss": 3.9941, "step": 5884 }, { "epoch": 1.9604397434829681, "grad_norm": 0.55078125, "learning_rate": 1.3941317623718945e-05, "loss": 4.0135, "step": 5885 }, { "epoch": 1.9607728824852169, "grad_norm": 0.55078125, "learning_rate": 1.3940902959435327e-05, "loss": 4.0292, "step": 5886 }, { "epoch": 1.9611060214874656, "grad_norm": 0.55078125, "learning_rate": 1.3940488220129142e-05, "loss": 4.0237, "step": 5887 }, { "epoch": 1.9614391604897143, "grad_norm": 0.54296875, "learning_rate": 1.3940073405805218e-05, "loss": 4.0661, "step": 5888 }, { "epoch": 1.961772299491963, "grad_norm": 0.546875, "learning_rate": 1.3939658516468388e-05, "loss": 4.1351, "step": 5889 }, { "epoch": 1.9621054384942118, "grad_norm": 0.56640625, "learning_rate": 1.3939243552123486e-05, "loss": 4.0328, "step": 5890 }, { "epoch": 1.9624385774964606, "grad_norm": 0.58984375, "learning_rate": 1.3938828512775343e-05, "loss": 4.033, "step": 5891 }, { "epoch": 1.962771716498709, "grad_norm": 0.54296875, "learning_rate": 1.3938413398428793e-05, "loss": 4.0289, "step": 5892 }, { "epoch": 1.9631048555009578, "grad_norm": 0.5390625, "learning_rate": 1.3937998209088674e-05, "loss": 4.0268, "step": 5893 }, { "epoch": 1.9634379945032063, "grad_norm": 0.53125, "learning_rate": 1.3937582944759822e-05, "loss": 4.0298, "step": 5894 }, { "epoch": 1.963771133505455, "grad_norm": 0.5703125, "learning_rate": 1.393716760544707e-05, "loss": 3.9809, "step": 5895 }, { "epoch": 1.9641042725077038, "grad_norm": 0.55078125, "learning_rate": 1.393675219115526e-05, "loss": 4.1346, "step": 5896 }, { "epoch": 1.9644374115099525, "grad_norm": 0.5390625, "learning_rate": 1.3936336701889228e-05, "loss": 4.1006, "step": 5897 }, { "epoch": 1.9647705505122013, "grad_norm": 0.54296875, "learning_rate": 1.3935921137653813e-05, "loss": 4.0947, "step": 5898 }, { "epoch": 1.96510368951445, "grad_norm": 0.55078125, "learning_rate": 1.3935505498453859e-05, "loss": 4.0358, "step": 5899 }, { "epoch": 1.9654368285166985, "grad_norm": 0.546875, "learning_rate": 1.3935089784294204e-05, "loss": 4.0726, "step": 5900 }, { "epoch": 1.9657699675189473, "grad_norm": 0.55859375, "learning_rate": 1.3934673995179693e-05, "loss": 4.1048, "step": 5901 }, { "epoch": 1.966103106521196, "grad_norm": 0.54296875, "learning_rate": 1.3934258131115166e-05, "loss": 4.1309, "step": 5902 }, { "epoch": 1.9664362455234445, "grad_norm": 0.54296875, "learning_rate": 1.393384219210547e-05, "loss": 4.0919, "step": 5903 }, { "epoch": 1.9667693845256933, "grad_norm": 0.5546875, "learning_rate": 1.3933426178155446e-05, "loss": 4.0003, "step": 5904 }, { "epoch": 1.967102523527942, "grad_norm": 0.5234375, "learning_rate": 1.3933010089269942e-05, "loss": 4.0122, "step": 5905 }, { "epoch": 1.9674356625301908, "grad_norm": 0.55078125, "learning_rate": 1.3932593925453806e-05, "loss": 4.1575, "step": 5906 }, { "epoch": 1.9677688015324395, "grad_norm": 0.54296875, "learning_rate": 1.3932177686711883e-05, "loss": 4.0445, "step": 5907 }, { "epoch": 1.9681019405346882, "grad_norm": 0.56640625, "learning_rate": 1.3931761373049022e-05, "loss": 4.0213, "step": 5908 }, { "epoch": 1.9684350795369367, "grad_norm": 0.578125, "learning_rate": 1.3931344984470072e-05, "loss": 4.0059, "step": 5909 }, { "epoch": 1.9687682185391855, "grad_norm": 0.55859375, "learning_rate": 1.3930928520979882e-05, "loss": 4.0417, "step": 5910 }, { "epoch": 1.969101357541434, "grad_norm": 0.53125, "learning_rate": 1.3930511982583308e-05, "loss": 4.1214, "step": 5911 }, { "epoch": 1.9694344965436827, "grad_norm": 0.546875, "learning_rate": 1.3930095369285195e-05, "loss": 4.0178, "step": 5912 }, { "epoch": 1.9697676355459315, "grad_norm": 0.51171875, "learning_rate": 1.3929678681090399e-05, "loss": 4.0846, "step": 5913 }, { "epoch": 1.9701007745481802, "grad_norm": 0.56640625, "learning_rate": 1.392926191800377e-05, "loss": 4.0486, "step": 5914 }, { "epoch": 1.970433913550429, "grad_norm": 0.56640625, "learning_rate": 1.392884508003017e-05, "loss": 4.0146, "step": 5915 }, { "epoch": 1.9707670525526777, "grad_norm": 0.5546875, "learning_rate": 1.3928428167174448e-05, "loss": 4.0557, "step": 5916 }, { "epoch": 1.9711001915549264, "grad_norm": 0.5625, "learning_rate": 1.3928011179441461e-05, "loss": 3.9608, "step": 5917 }, { "epoch": 1.971433330557175, "grad_norm": 0.55078125, "learning_rate": 1.3927594116836066e-05, "loss": 4.1062, "step": 5918 }, { "epoch": 1.9717664695594237, "grad_norm": 0.60546875, "learning_rate": 1.3927176979363125e-05, "loss": 4.0584, "step": 5919 }, { "epoch": 1.9720996085616722, "grad_norm": 0.58203125, "learning_rate": 1.392675976702749e-05, "loss": 4.0354, "step": 5920 }, { "epoch": 1.972432747563921, "grad_norm": 0.54296875, "learning_rate": 1.3926342479834028e-05, "loss": 4.0427, "step": 5921 }, { "epoch": 1.9727658865661697, "grad_norm": 0.5546875, "learning_rate": 1.3925925117787592e-05, "loss": 4.0478, "step": 5922 }, { "epoch": 1.9730990255684184, "grad_norm": 0.546875, "learning_rate": 1.3925507680893047e-05, "loss": 4.0778, "step": 5923 }, { "epoch": 1.9734321645706672, "grad_norm": 0.5546875, "learning_rate": 1.3925090169155259e-05, "loss": 4.0836, "step": 5924 }, { "epoch": 1.973765303572916, "grad_norm": 0.5703125, "learning_rate": 1.3924672582579084e-05, "loss": 4.0807, "step": 5925 }, { "epoch": 1.9740984425751646, "grad_norm": 0.55078125, "learning_rate": 1.392425492116939e-05, "loss": 4.1116, "step": 5926 }, { "epoch": 1.9744315815774132, "grad_norm": 0.53515625, "learning_rate": 1.392383718493104e-05, "loss": 4.1361, "step": 5927 }, { "epoch": 1.974764720579662, "grad_norm": 0.55078125, "learning_rate": 1.3923419373868901e-05, "loss": 4.1427, "step": 5928 }, { "epoch": 1.9750978595819104, "grad_norm": 0.53125, "learning_rate": 1.392300148798784e-05, "loss": 4.0077, "step": 5929 }, { "epoch": 1.9754309985841592, "grad_norm": 0.55078125, "learning_rate": 1.3922583527292726e-05, "loss": 4.0627, "step": 5930 }, { "epoch": 1.975764137586408, "grad_norm": 0.55859375, "learning_rate": 1.3922165491788423e-05, "loss": 4.0759, "step": 5931 }, { "epoch": 1.9760972765886566, "grad_norm": 0.546875, "learning_rate": 1.39217473814798e-05, "loss": 4.0844, "step": 5932 }, { "epoch": 1.9764304155909054, "grad_norm": 0.54296875, "learning_rate": 1.3921329196371734e-05, "loss": 4.0687, "step": 5933 }, { "epoch": 1.9767635545931541, "grad_norm": 0.5546875, "learning_rate": 1.392091093646909e-05, "loss": 4.0346, "step": 5934 }, { "epoch": 1.9770966935954026, "grad_norm": 0.546875, "learning_rate": 1.3920492601776739e-05, "loss": 4.0932, "step": 5935 }, { "epoch": 1.9774298325976514, "grad_norm": 0.5546875, "learning_rate": 1.3920074192299557e-05, "loss": 4.0822, "step": 5936 }, { "epoch": 1.9777629715999, "grad_norm": 0.5546875, "learning_rate": 1.391965570804242e-05, "loss": 4.0411, "step": 5937 }, { "epoch": 1.9780961106021486, "grad_norm": 0.5703125, "learning_rate": 1.3919237149010194e-05, "loss": 4.0896, "step": 5938 }, { "epoch": 1.9784292496043974, "grad_norm": 0.57421875, "learning_rate": 1.3918818515207762e-05, "loss": 4.0216, "step": 5939 }, { "epoch": 1.978762388606646, "grad_norm": 0.5546875, "learning_rate": 1.3918399806639997e-05, "loss": 4.0282, "step": 5940 }, { "epoch": 1.9790955276088948, "grad_norm": 0.55859375, "learning_rate": 1.3917981023311776e-05, "loss": 4.0716, "step": 5941 }, { "epoch": 1.9794286666111436, "grad_norm": 0.53125, "learning_rate": 1.3917562165227977e-05, "loss": 4.0127, "step": 5942 }, { "epoch": 1.9797618056133923, "grad_norm": 0.58984375, "learning_rate": 1.3917143232393481e-05, "loss": 3.9914, "step": 5943 }, { "epoch": 1.9800949446156408, "grad_norm": 0.5625, "learning_rate": 1.3916724224813163e-05, "loss": 4.1246, "step": 5944 }, { "epoch": 1.9804280836178896, "grad_norm": 0.53515625, "learning_rate": 1.3916305142491909e-05, "loss": 4.0568, "step": 5945 }, { "epoch": 1.980761222620138, "grad_norm": 0.578125, "learning_rate": 1.3915885985434598e-05, "loss": 4.0352, "step": 5946 }, { "epoch": 1.9810943616223868, "grad_norm": 0.56640625, "learning_rate": 1.391546675364611e-05, "loss": 4.086, "step": 5947 }, { "epoch": 1.9814275006246356, "grad_norm": 0.58984375, "learning_rate": 1.391504744713133e-05, "loss": 4.0845, "step": 5948 }, { "epoch": 1.9817606396268843, "grad_norm": 0.546875, "learning_rate": 1.3914628065895145e-05, "loss": 4.0695, "step": 5949 }, { "epoch": 1.982093778629133, "grad_norm": 0.578125, "learning_rate": 1.3914208609942433e-05, "loss": 4.001, "step": 5950 }, { "epoch": 1.9824269176313818, "grad_norm": 0.56640625, "learning_rate": 1.3913789079278088e-05, "loss": 4.0541, "step": 5951 }, { "epoch": 1.9827600566336305, "grad_norm": 0.5703125, "learning_rate": 1.391336947390699e-05, "loss": 3.992, "step": 5952 }, { "epoch": 1.983093195635879, "grad_norm": 0.546875, "learning_rate": 1.3912949793834033e-05, "loss": 4.1186, "step": 5953 }, { "epoch": 1.9834263346381278, "grad_norm": 0.57421875, "learning_rate": 1.3912530039064097e-05, "loss": 4.097, "step": 5954 }, { "epoch": 1.9837594736403763, "grad_norm": 0.55078125, "learning_rate": 1.3912110209602075e-05, "loss": 4.051, "step": 5955 }, { "epoch": 1.984092612642625, "grad_norm": 0.5703125, "learning_rate": 1.391169030545286e-05, "loss": 4.0092, "step": 5956 }, { "epoch": 1.9844257516448738, "grad_norm": 0.546875, "learning_rate": 1.391127032662134e-05, "loss": 3.9961, "step": 5957 }, { "epoch": 1.9847588906471225, "grad_norm": 0.55859375, "learning_rate": 1.3910850273112407e-05, "loss": 4.1179, "step": 5958 }, { "epoch": 1.9850920296493713, "grad_norm": 0.5625, "learning_rate": 1.3910430144930954e-05, "loss": 4.071, "step": 5959 }, { "epoch": 1.98542516865162, "grad_norm": 0.56640625, "learning_rate": 1.3910009942081877e-05, "loss": 3.9987, "step": 5960 }, { "epoch": 1.9857583076538687, "grad_norm": 0.609375, "learning_rate": 1.3909589664570066e-05, "loss": 4.0303, "step": 5961 }, { "epoch": 1.9860914466561173, "grad_norm": 0.55859375, "learning_rate": 1.390916931240042e-05, "loss": 4.0831, "step": 5962 }, { "epoch": 1.986424585658366, "grad_norm": 0.55078125, "learning_rate": 1.3908748885577831e-05, "loss": 4.0331, "step": 5963 }, { "epoch": 1.9867577246606145, "grad_norm": 0.5625, "learning_rate": 1.3908328384107202e-05, "loss": 4.0283, "step": 5964 }, { "epoch": 1.9870908636628632, "grad_norm": 0.5390625, "learning_rate": 1.3907907807993428e-05, "loss": 4.0732, "step": 5965 }, { "epoch": 1.987424002665112, "grad_norm": 0.58203125, "learning_rate": 1.3907487157241404e-05, "loss": 4.0468, "step": 5966 }, { "epoch": 1.9877571416673607, "grad_norm": 0.5703125, "learning_rate": 1.3907066431856036e-05, "loss": 4.0704, "step": 5967 }, { "epoch": 1.9880902806696095, "grad_norm": 0.57421875, "learning_rate": 1.390664563184222e-05, "loss": 4.0855, "step": 5968 }, { "epoch": 1.9884234196718582, "grad_norm": 0.58984375, "learning_rate": 1.3906224757204862e-05, "loss": 4.0282, "step": 5969 }, { "epoch": 1.9887565586741067, "grad_norm": 0.55859375, "learning_rate": 1.390580380794886e-05, "loss": 3.9975, "step": 5970 }, { "epoch": 1.9890896976763555, "grad_norm": 0.55859375, "learning_rate": 1.3905382784079117e-05, "loss": 4.0653, "step": 5971 }, { "epoch": 1.9894228366786042, "grad_norm": 0.5390625, "learning_rate": 1.390496168560054e-05, "loss": 4.0681, "step": 5972 }, { "epoch": 1.9897559756808527, "grad_norm": 0.546875, "learning_rate": 1.3904540512518032e-05, "loss": 4.04, "step": 5973 }, { "epoch": 1.9900891146831015, "grad_norm": 0.54296875, "learning_rate": 1.39041192648365e-05, "loss": 4.0152, "step": 5974 }, { "epoch": 1.9904222536853502, "grad_norm": 0.55078125, "learning_rate": 1.390369794256085e-05, "loss": 4.1082, "step": 5975 }, { "epoch": 1.990755392687599, "grad_norm": 0.56640625, "learning_rate": 1.3903276545695988e-05, "loss": 4.0204, "step": 5976 }, { "epoch": 1.9910885316898477, "grad_norm": 0.546875, "learning_rate": 1.3902855074246823e-05, "loss": 3.9985, "step": 5977 }, { "epoch": 1.9914216706920964, "grad_norm": 0.56640625, "learning_rate": 1.3902433528218267e-05, "loss": 4.0337, "step": 5978 }, { "epoch": 1.991754809694345, "grad_norm": 0.5859375, "learning_rate": 1.3902011907615227e-05, "loss": 3.9914, "step": 5979 }, { "epoch": 1.9920879486965937, "grad_norm": 0.56640625, "learning_rate": 1.3901590212442616e-05, "loss": 4.0391, "step": 5980 }, { "epoch": 1.9924210876988422, "grad_norm": 0.546875, "learning_rate": 1.3901168442705345e-05, "loss": 4.0863, "step": 5981 }, { "epoch": 1.992754226701091, "grad_norm": 0.55078125, "learning_rate": 1.3900746598408325e-05, "loss": 4.0027, "step": 5982 }, { "epoch": 1.9930873657033397, "grad_norm": 0.5625, "learning_rate": 1.3900324679556472e-05, "loss": 4.0352, "step": 5983 }, { "epoch": 1.9934205047055884, "grad_norm": 0.5546875, "learning_rate": 1.38999026861547e-05, "loss": 4.0501, "step": 5984 }, { "epoch": 1.9937536437078371, "grad_norm": 0.5703125, "learning_rate": 1.3899480618207923e-05, "loss": 4.1427, "step": 5985 }, { "epoch": 1.9940867827100859, "grad_norm": 0.54296875, "learning_rate": 1.389905847572106e-05, "loss": 4.0019, "step": 5986 }, { "epoch": 1.9944199217123346, "grad_norm": 0.5546875, "learning_rate": 1.3898636258699022e-05, "loss": 4.1403, "step": 5987 }, { "epoch": 1.9947530607145831, "grad_norm": 0.58984375, "learning_rate": 1.3898213967146736e-05, "loss": 4.0548, "step": 5988 }, { "epoch": 1.9950861997168319, "grad_norm": 0.5625, "learning_rate": 1.3897791601069112e-05, "loss": 4.1837, "step": 5989 }, { "epoch": 1.9954193387190804, "grad_norm": 0.5546875, "learning_rate": 1.3897369160471073e-05, "loss": 4.021, "step": 5990 }, { "epoch": 1.9957524777213291, "grad_norm": 0.55859375, "learning_rate": 1.3896946645357545e-05, "loss": 4.0976, "step": 5991 }, { "epoch": 1.9960856167235779, "grad_norm": 0.5390625, "learning_rate": 1.389652405573344e-05, "loss": 4.0688, "step": 5992 }, { "epoch": 1.9964187557258266, "grad_norm": 0.5546875, "learning_rate": 1.3896101391603686e-05, "loss": 4.0393, "step": 5993 }, { "epoch": 1.9967518947280753, "grad_norm": 0.55078125, "learning_rate": 1.3895678652973204e-05, "loss": 4.0981, "step": 5994 }, { "epoch": 1.997085033730324, "grad_norm": 0.6015625, "learning_rate": 1.389525583984692e-05, "loss": 4.0411, "step": 5995 }, { "epoch": 1.9974181727325728, "grad_norm": 0.578125, "learning_rate": 1.3894832952229756e-05, "loss": 4.1317, "step": 5996 }, { "epoch": 1.9977513117348213, "grad_norm": 0.6015625, "learning_rate": 1.389440999012664e-05, "loss": 3.9895, "step": 5997 }, { "epoch": 1.99808445073707, "grad_norm": 0.54296875, "learning_rate": 1.3893986953542498e-05, "loss": 4.1092, "step": 5998 }, { "epoch": 1.9984175897393186, "grad_norm": 0.57421875, "learning_rate": 1.3893563842482257e-05, "loss": 4.0563, "step": 5999 }, { "epoch": 1.9987507287415673, "grad_norm": 0.56640625, "learning_rate": 1.3893140656950846e-05, "loss": 4.0496, "step": 6000 }, { "epoch": 1.999083867743816, "grad_norm": 0.53515625, "learning_rate": 1.3892717396953193e-05, "loss": 4.1292, "step": 6001 }, { "epoch": 1.9994170067460648, "grad_norm": 0.546875, "learning_rate": 1.389229406249423e-05, "loss": 4.0536, "step": 6002 }, { "epoch": 1.9997501457483136, "grad_norm": 0.56640625, "learning_rate": 1.3891870653578885e-05, "loss": 4.086, "step": 6003 }, { "epoch": 2.0, "grad_norm": 0.66796875, "learning_rate": 1.3891447170212095e-05, "loss": 4.0345, "step": 6004 }, { "epoch": 2.0003331390022487, "grad_norm": 0.5234375, "learning_rate": 1.3891023612398788e-05, "loss": 4.0591, "step": 6005 }, { "epoch": 2.0006662780044975, "grad_norm": 0.55078125, "learning_rate": 1.3890599980143896e-05, "loss": 4.06, "step": 6006 }, { "epoch": 2.000999417006746, "grad_norm": 0.51171875, "learning_rate": 1.3890176273452362e-05, "loss": 4.0176, "step": 6007 }, { "epoch": 2.001332556008995, "grad_norm": 0.55078125, "learning_rate": 1.3889752492329111e-05, "loss": 4.0637, "step": 6008 }, { "epoch": 2.0016656950112433, "grad_norm": 0.56640625, "learning_rate": 1.3889328636779084e-05, "loss": 4.0292, "step": 6009 }, { "epoch": 2.001998834013492, "grad_norm": 0.5546875, "learning_rate": 1.388890470680722e-05, "loss": 4.0101, "step": 6010 }, { "epoch": 2.0023319730157407, "grad_norm": 0.55859375, "learning_rate": 1.3888480702418451e-05, "loss": 4.1315, "step": 6011 }, { "epoch": 2.0026651120179895, "grad_norm": 0.55859375, "learning_rate": 1.388805662361772e-05, "loss": 4.0506, "step": 6012 }, { "epoch": 2.002998251020238, "grad_norm": 0.55859375, "learning_rate": 1.3887632470409967e-05, "loss": 4.0824, "step": 6013 }, { "epoch": 2.003331390022487, "grad_norm": 0.53125, "learning_rate": 1.3887208242800134e-05, "loss": 4.0673, "step": 6014 }, { "epoch": 2.0036645290247357, "grad_norm": 0.55859375, "learning_rate": 1.3886783940793156e-05, "loss": 4.0021, "step": 6015 }, { "epoch": 2.0039976680269844, "grad_norm": 0.58984375, "learning_rate": 1.388635956439398e-05, "loss": 4.0424, "step": 6016 }, { "epoch": 2.004330807029233, "grad_norm": 0.57421875, "learning_rate": 1.3885935113607548e-05, "loss": 4.0275, "step": 6017 }, { "epoch": 2.0046639460314815, "grad_norm": 0.53125, "learning_rate": 1.3885510588438803e-05, "loss": 4.0486, "step": 6018 }, { "epoch": 2.00499708503373, "grad_norm": 0.56640625, "learning_rate": 1.3885085988892691e-05, "loss": 4.0364, "step": 6019 }, { "epoch": 2.005330224035979, "grad_norm": 0.609375, "learning_rate": 1.388466131497416e-05, "loss": 4.0441, "step": 6020 }, { "epoch": 2.0056633630382277, "grad_norm": 0.54296875, "learning_rate": 1.3884236566688149e-05, "loss": 4.0519, "step": 6021 }, { "epoch": 2.0059965020404764, "grad_norm": 0.546875, "learning_rate": 1.3883811744039613e-05, "loss": 3.9804, "step": 6022 }, { "epoch": 2.006329641042725, "grad_norm": 0.55078125, "learning_rate": 1.3883386847033496e-05, "loss": 3.9975, "step": 6023 }, { "epoch": 2.006662780044974, "grad_norm": 0.5703125, "learning_rate": 1.388296187567475e-05, "loss": 4.043, "step": 6024 }, { "epoch": 2.0069959190472226, "grad_norm": 0.58203125, "learning_rate": 1.3882536829968323e-05, "loss": 4.0205, "step": 6025 }, { "epoch": 2.007329058049471, "grad_norm": 0.546875, "learning_rate": 1.3882111709919168e-05, "loss": 4.0685, "step": 6026 }, { "epoch": 2.0076621970517197, "grad_norm": 0.546875, "learning_rate": 1.3881686515532232e-05, "loss": 4.0912, "step": 6027 }, { "epoch": 2.0079953360539684, "grad_norm": 0.5625, "learning_rate": 1.3881261246812474e-05, "loss": 3.9909, "step": 6028 }, { "epoch": 2.008328475056217, "grad_norm": 0.57421875, "learning_rate": 1.3880835903764844e-05, "loss": 4.0434, "step": 6029 }, { "epoch": 2.008661614058466, "grad_norm": 0.5703125, "learning_rate": 1.3880410486394297e-05, "loss": 4.019, "step": 6030 }, { "epoch": 2.0089947530607146, "grad_norm": 0.54296875, "learning_rate": 1.3879984994705783e-05, "loss": 4.0837, "step": 6031 }, { "epoch": 2.0093278920629634, "grad_norm": 0.55859375, "learning_rate": 1.3879559428704269e-05, "loss": 4.0408, "step": 6032 }, { "epoch": 2.009661031065212, "grad_norm": 0.578125, "learning_rate": 1.3879133788394702e-05, "loss": 4.0465, "step": 6033 }, { "epoch": 2.009994170067461, "grad_norm": 0.55859375, "learning_rate": 1.3878708073782045e-05, "loss": 4.0624, "step": 6034 }, { "epoch": 2.010327309069709, "grad_norm": 0.578125, "learning_rate": 1.3878282284871254e-05, "loss": 4.0756, "step": 6035 }, { "epoch": 2.010660448071958, "grad_norm": 0.5390625, "learning_rate": 1.3877856421667292e-05, "loss": 4.0245, "step": 6036 }, { "epoch": 2.0109935870742066, "grad_norm": 0.56640625, "learning_rate": 1.3877430484175114e-05, "loss": 4.1018, "step": 6037 }, { "epoch": 2.0113267260764554, "grad_norm": 0.58203125, "learning_rate": 1.3877004472399684e-05, "loss": 4.027, "step": 6038 }, { "epoch": 2.011659865078704, "grad_norm": 0.55859375, "learning_rate": 1.3876578386345968e-05, "loss": 3.9976, "step": 6039 }, { "epoch": 2.011993004080953, "grad_norm": 0.56640625, "learning_rate": 1.3876152226018924e-05, "loss": 4.0624, "step": 6040 }, { "epoch": 2.0123261430832016, "grad_norm": 0.55859375, "learning_rate": 1.3875725991423518e-05, "loss": 4.0916, "step": 6041 }, { "epoch": 2.0126592820854503, "grad_norm": 0.55078125, "learning_rate": 1.3875299682564712e-05, "loss": 3.9995, "step": 6042 }, { "epoch": 2.012992421087699, "grad_norm": 0.54296875, "learning_rate": 1.3874873299447475e-05, "loss": 4.0378, "step": 6043 }, { "epoch": 2.0133255600899473, "grad_norm": 0.53125, "learning_rate": 1.387444684207677e-05, "loss": 4.0339, "step": 6044 }, { "epoch": 2.013658699092196, "grad_norm": 0.5625, "learning_rate": 1.387402031045757e-05, "loss": 4.0988, "step": 6045 }, { "epoch": 2.013991838094445, "grad_norm": 0.60546875, "learning_rate": 1.3873593704594838e-05, "loss": 3.9581, "step": 6046 }, { "epoch": 2.0143249770966936, "grad_norm": 0.57421875, "learning_rate": 1.3873167024493544e-05, "loss": 3.977, "step": 6047 }, { "epoch": 2.0146581160989423, "grad_norm": 0.54296875, "learning_rate": 1.3872740270158658e-05, "loss": 4.114, "step": 6048 }, { "epoch": 2.014991255101191, "grad_norm": 0.55078125, "learning_rate": 1.3872313441595152e-05, "loss": 4.0545, "step": 6049 }, { "epoch": 2.0153243941034398, "grad_norm": 0.58203125, "learning_rate": 1.3871886538807997e-05, "loss": 4.0472, "step": 6050 }, { "epoch": 2.0156575331056885, "grad_norm": 0.57421875, "learning_rate": 1.3871459561802165e-05, "loss": 4.0742, "step": 6051 }, { "epoch": 2.0159906721079373, "grad_norm": 0.5625, "learning_rate": 1.3871032510582627e-05, "loss": 4.0782, "step": 6052 }, { "epoch": 2.0163238111101855, "grad_norm": 0.6015625, "learning_rate": 1.3870605385154363e-05, "loss": 4.0398, "step": 6053 }, { "epoch": 2.0166569501124343, "grad_norm": 0.5546875, "learning_rate": 1.3870178185522345e-05, "loss": 4.0437, "step": 6054 }, { "epoch": 2.016990089114683, "grad_norm": 0.52734375, "learning_rate": 1.3869750911691548e-05, "loss": 4.0455, "step": 6055 }, { "epoch": 2.0173232281169318, "grad_norm": 0.53515625, "learning_rate": 1.3869323563666949e-05, "loss": 4.0347, "step": 6056 }, { "epoch": 2.0176563671191805, "grad_norm": 0.55859375, "learning_rate": 1.386889614145353e-05, "loss": 4.0203, "step": 6057 }, { "epoch": 2.0179895061214292, "grad_norm": 0.5625, "learning_rate": 1.3868468645056262e-05, "loss": 4.0022, "step": 6058 }, { "epoch": 2.018322645123678, "grad_norm": 0.5625, "learning_rate": 1.386804107448013e-05, "loss": 4.0434, "step": 6059 }, { "epoch": 2.0186557841259267, "grad_norm": 0.56640625, "learning_rate": 1.386761342973011e-05, "loss": 4.0774, "step": 6060 }, { "epoch": 2.018988923128175, "grad_norm": 0.578125, "learning_rate": 1.386718571081119e-05, "loss": 4.0985, "step": 6061 }, { "epoch": 2.0193220621304238, "grad_norm": 0.546875, "learning_rate": 1.3866757917728346e-05, "loss": 3.9817, "step": 6062 }, { "epoch": 2.0196552011326725, "grad_norm": 0.5546875, "learning_rate": 1.3866330050486564e-05, "loss": 4.0495, "step": 6063 }, { "epoch": 2.0199883401349212, "grad_norm": 0.546875, "learning_rate": 1.3865902109090823e-05, "loss": 4.1691, "step": 6064 }, { "epoch": 2.02032147913717, "grad_norm": 0.55078125, "learning_rate": 1.3865474093546114e-05, "loss": 4.0503, "step": 6065 }, { "epoch": 2.0206546181394187, "grad_norm": 0.55078125, "learning_rate": 1.3865046003857418e-05, "loss": 3.9906, "step": 6066 }, { "epoch": 2.0209877571416675, "grad_norm": 0.55859375, "learning_rate": 1.3864617840029724e-05, "loss": 4.0406, "step": 6067 }, { "epoch": 2.021320896143916, "grad_norm": 0.55078125, "learning_rate": 1.3864189602068015e-05, "loss": 3.9984, "step": 6068 }, { "epoch": 2.021654035146165, "grad_norm": 0.5625, "learning_rate": 1.3863761289977284e-05, "loss": 4.0241, "step": 6069 }, { "epoch": 2.0219871741484132, "grad_norm": 0.53515625, "learning_rate": 1.3863332903762517e-05, "loss": 4.0016, "step": 6070 }, { "epoch": 2.022320313150662, "grad_norm": 0.515625, "learning_rate": 1.3862904443428705e-05, "loss": 4.0495, "step": 6071 }, { "epoch": 2.0226534521529107, "grad_norm": 0.56640625, "learning_rate": 1.3862475908980839e-05, "loss": 4.0511, "step": 6072 }, { "epoch": 2.0229865911551594, "grad_norm": 0.5390625, "learning_rate": 1.386204730042391e-05, "loss": 4.001, "step": 6073 }, { "epoch": 2.023319730157408, "grad_norm": 0.5859375, "learning_rate": 1.3861618617762908e-05, "loss": 4.0447, "step": 6074 }, { "epoch": 2.023652869159657, "grad_norm": 0.5625, "learning_rate": 1.386118986100283e-05, "loss": 4.0138, "step": 6075 }, { "epoch": 2.0239860081619057, "grad_norm": 0.58203125, "learning_rate": 1.3860761030148668e-05, "loss": 4.0745, "step": 6076 }, { "epoch": 2.0243191471641544, "grad_norm": 0.5390625, "learning_rate": 1.3860332125205418e-05, "loss": 4.0818, "step": 6077 }, { "epoch": 2.024652286166403, "grad_norm": 0.55078125, "learning_rate": 1.3859903146178075e-05, "loss": 3.9913, "step": 6078 }, { "epoch": 2.0249854251686514, "grad_norm": 0.5546875, "learning_rate": 1.3859474093071635e-05, "loss": 4.0358, "step": 6079 }, { "epoch": 2.0253185641709, "grad_norm": 0.55859375, "learning_rate": 1.3859044965891097e-05, "loss": 4.0716, "step": 6080 }, { "epoch": 2.025651703173149, "grad_norm": 0.55078125, "learning_rate": 1.3858615764641459e-05, "loss": 4.0363, "step": 6081 }, { "epoch": 2.0259848421753976, "grad_norm": 0.5625, "learning_rate": 1.385818648932772e-05, "loss": 3.9873, "step": 6082 }, { "epoch": 2.0263179811776464, "grad_norm": 0.58984375, "learning_rate": 1.3857757139954882e-05, "loss": 4.0307, "step": 6083 }, { "epoch": 2.026651120179895, "grad_norm": 0.55859375, "learning_rate": 1.3857327716527941e-05, "loss": 4.0008, "step": 6084 }, { "epoch": 2.026984259182144, "grad_norm": 0.53515625, "learning_rate": 1.3856898219051903e-05, "loss": 4.0589, "step": 6085 }, { "epoch": 2.0273173981843926, "grad_norm": 0.56640625, "learning_rate": 1.3856468647531771e-05, "loss": 3.9606, "step": 6086 }, { "epoch": 2.0276505371866413, "grad_norm": 0.578125, "learning_rate": 1.3856039001972546e-05, "loss": 3.9665, "step": 6087 }, { "epoch": 2.0279836761888896, "grad_norm": 0.5859375, "learning_rate": 1.3855609282379236e-05, "loss": 4.009, "step": 6088 }, { "epoch": 2.0283168151911384, "grad_norm": 0.58203125, "learning_rate": 1.385517948875684e-05, "loss": 4.0538, "step": 6089 }, { "epoch": 2.028649954193387, "grad_norm": 0.60546875, "learning_rate": 1.3854749621110372e-05, "loss": 4.0088, "step": 6090 }, { "epoch": 2.028983093195636, "grad_norm": 0.5625, "learning_rate": 1.3854319679444832e-05, "loss": 4.0263, "step": 6091 }, { "epoch": 2.0293162321978846, "grad_norm": 0.5546875, "learning_rate": 1.3853889663765234e-05, "loss": 4.0907, "step": 6092 }, { "epoch": 2.0296493712001333, "grad_norm": 0.55859375, "learning_rate": 1.3853459574076585e-05, "loss": 4.038, "step": 6093 }, { "epoch": 2.029982510202382, "grad_norm": 0.5546875, "learning_rate": 1.385302941038389e-05, "loss": 4.0372, "step": 6094 }, { "epoch": 2.030315649204631, "grad_norm": 0.56640625, "learning_rate": 1.3852599172692165e-05, "loss": 4.0209, "step": 6095 }, { "epoch": 2.030648788206879, "grad_norm": 0.5390625, "learning_rate": 1.3852168861006418e-05, "loss": 4.0975, "step": 6096 }, { "epoch": 2.030981927209128, "grad_norm": 0.52734375, "learning_rate": 1.3851738475331663e-05, "loss": 4.1164, "step": 6097 }, { "epoch": 2.0313150662113766, "grad_norm": 0.5703125, "learning_rate": 1.3851308015672914e-05, "loss": 4.0413, "step": 6098 }, { "epoch": 2.0316482052136253, "grad_norm": 0.5703125, "learning_rate": 1.3850877482035183e-05, "loss": 4.0079, "step": 6099 }, { "epoch": 2.031981344215874, "grad_norm": 0.52734375, "learning_rate": 1.3850446874423485e-05, "loss": 4.1131, "step": 6100 }, { "epoch": 2.032314483218123, "grad_norm": 0.5546875, "learning_rate": 1.3850016192842836e-05, "loss": 4.0594, "step": 6101 }, { "epoch": 2.0326476222203715, "grad_norm": 0.54296875, "learning_rate": 1.3849585437298251e-05, "loss": 4.0478, "step": 6102 }, { "epoch": 2.0329807612226203, "grad_norm": 0.546875, "learning_rate": 1.3849154607794751e-05, "loss": 4.0553, "step": 6103 }, { "epoch": 2.033313900224869, "grad_norm": 0.546875, "learning_rate": 1.3848723704337352e-05, "loss": 4.0567, "step": 6104 }, { "epoch": 2.0336470392271173, "grad_norm": 0.57421875, "learning_rate": 1.3848292726931072e-05, "loss": 4.0156, "step": 6105 }, { "epoch": 2.033980178229366, "grad_norm": 0.55078125, "learning_rate": 1.3847861675580932e-05, "loss": 4.0333, "step": 6106 }, { "epoch": 2.034313317231615, "grad_norm": 0.5546875, "learning_rate": 1.3847430550291953e-05, "loss": 4.0344, "step": 6107 }, { "epoch": 2.0346464562338635, "grad_norm": 0.55078125, "learning_rate": 1.3846999351069158e-05, "loss": 4.0348, "step": 6108 }, { "epoch": 2.0349795952361123, "grad_norm": 0.5546875, "learning_rate": 1.3846568077917568e-05, "loss": 4.0327, "step": 6109 }, { "epoch": 2.035312734238361, "grad_norm": 0.55859375, "learning_rate": 1.3846136730842204e-05, "loss": 4.1142, "step": 6110 }, { "epoch": 2.0356458732406097, "grad_norm": 0.5625, "learning_rate": 1.3845705309848095e-05, "loss": 4.0259, "step": 6111 }, { "epoch": 2.0359790122428585, "grad_norm": 0.55078125, "learning_rate": 1.3845273814940262e-05, "loss": 4.1145, "step": 6112 }, { "epoch": 2.0363121512451072, "grad_norm": 0.5625, "learning_rate": 1.3844842246123736e-05, "loss": 4.0134, "step": 6113 }, { "epoch": 2.0366452902473555, "grad_norm": 0.58984375, "learning_rate": 1.3844410603403539e-05, "loss": 4.0453, "step": 6114 }, { "epoch": 2.0369784292496043, "grad_norm": 0.54296875, "learning_rate": 1.38439788867847e-05, "loss": 4.0243, "step": 6115 }, { "epoch": 2.037311568251853, "grad_norm": 0.54296875, "learning_rate": 1.3843547096272248e-05, "loss": 4.0234, "step": 6116 }, { "epoch": 2.0376447072541017, "grad_norm": 0.5625, "learning_rate": 1.3843115231871213e-05, "loss": 4.0483, "step": 6117 }, { "epoch": 2.0379778462563505, "grad_norm": 0.56640625, "learning_rate": 1.3842683293586624e-05, "loss": 4.0026, "step": 6118 }, { "epoch": 2.038310985258599, "grad_norm": 0.578125, "learning_rate": 1.3842251281423514e-05, "loss": 3.9684, "step": 6119 }, { "epoch": 2.038644124260848, "grad_norm": 0.5703125, "learning_rate": 1.3841819195386913e-05, "loss": 4.0651, "step": 6120 }, { "epoch": 2.0389772632630967, "grad_norm": 0.5625, "learning_rate": 1.3841387035481857e-05, "loss": 4.0111, "step": 6121 }, { "epoch": 2.039310402265345, "grad_norm": 0.546875, "learning_rate": 1.3840954801713375e-05, "loss": 3.9986, "step": 6122 }, { "epoch": 2.0396435412675937, "grad_norm": 0.55859375, "learning_rate": 1.3840522494086504e-05, "loss": 3.9896, "step": 6123 }, { "epoch": 2.0399766802698425, "grad_norm": 0.5390625, "learning_rate": 1.384009011260628e-05, "loss": 4.0968, "step": 6124 }, { "epoch": 2.040309819272091, "grad_norm": 0.55078125, "learning_rate": 1.3839657657277738e-05, "loss": 4.0369, "step": 6125 }, { "epoch": 2.04064295827434, "grad_norm": 0.5703125, "learning_rate": 1.3839225128105918e-05, "loss": 4.009, "step": 6126 }, { "epoch": 2.0409760972765887, "grad_norm": 0.5625, "learning_rate": 1.3838792525095855e-05, "loss": 3.9623, "step": 6127 }, { "epoch": 2.0413092362788374, "grad_norm": 0.55859375, "learning_rate": 1.3838359848252587e-05, "loss": 4.0763, "step": 6128 }, { "epoch": 2.041642375281086, "grad_norm": 0.55859375, "learning_rate": 1.3837927097581159e-05, "loss": 4.0125, "step": 6129 }, { "epoch": 2.041975514283335, "grad_norm": 0.5703125, "learning_rate": 1.3837494273086607e-05, "loss": 4.0853, "step": 6130 }, { "epoch": 2.042308653285583, "grad_norm": 0.54296875, "learning_rate": 1.3837061374773971e-05, "loss": 3.9757, "step": 6131 }, { "epoch": 2.042641792287832, "grad_norm": 0.56640625, "learning_rate": 1.38366284026483e-05, "loss": 4.0809, "step": 6132 }, { "epoch": 2.0429749312900807, "grad_norm": 0.57421875, "learning_rate": 1.3836195356714632e-05, "loss": 4.0729, "step": 6133 }, { "epoch": 2.0433080702923294, "grad_norm": 0.57421875, "learning_rate": 1.3835762236978011e-05, "loss": 4.0823, "step": 6134 }, { "epoch": 2.043641209294578, "grad_norm": 0.58203125, "learning_rate": 1.3835329043443483e-05, "loss": 4.0258, "step": 6135 }, { "epoch": 2.043974348296827, "grad_norm": 0.53125, "learning_rate": 1.3834895776116096e-05, "loss": 4.0994, "step": 6136 }, { "epoch": 2.0443074872990756, "grad_norm": 0.5546875, "learning_rate": 1.3834462435000894e-05, "loss": 4.0696, "step": 6137 }, { "epoch": 2.0446406263013244, "grad_norm": 0.5703125, "learning_rate": 1.3834029020102925e-05, "loss": 4.0958, "step": 6138 }, { "epoch": 2.044973765303573, "grad_norm": 0.56640625, "learning_rate": 1.3833595531427238e-05, "loss": 4.0686, "step": 6139 }, { "epoch": 2.0453069043058214, "grad_norm": 0.5390625, "learning_rate": 1.3833161968978878e-05, "loss": 4.0725, "step": 6140 }, { "epoch": 2.04564004330807, "grad_norm": 0.55078125, "learning_rate": 1.3832728332762903e-05, "loss": 4.0018, "step": 6141 }, { "epoch": 2.045973182310319, "grad_norm": 0.56640625, "learning_rate": 1.3832294622784357e-05, "loss": 4.0708, "step": 6142 }, { "epoch": 2.0463063213125676, "grad_norm": 0.5625, "learning_rate": 1.3831860839048296e-05, "loss": 3.9708, "step": 6143 }, { "epoch": 2.0466394603148164, "grad_norm": 0.5703125, "learning_rate": 1.3831426981559769e-05, "loss": 4.0361, "step": 6144 }, { "epoch": 2.046972599317065, "grad_norm": 0.5625, "learning_rate": 1.3830993050323834e-05, "loss": 4.0331, "step": 6145 }, { "epoch": 2.047305738319314, "grad_norm": 0.54296875, "learning_rate": 1.383055904534554e-05, "loss": 4.0615, "step": 6146 }, { "epoch": 2.0476388773215626, "grad_norm": 0.5703125, "learning_rate": 1.3830124966629946e-05, "loss": 4.0434, "step": 6147 }, { "epoch": 2.0479720163238113, "grad_norm": 0.58203125, "learning_rate": 1.382969081418211e-05, "loss": 4.0159, "step": 6148 }, { "epoch": 2.0483051553260596, "grad_norm": 0.5390625, "learning_rate": 1.3829256588007085e-05, "loss": 4.0727, "step": 6149 }, { "epoch": 2.0486382943283084, "grad_norm": 0.578125, "learning_rate": 1.3828822288109928e-05, "loss": 4.0973, "step": 6150 }, { "epoch": 2.048971433330557, "grad_norm": 0.53125, "learning_rate": 1.38283879144957e-05, "loss": 4.1105, "step": 6151 }, { "epoch": 2.049304572332806, "grad_norm": 0.5703125, "learning_rate": 1.382795346716946e-05, "loss": 4.0652, "step": 6152 }, { "epoch": 2.0496377113350546, "grad_norm": 0.5703125, "learning_rate": 1.382751894613627e-05, "loss": 4.0778, "step": 6153 }, { "epoch": 2.0499708503373033, "grad_norm": 0.57421875, "learning_rate": 1.3827084351401188e-05, "loss": 4.0657, "step": 6154 }, { "epoch": 2.050303989339552, "grad_norm": 0.56640625, "learning_rate": 1.3826649682969279e-05, "loss": 3.9935, "step": 6155 }, { "epoch": 2.050637128341801, "grad_norm": 0.58203125, "learning_rate": 1.3826214940845603e-05, "loss": 4.1174, "step": 6156 }, { "epoch": 2.0509702673440495, "grad_norm": 0.5859375, "learning_rate": 1.3825780125035227e-05, "loss": 4.0156, "step": 6157 }, { "epoch": 2.051303406346298, "grad_norm": 0.5625, "learning_rate": 1.3825345235543213e-05, "loss": 4.0667, "step": 6158 }, { "epoch": 2.0516365453485466, "grad_norm": 0.53515625, "learning_rate": 1.3824910272374628e-05, "loss": 4.0823, "step": 6159 }, { "epoch": 2.0519696843507953, "grad_norm": 0.55078125, "learning_rate": 1.3824475235534538e-05, "loss": 4.0769, "step": 6160 }, { "epoch": 2.052302823353044, "grad_norm": 0.58984375, "learning_rate": 1.3824040125028012e-05, "loss": 4.0753, "step": 6161 }, { "epoch": 2.0526359623552928, "grad_norm": 0.5703125, "learning_rate": 1.3823604940860115e-05, "loss": 3.957, "step": 6162 }, { "epoch": 2.0529691013575415, "grad_norm": 0.5546875, "learning_rate": 1.3823169683035917e-05, "loss": 4.0758, "step": 6163 }, { "epoch": 2.0533022403597903, "grad_norm": 0.5390625, "learning_rate": 1.382273435156049e-05, "loss": 4.0378, "step": 6164 }, { "epoch": 2.053635379362039, "grad_norm": 0.55078125, "learning_rate": 1.38222989464389e-05, "loss": 4.0199, "step": 6165 }, { "epoch": 2.0539685183642873, "grad_norm": 0.6015625, "learning_rate": 1.3821863467676224e-05, "loss": 3.9889, "step": 6166 }, { "epoch": 2.054301657366536, "grad_norm": 0.5546875, "learning_rate": 1.382142791527753e-05, "loss": 4.0164, "step": 6167 }, { "epoch": 2.0546347963687848, "grad_norm": 0.5703125, "learning_rate": 1.3820992289247894e-05, "loss": 4.009, "step": 6168 }, { "epoch": 2.0549679353710335, "grad_norm": 0.546875, "learning_rate": 1.3820556589592388e-05, "loss": 4.0294, "step": 6169 }, { "epoch": 2.0553010743732822, "grad_norm": 0.58203125, "learning_rate": 1.3820120816316088e-05, "loss": 4.0266, "step": 6170 }, { "epoch": 2.055634213375531, "grad_norm": 0.53515625, "learning_rate": 1.381968496942407e-05, "loss": 4.1013, "step": 6171 }, { "epoch": 2.0559673523777797, "grad_norm": 0.55078125, "learning_rate": 1.3819249048921412e-05, "loss": 4.0684, "step": 6172 }, { "epoch": 2.0563004913800285, "grad_norm": 0.546875, "learning_rate": 1.381881305481319e-05, "loss": 4.0318, "step": 6173 }, { "epoch": 2.056633630382277, "grad_norm": 0.578125, "learning_rate": 1.3818376987104481e-05, "loss": 4.0174, "step": 6174 }, { "epoch": 2.0569667693845255, "grad_norm": 0.55859375, "learning_rate": 1.3817940845800365e-05, "loss": 4.0498, "step": 6175 }, { "epoch": 2.0572999083867742, "grad_norm": 0.55078125, "learning_rate": 1.3817504630905925e-05, "loss": 4.0394, "step": 6176 }, { "epoch": 2.057633047389023, "grad_norm": 0.5625, "learning_rate": 1.381706834242624e-05, "loss": 4.0561, "step": 6177 }, { "epoch": 2.0579661863912717, "grad_norm": 0.5703125, "learning_rate": 1.3816631980366388e-05, "loss": 4.0179, "step": 6178 }, { "epoch": 2.0582993253935205, "grad_norm": 0.5859375, "learning_rate": 1.3816195544731459e-05, "loss": 4.0188, "step": 6179 }, { "epoch": 2.058632464395769, "grad_norm": 0.58984375, "learning_rate": 1.3815759035526529e-05, "loss": 3.9783, "step": 6180 }, { "epoch": 2.058965603398018, "grad_norm": 0.5546875, "learning_rate": 1.3815322452756688e-05, "loss": 4.0887, "step": 6181 }, { "epoch": 2.0592987424002667, "grad_norm": 0.5859375, "learning_rate": 1.3814885796427021e-05, "loss": 4.0051, "step": 6182 }, { "epoch": 2.0596318814025154, "grad_norm": 0.5703125, "learning_rate": 1.381444906654261e-05, "loss": 4.0004, "step": 6183 }, { "epoch": 2.0599650204047637, "grad_norm": 0.5859375, "learning_rate": 1.3814012263108547e-05, "loss": 4.0363, "step": 6184 }, { "epoch": 2.0602981594070124, "grad_norm": 0.55859375, "learning_rate": 1.3813575386129916e-05, "loss": 4.0502, "step": 6185 }, { "epoch": 2.060631298409261, "grad_norm": 0.56640625, "learning_rate": 1.3813138435611807e-05, "loss": 4.0608, "step": 6186 }, { "epoch": 2.06096443741151, "grad_norm": 0.5625, "learning_rate": 1.381270141155931e-05, "loss": 3.9908, "step": 6187 }, { "epoch": 2.0612975764137587, "grad_norm": 0.59375, "learning_rate": 1.3812264313977514e-05, "loss": 4.0876, "step": 6188 }, { "epoch": 2.0616307154160074, "grad_norm": 0.58203125, "learning_rate": 1.381182714287151e-05, "loss": 4.0813, "step": 6189 }, { "epoch": 2.061963854418256, "grad_norm": 0.56640625, "learning_rate": 1.3811389898246393e-05, "loss": 4.0251, "step": 6190 }, { "epoch": 2.062296993420505, "grad_norm": 0.578125, "learning_rate": 1.3810952580107252e-05, "loss": 4.0302, "step": 6191 }, { "epoch": 2.062630132422753, "grad_norm": 0.55078125, "learning_rate": 1.3810515188459185e-05, "loss": 4.0796, "step": 6192 }, { "epoch": 2.062963271425002, "grad_norm": 0.5703125, "learning_rate": 1.3810077723307286e-05, "loss": 4.0188, "step": 6193 }, { "epoch": 2.0632964104272506, "grad_norm": 0.58203125, "learning_rate": 1.3809640184656645e-05, "loss": 4.0286, "step": 6194 }, { "epoch": 2.0636295494294994, "grad_norm": 0.546875, "learning_rate": 1.3809202572512366e-05, "loss": 4.0878, "step": 6195 }, { "epoch": 2.063962688431748, "grad_norm": 0.57421875, "learning_rate": 1.3808764886879543e-05, "loss": 4.1049, "step": 6196 }, { "epoch": 2.064295827433997, "grad_norm": 0.56640625, "learning_rate": 1.3808327127763272e-05, "loss": 4.0085, "step": 6197 }, { "epoch": 2.0646289664362456, "grad_norm": 0.58984375, "learning_rate": 1.3807889295168653e-05, "loss": 4.0939, "step": 6198 }, { "epoch": 2.0649621054384943, "grad_norm": 0.58203125, "learning_rate": 1.3807451389100787e-05, "loss": 4.0793, "step": 6199 }, { "epoch": 2.065295244440743, "grad_norm": 0.5703125, "learning_rate": 1.3807013409564776e-05, "loss": 4.0398, "step": 6200 }, { "epoch": 2.0656283834429914, "grad_norm": 0.58203125, "learning_rate": 1.380657535656572e-05, "loss": 4.0256, "step": 6201 }, { "epoch": 2.06596152244524, "grad_norm": 0.55859375, "learning_rate": 1.3806137230108719e-05, "loss": 4.0654, "step": 6202 }, { "epoch": 2.066294661447489, "grad_norm": 0.5859375, "learning_rate": 1.3805699030198877e-05, "loss": 4.0114, "step": 6203 }, { "epoch": 2.0666278004497376, "grad_norm": 0.5625, "learning_rate": 1.3805260756841303e-05, "loss": 4.0509, "step": 6204 }, { "epoch": 2.0669609394519863, "grad_norm": 0.5859375, "learning_rate": 1.3804822410041096e-05, "loss": 3.9818, "step": 6205 }, { "epoch": 2.067294078454235, "grad_norm": 0.58984375, "learning_rate": 1.3804383989803366e-05, "loss": 4.0458, "step": 6206 }, { "epoch": 2.067627217456484, "grad_norm": 0.5625, "learning_rate": 1.3803945496133216e-05, "loss": 4.0618, "step": 6207 }, { "epoch": 2.0679603564587326, "grad_norm": 0.56640625, "learning_rate": 1.3803506929035758e-05, "loss": 4.0398, "step": 6208 }, { "epoch": 2.0682934954609813, "grad_norm": 0.55859375, "learning_rate": 1.3803068288516094e-05, "loss": 3.943, "step": 6209 }, { "epoch": 2.0686266344632296, "grad_norm": 0.5546875, "learning_rate": 1.3802629574579338e-05, "loss": 4.037, "step": 6210 }, { "epoch": 2.0689597734654783, "grad_norm": 0.55859375, "learning_rate": 1.3802190787230599e-05, "loss": 4.0481, "step": 6211 }, { "epoch": 2.069292912467727, "grad_norm": 0.58203125, "learning_rate": 1.380175192647499e-05, "loss": 4.1434, "step": 6212 }, { "epoch": 2.069626051469976, "grad_norm": 0.5625, "learning_rate": 1.380131299231762e-05, "loss": 4.1292, "step": 6213 }, { "epoch": 2.0699591904722245, "grad_norm": 0.5546875, "learning_rate": 1.38008739847636e-05, "loss": 4.089, "step": 6214 }, { "epoch": 2.0702923294744733, "grad_norm": 0.56640625, "learning_rate": 1.3800434903818048e-05, "loss": 4.1127, "step": 6215 }, { "epoch": 2.070625468476722, "grad_norm": 0.60546875, "learning_rate": 1.3799995749486075e-05, "loss": 3.9302, "step": 6216 }, { "epoch": 2.0709586074789708, "grad_norm": 0.55859375, "learning_rate": 1.3799556521772797e-05, "loss": 4.0502, "step": 6217 }, { "epoch": 2.0712917464812195, "grad_norm": 0.5546875, "learning_rate": 1.3799117220683331e-05, "loss": 3.9995, "step": 6218 }, { "epoch": 2.071624885483468, "grad_norm": 0.55078125, "learning_rate": 1.3798677846222793e-05, "loss": 3.9721, "step": 6219 }, { "epoch": 2.0719580244857165, "grad_norm": 0.5625, "learning_rate": 1.3798238398396302e-05, "loss": 4.019, "step": 6220 }, { "epoch": 2.0722911634879653, "grad_norm": 0.578125, "learning_rate": 1.3797798877208975e-05, "loss": 4.0497, "step": 6221 }, { "epoch": 2.072624302490214, "grad_norm": 0.57421875, "learning_rate": 1.3797359282665932e-05, "loss": 4.0951, "step": 6222 }, { "epoch": 2.0729574414924627, "grad_norm": 0.5703125, "learning_rate": 1.3796919614772292e-05, "loss": 4.0534, "step": 6223 }, { "epoch": 2.0732905804947115, "grad_norm": 0.56640625, "learning_rate": 1.379647987353318e-05, "loss": 4.0388, "step": 6224 }, { "epoch": 2.0736237194969602, "grad_norm": 0.58203125, "learning_rate": 1.3796040058953714e-05, "loss": 4.0241, "step": 6225 }, { "epoch": 2.073956858499209, "grad_norm": 0.578125, "learning_rate": 1.3795600171039017e-05, "loss": 4.1023, "step": 6226 }, { "epoch": 2.0742899975014577, "grad_norm": 0.58203125, "learning_rate": 1.3795160209794218e-05, "loss": 4.0793, "step": 6227 }, { "epoch": 2.074623136503706, "grad_norm": 0.54296875, "learning_rate": 1.3794720175224435e-05, "loss": 4.05, "step": 6228 }, { "epoch": 2.0749562755059547, "grad_norm": 0.5859375, "learning_rate": 1.3794280067334797e-05, "loss": 3.9926, "step": 6229 }, { "epoch": 2.0752894145082035, "grad_norm": 0.57421875, "learning_rate": 1.3793839886130431e-05, "loss": 4.0509, "step": 6230 }, { "epoch": 2.075622553510452, "grad_norm": 0.5390625, "learning_rate": 1.3793399631616463e-05, "loss": 4.0827, "step": 6231 }, { "epoch": 2.075955692512701, "grad_norm": 0.55859375, "learning_rate": 1.3792959303798019e-05, "loss": 4.0242, "step": 6232 }, { "epoch": 2.0762888315149497, "grad_norm": 0.55078125, "learning_rate": 1.3792518902680228e-05, "loss": 4.0109, "step": 6233 }, { "epoch": 2.0766219705171984, "grad_norm": 0.5625, "learning_rate": 1.3792078428268223e-05, "loss": 4.0665, "step": 6234 }, { "epoch": 2.076955109519447, "grad_norm": 0.5625, "learning_rate": 1.3791637880567134e-05, "loss": 4.0902, "step": 6235 }, { "epoch": 2.0772882485216955, "grad_norm": 0.5703125, "learning_rate": 1.3791197259582091e-05, "loss": 4.1168, "step": 6236 }, { "epoch": 2.077621387523944, "grad_norm": 0.55859375, "learning_rate": 1.3790756565318225e-05, "loss": 4.0571, "step": 6237 }, { "epoch": 2.077954526526193, "grad_norm": 0.58203125, "learning_rate": 1.3790315797780673e-05, "loss": 4.0353, "step": 6238 }, { "epoch": 2.0782876655284417, "grad_norm": 0.58203125, "learning_rate": 1.3789874956974567e-05, "loss": 4.0785, "step": 6239 }, { "epoch": 2.0786208045306904, "grad_norm": 0.5546875, "learning_rate": 1.378943404290504e-05, "loss": 4.0683, "step": 6240 }, { "epoch": 2.078953943532939, "grad_norm": 0.5703125, "learning_rate": 1.378899305557723e-05, "loss": 4.0958, "step": 6241 }, { "epoch": 2.079287082535188, "grad_norm": 0.5546875, "learning_rate": 1.3788551994996272e-05, "loss": 4.0451, "step": 6242 }, { "epoch": 2.0796202215374366, "grad_norm": 0.5703125, "learning_rate": 1.3788110861167305e-05, "loss": 4.0348, "step": 6243 }, { "epoch": 2.0799533605396854, "grad_norm": 0.5546875, "learning_rate": 1.3787669654095466e-05, "loss": 4.0226, "step": 6244 }, { "epoch": 2.0802864995419337, "grad_norm": 0.59765625, "learning_rate": 1.3787228373785896e-05, "loss": 4.0079, "step": 6245 }, { "epoch": 2.0806196385441824, "grad_norm": 0.55859375, "learning_rate": 1.3786787020243731e-05, "loss": 4.076, "step": 6246 }, { "epoch": 2.080952777546431, "grad_norm": 0.5546875, "learning_rate": 1.3786345593474116e-05, "loss": 4.0556, "step": 6247 }, { "epoch": 2.08128591654868, "grad_norm": 0.52734375, "learning_rate": 1.3785904093482192e-05, "loss": 4.1075, "step": 6248 }, { "epoch": 2.0816190555509286, "grad_norm": 0.58984375, "learning_rate": 1.37854625202731e-05, "loss": 3.9978, "step": 6249 }, { "epoch": 2.0819521945531774, "grad_norm": 0.5546875, "learning_rate": 1.3785020873851981e-05, "loss": 4.024, "step": 6250 }, { "epoch": 2.082285333555426, "grad_norm": 0.61328125, "learning_rate": 1.3784579154223986e-05, "loss": 3.9992, "step": 6251 }, { "epoch": 2.082618472557675, "grad_norm": 0.578125, "learning_rate": 1.3784137361394254e-05, "loss": 4.0151, "step": 6252 }, { "epoch": 2.0829516115599236, "grad_norm": 0.57421875, "learning_rate": 1.3783695495367935e-05, "loss": 4.1342, "step": 6253 }, { "epoch": 2.083284750562172, "grad_norm": 0.5859375, "learning_rate": 1.3783253556150173e-05, "loss": 4.0603, "step": 6254 }, { "epoch": 2.0836178895644206, "grad_norm": 0.58203125, "learning_rate": 1.3782811543746115e-05, "loss": 4.0302, "step": 6255 }, { "epoch": 2.0839510285666694, "grad_norm": 0.5625, "learning_rate": 1.3782369458160913e-05, "loss": 4.0647, "step": 6256 }, { "epoch": 2.084284167568918, "grad_norm": 0.55078125, "learning_rate": 1.3781927299399714e-05, "loss": 4.043, "step": 6257 }, { "epoch": 2.084617306571167, "grad_norm": 0.57421875, "learning_rate": 1.378148506746767e-05, "loss": 4.1268, "step": 6258 }, { "epoch": 2.0849504455734156, "grad_norm": 0.61328125, "learning_rate": 1.3781042762369928e-05, "loss": 4.0619, "step": 6259 }, { "epoch": 2.0852835845756643, "grad_norm": 0.5859375, "learning_rate": 1.3780600384111645e-05, "loss": 4.0823, "step": 6260 }, { "epoch": 2.085616723577913, "grad_norm": 0.55859375, "learning_rate": 1.3780157932697968e-05, "loss": 4.0629, "step": 6261 }, { "epoch": 2.0859498625801614, "grad_norm": 0.57421875, "learning_rate": 1.3779715408134056e-05, "loss": 4.0527, "step": 6262 }, { "epoch": 2.08628300158241, "grad_norm": 0.58984375, "learning_rate": 1.3779272810425061e-05, "loss": 3.9805, "step": 6263 }, { "epoch": 2.086616140584659, "grad_norm": 0.5625, "learning_rate": 1.3778830139576139e-05, "loss": 4.016, "step": 6264 }, { "epoch": 2.0869492795869076, "grad_norm": 0.5625, "learning_rate": 1.3778387395592446e-05, "loss": 4.0045, "step": 6265 }, { "epoch": 2.0872824185891563, "grad_norm": 0.51953125, "learning_rate": 1.377794457847914e-05, "loss": 4.1099, "step": 6266 }, { "epoch": 2.087615557591405, "grad_norm": 0.57421875, "learning_rate": 1.3777501688241376e-05, "loss": 4.0107, "step": 6267 }, { "epoch": 2.087948696593654, "grad_norm": 0.55859375, "learning_rate": 1.3777058724884314e-05, "loss": 4.0276, "step": 6268 }, { "epoch": 2.0882818355959025, "grad_norm": 0.5234375, "learning_rate": 1.3776615688413115e-05, "loss": 4.0166, "step": 6269 }, { "epoch": 2.0886149745981513, "grad_norm": 0.5859375, "learning_rate": 1.3776172578832936e-05, "loss": 4.0253, "step": 6270 }, { "epoch": 2.0889481136003996, "grad_norm": 0.59375, "learning_rate": 1.3775729396148943e-05, "loss": 3.9944, "step": 6271 }, { "epoch": 2.0892812526026483, "grad_norm": 0.58203125, "learning_rate": 1.3775286140366295e-05, "loss": 3.9719, "step": 6272 }, { "epoch": 2.089614391604897, "grad_norm": 0.5390625, "learning_rate": 1.3774842811490156e-05, "loss": 4.0296, "step": 6273 }, { "epoch": 2.0899475306071458, "grad_norm": 0.5625, "learning_rate": 1.3774399409525689e-05, "loss": 4.0651, "step": 6274 }, { "epoch": 2.0902806696093945, "grad_norm": 0.56640625, "learning_rate": 1.3773955934478061e-05, "loss": 4.0293, "step": 6275 }, { "epoch": 2.0906138086116433, "grad_norm": 0.5703125, "learning_rate": 1.3773512386352433e-05, "loss": 3.9781, "step": 6276 }, { "epoch": 2.090946947613892, "grad_norm": 0.58203125, "learning_rate": 1.3773068765153977e-05, "loss": 3.9828, "step": 6277 }, { "epoch": 2.0912800866161407, "grad_norm": 0.5703125, "learning_rate": 1.3772625070887855e-05, "loss": 4.0674, "step": 6278 }, { "epoch": 2.0916132256183895, "grad_norm": 0.6015625, "learning_rate": 1.3772181303559239e-05, "loss": 4.0271, "step": 6279 }, { "epoch": 2.0919463646206378, "grad_norm": 0.546875, "learning_rate": 1.3771737463173294e-05, "loss": 4.0181, "step": 6280 }, { "epoch": 2.0922795036228865, "grad_norm": 0.57421875, "learning_rate": 1.3771293549735193e-05, "loss": 4.0723, "step": 6281 }, { "epoch": 2.0926126426251352, "grad_norm": 0.5390625, "learning_rate": 1.3770849563250107e-05, "loss": 4.0891, "step": 6282 }, { "epoch": 2.092945781627384, "grad_norm": 0.53125, "learning_rate": 1.3770405503723206e-05, "loss": 4.1098, "step": 6283 }, { "epoch": 2.0932789206296327, "grad_norm": 0.5859375, "learning_rate": 1.3769961371159663e-05, "loss": 4.0457, "step": 6284 }, { "epoch": 2.0936120596318815, "grad_norm": 0.5546875, "learning_rate": 1.3769517165564649e-05, "loss": 3.9722, "step": 6285 }, { "epoch": 2.09394519863413, "grad_norm": 0.57421875, "learning_rate": 1.376907288694334e-05, "loss": 3.987, "step": 6286 }, { "epoch": 2.094278337636379, "grad_norm": 0.5703125, "learning_rate": 1.3768628535300911e-05, "loss": 4.0706, "step": 6287 }, { "epoch": 2.0946114766386277, "grad_norm": 0.60546875, "learning_rate": 1.3768184110642538e-05, "loss": 4.0353, "step": 6288 }, { "epoch": 2.094944615640876, "grad_norm": 0.59375, "learning_rate": 1.3767739612973395e-05, "loss": 4.0835, "step": 6289 }, { "epoch": 2.0952777546431247, "grad_norm": 0.5703125, "learning_rate": 1.3767295042298666e-05, "loss": 4.1148, "step": 6290 }, { "epoch": 2.0956108936453735, "grad_norm": 0.5703125, "learning_rate": 1.3766850398623519e-05, "loss": 4.0259, "step": 6291 }, { "epoch": 2.095944032647622, "grad_norm": 0.57421875, "learning_rate": 1.3766405681953144e-05, "loss": 4.0332, "step": 6292 }, { "epoch": 2.096277171649871, "grad_norm": 0.5625, "learning_rate": 1.3765960892292713e-05, "loss": 4.0195, "step": 6293 }, { "epoch": 2.0966103106521197, "grad_norm": 0.6171875, "learning_rate": 1.376551602964741e-05, "loss": 4.0443, "step": 6294 }, { "epoch": 2.0969434496543684, "grad_norm": 0.57421875, "learning_rate": 1.3765071094022416e-05, "loss": 4.0026, "step": 6295 }, { "epoch": 2.097276588656617, "grad_norm": 0.578125, "learning_rate": 1.3764626085422915e-05, "loss": 3.999, "step": 6296 }, { "epoch": 2.097609727658866, "grad_norm": 0.57421875, "learning_rate": 1.3764181003854089e-05, "loss": 3.994, "step": 6297 }, { "epoch": 2.097942866661114, "grad_norm": 0.58203125, "learning_rate": 1.3763735849321121e-05, "loss": 4.0924, "step": 6298 }, { "epoch": 2.098276005663363, "grad_norm": 0.58203125, "learning_rate": 1.37632906218292e-05, "loss": 4.0322, "step": 6299 }, { "epoch": 2.0986091446656117, "grad_norm": 0.5625, "learning_rate": 1.3762845321383509e-05, "loss": 4.076, "step": 6300 }, { "epoch": 2.0989422836678604, "grad_norm": 0.57421875, "learning_rate": 1.3762399947989232e-05, "loss": 4.0319, "step": 6301 }, { "epoch": 2.099275422670109, "grad_norm": 0.57421875, "learning_rate": 1.3761954501651564e-05, "loss": 3.9833, "step": 6302 }, { "epoch": 2.099608561672358, "grad_norm": 0.578125, "learning_rate": 1.3761508982375687e-05, "loss": 4.0503, "step": 6303 }, { "epoch": 2.0999417006746066, "grad_norm": 0.57421875, "learning_rate": 1.3761063390166792e-05, "loss": 4.07, "step": 6304 }, { "epoch": 2.1002748396768554, "grad_norm": 0.5703125, "learning_rate": 1.3760617725030074e-05, "loss": 4.0462, "step": 6305 }, { "epoch": 2.1006079786791036, "grad_norm": 0.59375, "learning_rate": 1.3760171986970715e-05, "loss": 3.9645, "step": 6306 }, { "epoch": 2.1009411176813524, "grad_norm": 0.578125, "learning_rate": 1.3759726175993916e-05, "loss": 4.0375, "step": 6307 }, { "epoch": 2.101274256683601, "grad_norm": 0.5625, "learning_rate": 1.3759280292104862e-05, "loss": 4.0873, "step": 6308 }, { "epoch": 2.10160739568585, "grad_norm": 0.59765625, "learning_rate": 1.3758834335308755e-05, "loss": 4.003, "step": 6309 }, { "epoch": 2.1019405346880986, "grad_norm": 0.5703125, "learning_rate": 1.375838830561078e-05, "loss": 3.9781, "step": 6310 }, { "epoch": 2.1022736736903473, "grad_norm": 0.57421875, "learning_rate": 1.3757942203016138e-05, "loss": 4.0171, "step": 6311 }, { "epoch": 2.102606812692596, "grad_norm": 0.5703125, "learning_rate": 1.3757496027530027e-05, "loss": 3.9864, "step": 6312 }, { "epoch": 2.102939951694845, "grad_norm": 0.55859375, "learning_rate": 1.3757049779157637e-05, "loss": 4.0385, "step": 6313 }, { "epoch": 2.1032730906970936, "grad_norm": 0.5703125, "learning_rate": 1.3756603457904171e-05, "loss": 4.0636, "step": 6314 }, { "epoch": 2.103606229699342, "grad_norm": 0.56640625, "learning_rate": 1.3756157063774826e-05, "loss": 4.0026, "step": 6315 }, { "epoch": 2.1039393687015906, "grad_norm": 0.58984375, "learning_rate": 1.3755710596774807e-05, "loss": 4.0283, "step": 6316 }, { "epoch": 2.1042725077038393, "grad_norm": 0.6171875, "learning_rate": 1.3755264056909303e-05, "loss": 4.019, "step": 6317 }, { "epoch": 2.104605646706088, "grad_norm": 0.5703125, "learning_rate": 1.3754817444183524e-05, "loss": 4.0404, "step": 6318 }, { "epoch": 2.104938785708337, "grad_norm": 0.5859375, "learning_rate": 1.375437075860267e-05, "loss": 4.009, "step": 6319 }, { "epoch": 2.1052719247105856, "grad_norm": 0.58203125, "learning_rate": 1.3753924000171944e-05, "loss": 4.0293, "step": 6320 }, { "epoch": 2.1056050637128343, "grad_norm": 0.56640625, "learning_rate": 1.3753477168896549e-05, "loss": 4.1338, "step": 6321 }, { "epoch": 2.105938202715083, "grad_norm": 0.609375, "learning_rate": 1.3753030264781689e-05, "loss": 3.9924, "step": 6322 }, { "epoch": 2.1062713417173318, "grad_norm": 0.5625, "learning_rate": 1.3752583287832573e-05, "loss": 4.0455, "step": 6323 }, { "epoch": 2.10660448071958, "grad_norm": 0.5625, "learning_rate": 1.37521362380544e-05, "loss": 4.0587, "step": 6324 }, { "epoch": 2.106937619721829, "grad_norm": 0.5625, "learning_rate": 1.3751689115452385e-05, "loss": 4.1053, "step": 6325 }, { "epoch": 2.1072707587240775, "grad_norm": 0.609375, "learning_rate": 1.3751241920031734e-05, "loss": 4.0347, "step": 6326 }, { "epoch": 2.1076038977263263, "grad_norm": 0.61328125, "learning_rate": 1.3750794651797653e-05, "loss": 3.9895, "step": 6327 }, { "epoch": 2.107937036728575, "grad_norm": 0.5625, "learning_rate": 1.3750347310755353e-05, "loss": 4.057, "step": 6328 }, { "epoch": 2.1082701757308238, "grad_norm": 0.58984375, "learning_rate": 1.3749899896910048e-05, "loss": 4.0743, "step": 6329 }, { "epoch": 2.1086033147330725, "grad_norm": 0.58984375, "learning_rate": 1.3749452410266943e-05, "loss": 4.0253, "step": 6330 }, { "epoch": 2.1089364537353212, "grad_norm": 0.6015625, "learning_rate": 1.3749004850831256e-05, "loss": 4.0535, "step": 6331 }, { "epoch": 2.1092695927375695, "grad_norm": 0.59375, "learning_rate": 1.3748557218608197e-05, "loss": 4.0479, "step": 6332 }, { "epoch": 2.1096027317398183, "grad_norm": 0.58984375, "learning_rate": 1.374810951360298e-05, "loss": 3.987, "step": 6333 }, { "epoch": 2.109935870742067, "grad_norm": 0.57421875, "learning_rate": 1.3747661735820824e-05, "loss": 4.021, "step": 6334 }, { "epoch": 2.1102690097443157, "grad_norm": 0.5625, "learning_rate": 1.3747213885266935e-05, "loss": 4.0452, "step": 6335 }, { "epoch": 2.1106021487465645, "grad_norm": 0.58203125, "learning_rate": 1.374676596194654e-05, "loss": 4.0495, "step": 6336 }, { "epoch": 2.1109352877488132, "grad_norm": 0.578125, "learning_rate": 1.3746317965864854e-05, "loss": 4.0723, "step": 6337 }, { "epoch": 2.111268426751062, "grad_norm": 0.56640625, "learning_rate": 1.374586989702709e-05, "loss": 3.9828, "step": 6338 }, { "epoch": 2.1116015657533107, "grad_norm": 0.53125, "learning_rate": 1.374542175543847e-05, "loss": 4.1126, "step": 6339 }, { "epoch": 2.1119347047555594, "grad_norm": 0.53515625, "learning_rate": 1.3744973541104214e-05, "loss": 3.9949, "step": 6340 }, { "epoch": 2.1122678437578077, "grad_norm": 0.59375, "learning_rate": 1.3744525254029545e-05, "loss": 4.069, "step": 6341 }, { "epoch": 2.1126009827600565, "grad_norm": 0.5625, "learning_rate": 1.3744076894219681e-05, "loss": 4.0059, "step": 6342 }, { "epoch": 2.112934121762305, "grad_norm": 0.578125, "learning_rate": 1.3743628461679846e-05, "loss": 4.0243, "step": 6343 }, { "epoch": 2.113267260764554, "grad_norm": 0.57421875, "learning_rate": 1.3743179956415262e-05, "loss": 4.037, "step": 6344 }, { "epoch": 2.1136003997668027, "grad_norm": 0.5859375, "learning_rate": 1.3742731378431157e-05, "loss": 4.0395, "step": 6345 }, { "epoch": 2.1139335387690514, "grad_norm": 0.578125, "learning_rate": 1.374228272773275e-05, "loss": 4.0932, "step": 6346 }, { "epoch": 2.1142666777713, "grad_norm": 0.55859375, "learning_rate": 1.3741834004325271e-05, "loss": 4.0528, "step": 6347 }, { "epoch": 2.114599816773549, "grad_norm": 0.56640625, "learning_rate": 1.3741385208213945e-05, "loss": 4.0633, "step": 6348 }, { "epoch": 2.1149329557757977, "grad_norm": 0.58203125, "learning_rate": 1.3740936339404002e-05, "loss": 4.079, "step": 6349 }, { "epoch": 2.115266094778046, "grad_norm": 0.54296875, "learning_rate": 1.3740487397900669e-05, "loss": 4.0698, "step": 6350 }, { "epoch": 2.1155992337802947, "grad_norm": 0.57421875, "learning_rate": 1.3740038383709174e-05, "loss": 4.0505, "step": 6351 }, { "epoch": 2.1159323727825434, "grad_norm": 0.5625, "learning_rate": 1.3739589296834746e-05, "loss": 4.0704, "step": 6352 }, { "epoch": 2.116265511784792, "grad_norm": 0.5703125, "learning_rate": 1.3739140137282617e-05, "loss": 4.0084, "step": 6353 }, { "epoch": 2.116598650787041, "grad_norm": 0.546875, "learning_rate": 1.3738690905058023e-05, "loss": 4.0301, "step": 6354 }, { "epoch": 2.1169317897892896, "grad_norm": 0.55859375, "learning_rate": 1.3738241600166191e-05, "loss": 3.9982, "step": 6355 }, { "epoch": 2.1172649287915384, "grad_norm": 0.5625, "learning_rate": 1.3737792222612356e-05, "loss": 4.0897, "step": 6356 }, { "epoch": 2.117598067793787, "grad_norm": 0.53125, "learning_rate": 1.3737342772401751e-05, "loss": 4.0493, "step": 6357 }, { "epoch": 2.117931206796036, "grad_norm": 0.53515625, "learning_rate": 1.3736893249539616e-05, "loss": 4.0984, "step": 6358 }, { "epoch": 2.118264345798284, "grad_norm": 0.60546875, "learning_rate": 1.3736443654031181e-05, "loss": 4.0484, "step": 6359 }, { "epoch": 2.118597484800533, "grad_norm": 0.57421875, "learning_rate": 1.373599398588169e-05, "loss": 4.0209, "step": 6360 }, { "epoch": 2.1189306238027816, "grad_norm": 0.5625, "learning_rate": 1.3735544245096372e-05, "loss": 4.013, "step": 6361 }, { "epoch": 2.1192637628050304, "grad_norm": 0.56640625, "learning_rate": 1.3735094431680472e-05, "loss": 4.0062, "step": 6362 }, { "epoch": 2.119596901807279, "grad_norm": 0.55078125, "learning_rate": 1.3734644545639227e-05, "loss": 4.0386, "step": 6363 }, { "epoch": 2.119930040809528, "grad_norm": 0.56640625, "learning_rate": 1.3734194586977874e-05, "loss": 4.0727, "step": 6364 }, { "epoch": 2.1202631798117766, "grad_norm": 0.58203125, "learning_rate": 1.3733744555701661e-05, "loss": 4.0569, "step": 6365 }, { "epoch": 2.1205963188140253, "grad_norm": 0.5703125, "learning_rate": 1.3733294451815825e-05, "loss": 4.1459, "step": 6366 }, { "epoch": 2.120929457816274, "grad_norm": 0.58203125, "learning_rate": 1.3732844275325609e-05, "loss": 4.0053, "step": 6367 }, { "epoch": 2.1212625968185224, "grad_norm": 0.5703125, "learning_rate": 1.3732394026236259e-05, "loss": 4.0037, "step": 6368 }, { "epoch": 2.121595735820771, "grad_norm": 0.58203125, "learning_rate": 1.3731943704553016e-05, "loss": 4.0493, "step": 6369 }, { "epoch": 2.12192887482302, "grad_norm": 0.5546875, "learning_rate": 1.3731493310281128e-05, "loss": 4.1031, "step": 6370 }, { "epoch": 2.1222620138252686, "grad_norm": 0.578125, "learning_rate": 1.373104284342584e-05, "loss": 4.0157, "step": 6371 }, { "epoch": 2.1225951528275173, "grad_norm": 0.58984375, "learning_rate": 1.3730592303992402e-05, "loss": 3.9662, "step": 6372 }, { "epoch": 2.122928291829766, "grad_norm": 0.58984375, "learning_rate": 1.3730141691986056e-05, "loss": 4.0936, "step": 6373 }, { "epoch": 2.123261430832015, "grad_norm": 0.57421875, "learning_rate": 1.3729691007412055e-05, "loss": 4.0425, "step": 6374 }, { "epoch": 2.1235945698342635, "grad_norm": 0.5703125, "learning_rate": 1.3729240250275647e-05, "loss": 4.0222, "step": 6375 }, { "epoch": 2.123927708836512, "grad_norm": 0.56640625, "learning_rate": 1.3728789420582083e-05, "loss": 3.9859, "step": 6376 }, { "epoch": 2.1242608478387606, "grad_norm": 0.58984375, "learning_rate": 1.3728338518336612e-05, "loss": 4.0232, "step": 6377 }, { "epoch": 2.1245939868410093, "grad_norm": 0.6015625, "learning_rate": 1.3727887543544492e-05, "loss": 4.1038, "step": 6378 }, { "epoch": 2.124927125843258, "grad_norm": 0.59765625, "learning_rate": 1.3727436496210967e-05, "loss": 4.0745, "step": 6379 }, { "epoch": 2.125260264845507, "grad_norm": 0.53515625, "learning_rate": 1.3726985376341299e-05, "loss": 4.1242, "step": 6380 }, { "epoch": 2.1255934038477555, "grad_norm": 0.546875, "learning_rate": 1.3726534183940737e-05, "loss": 4.1538, "step": 6381 }, { "epoch": 2.1259265428500043, "grad_norm": 0.57421875, "learning_rate": 1.3726082919014539e-05, "loss": 4.0487, "step": 6382 }, { "epoch": 2.126259681852253, "grad_norm": 0.5859375, "learning_rate": 1.372563158156796e-05, "loss": 4.0445, "step": 6383 }, { "epoch": 2.1265928208545017, "grad_norm": 0.59375, "learning_rate": 1.3725180171606258e-05, "loss": 4.0556, "step": 6384 }, { "epoch": 2.12692595985675, "grad_norm": 0.58203125, "learning_rate": 1.372472868913469e-05, "loss": 4.0429, "step": 6385 }, { "epoch": 2.1272590988589988, "grad_norm": 0.55078125, "learning_rate": 1.3724277134158518e-05, "loss": 4.0831, "step": 6386 }, { "epoch": 2.1275922378612475, "grad_norm": 0.58984375, "learning_rate": 1.3723825506682997e-05, "loss": 3.982, "step": 6387 }, { "epoch": 2.1279253768634963, "grad_norm": 0.58203125, "learning_rate": 1.372337380671339e-05, "loss": 4.0742, "step": 6388 }, { "epoch": 2.128258515865745, "grad_norm": 0.578125, "learning_rate": 1.372292203425496e-05, "loss": 3.972, "step": 6389 }, { "epoch": 2.1285916548679937, "grad_norm": 0.57421875, "learning_rate": 1.3722470189312964e-05, "loss": 4.1596, "step": 6390 }, { "epoch": 2.1289247938702425, "grad_norm": 0.53125, "learning_rate": 1.372201827189267e-05, "loss": 4.0658, "step": 6391 }, { "epoch": 2.129257932872491, "grad_norm": 0.5859375, "learning_rate": 1.372156628199934e-05, "loss": 4.0733, "step": 6392 }, { "epoch": 2.1295910718747395, "grad_norm": 0.55078125, "learning_rate": 1.3721114219638237e-05, "loss": 4.0537, "step": 6393 }, { "epoch": 2.1299242108769882, "grad_norm": 0.55078125, "learning_rate": 1.372066208481463e-05, "loss": 4.0459, "step": 6394 }, { "epoch": 2.130257349879237, "grad_norm": 0.5703125, "learning_rate": 1.3720209877533783e-05, "loss": 4.0668, "step": 6395 }, { "epoch": 2.1305904888814857, "grad_norm": 0.546875, "learning_rate": 1.3719757597800964e-05, "loss": 4.0697, "step": 6396 }, { "epoch": 2.1309236278837345, "grad_norm": 0.6015625, "learning_rate": 1.3719305245621441e-05, "loss": 4.0183, "step": 6397 }, { "epoch": 2.131256766885983, "grad_norm": 0.58203125, "learning_rate": 1.3718852821000484e-05, "loss": 4.12, "step": 6398 }, { "epoch": 2.131589905888232, "grad_norm": 0.578125, "learning_rate": 1.371840032394336e-05, "loss": 3.9756, "step": 6399 }, { "epoch": 2.1319230448904807, "grad_norm": 0.5390625, "learning_rate": 1.3717947754455341e-05, "loss": 4.0304, "step": 6400 }, { "epoch": 2.1322561838927294, "grad_norm": 0.57421875, "learning_rate": 1.37174951125417e-05, "loss": 4.0298, "step": 6401 }, { "epoch": 2.1325893228949777, "grad_norm": 0.58984375, "learning_rate": 1.3717042398207706e-05, "loss": 4.0278, "step": 6402 }, { "epoch": 2.1329224618972265, "grad_norm": 0.58984375, "learning_rate": 1.3716589611458636e-05, "loss": 4.0514, "step": 6403 }, { "epoch": 2.133255600899475, "grad_norm": 0.5703125, "learning_rate": 1.3716136752299763e-05, "loss": 4.0335, "step": 6404 }, { "epoch": 2.133588739901724, "grad_norm": 0.57421875, "learning_rate": 1.3715683820736357e-05, "loss": 4.0191, "step": 6405 }, { "epoch": 2.1339218789039727, "grad_norm": 0.56640625, "learning_rate": 1.3715230816773702e-05, "loss": 4.0457, "step": 6406 }, { "epoch": 2.1342550179062214, "grad_norm": 0.58203125, "learning_rate": 1.371477774041707e-05, "loss": 4.0975, "step": 6407 }, { "epoch": 2.13458815690847, "grad_norm": 0.5625, "learning_rate": 1.3714324591671737e-05, "loss": 3.9926, "step": 6408 }, { "epoch": 2.134921295910719, "grad_norm": 0.5859375, "learning_rate": 1.371387137054298e-05, "loss": 4.0151, "step": 6409 }, { "epoch": 2.1352544349129676, "grad_norm": 0.55859375, "learning_rate": 1.3713418077036085e-05, "loss": 4.088, "step": 6410 }, { "epoch": 2.135587573915216, "grad_norm": 0.578125, "learning_rate": 1.3712964711156325e-05, "loss": 4.0509, "step": 6411 }, { "epoch": 2.1359207129174647, "grad_norm": 0.52734375, "learning_rate": 1.3712511272908986e-05, "loss": 4.0721, "step": 6412 }, { "epoch": 2.1362538519197134, "grad_norm": 0.5625, "learning_rate": 1.3712057762299345e-05, "loss": 4.043, "step": 6413 }, { "epoch": 2.136586990921962, "grad_norm": 0.5625, "learning_rate": 1.3711604179332686e-05, "loss": 4.0743, "step": 6414 }, { "epoch": 2.136920129924211, "grad_norm": 0.5625, "learning_rate": 1.3711150524014295e-05, "loss": 4.0354, "step": 6415 }, { "epoch": 2.1372532689264596, "grad_norm": 0.56640625, "learning_rate": 1.3710696796349452e-05, "loss": 4.0011, "step": 6416 }, { "epoch": 2.1375864079287084, "grad_norm": 0.5546875, "learning_rate": 1.3710242996343442e-05, "loss": 4.0466, "step": 6417 }, { "epoch": 2.137919546930957, "grad_norm": 0.56640625, "learning_rate": 1.3709789124001556e-05, "loss": 4.0081, "step": 6418 }, { "epoch": 2.138252685933206, "grad_norm": 0.57421875, "learning_rate": 1.3709335179329074e-05, "loss": 4.0436, "step": 6419 }, { "epoch": 2.138585824935454, "grad_norm": 0.546875, "learning_rate": 1.370888116233129e-05, "loss": 4.0028, "step": 6420 }, { "epoch": 2.138918963937703, "grad_norm": 0.5546875, "learning_rate": 1.3708427073013486e-05, "loss": 4.0363, "step": 6421 }, { "epoch": 2.1392521029399516, "grad_norm": 0.58984375, "learning_rate": 1.3707972911380954e-05, "loss": 4.0902, "step": 6422 }, { "epoch": 2.1395852419422003, "grad_norm": 0.58984375, "learning_rate": 1.3707518677438985e-05, "loss": 4.0638, "step": 6423 }, { "epoch": 2.139918380944449, "grad_norm": 0.58203125, "learning_rate": 1.370706437119287e-05, "loss": 4.041, "step": 6424 }, { "epoch": 2.140251519946698, "grad_norm": 0.55859375, "learning_rate": 1.3706609992647896e-05, "loss": 4.0654, "step": 6425 }, { "epoch": 2.1405846589489466, "grad_norm": 0.53515625, "learning_rate": 1.3706155541809363e-05, "loss": 4.0449, "step": 6426 }, { "epoch": 2.1409177979511953, "grad_norm": 0.55859375, "learning_rate": 1.370570101868256e-05, "loss": 4.0991, "step": 6427 }, { "epoch": 2.141250936953444, "grad_norm": 0.6015625, "learning_rate": 1.3705246423272779e-05, "loss": 3.9979, "step": 6428 }, { "epoch": 2.1415840759556923, "grad_norm": 0.5625, "learning_rate": 1.3704791755585319e-05, "loss": 4.0175, "step": 6429 }, { "epoch": 2.141917214957941, "grad_norm": 0.56640625, "learning_rate": 1.3704337015625476e-05, "loss": 4.0508, "step": 6430 }, { "epoch": 2.14225035396019, "grad_norm": 0.546875, "learning_rate": 1.3703882203398544e-05, "loss": 4.1127, "step": 6431 }, { "epoch": 2.1425834929624386, "grad_norm": 0.58203125, "learning_rate": 1.370342731890982e-05, "loss": 3.9744, "step": 6432 }, { "epoch": 2.1429166319646873, "grad_norm": 0.5625, "learning_rate": 1.3702972362164606e-05, "loss": 4.0514, "step": 6433 }, { "epoch": 2.143249770966936, "grad_norm": 0.56640625, "learning_rate": 1.37025173331682e-05, "loss": 4.0743, "step": 6434 }, { "epoch": 2.1435829099691848, "grad_norm": 0.609375, "learning_rate": 1.3702062231925903e-05, "loss": 3.979, "step": 6435 }, { "epoch": 2.1439160489714335, "grad_norm": 0.59375, "learning_rate": 1.3701607058443014e-05, "loss": 3.9788, "step": 6436 }, { "epoch": 2.1442491879736822, "grad_norm": 0.5546875, "learning_rate": 1.3701151812724834e-05, "loss": 4.0017, "step": 6437 }, { "epoch": 2.1445823269759305, "grad_norm": 0.58203125, "learning_rate": 1.3700696494776666e-05, "loss": 4.0159, "step": 6438 }, { "epoch": 2.1449154659781793, "grad_norm": 0.55859375, "learning_rate": 1.3700241104603819e-05, "loss": 4.0225, "step": 6439 }, { "epoch": 2.145248604980428, "grad_norm": 0.57421875, "learning_rate": 1.369978564221159e-05, "loss": 4.0238, "step": 6440 }, { "epoch": 2.1455817439826768, "grad_norm": 0.5703125, "learning_rate": 1.3699330107605287e-05, "loss": 4.1582, "step": 6441 }, { "epoch": 2.1459148829849255, "grad_norm": 0.57421875, "learning_rate": 1.3698874500790216e-05, "loss": 4.0524, "step": 6442 }, { "epoch": 2.1462480219871742, "grad_norm": 0.5859375, "learning_rate": 1.3698418821771684e-05, "loss": 4.056, "step": 6443 }, { "epoch": 2.146581160989423, "grad_norm": 0.5625, "learning_rate": 1.3697963070554999e-05, "loss": 4.0072, "step": 6444 }, { "epoch": 2.1469142999916717, "grad_norm": 0.56640625, "learning_rate": 1.3697507247145467e-05, "loss": 4.0356, "step": 6445 }, { "epoch": 2.14724743899392, "grad_norm": 0.6015625, "learning_rate": 1.3697051351548401e-05, "loss": 3.9442, "step": 6446 }, { "epoch": 2.1475805779961687, "grad_norm": 0.5703125, "learning_rate": 1.3696595383769109e-05, "loss": 4.0676, "step": 6447 }, { "epoch": 2.1479137169984175, "grad_norm": 0.6015625, "learning_rate": 1.3696139343812902e-05, "loss": 4.0313, "step": 6448 }, { "epoch": 2.1482468560006662, "grad_norm": 0.61328125, "learning_rate": 1.3695683231685094e-05, "loss": 4.0495, "step": 6449 }, { "epoch": 2.148579995002915, "grad_norm": 0.58984375, "learning_rate": 1.3695227047390996e-05, "loss": 4.0061, "step": 6450 }, { "epoch": 2.1489131340051637, "grad_norm": 0.58203125, "learning_rate": 1.3694770790935921e-05, "loss": 4.0316, "step": 6451 }, { "epoch": 2.1492462730074124, "grad_norm": 0.58203125, "learning_rate": 1.3694314462325184e-05, "loss": 3.9914, "step": 6452 }, { "epoch": 2.149579412009661, "grad_norm": 0.578125, "learning_rate": 1.36938580615641e-05, "loss": 4.0678, "step": 6453 }, { "epoch": 2.14991255101191, "grad_norm": 0.5546875, "learning_rate": 1.3693401588657987e-05, "loss": 4.0631, "step": 6454 }, { "epoch": 2.150245690014158, "grad_norm": 0.5390625, "learning_rate": 1.369294504361216e-05, "loss": 4.0497, "step": 6455 }, { "epoch": 2.150578829016407, "grad_norm": 0.56640625, "learning_rate": 1.3692488426431938e-05, "loss": 4.0812, "step": 6456 }, { "epoch": 2.1509119680186557, "grad_norm": 0.578125, "learning_rate": 1.3692031737122637e-05, "loss": 3.9606, "step": 6457 }, { "epoch": 2.1512451070209044, "grad_norm": 0.56640625, "learning_rate": 1.3691574975689578e-05, "loss": 4.0762, "step": 6458 }, { "epoch": 2.151578246023153, "grad_norm": 0.54296875, "learning_rate": 1.369111814213808e-05, "loss": 4.0707, "step": 6459 }, { "epoch": 2.151911385025402, "grad_norm": 0.59375, "learning_rate": 1.369066123647347e-05, "loss": 4.0591, "step": 6460 }, { "epoch": 2.1522445240276507, "grad_norm": 0.578125, "learning_rate": 1.3690204258701063e-05, "loss": 4.0058, "step": 6461 }, { "epoch": 2.1525776630298994, "grad_norm": 0.5703125, "learning_rate": 1.3689747208826184e-05, "loss": 4.0478, "step": 6462 }, { "epoch": 2.1529108020321477, "grad_norm": 0.5703125, "learning_rate": 1.3689290086854158e-05, "loss": 4.0559, "step": 6463 }, { "epoch": 2.1532439410343964, "grad_norm": 0.59765625, "learning_rate": 1.3688832892790308e-05, "loss": 4.0544, "step": 6464 }, { "epoch": 2.153577080036645, "grad_norm": 0.58984375, "learning_rate": 1.3688375626639961e-05, "loss": 4.0388, "step": 6465 }, { "epoch": 2.153910219038894, "grad_norm": 0.58203125, "learning_rate": 1.3687918288408441e-05, "loss": 4.0676, "step": 6466 }, { "epoch": 2.1542433580411426, "grad_norm": 0.5625, "learning_rate": 1.3687460878101075e-05, "loss": 4.0724, "step": 6467 }, { "epoch": 2.1545764970433914, "grad_norm": 0.56640625, "learning_rate": 1.3687003395723191e-05, "loss": 4.0084, "step": 6468 }, { "epoch": 2.15490963604564, "grad_norm": 0.5546875, "learning_rate": 1.368654584128012e-05, "loss": 4.0424, "step": 6469 }, { "epoch": 2.155242775047889, "grad_norm": 0.546875, "learning_rate": 1.368608821477719e-05, "loss": 4.0821, "step": 6470 }, { "epoch": 2.1555759140501376, "grad_norm": 0.5859375, "learning_rate": 1.3685630516219732e-05, "loss": 3.9954, "step": 6471 }, { "epoch": 2.155909053052386, "grad_norm": 0.5625, "learning_rate": 1.3685172745613075e-05, "loss": 3.9877, "step": 6472 }, { "epoch": 2.1562421920546346, "grad_norm": 0.58984375, "learning_rate": 1.3684714902962554e-05, "loss": 3.9831, "step": 6473 }, { "epoch": 2.1565753310568834, "grad_norm": 0.5703125, "learning_rate": 1.36842569882735e-05, "loss": 4.0198, "step": 6474 }, { "epoch": 2.156908470059132, "grad_norm": 0.5703125, "learning_rate": 1.3683799001551248e-05, "loss": 4.0121, "step": 6475 }, { "epoch": 2.157241609061381, "grad_norm": 0.609375, "learning_rate": 1.3683340942801131e-05, "loss": 3.9542, "step": 6476 }, { "epoch": 2.1575747480636296, "grad_norm": 0.59375, "learning_rate": 1.3682882812028484e-05, "loss": 3.9737, "step": 6477 }, { "epoch": 2.1579078870658783, "grad_norm": 0.58203125, "learning_rate": 1.3682424609238646e-05, "loss": 3.9985, "step": 6478 }, { "epoch": 2.158241026068127, "grad_norm": 0.5859375, "learning_rate": 1.3681966334436952e-05, "loss": 4.032, "step": 6479 }, { "epoch": 2.158574165070376, "grad_norm": 0.546875, "learning_rate": 1.3681507987628738e-05, "loss": 4.0914, "step": 6480 }, { "epoch": 2.158907304072624, "grad_norm": 0.5625, "learning_rate": 1.3681049568819349e-05, "loss": 3.9579, "step": 6481 }, { "epoch": 2.159240443074873, "grad_norm": 0.57421875, "learning_rate": 1.3680591078014116e-05, "loss": 4.0904, "step": 6482 }, { "epoch": 2.1595735820771216, "grad_norm": 0.58984375, "learning_rate": 1.3680132515218387e-05, "loss": 4.0284, "step": 6483 }, { "epoch": 2.1599067210793703, "grad_norm": 0.56640625, "learning_rate": 1.36796738804375e-05, "loss": 4.0613, "step": 6484 }, { "epoch": 2.160239860081619, "grad_norm": 0.56640625, "learning_rate": 1.36792151736768e-05, "loss": 4.1257, "step": 6485 }, { "epoch": 2.160572999083868, "grad_norm": 0.59765625, "learning_rate": 1.3678756394941624e-05, "loss": 4.0306, "step": 6486 }, { "epoch": 2.1609061380861165, "grad_norm": 0.546875, "learning_rate": 1.367829754423732e-05, "loss": 4.0462, "step": 6487 }, { "epoch": 2.1612392770883653, "grad_norm": 0.5625, "learning_rate": 1.3677838621569232e-05, "loss": 4.0635, "step": 6488 }, { "epoch": 2.161572416090614, "grad_norm": 0.58984375, "learning_rate": 1.3677379626942705e-05, "loss": 4.034, "step": 6489 }, { "epoch": 2.1619055550928623, "grad_norm": 0.5546875, "learning_rate": 1.3676920560363084e-05, "loss": 4.0416, "step": 6490 }, { "epoch": 2.162238694095111, "grad_norm": 0.55078125, "learning_rate": 1.367646142183572e-05, "loss": 4.0679, "step": 6491 }, { "epoch": 2.16257183309736, "grad_norm": 0.578125, "learning_rate": 1.3676002211365959e-05, "loss": 4.0303, "step": 6492 }, { "epoch": 2.1629049720996085, "grad_norm": 0.55078125, "learning_rate": 1.3675542928959148e-05, "loss": 4.0869, "step": 6493 }, { "epoch": 2.1632381111018573, "grad_norm": 0.5703125, "learning_rate": 1.3675083574620637e-05, "loss": 4.0196, "step": 6494 }, { "epoch": 2.163571250104106, "grad_norm": 0.55078125, "learning_rate": 1.3674624148355781e-05, "loss": 4.1476, "step": 6495 }, { "epoch": 2.1639043891063547, "grad_norm": 0.53515625, "learning_rate": 1.3674164650169924e-05, "loss": 4.0444, "step": 6496 }, { "epoch": 2.1642375281086035, "grad_norm": 0.56640625, "learning_rate": 1.3673705080068423e-05, "loss": 4.0637, "step": 6497 }, { "epoch": 2.164570667110852, "grad_norm": 0.55859375, "learning_rate": 1.3673245438056632e-05, "loss": 4.0318, "step": 6498 }, { "epoch": 2.1649038061131005, "grad_norm": 0.578125, "learning_rate": 1.3672785724139903e-05, "loss": 4.0854, "step": 6499 }, { "epoch": 2.1652369451153493, "grad_norm": 0.55859375, "learning_rate": 1.367232593832359e-05, "loss": 4.032, "step": 6500 }, { "epoch": 2.165570084117598, "grad_norm": 0.5625, "learning_rate": 1.3671866080613046e-05, "loss": 3.9999, "step": 6501 }, { "epoch": 2.1659032231198467, "grad_norm": 0.5546875, "learning_rate": 1.3671406151013634e-05, "loss": 4.046, "step": 6502 }, { "epoch": 2.1662363621220955, "grad_norm": 0.57421875, "learning_rate": 1.3670946149530706e-05, "loss": 3.987, "step": 6503 }, { "epoch": 2.166569501124344, "grad_norm": 0.578125, "learning_rate": 1.3670486076169623e-05, "loss": 4.0351, "step": 6504 }, { "epoch": 2.166902640126593, "grad_norm": 0.578125, "learning_rate": 1.3670025930935743e-05, "loss": 4.021, "step": 6505 }, { "epoch": 2.1672357791288417, "grad_norm": 0.60546875, "learning_rate": 1.366956571383442e-05, "loss": 4.057, "step": 6506 }, { "epoch": 2.1675689181310904, "grad_norm": 0.57421875, "learning_rate": 1.3669105424871025e-05, "loss": 4.0831, "step": 6507 }, { "epoch": 2.1679020571333387, "grad_norm": 0.5625, "learning_rate": 1.3668645064050912e-05, "loss": 3.9855, "step": 6508 }, { "epoch": 2.1682351961355875, "grad_norm": 0.56640625, "learning_rate": 1.3668184631379445e-05, "loss": 4.0315, "step": 6509 }, { "epoch": 2.168568335137836, "grad_norm": 0.578125, "learning_rate": 1.366772412686199e-05, "loss": 3.9845, "step": 6510 }, { "epoch": 2.168901474140085, "grad_norm": 0.5703125, "learning_rate": 1.3667263550503905e-05, "loss": 4.0949, "step": 6511 }, { "epoch": 2.1692346131423337, "grad_norm": 0.58203125, "learning_rate": 1.366680290231056e-05, "loss": 4.0846, "step": 6512 }, { "epoch": 2.1695677521445824, "grad_norm": 0.5625, "learning_rate": 1.3666342182287316e-05, "loss": 4.0587, "step": 6513 }, { "epoch": 2.169900891146831, "grad_norm": 0.58203125, "learning_rate": 1.3665881390439544e-05, "loss": 4.0697, "step": 6514 }, { "epoch": 2.17023403014908, "grad_norm": 0.5625, "learning_rate": 1.3665420526772608e-05, "loss": 4.0166, "step": 6515 }, { "epoch": 2.170567169151328, "grad_norm": 0.58203125, "learning_rate": 1.3664959591291877e-05, "loss": 4.0538, "step": 6516 }, { "epoch": 2.170900308153577, "grad_norm": 0.57421875, "learning_rate": 1.366449858400272e-05, "loss": 4.0865, "step": 6517 }, { "epoch": 2.1712334471558257, "grad_norm": 0.5546875, "learning_rate": 1.366403750491051e-05, "loss": 4.1227, "step": 6518 }, { "epoch": 2.1715665861580744, "grad_norm": 0.59375, "learning_rate": 1.366357635402061e-05, "loss": 4.0406, "step": 6519 }, { "epoch": 2.171899725160323, "grad_norm": 0.58203125, "learning_rate": 1.3663115131338399e-05, "loss": 4.103, "step": 6520 }, { "epoch": 2.172232864162572, "grad_norm": 0.57421875, "learning_rate": 1.3662653836869245e-05, "loss": 4.0641, "step": 6521 }, { "epoch": 2.1725660031648206, "grad_norm": 0.55078125, "learning_rate": 1.3662192470618521e-05, "loss": 4.039, "step": 6522 }, { "epoch": 2.1728991421670694, "grad_norm": 0.58203125, "learning_rate": 1.3661731032591602e-05, "loss": 4.0243, "step": 6523 }, { "epoch": 2.173232281169318, "grad_norm": 0.578125, "learning_rate": 1.3661269522793864e-05, "loss": 3.9714, "step": 6524 }, { "epoch": 2.1735654201715664, "grad_norm": 0.59375, "learning_rate": 1.3660807941230679e-05, "loss": 4.0891, "step": 6525 }, { "epoch": 2.173898559173815, "grad_norm": 0.57421875, "learning_rate": 1.3660346287907428e-05, "loss": 3.9922, "step": 6526 }, { "epoch": 2.174231698176064, "grad_norm": 0.58203125, "learning_rate": 1.3659884562829486e-05, "loss": 4.1244, "step": 6527 }, { "epoch": 2.1745648371783126, "grad_norm": 0.5546875, "learning_rate": 1.365942276600223e-05, "loss": 4.0588, "step": 6528 }, { "epoch": 2.1748979761805614, "grad_norm": 0.59765625, "learning_rate": 1.3658960897431041e-05, "loss": 4.0156, "step": 6529 }, { "epoch": 2.17523111518281, "grad_norm": 0.58203125, "learning_rate": 1.3658498957121299e-05, "loss": 4.0153, "step": 6530 }, { "epoch": 2.175564254185059, "grad_norm": 0.6015625, "learning_rate": 1.3658036945078382e-05, "loss": 4.0646, "step": 6531 }, { "epoch": 2.1758973931873076, "grad_norm": 0.5703125, "learning_rate": 1.3657574861307673e-05, "loss": 4.0078, "step": 6532 }, { "epoch": 2.176230532189556, "grad_norm": 0.62890625, "learning_rate": 1.3657112705814553e-05, "loss": 3.8979, "step": 6533 }, { "epoch": 2.1765636711918046, "grad_norm": 0.57421875, "learning_rate": 1.3656650478604409e-05, "loss": 4.0395, "step": 6534 }, { "epoch": 2.1768968101940533, "grad_norm": 0.578125, "learning_rate": 1.3656188179682619e-05, "loss": 4.0636, "step": 6535 }, { "epoch": 2.177229949196302, "grad_norm": 0.5546875, "learning_rate": 1.3655725809054574e-05, "loss": 4.017, "step": 6536 }, { "epoch": 2.177563088198551, "grad_norm": 0.5546875, "learning_rate": 1.3655263366725654e-05, "loss": 4.071, "step": 6537 }, { "epoch": 2.1778962272007996, "grad_norm": 0.5703125, "learning_rate": 1.365480085270125e-05, "loss": 3.998, "step": 6538 }, { "epoch": 2.1782293662030483, "grad_norm": 0.59765625, "learning_rate": 1.3654338266986746e-05, "loss": 4.044, "step": 6539 }, { "epoch": 2.178562505205297, "grad_norm": 0.5703125, "learning_rate": 1.3653875609587531e-05, "loss": 4.1249, "step": 6540 }, { "epoch": 2.178895644207546, "grad_norm": 0.56640625, "learning_rate": 1.3653412880508995e-05, "loss": 4.096, "step": 6541 }, { "epoch": 2.179228783209794, "grad_norm": 0.59375, "learning_rate": 1.3652950079756529e-05, "loss": 4.051, "step": 6542 }, { "epoch": 2.179561922212043, "grad_norm": 0.5546875, "learning_rate": 1.3652487207335522e-05, "loss": 3.9672, "step": 6543 }, { "epoch": 2.1798950612142916, "grad_norm": 0.5703125, "learning_rate": 1.3652024263251362e-05, "loss": 4.0538, "step": 6544 }, { "epoch": 2.1802282002165403, "grad_norm": 0.578125, "learning_rate": 1.3651561247509446e-05, "loss": 4.053, "step": 6545 }, { "epoch": 2.180561339218789, "grad_norm": 0.56640625, "learning_rate": 1.3651098160115165e-05, "loss": 4.0278, "step": 6546 }, { "epoch": 2.1808944782210378, "grad_norm": 0.5546875, "learning_rate": 1.3650635001073916e-05, "loss": 4.0317, "step": 6547 }, { "epoch": 2.1812276172232865, "grad_norm": 0.55078125, "learning_rate": 1.365017177039109e-05, "loss": 4.0099, "step": 6548 }, { "epoch": 2.1815607562255352, "grad_norm": 0.55859375, "learning_rate": 1.3649708468072085e-05, "loss": 4.0857, "step": 6549 }, { "epoch": 2.181893895227784, "grad_norm": 0.56640625, "learning_rate": 1.3649245094122295e-05, "loss": 4.0374, "step": 6550 }, { "epoch": 2.1822270342300323, "grad_norm": 0.55859375, "learning_rate": 1.364878164854712e-05, "loss": 4.0115, "step": 6551 }, { "epoch": 2.182560173232281, "grad_norm": 0.5390625, "learning_rate": 1.3648318131351958e-05, "loss": 4.0903, "step": 6552 }, { "epoch": 2.1828933122345298, "grad_norm": 0.56640625, "learning_rate": 1.3647854542542203e-05, "loss": 4.0206, "step": 6553 }, { "epoch": 2.1832264512367785, "grad_norm": 0.58203125, "learning_rate": 1.3647390882123264e-05, "loss": 3.9373, "step": 6554 }, { "epoch": 2.1835595902390272, "grad_norm": 0.6328125, "learning_rate": 1.3646927150100534e-05, "loss": 3.9769, "step": 6555 }, { "epoch": 2.183892729241276, "grad_norm": 0.57421875, "learning_rate": 1.3646463346479418e-05, "loss": 4.0109, "step": 6556 }, { "epoch": 2.1842258682435247, "grad_norm": 0.6015625, "learning_rate": 1.3645999471265315e-05, "loss": 4.0156, "step": 6557 }, { "epoch": 2.1845590072457735, "grad_norm": 0.56640625, "learning_rate": 1.3645535524463632e-05, "loss": 4.0698, "step": 6558 }, { "epoch": 2.184892146248022, "grad_norm": 0.5703125, "learning_rate": 1.364507150607977e-05, "loss": 4.0648, "step": 6559 }, { "epoch": 2.1852252852502705, "grad_norm": 0.57421875, "learning_rate": 1.3644607416119136e-05, "loss": 4.0178, "step": 6560 }, { "epoch": 2.1855584242525192, "grad_norm": 0.5546875, "learning_rate": 1.3644143254587136e-05, "loss": 4.0846, "step": 6561 }, { "epoch": 2.185891563254768, "grad_norm": 0.56640625, "learning_rate": 1.3643679021489172e-05, "loss": 3.9685, "step": 6562 }, { "epoch": 2.1862247022570167, "grad_norm": 0.5625, "learning_rate": 1.364321471683066e-05, "loss": 4.0288, "step": 6563 }, { "epoch": 2.1865578412592654, "grad_norm": 0.578125, "learning_rate": 1.3642750340616999e-05, "loss": 4.0209, "step": 6564 }, { "epoch": 2.186890980261514, "grad_norm": 0.58984375, "learning_rate": 1.3642285892853602e-05, "loss": 4.0958, "step": 6565 }, { "epoch": 2.187224119263763, "grad_norm": 0.5625, "learning_rate": 1.3641821373545878e-05, "loss": 4.0635, "step": 6566 }, { "epoch": 2.1875572582660117, "grad_norm": 0.56640625, "learning_rate": 1.3641356782699239e-05, "loss": 4.1418, "step": 6567 }, { "epoch": 2.1878903972682604, "grad_norm": 0.55859375, "learning_rate": 1.3640892120319097e-05, "loss": 4.0349, "step": 6568 }, { "epoch": 2.1882235362705087, "grad_norm": 0.578125, "learning_rate": 1.364042738641086e-05, "loss": 4.0846, "step": 6569 }, { "epoch": 2.1885566752727574, "grad_norm": 0.55078125, "learning_rate": 1.3639962580979947e-05, "loss": 4.1372, "step": 6570 }, { "epoch": 2.188889814275006, "grad_norm": 0.58203125, "learning_rate": 1.3639497704031767e-05, "loss": 4.0263, "step": 6571 }, { "epoch": 2.189222953277255, "grad_norm": 0.5859375, "learning_rate": 1.363903275557174e-05, "loss": 4.048, "step": 6572 }, { "epoch": 2.1895560922795037, "grad_norm": 0.59765625, "learning_rate": 1.3638567735605273e-05, "loss": 4.044, "step": 6573 }, { "epoch": 2.1898892312817524, "grad_norm": 0.55859375, "learning_rate": 1.3638102644137792e-05, "loss": 4.1043, "step": 6574 }, { "epoch": 2.190222370284001, "grad_norm": 0.56640625, "learning_rate": 1.3637637481174709e-05, "loss": 4.0858, "step": 6575 }, { "epoch": 2.19055550928625, "grad_norm": 0.57421875, "learning_rate": 1.3637172246721443e-05, "loss": 4.0433, "step": 6576 }, { "epoch": 2.1908886482884986, "grad_norm": 0.58203125, "learning_rate": 1.3636706940783415e-05, "loss": 3.9769, "step": 6577 }, { "epoch": 2.191221787290747, "grad_norm": 0.58984375, "learning_rate": 1.3636241563366044e-05, "loss": 3.9913, "step": 6578 }, { "epoch": 2.1915549262929956, "grad_norm": 0.58203125, "learning_rate": 1.3635776114474748e-05, "loss": 4.0544, "step": 6579 }, { "epoch": 2.1918880652952444, "grad_norm": 0.57421875, "learning_rate": 1.3635310594114949e-05, "loss": 4.0657, "step": 6580 }, { "epoch": 2.192221204297493, "grad_norm": 0.53125, "learning_rate": 1.3634845002292073e-05, "loss": 4.0336, "step": 6581 }, { "epoch": 2.192554343299742, "grad_norm": 0.5625, "learning_rate": 1.363437933901154e-05, "loss": 4.0759, "step": 6582 }, { "epoch": 2.1928874823019906, "grad_norm": 0.56640625, "learning_rate": 1.3633913604278775e-05, "loss": 4.0274, "step": 6583 }, { "epoch": 2.1932206213042393, "grad_norm": 0.5703125, "learning_rate": 1.3633447798099203e-05, "loss": 4.0125, "step": 6584 }, { "epoch": 2.193553760306488, "grad_norm": 0.58203125, "learning_rate": 1.3632981920478249e-05, "loss": 4.0011, "step": 6585 }, { "epoch": 2.1938868993087364, "grad_norm": 0.58203125, "learning_rate": 1.3632515971421338e-05, "loss": 3.994, "step": 6586 }, { "epoch": 2.194220038310985, "grad_norm": 0.578125, "learning_rate": 1.36320499509339e-05, "loss": 4.0087, "step": 6587 }, { "epoch": 2.194553177313234, "grad_norm": 0.609375, "learning_rate": 1.3631583859021362e-05, "loss": 4.0353, "step": 6588 }, { "epoch": 2.1948863163154826, "grad_norm": 0.5625, "learning_rate": 1.3631117695689153e-05, "loss": 4.0733, "step": 6589 }, { "epoch": 2.1952194553177313, "grad_norm": 0.59375, "learning_rate": 1.3630651460942705e-05, "loss": 4.0193, "step": 6590 }, { "epoch": 2.19555259431998, "grad_norm": 0.5859375, "learning_rate": 1.3630185154787444e-05, "loss": 4.0643, "step": 6591 }, { "epoch": 2.195885733322229, "grad_norm": 0.56640625, "learning_rate": 1.3629718777228805e-05, "loss": 3.9665, "step": 6592 }, { "epoch": 2.1962188723244775, "grad_norm": 0.578125, "learning_rate": 1.3629252328272219e-05, "loss": 4.0339, "step": 6593 }, { "epoch": 2.1965520113267263, "grad_norm": 0.58984375, "learning_rate": 1.3628785807923117e-05, "loss": 4.0081, "step": 6594 }, { "epoch": 2.1968851503289746, "grad_norm": 0.56640625, "learning_rate": 1.3628319216186938e-05, "loss": 4.1442, "step": 6595 }, { "epoch": 2.1972182893312233, "grad_norm": 0.59765625, "learning_rate": 1.3627852553069113e-05, "loss": 4.0165, "step": 6596 }, { "epoch": 2.197551428333472, "grad_norm": 0.56640625, "learning_rate": 1.362738581857508e-05, "loss": 4.0758, "step": 6597 }, { "epoch": 2.197884567335721, "grad_norm": 0.5859375, "learning_rate": 1.362691901271027e-05, "loss": 4.0468, "step": 6598 }, { "epoch": 2.1982177063379695, "grad_norm": 0.59375, "learning_rate": 1.362645213548013e-05, "loss": 4.0232, "step": 6599 }, { "epoch": 2.1985508453402183, "grad_norm": 0.609375, "learning_rate": 1.362598518689009e-05, "loss": 4.0304, "step": 6600 }, { "epoch": 2.198883984342467, "grad_norm": 0.609375, "learning_rate": 1.3625518166945589e-05, "loss": 4.0049, "step": 6601 }, { "epoch": 2.1992171233447158, "grad_norm": 0.59765625, "learning_rate": 1.3625051075652074e-05, "loss": 4.0215, "step": 6602 }, { "epoch": 2.199550262346964, "grad_norm": 0.5859375, "learning_rate": 1.3624583913014978e-05, "loss": 4.083, "step": 6603 }, { "epoch": 2.199883401349213, "grad_norm": 0.58984375, "learning_rate": 1.3624116679039745e-05, "loss": 3.9831, "step": 6604 }, { "epoch": 2.2002165403514615, "grad_norm": 0.60546875, "learning_rate": 1.362364937373182e-05, "loss": 4.0141, "step": 6605 }, { "epoch": 2.2005496793537103, "grad_norm": 0.58203125, "learning_rate": 1.3623181997096638e-05, "loss": 4.0674, "step": 6606 }, { "epoch": 2.200882818355959, "grad_norm": 0.609375, "learning_rate": 1.3622714549139653e-05, "loss": 4.0154, "step": 6607 }, { "epoch": 2.2012159573582077, "grad_norm": 0.5859375, "learning_rate": 1.3622247029866306e-05, "loss": 3.9776, "step": 6608 }, { "epoch": 2.2015490963604565, "grad_norm": 0.58984375, "learning_rate": 1.362177943928204e-05, "loss": 4.035, "step": 6609 }, { "epoch": 2.201882235362705, "grad_norm": 0.578125, "learning_rate": 1.3621311777392302e-05, "loss": 4.051, "step": 6610 }, { "epoch": 2.202215374364954, "grad_norm": 0.6171875, "learning_rate": 1.3620844044202544e-05, "loss": 4.0629, "step": 6611 }, { "epoch": 2.2025485133672023, "grad_norm": 0.58203125, "learning_rate": 1.362037623971821e-05, "loss": 4.11, "step": 6612 }, { "epoch": 2.202881652369451, "grad_norm": 0.55078125, "learning_rate": 1.3619908363944746e-05, "loss": 4.0806, "step": 6613 }, { "epoch": 2.2032147913716997, "grad_norm": 0.58203125, "learning_rate": 1.3619440416887609e-05, "loss": 4.0919, "step": 6614 }, { "epoch": 2.2035479303739485, "grad_norm": 0.5625, "learning_rate": 1.3618972398552244e-05, "loss": 4.0038, "step": 6615 }, { "epoch": 2.203881069376197, "grad_norm": 0.59765625, "learning_rate": 1.3618504308944105e-05, "loss": 4.0722, "step": 6616 }, { "epoch": 2.204214208378446, "grad_norm": 0.59765625, "learning_rate": 1.3618036148068643e-05, "loss": 4.0409, "step": 6617 }, { "epoch": 2.2045473473806947, "grad_norm": 0.55859375, "learning_rate": 1.3617567915931311e-05, "loss": 4.084, "step": 6618 }, { "epoch": 2.2048804863829434, "grad_norm": 0.578125, "learning_rate": 1.3617099612537564e-05, "loss": 4.0289, "step": 6619 }, { "epoch": 2.205213625385192, "grad_norm": 0.5703125, "learning_rate": 1.3616631237892856e-05, "loss": 4.0857, "step": 6620 }, { "epoch": 2.2055467643874405, "grad_norm": 0.59765625, "learning_rate": 1.3616162792002644e-05, "loss": 4.0504, "step": 6621 }, { "epoch": 2.205879903389689, "grad_norm": 0.59765625, "learning_rate": 1.361569427487238e-05, "loss": 3.9806, "step": 6622 }, { "epoch": 2.206213042391938, "grad_norm": 0.57421875, "learning_rate": 1.3615225686507528e-05, "loss": 4.0778, "step": 6623 }, { "epoch": 2.2065461813941867, "grad_norm": 0.56640625, "learning_rate": 1.361475702691354e-05, "loss": 4.0377, "step": 6624 }, { "epoch": 2.2068793203964354, "grad_norm": 0.58984375, "learning_rate": 1.3614288296095877e-05, "loss": 4.0255, "step": 6625 }, { "epoch": 2.207212459398684, "grad_norm": 0.5703125, "learning_rate": 1.361381949406e-05, "loss": 4.0483, "step": 6626 }, { "epoch": 2.207545598400933, "grad_norm": 0.55078125, "learning_rate": 1.3613350620811368e-05, "loss": 4.0603, "step": 6627 }, { "epoch": 2.2078787374031816, "grad_norm": 0.5703125, "learning_rate": 1.3612881676355442e-05, "loss": 4.0436, "step": 6628 }, { "epoch": 2.2082118764054304, "grad_norm": 0.57421875, "learning_rate": 1.3612412660697686e-05, "loss": 4.0909, "step": 6629 }, { "epoch": 2.2085450154076787, "grad_norm": 0.58203125, "learning_rate": 1.3611943573843563e-05, "loss": 3.977, "step": 6630 }, { "epoch": 2.2088781544099274, "grad_norm": 0.58203125, "learning_rate": 1.3611474415798532e-05, "loss": 4.0752, "step": 6631 }, { "epoch": 2.209211293412176, "grad_norm": 0.578125, "learning_rate": 1.3611005186568066e-05, "loss": 4.0268, "step": 6632 }, { "epoch": 2.209544432414425, "grad_norm": 0.57421875, "learning_rate": 1.3610535886157623e-05, "loss": 3.991, "step": 6633 }, { "epoch": 2.2098775714166736, "grad_norm": 0.57421875, "learning_rate": 1.3610066514572674e-05, "loss": 4.0373, "step": 6634 }, { "epoch": 2.2102107104189224, "grad_norm": 0.58203125, "learning_rate": 1.3609597071818683e-05, "loss": 4.0117, "step": 6635 }, { "epoch": 2.210543849421171, "grad_norm": 0.5546875, "learning_rate": 1.360912755790112e-05, "loss": 4.0917, "step": 6636 }, { "epoch": 2.21087698842342, "grad_norm": 0.59765625, "learning_rate": 1.3608657972825454e-05, "loss": 4.0348, "step": 6637 }, { "epoch": 2.2112101274256686, "grad_norm": 0.59765625, "learning_rate": 1.3608188316597153e-05, "loss": 4.0394, "step": 6638 }, { "epoch": 2.211543266427917, "grad_norm": 0.54296875, "learning_rate": 1.360771858922169e-05, "loss": 4.0632, "step": 6639 }, { "epoch": 2.2118764054301656, "grad_norm": 0.53125, "learning_rate": 1.3607248790704532e-05, "loss": 4.0599, "step": 6640 }, { "epoch": 2.2122095444324144, "grad_norm": 0.57421875, "learning_rate": 1.3606778921051157e-05, "loss": 4.0663, "step": 6641 }, { "epoch": 2.212542683434663, "grad_norm": 0.57421875, "learning_rate": 1.3606308980267033e-05, "loss": 4.0732, "step": 6642 }, { "epoch": 2.212875822436912, "grad_norm": 0.58984375, "learning_rate": 1.3605838968357635e-05, "loss": 4.0172, "step": 6643 }, { "epoch": 2.2132089614391606, "grad_norm": 0.57421875, "learning_rate": 1.360536888532844e-05, "loss": 4.071, "step": 6644 }, { "epoch": 2.2135421004414093, "grad_norm": 0.609375, "learning_rate": 1.360489873118492e-05, "loss": 3.9657, "step": 6645 }, { "epoch": 2.213875239443658, "grad_norm": 0.58984375, "learning_rate": 1.3604428505932555e-05, "loss": 4.0863, "step": 6646 }, { "epoch": 2.214208378445907, "grad_norm": 0.60546875, "learning_rate": 1.360395820957682e-05, "loss": 4.0888, "step": 6647 }, { "epoch": 2.214541517448155, "grad_norm": 0.55859375, "learning_rate": 1.3603487842123189e-05, "loss": 4.0295, "step": 6648 }, { "epoch": 2.214874656450404, "grad_norm": 0.55078125, "learning_rate": 1.3603017403577147e-05, "loss": 4.0466, "step": 6649 }, { "epoch": 2.2152077954526526, "grad_norm": 0.58984375, "learning_rate": 1.3602546893944174e-05, "loss": 4.0802, "step": 6650 }, { "epoch": 2.2155409344549013, "grad_norm": 0.58984375, "learning_rate": 1.3602076313229746e-05, "loss": 4.0325, "step": 6651 }, { "epoch": 2.21587407345715, "grad_norm": 0.578125, "learning_rate": 1.3601605661439345e-05, "loss": 4.0587, "step": 6652 }, { "epoch": 2.216207212459399, "grad_norm": 0.59375, "learning_rate": 1.3601134938578456e-05, "loss": 4.0307, "step": 6653 }, { "epoch": 2.2165403514616475, "grad_norm": 0.5625, "learning_rate": 1.3600664144652558e-05, "loss": 4.0211, "step": 6654 }, { "epoch": 2.2168734904638963, "grad_norm": 0.62109375, "learning_rate": 1.3600193279667138e-05, "loss": 4.0113, "step": 6655 }, { "epoch": 2.2172066294661446, "grad_norm": 0.57421875, "learning_rate": 1.3599722343627677e-05, "loss": 4.0462, "step": 6656 }, { "epoch": 2.2175397684683933, "grad_norm": 0.5859375, "learning_rate": 1.3599251336539666e-05, "loss": 4.0462, "step": 6657 }, { "epoch": 2.217872907470642, "grad_norm": 0.58984375, "learning_rate": 1.3598780258408586e-05, "loss": 4.059, "step": 6658 }, { "epoch": 2.2182060464728908, "grad_norm": 0.57421875, "learning_rate": 1.3598309109239928e-05, "loss": 4.0716, "step": 6659 }, { "epoch": 2.2185391854751395, "grad_norm": 0.6015625, "learning_rate": 1.3597837889039176e-05, "loss": 3.9936, "step": 6660 }, { "epoch": 2.2188723244773882, "grad_norm": 0.56640625, "learning_rate": 1.359736659781182e-05, "loss": 3.964, "step": 6661 }, { "epoch": 2.219205463479637, "grad_norm": 0.58203125, "learning_rate": 1.359689523556335e-05, "loss": 4.105, "step": 6662 }, { "epoch": 2.2195386024818857, "grad_norm": 0.60546875, "learning_rate": 1.3596423802299257e-05, "loss": 4.0955, "step": 6663 }, { "epoch": 2.219871741484134, "grad_norm": 0.5703125, "learning_rate": 1.3595952298025031e-05, "loss": 4.0384, "step": 6664 }, { "epoch": 2.2202048804863828, "grad_norm": 0.57421875, "learning_rate": 1.3595480722746167e-05, "loss": 3.9962, "step": 6665 }, { "epoch": 2.2205380194886315, "grad_norm": 0.5703125, "learning_rate": 1.3595009076468153e-05, "loss": 4.06, "step": 6666 }, { "epoch": 2.2208711584908802, "grad_norm": 0.57421875, "learning_rate": 1.3594537359196484e-05, "loss": 4.0175, "step": 6667 }, { "epoch": 2.221204297493129, "grad_norm": 0.58984375, "learning_rate": 1.359406557093666e-05, "loss": 4.0155, "step": 6668 }, { "epoch": 2.2215374364953777, "grad_norm": 0.58984375, "learning_rate": 1.3593593711694168e-05, "loss": 4.039, "step": 6669 }, { "epoch": 2.2218705754976265, "grad_norm": 0.58203125, "learning_rate": 1.3593121781474511e-05, "loss": 4.0566, "step": 6670 }, { "epoch": 2.222203714499875, "grad_norm": 0.578125, "learning_rate": 1.3592649780283178e-05, "loss": 4.0256, "step": 6671 }, { "epoch": 2.222536853502124, "grad_norm": 0.59765625, "learning_rate": 1.3592177708125674e-05, "loss": 3.9333, "step": 6672 }, { "epoch": 2.2228699925043722, "grad_norm": 0.59765625, "learning_rate": 1.3591705565007498e-05, "loss": 3.9753, "step": 6673 }, { "epoch": 2.223203131506621, "grad_norm": 0.58203125, "learning_rate": 1.3591233350934144e-05, "loss": 4.0452, "step": 6674 }, { "epoch": 2.2235362705088697, "grad_norm": 0.56640625, "learning_rate": 1.3590761065911115e-05, "loss": 4.0003, "step": 6675 }, { "epoch": 2.2238694095111184, "grad_norm": 0.55078125, "learning_rate": 1.3590288709943914e-05, "loss": 4.1061, "step": 6676 }, { "epoch": 2.224202548513367, "grad_norm": 0.58203125, "learning_rate": 1.3589816283038038e-05, "loss": 3.9723, "step": 6677 }, { "epoch": 2.224535687515616, "grad_norm": 0.55859375, "learning_rate": 1.3589343785198996e-05, "loss": 3.9962, "step": 6678 }, { "epoch": 2.2248688265178647, "grad_norm": 0.55859375, "learning_rate": 1.3588871216432287e-05, "loss": 3.9925, "step": 6679 }, { "epoch": 2.2252019655201134, "grad_norm": 0.58203125, "learning_rate": 1.3588398576743416e-05, "loss": 4.0447, "step": 6680 }, { "epoch": 2.225535104522362, "grad_norm": 0.5546875, "learning_rate": 1.358792586613789e-05, "loss": 4.0483, "step": 6681 }, { "epoch": 2.2258682435246104, "grad_norm": 0.5546875, "learning_rate": 1.3587453084621214e-05, "loss": 4.094, "step": 6682 }, { "epoch": 2.226201382526859, "grad_norm": 0.58203125, "learning_rate": 1.3586980232198896e-05, "loss": 3.9733, "step": 6683 }, { "epoch": 2.226534521529108, "grad_norm": 0.56640625, "learning_rate": 1.3586507308876441e-05, "loss": 4.0356, "step": 6684 }, { "epoch": 2.2268676605313567, "grad_norm": 0.59375, "learning_rate": 1.358603431465936e-05, "loss": 4.0613, "step": 6685 }, { "epoch": 2.2272007995336054, "grad_norm": 0.58203125, "learning_rate": 1.3585561249553163e-05, "loss": 3.9943, "step": 6686 }, { "epoch": 2.227533938535854, "grad_norm": 0.56640625, "learning_rate": 1.3585088113563356e-05, "loss": 4.0968, "step": 6687 }, { "epoch": 2.227867077538103, "grad_norm": 0.58203125, "learning_rate": 1.3584614906695455e-05, "loss": 4.0261, "step": 6688 }, { "epoch": 2.2282002165403516, "grad_norm": 0.5703125, "learning_rate": 1.3584141628954967e-05, "loss": 4.0831, "step": 6689 }, { "epoch": 2.2285333555426003, "grad_norm": 0.55859375, "learning_rate": 1.358366828034741e-05, "loss": 4.0365, "step": 6690 }, { "epoch": 2.2288664945448486, "grad_norm": 0.56640625, "learning_rate": 1.3583194860878293e-05, "loss": 4.1118, "step": 6691 }, { "epoch": 2.2291996335470974, "grad_norm": 0.62109375, "learning_rate": 1.3582721370553132e-05, "loss": 4.0433, "step": 6692 }, { "epoch": 2.229532772549346, "grad_norm": 0.5546875, "learning_rate": 1.3582247809377444e-05, "loss": 4.0913, "step": 6693 }, { "epoch": 2.229865911551595, "grad_norm": 0.578125, "learning_rate": 1.3581774177356742e-05, "loss": 4.0474, "step": 6694 }, { "epoch": 2.2301990505538436, "grad_norm": 0.59375, "learning_rate": 1.3581300474496544e-05, "loss": 4.0362, "step": 6695 }, { "epoch": 2.2305321895560923, "grad_norm": 0.59375, "learning_rate": 1.3580826700802367e-05, "loss": 4.0423, "step": 6696 }, { "epoch": 2.230865328558341, "grad_norm": 0.5703125, "learning_rate": 1.358035285627973e-05, "loss": 4.0717, "step": 6697 }, { "epoch": 2.23119846756059, "grad_norm": 0.59765625, "learning_rate": 1.3579878940934152e-05, "loss": 4.0604, "step": 6698 }, { "epoch": 2.2315316065628386, "grad_norm": 0.57421875, "learning_rate": 1.3579404954771154e-05, "loss": 3.9888, "step": 6699 }, { "epoch": 2.231864745565087, "grad_norm": 0.58984375, "learning_rate": 1.3578930897796257e-05, "loss": 4.0736, "step": 6700 }, { "epoch": 2.2321978845673356, "grad_norm": 0.56640625, "learning_rate": 1.3578456770014983e-05, "loss": 4.0957, "step": 6701 }, { "epoch": 2.2325310235695843, "grad_norm": 0.609375, "learning_rate": 1.3577982571432851e-05, "loss": 3.9675, "step": 6702 }, { "epoch": 2.232864162571833, "grad_norm": 0.5703125, "learning_rate": 1.3577508302055386e-05, "loss": 4.0614, "step": 6703 }, { "epoch": 2.233197301574082, "grad_norm": 0.59765625, "learning_rate": 1.3577033961888115e-05, "loss": 3.9988, "step": 6704 }, { "epoch": 2.2335304405763305, "grad_norm": 0.5625, "learning_rate": 1.357655955093656e-05, "loss": 4.028, "step": 6705 }, { "epoch": 2.2338635795785793, "grad_norm": 0.5703125, "learning_rate": 1.357608506920625e-05, "loss": 3.9905, "step": 6706 }, { "epoch": 2.234196718580828, "grad_norm": 0.5703125, "learning_rate": 1.3575610516702708e-05, "loss": 4.0108, "step": 6707 }, { "epoch": 2.2345298575830768, "grad_norm": 0.59765625, "learning_rate": 1.3575135893431465e-05, "loss": 3.9804, "step": 6708 }, { "epoch": 2.234862996585325, "grad_norm": 0.578125, "learning_rate": 1.3574661199398048e-05, "loss": 4.0307, "step": 6709 }, { "epoch": 2.235196135587574, "grad_norm": 0.56640625, "learning_rate": 1.3574186434607982e-05, "loss": 4.0105, "step": 6710 }, { "epoch": 2.2355292745898225, "grad_norm": 0.5703125, "learning_rate": 1.3573711599066803e-05, "loss": 4.0681, "step": 6711 }, { "epoch": 2.2358624135920713, "grad_norm": 0.58203125, "learning_rate": 1.357323669278004e-05, "loss": 4.0784, "step": 6712 }, { "epoch": 2.23619555259432, "grad_norm": 0.62109375, "learning_rate": 1.3572761715753223e-05, "loss": 3.9881, "step": 6713 }, { "epoch": 2.2365286915965688, "grad_norm": 0.5859375, "learning_rate": 1.3572286667991887e-05, "loss": 4.0724, "step": 6714 }, { "epoch": 2.2368618305988175, "grad_norm": 0.5859375, "learning_rate": 1.3571811549501563e-05, "loss": 4.0592, "step": 6715 }, { "epoch": 2.2371949696010662, "grad_norm": 0.6015625, "learning_rate": 1.3571336360287785e-05, "loss": 4.0137, "step": 6716 }, { "epoch": 2.2375281086033145, "grad_norm": 0.5703125, "learning_rate": 1.3570861100356092e-05, "loss": 3.9751, "step": 6717 }, { "epoch": 2.2378612476055633, "grad_norm": 0.5859375, "learning_rate": 1.3570385769712013e-05, "loss": 4.0241, "step": 6718 }, { "epoch": 2.238194386607812, "grad_norm": 0.55859375, "learning_rate": 1.356991036836109e-05, "loss": 4.0676, "step": 6719 }, { "epoch": 2.2385275256100607, "grad_norm": 0.578125, "learning_rate": 1.3569434896308859e-05, "loss": 4.043, "step": 6720 }, { "epoch": 2.2388606646123095, "grad_norm": 0.5859375, "learning_rate": 1.3568959353560857e-05, "loss": 4.0515, "step": 6721 }, { "epoch": 2.239193803614558, "grad_norm": 0.56640625, "learning_rate": 1.3568483740122623e-05, "loss": 4.0586, "step": 6722 }, { "epoch": 2.239526942616807, "grad_norm": 0.578125, "learning_rate": 1.35680080559997e-05, "loss": 4.1135, "step": 6723 }, { "epoch": 2.2398600816190557, "grad_norm": 0.59375, "learning_rate": 1.3567532301197624e-05, "loss": 4.0387, "step": 6724 }, { "epoch": 2.2401932206213044, "grad_norm": 0.59375, "learning_rate": 1.356705647572194e-05, "loss": 4.097, "step": 6725 }, { "epoch": 2.2405263596235527, "grad_norm": 0.54296875, "learning_rate": 1.3566580579578191e-05, "loss": 4.0122, "step": 6726 }, { "epoch": 2.2408594986258015, "grad_norm": 0.58984375, "learning_rate": 1.3566104612771916e-05, "loss": 4.0666, "step": 6727 }, { "epoch": 2.24119263762805, "grad_norm": 0.53125, "learning_rate": 1.3565628575308662e-05, "loss": 4.0393, "step": 6728 }, { "epoch": 2.241525776630299, "grad_norm": 0.5703125, "learning_rate": 1.3565152467193975e-05, "loss": 4.1009, "step": 6729 }, { "epoch": 2.2418589156325477, "grad_norm": 0.55859375, "learning_rate": 1.3564676288433396e-05, "loss": 4.1522, "step": 6730 }, { "epoch": 2.2421920546347964, "grad_norm": 0.5859375, "learning_rate": 1.3564200039032477e-05, "loss": 4.0723, "step": 6731 }, { "epoch": 2.242525193637045, "grad_norm": 0.60546875, "learning_rate": 1.3563723718996761e-05, "loss": 3.9778, "step": 6732 }, { "epoch": 2.242858332639294, "grad_norm": 0.578125, "learning_rate": 1.35632473283318e-05, "loss": 4.003, "step": 6733 }, { "epoch": 2.243191471641542, "grad_norm": 0.58203125, "learning_rate": 1.3562770867043138e-05, "loss": 4.0791, "step": 6734 }, { "epoch": 2.243524610643791, "grad_norm": 0.59765625, "learning_rate": 1.3562294335136328e-05, "loss": 4.116, "step": 6735 }, { "epoch": 2.2438577496460397, "grad_norm": 0.578125, "learning_rate": 1.3561817732616919e-05, "loss": 3.9923, "step": 6736 }, { "epoch": 2.2441908886482884, "grad_norm": 0.5546875, "learning_rate": 1.3561341059490465e-05, "loss": 3.9903, "step": 6737 }, { "epoch": 2.244524027650537, "grad_norm": 0.56640625, "learning_rate": 1.3560864315762513e-05, "loss": 4.0229, "step": 6738 }, { "epoch": 2.244857166652786, "grad_norm": 0.5859375, "learning_rate": 1.3560387501438622e-05, "loss": 4.0307, "step": 6739 }, { "epoch": 2.2451903056550346, "grad_norm": 0.59765625, "learning_rate": 1.3559910616524345e-05, "loss": 4.0118, "step": 6740 }, { "epoch": 2.2455234446572834, "grad_norm": 0.58984375, "learning_rate": 1.3559433661025233e-05, "loss": 4.0334, "step": 6741 }, { "epoch": 2.245856583659532, "grad_norm": 0.546875, "learning_rate": 1.3558956634946846e-05, "loss": 4.0551, "step": 6742 }, { "epoch": 2.2461897226617804, "grad_norm": 0.58203125, "learning_rate": 1.3558479538294735e-05, "loss": 4.0799, "step": 6743 }, { "epoch": 2.246522861664029, "grad_norm": 0.53515625, "learning_rate": 1.3558002371074463e-05, "loss": 4.0465, "step": 6744 }, { "epoch": 2.246856000666278, "grad_norm": 0.57421875, "learning_rate": 1.355752513329158e-05, "loss": 4.0551, "step": 6745 }, { "epoch": 2.2471891396685266, "grad_norm": 0.59765625, "learning_rate": 1.3557047824951655e-05, "loss": 4.0915, "step": 6746 }, { "epoch": 2.2475222786707754, "grad_norm": 0.57421875, "learning_rate": 1.355657044606024e-05, "loss": 4.1083, "step": 6747 }, { "epoch": 2.247855417673024, "grad_norm": 0.6015625, "learning_rate": 1.3556092996622898e-05, "loss": 4.0192, "step": 6748 }, { "epoch": 2.248188556675273, "grad_norm": 0.546875, "learning_rate": 1.355561547664519e-05, "loss": 4.0199, "step": 6749 }, { "epoch": 2.2485216956775216, "grad_norm": 0.53515625, "learning_rate": 1.3555137886132679e-05, "loss": 4.0793, "step": 6750 }, { "epoch": 2.2488548346797703, "grad_norm": 0.58984375, "learning_rate": 1.3554660225090925e-05, "loss": 4.075, "step": 6751 }, { "epoch": 2.2491879736820186, "grad_norm": 0.61328125, "learning_rate": 1.3554182493525494e-05, "loss": 4.0791, "step": 6752 }, { "epoch": 2.2495211126842674, "grad_norm": 0.60546875, "learning_rate": 1.3553704691441953e-05, "loss": 3.9868, "step": 6753 }, { "epoch": 2.249854251686516, "grad_norm": 0.5859375, "learning_rate": 1.3553226818845861e-05, "loss": 4.0384, "step": 6754 }, { "epoch": 2.250187390688765, "grad_norm": 0.578125, "learning_rate": 1.3552748875742791e-05, "loss": 4.0496, "step": 6755 }, { "epoch": 2.2505205296910136, "grad_norm": 0.57421875, "learning_rate": 1.3552270862138305e-05, "loss": 4.0222, "step": 6756 }, { "epoch": 2.2508536686932623, "grad_norm": 0.5859375, "learning_rate": 1.3551792778037973e-05, "loss": 3.9976, "step": 6757 }, { "epoch": 2.251186807695511, "grad_norm": 0.578125, "learning_rate": 1.3551314623447364e-05, "loss": 3.9809, "step": 6758 }, { "epoch": 2.25151994669776, "grad_norm": 0.59765625, "learning_rate": 1.3550836398372049e-05, "loss": 3.9724, "step": 6759 }, { "epoch": 2.2518530857000085, "grad_norm": 0.59375, "learning_rate": 1.3550358102817592e-05, "loss": 4.0193, "step": 6760 }, { "epoch": 2.252186224702257, "grad_norm": 0.5859375, "learning_rate": 1.354987973678957e-05, "loss": 4.0653, "step": 6761 }, { "epoch": 2.2525193637045056, "grad_norm": 0.5703125, "learning_rate": 1.3549401300293553e-05, "loss": 4.1339, "step": 6762 }, { "epoch": 2.2528525027067543, "grad_norm": 0.55859375, "learning_rate": 1.3548922793335112e-05, "loss": 3.996, "step": 6763 }, { "epoch": 2.253185641709003, "grad_norm": 0.609375, "learning_rate": 1.3548444215919826e-05, "loss": 4.0225, "step": 6764 }, { "epoch": 2.253518780711252, "grad_norm": 0.57421875, "learning_rate": 1.3547965568053263e-05, "loss": 4.0621, "step": 6765 }, { "epoch": 2.2538519197135005, "grad_norm": 0.61328125, "learning_rate": 1.3547486849741e-05, "loss": 4.0305, "step": 6766 }, { "epoch": 2.2541850587157493, "grad_norm": 0.62109375, "learning_rate": 1.3547008060988616e-05, "loss": 4.0525, "step": 6767 }, { "epoch": 2.254518197717998, "grad_norm": 0.55078125, "learning_rate": 1.3546529201801686e-05, "loss": 4.0541, "step": 6768 }, { "epoch": 2.2548513367202467, "grad_norm": 0.58203125, "learning_rate": 1.3546050272185788e-05, "loss": 4.1001, "step": 6769 }, { "epoch": 2.255184475722495, "grad_norm": 0.56640625, "learning_rate": 1.3545571272146499e-05, "loss": 4.0676, "step": 6770 }, { "epoch": 2.2555176147247438, "grad_norm": 0.58984375, "learning_rate": 1.3545092201689399e-05, "loss": 4.0393, "step": 6771 }, { "epoch": 2.2558507537269925, "grad_norm": 0.58203125, "learning_rate": 1.3544613060820069e-05, "loss": 4.043, "step": 6772 }, { "epoch": 2.2561838927292412, "grad_norm": 0.5546875, "learning_rate": 1.354413384954409e-05, "loss": 4.1337, "step": 6773 }, { "epoch": 2.25651703173149, "grad_norm": 0.5546875, "learning_rate": 1.3543654567867044e-05, "loss": 4.0544, "step": 6774 }, { "epoch": 2.2568501707337387, "grad_norm": 0.609375, "learning_rate": 1.3543175215794509e-05, "loss": 4.0441, "step": 6775 }, { "epoch": 2.2571833097359875, "grad_norm": 0.6171875, "learning_rate": 1.3542695793332074e-05, "loss": 4.0528, "step": 6776 }, { "epoch": 2.257516448738236, "grad_norm": 0.6015625, "learning_rate": 1.3542216300485324e-05, "loss": 4.0132, "step": 6777 }, { "epoch": 2.257849587740485, "grad_norm": 0.56640625, "learning_rate": 1.354173673725984e-05, "loss": 4.0696, "step": 6778 }, { "epoch": 2.2581827267427332, "grad_norm": 0.59375, "learning_rate": 1.354125710366121e-05, "loss": 4.0498, "step": 6779 }, { "epoch": 2.258515865744982, "grad_norm": 0.5859375, "learning_rate": 1.3540777399695018e-05, "loss": 4.0623, "step": 6780 }, { "epoch": 2.2588490047472307, "grad_norm": 0.6015625, "learning_rate": 1.3540297625366854e-05, "loss": 3.968, "step": 6781 }, { "epoch": 2.2591821437494795, "grad_norm": 0.59765625, "learning_rate": 1.3539817780682307e-05, "loss": 4.0159, "step": 6782 }, { "epoch": 2.259515282751728, "grad_norm": 0.5859375, "learning_rate": 1.3539337865646964e-05, "loss": 4.0367, "step": 6783 }, { "epoch": 2.259848421753977, "grad_norm": 0.56640625, "learning_rate": 1.3538857880266418e-05, "loss": 4.0881, "step": 6784 }, { "epoch": 2.2601815607562257, "grad_norm": 0.6015625, "learning_rate": 1.3538377824546258e-05, "loss": 4.0544, "step": 6785 }, { "epoch": 2.2605146997584744, "grad_norm": 0.578125, "learning_rate": 1.3537897698492074e-05, "loss": 3.9533, "step": 6786 }, { "epoch": 2.260847838760723, "grad_norm": 0.61328125, "learning_rate": 1.3537417502109459e-05, "loss": 4.0025, "step": 6787 }, { "epoch": 2.2611809777629714, "grad_norm": 0.56640625, "learning_rate": 1.3536937235404007e-05, "loss": 4.0349, "step": 6788 }, { "epoch": 2.26151411676522, "grad_norm": 0.609375, "learning_rate": 1.3536456898381314e-05, "loss": 4.0866, "step": 6789 }, { "epoch": 2.261847255767469, "grad_norm": 0.609375, "learning_rate": 1.3535976491046975e-05, "loss": 4.017, "step": 6790 }, { "epoch": 2.2621803947697177, "grad_norm": 0.58984375, "learning_rate": 1.353549601340658e-05, "loss": 4.0253, "step": 6791 }, { "epoch": 2.2625135337719664, "grad_norm": 0.578125, "learning_rate": 1.353501546546573e-05, "loss": 3.9871, "step": 6792 }, { "epoch": 2.262846672774215, "grad_norm": 0.60546875, "learning_rate": 1.3534534847230023e-05, "loss": 4.0528, "step": 6793 }, { "epoch": 2.263179811776464, "grad_norm": 0.6015625, "learning_rate": 1.3534054158705054e-05, "loss": 4.0494, "step": 6794 }, { "epoch": 2.263512950778712, "grad_norm": 0.59765625, "learning_rate": 1.3533573399896426e-05, "loss": 4.085, "step": 6795 }, { "epoch": 2.263846089780961, "grad_norm": 0.5625, "learning_rate": 1.3533092570809736e-05, "loss": 4.0549, "step": 6796 }, { "epoch": 2.2641792287832097, "grad_norm": 0.58984375, "learning_rate": 1.3532611671450583e-05, "loss": 4.089, "step": 6797 }, { "epoch": 2.2645123677854584, "grad_norm": 0.57421875, "learning_rate": 1.3532130701824572e-05, "loss": 4.0399, "step": 6798 }, { "epoch": 2.264845506787707, "grad_norm": 0.58984375, "learning_rate": 1.3531649661937305e-05, "loss": 4.0227, "step": 6799 }, { "epoch": 2.265178645789956, "grad_norm": 0.5625, "learning_rate": 1.3531168551794383e-05, "loss": 4.0941, "step": 6800 }, { "epoch": 2.2655117847922046, "grad_norm": 0.55859375, "learning_rate": 1.353068737140141e-05, "loss": 4.031, "step": 6801 }, { "epoch": 2.2658449237944533, "grad_norm": 0.56640625, "learning_rate": 1.3530206120763992e-05, "loss": 3.9853, "step": 6802 }, { "epoch": 2.266178062796702, "grad_norm": 0.58203125, "learning_rate": 1.3529724799887735e-05, "loss": 4.0349, "step": 6803 }, { "epoch": 2.2665112017989504, "grad_norm": 0.59375, "learning_rate": 1.3529243408778246e-05, "loss": 4.0739, "step": 6804 }, { "epoch": 2.266844340801199, "grad_norm": 0.5625, "learning_rate": 1.3528761947441127e-05, "loss": 4.0893, "step": 6805 }, { "epoch": 2.267177479803448, "grad_norm": 0.578125, "learning_rate": 1.352828041588199e-05, "loss": 4.0102, "step": 6806 }, { "epoch": 2.2675106188056966, "grad_norm": 0.58203125, "learning_rate": 1.3527798814106448e-05, "loss": 4.092, "step": 6807 }, { "epoch": 2.2678437578079453, "grad_norm": 0.56640625, "learning_rate": 1.35273171421201e-05, "loss": 3.9905, "step": 6808 }, { "epoch": 2.268176896810194, "grad_norm": 0.59375, "learning_rate": 1.3526835399928566e-05, "loss": 4.0224, "step": 6809 }, { "epoch": 2.268510035812443, "grad_norm": 0.56640625, "learning_rate": 1.3526353587537453e-05, "loss": 4.0633, "step": 6810 }, { "epoch": 2.2688431748146916, "grad_norm": 0.5625, "learning_rate": 1.3525871704952374e-05, "loss": 4.0642, "step": 6811 }, { "epoch": 2.2691763138169403, "grad_norm": 0.59765625, "learning_rate": 1.3525389752178941e-05, "loss": 4.0753, "step": 6812 }, { "epoch": 2.2695094528191886, "grad_norm": 0.61328125, "learning_rate": 1.3524907729222769e-05, "loss": 4.014, "step": 6813 }, { "epoch": 2.2698425918214373, "grad_norm": 0.59375, "learning_rate": 1.352442563608947e-05, "loss": 4.0158, "step": 6814 }, { "epoch": 2.270175730823686, "grad_norm": 0.58984375, "learning_rate": 1.3523943472784664e-05, "loss": 3.9434, "step": 6815 }, { "epoch": 2.270508869825935, "grad_norm": 0.6015625, "learning_rate": 1.3523461239313962e-05, "loss": 4.0004, "step": 6816 }, { "epoch": 2.2708420088281835, "grad_norm": 0.5625, "learning_rate": 1.3522978935682986e-05, "loss": 4.1041, "step": 6817 }, { "epoch": 2.2711751478304323, "grad_norm": 0.56640625, "learning_rate": 1.352249656189735e-05, "loss": 4.0192, "step": 6818 }, { "epoch": 2.271508286832681, "grad_norm": 0.57421875, "learning_rate": 1.3522014117962672e-05, "loss": 4.0313, "step": 6819 }, { "epoch": 2.2718414258349298, "grad_norm": 0.625, "learning_rate": 1.3521531603884576e-05, "loss": 3.9444, "step": 6820 }, { "epoch": 2.2721745648371785, "grad_norm": 0.55859375, "learning_rate": 1.3521049019668678e-05, "loss": 4.0434, "step": 6821 }, { "epoch": 2.272507703839427, "grad_norm": 0.5625, "learning_rate": 1.35205663653206e-05, "loss": 4.0224, "step": 6822 }, { "epoch": 2.2728408428416755, "grad_norm": 0.59765625, "learning_rate": 1.3520083640845964e-05, "loss": 3.9543, "step": 6823 }, { "epoch": 2.2731739818439243, "grad_norm": 0.58203125, "learning_rate": 1.3519600846250394e-05, "loss": 4.0119, "step": 6824 }, { "epoch": 2.273507120846173, "grad_norm": 0.58984375, "learning_rate": 1.3519117981539511e-05, "loss": 4.0634, "step": 6825 }, { "epoch": 2.2738402598484218, "grad_norm": 0.6171875, "learning_rate": 1.3518635046718945e-05, "loss": 3.9838, "step": 6826 }, { "epoch": 2.2741733988506705, "grad_norm": 0.57421875, "learning_rate": 1.3518152041794314e-05, "loss": 4.0033, "step": 6827 }, { "epoch": 2.2745065378529192, "grad_norm": 0.56640625, "learning_rate": 1.3517668966771246e-05, "loss": 4.0212, "step": 6828 }, { "epoch": 2.274839676855168, "grad_norm": 0.58203125, "learning_rate": 1.351718582165537e-05, "loss": 3.9706, "step": 6829 }, { "epoch": 2.2751728158574167, "grad_norm": 0.640625, "learning_rate": 1.3516702606452313e-05, "loss": 3.9933, "step": 6830 }, { "epoch": 2.275505954859665, "grad_norm": 0.55078125, "learning_rate": 1.3516219321167702e-05, "loss": 4.0253, "step": 6831 }, { "epoch": 2.2758390938619137, "grad_norm": 0.5390625, "learning_rate": 1.3515735965807167e-05, "loss": 4.1062, "step": 6832 }, { "epoch": 2.2761722328641625, "grad_norm": 0.5625, "learning_rate": 1.351525254037634e-05, "loss": 4.0062, "step": 6833 }, { "epoch": 2.276505371866411, "grad_norm": 0.58984375, "learning_rate": 1.3514769044880849e-05, "loss": 4.0559, "step": 6834 }, { "epoch": 2.27683851086866, "grad_norm": 0.58984375, "learning_rate": 1.3514285479326325e-05, "loss": 4.0258, "step": 6835 }, { "epoch": 2.2771716498709087, "grad_norm": 0.58984375, "learning_rate": 1.3513801843718404e-05, "loss": 3.9652, "step": 6836 }, { "epoch": 2.2775047888731574, "grad_norm": 0.578125, "learning_rate": 1.3513318138062715e-05, "loss": 3.9895, "step": 6837 }, { "epoch": 2.277837927875406, "grad_norm": 0.58984375, "learning_rate": 1.3512834362364896e-05, "loss": 4.0302, "step": 6838 }, { "epoch": 2.278171066877655, "grad_norm": 0.578125, "learning_rate": 1.3512350516630581e-05, "loss": 3.9651, "step": 6839 }, { "epoch": 2.278504205879903, "grad_norm": 0.57421875, "learning_rate": 1.3511866600865404e-05, "loss": 4.0199, "step": 6840 }, { "epoch": 2.278837344882152, "grad_norm": 0.59765625, "learning_rate": 1.3511382615075004e-05, "loss": 4.0368, "step": 6841 }, { "epoch": 2.2791704838844007, "grad_norm": 0.5625, "learning_rate": 1.3510898559265015e-05, "loss": 4.0816, "step": 6842 }, { "epoch": 2.2795036228866494, "grad_norm": 0.5859375, "learning_rate": 1.351041443344108e-05, "loss": 3.9544, "step": 6843 }, { "epoch": 2.279836761888898, "grad_norm": 0.6015625, "learning_rate": 1.3509930237608833e-05, "loss": 4.0061, "step": 6844 }, { "epoch": 2.280169900891147, "grad_norm": 0.61328125, "learning_rate": 1.350944597177392e-05, "loss": 4.0168, "step": 6845 }, { "epoch": 2.2805030398933956, "grad_norm": 0.59375, "learning_rate": 1.3508961635941975e-05, "loss": 4.0439, "step": 6846 }, { "epoch": 2.2808361788956444, "grad_norm": 0.640625, "learning_rate": 1.3508477230118643e-05, "loss": 4.0172, "step": 6847 }, { "epoch": 2.281169317897893, "grad_norm": 0.5625, "learning_rate": 1.3507992754309563e-05, "loss": 4.112, "step": 6848 }, { "epoch": 2.2815024569001414, "grad_norm": 0.578125, "learning_rate": 1.3507508208520385e-05, "loss": 4.0677, "step": 6849 }, { "epoch": 2.28183559590239, "grad_norm": 0.59375, "learning_rate": 1.3507023592756746e-05, "loss": 4.0133, "step": 6850 }, { "epoch": 2.282168734904639, "grad_norm": 0.57421875, "learning_rate": 1.3506538907024294e-05, "loss": 3.9994, "step": 6851 }, { "epoch": 2.2825018739068876, "grad_norm": 0.6015625, "learning_rate": 1.3506054151328674e-05, "loss": 4.0228, "step": 6852 }, { "epoch": 2.2828350129091364, "grad_norm": 0.6015625, "learning_rate": 1.3505569325675532e-05, "loss": 4.0045, "step": 6853 }, { "epoch": 2.283168151911385, "grad_norm": 0.5625, "learning_rate": 1.3505084430070516e-05, "loss": 4.0845, "step": 6854 }, { "epoch": 2.283501290913634, "grad_norm": 0.5859375, "learning_rate": 1.3504599464519274e-05, "loss": 3.9756, "step": 6855 }, { "epoch": 2.2838344299158826, "grad_norm": 0.62109375, "learning_rate": 1.3504114429027453e-05, "loss": 4.0417, "step": 6856 }, { "epoch": 2.2841675689181313, "grad_norm": 0.578125, "learning_rate": 1.3503629323600704e-05, "loss": 4.0213, "step": 6857 }, { "epoch": 2.2845007079203796, "grad_norm": 0.58203125, "learning_rate": 1.3503144148244676e-05, "loss": 4.0424, "step": 6858 }, { "epoch": 2.2848338469226284, "grad_norm": 0.60546875, "learning_rate": 1.3502658902965022e-05, "loss": 3.9753, "step": 6859 }, { "epoch": 2.285166985924877, "grad_norm": 0.59765625, "learning_rate": 1.3502173587767396e-05, "loss": 4.0395, "step": 6860 }, { "epoch": 2.285500124927126, "grad_norm": 0.578125, "learning_rate": 1.3501688202657446e-05, "loss": 3.9994, "step": 6861 }, { "epoch": 2.2858332639293746, "grad_norm": 0.58984375, "learning_rate": 1.3501202747640828e-05, "loss": 3.9573, "step": 6862 }, { "epoch": 2.2861664029316233, "grad_norm": 0.58203125, "learning_rate": 1.3500717222723198e-05, "loss": 3.9801, "step": 6863 }, { "epoch": 2.286499541933872, "grad_norm": 0.61328125, "learning_rate": 1.3500231627910208e-05, "loss": 4.003, "step": 6864 }, { "epoch": 2.2868326809361204, "grad_norm": 0.578125, "learning_rate": 1.3499745963207518e-05, "loss": 4.1134, "step": 6865 }, { "epoch": 2.287165819938369, "grad_norm": 0.578125, "learning_rate": 1.3499260228620781e-05, "loss": 4.0419, "step": 6866 }, { "epoch": 2.287498958940618, "grad_norm": 0.58984375, "learning_rate": 1.3498774424155659e-05, "loss": 4.0394, "step": 6867 }, { "epoch": 2.2878320979428666, "grad_norm": 0.59765625, "learning_rate": 1.3498288549817805e-05, "loss": 4.0686, "step": 6868 }, { "epoch": 2.2881652369451153, "grad_norm": 0.57421875, "learning_rate": 1.3497802605612884e-05, "loss": 4.0523, "step": 6869 }, { "epoch": 2.288498375947364, "grad_norm": 0.6171875, "learning_rate": 1.3497316591546554e-05, "loss": 3.9859, "step": 6870 }, { "epoch": 2.288831514949613, "grad_norm": 0.57421875, "learning_rate": 1.3496830507624475e-05, "loss": 3.98, "step": 6871 }, { "epoch": 2.2891646539518615, "grad_norm": 0.5859375, "learning_rate": 1.3496344353852308e-05, "loss": 4.0514, "step": 6872 }, { "epoch": 2.2894977929541103, "grad_norm": 0.57421875, "learning_rate": 1.3495858130235721e-05, "loss": 4.003, "step": 6873 }, { "epoch": 2.2898309319563586, "grad_norm": 0.5625, "learning_rate": 1.3495371836780373e-05, "loss": 4.1017, "step": 6874 }, { "epoch": 2.2901640709586073, "grad_norm": 0.6015625, "learning_rate": 1.349488547349193e-05, "loss": 4.0374, "step": 6875 }, { "epoch": 2.290497209960856, "grad_norm": 0.5703125, "learning_rate": 1.3494399040376053e-05, "loss": 4.1048, "step": 6876 }, { "epoch": 2.290830348963105, "grad_norm": 0.62890625, "learning_rate": 1.3493912537438413e-05, "loss": 4.0582, "step": 6877 }, { "epoch": 2.2911634879653535, "grad_norm": 0.59765625, "learning_rate": 1.3493425964684674e-05, "loss": 3.992, "step": 6878 }, { "epoch": 2.2914966269676023, "grad_norm": 0.57421875, "learning_rate": 1.3492939322120503e-05, "loss": 3.9934, "step": 6879 }, { "epoch": 2.291829765969851, "grad_norm": 0.58984375, "learning_rate": 1.3492452609751572e-05, "loss": 4.0186, "step": 6880 }, { "epoch": 2.2921629049720997, "grad_norm": 0.59375, "learning_rate": 1.3491965827583546e-05, "loss": 4.0446, "step": 6881 }, { "epoch": 2.2924960439743485, "grad_norm": 0.609375, "learning_rate": 1.3491478975622097e-05, "loss": 3.9948, "step": 6882 }, { "epoch": 2.2928291829765968, "grad_norm": 0.57421875, "learning_rate": 1.3490992053872896e-05, "loss": 3.9378, "step": 6883 }, { "epoch": 2.2931623219788455, "grad_norm": 0.5703125, "learning_rate": 1.3490505062341614e-05, "loss": 4.0768, "step": 6884 }, { "epoch": 2.2934954609810942, "grad_norm": 0.5703125, "learning_rate": 1.3490018001033923e-05, "loss": 4.0442, "step": 6885 }, { "epoch": 2.293828599983343, "grad_norm": 0.59375, "learning_rate": 1.34895308699555e-05, "loss": 4.0212, "step": 6886 }, { "epoch": 2.2941617389855917, "grad_norm": 0.5859375, "learning_rate": 1.348904366911201e-05, "loss": 3.9744, "step": 6887 }, { "epoch": 2.2944948779878405, "grad_norm": 0.59765625, "learning_rate": 1.3488556398509137e-05, "loss": 4.0819, "step": 6888 }, { "epoch": 2.294828016990089, "grad_norm": 0.59375, "learning_rate": 1.3488069058152551e-05, "loss": 4.0184, "step": 6889 }, { "epoch": 2.295161155992338, "grad_norm": 0.546875, "learning_rate": 1.3487581648047933e-05, "loss": 4.0511, "step": 6890 }, { "epoch": 2.2954942949945867, "grad_norm": 0.56640625, "learning_rate": 1.3487094168200957e-05, "loss": 4.068, "step": 6891 }, { "epoch": 2.295827433996835, "grad_norm": 0.5625, "learning_rate": 1.3486606618617301e-05, "loss": 4.0887, "step": 6892 }, { "epoch": 2.2961605729990837, "grad_norm": 0.57421875, "learning_rate": 1.3486118999302646e-05, "loss": 4.0062, "step": 6893 }, { "epoch": 2.2964937120013325, "grad_norm": 0.57421875, "learning_rate": 1.3485631310262671e-05, "loss": 4.1282, "step": 6894 }, { "epoch": 2.296826851003581, "grad_norm": 0.5703125, "learning_rate": 1.3485143551503054e-05, "loss": 4.0531, "step": 6895 }, { "epoch": 2.29715999000583, "grad_norm": 0.5859375, "learning_rate": 1.3484655723029479e-05, "loss": 4.0075, "step": 6896 }, { "epoch": 2.2974931290080787, "grad_norm": 0.58984375, "learning_rate": 1.348416782484763e-05, "loss": 4.0122, "step": 6897 }, { "epoch": 2.2978262680103274, "grad_norm": 0.5703125, "learning_rate": 1.3483679856963184e-05, "loss": 4.0988, "step": 6898 }, { "epoch": 2.298159407012576, "grad_norm": 0.58984375, "learning_rate": 1.3483191819381831e-05, "loss": 4.0476, "step": 6899 }, { "epoch": 2.298492546014825, "grad_norm": 0.5703125, "learning_rate": 1.3482703712109252e-05, "loss": 4.0296, "step": 6900 }, { "epoch": 2.298825685017073, "grad_norm": 0.58203125, "learning_rate": 1.3482215535151134e-05, "loss": 4.0389, "step": 6901 }, { "epoch": 2.299158824019322, "grad_norm": 0.578125, "learning_rate": 1.3481727288513161e-05, "loss": 4.0336, "step": 6902 }, { "epoch": 2.2994919630215707, "grad_norm": 0.5859375, "learning_rate": 1.3481238972201021e-05, "loss": 4.0931, "step": 6903 }, { "epoch": 2.2998251020238194, "grad_norm": 0.58203125, "learning_rate": 1.3480750586220405e-05, "loss": 4.0465, "step": 6904 }, { "epoch": 2.300158241026068, "grad_norm": 0.56640625, "learning_rate": 1.3480262130576996e-05, "loss": 4.0818, "step": 6905 }, { "epoch": 2.300491380028317, "grad_norm": 0.59765625, "learning_rate": 1.347977360527649e-05, "loss": 3.9788, "step": 6906 }, { "epoch": 2.3008245190305656, "grad_norm": 0.56640625, "learning_rate": 1.347928501032457e-05, "loss": 4.0373, "step": 6907 }, { "epoch": 2.3011576580328144, "grad_norm": 0.6015625, "learning_rate": 1.3478796345726932e-05, "loss": 3.9907, "step": 6908 }, { "epoch": 2.301490797035063, "grad_norm": 0.62109375, "learning_rate": 1.3478307611489267e-05, "loss": 3.946, "step": 6909 }, { "epoch": 2.3018239360373114, "grad_norm": 0.5703125, "learning_rate": 1.3477818807617268e-05, "loss": 4.0263, "step": 6910 }, { "epoch": 2.30215707503956, "grad_norm": 0.5703125, "learning_rate": 1.3477329934116626e-05, "loss": 4.0644, "step": 6911 }, { "epoch": 2.302490214041809, "grad_norm": 0.5703125, "learning_rate": 1.347684099099304e-05, "loss": 4.0543, "step": 6912 }, { "epoch": 2.3028233530440576, "grad_norm": 0.60546875, "learning_rate": 1.34763519782522e-05, "loss": 3.9607, "step": 6913 }, { "epoch": 2.3031564920463063, "grad_norm": 0.5546875, "learning_rate": 1.3475862895899803e-05, "loss": 4.1211, "step": 6914 }, { "epoch": 2.303489631048555, "grad_norm": 0.57421875, "learning_rate": 1.347537374394155e-05, "loss": 4.0154, "step": 6915 }, { "epoch": 2.303822770050804, "grad_norm": 0.56640625, "learning_rate": 1.3474884522383133e-05, "loss": 4.0315, "step": 6916 }, { "epoch": 2.3041559090530526, "grad_norm": 0.59765625, "learning_rate": 1.3474395231230256e-05, "loss": 4.0534, "step": 6917 }, { "epoch": 2.3044890480553013, "grad_norm": 0.58203125, "learning_rate": 1.3473905870488611e-05, "loss": 4.0602, "step": 6918 }, { "epoch": 2.3048221870575496, "grad_norm": 0.578125, "learning_rate": 1.3473416440163904e-05, "loss": 4.0259, "step": 6919 }, { "epoch": 2.3051553260597983, "grad_norm": 0.57421875, "learning_rate": 1.3472926940261836e-05, "loss": 3.9777, "step": 6920 }, { "epoch": 2.305488465062047, "grad_norm": 0.59765625, "learning_rate": 1.3472437370788103e-05, "loss": 4.0348, "step": 6921 }, { "epoch": 2.305821604064296, "grad_norm": 0.55859375, "learning_rate": 1.3471947731748411e-05, "loss": 4.024, "step": 6922 }, { "epoch": 2.3061547430665446, "grad_norm": 0.57421875, "learning_rate": 1.3471458023148467e-05, "loss": 4.0546, "step": 6923 }, { "epoch": 2.3064878820687933, "grad_norm": 0.6015625, "learning_rate": 1.3470968244993968e-05, "loss": 4.016, "step": 6924 }, { "epoch": 2.306821021071042, "grad_norm": 0.59765625, "learning_rate": 1.3470478397290625e-05, "loss": 3.99, "step": 6925 }, { "epoch": 2.3071541600732908, "grad_norm": 0.58203125, "learning_rate": 1.3469988480044136e-05, "loss": 4.0924, "step": 6926 }, { "epoch": 2.3074872990755395, "grad_norm": 0.56640625, "learning_rate": 1.3469498493260216e-05, "loss": 4.0971, "step": 6927 }, { "epoch": 2.307820438077788, "grad_norm": 0.56640625, "learning_rate": 1.3469008436944566e-05, "loss": 4.0626, "step": 6928 }, { "epoch": 2.3081535770800365, "grad_norm": 0.61328125, "learning_rate": 1.3468518311102898e-05, "loss": 4.0496, "step": 6929 }, { "epoch": 2.3084867160822853, "grad_norm": 0.5625, "learning_rate": 1.346802811574092e-05, "loss": 4.0691, "step": 6930 }, { "epoch": 2.308819855084534, "grad_norm": 0.6015625, "learning_rate": 1.3467537850864339e-05, "loss": 4.0714, "step": 6931 }, { "epoch": 2.3091529940867828, "grad_norm": 0.58984375, "learning_rate": 1.346704751647887e-05, "loss": 4.056, "step": 6932 }, { "epoch": 2.3094861330890315, "grad_norm": 0.61328125, "learning_rate": 1.346655711259022e-05, "loss": 4.0343, "step": 6933 }, { "epoch": 2.3098192720912802, "grad_norm": 0.5859375, "learning_rate": 1.3466066639204105e-05, "loss": 4.0205, "step": 6934 }, { "epoch": 2.3101524110935285, "grad_norm": 0.59375, "learning_rate": 1.3465576096326236e-05, "loss": 3.9874, "step": 6935 }, { "epoch": 2.3104855500957773, "grad_norm": 0.58984375, "learning_rate": 1.3465085483962325e-05, "loss": 4.0493, "step": 6936 }, { "epoch": 2.310818689098026, "grad_norm": 0.58203125, "learning_rate": 1.346459480211809e-05, "loss": 4.0102, "step": 6937 }, { "epoch": 2.3111518281002748, "grad_norm": 0.59765625, "learning_rate": 1.3464104050799244e-05, "loss": 4.0314, "step": 6938 }, { "epoch": 2.3114849671025235, "grad_norm": 0.57421875, "learning_rate": 1.3463613230011507e-05, "loss": 4.0836, "step": 6939 }, { "epoch": 2.3118181061047722, "grad_norm": 0.59765625, "learning_rate": 1.3463122339760589e-05, "loss": 3.9973, "step": 6940 }, { "epoch": 2.312151245107021, "grad_norm": 0.59765625, "learning_rate": 1.3462631380052213e-05, "loss": 4.1211, "step": 6941 }, { "epoch": 2.3124843841092697, "grad_norm": 0.58203125, "learning_rate": 1.3462140350892098e-05, "loss": 4.0336, "step": 6942 }, { "epoch": 2.3128175231115184, "grad_norm": 0.5859375, "learning_rate": 1.346164925228596e-05, "loss": 4.0651, "step": 6943 }, { "epoch": 2.3131506621137667, "grad_norm": 0.5703125, "learning_rate": 1.3461158084239521e-05, "loss": 4.0709, "step": 6944 }, { "epoch": 2.3134838011160155, "grad_norm": 0.6015625, "learning_rate": 1.3460666846758505e-05, "loss": 4.0722, "step": 6945 }, { "epoch": 2.313816940118264, "grad_norm": 0.6015625, "learning_rate": 1.3460175539848631e-05, "loss": 4.0471, "step": 6946 }, { "epoch": 2.314150079120513, "grad_norm": 0.609375, "learning_rate": 1.3459684163515618e-05, "loss": 3.9922, "step": 6947 }, { "epoch": 2.3144832181227617, "grad_norm": 0.6015625, "learning_rate": 1.3459192717765196e-05, "loss": 4.0425, "step": 6948 }, { "epoch": 2.3148163571250104, "grad_norm": 0.59765625, "learning_rate": 1.3458701202603086e-05, "loss": 4.0101, "step": 6949 }, { "epoch": 2.315149496127259, "grad_norm": 0.61328125, "learning_rate": 1.3458209618035012e-05, "loss": 3.9906, "step": 6950 }, { "epoch": 2.315482635129508, "grad_norm": 0.63671875, "learning_rate": 1.3457717964066703e-05, "loss": 3.9487, "step": 6951 }, { "epoch": 2.3158157741317567, "grad_norm": 0.59765625, "learning_rate": 1.3457226240703882e-05, "loss": 3.9847, "step": 6952 }, { "epoch": 2.316148913134005, "grad_norm": 0.6171875, "learning_rate": 1.345673444795228e-05, "loss": 4.0879, "step": 6953 }, { "epoch": 2.3164820521362537, "grad_norm": 0.55078125, "learning_rate": 1.3456242585817624e-05, "loss": 4.0374, "step": 6954 }, { "epoch": 2.3168151911385024, "grad_norm": 0.61328125, "learning_rate": 1.3455750654305646e-05, "loss": 4.0075, "step": 6955 }, { "epoch": 2.317148330140751, "grad_norm": 0.59375, "learning_rate": 1.3455258653422068e-05, "loss": 4.0146, "step": 6956 }, { "epoch": 2.317481469143, "grad_norm": 0.578125, "learning_rate": 1.3454766583172627e-05, "loss": 4.0239, "step": 6957 }, { "epoch": 2.3178146081452486, "grad_norm": 0.6015625, "learning_rate": 1.3454274443563056e-05, "loss": 4.0382, "step": 6958 }, { "epoch": 2.3181477471474974, "grad_norm": 0.59765625, "learning_rate": 1.3453782234599082e-05, "loss": 4.083, "step": 6959 }, { "epoch": 2.318480886149746, "grad_norm": 0.578125, "learning_rate": 1.3453289956286442e-05, "loss": 4.0182, "step": 6960 }, { "epoch": 2.318814025151995, "grad_norm": 0.58984375, "learning_rate": 1.3452797608630867e-05, "loss": 4.0833, "step": 6961 }, { "epoch": 2.319147164154243, "grad_norm": 0.6015625, "learning_rate": 1.3452305191638096e-05, "loss": 4.023, "step": 6962 }, { "epoch": 2.319480303156492, "grad_norm": 0.5625, "learning_rate": 1.345181270531386e-05, "loss": 4.0811, "step": 6963 }, { "epoch": 2.3198134421587406, "grad_norm": 0.5859375, "learning_rate": 1.34513201496639e-05, "loss": 4.0406, "step": 6964 }, { "epoch": 2.3201465811609894, "grad_norm": 0.5625, "learning_rate": 1.345082752469395e-05, "loss": 4.0913, "step": 6965 }, { "epoch": 2.320479720163238, "grad_norm": 0.58203125, "learning_rate": 1.345033483040975e-05, "loss": 4.0416, "step": 6966 }, { "epoch": 2.320812859165487, "grad_norm": 0.5859375, "learning_rate": 1.3449842066817035e-05, "loss": 4.0292, "step": 6967 }, { "epoch": 2.3211459981677356, "grad_norm": 0.55859375, "learning_rate": 1.344934923392155e-05, "loss": 3.9971, "step": 6968 }, { "epoch": 2.3214791371699843, "grad_norm": 0.59375, "learning_rate": 1.344885633172903e-05, "loss": 4.0654, "step": 6969 }, { "epoch": 2.321812276172233, "grad_norm": 0.58984375, "learning_rate": 1.344836336024522e-05, "loss": 3.981, "step": 6970 }, { "epoch": 2.3221454151744814, "grad_norm": 0.58203125, "learning_rate": 1.344787031947586e-05, "loss": 3.9917, "step": 6971 }, { "epoch": 2.32247855417673, "grad_norm": 0.58203125, "learning_rate": 1.3447377209426695e-05, "loss": 3.9731, "step": 6972 }, { "epoch": 2.322811693178979, "grad_norm": 0.6171875, "learning_rate": 1.3446884030103467e-05, "loss": 4.0048, "step": 6973 }, { "epoch": 2.3231448321812276, "grad_norm": 0.60546875, "learning_rate": 1.3446390781511921e-05, "loss": 3.9526, "step": 6974 }, { "epoch": 2.3234779711834763, "grad_norm": 0.58203125, "learning_rate": 1.3445897463657805e-05, "loss": 4.0109, "step": 6975 }, { "epoch": 2.323811110185725, "grad_norm": 0.59765625, "learning_rate": 1.3445404076546858e-05, "loss": 4.011, "step": 6976 }, { "epoch": 2.324144249187974, "grad_norm": 0.578125, "learning_rate": 1.3444910620184835e-05, "loss": 4.1052, "step": 6977 }, { "epoch": 2.3244773881902225, "grad_norm": 0.58984375, "learning_rate": 1.3444417094577477e-05, "loss": 4.0404, "step": 6978 }, { "epoch": 2.3248105271924713, "grad_norm": 0.59375, "learning_rate": 1.3443923499730536e-05, "loss": 3.9792, "step": 6979 }, { "epoch": 2.3251436661947196, "grad_norm": 0.61328125, "learning_rate": 1.3443429835649761e-05, "loss": 4.0378, "step": 6980 }, { "epoch": 2.3254768051969683, "grad_norm": 0.62109375, "learning_rate": 1.3442936102340902e-05, "loss": 3.9153, "step": 6981 }, { "epoch": 2.325809944199217, "grad_norm": 0.58203125, "learning_rate": 1.3442442299809709e-05, "loss": 4.0453, "step": 6982 }, { "epoch": 2.326143083201466, "grad_norm": 0.6015625, "learning_rate": 1.3441948428061938e-05, "loss": 4.0913, "step": 6983 }, { "epoch": 2.3264762222037145, "grad_norm": 0.57421875, "learning_rate": 1.3441454487103334e-05, "loss": 4.0437, "step": 6984 }, { "epoch": 2.3268093612059633, "grad_norm": 0.56640625, "learning_rate": 1.3440960476939658e-05, "loss": 3.9974, "step": 6985 }, { "epoch": 2.327142500208212, "grad_norm": 0.609375, "learning_rate": 1.3440466397576659e-05, "loss": 4.0422, "step": 6986 }, { "epoch": 2.3274756392104607, "grad_norm": 0.61328125, "learning_rate": 1.3439972249020092e-05, "loss": 4.0385, "step": 6987 }, { "epoch": 2.3278087782127095, "grad_norm": 0.578125, "learning_rate": 1.3439478031275715e-05, "loss": 4.0471, "step": 6988 }, { "epoch": 2.328141917214958, "grad_norm": 0.58984375, "learning_rate": 1.3438983744349284e-05, "loss": 4.0072, "step": 6989 }, { "epoch": 2.3284750562172065, "grad_norm": 0.578125, "learning_rate": 1.3438489388246557e-05, "loss": 4.0966, "step": 6990 }, { "epoch": 2.3288081952194553, "grad_norm": 0.578125, "learning_rate": 1.343799496297329e-05, "loss": 4.0346, "step": 6991 }, { "epoch": 2.329141334221704, "grad_norm": 0.5703125, "learning_rate": 1.3437500468535243e-05, "loss": 4.0371, "step": 6992 }, { "epoch": 2.3294744732239527, "grad_norm": 0.60546875, "learning_rate": 1.3437005904938178e-05, "loss": 4.0173, "step": 6993 }, { "epoch": 2.3298076122262015, "grad_norm": 0.5859375, "learning_rate": 1.3436511272187852e-05, "loss": 4.0898, "step": 6994 }, { "epoch": 2.33014075122845, "grad_norm": 0.58203125, "learning_rate": 1.343601657029003e-05, "loss": 3.9941, "step": 6995 }, { "epoch": 2.330473890230699, "grad_norm": 0.609375, "learning_rate": 1.3435521799250471e-05, "loss": 4.0668, "step": 6996 }, { "epoch": 2.3308070292329477, "grad_norm": 0.59765625, "learning_rate": 1.3435026959074939e-05, "loss": 4.0071, "step": 6997 }, { "epoch": 2.331140168235196, "grad_norm": 0.58984375, "learning_rate": 1.3434532049769199e-05, "loss": 4.0746, "step": 6998 }, { "epoch": 2.3314733072374447, "grad_norm": 0.58203125, "learning_rate": 1.3434037071339016e-05, "loss": 4.0291, "step": 6999 }, { "epoch": 2.3318064462396935, "grad_norm": 0.60546875, "learning_rate": 1.3433542023790151e-05, "loss": 4.044, "step": 7000 }, { "epoch": 2.332139585241942, "grad_norm": 0.58984375, "learning_rate": 1.3433046907128374e-05, "loss": 3.9656, "step": 7001 }, { "epoch": 2.332472724244191, "grad_norm": 0.5625, "learning_rate": 1.3432551721359455e-05, "loss": 3.9707, "step": 7002 }, { "epoch": 2.3328058632464397, "grad_norm": 0.578125, "learning_rate": 1.3432056466489156e-05, "loss": 4.039, "step": 7003 }, { "epoch": 2.3331390022486884, "grad_norm": 0.59765625, "learning_rate": 1.3431561142523248e-05, "loss": 4.0024, "step": 7004 }, { "epoch": 2.3334721412509367, "grad_norm": 0.6015625, "learning_rate": 1.34310657494675e-05, "loss": 4.0167, "step": 7005 }, { "epoch": 2.3338052802531855, "grad_norm": 0.5625, "learning_rate": 1.3430570287327685e-05, "loss": 3.9991, "step": 7006 }, { "epoch": 2.334138419255434, "grad_norm": 0.59765625, "learning_rate": 1.3430074756109569e-05, "loss": 4.0805, "step": 7007 }, { "epoch": 2.334471558257683, "grad_norm": 0.60546875, "learning_rate": 1.3429579155818926e-05, "loss": 3.9986, "step": 7008 }, { "epoch": 2.3348046972599317, "grad_norm": 0.59375, "learning_rate": 1.3429083486461532e-05, "loss": 3.9731, "step": 7009 }, { "epoch": 2.3351378362621804, "grad_norm": 0.5859375, "learning_rate": 1.3428587748043156e-05, "loss": 4.0926, "step": 7010 }, { "epoch": 2.335470975264429, "grad_norm": 0.57421875, "learning_rate": 1.3428091940569574e-05, "loss": 4.1686, "step": 7011 }, { "epoch": 2.335804114266678, "grad_norm": 0.5703125, "learning_rate": 1.3427596064046562e-05, "loss": 3.965, "step": 7012 }, { "epoch": 2.3361372532689266, "grad_norm": 0.55859375, "learning_rate": 1.3427100118479896e-05, "loss": 3.9888, "step": 7013 }, { "epoch": 2.336470392271175, "grad_norm": 0.60546875, "learning_rate": 1.342660410387535e-05, "loss": 4.0807, "step": 7014 }, { "epoch": 2.3368035312734237, "grad_norm": 0.62109375, "learning_rate": 1.3426108020238704e-05, "loss": 4.0438, "step": 7015 }, { "epoch": 2.3371366702756724, "grad_norm": 0.578125, "learning_rate": 1.3425611867575735e-05, "loss": 4.0185, "step": 7016 }, { "epoch": 2.337469809277921, "grad_norm": 0.59375, "learning_rate": 1.3425115645892223e-05, "loss": 4.0554, "step": 7017 }, { "epoch": 2.33780294828017, "grad_norm": 0.5859375, "learning_rate": 1.3424619355193948e-05, "loss": 4.1303, "step": 7018 }, { "epoch": 2.3381360872824186, "grad_norm": 0.609375, "learning_rate": 1.342412299548669e-05, "loss": 4.0227, "step": 7019 }, { "epoch": 2.3384692262846674, "grad_norm": 0.57421875, "learning_rate": 1.342362656677623e-05, "loss": 4.0838, "step": 7020 }, { "epoch": 2.338802365286916, "grad_norm": 0.59765625, "learning_rate": 1.342313006906835e-05, "loss": 4.0439, "step": 7021 }, { "epoch": 2.339135504289165, "grad_norm": 0.5859375, "learning_rate": 1.3422633502368835e-05, "loss": 4.0091, "step": 7022 }, { "epoch": 2.339468643291413, "grad_norm": 0.57421875, "learning_rate": 1.342213686668347e-05, "loss": 4.0175, "step": 7023 }, { "epoch": 2.339801782293662, "grad_norm": 0.61328125, "learning_rate": 1.3421640162018036e-05, "loss": 3.9354, "step": 7024 }, { "epoch": 2.3401349212959106, "grad_norm": 0.578125, "learning_rate": 1.342114338837832e-05, "loss": 4.0885, "step": 7025 }, { "epoch": 2.3404680602981593, "grad_norm": 0.58203125, "learning_rate": 1.342064654577011e-05, "loss": 4.028, "step": 7026 }, { "epoch": 2.340801199300408, "grad_norm": 0.58203125, "learning_rate": 1.3420149634199192e-05, "loss": 4.0632, "step": 7027 }, { "epoch": 2.341134338302657, "grad_norm": 0.56640625, "learning_rate": 1.3419652653671351e-05, "loss": 4.1093, "step": 7028 }, { "epoch": 2.3414674773049056, "grad_norm": 0.57421875, "learning_rate": 1.3419155604192383e-05, "loss": 4.0508, "step": 7029 }, { "epoch": 2.3418006163071543, "grad_norm": 0.6015625, "learning_rate": 1.3418658485768068e-05, "loss": 4.0629, "step": 7030 }, { "epoch": 2.342133755309403, "grad_norm": 0.59765625, "learning_rate": 1.3418161298404205e-05, "loss": 4.0118, "step": 7031 }, { "epoch": 2.3424668943116513, "grad_norm": 0.578125, "learning_rate": 1.341766404210658e-05, "loss": 4.0574, "step": 7032 }, { "epoch": 2.3428000333139, "grad_norm": 0.63671875, "learning_rate": 1.3417166716880988e-05, "loss": 4.0588, "step": 7033 }, { "epoch": 2.343133172316149, "grad_norm": 0.5703125, "learning_rate": 1.3416669322733219e-05, "loss": 4.0561, "step": 7034 }, { "epoch": 2.3434663113183976, "grad_norm": 0.6171875, "learning_rate": 1.3416171859669069e-05, "loss": 3.9876, "step": 7035 }, { "epoch": 2.3437994503206463, "grad_norm": 0.578125, "learning_rate": 1.3415674327694331e-05, "loss": 4.0077, "step": 7036 }, { "epoch": 2.344132589322895, "grad_norm": 0.6171875, "learning_rate": 1.34151767268148e-05, "loss": 3.9726, "step": 7037 }, { "epoch": 2.3444657283251438, "grad_norm": 0.58984375, "learning_rate": 1.3414679057036273e-05, "loss": 4.0612, "step": 7038 }, { "epoch": 2.3447988673273925, "grad_norm": 0.5703125, "learning_rate": 1.3414181318364544e-05, "loss": 4.0548, "step": 7039 }, { "epoch": 2.3451320063296412, "grad_norm": 0.58984375, "learning_rate": 1.3413683510805416e-05, "loss": 4.0085, "step": 7040 }, { "epoch": 2.3454651453318895, "grad_norm": 0.5625, "learning_rate": 1.3413185634364682e-05, "loss": 4.099, "step": 7041 }, { "epoch": 2.3457982843341383, "grad_norm": 0.55859375, "learning_rate": 1.3412687689048145e-05, "loss": 4.0288, "step": 7042 }, { "epoch": 2.346131423336387, "grad_norm": 0.6015625, "learning_rate": 1.34121896748616e-05, "loss": 4.0581, "step": 7043 }, { "epoch": 2.3464645623386358, "grad_norm": 0.60546875, "learning_rate": 1.3411691591810856e-05, "loss": 4.0794, "step": 7044 }, { "epoch": 2.3467977013408845, "grad_norm": 0.58984375, "learning_rate": 1.3411193439901705e-05, "loss": 4.0314, "step": 7045 }, { "epoch": 2.3471308403431332, "grad_norm": 0.60546875, "learning_rate": 1.3410695219139956e-05, "loss": 4.0842, "step": 7046 }, { "epoch": 2.347463979345382, "grad_norm": 0.6171875, "learning_rate": 1.3410196929531412e-05, "loss": 4.0122, "step": 7047 }, { "epoch": 2.3477971183476307, "grad_norm": 0.58984375, "learning_rate": 1.3409698571081872e-05, "loss": 4.0705, "step": 7048 }, { "epoch": 2.3481302573498795, "grad_norm": 0.56640625, "learning_rate": 1.3409200143797145e-05, "loss": 4.0026, "step": 7049 }, { "epoch": 2.3484633963521278, "grad_norm": 0.5703125, "learning_rate": 1.3408701647683035e-05, "loss": 3.9995, "step": 7050 }, { "epoch": 2.3487965353543765, "grad_norm": 0.5703125, "learning_rate": 1.3408203082745349e-05, "loss": 4.0098, "step": 7051 }, { "epoch": 2.3491296743566252, "grad_norm": 0.57421875, "learning_rate": 1.3407704448989897e-05, "loss": 4.0437, "step": 7052 }, { "epoch": 2.349462813358874, "grad_norm": 0.609375, "learning_rate": 1.3407205746422482e-05, "loss": 4.0304, "step": 7053 }, { "epoch": 2.3497959523611227, "grad_norm": 0.640625, "learning_rate": 1.3406706975048916e-05, "loss": 4.0661, "step": 7054 }, { "epoch": 2.3501290913633714, "grad_norm": 0.6171875, "learning_rate": 1.3406208134875008e-05, "loss": 4.0606, "step": 7055 }, { "epoch": 2.35046223036562, "grad_norm": 0.6171875, "learning_rate": 1.3405709225906566e-05, "loss": 4.0492, "step": 7056 }, { "epoch": 2.350795369367869, "grad_norm": 0.59765625, "learning_rate": 1.3405210248149406e-05, "loss": 4.0162, "step": 7057 }, { "epoch": 2.3511285083701177, "grad_norm": 0.58203125, "learning_rate": 1.3404711201609334e-05, "loss": 4.0406, "step": 7058 }, { "epoch": 2.351461647372366, "grad_norm": 0.6015625, "learning_rate": 1.3404212086292168e-05, "loss": 4.0807, "step": 7059 }, { "epoch": 2.3517947863746147, "grad_norm": 0.5859375, "learning_rate": 1.340371290220372e-05, "loss": 3.9862, "step": 7060 }, { "epoch": 2.3521279253768634, "grad_norm": 0.61328125, "learning_rate": 1.3403213649349802e-05, "loss": 4.0895, "step": 7061 }, { "epoch": 2.352461064379112, "grad_norm": 0.6484375, "learning_rate": 1.3402714327736236e-05, "loss": 3.9755, "step": 7062 }, { "epoch": 2.352794203381361, "grad_norm": 0.6640625, "learning_rate": 1.340221493736883e-05, "loss": 4.057, "step": 7063 }, { "epoch": 2.3531273423836097, "grad_norm": 0.6015625, "learning_rate": 1.3401715478253403e-05, "loss": 4.0261, "step": 7064 }, { "epoch": 2.3534604813858584, "grad_norm": 0.58984375, "learning_rate": 1.3401215950395775e-05, "loss": 3.995, "step": 7065 }, { "epoch": 2.3537936203881067, "grad_norm": 0.58984375, "learning_rate": 1.3400716353801765e-05, "loss": 4.08, "step": 7066 }, { "epoch": 2.354126759390356, "grad_norm": 0.5859375, "learning_rate": 1.3400216688477188e-05, "loss": 4.0574, "step": 7067 }, { "epoch": 2.354459898392604, "grad_norm": 0.57421875, "learning_rate": 1.3399716954427867e-05, "loss": 4.1161, "step": 7068 }, { "epoch": 2.354793037394853, "grad_norm": 0.58984375, "learning_rate": 1.3399217151659625e-05, "loss": 4.0921, "step": 7069 }, { "epoch": 2.3551261763971016, "grad_norm": 0.61328125, "learning_rate": 1.3398717280178276e-05, "loss": 4.0122, "step": 7070 }, { "epoch": 2.3554593153993504, "grad_norm": 0.58203125, "learning_rate": 1.339821733998965e-05, "loss": 4.0148, "step": 7071 }, { "epoch": 2.355792454401599, "grad_norm": 0.63671875, "learning_rate": 1.3397717331099567e-05, "loss": 4.011, "step": 7072 }, { "epoch": 2.356125593403848, "grad_norm": 0.5625, "learning_rate": 1.3397217253513852e-05, "loss": 3.994, "step": 7073 }, { "epoch": 2.3564587324060966, "grad_norm": 0.58984375, "learning_rate": 1.3396717107238328e-05, "loss": 4.0263, "step": 7074 }, { "epoch": 2.356791871408345, "grad_norm": 0.56640625, "learning_rate": 1.3396216892278824e-05, "loss": 4.0427, "step": 7075 }, { "epoch": 2.3571250104105936, "grad_norm": 0.59375, "learning_rate": 1.3395716608641164e-05, "loss": 4.1118, "step": 7076 }, { "epoch": 2.3574581494128424, "grad_norm": 0.5859375, "learning_rate": 1.3395216256331173e-05, "loss": 4.0613, "step": 7077 }, { "epoch": 2.357791288415091, "grad_norm": 0.6484375, "learning_rate": 1.3394715835354686e-05, "loss": 4.0496, "step": 7078 }, { "epoch": 2.35812442741734, "grad_norm": 0.58984375, "learning_rate": 1.3394215345717523e-05, "loss": 4.0752, "step": 7079 }, { "epoch": 2.3584575664195886, "grad_norm": 0.59375, "learning_rate": 1.3393714787425521e-05, "loss": 4.0067, "step": 7080 }, { "epoch": 2.3587907054218373, "grad_norm": 0.58984375, "learning_rate": 1.3393214160484508e-05, "loss": 4.0232, "step": 7081 }, { "epoch": 2.359123844424086, "grad_norm": 0.58984375, "learning_rate": 1.3392713464900312e-05, "loss": 4.0326, "step": 7082 }, { "epoch": 2.359456983426335, "grad_norm": 0.56640625, "learning_rate": 1.339221270067877e-05, "loss": 4.0569, "step": 7083 }, { "epoch": 2.359790122428583, "grad_norm": 0.5546875, "learning_rate": 1.3391711867825711e-05, "loss": 4.0874, "step": 7084 }, { "epoch": 2.360123261430832, "grad_norm": 0.59765625, "learning_rate": 1.3391210966346971e-05, "loss": 4.1162, "step": 7085 }, { "epoch": 2.3604564004330806, "grad_norm": 0.58203125, "learning_rate": 1.3390709996248384e-05, "loss": 4.025, "step": 7086 }, { "epoch": 2.3607895394353293, "grad_norm": 0.58203125, "learning_rate": 1.3390208957535784e-05, "loss": 4.0188, "step": 7087 }, { "epoch": 2.361122678437578, "grad_norm": 0.56640625, "learning_rate": 1.3389707850215007e-05, "loss": 4.0731, "step": 7088 }, { "epoch": 2.361455817439827, "grad_norm": 0.6015625, "learning_rate": 1.3389206674291893e-05, "loss": 4.0695, "step": 7089 }, { "epoch": 2.3617889564420755, "grad_norm": 0.58203125, "learning_rate": 1.3388705429772274e-05, "loss": 4.0232, "step": 7090 }, { "epoch": 2.3621220954443243, "grad_norm": 0.56640625, "learning_rate": 1.3388204116661994e-05, "loss": 4.0691, "step": 7091 }, { "epoch": 2.362455234446573, "grad_norm": 0.58203125, "learning_rate": 1.338770273496689e-05, "loss": 4.0602, "step": 7092 }, { "epoch": 2.3627883734488213, "grad_norm": 0.6015625, "learning_rate": 1.3387201284692802e-05, "loss": 3.9793, "step": 7093 }, { "epoch": 2.36312151245107, "grad_norm": 0.59765625, "learning_rate": 1.338669976584557e-05, "loss": 4.069, "step": 7094 }, { "epoch": 2.363454651453319, "grad_norm": 0.578125, "learning_rate": 1.3386198178431036e-05, "loss": 4.0409, "step": 7095 }, { "epoch": 2.3637877904555675, "grad_norm": 0.5859375, "learning_rate": 1.3385696522455043e-05, "loss": 4.0184, "step": 7096 }, { "epoch": 2.3641209294578163, "grad_norm": 0.55859375, "learning_rate": 1.3385194797923434e-05, "loss": 4.0219, "step": 7097 }, { "epoch": 2.364454068460065, "grad_norm": 0.578125, "learning_rate": 1.3384693004842054e-05, "loss": 4.0253, "step": 7098 }, { "epoch": 2.3647872074623137, "grad_norm": 0.5703125, "learning_rate": 1.3384191143216742e-05, "loss": 4.0606, "step": 7099 }, { "epoch": 2.3651203464645625, "grad_norm": 0.578125, "learning_rate": 1.3383689213053354e-05, "loss": 4.0158, "step": 7100 }, { "epoch": 2.3654534854668112, "grad_norm": 0.58203125, "learning_rate": 1.3383187214357729e-05, "loss": 4.0658, "step": 7101 }, { "epoch": 2.3657866244690595, "grad_norm": 0.59765625, "learning_rate": 1.3382685147135715e-05, "loss": 3.967, "step": 7102 }, { "epoch": 2.3661197634713083, "grad_norm": 0.58203125, "learning_rate": 1.3382183011393161e-05, "loss": 4.0121, "step": 7103 }, { "epoch": 2.366452902473557, "grad_norm": 0.578125, "learning_rate": 1.3381680807135915e-05, "loss": 4.0719, "step": 7104 }, { "epoch": 2.3667860414758057, "grad_norm": 0.56640625, "learning_rate": 1.338117853436983e-05, "loss": 4.0528, "step": 7105 }, { "epoch": 2.3671191804780545, "grad_norm": 0.56640625, "learning_rate": 1.3380676193100752e-05, "loss": 4.0639, "step": 7106 }, { "epoch": 2.367452319480303, "grad_norm": 0.58984375, "learning_rate": 1.3380173783334535e-05, "loss": 4.0274, "step": 7107 }, { "epoch": 2.367785458482552, "grad_norm": 0.5703125, "learning_rate": 1.3379671305077028e-05, "loss": 4.1185, "step": 7108 }, { "epoch": 2.3681185974848007, "grad_norm": 0.59765625, "learning_rate": 1.3379168758334086e-05, "loss": 3.9322, "step": 7109 }, { "epoch": 2.3684517364870494, "grad_norm": 0.59375, "learning_rate": 1.3378666143111563e-05, "loss": 4.0637, "step": 7110 }, { "epoch": 2.3687848754892977, "grad_norm": 0.5859375, "learning_rate": 1.3378163459415313e-05, "loss": 4.0102, "step": 7111 }, { "epoch": 2.3691180144915465, "grad_norm": 0.59765625, "learning_rate": 1.3377660707251189e-05, "loss": 3.9508, "step": 7112 }, { "epoch": 2.369451153493795, "grad_norm": 0.5625, "learning_rate": 1.337715788662505e-05, "loss": 4.0469, "step": 7113 }, { "epoch": 2.369784292496044, "grad_norm": 0.55859375, "learning_rate": 1.3376654997542751e-05, "loss": 4.0209, "step": 7114 }, { "epoch": 2.3701174314982927, "grad_norm": 0.56640625, "learning_rate": 1.3376152040010151e-05, "loss": 3.997, "step": 7115 }, { "epoch": 2.3704505705005414, "grad_norm": 0.59765625, "learning_rate": 1.3375649014033105e-05, "loss": 4.0605, "step": 7116 }, { "epoch": 2.37078370950279, "grad_norm": 0.56640625, "learning_rate": 1.3375145919617479e-05, "loss": 3.9914, "step": 7117 }, { "epoch": 2.371116848505039, "grad_norm": 0.5625, "learning_rate": 1.3374642756769126e-05, "loss": 4.011, "step": 7118 }, { "epoch": 2.3714499875072876, "grad_norm": 0.59375, "learning_rate": 1.3374139525493909e-05, "loss": 4.0168, "step": 7119 }, { "epoch": 2.371783126509536, "grad_norm": 0.609375, "learning_rate": 1.337363622579769e-05, "loss": 4.0157, "step": 7120 }, { "epoch": 2.3721162655117847, "grad_norm": 0.5859375, "learning_rate": 1.3373132857686331e-05, "loss": 4.0082, "step": 7121 }, { "epoch": 2.3724494045140334, "grad_norm": 0.6015625, "learning_rate": 1.3372629421165697e-05, "loss": 3.9972, "step": 7122 }, { "epoch": 2.372782543516282, "grad_norm": 0.58203125, "learning_rate": 1.337212591624165e-05, "loss": 3.9946, "step": 7123 }, { "epoch": 2.373115682518531, "grad_norm": 0.578125, "learning_rate": 1.3371622342920055e-05, "loss": 4.0503, "step": 7124 }, { "epoch": 2.3734488215207796, "grad_norm": 0.578125, "learning_rate": 1.3371118701206777e-05, "loss": 4.0913, "step": 7125 }, { "epoch": 2.3737819605230284, "grad_norm": 0.55859375, "learning_rate": 1.3370614991107682e-05, "loss": 4.0595, "step": 7126 }, { "epoch": 2.374115099525277, "grad_norm": 0.59375, "learning_rate": 1.3370111212628639e-05, "loss": 4.0538, "step": 7127 }, { "epoch": 2.374448238527526, "grad_norm": 0.59765625, "learning_rate": 1.3369607365775514e-05, "loss": 4.0134, "step": 7128 }, { "epoch": 2.374781377529774, "grad_norm": 0.578125, "learning_rate": 1.3369103450554179e-05, "loss": 4.1122, "step": 7129 }, { "epoch": 2.375114516532023, "grad_norm": 0.59375, "learning_rate": 1.33685994669705e-05, "loss": 4.0625, "step": 7130 }, { "epoch": 2.3754476555342716, "grad_norm": 0.58203125, "learning_rate": 1.3368095415030347e-05, "loss": 3.9629, "step": 7131 }, { "epoch": 2.3757807945365204, "grad_norm": 0.58203125, "learning_rate": 1.3367591294739596e-05, "loss": 4.033, "step": 7132 }, { "epoch": 2.376113933538769, "grad_norm": 0.61328125, "learning_rate": 1.3367087106104113e-05, "loss": 3.9313, "step": 7133 }, { "epoch": 2.376447072541018, "grad_norm": 0.59765625, "learning_rate": 1.3366582849129773e-05, "loss": 3.9548, "step": 7134 }, { "epoch": 2.3767802115432666, "grad_norm": 0.59765625, "learning_rate": 1.3366078523822451e-05, "loss": 4.0081, "step": 7135 }, { "epoch": 2.377113350545515, "grad_norm": 0.578125, "learning_rate": 1.3365574130188022e-05, "loss": 4.0681, "step": 7136 }, { "epoch": 2.377446489547764, "grad_norm": 0.578125, "learning_rate": 1.3365069668232356e-05, "loss": 4.085, "step": 7137 }, { "epoch": 2.3777796285500123, "grad_norm": 0.578125, "learning_rate": 1.3364565137961335e-05, "loss": 4.0625, "step": 7138 }, { "epoch": 2.378112767552261, "grad_norm": 0.58203125, "learning_rate": 1.336406053938083e-05, "loss": 4.0472, "step": 7139 }, { "epoch": 2.37844590655451, "grad_norm": 0.5859375, "learning_rate": 1.3363555872496723e-05, "loss": 3.9729, "step": 7140 }, { "epoch": 2.3787790455567586, "grad_norm": 0.59765625, "learning_rate": 1.336305113731489e-05, "loss": 4.0214, "step": 7141 }, { "epoch": 2.3791121845590073, "grad_norm": 0.6171875, "learning_rate": 1.3362546333841211e-05, "loss": 4.0206, "step": 7142 }, { "epoch": 2.379445323561256, "grad_norm": 0.5625, "learning_rate": 1.3362041462081566e-05, "loss": 4.0863, "step": 7143 }, { "epoch": 2.379778462563505, "grad_norm": 0.6015625, "learning_rate": 1.3361536522041832e-05, "loss": 4.0524, "step": 7144 }, { "epoch": 2.380111601565753, "grad_norm": 0.58984375, "learning_rate": 1.3361031513727896e-05, "loss": 4.0348, "step": 7145 }, { "epoch": 2.380444740568002, "grad_norm": 0.578125, "learning_rate": 1.3360526437145637e-05, "loss": 4.0117, "step": 7146 }, { "epoch": 2.3807778795702506, "grad_norm": 0.6015625, "learning_rate": 1.336002129230094e-05, "loss": 4.1036, "step": 7147 }, { "epoch": 2.3811110185724993, "grad_norm": 0.5625, "learning_rate": 1.3359516079199688e-05, "loss": 4.0558, "step": 7148 }, { "epoch": 2.381444157574748, "grad_norm": 0.58203125, "learning_rate": 1.3359010797847764e-05, "loss": 4.108, "step": 7149 }, { "epoch": 2.3817772965769968, "grad_norm": 0.6015625, "learning_rate": 1.3358505448251055e-05, "loss": 4.0253, "step": 7150 }, { "epoch": 2.3821104355792455, "grad_norm": 0.58984375, "learning_rate": 1.3358000030415449e-05, "loss": 4.0581, "step": 7151 }, { "epoch": 2.3824435745814942, "grad_norm": 0.58984375, "learning_rate": 1.3357494544346829e-05, "loss": 4.0597, "step": 7152 }, { "epoch": 2.382776713583743, "grad_norm": 0.6015625, "learning_rate": 1.3356988990051086e-05, "loss": 4.0742, "step": 7153 }, { "epoch": 2.3831098525859913, "grad_norm": 0.55078125, "learning_rate": 1.3356483367534106e-05, "loss": 4.0521, "step": 7154 }, { "epoch": 2.38344299158824, "grad_norm": 0.5859375, "learning_rate": 1.335597767680178e-05, "loss": 4.0125, "step": 7155 }, { "epoch": 2.3837761305904888, "grad_norm": 0.578125, "learning_rate": 1.3355471917859998e-05, "loss": 4.0771, "step": 7156 }, { "epoch": 2.3841092695927375, "grad_norm": 0.60546875, "learning_rate": 1.3354966090714652e-05, "loss": 4.0807, "step": 7157 }, { "epoch": 2.3844424085949862, "grad_norm": 0.578125, "learning_rate": 1.3354460195371634e-05, "loss": 4.0442, "step": 7158 }, { "epoch": 2.384775547597235, "grad_norm": 0.62109375, "learning_rate": 1.3353954231836835e-05, "loss": 3.9684, "step": 7159 }, { "epoch": 2.3851086865994837, "grad_norm": 0.59375, "learning_rate": 1.3353448200116146e-05, "loss": 4.0186, "step": 7160 }, { "epoch": 2.3854418256017325, "grad_norm": 0.57421875, "learning_rate": 1.3352942100215467e-05, "loss": 4.0513, "step": 7161 }, { "epoch": 2.385774964603981, "grad_norm": 0.6171875, "learning_rate": 1.3352435932140689e-05, "loss": 3.9671, "step": 7162 }, { "epoch": 2.3861081036062295, "grad_norm": 0.6171875, "learning_rate": 1.3351929695897708e-05, "loss": 4.0547, "step": 7163 }, { "epoch": 2.3864412426084782, "grad_norm": 0.60546875, "learning_rate": 1.3351423391492423e-05, "loss": 4.0239, "step": 7164 }, { "epoch": 2.386774381610727, "grad_norm": 0.58984375, "learning_rate": 1.335091701893073e-05, "loss": 4.0726, "step": 7165 }, { "epoch": 2.3871075206129757, "grad_norm": 0.59765625, "learning_rate": 1.3350410578218524e-05, "loss": 4.0021, "step": 7166 }, { "epoch": 2.3874406596152244, "grad_norm": 0.59765625, "learning_rate": 1.3349904069361709e-05, "loss": 4.0394, "step": 7167 }, { "epoch": 2.387773798617473, "grad_norm": 0.60546875, "learning_rate": 1.3349397492366181e-05, "loss": 4.0096, "step": 7168 }, { "epoch": 2.388106937619722, "grad_norm": 0.56640625, "learning_rate": 1.3348890847237844e-05, "loss": 4.0497, "step": 7169 }, { "epoch": 2.3884400766219707, "grad_norm": 0.6171875, "learning_rate": 1.3348384133982595e-05, "loss": 4.0465, "step": 7170 }, { "epoch": 2.3887732156242194, "grad_norm": 0.5703125, "learning_rate": 1.334787735260634e-05, "loss": 4.0429, "step": 7171 }, { "epoch": 2.3891063546264677, "grad_norm": 0.57421875, "learning_rate": 1.3347370503114979e-05, "loss": 4.043, "step": 7172 }, { "epoch": 2.3894394936287164, "grad_norm": 0.5859375, "learning_rate": 1.3346863585514418e-05, "loss": 4.0412, "step": 7173 }, { "epoch": 2.389772632630965, "grad_norm": 0.62109375, "learning_rate": 1.334635659981056e-05, "loss": 4.0339, "step": 7174 }, { "epoch": 2.390105771633214, "grad_norm": 0.58203125, "learning_rate": 1.3345849546009312e-05, "loss": 4.0984, "step": 7175 }, { "epoch": 2.3904389106354627, "grad_norm": 0.59765625, "learning_rate": 1.3345342424116579e-05, "loss": 4.0311, "step": 7176 }, { "epoch": 2.3907720496377114, "grad_norm": 0.5625, "learning_rate": 1.3344835234138266e-05, "loss": 4.0487, "step": 7177 }, { "epoch": 2.39110518863996, "grad_norm": 0.5859375, "learning_rate": 1.3344327976080283e-05, "loss": 4.0234, "step": 7178 }, { "epoch": 2.391438327642209, "grad_norm": 0.59765625, "learning_rate": 1.3343820649948537e-05, "loss": 4.108, "step": 7179 }, { "epoch": 2.3917714666444576, "grad_norm": 0.578125, "learning_rate": 1.334331325574894e-05, "loss": 4.047, "step": 7180 }, { "epoch": 2.392104605646706, "grad_norm": 0.59375, "learning_rate": 1.3342805793487398e-05, "loss": 4.0275, "step": 7181 }, { "epoch": 2.3924377446489546, "grad_norm": 0.6015625, "learning_rate": 1.3342298263169825e-05, "loss": 3.9667, "step": 7182 }, { "epoch": 2.3927708836512034, "grad_norm": 0.6015625, "learning_rate": 1.3341790664802134e-05, "loss": 4.0208, "step": 7183 }, { "epoch": 2.393104022653452, "grad_norm": 0.58203125, "learning_rate": 1.3341282998390231e-05, "loss": 4.0447, "step": 7184 }, { "epoch": 2.393437161655701, "grad_norm": 0.60546875, "learning_rate": 1.3340775263940035e-05, "loss": 3.9615, "step": 7185 }, { "epoch": 2.3937703006579496, "grad_norm": 0.57421875, "learning_rate": 1.334026746145746e-05, "loss": 3.9612, "step": 7186 }, { "epoch": 2.3941034396601983, "grad_norm": 0.58984375, "learning_rate": 1.3339759590948418e-05, "loss": 4.0435, "step": 7187 }, { "epoch": 2.394436578662447, "grad_norm": 0.58203125, "learning_rate": 1.3339251652418825e-05, "loss": 4.0414, "step": 7188 }, { "epoch": 2.394769717664696, "grad_norm": 0.609375, "learning_rate": 1.3338743645874597e-05, "loss": 4.1379, "step": 7189 }, { "epoch": 2.395102856666944, "grad_norm": 0.59765625, "learning_rate": 1.3338235571321654e-05, "loss": 3.9806, "step": 7190 }, { "epoch": 2.395435995669193, "grad_norm": 0.671875, "learning_rate": 1.3337727428765913e-05, "loss": 4.0317, "step": 7191 }, { "epoch": 2.3957691346714416, "grad_norm": 0.61328125, "learning_rate": 1.333721921821329e-05, "loss": 4.0423, "step": 7192 }, { "epoch": 2.3961022736736903, "grad_norm": 0.5625, "learning_rate": 1.3336710939669708e-05, "loss": 3.9822, "step": 7193 }, { "epoch": 2.396435412675939, "grad_norm": 0.55078125, "learning_rate": 1.3336202593141084e-05, "loss": 4.0313, "step": 7194 }, { "epoch": 2.396768551678188, "grad_norm": 0.59375, "learning_rate": 1.3335694178633343e-05, "loss": 4.0608, "step": 7195 }, { "epoch": 2.3971016906804365, "grad_norm": 0.5859375, "learning_rate": 1.3335185696152403e-05, "loss": 4.0777, "step": 7196 }, { "epoch": 2.3974348296826853, "grad_norm": 0.62890625, "learning_rate": 1.333467714570419e-05, "loss": 4.0579, "step": 7197 }, { "epoch": 2.397767968684934, "grad_norm": 0.58203125, "learning_rate": 1.3334168527294624e-05, "loss": 4.0178, "step": 7198 }, { "epoch": 2.3981011076871823, "grad_norm": 0.62109375, "learning_rate": 1.3333659840929635e-05, "loss": 3.9362, "step": 7199 }, { "epoch": 2.398434246689431, "grad_norm": 0.6328125, "learning_rate": 1.3333151086615144e-05, "loss": 4.0036, "step": 7200 }, { "epoch": 2.39876738569168, "grad_norm": 0.58984375, "learning_rate": 1.3332642264357075e-05, "loss": 4.0603, "step": 7201 }, { "epoch": 2.3991005246939285, "grad_norm": 0.6015625, "learning_rate": 1.3332133374161357e-05, "loss": 4.0692, "step": 7202 }, { "epoch": 2.3994336636961773, "grad_norm": 0.58984375, "learning_rate": 1.333162441603392e-05, "loss": 4.0317, "step": 7203 }, { "epoch": 2.399766802698426, "grad_norm": 0.60546875, "learning_rate": 1.3331115389980689e-05, "loss": 4.0614, "step": 7204 }, { "epoch": 2.4000999417006748, "grad_norm": 0.53515625, "learning_rate": 1.3330606296007594e-05, "loss": 3.9994, "step": 7205 }, { "epoch": 2.400433080702923, "grad_norm": 0.5859375, "learning_rate": 1.3330097134120564e-05, "loss": 4.0198, "step": 7206 }, { "epoch": 2.400766219705172, "grad_norm": 0.5859375, "learning_rate": 1.332958790432553e-05, "loss": 4.012, "step": 7207 }, { "epoch": 2.4010993587074205, "grad_norm": 0.61328125, "learning_rate": 1.3329078606628424e-05, "loss": 4.0435, "step": 7208 }, { "epoch": 2.4014324977096693, "grad_norm": 0.58203125, "learning_rate": 1.3328569241035177e-05, "loss": 4.0639, "step": 7209 }, { "epoch": 2.401765636711918, "grad_norm": 0.578125, "learning_rate": 1.3328059807551725e-05, "loss": 4.0362, "step": 7210 }, { "epoch": 2.4020987757141667, "grad_norm": 0.58203125, "learning_rate": 1.3327550306184e-05, "loss": 4.0326, "step": 7211 }, { "epoch": 2.4024319147164155, "grad_norm": 0.6015625, "learning_rate": 1.3327040736937935e-05, "loss": 4.0188, "step": 7212 }, { "epoch": 2.4027650537186642, "grad_norm": 0.5625, "learning_rate": 1.3326531099819468e-05, "loss": 4.0564, "step": 7213 }, { "epoch": 2.403098192720913, "grad_norm": 0.58203125, "learning_rate": 1.3326021394834532e-05, "loss": 4.0696, "step": 7214 }, { "epoch": 2.4034313317231613, "grad_norm": 0.60546875, "learning_rate": 1.332551162198907e-05, "loss": 4.0299, "step": 7215 }, { "epoch": 2.40376447072541, "grad_norm": 0.5703125, "learning_rate": 1.3325001781289011e-05, "loss": 4.0159, "step": 7216 }, { "epoch": 2.4040976097276587, "grad_norm": 0.59375, "learning_rate": 1.33244918727403e-05, "loss": 3.9704, "step": 7217 }, { "epoch": 2.4044307487299075, "grad_norm": 0.59375, "learning_rate": 1.3323981896348874e-05, "loss": 4.0547, "step": 7218 }, { "epoch": 2.404763887732156, "grad_norm": 0.57421875, "learning_rate": 1.3323471852120674e-05, "loss": 3.9422, "step": 7219 }, { "epoch": 2.405097026734405, "grad_norm": 0.58203125, "learning_rate": 1.3322961740061642e-05, "loss": 3.9913, "step": 7220 }, { "epoch": 2.4054301657366537, "grad_norm": 0.5625, "learning_rate": 1.3322451560177716e-05, "loss": 3.9808, "step": 7221 }, { "epoch": 2.4057633047389024, "grad_norm": 0.5859375, "learning_rate": 1.3321941312474843e-05, "loss": 4.109, "step": 7222 }, { "epoch": 2.406096443741151, "grad_norm": 0.61328125, "learning_rate": 1.3321430996958961e-05, "loss": 4.0583, "step": 7223 }, { "epoch": 2.4064295827433995, "grad_norm": 0.6015625, "learning_rate": 1.332092061363602e-05, "loss": 4.0437, "step": 7224 }, { "epoch": 2.406762721745648, "grad_norm": 0.5859375, "learning_rate": 1.3320410162511961e-05, "loss": 4.0436, "step": 7225 }, { "epoch": 2.407095860747897, "grad_norm": 0.6015625, "learning_rate": 1.3319899643592728e-05, "loss": 4.0183, "step": 7226 }, { "epoch": 2.4074289997501457, "grad_norm": 0.59765625, "learning_rate": 1.3319389056884273e-05, "loss": 4.0754, "step": 7227 }, { "epoch": 2.4077621387523944, "grad_norm": 0.59765625, "learning_rate": 1.331887840239254e-05, "loss": 4.0368, "step": 7228 }, { "epoch": 2.408095277754643, "grad_norm": 0.625, "learning_rate": 1.3318367680123476e-05, "loss": 4.0665, "step": 7229 }, { "epoch": 2.408428416756892, "grad_norm": 0.57421875, "learning_rate": 1.3317856890083032e-05, "loss": 4.0355, "step": 7230 }, { "epoch": 2.4087615557591406, "grad_norm": 0.5703125, "learning_rate": 1.3317346032277157e-05, "loss": 4.0556, "step": 7231 }, { "epoch": 2.4090946947613894, "grad_norm": 0.55859375, "learning_rate": 1.3316835106711801e-05, "loss": 4.042, "step": 7232 }, { "epoch": 2.4094278337636377, "grad_norm": 0.625, "learning_rate": 1.3316324113392914e-05, "loss": 3.9957, "step": 7233 }, { "epoch": 2.4097609727658864, "grad_norm": 0.61328125, "learning_rate": 1.331581305232645e-05, "loss": 4.0285, "step": 7234 }, { "epoch": 2.410094111768135, "grad_norm": 0.59765625, "learning_rate": 1.3315301923518363e-05, "loss": 4.0723, "step": 7235 }, { "epoch": 2.410427250770384, "grad_norm": 0.6015625, "learning_rate": 1.33147907269746e-05, "loss": 4.1082, "step": 7236 }, { "epoch": 2.4107603897726326, "grad_norm": 0.5625, "learning_rate": 1.3314279462701124e-05, "loss": 4.0116, "step": 7237 }, { "epoch": 2.4110935287748814, "grad_norm": 0.56640625, "learning_rate": 1.3313768130703884e-05, "loss": 4.1451, "step": 7238 }, { "epoch": 2.41142666777713, "grad_norm": 0.5859375, "learning_rate": 1.3313256730988837e-05, "loss": 4.0884, "step": 7239 }, { "epoch": 2.411759806779379, "grad_norm": 0.57421875, "learning_rate": 1.3312745263561944e-05, "loss": 4.0507, "step": 7240 }, { "epoch": 2.4120929457816276, "grad_norm": 0.6171875, "learning_rate": 1.3312233728429156e-05, "loss": 4.0608, "step": 7241 }, { "epoch": 2.412426084783876, "grad_norm": 0.58203125, "learning_rate": 1.3311722125596436e-05, "loss": 4.0931, "step": 7242 }, { "epoch": 2.4127592237861246, "grad_norm": 0.59765625, "learning_rate": 1.3311210455069741e-05, "loss": 4.05, "step": 7243 }, { "epoch": 2.4130923627883734, "grad_norm": 0.5859375, "learning_rate": 1.3310698716855031e-05, "loss": 4.0015, "step": 7244 }, { "epoch": 2.413425501790622, "grad_norm": 0.58984375, "learning_rate": 1.3310186910958268e-05, "loss": 4.0444, "step": 7245 }, { "epoch": 2.413758640792871, "grad_norm": 0.6015625, "learning_rate": 1.3309675037385409e-05, "loss": 4.0118, "step": 7246 }, { "epoch": 2.4140917797951196, "grad_norm": 0.58984375, "learning_rate": 1.3309163096142422e-05, "loss": 4.0577, "step": 7247 }, { "epoch": 2.4144249187973683, "grad_norm": 0.5859375, "learning_rate": 1.3308651087235267e-05, "loss": 3.9724, "step": 7248 }, { "epoch": 2.414758057799617, "grad_norm": 0.56640625, "learning_rate": 1.3308139010669909e-05, "loss": 3.9541, "step": 7249 }, { "epoch": 2.415091196801866, "grad_norm": 0.58984375, "learning_rate": 1.3307626866452311e-05, "loss": 4.0401, "step": 7250 }, { "epoch": 2.415424335804114, "grad_norm": 0.5703125, "learning_rate": 1.330711465458844e-05, "loss": 4.0958, "step": 7251 }, { "epoch": 2.415757474806363, "grad_norm": 0.60546875, "learning_rate": 1.3306602375084262e-05, "loss": 4.0902, "step": 7252 }, { "epoch": 2.4160906138086116, "grad_norm": 0.56640625, "learning_rate": 1.3306090027945742e-05, "loss": 4.0749, "step": 7253 }, { "epoch": 2.4164237528108603, "grad_norm": 0.58984375, "learning_rate": 1.330557761317885e-05, "loss": 4.1043, "step": 7254 }, { "epoch": 2.416756891813109, "grad_norm": 0.59375, "learning_rate": 1.3305065130789556e-05, "loss": 3.9784, "step": 7255 }, { "epoch": 2.417090030815358, "grad_norm": 0.58984375, "learning_rate": 1.3304552580783824e-05, "loss": 4.0618, "step": 7256 }, { "epoch": 2.4174231698176065, "grad_norm": 0.59765625, "learning_rate": 1.3304039963167626e-05, "loss": 4.0184, "step": 7257 }, { "epoch": 2.4177563088198553, "grad_norm": 0.6015625, "learning_rate": 1.3303527277946936e-05, "loss": 4.0441, "step": 7258 }, { "epoch": 2.418089447822104, "grad_norm": 0.58984375, "learning_rate": 1.3303014525127725e-05, "loss": 4.018, "step": 7259 }, { "epoch": 2.4184225868243523, "grad_norm": 0.5625, "learning_rate": 1.330250170471596e-05, "loss": 4.0341, "step": 7260 }, { "epoch": 2.418755725826601, "grad_norm": 0.59375, "learning_rate": 1.330198881671762e-05, "loss": 3.9902, "step": 7261 }, { "epoch": 2.4190888648288498, "grad_norm": 0.58203125, "learning_rate": 1.330147586113868e-05, "loss": 3.9913, "step": 7262 }, { "epoch": 2.4194220038310985, "grad_norm": 0.59375, "learning_rate": 1.3300962837985109e-05, "loss": 4.1002, "step": 7263 }, { "epoch": 2.4197551428333473, "grad_norm": 0.62109375, "learning_rate": 1.3300449747262888e-05, "loss": 4.0043, "step": 7264 }, { "epoch": 2.420088281835596, "grad_norm": 0.5703125, "learning_rate": 1.329993658897799e-05, "loss": 4.0324, "step": 7265 }, { "epoch": 2.4204214208378447, "grad_norm": 0.5703125, "learning_rate": 1.3299423363136392e-05, "loss": 3.9973, "step": 7266 }, { "epoch": 2.4207545598400935, "grad_norm": 0.60546875, "learning_rate": 1.3298910069744076e-05, "loss": 4.0056, "step": 7267 }, { "epoch": 2.421087698842342, "grad_norm": 0.6171875, "learning_rate": 1.3298396708807018e-05, "loss": 3.962, "step": 7268 }, { "epoch": 2.4214208378445905, "grad_norm": 0.5859375, "learning_rate": 1.3297883280331195e-05, "loss": 4.0704, "step": 7269 }, { "epoch": 2.4217539768468392, "grad_norm": 0.6171875, "learning_rate": 1.3297369784322595e-05, "loss": 4.0177, "step": 7270 }, { "epoch": 2.422087115849088, "grad_norm": 0.58984375, "learning_rate": 1.3296856220787189e-05, "loss": 4.0269, "step": 7271 }, { "epoch": 2.4224202548513367, "grad_norm": 0.578125, "learning_rate": 1.3296342589730965e-05, "loss": 4.0314, "step": 7272 }, { "epoch": 2.4227533938535855, "grad_norm": 0.5625, "learning_rate": 1.3295828891159907e-05, "loss": 4.0221, "step": 7273 }, { "epoch": 2.423086532855834, "grad_norm": 0.56640625, "learning_rate": 1.3295315125079994e-05, "loss": 4.0512, "step": 7274 }, { "epoch": 2.423419671858083, "grad_norm": 0.60546875, "learning_rate": 1.3294801291497213e-05, "loss": 4.0477, "step": 7275 }, { "epoch": 2.4237528108603312, "grad_norm": 0.58984375, "learning_rate": 1.3294287390417548e-05, "loss": 4.0222, "step": 7276 }, { "epoch": 2.42408594986258, "grad_norm": 0.59765625, "learning_rate": 1.3293773421846986e-05, "loss": 4.057, "step": 7277 }, { "epoch": 2.4244190888648287, "grad_norm": 0.59375, "learning_rate": 1.3293259385791513e-05, "loss": 4.0197, "step": 7278 }, { "epoch": 2.4247522278670774, "grad_norm": 0.5625, "learning_rate": 1.3292745282257115e-05, "loss": 4.0785, "step": 7279 }, { "epoch": 2.425085366869326, "grad_norm": 0.5859375, "learning_rate": 1.3292231111249783e-05, "loss": 4.0581, "step": 7280 }, { "epoch": 2.425418505871575, "grad_norm": 0.57421875, "learning_rate": 1.3291716872775506e-05, "loss": 4.0179, "step": 7281 }, { "epoch": 2.4257516448738237, "grad_norm": 0.5859375, "learning_rate": 1.329120256684027e-05, "loss": 3.9858, "step": 7282 }, { "epoch": 2.4260847838760724, "grad_norm": 0.578125, "learning_rate": 1.3290688193450067e-05, "loss": 4.0224, "step": 7283 }, { "epoch": 2.426417922878321, "grad_norm": 0.58984375, "learning_rate": 1.3290173752610891e-05, "loss": 4.0351, "step": 7284 }, { "epoch": 2.4267510618805694, "grad_norm": 0.59375, "learning_rate": 1.3289659244328731e-05, "loss": 4.008, "step": 7285 }, { "epoch": 2.427084200882818, "grad_norm": 0.6015625, "learning_rate": 1.328914466860958e-05, "loss": 3.9894, "step": 7286 }, { "epoch": 2.427417339885067, "grad_norm": 0.6015625, "learning_rate": 1.3288630025459438e-05, "loss": 4.0107, "step": 7287 }, { "epoch": 2.4277504788873157, "grad_norm": 0.58984375, "learning_rate": 1.328811531488429e-05, "loss": 4.0015, "step": 7288 }, { "epoch": 2.4280836178895644, "grad_norm": 0.59375, "learning_rate": 1.3287600536890137e-05, "loss": 4.0699, "step": 7289 }, { "epoch": 2.428416756891813, "grad_norm": 0.59765625, "learning_rate": 1.3287085691482972e-05, "loss": 4.0565, "step": 7290 }, { "epoch": 2.428749895894062, "grad_norm": 0.5859375, "learning_rate": 1.3286570778668795e-05, "loss": 4.0737, "step": 7291 }, { "epoch": 2.4290830348963106, "grad_norm": 0.56640625, "learning_rate": 1.3286055798453604e-05, "loss": 4.1164, "step": 7292 }, { "epoch": 2.4294161738985594, "grad_norm": 0.5859375, "learning_rate": 1.3285540750843392e-05, "loss": 4.0874, "step": 7293 }, { "epoch": 2.4297493129008076, "grad_norm": 0.5859375, "learning_rate": 1.3285025635844163e-05, "loss": 4.1656, "step": 7294 }, { "epoch": 2.4300824519030564, "grad_norm": 0.625, "learning_rate": 1.3284510453461916e-05, "loss": 4.0448, "step": 7295 }, { "epoch": 2.430415590905305, "grad_norm": 0.62109375, "learning_rate": 1.3283995203702652e-05, "loss": 4.0474, "step": 7296 }, { "epoch": 2.430748729907554, "grad_norm": 0.61328125, "learning_rate": 1.328347988657237e-05, "loss": 4.0137, "step": 7297 }, { "epoch": 2.4310818689098026, "grad_norm": 0.57421875, "learning_rate": 1.3282964502077077e-05, "loss": 4.0699, "step": 7298 }, { "epoch": 2.4314150079120513, "grad_norm": 0.6015625, "learning_rate": 1.3282449050222773e-05, "loss": 4.0542, "step": 7299 }, { "epoch": 2.4317481469143, "grad_norm": 0.578125, "learning_rate": 1.328193353101546e-05, "loss": 4.0306, "step": 7300 }, { "epoch": 2.432081285916549, "grad_norm": 0.625, "learning_rate": 1.3281417944461146e-05, "loss": 4.044, "step": 7301 }, { "epoch": 2.4324144249187976, "grad_norm": 0.58984375, "learning_rate": 1.3280902290565836e-05, "loss": 4.0315, "step": 7302 }, { "epoch": 2.432747563921046, "grad_norm": 0.59765625, "learning_rate": 1.3280386569335536e-05, "loss": 4.091, "step": 7303 }, { "epoch": 2.4330807029232946, "grad_norm": 0.63671875, "learning_rate": 1.3279870780776252e-05, "loss": 4.0228, "step": 7304 }, { "epoch": 2.4334138419255433, "grad_norm": 0.62109375, "learning_rate": 1.3279354924893996e-05, "loss": 4.011, "step": 7305 }, { "epoch": 2.433746980927792, "grad_norm": 0.5859375, "learning_rate": 1.3278839001694768e-05, "loss": 4.0691, "step": 7306 }, { "epoch": 2.434080119930041, "grad_norm": 0.59765625, "learning_rate": 1.3278323011184583e-05, "loss": 4.0264, "step": 7307 }, { "epoch": 2.4344132589322895, "grad_norm": 0.6171875, "learning_rate": 1.3277806953369455e-05, "loss": 4.0128, "step": 7308 }, { "epoch": 2.4347463979345383, "grad_norm": 0.59765625, "learning_rate": 1.3277290828255389e-05, "loss": 4.0609, "step": 7309 }, { "epoch": 2.435079536936787, "grad_norm": 0.58203125, "learning_rate": 1.3276774635848396e-05, "loss": 4.0536, "step": 7310 }, { "epoch": 2.4354126759390358, "grad_norm": 0.5859375, "learning_rate": 1.3276258376154491e-05, "loss": 4.0592, "step": 7311 }, { "epoch": 2.435745814941284, "grad_norm": 0.5625, "learning_rate": 1.327574204917969e-05, "loss": 4.0004, "step": 7312 }, { "epoch": 2.436078953943533, "grad_norm": 0.5859375, "learning_rate": 1.3275225654930003e-05, "loss": 3.9624, "step": 7313 }, { "epoch": 2.4364120929457815, "grad_norm": 0.58203125, "learning_rate": 1.3274709193411447e-05, "loss": 4.0357, "step": 7314 }, { "epoch": 2.4367452319480303, "grad_norm": 0.58984375, "learning_rate": 1.3274192664630037e-05, "loss": 4.0992, "step": 7315 }, { "epoch": 2.437078370950279, "grad_norm": 0.578125, "learning_rate": 1.3273676068591789e-05, "loss": 4.1493, "step": 7316 }, { "epoch": 2.4374115099525278, "grad_norm": 0.58203125, "learning_rate": 1.3273159405302719e-05, "loss": 4.0927, "step": 7317 }, { "epoch": 2.4377446489547765, "grad_norm": 0.5859375, "learning_rate": 1.3272642674768849e-05, "loss": 4.0167, "step": 7318 }, { "epoch": 2.4380777879570252, "grad_norm": 0.6015625, "learning_rate": 1.3272125876996194e-05, "loss": 4.1269, "step": 7319 }, { "epoch": 2.438410926959274, "grad_norm": 0.609375, "learning_rate": 1.3271609011990774e-05, "loss": 4.0622, "step": 7320 }, { "epoch": 2.4387440659615223, "grad_norm": 0.6015625, "learning_rate": 1.3271092079758612e-05, "loss": 3.9641, "step": 7321 }, { "epoch": 2.439077204963771, "grad_norm": 0.5546875, "learning_rate": 1.3270575080305726e-05, "loss": 4.0271, "step": 7322 }, { "epoch": 2.4394103439660197, "grad_norm": 0.59765625, "learning_rate": 1.327005801363814e-05, "loss": 4.0763, "step": 7323 }, { "epoch": 2.4397434829682685, "grad_norm": 0.58203125, "learning_rate": 1.3269540879761877e-05, "loss": 4.0548, "step": 7324 }, { "epoch": 2.4400766219705172, "grad_norm": 0.61328125, "learning_rate": 1.326902367868296e-05, "loss": 4.0536, "step": 7325 }, { "epoch": 2.440409760972766, "grad_norm": 0.60546875, "learning_rate": 1.326850641040741e-05, "loss": 4.075, "step": 7326 }, { "epoch": 2.4407428999750147, "grad_norm": 0.609375, "learning_rate": 1.3267989074941258e-05, "loss": 3.9652, "step": 7327 }, { "epoch": 2.4410760389772634, "grad_norm": 0.59765625, "learning_rate": 1.3267471672290525e-05, "loss": 4.0556, "step": 7328 }, { "epoch": 2.441409177979512, "grad_norm": 0.57421875, "learning_rate": 1.326695420246124e-05, "loss": 4.0479, "step": 7329 }, { "epoch": 2.4417423169817605, "grad_norm": 0.64453125, "learning_rate": 1.326643666545943e-05, "loss": 4.0421, "step": 7330 }, { "epoch": 2.442075455984009, "grad_norm": 0.63671875, "learning_rate": 1.3265919061291121e-05, "loss": 4.0122, "step": 7331 }, { "epoch": 2.442408594986258, "grad_norm": 0.6015625, "learning_rate": 1.3265401389962345e-05, "loss": 4.0088, "step": 7332 }, { "epoch": 2.4427417339885067, "grad_norm": 0.578125, "learning_rate": 1.3264883651479131e-05, "loss": 3.991, "step": 7333 }, { "epoch": 2.4430748729907554, "grad_norm": 0.57421875, "learning_rate": 1.3264365845847508e-05, "loss": 4.1006, "step": 7334 }, { "epoch": 2.443408011993004, "grad_norm": 0.60546875, "learning_rate": 1.326384797307351e-05, "loss": 4.0123, "step": 7335 }, { "epoch": 2.443741150995253, "grad_norm": 0.640625, "learning_rate": 1.3263330033163167e-05, "loss": 3.955, "step": 7336 }, { "epoch": 2.4440742899975016, "grad_norm": 0.59765625, "learning_rate": 1.3262812026122514e-05, "loss": 4.0355, "step": 7337 }, { "epoch": 2.4444074289997504, "grad_norm": 0.64453125, "learning_rate": 1.326229395195758e-05, "loss": 3.9392, "step": 7338 }, { "epoch": 2.4447405680019987, "grad_norm": 0.6015625, "learning_rate": 1.3261775810674404e-05, "loss": 4.0531, "step": 7339 }, { "epoch": 2.4450737070042474, "grad_norm": 0.609375, "learning_rate": 1.326125760227902e-05, "loss": 4.0271, "step": 7340 }, { "epoch": 2.445406846006496, "grad_norm": 0.5859375, "learning_rate": 1.3260739326777464e-05, "loss": 4.0106, "step": 7341 }, { "epoch": 2.445739985008745, "grad_norm": 0.58984375, "learning_rate": 1.3260220984175774e-05, "loss": 3.9838, "step": 7342 }, { "epoch": 2.4460731240109936, "grad_norm": 0.57421875, "learning_rate": 1.3259702574479984e-05, "loss": 4.0927, "step": 7343 }, { "epoch": 2.4464062630132424, "grad_norm": 0.59375, "learning_rate": 1.3259184097696134e-05, "loss": 4.0404, "step": 7344 }, { "epoch": 2.446739402015491, "grad_norm": 0.609375, "learning_rate": 1.3258665553830266e-05, "loss": 4.0673, "step": 7345 }, { "epoch": 2.4470725410177394, "grad_norm": 0.59375, "learning_rate": 1.3258146942888417e-05, "loss": 4.0232, "step": 7346 }, { "epoch": 2.447405680019988, "grad_norm": 0.59765625, "learning_rate": 1.3257628264876625e-05, "loss": 4.0376, "step": 7347 }, { "epoch": 2.447738819022237, "grad_norm": 0.59765625, "learning_rate": 1.3257109519800938e-05, "loss": 4.042, "step": 7348 }, { "epoch": 2.4480719580244856, "grad_norm": 0.60546875, "learning_rate": 1.3256590707667396e-05, "loss": 3.9779, "step": 7349 }, { "epoch": 2.4484050970267344, "grad_norm": 0.5703125, "learning_rate": 1.3256071828482039e-05, "loss": 4.0764, "step": 7350 }, { "epoch": 2.448738236028983, "grad_norm": 0.62109375, "learning_rate": 1.3255552882250913e-05, "loss": 4.0959, "step": 7351 }, { "epoch": 2.449071375031232, "grad_norm": 0.62109375, "learning_rate": 1.3255033868980062e-05, "loss": 4.0109, "step": 7352 }, { "epoch": 2.4494045140334806, "grad_norm": 0.578125, "learning_rate": 1.3254514788675531e-05, "loss": 4.0743, "step": 7353 }, { "epoch": 2.4497376530357293, "grad_norm": 0.60546875, "learning_rate": 1.325399564134337e-05, "loss": 4.0275, "step": 7354 }, { "epoch": 2.4500707920379776, "grad_norm": 0.6328125, "learning_rate": 1.325347642698962e-05, "loss": 4.0419, "step": 7355 }, { "epoch": 2.4504039310402264, "grad_norm": 0.578125, "learning_rate": 1.3252957145620333e-05, "loss": 4.0819, "step": 7356 }, { "epoch": 2.450737070042475, "grad_norm": 0.578125, "learning_rate": 1.3252437797241555e-05, "loss": 4.0588, "step": 7357 }, { "epoch": 2.451070209044724, "grad_norm": 0.60546875, "learning_rate": 1.325191838185934e-05, "loss": 4.0257, "step": 7358 }, { "epoch": 2.4514033480469726, "grad_norm": 0.6171875, "learning_rate": 1.3251398899479731e-05, "loss": 4.0736, "step": 7359 }, { "epoch": 2.4517364870492213, "grad_norm": 0.62109375, "learning_rate": 1.3250879350108781e-05, "loss": 4.036, "step": 7360 }, { "epoch": 2.45206962605147, "grad_norm": 0.60546875, "learning_rate": 1.3250359733752547e-05, "loss": 3.9575, "step": 7361 }, { "epoch": 2.452402765053719, "grad_norm": 0.60546875, "learning_rate": 1.3249840050417074e-05, "loss": 4.0423, "step": 7362 }, { "epoch": 2.4527359040559675, "grad_norm": 0.6015625, "learning_rate": 1.3249320300108422e-05, "loss": 3.9763, "step": 7363 }, { "epoch": 2.453069043058216, "grad_norm": 0.5625, "learning_rate": 1.3248800482832638e-05, "loss": 4.0731, "step": 7364 }, { "epoch": 2.4534021820604646, "grad_norm": 0.62890625, "learning_rate": 1.3248280598595782e-05, "loss": 4.0953, "step": 7365 }, { "epoch": 2.4537353210627133, "grad_norm": 0.6015625, "learning_rate": 1.3247760647403906e-05, "loss": 4.0474, "step": 7366 }, { "epoch": 2.454068460064962, "grad_norm": 0.5625, "learning_rate": 1.3247240629263072e-05, "loss": 4.074, "step": 7367 }, { "epoch": 2.454401599067211, "grad_norm": 0.5703125, "learning_rate": 1.324672054417933e-05, "loss": 4.0746, "step": 7368 }, { "epoch": 2.4547347380694595, "grad_norm": 0.578125, "learning_rate": 1.3246200392158742e-05, "loss": 4.0265, "step": 7369 }, { "epoch": 2.4550678770717083, "grad_norm": 0.62109375, "learning_rate": 1.3245680173207366e-05, "loss": 4.0254, "step": 7370 }, { "epoch": 2.455401016073957, "grad_norm": 0.5859375, "learning_rate": 1.324515988733126e-05, "loss": 4.0703, "step": 7371 }, { "epoch": 2.4557341550762057, "grad_norm": 0.59375, "learning_rate": 1.3244639534536486e-05, "loss": 4.0684, "step": 7372 }, { "epoch": 2.456067294078454, "grad_norm": 0.6015625, "learning_rate": 1.3244119114829103e-05, "loss": 3.9912, "step": 7373 }, { "epoch": 2.4564004330807028, "grad_norm": 0.6171875, "learning_rate": 1.3243598628215178e-05, "loss": 3.9955, "step": 7374 }, { "epoch": 2.4567335720829515, "grad_norm": 0.6015625, "learning_rate": 1.3243078074700766e-05, "loss": 3.958, "step": 7375 }, { "epoch": 2.4570667110852003, "grad_norm": 0.56640625, "learning_rate": 1.3242557454291935e-05, "loss": 4.0106, "step": 7376 }, { "epoch": 2.457399850087449, "grad_norm": 0.5703125, "learning_rate": 1.3242036766994746e-05, "loss": 4.013, "step": 7377 }, { "epoch": 2.4577329890896977, "grad_norm": 0.578125, "learning_rate": 1.3241516012815269e-05, "loss": 4.0449, "step": 7378 }, { "epoch": 2.4580661280919465, "grad_norm": 0.5625, "learning_rate": 1.3240995191759566e-05, "loss": 3.9961, "step": 7379 }, { "epoch": 2.458399267094195, "grad_norm": 0.61328125, "learning_rate": 1.3240474303833703e-05, "loss": 4.0372, "step": 7380 }, { "epoch": 2.458732406096444, "grad_norm": 0.57421875, "learning_rate": 1.3239953349043748e-05, "loss": 3.984, "step": 7381 }, { "epoch": 2.4590655450986922, "grad_norm": 0.6015625, "learning_rate": 1.3239432327395771e-05, "loss": 4.022, "step": 7382 }, { "epoch": 2.459398684100941, "grad_norm": 0.58203125, "learning_rate": 1.3238911238895837e-05, "loss": 4.0735, "step": 7383 }, { "epoch": 2.4597318231031897, "grad_norm": 0.57421875, "learning_rate": 1.3238390083550018e-05, "loss": 4.0641, "step": 7384 }, { "epoch": 2.4600649621054385, "grad_norm": 0.59375, "learning_rate": 1.3237868861364384e-05, "loss": 4.0754, "step": 7385 }, { "epoch": 2.460398101107687, "grad_norm": 0.61328125, "learning_rate": 1.3237347572345007e-05, "loss": 3.9665, "step": 7386 }, { "epoch": 2.460731240109936, "grad_norm": 0.5859375, "learning_rate": 1.3236826216497957e-05, "loss": 3.9888, "step": 7387 }, { "epoch": 2.4610643791121847, "grad_norm": 0.59765625, "learning_rate": 1.3236304793829307e-05, "loss": 4.0146, "step": 7388 }, { "epoch": 2.4613975181144334, "grad_norm": 0.58984375, "learning_rate": 1.323578330434513e-05, "loss": 4.0568, "step": 7389 }, { "epoch": 2.461730657116682, "grad_norm": 0.5625, "learning_rate": 1.3235261748051503e-05, "loss": 4.0992, "step": 7390 }, { "epoch": 2.4620637961189304, "grad_norm": 0.61328125, "learning_rate": 1.3234740124954499e-05, "loss": 4.0838, "step": 7391 }, { "epoch": 2.462396935121179, "grad_norm": 0.59375, "learning_rate": 1.3234218435060195e-05, "loss": 4.0807, "step": 7392 }, { "epoch": 2.462730074123428, "grad_norm": 0.57421875, "learning_rate": 1.3233696678374663e-05, "loss": 4.0613, "step": 7393 }, { "epoch": 2.4630632131256767, "grad_norm": 0.578125, "learning_rate": 1.3233174854903989e-05, "loss": 3.9879, "step": 7394 }, { "epoch": 2.4633963521279254, "grad_norm": 0.5703125, "learning_rate": 1.3232652964654241e-05, "loss": 4.0829, "step": 7395 }, { "epoch": 2.463729491130174, "grad_norm": 0.58984375, "learning_rate": 1.3232131007631506e-05, "loss": 4.087, "step": 7396 }, { "epoch": 2.464062630132423, "grad_norm": 0.59765625, "learning_rate": 1.3231608983841857e-05, "loss": 4.0556, "step": 7397 }, { "epoch": 2.4643957691346716, "grad_norm": 0.609375, "learning_rate": 1.3231086893291381e-05, "loss": 4.0334, "step": 7398 }, { "epoch": 2.4647289081369204, "grad_norm": 0.59765625, "learning_rate": 1.3230564735986157e-05, "loss": 4.0007, "step": 7399 }, { "epoch": 2.4650620471391687, "grad_norm": 0.625, "learning_rate": 1.3230042511932263e-05, "loss": 4.0124, "step": 7400 }, { "epoch": 2.4653951861414174, "grad_norm": 0.59765625, "learning_rate": 1.3229520221135788e-05, "loss": 4.0284, "step": 7401 }, { "epoch": 2.465728325143666, "grad_norm": 0.58203125, "learning_rate": 1.322899786360281e-05, "loss": 4.0017, "step": 7402 }, { "epoch": 2.466061464145915, "grad_norm": 0.59765625, "learning_rate": 1.322847543933942e-05, "loss": 4.0398, "step": 7403 }, { "epoch": 2.4663946031481636, "grad_norm": 0.6171875, "learning_rate": 1.3227952948351694e-05, "loss": 4.0984, "step": 7404 }, { "epoch": 2.4667277421504124, "grad_norm": 0.609375, "learning_rate": 1.3227430390645726e-05, "loss": 3.9952, "step": 7405 }, { "epoch": 2.467060881152661, "grad_norm": 0.58203125, "learning_rate": 1.32269077662276e-05, "loss": 3.9894, "step": 7406 }, { "epoch": 2.46739402015491, "grad_norm": 0.6484375, "learning_rate": 1.32263850751034e-05, "loss": 3.9864, "step": 7407 }, { "epoch": 2.4677271591571586, "grad_norm": 0.61328125, "learning_rate": 1.3225862317279219e-05, "loss": 4.0055, "step": 7408 }, { "epoch": 2.468060298159407, "grad_norm": 0.578125, "learning_rate": 1.3225339492761145e-05, "loss": 3.9703, "step": 7409 }, { "epoch": 2.4683934371616556, "grad_norm": 0.60546875, "learning_rate": 1.3224816601555265e-05, "loss": 3.9764, "step": 7410 }, { "epoch": 2.4687265761639043, "grad_norm": 0.58984375, "learning_rate": 1.3224293643667674e-05, "loss": 4.0311, "step": 7411 }, { "epoch": 2.469059715166153, "grad_norm": 0.609375, "learning_rate": 1.3223770619104458e-05, "loss": 4.0094, "step": 7412 }, { "epoch": 2.469392854168402, "grad_norm": 0.57421875, "learning_rate": 1.3223247527871715e-05, "loss": 4.0765, "step": 7413 }, { "epoch": 2.4697259931706506, "grad_norm": 0.63671875, "learning_rate": 1.3222724369975531e-05, "loss": 3.9063, "step": 7414 }, { "epoch": 2.4700591321728993, "grad_norm": 0.625, "learning_rate": 1.3222201145422008e-05, "loss": 3.9856, "step": 7415 }, { "epoch": 2.4703922711751476, "grad_norm": 0.59375, "learning_rate": 1.3221677854217233e-05, "loss": 4.0802, "step": 7416 }, { "epoch": 2.4707254101773963, "grad_norm": 0.65625, "learning_rate": 1.3221154496367305e-05, "loss": 4.0646, "step": 7417 }, { "epoch": 2.471058549179645, "grad_norm": 0.62109375, "learning_rate": 1.3220631071878318e-05, "loss": 4.081, "step": 7418 }, { "epoch": 2.471391688181894, "grad_norm": 0.609375, "learning_rate": 1.322010758075637e-05, "loss": 4.0467, "step": 7419 }, { "epoch": 2.4717248271841425, "grad_norm": 0.58984375, "learning_rate": 1.321958402300756e-05, "loss": 4.0272, "step": 7420 }, { "epoch": 2.4720579661863913, "grad_norm": 0.609375, "learning_rate": 1.3219060398637985e-05, "loss": 4.0452, "step": 7421 }, { "epoch": 2.47239110518864, "grad_norm": 0.578125, "learning_rate": 1.3218536707653741e-05, "loss": 3.9952, "step": 7422 }, { "epoch": 2.4727242441908888, "grad_norm": 0.625, "learning_rate": 1.3218012950060933e-05, "loss": 3.9897, "step": 7423 }, { "epoch": 2.4730573831931375, "grad_norm": 0.609375, "learning_rate": 1.3217489125865659e-05, "loss": 4.0238, "step": 7424 }, { "epoch": 2.473390522195386, "grad_norm": 0.62890625, "learning_rate": 1.321696523507402e-05, "loss": 3.9536, "step": 7425 }, { "epoch": 2.4737236611976345, "grad_norm": 0.6171875, "learning_rate": 1.321644127769212e-05, "loss": 4.0752, "step": 7426 }, { "epoch": 2.4740568001998833, "grad_norm": 0.61328125, "learning_rate": 1.3215917253726061e-05, "loss": 4.0395, "step": 7427 }, { "epoch": 2.474389939202132, "grad_norm": 0.61328125, "learning_rate": 1.3215393163181944e-05, "loss": 3.9016, "step": 7428 }, { "epoch": 2.4747230782043808, "grad_norm": 0.6328125, "learning_rate": 1.3214869006065879e-05, "loss": 4.0124, "step": 7429 }, { "epoch": 2.4750562172066295, "grad_norm": 0.59375, "learning_rate": 1.3214344782383968e-05, "loss": 4.08, "step": 7430 }, { "epoch": 2.4753893562088782, "grad_norm": 0.5859375, "learning_rate": 1.3213820492142317e-05, "loss": 4.065, "step": 7431 }, { "epoch": 2.475722495211127, "grad_norm": 0.6015625, "learning_rate": 1.3213296135347036e-05, "loss": 4.086, "step": 7432 }, { "epoch": 2.4760556342133757, "grad_norm": 0.58203125, "learning_rate": 1.3212771712004226e-05, "loss": 4.0257, "step": 7433 }, { "epoch": 2.476388773215624, "grad_norm": 0.6015625, "learning_rate": 1.321224722212e-05, "loss": 3.9956, "step": 7434 }, { "epoch": 2.4767219122178727, "grad_norm": 0.6171875, "learning_rate": 1.3211722665700469e-05, "loss": 4.08, "step": 7435 }, { "epoch": 2.4770550512201215, "grad_norm": 0.55859375, "learning_rate": 1.321119804275174e-05, "loss": 4.0496, "step": 7436 }, { "epoch": 2.4773881902223702, "grad_norm": 0.58984375, "learning_rate": 1.3210673353279922e-05, "loss": 4.0289, "step": 7437 }, { "epoch": 2.477721329224619, "grad_norm": 0.57421875, "learning_rate": 1.321014859729113e-05, "loss": 4.0761, "step": 7438 }, { "epoch": 2.4780544682268677, "grad_norm": 0.61328125, "learning_rate": 1.3209623774791473e-05, "loss": 3.9956, "step": 7439 }, { "epoch": 2.4783876072291164, "grad_norm": 0.57421875, "learning_rate": 1.320909888578707e-05, "loss": 4.0452, "step": 7440 }, { "epoch": 2.478720746231365, "grad_norm": 0.60546875, "learning_rate": 1.3208573930284028e-05, "loss": 4.0208, "step": 7441 }, { "epoch": 2.479053885233614, "grad_norm": 0.58984375, "learning_rate": 1.3208048908288465e-05, "loss": 4.0377, "step": 7442 }, { "epoch": 2.479387024235862, "grad_norm": 0.5703125, "learning_rate": 1.3207523819806496e-05, "loss": 4.07, "step": 7443 }, { "epoch": 2.479720163238111, "grad_norm": 0.61328125, "learning_rate": 1.3206998664844238e-05, "loss": 4.0588, "step": 7444 }, { "epoch": 2.4800533022403597, "grad_norm": 0.60546875, "learning_rate": 1.3206473443407807e-05, "loss": 3.9741, "step": 7445 }, { "epoch": 2.4803864412426084, "grad_norm": 0.5859375, "learning_rate": 1.320594815550332e-05, "loss": 4.0727, "step": 7446 }, { "epoch": 2.480719580244857, "grad_norm": 0.5703125, "learning_rate": 1.3205422801136895e-05, "loss": 4.0795, "step": 7447 }, { "epoch": 2.481052719247106, "grad_norm": 0.60546875, "learning_rate": 1.3204897380314652e-05, "loss": 3.9896, "step": 7448 }, { "epoch": 2.4813858582493546, "grad_norm": 0.59375, "learning_rate": 1.3204371893042712e-05, "loss": 4.1043, "step": 7449 }, { "epoch": 2.4817189972516034, "grad_norm": 0.59765625, "learning_rate": 1.3203846339327195e-05, "loss": 4.0205, "step": 7450 }, { "epoch": 2.482052136253852, "grad_norm": 0.578125, "learning_rate": 1.3203320719174223e-05, "loss": 4.0248, "step": 7451 }, { "epoch": 2.4823852752561004, "grad_norm": 0.60546875, "learning_rate": 1.320279503258992e-05, "loss": 4.0093, "step": 7452 }, { "epoch": 2.482718414258349, "grad_norm": 0.5859375, "learning_rate": 1.3202269279580405e-05, "loss": 4.0848, "step": 7453 }, { "epoch": 2.483051553260598, "grad_norm": 0.60546875, "learning_rate": 1.3201743460151804e-05, "loss": 4.0347, "step": 7454 }, { "epoch": 2.4833846922628466, "grad_norm": 0.5859375, "learning_rate": 1.320121757431024e-05, "loss": 4.037, "step": 7455 }, { "epoch": 2.4837178312650954, "grad_norm": 0.609375, "learning_rate": 1.3200691622061843e-05, "loss": 4.0287, "step": 7456 }, { "epoch": 2.484050970267344, "grad_norm": 0.62109375, "learning_rate": 1.3200165603412735e-05, "loss": 4.0159, "step": 7457 }, { "epoch": 2.484384109269593, "grad_norm": 0.56640625, "learning_rate": 1.3199639518369044e-05, "loss": 3.9677, "step": 7458 }, { "epoch": 2.4847172482718416, "grad_norm": 0.6171875, "learning_rate": 1.31991133669369e-05, "loss": 4.0377, "step": 7459 }, { "epoch": 2.4850503872740903, "grad_norm": 0.625, "learning_rate": 1.3198587149122428e-05, "loss": 4.0619, "step": 7460 }, { "epoch": 2.4853835262763386, "grad_norm": 0.62109375, "learning_rate": 1.3198060864931757e-05, "loss": 4.0602, "step": 7461 }, { "epoch": 2.4857166652785874, "grad_norm": 0.58984375, "learning_rate": 1.319753451437102e-05, "loss": 4.0198, "step": 7462 }, { "epoch": 2.486049804280836, "grad_norm": 0.59375, "learning_rate": 1.319700809744635e-05, "loss": 4.0977, "step": 7463 }, { "epoch": 2.486382943283085, "grad_norm": 0.5859375, "learning_rate": 1.3196481614163872e-05, "loss": 4.0873, "step": 7464 }, { "epoch": 2.4867160822853336, "grad_norm": 0.57421875, "learning_rate": 1.3195955064529722e-05, "loss": 4.0422, "step": 7465 }, { "epoch": 2.4870492212875823, "grad_norm": 0.65234375, "learning_rate": 1.3195428448550035e-05, "loss": 4.0276, "step": 7466 }, { "epoch": 2.487382360289831, "grad_norm": 0.5625, "learning_rate": 1.319490176623094e-05, "loss": 4.0696, "step": 7467 }, { "epoch": 2.48771549929208, "grad_norm": 0.60546875, "learning_rate": 1.3194375017578577e-05, "loss": 3.9578, "step": 7468 }, { "epoch": 2.4880486382943285, "grad_norm": 0.5703125, "learning_rate": 1.3193848202599077e-05, "loss": 4.0174, "step": 7469 }, { "epoch": 2.488381777296577, "grad_norm": 0.6015625, "learning_rate": 1.3193321321298578e-05, "loss": 4.0353, "step": 7470 }, { "epoch": 2.4887149162988256, "grad_norm": 0.609375, "learning_rate": 1.3192794373683218e-05, "loss": 4.084, "step": 7471 }, { "epoch": 2.4890480553010743, "grad_norm": 0.60546875, "learning_rate": 1.3192267359759135e-05, "loss": 4.0055, "step": 7472 }, { "epoch": 2.489381194303323, "grad_norm": 0.59375, "learning_rate": 1.3191740279532467e-05, "loss": 4.0007, "step": 7473 }, { "epoch": 2.489714333305572, "grad_norm": 0.578125, "learning_rate": 1.3191213133009352e-05, "loss": 4.0044, "step": 7474 }, { "epoch": 2.4900474723078205, "grad_norm": 0.62890625, "learning_rate": 1.3190685920195931e-05, "loss": 4.0637, "step": 7475 }, { "epoch": 2.4903806113100693, "grad_norm": 0.61328125, "learning_rate": 1.3190158641098348e-05, "loss": 4.0388, "step": 7476 }, { "epoch": 2.490713750312318, "grad_norm": 0.6171875, "learning_rate": 1.3189631295722737e-05, "loss": 3.9949, "step": 7477 }, { "epoch": 2.4910468893145667, "grad_norm": 0.609375, "learning_rate": 1.3189103884075248e-05, "loss": 4.0262, "step": 7478 }, { "epoch": 2.491380028316815, "grad_norm": 0.609375, "learning_rate": 1.3188576406162021e-05, "loss": 3.9519, "step": 7479 }, { "epoch": 2.491713167319064, "grad_norm": 0.59765625, "learning_rate": 1.31880488619892e-05, "loss": 4.1267, "step": 7480 }, { "epoch": 2.4920463063213125, "grad_norm": 0.6328125, "learning_rate": 1.318752125156293e-05, "loss": 3.9774, "step": 7481 }, { "epoch": 2.4923794453235613, "grad_norm": 0.58984375, "learning_rate": 1.3186993574889357e-05, "loss": 4.0093, "step": 7482 }, { "epoch": 2.49271258432581, "grad_norm": 0.55859375, "learning_rate": 1.3186465831974627e-05, "loss": 3.9581, "step": 7483 }, { "epoch": 2.4930457233280587, "grad_norm": 0.57421875, "learning_rate": 1.3185938022824884e-05, "loss": 4.0676, "step": 7484 }, { "epoch": 2.4933788623303075, "grad_norm": 0.609375, "learning_rate": 1.3185410147446283e-05, "loss": 3.9619, "step": 7485 }, { "epoch": 2.4937120013325558, "grad_norm": 0.58203125, "learning_rate": 1.3184882205844967e-05, "loss": 4.0697, "step": 7486 }, { "epoch": 2.4940451403348045, "grad_norm": 0.61328125, "learning_rate": 1.3184354198027085e-05, "loss": 4.0152, "step": 7487 }, { "epoch": 2.4943782793370533, "grad_norm": 0.625, "learning_rate": 1.318382612399879e-05, "loss": 4.0387, "step": 7488 }, { "epoch": 2.494711418339302, "grad_norm": 0.609375, "learning_rate": 1.3183297983766233e-05, "loss": 3.9904, "step": 7489 }, { "epoch": 2.4950445573415507, "grad_norm": 0.5859375, "learning_rate": 1.3182769777335563e-05, "loss": 4.0428, "step": 7490 }, { "epoch": 2.4953776963437995, "grad_norm": 0.60546875, "learning_rate": 1.3182241504712934e-05, "loss": 4.0706, "step": 7491 }, { "epoch": 2.495710835346048, "grad_norm": 0.5859375, "learning_rate": 1.31817131659045e-05, "loss": 4.0305, "step": 7492 }, { "epoch": 2.496043974348297, "grad_norm": 0.578125, "learning_rate": 1.3181184760916413e-05, "loss": 4.081, "step": 7493 }, { "epoch": 2.4963771133505457, "grad_norm": 0.57421875, "learning_rate": 1.318065628975483e-05, "loss": 4.0048, "step": 7494 }, { "epoch": 2.496710252352794, "grad_norm": 0.58203125, "learning_rate": 1.3180127752425906e-05, "loss": 4.0474, "step": 7495 }, { "epoch": 2.4970433913550427, "grad_norm": 0.62109375, "learning_rate": 1.3179599148935796e-05, "loss": 4.0341, "step": 7496 }, { "epoch": 2.4973765303572915, "grad_norm": 0.58984375, "learning_rate": 1.3179070479290658e-05, "loss": 4.0236, "step": 7497 }, { "epoch": 2.49770966935954, "grad_norm": 0.60546875, "learning_rate": 1.317854174349665e-05, "loss": 3.9974, "step": 7498 }, { "epoch": 2.498042808361789, "grad_norm": 0.59765625, "learning_rate": 1.317801294155993e-05, "loss": 4.0805, "step": 7499 }, { "epoch": 2.4983759473640377, "grad_norm": 0.578125, "learning_rate": 1.3177484073486657e-05, "loss": 4.0347, "step": 7500 }, { "epoch": 2.4987090863662864, "grad_norm": 0.59375, "learning_rate": 1.3176955139282994e-05, "loss": 4.0615, "step": 7501 }, { "epoch": 2.499042225368535, "grad_norm": 0.59765625, "learning_rate": 1.3176426138955097e-05, "loss": 4.0009, "step": 7502 }, { "epoch": 2.499375364370784, "grad_norm": 0.609375, "learning_rate": 1.3175897072509134e-05, "loss": 4.0087, "step": 7503 }, { "epoch": 2.499708503373032, "grad_norm": 0.58984375, "learning_rate": 1.3175367939951261e-05, "loss": 4.006, "step": 7504 }, { "epoch": 2.500041642375281, "grad_norm": 0.62109375, "learning_rate": 1.3174838741287648e-05, "loss": 4.0728, "step": 7505 }, { "epoch": 2.5003747813775297, "grad_norm": 0.62109375, "learning_rate": 1.3174309476524453e-05, "loss": 4.013, "step": 7506 }, { "epoch": 2.5007079203797784, "grad_norm": 0.609375, "learning_rate": 1.3173780145667842e-05, "loss": 3.9679, "step": 7507 }, { "epoch": 2.501041059382027, "grad_norm": 0.5859375, "learning_rate": 1.3173250748723982e-05, "loss": 4.0866, "step": 7508 }, { "epoch": 2.501374198384276, "grad_norm": 0.58203125, "learning_rate": 1.317272128569904e-05, "loss": 4.0161, "step": 7509 }, { "epoch": 2.5017073373865246, "grad_norm": 0.6171875, "learning_rate": 1.3172191756599183e-05, "loss": 4.017, "step": 7510 }, { "epoch": 2.5020404763887734, "grad_norm": 0.57421875, "learning_rate": 1.3171662161430577e-05, "loss": 4.0885, "step": 7511 }, { "epoch": 2.502373615391022, "grad_norm": 0.58203125, "learning_rate": 1.317113250019939e-05, "loss": 3.9829, "step": 7512 }, { "epoch": 2.5027067543932704, "grad_norm": 0.62109375, "learning_rate": 1.3170602772911795e-05, "loss": 4.0213, "step": 7513 }, { "epoch": 2.503039893395519, "grad_norm": 0.609375, "learning_rate": 1.317007297957396e-05, "loss": 4.0598, "step": 7514 }, { "epoch": 2.503373032397768, "grad_norm": 0.58984375, "learning_rate": 1.3169543120192057e-05, "loss": 3.9616, "step": 7515 }, { "epoch": 2.5037061714000166, "grad_norm": 0.59765625, "learning_rate": 1.3169013194772258e-05, "loss": 4.0273, "step": 7516 }, { "epoch": 2.5040393104022654, "grad_norm": 0.62109375, "learning_rate": 1.3168483203320731e-05, "loss": 4.0547, "step": 7517 }, { "epoch": 2.504372449404514, "grad_norm": 0.625, "learning_rate": 1.3167953145843654e-05, "loss": 4.0589, "step": 7518 }, { "epoch": 2.504705588406763, "grad_norm": 0.6484375, "learning_rate": 1.3167423022347199e-05, "loss": 3.9923, "step": 7519 }, { "epoch": 2.5050387274090116, "grad_norm": 0.6328125, "learning_rate": 1.3166892832837541e-05, "loss": 4.0103, "step": 7520 }, { "epoch": 2.5053718664112603, "grad_norm": 0.6015625, "learning_rate": 1.3166362577320856e-05, "loss": 3.9542, "step": 7521 }, { "epoch": 2.5057050054135086, "grad_norm": 0.60546875, "learning_rate": 1.3165832255803323e-05, "loss": 4.0421, "step": 7522 }, { "epoch": 2.5060381444157573, "grad_norm": 0.58203125, "learning_rate": 1.3165301868291113e-05, "loss": 3.9778, "step": 7523 }, { "epoch": 2.506371283418006, "grad_norm": 0.62890625, "learning_rate": 1.316477141479041e-05, "loss": 3.9977, "step": 7524 }, { "epoch": 2.506704422420255, "grad_norm": 0.61328125, "learning_rate": 1.3164240895307387e-05, "loss": 3.9904, "step": 7525 }, { "epoch": 2.5070375614225036, "grad_norm": 0.59765625, "learning_rate": 1.316371030984823e-05, "loss": 4.0302, "step": 7526 }, { "epoch": 2.5073707004247523, "grad_norm": 0.63671875, "learning_rate": 1.3163179658419112e-05, "loss": 4.0304, "step": 7527 }, { "epoch": 2.507703839427001, "grad_norm": 0.6171875, "learning_rate": 1.3162648941026218e-05, "loss": 4.0513, "step": 7528 }, { "epoch": 2.5080369784292493, "grad_norm": 0.61328125, "learning_rate": 1.3162118157675732e-05, "loss": 4.0423, "step": 7529 }, { "epoch": 2.5083701174314985, "grad_norm": 0.5625, "learning_rate": 1.316158730837383e-05, "loss": 4.0656, "step": 7530 }, { "epoch": 2.508703256433747, "grad_norm": 0.6015625, "learning_rate": 1.3161056393126701e-05, "loss": 4.015, "step": 7531 }, { "epoch": 2.5090363954359955, "grad_norm": 0.59375, "learning_rate": 1.3160525411940527e-05, "loss": 4.0861, "step": 7532 }, { "epoch": 2.5093695344382443, "grad_norm": 0.6640625, "learning_rate": 1.3159994364821493e-05, "loss": 4.0593, "step": 7533 }, { "epoch": 2.509702673440493, "grad_norm": 0.59765625, "learning_rate": 1.3159463251775782e-05, "loss": 4.0414, "step": 7534 }, { "epoch": 2.5100358124427418, "grad_norm": 0.62890625, "learning_rate": 1.3158932072809585e-05, "loss": 4.0188, "step": 7535 }, { "epoch": 2.5103689514449905, "grad_norm": 0.58203125, "learning_rate": 1.3158400827929082e-05, "loss": 4.0315, "step": 7536 }, { "epoch": 2.5107020904472392, "grad_norm": 0.578125, "learning_rate": 1.3157869517140469e-05, "loss": 3.981, "step": 7537 }, { "epoch": 2.5110352294494875, "grad_norm": 0.6171875, "learning_rate": 1.315733814044993e-05, "loss": 4.0535, "step": 7538 }, { "epoch": 2.5113683684517367, "grad_norm": 0.6171875, "learning_rate": 1.3156806697863658e-05, "loss": 4.0479, "step": 7539 }, { "epoch": 2.511701507453985, "grad_norm": 0.58203125, "learning_rate": 1.3156275189387837e-05, "loss": 4.1335, "step": 7540 }, { "epoch": 2.5120346464562338, "grad_norm": 0.609375, "learning_rate": 1.3155743615028664e-05, "loss": 4.028, "step": 7541 }, { "epoch": 2.5123677854584825, "grad_norm": 0.5703125, "learning_rate": 1.3155211974792327e-05, "loss": 4.0557, "step": 7542 }, { "epoch": 2.5127009244607312, "grad_norm": 0.625, "learning_rate": 1.315468026868502e-05, "loss": 3.935, "step": 7543 }, { "epoch": 2.51303406346298, "grad_norm": 0.62109375, "learning_rate": 1.3154148496712937e-05, "loss": 4.0342, "step": 7544 }, { "epoch": 2.5133672024652287, "grad_norm": 0.5703125, "learning_rate": 1.3153616658882268e-05, "loss": 4.0702, "step": 7545 }, { "epoch": 2.5137003414674775, "grad_norm": 0.60546875, "learning_rate": 1.3153084755199213e-05, "loss": 4.0614, "step": 7546 }, { "epoch": 2.5140334804697257, "grad_norm": 0.59765625, "learning_rate": 1.3152552785669966e-05, "loss": 4.1177, "step": 7547 }, { "epoch": 2.514366619471975, "grad_norm": 0.61328125, "learning_rate": 1.3152020750300718e-05, "loss": 4.0126, "step": 7548 }, { "epoch": 2.5146997584742232, "grad_norm": 0.5859375, "learning_rate": 1.3151488649097676e-05, "loss": 4.0107, "step": 7549 }, { "epoch": 2.515032897476472, "grad_norm": 0.625, "learning_rate": 1.3150956482067031e-05, "loss": 3.9472, "step": 7550 }, { "epoch": 2.5153660364787207, "grad_norm": 0.60546875, "learning_rate": 1.315042424921498e-05, "loss": 4.0296, "step": 7551 }, { "epoch": 2.5156991754809694, "grad_norm": 0.59375, "learning_rate": 1.3149891950547725e-05, "loss": 4.0792, "step": 7552 }, { "epoch": 2.516032314483218, "grad_norm": 0.5859375, "learning_rate": 1.314935958607147e-05, "loss": 4.0889, "step": 7553 }, { "epoch": 2.516365453485467, "grad_norm": 0.60546875, "learning_rate": 1.3148827155792411e-05, "loss": 4.0942, "step": 7554 }, { "epoch": 2.5166985924877157, "grad_norm": 0.6015625, "learning_rate": 1.314829465971675e-05, "loss": 4.0256, "step": 7555 }, { "epoch": 2.517031731489964, "grad_norm": 0.58984375, "learning_rate": 1.3147762097850692e-05, "loss": 4.076, "step": 7556 }, { "epoch": 2.517364870492213, "grad_norm": 0.5859375, "learning_rate": 1.3147229470200438e-05, "loss": 4.1208, "step": 7557 }, { "epoch": 2.5176980094944614, "grad_norm": 0.578125, "learning_rate": 1.3146696776772193e-05, "loss": 4.0953, "step": 7558 }, { "epoch": 2.51803114849671, "grad_norm": 0.6015625, "learning_rate": 1.314616401757216e-05, "loss": 4.0079, "step": 7559 }, { "epoch": 2.518364287498959, "grad_norm": 0.61328125, "learning_rate": 1.3145631192606547e-05, "loss": 4.0171, "step": 7560 }, { "epoch": 2.5186974265012076, "grad_norm": 0.60546875, "learning_rate": 1.3145098301881558e-05, "loss": 4.0257, "step": 7561 }, { "epoch": 2.5190305655034564, "grad_norm": 0.55078125, "learning_rate": 1.3144565345403402e-05, "loss": 4.023, "step": 7562 }, { "epoch": 2.519363704505705, "grad_norm": 0.5703125, "learning_rate": 1.3144032323178286e-05, "loss": 4.0349, "step": 7563 }, { "epoch": 2.519696843507954, "grad_norm": 0.6328125, "learning_rate": 1.3143499235212416e-05, "loss": 3.9591, "step": 7564 }, { "epoch": 2.520029982510202, "grad_norm": 0.6015625, "learning_rate": 1.3142966081512006e-05, "loss": 4.1014, "step": 7565 }, { "epoch": 2.5203631215124513, "grad_norm": 0.578125, "learning_rate": 1.3142432862083263e-05, "loss": 4.0212, "step": 7566 }, { "epoch": 2.5206962605146996, "grad_norm": 0.61328125, "learning_rate": 1.3141899576932398e-05, "loss": 4.0055, "step": 7567 }, { "epoch": 2.5210293995169484, "grad_norm": 0.6015625, "learning_rate": 1.3141366226065626e-05, "loss": 3.9995, "step": 7568 }, { "epoch": 2.521362538519197, "grad_norm": 0.59375, "learning_rate": 1.3140832809489153e-05, "loss": 4.0592, "step": 7569 }, { "epoch": 2.521695677521446, "grad_norm": 0.59765625, "learning_rate": 1.3140299327209198e-05, "loss": 4.0775, "step": 7570 }, { "epoch": 2.5220288165236946, "grad_norm": 0.61328125, "learning_rate": 1.3139765779231971e-05, "loss": 4.0681, "step": 7571 }, { "epoch": 2.5223619555259433, "grad_norm": 0.56640625, "learning_rate": 1.3139232165563691e-05, "loss": 4.0805, "step": 7572 }, { "epoch": 2.522695094528192, "grad_norm": 0.55859375, "learning_rate": 1.3138698486210569e-05, "loss": 4.0229, "step": 7573 }, { "epoch": 2.5230282335304404, "grad_norm": 0.5625, "learning_rate": 1.3138164741178824e-05, "loss": 4.0428, "step": 7574 }, { "epoch": 2.523361372532689, "grad_norm": 0.58203125, "learning_rate": 1.3137630930474671e-05, "loss": 4.0678, "step": 7575 }, { "epoch": 2.523694511534938, "grad_norm": 0.59765625, "learning_rate": 1.313709705410433e-05, "loss": 3.9917, "step": 7576 }, { "epoch": 2.5240276505371866, "grad_norm": 0.640625, "learning_rate": 1.3136563112074015e-05, "loss": 4.0513, "step": 7577 }, { "epoch": 2.5243607895394353, "grad_norm": 0.59765625, "learning_rate": 1.3136029104389953e-05, "loss": 4.0084, "step": 7578 }, { "epoch": 2.524693928541684, "grad_norm": 0.59765625, "learning_rate": 1.3135495031058358e-05, "loss": 4.0664, "step": 7579 }, { "epoch": 2.525027067543933, "grad_norm": 0.60546875, "learning_rate": 1.3134960892085452e-05, "loss": 4.0419, "step": 7580 }, { "epoch": 2.5253602065461815, "grad_norm": 0.5703125, "learning_rate": 1.3134426687477456e-05, "loss": 4.0689, "step": 7581 }, { "epoch": 2.5256933455484303, "grad_norm": 0.58984375, "learning_rate": 1.3133892417240594e-05, "loss": 4.0409, "step": 7582 }, { "epoch": 2.5260264845506786, "grad_norm": 0.62890625, "learning_rate": 1.313335808138109e-05, "loss": 4.0862, "step": 7583 }, { "epoch": 2.5263596235529273, "grad_norm": 0.5859375, "learning_rate": 1.3132823679905163e-05, "loss": 4.023, "step": 7584 }, { "epoch": 2.526692762555176, "grad_norm": 0.55859375, "learning_rate": 1.3132289212819043e-05, "loss": 4.0858, "step": 7585 }, { "epoch": 2.527025901557425, "grad_norm": 0.58203125, "learning_rate": 1.3131754680128952e-05, "loss": 4.0142, "step": 7586 }, { "epoch": 2.5273590405596735, "grad_norm": 0.61328125, "learning_rate": 1.3131220081841118e-05, "loss": 4.0987, "step": 7587 }, { "epoch": 2.5276921795619223, "grad_norm": 0.63671875, "learning_rate": 1.3130685417961767e-05, "loss": 4.0716, "step": 7588 }, { "epoch": 2.528025318564171, "grad_norm": 0.6015625, "learning_rate": 1.3130150688497126e-05, "loss": 4.0296, "step": 7589 }, { "epoch": 2.5283584575664197, "grad_norm": 0.58984375, "learning_rate": 1.3129615893453427e-05, "loss": 4.0781, "step": 7590 }, { "epoch": 2.5286915965686685, "grad_norm": 0.58203125, "learning_rate": 1.3129081032836895e-05, "loss": 3.9867, "step": 7591 }, { "epoch": 2.529024735570917, "grad_norm": 0.59765625, "learning_rate": 1.3128546106653761e-05, "loss": 4.0638, "step": 7592 }, { "epoch": 2.5293578745731655, "grad_norm": 0.60546875, "learning_rate": 1.3128011114910257e-05, "loss": 4.0319, "step": 7593 }, { "epoch": 2.5296910135754143, "grad_norm": 0.60546875, "learning_rate": 1.3127476057612615e-05, "loss": 4.0339, "step": 7594 }, { "epoch": 2.530024152577663, "grad_norm": 0.625, "learning_rate": 1.3126940934767064e-05, "loss": 3.9404, "step": 7595 }, { "epoch": 2.5303572915799117, "grad_norm": 0.60546875, "learning_rate": 1.3126405746379838e-05, "loss": 3.9781, "step": 7596 }, { "epoch": 2.5306904305821605, "grad_norm": 0.61328125, "learning_rate": 1.3125870492457173e-05, "loss": 4.0061, "step": 7597 }, { "epoch": 2.531023569584409, "grad_norm": 0.59765625, "learning_rate": 1.3125335173005304e-05, "loss": 4.0484, "step": 7598 }, { "epoch": 2.5313567085866575, "grad_norm": 0.5703125, "learning_rate": 1.3124799788030464e-05, "loss": 4.0122, "step": 7599 }, { "epoch": 2.5316898475889067, "grad_norm": 0.578125, "learning_rate": 1.312426433753889e-05, "loss": 4.1168, "step": 7600 }, { "epoch": 2.532022986591155, "grad_norm": 0.64453125, "learning_rate": 1.3123728821536818e-05, "loss": 3.9723, "step": 7601 }, { "epoch": 2.5323561255934037, "grad_norm": 0.625, "learning_rate": 1.3123193240030485e-05, "loss": 4.0252, "step": 7602 }, { "epoch": 2.5326892645956525, "grad_norm": 0.59375, "learning_rate": 1.3122657593026133e-05, "loss": 4.0246, "step": 7603 }, { "epoch": 2.533022403597901, "grad_norm": 0.61328125, "learning_rate": 1.3122121880529995e-05, "loss": 4.0542, "step": 7604 }, { "epoch": 2.53335554260015, "grad_norm": 0.625, "learning_rate": 1.3121586102548319e-05, "loss": 4.0352, "step": 7605 }, { "epoch": 2.5336886816023987, "grad_norm": 0.59765625, "learning_rate": 1.312105025908734e-05, "loss": 4.0448, "step": 7606 }, { "epoch": 2.5340218206046474, "grad_norm": 0.59375, "learning_rate": 1.3120514350153297e-05, "loss": 4.0454, "step": 7607 }, { "epoch": 2.5343549596068957, "grad_norm": 0.61328125, "learning_rate": 1.311997837575244e-05, "loss": 4.0454, "step": 7608 }, { "epoch": 2.534688098609145, "grad_norm": 0.6328125, "learning_rate": 1.3119442335891007e-05, "loss": 4.0436, "step": 7609 }, { "epoch": 2.535021237611393, "grad_norm": 0.5703125, "learning_rate": 1.3118906230575242e-05, "loss": 4.0746, "step": 7610 }, { "epoch": 2.535354376613642, "grad_norm": 0.625, "learning_rate": 1.311837005981139e-05, "loss": 4.0016, "step": 7611 }, { "epoch": 2.5356875156158907, "grad_norm": 0.59765625, "learning_rate": 1.3117833823605697e-05, "loss": 4.028, "step": 7612 }, { "epoch": 2.5360206546181394, "grad_norm": 0.578125, "learning_rate": 1.3117297521964407e-05, "loss": 4.0912, "step": 7613 }, { "epoch": 2.536353793620388, "grad_norm": 0.62109375, "learning_rate": 1.3116761154893769e-05, "loss": 4.0295, "step": 7614 }, { "epoch": 2.536686932622637, "grad_norm": 0.6171875, "learning_rate": 1.3116224722400027e-05, "loss": 4.0642, "step": 7615 }, { "epoch": 2.5370200716248856, "grad_norm": 0.57421875, "learning_rate": 1.3115688224489433e-05, "loss": 4.0612, "step": 7616 }, { "epoch": 2.537353210627134, "grad_norm": 0.59765625, "learning_rate": 1.3115151661168235e-05, "loss": 3.962, "step": 7617 }, { "epoch": 2.537686349629383, "grad_norm": 0.578125, "learning_rate": 1.3114615032442682e-05, "loss": 4.0301, "step": 7618 }, { "epoch": 2.5380194886316314, "grad_norm": 0.5703125, "learning_rate": 1.3114078338319027e-05, "loss": 4.1261, "step": 7619 }, { "epoch": 2.53835262763388, "grad_norm": 0.6015625, "learning_rate": 1.3113541578803516e-05, "loss": 4.0149, "step": 7620 }, { "epoch": 2.538685766636129, "grad_norm": 0.60546875, "learning_rate": 1.3113004753902409e-05, "loss": 4.0174, "step": 7621 }, { "epoch": 2.5390189056383776, "grad_norm": 0.59375, "learning_rate": 1.311246786362195e-05, "loss": 4.0877, "step": 7622 }, { "epoch": 2.5393520446406264, "grad_norm": 0.5703125, "learning_rate": 1.3111930907968399e-05, "loss": 4.0305, "step": 7623 }, { "epoch": 2.539685183642875, "grad_norm": 0.62890625, "learning_rate": 1.311139388694801e-05, "loss": 4.077, "step": 7624 }, { "epoch": 2.540018322645124, "grad_norm": 0.59375, "learning_rate": 1.3110856800567034e-05, "loss": 4.0298, "step": 7625 }, { "epoch": 2.540351461647372, "grad_norm": 0.625, "learning_rate": 1.3110319648831731e-05, "loss": 4.0504, "step": 7626 }, { "epoch": 2.5406846006496213, "grad_norm": 0.625, "learning_rate": 1.3109782431748355e-05, "loss": 4.0166, "step": 7627 }, { "epoch": 2.5410177396518696, "grad_norm": 0.625, "learning_rate": 1.3109245149323163e-05, "loss": 3.9834, "step": 7628 }, { "epoch": 2.5413508786541184, "grad_norm": 0.64453125, "learning_rate": 1.3108707801562417e-05, "loss": 4.125, "step": 7629 }, { "epoch": 2.541684017656367, "grad_norm": 0.61328125, "learning_rate": 1.3108170388472373e-05, "loss": 3.9717, "step": 7630 }, { "epoch": 2.542017156658616, "grad_norm": 0.5859375, "learning_rate": 1.310763291005929e-05, "loss": 4.0465, "step": 7631 }, { "epoch": 2.5423502956608646, "grad_norm": 0.6171875, "learning_rate": 1.3107095366329432e-05, "loss": 4.012, "step": 7632 }, { "epoch": 2.5426834346631133, "grad_norm": 0.6015625, "learning_rate": 1.3106557757289058e-05, "loss": 4.003, "step": 7633 }, { "epoch": 2.543016573665362, "grad_norm": 0.59375, "learning_rate": 1.3106020082944427e-05, "loss": 4.0842, "step": 7634 }, { "epoch": 2.5433497126676103, "grad_norm": 0.61328125, "learning_rate": 1.3105482343301806e-05, "loss": 4.0165, "step": 7635 }, { "epoch": 2.5436828516698595, "grad_norm": 0.59375, "learning_rate": 1.3104944538367459e-05, "loss": 4.0168, "step": 7636 }, { "epoch": 2.544015990672108, "grad_norm": 0.58984375, "learning_rate": 1.3104406668147647e-05, "loss": 4.0187, "step": 7637 }, { "epoch": 2.5443491296743566, "grad_norm": 0.609375, "learning_rate": 1.3103868732648636e-05, "loss": 4.0114, "step": 7638 }, { "epoch": 2.5446822686766053, "grad_norm": 0.6015625, "learning_rate": 1.3103330731876691e-05, "loss": 4.0023, "step": 7639 }, { "epoch": 2.545015407678854, "grad_norm": 0.60546875, "learning_rate": 1.3102792665838082e-05, "loss": 4.042, "step": 7640 }, { "epoch": 2.5453485466811028, "grad_norm": 0.61328125, "learning_rate": 1.3102254534539073e-05, "loss": 4.0775, "step": 7641 }, { "epoch": 2.5456816856833515, "grad_norm": 0.6484375, "learning_rate": 1.3101716337985933e-05, "loss": 4.0426, "step": 7642 }, { "epoch": 2.5460148246856003, "grad_norm": 0.60546875, "learning_rate": 1.3101178076184932e-05, "loss": 4.0245, "step": 7643 }, { "epoch": 2.5463479636878485, "grad_norm": 0.6328125, "learning_rate": 1.3100639749142338e-05, "loss": 4.0281, "step": 7644 }, { "epoch": 2.5466811026900973, "grad_norm": 0.59765625, "learning_rate": 1.3100101356864421e-05, "loss": 3.9652, "step": 7645 }, { "epoch": 2.547014241692346, "grad_norm": 0.6171875, "learning_rate": 1.3099562899357456e-05, "loss": 4.0276, "step": 7646 }, { "epoch": 2.5473473806945948, "grad_norm": 0.65234375, "learning_rate": 1.309902437662771e-05, "loss": 4.0189, "step": 7647 }, { "epoch": 2.5476805196968435, "grad_norm": 0.6015625, "learning_rate": 1.3098485788681456e-05, "loss": 4.0741, "step": 7648 }, { "epoch": 2.5480136586990922, "grad_norm": 0.6015625, "learning_rate": 1.3097947135524969e-05, "loss": 4.0191, "step": 7649 }, { "epoch": 2.548346797701341, "grad_norm": 0.59375, "learning_rate": 1.3097408417164524e-05, "loss": 3.9934, "step": 7650 }, { "epoch": 2.5486799367035897, "grad_norm": 0.5859375, "learning_rate": 1.3096869633606396e-05, "loss": 4.0392, "step": 7651 }, { "epoch": 2.5490130757058385, "grad_norm": 0.58984375, "learning_rate": 1.3096330784856859e-05, "loss": 4.0634, "step": 7652 }, { "epoch": 2.5493462147080868, "grad_norm": 0.5859375, "learning_rate": 1.3095791870922188e-05, "loss": 4.0841, "step": 7653 }, { "epoch": 2.5496793537103355, "grad_norm": 0.609375, "learning_rate": 1.3095252891808664e-05, "loss": 3.9771, "step": 7654 }, { "epoch": 2.5500124927125842, "grad_norm": 0.578125, "learning_rate": 1.3094713847522564e-05, "loss": 4.0533, "step": 7655 }, { "epoch": 2.550345631714833, "grad_norm": 0.6015625, "learning_rate": 1.3094174738070164e-05, "loss": 4.0883, "step": 7656 }, { "epoch": 2.5506787707170817, "grad_norm": 0.60546875, "learning_rate": 1.3093635563457747e-05, "loss": 3.9417, "step": 7657 }, { "epoch": 2.5510119097193305, "grad_norm": 0.578125, "learning_rate": 1.309309632369159e-05, "loss": 3.9931, "step": 7658 }, { "epoch": 2.551345048721579, "grad_norm": 0.609375, "learning_rate": 1.3092557018777976e-05, "loss": 4.0837, "step": 7659 }, { "epoch": 2.551678187723828, "grad_norm": 0.5859375, "learning_rate": 1.3092017648723186e-05, "loss": 4.05, "step": 7660 }, { "epoch": 2.5520113267260767, "grad_norm": 0.61328125, "learning_rate": 1.3091478213533502e-05, "loss": 4.0167, "step": 7661 }, { "epoch": 2.552344465728325, "grad_norm": 0.59375, "learning_rate": 1.3090938713215209e-05, "loss": 3.9991, "step": 7662 }, { "epoch": 2.5526776047305737, "grad_norm": 0.60546875, "learning_rate": 1.309039914777459e-05, "loss": 4.018, "step": 7663 }, { "epoch": 2.5530107437328224, "grad_norm": 0.640625, "learning_rate": 1.3089859517217929e-05, "loss": 3.9994, "step": 7664 }, { "epoch": 2.553343882735071, "grad_norm": 0.6171875, "learning_rate": 1.3089319821551513e-05, "loss": 3.971, "step": 7665 }, { "epoch": 2.55367702173732, "grad_norm": 0.59375, "learning_rate": 1.3088780060781627e-05, "loss": 4.0037, "step": 7666 }, { "epoch": 2.5540101607395687, "grad_norm": 0.58203125, "learning_rate": 1.3088240234914558e-05, "loss": 4.0327, "step": 7667 }, { "epoch": 2.5543432997418174, "grad_norm": 0.609375, "learning_rate": 1.3087700343956595e-05, "loss": 4.0491, "step": 7668 }, { "epoch": 2.5546764387440657, "grad_norm": 0.6484375, "learning_rate": 1.3087160387914025e-05, "loss": 4.0127, "step": 7669 }, { "epoch": 2.555009577746315, "grad_norm": 0.61328125, "learning_rate": 1.3086620366793141e-05, "loss": 4.0659, "step": 7670 }, { "epoch": 2.555342716748563, "grad_norm": 0.578125, "learning_rate": 1.3086080280600227e-05, "loss": 4.0545, "step": 7671 }, { "epoch": 2.555675855750812, "grad_norm": 0.60546875, "learning_rate": 1.308554012934158e-05, "loss": 3.987, "step": 7672 }, { "epoch": 2.5560089947530606, "grad_norm": 0.63671875, "learning_rate": 1.3084999913023485e-05, "loss": 4.0506, "step": 7673 }, { "epoch": 2.5563421337553094, "grad_norm": 0.6328125, "learning_rate": 1.3084459631652241e-05, "loss": 3.988, "step": 7674 }, { "epoch": 2.556675272757558, "grad_norm": 0.6484375, "learning_rate": 1.3083919285234137e-05, "loss": 4.0544, "step": 7675 }, { "epoch": 2.557008411759807, "grad_norm": 0.578125, "learning_rate": 1.3083378873775469e-05, "loss": 4.0659, "step": 7676 }, { "epoch": 2.5573415507620556, "grad_norm": 0.60546875, "learning_rate": 1.308283839728253e-05, "loss": 3.9705, "step": 7677 }, { "epoch": 2.557674689764304, "grad_norm": 0.5703125, "learning_rate": 1.3082297855761617e-05, "loss": 4.0407, "step": 7678 }, { "epoch": 2.558007828766553, "grad_norm": 0.5859375, "learning_rate": 1.3081757249219026e-05, "loss": 4.0585, "step": 7679 }, { "epoch": 2.5583409677688014, "grad_norm": 0.62109375, "learning_rate": 1.3081216577661052e-05, "loss": 4.0059, "step": 7680 }, { "epoch": 2.55867410677105, "grad_norm": 0.5859375, "learning_rate": 1.3080675841093994e-05, "loss": 4.0226, "step": 7681 }, { "epoch": 2.559007245773299, "grad_norm": 0.5859375, "learning_rate": 1.3080135039524149e-05, "loss": 4.0866, "step": 7682 }, { "epoch": 2.5593403847755476, "grad_norm": 0.59765625, "learning_rate": 1.3079594172957819e-05, "loss": 3.9979, "step": 7683 }, { "epoch": 2.5596735237777963, "grad_norm": 0.59765625, "learning_rate": 1.3079053241401301e-05, "loss": 4.0968, "step": 7684 }, { "epoch": 2.560006662780045, "grad_norm": 0.63671875, "learning_rate": 1.3078512244860897e-05, "loss": 3.9493, "step": 7685 }, { "epoch": 2.560339801782294, "grad_norm": 0.6328125, "learning_rate": 1.3077971183342909e-05, "loss": 4.0371, "step": 7686 }, { "epoch": 2.560672940784542, "grad_norm": 0.62109375, "learning_rate": 1.3077430056853637e-05, "loss": 4.0095, "step": 7687 }, { "epoch": 2.5610060797867913, "grad_norm": 0.609375, "learning_rate": 1.3076888865399389e-05, "loss": 4.0468, "step": 7688 }, { "epoch": 2.5613392187890396, "grad_norm": 0.6171875, "learning_rate": 1.3076347608986463e-05, "loss": 4.0207, "step": 7689 }, { "epoch": 2.5616723577912883, "grad_norm": 0.60546875, "learning_rate": 1.3075806287621165e-05, "loss": 4.0426, "step": 7690 }, { "epoch": 2.562005496793537, "grad_norm": 0.62109375, "learning_rate": 1.3075264901309802e-05, "loss": 4.0711, "step": 7691 }, { "epoch": 2.562338635795786, "grad_norm": 0.57421875, "learning_rate": 1.307472345005868e-05, "loss": 4.0624, "step": 7692 }, { "epoch": 2.5626717747980345, "grad_norm": 0.61328125, "learning_rate": 1.3074181933874101e-05, "loss": 4.0604, "step": 7693 }, { "epoch": 2.5630049138002833, "grad_norm": 0.6171875, "learning_rate": 1.3073640352762379e-05, "loss": 4.0027, "step": 7694 }, { "epoch": 2.563338052802532, "grad_norm": 0.61328125, "learning_rate": 1.3073098706729818e-05, "loss": 3.9485, "step": 7695 }, { "epoch": 2.5636711918047803, "grad_norm": 0.5703125, "learning_rate": 1.307255699578273e-05, "loss": 3.9836, "step": 7696 }, { "epoch": 2.5640043308070295, "grad_norm": 0.58984375, "learning_rate": 1.3072015219927423e-05, "loss": 4.0395, "step": 7697 }, { "epoch": 2.564337469809278, "grad_norm": 0.59765625, "learning_rate": 1.3071473379170206e-05, "loss": 4.0838, "step": 7698 }, { "epoch": 2.5646706088115265, "grad_norm": 0.625, "learning_rate": 1.3070931473517395e-05, "loss": 4.0018, "step": 7699 }, { "epoch": 2.5650037478137753, "grad_norm": 0.61328125, "learning_rate": 1.3070389502975295e-05, "loss": 4.0632, "step": 7700 }, { "epoch": 2.565336886816024, "grad_norm": 0.60546875, "learning_rate": 1.3069847467550225e-05, "loss": 4.0215, "step": 7701 }, { "epoch": 2.5656700258182727, "grad_norm": 0.59375, "learning_rate": 1.3069305367248496e-05, "loss": 4.0449, "step": 7702 }, { "epoch": 2.5660031648205215, "grad_norm": 0.6015625, "learning_rate": 1.3068763202076423e-05, "loss": 4.0163, "step": 7703 }, { "epoch": 2.5663363038227702, "grad_norm": 0.6171875, "learning_rate": 1.3068220972040319e-05, "loss": 4.0138, "step": 7704 }, { "epoch": 2.5666694428250185, "grad_norm": 0.58984375, "learning_rate": 1.3067678677146502e-05, "loss": 4.0248, "step": 7705 }, { "epoch": 2.5670025818272677, "grad_norm": 0.60546875, "learning_rate": 1.306713631740129e-05, "loss": 4.0183, "step": 7706 }, { "epoch": 2.567335720829516, "grad_norm": 0.62890625, "learning_rate": 1.3066593892810996e-05, "loss": 4.0408, "step": 7707 }, { "epoch": 2.5676688598317647, "grad_norm": 0.625, "learning_rate": 1.3066051403381941e-05, "loss": 3.9734, "step": 7708 }, { "epoch": 2.5680019988340135, "grad_norm": 0.62890625, "learning_rate": 1.3065508849120444e-05, "loss": 3.9536, "step": 7709 }, { "epoch": 2.568335137836262, "grad_norm": 0.62890625, "learning_rate": 1.3064966230032823e-05, "loss": 4.0791, "step": 7710 }, { "epoch": 2.568668276838511, "grad_norm": 0.6015625, "learning_rate": 1.3064423546125398e-05, "loss": 3.9993, "step": 7711 }, { "epoch": 2.5690014158407597, "grad_norm": 0.62109375, "learning_rate": 1.3063880797404493e-05, "loss": 4.0646, "step": 7712 }, { "epoch": 2.5693345548430084, "grad_norm": 0.5859375, "learning_rate": 1.3063337983876426e-05, "loss": 4.0088, "step": 7713 }, { "epoch": 2.5696676938452567, "grad_norm": 0.6328125, "learning_rate": 1.3062795105547526e-05, "loss": 4.0013, "step": 7714 }, { "epoch": 2.5700008328475055, "grad_norm": 0.59765625, "learning_rate": 1.3062252162424108e-05, "loss": 4.0082, "step": 7715 }, { "epoch": 2.570333971849754, "grad_norm": 0.6328125, "learning_rate": 1.3061709154512501e-05, "loss": 4.0186, "step": 7716 }, { "epoch": 2.570667110852003, "grad_norm": 0.59765625, "learning_rate": 1.306116608181903e-05, "loss": 4.1812, "step": 7717 }, { "epoch": 2.5710002498542517, "grad_norm": 0.58984375, "learning_rate": 1.3060622944350018e-05, "loss": 4.0894, "step": 7718 }, { "epoch": 2.5713333888565004, "grad_norm": 0.6015625, "learning_rate": 1.3060079742111794e-05, "loss": 4.0649, "step": 7719 }, { "epoch": 2.571666527858749, "grad_norm": 0.60546875, "learning_rate": 1.3059536475110685e-05, "loss": 4.0619, "step": 7720 }, { "epoch": 2.571999666860998, "grad_norm": 0.57421875, "learning_rate": 1.3058993143353017e-05, "loss": 4.071, "step": 7721 }, { "epoch": 2.5723328058632466, "grad_norm": 0.63671875, "learning_rate": 1.3058449746845119e-05, "loss": 3.9774, "step": 7722 }, { "epoch": 2.572665944865495, "grad_norm": 0.58984375, "learning_rate": 1.3057906285593322e-05, "loss": 4.0263, "step": 7723 }, { "epoch": 2.5729990838677437, "grad_norm": 0.6015625, "learning_rate": 1.3057362759603954e-05, "loss": 4.0342, "step": 7724 }, { "epoch": 2.5733322228699924, "grad_norm": 0.625, "learning_rate": 1.3056819168883347e-05, "loss": 4.0412, "step": 7725 }, { "epoch": 2.573665361872241, "grad_norm": 0.6171875, "learning_rate": 1.3056275513437836e-05, "loss": 3.9749, "step": 7726 }, { "epoch": 2.57399850087449, "grad_norm": 0.58984375, "learning_rate": 1.3055731793273746e-05, "loss": 4.0716, "step": 7727 }, { "epoch": 2.5743316398767386, "grad_norm": 0.58203125, "learning_rate": 1.3055188008397419e-05, "loss": 3.9887, "step": 7728 }, { "epoch": 2.5746647788789874, "grad_norm": 0.59765625, "learning_rate": 1.305464415881518e-05, "loss": 3.9593, "step": 7729 }, { "epoch": 2.574997917881236, "grad_norm": 0.5859375, "learning_rate": 1.305410024453337e-05, "loss": 3.9873, "step": 7730 }, { "epoch": 2.575331056883485, "grad_norm": 0.609375, "learning_rate": 1.305355626555832e-05, "loss": 4.044, "step": 7731 }, { "epoch": 2.575664195885733, "grad_norm": 0.5859375, "learning_rate": 1.305301222189637e-05, "loss": 4.0247, "step": 7732 }, { "epoch": 2.575997334887982, "grad_norm": 0.59375, "learning_rate": 1.3052468113553855e-05, "loss": 3.9902, "step": 7733 }, { "epoch": 2.5763304738902306, "grad_norm": 0.609375, "learning_rate": 1.3051923940537113e-05, "loss": 3.9984, "step": 7734 }, { "epoch": 2.5766636128924794, "grad_norm": 0.62109375, "learning_rate": 1.3051379702852486e-05, "loss": 4.0244, "step": 7735 }, { "epoch": 2.576996751894728, "grad_norm": 0.61328125, "learning_rate": 1.3050835400506303e-05, "loss": 4.0162, "step": 7736 }, { "epoch": 2.577329890896977, "grad_norm": 0.6015625, "learning_rate": 1.3050291033504918e-05, "loss": 4.0155, "step": 7737 }, { "epoch": 2.5776630298992256, "grad_norm": 0.58203125, "learning_rate": 1.3049746601854658e-05, "loss": 4.0765, "step": 7738 }, { "epoch": 2.577996168901474, "grad_norm": 0.6015625, "learning_rate": 1.3049202105561875e-05, "loss": 3.9916, "step": 7739 }, { "epoch": 2.578329307903723, "grad_norm": 0.60546875, "learning_rate": 1.3048657544632905e-05, "loss": 4.073, "step": 7740 }, { "epoch": 2.5786624469059714, "grad_norm": 0.6171875, "learning_rate": 1.3048112919074093e-05, "loss": 3.9905, "step": 7741 }, { "epoch": 2.57899558590822, "grad_norm": 0.60546875, "learning_rate": 1.3047568228891785e-05, "loss": 4.0354, "step": 7742 }, { "epoch": 2.579328724910469, "grad_norm": 0.63671875, "learning_rate": 1.304702347409232e-05, "loss": 3.9063, "step": 7743 }, { "epoch": 2.5796618639127176, "grad_norm": 0.60546875, "learning_rate": 1.3046478654682049e-05, "loss": 4.0397, "step": 7744 }, { "epoch": 2.5799950029149663, "grad_norm": 0.57421875, "learning_rate": 1.3045933770667315e-05, "loss": 4.1071, "step": 7745 }, { "epoch": 2.580328141917215, "grad_norm": 0.61328125, "learning_rate": 1.3045388822054463e-05, "loss": 3.9765, "step": 7746 }, { "epoch": 2.580661280919464, "grad_norm": 0.5859375, "learning_rate": 1.3044843808849844e-05, "loss": 4.0649, "step": 7747 }, { "epoch": 2.580994419921712, "grad_norm": 0.59765625, "learning_rate": 1.3044298731059804e-05, "loss": 4.015, "step": 7748 }, { "epoch": 2.5813275589239613, "grad_norm": 0.6015625, "learning_rate": 1.3043753588690694e-05, "loss": 3.9885, "step": 7749 }, { "epoch": 2.5816606979262096, "grad_norm": 0.62109375, "learning_rate": 1.304320838174886e-05, "loss": 4.0152, "step": 7750 }, { "epoch": 2.5819938369284583, "grad_norm": 0.625, "learning_rate": 1.3042663110240657e-05, "loss": 4.0929, "step": 7751 }, { "epoch": 2.582326975930707, "grad_norm": 0.55859375, "learning_rate": 1.3042117774172433e-05, "loss": 4.0694, "step": 7752 }, { "epoch": 2.5826601149329558, "grad_norm": 0.61328125, "learning_rate": 1.3041572373550543e-05, "loss": 3.9894, "step": 7753 }, { "epoch": 2.5829932539352045, "grad_norm": 0.60546875, "learning_rate": 1.3041026908381337e-05, "loss": 4.0351, "step": 7754 }, { "epoch": 2.5833263929374533, "grad_norm": 0.60546875, "learning_rate": 1.304048137867117e-05, "loss": 4.0516, "step": 7755 }, { "epoch": 2.583659531939702, "grad_norm": 0.58984375, "learning_rate": 1.3039935784426395e-05, "loss": 3.9901, "step": 7756 }, { "epoch": 2.5839926709419503, "grad_norm": 0.62109375, "learning_rate": 1.3039390125653365e-05, "loss": 4.0111, "step": 7757 }, { "epoch": 2.5843258099441995, "grad_norm": 0.609375, "learning_rate": 1.3038844402358443e-05, "loss": 4.0271, "step": 7758 }, { "epoch": 2.5846589489464478, "grad_norm": 0.62109375, "learning_rate": 1.3038298614547977e-05, "loss": 4.0777, "step": 7759 }, { "epoch": 2.5849920879486965, "grad_norm": 0.625, "learning_rate": 1.303775276222833e-05, "loss": 4.0594, "step": 7760 }, { "epoch": 2.5853252269509452, "grad_norm": 0.6171875, "learning_rate": 1.3037206845405857e-05, "loss": 3.9472, "step": 7761 }, { "epoch": 2.585658365953194, "grad_norm": 0.60546875, "learning_rate": 1.3036660864086918e-05, "loss": 4.0573, "step": 7762 }, { "epoch": 2.5859915049554427, "grad_norm": 0.609375, "learning_rate": 1.3036114818277871e-05, "loss": 4.0327, "step": 7763 }, { "epoch": 2.5863246439576915, "grad_norm": 0.59765625, "learning_rate": 1.3035568707985081e-05, "loss": 4.0287, "step": 7764 }, { "epoch": 2.58665778295994, "grad_norm": 0.61328125, "learning_rate": 1.3035022533214904e-05, "loss": 4.027, "step": 7765 }, { "epoch": 2.5869909219621885, "grad_norm": 0.62109375, "learning_rate": 1.3034476293973704e-05, "loss": 4.0627, "step": 7766 }, { "epoch": 2.5873240609644377, "grad_norm": 0.59375, "learning_rate": 1.3033929990267842e-05, "loss": 4.0393, "step": 7767 }, { "epoch": 2.587657199966686, "grad_norm": 0.59765625, "learning_rate": 1.3033383622103682e-05, "loss": 4.0314, "step": 7768 }, { "epoch": 2.5879903389689347, "grad_norm": 0.609375, "learning_rate": 1.3032837189487589e-05, "loss": 3.9426, "step": 7769 }, { "epoch": 2.5883234779711835, "grad_norm": 0.61328125, "learning_rate": 1.3032290692425924e-05, "loss": 3.9972, "step": 7770 }, { "epoch": 2.588656616973432, "grad_norm": 0.6015625, "learning_rate": 1.3031744130925059e-05, "loss": 4.0501, "step": 7771 }, { "epoch": 2.588989755975681, "grad_norm": 0.609375, "learning_rate": 1.3031197504991355e-05, "loss": 4.0528, "step": 7772 }, { "epoch": 2.5893228949779297, "grad_norm": 0.6171875, "learning_rate": 1.3030650814631181e-05, "loss": 4.0429, "step": 7773 }, { "epoch": 2.5896560339801784, "grad_norm": 0.609375, "learning_rate": 1.3030104059850904e-05, "loss": 4.089, "step": 7774 }, { "epoch": 2.5899891729824267, "grad_norm": 0.6328125, "learning_rate": 1.3029557240656892e-05, "loss": 4.0077, "step": 7775 }, { "epoch": 2.5903223119846754, "grad_norm": 0.59375, "learning_rate": 1.3029010357055517e-05, "loss": 3.9857, "step": 7776 }, { "epoch": 2.590655450986924, "grad_norm": 0.60546875, "learning_rate": 1.3028463409053145e-05, "loss": 4.0869, "step": 7777 }, { "epoch": 2.590988589989173, "grad_norm": 0.60546875, "learning_rate": 1.302791639665615e-05, "loss": 4.0877, "step": 7778 }, { "epoch": 2.5913217289914217, "grad_norm": 0.58203125, "learning_rate": 1.3027369319870903e-05, "loss": 4.05, "step": 7779 }, { "epoch": 2.5916548679936704, "grad_norm": 0.6171875, "learning_rate": 1.3026822178703774e-05, "loss": 3.976, "step": 7780 }, { "epoch": 2.591988006995919, "grad_norm": 0.59375, "learning_rate": 1.302627497316114e-05, "loss": 4.0438, "step": 7781 }, { "epoch": 2.592321145998168, "grad_norm": 0.625, "learning_rate": 1.302572770324937e-05, "loss": 4.0185, "step": 7782 }, { "epoch": 2.5926542850004166, "grad_norm": 0.61328125, "learning_rate": 1.3025180368974844e-05, "loss": 4.0591, "step": 7783 }, { "epoch": 2.592987424002665, "grad_norm": 0.6015625, "learning_rate": 1.3024632970343934e-05, "loss": 4.0797, "step": 7784 }, { "epoch": 2.5933205630049136, "grad_norm": 0.609375, "learning_rate": 1.3024085507363013e-05, "loss": 4.0584, "step": 7785 }, { "epoch": 2.5936537020071624, "grad_norm": 0.609375, "learning_rate": 1.3023537980038463e-05, "loss": 4.0543, "step": 7786 }, { "epoch": 2.593986841009411, "grad_norm": 0.62109375, "learning_rate": 1.302299038837666e-05, "loss": 4.1026, "step": 7787 }, { "epoch": 2.59431998001166, "grad_norm": 0.6171875, "learning_rate": 1.3022442732383979e-05, "loss": 3.9758, "step": 7788 }, { "epoch": 2.5946531190139086, "grad_norm": 0.64453125, "learning_rate": 1.3021895012066805e-05, "loss": 4.065, "step": 7789 }, { "epoch": 2.5949862580161573, "grad_norm": 0.625, "learning_rate": 1.3021347227431513e-05, "loss": 3.967, "step": 7790 }, { "epoch": 2.595319397018406, "grad_norm": 0.609375, "learning_rate": 1.3020799378484485e-05, "loss": 4.0077, "step": 7791 }, { "epoch": 2.595652536020655, "grad_norm": 0.6171875, "learning_rate": 1.3020251465232103e-05, "loss": 4.0612, "step": 7792 }, { "epoch": 2.595985675022903, "grad_norm": 0.62890625, "learning_rate": 1.3019703487680747e-05, "loss": 3.9763, "step": 7793 }, { "epoch": 2.596318814025152, "grad_norm": 0.6171875, "learning_rate": 1.3019155445836802e-05, "loss": 4.0492, "step": 7794 }, { "epoch": 2.5966519530274006, "grad_norm": 0.5625, "learning_rate": 1.3018607339706651e-05, "loss": 3.9975, "step": 7795 }, { "epoch": 2.5969850920296493, "grad_norm": 0.6015625, "learning_rate": 1.3018059169296678e-05, "loss": 4.0471, "step": 7796 }, { "epoch": 2.597318231031898, "grad_norm": 0.59765625, "learning_rate": 1.3017510934613267e-05, "loss": 4.0166, "step": 7797 }, { "epoch": 2.597651370034147, "grad_norm": 0.58984375, "learning_rate": 1.3016962635662804e-05, "loss": 4.1116, "step": 7798 }, { "epoch": 2.5979845090363956, "grad_norm": 0.640625, "learning_rate": 1.3016414272451676e-05, "loss": 4.0428, "step": 7799 }, { "epoch": 2.5983176480386443, "grad_norm": 0.58984375, "learning_rate": 1.3015865844986271e-05, "loss": 3.9672, "step": 7800 }, { "epoch": 2.598650787040893, "grad_norm": 0.5859375, "learning_rate": 1.3015317353272977e-05, "loss": 4.0751, "step": 7801 }, { "epoch": 2.5989839260431413, "grad_norm": 0.73828125, "learning_rate": 1.3014768797318182e-05, "loss": 4.0569, "step": 7802 }, { "epoch": 2.59931706504539, "grad_norm": 0.640625, "learning_rate": 1.3014220177128272e-05, "loss": 4.01, "step": 7803 }, { "epoch": 2.599650204047639, "grad_norm": 0.62890625, "learning_rate": 1.3013671492709646e-05, "loss": 3.913, "step": 7804 }, { "epoch": 2.5999833430498875, "grad_norm": 0.6953125, "learning_rate": 1.3013122744068685e-05, "loss": 3.9988, "step": 7805 }, { "epoch": 2.6003164820521363, "grad_norm": 0.6015625, "learning_rate": 1.3012573931211789e-05, "loss": 4.0517, "step": 7806 }, { "epoch": 2.600649621054385, "grad_norm": 0.609375, "learning_rate": 1.3012025054145345e-05, "loss": 4.028, "step": 7807 }, { "epoch": 2.6009827600566338, "grad_norm": 0.62109375, "learning_rate": 1.3011476112875748e-05, "loss": 3.9822, "step": 7808 }, { "epoch": 2.601315899058882, "grad_norm": 0.59765625, "learning_rate": 1.3010927107409393e-05, "loss": 4.0002, "step": 7809 }, { "epoch": 2.6016490380611312, "grad_norm": 0.6484375, "learning_rate": 1.3010378037752672e-05, "loss": 3.9802, "step": 7810 }, { "epoch": 2.6019821770633795, "grad_norm": 0.62109375, "learning_rate": 1.3009828903911984e-05, "loss": 4.0515, "step": 7811 }, { "epoch": 2.6023153160656283, "grad_norm": 0.59375, "learning_rate": 1.3009279705893723e-05, "loss": 4.0289, "step": 7812 }, { "epoch": 2.602648455067877, "grad_norm": 0.59375, "learning_rate": 1.3008730443704288e-05, "loss": 4.0632, "step": 7813 }, { "epoch": 2.6029815940701257, "grad_norm": 0.63671875, "learning_rate": 1.3008181117350074e-05, "loss": 4.0213, "step": 7814 }, { "epoch": 2.6033147330723745, "grad_norm": 0.609375, "learning_rate": 1.300763172683748e-05, "loss": 4.0081, "step": 7815 }, { "epoch": 2.6036478720746232, "grad_norm": 0.6015625, "learning_rate": 1.3007082272172906e-05, "loss": 4.0437, "step": 7816 }, { "epoch": 2.603981011076872, "grad_norm": 0.65234375, "learning_rate": 1.3006532753362752e-05, "loss": 3.9653, "step": 7817 }, { "epoch": 2.6043141500791203, "grad_norm": 0.6171875, "learning_rate": 1.300598317041342e-05, "loss": 4.0342, "step": 7818 }, { "epoch": 2.6046472890813694, "grad_norm": 0.5859375, "learning_rate": 1.3005433523331308e-05, "loss": 4.0702, "step": 7819 }, { "epoch": 2.6049804280836177, "grad_norm": 0.6015625, "learning_rate": 1.3004883812122821e-05, "loss": 4.0526, "step": 7820 }, { "epoch": 2.6053135670858665, "grad_norm": 0.609375, "learning_rate": 1.3004334036794362e-05, "loss": 4.0298, "step": 7821 }, { "epoch": 2.605646706088115, "grad_norm": 0.609375, "learning_rate": 1.3003784197352333e-05, "loss": 4.0009, "step": 7822 }, { "epoch": 2.605979845090364, "grad_norm": 0.62109375, "learning_rate": 1.300323429380314e-05, "loss": 3.9694, "step": 7823 }, { "epoch": 2.6063129840926127, "grad_norm": 0.60546875, "learning_rate": 1.3002684326153187e-05, "loss": 4.0031, "step": 7824 }, { "epoch": 2.6066461230948614, "grad_norm": 0.61328125, "learning_rate": 1.3002134294408881e-05, "loss": 4.0196, "step": 7825 }, { "epoch": 2.60697926209711, "grad_norm": 0.60546875, "learning_rate": 1.3001584198576624e-05, "loss": 3.9953, "step": 7826 }, { "epoch": 2.6073124010993585, "grad_norm": 0.6171875, "learning_rate": 1.3001034038662832e-05, "loss": 4.0445, "step": 7827 }, { "epoch": 2.6076455401016077, "grad_norm": 0.58203125, "learning_rate": 1.3000483814673907e-05, "loss": 4.0093, "step": 7828 }, { "epoch": 2.607978679103856, "grad_norm": 0.56640625, "learning_rate": 1.2999933526616261e-05, "loss": 4.088, "step": 7829 }, { "epoch": 2.6083118181061047, "grad_norm": 0.6328125, "learning_rate": 1.2999383174496303e-05, "loss": 4.0291, "step": 7830 }, { "epoch": 2.6086449571083534, "grad_norm": 0.6171875, "learning_rate": 1.299883275832044e-05, "loss": 4.0552, "step": 7831 }, { "epoch": 2.608978096110602, "grad_norm": 0.6171875, "learning_rate": 1.2998282278095085e-05, "loss": 4.0699, "step": 7832 }, { "epoch": 2.609311235112851, "grad_norm": 0.640625, "learning_rate": 1.2997731733826656e-05, "loss": 3.9335, "step": 7833 }, { "epoch": 2.6096443741150996, "grad_norm": 0.60546875, "learning_rate": 1.2997181125521556e-05, "loss": 4.0572, "step": 7834 }, { "epoch": 2.6099775131173484, "grad_norm": 0.61328125, "learning_rate": 1.2996630453186204e-05, "loss": 3.9778, "step": 7835 }, { "epoch": 2.6103106521195967, "grad_norm": 0.58203125, "learning_rate": 1.299607971682701e-05, "loss": 3.9602, "step": 7836 }, { "epoch": 2.610643791121846, "grad_norm": 0.63671875, "learning_rate": 1.2995528916450395e-05, "loss": 4.0495, "step": 7837 }, { "epoch": 2.610976930124094, "grad_norm": 0.62890625, "learning_rate": 1.2994978052062772e-05, "loss": 4.0368, "step": 7838 }, { "epoch": 2.611310069126343, "grad_norm": 0.59375, "learning_rate": 1.2994427123670555e-05, "loss": 4.0463, "step": 7839 }, { "epoch": 2.6116432081285916, "grad_norm": 0.58984375, "learning_rate": 1.2993876131280163e-05, "loss": 3.9913, "step": 7840 }, { "epoch": 2.6119763471308404, "grad_norm": 0.59765625, "learning_rate": 1.2993325074898014e-05, "loss": 4.0536, "step": 7841 }, { "epoch": 2.612309486133089, "grad_norm": 0.6484375, "learning_rate": 1.2992773954530529e-05, "loss": 3.9791, "step": 7842 }, { "epoch": 2.612642625135338, "grad_norm": 0.6015625, "learning_rate": 1.299222277018412e-05, "loss": 3.9555, "step": 7843 }, { "epoch": 2.6129757641375866, "grad_norm": 0.60546875, "learning_rate": 1.2991671521865215e-05, "loss": 3.9581, "step": 7844 }, { "epoch": 2.613308903139835, "grad_norm": 0.56640625, "learning_rate": 1.2991120209580232e-05, "loss": 4.0288, "step": 7845 }, { "epoch": 2.6136420421420836, "grad_norm": 0.60546875, "learning_rate": 1.2990568833335589e-05, "loss": 4.0132, "step": 7846 }, { "epoch": 2.6139751811443324, "grad_norm": 0.58984375, "learning_rate": 1.2990017393137716e-05, "loss": 4.0539, "step": 7847 }, { "epoch": 2.614308320146581, "grad_norm": 0.62890625, "learning_rate": 1.2989465888993029e-05, "loss": 4.06, "step": 7848 }, { "epoch": 2.61464145914883, "grad_norm": 0.62109375, "learning_rate": 1.2988914320907958e-05, "loss": 3.9347, "step": 7849 }, { "epoch": 2.6149745981510786, "grad_norm": 0.56640625, "learning_rate": 1.2988362688888921e-05, "loss": 4.0475, "step": 7850 }, { "epoch": 2.6153077371533273, "grad_norm": 0.61328125, "learning_rate": 1.2987810992942345e-05, "loss": 4.0412, "step": 7851 }, { "epoch": 2.615640876155576, "grad_norm": 0.64453125, "learning_rate": 1.298725923307466e-05, "loss": 4.054, "step": 7852 }, { "epoch": 2.615974015157825, "grad_norm": 0.6484375, "learning_rate": 1.298670740929229e-05, "loss": 4.0171, "step": 7853 }, { "epoch": 2.616307154160073, "grad_norm": 0.60546875, "learning_rate": 1.2986155521601662e-05, "loss": 3.99, "step": 7854 }, { "epoch": 2.616640293162322, "grad_norm": 0.59375, "learning_rate": 1.2985603570009205e-05, "loss": 4.0684, "step": 7855 }, { "epoch": 2.6169734321645706, "grad_norm": 0.578125, "learning_rate": 1.2985051554521349e-05, "loss": 3.9701, "step": 7856 }, { "epoch": 2.6173065711668193, "grad_norm": 0.5859375, "learning_rate": 1.2984499475144524e-05, "loss": 4.1122, "step": 7857 }, { "epoch": 2.617639710169068, "grad_norm": 0.6171875, "learning_rate": 1.2983947331885159e-05, "loss": 4.0469, "step": 7858 }, { "epoch": 2.617972849171317, "grad_norm": 0.62890625, "learning_rate": 1.2983395124749686e-05, "loss": 4.1412, "step": 7859 }, { "epoch": 2.6183059881735655, "grad_norm": 0.58984375, "learning_rate": 1.2982842853744535e-05, "loss": 4.0069, "step": 7860 }, { "epoch": 2.6186391271758143, "grad_norm": 0.60546875, "learning_rate": 1.2982290518876143e-05, "loss": 4.0595, "step": 7861 }, { "epoch": 2.618972266178063, "grad_norm": 0.61328125, "learning_rate": 1.2981738120150936e-05, "loss": 3.9499, "step": 7862 }, { "epoch": 2.6193054051803113, "grad_norm": 0.625, "learning_rate": 1.298118565757536e-05, "loss": 4.0768, "step": 7863 }, { "epoch": 2.61963854418256, "grad_norm": 0.62890625, "learning_rate": 1.298063313115584e-05, "loss": 4.0525, "step": 7864 }, { "epoch": 2.6199716831848088, "grad_norm": 0.58984375, "learning_rate": 1.2980080540898813e-05, "loss": 4.0206, "step": 7865 }, { "epoch": 2.6203048221870575, "grad_norm": 0.6875, "learning_rate": 1.297952788681072e-05, "loss": 4.0403, "step": 7866 }, { "epoch": 2.6206379611893063, "grad_norm": 0.60546875, "learning_rate": 1.2978975168897998e-05, "loss": 3.9958, "step": 7867 }, { "epoch": 2.620971100191555, "grad_norm": 0.625, "learning_rate": 1.2978422387167079e-05, "loss": 3.9932, "step": 7868 }, { "epoch": 2.6213042391938037, "grad_norm": 0.61328125, "learning_rate": 1.2977869541624408e-05, "loss": 4.0564, "step": 7869 }, { "epoch": 2.621637378196052, "grad_norm": 0.6015625, "learning_rate": 1.297731663227642e-05, "loss": 4.0233, "step": 7870 }, { "epoch": 2.621970517198301, "grad_norm": 0.63671875, "learning_rate": 1.2976763659129557e-05, "loss": 4.0441, "step": 7871 }, { "epoch": 2.6223036562005495, "grad_norm": 0.62109375, "learning_rate": 1.2976210622190262e-05, "loss": 4.059, "step": 7872 }, { "epoch": 2.6226367952027982, "grad_norm": 0.56640625, "learning_rate": 1.2975657521464972e-05, "loss": 4.0884, "step": 7873 }, { "epoch": 2.622969934205047, "grad_norm": 0.625, "learning_rate": 1.2975104356960135e-05, "loss": 4.0822, "step": 7874 }, { "epoch": 2.6233030732072957, "grad_norm": 0.640625, "learning_rate": 1.297455112868219e-05, "loss": 4.041, "step": 7875 }, { "epoch": 2.6236362122095445, "grad_norm": 0.6171875, "learning_rate": 1.2973997836637581e-05, "loss": 4.0599, "step": 7876 }, { "epoch": 2.623969351211793, "grad_norm": 0.6171875, "learning_rate": 1.2973444480832756e-05, "loss": 4.0771, "step": 7877 }, { "epoch": 2.624302490214042, "grad_norm": 0.6640625, "learning_rate": 1.2972891061274156e-05, "loss": 3.9939, "step": 7878 }, { "epoch": 2.6246356292162902, "grad_norm": 0.6015625, "learning_rate": 1.2972337577968231e-05, "loss": 4.0421, "step": 7879 }, { "epoch": 2.6249687682185394, "grad_norm": 0.58984375, "learning_rate": 1.2971784030921427e-05, "loss": 4.0375, "step": 7880 }, { "epoch": 2.6253019072207877, "grad_norm": 0.6171875, "learning_rate": 1.297123042014019e-05, "loss": 4.0754, "step": 7881 }, { "epoch": 2.6256350462230365, "grad_norm": 0.6015625, "learning_rate": 1.2970676745630968e-05, "loss": 4.0553, "step": 7882 }, { "epoch": 2.625968185225285, "grad_norm": 0.62109375, "learning_rate": 1.2970123007400213e-05, "loss": 3.9651, "step": 7883 }, { "epoch": 2.626301324227534, "grad_norm": 0.62109375, "learning_rate": 1.2969569205454372e-05, "loss": 3.9774, "step": 7884 }, { "epoch": 2.6266344632297827, "grad_norm": 0.57421875, "learning_rate": 1.2969015339799898e-05, "loss": 4.1095, "step": 7885 }, { "epoch": 2.6269676022320314, "grad_norm": 0.58984375, "learning_rate": 1.2968461410443241e-05, "loss": 4.0306, "step": 7886 }, { "epoch": 2.62730074123428, "grad_norm": 0.5859375, "learning_rate": 1.2967907417390854e-05, "loss": 4.0238, "step": 7887 }, { "epoch": 2.6276338802365284, "grad_norm": 0.6015625, "learning_rate": 1.2967353360649187e-05, "loss": 4.0076, "step": 7888 }, { "epoch": 2.6279670192387776, "grad_norm": 0.59375, "learning_rate": 1.2966799240224697e-05, "loss": 4.0137, "step": 7889 }, { "epoch": 2.628300158241026, "grad_norm": 0.59765625, "learning_rate": 1.2966245056123837e-05, "loss": 4.1104, "step": 7890 }, { "epoch": 2.6286332972432747, "grad_norm": 0.6171875, "learning_rate": 1.296569080835306e-05, "loss": 4.0036, "step": 7891 }, { "epoch": 2.6289664362455234, "grad_norm": 0.61328125, "learning_rate": 1.2965136496918825e-05, "loss": 3.894, "step": 7892 }, { "epoch": 2.629299575247772, "grad_norm": 0.60546875, "learning_rate": 1.296458212182759e-05, "loss": 4.068, "step": 7893 }, { "epoch": 2.629632714250021, "grad_norm": 0.5859375, "learning_rate": 1.2964027683085806e-05, "loss": 4.063, "step": 7894 }, { "epoch": 2.6299658532522696, "grad_norm": 0.62109375, "learning_rate": 1.2963473180699936e-05, "loss": 3.9784, "step": 7895 }, { "epoch": 2.6302989922545184, "grad_norm": 0.60546875, "learning_rate": 1.2962918614676438e-05, "loss": 3.9919, "step": 7896 }, { "epoch": 2.6306321312567666, "grad_norm": 0.59765625, "learning_rate": 1.2962363985021769e-05, "loss": 4.0448, "step": 7897 }, { "epoch": 2.630965270259016, "grad_norm": 0.61328125, "learning_rate": 1.296180929174239e-05, "loss": 4.0824, "step": 7898 }, { "epoch": 2.631298409261264, "grad_norm": 0.6015625, "learning_rate": 1.2961254534844767e-05, "loss": 4.1063, "step": 7899 }, { "epoch": 2.631631548263513, "grad_norm": 0.57421875, "learning_rate": 1.2960699714335355e-05, "loss": 4.0063, "step": 7900 }, { "epoch": 2.6319646872657616, "grad_norm": 0.59375, "learning_rate": 1.2960144830220619e-05, "loss": 4.0565, "step": 7901 }, { "epoch": 2.6322978262680103, "grad_norm": 0.62890625, "learning_rate": 1.2959589882507023e-05, "loss": 4.0591, "step": 7902 }, { "epoch": 2.632630965270259, "grad_norm": 0.55859375, "learning_rate": 1.295903487120103e-05, "loss": 4.0412, "step": 7903 }, { "epoch": 2.632964104272508, "grad_norm": 0.59765625, "learning_rate": 1.2958479796309105e-05, "loss": 3.992, "step": 7904 }, { "epoch": 2.6332972432747566, "grad_norm": 0.60546875, "learning_rate": 1.2957924657837714e-05, "loss": 3.9313, "step": 7905 }, { "epoch": 2.633630382277005, "grad_norm": 0.58984375, "learning_rate": 1.2957369455793324e-05, "loss": 4.0145, "step": 7906 }, { "epoch": 2.633963521279254, "grad_norm": 0.609375, "learning_rate": 1.2956814190182398e-05, "loss": 4.0798, "step": 7907 }, { "epoch": 2.6342966602815023, "grad_norm": 0.5859375, "learning_rate": 1.2956258861011408e-05, "loss": 4.0695, "step": 7908 }, { "epoch": 2.634629799283751, "grad_norm": 0.6015625, "learning_rate": 1.2955703468286819e-05, "loss": 4.0098, "step": 7909 }, { "epoch": 2.634962938286, "grad_norm": 0.609375, "learning_rate": 1.2955148012015102e-05, "loss": 4.0375, "step": 7910 }, { "epoch": 2.6352960772882486, "grad_norm": 0.6328125, "learning_rate": 1.2954592492202725e-05, "loss": 3.9611, "step": 7911 }, { "epoch": 2.6356292162904973, "grad_norm": 0.58984375, "learning_rate": 1.2954036908856164e-05, "loss": 4.1224, "step": 7912 }, { "epoch": 2.635962355292746, "grad_norm": 0.58984375, "learning_rate": 1.2953481261981882e-05, "loss": 3.9705, "step": 7913 }, { "epoch": 2.6362954942949948, "grad_norm": 0.55859375, "learning_rate": 1.2952925551586359e-05, "loss": 4.0885, "step": 7914 }, { "epoch": 2.636628633297243, "grad_norm": 0.6171875, "learning_rate": 1.2952369777676063e-05, "loss": 4.0417, "step": 7915 }, { "epoch": 2.636961772299492, "grad_norm": 0.5859375, "learning_rate": 1.295181394025747e-05, "loss": 4.0713, "step": 7916 }, { "epoch": 2.6372949113017405, "grad_norm": 0.58984375, "learning_rate": 1.2951258039337051e-05, "loss": 4.0628, "step": 7917 }, { "epoch": 2.6376280503039893, "grad_norm": 0.6015625, "learning_rate": 1.2950702074921286e-05, "loss": 4.0598, "step": 7918 }, { "epoch": 2.637961189306238, "grad_norm": 0.58984375, "learning_rate": 1.2950146047016646e-05, "loss": 4.0432, "step": 7919 }, { "epoch": 2.6382943283084868, "grad_norm": 0.58203125, "learning_rate": 1.2949589955629609e-05, "loss": 4.0004, "step": 7920 }, { "epoch": 2.6386274673107355, "grad_norm": 0.5703125, "learning_rate": 1.2949033800766654e-05, "loss": 4.0921, "step": 7921 }, { "epoch": 2.6389606063129842, "grad_norm": 0.609375, "learning_rate": 1.2948477582434256e-05, "loss": 4.0025, "step": 7922 }, { "epoch": 2.639293745315233, "grad_norm": 0.6171875, "learning_rate": 1.2947921300638899e-05, "loss": 4.0655, "step": 7923 }, { "epoch": 2.6396268843174813, "grad_norm": 0.6015625, "learning_rate": 1.2947364955387058e-05, "loss": 3.9573, "step": 7924 }, { "epoch": 2.63996002331973, "grad_norm": 0.6015625, "learning_rate": 1.2946808546685211e-05, "loss": 3.915, "step": 7925 }, { "epoch": 2.6402931623219787, "grad_norm": 0.546875, "learning_rate": 1.2946252074539845e-05, "loss": 4.0579, "step": 7926 }, { "epoch": 2.6406263013242275, "grad_norm": 0.60546875, "learning_rate": 1.2945695538957439e-05, "loss": 3.9803, "step": 7927 }, { "epoch": 2.6409594403264762, "grad_norm": 0.61328125, "learning_rate": 1.2945138939944477e-05, "loss": 4.0413, "step": 7928 }, { "epoch": 2.641292579328725, "grad_norm": 0.59375, "learning_rate": 1.2944582277507437e-05, "loss": 4.0815, "step": 7929 }, { "epoch": 2.6416257183309737, "grad_norm": 0.59375, "learning_rate": 1.2944025551652808e-05, "loss": 4.0724, "step": 7930 }, { "epoch": 2.6419588573332224, "grad_norm": 0.60546875, "learning_rate": 1.2943468762387073e-05, "loss": 4.0517, "step": 7931 }, { "epoch": 2.642291996335471, "grad_norm": 0.60546875, "learning_rate": 1.2942911909716715e-05, "loss": 3.9748, "step": 7932 }, { "epoch": 2.6426251353377195, "grad_norm": 0.57421875, "learning_rate": 1.2942354993648225e-05, "loss": 4.0625, "step": 7933 }, { "epoch": 2.642958274339968, "grad_norm": 0.578125, "learning_rate": 1.2941798014188088e-05, "loss": 4.0307, "step": 7934 }, { "epoch": 2.643291413342217, "grad_norm": 0.58984375, "learning_rate": 1.2941240971342791e-05, "loss": 4.0072, "step": 7935 }, { "epoch": 2.6436245523444657, "grad_norm": 0.609375, "learning_rate": 1.2940683865118822e-05, "loss": 3.981, "step": 7936 }, { "epoch": 2.6439576913467144, "grad_norm": 0.59765625, "learning_rate": 1.2940126695522671e-05, "loss": 3.991, "step": 7937 }, { "epoch": 2.644290830348963, "grad_norm": 0.6015625, "learning_rate": 1.2939569462560824e-05, "loss": 4.0197, "step": 7938 }, { "epoch": 2.644623969351212, "grad_norm": 0.60546875, "learning_rate": 1.293901216623978e-05, "loss": 4.0583, "step": 7939 }, { "epoch": 2.64495710835346, "grad_norm": 0.6015625, "learning_rate": 1.293845480656602e-05, "loss": 4.0737, "step": 7940 }, { "epoch": 2.6452902473557094, "grad_norm": 0.5859375, "learning_rate": 1.2937897383546045e-05, "loss": 4.0604, "step": 7941 }, { "epoch": 2.6456233863579577, "grad_norm": 0.61328125, "learning_rate": 1.2937339897186343e-05, "loss": 4.0233, "step": 7942 }, { "epoch": 2.6459565253602064, "grad_norm": 0.5859375, "learning_rate": 1.2936782347493407e-05, "loss": 3.9694, "step": 7943 }, { "epoch": 2.646289664362455, "grad_norm": 0.5859375, "learning_rate": 1.2936224734473735e-05, "loss": 4.0948, "step": 7944 }, { "epoch": 2.646622803364704, "grad_norm": 0.62109375, "learning_rate": 1.2935667058133818e-05, "loss": 4.1023, "step": 7945 }, { "epoch": 2.6469559423669526, "grad_norm": 0.62109375, "learning_rate": 1.2935109318480154e-05, "loss": 4.06, "step": 7946 }, { "epoch": 2.6472890813692014, "grad_norm": 0.6328125, "learning_rate": 1.2934551515519238e-05, "loss": 3.9927, "step": 7947 }, { "epoch": 2.64762222037145, "grad_norm": 0.59375, "learning_rate": 1.2933993649257569e-05, "loss": 4.0193, "step": 7948 }, { "epoch": 2.6479553593736984, "grad_norm": 0.59765625, "learning_rate": 1.2933435719701643e-05, "loss": 4.0523, "step": 7949 }, { "epoch": 2.6482884983759476, "grad_norm": 0.6015625, "learning_rate": 1.2932877726857962e-05, "loss": 4.0873, "step": 7950 }, { "epoch": 2.648621637378196, "grad_norm": 0.609375, "learning_rate": 1.2932319670733021e-05, "loss": 4.0843, "step": 7951 }, { "epoch": 2.6489547763804446, "grad_norm": 0.62109375, "learning_rate": 1.2931761551333324e-05, "loss": 4.0167, "step": 7952 }, { "epoch": 2.6492879153826934, "grad_norm": 0.59765625, "learning_rate": 1.2931203368665367e-05, "loss": 4.0031, "step": 7953 }, { "epoch": 2.649621054384942, "grad_norm": 0.5703125, "learning_rate": 1.2930645122735658e-05, "loss": 4.1199, "step": 7954 }, { "epoch": 2.649954193387191, "grad_norm": 0.63671875, "learning_rate": 1.2930086813550694e-05, "loss": 4.0676, "step": 7955 }, { "epoch": 2.6502873323894396, "grad_norm": 0.6171875, "learning_rate": 1.292952844111698e-05, "loss": 3.9738, "step": 7956 }, { "epoch": 2.6506204713916883, "grad_norm": 0.64453125, "learning_rate": 1.2928970005441022e-05, "loss": 4.0652, "step": 7957 }, { "epoch": 2.6509536103939366, "grad_norm": 0.6171875, "learning_rate": 1.292841150652932e-05, "loss": 4.0057, "step": 7958 }, { "epoch": 2.651286749396186, "grad_norm": 0.625, "learning_rate": 1.2927852944388384e-05, "loss": 3.9953, "step": 7959 }, { "epoch": 2.651619888398434, "grad_norm": 0.609375, "learning_rate": 1.2927294319024717e-05, "loss": 4.1307, "step": 7960 }, { "epoch": 2.651953027400683, "grad_norm": 0.62890625, "learning_rate": 1.2926735630444829e-05, "loss": 4.0314, "step": 7961 }, { "epoch": 2.6522861664029316, "grad_norm": 0.6171875, "learning_rate": 1.292617687865522e-05, "loss": 4.046, "step": 7962 }, { "epoch": 2.6526193054051803, "grad_norm": 0.57421875, "learning_rate": 1.2925618063662408e-05, "loss": 4.1259, "step": 7963 }, { "epoch": 2.652952444407429, "grad_norm": 0.59765625, "learning_rate": 1.2925059185472896e-05, "loss": 4.005, "step": 7964 }, { "epoch": 2.653285583409678, "grad_norm": 0.58984375, "learning_rate": 1.2924500244093194e-05, "loss": 4.1149, "step": 7965 }, { "epoch": 2.6536187224119265, "grad_norm": 0.62890625, "learning_rate": 1.2923941239529815e-05, "loss": 4.0111, "step": 7966 }, { "epoch": 2.653951861414175, "grad_norm": 0.578125, "learning_rate": 1.2923382171789267e-05, "loss": 4.0586, "step": 7967 }, { "epoch": 2.654285000416424, "grad_norm": 0.59375, "learning_rate": 1.2922823040878066e-05, "loss": 3.9905, "step": 7968 }, { "epoch": 2.6546181394186723, "grad_norm": 0.625, "learning_rate": 1.2922263846802722e-05, "loss": 4.0194, "step": 7969 }, { "epoch": 2.654951278420921, "grad_norm": 0.609375, "learning_rate": 1.2921704589569749e-05, "loss": 3.9518, "step": 7970 }, { "epoch": 2.65528441742317, "grad_norm": 0.6640625, "learning_rate": 1.292114526918566e-05, "loss": 4.038, "step": 7971 }, { "epoch": 2.6556175564254185, "grad_norm": 0.6015625, "learning_rate": 1.2920585885656972e-05, "loss": 4.0327, "step": 7972 }, { "epoch": 2.6559506954276673, "grad_norm": 0.65234375, "learning_rate": 1.2920026438990196e-05, "loss": 4.0267, "step": 7973 }, { "epoch": 2.656283834429916, "grad_norm": 0.59375, "learning_rate": 1.2919466929191856e-05, "loss": 4.0439, "step": 7974 }, { "epoch": 2.6566169734321647, "grad_norm": 0.6171875, "learning_rate": 1.2918907356268463e-05, "loss": 3.9926, "step": 7975 }, { "epoch": 2.656950112434413, "grad_norm": 0.62109375, "learning_rate": 1.2918347720226535e-05, "loss": 4.0392, "step": 7976 }, { "epoch": 2.657283251436662, "grad_norm": 0.60546875, "learning_rate": 1.2917788021072595e-05, "loss": 3.9922, "step": 7977 }, { "epoch": 2.6576163904389105, "grad_norm": 0.6015625, "learning_rate": 1.2917228258813157e-05, "loss": 4.0057, "step": 7978 }, { "epoch": 2.6579495294411593, "grad_norm": 0.6171875, "learning_rate": 1.2916668433454747e-05, "loss": 3.9955, "step": 7979 }, { "epoch": 2.658282668443408, "grad_norm": 0.64453125, "learning_rate": 1.2916108545003878e-05, "loss": 4.0118, "step": 7980 }, { "epoch": 2.6586158074456567, "grad_norm": 0.6171875, "learning_rate": 1.291554859346708e-05, "loss": 3.9948, "step": 7981 }, { "epoch": 2.6589489464479055, "grad_norm": 0.62890625, "learning_rate": 1.2914988578850867e-05, "loss": 4.0631, "step": 7982 }, { "epoch": 2.659282085450154, "grad_norm": 0.625, "learning_rate": 1.291442850116177e-05, "loss": 4.0242, "step": 7983 }, { "epoch": 2.659615224452403, "grad_norm": 0.59765625, "learning_rate": 1.2913868360406304e-05, "loss": 4.0385, "step": 7984 }, { "epoch": 2.6599483634546512, "grad_norm": 0.62890625, "learning_rate": 1.2913308156591002e-05, "loss": 3.9821, "step": 7985 }, { "epoch": 2.6602815024569, "grad_norm": 0.609375, "learning_rate": 1.2912747889722382e-05, "loss": 4.0785, "step": 7986 }, { "epoch": 2.6606146414591487, "grad_norm": 0.62890625, "learning_rate": 1.2912187559806976e-05, "loss": 4.0001, "step": 7987 }, { "epoch": 2.6609477804613975, "grad_norm": 0.6171875, "learning_rate": 1.2911627166851305e-05, "loss": 4.0163, "step": 7988 }, { "epoch": 2.661280919463646, "grad_norm": 0.63671875, "learning_rate": 1.29110667108619e-05, "loss": 4.0423, "step": 7989 }, { "epoch": 2.661614058465895, "grad_norm": 0.59375, "learning_rate": 1.291050619184529e-05, "loss": 4.0133, "step": 7990 }, { "epoch": 2.6619471974681437, "grad_norm": 0.609375, "learning_rate": 1.2909945609808e-05, "loss": 4.0325, "step": 7991 }, { "epoch": 2.6622803364703924, "grad_norm": 0.609375, "learning_rate": 1.290938496475656e-05, "loss": 4.1034, "step": 7992 }, { "epoch": 2.662613475472641, "grad_norm": 0.6171875, "learning_rate": 1.2908824256697505e-05, "loss": 4.0176, "step": 7993 }, { "epoch": 2.6629466144748895, "grad_norm": 0.6171875, "learning_rate": 1.2908263485637362e-05, "loss": 4.0344, "step": 7994 }, { "epoch": 2.663279753477138, "grad_norm": 0.609375, "learning_rate": 1.2907702651582663e-05, "loss": 4.0199, "step": 7995 }, { "epoch": 2.663612892479387, "grad_norm": 0.6015625, "learning_rate": 1.2907141754539942e-05, "loss": 4.0091, "step": 7996 }, { "epoch": 2.6639460314816357, "grad_norm": 0.625, "learning_rate": 1.2906580794515732e-05, "loss": 4.0815, "step": 7997 }, { "epoch": 2.6642791704838844, "grad_norm": 0.6640625, "learning_rate": 1.2906019771516565e-05, "loss": 3.9885, "step": 7998 }, { "epoch": 2.664612309486133, "grad_norm": 0.6171875, "learning_rate": 1.2905458685548978e-05, "loss": 4.018, "step": 7999 }, { "epoch": 2.664945448488382, "grad_norm": 0.59375, "learning_rate": 1.2904897536619505e-05, "loss": 4.0434, "step": 8000 }, { "epoch": 2.6652785874906306, "grad_norm": 0.578125, "learning_rate": 1.290433632473468e-05, "loss": 3.9905, "step": 8001 }, { "epoch": 2.6656117264928794, "grad_norm": 0.59765625, "learning_rate": 1.2903775049901047e-05, "loss": 4.0768, "step": 8002 }, { "epoch": 2.6659448654951277, "grad_norm": 0.58984375, "learning_rate": 1.2903213712125138e-05, "loss": 4.0094, "step": 8003 }, { "epoch": 2.6662780044973764, "grad_norm": 0.625, "learning_rate": 1.290265231141349e-05, "loss": 4.0889, "step": 8004 }, { "epoch": 2.666611143499625, "grad_norm": 0.60546875, "learning_rate": 1.290209084777265e-05, "loss": 4.0888, "step": 8005 }, { "epoch": 2.666944282501874, "grad_norm": 0.62890625, "learning_rate": 1.2901529321209149e-05, "loss": 4.0243, "step": 8006 }, { "epoch": 2.6672774215041226, "grad_norm": 0.63671875, "learning_rate": 1.2900967731729529e-05, "loss": 4.1201, "step": 8007 }, { "epoch": 2.6676105605063714, "grad_norm": 0.58984375, "learning_rate": 1.2900406079340337e-05, "loss": 4.0049, "step": 8008 }, { "epoch": 2.66794369950862, "grad_norm": 0.609375, "learning_rate": 1.289984436404811e-05, "loss": 4.0147, "step": 8009 }, { "epoch": 2.6682768385108684, "grad_norm": 0.60546875, "learning_rate": 1.2899282585859392e-05, "loss": 4.0227, "step": 8010 }, { "epoch": 2.6686099775131176, "grad_norm": 0.6328125, "learning_rate": 1.2898720744780728e-05, "loss": 3.9712, "step": 8011 }, { "epoch": 2.668943116515366, "grad_norm": 0.640625, "learning_rate": 1.2898158840818661e-05, "loss": 4.1093, "step": 8012 }, { "epoch": 2.6692762555176146, "grad_norm": 0.60546875, "learning_rate": 1.2897596873979734e-05, "loss": 3.9871, "step": 8013 }, { "epoch": 2.6696093945198633, "grad_norm": 0.58984375, "learning_rate": 1.2897034844270495e-05, "loss": 4.0291, "step": 8014 }, { "epoch": 2.669942533522112, "grad_norm": 0.58984375, "learning_rate": 1.2896472751697491e-05, "loss": 4.0562, "step": 8015 }, { "epoch": 2.670275672524361, "grad_norm": 0.59765625, "learning_rate": 1.2895910596267268e-05, "loss": 4.0559, "step": 8016 }, { "epoch": 2.6706088115266096, "grad_norm": 0.6015625, "learning_rate": 1.2895348377986374e-05, "loss": 4.0484, "step": 8017 }, { "epoch": 2.6709419505288583, "grad_norm": 0.60546875, "learning_rate": 1.2894786096861357e-05, "loss": 4.0173, "step": 8018 }, { "epoch": 2.6712750895311066, "grad_norm": 0.6015625, "learning_rate": 1.2894223752898768e-05, "loss": 4.0409, "step": 8019 }, { "epoch": 2.6716082285333558, "grad_norm": 0.5859375, "learning_rate": 1.2893661346105155e-05, "loss": 4.0463, "step": 8020 }, { "epoch": 2.671941367535604, "grad_norm": 0.625, "learning_rate": 1.2893098876487073e-05, "loss": 4.006, "step": 8021 }, { "epoch": 2.672274506537853, "grad_norm": 0.56640625, "learning_rate": 1.2892536344051067e-05, "loss": 4.0581, "step": 8022 }, { "epoch": 2.6726076455401016, "grad_norm": 0.59765625, "learning_rate": 1.2891973748803694e-05, "loss": 4.0167, "step": 8023 }, { "epoch": 2.6729407845423503, "grad_norm": 0.6171875, "learning_rate": 1.2891411090751506e-05, "loss": 4.0262, "step": 8024 }, { "epoch": 2.673273923544599, "grad_norm": 0.64453125, "learning_rate": 1.2890848369901057e-05, "loss": 3.9609, "step": 8025 }, { "epoch": 2.6736070625468478, "grad_norm": 0.6015625, "learning_rate": 1.28902855862589e-05, "loss": 4.0117, "step": 8026 }, { "epoch": 2.6739402015490965, "grad_norm": 0.6015625, "learning_rate": 1.2889722739831592e-05, "loss": 4.0404, "step": 8027 }, { "epoch": 2.674273340551345, "grad_norm": 0.62109375, "learning_rate": 1.2889159830625687e-05, "loss": 4.063, "step": 8028 }, { "epoch": 2.674606479553594, "grad_norm": 0.6015625, "learning_rate": 1.2888596858647746e-05, "loss": 3.994, "step": 8029 }, { "epoch": 2.6749396185558423, "grad_norm": 0.6015625, "learning_rate": 1.288803382390432e-05, "loss": 4.0063, "step": 8030 }, { "epoch": 2.675272757558091, "grad_norm": 0.58984375, "learning_rate": 1.288747072640197e-05, "loss": 3.9788, "step": 8031 }, { "epoch": 2.6756058965603398, "grad_norm": 0.60546875, "learning_rate": 1.2886907566147258e-05, "loss": 4.1351, "step": 8032 }, { "epoch": 2.6759390355625885, "grad_norm": 0.6015625, "learning_rate": 1.288634434314674e-05, "loss": 4.0524, "step": 8033 }, { "epoch": 2.6762721745648372, "grad_norm": 0.5859375, "learning_rate": 1.2885781057406978e-05, "loss": 4.0907, "step": 8034 }, { "epoch": 2.676605313567086, "grad_norm": 0.58203125, "learning_rate": 1.288521770893453e-05, "loss": 4.1154, "step": 8035 }, { "epoch": 2.6769384525693347, "grad_norm": 0.62109375, "learning_rate": 1.2884654297735962e-05, "loss": 3.9856, "step": 8036 }, { "epoch": 2.677271591571583, "grad_norm": 0.58203125, "learning_rate": 1.2884090823817833e-05, "loss": 4.0464, "step": 8037 }, { "epoch": 2.677604730573832, "grad_norm": 0.5703125, "learning_rate": 1.2883527287186709e-05, "loss": 4.1145, "step": 8038 }, { "epoch": 2.6779378695760805, "grad_norm": 0.6015625, "learning_rate": 1.2882963687849152e-05, "loss": 4.096, "step": 8039 }, { "epoch": 2.6782710085783292, "grad_norm": 0.62890625, "learning_rate": 1.2882400025811729e-05, "loss": 3.9068, "step": 8040 }, { "epoch": 2.678604147580578, "grad_norm": 0.6015625, "learning_rate": 1.2881836301081001e-05, "loss": 4.0108, "step": 8041 }, { "epoch": 2.6789372865828267, "grad_norm": 0.62109375, "learning_rate": 1.2881272513663539e-05, "loss": 4.045, "step": 8042 }, { "epoch": 2.6792704255850754, "grad_norm": 0.609375, "learning_rate": 1.288070866356591e-05, "loss": 4.0588, "step": 8043 }, { "epoch": 2.679603564587324, "grad_norm": 0.61328125, "learning_rate": 1.2880144750794677e-05, "loss": 3.9353, "step": 8044 }, { "epoch": 2.679936703589573, "grad_norm": 0.6015625, "learning_rate": 1.2879580775356411e-05, "loss": 4.1186, "step": 8045 }, { "epoch": 2.680269842591821, "grad_norm": 0.609375, "learning_rate": 1.2879016737257684e-05, "loss": 3.9722, "step": 8046 }, { "epoch": 2.6806029815940704, "grad_norm": 0.59765625, "learning_rate": 1.287845263650506e-05, "loss": 4.006, "step": 8047 }, { "epoch": 2.6809361205963187, "grad_norm": 0.625, "learning_rate": 1.287788847310511e-05, "loss": 4.0549, "step": 8048 }, { "epoch": 2.6812692595985674, "grad_norm": 0.60546875, "learning_rate": 1.2877324247064415e-05, "loss": 4.035, "step": 8049 }, { "epoch": 2.681602398600816, "grad_norm": 0.62109375, "learning_rate": 1.2876759958389536e-05, "loss": 3.9791, "step": 8050 }, { "epoch": 2.681935537603065, "grad_norm": 0.62890625, "learning_rate": 1.2876195607087048e-05, "loss": 4.0402, "step": 8051 }, { "epoch": 2.6822686766053137, "grad_norm": 0.625, "learning_rate": 1.2875631193163528e-05, "loss": 3.9547, "step": 8052 }, { "epoch": 2.6826018156075624, "grad_norm": 0.59375, "learning_rate": 1.2875066716625547e-05, "loss": 4.043, "step": 8053 }, { "epoch": 2.682934954609811, "grad_norm": 0.6015625, "learning_rate": 1.2874502177479681e-05, "loss": 3.9945, "step": 8054 }, { "epoch": 2.6832680936120594, "grad_norm": 0.62109375, "learning_rate": 1.2873937575732507e-05, "loss": 3.9642, "step": 8055 }, { "epoch": 2.683601232614308, "grad_norm": 0.59375, "learning_rate": 1.28733729113906e-05, "loss": 3.9934, "step": 8056 }, { "epoch": 2.683934371616557, "grad_norm": 0.609375, "learning_rate": 1.2872808184460537e-05, "loss": 4.0695, "step": 8057 }, { "epoch": 2.6842675106188056, "grad_norm": 0.56640625, "learning_rate": 1.2872243394948898e-05, "loss": 4.0067, "step": 8058 }, { "epoch": 2.6846006496210544, "grad_norm": 0.5859375, "learning_rate": 1.2871678542862256e-05, "loss": 4.0104, "step": 8059 }, { "epoch": 2.684933788623303, "grad_norm": 0.6171875, "learning_rate": 1.2871113628207196e-05, "loss": 4.0107, "step": 8060 }, { "epoch": 2.685266927625552, "grad_norm": 0.6015625, "learning_rate": 1.2870548650990297e-05, "loss": 3.9821, "step": 8061 }, { "epoch": 2.6856000666278006, "grad_norm": 0.6171875, "learning_rate": 1.2869983611218137e-05, "loss": 4.0407, "step": 8062 }, { "epoch": 2.6859332056300493, "grad_norm": 0.62109375, "learning_rate": 1.28694185088973e-05, "loss": 4.0947, "step": 8063 }, { "epoch": 2.6862663446322976, "grad_norm": 0.578125, "learning_rate": 1.2868853344034367e-05, "loss": 4.0185, "step": 8064 }, { "epoch": 2.6865994836345464, "grad_norm": 0.58984375, "learning_rate": 1.286828811663592e-05, "loss": 4.0495, "step": 8065 }, { "epoch": 2.686932622636795, "grad_norm": 0.58203125, "learning_rate": 1.2867722826708547e-05, "loss": 4.0632, "step": 8066 }, { "epoch": 2.687265761639044, "grad_norm": 0.57421875, "learning_rate": 1.286715747425883e-05, "loss": 4.0091, "step": 8067 }, { "epoch": 2.6875989006412926, "grad_norm": 0.625, "learning_rate": 1.2866592059293353e-05, "loss": 4.0151, "step": 8068 }, { "epoch": 2.6879320396435413, "grad_norm": 0.5859375, "learning_rate": 1.2866026581818701e-05, "loss": 4.002, "step": 8069 }, { "epoch": 2.68826517864579, "grad_norm": 0.59765625, "learning_rate": 1.2865461041841463e-05, "loss": 4.0217, "step": 8070 }, { "epoch": 2.688598317648039, "grad_norm": 0.625, "learning_rate": 1.2864895439368227e-05, "loss": 4.0893, "step": 8071 }, { "epoch": 2.6889314566502875, "grad_norm": 0.609375, "learning_rate": 1.2864329774405575e-05, "loss": 4.0358, "step": 8072 }, { "epoch": 2.689264595652536, "grad_norm": 0.63671875, "learning_rate": 1.2863764046960107e-05, "loss": 3.994, "step": 8073 }, { "epoch": 2.6895977346547846, "grad_norm": 0.6328125, "learning_rate": 1.28631982570384e-05, "loss": 4.0599, "step": 8074 }, { "epoch": 2.6899308736570333, "grad_norm": 0.59375, "learning_rate": 1.286263240464705e-05, "loss": 3.9748, "step": 8075 }, { "epoch": 2.690264012659282, "grad_norm": 0.609375, "learning_rate": 1.2862066489792652e-05, "loss": 4.0789, "step": 8076 }, { "epoch": 2.690597151661531, "grad_norm": 0.59375, "learning_rate": 1.286150051248179e-05, "loss": 4.0352, "step": 8077 }, { "epoch": 2.6909302906637795, "grad_norm": 0.6171875, "learning_rate": 1.2860934472721062e-05, "loss": 4.0241, "step": 8078 }, { "epoch": 2.6912634296660283, "grad_norm": 0.625, "learning_rate": 1.2860368370517059e-05, "loss": 4.0613, "step": 8079 }, { "epoch": 2.6915965686682766, "grad_norm": 0.625, "learning_rate": 1.2859802205876373e-05, "loss": 4.0824, "step": 8080 }, { "epoch": 2.6919297076705258, "grad_norm": 0.6015625, "learning_rate": 1.2859235978805603e-05, "loss": 4.0027, "step": 8081 }, { "epoch": 2.692262846672774, "grad_norm": 0.58984375, "learning_rate": 1.2858669689311341e-05, "loss": 4.0949, "step": 8082 }, { "epoch": 2.692595985675023, "grad_norm": 0.62109375, "learning_rate": 1.2858103337400184e-05, "loss": 4.1002, "step": 8083 }, { "epoch": 2.6929291246772715, "grad_norm": 0.60546875, "learning_rate": 1.2857536923078729e-05, "loss": 4.0923, "step": 8084 }, { "epoch": 2.6932622636795203, "grad_norm": 0.62109375, "learning_rate": 1.2856970446353573e-05, "loss": 4.114, "step": 8085 }, { "epoch": 2.693595402681769, "grad_norm": 0.60546875, "learning_rate": 1.2856403907231314e-05, "loss": 4.0077, "step": 8086 }, { "epoch": 2.6939285416840177, "grad_norm": 0.60546875, "learning_rate": 1.2855837305718552e-05, "loss": 3.9997, "step": 8087 }, { "epoch": 2.6942616806862665, "grad_norm": 0.58984375, "learning_rate": 1.2855270641821887e-05, "loss": 4.0441, "step": 8088 }, { "epoch": 2.6945948196885148, "grad_norm": 0.59765625, "learning_rate": 1.2854703915547917e-05, "loss": 4.051, "step": 8089 }, { "epoch": 2.694927958690764, "grad_norm": 0.609375, "learning_rate": 1.2854137126903246e-05, "loss": 4.0307, "step": 8090 }, { "epoch": 2.6952610976930123, "grad_norm": 0.58203125, "learning_rate": 1.2853570275894473e-05, "loss": 4.0694, "step": 8091 }, { "epoch": 2.695594236695261, "grad_norm": 0.60546875, "learning_rate": 1.2853003362528203e-05, "loss": 4.0314, "step": 8092 }, { "epoch": 2.6959273756975097, "grad_norm": 0.6015625, "learning_rate": 1.2852436386811039e-05, "loss": 4.0409, "step": 8093 }, { "epoch": 2.6962605146997585, "grad_norm": 0.62890625, "learning_rate": 1.2851869348749583e-05, "loss": 4.0126, "step": 8094 }, { "epoch": 2.696593653702007, "grad_norm": 0.625, "learning_rate": 1.2851302248350442e-05, "loss": 4.0405, "step": 8095 }, { "epoch": 2.696926792704256, "grad_norm": 0.61328125, "learning_rate": 1.2850735085620221e-05, "loss": 4.0413, "step": 8096 }, { "epoch": 2.6972599317065047, "grad_norm": 0.66015625, "learning_rate": 1.2850167860565524e-05, "loss": 4.0496, "step": 8097 }, { "epoch": 2.697593070708753, "grad_norm": 0.609375, "learning_rate": 1.2849600573192963e-05, "loss": 4.02, "step": 8098 }, { "epoch": 2.697926209711002, "grad_norm": 0.61328125, "learning_rate": 1.284903322350914e-05, "loss": 4.0507, "step": 8099 }, { "epoch": 2.6982593487132505, "grad_norm": 0.6328125, "learning_rate": 1.2848465811520667e-05, "loss": 3.894, "step": 8100 }, { "epoch": 2.698592487715499, "grad_norm": 0.5859375, "learning_rate": 1.2847898337234153e-05, "loss": 3.9672, "step": 8101 }, { "epoch": 2.698925626717748, "grad_norm": 0.59765625, "learning_rate": 1.2847330800656205e-05, "loss": 4.0406, "step": 8102 }, { "epoch": 2.6992587657199967, "grad_norm": 0.6171875, "learning_rate": 1.2846763201793437e-05, "loss": 4.0271, "step": 8103 }, { "epoch": 2.6995919047222454, "grad_norm": 0.58984375, "learning_rate": 1.2846195540652457e-05, "loss": 4.0826, "step": 8104 }, { "epoch": 2.699925043724494, "grad_norm": 0.61328125, "learning_rate": 1.2845627817239881e-05, "loss": 4.0503, "step": 8105 }, { "epoch": 2.700258182726743, "grad_norm": 0.59765625, "learning_rate": 1.2845060031562319e-05, "loss": 3.9316, "step": 8106 }, { "epoch": 2.700591321728991, "grad_norm": 0.6328125, "learning_rate": 1.2844492183626384e-05, "loss": 3.9767, "step": 8107 }, { "epoch": 2.7009244607312404, "grad_norm": 0.62109375, "learning_rate": 1.2843924273438693e-05, "loss": 4.018, "step": 8108 }, { "epoch": 2.7012575997334887, "grad_norm": 0.61328125, "learning_rate": 1.2843356301005858e-05, "loss": 4.0668, "step": 8109 }, { "epoch": 2.7015907387357374, "grad_norm": 0.60546875, "learning_rate": 1.2842788266334495e-05, "loss": 4.0058, "step": 8110 }, { "epoch": 2.701923877737986, "grad_norm": 0.62890625, "learning_rate": 1.2842220169431221e-05, "loss": 4.0759, "step": 8111 }, { "epoch": 2.702257016740235, "grad_norm": 0.62890625, "learning_rate": 1.2841652010302653e-05, "loss": 3.9665, "step": 8112 }, { "epoch": 2.7025901557424836, "grad_norm": 0.60546875, "learning_rate": 1.284108378895541e-05, "loss": 4.057, "step": 8113 }, { "epoch": 2.7029232947447324, "grad_norm": 0.63671875, "learning_rate": 1.2840515505396112e-05, "loss": 4.0038, "step": 8114 }, { "epoch": 2.703256433746981, "grad_norm": 0.6484375, "learning_rate": 1.2839947159631373e-05, "loss": 3.9885, "step": 8115 }, { "epoch": 2.7035895727492294, "grad_norm": 0.62109375, "learning_rate": 1.2839378751667818e-05, "loss": 4.0207, "step": 8116 }, { "epoch": 2.7039227117514786, "grad_norm": 0.6484375, "learning_rate": 1.2838810281512063e-05, "loss": 3.9623, "step": 8117 }, { "epoch": 2.704255850753727, "grad_norm": 0.6015625, "learning_rate": 1.2838241749170734e-05, "loss": 3.9788, "step": 8118 }, { "epoch": 2.7045889897559756, "grad_norm": 0.61328125, "learning_rate": 1.283767315465045e-05, "loss": 4.0481, "step": 8119 }, { "epoch": 2.7049221287582244, "grad_norm": 0.625, "learning_rate": 1.2837104497957836e-05, "loss": 3.9769, "step": 8120 }, { "epoch": 2.705255267760473, "grad_norm": 0.5859375, "learning_rate": 1.2836535779099511e-05, "loss": 4.0329, "step": 8121 }, { "epoch": 2.705588406762722, "grad_norm": 0.60546875, "learning_rate": 1.2835966998082107e-05, "loss": 4.0116, "step": 8122 }, { "epoch": 2.7059215457649706, "grad_norm": 0.6015625, "learning_rate": 1.2835398154912245e-05, "loss": 4.035, "step": 8123 }, { "epoch": 2.7062546847672193, "grad_norm": 0.671875, "learning_rate": 1.2834829249596551e-05, "loss": 3.9659, "step": 8124 }, { "epoch": 2.7065878237694676, "grad_norm": 0.67578125, "learning_rate": 1.2834260282141653e-05, "loss": 3.9431, "step": 8125 }, { "epoch": 2.7069209627717163, "grad_norm": 0.6328125, "learning_rate": 1.2833691252554174e-05, "loss": 4.0583, "step": 8126 }, { "epoch": 2.707254101773965, "grad_norm": 0.60546875, "learning_rate": 1.2833122160840746e-05, "loss": 4.0988, "step": 8127 }, { "epoch": 2.707587240776214, "grad_norm": 0.56640625, "learning_rate": 1.2832553007007994e-05, "loss": 4.0222, "step": 8128 }, { "epoch": 2.7079203797784626, "grad_norm": 0.66796875, "learning_rate": 1.2831983791062552e-05, "loss": 3.9862, "step": 8129 }, { "epoch": 2.7082535187807113, "grad_norm": 0.6484375, "learning_rate": 1.2831414513011048e-05, "loss": 4.0452, "step": 8130 }, { "epoch": 2.70858665778296, "grad_norm": 0.6171875, "learning_rate": 1.2830845172860111e-05, "loss": 4.027, "step": 8131 }, { "epoch": 2.7089197967852088, "grad_norm": 0.6328125, "learning_rate": 1.2830275770616375e-05, "loss": 3.9737, "step": 8132 }, { "epoch": 2.7092529357874575, "grad_norm": 0.625, "learning_rate": 1.2829706306286472e-05, "loss": 4.0261, "step": 8133 }, { "epoch": 2.709586074789706, "grad_norm": 0.62109375, "learning_rate": 1.2829136779877035e-05, "loss": 4.0211, "step": 8134 }, { "epoch": 2.7099192137919546, "grad_norm": 0.59765625, "learning_rate": 1.2828567191394698e-05, "loss": 4.0418, "step": 8135 }, { "epoch": 2.7102523527942033, "grad_norm": 0.62109375, "learning_rate": 1.2827997540846093e-05, "loss": 4.0251, "step": 8136 }, { "epoch": 2.710585491796452, "grad_norm": 0.61328125, "learning_rate": 1.2827427828237858e-05, "loss": 3.9851, "step": 8137 }, { "epoch": 2.7109186307987008, "grad_norm": 0.61328125, "learning_rate": 1.2826858053576627e-05, "loss": 4.0576, "step": 8138 }, { "epoch": 2.7112517698009495, "grad_norm": 0.6015625, "learning_rate": 1.282628821686904e-05, "loss": 4.0664, "step": 8139 }, { "epoch": 2.7115849088031982, "grad_norm": 0.60546875, "learning_rate": 1.282571831812173e-05, "loss": 3.9727, "step": 8140 }, { "epoch": 2.711918047805447, "grad_norm": 0.58203125, "learning_rate": 1.2825148357341338e-05, "loss": 4.0191, "step": 8141 }, { "epoch": 2.7122511868076957, "grad_norm": 0.62890625, "learning_rate": 1.2824578334534503e-05, "loss": 4.0017, "step": 8142 }, { "epoch": 2.712584325809944, "grad_norm": 0.5703125, "learning_rate": 1.282400824970786e-05, "loss": 4.0493, "step": 8143 }, { "epoch": 2.7129174648121928, "grad_norm": 0.59765625, "learning_rate": 1.2823438102868057e-05, "loss": 4.0053, "step": 8144 }, { "epoch": 2.7132506038144415, "grad_norm": 0.64453125, "learning_rate": 1.2822867894021728e-05, "loss": 3.9883, "step": 8145 }, { "epoch": 2.7135837428166902, "grad_norm": 0.63671875, "learning_rate": 1.2822297623175517e-05, "loss": 4.0767, "step": 8146 }, { "epoch": 2.713916881818939, "grad_norm": 0.6015625, "learning_rate": 1.2821727290336067e-05, "loss": 3.9994, "step": 8147 }, { "epoch": 2.7142500208211877, "grad_norm": 0.625, "learning_rate": 1.2821156895510022e-05, "loss": 4.0018, "step": 8148 }, { "epoch": 2.7145831598234365, "grad_norm": 0.61328125, "learning_rate": 1.2820586438704026e-05, "loss": 3.9915, "step": 8149 }, { "epoch": 2.7149162988256847, "grad_norm": 0.60546875, "learning_rate": 1.2820015919924718e-05, "loss": 4.0541, "step": 8150 }, { "epoch": 2.715249437827934, "grad_norm": 0.62890625, "learning_rate": 1.2819445339178751e-05, "loss": 4.0356, "step": 8151 }, { "epoch": 2.7155825768301822, "grad_norm": 0.609375, "learning_rate": 1.2818874696472767e-05, "loss": 4.0501, "step": 8152 }, { "epoch": 2.715915715832431, "grad_norm": 0.59765625, "learning_rate": 1.2818303991813413e-05, "loss": 4.0095, "step": 8153 }, { "epoch": 2.7162488548346797, "grad_norm": 0.5859375, "learning_rate": 1.2817733225207337e-05, "loss": 3.9909, "step": 8154 }, { "epoch": 2.7165819938369284, "grad_norm": 0.62109375, "learning_rate": 1.2817162396661188e-05, "loss": 3.9842, "step": 8155 }, { "epoch": 2.716915132839177, "grad_norm": 0.6328125, "learning_rate": 1.2816591506181613e-05, "loss": 4.0329, "step": 8156 }, { "epoch": 2.717248271841426, "grad_norm": 0.60546875, "learning_rate": 1.2816020553775262e-05, "loss": 3.9339, "step": 8157 }, { "epoch": 2.7175814108436747, "grad_norm": 0.59375, "learning_rate": 1.281544953944879e-05, "loss": 3.9997, "step": 8158 }, { "epoch": 2.717914549845923, "grad_norm": 0.609375, "learning_rate": 1.281487846320884e-05, "loss": 3.9617, "step": 8159 }, { "epoch": 2.718247688848172, "grad_norm": 0.609375, "learning_rate": 1.281430732506207e-05, "loss": 3.9861, "step": 8160 }, { "epoch": 2.7185808278504204, "grad_norm": 0.64453125, "learning_rate": 1.2813736125015128e-05, "loss": 3.9738, "step": 8161 }, { "epoch": 2.718913966852669, "grad_norm": 0.6171875, "learning_rate": 1.2813164863074673e-05, "loss": 4.0182, "step": 8162 }, { "epoch": 2.719247105854918, "grad_norm": 0.61328125, "learning_rate": 1.2812593539247352e-05, "loss": 3.9955, "step": 8163 }, { "epoch": 2.7195802448571667, "grad_norm": 0.625, "learning_rate": 1.2812022153539828e-05, "loss": 4.0609, "step": 8164 }, { "epoch": 2.7199133838594154, "grad_norm": 0.61328125, "learning_rate": 1.2811450705958748e-05, "loss": 4.0869, "step": 8165 }, { "epoch": 2.720246522861664, "grad_norm": 0.609375, "learning_rate": 1.2810879196510774e-05, "loss": 4.0634, "step": 8166 }, { "epoch": 2.720579661863913, "grad_norm": 0.57421875, "learning_rate": 1.2810307625202558e-05, "loss": 4.1037, "step": 8167 }, { "epoch": 2.720912800866161, "grad_norm": 0.6015625, "learning_rate": 1.280973599204076e-05, "loss": 4.0559, "step": 8168 }, { "epoch": 2.7212459398684103, "grad_norm": 0.60546875, "learning_rate": 1.2809164297032041e-05, "loss": 4.0476, "step": 8169 }, { "epoch": 2.7215790788706586, "grad_norm": 0.58203125, "learning_rate": 1.280859254018306e-05, "loss": 4.0631, "step": 8170 }, { "epoch": 2.7219122178729074, "grad_norm": 0.6484375, "learning_rate": 1.2808020721500469e-05, "loss": 4.0751, "step": 8171 }, { "epoch": 2.722245356875156, "grad_norm": 0.6328125, "learning_rate": 1.2807448840990936e-05, "loss": 4.0108, "step": 8172 }, { "epoch": 2.722578495877405, "grad_norm": 0.62109375, "learning_rate": 1.280687689866112e-05, "loss": 4.0142, "step": 8173 }, { "epoch": 2.7229116348796536, "grad_norm": 0.59375, "learning_rate": 1.2806304894517684e-05, "loss": 4.134, "step": 8174 }, { "epoch": 2.7232447738819023, "grad_norm": 0.625, "learning_rate": 1.2805732828567288e-05, "loss": 4.0276, "step": 8175 }, { "epoch": 2.723577912884151, "grad_norm": 0.6171875, "learning_rate": 1.2805160700816596e-05, "loss": 4.0033, "step": 8176 }, { "epoch": 2.7239110518863994, "grad_norm": 0.62890625, "learning_rate": 1.2804588511272273e-05, "loss": 4.0093, "step": 8177 }, { "epoch": 2.7242441908886486, "grad_norm": 0.62890625, "learning_rate": 1.2804016259940985e-05, "loss": 4.0605, "step": 8178 }, { "epoch": 2.724577329890897, "grad_norm": 0.62890625, "learning_rate": 1.2803443946829394e-05, "loss": 4.0749, "step": 8179 }, { "epoch": 2.7249104688931456, "grad_norm": 0.61328125, "learning_rate": 1.2802871571944171e-05, "loss": 4.026, "step": 8180 }, { "epoch": 2.7252436078953943, "grad_norm": 0.6171875, "learning_rate": 1.280229913529198e-05, "loss": 3.9854, "step": 8181 }, { "epoch": 2.725576746897643, "grad_norm": 0.63671875, "learning_rate": 1.2801726636879486e-05, "loss": 4.0104, "step": 8182 }, { "epoch": 2.725909885899892, "grad_norm": 0.6171875, "learning_rate": 1.2801154076713361e-05, "loss": 4.0688, "step": 8183 }, { "epoch": 2.7262430249021405, "grad_norm": 0.6171875, "learning_rate": 1.2800581454800276e-05, "loss": 4.0813, "step": 8184 }, { "epoch": 2.7265761639043893, "grad_norm": 0.640625, "learning_rate": 1.2800008771146894e-05, "loss": 4.0177, "step": 8185 }, { "epoch": 2.7269093029066376, "grad_norm": 0.60546875, "learning_rate": 1.2799436025759895e-05, "loss": 4.0891, "step": 8186 }, { "epoch": 2.7272424419088863, "grad_norm": 0.58984375, "learning_rate": 1.279886321864594e-05, "loss": 4.0305, "step": 8187 }, { "epoch": 2.727575580911135, "grad_norm": 0.578125, "learning_rate": 1.2798290349811707e-05, "loss": 4.0482, "step": 8188 }, { "epoch": 2.727908719913384, "grad_norm": 0.58984375, "learning_rate": 1.279771741926387e-05, "loss": 4.035, "step": 8189 }, { "epoch": 2.7282418589156325, "grad_norm": 0.62109375, "learning_rate": 1.2797144427009096e-05, "loss": 3.9977, "step": 8190 }, { "epoch": 2.7285749979178813, "grad_norm": 0.59375, "learning_rate": 1.2796571373054063e-05, "loss": 4.0496, "step": 8191 }, { "epoch": 2.72890813692013, "grad_norm": 0.61328125, "learning_rate": 1.2795998257405447e-05, "loss": 4.0131, "step": 8192 }, { "epoch": 2.7292412759223788, "grad_norm": 0.62109375, "learning_rate": 1.2795425080069927e-05, "loss": 4.0122, "step": 8193 }, { "epoch": 2.7295744149246275, "grad_norm": 0.6328125, "learning_rate": 1.279485184105417e-05, "loss": 3.991, "step": 8194 }, { "epoch": 2.729907553926876, "grad_norm": 0.5859375, "learning_rate": 1.2794278540364859e-05, "loss": 4.0371, "step": 8195 }, { "epoch": 2.7302406929291245, "grad_norm": 0.59375, "learning_rate": 1.2793705178008668e-05, "loss": 4.0477, "step": 8196 }, { "epoch": 2.7305738319313733, "grad_norm": 0.61328125, "learning_rate": 1.279313175399228e-05, "loss": 4.0631, "step": 8197 }, { "epoch": 2.730906970933622, "grad_norm": 0.59765625, "learning_rate": 1.2792558268322372e-05, "loss": 3.9891, "step": 8198 }, { "epoch": 2.7312401099358707, "grad_norm": 0.60546875, "learning_rate": 1.2791984721005624e-05, "loss": 4.0692, "step": 8199 }, { "epoch": 2.7315732489381195, "grad_norm": 0.62109375, "learning_rate": 1.2791411112048714e-05, "loss": 4.015, "step": 8200 }, { "epoch": 2.731906387940368, "grad_norm": 0.59765625, "learning_rate": 1.279083744145833e-05, "loss": 4.0566, "step": 8201 }, { "epoch": 2.732239526942617, "grad_norm": 0.62109375, "learning_rate": 1.2790263709241148e-05, "loss": 4.1057, "step": 8202 }, { "epoch": 2.7325726659448657, "grad_norm": 0.61328125, "learning_rate": 1.278968991540385e-05, "loss": 3.9712, "step": 8203 }, { "epoch": 2.732905804947114, "grad_norm": 0.65625, "learning_rate": 1.2789116059953126e-05, "loss": 4.048, "step": 8204 }, { "epoch": 2.7332389439493627, "grad_norm": 0.625, "learning_rate": 1.2788542142895652e-05, "loss": 4.0588, "step": 8205 }, { "epoch": 2.7335720829516115, "grad_norm": 0.609375, "learning_rate": 1.2787968164238119e-05, "loss": 4.0415, "step": 8206 }, { "epoch": 2.73390522195386, "grad_norm": 0.63671875, "learning_rate": 1.278739412398721e-05, "loss": 4.0542, "step": 8207 }, { "epoch": 2.734238360956109, "grad_norm": 0.6015625, "learning_rate": 1.2786820022149614e-05, "loss": 3.9998, "step": 8208 }, { "epoch": 2.7345714999583577, "grad_norm": 0.61328125, "learning_rate": 1.2786245858732015e-05, "loss": 4.0957, "step": 8209 }, { "epoch": 2.7349046389606064, "grad_norm": 0.6328125, "learning_rate": 1.27856716337411e-05, "loss": 4.0343, "step": 8210 }, { "epoch": 2.735237777962855, "grad_norm": 0.609375, "learning_rate": 1.2785097347183563e-05, "loss": 3.9818, "step": 8211 }, { "epoch": 2.735570916965104, "grad_norm": 0.59765625, "learning_rate": 1.2784522999066084e-05, "loss": 3.9768, "step": 8212 }, { "epoch": 2.735904055967352, "grad_norm": 0.59765625, "learning_rate": 1.2783948589395362e-05, "loss": 4.0496, "step": 8213 }, { "epoch": 2.736237194969601, "grad_norm": 0.64453125, "learning_rate": 1.2783374118178082e-05, "loss": 4.0633, "step": 8214 }, { "epoch": 2.7365703339718497, "grad_norm": 0.6171875, "learning_rate": 1.278279958542094e-05, "loss": 4.0161, "step": 8215 }, { "epoch": 2.7369034729740984, "grad_norm": 0.62109375, "learning_rate": 1.2782224991130624e-05, "loss": 4.029, "step": 8216 }, { "epoch": 2.737236611976347, "grad_norm": 0.62109375, "learning_rate": 1.2781650335313826e-05, "loss": 3.9851, "step": 8217 }, { "epoch": 2.737569750978596, "grad_norm": 0.62109375, "learning_rate": 1.2781075617977244e-05, "loss": 3.9947, "step": 8218 }, { "epoch": 2.7379028899808446, "grad_norm": 0.62109375, "learning_rate": 1.2780500839127569e-05, "loss": 4.0795, "step": 8219 }, { "epoch": 2.738236028983093, "grad_norm": 0.62890625, "learning_rate": 1.2779925998771496e-05, "loss": 3.9565, "step": 8220 }, { "epoch": 2.738569167985342, "grad_norm": 0.65234375, "learning_rate": 1.2779351096915724e-05, "loss": 4.0056, "step": 8221 }, { "epoch": 2.7389023069875904, "grad_norm": 0.6171875, "learning_rate": 1.2778776133566945e-05, "loss": 4.0594, "step": 8222 }, { "epoch": 2.739235445989839, "grad_norm": 0.5859375, "learning_rate": 1.2778201108731858e-05, "loss": 3.9424, "step": 8223 }, { "epoch": 2.739568584992088, "grad_norm": 0.6015625, "learning_rate": 1.2777626022417161e-05, "loss": 4.057, "step": 8224 }, { "epoch": 2.7399017239943366, "grad_norm": 0.60546875, "learning_rate": 1.2777050874629553e-05, "loss": 4.0906, "step": 8225 }, { "epoch": 2.7402348629965854, "grad_norm": 0.61328125, "learning_rate": 1.2776475665375729e-05, "loss": 4.0577, "step": 8226 }, { "epoch": 2.740568001998834, "grad_norm": 0.609375, "learning_rate": 1.2775900394662395e-05, "loss": 4.0307, "step": 8227 }, { "epoch": 2.740901141001083, "grad_norm": 0.640625, "learning_rate": 1.2775325062496249e-05, "loss": 4.0163, "step": 8228 }, { "epoch": 2.741234280003331, "grad_norm": 0.62109375, "learning_rate": 1.2774749668883992e-05, "loss": 4.0446, "step": 8229 }, { "epoch": 2.7415674190055803, "grad_norm": 0.60546875, "learning_rate": 1.2774174213832327e-05, "loss": 4.0057, "step": 8230 }, { "epoch": 2.7419005580078286, "grad_norm": 0.64453125, "learning_rate": 1.2773598697347957e-05, "loss": 3.9553, "step": 8231 }, { "epoch": 2.7422336970100774, "grad_norm": 0.65625, "learning_rate": 1.2773023119437584e-05, "loss": 4.082, "step": 8232 }, { "epoch": 2.742566836012326, "grad_norm": 0.60546875, "learning_rate": 1.277244748010791e-05, "loss": 4.0762, "step": 8233 }, { "epoch": 2.742899975014575, "grad_norm": 0.609375, "learning_rate": 1.2771871779365646e-05, "loss": 4.0004, "step": 8234 }, { "epoch": 2.7432331140168236, "grad_norm": 0.625, "learning_rate": 1.2771296017217495e-05, "loss": 3.9824, "step": 8235 }, { "epoch": 2.7435662530190723, "grad_norm": 0.61328125, "learning_rate": 1.2770720193670163e-05, "loss": 4.0544, "step": 8236 }, { "epoch": 2.743899392021321, "grad_norm": 0.6171875, "learning_rate": 1.2770144308730355e-05, "loss": 4.0456, "step": 8237 }, { "epoch": 2.7442325310235693, "grad_norm": 0.6015625, "learning_rate": 1.2769568362404783e-05, "loss": 4.0434, "step": 8238 }, { "epoch": 2.7445656700258185, "grad_norm": 0.609375, "learning_rate": 1.2768992354700153e-05, "loss": 4.1023, "step": 8239 }, { "epoch": 2.744898809028067, "grad_norm": 0.64453125, "learning_rate": 1.2768416285623172e-05, "loss": 3.9451, "step": 8240 }, { "epoch": 2.7452319480303156, "grad_norm": 0.59765625, "learning_rate": 1.2767840155180555e-05, "loss": 4.0323, "step": 8241 }, { "epoch": 2.7455650870325643, "grad_norm": 0.63671875, "learning_rate": 1.276726396337901e-05, "loss": 4.0017, "step": 8242 }, { "epoch": 2.745898226034813, "grad_norm": 0.62109375, "learning_rate": 1.276668771022525e-05, "loss": 4.0358, "step": 8243 }, { "epoch": 2.7462313650370618, "grad_norm": 0.59765625, "learning_rate": 1.2766111395725983e-05, "loss": 4.0151, "step": 8244 }, { "epoch": 2.7465645040393105, "grad_norm": 0.625, "learning_rate": 1.2765535019887928e-05, "loss": 3.9706, "step": 8245 }, { "epoch": 2.7468976430415593, "grad_norm": 0.64453125, "learning_rate": 1.2764958582717792e-05, "loss": 4.0334, "step": 8246 }, { "epoch": 2.7472307820438076, "grad_norm": 0.609375, "learning_rate": 1.2764382084222295e-05, "loss": 4.0508, "step": 8247 }, { "epoch": 2.7475639210460567, "grad_norm": 0.62890625, "learning_rate": 1.276380552440815e-05, "loss": 4.0608, "step": 8248 }, { "epoch": 2.747897060048305, "grad_norm": 0.62109375, "learning_rate": 1.2763228903282069e-05, "loss": 4.0465, "step": 8249 }, { "epoch": 2.7482301990505538, "grad_norm": 0.6328125, "learning_rate": 1.276265222085077e-05, "loss": 4.1109, "step": 8250 }, { "epoch": 2.7485633380528025, "grad_norm": 0.62890625, "learning_rate": 1.2762075477120977e-05, "loss": 4.0307, "step": 8251 }, { "epoch": 2.7488964770550512, "grad_norm": 0.63671875, "learning_rate": 1.27614986720994e-05, "loss": 4.1076, "step": 8252 }, { "epoch": 2.7492296160573, "grad_norm": 0.60546875, "learning_rate": 1.2760921805792757e-05, "loss": 4.0742, "step": 8253 }, { "epoch": 2.7495627550595487, "grad_norm": 0.62109375, "learning_rate": 1.2760344878207772e-05, "loss": 4.0085, "step": 8254 }, { "epoch": 2.7498958940617975, "grad_norm": 0.60546875, "learning_rate": 1.2759767889351166e-05, "loss": 4.0269, "step": 8255 }, { "epoch": 2.7502290330640458, "grad_norm": 0.59765625, "learning_rate": 1.2759190839229654e-05, "loss": 4.0798, "step": 8256 }, { "epoch": 2.7505621720662945, "grad_norm": 0.58203125, "learning_rate": 1.2758613727849959e-05, "loss": 4.0517, "step": 8257 }, { "epoch": 2.7508953110685432, "grad_norm": 0.60546875, "learning_rate": 1.2758036555218807e-05, "loss": 3.997, "step": 8258 }, { "epoch": 2.751228450070792, "grad_norm": 0.609375, "learning_rate": 1.2757459321342915e-05, "loss": 3.9682, "step": 8259 }, { "epoch": 2.7515615890730407, "grad_norm": 0.61328125, "learning_rate": 1.2756882026229013e-05, "loss": 3.9545, "step": 8260 }, { "epoch": 2.7518947280752895, "grad_norm": 0.58984375, "learning_rate": 1.2756304669883819e-05, "loss": 3.9916, "step": 8261 }, { "epoch": 2.752227867077538, "grad_norm": 0.640625, "learning_rate": 1.2755727252314062e-05, "loss": 4.0355, "step": 8262 }, { "epoch": 2.752561006079787, "grad_norm": 0.60546875, "learning_rate": 1.2755149773526466e-05, "loss": 3.9862, "step": 8263 }, { "epoch": 2.7528941450820357, "grad_norm": 0.6328125, "learning_rate": 1.2754572233527758e-05, "loss": 4.0329, "step": 8264 }, { "epoch": 2.753227284084284, "grad_norm": 0.64453125, "learning_rate": 1.2753994632324663e-05, "loss": 4.0235, "step": 8265 }, { "epoch": 2.7535604230865327, "grad_norm": 0.625, "learning_rate": 1.2753416969923914e-05, "loss": 3.975, "step": 8266 }, { "epoch": 2.7538935620887814, "grad_norm": 0.640625, "learning_rate": 1.2752839246332234e-05, "loss": 4.0238, "step": 8267 }, { "epoch": 2.75422670109103, "grad_norm": 0.625, "learning_rate": 1.2752261461556354e-05, "loss": 3.9899, "step": 8268 }, { "epoch": 2.754559840093279, "grad_norm": 0.6484375, "learning_rate": 1.2751683615603008e-05, "loss": 4.0213, "step": 8269 }, { "epoch": 2.7548929790955277, "grad_norm": 0.59765625, "learning_rate": 1.2751105708478919e-05, "loss": 3.9928, "step": 8270 }, { "epoch": 2.7552261180977764, "grad_norm": 0.62890625, "learning_rate": 1.2750527740190823e-05, "loss": 4.0419, "step": 8271 }, { "epoch": 2.755559257100025, "grad_norm": 0.625, "learning_rate": 1.2749949710745451e-05, "loss": 3.997, "step": 8272 }, { "epoch": 2.755892396102274, "grad_norm": 0.609375, "learning_rate": 1.2749371620149539e-05, "loss": 4.0281, "step": 8273 }, { "epoch": 2.756225535104522, "grad_norm": 0.625, "learning_rate": 1.2748793468409814e-05, "loss": 4.008, "step": 8274 }, { "epoch": 2.756558674106771, "grad_norm": 0.62890625, "learning_rate": 1.2748215255533017e-05, "loss": 4.1038, "step": 8275 }, { "epoch": 2.7568918131090197, "grad_norm": 0.640625, "learning_rate": 1.2747636981525879e-05, "loss": 4.0925, "step": 8276 }, { "epoch": 2.7572249521112684, "grad_norm": 0.640625, "learning_rate": 1.2747058646395137e-05, "loss": 4.0691, "step": 8277 }, { "epoch": 2.757558091113517, "grad_norm": 0.61328125, "learning_rate": 1.2746480250147526e-05, "loss": 4.0976, "step": 8278 }, { "epoch": 2.757891230115766, "grad_norm": 0.609375, "learning_rate": 1.2745901792789784e-05, "loss": 4.018, "step": 8279 }, { "epoch": 2.7582243691180146, "grad_norm": 0.578125, "learning_rate": 1.2745323274328648e-05, "loss": 4.054, "step": 8280 }, { "epoch": 2.7585575081202633, "grad_norm": 0.625, "learning_rate": 1.2744744694770859e-05, "loss": 3.973, "step": 8281 }, { "epoch": 2.758890647122512, "grad_norm": 0.6640625, "learning_rate": 1.2744166054123155e-05, "loss": 3.992, "step": 8282 }, { "epoch": 2.7592237861247604, "grad_norm": 0.6015625, "learning_rate": 1.2743587352392273e-05, "loss": 4.0572, "step": 8283 }, { "epoch": 2.759556925127009, "grad_norm": 0.625, "learning_rate": 1.2743008589584957e-05, "loss": 4.0692, "step": 8284 }, { "epoch": 2.759890064129258, "grad_norm": 0.59765625, "learning_rate": 1.2742429765707946e-05, "loss": 4.0706, "step": 8285 }, { "epoch": 2.7602232031315066, "grad_norm": 0.609375, "learning_rate": 1.2741850880767985e-05, "loss": 4.043, "step": 8286 }, { "epoch": 2.7605563421337553, "grad_norm": 0.65625, "learning_rate": 1.2741271934771817e-05, "loss": 3.9813, "step": 8287 }, { "epoch": 2.760889481136004, "grad_norm": 0.61328125, "learning_rate": 1.274069292772618e-05, "loss": 3.9858, "step": 8288 }, { "epoch": 2.761222620138253, "grad_norm": 0.609375, "learning_rate": 1.2740113859637822e-05, "loss": 4.09, "step": 8289 }, { "epoch": 2.761555759140501, "grad_norm": 0.640625, "learning_rate": 1.2739534730513488e-05, "loss": 4.1237, "step": 8290 }, { "epoch": 2.7618888981427503, "grad_norm": 0.62890625, "learning_rate": 1.2738955540359923e-05, "loss": 4.0341, "step": 8291 }, { "epoch": 2.7622220371449986, "grad_norm": 0.6171875, "learning_rate": 1.2738376289183874e-05, "loss": 4.0031, "step": 8292 }, { "epoch": 2.7625551761472473, "grad_norm": 0.6171875, "learning_rate": 1.2737796976992088e-05, "loss": 4.0333, "step": 8293 }, { "epoch": 2.762888315149496, "grad_norm": 0.609375, "learning_rate": 1.2737217603791313e-05, "loss": 4.0142, "step": 8294 }, { "epoch": 2.763221454151745, "grad_norm": 0.58984375, "learning_rate": 1.2736638169588293e-05, "loss": 3.9766, "step": 8295 }, { "epoch": 2.7635545931539935, "grad_norm": 0.6171875, "learning_rate": 1.2736058674389787e-05, "loss": 4.0819, "step": 8296 }, { "epoch": 2.7638877321562423, "grad_norm": 0.640625, "learning_rate": 1.2735479118202536e-05, "loss": 4.102, "step": 8297 }, { "epoch": 2.764220871158491, "grad_norm": 0.61328125, "learning_rate": 1.2734899501033291e-05, "loss": 4.0467, "step": 8298 }, { "epoch": 2.7645540101607393, "grad_norm": 0.62890625, "learning_rate": 1.2734319822888806e-05, "loss": 4.0245, "step": 8299 }, { "epoch": 2.7648871491629885, "grad_norm": 0.59765625, "learning_rate": 1.2733740083775833e-05, "loss": 4.051, "step": 8300 }, { "epoch": 2.765220288165237, "grad_norm": 0.640625, "learning_rate": 1.2733160283701125e-05, "loss": 4.0866, "step": 8301 }, { "epoch": 2.7655534271674855, "grad_norm": 0.5703125, "learning_rate": 1.2732580422671436e-05, "loss": 4.0439, "step": 8302 }, { "epoch": 2.7658865661697343, "grad_norm": 0.6171875, "learning_rate": 1.2732000500693517e-05, "loss": 3.988, "step": 8303 }, { "epoch": 2.766219705171983, "grad_norm": 0.6015625, "learning_rate": 1.2731420517774125e-05, "loss": 4.0125, "step": 8304 }, { "epoch": 2.7665528441742318, "grad_norm": 0.60546875, "learning_rate": 1.2730840473920016e-05, "loss": 4.0495, "step": 8305 }, { "epoch": 2.7668859831764805, "grad_norm": 0.59375, "learning_rate": 1.2730260369137944e-05, "loss": 4.0266, "step": 8306 }, { "epoch": 2.7672191221787292, "grad_norm": 0.6328125, "learning_rate": 1.2729680203434667e-05, "loss": 3.9425, "step": 8307 }, { "epoch": 2.7675522611809775, "grad_norm": 0.64453125, "learning_rate": 1.2729099976816945e-05, "loss": 4.0109, "step": 8308 }, { "epoch": 2.7678854001832267, "grad_norm": 0.671875, "learning_rate": 1.2728519689291535e-05, "loss": 4.0055, "step": 8309 }, { "epoch": 2.768218539185475, "grad_norm": 0.62890625, "learning_rate": 1.2727939340865195e-05, "loss": 3.9566, "step": 8310 }, { "epoch": 2.7685516781877237, "grad_norm": 0.62890625, "learning_rate": 1.2727358931544683e-05, "loss": 4.1327, "step": 8311 }, { "epoch": 2.7688848171899725, "grad_norm": 0.61328125, "learning_rate": 1.2726778461336764e-05, "loss": 4.0528, "step": 8312 }, { "epoch": 2.769217956192221, "grad_norm": 0.59765625, "learning_rate": 1.2726197930248195e-05, "loss": 4.0459, "step": 8313 }, { "epoch": 2.76955109519447, "grad_norm": 0.63671875, "learning_rate": 1.2725617338285745e-05, "loss": 4.0352, "step": 8314 }, { "epoch": 2.7698842341967187, "grad_norm": 0.6015625, "learning_rate": 1.2725036685456167e-05, "loss": 3.9418, "step": 8315 }, { "epoch": 2.7702173731989674, "grad_norm": 0.65625, "learning_rate": 1.2724455971766233e-05, "loss": 4.004, "step": 8316 }, { "epoch": 2.7705505122012157, "grad_norm": 0.58203125, "learning_rate": 1.27238751972227e-05, "loss": 4.0523, "step": 8317 }, { "epoch": 2.770883651203465, "grad_norm": 0.6171875, "learning_rate": 1.2723294361832334e-05, "loss": 4.0365, "step": 8318 }, { "epoch": 2.771216790205713, "grad_norm": 0.625, "learning_rate": 1.2722713465601907e-05, "loss": 4.0662, "step": 8319 }, { "epoch": 2.771549929207962, "grad_norm": 0.59765625, "learning_rate": 1.2722132508538177e-05, "loss": 4.025, "step": 8320 }, { "epoch": 2.7718830682102107, "grad_norm": 0.6328125, "learning_rate": 1.2721551490647917e-05, "loss": 4.0324, "step": 8321 }, { "epoch": 2.7722162072124594, "grad_norm": 0.61328125, "learning_rate": 1.272097041193789e-05, "loss": 4.0732, "step": 8322 }, { "epoch": 2.772549346214708, "grad_norm": 0.65234375, "learning_rate": 1.2720389272414867e-05, "loss": 3.9957, "step": 8323 }, { "epoch": 2.772882485216957, "grad_norm": 0.62890625, "learning_rate": 1.2719808072085617e-05, "loss": 3.9982, "step": 8324 }, { "epoch": 2.7732156242192056, "grad_norm": 0.63671875, "learning_rate": 1.2719226810956907e-05, "loss": 3.9672, "step": 8325 }, { "epoch": 2.773548763221454, "grad_norm": 0.62109375, "learning_rate": 1.271864548903551e-05, "loss": 4.0242, "step": 8326 }, { "epoch": 2.7738819022237027, "grad_norm": 0.58203125, "learning_rate": 1.2718064106328198e-05, "loss": 4.0243, "step": 8327 }, { "epoch": 2.7742150412259514, "grad_norm": 0.6015625, "learning_rate": 1.271748266284174e-05, "loss": 4.0534, "step": 8328 }, { "epoch": 2.7745481802282, "grad_norm": 0.5625, "learning_rate": 1.271690115858291e-05, "loss": 4.0633, "step": 8329 }, { "epoch": 2.774881319230449, "grad_norm": 0.62890625, "learning_rate": 1.2716319593558483e-05, "loss": 3.9362, "step": 8330 }, { "epoch": 2.7752144582326976, "grad_norm": 0.60546875, "learning_rate": 1.2715737967775231e-05, "loss": 4.0438, "step": 8331 }, { "epoch": 2.7755475972349464, "grad_norm": 0.62890625, "learning_rate": 1.2715156281239929e-05, "loss": 3.9958, "step": 8332 }, { "epoch": 2.775880736237195, "grad_norm": 0.62890625, "learning_rate": 1.2714574533959352e-05, "loss": 4.0419, "step": 8333 }, { "epoch": 2.776213875239444, "grad_norm": 0.625, "learning_rate": 1.2713992725940274e-05, "loss": 4.0821, "step": 8334 }, { "epoch": 2.776547014241692, "grad_norm": 0.6484375, "learning_rate": 1.2713410857189477e-05, "loss": 4.0452, "step": 8335 }, { "epoch": 2.776880153243941, "grad_norm": 0.61328125, "learning_rate": 1.2712828927713736e-05, "loss": 4.0687, "step": 8336 }, { "epoch": 2.7772132922461896, "grad_norm": 0.609375, "learning_rate": 1.2712246937519828e-05, "loss": 3.9868, "step": 8337 }, { "epoch": 2.7775464312484384, "grad_norm": 0.578125, "learning_rate": 1.2711664886614535e-05, "loss": 4.1473, "step": 8338 }, { "epoch": 2.777879570250687, "grad_norm": 0.64453125, "learning_rate": 1.271108277500463e-05, "loss": 3.9759, "step": 8339 }, { "epoch": 2.778212709252936, "grad_norm": 0.6328125, "learning_rate": 1.2710500602696901e-05, "loss": 3.9602, "step": 8340 }, { "epoch": 2.7785458482551846, "grad_norm": 0.65234375, "learning_rate": 1.2709918369698127e-05, "loss": 3.9957, "step": 8341 }, { "epoch": 2.7788789872574333, "grad_norm": 0.64453125, "learning_rate": 1.2709336076015087e-05, "loss": 4.0721, "step": 8342 }, { "epoch": 2.779212126259682, "grad_norm": 0.60546875, "learning_rate": 1.2708753721654564e-05, "loss": 4.01, "step": 8343 }, { "epoch": 2.7795452652619304, "grad_norm": 0.6171875, "learning_rate": 1.2708171306623342e-05, "loss": 4.0447, "step": 8344 }, { "epoch": 2.779878404264179, "grad_norm": 0.58984375, "learning_rate": 1.2707588830928205e-05, "loss": 3.9504, "step": 8345 }, { "epoch": 2.780211543266428, "grad_norm": 0.6171875, "learning_rate": 1.2707006294575942e-05, "loss": 4.0339, "step": 8346 }, { "epoch": 2.7805446822686766, "grad_norm": 0.63671875, "learning_rate": 1.2706423697573332e-05, "loss": 4.0056, "step": 8347 }, { "epoch": 2.7808778212709253, "grad_norm": 0.60546875, "learning_rate": 1.2705841039927161e-05, "loss": 4.0789, "step": 8348 }, { "epoch": 2.781210960273174, "grad_norm": 0.6484375, "learning_rate": 1.270525832164422e-05, "loss": 3.9869, "step": 8349 }, { "epoch": 2.781544099275423, "grad_norm": 0.609375, "learning_rate": 1.2704675542731293e-05, "loss": 4.0553, "step": 8350 }, { "epoch": 2.781877238277671, "grad_norm": 0.59375, "learning_rate": 1.270409270319517e-05, "loss": 3.9955, "step": 8351 }, { "epoch": 2.7822103772799203, "grad_norm": 0.609375, "learning_rate": 1.2703509803042638e-05, "loss": 4.0266, "step": 8352 }, { "epoch": 2.7825435162821686, "grad_norm": 0.6171875, "learning_rate": 1.2702926842280487e-05, "loss": 4.0232, "step": 8353 }, { "epoch": 2.7828766552844173, "grad_norm": 0.65234375, "learning_rate": 1.270234382091551e-05, "loss": 3.9716, "step": 8354 }, { "epoch": 2.783209794286666, "grad_norm": 0.6328125, "learning_rate": 1.2701760738954494e-05, "loss": 4.0841, "step": 8355 }, { "epoch": 2.783542933288915, "grad_norm": 0.59375, "learning_rate": 1.2701177596404233e-05, "loss": 4.1029, "step": 8356 }, { "epoch": 2.7838760722911635, "grad_norm": 0.62109375, "learning_rate": 1.2700594393271518e-05, "loss": 3.9972, "step": 8357 }, { "epoch": 2.7842092112934123, "grad_norm": 0.61328125, "learning_rate": 1.2700011129563143e-05, "loss": 4.0495, "step": 8358 }, { "epoch": 2.784542350295661, "grad_norm": 0.6171875, "learning_rate": 1.2699427805285903e-05, "loss": 4.0216, "step": 8359 }, { "epoch": 2.7848754892979093, "grad_norm": 0.61328125, "learning_rate": 1.269884442044659e-05, "loss": 3.9711, "step": 8360 }, { "epoch": 2.7852086283001585, "grad_norm": 0.60546875, "learning_rate": 1.2698260975052e-05, "loss": 4.057, "step": 8361 }, { "epoch": 2.7855417673024068, "grad_norm": 0.60546875, "learning_rate": 1.269767746910893e-05, "loss": 3.9762, "step": 8362 }, { "epoch": 2.7858749063046555, "grad_norm": 0.640625, "learning_rate": 1.2697093902624174e-05, "loss": 4.0316, "step": 8363 }, { "epoch": 2.7862080453069042, "grad_norm": 0.6796875, "learning_rate": 1.2696510275604531e-05, "loss": 3.936, "step": 8364 }, { "epoch": 2.786541184309153, "grad_norm": 0.6171875, "learning_rate": 1.26959265880568e-05, "loss": 4.0456, "step": 8365 }, { "epoch": 2.7868743233114017, "grad_norm": 0.64453125, "learning_rate": 1.2695342839987778e-05, "loss": 3.9905, "step": 8366 }, { "epoch": 2.7872074623136505, "grad_norm": 0.62890625, "learning_rate": 1.2694759031404265e-05, "loss": 4.0643, "step": 8367 }, { "epoch": 2.787540601315899, "grad_norm": 0.609375, "learning_rate": 1.2694175162313062e-05, "loss": 4.045, "step": 8368 }, { "epoch": 2.7878737403181475, "grad_norm": 0.6328125, "learning_rate": 1.269359123272097e-05, "loss": 4.0199, "step": 8369 }, { "epoch": 2.7882068793203967, "grad_norm": 0.5859375, "learning_rate": 1.2693007242634786e-05, "loss": 3.9727, "step": 8370 }, { "epoch": 2.788540018322645, "grad_norm": 0.60546875, "learning_rate": 1.269242319206132e-05, "loss": 3.9968, "step": 8371 }, { "epoch": 2.7888731573248937, "grad_norm": 0.65234375, "learning_rate": 1.2691839081007367e-05, "loss": 4.0119, "step": 8372 }, { "epoch": 2.7892062963271425, "grad_norm": 0.625, "learning_rate": 1.2691254909479734e-05, "loss": 4.042, "step": 8373 }, { "epoch": 2.789539435329391, "grad_norm": 0.61328125, "learning_rate": 1.2690670677485227e-05, "loss": 4.0843, "step": 8374 }, { "epoch": 2.78987257433164, "grad_norm": 0.6015625, "learning_rate": 1.269008638503065e-05, "loss": 4.0634, "step": 8375 }, { "epoch": 2.7902057133338887, "grad_norm": 0.6484375, "learning_rate": 1.2689502032122806e-05, "loss": 4.0578, "step": 8376 }, { "epoch": 2.7905388523361374, "grad_norm": 0.6328125, "learning_rate": 1.2688917618768506e-05, "loss": 4.0389, "step": 8377 }, { "epoch": 2.7908719913383857, "grad_norm": 0.5859375, "learning_rate": 1.2688333144974555e-05, "loss": 4.0567, "step": 8378 }, { "epoch": 2.791205130340635, "grad_norm": 0.58984375, "learning_rate": 1.268774861074776e-05, "loss": 4.0145, "step": 8379 }, { "epoch": 2.791538269342883, "grad_norm": 0.625, "learning_rate": 1.2687164016094931e-05, "loss": 4.042, "step": 8380 }, { "epoch": 2.791871408345132, "grad_norm": 0.671875, "learning_rate": 1.2686579361022875e-05, "loss": 4.0251, "step": 8381 }, { "epoch": 2.7922045473473807, "grad_norm": 0.609375, "learning_rate": 1.2685994645538404e-05, "loss": 4.0363, "step": 8382 }, { "epoch": 2.7925376863496294, "grad_norm": 0.6171875, "learning_rate": 1.268540986964833e-05, "loss": 4.0574, "step": 8383 }, { "epoch": 2.792870825351878, "grad_norm": 0.58203125, "learning_rate": 1.2684825033359462e-05, "loss": 3.9915, "step": 8384 }, { "epoch": 2.793203964354127, "grad_norm": 0.59375, "learning_rate": 1.2684240136678611e-05, "loss": 4.0177, "step": 8385 }, { "epoch": 2.7935371033563756, "grad_norm": 0.65625, "learning_rate": 1.2683655179612593e-05, "loss": 4.0465, "step": 8386 }, { "epoch": 2.793870242358624, "grad_norm": 0.59765625, "learning_rate": 1.268307016216822e-05, "loss": 4.0308, "step": 8387 }, { "epoch": 2.794203381360873, "grad_norm": 0.640625, "learning_rate": 1.2682485084352306e-05, "loss": 4.0229, "step": 8388 }, { "epoch": 2.7945365203631214, "grad_norm": 0.6484375, "learning_rate": 1.2681899946171666e-05, "loss": 4.0308, "step": 8389 }, { "epoch": 2.79486965936537, "grad_norm": 0.60546875, "learning_rate": 1.2681314747633116e-05, "loss": 4.083, "step": 8390 }, { "epoch": 2.795202798367619, "grad_norm": 0.6171875, "learning_rate": 1.2680729488743469e-05, "loss": 4.0011, "step": 8391 }, { "epoch": 2.7955359373698676, "grad_norm": 0.60546875, "learning_rate": 1.2680144169509546e-05, "loss": 4.0305, "step": 8392 }, { "epoch": 2.7958690763721163, "grad_norm": 0.609375, "learning_rate": 1.2679558789938168e-05, "loss": 3.9778, "step": 8393 }, { "epoch": 2.796202215374365, "grad_norm": 0.6015625, "learning_rate": 1.2678973350036143e-05, "loss": 3.9954, "step": 8394 }, { "epoch": 2.796535354376614, "grad_norm": 0.6015625, "learning_rate": 1.26783878498103e-05, "loss": 4.0552, "step": 8395 }, { "epoch": 2.796868493378862, "grad_norm": 0.609375, "learning_rate": 1.2677802289267453e-05, "loss": 4.0426, "step": 8396 }, { "epoch": 2.797201632381111, "grad_norm": 0.6171875, "learning_rate": 1.2677216668414427e-05, "loss": 4.0807, "step": 8397 }, { "epoch": 2.7975347713833596, "grad_norm": 0.62890625, "learning_rate": 1.2676630987258037e-05, "loss": 4.0038, "step": 8398 }, { "epoch": 2.7978679103856083, "grad_norm": 0.60546875, "learning_rate": 1.267604524580511e-05, "loss": 4.0446, "step": 8399 }, { "epoch": 2.798201049387857, "grad_norm": 0.640625, "learning_rate": 1.2675459444062468e-05, "loss": 4.0106, "step": 8400 }, { "epoch": 2.798534188390106, "grad_norm": 0.59765625, "learning_rate": 1.2674873582036932e-05, "loss": 4.0475, "step": 8401 }, { "epoch": 2.7988673273923546, "grad_norm": 0.6015625, "learning_rate": 1.2674287659735329e-05, "loss": 3.9806, "step": 8402 }, { "epoch": 2.7992004663946033, "grad_norm": 0.60546875, "learning_rate": 1.2673701677164482e-05, "loss": 3.9601, "step": 8403 }, { "epoch": 2.799533605396852, "grad_norm": 0.63671875, "learning_rate": 1.2673115634331217e-05, "loss": 4.0209, "step": 8404 }, { "epoch": 2.7998667443991003, "grad_norm": 0.60546875, "learning_rate": 1.2672529531242358e-05, "loss": 3.9616, "step": 8405 }, { "epoch": 2.800199883401349, "grad_norm": 0.59765625, "learning_rate": 1.2671943367904736e-05, "loss": 3.9939, "step": 8406 }, { "epoch": 2.800533022403598, "grad_norm": 0.60546875, "learning_rate": 1.2671357144325174e-05, "loss": 4.0296, "step": 8407 }, { "epoch": 2.8008661614058465, "grad_norm": 0.61328125, "learning_rate": 1.2670770860510504e-05, "loss": 3.9848, "step": 8408 }, { "epoch": 2.8011993004080953, "grad_norm": 0.59375, "learning_rate": 1.2670184516467554e-05, "loss": 4.0429, "step": 8409 }, { "epoch": 2.801532439410344, "grad_norm": 0.6015625, "learning_rate": 1.2669598112203152e-05, "loss": 4.0531, "step": 8410 }, { "epoch": 2.8018655784125928, "grad_norm": 0.640625, "learning_rate": 1.266901164772413e-05, "loss": 3.9708, "step": 8411 }, { "epoch": 2.8021987174148415, "grad_norm": 0.63671875, "learning_rate": 1.2668425123037315e-05, "loss": 4.0424, "step": 8412 }, { "epoch": 2.8025318564170902, "grad_norm": 0.59765625, "learning_rate": 1.2667838538149545e-05, "loss": 4.0305, "step": 8413 }, { "epoch": 2.8028649954193385, "grad_norm": 0.640625, "learning_rate": 1.266725189306765e-05, "loss": 3.9527, "step": 8414 }, { "epoch": 2.8031981344215873, "grad_norm": 0.58984375, "learning_rate": 1.2666665187798461e-05, "loss": 4.0045, "step": 8415 }, { "epoch": 2.803531273423836, "grad_norm": 0.58203125, "learning_rate": 1.2666078422348815e-05, "loss": 4.0208, "step": 8416 }, { "epoch": 2.8038644124260848, "grad_norm": 0.58203125, "learning_rate": 1.2665491596725544e-05, "loss": 4.109, "step": 8417 }, { "epoch": 2.8041975514283335, "grad_norm": 0.6328125, "learning_rate": 1.2664904710935485e-05, "loss": 4.0195, "step": 8418 }, { "epoch": 2.8045306904305822, "grad_norm": 0.58984375, "learning_rate": 1.2664317764985472e-05, "loss": 4.0259, "step": 8419 }, { "epoch": 2.804863829432831, "grad_norm": 0.64453125, "learning_rate": 1.2663730758882346e-05, "loss": 4.0693, "step": 8420 }, { "epoch": 2.8051969684350793, "grad_norm": 0.61328125, "learning_rate": 1.2663143692632939e-05, "loss": 4.0552, "step": 8421 }, { "epoch": 2.8055301074373284, "grad_norm": 0.59765625, "learning_rate": 1.2662556566244091e-05, "loss": 4.0341, "step": 8422 }, { "epoch": 2.8058632464395767, "grad_norm": 0.61328125, "learning_rate": 1.2661969379722641e-05, "loss": 4.0601, "step": 8423 }, { "epoch": 2.8061963854418255, "grad_norm": 0.62109375, "learning_rate": 1.2661382133075432e-05, "loss": 4.0419, "step": 8424 }, { "epoch": 2.806529524444074, "grad_norm": 0.62109375, "learning_rate": 1.2660794826309296e-05, "loss": 4.0612, "step": 8425 }, { "epoch": 2.806862663446323, "grad_norm": 0.65234375, "learning_rate": 1.2660207459431083e-05, "loss": 4.0016, "step": 8426 }, { "epoch": 2.8071958024485717, "grad_norm": 0.63671875, "learning_rate": 1.2659620032447626e-05, "loss": 4.0041, "step": 8427 }, { "epoch": 2.8075289414508204, "grad_norm": 0.61328125, "learning_rate": 1.2659032545365773e-05, "loss": 3.9853, "step": 8428 }, { "epoch": 2.807862080453069, "grad_norm": 0.59375, "learning_rate": 1.2658444998192363e-05, "loss": 3.9965, "step": 8429 }, { "epoch": 2.8081952194553175, "grad_norm": 0.61328125, "learning_rate": 1.2657857390934245e-05, "loss": 4.0295, "step": 8430 }, { "epoch": 2.8085283584575667, "grad_norm": 0.61328125, "learning_rate": 1.2657269723598261e-05, "loss": 3.9966, "step": 8431 }, { "epoch": 2.808861497459815, "grad_norm": 0.640625, "learning_rate": 1.2656681996191253e-05, "loss": 4.0485, "step": 8432 }, { "epoch": 2.8091946364620637, "grad_norm": 0.625, "learning_rate": 1.2656094208720071e-05, "loss": 4.0424, "step": 8433 }, { "epoch": 2.8095277754643124, "grad_norm": 0.609375, "learning_rate": 1.2655506361191558e-05, "loss": 4.0553, "step": 8434 }, { "epoch": 2.809860914466561, "grad_norm": 0.59765625, "learning_rate": 1.2654918453612563e-05, "loss": 4.0564, "step": 8435 }, { "epoch": 2.81019405346881, "grad_norm": 0.6015625, "learning_rate": 1.2654330485989933e-05, "loss": 4.0565, "step": 8436 }, { "epoch": 2.8105271924710586, "grad_norm": 0.58984375, "learning_rate": 1.265374245833052e-05, "loss": 4.0093, "step": 8437 }, { "epoch": 2.8108603314733074, "grad_norm": 0.62109375, "learning_rate": 1.2653154370641166e-05, "loss": 4.0962, "step": 8438 }, { "epoch": 2.8111934704755557, "grad_norm": 0.65625, "learning_rate": 1.2652566222928726e-05, "loss": 3.9637, "step": 8439 }, { "epoch": 2.811526609477805, "grad_norm": 0.6328125, "learning_rate": 1.2651978015200054e-05, "loss": 4.0232, "step": 8440 }, { "epoch": 2.811859748480053, "grad_norm": 0.60546875, "learning_rate": 1.2651389747461994e-05, "loss": 4.0787, "step": 8441 }, { "epoch": 2.812192887482302, "grad_norm": 0.61328125, "learning_rate": 1.2650801419721402e-05, "loss": 4.051, "step": 8442 }, { "epoch": 2.8125260264845506, "grad_norm": 0.59375, "learning_rate": 1.265021303198513e-05, "loss": 3.9982, "step": 8443 }, { "epoch": 2.8128591654867994, "grad_norm": 0.609375, "learning_rate": 1.2649624584260031e-05, "loss": 4.003, "step": 8444 }, { "epoch": 2.813192304489048, "grad_norm": 0.625, "learning_rate": 1.2649036076552961e-05, "loss": 3.983, "step": 8445 }, { "epoch": 2.813525443491297, "grad_norm": 0.5859375, "learning_rate": 1.2648447508870773e-05, "loss": 3.987, "step": 8446 }, { "epoch": 2.8138585824935456, "grad_norm": 0.60546875, "learning_rate": 1.2647858881220322e-05, "loss": 4.0662, "step": 8447 }, { "epoch": 2.814191721495794, "grad_norm": 0.58203125, "learning_rate": 1.2647270193608467e-05, "loss": 4.0141, "step": 8448 }, { "epoch": 2.814524860498043, "grad_norm": 0.60546875, "learning_rate": 1.2646681446042061e-05, "loss": 3.9939, "step": 8449 }, { "epoch": 2.8148579995002914, "grad_norm": 0.63671875, "learning_rate": 1.2646092638527965e-05, "loss": 4.0278, "step": 8450 }, { "epoch": 2.81519113850254, "grad_norm": 0.609375, "learning_rate": 1.2645503771073038e-05, "loss": 4.0384, "step": 8451 }, { "epoch": 2.815524277504789, "grad_norm": 0.6171875, "learning_rate": 1.2644914843684135e-05, "loss": 4.0897, "step": 8452 }, { "epoch": 2.8158574165070376, "grad_norm": 0.62890625, "learning_rate": 1.2644325856368118e-05, "loss": 3.9884, "step": 8453 }, { "epoch": 2.8161905555092863, "grad_norm": 0.60546875, "learning_rate": 1.2643736809131849e-05, "loss": 4.0977, "step": 8454 }, { "epoch": 2.816523694511535, "grad_norm": 0.6015625, "learning_rate": 1.2643147701982185e-05, "loss": 4.0516, "step": 8455 }, { "epoch": 2.816856833513784, "grad_norm": 0.66015625, "learning_rate": 1.2642558534925994e-05, "loss": 4.0852, "step": 8456 }, { "epoch": 2.817189972516032, "grad_norm": 0.58984375, "learning_rate": 1.2641969307970131e-05, "loss": 4.0536, "step": 8457 }, { "epoch": 2.8175231115182813, "grad_norm": 0.61328125, "learning_rate": 1.2641380021121465e-05, "loss": 4.0493, "step": 8458 }, { "epoch": 2.8178562505205296, "grad_norm": 0.6484375, "learning_rate": 1.2640790674386857e-05, "loss": 4.0033, "step": 8459 }, { "epoch": 2.8181893895227783, "grad_norm": 0.62109375, "learning_rate": 1.2640201267773172e-05, "loss": 4.0103, "step": 8460 }, { "epoch": 2.818522528525027, "grad_norm": 0.6015625, "learning_rate": 1.2639611801287278e-05, "loss": 3.9974, "step": 8461 }, { "epoch": 2.818855667527276, "grad_norm": 0.62109375, "learning_rate": 1.2639022274936038e-05, "loss": 4.0786, "step": 8462 }, { "epoch": 2.8191888065295245, "grad_norm": 0.6015625, "learning_rate": 1.263843268872632e-05, "loss": 3.9934, "step": 8463 }, { "epoch": 2.8195219455317733, "grad_norm": 0.6015625, "learning_rate": 1.263784304266499e-05, "loss": 4.0317, "step": 8464 }, { "epoch": 2.819855084534022, "grad_norm": 0.625, "learning_rate": 1.2637253336758919e-05, "loss": 4.0415, "step": 8465 }, { "epoch": 2.8201882235362703, "grad_norm": 0.6171875, "learning_rate": 1.263666357101497e-05, "loss": 3.9995, "step": 8466 }, { "epoch": 2.820521362538519, "grad_norm": 0.59375, "learning_rate": 1.263607374544002e-05, "loss": 3.9611, "step": 8467 }, { "epoch": 2.820854501540768, "grad_norm": 0.65625, "learning_rate": 1.2635483860040934e-05, "loss": 3.9624, "step": 8468 }, { "epoch": 2.8211876405430165, "grad_norm": 0.609375, "learning_rate": 1.2634893914824583e-05, "loss": 3.9227, "step": 8469 }, { "epoch": 2.8215207795452653, "grad_norm": 0.6484375, "learning_rate": 1.2634303909797843e-05, "loss": 4.0387, "step": 8470 }, { "epoch": 2.821853918547514, "grad_norm": 0.61328125, "learning_rate": 1.2633713844967584e-05, "loss": 4.0078, "step": 8471 }, { "epoch": 2.8221870575497627, "grad_norm": 0.609375, "learning_rate": 1.2633123720340677e-05, "loss": 4.1058, "step": 8472 }, { "epoch": 2.8225201965520115, "grad_norm": 0.6171875, "learning_rate": 1.2632533535923995e-05, "loss": 4.0542, "step": 8473 }, { "epoch": 2.82285333555426, "grad_norm": 0.625, "learning_rate": 1.2631943291724416e-05, "loss": 4.051, "step": 8474 }, { "epoch": 2.8231864745565085, "grad_norm": 0.66015625, "learning_rate": 1.2631352987748812e-05, "loss": 3.9985, "step": 8475 }, { "epoch": 2.8235196135587572, "grad_norm": 0.58984375, "learning_rate": 1.2630762624004063e-05, "loss": 4.0239, "step": 8476 }, { "epoch": 2.823852752561006, "grad_norm": 0.60546875, "learning_rate": 1.2630172200497042e-05, "loss": 3.9833, "step": 8477 }, { "epoch": 2.8241858915632547, "grad_norm": 0.61328125, "learning_rate": 1.2629581717234626e-05, "loss": 4.0431, "step": 8478 }, { "epoch": 2.8245190305655035, "grad_norm": 0.6015625, "learning_rate": 1.2628991174223692e-05, "loss": 4.0218, "step": 8479 }, { "epoch": 2.824852169567752, "grad_norm": 0.6484375, "learning_rate": 1.2628400571471123e-05, "loss": 4.0331, "step": 8480 }, { "epoch": 2.825185308570001, "grad_norm": 0.59765625, "learning_rate": 1.2627809908983794e-05, "loss": 3.9355, "step": 8481 }, { "epoch": 2.8255184475722497, "grad_norm": 0.6328125, "learning_rate": 1.2627219186768587e-05, "loss": 4.0092, "step": 8482 }, { "epoch": 2.8258515865744984, "grad_norm": 0.6015625, "learning_rate": 1.2626628404832382e-05, "loss": 4.1336, "step": 8483 }, { "epoch": 2.8261847255767467, "grad_norm": 0.625, "learning_rate": 1.2626037563182058e-05, "loss": 3.9162, "step": 8484 }, { "epoch": 2.8265178645789955, "grad_norm": 0.60546875, "learning_rate": 1.2625446661824503e-05, "loss": 4.043, "step": 8485 }, { "epoch": 2.826851003581244, "grad_norm": 0.609375, "learning_rate": 1.2624855700766593e-05, "loss": 4.0184, "step": 8486 }, { "epoch": 2.827184142583493, "grad_norm": 0.609375, "learning_rate": 1.2624264680015218e-05, "loss": 4.0372, "step": 8487 }, { "epoch": 2.8275172815857417, "grad_norm": 0.62109375, "learning_rate": 1.2623673599577256e-05, "loss": 3.9765, "step": 8488 }, { "epoch": 2.8278504205879904, "grad_norm": 0.66015625, "learning_rate": 1.2623082459459595e-05, "loss": 4.0573, "step": 8489 }, { "epoch": 2.828183559590239, "grad_norm": 0.62109375, "learning_rate": 1.262249125966912e-05, "loss": 4.0317, "step": 8490 }, { "epoch": 2.8285166985924874, "grad_norm": 0.640625, "learning_rate": 1.2621900000212717e-05, "loss": 4.0154, "step": 8491 }, { "epoch": 2.8288498375947366, "grad_norm": 0.6015625, "learning_rate": 1.2621308681097273e-05, "loss": 4.0588, "step": 8492 }, { "epoch": 2.829182976596985, "grad_norm": 0.62890625, "learning_rate": 1.2620717302329675e-05, "loss": 4.1135, "step": 8493 }, { "epoch": 2.8295161155992337, "grad_norm": 0.61328125, "learning_rate": 1.2620125863916814e-05, "loss": 3.9634, "step": 8494 }, { "epoch": 2.8298492546014824, "grad_norm": 0.62890625, "learning_rate": 1.2619534365865575e-05, "loss": 3.9838, "step": 8495 }, { "epoch": 2.830182393603731, "grad_norm": 0.609375, "learning_rate": 1.2618942808182851e-05, "loss": 3.9423, "step": 8496 }, { "epoch": 2.83051553260598, "grad_norm": 0.58203125, "learning_rate": 1.261835119087553e-05, "loss": 4.0871, "step": 8497 }, { "epoch": 2.8308486716082286, "grad_norm": 0.62890625, "learning_rate": 1.2617759513950503e-05, "loss": 4.0267, "step": 8498 }, { "epoch": 2.8311818106104774, "grad_norm": 0.625, "learning_rate": 1.2617167777414664e-05, "loss": 3.9801, "step": 8499 }, { "epoch": 2.8315149496127257, "grad_norm": 0.640625, "learning_rate": 1.2616575981274902e-05, "loss": 4.0281, "step": 8500 }, { "epoch": 2.831848088614975, "grad_norm": 0.61328125, "learning_rate": 1.2615984125538115e-05, "loss": 3.9744, "step": 8501 }, { "epoch": 2.832181227617223, "grad_norm": 0.63671875, "learning_rate": 1.2615392210211191e-05, "loss": 3.9917, "step": 8502 }, { "epoch": 2.832514366619472, "grad_norm": 0.63671875, "learning_rate": 1.2614800235301031e-05, "loss": 4.0308, "step": 8503 }, { "epoch": 2.8328475056217206, "grad_norm": 0.63671875, "learning_rate": 1.2614208200814526e-05, "loss": 4.0369, "step": 8504 }, { "epoch": 2.8331806446239693, "grad_norm": 0.60546875, "learning_rate": 1.2613616106758572e-05, "loss": 4.0508, "step": 8505 }, { "epoch": 2.833513783626218, "grad_norm": 0.62890625, "learning_rate": 1.2613023953140069e-05, "loss": 3.9812, "step": 8506 }, { "epoch": 2.833846922628467, "grad_norm": 0.6171875, "learning_rate": 1.2612431739965909e-05, "loss": 4.0823, "step": 8507 }, { "epoch": 2.8341800616307156, "grad_norm": 0.640625, "learning_rate": 1.2611839467242993e-05, "loss": 4.0496, "step": 8508 }, { "epoch": 2.834513200632964, "grad_norm": 0.6328125, "learning_rate": 1.261124713497822e-05, "loss": 4.0607, "step": 8509 }, { "epoch": 2.834846339635213, "grad_norm": 0.64453125, "learning_rate": 1.2610654743178489e-05, "loss": 4.0084, "step": 8510 }, { "epoch": 2.8351794786374613, "grad_norm": 0.625, "learning_rate": 1.2610062291850698e-05, "loss": 3.9891, "step": 8511 }, { "epoch": 2.83551261763971, "grad_norm": 0.59765625, "learning_rate": 1.2609469781001752e-05, "loss": 4.055, "step": 8512 }, { "epoch": 2.835845756641959, "grad_norm": 0.61328125, "learning_rate": 1.2608877210638548e-05, "loss": 4.0559, "step": 8513 }, { "epoch": 2.8361788956442076, "grad_norm": 0.59765625, "learning_rate": 1.2608284580767992e-05, "loss": 4.0463, "step": 8514 }, { "epoch": 2.8365120346464563, "grad_norm": 0.61328125, "learning_rate": 1.2607691891396986e-05, "loss": 4.0067, "step": 8515 }, { "epoch": 2.836845173648705, "grad_norm": 0.609375, "learning_rate": 1.2607099142532432e-05, "loss": 4.0149, "step": 8516 }, { "epoch": 2.8371783126509538, "grad_norm": 0.609375, "learning_rate": 1.2606506334181234e-05, "loss": 4.0692, "step": 8517 }, { "epoch": 2.837511451653202, "grad_norm": 0.6328125, "learning_rate": 1.26059134663503e-05, "loss": 4.0363, "step": 8518 }, { "epoch": 2.8378445906554512, "grad_norm": 0.609375, "learning_rate": 1.260532053904653e-05, "loss": 3.9874, "step": 8519 }, { "epoch": 2.8381777296576995, "grad_norm": 0.62109375, "learning_rate": 1.2604727552276836e-05, "loss": 4.0559, "step": 8520 }, { "epoch": 2.8385108686599483, "grad_norm": 0.61328125, "learning_rate": 1.2604134506048123e-05, "loss": 4.0683, "step": 8521 }, { "epoch": 2.838844007662197, "grad_norm": 0.609375, "learning_rate": 1.26035414003673e-05, "loss": 4.0598, "step": 8522 }, { "epoch": 2.8391771466644458, "grad_norm": 0.61328125, "learning_rate": 1.260294823524127e-05, "loss": 4.0189, "step": 8523 }, { "epoch": 2.8395102856666945, "grad_norm": 0.6171875, "learning_rate": 1.2602355010676947e-05, "loss": 3.9596, "step": 8524 }, { "epoch": 2.8398434246689432, "grad_norm": 0.63671875, "learning_rate": 1.2601761726681243e-05, "loss": 3.9961, "step": 8525 }, { "epoch": 2.840176563671192, "grad_norm": 0.58984375, "learning_rate": 1.2601168383261063e-05, "loss": 3.9853, "step": 8526 }, { "epoch": 2.8405097026734403, "grad_norm": 0.59765625, "learning_rate": 1.2600574980423318e-05, "loss": 4.0451, "step": 8527 }, { "epoch": 2.8408428416756895, "grad_norm": 0.60546875, "learning_rate": 1.2599981518174926e-05, "loss": 4.0576, "step": 8528 }, { "epoch": 2.8411759806779378, "grad_norm": 0.60546875, "learning_rate": 1.2599387996522795e-05, "loss": 3.9483, "step": 8529 }, { "epoch": 2.8415091196801865, "grad_norm": 0.59765625, "learning_rate": 1.2598794415473839e-05, "loss": 4.0997, "step": 8530 }, { "epoch": 2.8418422586824352, "grad_norm": 0.62109375, "learning_rate": 1.2598200775034973e-05, "loss": 4.0241, "step": 8531 }, { "epoch": 2.842175397684684, "grad_norm": 0.62109375, "learning_rate": 1.2597607075213107e-05, "loss": 3.9751, "step": 8532 }, { "epoch": 2.8425085366869327, "grad_norm": 0.59765625, "learning_rate": 1.2597013316015164e-05, "loss": 3.9796, "step": 8533 }, { "epoch": 2.8428416756891814, "grad_norm": 0.57421875, "learning_rate": 1.2596419497448054e-05, "loss": 4.0511, "step": 8534 }, { "epoch": 2.84317481469143, "grad_norm": 0.64453125, "learning_rate": 1.2595825619518697e-05, "loss": 3.9638, "step": 8535 }, { "epoch": 2.8435079536936785, "grad_norm": 0.609375, "learning_rate": 1.2595231682234006e-05, "loss": 4.071, "step": 8536 }, { "epoch": 2.843841092695927, "grad_norm": 0.64453125, "learning_rate": 1.2594637685600906e-05, "loss": 4.0385, "step": 8537 }, { "epoch": 2.844174231698176, "grad_norm": 0.625, "learning_rate": 1.2594043629626311e-05, "loss": 4.0149, "step": 8538 }, { "epoch": 2.8445073707004247, "grad_norm": 0.60546875, "learning_rate": 1.2593449514317141e-05, "loss": 3.9729, "step": 8539 }, { "epoch": 2.8448405097026734, "grad_norm": 0.61328125, "learning_rate": 1.2592855339680317e-05, "loss": 4.0224, "step": 8540 }, { "epoch": 2.845173648704922, "grad_norm": 0.62890625, "learning_rate": 1.2592261105722757e-05, "loss": 4.0242, "step": 8541 }, { "epoch": 2.845506787707171, "grad_norm": 0.60546875, "learning_rate": 1.2591666812451386e-05, "loss": 3.9617, "step": 8542 }, { "epoch": 2.8458399267094197, "grad_norm": 0.609375, "learning_rate": 1.2591072459873127e-05, "loss": 3.9919, "step": 8543 }, { "epoch": 2.8461730657116684, "grad_norm": 0.65625, "learning_rate": 1.2590478047994898e-05, "loss": 4.0229, "step": 8544 }, { "epoch": 2.8465062047139167, "grad_norm": 0.6328125, "learning_rate": 1.258988357682363e-05, "loss": 4.0637, "step": 8545 }, { "epoch": 2.8468393437161654, "grad_norm": 0.59765625, "learning_rate": 1.258928904636624e-05, "loss": 4.0515, "step": 8546 }, { "epoch": 2.847172482718414, "grad_norm": 0.66796875, "learning_rate": 1.2588694456629657e-05, "loss": 4.0058, "step": 8547 }, { "epoch": 2.847505621720663, "grad_norm": 0.65234375, "learning_rate": 1.2588099807620804e-05, "loss": 4.0124, "step": 8548 }, { "epoch": 2.8478387607229116, "grad_norm": 0.63671875, "learning_rate": 1.2587505099346612e-05, "loss": 4.0102, "step": 8549 }, { "epoch": 2.8481718997251604, "grad_norm": 0.6015625, "learning_rate": 1.2586910331814002e-05, "loss": 4.0876, "step": 8550 }, { "epoch": 2.848505038727409, "grad_norm": 0.62109375, "learning_rate": 1.2586315505029904e-05, "loss": 4.0129, "step": 8551 }, { "epoch": 2.848838177729658, "grad_norm": 0.64453125, "learning_rate": 1.2585720619001253e-05, "loss": 3.9856, "step": 8552 }, { "epoch": 2.8491713167319066, "grad_norm": 0.625, "learning_rate": 1.2585125673734969e-05, "loss": 4.0507, "step": 8553 }, { "epoch": 2.849504455734155, "grad_norm": 0.61328125, "learning_rate": 1.2584530669237984e-05, "loss": 4.0437, "step": 8554 }, { "epoch": 2.8498375947364036, "grad_norm": 0.6640625, "learning_rate": 1.258393560551723e-05, "loss": 3.9922, "step": 8555 }, { "epoch": 2.8501707337386524, "grad_norm": 0.63671875, "learning_rate": 1.258334048257964e-05, "loss": 3.9972, "step": 8556 }, { "epoch": 2.850503872740901, "grad_norm": 0.6484375, "learning_rate": 1.2582745300432143e-05, "loss": 4.0529, "step": 8557 }, { "epoch": 2.85083701174315, "grad_norm": 0.58984375, "learning_rate": 1.2582150059081672e-05, "loss": 4.0493, "step": 8558 }, { "epoch": 2.8511701507453986, "grad_norm": 0.59765625, "learning_rate": 1.2581554758535161e-05, "loss": 3.9569, "step": 8559 }, { "epoch": 2.8515032897476473, "grad_norm": 0.59375, "learning_rate": 1.2580959398799546e-05, "loss": 3.9925, "step": 8560 }, { "epoch": 2.8518364287498956, "grad_norm": 0.62890625, "learning_rate": 1.2580363979881757e-05, "loss": 4.0314, "step": 8561 }, { "epoch": 2.852169567752145, "grad_norm": 0.61328125, "learning_rate": 1.257976850178873e-05, "loss": 4.0402, "step": 8562 }, { "epoch": 2.852502706754393, "grad_norm": 0.61328125, "learning_rate": 1.2579172964527405e-05, "loss": 4.0277, "step": 8563 }, { "epoch": 2.852835845756642, "grad_norm": 0.625, "learning_rate": 1.2578577368104717e-05, "loss": 4.0011, "step": 8564 }, { "epoch": 2.8531689847588906, "grad_norm": 0.62890625, "learning_rate": 1.25779817125276e-05, "loss": 4.0284, "step": 8565 }, { "epoch": 2.8535021237611393, "grad_norm": 0.6015625, "learning_rate": 1.2577385997802997e-05, "loss": 4.001, "step": 8566 }, { "epoch": 2.853835262763388, "grad_norm": 0.609375, "learning_rate": 1.2576790223937846e-05, "loss": 4.0532, "step": 8567 }, { "epoch": 2.854168401765637, "grad_norm": 0.62109375, "learning_rate": 1.2576194390939084e-05, "loss": 4.001, "step": 8568 }, { "epoch": 2.8545015407678855, "grad_norm": 0.6015625, "learning_rate": 1.257559849881365e-05, "loss": 4.04, "step": 8569 }, { "epoch": 2.854834679770134, "grad_norm": 0.6328125, "learning_rate": 1.257500254756849e-05, "loss": 4.0265, "step": 8570 }, { "epoch": 2.855167818772383, "grad_norm": 0.64453125, "learning_rate": 1.2574406537210542e-05, "loss": 4.0467, "step": 8571 }, { "epoch": 2.8555009577746313, "grad_norm": 0.6171875, "learning_rate": 1.2573810467746749e-05, "loss": 3.9651, "step": 8572 }, { "epoch": 2.85583409677688, "grad_norm": 0.64453125, "learning_rate": 1.2573214339184052e-05, "loss": 4.0261, "step": 8573 }, { "epoch": 2.856167235779129, "grad_norm": 0.61328125, "learning_rate": 1.2572618151529402e-05, "loss": 3.9995, "step": 8574 }, { "epoch": 2.8565003747813775, "grad_norm": 0.6015625, "learning_rate": 1.2572021904789734e-05, "loss": 4.0692, "step": 8575 }, { "epoch": 2.8568335137836263, "grad_norm": 0.62109375, "learning_rate": 1.2571425598971996e-05, "loss": 3.9802, "step": 8576 }, { "epoch": 2.857166652785875, "grad_norm": 0.58984375, "learning_rate": 1.2570829234083135e-05, "loss": 4.1106, "step": 8577 }, { "epoch": 2.8574997917881237, "grad_norm": 0.6484375, "learning_rate": 1.2570232810130098e-05, "loss": 4.0009, "step": 8578 }, { "epoch": 2.857832930790372, "grad_norm": 0.625, "learning_rate": 1.256963632711983e-05, "loss": 4.0194, "step": 8579 }, { "epoch": 2.8581660697926212, "grad_norm": 0.65234375, "learning_rate": 1.2569039785059278e-05, "loss": 4.0237, "step": 8580 }, { "epoch": 2.8584992087948695, "grad_norm": 0.59375, "learning_rate": 1.2568443183955396e-05, "loss": 4.1189, "step": 8581 }, { "epoch": 2.8588323477971183, "grad_norm": 0.61328125, "learning_rate": 1.2567846523815127e-05, "loss": 4.0893, "step": 8582 }, { "epoch": 2.859165486799367, "grad_norm": 0.59765625, "learning_rate": 1.2567249804645423e-05, "loss": 3.9986, "step": 8583 }, { "epoch": 2.8594986258016157, "grad_norm": 0.6484375, "learning_rate": 1.2566653026453235e-05, "loss": 4.0332, "step": 8584 }, { "epoch": 2.8598317648038645, "grad_norm": 0.61328125, "learning_rate": 1.2566056189245512e-05, "loss": 4.0675, "step": 8585 }, { "epoch": 2.860164903806113, "grad_norm": 0.609375, "learning_rate": 1.2565459293029208e-05, "loss": 4.0506, "step": 8586 }, { "epoch": 2.860498042808362, "grad_norm": 0.6171875, "learning_rate": 1.2564862337811274e-05, "loss": 4.0149, "step": 8587 }, { "epoch": 2.8608311818106102, "grad_norm": 0.59375, "learning_rate": 1.2564265323598667e-05, "loss": 4.0903, "step": 8588 }, { "epoch": 2.8611643208128594, "grad_norm": 0.6171875, "learning_rate": 1.2563668250398335e-05, "loss": 3.9988, "step": 8589 }, { "epoch": 2.8614974598151077, "grad_norm": 0.64453125, "learning_rate": 1.256307111821724e-05, "loss": 4.0478, "step": 8590 }, { "epoch": 2.8618305988173565, "grad_norm": 0.59765625, "learning_rate": 1.256247392706233e-05, "loss": 4.0489, "step": 8591 }, { "epoch": 2.862163737819605, "grad_norm": 0.60546875, "learning_rate": 1.2561876676940564e-05, "loss": 4.0665, "step": 8592 }, { "epoch": 2.862496876821854, "grad_norm": 0.66796875, "learning_rate": 1.25612793678589e-05, "loss": 3.9525, "step": 8593 }, { "epoch": 2.8628300158241027, "grad_norm": 0.62109375, "learning_rate": 1.2560681999824294e-05, "loss": 4.0249, "step": 8594 }, { "epoch": 2.8631631548263514, "grad_norm": 0.63671875, "learning_rate": 1.2560084572843703e-05, "loss": 4.0117, "step": 8595 }, { "epoch": 2.8634962938286, "grad_norm": 0.61328125, "learning_rate": 1.2559487086924088e-05, "loss": 4.0319, "step": 8596 }, { "epoch": 2.8638294328308485, "grad_norm": 0.59375, "learning_rate": 1.2558889542072405e-05, "loss": 4.0509, "step": 8597 }, { "epoch": 2.8641625718330976, "grad_norm": 0.62109375, "learning_rate": 1.255829193829562e-05, "loss": 4.0706, "step": 8598 }, { "epoch": 2.864495710835346, "grad_norm": 0.65234375, "learning_rate": 1.2557694275600686e-05, "loss": 4.0991, "step": 8599 }, { "epoch": 2.8648288498375947, "grad_norm": 0.6171875, "learning_rate": 1.2557096553994572e-05, "loss": 4.0767, "step": 8600 }, { "epoch": 2.8651619888398434, "grad_norm": 0.63671875, "learning_rate": 1.2556498773484237e-05, "loss": 4.013, "step": 8601 }, { "epoch": 2.865495127842092, "grad_norm": 0.59765625, "learning_rate": 1.255590093407664e-05, "loss": 4.0552, "step": 8602 }, { "epoch": 2.865828266844341, "grad_norm": 0.64453125, "learning_rate": 1.255530303577875e-05, "loss": 3.9827, "step": 8603 }, { "epoch": 2.8661614058465896, "grad_norm": 0.625, "learning_rate": 1.2554705078597531e-05, "loss": 4.0892, "step": 8604 }, { "epoch": 2.8664945448488384, "grad_norm": 0.61328125, "learning_rate": 1.2554107062539944e-05, "loss": 4.0784, "step": 8605 }, { "epoch": 2.8668276838510867, "grad_norm": 0.62109375, "learning_rate": 1.255350898761296e-05, "loss": 3.9815, "step": 8606 }, { "epoch": 2.8671608228533354, "grad_norm": 0.6171875, "learning_rate": 1.255291085382354e-05, "loss": 4.0195, "step": 8607 }, { "epoch": 2.867493961855584, "grad_norm": 0.6484375, "learning_rate": 1.2552312661178653e-05, "loss": 4.0553, "step": 8608 }, { "epoch": 2.867827100857833, "grad_norm": 0.66015625, "learning_rate": 1.2551714409685266e-05, "loss": 3.9933, "step": 8609 }, { "epoch": 2.8681602398600816, "grad_norm": 0.671875, "learning_rate": 1.255111609935035e-05, "loss": 4.0659, "step": 8610 }, { "epoch": 2.8684933788623304, "grad_norm": 0.6328125, "learning_rate": 1.2550517730180872e-05, "loss": 4.0144, "step": 8611 }, { "epoch": 2.868826517864579, "grad_norm": 0.63671875, "learning_rate": 1.2549919302183802e-05, "loss": 3.9445, "step": 8612 }, { "epoch": 2.869159656866828, "grad_norm": 0.62890625, "learning_rate": 1.2549320815366109e-05, "loss": 4.0873, "step": 8613 }, { "epoch": 2.8694927958690766, "grad_norm": 0.66015625, "learning_rate": 1.2548722269734765e-05, "loss": 4.0253, "step": 8614 }, { "epoch": 2.869825934871325, "grad_norm": 0.63671875, "learning_rate": 1.2548123665296744e-05, "loss": 3.9491, "step": 8615 }, { "epoch": 2.8701590738735736, "grad_norm": 0.625, "learning_rate": 1.2547525002059013e-05, "loss": 3.9367, "step": 8616 }, { "epoch": 2.8704922128758223, "grad_norm": 0.62890625, "learning_rate": 1.2546926280028553e-05, "loss": 3.9899, "step": 8617 }, { "epoch": 2.870825351878071, "grad_norm": 0.609375, "learning_rate": 1.254632749921233e-05, "loss": 4.0011, "step": 8618 }, { "epoch": 2.87115849088032, "grad_norm": 0.640625, "learning_rate": 1.2545728659617325e-05, "loss": 4.0555, "step": 8619 }, { "epoch": 2.8714916298825686, "grad_norm": 0.6171875, "learning_rate": 1.254512976125051e-05, "loss": 4.0595, "step": 8620 }, { "epoch": 2.8718247688848173, "grad_norm": 0.62890625, "learning_rate": 1.2544530804118858e-05, "loss": 4.0677, "step": 8621 }, { "epoch": 2.872157907887066, "grad_norm": 0.63671875, "learning_rate": 1.2543931788229349e-05, "loss": 3.9752, "step": 8622 }, { "epoch": 2.872491046889315, "grad_norm": 0.58984375, "learning_rate": 1.2543332713588961e-05, "loss": 4.0423, "step": 8623 }, { "epoch": 2.872824185891563, "grad_norm": 0.640625, "learning_rate": 1.2542733580204673e-05, "loss": 3.9526, "step": 8624 }, { "epoch": 2.873157324893812, "grad_norm": 0.62109375, "learning_rate": 1.2542134388083457e-05, "loss": 3.9524, "step": 8625 }, { "epoch": 2.8734904638960606, "grad_norm": 0.6171875, "learning_rate": 1.2541535137232298e-05, "loss": 4.0423, "step": 8626 }, { "epoch": 2.8738236028983093, "grad_norm": 0.62890625, "learning_rate": 1.2540935827658178e-05, "loss": 4.062, "step": 8627 }, { "epoch": 2.874156741900558, "grad_norm": 0.625, "learning_rate": 1.254033645936807e-05, "loss": 4.0203, "step": 8628 }, { "epoch": 2.8744898809028068, "grad_norm": 0.609375, "learning_rate": 1.2539737032368962e-05, "loss": 3.9929, "step": 8629 }, { "epoch": 2.8748230199050555, "grad_norm": 0.6171875, "learning_rate": 1.2539137546667833e-05, "loss": 4.0559, "step": 8630 }, { "epoch": 2.875156158907304, "grad_norm": 0.6171875, "learning_rate": 1.2538538002271664e-05, "loss": 4.1059, "step": 8631 }, { "epoch": 2.875489297909553, "grad_norm": 0.6171875, "learning_rate": 1.2537938399187444e-05, "loss": 4.0353, "step": 8632 }, { "epoch": 2.8758224369118013, "grad_norm": 0.6171875, "learning_rate": 1.2537338737422154e-05, "loss": 4.0151, "step": 8633 }, { "epoch": 2.87615557591405, "grad_norm": 0.62109375, "learning_rate": 1.2536739016982778e-05, "loss": 4.0448, "step": 8634 }, { "epoch": 2.8764887149162988, "grad_norm": 0.60546875, "learning_rate": 1.2536139237876301e-05, "loss": 4.0273, "step": 8635 }, { "epoch": 2.8768218539185475, "grad_norm": 0.62890625, "learning_rate": 1.253553940010971e-05, "loss": 3.9512, "step": 8636 }, { "epoch": 2.8771549929207962, "grad_norm": 0.640625, "learning_rate": 1.2534939503689993e-05, "loss": 4.0568, "step": 8637 }, { "epoch": 2.877488131923045, "grad_norm": 0.66796875, "learning_rate": 1.2534339548624134e-05, "loss": 4.0442, "step": 8638 }, { "epoch": 2.8778212709252937, "grad_norm": 0.66015625, "learning_rate": 1.2533739534919126e-05, "loss": 4.0172, "step": 8639 }, { "epoch": 2.878154409927542, "grad_norm": 0.64453125, "learning_rate": 1.2533139462581955e-05, "loss": 3.9987, "step": 8640 }, { "epoch": 2.878487548929791, "grad_norm": 0.625, "learning_rate": 1.253253933161961e-05, "loss": 4.0759, "step": 8641 }, { "epoch": 2.8788206879320395, "grad_norm": 0.60546875, "learning_rate": 1.2531939142039085e-05, "loss": 4.0345, "step": 8642 }, { "epoch": 2.8791538269342882, "grad_norm": 0.64453125, "learning_rate": 1.2531338893847366e-05, "loss": 4.0678, "step": 8643 }, { "epoch": 2.879486965936537, "grad_norm": 0.6640625, "learning_rate": 1.2530738587051447e-05, "loss": 4.0204, "step": 8644 }, { "epoch": 2.8798201049387857, "grad_norm": 0.67578125, "learning_rate": 1.2530138221658318e-05, "loss": 4.0394, "step": 8645 }, { "epoch": 2.8801532439410344, "grad_norm": 0.61328125, "learning_rate": 1.2529537797674978e-05, "loss": 4.0329, "step": 8646 }, { "epoch": 2.880486382943283, "grad_norm": 0.65625, "learning_rate": 1.2528937315108414e-05, "loss": 4.0588, "step": 8647 }, { "epoch": 2.880819521945532, "grad_norm": 0.59375, "learning_rate": 1.2528336773965624e-05, "loss": 4.0234, "step": 8648 }, { "epoch": 2.88115266094778, "grad_norm": 0.61328125, "learning_rate": 1.25277361742536e-05, "loss": 4.0207, "step": 8649 }, { "epoch": 2.8814857999500294, "grad_norm": 0.625, "learning_rate": 1.2527135515979342e-05, "loss": 4.0254, "step": 8650 }, { "epoch": 2.8818189389522777, "grad_norm": 0.65625, "learning_rate": 1.2526534799149845e-05, "loss": 4.0613, "step": 8651 }, { "epoch": 2.8821520779545264, "grad_norm": 0.6484375, "learning_rate": 1.2525934023772103e-05, "loss": 4.0034, "step": 8652 }, { "epoch": 2.882485216956775, "grad_norm": 0.62890625, "learning_rate": 1.2525333189853115e-05, "loss": 4.0414, "step": 8653 }, { "epoch": 2.882818355959024, "grad_norm": 0.62109375, "learning_rate": 1.2524732297399881e-05, "loss": 4.0892, "step": 8654 }, { "epoch": 2.8831514949612727, "grad_norm": 0.6171875, "learning_rate": 1.2524131346419398e-05, "loss": 4.0358, "step": 8655 }, { "epoch": 2.8834846339635214, "grad_norm": 0.6640625, "learning_rate": 1.2523530336918671e-05, "loss": 3.9889, "step": 8656 }, { "epoch": 2.88381777296577, "grad_norm": 0.60546875, "learning_rate": 1.2522929268904693e-05, "loss": 3.9779, "step": 8657 }, { "epoch": 2.8841509119680184, "grad_norm": 0.66796875, "learning_rate": 1.252232814238447e-05, "loss": 3.9807, "step": 8658 }, { "epoch": 2.8844840509702676, "grad_norm": 0.62890625, "learning_rate": 1.2521726957365001e-05, "loss": 3.9949, "step": 8659 }, { "epoch": 2.884817189972516, "grad_norm": 0.6171875, "learning_rate": 1.252112571385329e-05, "loss": 4.0284, "step": 8660 }, { "epoch": 2.8851503289747646, "grad_norm": 0.6484375, "learning_rate": 1.2520524411856344e-05, "loss": 3.9997, "step": 8661 }, { "epoch": 2.8854834679770134, "grad_norm": 0.625, "learning_rate": 1.251992305138116e-05, "loss": 4.0322, "step": 8662 }, { "epoch": 2.885816606979262, "grad_norm": 0.6015625, "learning_rate": 1.2519321632434746e-05, "loss": 4.0222, "step": 8663 }, { "epoch": 2.886149745981511, "grad_norm": 0.60546875, "learning_rate": 1.2518720155024108e-05, "loss": 4.0565, "step": 8664 }, { "epoch": 2.8864828849837596, "grad_norm": 0.65234375, "learning_rate": 1.2518118619156248e-05, "loss": 3.9913, "step": 8665 }, { "epoch": 2.8868160239860083, "grad_norm": 0.65234375, "learning_rate": 1.2517517024838177e-05, "loss": 3.929, "step": 8666 }, { "epoch": 2.8871491629882566, "grad_norm": 0.609375, "learning_rate": 1.2516915372076901e-05, "loss": 3.984, "step": 8667 }, { "epoch": 2.8874823019905054, "grad_norm": 0.640625, "learning_rate": 1.2516313660879427e-05, "loss": 4.0563, "step": 8668 }, { "epoch": 2.887815440992754, "grad_norm": 0.6640625, "learning_rate": 1.2515711891252764e-05, "loss": 3.9514, "step": 8669 }, { "epoch": 2.888148579995003, "grad_norm": 0.640625, "learning_rate": 1.2515110063203922e-05, "loss": 4.0479, "step": 8670 }, { "epoch": 2.8884817189972516, "grad_norm": 0.65234375, "learning_rate": 1.251450817673991e-05, "loss": 3.9912, "step": 8671 }, { "epoch": 2.8888148579995003, "grad_norm": 0.60546875, "learning_rate": 1.251390623186774e-05, "loss": 4.1063, "step": 8672 }, { "epoch": 2.889147997001749, "grad_norm": 0.625, "learning_rate": 1.2513304228594421e-05, "loss": 4.0407, "step": 8673 }, { "epoch": 2.889481136003998, "grad_norm": 0.66015625, "learning_rate": 1.2512702166926967e-05, "loss": 4.0727, "step": 8674 }, { "epoch": 2.8898142750062465, "grad_norm": 0.60546875, "learning_rate": 1.2512100046872391e-05, "loss": 4.0201, "step": 8675 }, { "epoch": 2.890147414008495, "grad_norm": 0.6171875, "learning_rate": 1.2511497868437704e-05, "loss": 4.0757, "step": 8676 }, { "epoch": 2.8904805530107436, "grad_norm": 0.62109375, "learning_rate": 1.2510895631629923e-05, "loss": 4.0566, "step": 8677 }, { "epoch": 2.8908136920129923, "grad_norm": 0.5859375, "learning_rate": 1.2510293336456061e-05, "loss": 4.026, "step": 8678 }, { "epoch": 2.891146831015241, "grad_norm": 0.66015625, "learning_rate": 1.2509690982923135e-05, "loss": 4.0142, "step": 8679 }, { "epoch": 2.89147997001749, "grad_norm": 0.625, "learning_rate": 1.2509088571038158e-05, "loss": 4.0813, "step": 8680 }, { "epoch": 2.8918131090197385, "grad_norm": 0.64453125, "learning_rate": 1.2508486100808151e-05, "loss": 3.991, "step": 8681 }, { "epoch": 2.8921462480219873, "grad_norm": 0.6015625, "learning_rate": 1.2507883572240129e-05, "loss": 3.9945, "step": 8682 }, { "epoch": 2.892479387024236, "grad_norm": 0.60546875, "learning_rate": 1.250728098534111e-05, "loss": 4.0274, "step": 8683 }, { "epoch": 2.8928125260264848, "grad_norm": 0.65625, "learning_rate": 1.2506678340118111e-05, "loss": 4.0021, "step": 8684 }, { "epoch": 2.893145665028733, "grad_norm": 0.6171875, "learning_rate": 1.2506075636578156e-05, "loss": 3.9859, "step": 8685 }, { "epoch": 2.893478804030982, "grad_norm": 0.6328125, "learning_rate": 1.2505472874728264e-05, "loss": 4.042, "step": 8686 }, { "epoch": 2.8938119430332305, "grad_norm": 0.65234375, "learning_rate": 1.2504870054575455e-05, "loss": 4.0436, "step": 8687 }, { "epoch": 2.8941450820354793, "grad_norm": 0.671875, "learning_rate": 1.2504267176126749e-05, "loss": 4.0375, "step": 8688 }, { "epoch": 2.894478221037728, "grad_norm": 0.5859375, "learning_rate": 1.250366423938917e-05, "loss": 4.0252, "step": 8689 }, { "epoch": 2.8948113600399767, "grad_norm": 0.6328125, "learning_rate": 1.2503061244369738e-05, "loss": 4.0153, "step": 8690 }, { "epoch": 2.8951444990422255, "grad_norm": 0.6171875, "learning_rate": 1.2502458191075484e-05, "loss": 4.0198, "step": 8691 }, { "epoch": 2.8954776380444742, "grad_norm": 0.6328125, "learning_rate": 1.2501855079513424e-05, "loss": 3.9385, "step": 8692 }, { "epoch": 2.895810777046723, "grad_norm": 0.640625, "learning_rate": 1.2501251909690586e-05, "loss": 4.017, "step": 8693 }, { "epoch": 2.8961439160489713, "grad_norm": 0.62890625, "learning_rate": 1.2500648681613996e-05, "loss": 4.0559, "step": 8694 }, { "epoch": 2.89647705505122, "grad_norm": 0.640625, "learning_rate": 1.2500045395290682e-05, "loss": 3.9828, "step": 8695 }, { "epoch": 2.8968101940534687, "grad_norm": 0.62109375, "learning_rate": 1.2499442050727669e-05, "loss": 4.1198, "step": 8696 }, { "epoch": 2.8971433330557175, "grad_norm": 0.66015625, "learning_rate": 1.2498838647931985e-05, "loss": 3.914, "step": 8697 }, { "epoch": 2.897476472057966, "grad_norm": 0.6328125, "learning_rate": 1.2498235186910658e-05, "loss": 4.0102, "step": 8698 }, { "epoch": 2.897809611060215, "grad_norm": 0.59375, "learning_rate": 1.2497631667670716e-05, "loss": 4.0759, "step": 8699 }, { "epoch": 2.8981427500624637, "grad_norm": 0.62890625, "learning_rate": 1.2497028090219191e-05, "loss": 4.0694, "step": 8700 }, { "epoch": 2.898475889064712, "grad_norm": 0.609375, "learning_rate": 1.2496424454563111e-05, "loss": 4.0322, "step": 8701 }, { "epoch": 2.898809028066961, "grad_norm": 0.6484375, "learning_rate": 1.249582076070951e-05, "loss": 4.0351, "step": 8702 }, { "epoch": 2.8991421670692095, "grad_norm": 0.6328125, "learning_rate": 1.2495217008665416e-05, "loss": 4.04, "step": 8703 }, { "epoch": 2.899475306071458, "grad_norm": 0.64453125, "learning_rate": 1.2494613198437866e-05, "loss": 4.063, "step": 8704 }, { "epoch": 2.899808445073707, "grad_norm": 0.6796875, "learning_rate": 1.249400933003389e-05, "loss": 4.029, "step": 8705 }, { "epoch": 2.9001415840759557, "grad_norm": 0.6328125, "learning_rate": 1.2493405403460522e-05, "loss": 4.0228, "step": 8706 }, { "epoch": 2.9004747230782044, "grad_norm": 0.62109375, "learning_rate": 1.2492801418724796e-05, "loss": 4.0554, "step": 8707 }, { "epoch": 2.900807862080453, "grad_norm": 0.609375, "learning_rate": 1.2492197375833749e-05, "loss": 4.0094, "step": 8708 }, { "epoch": 2.901141001082702, "grad_norm": 0.61328125, "learning_rate": 1.2491593274794415e-05, "loss": 3.9502, "step": 8709 }, { "epoch": 2.90147414008495, "grad_norm": 0.6171875, "learning_rate": 1.2490989115613832e-05, "loss": 3.9366, "step": 8710 }, { "epoch": 2.9018072790871994, "grad_norm": 0.6015625, "learning_rate": 1.2490384898299035e-05, "loss": 4.0751, "step": 8711 }, { "epoch": 2.9021404180894477, "grad_norm": 0.6171875, "learning_rate": 1.2489780622857062e-05, "loss": 4.0669, "step": 8712 }, { "epoch": 2.9024735570916964, "grad_norm": 0.6328125, "learning_rate": 1.2489176289294955e-05, "loss": 4.0499, "step": 8713 }, { "epoch": 2.902806696093945, "grad_norm": 0.62109375, "learning_rate": 1.2488571897619752e-05, "loss": 4.0682, "step": 8714 }, { "epoch": 2.903139835096194, "grad_norm": 0.60546875, "learning_rate": 1.2487967447838487e-05, "loss": 3.9904, "step": 8715 }, { "epoch": 2.9034729740984426, "grad_norm": 0.65234375, "learning_rate": 1.2487362939958207e-05, "loss": 4.0556, "step": 8716 }, { "epoch": 2.9038061131006914, "grad_norm": 0.60546875, "learning_rate": 1.2486758373985953e-05, "loss": 4.0522, "step": 8717 }, { "epoch": 2.90413925210294, "grad_norm": 0.64453125, "learning_rate": 1.2486153749928766e-05, "loss": 3.986, "step": 8718 }, { "epoch": 2.9044723911051884, "grad_norm": 0.68359375, "learning_rate": 1.2485549067793686e-05, "loss": 4.0399, "step": 8719 }, { "epoch": 2.9048055301074376, "grad_norm": 0.64453125, "learning_rate": 1.2484944327587758e-05, "loss": 4.041, "step": 8720 }, { "epoch": 2.905138669109686, "grad_norm": 0.6484375, "learning_rate": 1.2484339529318026e-05, "loss": 4.0048, "step": 8721 }, { "epoch": 2.9054718081119346, "grad_norm": 0.6328125, "learning_rate": 1.2483734672991534e-05, "loss": 3.9525, "step": 8722 }, { "epoch": 2.9058049471141834, "grad_norm": 0.58984375, "learning_rate": 1.2483129758615327e-05, "loss": 4.0642, "step": 8723 }, { "epoch": 2.906138086116432, "grad_norm": 0.6328125, "learning_rate": 1.2482524786196453e-05, "loss": 4.071, "step": 8724 }, { "epoch": 2.906471225118681, "grad_norm": 0.6171875, "learning_rate": 1.2481919755741959e-05, "loss": 3.9696, "step": 8725 }, { "epoch": 2.9068043641209296, "grad_norm": 0.6484375, "learning_rate": 1.2481314667258888e-05, "loss": 4.0389, "step": 8726 }, { "epoch": 2.9071375031231783, "grad_norm": 0.62109375, "learning_rate": 1.2480709520754294e-05, "loss": 4.0686, "step": 8727 }, { "epoch": 2.9074706421254266, "grad_norm": 0.6328125, "learning_rate": 1.248010431623522e-05, "loss": 4.0175, "step": 8728 }, { "epoch": 2.907803781127676, "grad_norm": 0.65234375, "learning_rate": 1.2479499053708716e-05, "loss": 4.0718, "step": 8729 }, { "epoch": 2.908136920129924, "grad_norm": 0.6328125, "learning_rate": 1.2478893733181837e-05, "loss": 4.0882, "step": 8730 }, { "epoch": 2.908470059132173, "grad_norm": 0.6015625, "learning_rate": 1.2478288354661629e-05, "loss": 3.9615, "step": 8731 }, { "epoch": 2.9088031981344216, "grad_norm": 0.66015625, "learning_rate": 1.2477682918155144e-05, "loss": 4.0479, "step": 8732 }, { "epoch": 2.9091363371366703, "grad_norm": 0.60546875, "learning_rate": 1.2477077423669434e-05, "loss": 4.0183, "step": 8733 }, { "epoch": 2.909469476138919, "grad_norm": 0.609375, "learning_rate": 1.2476471871211554e-05, "loss": 4.073, "step": 8734 }, { "epoch": 2.909802615141168, "grad_norm": 0.625, "learning_rate": 1.2475866260788553e-05, "loss": 3.9872, "step": 8735 }, { "epoch": 2.9101357541434165, "grad_norm": 0.62109375, "learning_rate": 1.247526059240749e-05, "loss": 3.993, "step": 8736 }, { "epoch": 2.910468893145665, "grad_norm": 0.58984375, "learning_rate": 1.2474654866075419e-05, "loss": 3.9959, "step": 8737 }, { "epoch": 2.9108020321479136, "grad_norm": 0.625, "learning_rate": 1.247404908179939e-05, "loss": 4.0205, "step": 8738 }, { "epoch": 2.9111351711501623, "grad_norm": 0.6484375, "learning_rate": 1.2473443239586467e-05, "loss": 4.0029, "step": 8739 }, { "epoch": 2.911468310152411, "grad_norm": 0.625, "learning_rate": 1.2472837339443702e-05, "loss": 4.0212, "step": 8740 }, { "epoch": 2.9118014491546598, "grad_norm": 0.61328125, "learning_rate": 1.247223138137815e-05, "loss": 4.0491, "step": 8741 }, { "epoch": 2.9121345881569085, "grad_norm": 0.625, "learning_rate": 1.2471625365396874e-05, "loss": 4.0174, "step": 8742 }, { "epoch": 2.9124677271591572, "grad_norm": 0.61328125, "learning_rate": 1.2471019291506931e-05, "loss": 4.0659, "step": 8743 }, { "epoch": 2.912800866161406, "grad_norm": 0.61328125, "learning_rate": 1.2470413159715381e-05, "loss": 4.0675, "step": 8744 }, { "epoch": 2.9131340051636547, "grad_norm": 0.609375, "learning_rate": 1.2469806970029285e-05, "loss": 4.0582, "step": 8745 }, { "epoch": 2.913467144165903, "grad_norm": 0.640625, "learning_rate": 1.24692007224557e-05, "loss": 4.0688, "step": 8746 }, { "epoch": 2.9138002831681518, "grad_norm": 0.59765625, "learning_rate": 1.2468594417001689e-05, "loss": 4.0384, "step": 8747 }, { "epoch": 2.9141334221704005, "grad_norm": 0.68359375, "learning_rate": 1.2467988053674317e-05, "loss": 3.9706, "step": 8748 }, { "epoch": 2.9144665611726492, "grad_norm": 0.62109375, "learning_rate": 1.2467381632480645e-05, "loss": 3.9764, "step": 8749 }, { "epoch": 2.914799700174898, "grad_norm": 0.65625, "learning_rate": 1.2466775153427735e-05, "loss": 3.9904, "step": 8750 }, { "epoch": 2.9151328391771467, "grad_norm": 0.63671875, "learning_rate": 1.2466168616522655e-05, "loss": 4.0061, "step": 8751 }, { "epoch": 2.9154659781793955, "grad_norm": 0.62890625, "learning_rate": 1.2465562021772464e-05, "loss": 4.0786, "step": 8752 }, { "epoch": 2.915799117181644, "grad_norm": 0.625, "learning_rate": 1.2464955369184232e-05, "loss": 4.0853, "step": 8753 }, { "epoch": 2.916132256183893, "grad_norm": 0.625, "learning_rate": 1.2464348658765026e-05, "loss": 3.966, "step": 8754 }, { "epoch": 2.9164653951861412, "grad_norm": 0.625, "learning_rate": 1.2463741890521909e-05, "loss": 4.1067, "step": 8755 }, { "epoch": 2.91679853418839, "grad_norm": 0.63671875, "learning_rate": 1.246313506446195e-05, "loss": 4.0656, "step": 8756 }, { "epoch": 2.9171316731906387, "grad_norm": 0.61328125, "learning_rate": 1.2462528180592216e-05, "loss": 4.0098, "step": 8757 }, { "epoch": 2.9174648121928874, "grad_norm": 0.6640625, "learning_rate": 1.2461921238919782e-05, "loss": 4.0125, "step": 8758 }, { "epoch": 2.917797951195136, "grad_norm": 0.6484375, "learning_rate": 1.246131423945171e-05, "loss": 4.0115, "step": 8759 }, { "epoch": 2.918131090197385, "grad_norm": 0.6875, "learning_rate": 1.2460707182195074e-05, "loss": 4.0251, "step": 8760 }, { "epoch": 2.9184642291996337, "grad_norm": 0.62890625, "learning_rate": 1.2460100067156944e-05, "loss": 4.0503, "step": 8761 }, { "epoch": 2.918797368201882, "grad_norm": 0.6484375, "learning_rate": 1.2459492894344392e-05, "loss": 3.9918, "step": 8762 }, { "epoch": 2.919130507204131, "grad_norm": 0.62890625, "learning_rate": 1.2458885663764492e-05, "loss": 4.026, "step": 8763 }, { "epoch": 2.9194636462063794, "grad_norm": 0.62890625, "learning_rate": 1.2458278375424312e-05, "loss": 4.0297, "step": 8764 }, { "epoch": 2.919796785208628, "grad_norm": 0.6015625, "learning_rate": 1.2457671029330928e-05, "loss": 4.0858, "step": 8765 }, { "epoch": 2.920129924210877, "grad_norm": 0.62890625, "learning_rate": 1.2457063625491417e-05, "loss": 4.0753, "step": 8766 }, { "epoch": 2.9204630632131257, "grad_norm": 0.63671875, "learning_rate": 1.2456456163912852e-05, "loss": 4.0367, "step": 8767 }, { "epoch": 2.9207962022153744, "grad_norm": 0.6328125, "learning_rate": 1.2455848644602307e-05, "loss": 3.9976, "step": 8768 }, { "epoch": 2.921129341217623, "grad_norm": 0.6484375, "learning_rate": 1.2455241067566859e-05, "loss": 4.0713, "step": 8769 }, { "epoch": 2.921462480219872, "grad_norm": 0.62109375, "learning_rate": 1.245463343281359e-05, "loss": 4.0307, "step": 8770 }, { "epoch": 2.92179561922212, "grad_norm": 0.65234375, "learning_rate": 1.2454025740349569e-05, "loss": 4.0322, "step": 8771 }, { "epoch": 2.9221287582243693, "grad_norm": 0.6171875, "learning_rate": 1.245341799018188e-05, "loss": 4.0252, "step": 8772 }, { "epoch": 2.9224618972266176, "grad_norm": 0.62890625, "learning_rate": 1.2452810182317599e-05, "loss": 4.0501, "step": 8773 }, { "epoch": 2.9227950362288664, "grad_norm": 0.60546875, "learning_rate": 1.2452202316763808e-05, "loss": 4.0996, "step": 8774 }, { "epoch": 2.923128175231115, "grad_norm": 0.66796875, "learning_rate": 1.2451594393527585e-05, "loss": 4.0046, "step": 8775 }, { "epoch": 2.923461314233364, "grad_norm": 0.62890625, "learning_rate": 1.2450986412616017e-05, "loss": 4.0437, "step": 8776 }, { "epoch": 2.9237944532356126, "grad_norm": 0.6796875, "learning_rate": 1.2450378374036178e-05, "loss": 4.0831, "step": 8777 }, { "epoch": 2.9241275922378613, "grad_norm": 0.64453125, "learning_rate": 1.2449770277795153e-05, "loss": 4.0166, "step": 8778 }, { "epoch": 2.92446073124011, "grad_norm": 0.57421875, "learning_rate": 1.2449162123900026e-05, "loss": 4.0094, "step": 8779 }, { "epoch": 2.9247938702423584, "grad_norm": 0.62109375, "learning_rate": 1.244855391235788e-05, "loss": 4.0546, "step": 8780 }, { "epoch": 2.9251270092446076, "grad_norm": 0.6015625, "learning_rate": 1.24479456431758e-05, "loss": 3.9974, "step": 8781 }, { "epoch": 2.925460148246856, "grad_norm": 0.640625, "learning_rate": 1.2447337316360868e-05, "loss": 4.0759, "step": 8782 }, { "epoch": 2.9257932872491046, "grad_norm": 0.65625, "learning_rate": 1.2446728931920176e-05, "loss": 4.1004, "step": 8783 }, { "epoch": 2.9261264262513533, "grad_norm": 0.62890625, "learning_rate": 1.2446120489860804e-05, "loss": 3.9553, "step": 8784 }, { "epoch": 2.926459565253602, "grad_norm": 0.6484375, "learning_rate": 1.2445511990189843e-05, "loss": 3.9969, "step": 8785 }, { "epoch": 2.926792704255851, "grad_norm": 0.63671875, "learning_rate": 1.2444903432914379e-05, "loss": 4.0011, "step": 8786 }, { "epoch": 2.9271258432580995, "grad_norm": 0.66015625, "learning_rate": 1.24442948180415e-05, "loss": 3.9968, "step": 8787 }, { "epoch": 2.9274589822603483, "grad_norm": 0.6171875, "learning_rate": 1.2443686145578297e-05, "loss": 4.0598, "step": 8788 }, { "epoch": 2.9277921212625966, "grad_norm": 0.625, "learning_rate": 1.2443077415531856e-05, "loss": 4.061, "step": 8789 }, { "epoch": 2.9281252602648458, "grad_norm": 0.62109375, "learning_rate": 1.2442468627909272e-05, "loss": 4.0576, "step": 8790 }, { "epoch": 2.928458399267094, "grad_norm": 0.6015625, "learning_rate": 1.2441859782717634e-05, "loss": 4.0234, "step": 8791 }, { "epoch": 2.928791538269343, "grad_norm": 0.62109375, "learning_rate": 1.2441250879964032e-05, "loss": 3.9866, "step": 8792 }, { "epoch": 2.9291246772715915, "grad_norm": 0.609375, "learning_rate": 1.244064191965556e-05, "loss": 3.9794, "step": 8793 }, { "epoch": 2.9294578162738403, "grad_norm": 0.62109375, "learning_rate": 1.2440032901799313e-05, "loss": 4.0616, "step": 8794 }, { "epoch": 2.929790955276089, "grad_norm": 0.578125, "learning_rate": 1.2439423826402382e-05, "loss": 4.0734, "step": 8795 }, { "epoch": 2.9301240942783378, "grad_norm": 0.62890625, "learning_rate": 1.2438814693471861e-05, "loss": 4.0374, "step": 8796 }, { "epoch": 2.9304572332805865, "grad_norm": 0.62109375, "learning_rate": 1.2438205503014847e-05, "loss": 3.9623, "step": 8797 }, { "epoch": 2.930790372282835, "grad_norm": 0.6171875, "learning_rate": 1.2437596255038436e-05, "loss": 4.0102, "step": 8798 }, { "epoch": 2.931123511285084, "grad_norm": 0.6171875, "learning_rate": 1.2436986949549722e-05, "loss": 3.9608, "step": 8799 }, { "epoch": 2.9314566502873323, "grad_norm": 0.6328125, "learning_rate": 1.2436377586555803e-05, "loss": 4.0846, "step": 8800 }, { "epoch": 2.931789789289581, "grad_norm": 0.625, "learning_rate": 1.2435768166063779e-05, "loss": 4.0382, "step": 8801 }, { "epoch": 2.9321229282918297, "grad_norm": 0.62109375, "learning_rate": 1.2435158688080746e-05, "loss": 4.0084, "step": 8802 }, { "epoch": 2.9324560672940785, "grad_norm": 0.66796875, "learning_rate": 1.2434549152613804e-05, "loss": 3.9753, "step": 8803 }, { "epoch": 2.9327892062963272, "grad_norm": 0.62109375, "learning_rate": 1.2433939559670052e-05, "loss": 4.0123, "step": 8804 }, { "epoch": 2.933122345298576, "grad_norm": 0.625, "learning_rate": 1.243332990925659e-05, "loss": 4.0312, "step": 8805 }, { "epoch": 2.9334554843008247, "grad_norm": 0.6328125, "learning_rate": 1.2432720201380523e-05, "loss": 4.0285, "step": 8806 }, { "epoch": 2.933788623303073, "grad_norm": 0.63671875, "learning_rate": 1.243211043604895e-05, "loss": 3.9994, "step": 8807 }, { "epoch": 2.9341217623053217, "grad_norm": 0.671875, "learning_rate": 1.243150061326897e-05, "loss": 3.9775, "step": 8808 }, { "epoch": 2.9344549013075705, "grad_norm": 0.64453125, "learning_rate": 1.243089073304769e-05, "loss": 3.9934, "step": 8809 }, { "epoch": 2.934788040309819, "grad_norm": 0.62890625, "learning_rate": 1.2430280795392215e-05, "loss": 4.0695, "step": 8810 }, { "epoch": 2.935121179312068, "grad_norm": 0.625, "learning_rate": 1.2429670800309648e-05, "loss": 3.997, "step": 8811 }, { "epoch": 2.9354543183143167, "grad_norm": 0.609375, "learning_rate": 1.2429060747807093e-05, "loss": 4.0416, "step": 8812 }, { "epoch": 2.9357874573165654, "grad_norm": 0.59375, "learning_rate": 1.2428450637891656e-05, "loss": 3.9541, "step": 8813 }, { "epoch": 2.936120596318814, "grad_norm": 0.6328125, "learning_rate": 1.2427840470570446e-05, "loss": 4.0394, "step": 8814 }, { "epoch": 2.936453735321063, "grad_norm": 0.6328125, "learning_rate": 1.2427230245850566e-05, "loss": 4.0304, "step": 8815 }, { "epoch": 2.936786874323311, "grad_norm": 0.61328125, "learning_rate": 1.2426619963739128e-05, "loss": 4.0505, "step": 8816 }, { "epoch": 2.93712001332556, "grad_norm": 0.66015625, "learning_rate": 1.2426009624243237e-05, "loss": 4.0142, "step": 8817 }, { "epoch": 2.9374531523278087, "grad_norm": 0.60546875, "learning_rate": 1.2425399227370003e-05, "loss": 4.0397, "step": 8818 }, { "epoch": 2.9377862913300574, "grad_norm": 0.6328125, "learning_rate": 1.2424788773126538e-05, "loss": 3.9497, "step": 8819 }, { "epoch": 2.938119430332306, "grad_norm": 0.6015625, "learning_rate": 1.2424178261519949e-05, "loss": 4.0506, "step": 8820 }, { "epoch": 2.938452569334555, "grad_norm": 0.59765625, "learning_rate": 1.2423567692557354e-05, "loss": 4.0227, "step": 8821 }, { "epoch": 2.9387857083368036, "grad_norm": 0.65234375, "learning_rate": 1.2422957066245856e-05, "loss": 4.0165, "step": 8822 }, { "epoch": 2.9391188473390524, "grad_norm": 0.62890625, "learning_rate": 1.242234638259257e-05, "loss": 3.9917, "step": 8823 }, { "epoch": 2.939451986341301, "grad_norm": 0.6171875, "learning_rate": 1.2421735641604612e-05, "loss": 4.0547, "step": 8824 }, { "epoch": 2.9397851253435494, "grad_norm": 0.65625, "learning_rate": 1.2421124843289095e-05, "loss": 4.003, "step": 8825 }, { "epoch": 2.940118264345798, "grad_norm": 0.62890625, "learning_rate": 1.242051398765313e-05, "loss": 3.9119, "step": 8826 }, { "epoch": 2.940451403348047, "grad_norm": 0.6171875, "learning_rate": 1.2419903074703838e-05, "loss": 3.9667, "step": 8827 }, { "epoch": 2.9407845423502956, "grad_norm": 0.63671875, "learning_rate": 1.2419292104448329e-05, "loss": 4.0568, "step": 8828 }, { "epoch": 2.9411176813525444, "grad_norm": 0.65234375, "learning_rate": 1.2418681076893724e-05, "loss": 4.0084, "step": 8829 }, { "epoch": 2.941450820354793, "grad_norm": 0.60546875, "learning_rate": 1.2418069992047138e-05, "loss": 4.0156, "step": 8830 }, { "epoch": 2.941783959357042, "grad_norm": 0.63671875, "learning_rate": 1.2417458849915688e-05, "loss": 4.0597, "step": 8831 }, { "epoch": 2.94211709835929, "grad_norm": 0.62890625, "learning_rate": 1.2416847650506494e-05, "loss": 3.9573, "step": 8832 }, { "epoch": 2.9424502373615393, "grad_norm": 0.640625, "learning_rate": 1.2416236393826676e-05, "loss": 4.072, "step": 8833 }, { "epoch": 2.9427833763637876, "grad_norm": 0.60546875, "learning_rate": 1.2415625079883352e-05, "loss": 3.984, "step": 8834 }, { "epoch": 2.9431165153660364, "grad_norm": 0.67578125, "learning_rate": 1.2415013708683644e-05, "loss": 4.0007, "step": 8835 }, { "epoch": 2.943449654368285, "grad_norm": 0.671875, "learning_rate": 1.2414402280234669e-05, "loss": 4.0803, "step": 8836 }, { "epoch": 2.943782793370534, "grad_norm": 0.61328125, "learning_rate": 1.2413790794543553e-05, "loss": 4.0913, "step": 8837 }, { "epoch": 2.9441159323727826, "grad_norm": 0.64453125, "learning_rate": 1.241317925161742e-05, "loss": 3.9982, "step": 8838 }, { "epoch": 2.9444490713750313, "grad_norm": 0.59375, "learning_rate": 1.241256765146339e-05, "loss": 4.0508, "step": 8839 }, { "epoch": 2.94478221037728, "grad_norm": 0.6484375, "learning_rate": 1.2411955994088588e-05, "loss": 4.0171, "step": 8840 }, { "epoch": 2.9451153493795283, "grad_norm": 0.68359375, "learning_rate": 1.2411344279500133e-05, "loss": 4.0473, "step": 8841 }, { "epoch": 2.9454484883817775, "grad_norm": 0.6015625, "learning_rate": 1.241073250770516e-05, "loss": 4.0717, "step": 8842 }, { "epoch": 2.945781627384026, "grad_norm": 0.6171875, "learning_rate": 1.241012067871079e-05, "loss": 4.0029, "step": 8843 }, { "epoch": 2.9461147663862746, "grad_norm": 0.64453125, "learning_rate": 1.2409508792524148e-05, "loss": 3.9962, "step": 8844 }, { "epoch": 2.9464479053885233, "grad_norm": 0.62890625, "learning_rate": 1.2408896849152362e-05, "loss": 4.081, "step": 8845 }, { "epoch": 2.946781044390772, "grad_norm": 0.63671875, "learning_rate": 1.2408284848602561e-05, "loss": 3.9653, "step": 8846 }, { "epoch": 2.947114183393021, "grad_norm": 0.6328125, "learning_rate": 1.2407672790881873e-05, "loss": 4.0649, "step": 8847 }, { "epoch": 2.9474473223952695, "grad_norm": 0.62109375, "learning_rate": 1.2407060675997426e-05, "loss": 4.1053, "step": 8848 }, { "epoch": 2.9477804613975183, "grad_norm": 0.6328125, "learning_rate": 1.2406448503956352e-05, "loss": 4.0648, "step": 8849 }, { "epoch": 2.9481136003997666, "grad_norm": 0.61328125, "learning_rate": 1.240583627476578e-05, "loss": 4.0276, "step": 8850 }, { "epoch": 2.9484467394020157, "grad_norm": 0.64453125, "learning_rate": 1.2405223988432842e-05, "loss": 3.9812, "step": 8851 }, { "epoch": 2.948779878404264, "grad_norm": 0.625, "learning_rate": 1.2404611644964667e-05, "loss": 4.0466, "step": 8852 }, { "epoch": 2.9491130174065128, "grad_norm": 0.60546875, "learning_rate": 1.2403999244368393e-05, "loss": 4.0506, "step": 8853 }, { "epoch": 2.9494461564087615, "grad_norm": 0.6328125, "learning_rate": 1.2403386786651147e-05, "loss": 3.9443, "step": 8854 }, { "epoch": 2.9497792954110102, "grad_norm": 0.625, "learning_rate": 1.2402774271820066e-05, "loss": 4.0078, "step": 8855 }, { "epoch": 2.950112434413259, "grad_norm": 0.6171875, "learning_rate": 1.2402161699882284e-05, "loss": 4.0044, "step": 8856 }, { "epoch": 2.9504455734155077, "grad_norm": 0.671875, "learning_rate": 1.2401549070844938e-05, "loss": 4.075, "step": 8857 }, { "epoch": 2.9507787124177565, "grad_norm": 0.62890625, "learning_rate": 1.240093638471516e-05, "loss": 3.9529, "step": 8858 }, { "epoch": 2.9511118514200048, "grad_norm": 0.61328125, "learning_rate": 1.240032364150009e-05, "loss": 4.0743, "step": 8859 }, { "epoch": 2.951444990422254, "grad_norm": 0.609375, "learning_rate": 1.2399710841206863e-05, "loss": 3.9575, "step": 8860 }, { "epoch": 2.9517781294245022, "grad_norm": 0.61328125, "learning_rate": 1.239909798384262e-05, "loss": 4.017, "step": 8861 }, { "epoch": 2.952111268426751, "grad_norm": 0.6328125, "learning_rate": 1.2398485069414495e-05, "loss": 3.9993, "step": 8862 }, { "epoch": 2.9524444074289997, "grad_norm": 0.62109375, "learning_rate": 1.2397872097929628e-05, "loss": 4.0574, "step": 8863 }, { "epoch": 2.9527775464312485, "grad_norm": 0.66796875, "learning_rate": 1.239725906939516e-05, "loss": 3.9783, "step": 8864 }, { "epoch": 2.953110685433497, "grad_norm": 0.6171875, "learning_rate": 1.2396645983818232e-05, "loss": 3.9933, "step": 8865 }, { "epoch": 2.953443824435746, "grad_norm": 0.63671875, "learning_rate": 1.2396032841205986e-05, "loss": 4.034, "step": 8866 }, { "epoch": 2.9537769634379947, "grad_norm": 0.64453125, "learning_rate": 1.2395419641565558e-05, "loss": 4.0568, "step": 8867 }, { "epoch": 2.954110102440243, "grad_norm": 0.62109375, "learning_rate": 1.2394806384904099e-05, "loss": 4.0578, "step": 8868 }, { "epoch": 2.954443241442492, "grad_norm": 0.6171875, "learning_rate": 1.2394193071228745e-05, "loss": 3.9938, "step": 8869 }, { "epoch": 2.9547763804447404, "grad_norm": 0.640625, "learning_rate": 1.2393579700546643e-05, "loss": 3.9935, "step": 8870 }, { "epoch": 2.955109519446989, "grad_norm": 0.64453125, "learning_rate": 1.2392966272864939e-05, "loss": 4.0097, "step": 8871 }, { "epoch": 2.955442658449238, "grad_norm": 0.62109375, "learning_rate": 1.2392352788190772e-05, "loss": 4.075, "step": 8872 }, { "epoch": 2.9557757974514867, "grad_norm": 0.63671875, "learning_rate": 1.2391739246531294e-05, "loss": 3.9364, "step": 8873 }, { "epoch": 2.9561089364537354, "grad_norm": 0.62890625, "learning_rate": 1.239112564789365e-05, "loss": 4.0011, "step": 8874 }, { "epoch": 2.956442075455984, "grad_norm": 0.67578125, "learning_rate": 1.2390511992284986e-05, "loss": 4.0189, "step": 8875 }, { "epoch": 2.956775214458233, "grad_norm": 0.61328125, "learning_rate": 1.2389898279712452e-05, "loss": 4.0566, "step": 8876 }, { "epoch": 2.957108353460481, "grad_norm": 0.60546875, "learning_rate": 1.238928451018319e-05, "loss": 4.1302, "step": 8877 }, { "epoch": 2.95744149246273, "grad_norm": 0.65234375, "learning_rate": 1.2388670683704356e-05, "loss": 4.033, "step": 8878 }, { "epoch": 2.9577746314649787, "grad_norm": 0.62890625, "learning_rate": 1.2388056800283099e-05, "loss": 4.0413, "step": 8879 }, { "epoch": 2.9581077704672274, "grad_norm": 0.64453125, "learning_rate": 1.2387442859926565e-05, "loss": 4.057, "step": 8880 }, { "epoch": 2.958440909469476, "grad_norm": 0.64453125, "learning_rate": 1.238682886264191e-05, "loss": 4.0486, "step": 8881 }, { "epoch": 2.958774048471725, "grad_norm": 0.61328125, "learning_rate": 1.2386214808436283e-05, "loss": 4.0235, "step": 8882 }, { "epoch": 2.9591071874739736, "grad_norm": 0.62890625, "learning_rate": 1.2385600697316837e-05, "loss": 4.064, "step": 8883 }, { "epoch": 2.9594403264762223, "grad_norm": 0.6328125, "learning_rate": 1.2384986529290725e-05, "loss": 4.0251, "step": 8884 }, { "epoch": 2.959773465478471, "grad_norm": 0.609375, "learning_rate": 1.23843723043651e-05, "loss": 3.926, "step": 8885 }, { "epoch": 2.9601066044807194, "grad_norm": 0.6171875, "learning_rate": 1.2383758022547118e-05, "loss": 4.057, "step": 8886 }, { "epoch": 2.960439743482968, "grad_norm": 0.6328125, "learning_rate": 1.2383143683843933e-05, "loss": 3.9877, "step": 8887 }, { "epoch": 2.960772882485217, "grad_norm": 0.6171875, "learning_rate": 1.2382529288262703e-05, "loss": 4.0629, "step": 8888 }, { "epoch": 2.9611060214874656, "grad_norm": 0.66015625, "learning_rate": 1.2381914835810578e-05, "loss": 3.9799, "step": 8889 }, { "epoch": 2.9614391604897143, "grad_norm": 0.61328125, "learning_rate": 1.2381300326494722e-05, "loss": 4.0668, "step": 8890 }, { "epoch": 2.961772299491963, "grad_norm": 0.6484375, "learning_rate": 1.238068576032229e-05, "loss": 4.0592, "step": 8891 }, { "epoch": 2.962105438494212, "grad_norm": 0.640625, "learning_rate": 1.2380071137300442e-05, "loss": 3.9825, "step": 8892 }, { "epoch": 2.9624385774964606, "grad_norm": 0.6796875, "learning_rate": 1.2379456457436333e-05, "loss": 4.0135, "step": 8893 }, { "epoch": 2.9627717164987093, "grad_norm": 0.6328125, "learning_rate": 1.2378841720737126e-05, "loss": 3.9724, "step": 8894 }, { "epoch": 2.9631048555009576, "grad_norm": 0.6484375, "learning_rate": 1.237822692720998e-05, "loss": 4.006, "step": 8895 }, { "epoch": 2.9634379945032063, "grad_norm": 0.625, "learning_rate": 1.2377612076862058e-05, "loss": 4.0762, "step": 8896 }, { "epoch": 2.963771133505455, "grad_norm": 0.61328125, "learning_rate": 1.2376997169700519e-05, "loss": 4.1115, "step": 8897 }, { "epoch": 2.964104272507704, "grad_norm": 0.625, "learning_rate": 1.2376382205732525e-05, "loss": 3.9877, "step": 8898 }, { "epoch": 2.9644374115099525, "grad_norm": 0.640625, "learning_rate": 1.2375767184965242e-05, "loss": 3.9668, "step": 8899 }, { "epoch": 2.9647705505122013, "grad_norm": 0.66015625, "learning_rate": 1.2375152107405834e-05, "loss": 3.9936, "step": 8900 }, { "epoch": 2.96510368951445, "grad_norm": 0.60546875, "learning_rate": 1.2374536973061462e-05, "loss": 3.9485, "step": 8901 }, { "epoch": 2.9654368285166983, "grad_norm": 0.625, "learning_rate": 1.2373921781939291e-05, "loss": 3.9961, "step": 8902 }, { "epoch": 2.9657699675189475, "grad_norm": 0.60546875, "learning_rate": 1.237330653404649e-05, "loss": 4.0111, "step": 8903 }, { "epoch": 2.966103106521196, "grad_norm": 0.63671875, "learning_rate": 1.2372691229390223e-05, "loss": 3.9622, "step": 8904 }, { "epoch": 2.9664362455234445, "grad_norm": 0.64453125, "learning_rate": 1.2372075867977658e-05, "loss": 4.0432, "step": 8905 }, { "epoch": 2.9667693845256933, "grad_norm": 0.6171875, "learning_rate": 1.2371460449815961e-05, "loss": 4.0011, "step": 8906 }, { "epoch": 2.967102523527942, "grad_norm": 0.62109375, "learning_rate": 1.2370844974912302e-05, "loss": 3.9865, "step": 8907 }, { "epoch": 2.9674356625301908, "grad_norm": 0.61328125, "learning_rate": 1.2370229443273847e-05, "loss": 3.9567, "step": 8908 }, { "epoch": 2.9677688015324395, "grad_norm": 0.62890625, "learning_rate": 1.2369613854907772e-05, "loss": 4.0339, "step": 8909 }, { "epoch": 2.9681019405346882, "grad_norm": 0.6328125, "learning_rate": 1.2368998209821241e-05, "loss": 4.051, "step": 8910 }, { "epoch": 2.9684350795369365, "grad_norm": 0.640625, "learning_rate": 1.2368382508021428e-05, "loss": 3.9526, "step": 8911 }, { "epoch": 2.9687682185391857, "grad_norm": 0.6328125, "learning_rate": 1.2367766749515502e-05, "loss": 4.0197, "step": 8912 }, { "epoch": 2.969101357541434, "grad_norm": 0.64453125, "learning_rate": 1.2367150934310637e-05, "loss": 4.101, "step": 8913 }, { "epoch": 2.9694344965436827, "grad_norm": 0.6328125, "learning_rate": 1.2366535062414006e-05, "loss": 4.1198, "step": 8914 }, { "epoch": 2.9697676355459315, "grad_norm": 0.6640625, "learning_rate": 1.2365919133832784e-05, "loss": 3.9781, "step": 8915 }, { "epoch": 2.9701007745481802, "grad_norm": 0.6015625, "learning_rate": 1.2365303148574143e-05, "loss": 4.0564, "step": 8916 }, { "epoch": 2.970433913550429, "grad_norm": 0.6171875, "learning_rate": 1.2364687106645258e-05, "loss": 4.0075, "step": 8917 }, { "epoch": 2.9707670525526777, "grad_norm": 0.65625, "learning_rate": 1.2364071008053304e-05, "loss": 3.9563, "step": 8918 }, { "epoch": 2.9711001915549264, "grad_norm": 0.62109375, "learning_rate": 1.2363454852805462e-05, "loss": 4.0359, "step": 8919 }, { "epoch": 2.9714333305571747, "grad_norm": 0.66015625, "learning_rate": 1.2362838640908903e-05, "loss": 4.025, "step": 8920 }, { "epoch": 2.971766469559424, "grad_norm": 0.6640625, "learning_rate": 1.2362222372370805e-05, "loss": 4.0298, "step": 8921 }, { "epoch": 2.972099608561672, "grad_norm": 0.58984375, "learning_rate": 1.236160604719835e-05, "loss": 4.02, "step": 8922 }, { "epoch": 2.972432747563921, "grad_norm": 0.61328125, "learning_rate": 1.2360989665398713e-05, "loss": 3.9894, "step": 8923 }, { "epoch": 2.9727658865661697, "grad_norm": 0.62890625, "learning_rate": 1.2360373226979077e-05, "loss": 3.9996, "step": 8924 }, { "epoch": 2.9730990255684184, "grad_norm": 0.62109375, "learning_rate": 1.2359756731946619e-05, "loss": 4.1029, "step": 8925 }, { "epoch": 2.973432164570667, "grad_norm": 0.63671875, "learning_rate": 1.2359140180308523e-05, "loss": 4.0536, "step": 8926 }, { "epoch": 2.973765303572916, "grad_norm": 0.66796875, "learning_rate": 1.2358523572071968e-05, "loss": 3.9862, "step": 8927 }, { "epoch": 2.9740984425751646, "grad_norm": 0.6171875, "learning_rate": 1.2357906907244137e-05, "loss": 4.0531, "step": 8928 }, { "epoch": 2.974431581577413, "grad_norm": 0.62890625, "learning_rate": 1.2357290185832214e-05, "loss": 4.0946, "step": 8929 }, { "epoch": 2.974764720579662, "grad_norm": 0.62890625, "learning_rate": 1.235667340784338e-05, "loss": 3.9921, "step": 8930 }, { "epoch": 2.9750978595819104, "grad_norm": 0.60546875, "learning_rate": 1.2356056573284819e-05, "loss": 4.0304, "step": 8931 }, { "epoch": 2.975430998584159, "grad_norm": 0.63671875, "learning_rate": 1.2355439682163717e-05, "loss": 4.045, "step": 8932 }, { "epoch": 2.975764137586408, "grad_norm": 0.60546875, "learning_rate": 1.2354822734487264e-05, "loss": 4.0142, "step": 8933 }, { "epoch": 2.9760972765886566, "grad_norm": 0.6875, "learning_rate": 1.2354205730262638e-05, "loss": 4.015, "step": 8934 }, { "epoch": 2.9764304155909054, "grad_norm": 0.64453125, "learning_rate": 1.235358866949703e-05, "loss": 3.971, "step": 8935 }, { "epoch": 2.976763554593154, "grad_norm": 0.6171875, "learning_rate": 1.2352971552197625e-05, "loss": 3.975, "step": 8936 }, { "epoch": 2.977096693595403, "grad_norm": 0.64453125, "learning_rate": 1.2352354378371616e-05, "loss": 3.9671, "step": 8937 }, { "epoch": 2.977429832597651, "grad_norm": 0.625, "learning_rate": 1.2351737148026189e-05, "loss": 3.9622, "step": 8938 }, { "epoch": 2.9777629715999003, "grad_norm": 0.62109375, "learning_rate": 1.235111986116853e-05, "loss": 3.9493, "step": 8939 }, { "epoch": 2.9780961106021486, "grad_norm": 0.62890625, "learning_rate": 1.2350502517805834e-05, "loss": 3.9396, "step": 8940 }, { "epoch": 2.9784292496043974, "grad_norm": 0.6484375, "learning_rate": 1.234988511794529e-05, "loss": 4.04, "step": 8941 }, { "epoch": 2.978762388606646, "grad_norm": 0.6328125, "learning_rate": 1.2349267661594088e-05, "loss": 4.0326, "step": 8942 }, { "epoch": 2.979095527608895, "grad_norm": 0.640625, "learning_rate": 1.2348650148759422e-05, "loss": 3.9465, "step": 8943 }, { "epoch": 2.9794286666111436, "grad_norm": 0.62890625, "learning_rate": 1.2348032579448485e-05, "loss": 3.9858, "step": 8944 }, { "epoch": 2.9797618056133923, "grad_norm": 0.62109375, "learning_rate": 1.2347414953668467e-05, "loss": 4.0991, "step": 8945 }, { "epoch": 2.980094944615641, "grad_norm": 0.64453125, "learning_rate": 1.2346797271426567e-05, "loss": 4.0083, "step": 8946 }, { "epoch": 2.9804280836178894, "grad_norm": 0.64453125, "learning_rate": 1.2346179532729974e-05, "loss": 4.0577, "step": 8947 }, { "epoch": 2.980761222620138, "grad_norm": 0.640625, "learning_rate": 1.2345561737585886e-05, "loss": 4.0097, "step": 8948 }, { "epoch": 2.981094361622387, "grad_norm": 0.66796875, "learning_rate": 1.2344943886001501e-05, "loss": 4.0713, "step": 8949 }, { "epoch": 2.9814275006246356, "grad_norm": 0.60546875, "learning_rate": 1.2344325977984015e-05, "loss": 4.1338, "step": 8950 }, { "epoch": 2.9817606396268843, "grad_norm": 0.60546875, "learning_rate": 1.2343708013540623e-05, "loss": 4.0611, "step": 8951 }, { "epoch": 2.982093778629133, "grad_norm": 0.63671875, "learning_rate": 1.2343089992678525e-05, "loss": 4.0586, "step": 8952 }, { "epoch": 2.982426917631382, "grad_norm": 0.6484375, "learning_rate": 1.2342471915404916e-05, "loss": 4.0635, "step": 8953 }, { "epoch": 2.9827600566336305, "grad_norm": 0.6640625, "learning_rate": 1.2341853781727001e-05, "loss": 4.024, "step": 8954 }, { "epoch": 2.9830931956358793, "grad_norm": 0.625, "learning_rate": 1.2341235591651977e-05, "loss": 4.0639, "step": 8955 }, { "epoch": 2.9834263346381276, "grad_norm": 0.62890625, "learning_rate": 1.234061734518704e-05, "loss": 4.0134, "step": 8956 }, { "epoch": 2.9837594736403763, "grad_norm": 0.578125, "learning_rate": 1.2339999042339399e-05, "loss": 4.0083, "step": 8957 }, { "epoch": 2.984092612642625, "grad_norm": 0.58203125, "learning_rate": 1.2339380683116254e-05, "loss": 3.954, "step": 8958 }, { "epoch": 2.984425751644874, "grad_norm": 0.609375, "learning_rate": 1.2338762267524803e-05, "loss": 3.9781, "step": 8959 }, { "epoch": 2.9847588906471225, "grad_norm": 0.63671875, "learning_rate": 1.2338143795572252e-05, "loss": 4.0126, "step": 8960 }, { "epoch": 2.9850920296493713, "grad_norm": 0.625, "learning_rate": 1.233752526726581e-05, "loss": 4.0083, "step": 8961 }, { "epoch": 2.98542516865162, "grad_norm": 0.62109375, "learning_rate": 1.2336906682612673e-05, "loss": 4.0004, "step": 8962 }, { "epoch": 2.9857583076538687, "grad_norm": 0.66015625, "learning_rate": 1.233628804162005e-05, "loss": 3.9686, "step": 8963 }, { "epoch": 2.9860914466561175, "grad_norm": 0.65625, "learning_rate": 1.2335669344295147e-05, "loss": 4.0496, "step": 8964 }, { "epoch": 2.9864245856583658, "grad_norm": 0.625, "learning_rate": 1.2335050590645171e-05, "loss": 4.0205, "step": 8965 }, { "epoch": 2.9867577246606145, "grad_norm": 0.6328125, "learning_rate": 1.2334431780677326e-05, "loss": 4.0733, "step": 8966 }, { "epoch": 2.9870908636628632, "grad_norm": 0.6171875, "learning_rate": 1.2333812914398824e-05, "loss": 4.0157, "step": 8967 }, { "epoch": 2.987424002665112, "grad_norm": 0.60546875, "learning_rate": 1.2333193991816872e-05, "loss": 4.0406, "step": 8968 }, { "epoch": 2.9877571416673607, "grad_norm": 0.63671875, "learning_rate": 1.2332575012938678e-05, "loss": 3.9924, "step": 8969 }, { "epoch": 2.9880902806696095, "grad_norm": 0.6015625, "learning_rate": 1.2331955977771451e-05, "loss": 4.0571, "step": 8970 }, { "epoch": 2.988423419671858, "grad_norm": 0.6484375, "learning_rate": 1.2331336886322406e-05, "loss": 4.0122, "step": 8971 }, { "epoch": 2.9887565586741065, "grad_norm": 0.66015625, "learning_rate": 1.2330717738598749e-05, "loss": 4.054, "step": 8972 }, { "epoch": 2.9890896976763557, "grad_norm": 0.58984375, "learning_rate": 1.2330098534607694e-05, "loss": 3.9673, "step": 8973 }, { "epoch": 2.989422836678604, "grad_norm": 0.66796875, "learning_rate": 1.2329479274356454e-05, "loss": 3.9583, "step": 8974 }, { "epoch": 2.9897559756808527, "grad_norm": 0.65234375, "learning_rate": 1.232885995785224e-05, "loss": 3.8789, "step": 8975 }, { "epoch": 2.9900891146831015, "grad_norm": 0.625, "learning_rate": 1.2328240585102265e-05, "loss": 4.0533, "step": 8976 }, { "epoch": 2.99042225368535, "grad_norm": 0.64453125, "learning_rate": 1.2327621156113747e-05, "loss": 4.0457, "step": 8977 }, { "epoch": 2.990755392687599, "grad_norm": 0.6484375, "learning_rate": 1.2327001670893901e-05, "loss": 3.9952, "step": 8978 }, { "epoch": 2.9910885316898477, "grad_norm": 0.62890625, "learning_rate": 1.2326382129449938e-05, "loss": 4.0771, "step": 8979 }, { "epoch": 2.9914216706920964, "grad_norm": 0.6171875, "learning_rate": 1.2325762531789078e-05, "loss": 4.0177, "step": 8980 }, { "epoch": 2.9917548096943447, "grad_norm": 0.625, "learning_rate": 1.2325142877918538e-05, "loss": 4.0298, "step": 8981 }, { "epoch": 2.992087948696594, "grad_norm": 0.6484375, "learning_rate": 1.2324523167845535e-05, "loss": 4.0264, "step": 8982 }, { "epoch": 2.992421087698842, "grad_norm": 0.6171875, "learning_rate": 1.2323903401577284e-05, "loss": 4.0078, "step": 8983 }, { "epoch": 2.992754226701091, "grad_norm": 0.6171875, "learning_rate": 1.2323283579121008e-05, "loss": 4.1369, "step": 8984 }, { "epoch": 2.9930873657033397, "grad_norm": 0.62109375, "learning_rate": 1.2322663700483927e-05, "loss": 4.0784, "step": 8985 }, { "epoch": 2.9934205047055884, "grad_norm": 0.6171875, "learning_rate": 1.2322043765673258e-05, "loss": 3.9322, "step": 8986 }, { "epoch": 2.993753643707837, "grad_norm": 0.625, "learning_rate": 1.2321423774696227e-05, "loss": 4.0654, "step": 8987 }, { "epoch": 2.994086782710086, "grad_norm": 0.625, "learning_rate": 1.2320803727560048e-05, "loss": 4.0418, "step": 8988 }, { "epoch": 2.9944199217123346, "grad_norm": 0.640625, "learning_rate": 1.232018362427195e-05, "loss": 3.9397, "step": 8989 }, { "epoch": 2.994753060714583, "grad_norm": 0.609375, "learning_rate": 1.2319563464839152e-05, "loss": 3.998, "step": 8990 }, { "epoch": 2.995086199716832, "grad_norm": 0.6171875, "learning_rate": 1.231894324926888e-05, "loss": 4.0859, "step": 8991 }, { "epoch": 2.9954193387190804, "grad_norm": 0.65234375, "learning_rate": 1.2318322977568357e-05, "loss": 3.9986, "step": 8992 }, { "epoch": 2.995752477721329, "grad_norm": 0.58984375, "learning_rate": 1.2317702649744806e-05, "loss": 4.0774, "step": 8993 }, { "epoch": 2.996085616723578, "grad_norm": 0.61328125, "learning_rate": 1.2317082265805456e-05, "loss": 4.0214, "step": 8994 }, { "epoch": 2.9964187557258266, "grad_norm": 0.6015625, "learning_rate": 1.2316461825757529e-05, "loss": 4.0606, "step": 8995 }, { "epoch": 2.9967518947280753, "grad_norm": 0.66015625, "learning_rate": 1.2315841329608257e-05, "loss": 4.0167, "step": 8996 }, { "epoch": 2.997085033730324, "grad_norm": 0.66015625, "learning_rate": 1.2315220777364865e-05, "loss": 4.012, "step": 8997 }, { "epoch": 2.997418172732573, "grad_norm": 0.609375, "learning_rate": 1.2314600169034578e-05, "loss": 4.1182, "step": 8998 }, { "epoch": 2.997751311734821, "grad_norm": 0.609375, "learning_rate": 1.2313979504624629e-05, "loss": 3.9873, "step": 8999 }, { "epoch": 2.9980844507370703, "grad_norm": 0.65234375, "learning_rate": 1.2313358784142245e-05, "loss": 4.0619, "step": 9000 }, { "epoch": 2.9984175897393186, "grad_norm": 0.62109375, "learning_rate": 1.2312738007594659e-05, "loss": 4.0134, "step": 9001 }, { "epoch": 2.9987507287415673, "grad_norm": 0.62109375, "learning_rate": 1.2312117174989097e-05, "loss": 4.0383, "step": 9002 }, { "epoch": 2.999083867743816, "grad_norm": 0.71484375, "learning_rate": 1.2311496286332793e-05, "loss": 4.055, "step": 9003 }, { "epoch": 2.999417006746065, "grad_norm": 0.63671875, "learning_rate": 1.2310875341632981e-05, "loss": 4.0063, "step": 9004 }, { "epoch": 2.9997501457483136, "grad_norm": 0.6171875, "learning_rate": 1.231025434089689e-05, "loss": 4.063, "step": 9005 }, { "epoch": 3.0, "grad_norm": 0.72265625, "learning_rate": 1.2309633284131754e-05, "loss": 3.9655, "step": 9006 }, { "epoch": 3.0003331390022487, "grad_norm": 0.58203125, "learning_rate": 1.230901217134481e-05, "loss": 4.0763, "step": 9007 }, { "epoch": 3.0006662780044975, "grad_norm": 0.60546875, "learning_rate": 1.230839100254329e-05, "loss": 4.0483, "step": 9008 }, { "epoch": 3.000999417006746, "grad_norm": 0.62109375, "learning_rate": 1.230776977773443e-05, "loss": 4.0016, "step": 9009 }, { "epoch": 3.001332556008995, "grad_norm": 0.60546875, "learning_rate": 1.2307148496925467e-05, "loss": 4.0421, "step": 9010 }, { "epoch": 3.0016656950112433, "grad_norm": 0.6171875, "learning_rate": 1.2306527160123634e-05, "loss": 4.036, "step": 9011 }, { "epoch": 3.001998834013492, "grad_norm": 0.62890625, "learning_rate": 1.230590576733617e-05, "loss": 4.1087, "step": 9012 }, { "epoch": 3.0023319730157407, "grad_norm": 0.6328125, "learning_rate": 1.2305284318570315e-05, "loss": 4.0017, "step": 9013 }, { "epoch": 3.0026651120179895, "grad_norm": 0.609375, "learning_rate": 1.2304662813833307e-05, "loss": 4.0789, "step": 9014 }, { "epoch": 3.002998251020238, "grad_norm": 0.62890625, "learning_rate": 1.2304041253132381e-05, "loss": 4.0503, "step": 9015 }, { "epoch": 3.003331390022487, "grad_norm": 0.60546875, "learning_rate": 1.230341963647478e-05, "loss": 4.0812, "step": 9016 }, { "epoch": 3.0036645290247357, "grad_norm": 0.66796875, "learning_rate": 1.2302797963867746e-05, "loss": 4.0327, "step": 9017 }, { "epoch": 3.0039976680269844, "grad_norm": 0.640625, "learning_rate": 1.2302176235318519e-05, "loss": 4.02, "step": 9018 }, { "epoch": 3.004330807029233, "grad_norm": 0.64453125, "learning_rate": 1.230155445083434e-05, "loss": 3.974, "step": 9019 }, { "epoch": 3.0046639460314815, "grad_norm": 0.61328125, "learning_rate": 1.230093261042245e-05, "loss": 3.9795, "step": 9020 }, { "epoch": 3.00499708503373, "grad_norm": 0.65625, "learning_rate": 1.2300310714090097e-05, "loss": 3.9841, "step": 9021 }, { "epoch": 3.005330224035979, "grad_norm": 0.640625, "learning_rate": 1.229968876184452e-05, "loss": 3.9567, "step": 9022 }, { "epoch": 3.0056633630382277, "grad_norm": 0.63671875, "learning_rate": 1.2299066753692965e-05, "loss": 4.0707, "step": 9023 }, { "epoch": 3.0059965020404764, "grad_norm": 0.6640625, "learning_rate": 1.2298444689642677e-05, "loss": 4.0194, "step": 9024 }, { "epoch": 3.006329641042725, "grad_norm": 0.59765625, "learning_rate": 1.2297822569700902e-05, "loss": 4.0307, "step": 9025 }, { "epoch": 3.006662780044974, "grad_norm": 0.64453125, "learning_rate": 1.2297200393874886e-05, "loss": 3.9775, "step": 9026 }, { "epoch": 3.0069959190472226, "grad_norm": 0.609375, "learning_rate": 1.2296578162171876e-05, "loss": 4.0711, "step": 9027 }, { "epoch": 3.007329058049471, "grad_norm": 0.61328125, "learning_rate": 1.229595587459912e-05, "loss": 3.978, "step": 9028 }, { "epoch": 3.0076621970517197, "grad_norm": 0.61328125, "learning_rate": 1.2295333531163866e-05, "loss": 4.0223, "step": 9029 }, { "epoch": 3.0079953360539684, "grad_norm": 0.609375, "learning_rate": 1.2294711131873363e-05, "loss": 4.0209, "step": 9030 }, { "epoch": 3.008328475056217, "grad_norm": 0.62109375, "learning_rate": 1.2294088676734863e-05, "loss": 4.0972, "step": 9031 }, { "epoch": 3.008661614058466, "grad_norm": 0.578125, "learning_rate": 1.229346616575561e-05, "loss": 4.0254, "step": 9032 }, { "epoch": 3.0089947530607146, "grad_norm": 0.6484375, "learning_rate": 1.229284359894286e-05, "loss": 4.0385, "step": 9033 }, { "epoch": 3.0093278920629634, "grad_norm": 0.609375, "learning_rate": 1.2292220976303863e-05, "loss": 4.1123, "step": 9034 }, { "epoch": 3.009661031065212, "grad_norm": 0.6328125, "learning_rate": 1.2291598297845874e-05, "loss": 4.0264, "step": 9035 }, { "epoch": 3.009994170067461, "grad_norm": 0.6171875, "learning_rate": 1.2290975563576141e-05, "loss": 3.9396, "step": 9036 }, { "epoch": 3.010327309069709, "grad_norm": 0.625, "learning_rate": 1.2290352773501921e-05, "loss": 3.9863, "step": 9037 }, { "epoch": 3.010660448071958, "grad_norm": 0.60546875, "learning_rate": 1.2289729927630468e-05, "loss": 3.9319, "step": 9038 }, { "epoch": 3.0109935870742066, "grad_norm": 0.609375, "learning_rate": 1.2289107025969034e-05, "loss": 4.0227, "step": 9039 }, { "epoch": 3.0113267260764554, "grad_norm": 0.625, "learning_rate": 1.2288484068524876e-05, "loss": 3.9766, "step": 9040 }, { "epoch": 3.011659865078704, "grad_norm": 0.64453125, "learning_rate": 1.2287861055305252e-05, "loss": 3.9556, "step": 9041 }, { "epoch": 3.011993004080953, "grad_norm": 0.6484375, "learning_rate": 1.2287237986317415e-05, "loss": 4.0518, "step": 9042 }, { "epoch": 3.0123261430832016, "grad_norm": 0.63671875, "learning_rate": 1.2286614861568627e-05, "loss": 4.0173, "step": 9043 }, { "epoch": 3.0126592820854503, "grad_norm": 0.62109375, "learning_rate": 1.2285991681066144e-05, "loss": 4.0834, "step": 9044 }, { "epoch": 3.012992421087699, "grad_norm": 0.62890625, "learning_rate": 1.2285368444817223e-05, "loss": 4.0154, "step": 9045 }, { "epoch": 3.0133255600899473, "grad_norm": 0.65234375, "learning_rate": 1.2284745152829127e-05, "loss": 4.0347, "step": 9046 }, { "epoch": 3.013658699092196, "grad_norm": 0.64453125, "learning_rate": 1.2284121805109108e-05, "loss": 3.9814, "step": 9047 }, { "epoch": 3.013991838094445, "grad_norm": 0.65625, "learning_rate": 1.2283498401664438e-05, "loss": 4.1141, "step": 9048 }, { "epoch": 3.0143249770966936, "grad_norm": 0.6328125, "learning_rate": 1.228287494250237e-05, "loss": 3.9448, "step": 9049 }, { "epoch": 3.0146581160989423, "grad_norm": 0.6015625, "learning_rate": 1.2282251427630169e-05, "loss": 4.0221, "step": 9050 }, { "epoch": 3.014991255101191, "grad_norm": 0.625, "learning_rate": 1.2281627857055099e-05, "loss": 3.958, "step": 9051 }, { "epoch": 3.0153243941034398, "grad_norm": 0.62109375, "learning_rate": 1.2281004230784419e-05, "loss": 4.0672, "step": 9052 }, { "epoch": 3.0156575331056885, "grad_norm": 0.640625, "learning_rate": 1.2280380548825395e-05, "loss": 3.9449, "step": 9053 }, { "epoch": 3.0159906721079373, "grad_norm": 0.64453125, "learning_rate": 1.2279756811185295e-05, "loss": 3.9624, "step": 9054 }, { "epoch": 3.0163238111101855, "grad_norm": 0.62109375, "learning_rate": 1.227913301787138e-05, "loss": 4.0186, "step": 9055 }, { "epoch": 3.0166569501124343, "grad_norm": 0.69921875, "learning_rate": 1.2278509168890915e-05, "loss": 3.9916, "step": 9056 }, { "epoch": 3.016990089114683, "grad_norm": 0.65234375, "learning_rate": 1.2277885264251168e-05, "loss": 3.9699, "step": 9057 }, { "epoch": 3.0173232281169318, "grad_norm": 0.6328125, "learning_rate": 1.2277261303959411e-05, "loss": 4.0136, "step": 9058 }, { "epoch": 3.0176563671191805, "grad_norm": 0.640625, "learning_rate": 1.2276637288022905e-05, "loss": 3.9901, "step": 9059 }, { "epoch": 3.0179895061214292, "grad_norm": 0.61328125, "learning_rate": 1.2276013216448919e-05, "loss": 4.0274, "step": 9060 }, { "epoch": 3.018322645123678, "grad_norm": 0.63671875, "learning_rate": 1.2275389089244724e-05, "loss": 4.0154, "step": 9061 }, { "epoch": 3.0186557841259267, "grad_norm": 0.62109375, "learning_rate": 1.2274764906417592e-05, "loss": 4.0432, "step": 9062 }, { "epoch": 3.018988923128175, "grad_norm": 0.703125, "learning_rate": 1.227414066797479e-05, "loss": 3.9818, "step": 9063 }, { "epoch": 3.0193220621304238, "grad_norm": 0.6640625, "learning_rate": 1.227351637392359e-05, "loss": 4.0836, "step": 9064 }, { "epoch": 3.0196552011326725, "grad_norm": 0.64453125, "learning_rate": 1.2272892024271262e-05, "loss": 4.0177, "step": 9065 }, { "epoch": 3.0199883401349212, "grad_norm": 0.65625, "learning_rate": 1.2272267619025082e-05, "loss": 3.9701, "step": 9066 }, { "epoch": 3.02032147913717, "grad_norm": 0.62109375, "learning_rate": 1.2271643158192321e-05, "loss": 4.086, "step": 9067 }, { "epoch": 3.0206546181394187, "grad_norm": 0.64453125, "learning_rate": 1.2271018641780251e-05, "loss": 4.0111, "step": 9068 }, { "epoch": 3.0209877571416675, "grad_norm": 0.62890625, "learning_rate": 1.2270394069796149e-05, "loss": 4.0005, "step": 9069 }, { "epoch": 3.021320896143916, "grad_norm": 0.6171875, "learning_rate": 1.2269769442247288e-05, "loss": 3.9834, "step": 9070 }, { "epoch": 3.021654035146165, "grad_norm": 0.67578125, "learning_rate": 1.2269144759140946e-05, "loss": 4.0309, "step": 9071 }, { "epoch": 3.0219871741484132, "grad_norm": 0.6171875, "learning_rate": 1.2268520020484395e-05, "loss": 4.0751, "step": 9072 }, { "epoch": 3.022320313150662, "grad_norm": 0.671875, "learning_rate": 1.2267895226284916e-05, "loss": 4.0891, "step": 9073 }, { "epoch": 3.0226534521529107, "grad_norm": 0.60546875, "learning_rate": 1.2267270376549784e-05, "loss": 4.0217, "step": 9074 }, { "epoch": 3.0229865911551594, "grad_norm": 0.62890625, "learning_rate": 1.226664547128628e-05, "loss": 4.0659, "step": 9075 }, { "epoch": 3.023319730157408, "grad_norm": 0.625, "learning_rate": 1.2266020510501679e-05, "loss": 4.0453, "step": 9076 }, { "epoch": 3.023652869159657, "grad_norm": 0.65234375, "learning_rate": 1.2265395494203263e-05, "loss": 3.9413, "step": 9077 }, { "epoch": 3.0239860081619057, "grad_norm": 0.6328125, "learning_rate": 1.226477042239831e-05, "loss": 4.0329, "step": 9078 }, { "epoch": 3.0243191471641544, "grad_norm": 0.61328125, "learning_rate": 1.2264145295094102e-05, "loss": 4.0246, "step": 9079 }, { "epoch": 3.024652286166403, "grad_norm": 0.62890625, "learning_rate": 1.2263520112297922e-05, "loss": 4.0884, "step": 9080 }, { "epoch": 3.0249854251686514, "grad_norm": 0.6328125, "learning_rate": 1.226289487401705e-05, "loss": 4.0402, "step": 9081 }, { "epoch": 3.0253185641709, "grad_norm": 0.64453125, "learning_rate": 1.2262269580258768e-05, "loss": 4.042, "step": 9082 }, { "epoch": 3.025651703173149, "grad_norm": 0.67578125, "learning_rate": 1.226164423103036e-05, "loss": 3.9619, "step": 9083 }, { "epoch": 3.0259848421753976, "grad_norm": 0.66796875, "learning_rate": 1.2261018826339113e-05, "loss": 3.9605, "step": 9084 }, { "epoch": 3.0263179811776464, "grad_norm": 0.60546875, "learning_rate": 1.2260393366192309e-05, "loss": 4.0208, "step": 9085 }, { "epoch": 3.026651120179895, "grad_norm": 0.6171875, "learning_rate": 1.225976785059723e-05, "loss": 4.0921, "step": 9086 }, { "epoch": 3.026984259182144, "grad_norm": 0.62890625, "learning_rate": 1.2259142279561168e-05, "loss": 4.0261, "step": 9087 }, { "epoch": 3.0273173981843926, "grad_norm": 0.609375, "learning_rate": 1.2258516653091405e-05, "loss": 3.9882, "step": 9088 }, { "epoch": 3.0276505371866413, "grad_norm": 0.640625, "learning_rate": 1.2257890971195232e-05, "loss": 4.0763, "step": 9089 }, { "epoch": 3.0279836761888896, "grad_norm": 0.63671875, "learning_rate": 1.2257265233879935e-05, "loss": 4.0686, "step": 9090 }, { "epoch": 3.0283168151911384, "grad_norm": 0.640625, "learning_rate": 1.2256639441152801e-05, "loss": 4.0068, "step": 9091 }, { "epoch": 3.028649954193387, "grad_norm": 0.64453125, "learning_rate": 1.2256013593021119e-05, "loss": 3.9775, "step": 9092 }, { "epoch": 3.028983093195636, "grad_norm": 0.65234375, "learning_rate": 1.2255387689492182e-05, "loss": 3.9832, "step": 9093 }, { "epoch": 3.0293162321978846, "grad_norm": 0.65234375, "learning_rate": 1.2254761730573278e-05, "loss": 3.9865, "step": 9094 }, { "epoch": 3.0296493712001333, "grad_norm": 0.65625, "learning_rate": 1.2254135716271696e-05, "loss": 3.9625, "step": 9095 }, { "epoch": 3.029982510202382, "grad_norm": 0.62890625, "learning_rate": 1.2253509646594737e-05, "loss": 4.0054, "step": 9096 }, { "epoch": 3.030315649204631, "grad_norm": 0.6640625, "learning_rate": 1.2252883521549681e-05, "loss": 4.0318, "step": 9097 }, { "epoch": 3.030648788206879, "grad_norm": 0.640625, "learning_rate": 1.2252257341143828e-05, "loss": 3.9753, "step": 9098 }, { "epoch": 3.030981927209128, "grad_norm": 0.640625, "learning_rate": 1.2251631105384473e-05, "loss": 3.9955, "step": 9099 }, { "epoch": 3.0313150662113766, "grad_norm": 0.6328125, "learning_rate": 1.2251004814278903e-05, "loss": 3.9705, "step": 9100 }, { "epoch": 3.0316482052136253, "grad_norm": 0.62890625, "learning_rate": 1.225037846783442e-05, "loss": 4.0016, "step": 9101 }, { "epoch": 3.031981344215874, "grad_norm": 0.6640625, "learning_rate": 1.2249752066058317e-05, "loss": 3.9653, "step": 9102 }, { "epoch": 3.032314483218123, "grad_norm": 0.671875, "learning_rate": 1.2249125608957893e-05, "loss": 3.9623, "step": 9103 }, { "epoch": 3.0326476222203715, "grad_norm": 0.66015625, "learning_rate": 1.2248499096540436e-05, "loss": 4.0442, "step": 9104 }, { "epoch": 3.0329807612226203, "grad_norm": 0.640625, "learning_rate": 1.2247872528813254e-05, "loss": 4.1418, "step": 9105 }, { "epoch": 3.033313900224869, "grad_norm": 0.6484375, "learning_rate": 1.224724590578364e-05, "loss": 3.9644, "step": 9106 }, { "epoch": 3.0336470392271173, "grad_norm": 0.6328125, "learning_rate": 1.2246619227458895e-05, "loss": 4.0194, "step": 9107 }, { "epoch": 3.033980178229366, "grad_norm": 0.6171875, "learning_rate": 1.2245992493846316e-05, "loss": 4.0279, "step": 9108 }, { "epoch": 3.034313317231615, "grad_norm": 0.6484375, "learning_rate": 1.2245365704953205e-05, "loss": 3.9885, "step": 9109 }, { "epoch": 3.0346464562338635, "grad_norm": 0.6484375, "learning_rate": 1.224473886078686e-05, "loss": 3.9549, "step": 9110 }, { "epoch": 3.0349795952361123, "grad_norm": 0.6640625, "learning_rate": 1.2244111961354589e-05, "loss": 4.0549, "step": 9111 }, { "epoch": 3.035312734238361, "grad_norm": 0.61328125, "learning_rate": 1.2243485006663686e-05, "loss": 4.0108, "step": 9112 }, { "epoch": 3.0356458732406097, "grad_norm": 0.640625, "learning_rate": 1.2242857996721457e-05, "loss": 3.9813, "step": 9113 }, { "epoch": 3.0359790122428585, "grad_norm": 0.6640625, "learning_rate": 1.2242230931535205e-05, "loss": 3.9749, "step": 9114 }, { "epoch": 3.0363121512451072, "grad_norm": 0.640625, "learning_rate": 1.2241603811112235e-05, "loss": 4.0161, "step": 9115 }, { "epoch": 3.0366452902473555, "grad_norm": 0.64453125, "learning_rate": 1.224097663545985e-05, "loss": 4.0191, "step": 9116 }, { "epoch": 3.0369784292496043, "grad_norm": 0.67578125, "learning_rate": 1.2240349404585358e-05, "loss": 3.9814, "step": 9117 }, { "epoch": 3.037311568251853, "grad_norm": 0.640625, "learning_rate": 1.2239722118496061e-05, "loss": 4.0478, "step": 9118 }, { "epoch": 3.0376447072541017, "grad_norm": 0.66015625, "learning_rate": 1.2239094777199268e-05, "loss": 3.9943, "step": 9119 }, { "epoch": 3.0379778462563505, "grad_norm": 0.671875, "learning_rate": 1.2238467380702287e-05, "loss": 3.9952, "step": 9120 }, { "epoch": 3.038310985258599, "grad_norm": 0.6796875, "learning_rate": 1.2237839929012423e-05, "loss": 4.0612, "step": 9121 }, { "epoch": 3.038644124260848, "grad_norm": 0.68359375, "learning_rate": 1.2237212422136989e-05, "loss": 3.9729, "step": 9122 }, { "epoch": 3.0389772632630967, "grad_norm": 0.6484375, "learning_rate": 1.223658486008329e-05, "loss": 3.984, "step": 9123 }, { "epoch": 3.039310402265345, "grad_norm": 0.6171875, "learning_rate": 1.2235957242858632e-05, "loss": 4.0198, "step": 9124 }, { "epoch": 3.0396435412675937, "grad_norm": 0.6796875, "learning_rate": 1.2235329570470334e-05, "loss": 3.987, "step": 9125 }, { "epoch": 3.0399766802698425, "grad_norm": 0.640625, "learning_rate": 1.2234701842925704e-05, "loss": 4.0891, "step": 9126 }, { "epoch": 3.040309819272091, "grad_norm": 0.66015625, "learning_rate": 1.2234074060232053e-05, "loss": 4.037, "step": 9127 }, { "epoch": 3.04064295827434, "grad_norm": 0.65234375, "learning_rate": 1.2233446222396689e-05, "loss": 3.923, "step": 9128 }, { "epoch": 3.0409760972765887, "grad_norm": 0.66796875, "learning_rate": 1.2232818329426934e-05, "loss": 3.9717, "step": 9129 }, { "epoch": 3.0413092362788374, "grad_norm": 0.6484375, "learning_rate": 1.2232190381330096e-05, "loss": 3.9872, "step": 9130 }, { "epoch": 3.041642375281086, "grad_norm": 0.6484375, "learning_rate": 1.2231562378113486e-05, "loss": 3.9744, "step": 9131 }, { "epoch": 3.041975514283335, "grad_norm": 0.640625, "learning_rate": 1.2230934319784428e-05, "loss": 3.9989, "step": 9132 }, { "epoch": 3.042308653285583, "grad_norm": 0.6484375, "learning_rate": 1.2230306206350228e-05, "loss": 3.9907, "step": 9133 }, { "epoch": 3.042641792287832, "grad_norm": 0.65625, "learning_rate": 1.222967803781821e-05, "loss": 3.9966, "step": 9134 }, { "epoch": 3.0429749312900807, "grad_norm": 0.62890625, "learning_rate": 1.2229049814195687e-05, "loss": 3.9924, "step": 9135 }, { "epoch": 3.0433080702923294, "grad_norm": 0.67578125, "learning_rate": 1.2228421535489974e-05, "loss": 4.0003, "step": 9136 }, { "epoch": 3.043641209294578, "grad_norm": 0.6484375, "learning_rate": 1.2227793201708395e-05, "loss": 3.9967, "step": 9137 }, { "epoch": 3.043974348296827, "grad_norm": 0.61328125, "learning_rate": 1.2227164812858263e-05, "loss": 4.0926, "step": 9138 }, { "epoch": 3.0443074872990756, "grad_norm": 0.640625, "learning_rate": 1.2226536368946902e-05, "loss": 3.9893, "step": 9139 }, { "epoch": 3.0446406263013244, "grad_norm": 0.6328125, "learning_rate": 1.2225907869981628e-05, "loss": 3.9715, "step": 9140 }, { "epoch": 3.044973765303573, "grad_norm": 0.63671875, "learning_rate": 1.2225279315969765e-05, "loss": 4.0628, "step": 9141 }, { "epoch": 3.0453069043058214, "grad_norm": 0.62890625, "learning_rate": 1.2224650706918632e-05, "loss": 3.9945, "step": 9142 }, { "epoch": 3.04564004330807, "grad_norm": 0.6484375, "learning_rate": 1.2224022042835553e-05, "loss": 4.0209, "step": 9143 }, { "epoch": 3.045973182310319, "grad_norm": 0.609375, "learning_rate": 1.2223393323727849e-05, "loss": 4.001, "step": 9144 }, { "epoch": 3.0463063213125676, "grad_norm": 0.62890625, "learning_rate": 1.2222764549602845e-05, "loss": 4.0116, "step": 9145 }, { "epoch": 3.0466394603148164, "grad_norm": 0.6484375, "learning_rate": 1.2222135720467861e-05, "loss": 3.9951, "step": 9146 }, { "epoch": 3.046972599317065, "grad_norm": 0.66796875, "learning_rate": 1.2221506836330225e-05, "loss": 4.0476, "step": 9147 }, { "epoch": 3.047305738319314, "grad_norm": 0.66796875, "learning_rate": 1.2220877897197261e-05, "loss": 4.0089, "step": 9148 }, { "epoch": 3.0476388773215626, "grad_norm": 0.65234375, "learning_rate": 1.2220248903076293e-05, "loss": 4.0295, "step": 9149 }, { "epoch": 3.0479720163238113, "grad_norm": 0.63671875, "learning_rate": 1.2219619853974651e-05, "loss": 4.051, "step": 9150 }, { "epoch": 3.0483051553260596, "grad_norm": 0.65234375, "learning_rate": 1.2218990749899661e-05, "loss": 4.0087, "step": 9151 }, { "epoch": 3.0486382943283084, "grad_norm": 0.640625, "learning_rate": 1.2218361590858652e-05, "loss": 3.9764, "step": 9152 }, { "epoch": 3.048971433330557, "grad_norm": 0.6484375, "learning_rate": 1.2217732376858947e-05, "loss": 3.9847, "step": 9153 }, { "epoch": 3.049304572332806, "grad_norm": 0.65234375, "learning_rate": 1.221710310790788e-05, "loss": 4.005, "step": 9154 }, { "epoch": 3.0496377113350546, "grad_norm": 0.625, "learning_rate": 1.2216473784012778e-05, "loss": 3.9983, "step": 9155 }, { "epoch": 3.0499708503373033, "grad_norm": 0.609375, "learning_rate": 1.2215844405180971e-05, "loss": 3.9781, "step": 9156 }, { "epoch": 3.050303989339552, "grad_norm": 0.640625, "learning_rate": 1.2215214971419796e-05, "loss": 3.976, "step": 9157 }, { "epoch": 3.050637128341801, "grad_norm": 0.625, "learning_rate": 1.2214585482736576e-05, "loss": 4.0091, "step": 9158 }, { "epoch": 3.0509702673440495, "grad_norm": 0.65234375, "learning_rate": 1.2213955939138648e-05, "loss": 3.9163, "step": 9159 }, { "epoch": 3.051303406346298, "grad_norm": 0.63671875, "learning_rate": 1.2213326340633344e-05, "loss": 3.9981, "step": 9160 }, { "epoch": 3.0516365453485466, "grad_norm": 0.625, "learning_rate": 1.2212696687227996e-05, "loss": 3.9702, "step": 9161 }, { "epoch": 3.0519696843507953, "grad_norm": 0.671875, "learning_rate": 1.221206697892994e-05, "loss": 4.0384, "step": 9162 }, { "epoch": 3.052302823353044, "grad_norm": 0.640625, "learning_rate": 1.2211437215746508e-05, "loss": 4.0228, "step": 9163 }, { "epoch": 3.0526359623552928, "grad_norm": 0.66015625, "learning_rate": 1.2210807397685041e-05, "loss": 3.9859, "step": 9164 }, { "epoch": 3.0529691013575415, "grad_norm": 0.6640625, "learning_rate": 1.221017752475287e-05, "loss": 3.9839, "step": 9165 }, { "epoch": 3.0533022403597903, "grad_norm": 0.640625, "learning_rate": 1.2209547596957333e-05, "loss": 4.019, "step": 9166 }, { "epoch": 3.053635379362039, "grad_norm": 0.6484375, "learning_rate": 1.2208917614305767e-05, "loss": 4.0464, "step": 9167 }, { "epoch": 3.0539685183642873, "grad_norm": 0.62109375, "learning_rate": 1.2208287576805511e-05, "loss": 4.0888, "step": 9168 }, { "epoch": 3.054301657366536, "grad_norm": 0.6328125, "learning_rate": 1.2207657484463902e-05, "loss": 4.0411, "step": 9169 }, { "epoch": 3.0546347963687848, "grad_norm": 0.6484375, "learning_rate": 1.220702733728828e-05, "loss": 4.0513, "step": 9170 }, { "epoch": 3.0549679353710335, "grad_norm": 0.671875, "learning_rate": 1.2206397135285985e-05, "loss": 4.0502, "step": 9171 }, { "epoch": 3.0553010743732822, "grad_norm": 0.63671875, "learning_rate": 1.2205766878464356e-05, "loss": 4.0968, "step": 9172 }, { "epoch": 3.055634213375531, "grad_norm": 0.6484375, "learning_rate": 1.2205136566830738e-05, "loss": 4.014, "step": 9173 }, { "epoch": 3.0559673523777797, "grad_norm": 0.6796875, "learning_rate": 1.2204506200392469e-05, "loss": 3.9978, "step": 9174 }, { "epoch": 3.0563004913800285, "grad_norm": 0.6328125, "learning_rate": 1.2203875779156894e-05, "loss": 4.0097, "step": 9175 }, { "epoch": 3.056633630382277, "grad_norm": 0.6328125, "learning_rate": 1.2203245303131354e-05, "loss": 4.0819, "step": 9176 }, { "epoch": 3.0569667693845255, "grad_norm": 0.6484375, "learning_rate": 1.2202614772323194e-05, "loss": 4.0375, "step": 9177 }, { "epoch": 3.0572999083867742, "grad_norm": 0.6484375, "learning_rate": 1.2201984186739756e-05, "loss": 3.9893, "step": 9178 }, { "epoch": 3.057633047389023, "grad_norm": 0.6328125, "learning_rate": 1.2201353546388389e-05, "loss": 4.0764, "step": 9179 }, { "epoch": 3.0579661863912717, "grad_norm": 0.64453125, "learning_rate": 1.2200722851276435e-05, "loss": 3.9699, "step": 9180 }, { "epoch": 3.0582993253935205, "grad_norm": 0.6171875, "learning_rate": 1.2200092101411242e-05, "loss": 4.0819, "step": 9181 }, { "epoch": 3.058632464395769, "grad_norm": 0.6796875, "learning_rate": 1.2199461296800154e-05, "loss": 4.0774, "step": 9182 }, { "epoch": 3.058965603398018, "grad_norm": 0.625, "learning_rate": 1.2198830437450524e-05, "loss": 3.9969, "step": 9183 }, { "epoch": 3.0592987424002667, "grad_norm": 0.640625, "learning_rate": 1.2198199523369695e-05, "loss": 4.0507, "step": 9184 }, { "epoch": 3.0596318814025154, "grad_norm": 0.65234375, "learning_rate": 1.2197568554565021e-05, "loss": 4.0291, "step": 9185 }, { "epoch": 3.0599650204047637, "grad_norm": 0.63671875, "learning_rate": 1.2196937531043845e-05, "loss": 4.0809, "step": 9186 }, { "epoch": 3.0602981594070124, "grad_norm": 0.62109375, "learning_rate": 1.2196306452813522e-05, "loss": 4.0761, "step": 9187 }, { "epoch": 3.060631298409261, "grad_norm": 0.62109375, "learning_rate": 1.21956753198814e-05, "loss": 4.0014, "step": 9188 }, { "epoch": 3.06096443741151, "grad_norm": 0.671875, "learning_rate": 1.2195044132254833e-05, "loss": 4.0084, "step": 9189 }, { "epoch": 3.0612975764137587, "grad_norm": 0.640625, "learning_rate": 1.219441288994117e-05, "loss": 4.0207, "step": 9190 }, { "epoch": 3.0616307154160074, "grad_norm": 0.66015625, "learning_rate": 1.2193781592947764e-05, "loss": 3.9955, "step": 9191 }, { "epoch": 3.061963854418256, "grad_norm": 0.65234375, "learning_rate": 1.219315024128197e-05, "loss": 4.055, "step": 9192 }, { "epoch": 3.062296993420505, "grad_norm": 0.64453125, "learning_rate": 1.2192518834951142e-05, "loss": 3.9645, "step": 9193 }, { "epoch": 3.062630132422753, "grad_norm": 0.65234375, "learning_rate": 1.2191887373962633e-05, "loss": 3.9847, "step": 9194 }, { "epoch": 3.062963271425002, "grad_norm": 0.62109375, "learning_rate": 1.2191255858323798e-05, "loss": 4.003, "step": 9195 }, { "epoch": 3.0632964104272506, "grad_norm": 0.63671875, "learning_rate": 1.2190624288041994e-05, "loss": 4.0654, "step": 9196 }, { "epoch": 3.0636295494294994, "grad_norm": 0.6171875, "learning_rate": 1.2189992663124576e-05, "loss": 3.9768, "step": 9197 }, { "epoch": 3.063962688431748, "grad_norm": 0.63671875, "learning_rate": 1.2189360983578903e-05, "loss": 3.9645, "step": 9198 }, { "epoch": 3.064295827433997, "grad_norm": 0.67578125, "learning_rate": 1.2188729249412333e-05, "loss": 3.9276, "step": 9199 }, { "epoch": 3.0646289664362456, "grad_norm": 0.6796875, "learning_rate": 1.218809746063222e-05, "loss": 4.0286, "step": 9200 }, { "epoch": 3.0649621054384943, "grad_norm": 0.65625, "learning_rate": 1.2187465617245927e-05, "loss": 4.0922, "step": 9201 }, { "epoch": 3.065295244440743, "grad_norm": 0.640625, "learning_rate": 1.2186833719260816e-05, "loss": 4.0002, "step": 9202 }, { "epoch": 3.0656283834429914, "grad_norm": 0.59375, "learning_rate": 1.218620176668424e-05, "loss": 3.972, "step": 9203 }, { "epoch": 3.06596152244524, "grad_norm": 0.63671875, "learning_rate": 1.2185569759523565e-05, "loss": 3.9574, "step": 9204 }, { "epoch": 3.066294661447489, "grad_norm": 0.640625, "learning_rate": 1.2184937697786149e-05, "loss": 4.0558, "step": 9205 }, { "epoch": 3.0666278004497376, "grad_norm": 0.640625, "learning_rate": 1.218430558147936e-05, "loss": 4.0551, "step": 9206 }, { "epoch": 3.0669609394519863, "grad_norm": 0.640625, "learning_rate": 1.2183673410610556e-05, "loss": 4.0446, "step": 9207 }, { "epoch": 3.067294078454235, "grad_norm": 0.63671875, "learning_rate": 1.2183041185187098e-05, "loss": 3.9764, "step": 9208 }, { "epoch": 3.067627217456484, "grad_norm": 0.65625, "learning_rate": 1.2182408905216358e-05, "loss": 3.9949, "step": 9209 }, { "epoch": 3.0679603564587326, "grad_norm": 0.64453125, "learning_rate": 1.2181776570705696e-05, "loss": 4.0105, "step": 9210 }, { "epoch": 3.0682934954609813, "grad_norm": 0.65625, "learning_rate": 1.2181144181662477e-05, "loss": 4.0308, "step": 9211 }, { "epoch": 3.0686266344632296, "grad_norm": 0.6484375, "learning_rate": 1.2180511738094066e-05, "loss": 3.9715, "step": 9212 }, { "epoch": 3.0689597734654783, "grad_norm": 0.63671875, "learning_rate": 1.2179879240007832e-05, "loss": 3.9958, "step": 9213 }, { "epoch": 3.069292912467727, "grad_norm": 0.61328125, "learning_rate": 1.2179246687411143e-05, "loss": 3.9511, "step": 9214 }, { "epoch": 3.069626051469976, "grad_norm": 0.62890625, "learning_rate": 1.2178614080311365e-05, "loss": 4.055, "step": 9215 }, { "epoch": 3.0699591904722245, "grad_norm": 0.65234375, "learning_rate": 1.2177981418715864e-05, "loss": 4.0058, "step": 9216 }, { "epoch": 3.0702923294744733, "grad_norm": 0.62109375, "learning_rate": 1.2177348702632015e-05, "loss": 4.0734, "step": 9217 }, { "epoch": 3.070625468476722, "grad_norm": 0.625, "learning_rate": 1.2176715932067184e-05, "loss": 4.0059, "step": 9218 }, { "epoch": 3.0709586074789708, "grad_norm": 0.625, "learning_rate": 1.217608310702874e-05, "loss": 4.0152, "step": 9219 }, { "epoch": 3.0712917464812195, "grad_norm": 0.6328125, "learning_rate": 1.2175450227524062e-05, "loss": 3.9926, "step": 9220 }, { "epoch": 3.071624885483468, "grad_norm": 0.66796875, "learning_rate": 1.217481729356051e-05, "loss": 3.9862, "step": 9221 }, { "epoch": 3.0719580244857165, "grad_norm": 0.6640625, "learning_rate": 1.2174184305145463e-05, "loss": 4.0514, "step": 9222 }, { "epoch": 3.0722911634879653, "grad_norm": 0.6484375, "learning_rate": 1.2173551262286294e-05, "loss": 3.957, "step": 9223 }, { "epoch": 3.072624302490214, "grad_norm": 0.6640625, "learning_rate": 1.2172918164990377e-05, "loss": 4.0152, "step": 9224 }, { "epoch": 3.0729574414924627, "grad_norm": 0.65234375, "learning_rate": 1.2172285013265084e-05, "loss": 4.0026, "step": 9225 }, { "epoch": 3.0732905804947115, "grad_norm": 0.63671875, "learning_rate": 1.2171651807117792e-05, "loss": 4.0337, "step": 9226 }, { "epoch": 3.0736237194969602, "grad_norm": 0.62109375, "learning_rate": 1.2171018546555873e-05, "loss": 4.017, "step": 9227 }, { "epoch": 3.073956858499209, "grad_norm": 0.6640625, "learning_rate": 1.2170385231586706e-05, "loss": 4.0242, "step": 9228 }, { "epoch": 3.0742899975014577, "grad_norm": 0.63671875, "learning_rate": 1.2169751862217667e-05, "loss": 4.0113, "step": 9229 }, { "epoch": 3.074623136503706, "grad_norm": 0.6796875, "learning_rate": 1.2169118438456135e-05, "loss": 4.0634, "step": 9230 }, { "epoch": 3.0749562755059547, "grad_norm": 0.64453125, "learning_rate": 1.2168484960309484e-05, "loss": 4.0223, "step": 9231 }, { "epoch": 3.0752894145082035, "grad_norm": 0.65234375, "learning_rate": 1.2167851427785096e-05, "loss": 3.9955, "step": 9232 }, { "epoch": 3.075622553510452, "grad_norm": 0.64453125, "learning_rate": 1.2167217840890352e-05, "loss": 4.0246, "step": 9233 }, { "epoch": 3.075955692512701, "grad_norm": 0.63671875, "learning_rate": 1.2166584199632626e-05, "loss": 4.0399, "step": 9234 }, { "epoch": 3.0762888315149497, "grad_norm": 0.65625, "learning_rate": 1.2165950504019301e-05, "loss": 3.9485, "step": 9235 }, { "epoch": 3.0766219705171984, "grad_norm": 0.64453125, "learning_rate": 1.2165316754057763e-05, "loss": 3.9445, "step": 9236 }, { "epoch": 3.076955109519447, "grad_norm": 0.6484375, "learning_rate": 1.2164682949755389e-05, "loss": 4.0355, "step": 9237 }, { "epoch": 3.0772882485216955, "grad_norm": 0.64453125, "learning_rate": 1.216404909111956e-05, "loss": 4.0602, "step": 9238 }, { "epoch": 3.077621387523944, "grad_norm": 0.65625, "learning_rate": 1.2163415178157663e-05, "loss": 4.0025, "step": 9239 }, { "epoch": 3.077954526526193, "grad_norm": 0.66015625, "learning_rate": 1.216278121087708e-05, "loss": 3.998, "step": 9240 }, { "epoch": 3.0782876655284417, "grad_norm": 0.63671875, "learning_rate": 1.2162147189285193e-05, "loss": 3.9755, "step": 9241 }, { "epoch": 3.0786208045306904, "grad_norm": 0.65625, "learning_rate": 1.2161513113389395e-05, "loss": 4.0589, "step": 9242 }, { "epoch": 3.078953943532939, "grad_norm": 0.625, "learning_rate": 1.2160878983197062e-05, "loss": 4.0404, "step": 9243 }, { "epoch": 3.079287082535188, "grad_norm": 0.66015625, "learning_rate": 1.2160244798715585e-05, "loss": 4.0136, "step": 9244 }, { "epoch": 3.0796202215374366, "grad_norm": 0.609375, "learning_rate": 1.215961055995235e-05, "loss": 4.037, "step": 9245 }, { "epoch": 3.0799533605396854, "grad_norm": 0.67578125, "learning_rate": 1.2158976266914743e-05, "loss": 4.0251, "step": 9246 }, { "epoch": 3.0802864995419337, "grad_norm": 0.64453125, "learning_rate": 1.2158341919610156e-05, "loss": 4.0468, "step": 9247 }, { "epoch": 3.0806196385441824, "grad_norm": 0.69140625, "learning_rate": 1.2157707518045977e-05, "loss": 3.9321, "step": 9248 }, { "epoch": 3.080952777546431, "grad_norm": 0.65234375, "learning_rate": 1.2157073062229592e-05, "loss": 4.0138, "step": 9249 }, { "epoch": 3.08128591654868, "grad_norm": 0.625, "learning_rate": 1.2156438552168395e-05, "loss": 3.9987, "step": 9250 }, { "epoch": 3.0816190555509286, "grad_norm": 0.6171875, "learning_rate": 1.215580398786977e-05, "loss": 4.0341, "step": 9251 }, { "epoch": 3.0819521945531774, "grad_norm": 0.60546875, "learning_rate": 1.2155169369341116e-05, "loss": 3.9754, "step": 9252 }, { "epoch": 3.082285333555426, "grad_norm": 0.6171875, "learning_rate": 1.2154534696589821e-05, "loss": 3.9975, "step": 9253 }, { "epoch": 3.082618472557675, "grad_norm": 0.69140625, "learning_rate": 1.215389996962328e-05, "loss": 3.973, "step": 9254 }, { "epoch": 3.0829516115599236, "grad_norm": 0.65234375, "learning_rate": 1.2153265188448885e-05, "loss": 3.9511, "step": 9255 }, { "epoch": 3.083284750562172, "grad_norm": 0.66796875, "learning_rate": 1.2152630353074029e-05, "loss": 4.032, "step": 9256 }, { "epoch": 3.0836178895644206, "grad_norm": 0.6328125, "learning_rate": 1.2151995463506106e-05, "loss": 3.971, "step": 9257 }, { "epoch": 3.0839510285666694, "grad_norm": 0.65625, "learning_rate": 1.2151360519752512e-05, "loss": 4.0523, "step": 9258 }, { "epoch": 3.084284167568918, "grad_norm": 0.62109375, "learning_rate": 1.2150725521820645e-05, "loss": 4.0309, "step": 9259 }, { "epoch": 3.084617306571167, "grad_norm": 0.62109375, "learning_rate": 1.2150090469717898e-05, "loss": 3.9938, "step": 9260 }, { "epoch": 3.0849504455734156, "grad_norm": 0.68359375, "learning_rate": 1.2149455363451666e-05, "loss": 4.0347, "step": 9261 }, { "epoch": 3.0852835845756643, "grad_norm": 0.609375, "learning_rate": 1.2148820203029353e-05, "loss": 4.0495, "step": 9262 }, { "epoch": 3.085616723577913, "grad_norm": 0.64453125, "learning_rate": 1.2148184988458353e-05, "loss": 4.0091, "step": 9263 }, { "epoch": 3.0859498625801614, "grad_norm": 0.625, "learning_rate": 1.2147549719746066e-05, "loss": 3.962, "step": 9264 }, { "epoch": 3.08628300158241, "grad_norm": 0.625, "learning_rate": 1.2146914396899894e-05, "loss": 4.0154, "step": 9265 }, { "epoch": 3.086616140584659, "grad_norm": 0.66796875, "learning_rate": 1.2146279019927231e-05, "loss": 3.9959, "step": 9266 }, { "epoch": 3.0869492795869076, "grad_norm": 0.65234375, "learning_rate": 1.2145643588835484e-05, "loss": 4.0731, "step": 9267 }, { "epoch": 3.0872824185891563, "grad_norm": 0.66015625, "learning_rate": 1.2145008103632047e-05, "loss": 3.9972, "step": 9268 }, { "epoch": 3.087615557591405, "grad_norm": 0.6796875, "learning_rate": 1.2144372564324329e-05, "loss": 4.0106, "step": 9269 }, { "epoch": 3.087948696593654, "grad_norm": 0.640625, "learning_rate": 1.2143736970919732e-05, "loss": 3.9674, "step": 9270 }, { "epoch": 3.0882818355959025, "grad_norm": 0.64453125, "learning_rate": 1.2143101323425657e-05, "loss": 4.0426, "step": 9271 }, { "epoch": 3.0886149745981513, "grad_norm": 0.6328125, "learning_rate": 1.214246562184951e-05, "loss": 4.0045, "step": 9272 }, { "epoch": 3.0889481136003996, "grad_norm": 0.66015625, "learning_rate": 1.214182986619869e-05, "loss": 4.0283, "step": 9273 }, { "epoch": 3.0892812526026483, "grad_norm": 0.68359375, "learning_rate": 1.2141194056480608e-05, "loss": 3.9717, "step": 9274 }, { "epoch": 3.089614391604897, "grad_norm": 0.640625, "learning_rate": 1.2140558192702669e-05, "loss": 3.9816, "step": 9275 }, { "epoch": 3.0899475306071458, "grad_norm": 0.6171875, "learning_rate": 1.2139922274872277e-05, "loss": 4.0117, "step": 9276 }, { "epoch": 3.0902806696093945, "grad_norm": 0.62109375, "learning_rate": 1.213928630299684e-05, "loss": 4.0349, "step": 9277 }, { "epoch": 3.0906138086116433, "grad_norm": 0.61328125, "learning_rate": 1.2138650277083768e-05, "loss": 4.0815, "step": 9278 }, { "epoch": 3.090946947613892, "grad_norm": 0.6640625, "learning_rate": 1.2138014197140469e-05, "loss": 3.9744, "step": 9279 }, { "epoch": 3.0912800866161407, "grad_norm": 0.609375, "learning_rate": 1.213737806317435e-05, "loss": 4.0031, "step": 9280 }, { "epoch": 3.0916132256183895, "grad_norm": 0.62890625, "learning_rate": 1.2136741875192817e-05, "loss": 3.9811, "step": 9281 }, { "epoch": 3.0919463646206378, "grad_norm": 0.62890625, "learning_rate": 1.2136105633203287e-05, "loss": 4.013, "step": 9282 }, { "epoch": 3.0922795036228865, "grad_norm": 0.609375, "learning_rate": 1.213546933721317e-05, "loss": 4.0192, "step": 9283 }, { "epoch": 3.0926126426251352, "grad_norm": 0.65234375, "learning_rate": 1.2134832987229872e-05, "loss": 4.0783, "step": 9284 }, { "epoch": 3.092945781627384, "grad_norm": 0.6015625, "learning_rate": 1.2134196583260813e-05, "loss": 3.9933, "step": 9285 }, { "epoch": 3.0932789206296327, "grad_norm": 0.63671875, "learning_rate": 1.2133560125313397e-05, "loss": 3.9858, "step": 9286 }, { "epoch": 3.0936120596318815, "grad_norm": 0.6328125, "learning_rate": 1.2132923613395045e-05, "loss": 4.0931, "step": 9287 }, { "epoch": 3.09394519863413, "grad_norm": 0.640625, "learning_rate": 1.2132287047513166e-05, "loss": 3.9379, "step": 9288 }, { "epoch": 3.094278337636379, "grad_norm": 0.63671875, "learning_rate": 1.2131650427675178e-05, "loss": 4.0329, "step": 9289 }, { "epoch": 3.0946114766386277, "grad_norm": 0.65625, "learning_rate": 1.2131013753888495e-05, "loss": 3.999, "step": 9290 }, { "epoch": 3.094944615640876, "grad_norm": 0.625, "learning_rate": 1.213037702616053e-05, "loss": 4.0385, "step": 9291 }, { "epoch": 3.0952777546431247, "grad_norm": 0.6875, "learning_rate": 1.2129740244498702e-05, "loss": 4.0206, "step": 9292 }, { "epoch": 3.0956108936453735, "grad_norm": 0.625, "learning_rate": 1.2129103408910431e-05, "loss": 4.0367, "step": 9293 }, { "epoch": 3.095944032647622, "grad_norm": 0.70703125, "learning_rate": 1.2128466519403129e-05, "loss": 3.9337, "step": 9294 }, { "epoch": 3.096277171649871, "grad_norm": 0.64453125, "learning_rate": 1.212782957598422e-05, "loss": 4.0312, "step": 9295 }, { "epoch": 3.0966103106521197, "grad_norm": 0.6484375, "learning_rate": 1.2127192578661118e-05, "loss": 3.9836, "step": 9296 }, { "epoch": 3.0969434496543684, "grad_norm": 0.640625, "learning_rate": 1.2126555527441247e-05, "loss": 3.9793, "step": 9297 }, { "epoch": 3.097276588656617, "grad_norm": 0.609375, "learning_rate": 1.2125918422332023e-05, "loss": 4.0644, "step": 9298 }, { "epoch": 3.097609727658866, "grad_norm": 0.6875, "learning_rate": 1.2125281263340872e-05, "loss": 4.0272, "step": 9299 }, { "epoch": 3.097942866661114, "grad_norm": 0.63671875, "learning_rate": 1.2124644050475213e-05, "loss": 4.0197, "step": 9300 }, { "epoch": 3.098276005663363, "grad_norm": 0.66796875, "learning_rate": 1.2124006783742463e-05, "loss": 3.9493, "step": 9301 }, { "epoch": 3.0986091446656117, "grad_norm": 0.64453125, "learning_rate": 1.2123369463150052e-05, "loss": 4.0366, "step": 9302 }, { "epoch": 3.0989422836678604, "grad_norm": 0.62890625, "learning_rate": 1.2122732088705401e-05, "loss": 4.0032, "step": 9303 }, { "epoch": 3.099275422670109, "grad_norm": 0.609375, "learning_rate": 1.2122094660415933e-05, "loss": 4.0619, "step": 9304 }, { "epoch": 3.099608561672358, "grad_norm": 0.63671875, "learning_rate": 1.2121457178289076e-05, "loss": 4.0189, "step": 9305 }, { "epoch": 3.0999417006746066, "grad_norm": 0.67578125, "learning_rate": 1.2120819642332253e-05, "loss": 4.0423, "step": 9306 }, { "epoch": 3.1002748396768554, "grad_norm": 0.66015625, "learning_rate": 1.2120182052552887e-05, "loss": 4.065, "step": 9307 }, { "epoch": 3.1006079786791036, "grad_norm": 0.640625, "learning_rate": 1.2119544408958408e-05, "loss": 4.0018, "step": 9308 }, { "epoch": 3.1009411176813524, "grad_norm": 0.65234375, "learning_rate": 1.2118906711556244e-05, "loss": 3.9908, "step": 9309 }, { "epoch": 3.101274256683601, "grad_norm": 0.625, "learning_rate": 1.2118268960353824e-05, "loss": 3.9755, "step": 9310 }, { "epoch": 3.10160739568585, "grad_norm": 0.63671875, "learning_rate": 1.2117631155358569e-05, "loss": 4.0356, "step": 9311 }, { "epoch": 3.1019405346880986, "grad_norm": 0.67578125, "learning_rate": 1.2116993296577914e-05, "loss": 3.9825, "step": 9312 }, { "epoch": 3.1022736736903473, "grad_norm": 0.66796875, "learning_rate": 1.2116355384019288e-05, "loss": 3.9919, "step": 9313 }, { "epoch": 3.102606812692596, "grad_norm": 0.6328125, "learning_rate": 1.211571741769012e-05, "loss": 3.9865, "step": 9314 }, { "epoch": 3.102939951694845, "grad_norm": 0.640625, "learning_rate": 1.2115079397597843e-05, "loss": 4.0399, "step": 9315 }, { "epoch": 3.1032730906970936, "grad_norm": 0.6484375, "learning_rate": 1.2114441323749886e-05, "loss": 3.9911, "step": 9316 }, { "epoch": 3.103606229699342, "grad_norm": 0.67578125, "learning_rate": 1.2113803196153682e-05, "loss": 4.0071, "step": 9317 }, { "epoch": 3.1039393687015906, "grad_norm": 0.65625, "learning_rate": 1.2113165014816668e-05, "loss": 4.065, "step": 9318 }, { "epoch": 3.1042725077038393, "grad_norm": 0.60546875, "learning_rate": 1.211252677974627e-05, "loss": 4.0305, "step": 9319 }, { "epoch": 3.104605646706088, "grad_norm": 0.65625, "learning_rate": 1.2111888490949926e-05, "loss": 4.0039, "step": 9320 }, { "epoch": 3.104938785708337, "grad_norm": 0.64453125, "learning_rate": 1.2111250148435072e-05, "loss": 4.0109, "step": 9321 }, { "epoch": 3.1052719247105856, "grad_norm": 0.640625, "learning_rate": 1.2110611752209141e-05, "loss": 4.0354, "step": 9322 }, { "epoch": 3.1056050637128343, "grad_norm": 0.62890625, "learning_rate": 1.2109973302279571e-05, "loss": 3.9829, "step": 9323 }, { "epoch": 3.105938202715083, "grad_norm": 0.64453125, "learning_rate": 1.2109334798653796e-05, "loss": 4.0332, "step": 9324 }, { "epoch": 3.1062713417173318, "grad_norm": 0.6171875, "learning_rate": 1.2108696241339252e-05, "loss": 3.9774, "step": 9325 }, { "epoch": 3.10660448071958, "grad_norm": 0.65234375, "learning_rate": 1.2108057630343384e-05, "loss": 4.0158, "step": 9326 }, { "epoch": 3.106937619721829, "grad_norm": 0.62890625, "learning_rate": 1.2107418965673621e-05, "loss": 4.0327, "step": 9327 }, { "epoch": 3.1072707587240775, "grad_norm": 0.640625, "learning_rate": 1.210678024733741e-05, "loss": 3.9799, "step": 9328 }, { "epoch": 3.1076038977263263, "grad_norm": 0.63671875, "learning_rate": 1.2106141475342184e-05, "loss": 4.0994, "step": 9329 }, { "epoch": 3.107937036728575, "grad_norm": 0.72265625, "learning_rate": 1.2105502649695389e-05, "loss": 4.0183, "step": 9330 }, { "epoch": 3.1082701757308238, "grad_norm": 0.625, "learning_rate": 1.2104863770404464e-05, "loss": 4.0863, "step": 9331 }, { "epoch": 3.1086033147330725, "grad_norm": 0.64453125, "learning_rate": 1.210422483747685e-05, "loss": 4.0, "step": 9332 }, { "epoch": 3.1089364537353212, "grad_norm": 0.6796875, "learning_rate": 1.210358585091999e-05, "loss": 4.012, "step": 9333 }, { "epoch": 3.1092695927375695, "grad_norm": 0.62109375, "learning_rate": 1.2102946810741324e-05, "loss": 4.0489, "step": 9334 }, { "epoch": 3.1096027317398183, "grad_norm": 0.6484375, "learning_rate": 1.2102307716948299e-05, "loss": 3.9735, "step": 9335 }, { "epoch": 3.109935870742067, "grad_norm": 0.62109375, "learning_rate": 1.2101668569548358e-05, "loss": 4.0282, "step": 9336 }, { "epoch": 3.1102690097443157, "grad_norm": 0.6484375, "learning_rate": 1.2101029368548944e-05, "loss": 3.9951, "step": 9337 }, { "epoch": 3.1106021487465645, "grad_norm": 0.62890625, "learning_rate": 1.2100390113957505e-05, "loss": 4.0164, "step": 9338 }, { "epoch": 3.1109352877488132, "grad_norm": 0.625, "learning_rate": 1.2099750805781484e-05, "loss": 4.0223, "step": 9339 }, { "epoch": 3.111268426751062, "grad_norm": 0.6640625, "learning_rate": 1.2099111444028331e-05, "loss": 4.0771, "step": 9340 }, { "epoch": 3.1116015657533107, "grad_norm": 0.65625, "learning_rate": 1.209847202870549e-05, "loss": 3.9867, "step": 9341 }, { "epoch": 3.1119347047555594, "grad_norm": 0.6484375, "learning_rate": 1.2097832559820411e-05, "loss": 4.0273, "step": 9342 }, { "epoch": 3.1122678437578077, "grad_norm": 0.62890625, "learning_rate": 1.2097193037380542e-05, "loss": 4.0154, "step": 9343 }, { "epoch": 3.1126009827600565, "grad_norm": 0.68359375, "learning_rate": 1.209655346139333e-05, "loss": 4.0002, "step": 9344 }, { "epoch": 3.112934121762305, "grad_norm": 0.64453125, "learning_rate": 1.2095913831866229e-05, "loss": 4.0472, "step": 9345 }, { "epoch": 3.113267260764554, "grad_norm": 0.69140625, "learning_rate": 1.2095274148806685e-05, "loss": 3.9745, "step": 9346 }, { "epoch": 3.1136003997668027, "grad_norm": 0.62109375, "learning_rate": 1.2094634412222149e-05, "loss": 4.0483, "step": 9347 }, { "epoch": 3.1139335387690514, "grad_norm": 0.625, "learning_rate": 1.2093994622120075e-05, "loss": 3.9754, "step": 9348 }, { "epoch": 3.1142666777713, "grad_norm": 0.62890625, "learning_rate": 1.2093354778507915e-05, "loss": 4.0345, "step": 9349 }, { "epoch": 3.114599816773549, "grad_norm": 0.72265625, "learning_rate": 1.2092714881393122e-05, "loss": 4.0056, "step": 9350 }, { "epoch": 3.1149329557757977, "grad_norm": 0.6953125, "learning_rate": 1.2092074930783146e-05, "loss": 3.9597, "step": 9351 }, { "epoch": 3.115266094778046, "grad_norm": 0.66015625, "learning_rate": 1.2091434926685444e-05, "loss": 4.0262, "step": 9352 }, { "epoch": 3.1155992337802947, "grad_norm": 0.66015625, "learning_rate": 1.2090794869107473e-05, "loss": 4.0241, "step": 9353 }, { "epoch": 3.1159323727825434, "grad_norm": 0.6484375, "learning_rate": 1.2090154758056682e-05, "loss": 4.0338, "step": 9354 }, { "epoch": 3.116265511784792, "grad_norm": 0.6796875, "learning_rate": 1.2089514593540531e-05, "loss": 4.0357, "step": 9355 }, { "epoch": 3.116598650787041, "grad_norm": 0.62890625, "learning_rate": 1.2088874375566476e-05, "loss": 3.9944, "step": 9356 }, { "epoch": 3.1169317897892896, "grad_norm": 0.6328125, "learning_rate": 1.2088234104141974e-05, "loss": 3.9838, "step": 9357 }, { "epoch": 3.1172649287915384, "grad_norm": 0.65234375, "learning_rate": 1.2087593779274484e-05, "loss": 3.9949, "step": 9358 }, { "epoch": 3.117598067793787, "grad_norm": 0.65625, "learning_rate": 1.2086953400971463e-05, "loss": 3.99, "step": 9359 }, { "epoch": 3.117931206796036, "grad_norm": 0.65234375, "learning_rate": 1.2086312969240368e-05, "loss": 4.0096, "step": 9360 }, { "epoch": 3.118264345798284, "grad_norm": 0.66015625, "learning_rate": 1.2085672484088664e-05, "loss": 3.9448, "step": 9361 }, { "epoch": 3.118597484800533, "grad_norm": 0.63671875, "learning_rate": 1.2085031945523804e-05, "loss": 4.0072, "step": 9362 }, { "epoch": 3.1189306238027816, "grad_norm": 0.64453125, "learning_rate": 1.2084391353553258e-05, "loss": 4.0873, "step": 9363 }, { "epoch": 3.1192637628050304, "grad_norm": 0.67578125, "learning_rate": 1.208375070818448e-05, "loss": 4.0317, "step": 9364 }, { "epoch": 3.119596901807279, "grad_norm": 0.70703125, "learning_rate": 1.2083110009424934e-05, "loss": 4.0014, "step": 9365 }, { "epoch": 3.119930040809528, "grad_norm": 0.65625, "learning_rate": 1.2082469257282084e-05, "loss": 3.9993, "step": 9366 }, { "epoch": 3.1202631798117766, "grad_norm": 0.66015625, "learning_rate": 1.2081828451763393e-05, "loss": 3.9584, "step": 9367 }, { "epoch": 3.1205963188140253, "grad_norm": 0.62890625, "learning_rate": 1.2081187592876325e-05, "loss": 3.9719, "step": 9368 }, { "epoch": 3.120929457816274, "grad_norm": 0.6640625, "learning_rate": 1.2080546680628344e-05, "loss": 3.975, "step": 9369 }, { "epoch": 3.1212625968185224, "grad_norm": 0.64453125, "learning_rate": 1.2079905715026914e-05, "loss": 4.0174, "step": 9370 }, { "epoch": 3.121595735820771, "grad_norm": 0.609375, "learning_rate": 1.2079264696079504e-05, "loss": 4.0048, "step": 9371 }, { "epoch": 3.12192887482302, "grad_norm": 0.640625, "learning_rate": 1.2078623623793581e-05, "loss": 4.0563, "step": 9372 }, { "epoch": 3.1222620138252686, "grad_norm": 0.65234375, "learning_rate": 1.2077982498176606e-05, "loss": 3.9751, "step": 9373 }, { "epoch": 3.1225951528275173, "grad_norm": 0.6328125, "learning_rate": 1.2077341319236055e-05, "loss": 4.0447, "step": 9374 }, { "epoch": 3.122928291829766, "grad_norm": 0.65234375, "learning_rate": 1.2076700086979388e-05, "loss": 3.9747, "step": 9375 }, { "epoch": 3.123261430832015, "grad_norm": 0.66796875, "learning_rate": 1.207605880141408e-05, "loss": 3.9864, "step": 9376 }, { "epoch": 3.1235945698342635, "grad_norm": 0.67578125, "learning_rate": 1.20754174625476e-05, "loss": 3.9888, "step": 9377 }, { "epoch": 3.123927708836512, "grad_norm": 0.64453125, "learning_rate": 1.2074776070387416e-05, "loss": 4.0271, "step": 9378 }, { "epoch": 3.1242608478387606, "grad_norm": 0.68359375, "learning_rate": 1.2074134624940998e-05, "loss": 3.9799, "step": 9379 }, { "epoch": 3.1245939868410093, "grad_norm": 0.64453125, "learning_rate": 1.2073493126215822e-05, "loss": 4.0389, "step": 9380 }, { "epoch": 3.124927125843258, "grad_norm": 0.6796875, "learning_rate": 1.2072851574219353e-05, "loss": 4.0767, "step": 9381 }, { "epoch": 3.125260264845507, "grad_norm": 0.69140625, "learning_rate": 1.2072209968959073e-05, "loss": 3.9988, "step": 9382 }, { "epoch": 3.1255934038477555, "grad_norm": 0.6328125, "learning_rate": 1.2071568310442446e-05, "loss": 4.0328, "step": 9383 }, { "epoch": 3.1259265428500043, "grad_norm": 0.68359375, "learning_rate": 1.2070926598676954e-05, "loss": 4.0453, "step": 9384 }, { "epoch": 3.126259681852253, "grad_norm": 0.65234375, "learning_rate": 1.2070284833670064e-05, "loss": 3.9667, "step": 9385 }, { "epoch": 3.1265928208545017, "grad_norm": 0.703125, "learning_rate": 1.2069643015429259e-05, "loss": 4.0738, "step": 9386 }, { "epoch": 3.12692595985675, "grad_norm": 0.66015625, "learning_rate": 1.2069001143962008e-05, "loss": 4.0154, "step": 9387 }, { "epoch": 3.1272590988589988, "grad_norm": 0.6484375, "learning_rate": 1.206835921927579e-05, "loss": 3.9564, "step": 9388 }, { "epoch": 3.1275922378612475, "grad_norm": 0.671875, "learning_rate": 1.2067717241378084e-05, "loss": 3.9812, "step": 9389 }, { "epoch": 3.1279253768634963, "grad_norm": 0.625, "learning_rate": 1.2067075210276365e-05, "loss": 4.0021, "step": 9390 }, { "epoch": 3.128258515865745, "grad_norm": 0.6484375, "learning_rate": 1.206643312597811e-05, "loss": 3.9798, "step": 9391 }, { "epoch": 3.1285916548679937, "grad_norm": 0.64453125, "learning_rate": 1.2065790988490802e-05, "loss": 3.9976, "step": 9392 }, { "epoch": 3.1289247938702425, "grad_norm": 0.62109375, "learning_rate": 1.2065148797821917e-05, "loss": 4.0885, "step": 9393 }, { "epoch": 3.129257932872491, "grad_norm": 0.609375, "learning_rate": 1.206450655397894e-05, "loss": 4.0576, "step": 9394 }, { "epoch": 3.1295910718747395, "grad_norm": 0.62890625, "learning_rate": 1.2063864256969345e-05, "loss": 4.0912, "step": 9395 }, { "epoch": 3.1299242108769882, "grad_norm": 0.62109375, "learning_rate": 1.2063221906800618e-05, "loss": 4.077, "step": 9396 }, { "epoch": 3.130257349879237, "grad_norm": 0.66015625, "learning_rate": 1.2062579503480239e-05, "loss": 3.9635, "step": 9397 }, { "epoch": 3.1305904888814857, "grad_norm": 0.671875, "learning_rate": 1.2061937047015692e-05, "loss": 3.9975, "step": 9398 }, { "epoch": 3.1309236278837345, "grad_norm": 0.6484375, "learning_rate": 1.206129453741446e-05, "loss": 3.9566, "step": 9399 }, { "epoch": 3.131256766885983, "grad_norm": 0.66015625, "learning_rate": 1.2060651974684025e-05, "loss": 4.0803, "step": 9400 }, { "epoch": 3.131589905888232, "grad_norm": 0.66796875, "learning_rate": 1.2060009358831874e-05, "loss": 4.0088, "step": 9401 }, { "epoch": 3.1319230448904807, "grad_norm": 0.6328125, "learning_rate": 1.205936668986549e-05, "loss": 4.0793, "step": 9402 }, { "epoch": 3.1322561838927294, "grad_norm": 0.6484375, "learning_rate": 1.205872396779236e-05, "loss": 4.0397, "step": 9403 }, { "epoch": 3.1325893228949777, "grad_norm": 0.6875, "learning_rate": 1.2058081192619968e-05, "loss": 4.0107, "step": 9404 }, { "epoch": 3.1329224618972265, "grad_norm": 0.6640625, "learning_rate": 1.2057438364355806e-05, "loss": 4.0767, "step": 9405 }, { "epoch": 3.133255600899475, "grad_norm": 0.64453125, "learning_rate": 1.2056795483007357e-05, "loss": 3.9651, "step": 9406 }, { "epoch": 3.133588739901724, "grad_norm": 0.64453125, "learning_rate": 1.2056152548582112e-05, "loss": 4.0256, "step": 9407 }, { "epoch": 3.1339218789039727, "grad_norm": 0.6640625, "learning_rate": 1.2055509561087558e-05, "loss": 4.0204, "step": 9408 }, { "epoch": 3.1342550179062214, "grad_norm": 0.6484375, "learning_rate": 1.2054866520531183e-05, "loss": 4.0281, "step": 9409 }, { "epoch": 3.13458815690847, "grad_norm": 0.6796875, "learning_rate": 1.205422342692048e-05, "loss": 4.0279, "step": 9410 }, { "epoch": 3.134921295910719, "grad_norm": 0.6484375, "learning_rate": 1.2053580280262939e-05, "loss": 4.0182, "step": 9411 }, { "epoch": 3.1352544349129676, "grad_norm": 0.625, "learning_rate": 1.2052937080566053e-05, "loss": 4.0238, "step": 9412 }, { "epoch": 3.135587573915216, "grad_norm": 0.671875, "learning_rate": 1.2052293827837308e-05, "loss": 4.0575, "step": 9413 }, { "epoch": 3.1359207129174647, "grad_norm": 0.67578125, "learning_rate": 1.20516505220842e-05, "loss": 3.9136, "step": 9414 }, { "epoch": 3.1362538519197134, "grad_norm": 0.640625, "learning_rate": 1.2051007163314223e-05, "loss": 4.0099, "step": 9415 }, { "epoch": 3.136586990921962, "grad_norm": 0.6484375, "learning_rate": 1.2050363751534873e-05, "loss": 4.0023, "step": 9416 }, { "epoch": 3.136920129924211, "grad_norm": 0.65625, "learning_rate": 1.2049720286753637e-05, "loss": 4.0207, "step": 9417 }, { "epoch": 3.1372532689264596, "grad_norm": 0.64453125, "learning_rate": 1.2049076768978016e-05, "loss": 4.0264, "step": 9418 }, { "epoch": 3.1375864079287084, "grad_norm": 0.6484375, "learning_rate": 1.2048433198215505e-05, "loss": 4.0786, "step": 9419 }, { "epoch": 3.137919546930957, "grad_norm": 0.6640625, "learning_rate": 1.20477895744736e-05, "loss": 4.0293, "step": 9420 }, { "epoch": 3.138252685933206, "grad_norm": 0.64453125, "learning_rate": 1.2047145897759794e-05, "loss": 4.0718, "step": 9421 }, { "epoch": 3.138585824935454, "grad_norm": 0.63671875, "learning_rate": 1.2046502168081589e-05, "loss": 3.9615, "step": 9422 }, { "epoch": 3.138918963937703, "grad_norm": 0.640625, "learning_rate": 1.204585838544648e-05, "loss": 4.0531, "step": 9423 }, { "epoch": 3.1392521029399516, "grad_norm": 0.640625, "learning_rate": 1.2045214549861969e-05, "loss": 4.1152, "step": 9424 }, { "epoch": 3.1395852419422003, "grad_norm": 0.66796875, "learning_rate": 1.2044570661335552e-05, "loss": 3.9589, "step": 9425 }, { "epoch": 3.139918380944449, "grad_norm": 0.640625, "learning_rate": 1.2043926719874731e-05, "loss": 4.0421, "step": 9426 }, { "epoch": 3.140251519946698, "grad_norm": 0.66015625, "learning_rate": 1.2043282725487005e-05, "loss": 4.0632, "step": 9427 }, { "epoch": 3.1405846589489466, "grad_norm": 0.64453125, "learning_rate": 1.2042638678179876e-05, "loss": 4.0475, "step": 9428 }, { "epoch": 3.1409177979511953, "grad_norm": 0.65234375, "learning_rate": 1.2041994577960847e-05, "loss": 4.0765, "step": 9429 }, { "epoch": 3.141250936953444, "grad_norm": 0.6171875, "learning_rate": 1.2041350424837421e-05, "loss": 3.9833, "step": 9430 }, { "epoch": 3.1415840759556923, "grad_norm": 0.6640625, "learning_rate": 1.2040706218817095e-05, "loss": 3.9739, "step": 9431 }, { "epoch": 3.141917214957941, "grad_norm": 0.61328125, "learning_rate": 1.2040061959907379e-05, "loss": 4.0471, "step": 9432 }, { "epoch": 3.14225035396019, "grad_norm": 0.6328125, "learning_rate": 1.2039417648115774e-05, "loss": 4.0104, "step": 9433 }, { "epoch": 3.1425834929624386, "grad_norm": 0.66015625, "learning_rate": 1.2038773283449786e-05, "loss": 4.0304, "step": 9434 }, { "epoch": 3.1429166319646873, "grad_norm": 0.6328125, "learning_rate": 1.203812886591692e-05, "loss": 3.9903, "step": 9435 }, { "epoch": 3.143249770966936, "grad_norm": 0.6640625, "learning_rate": 1.2037484395524683e-05, "loss": 4.0247, "step": 9436 }, { "epoch": 3.1435829099691848, "grad_norm": 0.67578125, "learning_rate": 1.2036839872280582e-05, "loss": 3.9866, "step": 9437 }, { "epoch": 3.1439160489714335, "grad_norm": 0.6953125, "learning_rate": 1.203619529619212e-05, "loss": 3.9357, "step": 9438 }, { "epoch": 3.1442491879736822, "grad_norm": 0.62109375, "learning_rate": 1.2035550667266811e-05, "loss": 3.9967, "step": 9439 }, { "epoch": 3.1445823269759305, "grad_norm": 0.65234375, "learning_rate": 1.2034905985512159e-05, "loss": 4.025, "step": 9440 }, { "epoch": 3.1449154659781793, "grad_norm": 0.62109375, "learning_rate": 1.2034261250935675e-05, "loss": 4.0704, "step": 9441 }, { "epoch": 3.145248604980428, "grad_norm": 0.625, "learning_rate": 1.2033616463544869e-05, "loss": 3.9757, "step": 9442 }, { "epoch": 3.1455817439826768, "grad_norm": 0.6484375, "learning_rate": 1.2032971623347252e-05, "loss": 4.0177, "step": 9443 }, { "epoch": 3.1459148829849255, "grad_norm": 0.640625, "learning_rate": 1.2032326730350335e-05, "loss": 4.0008, "step": 9444 }, { "epoch": 3.1462480219871742, "grad_norm": 0.64453125, "learning_rate": 1.2031681784561625e-05, "loss": 3.9659, "step": 9445 }, { "epoch": 3.146581160989423, "grad_norm": 0.625, "learning_rate": 1.2031036785988639e-05, "loss": 3.9869, "step": 9446 }, { "epoch": 3.1469142999916717, "grad_norm": 0.71875, "learning_rate": 1.203039173463889e-05, "loss": 3.9858, "step": 9447 }, { "epoch": 3.14724743899392, "grad_norm": 0.64453125, "learning_rate": 1.202974663051989e-05, "loss": 4.0, "step": 9448 }, { "epoch": 3.1475805779961687, "grad_norm": 0.60546875, "learning_rate": 1.2029101473639153e-05, "loss": 4.0284, "step": 9449 }, { "epoch": 3.1479137169984175, "grad_norm": 0.6484375, "learning_rate": 1.2028456264004193e-05, "loss": 3.9901, "step": 9450 }, { "epoch": 3.1482468560006662, "grad_norm": 0.64453125, "learning_rate": 1.2027811001622526e-05, "loss": 4.0104, "step": 9451 }, { "epoch": 3.148579995002915, "grad_norm": 0.6640625, "learning_rate": 1.2027165686501667e-05, "loss": 4.0196, "step": 9452 }, { "epoch": 3.1489131340051637, "grad_norm": 0.65234375, "learning_rate": 1.2026520318649138e-05, "loss": 3.9791, "step": 9453 }, { "epoch": 3.1492462730074124, "grad_norm": 0.640625, "learning_rate": 1.2025874898072447e-05, "loss": 4.0507, "step": 9454 }, { "epoch": 3.149579412009661, "grad_norm": 0.62890625, "learning_rate": 1.202522942477912e-05, "loss": 4.0518, "step": 9455 }, { "epoch": 3.14991255101191, "grad_norm": 0.6484375, "learning_rate": 1.2024583898776667e-05, "loss": 4.0494, "step": 9456 }, { "epoch": 3.150245690014158, "grad_norm": 0.640625, "learning_rate": 1.2023938320072618e-05, "loss": 4.018, "step": 9457 }, { "epoch": 3.150578829016407, "grad_norm": 0.6640625, "learning_rate": 1.2023292688674482e-05, "loss": 4.0181, "step": 9458 }, { "epoch": 3.1509119680186557, "grad_norm": 0.66015625, "learning_rate": 1.2022647004589786e-05, "loss": 3.9869, "step": 9459 }, { "epoch": 3.1512451070209044, "grad_norm": 0.59765625, "learning_rate": 1.2022001267826047e-05, "loss": 4.1181, "step": 9460 }, { "epoch": 3.151578246023153, "grad_norm": 0.66015625, "learning_rate": 1.2021355478390788e-05, "loss": 4.0107, "step": 9461 }, { "epoch": 3.151911385025402, "grad_norm": 0.6328125, "learning_rate": 1.202070963629153e-05, "loss": 4.0055, "step": 9462 }, { "epoch": 3.1522445240276507, "grad_norm": 0.65234375, "learning_rate": 1.2020063741535799e-05, "loss": 3.9999, "step": 9463 }, { "epoch": 3.1525776630298994, "grad_norm": 0.60546875, "learning_rate": 1.2019417794131114e-05, "loss": 4.0761, "step": 9464 }, { "epoch": 3.1529108020321477, "grad_norm": 0.69921875, "learning_rate": 1.2018771794085e-05, "loss": 4.0246, "step": 9465 }, { "epoch": 3.1532439410343964, "grad_norm": 0.65625, "learning_rate": 1.2018125741404987e-05, "loss": 4.0102, "step": 9466 }, { "epoch": 3.153577080036645, "grad_norm": 0.6484375, "learning_rate": 1.2017479636098591e-05, "loss": 4.0421, "step": 9467 }, { "epoch": 3.153910219038894, "grad_norm": 0.6484375, "learning_rate": 1.2016833478173343e-05, "loss": 3.9587, "step": 9468 }, { "epoch": 3.1542433580411426, "grad_norm": 0.62890625, "learning_rate": 1.2016187267636768e-05, "loss": 4.0244, "step": 9469 }, { "epoch": 3.1545764970433914, "grad_norm": 0.625, "learning_rate": 1.2015541004496394e-05, "loss": 3.9842, "step": 9470 }, { "epoch": 3.15490963604564, "grad_norm": 0.66015625, "learning_rate": 1.2014894688759748e-05, "loss": 3.9845, "step": 9471 }, { "epoch": 3.155242775047889, "grad_norm": 0.63671875, "learning_rate": 1.2014248320434359e-05, "loss": 3.9904, "step": 9472 }, { "epoch": 3.1555759140501376, "grad_norm": 0.6484375, "learning_rate": 1.2013601899527752e-05, "loss": 4.0785, "step": 9473 }, { "epoch": 3.155909053052386, "grad_norm": 0.67578125, "learning_rate": 1.201295542604746e-05, "loss": 4.051, "step": 9474 }, { "epoch": 3.1562421920546346, "grad_norm": 0.64453125, "learning_rate": 1.2012308900001016e-05, "loss": 4.0458, "step": 9475 }, { "epoch": 3.1565753310568834, "grad_norm": 0.61328125, "learning_rate": 1.2011662321395944e-05, "loss": 4.076, "step": 9476 }, { "epoch": 3.156908470059132, "grad_norm": 0.609375, "learning_rate": 1.201101569023978e-05, "loss": 4.0471, "step": 9477 }, { "epoch": 3.157241609061381, "grad_norm": 0.64453125, "learning_rate": 1.2010369006540051e-05, "loss": 3.9827, "step": 9478 }, { "epoch": 3.1575747480636296, "grad_norm": 0.66796875, "learning_rate": 1.2009722270304295e-05, "loss": 3.9975, "step": 9479 }, { "epoch": 3.1579078870658783, "grad_norm": 0.625, "learning_rate": 1.2009075481540041e-05, "loss": 4.0632, "step": 9480 }, { "epoch": 3.158241026068127, "grad_norm": 0.64453125, "learning_rate": 1.2008428640254825e-05, "loss": 3.9399, "step": 9481 }, { "epoch": 3.158574165070376, "grad_norm": 0.63671875, "learning_rate": 1.2007781746456182e-05, "loss": 4.0136, "step": 9482 }, { "epoch": 3.158907304072624, "grad_norm": 0.67578125, "learning_rate": 1.2007134800151642e-05, "loss": 4.0222, "step": 9483 }, { "epoch": 3.159240443074873, "grad_norm": 0.6484375, "learning_rate": 1.2006487801348747e-05, "loss": 4.0381, "step": 9484 }, { "epoch": 3.1595735820771216, "grad_norm": 0.66796875, "learning_rate": 1.200584075005503e-05, "loss": 4.0466, "step": 9485 }, { "epoch": 3.1599067210793703, "grad_norm": 0.640625, "learning_rate": 1.2005193646278027e-05, "loss": 4.068, "step": 9486 }, { "epoch": 3.160239860081619, "grad_norm": 0.6484375, "learning_rate": 1.2004546490025276e-05, "loss": 4.0419, "step": 9487 }, { "epoch": 3.160572999083868, "grad_norm": 0.65234375, "learning_rate": 1.2003899281304317e-05, "loss": 4.053, "step": 9488 }, { "epoch": 3.1609061380861165, "grad_norm": 0.625, "learning_rate": 1.2003252020122685e-05, "loss": 4.0336, "step": 9489 }, { "epoch": 3.1612392770883653, "grad_norm": 0.63671875, "learning_rate": 1.2002604706487921e-05, "loss": 4.038, "step": 9490 }, { "epoch": 3.161572416090614, "grad_norm": 0.640625, "learning_rate": 1.2001957340407566e-05, "loss": 3.9861, "step": 9491 }, { "epoch": 3.1619055550928623, "grad_norm": 0.64453125, "learning_rate": 1.2001309921889158e-05, "loss": 4.0973, "step": 9492 }, { "epoch": 3.162238694095111, "grad_norm": 0.62890625, "learning_rate": 1.200066245094024e-05, "loss": 3.9945, "step": 9493 }, { "epoch": 3.16257183309736, "grad_norm": 0.6328125, "learning_rate": 1.2000014927568352e-05, "loss": 4.0194, "step": 9494 }, { "epoch": 3.1629049720996085, "grad_norm": 0.63671875, "learning_rate": 1.1999367351781039e-05, "loss": 4.0103, "step": 9495 }, { "epoch": 3.1632381111018573, "grad_norm": 0.63671875, "learning_rate": 1.199871972358584e-05, "loss": 4.0684, "step": 9496 }, { "epoch": 3.163571250104106, "grad_norm": 0.6328125, "learning_rate": 1.1998072042990301e-05, "loss": 3.9537, "step": 9497 }, { "epoch": 3.1639043891063547, "grad_norm": 0.65234375, "learning_rate": 1.1997424310001965e-05, "loss": 4.0307, "step": 9498 }, { "epoch": 3.1642375281086035, "grad_norm": 0.65234375, "learning_rate": 1.1996776524628378e-05, "loss": 4.0044, "step": 9499 }, { "epoch": 3.164570667110852, "grad_norm": 0.64453125, "learning_rate": 1.1996128686877086e-05, "loss": 4.0404, "step": 9500 }, { "epoch": 3.1649038061131005, "grad_norm": 0.671875, "learning_rate": 1.199548079675563e-05, "loss": 4.0257, "step": 9501 }, { "epoch": 3.1652369451153493, "grad_norm": 0.65625, "learning_rate": 1.1994832854271564e-05, "loss": 4.05, "step": 9502 }, { "epoch": 3.165570084117598, "grad_norm": 0.64453125, "learning_rate": 1.1994184859432426e-05, "loss": 4.0814, "step": 9503 }, { "epoch": 3.1659032231198467, "grad_norm": 0.62109375, "learning_rate": 1.1993536812245773e-05, "loss": 4.005, "step": 9504 }, { "epoch": 3.1662363621220955, "grad_norm": 0.6640625, "learning_rate": 1.1992888712719148e-05, "loss": 4.0317, "step": 9505 }, { "epoch": 3.166569501124344, "grad_norm": 0.64453125, "learning_rate": 1.19922405608601e-05, "loss": 4.0614, "step": 9506 }, { "epoch": 3.166902640126593, "grad_norm": 0.703125, "learning_rate": 1.1991592356676179e-05, "loss": 3.9454, "step": 9507 }, { "epoch": 3.1672357791288417, "grad_norm": 0.66015625, "learning_rate": 1.1990944100174939e-05, "loss": 4.0554, "step": 9508 }, { "epoch": 3.1675689181310904, "grad_norm": 0.625, "learning_rate": 1.1990295791363925e-05, "loss": 3.994, "step": 9509 }, { "epoch": 3.1679020571333387, "grad_norm": 0.625, "learning_rate": 1.198964743025069e-05, "loss": 4.1029, "step": 9510 }, { "epoch": 3.1682351961355875, "grad_norm": 0.6953125, "learning_rate": 1.198899901684279e-05, "loss": 4.0001, "step": 9511 }, { "epoch": 3.168568335137836, "grad_norm": 0.6640625, "learning_rate": 1.1988350551147774e-05, "loss": 3.9942, "step": 9512 }, { "epoch": 3.168901474140085, "grad_norm": 0.7109375, "learning_rate": 1.1987702033173194e-05, "loss": 3.9624, "step": 9513 }, { "epoch": 3.1692346131423337, "grad_norm": 0.671875, "learning_rate": 1.1987053462926607e-05, "loss": 4.0068, "step": 9514 }, { "epoch": 3.1695677521445824, "grad_norm": 0.64453125, "learning_rate": 1.1986404840415565e-05, "loss": 4.0233, "step": 9515 }, { "epoch": 3.169900891146831, "grad_norm": 0.671875, "learning_rate": 1.1985756165647626e-05, "loss": 4.0894, "step": 9516 }, { "epoch": 3.17023403014908, "grad_norm": 0.61328125, "learning_rate": 1.1985107438630342e-05, "loss": 4.0823, "step": 9517 }, { "epoch": 3.170567169151328, "grad_norm": 0.625, "learning_rate": 1.1984458659371272e-05, "loss": 3.9809, "step": 9518 }, { "epoch": 3.170900308153577, "grad_norm": 0.64453125, "learning_rate": 1.1983809827877973e-05, "loss": 4.0351, "step": 9519 }, { "epoch": 3.1712334471558257, "grad_norm": 0.62109375, "learning_rate": 1.1983160944158e-05, "loss": 4.0235, "step": 9520 }, { "epoch": 3.1715665861580744, "grad_norm": 0.671875, "learning_rate": 1.1982512008218912e-05, "loss": 4.0278, "step": 9521 }, { "epoch": 3.171899725160323, "grad_norm": 0.69921875, "learning_rate": 1.1981863020068269e-05, "loss": 4.0304, "step": 9522 }, { "epoch": 3.172232864162572, "grad_norm": 0.6640625, "learning_rate": 1.198121397971363e-05, "loss": 4.0385, "step": 9523 }, { "epoch": 3.1725660031648206, "grad_norm": 0.66796875, "learning_rate": 1.1980564887162554e-05, "loss": 4.0184, "step": 9524 }, { "epoch": 3.1728991421670694, "grad_norm": 0.6328125, "learning_rate": 1.19799157424226e-05, "loss": 3.9635, "step": 9525 }, { "epoch": 3.173232281169318, "grad_norm": 0.66015625, "learning_rate": 1.1979266545501335e-05, "loss": 3.9968, "step": 9526 }, { "epoch": 3.1735654201715664, "grad_norm": 0.62890625, "learning_rate": 1.1978617296406314e-05, "loss": 4.0112, "step": 9527 }, { "epoch": 3.173898559173815, "grad_norm": 0.65234375, "learning_rate": 1.1977967995145104e-05, "loss": 4.0141, "step": 9528 }, { "epoch": 3.174231698176064, "grad_norm": 0.6640625, "learning_rate": 1.1977318641725265e-05, "loss": 4.0044, "step": 9529 }, { "epoch": 3.1745648371783126, "grad_norm": 0.66015625, "learning_rate": 1.1976669236154361e-05, "loss": 4.0856, "step": 9530 }, { "epoch": 3.1748979761805614, "grad_norm": 0.703125, "learning_rate": 1.1976019778439957e-05, "loss": 4.0487, "step": 9531 }, { "epoch": 3.17523111518281, "grad_norm": 0.65234375, "learning_rate": 1.197537026858962e-05, "loss": 3.9839, "step": 9532 }, { "epoch": 3.175564254185059, "grad_norm": 0.65234375, "learning_rate": 1.1974720706610911e-05, "loss": 4.0772, "step": 9533 }, { "epoch": 3.1758973931873076, "grad_norm": 0.6484375, "learning_rate": 1.1974071092511399e-05, "loss": 3.9341, "step": 9534 }, { "epoch": 3.176230532189556, "grad_norm": 0.6171875, "learning_rate": 1.1973421426298649e-05, "loss": 4.0332, "step": 9535 }, { "epoch": 3.1765636711918046, "grad_norm": 0.6640625, "learning_rate": 1.1972771707980228e-05, "loss": 4.0261, "step": 9536 }, { "epoch": 3.1768968101940533, "grad_norm": 0.640625, "learning_rate": 1.1972121937563704e-05, "loss": 4.0715, "step": 9537 }, { "epoch": 3.177229949196302, "grad_norm": 0.65234375, "learning_rate": 1.1971472115056648e-05, "loss": 3.9795, "step": 9538 }, { "epoch": 3.177563088198551, "grad_norm": 0.6484375, "learning_rate": 1.1970822240466625e-05, "loss": 4.0121, "step": 9539 }, { "epoch": 3.1778962272007996, "grad_norm": 0.6640625, "learning_rate": 1.1970172313801208e-05, "loss": 3.9548, "step": 9540 }, { "epoch": 3.1782293662030483, "grad_norm": 0.67578125, "learning_rate": 1.1969522335067966e-05, "loss": 4.006, "step": 9541 }, { "epoch": 3.178562505205297, "grad_norm": 0.64453125, "learning_rate": 1.196887230427447e-05, "loss": 3.9908, "step": 9542 }, { "epoch": 3.178895644207546, "grad_norm": 0.65625, "learning_rate": 1.1968222221428292e-05, "loss": 3.9572, "step": 9543 }, { "epoch": 3.179228783209794, "grad_norm": 0.6640625, "learning_rate": 1.1967572086537001e-05, "loss": 3.9721, "step": 9544 }, { "epoch": 3.179561922212043, "grad_norm": 0.60546875, "learning_rate": 1.1966921899608174e-05, "loss": 4.0341, "step": 9545 }, { "epoch": 3.1798950612142916, "grad_norm": 0.65625, "learning_rate": 1.196627166064938e-05, "loss": 3.99, "step": 9546 }, { "epoch": 3.1802282002165403, "grad_norm": 0.65625, "learning_rate": 1.1965621369668197e-05, "loss": 4.0939, "step": 9547 }, { "epoch": 3.180561339218789, "grad_norm": 0.63671875, "learning_rate": 1.1964971026672197e-05, "loss": 4.0439, "step": 9548 }, { "epoch": 3.1808944782210378, "grad_norm": 0.64453125, "learning_rate": 1.1964320631668957e-05, "loss": 3.978, "step": 9549 }, { "epoch": 3.1812276172232865, "grad_norm": 0.6875, "learning_rate": 1.1963670184666047e-05, "loss": 4.0708, "step": 9550 }, { "epoch": 3.1815607562255352, "grad_norm": 0.68359375, "learning_rate": 1.1963019685671052e-05, "loss": 3.9329, "step": 9551 }, { "epoch": 3.181893895227784, "grad_norm": 0.69921875, "learning_rate": 1.1962369134691543e-05, "loss": 3.9845, "step": 9552 }, { "epoch": 3.1822270342300323, "grad_norm": 0.63671875, "learning_rate": 1.1961718531735099e-05, "loss": 4.0775, "step": 9553 }, { "epoch": 3.182560173232281, "grad_norm": 0.68359375, "learning_rate": 1.1961067876809299e-05, "loss": 3.9981, "step": 9554 }, { "epoch": 3.1828933122345298, "grad_norm": 0.65625, "learning_rate": 1.1960417169921719e-05, "loss": 4.0216, "step": 9555 }, { "epoch": 3.1832264512367785, "grad_norm": 0.66796875, "learning_rate": 1.1959766411079943e-05, "loss": 4.0456, "step": 9556 }, { "epoch": 3.1835595902390272, "grad_norm": 0.64453125, "learning_rate": 1.1959115600291546e-05, "loss": 3.975, "step": 9557 }, { "epoch": 3.183892729241276, "grad_norm": 0.6640625, "learning_rate": 1.195846473756411e-05, "loss": 3.9769, "step": 9558 }, { "epoch": 3.1842258682435247, "grad_norm": 0.625, "learning_rate": 1.195781382290522e-05, "loss": 4.0083, "step": 9559 }, { "epoch": 3.1845590072457735, "grad_norm": 0.640625, "learning_rate": 1.1957162856322453e-05, "loss": 3.9621, "step": 9560 }, { "epoch": 3.184892146248022, "grad_norm": 0.68359375, "learning_rate": 1.1956511837823392e-05, "loss": 4.1069, "step": 9561 }, { "epoch": 3.1852252852502705, "grad_norm": 0.6796875, "learning_rate": 1.195586076741562e-05, "loss": 4.048, "step": 9562 }, { "epoch": 3.1855584242525192, "grad_norm": 0.67578125, "learning_rate": 1.1955209645106723e-05, "loss": 3.9913, "step": 9563 }, { "epoch": 3.185891563254768, "grad_norm": 0.66796875, "learning_rate": 1.1954558470904285e-05, "loss": 4.0018, "step": 9564 }, { "epoch": 3.1862247022570167, "grad_norm": 0.6875, "learning_rate": 1.195390724481589e-05, "loss": 4.0218, "step": 9565 }, { "epoch": 3.1865578412592654, "grad_norm": 0.69921875, "learning_rate": 1.1953255966849118e-05, "loss": 3.9992, "step": 9566 }, { "epoch": 3.186890980261514, "grad_norm": 0.6640625, "learning_rate": 1.1952604637011564e-05, "loss": 4.0233, "step": 9567 }, { "epoch": 3.187224119263763, "grad_norm": 0.6640625, "learning_rate": 1.1951953255310807e-05, "loss": 3.9663, "step": 9568 }, { "epoch": 3.1875572582660117, "grad_norm": 0.6171875, "learning_rate": 1.1951301821754441e-05, "loss": 4.0418, "step": 9569 }, { "epoch": 3.1878903972682604, "grad_norm": 0.703125, "learning_rate": 1.1950650336350048e-05, "loss": 3.9555, "step": 9570 }, { "epoch": 3.1882235362705087, "grad_norm": 0.6640625, "learning_rate": 1.194999879910522e-05, "loss": 4.0563, "step": 9571 }, { "epoch": 3.1885566752727574, "grad_norm": 0.69140625, "learning_rate": 1.1949347210027543e-05, "loss": 3.9867, "step": 9572 }, { "epoch": 3.188889814275006, "grad_norm": 0.65234375, "learning_rate": 1.1948695569124608e-05, "loss": 4.0286, "step": 9573 }, { "epoch": 3.189222953277255, "grad_norm": 0.6875, "learning_rate": 1.1948043876404007e-05, "loss": 3.9944, "step": 9574 }, { "epoch": 3.1895560922795037, "grad_norm": 0.65234375, "learning_rate": 1.194739213187333e-05, "loss": 3.9338, "step": 9575 }, { "epoch": 3.1898892312817524, "grad_norm": 0.64453125, "learning_rate": 1.1946740335540165e-05, "loss": 3.955, "step": 9576 }, { "epoch": 3.190222370284001, "grad_norm": 0.66796875, "learning_rate": 1.1946088487412107e-05, "loss": 4.0587, "step": 9577 }, { "epoch": 3.19055550928625, "grad_norm": 0.70703125, "learning_rate": 1.1945436587496752e-05, "loss": 4.0188, "step": 9578 }, { "epoch": 3.1908886482884986, "grad_norm": 0.640625, "learning_rate": 1.1944784635801688e-05, "loss": 4.023, "step": 9579 }, { "epoch": 3.191221787290747, "grad_norm": 0.71875, "learning_rate": 1.1944132632334509e-05, "loss": 4.0112, "step": 9580 }, { "epoch": 3.1915549262929956, "grad_norm": 0.6640625, "learning_rate": 1.194348057710281e-05, "loss": 4.0831, "step": 9581 }, { "epoch": 3.1918880652952444, "grad_norm": 0.65625, "learning_rate": 1.1942828470114189e-05, "loss": 4.055, "step": 9582 }, { "epoch": 3.192221204297493, "grad_norm": 0.68359375, "learning_rate": 1.194217631137624e-05, "loss": 4.0187, "step": 9583 }, { "epoch": 3.192554343299742, "grad_norm": 0.67578125, "learning_rate": 1.1941524100896555e-05, "loss": 3.9976, "step": 9584 }, { "epoch": 3.1928874823019906, "grad_norm": 0.68359375, "learning_rate": 1.1940871838682737e-05, "loss": 4.0533, "step": 9585 }, { "epoch": 3.1932206213042393, "grad_norm": 0.6328125, "learning_rate": 1.1940219524742383e-05, "loss": 4.0469, "step": 9586 }, { "epoch": 3.193553760306488, "grad_norm": 0.66015625, "learning_rate": 1.1939567159083085e-05, "loss": 4.021, "step": 9587 }, { "epoch": 3.1938868993087364, "grad_norm": 0.65234375, "learning_rate": 1.1938914741712447e-05, "loss": 3.9659, "step": 9588 }, { "epoch": 3.194220038310985, "grad_norm": 0.66015625, "learning_rate": 1.1938262272638069e-05, "loss": 4.0643, "step": 9589 }, { "epoch": 3.194553177313234, "grad_norm": 0.65625, "learning_rate": 1.1937609751867547e-05, "loss": 4.0358, "step": 9590 }, { "epoch": 3.1948863163154826, "grad_norm": 0.66796875, "learning_rate": 1.1936957179408482e-05, "loss": 3.9722, "step": 9591 }, { "epoch": 3.1952194553177313, "grad_norm": 0.6640625, "learning_rate": 1.193630455526848e-05, "loss": 3.9842, "step": 9592 }, { "epoch": 3.19555259431998, "grad_norm": 0.6875, "learning_rate": 1.1935651879455137e-05, "loss": 4.0855, "step": 9593 }, { "epoch": 3.195885733322229, "grad_norm": 0.6484375, "learning_rate": 1.1934999151976056e-05, "loss": 4.0998, "step": 9594 }, { "epoch": 3.1962188723244775, "grad_norm": 0.63671875, "learning_rate": 1.1934346372838842e-05, "loss": 4.0426, "step": 9595 }, { "epoch": 3.1965520113267263, "grad_norm": 0.6328125, "learning_rate": 1.1933693542051098e-05, "loss": 4.0221, "step": 9596 }, { "epoch": 3.1968851503289746, "grad_norm": 0.67578125, "learning_rate": 1.1933040659620428e-05, "loss": 4.0491, "step": 9597 }, { "epoch": 3.1972182893312233, "grad_norm": 0.65625, "learning_rate": 1.1932387725554435e-05, "loss": 4.0094, "step": 9598 }, { "epoch": 3.197551428333472, "grad_norm": 0.65625, "learning_rate": 1.1931734739860724e-05, "loss": 3.9547, "step": 9599 }, { "epoch": 3.197884567335721, "grad_norm": 0.69921875, "learning_rate": 1.1931081702546907e-05, "loss": 4.0356, "step": 9600 }, { "epoch": 3.1982177063379695, "grad_norm": 0.61328125, "learning_rate": 1.1930428613620583e-05, "loss": 4.0058, "step": 9601 }, { "epoch": 3.1985508453402183, "grad_norm": 0.640625, "learning_rate": 1.1929775473089362e-05, "loss": 4.0395, "step": 9602 }, { "epoch": 3.198883984342467, "grad_norm": 0.63671875, "learning_rate": 1.1929122280960853e-05, "loss": 4.0763, "step": 9603 }, { "epoch": 3.1992171233447158, "grad_norm": 0.66015625, "learning_rate": 1.1928469037242661e-05, "loss": 3.9879, "step": 9604 }, { "epoch": 3.199550262346964, "grad_norm": 0.64453125, "learning_rate": 1.1927815741942397e-05, "loss": 3.9649, "step": 9605 }, { "epoch": 3.199883401349213, "grad_norm": 0.62890625, "learning_rate": 1.192716239506767e-05, "loss": 4.0673, "step": 9606 }, { "epoch": 3.2002165403514615, "grad_norm": 0.6171875, "learning_rate": 1.1926508996626091e-05, "loss": 4.0947, "step": 9607 }, { "epoch": 3.2005496793537103, "grad_norm": 0.6796875, "learning_rate": 1.1925855546625271e-05, "loss": 4.0675, "step": 9608 }, { "epoch": 3.200882818355959, "grad_norm": 0.6484375, "learning_rate": 1.1925202045072818e-05, "loss": 3.9897, "step": 9609 }, { "epoch": 3.2012159573582077, "grad_norm": 0.67578125, "learning_rate": 1.1924548491976348e-05, "loss": 3.9831, "step": 9610 }, { "epoch": 3.2015490963604565, "grad_norm": 0.66796875, "learning_rate": 1.192389488734347e-05, "loss": 4.0553, "step": 9611 }, { "epoch": 3.201882235362705, "grad_norm": 0.66796875, "learning_rate": 1.1923241231181802e-05, "loss": 4.0551, "step": 9612 }, { "epoch": 3.202215374364954, "grad_norm": 0.67578125, "learning_rate": 1.1922587523498951e-05, "loss": 3.9825, "step": 9613 }, { "epoch": 3.2025485133672023, "grad_norm": 0.66015625, "learning_rate": 1.1921933764302537e-05, "loss": 4.005, "step": 9614 }, { "epoch": 3.202881652369451, "grad_norm": 0.66015625, "learning_rate": 1.1921279953600171e-05, "loss": 3.9927, "step": 9615 }, { "epoch": 3.2032147913716997, "grad_norm": 0.66796875, "learning_rate": 1.1920626091399471e-05, "loss": 4.0228, "step": 9616 }, { "epoch": 3.2035479303739485, "grad_norm": 0.6640625, "learning_rate": 1.191997217770805e-05, "loss": 4.0575, "step": 9617 }, { "epoch": 3.203881069376197, "grad_norm": 0.71875, "learning_rate": 1.191931821253353e-05, "loss": 3.9943, "step": 9618 }, { "epoch": 3.204214208378446, "grad_norm": 0.703125, "learning_rate": 1.1918664195883523e-05, "loss": 3.9542, "step": 9619 }, { "epoch": 3.2045473473806947, "grad_norm": 0.67578125, "learning_rate": 1.191801012776565e-05, "loss": 3.9466, "step": 9620 }, { "epoch": 3.2048804863829434, "grad_norm": 0.63671875, "learning_rate": 1.1917356008187527e-05, "loss": 4.0556, "step": 9621 }, { "epoch": 3.205213625385192, "grad_norm": 0.66796875, "learning_rate": 1.1916701837156775e-05, "loss": 4.0913, "step": 9622 }, { "epoch": 3.2055467643874405, "grad_norm": 0.66796875, "learning_rate": 1.1916047614681013e-05, "loss": 4.0318, "step": 9623 }, { "epoch": 3.205879903389689, "grad_norm": 0.66796875, "learning_rate": 1.1915393340767862e-05, "loss": 3.9956, "step": 9624 }, { "epoch": 3.206213042391938, "grad_norm": 0.6875, "learning_rate": 1.1914739015424942e-05, "loss": 3.9805, "step": 9625 }, { "epoch": 3.2065461813941867, "grad_norm": 0.63671875, "learning_rate": 1.1914084638659874e-05, "loss": 3.9966, "step": 9626 }, { "epoch": 3.2068793203964354, "grad_norm": 0.65234375, "learning_rate": 1.191343021048028e-05, "loss": 4.004, "step": 9627 }, { "epoch": 3.207212459398684, "grad_norm": 0.6484375, "learning_rate": 1.1912775730893789e-05, "loss": 4.0109, "step": 9628 }, { "epoch": 3.207545598400933, "grad_norm": 0.67578125, "learning_rate": 1.1912121199908014e-05, "loss": 4.0009, "step": 9629 }, { "epoch": 3.2078787374031816, "grad_norm": 0.66015625, "learning_rate": 1.1911466617530583e-05, "loss": 4.0793, "step": 9630 }, { "epoch": 3.2082118764054304, "grad_norm": 0.625, "learning_rate": 1.1910811983769123e-05, "loss": 3.955, "step": 9631 }, { "epoch": 3.2085450154076787, "grad_norm": 0.62890625, "learning_rate": 1.1910157298631258e-05, "loss": 4.0249, "step": 9632 }, { "epoch": 3.2088781544099274, "grad_norm": 0.67578125, "learning_rate": 1.1909502562124612e-05, "loss": 4.0729, "step": 9633 }, { "epoch": 3.209211293412176, "grad_norm": 0.64453125, "learning_rate": 1.190884777425681e-05, "loss": 3.9981, "step": 9634 }, { "epoch": 3.209544432414425, "grad_norm": 0.6484375, "learning_rate": 1.1908192935035485e-05, "loss": 4.0555, "step": 9635 }, { "epoch": 3.2098775714166736, "grad_norm": 0.6875, "learning_rate": 1.1907538044468257e-05, "loss": 3.9424, "step": 9636 }, { "epoch": 3.2102107104189224, "grad_norm": 0.70703125, "learning_rate": 1.190688310256276e-05, "loss": 3.9416, "step": 9637 }, { "epoch": 3.210543849421171, "grad_norm": 0.64453125, "learning_rate": 1.1906228109326618e-05, "loss": 4.0029, "step": 9638 }, { "epoch": 3.21087698842342, "grad_norm": 0.66015625, "learning_rate": 1.1905573064767464e-05, "loss": 4.0317, "step": 9639 }, { "epoch": 3.2112101274256686, "grad_norm": 0.6328125, "learning_rate": 1.1904917968892924e-05, "loss": 4.0303, "step": 9640 }, { "epoch": 3.211543266427917, "grad_norm": 0.671875, "learning_rate": 1.1904262821710633e-05, "loss": 3.9718, "step": 9641 }, { "epoch": 3.2118764054301656, "grad_norm": 0.6796875, "learning_rate": 1.1903607623228221e-05, "loss": 3.9435, "step": 9642 }, { "epoch": 3.2122095444324144, "grad_norm": 0.640625, "learning_rate": 1.1902952373453314e-05, "loss": 4.0045, "step": 9643 }, { "epoch": 3.212542683434663, "grad_norm": 0.62890625, "learning_rate": 1.1902297072393553e-05, "loss": 4.0467, "step": 9644 }, { "epoch": 3.212875822436912, "grad_norm": 0.6953125, "learning_rate": 1.1901641720056564e-05, "loss": 3.9793, "step": 9645 }, { "epoch": 3.2132089614391606, "grad_norm": 0.66015625, "learning_rate": 1.1900986316449985e-05, "loss": 4.0213, "step": 9646 }, { "epoch": 3.2135421004414093, "grad_norm": 0.6484375, "learning_rate": 1.1900330861581445e-05, "loss": 4.0504, "step": 9647 }, { "epoch": 3.213875239443658, "grad_norm": 0.6640625, "learning_rate": 1.1899675355458583e-05, "loss": 4.0189, "step": 9648 }, { "epoch": 3.214208378445907, "grad_norm": 0.6796875, "learning_rate": 1.1899019798089033e-05, "loss": 4.0085, "step": 9649 }, { "epoch": 3.214541517448155, "grad_norm": 0.6640625, "learning_rate": 1.189836418948043e-05, "loss": 4.0199, "step": 9650 }, { "epoch": 3.214874656450404, "grad_norm": 0.62109375, "learning_rate": 1.1897708529640411e-05, "loss": 4.0721, "step": 9651 }, { "epoch": 3.2152077954526526, "grad_norm": 0.640625, "learning_rate": 1.1897052818576615e-05, "loss": 4.0348, "step": 9652 }, { "epoch": 3.2155409344549013, "grad_norm": 0.66015625, "learning_rate": 1.1896397056296675e-05, "loss": 3.9806, "step": 9653 }, { "epoch": 3.21587407345715, "grad_norm": 0.68359375, "learning_rate": 1.1895741242808234e-05, "loss": 4.0556, "step": 9654 }, { "epoch": 3.216207212459399, "grad_norm": 0.703125, "learning_rate": 1.1895085378118928e-05, "loss": 3.9591, "step": 9655 }, { "epoch": 3.2165403514616475, "grad_norm": 0.69140625, "learning_rate": 1.1894429462236396e-05, "loss": 4.0368, "step": 9656 }, { "epoch": 3.2168734904638963, "grad_norm": 0.63671875, "learning_rate": 1.1893773495168279e-05, "loss": 3.9981, "step": 9657 }, { "epoch": 3.2172066294661446, "grad_norm": 0.66015625, "learning_rate": 1.1893117476922219e-05, "loss": 4.0445, "step": 9658 }, { "epoch": 3.2175397684683933, "grad_norm": 0.640625, "learning_rate": 1.1892461407505855e-05, "loss": 4.0264, "step": 9659 }, { "epoch": 3.217872907470642, "grad_norm": 0.61328125, "learning_rate": 1.1891805286926831e-05, "loss": 4.0902, "step": 9660 }, { "epoch": 3.2182060464728908, "grad_norm": 0.6640625, "learning_rate": 1.1891149115192787e-05, "loss": 3.9167, "step": 9661 }, { "epoch": 3.2185391854751395, "grad_norm": 0.71484375, "learning_rate": 1.1890492892311365e-05, "loss": 3.9651, "step": 9662 }, { "epoch": 3.2188723244773882, "grad_norm": 0.671875, "learning_rate": 1.1889836618290215e-05, "loss": 3.9845, "step": 9663 }, { "epoch": 3.219205463479637, "grad_norm": 0.6484375, "learning_rate": 1.1889180293136973e-05, "loss": 4.0228, "step": 9664 }, { "epoch": 3.2195386024818857, "grad_norm": 0.6484375, "learning_rate": 1.188852391685929e-05, "loss": 4.0061, "step": 9665 }, { "epoch": 3.219871741484134, "grad_norm": 0.67578125, "learning_rate": 1.1887867489464807e-05, "loss": 3.9169, "step": 9666 }, { "epoch": 3.2202048804863828, "grad_norm": 0.67578125, "learning_rate": 1.1887211010961173e-05, "loss": 4.0082, "step": 9667 }, { "epoch": 3.2205380194886315, "grad_norm": 0.6484375, "learning_rate": 1.1886554481356034e-05, "loss": 4.1103, "step": 9668 }, { "epoch": 3.2208711584908802, "grad_norm": 0.69140625, "learning_rate": 1.1885897900657037e-05, "loss": 4.0491, "step": 9669 }, { "epoch": 3.221204297493129, "grad_norm": 0.6484375, "learning_rate": 1.1885241268871827e-05, "loss": 4.0474, "step": 9670 }, { "epoch": 3.2215374364953777, "grad_norm": 0.66015625, "learning_rate": 1.1884584586008055e-05, "loss": 3.9968, "step": 9671 }, { "epoch": 3.2218705754976265, "grad_norm": 0.6484375, "learning_rate": 1.1883927852073371e-05, "loss": 3.9467, "step": 9672 }, { "epoch": 3.222203714499875, "grad_norm": 0.62109375, "learning_rate": 1.1883271067075422e-05, "loss": 3.9939, "step": 9673 }, { "epoch": 3.222536853502124, "grad_norm": 0.66015625, "learning_rate": 1.1882614231021859e-05, "loss": 3.9799, "step": 9674 }, { "epoch": 3.2228699925043722, "grad_norm": 0.66796875, "learning_rate": 1.1881957343920332e-05, "loss": 3.9872, "step": 9675 }, { "epoch": 3.223203131506621, "grad_norm": 0.62890625, "learning_rate": 1.1881300405778495e-05, "loss": 4.0607, "step": 9676 }, { "epoch": 3.2235362705088697, "grad_norm": 0.6640625, "learning_rate": 1.1880643416604e-05, "loss": 3.974, "step": 9677 }, { "epoch": 3.2238694095111184, "grad_norm": 0.640625, "learning_rate": 1.1879986376404496e-05, "loss": 4.0044, "step": 9678 }, { "epoch": 3.224202548513367, "grad_norm": 0.6640625, "learning_rate": 1.1879329285187636e-05, "loss": 3.9245, "step": 9679 }, { "epoch": 3.224535687515616, "grad_norm": 0.6484375, "learning_rate": 1.1878672142961076e-05, "loss": 3.9507, "step": 9680 }, { "epoch": 3.2248688265178647, "grad_norm": 0.64453125, "learning_rate": 1.1878014949732474e-05, "loss": 4.0871, "step": 9681 }, { "epoch": 3.2252019655201134, "grad_norm": 0.671875, "learning_rate": 1.1877357705509474e-05, "loss": 4.054, "step": 9682 }, { "epoch": 3.225535104522362, "grad_norm": 0.65234375, "learning_rate": 1.1876700410299742e-05, "loss": 4.028, "step": 9683 }, { "epoch": 3.2258682435246104, "grad_norm": 0.65234375, "learning_rate": 1.1876043064110928e-05, "loss": 3.9576, "step": 9684 }, { "epoch": 3.226201382526859, "grad_norm": 0.6328125, "learning_rate": 1.1875385666950694e-05, "loss": 4.0142, "step": 9685 }, { "epoch": 3.226534521529108, "grad_norm": 0.65625, "learning_rate": 1.1874728218826691e-05, "loss": 3.994, "step": 9686 }, { "epoch": 3.2268676605313567, "grad_norm": 0.6171875, "learning_rate": 1.187407071974658e-05, "loss": 4.0762, "step": 9687 }, { "epoch": 3.2272007995336054, "grad_norm": 0.66796875, "learning_rate": 1.1873413169718021e-05, "loss": 3.978, "step": 9688 }, { "epoch": 3.227533938535854, "grad_norm": 0.6640625, "learning_rate": 1.1872755568748671e-05, "loss": 3.9545, "step": 9689 }, { "epoch": 3.227867077538103, "grad_norm": 0.62890625, "learning_rate": 1.187209791684619e-05, "loss": 4.0168, "step": 9690 }, { "epoch": 3.2282002165403516, "grad_norm": 0.62890625, "learning_rate": 1.1871440214018237e-05, "loss": 4.0418, "step": 9691 }, { "epoch": 3.2285333555426003, "grad_norm": 0.62109375, "learning_rate": 1.1870782460272475e-05, "loss": 4.0002, "step": 9692 }, { "epoch": 3.2288664945448486, "grad_norm": 0.66015625, "learning_rate": 1.1870124655616565e-05, "loss": 4.0092, "step": 9693 }, { "epoch": 3.2291996335470974, "grad_norm": 0.6484375, "learning_rate": 1.1869466800058166e-05, "loss": 3.9568, "step": 9694 }, { "epoch": 3.229532772549346, "grad_norm": 0.62109375, "learning_rate": 1.1868808893604947e-05, "loss": 4.0057, "step": 9695 }, { "epoch": 3.229865911551595, "grad_norm": 0.64453125, "learning_rate": 1.1868150936264564e-05, "loss": 4.0015, "step": 9696 }, { "epoch": 3.2301990505538436, "grad_norm": 0.6484375, "learning_rate": 1.1867492928044684e-05, "loss": 4.0698, "step": 9697 }, { "epoch": 3.2305321895560923, "grad_norm": 0.6484375, "learning_rate": 1.1866834868952973e-05, "loss": 4.0672, "step": 9698 }, { "epoch": 3.230865328558341, "grad_norm": 0.68359375, "learning_rate": 1.1866176758997096e-05, "loss": 3.8826, "step": 9699 }, { "epoch": 3.23119846756059, "grad_norm": 0.69140625, "learning_rate": 1.1865518598184713e-05, "loss": 4.0232, "step": 9700 }, { "epoch": 3.2315316065628386, "grad_norm": 0.671875, "learning_rate": 1.1864860386523497e-05, "loss": 4.0039, "step": 9701 }, { "epoch": 3.231864745565087, "grad_norm": 0.64453125, "learning_rate": 1.186420212402111e-05, "loss": 3.9375, "step": 9702 }, { "epoch": 3.2321978845673356, "grad_norm": 0.625, "learning_rate": 1.1863543810685219e-05, "loss": 4.0069, "step": 9703 }, { "epoch": 3.2325310235695843, "grad_norm": 0.6171875, "learning_rate": 1.1862885446523498e-05, "loss": 4.0205, "step": 9704 }, { "epoch": 3.232864162571833, "grad_norm": 0.69921875, "learning_rate": 1.186222703154361e-05, "loss": 3.958, "step": 9705 }, { "epoch": 3.233197301574082, "grad_norm": 0.6640625, "learning_rate": 1.1861568565753227e-05, "loss": 4.0166, "step": 9706 }, { "epoch": 3.2335304405763305, "grad_norm": 0.66015625, "learning_rate": 1.1860910049160014e-05, "loss": 4.0063, "step": 9707 }, { "epoch": 3.2338635795785793, "grad_norm": 0.671875, "learning_rate": 1.1860251481771649e-05, "loss": 4.0868, "step": 9708 }, { "epoch": 3.234196718580828, "grad_norm": 0.67578125, "learning_rate": 1.1859592863595796e-05, "loss": 4.0105, "step": 9709 }, { "epoch": 3.2345298575830768, "grad_norm": 0.640625, "learning_rate": 1.1858934194640128e-05, "loss": 4.0612, "step": 9710 }, { "epoch": 3.234862996585325, "grad_norm": 0.6640625, "learning_rate": 1.185827547491232e-05, "loss": 3.9348, "step": 9711 }, { "epoch": 3.235196135587574, "grad_norm": 0.640625, "learning_rate": 1.1857616704420041e-05, "loss": 3.9829, "step": 9712 }, { "epoch": 3.2355292745898225, "grad_norm": 0.66015625, "learning_rate": 1.1856957883170968e-05, "loss": 4.0459, "step": 9713 }, { "epoch": 3.2358624135920713, "grad_norm": 0.6640625, "learning_rate": 1.1856299011172772e-05, "loss": 4.0273, "step": 9714 }, { "epoch": 3.23619555259432, "grad_norm": 0.671875, "learning_rate": 1.1855640088433128e-05, "loss": 4.0182, "step": 9715 }, { "epoch": 3.2365286915965688, "grad_norm": 0.6875, "learning_rate": 1.185498111495971e-05, "loss": 4.0252, "step": 9716 }, { "epoch": 3.2368618305988175, "grad_norm": 0.6484375, "learning_rate": 1.1854322090760196e-05, "loss": 3.9994, "step": 9717 }, { "epoch": 3.2371949696010662, "grad_norm": 0.66796875, "learning_rate": 1.1853663015842262e-05, "loss": 3.9472, "step": 9718 }, { "epoch": 3.2375281086033145, "grad_norm": 0.64453125, "learning_rate": 1.1853003890213584e-05, "loss": 4.0713, "step": 9719 }, { "epoch": 3.2378612476055633, "grad_norm": 0.6328125, "learning_rate": 1.1852344713881838e-05, "loss": 4.0752, "step": 9720 }, { "epoch": 3.238194386607812, "grad_norm": 0.70703125, "learning_rate": 1.1851685486854703e-05, "loss": 4.0368, "step": 9721 }, { "epoch": 3.2385275256100607, "grad_norm": 0.64453125, "learning_rate": 1.185102620913986e-05, "loss": 3.9528, "step": 9722 }, { "epoch": 3.2388606646123095, "grad_norm": 0.70703125, "learning_rate": 1.1850366880744985e-05, "loss": 4.0298, "step": 9723 }, { "epoch": 3.239193803614558, "grad_norm": 0.65234375, "learning_rate": 1.1849707501677757e-05, "loss": 3.9738, "step": 9724 }, { "epoch": 3.239526942616807, "grad_norm": 0.65625, "learning_rate": 1.1849048071945858e-05, "loss": 3.9745, "step": 9725 }, { "epoch": 3.2398600816190557, "grad_norm": 0.67578125, "learning_rate": 1.1848388591556974e-05, "loss": 3.9412, "step": 9726 }, { "epoch": 3.2401932206213044, "grad_norm": 0.67578125, "learning_rate": 1.1847729060518777e-05, "loss": 3.9545, "step": 9727 }, { "epoch": 3.2405263596235527, "grad_norm": 0.6484375, "learning_rate": 1.1847069478838954e-05, "loss": 3.9718, "step": 9728 }, { "epoch": 3.2408594986258015, "grad_norm": 0.65234375, "learning_rate": 1.1846409846525185e-05, "loss": 4.0113, "step": 9729 }, { "epoch": 3.24119263762805, "grad_norm": 0.67578125, "learning_rate": 1.184575016358516e-05, "loss": 4.0257, "step": 9730 }, { "epoch": 3.241525776630299, "grad_norm": 0.64453125, "learning_rate": 1.1845090430026556e-05, "loss": 4.0558, "step": 9731 }, { "epoch": 3.2418589156325477, "grad_norm": 0.64453125, "learning_rate": 1.184443064585706e-05, "loss": 3.9887, "step": 9732 }, { "epoch": 3.2421920546347964, "grad_norm": 0.65234375, "learning_rate": 1.1843770811084359e-05, "loss": 3.9858, "step": 9733 }, { "epoch": 3.242525193637045, "grad_norm": 0.65625, "learning_rate": 1.1843110925716134e-05, "loss": 4.008, "step": 9734 }, { "epoch": 3.242858332639294, "grad_norm": 0.66796875, "learning_rate": 1.1842450989760076e-05, "loss": 4.0622, "step": 9735 }, { "epoch": 3.243191471641542, "grad_norm": 0.62109375, "learning_rate": 1.184179100322387e-05, "loss": 4.0101, "step": 9736 }, { "epoch": 3.243524610643791, "grad_norm": 0.6484375, "learning_rate": 1.1841130966115201e-05, "loss": 4.0097, "step": 9737 }, { "epoch": 3.2438577496460397, "grad_norm": 0.671875, "learning_rate": 1.184047087844176e-05, "loss": 4.0095, "step": 9738 }, { "epoch": 3.2441908886482884, "grad_norm": 0.6484375, "learning_rate": 1.1839810740211234e-05, "loss": 4.0458, "step": 9739 }, { "epoch": 3.244524027650537, "grad_norm": 0.6953125, "learning_rate": 1.1839150551431315e-05, "loss": 3.9725, "step": 9740 }, { "epoch": 3.244857166652786, "grad_norm": 0.65625, "learning_rate": 1.1838490312109689e-05, "loss": 3.9785, "step": 9741 }, { "epoch": 3.2451903056550346, "grad_norm": 0.6328125, "learning_rate": 1.1837830022254048e-05, "loss": 4.0327, "step": 9742 }, { "epoch": 3.2455234446572834, "grad_norm": 0.671875, "learning_rate": 1.1837169681872084e-05, "loss": 4.0367, "step": 9743 }, { "epoch": 3.245856583659532, "grad_norm": 0.60546875, "learning_rate": 1.1836509290971488e-05, "loss": 4.0369, "step": 9744 }, { "epoch": 3.2461897226617804, "grad_norm": 0.6796875, "learning_rate": 1.1835848849559952e-05, "loss": 3.9467, "step": 9745 }, { "epoch": 3.246522861664029, "grad_norm": 0.6796875, "learning_rate": 1.1835188357645164e-05, "loss": 4.0307, "step": 9746 }, { "epoch": 3.246856000666278, "grad_norm": 0.65234375, "learning_rate": 1.1834527815234824e-05, "loss": 4.0293, "step": 9747 }, { "epoch": 3.2471891396685266, "grad_norm": 0.69140625, "learning_rate": 1.1833867222336625e-05, "loss": 4.0346, "step": 9748 }, { "epoch": 3.2475222786707754, "grad_norm": 0.63671875, "learning_rate": 1.183320657895826e-05, "loss": 4.0567, "step": 9749 }, { "epoch": 3.247855417673024, "grad_norm": 0.65234375, "learning_rate": 1.1832545885107423e-05, "loss": 3.9915, "step": 9750 }, { "epoch": 3.248188556675273, "grad_norm": 0.68359375, "learning_rate": 1.1831885140791812e-05, "loss": 3.9944, "step": 9751 }, { "epoch": 3.2485216956775216, "grad_norm": 0.62890625, "learning_rate": 1.1831224346019119e-05, "loss": 4.0491, "step": 9752 }, { "epoch": 3.2488548346797703, "grad_norm": 0.67578125, "learning_rate": 1.1830563500797046e-05, "loss": 4.0185, "step": 9753 }, { "epoch": 3.2491879736820186, "grad_norm": 0.69140625, "learning_rate": 1.1829902605133287e-05, "loss": 3.9678, "step": 9754 }, { "epoch": 3.2495211126842674, "grad_norm": 0.67578125, "learning_rate": 1.1829241659035542e-05, "loss": 4.0, "step": 9755 }, { "epoch": 3.249854251686516, "grad_norm": 0.6484375, "learning_rate": 1.1828580662511508e-05, "loss": 4.0341, "step": 9756 }, { "epoch": 3.250187390688765, "grad_norm": 0.640625, "learning_rate": 1.1827919615568885e-05, "loss": 3.9289, "step": 9757 }, { "epoch": 3.2505205296910136, "grad_norm": 0.69140625, "learning_rate": 1.1827258518215375e-05, "loss": 4.0265, "step": 9758 }, { "epoch": 3.2508536686932623, "grad_norm": 0.671875, "learning_rate": 1.1826597370458677e-05, "loss": 4.0004, "step": 9759 }, { "epoch": 3.251186807695511, "grad_norm": 0.6171875, "learning_rate": 1.1825936172306486e-05, "loss": 4.0121, "step": 9760 }, { "epoch": 3.25151994669776, "grad_norm": 0.671875, "learning_rate": 1.1825274923766513e-05, "loss": 4.0033, "step": 9761 }, { "epoch": 3.2518530857000085, "grad_norm": 0.69140625, "learning_rate": 1.1824613624846452e-05, "loss": 4.0648, "step": 9762 }, { "epoch": 3.252186224702257, "grad_norm": 0.65234375, "learning_rate": 1.1823952275554012e-05, "loss": 4.018, "step": 9763 }, { "epoch": 3.2525193637045056, "grad_norm": 0.6875, "learning_rate": 1.1823290875896893e-05, "loss": 3.9593, "step": 9764 }, { "epoch": 3.2528525027067543, "grad_norm": 0.69140625, "learning_rate": 1.1822629425882798e-05, "loss": 4.0265, "step": 9765 }, { "epoch": 3.253185641709003, "grad_norm": 0.6640625, "learning_rate": 1.1821967925519435e-05, "loss": 4.0029, "step": 9766 }, { "epoch": 3.253518780711252, "grad_norm": 0.7109375, "learning_rate": 1.1821306374814506e-05, "loss": 4.02, "step": 9767 }, { "epoch": 3.2538519197135005, "grad_norm": 0.6484375, "learning_rate": 1.182064477377572e-05, "loss": 4.0003, "step": 9768 }, { "epoch": 3.2541850587157493, "grad_norm": 0.640625, "learning_rate": 1.1819983122410778e-05, "loss": 4.0458, "step": 9769 }, { "epoch": 3.254518197717998, "grad_norm": 0.640625, "learning_rate": 1.181932142072739e-05, "loss": 3.9711, "step": 9770 }, { "epoch": 3.2548513367202467, "grad_norm": 0.61328125, "learning_rate": 1.1818659668733264e-05, "loss": 4.05, "step": 9771 }, { "epoch": 3.255184475722495, "grad_norm": 0.66796875, "learning_rate": 1.1817997866436107e-05, "loss": 4.0353, "step": 9772 }, { "epoch": 3.2555176147247438, "grad_norm": 0.6796875, "learning_rate": 1.1817336013843627e-05, "loss": 4.0284, "step": 9773 }, { "epoch": 3.2558507537269925, "grad_norm": 0.6796875, "learning_rate": 1.1816674110963534e-05, "loss": 3.9811, "step": 9774 }, { "epoch": 3.2561838927292412, "grad_norm": 0.66015625, "learning_rate": 1.1816012157803539e-05, "loss": 4.0112, "step": 9775 }, { "epoch": 3.25651703173149, "grad_norm": 0.66015625, "learning_rate": 1.181535015437135e-05, "loss": 4.0206, "step": 9776 }, { "epoch": 3.2568501707337387, "grad_norm": 0.6875, "learning_rate": 1.1814688100674676e-05, "loss": 3.9601, "step": 9777 }, { "epoch": 3.2571833097359875, "grad_norm": 0.6328125, "learning_rate": 1.1814025996721234e-05, "loss": 4.0068, "step": 9778 }, { "epoch": 3.257516448738236, "grad_norm": 0.64453125, "learning_rate": 1.1813363842518732e-05, "loss": 4.0055, "step": 9779 }, { "epoch": 3.257849587740485, "grad_norm": 0.66796875, "learning_rate": 1.1812701638074885e-05, "loss": 4.0365, "step": 9780 }, { "epoch": 3.2581827267427332, "grad_norm": 0.6796875, "learning_rate": 1.1812039383397405e-05, "loss": 3.9828, "step": 9781 }, { "epoch": 3.258515865744982, "grad_norm": 0.63671875, "learning_rate": 1.1811377078494007e-05, "loss": 4.0662, "step": 9782 }, { "epoch": 3.2588490047472307, "grad_norm": 0.65234375, "learning_rate": 1.1810714723372404e-05, "loss": 4.0331, "step": 9783 }, { "epoch": 3.2591821437494795, "grad_norm": 0.65625, "learning_rate": 1.181005231804031e-05, "loss": 4.0294, "step": 9784 }, { "epoch": 3.259515282751728, "grad_norm": 0.65625, "learning_rate": 1.1809389862505443e-05, "loss": 4.0787, "step": 9785 }, { "epoch": 3.259848421753977, "grad_norm": 0.65625, "learning_rate": 1.1808727356775519e-05, "loss": 4.0421, "step": 9786 }, { "epoch": 3.2601815607562257, "grad_norm": 0.66796875, "learning_rate": 1.1808064800858252e-05, "loss": 4.0129, "step": 9787 }, { "epoch": 3.2605146997584744, "grad_norm": 0.64453125, "learning_rate": 1.180740219476136e-05, "loss": 3.9963, "step": 9788 }, { "epoch": 3.260847838760723, "grad_norm": 0.68359375, "learning_rate": 1.1806739538492567e-05, "loss": 4.0016, "step": 9789 }, { "epoch": 3.2611809777629714, "grad_norm": 0.62890625, "learning_rate": 1.1806076832059585e-05, "loss": 4.0235, "step": 9790 }, { "epoch": 3.26151411676522, "grad_norm": 0.66796875, "learning_rate": 1.1805414075470135e-05, "loss": 4.0143, "step": 9791 }, { "epoch": 3.261847255767469, "grad_norm": 0.6484375, "learning_rate": 1.1804751268731933e-05, "loss": 4.0237, "step": 9792 }, { "epoch": 3.2621803947697177, "grad_norm": 0.62109375, "learning_rate": 1.1804088411852708e-05, "loss": 3.9906, "step": 9793 }, { "epoch": 3.2625135337719664, "grad_norm": 0.640625, "learning_rate": 1.1803425504840174e-05, "loss": 3.9738, "step": 9794 }, { "epoch": 3.262846672774215, "grad_norm": 0.62109375, "learning_rate": 1.1802762547702052e-05, "loss": 4.0335, "step": 9795 }, { "epoch": 3.263179811776464, "grad_norm": 0.68359375, "learning_rate": 1.180209954044607e-05, "loss": 3.9789, "step": 9796 }, { "epoch": 3.263512950778712, "grad_norm": 0.66015625, "learning_rate": 1.1801436483079943e-05, "loss": 3.963, "step": 9797 }, { "epoch": 3.263846089780961, "grad_norm": 0.65625, "learning_rate": 1.1800773375611397e-05, "loss": 4.093, "step": 9798 }, { "epoch": 3.2641792287832097, "grad_norm": 0.66796875, "learning_rate": 1.180011021804816e-05, "loss": 3.9794, "step": 9799 }, { "epoch": 3.2645123677854584, "grad_norm": 0.6640625, "learning_rate": 1.179944701039795e-05, "loss": 4.0396, "step": 9800 }, { "epoch": 3.264845506787707, "grad_norm": 0.67578125, "learning_rate": 1.1798783752668498e-05, "loss": 4.0337, "step": 9801 }, { "epoch": 3.265178645789956, "grad_norm": 0.65234375, "learning_rate": 1.1798120444867526e-05, "loss": 3.9917, "step": 9802 }, { "epoch": 3.2655117847922046, "grad_norm": 0.65234375, "learning_rate": 1.179745708700276e-05, "loss": 4.0036, "step": 9803 }, { "epoch": 3.2658449237944533, "grad_norm": 0.67578125, "learning_rate": 1.1796793679081926e-05, "loss": 3.9771, "step": 9804 }, { "epoch": 3.266178062796702, "grad_norm": 0.6796875, "learning_rate": 1.1796130221112753e-05, "loss": 4.033, "step": 9805 }, { "epoch": 3.2665112017989504, "grad_norm": 0.6640625, "learning_rate": 1.1795466713102969e-05, "loss": 3.9865, "step": 9806 }, { "epoch": 3.266844340801199, "grad_norm": 0.66015625, "learning_rate": 1.17948031550603e-05, "loss": 4.0852, "step": 9807 }, { "epoch": 3.267177479803448, "grad_norm": 0.66015625, "learning_rate": 1.1794139546992479e-05, "loss": 3.9177, "step": 9808 }, { "epoch": 3.2675106188056966, "grad_norm": 0.640625, "learning_rate": 1.1793475888907232e-05, "loss": 4.0122, "step": 9809 }, { "epoch": 3.2678437578079453, "grad_norm": 0.6875, "learning_rate": 1.179281218081229e-05, "loss": 3.9396, "step": 9810 }, { "epoch": 3.268176896810194, "grad_norm": 0.66796875, "learning_rate": 1.1792148422715387e-05, "loss": 4.0779, "step": 9811 }, { "epoch": 3.268510035812443, "grad_norm": 0.6953125, "learning_rate": 1.179148461462425e-05, "loss": 3.9999, "step": 9812 }, { "epoch": 3.2688431748146916, "grad_norm": 0.640625, "learning_rate": 1.179082075654661e-05, "loss": 4.0728, "step": 9813 }, { "epoch": 3.2691763138169403, "grad_norm": 0.6328125, "learning_rate": 1.1790156848490207e-05, "loss": 3.9995, "step": 9814 }, { "epoch": 3.2695094528191886, "grad_norm": 0.67578125, "learning_rate": 1.1789492890462765e-05, "loss": 4.023, "step": 9815 }, { "epoch": 3.2698425918214373, "grad_norm": 0.67578125, "learning_rate": 1.1788828882472026e-05, "loss": 3.9806, "step": 9816 }, { "epoch": 3.270175730823686, "grad_norm": 0.68359375, "learning_rate": 1.1788164824525716e-05, "loss": 3.9577, "step": 9817 }, { "epoch": 3.270508869825935, "grad_norm": 0.6875, "learning_rate": 1.1787500716631575e-05, "loss": 4.0267, "step": 9818 }, { "epoch": 3.2708420088281835, "grad_norm": 0.73046875, "learning_rate": 1.178683655879734e-05, "loss": 4.0549, "step": 9819 }, { "epoch": 3.2711751478304323, "grad_norm": 0.6484375, "learning_rate": 1.1786172351030742e-05, "loss": 4.0201, "step": 9820 }, { "epoch": 3.271508286832681, "grad_norm": 0.6640625, "learning_rate": 1.178550809333952e-05, "loss": 4.0162, "step": 9821 }, { "epoch": 3.2718414258349298, "grad_norm": 0.65234375, "learning_rate": 1.1784843785731411e-05, "loss": 3.9721, "step": 9822 }, { "epoch": 3.2721745648371785, "grad_norm": 0.6875, "learning_rate": 1.1784179428214155e-05, "loss": 4.0636, "step": 9823 }, { "epoch": 3.272507703839427, "grad_norm": 0.69921875, "learning_rate": 1.1783515020795485e-05, "loss": 3.9768, "step": 9824 }, { "epoch": 3.2728408428416755, "grad_norm": 0.62890625, "learning_rate": 1.1782850563483146e-05, "loss": 4.0741, "step": 9825 }, { "epoch": 3.2731739818439243, "grad_norm": 0.6484375, "learning_rate": 1.1782186056284872e-05, "loss": 4.0045, "step": 9826 }, { "epoch": 3.273507120846173, "grad_norm": 0.66796875, "learning_rate": 1.1781521499208406e-05, "loss": 3.9495, "step": 9827 }, { "epoch": 3.2738402598484218, "grad_norm": 0.68359375, "learning_rate": 1.1780856892261488e-05, "loss": 3.9924, "step": 9828 }, { "epoch": 3.2741733988506705, "grad_norm": 0.6953125, "learning_rate": 1.1780192235451862e-05, "loss": 3.9708, "step": 9829 }, { "epoch": 3.2745065378529192, "grad_norm": 0.68359375, "learning_rate": 1.1779527528787267e-05, "loss": 4.0104, "step": 9830 }, { "epoch": 3.274839676855168, "grad_norm": 0.66796875, "learning_rate": 1.1778862772275445e-05, "loss": 4.0541, "step": 9831 }, { "epoch": 3.2751728158574167, "grad_norm": 0.64453125, "learning_rate": 1.1778197965924139e-05, "loss": 4.0153, "step": 9832 }, { "epoch": 3.275505954859665, "grad_norm": 0.6328125, "learning_rate": 1.1777533109741093e-05, "loss": 4.0075, "step": 9833 }, { "epoch": 3.2758390938619137, "grad_norm": 0.640625, "learning_rate": 1.1776868203734053e-05, "loss": 3.9916, "step": 9834 }, { "epoch": 3.2761722328641625, "grad_norm": 0.6640625, "learning_rate": 1.177620324791076e-05, "loss": 4.0233, "step": 9835 }, { "epoch": 3.276505371866411, "grad_norm": 0.69921875, "learning_rate": 1.1775538242278963e-05, "loss": 3.9206, "step": 9836 }, { "epoch": 3.27683851086866, "grad_norm": 0.65625, "learning_rate": 1.1774873186846406e-05, "loss": 4.0215, "step": 9837 }, { "epoch": 3.2771716498709087, "grad_norm": 0.703125, "learning_rate": 1.1774208081620837e-05, "loss": 3.9971, "step": 9838 }, { "epoch": 3.2775047888731574, "grad_norm": 0.65234375, "learning_rate": 1.1773542926609998e-05, "loss": 3.9743, "step": 9839 }, { "epoch": 3.277837927875406, "grad_norm": 0.6484375, "learning_rate": 1.1772877721821643e-05, "loss": 4.1002, "step": 9840 }, { "epoch": 3.278171066877655, "grad_norm": 0.6796875, "learning_rate": 1.177221246726352e-05, "loss": 3.9905, "step": 9841 }, { "epoch": 3.278504205879903, "grad_norm": 0.66015625, "learning_rate": 1.177154716294337e-05, "loss": 4.0232, "step": 9842 }, { "epoch": 3.278837344882152, "grad_norm": 0.64453125, "learning_rate": 1.177088180886895e-05, "loss": 4.0603, "step": 9843 }, { "epoch": 3.2791704838844007, "grad_norm": 0.65625, "learning_rate": 1.177021640504801e-05, "loss": 4.0799, "step": 9844 }, { "epoch": 3.2795036228866494, "grad_norm": 0.6484375, "learning_rate": 1.1769550951488295e-05, "loss": 4.0155, "step": 9845 }, { "epoch": 3.279836761888898, "grad_norm": 0.69140625, "learning_rate": 1.176888544819756e-05, "loss": 4.0021, "step": 9846 }, { "epoch": 3.280169900891147, "grad_norm": 0.66015625, "learning_rate": 1.1768219895183559e-05, "loss": 4.067, "step": 9847 }, { "epoch": 3.2805030398933956, "grad_norm": 0.6796875, "learning_rate": 1.1767554292454035e-05, "loss": 4.038, "step": 9848 }, { "epoch": 3.2808361788956444, "grad_norm": 0.6328125, "learning_rate": 1.1766888640016751e-05, "loss": 4.0368, "step": 9849 }, { "epoch": 3.281169317897893, "grad_norm": 0.66796875, "learning_rate": 1.1766222937879458e-05, "loss": 3.9685, "step": 9850 }, { "epoch": 3.2815024569001414, "grad_norm": 0.6328125, "learning_rate": 1.1765557186049906e-05, "loss": 3.9964, "step": 9851 }, { "epoch": 3.28183559590239, "grad_norm": 0.625, "learning_rate": 1.1764891384535852e-05, "loss": 4.0331, "step": 9852 }, { "epoch": 3.282168734904639, "grad_norm": 0.66796875, "learning_rate": 1.176422553334505e-05, "loss": 4.0366, "step": 9853 }, { "epoch": 3.2825018739068876, "grad_norm": 0.67578125, "learning_rate": 1.176355963248526e-05, "loss": 4.0423, "step": 9854 }, { "epoch": 3.2828350129091364, "grad_norm": 0.609375, "learning_rate": 1.1762893681964232e-05, "loss": 3.9886, "step": 9855 }, { "epoch": 3.283168151911385, "grad_norm": 0.6640625, "learning_rate": 1.176222768178973e-05, "loss": 4.0044, "step": 9856 }, { "epoch": 3.283501290913634, "grad_norm": 0.640625, "learning_rate": 1.1761561631969504e-05, "loss": 4.0614, "step": 9857 }, { "epoch": 3.2838344299158826, "grad_norm": 0.6953125, "learning_rate": 1.1760895532511314e-05, "loss": 4.0073, "step": 9858 }, { "epoch": 3.2841675689181313, "grad_norm": 0.66796875, "learning_rate": 1.1760229383422924e-05, "loss": 4.0198, "step": 9859 }, { "epoch": 3.2845007079203796, "grad_norm": 0.63671875, "learning_rate": 1.1759563184712087e-05, "loss": 4.0235, "step": 9860 }, { "epoch": 3.2848338469226284, "grad_norm": 0.6484375, "learning_rate": 1.1758896936386567e-05, "loss": 3.9803, "step": 9861 }, { "epoch": 3.285166985924877, "grad_norm": 0.671875, "learning_rate": 1.175823063845412e-05, "loss": 4.0014, "step": 9862 }, { "epoch": 3.285500124927126, "grad_norm": 0.6640625, "learning_rate": 1.1757564290922511e-05, "loss": 4.0278, "step": 9863 }, { "epoch": 3.2858332639293746, "grad_norm": 0.66015625, "learning_rate": 1.1756897893799499e-05, "loss": 3.9917, "step": 9864 }, { "epoch": 3.2861664029316233, "grad_norm": 0.66015625, "learning_rate": 1.1756231447092849e-05, "loss": 4.0232, "step": 9865 }, { "epoch": 3.286499541933872, "grad_norm": 0.671875, "learning_rate": 1.175556495081032e-05, "loss": 3.9016, "step": 9866 }, { "epoch": 3.2868326809361204, "grad_norm": 0.6640625, "learning_rate": 1.1754898404959677e-05, "loss": 4.0076, "step": 9867 }, { "epoch": 3.287165819938369, "grad_norm": 0.62109375, "learning_rate": 1.1754231809548685e-05, "loss": 4.0599, "step": 9868 }, { "epoch": 3.287498958940618, "grad_norm": 0.65234375, "learning_rate": 1.1753565164585103e-05, "loss": 3.9884, "step": 9869 }, { "epoch": 3.2878320979428666, "grad_norm": 0.6328125, "learning_rate": 1.1752898470076706e-05, "loss": 4.06, "step": 9870 }, { "epoch": 3.2881652369451153, "grad_norm": 0.640625, "learning_rate": 1.1752231726031249e-05, "loss": 3.9186, "step": 9871 }, { "epoch": 3.288498375947364, "grad_norm": 0.6484375, "learning_rate": 1.1751564932456504e-05, "loss": 4.009, "step": 9872 }, { "epoch": 3.288831514949613, "grad_norm": 0.66796875, "learning_rate": 1.1750898089360237e-05, "loss": 4.0195, "step": 9873 }, { "epoch": 3.2891646539518615, "grad_norm": 0.6328125, "learning_rate": 1.1750231196750214e-05, "loss": 4.0119, "step": 9874 }, { "epoch": 3.2894977929541103, "grad_norm": 0.703125, "learning_rate": 1.1749564254634204e-05, "loss": 4.0003, "step": 9875 }, { "epoch": 3.2898309319563586, "grad_norm": 0.65234375, "learning_rate": 1.1748897263019975e-05, "loss": 3.9932, "step": 9876 }, { "epoch": 3.2901640709586073, "grad_norm": 0.63671875, "learning_rate": 1.1748230221915294e-05, "loss": 4.006, "step": 9877 }, { "epoch": 3.290497209960856, "grad_norm": 0.640625, "learning_rate": 1.1747563131327935e-05, "loss": 4.0695, "step": 9878 }, { "epoch": 3.290830348963105, "grad_norm": 0.67578125, "learning_rate": 1.1746895991265667e-05, "loss": 4.0552, "step": 9879 }, { "epoch": 3.2911634879653535, "grad_norm": 0.6640625, "learning_rate": 1.1746228801736255e-05, "loss": 4.0562, "step": 9880 }, { "epoch": 3.2914966269676023, "grad_norm": 0.6484375, "learning_rate": 1.1745561562747479e-05, "loss": 4.0372, "step": 9881 }, { "epoch": 3.291829765969851, "grad_norm": 0.6953125, "learning_rate": 1.1744894274307104e-05, "loss": 4.0189, "step": 9882 }, { "epoch": 3.2921629049720997, "grad_norm": 0.62109375, "learning_rate": 1.1744226936422905e-05, "loss": 3.9967, "step": 9883 }, { "epoch": 3.2924960439743485, "grad_norm": 0.6484375, "learning_rate": 1.1743559549102659e-05, "loss": 4.0153, "step": 9884 }, { "epoch": 3.2928291829765968, "grad_norm": 0.66796875, "learning_rate": 1.1742892112354132e-05, "loss": 4.0222, "step": 9885 }, { "epoch": 3.2931623219788455, "grad_norm": 0.671875, "learning_rate": 1.1742224626185101e-05, "loss": 4.0096, "step": 9886 }, { "epoch": 3.2934954609810942, "grad_norm": 0.609375, "learning_rate": 1.1741557090603344e-05, "loss": 4.0916, "step": 9887 }, { "epoch": 3.293828599983343, "grad_norm": 0.609375, "learning_rate": 1.1740889505616636e-05, "loss": 4.0553, "step": 9888 }, { "epoch": 3.2941617389855917, "grad_norm": 0.65625, "learning_rate": 1.1740221871232749e-05, "loss": 3.9465, "step": 9889 }, { "epoch": 3.2944948779878405, "grad_norm": 0.6015625, "learning_rate": 1.173955418745946e-05, "loss": 4.0426, "step": 9890 }, { "epoch": 3.294828016990089, "grad_norm": 0.6875, "learning_rate": 1.173888645430455e-05, "loss": 3.9536, "step": 9891 }, { "epoch": 3.295161155992338, "grad_norm": 0.6875, "learning_rate": 1.1738218671775793e-05, "loss": 4.0489, "step": 9892 }, { "epoch": 3.2954942949945867, "grad_norm": 0.66015625, "learning_rate": 1.1737550839880968e-05, "loss": 4.0107, "step": 9893 }, { "epoch": 3.295827433996835, "grad_norm": 0.67578125, "learning_rate": 1.1736882958627856e-05, "loss": 4.006, "step": 9894 }, { "epoch": 3.2961605729990837, "grad_norm": 0.67578125, "learning_rate": 1.1736215028024233e-05, "loss": 3.9554, "step": 9895 }, { "epoch": 3.2964937120013325, "grad_norm": 0.671875, "learning_rate": 1.173554704807788e-05, "loss": 4.0359, "step": 9896 }, { "epoch": 3.296826851003581, "grad_norm": 0.65625, "learning_rate": 1.173487901879658e-05, "loss": 3.9837, "step": 9897 }, { "epoch": 3.29715999000583, "grad_norm": 0.67578125, "learning_rate": 1.173421094018811e-05, "loss": 4.03, "step": 9898 }, { "epoch": 3.2974931290080787, "grad_norm": 0.66796875, "learning_rate": 1.1733542812260256e-05, "loss": 3.9771, "step": 9899 }, { "epoch": 3.2978262680103274, "grad_norm": 0.703125, "learning_rate": 1.1732874635020797e-05, "loss": 4.0465, "step": 9900 }, { "epoch": 3.298159407012576, "grad_norm": 0.62890625, "learning_rate": 1.1732206408477518e-05, "loss": 4.0572, "step": 9901 }, { "epoch": 3.298492546014825, "grad_norm": 0.66796875, "learning_rate": 1.1731538132638199e-05, "loss": 4.0003, "step": 9902 }, { "epoch": 3.298825685017073, "grad_norm": 0.65234375, "learning_rate": 1.1730869807510629e-05, "loss": 4.1049, "step": 9903 }, { "epoch": 3.299158824019322, "grad_norm": 0.6328125, "learning_rate": 1.1730201433102587e-05, "loss": 4.0351, "step": 9904 }, { "epoch": 3.2994919630215707, "grad_norm": 0.68359375, "learning_rate": 1.1729533009421863e-05, "loss": 4.0095, "step": 9905 }, { "epoch": 3.2998251020238194, "grad_norm": 0.7109375, "learning_rate": 1.1728864536476239e-05, "loss": 4.0351, "step": 9906 }, { "epoch": 3.300158241026068, "grad_norm": 0.6796875, "learning_rate": 1.1728196014273503e-05, "loss": 3.9399, "step": 9907 }, { "epoch": 3.300491380028317, "grad_norm": 0.68359375, "learning_rate": 1.1727527442821443e-05, "loss": 4.0196, "step": 9908 }, { "epoch": 3.3008245190305656, "grad_norm": 0.68359375, "learning_rate": 1.1726858822127845e-05, "loss": 4.064, "step": 9909 }, { "epoch": 3.3011576580328144, "grad_norm": 0.67578125, "learning_rate": 1.1726190152200496e-05, "loss": 4.0505, "step": 9910 }, { "epoch": 3.301490797035063, "grad_norm": 0.6171875, "learning_rate": 1.1725521433047189e-05, "loss": 4.0703, "step": 9911 }, { "epoch": 3.3018239360373114, "grad_norm": 0.65234375, "learning_rate": 1.1724852664675704e-05, "loss": 3.9473, "step": 9912 }, { "epoch": 3.30215707503956, "grad_norm": 0.66796875, "learning_rate": 1.172418384709384e-05, "loss": 3.995, "step": 9913 }, { "epoch": 3.302490214041809, "grad_norm": 0.69140625, "learning_rate": 1.1723514980309384e-05, "loss": 3.9882, "step": 9914 }, { "epoch": 3.3028233530440576, "grad_norm": 0.65234375, "learning_rate": 1.1722846064330128e-05, "loss": 4.0608, "step": 9915 }, { "epoch": 3.3031564920463063, "grad_norm": 0.69140625, "learning_rate": 1.1722177099163858e-05, "loss": 4.0186, "step": 9916 }, { "epoch": 3.303489631048555, "grad_norm": 0.671875, "learning_rate": 1.172150808481837e-05, "loss": 3.9504, "step": 9917 }, { "epoch": 3.303822770050804, "grad_norm": 0.66015625, "learning_rate": 1.172083902130146e-05, "loss": 3.9794, "step": 9918 }, { "epoch": 3.3041559090530526, "grad_norm": 0.66796875, "learning_rate": 1.1720169908620916e-05, "loss": 3.992, "step": 9919 }, { "epoch": 3.3044890480553013, "grad_norm": 0.6484375, "learning_rate": 1.1719500746784533e-05, "loss": 3.9938, "step": 9920 }, { "epoch": 3.3048221870575496, "grad_norm": 0.6796875, "learning_rate": 1.1718831535800105e-05, "loss": 3.9346, "step": 9921 }, { "epoch": 3.3051553260597983, "grad_norm": 0.6953125, "learning_rate": 1.171816227567543e-05, "loss": 4.0132, "step": 9922 }, { "epoch": 3.305488465062047, "grad_norm": 0.67578125, "learning_rate": 1.1717492966418301e-05, "loss": 4.0447, "step": 9923 }, { "epoch": 3.305821604064296, "grad_norm": 0.65625, "learning_rate": 1.1716823608036513e-05, "loss": 4.018, "step": 9924 }, { "epoch": 3.3061547430665446, "grad_norm": 0.68359375, "learning_rate": 1.1716154200537865e-05, "loss": 3.9934, "step": 9925 }, { "epoch": 3.3064878820687933, "grad_norm": 0.6484375, "learning_rate": 1.1715484743930148e-05, "loss": 4.0362, "step": 9926 }, { "epoch": 3.306821021071042, "grad_norm": 0.62109375, "learning_rate": 1.171481523822117e-05, "loss": 4.0592, "step": 9927 }, { "epoch": 3.3071541600732908, "grad_norm": 0.65234375, "learning_rate": 1.1714145683418721e-05, "loss": 4.0045, "step": 9928 }, { "epoch": 3.3074872990755395, "grad_norm": 0.63671875, "learning_rate": 1.1713476079530602e-05, "loss": 3.9973, "step": 9929 }, { "epoch": 3.307820438077788, "grad_norm": 0.65234375, "learning_rate": 1.1712806426564615e-05, "loss": 4.0576, "step": 9930 }, { "epoch": 3.3081535770800365, "grad_norm": 0.65234375, "learning_rate": 1.1712136724528555e-05, "loss": 4.0975, "step": 9931 }, { "epoch": 3.3084867160822853, "grad_norm": 0.671875, "learning_rate": 1.1711466973430231e-05, "loss": 4.0557, "step": 9932 }, { "epoch": 3.308819855084534, "grad_norm": 0.63671875, "learning_rate": 1.1710797173277435e-05, "loss": 3.9949, "step": 9933 }, { "epoch": 3.3091529940867828, "grad_norm": 0.6484375, "learning_rate": 1.1710127324077972e-05, "loss": 4.0119, "step": 9934 }, { "epoch": 3.3094861330890315, "grad_norm": 0.6640625, "learning_rate": 1.1709457425839645e-05, "loss": 3.9985, "step": 9935 }, { "epoch": 3.3098192720912802, "grad_norm": 0.65234375, "learning_rate": 1.1708787478570259e-05, "loss": 4.0125, "step": 9936 }, { "epoch": 3.3101524110935285, "grad_norm": 0.6640625, "learning_rate": 1.1708117482277614e-05, "loss": 4.0671, "step": 9937 }, { "epoch": 3.3104855500957773, "grad_norm": 0.6875, "learning_rate": 1.1707447436969514e-05, "loss": 4.0504, "step": 9938 }, { "epoch": 3.310818689098026, "grad_norm": 0.67578125, "learning_rate": 1.1706777342653767e-05, "loss": 4.0452, "step": 9939 }, { "epoch": 3.3111518281002748, "grad_norm": 0.63671875, "learning_rate": 1.1706107199338173e-05, "loss": 4.0437, "step": 9940 }, { "epoch": 3.3114849671025235, "grad_norm": 0.70703125, "learning_rate": 1.1705437007030542e-05, "loss": 4.0103, "step": 9941 }, { "epoch": 3.3118181061047722, "grad_norm": 0.67578125, "learning_rate": 1.170476676573868e-05, "loss": 3.9986, "step": 9942 }, { "epoch": 3.312151245107021, "grad_norm": 0.703125, "learning_rate": 1.1704096475470389e-05, "loss": 3.9603, "step": 9943 }, { "epoch": 3.3124843841092697, "grad_norm": 0.69140625, "learning_rate": 1.1703426136233482e-05, "loss": 4.0301, "step": 9944 }, { "epoch": 3.3128175231115184, "grad_norm": 0.66796875, "learning_rate": 1.1702755748035766e-05, "loss": 3.9644, "step": 9945 }, { "epoch": 3.3131506621137667, "grad_norm": 0.6328125, "learning_rate": 1.170208531088505e-05, "loss": 4.0583, "step": 9946 }, { "epoch": 3.3134838011160155, "grad_norm": 0.65234375, "learning_rate": 1.170141482478914e-05, "loss": 3.9322, "step": 9947 }, { "epoch": 3.313816940118264, "grad_norm": 0.6640625, "learning_rate": 1.1700744289755846e-05, "loss": 3.9808, "step": 9948 }, { "epoch": 3.314150079120513, "grad_norm": 0.65234375, "learning_rate": 1.1700073705792981e-05, "loss": 3.9636, "step": 9949 }, { "epoch": 3.3144832181227617, "grad_norm": 0.64453125, "learning_rate": 1.1699403072908356e-05, "loss": 4.0452, "step": 9950 }, { "epoch": 3.3148163571250104, "grad_norm": 0.640625, "learning_rate": 1.169873239110978e-05, "loss": 3.9029, "step": 9951 }, { "epoch": 3.315149496127259, "grad_norm": 0.70703125, "learning_rate": 1.1698061660405066e-05, "loss": 4.0173, "step": 9952 }, { "epoch": 3.315482635129508, "grad_norm": 0.6484375, "learning_rate": 1.1697390880802027e-05, "loss": 3.9921, "step": 9953 }, { "epoch": 3.3158157741317567, "grad_norm": 0.703125, "learning_rate": 1.1696720052308476e-05, "loss": 4.1111, "step": 9954 }, { "epoch": 3.316148913134005, "grad_norm": 0.65234375, "learning_rate": 1.1696049174932228e-05, "loss": 4.0681, "step": 9955 }, { "epoch": 3.3164820521362537, "grad_norm": 0.66015625, "learning_rate": 1.169537824868109e-05, "loss": 4.023, "step": 9956 }, { "epoch": 3.3168151911385024, "grad_norm": 0.62890625, "learning_rate": 1.1694707273562888e-05, "loss": 4.0846, "step": 9957 }, { "epoch": 3.317148330140751, "grad_norm": 0.69140625, "learning_rate": 1.1694036249585431e-05, "loss": 3.9702, "step": 9958 }, { "epoch": 3.317481469143, "grad_norm": 0.62890625, "learning_rate": 1.1693365176756535e-05, "loss": 4.0942, "step": 9959 }, { "epoch": 3.3178146081452486, "grad_norm": 0.6484375, "learning_rate": 1.1692694055084019e-05, "loss": 4.04, "step": 9960 }, { "epoch": 3.3181477471474974, "grad_norm": 0.66015625, "learning_rate": 1.1692022884575697e-05, "loss": 4.0512, "step": 9961 }, { "epoch": 3.318480886149746, "grad_norm": 0.69921875, "learning_rate": 1.169135166523939e-05, "loss": 4.0015, "step": 9962 }, { "epoch": 3.318814025151995, "grad_norm": 0.671875, "learning_rate": 1.1690680397082911e-05, "loss": 3.9932, "step": 9963 }, { "epoch": 3.319147164154243, "grad_norm": 0.671875, "learning_rate": 1.1690009080114085e-05, "loss": 4.0483, "step": 9964 }, { "epoch": 3.319480303156492, "grad_norm": 0.6640625, "learning_rate": 1.168933771434073e-05, "loss": 4.0197, "step": 9965 }, { "epoch": 3.3198134421587406, "grad_norm": 0.6328125, "learning_rate": 1.1688666299770663e-05, "loss": 3.9914, "step": 9966 }, { "epoch": 3.3201465811609894, "grad_norm": 0.703125, "learning_rate": 1.1687994836411706e-05, "loss": 3.9557, "step": 9967 }, { "epoch": 3.320479720163238, "grad_norm": 0.73046875, "learning_rate": 1.1687323324271682e-05, "loss": 3.964, "step": 9968 }, { "epoch": 3.320812859165487, "grad_norm": 0.67578125, "learning_rate": 1.1686651763358408e-05, "loss": 4.0071, "step": 9969 }, { "epoch": 3.3211459981677356, "grad_norm": 0.69921875, "learning_rate": 1.1685980153679712e-05, "loss": 4.0001, "step": 9970 }, { "epoch": 3.3214791371699843, "grad_norm": 0.61328125, "learning_rate": 1.1685308495243413e-05, "loss": 3.9806, "step": 9971 }, { "epoch": 3.321812276172233, "grad_norm": 0.640625, "learning_rate": 1.1684636788057335e-05, "loss": 4.0122, "step": 9972 }, { "epoch": 3.3221454151744814, "grad_norm": 0.65234375, "learning_rate": 1.1683965032129303e-05, "loss": 4.0149, "step": 9973 }, { "epoch": 3.32247855417673, "grad_norm": 0.65625, "learning_rate": 1.168329322746714e-05, "loss": 3.9777, "step": 9974 }, { "epoch": 3.322811693178979, "grad_norm": 0.640625, "learning_rate": 1.1682621374078674e-05, "loss": 3.9842, "step": 9975 }, { "epoch": 3.3231448321812276, "grad_norm": 0.6875, "learning_rate": 1.1681949471971725e-05, "loss": 3.9792, "step": 9976 }, { "epoch": 3.3234779711834763, "grad_norm": 0.6796875, "learning_rate": 1.1681277521154127e-05, "loss": 3.9997, "step": 9977 }, { "epoch": 3.323811110185725, "grad_norm": 0.6875, "learning_rate": 1.16806055216337e-05, "loss": 4.0152, "step": 9978 }, { "epoch": 3.324144249187974, "grad_norm": 0.65234375, "learning_rate": 1.1679933473418273e-05, "loss": 4.0144, "step": 9979 }, { "epoch": 3.3244773881902225, "grad_norm": 0.65625, "learning_rate": 1.1679261376515678e-05, "loss": 4.0148, "step": 9980 }, { "epoch": 3.3248105271924713, "grad_norm": 0.7109375, "learning_rate": 1.1678589230933735e-05, "loss": 4.0341, "step": 9981 }, { "epoch": 3.3251436661947196, "grad_norm": 0.6875, "learning_rate": 1.1677917036680284e-05, "loss": 4.0565, "step": 9982 }, { "epoch": 3.3254768051969683, "grad_norm": 0.65234375, "learning_rate": 1.1677244793763146e-05, "loss": 4.0317, "step": 9983 }, { "epoch": 3.325809944199217, "grad_norm": 0.671875, "learning_rate": 1.1676572502190155e-05, "loss": 4.0513, "step": 9984 }, { "epoch": 3.326143083201466, "grad_norm": 0.6875, "learning_rate": 1.167590016196914e-05, "loss": 3.9678, "step": 9985 }, { "epoch": 3.3264762222037145, "grad_norm": 0.64453125, "learning_rate": 1.1675227773107932e-05, "loss": 4.0435, "step": 9986 }, { "epoch": 3.3268093612059633, "grad_norm": 0.61328125, "learning_rate": 1.1674555335614364e-05, "loss": 4.0339, "step": 9987 }, { "epoch": 3.327142500208212, "grad_norm": 0.67578125, "learning_rate": 1.167388284949627e-05, "loss": 3.9837, "step": 9988 }, { "epoch": 3.3274756392104607, "grad_norm": 0.7109375, "learning_rate": 1.1673210314761481e-05, "loss": 3.958, "step": 9989 }, { "epoch": 3.3278087782127095, "grad_norm": 0.671875, "learning_rate": 1.167253773141783e-05, "loss": 4.0316, "step": 9990 }, { "epoch": 3.328141917214958, "grad_norm": 0.68359375, "learning_rate": 1.1671865099473154e-05, "loss": 3.9967, "step": 9991 }, { "epoch": 3.3284750562172065, "grad_norm": 0.671875, "learning_rate": 1.1671192418935287e-05, "loss": 4.0197, "step": 9992 }, { "epoch": 3.3288081952194553, "grad_norm": 0.640625, "learning_rate": 1.1670519689812062e-05, "loss": 3.9903, "step": 9993 }, { "epoch": 3.329141334221704, "grad_norm": 0.68359375, "learning_rate": 1.1669846912111314e-05, "loss": 4.0274, "step": 9994 }, { "epoch": 3.3294744732239527, "grad_norm": 0.69921875, "learning_rate": 1.1669174085840883e-05, "loss": 3.929, "step": 9995 }, { "epoch": 3.3298076122262015, "grad_norm": 0.65234375, "learning_rate": 1.1668501211008602e-05, "loss": 4.0866, "step": 9996 }, { "epoch": 3.33014075122845, "grad_norm": 0.671875, "learning_rate": 1.1667828287622313e-05, "loss": 3.9764, "step": 9997 }, { "epoch": 3.330473890230699, "grad_norm": 0.640625, "learning_rate": 1.166715531568985e-05, "loss": 4.0583, "step": 9998 }, { "epoch": 3.3308070292329477, "grad_norm": 0.6953125, "learning_rate": 1.1666482295219056e-05, "loss": 3.9983, "step": 9999 }, { "epoch": 3.331140168235196, "grad_norm": 0.6640625, "learning_rate": 1.1665809226217767e-05, "loss": 4.0399, "step": 10000 }, { "epoch": 3.3314733072374447, "grad_norm": 0.66015625, "learning_rate": 1.1665136108693824e-05, "loss": 4.0763, "step": 10001 }, { "epoch": 3.3318064462396935, "grad_norm": 0.66015625, "learning_rate": 1.1664462942655067e-05, "loss": 3.9939, "step": 10002 }, { "epoch": 3.332139585241942, "grad_norm": 0.640625, "learning_rate": 1.1663789728109335e-05, "loss": 4.0245, "step": 10003 }, { "epoch": 3.332472724244191, "grad_norm": 0.6328125, "learning_rate": 1.1663116465064474e-05, "loss": 4.0459, "step": 10004 }, { "epoch": 3.3328058632464397, "grad_norm": 0.6328125, "learning_rate": 1.1662443153528323e-05, "loss": 3.9882, "step": 10005 }, { "epoch": 3.3331390022486884, "grad_norm": 0.65234375, "learning_rate": 1.1661769793508724e-05, "loss": 4.0495, "step": 10006 }, { "epoch": 3.3334721412509367, "grad_norm": 0.66796875, "learning_rate": 1.1661096385013522e-05, "loss": 4.0167, "step": 10007 }, { "epoch": 3.3338052802531855, "grad_norm": 0.671875, "learning_rate": 1.1660422928050559e-05, "loss": 3.9445, "step": 10008 }, { "epoch": 3.334138419255434, "grad_norm": 0.671875, "learning_rate": 1.1659749422627684e-05, "loss": 4.0094, "step": 10009 }, { "epoch": 3.334471558257683, "grad_norm": 0.703125, "learning_rate": 1.1659075868752734e-05, "loss": 4.0222, "step": 10010 }, { "epoch": 3.3348046972599317, "grad_norm": 0.69140625, "learning_rate": 1.1658402266433558e-05, "loss": 4.0234, "step": 10011 }, { "epoch": 3.3351378362621804, "grad_norm": 0.68359375, "learning_rate": 1.1657728615678007e-05, "loss": 3.9778, "step": 10012 }, { "epoch": 3.335470975264429, "grad_norm": 0.66796875, "learning_rate": 1.1657054916493921e-05, "loss": 3.9933, "step": 10013 }, { "epoch": 3.335804114266678, "grad_norm": 0.6640625, "learning_rate": 1.165638116888915e-05, "loss": 4.0011, "step": 10014 }, { "epoch": 3.3361372532689266, "grad_norm": 0.65625, "learning_rate": 1.165570737287154e-05, "loss": 4.0473, "step": 10015 }, { "epoch": 3.336470392271175, "grad_norm": 0.68359375, "learning_rate": 1.165503352844894e-05, "loss": 4.0265, "step": 10016 }, { "epoch": 3.3368035312734237, "grad_norm": 0.66796875, "learning_rate": 1.16543596356292e-05, "loss": 3.9742, "step": 10017 }, { "epoch": 3.3371366702756724, "grad_norm": 0.6796875, "learning_rate": 1.1653685694420169e-05, "loss": 3.9808, "step": 10018 }, { "epoch": 3.337469809277921, "grad_norm": 0.74609375, "learning_rate": 1.1653011704829697e-05, "loss": 3.9989, "step": 10019 }, { "epoch": 3.33780294828017, "grad_norm": 0.65625, "learning_rate": 1.165233766686563e-05, "loss": 3.9917, "step": 10020 }, { "epoch": 3.3381360872824186, "grad_norm": 0.69921875, "learning_rate": 1.1651663580535827e-05, "loss": 3.9926, "step": 10021 }, { "epoch": 3.3384692262846674, "grad_norm": 0.7421875, "learning_rate": 1.1650989445848137e-05, "loss": 3.9978, "step": 10022 }, { "epoch": 3.338802365286916, "grad_norm": 0.68359375, "learning_rate": 1.165031526281041e-05, "loss": 4.0322, "step": 10023 }, { "epoch": 3.339135504289165, "grad_norm": 0.64453125, "learning_rate": 1.1649641031430497e-05, "loss": 4.0124, "step": 10024 }, { "epoch": 3.339468643291413, "grad_norm": 0.6953125, "learning_rate": 1.1648966751716257e-05, "loss": 3.9569, "step": 10025 }, { "epoch": 3.339801782293662, "grad_norm": 0.66796875, "learning_rate": 1.164829242367554e-05, "loss": 4.03, "step": 10026 }, { "epoch": 3.3401349212959106, "grad_norm": 0.6796875, "learning_rate": 1.1647618047316202e-05, "loss": 4.0163, "step": 10027 }, { "epoch": 3.3404680602981593, "grad_norm": 0.6640625, "learning_rate": 1.1646943622646096e-05, "loss": 4.0185, "step": 10028 }, { "epoch": 3.340801199300408, "grad_norm": 0.6484375, "learning_rate": 1.1646269149673079e-05, "loss": 3.9613, "step": 10029 }, { "epoch": 3.341134338302657, "grad_norm": 0.6640625, "learning_rate": 1.1645594628405008e-05, "loss": 4.0187, "step": 10030 }, { "epoch": 3.3414674773049056, "grad_norm": 0.67578125, "learning_rate": 1.164492005884974e-05, "loss": 3.9475, "step": 10031 }, { "epoch": 3.3418006163071543, "grad_norm": 0.69921875, "learning_rate": 1.164424544101513e-05, "loss": 4.0263, "step": 10032 }, { "epoch": 3.342133755309403, "grad_norm": 0.64453125, "learning_rate": 1.1643570774909036e-05, "loss": 4.031, "step": 10033 }, { "epoch": 3.3424668943116513, "grad_norm": 0.640625, "learning_rate": 1.1642896060539319e-05, "loss": 4.0295, "step": 10034 }, { "epoch": 3.3428000333139, "grad_norm": 0.66015625, "learning_rate": 1.1642221297913838e-05, "loss": 3.9941, "step": 10035 }, { "epoch": 3.343133172316149, "grad_norm": 0.65625, "learning_rate": 1.164154648704045e-05, "loss": 3.9985, "step": 10036 }, { "epoch": 3.3434663113183976, "grad_norm": 0.70703125, "learning_rate": 1.1640871627927014e-05, "loss": 3.9627, "step": 10037 }, { "epoch": 3.3437994503206463, "grad_norm": 0.6953125, "learning_rate": 1.1640196720581392e-05, "loss": 3.9865, "step": 10038 }, { "epoch": 3.344132589322895, "grad_norm": 0.68359375, "learning_rate": 1.1639521765011447e-05, "loss": 4.0324, "step": 10039 }, { "epoch": 3.3444657283251438, "grad_norm": 0.64453125, "learning_rate": 1.1638846761225042e-05, "loss": 4.1003, "step": 10040 }, { "epoch": 3.3447988673273925, "grad_norm": 0.72265625, "learning_rate": 1.1638171709230035e-05, "loss": 3.9675, "step": 10041 }, { "epoch": 3.3451320063296412, "grad_norm": 0.68359375, "learning_rate": 1.163749660903429e-05, "loss": 4.0427, "step": 10042 }, { "epoch": 3.3454651453318895, "grad_norm": 0.6640625, "learning_rate": 1.1636821460645674e-05, "loss": 4.0484, "step": 10043 }, { "epoch": 3.3457982843341383, "grad_norm": 0.74609375, "learning_rate": 1.1636146264072045e-05, "loss": 3.934, "step": 10044 }, { "epoch": 3.346131423336387, "grad_norm": 0.6640625, "learning_rate": 1.1635471019321273e-05, "loss": 4.0015, "step": 10045 }, { "epoch": 3.3464645623386358, "grad_norm": 0.62109375, "learning_rate": 1.1634795726401222e-05, "loss": 4.1159, "step": 10046 }, { "epoch": 3.3467977013408845, "grad_norm": 0.62890625, "learning_rate": 1.1634120385319754e-05, "loss": 4.0323, "step": 10047 }, { "epoch": 3.3471308403431332, "grad_norm": 0.6640625, "learning_rate": 1.1633444996084738e-05, "loss": 4.0984, "step": 10048 }, { "epoch": 3.347463979345382, "grad_norm": 0.68359375, "learning_rate": 1.1632769558704045e-05, "loss": 3.9359, "step": 10049 }, { "epoch": 3.3477971183476307, "grad_norm": 0.734375, "learning_rate": 1.1632094073185532e-05, "loss": 4.0514, "step": 10050 }, { "epoch": 3.3481302573498795, "grad_norm": 0.6640625, "learning_rate": 1.1631418539537078e-05, "loss": 4.0485, "step": 10051 }, { "epoch": 3.3484633963521278, "grad_norm": 0.71875, "learning_rate": 1.1630742957766546e-05, "loss": 3.9328, "step": 10052 }, { "epoch": 3.3487965353543765, "grad_norm": 0.6796875, "learning_rate": 1.1630067327881804e-05, "loss": 3.9779, "step": 10053 }, { "epoch": 3.3491296743566252, "grad_norm": 0.640625, "learning_rate": 1.1629391649890724e-05, "loss": 4.0358, "step": 10054 }, { "epoch": 3.349462813358874, "grad_norm": 0.66015625, "learning_rate": 1.1628715923801177e-05, "loss": 4.0716, "step": 10055 }, { "epoch": 3.3497959523611227, "grad_norm": 0.64453125, "learning_rate": 1.162804014962103e-05, "loss": 4.0086, "step": 10056 }, { "epoch": 3.3501290913633714, "grad_norm": 0.67578125, "learning_rate": 1.1627364327358157e-05, "loss": 4.0009, "step": 10057 }, { "epoch": 3.35046223036562, "grad_norm": 0.69921875, "learning_rate": 1.1626688457020432e-05, "loss": 4.0445, "step": 10058 }, { "epoch": 3.350795369367869, "grad_norm": 0.72265625, "learning_rate": 1.1626012538615721e-05, "loss": 4.088, "step": 10059 }, { "epoch": 3.3511285083701177, "grad_norm": 0.6875, "learning_rate": 1.1625336572151903e-05, "loss": 4.0389, "step": 10060 }, { "epoch": 3.351461647372366, "grad_norm": 0.66796875, "learning_rate": 1.1624660557636847e-05, "loss": 3.9811, "step": 10061 }, { "epoch": 3.3517947863746147, "grad_norm": 0.640625, "learning_rate": 1.1623984495078434e-05, "loss": 4.0063, "step": 10062 }, { "epoch": 3.3521279253768634, "grad_norm": 0.6796875, "learning_rate": 1.1623308384484531e-05, "loss": 3.9986, "step": 10063 }, { "epoch": 3.352461064379112, "grad_norm": 0.6953125, "learning_rate": 1.1622632225863018e-05, "loss": 4.0463, "step": 10064 }, { "epoch": 3.352794203381361, "grad_norm": 0.65625, "learning_rate": 1.1621956019221767e-05, "loss": 4.0559, "step": 10065 }, { "epoch": 3.3531273423836097, "grad_norm": 0.65234375, "learning_rate": 1.1621279764568658e-05, "loss": 4.0128, "step": 10066 }, { "epoch": 3.3534604813858584, "grad_norm": 0.65625, "learning_rate": 1.1620603461911567e-05, "loss": 3.98, "step": 10067 }, { "epoch": 3.3537936203881067, "grad_norm": 0.671875, "learning_rate": 1.161992711125837e-05, "loss": 3.9889, "step": 10068 }, { "epoch": 3.354126759390356, "grad_norm": 0.65234375, "learning_rate": 1.1619250712616947e-05, "loss": 4.0342, "step": 10069 }, { "epoch": 3.354459898392604, "grad_norm": 0.6484375, "learning_rate": 1.1618574265995174e-05, "loss": 4.0044, "step": 10070 }, { "epoch": 3.354793037394853, "grad_norm": 0.6875, "learning_rate": 1.1617897771400934e-05, "loss": 3.9779, "step": 10071 }, { "epoch": 3.3551261763971016, "grad_norm": 0.6640625, "learning_rate": 1.1617221228842102e-05, "loss": 4.0297, "step": 10072 }, { "epoch": 3.3554593153993504, "grad_norm": 0.640625, "learning_rate": 1.161654463832656e-05, "loss": 3.9849, "step": 10073 }, { "epoch": 3.355792454401599, "grad_norm": 0.63671875, "learning_rate": 1.161586799986219e-05, "loss": 3.9696, "step": 10074 }, { "epoch": 3.356125593403848, "grad_norm": 0.65234375, "learning_rate": 1.1615191313456874e-05, "loss": 3.9898, "step": 10075 }, { "epoch": 3.3564587324060966, "grad_norm": 0.66796875, "learning_rate": 1.1614514579118491e-05, "loss": 4.1148, "step": 10076 }, { "epoch": 3.356791871408345, "grad_norm": 0.640625, "learning_rate": 1.1613837796854926e-05, "loss": 4.0426, "step": 10077 }, { "epoch": 3.3571250104105936, "grad_norm": 0.6640625, "learning_rate": 1.161316096667406e-05, "loss": 3.9418, "step": 10078 }, { "epoch": 3.3574581494128424, "grad_norm": 0.6875, "learning_rate": 1.1612484088583779e-05, "loss": 3.9864, "step": 10079 }, { "epoch": 3.357791288415091, "grad_norm": 0.65625, "learning_rate": 1.161180716259197e-05, "loss": 4.0325, "step": 10080 }, { "epoch": 3.35812442741734, "grad_norm": 0.6953125, "learning_rate": 1.1611130188706508e-05, "loss": 3.9536, "step": 10081 }, { "epoch": 3.3584575664195886, "grad_norm": 0.67578125, "learning_rate": 1.1610453166935284e-05, "loss": 4.0, "step": 10082 }, { "epoch": 3.3587907054218373, "grad_norm": 0.65625, "learning_rate": 1.1609776097286187e-05, "loss": 4.0207, "step": 10083 }, { "epoch": 3.359123844424086, "grad_norm": 0.66796875, "learning_rate": 1.1609098979767098e-05, "loss": 4.0098, "step": 10084 }, { "epoch": 3.359456983426335, "grad_norm": 0.65625, "learning_rate": 1.1608421814385908e-05, "loss": 4.1055, "step": 10085 }, { "epoch": 3.359790122428583, "grad_norm": 0.65625, "learning_rate": 1.1607744601150502e-05, "loss": 4.0106, "step": 10086 }, { "epoch": 3.360123261430832, "grad_norm": 0.62890625, "learning_rate": 1.1607067340068767e-05, "loss": 3.9734, "step": 10087 }, { "epoch": 3.3604564004330806, "grad_norm": 0.64453125, "learning_rate": 1.1606390031148595e-05, "loss": 4.0712, "step": 10088 }, { "epoch": 3.3607895394353293, "grad_norm": 0.6328125, "learning_rate": 1.1605712674397874e-05, "loss": 3.9483, "step": 10089 }, { "epoch": 3.361122678437578, "grad_norm": 0.640625, "learning_rate": 1.1605035269824492e-05, "loss": 3.9746, "step": 10090 }, { "epoch": 3.361455817439827, "grad_norm": 0.65234375, "learning_rate": 1.1604357817436341e-05, "loss": 4.081, "step": 10091 }, { "epoch": 3.3617889564420755, "grad_norm": 0.66796875, "learning_rate": 1.1603680317241314e-05, "loss": 4.0271, "step": 10092 }, { "epoch": 3.3621220954443243, "grad_norm": 0.7109375, "learning_rate": 1.1603002769247295e-05, "loss": 3.9353, "step": 10093 }, { "epoch": 3.362455234446573, "grad_norm": 0.66796875, "learning_rate": 1.1602325173462186e-05, "loss": 3.9477, "step": 10094 }, { "epoch": 3.3627883734488213, "grad_norm": 0.65625, "learning_rate": 1.160164752989387e-05, "loss": 4.0116, "step": 10095 }, { "epoch": 3.36312151245107, "grad_norm": 0.63671875, "learning_rate": 1.1600969838550247e-05, "loss": 4.016, "step": 10096 }, { "epoch": 3.363454651453319, "grad_norm": 0.65625, "learning_rate": 1.1600292099439208e-05, "loss": 4.0392, "step": 10097 }, { "epoch": 3.3637877904555675, "grad_norm": 0.6484375, "learning_rate": 1.1599614312568646e-05, "loss": 3.9996, "step": 10098 }, { "epoch": 3.3641209294578163, "grad_norm": 0.66796875, "learning_rate": 1.1598936477946459e-05, "loss": 3.995, "step": 10099 }, { "epoch": 3.364454068460065, "grad_norm": 0.67578125, "learning_rate": 1.1598258595580538e-05, "loss": 3.9548, "step": 10100 }, { "epoch": 3.3647872074623137, "grad_norm": 0.67578125, "learning_rate": 1.1597580665478781e-05, "loss": 3.9462, "step": 10101 }, { "epoch": 3.3651203464645625, "grad_norm": 0.6875, "learning_rate": 1.1596902687649085e-05, "loss": 3.9179, "step": 10102 }, { "epoch": 3.3654534854668112, "grad_norm": 0.65625, "learning_rate": 1.159622466209935e-05, "loss": 4.0176, "step": 10103 }, { "epoch": 3.3657866244690595, "grad_norm": 0.703125, "learning_rate": 1.1595546588837466e-05, "loss": 4.0297, "step": 10104 }, { "epoch": 3.3661197634713083, "grad_norm": 0.65625, "learning_rate": 1.1594868467871338e-05, "loss": 4.0163, "step": 10105 }, { "epoch": 3.366452902473557, "grad_norm": 0.6484375, "learning_rate": 1.1594190299208858e-05, "loss": 4.1098, "step": 10106 }, { "epoch": 3.3667860414758057, "grad_norm": 0.6875, "learning_rate": 1.1593512082857933e-05, "loss": 3.9301, "step": 10107 }, { "epoch": 3.3671191804780545, "grad_norm": 0.65625, "learning_rate": 1.1592833818826458e-05, "loss": 4.0198, "step": 10108 }, { "epoch": 3.367452319480303, "grad_norm": 0.6328125, "learning_rate": 1.1592155507122331e-05, "loss": 4.0694, "step": 10109 }, { "epoch": 3.367785458482552, "grad_norm": 0.640625, "learning_rate": 1.1591477147753457e-05, "loss": 4.0382, "step": 10110 }, { "epoch": 3.3681185974848007, "grad_norm": 0.6640625, "learning_rate": 1.1590798740727738e-05, "loss": 4.0269, "step": 10111 }, { "epoch": 3.3684517364870494, "grad_norm": 0.6796875, "learning_rate": 1.1590120286053075e-05, "loss": 3.9817, "step": 10112 }, { "epoch": 3.3687848754892977, "grad_norm": 0.6640625, "learning_rate": 1.1589441783737368e-05, "loss": 3.9941, "step": 10113 }, { "epoch": 3.3691180144915465, "grad_norm": 0.68359375, "learning_rate": 1.1588763233788522e-05, "loss": 3.9827, "step": 10114 }, { "epoch": 3.369451153493795, "grad_norm": 0.69140625, "learning_rate": 1.1588084636214442e-05, "loss": 4.0322, "step": 10115 }, { "epoch": 3.369784292496044, "grad_norm": 0.66015625, "learning_rate": 1.158740599102303e-05, "loss": 4.082, "step": 10116 }, { "epoch": 3.3701174314982927, "grad_norm": 0.68359375, "learning_rate": 1.1586727298222192e-05, "loss": 3.9829, "step": 10117 }, { "epoch": 3.3704505705005414, "grad_norm": 0.67578125, "learning_rate": 1.1586048557819831e-05, "loss": 4.0584, "step": 10118 }, { "epoch": 3.37078370950279, "grad_norm": 0.65625, "learning_rate": 1.1585369769823857e-05, "loss": 3.9895, "step": 10119 }, { "epoch": 3.371116848505039, "grad_norm": 0.69140625, "learning_rate": 1.1584690934242174e-05, "loss": 3.9588, "step": 10120 }, { "epoch": 3.3714499875072876, "grad_norm": 0.6953125, "learning_rate": 1.1584012051082688e-05, "loss": 4.1114, "step": 10121 }, { "epoch": 3.371783126509536, "grad_norm": 0.671875, "learning_rate": 1.1583333120353305e-05, "loss": 4.0646, "step": 10122 }, { "epoch": 3.3721162655117847, "grad_norm": 0.65234375, "learning_rate": 1.158265414206194e-05, "loss": 3.9702, "step": 10123 }, { "epoch": 3.3724494045140334, "grad_norm": 0.671875, "learning_rate": 1.1581975116216495e-05, "loss": 3.9783, "step": 10124 }, { "epoch": 3.372782543516282, "grad_norm": 0.68359375, "learning_rate": 1.1581296042824882e-05, "loss": 3.9605, "step": 10125 }, { "epoch": 3.373115682518531, "grad_norm": 0.66015625, "learning_rate": 1.158061692189501e-05, "loss": 3.965, "step": 10126 }, { "epoch": 3.3734488215207796, "grad_norm": 0.7109375, "learning_rate": 1.157993775343479e-05, "loss": 3.9863, "step": 10127 }, { "epoch": 3.3737819605230284, "grad_norm": 0.671875, "learning_rate": 1.157925853745213e-05, "loss": 3.9994, "step": 10128 }, { "epoch": 3.374115099525277, "grad_norm": 0.67578125, "learning_rate": 1.1578579273954948e-05, "loss": 3.9871, "step": 10129 }, { "epoch": 3.374448238527526, "grad_norm": 0.68359375, "learning_rate": 1.157789996295115e-05, "loss": 4.0367, "step": 10130 }, { "epoch": 3.374781377529774, "grad_norm": 0.69921875, "learning_rate": 1.1577220604448648e-05, "loss": 4.0107, "step": 10131 }, { "epoch": 3.375114516532023, "grad_norm": 0.640625, "learning_rate": 1.1576541198455358e-05, "loss": 4.0376, "step": 10132 }, { "epoch": 3.3754476555342716, "grad_norm": 0.6875, "learning_rate": 1.1575861744979193e-05, "loss": 3.9316, "step": 10133 }, { "epoch": 3.3757807945365204, "grad_norm": 0.671875, "learning_rate": 1.1575182244028069e-05, "loss": 4.0365, "step": 10134 }, { "epoch": 3.376113933538769, "grad_norm": 0.68359375, "learning_rate": 1.1574502695609897e-05, "loss": 3.9461, "step": 10135 }, { "epoch": 3.376447072541018, "grad_norm": 0.71875, "learning_rate": 1.1573823099732594e-05, "loss": 3.985, "step": 10136 }, { "epoch": 3.3767802115432666, "grad_norm": 0.66015625, "learning_rate": 1.1573143456404073e-05, "loss": 3.9582, "step": 10137 }, { "epoch": 3.377113350545515, "grad_norm": 0.68359375, "learning_rate": 1.1572463765632258e-05, "loss": 4.0154, "step": 10138 }, { "epoch": 3.377446489547764, "grad_norm": 0.62890625, "learning_rate": 1.157178402742506e-05, "loss": 4.073, "step": 10139 }, { "epoch": 3.3777796285500123, "grad_norm": 0.67578125, "learning_rate": 1.1571104241790396e-05, "loss": 3.9367, "step": 10140 }, { "epoch": 3.378112767552261, "grad_norm": 0.6796875, "learning_rate": 1.1570424408736187e-05, "loss": 3.9486, "step": 10141 }, { "epoch": 3.37844590655451, "grad_norm": 0.640625, "learning_rate": 1.1569744528270347e-05, "loss": 3.9654, "step": 10142 }, { "epoch": 3.3787790455567586, "grad_norm": 0.671875, "learning_rate": 1.1569064600400803e-05, "loss": 4.0056, "step": 10143 }, { "epoch": 3.3791121845590073, "grad_norm": 0.640625, "learning_rate": 1.1568384625135466e-05, "loss": 4.0232, "step": 10144 }, { "epoch": 3.379445323561256, "grad_norm": 0.6640625, "learning_rate": 1.1567704602482262e-05, "loss": 4.0875, "step": 10145 }, { "epoch": 3.379778462563505, "grad_norm": 0.69921875, "learning_rate": 1.1567024532449109e-05, "loss": 3.9137, "step": 10146 }, { "epoch": 3.380111601565753, "grad_norm": 0.66015625, "learning_rate": 1.156634441504393e-05, "loss": 4.0834, "step": 10147 }, { "epoch": 3.380444740568002, "grad_norm": 0.6484375, "learning_rate": 1.1565664250274646e-05, "loss": 4.0578, "step": 10148 }, { "epoch": 3.3807778795702506, "grad_norm": 0.671875, "learning_rate": 1.1564984038149181e-05, "loss": 4.0428, "step": 10149 }, { "epoch": 3.3811110185724993, "grad_norm": 0.6640625, "learning_rate": 1.1564303778675457e-05, "loss": 4.0487, "step": 10150 }, { "epoch": 3.381444157574748, "grad_norm": 0.69140625, "learning_rate": 1.1563623471861394e-05, "loss": 3.9664, "step": 10151 }, { "epoch": 3.3817772965769968, "grad_norm": 0.6484375, "learning_rate": 1.1562943117714923e-05, "loss": 4.0893, "step": 10152 }, { "epoch": 3.3821104355792455, "grad_norm": 0.64453125, "learning_rate": 1.1562262716243963e-05, "loss": 4.0036, "step": 10153 }, { "epoch": 3.3824435745814942, "grad_norm": 0.66015625, "learning_rate": 1.1561582267456442e-05, "loss": 4.0767, "step": 10154 }, { "epoch": 3.382776713583743, "grad_norm": 0.65234375, "learning_rate": 1.1560901771360283e-05, "loss": 4.0543, "step": 10155 }, { "epoch": 3.3831098525859913, "grad_norm": 0.65625, "learning_rate": 1.1560221227963416e-05, "loss": 4.0694, "step": 10156 }, { "epoch": 3.38344299158824, "grad_norm": 0.69140625, "learning_rate": 1.1559540637273767e-05, "loss": 3.982, "step": 10157 }, { "epoch": 3.3837761305904888, "grad_norm": 0.65234375, "learning_rate": 1.155885999929926e-05, "loss": 4.0488, "step": 10158 }, { "epoch": 3.3841092695927375, "grad_norm": 0.6640625, "learning_rate": 1.1558179314047828e-05, "loss": 4.0167, "step": 10159 }, { "epoch": 3.3844424085949862, "grad_norm": 0.65234375, "learning_rate": 1.1557498581527395e-05, "loss": 3.9685, "step": 10160 }, { "epoch": 3.384775547597235, "grad_norm": 0.69140625, "learning_rate": 1.1556817801745896e-05, "loss": 4.028, "step": 10161 }, { "epoch": 3.3851086865994837, "grad_norm": 0.68359375, "learning_rate": 1.1556136974711253e-05, "loss": 3.9701, "step": 10162 }, { "epoch": 3.3854418256017325, "grad_norm": 0.6484375, "learning_rate": 1.15554561004314e-05, "loss": 3.9973, "step": 10163 }, { "epoch": 3.385774964603981, "grad_norm": 0.67578125, "learning_rate": 1.1554775178914266e-05, "loss": 4.0216, "step": 10164 }, { "epoch": 3.3861081036062295, "grad_norm": 0.69140625, "learning_rate": 1.1554094210167787e-05, "loss": 3.9304, "step": 10165 }, { "epoch": 3.3864412426084782, "grad_norm": 0.63671875, "learning_rate": 1.155341319419989e-05, "loss": 4.0269, "step": 10166 }, { "epoch": 3.386774381610727, "grad_norm": 0.6171875, "learning_rate": 1.155273213101851e-05, "loss": 4.0197, "step": 10167 }, { "epoch": 3.3871075206129757, "grad_norm": 0.6640625, "learning_rate": 1.1552051020631578e-05, "loss": 3.9967, "step": 10168 }, { "epoch": 3.3874406596152244, "grad_norm": 0.66796875, "learning_rate": 1.155136986304703e-05, "loss": 4.081, "step": 10169 }, { "epoch": 3.387773798617473, "grad_norm": 0.6484375, "learning_rate": 1.1550688658272797e-05, "loss": 3.9984, "step": 10170 }, { "epoch": 3.388106937619722, "grad_norm": 0.671875, "learning_rate": 1.1550007406316814e-05, "loss": 3.9743, "step": 10171 }, { "epoch": 3.3884400766219707, "grad_norm": 0.66015625, "learning_rate": 1.1549326107187018e-05, "loss": 4.0561, "step": 10172 }, { "epoch": 3.3887732156242194, "grad_norm": 0.6640625, "learning_rate": 1.1548644760891342e-05, "loss": 4.0034, "step": 10173 }, { "epoch": 3.3891063546264677, "grad_norm": 0.671875, "learning_rate": 1.1547963367437728e-05, "loss": 4.0557, "step": 10174 }, { "epoch": 3.3894394936287164, "grad_norm": 0.63671875, "learning_rate": 1.1547281926834105e-05, "loss": 3.9766, "step": 10175 }, { "epoch": 3.389772632630965, "grad_norm": 0.640625, "learning_rate": 1.1546600439088414e-05, "loss": 4.0193, "step": 10176 }, { "epoch": 3.390105771633214, "grad_norm": 0.67578125, "learning_rate": 1.1545918904208594e-05, "loss": 4.0753, "step": 10177 }, { "epoch": 3.3904389106354627, "grad_norm": 0.6640625, "learning_rate": 1.1545237322202582e-05, "loss": 3.9715, "step": 10178 }, { "epoch": 3.3907720496377114, "grad_norm": 0.6875, "learning_rate": 1.1544555693078317e-05, "loss": 4.0199, "step": 10179 }, { "epoch": 3.39110518863996, "grad_norm": 0.65625, "learning_rate": 1.1543874016843738e-05, "loss": 4.0122, "step": 10180 }, { "epoch": 3.391438327642209, "grad_norm": 0.671875, "learning_rate": 1.1543192293506789e-05, "loss": 3.9815, "step": 10181 }, { "epoch": 3.3917714666444576, "grad_norm": 0.65234375, "learning_rate": 1.1542510523075404e-05, "loss": 3.979, "step": 10182 }, { "epoch": 3.392104605646706, "grad_norm": 0.62890625, "learning_rate": 1.154182870555753e-05, "loss": 4.0508, "step": 10183 }, { "epoch": 3.3924377446489546, "grad_norm": 0.6875, "learning_rate": 1.1541146840961103e-05, "loss": 3.9416, "step": 10184 }, { "epoch": 3.3927708836512034, "grad_norm": 0.65625, "learning_rate": 1.154046492929407e-05, "loss": 3.9692, "step": 10185 }, { "epoch": 3.393104022653452, "grad_norm": 0.703125, "learning_rate": 1.1539782970564373e-05, "loss": 3.9719, "step": 10186 }, { "epoch": 3.393437161655701, "grad_norm": 0.66796875, "learning_rate": 1.1539100964779952e-05, "loss": 4.0014, "step": 10187 }, { "epoch": 3.3937703006579496, "grad_norm": 0.6953125, "learning_rate": 1.1538418911948757e-05, "loss": 3.971, "step": 10188 }, { "epoch": 3.3941034396601983, "grad_norm": 0.671875, "learning_rate": 1.1537736812078726e-05, "loss": 4.0016, "step": 10189 }, { "epoch": 3.394436578662447, "grad_norm": 0.6796875, "learning_rate": 1.1537054665177807e-05, "loss": 4.0144, "step": 10190 }, { "epoch": 3.394769717664696, "grad_norm": 0.65234375, "learning_rate": 1.1536372471253946e-05, "loss": 4.0521, "step": 10191 }, { "epoch": 3.395102856666944, "grad_norm": 0.703125, "learning_rate": 1.153569023031509e-05, "loss": 3.9868, "step": 10192 }, { "epoch": 3.395435995669193, "grad_norm": 0.66015625, "learning_rate": 1.1535007942369183e-05, "loss": 4.0663, "step": 10193 }, { "epoch": 3.3957691346714416, "grad_norm": 0.6484375, "learning_rate": 1.1534325607424171e-05, "loss": 4.0343, "step": 10194 }, { "epoch": 3.3961022736736903, "grad_norm": 0.62890625, "learning_rate": 1.1533643225488007e-05, "loss": 4.0128, "step": 10195 }, { "epoch": 3.396435412675939, "grad_norm": 0.6328125, "learning_rate": 1.1532960796568637e-05, "loss": 4.1538, "step": 10196 }, { "epoch": 3.396768551678188, "grad_norm": 0.6640625, "learning_rate": 1.1532278320674007e-05, "loss": 4.017, "step": 10197 }, { "epoch": 3.3971016906804365, "grad_norm": 0.6875, "learning_rate": 1.1531595797812069e-05, "loss": 4.0328, "step": 10198 }, { "epoch": 3.3974348296826853, "grad_norm": 0.6875, "learning_rate": 1.1530913227990772e-05, "loss": 4.0108, "step": 10199 }, { "epoch": 3.397767968684934, "grad_norm": 0.66796875, "learning_rate": 1.1530230611218067e-05, "loss": 4.0129, "step": 10200 }, { "epoch": 3.3981011076871823, "grad_norm": 0.70703125, "learning_rate": 1.1529547947501906e-05, "loss": 3.9924, "step": 10201 }, { "epoch": 3.398434246689431, "grad_norm": 0.6484375, "learning_rate": 1.1528865236850236e-05, "loss": 4.0425, "step": 10202 }, { "epoch": 3.39876738569168, "grad_norm": 0.6796875, "learning_rate": 1.1528182479271016e-05, "loss": 3.9735, "step": 10203 }, { "epoch": 3.3991005246939285, "grad_norm": 0.66796875, "learning_rate": 1.1527499674772196e-05, "loss": 4.0322, "step": 10204 }, { "epoch": 3.3994336636961773, "grad_norm": 0.6640625, "learning_rate": 1.1526816823361725e-05, "loss": 3.9878, "step": 10205 }, { "epoch": 3.399766802698426, "grad_norm": 0.67578125, "learning_rate": 1.1526133925047562e-05, "loss": 3.9674, "step": 10206 }, { "epoch": 3.4000999417006748, "grad_norm": 0.671875, "learning_rate": 1.152545097983766e-05, "loss": 4.0108, "step": 10207 }, { "epoch": 3.400433080702923, "grad_norm": 0.65234375, "learning_rate": 1.1524767987739972e-05, "loss": 3.9586, "step": 10208 }, { "epoch": 3.400766219705172, "grad_norm": 0.65625, "learning_rate": 1.1524084948762455e-05, "loss": 4.0043, "step": 10209 }, { "epoch": 3.4010993587074205, "grad_norm": 0.625, "learning_rate": 1.1523401862913063e-05, "loss": 4.026, "step": 10210 }, { "epoch": 3.4014324977096693, "grad_norm": 0.65234375, "learning_rate": 1.1522718730199756e-05, "loss": 4.0733, "step": 10211 }, { "epoch": 3.401765636711918, "grad_norm": 0.69140625, "learning_rate": 1.1522035550630486e-05, "loss": 4.0284, "step": 10212 }, { "epoch": 3.4020987757141667, "grad_norm": 0.6875, "learning_rate": 1.1521352324213217e-05, "loss": 3.9765, "step": 10213 }, { "epoch": 3.4024319147164155, "grad_norm": 0.67578125, "learning_rate": 1.1520669050955901e-05, "loss": 3.9857, "step": 10214 }, { "epoch": 3.4027650537186642, "grad_norm": 0.6640625, "learning_rate": 1.1519985730866502e-05, "loss": 3.9873, "step": 10215 }, { "epoch": 3.403098192720913, "grad_norm": 0.6875, "learning_rate": 1.1519302363952977e-05, "loss": 4.0997, "step": 10216 }, { "epoch": 3.4034313317231613, "grad_norm": 0.640625, "learning_rate": 1.1518618950223283e-05, "loss": 4.0554, "step": 10217 }, { "epoch": 3.40376447072541, "grad_norm": 0.6875, "learning_rate": 1.151793548968538e-05, "loss": 3.9692, "step": 10218 }, { "epoch": 3.4040976097276587, "grad_norm": 0.69921875, "learning_rate": 1.1517251982347235e-05, "loss": 3.996, "step": 10219 }, { "epoch": 3.4044307487299075, "grad_norm": 0.64453125, "learning_rate": 1.1516568428216807e-05, "loss": 4.0137, "step": 10220 }, { "epoch": 3.404763887732156, "grad_norm": 0.66796875, "learning_rate": 1.1515884827302054e-05, "loss": 3.9747, "step": 10221 }, { "epoch": 3.405097026734405, "grad_norm": 0.6484375, "learning_rate": 1.1515201179610942e-05, "loss": 4.0727, "step": 10222 }, { "epoch": 3.4054301657366537, "grad_norm": 0.68359375, "learning_rate": 1.1514517485151432e-05, "loss": 4.0034, "step": 10223 }, { "epoch": 3.4057633047389024, "grad_norm": 0.63671875, "learning_rate": 1.151383374393149e-05, "loss": 3.9392, "step": 10224 }, { "epoch": 3.406096443741151, "grad_norm": 0.6484375, "learning_rate": 1.151314995595908e-05, "loss": 4.0605, "step": 10225 }, { "epoch": 3.4064295827433995, "grad_norm": 0.68359375, "learning_rate": 1.1512466121242162e-05, "loss": 3.9482, "step": 10226 }, { "epoch": 3.406762721745648, "grad_norm": 0.7109375, "learning_rate": 1.1511782239788708e-05, "loss": 3.9693, "step": 10227 }, { "epoch": 3.407095860747897, "grad_norm": 0.6796875, "learning_rate": 1.151109831160668e-05, "loss": 4.0387, "step": 10228 }, { "epoch": 3.4074289997501457, "grad_norm": 0.6953125, "learning_rate": 1.1510414336704046e-05, "loss": 3.984, "step": 10229 }, { "epoch": 3.4077621387523944, "grad_norm": 0.66796875, "learning_rate": 1.1509730315088765e-05, "loss": 4.0479, "step": 10230 }, { "epoch": 3.408095277754643, "grad_norm": 0.64453125, "learning_rate": 1.1509046246768816e-05, "loss": 4.0178, "step": 10231 }, { "epoch": 3.408428416756892, "grad_norm": 0.64453125, "learning_rate": 1.150836213175216e-05, "loss": 3.8978, "step": 10232 }, { "epoch": 3.4087615557591406, "grad_norm": 0.640625, "learning_rate": 1.1507677970046772e-05, "loss": 4.0609, "step": 10233 }, { "epoch": 3.4090946947613894, "grad_norm": 0.640625, "learning_rate": 1.1506993761660613e-05, "loss": 4.048, "step": 10234 }, { "epoch": 3.4094278337636377, "grad_norm": 0.65625, "learning_rate": 1.1506309506601654e-05, "loss": 3.9846, "step": 10235 }, { "epoch": 3.4097609727658864, "grad_norm": 0.6484375, "learning_rate": 1.1505625204877868e-05, "loss": 3.9906, "step": 10236 }, { "epoch": 3.410094111768135, "grad_norm": 0.6640625, "learning_rate": 1.1504940856497226e-05, "loss": 3.9399, "step": 10237 }, { "epoch": 3.410427250770384, "grad_norm": 0.640625, "learning_rate": 1.1504256461467697e-05, "loss": 4.0037, "step": 10238 }, { "epoch": 3.4107603897726326, "grad_norm": 0.66015625, "learning_rate": 1.1503572019797252e-05, "loss": 3.9942, "step": 10239 }, { "epoch": 3.4110935287748814, "grad_norm": 0.6875, "learning_rate": 1.1502887531493866e-05, "loss": 4.0201, "step": 10240 }, { "epoch": 3.41142666777713, "grad_norm": 0.6796875, "learning_rate": 1.150220299656551e-05, "loss": 4.0425, "step": 10241 }, { "epoch": 3.411759806779379, "grad_norm": 0.66796875, "learning_rate": 1.1501518415020158e-05, "loss": 3.9925, "step": 10242 }, { "epoch": 3.4120929457816276, "grad_norm": 0.6640625, "learning_rate": 1.1500833786865783e-05, "loss": 3.9795, "step": 10243 }, { "epoch": 3.412426084783876, "grad_norm": 0.6953125, "learning_rate": 1.1500149112110361e-05, "loss": 4.0786, "step": 10244 }, { "epoch": 3.4127592237861246, "grad_norm": 0.6796875, "learning_rate": 1.1499464390761866e-05, "loss": 3.9887, "step": 10245 }, { "epoch": 3.4130923627883734, "grad_norm": 0.6953125, "learning_rate": 1.1498779622828274e-05, "loss": 3.9415, "step": 10246 }, { "epoch": 3.413425501790622, "grad_norm": 0.65234375, "learning_rate": 1.1498094808317561e-05, "loss": 4.0605, "step": 10247 }, { "epoch": 3.413758640792871, "grad_norm": 0.6953125, "learning_rate": 1.1497409947237703e-05, "loss": 3.996, "step": 10248 }, { "epoch": 3.4140917797951196, "grad_norm": 0.6875, "learning_rate": 1.1496725039596677e-05, "loss": 4.0229, "step": 10249 }, { "epoch": 3.4144249187973683, "grad_norm": 0.6796875, "learning_rate": 1.149604008540246e-05, "loss": 4.0822, "step": 10250 }, { "epoch": 3.414758057799617, "grad_norm": 0.65625, "learning_rate": 1.1495355084663036e-05, "loss": 3.9567, "step": 10251 }, { "epoch": 3.415091196801866, "grad_norm": 0.671875, "learning_rate": 1.1494670037386374e-05, "loss": 4.0062, "step": 10252 }, { "epoch": 3.415424335804114, "grad_norm": 0.69140625, "learning_rate": 1.1493984943580463e-05, "loss": 4.0131, "step": 10253 }, { "epoch": 3.415757474806363, "grad_norm": 0.69140625, "learning_rate": 1.1493299803253273e-05, "loss": 4.058, "step": 10254 }, { "epoch": 3.4160906138086116, "grad_norm": 0.6640625, "learning_rate": 1.1492614616412797e-05, "loss": 4.0094, "step": 10255 }, { "epoch": 3.4164237528108603, "grad_norm": 0.671875, "learning_rate": 1.1491929383067003e-05, "loss": 3.9283, "step": 10256 }, { "epoch": 3.416756891813109, "grad_norm": 0.72265625, "learning_rate": 1.149124410322388e-05, "loss": 4.0372, "step": 10257 }, { "epoch": 3.417090030815358, "grad_norm": 0.6796875, "learning_rate": 1.1490558776891407e-05, "loss": 3.9941, "step": 10258 }, { "epoch": 3.4174231698176065, "grad_norm": 0.70703125, "learning_rate": 1.1489873404077568e-05, "loss": 3.9584, "step": 10259 }, { "epoch": 3.4177563088198553, "grad_norm": 0.6640625, "learning_rate": 1.1489187984790348e-05, "loss": 4.0547, "step": 10260 }, { "epoch": 3.418089447822104, "grad_norm": 0.6953125, "learning_rate": 1.1488502519037727e-05, "loss": 4.0405, "step": 10261 }, { "epoch": 3.4184225868243523, "grad_norm": 0.6640625, "learning_rate": 1.148781700682769e-05, "loss": 4.0284, "step": 10262 }, { "epoch": 3.418755725826601, "grad_norm": 0.67578125, "learning_rate": 1.1487131448168223e-05, "loss": 4.0264, "step": 10263 }, { "epoch": 3.4190888648288498, "grad_norm": 0.734375, "learning_rate": 1.1486445843067312e-05, "loss": 4.0073, "step": 10264 }, { "epoch": 3.4194220038310985, "grad_norm": 0.63671875, "learning_rate": 1.148576019153294e-05, "loss": 3.9641, "step": 10265 }, { "epoch": 3.4197551428333473, "grad_norm": 0.65234375, "learning_rate": 1.1485074493573098e-05, "loss": 4.0309, "step": 10266 }, { "epoch": 3.420088281835596, "grad_norm": 0.67578125, "learning_rate": 1.1484388749195765e-05, "loss": 4.0122, "step": 10267 }, { "epoch": 3.4204214208378447, "grad_norm": 0.65625, "learning_rate": 1.1483702958408937e-05, "loss": 4.0292, "step": 10268 }, { "epoch": 3.4207545598400935, "grad_norm": 0.6640625, "learning_rate": 1.1483017121220596e-05, "loss": 4.0754, "step": 10269 }, { "epoch": 3.421087698842342, "grad_norm": 0.6484375, "learning_rate": 1.1482331237638732e-05, "loss": 4.0607, "step": 10270 }, { "epoch": 3.4214208378445905, "grad_norm": 0.65625, "learning_rate": 1.1481645307671337e-05, "loss": 3.9754, "step": 10271 }, { "epoch": 3.4217539768468392, "grad_norm": 0.6796875, "learning_rate": 1.1480959331326397e-05, "loss": 4.0077, "step": 10272 }, { "epoch": 3.422087115849088, "grad_norm": 0.71484375, "learning_rate": 1.1480273308611908e-05, "loss": 4.044, "step": 10273 }, { "epoch": 3.4224202548513367, "grad_norm": 0.6640625, "learning_rate": 1.1479587239535851e-05, "loss": 4.0008, "step": 10274 }, { "epoch": 3.4227533938535855, "grad_norm": 0.6328125, "learning_rate": 1.1478901124106224e-05, "loss": 4.0562, "step": 10275 }, { "epoch": 3.423086532855834, "grad_norm": 0.6796875, "learning_rate": 1.1478214962331016e-05, "loss": 3.9502, "step": 10276 }, { "epoch": 3.423419671858083, "grad_norm": 0.67578125, "learning_rate": 1.1477528754218223e-05, "loss": 3.9684, "step": 10277 }, { "epoch": 3.4237528108603312, "grad_norm": 0.65625, "learning_rate": 1.1476842499775832e-05, "loss": 4.0235, "step": 10278 }, { "epoch": 3.42408594986258, "grad_norm": 0.64453125, "learning_rate": 1.1476156199011843e-05, "loss": 4.0072, "step": 10279 }, { "epoch": 3.4244190888648287, "grad_norm": 0.6796875, "learning_rate": 1.1475469851934245e-05, "loss": 4.0337, "step": 10280 }, { "epoch": 3.4247522278670774, "grad_norm": 0.65234375, "learning_rate": 1.1474783458551033e-05, "loss": 4.0455, "step": 10281 }, { "epoch": 3.425085366869326, "grad_norm": 0.67578125, "learning_rate": 1.1474097018870206e-05, "loss": 4.0448, "step": 10282 }, { "epoch": 3.425418505871575, "grad_norm": 0.671875, "learning_rate": 1.1473410532899754e-05, "loss": 3.974, "step": 10283 }, { "epoch": 3.4257516448738237, "grad_norm": 0.65234375, "learning_rate": 1.1472724000647679e-05, "loss": 4.0358, "step": 10284 }, { "epoch": 3.4260847838760724, "grad_norm": 0.6640625, "learning_rate": 1.147203742212197e-05, "loss": 4.0259, "step": 10285 }, { "epoch": 3.426417922878321, "grad_norm": 0.66796875, "learning_rate": 1.1471350797330633e-05, "loss": 4.0237, "step": 10286 }, { "epoch": 3.4267510618805694, "grad_norm": 0.66796875, "learning_rate": 1.1470664126281659e-05, "loss": 4.0082, "step": 10287 }, { "epoch": 3.427084200882818, "grad_norm": 0.62890625, "learning_rate": 1.1469977408983046e-05, "loss": 4.0093, "step": 10288 }, { "epoch": 3.427417339885067, "grad_norm": 0.65234375, "learning_rate": 1.1469290645442795e-05, "loss": 3.9863, "step": 10289 }, { "epoch": 3.4277504788873157, "grad_norm": 0.6875, "learning_rate": 1.146860383566891e-05, "loss": 3.9549, "step": 10290 }, { "epoch": 3.4280836178895644, "grad_norm": 0.6953125, "learning_rate": 1.1467916979669382e-05, "loss": 3.9898, "step": 10291 }, { "epoch": 3.428416756891813, "grad_norm": 0.66796875, "learning_rate": 1.1467230077452218e-05, "loss": 3.9475, "step": 10292 }, { "epoch": 3.428749895894062, "grad_norm": 0.64453125, "learning_rate": 1.1466543129025417e-05, "loss": 4.0163, "step": 10293 }, { "epoch": 3.4290830348963106, "grad_norm": 0.68359375, "learning_rate": 1.1465856134396979e-05, "loss": 3.9636, "step": 10294 }, { "epoch": 3.4294161738985594, "grad_norm": 0.66015625, "learning_rate": 1.1465169093574908e-05, "loss": 3.9819, "step": 10295 }, { "epoch": 3.4297493129008076, "grad_norm": 0.6875, "learning_rate": 1.1464482006567205e-05, "loss": 3.972, "step": 10296 }, { "epoch": 3.4300824519030564, "grad_norm": 0.66015625, "learning_rate": 1.1463794873381874e-05, "loss": 4.031, "step": 10297 }, { "epoch": 3.430415590905305, "grad_norm": 0.66015625, "learning_rate": 1.1463107694026916e-05, "loss": 4.0276, "step": 10298 }, { "epoch": 3.430748729907554, "grad_norm": 0.71875, "learning_rate": 1.1462420468510339e-05, "loss": 4.0132, "step": 10299 }, { "epoch": 3.4310818689098026, "grad_norm": 0.6796875, "learning_rate": 1.1461733196840148e-05, "loss": 4.0444, "step": 10300 }, { "epoch": 3.4314150079120513, "grad_norm": 0.6875, "learning_rate": 1.1461045879024346e-05, "loss": 4.0164, "step": 10301 }, { "epoch": 3.4317481469143, "grad_norm": 0.69140625, "learning_rate": 1.1460358515070936e-05, "loss": 4.0725, "step": 10302 }, { "epoch": 3.432081285916549, "grad_norm": 0.6484375, "learning_rate": 1.1459671104987928e-05, "loss": 3.9653, "step": 10303 }, { "epoch": 3.4324144249187976, "grad_norm": 0.6640625, "learning_rate": 1.1458983648783333e-05, "loss": 4.0468, "step": 10304 }, { "epoch": 3.432747563921046, "grad_norm": 0.68359375, "learning_rate": 1.1458296146465148e-05, "loss": 4.087, "step": 10305 }, { "epoch": 3.4330807029232946, "grad_norm": 0.6484375, "learning_rate": 1.145760859804139e-05, "loss": 4.0498, "step": 10306 }, { "epoch": 3.4334138419255433, "grad_norm": 0.66015625, "learning_rate": 1.1456921003520062e-05, "loss": 3.9799, "step": 10307 }, { "epoch": 3.433746980927792, "grad_norm": 0.6796875, "learning_rate": 1.1456233362909177e-05, "loss": 4.0186, "step": 10308 }, { "epoch": 3.434080119930041, "grad_norm": 0.73046875, "learning_rate": 1.145554567621674e-05, "loss": 4.0166, "step": 10309 }, { "epoch": 3.4344132589322895, "grad_norm": 0.69140625, "learning_rate": 1.1454857943450765e-05, "loss": 3.9912, "step": 10310 }, { "epoch": 3.4347463979345383, "grad_norm": 0.73046875, "learning_rate": 1.145417016461926e-05, "loss": 4.037, "step": 10311 }, { "epoch": 3.435079536936787, "grad_norm": 0.67578125, "learning_rate": 1.145348233973024e-05, "loss": 3.9222, "step": 10312 }, { "epoch": 3.4354126759390358, "grad_norm": 0.67578125, "learning_rate": 1.1452794468791711e-05, "loss": 4.0629, "step": 10313 }, { "epoch": 3.435745814941284, "grad_norm": 0.65625, "learning_rate": 1.1452106551811689e-05, "loss": 3.9879, "step": 10314 }, { "epoch": 3.436078953943533, "grad_norm": 0.6484375, "learning_rate": 1.1451418588798188e-05, "loss": 4.0324, "step": 10315 }, { "epoch": 3.4364120929457815, "grad_norm": 0.63671875, "learning_rate": 1.1450730579759216e-05, "loss": 3.984, "step": 10316 }, { "epoch": 3.4367452319480303, "grad_norm": 0.6875, "learning_rate": 1.1450042524702793e-05, "loss": 4.0571, "step": 10317 }, { "epoch": 3.437078370950279, "grad_norm": 0.671875, "learning_rate": 1.1449354423636927e-05, "loss": 3.9848, "step": 10318 }, { "epoch": 3.4374115099525278, "grad_norm": 0.69140625, "learning_rate": 1.144866627656964e-05, "loss": 4.0458, "step": 10319 }, { "epoch": 3.4377446489547765, "grad_norm": 0.703125, "learning_rate": 1.1447978083508942e-05, "loss": 4.0842, "step": 10320 }, { "epoch": 3.4380777879570252, "grad_norm": 0.69140625, "learning_rate": 1.1447289844462849e-05, "loss": 4.0166, "step": 10321 }, { "epoch": 3.438410926959274, "grad_norm": 0.65234375, "learning_rate": 1.1446601559439382e-05, "loss": 3.9633, "step": 10322 }, { "epoch": 3.4387440659615223, "grad_norm": 0.6640625, "learning_rate": 1.1445913228446555e-05, "loss": 3.9871, "step": 10323 }, { "epoch": 3.439077204963771, "grad_norm": 0.6640625, "learning_rate": 1.1445224851492383e-05, "loss": 3.9715, "step": 10324 }, { "epoch": 3.4394103439660197, "grad_norm": 0.70703125, "learning_rate": 1.1444536428584889e-05, "loss": 3.9897, "step": 10325 }, { "epoch": 3.4397434829682685, "grad_norm": 0.67578125, "learning_rate": 1.1443847959732086e-05, "loss": 4.0158, "step": 10326 }, { "epoch": 3.4400766219705172, "grad_norm": 0.65234375, "learning_rate": 1.1443159444942001e-05, "loss": 4.0363, "step": 10327 }, { "epoch": 3.440409760972766, "grad_norm": 0.67578125, "learning_rate": 1.1442470884222648e-05, "loss": 4.074, "step": 10328 }, { "epoch": 3.4407428999750147, "grad_norm": 0.69140625, "learning_rate": 1.1441782277582048e-05, "loss": 3.9898, "step": 10329 }, { "epoch": 3.4410760389772634, "grad_norm": 0.6484375, "learning_rate": 1.1441093625028223e-05, "loss": 4.0163, "step": 10330 }, { "epoch": 3.441409177979512, "grad_norm": 0.6640625, "learning_rate": 1.1440404926569194e-05, "loss": 3.9317, "step": 10331 }, { "epoch": 3.4417423169817605, "grad_norm": 0.68359375, "learning_rate": 1.1439716182212983e-05, "loss": 3.9493, "step": 10332 }, { "epoch": 3.442075455984009, "grad_norm": 0.67578125, "learning_rate": 1.143902739196761e-05, "loss": 4.0528, "step": 10333 }, { "epoch": 3.442408594986258, "grad_norm": 0.703125, "learning_rate": 1.1438338555841102e-05, "loss": 4.009, "step": 10334 }, { "epoch": 3.4427417339885067, "grad_norm": 0.69921875, "learning_rate": 1.1437649673841477e-05, "loss": 3.979, "step": 10335 }, { "epoch": 3.4430748729907554, "grad_norm": 0.6875, "learning_rate": 1.1436960745976765e-05, "loss": 4.0105, "step": 10336 }, { "epoch": 3.443408011993004, "grad_norm": 0.6953125, "learning_rate": 1.1436271772254986e-05, "loss": 4.0314, "step": 10337 }, { "epoch": 3.443741150995253, "grad_norm": 0.66796875, "learning_rate": 1.1435582752684167e-05, "loss": 4.0016, "step": 10338 }, { "epoch": 3.4440742899975016, "grad_norm": 0.61328125, "learning_rate": 1.1434893687272334e-05, "loss": 3.9966, "step": 10339 }, { "epoch": 3.4444074289997504, "grad_norm": 0.640625, "learning_rate": 1.1434204576027513e-05, "loss": 4.0406, "step": 10340 }, { "epoch": 3.4447405680019987, "grad_norm": 0.66015625, "learning_rate": 1.1433515418957731e-05, "loss": 3.9693, "step": 10341 }, { "epoch": 3.4450737070042474, "grad_norm": 0.64453125, "learning_rate": 1.1432826216071011e-05, "loss": 3.986, "step": 10342 }, { "epoch": 3.445406846006496, "grad_norm": 0.671875, "learning_rate": 1.1432136967375387e-05, "loss": 3.994, "step": 10343 }, { "epoch": 3.445739985008745, "grad_norm": 0.6953125, "learning_rate": 1.1431447672878881e-05, "loss": 3.967, "step": 10344 }, { "epoch": 3.4460731240109936, "grad_norm": 0.671875, "learning_rate": 1.143075833258953e-05, "loss": 4.0243, "step": 10345 }, { "epoch": 3.4464062630132424, "grad_norm": 0.6875, "learning_rate": 1.1430068946515356e-05, "loss": 4.0496, "step": 10346 }, { "epoch": 3.446739402015491, "grad_norm": 0.72265625, "learning_rate": 1.1429379514664392e-05, "loss": 3.9264, "step": 10347 }, { "epoch": 3.4470725410177394, "grad_norm": 0.62890625, "learning_rate": 1.1428690037044665e-05, "loss": 3.9946, "step": 10348 }, { "epoch": 3.447405680019988, "grad_norm": 0.6953125, "learning_rate": 1.1428000513664211e-05, "loss": 4.0274, "step": 10349 }, { "epoch": 3.447738819022237, "grad_norm": 0.67578125, "learning_rate": 1.142731094453106e-05, "loss": 3.9837, "step": 10350 }, { "epoch": 3.4480719580244856, "grad_norm": 0.71875, "learning_rate": 1.1426621329653238e-05, "loss": 4.0239, "step": 10351 }, { "epoch": 3.4484050970267344, "grad_norm": 0.68359375, "learning_rate": 1.1425931669038789e-05, "loss": 4.0085, "step": 10352 }, { "epoch": 3.448738236028983, "grad_norm": 0.67578125, "learning_rate": 1.1425241962695734e-05, "loss": 4.0103, "step": 10353 }, { "epoch": 3.449071375031232, "grad_norm": 0.67578125, "learning_rate": 1.142455221063212e-05, "loss": 3.9981, "step": 10354 }, { "epoch": 3.4494045140334806, "grad_norm": 0.703125, "learning_rate": 1.1423862412855965e-05, "loss": 4.0116, "step": 10355 }, { "epoch": 3.4497376530357293, "grad_norm": 0.66015625, "learning_rate": 1.1423172569375316e-05, "loss": 4.0754, "step": 10356 }, { "epoch": 3.4500707920379776, "grad_norm": 0.67578125, "learning_rate": 1.1422482680198203e-05, "loss": 3.9961, "step": 10357 }, { "epoch": 3.4504039310402264, "grad_norm": 0.6484375, "learning_rate": 1.1421792745332664e-05, "loss": 4.0692, "step": 10358 }, { "epoch": 3.450737070042475, "grad_norm": 0.69921875, "learning_rate": 1.1421102764786731e-05, "loss": 4.0723, "step": 10359 }, { "epoch": 3.451070209044724, "grad_norm": 0.65234375, "learning_rate": 1.1420412738568448e-05, "loss": 3.9414, "step": 10360 }, { "epoch": 3.4514033480469726, "grad_norm": 0.68359375, "learning_rate": 1.1419722666685847e-05, "loss": 4.0213, "step": 10361 }, { "epoch": 3.4517364870492213, "grad_norm": 0.6953125, "learning_rate": 1.1419032549146965e-05, "loss": 4.0604, "step": 10362 }, { "epoch": 3.45206962605147, "grad_norm": 0.6796875, "learning_rate": 1.1418342385959846e-05, "loss": 3.9768, "step": 10363 }, { "epoch": 3.452402765053719, "grad_norm": 0.6484375, "learning_rate": 1.141765217713252e-05, "loss": 4.063, "step": 10364 }, { "epoch": 3.4527359040559675, "grad_norm": 0.6484375, "learning_rate": 1.1416961922673038e-05, "loss": 4.1086, "step": 10365 }, { "epoch": 3.453069043058216, "grad_norm": 0.65625, "learning_rate": 1.141627162258943e-05, "loss": 4.0367, "step": 10366 }, { "epoch": 3.4534021820604646, "grad_norm": 0.6640625, "learning_rate": 1.141558127688974e-05, "loss": 4.0518, "step": 10367 }, { "epoch": 3.4537353210627133, "grad_norm": 0.7109375, "learning_rate": 1.141489088558201e-05, "loss": 4.0202, "step": 10368 }, { "epoch": 3.454068460064962, "grad_norm": 0.6640625, "learning_rate": 1.141420044867428e-05, "loss": 4.0359, "step": 10369 }, { "epoch": 3.454401599067211, "grad_norm": 0.64453125, "learning_rate": 1.1413509966174594e-05, "loss": 4.1038, "step": 10370 }, { "epoch": 3.4547347380694595, "grad_norm": 0.69140625, "learning_rate": 1.1412819438090994e-05, "loss": 4.0471, "step": 10371 }, { "epoch": 3.4550678770717083, "grad_norm": 0.68359375, "learning_rate": 1.1412128864431524e-05, "loss": 3.9679, "step": 10372 }, { "epoch": 3.455401016073957, "grad_norm": 0.65625, "learning_rate": 1.1411438245204222e-05, "loss": 3.9679, "step": 10373 }, { "epoch": 3.4557341550762057, "grad_norm": 0.671875, "learning_rate": 1.1410747580417141e-05, "loss": 4.02, "step": 10374 }, { "epoch": 3.456067294078454, "grad_norm": 0.65625, "learning_rate": 1.1410056870078319e-05, "loss": 4.0445, "step": 10375 }, { "epoch": 3.4564004330807028, "grad_norm": 0.66015625, "learning_rate": 1.1409366114195806e-05, "loss": 4.0293, "step": 10376 }, { "epoch": 3.4567335720829515, "grad_norm": 0.6640625, "learning_rate": 1.140867531277764e-05, "loss": 3.9965, "step": 10377 }, { "epoch": 3.4570667110852003, "grad_norm": 0.65234375, "learning_rate": 1.1407984465831878e-05, "loss": 4.0011, "step": 10378 }, { "epoch": 3.457399850087449, "grad_norm": 0.71484375, "learning_rate": 1.1407293573366559e-05, "loss": 4.0271, "step": 10379 }, { "epoch": 3.4577329890896977, "grad_norm": 0.66015625, "learning_rate": 1.1406602635389735e-05, "loss": 4.0224, "step": 10380 }, { "epoch": 3.4580661280919465, "grad_norm": 0.66015625, "learning_rate": 1.1405911651909452e-05, "loss": 4.0256, "step": 10381 }, { "epoch": 3.458399267094195, "grad_norm": 0.625, "learning_rate": 1.1405220622933756e-05, "loss": 3.9742, "step": 10382 }, { "epoch": 3.458732406096444, "grad_norm": 0.69140625, "learning_rate": 1.14045295484707e-05, "loss": 4.0021, "step": 10383 }, { "epoch": 3.4590655450986922, "grad_norm": 0.6953125, "learning_rate": 1.1403838428528332e-05, "loss": 3.9528, "step": 10384 }, { "epoch": 3.459398684100941, "grad_norm": 0.69140625, "learning_rate": 1.1403147263114703e-05, "loss": 4.054, "step": 10385 }, { "epoch": 3.4597318231031897, "grad_norm": 0.66015625, "learning_rate": 1.1402456052237861e-05, "loss": 4.0607, "step": 10386 }, { "epoch": 3.4600649621054385, "grad_norm": 0.64453125, "learning_rate": 1.1401764795905859e-05, "loss": 4.0864, "step": 10387 }, { "epoch": 3.460398101107687, "grad_norm": 0.65234375, "learning_rate": 1.1401073494126747e-05, "loss": 3.9875, "step": 10388 }, { "epoch": 3.460731240109936, "grad_norm": 0.6875, "learning_rate": 1.140038214690858e-05, "loss": 4.0525, "step": 10389 }, { "epoch": 3.4610643791121847, "grad_norm": 0.65625, "learning_rate": 1.1399690754259412e-05, "loss": 4.0055, "step": 10390 }, { "epoch": 3.4613975181144334, "grad_norm": 0.72265625, "learning_rate": 1.139899931618729e-05, "loss": 3.9675, "step": 10391 }, { "epoch": 3.461730657116682, "grad_norm": 0.671875, "learning_rate": 1.139830783270027e-05, "loss": 3.9418, "step": 10392 }, { "epoch": 3.4620637961189304, "grad_norm": 0.65234375, "learning_rate": 1.139761630380641e-05, "loss": 4.0746, "step": 10393 }, { "epoch": 3.462396935121179, "grad_norm": 0.6796875, "learning_rate": 1.1396924729513763e-05, "loss": 4.0759, "step": 10394 }, { "epoch": 3.462730074123428, "grad_norm": 0.68359375, "learning_rate": 1.1396233109830381e-05, "loss": 3.9497, "step": 10395 }, { "epoch": 3.4630632131256767, "grad_norm": 0.6640625, "learning_rate": 1.1395541444764324e-05, "loss": 3.9608, "step": 10396 }, { "epoch": 3.4633963521279254, "grad_norm": 0.63671875, "learning_rate": 1.1394849734323645e-05, "loss": 4.0584, "step": 10397 }, { "epoch": 3.463729491130174, "grad_norm": 0.6484375, "learning_rate": 1.1394157978516407e-05, "loss": 4.012, "step": 10398 }, { "epoch": 3.464062630132423, "grad_norm": 0.66796875, "learning_rate": 1.139346617735066e-05, "loss": 4.0307, "step": 10399 }, { "epoch": 3.4643957691346716, "grad_norm": 0.6484375, "learning_rate": 1.1392774330834467e-05, "loss": 4.0313, "step": 10400 }, { "epoch": 3.4647289081369204, "grad_norm": 0.65625, "learning_rate": 1.1392082438975882e-05, "loss": 4.0564, "step": 10401 }, { "epoch": 3.4650620471391687, "grad_norm": 0.6875, "learning_rate": 1.139139050178297e-05, "loss": 4.013, "step": 10402 }, { "epoch": 3.4653951861414174, "grad_norm": 0.69140625, "learning_rate": 1.1390698519263786e-05, "loss": 3.9682, "step": 10403 }, { "epoch": 3.465728325143666, "grad_norm": 0.6484375, "learning_rate": 1.139000649142639e-05, "loss": 4.0453, "step": 10404 }, { "epoch": 3.466061464145915, "grad_norm": 0.671875, "learning_rate": 1.1389314418278847e-05, "loss": 4.0063, "step": 10405 }, { "epoch": 3.4663946031481636, "grad_norm": 0.6796875, "learning_rate": 1.1388622299829212e-05, "loss": 4.0127, "step": 10406 }, { "epoch": 3.4667277421504124, "grad_norm": 0.6484375, "learning_rate": 1.1387930136085552e-05, "loss": 4.0217, "step": 10407 }, { "epoch": 3.467060881152661, "grad_norm": 0.6875, "learning_rate": 1.1387237927055929e-05, "loss": 3.9781, "step": 10408 }, { "epoch": 3.46739402015491, "grad_norm": 0.6640625, "learning_rate": 1.1386545672748402e-05, "loss": 4.055, "step": 10409 }, { "epoch": 3.4677271591571586, "grad_norm": 0.65625, "learning_rate": 1.1385853373171033e-05, "loss": 4.0367, "step": 10410 }, { "epoch": 3.468060298159407, "grad_norm": 0.66796875, "learning_rate": 1.1385161028331894e-05, "loss": 3.9776, "step": 10411 }, { "epoch": 3.4683934371616556, "grad_norm": 0.73046875, "learning_rate": 1.1384468638239043e-05, "loss": 3.961, "step": 10412 }, { "epoch": 3.4687265761639043, "grad_norm": 0.69921875, "learning_rate": 1.1383776202900544e-05, "loss": 3.9902, "step": 10413 }, { "epoch": 3.469059715166153, "grad_norm": 0.67578125, "learning_rate": 1.1383083722324466e-05, "loss": 3.9548, "step": 10414 }, { "epoch": 3.469392854168402, "grad_norm": 0.70703125, "learning_rate": 1.1382391196518874e-05, "loss": 3.956, "step": 10415 }, { "epoch": 3.4697259931706506, "grad_norm": 0.67578125, "learning_rate": 1.1381698625491833e-05, "loss": 3.9968, "step": 10416 }, { "epoch": 3.4700591321728993, "grad_norm": 0.6640625, "learning_rate": 1.1381006009251409e-05, "loss": 3.9795, "step": 10417 }, { "epoch": 3.4703922711751476, "grad_norm": 0.6640625, "learning_rate": 1.1380313347805676e-05, "loss": 4.0182, "step": 10418 }, { "epoch": 3.4707254101773963, "grad_norm": 0.69140625, "learning_rate": 1.1379620641162692e-05, "loss": 4.0617, "step": 10419 }, { "epoch": 3.471058549179645, "grad_norm": 0.6640625, "learning_rate": 1.1378927889330535e-05, "loss": 4.0591, "step": 10420 }, { "epoch": 3.471391688181894, "grad_norm": 0.6953125, "learning_rate": 1.1378235092317267e-05, "loss": 4.0266, "step": 10421 }, { "epoch": 3.4717248271841425, "grad_norm": 0.68359375, "learning_rate": 1.137754225013096e-05, "loss": 3.9475, "step": 10422 }, { "epoch": 3.4720579661863913, "grad_norm": 0.703125, "learning_rate": 1.1376849362779685e-05, "loss": 3.9804, "step": 10423 }, { "epoch": 3.47239110518864, "grad_norm": 0.7109375, "learning_rate": 1.1376156430271515e-05, "loss": 3.9531, "step": 10424 }, { "epoch": 3.4727242441908888, "grad_norm": 0.65234375, "learning_rate": 1.1375463452614515e-05, "loss": 4.0587, "step": 10425 }, { "epoch": 3.4730573831931375, "grad_norm": 0.67578125, "learning_rate": 1.1374770429816762e-05, "loss": 4.0252, "step": 10426 }, { "epoch": 3.473390522195386, "grad_norm": 0.69921875, "learning_rate": 1.1374077361886325e-05, "loss": 3.9857, "step": 10427 }, { "epoch": 3.4737236611976345, "grad_norm": 0.6640625, "learning_rate": 1.1373384248831276e-05, "loss": 4.0237, "step": 10428 }, { "epoch": 3.4740568001998833, "grad_norm": 0.69140625, "learning_rate": 1.1372691090659696e-05, "loss": 3.9529, "step": 10429 }, { "epoch": 3.474389939202132, "grad_norm": 0.64453125, "learning_rate": 1.1371997887379648e-05, "loss": 4.1045, "step": 10430 }, { "epoch": 3.4747230782043808, "grad_norm": 0.6640625, "learning_rate": 1.1371304638999214e-05, "loss": 4.0254, "step": 10431 }, { "epoch": 3.4750562172066295, "grad_norm": 0.68359375, "learning_rate": 1.1370611345526467e-05, "loss": 4.0575, "step": 10432 }, { "epoch": 3.4753893562088782, "grad_norm": 0.69140625, "learning_rate": 1.1369918006969479e-05, "loss": 4.0428, "step": 10433 }, { "epoch": 3.475722495211127, "grad_norm": 0.67578125, "learning_rate": 1.1369224623336328e-05, "loss": 4.0278, "step": 10434 }, { "epoch": 3.4760556342133757, "grad_norm": 0.671875, "learning_rate": 1.1368531194635094e-05, "loss": 4.0284, "step": 10435 }, { "epoch": 3.476388773215624, "grad_norm": 0.6640625, "learning_rate": 1.1367837720873847e-05, "loss": 4.1311, "step": 10436 }, { "epoch": 3.4767219122178727, "grad_norm": 0.69140625, "learning_rate": 1.1367144202060672e-05, "loss": 4.0256, "step": 10437 }, { "epoch": 3.4770550512201215, "grad_norm": 0.671875, "learning_rate": 1.136645063820364e-05, "loss": 4.0444, "step": 10438 }, { "epoch": 3.4773881902223702, "grad_norm": 0.671875, "learning_rate": 1.1365757029310836e-05, "loss": 3.9518, "step": 10439 }, { "epoch": 3.477721329224619, "grad_norm": 0.68359375, "learning_rate": 1.1365063375390333e-05, "loss": 4.0444, "step": 10440 }, { "epoch": 3.4780544682268677, "grad_norm": 0.69921875, "learning_rate": 1.1364369676450215e-05, "loss": 4.0133, "step": 10441 }, { "epoch": 3.4783876072291164, "grad_norm": 0.65234375, "learning_rate": 1.136367593249856e-05, "loss": 4.0254, "step": 10442 }, { "epoch": 3.478720746231365, "grad_norm": 0.68359375, "learning_rate": 1.1362982143543448e-05, "loss": 3.9832, "step": 10443 }, { "epoch": 3.479053885233614, "grad_norm": 0.70703125, "learning_rate": 1.1362288309592965e-05, "loss": 4.0428, "step": 10444 }, { "epoch": 3.479387024235862, "grad_norm": 0.7265625, "learning_rate": 1.1361594430655183e-05, "loss": 4.0027, "step": 10445 }, { "epoch": 3.479720163238111, "grad_norm": 0.66015625, "learning_rate": 1.1360900506738196e-05, "loss": 4.0484, "step": 10446 }, { "epoch": 3.4800533022403597, "grad_norm": 0.7109375, "learning_rate": 1.1360206537850075e-05, "loss": 4.0112, "step": 10447 }, { "epoch": 3.4803864412426084, "grad_norm": 0.68359375, "learning_rate": 1.1359512523998913e-05, "loss": 3.9584, "step": 10448 }, { "epoch": 3.480719580244857, "grad_norm": 0.62890625, "learning_rate": 1.135881846519279e-05, "loss": 3.9814, "step": 10449 }, { "epoch": 3.481052719247106, "grad_norm": 0.69140625, "learning_rate": 1.1358124361439788e-05, "loss": 3.9542, "step": 10450 }, { "epoch": 3.4813858582493546, "grad_norm": 0.68359375, "learning_rate": 1.1357430212747994e-05, "loss": 4.0231, "step": 10451 }, { "epoch": 3.4817189972516034, "grad_norm": 0.6484375, "learning_rate": 1.1356736019125495e-05, "loss": 4.0708, "step": 10452 }, { "epoch": 3.482052136253852, "grad_norm": 0.703125, "learning_rate": 1.1356041780580375e-05, "loss": 3.9753, "step": 10453 }, { "epoch": 3.4823852752561004, "grad_norm": 0.7109375, "learning_rate": 1.135534749712072e-05, "loss": 4.0522, "step": 10454 }, { "epoch": 3.482718414258349, "grad_norm": 0.6796875, "learning_rate": 1.1354653168754614e-05, "loss": 4.0029, "step": 10455 }, { "epoch": 3.483051553260598, "grad_norm": 0.6796875, "learning_rate": 1.135395879549015e-05, "loss": 4.0296, "step": 10456 }, { "epoch": 3.4833846922628466, "grad_norm": 0.69140625, "learning_rate": 1.1353264377335417e-05, "loss": 4.0894, "step": 10457 }, { "epoch": 3.4837178312650954, "grad_norm": 0.6875, "learning_rate": 1.1352569914298496e-05, "loss": 3.971, "step": 10458 }, { "epoch": 3.484050970267344, "grad_norm": 0.68359375, "learning_rate": 1.1351875406387484e-05, "loss": 4.0357, "step": 10459 }, { "epoch": 3.484384109269593, "grad_norm": 0.69140625, "learning_rate": 1.1351180853610464e-05, "loss": 4.0078, "step": 10460 }, { "epoch": 3.4847172482718416, "grad_norm": 0.69140625, "learning_rate": 1.1350486255975528e-05, "loss": 3.9442, "step": 10461 }, { "epoch": 3.4850503872740903, "grad_norm": 0.68359375, "learning_rate": 1.134979161349077e-05, "loss": 4.0183, "step": 10462 }, { "epoch": 3.4853835262763386, "grad_norm": 0.703125, "learning_rate": 1.1349096926164275e-05, "loss": 4.1007, "step": 10463 }, { "epoch": 3.4857166652785874, "grad_norm": 0.6796875, "learning_rate": 1.134840219400414e-05, "loss": 4.0365, "step": 10464 }, { "epoch": 3.486049804280836, "grad_norm": 0.6328125, "learning_rate": 1.1347707417018454e-05, "loss": 4.0293, "step": 10465 }, { "epoch": 3.486382943283085, "grad_norm": 0.69921875, "learning_rate": 1.1347012595215312e-05, "loss": 4.0305, "step": 10466 }, { "epoch": 3.4867160822853336, "grad_norm": 0.66796875, "learning_rate": 1.1346317728602805e-05, "loss": 3.9862, "step": 10467 }, { "epoch": 3.4870492212875823, "grad_norm": 0.66015625, "learning_rate": 1.134562281718903e-05, "loss": 3.946, "step": 10468 }, { "epoch": 3.487382360289831, "grad_norm": 0.671875, "learning_rate": 1.1344927860982073e-05, "loss": 4.0465, "step": 10469 }, { "epoch": 3.48771549929208, "grad_norm": 0.70703125, "learning_rate": 1.1344232859990039e-05, "loss": 4.0225, "step": 10470 }, { "epoch": 3.4880486382943285, "grad_norm": 0.72265625, "learning_rate": 1.1343537814221017e-05, "loss": 3.9711, "step": 10471 }, { "epoch": 3.488381777296577, "grad_norm": 0.71875, "learning_rate": 1.1342842723683104e-05, "loss": 3.9239, "step": 10472 }, { "epoch": 3.4887149162988256, "grad_norm": 0.71484375, "learning_rate": 1.1342147588384401e-05, "loss": 4.0188, "step": 10473 }, { "epoch": 3.4890480553010743, "grad_norm": 0.69140625, "learning_rate": 1.1341452408332997e-05, "loss": 4.0823, "step": 10474 }, { "epoch": 3.489381194303323, "grad_norm": 0.66796875, "learning_rate": 1.1340757183536994e-05, "loss": 3.9813, "step": 10475 }, { "epoch": 3.489714333305572, "grad_norm": 0.6640625, "learning_rate": 1.1340061914004488e-05, "loss": 4.0749, "step": 10476 }, { "epoch": 3.4900474723078205, "grad_norm": 0.68359375, "learning_rate": 1.133936659974358e-05, "loss": 3.9855, "step": 10477 }, { "epoch": 3.4903806113100693, "grad_norm": 0.67578125, "learning_rate": 1.1338671240762365e-05, "loss": 4.0679, "step": 10478 }, { "epoch": 3.490713750312318, "grad_norm": 0.671875, "learning_rate": 1.1337975837068949e-05, "loss": 4.0831, "step": 10479 }, { "epoch": 3.4910468893145667, "grad_norm": 0.68359375, "learning_rate": 1.1337280388671421e-05, "loss": 4.0833, "step": 10480 }, { "epoch": 3.491380028316815, "grad_norm": 0.71484375, "learning_rate": 1.1336584895577893e-05, "loss": 4.005, "step": 10481 }, { "epoch": 3.491713167319064, "grad_norm": 0.6953125, "learning_rate": 1.1335889357796459e-05, "loss": 3.9967, "step": 10482 }, { "epoch": 3.4920463063213125, "grad_norm": 0.69140625, "learning_rate": 1.1335193775335222e-05, "loss": 3.9823, "step": 10483 }, { "epoch": 3.4923794453235613, "grad_norm": 0.6640625, "learning_rate": 1.1334498148202286e-05, "loss": 4.0552, "step": 10484 }, { "epoch": 3.49271258432581, "grad_norm": 0.65234375, "learning_rate": 1.133380247640575e-05, "loss": 4.0174, "step": 10485 }, { "epoch": 3.4930457233280587, "grad_norm": 0.66015625, "learning_rate": 1.1333106759953721e-05, "loss": 4.0198, "step": 10486 }, { "epoch": 3.4933788623303075, "grad_norm": 0.6796875, "learning_rate": 1.1332410998854302e-05, "loss": 3.9581, "step": 10487 }, { "epoch": 3.4937120013325558, "grad_norm": 0.68359375, "learning_rate": 1.1331715193115595e-05, "loss": 3.992, "step": 10488 }, { "epoch": 3.4940451403348045, "grad_norm": 0.6953125, "learning_rate": 1.1331019342745703e-05, "loss": 4.0338, "step": 10489 }, { "epoch": 3.4943782793370533, "grad_norm": 0.6953125, "learning_rate": 1.1330323447752736e-05, "loss": 3.9994, "step": 10490 }, { "epoch": 3.494711418339302, "grad_norm": 0.6875, "learning_rate": 1.1329627508144795e-05, "loss": 3.984, "step": 10491 }, { "epoch": 3.4950445573415507, "grad_norm": 0.66796875, "learning_rate": 1.132893152392999e-05, "loss": 3.9397, "step": 10492 }, { "epoch": 3.4953776963437995, "grad_norm": 0.6875, "learning_rate": 1.132823549511643e-05, "loss": 4.015, "step": 10493 }, { "epoch": 3.495710835346048, "grad_norm": 0.67578125, "learning_rate": 1.1327539421712213e-05, "loss": 4.1089, "step": 10494 }, { "epoch": 3.496043974348297, "grad_norm": 0.6640625, "learning_rate": 1.1326843303725453e-05, "loss": 4.0233, "step": 10495 }, { "epoch": 3.4963771133505457, "grad_norm": 0.67578125, "learning_rate": 1.1326147141164258e-05, "loss": 4.0383, "step": 10496 }, { "epoch": 3.496710252352794, "grad_norm": 0.6796875, "learning_rate": 1.132545093403674e-05, "loss": 4.0065, "step": 10497 }, { "epoch": 3.4970433913550427, "grad_norm": 0.7109375, "learning_rate": 1.1324754682350999e-05, "loss": 4.0883, "step": 10498 }, { "epoch": 3.4973765303572915, "grad_norm": 0.69140625, "learning_rate": 1.1324058386115153e-05, "loss": 4.0038, "step": 10499 }, { "epoch": 3.49770966935954, "grad_norm": 0.65625, "learning_rate": 1.1323362045337308e-05, "loss": 4.056, "step": 10500 }, { "epoch": 3.498042808361789, "grad_norm": 0.6640625, "learning_rate": 1.1322665660025577e-05, "loss": 3.9795, "step": 10501 }, { "epoch": 3.4983759473640377, "grad_norm": 0.65625, "learning_rate": 1.1321969230188074e-05, "loss": 3.9769, "step": 10502 }, { "epoch": 3.4987090863662864, "grad_norm": 0.66796875, "learning_rate": 1.1321272755832906e-05, "loss": 3.9813, "step": 10503 }, { "epoch": 3.499042225368535, "grad_norm": 0.69140625, "learning_rate": 1.1320576236968184e-05, "loss": 3.8821, "step": 10504 }, { "epoch": 3.499375364370784, "grad_norm": 0.6953125, "learning_rate": 1.1319879673602029e-05, "loss": 3.9811, "step": 10505 }, { "epoch": 3.499708503373032, "grad_norm": 0.6484375, "learning_rate": 1.1319183065742547e-05, "loss": 4.0197, "step": 10506 }, { "epoch": 3.500041642375281, "grad_norm": 0.65625, "learning_rate": 1.1318486413397855e-05, "loss": 4.0525, "step": 10507 }, { "epoch": 3.5003747813775297, "grad_norm": 0.65234375, "learning_rate": 1.1317789716576067e-05, "loss": 4.0441, "step": 10508 }, { "epoch": 3.5007079203797784, "grad_norm": 0.66015625, "learning_rate": 1.1317092975285298e-05, "loss": 4.0493, "step": 10509 }, { "epoch": 3.501041059382027, "grad_norm": 0.69140625, "learning_rate": 1.1316396189533664e-05, "loss": 3.9735, "step": 10510 }, { "epoch": 3.501374198384276, "grad_norm": 0.69140625, "learning_rate": 1.131569935932928e-05, "loss": 3.9334, "step": 10511 }, { "epoch": 3.5017073373865246, "grad_norm": 0.68359375, "learning_rate": 1.1315002484680267e-05, "loss": 3.9805, "step": 10512 }, { "epoch": 3.5020404763887734, "grad_norm": 0.68359375, "learning_rate": 1.1314305565594734e-05, "loss": 3.9684, "step": 10513 }, { "epoch": 3.502373615391022, "grad_norm": 0.65625, "learning_rate": 1.1313608602080805e-05, "loss": 4.1248, "step": 10514 }, { "epoch": 3.5027067543932704, "grad_norm": 0.66015625, "learning_rate": 1.1312911594146595e-05, "loss": 4.0104, "step": 10515 }, { "epoch": 3.503039893395519, "grad_norm": 0.65234375, "learning_rate": 1.1312214541800223e-05, "loss": 3.9925, "step": 10516 }, { "epoch": 3.503373032397768, "grad_norm": 0.671875, "learning_rate": 1.1311517445049811e-05, "loss": 4.0747, "step": 10517 }, { "epoch": 3.5037061714000166, "grad_norm": 0.6796875, "learning_rate": 1.1310820303903475e-05, "loss": 3.993, "step": 10518 }, { "epoch": 3.5040393104022654, "grad_norm": 0.6953125, "learning_rate": 1.1310123118369336e-05, "loss": 4.0732, "step": 10519 }, { "epoch": 3.504372449404514, "grad_norm": 0.6796875, "learning_rate": 1.1309425888455519e-05, "loss": 4.0488, "step": 10520 }, { "epoch": 3.504705588406763, "grad_norm": 0.66015625, "learning_rate": 1.1308728614170138e-05, "loss": 3.9575, "step": 10521 }, { "epoch": 3.5050387274090116, "grad_norm": 0.71484375, "learning_rate": 1.1308031295521319e-05, "loss": 3.9619, "step": 10522 }, { "epoch": 3.5053718664112603, "grad_norm": 0.71875, "learning_rate": 1.1307333932517186e-05, "loss": 3.9757, "step": 10523 }, { "epoch": 3.5057050054135086, "grad_norm": 0.67578125, "learning_rate": 1.1306636525165856e-05, "loss": 3.9587, "step": 10524 }, { "epoch": 3.5060381444157573, "grad_norm": 0.67578125, "learning_rate": 1.130593907347546e-05, "loss": 4.0381, "step": 10525 }, { "epoch": 3.506371283418006, "grad_norm": 0.7265625, "learning_rate": 1.1305241577454112e-05, "loss": 4.0148, "step": 10526 }, { "epoch": 3.506704422420255, "grad_norm": 0.69921875, "learning_rate": 1.1304544037109944e-05, "loss": 3.8495, "step": 10527 }, { "epoch": 3.5070375614225036, "grad_norm": 0.69140625, "learning_rate": 1.130384645245108e-05, "loss": 3.9743, "step": 10528 }, { "epoch": 3.5073707004247523, "grad_norm": 0.6796875, "learning_rate": 1.130314882348564e-05, "loss": 4.0709, "step": 10529 }, { "epoch": 3.507703839427001, "grad_norm": 0.67578125, "learning_rate": 1.1302451150221759e-05, "loss": 4.0276, "step": 10530 }, { "epoch": 3.5080369784292493, "grad_norm": 0.67578125, "learning_rate": 1.1301753432667554e-05, "loss": 3.9849, "step": 10531 }, { "epoch": 3.5083701174314985, "grad_norm": 0.66015625, "learning_rate": 1.1301055670831157e-05, "loss": 4.0149, "step": 10532 }, { "epoch": 3.508703256433747, "grad_norm": 0.640625, "learning_rate": 1.1300357864720696e-05, "loss": 4.0272, "step": 10533 }, { "epoch": 3.5090363954359955, "grad_norm": 0.66796875, "learning_rate": 1.1299660014344298e-05, "loss": 3.9708, "step": 10534 }, { "epoch": 3.5093695344382443, "grad_norm": 0.66796875, "learning_rate": 1.1298962119710088e-05, "loss": 4.0399, "step": 10535 }, { "epoch": 3.509702673440493, "grad_norm": 0.66015625, "learning_rate": 1.12982641808262e-05, "loss": 3.9965, "step": 10536 }, { "epoch": 3.5100358124427418, "grad_norm": 0.671875, "learning_rate": 1.129756619770076e-05, "loss": 4.0203, "step": 10537 }, { "epoch": 3.5103689514449905, "grad_norm": 0.703125, "learning_rate": 1.1296868170341901e-05, "loss": 3.9494, "step": 10538 }, { "epoch": 3.5107020904472392, "grad_norm": 0.671875, "learning_rate": 1.1296170098757752e-05, "loss": 4.0785, "step": 10539 }, { "epoch": 3.5110352294494875, "grad_norm": 0.71875, "learning_rate": 1.129547198295644e-05, "loss": 4.0121, "step": 10540 }, { "epoch": 3.5113683684517367, "grad_norm": 0.6953125, "learning_rate": 1.1294773822946105e-05, "loss": 3.974, "step": 10541 }, { "epoch": 3.511701507453985, "grad_norm": 0.6796875, "learning_rate": 1.1294075618734873e-05, "loss": 4.068, "step": 10542 }, { "epoch": 3.5120346464562338, "grad_norm": 0.671875, "learning_rate": 1.1293377370330878e-05, "loss": 3.9374, "step": 10543 }, { "epoch": 3.5123677854584825, "grad_norm": 0.66015625, "learning_rate": 1.1292679077742251e-05, "loss": 4.1031, "step": 10544 }, { "epoch": 3.5127009244607312, "grad_norm": 0.69140625, "learning_rate": 1.1291980740977131e-05, "loss": 3.9826, "step": 10545 }, { "epoch": 3.51303406346298, "grad_norm": 0.7265625, "learning_rate": 1.1291282360043647e-05, "loss": 3.9978, "step": 10546 }, { "epoch": 3.5133672024652287, "grad_norm": 0.703125, "learning_rate": 1.1290583934949936e-05, "loss": 4.0085, "step": 10547 }, { "epoch": 3.5137003414674775, "grad_norm": 0.6953125, "learning_rate": 1.1289885465704131e-05, "loss": 4.0472, "step": 10548 }, { "epoch": 3.5140334804697257, "grad_norm": 0.69140625, "learning_rate": 1.1289186952314372e-05, "loss": 4.0378, "step": 10549 }, { "epoch": 3.514366619471975, "grad_norm": 0.671875, "learning_rate": 1.128848839478879e-05, "loss": 4.0079, "step": 10550 }, { "epoch": 3.5146997584742232, "grad_norm": 0.6953125, "learning_rate": 1.1287789793135525e-05, "loss": 3.8884, "step": 10551 }, { "epoch": 3.515032897476472, "grad_norm": 0.68359375, "learning_rate": 1.1287091147362714e-05, "loss": 4.0171, "step": 10552 }, { "epoch": 3.5153660364787207, "grad_norm": 0.65234375, "learning_rate": 1.1286392457478492e-05, "loss": 4.0474, "step": 10553 }, { "epoch": 3.5156991754809694, "grad_norm": 0.671875, "learning_rate": 1.1285693723490998e-05, "loss": 4.0221, "step": 10554 }, { "epoch": 3.516032314483218, "grad_norm": 0.73046875, "learning_rate": 1.1284994945408375e-05, "loss": 3.957, "step": 10555 }, { "epoch": 3.516365453485467, "grad_norm": 0.6640625, "learning_rate": 1.128429612323876e-05, "loss": 4.0136, "step": 10556 }, { "epoch": 3.5166985924877157, "grad_norm": 0.6796875, "learning_rate": 1.1283597256990287e-05, "loss": 4.0777, "step": 10557 }, { "epoch": 3.517031731489964, "grad_norm": 0.6875, "learning_rate": 1.1282898346671105e-05, "loss": 4.0094, "step": 10558 }, { "epoch": 3.517364870492213, "grad_norm": 0.68359375, "learning_rate": 1.128219939228935e-05, "loss": 4.0947, "step": 10559 }, { "epoch": 3.5176980094944614, "grad_norm": 0.67578125, "learning_rate": 1.1281500393853165e-05, "loss": 4.0086, "step": 10560 }, { "epoch": 3.51803114849671, "grad_norm": 0.734375, "learning_rate": 1.1280801351370691e-05, "loss": 3.9439, "step": 10561 }, { "epoch": 3.518364287498959, "grad_norm": 0.6796875, "learning_rate": 1.128010226485007e-05, "loss": 4.0069, "step": 10562 }, { "epoch": 3.5186974265012076, "grad_norm": 0.671875, "learning_rate": 1.1279403134299446e-05, "loss": 3.9937, "step": 10563 }, { "epoch": 3.5190305655034564, "grad_norm": 0.68359375, "learning_rate": 1.1278703959726962e-05, "loss": 3.9771, "step": 10564 }, { "epoch": 3.519363704505705, "grad_norm": 0.66015625, "learning_rate": 1.1278004741140761e-05, "loss": 3.9691, "step": 10565 }, { "epoch": 3.519696843507954, "grad_norm": 0.65234375, "learning_rate": 1.1277305478548987e-05, "loss": 4.0515, "step": 10566 }, { "epoch": 3.520029982510202, "grad_norm": 0.671875, "learning_rate": 1.1276606171959788e-05, "loss": 4.0042, "step": 10567 }, { "epoch": 3.5203631215124513, "grad_norm": 0.6640625, "learning_rate": 1.1275906821381303e-05, "loss": 4.0487, "step": 10568 }, { "epoch": 3.5206962605146996, "grad_norm": 0.63671875, "learning_rate": 1.1275207426821687e-05, "loss": 4.0071, "step": 10569 }, { "epoch": 3.5210293995169484, "grad_norm": 0.69140625, "learning_rate": 1.1274507988289077e-05, "loss": 4.0175, "step": 10570 }, { "epoch": 3.521362538519197, "grad_norm": 0.6796875, "learning_rate": 1.1273808505791629e-05, "loss": 3.9633, "step": 10571 }, { "epoch": 3.521695677521446, "grad_norm": 0.6875, "learning_rate": 1.1273108979337482e-05, "loss": 3.9304, "step": 10572 }, { "epoch": 3.5220288165236946, "grad_norm": 0.72265625, "learning_rate": 1.1272409408934792e-05, "loss": 4.0387, "step": 10573 }, { "epoch": 3.5223619555259433, "grad_norm": 0.69140625, "learning_rate": 1.12717097945917e-05, "loss": 4.0631, "step": 10574 }, { "epoch": 3.522695094528192, "grad_norm": 0.69921875, "learning_rate": 1.127101013631636e-05, "loss": 4.0259, "step": 10575 }, { "epoch": 3.5230282335304404, "grad_norm": 0.64453125, "learning_rate": 1.1270310434116918e-05, "loss": 3.9838, "step": 10576 }, { "epoch": 3.523361372532689, "grad_norm": 0.66796875, "learning_rate": 1.1269610688001527e-05, "loss": 3.9583, "step": 10577 }, { "epoch": 3.523694511534938, "grad_norm": 0.6640625, "learning_rate": 1.126891089797834e-05, "loss": 4.0074, "step": 10578 }, { "epoch": 3.5240276505371866, "grad_norm": 0.64453125, "learning_rate": 1.12682110640555e-05, "loss": 4.0748, "step": 10579 }, { "epoch": 3.5243607895394353, "grad_norm": 0.66796875, "learning_rate": 1.1267511186241165e-05, "loss": 3.9905, "step": 10580 }, { "epoch": 3.524693928541684, "grad_norm": 0.6796875, "learning_rate": 1.1266811264543484e-05, "loss": 4.0486, "step": 10581 }, { "epoch": 3.525027067543933, "grad_norm": 0.62890625, "learning_rate": 1.1266111298970611e-05, "loss": 4.0523, "step": 10582 }, { "epoch": 3.5253602065461815, "grad_norm": 0.671875, "learning_rate": 1.1265411289530699e-05, "loss": 3.9842, "step": 10583 }, { "epoch": 3.5256933455484303, "grad_norm": 0.69140625, "learning_rate": 1.1264711236231901e-05, "loss": 4.0058, "step": 10584 }, { "epoch": 3.5260264845506786, "grad_norm": 0.6796875, "learning_rate": 1.1264011139082372e-05, "loss": 3.9842, "step": 10585 }, { "epoch": 3.5263596235529273, "grad_norm": 0.6875, "learning_rate": 1.1263310998090264e-05, "loss": 4.0241, "step": 10586 }, { "epoch": 3.526692762555176, "grad_norm": 0.67578125, "learning_rate": 1.1262610813263738e-05, "loss": 3.9734, "step": 10587 }, { "epoch": 3.527025901557425, "grad_norm": 0.703125, "learning_rate": 1.1261910584610943e-05, "loss": 4.0288, "step": 10588 }, { "epoch": 3.5273590405596735, "grad_norm": 0.68359375, "learning_rate": 1.1261210312140039e-05, "loss": 4.0145, "step": 10589 }, { "epoch": 3.5276921795619223, "grad_norm": 0.69140625, "learning_rate": 1.126050999585918e-05, "loss": 3.9302, "step": 10590 }, { "epoch": 3.528025318564171, "grad_norm": 0.66796875, "learning_rate": 1.1259809635776528e-05, "loss": 3.9337, "step": 10591 }, { "epoch": 3.5283584575664197, "grad_norm": 0.6640625, "learning_rate": 1.1259109231900235e-05, "loss": 4.0139, "step": 10592 }, { "epoch": 3.5286915965686685, "grad_norm": 0.6640625, "learning_rate": 1.1258408784238462e-05, "loss": 3.997, "step": 10593 }, { "epoch": 3.529024735570917, "grad_norm": 0.69921875, "learning_rate": 1.1257708292799369e-05, "loss": 4.0212, "step": 10594 }, { "epoch": 3.5293578745731655, "grad_norm": 0.67578125, "learning_rate": 1.1257007757591113e-05, "loss": 3.9891, "step": 10595 }, { "epoch": 3.5296910135754143, "grad_norm": 0.6640625, "learning_rate": 1.1256307178621852e-05, "loss": 3.9993, "step": 10596 }, { "epoch": 3.530024152577663, "grad_norm": 0.64453125, "learning_rate": 1.125560655589975e-05, "loss": 4.0582, "step": 10597 }, { "epoch": 3.5303572915799117, "grad_norm": 0.6796875, "learning_rate": 1.1254905889432967e-05, "loss": 3.9396, "step": 10598 }, { "epoch": 3.5306904305821605, "grad_norm": 0.625, "learning_rate": 1.1254205179229661e-05, "loss": 3.9927, "step": 10599 }, { "epoch": 3.531023569584409, "grad_norm": 0.69140625, "learning_rate": 1.1253504425298002e-05, "loss": 3.9487, "step": 10600 }, { "epoch": 3.5313567085866575, "grad_norm": 0.69140625, "learning_rate": 1.1252803627646139e-05, "loss": 3.9733, "step": 10601 }, { "epoch": 3.5316898475889067, "grad_norm": 0.67578125, "learning_rate": 1.1252102786282247e-05, "loss": 3.9862, "step": 10602 }, { "epoch": 3.532022986591155, "grad_norm": 0.62109375, "learning_rate": 1.1251401901214482e-05, "loss": 4.0354, "step": 10603 }, { "epoch": 3.5323561255934037, "grad_norm": 0.65234375, "learning_rate": 1.1250700972451012e-05, "loss": 4.0331, "step": 10604 }, { "epoch": 3.5326892645956525, "grad_norm": 0.65625, "learning_rate": 1.125e-05, "loss": 4.0148, "step": 10605 }, { "epoch": 3.533022403597901, "grad_norm": 0.7265625, "learning_rate": 1.124929898386961e-05, "loss": 3.9828, "step": 10606 }, { "epoch": 3.53335554260015, "grad_norm": 0.74609375, "learning_rate": 1.1248597924068008e-05, "loss": 4.0326, "step": 10607 }, { "epoch": 3.5336886816023987, "grad_norm": 0.6796875, "learning_rate": 1.124789682060336e-05, "loss": 4.0125, "step": 10608 }, { "epoch": 3.5340218206046474, "grad_norm": 0.6953125, "learning_rate": 1.1247195673483832e-05, "loss": 4.0748, "step": 10609 }, { "epoch": 3.5343549596068957, "grad_norm": 0.69140625, "learning_rate": 1.124649448271759e-05, "loss": 4.0668, "step": 10610 }, { "epoch": 3.534688098609145, "grad_norm": 0.66796875, "learning_rate": 1.1245793248312802e-05, "loss": 4.0056, "step": 10611 }, { "epoch": 3.535021237611393, "grad_norm": 0.7109375, "learning_rate": 1.1245091970277636e-05, "loss": 4.0119, "step": 10612 }, { "epoch": 3.535354376613642, "grad_norm": 0.6953125, "learning_rate": 1.1244390648620261e-05, "loss": 4.0426, "step": 10613 }, { "epoch": 3.5356875156158907, "grad_norm": 0.66796875, "learning_rate": 1.1243689283348845e-05, "loss": 4.0241, "step": 10614 }, { "epoch": 3.5360206546181394, "grad_norm": 0.70703125, "learning_rate": 1.124298787447156e-05, "loss": 4.0181, "step": 10615 }, { "epoch": 3.536353793620388, "grad_norm": 0.6640625, "learning_rate": 1.124228642199657e-05, "loss": 3.9784, "step": 10616 }, { "epoch": 3.536686932622637, "grad_norm": 0.66796875, "learning_rate": 1.1241584925932049e-05, "loss": 4.0902, "step": 10617 }, { "epoch": 3.5370200716248856, "grad_norm": 0.703125, "learning_rate": 1.1240883386286172e-05, "loss": 3.9578, "step": 10618 }, { "epoch": 3.537353210627134, "grad_norm": 0.66796875, "learning_rate": 1.1240181803067102e-05, "loss": 4.0367, "step": 10619 }, { "epoch": 3.537686349629383, "grad_norm": 0.66796875, "learning_rate": 1.1239480176283016e-05, "loss": 3.9744, "step": 10620 }, { "epoch": 3.5380194886316314, "grad_norm": 0.71875, "learning_rate": 1.1238778505942086e-05, "loss": 4.1068, "step": 10621 }, { "epoch": 3.53835262763388, "grad_norm": 0.66015625, "learning_rate": 1.1238076792052485e-05, "loss": 3.9099, "step": 10622 }, { "epoch": 3.538685766636129, "grad_norm": 0.70703125, "learning_rate": 1.1237375034622385e-05, "loss": 3.9875, "step": 10623 }, { "epoch": 3.5390189056383776, "grad_norm": 0.72265625, "learning_rate": 1.1236673233659964e-05, "loss": 4.0187, "step": 10624 }, { "epoch": 3.5393520446406264, "grad_norm": 0.6484375, "learning_rate": 1.123597138917339e-05, "loss": 4.0611, "step": 10625 }, { "epoch": 3.539685183642875, "grad_norm": 0.70703125, "learning_rate": 1.1235269501170844e-05, "loss": 3.9811, "step": 10626 }, { "epoch": 3.540018322645124, "grad_norm": 0.69140625, "learning_rate": 1.1234567569660498e-05, "loss": 4.0178, "step": 10627 }, { "epoch": 3.540351461647372, "grad_norm": 0.66796875, "learning_rate": 1.1233865594650528e-05, "loss": 4.0555, "step": 10628 }, { "epoch": 3.5406846006496213, "grad_norm": 0.69140625, "learning_rate": 1.1233163576149112e-05, "loss": 4.0064, "step": 10629 }, { "epoch": 3.5410177396518696, "grad_norm": 0.6484375, "learning_rate": 1.1232461514164425e-05, "loss": 4.0411, "step": 10630 }, { "epoch": 3.5413508786541184, "grad_norm": 0.65234375, "learning_rate": 1.1231759408704648e-05, "loss": 3.9907, "step": 10631 }, { "epoch": 3.541684017656367, "grad_norm": 0.6953125, "learning_rate": 1.1231057259777957e-05, "loss": 4.0524, "step": 10632 }, { "epoch": 3.542017156658616, "grad_norm": 0.65234375, "learning_rate": 1.1230355067392528e-05, "loss": 4.0455, "step": 10633 }, { "epoch": 3.5423502956608646, "grad_norm": 0.67578125, "learning_rate": 1.1229652831556545e-05, "loss": 3.9381, "step": 10634 }, { "epoch": 3.5426834346631133, "grad_norm": 0.69140625, "learning_rate": 1.1228950552278182e-05, "loss": 4.0528, "step": 10635 }, { "epoch": 3.543016573665362, "grad_norm": 0.68359375, "learning_rate": 1.1228248229565626e-05, "loss": 3.9844, "step": 10636 }, { "epoch": 3.5433497126676103, "grad_norm": 0.7265625, "learning_rate": 1.122754586342705e-05, "loss": 4.0612, "step": 10637 }, { "epoch": 3.5436828516698595, "grad_norm": 0.6640625, "learning_rate": 1.122684345387064e-05, "loss": 4.0189, "step": 10638 }, { "epoch": 3.544015990672108, "grad_norm": 0.66015625, "learning_rate": 1.1226141000904575e-05, "loss": 3.9888, "step": 10639 }, { "epoch": 3.5443491296743566, "grad_norm": 0.64453125, "learning_rate": 1.122543850453704e-05, "loss": 4.0683, "step": 10640 }, { "epoch": 3.5446822686766053, "grad_norm": 0.68359375, "learning_rate": 1.1224735964776214e-05, "loss": 4.0184, "step": 10641 }, { "epoch": 3.545015407678854, "grad_norm": 0.72265625, "learning_rate": 1.1224033381630283e-05, "loss": 3.9985, "step": 10642 }, { "epoch": 3.5453485466811028, "grad_norm": 0.65234375, "learning_rate": 1.1223330755107428e-05, "loss": 4.0123, "step": 10643 }, { "epoch": 3.5456816856833515, "grad_norm": 0.70703125, "learning_rate": 1.1222628085215836e-05, "loss": 4.0575, "step": 10644 }, { "epoch": 3.5460148246856003, "grad_norm": 0.70703125, "learning_rate": 1.1221925371963689e-05, "loss": 4.0036, "step": 10645 }, { "epoch": 3.5463479636878485, "grad_norm": 0.6484375, "learning_rate": 1.1221222615359174e-05, "loss": 4.0404, "step": 10646 }, { "epoch": 3.5466811026900973, "grad_norm": 0.70703125, "learning_rate": 1.1220519815410474e-05, "loss": 3.9508, "step": 10647 }, { "epoch": 3.547014241692346, "grad_norm": 0.71875, "learning_rate": 1.121981697212578e-05, "loss": 3.9871, "step": 10648 }, { "epoch": 3.5473473806945948, "grad_norm": 0.671875, "learning_rate": 1.121911408551327e-05, "loss": 3.9974, "step": 10649 }, { "epoch": 3.5476805196968435, "grad_norm": 0.671875, "learning_rate": 1.121841115558114e-05, "loss": 4.022, "step": 10650 }, { "epoch": 3.5480136586990922, "grad_norm": 0.66796875, "learning_rate": 1.1217708182337574e-05, "loss": 3.9916, "step": 10651 }, { "epoch": 3.548346797701341, "grad_norm": 0.70703125, "learning_rate": 1.121700516579076e-05, "loss": 3.9156, "step": 10652 }, { "epoch": 3.5486799367035897, "grad_norm": 0.68359375, "learning_rate": 1.1216302105948889e-05, "loss": 4.0605, "step": 10653 }, { "epoch": 3.5490130757058385, "grad_norm": 0.68359375, "learning_rate": 1.1215599002820144e-05, "loss": 4.0132, "step": 10654 }, { "epoch": 3.5493462147080868, "grad_norm": 0.66796875, "learning_rate": 1.121489585641272e-05, "loss": 3.9711, "step": 10655 }, { "epoch": 3.5496793537103355, "grad_norm": 0.66796875, "learning_rate": 1.1214192666734807e-05, "loss": 3.986, "step": 10656 }, { "epoch": 3.5500124927125842, "grad_norm": 0.6484375, "learning_rate": 1.1213489433794594e-05, "loss": 4.058, "step": 10657 }, { "epoch": 3.550345631714833, "grad_norm": 0.6875, "learning_rate": 1.121278615760027e-05, "loss": 3.9974, "step": 10658 }, { "epoch": 3.5506787707170817, "grad_norm": 0.71875, "learning_rate": 1.1212082838160033e-05, "loss": 4.008, "step": 10659 }, { "epoch": 3.5510119097193305, "grad_norm": 0.68359375, "learning_rate": 1.1211379475482069e-05, "loss": 3.9459, "step": 10660 }, { "epoch": 3.551345048721579, "grad_norm": 0.6953125, "learning_rate": 1.1210676069574573e-05, "loss": 3.983, "step": 10661 }, { "epoch": 3.551678187723828, "grad_norm": 0.68359375, "learning_rate": 1.1209972620445738e-05, "loss": 4.0395, "step": 10662 }, { "epoch": 3.5520113267260767, "grad_norm": 0.6875, "learning_rate": 1.1209269128103758e-05, "loss": 3.9795, "step": 10663 }, { "epoch": 3.552344465728325, "grad_norm": 0.68359375, "learning_rate": 1.1208565592556827e-05, "loss": 4.029, "step": 10664 }, { "epoch": 3.5526776047305737, "grad_norm": 0.65234375, "learning_rate": 1.120786201381314e-05, "loss": 4.0188, "step": 10665 }, { "epoch": 3.5530107437328224, "grad_norm": 0.65625, "learning_rate": 1.1207158391880891e-05, "loss": 4.1061, "step": 10666 }, { "epoch": 3.553343882735071, "grad_norm": 0.65234375, "learning_rate": 1.1206454726768276e-05, "loss": 4.0568, "step": 10667 }, { "epoch": 3.55367702173732, "grad_norm": 0.671875, "learning_rate": 1.1205751018483495e-05, "loss": 4.0473, "step": 10668 }, { "epoch": 3.5540101607395687, "grad_norm": 0.71484375, "learning_rate": 1.1205047267034738e-05, "loss": 4.0343, "step": 10669 }, { "epoch": 3.5543432997418174, "grad_norm": 0.6640625, "learning_rate": 1.1204343472430207e-05, "loss": 4.013, "step": 10670 }, { "epoch": 3.5546764387440657, "grad_norm": 0.6875, "learning_rate": 1.1203639634678095e-05, "loss": 3.9742, "step": 10671 }, { "epoch": 3.555009577746315, "grad_norm": 0.67578125, "learning_rate": 1.1202935753786607e-05, "loss": 3.9977, "step": 10672 }, { "epoch": 3.555342716748563, "grad_norm": 0.73828125, "learning_rate": 1.1202231829763938e-05, "loss": 3.998, "step": 10673 }, { "epoch": 3.555675855750812, "grad_norm": 0.67578125, "learning_rate": 1.1201527862618286e-05, "loss": 4.0308, "step": 10674 }, { "epoch": 3.5560089947530606, "grad_norm": 0.6953125, "learning_rate": 1.120082385235785e-05, "loss": 3.9965, "step": 10675 }, { "epoch": 3.5563421337553094, "grad_norm": 0.71875, "learning_rate": 1.1200119798990834e-05, "loss": 3.957, "step": 10676 }, { "epoch": 3.556675272757558, "grad_norm": 0.66796875, "learning_rate": 1.1199415702525438e-05, "loss": 3.9553, "step": 10677 }, { "epoch": 3.557008411759807, "grad_norm": 0.6328125, "learning_rate": 1.1198711562969859e-05, "loss": 4.047, "step": 10678 }, { "epoch": 3.5573415507620556, "grad_norm": 0.72265625, "learning_rate": 1.1198007380332304e-05, "loss": 3.9849, "step": 10679 }, { "epoch": 3.557674689764304, "grad_norm": 0.69140625, "learning_rate": 1.1197303154620972e-05, "loss": 3.9836, "step": 10680 }, { "epoch": 3.558007828766553, "grad_norm": 0.66015625, "learning_rate": 1.119659888584407e-05, "loss": 3.9979, "step": 10681 }, { "epoch": 3.5583409677688014, "grad_norm": 0.66796875, "learning_rate": 1.1195894574009792e-05, "loss": 4.04, "step": 10682 }, { "epoch": 3.55867410677105, "grad_norm": 0.6640625, "learning_rate": 1.119519021912635e-05, "loss": 3.9626, "step": 10683 }, { "epoch": 3.559007245773299, "grad_norm": 0.69140625, "learning_rate": 1.1194485821201946e-05, "loss": 4.0652, "step": 10684 }, { "epoch": 3.5593403847755476, "grad_norm": 0.6328125, "learning_rate": 1.1193781380244785e-05, "loss": 4.0407, "step": 10685 }, { "epoch": 3.5596735237777963, "grad_norm": 0.70703125, "learning_rate": 1.119307689626307e-05, "loss": 3.9829, "step": 10686 }, { "epoch": 3.560006662780045, "grad_norm": 0.703125, "learning_rate": 1.119237236926501e-05, "loss": 3.9568, "step": 10687 }, { "epoch": 3.560339801782294, "grad_norm": 0.69921875, "learning_rate": 1.1191667799258808e-05, "loss": 4.011, "step": 10688 }, { "epoch": 3.560672940784542, "grad_norm": 0.69921875, "learning_rate": 1.1190963186252673e-05, "loss": 3.9993, "step": 10689 }, { "epoch": 3.5610060797867913, "grad_norm": 0.66796875, "learning_rate": 1.1190258530254812e-05, "loss": 4.014, "step": 10690 }, { "epoch": 3.5613392187890396, "grad_norm": 0.68359375, "learning_rate": 1.1189553831273432e-05, "loss": 4.0338, "step": 10691 }, { "epoch": 3.5616723577912883, "grad_norm": 0.68359375, "learning_rate": 1.1188849089316742e-05, "loss": 4.0013, "step": 10692 }, { "epoch": 3.562005496793537, "grad_norm": 0.68359375, "learning_rate": 1.1188144304392949e-05, "loss": 3.9899, "step": 10693 }, { "epoch": 3.562338635795786, "grad_norm": 0.6875, "learning_rate": 1.1187439476510264e-05, "loss": 3.9802, "step": 10694 }, { "epoch": 3.5626717747980345, "grad_norm": 0.68359375, "learning_rate": 1.1186734605676896e-05, "loss": 4.1126, "step": 10695 }, { "epoch": 3.5630049138002833, "grad_norm": 0.734375, "learning_rate": 1.1186029691901054e-05, "loss": 4.0363, "step": 10696 }, { "epoch": 3.563338052802532, "grad_norm": 0.7421875, "learning_rate": 1.118532473519095e-05, "loss": 4.0098, "step": 10697 }, { "epoch": 3.5636711918047803, "grad_norm": 0.6875, "learning_rate": 1.1184619735554797e-05, "loss": 4.0206, "step": 10698 }, { "epoch": 3.5640043308070295, "grad_norm": 0.7109375, "learning_rate": 1.1183914693000807e-05, "loss": 4.0188, "step": 10699 }, { "epoch": 3.564337469809278, "grad_norm": 0.71484375, "learning_rate": 1.1183209607537185e-05, "loss": 3.9114, "step": 10700 }, { "epoch": 3.5646706088115265, "grad_norm": 0.6796875, "learning_rate": 1.1182504479172152e-05, "loss": 4.0457, "step": 10701 }, { "epoch": 3.5650037478137753, "grad_norm": 0.68359375, "learning_rate": 1.1181799307913918e-05, "loss": 3.9572, "step": 10702 }, { "epoch": 3.565336886816024, "grad_norm": 0.6796875, "learning_rate": 1.1181094093770698e-05, "loss": 3.9444, "step": 10703 }, { "epoch": 3.5656700258182727, "grad_norm": 0.66796875, "learning_rate": 1.1180388836750702e-05, "loss": 4.0231, "step": 10704 }, { "epoch": 3.5660031648205215, "grad_norm": 0.69140625, "learning_rate": 1.117968353686215e-05, "loss": 3.9502, "step": 10705 }, { "epoch": 3.5663363038227702, "grad_norm": 0.67578125, "learning_rate": 1.1178978194113252e-05, "loss": 3.9872, "step": 10706 }, { "epoch": 3.5666694428250185, "grad_norm": 0.640625, "learning_rate": 1.1178272808512231e-05, "loss": 4.0398, "step": 10707 }, { "epoch": 3.5670025818272677, "grad_norm": 0.6640625, "learning_rate": 1.1177567380067296e-05, "loss": 3.9988, "step": 10708 }, { "epoch": 3.567335720829516, "grad_norm": 0.71484375, "learning_rate": 1.1176861908786668e-05, "loss": 3.9893, "step": 10709 }, { "epoch": 3.5676688598317647, "grad_norm": 0.6953125, "learning_rate": 1.117615639467856e-05, "loss": 3.999, "step": 10710 }, { "epoch": 3.5680019988340135, "grad_norm": 0.6796875, "learning_rate": 1.1175450837751196e-05, "loss": 4.0254, "step": 10711 }, { "epoch": 3.568335137836262, "grad_norm": 0.6640625, "learning_rate": 1.117474523801279e-05, "loss": 4.0523, "step": 10712 }, { "epoch": 3.568668276838511, "grad_norm": 0.6796875, "learning_rate": 1.117403959547156e-05, "loss": 4.0081, "step": 10713 }, { "epoch": 3.5690014158407597, "grad_norm": 0.671875, "learning_rate": 1.1173333910135728e-05, "loss": 4.0049, "step": 10714 }, { "epoch": 3.5693345548430084, "grad_norm": 0.66015625, "learning_rate": 1.1172628182013511e-05, "loss": 4.0399, "step": 10715 }, { "epoch": 3.5696676938452567, "grad_norm": 0.69921875, "learning_rate": 1.1171922411113132e-05, "loss": 3.999, "step": 10716 }, { "epoch": 3.5700008328475055, "grad_norm": 0.62890625, "learning_rate": 1.117121659744281e-05, "loss": 4.0139, "step": 10717 }, { "epoch": 3.570333971849754, "grad_norm": 0.6875, "learning_rate": 1.1170510741010767e-05, "loss": 4.0064, "step": 10718 }, { "epoch": 3.570667110852003, "grad_norm": 0.69140625, "learning_rate": 1.1169804841825221e-05, "loss": 4.0331, "step": 10719 }, { "epoch": 3.5710002498542517, "grad_norm": 0.7421875, "learning_rate": 1.11690988998944e-05, "loss": 3.9948, "step": 10720 }, { "epoch": 3.5713333888565004, "grad_norm": 0.6953125, "learning_rate": 1.1168392915226527e-05, "loss": 3.9791, "step": 10721 }, { "epoch": 3.571666527858749, "grad_norm": 0.703125, "learning_rate": 1.1167686887829818e-05, "loss": 3.9528, "step": 10722 }, { "epoch": 3.571999666860998, "grad_norm": 0.6484375, "learning_rate": 1.1166980817712502e-05, "loss": 4.0876, "step": 10723 }, { "epoch": 3.5723328058632466, "grad_norm": 0.65234375, "learning_rate": 1.1166274704882803e-05, "loss": 4.0168, "step": 10724 }, { "epoch": 3.572665944865495, "grad_norm": 0.69140625, "learning_rate": 1.1165568549348943e-05, "loss": 3.9719, "step": 10725 }, { "epoch": 3.5729990838677437, "grad_norm": 0.67578125, "learning_rate": 1.1164862351119151e-05, "loss": 4.0606, "step": 10726 }, { "epoch": 3.5733322228699924, "grad_norm": 0.65625, "learning_rate": 1.116415611020165e-05, "loss": 3.9615, "step": 10727 }, { "epoch": 3.573665361872241, "grad_norm": 0.65234375, "learning_rate": 1.1163449826604667e-05, "loss": 4.0431, "step": 10728 }, { "epoch": 3.57399850087449, "grad_norm": 0.66015625, "learning_rate": 1.1162743500336428e-05, "loss": 3.9088, "step": 10729 }, { "epoch": 3.5743316398767386, "grad_norm": 0.6875, "learning_rate": 1.1162037131405161e-05, "loss": 3.9418, "step": 10730 }, { "epoch": 3.5746647788789874, "grad_norm": 0.66015625, "learning_rate": 1.1161330719819093e-05, "loss": 3.9448, "step": 10731 }, { "epoch": 3.574997917881236, "grad_norm": 0.69140625, "learning_rate": 1.1160624265586452e-05, "loss": 3.928, "step": 10732 }, { "epoch": 3.575331056883485, "grad_norm": 0.65234375, "learning_rate": 1.1159917768715469e-05, "loss": 4.0466, "step": 10733 }, { "epoch": 3.575664195885733, "grad_norm": 0.65234375, "learning_rate": 1.115921122921437e-05, "loss": 3.9907, "step": 10734 }, { "epoch": 3.575997334887982, "grad_norm": 0.65234375, "learning_rate": 1.1158504647091387e-05, "loss": 4.0051, "step": 10735 }, { "epoch": 3.5763304738902306, "grad_norm": 0.640625, "learning_rate": 1.1157798022354748e-05, "loss": 3.9691, "step": 10736 }, { "epoch": 3.5766636128924794, "grad_norm": 0.67578125, "learning_rate": 1.1157091355012685e-05, "loss": 3.9927, "step": 10737 }, { "epoch": 3.576996751894728, "grad_norm": 0.66796875, "learning_rate": 1.115638464507343e-05, "loss": 4.0669, "step": 10738 }, { "epoch": 3.577329890896977, "grad_norm": 0.68359375, "learning_rate": 1.1155677892545213e-05, "loss": 3.958, "step": 10739 }, { "epoch": 3.5776630298992256, "grad_norm": 0.6484375, "learning_rate": 1.1154971097436266e-05, "loss": 4.0338, "step": 10740 }, { "epoch": 3.577996168901474, "grad_norm": 0.65625, "learning_rate": 1.1154264259754824e-05, "loss": 3.9904, "step": 10741 }, { "epoch": 3.578329307903723, "grad_norm": 0.68359375, "learning_rate": 1.1153557379509118e-05, "loss": 4.0275, "step": 10742 }, { "epoch": 3.5786624469059714, "grad_norm": 0.6796875, "learning_rate": 1.115285045670738e-05, "loss": 3.9657, "step": 10743 }, { "epoch": 3.57899558590822, "grad_norm": 0.6875, "learning_rate": 1.115214349135785e-05, "loss": 4.0515, "step": 10744 }, { "epoch": 3.579328724910469, "grad_norm": 0.64453125, "learning_rate": 1.1151436483468757e-05, "loss": 3.9848, "step": 10745 }, { "epoch": 3.5796618639127176, "grad_norm": 0.69140625, "learning_rate": 1.1150729433048336e-05, "loss": 4.0255, "step": 10746 }, { "epoch": 3.5799950029149663, "grad_norm": 0.6875, "learning_rate": 1.1150022340104828e-05, "loss": 4.0881, "step": 10747 }, { "epoch": 3.580328141917215, "grad_norm": 0.6875, "learning_rate": 1.1149315204646463e-05, "loss": 4.0907, "step": 10748 }, { "epoch": 3.580661280919464, "grad_norm": 0.62109375, "learning_rate": 1.1148608026681482e-05, "loss": 3.9195, "step": 10749 }, { "epoch": 3.580994419921712, "grad_norm": 0.6875, "learning_rate": 1.114790080621812e-05, "loss": 4.0164, "step": 10750 }, { "epoch": 3.5813275589239613, "grad_norm": 0.68359375, "learning_rate": 1.1147193543264613e-05, "loss": 4.0511, "step": 10751 }, { "epoch": 3.5816606979262096, "grad_norm": 0.66796875, "learning_rate": 1.1146486237829202e-05, "loss": 3.9949, "step": 10752 }, { "epoch": 3.5819938369284583, "grad_norm": 0.64453125, "learning_rate": 1.1145778889920127e-05, "loss": 4.0098, "step": 10753 }, { "epoch": 3.582326975930707, "grad_norm": 0.69921875, "learning_rate": 1.1145071499545622e-05, "loss": 4.0278, "step": 10754 }, { "epoch": 3.5826601149329558, "grad_norm": 0.6640625, "learning_rate": 1.1144364066713929e-05, "loss": 4.028, "step": 10755 }, { "epoch": 3.5829932539352045, "grad_norm": 0.69921875, "learning_rate": 1.114365659143329e-05, "loss": 4.0582, "step": 10756 }, { "epoch": 3.5833263929374533, "grad_norm": 0.66796875, "learning_rate": 1.1142949073711945e-05, "loss": 4.0005, "step": 10757 }, { "epoch": 3.583659531939702, "grad_norm": 0.67578125, "learning_rate": 1.1142241513558133e-05, "loss": 3.9288, "step": 10758 }, { "epoch": 3.5839926709419503, "grad_norm": 0.70703125, "learning_rate": 1.1141533910980095e-05, "loss": 3.9337, "step": 10759 }, { "epoch": 3.5843258099441995, "grad_norm": 0.70703125, "learning_rate": 1.1140826265986073e-05, "loss": 4.0011, "step": 10760 }, { "epoch": 3.5846589489464478, "grad_norm": 0.64453125, "learning_rate": 1.1140118578584314e-05, "loss": 3.9551, "step": 10761 }, { "epoch": 3.5849920879486965, "grad_norm": 0.6875, "learning_rate": 1.1139410848783057e-05, "loss": 3.9939, "step": 10762 }, { "epoch": 3.5853252269509452, "grad_norm": 0.69140625, "learning_rate": 1.1138703076590547e-05, "loss": 4.0181, "step": 10763 }, { "epoch": 3.585658365953194, "grad_norm": 0.6796875, "learning_rate": 1.1137995262015028e-05, "loss": 4.0324, "step": 10764 }, { "epoch": 3.5859915049554427, "grad_norm": 0.671875, "learning_rate": 1.1137287405064744e-05, "loss": 3.9797, "step": 10765 }, { "epoch": 3.5863246439576915, "grad_norm": 0.69140625, "learning_rate": 1.113657950574794e-05, "loss": 4.0606, "step": 10766 }, { "epoch": 3.58665778295994, "grad_norm": 0.69921875, "learning_rate": 1.113587156407286e-05, "loss": 4.0784, "step": 10767 }, { "epoch": 3.5869909219621885, "grad_norm": 0.69921875, "learning_rate": 1.1135163580047752e-05, "loss": 4.0073, "step": 10768 }, { "epoch": 3.5873240609644377, "grad_norm": 0.69140625, "learning_rate": 1.1134455553680865e-05, "loss": 4.0125, "step": 10769 }, { "epoch": 3.587657199966686, "grad_norm": 0.6953125, "learning_rate": 1.1133747484980439e-05, "loss": 4.0158, "step": 10770 }, { "epoch": 3.5879903389689347, "grad_norm": 0.6796875, "learning_rate": 1.1133039373954728e-05, "loss": 3.9734, "step": 10771 }, { "epoch": 3.5883234779711835, "grad_norm": 0.72265625, "learning_rate": 1.1132331220611976e-05, "loss": 3.9934, "step": 10772 }, { "epoch": 3.588656616973432, "grad_norm": 0.70703125, "learning_rate": 1.1131623024960435e-05, "loss": 3.9903, "step": 10773 }, { "epoch": 3.588989755975681, "grad_norm": 0.6875, "learning_rate": 1.113091478700835e-05, "loss": 4.0065, "step": 10774 }, { "epoch": 3.5893228949779297, "grad_norm": 0.69921875, "learning_rate": 1.1130206506763973e-05, "loss": 4.0272, "step": 10775 }, { "epoch": 3.5896560339801784, "grad_norm": 0.65234375, "learning_rate": 1.1129498184235551e-05, "loss": 4.0123, "step": 10776 }, { "epoch": 3.5899891729824267, "grad_norm": 0.66796875, "learning_rate": 1.112878981943134e-05, "loss": 4.0083, "step": 10777 }, { "epoch": 3.5903223119846754, "grad_norm": 0.6796875, "learning_rate": 1.1128081412359583e-05, "loss": 3.9707, "step": 10778 }, { "epoch": 3.590655450986924, "grad_norm": 0.65234375, "learning_rate": 1.112737296302854e-05, "loss": 4.0569, "step": 10779 }, { "epoch": 3.590988589989173, "grad_norm": 0.671875, "learning_rate": 1.112666447144646e-05, "loss": 4.0606, "step": 10780 }, { "epoch": 3.5913217289914217, "grad_norm": 0.66796875, "learning_rate": 1.1125955937621591e-05, "loss": 4.0363, "step": 10781 }, { "epoch": 3.5916548679936704, "grad_norm": 0.6484375, "learning_rate": 1.112524736156219e-05, "loss": 4.0192, "step": 10782 }, { "epoch": 3.591988006995919, "grad_norm": 0.67578125, "learning_rate": 1.112453874327651e-05, "loss": 4.0346, "step": 10783 }, { "epoch": 3.592321145998168, "grad_norm": 0.70703125, "learning_rate": 1.1123830082772804e-05, "loss": 3.972, "step": 10784 }, { "epoch": 3.5926542850004166, "grad_norm": 0.67578125, "learning_rate": 1.1123121380059327e-05, "loss": 3.9064, "step": 10785 }, { "epoch": 3.592987424002665, "grad_norm": 0.67578125, "learning_rate": 1.1122412635144333e-05, "loss": 4.0023, "step": 10786 }, { "epoch": 3.5933205630049136, "grad_norm": 0.68359375, "learning_rate": 1.1121703848036079e-05, "loss": 4.0004, "step": 10787 }, { "epoch": 3.5936537020071624, "grad_norm": 0.65625, "learning_rate": 1.1120995018742819e-05, "loss": 4.0647, "step": 10788 }, { "epoch": 3.593986841009411, "grad_norm": 0.72265625, "learning_rate": 1.1120286147272811e-05, "loss": 3.9747, "step": 10789 }, { "epoch": 3.59431998001166, "grad_norm": 0.66796875, "learning_rate": 1.111957723363431e-05, "loss": 4.0472, "step": 10790 }, { "epoch": 3.5946531190139086, "grad_norm": 0.703125, "learning_rate": 1.1118868277835576e-05, "loss": 3.9651, "step": 10791 }, { "epoch": 3.5949862580161573, "grad_norm": 0.671875, "learning_rate": 1.1118159279884862e-05, "loss": 4.0497, "step": 10792 }, { "epoch": 3.595319397018406, "grad_norm": 0.7109375, "learning_rate": 1.1117450239790431e-05, "loss": 3.9844, "step": 10793 }, { "epoch": 3.595652536020655, "grad_norm": 0.66796875, "learning_rate": 1.111674115756054e-05, "loss": 4.0186, "step": 10794 }, { "epoch": 3.595985675022903, "grad_norm": 0.65625, "learning_rate": 1.1116032033203448e-05, "loss": 4.0152, "step": 10795 }, { "epoch": 3.596318814025152, "grad_norm": 0.69921875, "learning_rate": 1.1115322866727416e-05, "loss": 4.0117, "step": 10796 }, { "epoch": 3.5966519530274006, "grad_norm": 0.67578125, "learning_rate": 1.1114613658140701e-05, "loss": 3.9782, "step": 10797 }, { "epoch": 3.5969850920296493, "grad_norm": 0.6640625, "learning_rate": 1.1113904407451569e-05, "loss": 4.0614, "step": 10798 }, { "epoch": 3.597318231031898, "grad_norm": 0.66796875, "learning_rate": 1.1113195114668276e-05, "loss": 4.0119, "step": 10799 }, { "epoch": 3.597651370034147, "grad_norm": 0.67578125, "learning_rate": 1.1112485779799088e-05, "loss": 4.0199, "step": 10800 }, { "epoch": 3.5979845090363956, "grad_norm": 0.6953125, "learning_rate": 1.1111776402852263e-05, "loss": 3.9513, "step": 10801 }, { "epoch": 3.5983176480386443, "grad_norm": 0.671875, "learning_rate": 1.111106698383607e-05, "loss": 3.9309, "step": 10802 }, { "epoch": 3.598650787040893, "grad_norm": 0.69140625, "learning_rate": 1.1110357522758764e-05, "loss": 3.9775, "step": 10803 }, { "epoch": 3.5989839260431413, "grad_norm": 0.6953125, "learning_rate": 1.1109648019628617e-05, "loss": 4.0548, "step": 10804 }, { "epoch": 3.59931706504539, "grad_norm": 0.69140625, "learning_rate": 1.1108938474453884e-05, "loss": 3.9742, "step": 10805 }, { "epoch": 3.599650204047639, "grad_norm": 0.671875, "learning_rate": 1.1108228887242838e-05, "loss": 3.9939, "step": 10806 }, { "epoch": 3.5999833430498875, "grad_norm": 0.68359375, "learning_rate": 1.110751925800374e-05, "loss": 4.0599, "step": 10807 }, { "epoch": 3.6003164820521363, "grad_norm": 0.65625, "learning_rate": 1.1106809586744855e-05, "loss": 3.9902, "step": 10808 }, { "epoch": 3.600649621054385, "grad_norm": 0.70703125, "learning_rate": 1.1106099873474455e-05, "loss": 3.9646, "step": 10809 }, { "epoch": 3.6009827600566338, "grad_norm": 0.65625, "learning_rate": 1.1105390118200798e-05, "loss": 4.0598, "step": 10810 }, { "epoch": 3.601315899058882, "grad_norm": 0.6640625, "learning_rate": 1.1104680320932158e-05, "loss": 3.9504, "step": 10811 }, { "epoch": 3.6016490380611312, "grad_norm": 0.6484375, "learning_rate": 1.11039704816768e-05, "loss": 3.9811, "step": 10812 }, { "epoch": 3.6019821770633795, "grad_norm": 0.69140625, "learning_rate": 1.1103260600442991e-05, "loss": 4.0038, "step": 10813 }, { "epoch": 3.6023153160656283, "grad_norm": 0.67578125, "learning_rate": 1.1102550677239e-05, "loss": 4.0097, "step": 10814 }, { "epoch": 3.602648455067877, "grad_norm": 0.65625, "learning_rate": 1.1101840712073098e-05, "loss": 4.0574, "step": 10815 }, { "epoch": 3.6029815940701257, "grad_norm": 0.6875, "learning_rate": 1.1101130704953553e-05, "loss": 3.9008, "step": 10816 }, { "epoch": 3.6033147330723745, "grad_norm": 0.71875, "learning_rate": 1.1100420655888636e-05, "loss": 4.0739, "step": 10817 }, { "epoch": 3.6036478720746232, "grad_norm": 0.66796875, "learning_rate": 1.1099710564886615e-05, "loss": 3.9956, "step": 10818 }, { "epoch": 3.603981011076872, "grad_norm": 0.69921875, "learning_rate": 1.1099000431955764e-05, "loss": 4.0565, "step": 10819 }, { "epoch": 3.6043141500791203, "grad_norm": 0.65234375, "learning_rate": 1.1098290257104353e-05, "loss": 4.0245, "step": 10820 }, { "epoch": 3.6046472890813694, "grad_norm": 0.6953125, "learning_rate": 1.1097580040340656e-05, "loss": 4.0505, "step": 10821 }, { "epoch": 3.6049804280836177, "grad_norm": 0.6796875, "learning_rate": 1.1096869781672943e-05, "loss": 4.0909, "step": 10822 }, { "epoch": 3.6053135670858665, "grad_norm": 0.7109375, "learning_rate": 1.1096159481109486e-05, "loss": 3.9543, "step": 10823 }, { "epoch": 3.605646706088115, "grad_norm": 0.64453125, "learning_rate": 1.1095449138658563e-05, "loss": 4.0221, "step": 10824 }, { "epoch": 3.605979845090364, "grad_norm": 0.6953125, "learning_rate": 1.1094738754328443e-05, "loss": 4.012, "step": 10825 }, { "epoch": 3.6063129840926127, "grad_norm": 0.6640625, "learning_rate": 1.1094028328127405e-05, "loss": 4.0147, "step": 10826 }, { "epoch": 3.6066461230948614, "grad_norm": 0.69921875, "learning_rate": 1.1093317860063718e-05, "loss": 4.0377, "step": 10827 }, { "epoch": 3.60697926209711, "grad_norm": 0.69140625, "learning_rate": 1.1092607350145663e-05, "loss": 4.0372, "step": 10828 }, { "epoch": 3.6073124010993585, "grad_norm": 0.66015625, "learning_rate": 1.1091896798381516e-05, "loss": 4.0339, "step": 10829 }, { "epoch": 3.6076455401016077, "grad_norm": 0.6875, "learning_rate": 1.1091186204779547e-05, "loss": 3.9806, "step": 10830 }, { "epoch": 3.607978679103856, "grad_norm": 0.6875, "learning_rate": 1.1090475569348042e-05, "loss": 4.0399, "step": 10831 }, { "epoch": 3.6083118181061047, "grad_norm": 0.6640625, "learning_rate": 1.108976489209527e-05, "loss": 4.0942, "step": 10832 }, { "epoch": 3.6086449571083534, "grad_norm": 0.6796875, "learning_rate": 1.1089054173029516e-05, "loss": 3.9745, "step": 10833 }, { "epoch": 3.608978096110602, "grad_norm": 0.70703125, "learning_rate": 1.108834341215905e-05, "loss": 3.9786, "step": 10834 }, { "epoch": 3.609311235112851, "grad_norm": 0.68359375, "learning_rate": 1.108763260949216e-05, "loss": 4.0388, "step": 10835 }, { "epoch": 3.6096443741150996, "grad_norm": 0.7265625, "learning_rate": 1.1086921765037118e-05, "loss": 4.0906, "step": 10836 }, { "epoch": 3.6099775131173484, "grad_norm": 0.69140625, "learning_rate": 1.1086210878802209e-05, "loss": 4.0272, "step": 10837 }, { "epoch": 3.6103106521195967, "grad_norm": 0.6796875, "learning_rate": 1.108549995079571e-05, "loss": 3.985, "step": 10838 }, { "epoch": 3.610643791121846, "grad_norm": 0.67578125, "learning_rate": 1.10847889810259e-05, "loss": 4.0364, "step": 10839 }, { "epoch": 3.610976930124094, "grad_norm": 0.66015625, "learning_rate": 1.1084077969501067e-05, "loss": 4.049, "step": 10840 }, { "epoch": 3.611310069126343, "grad_norm": 0.66015625, "learning_rate": 1.1083366916229486e-05, "loss": 4.058, "step": 10841 }, { "epoch": 3.6116432081285916, "grad_norm": 0.69921875, "learning_rate": 1.1082655821219445e-05, "loss": 4.0226, "step": 10842 }, { "epoch": 3.6119763471308404, "grad_norm": 0.68359375, "learning_rate": 1.1081944684479222e-05, "loss": 4.0632, "step": 10843 }, { "epoch": 3.612309486133089, "grad_norm": 0.66796875, "learning_rate": 1.1081233506017103e-05, "loss": 4.0863, "step": 10844 }, { "epoch": 3.612642625135338, "grad_norm": 0.71484375, "learning_rate": 1.1080522285841368e-05, "loss": 4.016, "step": 10845 }, { "epoch": 3.6129757641375866, "grad_norm": 0.67578125, "learning_rate": 1.1079811023960306e-05, "loss": 4.0124, "step": 10846 }, { "epoch": 3.613308903139835, "grad_norm": 0.6640625, "learning_rate": 1.1079099720382197e-05, "loss": 3.9907, "step": 10847 }, { "epoch": 3.6136420421420836, "grad_norm": 0.6953125, "learning_rate": 1.1078388375115332e-05, "loss": 3.9422, "step": 10848 }, { "epoch": 3.6139751811443324, "grad_norm": 0.66796875, "learning_rate": 1.1077676988167991e-05, "loss": 3.9561, "step": 10849 }, { "epoch": 3.614308320146581, "grad_norm": 0.671875, "learning_rate": 1.1076965559548462e-05, "loss": 3.9254, "step": 10850 }, { "epoch": 3.61464145914883, "grad_norm": 0.6953125, "learning_rate": 1.1076254089265035e-05, "loss": 4.0791, "step": 10851 }, { "epoch": 3.6149745981510786, "grad_norm": 0.671875, "learning_rate": 1.1075542577325989e-05, "loss": 3.9943, "step": 10852 }, { "epoch": 3.6153077371533273, "grad_norm": 0.6875, "learning_rate": 1.107483102373962e-05, "loss": 4.0678, "step": 10853 }, { "epoch": 3.615640876155576, "grad_norm": 0.6953125, "learning_rate": 1.1074119428514212e-05, "loss": 3.9931, "step": 10854 }, { "epoch": 3.615974015157825, "grad_norm": 0.67578125, "learning_rate": 1.1073407791658054e-05, "loss": 4.0548, "step": 10855 }, { "epoch": 3.616307154160073, "grad_norm": 0.67578125, "learning_rate": 1.1072696113179435e-05, "loss": 4.0263, "step": 10856 }, { "epoch": 3.616640293162322, "grad_norm": 0.66796875, "learning_rate": 1.1071984393086647e-05, "loss": 4.132, "step": 10857 }, { "epoch": 3.6169734321645706, "grad_norm": 0.625, "learning_rate": 1.1071272631387975e-05, "loss": 4.0381, "step": 10858 }, { "epoch": 3.6173065711668193, "grad_norm": 0.68359375, "learning_rate": 1.1070560828091714e-05, "loss": 3.9588, "step": 10859 }, { "epoch": 3.617639710169068, "grad_norm": 0.671875, "learning_rate": 1.1069848983206152e-05, "loss": 4.0055, "step": 10860 }, { "epoch": 3.617972849171317, "grad_norm": 0.65234375, "learning_rate": 1.106913709673958e-05, "loss": 4.0374, "step": 10861 }, { "epoch": 3.6183059881735655, "grad_norm": 0.68359375, "learning_rate": 1.1068425168700293e-05, "loss": 4.037, "step": 10862 }, { "epoch": 3.6186391271758143, "grad_norm": 0.7109375, "learning_rate": 1.1067713199096583e-05, "loss": 3.9541, "step": 10863 }, { "epoch": 3.618972266178063, "grad_norm": 0.69140625, "learning_rate": 1.1067001187936742e-05, "loss": 3.9805, "step": 10864 }, { "epoch": 3.6193054051803113, "grad_norm": 0.66015625, "learning_rate": 1.1066289135229063e-05, "loss": 4.029, "step": 10865 }, { "epoch": 3.61963854418256, "grad_norm": 0.6875, "learning_rate": 1.106557704098184e-05, "loss": 3.992, "step": 10866 }, { "epoch": 3.6199716831848088, "grad_norm": 0.68359375, "learning_rate": 1.1064864905203365e-05, "loss": 3.9977, "step": 10867 }, { "epoch": 3.6203048221870575, "grad_norm": 0.703125, "learning_rate": 1.1064152727901938e-05, "loss": 4.0358, "step": 10868 }, { "epoch": 3.6206379611893063, "grad_norm": 0.6953125, "learning_rate": 1.1063440509085852e-05, "loss": 3.9851, "step": 10869 }, { "epoch": 3.620971100191555, "grad_norm": 0.65234375, "learning_rate": 1.1062728248763402e-05, "loss": 3.9758, "step": 10870 }, { "epoch": 3.6213042391938037, "grad_norm": 0.6640625, "learning_rate": 1.1062015946942883e-05, "loss": 3.9772, "step": 10871 }, { "epoch": 3.621637378196052, "grad_norm": 0.69140625, "learning_rate": 1.1061303603632595e-05, "loss": 3.9815, "step": 10872 }, { "epoch": 3.621970517198301, "grad_norm": 0.65234375, "learning_rate": 1.1060591218840834e-05, "loss": 3.986, "step": 10873 }, { "epoch": 3.6223036562005495, "grad_norm": 0.671875, "learning_rate": 1.1059878792575897e-05, "loss": 3.9981, "step": 10874 }, { "epoch": 3.6226367952027982, "grad_norm": 0.64453125, "learning_rate": 1.1059166324846084e-05, "loss": 4.0411, "step": 10875 }, { "epoch": 3.622969934205047, "grad_norm": 0.71484375, "learning_rate": 1.105845381565969e-05, "loss": 4.0663, "step": 10876 }, { "epoch": 3.6233030732072957, "grad_norm": 0.71875, "learning_rate": 1.1057741265025018e-05, "loss": 4.0698, "step": 10877 }, { "epoch": 3.6236362122095445, "grad_norm": 0.68359375, "learning_rate": 1.1057028672950368e-05, "loss": 4.0708, "step": 10878 }, { "epoch": 3.623969351211793, "grad_norm": 0.671875, "learning_rate": 1.1056316039444037e-05, "loss": 3.9934, "step": 10879 }, { "epoch": 3.624302490214042, "grad_norm": 0.6953125, "learning_rate": 1.1055603364514326e-05, "loss": 4.0177, "step": 10880 }, { "epoch": 3.6246356292162902, "grad_norm": 0.7265625, "learning_rate": 1.1054890648169538e-05, "loss": 3.9358, "step": 10881 }, { "epoch": 3.6249687682185394, "grad_norm": 0.703125, "learning_rate": 1.1054177890417974e-05, "loss": 4.0268, "step": 10882 }, { "epoch": 3.6253019072207877, "grad_norm": 0.7109375, "learning_rate": 1.1053465091267937e-05, "loss": 4.0046, "step": 10883 }, { "epoch": 3.6256350462230365, "grad_norm": 0.7109375, "learning_rate": 1.1052752250727728e-05, "loss": 3.9716, "step": 10884 }, { "epoch": 3.625968185225285, "grad_norm": 0.7421875, "learning_rate": 1.1052039368805647e-05, "loss": 3.94, "step": 10885 }, { "epoch": 3.626301324227534, "grad_norm": 0.65625, "learning_rate": 1.1051326445510006e-05, "loss": 3.9007, "step": 10886 }, { "epoch": 3.6266344632297827, "grad_norm": 0.6875, "learning_rate": 1.1050613480849101e-05, "loss": 3.9646, "step": 10887 }, { "epoch": 3.6269676022320314, "grad_norm": 0.7109375, "learning_rate": 1.1049900474831243e-05, "loss": 3.9944, "step": 10888 }, { "epoch": 3.62730074123428, "grad_norm": 0.734375, "learning_rate": 1.104918742746473e-05, "loss": 3.9717, "step": 10889 }, { "epoch": 3.6276338802365284, "grad_norm": 0.69921875, "learning_rate": 1.1048474338757871e-05, "loss": 4.015, "step": 10890 }, { "epoch": 3.6279670192387776, "grad_norm": 0.65625, "learning_rate": 1.1047761208718974e-05, "loss": 4.0106, "step": 10891 }, { "epoch": 3.628300158241026, "grad_norm": 0.703125, "learning_rate": 1.1047048037356344e-05, "loss": 4.0241, "step": 10892 }, { "epoch": 3.6286332972432747, "grad_norm": 0.68359375, "learning_rate": 1.1046334824678283e-05, "loss": 3.9636, "step": 10893 }, { "epoch": 3.6289664362455234, "grad_norm": 0.69921875, "learning_rate": 1.1045621570693107e-05, "loss": 4.0586, "step": 10894 }, { "epoch": 3.629299575247772, "grad_norm": 0.71875, "learning_rate": 1.1044908275409117e-05, "loss": 4.0121, "step": 10895 }, { "epoch": 3.629632714250021, "grad_norm": 0.6875, "learning_rate": 1.1044194938834624e-05, "loss": 3.9946, "step": 10896 }, { "epoch": 3.6299658532522696, "grad_norm": 0.69921875, "learning_rate": 1.1043481560977939e-05, "loss": 3.9529, "step": 10897 }, { "epoch": 3.6302989922545184, "grad_norm": 0.703125, "learning_rate": 1.1042768141847366e-05, "loss": 4.0274, "step": 10898 }, { "epoch": 3.6306321312567666, "grad_norm": 0.6484375, "learning_rate": 1.104205468145122e-05, "loss": 4.0208, "step": 10899 }, { "epoch": 3.630965270259016, "grad_norm": 0.6796875, "learning_rate": 1.1041341179797808e-05, "loss": 3.8996, "step": 10900 }, { "epoch": 3.631298409261264, "grad_norm": 0.69140625, "learning_rate": 1.1040627636895443e-05, "loss": 4.0553, "step": 10901 }, { "epoch": 3.631631548263513, "grad_norm": 0.6796875, "learning_rate": 1.1039914052752433e-05, "loss": 3.9574, "step": 10902 }, { "epoch": 3.6319646872657616, "grad_norm": 0.6796875, "learning_rate": 1.1039200427377093e-05, "loss": 4.0063, "step": 10903 }, { "epoch": 3.6322978262680103, "grad_norm": 0.65625, "learning_rate": 1.1038486760777733e-05, "loss": 4.0028, "step": 10904 }, { "epoch": 3.632630965270259, "grad_norm": 0.6953125, "learning_rate": 1.1037773052962667e-05, "loss": 3.9329, "step": 10905 }, { "epoch": 3.632964104272508, "grad_norm": 0.671875, "learning_rate": 1.1037059303940208e-05, "loss": 4.0018, "step": 10906 }, { "epoch": 3.6332972432747566, "grad_norm": 0.6484375, "learning_rate": 1.103634551371867e-05, "loss": 4.0031, "step": 10907 }, { "epoch": 3.633630382277005, "grad_norm": 0.69140625, "learning_rate": 1.1035631682306367e-05, "loss": 3.9243, "step": 10908 }, { "epoch": 3.633963521279254, "grad_norm": 0.69921875, "learning_rate": 1.1034917809711608e-05, "loss": 3.9686, "step": 10909 }, { "epoch": 3.6342966602815023, "grad_norm": 0.6953125, "learning_rate": 1.103420389594272e-05, "loss": 4.0114, "step": 10910 }, { "epoch": 3.634629799283751, "grad_norm": 0.63671875, "learning_rate": 1.1033489941008009e-05, "loss": 4.0542, "step": 10911 }, { "epoch": 3.634962938286, "grad_norm": 0.6640625, "learning_rate": 1.1032775944915794e-05, "loss": 4.0466, "step": 10912 }, { "epoch": 3.6352960772882486, "grad_norm": 0.6875, "learning_rate": 1.1032061907674388e-05, "loss": 4.0095, "step": 10913 }, { "epoch": 3.6356292162904973, "grad_norm": 0.64453125, "learning_rate": 1.1031347829292117e-05, "loss": 4.0201, "step": 10914 }, { "epoch": 3.635962355292746, "grad_norm": 0.6875, "learning_rate": 1.1030633709777288e-05, "loss": 3.9882, "step": 10915 }, { "epoch": 3.6362954942949948, "grad_norm": 0.70703125, "learning_rate": 1.1029919549138225e-05, "loss": 3.9901, "step": 10916 }, { "epoch": 3.636628633297243, "grad_norm": 0.66796875, "learning_rate": 1.1029205347383246e-05, "loss": 4.0191, "step": 10917 }, { "epoch": 3.636961772299492, "grad_norm": 0.69140625, "learning_rate": 1.1028491104520669e-05, "loss": 3.9715, "step": 10918 }, { "epoch": 3.6372949113017405, "grad_norm": 0.67578125, "learning_rate": 1.1027776820558811e-05, "loss": 4.0716, "step": 10919 }, { "epoch": 3.6376280503039893, "grad_norm": 0.65234375, "learning_rate": 1.1027062495505994e-05, "loss": 4.0755, "step": 10920 }, { "epoch": 3.637961189306238, "grad_norm": 0.65625, "learning_rate": 1.1026348129370543e-05, "loss": 4.0445, "step": 10921 }, { "epoch": 3.6382943283084868, "grad_norm": 0.73046875, "learning_rate": 1.102563372216077e-05, "loss": 4.0165, "step": 10922 }, { "epoch": 3.6386274673107355, "grad_norm": 0.67578125, "learning_rate": 1.1024919273885002e-05, "loss": 3.9559, "step": 10923 }, { "epoch": 3.6389606063129842, "grad_norm": 0.6953125, "learning_rate": 1.102420478455156e-05, "loss": 4.0727, "step": 10924 }, { "epoch": 3.639293745315233, "grad_norm": 0.6640625, "learning_rate": 1.1023490254168766e-05, "loss": 4.0261, "step": 10925 }, { "epoch": 3.6396268843174813, "grad_norm": 0.734375, "learning_rate": 1.1022775682744941e-05, "loss": 3.9696, "step": 10926 }, { "epoch": 3.63996002331973, "grad_norm": 0.703125, "learning_rate": 1.102206107028841e-05, "loss": 4.0242, "step": 10927 }, { "epoch": 3.6402931623219787, "grad_norm": 0.734375, "learning_rate": 1.1021346416807496e-05, "loss": 3.9392, "step": 10928 }, { "epoch": 3.6406263013242275, "grad_norm": 0.68359375, "learning_rate": 1.1020631722310524e-05, "loss": 4.0082, "step": 10929 }, { "epoch": 3.6409594403264762, "grad_norm": 0.703125, "learning_rate": 1.101991698680582e-05, "loss": 3.984, "step": 10930 }, { "epoch": 3.641292579328725, "grad_norm": 0.72265625, "learning_rate": 1.1019202210301704e-05, "loss": 3.976, "step": 10931 }, { "epoch": 3.6416257183309737, "grad_norm": 0.72265625, "learning_rate": 1.1018487392806509e-05, "loss": 4.0206, "step": 10932 }, { "epoch": 3.6419588573332224, "grad_norm": 0.671875, "learning_rate": 1.1017772534328553e-05, "loss": 3.9627, "step": 10933 }, { "epoch": 3.642291996335471, "grad_norm": 0.70703125, "learning_rate": 1.1017057634876167e-05, "loss": 4.0086, "step": 10934 }, { "epoch": 3.6426251353377195, "grad_norm": 0.66015625, "learning_rate": 1.1016342694457678e-05, "loss": 4.0031, "step": 10935 }, { "epoch": 3.642958274339968, "grad_norm": 0.68359375, "learning_rate": 1.1015627713081413e-05, "loss": 4.0642, "step": 10936 }, { "epoch": 3.643291413342217, "grad_norm": 0.6875, "learning_rate": 1.10149126907557e-05, "loss": 4.0009, "step": 10937 }, { "epoch": 3.6436245523444657, "grad_norm": 0.640625, "learning_rate": 1.1014197627488869e-05, "loss": 4.1061, "step": 10938 }, { "epoch": 3.6439576913467144, "grad_norm": 0.6953125, "learning_rate": 1.1013482523289245e-05, "loss": 4.0761, "step": 10939 }, { "epoch": 3.644290830348963, "grad_norm": 0.70703125, "learning_rate": 1.101276737816516e-05, "loss": 3.9918, "step": 10940 }, { "epoch": 3.644623969351212, "grad_norm": 0.671875, "learning_rate": 1.1012052192124944e-05, "loss": 4.07, "step": 10941 }, { "epoch": 3.64495710835346, "grad_norm": 0.6953125, "learning_rate": 1.1011336965176927e-05, "loss": 3.9321, "step": 10942 }, { "epoch": 3.6452902473557094, "grad_norm": 0.6875, "learning_rate": 1.101062169732944e-05, "loss": 4.0052, "step": 10943 }, { "epoch": 3.6456233863579577, "grad_norm": 0.67578125, "learning_rate": 1.1009906388590816e-05, "loss": 3.9147, "step": 10944 }, { "epoch": 3.6459565253602064, "grad_norm": 0.6328125, "learning_rate": 1.1009191038969384e-05, "loss": 4.0137, "step": 10945 }, { "epoch": 3.646289664362455, "grad_norm": 0.6796875, "learning_rate": 1.1008475648473477e-05, "loss": 3.9696, "step": 10946 }, { "epoch": 3.646622803364704, "grad_norm": 0.703125, "learning_rate": 1.1007760217111428e-05, "loss": 4.0217, "step": 10947 }, { "epoch": 3.6469559423669526, "grad_norm": 0.671875, "learning_rate": 1.1007044744891569e-05, "loss": 4.0795, "step": 10948 }, { "epoch": 3.6472890813692014, "grad_norm": 0.71484375, "learning_rate": 1.1006329231822238e-05, "loss": 4.0017, "step": 10949 }, { "epoch": 3.64762222037145, "grad_norm": 0.69921875, "learning_rate": 1.1005613677911763e-05, "loss": 3.9778, "step": 10950 }, { "epoch": 3.6479553593736984, "grad_norm": 0.67578125, "learning_rate": 1.1004898083168483e-05, "loss": 4.0704, "step": 10951 }, { "epoch": 3.6482884983759476, "grad_norm": 0.64453125, "learning_rate": 1.1004182447600732e-05, "loss": 4.0207, "step": 10952 }, { "epoch": 3.648621637378196, "grad_norm": 0.67578125, "learning_rate": 1.1003466771216843e-05, "loss": 3.9875, "step": 10953 }, { "epoch": 3.6489547763804446, "grad_norm": 0.65234375, "learning_rate": 1.1002751054025159e-05, "loss": 4.0704, "step": 10954 }, { "epoch": 3.6492879153826934, "grad_norm": 0.65625, "learning_rate": 1.100203529603401e-05, "loss": 3.926, "step": 10955 }, { "epoch": 3.649621054384942, "grad_norm": 0.7265625, "learning_rate": 1.1001319497251736e-05, "loss": 3.9207, "step": 10956 }, { "epoch": 3.649954193387191, "grad_norm": 0.7109375, "learning_rate": 1.1000603657686671e-05, "loss": 4.0229, "step": 10957 }, { "epoch": 3.6502873323894396, "grad_norm": 0.6484375, "learning_rate": 1.0999887777347161e-05, "loss": 4.0239, "step": 10958 }, { "epoch": 3.6506204713916883, "grad_norm": 0.69140625, "learning_rate": 1.0999171856241536e-05, "loss": 3.9567, "step": 10959 }, { "epoch": 3.6509536103939366, "grad_norm": 0.6796875, "learning_rate": 1.0998455894378138e-05, "loss": 4.0008, "step": 10960 }, { "epoch": 3.651286749396186, "grad_norm": 0.7109375, "learning_rate": 1.0997739891765306e-05, "loss": 4.0414, "step": 10961 }, { "epoch": 3.651619888398434, "grad_norm": 0.7109375, "learning_rate": 1.0997023848411381e-05, "loss": 4.0015, "step": 10962 }, { "epoch": 3.651953027400683, "grad_norm": 0.6953125, "learning_rate": 1.0996307764324704e-05, "loss": 4.0176, "step": 10963 }, { "epoch": 3.6522861664029316, "grad_norm": 0.6953125, "learning_rate": 1.0995591639513613e-05, "loss": 4.0419, "step": 10964 }, { "epoch": 3.6526193054051803, "grad_norm": 0.69921875, "learning_rate": 1.0994875473986451e-05, "loss": 3.9726, "step": 10965 }, { "epoch": 3.652952444407429, "grad_norm": 0.68359375, "learning_rate": 1.0994159267751557e-05, "loss": 4.0909, "step": 10966 }, { "epoch": 3.653285583409678, "grad_norm": 0.69140625, "learning_rate": 1.099344302081728e-05, "loss": 3.9363, "step": 10967 }, { "epoch": 3.6536187224119265, "grad_norm": 0.65234375, "learning_rate": 1.0992726733191956e-05, "loss": 4.003, "step": 10968 }, { "epoch": 3.653951861414175, "grad_norm": 0.73046875, "learning_rate": 1.0992010404883934e-05, "loss": 4.0356, "step": 10969 }, { "epoch": 3.654285000416424, "grad_norm": 0.66796875, "learning_rate": 1.099129403590155e-05, "loss": 3.975, "step": 10970 }, { "epoch": 3.6546181394186723, "grad_norm": 0.65234375, "learning_rate": 1.0990577626253155e-05, "loss": 4.0514, "step": 10971 }, { "epoch": 3.654951278420921, "grad_norm": 0.66015625, "learning_rate": 1.0989861175947093e-05, "loss": 4.0083, "step": 10972 }, { "epoch": 3.65528441742317, "grad_norm": 0.69921875, "learning_rate": 1.0989144684991706e-05, "loss": 3.9852, "step": 10973 }, { "epoch": 3.6556175564254185, "grad_norm": 0.69921875, "learning_rate": 1.098842815339534e-05, "loss": 3.9326, "step": 10974 }, { "epoch": 3.6559506954276673, "grad_norm": 0.6953125, "learning_rate": 1.098771158116634e-05, "loss": 4.0781, "step": 10975 }, { "epoch": 3.656283834429916, "grad_norm": 0.703125, "learning_rate": 1.098699496831306e-05, "loss": 4.0892, "step": 10976 }, { "epoch": 3.6566169734321647, "grad_norm": 0.703125, "learning_rate": 1.0986278314843837e-05, "loss": 4.0328, "step": 10977 }, { "epoch": 3.656950112434413, "grad_norm": 0.6953125, "learning_rate": 1.0985561620767025e-05, "loss": 4.0393, "step": 10978 }, { "epoch": 3.657283251436662, "grad_norm": 0.7109375, "learning_rate": 1.0984844886090967e-05, "loss": 4.0059, "step": 10979 }, { "epoch": 3.6576163904389105, "grad_norm": 0.67578125, "learning_rate": 1.0984128110824017e-05, "loss": 4.0718, "step": 10980 }, { "epoch": 3.6579495294411593, "grad_norm": 0.69921875, "learning_rate": 1.0983411294974522e-05, "loss": 4.0448, "step": 10981 }, { "epoch": 3.658282668443408, "grad_norm": 0.69921875, "learning_rate": 1.0982694438550828e-05, "loss": 4.0662, "step": 10982 }, { "epoch": 3.6586158074456567, "grad_norm": 0.6640625, "learning_rate": 1.0981977541561287e-05, "loss": 4.0205, "step": 10983 }, { "epoch": 3.6589489464479055, "grad_norm": 0.65234375, "learning_rate": 1.0981260604014251e-05, "loss": 3.9465, "step": 10984 }, { "epoch": 3.659282085450154, "grad_norm": 0.63671875, "learning_rate": 1.0980543625918068e-05, "loss": 4.0087, "step": 10985 }, { "epoch": 3.659615224452403, "grad_norm": 0.67578125, "learning_rate": 1.0979826607281092e-05, "loss": 4.0789, "step": 10986 }, { "epoch": 3.6599483634546512, "grad_norm": 0.65625, "learning_rate": 1.0979109548111674e-05, "loss": 4.0198, "step": 10987 }, { "epoch": 3.6602815024569, "grad_norm": 0.6484375, "learning_rate": 1.0978392448418163e-05, "loss": 4.041, "step": 10988 }, { "epoch": 3.6606146414591487, "grad_norm": 0.66796875, "learning_rate": 1.0977675308208916e-05, "loss": 4.0076, "step": 10989 }, { "epoch": 3.6609477804613975, "grad_norm": 0.68359375, "learning_rate": 1.0976958127492284e-05, "loss": 3.9894, "step": 10990 }, { "epoch": 3.661280919463646, "grad_norm": 0.62890625, "learning_rate": 1.0976240906276621e-05, "loss": 4.0529, "step": 10991 }, { "epoch": 3.661614058465895, "grad_norm": 0.671875, "learning_rate": 1.097552364457028e-05, "loss": 4.0496, "step": 10992 }, { "epoch": 3.6619471974681437, "grad_norm": 0.69921875, "learning_rate": 1.0974806342381619e-05, "loss": 3.9659, "step": 10993 }, { "epoch": 3.6622803364703924, "grad_norm": 0.67578125, "learning_rate": 1.0974088999718987e-05, "loss": 4.0039, "step": 10994 }, { "epoch": 3.662613475472641, "grad_norm": 0.68359375, "learning_rate": 1.0973371616590746e-05, "loss": 3.9774, "step": 10995 }, { "epoch": 3.6629466144748895, "grad_norm": 0.71484375, "learning_rate": 1.0972654193005248e-05, "loss": 3.9813, "step": 10996 }, { "epoch": 3.663279753477138, "grad_norm": 0.65625, "learning_rate": 1.0971936728970848e-05, "loss": 4.0301, "step": 10997 }, { "epoch": 3.663612892479387, "grad_norm": 0.69921875, "learning_rate": 1.097121922449591e-05, "loss": 3.9854, "step": 10998 }, { "epoch": 3.6639460314816357, "grad_norm": 0.67578125, "learning_rate": 1.0970501679588785e-05, "loss": 3.963, "step": 10999 }, { "epoch": 3.6642791704838844, "grad_norm": 0.66796875, "learning_rate": 1.0969784094257832e-05, "loss": 3.9773, "step": 11000 }, { "epoch": 3.664612309486133, "grad_norm": 0.66796875, "learning_rate": 1.0969066468511408e-05, "loss": 4.0292, "step": 11001 }, { "epoch": 3.664945448488382, "grad_norm": 0.66796875, "learning_rate": 1.0968348802357874e-05, "loss": 3.9964, "step": 11002 }, { "epoch": 3.6652785874906306, "grad_norm": 0.6875, "learning_rate": 1.096763109580559e-05, "loss": 4.068, "step": 11003 }, { "epoch": 3.6656117264928794, "grad_norm": 0.7265625, "learning_rate": 1.0966913348862917e-05, "loss": 4.0544, "step": 11004 }, { "epoch": 3.6659448654951277, "grad_norm": 0.65625, "learning_rate": 1.096619556153821e-05, "loss": 4.0294, "step": 11005 }, { "epoch": 3.6662780044973764, "grad_norm": 0.703125, "learning_rate": 1.0965477733839832e-05, "loss": 3.9636, "step": 11006 }, { "epoch": 3.666611143499625, "grad_norm": 0.703125, "learning_rate": 1.0964759865776146e-05, "loss": 4.0188, "step": 11007 }, { "epoch": 3.666944282501874, "grad_norm": 0.6796875, "learning_rate": 1.0964041957355512e-05, "loss": 4.065, "step": 11008 }, { "epoch": 3.6672774215041226, "grad_norm": 0.6875, "learning_rate": 1.0963324008586293e-05, "loss": 4.031, "step": 11009 }, { "epoch": 3.6676105605063714, "grad_norm": 0.67578125, "learning_rate": 1.0962606019476849e-05, "loss": 3.9236, "step": 11010 }, { "epoch": 3.66794369950862, "grad_norm": 0.65625, "learning_rate": 1.0961887990035545e-05, "loss": 3.9702, "step": 11011 }, { "epoch": 3.6682768385108684, "grad_norm": 0.6953125, "learning_rate": 1.0961169920270746e-05, "loss": 3.9248, "step": 11012 }, { "epoch": 3.6686099775131176, "grad_norm": 0.6484375, "learning_rate": 1.0960451810190814e-05, "loss": 4.0158, "step": 11013 }, { "epoch": 3.668943116515366, "grad_norm": 0.71875, "learning_rate": 1.0959733659804112e-05, "loss": 4.013, "step": 11014 }, { "epoch": 3.6692762555176146, "grad_norm": 0.6953125, "learning_rate": 1.0959015469119009e-05, "loss": 4.0723, "step": 11015 }, { "epoch": 3.6696093945198633, "grad_norm": 0.65234375, "learning_rate": 1.0958297238143865e-05, "loss": 4.0332, "step": 11016 }, { "epoch": 3.669942533522112, "grad_norm": 0.72265625, "learning_rate": 1.0957578966887052e-05, "loss": 4.0312, "step": 11017 }, { "epoch": 3.670275672524361, "grad_norm": 0.703125, "learning_rate": 1.0956860655356932e-05, "loss": 4.0307, "step": 11018 }, { "epoch": 3.6706088115266096, "grad_norm": 0.70703125, "learning_rate": 1.0956142303561872e-05, "loss": 3.9567, "step": 11019 }, { "epoch": 3.6709419505288583, "grad_norm": 0.70703125, "learning_rate": 1.095542391151024e-05, "loss": 3.9817, "step": 11020 }, { "epoch": 3.6712750895311066, "grad_norm": 0.73046875, "learning_rate": 1.0954705479210407e-05, "loss": 3.9765, "step": 11021 }, { "epoch": 3.6716082285333558, "grad_norm": 0.6953125, "learning_rate": 1.0953987006670737e-05, "loss": 4.0408, "step": 11022 }, { "epoch": 3.671941367535604, "grad_norm": 0.6875, "learning_rate": 1.0953268493899597e-05, "loss": 4.0764, "step": 11023 }, { "epoch": 3.672274506537853, "grad_norm": 0.671875, "learning_rate": 1.0952549940905361e-05, "loss": 3.9782, "step": 11024 }, { "epoch": 3.6726076455401016, "grad_norm": 0.6640625, "learning_rate": 1.0951831347696393e-05, "loss": 4.0525, "step": 11025 }, { "epoch": 3.6729407845423503, "grad_norm": 0.7421875, "learning_rate": 1.0951112714281072e-05, "loss": 4.0345, "step": 11026 }, { "epoch": 3.673273923544599, "grad_norm": 0.6953125, "learning_rate": 1.095039404066776e-05, "loss": 4.0365, "step": 11027 }, { "epoch": 3.6736070625468478, "grad_norm": 0.66015625, "learning_rate": 1.0949675326864832e-05, "loss": 3.9761, "step": 11028 }, { "epoch": 3.6739402015490965, "grad_norm": 0.73828125, "learning_rate": 1.0948956572880657e-05, "loss": 3.9809, "step": 11029 }, { "epoch": 3.674273340551345, "grad_norm": 0.65234375, "learning_rate": 1.0948237778723608e-05, "loss": 3.9819, "step": 11030 }, { "epoch": 3.674606479553594, "grad_norm": 0.671875, "learning_rate": 1.0947518944402058e-05, "loss": 3.9767, "step": 11031 }, { "epoch": 3.6749396185558423, "grad_norm": 0.7109375, "learning_rate": 1.094680006992438e-05, "loss": 3.9876, "step": 11032 }, { "epoch": 3.675272757558091, "grad_norm": 0.71484375, "learning_rate": 1.0946081155298949e-05, "loss": 4.0045, "step": 11033 }, { "epoch": 3.6756058965603398, "grad_norm": 0.6953125, "learning_rate": 1.0945362200534132e-05, "loss": 4.0402, "step": 11034 }, { "epoch": 3.6759390355625885, "grad_norm": 0.6875, "learning_rate": 1.0944643205638315e-05, "loss": 3.999, "step": 11035 }, { "epoch": 3.6762721745648372, "grad_norm": 0.70703125, "learning_rate": 1.0943924170619858e-05, "loss": 4.0014, "step": 11036 }, { "epoch": 3.676605313567086, "grad_norm": 0.671875, "learning_rate": 1.094320509548715e-05, "loss": 3.9846, "step": 11037 }, { "epoch": 3.6769384525693347, "grad_norm": 0.6953125, "learning_rate": 1.0942485980248558e-05, "loss": 4.0093, "step": 11038 }, { "epoch": 3.677271591571583, "grad_norm": 0.69140625, "learning_rate": 1.0941766824912461e-05, "loss": 4.0832, "step": 11039 }, { "epoch": 3.677604730573832, "grad_norm": 0.68359375, "learning_rate": 1.0941047629487235e-05, "loss": 4.0616, "step": 11040 }, { "epoch": 3.6779378695760805, "grad_norm": 0.66796875, "learning_rate": 1.0940328393981257e-05, "loss": 4.0695, "step": 11041 }, { "epoch": 3.6782710085783292, "grad_norm": 0.69140625, "learning_rate": 1.0939609118402906e-05, "loss": 4.0486, "step": 11042 }, { "epoch": 3.678604147580578, "grad_norm": 0.7109375, "learning_rate": 1.0938889802760557e-05, "loss": 4.0357, "step": 11043 }, { "epoch": 3.6789372865828267, "grad_norm": 0.6875, "learning_rate": 1.0938170447062595e-05, "loss": 4.0989, "step": 11044 }, { "epoch": 3.6792704255850754, "grad_norm": 0.69140625, "learning_rate": 1.093745105131739e-05, "loss": 3.9991, "step": 11045 }, { "epoch": 3.679603564587324, "grad_norm": 0.671875, "learning_rate": 1.0936731615533328e-05, "loss": 3.9746, "step": 11046 }, { "epoch": 3.679936703589573, "grad_norm": 0.69921875, "learning_rate": 1.0936012139718785e-05, "loss": 3.9942, "step": 11047 }, { "epoch": 3.680269842591821, "grad_norm": 0.71875, "learning_rate": 1.0935292623882144e-05, "loss": 4.1104, "step": 11048 }, { "epoch": 3.6806029815940704, "grad_norm": 0.734375, "learning_rate": 1.0934573068031785e-05, "loss": 3.9904, "step": 11049 }, { "epoch": 3.6809361205963187, "grad_norm": 0.6953125, "learning_rate": 1.0933853472176089e-05, "loss": 3.9589, "step": 11050 }, { "epoch": 3.6812692595985674, "grad_norm": 0.7109375, "learning_rate": 1.0933133836323436e-05, "loss": 3.9289, "step": 11051 }, { "epoch": 3.681602398600816, "grad_norm": 0.68359375, "learning_rate": 1.0932414160482212e-05, "loss": 3.9603, "step": 11052 }, { "epoch": 3.681935537603065, "grad_norm": 0.7109375, "learning_rate": 1.0931694444660799e-05, "loss": 3.9776, "step": 11053 }, { "epoch": 3.6822686766053137, "grad_norm": 0.6953125, "learning_rate": 1.0930974688867574e-05, "loss": 3.9428, "step": 11054 }, { "epoch": 3.6826018156075624, "grad_norm": 0.65625, "learning_rate": 1.0930254893110928e-05, "loss": 4.0596, "step": 11055 }, { "epoch": 3.682934954609811, "grad_norm": 0.66796875, "learning_rate": 1.0929535057399242e-05, "loss": 4.012, "step": 11056 }, { "epoch": 3.6832680936120594, "grad_norm": 0.6875, "learning_rate": 1.0928815181740904e-05, "loss": 4.0186, "step": 11057 }, { "epoch": 3.683601232614308, "grad_norm": 0.76953125, "learning_rate": 1.0928095266144292e-05, "loss": 3.9601, "step": 11058 }, { "epoch": 3.683934371616557, "grad_norm": 0.703125, "learning_rate": 1.0927375310617796e-05, "loss": 4.0643, "step": 11059 }, { "epoch": 3.6842675106188056, "grad_norm": 0.6953125, "learning_rate": 1.0926655315169801e-05, "loss": 3.9297, "step": 11060 }, { "epoch": 3.6846006496210544, "grad_norm": 0.703125, "learning_rate": 1.0925935279808697e-05, "loss": 4.0171, "step": 11061 }, { "epoch": 3.684933788623303, "grad_norm": 0.7421875, "learning_rate": 1.0925215204542866e-05, "loss": 4.0426, "step": 11062 }, { "epoch": 3.685266927625552, "grad_norm": 0.71484375, "learning_rate": 1.0924495089380695e-05, "loss": 4.0178, "step": 11063 }, { "epoch": 3.6856000666278006, "grad_norm": 0.67578125, "learning_rate": 1.0923774934330574e-05, "loss": 3.9987, "step": 11064 }, { "epoch": 3.6859332056300493, "grad_norm": 0.69921875, "learning_rate": 1.0923054739400892e-05, "loss": 3.9906, "step": 11065 }, { "epoch": 3.6862663446322976, "grad_norm": 0.68359375, "learning_rate": 1.0922334504600038e-05, "loss": 3.9766, "step": 11066 }, { "epoch": 3.6865994836345464, "grad_norm": 0.7734375, "learning_rate": 1.0921614229936396e-05, "loss": 4.0314, "step": 11067 }, { "epoch": 3.686932622636795, "grad_norm": 0.66796875, "learning_rate": 1.0920893915418362e-05, "loss": 4.0115, "step": 11068 }, { "epoch": 3.687265761639044, "grad_norm": 0.703125, "learning_rate": 1.0920173561054321e-05, "loss": 4.0094, "step": 11069 }, { "epoch": 3.6875989006412926, "grad_norm": 0.6796875, "learning_rate": 1.0919453166852669e-05, "loss": 4.0497, "step": 11070 }, { "epoch": 3.6879320396435413, "grad_norm": 0.703125, "learning_rate": 1.0918732732821795e-05, "loss": 4.0151, "step": 11071 }, { "epoch": 3.68826517864579, "grad_norm": 0.70703125, "learning_rate": 1.0918012258970087e-05, "loss": 3.9678, "step": 11072 }, { "epoch": 3.688598317648039, "grad_norm": 0.6640625, "learning_rate": 1.091729174530594e-05, "loss": 3.9633, "step": 11073 }, { "epoch": 3.6889314566502875, "grad_norm": 0.671875, "learning_rate": 1.0916571191837749e-05, "loss": 4.0282, "step": 11074 }, { "epoch": 3.689264595652536, "grad_norm": 0.68359375, "learning_rate": 1.09158505985739e-05, "loss": 3.9086, "step": 11075 }, { "epoch": 3.6895977346547846, "grad_norm": 0.703125, "learning_rate": 1.0915129965522791e-05, "loss": 3.991, "step": 11076 }, { "epoch": 3.6899308736570333, "grad_norm": 0.69140625, "learning_rate": 1.0914409292692817e-05, "loss": 3.9558, "step": 11077 }, { "epoch": 3.690264012659282, "grad_norm": 0.65234375, "learning_rate": 1.0913688580092368e-05, "loss": 3.9659, "step": 11078 }, { "epoch": 3.690597151661531, "grad_norm": 0.703125, "learning_rate": 1.0912967827729842e-05, "loss": 3.967, "step": 11079 }, { "epoch": 3.6909302906637795, "grad_norm": 0.73046875, "learning_rate": 1.0912247035613633e-05, "loss": 4.0067, "step": 11080 }, { "epoch": 3.6912634296660283, "grad_norm": 0.65234375, "learning_rate": 1.0911526203752139e-05, "loss": 3.9936, "step": 11081 }, { "epoch": 3.6915965686682766, "grad_norm": 0.6875, "learning_rate": 1.0910805332153751e-05, "loss": 3.963, "step": 11082 }, { "epoch": 3.6919297076705258, "grad_norm": 0.6796875, "learning_rate": 1.091008442082687e-05, "loss": 4.0153, "step": 11083 }, { "epoch": 3.692262846672774, "grad_norm": 0.66796875, "learning_rate": 1.0909363469779893e-05, "loss": 3.9657, "step": 11084 }, { "epoch": 3.692595985675023, "grad_norm": 0.66796875, "learning_rate": 1.0908642479021214e-05, "loss": 3.9986, "step": 11085 }, { "epoch": 3.6929291246772715, "grad_norm": 0.65625, "learning_rate": 1.0907921448559235e-05, "loss": 4.0993, "step": 11086 }, { "epoch": 3.6932622636795203, "grad_norm": 0.7109375, "learning_rate": 1.090720037840235e-05, "loss": 4.0708, "step": 11087 }, { "epoch": 3.693595402681769, "grad_norm": 0.71484375, "learning_rate": 1.0906479268558964e-05, "loss": 3.93, "step": 11088 }, { "epoch": 3.6939285416840177, "grad_norm": 0.68359375, "learning_rate": 1.0905758119037472e-05, "loss": 4.0103, "step": 11089 }, { "epoch": 3.6942616806862665, "grad_norm": 0.71484375, "learning_rate": 1.0905036929846274e-05, "loss": 3.9632, "step": 11090 }, { "epoch": 3.6945948196885148, "grad_norm": 0.67578125, "learning_rate": 1.090431570099377e-05, "loss": 3.9564, "step": 11091 }, { "epoch": 3.694927958690764, "grad_norm": 0.6953125, "learning_rate": 1.0903594432488364e-05, "loss": 4.0456, "step": 11092 }, { "epoch": 3.6952610976930123, "grad_norm": 0.65625, "learning_rate": 1.0902873124338455e-05, "loss": 4.0296, "step": 11093 }, { "epoch": 3.695594236695261, "grad_norm": 0.71875, "learning_rate": 1.0902151776552445e-05, "loss": 4.0078, "step": 11094 }, { "epoch": 3.6959273756975097, "grad_norm": 0.671875, "learning_rate": 1.0901430389138734e-05, "loss": 4.0544, "step": 11095 }, { "epoch": 3.6962605146997585, "grad_norm": 0.73046875, "learning_rate": 1.0900708962105728e-05, "loss": 4.0276, "step": 11096 }, { "epoch": 3.696593653702007, "grad_norm": 0.70703125, "learning_rate": 1.0899987495461826e-05, "loss": 4.0312, "step": 11097 }, { "epoch": 3.696926792704256, "grad_norm": 0.67578125, "learning_rate": 1.0899265989215435e-05, "loss": 4.0752, "step": 11098 }, { "epoch": 3.6972599317065047, "grad_norm": 0.67578125, "learning_rate": 1.0898544443374959e-05, "loss": 3.9425, "step": 11099 }, { "epoch": 3.697593070708753, "grad_norm": 0.7109375, "learning_rate": 1.0897822857948801e-05, "loss": 4.023, "step": 11100 }, { "epoch": 3.697926209711002, "grad_norm": 0.68359375, "learning_rate": 1.0897101232945367e-05, "loss": 3.9598, "step": 11101 }, { "epoch": 3.6982593487132505, "grad_norm": 0.68359375, "learning_rate": 1.0896379568373062e-05, "loss": 3.9829, "step": 11102 }, { "epoch": 3.698592487715499, "grad_norm": 0.72265625, "learning_rate": 1.0895657864240293e-05, "loss": 3.9823, "step": 11103 }, { "epoch": 3.698925626717748, "grad_norm": 0.6875, "learning_rate": 1.089493612055546e-05, "loss": 4.0688, "step": 11104 }, { "epoch": 3.6992587657199967, "grad_norm": 0.69921875, "learning_rate": 1.0894214337326978e-05, "loss": 4.0104, "step": 11105 }, { "epoch": 3.6995919047222454, "grad_norm": 0.65234375, "learning_rate": 1.0893492514563249e-05, "loss": 3.9964, "step": 11106 }, { "epoch": 3.699925043724494, "grad_norm": 0.65234375, "learning_rate": 1.0892770652272684e-05, "loss": 3.996, "step": 11107 }, { "epoch": 3.700258182726743, "grad_norm": 0.6796875, "learning_rate": 1.0892048750463691e-05, "loss": 4.1209, "step": 11108 }, { "epoch": 3.700591321728991, "grad_norm": 0.6796875, "learning_rate": 1.0891326809144674e-05, "loss": 3.9902, "step": 11109 }, { "epoch": 3.7009244607312404, "grad_norm": 0.73046875, "learning_rate": 1.0890604828324047e-05, "loss": 4.0393, "step": 11110 }, { "epoch": 3.7012575997334887, "grad_norm": 0.6875, "learning_rate": 1.0889882808010219e-05, "loss": 4.0, "step": 11111 }, { "epoch": 3.7015907387357374, "grad_norm": 0.66796875, "learning_rate": 1.0889160748211598e-05, "loss": 4.0954, "step": 11112 }, { "epoch": 3.701923877737986, "grad_norm": 0.69140625, "learning_rate": 1.0888438648936594e-05, "loss": 4.0838, "step": 11113 }, { "epoch": 3.702257016740235, "grad_norm": 0.69921875, "learning_rate": 1.088771651019362e-05, "loss": 3.9309, "step": 11114 }, { "epoch": 3.7025901557424836, "grad_norm": 0.6953125, "learning_rate": 1.0886994331991085e-05, "loss": 3.9911, "step": 11115 }, { "epoch": 3.7029232947447324, "grad_norm": 0.66796875, "learning_rate": 1.0886272114337404e-05, "loss": 4.0178, "step": 11116 }, { "epoch": 3.703256433746981, "grad_norm": 0.70703125, "learning_rate": 1.0885549857240987e-05, "loss": 4.0358, "step": 11117 }, { "epoch": 3.7035895727492294, "grad_norm": 0.65234375, "learning_rate": 1.0884827560710248e-05, "loss": 4.0764, "step": 11118 }, { "epoch": 3.7039227117514786, "grad_norm": 0.6484375, "learning_rate": 1.0884105224753598e-05, "loss": 3.9892, "step": 11119 }, { "epoch": 3.704255850753727, "grad_norm": 0.6640625, "learning_rate": 1.0883382849379453e-05, "loss": 4.0532, "step": 11120 }, { "epoch": 3.7045889897559756, "grad_norm": 0.70703125, "learning_rate": 1.0882660434596226e-05, "loss": 4.042, "step": 11121 }, { "epoch": 3.7049221287582244, "grad_norm": 0.671875, "learning_rate": 1.088193798041233e-05, "loss": 4.0138, "step": 11122 }, { "epoch": 3.705255267760473, "grad_norm": 0.69140625, "learning_rate": 1.0881215486836184e-05, "loss": 3.9695, "step": 11123 }, { "epoch": 3.705588406762722, "grad_norm": 0.66015625, "learning_rate": 1.08804929538762e-05, "loss": 4.0369, "step": 11124 }, { "epoch": 3.7059215457649706, "grad_norm": 0.71484375, "learning_rate": 1.0879770381540797e-05, "loss": 3.9681, "step": 11125 }, { "epoch": 3.7062546847672193, "grad_norm": 0.68359375, "learning_rate": 1.087904776983839e-05, "loss": 4.0472, "step": 11126 }, { "epoch": 3.7065878237694676, "grad_norm": 0.69921875, "learning_rate": 1.0878325118777393e-05, "loss": 3.9924, "step": 11127 }, { "epoch": 3.7069209627717163, "grad_norm": 0.6953125, "learning_rate": 1.0877602428366226e-05, "loss": 3.9734, "step": 11128 }, { "epoch": 3.707254101773965, "grad_norm": 0.6640625, "learning_rate": 1.0876879698613307e-05, "loss": 4.0537, "step": 11129 }, { "epoch": 3.707587240776214, "grad_norm": 0.66015625, "learning_rate": 1.0876156929527057e-05, "loss": 4.0025, "step": 11130 }, { "epoch": 3.7079203797784626, "grad_norm": 0.6875, "learning_rate": 1.0875434121115886e-05, "loss": 3.9298, "step": 11131 }, { "epoch": 3.7082535187807113, "grad_norm": 0.6796875, "learning_rate": 1.0874711273388223e-05, "loss": 3.9954, "step": 11132 }, { "epoch": 3.70858665778296, "grad_norm": 0.72265625, "learning_rate": 1.087398838635248e-05, "loss": 4.0384, "step": 11133 }, { "epoch": 3.7089197967852088, "grad_norm": 0.69921875, "learning_rate": 1.0873265460017084e-05, "loss": 4.0299, "step": 11134 }, { "epoch": 3.7092529357874575, "grad_norm": 0.66796875, "learning_rate": 1.0872542494390449e-05, "loss": 4.0471, "step": 11135 }, { "epoch": 3.709586074789706, "grad_norm": 0.66796875, "learning_rate": 1.0871819489481e-05, "loss": 4.0034, "step": 11136 }, { "epoch": 3.7099192137919546, "grad_norm": 0.671875, "learning_rate": 1.0871096445297157e-05, "loss": 4.0754, "step": 11137 }, { "epoch": 3.7102523527942033, "grad_norm": 0.6953125, "learning_rate": 1.0870373361847345e-05, "loss": 3.9103, "step": 11138 }, { "epoch": 3.710585491796452, "grad_norm": 0.71484375, "learning_rate": 1.0869650239139978e-05, "loss": 3.9627, "step": 11139 }, { "epoch": 3.7109186307987008, "grad_norm": 0.6796875, "learning_rate": 1.086892707718349e-05, "loss": 4.028, "step": 11140 }, { "epoch": 3.7112517698009495, "grad_norm": 0.67578125, "learning_rate": 1.0868203875986292e-05, "loss": 4.0621, "step": 11141 }, { "epoch": 3.7115849088031982, "grad_norm": 0.703125, "learning_rate": 1.086748063555682e-05, "loss": 3.9906, "step": 11142 }, { "epoch": 3.711918047805447, "grad_norm": 0.67578125, "learning_rate": 1.086675735590349e-05, "loss": 4.0358, "step": 11143 }, { "epoch": 3.7122511868076957, "grad_norm": 0.70703125, "learning_rate": 1.086603403703473e-05, "loss": 4.0384, "step": 11144 }, { "epoch": 3.712584325809944, "grad_norm": 0.72265625, "learning_rate": 1.0865310678958966e-05, "loss": 4.0387, "step": 11145 }, { "epoch": 3.7129174648121928, "grad_norm": 0.71484375, "learning_rate": 1.0864587281684617e-05, "loss": 4.0023, "step": 11146 }, { "epoch": 3.7132506038144415, "grad_norm": 0.73046875, "learning_rate": 1.0863863845220119e-05, "loss": 4.1094, "step": 11147 }, { "epoch": 3.7135837428166902, "grad_norm": 0.66796875, "learning_rate": 1.086314036957389e-05, "loss": 4.0346, "step": 11148 }, { "epoch": 3.713916881818939, "grad_norm": 0.66015625, "learning_rate": 1.0862416854754362e-05, "loss": 4.0569, "step": 11149 }, { "epoch": 3.7142500208211877, "grad_norm": 0.671875, "learning_rate": 1.086169330076996e-05, "loss": 4.0208, "step": 11150 }, { "epoch": 3.7145831598234365, "grad_norm": 0.671875, "learning_rate": 1.0860969707629114e-05, "loss": 4.0228, "step": 11151 }, { "epoch": 3.7149162988256847, "grad_norm": 0.6796875, "learning_rate": 1.0860246075340249e-05, "loss": 3.9552, "step": 11152 }, { "epoch": 3.715249437827934, "grad_norm": 0.6640625, "learning_rate": 1.0859522403911796e-05, "loss": 3.9278, "step": 11153 }, { "epoch": 3.7155825768301822, "grad_norm": 0.6796875, "learning_rate": 1.0858798693352181e-05, "loss": 3.9822, "step": 11154 }, { "epoch": 3.715915715832431, "grad_norm": 0.66796875, "learning_rate": 1.085807494366984e-05, "loss": 4.0098, "step": 11155 }, { "epoch": 3.7162488548346797, "grad_norm": 0.6875, "learning_rate": 1.08573511548732e-05, "loss": 4.0786, "step": 11156 }, { "epoch": 3.7165819938369284, "grad_norm": 0.6484375, "learning_rate": 1.0856627326970689e-05, "loss": 4.0413, "step": 11157 }, { "epoch": 3.716915132839177, "grad_norm": 0.66796875, "learning_rate": 1.085590345997074e-05, "loss": 3.972, "step": 11158 }, { "epoch": 3.717248271841426, "grad_norm": 0.71484375, "learning_rate": 1.0855179553881785e-05, "loss": 4.0332, "step": 11159 }, { "epoch": 3.7175814108436747, "grad_norm": 0.6875, "learning_rate": 1.0854455608712255e-05, "loss": 4.0141, "step": 11160 }, { "epoch": 3.717914549845923, "grad_norm": 0.69921875, "learning_rate": 1.0853731624470584e-05, "loss": 4.0427, "step": 11161 }, { "epoch": 3.718247688848172, "grad_norm": 0.68359375, "learning_rate": 1.0853007601165205e-05, "loss": 4.0258, "step": 11162 }, { "epoch": 3.7185808278504204, "grad_norm": 0.66796875, "learning_rate": 1.0852283538804545e-05, "loss": 4.0425, "step": 11163 }, { "epoch": 3.718913966852669, "grad_norm": 0.66796875, "learning_rate": 1.0851559437397049e-05, "loss": 4.106, "step": 11164 }, { "epoch": 3.719247105854918, "grad_norm": 0.69921875, "learning_rate": 1.0850835296951142e-05, "loss": 4.0491, "step": 11165 }, { "epoch": 3.7195802448571667, "grad_norm": 0.66015625, "learning_rate": 1.085011111747526e-05, "loss": 4.0431, "step": 11166 }, { "epoch": 3.7199133838594154, "grad_norm": 0.65234375, "learning_rate": 1.0849386898977844e-05, "loss": 4.0056, "step": 11167 }, { "epoch": 3.720246522861664, "grad_norm": 0.66015625, "learning_rate": 1.0848662641467322e-05, "loss": 4.0687, "step": 11168 }, { "epoch": 3.720579661863913, "grad_norm": 0.671875, "learning_rate": 1.0847938344952134e-05, "loss": 3.988, "step": 11169 }, { "epoch": 3.720912800866161, "grad_norm": 0.71484375, "learning_rate": 1.0847214009440718e-05, "loss": 3.9854, "step": 11170 }, { "epoch": 3.7212459398684103, "grad_norm": 0.68359375, "learning_rate": 1.0846489634941506e-05, "loss": 3.9974, "step": 11171 }, { "epoch": 3.7215790788706586, "grad_norm": 0.69921875, "learning_rate": 1.0845765221462939e-05, "loss": 3.896, "step": 11172 }, { "epoch": 3.7219122178729074, "grad_norm": 0.67578125, "learning_rate": 1.0845040769013455e-05, "loss": 3.9711, "step": 11173 }, { "epoch": 3.722245356875156, "grad_norm": 0.67578125, "learning_rate": 1.084431627760149e-05, "loss": 3.9972, "step": 11174 }, { "epoch": 3.722578495877405, "grad_norm": 0.6875, "learning_rate": 1.0843591747235486e-05, "loss": 3.9963, "step": 11175 }, { "epoch": 3.7229116348796536, "grad_norm": 0.6640625, "learning_rate": 1.0842867177923879e-05, "loss": 3.9217, "step": 11176 }, { "epoch": 3.7232447738819023, "grad_norm": 0.6796875, "learning_rate": 1.084214256967511e-05, "loss": 4.004, "step": 11177 }, { "epoch": 3.723577912884151, "grad_norm": 0.640625, "learning_rate": 1.0841417922497619e-05, "loss": 4.0256, "step": 11178 }, { "epoch": 3.7239110518863994, "grad_norm": 0.6796875, "learning_rate": 1.0840693236399849e-05, "loss": 4.036, "step": 11179 }, { "epoch": 3.7242441908886486, "grad_norm": 0.66796875, "learning_rate": 1.0839968511390235e-05, "loss": 4.0497, "step": 11180 }, { "epoch": 3.724577329890897, "grad_norm": 0.734375, "learning_rate": 1.0839243747477225e-05, "loss": 3.9844, "step": 11181 }, { "epoch": 3.7249104688931456, "grad_norm": 0.6875, "learning_rate": 1.0838518944669257e-05, "loss": 3.9855, "step": 11182 }, { "epoch": 3.7252436078953943, "grad_norm": 0.671875, "learning_rate": 1.0837794102974776e-05, "loss": 3.8841, "step": 11183 }, { "epoch": 3.725576746897643, "grad_norm": 0.6796875, "learning_rate": 1.0837069222402222e-05, "loss": 4.0549, "step": 11184 }, { "epoch": 3.725909885899892, "grad_norm": 0.734375, "learning_rate": 1.0836344302960038e-05, "loss": 4.0002, "step": 11185 }, { "epoch": 3.7262430249021405, "grad_norm": 0.7109375, "learning_rate": 1.0835619344656673e-05, "loss": 3.9787, "step": 11186 }, { "epoch": 3.7265761639043893, "grad_norm": 0.68359375, "learning_rate": 1.0834894347500566e-05, "loss": 3.9829, "step": 11187 }, { "epoch": 3.7269093029066376, "grad_norm": 0.72265625, "learning_rate": 1.0834169311500163e-05, "loss": 4.0487, "step": 11188 }, { "epoch": 3.7272424419088863, "grad_norm": 0.671875, "learning_rate": 1.083344423666391e-05, "loss": 3.9995, "step": 11189 }, { "epoch": 3.727575580911135, "grad_norm": 0.69140625, "learning_rate": 1.0832719123000249e-05, "loss": 3.9586, "step": 11190 }, { "epoch": 3.727908719913384, "grad_norm": 0.6796875, "learning_rate": 1.0831993970517632e-05, "loss": 3.9729, "step": 11191 }, { "epoch": 3.7282418589156325, "grad_norm": 0.6640625, "learning_rate": 1.0831268779224502e-05, "loss": 4.0009, "step": 11192 }, { "epoch": 3.7285749979178813, "grad_norm": 0.6875, "learning_rate": 1.0830543549129307e-05, "loss": 4.0645, "step": 11193 }, { "epoch": 3.72890813692013, "grad_norm": 0.66015625, "learning_rate": 1.082981828024049e-05, "loss": 3.9878, "step": 11194 }, { "epoch": 3.7292412759223788, "grad_norm": 0.70703125, "learning_rate": 1.0829092972566506e-05, "loss": 3.9716, "step": 11195 }, { "epoch": 3.7295744149246275, "grad_norm": 0.69140625, "learning_rate": 1.0828367626115798e-05, "loss": 3.9808, "step": 11196 }, { "epoch": 3.729907553926876, "grad_norm": 0.66796875, "learning_rate": 1.0827642240896815e-05, "loss": 3.9351, "step": 11197 }, { "epoch": 3.7302406929291245, "grad_norm": 0.66796875, "learning_rate": 1.0826916816918009e-05, "loss": 4.0223, "step": 11198 }, { "epoch": 3.7305738319313733, "grad_norm": 0.6796875, "learning_rate": 1.0826191354187828e-05, "loss": 3.9754, "step": 11199 }, { "epoch": 3.730906970933622, "grad_norm": 0.67578125, "learning_rate": 1.0825465852714725e-05, "loss": 3.9473, "step": 11200 }, { "epoch": 3.7312401099358707, "grad_norm": 0.6796875, "learning_rate": 1.0824740312507144e-05, "loss": 4.0454, "step": 11201 }, { "epoch": 3.7315732489381195, "grad_norm": 0.6875, "learning_rate": 1.0824014733573541e-05, "loss": 4.0492, "step": 11202 }, { "epoch": 3.731906387940368, "grad_norm": 0.66796875, "learning_rate": 1.0823289115922366e-05, "loss": 4.0145, "step": 11203 }, { "epoch": 3.732239526942617, "grad_norm": 0.703125, "learning_rate": 1.0822563459562073e-05, "loss": 3.9209, "step": 11204 }, { "epoch": 3.7325726659448657, "grad_norm": 0.69921875, "learning_rate": 1.0821837764501111e-05, "loss": 4.0237, "step": 11205 }, { "epoch": 3.732905804947114, "grad_norm": 0.671875, "learning_rate": 1.0821112030747933e-05, "loss": 3.9366, "step": 11206 }, { "epoch": 3.7332389439493627, "grad_norm": 0.69140625, "learning_rate": 1.0820386258310994e-05, "loss": 3.9935, "step": 11207 }, { "epoch": 3.7335720829516115, "grad_norm": 0.671875, "learning_rate": 1.0819660447198749e-05, "loss": 4.0188, "step": 11208 }, { "epoch": 3.73390522195386, "grad_norm": 0.69921875, "learning_rate": 1.0818934597419646e-05, "loss": 4.0071, "step": 11209 }, { "epoch": 3.734238360956109, "grad_norm": 0.734375, "learning_rate": 1.0818208708982146e-05, "loss": 3.9205, "step": 11210 }, { "epoch": 3.7345714999583577, "grad_norm": 0.7109375, "learning_rate": 1.0817482781894703e-05, "loss": 3.9384, "step": 11211 }, { "epoch": 3.7349046389606064, "grad_norm": 0.6953125, "learning_rate": 1.0816756816165768e-05, "loss": 3.9869, "step": 11212 }, { "epoch": 3.735237777962855, "grad_norm": 0.65234375, "learning_rate": 1.0816030811803803e-05, "loss": 4.0419, "step": 11213 }, { "epoch": 3.735570916965104, "grad_norm": 0.7265625, "learning_rate": 1.0815304768817261e-05, "loss": 3.9502, "step": 11214 }, { "epoch": 3.735904055967352, "grad_norm": 0.69921875, "learning_rate": 1.0814578687214601e-05, "loss": 3.9464, "step": 11215 }, { "epoch": 3.736237194969601, "grad_norm": 0.65234375, "learning_rate": 1.0813852567004277e-05, "loss": 4.0089, "step": 11216 }, { "epoch": 3.7365703339718497, "grad_norm": 0.66015625, "learning_rate": 1.0813126408194746e-05, "loss": 3.9908, "step": 11217 }, { "epoch": 3.7369034729740984, "grad_norm": 0.7265625, "learning_rate": 1.0812400210794471e-05, "loss": 3.9672, "step": 11218 }, { "epoch": 3.737236611976347, "grad_norm": 0.69921875, "learning_rate": 1.0811673974811909e-05, "loss": 4.0333, "step": 11219 }, { "epoch": 3.737569750978596, "grad_norm": 0.69921875, "learning_rate": 1.0810947700255518e-05, "loss": 3.9583, "step": 11220 }, { "epoch": 3.7379028899808446, "grad_norm": 0.6875, "learning_rate": 1.0810221387133755e-05, "loss": 3.9544, "step": 11221 }, { "epoch": 3.738236028983093, "grad_norm": 0.69921875, "learning_rate": 1.0809495035455087e-05, "loss": 4.0241, "step": 11222 }, { "epoch": 3.738569167985342, "grad_norm": 0.69921875, "learning_rate": 1.0808768645227967e-05, "loss": 3.9832, "step": 11223 }, { "epoch": 3.7389023069875904, "grad_norm": 0.70703125, "learning_rate": 1.080804221646086e-05, "loss": 3.9762, "step": 11224 }, { "epoch": 3.739235445989839, "grad_norm": 0.67578125, "learning_rate": 1.0807315749162227e-05, "loss": 4.0133, "step": 11225 }, { "epoch": 3.739568584992088, "grad_norm": 0.74609375, "learning_rate": 1.0806589243340528e-05, "loss": 4.0235, "step": 11226 }, { "epoch": 3.7399017239943366, "grad_norm": 0.7421875, "learning_rate": 1.0805862699004226e-05, "loss": 3.9646, "step": 11227 }, { "epoch": 3.7402348629965854, "grad_norm": 0.69921875, "learning_rate": 1.0805136116161788e-05, "loss": 4.051, "step": 11228 }, { "epoch": 3.740568001998834, "grad_norm": 0.6796875, "learning_rate": 1.080440949482167e-05, "loss": 4.0286, "step": 11229 }, { "epoch": 3.740901141001083, "grad_norm": 0.66015625, "learning_rate": 1.0803682834992339e-05, "loss": 4.001, "step": 11230 }, { "epoch": 3.741234280003331, "grad_norm": 0.703125, "learning_rate": 1.0802956136682259e-05, "loss": 4.0154, "step": 11231 }, { "epoch": 3.7415674190055803, "grad_norm": 0.67578125, "learning_rate": 1.0802229399899893e-05, "loss": 3.9847, "step": 11232 }, { "epoch": 3.7419005580078286, "grad_norm": 0.68359375, "learning_rate": 1.0801502624653707e-05, "loss": 4.0068, "step": 11233 }, { "epoch": 3.7422336970100774, "grad_norm": 0.7421875, "learning_rate": 1.0800775810952168e-05, "loss": 3.9905, "step": 11234 }, { "epoch": 3.742566836012326, "grad_norm": 0.69921875, "learning_rate": 1.0800048958803742e-05, "loss": 3.9833, "step": 11235 }, { "epoch": 3.742899975014575, "grad_norm": 0.70703125, "learning_rate": 1.079932206821689e-05, "loss": 4.0453, "step": 11236 }, { "epoch": 3.7432331140168236, "grad_norm": 0.703125, "learning_rate": 1.0798595139200085e-05, "loss": 4.0981, "step": 11237 }, { "epoch": 3.7435662530190723, "grad_norm": 0.65234375, "learning_rate": 1.0797868171761788e-05, "loss": 4.0494, "step": 11238 }, { "epoch": 3.743899392021321, "grad_norm": 0.68359375, "learning_rate": 1.0797141165910472e-05, "loss": 4.0156, "step": 11239 }, { "epoch": 3.7442325310235693, "grad_norm": 0.68359375, "learning_rate": 1.0796414121654603e-05, "loss": 4.0185, "step": 11240 }, { "epoch": 3.7445656700258185, "grad_norm": 0.68359375, "learning_rate": 1.079568703900265e-05, "loss": 4.0259, "step": 11241 }, { "epoch": 3.744898809028067, "grad_norm": 0.6796875, "learning_rate": 1.0794959917963082e-05, "loss": 3.923, "step": 11242 }, { "epoch": 3.7452319480303156, "grad_norm": 0.66796875, "learning_rate": 1.0794232758544364e-05, "loss": 4.0154, "step": 11243 }, { "epoch": 3.7455650870325643, "grad_norm": 0.69140625, "learning_rate": 1.0793505560754972e-05, "loss": 3.996, "step": 11244 }, { "epoch": 3.745898226034813, "grad_norm": 0.71875, "learning_rate": 1.0792778324603374e-05, "loss": 3.9984, "step": 11245 }, { "epoch": 3.7462313650370618, "grad_norm": 0.69140625, "learning_rate": 1.0792051050098044e-05, "loss": 4.041, "step": 11246 }, { "epoch": 3.7465645040393105, "grad_norm": 0.72265625, "learning_rate": 1.0791323737247444e-05, "loss": 4.0405, "step": 11247 }, { "epoch": 3.7468976430415593, "grad_norm": 0.66796875, "learning_rate": 1.0790596386060054e-05, "loss": 3.9869, "step": 11248 }, { "epoch": 3.7472307820438076, "grad_norm": 0.640625, "learning_rate": 1.0789868996544344e-05, "loss": 4.0194, "step": 11249 }, { "epoch": 3.7475639210460567, "grad_norm": 0.6640625, "learning_rate": 1.0789141568708787e-05, "loss": 4.0261, "step": 11250 }, { "epoch": 3.747897060048305, "grad_norm": 0.6796875, "learning_rate": 1.0788414102561853e-05, "loss": 4.0445, "step": 11251 }, { "epoch": 3.7482301990505538, "grad_norm": 0.70703125, "learning_rate": 1.0787686598112018e-05, "loss": 3.9394, "step": 11252 }, { "epoch": 3.7485633380528025, "grad_norm": 0.6484375, "learning_rate": 1.0786959055367755e-05, "loss": 4.013, "step": 11253 }, { "epoch": 3.7488964770550512, "grad_norm": 0.6796875, "learning_rate": 1.0786231474337539e-05, "loss": 3.9988, "step": 11254 }, { "epoch": 3.7492296160573, "grad_norm": 0.69921875, "learning_rate": 1.0785503855029845e-05, "loss": 3.9926, "step": 11255 }, { "epoch": 3.7495627550595487, "grad_norm": 0.671875, "learning_rate": 1.0784776197453146e-05, "loss": 3.9682, "step": 11256 }, { "epoch": 3.7498958940617975, "grad_norm": 0.6796875, "learning_rate": 1.078404850161592e-05, "loss": 3.9775, "step": 11257 }, { "epoch": 3.7502290330640458, "grad_norm": 0.6328125, "learning_rate": 1.0783320767526642e-05, "loss": 4.0876, "step": 11258 }, { "epoch": 3.7505621720662945, "grad_norm": 0.6484375, "learning_rate": 1.0782592995193791e-05, "loss": 4.1449, "step": 11259 }, { "epoch": 3.7508953110685432, "grad_norm": 0.6953125, "learning_rate": 1.0781865184625837e-05, "loss": 3.9821, "step": 11260 }, { "epoch": 3.751228450070792, "grad_norm": 0.64453125, "learning_rate": 1.0781137335831266e-05, "loss": 3.9586, "step": 11261 }, { "epoch": 3.7515615890730407, "grad_norm": 0.65234375, "learning_rate": 1.078040944881855e-05, "loss": 4.0243, "step": 11262 }, { "epoch": 3.7518947280752895, "grad_norm": 0.68359375, "learning_rate": 1.077968152359617e-05, "loss": 4.0836, "step": 11263 }, { "epoch": 3.752227867077538, "grad_norm": 0.68359375, "learning_rate": 1.0778953560172603e-05, "loss": 4.0333, "step": 11264 }, { "epoch": 3.752561006079787, "grad_norm": 0.65625, "learning_rate": 1.0778225558556329e-05, "loss": 3.9524, "step": 11265 }, { "epoch": 3.7528941450820357, "grad_norm": 0.65234375, "learning_rate": 1.0777497518755828e-05, "loss": 3.9783, "step": 11266 }, { "epoch": 3.753227284084284, "grad_norm": 0.71484375, "learning_rate": 1.0776769440779581e-05, "loss": 4.0376, "step": 11267 }, { "epoch": 3.7535604230865327, "grad_norm": 0.6796875, "learning_rate": 1.0776041324636069e-05, "loss": 4.056, "step": 11268 }, { "epoch": 3.7538935620887814, "grad_norm": 0.69140625, "learning_rate": 1.077531317033377e-05, "loss": 3.9909, "step": 11269 }, { "epoch": 3.75422670109103, "grad_norm": 0.70703125, "learning_rate": 1.0774584977881169e-05, "loss": 3.9713, "step": 11270 }, { "epoch": 3.754559840093279, "grad_norm": 0.65625, "learning_rate": 1.077385674728674e-05, "loss": 3.9698, "step": 11271 }, { "epoch": 3.7548929790955277, "grad_norm": 0.68359375, "learning_rate": 1.0773128478558975e-05, "loss": 3.9961, "step": 11272 }, { "epoch": 3.7552261180977764, "grad_norm": 0.6953125, "learning_rate": 1.0772400171706354e-05, "loss": 3.927, "step": 11273 }, { "epoch": 3.755559257100025, "grad_norm": 0.67578125, "learning_rate": 1.0771671826737356e-05, "loss": 4.0002, "step": 11274 }, { "epoch": 3.755892396102274, "grad_norm": 0.6875, "learning_rate": 1.0770943443660469e-05, "loss": 4.031, "step": 11275 }, { "epoch": 3.756225535104522, "grad_norm": 0.66796875, "learning_rate": 1.0770215022484176e-05, "loss": 3.9813, "step": 11276 }, { "epoch": 3.756558674106771, "grad_norm": 0.671875, "learning_rate": 1.0769486563216961e-05, "loss": 4.0397, "step": 11277 }, { "epoch": 3.7568918131090197, "grad_norm": 0.6953125, "learning_rate": 1.076875806586731e-05, "loss": 4.0048, "step": 11278 }, { "epoch": 3.7572249521112684, "grad_norm": 0.68359375, "learning_rate": 1.0768029530443706e-05, "loss": 4.0354, "step": 11279 }, { "epoch": 3.757558091113517, "grad_norm": 0.69921875, "learning_rate": 1.0767300956954637e-05, "loss": 3.9761, "step": 11280 }, { "epoch": 3.757891230115766, "grad_norm": 0.66796875, "learning_rate": 1.0766572345408589e-05, "loss": 4.0448, "step": 11281 }, { "epoch": 3.7582243691180146, "grad_norm": 0.65625, "learning_rate": 1.076584369581405e-05, "loss": 4.0227, "step": 11282 }, { "epoch": 3.7585575081202633, "grad_norm": 0.6640625, "learning_rate": 1.0765115008179503e-05, "loss": 4.0761, "step": 11283 }, { "epoch": 3.758890647122512, "grad_norm": 0.68359375, "learning_rate": 1.076438628251344e-05, "loss": 4.1278, "step": 11284 }, { "epoch": 3.7592237861247604, "grad_norm": 0.66796875, "learning_rate": 1.0763657518824349e-05, "loss": 4.0421, "step": 11285 }, { "epoch": 3.759556925127009, "grad_norm": 0.671875, "learning_rate": 1.0762928717120716e-05, "loss": 4.0243, "step": 11286 }, { "epoch": 3.759890064129258, "grad_norm": 0.67578125, "learning_rate": 1.0762199877411026e-05, "loss": 4.0671, "step": 11287 }, { "epoch": 3.7602232031315066, "grad_norm": 0.65234375, "learning_rate": 1.076147099970378e-05, "loss": 3.9244, "step": 11288 }, { "epoch": 3.7605563421337553, "grad_norm": 0.6796875, "learning_rate": 1.0760742084007457e-05, "loss": 3.9911, "step": 11289 }, { "epoch": 3.760889481136004, "grad_norm": 0.6796875, "learning_rate": 1.0760013130330553e-05, "loss": 4.0397, "step": 11290 }, { "epoch": 3.761222620138253, "grad_norm": 0.640625, "learning_rate": 1.0759284138681559e-05, "loss": 4.0439, "step": 11291 }, { "epoch": 3.761555759140501, "grad_norm": 0.703125, "learning_rate": 1.0758555109068962e-05, "loss": 3.9294, "step": 11292 }, { "epoch": 3.7618888981427503, "grad_norm": 0.6796875, "learning_rate": 1.0757826041501257e-05, "loss": 4.074, "step": 11293 }, { "epoch": 3.7622220371449986, "grad_norm": 0.7109375, "learning_rate": 1.0757096935986936e-05, "loss": 3.9743, "step": 11294 }, { "epoch": 3.7625551761472473, "grad_norm": 0.7109375, "learning_rate": 1.0756367792534489e-05, "loss": 4.0292, "step": 11295 }, { "epoch": 3.762888315149496, "grad_norm": 0.65234375, "learning_rate": 1.0755638611152411e-05, "loss": 3.9446, "step": 11296 }, { "epoch": 3.763221454151745, "grad_norm": 0.70703125, "learning_rate": 1.0754909391849194e-05, "loss": 3.9845, "step": 11297 }, { "epoch": 3.7635545931539935, "grad_norm": 0.66796875, "learning_rate": 1.0754180134633334e-05, "loss": 3.9677, "step": 11298 }, { "epoch": 3.7638877321562423, "grad_norm": 0.66796875, "learning_rate": 1.0753450839513324e-05, "loss": 3.9729, "step": 11299 }, { "epoch": 3.764220871158491, "grad_norm": 0.72265625, "learning_rate": 1.0752721506497658e-05, "loss": 4.0744, "step": 11300 }, { "epoch": 3.7645540101607393, "grad_norm": 0.69140625, "learning_rate": 1.0751992135594831e-05, "loss": 4.0, "step": 11301 }, { "epoch": 3.7648871491629885, "grad_norm": 0.6953125, "learning_rate": 1.075126272681334e-05, "loss": 4.0461, "step": 11302 }, { "epoch": 3.765220288165237, "grad_norm": 0.7265625, "learning_rate": 1.0750533280161682e-05, "loss": 4.0565, "step": 11303 }, { "epoch": 3.7655534271674855, "grad_norm": 0.66015625, "learning_rate": 1.074980379564835e-05, "loss": 3.9814, "step": 11304 }, { "epoch": 3.7658865661697343, "grad_norm": 0.703125, "learning_rate": 1.0749074273281845e-05, "loss": 3.9888, "step": 11305 }, { "epoch": 3.766219705171983, "grad_norm": 0.6796875, "learning_rate": 1.074834471307066e-05, "loss": 3.9337, "step": 11306 }, { "epoch": 3.7665528441742318, "grad_norm": 0.72265625, "learning_rate": 1.0747615115023296e-05, "loss": 3.9726, "step": 11307 }, { "epoch": 3.7668859831764805, "grad_norm": 0.6953125, "learning_rate": 1.074688547914825e-05, "loss": 3.98, "step": 11308 }, { "epoch": 3.7672191221787292, "grad_norm": 0.66796875, "learning_rate": 1.0746155805454022e-05, "loss": 3.979, "step": 11309 }, { "epoch": 3.7675522611809775, "grad_norm": 0.66796875, "learning_rate": 1.0745426093949108e-05, "loss": 4.0191, "step": 11310 }, { "epoch": 3.7678854001832267, "grad_norm": 0.64453125, "learning_rate": 1.0744696344642009e-05, "loss": 4.0792, "step": 11311 }, { "epoch": 3.768218539185475, "grad_norm": 0.63671875, "learning_rate": 1.074396655754123e-05, "loss": 4.0515, "step": 11312 }, { "epoch": 3.7685516781877237, "grad_norm": 0.66015625, "learning_rate": 1.0743236732655264e-05, "loss": 3.9954, "step": 11313 }, { "epoch": 3.7688848171899725, "grad_norm": 0.6796875, "learning_rate": 1.0742506869992615e-05, "loss": 3.9862, "step": 11314 }, { "epoch": 3.769217956192221, "grad_norm": 0.68359375, "learning_rate": 1.0741776969561783e-05, "loss": 4.0636, "step": 11315 }, { "epoch": 3.76955109519447, "grad_norm": 0.67578125, "learning_rate": 1.0741047031371274e-05, "loss": 4.062, "step": 11316 }, { "epoch": 3.7698842341967187, "grad_norm": 0.69140625, "learning_rate": 1.0740317055429585e-05, "loss": 3.9809, "step": 11317 }, { "epoch": 3.7702173731989674, "grad_norm": 0.70703125, "learning_rate": 1.0739587041745222e-05, "loss": 3.9807, "step": 11318 }, { "epoch": 3.7705505122012157, "grad_norm": 0.6875, "learning_rate": 1.0738856990326684e-05, "loss": 4.0155, "step": 11319 }, { "epoch": 3.770883651203465, "grad_norm": 0.68359375, "learning_rate": 1.073812690118248e-05, "loss": 4.0066, "step": 11320 }, { "epoch": 3.771216790205713, "grad_norm": 0.69921875, "learning_rate": 1.073739677432111e-05, "loss": 4.09, "step": 11321 }, { "epoch": 3.771549929207962, "grad_norm": 0.6953125, "learning_rate": 1.0736666609751082e-05, "loss": 3.9792, "step": 11322 }, { "epoch": 3.7718830682102107, "grad_norm": 0.6953125, "learning_rate": 1.0735936407480898e-05, "loss": 4.0522, "step": 11323 }, { "epoch": 3.7722162072124594, "grad_norm": 0.71484375, "learning_rate": 1.0735206167519063e-05, "loss": 3.9782, "step": 11324 }, { "epoch": 3.772549346214708, "grad_norm": 0.6953125, "learning_rate": 1.0734475889874085e-05, "loss": 3.9931, "step": 11325 }, { "epoch": 3.772882485216957, "grad_norm": 0.6640625, "learning_rate": 1.0733745574554468e-05, "loss": 4.0847, "step": 11326 }, { "epoch": 3.7732156242192056, "grad_norm": 0.64453125, "learning_rate": 1.0733015221568718e-05, "loss": 4.0094, "step": 11327 }, { "epoch": 3.773548763221454, "grad_norm": 0.6953125, "learning_rate": 1.0732284830925344e-05, "loss": 3.9968, "step": 11328 }, { "epoch": 3.7738819022237027, "grad_norm": 0.6875, "learning_rate": 1.0731554402632852e-05, "loss": 3.8794, "step": 11329 }, { "epoch": 3.7742150412259514, "grad_norm": 0.734375, "learning_rate": 1.0730823936699752e-05, "loss": 3.9719, "step": 11330 }, { "epoch": 3.7745481802282, "grad_norm": 0.69140625, "learning_rate": 1.0730093433134551e-05, "loss": 4.0002, "step": 11331 }, { "epoch": 3.774881319230449, "grad_norm": 0.6484375, "learning_rate": 1.072936289194576e-05, "loss": 4.0009, "step": 11332 }, { "epoch": 3.7752144582326976, "grad_norm": 0.72265625, "learning_rate": 1.0728632313141882e-05, "loss": 4.0988, "step": 11333 }, { "epoch": 3.7755475972349464, "grad_norm": 0.703125, "learning_rate": 1.0727901696731432e-05, "loss": 4.0462, "step": 11334 }, { "epoch": 3.775880736237195, "grad_norm": 0.72265625, "learning_rate": 1.072717104272292e-05, "loss": 3.9847, "step": 11335 }, { "epoch": 3.776213875239444, "grad_norm": 0.69140625, "learning_rate": 1.0726440351124856e-05, "loss": 4.0399, "step": 11336 }, { "epoch": 3.776547014241692, "grad_norm": 0.68359375, "learning_rate": 1.072570962194575e-05, "loss": 4.0568, "step": 11337 }, { "epoch": 3.776880153243941, "grad_norm": 0.6875, "learning_rate": 1.0724978855194113e-05, "loss": 4.0241, "step": 11338 }, { "epoch": 3.7772132922461896, "grad_norm": 0.64453125, "learning_rate": 1.0724248050878456e-05, "loss": 4.0477, "step": 11339 }, { "epoch": 3.7775464312484384, "grad_norm": 0.71875, "learning_rate": 1.0723517209007297e-05, "loss": 3.9209, "step": 11340 }, { "epoch": 3.777879570250687, "grad_norm": 0.66015625, "learning_rate": 1.0722786329589141e-05, "loss": 3.9598, "step": 11341 }, { "epoch": 3.778212709252936, "grad_norm": 0.66015625, "learning_rate": 1.0722055412632506e-05, "loss": 4.0553, "step": 11342 }, { "epoch": 3.7785458482551846, "grad_norm": 0.69140625, "learning_rate": 1.0721324458145904e-05, "loss": 4.0115, "step": 11343 }, { "epoch": 3.7788789872574333, "grad_norm": 0.66796875, "learning_rate": 1.0720593466137851e-05, "loss": 3.951, "step": 11344 }, { "epoch": 3.779212126259682, "grad_norm": 0.73828125, "learning_rate": 1.071986243661686e-05, "loss": 3.9908, "step": 11345 }, { "epoch": 3.7795452652619304, "grad_norm": 0.71484375, "learning_rate": 1.0719131369591443e-05, "loss": 3.9649, "step": 11346 }, { "epoch": 3.779878404264179, "grad_norm": 0.671875, "learning_rate": 1.071840026507012e-05, "loss": 4.0476, "step": 11347 }, { "epoch": 3.780211543266428, "grad_norm": 0.65625, "learning_rate": 1.0717669123061405e-05, "loss": 3.9512, "step": 11348 }, { "epoch": 3.7805446822686766, "grad_norm": 0.66015625, "learning_rate": 1.0716937943573814e-05, "loss": 4.0368, "step": 11349 }, { "epoch": 3.7808778212709253, "grad_norm": 0.68359375, "learning_rate": 1.0716206726615862e-05, "loss": 4.021, "step": 11350 }, { "epoch": 3.781210960273174, "grad_norm": 0.6875, "learning_rate": 1.0715475472196071e-05, "loss": 3.9789, "step": 11351 }, { "epoch": 3.781544099275423, "grad_norm": 0.66015625, "learning_rate": 1.0714744180322953e-05, "loss": 4.0026, "step": 11352 }, { "epoch": 3.781877238277671, "grad_norm": 0.68359375, "learning_rate": 1.0714012851005031e-05, "loss": 3.9999, "step": 11353 }, { "epoch": 3.7822103772799203, "grad_norm": 0.69921875, "learning_rate": 1.0713281484250819e-05, "loss": 3.9638, "step": 11354 }, { "epoch": 3.7825435162821686, "grad_norm": 0.67578125, "learning_rate": 1.0712550080068838e-05, "loss": 4.0251, "step": 11355 }, { "epoch": 3.7828766552844173, "grad_norm": 0.71484375, "learning_rate": 1.0711818638467607e-05, "loss": 4.0228, "step": 11356 }, { "epoch": 3.783209794286666, "grad_norm": 0.71484375, "learning_rate": 1.0711087159455646e-05, "loss": 4.0269, "step": 11357 }, { "epoch": 3.783542933288915, "grad_norm": 0.66796875, "learning_rate": 1.0710355643041476e-05, "loss": 3.9421, "step": 11358 }, { "epoch": 3.7838760722911635, "grad_norm": 0.66015625, "learning_rate": 1.0709624089233614e-05, "loss": 3.9876, "step": 11359 }, { "epoch": 3.7842092112934123, "grad_norm": 0.6796875, "learning_rate": 1.0708892498040584e-05, "loss": 3.9898, "step": 11360 }, { "epoch": 3.784542350295661, "grad_norm": 0.703125, "learning_rate": 1.0708160869470907e-05, "loss": 4.0272, "step": 11361 }, { "epoch": 3.7848754892979093, "grad_norm": 0.703125, "learning_rate": 1.0707429203533106e-05, "loss": 3.9637, "step": 11362 }, { "epoch": 3.7852086283001585, "grad_norm": 0.6875, "learning_rate": 1.0706697500235699e-05, "loss": 3.9823, "step": 11363 }, { "epoch": 3.7855417673024068, "grad_norm": 0.6953125, "learning_rate": 1.0705965759587215e-05, "loss": 3.9363, "step": 11364 }, { "epoch": 3.7858749063046555, "grad_norm": 0.73046875, "learning_rate": 1.0705233981596171e-05, "loss": 4.0722, "step": 11365 }, { "epoch": 3.7862080453069042, "grad_norm": 0.69921875, "learning_rate": 1.0704502166271096e-05, "loss": 4.0207, "step": 11366 }, { "epoch": 3.786541184309153, "grad_norm": 0.70703125, "learning_rate": 1.0703770313620514e-05, "loss": 3.9628, "step": 11367 }, { "epoch": 3.7868743233114017, "grad_norm": 0.65234375, "learning_rate": 1.0703038423652942e-05, "loss": 4.0416, "step": 11368 }, { "epoch": 3.7872074623136505, "grad_norm": 0.67578125, "learning_rate": 1.0702306496376912e-05, "loss": 4.0212, "step": 11369 }, { "epoch": 3.787540601315899, "grad_norm": 0.67578125, "learning_rate": 1.0701574531800949e-05, "loss": 4.0478, "step": 11370 }, { "epoch": 3.7878737403181475, "grad_norm": 0.66015625, "learning_rate": 1.0700842529933576e-05, "loss": 4.0399, "step": 11371 }, { "epoch": 3.7882068793203967, "grad_norm": 0.69921875, "learning_rate": 1.070011049078332e-05, "loss": 4.0185, "step": 11372 }, { "epoch": 3.788540018322645, "grad_norm": 0.640625, "learning_rate": 1.0699378414358709e-05, "loss": 4.1205, "step": 11373 }, { "epoch": 3.7888731573248937, "grad_norm": 0.671875, "learning_rate": 1.069864630066827e-05, "loss": 4.0258, "step": 11374 }, { "epoch": 3.7892062963271425, "grad_norm": 0.67578125, "learning_rate": 1.0697914149720529e-05, "loss": 4.054, "step": 11375 }, { "epoch": 3.789539435329391, "grad_norm": 0.671875, "learning_rate": 1.0697181961524015e-05, "loss": 4.0112, "step": 11376 }, { "epoch": 3.78987257433164, "grad_norm": 0.66796875, "learning_rate": 1.0696449736087256e-05, "loss": 3.9097, "step": 11377 }, { "epoch": 3.7902057133338887, "grad_norm": 0.69140625, "learning_rate": 1.069571747341878e-05, "loss": 3.8939, "step": 11378 }, { "epoch": 3.7905388523361374, "grad_norm": 0.6640625, "learning_rate": 1.069498517352712e-05, "loss": 4.0485, "step": 11379 }, { "epoch": 3.7908719913383857, "grad_norm": 0.6640625, "learning_rate": 1.06942528364208e-05, "loss": 4.0878, "step": 11380 }, { "epoch": 3.791205130340635, "grad_norm": 0.69140625, "learning_rate": 1.0693520462108357e-05, "loss": 3.9977, "step": 11381 }, { "epoch": 3.791538269342883, "grad_norm": 0.69140625, "learning_rate": 1.0692788050598315e-05, "loss": 3.9411, "step": 11382 }, { "epoch": 3.791871408345132, "grad_norm": 0.68359375, "learning_rate": 1.0692055601899206e-05, "loss": 3.9444, "step": 11383 }, { "epoch": 3.7922045473473807, "grad_norm": 0.73046875, "learning_rate": 1.0691323116019568e-05, "loss": 3.9951, "step": 11384 }, { "epoch": 3.7925376863496294, "grad_norm": 0.734375, "learning_rate": 1.0690590592967925e-05, "loss": 3.9304, "step": 11385 }, { "epoch": 3.792870825351878, "grad_norm": 0.65234375, "learning_rate": 1.0689858032752813e-05, "loss": 4.0686, "step": 11386 }, { "epoch": 3.793203964354127, "grad_norm": 0.671875, "learning_rate": 1.0689125435382764e-05, "loss": 3.9937, "step": 11387 }, { "epoch": 3.7935371033563756, "grad_norm": 0.70703125, "learning_rate": 1.0688392800866312e-05, "loss": 3.9548, "step": 11388 }, { "epoch": 3.793870242358624, "grad_norm": 0.6484375, "learning_rate": 1.0687660129211991e-05, "loss": 4.0463, "step": 11389 }, { "epoch": 3.794203381360873, "grad_norm": 0.6875, "learning_rate": 1.0686927420428332e-05, "loss": 3.9654, "step": 11390 }, { "epoch": 3.7945365203631214, "grad_norm": 0.671875, "learning_rate": 1.0686194674523872e-05, "loss": 4.0731, "step": 11391 }, { "epoch": 3.79486965936537, "grad_norm": 0.6953125, "learning_rate": 1.0685461891507145e-05, "loss": 4.0876, "step": 11392 }, { "epoch": 3.795202798367619, "grad_norm": 0.69921875, "learning_rate": 1.0684729071386685e-05, "loss": 3.9766, "step": 11393 }, { "epoch": 3.7955359373698676, "grad_norm": 0.71875, "learning_rate": 1.0683996214171033e-05, "loss": 4.0311, "step": 11394 }, { "epoch": 3.7958690763721163, "grad_norm": 0.6875, "learning_rate": 1.0683263319868722e-05, "loss": 3.999, "step": 11395 }, { "epoch": 3.796202215374365, "grad_norm": 0.703125, "learning_rate": 1.0682530388488283e-05, "loss": 3.9642, "step": 11396 }, { "epoch": 3.796535354376614, "grad_norm": 0.65234375, "learning_rate": 1.0681797420038262e-05, "loss": 3.9762, "step": 11397 }, { "epoch": 3.796868493378862, "grad_norm": 0.68359375, "learning_rate": 1.0681064414527194e-05, "loss": 3.9787, "step": 11398 }, { "epoch": 3.797201632381111, "grad_norm": 0.71484375, "learning_rate": 1.0680331371963612e-05, "loss": 3.9736, "step": 11399 }, { "epoch": 3.7975347713833596, "grad_norm": 0.68359375, "learning_rate": 1.067959829235606e-05, "loss": 4.0308, "step": 11400 }, { "epoch": 3.7978679103856083, "grad_norm": 0.70703125, "learning_rate": 1.0678865175713075e-05, "loss": 3.9995, "step": 11401 }, { "epoch": 3.798201049387857, "grad_norm": 0.68359375, "learning_rate": 1.0678132022043198e-05, "loss": 4.0033, "step": 11402 }, { "epoch": 3.798534188390106, "grad_norm": 0.73828125, "learning_rate": 1.0677398831354964e-05, "loss": 3.9773, "step": 11403 }, { "epoch": 3.7988673273923546, "grad_norm": 0.68359375, "learning_rate": 1.0676665603656917e-05, "loss": 3.9926, "step": 11404 }, { "epoch": 3.7992004663946033, "grad_norm": 0.67578125, "learning_rate": 1.0675932338957594e-05, "loss": 3.9718, "step": 11405 }, { "epoch": 3.799533605396852, "grad_norm": 0.74609375, "learning_rate": 1.0675199037265543e-05, "loss": 4.0186, "step": 11406 }, { "epoch": 3.7998667443991003, "grad_norm": 0.69921875, "learning_rate": 1.0674465698589299e-05, "loss": 3.9417, "step": 11407 }, { "epoch": 3.800199883401349, "grad_norm": 0.66796875, "learning_rate": 1.0673732322937405e-05, "loss": 4.023, "step": 11408 }, { "epoch": 3.800533022403598, "grad_norm": 0.6796875, "learning_rate": 1.0672998910318403e-05, "loss": 4.0613, "step": 11409 }, { "epoch": 3.8008661614058465, "grad_norm": 0.7109375, "learning_rate": 1.0672265460740836e-05, "loss": 4.0232, "step": 11410 }, { "epoch": 3.8011993004080953, "grad_norm": 0.67578125, "learning_rate": 1.0671531974213248e-05, "loss": 4.001, "step": 11411 }, { "epoch": 3.801532439410344, "grad_norm": 0.66796875, "learning_rate": 1.0670798450744182e-05, "loss": 3.9074, "step": 11412 }, { "epoch": 3.8018655784125928, "grad_norm": 0.671875, "learning_rate": 1.0670064890342184e-05, "loss": 3.9982, "step": 11413 }, { "epoch": 3.8021987174148415, "grad_norm": 0.70703125, "learning_rate": 1.066933129301579e-05, "loss": 4.0256, "step": 11414 }, { "epoch": 3.8025318564170902, "grad_norm": 0.66796875, "learning_rate": 1.066859765877356e-05, "loss": 4.0866, "step": 11415 }, { "epoch": 3.8028649954193385, "grad_norm": 0.72265625, "learning_rate": 1.0667863987624025e-05, "loss": 3.9077, "step": 11416 }, { "epoch": 3.8031981344215873, "grad_norm": 0.66796875, "learning_rate": 1.0667130279575739e-05, "loss": 3.9692, "step": 11417 }, { "epoch": 3.803531273423836, "grad_norm": 0.703125, "learning_rate": 1.0666396534637243e-05, "loss": 3.9389, "step": 11418 }, { "epoch": 3.8038644124260848, "grad_norm": 0.703125, "learning_rate": 1.0665662752817086e-05, "loss": 3.9295, "step": 11419 }, { "epoch": 3.8041975514283335, "grad_norm": 0.7265625, "learning_rate": 1.0664928934123814e-05, "loss": 4.0352, "step": 11420 }, { "epoch": 3.8045306904305822, "grad_norm": 0.6484375, "learning_rate": 1.0664195078565977e-05, "loss": 4.0001, "step": 11421 }, { "epoch": 3.804863829432831, "grad_norm": 0.66796875, "learning_rate": 1.0663461186152121e-05, "loss": 4.08, "step": 11422 }, { "epoch": 3.8051969684350793, "grad_norm": 0.67578125, "learning_rate": 1.0662727256890793e-05, "loss": 3.9623, "step": 11423 }, { "epoch": 3.8055301074373284, "grad_norm": 0.66015625, "learning_rate": 1.0661993290790544e-05, "loss": 4.0203, "step": 11424 }, { "epoch": 3.8058632464395767, "grad_norm": 0.66796875, "learning_rate": 1.066125928785992e-05, "loss": 3.9906, "step": 11425 }, { "epoch": 3.8061963854418255, "grad_norm": 0.6640625, "learning_rate": 1.0660525248107477e-05, "loss": 4.0444, "step": 11426 }, { "epoch": 3.806529524444074, "grad_norm": 0.69140625, "learning_rate": 1.0659791171541756e-05, "loss": 4.0044, "step": 11427 }, { "epoch": 3.806862663446323, "grad_norm": 0.6875, "learning_rate": 1.0659057058171312e-05, "loss": 4.0275, "step": 11428 }, { "epoch": 3.8071958024485717, "grad_norm": 0.71875, "learning_rate": 1.0658322908004697e-05, "loss": 3.9966, "step": 11429 }, { "epoch": 3.8075289414508204, "grad_norm": 0.71484375, "learning_rate": 1.0657588721050462e-05, "loss": 4.0024, "step": 11430 }, { "epoch": 3.807862080453069, "grad_norm": 0.6640625, "learning_rate": 1.0656854497317159e-05, "loss": 4.0122, "step": 11431 }, { "epoch": 3.8081952194553175, "grad_norm": 0.68359375, "learning_rate": 1.0656120236813337e-05, "loss": 4.0462, "step": 11432 }, { "epoch": 3.8085283584575667, "grad_norm": 0.68359375, "learning_rate": 1.065538593954755e-05, "loss": 3.9878, "step": 11433 }, { "epoch": 3.808861497459815, "grad_norm": 0.66015625, "learning_rate": 1.065465160552835e-05, "loss": 4.0307, "step": 11434 }, { "epoch": 3.8091946364620637, "grad_norm": 0.703125, "learning_rate": 1.0653917234764295e-05, "loss": 4.0479, "step": 11435 }, { "epoch": 3.8095277754643124, "grad_norm": 0.70703125, "learning_rate": 1.0653182827263932e-05, "loss": 4.0241, "step": 11436 }, { "epoch": 3.809860914466561, "grad_norm": 0.6796875, "learning_rate": 1.0652448383035823e-05, "loss": 4.1091, "step": 11437 }, { "epoch": 3.81019405346881, "grad_norm": 0.671875, "learning_rate": 1.0651713902088516e-05, "loss": 3.9324, "step": 11438 }, { "epoch": 3.8105271924710586, "grad_norm": 0.69140625, "learning_rate": 1.065097938443057e-05, "loss": 4.0289, "step": 11439 }, { "epoch": 3.8108603314733074, "grad_norm": 0.6953125, "learning_rate": 1.0650244830070537e-05, "loss": 4.0404, "step": 11440 }, { "epoch": 3.8111934704755557, "grad_norm": 0.69921875, "learning_rate": 1.0649510239016978e-05, "loss": 3.9406, "step": 11441 }, { "epoch": 3.811526609477805, "grad_norm": 0.6953125, "learning_rate": 1.0648775611278444e-05, "loss": 4.0067, "step": 11442 }, { "epoch": 3.811859748480053, "grad_norm": 0.69140625, "learning_rate": 1.0648040946863495e-05, "loss": 4.0269, "step": 11443 }, { "epoch": 3.812192887482302, "grad_norm": 0.68359375, "learning_rate": 1.0647306245780688e-05, "loss": 4.0483, "step": 11444 }, { "epoch": 3.8125260264845506, "grad_norm": 0.66796875, "learning_rate": 1.064657150803858e-05, "loss": 4.0195, "step": 11445 }, { "epoch": 3.8128591654867994, "grad_norm": 0.68359375, "learning_rate": 1.064583673364573e-05, "loss": 4.0068, "step": 11446 }, { "epoch": 3.813192304489048, "grad_norm": 0.6875, "learning_rate": 1.0645101922610695e-05, "loss": 3.9726, "step": 11447 }, { "epoch": 3.813525443491297, "grad_norm": 0.68359375, "learning_rate": 1.0644367074942037e-05, "loss": 4.0559, "step": 11448 }, { "epoch": 3.8138585824935456, "grad_norm": 0.70703125, "learning_rate": 1.0643632190648309e-05, "loss": 4.0242, "step": 11449 }, { "epoch": 3.814191721495794, "grad_norm": 0.71484375, "learning_rate": 1.0642897269738079e-05, "loss": 4.0038, "step": 11450 }, { "epoch": 3.814524860498043, "grad_norm": 0.75, "learning_rate": 1.06421623122199e-05, "loss": 4.0398, "step": 11451 }, { "epoch": 3.8148579995002914, "grad_norm": 0.67578125, "learning_rate": 1.0641427318102339e-05, "loss": 4.121, "step": 11452 }, { "epoch": 3.81519113850254, "grad_norm": 0.70703125, "learning_rate": 1.0640692287393953e-05, "loss": 3.9735, "step": 11453 }, { "epoch": 3.815524277504789, "grad_norm": 0.6796875, "learning_rate": 1.0639957220103304e-05, "loss": 4.0319, "step": 11454 }, { "epoch": 3.8158574165070376, "grad_norm": 0.71875, "learning_rate": 1.0639222116238956e-05, "loss": 3.9743, "step": 11455 }, { "epoch": 3.8161905555092863, "grad_norm": 0.65625, "learning_rate": 1.063848697580947e-05, "loss": 4.0124, "step": 11456 }, { "epoch": 3.816523694511535, "grad_norm": 0.71484375, "learning_rate": 1.0637751798823407e-05, "loss": 4.0542, "step": 11457 }, { "epoch": 3.816856833513784, "grad_norm": 0.66796875, "learning_rate": 1.0637016585289333e-05, "loss": 3.9507, "step": 11458 }, { "epoch": 3.817189972516032, "grad_norm": 0.7109375, "learning_rate": 1.063628133521581e-05, "loss": 3.998, "step": 11459 }, { "epoch": 3.8175231115182813, "grad_norm": 0.69140625, "learning_rate": 1.0635546048611401e-05, "loss": 4.079, "step": 11460 }, { "epoch": 3.8178562505205296, "grad_norm": 0.6953125, "learning_rate": 1.0634810725484677e-05, "loss": 4.0211, "step": 11461 }, { "epoch": 3.8181893895227783, "grad_norm": 0.63671875, "learning_rate": 1.0634075365844194e-05, "loss": 4.0141, "step": 11462 }, { "epoch": 3.818522528525027, "grad_norm": 0.65234375, "learning_rate": 1.0633339969698523e-05, "loss": 4.0066, "step": 11463 }, { "epoch": 3.818855667527276, "grad_norm": 0.68359375, "learning_rate": 1.0632604537056227e-05, "loss": 4.0098, "step": 11464 }, { "epoch": 3.8191888065295245, "grad_norm": 0.6640625, "learning_rate": 1.0631869067925872e-05, "loss": 4.0265, "step": 11465 }, { "epoch": 3.8195219455317733, "grad_norm": 0.67578125, "learning_rate": 1.0631133562316028e-05, "loss": 3.9606, "step": 11466 }, { "epoch": 3.819855084534022, "grad_norm": 0.69140625, "learning_rate": 1.063039802023526e-05, "loss": 4.0741, "step": 11467 }, { "epoch": 3.8201882235362703, "grad_norm": 0.6953125, "learning_rate": 1.0629662441692136e-05, "loss": 4.0512, "step": 11468 }, { "epoch": 3.820521362538519, "grad_norm": 0.66015625, "learning_rate": 1.0628926826695222e-05, "loss": 3.9466, "step": 11469 }, { "epoch": 3.820854501540768, "grad_norm": 0.67578125, "learning_rate": 1.062819117525309e-05, "loss": 3.9871, "step": 11470 }, { "epoch": 3.8211876405430165, "grad_norm": 0.72265625, "learning_rate": 1.0627455487374304e-05, "loss": 3.986, "step": 11471 }, { "epoch": 3.8215207795452653, "grad_norm": 0.7265625, "learning_rate": 1.0626719763067438e-05, "loss": 4.0591, "step": 11472 }, { "epoch": 3.821853918547514, "grad_norm": 0.69140625, "learning_rate": 1.0625984002341056e-05, "loss": 4.0721, "step": 11473 }, { "epoch": 3.8221870575497627, "grad_norm": 0.70703125, "learning_rate": 1.0625248205203734e-05, "loss": 4.1223, "step": 11474 }, { "epoch": 3.8225201965520115, "grad_norm": 0.625, "learning_rate": 1.062451237166404e-05, "loss": 4.0512, "step": 11475 }, { "epoch": 3.82285333555426, "grad_norm": 0.6796875, "learning_rate": 1.0623776501730541e-05, "loss": 4.0041, "step": 11476 }, { "epoch": 3.8231864745565085, "grad_norm": 0.69140625, "learning_rate": 1.0623040595411814e-05, "loss": 3.9388, "step": 11477 }, { "epoch": 3.8235196135587572, "grad_norm": 0.7109375, "learning_rate": 1.062230465271643e-05, "loss": 4.0268, "step": 11478 }, { "epoch": 3.823852752561006, "grad_norm": 0.6875, "learning_rate": 1.062156867365296e-05, "loss": 3.9289, "step": 11479 }, { "epoch": 3.8241858915632547, "grad_norm": 0.65625, "learning_rate": 1.0620832658229972e-05, "loss": 4.0191, "step": 11480 }, { "epoch": 3.8245190305655035, "grad_norm": 0.68359375, "learning_rate": 1.0620096606456047e-05, "loss": 4.0359, "step": 11481 }, { "epoch": 3.824852169567752, "grad_norm": 0.6796875, "learning_rate": 1.0619360518339751e-05, "loss": 4.0083, "step": 11482 }, { "epoch": 3.825185308570001, "grad_norm": 0.6875, "learning_rate": 1.0618624393889668e-05, "loss": 4.0202, "step": 11483 }, { "epoch": 3.8255184475722497, "grad_norm": 0.703125, "learning_rate": 1.0617888233114361e-05, "loss": 4.0201, "step": 11484 }, { "epoch": 3.8258515865744984, "grad_norm": 0.64453125, "learning_rate": 1.0617152036022408e-05, "loss": 3.9738, "step": 11485 }, { "epoch": 3.8261847255767467, "grad_norm": 0.6796875, "learning_rate": 1.0616415802622388e-05, "loss": 3.9927, "step": 11486 }, { "epoch": 3.8265178645789955, "grad_norm": 0.6640625, "learning_rate": 1.0615679532922874e-05, "loss": 4.0217, "step": 11487 }, { "epoch": 3.826851003581244, "grad_norm": 0.67578125, "learning_rate": 1.0614943226932442e-05, "loss": 4.0208, "step": 11488 }, { "epoch": 3.827184142583493, "grad_norm": 0.73828125, "learning_rate": 1.0614206884659668e-05, "loss": 3.9991, "step": 11489 }, { "epoch": 3.8275172815857417, "grad_norm": 0.73046875, "learning_rate": 1.0613470506113129e-05, "loss": 4.0246, "step": 11490 }, { "epoch": 3.8278504205879904, "grad_norm": 0.69921875, "learning_rate": 1.0612734091301402e-05, "loss": 4.0262, "step": 11491 }, { "epoch": 3.828183559590239, "grad_norm": 0.69140625, "learning_rate": 1.0611997640233066e-05, "loss": 3.9987, "step": 11492 }, { "epoch": 3.8285166985924874, "grad_norm": 0.640625, "learning_rate": 1.0611261152916697e-05, "loss": 4.0862, "step": 11493 }, { "epoch": 3.8288498375947366, "grad_norm": 0.68359375, "learning_rate": 1.0610524629360877e-05, "loss": 3.983, "step": 11494 }, { "epoch": 3.829182976596985, "grad_norm": 0.6875, "learning_rate": 1.0609788069574179e-05, "loss": 4.022, "step": 11495 }, { "epoch": 3.8295161155992337, "grad_norm": 0.6953125, "learning_rate": 1.0609051473565188e-05, "loss": 4.011, "step": 11496 }, { "epoch": 3.8298492546014824, "grad_norm": 0.71875, "learning_rate": 1.0608314841342481e-05, "loss": 4.0058, "step": 11497 }, { "epoch": 3.830182393603731, "grad_norm": 0.6796875, "learning_rate": 1.0607578172914639e-05, "loss": 4.059, "step": 11498 }, { "epoch": 3.83051553260598, "grad_norm": 0.7109375, "learning_rate": 1.060684146829024e-05, "loss": 3.9415, "step": 11499 }, { "epoch": 3.8308486716082286, "grad_norm": 0.6328125, "learning_rate": 1.060610472747787e-05, "loss": 4.0974, "step": 11500 }, { "epoch": 3.8311818106104774, "grad_norm": 0.7109375, "learning_rate": 1.0605367950486109e-05, "loss": 4.0323, "step": 11501 }, { "epoch": 3.8315149496127257, "grad_norm": 0.68359375, "learning_rate": 1.0604631137323534e-05, "loss": 3.9707, "step": 11502 }, { "epoch": 3.831848088614975, "grad_norm": 0.6875, "learning_rate": 1.0603894287998734e-05, "loss": 3.8847, "step": 11503 }, { "epoch": 3.832181227617223, "grad_norm": 0.66796875, "learning_rate": 1.0603157402520286e-05, "loss": 4.0213, "step": 11504 }, { "epoch": 3.832514366619472, "grad_norm": 0.69140625, "learning_rate": 1.0602420480896776e-05, "loss": 3.9372, "step": 11505 }, { "epoch": 3.8328475056217206, "grad_norm": 0.6953125, "learning_rate": 1.060168352313679e-05, "loss": 4.0635, "step": 11506 }, { "epoch": 3.8331806446239693, "grad_norm": 0.73828125, "learning_rate": 1.0600946529248906e-05, "loss": 3.9883, "step": 11507 }, { "epoch": 3.833513783626218, "grad_norm": 0.71875, "learning_rate": 1.0600209499241712e-05, "loss": 3.9948, "step": 11508 }, { "epoch": 3.833846922628467, "grad_norm": 0.70703125, "learning_rate": 1.0599472433123793e-05, "loss": 3.9456, "step": 11509 }, { "epoch": 3.8341800616307156, "grad_norm": 0.69140625, "learning_rate": 1.0598735330903736e-05, "loss": 3.9743, "step": 11510 }, { "epoch": 3.834513200632964, "grad_norm": 0.7265625, "learning_rate": 1.059799819259012e-05, "loss": 4.0521, "step": 11511 }, { "epoch": 3.834846339635213, "grad_norm": 0.71875, "learning_rate": 1.0597261018191538e-05, "loss": 3.9492, "step": 11512 }, { "epoch": 3.8351794786374613, "grad_norm": 0.6796875, "learning_rate": 1.0596523807716572e-05, "loss": 4.022, "step": 11513 }, { "epoch": 3.83551261763971, "grad_norm": 0.6328125, "learning_rate": 1.0595786561173813e-05, "loss": 4.012, "step": 11514 }, { "epoch": 3.835845756641959, "grad_norm": 0.70703125, "learning_rate": 1.0595049278571843e-05, "loss": 4.0972, "step": 11515 }, { "epoch": 3.8361788956442076, "grad_norm": 0.6796875, "learning_rate": 1.0594311959919255e-05, "loss": 4.0337, "step": 11516 }, { "epoch": 3.8365120346464563, "grad_norm": 0.6640625, "learning_rate": 1.0593574605224635e-05, "loss": 3.9903, "step": 11517 }, { "epoch": 3.836845173648705, "grad_norm": 0.67578125, "learning_rate": 1.0592837214496572e-05, "loss": 4.0209, "step": 11518 }, { "epoch": 3.8371783126509538, "grad_norm": 0.6484375, "learning_rate": 1.0592099787743653e-05, "loss": 4.0619, "step": 11519 }, { "epoch": 3.837511451653202, "grad_norm": 0.6875, "learning_rate": 1.0591362324974469e-05, "loss": 4.1092, "step": 11520 }, { "epoch": 3.8378445906554512, "grad_norm": 0.66015625, "learning_rate": 1.0590624826197611e-05, "loss": 4.0154, "step": 11521 }, { "epoch": 3.8381777296576995, "grad_norm": 0.70703125, "learning_rate": 1.0589887291421666e-05, "loss": 3.9552, "step": 11522 }, { "epoch": 3.8385108686599483, "grad_norm": 0.69140625, "learning_rate": 1.0589149720655229e-05, "loss": 4.1032, "step": 11523 }, { "epoch": 3.838844007662197, "grad_norm": 0.73828125, "learning_rate": 1.0588412113906886e-05, "loss": 4.0557, "step": 11524 }, { "epoch": 3.8391771466644458, "grad_norm": 0.6796875, "learning_rate": 1.0587674471185236e-05, "loss": 3.9625, "step": 11525 }, { "epoch": 3.8395102856666945, "grad_norm": 0.671875, "learning_rate": 1.0586936792498862e-05, "loss": 3.9006, "step": 11526 }, { "epoch": 3.8398434246689432, "grad_norm": 0.68359375, "learning_rate": 1.0586199077856362e-05, "loss": 3.9621, "step": 11527 }, { "epoch": 3.840176563671192, "grad_norm": 0.66015625, "learning_rate": 1.0585461327266328e-05, "loss": 4.0282, "step": 11528 }, { "epoch": 3.8405097026734403, "grad_norm": 0.71875, "learning_rate": 1.0584723540737355e-05, "loss": 4.0561, "step": 11529 }, { "epoch": 3.8408428416756895, "grad_norm": 0.65625, "learning_rate": 1.0583985718278028e-05, "loss": 4.0495, "step": 11530 }, { "epoch": 3.8411759806779378, "grad_norm": 0.69921875, "learning_rate": 1.0583247859896953e-05, "loss": 4.0195, "step": 11531 }, { "epoch": 3.8415091196801865, "grad_norm": 0.72265625, "learning_rate": 1.0582509965602715e-05, "loss": 4.0211, "step": 11532 }, { "epoch": 3.8418422586824352, "grad_norm": 0.69921875, "learning_rate": 1.0581772035403913e-05, "loss": 4.0597, "step": 11533 }, { "epoch": 3.842175397684684, "grad_norm": 0.703125, "learning_rate": 1.0581034069309144e-05, "loss": 4.0385, "step": 11534 }, { "epoch": 3.8425085366869327, "grad_norm": 0.70703125, "learning_rate": 1.0580296067327e-05, "loss": 3.9071, "step": 11535 }, { "epoch": 3.8428416756891814, "grad_norm": 0.703125, "learning_rate": 1.057955802946608e-05, "loss": 3.9908, "step": 11536 }, { "epoch": 3.84317481469143, "grad_norm": 0.6875, "learning_rate": 1.057881995573498e-05, "loss": 4.0285, "step": 11537 }, { "epoch": 3.8435079536936785, "grad_norm": 0.65625, "learning_rate": 1.0578081846142295e-05, "loss": 3.9972, "step": 11538 }, { "epoch": 3.843841092695927, "grad_norm": 0.6875, "learning_rate": 1.0577343700696622e-05, "loss": 4.0345, "step": 11539 }, { "epoch": 3.844174231698176, "grad_norm": 0.6640625, "learning_rate": 1.0576605519406563e-05, "loss": 4.0032, "step": 11540 }, { "epoch": 3.8445073707004247, "grad_norm": 0.6796875, "learning_rate": 1.0575867302280712e-05, "loss": 3.9576, "step": 11541 }, { "epoch": 3.8448405097026734, "grad_norm": 0.6640625, "learning_rate": 1.0575129049327671e-05, "loss": 3.9548, "step": 11542 }, { "epoch": 3.845173648704922, "grad_norm": 0.6953125, "learning_rate": 1.0574390760556036e-05, "loss": 4.0095, "step": 11543 }, { "epoch": 3.845506787707171, "grad_norm": 0.64453125, "learning_rate": 1.0573652435974406e-05, "loss": 3.9241, "step": 11544 }, { "epoch": 3.8458399267094197, "grad_norm": 0.6875, "learning_rate": 1.0572914075591385e-05, "loss": 4.0392, "step": 11545 }, { "epoch": 3.8461730657116684, "grad_norm": 0.69140625, "learning_rate": 1.057217567941557e-05, "loss": 4.0226, "step": 11546 }, { "epoch": 3.8465062047139167, "grad_norm": 0.66796875, "learning_rate": 1.0571437247455564e-05, "loss": 3.9836, "step": 11547 }, { "epoch": 3.8468393437161654, "grad_norm": 0.69140625, "learning_rate": 1.0570698779719965e-05, "loss": 3.9733, "step": 11548 }, { "epoch": 3.847172482718414, "grad_norm": 0.6640625, "learning_rate": 1.0569960276217379e-05, "loss": 4.061, "step": 11549 }, { "epoch": 3.847505621720663, "grad_norm": 0.6796875, "learning_rate": 1.0569221736956402e-05, "loss": 4.0242, "step": 11550 }, { "epoch": 3.8478387607229116, "grad_norm": 0.66015625, "learning_rate": 1.0568483161945643e-05, "loss": 3.9801, "step": 11551 }, { "epoch": 3.8481718997251604, "grad_norm": 0.70703125, "learning_rate": 1.0567744551193698e-05, "loss": 4.012, "step": 11552 }, { "epoch": 3.848505038727409, "grad_norm": 0.71875, "learning_rate": 1.0567005904709175e-05, "loss": 4.0605, "step": 11553 }, { "epoch": 3.848838177729658, "grad_norm": 0.703125, "learning_rate": 1.0566267222500677e-05, "loss": 4.0769, "step": 11554 }, { "epoch": 3.8491713167319066, "grad_norm": 0.66796875, "learning_rate": 1.0565528504576807e-05, "loss": 3.9601, "step": 11555 }, { "epoch": 3.849504455734155, "grad_norm": 0.70703125, "learning_rate": 1.0564789750946171e-05, "loss": 4.0065, "step": 11556 }, { "epoch": 3.8498375947364036, "grad_norm": 0.67578125, "learning_rate": 1.056405096161737e-05, "loss": 4.0827, "step": 11557 }, { "epoch": 3.8501707337386524, "grad_norm": 0.68359375, "learning_rate": 1.0563312136599013e-05, "loss": 4.0751, "step": 11558 }, { "epoch": 3.850503872740901, "grad_norm": 0.66015625, "learning_rate": 1.0562573275899706e-05, "loss": 4.0744, "step": 11559 }, { "epoch": 3.85083701174315, "grad_norm": 0.69140625, "learning_rate": 1.0561834379528056e-05, "loss": 4.0328, "step": 11560 }, { "epoch": 3.8511701507453986, "grad_norm": 0.7109375, "learning_rate": 1.0561095447492662e-05, "loss": 4.0315, "step": 11561 }, { "epoch": 3.8515032897476473, "grad_norm": 0.66796875, "learning_rate": 1.0560356479802141e-05, "loss": 4.0038, "step": 11562 }, { "epoch": 3.8518364287498956, "grad_norm": 0.65625, "learning_rate": 1.0559617476465092e-05, "loss": 3.9674, "step": 11563 }, { "epoch": 3.852169567752145, "grad_norm": 0.7265625, "learning_rate": 1.0558878437490128e-05, "loss": 4.0484, "step": 11564 }, { "epoch": 3.852502706754393, "grad_norm": 0.71875, "learning_rate": 1.0558139362885858e-05, "loss": 4.0131, "step": 11565 }, { "epoch": 3.852835845756642, "grad_norm": 0.7265625, "learning_rate": 1.0557400252660885e-05, "loss": 4.0115, "step": 11566 }, { "epoch": 3.8531689847588906, "grad_norm": 0.7265625, "learning_rate": 1.0556661106823821e-05, "loss": 3.9816, "step": 11567 }, { "epoch": 3.8535021237611393, "grad_norm": 0.70703125, "learning_rate": 1.055592192538328e-05, "loss": 3.9465, "step": 11568 }, { "epoch": 3.853835262763388, "grad_norm": 0.69921875, "learning_rate": 1.0555182708347864e-05, "loss": 4.0372, "step": 11569 }, { "epoch": 3.854168401765637, "grad_norm": 0.71484375, "learning_rate": 1.0554443455726188e-05, "loss": 4.0732, "step": 11570 }, { "epoch": 3.8545015407678855, "grad_norm": 0.6796875, "learning_rate": 1.0553704167526862e-05, "loss": 4.0228, "step": 11571 }, { "epoch": 3.854834679770134, "grad_norm": 0.7421875, "learning_rate": 1.0552964843758496e-05, "loss": 4.0096, "step": 11572 }, { "epoch": 3.855167818772383, "grad_norm": 0.72265625, "learning_rate": 1.0552225484429705e-05, "loss": 3.9909, "step": 11573 }, { "epoch": 3.8555009577746313, "grad_norm": 0.7265625, "learning_rate": 1.0551486089549096e-05, "loss": 3.9926, "step": 11574 }, { "epoch": 3.85583409677688, "grad_norm": 0.69921875, "learning_rate": 1.0550746659125284e-05, "loss": 3.95, "step": 11575 }, { "epoch": 3.856167235779129, "grad_norm": 0.6953125, "learning_rate": 1.055000719316688e-05, "loss": 4.0136, "step": 11576 }, { "epoch": 3.8565003747813775, "grad_norm": 0.703125, "learning_rate": 1.0549267691682499e-05, "loss": 4.0286, "step": 11577 }, { "epoch": 3.8568335137836263, "grad_norm": 0.69140625, "learning_rate": 1.0548528154680756e-05, "loss": 4.025, "step": 11578 }, { "epoch": 3.857166652785875, "grad_norm": 0.67578125, "learning_rate": 1.0547788582170259e-05, "loss": 4.0189, "step": 11579 }, { "epoch": 3.8574997917881237, "grad_norm": 0.71484375, "learning_rate": 1.054704897415963e-05, "loss": 3.9075, "step": 11580 }, { "epoch": 3.857832930790372, "grad_norm": 0.72265625, "learning_rate": 1.054630933065748e-05, "loss": 4.0523, "step": 11581 }, { "epoch": 3.8581660697926212, "grad_norm": 0.6640625, "learning_rate": 1.0545569651672426e-05, "loss": 4.0756, "step": 11582 }, { "epoch": 3.8584992087948695, "grad_norm": 0.68359375, "learning_rate": 1.0544829937213078e-05, "loss": 4.0158, "step": 11583 }, { "epoch": 3.8588323477971183, "grad_norm": 0.66015625, "learning_rate": 1.0544090187288059e-05, "loss": 4.0847, "step": 11584 }, { "epoch": 3.859165486799367, "grad_norm": 0.69921875, "learning_rate": 1.0543350401905983e-05, "loss": 3.9664, "step": 11585 }, { "epoch": 3.8594986258016157, "grad_norm": 0.72265625, "learning_rate": 1.0542610581075466e-05, "loss": 3.9257, "step": 11586 }, { "epoch": 3.8598317648038645, "grad_norm": 0.70703125, "learning_rate": 1.0541870724805127e-05, "loss": 3.9952, "step": 11587 }, { "epoch": 3.860164903806113, "grad_norm": 0.7109375, "learning_rate": 1.054113083310358e-05, "loss": 4.0498, "step": 11588 }, { "epoch": 3.860498042808362, "grad_norm": 0.703125, "learning_rate": 1.054039090597945e-05, "loss": 4.061, "step": 11589 }, { "epoch": 3.8608311818106102, "grad_norm": 0.70703125, "learning_rate": 1.0539650943441348e-05, "loss": 3.9315, "step": 11590 }, { "epoch": 3.8611643208128594, "grad_norm": 0.65625, "learning_rate": 1.0538910945497899e-05, "loss": 4.0911, "step": 11591 }, { "epoch": 3.8614974598151077, "grad_norm": 0.7109375, "learning_rate": 1.0538170912157717e-05, "loss": 3.9545, "step": 11592 }, { "epoch": 3.8618305988173565, "grad_norm": 0.7265625, "learning_rate": 1.0537430843429427e-05, "loss": 3.9501, "step": 11593 }, { "epoch": 3.862163737819605, "grad_norm": 0.671875, "learning_rate": 1.0536690739321644e-05, "loss": 4.0594, "step": 11594 }, { "epoch": 3.862496876821854, "grad_norm": 0.71484375, "learning_rate": 1.0535950599842996e-05, "loss": 3.9338, "step": 11595 }, { "epoch": 3.8628300158241027, "grad_norm": 0.7109375, "learning_rate": 1.0535210425002096e-05, "loss": 4.0027, "step": 11596 }, { "epoch": 3.8631631548263514, "grad_norm": 0.6875, "learning_rate": 1.0534470214807569e-05, "loss": 4.0583, "step": 11597 }, { "epoch": 3.8634962938286, "grad_norm": 0.6796875, "learning_rate": 1.0533729969268034e-05, "loss": 4.0888, "step": 11598 }, { "epoch": 3.8638294328308485, "grad_norm": 0.65234375, "learning_rate": 1.0532989688392122e-05, "loss": 3.9815, "step": 11599 }, { "epoch": 3.8641625718330976, "grad_norm": 0.6953125, "learning_rate": 1.0532249372188445e-05, "loss": 3.981, "step": 11600 }, { "epoch": 3.864495710835346, "grad_norm": 0.6875, "learning_rate": 1.053150902066563e-05, "loss": 4.0276, "step": 11601 }, { "epoch": 3.8648288498375947, "grad_norm": 0.71484375, "learning_rate": 1.0530768633832305e-05, "loss": 3.9729, "step": 11602 }, { "epoch": 3.8651619888398434, "grad_norm": 0.6953125, "learning_rate": 1.0530028211697089e-05, "loss": 4.0074, "step": 11603 }, { "epoch": 3.865495127842092, "grad_norm": 0.6875, "learning_rate": 1.0529287754268606e-05, "loss": 4.0287, "step": 11604 }, { "epoch": 3.865828266844341, "grad_norm": 0.66796875, "learning_rate": 1.0528547261555483e-05, "loss": 3.9446, "step": 11605 }, { "epoch": 3.8661614058465896, "grad_norm": 0.671875, "learning_rate": 1.0527806733566342e-05, "loss": 4.0034, "step": 11606 }, { "epoch": 3.8664945448488384, "grad_norm": 0.73046875, "learning_rate": 1.0527066170309813e-05, "loss": 3.962, "step": 11607 }, { "epoch": 3.8668276838510867, "grad_norm": 0.69140625, "learning_rate": 1.052632557179452e-05, "loss": 4.0245, "step": 11608 }, { "epoch": 3.8671608228533354, "grad_norm": 0.6953125, "learning_rate": 1.0525584938029088e-05, "loss": 3.9831, "step": 11609 }, { "epoch": 3.867493961855584, "grad_norm": 0.72265625, "learning_rate": 1.0524844269022145e-05, "loss": 3.9881, "step": 11610 }, { "epoch": 3.867827100857833, "grad_norm": 0.69140625, "learning_rate": 1.0524103564782321e-05, "loss": 3.9919, "step": 11611 }, { "epoch": 3.8681602398600816, "grad_norm": 0.65625, "learning_rate": 1.0523362825318235e-05, "loss": 4.0903, "step": 11612 }, { "epoch": 3.8684933788623304, "grad_norm": 0.67578125, "learning_rate": 1.0522622050638526e-05, "loss": 4.0111, "step": 11613 }, { "epoch": 3.868826517864579, "grad_norm": 0.7109375, "learning_rate": 1.0521881240751815e-05, "loss": 3.974, "step": 11614 }, { "epoch": 3.869159656866828, "grad_norm": 0.6796875, "learning_rate": 1.0521140395666733e-05, "loss": 4.05, "step": 11615 }, { "epoch": 3.8694927958690766, "grad_norm": 0.69140625, "learning_rate": 1.0520399515391907e-05, "loss": 4.0113, "step": 11616 }, { "epoch": 3.869825934871325, "grad_norm": 0.69921875, "learning_rate": 1.0519658599935971e-05, "loss": 3.9564, "step": 11617 }, { "epoch": 3.8701590738735736, "grad_norm": 0.70703125, "learning_rate": 1.0518917649307554e-05, "loss": 4.0063, "step": 11618 }, { "epoch": 3.8704922128758223, "grad_norm": 0.70703125, "learning_rate": 1.0518176663515285e-05, "loss": 3.9884, "step": 11619 }, { "epoch": 3.870825351878071, "grad_norm": 0.7421875, "learning_rate": 1.0517435642567792e-05, "loss": 4.0115, "step": 11620 }, { "epoch": 3.87115849088032, "grad_norm": 0.703125, "learning_rate": 1.0516694586473713e-05, "loss": 3.9886, "step": 11621 }, { "epoch": 3.8714916298825686, "grad_norm": 0.65234375, "learning_rate": 1.0515953495241676e-05, "loss": 4.0131, "step": 11622 }, { "epoch": 3.8718247688848173, "grad_norm": 0.7109375, "learning_rate": 1.0515212368880311e-05, "loss": 4.0211, "step": 11623 }, { "epoch": 3.872157907887066, "grad_norm": 0.68359375, "learning_rate": 1.0514471207398253e-05, "loss": 4.0894, "step": 11624 }, { "epoch": 3.872491046889315, "grad_norm": 0.6640625, "learning_rate": 1.0513730010804137e-05, "loss": 3.9929, "step": 11625 }, { "epoch": 3.872824185891563, "grad_norm": 0.734375, "learning_rate": 1.0512988779106592e-05, "loss": 3.9736, "step": 11626 }, { "epoch": 3.873157324893812, "grad_norm": 0.72265625, "learning_rate": 1.0512247512314255e-05, "loss": 3.9972, "step": 11627 }, { "epoch": 3.8734904638960606, "grad_norm": 0.671875, "learning_rate": 1.051150621043576e-05, "loss": 3.9654, "step": 11628 }, { "epoch": 3.8738236028983093, "grad_norm": 0.69140625, "learning_rate": 1.0510764873479738e-05, "loss": 3.9585, "step": 11629 }, { "epoch": 3.874156741900558, "grad_norm": 0.69140625, "learning_rate": 1.0510023501454829e-05, "loss": 4.05, "step": 11630 }, { "epoch": 3.8744898809028068, "grad_norm": 0.6796875, "learning_rate": 1.0509282094369665e-05, "loss": 4.0253, "step": 11631 }, { "epoch": 3.8748230199050555, "grad_norm": 0.7265625, "learning_rate": 1.0508540652232881e-05, "loss": 3.9693, "step": 11632 }, { "epoch": 3.875156158907304, "grad_norm": 0.67578125, "learning_rate": 1.0507799175053116e-05, "loss": 3.9612, "step": 11633 }, { "epoch": 3.875489297909553, "grad_norm": 0.74609375, "learning_rate": 1.0507057662839006e-05, "loss": 4.0045, "step": 11634 }, { "epoch": 3.8758224369118013, "grad_norm": 0.671875, "learning_rate": 1.050631611559919e-05, "loss": 3.9762, "step": 11635 }, { "epoch": 3.87615557591405, "grad_norm": 0.70703125, "learning_rate": 1.0505574533342298e-05, "loss": 3.9866, "step": 11636 }, { "epoch": 3.8764887149162988, "grad_norm": 0.6796875, "learning_rate": 1.0504832916076977e-05, "loss": 4.0325, "step": 11637 }, { "epoch": 3.8768218539185475, "grad_norm": 0.69921875, "learning_rate": 1.0504091263811857e-05, "loss": 3.996, "step": 11638 }, { "epoch": 3.8771549929207962, "grad_norm": 0.72265625, "learning_rate": 1.0503349576555583e-05, "loss": 4.0677, "step": 11639 }, { "epoch": 3.877488131923045, "grad_norm": 0.69921875, "learning_rate": 1.0502607854316793e-05, "loss": 4.0288, "step": 11640 }, { "epoch": 3.8778212709252937, "grad_norm": 0.69140625, "learning_rate": 1.0501866097104125e-05, "loss": 4.0431, "step": 11641 }, { "epoch": 3.878154409927542, "grad_norm": 0.71875, "learning_rate": 1.0501124304926215e-05, "loss": 4.0034, "step": 11642 }, { "epoch": 3.878487548929791, "grad_norm": 0.67578125, "learning_rate": 1.0500382477791712e-05, "loss": 4.0738, "step": 11643 }, { "epoch": 3.8788206879320395, "grad_norm": 0.734375, "learning_rate": 1.0499640615709249e-05, "loss": 4.0556, "step": 11644 }, { "epoch": 3.8791538269342882, "grad_norm": 0.69921875, "learning_rate": 1.0498898718687469e-05, "loss": 3.9893, "step": 11645 }, { "epoch": 3.879486965936537, "grad_norm": 0.6796875, "learning_rate": 1.0498156786735018e-05, "loss": 3.9337, "step": 11646 }, { "epoch": 3.8798201049387857, "grad_norm": 0.66015625, "learning_rate": 1.049741481986053e-05, "loss": 4.0632, "step": 11647 }, { "epoch": 3.8801532439410344, "grad_norm": 0.6796875, "learning_rate": 1.0496672818072656e-05, "loss": 3.9579, "step": 11648 }, { "epoch": 3.880486382943283, "grad_norm": 0.68359375, "learning_rate": 1.0495930781380034e-05, "loss": 4.0267, "step": 11649 }, { "epoch": 3.880819521945532, "grad_norm": 0.6796875, "learning_rate": 1.0495188709791307e-05, "loss": 4.0081, "step": 11650 }, { "epoch": 3.88115266094778, "grad_norm": 0.69140625, "learning_rate": 1.0494446603315117e-05, "loss": 4.0692, "step": 11651 }, { "epoch": 3.8814857999500294, "grad_norm": 0.6796875, "learning_rate": 1.0493704461960113e-05, "loss": 4.0134, "step": 11652 }, { "epoch": 3.8818189389522777, "grad_norm": 0.69921875, "learning_rate": 1.0492962285734932e-05, "loss": 3.9918, "step": 11653 }, { "epoch": 3.8821520779545264, "grad_norm": 0.72265625, "learning_rate": 1.0492220074648228e-05, "loss": 4.0033, "step": 11654 }, { "epoch": 3.882485216956775, "grad_norm": 0.69921875, "learning_rate": 1.0491477828708641e-05, "loss": 4.0601, "step": 11655 }, { "epoch": 3.882818355959024, "grad_norm": 0.7421875, "learning_rate": 1.0490735547924812e-05, "loss": 4.0033, "step": 11656 }, { "epoch": 3.8831514949612727, "grad_norm": 0.66796875, "learning_rate": 1.0489993232305396e-05, "loss": 3.9885, "step": 11657 }, { "epoch": 3.8834846339635214, "grad_norm": 0.6953125, "learning_rate": 1.0489250881859035e-05, "loss": 4.056, "step": 11658 }, { "epoch": 3.88381777296577, "grad_norm": 0.69921875, "learning_rate": 1.0488508496594375e-05, "loss": 3.9415, "step": 11659 }, { "epoch": 3.8841509119680184, "grad_norm": 0.703125, "learning_rate": 1.0487766076520064e-05, "loss": 3.9857, "step": 11660 }, { "epoch": 3.8844840509702676, "grad_norm": 0.6875, "learning_rate": 1.0487023621644752e-05, "loss": 4.0677, "step": 11661 }, { "epoch": 3.884817189972516, "grad_norm": 0.70703125, "learning_rate": 1.048628113197708e-05, "loss": 3.9973, "step": 11662 }, { "epoch": 3.8851503289747646, "grad_norm": 0.703125, "learning_rate": 1.0485538607525707e-05, "loss": 4.0741, "step": 11663 }, { "epoch": 3.8854834679770134, "grad_norm": 0.73046875, "learning_rate": 1.048479604829927e-05, "loss": 3.9955, "step": 11664 }, { "epoch": 3.885816606979262, "grad_norm": 0.73046875, "learning_rate": 1.0484053454306428e-05, "loss": 3.9532, "step": 11665 }, { "epoch": 3.886149745981511, "grad_norm": 0.6640625, "learning_rate": 1.0483310825555823e-05, "loss": 4.0259, "step": 11666 }, { "epoch": 3.8864828849837596, "grad_norm": 0.703125, "learning_rate": 1.0482568162056113e-05, "loss": 3.9942, "step": 11667 }, { "epoch": 3.8868160239860083, "grad_norm": 0.70703125, "learning_rate": 1.0481825463815943e-05, "loss": 4.0091, "step": 11668 }, { "epoch": 3.8871491629882566, "grad_norm": 0.72265625, "learning_rate": 1.0481082730843962e-05, "loss": 4.0585, "step": 11669 }, { "epoch": 3.8874823019905054, "grad_norm": 0.6953125, "learning_rate": 1.0480339963148827e-05, "loss": 4.0177, "step": 11670 }, { "epoch": 3.887815440992754, "grad_norm": 0.72265625, "learning_rate": 1.0479597160739186e-05, "loss": 3.9733, "step": 11671 }, { "epoch": 3.888148579995003, "grad_norm": 0.7109375, "learning_rate": 1.0478854323623693e-05, "loss": 4.053, "step": 11672 }, { "epoch": 3.8884817189972516, "grad_norm": 0.6796875, "learning_rate": 1.0478111451810998e-05, "loss": 3.9691, "step": 11673 }, { "epoch": 3.8888148579995003, "grad_norm": 0.69140625, "learning_rate": 1.0477368545309755e-05, "loss": 4.0998, "step": 11674 }, { "epoch": 3.889147997001749, "grad_norm": 0.6875, "learning_rate": 1.0476625604128618e-05, "loss": 3.9583, "step": 11675 }, { "epoch": 3.889481136003998, "grad_norm": 0.66796875, "learning_rate": 1.0475882628276241e-05, "loss": 4.004, "step": 11676 }, { "epoch": 3.8898142750062465, "grad_norm": 0.68359375, "learning_rate": 1.0475139617761277e-05, "loss": 4.0294, "step": 11677 }, { "epoch": 3.890147414008495, "grad_norm": 0.72265625, "learning_rate": 1.0474396572592378e-05, "loss": 3.9565, "step": 11678 }, { "epoch": 3.8904805530107436, "grad_norm": 0.67578125, "learning_rate": 1.0473653492778203e-05, "loss": 4.0111, "step": 11679 }, { "epoch": 3.8908136920129923, "grad_norm": 0.66796875, "learning_rate": 1.0472910378327406e-05, "loss": 4.0393, "step": 11680 }, { "epoch": 3.891146831015241, "grad_norm": 0.71875, "learning_rate": 1.0472167229248645e-05, "loss": 4.0276, "step": 11681 }, { "epoch": 3.89147997001749, "grad_norm": 0.66796875, "learning_rate": 1.0471424045550569e-05, "loss": 4.0374, "step": 11682 }, { "epoch": 3.8918131090197385, "grad_norm": 0.68359375, "learning_rate": 1.0470680827241841e-05, "loss": 4.0259, "step": 11683 }, { "epoch": 3.8921462480219873, "grad_norm": 0.6796875, "learning_rate": 1.0469937574331116e-05, "loss": 3.998, "step": 11684 }, { "epoch": 3.892479387024236, "grad_norm": 0.69140625, "learning_rate": 1.0469194286827053e-05, "loss": 3.932, "step": 11685 }, { "epoch": 3.8928125260264848, "grad_norm": 0.6875, "learning_rate": 1.0468450964738304e-05, "loss": 3.9686, "step": 11686 }, { "epoch": 3.893145665028733, "grad_norm": 0.69140625, "learning_rate": 1.0467707608073532e-05, "loss": 3.9984, "step": 11687 }, { "epoch": 3.893478804030982, "grad_norm": 0.71875, "learning_rate": 1.0466964216841395e-05, "loss": 3.9997, "step": 11688 }, { "epoch": 3.8938119430332305, "grad_norm": 0.72265625, "learning_rate": 1.0466220791050552e-05, "loss": 4.014, "step": 11689 }, { "epoch": 3.8941450820354793, "grad_norm": 0.69921875, "learning_rate": 1.0465477330709663e-05, "loss": 3.9812, "step": 11690 }, { "epoch": 3.894478221037728, "grad_norm": 0.67578125, "learning_rate": 1.0464733835827381e-05, "loss": 4.0827, "step": 11691 }, { "epoch": 3.8948113600399767, "grad_norm": 0.6640625, "learning_rate": 1.0463990306412376e-05, "loss": 4.0189, "step": 11692 }, { "epoch": 3.8951444990422255, "grad_norm": 0.68359375, "learning_rate": 1.04632467424733e-05, "loss": 4.0665, "step": 11693 }, { "epoch": 3.8954776380444742, "grad_norm": 0.6875, "learning_rate": 1.0462503144018823e-05, "loss": 4.0325, "step": 11694 }, { "epoch": 3.895810777046723, "grad_norm": 0.69140625, "learning_rate": 1.0461759511057598e-05, "loss": 3.9708, "step": 11695 }, { "epoch": 3.8961439160489713, "grad_norm": 0.703125, "learning_rate": 1.0461015843598291e-05, "loss": 3.9405, "step": 11696 }, { "epoch": 3.89647705505122, "grad_norm": 0.6953125, "learning_rate": 1.0460272141649561e-05, "loss": 4.0462, "step": 11697 }, { "epoch": 3.8968101940534687, "grad_norm": 0.734375, "learning_rate": 1.0459528405220074e-05, "loss": 3.9336, "step": 11698 }, { "epoch": 3.8971433330557175, "grad_norm": 0.66796875, "learning_rate": 1.0458784634318492e-05, "loss": 4.0276, "step": 11699 }, { "epoch": 3.897476472057966, "grad_norm": 0.66015625, "learning_rate": 1.0458040828953476e-05, "loss": 3.9555, "step": 11700 }, { "epoch": 3.897809611060215, "grad_norm": 0.71484375, "learning_rate": 1.0457296989133693e-05, "loss": 3.9452, "step": 11701 }, { "epoch": 3.8981427500624637, "grad_norm": 0.703125, "learning_rate": 1.0456553114867806e-05, "loss": 3.9751, "step": 11702 }, { "epoch": 3.898475889064712, "grad_norm": 0.65234375, "learning_rate": 1.0455809206164479e-05, "loss": 3.9899, "step": 11703 }, { "epoch": 3.898809028066961, "grad_norm": 0.7109375, "learning_rate": 1.0455065263032375e-05, "loss": 4.0073, "step": 11704 }, { "epoch": 3.8991421670692095, "grad_norm": 0.67578125, "learning_rate": 1.0454321285480161e-05, "loss": 4.0098, "step": 11705 }, { "epoch": 3.899475306071458, "grad_norm": 0.6640625, "learning_rate": 1.0453577273516505e-05, "loss": 3.9774, "step": 11706 }, { "epoch": 3.899808445073707, "grad_norm": 0.6640625, "learning_rate": 1.0452833227150074e-05, "loss": 3.9709, "step": 11707 }, { "epoch": 3.9001415840759557, "grad_norm": 0.66796875, "learning_rate": 1.0452089146389527e-05, "loss": 3.986, "step": 11708 }, { "epoch": 3.9004747230782044, "grad_norm": 0.69921875, "learning_rate": 1.0451345031243538e-05, "loss": 4.0142, "step": 11709 }, { "epoch": 3.900807862080453, "grad_norm": 0.71484375, "learning_rate": 1.0450600881720769e-05, "loss": 3.9902, "step": 11710 }, { "epoch": 3.901141001082702, "grad_norm": 0.71484375, "learning_rate": 1.0449856697829894e-05, "loss": 3.9929, "step": 11711 }, { "epoch": 3.90147414008495, "grad_norm": 0.69921875, "learning_rate": 1.0449112479579576e-05, "loss": 4.039, "step": 11712 }, { "epoch": 3.9018072790871994, "grad_norm": 0.6953125, "learning_rate": 1.0448368226978486e-05, "loss": 4.0583, "step": 11713 }, { "epoch": 3.9021404180894477, "grad_norm": 0.71875, "learning_rate": 1.0447623940035294e-05, "loss": 3.9987, "step": 11714 }, { "epoch": 3.9024735570916964, "grad_norm": 0.65625, "learning_rate": 1.0446879618758663e-05, "loss": 3.9645, "step": 11715 }, { "epoch": 3.902806696093945, "grad_norm": 0.671875, "learning_rate": 1.0446135263157274e-05, "loss": 4.0121, "step": 11716 }, { "epoch": 3.903139835096194, "grad_norm": 0.7421875, "learning_rate": 1.0445390873239783e-05, "loss": 4.0416, "step": 11717 }, { "epoch": 3.9034729740984426, "grad_norm": 0.70703125, "learning_rate": 1.0444646449014875e-05, "loss": 3.9953, "step": 11718 }, { "epoch": 3.9038061131006914, "grad_norm": 0.71484375, "learning_rate": 1.0443901990491209e-05, "loss": 3.9927, "step": 11719 }, { "epoch": 3.90413925210294, "grad_norm": 0.70703125, "learning_rate": 1.0443157497677463e-05, "loss": 3.9841, "step": 11720 }, { "epoch": 3.9044723911051884, "grad_norm": 0.72265625, "learning_rate": 1.044241297058231e-05, "loss": 3.956, "step": 11721 }, { "epoch": 3.9048055301074376, "grad_norm": 0.6796875, "learning_rate": 1.0441668409214413e-05, "loss": 3.9646, "step": 11722 }, { "epoch": 3.905138669109686, "grad_norm": 0.7265625, "learning_rate": 1.0440923813582456e-05, "loss": 3.9967, "step": 11723 }, { "epoch": 3.9054718081119346, "grad_norm": 0.67578125, "learning_rate": 1.0440179183695103e-05, "loss": 3.9307, "step": 11724 }, { "epoch": 3.9058049471141834, "grad_norm": 0.72265625, "learning_rate": 1.0439434519561035e-05, "loss": 3.9783, "step": 11725 }, { "epoch": 3.906138086116432, "grad_norm": 0.6953125, "learning_rate": 1.0438689821188917e-05, "loss": 4.0089, "step": 11726 }, { "epoch": 3.906471225118681, "grad_norm": 0.70703125, "learning_rate": 1.043794508858743e-05, "loss": 4.002, "step": 11727 }, { "epoch": 3.9068043641209296, "grad_norm": 0.6953125, "learning_rate": 1.0437200321765246e-05, "loss": 4.0506, "step": 11728 }, { "epoch": 3.9071375031231783, "grad_norm": 0.70703125, "learning_rate": 1.0436455520731042e-05, "loss": 3.9598, "step": 11729 }, { "epoch": 3.9074706421254266, "grad_norm": 0.69140625, "learning_rate": 1.043571068549349e-05, "loss": 4.0879, "step": 11730 }, { "epoch": 3.907803781127676, "grad_norm": 0.67578125, "learning_rate": 1.0434965816061267e-05, "loss": 4.042, "step": 11731 }, { "epoch": 3.908136920129924, "grad_norm": 0.6875, "learning_rate": 1.043422091244305e-05, "loss": 4.0631, "step": 11732 }, { "epoch": 3.908470059132173, "grad_norm": 0.6796875, "learning_rate": 1.0433475974647515e-05, "loss": 3.9932, "step": 11733 }, { "epoch": 3.9088031981344216, "grad_norm": 0.75, "learning_rate": 1.0432731002683338e-05, "loss": 4.0053, "step": 11734 }, { "epoch": 3.9091363371366703, "grad_norm": 0.69921875, "learning_rate": 1.0431985996559197e-05, "loss": 4.0252, "step": 11735 }, { "epoch": 3.909469476138919, "grad_norm": 0.65625, "learning_rate": 1.0431240956283769e-05, "loss": 3.9991, "step": 11736 }, { "epoch": 3.909802615141168, "grad_norm": 0.71484375, "learning_rate": 1.0430495881865733e-05, "loss": 4.0193, "step": 11737 }, { "epoch": 3.9101357541434165, "grad_norm": 0.66015625, "learning_rate": 1.042975077331377e-05, "loss": 3.9956, "step": 11738 }, { "epoch": 3.910468893145665, "grad_norm": 0.66796875, "learning_rate": 1.0429005630636556e-05, "loss": 4.0349, "step": 11739 }, { "epoch": 3.9108020321479136, "grad_norm": 0.671875, "learning_rate": 1.0428260453842771e-05, "loss": 3.9845, "step": 11740 }, { "epoch": 3.9111351711501623, "grad_norm": 0.6796875, "learning_rate": 1.042751524294109e-05, "loss": 4.0029, "step": 11741 }, { "epoch": 3.911468310152411, "grad_norm": 0.6640625, "learning_rate": 1.0426769997940202e-05, "loss": 4.0077, "step": 11742 }, { "epoch": 3.9118014491546598, "grad_norm": 0.703125, "learning_rate": 1.042602471884878e-05, "loss": 4.0499, "step": 11743 }, { "epoch": 3.9121345881569085, "grad_norm": 0.66796875, "learning_rate": 1.042527940567551e-05, "loss": 4.0622, "step": 11744 }, { "epoch": 3.9124677271591572, "grad_norm": 0.73046875, "learning_rate": 1.042453405842907e-05, "loss": 4.0164, "step": 11745 }, { "epoch": 3.912800866161406, "grad_norm": 0.72265625, "learning_rate": 1.0423788677118142e-05, "loss": 4.0136, "step": 11746 }, { "epoch": 3.9131340051636547, "grad_norm": 0.67578125, "learning_rate": 1.042304326175141e-05, "loss": 3.9593, "step": 11747 }, { "epoch": 3.913467144165903, "grad_norm": 0.6875, "learning_rate": 1.0422297812337556e-05, "loss": 4.004, "step": 11748 }, { "epoch": 3.9138002831681518, "grad_norm": 0.703125, "learning_rate": 1.042155232888526e-05, "loss": 4.0512, "step": 11749 }, { "epoch": 3.9141334221704005, "grad_norm": 0.69140625, "learning_rate": 1.0420806811403208e-05, "loss": 4.0167, "step": 11750 }, { "epoch": 3.9144665611726492, "grad_norm": 0.6875, "learning_rate": 1.0420061259900085e-05, "loss": 4.1091, "step": 11751 }, { "epoch": 3.914799700174898, "grad_norm": 0.6875, "learning_rate": 1.0419315674384571e-05, "loss": 4.0248, "step": 11752 }, { "epoch": 3.9151328391771467, "grad_norm": 0.69921875, "learning_rate": 1.0418570054865355e-05, "loss": 3.959, "step": 11753 }, { "epoch": 3.9154659781793955, "grad_norm": 0.703125, "learning_rate": 1.0417824401351115e-05, "loss": 3.9737, "step": 11754 }, { "epoch": 3.915799117181644, "grad_norm": 0.7109375, "learning_rate": 1.0417078713850544e-05, "loss": 4.0148, "step": 11755 }, { "epoch": 3.916132256183893, "grad_norm": 0.71484375, "learning_rate": 1.0416332992372323e-05, "loss": 3.9947, "step": 11756 }, { "epoch": 3.9164653951861412, "grad_norm": 0.703125, "learning_rate": 1.0415587236925143e-05, "loss": 4.0006, "step": 11757 }, { "epoch": 3.91679853418839, "grad_norm": 0.70703125, "learning_rate": 1.0414841447517682e-05, "loss": 4.0507, "step": 11758 }, { "epoch": 3.9171316731906387, "grad_norm": 0.73046875, "learning_rate": 1.0414095624158633e-05, "loss": 4.0549, "step": 11759 }, { "epoch": 3.9174648121928874, "grad_norm": 0.67578125, "learning_rate": 1.0413349766856684e-05, "loss": 4.0235, "step": 11760 }, { "epoch": 3.917797951195136, "grad_norm": 0.6640625, "learning_rate": 1.0412603875620522e-05, "loss": 4.0265, "step": 11761 }, { "epoch": 3.918131090197385, "grad_norm": 0.703125, "learning_rate": 1.041185795045883e-05, "loss": 3.9705, "step": 11762 }, { "epoch": 3.9184642291996337, "grad_norm": 0.703125, "learning_rate": 1.0411111991380298e-05, "loss": 4.0378, "step": 11763 }, { "epoch": 3.918797368201882, "grad_norm": 0.66015625, "learning_rate": 1.0410365998393622e-05, "loss": 4.036, "step": 11764 }, { "epoch": 3.919130507204131, "grad_norm": 0.703125, "learning_rate": 1.0409619971507481e-05, "loss": 4.0203, "step": 11765 }, { "epoch": 3.9194636462063794, "grad_norm": 0.734375, "learning_rate": 1.0408873910730574e-05, "loss": 4.0069, "step": 11766 }, { "epoch": 3.919796785208628, "grad_norm": 0.62890625, "learning_rate": 1.0408127816071584e-05, "loss": 4.0159, "step": 11767 }, { "epoch": 3.920129924210877, "grad_norm": 0.7109375, "learning_rate": 1.0407381687539204e-05, "loss": 4.0232, "step": 11768 }, { "epoch": 3.9204630632131257, "grad_norm": 0.6875, "learning_rate": 1.0406635525142125e-05, "loss": 4.0041, "step": 11769 }, { "epoch": 3.9207962022153744, "grad_norm": 0.69140625, "learning_rate": 1.0405889328889039e-05, "loss": 4.0515, "step": 11770 }, { "epoch": 3.921129341217623, "grad_norm": 0.69921875, "learning_rate": 1.0405143098788635e-05, "loss": 3.9657, "step": 11771 }, { "epoch": 3.921462480219872, "grad_norm": 0.72265625, "learning_rate": 1.0404396834849607e-05, "loss": 4.0135, "step": 11772 }, { "epoch": 3.92179561922212, "grad_norm": 0.703125, "learning_rate": 1.0403650537080647e-05, "loss": 4.0024, "step": 11773 }, { "epoch": 3.9221287582243693, "grad_norm": 0.7109375, "learning_rate": 1.0402904205490448e-05, "loss": 3.9991, "step": 11774 }, { "epoch": 3.9224618972266176, "grad_norm": 0.6796875, "learning_rate": 1.0402157840087703e-05, "loss": 3.9342, "step": 11775 }, { "epoch": 3.9227950362288664, "grad_norm": 0.76171875, "learning_rate": 1.0401411440881102e-05, "loss": 3.989, "step": 11776 }, { "epoch": 3.923128175231115, "grad_norm": 0.7109375, "learning_rate": 1.0400665007879346e-05, "loss": 3.9907, "step": 11777 }, { "epoch": 3.923461314233364, "grad_norm": 0.68359375, "learning_rate": 1.0399918541091123e-05, "loss": 3.9552, "step": 11778 }, { "epoch": 3.9237944532356126, "grad_norm": 0.7734375, "learning_rate": 1.0399172040525131e-05, "loss": 3.9247, "step": 11779 }, { "epoch": 3.9241275922378613, "grad_norm": 0.703125, "learning_rate": 1.0398425506190066e-05, "loss": 4.0051, "step": 11780 }, { "epoch": 3.92446073124011, "grad_norm": 0.75, "learning_rate": 1.0397678938094619e-05, "loss": 4.0286, "step": 11781 }, { "epoch": 3.9247938702423584, "grad_norm": 0.69140625, "learning_rate": 1.039693233624749e-05, "loss": 4.1004, "step": 11782 }, { "epoch": 3.9251270092446076, "grad_norm": 0.6640625, "learning_rate": 1.0396185700657375e-05, "loss": 3.9842, "step": 11783 }, { "epoch": 3.925460148246856, "grad_norm": 0.6796875, "learning_rate": 1.0395439031332969e-05, "loss": 3.9689, "step": 11784 }, { "epoch": 3.9257932872491046, "grad_norm": 0.74609375, "learning_rate": 1.0394692328282969e-05, "loss": 3.9979, "step": 11785 }, { "epoch": 3.9261264262513533, "grad_norm": 0.6640625, "learning_rate": 1.0393945591516074e-05, "loss": 3.9869, "step": 11786 }, { "epoch": 3.926459565253602, "grad_norm": 0.70703125, "learning_rate": 1.0393198821040981e-05, "loss": 3.9941, "step": 11787 }, { "epoch": 3.926792704255851, "grad_norm": 0.69921875, "learning_rate": 1.0392452016866389e-05, "loss": 4.0262, "step": 11788 }, { "epoch": 3.9271258432580995, "grad_norm": 0.73046875, "learning_rate": 1.0391705179000994e-05, "loss": 3.9696, "step": 11789 }, { "epoch": 3.9274589822603483, "grad_norm": 0.66796875, "learning_rate": 1.0390958307453498e-05, "loss": 3.9538, "step": 11790 }, { "epoch": 3.9277921212625966, "grad_norm": 0.6875, "learning_rate": 1.0390211402232602e-05, "loss": 4.065, "step": 11791 }, { "epoch": 3.9281252602648458, "grad_norm": 0.69140625, "learning_rate": 1.0389464463346998e-05, "loss": 4.0921, "step": 11792 }, { "epoch": 3.928458399267094, "grad_norm": 0.7109375, "learning_rate": 1.0388717490805397e-05, "loss": 3.9912, "step": 11793 }, { "epoch": 3.928791538269343, "grad_norm": 0.65234375, "learning_rate": 1.0387970484616491e-05, "loss": 4.0099, "step": 11794 }, { "epoch": 3.9291246772715915, "grad_norm": 0.75, "learning_rate": 1.0387223444788987e-05, "loss": 3.9953, "step": 11795 }, { "epoch": 3.9294578162738403, "grad_norm": 0.6875, "learning_rate": 1.038647637133158e-05, "loss": 4.0225, "step": 11796 }, { "epoch": 3.929790955276089, "grad_norm": 0.72265625, "learning_rate": 1.038572926425298e-05, "loss": 4.0538, "step": 11797 }, { "epoch": 3.9301240942783378, "grad_norm": 0.703125, "learning_rate": 1.038498212356188e-05, "loss": 4.0001, "step": 11798 }, { "epoch": 3.9304572332805865, "grad_norm": 0.6796875, "learning_rate": 1.038423494926699e-05, "loss": 3.9483, "step": 11799 }, { "epoch": 3.930790372282835, "grad_norm": 0.73828125, "learning_rate": 1.038348774137701e-05, "loss": 3.9324, "step": 11800 }, { "epoch": 3.931123511285084, "grad_norm": 0.7109375, "learning_rate": 1.038274049990064e-05, "loss": 4.0115, "step": 11801 }, { "epoch": 3.9314566502873323, "grad_norm": 0.73046875, "learning_rate": 1.038199322484659e-05, "loss": 4.0151, "step": 11802 }, { "epoch": 3.931789789289581, "grad_norm": 0.6640625, "learning_rate": 1.038124591622356e-05, "loss": 4.0457, "step": 11803 }, { "epoch": 3.9321229282918297, "grad_norm": 0.71484375, "learning_rate": 1.0380498574040259e-05, "loss": 4.008, "step": 11804 }, { "epoch": 3.9324560672940785, "grad_norm": 0.71484375, "learning_rate": 1.0379751198305386e-05, "loss": 3.9998, "step": 11805 }, { "epoch": 3.9327892062963272, "grad_norm": 0.71484375, "learning_rate": 1.0379003789027652e-05, "loss": 4.0162, "step": 11806 }, { "epoch": 3.933122345298576, "grad_norm": 0.6953125, "learning_rate": 1.0378256346215754e-05, "loss": 4.0013, "step": 11807 }, { "epoch": 3.9334554843008247, "grad_norm": 0.68359375, "learning_rate": 1.0377508869878409e-05, "loss": 4.081, "step": 11808 }, { "epoch": 3.933788623303073, "grad_norm": 0.6640625, "learning_rate": 1.0376761360024316e-05, "loss": 4.0233, "step": 11809 }, { "epoch": 3.9341217623053217, "grad_norm": 0.6640625, "learning_rate": 1.0376013816662184e-05, "loss": 4.057, "step": 11810 }, { "epoch": 3.9344549013075705, "grad_norm": 0.703125, "learning_rate": 1.0375266239800722e-05, "loss": 4.0421, "step": 11811 }, { "epoch": 3.934788040309819, "grad_norm": 0.67578125, "learning_rate": 1.0374518629448636e-05, "loss": 4.0217, "step": 11812 }, { "epoch": 3.935121179312068, "grad_norm": 0.69921875, "learning_rate": 1.0373770985614632e-05, "loss": 4.0683, "step": 11813 }, { "epoch": 3.9354543183143167, "grad_norm": 0.734375, "learning_rate": 1.0373023308307423e-05, "loss": 4.0334, "step": 11814 }, { "epoch": 3.9357874573165654, "grad_norm": 0.68359375, "learning_rate": 1.0372275597535717e-05, "loss": 3.92, "step": 11815 }, { "epoch": 3.936120596318814, "grad_norm": 0.73828125, "learning_rate": 1.0371527853308217e-05, "loss": 3.9693, "step": 11816 }, { "epoch": 3.936453735321063, "grad_norm": 0.72265625, "learning_rate": 1.037078007563364e-05, "loss": 4.0178, "step": 11817 }, { "epoch": 3.936786874323311, "grad_norm": 0.67578125, "learning_rate": 1.0370032264520693e-05, "loss": 4.0196, "step": 11818 }, { "epoch": 3.93712001332556, "grad_norm": 0.69140625, "learning_rate": 1.0369284419978087e-05, "loss": 4.0261, "step": 11819 }, { "epoch": 3.9374531523278087, "grad_norm": 0.69140625, "learning_rate": 1.036853654201453e-05, "loss": 4.0202, "step": 11820 }, { "epoch": 3.9377862913300574, "grad_norm": 0.6640625, "learning_rate": 1.0367788630638738e-05, "loss": 4.0511, "step": 11821 }, { "epoch": 3.938119430332306, "grad_norm": 0.734375, "learning_rate": 1.0367040685859418e-05, "loss": 4.077, "step": 11822 }, { "epoch": 3.938452569334555, "grad_norm": 0.6953125, "learning_rate": 1.0366292707685286e-05, "loss": 3.9821, "step": 11823 }, { "epoch": 3.9387857083368036, "grad_norm": 0.734375, "learning_rate": 1.0365544696125052e-05, "loss": 4.0068, "step": 11824 }, { "epoch": 3.9391188473390524, "grad_norm": 0.72265625, "learning_rate": 1.0364796651187427e-05, "loss": 3.9851, "step": 11825 }, { "epoch": 3.939451986341301, "grad_norm": 0.69140625, "learning_rate": 1.036404857288113e-05, "loss": 3.9986, "step": 11826 }, { "epoch": 3.9397851253435494, "grad_norm": 0.67578125, "learning_rate": 1.0363300461214867e-05, "loss": 3.999, "step": 11827 }, { "epoch": 3.940118264345798, "grad_norm": 0.70703125, "learning_rate": 1.0362552316197359e-05, "loss": 4.0162, "step": 11828 }, { "epoch": 3.940451403348047, "grad_norm": 0.67578125, "learning_rate": 1.0361804137837313e-05, "loss": 4.0148, "step": 11829 }, { "epoch": 3.9407845423502956, "grad_norm": 0.6796875, "learning_rate": 1.036105592614345e-05, "loss": 3.9883, "step": 11830 }, { "epoch": 3.9411176813525444, "grad_norm": 0.67578125, "learning_rate": 1.036030768112448e-05, "loss": 3.9446, "step": 11831 }, { "epoch": 3.941450820354793, "grad_norm": 0.72265625, "learning_rate": 1.0359559402789123e-05, "loss": 4.0206, "step": 11832 }, { "epoch": 3.941783959357042, "grad_norm": 0.7109375, "learning_rate": 1.0358811091146093e-05, "loss": 3.9872, "step": 11833 }, { "epoch": 3.94211709835929, "grad_norm": 0.66796875, "learning_rate": 1.0358062746204105e-05, "loss": 3.9118, "step": 11834 }, { "epoch": 3.9424502373615393, "grad_norm": 0.703125, "learning_rate": 1.0357314367971876e-05, "loss": 4.0066, "step": 11835 }, { "epoch": 3.9427833763637876, "grad_norm": 0.6875, "learning_rate": 1.0356565956458124e-05, "loss": 3.9318, "step": 11836 }, { "epoch": 3.9431165153660364, "grad_norm": 0.6875, "learning_rate": 1.0355817511671568e-05, "loss": 4.0123, "step": 11837 }, { "epoch": 3.943449654368285, "grad_norm": 0.69921875, "learning_rate": 1.0355069033620922e-05, "loss": 4.0732, "step": 11838 }, { "epoch": 3.943782793370534, "grad_norm": 0.75, "learning_rate": 1.0354320522314905e-05, "loss": 3.9481, "step": 11839 }, { "epoch": 3.9441159323727826, "grad_norm": 0.71484375, "learning_rate": 1.0353571977762238e-05, "loss": 3.9619, "step": 11840 }, { "epoch": 3.9444490713750313, "grad_norm": 0.7109375, "learning_rate": 1.0352823399971638e-05, "loss": 4.0216, "step": 11841 }, { "epoch": 3.94478221037728, "grad_norm": 0.70703125, "learning_rate": 1.0352074788951824e-05, "loss": 3.9643, "step": 11842 }, { "epoch": 3.9451153493795283, "grad_norm": 0.6484375, "learning_rate": 1.0351326144711516e-05, "loss": 3.9889, "step": 11843 }, { "epoch": 3.9454484883817775, "grad_norm": 0.69921875, "learning_rate": 1.0350577467259434e-05, "loss": 4.0604, "step": 11844 }, { "epoch": 3.945781627384026, "grad_norm": 0.73046875, "learning_rate": 1.0349828756604301e-05, "loss": 3.9857, "step": 11845 }, { "epoch": 3.9461147663862746, "grad_norm": 0.7109375, "learning_rate": 1.0349080012754835e-05, "loss": 3.9844, "step": 11846 }, { "epoch": 3.9464479053885233, "grad_norm": 0.6953125, "learning_rate": 1.0348331235719756e-05, "loss": 4.0189, "step": 11847 }, { "epoch": 3.946781044390772, "grad_norm": 0.71875, "learning_rate": 1.0347582425507789e-05, "loss": 4.0037, "step": 11848 }, { "epoch": 3.947114183393021, "grad_norm": 0.66015625, "learning_rate": 1.0346833582127654e-05, "loss": 4.0207, "step": 11849 }, { "epoch": 3.9474473223952695, "grad_norm": 0.71875, "learning_rate": 1.0346084705588074e-05, "loss": 3.9935, "step": 11850 }, { "epoch": 3.9477804613975183, "grad_norm": 0.68359375, "learning_rate": 1.0345335795897773e-05, "loss": 3.984, "step": 11851 }, { "epoch": 3.9481136003997666, "grad_norm": 0.72265625, "learning_rate": 1.0344586853065473e-05, "loss": 3.9209, "step": 11852 }, { "epoch": 3.9484467394020157, "grad_norm": 0.70703125, "learning_rate": 1.0343837877099895e-05, "loss": 4.0455, "step": 11853 }, { "epoch": 3.948779878404264, "grad_norm": 0.6953125, "learning_rate": 1.0343088868009767e-05, "loss": 3.8868, "step": 11854 }, { "epoch": 3.9491130174065128, "grad_norm": 0.703125, "learning_rate": 1.0342339825803814e-05, "loss": 3.9988, "step": 11855 }, { "epoch": 3.9494461564087615, "grad_norm": 0.67578125, "learning_rate": 1.0341590750490756e-05, "loss": 3.9875, "step": 11856 }, { "epoch": 3.9497792954110102, "grad_norm": 0.71875, "learning_rate": 1.034084164207932e-05, "loss": 4.0074, "step": 11857 }, { "epoch": 3.950112434413259, "grad_norm": 0.6875, "learning_rate": 1.0340092500578233e-05, "loss": 3.9965, "step": 11858 }, { "epoch": 3.9504455734155077, "grad_norm": 0.6875, "learning_rate": 1.033934332599622e-05, "loss": 4.0142, "step": 11859 }, { "epoch": 3.9507787124177565, "grad_norm": 0.70703125, "learning_rate": 1.0338594118342009e-05, "loss": 3.9683, "step": 11860 }, { "epoch": 3.9511118514200048, "grad_norm": 0.671875, "learning_rate": 1.0337844877624325e-05, "loss": 4.0372, "step": 11861 }, { "epoch": 3.951444990422254, "grad_norm": 0.6875, "learning_rate": 1.0337095603851891e-05, "loss": 3.9858, "step": 11862 }, { "epoch": 3.9517781294245022, "grad_norm": 0.66796875, "learning_rate": 1.0336346297033442e-05, "loss": 4.0165, "step": 11863 }, { "epoch": 3.952111268426751, "grad_norm": 0.7109375, "learning_rate": 1.03355969571777e-05, "loss": 4.0107, "step": 11864 }, { "epoch": 3.9524444074289997, "grad_norm": 0.74609375, "learning_rate": 1.0334847584293398e-05, "loss": 4.0233, "step": 11865 }, { "epoch": 3.9527775464312485, "grad_norm": 0.6875, "learning_rate": 1.033409817838926e-05, "loss": 4.0374, "step": 11866 }, { "epoch": 3.953110685433497, "grad_norm": 0.671875, "learning_rate": 1.0333348739474016e-05, "loss": 3.967, "step": 11867 }, { "epoch": 3.953443824435746, "grad_norm": 0.67578125, "learning_rate": 1.0332599267556396e-05, "loss": 3.9277, "step": 11868 }, { "epoch": 3.9537769634379947, "grad_norm": 0.6796875, "learning_rate": 1.033184976264513e-05, "loss": 4.0209, "step": 11869 }, { "epoch": 3.954110102440243, "grad_norm": 0.70703125, "learning_rate": 1.0331100224748952e-05, "loss": 3.9376, "step": 11870 }, { "epoch": 3.954443241442492, "grad_norm": 0.69921875, "learning_rate": 1.0330350653876586e-05, "loss": 4.067, "step": 11871 }, { "epoch": 3.9547763804447404, "grad_norm": 0.71484375, "learning_rate": 1.0329601050036763e-05, "loss": 3.9793, "step": 11872 }, { "epoch": 3.955109519446989, "grad_norm": 0.66015625, "learning_rate": 1.0328851413238221e-05, "loss": 3.9921, "step": 11873 }, { "epoch": 3.955442658449238, "grad_norm": 0.71484375, "learning_rate": 1.0328101743489684e-05, "loss": 3.9864, "step": 11874 }, { "epoch": 3.9557757974514867, "grad_norm": 0.703125, "learning_rate": 1.0327352040799889e-05, "loss": 3.9513, "step": 11875 }, { "epoch": 3.9561089364537354, "grad_norm": 0.68359375, "learning_rate": 1.0326602305177566e-05, "loss": 3.9426, "step": 11876 }, { "epoch": 3.956442075455984, "grad_norm": 0.703125, "learning_rate": 1.0325852536631448e-05, "loss": 3.9711, "step": 11877 }, { "epoch": 3.956775214458233, "grad_norm": 0.67578125, "learning_rate": 1.032510273517027e-05, "loss": 4.0122, "step": 11878 }, { "epoch": 3.957108353460481, "grad_norm": 0.68359375, "learning_rate": 1.0324352900802764e-05, "loss": 4.0181, "step": 11879 }, { "epoch": 3.95744149246273, "grad_norm": 0.69921875, "learning_rate": 1.0323603033537664e-05, "loss": 4.0272, "step": 11880 }, { "epoch": 3.9577746314649787, "grad_norm": 0.73828125, "learning_rate": 1.0322853133383703e-05, "loss": 4.0447, "step": 11881 }, { "epoch": 3.9581077704672274, "grad_norm": 0.66015625, "learning_rate": 1.0322103200349622e-05, "loss": 4.0518, "step": 11882 }, { "epoch": 3.958440909469476, "grad_norm": 0.6875, "learning_rate": 1.0321353234444148e-05, "loss": 3.9898, "step": 11883 }, { "epoch": 3.958774048471725, "grad_norm": 0.72265625, "learning_rate": 1.0320603235676019e-05, "loss": 4.0, "step": 11884 }, { "epoch": 3.9591071874739736, "grad_norm": 0.70703125, "learning_rate": 1.0319853204053973e-05, "loss": 4.0412, "step": 11885 }, { "epoch": 3.9594403264762223, "grad_norm": 0.67578125, "learning_rate": 1.0319103139586744e-05, "loss": 3.9997, "step": 11886 }, { "epoch": 3.959773465478471, "grad_norm": 0.69140625, "learning_rate": 1.0318353042283072e-05, "loss": 4.0499, "step": 11887 }, { "epoch": 3.9601066044807194, "grad_norm": 0.703125, "learning_rate": 1.031760291215169e-05, "loss": 3.9952, "step": 11888 }, { "epoch": 3.960439743482968, "grad_norm": 0.6953125, "learning_rate": 1.0316852749201336e-05, "loss": 4.0026, "step": 11889 }, { "epoch": 3.960772882485217, "grad_norm": 0.69140625, "learning_rate": 1.031610255344075e-05, "loss": 4.0069, "step": 11890 }, { "epoch": 3.9611060214874656, "grad_norm": 0.70703125, "learning_rate": 1.0315352324878669e-05, "loss": 4.0087, "step": 11891 }, { "epoch": 3.9614391604897143, "grad_norm": 0.65234375, "learning_rate": 1.0314602063523832e-05, "loss": 3.9652, "step": 11892 }, { "epoch": 3.961772299491963, "grad_norm": 0.69140625, "learning_rate": 1.0313851769384977e-05, "loss": 4.0333, "step": 11893 }, { "epoch": 3.962105438494212, "grad_norm": 0.69921875, "learning_rate": 1.0313101442470842e-05, "loss": 3.962, "step": 11894 }, { "epoch": 3.9624385774964606, "grad_norm": 0.72265625, "learning_rate": 1.0312351082790168e-05, "loss": 3.924, "step": 11895 }, { "epoch": 3.9627717164987093, "grad_norm": 0.734375, "learning_rate": 1.03116006903517e-05, "loss": 3.9666, "step": 11896 }, { "epoch": 3.9631048555009576, "grad_norm": 0.6796875, "learning_rate": 1.031085026516417e-05, "loss": 4.0478, "step": 11897 }, { "epoch": 3.9634379945032063, "grad_norm": 0.703125, "learning_rate": 1.0310099807236324e-05, "loss": 4.0353, "step": 11898 }, { "epoch": 3.963771133505455, "grad_norm": 0.6875, "learning_rate": 1.0309349316576901e-05, "loss": 4.0271, "step": 11899 }, { "epoch": 3.964104272507704, "grad_norm": 0.68359375, "learning_rate": 1.0308598793194645e-05, "loss": 4.0344, "step": 11900 }, { "epoch": 3.9644374115099525, "grad_norm": 0.66796875, "learning_rate": 1.0307848237098296e-05, "loss": 4.1048, "step": 11901 }, { "epoch": 3.9647705505122013, "grad_norm": 0.6875, "learning_rate": 1.0307097648296595e-05, "loss": 4.0329, "step": 11902 }, { "epoch": 3.96510368951445, "grad_norm": 0.69921875, "learning_rate": 1.0306347026798288e-05, "loss": 3.9912, "step": 11903 }, { "epoch": 3.9654368285166983, "grad_norm": 0.6953125, "learning_rate": 1.0305596372612115e-05, "loss": 4.028, "step": 11904 }, { "epoch": 3.9657699675189475, "grad_norm": 0.69140625, "learning_rate": 1.0304845685746825e-05, "loss": 3.9981, "step": 11905 }, { "epoch": 3.966103106521196, "grad_norm": 0.6796875, "learning_rate": 1.0304094966211154e-05, "loss": 3.9904, "step": 11906 }, { "epoch": 3.9664362455234445, "grad_norm": 0.71484375, "learning_rate": 1.0303344214013854e-05, "loss": 3.9448, "step": 11907 }, { "epoch": 3.9667693845256933, "grad_norm": 0.68359375, "learning_rate": 1.0302593429163662e-05, "loss": 4.0025, "step": 11908 }, { "epoch": 3.967102523527942, "grad_norm": 0.68359375, "learning_rate": 1.0301842611669332e-05, "loss": 3.9943, "step": 11909 }, { "epoch": 3.9674356625301908, "grad_norm": 0.68359375, "learning_rate": 1.03010917615396e-05, "loss": 4.0096, "step": 11910 }, { "epoch": 3.9677688015324395, "grad_norm": 0.69140625, "learning_rate": 1.0300340878783218e-05, "loss": 3.993, "step": 11911 }, { "epoch": 3.9681019405346882, "grad_norm": 0.69140625, "learning_rate": 1.0299589963408929e-05, "loss": 4.0121, "step": 11912 }, { "epoch": 3.9684350795369365, "grad_norm": 0.7109375, "learning_rate": 1.029883901542548e-05, "loss": 3.9433, "step": 11913 }, { "epoch": 3.9687682185391857, "grad_norm": 0.69921875, "learning_rate": 1.0298088034841622e-05, "loss": 4.1068, "step": 11914 }, { "epoch": 3.969101357541434, "grad_norm": 0.7109375, "learning_rate": 1.0297337021666097e-05, "loss": 4.0505, "step": 11915 }, { "epoch": 3.9694344965436827, "grad_norm": 0.70703125, "learning_rate": 1.0296585975907655e-05, "loss": 4.0146, "step": 11916 }, { "epoch": 3.9697676355459315, "grad_norm": 0.70703125, "learning_rate": 1.0295834897575044e-05, "loss": 4.0029, "step": 11917 }, { "epoch": 3.9701007745481802, "grad_norm": 0.7265625, "learning_rate": 1.0295083786677014e-05, "loss": 4.0092, "step": 11918 }, { "epoch": 3.970433913550429, "grad_norm": 0.66796875, "learning_rate": 1.0294332643222309e-05, "loss": 3.9925, "step": 11919 }, { "epoch": 3.9707670525526777, "grad_norm": 0.69921875, "learning_rate": 1.0293581467219683e-05, "loss": 3.9522, "step": 11920 }, { "epoch": 3.9711001915549264, "grad_norm": 0.7265625, "learning_rate": 1.0292830258677882e-05, "loss": 3.9613, "step": 11921 }, { "epoch": 3.9714333305571747, "grad_norm": 0.69921875, "learning_rate": 1.0292079017605658e-05, "loss": 4.0227, "step": 11922 }, { "epoch": 3.971766469559424, "grad_norm": 0.70703125, "learning_rate": 1.0291327744011762e-05, "loss": 4.022, "step": 11923 }, { "epoch": 3.972099608561672, "grad_norm": 0.71875, "learning_rate": 1.0290576437904945e-05, "loss": 3.9656, "step": 11924 }, { "epoch": 3.972432747563921, "grad_norm": 0.71484375, "learning_rate": 1.0289825099293955e-05, "loss": 4.0283, "step": 11925 }, { "epoch": 3.9727658865661697, "grad_norm": 0.6875, "learning_rate": 1.0289073728187544e-05, "loss": 4.0343, "step": 11926 }, { "epoch": 3.9730990255684184, "grad_norm": 0.72265625, "learning_rate": 1.028832232459447e-05, "loss": 3.9946, "step": 11927 }, { "epoch": 3.973432164570667, "grad_norm": 0.73046875, "learning_rate": 1.0287570888523478e-05, "loss": 4.0818, "step": 11928 }, { "epoch": 3.973765303572916, "grad_norm": 0.69921875, "learning_rate": 1.0286819419983321e-05, "loss": 3.9764, "step": 11929 }, { "epoch": 3.9740984425751646, "grad_norm": 0.69921875, "learning_rate": 1.0286067918982754e-05, "loss": 4.015, "step": 11930 }, { "epoch": 3.974431581577413, "grad_norm": 0.68359375, "learning_rate": 1.0285316385530534e-05, "loss": 3.9995, "step": 11931 }, { "epoch": 3.974764720579662, "grad_norm": 0.7265625, "learning_rate": 1.0284564819635409e-05, "loss": 3.9712, "step": 11932 }, { "epoch": 3.9750978595819104, "grad_norm": 0.6796875, "learning_rate": 1.0283813221306135e-05, "loss": 3.9571, "step": 11933 }, { "epoch": 3.975430998584159, "grad_norm": 0.67578125, "learning_rate": 1.0283061590551467e-05, "loss": 4.027, "step": 11934 }, { "epoch": 3.975764137586408, "grad_norm": 0.71484375, "learning_rate": 1.0282309927380161e-05, "loss": 4.0229, "step": 11935 }, { "epoch": 3.9760972765886566, "grad_norm": 0.71484375, "learning_rate": 1.0281558231800968e-05, "loss": 4.017, "step": 11936 }, { "epoch": 3.9764304155909054, "grad_norm": 0.67578125, "learning_rate": 1.0280806503822647e-05, "loss": 3.9937, "step": 11937 }, { "epoch": 3.976763554593154, "grad_norm": 0.69140625, "learning_rate": 1.0280054743453956e-05, "loss": 4.068, "step": 11938 }, { "epoch": 3.977096693595403, "grad_norm": 0.72265625, "learning_rate": 1.0279302950703646e-05, "loss": 3.9991, "step": 11939 }, { "epoch": 3.977429832597651, "grad_norm": 0.69921875, "learning_rate": 1.0278551125580476e-05, "loss": 4.0155, "step": 11940 }, { "epoch": 3.9777629715999003, "grad_norm": 0.6640625, "learning_rate": 1.0277799268093208e-05, "loss": 4.0338, "step": 11941 }, { "epoch": 3.9780961106021486, "grad_norm": 0.6953125, "learning_rate": 1.0277047378250593e-05, "loss": 3.9946, "step": 11942 }, { "epoch": 3.9784292496043974, "grad_norm": 0.734375, "learning_rate": 1.0276295456061389e-05, "loss": 4.0432, "step": 11943 }, { "epoch": 3.978762388606646, "grad_norm": 0.7421875, "learning_rate": 1.0275543501534359e-05, "loss": 3.9715, "step": 11944 }, { "epoch": 3.979095527608895, "grad_norm": 0.70703125, "learning_rate": 1.0274791514678258e-05, "loss": 3.9892, "step": 11945 }, { "epoch": 3.9794286666111436, "grad_norm": 0.71484375, "learning_rate": 1.0274039495501845e-05, "loss": 3.9892, "step": 11946 }, { "epoch": 3.9797618056133923, "grad_norm": 0.69921875, "learning_rate": 1.0273287444013883e-05, "loss": 4.0285, "step": 11947 }, { "epoch": 3.980094944615641, "grad_norm": 0.69140625, "learning_rate": 1.0272535360223125e-05, "loss": 4.0587, "step": 11948 }, { "epoch": 3.9804280836178894, "grad_norm": 0.7265625, "learning_rate": 1.0271783244138342e-05, "loss": 3.9756, "step": 11949 }, { "epoch": 3.980761222620138, "grad_norm": 0.6875, "learning_rate": 1.0271031095768281e-05, "loss": 4.0559, "step": 11950 }, { "epoch": 3.981094361622387, "grad_norm": 0.70703125, "learning_rate": 1.0270278915121715e-05, "loss": 3.9228, "step": 11951 }, { "epoch": 3.9814275006246356, "grad_norm": 0.70703125, "learning_rate": 1.0269526702207396e-05, "loss": 4.075, "step": 11952 }, { "epoch": 3.9817606396268843, "grad_norm": 0.703125, "learning_rate": 1.0268774457034094e-05, "loss": 4.0733, "step": 11953 }, { "epoch": 3.982093778629133, "grad_norm": 0.68359375, "learning_rate": 1.0268022179610564e-05, "loss": 3.9938, "step": 11954 }, { "epoch": 3.982426917631382, "grad_norm": 0.71484375, "learning_rate": 1.026726986994557e-05, "loss": 3.9244, "step": 11955 }, { "epoch": 3.9827600566336305, "grad_norm": 0.7109375, "learning_rate": 1.0266517528047878e-05, "loss": 3.9857, "step": 11956 }, { "epoch": 3.9830931956358793, "grad_norm": 0.67578125, "learning_rate": 1.0265765153926248e-05, "loss": 4.0094, "step": 11957 }, { "epoch": 3.9834263346381276, "grad_norm": 0.69140625, "learning_rate": 1.0265012747589447e-05, "loss": 4.0067, "step": 11958 }, { "epoch": 3.9837594736403763, "grad_norm": 0.69140625, "learning_rate": 1.0264260309046232e-05, "loss": 4.0785, "step": 11959 }, { "epoch": 3.984092612642625, "grad_norm": 0.6875, "learning_rate": 1.0263507838305376e-05, "loss": 3.9441, "step": 11960 }, { "epoch": 3.984425751644874, "grad_norm": 0.73828125, "learning_rate": 1.0262755335375636e-05, "loss": 3.9637, "step": 11961 }, { "epoch": 3.9847588906471225, "grad_norm": 0.69140625, "learning_rate": 1.0262002800265781e-05, "loss": 4.0329, "step": 11962 }, { "epoch": 3.9850920296493713, "grad_norm": 0.70703125, "learning_rate": 1.0261250232984579e-05, "loss": 4.0257, "step": 11963 }, { "epoch": 3.98542516865162, "grad_norm": 0.68359375, "learning_rate": 1.0260497633540791e-05, "loss": 3.9942, "step": 11964 }, { "epoch": 3.9857583076538687, "grad_norm": 0.6953125, "learning_rate": 1.0259745001943184e-05, "loss": 4.0769, "step": 11965 }, { "epoch": 3.9860914466561175, "grad_norm": 0.69921875, "learning_rate": 1.0258992338200526e-05, "loss": 3.9764, "step": 11966 }, { "epoch": 3.9864245856583658, "grad_norm": 0.71875, "learning_rate": 1.0258239642321584e-05, "loss": 4.026, "step": 11967 }, { "epoch": 3.9867577246606145, "grad_norm": 0.6953125, "learning_rate": 1.0257486914315122e-05, "loss": 4.0024, "step": 11968 }, { "epoch": 3.9870908636628632, "grad_norm": 0.6875, "learning_rate": 1.0256734154189913e-05, "loss": 4.0116, "step": 11969 }, { "epoch": 3.987424002665112, "grad_norm": 0.6640625, "learning_rate": 1.025598136195472e-05, "loss": 4.0041, "step": 11970 }, { "epoch": 3.9877571416673607, "grad_norm": 0.67578125, "learning_rate": 1.0255228537618315e-05, "loss": 3.9818, "step": 11971 }, { "epoch": 3.9880902806696095, "grad_norm": 0.72265625, "learning_rate": 1.0254475681189464e-05, "loss": 3.9518, "step": 11972 }, { "epoch": 3.988423419671858, "grad_norm": 0.71484375, "learning_rate": 1.0253722792676939e-05, "loss": 4.0067, "step": 11973 }, { "epoch": 3.9887565586741065, "grad_norm": 0.73828125, "learning_rate": 1.0252969872089507e-05, "loss": 4.0561, "step": 11974 }, { "epoch": 3.9890896976763557, "grad_norm": 0.70703125, "learning_rate": 1.0252216919435938e-05, "loss": 4.0116, "step": 11975 }, { "epoch": 3.989422836678604, "grad_norm": 0.6953125, "learning_rate": 1.0251463934725007e-05, "loss": 4.0227, "step": 11976 }, { "epoch": 3.9897559756808527, "grad_norm": 0.7109375, "learning_rate": 1.0250710917965479e-05, "loss": 4.0067, "step": 11977 }, { "epoch": 3.9900891146831015, "grad_norm": 0.671875, "learning_rate": 1.0249957869166124e-05, "loss": 4.0309, "step": 11978 }, { "epoch": 3.99042225368535, "grad_norm": 0.66015625, "learning_rate": 1.024920478833572e-05, "loss": 4.0767, "step": 11979 }, { "epoch": 3.990755392687599, "grad_norm": 0.65625, "learning_rate": 1.024845167548303e-05, "loss": 4.0533, "step": 11980 }, { "epoch": 3.9910885316898477, "grad_norm": 0.6875, "learning_rate": 1.0247698530616837e-05, "loss": 3.9299, "step": 11981 }, { "epoch": 3.9914216706920964, "grad_norm": 0.6953125, "learning_rate": 1.0246945353745904e-05, "loss": 3.9999, "step": 11982 }, { "epoch": 3.9917548096943447, "grad_norm": 0.7421875, "learning_rate": 1.0246192144879009e-05, "loss": 4.0204, "step": 11983 }, { "epoch": 3.992087948696594, "grad_norm": 0.69921875, "learning_rate": 1.0245438904024921e-05, "loss": 4.0497, "step": 11984 }, { "epoch": 3.992421087698842, "grad_norm": 0.7109375, "learning_rate": 1.0244685631192421e-05, "loss": 4.0265, "step": 11985 }, { "epoch": 3.992754226701091, "grad_norm": 0.671875, "learning_rate": 1.0243932326390276e-05, "loss": 3.9962, "step": 11986 }, { "epoch": 3.9930873657033397, "grad_norm": 0.7109375, "learning_rate": 1.0243178989627263e-05, "loss": 3.9875, "step": 11987 }, { "epoch": 3.9934205047055884, "grad_norm": 0.703125, "learning_rate": 1.0242425620912156e-05, "loss": 4.0097, "step": 11988 }, { "epoch": 3.993753643707837, "grad_norm": 0.71484375, "learning_rate": 1.0241672220253729e-05, "loss": 3.9851, "step": 11989 }, { "epoch": 3.994086782710086, "grad_norm": 0.6796875, "learning_rate": 1.0240918787660762e-05, "loss": 4.0564, "step": 11990 }, { "epoch": 3.9944199217123346, "grad_norm": 0.71875, "learning_rate": 1.0240165323142027e-05, "loss": 4.1415, "step": 11991 }, { "epoch": 3.994753060714583, "grad_norm": 0.73046875, "learning_rate": 1.02394118267063e-05, "loss": 3.9723, "step": 11992 }, { "epoch": 3.995086199716832, "grad_norm": 0.7109375, "learning_rate": 1.023865829836236e-05, "loss": 3.9609, "step": 11993 }, { "epoch": 3.9954193387190804, "grad_norm": 0.734375, "learning_rate": 1.0237904738118984e-05, "loss": 4.0554, "step": 11994 }, { "epoch": 3.995752477721329, "grad_norm": 0.72265625, "learning_rate": 1.0237151145984946e-05, "loss": 3.9768, "step": 11995 }, { "epoch": 3.996085616723578, "grad_norm": 0.703125, "learning_rate": 1.0236397521969028e-05, "loss": 4.003, "step": 11996 }, { "epoch": 3.9964187557258266, "grad_norm": 0.66796875, "learning_rate": 1.0235643866080004e-05, "loss": 4.1055, "step": 11997 }, { "epoch": 3.9967518947280753, "grad_norm": 0.6796875, "learning_rate": 1.0234890178326653e-05, "loss": 4.062, "step": 11998 }, { "epoch": 3.997085033730324, "grad_norm": 0.69140625, "learning_rate": 1.023413645871776e-05, "loss": 4.036, "step": 11999 }, { "epoch": 3.997418172732573, "grad_norm": 0.71875, "learning_rate": 1.0233382707262094e-05, "loss": 4.0513, "step": 12000 }, { "epoch": 3.997751311734821, "grad_norm": 0.71484375, "learning_rate": 1.0232628923968444e-05, "loss": 3.9691, "step": 12001 }, { "epoch": 3.9980844507370703, "grad_norm": 0.69921875, "learning_rate": 1.0231875108845583e-05, "loss": 4.0126, "step": 12002 }, { "epoch": 3.9984175897393186, "grad_norm": 0.70703125, "learning_rate": 1.0231121261902296e-05, "loss": 3.9615, "step": 12003 }, { "epoch": 3.9987507287415673, "grad_norm": 0.703125, "learning_rate": 1.0230367383147362e-05, "loss": 4.0085, "step": 12004 }, { "epoch": 3.999083867743816, "grad_norm": 0.69140625, "learning_rate": 1.022961347258956e-05, "loss": 4.0725, "step": 12005 }, { "epoch": 3.999417006746065, "grad_norm": 0.68359375, "learning_rate": 1.0228859530237674e-05, "loss": 3.9759, "step": 12006 }, { "epoch": 3.9997501457483136, "grad_norm": 0.68359375, "learning_rate": 1.0228105556100484e-05, "loss": 4.0, "step": 12007 }, { "epoch": 4.0, "grad_norm": 0.80078125, "learning_rate": 1.0227351550186777e-05, "loss": 3.9923, "step": 12008 }, { "epoch": 4.000333139002248, "grad_norm": 0.63671875, "learning_rate": 1.022659751250533e-05, "loss": 4.0012, "step": 12009 }, { "epoch": 4.0006662780044975, "grad_norm": 0.6953125, "learning_rate": 1.0225843443064929e-05, "loss": 4.0089, "step": 12010 }, { "epoch": 4.000999417006746, "grad_norm": 0.71875, "learning_rate": 1.0225089341874354e-05, "loss": 4.0157, "step": 12011 }, { "epoch": 4.001332556008995, "grad_norm": 0.6875, "learning_rate": 1.0224335208942391e-05, "loss": 3.9588, "step": 12012 }, { "epoch": 4.001665695011243, "grad_norm": 0.72265625, "learning_rate": 1.0223581044277825e-05, "loss": 4.0201, "step": 12013 }, { "epoch": 4.001998834013492, "grad_norm": 0.69140625, "learning_rate": 1.0222826847889437e-05, "loss": 3.8905, "step": 12014 }, { "epoch": 4.002331973015741, "grad_norm": 0.6875, "learning_rate": 1.0222072619786017e-05, "loss": 3.975, "step": 12015 }, { "epoch": 4.00266511201799, "grad_norm": 0.7109375, "learning_rate": 1.0221318359976344e-05, "loss": 4.0214, "step": 12016 }, { "epoch": 4.002998251020238, "grad_norm": 0.69921875, "learning_rate": 1.0220564068469209e-05, "loss": 3.9666, "step": 12017 }, { "epoch": 4.0033313900224865, "grad_norm": 0.67578125, "learning_rate": 1.0219809745273392e-05, "loss": 4.0581, "step": 12018 }, { "epoch": 4.003664529024736, "grad_norm": 0.703125, "learning_rate": 1.0219055390397686e-05, "loss": 4.0148, "step": 12019 }, { "epoch": 4.003997668026984, "grad_norm": 0.68359375, "learning_rate": 1.0218301003850872e-05, "loss": 3.9677, "step": 12020 }, { "epoch": 4.004330807029233, "grad_norm": 0.65234375, "learning_rate": 1.021754658564174e-05, "loss": 4.0191, "step": 12021 }, { "epoch": 4.0046639460314815, "grad_norm": 0.7265625, "learning_rate": 1.0216792135779076e-05, "loss": 3.9925, "step": 12022 }, { "epoch": 4.004997085033731, "grad_norm": 0.70703125, "learning_rate": 1.021603765427167e-05, "loss": 4.0255, "step": 12023 }, { "epoch": 4.005330224035979, "grad_norm": 0.7265625, "learning_rate": 1.0215283141128305e-05, "loss": 4.0147, "step": 12024 }, { "epoch": 4.005663363038228, "grad_norm": 0.70703125, "learning_rate": 1.0214528596357777e-05, "loss": 3.9939, "step": 12025 }, { "epoch": 4.005996502040476, "grad_norm": 0.6796875, "learning_rate": 1.0213774019968867e-05, "loss": 3.9918, "step": 12026 }, { "epoch": 4.006329641042725, "grad_norm": 0.7109375, "learning_rate": 1.0213019411970367e-05, "loss": 3.9481, "step": 12027 }, { "epoch": 4.006662780044974, "grad_norm": 0.7109375, "learning_rate": 1.021226477237107e-05, "loss": 3.9247, "step": 12028 }, { "epoch": 4.006995919047222, "grad_norm": 0.7109375, "learning_rate": 1.021151010117976e-05, "loss": 3.9408, "step": 12029 }, { "epoch": 4.007329058049471, "grad_norm": 0.7109375, "learning_rate": 1.0210755398405236e-05, "loss": 3.9124, "step": 12030 }, { "epoch": 4.00766219705172, "grad_norm": 0.67578125, "learning_rate": 1.0210000664056279e-05, "loss": 4.0273, "step": 12031 }, { "epoch": 4.007995336053969, "grad_norm": 0.7109375, "learning_rate": 1.0209245898141684e-05, "loss": 4.039, "step": 12032 }, { "epoch": 4.008328475056217, "grad_norm": 0.75390625, "learning_rate": 1.0208491100670241e-05, "loss": 4.0384, "step": 12033 }, { "epoch": 4.008661614058466, "grad_norm": 0.7265625, "learning_rate": 1.0207736271650748e-05, "loss": 4.0073, "step": 12034 }, { "epoch": 4.008994753060715, "grad_norm": 0.69921875, "learning_rate": 1.020698141109199e-05, "loss": 4.0185, "step": 12035 }, { "epoch": 4.009327892062963, "grad_norm": 0.7109375, "learning_rate": 1.020622651900276e-05, "loss": 3.9943, "step": 12036 }, { "epoch": 4.009661031065212, "grad_norm": 0.734375, "learning_rate": 1.0205471595391857e-05, "loss": 3.9894, "step": 12037 }, { "epoch": 4.00999417006746, "grad_norm": 0.64453125, "learning_rate": 1.0204716640268066e-05, "loss": 3.9983, "step": 12038 }, { "epoch": 4.01032730906971, "grad_norm": 0.69921875, "learning_rate": 1.0203961653640186e-05, "loss": 3.9974, "step": 12039 }, { "epoch": 4.010660448071958, "grad_norm": 0.73046875, "learning_rate": 1.020320663551701e-05, "loss": 3.9412, "step": 12040 }, { "epoch": 4.010993587074207, "grad_norm": 0.703125, "learning_rate": 1.020245158590733e-05, "loss": 3.9729, "step": 12041 }, { "epoch": 4.011326726076455, "grad_norm": 0.6953125, "learning_rate": 1.0201696504819944e-05, "loss": 3.9696, "step": 12042 }, { "epoch": 4.011659865078704, "grad_norm": 0.73046875, "learning_rate": 1.0200941392263646e-05, "loss": 4.0456, "step": 12043 }, { "epoch": 4.011993004080953, "grad_norm": 0.73046875, "learning_rate": 1.0200186248247231e-05, "loss": 3.9532, "step": 12044 }, { "epoch": 4.012326143083201, "grad_norm": 0.69140625, "learning_rate": 1.0199431072779492e-05, "loss": 4.0081, "step": 12045 }, { "epoch": 4.01265928208545, "grad_norm": 0.6875, "learning_rate": 1.0198675865869231e-05, "loss": 3.9378, "step": 12046 }, { "epoch": 4.012992421087699, "grad_norm": 0.71484375, "learning_rate": 1.0197920627525242e-05, "loss": 3.9582, "step": 12047 }, { "epoch": 4.013325560089948, "grad_norm": 0.6640625, "learning_rate": 1.019716535775632e-05, "loss": 3.9865, "step": 12048 }, { "epoch": 4.013658699092196, "grad_norm": 0.69921875, "learning_rate": 1.0196410056571264e-05, "loss": 4.0461, "step": 12049 }, { "epoch": 4.013991838094445, "grad_norm": 0.6953125, "learning_rate": 1.0195654723978871e-05, "loss": 3.9685, "step": 12050 }, { "epoch": 4.014324977096694, "grad_norm": 0.7109375, "learning_rate": 1.0194899359987937e-05, "loss": 4.0659, "step": 12051 }, { "epoch": 4.014658116098942, "grad_norm": 0.6796875, "learning_rate": 1.0194143964607265e-05, "loss": 3.9696, "step": 12052 }, { "epoch": 4.014991255101191, "grad_norm": 0.67578125, "learning_rate": 1.0193388537845652e-05, "loss": 4.0947, "step": 12053 }, { "epoch": 4.015324394103439, "grad_norm": 0.703125, "learning_rate": 1.0192633079711897e-05, "loss": 3.9601, "step": 12054 }, { "epoch": 4.0156575331056885, "grad_norm": 0.6796875, "learning_rate": 1.0191877590214796e-05, "loss": 3.9892, "step": 12055 }, { "epoch": 4.015990672107937, "grad_norm": 0.7109375, "learning_rate": 1.0191122069363154e-05, "loss": 3.9836, "step": 12056 }, { "epoch": 4.016323811110186, "grad_norm": 0.6796875, "learning_rate": 1.0190366517165768e-05, "loss": 3.9954, "step": 12057 }, { "epoch": 4.016656950112434, "grad_norm": 0.72265625, "learning_rate": 1.0189610933631438e-05, "loss": 3.9613, "step": 12058 }, { "epoch": 4.0169900891146835, "grad_norm": 0.6953125, "learning_rate": 1.0188855318768968e-05, "loss": 4.0212, "step": 12059 }, { "epoch": 4.017323228116932, "grad_norm": 0.6875, "learning_rate": 1.0188099672587156e-05, "loss": 3.9987, "step": 12060 }, { "epoch": 4.01765636711918, "grad_norm": 0.7421875, "learning_rate": 1.0187343995094807e-05, "loss": 3.9224, "step": 12061 }, { "epoch": 4.017989506121429, "grad_norm": 0.6953125, "learning_rate": 1.0186588286300721e-05, "loss": 4.0328, "step": 12062 }, { "epoch": 4.0183226451236775, "grad_norm": 0.6953125, "learning_rate": 1.01858325462137e-05, "loss": 4.0396, "step": 12063 }, { "epoch": 4.018655784125927, "grad_norm": 0.671875, "learning_rate": 1.0185076774842548e-05, "loss": 3.9817, "step": 12064 }, { "epoch": 4.018988923128175, "grad_norm": 0.71875, "learning_rate": 1.0184320972196066e-05, "loss": 4.0338, "step": 12065 }, { "epoch": 4.019322062130424, "grad_norm": 0.6953125, "learning_rate": 1.0183565138283061e-05, "loss": 3.9449, "step": 12066 }, { "epoch": 4.0196552011326725, "grad_norm": 0.68359375, "learning_rate": 1.0182809273112333e-05, "loss": 3.9947, "step": 12067 }, { "epoch": 4.019988340134922, "grad_norm": 0.68359375, "learning_rate": 1.0182053376692689e-05, "loss": 3.9981, "step": 12068 }, { "epoch": 4.02032147913717, "grad_norm": 0.72265625, "learning_rate": 1.0181297449032933e-05, "loss": 3.9459, "step": 12069 }, { "epoch": 4.020654618139418, "grad_norm": 0.6953125, "learning_rate": 1.0180541490141867e-05, "loss": 4.02, "step": 12070 }, { "epoch": 4.0209877571416675, "grad_norm": 0.70703125, "learning_rate": 1.01797855000283e-05, "loss": 3.9938, "step": 12071 }, { "epoch": 4.021320896143916, "grad_norm": 0.7265625, "learning_rate": 1.0179029478701039e-05, "loss": 4.0299, "step": 12072 }, { "epoch": 4.021654035146165, "grad_norm": 0.68359375, "learning_rate": 1.0178273426168883e-05, "loss": 4.0457, "step": 12073 }, { "epoch": 4.021987174148413, "grad_norm": 0.6796875, "learning_rate": 1.0177517342440645e-05, "loss": 4.0453, "step": 12074 }, { "epoch": 4.022320313150662, "grad_norm": 0.69140625, "learning_rate": 1.017676122752513e-05, "loss": 3.992, "step": 12075 }, { "epoch": 4.022653452152911, "grad_norm": 0.6640625, "learning_rate": 1.0176005081431143e-05, "loss": 3.9818, "step": 12076 }, { "epoch": 4.02298659115516, "grad_norm": 0.73046875, "learning_rate": 1.0175248904167493e-05, "loss": 3.9799, "step": 12077 }, { "epoch": 4.023319730157408, "grad_norm": 0.6796875, "learning_rate": 1.017449269574299e-05, "loss": 3.9821, "step": 12078 }, { "epoch": 4.0236528691596565, "grad_norm": 0.671875, "learning_rate": 1.0173736456166438e-05, "loss": 3.9415, "step": 12079 }, { "epoch": 4.023986008161906, "grad_norm": 0.66796875, "learning_rate": 1.0172980185446648e-05, "loss": 4.0403, "step": 12080 }, { "epoch": 4.024319147164154, "grad_norm": 0.6953125, "learning_rate": 1.0172223883592428e-05, "loss": 4.0293, "step": 12081 }, { "epoch": 4.024652286166403, "grad_norm": 0.703125, "learning_rate": 1.0171467550612587e-05, "loss": 4.0436, "step": 12082 }, { "epoch": 4.024985425168651, "grad_norm": 0.7109375, "learning_rate": 1.0170711186515938e-05, "loss": 3.9882, "step": 12083 }, { "epoch": 4.025318564170901, "grad_norm": 0.734375, "learning_rate": 1.0169954791311286e-05, "loss": 4.0108, "step": 12084 }, { "epoch": 4.025651703173149, "grad_norm": 0.7578125, "learning_rate": 1.0169198365007446e-05, "loss": 4.0099, "step": 12085 }, { "epoch": 4.025984842175398, "grad_norm": 0.70703125, "learning_rate": 1.0168441907613224e-05, "loss": 4.053, "step": 12086 }, { "epoch": 4.026317981177646, "grad_norm": 0.703125, "learning_rate": 1.0167685419137437e-05, "loss": 4.0068, "step": 12087 }, { "epoch": 4.026651120179895, "grad_norm": 0.69921875, "learning_rate": 1.0166928899588891e-05, "loss": 3.9855, "step": 12088 }, { "epoch": 4.026984259182144, "grad_norm": 0.66796875, "learning_rate": 1.0166172348976398e-05, "loss": 4.0604, "step": 12089 }, { "epoch": 4.027317398184392, "grad_norm": 0.6953125, "learning_rate": 1.0165415767308777e-05, "loss": 4.0076, "step": 12090 }, { "epoch": 4.027650537186641, "grad_norm": 0.67578125, "learning_rate": 1.016465915459483e-05, "loss": 4.132, "step": 12091 }, { "epoch": 4.02798367618889, "grad_norm": 0.68359375, "learning_rate": 1.016390251084338e-05, "loss": 4.0749, "step": 12092 }, { "epoch": 4.028316815191139, "grad_norm": 0.70703125, "learning_rate": 1.0163145836063234e-05, "loss": 3.9697, "step": 12093 }, { "epoch": 4.028649954193387, "grad_norm": 0.6875, "learning_rate": 1.0162389130263207e-05, "loss": 4.0528, "step": 12094 }, { "epoch": 4.028983093195636, "grad_norm": 0.71875, "learning_rate": 1.0161632393452113e-05, "loss": 4.0075, "step": 12095 }, { "epoch": 4.029316232197885, "grad_norm": 0.70703125, "learning_rate": 1.0160875625638767e-05, "loss": 4.0124, "step": 12096 }, { "epoch": 4.029649371200133, "grad_norm": 0.73046875, "learning_rate": 1.0160118826831984e-05, "loss": 3.9996, "step": 12097 }, { "epoch": 4.029982510202382, "grad_norm": 0.7265625, "learning_rate": 1.0159361997040581e-05, "loss": 3.9571, "step": 12098 }, { "epoch": 4.03031564920463, "grad_norm": 0.70703125, "learning_rate": 1.0158605136273366e-05, "loss": 4.0011, "step": 12099 }, { "epoch": 4.0306487882068796, "grad_norm": 0.72265625, "learning_rate": 1.0157848244539163e-05, "loss": 3.9876, "step": 12100 }, { "epoch": 4.030981927209128, "grad_norm": 0.6953125, "learning_rate": 1.0157091321846782e-05, "loss": 4.0264, "step": 12101 }, { "epoch": 4.031315066211377, "grad_norm": 0.6640625, "learning_rate": 1.0156334368205045e-05, "loss": 4.0037, "step": 12102 }, { "epoch": 4.031648205213625, "grad_norm": 0.73828125, "learning_rate": 1.0155577383622765e-05, "loss": 3.9133, "step": 12103 }, { "epoch": 4.0319813442158745, "grad_norm": 0.7265625, "learning_rate": 1.015482036810876e-05, "loss": 4.0229, "step": 12104 }, { "epoch": 4.032314483218123, "grad_norm": 0.7421875, "learning_rate": 1.0154063321671848e-05, "loss": 4.1238, "step": 12105 }, { "epoch": 4.032647622220371, "grad_norm": 0.703125, "learning_rate": 1.015330624432085e-05, "loss": 3.9976, "step": 12106 }, { "epoch": 4.03298076122262, "grad_norm": 0.73046875, "learning_rate": 1.0152549136064578e-05, "loss": 4.027, "step": 12107 }, { "epoch": 4.033313900224869, "grad_norm": 0.70703125, "learning_rate": 1.0151791996911855e-05, "loss": 3.9922, "step": 12108 }, { "epoch": 4.033647039227118, "grad_norm": 0.7109375, "learning_rate": 1.01510348268715e-05, "loss": 4.0248, "step": 12109 }, { "epoch": 4.033980178229366, "grad_norm": 0.6875, "learning_rate": 1.0150277625952329e-05, "loss": 3.9943, "step": 12110 }, { "epoch": 4.034313317231615, "grad_norm": 0.6875, "learning_rate": 1.0149520394163167e-05, "loss": 4.0402, "step": 12111 }, { "epoch": 4.0346464562338635, "grad_norm": 0.703125, "learning_rate": 1.0148763131512828e-05, "loss": 4.0384, "step": 12112 }, { "epoch": 4.034979595236113, "grad_norm": 0.70703125, "learning_rate": 1.0148005838010138e-05, "loss": 3.9927, "step": 12113 }, { "epoch": 4.035312734238361, "grad_norm": 0.68359375, "learning_rate": 1.0147248513663916e-05, "loss": 4.0578, "step": 12114 }, { "epoch": 4.035645873240609, "grad_norm": 0.70703125, "learning_rate": 1.0146491158482982e-05, "loss": 3.9272, "step": 12115 }, { "epoch": 4.0359790122428585, "grad_norm": 0.6875, "learning_rate": 1.0145733772476159e-05, "loss": 4.0087, "step": 12116 }, { "epoch": 4.036312151245107, "grad_norm": 0.703125, "learning_rate": 1.0144976355652266e-05, "loss": 3.8878, "step": 12117 }, { "epoch": 4.036645290247356, "grad_norm": 0.68359375, "learning_rate": 1.014421890802013e-05, "loss": 4.1016, "step": 12118 }, { "epoch": 4.036978429249604, "grad_norm": 0.73828125, "learning_rate": 1.0143461429588567e-05, "loss": 4.0395, "step": 12119 }, { "epoch": 4.0373115682518534, "grad_norm": 0.6796875, "learning_rate": 1.014270392036641e-05, "loss": 3.9473, "step": 12120 }, { "epoch": 4.037644707254102, "grad_norm": 0.7109375, "learning_rate": 1.0141946380362472e-05, "loss": 4.0221, "step": 12121 }, { "epoch": 4.03797784625635, "grad_norm": 0.6953125, "learning_rate": 1.0141188809585584e-05, "loss": 3.9549, "step": 12122 }, { "epoch": 4.038310985258599, "grad_norm": 0.66796875, "learning_rate": 1.0140431208044563e-05, "loss": 4.0271, "step": 12123 }, { "epoch": 4.0386441242608475, "grad_norm": 0.69921875, "learning_rate": 1.0139673575748242e-05, "loss": 3.9726, "step": 12124 }, { "epoch": 4.038977263263097, "grad_norm": 0.70703125, "learning_rate": 1.0138915912705441e-05, "loss": 4.0074, "step": 12125 }, { "epoch": 4.039310402265345, "grad_norm": 0.68359375, "learning_rate": 1.0138158218924983e-05, "loss": 4.0188, "step": 12126 }, { "epoch": 4.039643541267594, "grad_norm": 0.7265625, "learning_rate": 1.0137400494415697e-05, "loss": 3.9024, "step": 12127 }, { "epoch": 4.0399766802698425, "grad_norm": 0.734375, "learning_rate": 1.0136642739186407e-05, "loss": 4.0527, "step": 12128 }, { "epoch": 4.040309819272092, "grad_norm": 0.6796875, "learning_rate": 1.0135884953245943e-05, "loss": 3.9574, "step": 12129 }, { "epoch": 4.04064295827434, "grad_norm": 0.6953125, "learning_rate": 1.0135127136603125e-05, "loss": 3.9817, "step": 12130 }, { "epoch": 4.040976097276588, "grad_norm": 0.671875, "learning_rate": 1.0134369289266786e-05, "loss": 3.9632, "step": 12131 }, { "epoch": 4.041309236278837, "grad_norm": 0.69140625, "learning_rate": 1.0133611411245749e-05, "loss": 4.0331, "step": 12132 }, { "epoch": 4.041642375281086, "grad_norm": 0.6953125, "learning_rate": 1.0132853502548844e-05, "loss": 3.985, "step": 12133 }, { "epoch": 4.041975514283335, "grad_norm": 0.67578125, "learning_rate": 1.01320955631849e-05, "loss": 4.0274, "step": 12134 }, { "epoch": 4.042308653285583, "grad_norm": 0.6796875, "learning_rate": 1.0131337593162741e-05, "loss": 4.0144, "step": 12135 }, { "epoch": 4.042641792287832, "grad_norm": 0.6953125, "learning_rate": 1.0130579592491202e-05, "loss": 4.0111, "step": 12136 }, { "epoch": 4.042974931290081, "grad_norm": 0.71484375, "learning_rate": 1.0129821561179106e-05, "loss": 4.0206, "step": 12137 }, { "epoch": 4.04330807029233, "grad_norm": 0.7578125, "learning_rate": 1.0129063499235286e-05, "loss": 4.0065, "step": 12138 }, { "epoch": 4.043641209294578, "grad_norm": 0.66796875, "learning_rate": 1.0128305406668571e-05, "loss": 3.972, "step": 12139 }, { "epoch": 4.0439743482968264, "grad_norm": 0.70703125, "learning_rate": 1.0127547283487792e-05, "loss": 4.0001, "step": 12140 }, { "epoch": 4.044307487299076, "grad_norm": 0.6953125, "learning_rate": 1.0126789129701775e-05, "loss": 3.9934, "step": 12141 }, { "epoch": 4.044640626301324, "grad_norm": 0.703125, "learning_rate": 1.0126030945319359e-05, "loss": 4.0245, "step": 12142 }, { "epoch": 4.044973765303573, "grad_norm": 0.70703125, "learning_rate": 1.0125272730349366e-05, "loss": 3.9701, "step": 12143 }, { "epoch": 4.045306904305821, "grad_norm": 0.71875, "learning_rate": 1.0124514484800633e-05, "loss": 4.0009, "step": 12144 }, { "epoch": 4.045640043308071, "grad_norm": 0.6953125, "learning_rate": 1.0123756208681992e-05, "loss": 4.0029, "step": 12145 }, { "epoch": 4.045973182310319, "grad_norm": 0.7421875, "learning_rate": 1.0122997902002273e-05, "loss": 4.0459, "step": 12146 }, { "epoch": 4.046306321312568, "grad_norm": 0.6875, "learning_rate": 1.0122239564770313e-05, "loss": 4.0344, "step": 12147 }, { "epoch": 4.046639460314816, "grad_norm": 0.671875, "learning_rate": 1.0121481196994936e-05, "loss": 4.0201, "step": 12148 }, { "epoch": 4.046972599317065, "grad_norm": 0.6953125, "learning_rate": 1.0120722798684986e-05, "loss": 3.967, "step": 12149 }, { "epoch": 4.047305738319314, "grad_norm": 0.734375, "learning_rate": 1.011996436984929e-05, "loss": 4.0847, "step": 12150 }, { "epoch": 4.047638877321562, "grad_norm": 0.7109375, "learning_rate": 1.0119205910496686e-05, "loss": 4.0692, "step": 12151 }, { "epoch": 4.047972016323811, "grad_norm": 0.68359375, "learning_rate": 1.0118447420636004e-05, "loss": 4.049, "step": 12152 }, { "epoch": 4.04830515532606, "grad_norm": 0.67578125, "learning_rate": 1.0117688900276082e-05, "loss": 4.0439, "step": 12153 }, { "epoch": 4.048638294328309, "grad_norm": 0.71875, "learning_rate": 1.0116930349425753e-05, "loss": 4.0049, "step": 12154 }, { "epoch": 4.048971433330557, "grad_norm": 0.73828125, "learning_rate": 1.0116171768093855e-05, "loss": 3.9516, "step": 12155 }, { "epoch": 4.049304572332806, "grad_norm": 0.72265625, "learning_rate": 1.0115413156289222e-05, "loss": 3.9527, "step": 12156 }, { "epoch": 4.049637711335055, "grad_norm": 0.70703125, "learning_rate": 1.011465451402069e-05, "loss": 4.078, "step": 12157 }, { "epoch": 4.049970850337303, "grad_norm": 0.75, "learning_rate": 1.0113895841297099e-05, "loss": 3.9762, "step": 12158 }, { "epoch": 4.050303989339552, "grad_norm": 0.6953125, "learning_rate": 1.011313713812728e-05, "loss": 4.0032, "step": 12159 }, { "epoch": 4.0506371283418, "grad_norm": 0.7265625, "learning_rate": 1.0112378404520077e-05, "loss": 4.0204, "step": 12160 }, { "epoch": 4.0509702673440495, "grad_norm": 0.73046875, "learning_rate": 1.011161964048432e-05, "loss": 4.0403, "step": 12161 }, { "epoch": 4.051303406346298, "grad_norm": 0.72265625, "learning_rate": 1.0110860846028853e-05, "loss": 3.99, "step": 12162 }, { "epoch": 4.051636545348547, "grad_norm": 0.67578125, "learning_rate": 1.011010202116251e-05, "loss": 3.9268, "step": 12163 }, { "epoch": 4.051969684350795, "grad_norm": 0.70703125, "learning_rate": 1.0109343165894136e-05, "loss": 3.9506, "step": 12164 }, { "epoch": 4.0523028233530445, "grad_norm": 0.69921875, "learning_rate": 1.0108584280232565e-05, "loss": 4.073, "step": 12165 }, { "epoch": 4.052635962355293, "grad_norm": 0.7109375, "learning_rate": 1.0107825364186635e-05, "loss": 4.0305, "step": 12166 }, { "epoch": 4.052969101357541, "grad_norm": 0.6796875, "learning_rate": 1.0107066417765188e-05, "loss": 4.0794, "step": 12167 }, { "epoch": 4.05330224035979, "grad_norm": 0.671875, "learning_rate": 1.0106307440977066e-05, "loss": 4.0495, "step": 12168 }, { "epoch": 4.0536353793620385, "grad_norm": 0.671875, "learning_rate": 1.0105548433831109e-05, "loss": 4.0119, "step": 12169 }, { "epoch": 4.053968518364288, "grad_norm": 0.7109375, "learning_rate": 1.0104789396336152e-05, "loss": 3.9814, "step": 12170 }, { "epoch": 4.054301657366536, "grad_norm": 0.6875, "learning_rate": 1.0104030328501045e-05, "loss": 3.8966, "step": 12171 }, { "epoch": 4.054634796368785, "grad_norm": 0.6953125, "learning_rate": 1.010327123033462e-05, "loss": 4.0021, "step": 12172 }, { "epoch": 4.0549679353710335, "grad_norm": 0.69140625, "learning_rate": 1.0102512101845727e-05, "loss": 4.0084, "step": 12173 }, { "epoch": 4.055301074373283, "grad_norm": 0.71875, "learning_rate": 1.0101752943043206e-05, "loss": 3.9077, "step": 12174 }, { "epoch": 4.055634213375531, "grad_norm": 0.73046875, "learning_rate": 1.0100993753935898e-05, "loss": 4.0203, "step": 12175 }, { "epoch": 4.055967352377779, "grad_norm": 0.73828125, "learning_rate": 1.0100234534532644e-05, "loss": 3.9919, "step": 12176 }, { "epoch": 4.0563004913800285, "grad_norm": 0.70703125, "learning_rate": 1.0099475284842291e-05, "loss": 4.0472, "step": 12177 }, { "epoch": 4.056633630382277, "grad_norm": 0.67578125, "learning_rate": 1.0098716004873683e-05, "loss": 3.9867, "step": 12178 }, { "epoch": 4.056966769384526, "grad_norm": 0.71875, "learning_rate": 1.0097956694635659e-05, "loss": 4.0264, "step": 12179 }, { "epoch": 4.057299908386774, "grad_norm": 0.69921875, "learning_rate": 1.009719735413707e-05, "loss": 4.0379, "step": 12180 }, { "epoch": 4.057633047389023, "grad_norm": 0.6875, "learning_rate": 1.0096437983386756e-05, "loss": 3.9517, "step": 12181 }, { "epoch": 4.057966186391272, "grad_norm": 0.6875, "learning_rate": 1.0095678582393563e-05, "loss": 4.0652, "step": 12182 }, { "epoch": 4.05829932539352, "grad_norm": 0.69921875, "learning_rate": 1.0094919151166339e-05, "loss": 3.9565, "step": 12183 }, { "epoch": 4.058632464395769, "grad_norm": 0.7421875, "learning_rate": 1.0094159689713926e-05, "loss": 4.0288, "step": 12184 }, { "epoch": 4.0589656033980175, "grad_norm": 0.6875, "learning_rate": 1.009340019804517e-05, "loss": 3.9659, "step": 12185 }, { "epoch": 4.059298742400267, "grad_norm": 0.7109375, "learning_rate": 1.0092640676168922e-05, "loss": 3.996, "step": 12186 }, { "epoch": 4.059631881402515, "grad_norm": 0.73046875, "learning_rate": 1.0091881124094024e-05, "loss": 4.0673, "step": 12187 }, { "epoch": 4.059965020404764, "grad_norm": 0.703125, "learning_rate": 1.0091121541829326e-05, "loss": 3.9403, "step": 12188 }, { "epoch": 4.060298159407012, "grad_norm": 0.734375, "learning_rate": 1.0090361929383671e-05, "loss": 3.982, "step": 12189 }, { "epoch": 4.060631298409262, "grad_norm": 0.66796875, "learning_rate": 1.0089602286765914e-05, "loss": 3.9479, "step": 12190 }, { "epoch": 4.06096443741151, "grad_norm": 0.70703125, "learning_rate": 1.0088842613984897e-05, "loss": 3.9054, "step": 12191 }, { "epoch": 4.061297576413758, "grad_norm": 0.72265625, "learning_rate": 1.0088082911049473e-05, "loss": 4.0014, "step": 12192 }, { "epoch": 4.061630715416007, "grad_norm": 0.69921875, "learning_rate": 1.0087323177968489e-05, "loss": 4.0205, "step": 12193 }, { "epoch": 4.061963854418256, "grad_norm": 0.67578125, "learning_rate": 1.0086563414750792e-05, "loss": 3.8873, "step": 12194 }, { "epoch": 4.062296993420505, "grad_norm": 0.73046875, "learning_rate": 1.0085803621405235e-05, "loss": 3.9479, "step": 12195 }, { "epoch": 4.062630132422753, "grad_norm": 0.7109375, "learning_rate": 1.0085043797940666e-05, "loss": 4.0586, "step": 12196 }, { "epoch": 4.062963271425002, "grad_norm": 0.703125, "learning_rate": 1.0084283944365938e-05, "loss": 4.0223, "step": 12197 }, { "epoch": 4.063296410427251, "grad_norm": 0.69921875, "learning_rate": 1.0083524060689895e-05, "loss": 4.0271, "step": 12198 }, { "epoch": 4.0636295494295, "grad_norm": 0.66015625, "learning_rate": 1.0082764146921398e-05, "loss": 4.0261, "step": 12199 }, { "epoch": 4.063962688431748, "grad_norm": 0.71875, "learning_rate": 1.0082004203069288e-05, "loss": 3.9895, "step": 12200 }, { "epoch": 4.064295827433996, "grad_norm": 0.6875, "learning_rate": 1.0081244229142426e-05, "loss": 4.013, "step": 12201 }, { "epoch": 4.064628966436246, "grad_norm": 0.6953125, "learning_rate": 1.0080484225149659e-05, "loss": 4.0368, "step": 12202 }, { "epoch": 4.064962105438494, "grad_norm": 0.71875, "learning_rate": 1.0079724191099837e-05, "loss": 4.0029, "step": 12203 }, { "epoch": 4.065295244440743, "grad_norm": 0.69921875, "learning_rate": 1.0078964127001817e-05, "loss": 4.0096, "step": 12204 }, { "epoch": 4.065628383442991, "grad_norm": 0.6953125, "learning_rate": 1.0078204032864454e-05, "loss": 4.0006, "step": 12205 }, { "epoch": 4.065961522445241, "grad_norm": 0.67578125, "learning_rate": 1.0077443908696596e-05, "loss": 3.9932, "step": 12206 }, { "epoch": 4.066294661447489, "grad_norm": 0.70703125, "learning_rate": 1.0076683754507098e-05, "loss": 3.9886, "step": 12207 }, { "epoch": 4.066627800449738, "grad_norm": 0.68359375, "learning_rate": 1.0075923570304818e-05, "loss": 4.0094, "step": 12208 }, { "epoch": 4.066960939451986, "grad_norm": 0.74609375, "learning_rate": 1.0075163356098606e-05, "loss": 4.0151, "step": 12209 }, { "epoch": 4.067294078454235, "grad_norm": 0.703125, "learning_rate": 1.0074403111897321e-05, "loss": 3.9937, "step": 12210 }, { "epoch": 4.067627217456484, "grad_norm": 0.6796875, "learning_rate": 1.0073642837709813e-05, "loss": 4.0642, "step": 12211 }, { "epoch": 4.067960356458732, "grad_norm": 0.75390625, "learning_rate": 1.0072882533544943e-05, "loss": 3.9559, "step": 12212 }, { "epoch": 4.068293495460981, "grad_norm": 0.6875, "learning_rate": 1.007212219941156e-05, "loss": 3.945, "step": 12213 }, { "epoch": 4.06862663446323, "grad_norm": 0.67578125, "learning_rate": 1.0071361835318527e-05, "loss": 4.0708, "step": 12214 }, { "epoch": 4.068959773465479, "grad_norm": 0.69140625, "learning_rate": 1.00706014412747e-05, "loss": 3.9784, "step": 12215 }, { "epoch": 4.069292912467727, "grad_norm": 0.7109375, "learning_rate": 1.0069841017288932e-05, "loss": 4.0547, "step": 12216 }, { "epoch": 4.069626051469976, "grad_norm": 0.671875, "learning_rate": 1.0069080563370084e-05, "loss": 3.9972, "step": 12217 }, { "epoch": 4.0699591904722245, "grad_norm": 0.67578125, "learning_rate": 1.0068320079527012e-05, "loss": 3.9779, "step": 12218 }, { "epoch": 4.070292329474473, "grad_norm": 0.70703125, "learning_rate": 1.0067559565768574e-05, "loss": 3.9936, "step": 12219 }, { "epoch": 4.070625468476722, "grad_norm": 0.6875, "learning_rate": 1.0066799022103625e-05, "loss": 3.9742, "step": 12220 }, { "epoch": 4.07095860747897, "grad_norm": 0.71875, "learning_rate": 1.0066038448541033e-05, "loss": 3.9827, "step": 12221 }, { "epoch": 4.0712917464812195, "grad_norm": 0.7265625, "learning_rate": 1.0065277845089644e-05, "loss": 3.9595, "step": 12222 }, { "epoch": 4.071624885483468, "grad_norm": 0.69921875, "learning_rate": 1.006451721175833e-05, "loss": 4.0955, "step": 12223 }, { "epoch": 4.071958024485717, "grad_norm": 0.734375, "learning_rate": 1.0063756548555943e-05, "loss": 4.0385, "step": 12224 }, { "epoch": 4.072291163487965, "grad_norm": 0.7109375, "learning_rate": 1.0062995855491347e-05, "loss": 3.979, "step": 12225 }, { "epoch": 4.0726243024902145, "grad_norm": 0.68359375, "learning_rate": 1.00622351325734e-05, "loss": 4.0051, "step": 12226 }, { "epoch": 4.072957441492463, "grad_norm": 0.68359375, "learning_rate": 1.0061474379810963e-05, "loss": 4.0655, "step": 12227 }, { "epoch": 4.073290580494711, "grad_norm": 0.72265625, "learning_rate": 1.0060713597212898e-05, "loss": 4.0124, "step": 12228 }, { "epoch": 4.07362371949696, "grad_norm": 0.7265625, "learning_rate": 1.0059952784788064e-05, "loss": 4.0644, "step": 12229 }, { "epoch": 4.0739568584992085, "grad_norm": 0.703125, "learning_rate": 1.0059191942545328e-05, "loss": 3.979, "step": 12230 }, { "epoch": 4.074289997501458, "grad_norm": 0.74609375, "learning_rate": 1.0058431070493548e-05, "loss": 4.068, "step": 12231 }, { "epoch": 4.074623136503706, "grad_norm": 0.71875, "learning_rate": 1.0057670168641588e-05, "loss": 3.9483, "step": 12232 }, { "epoch": 4.074956275505955, "grad_norm": 0.71484375, "learning_rate": 1.0056909236998309e-05, "loss": 4.0036, "step": 12233 }, { "epoch": 4.0752894145082035, "grad_norm": 0.6953125, "learning_rate": 1.0056148275572577e-05, "loss": 4.0228, "step": 12234 }, { "epoch": 4.075622553510453, "grad_norm": 0.66015625, "learning_rate": 1.0055387284373253e-05, "loss": 3.9608, "step": 12235 }, { "epoch": 4.075955692512701, "grad_norm": 0.7265625, "learning_rate": 1.0054626263409203e-05, "loss": 4.0087, "step": 12236 }, { "epoch": 4.076288831514949, "grad_norm": 0.7109375, "learning_rate": 1.005386521268929e-05, "loss": 4.006, "step": 12237 }, { "epoch": 4.076621970517198, "grad_norm": 0.734375, "learning_rate": 1.0053104132222378e-05, "loss": 4.0134, "step": 12238 }, { "epoch": 4.076955109519447, "grad_norm": 0.734375, "learning_rate": 1.0052343022017335e-05, "loss": 4.0037, "step": 12239 }, { "epoch": 4.077288248521696, "grad_norm": 0.68359375, "learning_rate": 1.0051581882083022e-05, "loss": 3.9022, "step": 12240 }, { "epoch": 4.077621387523944, "grad_norm": 0.7421875, "learning_rate": 1.005082071242831e-05, "loss": 4.0014, "step": 12241 }, { "epoch": 4.077954526526193, "grad_norm": 0.6640625, "learning_rate": 1.0050059513062058e-05, "loss": 4.0292, "step": 12242 }, { "epoch": 4.078287665528442, "grad_norm": 0.73046875, "learning_rate": 1.0049298283993139e-05, "loss": 3.9789, "step": 12243 }, { "epoch": 4.07862080453069, "grad_norm": 0.62890625, "learning_rate": 1.0048537025230415e-05, "loss": 4.0682, "step": 12244 }, { "epoch": 4.078953943532939, "grad_norm": 0.6953125, "learning_rate": 1.0047775736782755e-05, "loss": 3.9382, "step": 12245 }, { "epoch": 4.0792870825351875, "grad_norm": 0.6953125, "learning_rate": 1.0047014418659026e-05, "loss": 3.9369, "step": 12246 }, { "epoch": 4.079620221537437, "grad_norm": 0.73046875, "learning_rate": 1.0046253070868094e-05, "loss": 4.0334, "step": 12247 }, { "epoch": 4.079953360539685, "grad_norm": 0.6875, "learning_rate": 1.0045491693418834e-05, "loss": 3.945, "step": 12248 }, { "epoch": 4.080286499541934, "grad_norm": 0.7265625, "learning_rate": 1.0044730286320105e-05, "loss": 4.0042, "step": 12249 }, { "epoch": 4.080619638544182, "grad_norm": 0.6953125, "learning_rate": 1.0043968849580783e-05, "loss": 4.0396, "step": 12250 }, { "epoch": 4.080952777546432, "grad_norm": 0.69921875, "learning_rate": 1.0043207383209731e-05, "loss": 3.966, "step": 12251 }, { "epoch": 4.08128591654868, "grad_norm": 0.71875, "learning_rate": 1.0042445887215825e-05, "loss": 4.0492, "step": 12252 }, { "epoch": 4.081619055550928, "grad_norm": 0.6953125, "learning_rate": 1.0041684361607927e-05, "loss": 3.9113, "step": 12253 }, { "epoch": 4.081952194553177, "grad_norm": 0.71484375, "learning_rate": 1.0040922806394918e-05, "loss": 4.0182, "step": 12254 }, { "epoch": 4.082285333555426, "grad_norm": 0.7265625, "learning_rate": 1.0040161221585655e-05, "loss": 3.9782, "step": 12255 }, { "epoch": 4.082618472557675, "grad_norm": 0.72265625, "learning_rate": 1.0039399607189019e-05, "loss": 3.9719, "step": 12256 }, { "epoch": 4.082951611559923, "grad_norm": 0.6875, "learning_rate": 1.0038637963213877e-05, "loss": 3.9991, "step": 12257 }, { "epoch": 4.083284750562172, "grad_norm": 0.73046875, "learning_rate": 1.0037876289669103e-05, "loss": 3.951, "step": 12258 }, { "epoch": 4.083617889564421, "grad_norm": 0.7109375, "learning_rate": 1.0037114586563566e-05, "loss": 3.9838, "step": 12259 }, { "epoch": 4.08395102856667, "grad_norm": 0.71875, "learning_rate": 1.003635285390614e-05, "loss": 4.035, "step": 12260 }, { "epoch": 4.084284167568918, "grad_norm": 0.6953125, "learning_rate": 1.0035591091705695e-05, "loss": 4.0548, "step": 12261 }, { "epoch": 4.084617306571166, "grad_norm": 0.69140625, "learning_rate": 1.0034829299971108e-05, "loss": 3.9955, "step": 12262 }, { "epoch": 4.084950445573416, "grad_norm": 0.703125, "learning_rate": 1.0034067478711252e-05, "loss": 3.9474, "step": 12263 }, { "epoch": 4.085283584575664, "grad_norm": 0.70703125, "learning_rate": 1.0033305627934995e-05, "loss": 3.997, "step": 12264 }, { "epoch": 4.085616723577913, "grad_norm": 0.71484375, "learning_rate": 1.0032543747651216e-05, "loss": 4.0155, "step": 12265 }, { "epoch": 4.085949862580161, "grad_norm": 0.6796875, "learning_rate": 1.0031781837868785e-05, "loss": 3.941, "step": 12266 }, { "epoch": 4.0862830015824105, "grad_norm": 0.7109375, "learning_rate": 1.0031019898596584e-05, "loss": 4.0072, "step": 12267 }, { "epoch": 4.086616140584659, "grad_norm": 0.69921875, "learning_rate": 1.0030257929843481e-05, "loss": 4.005, "step": 12268 }, { "epoch": 4.086949279586908, "grad_norm": 0.68359375, "learning_rate": 1.0029495931618353e-05, "loss": 3.9956, "step": 12269 }, { "epoch": 4.087282418589156, "grad_norm": 0.6953125, "learning_rate": 1.002873390393008e-05, "loss": 4.0314, "step": 12270 }, { "epoch": 4.087615557591405, "grad_norm": 0.70703125, "learning_rate": 1.002797184678753e-05, "loss": 4.0143, "step": 12271 }, { "epoch": 4.087948696593654, "grad_norm": 0.71875, "learning_rate": 1.0027209760199587e-05, "loss": 3.9463, "step": 12272 }, { "epoch": 4.088281835595902, "grad_norm": 0.6953125, "learning_rate": 1.002644764417512e-05, "loss": 4.0261, "step": 12273 }, { "epoch": 4.088614974598151, "grad_norm": 0.71875, "learning_rate": 1.0025685498723015e-05, "loss": 3.942, "step": 12274 }, { "epoch": 4.0889481136004, "grad_norm": 0.6953125, "learning_rate": 1.0024923323852142e-05, "loss": 3.969, "step": 12275 }, { "epoch": 4.089281252602649, "grad_norm": 0.69140625, "learning_rate": 1.002416111957138e-05, "loss": 3.9889, "step": 12276 }, { "epoch": 4.089614391604897, "grad_norm": 0.70703125, "learning_rate": 1.002339888588961e-05, "loss": 3.9588, "step": 12277 }, { "epoch": 4.089947530607146, "grad_norm": 0.7265625, "learning_rate": 1.002263662281571e-05, "loss": 3.9782, "step": 12278 }, { "epoch": 4.0902806696093945, "grad_norm": 0.70703125, "learning_rate": 1.0021874330358554e-05, "loss": 3.9509, "step": 12279 }, { "epoch": 4.090613808611643, "grad_norm": 0.75, "learning_rate": 1.0021112008527026e-05, "loss": 3.9279, "step": 12280 }, { "epoch": 4.090946947613892, "grad_norm": 0.73046875, "learning_rate": 1.0020349657330005e-05, "loss": 3.9878, "step": 12281 }, { "epoch": 4.09128008661614, "grad_norm": 0.72265625, "learning_rate": 1.0019587276776366e-05, "loss": 3.9713, "step": 12282 }, { "epoch": 4.0916132256183895, "grad_norm": 0.7109375, "learning_rate": 1.0018824866874998e-05, "loss": 3.9774, "step": 12283 }, { "epoch": 4.091946364620638, "grad_norm": 0.6875, "learning_rate": 1.001806242763477e-05, "loss": 3.9596, "step": 12284 }, { "epoch": 4.092279503622887, "grad_norm": 0.7265625, "learning_rate": 1.0017299959064573e-05, "loss": 4.0082, "step": 12285 }, { "epoch": 4.092612642625135, "grad_norm": 0.66796875, "learning_rate": 1.0016537461173284e-05, "loss": 4.049, "step": 12286 }, { "epoch": 4.092945781627384, "grad_norm": 0.7421875, "learning_rate": 1.0015774933969784e-05, "loss": 3.9266, "step": 12287 }, { "epoch": 4.093278920629633, "grad_norm": 0.72265625, "learning_rate": 1.0015012377462954e-05, "loss": 3.9984, "step": 12288 }, { "epoch": 4.093612059631881, "grad_norm": 0.6953125, "learning_rate": 1.0014249791661679e-05, "loss": 3.9968, "step": 12289 }, { "epoch": 4.09394519863413, "grad_norm": 0.69921875, "learning_rate": 1.0013487176574839e-05, "loss": 4.0217, "step": 12290 }, { "epoch": 4.0942783376363785, "grad_norm": 0.70703125, "learning_rate": 1.0012724532211315e-05, "loss": 4.047, "step": 12291 }, { "epoch": 4.094611476638628, "grad_norm": 0.703125, "learning_rate": 1.0011961858579997e-05, "loss": 3.9935, "step": 12292 }, { "epoch": 4.094944615640876, "grad_norm": 0.703125, "learning_rate": 1.001119915568976e-05, "loss": 3.9196, "step": 12293 }, { "epoch": 4.095277754643125, "grad_norm": 0.69921875, "learning_rate": 1.0010436423549494e-05, "loss": 3.9554, "step": 12294 }, { "epoch": 4.0956108936453735, "grad_norm": 0.7109375, "learning_rate": 1.0009673662168082e-05, "loss": 3.9763, "step": 12295 }, { "epoch": 4.095944032647623, "grad_norm": 0.6640625, "learning_rate": 1.000891087155441e-05, "loss": 3.9928, "step": 12296 }, { "epoch": 4.096277171649871, "grad_norm": 0.68359375, "learning_rate": 1.0008148051717356e-05, "loss": 3.9043, "step": 12297 }, { "epoch": 4.096610310652119, "grad_norm": 0.69140625, "learning_rate": 1.0007385202665812e-05, "loss": 4.0783, "step": 12298 }, { "epoch": 4.096943449654368, "grad_norm": 0.71484375, "learning_rate": 1.0006622324408662e-05, "loss": 3.977, "step": 12299 }, { "epoch": 4.097276588656617, "grad_norm": 0.69140625, "learning_rate": 1.000585941695479e-05, "loss": 4.0021, "step": 12300 }, { "epoch": 4.097609727658866, "grad_norm": 0.6796875, "learning_rate": 1.0005096480313083e-05, "loss": 3.9598, "step": 12301 }, { "epoch": 4.097942866661114, "grad_norm": 0.71875, "learning_rate": 1.000433351449243e-05, "loss": 4.0466, "step": 12302 }, { "epoch": 4.098276005663363, "grad_norm": 0.6875, "learning_rate": 1.0003570519501713e-05, "loss": 4.0011, "step": 12303 }, { "epoch": 4.098609144665612, "grad_norm": 0.72265625, "learning_rate": 1.0002807495349825e-05, "loss": 3.9856, "step": 12304 }, { "epoch": 4.098942283667861, "grad_norm": 0.67578125, "learning_rate": 1.0002044442045652e-05, "loss": 3.9836, "step": 12305 }, { "epoch": 4.099275422670109, "grad_norm": 0.7421875, "learning_rate": 1.0001281359598077e-05, "loss": 3.9972, "step": 12306 }, { "epoch": 4.099608561672357, "grad_norm": 0.7109375, "learning_rate": 1.0000518248015993e-05, "loss": 3.9799, "step": 12307 }, { "epoch": 4.099941700674607, "grad_norm": 0.74609375, "learning_rate": 9.999755107308289e-06, "loss": 4.0196, "step": 12308 }, { "epoch": 4.100274839676855, "grad_norm": 0.765625, "learning_rate": 9.998991937483853e-06, "loss": 3.9915, "step": 12309 }, { "epoch": 4.100607978679104, "grad_norm": 0.7265625, "learning_rate": 9.99822873855157e-06, "loss": 4.056, "step": 12310 }, { "epoch": 4.100941117681352, "grad_norm": 0.6875, "learning_rate": 9.997465510520337e-06, "loss": 3.9726, "step": 12311 }, { "epoch": 4.101274256683602, "grad_norm": 0.71484375, "learning_rate": 9.996702253399036e-06, "loss": 3.986, "step": 12312 }, { "epoch": 4.10160739568585, "grad_norm": 0.6796875, "learning_rate": 9.995938967196566e-06, "loss": 3.9622, "step": 12313 }, { "epoch": 4.101940534688099, "grad_norm": 0.69921875, "learning_rate": 9.995175651921814e-06, "loss": 4.0491, "step": 12314 }, { "epoch": 4.102273673690347, "grad_norm": 0.73828125, "learning_rate": 9.994412307583665e-06, "loss": 4.0625, "step": 12315 }, { "epoch": 4.102606812692596, "grad_norm": 0.75, "learning_rate": 9.993648934191018e-06, "loss": 4.0242, "step": 12316 }, { "epoch": 4.102939951694845, "grad_norm": 0.72265625, "learning_rate": 9.992885531752764e-06, "loss": 3.9244, "step": 12317 }, { "epoch": 4.103273090697093, "grad_norm": 0.6953125, "learning_rate": 9.992122100277792e-06, "loss": 3.9827, "step": 12318 }, { "epoch": 4.103606229699342, "grad_norm": 0.7421875, "learning_rate": 9.991358639774994e-06, "loss": 4.0056, "step": 12319 }, { "epoch": 4.103939368701591, "grad_norm": 0.71484375, "learning_rate": 9.990595150253265e-06, "loss": 3.949, "step": 12320 }, { "epoch": 4.10427250770384, "grad_norm": 0.7421875, "learning_rate": 9.989831631721497e-06, "loss": 4.0512, "step": 12321 }, { "epoch": 4.104605646706088, "grad_norm": 0.69140625, "learning_rate": 9.989068084188586e-06, "loss": 3.9842, "step": 12322 }, { "epoch": 4.104938785708336, "grad_norm": 0.703125, "learning_rate": 9.98830450766342e-06, "loss": 4.0507, "step": 12323 }, { "epoch": 4.1052719247105856, "grad_norm": 0.6875, "learning_rate": 9.987540902154897e-06, "loss": 3.9988, "step": 12324 }, { "epoch": 4.105605063712834, "grad_norm": 0.74609375, "learning_rate": 9.98677726767191e-06, "loss": 4.0331, "step": 12325 }, { "epoch": 4.105938202715083, "grad_norm": 0.6796875, "learning_rate": 9.986013604223355e-06, "loss": 4.0387, "step": 12326 }, { "epoch": 4.106271341717331, "grad_norm": 0.69921875, "learning_rate": 9.985249911818126e-06, "loss": 3.9599, "step": 12327 }, { "epoch": 4.1066044807195805, "grad_norm": 0.70703125, "learning_rate": 9.984486190465117e-06, "loss": 4.0026, "step": 12328 }, { "epoch": 4.106937619721829, "grad_norm": 0.69921875, "learning_rate": 9.983722440173227e-06, "loss": 4.0148, "step": 12329 }, { "epoch": 4.107270758724078, "grad_norm": 0.70703125, "learning_rate": 9.982958660951353e-06, "loss": 3.9906, "step": 12330 }, { "epoch": 4.107603897726326, "grad_norm": 0.68359375, "learning_rate": 9.982194852808385e-06, "loss": 3.9984, "step": 12331 }, { "epoch": 4.107937036728575, "grad_norm": 0.7421875, "learning_rate": 9.981431015753224e-06, "loss": 3.97, "step": 12332 }, { "epoch": 4.108270175730824, "grad_norm": 0.73828125, "learning_rate": 9.980667149794765e-06, "loss": 4.0287, "step": 12333 }, { "epoch": 4.108603314733072, "grad_norm": 0.71875, "learning_rate": 9.979903254941908e-06, "loss": 3.9482, "step": 12334 }, { "epoch": 4.108936453735321, "grad_norm": 0.703125, "learning_rate": 9.979139331203553e-06, "loss": 4.0175, "step": 12335 }, { "epoch": 4.1092695927375695, "grad_norm": 0.6953125, "learning_rate": 9.97837537858859e-06, "loss": 4.0473, "step": 12336 }, { "epoch": 4.109602731739819, "grad_norm": 0.6953125, "learning_rate": 9.977611397105923e-06, "loss": 3.9942, "step": 12337 }, { "epoch": 4.109935870742067, "grad_norm": 0.69921875, "learning_rate": 9.976847386764452e-06, "loss": 4.0075, "step": 12338 }, { "epoch": 4.110269009744316, "grad_norm": 0.7109375, "learning_rate": 9.976083347573071e-06, "loss": 4.0134, "step": 12339 }, { "epoch": 4.1106021487465645, "grad_norm": 0.71484375, "learning_rate": 9.975319279540684e-06, "loss": 4.0135, "step": 12340 }, { "epoch": 4.110935287748813, "grad_norm": 0.79296875, "learning_rate": 9.974555182676189e-06, "loss": 4.0253, "step": 12341 }, { "epoch": 4.111268426751062, "grad_norm": 0.6953125, "learning_rate": 9.973791056988483e-06, "loss": 4.0141, "step": 12342 }, { "epoch": 4.11160156575331, "grad_norm": 0.6953125, "learning_rate": 9.973026902486472e-06, "loss": 3.9814, "step": 12343 }, { "epoch": 4.1119347047555594, "grad_norm": 0.7265625, "learning_rate": 9.972262719179055e-06, "loss": 4.0087, "step": 12344 }, { "epoch": 4.112267843757808, "grad_norm": 0.70703125, "learning_rate": 9.971498507075131e-06, "loss": 4.0256, "step": 12345 }, { "epoch": 4.112600982760057, "grad_norm": 0.72265625, "learning_rate": 9.970734266183603e-06, "loss": 4.0642, "step": 12346 }, { "epoch": 4.112934121762305, "grad_norm": 0.70703125, "learning_rate": 9.969969996513369e-06, "loss": 3.9885, "step": 12347 }, { "epoch": 4.113267260764554, "grad_norm": 0.6796875, "learning_rate": 9.96920569807334e-06, "loss": 4.0539, "step": 12348 }, { "epoch": 4.113600399766803, "grad_norm": 0.6640625, "learning_rate": 9.96844137087241e-06, "loss": 4.0486, "step": 12349 }, { "epoch": 4.113933538769051, "grad_norm": 0.71875, "learning_rate": 9.967677014919484e-06, "loss": 3.9928, "step": 12350 }, { "epoch": 4.1142666777713, "grad_norm": 0.7109375, "learning_rate": 9.966912630223467e-06, "loss": 4.0123, "step": 12351 }, { "epoch": 4.1145998167735485, "grad_norm": 0.68359375, "learning_rate": 9.96614821679326e-06, "loss": 3.9666, "step": 12352 }, { "epoch": 4.114932955775798, "grad_norm": 0.67578125, "learning_rate": 9.965383774637771e-06, "loss": 4.0242, "step": 12353 }, { "epoch": 4.115266094778046, "grad_norm": 0.69921875, "learning_rate": 9.964619303765898e-06, "loss": 4.0295, "step": 12354 }, { "epoch": 4.115599233780295, "grad_norm": 0.69140625, "learning_rate": 9.96385480418655e-06, "loss": 4.0435, "step": 12355 }, { "epoch": 4.115932372782543, "grad_norm": 0.6953125, "learning_rate": 9.963090275908628e-06, "loss": 4.0567, "step": 12356 }, { "epoch": 4.116265511784793, "grad_norm": 0.74609375, "learning_rate": 9.962325718941043e-06, "loss": 3.9696, "step": 12357 }, { "epoch": 4.116598650787041, "grad_norm": 0.703125, "learning_rate": 9.961561133292696e-06, "loss": 4.0551, "step": 12358 }, { "epoch": 4.116931789789289, "grad_norm": 0.73828125, "learning_rate": 9.96079651897249e-06, "loss": 3.9825, "step": 12359 }, { "epoch": 4.117264928791538, "grad_norm": 0.7109375, "learning_rate": 9.960031875989337e-06, "loss": 3.987, "step": 12360 }, { "epoch": 4.117598067793787, "grad_norm": 0.70703125, "learning_rate": 9.959267204352141e-06, "loss": 4.021, "step": 12361 }, { "epoch": 4.117931206796036, "grad_norm": 0.6875, "learning_rate": 9.95850250406981e-06, "loss": 4.0741, "step": 12362 }, { "epoch": 4.118264345798284, "grad_norm": 0.7265625, "learning_rate": 9.957737775151247e-06, "loss": 4.0419, "step": 12363 }, { "epoch": 4.118597484800533, "grad_norm": 0.76171875, "learning_rate": 9.956973017605366e-06, "loss": 4.038, "step": 12364 }, { "epoch": 4.118930623802782, "grad_norm": 0.71484375, "learning_rate": 9.956208231441068e-06, "loss": 3.9372, "step": 12365 }, { "epoch": 4.119263762805031, "grad_norm": 0.72265625, "learning_rate": 9.955443416667268e-06, "loss": 4.0738, "step": 12366 }, { "epoch": 4.119596901807279, "grad_norm": 0.71484375, "learning_rate": 9.954678573292867e-06, "loss": 3.9626, "step": 12367 }, { "epoch": 4.119930040809527, "grad_norm": 0.7109375, "learning_rate": 9.953913701326779e-06, "loss": 4.0074, "step": 12368 }, { "epoch": 4.120263179811777, "grad_norm": 0.6953125, "learning_rate": 9.95314880077791e-06, "loss": 3.9618, "step": 12369 }, { "epoch": 4.120596318814025, "grad_norm": 0.71484375, "learning_rate": 9.952383871655173e-06, "loss": 4.0409, "step": 12370 }, { "epoch": 4.120929457816274, "grad_norm": 0.73046875, "learning_rate": 9.951618913967477e-06, "loss": 3.9948, "step": 12371 }, { "epoch": 4.121262596818522, "grad_norm": 0.734375, "learning_rate": 9.950853927723725e-06, "loss": 4.0239, "step": 12372 }, { "epoch": 4.1215957358207715, "grad_norm": 0.73828125, "learning_rate": 9.95008891293284e-06, "loss": 3.9935, "step": 12373 }, { "epoch": 4.12192887482302, "grad_norm": 0.72265625, "learning_rate": 9.94932386960372e-06, "loss": 4.0519, "step": 12374 }, { "epoch": 4.122262013825269, "grad_norm": 0.703125, "learning_rate": 9.948558797745288e-06, "loss": 4.0019, "step": 12375 }, { "epoch": 4.122595152827517, "grad_norm": 0.703125, "learning_rate": 9.947793697366446e-06, "loss": 4.007, "step": 12376 }, { "epoch": 4.122928291829766, "grad_norm": 0.71484375, "learning_rate": 9.94702856847611e-06, "loss": 3.8941, "step": 12377 }, { "epoch": 4.123261430832015, "grad_norm": 0.72265625, "learning_rate": 9.94626341108319e-06, "loss": 3.9325, "step": 12378 }, { "epoch": 4.123594569834263, "grad_norm": 0.70703125, "learning_rate": 9.945498225196604e-06, "loss": 3.9874, "step": 12379 }, { "epoch": 4.123927708836512, "grad_norm": 0.72265625, "learning_rate": 9.944733010825259e-06, "loss": 3.9935, "step": 12380 }, { "epoch": 4.124260847838761, "grad_norm": 0.6796875, "learning_rate": 9.943967767978066e-06, "loss": 4.0497, "step": 12381 }, { "epoch": 4.12459398684101, "grad_norm": 0.7109375, "learning_rate": 9.943202496663946e-06, "loss": 4.0395, "step": 12382 }, { "epoch": 4.124927125843258, "grad_norm": 0.69921875, "learning_rate": 9.942437196891807e-06, "loss": 4.0251, "step": 12383 }, { "epoch": 4.125260264845506, "grad_norm": 0.6875, "learning_rate": 9.941671868670569e-06, "loss": 3.9677, "step": 12384 }, { "epoch": 4.1255934038477555, "grad_norm": 0.69921875, "learning_rate": 9.940906512009136e-06, "loss": 4.0548, "step": 12385 }, { "epoch": 4.125926542850004, "grad_norm": 0.703125, "learning_rate": 9.940141126916434e-06, "loss": 3.9801, "step": 12386 }, { "epoch": 4.126259681852253, "grad_norm": 0.6640625, "learning_rate": 9.93937571340137e-06, "loss": 4.0187, "step": 12387 }, { "epoch": 4.126592820854501, "grad_norm": 0.71484375, "learning_rate": 9.938610271472864e-06, "loss": 4.0123, "step": 12388 }, { "epoch": 4.1269259598567505, "grad_norm": 0.73828125, "learning_rate": 9.937844801139832e-06, "loss": 3.974, "step": 12389 }, { "epoch": 4.127259098858999, "grad_norm": 0.7109375, "learning_rate": 9.937079302411187e-06, "loss": 4.0875, "step": 12390 }, { "epoch": 4.127592237861248, "grad_norm": 0.7109375, "learning_rate": 9.936313775295845e-06, "loss": 3.9884, "step": 12391 }, { "epoch": 4.127925376863496, "grad_norm": 0.7109375, "learning_rate": 9.935548219802727e-06, "loss": 3.9675, "step": 12392 }, { "epoch": 4.128258515865745, "grad_norm": 0.6953125, "learning_rate": 9.934782635940749e-06, "loss": 3.9261, "step": 12393 }, { "epoch": 4.128591654867994, "grad_norm": 0.69921875, "learning_rate": 9.934017023718823e-06, "loss": 4.0305, "step": 12394 }, { "epoch": 4.128924793870242, "grad_norm": 0.6875, "learning_rate": 9.933251383145873e-06, "loss": 3.9902, "step": 12395 }, { "epoch": 4.129257932872491, "grad_norm": 0.69921875, "learning_rate": 9.932485714230815e-06, "loss": 3.9754, "step": 12396 }, { "epoch": 4.1295910718747395, "grad_norm": 0.66015625, "learning_rate": 9.931720016982565e-06, "loss": 4.0411, "step": 12397 }, { "epoch": 4.129924210876989, "grad_norm": 0.7109375, "learning_rate": 9.930954291410047e-06, "loss": 4.0291, "step": 12398 }, { "epoch": 4.130257349879237, "grad_norm": 0.68359375, "learning_rate": 9.930188537522178e-06, "loss": 3.9789, "step": 12399 }, { "epoch": 4.130590488881486, "grad_norm": 0.734375, "learning_rate": 9.92942275532787e-06, "loss": 4.0127, "step": 12400 }, { "epoch": 4.1309236278837345, "grad_norm": 0.69921875, "learning_rate": 9.928656944836054e-06, "loss": 4.0658, "step": 12401 }, { "epoch": 4.131256766885983, "grad_norm": 0.74609375, "learning_rate": 9.927891106055644e-06, "loss": 3.98, "step": 12402 }, { "epoch": 4.131589905888232, "grad_norm": 0.77734375, "learning_rate": 9.92712523899556e-06, "loss": 4.0018, "step": 12403 }, { "epoch": 4.13192304489048, "grad_norm": 0.65625, "learning_rate": 9.926359343664728e-06, "loss": 3.9838, "step": 12404 }, { "epoch": 4.132256183892729, "grad_norm": 0.71484375, "learning_rate": 9.925593420072061e-06, "loss": 4.0022, "step": 12405 }, { "epoch": 4.132589322894978, "grad_norm": 0.71484375, "learning_rate": 9.924827468226485e-06, "loss": 4.0245, "step": 12406 }, { "epoch": 4.132922461897227, "grad_norm": 0.703125, "learning_rate": 9.924061488136924e-06, "loss": 4.0313, "step": 12407 }, { "epoch": 4.133255600899475, "grad_norm": 0.68359375, "learning_rate": 9.923295479812296e-06, "loss": 4.0134, "step": 12408 }, { "epoch": 4.133588739901724, "grad_norm": 0.75390625, "learning_rate": 9.922529443261523e-06, "loss": 4.0163, "step": 12409 }, { "epoch": 4.133921878903973, "grad_norm": 0.6875, "learning_rate": 9.92176337849353e-06, "loss": 4.0244, "step": 12410 }, { "epoch": 4.134255017906221, "grad_norm": 0.703125, "learning_rate": 9.92099728551724e-06, "loss": 4.0341, "step": 12411 }, { "epoch": 4.13458815690847, "grad_norm": 0.73828125, "learning_rate": 9.920231164341575e-06, "loss": 3.9932, "step": 12412 }, { "epoch": 4.134921295910718, "grad_norm": 0.67578125, "learning_rate": 9.919465014975458e-06, "loss": 4.0001, "step": 12413 }, { "epoch": 4.135254434912968, "grad_norm": 0.7265625, "learning_rate": 9.918698837427816e-06, "loss": 3.9794, "step": 12414 }, { "epoch": 4.135587573915216, "grad_norm": 0.6953125, "learning_rate": 9.91793263170757e-06, "loss": 4.063, "step": 12415 }, { "epoch": 4.135920712917465, "grad_norm": 0.76171875, "learning_rate": 9.917166397823646e-06, "loss": 3.9357, "step": 12416 }, { "epoch": 4.136253851919713, "grad_norm": 0.70703125, "learning_rate": 9.91640013578497e-06, "loss": 3.9551, "step": 12417 }, { "epoch": 4.136586990921963, "grad_norm": 0.7265625, "learning_rate": 9.915633845600463e-06, "loss": 3.9692, "step": 12418 }, { "epoch": 4.136920129924211, "grad_norm": 0.7421875, "learning_rate": 9.91486752727906e-06, "loss": 3.9983, "step": 12419 }, { "epoch": 4.137253268926459, "grad_norm": 0.7265625, "learning_rate": 9.914101180829677e-06, "loss": 4.0589, "step": 12420 }, { "epoch": 4.137586407928708, "grad_norm": 0.6640625, "learning_rate": 9.913334806261247e-06, "loss": 4.0353, "step": 12421 }, { "epoch": 4.137919546930957, "grad_norm": 0.70703125, "learning_rate": 9.91256840358269e-06, "loss": 4.0181, "step": 12422 }, { "epoch": 4.138252685933206, "grad_norm": 0.703125, "learning_rate": 9.911801972802937e-06, "loss": 4.0683, "step": 12423 }, { "epoch": 4.138585824935454, "grad_norm": 0.703125, "learning_rate": 9.911035513930918e-06, "loss": 4.0123, "step": 12424 }, { "epoch": 4.138918963937703, "grad_norm": 0.7421875, "learning_rate": 9.910269026975554e-06, "loss": 4.0196, "step": 12425 }, { "epoch": 4.139252102939952, "grad_norm": 0.69921875, "learning_rate": 9.90950251194578e-06, "loss": 4.0158, "step": 12426 }, { "epoch": 4.139585241942201, "grad_norm": 0.6875, "learning_rate": 9.908735968850518e-06, "loss": 4.0686, "step": 12427 }, { "epoch": 4.139918380944449, "grad_norm": 0.73046875, "learning_rate": 9.907969397698702e-06, "loss": 4.0006, "step": 12428 }, { "epoch": 4.140251519946697, "grad_norm": 0.734375, "learning_rate": 9.907202798499255e-06, "loss": 4.0035, "step": 12429 }, { "epoch": 4.140584658948947, "grad_norm": 0.73828125, "learning_rate": 9.906436171261113e-06, "loss": 3.943, "step": 12430 }, { "epoch": 4.140917797951195, "grad_norm": 0.71484375, "learning_rate": 9.905669515993197e-06, "loss": 4.0662, "step": 12431 }, { "epoch": 4.141250936953444, "grad_norm": 0.703125, "learning_rate": 9.904902832704448e-06, "loss": 3.9663, "step": 12432 }, { "epoch": 4.141584075955692, "grad_norm": 0.73828125, "learning_rate": 9.904136121403785e-06, "loss": 3.985, "step": 12433 }, { "epoch": 4.1419172149579415, "grad_norm": 0.7109375, "learning_rate": 9.903369382100146e-06, "loss": 3.9707, "step": 12434 }, { "epoch": 4.14225035396019, "grad_norm": 0.71875, "learning_rate": 9.902602614802457e-06, "loss": 3.9708, "step": 12435 }, { "epoch": 4.142583492962439, "grad_norm": 0.703125, "learning_rate": 9.901835819519655e-06, "loss": 4.0106, "step": 12436 }, { "epoch": 4.142916631964687, "grad_norm": 0.703125, "learning_rate": 9.901068996260666e-06, "loss": 4.0495, "step": 12437 }, { "epoch": 4.143249770966936, "grad_norm": 0.69921875, "learning_rate": 9.900302145034424e-06, "loss": 3.9507, "step": 12438 }, { "epoch": 4.143582909969185, "grad_norm": 0.703125, "learning_rate": 9.899535265849862e-06, "loss": 3.9748, "step": 12439 }, { "epoch": 4.143916048971433, "grad_norm": 0.671875, "learning_rate": 9.89876835871591e-06, "loss": 4.022, "step": 12440 }, { "epoch": 4.144249187973682, "grad_norm": 0.67578125, "learning_rate": 9.898001423641504e-06, "loss": 4.0489, "step": 12441 }, { "epoch": 4.1445823269759305, "grad_norm": 0.73828125, "learning_rate": 9.897234460635572e-06, "loss": 3.9764, "step": 12442 }, { "epoch": 4.14491546597818, "grad_norm": 0.74609375, "learning_rate": 9.896467469707056e-06, "loss": 3.9622, "step": 12443 }, { "epoch": 4.145248604980428, "grad_norm": 0.6953125, "learning_rate": 9.895700450864881e-06, "loss": 3.9936, "step": 12444 }, { "epoch": 4.145581743982676, "grad_norm": 0.70703125, "learning_rate": 9.894933404117987e-06, "loss": 4.0819, "step": 12445 }, { "epoch": 4.1459148829849255, "grad_norm": 0.7109375, "learning_rate": 9.894166329475304e-06, "loss": 4.008, "step": 12446 }, { "epoch": 4.146248021987174, "grad_norm": 0.7265625, "learning_rate": 9.893399226945771e-06, "loss": 3.9315, "step": 12447 }, { "epoch": 4.146581160989423, "grad_norm": 0.6796875, "learning_rate": 9.89263209653832e-06, "loss": 3.9509, "step": 12448 }, { "epoch": 4.146914299991671, "grad_norm": 0.70703125, "learning_rate": 9.891864938261888e-06, "loss": 4.0525, "step": 12449 }, { "epoch": 4.1472474389939205, "grad_norm": 0.71484375, "learning_rate": 9.89109775212541e-06, "loss": 3.9664, "step": 12450 }, { "epoch": 4.147580577996169, "grad_norm": 0.72265625, "learning_rate": 9.890330538137822e-06, "loss": 3.9783, "step": 12451 }, { "epoch": 4.147913716998418, "grad_norm": 0.75, "learning_rate": 9.889563296308064e-06, "loss": 4.0124, "step": 12452 }, { "epoch": 4.148246856000666, "grad_norm": 0.70703125, "learning_rate": 9.888796026645066e-06, "loss": 3.9718, "step": 12453 }, { "epoch": 4.148579995002915, "grad_norm": 0.73046875, "learning_rate": 9.888028729157768e-06, "loss": 3.9372, "step": 12454 }, { "epoch": 4.148913134005164, "grad_norm": 0.73828125, "learning_rate": 9.887261403855109e-06, "loss": 3.987, "step": 12455 }, { "epoch": 4.149246273007412, "grad_norm": 0.7265625, "learning_rate": 9.886494050746026e-06, "loss": 4.0298, "step": 12456 }, { "epoch": 4.149579412009661, "grad_norm": 0.6875, "learning_rate": 9.885726669839456e-06, "loss": 3.9577, "step": 12457 }, { "epoch": 4.1499125510119095, "grad_norm": 0.71484375, "learning_rate": 9.884959261144338e-06, "loss": 4.0397, "step": 12458 }, { "epoch": 4.150245690014159, "grad_norm": 0.765625, "learning_rate": 9.884191824669608e-06, "loss": 3.9973, "step": 12459 }, { "epoch": 4.150578829016407, "grad_norm": 0.73046875, "learning_rate": 9.88342436042421e-06, "loss": 4.0339, "step": 12460 }, { "epoch": 4.150911968018656, "grad_norm": 0.6796875, "learning_rate": 9.88265686841708e-06, "loss": 4.0194, "step": 12461 }, { "epoch": 4.151245107020904, "grad_norm": 0.71484375, "learning_rate": 9.881889348657158e-06, "loss": 3.9546, "step": 12462 }, { "epoch": 4.151578246023153, "grad_norm": 0.7109375, "learning_rate": 9.881121801153384e-06, "loss": 4.0286, "step": 12463 }, { "epoch": 4.151911385025402, "grad_norm": 0.703125, "learning_rate": 9.880354225914699e-06, "loss": 4.0303, "step": 12464 }, { "epoch": 4.15224452402765, "grad_norm": 0.72265625, "learning_rate": 9.879586622950045e-06, "loss": 3.9757, "step": 12465 }, { "epoch": 4.152577663029899, "grad_norm": 0.71484375, "learning_rate": 9.878818992268357e-06, "loss": 3.9565, "step": 12466 }, { "epoch": 4.152910802032148, "grad_norm": 0.69921875, "learning_rate": 9.878051333878582e-06, "loss": 4.0475, "step": 12467 }, { "epoch": 4.153243941034397, "grad_norm": 0.6796875, "learning_rate": 9.87728364778966e-06, "loss": 4.037, "step": 12468 }, { "epoch": 4.153577080036645, "grad_norm": 0.73046875, "learning_rate": 9.876515934010534e-06, "loss": 4.0116, "step": 12469 }, { "epoch": 4.153910219038894, "grad_norm": 0.71875, "learning_rate": 9.875748192550144e-06, "loss": 4.004, "step": 12470 }, { "epoch": 4.154243358041143, "grad_norm": 0.69140625, "learning_rate": 9.874980423417432e-06, "loss": 4.0355, "step": 12471 }, { "epoch": 4.154576497043391, "grad_norm": 0.70703125, "learning_rate": 9.874212626621345e-06, "loss": 4.0336, "step": 12472 }, { "epoch": 4.15490963604564, "grad_norm": 0.72265625, "learning_rate": 9.87344480217082e-06, "loss": 3.968, "step": 12473 }, { "epoch": 4.155242775047888, "grad_norm": 0.73828125, "learning_rate": 9.872676950074807e-06, "loss": 4.0386, "step": 12474 }, { "epoch": 4.155575914050138, "grad_norm": 0.70703125, "learning_rate": 9.871909070342245e-06, "loss": 3.9811, "step": 12475 }, { "epoch": 4.155909053052386, "grad_norm": 0.72265625, "learning_rate": 9.871141162982082e-06, "loss": 3.9374, "step": 12476 }, { "epoch": 4.156242192054635, "grad_norm": 0.73046875, "learning_rate": 9.870373228003257e-06, "loss": 4.0116, "step": 12477 }, { "epoch": 4.156575331056883, "grad_norm": 0.68359375, "learning_rate": 9.869605265414723e-06, "loss": 3.9996, "step": 12478 }, { "epoch": 4.1569084700591326, "grad_norm": 0.76171875, "learning_rate": 9.868837275225413e-06, "loss": 3.9885, "step": 12479 }, { "epoch": 4.157241609061381, "grad_norm": 0.7265625, "learning_rate": 9.868069257444285e-06, "loss": 4.0007, "step": 12480 }, { "epoch": 4.157574748063629, "grad_norm": 0.70703125, "learning_rate": 9.867301212080278e-06, "loss": 3.9771, "step": 12481 }, { "epoch": 4.157907887065878, "grad_norm": 0.69140625, "learning_rate": 9.86653313914234e-06, "loss": 4.007, "step": 12482 }, { "epoch": 4.158241026068127, "grad_norm": 0.69140625, "learning_rate": 9.865765038639418e-06, "loss": 4.0138, "step": 12483 }, { "epoch": 4.158574165070376, "grad_norm": 0.70703125, "learning_rate": 9.864996910580454e-06, "loss": 3.9823, "step": 12484 }, { "epoch": 4.158907304072624, "grad_norm": 0.7265625, "learning_rate": 9.8642287549744e-06, "loss": 3.9891, "step": 12485 }, { "epoch": 4.159240443074873, "grad_norm": 0.67578125, "learning_rate": 9.863460571830203e-06, "loss": 3.9839, "step": 12486 }, { "epoch": 4.159573582077122, "grad_norm": 0.734375, "learning_rate": 9.86269236115681e-06, "loss": 3.882, "step": 12487 }, { "epoch": 4.159906721079371, "grad_norm": 0.72265625, "learning_rate": 9.861924122963167e-06, "loss": 3.9935, "step": 12488 }, { "epoch": 4.160239860081619, "grad_norm": 0.73828125, "learning_rate": 9.861155857258226e-06, "loss": 3.9687, "step": 12489 }, { "epoch": 4.160572999083867, "grad_norm": 0.71875, "learning_rate": 9.860387564050931e-06, "loss": 4.0098, "step": 12490 }, { "epoch": 4.1609061380861165, "grad_norm": 0.72265625, "learning_rate": 9.859619243350235e-06, "loss": 4.0599, "step": 12491 }, { "epoch": 4.161239277088365, "grad_norm": 0.7265625, "learning_rate": 9.858850895165084e-06, "loss": 3.9442, "step": 12492 }, { "epoch": 4.161572416090614, "grad_norm": 0.7109375, "learning_rate": 9.85808251950443e-06, "loss": 4.0056, "step": 12493 }, { "epoch": 4.161905555092862, "grad_norm": 0.71875, "learning_rate": 9.857314116377222e-06, "loss": 3.9964, "step": 12494 }, { "epoch": 4.1622386940951115, "grad_norm": 0.72265625, "learning_rate": 9.856545685792412e-06, "loss": 4.016, "step": 12495 }, { "epoch": 4.16257183309736, "grad_norm": 0.75390625, "learning_rate": 9.85577722775895e-06, "loss": 4.0308, "step": 12496 }, { "epoch": 4.162904972099609, "grad_norm": 0.6953125, "learning_rate": 9.855008742285783e-06, "loss": 3.9467, "step": 12497 }, { "epoch": 4.163238111101857, "grad_norm": 0.7109375, "learning_rate": 9.854240229381868e-06, "loss": 4.05, "step": 12498 }, { "epoch": 4.163571250104106, "grad_norm": 0.6875, "learning_rate": 9.85347168905615e-06, "loss": 3.9717, "step": 12499 }, { "epoch": 4.163904389106355, "grad_norm": 0.7109375, "learning_rate": 9.852703121317586e-06, "loss": 4.0453, "step": 12500 }, { "epoch": 4.164237528108603, "grad_norm": 0.73046875, "learning_rate": 9.851934526175128e-06, "loss": 4.0648, "step": 12501 }, { "epoch": 4.164570667110852, "grad_norm": 0.7265625, "learning_rate": 9.851165903637725e-06, "loss": 4.0539, "step": 12502 }, { "epoch": 4.1649038061131005, "grad_norm": 0.72265625, "learning_rate": 9.850397253714333e-06, "loss": 4.01, "step": 12503 }, { "epoch": 4.16523694511535, "grad_norm": 0.72265625, "learning_rate": 9.849628576413904e-06, "loss": 4.004, "step": 12504 }, { "epoch": 4.165570084117598, "grad_norm": 0.7421875, "learning_rate": 9.848859871745392e-06, "loss": 3.9341, "step": 12505 }, { "epoch": 4.165903223119847, "grad_norm": 0.75390625, "learning_rate": 9.848091139717748e-06, "loss": 4.1131, "step": 12506 }, { "epoch": 4.1662363621220955, "grad_norm": 0.7265625, "learning_rate": 9.84732238033993e-06, "loss": 4.0073, "step": 12507 }, { "epoch": 4.166569501124344, "grad_norm": 0.73828125, "learning_rate": 9.846553593620889e-06, "loss": 3.9916, "step": 12508 }, { "epoch": 4.166902640126593, "grad_norm": 0.72265625, "learning_rate": 9.845784779569583e-06, "loss": 3.9698, "step": 12509 }, { "epoch": 4.167235779128841, "grad_norm": 0.7109375, "learning_rate": 9.845015938194966e-06, "loss": 4.006, "step": 12510 }, { "epoch": 4.16756891813109, "grad_norm": 0.7109375, "learning_rate": 9.844247069505989e-06, "loss": 3.9126, "step": 12511 }, { "epoch": 4.167902057133339, "grad_norm": 0.75390625, "learning_rate": 9.843478173511614e-06, "loss": 3.9288, "step": 12512 }, { "epoch": 4.168235196135588, "grad_norm": 0.72265625, "learning_rate": 9.842709250220794e-06, "loss": 4.0756, "step": 12513 }, { "epoch": 4.168568335137836, "grad_norm": 0.69140625, "learning_rate": 9.841940299642485e-06, "loss": 3.9946, "step": 12514 }, { "epoch": 4.168901474140085, "grad_norm": 0.6953125, "learning_rate": 9.841171321785644e-06, "loss": 4.03, "step": 12515 }, { "epoch": 4.169234613142334, "grad_norm": 0.75, "learning_rate": 9.840402316659229e-06, "loss": 4.0684, "step": 12516 }, { "epoch": 4.169567752144582, "grad_norm": 0.76953125, "learning_rate": 9.839633284272196e-06, "loss": 4.0009, "step": 12517 }, { "epoch": 4.169900891146831, "grad_norm": 0.77734375, "learning_rate": 9.838864224633502e-06, "loss": 3.976, "step": 12518 }, { "epoch": 4.1702340301490795, "grad_norm": 0.703125, "learning_rate": 9.838095137752105e-06, "loss": 4.0347, "step": 12519 }, { "epoch": 4.170567169151329, "grad_norm": 0.72265625, "learning_rate": 9.837326023636967e-06, "loss": 3.9714, "step": 12520 }, { "epoch": 4.170900308153577, "grad_norm": 0.8046875, "learning_rate": 9.836556882297041e-06, "loss": 4.0711, "step": 12521 }, { "epoch": 4.171233447155826, "grad_norm": 0.73046875, "learning_rate": 9.835787713741289e-06, "loss": 4.0079, "step": 12522 }, { "epoch": 4.171566586158074, "grad_norm": 0.6953125, "learning_rate": 9.83501851797867e-06, "loss": 3.9563, "step": 12523 }, { "epoch": 4.171899725160323, "grad_norm": 0.73828125, "learning_rate": 9.834249295018144e-06, "loss": 4.0126, "step": 12524 }, { "epoch": 4.172232864162572, "grad_norm": 0.7109375, "learning_rate": 9.833480044868665e-06, "loss": 3.9938, "step": 12525 }, { "epoch": 4.17256600316482, "grad_norm": 0.6875, "learning_rate": 9.832710767539204e-06, "loss": 3.9632, "step": 12526 }, { "epoch": 4.172899142167069, "grad_norm": 0.71875, "learning_rate": 9.83194146303871e-06, "loss": 3.9493, "step": 12527 }, { "epoch": 4.173232281169318, "grad_norm": 0.73828125, "learning_rate": 9.83117213137615e-06, "loss": 4.0514, "step": 12528 }, { "epoch": 4.173565420171567, "grad_norm": 0.703125, "learning_rate": 9.830402772560488e-06, "loss": 3.9861, "step": 12529 }, { "epoch": 4.173898559173815, "grad_norm": 0.71484375, "learning_rate": 9.829633386600676e-06, "loss": 3.9721, "step": 12530 }, { "epoch": 4.174231698176064, "grad_norm": 0.69140625, "learning_rate": 9.828863973505683e-06, "loss": 4.0633, "step": 12531 }, { "epoch": 4.174564837178313, "grad_norm": 0.671875, "learning_rate": 9.828094533284471e-06, "loss": 3.9961, "step": 12532 }, { "epoch": 4.174897976180561, "grad_norm": 0.7265625, "learning_rate": 9.827325065945997e-06, "loss": 4.0907, "step": 12533 }, { "epoch": 4.17523111518281, "grad_norm": 0.7265625, "learning_rate": 9.82655557149923e-06, "loss": 3.9533, "step": 12534 }, { "epoch": 4.175564254185058, "grad_norm": 0.73828125, "learning_rate": 9.825786049953128e-06, "loss": 3.9836, "step": 12535 }, { "epoch": 4.175897393187308, "grad_norm": 0.71484375, "learning_rate": 9.825016501316655e-06, "loss": 3.921, "step": 12536 }, { "epoch": 4.176230532189556, "grad_norm": 0.72265625, "learning_rate": 9.82424692559878e-06, "loss": 3.9743, "step": 12537 }, { "epoch": 4.176563671191805, "grad_norm": 0.734375, "learning_rate": 9.82347732280846e-06, "loss": 3.998, "step": 12538 }, { "epoch": 4.176896810194053, "grad_norm": 0.70703125, "learning_rate": 9.82270769295466e-06, "loss": 4.0583, "step": 12539 }, { "epoch": 4.1772299491963025, "grad_norm": 0.79296875, "learning_rate": 9.82193803604635e-06, "loss": 4.0121, "step": 12540 }, { "epoch": 4.177563088198551, "grad_norm": 0.7109375, "learning_rate": 9.82116835209249e-06, "loss": 3.9999, "step": 12541 }, { "epoch": 4.177896227200799, "grad_norm": 0.7421875, "learning_rate": 9.820398641102046e-06, "loss": 4.0605, "step": 12542 }, { "epoch": 4.178229366203048, "grad_norm": 0.7421875, "learning_rate": 9.819628903083982e-06, "loss": 4.0044, "step": 12543 }, { "epoch": 4.178562505205297, "grad_norm": 0.7734375, "learning_rate": 9.818859138047268e-06, "loss": 4.0796, "step": 12544 }, { "epoch": 4.178895644207546, "grad_norm": 0.6875, "learning_rate": 9.818089346000865e-06, "loss": 4.0487, "step": 12545 }, { "epoch": 4.179228783209794, "grad_norm": 0.7578125, "learning_rate": 9.817319526953747e-06, "loss": 3.9594, "step": 12546 }, { "epoch": 4.179561922212043, "grad_norm": 0.734375, "learning_rate": 9.81654968091487e-06, "loss": 3.9178, "step": 12547 }, { "epoch": 4.1798950612142916, "grad_norm": 0.73828125, "learning_rate": 9.815779807893211e-06, "loss": 4.0431, "step": 12548 }, { "epoch": 4.180228200216541, "grad_norm": 0.66796875, "learning_rate": 9.815009907897731e-06, "loss": 3.9663, "step": 12549 }, { "epoch": 4.180561339218789, "grad_norm": 0.72265625, "learning_rate": 9.814239980937401e-06, "loss": 4.0819, "step": 12550 }, { "epoch": 4.180894478221037, "grad_norm": 0.72265625, "learning_rate": 9.813470027021188e-06, "loss": 3.9725, "step": 12551 }, { "epoch": 4.1812276172232865, "grad_norm": 0.71875, "learning_rate": 9.81270004615806e-06, "loss": 4.01, "step": 12552 }, { "epoch": 4.181560756225535, "grad_norm": 0.71484375, "learning_rate": 9.811930038356985e-06, "loss": 3.9601, "step": 12553 }, { "epoch": 4.181893895227784, "grad_norm": 0.71875, "learning_rate": 9.811160003626931e-06, "loss": 3.9002, "step": 12554 }, { "epoch": 4.182227034230032, "grad_norm": 0.76171875, "learning_rate": 9.810389941976874e-06, "loss": 3.9804, "step": 12555 }, { "epoch": 4.1825601732322815, "grad_norm": 0.69921875, "learning_rate": 9.809619853415775e-06, "loss": 3.951, "step": 12556 }, { "epoch": 4.18289331223453, "grad_norm": 0.7265625, "learning_rate": 9.808849737952607e-06, "loss": 4.0113, "step": 12557 }, { "epoch": 4.183226451236779, "grad_norm": 0.703125, "learning_rate": 9.80807959559634e-06, "loss": 3.9992, "step": 12558 }, { "epoch": 4.183559590239027, "grad_norm": 0.75, "learning_rate": 9.807309426355948e-06, "loss": 3.8948, "step": 12559 }, { "epoch": 4.1838927292412755, "grad_norm": 0.6953125, "learning_rate": 9.8065392302404e-06, "loss": 3.9504, "step": 12560 }, { "epoch": 4.184225868243525, "grad_norm": 0.74609375, "learning_rate": 9.805769007258661e-06, "loss": 3.9887, "step": 12561 }, { "epoch": 4.184559007245773, "grad_norm": 0.75, "learning_rate": 9.804998757419709e-06, "loss": 4.0611, "step": 12562 }, { "epoch": 4.184892146248022, "grad_norm": 0.6875, "learning_rate": 9.804228480732514e-06, "loss": 3.9294, "step": 12563 }, { "epoch": 4.1852252852502705, "grad_norm": 0.73046875, "learning_rate": 9.803458177206052e-06, "loss": 4.0933, "step": 12564 }, { "epoch": 4.18555842425252, "grad_norm": 0.7109375, "learning_rate": 9.802687846849285e-06, "loss": 3.9927, "step": 12565 }, { "epoch": 4.185891563254768, "grad_norm": 0.69140625, "learning_rate": 9.801917489671197e-06, "loss": 4.0359, "step": 12566 }, { "epoch": 4.186224702257017, "grad_norm": 0.703125, "learning_rate": 9.801147105680754e-06, "loss": 3.9645, "step": 12567 }, { "epoch": 4.1865578412592654, "grad_norm": 0.72265625, "learning_rate": 9.800376694886934e-06, "loss": 4.0488, "step": 12568 }, { "epoch": 4.186890980261514, "grad_norm": 0.72265625, "learning_rate": 9.799606257298706e-06, "loss": 4.1028, "step": 12569 }, { "epoch": 4.187224119263763, "grad_norm": 0.6875, "learning_rate": 9.798835792925046e-06, "loss": 4.0727, "step": 12570 }, { "epoch": 4.187557258266011, "grad_norm": 0.703125, "learning_rate": 9.798065301774928e-06, "loss": 3.9912, "step": 12571 }, { "epoch": 4.18789039726826, "grad_norm": 0.6640625, "learning_rate": 9.797294783857327e-06, "loss": 3.9952, "step": 12572 }, { "epoch": 4.188223536270509, "grad_norm": 0.67578125, "learning_rate": 9.796524239181219e-06, "loss": 4.057, "step": 12573 }, { "epoch": 4.188556675272758, "grad_norm": 0.68359375, "learning_rate": 9.795753667755576e-06, "loss": 4.023, "step": 12574 }, { "epoch": 4.188889814275006, "grad_norm": 0.70703125, "learning_rate": 9.794983069589378e-06, "loss": 3.9935, "step": 12575 }, { "epoch": 4.189222953277255, "grad_norm": 0.73046875, "learning_rate": 9.794212444691594e-06, "loss": 3.9767, "step": 12576 }, { "epoch": 4.189556092279504, "grad_norm": 0.70703125, "learning_rate": 9.79344179307121e-06, "loss": 3.9558, "step": 12577 }, { "epoch": 4.189889231281752, "grad_norm": 0.734375, "learning_rate": 9.792671114737192e-06, "loss": 4.0194, "step": 12578 }, { "epoch": 4.190222370284001, "grad_norm": 0.6875, "learning_rate": 9.791900409698524e-06, "loss": 4.0123, "step": 12579 }, { "epoch": 4.190555509286249, "grad_norm": 0.71484375, "learning_rate": 9.791129677964178e-06, "loss": 4.0022, "step": 12580 }, { "epoch": 4.190888648288499, "grad_norm": 0.72265625, "learning_rate": 9.790358919543135e-06, "loss": 3.9728, "step": 12581 }, { "epoch": 4.191221787290747, "grad_norm": 0.734375, "learning_rate": 9.789588134444372e-06, "loss": 3.9723, "step": 12582 }, { "epoch": 4.191554926292996, "grad_norm": 0.69140625, "learning_rate": 9.788817322676866e-06, "loss": 4.02, "step": 12583 }, { "epoch": 4.191888065295244, "grad_norm": 0.6640625, "learning_rate": 9.788046484249596e-06, "loss": 4.0401, "step": 12584 }, { "epoch": 4.192221204297493, "grad_norm": 0.7265625, "learning_rate": 9.787275619171539e-06, "loss": 3.9631, "step": 12585 }, { "epoch": 4.192554343299742, "grad_norm": 0.6875, "learning_rate": 9.786504727451678e-06, "loss": 4.0143, "step": 12586 }, { "epoch": 4.19288748230199, "grad_norm": 0.703125, "learning_rate": 9.785733809098989e-06, "loss": 4.0247, "step": 12587 }, { "epoch": 4.193220621304239, "grad_norm": 0.734375, "learning_rate": 9.78496286412245e-06, "loss": 4.0293, "step": 12588 }, { "epoch": 4.193553760306488, "grad_norm": 0.7265625, "learning_rate": 9.784191892531043e-06, "loss": 4.0638, "step": 12589 }, { "epoch": 4.193886899308737, "grad_norm": 0.6640625, "learning_rate": 9.78342089433375e-06, "loss": 4.0209, "step": 12590 }, { "epoch": 4.194220038310985, "grad_norm": 0.73828125, "learning_rate": 9.782649869539549e-06, "loss": 3.9648, "step": 12591 }, { "epoch": 4.194553177313234, "grad_norm": 0.69140625, "learning_rate": 9.781878818157421e-06, "loss": 3.9721, "step": 12592 }, { "epoch": 4.194886316315483, "grad_norm": 0.6875, "learning_rate": 9.781107740196347e-06, "loss": 4.0863, "step": 12593 }, { "epoch": 4.195219455317732, "grad_norm": 0.7265625, "learning_rate": 9.780336635665308e-06, "loss": 4.0271, "step": 12594 }, { "epoch": 4.19555259431998, "grad_norm": 0.7109375, "learning_rate": 9.779565504573288e-06, "loss": 3.9539, "step": 12595 }, { "epoch": 4.195885733322228, "grad_norm": 0.7265625, "learning_rate": 9.778794346929266e-06, "loss": 3.9641, "step": 12596 }, { "epoch": 4.1962188723244775, "grad_norm": 0.73046875, "learning_rate": 9.778023162742227e-06, "loss": 4.0613, "step": 12597 }, { "epoch": 4.196552011326726, "grad_norm": 0.703125, "learning_rate": 9.77725195202115e-06, "loss": 4.0233, "step": 12598 }, { "epoch": 4.196885150328975, "grad_norm": 0.7109375, "learning_rate": 9.776480714775023e-06, "loss": 4.003, "step": 12599 }, { "epoch": 4.197218289331223, "grad_norm": 0.71484375, "learning_rate": 9.775709451012825e-06, "loss": 4.0292, "step": 12600 }, { "epoch": 4.1975514283334725, "grad_norm": 0.71875, "learning_rate": 9.77493816074354e-06, "loss": 3.97, "step": 12601 }, { "epoch": 4.197884567335721, "grad_norm": 0.7578125, "learning_rate": 9.774166843976155e-06, "loss": 4.0401, "step": 12602 }, { "epoch": 4.198217706337969, "grad_norm": 0.76171875, "learning_rate": 9.77339550071965e-06, "loss": 3.966, "step": 12603 }, { "epoch": 4.198550845340218, "grad_norm": 0.7109375, "learning_rate": 9.772624130983016e-06, "loss": 3.9973, "step": 12604 }, { "epoch": 4.198883984342467, "grad_norm": 0.68359375, "learning_rate": 9.771852734775228e-06, "loss": 4.0717, "step": 12605 }, { "epoch": 4.199217123344716, "grad_norm": 0.67578125, "learning_rate": 9.771081312105278e-06, "loss": 4.034, "step": 12606 }, { "epoch": 4.199550262346964, "grad_norm": 0.74609375, "learning_rate": 9.77030986298215e-06, "loss": 3.9844, "step": 12607 }, { "epoch": 4.199883401349213, "grad_norm": 0.69140625, "learning_rate": 9.769538387414834e-06, "loss": 4.0269, "step": 12608 }, { "epoch": 4.2002165403514615, "grad_norm": 0.71484375, "learning_rate": 9.768766885412307e-06, "loss": 4.0494, "step": 12609 }, { "epoch": 4.200549679353711, "grad_norm": 0.71875, "learning_rate": 9.76799535698356e-06, "loss": 3.9511, "step": 12610 }, { "epoch": 4.200882818355959, "grad_norm": 0.68359375, "learning_rate": 9.76722380213758e-06, "loss": 4.057, "step": 12611 }, { "epoch": 4.201215957358207, "grad_norm": 0.71484375, "learning_rate": 9.766452220883354e-06, "loss": 3.9922, "step": 12612 }, { "epoch": 4.2015490963604565, "grad_norm": 0.703125, "learning_rate": 9.765680613229869e-06, "loss": 4.0297, "step": 12613 }, { "epoch": 4.201882235362705, "grad_norm": 0.73828125, "learning_rate": 9.764908979186113e-06, "loss": 4.0446, "step": 12614 }, { "epoch": 4.202215374364954, "grad_norm": 0.69921875, "learning_rate": 9.764137318761071e-06, "loss": 3.9331, "step": 12615 }, { "epoch": 4.202548513367202, "grad_norm": 0.7109375, "learning_rate": 9.763365631963736e-06, "loss": 3.9591, "step": 12616 }, { "epoch": 4.202881652369451, "grad_norm": 0.74609375, "learning_rate": 9.762593918803092e-06, "loss": 4.0764, "step": 12617 }, { "epoch": 4.2032147913717, "grad_norm": 0.6796875, "learning_rate": 9.761822179288129e-06, "loss": 4.0392, "step": 12618 }, { "epoch": 4.203547930373949, "grad_norm": 0.71875, "learning_rate": 9.761050413427839e-06, "loss": 4.0055, "step": 12619 }, { "epoch": 4.203881069376197, "grad_norm": 0.6875, "learning_rate": 9.760278621231206e-06, "loss": 3.9295, "step": 12620 }, { "epoch": 4.2042142083784455, "grad_norm": 0.71875, "learning_rate": 9.759506802707225e-06, "loss": 3.957, "step": 12621 }, { "epoch": 4.204547347380695, "grad_norm": 0.6953125, "learning_rate": 9.758734957864884e-06, "loss": 4.0411, "step": 12622 }, { "epoch": 4.204880486382943, "grad_norm": 0.7265625, "learning_rate": 9.757963086713173e-06, "loss": 4.0037, "step": 12623 }, { "epoch": 4.205213625385192, "grad_norm": 0.69921875, "learning_rate": 9.757191189261081e-06, "loss": 4.0082, "step": 12624 }, { "epoch": 4.2055467643874405, "grad_norm": 0.70703125, "learning_rate": 9.756419265517603e-06, "loss": 4.068, "step": 12625 }, { "epoch": 4.20587990338969, "grad_norm": 0.734375, "learning_rate": 9.755647315491726e-06, "loss": 4.0593, "step": 12626 }, { "epoch": 4.206213042391938, "grad_norm": 0.7265625, "learning_rate": 9.754875339192445e-06, "loss": 3.936, "step": 12627 }, { "epoch": 4.206546181394187, "grad_norm": 0.67578125, "learning_rate": 9.75410333662875e-06, "loss": 3.966, "step": 12628 }, { "epoch": 4.206879320396435, "grad_norm": 0.6796875, "learning_rate": 9.753331307809632e-06, "loss": 4.0421, "step": 12629 }, { "epoch": 4.207212459398684, "grad_norm": 0.6796875, "learning_rate": 9.752559252744086e-06, "loss": 3.994, "step": 12630 }, { "epoch": 4.207545598400933, "grad_norm": 0.71484375, "learning_rate": 9.751787171441105e-06, "loss": 3.8885, "step": 12631 }, { "epoch": 4.207878737403181, "grad_norm": 0.6875, "learning_rate": 9.751015063909682e-06, "loss": 4.0196, "step": 12632 }, { "epoch": 4.20821187640543, "grad_norm": 0.73046875, "learning_rate": 9.750242930158804e-06, "loss": 4.0628, "step": 12633 }, { "epoch": 4.208545015407679, "grad_norm": 0.69140625, "learning_rate": 9.749470770197474e-06, "loss": 3.9232, "step": 12634 }, { "epoch": 4.208878154409928, "grad_norm": 0.69921875, "learning_rate": 9.748698584034682e-06, "loss": 4.0039, "step": 12635 }, { "epoch": 4.209211293412176, "grad_norm": 0.7109375, "learning_rate": 9.747926371679422e-06, "loss": 4.0067, "step": 12636 }, { "epoch": 4.209544432414425, "grad_norm": 0.71875, "learning_rate": 9.747154133140687e-06, "loss": 4.0257, "step": 12637 }, { "epoch": 4.209877571416674, "grad_norm": 0.765625, "learning_rate": 9.746381868427475e-06, "loss": 3.919, "step": 12638 }, { "epoch": 4.210210710418922, "grad_norm": 0.66796875, "learning_rate": 9.745609577548778e-06, "loss": 3.9088, "step": 12639 }, { "epoch": 4.210543849421171, "grad_norm": 0.71875, "learning_rate": 9.744837260513594e-06, "loss": 3.9498, "step": 12640 }, { "epoch": 4.210876988423419, "grad_norm": 0.7265625, "learning_rate": 9.74406491733092e-06, "loss": 3.9633, "step": 12641 }, { "epoch": 4.211210127425669, "grad_norm": 0.765625, "learning_rate": 9.743292548009748e-06, "loss": 4.0409, "step": 12642 }, { "epoch": 4.211543266427917, "grad_norm": 0.7421875, "learning_rate": 9.742520152559079e-06, "loss": 3.9584, "step": 12643 }, { "epoch": 4.211876405430166, "grad_norm": 0.71484375, "learning_rate": 9.741747730987906e-06, "loss": 3.9647, "step": 12644 }, { "epoch": 4.212209544432414, "grad_norm": 0.71484375, "learning_rate": 9.74097528330523e-06, "loss": 4.0568, "step": 12645 }, { "epoch": 4.2125426834346635, "grad_norm": 0.74609375, "learning_rate": 9.740202809520043e-06, "loss": 3.9734, "step": 12646 }, { "epoch": 4.212875822436912, "grad_norm": 0.69921875, "learning_rate": 9.739430309641346e-06, "loss": 3.9821, "step": 12647 }, { "epoch": 4.21320896143916, "grad_norm": 0.73046875, "learning_rate": 9.738657783678137e-06, "loss": 4.004, "step": 12648 }, { "epoch": 4.213542100441409, "grad_norm": 0.6953125, "learning_rate": 9.737885231639416e-06, "loss": 4.043, "step": 12649 }, { "epoch": 4.213875239443658, "grad_norm": 0.75390625, "learning_rate": 9.737112653534176e-06, "loss": 3.9354, "step": 12650 }, { "epoch": 4.214208378445907, "grad_norm": 0.7578125, "learning_rate": 9.736340049371419e-06, "loss": 3.903, "step": 12651 }, { "epoch": 4.214541517448155, "grad_norm": 0.71875, "learning_rate": 9.735567419160146e-06, "loss": 3.8916, "step": 12652 }, { "epoch": 4.214874656450404, "grad_norm": 0.75390625, "learning_rate": 9.734794762909354e-06, "loss": 4.001, "step": 12653 }, { "epoch": 4.215207795452653, "grad_norm": 0.703125, "learning_rate": 9.734022080628047e-06, "loss": 4.019, "step": 12654 }, { "epoch": 4.215540934454902, "grad_norm": 0.72265625, "learning_rate": 9.733249372325217e-06, "loss": 4.0554, "step": 12655 }, { "epoch": 4.21587407345715, "grad_norm": 0.7578125, "learning_rate": 9.73247663800987e-06, "loss": 4.0114, "step": 12656 }, { "epoch": 4.216207212459398, "grad_norm": 0.703125, "learning_rate": 9.731703877691005e-06, "loss": 3.9528, "step": 12657 }, { "epoch": 4.2165403514616475, "grad_norm": 0.703125, "learning_rate": 9.730931091377626e-06, "loss": 3.9588, "step": 12658 }, { "epoch": 4.216873490463896, "grad_norm": 0.671875, "learning_rate": 9.730158279078732e-06, "loss": 4.0397, "step": 12659 }, { "epoch": 4.217206629466145, "grad_norm": 0.73828125, "learning_rate": 9.729385440803324e-06, "loss": 3.9912, "step": 12660 }, { "epoch": 4.217539768468393, "grad_norm": 0.765625, "learning_rate": 9.728612576560404e-06, "loss": 3.9119, "step": 12661 }, { "epoch": 4.2178729074706425, "grad_norm": 0.74609375, "learning_rate": 9.727839686358975e-06, "loss": 3.9981, "step": 12662 }, { "epoch": 4.218206046472891, "grad_norm": 0.70703125, "learning_rate": 9.727066770208039e-06, "loss": 3.9429, "step": 12663 }, { "epoch": 4.218539185475139, "grad_norm": 0.73828125, "learning_rate": 9.726293828116597e-06, "loss": 3.9834, "step": 12664 }, { "epoch": 4.218872324477388, "grad_norm": 0.69921875, "learning_rate": 9.725520860093657e-06, "loss": 4.0344, "step": 12665 }, { "epoch": 4.2192054634796365, "grad_norm": 0.73046875, "learning_rate": 9.724747866148216e-06, "loss": 3.9636, "step": 12666 }, { "epoch": 4.219538602481886, "grad_norm": 0.703125, "learning_rate": 9.723974846289287e-06, "loss": 3.9866, "step": 12667 }, { "epoch": 4.219871741484134, "grad_norm": 0.72265625, "learning_rate": 9.723201800525863e-06, "loss": 4.0004, "step": 12668 }, { "epoch": 4.220204880486383, "grad_norm": 0.71484375, "learning_rate": 9.722428728866955e-06, "loss": 3.9487, "step": 12669 }, { "epoch": 4.2205380194886315, "grad_norm": 0.78515625, "learning_rate": 9.721655631321566e-06, "loss": 3.89, "step": 12670 }, { "epoch": 4.220871158490881, "grad_norm": 0.71875, "learning_rate": 9.7208825078987e-06, "loss": 4.0221, "step": 12671 }, { "epoch": 4.221204297493129, "grad_norm": 0.7265625, "learning_rate": 9.720109358607365e-06, "loss": 4.0208, "step": 12672 }, { "epoch": 4.221537436495377, "grad_norm": 0.7109375, "learning_rate": 9.719336183456564e-06, "loss": 3.9708, "step": 12673 }, { "epoch": 4.2218705754976265, "grad_norm": 0.78515625, "learning_rate": 9.718562982455304e-06, "loss": 3.954, "step": 12674 }, { "epoch": 4.222203714499875, "grad_norm": 0.71875, "learning_rate": 9.71778975561259e-06, "loss": 3.9385, "step": 12675 }, { "epoch": 4.222536853502124, "grad_norm": 0.71484375, "learning_rate": 9.717016502937431e-06, "loss": 3.9845, "step": 12676 }, { "epoch": 4.222869992504372, "grad_norm": 0.66796875, "learning_rate": 9.71624322443883e-06, "loss": 4.0144, "step": 12677 }, { "epoch": 4.223203131506621, "grad_norm": 0.76171875, "learning_rate": 9.715469920125794e-06, "loss": 4.0031, "step": 12678 }, { "epoch": 4.22353627050887, "grad_norm": 0.7265625, "learning_rate": 9.714696590007335e-06, "loss": 4.0066, "step": 12679 }, { "epoch": 4.223869409511119, "grad_norm": 0.71875, "learning_rate": 9.713923234092458e-06, "loss": 3.9136, "step": 12680 }, { "epoch": 4.224202548513367, "grad_norm": 0.65625, "learning_rate": 9.713149852390169e-06, "loss": 4.0611, "step": 12681 }, { "epoch": 4.2245356875156155, "grad_norm": 0.71875, "learning_rate": 9.712376444909478e-06, "loss": 4.0921, "step": 12682 }, { "epoch": 4.224868826517865, "grad_norm": 0.71875, "learning_rate": 9.711603011659393e-06, "loss": 4.0225, "step": 12683 }, { "epoch": 4.225201965520113, "grad_norm": 0.71484375, "learning_rate": 9.710829552648925e-06, "loss": 3.9819, "step": 12684 }, { "epoch": 4.225535104522362, "grad_norm": 0.71875, "learning_rate": 9.710056067887082e-06, "loss": 3.9687, "step": 12685 }, { "epoch": 4.22586824352461, "grad_norm": 0.69921875, "learning_rate": 9.709282557382868e-06, "loss": 4.0089, "step": 12686 }, { "epoch": 4.22620138252686, "grad_norm": 0.70703125, "learning_rate": 9.708509021145303e-06, "loss": 4.0292, "step": 12687 }, { "epoch": 4.226534521529108, "grad_norm": 0.73046875, "learning_rate": 9.707735459183387e-06, "loss": 4.0134, "step": 12688 }, { "epoch": 4.226867660531357, "grad_norm": 0.7109375, "learning_rate": 9.70696187150614e-06, "loss": 4.0221, "step": 12689 }, { "epoch": 4.227200799533605, "grad_norm": 0.70703125, "learning_rate": 9.706188258122563e-06, "loss": 4.0528, "step": 12690 }, { "epoch": 4.227533938535854, "grad_norm": 0.71875, "learning_rate": 9.705414619041673e-06, "loss": 3.9752, "step": 12691 }, { "epoch": 4.227867077538103, "grad_norm": 0.7109375, "learning_rate": 9.704640954272478e-06, "loss": 3.9899, "step": 12692 }, { "epoch": 4.228200216540351, "grad_norm": 0.72265625, "learning_rate": 9.703867263823994e-06, "loss": 4.0405, "step": 12693 }, { "epoch": 4.2285333555426, "grad_norm": 0.6953125, "learning_rate": 9.70309354770523e-06, "loss": 3.9974, "step": 12694 }, { "epoch": 4.228866494544849, "grad_norm": 0.73046875, "learning_rate": 9.702319805925194e-06, "loss": 4.0023, "step": 12695 }, { "epoch": 4.229199633547098, "grad_norm": 0.76953125, "learning_rate": 9.701546038492905e-06, "loss": 3.9941, "step": 12696 }, { "epoch": 4.229532772549346, "grad_norm": 0.71875, "learning_rate": 9.700772245417375e-06, "loss": 4.0062, "step": 12697 }, { "epoch": 4.229865911551595, "grad_norm": 0.6953125, "learning_rate": 9.699998426707614e-06, "loss": 4.0812, "step": 12698 }, { "epoch": 4.230199050553844, "grad_norm": 0.6953125, "learning_rate": 9.699224582372635e-06, "loss": 3.9891, "step": 12699 }, { "epoch": 4.230532189556092, "grad_norm": 0.71484375, "learning_rate": 9.698450712421455e-06, "loss": 3.9436, "step": 12700 }, { "epoch": 4.230865328558341, "grad_norm": 0.72265625, "learning_rate": 9.697676816863083e-06, "loss": 4.0574, "step": 12701 }, { "epoch": 4.231198467560589, "grad_norm": 0.69921875, "learning_rate": 9.696902895706541e-06, "loss": 4.0065, "step": 12702 }, { "epoch": 4.2315316065628386, "grad_norm": 0.6875, "learning_rate": 9.696128948960835e-06, "loss": 4.0304, "step": 12703 }, { "epoch": 4.231864745565087, "grad_norm": 0.7109375, "learning_rate": 9.695354976634985e-06, "loss": 4.0196, "step": 12704 }, { "epoch": 4.232197884567336, "grad_norm": 0.71875, "learning_rate": 9.694580978738004e-06, "loss": 3.9316, "step": 12705 }, { "epoch": 4.232531023569584, "grad_norm": 0.71484375, "learning_rate": 9.693806955278908e-06, "loss": 4.0122, "step": 12706 }, { "epoch": 4.2328641625718335, "grad_norm": 0.74609375, "learning_rate": 9.693032906266715e-06, "loss": 3.9536, "step": 12707 }, { "epoch": 4.233197301574082, "grad_norm": 0.703125, "learning_rate": 9.692258831710437e-06, "loss": 4.0194, "step": 12708 }, { "epoch": 4.23353044057633, "grad_norm": 0.69140625, "learning_rate": 9.691484731619092e-06, "loss": 3.9817, "step": 12709 }, { "epoch": 4.233863579578579, "grad_norm": 0.7109375, "learning_rate": 9.690710606001695e-06, "loss": 3.8922, "step": 12710 }, { "epoch": 4.234196718580828, "grad_norm": 0.71484375, "learning_rate": 9.689936454867266e-06, "loss": 3.9721, "step": 12711 }, { "epoch": 4.234529857583077, "grad_norm": 0.6953125, "learning_rate": 9.689162278224822e-06, "loss": 4.0525, "step": 12712 }, { "epoch": 4.234862996585325, "grad_norm": 0.734375, "learning_rate": 9.688388076083378e-06, "loss": 3.9727, "step": 12713 }, { "epoch": 4.235196135587574, "grad_norm": 0.7109375, "learning_rate": 9.687613848451952e-06, "loss": 4.0033, "step": 12714 }, { "epoch": 4.2355292745898225, "grad_norm": 0.71875, "learning_rate": 9.686839595339565e-06, "loss": 3.9631, "step": 12715 }, { "epoch": 4.235862413592072, "grad_norm": 0.71875, "learning_rate": 9.686065316755234e-06, "loss": 4.0663, "step": 12716 }, { "epoch": 4.23619555259432, "grad_norm": 0.73828125, "learning_rate": 9.685291012707975e-06, "loss": 3.9299, "step": 12717 }, { "epoch": 4.236528691596568, "grad_norm": 0.7109375, "learning_rate": 9.684516683206808e-06, "loss": 3.9768, "step": 12718 }, { "epoch": 4.2368618305988175, "grad_norm": 0.73828125, "learning_rate": 9.683742328260754e-06, "loss": 4.0366, "step": 12719 }, { "epoch": 4.237194969601066, "grad_norm": 0.703125, "learning_rate": 9.682967947878835e-06, "loss": 4.0832, "step": 12720 }, { "epoch": 4.237528108603315, "grad_norm": 0.7578125, "learning_rate": 9.682193542070064e-06, "loss": 4.0035, "step": 12721 }, { "epoch": 4.237861247605563, "grad_norm": 0.73046875, "learning_rate": 9.681419110843467e-06, "loss": 3.9186, "step": 12722 }, { "epoch": 4.2381943866078124, "grad_norm": 0.70703125, "learning_rate": 9.68064465420806e-06, "loss": 4.0566, "step": 12723 }, { "epoch": 4.238527525610061, "grad_norm": 0.703125, "learning_rate": 9.679870172172868e-06, "loss": 3.9692, "step": 12724 }, { "epoch": 4.238860664612309, "grad_norm": 0.69921875, "learning_rate": 9.679095664746912e-06, "loss": 3.8999, "step": 12725 }, { "epoch": 4.239193803614558, "grad_norm": 0.7265625, "learning_rate": 9.678321131939208e-06, "loss": 3.9696, "step": 12726 }, { "epoch": 4.2395269426168065, "grad_norm": 0.68359375, "learning_rate": 9.677546573758781e-06, "loss": 3.9598, "step": 12727 }, { "epoch": 4.239860081619056, "grad_norm": 0.6796875, "learning_rate": 9.676771990214652e-06, "loss": 4.042, "step": 12728 }, { "epoch": 4.240193220621304, "grad_norm": 0.75, "learning_rate": 9.67599738131585e-06, "loss": 3.9788, "step": 12729 }, { "epoch": 4.240526359623553, "grad_norm": 0.7578125, "learning_rate": 9.675222747071386e-06, "loss": 3.9442, "step": 12730 }, { "epoch": 4.2408594986258015, "grad_norm": 0.7109375, "learning_rate": 9.674448087490292e-06, "loss": 4.0073, "step": 12731 }, { "epoch": 4.241192637628051, "grad_norm": 0.66015625, "learning_rate": 9.673673402581585e-06, "loss": 4.0155, "step": 12732 }, { "epoch": 4.241525776630299, "grad_norm": 0.7265625, "learning_rate": 9.672898692354292e-06, "loss": 3.9671, "step": 12733 }, { "epoch": 4.241858915632548, "grad_norm": 0.72265625, "learning_rate": 9.672123956817436e-06, "loss": 3.9936, "step": 12734 }, { "epoch": 4.242192054634796, "grad_norm": 0.75, "learning_rate": 9.671349195980043e-06, "loss": 4.0251, "step": 12735 }, { "epoch": 4.242525193637045, "grad_norm": 0.734375, "learning_rate": 9.67057440985113e-06, "loss": 4.0611, "step": 12736 }, { "epoch": 4.242858332639294, "grad_norm": 0.734375, "learning_rate": 9.66979959843973e-06, "loss": 3.9941, "step": 12737 }, { "epoch": 4.243191471641542, "grad_norm": 0.72265625, "learning_rate": 9.669024761754865e-06, "loss": 4.0313, "step": 12738 }, { "epoch": 4.243524610643791, "grad_norm": 0.7265625, "learning_rate": 9.668249899805557e-06, "loss": 4.0322, "step": 12739 }, { "epoch": 4.24385774964604, "grad_norm": 0.75390625, "learning_rate": 9.667475012600835e-06, "loss": 3.9591, "step": 12740 }, { "epoch": 4.244190888648289, "grad_norm": 0.6875, "learning_rate": 9.666700100149723e-06, "loss": 3.9458, "step": 12741 }, { "epoch": 4.244524027650537, "grad_norm": 0.74609375, "learning_rate": 9.66592516246125e-06, "loss": 4.0362, "step": 12742 }, { "epoch": 4.2448571666527855, "grad_norm": 0.73828125, "learning_rate": 9.66515019954444e-06, "loss": 3.998, "step": 12743 }, { "epoch": 4.245190305655035, "grad_norm": 0.69140625, "learning_rate": 9.66437521140832e-06, "loss": 4.0027, "step": 12744 }, { "epoch": 4.245523444657283, "grad_norm": 0.703125, "learning_rate": 9.663600198061914e-06, "loss": 4.0357, "step": 12745 }, { "epoch": 4.245856583659532, "grad_norm": 0.7421875, "learning_rate": 9.662825159514256e-06, "loss": 4.08, "step": 12746 }, { "epoch": 4.24618972266178, "grad_norm": 0.67578125, "learning_rate": 9.662050095774368e-06, "loss": 3.9592, "step": 12747 }, { "epoch": 4.24652286166403, "grad_norm": 0.72265625, "learning_rate": 9.661275006851277e-06, "loss": 4.0166, "step": 12748 }, { "epoch": 4.246856000666278, "grad_norm": 0.7265625, "learning_rate": 9.660499892754016e-06, "loss": 3.9719, "step": 12749 }, { "epoch": 4.247189139668527, "grad_norm": 0.7109375, "learning_rate": 9.65972475349161e-06, "loss": 4.0028, "step": 12750 }, { "epoch": 4.247522278670775, "grad_norm": 0.69921875, "learning_rate": 9.658949589073089e-06, "loss": 4.0253, "step": 12751 }, { "epoch": 4.247855417673024, "grad_norm": 0.703125, "learning_rate": 9.658174399507483e-06, "loss": 4.0211, "step": 12752 }, { "epoch": 4.248188556675273, "grad_norm": 0.6953125, "learning_rate": 9.657399184803818e-06, "loss": 4.051, "step": 12753 }, { "epoch": 4.248521695677521, "grad_norm": 0.6796875, "learning_rate": 9.656623944971126e-06, "loss": 4.0359, "step": 12754 }, { "epoch": 4.24885483467977, "grad_norm": 0.72265625, "learning_rate": 9.655848680018436e-06, "loss": 4.0065, "step": 12755 }, { "epoch": 4.249187973682019, "grad_norm": 0.71484375, "learning_rate": 9.65507338995478e-06, "loss": 4.0451, "step": 12756 }, { "epoch": 4.249521112684268, "grad_norm": 0.72265625, "learning_rate": 9.654298074789184e-06, "loss": 4.026, "step": 12757 }, { "epoch": 4.249854251686516, "grad_norm": 0.7265625, "learning_rate": 9.653522734530682e-06, "loss": 3.9909, "step": 12758 }, { "epoch": 4.250187390688765, "grad_norm": 0.6953125, "learning_rate": 9.652747369188306e-06, "loss": 3.9929, "step": 12759 }, { "epoch": 4.250520529691014, "grad_norm": 0.72265625, "learning_rate": 9.651971978771086e-06, "loss": 4.0072, "step": 12760 }, { "epoch": 4.250853668693262, "grad_norm": 0.70703125, "learning_rate": 9.651196563288053e-06, "loss": 3.9889, "step": 12761 }, { "epoch": 4.251186807695511, "grad_norm": 0.73046875, "learning_rate": 9.65042112274824e-06, "loss": 4.007, "step": 12762 }, { "epoch": 4.251519946697759, "grad_norm": 0.71484375, "learning_rate": 9.649645657160678e-06, "loss": 3.9608, "step": 12763 }, { "epoch": 4.2518530857000085, "grad_norm": 0.74609375, "learning_rate": 9.648870166534402e-06, "loss": 4.0573, "step": 12764 }, { "epoch": 4.252186224702257, "grad_norm": 0.7265625, "learning_rate": 9.648094650878442e-06, "loss": 3.9851, "step": 12765 }, { "epoch": 4.252519363704506, "grad_norm": 0.7109375, "learning_rate": 9.647319110201831e-06, "loss": 3.9426, "step": 12766 }, { "epoch": 4.252852502706754, "grad_norm": 0.71875, "learning_rate": 9.646543544513603e-06, "loss": 4.0276, "step": 12767 }, { "epoch": 4.2531856417090035, "grad_norm": 0.70703125, "learning_rate": 9.645767953822795e-06, "loss": 3.9661, "step": 12768 }, { "epoch": 4.253518780711252, "grad_norm": 0.6875, "learning_rate": 9.644992338138436e-06, "loss": 3.9855, "step": 12769 }, { "epoch": 4.2538519197135, "grad_norm": 0.71875, "learning_rate": 9.644216697469562e-06, "loss": 4.0451, "step": 12770 }, { "epoch": 4.254185058715749, "grad_norm": 0.66796875, "learning_rate": 9.64344103182521e-06, "loss": 4.0363, "step": 12771 }, { "epoch": 4.2545181977179976, "grad_norm": 0.73046875, "learning_rate": 9.64266534121441e-06, "loss": 4.0049, "step": 12772 }, { "epoch": 4.254851336720247, "grad_norm": 0.67578125, "learning_rate": 9.6418896256462e-06, "loss": 3.9451, "step": 12773 }, { "epoch": 4.255184475722495, "grad_norm": 0.7109375, "learning_rate": 9.641113885129618e-06, "loss": 3.9921, "step": 12774 }, { "epoch": 4.255517614724744, "grad_norm": 0.7109375, "learning_rate": 9.640338119673696e-06, "loss": 3.9491, "step": 12775 }, { "epoch": 4.2558507537269925, "grad_norm": 0.74609375, "learning_rate": 9.63956232928747e-06, "loss": 4.0038, "step": 12776 }, { "epoch": 4.256183892729242, "grad_norm": 0.74609375, "learning_rate": 9.638786513979975e-06, "loss": 3.9551, "step": 12777 }, { "epoch": 4.25651703173149, "grad_norm": 0.71875, "learning_rate": 9.638010673760254e-06, "loss": 4.0271, "step": 12778 }, { "epoch": 4.256850170733738, "grad_norm": 0.6796875, "learning_rate": 9.637234808637338e-06, "loss": 4.0212, "step": 12779 }, { "epoch": 4.2571833097359875, "grad_norm": 0.7734375, "learning_rate": 9.636458918620264e-06, "loss": 3.986, "step": 12780 }, { "epoch": 4.257516448738236, "grad_norm": 0.74609375, "learning_rate": 9.635683003718073e-06, "loss": 3.963, "step": 12781 }, { "epoch": 4.257849587740485, "grad_norm": 0.71484375, "learning_rate": 9.634907063939799e-06, "loss": 4.0658, "step": 12782 }, { "epoch": 4.258182726742733, "grad_norm": 0.703125, "learning_rate": 9.634131099294484e-06, "loss": 3.919, "step": 12783 }, { "epoch": 4.258515865744982, "grad_norm": 0.72265625, "learning_rate": 9.633355109791164e-06, "loss": 3.9442, "step": 12784 }, { "epoch": 4.258849004747231, "grad_norm": 0.73046875, "learning_rate": 9.632579095438876e-06, "loss": 3.9845, "step": 12785 }, { "epoch": 4.259182143749479, "grad_norm": 0.67578125, "learning_rate": 9.631803056246664e-06, "loss": 3.9599, "step": 12786 }, { "epoch": 4.259515282751728, "grad_norm": 0.6796875, "learning_rate": 9.631026992223559e-06, "loss": 4.0373, "step": 12787 }, { "epoch": 4.2598484217539765, "grad_norm": 0.71875, "learning_rate": 9.63025090337861e-06, "loss": 4.0309, "step": 12788 }, { "epoch": 4.260181560756226, "grad_norm": 0.69921875, "learning_rate": 9.62947478972085e-06, "loss": 3.9688, "step": 12789 }, { "epoch": 4.260514699758474, "grad_norm": 0.7265625, "learning_rate": 9.628698651259321e-06, "loss": 3.9408, "step": 12790 }, { "epoch": 4.260847838760723, "grad_norm": 0.7421875, "learning_rate": 9.627922488003064e-06, "loss": 3.9752, "step": 12791 }, { "epoch": 4.2611809777629714, "grad_norm": 0.69921875, "learning_rate": 9.627146299961118e-06, "loss": 3.9367, "step": 12792 }, { "epoch": 4.261514116765221, "grad_norm": 0.6953125, "learning_rate": 9.626370087142525e-06, "loss": 4.0015, "step": 12793 }, { "epoch": 4.261847255767469, "grad_norm": 0.72265625, "learning_rate": 9.625593849556327e-06, "loss": 4.047, "step": 12794 }, { "epoch": 4.262180394769718, "grad_norm": 0.7265625, "learning_rate": 9.624817587211565e-06, "loss": 3.9892, "step": 12795 }, { "epoch": 4.262513533771966, "grad_norm": 0.671875, "learning_rate": 9.624041300117278e-06, "loss": 3.9951, "step": 12796 }, { "epoch": 4.262846672774215, "grad_norm": 0.71484375, "learning_rate": 9.623264988282516e-06, "loss": 4.0233, "step": 12797 }, { "epoch": 4.263179811776464, "grad_norm": 0.6953125, "learning_rate": 9.622488651716311e-06, "loss": 4.0627, "step": 12798 }, { "epoch": 4.263512950778712, "grad_norm": 0.75, "learning_rate": 9.621712290427714e-06, "loss": 4.0118, "step": 12799 }, { "epoch": 4.263846089780961, "grad_norm": 0.6875, "learning_rate": 9.620935904425762e-06, "loss": 4.0256, "step": 12800 }, { "epoch": 4.26417922878321, "grad_norm": 0.71875, "learning_rate": 9.620159493719502e-06, "loss": 4.0126, "step": 12801 }, { "epoch": 4.264512367785459, "grad_norm": 0.74609375, "learning_rate": 9.619383058317976e-06, "loss": 4.0147, "step": 12802 }, { "epoch": 4.264845506787707, "grad_norm": 0.69921875, "learning_rate": 9.618606598230227e-06, "loss": 4.0439, "step": 12803 }, { "epoch": 4.265178645789955, "grad_norm": 0.75, "learning_rate": 9.617830113465298e-06, "loss": 3.9268, "step": 12804 }, { "epoch": 4.265511784792205, "grad_norm": 0.73046875, "learning_rate": 9.617053604032239e-06, "loss": 4.0363, "step": 12805 }, { "epoch": 4.265844923794453, "grad_norm": 0.71875, "learning_rate": 9.61627706994009e-06, "loss": 4.008, "step": 12806 }, { "epoch": 4.266178062796702, "grad_norm": 0.73046875, "learning_rate": 9.615500511197897e-06, "loss": 4.0309, "step": 12807 }, { "epoch": 4.26651120179895, "grad_norm": 0.69921875, "learning_rate": 9.614723927814704e-06, "loss": 4.0396, "step": 12808 }, { "epoch": 4.2668443408012, "grad_norm": 0.7890625, "learning_rate": 9.613947319799557e-06, "loss": 3.9636, "step": 12809 }, { "epoch": 4.267177479803448, "grad_norm": 0.7421875, "learning_rate": 9.613170687161507e-06, "loss": 3.9452, "step": 12810 }, { "epoch": 4.267510618805697, "grad_norm": 0.69921875, "learning_rate": 9.612394029909592e-06, "loss": 3.9934, "step": 12811 }, { "epoch": 4.267843757807945, "grad_norm": 0.69921875, "learning_rate": 9.611617348052863e-06, "loss": 3.9883, "step": 12812 }, { "epoch": 4.2681768968101945, "grad_norm": 0.74609375, "learning_rate": 9.610840641600365e-06, "loss": 3.9685, "step": 12813 }, { "epoch": 4.268510035812443, "grad_norm": 0.71484375, "learning_rate": 9.610063910561145e-06, "loss": 4.0232, "step": 12814 }, { "epoch": 4.268843174814691, "grad_norm": 0.734375, "learning_rate": 9.609287154944255e-06, "loss": 3.9435, "step": 12815 }, { "epoch": 4.26917631381694, "grad_norm": 0.71875, "learning_rate": 9.608510374758733e-06, "loss": 3.9255, "step": 12816 }, { "epoch": 4.269509452819189, "grad_norm": 0.75390625, "learning_rate": 9.607733570013635e-06, "loss": 3.9256, "step": 12817 }, { "epoch": 4.269842591821438, "grad_norm": 0.70703125, "learning_rate": 9.606956740718004e-06, "loss": 3.9165, "step": 12818 }, { "epoch": 4.270175730823686, "grad_norm": 0.70703125, "learning_rate": 9.606179886880894e-06, "loss": 3.973, "step": 12819 }, { "epoch": 4.270508869825935, "grad_norm": 0.734375, "learning_rate": 9.605403008511347e-06, "loss": 4.0401, "step": 12820 }, { "epoch": 4.2708420088281835, "grad_norm": 0.76171875, "learning_rate": 9.604626105618418e-06, "loss": 3.9419, "step": 12821 }, { "epoch": 4.271175147830432, "grad_norm": 0.69140625, "learning_rate": 9.60384917821115e-06, "loss": 4.0106, "step": 12822 }, { "epoch": 4.271508286832681, "grad_norm": 0.76171875, "learning_rate": 9.6030722262986e-06, "loss": 3.9296, "step": 12823 }, { "epoch": 4.271841425834929, "grad_norm": 0.71875, "learning_rate": 9.602295249889813e-06, "loss": 4.0096, "step": 12824 }, { "epoch": 4.2721745648371785, "grad_norm": 0.72265625, "learning_rate": 9.60151824899384e-06, "loss": 3.9681, "step": 12825 }, { "epoch": 4.272507703839427, "grad_norm": 0.6875, "learning_rate": 9.600741223619729e-06, "loss": 3.964, "step": 12826 }, { "epoch": 4.272840842841676, "grad_norm": 0.69140625, "learning_rate": 9.599964173776533e-06, "loss": 3.9846, "step": 12827 }, { "epoch": 4.273173981843924, "grad_norm": 0.76171875, "learning_rate": 9.599187099473307e-06, "loss": 3.9749, "step": 12828 }, { "epoch": 4.2735071208461735, "grad_norm": 0.73828125, "learning_rate": 9.598410000719092e-06, "loss": 4.0254, "step": 12829 }, { "epoch": 4.273840259848422, "grad_norm": 0.73828125, "learning_rate": 9.59763287752295e-06, "loss": 3.9719, "step": 12830 }, { "epoch": 4.27417339885067, "grad_norm": 0.7890625, "learning_rate": 9.596855729893928e-06, "loss": 3.9966, "step": 12831 }, { "epoch": 4.274506537852919, "grad_norm": 0.703125, "learning_rate": 9.596078557841077e-06, "loss": 4.0421, "step": 12832 }, { "epoch": 4.2748396768551675, "grad_norm": 0.68359375, "learning_rate": 9.595301361373451e-06, "loss": 3.9602, "step": 12833 }, { "epoch": 4.275172815857417, "grad_norm": 0.6796875, "learning_rate": 9.594524140500104e-06, "loss": 3.9766, "step": 12834 }, { "epoch": 4.275505954859665, "grad_norm": 0.703125, "learning_rate": 9.593746895230086e-06, "loss": 4.0185, "step": 12835 }, { "epoch": 4.275839093861914, "grad_norm": 0.796875, "learning_rate": 9.592969625572453e-06, "loss": 4.0305, "step": 12836 }, { "epoch": 4.2761722328641625, "grad_norm": 0.7578125, "learning_rate": 9.592192331536257e-06, "loss": 3.9735, "step": 12837 }, { "epoch": 4.276505371866412, "grad_norm": 0.73046875, "learning_rate": 9.591415013130551e-06, "loss": 3.9723, "step": 12838 }, { "epoch": 4.27683851086866, "grad_norm": 0.71875, "learning_rate": 9.590637670364392e-06, "loss": 3.9612, "step": 12839 }, { "epoch": 4.277171649870908, "grad_norm": 0.69921875, "learning_rate": 9.58986030324683e-06, "loss": 4.0002, "step": 12840 }, { "epoch": 4.277504788873157, "grad_norm": 0.71484375, "learning_rate": 9.589082911786926e-06, "loss": 3.9658, "step": 12841 }, { "epoch": 4.277837927875406, "grad_norm": 0.6953125, "learning_rate": 9.588305495993727e-06, "loss": 3.9217, "step": 12842 }, { "epoch": 4.278171066877655, "grad_norm": 0.70703125, "learning_rate": 9.587528055876294e-06, "loss": 3.9608, "step": 12843 }, { "epoch": 4.278504205879903, "grad_norm": 0.74609375, "learning_rate": 9.586750591443679e-06, "loss": 3.9931, "step": 12844 }, { "epoch": 4.278837344882152, "grad_norm": 0.68359375, "learning_rate": 9.585973102704943e-06, "loss": 4.0583, "step": 12845 }, { "epoch": 4.279170483884401, "grad_norm": 0.76171875, "learning_rate": 9.585195589669136e-06, "loss": 4.0374, "step": 12846 }, { "epoch": 4.279503622886649, "grad_norm": 0.734375, "learning_rate": 9.584418052345318e-06, "loss": 4.0615, "step": 12847 }, { "epoch": 4.279836761888898, "grad_norm": 0.72265625, "learning_rate": 9.583640490742543e-06, "loss": 3.9749, "step": 12848 }, { "epoch": 4.2801699008911465, "grad_norm": 0.71875, "learning_rate": 9.58286290486987e-06, "loss": 4.0061, "step": 12849 }, { "epoch": 4.280503039893396, "grad_norm": 0.69921875, "learning_rate": 9.582085294736357e-06, "loss": 4.0107, "step": 12850 }, { "epoch": 4.280836178895644, "grad_norm": 0.69140625, "learning_rate": 9.581307660351058e-06, "loss": 4.0116, "step": 12851 }, { "epoch": 4.281169317897893, "grad_norm": 0.7265625, "learning_rate": 9.580530001723034e-06, "loss": 3.9433, "step": 12852 }, { "epoch": 4.281502456900141, "grad_norm": 0.71875, "learning_rate": 9.579752318861341e-06, "loss": 4.0124, "step": 12853 }, { "epoch": 4.281835595902391, "grad_norm": 0.73046875, "learning_rate": 9.578974611775038e-06, "loss": 4.0469, "step": 12854 }, { "epoch": 4.282168734904639, "grad_norm": 0.74609375, "learning_rate": 9.578196880473188e-06, "loss": 3.9714, "step": 12855 }, { "epoch": 4.282501873906888, "grad_norm": 0.76171875, "learning_rate": 9.577419124964842e-06, "loss": 4.0249, "step": 12856 }, { "epoch": 4.282835012909136, "grad_norm": 0.6953125, "learning_rate": 9.576641345259062e-06, "loss": 3.9362, "step": 12857 }, { "epoch": 4.283168151911385, "grad_norm": 0.76953125, "learning_rate": 9.575863541364911e-06, "loss": 3.8781, "step": 12858 }, { "epoch": 4.283501290913634, "grad_norm": 0.69921875, "learning_rate": 9.575085713291446e-06, "loss": 3.9847, "step": 12859 }, { "epoch": 4.283834429915882, "grad_norm": 0.734375, "learning_rate": 9.574307861047724e-06, "loss": 4.0197, "step": 12860 }, { "epoch": 4.284167568918131, "grad_norm": 0.68359375, "learning_rate": 9.573529984642811e-06, "loss": 3.9401, "step": 12861 }, { "epoch": 4.28450070792038, "grad_norm": 0.7421875, "learning_rate": 9.572752084085763e-06, "loss": 3.976, "step": 12862 }, { "epoch": 4.284833846922629, "grad_norm": 0.74609375, "learning_rate": 9.571974159385646e-06, "loss": 3.979, "step": 12863 }, { "epoch": 4.285166985924877, "grad_norm": 0.72265625, "learning_rate": 9.571196210551515e-06, "loss": 4.02, "step": 12864 }, { "epoch": 4.285500124927125, "grad_norm": 0.77734375, "learning_rate": 9.570418237592437e-06, "loss": 3.9558, "step": 12865 }, { "epoch": 4.285833263929375, "grad_norm": 0.734375, "learning_rate": 9.569640240517467e-06, "loss": 3.9785, "step": 12866 }, { "epoch": 4.286166402931623, "grad_norm": 0.703125, "learning_rate": 9.568862219335675e-06, "loss": 3.9482, "step": 12867 }, { "epoch": 4.286499541933872, "grad_norm": 0.6875, "learning_rate": 9.568084174056118e-06, "loss": 3.9277, "step": 12868 }, { "epoch": 4.28683268093612, "grad_norm": 0.69921875, "learning_rate": 9.56730610468786e-06, "loss": 4.1191, "step": 12869 }, { "epoch": 4.2871658199383695, "grad_norm": 0.76953125, "learning_rate": 9.566528011239962e-06, "loss": 3.9919, "step": 12870 }, { "epoch": 4.287498958940618, "grad_norm": 0.75, "learning_rate": 9.56574989372149e-06, "loss": 3.9697, "step": 12871 }, { "epoch": 4.287832097942867, "grad_norm": 0.765625, "learning_rate": 9.564971752141506e-06, "loss": 3.9769, "step": 12872 }, { "epoch": 4.288165236945115, "grad_norm": 0.73046875, "learning_rate": 9.564193586509072e-06, "loss": 3.9647, "step": 12873 }, { "epoch": 4.2884983759473645, "grad_norm": 0.7421875, "learning_rate": 9.563415396833257e-06, "loss": 3.9161, "step": 12874 }, { "epoch": 4.288831514949613, "grad_norm": 0.734375, "learning_rate": 9.562637183123121e-06, "loss": 4.0232, "step": 12875 }, { "epoch": 4.289164653951861, "grad_norm": 0.72265625, "learning_rate": 9.561858945387729e-06, "loss": 3.989, "step": 12876 }, { "epoch": 4.28949779295411, "grad_norm": 0.69921875, "learning_rate": 9.561080683636147e-06, "loss": 3.9416, "step": 12877 }, { "epoch": 4.289830931956359, "grad_norm": 0.73046875, "learning_rate": 9.560302397877438e-06, "loss": 3.9778, "step": 12878 }, { "epoch": 4.290164070958608, "grad_norm": 0.671875, "learning_rate": 9.559524088120667e-06, "loss": 3.9735, "step": 12879 }, { "epoch": 4.290497209960856, "grad_norm": 0.71875, "learning_rate": 9.558745754374907e-06, "loss": 4.0359, "step": 12880 }, { "epoch": 4.290830348963105, "grad_norm": 0.7421875, "learning_rate": 9.557967396649212e-06, "loss": 3.9909, "step": 12881 }, { "epoch": 4.2911634879653535, "grad_norm": 0.7109375, "learning_rate": 9.557189014952658e-06, "loss": 3.9821, "step": 12882 }, { "epoch": 4.291496626967602, "grad_norm": 0.76171875, "learning_rate": 9.556410609294307e-06, "loss": 4.0018, "step": 12883 }, { "epoch": 4.291829765969851, "grad_norm": 0.68359375, "learning_rate": 9.555632179683227e-06, "loss": 4.0912, "step": 12884 }, { "epoch": 4.292162904972099, "grad_norm": 0.79296875, "learning_rate": 9.554853726128485e-06, "loss": 4.0355, "step": 12885 }, { "epoch": 4.2924960439743485, "grad_norm": 0.703125, "learning_rate": 9.554075248639146e-06, "loss": 3.9734, "step": 12886 }, { "epoch": 4.292829182976597, "grad_norm": 0.67578125, "learning_rate": 9.553296747224281e-06, "loss": 4.0235, "step": 12887 }, { "epoch": 4.293162321978846, "grad_norm": 0.76171875, "learning_rate": 9.552518221892955e-06, "loss": 4.0608, "step": 12888 }, { "epoch": 4.293495460981094, "grad_norm": 0.71875, "learning_rate": 9.551739672654241e-06, "loss": 4.0482, "step": 12889 }, { "epoch": 4.293828599983343, "grad_norm": 0.7265625, "learning_rate": 9.550961099517198e-06, "loss": 4.0453, "step": 12890 }, { "epoch": 4.294161738985592, "grad_norm": 0.796875, "learning_rate": 9.550182502490908e-06, "loss": 3.9676, "step": 12891 }, { "epoch": 4.29449487798784, "grad_norm": 0.73828125, "learning_rate": 9.549403881584426e-06, "loss": 4.0429, "step": 12892 }, { "epoch": 4.294828016990089, "grad_norm": 0.734375, "learning_rate": 9.548625236806831e-06, "loss": 4.0482, "step": 12893 }, { "epoch": 4.2951611559923375, "grad_norm": 0.69140625, "learning_rate": 9.547846568167188e-06, "loss": 4.0357, "step": 12894 }, { "epoch": 4.295494294994587, "grad_norm": 0.71875, "learning_rate": 9.547067875674569e-06, "loss": 3.9739, "step": 12895 }, { "epoch": 4.295827433996835, "grad_norm": 0.7265625, "learning_rate": 9.546289159338041e-06, "loss": 3.9611, "step": 12896 }, { "epoch": 4.296160572999084, "grad_norm": 0.76953125, "learning_rate": 9.54551041916668e-06, "loss": 3.9702, "step": 12897 }, { "epoch": 4.2964937120013325, "grad_norm": 0.734375, "learning_rate": 9.544731655169551e-06, "loss": 3.9488, "step": 12898 }, { "epoch": 4.296826851003582, "grad_norm": 0.7109375, "learning_rate": 9.543952867355728e-06, "loss": 3.9871, "step": 12899 }, { "epoch": 4.29715999000583, "grad_norm": 0.703125, "learning_rate": 9.543174055734283e-06, "loss": 4.0253, "step": 12900 }, { "epoch": 4.297493129008078, "grad_norm": 0.72265625, "learning_rate": 9.542395220314281e-06, "loss": 4.0068, "step": 12901 }, { "epoch": 4.297826268010327, "grad_norm": 0.73046875, "learning_rate": 9.541616361104801e-06, "loss": 4.0544, "step": 12902 }, { "epoch": 4.298159407012576, "grad_norm": 0.75, "learning_rate": 9.540837478114913e-06, "loss": 4.0354, "step": 12903 }, { "epoch": 4.298492546014825, "grad_norm": 0.68359375, "learning_rate": 9.54005857135369e-06, "loss": 4.0063, "step": 12904 }, { "epoch": 4.298825685017073, "grad_norm": 0.6953125, "learning_rate": 9.5392796408302e-06, "loss": 4.0243, "step": 12905 }, { "epoch": 4.299158824019322, "grad_norm": 0.75, "learning_rate": 9.538500686553521e-06, "loss": 4.0379, "step": 12906 }, { "epoch": 4.299491963021571, "grad_norm": 0.71875, "learning_rate": 9.537721708532726e-06, "loss": 4.0211, "step": 12907 }, { "epoch": 4.29982510202382, "grad_norm": 0.75390625, "learning_rate": 9.536942706776883e-06, "loss": 4.0063, "step": 12908 }, { "epoch": 4.300158241026068, "grad_norm": 0.765625, "learning_rate": 9.536163681295073e-06, "loss": 3.9452, "step": 12909 }, { "epoch": 4.300491380028316, "grad_norm": 0.74609375, "learning_rate": 9.535384632096365e-06, "loss": 4.0007, "step": 12910 }, { "epoch": 4.300824519030566, "grad_norm": 0.75390625, "learning_rate": 9.534605559189836e-06, "loss": 4.0045, "step": 12911 }, { "epoch": 4.301157658032814, "grad_norm": 0.71875, "learning_rate": 9.533826462584557e-06, "loss": 4.0218, "step": 12912 }, { "epoch": 4.301490797035063, "grad_norm": 0.7109375, "learning_rate": 9.533047342289607e-06, "loss": 3.9436, "step": 12913 }, { "epoch": 4.301823936037311, "grad_norm": 0.67578125, "learning_rate": 9.532268198314058e-06, "loss": 4.0144, "step": 12914 }, { "epoch": 4.302157075039561, "grad_norm": 0.73828125, "learning_rate": 9.531489030666987e-06, "loss": 3.9917, "step": 12915 }, { "epoch": 4.302490214041809, "grad_norm": 0.69140625, "learning_rate": 9.530709839357468e-06, "loss": 4.021, "step": 12916 }, { "epoch": 4.302823353044058, "grad_norm": 0.7265625, "learning_rate": 9.52993062439458e-06, "loss": 3.9337, "step": 12917 }, { "epoch": 4.303156492046306, "grad_norm": 0.7109375, "learning_rate": 9.529151385787395e-06, "loss": 4.0171, "step": 12918 }, { "epoch": 4.303489631048555, "grad_norm": 0.69921875, "learning_rate": 9.52837212354499e-06, "loss": 4.0317, "step": 12919 }, { "epoch": 4.303822770050804, "grad_norm": 0.70703125, "learning_rate": 9.527592837676447e-06, "loss": 4.0201, "step": 12920 }, { "epoch": 4.304155909053052, "grad_norm": 0.6953125, "learning_rate": 9.526813528190834e-06, "loss": 4.0457, "step": 12921 }, { "epoch": 4.304489048055301, "grad_norm": 0.734375, "learning_rate": 9.52603419509724e-06, "loss": 3.9603, "step": 12922 }, { "epoch": 4.30482218705755, "grad_norm": 0.72265625, "learning_rate": 9.525254838404732e-06, "loss": 3.9921, "step": 12923 }, { "epoch": 4.305155326059799, "grad_norm": 0.7109375, "learning_rate": 9.524475458122394e-06, "loss": 4.0687, "step": 12924 }, { "epoch": 4.305488465062047, "grad_norm": 0.78515625, "learning_rate": 9.523696054259298e-06, "loss": 3.9949, "step": 12925 }, { "epoch": 4.305821604064295, "grad_norm": 0.7109375, "learning_rate": 9.52291662682453e-06, "loss": 4.0179, "step": 12926 }, { "epoch": 4.3061547430665446, "grad_norm": 0.71875, "learning_rate": 9.522137175827165e-06, "loss": 3.8936, "step": 12927 }, { "epoch": 4.306487882068793, "grad_norm": 0.7421875, "learning_rate": 9.521357701276277e-06, "loss": 3.9151, "step": 12928 }, { "epoch": 4.306821021071042, "grad_norm": 0.73828125, "learning_rate": 9.520578203180956e-06, "loss": 3.9937, "step": 12929 }, { "epoch": 4.30715416007329, "grad_norm": 0.703125, "learning_rate": 9.519798681550271e-06, "loss": 4.0408, "step": 12930 }, { "epoch": 4.3074872990755395, "grad_norm": 0.734375, "learning_rate": 9.519019136393311e-06, "loss": 3.9777, "step": 12931 }, { "epoch": 4.307820438077788, "grad_norm": 0.7265625, "learning_rate": 9.518239567719147e-06, "loss": 4.0222, "step": 12932 }, { "epoch": 4.308153577080037, "grad_norm": 0.7109375, "learning_rate": 9.517459975536866e-06, "loss": 3.9713, "step": 12933 }, { "epoch": 4.308486716082285, "grad_norm": 0.71484375, "learning_rate": 9.516680359855546e-06, "loss": 3.9197, "step": 12934 }, { "epoch": 4.3088198550845345, "grad_norm": 0.6953125, "learning_rate": 9.515900720684266e-06, "loss": 3.9894, "step": 12935 }, { "epoch": 4.309152994086783, "grad_norm": 0.703125, "learning_rate": 9.51512105803211e-06, "loss": 4.0524, "step": 12936 }, { "epoch": 4.309486133089031, "grad_norm": 0.67578125, "learning_rate": 9.514341371908159e-06, "loss": 4.0314, "step": 12937 }, { "epoch": 4.30981927209128, "grad_norm": 0.671875, "learning_rate": 9.513561662321494e-06, "loss": 4.0349, "step": 12938 }, { "epoch": 4.3101524110935285, "grad_norm": 0.73046875, "learning_rate": 9.512781929281198e-06, "loss": 4.0136, "step": 12939 }, { "epoch": 4.310485550095778, "grad_norm": 0.68359375, "learning_rate": 9.512002172796351e-06, "loss": 3.981, "step": 12940 }, { "epoch": 4.310818689098026, "grad_norm": 0.71484375, "learning_rate": 9.511222392876036e-06, "loss": 3.9581, "step": 12941 }, { "epoch": 4.311151828100275, "grad_norm": 0.70703125, "learning_rate": 9.510442589529337e-06, "loss": 4.0207, "step": 12942 }, { "epoch": 4.3114849671025235, "grad_norm": 0.7265625, "learning_rate": 9.509662762765335e-06, "loss": 4.0246, "step": 12943 }, { "epoch": 4.311818106104772, "grad_norm": 0.7421875, "learning_rate": 9.508882912593117e-06, "loss": 3.9711, "step": 12944 }, { "epoch": 4.312151245107021, "grad_norm": 0.71484375, "learning_rate": 9.508103039021764e-06, "loss": 4.0205, "step": 12945 }, { "epoch": 4.312484384109269, "grad_norm": 0.6796875, "learning_rate": 9.507323142060359e-06, "loss": 3.9389, "step": 12946 }, { "epoch": 4.3128175231115184, "grad_norm": 0.72265625, "learning_rate": 9.506543221717988e-06, "loss": 4.0029, "step": 12947 }, { "epoch": 4.313150662113767, "grad_norm": 0.76953125, "learning_rate": 9.505763278003733e-06, "loss": 4.0644, "step": 12948 }, { "epoch": 4.313483801116016, "grad_norm": 0.73828125, "learning_rate": 9.504983310926684e-06, "loss": 3.9415, "step": 12949 }, { "epoch": 4.313816940118264, "grad_norm": 0.70703125, "learning_rate": 9.504203320495918e-06, "loss": 3.9299, "step": 12950 }, { "epoch": 4.314150079120513, "grad_norm": 0.72265625, "learning_rate": 9.503423306720528e-06, "loss": 4.0451, "step": 12951 }, { "epoch": 4.314483218122762, "grad_norm": 0.703125, "learning_rate": 9.502643269609591e-06, "loss": 4.0162, "step": 12952 }, { "epoch": 4.314816357125011, "grad_norm": 0.7265625, "learning_rate": 9.501863209172203e-06, "loss": 4.0355, "step": 12953 }, { "epoch": 4.315149496127259, "grad_norm": 0.71875, "learning_rate": 9.501083125417442e-06, "loss": 4.0218, "step": 12954 }, { "epoch": 4.3154826351295075, "grad_norm": 0.76171875, "learning_rate": 9.5003030183544e-06, "loss": 3.9933, "step": 12955 }, { "epoch": 4.315815774131757, "grad_norm": 0.7109375, "learning_rate": 9.499522887992157e-06, "loss": 4.0207, "step": 12956 }, { "epoch": 4.316148913134005, "grad_norm": 0.734375, "learning_rate": 9.498742734339805e-06, "loss": 3.9507, "step": 12957 }, { "epoch": 4.316482052136254, "grad_norm": 0.7265625, "learning_rate": 9.497962557406428e-06, "loss": 3.9565, "step": 12958 }, { "epoch": 4.316815191138502, "grad_norm": 0.73046875, "learning_rate": 9.497182357201116e-06, "loss": 3.9962, "step": 12959 }, { "epoch": 4.317148330140752, "grad_norm": 0.7109375, "learning_rate": 9.496402133732955e-06, "loss": 3.9985, "step": 12960 }, { "epoch": 4.317481469143, "grad_norm": 0.70703125, "learning_rate": 9.495621887011032e-06, "loss": 4.0104, "step": 12961 }, { "epoch": 4.317814608145248, "grad_norm": 0.734375, "learning_rate": 9.494841617044439e-06, "loss": 4.044, "step": 12962 }, { "epoch": 4.318147747147497, "grad_norm": 0.73046875, "learning_rate": 9.494061323842259e-06, "loss": 3.9913, "step": 12963 }, { "epoch": 4.318480886149746, "grad_norm": 0.71484375, "learning_rate": 9.493281007413587e-06, "loss": 4.044, "step": 12964 }, { "epoch": 4.318814025151995, "grad_norm": 0.71875, "learning_rate": 9.492500667767505e-06, "loss": 3.9986, "step": 12965 }, { "epoch": 4.319147164154243, "grad_norm": 0.734375, "learning_rate": 9.49172030491311e-06, "loss": 4.0149, "step": 12966 }, { "epoch": 4.319480303156492, "grad_norm": 0.703125, "learning_rate": 9.490939918859487e-06, "loss": 4.0251, "step": 12967 }, { "epoch": 4.319813442158741, "grad_norm": 0.7265625, "learning_rate": 9.490159509615725e-06, "loss": 4.091, "step": 12968 }, { "epoch": 4.32014658116099, "grad_norm": 0.69921875, "learning_rate": 9.489379077190914e-06, "loss": 4.0136, "step": 12969 }, { "epoch": 4.320479720163238, "grad_norm": 0.71484375, "learning_rate": 9.488598621594148e-06, "loss": 3.9096, "step": 12970 }, { "epoch": 4.320812859165486, "grad_norm": 0.71484375, "learning_rate": 9.487818142834514e-06, "loss": 4.0813, "step": 12971 }, { "epoch": 4.321145998167736, "grad_norm": 0.71484375, "learning_rate": 9.487037640921105e-06, "loss": 3.9823, "step": 12972 }, { "epoch": 4.321479137169984, "grad_norm": 0.7265625, "learning_rate": 9.486257115863011e-06, "loss": 4.0274, "step": 12973 }, { "epoch": 4.321812276172233, "grad_norm": 0.7265625, "learning_rate": 9.485476567669321e-06, "loss": 3.9207, "step": 12974 }, { "epoch": 4.322145415174481, "grad_norm": 0.66796875, "learning_rate": 9.484695996349135e-06, "loss": 4.0147, "step": 12975 }, { "epoch": 4.3224785541767305, "grad_norm": 0.73828125, "learning_rate": 9.483915401911536e-06, "loss": 3.9926, "step": 12976 }, { "epoch": 4.322811693178979, "grad_norm": 0.73828125, "learning_rate": 9.483134784365622e-06, "loss": 4.0275, "step": 12977 }, { "epoch": 4.323144832181228, "grad_norm": 0.7265625, "learning_rate": 9.482354143720481e-06, "loss": 4.0242, "step": 12978 }, { "epoch": 4.323477971183476, "grad_norm": 0.7265625, "learning_rate": 9.48157347998521e-06, "loss": 3.9853, "step": 12979 }, { "epoch": 4.323811110185725, "grad_norm": 0.74609375, "learning_rate": 9.480792793168898e-06, "loss": 3.9582, "step": 12980 }, { "epoch": 4.324144249187974, "grad_norm": 0.72265625, "learning_rate": 9.480012083280644e-06, "loss": 4.0202, "step": 12981 }, { "epoch": 4.324477388190222, "grad_norm": 0.7109375, "learning_rate": 9.479231350329532e-06, "loss": 3.9428, "step": 12982 }, { "epoch": 4.324810527192471, "grad_norm": 0.69921875, "learning_rate": 9.478450594324666e-06, "loss": 3.9886, "step": 12983 }, { "epoch": 4.32514366619472, "grad_norm": 0.7109375, "learning_rate": 9.477669815275133e-06, "loss": 3.9499, "step": 12984 }, { "epoch": 4.325476805196969, "grad_norm": 0.7265625, "learning_rate": 9.476889013190033e-06, "loss": 3.9811, "step": 12985 }, { "epoch": 4.325809944199217, "grad_norm": 0.73046875, "learning_rate": 9.476108188078458e-06, "loss": 4.0097, "step": 12986 }, { "epoch": 4.326143083201465, "grad_norm": 0.73828125, "learning_rate": 9.475327339949502e-06, "loss": 4.0655, "step": 12987 }, { "epoch": 4.3264762222037145, "grad_norm": 0.734375, "learning_rate": 9.474546468812261e-06, "loss": 3.9413, "step": 12988 }, { "epoch": 4.326809361205963, "grad_norm": 0.75390625, "learning_rate": 9.47376557467583e-06, "loss": 3.9202, "step": 12989 }, { "epoch": 4.327142500208212, "grad_norm": 0.734375, "learning_rate": 9.472984657549305e-06, "loss": 4.0155, "step": 12990 }, { "epoch": 4.32747563921046, "grad_norm": 0.7421875, "learning_rate": 9.472203717441784e-06, "loss": 4.0291, "step": 12991 }, { "epoch": 4.3278087782127095, "grad_norm": 0.71875, "learning_rate": 9.47142275436236e-06, "loss": 3.9682, "step": 12992 }, { "epoch": 4.328141917214958, "grad_norm": 0.71875, "learning_rate": 9.470641768320131e-06, "loss": 3.9692, "step": 12993 }, { "epoch": 4.328475056217207, "grad_norm": 0.75, "learning_rate": 9.469860759324196e-06, "loss": 4.0223, "step": 12994 }, { "epoch": 4.328808195219455, "grad_norm": 0.734375, "learning_rate": 9.469079727383648e-06, "loss": 4.0086, "step": 12995 }, { "epoch": 4.329141334221704, "grad_norm": 0.6875, "learning_rate": 9.468298672507584e-06, "loss": 3.9947, "step": 12996 }, { "epoch": 4.329474473223953, "grad_norm": 0.75390625, "learning_rate": 9.467517594705105e-06, "loss": 3.9851, "step": 12997 }, { "epoch": 4.329807612226201, "grad_norm": 0.69921875, "learning_rate": 9.466736493985312e-06, "loss": 4.0251, "step": 12998 }, { "epoch": 4.33014075122845, "grad_norm": 0.71875, "learning_rate": 9.465955370357295e-06, "loss": 4.0524, "step": 12999 }, { "epoch": 4.3304738902306985, "grad_norm": 0.69921875, "learning_rate": 9.465174223830156e-06, "loss": 3.9834, "step": 13000 }, { "epoch": 4.330807029232948, "grad_norm": 0.7265625, "learning_rate": 9.464393054412995e-06, "loss": 4.0831, "step": 13001 }, { "epoch": 4.331140168235196, "grad_norm": 0.71875, "learning_rate": 9.463611862114908e-06, "loss": 4.0411, "step": 13002 }, { "epoch": 4.331473307237445, "grad_norm": 0.69921875, "learning_rate": 9.462830646944999e-06, "loss": 4.0172, "step": 13003 }, { "epoch": 4.3318064462396935, "grad_norm": 0.8046875, "learning_rate": 9.462049408912363e-06, "loss": 4.0025, "step": 13004 }, { "epoch": 4.332139585241942, "grad_norm": 0.7421875, "learning_rate": 9.461268148026102e-06, "loss": 3.9892, "step": 13005 }, { "epoch": 4.332472724244191, "grad_norm": 0.7109375, "learning_rate": 9.460486864295311e-06, "loss": 3.9915, "step": 13006 }, { "epoch": 4.332805863246439, "grad_norm": 0.72265625, "learning_rate": 9.459705557729099e-06, "loss": 3.9729, "step": 13007 }, { "epoch": 4.333139002248688, "grad_norm": 0.703125, "learning_rate": 9.458924228336561e-06, "loss": 4.0304, "step": 13008 }, { "epoch": 4.333472141250937, "grad_norm": 0.75, "learning_rate": 9.458142876126798e-06, "loss": 4.0578, "step": 13009 }, { "epoch": 4.333805280253186, "grad_norm": 0.72265625, "learning_rate": 9.457361501108912e-06, "loss": 3.8998, "step": 13010 }, { "epoch": 4.334138419255434, "grad_norm": 0.71875, "learning_rate": 9.456580103292004e-06, "loss": 3.9971, "step": 13011 }, { "epoch": 4.334471558257683, "grad_norm": 0.75, "learning_rate": 9.455798682685177e-06, "loss": 4.0064, "step": 13012 }, { "epoch": 4.334804697259932, "grad_norm": 0.69921875, "learning_rate": 9.45501723929753e-06, "loss": 3.9877, "step": 13013 }, { "epoch": 4.335137836262181, "grad_norm": 0.7421875, "learning_rate": 9.454235773138168e-06, "loss": 4.0145, "step": 13014 }, { "epoch": 4.335470975264429, "grad_norm": 0.66796875, "learning_rate": 9.453454284216189e-06, "loss": 3.9359, "step": 13015 }, { "epoch": 4.3358041142666774, "grad_norm": 0.70703125, "learning_rate": 9.452672772540703e-06, "loss": 3.9999, "step": 13016 }, { "epoch": 4.336137253268927, "grad_norm": 0.73046875, "learning_rate": 9.451891238120808e-06, "loss": 4.013, "step": 13017 }, { "epoch": 4.336470392271175, "grad_norm": 0.71484375, "learning_rate": 9.451109680965605e-06, "loss": 4.0249, "step": 13018 }, { "epoch": 4.336803531273424, "grad_norm": 0.7265625, "learning_rate": 9.450328101084202e-06, "loss": 4.0305, "step": 13019 }, { "epoch": 4.337136670275672, "grad_norm": 0.74609375, "learning_rate": 9.4495464984857e-06, "loss": 3.8948, "step": 13020 }, { "epoch": 4.337469809277922, "grad_norm": 0.74609375, "learning_rate": 9.448764873179208e-06, "loss": 4.032, "step": 13021 }, { "epoch": 4.33780294828017, "grad_norm": 0.7265625, "learning_rate": 9.447983225173821e-06, "loss": 4.0967, "step": 13022 }, { "epoch": 4.338136087282418, "grad_norm": 0.71875, "learning_rate": 9.447201554478653e-06, "loss": 3.9814, "step": 13023 }, { "epoch": 4.338469226284667, "grad_norm": 0.70703125, "learning_rate": 9.4464198611028e-06, "loss": 3.9771, "step": 13024 }, { "epoch": 4.338802365286916, "grad_norm": 0.71484375, "learning_rate": 9.445638145055377e-06, "loss": 3.9941, "step": 13025 }, { "epoch": 4.339135504289165, "grad_norm": 0.7421875, "learning_rate": 9.44485640634548e-06, "loss": 4.0643, "step": 13026 }, { "epoch": 4.339468643291413, "grad_norm": 0.71875, "learning_rate": 9.44407464498222e-06, "loss": 3.9743, "step": 13027 }, { "epoch": 4.339801782293662, "grad_norm": 0.7734375, "learning_rate": 9.443292860974701e-06, "loss": 3.9899, "step": 13028 }, { "epoch": 4.340134921295911, "grad_norm": 0.69921875, "learning_rate": 9.442511054332029e-06, "loss": 4.0257, "step": 13029 }, { "epoch": 4.34046806029816, "grad_norm": 0.69921875, "learning_rate": 9.44172922506331e-06, "loss": 4.0126, "step": 13030 }, { "epoch": 4.340801199300408, "grad_norm": 0.69921875, "learning_rate": 9.440947373177651e-06, "loss": 3.936, "step": 13031 }, { "epoch": 4.341134338302656, "grad_norm": 0.70703125, "learning_rate": 9.44016549868416e-06, "loss": 3.9631, "step": 13032 }, { "epoch": 4.341467477304906, "grad_norm": 0.7109375, "learning_rate": 9.439383601591944e-06, "loss": 4.0164, "step": 13033 }, { "epoch": 4.341800616307154, "grad_norm": 0.73046875, "learning_rate": 9.438601681910113e-06, "loss": 3.9623, "step": 13034 }, { "epoch": 4.342133755309403, "grad_norm": 0.69921875, "learning_rate": 9.437819739647765e-06, "loss": 4.0246, "step": 13035 }, { "epoch": 4.342466894311651, "grad_norm": 0.74609375, "learning_rate": 9.437037774814016e-06, "loss": 4.0159, "step": 13036 }, { "epoch": 4.3428000333139005, "grad_norm": 0.7265625, "learning_rate": 9.436255787417974e-06, "loss": 3.9863, "step": 13037 }, { "epoch": 4.343133172316149, "grad_norm": 0.7265625, "learning_rate": 9.435473777468746e-06, "loss": 3.9442, "step": 13038 }, { "epoch": 4.343466311318398, "grad_norm": 0.7265625, "learning_rate": 9.434691744975442e-06, "loss": 3.9311, "step": 13039 }, { "epoch": 4.343799450320646, "grad_norm": 0.73828125, "learning_rate": 9.433909689947164e-06, "loss": 4.0022, "step": 13040 }, { "epoch": 4.344132589322895, "grad_norm": 0.72265625, "learning_rate": 9.433127612393034e-06, "loss": 4.0202, "step": 13041 }, { "epoch": 4.344465728325144, "grad_norm": 0.734375, "learning_rate": 9.432345512322148e-06, "loss": 4.0442, "step": 13042 }, { "epoch": 4.344798867327392, "grad_norm": 0.73046875, "learning_rate": 9.431563389743629e-06, "loss": 3.9302, "step": 13043 }, { "epoch": 4.345132006329641, "grad_norm": 0.71875, "learning_rate": 9.430781244666576e-06, "loss": 3.9944, "step": 13044 }, { "epoch": 4.3454651453318895, "grad_norm": 0.6953125, "learning_rate": 9.429999077100105e-06, "loss": 3.9881, "step": 13045 }, { "epoch": 4.345798284334139, "grad_norm": 0.73828125, "learning_rate": 9.429216887053323e-06, "loss": 4.1194, "step": 13046 }, { "epoch": 4.346131423336387, "grad_norm": 0.73828125, "learning_rate": 9.428434674535346e-06, "loss": 4.012, "step": 13047 }, { "epoch": 4.346464562338636, "grad_norm": 0.75, "learning_rate": 9.42765243955528e-06, "loss": 4.0367, "step": 13048 }, { "epoch": 4.3467977013408845, "grad_norm": 0.77734375, "learning_rate": 9.42687018212224e-06, "loss": 3.9281, "step": 13049 }, { "epoch": 4.347130840343133, "grad_norm": 0.75, "learning_rate": 9.426087902245334e-06, "loss": 3.9584, "step": 13050 }, { "epoch": 4.347463979345382, "grad_norm": 0.6953125, "learning_rate": 9.425305599933677e-06, "loss": 3.9832, "step": 13051 }, { "epoch": 4.34779711834763, "grad_norm": 0.74609375, "learning_rate": 9.42452327519638e-06, "loss": 4.0285, "step": 13052 }, { "epoch": 4.3481302573498795, "grad_norm": 0.76953125, "learning_rate": 9.423740928042555e-06, "loss": 3.9064, "step": 13053 }, { "epoch": 4.348463396352128, "grad_norm": 0.75, "learning_rate": 9.422958558481316e-06, "loss": 3.9786, "step": 13054 }, { "epoch": 4.348796535354377, "grad_norm": 0.734375, "learning_rate": 9.422176166521772e-06, "loss": 4.0423, "step": 13055 }, { "epoch": 4.349129674356625, "grad_norm": 0.703125, "learning_rate": 9.421393752173042e-06, "loss": 4.0065, "step": 13056 }, { "epoch": 4.349462813358874, "grad_norm": 0.734375, "learning_rate": 9.420611315444235e-06, "loss": 3.9674, "step": 13057 }, { "epoch": 4.349795952361123, "grad_norm": 0.6875, "learning_rate": 9.419828856344468e-06, "loss": 3.9491, "step": 13058 }, { "epoch": 4.350129091363371, "grad_norm": 0.71484375, "learning_rate": 9.41904637488285e-06, "loss": 3.9989, "step": 13059 }, { "epoch": 4.35046223036562, "grad_norm": 0.765625, "learning_rate": 9.418263871068501e-06, "loss": 3.9806, "step": 13060 }, { "epoch": 4.3507953693678685, "grad_norm": 0.73828125, "learning_rate": 9.417481344910533e-06, "loss": 3.983, "step": 13061 }, { "epoch": 4.351128508370118, "grad_norm": 0.70703125, "learning_rate": 9.41669879641806e-06, "loss": 3.9851, "step": 13062 }, { "epoch": 4.351461647372366, "grad_norm": 0.7265625, "learning_rate": 9.415916225600196e-06, "loss": 4.0001, "step": 13063 }, { "epoch": 4.351794786374615, "grad_norm": 0.77734375, "learning_rate": 9.415133632466059e-06, "loss": 3.9849, "step": 13064 }, { "epoch": 4.352127925376863, "grad_norm": 0.75, "learning_rate": 9.414351017024764e-06, "loss": 4.0535, "step": 13065 }, { "epoch": 4.352461064379112, "grad_norm": 0.72265625, "learning_rate": 9.413568379285426e-06, "loss": 4.0004, "step": 13066 }, { "epoch": 4.352794203381361, "grad_norm": 0.703125, "learning_rate": 9.412785719257161e-06, "loss": 4.0552, "step": 13067 }, { "epoch": 4.353127342383609, "grad_norm": 0.71484375, "learning_rate": 9.412003036949085e-06, "loss": 4.0125, "step": 13068 }, { "epoch": 4.353460481385858, "grad_norm": 0.75, "learning_rate": 9.411220332370315e-06, "loss": 3.9913, "step": 13069 }, { "epoch": 4.353793620388107, "grad_norm": 0.7421875, "learning_rate": 9.41043760552997e-06, "loss": 3.9553, "step": 13070 }, { "epoch": 4.354126759390356, "grad_norm": 0.73828125, "learning_rate": 9.409654856437163e-06, "loss": 4.007, "step": 13071 }, { "epoch": 4.354459898392604, "grad_norm": 0.7109375, "learning_rate": 9.408872085101011e-06, "loss": 4.0032, "step": 13072 }, { "epoch": 4.354793037394853, "grad_norm": 0.74609375, "learning_rate": 9.408089291530638e-06, "loss": 3.97, "step": 13073 }, { "epoch": 4.355126176397102, "grad_norm": 0.71484375, "learning_rate": 9.407306475735154e-06, "loss": 3.9697, "step": 13074 }, { "epoch": 4.355459315399351, "grad_norm": 0.73828125, "learning_rate": 9.406523637723684e-06, "loss": 3.9816, "step": 13075 }, { "epoch": 4.355792454401599, "grad_norm": 0.7265625, "learning_rate": 9.405740777505342e-06, "loss": 4.0479, "step": 13076 }, { "epoch": 4.356125593403847, "grad_norm": 0.69921875, "learning_rate": 9.404957895089246e-06, "loss": 4.058, "step": 13077 }, { "epoch": 4.356458732406097, "grad_norm": 0.6875, "learning_rate": 9.404174990484519e-06, "loss": 4.1211, "step": 13078 }, { "epoch": 4.356791871408345, "grad_norm": 0.70703125, "learning_rate": 9.403392063700278e-06, "loss": 4.0083, "step": 13079 }, { "epoch": 4.357125010410594, "grad_norm": 0.74609375, "learning_rate": 9.40260911474564e-06, "loss": 3.9902, "step": 13080 }, { "epoch": 4.357458149412842, "grad_norm": 0.71484375, "learning_rate": 9.401826143629727e-06, "loss": 3.9265, "step": 13081 }, { "epoch": 4.357791288415092, "grad_norm": 0.71875, "learning_rate": 9.401043150361661e-06, "loss": 3.9717, "step": 13082 }, { "epoch": 4.35812442741734, "grad_norm": 0.74609375, "learning_rate": 9.400260134950557e-06, "loss": 3.993, "step": 13083 }, { "epoch": 4.358457566419588, "grad_norm": 0.69921875, "learning_rate": 9.399477097405542e-06, "loss": 4.0017, "step": 13084 }, { "epoch": 4.358790705421837, "grad_norm": 0.734375, "learning_rate": 9.39869403773573e-06, "loss": 4.008, "step": 13085 }, { "epoch": 4.359123844424086, "grad_norm": 0.7265625, "learning_rate": 9.397910955950245e-06, "loss": 3.9765, "step": 13086 }, { "epoch": 4.359456983426335, "grad_norm": 0.74609375, "learning_rate": 9.397127852058212e-06, "loss": 3.9944, "step": 13087 }, { "epoch": 4.359790122428583, "grad_norm": 0.75, "learning_rate": 9.396344726068745e-06, "loss": 3.9932, "step": 13088 }, { "epoch": 4.360123261430832, "grad_norm": 0.71875, "learning_rate": 9.39556157799097e-06, "loss": 4.0495, "step": 13089 }, { "epoch": 4.360456400433081, "grad_norm": 0.6875, "learning_rate": 9.394778407834008e-06, "loss": 4.0073, "step": 13090 }, { "epoch": 4.36078953943533, "grad_norm": 0.734375, "learning_rate": 9.393995215606985e-06, "loss": 4.0493, "step": 13091 }, { "epoch": 4.361122678437578, "grad_norm": 0.734375, "learning_rate": 9.393212001319016e-06, "loss": 4.0023, "step": 13092 }, { "epoch": 4.361455817439826, "grad_norm": 0.6796875, "learning_rate": 9.392428764979231e-06, "loss": 3.9334, "step": 13093 }, { "epoch": 4.3617889564420755, "grad_norm": 0.75390625, "learning_rate": 9.391645506596748e-06, "loss": 4.0048, "step": 13094 }, { "epoch": 4.362122095444324, "grad_norm": 0.6953125, "learning_rate": 9.390862226180692e-06, "loss": 4.0101, "step": 13095 }, { "epoch": 4.362455234446573, "grad_norm": 0.73828125, "learning_rate": 9.390078923740187e-06, "loss": 4.0142, "step": 13096 }, { "epoch": 4.362788373448821, "grad_norm": 0.72265625, "learning_rate": 9.389295599284356e-06, "loss": 4.006, "step": 13097 }, { "epoch": 4.3631215124510705, "grad_norm": 0.7109375, "learning_rate": 9.388512252822326e-06, "loss": 4.0387, "step": 13098 }, { "epoch": 4.363454651453319, "grad_norm": 0.7109375, "learning_rate": 9.387728884363217e-06, "loss": 3.9657, "step": 13099 }, { "epoch": 4.363787790455568, "grad_norm": 0.7265625, "learning_rate": 9.386945493916155e-06, "loss": 3.9684, "step": 13100 }, { "epoch": 4.364120929457816, "grad_norm": 0.73828125, "learning_rate": 9.386162081490264e-06, "loss": 3.9289, "step": 13101 }, { "epoch": 4.364454068460065, "grad_norm": 0.71484375, "learning_rate": 9.385378647094674e-06, "loss": 4.0529, "step": 13102 }, { "epoch": 4.364787207462314, "grad_norm": 0.70703125, "learning_rate": 9.384595190738502e-06, "loss": 3.9627, "step": 13103 }, { "epoch": 4.365120346464562, "grad_norm": 0.69140625, "learning_rate": 9.383811712430882e-06, "loss": 3.9862, "step": 13104 }, { "epoch": 4.365453485466811, "grad_norm": 0.71875, "learning_rate": 9.383028212180933e-06, "loss": 4.0162, "step": 13105 }, { "epoch": 4.3657866244690595, "grad_norm": 0.7109375, "learning_rate": 9.382244689997784e-06, "loss": 4.0095, "step": 13106 }, { "epoch": 4.366119763471309, "grad_norm": 0.6796875, "learning_rate": 9.381461145890563e-06, "loss": 4.0074, "step": 13107 }, { "epoch": 4.366452902473557, "grad_norm": 0.72265625, "learning_rate": 9.380677579868393e-06, "loss": 4.0109, "step": 13108 }, { "epoch": 4.366786041475806, "grad_norm": 0.6953125, "learning_rate": 9.379893991940404e-06, "loss": 4.0661, "step": 13109 }, { "epoch": 4.3671191804780545, "grad_norm": 0.6953125, "learning_rate": 9.379110382115723e-06, "loss": 4.0428, "step": 13110 }, { "epoch": 4.367452319480303, "grad_norm": 0.70703125, "learning_rate": 9.378326750403473e-06, "loss": 3.9904, "step": 13111 }, { "epoch": 4.367785458482552, "grad_norm": 0.75, "learning_rate": 9.377543096812788e-06, "loss": 3.9722, "step": 13112 }, { "epoch": 4.3681185974848, "grad_norm": 0.7265625, "learning_rate": 9.376759421352792e-06, "loss": 3.9537, "step": 13113 }, { "epoch": 4.368451736487049, "grad_norm": 0.734375, "learning_rate": 9.375975724032611e-06, "loss": 4.0154, "step": 13114 }, { "epoch": 4.368784875489298, "grad_norm": 0.74609375, "learning_rate": 9.37519200486138e-06, "loss": 4.0314, "step": 13115 }, { "epoch": 4.369118014491547, "grad_norm": 0.70703125, "learning_rate": 9.374408263848222e-06, "loss": 4.0084, "step": 13116 }, { "epoch": 4.369451153493795, "grad_norm": 0.7421875, "learning_rate": 9.373624501002269e-06, "loss": 4.0441, "step": 13117 }, { "epoch": 4.369784292496044, "grad_norm": 0.69140625, "learning_rate": 9.372840716332647e-06, "loss": 4.0517, "step": 13118 }, { "epoch": 4.370117431498293, "grad_norm": 0.75, "learning_rate": 9.372056909848488e-06, "loss": 4.0091, "step": 13119 }, { "epoch": 4.370450570500541, "grad_norm": 0.7109375, "learning_rate": 9.37127308155892e-06, "loss": 3.9432, "step": 13120 }, { "epoch": 4.37078370950279, "grad_norm": 0.68359375, "learning_rate": 9.370489231473074e-06, "loss": 4.0776, "step": 13121 }, { "epoch": 4.3711168485050385, "grad_norm": 0.75, "learning_rate": 9.36970535960008e-06, "loss": 3.9744, "step": 13122 }, { "epoch": 4.371449987507288, "grad_norm": 0.6875, "learning_rate": 9.368921465949069e-06, "loss": 4.0253, "step": 13123 }, { "epoch": 4.371783126509536, "grad_norm": 0.7109375, "learning_rate": 9.368137550529173e-06, "loss": 4.0135, "step": 13124 }, { "epoch": 4.372116265511785, "grad_norm": 0.73828125, "learning_rate": 9.367353613349516e-06, "loss": 4.023, "step": 13125 }, { "epoch": 4.372449404514033, "grad_norm": 0.703125, "learning_rate": 9.366569654419239e-06, "loss": 4.0783, "step": 13126 }, { "epoch": 4.372782543516282, "grad_norm": 0.671875, "learning_rate": 9.365785673747464e-06, "loss": 4.0427, "step": 13127 }, { "epoch": 4.373115682518531, "grad_norm": 0.7421875, "learning_rate": 9.36500167134333e-06, "loss": 3.9659, "step": 13128 }, { "epoch": 4.373448821520779, "grad_norm": 0.75390625, "learning_rate": 9.364217647215967e-06, "loss": 4.0443, "step": 13129 }, { "epoch": 4.373781960523028, "grad_norm": 0.71484375, "learning_rate": 9.363433601374503e-06, "loss": 4.0078, "step": 13130 }, { "epoch": 4.374115099525277, "grad_norm": 0.70703125, "learning_rate": 9.362649533828076e-06, "loss": 3.9843, "step": 13131 }, { "epoch": 4.374448238527526, "grad_norm": 0.69140625, "learning_rate": 9.361865444585814e-06, "loss": 4.0085, "step": 13132 }, { "epoch": 4.374781377529774, "grad_norm": 0.70703125, "learning_rate": 9.361081333656858e-06, "loss": 3.9752, "step": 13133 }, { "epoch": 4.375114516532023, "grad_norm": 0.71484375, "learning_rate": 9.36029720105033e-06, "loss": 3.9478, "step": 13134 }, { "epoch": 4.375447655534272, "grad_norm": 0.76953125, "learning_rate": 9.359513046775371e-06, "loss": 3.999, "step": 13135 }, { "epoch": 4.375780794536521, "grad_norm": 0.69140625, "learning_rate": 9.358728870841113e-06, "loss": 3.9785, "step": 13136 }, { "epoch": 4.376113933538769, "grad_norm": 0.75390625, "learning_rate": 9.35794467325669e-06, "loss": 3.9819, "step": 13137 }, { "epoch": 4.376447072541017, "grad_norm": 0.73046875, "learning_rate": 9.357160454031233e-06, "loss": 4.0619, "step": 13138 }, { "epoch": 4.376780211543267, "grad_norm": 0.7265625, "learning_rate": 9.356376213173885e-06, "loss": 4.0269, "step": 13139 }, { "epoch": 4.377113350545515, "grad_norm": 0.78125, "learning_rate": 9.35559195069377e-06, "loss": 3.9614, "step": 13140 }, { "epoch": 4.377446489547764, "grad_norm": 0.74609375, "learning_rate": 9.35480766660003e-06, "loss": 4.0601, "step": 13141 }, { "epoch": 4.377779628550012, "grad_norm": 0.69921875, "learning_rate": 9.354023360901798e-06, "loss": 4.0565, "step": 13142 }, { "epoch": 4.3781127675522615, "grad_norm": 0.7578125, "learning_rate": 9.353239033608208e-06, "loss": 4.0026, "step": 13143 }, { "epoch": 4.37844590655451, "grad_norm": 0.703125, "learning_rate": 9.352454684728399e-06, "loss": 4.0043, "step": 13144 }, { "epoch": 4.378779045556758, "grad_norm": 0.68359375, "learning_rate": 9.351670314271504e-06, "loss": 4.0496, "step": 13145 }, { "epoch": 4.379112184559007, "grad_norm": 0.7109375, "learning_rate": 9.350885922246662e-06, "loss": 4.0371, "step": 13146 }, { "epoch": 4.379445323561256, "grad_norm": 0.76171875, "learning_rate": 9.350101508663009e-06, "loss": 3.9844, "step": 13147 }, { "epoch": 4.379778462563505, "grad_norm": 0.72265625, "learning_rate": 9.349317073529678e-06, "loss": 3.9669, "step": 13148 }, { "epoch": 4.380111601565753, "grad_norm": 0.74609375, "learning_rate": 9.348532616855809e-06, "loss": 4.0409, "step": 13149 }, { "epoch": 4.380444740568002, "grad_norm": 0.7421875, "learning_rate": 9.34774813865054e-06, "loss": 3.9924, "step": 13150 }, { "epoch": 4.3807778795702506, "grad_norm": 0.734375, "learning_rate": 9.34696363892301e-06, "loss": 3.9389, "step": 13151 }, { "epoch": 4.3811110185725, "grad_norm": 0.7421875, "learning_rate": 9.34617911768235e-06, "loss": 4.0501, "step": 13152 }, { "epoch": 4.381444157574748, "grad_norm": 0.7109375, "learning_rate": 9.345394574937703e-06, "loss": 4.0817, "step": 13153 }, { "epoch": 4.381777296576997, "grad_norm": 0.73046875, "learning_rate": 9.344610010698207e-06, "loss": 3.997, "step": 13154 }, { "epoch": 4.3821104355792455, "grad_norm": 0.73828125, "learning_rate": 9.343825424973001e-06, "loss": 3.9937, "step": 13155 }, { "epoch": 4.382443574581494, "grad_norm": 0.7109375, "learning_rate": 9.34304081777122e-06, "loss": 4.0393, "step": 13156 }, { "epoch": 4.382776713583743, "grad_norm": 0.7421875, "learning_rate": 9.342256189102008e-06, "loss": 4.0072, "step": 13157 }, { "epoch": 4.383109852585991, "grad_norm": 0.75390625, "learning_rate": 9.341471538974499e-06, "loss": 3.9521, "step": 13158 }, { "epoch": 4.3834429915882405, "grad_norm": 0.7109375, "learning_rate": 9.340686867397837e-06, "loss": 3.9412, "step": 13159 }, { "epoch": 4.383776130590489, "grad_norm": 0.7265625, "learning_rate": 9.33990217438116e-06, "loss": 3.9855, "step": 13160 }, { "epoch": 4.384109269592738, "grad_norm": 0.7265625, "learning_rate": 9.339117459933606e-06, "loss": 3.9717, "step": 13161 }, { "epoch": 4.384442408594986, "grad_norm": 0.70703125, "learning_rate": 9.33833272406432e-06, "loss": 3.9299, "step": 13162 }, { "epoch": 4.3847755475972345, "grad_norm": 0.734375, "learning_rate": 9.337547966782437e-06, "loss": 3.975, "step": 13163 }, { "epoch": 4.385108686599484, "grad_norm": 0.68359375, "learning_rate": 9.336763188097102e-06, "loss": 4.0396, "step": 13164 }, { "epoch": 4.385441825601732, "grad_norm": 0.7265625, "learning_rate": 9.335978388017455e-06, "loss": 4.0267, "step": 13165 }, { "epoch": 4.385774964603981, "grad_norm": 0.6875, "learning_rate": 9.335193566552635e-06, "loss": 3.9769, "step": 13166 }, { "epoch": 4.3861081036062295, "grad_norm": 0.73828125, "learning_rate": 9.334408723711784e-06, "loss": 3.9857, "step": 13167 }, { "epoch": 4.386441242608479, "grad_norm": 0.7109375, "learning_rate": 9.333623859504047e-06, "loss": 3.9833, "step": 13168 }, { "epoch": 4.386774381610727, "grad_norm": 0.703125, "learning_rate": 9.332838973938561e-06, "loss": 3.9831, "step": 13169 }, { "epoch": 4.387107520612976, "grad_norm": 0.6953125, "learning_rate": 9.332054067024474e-06, "loss": 4.0069, "step": 13170 }, { "epoch": 4.3874406596152244, "grad_norm": 0.7421875, "learning_rate": 9.33126913877092e-06, "loss": 3.9728, "step": 13171 }, { "epoch": 4.387773798617473, "grad_norm": 0.7421875, "learning_rate": 9.330484189187053e-06, "loss": 4.0751, "step": 13172 }, { "epoch": 4.388106937619722, "grad_norm": 0.76171875, "learning_rate": 9.329699218282009e-06, "loss": 3.9688, "step": 13173 }, { "epoch": 4.38844007662197, "grad_norm": 0.68359375, "learning_rate": 9.328914226064931e-06, "loss": 3.9831, "step": 13174 }, { "epoch": 4.388773215624219, "grad_norm": 0.74609375, "learning_rate": 9.328129212544962e-06, "loss": 4.0378, "step": 13175 }, { "epoch": 4.389106354626468, "grad_norm": 0.7109375, "learning_rate": 9.32734417773125e-06, "loss": 4.088, "step": 13176 }, { "epoch": 4.389439493628717, "grad_norm": 0.80078125, "learning_rate": 9.326559121632934e-06, "loss": 3.9454, "step": 13177 }, { "epoch": 4.389772632630965, "grad_norm": 0.7109375, "learning_rate": 9.325774044259164e-06, "loss": 3.9286, "step": 13178 }, { "epoch": 4.390105771633214, "grad_norm": 0.7421875, "learning_rate": 9.324988945619078e-06, "loss": 4.0276, "step": 13179 }, { "epoch": 4.390438910635463, "grad_norm": 0.73828125, "learning_rate": 9.324203825721825e-06, "loss": 4.0479, "step": 13180 }, { "epoch": 4.390772049637711, "grad_norm": 0.69921875, "learning_rate": 9.32341868457655e-06, "loss": 4.0251, "step": 13181 }, { "epoch": 4.39110518863996, "grad_norm": 0.73046875, "learning_rate": 9.322633522192395e-06, "loss": 3.9773, "step": 13182 }, { "epoch": 4.391438327642208, "grad_norm": 0.7109375, "learning_rate": 9.321848338578507e-06, "loss": 4.0454, "step": 13183 }, { "epoch": 4.391771466644458, "grad_norm": 0.703125, "learning_rate": 9.321063133744032e-06, "loss": 3.9403, "step": 13184 }, { "epoch": 4.392104605646706, "grad_norm": 0.734375, "learning_rate": 9.320277907698118e-06, "loss": 3.961, "step": 13185 }, { "epoch": 4.392437744648955, "grad_norm": 0.71875, "learning_rate": 9.319492660449906e-06, "loss": 3.9643, "step": 13186 }, { "epoch": 4.392770883651203, "grad_norm": 0.734375, "learning_rate": 9.318707392008546e-06, "loss": 3.996, "step": 13187 }, { "epoch": 4.393104022653453, "grad_norm": 0.67578125, "learning_rate": 9.317922102383186e-06, "loss": 4.0596, "step": 13188 }, { "epoch": 4.393437161655701, "grad_norm": 0.71875, "learning_rate": 9.317136791582972e-06, "loss": 3.9997, "step": 13189 }, { "epoch": 4.393770300657949, "grad_norm": 0.7109375, "learning_rate": 9.316351459617047e-06, "loss": 3.9765, "step": 13190 }, { "epoch": 4.394103439660198, "grad_norm": 0.6953125, "learning_rate": 9.315566106494564e-06, "loss": 4.0243, "step": 13191 }, { "epoch": 4.394436578662447, "grad_norm": 0.76171875, "learning_rate": 9.314780732224668e-06, "loss": 3.9902, "step": 13192 }, { "epoch": 4.394769717664696, "grad_norm": 0.73046875, "learning_rate": 9.313995336816506e-06, "loss": 3.987, "step": 13193 }, { "epoch": 4.395102856666944, "grad_norm": 0.73828125, "learning_rate": 9.313209920279229e-06, "loss": 4.0377, "step": 13194 }, { "epoch": 4.395435995669193, "grad_norm": 0.7421875, "learning_rate": 9.312424482621984e-06, "loss": 3.9902, "step": 13195 }, { "epoch": 4.395769134671442, "grad_norm": 0.7421875, "learning_rate": 9.311639023853917e-06, "loss": 3.9264, "step": 13196 }, { "epoch": 4.396102273673691, "grad_norm": 0.74609375, "learning_rate": 9.310853543984182e-06, "loss": 4.0054, "step": 13197 }, { "epoch": 4.396435412675939, "grad_norm": 0.6796875, "learning_rate": 9.310068043021925e-06, "loss": 3.9955, "step": 13198 }, { "epoch": 4.396768551678187, "grad_norm": 0.734375, "learning_rate": 9.309282520976294e-06, "loss": 3.9683, "step": 13199 }, { "epoch": 4.3971016906804365, "grad_norm": 0.69921875, "learning_rate": 9.308496977856444e-06, "loss": 3.9926, "step": 13200 }, { "epoch": 4.397434829682685, "grad_norm": 0.703125, "learning_rate": 9.307711413671519e-06, "loss": 4.0039, "step": 13201 }, { "epoch": 4.397767968684934, "grad_norm": 0.71875, "learning_rate": 9.306925828430671e-06, "loss": 3.9208, "step": 13202 }, { "epoch": 4.398101107687182, "grad_norm": 0.7265625, "learning_rate": 9.306140222143054e-06, "loss": 3.9253, "step": 13203 }, { "epoch": 4.3984342466894315, "grad_norm": 0.7109375, "learning_rate": 9.30535459481781e-06, "loss": 4.0572, "step": 13204 }, { "epoch": 4.39876738569168, "grad_norm": 0.73046875, "learning_rate": 9.304568946464102e-06, "loss": 4.028, "step": 13205 }, { "epoch": 4.399100524693928, "grad_norm": 0.765625, "learning_rate": 9.30378327709107e-06, "loss": 3.9269, "step": 13206 }, { "epoch": 4.399433663696177, "grad_norm": 0.73046875, "learning_rate": 9.302997586707871e-06, "loss": 3.869, "step": 13207 }, { "epoch": 4.399766802698426, "grad_norm": 0.73828125, "learning_rate": 9.302211875323653e-06, "loss": 3.97, "step": 13208 }, { "epoch": 4.400099941700675, "grad_norm": 0.72265625, "learning_rate": 9.301426142947574e-06, "loss": 4.0296, "step": 13209 }, { "epoch": 4.400433080702923, "grad_norm": 0.71875, "learning_rate": 9.300640389588782e-06, "loss": 4.0019, "step": 13210 }, { "epoch": 4.400766219705172, "grad_norm": 0.75390625, "learning_rate": 9.299854615256428e-06, "loss": 4.0849, "step": 13211 }, { "epoch": 4.4010993587074205, "grad_norm": 0.74609375, "learning_rate": 9.299068819959666e-06, "loss": 3.9632, "step": 13212 }, { "epoch": 4.40143249770967, "grad_norm": 0.71875, "learning_rate": 9.29828300370765e-06, "loss": 3.9876, "step": 13213 }, { "epoch": 4.401765636711918, "grad_norm": 0.73828125, "learning_rate": 9.297497166509534e-06, "loss": 3.9588, "step": 13214 }, { "epoch": 4.402098775714167, "grad_norm": 0.75, "learning_rate": 9.296711308374466e-06, "loss": 4.0181, "step": 13215 }, { "epoch": 4.4024319147164155, "grad_norm": 0.72265625, "learning_rate": 9.295925429311604e-06, "loss": 3.9758, "step": 13216 }, { "epoch": 4.402765053718664, "grad_norm": 0.74609375, "learning_rate": 9.295139529330101e-06, "loss": 3.9978, "step": 13217 }, { "epoch": 4.403098192720913, "grad_norm": 0.71484375, "learning_rate": 9.294353608439111e-06, "loss": 4.0189, "step": 13218 }, { "epoch": 4.403431331723161, "grad_norm": 0.75390625, "learning_rate": 9.293567666647788e-06, "loss": 4.0083, "step": 13219 }, { "epoch": 4.40376447072541, "grad_norm": 0.74609375, "learning_rate": 9.292781703965286e-06, "loss": 3.9465, "step": 13220 }, { "epoch": 4.404097609727659, "grad_norm": 0.75, "learning_rate": 9.29199572040076e-06, "loss": 3.987, "step": 13221 }, { "epoch": 4.404430748729908, "grad_norm": 0.69140625, "learning_rate": 9.291209715963365e-06, "loss": 4.0291, "step": 13222 }, { "epoch": 4.404763887732156, "grad_norm": 0.6953125, "learning_rate": 9.290423690662259e-06, "loss": 4.0114, "step": 13223 }, { "epoch": 4.4050970267344045, "grad_norm": 0.75390625, "learning_rate": 9.289637644506591e-06, "loss": 4.0572, "step": 13224 }, { "epoch": 4.405430165736654, "grad_norm": 0.71484375, "learning_rate": 9.288851577505526e-06, "loss": 4.0416, "step": 13225 }, { "epoch": 4.405763304738902, "grad_norm": 0.70703125, "learning_rate": 9.288065489668211e-06, "loss": 3.9512, "step": 13226 }, { "epoch": 4.406096443741151, "grad_norm": 0.69921875, "learning_rate": 9.28727938100381e-06, "loss": 3.9869, "step": 13227 }, { "epoch": 4.4064295827433995, "grad_norm": 0.7265625, "learning_rate": 9.286493251521471e-06, "loss": 4.0015, "step": 13228 }, { "epoch": 4.406762721745649, "grad_norm": 0.74609375, "learning_rate": 9.285707101230357e-06, "loss": 4.0114, "step": 13229 }, { "epoch": 4.407095860747897, "grad_norm": 0.7265625, "learning_rate": 9.284920930139622e-06, "loss": 4.0074, "step": 13230 }, { "epoch": 4.407428999750146, "grad_norm": 0.71875, "learning_rate": 9.284134738258426e-06, "loss": 4.052, "step": 13231 }, { "epoch": 4.407762138752394, "grad_norm": 0.67578125, "learning_rate": 9.283348525595924e-06, "loss": 4.0367, "step": 13232 }, { "epoch": 4.408095277754643, "grad_norm": 0.73046875, "learning_rate": 9.282562292161273e-06, "loss": 4.0267, "step": 13233 }, { "epoch": 4.408428416756892, "grad_norm": 0.7109375, "learning_rate": 9.281776037963635e-06, "loss": 3.9677, "step": 13234 }, { "epoch": 4.40876155575914, "grad_norm": 0.73046875, "learning_rate": 9.28098976301216e-06, "loss": 4.0397, "step": 13235 }, { "epoch": 4.409094694761389, "grad_norm": 0.72265625, "learning_rate": 9.28020346731602e-06, "loss": 4.1175, "step": 13236 }, { "epoch": 4.409427833763638, "grad_norm": 0.73828125, "learning_rate": 9.27941715088436e-06, "loss": 3.9845, "step": 13237 }, { "epoch": 4.409760972765887, "grad_norm": 0.75, "learning_rate": 9.278630813726345e-06, "loss": 3.9764, "step": 13238 }, { "epoch": 4.410094111768135, "grad_norm": 0.68359375, "learning_rate": 9.277844455851135e-06, "loss": 4.0666, "step": 13239 }, { "epoch": 4.410427250770384, "grad_norm": 0.75, "learning_rate": 9.277058077267887e-06, "loss": 4.0367, "step": 13240 }, { "epoch": 4.410760389772633, "grad_norm": 0.74609375, "learning_rate": 9.276271677985763e-06, "loss": 4.0241, "step": 13241 }, { "epoch": 4.411093528774881, "grad_norm": 0.76953125, "learning_rate": 9.275485258013919e-06, "loss": 3.9624, "step": 13242 }, { "epoch": 4.41142666777713, "grad_norm": 0.71875, "learning_rate": 9.27469881736152e-06, "loss": 4.0244, "step": 13243 }, { "epoch": 4.411759806779378, "grad_norm": 0.75, "learning_rate": 9.27391235603772e-06, "loss": 4.0498, "step": 13244 }, { "epoch": 4.412092945781628, "grad_norm": 0.78125, "learning_rate": 9.273125874051687e-06, "loss": 3.9781, "step": 13245 }, { "epoch": 4.412426084783876, "grad_norm": 0.68359375, "learning_rate": 9.272339371412576e-06, "loss": 3.959, "step": 13246 }, { "epoch": 4.412759223786125, "grad_norm": 0.71875, "learning_rate": 9.271552848129552e-06, "loss": 4.0503, "step": 13247 }, { "epoch": 4.413092362788373, "grad_norm": 0.74609375, "learning_rate": 9.270766304211773e-06, "loss": 3.9709, "step": 13248 }, { "epoch": 4.4134255017906225, "grad_norm": 0.7109375, "learning_rate": 9.269979739668403e-06, "loss": 3.9887, "step": 13249 }, { "epoch": 4.413758640792871, "grad_norm": 0.7734375, "learning_rate": 9.269193154508601e-06, "loss": 3.9882, "step": 13250 }, { "epoch": 4.414091779795119, "grad_norm": 0.6953125, "learning_rate": 9.268406548741534e-06, "loss": 3.9361, "step": 13251 }, { "epoch": 4.414424918797368, "grad_norm": 0.73046875, "learning_rate": 9.267619922376358e-06, "loss": 3.9763, "step": 13252 }, { "epoch": 4.414758057799617, "grad_norm": 0.7421875, "learning_rate": 9.26683327542224e-06, "loss": 4.041, "step": 13253 }, { "epoch": 4.415091196801866, "grad_norm": 0.7109375, "learning_rate": 9.266046607888341e-06, "loss": 4.0034, "step": 13254 }, { "epoch": 4.415424335804114, "grad_norm": 0.72265625, "learning_rate": 9.265259919783823e-06, "loss": 3.9328, "step": 13255 }, { "epoch": 4.415757474806363, "grad_norm": 0.703125, "learning_rate": 9.264473211117853e-06, "loss": 3.9866, "step": 13256 }, { "epoch": 4.416090613808612, "grad_norm": 0.71875, "learning_rate": 9.263686481899589e-06, "loss": 3.9989, "step": 13257 }, { "epoch": 4.416423752810861, "grad_norm": 0.7265625, "learning_rate": 9.262899732138203e-06, "loss": 4.0251, "step": 13258 }, { "epoch": 4.416756891813109, "grad_norm": 0.73046875, "learning_rate": 9.262112961842847e-06, "loss": 3.9213, "step": 13259 }, { "epoch": 4.417090030815357, "grad_norm": 0.73046875, "learning_rate": 9.261326171022696e-06, "loss": 4.0301, "step": 13260 }, { "epoch": 4.4174231698176065, "grad_norm": 0.7578125, "learning_rate": 9.260539359686907e-06, "loss": 3.95, "step": 13261 }, { "epoch": 4.417756308819855, "grad_norm": 0.75, "learning_rate": 9.259752527844652e-06, "loss": 3.9958, "step": 13262 }, { "epoch": 4.418089447822104, "grad_norm": 0.71875, "learning_rate": 9.258965675505089e-06, "loss": 3.9964, "step": 13263 }, { "epoch": 4.418422586824352, "grad_norm": 0.7265625, "learning_rate": 9.258178802677385e-06, "loss": 4.008, "step": 13264 }, { "epoch": 4.4187557258266015, "grad_norm": 0.70703125, "learning_rate": 9.257391909370707e-06, "loss": 4.0057, "step": 13265 }, { "epoch": 4.41908886482885, "grad_norm": 0.72265625, "learning_rate": 9.256604995594218e-06, "loss": 4.0251, "step": 13266 }, { "epoch": 4.419422003831098, "grad_norm": 0.7265625, "learning_rate": 9.255818061357091e-06, "loss": 4.0092, "step": 13267 }, { "epoch": 4.419755142833347, "grad_norm": 0.7265625, "learning_rate": 9.25503110666848e-06, "loss": 4.0598, "step": 13268 }, { "epoch": 4.4200882818355955, "grad_norm": 0.734375, "learning_rate": 9.254244131537563e-06, "loss": 4.002, "step": 13269 }, { "epoch": 4.420421420837845, "grad_norm": 0.76171875, "learning_rate": 9.253457135973499e-06, "loss": 3.969, "step": 13270 }, { "epoch": 4.420754559840093, "grad_norm": 0.76953125, "learning_rate": 9.25267011998546e-06, "loss": 4.0404, "step": 13271 }, { "epoch": 4.421087698842342, "grad_norm": 0.7265625, "learning_rate": 9.251883083582608e-06, "loss": 4.0595, "step": 13272 }, { "epoch": 4.4214208378445905, "grad_norm": 0.71484375, "learning_rate": 9.251096026774113e-06, "loss": 3.9243, "step": 13273 }, { "epoch": 4.42175397684684, "grad_norm": 0.73828125, "learning_rate": 9.250308949569142e-06, "loss": 3.9407, "step": 13274 }, { "epoch": 4.422087115849088, "grad_norm": 0.68359375, "learning_rate": 9.249521851976864e-06, "loss": 4.0567, "step": 13275 }, { "epoch": 4.422420254851337, "grad_norm": 0.703125, "learning_rate": 9.248734734006444e-06, "loss": 4.0189, "step": 13276 }, { "epoch": 4.4227533938535855, "grad_norm": 0.73046875, "learning_rate": 9.247947595667054e-06, "loss": 3.9944, "step": 13277 }, { "epoch": 4.423086532855834, "grad_norm": 0.7109375, "learning_rate": 9.247160436967859e-06, "loss": 3.9677, "step": 13278 }, { "epoch": 4.423419671858083, "grad_norm": 0.74609375, "learning_rate": 9.246373257918029e-06, "loss": 3.9746, "step": 13279 }, { "epoch": 4.423752810860331, "grad_norm": 0.75, "learning_rate": 9.245586058526734e-06, "loss": 4.0028, "step": 13280 }, { "epoch": 4.42408594986258, "grad_norm": 0.7109375, "learning_rate": 9.244798838803144e-06, "loss": 4.0048, "step": 13281 }, { "epoch": 4.424419088864829, "grad_norm": 0.73046875, "learning_rate": 9.244011598756425e-06, "loss": 3.9829, "step": 13282 }, { "epoch": 4.424752227867078, "grad_norm": 0.72265625, "learning_rate": 9.243224338395748e-06, "loss": 4.0281, "step": 13283 }, { "epoch": 4.425085366869326, "grad_norm": 0.75, "learning_rate": 9.242437057730285e-06, "loss": 3.9511, "step": 13284 }, { "epoch": 4.4254185058715745, "grad_norm": 0.76171875, "learning_rate": 9.241649756769204e-06, "loss": 4.0122, "step": 13285 }, { "epoch": 4.425751644873824, "grad_norm": 0.73046875, "learning_rate": 9.240862435521672e-06, "loss": 3.9861, "step": 13286 }, { "epoch": 4.426084783876072, "grad_norm": 0.69921875, "learning_rate": 9.240075093996869e-06, "loss": 3.9988, "step": 13287 }, { "epoch": 4.426417922878321, "grad_norm": 0.69140625, "learning_rate": 9.239287732203956e-06, "loss": 4.0429, "step": 13288 }, { "epoch": 4.426751061880569, "grad_norm": 0.72265625, "learning_rate": 9.23850035015211e-06, "loss": 3.9621, "step": 13289 }, { "epoch": 4.427084200882819, "grad_norm": 0.7421875, "learning_rate": 9.237712947850503e-06, "loss": 3.9489, "step": 13290 }, { "epoch": 4.427417339885067, "grad_norm": 0.7265625, "learning_rate": 9.2369255253083e-06, "loss": 3.9682, "step": 13291 }, { "epoch": 4.427750478887316, "grad_norm": 0.69921875, "learning_rate": 9.236138082534677e-06, "loss": 4.0149, "step": 13292 }, { "epoch": 4.428083617889564, "grad_norm": 0.7265625, "learning_rate": 9.235350619538808e-06, "loss": 4.0031, "step": 13293 }, { "epoch": 4.428416756891814, "grad_norm": 0.73828125, "learning_rate": 9.234563136329861e-06, "loss": 3.9629, "step": 13294 }, { "epoch": 4.428749895894062, "grad_norm": 0.69921875, "learning_rate": 9.23377563291701e-06, "loss": 3.9871, "step": 13295 }, { "epoch": 4.42908303489631, "grad_norm": 0.76171875, "learning_rate": 9.232988109309432e-06, "loss": 3.9304, "step": 13296 }, { "epoch": 4.429416173898559, "grad_norm": 0.73828125, "learning_rate": 9.232200565516295e-06, "loss": 3.9209, "step": 13297 }, { "epoch": 4.429749312900808, "grad_norm": 0.7109375, "learning_rate": 9.23141300154677e-06, "loss": 3.9705, "step": 13298 }, { "epoch": 4.430082451903057, "grad_norm": 0.703125, "learning_rate": 9.230625417410036e-06, "loss": 4.0154, "step": 13299 }, { "epoch": 4.430415590905305, "grad_norm": 0.71484375, "learning_rate": 9.229837813115265e-06, "loss": 3.9708, "step": 13300 }, { "epoch": 4.430748729907554, "grad_norm": 0.70703125, "learning_rate": 9.22905018867163e-06, "loss": 3.9986, "step": 13301 }, { "epoch": 4.431081868909803, "grad_norm": 0.76953125, "learning_rate": 9.228262544088304e-06, "loss": 3.9296, "step": 13302 }, { "epoch": 4.431415007912051, "grad_norm": 0.70703125, "learning_rate": 9.227474879374466e-06, "loss": 3.955, "step": 13303 }, { "epoch": 4.4317481469143, "grad_norm": 0.72265625, "learning_rate": 9.226687194539286e-06, "loss": 3.9952, "step": 13304 }, { "epoch": 4.432081285916548, "grad_norm": 0.74609375, "learning_rate": 9.225899489591936e-06, "loss": 3.9731, "step": 13305 }, { "epoch": 4.432414424918798, "grad_norm": 0.73828125, "learning_rate": 9.225111764541602e-06, "loss": 4.0423, "step": 13306 }, { "epoch": 4.432747563921046, "grad_norm": 0.69921875, "learning_rate": 9.224324019397447e-06, "loss": 3.9726, "step": 13307 }, { "epoch": 4.433080702923295, "grad_norm": 0.7265625, "learning_rate": 9.223536254168654e-06, "loss": 4.0006, "step": 13308 }, { "epoch": 4.433413841925543, "grad_norm": 0.71875, "learning_rate": 9.222748468864399e-06, "loss": 3.9835, "step": 13309 }, { "epoch": 4.4337469809277925, "grad_norm": 0.70703125, "learning_rate": 9.221960663493852e-06, "loss": 4.0403, "step": 13310 }, { "epoch": 4.434080119930041, "grad_norm": 0.71875, "learning_rate": 9.221172838066196e-06, "loss": 3.9445, "step": 13311 }, { "epoch": 4.434413258932289, "grad_norm": 0.6953125, "learning_rate": 9.220384992590603e-06, "loss": 4.0201, "step": 13312 }, { "epoch": 4.434746397934538, "grad_norm": 0.71484375, "learning_rate": 9.219597127076253e-06, "loss": 3.9648, "step": 13313 }, { "epoch": 4.435079536936787, "grad_norm": 0.78125, "learning_rate": 9.21880924153232e-06, "loss": 3.9564, "step": 13314 }, { "epoch": 4.435412675939036, "grad_norm": 0.71875, "learning_rate": 9.218021335967981e-06, "loss": 4.0107, "step": 13315 }, { "epoch": 4.435745814941284, "grad_norm": 0.7265625, "learning_rate": 9.217233410392415e-06, "loss": 3.972, "step": 13316 }, { "epoch": 4.436078953943533, "grad_norm": 0.71875, "learning_rate": 9.2164454648148e-06, "loss": 3.9709, "step": 13317 }, { "epoch": 4.4364120929457815, "grad_norm": 0.7109375, "learning_rate": 9.215657499244314e-06, "loss": 4.0119, "step": 13318 }, { "epoch": 4.436745231948031, "grad_norm": 0.7421875, "learning_rate": 9.214869513690133e-06, "loss": 3.9889, "step": 13319 }, { "epoch": 4.437078370950279, "grad_norm": 0.73828125, "learning_rate": 9.214081508161437e-06, "loss": 3.991, "step": 13320 }, { "epoch": 4.437411509952527, "grad_norm": 0.73046875, "learning_rate": 9.213293482667403e-06, "loss": 3.9425, "step": 13321 }, { "epoch": 4.4377446489547765, "grad_norm": 0.734375, "learning_rate": 9.212505437217214e-06, "loss": 3.9757, "step": 13322 }, { "epoch": 4.438077787957025, "grad_norm": 0.765625, "learning_rate": 9.211717371820042e-06, "loss": 4.039, "step": 13323 }, { "epoch": 4.438410926959274, "grad_norm": 0.69140625, "learning_rate": 9.210929286485074e-06, "loss": 3.9498, "step": 13324 }, { "epoch": 4.438744065961522, "grad_norm": 0.75, "learning_rate": 9.210141181221482e-06, "loss": 3.9862, "step": 13325 }, { "epoch": 4.4390772049637715, "grad_norm": 0.76953125, "learning_rate": 9.209353056038453e-06, "loss": 4.0426, "step": 13326 }, { "epoch": 4.43941034396602, "grad_norm": 0.7109375, "learning_rate": 9.208564910945161e-06, "loss": 4.0395, "step": 13327 }, { "epoch": 4.439743482968268, "grad_norm": 0.75390625, "learning_rate": 9.207776745950788e-06, "loss": 4.0812, "step": 13328 }, { "epoch": 4.440076621970517, "grad_norm": 0.73046875, "learning_rate": 9.206988561064515e-06, "loss": 3.9331, "step": 13329 }, { "epoch": 4.4404097609727655, "grad_norm": 0.7421875, "learning_rate": 9.206200356295526e-06, "loss": 4.0309, "step": 13330 }, { "epoch": 4.440742899975015, "grad_norm": 0.7421875, "learning_rate": 9.205412131652996e-06, "loss": 3.9946, "step": 13331 }, { "epoch": 4.441076038977263, "grad_norm": 0.76171875, "learning_rate": 9.204623887146108e-06, "loss": 3.93, "step": 13332 }, { "epoch": 4.441409177979512, "grad_norm": 0.69921875, "learning_rate": 9.203835622784043e-06, "loss": 4.0631, "step": 13333 }, { "epoch": 4.4417423169817605, "grad_norm": 0.7109375, "learning_rate": 9.203047338575984e-06, "loss": 4.0493, "step": 13334 }, { "epoch": 4.44207545598401, "grad_norm": 0.7421875, "learning_rate": 9.202259034531115e-06, "loss": 4.03, "step": 13335 }, { "epoch": 4.442408594986258, "grad_norm": 0.7109375, "learning_rate": 9.201470710658614e-06, "loss": 3.9878, "step": 13336 }, { "epoch": 4.442741733988507, "grad_norm": 0.7734375, "learning_rate": 9.200682366967665e-06, "loss": 3.976, "step": 13337 }, { "epoch": 4.443074872990755, "grad_norm": 0.734375, "learning_rate": 9.199894003467448e-06, "loss": 4.0007, "step": 13338 }, { "epoch": 4.443408011993004, "grad_norm": 0.69921875, "learning_rate": 9.19910562016715e-06, "loss": 4.032, "step": 13339 }, { "epoch": 4.443741150995253, "grad_norm": 0.6796875, "learning_rate": 9.198317217075952e-06, "loss": 4.0452, "step": 13340 }, { "epoch": 4.444074289997501, "grad_norm": 0.734375, "learning_rate": 9.197528794203033e-06, "loss": 4.0779, "step": 13341 }, { "epoch": 4.44440742899975, "grad_norm": 0.703125, "learning_rate": 9.196740351557585e-06, "loss": 4.0074, "step": 13342 }, { "epoch": 4.444740568001999, "grad_norm": 0.74609375, "learning_rate": 9.195951889148783e-06, "loss": 3.9996, "step": 13343 }, { "epoch": 4.445073707004248, "grad_norm": 0.671875, "learning_rate": 9.195163406985821e-06, "loss": 4.0689, "step": 13344 }, { "epoch": 4.445406846006496, "grad_norm": 0.7421875, "learning_rate": 9.194374905077872e-06, "loss": 4.0024, "step": 13345 }, { "epoch": 4.4457399850087445, "grad_norm": 0.765625, "learning_rate": 9.193586383434125e-06, "loss": 3.95, "step": 13346 }, { "epoch": 4.446073124010994, "grad_norm": 0.75, "learning_rate": 9.192797842063767e-06, "loss": 3.9959, "step": 13347 }, { "epoch": 4.446406263013242, "grad_norm": 0.69921875, "learning_rate": 9.192009280975983e-06, "loss": 4.0155, "step": 13348 }, { "epoch": 4.446739402015491, "grad_norm": 0.7265625, "learning_rate": 9.191220700179952e-06, "loss": 4.0081, "step": 13349 }, { "epoch": 4.447072541017739, "grad_norm": 0.69140625, "learning_rate": 9.190432099684865e-06, "loss": 4.0671, "step": 13350 }, { "epoch": 4.447405680019989, "grad_norm": 0.73828125, "learning_rate": 9.189643479499904e-06, "loss": 4.0363, "step": 13351 }, { "epoch": 4.447738819022237, "grad_norm": 0.73828125, "learning_rate": 9.188854839634257e-06, "loss": 3.9769, "step": 13352 }, { "epoch": 4.448071958024486, "grad_norm": 0.75, "learning_rate": 9.18806618009711e-06, "loss": 4.0263, "step": 13353 }, { "epoch": 4.448405097026734, "grad_norm": 0.71875, "learning_rate": 9.187277500897644e-06, "loss": 4.0385, "step": 13354 }, { "epoch": 4.4487382360289836, "grad_norm": 0.7578125, "learning_rate": 9.186488802045054e-06, "loss": 3.9917, "step": 13355 }, { "epoch": 4.449071375031232, "grad_norm": 0.70703125, "learning_rate": 9.185700083548522e-06, "loss": 4.0026, "step": 13356 }, { "epoch": 4.44940451403348, "grad_norm": 0.6953125, "learning_rate": 9.184911345417236e-06, "loss": 4.0402, "step": 13357 }, { "epoch": 4.449737653035729, "grad_norm": 0.6953125, "learning_rate": 9.18412258766038e-06, "loss": 3.9818, "step": 13358 }, { "epoch": 4.450070792037978, "grad_norm": 0.7421875, "learning_rate": 9.183333810287146e-06, "loss": 4.0067, "step": 13359 }, { "epoch": 4.450403931040227, "grad_norm": 0.77734375, "learning_rate": 9.182545013306717e-06, "loss": 3.981, "step": 13360 }, { "epoch": 4.450737070042475, "grad_norm": 0.7265625, "learning_rate": 9.181756196728283e-06, "loss": 3.8873, "step": 13361 }, { "epoch": 4.451070209044724, "grad_norm": 0.73046875, "learning_rate": 9.180967360561035e-06, "loss": 4.0254, "step": 13362 }, { "epoch": 4.451403348046973, "grad_norm": 0.734375, "learning_rate": 9.180178504814156e-06, "loss": 4.05, "step": 13363 }, { "epoch": 4.451736487049221, "grad_norm": 0.73828125, "learning_rate": 9.179389629496837e-06, "loss": 3.9242, "step": 13364 }, { "epoch": 4.45206962605147, "grad_norm": 0.7578125, "learning_rate": 9.178600734618264e-06, "loss": 3.9883, "step": 13365 }, { "epoch": 4.452402765053718, "grad_norm": 0.75, "learning_rate": 9.177811820187633e-06, "loss": 4.0374, "step": 13366 }, { "epoch": 4.4527359040559675, "grad_norm": 0.69140625, "learning_rate": 9.177022886214124e-06, "loss": 3.9618, "step": 13367 }, { "epoch": 4.453069043058216, "grad_norm": 0.72265625, "learning_rate": 9.176233932706934e-06, "loss": 3.9929, "step": 13368 }, { "epoch": 4.453402182060465, "grad_norm": 0.7265625, "learning_rate": 9.175444959675248e-06, "loss": 3.9515, "step": 13369 }, { "epoch": 4.453735321062713, "grad_norm": 0.7265625, "learning_rate": 9.174655967128257e-06, "loss": 4.0057, "step": 13370 }, { "epoch": 4.4540684600649625, "grad_norm": 0.74609375, "learning_rate": 9.173866955075151e-06, "loss": 3.9667, "step": 13371 }, { "epoch": 4.454401599067211, "grad_norm": 0.71484375, "learning_rate": 9.173077923525124e-06, "loss": 3.9992, "step": 13372 }, { "epoch": 4.454734738069459, "grad_norm": 0.71484375, "learning_rate": 9.172288872487359e-06, "loss": 4.0342, "step": 13373 }, { "epoch": 4.455067877071708, "grad_norm": 0.734375, "learning_rate": 9.171499801971051e-06, "loss": 4.0022, "step": 13374 }, { "epoch": 4.4554010160739566, "grad_norm": 0.74609375, "learning_rate": 9.170710711985393e-06, "loss": 3.9246, "step": 13375 }, { "epoch": 4.455734155076206, "grad_norm": 0.75, "learning_rate": 9.169921602539571e-06, "loss": 4.0196, "step": 13376 }, { "epoch": 4.456067294078454, "grad_norm": 0.71875, "learning_rate": 9.169132473642782e-06, "loss": 4.0023, "step": 13377 }, { "epoch": 4.456400433080703, "grad_norm": 0.73828125, "learning_rate": 9.168343325304212e-06, "loss": 3.9936, "step": 13378 }, { "epoch": 4.4567335720829515, "grad_norm": 0.75, "learning_rate": 9.16755415753306e-06, "loss": 3.9722, "step": 13379 }, { "epoch": 4.457066711085201, "grad_norm": 0.74609375, "learning_rate": 9.166764970338511e-06, "loss": 4.0505, "step": 13380 }, { "epoch": 4.457399850087449, "grad_norm": 0.71484375, "learning_rate": 9.165975763729764e-06, "loss": 3.9398, "step": 13381 }, { "epoch": 4.457732989089697, "grad_norm": 0.70703125, "learning_rate": 9.165186537716003e-06, "loss": 4.002, "step": 13382 }, { "epoch": 4.4580661280919465, "grad_norm": 0.7578125, "learning_rate": 9.164397292306428e-06, "loss": 3.9753, "step": 13383 }, { "epoch": 4.458399267094195, "grad_norm": 0.703125, "learning_rate": 9.163608027510231e-06, "loss": 4.0125, "step": 13384 }, { "epoch": 4.458732406096444, "grad_norm": 0.73046875, "learning_rate": 9.162818743336601e-06, "loss": 4.01, "step": 13385 }, { "epoch": 4.459065545098692, "grad_norm": 0.7109375, "learning_rate": 9.162029439794737e-06, "loss": 4.0143, "step": 13386 }, { "epoch": 4.459398684100941, "grad_norm": 0.7265625, "learning_rate": 9.161240116893827e-06, "loss": 3.9809, "step": 13387 }, { "epoch": 4.45973182310319, "grad_norm": 0.75390625, "learning_rate": 9.160450774643072e-06, "loss": 4.0153, "step": 13388 }, { "epoch": 4.460064962105439, "grad_norm": 0.6875, "learning_rate": 9.159661413051659e-06, "loss": 3.995, "step": 13389 }, { "epoch": 4.460398101107687, "grad_norm": 0.70703125, "learning_rate": 9.158872032128784e-06, "loss": 4.0199, "step": 13390 }, { "epoch": 4.4607312401099355, "grad_norm": 0.703125, "learning_rate": 9.158082631883645e-06, "loss": 3.9822, "step": 13391 }, { "epoch": 4.461064379112185, "grad_norm": 0.73046875, "learning_rate": 9.157293212325436e-06, "loss": 3.9207, "step": 13392 }, { "epoch": 4.461397518114433, "grad_norm": 0.703125, "learning_rate": 9.15650377346335e-06, "loss": 4.0045, "step": 13393 }, { "epoch": 4.461730657116682, "grad_norm": 0.73046875, "learning_rate": 9.15571431530658e-06, "loss": 4.0729, "step": 13394 }, { "epoch": 4.4620637961189304, "grad_norm": 0.765625, "learning_rate": 9.154924837864327e-06, "loss": 4.0433, "step": 13395 }, { "epoch": 4.46239693512118, "grad_norm": 0.71484375, "learning_rate": 9.154135341145786e-06, "loss": 4.0268, "step": 13396 }, { "epoch": 4.462730074123428, "grad_norm": 0.69140625, "learning_rate": 9.153345825160148e-06, "loss": 3.9908, "step": 13397 }, { "epoch": 4.463063213125677, "grad_norm": 0.66015625, "learning_rate": 9.152556289916612e-06, "loss": 4.0148, "step": 13398 }, { "epoch": 4.463396352127925, "grad_norm": 0.70703125, "learning_rate": 9.151766735424377e-06, "loss": 3.9911, "step": 13399 }, { "epoch": 4.463729491130174, "grad_norm": 0.71875, "learning_rate": 9.150977161692636e-06, "loss": 3.9505, "step": 13400 }, { "epoch": 4.464062630132423, "grad_norm": 0.7578125, "learning_rate": 9.150187568730588e-06, "loss": 3.9633, "step": 13401 }, { "epoch": 4.464395769134671, "grad_norm": 0.6640625, "learning_rate": 9.149397956547429e-06, "loss": 4.0164, "step": 13402 }, { "epoch": 4.46472890813692, "grad_norm": 0.75390625, "learning_rate": 9.148608325152356e-06, "loss": 3.9925, "step": 13403 }, { "epoch": 4.465062047139169, "grad_norm": 0.6953125, "learning_rate": 9.147818674554566e-06, "loss": 3.9062, "step": 13404 }, { "epoch": 4.465395186141418, "grad_norm": 0.7421875, "learning_rate": 9.147029004763258e-06, "loss": 4.0057, "step": 13405 }, { "epoch": 4.465728325143666, "grad_norm": 0.74609375, "learning_rate": 9.14623931578763e-06, "loss": 4.015, "step": 13406 }, { "epoch": 4.466061464145914, "grad_norm": 0.7265625, "learning_rate": 9.14544960763688e-06, "loss": 4.0438, "step": 13407 }, { "epoch": 4.466394603148164, "grad_norm": 0.74609375, "learning_rate": 9.144659880320206e-06, "loss": 4.0287, "step": 13408 }, { "epoch": 4.466727742150412, "grad_norm": 0.71875, "learning_rate": 9.143870133846805e-06, "loss": 4.0067, "step": 13409 }, { "epoch": 4.467060881152661, "grad_norm": 0.6953125, "learning_rate": 9.14308036822588e-06, "loss": 3.9956, "step": 13410 }, { "epoch": 4.467394020154909, "grad_norm": 0.69921875, "learning_rate": 9.142290583466626e-06, "loss": 3.9744, "step": 13411 }, { "epoch": 4.467727159157159, "grad_norm": 0.76171875, "learning_rate": 9.141500779578246e-06, "loss": 3.9305, "step": 13412 }, { "epoch": 4.468060298159407, "grad_norm": 0.71875, "learning_rate": 9.140710956569936e-06, "loss": 4.0346, "step": 13413 }, { "epoch": 4.468393437161656, "grad_norm": 0.69140625, "learning_rate": 9.139921114450896e-06, "loss": 3.9714, "step": 13414 }, { "epoch": 4.468726576163904, "grad_norm": 0.73828125, "learning_rate": 9.139131253230329e-06, "loss": 3.9441, "step": 13415 }, { "epoch": 4.4690597151661535, "grad_norm": 0.734375, "learning_rate": 9.138341372917433e-06, "loss": 3.9975, "step": 13416 }, { "epoch": 4.469392854168402, "grad_norm": 0.69921875, "learning_rate": 9.137551473521409e-06, "loss": 3.9331, "step": 13417 }, { "epoch": 4.46972599317065, "grad_norm": 0.74609375, "learning_rate": 9.136761555051456e-06, "loss": 3.9756, "step": 13418 }, { "epoch": 4.470059132172899, "grad_norm": 0.703125, "learning_rate": 9.135971617516774e-06, "loss": 4.0233, "step": 13419 }, { "epoch": 4.470392271175148, "grad_norm": 0.734375, "learning_rate": 9.13518166092657e-06, "loss": 3.9376, "step": 13420 }, { "epoch": 4.470725410177397, "grad_norm": 0.6953125, "learning_rate": 9.13439168529004e-06, "loss": 4.0962, "step": 13421 }, { "epoch": 4.471058549179645, "grad_norm": 0.71484375, "learning_rate": 9.133601690616387e-06, "loss": 3.9985, "step": 13422 }, { "epoch": 4.471391688181894, "grad_norm": 0.7421875, "learning_rate": 9.132811676914813e-06, "loss": 3.9928, "step": 13423 }, { "epoch": 4.4717248271841425, "grad_norm": 0.73046875, "learning_rate": 9.13202164419452e-06, "loss": 3.9482, "step": 13424 }, { "epoch": 4.472057966186391, "grad_norm": 0.75, "learning_rate": 9.13123159246471e-06, "loss": 4.0721, "step": 13425 }, { "epoch": 4.47239110518864, "grad_norm": 0.76953125, "learning_rate": 9.130441521734583e-06, "loss": 3.9573, "step": 13426 }, { "epoch": 4.472724244190888, "grad_norm": 0.6875, "learning_rate": 9.129651432013345e-06, "loss": 4.0186, "step": 13427 }, { "epoch": 4.4730573831931375, "grad_norm": 0.7578125, "learning_rate": 9.128861323310196e-06, "loss": 3.9804, "step": 13428 }, { "epoch": 4.473390522195386, "grad_norm": 0.7109375, "learning_rate": 9.128071195634344e-06, "loss": 3.9729, "step": 13429 }, { "epoch": 4.473723661197635, "grad_norm": 0.73828125, "learning_rate": 9.127281048994987e-06, "loss": 3.9958, "step": 13430 }, { "epoch": 4.474056800199883, "grad_norm": 0.6953125, "learning_rate": 9.126490883401329e-06, "loss": 3.9858, "step": 13431 }, { "epoch": 4.4743899392021325, "grad_norm": 0.6875, "learning_rate": 9.125700698862578e-06, "loss": 4.0306, "step": 13432 }, { "epoch": 4.474723078204381, "grad_norm": 0.76953125, "learning_rate": 9.124910495387936e-06, "loss": 4.0262, "step": 13433 }, { "epoch": 4.475056217206629, "grad_norm": 0.78125, "learning_rate": 9.124120272986604e-06, "loss": 4.0797, "step": 13434 }, { "epoch": 4.475389356208878, "grad_norm": 0.83203125, "learning_rate": 9.123330031667789e-06, "loss": 3.8806, "step": 13435 }, { "epoch": 4.4757224952111265, "grad_norm": 0.75390625, "learning_rate": 9.122539771440696e-06, "loss": 4.017, "step": 13436 }, { "epoch": 4.476055634213376, "grad_norm": 0.734375, "learning_rate": 9.121749492314526e-06, "loss": 4.035, "step": 13437 }, { "epoch": 4.476388773215624, "grad_norm": 0.7265625, "learning_rate": 9.120959194298491e-06, "loss": 4.004, "step": 13438 }, { "epoch": 4.476721912217873, "grad_norm": 0.7265625, "learning_rate": 9.120168877401791e-06, "loss": 3.9946, "step": 13439 }, { "epoch": 4.4770550512201215, "grad_norm": 0.74609375, "learning_rate": 9.119378541633633e-06, "loss": 3.9341, "step": 13440 }, { "epoch": 4.477388190222371, "grad_norm": 0.69921875, "learning_rate": 9.118588187003221e-06, "loss": 4.0013, "step": 13441 }, { "epoch": 4.477721329224619, "grad_norm": 0.76171875, "learning_rate": 9.117797813519765e-06, "loss": 4.0283, "step": 13442 }, { "epoch": 4.478054468226867, "grad_norm": 0.7578125, "learning_rate": 9.11700742119247e-06, "loss": 4.0555, "step": 13443 }, { "epoch": 4.478387607229116, "grad_norm": 0.734375, "learning_rate": 9.116217010030536e-06, "loss": 4.0789, "step": 13444 }, { "epoch": 4.478720746231365, "grad_norm": 0.69140625, "learning_rate": 9.115426580043179e-06, "loss": 3.9678, "step": 13445 }, { "epoch": 4.479053885233614, "grad_norm": 0.73828125, "learning_rate": 9.114636131239596e-06, "loss": 3.9925, "step": 13446 }, { "epoch": 4.479387024235862, "grad_norm": 0.7109375, "learning_rate": 9.113845663629005e-06, "loss": 4.0743, "step": 13447 }, { "epoch": 4.479720163238111, "grad_norm": 0.7265625, "learning_rate": 9.113055177220605e-06, "loss": 4.0025, "step": 13448 }, { "epoch": 4.48005330224036, "grad_norm": 0.76171875, "learning_rate": 9.112264672023607e-06, "loss": 3.9794, "step": 13449 }, { "epoch": 4.480386441242609, "grad_norm": 0.75390625, "learning_rate": 9.111474148047215e-06, "loss": 3.9768, "step": 13450 }, { "epoch": 4.480719580244857, "grad_norm": 0.734375, "learning_rate": 9.110683605300643e-06, "loss": 3.9354, "step": 13451 }, { "epoch": 4.4810527192471055, "grad_norm": 0.75390625, "learning_rate": 9.109893043793096e-06, "loss": 3.9473, "step": 13452 }, { "epoch": 4.481385858249355, "grad_norm": 0.71484375, "learning_rate": 9.109102463533779e-06, "loss": 4.0499, "step": 13453 }, { "epoch": 4.481718997251603, "grad_norm": 0.71484375, "learning_rate": 9.108311864531905e-06, "loss": 3.9801, "step": 13454 }, { "epoch": 4.482052136253852, "grad_norm": 0.703125, "learning_rate": 9.107521246796681e-06, "loss": 4.0125, "step": 13455 }, { "epoch": 4.4823852752561, "grad_norm": 0.73046875, "learning_rate": 9.10673061033732e-06, "loss": 3.9633, "step": 13456 }, { "epoch": 4.48271841425835, "grad_norm": 0.7265625, "learning_rate": 9.105939955163022e-06, "loss": 3.9887, "step": 13457 }, { "epoch": 4.483051553260598, "grad_norm": 0.75390625, "learning_rate": 9.105149281283005e-06, "loss": 3.9703, "step": 13458 }, { "epoch": 4.483384692262847, "grad_norm": 0.7578125, "learning_rate": 9.104358588706473e-06, "loss": 3.9993, "step": 13459 }, { "epoch": 4.483717831265095, "grad_norm": 0.7265625, "learning_rate": 9.103567877442643e-06, "loss": 4.026, "step": 13460 }, { "epoch": 4.484050970267344, "grad_norm": 0.74609375, "learning_rate": 9.102777147500717e-06, "loss": 4.01, "step": 13461 }, { "epoch": 4.484384109269593, "grad_norm": 0.75390625, "learning_rate": 9.101986398889911e-06, "loss": 3.9894, "step": 13462 }, { "epoch": 4.484717248271841, "grad_norm": 0.75390625, "learning_rate": 9.101195631619431e-06, "loss": 4.0388, "step": 13463 }, { "epoch": 4.48505038727409, "grad_norm": 0.796875, "learning_rate": 9.100404845698492e-06, "loss": 3.941, "step": 13464 }, { "epoch": 4.485383526276339, "grad_norm": 0.7421875, "learning_rate": 9.099614041136303e-06, "loss": 4.0098, "step": 13465 }, { "epoch": 4.485716665278588, "grad_norm": 0.73828125, "learning_rate": 9.098823217942073e-06, "loss": 4.0231, "step": 13466 }, { "epoch": 4.486049804280836, "grad_norm": 0.72265625, "learning_rate": 9.098032376125018e-06, "loss": 4.059, "step": 13467 }, { "epoch": 4.486382943283084, "grad_norm": 0.74609375, "learning_rate": 9.097241515694345e-06, "loss": 3.992, "step": 13468 }, { "epoch": 4.486716082285334, "grad_norm": 0.7578125, "learning_rate": 9.09645063665927e-06, "loss": 4.0094, "step": 13469 }, { "epoch": 4.487049221287582, "grad_norm": 0.765625, "learning_rate": 9.095659739029002e-06, "loss": 3.9746, "step": 13470 }, { "epoch": 4.487382360289831, "grad_norm": 0.75, "learning_rate": 9.094868822812756e-06, "loss": 3.9493, "step": 13471 }, { "epoch": 4.487715499292079, "grad_norm": 0.74609375, "learning_rate": 9.094077888019739e-06, "loss": 3.9954, "step": 13472 }, { "epoch": 4.4880486382943285, "grad_norm": 0.734375, "learning_rate": 9.093286934659171e-06, "loss": 4.0425, "step": 13473 }, { "epoch": 4.488381777296577, "grad_norm": 0.734375, "learning_rate": 9.09249596274026e-06, "loss": 3.959, "step": 13474 }, { "epoch": 4.488714916298826, "grad_norm": 0.7421875, "learning_rate": 9.091704972272219e-06, "loss": 3.9884, "step": 13475 }, { "epoch": 4.489048055301074, "grad_norm": 0.73046875, "learning_rate": 9.090913963264264e-06, "loss": 4.0121, "step": 13476 }, { "epoch": 4.4893811943033235, "grad_norm": 0.7421875, "learning_rate": 9.090122935725606e-06, "loss": 4.0072, "step": 13477 }, { "epoch": 4.489714333305572, "grad_norm": 0.75, "learning_rate": 9.089331889665462e-06, "loss": 4.0815, "step": 13478 }, { "epoch": 4.49004747230782, "grad_norm": 0.70703125, "learning_rate": 9.088540825093043e-06, "loss": 4.0142, "step": 13479 }, { "epoch": 4.490380611310069, "grad_norm": 0.7109375, "learning_rate": 9.087749742017565e-06, "loss": 4.0322, "step": 13480 }, { "epoch": 4.490713750312318, "grad_norm": 0.71875, "learning_rate": 9.086958640448238e-06, "loss": 3.9854, "step": 13481 }, { "epoch": 4.491046889314567, "grad_norm": 0.765625, "learning_rate": 9.086167520394287e-06, "loss": 4.0462, "step": 13482 }, { "epoch": 4.491380028316815, "grad_norm": 0.7421875, "learning_rate": 9.085376381864913e-06, "loss": 3.8412, "step": 13483 }, { "epoch": 4.491713167319064, "grad_norm": 0.71484375, "learning_rate": 9.084585224869343e-06, "loss": 4.0338, "step": 13484 }, { "epoch": 4.4920463063213125, "grad_norm": 0.75, "learning_rate": 9.083794049416784e-06, "loss": 4.0626, "step": 13485 }, { "epoch": 4.492379445323561, "grad_norm": 0.7421875, "learning_rate": 9.083002855516458e-06, "loss": 3.9937, "step": 13486 }, { "epoch": 4.49271258432581, "grad_norm": 0.7578125, "learning_rate": 9.082211643177576e-06, "loss": 4.053, "step": 13487 }, { "epoch": 4.493045723328058, "grad_norm": 0.72265625, "learning_rate": 9.081420412409353e-06, "loss": 4.022, "step": 13488 }, { "epoch": 4.4933788623303075, "grad_norm": 0.73828125, "learning_rate": 9.08062916322101e-06, "loss": 4.0348, "step": 13489 }, { "epoch": 4.493712001332556, "grad_norm": 0.78515625, "learning_rate": 9.079837895621761e-06, "loss": 3.9747, "step": 13490 }, { "epoch": 4.494045140334805, "grad_norm": 0.765625, "learning_rate": 9.079046609620823e-06, "loss": 3.9812, "step": 13491 }, { "epoch": 4.494378279337053, "grad_norm": 0.75, "learning_rate": 9.07825530522741e-06, "loss": 4.01, "step": 13492 }, { "epoch": 4.494711418339302, "grad_norm": 0.73828125, "learning_rate": 9.077463982450743e-06, "loss": 3.9924, "step": 13493 }, { "epoch": 4.495044557341551, "grad_norm": 0.6953125, "learning_rate": 9.076672641300036e-06, "loss": 4.0694, "step": 13494 }, { "epoch": 4.4953776963438, "grad_norm": 0.6875, "learning_rate": 9.075881281784507e-06, "loss": 4.052, "step": 13495 }, { "epoch": 4.495710835346048, "grad_norm": 0.7421875, "learning_rate": 9.075089903913379e-06, "loss": 3.979, "step": 13496 }, { "epoch": 4.4960439743482965, "grad_norm": 0.7734375, "learning_rate": 9.07429850769586e-06, "loss": 4.0418, "step": 13497 }, { "epoch": 4.496377113350546, "grad_norm": 0.77734375, "learning_rate": 9.073507093141176e-06, "loss": 4.036, "step": 13498 }, { "epoch": 4.496710252352794, "grad_norm": 0.71484375, "learning_rate": 9.072715660258542e-06, "loss": 3.9721, "step": 13499 }, { "epoch": 4.497043391355043, "grad_norm": 0.75390625, "learning_rate": 9.071924209057178e-06, "loss": 4.031, "step": 13500 }, { "epoch": 4.4973765303572915, "grad_norm": 0.66015625, "learning_rate": 9.0711327395463e-06, "loss": 4.0592, "step": 13501 }, { "epoch": 4.497709669359541, "grad_norm": 0.7578125, "learning_rate": 9.07034125173513e-06, "loss": 3.989, "step": 13502 }, { "epoch": 4.498042808361789, "grad_norm": 0.75390625, "learning_rate": 9.069549745632886e-06, "loss": 4.0568, "step": 13503 }, { "epoch": 4.498375947364037, "grad_norm": 0.69921875, "learning_rate": 9.068758221248786e-06, "loss": 4.0081, "step": 13504 }, { "epoch": 4.498709086366286, "grad_norm": 0.71484375, "learning_rate": 9.06796667859205e-06, "loss": 3.9778, "step": 13505 }, { "epoch": 4.499042225368535, "grad_norm": 0.7421875, "learning_rate": 9.067175117671901e-06, "loss": 4.0263, "step": 13506 }, { "epoch": 4.499375364370784, "grad_norm": 0.70703125, "learning_rate": 9.066383538497554e-06, "loss": 4.0109, "step": 13507 }, { "epoch": 4.499708503373032, "grad_norm": 0.67578125, "learning_rate": 9.065591941078233e-06, "loss": 3.9265, "step": 13508 }, { "epoch": 4.500041642375281, "grad_norm": 0.71875, "learning_rate": 9.064800325423158e-06, "loss": 4.0995, "step": 13509 }, { "epoch": 4.50037478137753, "grad_norm": 0.7578125, "learning_rate": 9.064008691541546e-06, "loss": 4.0142, "step": 13510 }, { "epoch": 4.500707920379779, "grad_norm": 0.71484375, "learning_rate": 9.063217039442621e-06, "loss": 3.9665, "step": 13511 }, { "epoch": 4.501041059382027, "grad_norm": 0.6796875, "learning_rate": 9.062425369135603e-06, "loss": 3.9985, "step": 13512 }, { "epoch": 4.501374198384276, "grad_norm": 0.77734375, "learning_rate": 9.061633680629715e-06, "loss": 3.9953, "step": 13513 }, { "epoch": 4.501707337386525, "grad_norm": 0.6875, "learning_rate": 9.060841973934177e-06, "loss": 4.0208, "step": 13514 }, { "epoch": 4.502040476388773, "grad_norm": 0.7578125, "learning_rate": 9.060050249058212e-06, "loss": 4.0271, "step": 13515 }, { "epoch": 4.502373615391022, "grad_norm": 0.69921875, "learning_rate": 9.059258506011037e-06, "loss": 4.02, "step": 13516 }, { "epoch": 4.50270675439327, "grad_norm": 0.734375, "learning_rate": 9.05846674480188e-06, "loss": 3.9928, "step": 13517 }, { "epoch": 4.50303989339552, "grad_norm": 0.765625, "learning_rate": 9.057674965439961e-06, "loss": 3.9997, "step": 13518 }, { "epoch": 4.503373032397768, "grad_norm": 0.7109375, "learning_rate": 9.056883167934502e-06, "loss": 3.9692, "step": 13519 }, { "epoch": 4.503706171400017, "grad_norm": 0.71484375, "learning_rate": 9.056091352294726e-06, "loss": 4.0172, "step": 13520 }, { "epoch": 4.504039310402265, "grad_norm": 0.734375, "learning_rate": 9.055299518529857e-06, "loss": 3.9798, "step": 13521 }, { "epoch": 4.504372449404514, "grad_norm": 0.71875, "learning_rate": 9.054507666649118e-06, "loss": 4.0099, "step": 13522 }, { "epoch": 4.504705588406763, "grad_norm": 0.69140625, "learning_rate": 9.053715796661731e-06, "loss": 4.0481, "step": 13523 }, { "epoch": 4.505038727409011, "grad_norm": 0.68359375, "learning_rate": 9.05292390857692e-06, "loss": 3.9751, "step": 13524 }, { "epoch": 4.50537186641126, "grad_norm": 0.67578125, "learning_rate": 9.052132002403908e-06, "loss": 3.9677, "step": 13525 }, { "epoch": 4.505705005413509, "grad_norm": 0.71484375, "learning_rate": 9.051340078151924e-06, "loss": 4.0012, "step": 13526 }, { "epoch": 4.506038144415758, "grad_norm": 0.71484375, "learning_rate": 9.050548135830185e-06, "loss": 4.0219, "step": 13527 }, { "epoch": 4.506371283418006, "grad_norm": 0.6875, "learning_rate": 9.04975617544792e-06, "loss": 4.0108, "step": 13528 }, { "epoch": 4.506704422420254, "grad_norm": 0.75390625, "learning_rate": 9.04896419701435e-06, "loss": 3.9886, "step": 13529 }, { "epoch": 4.507037561422504, "grad_norm": 0.7265625, "learning_rate": 9.048172200538705e-06, "loss": 4.0878, "step": 13530 }, { "epoch": 4.507370700424752, "grad_norm": 0.71484375, "learning_rate": 9.047380186030205e-06, "loss": 3.9678, "step": 13531 }, { "epoch": 4.507703839427001, "grad_norm": 0.70703125, "learning_rate": 9.046588153498079e-06, "loss": 4.0804, "step": 13532 }, { "epoch": 4.508036978429249, "grad_norm": 0.77734375, "learning_rate": 9.045796102951552e-06, "loss": 3.9592, "step": 13533 }, { "epoch": 4.5083701174314985, "grad_norm": 0.73046875, "learning_rate": 9.045004034399846e-06, "loss": 3.9179, "step": 13534 }, { "epoch": 4.508703256433747, "grad_norm": 0.73828125, "learning_rate": 9.044211947852191e-06, "loss": 4.0563, "step": 13535 }, { "epoch": 4.509036395435996, "grad_norm": 0.73828125, "learning_rate": 9.043419843317812e-06, "loss": 4.0284, "step": 13536 }, { "epoch": 4.509369534438244, "grad_norm": 0.6875, "learning_rate": 9.042627720805933e-06, "loss": 3.9783, "step": 13537 }, { "epoch": 4.5097026734404935, "grad_norm": 0.71875, "learning_rate": 9.041835580325784e-06, "loss": 3.9936, "step": 13538 }, { "epoch": 4.510035812442742, "grad_norm": 0.734375, "learning_rate": 9.04104342188659e-06, "loss": 3.8952, "step": 13539 }, { "epoch": 4.51036895144499, "grad_norm": 0.70703125, "learning_rate": 9.040251245497575e-06, "loss": 4.024, "step": 13540 }, { "epoch": 4.510702090447239, "grad_norm": 0.7734375, "learning_rate": 9.039459051167973e-06, "loss": 3.9398, "step": 13541 }, { "epoch": 4.5110352294494875, "grad_norm": 0.734375, "learning_rate": 9.038666838907007e-06, "loss": 3.9159, "step": 13542 }, { "epoch": 4.511368368451737, "grad_norm": 0.74609375, "learning_rate": 9.037874608723903e-06, "loss": 4.015, "step": 13543 }, { "epoch": 4.511701507453985, "grad_norm": 0.71484375, "learning_rate": 9.037082360627892e-06, "loss": 3.9784, "step": 13544 }, { "epoch": 4.512034646456234, "grad_norm": 0.69140625, "learning_rate": 9.036290094628201e-06, "loss": 4.0186, "step": 13545 }, { "epoch": 4.5123677854584825, "grad_norm": 0.77734375, "learning_rate": 9.035497810734059e-06, "loss": 3.9401, "step": 13546 }, { "epoch": 4.512700924460731, "grad_norm": 0.73828125, "learning_rate": 9.034705508954692e-06, "loss": 3.9836, "step": 13547 }, { "epoch": 4.51303406346298, "grad_norm": 0.74609375, "learning_rate": 9.03391318929933e-06, "loss": 4.0075, "step": 13548 }, { "epoch": 4.513367202465228, "grad_norm": 0.73046875, "learning_rate": 9.033120851777202e-06, "loss": 3.9785, "step": 13549 }, { "epoch": 4.5137003414674775, "grad_norm": 0.71875, "learning_rate": 9.032328496397538e-06, "loss": 3.9734, "step": 13550 }, { "epoch": 4.514033480469726, "grad_norm": 0.71875, "learning_rate": 9.031536123169563e-06, "loss": 3.9989, "step": 13551 }, { "epoch": 4.514366619471975, "grad_norm": 0.72265625, "learning_rate": 9.030743732102513e-06, "loss": 3.9914, "step": 13552 }, { "epoch": 4.514699758474223, "grad_norm": 0.75, "learning_rate": 9.029951323205612e-06, "loss": 4.025, "step": 13553 }, { "epoch": 4.515032897476472, "grad_norm": 0.78125, "learning_rate": 9.029158896488092e-06, "loss": 3.931, "step": 13554 }, { "epoch": 4.515366036478721, "grad_norm": 0.70703125, "learning_rate": 9.028366451959185e-06, "loss": 3.9846, "step": 13555 }, { "epoch": 4.51569917548097, "grad_norm": 0.7265625, "learning_rate": 9.027573989628116e-06, "loss": 4.0516, "step": 13556 }, { "epoch": 4.516032314483218, "grad_norm": 0.734375, "learning_rate": 9.02678150950412e-06, "loss": 4.0521, "step": 13557 }, { "epoch": 4.5163654534854665, "grad_norm": 0.703125, "learning_rate": 9.025989011596426e-06, "loss": 3.9775, "step": 13558 }, { "epoch": 4.516698592487716, "grad_norm": 0.68359375, "learning_rate": 9.02519649591427e-06, "loss": 4.1019, "step": 13559 }, { "epoch": 4.517031731489964, "grad_norm": 0.74609375, "learning_rate": 9.024403962466873e-06, "loss": 3.9493, "step": 13560 }, { "epoch": 4.517364870492213, "grad_norm": 0.70703125, "learning_rate": 9.023611411263474e-06, "loss": 4.0029, "step": 13561 }, { "epoch": 4.517698009494461, "grad_norm": 0.6953125, "learning_rate": 9.0228188423133e-06, "loss": 3.9418, "step": 13562 }, { "epoch": 4.518031148496711, "grad_norm": 0.6875, "learning_rate": 9.022026255625587e-06, "loss": 4.0062, "step": 13563 }, { "epoch": 4.518364287498959, "grad_norm": 0.70703125, "learning_rate": 9.021233651209565e-06, "loss": 3.909, "step": 13564 }, { "epoch": 4.518697426501207, "grad_norm": 0.72265625, "learning_rate": 9.020441029074465e-06, "loss": 3.9986, "step": 13565 }, { "epoch": 4.519030565503456, "grad_norm": 0.765625, "learning_rate": 9.01964838922952e-06, "loss": 3.969, "step": 13566 }, { "epoch": 4.519363704505705, "grad_norm": 0.7109375, "learning_rate": 9.018855731683963e-06, "loss": 3.9784, "step": 13567 }, { "epoch": 4.519696843507954, "grad_norm": 0.7109375, "learning_rate": 9.01806305644703e-06, "loss": 3.988, "step": 13568 }, { "epoch": 4.520029982510202, "grad_norm": 0.8046875, "learning_rate": 9.017270363527946e-06, "loss": 3.94, "step": 13569 }, { "epoch": 4.520363121512451, "grad_norm": 0.7578125, "learning_rate": 9.016477652935948e-06, "loss": 4.0089, "step": 13570 }, { "epoch": 4.5206962605147, "grad_norm": 0.6953125, "learning_rate": 9.015684924680272e-06, "loss": 4.0243, "step": 13571 }, { "epoch": 4.521029399516949, "grad_norm": 0.71484375, "learning_rate": 9.014892178770153e-06, "loss": 4.0162, "step": 13572 }, { "epoch": 4.521362538519197, "grad_norm": 0.7265625, "learning_rate": 9.014099415214815e-06, "loss": 3.9397, "step": 13573 }, { "epoch": 4.521695677521446, "grad_norm": 0.765625, "learning_rate": 9.013306634023503e-06, "loss": 3.963, "step": 13574 }, { "epoch": 4.522028816523695, "grad_norm": 0.75390625, "learning_rate": 9.012513835205443e-06, "loss": 3.9818, "step": 13575 }, { "epoch": 4.522361955525943, "grad_norm": 0.7109375, "learning_rate": 9.011721018769877e-06, "loss": 4.0519, "step": 13576 }, { "epoch": 4.522695094528192, "grad_norm": 0.74609375, "learning_rate": 9.010928184726033e-06, "loss": 4.0262, "step": 13577 }, { "epoch": 4.52302823353044, "grad_norm": 0.71875, "learning_rate": 9.010135333083148e-06, "loss": 4.0175, "step": 13578 }, { "epoch": 4.5233613725326896, "grad_norm": 0.70703125, "learning_rate": 9.009342463850458e-06, "loss": 3.9677, "step": 13579 }, { "epoch": 4.523694511534938, "grad_norm": 0.75390625, "learning_rate": 9.008549577037197e-06, "loss": 3.9958, "step": 13580 }, { "epoch": 4.524027650537187, "grad_norm": 0.7421875, "learning_rate": 9.007756672652602e-06, "loss": 3.988, "step": 13581 }, { "epoch": 4.524360789539435, "grad_norm": 0.71875, "learning_rate": 9.006963750705907e-06, "loss": 4.0821, "step": 13582 }, { "epoch": 4.524693928541684, "grad_norm": 0.75390625, "learning_rate": 9.006170811206349e-06, "loss": 4.0368, "step": 13583 }, { "epoch": 4.525027067543933, "grad_norm": 0.6953125, "learning_rate": 9.005377854163161e-06, "loss": 4.0692, "step": 13584 }, { "epoch": 4.525360206546181, "grad_norm": 0.72265625, "learning_rate": 9.004584879585585e-06, "loss": 3.9775, "step": 13585 }, { "epoch": 4.52569334554843, "grad_norm": 0.72265625, "learning_rate": 9.003791887482852e-06, "loss": 4.0174, "step": 13586 }, { "epoch": 4.526026484550679, "grad_norm": 0.71875, "learning_rate": 9.0029988778642e-06, "loss": 3.9758, "step": 13587 }, { "epoch": 4.526359623552928, "grad_norm": 0.7265625, "learning_rate": 9.00220585073887e-06, "loss": 3.9826, "step": 13588 }, { "epoch": 4.526692762555176, "grad_norm": 0.7265625, "learning_rate": 9.001412806116092e-06, "loss": 4.0631, "step": 13589 }, { "epoch": 4.527025901557424, "grad_norm": 0.69921875, "learning_rate": 9.00061974400511e-06, "loss": 4.0368, "step": 13590 }, { "epoch": 4.5273590405596735, "grad_norm": 0.703125, "learning_rate": 8.999826664415154e-06, "loss": 3.9913, "step": 13591 }, { "epoch": 4.527692179561922, "grad_norm": 0.734375, "learning_rate": 8.99903356735547e-06, "loss": 4.017, "step": 13592 }, { "epoch": 4.528025318564171, "grad_norm": 0.7578125, "learning_rate": 8.99824045283529e-06, "loss": 3.961, "step": 13593 }, { "epoch": 4.528358457566419, "grad_norm": 0.73046875, "learning_rate": 8.997447320863854e-06, "loss": 3.9983, "step": 13594 }, { "epoch": 4.5286915965686685, "grad_norm": 0.72265625, "learning_rate": 8.996654171450401e-06, "loss": 3.9545, "step": 13595 }, { "epoch": 4.529024735570917, "grad_norm": 0.7890625, "learning_rate": 8.99586100460417e-06, "loss": 3.9842, "step": 13596 }, { "epoch": 4.529357874573166, "grad_norm": 0.765625, "learning_rate": 8.995067820334394e-06, "loss": 4.0788, "step": 13597 }, { "epoch": 4.529691013575414, "grad_norm": 0.72265625, "learning_rate": 8.994274618650318e-06, "loss": 4.0011, "step": 13598 }, { "epoch": 4.530024152577663, "grad_norm": 0.69921875, "learning_rate": 8.993481399561182e-06, "loss": 3.993, "step": 13599 }, { "epoch": 4.530357291579912, "grad_norm": 0.71484375, "learning_rate": 8.99268816307622e-06, "loss": 3.924, "step": 13600 }, { "epoch": 4.53069043058216, "grad_norm": 0.7421875, "learning_rate": 8.991894909204674e-06, "loss": 3.9822, "step": 13601 }, { "epoch": 4.531023569584409, "grad_norm": 0.76953125, "learning_rate": 8.991101637955782e-06, "loss": 4.0738, "step": 13602 }, { "epoch": 4.5313567085866575, "grad_norm": 0.7421875, "learning_rate": 8.99030834933879e-06, "loss": 4.0219, "step": 13603 }, { "epoch": 4.531689847588907, "grad_norm": 0.76171875, "learning_rate": 8.989515043362932e-06, "loss": 3.9952, "step": 13604 }, { "epoch": 4.532022986591155, "grad_norm": 0.71484375, "learning_rate": 8.98872172003745e-06, "loss": 4.0699, "step": 13605 }, { "epoch": 4.532356125593404, "grad_norm": 0.7421875, "learning_rate": 8.987928379371584e-06, "loss": 4.0133, "step": 13606 }, { "epoch": 4.5326892645956525, "grad_norm": 0.7421875, "learning_rate": 8.987135021374576e-06, "loss": 4.0512, "step": 13607 }, { "epoch": 4.533022403597901, "grad_norm": 0.76953125, "learning_rate": 8.986341646055666e-06, "loss": 3.9134, "step": 13608 }, { "epoch": 4.53335554260015, "grad_norm": 0.69921875, "learning_rate": 8.985548253424093e-06, "loss": 3.9908, "step": 13609 }, { "epoch": 4.533688681602398, "grad_norm": 0.75390625, "learning_rate": 8.984754843489104e-06, "loss": 3.9563, "step": 13610 }, { "epoch": 4.534021820604647, "grad_norm": 0.7109375, "learning_rate": 8.983961416259933e-06, "loss": 3.9966, "step": 13611 }, { "epoch": 4.534354959606896, "grad_norm": 0.73046875, "learning_rate": 8.983167971745832e-06, "loss": 4.0541, "step": 13612 }, { "epoch": 4.534688098609145, "grad_norm": 0.7265625, "learning_rate": 8.982374509956031e-06, "loss": 4.0388, "step": 13613 }, { "epoch": 4.535021237611393, "grad_norm": 0.734375, "learning_rate": 8.98158103089978e-06, "loss": 4.0023, "step": 13614 }, { "epoch": 4.535354376613642, "grad_norm": 0.7421875, "learning_rate": 8.980787534586318e-06, "loss": 3.9785, "step": 13615 }, { "epoch": 4.535687515615891, "grad_norm": 0.7421875, "learning_rate": 8.979994021024888e-06, "loss": 4.0239, "step": 13616 }, { "epoch": 4.53602065461814, "grad_norm": 0.7109375, "learning_rate": 8.979200490224737e-06, "loss": 3.9888, "step": 13617 }, { "epoch": 4.536353793620388, "grad_norm": 0.72265625, "learning_rate": 8.978406942195102e-06, "loss": 4.0052, "step": 13618 }, { "epoch": 4.5366869326226364, "grad_norm": 0.75, "learning_rate": 8.977613376945226e-06, "loss": 4.0384, "step": 13619 }, { "epoch": 4.537020071624886, "grad_norm": 0.77734375, "learning_rate": 8.976819794484357e-06, "loss": 3.9469, "step": 13620 }, { "epoch": 4.537353210627134, "grad_norm": 0.74609375, "learning_rate": 8.976026194821738e-06, "loss": 4.0926, "step": 13621 }, { "epoch": 4.537686349629383, "grad_norm": 0.75, "learning_rate": 8.975232577966607e-06, "loss": 3.932, "step": 13622 }, { "epoch": 4.538019488631631, "grad_norm": 0.75, "learning_rate": 8.974438943928215e-06, "loss": 3.988, "step": 13623 }, { "epoch": 4.538352627633881, "grad_norm": 0.76171875, "learning_rate": 8.973645292715799e-06, "loss": 4.0024, "step": 13624 }, { "epoch": 4.538685766636129, "grad_norm": 0.71484375, "learning_rate": 8.972851624338612e-06, "loss": 4.0306, "step": 13625 }, { "epoch": 4.539018905638377, "grad_norm": 0.76171875, "learning_rate": 8.972057938805893e-06, "loss": 3.9753, "step": 13626 }, { "epoch": 4.539352044640626, "grad_norm": 0.70703125, "learning_rate": 8.971264236126885e-06, "loss": 4.007, "step": 13627 }, { "epoch": 4.539685183642875, "grad_norm": 0.73828125, "learning_rate": 8.970470516310837e-06, "loss": 3.9303, "step": 13628 }, { "epoch": 4.540018322645124, "grad_norm": 0.734375, "learning_rate": 8.969676779366991e-06, "loss": 4.0139, "step": 13629 }, { "epoch": 4.540351461647372, "grad_norm": 0.7421875, "learning_rate": 8.968883025304596e-06, "loss": 3.9946, "step": 13630 }, { "epoch": 4.540684600649621, "grad_norm": 0.76171875, "learning_rate": 8.968089254132893e-06, "loss": 4.0407, "step": 13631 }, { "epoch": 4.54101773965187, "grad_norm": 0.734375, "learning_rate": 8.967295465861132e-06, "loss": 3.9701, "step": 13632 }, { "epoch": 4.541350878654119, "grad_norm": 0.69921875, "learning_rate": 8.966501660498556e-06, "loss": 4.0514, "step": 13633 }, { "epoch": 4.541684017656367, "grad_norm": 0.6953125, "learning_rate": 8.965707838054412e-06, "loss": 3.9857, "step": 13634 }, { "epoch": 4.542017156658616, "grad_norm": 0.75, "learning_rate": 8.964913998537948e-06, "loss": 3.9801, "step": 13635 }, { "epoch": 4.542350295660865, "grad_norm": 0.734375, "learning_rate": 8.96412014195841e-06, "loss": 4.0497, "step": 13636 }, { "epoch": 4.542683434663113, "grad_norm": 0.7578125, "learning_rate": 8.963326268325039e-06, "loss": 3.9503, "step": 13637 }, { "epoch": 4.543016573665362, "grad_norm": 0.77734375, "learning_rate": 8.962532377647088e-06, "loss": 4.0252, "step": 13638 }, { "epoch": 4.54334971266761, "grad_norm": 0.765625, "learning_rate": 8.961738469933806e-06, "loss": 4.0017, "step": 13639 }, { "epoch": 4.5436828516698595, "grad_norm": 0.73828125, "learning_rate": 8.960944545194435e-06, "loss": 4.0584, "step": 13640 }, { "epoch": 4.544015990672108, "grad_norm": 0.77734375, "learning_rate": 8.960150603438223e-06, "loss": 4.0423, "step": 13641 }, { "epoch": 4.544349129674357, "grad_norm": 0.734375, "learning_rate": 8.959356644674419e-06, "loss": 3.9668, "step": 13642 }, { "epoch": 4.544682268676605, "grad_norm": 0.70703125, "learning_rate": 8.958562668912273e-06, "loss": 3.9782, "step": 13643 }, { "epoch": 4.545015407678854, "grad_norm": 0.7265625, "learning_rate": 8.957768676161028e-06, "loss": 4.0125, "step": 13644 }, { "epoch": 4.545348546681103, "grad_norm": 0.73046875, "learning_rate": 8.956974666429939e-06, "loss": 4.0326, "step": 13645 }, { "epoch": 4.545681685683351, "grad_norm": 0.71484375, "learning_rate": 8.95618063972825e-06, "loss": 3.9643, "step": 13646 }, { "epoch": 4.5460148246856, "grad_norm": 0.7421875, "learning_rate": 8.95538659606521e-06, "loss": 3.993, "step": 13647 }, { "epoch": 4.5463479636878485, "grad_norm": 0.7421875, "learning_rate": 8.954592535450069e-06, "loss": 3.9449, "step": 13648 }, { "epoch": 4.546681102690098, "grad_norm": 0.703125, "learning_rate": 8.953798457892078e-06, "loss": 4.0252, "step": 13649 }, { "epoch": 4.547014241692346, "grad_norm": 0.71875, "learning_rate": 8.95300436340048e-06, "loss": 3.9951, "step": 13650 }, { "epoch": 4.547347380694594, "grad_norm": 0.7421875, "learning_rate": 8.952210251984529e-06, "loss": 4.0151, "step": 13651 }, { "epoch": 4.5476805196968435, "grad_norm": 0.76953125, "learning_rate": 8.951416123653474e-06, "loss": 4.0742, "step": 13652 }, { "epoch": 4.548013658699093, "grad_norm": 0.72265625, "learning_rate": 8.950621978416566e-06, "loss": 4.0101, "step": 13653 }, { "epoch": 4.548346797701341, "grad_norm": 0.75390625, "learning_rate": 8.949827816283055e-06, "loss": 4.0472, "step": 13654 }, { "epoch": 4.548679936703589, "grad_norm": 0.72265625, "learning_rate": 8.949033637262187e-06, "loss": 3.964, "step": 13655 }, { "epoch": 4.5490130757058385, "grad_norm": 0.73828125, "learning_rate": 8.948239441363219e-06, "loss": 3.9661, "step": 13656 }, { "epoch": 4.549346214708087, "grad_norm": 0.7421875, "learning_rate": 8.947445228595397e-06, "loss": 4.0484, "step": 13657 }, { "epoch": 4.549679353710336, "grad_norm": 0.73828125, "learning_rate": 8.946650998967973e-06, "loss": 4.0005, "step": 13658 }, { "epoch": 4.550012492712584, "grad_norm": 0.69140625, "learning_rate": 8.945856752490199e-06, "loss": 3.9445, "step": 13659 }, { "epoch": 4.550345631714833, "grad_norm": 0.75, "learning_rate": 8.945062489171325e-06, "loss": 3.9903, "step": 13660 }, { "epoch": 4.550678770717082, "grad_norm": 0.76171875, "learning_rate": 8.944268209020602e-06, "loss": 3.9957, "step": 13661 }, { "epoch": 4.55101190971933, "grad_norm": 0.75390625, "learning_rate": 8.943473912047286e-06, "loss": 4.0022, "step": 13662 }, { "epoch": 4.551345048721579, "grad_norm": 0.703125, "learning_rate": 8.942679598260622e-06, "loss": 3.9739, "step": 13663 }, { "epoch": 4.5516781877238275, "grad_norm": 0.7265625, "learning_rate": 8.941885267669867e-06, "loss": 3.9664, "step": 13664 }, { "epoch": 4.552011326726077, "grad_norm": 0.76953125, "learning_rate": 8.941090920284271e-06, "loss": 3.9053, "step": 13665 }, { "epoch": 4.552344465728325, "grad_norm": 0.7265625, "learning_rate": 8.940296556113089e-06, "loss": 3.925, "step": 13666 }, { "epoch": 4.552677604730574, "grad_norm": 0.734375, "learning_rate": 8.93950217516557e-06, "loss": 4.0278, "step": 13667 }, { "epoch": 4.553010743732822, "grad_norm": 0.73828125, "learning_rate": 8.938707777450968e-06, "loss": 3.9466, "step": 13668 }, { "epoch": 4.553343882735071, "grad_norm": 0.734375, "learning_rate": 8.937913362978538e-06, "loss": 3.9094, "step": 13669 }, { "epoch": 4.55367702173732, "grad_norm": 0.6640625, "learning_rate": 8.93711893175753e-06, "loss": 4.0499, "step": 13670 }, { "epoch": 4.554010160739568, "grad_norm": 0.7265625, "learning_rate": 8.936324483797203e-06, "loss": 3.99, "step": 13671 }, { "epoch": 4.554343299741817, "grad_norm": 0.7421875, "learning_rate": 8.935530019106803e-06, "loss": 3.9895, "step": 13672 }, { "epoch": 4.554676438744066, "grad_norm": 0.7578125, "learning_rate": 8.934735537695592e-06, "loss": 4.0938, "step": 13673 }, { "epoch": 4.555009577746315, "grad_norm": 0.74609375, "learning_rate": 8.933941039572815e-06, "loss": 4.0607, "step": 13674 }, { "epoch": 4.555342716748563, "grad_norm": 0.7421875, "learning_rate": 8.933146524747734e-06, "loss": 3.9306, "step": 13675 }, { "epoch": 4.555675855750812, "grad_norm": 0.7265625, "learning_rate": 8.9323519932296e-06, "loss": 3.9679, "step": 13676 }, { "epoch": 4.556008994753061, "grad_norm": 0.703125, "learning_rate": 8.931557445027663e-06, "loss": 4.0025, "step": 13677 }, { "epoch": 4.55634213375531, "grad_norm": 0.6953125, "learning_rate": 8.930762880151189e-06, "loss": 3.9315, "step": 13678 }, { "epoch": 4.556675272757558, "grad_norm": 0.75, "learning_rate": 8.929968298609422e-06, "loss": 3.9578, "step": 13679 }, { "epoch": 4.557008411759806, "grad_norm": 0.73828125, "learning_rate": 8.929173700411626e-06, "loss": 4.0126, "step": 13680 }, { "epoch": 4.557341550762056, "grad_norm": 0.765625, "learning_rate": 8.928379085567046e-06, "loss": 4.0301, "step": 13681 }, { "epoch": 4.557674689764304, "grad_norm": 0.74609375, "learning_rate": 8.927584454084948e-06, "loss": 4.0698, "step": 13682 }, { "epoch": 4.558007828766553, "grad_norm": 0.7734375, "learning_rate": 8.926789805974581e-06, "loss": 3.9568, "step": 13683 }, { "epoch": 4.558340967768801, "grad_norm": 0.71875, "learning_rate": 8.925995141245206e-06, "loss": 4.0134, "step": 13684 }, { "epoch": 4.558674106771051, "grad_norm": 0.69921875, "learning_rate": 8.925200459906075e-06, "loss": 3.9993, "step": 13685 }, { "epoch": 4.559007245773299, "grad_norm": 0.734375, "learning_rate": 8.924405761966446e-06, "loss": 4.0287, "step": 13686 }, { "epoch": 4.559340384775547, "grad_norm": 0.69921875, "learning_rate": 8.923611047435572e-06, "loss": 4.0027, "step": 13687 }, { "epoch": 4.559673523777796, "grad_norm": 0.7578125, "learning_rate": 8.922816316322715e-06, "loss": 3.9298, "step": 13688 }, { "epoch": 4.560006662780045, "grad_norm": 0.7109375, "learning_rate": 8.922021568637132e-06, "loss": 3.997, "step": 13689 }, { "epoch": 4.560339801782294, "grad_norm": 0.703125, "learning_rate": 8.921226804388074e-06, "loss": 3.9383, "step": 13690 }, { "epoch": 4.560672940784542, "grad_norm": 0.765625, "learning_rate": 8.920432023584803e-06, "loss": 3.9845, "step": 13691 }, { "epoch": 4.561006079786791, "grad_norm": 0.7421875, "learning_rate": 8.919637226236576e-06, "loss": 4.0463, "step": 13692 }, { "epoch": 4.56133921878904, "grad_norm": 0.734375, "learning_rate": 8.918842412352652e-06, "loss": 3.9786, "step": 13693 }, { "epoch": 4.561672357791289, "grad_norm": 0.74609375, "learning_rate": 8.918047581942286e-06, "loss": 3.9685, "step": 13694 }, { "epoch": 4.562005496793537, "grad_norm": 0.70703125, "learning_rate": 8.917252735014738e-06, "loss": 3.9804, "step": 13695 }, { "epoch": 4.562338635795786, "grad_norm": 0.73828125, "learning_rate": 8.916457871579262e-06, "loss": 4.0131, "step": 13696 }, { "epoch": 4.5626717747980345, "grad_norm": 0.6875, "learning_rate": 8.915662991645122e-06, "loss": 4.0094, "step": 13697 }, { "epoch": 4.563004913800283, "grad_norm": 0.77734375, "learning_rate": 8.914868095221574e-06, "loss": 3.9699, "step": 13698 }, { "epoch": 4.563338052802532, "grad_norm": 0.73828125, "learning_rate": 8.914073182317878e-06, "loss": 3.9463, "step": 13699 }, { "epoch": 4.56367119180478, "grad_norm": 0.765625, "learning_rate": 8.913278252943292e-06, "loss": 3.9952, "step": 13700 }, { "epoch": 4.5640043308070295, "grad_norm": 0.734375, "learning_rate": 8.912483307107075e-06, "loss": 3.9456, "step": 13701 }, { "epoch": 4.564337469809278, "grad_norm": 0.78515625, "learning_rate": 8.911688344818489e-06, "loss": 3.9958, "step": 13702 }, { "epoch": 4.564670608811527, "grad_norm": 0.71875, "learning_rate": 8.91089336608679e-06, "loss": 4.0019, "step": 13703 }, { "epoch": 4.565003747813775, "grad_norm": 0.73046875, "learning_rate": 8.910098370921241e-06, "loss": 4.0134, "step": 13704 }, { "epoch": 4.565336886816024, "grad_norm": 0.70703125, "learning_rate": 8.909303359331096e-06, "loss": 3.9403, "step": 13705 }, { "epoch": 4.565670025818273, "grad_norm": 0.69921875, "learning_rate": 8.908508331325623e-06, "loss": 4.0853, "step": 13706 }, { "epoch": 4.566003164820521, "grad_norm": 0.74609375, "learning_rate": 8.907713286914078e-06, "loss": 3.9732, "step": 13707 }, { "epoch": 4.56633630382277, "grad_norm": 0.76953125, "learning_rate": 8.906918226105723e-06, "loss": 4.0009, "step": 13708 }, { "epoch": 4.5666694428250185, "grad_norm": 0.74609375, "learning_rate": 8.906123148909816e-06, "loss": 3.9693, "step": 13709 }, { "epoch": 4.567002581827268, "grad_norm": 0.7421875, "learning_rate": 8.90532805533562e-06, "loss": 4.0465, "step": 13710 }, { "epoch": 4.567335720829516, "grad_norm": 0.7578125, "learning_rate": 8.904532945392399e-06, "loss": 4.024, "step": 13711 }, { "epoch": 4.567668859831765, "grad_norm": 0.7421875, "learning_rate": 8.903737819089409e-06, "loss": 4.0102, "step": 13712 }, { "epoch": 4.5680019988340135, "grad_norm": 0.71484375, "learning_rate": 8.902942676435913e-06, "loss": 3.9584, "step": 13713 }, { "epoch": 4.568335137836263, "grad_norm": 0.7421875, "learning_rate": 8.902147517441175e-06, "loss": 4.0248, "step": 13714 }, { "epoch": 4.568668276838511, "grad_norm": 0.76953125, "learning_rate": 8.901352342114455e-06, "loss": 4.0016, "step": 13715 }, { "epoch": 4.569001415840759, "grad_norm": 0.7109375, "learning_rate": 8.900557150465018e-06, "loss": 3.9834, "step": 13716 }, { "epoch": 4.569334554843008, "grad_norm": 0.71875, "learning_rate": 8.899761942502121e-06, "loss": 4.0348, "step": 13717 }, { "epoch": 4.569667693845257, "grad_norm": 0.77734375, "learning_rate": 8.898966718235031e-06, "loss": 3.9073, "step": 13718 }, { "epoch": 4.570000832847506, "grad_norm": 0.70703125, "learning_rate": 8.898171477673007e-06, "loss": 4.0075, "step": 13719 }, { "epoch": 4.570333971849754, "grad_norm": 0.7890625, "learning_rate": 8.897376220825316e-06, "loss": 3.9362, "step": 13720 }, { "epoch": 4.570667110852003, "grad_norm": 0.69921875, "learning_rate": 8.896580947701215e-06, "loss": 4.0449, "step": 13721 }, { "epoch": 4.571000249854252, "grad_norm": 0.734375, "learning_rate": 8.895785658309974e-06, "loss": 3.941, "step": 13722 }, { "epoch": 4.5713333888565, "grad_norm": 0.75390625, "learning_rate": 8.894990352660853e-06, "loss": 4.0082, "step": 13723 }, { "epoch": 4.571666527858749, "grad_norm": 0.7109375, "learning_rate": 8.894195030763115e-06, "loss": 3.9776, "step": 13724 }, { "epoch": 4.5719996668609975, "grad_norm": 0.7265625, "learning_rate": 8.893399692626026e-06, "loss": 4.01, "step": 13725 }, { "epoch": 4.572332805863247, "grad_norm": 0.7265625, "learning_rate": 8.89260433825885e-06, "loss": 4.0273, "step": 13726 }, { "epoch": 4.572665944865495, "grad_norm": 0.74609375, "learning_rate": 8.891808967670846e-06, "loss": 3.9871, "step": 13727 }, { "epoch": 4.572999083867744, "grad_norm": 0.7578125, "learning_rate": 8.891013580871284e-06, "loss": 4.0603, "step": 13728 }, { "epoch": 4.573332222869992, "grad_norm": 0.71875, "learning_rate": 8.890218177869429e-06, "loss": 3.9795, "step": 13729 }, { "epoch": 4.573665361872241, "grad_norm": 0.734375, "learning_rate": 8.889422758674541e-06, "loss": 4.0622, "step": 13730 }, { "epoch": 4.57399850087449, "grad_norm": 0.73046875, "learning_rate": 8.888627323295885e-06, "loss": 4.0313, "step": 13731 }, { "epoch": 4.574331639876738, "grad_norm": 0.73828125, "learning_rate": 8.887831871742733e-06, "loss": 3.9607, "step": 13732 }, { "epoch": 4.574664778878987, "grad_norm": 0.71484375, "learning_rate": 8.887036404024345e-06, "loss": 3.9251, "step": 13733 }, { "epoch": 4.574997917881236, "grad_norm": 0.7265625, "learning_rate": 8.886240920149984e-06, "loss": 4.0758, "step": 13734 }, { "epoch": 4.575331056883485, "grad_norm": 0.7265625, "learning_rate": 8.88544542012892e-06, "loss": 4.0545, "step": 13735 }, { "epoch": 4.575664195885733, "grad_norm": 0.6953125, "learning_rate": 8.884649903970418e-06, "loss": 3.9845, "step": 13736 }, { "epoch": 4.575997334887982, "grad_norm": 0.7734375, "learning_rate": 8.883854371683745e-06, "loss": 3.9393, "step": 13737 }, { "epoch": 4.576330473890231, "grad_norm": 0.734375, "learning_rate": 8.883058823278166e-06, "loss": 4.0894, "step": 13738 }, { "epoch": 4.57666361289248, "grad_norm": 0.7265625, "learning_rate": 8.882263258762946e-06, "loss": 4.0182, "step": 13739 }, { "epoch": 4.576996751894728, "grad_norm": 0.76171875, "learning_rate": 8.881467678147354e-06, "loss": 3.9999, "step": 13740 }, { "epoch": 4.577329890896976, "grad_norm": 0.76953125, "learning_rate": 8.880672081440656e-06, "loss": 3.9707, "step": 13741 }, { "epoch": 4.577663029899226, "grad_norm": 0.7109375, "learning_rate": 8.879876468652119e-06, "loss": 4.0069, "step": 13742 }, { "epoch": 4.577996168901474, "grad_norm": 0.69921875, "learning_rate": 8.879080839791008e-06, "loss": 3.9931, "step": 13743 }, { "epoch": 4.578329307903723, "grad_norm": 0.7421875, "learning_rate": 8.878285194866594e-06, "loss": 3.9656, "step": 13744 }, { "epoch": 4.578662446905971, "grad_norm": 0.71484375, "learning_rate": 8.87748953388814e-06, "loss": 4.0306, "step": 13745 }, { "epoch": 4.5789955859082205, "grad_norm": 0.72265625, "learning_rate": 8.876693856864921e-06, "loss": 3.968, "step": 13746 }, { "epoch": 4.579328724910469, "grad_norm": 0.70703125, "learning_rate": 8.8758981638062e-06, "loss": 4.076, "step": 13747 }, { "epoch": 4.579661863912717, "grad_norm": 0.69140625, "learning_rate": 8.875102454721243e-06, "loss": 4.0285, "step": 13748 }, { "epoch": 4.579995002914966, "grad_norm": 0.734375, "learning_rate": 8.874306729619321e-06, "loss": 4.008, "step": 13749 }, { "epoch": 4.580328141917215, "grad_norm": 0.6875, "learning_rate": 8.873510988509704e-06, "loss": 4.0885, "step": 13750 }, { "epoch": 4.580661280919464, "grad_norm": 0.73828125, "learning_rate": 8.87271523140166e-06, "loss": 4.041, "step": 13751 }, { "epoch": 4.580994419921712, "grad_norm": 0.69140625, "learning_rate": 8.871919458304455e-06, "loss": 4.0537, "step": 13752 }, { "epoch": 4.581327558923961, "grad_norm": 0.734375, "learning_rate": 8.871123669227357e-06, "loss": 4.0187, "step": 13753 }, { "epoch": 4.58166069792621, "grad_norm": 0.7265625, "learning_rate": 8.870327864179643e-06, "loss": 3.9216, "step": 13754 }, { "epoch": 4.581993836928459, "grad_norm": 0.70703125, "learning_rate": 8.869532043170575e-06, "loss": 4.0435, "step": 13755 }, { "epoch": 4.582326975930707, "grad_norm": 0.74609375, "learning_rate": 8.868736206209426e-06, "loss": 3.9962, "step": 13756 }, { "epoch": 4.582660114932956, "grad_norm": 0.73828125, "learning_rate": 8.867940353305465e-06, "loss": 3.9381, "step": 13757 }, { "epoch": 4.5829932539352045, "grad_norm": 0.73046875, "learning_rate": 8.867144484467959e-06, "loss": 3.9659, "step": 13758 }, { "epoch": 4.583326392937453, "grad_norm": 0.734375, "learning_rate": 8.866348599706184e-06, "loss": 3.951, "step": 13759 }, { "epoch": 4.583659531939702, "grad_norm": 0.703125, "learning_rate": 8.865552699029409e-06, "loss": 4.0653, "step": 13760 }, { "epoch": 4.58399267094195, "grad_norm": 0.703125, "learning_rate": 8.8647567824469e-06, "loss": 4.0065, "step": 13761 }, { "epoch": 4.5843258099441995, "grad_norm": 0.73828125, "learning_rate": 8.86396084996793e-06, "loss": 3.989, "step": 13762 }, { "epoch": 4.584658948946448, "grad_norm": 0.75390625, "learning_rate": 8.863164901601773e-06, "loss": 3.9513, "step": 13763 }, { "epoch": 4.584992087948697, "grad_norm": 0.6796875, "learning_rate": 8.862368937357694e-06, "loss": 4.0277, "step": 13764 }, { "epoch": 4.585325226950945, "grad_norm": 0.73046875, "learning_rate": 8.861572957244971e-06, "loss": 3.9863, "step": 13765 }, { "epoch": 4.5856583659531935, "grad_norm": 0.7265625, "learning_rate": 8.860776961272871e-06, "loss": 4.042, "step": 13766 }, { "epoch": 4.585991504955443, "grad_norm": 0.7265625, "learning_rate": 8.859980949450666e-06, "loss": 3.9296, "step": 13767 }, { "epoch": 4.586324643957691, "grad_norm": 0.67578125, "learning_rate": 8.85918492178763e-06, "loss": 4.0179, "step": 13768 }, { "epoch": 4.58665778295994, "grad_norm": 0.73046875, "learning_rate": 8.858388878293032e-06, "loss": 4.0036, "step": 13769 }, { "epoch": 4.5869909219621885, "grad_norm": 0.734375, "learning_rate": 8.85759281897615e-06, "loss": 4.0188, "step": 13770 }, { "epoch": 4.587324060964438, "grad_norm": 0.734375, "learning_rate": 8.856796743846247e-06, "loss": 3.9552, "step": 13771 }, { "epoch": 4.587657199966686, "grad_norm": 0.703125, "learning_rate": 8.856000652912602e-06, "loss": 4.0039, "step": 13772 }, { "epoch": 4.587990338968935, "grad_norm": 0.7421875, "learning_rate": 8.855204546184489e-06, "loss": 4.0019, "step": 13773 }, { "epoch": 4.5883234779711835, "grad_norm": 0.71875, "learning_rate": 8.854408423671177e-06, "loss": 4.044, "step": 13774 }, { "epoch": 4.588656616973433, "grad_norm": 0.71875, "learning_rate": 8.85361228538194e-06, "loss": 3.993, "step": 13775 }, { "epoch": 4.588989755975681, "grad_norm": 0.7265625, "learning_rate": 8.852816131326053e-06, "loss": 3.9798, "step": 13776 }, { "epoch": 4.589322894977929, "grad_norm": 0.734375, "learning_rate": 8.852019961512785e-06, "loss": 3.9261, "step": 13777 }, { "epoch": 4.589656033980178, "grad_norm": 0.75390625, "learning_rate": 8.851223775951417e-06, "loss": 4.0089, "step": 13778 }, { "epoch": 4.589989172982427, "grad_norm": 0.73046875, "learning_rate": 8.850427574651219e-06, "loss": 3.8864, "step": 13779 }, { "epoch": 4.590322311984676, "grad_norm": 0.70703125, "learning_rate": 8.84963135762146e-06, "loss": 4.0765, "step": 13780 }, { "epoch": 4.590655450986924, "grad_norm": 0.7421875, "learning_rate": 8.848835124871421e-06, "loss": 3.9552, "step": 13781 }, { "epoch": 4.590988589989173, "grad_norm": 0.796875, "learning_rate": 8.848038876410375e-06, "loss": 4.0066, "step": 13782 }, { "epoch": 4.591321728991422, "grad_norm": 0.73046875, "learning_rate": 8.847242612247598e-06, "loss": 3.9626, "step": 13783 }, { "epoch": 4.59165486799367, "grad_norm": 0.76171875, "learning_rate": 8.84644633239236e-06, "loss": 3.9784, "step": 13784 }, { "epoch": 4.591988006995919, "grad_norm": 0.6796875, "learning_rate": 8.845650036853942e-06, "loss": 3.9233, "step": 13785 }, { "epoch": 4.592321145998167, "grad_norm": 0.734375, "learning_rate": 8.844853725641612e-06, "loss": 3.9036, "step": 13786 }, { "epoch": 4.592654285000417, "grad_norm": 0.77734375, "learning_rate": 8.84405739876465e-06, "loss": 3.9464, "step": 13787 }, { "epoch": 4.592987424002665, "grad_norm": 0.75, "learning_rate": 8.84326105623233e-06, "loss": 3.9845, "step": 13788 }, { "epoch": 4.593320563004914, "grad_norm": 0.74609375, "learning_rate": 8.84246469805393e-06, "loss": 4.0482, "step": 13789 }, { "epoch": 4.593653702007162, "grad_norm": 0.7734375, "learning_rate": 8.841668324238723e-06, "loss": 3.9897, "step": 13790 }, { "epoch": 4.593986841009411, "grad_norm": 0.71484375, "learning_rate": 8.840871934795984e-06, "loss": 4.008, "step": 13791 }, { "epoch": 4.59431998001166, "grad_norm": 0.73828125, "learning_rate": 8.840075529734995e-06, "loss": 3.9621, "step": 13792 }, { "epoch": 4.594653119013909, "grad_norm": 0.72265625, "learning_rate": 8.839279109065026e-06, "loss": 3.9986, "step": 13793 }, { "epoch": 4.594986258016157, "grad_norm": 0.765625, "learning_rate": 8.838482672795359e-06, "loss": 4.0095, "step": 13794 }, { "epoch": 4.595319397018406, "grad_norm": 0.75, "learning_rate": 8.837686220935265e-06, "loss": 4.031, "step": 13795 }, { "epoch": 4.595652536020655, "grad_norm": 0.734375, "learning_rate": 8.836889753494026e-06, "loss": 3.9407, "step": 13796 }, { "epoch": 4.595985675022903, "grad_norm": 0.69140625, "learning_rate": 8.836093270480916e-06, "loss": 3.985, "step": 13797 }, { "epoch": 4.596318814025152, "grad_norm": 0.73828125, "learning_rate": 8.835296771905213e-06, "loss": 4.0376, "step": 13798 }, { "epoch": 4.596651953027401, "grad_norm": 0.7734375, "learning_rate": 8.834500257776195e-06, "loss": 4.0256, "step": 13799 }, { "epoch": 4.59698509202965, "grad_norm": 0.74609375, "learning_rate": 8.833703728103141e-06, "loss": 3.9604, "step": 13800 }, { "epoch": 4.597318231031898, "grad_norm": 0.74609375, "learning_rate": 8.832907182895327e-06, "loss": 3.9655, "step": 13801 }, { "epoch": 4.597651370034146, "grad_norm": 0.7265625, "learning_rate": 8.832110622162031e-06, "loss": 3.9922, "step": 13802 }, { "epoch": 4.5979845090363956, "grad_norm": 0.70703125, "learning_rate": 8.831314045912531e-06, "loss": 4.0555, "step": 13803 }, { "epoch": 4.598317648038644, "grad_norm": 0.7265625, "learning_rate": 8.830517454156106e-06, "loss": 3.9551, "step": 13804 }, { "epoch": 4.598650787040893, "grad_norm": 0.7890625, "learning_rate": 8.829720846902036e-06, "loss": 3.9929, "step": 13805 }, { "epoch": 4.598983926043141, "grad_norm": 0.75390625, "learning_rate": 8.828924224159595e-06, "loss": 3.9817, "step": 13806 }, { "epoch": 4.5993170650453905, "grad_norm": 0.7421875, "learning_rate": 8.82812758593807e-06, "loss": 3.9688, "step": 13807 }, { "epoch": 4.599650204047639, "grad_norm": 0.77734375, "learning_rate": 8.827330932246732e-06, "loss": 4.0396, "step": 13808 }, { "epoch": 4.599983343049887, "grad_norm": 0.73046875, "learning_rate": 8.826534263094863e-06, "loss": 4.0326, "step": 13809 }, { "epoch": 4.600316482052136, "grad_norm": 0.7265625, "learning_rate": 8.825737578491746e-06, "loss": 4.1099, "step": 13810 }, { "epoch": 4.600649621054385, "grad_norm": 0.73046875, "learning_rate": 8.824940878446657e-06, "loss": 4.0531, "step": 13811 }, { "epoch": 4.600982760056634, "grad_norm": 0.73046875, "learning_rate": 8.824144162968876e-06, "loss": 4.0118, "step": 13812 }, { "epoch": 4.601315899058882, "grad_norm": 0.69921875, "learning_rate": 8.823347432067682e-06, "loss": 3.9816, "step": 13813 }, { "epoch": 4.601649038061131, "grad_norm": 0.73828125, "learning_rate": 8.822550685752359e-06, "loss": 3.948, "step": 13814 }, { "epoch": 4.6019821770633795, "grad_norm": 0.6796875, "learning_rate": 8.821753924032183e-06, "loss": 3.9956, "step": 13815 }, { "epoch": 4.602315316065629, "grad_norm": 0.75390625, "learning_rate": 8.82095714691644e-06, "loss": 4.048, "step": 13816 }, { "epoch": 4.602648455067877, "grad_norm": 0.7265625, "learning_rate": 8.820160354414406e-06, "loss": 4.0158, "step": 13817 }, { "epoch": 4.602981594070126, "grad_norm": 0.72265625, "learning_rate": 8.819363546535362e-06, "loss": 3.9887, "step": 13818 }, { "epoch": 4.6033147330723745, "grad_norm": 0.73046875, "learning_rate": 8.818566723288592e-06, "loss": 4.022, "step": 13819 }, { "epoch": 4.603647872074623, "grad_norm": 0.78125, "learning_rate": 8.817769884683374e-06, "loss": 4.0124, "step": 13820 }, { "epoch": 4.603981011076872, "grad_norm": 0.7109375, "learning_rate": 8.816973030728992e-06, "loss": 4.0018, "step": 13821 }, { "epoch": 4.60431415007912, "grad_norm": 0.69140625, "learning_rate": 8.816176161434728e-06, "loss": 4.0816, "step": 13822 }, { "epoch": 4.604647289081369, "grad_norm": 0.75, "learning_rate": 8.815379276809862e-06, "loss": 4.0094, "step": 13823 }, { "epoch": 4.604980428083618, "grad_norm": 0.8046875, "learning_rate": 8.814582376863674e-06, "loss": 3.9202, "step": 13824 }, { "epoch": 4.605313567085867, "grad_norm": 0.734375, "learning_rate": 8.813785461605452e-06, "loss": 4.0072, "step": 13825 }, { "epoch": 4.605646706088115, "grad_norm": 0.734375, "learning_rate": 8.812988531044471e-06, "loss": 3.984, "step": 13826 }, { "epoch": 4.6059798450903635, "grad_norm": 0.75390625, "learning_rate": 8.812191585190022e-06, "loss": 4.0002, "step": 13827 }, { "epoch": 4.606312984092613, "grad_norm": 0.77734375, "learning_rate": 8.811394624051382e-06, "loss": 4.011, "step": 13828 }, { "epoch": 4.606646123094861, "grad_norm": 0.7578125, "learning_rate": 8.810597647637834e-06, "loss": 4.0308, "step": 13829 }, { "epoch": 4.60697926209711, "grad_norm": 0.765625, "learning_rate": 8.809800655958662e-06, "loss": 3.937, "step": 13830 }, { "epoch": 4.6073124010993585, "grad_norm": 0.71875, "learning_rate": 8.809003649023151e-06, "loss": 4.0292, "step": 13831 }, { "epoch": 4.607645540101608, "grad_norm": 0.7265625, "learning_rate": 8.808206626840584e-06, "loss": 4.0201, "step": 13832 }, { "epoch": 4.607978679103856, "grad_norm": 0.76171875, "learning_rate": 8.807409589420239e-06, "loss": 3.9748, "step": 13833 }, { "epoch": 4.608311818106105, "grad_norm": 0.703125, "learning_rate": 8.806612536771407e-06, "loss": 4.0152, "step": 13834 }, { "epoch": 4.608644957108353, "grad_norm": 0.74609375, "learning_rate": 8.805815468903366e-06, "loss": 4.0232, "step": 13835 }, { "epoch": 4.608978096110603, "grad_norm": 0.7421875, "learning_rate": 8.805018385825407e-06, "loss": 3.9624, "step": 13836 }, { "epoch": 4.609311235112851, "grad_norm": 0.73046875, "learning_rate": 8.80422128754681e-06, "loss": 3.9659, "step": 13837 }, { "epoch": 4.609644374115099, "grad_norm": 0.7421875, "learning_rate": 8.80342417407686e-06, "loss": 3.9767, "step": 13838 }, { "epoch": 4.609977513117348, "grad_norm": 0.75, "learning_rate": 8.802627045424839e-06, "loss": 4.0173, "step": 13839 }, { "epoch": 4.610310652119597, "grad_norm": 0.73828125, "learning_rate": 8.801829901600037e-06, "loss": 4.0435, "step": 13840 }, { "epoch": 4.610643791121846, "grad_norm": 0.734375, "learning_rate": 8.801032742611735e-06, "loss": 3.9294, "step": 13841 }, { "epoch": 4.610976930124094, "grad_norm": 0.796875, "learning_rate": 8.800235568469222e-06, "loss": 3.91, "step": 13842 }, { "epoch": 4.611310069126343, "grad_norm": 0.71484375, "learning_rate": 8.799438379181778e-06, "loss": 4.0488, "step": 13843 }, { "epoch": 4.611643208128592, "grad_norm": 0.70703125, "learning_rate": 8.798641174758692e-06, "loss": 3.9456, "step": 13844 }, { "epoch": 4.61197634713084, "grad_norm": 0.7578125, "learning_rate": 8.797843955209248e-06, "loss": 3.9412, "step": 13845 }, { "epoch": 4.612309486133089, "grad_norm": 0.734375, "learning_rate": 8.797046720542735e-06, "loss": 4.0218, "step": 13846 }, { "epoch": 4.612642625135337, "grad_norm": 0.75, "learning_rate": 8.796249470768437e-06, "loss": 3.9907, "step": 13847 }, { "epoch": 4.612975764137587, "grad_norm": 0.7265625, "learning_rate": 8.79545220589564e-06, "loss": 4.0488, "step": 13848 }, { "epoch": 4.613308903139835, "grad_norm": 0.71484375, "learning_rate": 8.79465492593363e-06, "loss": 4.0334, "step": 13849 }, { "epoch": 4.613642042142084, "grad_norm": 0.73046875, "learning_rate": 8.793857630891697e-06, "loss": 3.9304, "step": 13850 }, { "epoch": 4.613975181144332, "grad_norm": 0.75390625, "learning_rate": 8.793060320779122e-06, "loss": 3.9829, "step": 13851 }, { "epoch": 4.6143083201465815, "grad_norm": 0.7265625, "learning_rate": 8.792262995605196e-06, "loss": 3.9907, "step": 13852 }, { "epoch": 4.61464145914883, "grad_norm": 0.703125, "learning_rate": 8.791465655379207e-06, "loss": 3.9891, "step": 13853 }, { "epoch": 4.614974598151079, "grad_norm": 0.73828125, "learning_rate": 8.790668300110437e-06, "loss": 3.9777, "step": 13854 }, { "epoch": 4.615307737153327, "grad_norm": 0.7421875, "learning_rate": 8.78987092980818e-06, "loss": 4.0505, "step": 13855 }, { "epoch": 4.615640876155576, "grad_norm": 0.76171875, "learning_rate": 8.78907354448172e-06, "loss": 3.9318, "step": 13856 }, { "epoch": 4.615974015157825, "grad_norm": 0.7265625, "learning_rate": 8.788276144140343e-06, "loss": 4.0292, "step": 13857 }, { "epoch": 4.616307154160073, "grad_norm": 0.74609375, "learning_rate": 8.787478728793341e-06, "loss": 3.9955, "step": 13858 }, { "epoch": 4.616640293162322, "grad_norm": 0.71875, "learning_rate": 8.78668129845e-06, "loss": 3.9662, "step": 13859 }, { "epoch": 4.616973432164571, "grad_norm": 0.765625, "learning_rate": 8.78588385311961e-06, "loss": 3.9742, "step": 13860 }, { "epoch": 4.61730657116682, "grad_norm": 0.73046875, "learning_rate": 8.785086392811456e-06, "loss": 4.0853, "step": 13861 }, { "epoch": 4.617639710169068, "grad_norm": 0.76171875, "learning_rate": 8.784288917534832e-06, "loss": 3.9717, "step": 13862 }, { "epoch": 4.617972849171316, "grad_norm": 0.73828125, "learning_rate": 8.783491427299023e-06, "loss": 4.0839, "step": 13863 }, { "epoch": 4.6183059881735655, "grad_norm": 0.72265625, "learning_rate": 8.78269392211332e-06, "loss": 3.9884, "step": 13864 }, { "epoch": 4.618639127175814, "grad_norm": 0.73046875, "learning_rate": 8.781896401987007e-06, "loss": 3.936, "step": 13865 }, { "epoch": 4.618972266178063, "grad_norm": 0.7578125, "learning_rate": 8.781098866929381e-06, "loss": 3.974, "step": 13866 }, { "epoch": 4.619305405180311, "grad_norm": 0.69921875, "learning_rate": 8.780301316949726e-06, "loss": 4.0042, "step": 13867 }, { "epoch": 4.6196385441825605, "grad_norm": 0.73046875, "learning_rate": 8.779503752057336e-06, "loss": 3.9676, "step": 13868 }, { "epoch": 4.619971683184809, "grad_norm": 0.73828125, "learning_rate": 8.778706172261498e-06, "loss": 3.9644, "step": 13869 }, { "epoch": 4.620304822187057, "grad_norm": 0.71875, "learning_rate": 8.7779085775715e-06, "loss": 4.0635, "step": 13870 }, { "epoch": 4.620637961189306, "grad_norm": 0.7421875, "learning_rate": 8.777110967996638e-06, "loss": 4.0362, "step": 13871 }, { "epoch": 4.6209711001915545, "grad_norm": 0.72265625, "learning_rate": 8.776313343546199e-06, "loss": 4.0287, "step": 13872 }, { "epoch": 4.621304239193804, "grad_norm": 0.734375, "learning_rate": 8.775515704229474e-06, "loss": 4.0163, "step": 13873 }, { "epoch": 4.621637378196052, "grad_norm": 0.73046875, "learning_rate": 8.774718050055753e-06, "loss": 4.0055, "step": 13874 }, { "epoch": 4.621970517198301, "grad_norm": 0.74609375, "learning_rate": 8.77392038103433e-06, "loss": 4.0357, "step": 13875 }, { "epoch": 4.6223036562005495, "grad_norm": 0.71484375, "learning_rate": 8.77312269717449e-06, "loss": 4.0194, "step": 13876 }, { "epoch": 4.622636795202799, "grad_norm": 0.7265625, "learning_rate": 8.77232499848553e-06, "loss": 3.951, "step": 13877 }, { "epoch": 4.622969934205047, "grad_norm": 0.7109375, "learning_rate": 8.771527284976742e-06, "loss": 3.9881, "step": 13878 }, { "epoch": 4.623303073207296, "grad_norm": 0.7421875, "learning_rate": 8.770729556657412e-06, "loss": 3.9832, "step": 13879 }, { "epoch": 4.6236362122095445, "grad_norm": 0.75, "learning_rate": 8.769931813536837e-06, "loss": 3.9985, "step": 13880 }, { "epoch": 4.623969351211793, "grad_norm": 0.7265625, "learning_rate": 8.769134055624305e-06, "loss": 4.0156, "step": 13881 }, { "epoch": 4.624302490214042, "grad_norm": 0.73828125, "learning_rate": 8.768336282929113e-06, "loss": 3.9785, "step": 13882 }, { "epoch": 4.62463562921629, "grad_norm": 0.6875, "learning_rate": 8.767538495460548e-06, "loss": 4.0873, "step": 13883 }, { "epoch": 4.624968768218539, "grad_norm": 0.71484375, "learning_rate": 8.766740693227906e-06, "loss": 4.073, "step": 13884 }, { "epoch": 4.625301907220788, "grad_norm": 0.7265625, "learning_rate": 8.765942876240478e-06, "loss": 3.9832, "step": 13885 }, { "epoch": 4.625635046223037, "grad_norm": 0.7265625, "learning_rate": 8.765145044507558e-06, "loss": 4.0311, "step": 13886 }, { "epoch": 4.625968185225285, "grad_norm": 0.71484375, "learning_rate": 8.76434719803844e-06, "loss": 4.0106, "step": 13887 }, { "epoch": 4.6263013242275335, "grad_norm": 0.73046875, "learning_rate": 8.763549336842414e-06, "loss": 3.9652, "step": 13888 }, { "epoch": 4.626634463229783, "grad_norm": 0.7421875, "learning_rate": 8.762751460928775e-06, "loss": 4.0066, "step": 13889 }, { "epoch": 4.626967602232031, "grad_norm": 0.71875, "learning_rate": 8.761953570306817e-06, "loss": 3.9808, "step": 13890 }, { "epoch": 4.62730074123428, "grad_norm": 0.67578125, "learning_rate": 8.761155664985835e-06, "loss": 4.0205, "step": 13891 }, { "epoch": 4.627633880236528, "grad_norm": 0.75390625, "learning_rate": 8.760357744975119e-06, "loss": 3.9612, "step": 13892 }, { "epoch": 4.627967019238778, "grad_norm": 0.734375, "learning_rate": 8.759559810283965e-06, "loss": 3.9418, "step": 13893 }, { "epoch": 4.628300158241026, "grad_norm": 0.72265625, "learning_rate": 8.758761860921668e-06, "loss": 4.0236, "step": 13894 }, { "epoch": 4.628633297243275, "grad_norm": 0.77734375, "learning_rate": 8.757963896897525e-06, "loss": 3.8775, "step": 13895 }, { "epoch": 4.628966436245523, "grad_norm": 0.76171875, "learning_rate": 8.757165918220822e-06, "loss": 3.9992, "step": 13896 }, { "epoch": 4.629299575247773, "grad_norm": 0.7421875, "learning_rate": 8.756367924900862e-06, "loss": 4.0319, "step": 13897 }, { "epoch": 4.629632714250021, "grad_norm": 0.703125, "learning_rate": 8.755569916946935e-06, "loss": 4.0079, "step": 13898 }, { "epoch": 4.629965853252269, "grad_norm": 0.7265625, "learning_rate": 8.75477189436834e-06, "loss": 3.9565, "step": 13899 }, { "epoch": 4.630298992254518, "grad_norm": 0.71875, "learning_rate": 8.753973857174372e-06, "loss": 4.0239, "step": 13900 }, { "epoch": 4.630632131256767, "grad_norm": 0.73828125, "learning_rate": 8.75317580537432e-06, "loss": 4.0427, "step": 13901 }, { "epoch": 4.630965270259016, "grad_norm": 0.76171875, "learning_rate": 8.752377738977486e-06, "loss": 3.9519, "step": 13902 }, { "epoch": 4.631298409261264, "grad_norm": 0.7109375, "learning_rate": 8.751579657993164e-06, "loss": 3.9742, "step": 13903 }, { "epoch": 4.631631548263513, "grad_norm": 0.7265625, "learning_rate": 8.750781562430654e-06, "loss": 3.9203, "step": 13904 }, { "epoch": 4.631964687265762, "grad_norm": 0.734375, "learning_rate": 8.749983452299242e-06, "loss": 3.9786, "step": 13905 }, { "epoch": 4.63229782626801, "grad_norm": 0.69921875, "learning_rate": 8.749185327608232e-06, "loss": 4.0069, "step": 13906 }, { "epoch": 4.632630965270259, "grad_norm": 0.75, "learning_rate": 8.748387188366919e-06, "loss": 3.9587, "step": 13907 }, { "epoch": 4.632964104272507, "grad_norm": 0.7734375, "learning_rate": 8.747589034584602e-06, "loss": 3.9388, "step": 13908 }, { "epoch": 4.633297243274757, "grad_norm": 0.765625, "learning_rate": 8.746790866270571e-06, "loss": 3.9698, "step": 13909 }, { "epoch": 4.633630382277005, "grad_norm": 0.7421875, "learning_rate": 8.745992683434128e-06, "loss": 4.0547, "step": 13910 }, { "epoch": 4.633963521279254, "grad_norm": 0.734375, "learning_rate": 8.74519448608457e-06, "loss": 3.9312, "step": 13911 }, { "epoch": 4.634296660281502, "grad_norm": 0.69921875, "learning_rate": 8.744396274231194e-06, "loss": 3.9828, "step": 13912 }, { "epoch": 4.6346297992837515, "grad_norm": 0.73046875, "learning_rate": 8.743598047883296e-06, "loss": 3.9872, "step": 13913 }, { "epoch": 4.634962938286, "grad_norm": 0.78515625, "learning_rate": 8.742799807050173e-06, "loss": 3.9901, "step": 13914 }, { "epoch": 4.635296077288249, "grad_norm": 0.7265625, "learning_rate": 8.742001551741126e-06, "loss": 3.998, "step": 13915 }, { "epoch": 4.635629216290497, "grad_norm": 0.7265625, "learning_rate": 8.741203281965449e-06, "loss": 3.9511, "step": 13916 }, { "epoch": 4.635962355292746, "grad_norm": 0.7421875, "learning_rate": 8.740404997732446e-06, "loss": 3.9745, "step": 13917 }, { "epoch": 4.636295494294995, "grad_norm": 0.76171875, "learning_rate": 8.739606699051407e-06, "loss": 4.0299, "step": 13918 }, { "epoch": 4.636628633297243, "grad_norm": 0.703125, "learning_rate": 8.738808385931638e-06, "loss": 3.9446, "step": 13919 }, { "epoch": 4.636961772299492, "grad_norm": 0.73046875, "learning_rate": 8.738010058382433e-06, "loss": 3.9215, "step": 13920 }, { "epoch": 4.6372949113017405, "grad_norm": 0.734375, "learning_rate": 8.737211716413093e-06, "loss": 3.9882, "step": 13921 }, { "epoch": 4.63762805030399, "grad_norm": 0.73828125, "learning_rate": 8.736413360032917e-06, "loss": 3.97, "step": 13922 }, { "epoch": 4.637961189306238, "grad_norm": 0.765625, "learning_rate": 8.735614989251202e-06, "loss": 4.0051, "step": 13923 }, { "epoch": 4.638294328308486, "grad_norm": 0.78515625, "learning_rate": 8.734816604077249e-06, "loss": 3.9864, "step": 13924 }, { "epoch": 4.6386274673107355, "grad_norm": 0.70703125, "learning_rate": 8.734018204520356e-06, "loss": 4.0223, "step": 13925 }, { "epoch": 4.638960606312984, "grad_norm": 0.69921875, "learning_rate": 8.733219790589828e-06, "loss": 4.0667, "step": 13926 }, { "epoch": 4.639293745315233, "grad_norm": 0.73046875, "learning_rate": 8.732421362294957e-06, "loss": 3.9473, "step": 13927 }, { "epoch": 4.639626884317481, "grad_norm": 0.77734375, "learning_rate": 8.731622919645046e-06, "loss": 3.9863, "step": 13928 }, { "epoch": 4.6399600233197305, "grad_norm": 0.6796875, "learning_rate": 8.730824462649398e-06, "loss": 3.9421, "step": 13929 }, { "epoch": 4.640293162321979, "grad_norm": 0.703125, "learning_rate": 8.73002599131731e-06, "loss": 4.0353, "step": 13930 }, { "epoch": 4.640626301324227, "grad_norm": 0.74609375, "learning_rate": 8.729227505658083e-06, "loss": 3.9114, "step": 13931 }, { "epoch": 4.640959440326476, "grad_norm": 0.72265625, "learning_rate": 8.728429005681018e-06, "loss": 3.948, "step": 13932 }, { "epoch": 4.641292579328725, "grad_norm": 0.69921875, "learning_rate": 8.727630491395417e-06, "loss": 3.9744, "step": 13933 }, { "epoch": 4.641625718330974, "grad_norm": 0.765625, "learning_rate": 8.72683196281058e-06, "loss": 3.9572, "step": 13934 }, { "epoch": 4.641958857333222, "grad_norm": 0.72265625, "learning_rate": 8.726033419935807e-06, "loss": 3.9735, "step": 13935 }, { "epoch": 4.642291996335471, "grad_norm": 0.76171875, "learning_rate": 8.7252348627804e-06, "loss": 4.0278, "step": 13936 }, { "epoch": 4.6426251353377195, "grad_norm": 0.7421875, "learning_rate": 8.724436291353661e-06, "loss": 3.933, "step": 13937 }, { "epoch": 4.642958274339969, "grad_norm": 0.71484375, "learning_rate": 8.723637705664892e-06, "loss": 4.0193, "step": 13938 }, { "epoch": 4.643291413342217, "grad_norm": 0.7265625, "learning_rate": 8.722839105723394e-06, "loss": 3.9198, "step": 13939 }, { "epoch": 4.643624552344466, "grad_norm": 0.74609375, "learning_rate": 8.722040491538467e-06, "loss": 3.9617, "step": 13940 }, { "epoch": 4.643957691346714, "grad_norm": 0.7109375, "learning_rate": 8.721241863119418e-06, "loss": 3.9302, "step": 13941 }, { "epoch": 4.644290830348963, "grad_norm": 0.71875, "learning_rate": 8.720443220475546e-06, "loss": 4.0756, "step": 13942 }, { "epoch": 4.644623969351212, "grad_norm": 0.75390625, "learning_rate": 8.719644563616152e-06, "loss": 3.9992, "step": 13943 }, { "epoch": 4.64495710835346, "grad_norm": 0.75390625, "learning_rate": 8.718845892550543e-06, "loss": 4.07, "step": 13944 }, { "epoch": 4.645290247355709, "grad_norm": 0.765625, "learning_rate": 8.718047207288018e-06, "loss": 3.9563, "step": 13945 }, { "epoch": 4.645623386357958, "grad_norm": 0.71484375, "learning_rate": 8.71724850783788e-06, "loss": 4.0322, "step": 13946 }, { "epoch": 4.645956525360207, "grad_norm": 0.7265625, "learning_rate": 8.716449794209434e-06, "loss": 3.9715, "step": 13947 }, { "epoch": 4.646289664362455, "grad_norm": 0.69921875, "learning_rate": 8.715651066411985e-06, "loss": 4.0252, "step": 13948 }, { "epoch": 4.6466228033647035, "grad_norm": 0.69140625, "learning_rate": 8.714852324454834e-06, "loss": 4.0649, "step": 13949 }, { "epoch": 4.646955942366953, "grad_norm": 0.7578125, "learning_rate": 8.714053568347285e-06, "loss": 3.9788, "step": 13950 }, { "epoch": 4.647289081369201, "grad_norm": 0.7109375, "learning_rate": 8.713254798098638e-06, "loss": 4.0756, "step": 13951 }, { "epoch": 4.64762222037145, "grad_norm": 0.7265625, "learning_rate": 8.712456013718203e-06, "loss": 3.9295, "step": 13952 }, { "epoch": 4.647955359373698, "grad_norm": 0.74609375, "learning_rate": 8.711657215215282e-06, "loss": 3.966, "step": 13953 }, { "epoch": 4.648288498375948, "grad_norm": 0.7578125, "learning_rate": 8.710858402599178e-06, "loss": 3.9814, "step": 13954 }, { "epoch": 4.648621637378196, "grad_norm": 0.7109375, "learning_rate": 8.710059575879198e-06, "loss": 4.0295, "step": 13955 }, { "epoch": 4.648954776380445, "grad_norm": 0.72265625, "learning_rate": 8.709260735064641e-06, "loss": 3.9646, "step": 13956 }, { "epoch": 4.649287915382693, "grad_norm": 0.734375, "learning_rate": 8.708461880164819e-06, "loss": 3.9897, "step": 13957 }, { "epoch": 4.6496210543849426, "grad_norm": 0.73046875, "learning_rate": 8.707663011189034e-06, "loss": 4.0339, "step": 13958 }, { "epoch": 4.649954193387191, "grad_norm": 0.78125, "learning_rate": 8.706864128146589e-06, "loss": 3.9959, "step": 13959 }, { "epoch": 4.650287332389439, "grad_norm": 0.703125, "learning_rate": 8.706065231046792e-06, "loss": 3.9795, "step": 13960 }, { "epoch": 4.650620471391688, "grad_norm": 0.74609375, "learning_rate": 8.705266319898947e-06, "loss": 4.0127, "step": 13961 }, { "epoch": 4.650953610393937, "grad_norm": 0.72265625, "learning_rate": 8.70446739471236e-06, "loss": 3.9749, "step": 13962 }, { "epoch": 4.651286749396186, "grad_norm": 0.6875, "learning_rate": 8.703668455496337e-06, "loss": 3.9747, "step": 13963 }, { "epoch": 4.651619888398434, "grad_norm": 0.7578125, "learning_rate": 8.702869502260181e-06, "loss": 3.9827, "step": 13964 }, { "epoch": 4.651953027400683, "grad_norm": 0.75, "learning_rate": 8.702070535013205e-06, "loss": 3.9368, "step": 13965 }, { "epoch": 4.652286166402932, "grad_norm": 0.71875, "learning_rate": 8.701271553764708e-06, "loss": 3.9244, "step": 13966 }, { "epoch": 4.65261930540518, "grad_norm": 0.6875, "learning_rate": 8.700472558523999e-06, "loss": 3.9208, "step": 13967 }, { "epoch": 4.652952444407429, "grad_norm": 0.7265625, "learning_rate": 8.699673549300389e-06, "loss": 3.9644, "step": 13968 }, { "epoch": 4.653285583409677, "grad_norm": 0.7578125, "learning_rate": 8.698874526103176e-06, "loss": 4.0401, "step": 13969 }, { "epoch": 4.6536187224119265, "grad_norm": 0.77734375, "learning_rate": 8.698075488941673e-06, "loss": 3.9996, "step": 13970 }, { "epoch": 4.653951861414175, "grad_norm": 0.796875, "learning_rate": 8.697276437825188e-06, "loss": 4.0534, "step": 13971 }, { "epoch": 4.654285000416424, "grad_norm": 0.7421875, "learning_rate": 8.696477372763023e-06, "loss": 4.0376, "step": 13972 }, { "epoch": 4.654618139418672, "grad_norm": 0.734375, "learning_rate": 8.695678293764487e-06, "loss": 3.9879, "step": 13973 }, { "epoch": 4.6549512784209215, "grad_norm": 0.7421875, "learning_rate": 8.69487920083889e-06, "loss": 3.9613, "step": 13974 }, { "epoch": 4.65528441742317, "grad_norm": 0.7890625, "learning_rate": 8.69408009399554e-06, "loss": 4.0513, "step": 13975 }, { "epoch": 4.655617556425419, "grad_norm": 0.74609375, "learning_rate": 8.693280973243744e-06, "loss": 3.9785, "step": 13976 }, { "epoch": 4.655950695427667, "grad_norm": 0.7421875, "learning_rate": 8.692481838592808e-06, "loss": 4.0253, "step": 13977 }, { "epoch": 4.656283834429916, "grad_norm": 0.7109375, "learning_rate": 8.691682690052039e-06, "loss": 4.0049, "step": 13978 }, { "epoch": 4.656616973432165, "grad_norm": 0.71484375, "learning_rate": 8.69088352763075e-06, "loss": 4.0054, "step": 13979 }, { "epoch": 4.656950112434413, "grad_norm": 0.7265625, "learning_rate": 8.690084351338248e-06, "loss": 3.9686, "step": 13980 }, { "epoch": 4.657283251436662, "grad_norm": 0.71484375, "learning_rate": 8.68928516118384e-06, "loss": 4.0396, "step": 13981 }, { "epoch": 4.6576163904389105, "grad_norm": 0.77734375, "learning_rate": 8.688485957176836e-06, "loss": 3.9555, "step": 13982 }, { "epoch": 4.65794952944116, "grad_norm": 0.69921875, "learning_rate": 8.687686739326545e-06, "loss": 4.0731, "step": 13983 }, { "epoch": 4.658282668443408, "grad_norm": 0.7109375, "learning_rate": 8.686887507642276e-06, "loss": 4.0027, "step": 13984 }, { "epoch": 4.658615807445656, "grad_norm": 0.6796875, "learning_rate": 8.68608826213334e-06, "loss": 4.0709, "step": 13985 }, { "epoch": 4.6589489464479055, "grad_norm": 0.6875, "learning_rate": 8.685289002809042e-06, "loss": 4.0715, "step": 13986 }, { "epoch": 4.659282085450154, "grad_norm": 0.71484375, "learning_rate": 8.684489729678697e-06, "loss": 4.0139, "step": 13987 }, { "epoch": 4.659615224452403, "grad_norm": 0.73046875, "learning_rate": 8.683690442751608e-06, "loss": 4.0495, "step": 13988 }, { "epoch": 4.659948363454651, "grad_norm": 0.7578125, "learning_rate": 8.682891142037095e-06, "loss": 3.9742, "step": 13989 }, { "epoch": 4.6602815024569, "grad_norm": 0.7265625, "learning_rate": 8.682091827544459e-06, "loss": 3.9257, "step": 13990 }, { "epoch": 4.660614641459149, "grad_norm": 0.73828125, "learning_rate": 8.681292499283013e-06, "loss": 4.0076, "step": 13991 }, { "epoch": 4.660947780461398, "grad_norm": 0.7421875, "learning_rate": 8.680493157262068e-06, "loss": 4.0174, "step": 13992 }, { "epoch": 4.661280919463646, "grad_norm": 0.77734375, "learning_rate": 8.679693801490937e-06, "loss": 4.0247, "step": 13993 }, { "epoch": 4.661614058465895, "grad_norm": 0.70703125, "learning_rate": 8.678894431978929e-06, "loss": 4.0554, "step": 13994 }, { "epoch": 4.661947197468144, "grad_norm": 0.73828125, "learning_rate": 8.67809504873535e-06, "loss": 4.048, "step": 13995 }, { "epoch": 4.662280336470392, "grad_norm": 0.72265625, "learning_rate": 8.677295651769519e-06, "loss": 3.9675, "step": 13996 }, { "epoch": 4.662613475472641, "grad_norm": 0.6953125, "learning_rate": 8.676496241090742e-06, "loss": 3.9741, "step": 13997 }, { "epoch": 4.6629466144748895, "grad_norm": 0.7265625, "learning_rate": 8.675696816708334e-06, "loss": 3.9772, "step": 13998 }, { "epoch": 4.663279753477139, "grad_norm": 0.67578125, "learning_rate": 8.674897378631602e-06, "loss": 4.0207, "step": 13999 }, { "epoch": 4.663612892479387, "grad_norm": 0.73828125, "learning_rate": 8.67409792686986e-06, "loss": 3.9188, "step": 14000 }, { "epoch": 4.663946031481636, "grad_norm": 0.78515625, "learning_rate": 8.673298461432423e-06, "loss": 4.0296, "step": 14001 }, { "epoch": 4.664279170483884, "grad_norm": 0.69140625, "learning_rate": 8.6724989823286e-06, "loss": 3.9945, "step": 14002 }, { "epoch": 4.664612309486133, "grad_norm": 0.67578125, "learning_rate": 8.671699489567704e-06, "loss": 4.0247, "step": 14003 }, { "epoch": 4.664945448488382, "grad_norm": 0.74609375, "learning_rate": 8.670899983159044e-06, "loss": 3.9302, "step": 14004 }, { "epoch": 4.66527858749063, "grad_norm": 0.71484375, "learning_rate": 8.670100463111937e-06, "loss": 3.9647, "step": 14005 }, { "epoch": 4.665611726492879, "grad_norm": 0.75390625, "learning_rate": 8.669300929435694e-06, "loss": 4.0569, "step": 14006 }, { "epoch": 4.665944865495128, "grad_norm": 0.71875, "learning_rate": 8.668501382139629e-06, "loss": 3.9891, "step": 14007 }, { "epoch": 4.666278004497377, "grad_norm": 0.73828125, "learning_rate": 8.667701821233052e-06, "loss": 4.0154, "step": 14008 }, { "epoch": 4.666611143499625, "grad_norm": 0.7265625, "learning_rate": 8.66690224672528e-06, "loss": 3.9577, "step": 14009 }, { "epoch": 4.666944282501873, "grad_norm": 0.78515625, "learning_rate": 8.666102658625622e-06, "loss": 4.0004, "step": 14010 }, { "epoch": 4.667277421504123, "grad_norm": 0.73828125, "learning_rate": 8.665303056943396e-06, "loss": 4.0037, "step": 14011 }, { "epoch": 4.667610560506371, "grad_norm": 0.69140625, "learning_rate": 8.664503441687912e-06, "loss": 4.0226, "step": 14012 }, { "epoch": 4.66794369950862, "grad_norm": 0.77734375, "learning_rate": 8.663703812868484e-06, "loss": 4.0312, "step": 14013 }, { "epoch": 4.668276838510868, "grad_norm": 0.73828125, "learning_rate": 8.66290417049443e-06, "loss": 3.993, "step": 14014 }, { "epoch": 4.668609977513118, "grad_norm": 0.67578125, "learning_rate": 8.662104514575059e-06, "loss": 4.0492, "step": 14015 }, { "epoch": 4.668943116515366, "grad_norm": 0.71875, "learning_rate": 8.66130484511969e-06, "loss": 3.9419, "step": 14016 }, { "epoch": 4.669276255517615, "grad_norm": 0.7421875, "learning_rate": 8.660505162137632e-06, "loss": 4.021, "step": 14017 }, { "epoch": 4.669609394519863, "grad_norm": 0.78125, "learning_rate": 8.659705465638204e-06, "loss": 3.9938, "step": 14018 }, { "epoch": 4.6699425335221125, "grad_norm": 0.73046875, "learning_rate": 8.65890575563072e-06, "loss": 3.9906, "step": 14019 }, { "epoch": 4.670275672524361, "grad_norm": 0.75390625, "learning_rate": 8.658106032124491e-06, "loss": 3.9872, "step": 14020 }, { "epoch": 4.670608811526609, "grad_norm": 0.734375, "learning_rate": 8.65730629512884e-06, "loss": 4.0151, "step": 14021 }, { "epoch": 4.670941950528858, "grad_norm": 0.7109375, "learning_rate": 8.656506544653072e-06, "loss": 3.9648, "step": 14022 }, { "epoch": 4.671275089531107, "grad_norm": 0.7109375, "learning_rate": 8.655706780706512e-06, "loss": 4.0062, "step": 14023 }, { "epoch": 4.671608228533356, "grad_norm": 0.80078125, "learning_rate": 8.654907003298468e-06, "loss": 4.0174, "step": 14024 }, { "epoch": 4.671941367535604, "grad_norm": 0.70703125, "learning_rate": 8.654107212438264e-06, "loss": 3.9972, "step": 14025 }, { "epoch": 4.672274506537853, "grad_norm": 0.71484375, "learning_rate": 8.653307408135206e-06, "loss": 4.0124, "step": 14026 }, { "epoch": 4.6726076455401016, "grad_norm": 0.69921875, "learning_rate": 8.652507590398617e-06, "loss": 3.9965, "step": 14027 }, { "epoch": 4.67294078454235, "grad_norm": 0.7109375, "learning_rate": 8.651707759237809e-06, "loss": 4.1118, "step": 14028 }, { "epoch": 4.673273923544599, "grad_norm": 0.73828125, "learning_rate": 8.650907914662104e-06, "loss": 3.9692, "step": 14029 }, { "epoch": 4.673607062546847, "grad_norm": 0.75390625, "learning_rate": 8.65010805668081e-06, "loss": 3.989, "step": 14030 }, { "epoch": 4.6739402015490965, "grad_norm": 0.7265625, "learning_rate": 8.64930818530325e-06, "loss": 4.0639, "step": 14031 }, { "epoch": 4.674273340551345, "grad_norm": 0.734375, "learning_rate": 8.64850830053874e-06, "loss": 3.9468, "step": 14032 }, { "epoch": 4.674606479553594, "grad_norm": 0.75, "learning_rate": 8.647708402396595e-06, "loss": 4.0406, "step": 14033 }, { "epoch": 4.674939618555842, "grad_norm": 0.765625, "learning_rate": 8.646908490886134e-06, "loss": 3.9705, "step": 14034 }, { "epoch": 4.6752727575580915, "grad_norm": 0.73046875, "learning_rate": 8.646108566016672e-06, "loss": 3.9239, "step": 14035 }, { "epoch": 4.67560589656034, "grad_norm": 0.73828125, "learning_rate": 8.645308627797529e-06, "loss": 4.0309, "step": 14036 }, { "epoch": 4.675939035562589, "grad_norm": 0.75, "learning_rate": 8.644508676238018e-06, "loss": 3.8674, "step": 14037 }, { "epoch": 4.676272174564837, "grad_norm": 0.70703125, "learning_rate": 8.643708711347466e-06, "loss": 4.0716, "step": 14038 }, { "epoch": 4.6766053135670855, "grad_norm": 0.70703125, "learning_rate": 8.64290873313518e-06, "loss": 4.0573, "step": 14039 }, { "epoch": 4.676938452569335, "grad_norm": 0.71484375, "learning_rate": 8.642108741610483e-06, "loss": 3.9802, "step": 14040 }, { "epoch": 4.677271591571583, "grad_norm": 0.69921875, "learning_rate": 8.641308736782693e-06, "loss": 4.0441, "step": 14041 }, { "epoch": 4.677604730573832, "grad_norm": 0.73046875, "learning_rate": 8.640508718661131e-06, "loss": 3.9272, "step": 14042 }, { "epoch": 4.6779378695760805, "grad_norm": 0.71875, "learning_rate": 8.639708687255111e-06, "loss": 3.9067, "step": 14043 }, { "epoch": 4.67827100857833, "grad_norm": 0.734375, "learning_rate": 8.638908642573954e-06, "loss": 3.9891, "step": 14044 }, { "epoch": 4.678604147580578, "grad_norm": 0.72265625, "learning_rate": 8.638108584626978e-06, "loss": 4.0118, "step": 14045 }, { "epoch": 4.678937286582826, "grad_norm": 0.6953125, "learning_rate": 8.6373085134235e-06, "loss": 4.0558, "step": 14046 }, { "epoch": 4.679270425585075, "grad_norm": 0.69140625, "learning_rate": 8.636508428972844e-06, "loss": 3.9478, "step": 14047 }, { "epoch": 4.679603564587324, "grad_norm": 0.703125, "learning_rate": 8.635708331284326e-06, "loss": 3.988, "step": 14048 }, { "epoch": 4.679936703589573, "grad_norm": 0.7109375, "learning_rate": 8.634908220367264e-06, "loss": 4.019, "step": 14049 }, { "epoch": 4.680269842591821, "grad_norm": 0.72265625, "learning_rate": 8.634108096230982e-06, "loss": 4.0051, "step": 14050 }, { "epoch": 4.68060298159407, "grad_norm": 0.72265625, "learning_rate": 8.633307958884795e-06, "loss": 4.0317, "step": 14051 }, { "epoch": 4.680936120596319, "grad_norm": 0.74609375, "learning_rate": 8.632507808338026e-06, "loss": 3.9462, "step": 14052 }, { "epoch": 4.681269259598568, "grad_norm": 0.76953125, "learning_rate": 8.631707644599994e-06, "loss": 3.9685, "step": 14053 }, { "epoch": 4.681602398600816, "grad_norm": 0.7890625, "learning_rate": 8.630907467680019e-06, "loss": 3.8954, "step": 14054 }, { "epoch": 4.681935537603065, "grad_norm": 0.73046875, "learning_rate": 8.630107277587423e-06, "loss": 3.9619, "step": 14055 }, { "epoch": 4.682268676605314, "grad_norm": 0.6875, "learning_rate": 8.629307074331525e-06, "loss": 4.0033, "step": 14056 }, { "epoch": 4.682601815607562, "grad_norm": 0.74609375, "learning_rate": 8.628506857921642e-06, "loss": 3.9978, "step": 14057 }, { "epoch": 4.682934954609811, "grad_norm": 0.8046875, "learning_rate": 8.627706628367103e-06, "loss": 3.9211, "step": 14058 }, { "epoch": 4.683268093612059, "grad_norm": 0.70703125, "learning_rate": 8.62690638567722e-06, "loss": 3.9242, "step": 14059 }, { "epoch": 4.683601232614309, "grad_norm": 0.74609375, "learning_rate": 8.626106129861322e-06, "loss": 3.9555, "step": 14060 }, { "epoch": 4.683934371616557, "grad_norm": 0.70703125, "learning_rate": 8.625305860928725e-06, "loss": 4.0098, "step": 14061 }, { "epoch": 4.684267510618806, "grad_norm": 0.73046875, "learning_rate": 8.624505578888754e-06, "loss": 4.019, "step": 14062 }, { "epoch": 4.684600649621054, "grad_norm": 0.75, "learning_rate": 8.623705283750727e-06, "loss": 3.9908, "step": 14063 }, { "epoch": 4.684933788623303, "grad_norm": 0.71484375, "learning_rate": 8.622904975523967e-06, "loss": 4.0003, "step": 14064 }, { "epoch": 4.685266927625552, "grad_norm": 0.70703125, "learning_rate": 8.622104654217799e-06, "loss": 3.9293, "step": 14065 }, { "epoch": 4.6856000666278, "grad_norm": 0.76953125, "learning_rate": 8.621304319841537e-06, "loss": 4.0679, "step": 14066 }, { "epoch": 4.685933205630049, "grad_norm": 0.7890625, "learning_rate": 8.620503972404513e-06, "loss": 4.0141, "step": 14067 }, { "epoch": 4.686266344632298, "grad_norm": 0.71484375, "learning_rate": 8.619703611916042e-06, "loss": 3.9909, "step": 14068 }, { "epoch": 4.686599483634547, "grad_norm": 0.7109375, "learning_rate": 8.61890323838545e-06, "loss": 4.1237, "step": 14069 }, { "epoch": 4.686932622636795, "grad_norm": 0.75390625, "learning_rate": 8.618102851822062e-06, "loss": 3.9468, "step": 14070 }, { "epoch": 4.687265761639043, "grad_norm": 0.71875, "learning_rate": 8.617302452235194e-06, "loss": 4.0162, "step": 14071 }, { "epoch": 4.687598900641293, "grad_norm": 0.734375, "learning_rate": 8.616502039634173e-06, "loss": 3.988, "step": 14072 }, { "epoch": 4.687932039643541, "grad_norm": 0.765625, "learning_rate": 8.615701614028323e-06, "loss": 4.0069, "step": 14073 }, { "epoch": 4.68826517864579, "grad_norm": 0.7421875, "learning_rate": 8.614901175426965e-06, "loss": 3.9497, "step": 14074 }, { "epoch": 4.688598317648038, "grad_norm": 0.76171875, "learning_rate": 8.614100723839423e-06, "loss": 4.0153, "step": 14075 }, { "epoch": 4.6889314566502875, "grad_norm": 0.7890625, "learning_rate": 8.61330025927502e-06, "loss": 3.984, "step": 14076 }, { "epoch": 4.689264595652536, "grad_norm": 0.70703125, "learning_rate": 8.612499781743082e-06, "loss": 4.0477, "step": 14077 }, { "epoch": 4.689597734654785, "grad_norm": 0.7265625, "learning_rate": 8.611699291252928e-06, "loss": 3.9926, "step": 14078 }, { "epoch": 4.689930873657033, "grad_norm": 0.73828125, "learning_rate": 8.61089878781389e-06, "loss": 3.9823, "step": 14079 }, { "epoch": 4.6902640126592825, "grad_norm": 0.70703125, "learning_rate": 8.610098271435284e-06, "loss": 3.9921, "step": 14080 }, { "epoch": 4.690597151661531, "grad_norm": 0.734375, "learning_rate": 8.609297742126437e-06, "loss": 3.9018, "step": 14081 }, { "epoch": 4.690930290663779, "grad_norm": 0.71484375, "learning_rate": 8.608497199896676e-06, "loss": 4.0767, "step": 14082 }, { "epoch": 4.691263429666028, "grad_norm": 0.70703125, "learning_rate": 8.607696644755325e-06, "loss": 3.9791, "step": 14083 }, { "epoch": 4.691596568668277, "grad_norm": 0.75390625, "learning_rate": 8.606896076711704e-06, "loss": 3.9357, "step": 14084 }, { "epoch": 4.691929707670526, "grad_norm": 0.77734375, "learning_rate": 8.606095495775142e-06, "loss": 3.9539, "step": 14085 }, { "epoch": 4.692262846672774, "grad_norm": 0.73046875, "learning_rate": 8.605294901954964e-06, "loss": 4.0087, "step": 14086 }, { "epoch": 4.692595985675023, "grad_norm": 0.71875, "learning_rate": 8.604494295260494e-06, "loss": 3.9732, "step": 14087 }, { "epoch": 4.6929291246772715, "grad_norm": 0.734375, "learning_rate": 8.603693675701058e-06, "loss": 3.9944, "step": 14088 }, { "epoch": 4.69326226367952, "grad_norm": 0.7109375, "learning_rate": 8.602893043285983e-06, "loss": 3.9372, "step": 14089 }, { "epoch": 4.693595402681769, "grad_norm": 0.75, "learning_rate": 8.60209239802459e-06, "loss": 3.947, "step": 14090 }, { "epoch": 4.693928541684017, "grad_norm": 0.74609375, "learning_rate": 8.60129173992621e-06, "loss": 3.9854, "step": 14091 }, { "epoch": 4.6942616806862665, "grad_norm": 0.72265625, "learning_rate": 8.600491069000167e-06, "loss": 3.9686, "step": 14092 }, { "epoch": 4.694594819688515, "grad_norm": 0.74609375, "learning_rate": 8.599690385255785e-06, "loss": 3.9254, "step": 14093 }, { "epoch": 4.694927958690764, "grad_norm": 0.81640625, "learning_rate": 8.598889688702391e-06, "loss": 3.9541, "step": 14094 }, { "epoch": 4.695261097693012, "grad_norm": 0.73828125, "learning_rate": 8.598088979349315e-06, "loss": 4.0074, "step": 14095 }, { "epoch": 4.695594236695261, "grad_norm": 0.71875, "learning_rate": 8.597288257205878e-06, "loss": 4.0317, "step": 14096 }, { "epoch": 4.69592737569751, "grad_norm": 0.73046875, "learning_rate": 8.596487522281412e-06, "loss": 3.9983, "step": 14097 }, { "epoch": 4.696260514699759, "grad_norm": 0.71484375, "learning_rate": 8.59568677458524e-06, "loss": 3.9931, "step": 14098 }, { "epoch": 4.696593653702007, "grad_norm": 0.734375, "learning_rate": 8.594886014126692e-06, "loss": 3.9735, "step": 14099 }, { "epoch": 4.6969267927042555, "grad_norm": 0.73046875, "learning_rate": 8.59408524091509e-06, "loss": 3.969, "step": 14100 }, { "epoch": 4.697259931706505, "grad_norm": 0.734375, "learning_rate": 8.593284454959767e-06, "loss": 3.997, "step": 14101 }, { "epoch": 4.697593070708753, "grad_norm": 0.76171875, "learning_rate": 8.592483656270048e-06, "loss": 4.0396, "step": 14102 }, { "epoch": 4.697926209711002, "grad_norm": 0.74609375, "learning_rate": 8.59168284485526e-06, "loss": 3.9457, "step": 14103 }, { "epoch": 4.6982593487132505, "grad_norm": 0.76171875, "learning_rate": 8.590882020724735e-06, "loss": 4.0321, "step": 14104 }, { "epoch": 4.6985924877155, "grad_norm": 0.73046875, "learning_rate": 8.590081183887792e-06, "loss": 4.0541, "step": 14105 }, { "epoch": 4.698925626717748, "grad_norm": 0.7734375, "learning_rate": 8.589280334353768e-06, "loss": 3.9382, "step": 14106 }, { "epoch": 4.699258765719996, "grad_norm": 0.72265625, "learning_rate": 8.588479472131987e-06, "loss": 3.9914, "step": 14107 }, { "epoch": 4.699591904722245, "grad_norm": 0.7578125, "learning_rate": 8.587678597231778e-06, "loss": 4.0383, "step": 14108 }, { "epoch": 4.699925043724494, "grad_norm": 0.77734375, "learning_rate": 8.586877709662466e-06, "loss": 4.0015, "step": 14109 }, { "epoch": 4.700258182726743, "grad_norm": 0.76953125, "learning_rate": 8.586076809433385e-06, "loss": 4.0118, "step": 14110 }, { "epoch": 4.700591321728991, "grad_norm": 0.75, "learning_rate": 8.585275896553863e-06, "loss": 4.0625, "step": 14111 }, { "epoch": 4.70092446073124, "grad_norm": 0.703125, "learning_rate": 8.584474971033225e-06, "loss": 4.0188, "step": 14112 }, { "epoch": 4.701257599733489, "grad_norm": 0.7421875, "learning_rate": 8.583674032880804e-06, "loss": 4.0719, "step": 14113 }, { "epoch": 4.701590738735738, "grad_norm": 0.76171875, "learning_rate": 8.582873082105926e-06, "loss": 4.0629, "step": 14114 }, { "epoch": 4.701923877737986, "grad_norm": 0.7109375, "learning_rate": 8.582072118717928e-06, "loss": 4.0742, "step": 14115 }, { "epoch": 4.702257016740235, "grad_norm": 0.75, "learning_rate": 8.581271142726128e-06, "loss": 3.9727, "step": 14116 }, { "epoch": 4.702590155742484, "grad_norm": 0.73046875, "learning_rate": 8.58047015413986e-06, "loss": 4.0029, "step": 14117 }, { "epoch": 4.702923294744732, "grad_norm": 0.703125, "learning_rate": 8.579669152968459e-06, "loss": 4.0219, "step": 14118 }, { "epoch": 4.703256433746981, "grad_norm": 0.69140625, "learning_rate": 8.578868139221252e-06, "loss": 4.0157, "step": 14119 }, { "epoch": 4.703589572749229, "grad_norm": 0.703125, "learning_rate": 8.578067112907562e-06, "loss": 4.0223, "step": 14120 }, { "epoch": 4.703922711751479, "grad_norm": 0.7109375, "learning_rate": 8.577266074036731e-06, "loss": 4.018, "step": 14121 }, { "epoch": 4.704255850753727, "grad_norm": 0.7578125, "learning_rate": 8.57646502261808e-06, "loss": 3.9424, "step": 14122 }, { "epoch": 4.704588989755976, "grad_norm": 0.73046875, "learning_rate": 8.575663958660945e-06, "loss": 3.9718, "step": 14123 }, { "epoch": 4.704922128758224, "grad_norm": 0.7109375, "learning_rate": 8.574862882174654e-06, "loss": 4.016, "step": 14124 }, { "epoch": 4.705255267760473, "grad_norm": 0.71875, "learning_rate": 8.574061793168538e-06, "loss": 3.9809, "step": 14125 }, { "epoch": 4.705588406762722, "grad_norm": 0.73046875, "learning_rate": 8.57326069165193e-06, "loss": 4.0034, "step": 14126 }, { "epoch": 4.70592154576497, "grad_norm": 0.734375, "learning_rate": 8.572459577634158e-06, "loss": 4.0394, "step": 14127 }, { "epoch": 4.706254684767219, "grad_norm": 0.74609375, "learning_rate": 8.571658451124557e-06, "loss": 4.0151, "step": 14128 }, { "epoch": 4.706587823769468, "grad_norm": 0.7421875, "learning_rate": 8.570857312132454e-06, "loss": 4.0031, "step": 14129 }, { "epoch": 4.706920962771717, "grad_norm": 0.7109375, "learning_rate": 8.570056160667184e-06, "loss": 4.0048, "step": 14130 }, { "epoch": 4.707254101773965, "grad_norm": 0.69921875, "learning_rate": 8.569254996738077e-06, "loss": 4.0257, "step": 14131 }, { "epoch": 4.707587240776213, "grad_norm": 0.734375, "learning_rate": 8.568453820354465e-06, "loss": 4.0221, "step": 14132 }, { "epoch": 4.707920379778463, "grad_norm": 0.7578125, "learning_rate": 8.567652631525681e-06, "loss": 3.9916, "step": 14133 }, { "epoch": 4.708253518780712, "grad_norm": 0.71875, "learning_rate": 8.566851430261057e-06, "loss": 3.9958, "step": 14134 }, { "epoch": 4.70858665778296, "grad_norm": 0.71875, "learning_rate": 8.566050216569923e-06, "loss": 4.0389, "step": 14135 }, { "epoch": 4.708919796785208, "grad_norm": 0.76953125, "learning_rate": 8.565248990461612e-06, "loss": 4.0414, "step": 14136 }, { "epoch": 4.7092529357874575, "grad_norm": 0.7421875, "learning_rate": 8.564447751945463e-06, "loss": 3.9977, "step": 14137 }, { "epoch": 4.709586074789706, "grad_norm": 0.734375, "learning_rate": 8.563646501030798e-06, "loss": 4.0406, "step": 14138 }, { "epoch": 4.709919213791955, "grad_norm": 0.703125, "learning_rate": 8.562845237726958e-06, "loss": 3.9546, "step": 14139 }, { "epoch": 4.710252352794203, "grad_norm": 0.7421875, "learning_rate": 8.56204396204327e-06, "loss": 4.022, "step": 14140 }, { "epoch": 4.7105854917964525, "grad_norm": 0.72265625, "learning_rate": 8.561242673989073e-06, "loss": 3.9933, "step": 14141 }, { "epoch": 4.710918630798701, "grad_norm": 0.78515625, "learning_rate": 8.560441373573696e-06, "loss": 3.9168, "step": 14142 }, { "epoch": 4.711251769800949, "grad_norm": 0.75390625, "learning_rate": 8.559640060806476e-06, "loss": 3.9779, "step": 14143 }, { "epoch": 4.711584908803198, "grad_norm": 0.73046875, "learning_rate": 8.558838735696742e-06, "loss": 3.99, "step": 14144 }, { "epoch": 4.7119180478054465, "grad_norm": 0.73828125, "learning_rate": 8.55803739825383e-06, "loss": 3.9829, "step": 14145 }, { "epoch": 4.712251186807696, "grad_norm": 0.77734375, "learning_rate": 8.557236048487076e-06, "loss": 4.0044, "step": 14146 }, { "epoch": 4.712584325809944, "grad_norm": 0.7734375, "learning_rate": 8.556434686405812e-06, "loss": 3.944, "step": 14147 }, { "epoch": 4.712917464812193, "grad_norm": 0.72265625, "learning_rate": 8.55563331201937e-06, "loss": 3.9571, "step": 14148 }, { "epoch": 4.7132506038144415, "grad_norm": 0.7265625, "learning_rate": 8.554831925337089e-06, "loss": 4.0209, "step": 14149 }, { "epoch": 4.71358374281669, "grad_norm": 0.7421875, "learning_rate": 8.5540305263683e-06, "loss": 4.0332, "step": 14150 }, { "epoch": 4.713916881818939, "grad_norm": 0.74609375, "learning_rate": 8.553229115122337e-06, "loss": 3.9648, "step": 14151 }, { "epoch": 4.714250020821187, "grad_norm": 0.69140625, "learning_rate": 8.552427691608538e-06, "loss": 4.0227, "step": 14152 }, { "epoch": 4.7145831598234365, "grad_norm": 0.6875, "learning_rate": 8.551626255836233e-06, "loss": 4.056, "step": 14153 }, { "epoch": 4.714916298825685, "grad_norm": 0.6953125, "learning_rate": 8.550824807814765e-06, "loss": 3.9681, "step": 14154 }, { "epoch": 4.715249437827934, "grad_norm": 0.765625, "learning_rate": 8.55002334755346e-06, "loss": 4.02, "step": 14155 }, { "epoch": 4.715582576830182, "grad_norm": 0.75, "learning_rate": 8.549221875061657e-06, "loss": 3.9914, "step": 14156 }, { "epoch": 4.715915715832431, "grad_norm": 0.734375, "learning_rate": 8.548420390348694e-06, "loss": 3.9885, "step": 14157 }, { "epoch": 4.71624885483468, "grad_norm": 0.74609375, "learning_rate": 8.547618893423902e-06, "loss": 4.0041, "step": 14158 }, { "epoch": 4.716581993836929, "grad_norm": 0.71484375, "learning_rate": 8.546817384296623e-06, "loss": 3.9961, "step": 14159 }, { "epoch": 4.716915132839177, "grad_norm": 0.78515625, "learning_rate": 8.546015862976184e-06, "loss": 4.0139, "step": 14160 }, { "epoch": 4.7172482718414255, "grad_norm": 0.765625, "learning_rate": 8.545214329471931e-06, "loss": 3.9978, "step": 14161 }, { "epoch": 4.717581410843675, "grad_norm": 0.734375, "learning_rate": 8.54441278379319e-06, "loss": 3.9721, "step": 14162 }, { "epoch": 4.717914549845923, "grad_norm": 0.70703125, "learning_rate": 8.543611225949304e-06, "loss": 3.9505, "step": 14163 }, { "epoch": 4.718247688848172, "grad_norm": 0.70703125, "learning_rate": 8.542809655949611e-06, "loss": 3.9993, "step": 14164 }, { "epoch": 4.71858082785042, "grad_norm": 0.734375, "learning_rate": 8.542008073803442e-06, "loss": 3.973, "step": 14165 }, { "epoch": 4.71891396685267, "grad_norm": 0.73828125, "learning_rate": 8.541206479520132e-06, "loss": 3.9374, "step": 14166 }, { "epoch": 4.719247105854918, "grad_norm": 0.73046875, "learning_rate": 8.540404873109026e-06, "loss": 4.0039, "step": 14167 }, { "epoch": 4.719580244857166, "grad_norm": 0.77734375, "learning_rate": 8.539603254579457e-06, "loss": 3.9227, "step": 14168 }, { "epoch": 4.719913383859415, "grad_norm": 0.7578125, "learning_rate": 8.538801623940759e-06, "loss": 4.0886, "step": 14169 }, { "epoch": 4.720246522861664, "grad_norm": 0.69921875, "learning_rate": 8.537999981202274e-06, "loss": 4.0034, "step": 14170 }, { "epoch": 4.720579661863913, "grad_norm": 0.7421875, "learning_rate": 8.537198326373336e-06, "loss": 4.011, "step": 14171 }, { "epoch": 4.720912800866161, "grad_norm": 0.75390625, "learning_rate": 8.536396659463287e-06, "loss": 3.9289, "step": 14172 }, { "epoch": 4.72124593986841, "grad_norm": 0.76171875, "learning_rate": 8.53559498048146e-06, "loss": 4.0258, "step": 14173 }, { "epoch": 4.721579078870659, "grad_norm": 0.7734375, "learning_rate": 8.534793289437197e-06, "loss": 3.9699, "step": 14174 }, { "epoch": 4.721912217872908, "grad_norm": 0.7578125, "learning_rate": 8.53399158633983e-06, "loss": 3.9873, "step": 14175 }, { "epoch": 4.722245356875156, "grad_norm": 0.7578125, "learning_rate": 8.533189871198702e-06, "loss": 4.0503, "step": 14176 }, { "epoch": 4.722578495877405, "grad_norm": 0.734375, "learning_rate": 8.532388144023149e-06, "loss": 3.9701, "step": 14177 }, { "epoch": 4.722911634879654, "grad_norm": 0.7734375, "learning_rate": 8.53158640482251e-06, "loss": 3.9802, "step": 14178 }, { "epoch": 4.723244773881902, "grad_norm": 0.7890625, "learning_rate": 8.530784653606126e-06, "loss": 3.9364, "step": 14179 }, { "epoch": 4.723577912884151, "grad_norm": 0.72265625, "learning_rate": 8.529982890383332e-06, "loss": 3.9784, "step": 14180 }, { "epoch": 4.723911051886399, "grad_norm": 0.76953125, "learning_rate": 8.529181115163468e-06, "loss": 3.9602, "step": 14181 }, { "epoch": 4.7242441908886486, "grad_norm": 0.74609375, "learning_rate": 8.528379327955875e-06, "loss": 3.9584, "step": 14182 }, { "epoch": 4.724577329890897, "grad_norm": 0.71875, "learning_rate": 8.527577528769889e-06, "loss": 4.0611, "step": 14183 }, { "epoch": 4.724910468893146, "grad_norm": 0.76953125, "learning_rate": 8.52677571761485e-06, "loss": 3.9758, "step": 14184 }, { "epoch": 4.725243607895394, "grad_norm": 0.76953125, "learning_rate": 8.525973894500098e-06, "loss": 4.0252, "step": 14185 }, { "epoch": 4.725576746897643, "grad_norm": 0.73046875, "learning_rate": 8.525172059434973e-06, "loss": 4.0073, "step": 14186 }, { "epoch": 4.725909885899892, "grad_norm": 0.75, "learning_rate": 8.524370212428815e-06, "loss": 4.0359, "step": 14187 }, { "epoch": 4.72624302490214, "grad_norm": 0.7578125, "learning_rate": 8.523568353490962e-06, "loss": 3.9598, "step": 14188 }, { "epoch": 4.726576163904389, "grad_norm": 0.7890625, "learning_rate": 8.522766482630755e-06, "loss": 4.0538, "step": 14189 }, { "epoch": 4.726909302906638, "grad_norm": 0.76953125, "learning_rate": 8.521964599857531e-06, "loss": 4.034, "step": 14190 }, { "epoch": 4.727242441908887, "grad_norm": 0.72265625, "learning_rate": 8.521162705180636e-06, "loss": 3.9234, "step": 14191 }, { "epoch": 4.727575580911135, "grad_norm": 0.734375, "learning_rate": 8.520360798609406e-06, "loss": 3.9297, "step": 14192 }, { "epoch": 4.727908719913384, "grad_norm": 0.75, "learning_rate": 8.519558880153183e-06, "loss": 3.98, "step": 14193 }, { "epoch": 4.7282418589156325, "grad_norm": 0.7421875, "learning_rate": 8.518756949821309e-06, "loss": 3.9939, "step": 14194 }, { "epoch": 4.728574997917882, "grad_norm": 0.70703125, "learning_rate": 8.517955007623123e-06, "loss": 4.0383, "step": 14195 }, { "epoch": 4.72890813692013, "grad_norm": 0.76171875, "learning_rate": 8.517153053567965e-06, "loss": 3.9644, "step": 14196 }, { "epoch": 4.729241275922378, "grad_norm": 0.71875, "learning_rate": 8.516351087665175e-06, "loss": 3.9489, "step": 14197 }, { "epoch": 4.7295744149246275, "grad_norm": 0.765625, "learning_rate": 8.5155491099241e-06, "loss": 4.0201, "step": 14198 }, { "epoch": 4.729907553926876, "grad_norm": 0.76171875, "learning_rate": 8.514747120354073e-06, "loss": 4.0595, "step": 14199 }, { "epoch": 4.730240692929125, "grad_norm": 0.71484375, "learning_rate": 8.513945118964444e-06, "loss": 3.9503, "step": 14200 }, { "epoch": 4.730573831931373, "grad_norm": 0.7578125, "learning_rate": 8.51314310576455e-06, "loss": 3.928, "step": 14201 }, { "epoch": 4.7309069709336224, "grad_norm": 0.75, "learning_rate": 8.512341080763731e-06, "loss": 3.9429, "step": 14202 }, { "epoch": 4.731240109935871, "grad_norm": 0.765625, "learning_rate": 8.511539043971333e-06, "loss": 3.9895, "step": 14203 }, { "epoch": 4.731573248938119, "grad_norm": 0.71875, "learning_rate": 8.510736995396697e-06, "loss": 4.0132, "step": 14204 }, { "epoch": 4.731906387940368, "grad_norm": 0.75390625, "learning_rate": 8.509934935049163e-06, "loss": 3.9913, "step": 14205 }, { "epoch": 4.7322395269426165, "grad_norm": 0.6875, "learning_rate": 8.509132862938073e-06, "loss": 4.023, "step": 14206 }, { "epoch": 4.732572665944866, "grad_norm": 0.75390625, "learning_rate": 8.508330779072775e-06, "loss": 3.8939, "step": 14207 }, { "epoch": 4.732905804947114, "grad_norm": 0.76953125, "learning_rate": 8.507528683462602e-06, "loss": 4.0303, "step": 14208 }, { "epoch": 4.733238943949363, "grad_norm": 0.71875, "learning_rate": 8.506726576116908e-06, "loss": 3.9706, "step": 14209 }, { "epoch": 4.7335720829516115, "grad_norm": 0.70703125, "learning_rate": 8.505924457045026e-06, "loss": 4.0421, "step": 14210 }, { "epoch": 4.73390522195386, "grad_norm": 0.77734375, "learning_rate": 8.505122326256306e-06, "loss": 3.9639, "step": 14211 }, { "epoch": 4.734238360956109, "grad_norm": 0.7421875, "learning_rate": 8.504320183760085e-06, "loss": 4.0066, "step": 14212 }, { "epoch": 4.734571499958357, "grad_norm": 0.75, "learning_rate": 8.50351802956571e-06, "loss": 3.9809, "step": 14213 }, { "epoch": 4.734904638960606, "grad_norm": 0.77734375, "learning_rate": 8.502715863682523e-06, "loss": 4.0032, "step": 14214 }, { "epoch": 4.735237777962855, "grad_norm": 0.7421875, "learning_rate": 8.50191368611987e-06, "loss": 3.9564, "step": 14215 }, { "epoch": 4.735570916965104, "grad_norm": 0.73046875, "learning_rate": 8.501111496887092e-06, "loss": 3.9936, "step": 14216 }, { "epoch": 4.735904055967352, "grad_norm": 0.73828125, "learning_rate": 8.50030929599353e-06, "loss": 4.0982, "step": 14217 }, { "epoch": 4.736237194969601, "grad_norm": 0.734375, "learning_rate": 8.499507083448537e-06, "loss": 4.0302, "step": 14218 }, { "epoch": 4.73657033397185, "grad_norm": 0.703125, "learning_rate": 8.498704859261448e-06, "loss": 3.9863, "step": 14219 }, { "epoch": 4.736903472974099, "grad_norm": 0.66796875, "learning_rate": 8.497902623441612e-06, "loss": 4.0801, "step": 14220 }, { "epoch": 4.737236611976347, "grad_norm": 0.74609375, "learning_rate": 8.49710037599837e-06, "loss": 3.9296, "step": 14221 }, { "epoch": 4.7375697509785955, "grad_norm": 0.74609375, "learning_rate": 8.49629811694107e-06, "loss": 3.9961, "step": 14222 }, { "epoch": 4.737902889980845, "grad_norm": 0.77734375, "learning_rate": 8.495495846279057e-06, "loss": 3.9729, "step": 14223 }, { "epoch": 4.738236028983093, "grad_norm": 0.8203125, "learning_rate": 8.494693564021668e-06, "loss": 4.0152, "step": 14224 }, { "epoch": 4.738569167985342, "grad_norm": 0.796875, "learning_rate": 8.493891270178257e-06, "loss": 3.9832, "step": 14225 }, { "epoch": 4.73890230698759, "grad_norm": 0.78515625, "learning_rate": 8.493088964758164e-06, "loss": 3.9867, "step": 14226 }, { "epoch": 4.73923544598984, "grad_norm": 0.75390625, "learning_rate": 8.49228664777074e-06, "loss": 3.9654, "step": 14227 }, { "epoch": 4.739568584992088, "grad_norm": 0.7578125, "learning_rate": 8.491484319225323e-06, "loss": 3.9918, "step": 14228 }, { "epoch": 4.739901723994336, "grad_norm": 0.77734375, "learning_rate": 8.49068197913126e-06, "loss": 4.0348, "step": 14229 }, { "epoch": 4.740234862996585, "grad_norm": 0.7421875, "learning_rate": 8.489879627497897e-06, "loss": 4.0762, "step": 14230 }, { "epoch": 4.740568001998834, "grad_norm": 0.73046875, "learning_rate": 8.489077264334585e-06, "loss": 4.0493, "step": 14231 }, { "epoch": 4.740901141001083, "grad_norm": 0.74609375, "learning_rate": 8.48827488965066e-06, "loss": 4.0066, "step": 14232 }, { "epoch": 4.741234280003331, "grad_norm": 0.73828125, "learning_rate": 8.487472503455477e-06, "loss": 3.9743, "step": 14233 }, { "epoch": 4.74156741900558, "grad_norm": 0.73046875, "learning_rate": 8.486670105758375e-06, "loss": 4.0157, "step": 14234 }, { "epoch": 4.741900558007829, "grad_norm": 0.81640625, "learning_rate": 8.485867696568704e-06, "loss": 4.0163, "step": 14235 }, { "epoch": 4.742233697010078, "grad_norm": 0.72265625, "learning_rate": 8.485065275895813e-06, "loss": 4.0537, "step": 14236 }, { "epoch": 4.742566836012326, "grad_norm": 0.76171875, "learning_rate": 8.484262843749042e-06, "loss": 4.0022, "step": 14237 }, { "epoch": 4.742899975014575, "grad_norm": 0.80078125, "learning_rate": 8.48346040013774e-06, "loss": 3.9027, "step": 14238 }, { "epoch": 4.743233114016824, "grad_norm": 0.76171875, "learning_rate": 8.482657945071256e-06, "loss": 4.0222, "step": 14239 }, { "epoch": 4.743566253019072, "grad_norm": 0.73046875, "learning_rate": 8.481855478558937e-06, "loss": 3.935, "step": 14240 }, { "epoch": 4.743899392021321, "grad_norm": 0.73046875, "learning_rate": 8.481053000610127e-06, "loss": 4.0143, "step": 14241 }, { "epoch": 4.744232531023569, "grad_norm": 0.75, "learning_rate": 8.480250511234175e-06, "loss": 4.0369, "step": 14242 }, { "epoch": 4.7445656700258185, "grad_norm": 0.73828125, "learning_rate": 8.479448010440425e-06, "loss": 3.9894, "step": 14243 }, { "epoch": 4.744898809028067, "grad_norm": 0.74609375, "learning_rate": 8.47864549823823e-06, "loss": 4.0084, "step": 14244 }, { "epoch": 4.745231948030316, "grad_norm": 0.76171875, "learning_rate": 8.477842974636935e-06, "loss": 4.0641, "step": 14245 }, { "epoch": 4.745565087032564, "grad_norm": 0.73046875, "learning_rate": 8.477040439645885e-06, "loss": 3.9907, "step": 14246 }, { "epoch": 4.745898226034813, "grad_norm": 0.75, "learning_rate": 8.476237893274432e-06, "loss": 4.001, "step": 14247 }, { "epoch": 4.746231365037062, "grad_norm": 0.75, "learning_rate": 8.47543533553192e-06, "loss": 4.008, "step": 14248 }, { "epoch": 4.74656450403931, "grad_norm": 0.72265625, "learning_rate": 8.474632766427702e-06, "loss": 4.0419, "step": 14249 }, { "epoch": 4.746897643041559, "grad_norm": 0.73828125, "learning_rate": 8.47383018597112e-06, "loss": 3.9597, "step": 14250 }, { "epoch": 4.7472307820438076, "grad_norm": 0.75390625, "learning_rate": 8.473027594171531e-06, "loss": 4.0667, "step": 14251 }, { "epoch": 4.747563921046057, "grad_norm": 0.76171875, "learning_rate": 8.472224991038273e-06, "loss": 3.9425, "step": 14252 }, { "epoch": 4.747897060048305, "grad_norm": 0.7421875, "learning_rate": 8.471422376580703e-06, "loss": 3.9671, "step": 14253 }, { "epoch": 4.748230199050554, "grad_norm": 0.7421875, "learning_rate": 8.470619750808163e-06, "loss": 4.0511, "step": 14254 }, { "epoch": 4.7485633380528025, "grad_norm": 0.71875, "learning_rate": 8.469817113730009e-06, "loss": 3.9776, "step": 14255 }, { "epoch": 4.748896477055052, "grad_norm": 0.76171875, "learning_rate": 8.469014465355583e-06, "loss": 4.0533, "step": 14256 }, { "epoch": 4.7492296160573, "grad_norm": 0.66796875, "learning_rate": 8.46821180569424e-06, "loss": 4.0149, "step": 14257 }, { "epoch": 4.749562755059548, "grad_norm": 0.734375, "learning_rate": 8.467409134755325e-06, "loss": 4.0485, "step": 14258 }, { "epoch": 4.7498958940617975, "grad_norm": 0.6953125, "learning_rate": 8.466606452548188e-06, "loss": 4.0148, "step": 14259 }, { "epoch": 4.750229033064046, "grad_norm": 0.7265625, "learning_rate": 8.465803759082182e-06, "loss": 4.0119, "step": 14260 }, { "epoch": 4.750562172066295, "grad_norm": 0.74609375, "learning_rate": 8.465001054366651e-06, "loss": 4.0242, "step": 14261 }, { "epoch": 4.750895311068543, "grad_norm": 0.75390625, "learning_rate": 8.464198338410952e-06, "loss": 4.0053, "step": 14262 }, { "epoch": 4.751228450070792, "grad_norm": 0.6953125, "learning_rate": 8.463395611224427e-06, "loss": 3.9715, "step": 14263 }, { "epoch": 4.751561589073041, "grad_norm": 0.7734375, "learning_rate": 8.462592872816431e-06, "loss": 3.9442, "step": 14264 }, { "epoch": 4.751894728075289, "grad_norm": 0.7578125, "learning_rate": 8.461790123196312e-06, "loss": 3.9527, "step": 14265 }, { "epoch": 4.752227867077538, "grad_norm": 0.73828125, "learning_rate": 8.460987362373426e-06, "loss": 4.0135, "step": 14266 }, { "epoch": 4.7525610060797865, "grad_norm": 0.73046875, "learning_rate": 8.460184590357116e-06, "loss": 4.0857, "step": 14267 }, { "epoch": 4.752894145082036, "grad_norm": 0.7109375, "learning_rate": 8.459381807156732e-06, "loss": 4.021, "step": 14268 }, { "epoch": 4.753227284084284, "grad_norm": 0.7421875, "learning_rate": 8.458579012781632e-06, "loss": 3.9692, "step": 14269 }, { "epoch": 4.753560423086533, "grad_norm": 0.734375, "learning_rate": 8.45777620724116e-06, "loss": 3.9546, "step": 14270 }, { "epoch": 4.753893562088781, "grad_norm": 0.76171875, "learning_rate": 8.456973390544674e-06, "loss": 4.0054, "step": 14271 }, { "epoch": 4.75422670109103, "grad_norm": 0.73828125, "learning_rate": 8.456170562701517e-06, "loss": 4.0666, "step": 14272 }, { "epoch": 4.754559840093279, "grad_norm": 0.7109375, "learning_rate": 8.455367723721044e-06, "loss": 3.9681, "step": 14273 }, { "epoch": 4.754892979095528, "grad_norm": 0.71875, "learning_rate": 8.454564873612608e-06, "loss": 3.9498, "step": 14274 }, { "epoch": 4.755226118097776, "grad_norm": 0.71875, "learning_rate": 8.453762012385558e-06, "loss": 3.925, "step": 14275 }, { "epoch": 4.755559257100025, "grad_norm": 0.7265625, "learning_rate": 8.452959140049247e-06, "loss": 4.0044, "step": 14276 }, { "epoch": 4.755892396102274, "grad_norm": 0.7109375, "learning_rate": 8.452156256613027e-06, "loss": 3.9953, "step": 14277 }, { "epoch": 4.756225535104522, "grad_norm": 0.76171875, "learning_rate": 8.451353362086246e-06, "loss": 4.0311, "step": 14278 }, { "epoch": 4.756558674106771, "grad_norm": 0.75390625, "learning_rate": 8.450550456478261e-06, "loss": 4.0113, "step": 14279 }, { "epoch": 4.75689181310902, "grad_norm": 0.71875, "learning_rate": 8.449747539798424e-06, "loss": 3.9509, "step": 14280 }, { "epoch": 4.757224952111269, "grad_norm": 0.7109375, "learning_rate": 8.44894461205608e-06, "loss": 4.0494, "step": 14281 }, { "epoch": 4.757558091113517, "grad_norm": 0.69921875, "learning_rate": 8.448141673260591e-06, "loss": 4.0125, "step": 14282 }, { "epoch": 4.757891230115765, "grad_norm": 0.71875, "learning_rate": 8.447338723421303e-06, "loss": 3.9829, "step": 14283 }, { "epoch": 4.758224369118015, "grad_norm": 0.76171875, "learning_rate": 8.446535762547572e-06, "loss": 4.0053, "step": 14284 }, { "epoch": 4.758557508120263, "grad_norm": 0.73046875, "learning_rate": 8.445732790648751e-06, "loss": 4.0068, "step": 14285 }, { "epoch": 4.758890647122512, "grad_norm": 0.73046875, "learning_rate": 8.444929807734188e-06, "loss": 3.9845, "step": 14286 }, { "epoch": 4.75922378612476, "grad_norm": 0.75, "learning_rate": 8.44412681381324e-06, "loss": 3.9627, "step": 14287 }, { "epoch": 4.75955692512701, "grad_norm": 0.73046875, "learning_rate": 8.443323808895262e-06, "loss": 3.9959, "step": 14288 }, { "epoch": 4.759890064129258, "grad_norm": 0.71875, "learning_rate": 8.442520792989605e-06, "loss": 3.9731, "step": 14289 }, { "epoch": 4.760223203131506, "grad_norm": 0.7109375, "learning_rate": 8.44171776610562e-06, "loss": 4.0076, "step": 14290 }, { "epoch": 4.760556342133755, "grad_norm": 0.7421875, "learning_rate": 8.440914728252664e-06, "loss": 3.9892, "step": 14291 }, { "epoch": 4.760889481136004, "grad_norm": 0.72265625, "learning_rate": 8.440111679440087e-06, "loss": 3.9949, "step": 14292 }, { "epoch": 4.761222620138253, "grad_norm": 0.7578125, "learning_rate": 8.439308619677247e-06, "loss": 4.0492, "step": 14293 }, { "epoch": 4.761555759140501, "grad_norm": 0.69921875, "learning_rate": 8.438505548973497e-06, "loss": 3.9524, "step": 14294 }, { "epoch": 4.76188889814275, "grad_norm": 0.734375, "learning_rate": 8.43770246733819e-06, "loss": 3.9717, "step": 14295 }, { "epoch": 4.762222037144999, "grad_norm": 0.69140625, "learning_rate": 8.43689937478068e-06, "loss": 4.0272, "step": 14296 }, { "epoch": 4.762555176147248, "grad_norm": 0.7265625, "learning_rate": 8.436096271310323e-06, "loss": 3.9947, "step": 14297 }, { "epoch": 4.762888315149496, "grad_norm": 0.7578125, "learning_rate": 8.43529315693647e-06, "loss": 4.0028, "step": 14298 }, { "epoch": 4.763221454151745, "grad_norm": 0.7421875, "learning_rate": 8.43449003166848e-06, "loss": 4.0291, "step": 14299 }, { "epoch": 4.7635545931539935, "grad_norm": 0.7265625, "learning_rate": 8.4336868955157e-06, "loss": 3.9938, "step": 14300 }, { "epoch": 4.763887732156242, "grad_norm": 0.7421875, "learning_rate": 8.432883748487495e-06, "loss": 4.0008, "step": 14301 }, { "epoch": 4.764220871158491, "grad_norm": 0.6953125, "learning_rate": 8.432080590593211e-06, "loss": 3.9722, "step": 14302 }, { "epoch": 4.764554010160739, "grad_norm": 0.72265625, "learning_rate": 8.431277421842212e-06, "loss": 3.9764, "step": 14303 }, { "epoch": 4.7648871491629885, "grad_norm": 0.69140625, "learning_rate": 8.430474242243844e-06, "loss": 4.0154, "step": 14304 }, { "epoch": 4.765220288165237, "grad_norm": 0.7109375, "learning_rate": 8.429671051807468e-06, "loss": 4.0437, "step": 14305 }, { "epoch": 4.765553427167486, "grad_norm": 0.74609375, "learning_rate": 8.428867850542438e-06, "loss": 4.0385, "step": 14306 }, { "epoch": 4.765886566169734, "grad_norm": 0.6953125, "learning_rate": 8.428064638458109e-06, "loss": 4.0614, "step": 14307 }, { "epoch": 4.766219705171983, "grad_norm": 0.69140625, "learning_rate": 8.427261415563839e-06, "loss": 3.9938, "step": 14308 }, { "epoch": 4.766552844174232, "grad_norm": 0.73046875, "learning_rate": 8.426458181868978e-06, "loss": 3.9544, "step": 14309 }, { "epoch": 4.76688598317648, "grad_norm": 0.70703125, "learning_rate": 8.42565493738289e-06, "loss": 3.9605, "step": 14310 }, { "epoch": 4.767219122178729, "grad_norm": 0.765625, "learning_rate": 8.424851682114925e-06, "loss": 3.917, "step": 14311 }, { "epoch": 4.7675522611809775, "grad_norm": 0.72265625, "learning_rate": 8.424048416074441e-06, "loss": 4.0121, "step": 14312 }, { "epoch": 4.767885400183227, "grad_norm": 0.74609375, "learning_rate": 8.423245139270794e-06, "loss": 4.0159, "step": 14313 }, { "epoch": 4.768218539185475, "grad_norm": 0.7109375, "learning_rate": 8.422441851713341e-06, "loss": 3.9993, "step": 14314 }, { "epoch": 4.768551678187724, "grad_norm": 0.73828125, "learning_rate": 8.42163855341144e-06, "loss": 4.0269, "step": 14315 }, { "epoch": 4.7688848171899725, "grad_norm": 0.7421875, "learning_rate": 8.420835244374446e-06, "loss": 3.9423, "step": 14316 }, { "epoch": 4.769217956192222, "grad_norm": 0.70703125, "learning_rate": 8.420031924611716e-06, "loss": 4.0357, "step": 14317 }, { "epoch": 4.76955109519447, "grad_norm": 0.7734375, "learning_rate": 8.419228594132605e-06, "loss": 4.0348, "step": 14318 }, { "epoch": 4.769884234196718, "grad_norm": 0.75, "learning_rate": 8.418425252946473e-06, "loss": 3.9826, "step": 14319 }, { "epoch": 4.770217373198967, "grad_norm": 0.75390625, "learning_rate": 8.417621901062675e-06, "loss": 3.9897, "step": 14320 }, { "epoch": 4.770550512201216, "grad_norm": 0.78515625, "learning_rate": 8.416818538490574e-06, "loss": 3.92, "step": 14321 }, { "epoch": 4.770883651203465, "grad_norm": 0.78125, "learning_rate": 8.416015165239517e-06, "loss": 3.9083, "step": 14322 }, { "epoch": 4.771216790205713, "grad_norm": 0.76171875, "learning_rate": 8.415211781318871e-06, "loss": 4.0557, "step": 14323 }, { "epoch": 4.771549929207962, "grad_norm": 0.7578125, "learning_rate": 8.414408386737988e-06, "loss": 3.9953, "step": 14324 }, { "epoch": 4.771883068210211, "grad_norm": 0.72265625, "learning_rate": 8.413604981506227e-06, "loss": 3.9557, "step": 14325 }, { "epoch": 4.772216207212459, "grad_norm": 0.74609375, "learning_rate": 8.41280156563295e-06, "loss": 4.0296, "step": 14326 }, { "epoch": 4.772549346214708, "grad_norm": 0.71484375, "learning_rate": 8.411998139127511e-06, "loss": 4.016, "step": 14327 }, { "epoch": 4.7728824852169565, "grad_norm": 0.77734375, "learning_rate": 8.411194701999269e-06, "loss": 3.9916, "step": 14328 }, { "epoch": 4.773215624219206, "grad_norm": 0.7109375, "learning_rate": 8.41039125425758e-06, "loss": 3.9702, "step": 14329 }, { "epoch": 4.773548763221454, "grad_norm": 0.734375, "learning_rate": 8.40958779591181e-06, "loss": 3.9707, "step": 14330 }, { "epoch": 4.773881902223703, "grad_norm": 0.74609375, "learning_rate": 8.408784326971307e-06, "loss": 4.03, "step": 14331 }, { "epoch": 4.774215041225951, "grad_norm": 0.71484375, "learning_rate": 8.407980847445437e-06, "loss": 4.0421, "step": 14332 }, { "epoch": 4.774548180228201, "grad_norm": 0.70703125, "learning_rate": 8.407177357343555e-06, "loss": 4.0207, "step": 14333 }, { "epoch": 4.774881319230449, "grad_norm": 0.69140625, "learning_rate": 8.406373856675024e-06, "loss": 4.0461, "step": 14334 }, { "epoch": 4.775214458232698, "grad_norm": 0.7421875, "learning_rate": 8.405570345449204e-06, "loss": 3.9944, "step": 14335 }, { "epoch": 4.775547597234946, "grad_norm": 0.74609375, "learning_rate": 8.404766823675444e-06, "loss": 3.9643, "step": 14336 }, { "epoch": 4.775880736237195, "grad_norm": 0.7734375, "learning_rate": 8.403963291363113e-06, "loss": 3.9535, "step": 14337 }, { "epoch": 4.776213875239444, "grad_norm": 0.75390625, "learning_rate": 8.403159748521568e-06, "loss": 3.9518, "step": 14338 }, { "epoch": 4.776547014241692, "grad_norm": 0.75, "learning_rate": 8.402356195160171e-06, "loss": 3.9447, "step": 14339 }, { "epoch": 4.776880153243941, "grad_norm": 0.73828125, "learning_rate": 8.401552631288275e-06, "loss": 3.9453, "step": 14340 }, { "epoch": 4.77721329224619, "grad_norm": 0.7265625, "learning_rate": 8.400749056915245e-06, "loss": 3.9887, "step": 14341 }, { "epoch": 4.777546431248439, "grad_norm": 0.71484375, "learning_rate": 8.399945472050439e-06, "loss": 3.9861, "step": 14342 }, { "epoch": 4.777879570250687, "grad_norm": 0.73828125, "learning_rate": 8.399141876703218e-06, "loss": 4.0076, "step": 14343 }, { "epoch": 4.778212709252935, "grad_norm": 0.75390625, "learning_rate": 8.39833827088294e-06, "loss": 4.0574, "step": 14344 }, { "epoch": 4.778545848255185, "grad_norm": 0.74609375, "learning_rate": 8.39753465459897e-06, "loss": 4.0165, "step": 14345 }, { "epoch": 4.778878987257433, "grad_norm": 0.69921875, "learning_rate": 8.396731027860664e-06, "loss": 4.0424, "step": 14346 }, { "epoch": 4.779212126259682, "grad_norm": 0.6953125, "learning_rate": 8.395927390677383e-06, "loss": 4.0213, "step": 14347 }, { "epoch": 4.77954526526193, "grad_norm": 0.76953125, "learning_rate": 8.395123743058489e-06, "loss": 4.0301, "step": 14348 }, { "epoch": 4.7798784042641795, "grad_norm": 0.78125, "learning_rate": 8.394320085013343e-06, "loss": 3.9638, "step": 14349 }, { "epoch": 4.780211543266428, "grad_norm": 0.7265625, "learning_rate": 8.393516416551304e-06, "loss": 4.0523, "step": 14350 }, { "epoch": 4.780544682268676, "grad_norm": 0.7578125, "learning_rate": 8.392712737681732e-06, "loss": 4.0109, "step": 14351 }, { "epoch": 4.780877821270925, "grad_norm": 0.75, "learning_rate": 8.391909048413996e-06, "loss": 3.9331, "step": 14352 }, { "epoch": 4.781210960273174, "grad_norm": 0.734375, "learning_rate": 8.391105348757448e-06, "loss": 4.022, "step": 14353 }, { "epoch": 4.781544099275423, "grad_norm": 0.73828125, "learning_rate": 8.390301638721453e-06, "loss": 3.967, "step": 14354 }, { "epoch": 4.781877238277671, "grad_norm": 0.6640625, "learning_rate": 8.389497918315371e-06, "loss": 3.967, "step": 14355 }, { "epoch": 4.78221037727992, "grad_norm": 0.7109375, "learning_rate": 8.388694187548567e-06, "loss": 4.0378, "step": 14356 }, { "epoch": 4.782543516282169, "grad_norm": 0.69921875, "learning_rate": 8.3878904464304e-06, "loss": 4.043, "step": 14357 }, { "epoch": 4.782876655284418, "grad_norm": 0.765625, "learning_rate": 8.387086694970231e-06, "loss": 4.0067, "step": 14358 }, { "epoch": 4.783209794286666, "grad_norm": 0.73828125, "learning_rate": 8.386282933177427e-06, "loss": 4.0262, "step": 14359 }, { "epoch": 4.783542933288915, "grad_norm": 0.671875, "learning_rate": 8.385479161061342e-06, "loss": 4.0282, "step": 14360 }, { "epoch": 4.7838760722911635, "grad_norm": 0.7421875, "learning_rate": 8.384675378631347e-06, "loss": 3.9684, "step": 14361 }, { "epoch": 4.784209211293412, "grad_norm": 0.734375, "learning_rate": 8.383871585896796e-06, "loss": 4.0201, "step": 14362 }, { "epoch": 4.784542350295661, "grad_norm": 0.71875, "learning_rate": 8.383067782867058e-06, "loss": 3.9244, "step": 14363 }, { "epoch": 4.784875489297909, "grad_norm": 0.73828125, "learning_rate": 8.382263969551492e-06, "loss": 4.0051, "step": 14364 }, { "epoch": 4.7852086283001585, "grad_norm": 0.74609375, "learning_rate": 8.381460145959462e-06, "loss": 3.9942, "step": 14365 }, { "epoch": 4.785541767302407, "grad_norm": 0.734375, "learning_rate": 8.380656312100332e-06, "loss": 4.0377, "step": 14366 }, { "epoch": 4.785874906304656, "grad_norm": 0.69921875, "learning_rate": 8.379852467983463e-06, "loss": 3.9821, "step": 14367 }, { "epoch": 4.786208045306904, "grad_norm": 0.71875, "learning_rate": 8.379048613618216e-06, "loss": 4.0561, "step": 14368 }, { "epoch": 4.7865411843091525, "grad_norm": 0.7890625, "learning_rate": 8.378244749013958e-06, "loss": 3.9651, "step": 14369 }, { "epoch": 4.786874323311402, "grad_norm": 0.69140625, "learning_rate": 8.377440874180053e-06, "loss": 4.0202, "step": 14370 }, { "epoch": 4.78720746231365, "grad_norm": 0.75, "learning_rate": 8.37663698912586e-06, "loss": 3.8811, "step": 14371 }, { "epoch": 4.787540601315899, "grad_norm": 0.73046875, "learning_rate": 8.375833093860746e-06, "loss": 4.0125, "step": 14372 }, { "epoch": 4.7878737403181475, "grad_norm": 0.73046875, "learning_rate": 8.375029188394073e-06, "loss": 3.9953, "step": 14373 }, { "epoch": 4.788206879320397, "grad_norm": 0.73828125, "learning_rate": 8.374225272735207e-06, "loss": 3.9793, "step": 14374 }, { "epoch": 4.788540018322645, "grad_norm": 0.703125, "learning_rate": 8.373421346893508e-06, "loss": 3.9882, "step": 14375 }, { "epoch": 4.788873157324894, "grad_norm": 0.76953125, "learning_rate": 8.372617410878342e-06, "loss": 3.9471, "step": 14376 }, { "epoch": 4.7892062963271425, "grad_norm": 0.71875, "learning_rate": 8.371813464699074e-06, "loss": 4.0043, "step": 14377 }, { "epoch": 4.789539435329392, "grad_norm": 0.76171875, "learning_rate": 8.371009508365066e-06, "loss": 3.9009, "step": 14378 }, { "epoch": 4.78987257433164, "grad_norm": 0.75390625, "learning_rate": 8.370205541885686e-06, "loss": 3.9531, "step": 14379 }, { "epoch": 4.790205713333888, "grad_norm": 0.7265625, "learning_rate": 8.369401565270296e-06, "loss": 4.0396, "step": 14380 }, { "epoch": 4.790538852336137, "grad_norm": 0.73828125, "learning_rate": 8.36859757852826e-06, "loss": 3.9613, "step": 14381 }, { "epoch": 4.790871991338386, "grad_norm": 0.69140625, "learning_rate": 8.367793581668943e-06, "loss": 4.0177, "step": 14382 }, { "epoch": 4.791205130340635, "grad_norm": 0.734375, "learning_rate": 8.366989574701714e-06, "loss": 3.9701, "step": 14383 }, { "epoch": 4.791538269342883, "grad_norm": 0.73828125, "learning_rate": 8.36618555763593e-06, "loss": 3.9766, "step": 14384 }, { "epoch": 4.791871408345132, "grad_norm": 0.7578125, "learning_rate": 8.365381530480964e-06, "loss": 4.0373, "step": 14385 }, { "epoch": 4.792204547347381, "grad_norm": 0.7109375, "learning_rate": 8.364577493246174e-06, "loss": 4.0032, "step": 14386 }, { "epoch": 4.792537686349629, "grad_norm": 0.71484375, "learning_rate": 8.363773445940931e-06, "loss": 3.9274, "step": 14387 }, { "epoch": 4.792870825351878, "grad_norm": 0.765625, "learning_rate": 8.362969388574597e-06, "loss": 3.9805, "step": 14388 }, { "epoch": 4.793203964354126, "grad_norm": 0.73828125, "learning_rate": 8.362165321156542e-06, "loss": 4.043, "step": 14389 }, { "epoch": 4.793537103356376, "grad_norm": 0.7734375, "learning_rate": 8.361361243696125e-06, "loss": 3.9412, "step": 14390 }, { "epoch": 4.793870242358624, "grad_norm": 0.7265625, "learning_rate": 8.360557156202716e-06, "loss": 4.0287, "step": 14391 }, { "epoch": 4.794203381360873, "grad_norm": 0.7421875, "learning_rate": 8.359753058685682e-06, "loss": 4.036, "step": 14392 }, { "epoch": 4.794536520363121, "grad_norm": 0.78515625, "learning_rate": 8.358948951154383e-06, "loss": 4.0564, "step": 14393 }, { "epoch": 4.794869659365371, "grad_norm": 0.71484375, "learning_rate": 8.358144833618193e-06, "loss": 3.9985, "step": 14394 }, { "epoch": 4.795202798367619, "grad_norm": 0.73828125, "learning_rate": 8.357340706086473e-06, "loss": 3.9646, "step": 14395 }, { "epoch": 4.795535937369868, "grad_norm": 0.71484375, "learning_rate": 8.356536568568591e-06, "loss": 3.9613, "step": 14396 }, { "epoch": 4.795869076372116, "grad_norm": 0.75, "learning_rate": 8.355732421073913e-06, "loss": 4.0263, "step": 14397 }, { "epoch": 4.796202215374365, "grad_norm": 0.7265625, "learning_rate": 8.354928263611807e-06, "loss": 3.9843, "step": 14398 }, { "epoch": 4.796535354376614, "grad_norm": 0.703125, "learning_rate": 8.354124096191636e-06, "loss": 3.976, "step": 14399 }, { "epoch": 4.796868493378862, "grad_norm": 0.69921875, "learning_rate": 8.353319918822771e-06, "loss": 4.0244, "step": 14400 }, { "epoch": 4.797201632381111, "grad_norm": 0.76171875, "learning_rate": 8.352515731514578e-06, "loss": 3.9897, "step": 14401 }, { "epoch": 4.79753477138336, "grad_norm": 0.75390625, "learning_rate": 8.351711534276421e-06, "loss": 4.0786, "step": 14402 }, { "epoch": 4.797867910385609, "grad_norm": 0.73828125, "learning_rate": 8.350907327117672e-06, "loss": 3.9784, "step": 14403 }, { "epoch": 4.798201049387857, "grad_norm": 0.71484375, "learning_rate": 8.350103110047693e-06, "loss": 3.9808, "step": 14404 }, { "epoch": 4.798534188390105, "grad_norm": 0.6796875, "learning_rate": 8.349298883075855e-06, "loss": 3.9768, "step": 14405 }, { "epoch": 4.7988673273923546, "grad_norm": 0.7578125, "learning_rate": 8.348494646211525e-06, "loss": 3.9878, "step": 14406 }, { "epoch": 4.799200466394603, "grad_norm": 0.7265625, "learning_rate": 8.347690399464073e-06, "loss": 3.9769, "step": 14407 }, { "epoch": 4.799533605396852, "grad_norm": 0.75390625, "learning_rate": 8.34688614284286e-06, "loss": 3.9943, "step": 14408 }, { "epoch": 4.7998667443991, "grad_norm": 0.71875, "learning_rate": 8.34608187635726e-06, "loss": 4.0981, "step": 14409 }, { "epoch": 4.8001998834013495, "grad_norm": 0.71875, "learning_rate": 8.345277600016637e-06, "loss": 4.0599, "step": 14410 }, { "epoch": 4.800533022403598, "grad_norm": 0.7109375, "learning_rate": 8.344473313830363e-06, "loss": 4.0166, "step": 14411 }, { "epoch": 4.800866161405846, "grad_norm": 0.69921875, "learning_rate": 8.343669017807802e-06, "loss": 3.9893, "step": 14412 }, { "epoch": 4.801199300408095, "grad_norm": 0.7578125, "learning_rate": 8.342864711958325e-06, "loss": 3.9864, "step": 14413 }, { "epoch": 4.801532439410344, "grad_norm": 0.74609375, "learning_rate": 8.342060396291298e-06, "loss": 4.0292, "step": 14414 }, { "epoch": 4.801865578412593, "grad_norm": 0.75390625, "learning_rate": 8.341256070816094e-06, "loss": 3.9696, "step": 14415 }, { "epoch": 4.802198717414841, "grad_norm": 0.73828125, "learning_rate": 8.340451735542078e-06, "loss": 4.0661, "step": 14416 }, { "epoch": 4.80253185641709, "grad_norm": 0.74609375, "learning_rate": 8.33964739047862e-06, "loss": 4.0819, "step": 14417 }, { "epoch": 4.8028649954193385, "grad_norm": 0.7109375, "learning_rate": 8.338843035635088e-06, "loss": 4.016, "step": 14418 }, { "epoch": 4.803198134421588, "grad_norm": 0.7421875, "learning_rate": 8.338038671020852e-06, "loss": 4.0355, "step": 14419 }, { "epoch": 4.803531273423836, "grad_norm": 0.75390625, "learning_rate": 8.337234296645282e-06, "loss": 4.0557, "step": 14420 }, { "epoch": 4.803864412426085, "grad_norm": 0.72265625, "learning_rate": 8.336429912517742e-06, "loss": 3.9739, "step": 14421 }, { "epoch": 4.8041975514283335, "grad_norm": 0.78515625, "learning_rate": 8.335625518647608e-06, "loss": 3.9916, "step": 14422 }, { "epoch": 4.804530690430582, "grad_norm": 0.73828125, "learning_rate": 8.334821115044245e-06, "loss": 3.9612, "step": 14423 }, { "epoch": 4.804863829432831, "grad_norm": 0.73828125, "learning_rate": 8.334016701717026e-06, "loss": 3.9725, "step": 14424 }, { "epoch": 4.805196968435079, "grad_norm": 0.73828125, "learning_rate": 8.33321227867532e-06, "loss": 3.9134, "step": 14425 }, { "epoch": 4.8055301074373284, "grad_norm": 0.75, "learning_rate": 8.332407845928493e-06, "loss": 4.0153, "step": 14426 }, { "epoch": 4.805863246439577, "grad_norm": 0.73828125, "learning_rate": 8.33160340348592e-06, "loss": 4.0108, "step": 14427 }, { "epoch": 4.806196385441826, "grad_norm": 0.765625, "learning_rate": 8.330798951356967e-06, "loss": 3.9318, "step": 14428 }, { "epoch": 4.806529524444074, "grad_norm": 0.73046875, "learning_rate": 8.329994489551006e-06, "loss": 3.9994, "step": 14429 }, { "epoch": 4.8068626634463225, "grad_norm": 0.7265625, "learning_rate": 8.329190018077405e-06, "loss": 4.033, "step": 14430 }, { "epoch": 4.807195802448572, "grad_norm": 0.75390625, "learning_rate": 8.32838553694554e-06, "loss": 3.9361, "step": 14431 }, { "epoch": 4.80752894145082, "grad_norm": 0.68359375, "learning_rate": 8.327581046164776e-06, "loss": 3.9909, "step": 14432 }, { "epoch": 4.807862080453069, "grad_norm": 0.72265625, "learning_rate": 8.326776545744487e-06, "loss": 4.046, "step": 14433 }, { "epoch": 4.8081952194553175, "grad_norm": 0.734375, "learning_rate": 8.325972035694037e-06, "loss": 4.0061, "step": 14434 }, { "epoch": 4.808528358457567, "grad_norm": 0.79296875, "learning_rate": 8.325167516022808e-06, "loss": 3.958, "step": 14435 }, { "epoch": 4.808861497459815, "grad_norm": 0.72265625, "learning_rate": 8.32436298674016e-06, "loss": 3.9926, "step": 14436 }, { "epoch": 4.809194636462064, "grad_norm": 0.71484375, "learning_rate": 8.323558447855472e-06, "loss": 3.9397, "step": 14437 }, { "epoch": 4.809527775464312, "grad_norm": 0.7578125, "learning_rate": 8.32275389937811e-06, "loss": 4.0936, "step": 14438 }, { "epoch": 4.809860914466562, "grad_norm": 0.7578125, "learning_rate": 8.321949341317447e-06, "loss": 4.0358, "step": 14439 }, { "epoch": 4.81019405346881, "grad_norm": 0.734375, "learning_rate": 8.321144773682856e-06, "loss": 3.9964, "step": 14440 }, { "epoch": 4.810527192471058, "grad_norm": 0.76953125, "learning_rate": 8.320340196483705e-06, "loss": 3.9918, "step": 14441 }, { "epoch": 4.810860331473307, "grad_norm": 0.7734375, "learning_rate": 8.319535609729372e-06, "loss": 4.0708, "step": 14442 }, { "epoch": 4.811193470475556, "grad_norm": 0.703125, "learning_rate": 8.31873101342922e-06, "loss": 3.965, "step": 14443 }, { "epoch": 4.811526609477805, "grad_norm": 0.75390625, "learning_rate": 8.317926407592625e-06, "loss": 3.9881, "step": 14444 }, { "epoch": 4.811859748480053, "grad_norm": 0.7578125, "learning_rate": 8.317121792228961e-06, "loss": 3.9591, "step": 14445 }, { "epoch": 4.812192887482302, "grad_norm": 0.73046875, "learning_rate": 8.316317167347598e-06, "loss": 3.9632, "step": 14446 }, { "epoch": 4.812526026484551, "grad_norm": 0.7421875, "learning_rate": 8.315512532957907e-06, "loss": 4.0456, "step": 14447 }, { "epoch": 4.812859165486799, "grad_norm": 0.7578125, "learning_rate": 8.314707889069259e-06, "loss": 4.0391, "step": 14448 }, { "epoch": 4.813192304489048, "grad_norm": 0.7421875, "learning_rate": 8.313903235691032e-06, "loss": 3.9596, "step": 14449 }, { "epoch": 4.813525443491296, "grad_norm": 0.73828125, "learning_rate": 8.313098572832595e-06, "loss": 3.9606, "step": 14450 }, { "epoch": 4.813858582493546, "grad_norm": 0.75, "learning_rate": 8.312293900503323e-06, "loss": 4.0468, "step": 14451 }, { "epoch": 4.814191721495794, "grad_norm": 0.7109375, "learning_rate": 8.311489218712582e-06, "loss": 4.0567, "step": 14452 }, { "epoch": 4.814524860498043, "grad_norm": 0.76953125, "learning_rate": 8.310684527469752e-06, "loss": 3.976, "step": 14453 }, { "epoch": 4.814857999500291, "grad_norm": 0.7578125, "learning_rate": 8.3098798267842e-06, "loss": 4.0094, "step": 14454 }, { "epoch": 4.8151911385025405, "grad_norm": 0.6953125, "learning_rate": 8.309075116665309e-06, "loss": 4.046, "step": 14455 }, { "epoch": 4.815524277504789, "grad_norm": 0.70703125, "learning_rate": 8.308270397122438e-06, "loss": 4.0039, "step": 14456 }, { "epoch": 4.815857416507038, "grad_norm": 0.76953125, "learning_rate": 8.307465668164973e-06, "loss": 3.9565, "step": 14457 }, { "epoch": 4.816190555509286, "grad_norm": 0.76171875, "learning_rate": 8.306660929802278e-06, "loss": 3.9844, "step": 14458 }, { "epoch": 4.816523694511535, "grad_norm": 0.76953125, "learning_rate": 8.305856182043732e-06, "loss": 4.0209, "step": 14459 }, { "epoch": 4.816856833513784, "grad_norm": 0.76953125, "learning_rate": 8.305051424898708e-06, "loss": 4.0334, "step": 14460 }, { "epoch": 4.817189972516032, "grad_norm": 0.7421875, "learning_rate": 8.304246658376575e-06, "loss": 4.0851, "step": 14461 }, { "epoch": 4.817523111518281, "grad_norm": 0.74609375, "learning_rate": 8.303441882486715e-06, "loss": 4.0358, "step": 14462 }, { "epoch": 4.81785625052053, "grad_norm": 0.734375, "learning_rate": 8.302637097238493e-06, "loss": 3.9955, "step": 14463 }, { "epoch": 4.818189389522779, "grad_norm": 0.81640625, "learning_rate": 8.301832302641292e-06, "loss": 3.998, "step": 14464 }, { "epoch": 4.818522528525027, "grad_norm": 0.71484375, "learning_rate": 8.30102749870448e-06, "loss": 4.002, "step": 14465 }, { "epoch": 4.818855667527275, "grad_norm": 0.734375, "learning_rate": 8.30022268543743e-06, "loss": 3.9622, "step": 14466 }, { "epoch": 4.8191888065295245, "grad_norm": 0.71484375, "learning_rate": 8.29941786284952e-06, "loss": 4.0362, "step": 14467 }, { "epoch": 4.819521945531773, "grad_norm": 0.69140625, "learning_rate": 8.298613030950126e-06, "loss": 4.0155, "step": 14468 }, { "epoch": 4.819855084534022, "grad_norm": 0.7421875, "learning_rate": 8.297808189748617e-06, "loss": 3.9619, "step": 14469 }, { "epoch": 4.82018822353627, "grad_norm": 0.7109375, "learning_rate": 8.29700333925437e-06, "loss": 3.9396, "step": 14470 }, { "epoch": 4.8205213625385195, "grad_norm": 0.734375, "learning_rate": 8.296198479476764e-06, "loss": 3.9814, "step": 14471 }, { "epoch": 4.820854501540768, "grad_norm": 0.7578125, "learning_rate": 8.295393610425165e-06, "loss": 3.9516, "step": 14472 }, { "epoch": 4.821187640543017, "grad_norm": 0.73046875, "learning_rate": 8.294588732108961e-06, "loss": 4.0046, "step": 14473 }, { "epoch": 4.821520779545265, "grad_norm": 0.72265625, "learning_rate": 8.293783844537514e-06, "loss": 4.0723, "step": 14474 }, { "epoch": 4.821853918547514, "grad_norm": 0.7265625, "learning_rate": 8.292978947720206e-06, "loss": 3.9742, "step": 14475 }, { "epoch": 4.822187057549763, "grad_norm": 0.74609375, "learning_rate": 8.292174041666408e-06, "loss": 4.045, "step": 14476 }, { "epoch": 4.822520196552011, "grad_norm": 0.73828125, "learning_rate": 8.291369126385502e-06, "loss": 3.9891, "step": 14477 }, { "epoch": 4.82285333555426, "grad_norm": 0.71875, "learning_rate": 8.29056420188686e-06, "loss": 3.9659, "step": 14478 }, { "epoch": 4.8231864745565085, "grad_norm": 0.734375, "learning_rate": 8.289759268179855e-06, "loss": 3.998, "step": 14479 }, { "epoch": 4.823519613558758, "grad_norm": 0.7265625, "learning_rate": 8.288954325273866e-06, "loss": 4.0345, "step": 14480 }, { "epoch": 4.823852752561006, "grad_norm": 0.75, "learning_rate": 8.288149373178268e-06, "loss": 3.9126, "step": 14481 }, { "epoch": 4.824185891563255, "grad_norm": 0.69921875, "learning_rate": 8.287344411902437e-06, "loss": 3.9695, "step": 14482 }, { "epoch": 4.8245190305655035, "grad_norm": 0.703125, "learning_rate": 8.286539441455748e-06, "loss": 3.9177, "step": 14483 }, { "epoch": 4.824852169567752, "grad_norm": 0.73828125, "learning_rate": 8.28573446184758e-06, "loss": 3.9244, "step": 14484 }, { "epoch": 4.825185308570001, "grad_norm": 0.72265625, "learning_rate": 8.284929473087305e-06, "loss": 4.0049, "step": 14485 }, { "epoch": 4.825518447572249, "grad_norm": 0.75, "learning_rate": 8.284124475184304e-06, "loss": 4.0074, "step": 14486 }, { "epoch": 4.825851586574498, "grad_norm": 0.73046875, "learning_rate": 8.283319468147952e-06, "loss": 4.0606, "step": 14487 }, { "epoch": 4.826184725576747, "grad_norm": 0.77734375, "learning_rate": 8.282514451987622e-06, "loss": 3.9969, "step": 14488 }, { "epoch": 4.826517864578996, "grad_norm": 0.6875, "learning_rate": 8.281709426712693e-06, "loss": 3.9722, "step": 14489 }, { "epoch": 4.826851003581244, "grad_norm": 0.74609375, "learning_rate": 8.280904392332546e-06, "loss": 3.9899, "step": 14490 }, { "epoch": 4.8271841425834925, "grad_norm": 0.72265625, "learning_rate": 8.280099348856555e-06, "loss": 3.9889, "step": 14491 }, { "epoch": 4.827517281585742, "grad_norm": 0.78125, "learning_rate": 8.279294296294093e-06, "loss": 4.0039, "step": 14492 }, { "epoch": 4.82785042058799, "grad_norm": 0.76171875, "learning_rate": 8.27848923465454e-06, "loss": 4.0004, "step": 14493 }, { "epoch": 4.828183559590239, "grad_norm": 0.7421875, "learning_rate": 8.277684163947274e-06, "loss": 4.0058, "step": 14494 }, { "epoch": 4.828516698592487, "grad_norm": 0.765625, "learning_rate": 8.276879084181673e-06, "loss": 4.0528, "step": 14495 }, { "epoch": 4.828849837594737, "grad_norm": 0.7578125, "learning_rate": 8.276073995367115e-06, "loss": 4.0236, "step": 14496 }, { "epoch": 4.829182976596985, "grad_norm": 0.71875, "learning_rate": 8.275268897512975e-06, "loss": 4.0703, "step": 14497 }, { "epoch": 4.829516115599234, "grad_norm": 0.71875, "learning_rate": 8.27446379062863e-06, "loss": 4.0823, "step": 14498 }, { "epoch": 4.829849254601482, "grad_norm": 0.73828125, "learning_rate": 8.27365867472346e-06, "loss": 3.8844, "step": 14499 }, { "epoch": 4.830182393603732, "grad_norm": 0.7265625, "learning_rate": 8.272853549806843e-06, "loss": 3.9728, "step": 14500 }, { "epoch": 4.83051553260598, "grad_norm": 0.71484375, "learning_rate": 8.272048415888155e-06, "loss": 3.9813, "step": 14501 }, { "epoch": 4.830848671608228, "grad_norm": 0.70703125, "learning_rate": 8.271243272976773e-06, "loss": 3.9782, "step": 14502 }, { "epoch": 4.831181810610477, "grad_norm": 0.76953125, "learning_rate": 8.270438121082083e-06, "loss": 3.9743, "step": 14503 }, { "epoch": 4.831514949612726, "grad_norm": 0.76171875, "learning_rate": 8.269632960213455e-06, "loss": 4.0222, "step": 14504 }, { "epoch": 4.831848088614975, "grad_norm": 0.81640625, "learning_rate": 8.268827790380268e-06, "loss": 4.0311, "step": 14505 }, { "epoch": 4.832181227617223, "grad_norm": 0.75390625, "learning_rate": 8.268022611591904e-06, "loss": 4.0134, "step": 14506 }, { "epoch": 4.832514366619472, "grad_norm": 0.7421875, "learning_rate": 8.267217423857739e-06, "loss": 3.9783, "step": 14507 }, { "epoch": 4.832847505621721, "grad_norm": 0.7265625, "learning_rate": 8.266412227187154e-06, "loss": 3.954, "step": 14508 }, { "epoch": 4.833180644623969, "grad_norm": 0.76171875, "learning_rate": 8.265607021589528e-06, "loss": 3.947, "step": 14509 }, { "epoch": 4.833513783626218, "grad_norm": 0.7265625, "learning_rate": 8.264801807074238e-06, "loss": 3.9711, "step": 14510 }, { "epoch": 4.833846922628466, "grad_norm": 0.796875, "learning_rate": 8.263996583650661e-06, "loss": 3.9447, "step": 14511 }, { "epoch": 4.834180061630716, "grad_norm": 0.7265625, "learning_rate": 8.26319135132818e-06, "loss": 4.0827, "step": 14512 }, { "epoch": 4.834513200632964, "grad_norm": 0.7578125, "learning_rate": 8.262386110116174e-06, "loss": 3.9683, "step": 14513 }, { "epoch": 4.834846339635213, "grad_norm": 0.7265625, "learning_rate": 8.26158086002402e-06, "loss": 3.9883, "step": 14514 }, { "epoch": 4.835179478637461, "grad_norm": 0.77734375, "learning_rate": 8.260775601061099e-06, "loss": 3.9814, "step": 14515 }, { "epoch": 4.8355126176397105, "grad_norm": 0.7421875, "learning_rate": 8.259970333236788e-06, "loss": 4.0554, "step": 14516 }, { "epoch": 4.835845756641959, "grad_norm": 0.7734375, "learning_rate": 8.259165056560472e-06, "loss": 3.8549, "step": 14517 }, { "epoch": 4.836178895644208, "grad_norm": 0.7734375, "learning_rate": 8.258359771041527e-06, "loss": 4.0201, "step": 14518 }, { "epoch": 4.836512034646456, "grad_norm": 0.73828125, "learning_rate": 8.257554476689332e-06, "loss": 4.057, "step": 14519 }, { "epoch": 4.836845173648705, "grad_norm": 0.73046875, "learning_rate": 8.256749173513269e-06, "loss": 3.9174, "step": 14520 }, { "epoch": 4.837178312650954, "grad_norm": 0.7109375, "learning_rate": 8.255943861522717e-06, "loss": 3.9933, "step": 14521 }, { "epoch": 4.837511451653202, "grad_norm": 0.765625, "learning_rate": 8.255138540727055e-06, "loss": 3.9918, "step": 14522 }, { "epoch": 4.837844590655451, "grad_norm": 0.73046875, "learning_rate": 8.254333211135666e-06, "loss": 3.9653, "step": 14523 }, { "epoch": 4.8381777296576995, "grad_norm": 0.75, "learning_rate": 8.25352787275793e-06, "loss": 4.0489, "step": 14524 }, { "epoch": 4.838510868659949, "grad_norm": 0.734375, "learning_rate": 8.252722525603224e-06, "loss": 3.9455, "step": 14525 }, { "epoch": 4.838844007662197, "grad_norm": 0.703125, "learning_rate": 8.251917169680933e-06, "loss": 4.0173, "step": 14526 }, { "epoch": 4.839177146664445, "grad_norm": 0.77734375, "learning_rate": 8.251111805000433e-06, "loss": 3.934, "step": 14527 }, { "epoch": 4.8395102856666945, "grad_norm": 0.796875, "learning_rate": 8.250306431571111e-06, "loss": 3.9838, "step": 14528 }, { "epoch": 4.839843424668943, "grad_norm": 0.7109375, "learning_rate": 8.249501049402342e-06, "loss": 4.0615, "step": 14529 }, { "epoch": 4.840176563671192, "grad_norm": 0.7578125, "learning_rate": 8.24869565850351e-06, "loss": 3.9694, "step": 14530 }, { "epoch": 4.84050970267344, "grad_norm": 0.73046875, "learning_rate": 8.247890258883994e-06, "loss": 3.9917, "step": 14531 }, { "epoch": 4.8408428416756895, "grad_norm": 0.71875, "learning_rate": 8.247084850553178e-06, "loss": 3.9865, "step": 14532 }, { "epoch": 4.841175980677938, "grad_norm": 0.76171875, "learning_rate": 8.246279433520441e-06, "loss": 3.9536, "step": 14533 }, { "epoch": 4.841509119680187, "grad_norm": 0.7578125, "learning_rate": 8.245474007795164e-06, "loss": 4.0178, "step": 14534 }, { "epoch": 4.841842258682435, "grad_norm": 0.74609375, "learning_rate": 8.24466857338673e-06, "loss": 4.0624, "step": 14535 }, { "epoch": 4.842175397684684, "grad_norm": 0.74609375, "learning_rate": 8.243863130304521e-06, "loss": 3.9789, "step": 14536 }, { "epoch": 4.842508536686933, "grad_norm": 0.75, "learning_rate": 8.243057678557918e-06, "loss": 3.9573, "step": 14537 }, { "epoch": 4.842841675689181, "grad_norm": 0.765625, "learning_rate": 8.2422522181563e-06, "loss": 3.9785, "step": 14538 }, { "epoch": 4.84317481469143, "grad_norm": 0.765625, "learning_rate": 8.241446749109055e-06, "loss": 3.9622, "step": 14539 }, { "epoch": 4.8435079536936785, "grad_norm": 0.75390625, "learning_rate": 8.24064127142556e-06, "loss": 4.0852, "step": 14540 }, { "epoch": 4.843841092695928, "grad_norm": 0.70703125, "learning_rate": 8.239835785115199e-06, "loss": 3.9834, "step": 14541 }, { "epoch": 4.844174231698176, "grad_norm": 0.68359375, "learning_rate": 8.23903029018735e-06, "loss": 4.0276, "step": 14542 }, { "epoch": 4.844507370700425, "grad_norm": 0.76171875, "learning_rate": 8.238224786651401e-06, "loss": 4.0466, "step": 14543 }, { "epoch": 4.844840509702673, "grad_norm": 0.7265625, "learning_rate": 8.237419274516733e-06, "loss": 3.9883, "step": 14544 }, { "epoch": 4.845173648704922, "grad_norm": 0.765625, "learning_rate": 8.236613753792728e-06, "loss": 4.0506, "step": 14545 }, { "epoch": 4.845506787707171, "grad_norm": 0.76953125, "learning_rate": 8.235808224488768e-06, "loss": 4.0398, "step": 14546 }, { "epoch": 4.845839926709419, "grad_norm": 0.73828125, "learning_rate": 8.235002686614233e-06, "loss": 3.9293, "step": 14547 }, { "epoch": 4.846173065711668, "grad_norm": 0.70703125, "learning_rate": 8.234197140178513e-06, "loss": 3.9961, "step": 14548 }, { "epoch": 4.846506204713917, "grad_norm": 0.73046875, "learning_rate": 8.233391585190986e-06, "loss": 4.0168, "step": 14549 }, { "epoch": 4.846839343716166, "grad_norm": 0.73046875, "learning_rate": 8.232586021661034e-06, "loss": 3.9963, "step": 14550 }, { "epoch": 4.847172482718414, "grad_norm": 0.7578125, "learning_rate": 8.231780449598041e-06, "loss": 4.0311, "step": 14551 }, { "epoch": 4.8475056217206625, "grad_norm": 0.75, "learning_rate": 8.230974869011392e-06, "loss": 3.9885, "step": 14552 }, { "epoch": 4.847838760722912, "grad_norm": 0.71875, "learning_rate": 8.230169279910467e-06, "loss": 3.9778, "step": 14553 }, { "epoch": 4.84817189972516, "grad_norm": 0.78125, "learning_rate": 8.229363682304656e-06, "loss": 4.061, "step": 14554 }, { "epoch": 4.848505038727409, "grad_norm": 0.72265625, "learning_rate": 8.228558076203334e-06, "loss": 3.9495, "step": 14555 }, { "epoch": 4.848838177729657, "grad_norm": 0.70703125, "learning_rate": 8.227752461615892e-06, "loss": 4.0147, "step": 14556 }, { "epoch": 4.849171316731907, "grad_norm": 0.7109375, "learning_rate": 8.226946838551705e-06, "loss": 3.9926, "step": 14557 }, { "epoch": 4.849504455734155, "grad_norm": 0.71875, "learning_rate": 8.226141207020164e-06, "loss": 3.9849, "step": 14558 }, { "epoch": 4.849837594736404, "grad_norm": 0.71875, "learning_rate": 8.225335567030653e-06, "loss": 4.0611, "step": 14559 }, { "epoch": 4.850170733738652, "grad_norm": 0.69140625, "learning_rate": 8.22452991859255e-06, "loss": 4.023, "step": 14560 }, { "epoch": 4.850503872740902, "grad_norm": 0.74609375, "learning_rate": 8.223724261715247e-06, "loss": 4.014, "step": 14561 }, { "epoch": 4.85083701174315, "grad_norm": 0.71484375, "learning_rate": 8.22291859640812e-06, "loss": 4.0221, "step": 14562 }, { "epoch": 4.851170150745398, "grad_norm": 0.75390625, "learning_rate": 8.222112922680562e-06, "loss": 4.0238, "step": 14563 }, { "epoch": 4.851503289747647, "grad_norm": 0.72265625, "learning_rate": 8.221307240541949e-06, "loss": 4.0162, "step": 14564 }, { "epoch": 4.851836428749896, "grad_norm": 0.69921875, "learning_rate": 8.220501550001669e-06, "loss": 4.0835, "step": 14565 }, { "epoch": 4.852169567752145, "grad_norm": 0.734375, "learning_rate": 8.219695851069107e-06, "loss": 4.0192, "step": 14566 }, { "epoch": 4.852502706754393, "grad_norm": 0.73046875, "learning_rate": 8.218890143753647e-06, "loss": 3.9825, "step": 14567 }, { "epoch": 4.852835845756642, "grad_norm": 0.78125, "learning_rate": 8.218084428064676e-06, "loss": 3.9408, "step": 14568 }, { "epoch": 4.853168984758891, "grad_norm": 0.73828125, "learning_rate": 8.217278704011573e-06, "loss": 4.0227, "step": 14569 }, { "epoch": 4.853502123761139, "grad_norm": 0.71484375, "learning_rate": 8.216472971603728e-06, "loss": 3.9612, "step": 14570 }, { "epoch": 4.853835262763388, "grad_norm": 0.7421875, "learning_rate": 8.215667230850525e-06, "loss": 4.0226, "step": 14571 }, { "epoch": 4.854168401765636, "grad_norm": 0.74609375, "learning_rate": 8.214861481761352e-06, "loss": 4.0339, "step": 14572 }, { "epoch": 4.8545015407678855, "grad_norm": 0.7421875, "learning_rate": 8.214055724345588e-06, "loss": 3.9214, "step": 14573 }, { "epoch": 4.854834679770134, "grad_norm": 0.76171875, "learning_rate": 8.213249958612621e-06, "loss": 3.9426, "step": 14574 }, { "epoch": 4.855167818772383, "grad_norm": 0.71875, "learning_rate": 8.212444184571837e-06, "loss": 4.0351, "step": 14575 }, { "epoch": 4.855500957774631, "grad_norm": 0.734375, "learning_rate": 8.211638402232625e-06, "loss": 3.9453, "step": 14576 }, { "epoch": 4.8558340967768805, "grad_norm": 0.6953125, "learning_rate": 8.210832611604365e-06, "loss": 4.0426, "step": 14577 }, { "epoch": 4.856167235779129, "grad_norm": 0.73828125, "learning_rate": 8.210026812696443e-06, "loss": 3.9765, "step": 14578 }, { "epoch": 4.856500374781378, "grad_norm": 0.74609375, "learning_rate": 8.209221005518247e-06, "loss": 3.9294, "step": 14579 }, { "epoch": 4.856833513783626, "grad_norm": 0.72265625, "learning_rate": 8.208415190079162e-06, "loss": 3.9568, "step": 14580 }, { "epoch": 4.857166652785875, "grad_norm": 0.70703125, "learning_rate": 8.207609366388578e-06, "loss": 4.0566, "step": 14581 }, { "epoch": 4.857499791788124, "grad_norm": 0.73828125, "learning_rate": 8.206803534455873e-06, "loss": 3.9608, "step": 14582 }, { "epoch": 4.857832930790372, "grad_norm": 0.7265625, "learning_rate": 8.205997694290441e-06, "loss": 3.9249, "step": 14583 }, { "epoch": 4.858166069792621, "grad_norm": 0.76953125, "learning_rate": 8.205191845901663e-06, "loss": 4.0055, "step": 14584 }, { "epoch": 4.8584992087948695, "grad_norm": 0.76171875, "learning_rate": 8.204385989298929e-06, "loss": 3.9309, "step": 14585 }, { "epoch": 4.858832347797119, "grad_norm": 0.79296875, "learning_rate": 8.203580124491623e-06, "loss": 4.0048, "step": 14586 }, { "epoch": 4.859165486799367, "grad_norm": 0.69921875, "learning_rate": 8.202774251489133e-06, "loss": 4.016, "step": 14587 }, { "epoch": 4.859498625801615, "grad_norm": 0.7265625, "learning_rate": 8.201968370300842e-06, "loss": 4.0025, "step": 14588 }, { "epoch": 4.8598317648038645, "grad_norm": 0.76171875, "learning_rate": 8.201162480936146e-06, "loss": 4.0567, "step": 14589 }, { "epoch": 4.860164903806113, "grad_norm": 0.75390625, "learning_rate": 8.200356583404422e-06, "loss": 3.9544, "step": 14590 }, { "epoch": 4.860498042808362, "grad_norm": 0.69140625, "learning_rate": 8.199550677715061e-06, "loss": 4.0215, "step": 14591 }, { "epoch": 4.86083118181061, "grad_norm": 0.7421875, "learning_rate": 8.198744763877451e-06, "loss": 4.0095, "step": 14592 }, { "epoch": 4.861164320812859, "grad_norm": 0.73046875, "learning_rate": 8.197938841900975e-06, "loss": 3.9409, "step": 14593 }, { "epoch": 4.861497459815108, "grad_norm": 0.69921875, "learning_rate": 8.197132911795029e-06, "loss": 3.9748, "step": 14594 }, { "epoch": 4.861830598817357, "grad_norm": 0.71484375, "learning_rate": 8.19632697356899e-06, "loss": 3.9683, "step": 14595 }, { "epoch": 4.862163737819605, "grad_norm": 0.72265625, "learning_rate": 8.19552102723225e-06, "loss": 3.977, "step": 14596 }, { "epoch": 4.862496876821854, "grad_norm": 0.71484375, "learning_rate": 8.194715072794199e-06, "loss": 4.0267, "step": 14597 }, { "epoch": 4.862830015824103, "grad_norm": 0.7421875, "learning_rate": 8.193909110264219e-06, "loss": 4.0659, "step": 14598 }, { "epoch": 4.863163154826351, "grad_norm": 0.703125, "learning_rate": 8.193103139651704e-06, "loss": 4.082, "step": 14599 }, { "epoch": 4.8634962938286, "grad_norm": 0.7265625, "learning_rate": 8.192297160966038e-06, "loss": 4.0154, "step": 14600 }, { "epoch": 4.8638294328308485, "grad_norm": 0.72265625, "learning_rate": 8.191491174216606e-06, "loss": 3.9833, "step": 14601 }, { "epoch": 4.864162571833098, "grad_norm": 0.7421875, "learning_rate": 8.190685179412805e-06, "loss": 3.9494, "step": 14602 }, { "epoch": 4.864495710835346, "grad_norm": 0.71484375, "learning_rate": 8.189879176564015e-06, "loss": 4.0623, "step": 14603 }, { "epoch": 4.864828849837595, "grad_norm": 0.73046875, "learning_rate": 8.189073165679626e-06, "loss": 3.9893, "step": 14604 }, { "epoch": 4.865161988839843, "grad_norm": 0.734375, "learning_rate": 8.18826714676903e-06, "loss": 3.9569, "step": 14605 }, { "epoch": 4.865495127842092, "grad_norm": 0.73046875, "learning_rate": 8.187461119841609e-06, "loss": 4.0452, "step": 14606 }, { "epoch": 4.865828266844341, "grad_norm": 0.70703125, "learning_rate": 8.186655084906756e-06, "loss": 4.0091, "step": 14607 }, { "epoch": 4.866161405846589, "grad_norm": 0.765625, "learning_rate": 8.18584904197386e-06, "loss": 4.0222, "step": 14608 }, { "epoch": 4.866494544848838, "grad_norm": 0.7265625, "learning_rate": 8.185042991052308e-06, "loss": 3.9866, "step": 14609 }, { "epoch": 4.866827683851087, "grad_norm": 0.74609375, "learning_rate": 8.184236932151487e-06, "loss": 4.0304, "step": 14610 }, { "epoch": 4.867160822853336, "grad_norm": 0.7109375, "learning_rate": 8.18343086528079e-06, "loss": 3.9285, "step": 14611 }, { "epoch": 4.867493961855584, "grad_norm": 0.76171875, "learning_rate": 8.182624790449604e-06, "loss": 3.9726, "step": 14612 }, { "epoch": 4.867827100857832, "grad_norm": 0.71484375, "learning_rate": 8.181818707667314e-06, "loss": 3.9456, "step": 14613 }, { "epoch": 4.868160239860082, "grad_norm": 0.6875, "learning_rate": 8.181012616943316e-06, "loss": 4.0497, "step": 14614 }, { "epoch": 4.868493378862331, "grad_norm": 0.71875, "learning_rate": 8.180206518286995e-06, "loss": 4.0044, "step": 14615 }, { "epoch": 4.868826517864579, "grad_norm": 0.6953125, "learning_rate": 8.179400411707743e-06, "loss": 4.0071, "step": 14616 }, { "epoch": 4.869159656866827, "grad_norm": 0.73046875, "learning_rate": 8.178594297214947e-06, "loss": 3.974, "step": 14617 }, { "epoch": 4.869492795869077, "grad_norm": 0.6953125, "learning_rate": 8.177788174817997e-06, "loss": 3.9598, "step": 14618 }, { "epoch": 4.869825934871325, "grad_norm": 0.71484375, "learning_rate": 8.176982044526283e-06, "loss": 4.045, "step": 14619 }, { "epoch": 4.870159073873574, "grad_norm": 0.74609375, "learning_rate": 8.176175906349194e-06, "loss": 3.9196, "step": 14620 }, { "epoch": 4.870492212875822, "grad_norm": 0.70703125, "learning_rate": 8.175369760296121e-06, "loss": 4.0419, "step": 14621 }, { "epoch": 4.8708253518780715, "grad_norm": 0.8203125, "learning_rate": 8.174563606376454e-06, "loss": 3.9585, "step": 14622 }, { "epoch": 4.87115849088032, "grad_norm": 0.71484375, "learning_rate": 8.17375744459958e-06, "loss": 4.0548, "step": 14623 }, { "epoch": 4.871491629882568, "grad_norm": 0.73046875, "learning_rate": 8.172951274974893e-06, "loss": 4.0799, "step": 14624 }, { "epoch": 4.871824768884817, "grad_norm": 0.7421875, "learning_rate": 8.172145097511779e-06, "loss": 3.9686, "step": 14625 }, { "epoch": 4.872157907887066, "grad_norm": 0.71484375, "learning_rate": 8.171338912219632e-06, "loss": 3.9864, "step": 14626 }, { "epoch": 4.872491046889315, "grad_norm": 0.7265625, "learning_rate": 8.170532719107841e-06, "loss": 4.0109, "step": 14627 }, { "epoch": 4.872824185891563, "grad_norm": 0.6953125, "learning_rate": 8.169726518185796e-06, "loss": 3.9924, "step": 14628 }, { "epoch": 4.873157324893812, "grad_norm": 0.7265625, "learning_rate": 8.168920309462888e-06, "loss": 4.0187, "step": 14629 }, { "epoch": 4.8734904638960606, "grad_norm": 0.80078125, "learning_rate": 8.168114092948505e-06, "loss": 3.9445, "step": 14630 }, { "epoch": 4.873823602898309, "grad_norm": 0.71875, "learning_rate": 8.167307868652043e-06, "loss": 3.9154, "step": 14631 }, { "epoch": 4.874156741900558, "grad_norm": 0.734375, "learning_rate": 8.166501636582888e-06, "loss": 4.0881, "step": 14632 }, { "epoch": 4.874489880902806, "grad_norm": 0.71875, "learning_rate": 8.165695396750432e-06, "loss": 4.0013, "step": 14633 }, { "epoch": 4.8748230199050555, "grad_norm": 0.7421875, "learning_rate": 8.16488914916407e-06, "loss": 4.0902, "step": 14634 }, { "epoch": 4.875156158907304, "grad_norm": 0.74609375, "learning_rate": 8.164082893833184e-06, "loss": 4.0111, "step": 14635 }, { "epoch": 4.875489297909553, "grad_norm": 0.72265625, "learning_rate": 8.163276630767175e-06, "loss": 3.981, "step": 14636 }, { "epoch": 4.875822436911801, "grad_norm": 0.75, "learning_rate": 8.162470359975428e-06, "loss": 4.0, "step": 14637 }, { "epoch": 4.8761555759140505, "grad_norm": 0.78125, "learning_rate": 8.161664081467337e-06, "loss": 4.0307, "step": 14638 }, { "epoch": 4.876488714916299, "grad_norm": 0.7578125, "learning_rate": 8.160857795252293e-06, "loss": 4.0705, "step": 14639 }, { "epoch": 4.876821853918548, "grad_norm": 0.7421875, "learning_rate": 8.160051501339687e-06, "loss": 4.0225, "step": 14640 }, { "epoch": 4.877154992920796, "grad_norm": 0.734375, "learning_rate": 8.159245199738908e-06, "loss": 3.9641, "step": 14641 }, { "epoch": 4.8774881319230445, "grad_norm": 0.7109375, "learning_rate": 8.158438890459354e-06, "loss": 4.046, "step": 14642 }, { "epoch": 4.877821270925294, "grad_norm": 0.75390625, "learning_rate": 8.157632573510412e-06, "loss": 3.9868, "step": 14643 }, { "epoch": 4.878154409927542, "grad_norm": 0.75, "learning_rate": 8.156826248901476e-06, "loss": 3.9365, "step": 14644 }, { "epoch": 4.878487548929791, "grad_norm": 0.74609375, "learning_rate": 8.156019916641935e-06, "loss": 4.0222, "step": 14645 }, { "epoch": 4.8788206879320395, "grad_norm": 0.72265625, "learning_rate": 8.155213576741185e-06, "loss": 4.0351, "step": 14646 }, { "epoch": 4.879153826934289, "grad_norm": 0.70703125, "learning_rate": 8.154407229208613e-06, "loss": 4.0307, "step": 14647 }, { "epoch": 4.879486965936537, "grad_norm": 0.71875, "learning_rate": 8.153600874053614e-06, "loss": 4.0149, "step": 14648 }, { "epoch": 4.879820104938785, "grad_norm": 0.72265625, "learning_rate": 8.152794511285583e-06, "loss": 3.9688, "step": 14649 }, { "epoch": 4.8801532439410344, "grad_norm": 0.70703125, "learning_rate": 8.151988140913909e-06, "loss": 4.0301, "step": 14650 }, { "epoch": 4.880486382943283, "grad_norm": 0.7421875, "learning_rate": 8.151181762947985e-06, "loss": 4.0764, "step": 14651 }, { "epoch": 4.880819521945532, "grad_norm": 0.76171875, "learning_rate": 8.150375377397206e-06, "loss": 4.0461, "step": 14652 }, { "epoch": 4.88115266094778, "grad_norm": 0.74609375, "learning_rate": 8.149568984270962e-06, "loss": 3.9774, "step": 14653 }, { "epoch": 4.881485799950029, "grad_norm": 0.73046875, "learning_rate": 8.148762583578643e-06, "loss": 3.9882, "step": 14654 }, { "epoch": 4.881818938952278, "grad_norm": 0.73046875, "learning_rate": 8.147956175329647e-06, "loss": 4.0132, "step": 14655 }, { "epoch": 4.882152077954527, "grad_norm": 0.74609375, "learning_rate": 8.147149759533366e-06, "loss": 4.0075, "step": 14656 }, { "epoch": 4.882485216956775, "grad_norm": 0.75, "learning_rate": 8.146343336199192e-06, "loss": 3.9697, "step": 14657 }, { "epoch": 4.882818355959024, "grad_norm": 0.80078125, "learning_rate": 8.145536905336517e-06, "loss": 4.0375, "step": 14658 }, { "epoch": 4.883151494961273, "grad_norm": 0.7578125, "learning_rate": 8.144730466954736e-06, "loss": 3.9379, "step": 14659 }, { "epoch": 4.883484633963521, "grad_norm": 0.71875, "learning_rate": 8.14392402106324e-06, "loss": 4.0424, "step": 14660 }, { "epoch": 4.88381777296577, "grad_norm": 0.78515625, "learning_rate": 8.143117567671427e-06, "loss": 4.0322, "step": 14661 }, { "epoch": 4.884150911968018, "grad_norm": 0.81640625, "learning_rate": 8.142311106788686e-06, "loss": 3.9181, "step": 14662 }, { "epoch": 4.884484050970268, "grad_norm": 0.72265625, "learning_rate": 8.14150463842441e-06, "loss": 3.9262, "step": 14663 }, { "epoch": 4.884817189972516, "grad_norm": 0.7421875, "learning_rate": 8.140698162587997e-06, "loss": 4.07, "step": 14664 }, { "epoch": 4.885150328974765, "grad_norm": 0.765625, "learning_rate": 8.139891679288836e-06, "loss": 4.0082, "step": 14665 }, { "epoch": 4.885483467977013, "grad_norm": 0.70703125, "learning_rate": 8.139085188536326e-06, "loss": 3.9799, "step": 14666 }, { "epoch": 4.885816606979262, "grad_norm": 0.75, "learning_rate": 8.138278690339856e-06, "loss": 3.9784, "step": 14667 }, { "epoch": 4.886149745981511, "grad_norm": 0.73828125, "learning_rate": 8.137472184708822e-06, "loss": 4.0096, "step": 14668 }, { "epoch": 4.886482884983759, "grad_norm": 0.73828125, "learning_rate": 8.136665671652614e-06, "loss": 3.9831, "step": 14669 }, { "epoch": 4.886816023986008, "grad_norm": 0.76953125, "learning_rate": 8.135859151180635e-06, "loss": 3.9493, "step": 14670 }, { "epoch": 4.887149162988257, "grad_norm": 0.7890625, "learning_rate": 8.135052623302272e-06, "loss": 3.881, "step": 14671 }, { "epoch": 4.887482301990506, "grad_norm": 0.7109375, "learning_rate": 8.134246088026922e-06, "loss": 3.9489, "step": 14672 }, { "epoch": 4.887815440992754, "grad_norm": 0.70703125, "learning_rate": 8.13343954536398e-06, "loss": 3.9459, "step": 14673 }, { "epoch": 4.888148579995003, "grad_norm": 0.7265625, "learning_rate": 8.132632995322836e-06, "loss": 3.9862, "step": 14674 }, { "epoch": 4.888481718997252, "grad_norm": 0.76953125, "learning_rate": 8.131826437912891e-06, "loss": 3.966, "step": 14675 }, { "epoch": 4.888814857999501, "grad_norm": 0.73046875, "learning_rate": 8.131019873143535e-06, "loss": 4.0083, "step": 14676 }, { "epoch": 4.889147997001749, "grad_norm": 0.71484375, "learning_rate": 8.130213301024165e-06, "loss": 4.0063, "step": 14677 }, { "epoch": 4.889481136003997, "grad_norm": 0.796875, "learning_rate": 8.129406721564176e-06, "loss": 4.0491, "step": 14678 }, { "epoch": 4.8898142750062465, "grad_norm": 0.7109375, "learning_rate": 8.12860013477296e-06, "loss": 3.9889, "step": 14679 }, { "epoch": 4.890147414008495, "grad_norm": 0.76171875, "learning_rate": 8.127793540659915e-06, "loss": 4.0442, "step": 14680 }, { "epoch": 4.890480553010744, "grad_norm": 0.72265625, "learning_rate": 8.126986939234434e-06, "loss": 4.0862, "step": 14681 }, { "epoch": 4.890813692012992, "grad_norm": 0.71484375, "learning_rate": 8.126180330505914e-06, "loss": 3.9823, "step": 14682 }, { "epoch": 4.8911468310152415, "grad_norm": 0.74609375, "learning_rate": 8.12537371448375e-06, "loss": 4.0074, "step": 14683 }, { "epoch": 4.89147997001749, "grad_norm": 0.70703125, "learning_rate": 8.124567091177338e-06, "loss": 3.9959, "step": 14684 }, { "epoch": 4.891813109019738, "grad_norm": 0.7265625, "learning_rate": 8.12376046059607e-06, "loss": 3.9645, "step": 14685 }, { "epoch": 4.892146248021987, "grad_norm": 0.7421875, "learning_rate": 8.122953822749342e-06, "loss": 4.03, "step": 14686 }, { "epoch": 4.892479387024236, "grad_norm": 0.7578125, "learning_rate": 8.122147177646555e-06, "loss": 4.0322, "step": 14687 }, { "epoch": 4.892812526026485, "grad_norm": 0.765625, "learning_rate": 8.1213405252971e-06, "loss": 3.9975, "step": 14688 }, { "epoch": 4.893145665028733, "grad_norm": 0.71875, "learning_rate": 8.120533865710373e-06, "loss": 4.0434, "step": 14689 }, { "epoch": 4.893478804030982, "grad_norm": 0.69921875, "learning_rate": 8.11972719889577e-06, "loss": 3.9564, "step": 14690 }, { "epoch": 4.8938119430332305, "grad_norm": 0.73046875, "learning_rate": 8.118920524862688e-06, "loss": 4.0319, "step": 14691 }, { "epoch": 4.894145082035479, "grad_norm": 0.76953125, "learning_rate": 8.118113843620523e-06, "loss": 3.94, "step": 14692 }, { "epoch": 4.894478221037728, "grad_norm": 0.71484375, "learning_rate": 8.11730715517867e-06, "loss": 3.9946, "step": 14693 }, { "epoch": 4.894811360039976, "grad_norm": 0.765625, "learning_rate": 8.116500459546527e-06, "loss": 3.9374, "step": 14694 }, { "epoch": 4.8951444990422255, "grad_norm": 0.734375, "learning_rate": 8.115693756733487e-06, "loss": 3.9838, "step": 14695 }, { "epoch": 4.895477638044474, "grad_norm": 0.76953125, "learning_rate": 8.114887046748948e-06, "loss": 3.9639, "step": 14696 }, { "epoch": 4.895810777046723, "grad_norm": 0.73046875, "learning_rate": 8.114080329602312e-06, "loss": 4.024, "step": 14697 }, { "epoch": 4.896143916048971, "grad_norm": 0.71875, "learning_rate": 8.113273605302963e-06, "loss": 4.0159, "step": 14698 }, { "epoch": 4.89647705505122, "grad_norm": 0.78515625, "learning_rate": 8.11246687386031e-06, "loss": 4.0187, "step": 14699 }, { "epoch": 4.896810194053469, "grad_norm": 0.76171875, "learning_rate": 8.111660135283741e-06, "loss": 3.9385, "step": 14700 }, { "epoch": 4.897143333055718, "grad_norm": 0.7109375, "learning_rate": 8.110853389582659e-06, "loss": 3.9685, "step": 14701 }, { "epoch": 4.897476472057966, "grad_norm": 0.6953125, "learning_rate": 8.110046636766457e-06, "loss": 4.0187, "step": 14702 }, { "epoch": 4.8978096110602145, "grad_norm": 0.6640625, "learning_rate": 8.109239876844531e-06, "loss": 4.0018, "step": 14703 }, { "epoch": 4.898142750062464, "grad_norm": 0.7109375, "learning_rate": 8.108433109826284e-06, "loss": 3.9842, "step": 14704 }, { "epoch": 4.898475889064712, "grad_norm": 0.765625, "learning_rate": 8.107626335721105e-06, "loss": 4.0065, "step": 14705 }, { "epoch": 4.898809028066961, "grad_norm": 0.7421875, "learning_rate": 8.1068195545384e-06, "loss": 4.0082, "step": 14706 }, { "epoch": 4.8991421670692095, "grad_norm": 0.69921875, "learning_rate": 8.106012766287557e-06, "loss": 3.9966, "step": 14707 }, { "epoch": 4.899475306071459, "grad_norm": 0.71875, "learning_rate": 8.105205970977981e-06, "loss": 3.9945, "step": 14708 }, { "epoch": 4.899808445073707, "grad_norm": 0.79296875, "learning_rate": 8.104399168619064e-06, "loss": 3.9827, "step": 14709 }, { "epoch": 4.900141584075955, "grad_norm": 0.72265625, "learning_rate": 8.103592359220208e-06, "loss": 3.9967, "step": 14710 }, { "epoch": 4.900474723078204, "grad_norm": 0.79296875, "learning_rate": 8.102785542790807e-06, "loss": 4.0049, "step": 14711 }, { "epoch": 4.900807862080453, "grad_norm": 0.69921875, "learning_rate": 8.101978719340263e-06, "loss": 4.0511, "step": 14712 }, { "epoch": 4.901141001082702, "grad_norm": 0.796875, "learning_rate": 8.101171888877968e-06, "loss": 3.9555, "step": 14713 }, { "epoch": 4.90147414008495, "grad_norm": 0.7109375, "learning_rate": 8.100365051413322e-06, "loss": 4.0165, "step": 14714 }, { "epoch": 4.901807279087199, "grad_norm": 0.70703125, "learning_rate": 8.099558206955724e-06, "loss": 3.9468, "step": 14715 }, { "epoch": 4.902140418089448, "grad_norm": 0.7109375, "learning_rate": 8.098751355514572e-06, "loss": 3.997, "step": 14716 }, { "epoch": 4.902473557091697, "grad_norm": 0.75390625, "learning_rate": 8.097944497099264e-06, "loss": 4.006, "step": 14717 }, { "epoch": 4.902806696093945, "grad_norm": 0.7265625, "learning_rate": 8.097137631719195e-06, "loss": 4.0102, "step": 14718 }, { "epoch": 4.903139835096194, "grad_norm": 0.75390625, "learning_rate": 8.09633075938377e-06, "loss": 3.9659, "step": 14719 }, { "epoch": 4.903472974098443, "grad_norm": 0.6953125, "learning_rate": 8.095523880102381e-06, "loss": 4.0519, "step": 14720 }, { "epoch": 4.903806113100691, "grad_norm": 0.7109375, "learning_rate": 8.094716993884431e-06, "loss": 4.0366, "step": 14721 }, { "epoch": 4.90413925210294, "grad_norm": 0.73046875, "learning_rate": 8.093910100739314e-06, "loss": 3.9721, "step": 14722 }, { "epoch": 4.904472391105188, "grad_norm": 0.68359375, "learning_rate": 8.09310320067643e-06, "loss": 4.0084, "step": 14723 }, { "epoch": 4.904805530107438, "grad_norm": 0.72265625, "learning_rate": 8.09229629370518e-06, "loss": 4.0724, "step": 14724 }, { "epoch": 4.905138669109686, "grad_norm": 0.7421875, "learning_rate": 8.091489379834961e-06, "loss": 4.0201, "step": 14725 }, { "epoch": 4.905471808111935, "grad_norm": 0.72265625, "learning_rate": 8.090682459075172e-06, "loss": 3.9698, "step": 14726 }, { "epoch": 4.905804947114183, "grad_norm": 0.7265625, "learning_rate": 8.08987553143521e-06, "loss": 3.9949, "step": 14727 }, { "epoch": 4.906138086116432, "grad_norm": 0.6953125, "learning_rate": 8.089068596924478e-06, "loss": 4.0281, "step": 14728 }, { "epoch": 4.906471225118681, "grad_norm": 0.71484375, "learning_rate": 8.088261655552373e-06, "loss": 3.9555, "step": 14729 }, { "epoch": 4.906804364120929, "grad_norm": 0.7265625, "learning_rate": 8.087454707328292e-06, "loss": 4.0026, "step": 14730 }, { "epoch": 4.907137503123178, "grad_norm": 0.6875, "learning_rate": 8.086647752261636e-06, "loss": 4.0806, "step": 14731 }, { "epoch": 4.907470642125427, "grad_norm": 0.76171875, "learning_rate": 8.085840790361806e-06, "loss": 3.9836, "step": 14732 }, { "epoch": 4.907803781127676, "grad_norm": 0.75, "learning_rate": 8.0850338216382e-06, "loss": 4.0527, "step": 14733 }, { "epoch": 4.908136920129924, "grad_norm": 0.703125, "learning_rate": 8.084226846100217e-06, "loss": 4.0171, "step": 14734 }, { "epoch": 4.908470059132173, "grad_norm": 0.7734375, "learning_rate": 8.083419863757254e-06, "loss": 3.9238, "step": 14735 }, { "epoch": 4.908803198134422, "grad_norm": 0.73046875, "learning_rate": 8.082612874618715e-06, "loss": 4.0061, "step": 14736 }, { "epoch": 4.909136337136671, "grad_norm": 0.7734375, "learning_rate": 8.081805878693998e-06, "loss": 4.0214, "step": 14737 }, { "epoch": 4.909469476138919, "grad_norm": 0.71484375, "learning_rate": 8.080998875992503e-06, "loss": 4.056, "step": 14738 }, { "epoch": 4.909802615141167, "grad_norm": 0.75390625, "learning_rate": 8.08019186652363e-06, "loss": 4.0433, "step": 14739 }, { "epoch": 4.9101357541434165, "grad_norm": 0.70703125, "learning_rate": 8.079384850296777e-06, "loss": 3.962, "step": 14740 }, { "epoch": 4.910468893145665, "grad_norm": 0.75, "learning_rate": 8.078577827321345e-06, "loss": 4.0461, "step": 14741 }, { "epoch": 4.910802032147914, "grad_norm": 0.6875, "learning_rate": 8.077770797606737e-06, "loss": 3.9933, "step": 14742 }, { "epoch": 4.911135171150162, "grad_norm": 0.7734375, "learning_rate": 8.07696376116235e-06, "loss": 3.9724, "step": 14743 }, { "epoch": 4.9114683101524115, "grad_norm": 0.78125, "learning_rate": 8.076156717997583e-06, "loss": 3.9975, "step": 14744 }, { "epoch": 4.91180144915466, "grad_norm": 0.703125, "learning_rate": 8.075349668121841e-06, "loss": 3.9876, "step": 14745 }, { "epoch": 4.912134588156908, "grad_norm": 0.7265625, "learning_rate": 8.074542611544519e-06, "loss": 3.9348, "step": 14746 }, { "epoch": 4.912467727159157, "grad_norm": 0.6953125, "learning_rate": 8.07373554827502e-06, "loss": 4.0467, "step": 14747 }, { "epoch": 4.9128008661614055, "grad_norm": 0.73828125, "learning_rate": 8.072928478322746e-06, "loss": 4.0373, "step": 14748 }, { "epoch": 4.913134005163655, "grad_norm": 0.7265625, "learning_rate": 8.072121401697096e-06, "loss": 4.0665, "step": 14749 }, { "epoch": 4.913467144165903, "grad_norm": 0.74609375, "learning_rate": 8.071314318407471e-06, "loss": 4.0312, "step": 14750 }, { "epoch": 4.913800283168152, "grad_norm": 0.75390625, "learning_rate": 8.070507228463272e-06, "loss": 4.0674, "step": 14751 }, { "epoch": 4.9141334221704005, "grad_norm": 0.73046875, "learning_rate": 8.069700131873899e-06, "loss": 4.0802, "step": 14752 }, { "epoch": 4.914466561172649, "grad_norm": 0.7578125, "learning_rate": 8.068893028648752e-06, "loss": 3.9828, "step": 14753 }, { "epoch": 4.914799700174898, "grad_norm": 0.83203125, "learning_rate": 8.068085918797235e-06, "loss": 3.9634, "step": 14754 }, { "epoch": 4.915132839177147, "grad_norm": 0.75390625, "learning_rate": 8.067278802328749e-06, "loss": 3.9212, "step": 14755 }, { "epoch": 4.9154659781793955, "grad_norm": 0.76171875, "learning_rate": 8.06647167925269e-06, "loss": 3.9498, "step": 14756 }, { "epoch": 4.915799117181644, "grad_norm": 0.734375, "learning_rate": 8.065664549578464e-06, "loss": 3.9403, "step": 14757 }, { "epoch": 4.916132256183893, "grad_norm": 0.78515625, "learning_rate": 8.064857413315473e-06, "loss": 3.9592, "step": 14758 }, { "epoch": 4.916465395186141, "grad_norm": 0.71875, "learning_rate": 8.064050270473113e-06, "loss": 3.9982, "step": 14759 }, { "epoch": 4.91679853418839, "grad_norm": 0.765625, "learning_rate": 8.063243121060793e-06, "loss": 3.9898, "step": 14760 }, { "epoch": 4.917131673190639, "grad_norm": 0.73046875, "learning_rate": 8.06243596508791e-06, "loss": 4.0222, "step": 14761 }, { "epoch": 4.917464812192888, "grad_norm": 0.7421875, "learning_rate": 8.061628802563864e-06, "loss": 3.9915, "step": 14762 }, { "epoch": 4.917797951195136, "grad_norm": 0.75390625, "learning_rate": 8.060821633498062e-06, "loss": 4.009, "step": 14763 }, { "epoch": 4.9181310901973845, "grad_norm": 0.71875, "learning_rate": 8.060014457899903e-06, "loss": 4.0416, "step": 14764 }, { "epoch": 4.918464229199634, "grad_norm": 0.78515625, "learning_rate": 8.059207275778786e-06, "loss": 3.9415, "step": 14765 }, { "epoch": 4.918797368201882, "grad_norm": 0.7265625, "learning_rate": 8.058400087144116e-06, "loss": 4.0086, "step": 14766 }, { "epoch": 4.919130507204131, "grad_norm": 0.703125, "learning_rate": 8.057592892005295e-06, "loss": 4.007, "step": 14767 }, { "epoch": 4.919463646206379, "grad_norm": 0.7578125, "learning_rate": 8.056785690371721e-06, "loss": 3.9633, "step": 14768 }, { "epoch": 4.919796785208629, "grad_norm": 0.74609375, "learning_rate": 8.055978482252805e-06, "loss": 4.0238, "step": 14769 }, { "epoch": 4.920129924210877, "grad_norm": 0.7578125, "learning_rate": 8.055171267657942e-06, "loss": 4.0252, "step": 14770 }, { "epoch": 4.920463063213125, "grad_norm": 0.73046875, "learning_rate": 8.054364046596536e-06, "loss": 4.0429, "step": 14771 }, { "epoch": 4.920796202215374, "grad_norm": 0.70703125, "learning_rate": 8.05355681907799e-06, "loss": 3.9773, "step": 14772 }, { "epoch": 4.921129341217623, "grad_norm": 0.734375, "learning_rate": 8.052749585111706e-06, "loss": 4.0252, "step": 14773 }, { "epoch": 4.921462480219872, "grad_norm": 0.73828125, "learning_rate": 8.051942344707086e-06, "loss": 3.9933, "step": 14774 }, { "epoch": 4.92179561922212, "grad_norm": 0.7578125, "learning_rate": 8.051135097873533e-06, "loss": 4.0476, "step": 14775 }, { "epoch": 4.922128758224369, "grad_norm": 0.76171875, "learning_rate": 8.05032784462045e-06, "loss": 3.9806, "step": 14776 }, { "epoch": 4.922461897226618, "grad_norm": 0.74609375, "learning_rate": 8.04952058495724e-06, "loss": 4.0498, "step": 14777 }, { "epoch": 4.922795036228867, "grad_norm": 0.703125, "learning_rate": 8.048713318893307e-06, "loss": 3.9442, "step": 14778 }, { "epoch": 4.923128175231115, "grad_norm": 0.73046875, "learning_rate": 8.04790604643805e-06, "loss": 4.0254, "step": 14779 }, { "epoch": 4.923461314233364, "grad_norm": 0.7421875, "learning_rate": 8.047098767600877e-06, "loss": 3.9938, "step": 14780 }, { "epoch": 4.923794453235613, "grad_norm": 0.734375, "learning_rate": 8.046291482391184e-06, "loss": 3.9816, "step": 14781 }, { "epoch": 4.924127592237861, "grad_norm": 0.7578125, "learning_rate": 8.045484190818382e-06, "loss": 3.9705, "step": 14782 }, { "epoch": 4.92446073124011, "grad_norm": 0.73046875, "learning_rate": 8.044676892891869e-06, "loss": 3.9722, "step": 14783 }, { "epoch": 4.924793870242358, "grad_norm": 0.7421875, "learning_rate": 8.043869588621052e-06, "loss": 3.9529, "step": 14784 }, { "epoch": 4.925127009244608, "grad_norm": 0.73828125, "learning_rate": 8.04306227801533e-06, "loss": 3.9955, "step": 14785 }, { "epoch": 4.925460148246856, "grad_norm": 0.81640625, "learning_rate": 8.04225496108411e-06, "loss": 3.9219, "step": 14786 }, { "epoch": 4.925793287249105, "grad_norm": 0.72265625, "learning_rate": 8.041447637836795e-06, "loss": 4.0681, "step": 14787 }, { "epoch": 4.926126426251353, "grad_norm": 0.73828125, "learning_rate": 8.040640308282786e-06, "loss": 4.0247, "step": 14788 }, { "epoch": 4.926459565253602, "grad_norm": 0.734375, "learning_rate": 8.039832972431489e-06, "loss": 3.9579, "step": 14789 }, { "epoch": 4.926792704255851, "grad_norm": 0.6796875, "learning_rate": 8.039025630292306e-06, "loss": 4.1114, "step": 14790 }, { "epoch": 4.927125843258099, "grad_norm": 0.7421875, "learning_rate": 8.038218281874645e-06, "loss": 4.0462, "step": 14791 }, { "epoch": 4.927458982260348, "grad_norm": 0.73828125, "learning_rate": 8.037410927187905e-06, "loss": 3.9449, "step": 14792 }, { "epoch": 4.927792121262597, "grad_norm": 0.796875, "learning_rate": 8.03660356624149e-06, "loss": 3.9589, "step": 14793 }, { "epoch": 4.928125260264846, "grad_norm": 0.74609375, "learning_rate": 8.035796199044809e-06, "loss": 4.0261, "step": 14794 }, { "epoch": 4.928458399267094, "grad_norm": 0.77734375, "learning_rate": 8.03498882560726e-06, "loss": 3.9727, "step": 14795 }, { "epoch": 4.928791538269343, "grad_norm": 0.76171875, "learning_rate": 8.034181445938254e-06, "loss": 3.92, "step": 14796 }, { "epoch": 4.9291246772715915, "grad_norm": 0.77734375, "learning_rate": 8.033374060047186e-06, "loss": 3.9719, "step": 14797 }, { "epoch": 4.929457816273841, "grad_norm": 0.765625, "learning_rate": 8.032566667943469e-06, "loss": 3.9246, "step": 14798 }, { "epoch": 4.929790955276089, "grad_norm": 0.69140625, "learning_rate": 8.031759269636502e-06, "loss": 3.9935, "step": 14799 }, { "epoch": 4.930124094278337, "grad_norm": 0.6875, "learning_rate": 8.030951865135694e-06, "loss": 4.0609, "step": 14800 }, { "epoch": 4.9304572332805865, "grad_norm": 0.75, "learning_rate": 8.030144454450445e-06, "loss": 3.9561, "step": 14801 }, { "epoch": 4.930790372282835, "grad_norm": 0.765625, "learning_rate": 8.02933703759016e-06, "loss": 4.0038, "step": 14802 }, { "epoch": 4.931123511285084, "grad_norm": 0.79296875, "learning_rate": 8.028529614564245e-06, "loss": 4.0388, "step": 14803 }, { "epoch": 4.931456650287332, "grad_norm": 0.7109375, "learning_rate": 8.027722185382108e-06, "loss": 4.0598, "step": 14804 }, { "epoch": 4.9317897892895814, "grad_norm": 0.71875, "learning_rate": 8.02691475005315e-06, "loss": 3.9988, "step": 14805 }, { "epoch": 4.93212292829183, "grad_norm": 0.7890625, "learning_rate": 8.026107308586774e-06, "loss": 3.9549, "step": 14806 }, { "epoch": 4.932456067294078, "grad_norm": 0.72265625, "learning_rate": 8.025299860992389e-06, "loss": 4.0548, "step": 14807 }, { "epoch": 4.932789206296327, "grad_norm": 0.71484375, "learning_rate": 8.024492407279396e-06, "loss": 4.0268, "step": 14808 }, { "epoch": 4.9331223452985755, "grad_norm": 0.75390625, "learning_rate": 8.023684947457206e-06, "loss": 3.9315, "step": 14809 }, { "epoch": 4.933455484300825, "grad_norm": 0.6953125, "learning_rate": 8.02287748153522e-06, "loss": 3.9565, "step": 14810 }, { "epoch": 4.933788623303073, "grad_norm": 0.71875, "learning_rate": 8.022070009522843e-06, "loss": 3.9913, "step": 14811 }, { "epoch": 4.934121762305322, "grad_norm": 0.73046875, "learning_rate": 8.02126253142948e-06, "loss": 3.9953, "step": 14812 }, { "epoch": 4.9344549013075705, "grad_norm": 0.71484375, "learning_rate": 8.02045504726454e-06, "loss": 3.9938, "step": 14813 }, { "epoch": 4.93478804030982, "grad_norm": 0.76953125, "learning_rate": 8.019647557037425e-06, "loss": 3.9503, "step": 14814 }, { "epoch": 4.935121179312068, "grad_norm": 0.77734375, "learning_rate": 8.018840060757539e-06, "loss": 3.9741, "step": 14815 }, { "epoch": 4.935454318314317, "grad_norm": 0.7734375, "learning_rate": 8.018032558434293e-06, "loss": 4.0053, "step": 14816 }, { "epoch": 4.935787457316565, "grad_norm": 0.73046875, "learning_rate": 8.01722505007709e-06, "loss": 4.0397, "step": 14817 }, { "epoch": 4.936120596318814, "grad_norm": 0.74609375, "learning_rate": 8.016417535695335e-06, "loss": 4.0292, "step": 14818 }, { "epoch": 4.936453735321063, "grad_norm": 0.74609375, "learning_rate": 8.015610015298432e-06, "loss": 4.0268, "step": 14819 }, { "epoch": 4.936786874323311, "grad_norm": 0.72265625, "learning_rate": 8.014802488895793e-06, "loss": 3.9745, "step": 14820 }, { "epoch": 4.93712001332556, "grad_norm": 0.734375, "learning_rate": 8.013994956496818e-06, "loss": 3.9782, "step": 14821 }, { "epoch": 4.937453152327809, "grad_norm": 0.76171875, "learning_rate": 8.013187418110914e-06, "loss": 4.0877, "step": 14822 }, { "epoch": 4.937786291330058, "grad_norm": 0.75, "learning_rate": 8.012379873747491e-06, "loss": 3.9724, "step": 14823 }, { "epoch": 4.938119430332306, "grad_norm": 0.74609375, "learning_rate": 8.011572323415951e-06, "loss": 4.0059, "step": 14824 }, { "epoch": 4.9384525693345545, "grad_norm": 0.76171875, "learning_rate": 8.0107647671257e-06, "loss": 4.0507, "step": 14825 }, { "epoch": 4.938785708336804, "grad_norm": 0.765625, "learning_rate": 8.009957204886149e-06, "loss": 3.9625, "step": 14826 }, { "epoch": 4.939118847339052, "grad_norm": 0.7578125, "learning_rate": 8.009149636706698e-06, "loss": 4.0057, "step": 14827 }, { "epoch": 4.939451986341301, "grad_norm": 0.73046875, "learning_rate": 8.008342062596757e-06, "loss": 4.0275, "step": 14828 }, { "epoch": 4.939785125343549, "grad_norm": 0.76953125, "learning_rate": 8.007534482565733e-06, "loss": 3.966, "step": 14829 }, { "epoch": 4.940118264345799, "grad_norm": 0.71484375, "learning_rate": 8.00672689662303e-06, "loss": 4.0231, "step": 14830 }, { "epoch": 4.940451403348047, "grad_norm": 0.74609375, "learning_rate": 8.005919304778058e-06, "loss": 3.9526, "step": 14831 }, { "epoch": 4.940784542350295, "grad_norm": 0.6796875, "learning_rate": 8.005111707040223e-06, "loss": 3.9538, "step": 14832 }, { "epoch": 4.941117681352544, "grad_norm": 0.73828125, "learning_rate": 8.004304103418928e-06, "loss": 3.9756, "step": 14833 }, { "epoch": 4.941450820354793, "grad_norm": 0.765625, "learning_rate": 8.003496493923581e-06, "loss": 3.9387, "step": 14834 }, { "epoch": 4.941783959357042, "grad_norm": 0.70703125, "learning_rate": 8.002688878563596e-06, "loss": 3.9932, "step": 14835 }, { "epoch": 4.94211709835929, "grad_norm": 0.70703125, "learning_rate": 8.001881257348372e-06, "loss": 4.0094, "step": 14836 }, { "epoch": 4.942450237361539, "grad_norm": 0.73046875, "learning_rate": 8.001073630287316e-06, "loss": 3.9357, "step": 14837 }, { "epoch": 4.942783376363788, "grad_norm": 0.74609375, "learning_rate": 8.00026599738984e-06, "loss": 4.0083, "step": 14838 }, { "epoch": 4.943116515366037, "grad_norm": 0.71484375, "learning_rate": 7.999458358665346e-06, "loss": 4.0315, "step": 14839 }, { "epoch": 4.943449654368285, "grad_norm": 0.75, "learning_rate": 7.998650714123247e-06, "loss": 4.0357, "step": 14840 }, { "epoch": 4.943782793370534, "grad_norm": 0.7578125, "learning_rate": 7.997843063772947e-06, "loss": 3.9443, "step": 14841 }, { "epoch": 4.944115932372783, "grad_norm": 0.70703125, "learning_rate": 7.997035407623853e-06, "loss": 4.0176, "step": 14842 }, { "epoch": 4.944449071375031, "grad_norm": 0.7109375, "learning_rate": 7.996227745685372e-06, "loss": 4.0658, "step": 14843 }, { "epoch": 4.94478221037728, "grad_norm": 0.7265625, "learning_rate": 7.995420077966914e-06, "loss": 3.914, "step": 14844 }, { "epoch": 4.945115349379528, "grad_norm": 0.71875, "learning_rate": 7.994612404477884e-06, "loss": 3.9601, "step": 14845 }, { "epoch": 4.9454484883817775, "grad_norm": 0.74609375, "learning_rate": 7.993804725227694e-06, "loss": 3.9968, "step": 14846 }, { "epoch": 4.945781627384026, "grad_norm": 0.73828125, "learning_rate": 7.992997040225745e-06, "loss": 4.0683, "step": 14847 }, { "epoch": 4.946114766386275, "grad_norm": 0.70703125, "learning_rate": 7.992189349481448e-06, "loss": 3.9935, "step": 14848 }, { "epoch": 4.946447905388523, "grad_norm": 0.70703125, "learning_rate": 7.991381653004212e-06, "loss": 4.0111, "step": 14849 }, { "epoch": 4.946781044390772, "grad_norm": 0.76171875, "learning_rate": 7.990573950803445e-06, "loss": 3.9823, "step": 14850 }, { "epoch": 4.947114183393021, "grad_norm": 0.734375, "learning_rate": 7.989766242888556e-06, "loss": 3.9798, "step": 14851 }, { "epoch": 4.947447322395269, "grad_norm": 0.80078125, "learning_rate": 7.988958529268946e-06, "loss": 3.9433, "step": 14852 }, { "epoch": 4.947780461397518, "grad_norm": 0.71875, "learning_rate": 7.988150809954033e-06, "loss": 3.995, "step": 14853 }, { "epoch": 4.9481136003997666, "grad_norm": 0.70703125, "learning_rate": 7.987343084953218e-06, "loss": 3.9618, "step": 14854 }, { "epoch": 4.948446739402016, "grad_norm": 0.74609375, "learning_rate": 7.986535354275914e-06, "loss": 3.9678, "step": 14855 }, { "epoch": 4.948779878404264, "grad_norm": 0.7421875, "learning_rate": 7.985727617931524e-06, "loss": 3.9909, "step": 14856 }, { "epoch": 4.949113017406513, "grad_norm": 0.73828125, "learning_rate": 7.984919875929458e-06, "loss": 3.9441, "step": 14857 }, { "epoch": 4.9494461564087615, "grad_norm": 0.71484375, "learning_rate": 7.984112128279128e-06, "loss": 3.9843, "step": 14858 }, { "epoch": 4.949779295411011, "grad_norm": 0.76953125, "learning_rate": 7.98330437498994e-06, "loss": 4.0067, "step": 14859 }, { "epoch": 4.950112434413259, "grad_norm": 0.75, "learning_rate": 7.982496616071306e-06, "loss": 3.9403, "step": 14860 }, { "epoch": 4.950445573415507, "grad_norm": 0.69140625, "learning_rate": 7.981688851532626e-06, "loss": 3.9966, "step": 14861 }, { "epoch": 4.9507787124177565, "grad_norm": 0.6875, "learning_rate": 7.980881081383318e-06, "loss": 3.979, "step": 14862 }, { "epoch": 4.951111851420005, "grad_norm": 0.73828125, "learning_rate": 7.980073305632787e-06, "loss": 4.0429, "step": 14863 }, { "epoch": 4.951444990422254, "grad_norm": 0.73046875, "learning_rate": 7.97926552429044e-06, "loss": 3.949, "step": 14864 }, { "epoch": 4.951778129424502, "grad_norm": 0.74609375, "learning_rate": 7.978457737365688e-06, "loss": 4.0385, "step": 14865 }, { "epoch": 4.952111268426751, "grad_norm": 0.76171875, "learning_rate": 7.977649944867942e-06, "loss": 4.0227, "step": 14866 }, { "epoch": 4.952444407429, "grad_norm": 0.7265625, "learning_rate": 7.976842146806605e-06, "loss": 3.9402, "step": 14867 }, { "epoch": 4.952777546431248, "grad_norm": 0.72265625, "learning_rate": 7.976034343191093e-06, "loss": 3.9588, "step": 14868 }, { "epoch": 4.953110685433497, "grad_norm": 0.79296875, "learning_rate": 7.97522653403081e-06, "loss": 4.0001, "step": 14869 }, { "epoch": 4.9534438244357455, "grad_norm": 0.71875, "learning_rate": 7.974418719335168e-06, "loss": 3.9559, "step": 14870 }, { "epoch": 4.953776963437995, "grad_norm": 0.75390625, "learning_rate": 7.973610899113576e-06, "loss": 3.9911, "step": 14871 }, { "epoch": 4.954110102440243, "grad_norm": 0.70703125, "learning_rate": 7.972803073375443e-06, "loss": 4.0418, "step": 14872 }, { "epoch": 4.954443241442492, "grad_norm": 0.75390625, "learning_rate": 7.971995242130177e-06, "loss": 3.9407, "step": 14873 }, { "epoch": 4.9547763804447404, "grad_norm": 0.70703125, "learning_rate": 7.971187405387189e-06, "loss": 3.9223, "step": 14874 }, { "epoch": 4.95510951944699, "grad_norm": 0.734375, "learning_rate": 7.97037956315589e-06, "loss": 4.0145, "step": 14875 }, { "epoch": 4.955442658449238, "grad_norm": 0.71875, "learning_rate": 7.969571715445684e-06, "loss": 4.0329, "step": 14876 }, { "epoch": 4.955775797451487, "grad_norm": 0.72265625, "learning_rate": 7.96876386226599e-06, "loss": 4.0075, "step": 14877 }, { "epoch": 4.956108936453735, "grad_norm": 0.7578125, "learning_rate": 7.967956003626208e-06, "loss": 3.9983, "step": 14878 }, { "epoch": 4.956442075455984, "grad_norm": 0.73828125, "learning_rate": 7.967148139535754e-06, "loss": 4.0131, "step": 14879 }, { "epoch": 4.956775214458233, "grad_norm": 0.7578125, "learning_rate": 7.966340270004035e-06, "loss": 3.9364, "step": 14880 }, { "epoch": 4.957108353460481, "grad_norm": 0.76953125, "learning_rate": 7.965532395040464e-06, "loss": 3.9188, "step": 14881 }, { "epoch": 4.95744149246273, "grad_norm": 0.73046875, "learning_rate": 7.964724514654446e-06, "loss": 3.9574, "step": 14882 }, { "epoch": 4.957774631464979, "grad_norm": 0.75390625, "learning_rate": 7.963916628855396e-06, "loss": 3.9673, "step": 14883 }, { "epoch": 4.958107770467228, "grad_norm": 0.73828125, "learning_rate": 7.963108737652723e-06, "loss": 3.9828, "step": 14884 }, { "epoch": 4.958440909469476, "grad_norm": 0.703125, "learning_rate": 7.962300841055834e-06, "loss": 4.008, "step": 14885 }, { "epoch": 4.958774048471724, "grad_norm": 0.74609375, "learning_rate": 7.961492939074144e-06, "loss": 3.9222, "step": 14886 }, { "epoch": 4.959107187473974, "grad_norm": 0.7578125, "learning_rate": 7.960685031717058e-06, "loss": 4.0059, "step": 14887 }, { "epoch": 4.959440326476222, "grad_norm": 0.76171875, "learning_rate": 7.959877118993992e-06, "loss": 3.9301, "step": 14888 }, { "epoch": 4.959773465478471, "grad_norm": 0.7265625, "learning_rate": 7.95906920091435e-06, "loss": 3.9342, "step": 14889 }, { "epoch": 4.960106604480719, "grad_norm": 0.69921875, "learning_rate": 7.958261277487551e-06, "loss": 3.9224, "step": 14890 }, { "epoch": 4.960439743482969, "grad_norm": 0.80078125, "learning_rate": 7.957453348722996e-06, "loss": 4.0052, "step": 14891 }, { "epoch": 4.960772882485217, "grad_norm": 0.78125, "learning_rate": 7.956645414630104e-06, "loss": 3.9799, "step": 14892 }, { "epoch": 4.961106021487465, "grad_norm": 0.75390625, "learning_rate": 7.955837475218281e-06, "loss": 4.0184, "step": 14893 }, { "epoch": 4.961439160489714, "grad_norm": 0.7265625, "learning_rate": 7.955029530496938e-06, "loss": 3.9753, "step": 14894 }, { "epoch": 4.961772299491963, "grad_norm": 0.7421875, "learning_rate": 7.954221580475487e-06, "loss": 3.9998, "step": 14895 }, { "epoch": 4.962105438494212, "grad_norm": 0.7109375, "learning_rate": 7.953413625163337e-06, "loss": 3.9823, "step": 14896 }, { "epoch": 4.96243857749646, "grad_norm": 0.77734375, "learning_rate": 7.952605664569902e-06, "loss": 3.957, "step": 14897 }, { "epoch": 4.962771716498709, "grad_norm": 0.76171875, "learning_rate": 7.95179769870459e-06, "loss": 3.9817, "step": 14898 }, { "epoch": 4.963104855500958, "grad_norm": 0.71875, "learning_rate": 7.950989727576816e-06, "loss": 4.0545, "step": 14899 }, { "epoch": 4.963437994503207, "grad_norm": 0.73828125, "learning_rate": 7.950181751195987e-06, "loss": 4.03, "step": 14900 }, { "epoch": 4.963771133505455, "grad_norm": 0.765625, "learning_rate": 7.949373769571517e-06, "loss": 3.9669, "step": 14901 }, { "epoch": 4.964104272507704, "grad_norm": 0.76171875, "learning_rate": 7.948565782712812e-06, "loss": 3.9208, "step": 14902 }, { "epoch": 4.9644374115099525, "grad_norm": 0.75, "learning_rate": 7.94775779062929e-06, "loss": 4.0312, "step": 14903 }, { "epoch": 4.964770550512201, "grad_norm": 0.7421875, "learning_rate": 7.946949793330361e-06, "loss": 3.9923, "step": 14904 }, { "epoch": 4.96510368951445, "grad_norm": 0.69140625, "learning_rate": 7.946141790825432e-06, "loss": 3.991, "step": 14905 }, { "epoch": 4.965436828516698, "grad_norm": 0.75390625, "learning_rate": 7.945333783123919e-06, "loss": 4.0491, "step": 14906 }, { "epoch": 4.9657699675189475, "grad_norm": 0.7265625, "learning_rate": 7.94452577023523e-06, "loss": 3.9915, "step": 14907 }, { "epoch": 4.966103106521196, "grad_norm": 0.74609375, "learning_rate": 7.943717752168782e-06, "loss": 4.0133, "step": 14908 }, { "epoch": 4.966436245523445, "grad_norm": 0.76171875, "learning_rate": 7.942909728933982e-06, "loss": 4.008, "step": 14909 }, { "epoch": 4.966769384525693, "grad_norm": 0.765625, "learning_rate": 7.942101700540243e-06, "loss": 3.9726, "step": 14910 }, { "epoch": 4.967102523527942, "grad_norm": 0.73046875, "learning_rate": 7.941293666996974e-06, "loss": 3.989, "step": 14911 }, { "epoch": 4.967435662530191, "grad_norm": 0.7421875, "learning_rate": 7.940485628313596e-06, "loss": 4.004, "step": 14912 }, { "epoch": 4.967768801532439, "grad_norm": 0.75, "learning_rate": 7.939677584499508e-06, "loss": 3.9155, "step": 14913 }, { "epoch": 4.968101940534688, "grad_norm": 0.73828125, "learning_rate": 7.938869535564133e-06, "loss": 3.9635, "step": 14914 }, { "epoch": 4.9684350795369365, "grad_norm": 0.75, "learning_rate": 7.938061481516876e-06, "loss": 3.9584, "step": 14915 }, { "epoch": 4.968768218539186, "grad_norm": 0.76171875, "learning_rate": 7.937253422367152e-06, "loss": 3.9539, "step": 14916 }, { "epoch": 4.969101357541434, "grad_norm": 0.73046875, "learning_rate": 7.936445358124374e-06, "loss": 4.0013, "step": 14917 }, { "epoch": 4.969434496543683, "grad_norm": 0.71484375, "learning_rate": 7.93563728879795e-06, "loss": 3.9894, "step": 14918 }, { "epoch": 4.9697676355459315, "grad_norm": 0.7578125, "learning_rate": 7.934829214397297e-06, "loss": 3.9801, "step": 14919 }, { "epoch": 4.970100774548181, "grad_norm": 0.72265625, "learning_rate": 7.934021134931825e-06, "loss": 3.9605, "step": 14920 }, { "epoch": 4.970433913550429, "grad_norm": 0.77734375, "learning_rate": 7.933213050410948e-06, "loss": 3.9149, "step": 14921 }, { "epoch": 4.970767052552677, "grad_norm": 0.70703125, "learning_rate": 7.932404960844077e-06, "loss": 4.0371, "step": 14922 }, { "epoch": 4.971100191554926, "grad_norm": 0.8203125, "learning_rate": 7.931596866240625e-06, "loss": 3.975, "step": 14923 }, { "epoch": 4.971433330557175, "grad_norm": 0.76171875, "learning_rate": 7.930788766610004e-06, "loss": 3.9266, "step": 14924 }, { "epoch": 4.971766469559424, "grad_norm": 0.73828125, "learning_rate": 7.929980661961626e-06, "loss": 3.9351, "step": 14925 }, { "epoch": 4.972099608561672, "grad_norm": 0.73828125, "learning_rate": 7.929172552304906e-06, "loss": 4.0077, "step": 14926 }, { "epoch": 4.972432747563921, "grad_norm": 0.7265625, "learning_rate": 7.928364437649252e-06, "loss": 3.9914, "step": 14927 }, { "epoch": 4.97276588656617, "grad_norm": 0.70703125, "learning_rate": 7.927556318004085e-06, "loss": 3.9534, "step": 14928 }, { "epoch": 4.973099025568418, "grad_norm": 0.77734375, "learning_rate": 7.92674819337881e-06, "loss": 4.0267, "step": 14929 }, { "epoch": 4.973432164570667, "grad_norm": 0.71484375, "learning_rate": 7.925940063782846e-06, "loss": 4.0935, "step": 14930 }, { "epoch": 4.9737653035729155, "grad_norm": 0.73828125, "learning_rate": 7.925131929225599e-06, "loss": 4.0304, "step": 14931 }, { "epoch": 4.974098442575165, "grad_norm": 0.69140625, "learning_rate": 7.924323789716489e-06, "loss": 3.9779, "step": 14932 }, { "epoch": 4.974431581577413, "grad_norm": 0.7421875, "learning_rate": 7.923515645264923e-06, "loss": 3.991, "step": 14933 }, { "epoch": 4.974764720579662, "grad_norm": 0.76171875, "learning_rate": 7.922707495880318e-06, "loss": 3.9608, "step": 14934 }, { "epoch": 4.97509785958191, "grad_norm": 0.734375, "learning_rate": 7.921899341572087e-06, "loss": 3.9924, "step": 14935 }, { "epoch": 4.97543099858416, "grad_norm": 0.72265625, "learning_rate": 7.921091182349642e-06, "loss": 3.945, "step": 14936 }, { "epoch": 4.975764137586408, "grad_norm": 0.7109375, "learning_rate": 7.920283018222397e-06, "loss": 4.0569, "step": 14937 }, { "epoch": 4.976097276588657, "grad_norm": 0.71875, "learning_rate": 7.919474849199766e-06, "loss": 3.9807, "step": 14938 }, { "epoch": 4.976430415590905, "grad_norm": 0.71875, "learning_rate": 7.918666675291162e-06, "loss": 3.9196, "step": 14939 }, { "epoch": 4.976763554593154, "grad_norm": 0.77734375, "learning_rate": 7.917858496505995e-06, "loss": 3.9089, "step": 14940 }, { "epoch": 4.977096693595403, "grad_norm": 0.69921875, "learning_rate": 7.917050312853685e-06, "loss": 4.0004, "step": 14941 }, { "epoch": 4.977429832597651, "grad_norm": 0.78125, "learning_rate": 7.91624212434364e-06, "loss": 3.9748, "step": 14942 }, { "epoch": 4.9777629715999, "grad_norm": 0.734375, "learning_rate": 7.915433930985275e-06, "loss": 4.0034, "step": 14943 }, { "epoch": 4.978096110602149, "grad_norm": 0.73046875, "learning_rate": 7.914625732788007e-06, "loss": 3.9742, "step": 14944 }, { "epoch": 4.978429249604398, "grad_norm": 0.703125, "learning_rate": 7.913817529761248e-06, "loss": 3.9621, "step": 14945 }, { "epoch": 4.978762388606646, "grad_norm": 0.7421875, "learning_rate": 7.913009321914407e-06, "loss": 3.9938, "step": 14946 }, { "epoch": 4.979095527608894, "grad_norm": 0.75390625, "learning_rate": 7.912201109256906e-06, "loss": 3.9773, "step": 14947 }, { "epoch": 4.979428666611144, "grad_norm": 0.74609375, "learning_rate": 7.911392891798152e-06, "loss": 3.9599, "step": 14948 }, { "epoch": 4.979761805613392, "grad_norm": 0.7265625, "learning_rate": 7.910584669547564e-06, "loss": 3.94, "step": 14949 }, { "epoch": 4.980094944615641, "grad_norm": 0.7421875, "learning_rate": 7.909776442514553e-06, "loss": 3.9823, "step": 14950 }, { "epoch": 4.980428083617889, "grad_norm": 0.72265625, "learning_rate": 7.908968210708533e-06, "loss": 4.0206, "step": 14951 }, { "epoch": 4.9807612226201385, "grad_norm": 0.703125, "learning_rate": 7.908159974138919e-06, "loss": 3.9879, "step": 14952 }, { "epoch": 4.981094361622387, "grad_norm": 0.71875, "learning_rate": 7.907351732815129e-06, "loss": 4.0436, "step": 14953 }, { "epoch": 4.981427500624636, "grad_norm": 0.7421875, "learning_rate": 7.90654348674657e-06, "loss": 3.9683, "step": 14954 }, { "epoch": 4.981760639626884, "grad_norm": 0.74609375, "learning_rate": 7.90573523594266e-06, "loss": 4.0545, "step": 14955 }, { "epoch": 4.9820937786291335, "grad_norm": 0.74609375, "learning_rate": 7.904926980412815e-06, "loss": 4.0123, "step": 14956 }, { "epoch": 4.982426917631382, "grad_norm": 0.7265625, "learning_rate": 7.904118720166448e-06, "loss": 4.021, "step": 14957 }, { "epoch": 4.98276005663363, "grad_norm": 0.6953125, "learning_rate": 7.903310455212975e-06, "loss": 3.9256, "step": 14958 }, { "epoch": 4.983093195635879, "grad_norm": 0.76953125, "learning_rate": 7.902502185561805e-06, "loss": 4.0231, "step": 14959 }, { "epoch": 4.983426334638128, "grad_norm": 0.73828125, "learning_rate": 7.901693911222356e-06, "loss": 3.8573, "step": 14960 }, { "epoch": 4.983759473640377, "grad_norm": 0.7421875, "learning_rate": 7.900885632204045e-06, "loss": 4.0342, "step": 14961 }, { "epoch": 4.984092612642625, "grad_norm": 0.72265625, "learning_rate": 7.900077348516285e-06, "loss": 3.989, "step": 14962 }, { "epoch": 4.984425751644874, "grad_norm": 0.765625, "learning_rate": 7.89926906016849e-06, "loss": 4.0645, "step": 14963 }, { "epoch": 4.9847588906471225, "grad_norm": 0.7109375, "learning_rate": 7.898460767170073e-06, "loss": 4.0032, "step": 14964 }, { "epoch": 4.985092029649371, "grad_norm": 0.71875, "learning_rate": 7.897652469530454e-06, "loss": 4.0767, "step": 14965 }, { "epoch": 4.98542516865162, "grad_norm": 0.76953125, "learning_rate": 7.896844167259045e-06, "loss": 3.9982, "step": 14966 }, { "epoch": 4.985758307653868, "grad_norm": 0.72265625, "learning_rate": 7.89603586036526e-06, "loss": 4.0079, "step": 14967 }, { "epoch": 4.9860914466561175, "grad_norm": 0.73828125, "learning_rate": 7.895227548858516e-06, "loss": 3.9525, "step": 14968 }, { "epoch": 4.986424585658366, "grad_norm": 0.73046875, "learning_rate": 7.894419232748227e-06, "loss": 3.9714, "step": 14969 }, { "epoch": 4.986757724660615, "grad_norm": 0.73828125, "learning_rate": 7.893610912043806e-06, "loss": 4.0, "step": 14970 }, { "epoch": 4.987090863662863, "grad_norm": 0.75390625, "learning_rate": 7.892802586754673e-06, "loss": 3.9576, "step": 14971 }, { "epoch": 4.9874240026651115, "grad_norm": 0.75390625, "learning_rate": 7.89199425689024e-06, "loss": 3.9651, "step": 14972 }, { "epoch": 4.987757141667361, "grad_norm": 0.69921875, "learning_rate": 7.891185922459922e-06, "loss": 4.0088, "step": 14973 }, { "epoch": 4.988090280669609, "grad_norm": 0.7578125, "learning_rate": 7.890377583473135e-06, "loss": 4.0013, "step": 14974 }, { "epoch": 4.988423419671858, "grad_norm": 0.7578125, "learning_rate": 7.889569239939297e-06, "loss": 4.0196, "step": 14975 }, { "epoch": 4.9887565586741065, "grad_norm": 0.79296875, "learning_rate": 7.88876089186782e-06, "loss": 3.9796, "step": 14976 }, { "epoch": 4.989089697676356, "grad_norm": 0.796875, "learning_rate": 7.88795253926812e-06, "loss": 3.9956, "step": 14977 }, { "epoch": 4.989422836678604, "grad_norm": 0.7109375, "learning_rate": 7.887144182149614e-06, "loss": 3.9369, "step": 14978 }, { "epoch": 4.989755975680853, "grad_norm": 0.69921875, "learning_rate": 7.886335820521716e-06, "loss": 4.0621, "step": 14979 }, { "epoch": 4.9900891146831015, "grad_norm": 0.765625, "learning_rate": 7.885527454393845e-06, "loss": 4.0022, "step": 14980 }, { "epoch": 4.990422253685351, "grad_norm": 0.72265625, "learning_rate": 7.88471908377541e-06, "loss": 4.0823, "step": 14981 }, { "epoch": 4.990755392687599, "grad_norm": 0.7890625, "learning_rate": 7.883910708675834e-06, "loss": 3.9569, "step": 14982 }, { "epoch": 4.991088531689847, "grad_norm": 0.7890625, "learning_rate": 7.88310232910453e-06, "loss": 3.9279, "step": 14983 }, { "epoch": 4.991421670692096, "grad_norm": 0.73828125, "learning_rate": 7.882293945070912e-06, "loss": 4.0181, "step": 14984 }, { "epoch": 4.991754809694345, "grad_norm": 0.765625, "learning_rate": 7.881485556584399e-06, "loss": 3.9988, "step": 14985 }, { "epoch": 4.992087948696594, "grad_norm": 0.75, "learning_rate": 7.880677163654403e-06, "loss": 3.9987, "step": 14986 }, { "epoch": 4.992421087698842, "grad_norm": 0.76953125, "learning_rate": 7.879868766290346e-06, "loss": 3.9513, "step": 14987 }, { "epoch": 4.992754226701091, "grad_norm": 0.78125, "learning_rate": 7.879060364501638e-06, "loss": 3.8955, "step": 14988 }, { "epoch": 4.99308736570334, "grad_norm": 0.7578125, "learning_rate": 7.8782519582977e-06, "loss": 4.0812, "step": 14989 }, { "epoch": 4.993420504705588, "grad_norm": 0.76171875, "learning_rate": 7.877443547687946e-06, "loss": 4.0217, "step": 14990 }, { "epoch": 4.993753643707837, "grad_norm": 0.7265625, "learning_rate": 7.87663513268179e-06, "loss": 3.97, "step": 14991 }, { "epoch": 4.994086782710085, "grad_norm": 0.7578125, "learning_rate": 7.875826713288652e-06, "loss": 3.9704, "step": 14992 }, { "epoch": 4.994419921712335, "grad_norm": 0.75, "learning_rate": 7.875018289517946e-06, "loss": 4.0251, "step": 14993 }, { "epoch": 4.994753060714583, "grad_norm": 0.72265625, "learning_rate": 7.87420986137909e-06, "loss": 3.9786, "step": 14994 }, { "epoch": 4.995086199716832, "grad_norm": 0.81640625, "learning_rate": 7.873401428881498e-06, "loss": 4.0167, "step": 14995 }, { "epoch": 4.99541933871908, "grad_norm": 0.7890625, "learning_rate": 7.872592992034591e-06, "loss": 3.9391, "step": 14996 }, { "epoch": 4.99575247772133, "grad_norm": 0.7109375, "learning_rate": 7.871784550847778e-06, "loss": 3.9754, "step": 14997 }, { "epoch": 4.996085616723578, "grad_norm": 0.7421875, "learning_rate": 7.870976105330486e-06, "loss": 4.0123, "step": 14998 }, { "epoch": 4.996418755725827, "grad_norm": 0.71484375, "learning_rate": 7.870167655492124e-06, "loss": 4.0164, "step": 14999 }, { "epoch": 4.996751894728075, "grad_norm": 0.70703125, "learning_rate": 7.86935920134211e-06, "loss": 3.9929, "step": 15000 }, { "epoch": 4.997085033730324, "grad_norm": 0.71484375, "learning_rate": 7.86855074288986e-06, "loss": 3.9955, "step": 15001 }, { "epoch": 4.997418172732573, "grad_norm": 0.69921875, "learning_rate": 7.867742280144796e-06, "loss": 4.015, "step": 15002 }, { "epoch": 4.997751311734821, "grad_norm": 0.71875, "learning_rate": 7.866933813116326e-06, "loss": 3.9914, "step": 15003 }, { "epoch": 4.99808445073707, "grad_norm": 0.703125, "learning_rate": 7.866125341813875e-06, "loss": 3.9671, "step": 15004 }, { "epoch": 4.998417589739319, "grad_norm": 0.78125, "learning_rate": 7.865316866246854e-06, "loss": 3.9197, "step": 15005 }, { "epoch": 4.998750728741568, "grad_norm": 0.7421875, "learning_rate": 7.864508386424687e-06, "loss": 4.0545, "step": 15006 }, { "epoch": 4.999083867743816, "grad_norm": 0.75390625, "learning_rate": 7.863699902356785e-06, "loss": 4.003, "step": 15007 }, { "epoch": 4.999417006746064, "grad_norm": 0.765625, "learning_rate": 7.862891414052565e-06, "loss": 4.0169, "step": 15008 }, { "epoch": 4.999750145748314, "grad_norm": 0.72265625, "learning_rate": 7.862082921521448e-06, "loss": 4.073, "step": 15009 }, { "epoch": 5.0, "grad_norm": 0.828125, "learning_rate": 7.861274424772848e-06, "loss": 4.0141, "step": 15010 }, { "epoch": 5.000333139002248, "grad_norm": 0.75, "learning_rate": 7.860465923816185e-06, "loss": 3.9595, "step": 15011 }, { "epoch": 5.0006662780044975, "grad_norm": 0.734375, "learning_rate": 7.859657418660874e-06, "loss": 4.0506, "step": 15012 }, { "epoch": 5.000999417006746, "grad_norm": 0.75390625, "learning_rate": 7.858848909316333e-06, "loss": 3.9601, "step": 15013 }, { "epoch": 5.001332556008995, "grad_norm": 0.73046875, "learning_rate": 7.858040395791978e-06, "loss": 4.0719, "step": 15014 }, { "epoch": 5.001665695011243, "grad_norm": 0.72265625, "learning_rate": 7.857231878097231e-06, "loss": 3.8962, "step": 15015 }, { "epoch": 5.001998834013492, "grad_norm": 0.74609375, "learning_rate": 7.856423356241506e-06, "loss": 3.9595, "step": 15016 }, { "epoch": 5.002331973015741, "grad_norm": 0.78515625, "learning_rate": 7.855614830234219e-06, "loss": 3.9831, "step": 15017 }, { "epoch": 5.00266511201799, "grad_norm": 0.76953125, "learning_rate": 7.85480630008479e-06, "loss": 3.9849, "step": 15018 }, { "epoch": 5.002998251020238, "grad_norm": 0.70703125, "learning_rate": 7.853997765802636e-06, "loss": 3.9657, "step": 15019 }, { "epoch": 5.0033313900224865, "grad_norm": 0.73046875, "learning_rate": 7.853189227397177e-06, "loss": 4.0504, "step": 15020 }, { "epoch": 5.003664529024736, "grad_norm": 0.75, "learning_rate": 7.852380684877825e-06, "loss": 3.9755, "step": 15021 }, { "epoch": 5.003997668026984, "grad_norm": 0.7421875, "learning_rate": 7.851572138254003e-06, "loss": 3.9461, "step": 15022 }, { "epoch": 5.004330807029233, "grad_norm": 0.73828125, "learning_rate": 7.850763587535127e-06, "loss": 4.0253, "step": 15023 }, { "epoch": 5.0046639460314815, "grad_norm": 0.73046875, "learning_rate": 7.849955032730616e-06, "loss": 3.9763, "step": 15024 }, { "epoch": 5.004997085033731, "grad_norm": 0.7578125, "learning_rate": 7.849146473849885e-06, "loss": 3.9722, "step": 15025 }, { "epoch": 5.005330224035979, "grad_norm": 0.74609375, "learning_rate": 7.848337910902356e-06, "loss": 3.983, "step": 15026 }, { "epoch": 5.005663363038228, "grad_norm": 0.7734375, "learning_rate": 7.847529343897442e-06, "loss": 3.9778, "step": 15027 }, { "epoch": 5.005996502040476, "grad_norm": 0.77734375, "learning_rate": 7.846720772844566e-06, "loss": 3.9853, "step": 15028 }, { "epoch": 5.006329641042725, "grad_norm": 0.76171875, "learning_rate": 7.845912197753143e-06, "loss": 3.9613, "step": 15029 }, { "epoch": 5.006662780044974, "grad_norm": 0.71484375, "learning_rate": 7.845103618632594e-06, "loss": 3.9434, "step": 15030 }, { "epoch": 5.006995919047222, "grad_norm": 0.75, "learning_rate": 7.844295035492332e-06, "loss": 3.971, "step": 15031 }, { "epoch": 5.007329058049471, "grad_norm": 0.765625, "learning_rate": 7.84348644834178e-06, "loss": 4.0083, "step": 15032 }, { "epoch": 5.00766219705172, "grad_norm": 0.75, "learning_rate": 7.842677857190359e-06, "loss": 3.985, "step": 15033 }, { "epoch": 5.007995336053969, "grad_norm": 0.7421875, "learning_rate": 7.841869262047476e-06, "loss": 3.953, "step": 15034 }, { "epoch": 5.008328475056217, "grad_norm": 0.74609375, "learning_rate": 7.841060662922561e-06, "loss": 4.0028, "step": 15035 }, { "epoch": 5.008661614058466, "grad_norm": 0.7421875, "learning_rate": 7.840252059825026e-06, "loss": 4.0147, "step": 15036 }, { "epoch": 5.008994753060715, "grad_norm": 0.73828125, "learning_rate": 7.839443452764293e-06, "loss": 4.0257, "step": 15037 }, { "epoch": 5.009327892062963, "grad_norm": 0.74609375, "learning_rate": 7.83863484174978e-06, "loss": 3.9658, "step": 15038 }, { "epoch": 5.009661031065212, "grad_norm": 0.78125, "learning_rate": 7.837826226790902e-06, "loss": 4.0302, "step": 15039 }, { "epoch": 5.00999417006746, "grad_norm": 0.734375, "learning_rate": 7.837017607897082e-06, "loss": 3.9606, "step": 15040 }, { "epoch": 5.01032730906971, "grad_norm": 0.71875, "learning_rate": 7.836208985077735e-06, "loss": 4.0404, "step": 15041 }, { "epoch": 5.010660448071958, "grad_norm": 0.6796875, "learning_rate": 7.835400358342285e-06, "loss": 4.0658, "step": 15042 }, { "epoch": 5.010993587074207, "grad_norm": 0.72265625, "learning_rate": 7.834591727700144e-06, "loss": 4.0378, "step": 15043 }, { "epoch": 5.011326726076455, "grad_norm": 0.72265625, "learning_rate": 7.833783093160735e-06, "loss": 3.9552, "step": 15044 }, { "epoch": 5.011659865078704, "grad_norm": 0.73046875, "learning_rate": 7.832974454733476e-06, "loss": 4.0775, "step": 15045 }, { "epoch": 5.011993004080953, "grad_norm": 0.734375, "learning_rate": 7.832165812427787e-06, "loss": 3.9791, "step": 15046 }, { "epoch": 5.012326143083201, "grad_norm": 0.76171875, "learning_rate": 7.831357166253086e-06, "loss": 4.0049, "step": 15047 }, { "epoch": 5.01265928208545, "grad_norm": 0.70703125, "learning_rate": 7.83054851621879e-06, "loss": 3.9662, "step": 15048 }, { "epoch": 5.012992421087699, "grad_norm": 0.74609375, "learning_rate": 7.829739862334319e-06, "loss": 3.9952, "step": 15049 }, { "epoch": 5.013325560089948, "grad_norm": 0.76953125, "learning_rate": 7.828931204609094e-06, "loss": 4.0007, "step": 15050 }, { "epoch": 5.013658699092196, "grad_norm": 0.7734375, "learning_rate": 7.828122543052534e-06, "loss": 4.0386, "step": 15051 }, { "epoch": 5.013991838094445, "grad_norm": 0.734375, "learning_rate": 7.827313877674055e-06, "loss": 3.987, "step": 15052 }, { "epoch": 5.014324977096694, "grad_norm": 0.734375, "learning_rate": 7.82650520848308e-06, "loss": 4.0689, "step": 15053 }, { "epoch": 5.014658116098942, "grad_norm": 0.74609375, "learning_rate": 7.825696535489024e-06, "loss": 4.041, "step": 15054 }, { "epoch": 5.014991255101191, "grad_norm": 0.75390625, "learning_rate": 7.82488785870131e-06, "loss": 3.9582, "step": 15055 }, { "epoch": 5.015324394103439, "grad_norm": 0.71875, "learning_rate": 7.82407917812936e-06, "loss": 3.9625, "step": 15056 }, { "epoch": 5.0156575331056885, "grad_norm": 0.79296875, "learning_rate": 7.823270493782586e-06, "loss": 3.9746, "step": 15057 }, { "epoch": 5.015990672107937, "grad_norm": 0.7109375, "learning_rate": 7.822461805670409e-06, "loss": 3.9985, "step": 15058 }, { "epoch": 5.016323811110186, "grad_norm": 0.76953125, "learning_rate": 7.821653113802254e-06, "loss": 3.9771, "step": 15059 }, { "epoch": 5.016656950112434, "grad_norm": 0.734375, "learning_rate": 7.820844418187537e-06, "loss": 4.0362, "step": 15060 }, { "epoch": 5.0169900891146835, "grad_norm": 0.7265625, "learning_rate": 7.820035718835675e-06, "loss": 3.9338, "step": 15061 }, { "epoch": 5.017323228116932, "grad_norm": 0.7265625, "learning_rate": 7.81922701575609e-06, "loss": 4.0182, "step": 15062 }, { "epoch": 5.01765636711918, "grad_norm": 0.78515625, "learning_rate": 7.818418308958202e-06, "loss": 4.0446, "step": 15063 }, { "epoch": 5.017989506121429, "grad_norm": 0.73828125, "learning_rate": 7.817609598451433e-06, "loss": 4.0584, "step": 15064 }, { "epoch": 5.0183226451236775, "grad_norm": 0.70703125, "learning_rate": 7.816800884245198e-06, "loss": 4.0251, "step": 15065 }, { "epoch": 5.018655784125927, "grad_norm": 0.75, "learning_rate": 7.815992166348922e-06, "loss": 3.9815, "step": 15066 }, { "epoch": 5.018988923128175, "grad_norm": 0.73828125, "learning_rate": 7.815183444772016e-06, "loss": 3.9502, "step": 15067 }, { "epoch": 5.019322062130424, "grad_norm": 0.734375, "learning_rate": 7.814374719523909e-06, "loss": 4.0352, "step": 15068 }, { "epoch": 5.0196552011326725, "grad_norm": 0.70703125, "learning_rate": 7.813565990614017e-06, "loss": 3.9663, "step": 15069 }, { "epoch": 5.019988340134922, "grad_norm": 0.72265625, "learning_rate": 7.812757258051762e-06, "loss": 3.99, "step": 15070 }, { "epoch": 5.02032147913717, "grad_norm": 0.7578125, "learning_rate": 7.811948521846558e-06, "loss": 3.9364, "step": 15071 }, { "epoch": 5.020654618139418, "grad_norm": 0.71875, "learning_rate": 7.811139782007834e-06, "loss": 4.0019, "step": 15072 }, { "epoch": 5.0209877571416675, "grad_norm": 0.75, "learning_rate": 7.810331038545002e-06, "loss": 4.0303, "step": 15073 }, { "epoch": 5.021320896143916, "grad_norm": 0.7109375, "learning_rate": 7.809522291467489e-06, "loss": 4.0523, "step": 15074 }, { "epoch": 5.021654035146165, "grad_norm": 0.7734375, "learning_rate": 7.80871354078471e-06, "loss": 3.8836, "step": 15075 }, { "epoch": 5.021987174148413, "grad_norm": 0.7109375, "learning_rate": 7.807904786506085e-06, "loss": 3.992, "step": 15076 }, { "epoch": 5.022320313150662, "grad_norm": 0.78515625, "learning_rate": 7.80709602864104e-06, "loss": 4.0008, "step": 15077 }, { "epoch": 5.022653452152911, "grad_norm": 0.75390625, "learning_rate": 7.806287267198989e-06, "loss": 3.9412, "step": 15078 }, { "epoch": 5.02298659115516, "grad_norm": 0.7578125, "learning_rate": 7.805478502189354e-06, "loss": 3.9848, "step": 15079 }, { "epoch": 5.023319730157408, "grad_norm": 0.71484375, "learning_rate": 7.804669733621558e-06, "loss": 3.9169, "step": 15080 }, { "epoch": 5.0236528691596565, "grad_norm": 0.80078125, "learning_rate": 7.803860961505019e-06, "loss": 3.9917, "step": 15081 }, { "epoch": 5.023986008161906, "grad_norm": 0.75, "learning_rate": 7.803052185849158e-06, "loss": 3.9566, "step": 15082 }, { "epoch": 5.024319147164154, "grad_norm": 0.734375, "learning_rate": 7.802243406663395e-06, "loss": 4.0228, "step": 15083 }, { "epoch": 5.024652286166403, "grad_norm": 0.76171875, "learning_rate": 7.801434623957152e-06, "loss": 3.962, "step": 15084 }, { "epoch": 5.024985425168651, "grad_norm": 0.7734375, "learning_rate": 7.800625837739845e-06, "loss": 3.9241, "step": 15085 }, { "epoch": 5.025318564170901, "grad_norm": 0.8125, "learning_rate": 7.799817048020903e-06, "loss": 3.9622, "step": 15086 }, { "epoch": 5.025651703173149, "grad_norm": 0.7421875, "learning_rate": 7.79900825480974e-06, "loss": 3.9878, "step": 15087 }, { "epoch": 5.025984842175398, "grad_norm": 0.7578125, "learning_rate": 7.798199458115777e-06, "loss": 3.9321, "step": 15088 }, { "epoch": 5.026317981177646, "grad_norm": 0.73828125, "learning_rate": 7.797390657948438e-06, "loss": 4.04, "step": 15089 }, { "epoch": 5.026651120179895, "grad_norm": 0.69140625, "learning_rate": 7.796581854317141e-06, "loss": 4.0094, "step": 15090 }, { "epoch": 5.026984259182144, "grad_norm": 0.73828125, "learning_rate": 7.795773047231308e-06, "loss": 3.9389, "step": 15091 }, { "epoch": 5.027317398184392, "grad_norm": 0.76953125, "learning_rate": 7.794964236700361e-06, "loss": 4.041, "step": 15092 }, { "epoch": 5.027650537186641, "grad_norm": 0.78125, "learning_rate": 7.794155422733718e-06, "loss": 3.8855, "step": 15093 }, { "epoch": 5.02798367618889, "grad_norm": 0.7578125, "learning_rate": 7.793346605340801e-06, "loss": 3.9984, "step": 15094 }, { "epoch": 5.028316815191139, "grad_norm": 0.73828125, "learning_rate": 7.792537784531031e-06, "loss": 3.9849, "step": 15095 }, { "epoch": 5.028649954193387, "grad_norm": 0.71875, "learning_rate": 7.791728960313833e-06, "loss": 3.9996, "step": 15096 }, { "epoch": 5.028983093195636, "grad_norm": 0.73828125, "learning_rate": 7.790920132698623e-06, "loss": 4.057, "step": 15097 }, { "epoch": 5.029316232197885, "grad_norm": 0.72265625, "learning_rate": 7.79011130169482e-06, "loss": 3.9892, "step": 15098 }, { "epoch": 5.029649371200133, "grad_norm": 0.73046875, "learning_rate": 7.789302467311854e-06, "loss": 3.9706, "step": 15099 }, { "epoch": 5.029982510202382, "grad_norm": 0.71484375, "learning_rate": 7.788493629559135e-06, "loss": 4.0281, "step": 15100 }, { "epoch": 5.03031564920463, "grad_norm": 0.73828125, "learning_rate": 7.787684788446097e-06, "loss": 3.9327, "step": 15101 }, { "epoch": 5.0306487882068796, "grad_norm": 0.7421875, "learning_rate": 7.78687594398215e-06, "loss": 3.9812, "step": 15102 }, { "epoch": 5.030981927209128, "grad_norm": 0.75, "learning_rate": 7.78606709617672e-06, "loss": 3.9981, "step": 15103 }, { "epoch": 5.031315066211377, "grad_norm": 0.76171875, "learning_rate": 7.785258245039227e-06, "loss": 3.9774, "step": 15104 }, { "epoch": 5.031648205213625, "grad_norm": 0.78125, "learning_rate": 7.784449390579096e-06, "loss": 4.0137, "step": 15105 }, { "epoch": 5.0319813442158745, "grad_norm": 0.70703125, "learning_rate": 7.783640532805746e-06, "loss": 3.9931, "step": 15106 }, { "epoch": 5.032314483218123, "grad_norm": 0.75390625, "learning_rate": 7.782831671728594e-06, "loss": 4.0402, "step": 15107 }, { "epoch": 5.032647622220371, "grad_norm": 0.7109375, "learning_rate": 7.782022807357068e-06, "loss": 3.9926, "step": 15108 }, { "epoch": 5.03298076122262, "grad_norm": 0.74609375, "learning_rate": 7.781213939700588e-06, "loss": 4.0005, "step": 15109 }, { "epoch": 5.033313900224869, "grad_norm": 0.75, "learning_rate": 7.780405068768575e-06, "loss": 4.0256, "step": 15110 }, { "epoch": 5.033647039227118, "grad_norm": 0.7421875, "learning_rate": 7.779596194570449e-06, "loss": 4.0131, "step": 15111 }, { "epoch": 5.033980178229366, "grad_norm": 0.7890625, "learning_rate": 7.778787317115634e-06, "loss": 3.9215, "step": 15112 }, { "epoch": 5.034313317231615, "grad_norm": 0.78515625, "learning_rate": 7.777978436413549e-06, "loss": 3.9665, "step": 15113 }, { "epoch": 5.0346464562338635, "grad_norm": 0.765625, "learning_rate": 7.777169552473617e-06, "loss": 4.0248, "step": 15114 }, { "epoch": 5.034979595236113, "grad_norm": 0.75, "learning_rate": 7.776360665305262e-06, "loss": 4.0526, "step": 15115 }, { "epoch": 5.035312734238361, "grad_norm": 0.734375, "learning_rate": 7.775551774917904e-06, "loss": 3.9841, "step": 15116 }, { "epoch": 5.035645873240609, "grad_norm": 0.78515625, "learning_rate": 7.774742881320963e-06, "loss": 3.979, "step": 15117 }, { "epoch": 5.0359790122428585, "grad_norm": 0.8046875, "learning_rate": 7.773933984523864e-06, "loss": 4.0132, "step": 15118 }, { "epoch": 5.036312151245107, "grad_norm": 0.76171875, "learning_rate": 7.773125084536028e-06, "loss": 4.0636, "step": 15119 }, { "epoch": 5.036645290247356, "grad_norm": 0.765625, "learning_rate": 7.772316181366873e-06, "loss": 4.0116, "step": 15120 }, { "epoch": 5.036978429249604, "grad_norm": 0.75390625, "learning_rate": 7.771507275025827e-06, "loss": 3.9708, "step": 15121 }, { "epoch": 5.0373115682518534, "grad_norm": 0.71875, "learning_rate": 7.77069836552231e-06, "loss": 4.0444, "step": 15122 }, { "epoch": 5.037644707254102, "grad_norm": 0.7578125, "learning_rate": 7.769889452865742e-06, "loss": 3.9526, "step": 15123 }, { "epoch": 5.03797784625635, "grad_norm": 0.78125, "learning_rate": 7.769080537065545e-06, "loss": 3.953, "step": 15124 }, { "epoch": 5.038310985258599, "grad_norm": 0.71875, "learning_rate": 7.768271618131145e-06, "loss": 3.9855, "step": 15125 }, { "epoch": 5.0386441242608475, "grad_norm": 0.7421875, "learning_rate": 7.767462696071961e-06, "loss": 3.9749, "step": 15126 }, { "epoch": 5.038977263263097, "grad_norm": 0.73046875, "learning_rate": 7.766653770897416e-06, "loss": 3.9261, "step": 15127 }, { "epoch": 5.039310402265345, "grad_norm": 0.796875, "learning_rate": 7.76584484261693e-06, "loss": 4.0052, "step": 15128 }, { "epoch": 5.039643541267594, "grad_norm": 0.80859375, "learning_rate": 7.765035911239929e-06, "loss": 4.0241, "step": 15129 }, { "epoch": 5.0399766802698425, "grad_norm": 0.74609375, "learning_rate": 7.764226976775835e-06, "loss": 4.042, "step": 15130 }, { "epoch": 5.040309819272092, "grad_norm": 0.78125, "learning_rate": 7.763418039234067e-06, "loss": 3.9111, "step": 15131 }, { "epoch": 5.04064295827434, "grad_norm": 0.76953125, "learning_rate": 7.762609098624053e-06, "loss": 4.0282, "step": 15132 }, { "epoch": 5.040976097276588, "grad_norm": 0.7421875, "learning_rate": 7.761800154955209e-06, "loss": 4.0033, "step": 15133 }, { "epoch": 5.041309236278837, "grad_norm": 0.73828125, "learning_rate": 7.760991208236961e-06, "loss": 4.0032, "step": 15134 }, { "epoch": 5.041642375281086, "grad_norm": 0.7890625, "learning_rate": 7.760182258478729e-06, "loss": 3.9888, "step": 15135 }, { "epoch": 5.041975514283335, "grad_norm": 0.74609375, "learning_rate": 7.759373305689939e-06, "loss": 3.9268, "step": 15136 }, { "epoch": 5.042308653285583, "grad_norm": 0.74609375, "learning_rate": 7.758564349880012e-06, "loss": 4.035, "step": 15137 }, { "epoch": 5.042641792287832, "grad_norm": 0.73828125, "learning_rate": 7.75775539105837e-06, "loss": 3.9958, "step": 15138 }, { "epoch": 5.042974931290081, "grad_norm": 0.77734375, "learning_rate": 7.756946429234433e-06, "loss": 3.9122, "step": 15139 }, { "epoch": 5.04330807029233, "grad_norm": 0.75, "learning_rate": 7.756137464417631e-06, "loss": 3.9671, "step": 15140 }, { "epoch": 5.043641209294578, "grad_norm": 0.76953125, "learning_rate": 7.755328496617382e-06, "loss": 3.9671, "step": 15141 }, { "epoch": 5.0439743482968264, "grad_norm": 0.75, "learning_rate": 7.754519525843106e-06, "loss": 4.0114, "step": 15142 }, { "epoch": 5.044307487299076, "grad_norm": 0.765625, "learning_rate": 7.753710552104231e-06, "loss": 4.0078, "step": 15143 }, { "epoch": 5.044640626301324, "grad_norm": 0.69921875, "learning_rate": 7.752901575410175e-06, "loss": 4.0268, "step": 15144 }, { "epoch": 5.044973765303573, "grad_norm": 0.73828125, "learning_rate": 7.752092595770369e-06, "loss": 3.9753, "step": 15145 }, { "epoch": 5.045306904305821, "grad_norm": 0.71484375, "learning_rate": 7.751283613194225e-06, "loss": 3.9233, "step": 15146 }, { "epoch": 5.045640043308071, "grad_norm": 0.75, "learning_rate": 7.750474627691174e-06, "loss": 3.9376, "step": 15147 }, { "epoch": 5.045973182310319, "grad_norm": 0.73828125, "learning_rate": 7.749665639270633e-06, "loss": 4.0094, "step": 15148 }, { "epoch": 5.046306321312568, "grad_norm": 0.734375, "learning_rate": 7.748856647942032e-06, "loss": 3.9798, "step": 15149 }, { "epoch": 5.046639460314816, "grad_norm": 0.73828125, "learning_rate": 7.748047653714788e-06, "loss": 4.0156, "step": 15150 }, { "epoch": 5.046972599317065, "grad_norm": 0.80078125, "learning_rate": 7.747238656598324e-06, "loss": 4.0109, "step": 15151 }, { "epoch": 5.047305738319314, "grad_norm": 0.76171875, "learning_rate": 7.74642965660207e-06, "loss": 3.9876, "step": 15152 }, { "epoch": 5.047638877321562, "grad_norm": 0.76171875, "learning_rate": 7.74562065373544e-06, "loss": 3.957, "step": 15153 }, { "epoch": 5.047972016323811, "grad_norm": 0.7265625, "learning_rate": 7.744811648007865e-06, "loss": 4.0698, "step": 15154 }, { "epoch": 5.04830515532606, "grad_norm": 0.73046875, "learning_rate": 7.744002639428763e-06, "loss": 4.008, "step": 15155 }, { "epoch": 5.048638294328309, "grad_norm": 0.73046875, "learning_rate": 7.743193628007557e-06, "loss": 3.9685, "step": 15156 }, { "epoch": 5.048971433330557, "grad_norm": 0.74609375, "learning_rate": 7.742384613753674e-06, "loss": 4.0372, "step": 15157 }, { "epoch": 5.049304572332806, "grad_norm": 0.734375, "learning_rate": 7.741575596676536e-06, "loss": 3.9788, "step": 15158 }, { "epoch": 5.049637711335055, "grad_norm": 0.76953125, "learning_rate": 7.740766576785565e-06, "loss": 4.0011, "step": 15159 }, { "epoch": 5.049970850337303, "grad_norm": 0.74609375, "learning_rate": 7.739957554090181e-06, "loss": 3.9368, "step": 15160 }, { "epoch": 5.050303989339552, "grad_norm": 0.7265625, "learning_rate": 7.739148528599816e-06, "loss": 4.0519, "step": 15161 }, { "epoch": 5.0506371283418, "grad_norm": 0.7421875, "learning_rate": 7.738339500323888e-06, "loss": 4.0155, "step": 15162 }, { "epoch": 5.0509702673440495, "grad_norm": 0.70703125, "learning_rate": 7.73753046927182e-06, "loss": 4.052, "step": 15163 }, { "epoch": 5.051303406346298, "grad_norm": 0.7421875, "learning_rate": 7.736721435453037e-06, "loss": 3.9135, "step": 15164 }, { "epoch": 5.051636545348547, "grad_norm": 0.74609375, "learning_rate": 7.735912398876962e-06, "loss": 4.0312, "step": 15165 }, { "epoch": 5.051969684350795, "grad_norm": 0.74609375, "learning_rate": 7.735103359553016e-06, "loss": 3.9782, "step": 15166 }, { "epoch": 5.0523028233530445, "grad_norm": 0.75, "learning_rate": 7.734294317490629e-06, "loss": 3.9941, "step": 15167 }, { "epoch": 5.052635962355293, "grad_norm": 0.71875, "learning_rate": 7.733485272699219e-06, "loss": 3.9954, "step": 15168 }, { "epoch": 5.052969101357541, "grad_norm": 0.765625, "learning_rate": 7.732676225188211e-06, "loss": 3.9555, "step": 15169 }, { "epoch": 5.05330224035979, "grad_norm": 0.76171875, "learning_rate": 7.731867174967028e-06, "loss": 3.9651, "step": 15170 }, { "epoch": 5.0536353793620385, "grad_norm": 0.734375, "learning_rate": 7.731058122045096e-06, "loss": 3.9855, "step": 15171 }, { "epoch": 5.053968518364288, "grad_norm": 0.74609375, "learning_rate": 7.730249066431839e-06, "loss": 3.9637, "step": 15172 }, { "epoch": 5.054301657366536, "grad_norm": 0.765625, "learning_rate": 7.729440008136677e-06, "loss": 4.0295, "step": 15173 }, { "epoch": 5.054634796368785, "grad_norm": 0.7265625, "learning_rate": 7.728630947169036e-06, "loss": 4.0175, "step": 15174 }, { "epoch": 5.0549679353710335, "grad_norm": 0.765625, "learning_rate": 7.72782188353834e-06, "loss": 3.9309, "step": 15175 }, { "epoch": 5.055301074373283, "grad_norm": 0.75, "learning_rate": 7.727012817254012e-06, "loss": 4.0453, "step": 15176 }, { "epoch": 5.055634213375531, "grad_norm": 0.7734375, "learning_rate": 7.726203748325476e-06, "loss": 3.975, "step": 15177 }, { "epoch": 5.055967352377779, "grad_norm": 0.765625, "learning_rate": 7.72539467676216e-06, "loss": 4.0092, "step": 15178 }, { "epoch": 5.0563004913800285, "grad_norm": 0.75390625, "learning_rate": 7.724585602573478e-06, "loss": 3.9603, "step": 15179 }, { "epoch": 5.056633630382277, "grad_norm": 0.7421875, "learning_rate": 7.723776525768864e-06, "loss": 3.9822, "step": 15180 }, { "epoch": 5.056966769384526, "grad_norm": 0.71484375, "learning_rate": 7.722967446357739e-06, "loss": 3.9711, "step": 15181 }, { "epoch": 5.057299908386774, "grad_norm": 0.76171875, "learning_rate": 7.722158364349524e-06, "loss": 3.9808, "step": 15182 }, { "epoch": 5.057633047389023, "grad_norm": 0.70703125, "learning_rate": 7.721349279753645e-06, "loss": 3.9645, "step": 15183 }, { "epoch": 5.057966186391272, "grad_norm": 0.80078125, "learning_rate": 7.720540192579527e-06, "loss": 3.9678, "step": 15184 }, { "epoch": 5.05829932539352, "grad_norm": 0.7265625, "learning_rate": 7.719731102836594e-06, "loss": 3.9123, "step": 15185 }, { "epoch": 5.058632464395769, "grad_norm": 0.76171875, "learning_rate": 7.718922010534271e-06, "loss": 4.0289, "step": 15186 }, { "epoch": 5.0589656033980175, "grad_norm": 0.72265625, "learning_rate": 7.718112915681979e-06, "loss": 3.9837, "step": 15187 }, { "epoch": 5.059298742400267, "grad_norm": 0.76953125, "learning_rate": 7.717303818289143e-06, "loss": 4.0258, "step": 15188 }, { "epoch": 5.059631881402515, "grad_norm": 0.7734375, "learning_rate": 7.71649471836519e-06, "loss": 3.9425, "step": 15189 }, { "epoch": 5.059965020404764, "grad_norm": 0.7734375, "learning_rate": 7.715685615919542e-06, "loss": 4.0336, "step": 15190 }, { "epoch": 5.060298159407012, "grad_norm": 0.74609375, "learning_rate": 7.714876510961625e-06, "loss": 3.9596, "step": 15191 }, { "epoch": 5.060631298409262, "grad_norm": 0.7734375, "learning_rate": 7.71406740350086e-06, "loss": 3.9689, "step": 15192 }, { "epoch": 5.06096443741151, "grad_norm": 0.72265625, "learning_rate": 7.713258293546674e-06, "loss": 3.9657, "step": 15193 }, { "epoch": 5.061297576413758, "grad_norm": 0.765625, "learning_rate": 7.71244918110849e-06, "loss": 4.0027, "step": 15194 }, { "epoch": 5.061630715416007, "grad_norm": 0.76171875, "learning_rate": 7.711640066195735e-06, "loss": 4.0663, "step": 15195 }, { "epoch": 5.061963854418256, "grad_norm": 0.74609375, "learning_rate": 7.710830948817831e-06, "loss": 3.9816, "step": 15196 }, { "epoch": 5.062296993420505, "grad_norm": 0.75, "learning_rate": 7.710021828984203e-06, "loss": 3.9221, "step": 15197 }, { "epoch": 5.062630132422753, "grad_norm": 0.73828125, "learning_rate": 7.709212706704276e-06, "loss": 3.9479, "step": 15198 }, { "epoch": 5.062963271425002, "grad_norm": 0.7734375, "learning_rate": 7.708403581987473e-06, "loss": 4.0559, "step": 15199 }, { "epoch": 5.063296410427251, "grad_norm": 0.7421875, "learning_rate": 7.70759445484322e-06, "loss": 4.003, "step": 15200 }, { "epoch": 5.0636295494295, "grad_norm": 0.7421875, "learning_rate": 7.706785325280941e-06, "loss": 3.9851, "step": 15201 }, { "epoch": 5.063962688431748, "grad_norm": 0.74609375, "learning_rate": 7.705976193310062e-06, "loss": 4.0995, "step": 15202 }, { "epoch": 5.064295827433996, "grad_norm": 0.76953125, "learning_rate": 7.705167058940003e-06, "loss": 3.8983, "step": 15203 }, { "epoch": 5.064628966436246, "grad_norm": 0.71484375, "learning_rate": 7.704357922180197e-06, "loss": 3.967, "step": 15204 }, { "epoch": 5.064962105438494, "grad_norm": 0.75390625, "learning_rate": 7.703548783040062e-06, "loss": 3.952, "step": 15205 }, { "epoch": 5.065295244440743, "grad_norm": 0.70703125, "learning_rate": 7.702739641529024e-06, "loss": 3.9698, "step": 15206 }, { "epoch": 5.065628383442991, "grad_norm": 0.71875, "learning_rate": 7.701930497656509e-06, "loss": 3.9455, "step": 15207 }, { "epoch": 5.065961522445241, "grad_norm": 0.71875, "learning_rate": 7.701121351431941e-06, "loss": 3.9816, "step": 15208 }, { "epoch": 5.066294661447489, "grad_norm": 0.71875, "learning_rate": 7.700312202864746e-06, "loss": 3.9839, "step": 15209 }, { "epoch": 5.066627800449738, "grad_norm": 0.77734375, "learning_rate": 7.699503051964344e-06, "loss": 4.0019, "step": 15210 }, { "epoch": 5.066960939451986, "grad_norm": 0.7578125, "learning_rate": 7.698693898740167e-06, "loss": 4.0092, "step": 15211 }, { "epoch": 5.067294078454235, "grad_norm": 0.75390625, "learning_rate": 7.697884743201635e-06, "loss": 4.0572, "step": 15212 }, { "epoch": 5.067627217456484, "grad_norm": 0.74609375, "learning_rate": 7.697075585358176e-06, "loss": 4.0143, "step": 15213 }, { "epoch": 5.067960356458732, "grad_norm": 0.734375, "learning_rate": 7.696266425219211e-06, "loss": 4.0333, "step": 15214 }, { "epoch": 5.068293495460981, "grad_norm": 0.73828125, "learning_rate": 7.69545726279417e-06, "loss": 3.9218, "step": 15215 }, { "epoch": 5.06862663446323, "grad_norm": 0.7890625, "learning_rate": 7.694648098092472e-06, "loss": 4.0247, "step": 15216 }, { "epoch": 5.068959773465479, "grad_norm": 0.75, "learning_rate": 7.693838931123547e-06, "loss": 3.9958, "step": 15217 }, { "epoch": 5.069292912467727, "grad_norm": 0.7109375, "learning_rate": 7.693029761896819e-06, "loss": 3.9391, "step": 15218 }, { "epoch": 5.069626051469976, "grad_norm": 0.7578125, "learning_rate": 7.692220590421709e-06, "loss": 4.0244, "step": 15219 }, { "epoch": 5.0699591904722245, "grad_norm": 0.75, "learning_rate": 7.691411416707649e-06, "loss": 3.9363, "step": 15220 }, { "epoch": 5.070292329474473, "grad_norm": 0.70703125, "learning_rate": 7.69060224076406e-06, "loss": 4.0181, "step": 15221 }, { "epoch": 5.070625468476722, "grad_norm": 0.70703125, "learning_rate": 7.689793062600368e-06, "loss": 3.9701, "step": 15222 }, { "epoch": 5.07095860747897, "grad_norm": 0.76953125, "learning_rate": 7.688983882225998e-06, "loss": 4.0084, "step": 15223 }, { "epoch": 5.0712917464812195, "grad_norm": 0.7734375, "learning_rate": 7.688174699650374e-06, "loss": 3.968, "step": 15224 }, { "epoch": 5.071624885483468, "grad_norm": 0.7734375, "learning_rate": 7.68736551488292e-06, "loss": 3.9404, "step": 15225 }, { "epoch": 5.071958024485717, "grad_norm": 0.79296875, "learning_rate": 7.686556327933068e-06, "loss": 3.9546, "step": 15226 }, { "epoch": 5.072291163487965, "grad_norm": 0.734375, "learning_rate": 7.685747138810238e-06, "loss": 4.0256, "step": 15227 }, { "epoch": 5.0726243024902145, "grad_norm": 0.734375, "learning_rate": 7.684937947523855e-06, "loss": 4.0282, "step": 15228 }, { "epoch": 5.072957441492463, "grad_norm": 0.71484375, "learning_rate": 7.684128754083344e-06, "loss": 3.9575, "step": 15229 }, { "epoch": 5.073290580494711, "grad_norm": 0.75390625, "learning_rate": 7.683319558498133e-06, "loss": 3.998, "step": 15230 }, { "epoch": 5.07362371949696, "grad_norm": 0.76171875, "learning_rate": 7.68251036077765e-06, "loss": 3.9898, "step": 15231 }, { "epoch": 5.0739568584992085, "grad_norm": 0.71875, "learning_rate": 7.681701160931312e-06, "loss": 4.0229, "step": 15232 }, { "epoch": 5.074289997501458, "grad_norm": 0.74609375, "learning_rate": 7.680891958968551e-06, "loss": 3.9846, "step": 15233 }, { "epoch": 5.074623136503706, "grad_norm": 0.7578125, "learning_rate": 7.680082754898789e-06, "loss": 4.0653, "step": 15234 }, { "epoch": 5.074956275505955, "grad_norm": 0.7421875, "learning_rate": 7.679273548731456e-06, "loss": 4.0527, "step": 15235 }, { "epoch": 5.0752894145082035, "grad_norm": 0.74609375, "learning_rate": 7.67846434047597e-06, "loss": 3.9036, "step": 15236 }, { "epoch": 5.075622553510453, "grad_norm": 0.7421875, "learning_rate": 7.677655130141765e-06, "loss": 4.0448, "step": 15237 }, { "epoch": 5.075955692512701, "grad_norm": 0.7734375, "learning_rate": 7.676845917738259e-06, "loss": 3.9981, "step": 15238 }, { "epoch": 5.076288831514949, "grad_norm": 0.7265625, "learning_rate": 7.676036703274884e-06, "loss": 4.0342, "step": 15239 }, { "epoch": 5.076621970517198, "grad_norm": 0.7734375, "learning_rate": 7.675227486761062e-06, "loss": 4.0006, "step": 15240 }, { "epoch": 5.076955109519447, "grad_norm": 0.734375, "learning_rate": 7.674418268206218e-06, "loss": 3.983, "step": 15241 }, { "epoch": 5.077288248521696, "grad_norm": 0.76171875, "learning_rate": 7.67360904761978e-06, "loss": 3.9834, "step": 15242 }, { "epoch": 5.077621387523944, "grad_norm": 0.72265625, "learning_rate": 7.672799825011173e-06, "loss": 3.924, "step": 15243 }, { "epoch": 5.077954526526193, "grad_norm": 0.765625, "learning_rate": 7.671990600389823e-06, "loss": 3.9956, "step": 15244 }, { "epoch": 5.078287665528442, "grad_norm": 0.78125, "learning_rate": 7.671181373765153e-06, "loss": 3.9858, "step": 15245 }, { "epoch": 5.07862080453069, "grad_norm": 0.7265625, "learning_rate": 7.670372145146592e-06, "loss": 3.9777, "step": 15246 }, { "epoch": 5.078953943532939, "grad_norm": 0.73828125, "learning_rate": 7.669562914543563e-06, "loss": 3.9937, "step": 15247 }, { "epoch": 5.0792870825351875, "grad_norm": 0.7578125, "learning_rate": 7.668753681965496e-06, "loss": 3.9844, "step": 15248 }, { "epoch": 5.079620221537437, "grad_norm": 0.69921875, "learning_rate": 7.667944447421812e-06, "loss": 4.015, "step": 15249 }, { "epoch": 5.079953360539685, "grad_norm": 0.73828125, "learning_rate": 7.66713521092194e-06, "loss": 3.998, "step": 15250 }, { "epoch": 5.080286499541934, "grad_norm": 0.72265625, "learning_rate": 7.666325972475304e-06, "loss": 4.0067, "step": 15251 }, { "epoch": 5.080619638544182, "grad_norm": 0.73828125, "learning_rate": 7.66551673209133e-06, "loss": 3.9085, "step": 15252 }, { "epoch": 5.080952777546432, "grad_norm": 0.765625, "learning_rate": 7.664707489779448e-06, "loss": 3.8955, "step": 15253 }, { "epoch": 5.08128591654868, "grad_norm": 0.7421875, "learning_rate": 7.663898245549079e-06, "loss": 4.0097, "step": 15254 }, { "epoch": 5.081619055550928, "grad_norm": 0.73046875, "learning_rate": 7.66308899940965e-06, "loss": 3.9483, "step": 15255 }, { "epoch": 5.081952194553177, "grad_norm": 0.70703125, "learning_rate": 7.662279751370585e-06, "loss": 4.0479, "step": 15256 }, { "epoch": 5.082285333555426, "grad_norm": 0.73828125, "learning_rate": 7.661470501441315e-06, "loss": 3.9874, "step": 15257 }, { "epoch": 5.082618472557675, "grad_norm": 0.71484375, "learning_rate": 7.660661249631263e-06, "loss": 3.9691, "step": 15258 }, { "epoch": 5.082951611559923, "grad_norm": 0.7265625, "learning_rate": 7.659851995949857e-06, "loss": 3.9726, "step": 15259 }, { "epoch": 5.083284750562172, "grad_norm": 0.7265625, "learning_rate": 7.659042740406518e-06, "loss": 3.9638, "step": 15260 }, { "epoch": 5.083617889564421, "grad_norm": 0.73046875, "learning_rate": 7.658233483010678e-06, "loss": 3.9126, "step": 15261 }, { "epoch": 5.08395102856667, "grad_norm": 0.72265625, "learning_rate": 7.65742422377176e-06, "loss": 3.9673, "step": 15262 }, { "epoch": 5.084284167568918, "grad_norm": 0.73046875, "learning_rate": 7.65661496269919e-06, "loss": 3.9896, "step": 15263 }, { "epoch": 5.084617306571166, "grad_norm": 0.70703125, "learning_rate": 7.655805699802396e-06, "loss": 3.9463, "step": 15264 }, { "epoch": 5.084950445573416, "grad_norm": 0.734375, "learning_rate": 7.654996435090802e-06, "loss": 3.9684, "step": 15265 }, { "epoch": 5.085283584575664, "grad_norm": 0.71484375, "learning_rate": 7.654187168573837e-06, "loss": 3.9416, "step": 15266 }, { "epoch": 5.085616723577913, "grad_norm": 0.76953125, "learning_rate": 7.653377900260925e-06, "loss": 4.0102, "step": 15267 }, { "epoch": 5.085949862580161, "grad_norm": 0.7578125, "learning_rate": 7.652568630161492e-06, "loss": 3.9492, "step": 15268 }, { "epoch": 5.0862830015824105, "grad_norm": 0.74609375, "learning_rate": 7.651759358284963e-06, "loss": 3.9636, "step": 15269 }, { "epoch": 5.086616140584659, "grad_norm": 0.7109375, "learning_rate": 7.65095008464077e-06, "loss": 4.0209, "step": 15270 }, { "epoch": 5.086949279586908, "grad_norm": 0.73828125, "learning_rate": 7.650140809238335e-06, "loss": 3.9669, "step": 15271 }, { "epoch": 5.087282418589156, "grad_norm": 0.71484375, "learning_rate": 7.649331532087083e-06, "loss": 4.0105, "step": 15272 }, { "epoch": 5.087615557591405, "grad_norm": 0.765625, "learning_rate": 7.648522253196442e-06, "loss": 3.9969, "step": 15273 }, { "epoch": 5.087948696593654, "grad_norm": 0.7734375, "learning_rate": 7.647712972575838e-06, "loss": 3.9397, "step": 15274 }, { "epoch": 5.088281835595902, "grad_norm": 0.76953125, "learning_rate": 7.646903690234702e-06, "loss": 3.9121, "step": 15275 }, { "epoch": 5.088614974598151, "grad_norm": 0.7578125, "learning_rate": 7.64609440618245e-06, "loss": 3.937, "step": 15276 }, { "epoch": 5.0889481136004, "grad_norm": 0.7578125, "learning_rate": 7.645285120428519e-06, "loss": 3.9245, "step": 15277 }, { "epoch": 5.089281252602649, "grad_norm": 0.734375, "learning_rate": 7.644475832982328e-06, "loss": 4.0215, "step": 15278 }, { "epoch": 5.089614391604897, "grad_norm": 0.71484375, "learning_rate": 7.643666543853309e-06, "loss": 4.0216, "step": 15279 }, { "epoch": 5.089947530607146, "grad_norm": 0.73828125, "learning_rate": 7.642857253050886e-06, "loss": 3.9967, "step": 15280 }, { "epoch": 5.0902806696093945, "grad_norm": 0.73046875, "learning_rate": 7.642047960584484e-06, "loss": 3.9593, "step": 15281 }, { "epoch": 5.090613808611643, "grad_norm": 0.73828125, "learning_rate": 7.641238666463532e-06, "loss": 3.9658, "step": 15282 }, { "epoch": 5.090946947613892, "grad_norm": 0.734375, "learning_rate": 7.640429370697457e-06, "loss": 4.0248, "step": 15283 }, { "epoch": 5.09128008661614, "grad_norm": 0.74609375, "learning_rate": 7.63962007329568e-06, "loss": 3.9981, "step": 15284 }, { "epoch": 5.0916132256183895, "grad_norm": 0.74609375, "learning_rate": 7.638810774267633e-06, "loss": 3.9777, "step": 15285 }, { "epoch": 5.091946364620638, "grad_norm": 0.76953125, "learning_rate": 7.638001473622743e-06, "loss": 3.936, "step": 15286 }, { "epoch": 5.092279503622887, "grad_norm": 0.71484375, "learning_rate": 7.637192171370432e-06, "loss": 3.9605, "step": 15287 }, { "epoch": 5.092612642625135, "grad_norm": 0.7421875, "learning_rate": 7.63638286752013e-06, "loss": 4.0431, "step": 15288 }, { "epoch": 5.092945781627384, "grad_norm": 0.765625, "learning_rate": 7.635573562081267e-06, "loss": 3.9695, "step": 15289 }, { "epoch": 5.093278920629633, "grad_norm": 0.70703125, "learning_rate": 7.634764255063262e-06, "loss": 3.9936, "step": 15290 }, { "epoch": 5.093612059631881, "grad_norm": 0.734375, "learning_rate": 7.633954946475545e-06, "loss": 4.0504, "step": 15291 }, { "epoch": 5.09394519863413, "grad_norm": 0.7265625, "learning_rate": 7.633145636327543e-06, "loss": 4.0251, "step": 15292 }, { "epoch": 5.0942783376363785, "grad_norm": 0.74609375, "learning_rate": 7.632336324628684e-06, "loss": 4.0117, "step": 15293 }, { "epoch": 5.094611476638628, "grad_norm": 0.75, "learning_rate": 7.631527011388394e-06, "loss": 3.9629, "step": 15294 }, { "epoch": 5.094944615640876, "grad_norm": 0.83203125, "learning_rate": 7.630717696616097e-06, "loss": 4.01, "step": 15295 }, { "epoch": 5.095277754643125, "grad_norm": 0.7109375, "learning_rate": 7.629908380321222e-06, "loss": 3.9309, "step": 15296 }, { "epoch": 5.0956108936453735, "grad_norm": 0.76171875, "learning_rate": 7.629099062513198e-06, "loss": 3.9621, "step": 15297 }, { "epoch": 5.095944032647623, "grad_norm": 0.77734375, "learning_rate": 7.628289743201449e-06, "loss": 4.045, "step": 15298 }, { "epoch": 5.096277171649871, "grad_norm": 0.6953125, "learning_rate": 7.627480422395402e-06, "loss": 3.9943, "step": 15299 }, { "epoch": 5.096610310652119, "grad_norm": 0.73828125, "learning_rate": 7.626671100104482e-06, "loss": 3.9205, "step": 15300 }, { "epoch": 5.096943449654368, "grad_norm": 0.734375, "learning_rate": 7.6258617763381215e-06, "loss": 3.9936, "step": 15301 }, { "epoch": 5.097276588656617, "grad_norm": 0.76953125, "learning_rate": 7.625052451105743e-06, "loss": 3.9644, "step": 15302 }, { "epoch": 5.097609727658866, "grad_norm": 0.7578125, "learning_rate": 7.624243124416776e-06, "loss": 4.03, "step": 15303 }, { "epoch": 5.097942866661114, "grad_norm": 0.78515625, "learning_rate": 7.623433796280641e-06, "loss": 3.9955, "step": 15304 }, { "epoch": 5.098276005663363, "grad_norm": 0.8125, "learning_rate": 7.622624466706773e-06, "loss": 3.9628, "step": 15305 }, { "epoch": 5.098609144665612, "grad_norm": 0.765625, "learning_rate": 7.621815135704593e-06, "loss": 3.9665, "step": 15306 }, { "epoch": 5.098942283667861, "grad_norm": 0.7421875, "learning_rate": 7.621005803283534e-06, "loss": 3.9538, "step": 15307 }, { "epoch": 5.099275422670109, "grad_norm": 0.70703125, "learning_rate": 7.620196469453017e-06, "loss": 3.9556, "step": 15308 }, { "epoch": 5.099608561672357, "grad_norm": 0.71875, "learning_rate": 7.619387134222471e-06, "loss": 4.0589, "step": 15309 }, { "epoch": 5.099941700674607, "grad_norm": 0.72265625, "learning_rate": 7.6185777976013265e-06, "loss": 3.9706, "step": 15310 }, { "epoch": 5.100274839676855, "grad_norm": 0.8125, "learning_rate": 7.617768459599005e-06, "loss": 4.0423, "step": 15311 }, { "epoch": 5.100607978679104, "grad_norm": 0.78515625, "learning_rate": 7.616959120224937e-06, "loss": 3.9644, "step": 15312 }, { "epoch": 5.100941117681352, "grad_norm": 0.71875, "learning_rate": 7.6161497794885475e-06, "loss": 4.0257, "step": 15313 }, { "epoch": 5.101274256683602, "grad_norm": 0.734375, "learning_rate": 7.6153404373992666e-06, "loss": 4.0149, "step": 15314 }, { "epoch": 5.10160739568585, "grad_norm": 0.7265625, "learning_rate": 7.614531093966518e-06, "loss": 3.9693, "step": 15315 }, { "epoch": 5.101940534688099, "grad_norm": 0.7734375, "learning_rate": 7.61372174919973e-06, "loss": 4.034, "step": 15316 }, { "epoch": 5.102273673690347, "grad_norm": 0.74609375, "learning_rate": 7.612912403108331e-06, "loss": 4.0374, "step": 15317 }, { "epoch": 5.102606812692596, "grad_norm": 0.7578125, "learning_rate": 7.612103055701745e-06, "loss": 3.9242, "step": 15318 }, { "epoch": 5.102939951694845, "grad_norm": 0.74609375, "learning_rate": 7.611293706989404e-06, "loss": 3.9819, "step": 15319 }, { "epoch": 5.103273090697093, "grad_norm": 0.8359375, "learning_rate": 7.610484356980732e-06, "loss": 4.0217, "step": 15320 }, { "epoch": 5.103606229699342, "grad_norm": 0.75, "learning_rate": 7.609675005685155e-06, "loss": 3.9191, "step": 15321 }, { "epoch": 5.103939368701591, "grad_norm": 0.765625, "learning_rate": 7.608865653112102e-06, "loss": 3.9246, "step": 15322 }, { "epoch": 5.10427250770384, "grad_norm": 0.7265625, "learning_rate": 7.608056299271e-06, "loss": 4.0435, "step": 15323 }, { "epoch": 5.104605646706088, "grad_norm": 0.74609375, "learning_rate": 7.607246944171277e-06, "loss": 3.8645, "step": 15324 }, { "epoch": 5.104938785708336, "grad_norm": 0.73828125, "learning_rate": 7.60643758782236e-06, "loss": 4.077, "step": 15325 }, { "epoch": 5.1052719247105856, "grad_norm": 0.75, "learning_rate": 7.605628230233673e-06, "loss": 4.013, "step": 15326 }, { "epoch": 5.105605063712834, "grad_norm": 0.71875, "learning_rate": 7.604818871414648e-06, "loss": 4.028, "step": 15327 }, { "epoch": 5.105938202715083, "grad_norm": 0.79296875, "learning_rate": 7.60400951137471e-06, "loss": 3.98, "step": 15328 }, { "epoch": 5.106271341717331, "grad_norm": 0.71875, "learning_rate": 7.603200150123286e-06, "loss": 4.0782, "step": 15329 }, { "epoch": 5.1066044807195805, "grad_norm": 0.73828125, "learning_rate": 7.602390787669805e-06, "loss": 3.9816, "step": 15330 }, { "epoch": 5.106937619721829, "grad_norm": 0.73046875, "learning_rate": 7.601581424023691e-06, "loss": 4.022, "step": 15331 }, { "epoch": 5.107270758724078, "grad_norm": 0.69921875, "learning_rate": 7.600772059194373e-06, "loss": 3.9239, "step": 15332 }, { "epoch": 5.107603897726326, "grad_norm": 0.78515625, "learning_rate": 7.5999626931912805e-06, "loss": 3.9974, "step": 15333 }, { "epoch": 5.107937036728575, "grad_norm": 0.75, "learning_rate": 7.599153326023842e-06, "loss": 4.0466, "step": 15334 }, { "epoch": 5.108270175730824, "grad_norm": 0.7578125, "learning_rate": 7.598343957701477e-06, "loss": 4.0824, "step": 15335 }, { "epoch": 5.108603314733072, "grad_norm": 0.734375, "learning_rate": 7.59753458823362e-06, "loss": 4.0623, "step": 15336 }, { "epoch": 5.108936453735321, "grad_norm": 0.73046875, "learning_rate": 7.596725217629695e-06, "loss": 3.9502, "step": 15337 }, { "epoch": 5.1092695927375695, "grad_norm": 0.71875, "learning_rate": 7.595915845899132e-06, "loss": 3.9875, "step": 15338 }, { "epoch": 5.109602731739819, "grad_norm": 0.73046875, "learning_rate": 7.595106473051357e-06, "loss": 3.9395, "step": 15339 }, { "epoch": 5.109935870742067, "grad_norm": 0.71875, "learning_rate": 7.594297099095797e-06, "loss": 4.0765, "step": 15340 }, { "epoch": 5.110269009744316, "grad_norm": 0.80078125, "learning_rate": 7.593487724041882e-06, "loss": 4.0516, "step": 15341 }, { "epoch": 5.1106021487465645, "grad_norm": 0.72265625, "learning_rate": 7.5926783478990345e-06, "loss": 3.9685, "step": 15342 }, { "epoch": 5.110935287748813, "grad_norm": 0.71875, "learning_rate": 7.591868970676689e-06, "loss": 4.0175, "step": 15343 }, { "epoch": 5.111268426751062, "grad_norm": 0.73828125, "learning_rate": 7.591059592384266e-06, "loss": 3.9612, "step": 15344 }, { "epoch": 5.11160156575331, "grad_norm": 0.74609375, "learning_rate": 7.590250213031196e-06, "loss": 3.9359, "step": 15345 }, { "epoch": 5.1119347047555594, "grad_norm": 0.734375, "learning_rate": 7.589440832626906e-06, "loss": 4.0107, "step": 15346 }, { "epoch": 5.112267843757808, "grad_norm": 0.75, "learning_rate": 7.5886314511808275e-06, "loss": 3.9648, "step": 15347 }, { "epoch": 5.112600982760057, "grad_norm": 0.7578125, "learning_rate": 7.587822068702381e-06, "loss": 3.9437, "step": 15348 }, { "epoch": 5.112934121762305, "grad_norm": 0.75390625, "learning_rate": 7.587012685200999e-06, "loss": 3.9894, "step": 15349 }, { "epoch": 5.113267260764554, "grad_norm": 0.734375, "learning_rate": 7.586203300686107e-06, "loss": 3.9648, "step": 15350 }, { "epoch": 5.113600399766803, "grad_norm": 0.734375, "learning_rate": 7.585393915167134e-06, "loss": 4.0239, "step": 15351 }, { "epoch": 5.113933538769051, "grad_norm": 0.75390625, "learning_rate": 7.584584528653508e-06, "loss": 4.0294, "step": 15352 }, { "epoch": 5.1142666777713, "grad_norm": 0.75, "learning_rate": 7.583775141154653e-06, "loss": 3.9389, "step": 15353 }, { "epoch": 5.1145998167735485, "grad_norm": 0.7734375, "learning_rate": 7.582965752679999e-06, "loss": 3.9613, "step": 15354 }, { "epoch": 5.114932955775798, "grad_norm": 0.7890625, "learning_rate": 7.5821563632389745e-06, "loss": 3.9568, "step": 15355 }, { "epoch": 5.115266094778046, "grad_norm": 0.7734375, "learning_rate": 7.581346972841008e-06, "loss": 4.0275, "step": 15356 }, { "epoch": 5.115599233780295, "grad_norm": 0.75, "learning_rate": 7.580537581495524e-06, "loss": 4.0064, "step": 15357 }, { "epoch": 5.115932372782543, "grad_norm": 0.71484375, "learning_rate": 7.579728189211951e-06, "loss": 3.9631, "step": 15358 }, { "epoch": 5.116265511784793, "grad_norm": 0.6953125, "learning_rate": 7.5789187959997175e-06, "loss": 3.9904, "step": 15359 }, { "epoch": 5.116598650787041, "grad_norm": 0.71484375, "learning_rate": 7.578109401868251e-06, "loss": 3.9769, "step": 15360 }, { "epoch": 5.116931789789289, "grad_norm": 0.8046875, "learning_rate": 7.577300006826981e-06, "loss": 3.9562, "step": 15361 }, { "epoch": 5.117264928791538, "grad_norm": 0.74609375, "learning_rate": 7.576490610885331e-06, "loss": 3.9733, "step": 15362 }, { "epoch": 5.117598067793787, "grad_norm": 0.73046875, "learning_rate": 7.575681214052732e-06, "loss": 3.9698, "step": 15363 }, { "epoch": 5.117931206796036, "grad_norm": 0.76171875, "learning_rate": 7.57487181633861e-06, "loss": 3.9813, "step": 15364 }, { "epoch": 5.118264345798284, "grad_norm": 0.7578125, "learning_rate": 7.574062417752397e-06, "loss": 4.0189, "step": 15365 }, { "epoch": 5.118597484800533, "grad_norm": 0.7578125, "learning_rate": 7.573253018303513e-06, "loss": 3.9829, "step": 15366 }, { "epoch": 5.118930623802782, "grad_norm": 0.75, "learning_rate": 7.572443618001393e-06, "loss": 3.97, "step": 15367 }, { "epoch": 5.119263762805031, "grad_norm": 0.80859375, "learning_rate": 7.5716342168554585e-06, "loss": 3.9263, "step": 15368 }, { "epoch": 5.119596901807279, "grad_norm": 0.73828125, "learning_rate": 7.570824814875143e-06, "loss": 4.0305, "step": 15369 }, { "epoch": 5.119930040809527, "grad_norm": 0.7265625, "learning_rate": 7.570015412069871e-06, "loss": 3.9762, "step": 15370 }, { "epoch": 5.120263179811777, "grad_norm": 0.76953125, "learning_rate": 7.569206008449072e-06, "loss": 4.0119, "step": 15371 }, { "epoch": 5.120596318814025, "grad_norm": 0.7421875, "learning_rate": 7.568396604022171e-06, "loss": 3.9229, "step": 15372 }, { "epoch": 5.120929457816274, "grad_norm": 0.7578125, "learning_rate": 7.567587198798599e-06, "loss": 4.0027, "step": 15373 }, { "epoch": 5.121262596818522, "grad_norm": 0.734375, "learning_rate": 7.566777792787784e-06, "loss": 4.0025, "step": 15374 }, { "epoch": 5.1215957358207715, "grad_norm": 0.71484375, "learning_rate": 7.565968385999149e-06, "loss": 3.9002, "step": 15375 }, { "epoch": 5.12192887482302, "grad_norm": 0.71484375, "learning_rate": 7.565158978442127e-06, "loss": 4.0053, "step": 15376 }, { "epoch": 5.122262013825269, "grad_norm": 0.734375, "learning_rate": 7.564349570126144e-06, "loss": 4.0618, "step": 15377 }, { "epoch": 5.122595152827517, "grad_norm": 0.703125, "learning_rate": 7.563540161060627e-06, "loss": 4.0414, "step": 15378 }, { "epoch": 5.122928291829766, "grad_norm": 0.72265625, "learning_rate": 7.562730751255007e-06, "loss": 3.9591, "step": 15379 }, { "epoch": 5.123261430832015, "grad_norm": 0.73828125, "learning_rate": 7.561921340718709e-06, "loss": 3.9751, "step": 15380 }, { "epoch": 5.123594569834263, "grad_norm": 0.7734375, "learning_rate": 7.561111929461159e-06, "loss": 3.9684, "step": 15381 }, { "epoch": 5.123927708836512, "grad_norm": 0.7421875, "learning_rate": 7.560302517491791e-06, "loss": 4.0823, "step": 15382 }, { "epoch": 5.124260847838761, "grad_norm": 0.76953125, "learning_rate": 7.559493104820027e-06, "loss": 3.9335, "step": 15383 }, { "epoch": 5.12459398684101, "grad_norm": 0.75390625, "learning_rate": 7.558683691455297e-06, "loss": 3.9821, "step": 15384 }, { "epoch": 5.124927125843258, "grad_norm": 0.69140625, "learning_rate": 7.557874277407029e-06, "loss": 4.0231, "step": 15385 }, { "epoch": 5.125260264845506, "grad_norm": 0.7578125, "learning_rate": 7.557064862684651e-06, "loss": 4.0044, "step": 15386 }, { "epoch": 5.1255934038477555, "grad_norm": 0.73046875, "learning_rate": 7.556255447297592e-06, "loss": 3.97, "step": 15387 }, { "epoch": 5.125926542850004, "grad_norm": 0.765625, "learning_rate": 7.555446031255279e-06, "loss": 3.9941, "step": 15388 }, { "epoch": 5.126259681852253, "grad_norm": 0.77734375, "learning_rate": 7.55463661456714e-06, "loss": 4.0074, "step": 15389 }, { "epoch": 5.126592820854501, "grad_norm": 0.77734375, "learning_rate": 7.553827197242601e-06, "loss": 4.0659, "step": 15390 }, { "epoch": 5.1269259598567505, "grad_norm": 0.7421875, "learning_rate": 7.553017779291094e-06, "loss": 3.9993, "step": 15391 }, { "epoch": 5.127259098858999, "grad_norm": 0.76953125, "learning_rate": 7.552208360722044e-06, "loss": 4.0133, "step": 15392 }, { "epoch": 5.127592237861248, "grad_norm": 0.765625, "learning_rate": 7.551398941544881e-06, "loss": 3.9002, "step": 15393 }, { "epoch": 5.127925376863496, "grad_norm": 0.73828125, "learning_rate": 7.550589521769028e-06, "loss": 4.0052, "step": 15394 }, { "epoch": 5.128258515865745, "grad_norm": 0.75, "learning_rate": 7.549780101403919e-06, "loss": 4.0477, "step": 15395 }, { "epoch": 5.128591654867994, "grad_norm": 0.75, "learning_rate": 7.54897068045898e-06, "loss": 3.996, "step": 15396 }, { "epoch": 5.128924793870242, "grad_norm": 0.75, "learning_rate": 7.548161258943637e-06, "loss": 4.1169, "step": 15397 }, { "epoch": 5.129257932872491, "grad_norm": 0.76953125, "learning_rate": 7.54735183686732e-06, "loss": 3.913, "step": 15398 }, { "epoch": 5.1295910718747395, "grad_norm": 0.78125, "learning_rate": 7.5465424142394575e-06, "loss": 3.9493, "step": 15399 }, { "epoch": 5.129924210876989, "grad_norm": 0.75390625, "learning_rate": 7.545732991069476e-06, "loss": 3.9592, "step": 15400 }, { "epoch": 5.130257349879237, "grad_norm": 0.75390625, "learning_rate": 7.544923567366806e-06, "loss": 3.9972, "step": 15401 }, { "epoch": 5.130590488881486, "grad_norm": 0.75, "learning_rate": 7.544114143140873e-06, "loss": 3.9183, "step": 15402 }, { "epoch": 5.1309236278837345, "grad_norm": 0.73046875, "learning_rate": 7.543304718401104e-06, "loss": 4.0027, "step": 15403 }, { "epoch": 5.131256766885983, "grad_norm": 0.734375, "learning_rate": 7.542495293156931e-06, "loss": 3.9791, "step": 15404 }, { "epoch": 5.131589905888232, "grad_norm": 0.76953125, "learning_rate": 7.541685867417779e-06, "loss": 4.0152, "step": 15405 }, { "epoch": 5.13192304489048, "grad_norm": 0.7421875, "learning_rate": 7.540876441193075e-06, "loss": 4.063, "step": 15406 }, { "epoch": 5.132256183892729, "grad_norm": 0.70703125, "learning_rate": 7.5400670144922515e-06, "loss": 4.0222, "step": 15407 }, { "epoch": 5.132589322894978, "grad_norm": 0.80078125, "learning_rate": 7.5392575873247325e-06, "loss": 3.9355, "step": 15408 }, { "epoch": 5.132922461897227, "grad_norm": 0.76171875, "learning_rate": 7.538448159699949e-06, "loss": 4.0301, "step": 15409 }, { "epoch": 5.133255600899475, "grad_norm": 0.76953125, "learning_rate": 7.5376387316273275e-06, "loss": 3.9659, "step": 15410 }, { "epoch": 5.133588739901724, "grad_norm": 0.7265625, "learning_rate": 7.5368293031162964e-06, "loss": 4.0339, "step": 15411 }, { "epoch": 5.133921878903973, "grad_norm": 0.765625, "learning_rate": 7.536019874176282e-06, "loss": 3.9293, "step": 15412 }, { "epoch": 5.134255017906221, "grad_norm": 0.73046875, "learning_rate": 7.535210444816716e-06, "loss": 4.0399, "step": 15413 }, { "epoch": 5.13458815690847, "grad_norm": 0.7578125, "learning_rate": 7.534401015047026e-06, "loss": 3.9316, "step": 15414 }, { "epoch": 5.134921295910718, "grad_norm": 0.74609375, "learning_rate": 7.533591584876636e-06, "loss": 3.9885, "step": 15415 }, { "epoch": 5.135254434912968, "grad_norm": 0.75, "learning_rate": 7.5327821543149755e-06, "loss": 3.9758, "step": 15416 }, { "epoch": 5.135587573915216, "grad_norm": 0.765625, "learning_rate": 7.531972723371478e-06, "loss": 3.9812, "step": 15417 }, { "epoch": 5.135920712917465, "grad_norm": 0.74609375, "learning_rate": 7.531163292055564e-06, "loss": 4.0427, "step": 15418 }, { "epoch": 5.136253851919713, "grad_norm": 0.734375, "learning_rate": 7.530353860376666e-06, "loss": 3.9948, "step": 15419 }, { "epoch": 5.136586990921963, "grad_norm": 0.72265625, "learning_rate": 7.5295444283442125e-06, "loss": 3.9732, "step": 15420 }, { "epoch": 5.136920129924211, "grad_norm": 0.72265625, "learning_rate": 7.528734995967629e-06, "loss": 3.9576, "step": 15421 }, { "epoch": 5.137253268926459, "grad_norm": 0.76171875, "learning_rate": 7.527925563256346e-06, "loss": 4.0414, "step": 15422 }, { "epoch": 5.137586407928708, "grad_norm": 0.76953125, "learning_rate": 7.527116130219791e-06, "loss": 3.957, "step": 15423 }, { "epoch": 5.137919546930957, "grad_norm": 0.75, "learning_rate": 7.526306696867391e-06, "loss": 3.9887, "step": 15424 }, { "epoch": 5.138252685933206, "grad_norm": 0.76953125, "learning_rate": 7.525497263208575e-06, "loss": 3.9672, "step": 15425 }, { "epoch": 5.138585824935454, "grad_norm": 0.75, "learning_rate": 7.524687829252772e-06, "loss": 3.9801, "step": 15426 }, { "epoch": 5.138918963937703, "grad_norm": 0.74609375, "learning_rate": 7.523878395009407e-06, "loss": 3.9367, "step": 15427 }, { "epoch": 5.139252102939952, "grad_norm": 0.76171875, "learning_rate": 7.523068960487913e-06, "loss": 4.0546, "step": 15428 }, { "epoch": 5.139585241942201, "grad_norm": 0.78125, "learning_rate": 7.522259525697715e-06, "loss": 4.0402, "step": 15429 }, { "epoch": 5.139918380944449, "grad_norm": 0.76171875, "learning_rate": 7.52145009064824e-06, "loss": 3.9423, "step": 15430 }, { "epoch": 5.140251519946697, "grad_norm": 0.71484375, "learning_rate": 7.520640655348919e-06, "loss": 3.9825, "step": 15431 }, { "epoch": 5.140584658948947, "grad_norm": 0.76953125, "learning_rate": 7.519831219809181e-06, "loss": 4.0074, "step": 15432 }, { "epoch": 5.140917797951195, "grad_norm": 0.78125, "learning_rate": 7.519021784038451e-06, "loss": 4.0626, "step": 15433 }, { "epoch": 5.141250936953444, "grad_norm": 0.76953125, "learning_rate": 7.518212348046157e-06, "loss": 4.0139, "step": 15434 }, { "epoch": 5.141584075955692, "grad_norm": 0.76953125, "learning_rate": 7.51740291184173e-06, "loss": 3.9619, "step": 15435 }, { "epoch": 5.1419172149579415, "grad_norm": 0.765625, "learning_rate": 7.516593475434594e-06, "loss": 3.9911, "step": 15436 }, { "epoch": 5.14225035396019, "grad_norm": 0.7890625, "learning_rate": 7.515784038834185e-06, "loss": 3.9396, "step": 15437 }, { "epoch": 5.142583492962439, "grad_norm": 0.73828125, "learning_rate": 7.5149746020499225e-06, "loss": 3.9941, "step": 15438 }, { "epoch": 5.142916631964687, "grad_norm": 0.74609375, "learning_rate": 7.51416516509124e-06, "loss": 4.0297, "step": 15439 }, { "epoch": 5.143249770966936, "grad_norm": 0.83203125, "learning_rate": 7.5133557279675614e-06, "loss": 3.8996, "step": 15440 }, { "epoch": 5.143582909969185, "grad_norm": 0.76953125, "learning_rate": 7.5125462906883215e-06, "loss": 4.0236, "step": 15441 }, { "epoch": 5.143916048971433, "grad_norm": 0.78125, "learning_rate": 7.511736853262941e-06, "loss": 4.0121, "step": 15442 }, { "epoch": 5.144249187973682, "grad_norm": 0.71484375, "learning_rate": 7.510927415700853e-06, "loss": 3.9743, "step": 15443 }, { "epoch": 5.1445823269759305, "grad_norm": 0.7421875, "learning_rate": 7.510117978011484e-06, "loss": 4.0351, "step": 15444 }, { "epoch": 5.14491546597818, "grad_norm": 0.76171875, "learning_rate": 7.5093085402042615e-06, "loss": 4.0168, "step": 15445 }, { "epoch": 5.145248604980428, "grad_norm": 0.6953125, "learning_rate": 7.508499102288619e-06, "loss": 3.9758, "step": 15446 }, { "epoch": 5.145581743982676, "grad_norm": 0.75390625, "learning_rate": 7.507689664273975e-06, "loss": 4.0293, "step": 15447 }, { "epoch": 5.1459148829849255, "grad_norm": 0.70703125, "learning_rate": 7.506880226169766e-06, "loss": 4.0237, "step": 15448 }, { "epoch": 5.146248021987174, "grad_norm": 0.77734375, "learning_rate": 7.506070787985416e-06, "loss": 3.9492, "step": 15449 }, { "epoch": 5.146581160989423, "grad_norm": 0.76171875, "learning_rate": 7.505261349730355e-06, "loss": 3.9687, "step": 15450 }, { "epoch": 5.146914299991671, "grad_norm": 0.7421875, "learning_rate": 7.5044519114140115e-06, "loss": 3.9407, "step": 15451 }, { "epoch": 5.1472474389939205, "grad_norm": 0.71484375, "learning_rate": 7.503642473045811e-06, "loss": 4.02, "step": 15452 }, { "epoch": 5.147580577996169, "grad_norm": 0.7578125, "learning_rate": 7.5028330346351855e-06, "loss": 4.0224, "step": 15453 }, { "epoch": 5.147913716998418, "grad_norm": 0.75390625, "learning_rate": 7.502023596191559e-06, "loss": 4.0343, "step": 15454 }, { "epoch": 5.148246856000666, "grad_norm": 0.75, "learning_rate": 7.5012141577243655e-06, "loss": 3.941, "step": 15455 }, { "epoch": 5.148579995002915, "grad_norm": 0.6953125, "learning_rate": 7.500404719243026e-06, "loss": 4.0211, "step": 15456 }, { "epoch": 5.148913134005164, "grad_norm": 0.7578125, "learning_rate": 7.499595280756976e-06, "loss": 4.0118, "step": 15457 }, { "epoch": 5.149246273007412, "grad_norm": 0.7734375, "learning_rate": 7.4987858422756374e-06, "loss": 4.0234, "step": 15458 }, { "epoch": 5.149579412009661, "grad_norm": 0.76171875, "learning_rate": 7.497976403808442e-06, "loss": 4.0821, "step": 15459 }, { "epoch": 5.1499125510119095, "grad_norm": 0.74609375, "learning_rate": 7.4971669653648166e-06, "loss": 3.9877, "step": 15460 }, { "epoch": 5.150245690014159, "grad_norm": 0.78125, "learning_rate": 7.4963575269541905e-06, "loss": 3.982, "step": 15461 }, { "epoch": 5.150578829016407, "grad_norm": 0.7578125, "learning_rate": 7.49554808858599e-06, "loss": 3.9667, "step": 15462 }, { "epoch": 5.150911968018656, "grad_norm": 0.7421875, "learning_rate": 7.494738650269646e-06, "loss": 3.9268, "step": 15463 }, { "epoch": 5.151245107020904, "grad_norm": 0.76953125, "learning_rate": 7.493929212014584e-06, "loss": 3.9337, "step": 15464 }, { "epoch": 5.151578246023153, "grad_norm": 0.76171875, "learning_rate": 7.4931197738302345e-06, "loss": 3.9823, "step": 15465 }, { "epoch": 5.151911385025402, "grad_norm": 0.7109375, "learning_rate": 7.492310335726028e-06, "loss": 3.9811, "step": 15466 }, { "epoch": 5.15224452402765, "grad_norm": 0.75390625, "learning_rate": 7.491500897711384e-06, "loss": 3.9787, "step": 15467 }, { "epoch": 5.152577663029899, "grad_norm": 0.7734375, "learning_rate": 7.49069145979574e-06, "loss": 4.0388, "step": 15468 }, { "epoch": 5.152910802032148, "grad_norm": 0.765625, "learning_rate": 7.489882021988517e-06, "loss": 4.031, "step": 15469 }, { "epoch": 5.153243941034397, "grad_norm": 0.765625, "learning_rate": 7.489072584299148e-06, "loss": 3.9854, "step": 15470 }, { "epoch": 5.153577080036645, "grad_norm": 0.765625, "learning_rate": 7.48826314673706e-06, "loss": 4.0181, "step": 15471 }, { "epoch": 5.153910219038894, "grad_norm": 0.74609375, "learning_rate": 7.487453709311681e-06, "loss": 3.8883, "step": 15472 }, { "epoch": 5.154243358041143, "grad_norm": 0.7265625, "learning_rate": 7.486644272032437e-06, "loss": 3.9676, "step": 15473 }, { "epoch": 5.154576497043391, "grad_norm": 0.7578125, "learning_rate": 7.485834834908762e-06, "loss": 3.93, "step": 15474 }, { "epoch": 5.15490963604564, "grad_norm": 0.78125, "learning_rate": 7.48502539795008e-06, "loss": 3.9936, "step": 15475 }, { "epoch": 5.155242775047888, "grad_norm": 0.74609375, "learning_rate": 7.484215961165817e-06, "loss": 4.0436, "step": 15476 }, { "epoch": 5.155575914050138, "grad_norm": 0.77734375, "learning_rate": 7.483406524565406e-06, "loss": 3.9975, "step": 15477 }, { "epoch": 5.155909053052386, "grad_norm": 0.75390625, "learning_rate": 7.482597088158272e-06, "loss": 3.9959, "step": 15478 }, { "epoch": 5.156242192054635, "grad_norm": 0.74609375, "learning_rate": 7.481787651953844e-06, "loss": 3.948, "step": 15479 }, { "epoch": 5.156575331056883, "grad_norm": 0.765625, "learning_rate": 7.48097821596155e-06, "loss": 3.9769, "step": 15480 }, { "epoch": 5.1569084700591326, "grad_norm": 0.7421875, "learning_rate": 7.480168780190821e-06, "loss": 3.9872, "step": 15481 }, { "epoch": 5.157241609061381, "grad_norm": 0.7421875, "learning_rate": 7.479359344651079e-06, "loss": 3.9469, "step": 15482 }, { "epoch": 5.157574748063629, "grad_norm": 0.73046875, "learning_rate": 7.478549909351759e-06, "loss": 4.0147, "step": 15483 }, { "epoch": 5.157907887065878, "grad_norm": 0.765625, "learning_rate": 7.477740474302288e-06, "loss": 4.0121, "step": 15484 }, { "epoch": 5.158241026068127, "grad_norm": 0.7109375, "learning_rate": 7.476931039512088e-06, "loss": 4.022, "step": 15485 }, { "epoch": 5.158574165070376, "grad_norm": 0.75390625, "learning_rate": 7.476121604990594e-06, "loss": 4.0203, "step": 15486 }, { "epoch": 5.158907304072624, "grad_norm": 0.8515625, "learning_rate": 7.475312170747229e-06, "loss": 3.9748, "step": 15487 }, { "epoch": 5.159240443074873, "grad_norm": 0.7890625, "learning_rate": 7.474502736791427e-06, "loss": 3.9456, "step": 15488 }, { "epoch": 5.159573582077122, "grad_norm": 0.7265625, "learning_rate": 7.47369330313261e-06, "loss": 4.0438, "step": 15489 }, { "epoch": 5.159906721079371, "grad_norm": 0.79296875, "learning_rate": 7.47288386978021e-06, "loss": 3.9699, "step": 15490 }, { "epoch": 5.160239860081619, "grad_norm": 0.80078125, "learning_rate": 7.4720744367436534e-06, "loss": 3.9642, "step": 15491 }, { "epoch": 5.160572999083867, "grad_norm": 0.7734375, "learning_rate": 7.471265004032371e-06, "loss": 4.0401, "step": 15492 }, { "epoch": 5.1609061380861165, "grad_norm": 0.71875, "learning_rate": 7.47045557165579e-06, "loss": 3.9742, "step": 15493 }, { "epoch": 5.161239277088365, "grad_norm": 0.703125, "learning_rate": 7.469646139623334e-06, "loss": 4.0179, "step": 15494 }, { "epoch": 5.161572416090614, "grad_norm": 0.77734375, "learning_rate": 7.468836707944438e-06, "loss": 4.0458, "step": 15495 }, { "epoch": 5.161905555092862, "grad_norm": 0.80859375, "learning_rate": 7.468027276628524e-06, "loss": 3.9671, "step": 15496 }, { "epoch": 5.1622386940951115, "grad_norm": 0.73828125, "learning_rate": 7.467217845685025e-06, "loss": 3.9984, "step": 15497 }, { "epoch": 5.16257183309736, "grad_norm": 0.75, "learning_rate": 7.466408415123365e-06, "loss": 4.0594, "step": 15498 }, { "epoch": 5.162904972099609, "grad_norm": 0.75390625, "learning_rate": 7.465598984952976e-06, "loss": 4.1307, "step": 15499 }, { "epoch": 5.163238111101857, "grad_norm": 0.76171875, "learning_rate": 7.464789555183284e-06, "loss": 3.98, "step": 15500 }, { "epoch": 5.163571250104106, "grad_norm": 0.7734375, "learning_rate": 7.463980125823718e-06, "loss": 4.0158, "step": 15501 }, { "epoch": 5.163904389106355, "grad_norm": 0.8125, "learning_rate": 7.4631706968837065e-06, "loss": 3.9787, "step": 15502 }, { "epoch": 5.164237528108603, "grad_norm": 0.71484375, "learning_rate": 7.4623612683726755e-06, "loss": 4.055, "step": 15503 }, { "epoch": 5.164570667110852, "grad_norm": 0.7265625, "learning_rate": 7.461551840300053e-06, "loss": 3.9996, "step": 15504 }, { "epoch": 5.1649038061131005, "grad_norm": 0.74609375, "learning_rate": 7.46074241267527e-06, "loss": 4.0372, "step": 15505 }, { "epoch": 5.16523694511535, "grad_norm": 0.7578125, "learning_rate": 7.45993298550775e-06, "loss": 4.0544, "step": 15506 }, { "epoch": 5.165570084117598, "grad_norm": 0.71484375, "learning_rate": 7.459123558806926e-06, "loss": 3.9609, "step": 15507 }, { "epoch": 5.165903223119847, "grad_norm": 0.71875, "learning_rate": 7.458314132582223e-06, "loss": 4.0667, "step": 15508 }, { "epoch": 5.1662363621220955, "grad_norm": 0.75390625, "learning_rate": 7.457504706843071e-06, "loss": 4.0281, "step": 15509 }, { "epoch": 5.166569501124344, "grad_norm": 0.74609375, "learning_rate": 7.456695281598897e-06, "loss": 3.987, "step": 15510 }, { "epoch": 5.166902640126593, "grad_norm": 0.76953125, "learning_rate": 7.455885856859131e-06, "loss": 3.9203, "step": 15511 }, { "epoch": 5.167235779128841, "grad_norm": 0.796875, "learning_rate": 7.455076432633198e-06, "loss": 3.9283, "step": 15512 }, { "epoch": 5.16756891813109, "grad_norm": 0.74609375, "learning_rate": 7.4542670089305255e-06, "loss": 3.9786, "step": 15513 }, { "epoch": 5.167902057133339, "grad_norm": 0.734375, "learning_rate": 7.453457585760545e-06, "loss": 3.9515, "step": 15514 }, { "epoch": 5.168235196135588, "grad_norm": 0.73046875, "learning_rate": 7.452648163132681e-06, "loss": 4.0191, "step": 15515 }, { "epoch": 5.168568335137836, "grad_norm": 0.77734375, "learning_rate": 7.451838741056364e-06, "loss": 3.9622, "step": 15516 }, { "epoch": 5.168901474140085, "grad_norm": 0.70703125, "learning_rate": 7.451029319541022e-06, "loss": 3.9564, "step": 15517 }, { "epoch": 5.169234613142334, "grad_norm": 0.77734375, "learning_rate": 7.450219898596082e-06, "loss": 3.9898, "step": 15518 }, { "epoch": 5.169567752144582, "grad_norm": 0.77734375, "learning_rate": 7.449410478230971e-06, "loss": 3.9981, "step": 15519 }, { "epoch": 5.169900891146831, "grad_norm": 0.76953125, "learning_rate": 7.448601058455122e-06, "loss": 4.0672, "step": 15520 }, { "epoch": 5.1702340301490795, "grad_norm": 0.76171875, "learning_rate": 7.447791639277959e-06, "loss": 4.0501, "step": 15521 }, { "epoch": 5.170567169151329, "grad_norm": 0.72265625, "learning_rate": 7.446982220708908e-06, "loss": 4.0235, "step": 15522 }, { "epoch": 5.170900308153577, "grad_norm": 0.7265625, "learning_rate": 7.446172802757399e-06, "loss": 4.03, "step": 15523 }, { "epoch": 5.171233447155826, "grad_norm": 0.76953125, "learning_rate": 7.4453633854328605e-06, "loss": 4.0363, "step": 15524 }, { "epoch": 5.171566586158074, "grad_norm": 0.765625, "learning_rate": 7.444553968744722e-06, "loss": 3.9839, "step": 15525 }, { "epoch": 5.171899725160323, "grad_norm": 0.75390625, "learning_rate": 7.443744552702407e-06, "loss": 4.0624, "step": 15526 }, { "epoch": 5.172232864162572, "grad_norm": 0.8125, "learning_rate": 7.442935137315348e-06, "loss": 4.0001, "step": 15527 }, { "epoch": 5.17256600316482, "grad_norm": 0.78125, "learning_rate": 7.442125722592971e-06, "loss": 3.9025, "step": 15528 }, { "epoch": 5.172899142167069, "grad_norm": 0.75, "learning_rate": 7.441316308544705e-06, "loss": 4.0139, "step": 15529 }, { "epoch": 5.173232281169318, "grad_norm": 0.77734375, "learning_rate": 7.440506895179976e-06, "loss": 3.9893, "step": 15530 }, { "epoch": 5.173565420171567, "grad_norm": 0.7734375, "learning_rate": 7.439697482508211e-06, "loss": 4.0271, "step": 15531 }, { "epoch": 5.173898559173815, "grad_norm": 0.78125, "learning_rate": 7.438888070538843e-06, "loss": 4.0532, "step": 15532 }, { "epoch": 5.174231698176064, "grad_norm": 0.7421875, "learning_rate": 7.438078659281293e-06, "loss": 3.9608, "step": 15533 }, { "epoch": 5.174564837178313, "grad_norm": 0.77734375, "learning_rate": 7.4372692487449945e-06, "loss": 4.0807, "step": 15534 }, { "epoch": 5.174897976180561, "grad_norm": 0.703125, "learning_rate": 7.4364598389393715e-06, "loss": 3.9584, "step": 15535 }, { "epoch": 5.17523111518281, "grad_norm": 0.78515625, "learning_rate": 7.435650429873856e-06, "loss": 3.944, "step": 15536 }, { "epoch": 5.175564254185058, "grad_norm": 0.78125, "learning_rate": 7.434841021557873e-06, "loss": 4.0306, "step": 15537 }, { "epoch": 5.175897393187308, "grad_norm": 0.7734375, "learning_rate": 7.434031614000852e-06, "loss": 4.026, "step": 15538 }, { "epoch": 5.176230532189556, "grad_norm": 0.75, "learning_rate": 7.43322220721222e-06, "loss": 4.0036, "step": 15539 }, { "epoch": 5.176563671191805, "grad_norm": 0.78125, "learning_rate": 7.432412801201402e-06, "loss": 3.9213, "step": 15540 }, { "epoch": 5.176896810194053, "grad_norm": 0.7421875, "learning_rate": 7.43160339597783e-06, "loss": 4.0213, "step": 15541 }, { "epoch": 5.1772299491963025, "grad_norm": 0.72265625, "learning_rate": 7.430793991550929e-06, "loss": 4.0034, "step": 15542 }, { "epoch": 5.177563088198551, "grad_norm": 0.7421875, "learning_rate": 7.42998458793013e-06, "loss": 4.0105, "step": 15543 }, { "epoch": 5.177896227200799, "grad_norm": 0.7734375, "learning_rate": 7.429175185124857e-06, "loss": 3.991, "step": 15544 }, { "epoch": 5.178229366203048, "grad_norm": 0.8046875, "learning_rate": 7.428365783144542e-06, "loss": 3.9668, "step": 15545 }, { "epoch": 5.178562505205297, "grad_norm": 0.7265625, "learning_rate": 7.427556381998607e-06, "loss": 3.9089, "step": 15546 }, { "epoch": 5.178895644207546, "grad_norm": 0.7421875, "learning_rate": 7.42674698169649e-06, "loss": 4.014, "step": 15547 }, { "epoch": 5.179228783209794, "grad_norm": 0.734375, "learning_rate": 7.425937582247607e-06, "loss": 3.9648, "step": 15548 }, { "epoch": 5.179561922212043, "grad_norm": 0.74609375, "learning_rate": 7.425128183661391e-06, "loss": 4.0138, "step": 15549 }, { "epoch": 5.1798950612142916, "grad_norm": 0.765625, "learning_rate": 7.424318785947269e-06, "loss": 4.0285, "step": 15550 }, { "epoch": 5.180228200216541, "grad_norm": 0.7734375, "learning_rate": 7.42350938911467e-06, "loss": 3.9905, "step": 15551 }, { "epoch": 5.180561339218789, "grad_norm": 0.73046875, "learning_rate": 7.4226999931730204e-06, "loss": 4.0329, "step": 15552 }, { "epoch": 5.180894478221037, "grad_norm": 0.76953125, "learning_rate": 7.421890598131749e-06, "loss": 3.9408, "step": 15553 }, { "epoch": 5.1812276172232865, "grad_norm": 0.765625, "learning_rate": 7.421081204000282e-06, "loss": 3.9679, "step": 15554 }, { "epoch": 5.181560756225535, "grad_norm": 0.7578125, "learning_rate": 7.42027181078805e-06, "loss": 3.9824, "step": 15555 }, { "epoch": 5.181893895227784, "grad_norm": 0.74609375, "learning_rate": 7.419462418504479e-06, "loss": 4.0, "step": 15556 }, { "epoch": 5.182227034230032, "grad_norm": 0.765625, "learning_rate": 7.418653027158995e-06, "loss": 3.9581, "step": 15557 }, { "epoch": 5.1825601732322815, "grad_norm": 0.79296875, "learning_rate": 7.417843636761028e-06, "loss": 4.0413, "step": 15558 }, { "epoch": 5.18289331223453, "grad_norm": 0.765625, "learning_rate": 7.417034247320002e-06, "loss": 3.9512, "step": 15559 }, { "epoch": 5.183226451236779, "grad_norm": 0.73046875, "learning_rate": 7.4162248588453495e-06, "loss": 4.0282, "step": 15560 }, { "epoch": 5.183559590239027, "grad_norm": 0.78515625, "learning_rate": 7.415415471346494e-06, "loss": 3.9853, "step": 15561 }, { "epoch": 5.1838927292412755, "grad_norm": 0.73046875, "learning_rate": 7.414606084832868e-06, "loss": 3.8936, "step": 15562 }, { "epoch": 5.184225868243525, "grad_norm": 0.72265625, "learning_rate": 7.413796699313893e-06, "loss": 4.0216, "step": 15563 }, { "epoch": 5.184559007245773, "grad_norm": 0.7265625, "learning_rate": 7.4129873147990015e-06, "loss": 4.0737, "step": 15564 }, { "epoch": 5.184892146248022, "grad_norm": 0.72265625, "learning_rate": 7.4121779312976224e-06, "loss": 4.0551, "step": 15565 }, { "epoch": 5.1852252852502705, "grad_norm": 0.77734375, "learning_rate": 7.4113685488191754e-06, "loss": 4.0163, "step": 15566 }, { "epoch": 5.18555842425252, "grad_norm": 0.78125, "learning_rate": 7.410559167373096e-06, "loss": 4.0318, "step": 15567 }, { "epoch": 5.185891563254768, "grad_norm": 0.7578125, "learning_rate": 7.409749786968805e-06, "loss": 3.9759, "step": 15568 }, { "epoch": 5.186224702257017, "grad_norm": 0.74609375, "learning_rate": 7.408940407615736e-06, "loss": 3.9157, "step": 15569 }, { "epoch": 5.1865578412592654, "grad_norm": 0.75, "learning_rate": 7.4081310293233125e-06, "loss": 4.0576, "step": 15570 }, { "epoch": 5.186890980261514, "grad_norm": 0.734375, "learning_rate": 7.407321652100965e-06, "loss": 4.023, "step": 15571 }, { "epoch": 5.187224119263763, "grad_norm": 0.73828125, "learning_rate": 7.406512275958118e-06, "loss": 3.9534, "step": 15572 }, { "epoch": 5.187557258266011, "grad_norm": 0.76171875, "learning_rate": 7.405702900904202e-06, "loss": 4.0474, "step": 15573 }, { "epoch": 5.18789039726826, "grad_norm": 0.7734375, "learning_rate": 7.404893526948645e-06, "loss": 3.9662, "step": 15574 }, { "epoch": 5.188223536270509, "grad_norm": 0.78515625, "learning_rate": 7.40408415410087e-06, "loss": 4.1068, "step": 15575 }, { "epoch": 5.188556675272758, "grad_norm": 0.77734375, "learning_rate": 7.403274782370307e-06, "loss": 4.0057, "step": 15576 }, { "epoch": 5.188889814275006, "grad_norm": 0.7578125, "learning_rate": 7.402465411766381e-06, "loss": 4.0222, "step": 15577 }, { "epoch": 5.189222953277255, "grad_norm": 0.78515625, "learning_rate": 7.401656042298524e-06, "loss": 4.0392, "step": 15578 }, { "epoch": 5.189556092279504, "grad_norm": 0.77734375, "learning_rate": 7.40084667397616e-06, "loss": 3.9443, "step": 15579 }, { "epoch": 5.189889231281752, "grad_norm": 0.73046875, "learning_rate": 7.400037306808719e-06, "loss": 4.0449, "step": 15580 }, { "epoch": 5.190222370284001, "grad_norm": 0.74609375, "learning_rate": 7.399227940805625e-06, "loss": 4.0394, "step": 15581 }, { "epoch": 5.190555509286249, "grad_norm": 0.78515625, "learning_rate": 7.398418575976309e-06, "loss": 3.9656, "step": 15582 }, { "epoch": 5.190888648288499, "grad_norm": 0.7421875, "learning_rate": 7.397609212330198e-06, "loss": 4.0677, "step": 15583 }, { "epoch": 5.191221787290747, "grad_norm": 0.73828125, "learning_rate": 7.396799849876715e-06, "loss": 3.9936, "step": 15584 }, { "epoch": 5.191554926292996, "grad_norm": 0.73828125, "learning_rate": 7.395990488625292e-06, "loss": 3.9943, "step": 15585 }, { "epoch": 5.191888065295244, "grad_norm": 0.8046875, "learning_rate": 7.395181128585353e-06, "loss": 3.9831, "step": 15586 }, { "epoch": 5.192221204297493, "grad_norm": 0.75390625, "learning_rate": 7.394371769766328e-06, "loss": 4.0264, "step": 15587 }, { "epoch": 5.192554343299742, "grad_norm": 0.73046875, "learning_rate": 7.393562412177641e-06, "loss": 3.9884, "step": 15588 }, { "epoch": 5.19288748230199, "grad_norm": 0.75390625, "learning_rate": 7.392753055828724e-06, "loss": 3.9733, "step": 15589 }, { "epoch": 5.193220621304239, "grad_norm": 0.76953125, "learning_rate": 7.391943700728999e-06, "loss": 4.0346, "step": 15590 }, { "epoch": 5.193553760306488, "grad_norm": 0.75390625, "learning_rate": 7.391134346887901e-06, "loss": 4.0232, "step": 15591 }, { "epoch": 5.193886899308737, "grad_norm": 0.7421875, "learning_rate": 7.390324994314847e-06, "loss": 3.9607, "step": 15592 }, { "epoch": 5.194220038310985, "grad_norm": 0.74609375, "learning_rate": 7.389515643019271e-06, "loss": 3.9759, "step": 15593 }, { "epoch": 5.194553177313234, "grad_norm": 0.75, "learning_rate": 7.388706293010597e-06, "loss": 3.9594, "step": 15594 }, { "epoch": 5.194886316315483, "grad_norm": 0.74609375, "learning_rate": 7.387896944298256e-06, "loss": 4.0096, "step": 15595 }, { "epoch": 5.195219455317732, "grad_norm": 0.7109375, "learning_rate": 7.38708759689167e-06, "loss": 3.9635, "step": 15596 }, { "epoch": 5.19555259431998, "grad_norm": 0.72265625, "learning_rate": 7.386278250800271e-06, "loss": 4.0169, "step": 15597 }, { "epoch": 5.195885733322228, "grad_norm": 0.7421875, "learning_rate": 7.385468906033483e-06, "loss": 4.0025, "step": 15598 }, { "epoch": 5.1962188723244775, "grad_norm": 0.76953125, "learning_rate": 7.3846595626007355e-06, "loss": 3.9944, "step": 15599 }, { "epoch": 5.196552011326726, "grad_norm": 0.79296875, "learning_rate": 7.3838502205114554e-06, "loss": 3.9103, "step": 15600 }, { "epoch": 5.196885150328975, "grad_norm": 0.7421875, "learning_rate": 7.383040879775065e-06, "loss": 4.0169, "step": 15601 }, { "epoch": 5.197218289331223, "grad_norm": 0.76171875, "learning_rate": 7.382231540400997e-06, "loss": 3.9435, "step": 15602 }, { "epoch": 5.1975514283334725, "grad_norm": 0.78515625, "learning_rate": 7.3814222023986756e-06, "loss": 4.0352, "step": 15603 }, { "epoch": 5.197884567335721, "grad_norm": 0.7109375, "learning_rate": 7.38061286577753e-06, "loss": 3.9559, "step": 15604 }, { "epoch": 5.198217706337969, "grad_norm": 0.75390625, "learning_rate": 7.379803530546983e-06, "loss": 4.0396, "step": 15605 }, { "epoch": 5.198550845340218, "grad_norm": 0.76953125, "learning_rate": 7.3789941967164684e-06, "loss": 4.013, "step": 15606 }, { "epoch": 5.198883984342467, "grad_norm": 0.75, "learning_rate": 7.378184864295406e-06, "loss": 3.9791, "step": 15607 }, { "epoch": 5.199217123344716, "grad_norm": 0.75, "learning_rate": 7.377375533293228e-06, "loss": 3.9556, "step": 15608 }, { "epoch": 5.199550262346964, "grad_norm": 0.7578125, "learning_rate": 7.376566203719362e-06, "loss": 3.9595, "step": 15609 }, { "epoch": 5.199883401349213, "grad_norm": 0.7578125, "learning_rate": 7.375756875583228e-06, "loss": 4.0041, "step": 15610 }, { "epoch": 5.2002165403514615, "grad_norm": 0.76171875, "learning_rate": 7.37494754889426e-06, "loss": 4.0232, "step": 15611 }, { "epoch": 5.200549679353711, "grad_norm": 0.7421875, "learning_rate": 7.37413822366188e-06, "loss": 4.0441, "step": 15612 }, { "epoch": 5.200882818355959, "grad_norm": 0.77734375, "learning_rate": 7.373328899895518e-06, "loss": 3.9854, "step": 15613 }, { "epoch": 5.201215957358207, "grad_norm": 0.765625, "learning_rate": 7.372519577604599e-06, "loss": 3.9477, "step": 15614 }, { "epoch": 5.2015490963604565, "grad_norm": 0.765625, "learning_rate": 7.371710256798552e-06, "loss": 3.9906, "step": 15615 }, { "epoch": 5.201882235362705, "grad_norm": 0.765625, "learning_rate": 7.370900937486802e-06, "loss": 3.9962, "step": 15616 }, { "epoch": 5.202215374364954, "grad_norm": 0.796875, "learning_rate": 7.370091619678778e-06, "loss": 3.975, "step": 15617 }, { "epoch": 5.202548513367202, "grad_norm": 0.70703125, "learning_rate": 7.369282303383906e-06, "loss": 3.9799, "step": 15618 }, { "epoch": 5.202881652369451, "grad_norm": 0.74609375, "learning_rate": 7.368472988611608e-06, "loss": 4.0176, "step": 15619 }, { "epoch": 5.2032147913717, "grad_norm": 0.8203125, "learning_rate": 7.3676636753713175e-06, "loss": 3.9057, "step": 15620 }, { "epoch": 5.203547930373949, "grad_norm": 0.77734375, "learning_rate": 7.366854363672457e-06, "loss": 3.9106, "step": 15621 }, { "epoch": 5.203881069376197, "grad_norm": 0.77734375, "learning_rate": 7.366045053524457e-06, "loss": 3.9947, "step": 15622 }, { "epoch": 5.2042142083784455, "grad_norm": 0.71484375, "learning_rate": 7.365235744936739e-06, "loss": 4.0116, "step": 15623 }, { "epoch": 5.204547347380695, "grad_norm": 0.8125, "learning_rate": 7.364426437918735e-06, "loss": 4.0201, "step": 15624 }, { "epoch": 5.204880486382943, "grad_norm": 0.74609375, "learning_rate": 7.363617132479868e-06, "loss": 3.9816, "step": 15625 }, { "epoch": 5.205213625385192, "grad_norm": 0.74609375, "learning_rate": 7.3628078286295675e-06, "loss": 4.0473, "step": 15626 }, { "epoch": 5.2055467643874405, "grad_norm": 0.7578125, "learning_rate": 7.36199852637726e-06, "loss": 3.8763, "step": 15627 }, { "epoch": 5.20587990338969, "grad_norm": 0.73046875, "learning_rate": 7.361189225732368e-06, "loss": 4.0031, "step": 15628 }, { "epoch": 5.206213042391938, "grad_norm": 0.76953125, "learning_rate": 7.3603799267043216e-06, "loss": 3.9767, "step": 15629 }, { "epoch": 5.206546181394187, "grad_norm": 0.69921875, "learning_rate": 7.359570629302546e-06, "loss": 4.0438, "step": 15630 }, { "epoch": 5.206879320396435, "grad_norm": 0.71875, "learning_rate": 7.358761333536469e-06, "loss": 4.0264, "step": 15631 }, { "epoch": 5.207212459398684, "grad_norm": 0.7734375, "learning_rate": 7.357952039415515e-06, "loss": 4.0013, "step": 15632 }, { "epoch": 5.207545598400933, "grad_norm": 0.76953125, "learning_rate": 7.357142746949115e-06, "loss": 3.9708, "step": 15633 }, { "epoch": 5.207878737403181, "grad_norm": 0.78515625, "learning_rate": 7.35633345614669e-06, "loss": 4.0243, "step": 15634 }, { "epoch": 5.20821187640543, "grad_norm": 0.7578125, "learning_rate": 7.355524167017671e-06, "loss": 3.9641, "step": 15635 }, { "epoch": 5.208545015407679, "grad_norm": 0.71484375, "learning_rate": 7.354714879571483e-06, "loss": 3.9749, "step": 15636 }, { "epoch": 5.208878154409928, "grad_norm": 0.80078125, "learning_rate": 7.353905593817551e-06, "loss": 3.998, "step": 15637 }, { "epoch": 5.209211293412176, "grad_norm": 0.8203125, "learning_rate": 7.353096309765302e-06, "loss": 3.9859, "step": 15638 }, { "epoch": 5.209544432414425, "grad_norm": 0.80859375, "learning_rate": 7.352287027424163e-06, "loss": 3.9923, "step": 15639 }, { "epoch": 5.209877571416674, "grad_norm": 0.796875, "learning_rate": 7.351477746803559e-06, "loss": 3.95, "step": 15640 }, { "epoch": 5.210210710418922, "grad_norm": 0.7890625, "learning_rate": 7.350668467912919e-06, "loss": 3.9718, "step": 15641 }, { "epoch": 5.210543849421171, "grad_norm": 0.796875, "learning_rate": 7.3498591907616665e-06, "loss": 4.0085, "step": 15642 }, { "epoch": 5.210876988423419, "grad_norm": 0.7734375, "learning_rate": 7.349049915359231e-06, "loss": 3.9182, "step": 15643 }, { "epoch": 5.211210127425669, "grad_norm": 0.734375, "learning_rate": 7.348240641715035e-06, "loss": 4.0263, "step": 15644 }, { "epoch": 5.211543266427917, "grad_norm": 0.7578125, "learning_rate": 7.347431369838511e-06, "loss": 4.0221, "step": 15645 }, { "epoch": 5.211876405430166, "grad_norm": 0.734375, "learning_rate": 7.3466220997390785e-06, "loss": 3.9583, "step": 15646 }, { "epoch": 5.212209544432414, "grad_norm": 0.703125, "learning_rate": 7.345812831426164e-06, "loss": 3.957, "step": 15647 }, { "epoch": 5.2125426834346635, "grad_norm": 0.71484375, "learning_rate": 7.345003564909199e-06, "loss": 3.9214, "step": 15648 }, { "epoch": 5.212875822436912, "grad_norm": 0.73828125, "learning_rate": 7.344194300197604e-06, "loss": 3.9872, "step": 15649 }, { "epoch": 5.21320896143916, "grad_norm": 0.765625, "learning_rate": 7.343385037300812e-06, "loss": 3.8992, "step": 15650 }, { "epoch": 5.213542100441409, "grad_norm": 0.734375, "learning_rate": 7.342575776228241e-06, "loss": 4.0018, "step": 15651 }, { "epoch": 5.213875239443658, "grad_norm": 0.73046875, "learning_rate": 7.341766516989323e-06, "loss": 3.9298, "step": 15652 }, { "epoch": 5.214208378445907, "grad_norm": 0.765625, "learning_rate": 7.340957259593481e-06, "loss": 4.0369, "step": 15653 }, { "epoch": 5.214541517448155, "grad_norm": 0.73828125, "learning_rate": 7.340148004050146e-06, "loss": 3.9854, "step": 15654 }, { "epoch": 5.214874656450404, "grad_norm": 0.76171875, "learning_rate": 7.339338750368739e-06, "loss": 3.9842, "step": 15655 }, { "epoch": 5.215207795452653, "grad_norm": 0.71875, "learning_rate": 7.338529498558686e-06, "loss": 3.9619, "step": 15656 }, { "epoch": 5.215540934454902, "grad_norm": 0.69921875, "learning_rate": 7.337720248629416e-06, "loss": 3.9969, "step": 15657 }, { "epoch": 5.21587407345715, "grad_norm": 0.78515625, "learning_rate": 7.3369110005903516e-06, "loss": 4.0058, "step": 15658 }, { "epoch": 5.216207212459398, "grad_norm": 0.7421875, "learning_rate": 7.336101754450923e-06, "loss": 4.0097, "step": 15659 }, { "epoch": 5.2165403514616475, "grad_norm": 0.78515625, "learning_rate": 7.335292510220553e-06, "loss": 3.9678, "step": 15660 }, { "epoch": 5.216873490463896, "grad_norm": 0.7578125, "learning_rate": 7.334483267908668e-06, "loss": 3.9965, "step": 15661 }, { "epoch": 5.217206629466145, "grad_norm": 0.7265625, "learning_rate": 7.333674027524695e-06, "loss": 4.0077, "step": 15662 }, { "epoch": 5.217539768468393, "grad_norm": 0.69921875, "learning_rate": 7.3328647890780615e-06, "loss": 4.0412, "step": 15663 }, { "epoch": 5.2178729074706425, "grad_norm": 0.75, "learning_rate": 7.33205555257819e-06, "loss": 3.9884, "step": 15664 }, { "epoch": 5.218206046472891, "grad_norm": 0.76171875, "learning_rate": 7.3312463180345055e-06, "loss": 3.9574, "step": 15665 }, { "epoch": 5.218539185475139, "grad_norm": 0.73828125, "learning_rate": 7.330437085456437e-06, "loss": 4.021, "step": 15666 }, { "epoch": 5.218872324477388, "grad_norm": 0.80078125, "learning_rate": 7.329627854853408e-06, "loss": 4.0117, "step": 15667 }, { "epoch": 5.2192054634796365, "grad_norm": 0.7578125, "learning_rate": 7.328818626234848e-06, "loss": 4.02, "step": 15668 }, { "epoch": 5.219538602481886, "grad_norm": 0.77734375, "learning_rate": 7.328009399610178e-06, "loss": 4.0107, "step": 15669 }, { "epoch": 5.219871741484134, "grad_norm": 0.75390625, "learning_rate": 7.327200174988827e-06, "loss": 3.9406, "step": 15670 }, { "epoch": 5.220204880486383, "grad_norm": 0.71875, "learning_rate": 7.326390952380219e-06, "loss": 4.0117, "step": 15671 }, { "epoch": 5.2205380194886315, "grad_norm": 0.734375, "learning_rate": 7.325581731793783e-06, "loss": 4.007, "step": 15672 }, { "epoch": 5.220871158490881, "grad_norm": 0.76171875, "learning_rate": 7.324772513238941e-06, "loss": 4.105, "step": 15673 }, { "epoch": 5.221204297493129, "grad_norm": 0.78515625, "learning_rate": 7.3239632967251165e-06, "loss": 3.9204, "step": 15674 }, { "epoch": 5.221537436495377, "grad_norm": 0.78515625, "learning_rate": 7.323154082261742e-06, "loss": 3.8732, "step": 15675 }, { "epoch": 5.2218705754976265, "grad_norm": 0.71875, "learning_rate": 7.322344869858237e-06, "loss": 4.0304, "step": 15676 }, { "epoch": 5.222203714499875, "grad_norm": 0.7421875, "learning_rate": 7.3215356595240305e-06, "loss": 3.8859, "step": 15677 }, { "epoch": 5.222536853502124, "grad_norm": 0.7265625, "learning_rate": 7.320726451268546e-06, "loss": 4.0698, "step": 15678 }, { "epoch": 5.222869992504372, "grad_norm": 0.7265625, "learning_rate": 7.319917245101212e-06, "loss": 4.056, "step": 15679 }, { "epoch": 5.223203131506621, "grad_norm": 0.7578125, "learning_rate": 7.319108041031449e-06, "loss": 3.9691, "step": 15680 }, { "epoch": 5.22353627050887, "grad_norm": 0.7421875, "learning_rate": 7.318298839068691e-06, "loss": 3.948, "step": 15681 }, { "epoch": 5.223869409511119, "grad_norm": 0.70703125, "learning_rate": 7.317489639222354e-06, "loss": 3.9956, "step": 15682 }, { "epoch": 5.224202548513367, "grad_norm": 0.734375, "learning_rate": 7.316680441501868e-06, "loss": 3.9752, "step": 15683 }, { "epoch": 5.2245356875156155, "grad_norm": 0.7578125, "learning_rate": 7.315871245916656e-06, "loss": 3.9914, "step": 15684 }, { "epoch": 5.224868826517865, "grad_norm": 0.76171875, "learning_rate": 7.315062052476148e-06, "loss": 3.9663, "step": 15685 }, { "epoch": 5.225201965520113, "grad_norm": 0.76171875, "learning_rate": 7.314252861189764e-06, "loss": 3.9434, "step": 15686 }, { "epoch": 5.225535104522362, "grad_norm": 0.796875, "learning_rate": 7.3134436720669336e-06, "loss": 4.0088, "step": 15687 }, { "epoch": 5.22586824352461, "grad_norm": 0.75, "learning_rate": 7.312634485117078e-06, "loss": 4.0142, "step": 15688 }, { "epoch": 5.22620138252686, "grad_norm": 0.76171875, "learning_rate": 7.311825300349627e-06, "loss": 3.9999, "step": 15689 }, { "epoch": 5.226534521529108, "grad_norm": 0.77734375, "learning_rate": 7.311016117774007e-06, "loss": 3.9224, "step": 15690 }, { "epoch": 5.226867660531357, "grad_norm": 0.74609375, "learning_rate": 7.310206937399634e-06, "loss": 3.9904, "step": 15691 }, { "epoch": 5.227200799533605, "grad_norm": 0.7421875, "learning_rate": 7.309397759235942e-06, "loss": 4.0066, "step": 15692 }, { "epoch": 5.227533938535854, "grad_norm": 0.75, "learning_rate": 7.308588583292352e-06, "loss": 4.0615, "step": 15693 }, { "epoch": 5.227867077538103, "grad_norm": 0.75, "learning_rate": 7.3077794095782905e-06, "loss": 4.0227, "step": 15694 }, { "epoch": 5.228200216540351, "grad_norm": 0.765625, "learning_rate": 7.3069702381031825e-06, "loss": 3.9738, "step": 15695 }, { "epoch": 5.2285333555426, "grad_norm": 0.76953125, "learning_rate": 7.306161068876454e-06, "loss": 3.9239, "step": 15696 }, { "epoch": 5.228866494544849, "grad_norm": 0.7578125, "learning_rate": 7.305351901907528e-06, "loss": 3.9551, "step": 15697 }, { "epoch": 5.229199633547098, "grad_norm": 0.72265625, "learning_rate": 7.304542737205832e-06, "loss": 3.948, "step": 15698 }, { "epoch": 5.229532772549346, "grad_norm": 0.703125, "learning_rate": 7.303733574780791e-06, "loss": 4.0549, "step": 15699 }, { "epoch": 5.229865911551595, "grad_norm": 0.7109375, "learning_rate": 7.302924414641826e-06, "loss": 3.9806, "step": 15700 }, { "epoch": 5.230199050553844, "grad_norm": 0.7265625, "learning_rate": 7.302115256798367e-06, "loss": 4.0148, "step": 15701 }, { "epoch": 5.230532189556092, "grad_norm": 0.69921875, "learning_rate": 7.301306101259834e-06, "loss": 4.0458, "step": 15702 }, { "epoch": 5.230865328558341, "grad_norm": 0.74609375, "learning_rate": 7.300496948035657e-06, "loss": 3.9314, "step": 15703 }, { "epoch": 5.231198467560589, "grad_norm": 0.81640625, "learning_rate": 7.299687797135255e-06, "loss": 3.9564, "step": 15704 }, { "epoch": 5.2315316065628386, "grad_norm": 0.734375, "learning_rate": 7.29887864856806e-06, "loss": 3.995, "step": 15705 }, { "epoch": 5.231864745565087, "grad_norm": 0.70703125, "learning_rate": 7.298069502343491e-06, "loss": 3.93, "step": 15706 }, { "epoch": 5.232197884567336, "grad_norm": 0.7421875, "learning_rate": 7.297260358470976e-06, "loss": 3.9067, "step": 15707 }, { "epoch": 5.232531023569584, "grad_norm": 0.75390625, "learning_rate": 7.29645121695994e-06, "loss": 3.9644, "step": 15708 }, { "epoch": 5.2328641625718335, "grad_norm": 0.78515625, "learning_rate": 7.295642077819804e-06, "loss": 3.9731, "step": 15709 }, { "epoch": 5.233197301574082, "grad_norm": 0.77734375, "learning_rate": 7.294832941059996e-06, "loss": 3.9434, "step": 15710 }, { "epoch": 5.23353044057633, "grad_norm": 0.73828125, "learning_rate": 7.294023806689939e-06, "loss": 3.946, "step": 15711 }, { "epoch": 5.233863579578579, "grad_norm": 0.77734375, "learning_rate": 7.29321467471906e-06, "loss": 4.0115, "step": 15712 }, { "epoch": 5.234196718580828, "grad_norm": 0.7421875, "learning_rate": 7.292405545156781e-06, "loss": 4.0266, "step": 15713 }, { "epoch": 5.234529857583077, "grad_norm": 0.75, "learning_rate": 7.291596418012528e-06, "loss": 4.0465, "step": 15714 }, { "epoch": 5.234862996585325, "grad_norm": 0.765625, "learning_rate": 7.290787293295725e-06, "loss": 3.9754, "step": 15715 }, { "epoch": 5.235196135587574, "grad_norm": 0.7890625, "learning_rate": 7.289978171015798e-06, "loss": 3.9152, "step": 15716 }, { "epoch": 5.2355292745898225, "grad_norm": 0.74609375, "learning_rate": 7.289169051182172e-06, "loss": 3.9897, "step": 15717 }, { "epoch": 5.235862413592072, "grad_norm": 0.81640625, "learning_rate": 7.288359933804267e-06, "loss": 3.9499, "step": 15718 }, { "epoch": 5.23619555259432, "grad_norm": 0.796875, "learning_rate": 7.287550818891511e-06, "loss": 3.9828, "step": 15719 }, { "epoch": 5.236528691596568, "grad_norm": 0.765625, "learning_rate": 7.2867417064533265e-06, "loss": 4.0874, "step": 15720 }, { "epoch": 5.2368618305988175, "grad_norm": 0.7890625, "learning_rate": 7.285932596499141e-06, "loss": 4.0084, "step": 15721 }, { "epoch": 5.237194969601066, "grad_norm": 0.796875, "learning_rate": 7.2851234890383756e-06, "loss": 3.9868, "step": 15722 }, { "epoch": 5.237528108603315, "grad_norm": 0.7890625, "learning_rate": 7.2843143840804575e-06, "loss": 4.0128, "step": 15723 }, { "epoch": 5.237861247605563, "grad_norm": 0.7734375, "learning_rate": 7.283505281634809e-06, "loss": 4.0263, "step": 15724 }, { "epoch": 5.2381943866078124, "grad_norm": 0.81640625, "learning_rate": 7.282696181710856e-06, "loss": 4.0759, "step": 15725 }, { "epoch": 5.238527525610061, "grad_norm": 0.80859375, "learning_rate": 7.281887084318023e-06, "loss": 3.9777, "step": 15726 }, { "epoch": 5.238860664612309, "grad_norm": 0.7890625, "learning_rate": 7.28107798946573e-06, "loss": 4.0176, "step": 15727 }, { "epoch": 5.239193803614558, "grad_norm": 0.74609375, "learning_rate": 7.280268897163406e-06, "loss": 3.9839, "step": 15728 }, { "epoch": 5.2395269426168065, "grad_norm": 0.75390625, "learning_rate": 7.279459807420474e-06, "loss": 4.0412, "step": 15729 }, { "epoch": 5.239860081619056, "grad_norm": 0.72265625, "learning_rate": 7.278650720246355e-06, "loss": 4.0313, "step": 15730 }, { "epoch": 5.240193220621304, "grad_norm": 0.78515625, "learning_rate": 7.277841635650478e-06, "loss": 3.9497, "step": 15731 }, { "epoch": 5.240526359623553, "grad_norm": 0.7734375, "learning_rate": 7.2770325536422625e-06, "loss": 3.9552, "step": 15732 }, { "epoch": 5.2408594986258015, "grad_norm": 0.73828125, "learning_rate": 7.276223474231137e-06, "loss": 3.9982, "step": 15733 }, { "epoch": 5.241192637628051, "grad_norm": 0.74609375, "learning_rate": 7.275414397426521e-06, "loss": 3.9837, "step": 15734 }, { "epoch": 5.241525776630299, "grad_norm": 0.7421875, "learning_rate": 7.274605323237845e-06, "loss": 3.9415, "step": 15735 }, { "epoch": 5.241858915632548, "grad_norm": 0.74609375, "learning_rate": 7.273796251674526e-06, "loss": 4.0151, "step": 15736 }, { "epoch": 5.242192054634796, "grad_norm": 0.8125, "learning_rate": 7.2729871827459895e-06, "loss": 4.0128, "step": 15737 }, { "epoch": 5.242525193637045, "grad_norm": 0.78125, "learning_rate": 7.272178116461663e-06, "loss": 4.0116, "step": 15738 }, { "epoch": 5.242858332639294, "grad_norm": 0.74609375, "learning_rate": 7.271369052830966e-06, "loss": 4.0075, "step": 15739 }, { "epoch": 5.243191471641542, "grad_norm": 0.73828125, "learning_rate": 7.270559991863325e-06, "loss": 3.9592, "step": 15740 }, { "epoch": 5.243524610643791, "grad_norm": 0.7109375, "learning_rate": 7.269750933568163e-06, "loss": 3.9829, "step": 15741 }, { "epoch": 5.24385774964604, "grad_norm": 0.765625, "learning_rate": 7.268941877954904e-06, "loss": 4.0017, "step": 15742 }, { "epoch": 5.244190888648289, "grad_norm": 0.75390625, "learning_rate": 7.268132825032971e-06, "loss": 4.0059, "step": 15743 }, { "epoch": 5.244524027650537, "grad_norm": 0.75390625, "learning_rate": 7.267323774811791e-06, "loss": 3.9067, "step": 15744 }, { "epoch": 5.2448571666527855, "grad_norm": 0.78515625, "learning_rate": 7.266514727300784e-06, "loss": 4.0256, "step": 15745 }, { "epoch": 5.245190305655035, "grad_norm": 0.77734375, "learning_rate": 7.265705682509373e-06, "loss": 3.8772, "step": 15746 }, { "epoch": 5.245523444657283, "grad_norm": 0.765625, "learning_rate": 7.2648966404469846e-06, "loss": 3.9791, "step": 15747 }, { "epoch": 5.245856583659532, "grad_norm": 0.734375, "learning_rate": 7.26408760112304e-06, "loss": 4.0276, "step": 15748 }, { "epoch": 5.24618972266178, "grad_norm": 0.7890625, "learning_rate": 7.263278564546966e-06, "loss": 3.8784, "step": 15749 }, { "epoch": 5.24652286166403, "grad_norm": 0.77734375, "learning_rate": 7.26246953072818e-06, "loss": 3.9877, "step": 15750 }, { "epoch": 5.246856000666278, "grad_norm": 0.70703125, "learning_rate": 7.261660499676114e-06, "loss": 4.0397, "step": 15751 }, { "epoch": 5.247189139668527, "grad_norm": 0.76171875, "learning_rate": 7.260851471400184e-06, "loss": 3.9675, "step": 15752 }, { "epoch": 5.247522278670775, "grad_norm": 0.76953125, "learning_rate": 7.26004244590982e-06, "loss": 3.9694, "step": 15753 }, { "epoch": 5.247855417673024, "grad_norm": 0.7734375, "learning_rate": 7.259233423214439e-06, "loss": 3.9136, "step": 15754 }, { "epoch": 5.248188556675273, "grad_norm": 0.82421875, "learning_rate": 7.258424403323466e-06, "loss": 3.9783, "step": 15755 }, { "epoch": 5.248521695677521, "grad_norm": 0.76953125, "learning_rate": 7.257615386246328e-06, "loss": 3.9873, "step": 15756 }, { "epoch": 5.24885483467977, "grad_norm": 0.7421875, "learning_rate": 7.256806371992442e-06, "loss": 4.0014, "step": 15757 }, { "epoch": 5.249187973682019, "grad_norm": 0.78125, "learning_rate": 7.255997360571239e-06, "loss": 4.0126, "step": 15758 }, { "epoch": 5.249521112684268, "grad_norm": 0.734375, "learning_rate": 7.255188351992135e-06, "loss": 3.9984, "step": 15759 }, { "epoch": 5.249854251686516, "grad_norm": 0.73828125, "learning_rate": 7.2543793462645595e-06, "loss": 4.0037, "step": 15760 }, { "epoch": 5.250187390688765, "grad_norm": 0.7734375, "learning_rate": 7.25357034339793e-06, "loss": 3.9527, "step": 15761 }, { "epoch": 5.250520529691014, "grad_norm": 0.765625, "learning_rate": 7.252761343401675e-06, "loss": 4.0099, "step": 15762 }, { "epoch": 5.250853668693262, "grad_norm": 0.75390625, "learning_rate": 7.251952346285215e-06, "loss": 4.0002, "step": 15763 }, { "epoch": 5.251186807695511, "grad_norm": 0.7421875, "learning_rate": 7.25114335205797e-06, "loss": 4.0143, "step": 15764 }, { "epoch": 5.251519946697759, "grad_norm": 0.75390625, "learning_rate": 7.250334360729367e-06, "loss": 3.9805, "step": 15765 }, { "epoch": 5.2518530857000085, "grad_norm": 0.7578125, "learning_rate": 7.249525372308827e-06, "loss": 4.0124, "step": 15766 }, { "epoch": 5.252186224702257, "grad_norm": 0.76171875, "learning_rate": 7.248716386805776e-06, "loss": 3.9958, "step": 15767 }, { "epoch": 5.252519363704506, "grad_norm": 0.76953125, "learning_rate": 7.247907404229632e-06, "loss": 4.0324, "step": 15768 }, { "epoch": 5.252852502706754, "grad_norm": 0.78515625, "learning_rate": 7.247098424589824e-06, "loss": 4.0175, "step": 15769 }, { "epoch": 5.2531856417090035, "grad_norm": 0.71875, "learning_rate": 7.246289447895769e-06, "loss": 3.9305, "step": 15770 }, { "epoch": 5.253518780711252, "grad_norm": 0.71484375, "learning_rate": 7.2454804741568955e-06, "loss": 4.0891, "step": 15771 }, { "epoch": 5.2538519197135, "grad_norm": 0.7734375, "learning_rate": 7.2446715033826225e-06, "loss": 3.9923, "step": 15772 }, { "epoch": 5.254185058715749, "grad_norm": 0.75, "learning_rate": 7.24386253558237e-06, "loss": 3.9915, "step": 15773 }, { "epoch": 5.2545181977179976, "grad_norm": 0.76171875, "learning_rate": 7.243053570765567e-06, "loss": 3.9718, "step": 15774 }, { "epoch": 5.254851336720247, "grad_norm": 0.734375, "learning_rate": 7.242244608941632e-06, "loss": 4.0859, "step": 15775 }, { "epoch": 5.255184475722495, "grad_norm": 0.8359375, "learning_rate": 7.24143565011999e-06, "loss": 3.9629, "step": 15776 }, { "epoch": 5.255517614724744, "grad_norm": 0.76953125, "learning_rate": 7.240626694310063e-06, "loss": 3.9216, "step": 15777 }, { "epoch": 5.2558507537269925, "grad_norm": 0.76953125, "learning_rate": 7.239817741521271e-06, "loss": 3.9699, "step": 15778 }, { "epoch": 5.256183892729242, "grad_norm": 0.7265625, "learning_rate": 7.239008791763041e-06, "loss": 3.9839, "step": 15779 }, { "epoch": 5.25651703173149, "grad_norm": 0.73828125, "learning_rate": 7.238199845044795e-06, "loss": 3.977, "step": 15780 }, { "epoch": 5.256850170733738, "grad_norm": 0.71875, "learning_rate": 7.23739090137595e-06, "loss": 4.0299, "step": 15781 }, { "epoch": 5.2571833097359875, "grad_norm": 0.76953125, "learning_rate": 7.236581960765934e-06, "loss": 4.0506, "step": 15782 }, { "epoch": 5.257516448738236, "grad_norm": 0.7421875, "learning_rate": 7.235773023224166e-06, "loss": 3.8981, "step": 15783 }, { "epoch": 5.257849587740485, "grad_norm": 0.75390625, "learning_rate": 7.234964088760071e-06, "loss": 3.9303, "step": 15784 }, { "epoch": 5.258182726742733, "grad_norm": 0.7265625, "learning_rate": 7.23415515738307e-06, "loss": 3.9913, "step": 15785 }, { "epoch": 5.258515865744982, "grad_norm": 0.765625, "learning_rate": 7.233346229102586e-06, "loss": 4.0047, "step": 15786 }, { "epoch": 5.258849004747231, "grad_norm": 0.7421875, "learning_rate": 7.23253730392804e-06, "loss": 3.9441, "step": 15787 }, { "epoch": 5.259182143749479, "grad_norm": 0.73046875, "learning_rate": 7.231728381868856e-06, "loss": 4.0309, "step": 15788 }, { "epoch": 5.259515282751728, "grad_norm": 0.73828125, "learning_rate": 7.230919462934457e-06, "loss": 4.044, "step": 15789 }, { "epoch": 5.2598484217539765, "grad_norm": 0.72265625, "learning_rate": 7.2301105471342605e-06, "loss": 4.0202, "step": 15790 }, { "epoch": 5.260181560756226, "grad_norm": 0.77734375, "learning_rate": 7.229301634477694e-06, "loss": 4.0123, "step": 15791 }, { "epoch": 5.260514699758474, "grad_norm": 0.80078125, "learning_rate": 7.228492724974174e-06, "loss": 3.957, "step": 15792 }, { "epoch": 5.260847838760723, "grad_norm": 0.74609375, "learning_rate": 7.227683818633128e-06, "loss": 3.9718, "step": 15793 }, { "epoch": 5.2611809777629714, "grad_norm": 0.7578125, "learning_rate": 7.226874915463974e-06, "loss": 4.0026, "step": 15794 }, { "epoch": 5.261514116765221, "grad_norm": 0.7421875, "learning_rate": 7.226066015476138e-06, "loss": 4.0098, "step": 15795 }, { "epoch": 5.261847255767469, "grad_norm": 0.76171875, "learning_rate": 7.2252571186790364e-06, "loss": 3.9738, "step": 15796 }, { "epoch": 5.262180394769718, "grad_norm": 0.734375, "learning_rate": 7.224448225082097e-06, "loss": 3.9833, "step": 15797 }, { "epoch": 5.262513533771966, "grad_norm": 0.78515625, "learning_rate": 7.22363933469474e-06, "loss": 3.9731, "step": 15798 }, { "epoch": 5.262846672774215, "grad_norm": 0.76953125, "learning_rate": 7.222830447526384e-06, "loss": 3.9751, "step": 15799 }, { "epoch": 5.263179811776464, "grad_norm": 0.75390625, "learning_rate": 7.2220215635864535e-06, "loss": 3.961, "step": 15800 }, { "epoch": 5.263512950778712, "grad_norm": 0.76953125, "learning_rate": 7.221212682884368e-06, "loss": 4.0342, "step": 15801 }, { "epoch": 5.263846089780961, "grad_norm": 0.76953125, "learning_rate": 7.220403805429553e-06, "loss": 3.9544, "step": 15802 }, { "epoch": 5.26417922878321, "grad_norm": 0.76953125, "learning_rate": 7.219594931231426e-06, "loss": 4.0096, "step": 15803 }, { "epoch": 5.264512367785459, "grad_norm": 0.75, "learning_rate": 7.218786060299413e-06, "loss": 3.9969, "step": 15804 }, { "epoch": 5.264845506787707, "grad_norm": 0.75, "learning_rate": 7.217977192642931e-06, "loss": 4.0736, "step": 15805 }, { "epoch": 5.265178645789955, "grad_norm": 0.76953125, "learning_rate": 7.217168328271405e-06, "loss": 4.0075, "step": 15806 }, { "epoch": 5.265511784792205, "grad_norm": 0.75390625, "learning_rate": 7.216359467194258e-06, "loss": 3.9789, "step": 15807 }, { "epoch": 5.265844923794453, "grad_norm": 0.7890625, "learning_rate": 7.215550609420906e-06, "loss": 3.9746, "step": 15808 }, { "epoch": 5.266178062796702, "grad_norm": 0.765625, "learning_rate": 7.214741754960774e-06, "loss": 4.0029, "step": 15809 }, { "epoch": 5.26651120179895, "grad_norm": 0.75, "learning_rate": 7.213932903823281e-06, "loss": 3.9883, "step": 15810 }, { "epoch": 5.2668443408012, "grad_norm": 0.71484375, "learning_rate": 7.213124056017852e-06, "loss": 4.0535, "step": 15811 }, { "epoch": 5.267177479803448, "grad_norm": 0.73828125, "learning_rate": 7.212315211553904e-06, "loss": 4.0446, "step": 15812 }, { "epoch": 5.267510618805697, "grad_norm": 0.76953125, "learning_rate": 7.211506370440863e-06, "loss": 4.0119, "step": 15813 }, { "epoch": 5.267843757807945, "grad_norm": 0.76171875, "learning_rate": 7.210697532688147e-06, "loss": 3.9346, "step": 15814 }, { "epoch": 5.2681768968101945, "grad_norm": 0.7890625, "learning_rate": 7.209888698305179e-06, "loss": 3.9019, "step": 15815 }, { "epoch": 5.268510035812443, "grad_norm": 0.76171875, "learning_rate": 7.20907986730138e-06, "loss": 3.9304, "step": 15816 }, { "epoch": 5.268843174814691, "grad_norm": 0.8046875, "learning_rate": 7.208271039686169e-06, "loss": 3.9201, "step": 15817 }, { "epoch": 5.26917631381694, "grad_norm": 0.765625, "learning_rate": 7.207462215468969e-06, "loss": 4.0156, "step": 15818 }, { "epoch": 5.269509452819189, "grad_norm": 0.765625, "learning_rate": 7.206653394659199e-06, "loss": 3.9726, "step": 15819 }, { "epoch": 5.269842591821438, "grad_norm": 0.7578125, "learning_rate": 7.205844577266283e-06, "loss": 3.9739, "step": 15820 }, { "epoch": 5.270175730823686, "grad_norm": 0.73828125, "learning_rate": 7.2050357632996395e-06, "loss": 4.0726, "step": 15821 }, { "epoch": 5.270508869825935, "grad_norm": 0.76953125, "learning_rate": 7.204226952768692e-06, "loss": 3.973, "step": 15822 }, { "epoch": 5.2708420088281835, "grad_norm": 0.72265625, "learning_rate": 7.20341814568286e-06, "loss": 4.0263, "step": 15823 }, { "epoch": 5.271175147830432, "grad_norm": 0.76171875, "learning_rate": 7.202609342051562e-06, "loss": 3.9634, "step": 15824 }, { "epoch": 5.271508286832681, "grad_norm": 0.84765625, "learning_rate": 7.201800541884224e-06, "loss": 3.968, "step": 15825 }, { "epoch": 5.271841425834929, "grad_norm": 0.7578125, "learning_rate": 7.200991745190263e-06, "loss": 4.0475, "step": 15826 }, { "epoch": 5.2721745648371785, "grad_norm": 0.73828125, "learning_rate": 7.200182951979099e-06, "loss": 3.9711, "step": 15827 }, { "epoch": 5.272507703839427, "grad_norm": 0.7734375, "learning_rate": 7.199374162260155e-06, "loss": 4.036, "step": 15828 }, { "epoch": 5.272840842841676, "grad_norm": 0.73828125, "learning_rate": 7.19856537604285e-06, "loss": 3.9542, "step": 15829 }, { "epoch": 5.273173981843924, "grad_norm": 0.73828125, "learning_rate": 7.197756593336607e-06, "loss": 4.0398, "step": 15830 }, { "epoch": 5.2735071208461735, "grad_norm": 0.7578125, "learning_rate": 7.196947814150843e-06, "loss": 3.9924, "step": 15831 }, { "epoch": 5.273840259848422, "grad_norm": 0.7734375, "learning_rate": 7.1961390384949815e-06, "loss": 3.9662, "step": 15832 }, { "epoch": 5.27417339885067, "grad_norm": 0.7421875, "learning_rate": 7.1953302663784415e-06, "loss": 3.9958, "step": 15833 }, { "epoch": 5.274506537852919, "grad_norm": 0.78125, "learning_rate": 7.194521497810647e-06, "loss": 3.9946, "step": 15834 }, { "epoch": 5.2748396768551675, "grad_norm": 0.77734375, "learning_rate": 7.193712732801014e-06, "loss": 3.9739, "step": 15835 }, { "epoch": 5.275172815857417, "grad_norm": 0.734375, "learning_rate": 7.192903971358963e-06, "loss": 4.0467, "step": 15836 }, { "epoch": 5.275505954859665, "grad_norm": 0.7578125, "learning_rate": 7.1920952134939155e-06, "loss": 3.952, "step": 15837 }, { "epoch": 5.275839093861914, "grad_norm": 0.7578125, "learning_rate": 7.191286459215292e-06, "loss": 3.9636, "step": 15838 }, { "epoch": 5.2761722328641625, "grad_norm": 0.765625, "learning_rate": 7.1904777085325136e-06, "loss": 3.9687, "step": 15839 }, { "epoch": 5.276505371866412, "grad_norm": 0.734375, "learning_rate": 7.189668961454998e-06, "loss": 3.9773, "step": 15840 }, { "epoch": 5.27683851086866, "grad_norm": 0.74609375, "learning_rate": 7.188860217992168e-06, "loss": 4.01, "step": 15841 }, { "epoch": 5.277171649870908, "grad_norm": 0.7265625, "learning_rate": 7.188051478153444e-06, "loss": 3.994, "step": 15842 }, { "epoch": 5.277504788873157, "grad_norm": 0.7109375, "learning_rate": 7.187242741948242e-06, "loss": 3.9664, "step": 15843 }, { "epoch": 5.277837927875406, "grad_norm": 0.73828125, "learning_rate": 7.186434009385986e-06, "loss": 4.0544, "step": 15844 }, { "epoch": 5.278171066877655, "grad_norm": 0.71484375, "learning_rate": 7.185625280476092e-06, "loss": 4.0097, "step": 15845 }, { "epoch": 5.278504205879903, "grad_norm": 0.75390625, "learning_rate": 7.184816555227985e-06, "loss": 3.982, "step": 15846 }, { "epoch": 5.278837344882152, "grad_norm": 0.7734375, "learning_rate": 7.184007833651081e-06, "loss": 4.0229, "step": 15847 }, { "epoch": 5.279170483884401, "grad_norm": 0.7734375, "learning_rate": 7.1831991157548025e-06, "loss": 4.0149, "step": 15848 }, { "epoch": 5.279503622886649, "grad_norm": 0.76171875, "learning_rate": 7.182390401548568e-06, "loss": 4.0303, "step": 15849 }, { "epoch": 5.279836761888898, "grad_norm": 0.765625, "learning_rate": 7.1815816910417974e-06, "loss": 4.0268, "step": 15850 }, { "epoch": 5.2801699008911465, "grad_norm": 0.75, "learning_rate": 7.180772984243912e-06, "loss": 3.9744, "step": 15851 }, { "epoch": 5.280503039893396, "grad_norm": 0.69140625, "learning_rate": 7.179964281164327e-06, "loss": 4.0169, "step": 15852 }, { "epoch": 5.280836178895644, "grad_norm": 0.7421875, "learning_rate": 7.179155581812467e-06, "loss": 3.9082, "step": 15853 }, { "epoch": 5.281169317897893, "grad_norm": 0.77734375, "learning_rate": 7.178346886197747e-06, "loss": 3.9825, "step": 15854 }, { "epoch": 5.281502456900141, "grad_norm": 0.80859375, "learning_rate": 7.177538194329591e-06, "loss": 4.0166, "step": 15855 }, { "epoch": 5.281835595902391, "grad_norm": 0.7890625, "learning_rate": 7.176729506217416e-06, "loss": 3.9816, "step": 15856 }, { "epoch": 5.282168734904639, "grad_norm": 0.7265625, "learning_rate": 7.1759208218706425e-06, "loss": 3.987, "step": 15857 }, { "epoch": 5.282501873906888, "grad_norm": 0.76953125, "learning_rate": 7.175112141298688e-06, "loss": 3.9406, "step": 15858 }, { "epoch": 5.282835012909136, "grad_norm": 0.78125, "learning_rate": 7.174303464510975e-06, "loss": 4.0035, "step": 15859 }, { "epoch": 5.283168151911385, "grad_norm": 0.7109375, "learning_rate": 7.173494791516924e-06, "loss": 4.0505, "step": 15860 }, { "epoch": 5.283501290913634, "grad_norm": 0.75, "learning_rate": 7.172686122325947e-06, "loss": 3.999, "step": 15861 }, { "epoch": 5.283834429915882, "grad_norm": 0.71875, "learning_rate": 7.171877456947469e-06, "loss": 4.0227, "step": 15862 }, { "epoch": 5.284167568918131, "grad_norm": 0.76953125, "learning_rate": 7.171068795390907e-06, "loss": 3.9653, "step": 15863 }, { "epoch": 5.28450070792038, "grad_norm": 0.796875, "learning_rate": 7.170260137665683e-06, "loss": 4.0447, "step": 15864 }, { "epoch": 5.284833846922629, "grad_norm": 0.7734375, "learning_rate": 7.169451483781212e-06, "loss": 4.0225, "step": 15865 }, { "epoch": 5.285166985924877, "grad_norm": 0.78515625, "learning_rate": 7.168642833746917e-06, "loss": 4.0055, "step": 15866 }, { "epoch": 5.285500124927125, "grad_norm": 0.76953125, "learning_rate": 7.167834187572214e-06, "loss": 4.0862, "step": 15867 }, { "epoch": 5.285833263929375, "grad_norm": 0.7265625, "learning_rate": 7.167025545266524e-06, "loss": 3.9845, "step": 15868 }, { "epoch": 5.286166402931623, "grad_norm": 0.75390625, "learning_rate": 7.166216906839267e-06, "loss": 4.0226, "step": 15869 }, { "epoch": 5.286499541933872, "grad_norm": 0.78125, "learning_rate": 7.165408272299859e-06, "loss": 4.0279, "step": 15870 }, { "epoch": 5.28683268093612, "grad_norm": 0.7578125, "learning_rate": 7.1645996416577175e-06, "loss": 4.0395, "step": 15871 }, { "epoch": 5.2871658199383695, "grad_norm": 0.78125, "learning_rate": 7.163791014922266e-06, "loss": 4.0086, "step": 15872 }, { "epoch": 5.287498958940618, "grad_norm": 0.75390625, "learning_rate": 7.162982392102919e-06, "loss": 4.0331, "step": 15873 }, { "epoch": 5.287832097942867, "grad_norm": 0.7421875, "learning_rate": 7.162173773209099e-06, "loss": 4.0335, "step": 15874 }, { "epoch": 5.288165236945115, "grad_norm": 0.72265625, "learning_rate": 7.161365158250222e-06, "loss": 3.993, "step": 15875 }, { "epoch": 5.2884983759473645, "grad_norm": 0.76953125, "learning_rate": 7.160556547235707e-06, "loss": 3.9802, "step": 15876 }, { "epoch": 5.288831514949613, "grad_norm": 0.76953125, "learning_rate": 7.159747940174973e-06, "loss": 3.9659, "step": 15877 }, { "epoch": 5.289164653951861, "grad_norm": 0.71875, "learning_rate": 7.158939337077441e-06, "loss": 4.0528, "step": 15878 }, { "epoch": 5.28949779295411, "grad_norm": 0.77734375, "learning_rate": 7.158130737952526e-06, "loss": 3.8792, "step": 15879 }, { "epoch": 5.289830931956359, "grad_norm": 0.8046875, "learning_rate": 7.157322142809645e-06, "loss": 3.9307, "step": 15880 }, { "epoch": 5.290164070958608, "grad_norm": 0.73046875, "learning_rate": 7.156513551658221e-06, "loss": 3.9674, "step": 15881 }, { "epoch": 5.290497209960856, "grad_norm": 0.76171875, "learning_rate": 7.155704964507669e-06, "loss": 3.9468, "step": 15882 }, { "epoch": 5.290830348963105, "grad_norm": 0.78515625, "learning_rate": 7.154896381367409e-06, "loss": 4.002, "step": 15883 }, { "epoch": 5.2911634879653535, "grad_norm": 0.75, "learning_rate": 7.154087802246857e-06, "loss": 4.002, "step": 15884 }, { "epoch": 5.291496626967602, "grad_norm": 0.75390625, "learning_rate": 7.153279227155435e-06, "loss": 3.9513, "step": 15885 }, { "epoch": 5.291829765969851, "grad_norm": 0.72265625, "learning_rate": 7.152470656102557e-06, "loss": 4.0588, "step": 15886 }, { "epoch": 5.292162904972099, "grad_norm": 0.78125, "learning_rate": 7.151662089097647e-06, "loss": 3.9833, "step": 15887 }, { "epoch": 5.2924960439743485, "grad_norm": 0.71484375, "learning_rate": 7.150853526150118e-06, "loss": 4.0648, "step": 15888 }, { "epoch": 5.292829182976597, "grad_norm": 0.79296875, "learning_rate": 7.150044967269386e-06, "loss": 3.9138, "step": 15889 }, { "epoch": 5.293162321978846, "grad_norm": 0.703125, "learning_rate": 7.149236412464875e-06, "loss": 4.0257, "step": 15890 }, { "epoch": 5.293495460981094, "grad_norm": 0.8203125, "learning_rate": 7.148427861745997e-06, "loss": 3.9791, "step": 15891 }, { "epoch": 5.293828599983343, "grad_norm": 0.72265625, "learning_rate": 7.147619315122177e-06, "loss": 4.0106, "step": 15892 }, { "epoch": 5.294161738985592, "grad_norm": 0.72265625, "learning_rate": 7.146810772602825e-06, "loss": 4.0252, "step": 15893 }, { "epoch": 5.29449487798784, "grad_norm": 0.76171875, "learning_rate": 7.146002234197365e-06, "loss": 3.9073, "step": 15894 }, { "epoch": 5.294828016990089, "grad_norm": 0.7578125, "learning_rate": 7.145193699915209e-06, "loss": 4.0352, "step": 15895 }, { "epoch": 5.2951611559923375, "grad_norm": 0.78515625, "learning_rate": 7.144385169765783e-06, "loss": 4.0535, "step": 15896 }, { "epoch": 5.295494294994587, "grad_norm": 0.72265625, "learning_rate": 7.1435766437584974e-06, "loss": 3.9936, "step": 15897 }, { "epoch": 5.295827433996835, "grad_norm": 0.76953125, "learning_rate": 7.142768121902771e-06, "loss": 3.9398, "step": 15898 }, { "epoch": 5.296160572999084, "grad_norm": 0.71875, "learning_rate": 7.141959604208022e-06, "loss": 3.9532, "step": 15899 }, { "epoch": 5.2964937120013325, "grad_norm": 0.7578125, "learning_rate": 7.141151090683667e-06, "loss": 4.0577, "step": 15900 }, { "epoch": 5.296826851003582, "grad_norm": 0.76953125, "learning_rate": 7.140342581339128e-06, "loss": 3.9293, "step": 15901 }, { "epoch": 5.29715999000583, "grad_norm": 0.7421875, "learning_rate": 7.139534076183816e-06, "loss": 4.0385, "step": 15902 }, { "epoch": 5.297493129008078, "grad_norm": 0.7421875, "learning_rate": 7.1387255752271514e-06, "loss": 4.0034, "step": 15903 }, { "epoch": 5.297826268010327, "grad_norm": 0.7265625, "learning_rate": 7.137917078478552e-06, "loss": 4.0801, "step": 15904 }, { "epoch": 5.298159407012576, "grad_norm": 0.79296875, "learning_rate": 7.137108585947437e-06, "loss": 3.9704, "step": 15905 }, { "epoch": 5.298492546014825, "grad_norm": 0.78125, "learning_rate": 7.136300097643219e-06, "loss": 4.0292, "step": 15906 }, { "epoch": 5.298825685017073, "grad_norm": 0.71484375, "learning_rate": 7.135491613575315e-06, "loss": 3.9999, "step": 15907 }, { "epoch": 5.299158824019322, "grad_norm": 0.7734375, "learning_rate": 7.134683133753146e-06, "loss": 4.0756, "step": 15908 }, { "epoch": 5.299491963021571, "grad_norm": 0.7578125, "learning_rate": 7.133874658186126e-06, "loss": 3.9736, "step": 15909 }, { "epoch": 5.29982510202382, "grad_norm": 0.75390625, "learning_rate": 7.133066186883674e-06, "loss": 4.0413, "step": 15910 }, { "epoch": 5.300158241026068, "grad_norm": 0.7109375, "learning_rate": 7.132257719855206e-06, "loss": 4.1012, "step": 15911 }, { "epoch": 5.300491380028316, "grad_norm": 0.7109375, "learning_rate": 7.13144925711014e-06, "loss": 3.9388, "step": 15912 }, { "epoch": 5.300824519030566, "grad_norm": 0.8125, "learning_rate": 7.1306407986578904e-06, "loss": 3.9705, "step": 15913 }, { "epoch": 5.301157658032814, "grad_norm": 0.734375, "learning_rate": 7.12983234450788e-06, "loss": 4.0881, "step": 15914 }, { "epoch": 5.301490797035063, "grad_norm": 0.75, "learning_rate": 7.1290238946695155e-06, "loss": 3.9775, "step": 15915 }, { "epoch": 5.301823936037311, "grad_norm": 0.765625, "learning_rate": 7.128215449152221e-06, "loss": 3.9312, "step": 15916 }, { "epoch": 5.302157075039561, "grad_norm": 0.7734375, "learning_rate": 7.127407007965411e-06, "loss": 3.9979, "step": 15917 }, { "epoch": 5.302490214041809, "grad_norm": 0.80859375, "learning_rate": 7.126598571118503e-06, "loss": 4.0251, "step": 15918 }, { "epoch": 5.302823353044058, "grad_norm": 0.7109375, "learning_rate": 7.125790138620911e-06, "loss": 4.0179, "step": 15919 }, { "epoch": 5.303156492046306, "grad_norm": 0.7265625, "learning_rate": 7.124981710482055e-06, "loss": 4.0324, "step": 15920 }, { "epoch": 5.303489631048555, "grad_norm": 0.71875, "learning_rate": 7.124173286711349e-06, "loss": 3.965, "step": 15921 }, { "epoch": 5.303822770050804, "grad_norm": 0.75, "learning_rate": 7.123364867318211e-06, "loss": 3.9146, "step": 15922 }, { "epoch": 5.304155909053052, "grad_norm": 0.734375, "learning_rate": 7.122556452312058e-06, "loss": 3.9616, "step": 15923 }, { "epoch": 5.304489048055301, "grad_norm": 0.79296875, "learning_rate": 7.121748041702302e-06, "loss": 3.9848, "step": 15924 }, { "epoch": 5.30482218705755, "grad_norm": 0.71484375, "learning_rate": 7.120939635498363e-06, "loss": 3.9964, "step": 15925 }, { "epoch": 5.305155326059799, "grad_norm": 0.7578125, "learning_rate": 7.120131233709655e-06, "loss": 4.0196, "step": 15926 }, { "epoch": 5.305488465062047, "grad_norm": 0.73828125, "learning_rate": 7.119322836345597e-06, "loss": 4.0244, "step": 15927 }, { "epoch": 5.305821604064295, "grad_norm": 0.7421875, "learning_rate": 7.118514443415602e-06, "loss": 4.0376, "step": 15928 }, { "epoch": 5.3061547430665446, "grad_norm": 0.796875, "learning_rate": 7.117706054929089e-06, "loss": 3.9695, "step": 15929 }, { "epoch": 5.306487882068793, "grad_norm": 0.7265625, "learning_rate": 7.116897670895471e-06, "loss": 3.9718, "step": 15930 }, { "epoch": 5.306821021071042, "grad_norm": 0.703125, "learning_rate": 7.116089291324166e-06, "loss": 4.0475, "step": 15931 }, { "epoch": 5.30715416007329, "grad_norm": 0.7890625, "learning_rate": 7.115280916224592e-06, "loss": 3.9988, "step": 15932 }, { "epoch": 5.3074872990755395, "grad_norm": 0.69140625, "learning_rate": 7.114472545606158e-06, "loss": 4.0112, "step": 15933 }, { "epoch": 5.307820438077788, "grad_norm": 0.78125, "learning_rate": 7.1136641794782856e-06, "loss": 3.9754, "step": 15934 }, { "epoch": 5.308153577080037, "grad_norm": 0.73828125, "learning_rate": 7.112855817850387e-06, "loss": 4.0282, "step": 15935 }, { "epoch": 5.308486716082285, "grad_norm": 0.80078125, "learning_rate": 7.112047460731882e-06, "loss": 3.9937, "step": 15936 }, { "epoch": 5.3088198550845345, "grad_norm": 0.734375, "learning_rate": 7.11123910813218e-06, "loss": 3.9402, "step": 15937 }, { "epoch": 5.309152994086783, "grad_norm": 0.77734375, "learning_rate": 7.110430760060705e-06, "loss": 3.9736, "step": 15938 }, { "epoch": 5.309486133089031, "grad_norm": 0.77734375, "learning_rate": 7.1096224165268635e-06, "loss": 3.9761, "step": 15939 }, { "epoch": 5.30981927209128, "grad_norm": 0.73828125, "learning_rate": 7.108814077540078e-06, "loss": 3.9582, "step": 15940 }, { "epoch": 5.3101524110935285, "grad_norm": 0.7578125, "learning_rate": 7.108005743109763e-06, "loss": 3.9426, "step": 15941 }, { "epoch": 5.310485550095778, "grad_norm": 0.77734375, "learning_rate": 7.10719741324533e-06, "loss": 3.9838, "step": 15942 }, { "epoch": 5.310818689098026, "grad_norm": 0.8125, "learning_rate": 7.106389087956195e-06, "loss": 4.0144, "step": 15943 }, { "epoch": 5.311151828100275, "grad_norm": 0.734375, "learning_rate": 7.105580767251775e-06, "loss": 3.912, "step": 15944 }, { "epoch": 5.3114849671025235, "grad_norm": 0.765625, "learning_rate": 7.104772451141486e-06, "loss": 4.0552, "step": 15945 }, { "epoch": 5.311818106104772, "grad_norm": 0.75, "learning_rate": 7.10396413963474e-06, "loss": 3.9839, "step": 15946 }, { "epoch": 5.312151245107021, "grad_norm": 0.80859375, "learning_rate": 7.103155832740956e-06, "loss": 3.9783, "step": 15947 }, { "epoch": 5.312484384109269, "grad_norm": 0.8359375, "learning_rate": 7.102347530469545e-06, "loss": 3.9608, "step": 15948 }, { "epoch": 5.3128175231115184, "grad_norm": 0.8203125, "learning_rate": 7.101539232829926e-06, "loss": 3.9772, "step": 15949 }, { "epoch": 5.313150662113767, "grad_norm": 0.75390625, "learning_rate": 7.100730939831513e-06, "loss": 4.0508, "step": 15950 }, { "epoch": 5.313483801116016, "grad_norm": 0.79296875, "learning_rate": 7.0999226514837165e-06, "loss": 4.0733, "step": 15951 }, { "epoch": 5.313816940118264, "grad_norm": 0.73046875, "learning_rate": 7.0991143677959565e-06, "loss": 4.0381, "step": 15952 }, { "epoch": 5.314150079120513, "grad_norm": 0.7890625, "learning_rate": 7.098306088777644e-06, "loss": 3.9369, "step": 15953 }, { "epoch": 5.314483218122762, "grad_norm": 0.7890625, "learning_rate": 7.097497814438197e-06, "loss": 3.9494, "step": 15954 }, { "epoch": 5.314816357125011, "grad_norm": 0.75390625, "learning_rate": 7.096689544787027e-06, "loss": 4.0464, "step": 15955 }, { "epoch": 5.315149496127259, "grad_norm": 0.75390625, "learning_rate": 7.095881279833552e-06, "loss": 4.0056, "step": 15956 }, { "epoch": 5.3154826351295075, "grad_norm": 0.7734375, "learning_rate": 7.095073019587183e-06, "loss": 3.9716, "step": 15957 }, { "epoch": 5.315815774131757, "grad_norm": 0.7265625, "learning_rate": 7.094264764057338e-06, "loss": 3.9719, "step": 15958 }, { "epoch": 5.316148913134005, "grad_norm": 0.75, "learning_rate": 7.093456513253432e-06, "loss": 3.9451, "step": 15959 }, { "epoch": 5.316482052136254, "grad_norm": 0.74609375, "learning_rate": 7.092648267184875e-06, "loss": 3.9927, "step": 15960 }, { "epoch": 5.316815191138502, "grad_norm": 0.734375, "learning_rate": 7.091840025861081e-06, "loss": 4.0214, "step": 15961 }, { "epoch": 5.317148330140752, "grad_norm": 0.79296875, "learning_rate": 7.0910317892914695e-06, "loss": 3.9067, "step": 15962 }, { "epoch": 5.317481469143, "grad_norm": 0.70703125, "learning_rate": 7.090223557485449e-06, "loss": 4.0176, "step": 15963 }, { "epoch": 5.317814608145248, "grad_norm": 0.74609375, "learning_rate": 7.089415330452439e-06, "loss": 4.0069, "step": 15964 }, { "epoch": 5.318147747147497, "grad_norm": 0.7734375, "learning_rate": 7.088607108201848e-06, "loss": 3.9978, "step": 15965 }, { "epoch": 5.318480886149746, "grad_norm": 0.74609375, "learning_rate": 7.087798890743096e-06, "loss": 3.9348, "step": 15966 }, { "epoch": 5.318814025151995, "grad_norm": 0.78125, "learning_rate": 7.086990678085593e-06, "loss": 3.9479, "step": 15967 }, { "epoch": 5.319147164154243, "grad_norm": 0.7734375, "learning_rate": 7.086182470238756e-06, "loss": 3.9883, "step": 15968 }, { "epoch": 5.319480303156492, "grad_norm": 0.79296875, "learning_rate": 7.085374267211995e-06, "loss": 4.0179, "step": 15969 }, { "epoch": 5.319813442158741, "grad_norm": 0.75390625, "learning_rate": 7.084566069014726e-06, "loss": 3.957, "step": 15970 }, { "epoch": 5.32014658116099, "grad_norm": 0.73828125, "learning_rate": 7.083757875656363e-06, "loss": 4.0236, "step": 15971 }, { "epoch": 5.320479720163238, "grad_norm": 0.74609375, "learning_rate": 7.082949687146317e-06, "loss": 3.9874, "step": 15972 }, { "epoch": 5.320812859165486, "grad_norm": 0.78515625, "learning_rate": 7.082141503494006e-06, "loss": 4.0279, "step": 15973 }, { "epoch": 5.321145998167736, "grad_norm": 0.7421875, "learning_rate": 7.08133332470884e-06, "loss": 3.957, "step": 15974 }, { "epoch": 5.321479137169984, "grad_norm": 0.734375, "learning_rate": 7.080525150800235e-06, "loss": 4.0182, "step": 15975 }, { "epoch": 5.321812276172233, "grad_norm": 0.78515625, "learning_rate": 7.079716981777602e-06, "loss": 3.9753, "step": 15976 }, { "epoch": 5.322145415174481, "grad_norm": 0.76953125, "learning_rate": 7.078908817650359e-06, "loss": 3.9964, "step": 15977 }, { "epoch": 5.3224785541767305, "grad_norm": 0.7578125, "learning_rate": 7.078100658427916e-06, "loss": 4.0008, "step": 15978 }, { "epoch": 5.322811693178979, "grad_norm": 0.734375, "learning_rate": 7.077292504119682e-06, "loss": 3.9251, "step": 15979 }, { "epoch": 5.323144832181228, "grad_norm": 0.76171875, "learning_rate": 7.076484354735079e-06, "loss": 3.9433, "step": 15980 }, { "epoch": 5.323477971183476, "grad_norm": 0.765625, "learning_rate": 7.0756762102835135e-06, "loss": 4.0047, "step": 15981 }, { "epoch": 5.323811110185725, "grad_norm": 0.76171875, "learning_rate": 7.074868070774402e-06, "loss": 4.0207, "step": 15982 }, { "epoch": 5.324144249187974, "grad_norm": 0.79296875, "learning_rate": 7.074059936217156e-06, "loss": 4.038, "step": 15983 }, { "epoch": 5.324477388190222, "grad_norm": 0.703125, "learning_rate": 7.07325180662119e-06, "loss": 3.9691, "step": 15984 }, { "epoch": 5.324810527192471, "grad_norm": 0.71875, "learning_rate": 7.072443681995915e-06, "loss": 4.0229, "step": 15985 }, { "epoch": 5.32514366619472, "grad_norm": 0.7109375, "learning_rate": 7.071635562350747e-06, "loss": 3.9653, "step": 15986 }, { "epoch": 5.325476805196969, "grad_norm": 0.7734375, "learning_rate": 7.070827447695097e-06, "loss": 3.938, "step": 15987 }, { "epoch": 5.325809944199217, "grad_norm": 0.765625, "learning_rate": 7.0700193380383745e-06, "loss": 3.9679, "step": 15988 }, { "epoch": 5.326143083201465, "grad_norm": 0.79296875, "learning_rate": 7.069211233389998e-06, "loss": 4.1227, "step": 15989 }, { "epoch": 5.3264762222037145, "grad_norm": 0.734375, "learning_rate": 7.068403133759375e-06, "loss": 4.0352, "step": 15990 }, { "epoch": 5.326809361205963, "grad_norm": 0.7890625, "learning_rate": 7.067595039155924e-06, "loss": 3.9703, "step": 15991 }, { "epoch": 5.327142500208212, "grad_norm": 0.7734375, "learning_rate": 7.066786949589051e-06, "loss": 3.9126, "step": 15992 }, { "epoch": 5.32747563921046, "grad_norm": 0.71875, "learning_rate": 7.065978865068174e-06, "loss": 4.0595, "step": 15993 }, { "epoch": 5.3278087782127095, "grad_norm": 0.765625, "learning_rate": 7.065170785602701e-06, "loss": 3.9579, "step": 15994 }, { "epoch": 5.328141917214958, "grad_norm": 0.765625, "learning_rate": 7.064362711202051e-06, "loss": 3.9977, "step": 15995 }, { "epoch": 5.328475056217207, "grad_norm": 0.7890625, "learning_rate": 7.063554641875629e-06, "loss": 3.9692, "step": 15996 }, { "epoch": 5.328808195219455, "grad_norm": 0.76953125, "learning_rate": 7.062746577632849e-06, "loss": 3.985, "step": 15997 }, { "epoch": 5.329141334221704, "grad_norm": 0.80078125, "learning_rate": 7.061938518483126e-06, "loss": 3.9803, "step": 15998 }, { "epoch": 5.329474473223953, "grad_norm": 0.78515625, "learning_rate": 7.061130464435868e-06, "loss": 3.9473, "step": 15999 }, { "epoch": 5.329807612226201, "grad_norm": 0.83984375, "learning_rate": 7.060322415500492e-06, "loss": 3.9769, "step": 16000 }, { "epoch": 5.33014075122845, "grad_norm": 0.75, "learning_rate": 7.0595143716864055e-06, "loss": 3.9768, "step": 16001 }, { "epoch": 5.3304738902306985, "grad_norm": 0.75, "learning_rate": 7.058706333003024e-06, "loss": 3.9178, "step": 16002 }, { "epoch": 5.330807029232948, "grad_norm": 0.73828125, "learning_rate": 7.0578982994597564e-06, "loss": 3.9674, "step": 16003 }, { "epoch": 5.331140168235196, "grad_norm": 0.765625, "learning_rate": 7.057090271066021e-06, "loss": 3.9696, "step": 16004 }, { "epoch": 5.331473307237445, "grad_norm": 0.80859375, "learning_rate": 7.056282247831219e-06, "loss": 3.9184, "step": 16005 }, { "epoch": 5.3318064462396935, "grad_norm": 0.7890625, "learning_rate": 7.055474229764771e-06, "loss": 4.0173, "step": 16006 }, { "epoch": 5.332139585241942, "grad_norm": 0.73828125, "learning_rate": 7.054666216876083e-06, "loss": 4.0179, "step": 16007 }, { "epoch": 5.332472724244191, "grad_norm": 0.76171875, "learning_rate": 7.05385820917457e-06, "loss": 4.0218, "step": 16008 }, { "epoch": 5.332805863246439, "grad_norm": 0.75, "learning_rate": 7.053050206669641e-06, "loss": 3.9953, "step": 16009 }, { "epoch": 5.333139002248688, "grad_norm": 0.765625, "learning_rate": 7.052242209370711e-06, "loss": 3.9904, "step": 16010 }, { "epoch": 5.333472141250937, "grad_norm": 0.703125, "learning_rate": 7.051434217287187e-06, "loss": 3.9975, "step": 16011 }, { "epoch": 5.333805280253186, "grad_norm": 0.77734375, "learning_rate": 7.0506262304284855e-06, "loss": 4.0334, "step": 16012 }, { "epoch": 5.334138419255434, "grad_norm": 0.7890625, "learning_rate": 7.049818248804016e-06, "loss": 3.9587, "step": 16013 }, { "epoch": 5.334471558257683, "grad_norm": 0.74609375, "learning_rate": 7.049010272423186e-06, "loss": 4.0001, "step": 16014 }, { "epoch": 5.334804697259932, "grad_norm": 0.7578125, "learning_rate": 7.048202301295411e-06, "loss": 4.0756, "step": 16015 }, { "epoch": 5.335137836262181, "grad_norm": 0.76171875, "learning_rate": 7.047394335430099e-06, "loss": 4.0104, "step": 16016 }, { "epoch": 5.335470975264429, "grad_norm": 0.73828125, "learning_rate": 7.046586374836664e-06, "loss": 3.9831, "step": 16017 }, { "epoch": 5.3358041142666774, "grad_norm": 0.73828125, "learning_rate": 7.045778419524514e-06, "loss": 4.0224, "step": 16018 }, { "epoch": 5.336137253268927, "grad_norm": 0.7890625, "learning_rate": 7.044970469503064e-06, "loss": 3.9671, "step": 16019 }, { "epoch": 5.336470392271175, "grad_norm": 0.75390625, "learning_rate": 7.04416252478172e-06, "loss": 3.9971, "step": 16020 }, { "epoch": 5.336803531273424, "grad_norm": 0.78515625, "learning_rate": 7.043354585369897e-06, "loss": 3.9764, "step": 16021 }, { "epoch": 5.337136670275672, "grad_norm": 0.7578125, "learning_rate": 7.042546651277005e-06, "loss": 3.9556, "step": 16022 }, { "epoch": 5.337469809277922, "grad_norm": 0.72265625, "learning_rate": 7.041738722512452e-06, "loss": 3.9575, "step": 16023 }, { "epoch": 5.33780294828017, "grad_norm": 0.77734375, "learning_rate": 7.0409307990856505e-06, "loss": 4.0011, "step": 16024 }, { "epoch": 5.338136087282418, "grad_norm": 0.75390625, "learning_rate": 7.04012288100601e-06, "loss": 4.0463, "step": 16025 }, { "epoch": 5.338469226284667, "grad_norm": 0.796875, "learning_rate": 7.039314968282943e-06, "loss": 3.9892, "step": 16026 }, { "epoch": 5.338802365286916, "grad_norm": 0.77734375, "learning_rate": 7.038507060925857e-06, "loss": 3.9696, "step": 16027 }, { "epoch": 5.339135504289165, "grad_norm": 0.76171875, "learning_rate": 7.0376991589441665e-06, "loss": 3.9727, "step": 16028 }, { "epoch": 5.339468643291413, "grad_norm": 0.75, "learning_rate": 7.036891262347278e-06, "loss": 3.9836, "step": 16029 }, { "epoch": 5.339801782293662, "grad_norm": 0.765625, "learning_rate": 7.036083371144603e-06, "loss": 3.9653, "step": 16030 }, { "epoch": 5.340134921295911, "grad_norm": 0.73828125, "learning_rate": 7.035275485345556e-06, "loss": 4.0026, "step": 16031 }, { "epoch": 5.34046806029816, "grad_norm": 0.71484375, "learning_rate": 7.034467604959538e-06, "loss": 3.913, "step": 16032 }, { "epoch": 5.340801199300408, "grad_norm": 0.77734375, "learning_rate": 7.033659729995967e-06, "loss": 4.0847, "step": 16033 }, { "epoch": 5.341134338302656, "grad_norm": 0.80859375, "learning_rate": 7.032851860464246e-06, "loss": 3.9612, "step": 16034 }, { "epoch": 5.341467477304906, "grad_norm": 0.73828125, "learning_rate": 7.032043996373794e-06, "loss": 3.9709, "step": 16035 }, { "epoch": 5.341800616307154, "grad_norm": 0.78125, "learning_rate": 7.0312361377340115e-06, "loss": 3.9724, "step": 16036 }, { "epoch": 5.342133755309403, "grad_norm": 0.77734375, "learning_rate": 7.030428284554315e-06, "loss": 3.9528, "step": 16037 }, { "epoch": 5.342466894311651, "grad_norm": 0.76171875, "learning_rate": 7.029620436844111e-06, "loss": 3.9488, "step": 16038 }, { "epoch": 5.3428000333139005, "grad_norm": 0.80078125, "learning_rate": 7.028812594612811e-06, "loss": 3.9847, "step": 16039 }, { "epoch": 5.343133172316149, "grad_norm": 0.765625, "learning_rate": 7.0280047578698254e-06, "loss": 3.9711, "step": 16040 }, { "epoch": 5.343466311318398, "grad_norm": 0.75, "learning_rate": 7.027196926624559e-06, "loss": 3.9902, "step": 16041 }, { "epoch": 5.343799450320646, "grad_norm": 0.7265625, "learning_rate": 7.026389100886426e-06, "loss": 3.9284, "step": 16042 }, { "epoch": 5.344132589322895, "grad_norm": 0.78515625, "learning_rate": 7.025581280664833e-06, "loss": 4.0018, "step": 16043 }, { "epoch": 5.344465728325144, "grad_norm": 0.7421875, "learning_rate": 7.024773465969191e-06, "loss": 4.0248, "step": 16044 }, { "epoch": 5.344798867327392, "grad_norm": 0.796875, "learning_rate": 7.023965656808908e-06, "loss": 3.9648, "step": 16045 }, { "epoch": 5.345132006329641, "grad_norm": 0.74609375, "learning_rate": 7.023157853193395e-06, "loss": 4.0501, "step": 16046 }, { "epoch": 5.3454651453318895, "grad_norm": 0.79296875, "learning_rate": 7.022350055132059e-06, "loss": 4.0214, "step": 16047 }, { "epoch": 5.345798284334139, "grad_norm": 0.828125, "learning_rate": 7.0215422626343114e-06, "loss": 4.0448, "step": 16048 }, { "epoch": 5.346131423336387, "grad_norm": 0.72265625, "learning_rate": 7.020734475709562e-06, "loss": 3.9699, "step": 16049 }, { "epoch": 5.346464562338636, "grad_norm": 0.7734375, "learning_rate": 7.019926694367217e-06, "loss": 4.0557, "step": 16050 }, { "epoch": 5.3467977013408845, "grad_norm": 0.734375, "learning_rate": 7.019118918616684e-06, "loss": 3.9616, "step": 16051 }, { "epoch": 5.347130840343133, "grad_norm": 0.73828125, "learning_rate": 7.0183111484673746e-06, "loss": 4.0133, "step": 16052 }, { "epoch": 5.347463979345382, "grad_norm": 0.74609375, "learning_rate": 7.0175033839286965e-06, "loss": 4.0433, "step": 16053 }, { "epoch": 5.34779711834763, "grad_norm": 0.77734375, "learning_rate": 7.01669562501006e-06, "loss": 4.0461, "step": 16054 }, { "epoch": 5.3481302573498795, "grad_norm": 0.75, "learning_rate": 7.015887871720872e-06, "loss": 3.9005, "step": 16055 }, { "epoch": 5.348463396352128, "grad_norm": 0.78125, "learning_rate": 7.015080124070542e-06, "loss": 3.9482, "step": 16056 }, { "epoch": 5.348796535354377, "grad_norm": 0.8203125, "learning_rate": 7.014272382068477e-06, "loss": 4.0359, "step": 16057 }, { "epoch": 5.349129674356625, "grad_norm": 0.765625, "learning_rate": 7.013464645724091e-06, "loss": 3.9984, "step": 16058 }, { "epoch": 5.349462813358874, "grad_norm": 0.7734375, "learning_rate": 7.012656915046785e-06, "loss": 3.9221, "step": 16059 }, { "epoch": 5.349795952361123, "grad_norm": 0.75, "learning_rate": 7.011849190045969e-06, "loss": 4.045, "step": 16060 }, { "epoch": 5.350129091363371, "grad_norm": 0.75390625, "learning_rate": 7.011041470731055e-06, "loss": 4.0404, "step": 16061 }, { "epoch": 5.35046223036562, "grad_norm": 0.7421875, "learning_rate": 7.0102337571114464e-06, "loss": 4.0491, "step": 16062 }, { "epoch": 5.3507953693678685, "grad_norm": 0.7265625, "learning_rate": 7.009426049196555e-06, "loss": 3.9393, "step": 16063 }, { "epoch": 5.351128508370118, "grad_norm": 0.77734375, "learning_rate": 7.008618346995787e-06, "loss": 3.9732, "step": 16064 }, { "epoch": 5.351461647372366, "grad_norm": 0.69921875, "learning_rate": 7.007810650518552e-06, "loss": 4.023, "step": 16065 }, { "epoch": 5.351794786374615, "grad_norm": 0.75390625, "learning_rate": 7.007002959774255e-06, "loss": 3.9596, "step": 16066 }, { "epoch": 5.352127925376863, "grad_norm": 0.80078125, "learning_rate": 7.006195274772309e-06, "loss": 3.9951, "step": 16067 }, { "epoch": 5.352461064379112, "grad_norm": 0.71875, "learning_rate": 7.005387595522118e-06, "loss": 3.9996, "step": 16068 }, { "epoch": 5.352794203381361, "grad_norm": 0.7421875, "learning_rate": 7.004579922033087e-06, "loss": 3.9816, "step": 16069 }, { "epoch": 5.353127342383609, "grad_norm": 0.79296875, "learning_rate": 7.00377225431463e-06, "loss": 3.9809, "step": 16070 }, { "epoch": 5.353460481385858, "grad_norm": 0.76171875, "learning_rate": 7.002964592376148e-06, "loss": 3.875, "step": 16071 }, { "epoch": 5.353793620388107, "grad_norm": 0.75, "learning_rate": 7.002156936227055e-06, "loss": 3.9561, "step": 16072 }, { "epoch": 5.354126759390356, "grad_norm": 0.74609375, "learning_rate": 7.001349285876753e-06, "loss": 4.0012, "step": 16073 }, { "epoch": 5.354459898392604, "grad_norm": 0.78515625, "learning_rate": 7.000541641334653e-06, "loss": 3.9392, "step": 16074 }, { "epoch": 5.354793037394853, "grad_norm": 0.69921875, "learning_rate": 6.99973400261016e-06, "loss": 4.0551, "step": 16075 }, { "epoch": 5.355126176397102, "grad_norm": 0.76171875, "learning_rate": 6.998926369712685e-06, "loss": 3.9432, "step": 16076 }, { "epoch": 5.355459315399351, "grad_norm": 0.6953125, "learning_rate": 6.998118742651632e-06, "loss": 4.0546, "step": 16077 }, { "epoch": 5.355792454401599, "grad_norm": 0.7109375, "learning_rate": 6.997311121436406e-06, "loss": 3.963, "step": 16078 }, { "epoch": 5.356125593403847, "grad_norm": 0.75, "learning_rate": 6.996503506076418e-06, "loss": 4.0129, "step": 16079 }, { "epoch": 5.356458732406097, "grad_norm": 0.74609375, "learning_rate": 6.995695896581073e-06, "loss": 4.0392, "step": 16080 }, { "epoch": 5.356791871408345, "grad_norm": 0.74609375, "learning_rate": 6.99488829295978e-06, "loss": 4.0518, "step": 16081 }, { "epoch": 5.357125010410594, "grad_norm": 0.7578125, "learning_rate": 6.994080695221942e-06, "loss": 4.009, "step": 16082 }, { "epoch": 5.357458149412842, "grad_norm": 0.74609375, "learning_rate": 6.99327310337697e-06, "loss": 4.0556, "step": 16083 }, { "epoch": 5.357791288415092, "grad_norm": 0.75390625, "learning_rate": 6.992465517434267e-06, "loss": 3.983, "step": 16084 }, { "epoch": 5.35812442741734, "grad_norm": 0.80078125, "learning_rate": 6.991657937403244e-06, "loss": 3.9843, "step": 16085 }, { "epoch": 5.358457566419588, "grad_norm": 0.75, "learning_rate": 6.9908503632933045e-06, "loss": 3.9834, "step": 16086 }, { "epoch": 5.358790705421837, "grad_norm": 0.734375, "learning_rate": 6.990042795113854e-06, "loss": 4.0157, "step": 16087 }, { "epoch": 5.359123844424086, "grad_norm": 0.80078125, "learning_rate": 6.9892352328743005e-06, "loss": 3.934, "step": 16088 }, { "epoch": 5.359456983426335, "grad_norm": 0.79296875, "learning_rate": 6.98842767658405e-06, "loss": 3.9185, "step": 16089 }, { "epoch": 5.359790122428583, "grad_norm": 0.75, "learning_rate": 6.98762012625251e-06, "loss": 3.975, "step": 16090 }, { "epoch": 5.360123261430832, "grad_norm": 0.7421875, "learning_rate": 6.986812581889084e-06, "loss": 3.9905, "step": 16091 }, { "epoch": 5.360456400433081, "grad_norm": 0.80078125, "learning_rate": 6.986005043503182e-06, "loss": 3.9985, "step": 16092 }, { "epoch": 5.36078953943533, "grad_norm": 0.7421875, "learning_rate": 6.985197511104207e-06, "loss": 3.9592, "step": 16093 }, { "epoch": 5.361122678437578, "grad_norm": 0.73828125, "learning_rate": 6.984389984701569e-06, "loss": 4.026, "step": 16094 }, { "epoch": 5.361455817439826, "grad_norm": 0.765625, "learning_rate": 6.983582464304667e-06, "loss": 3.9976, "step": 16095 }, { "epoch": 5.3617889564420755, "grad_norm": 0.7890625, "learning_rate": 6.982774949922913e-06, "loss": 3.9529, "step": 16096 }, { "epoch": 5.362122095444324, "grad_norm": 0.74609375, "learning_rate": 6.981967441565707e-06, "loss": 3.9712, "step": 16097 }, { "epoch": 5.362455234446573, "grad_norm": 0.7578125, "learning_rate": 6.981159939242461e-06, "loss": 3.9484, "step": 16098 }, { "epoch": 5.362788373448821, "grad_norm": 0.7265625, "learning_rate": 6.980352442962577e-06, "loss": 4.051, "step": 16099 }, { "epoch": 5.3631215124510705, "grad_norm": 0.75, "learning_rate": 6.979544952735461e-06, "loss": 3.9732, "step": 16100 }, { "epoch": 5.363454651453319, "grad_norm": 0.75, "learning_rate": 6.97873746857052e-06, "loss": 3.9449, "step": 16101 }, { "epoch": 5.363787790455568, "grad_norm": 0.72265625, "learning_rate": 6.97792999047716e-06, "loss": 3.9951, "step": 16102 }, { "epoch": 5.364120929457816, "grad_norm": 0.7734375, "learning_rate": 6.977122518464784e-06, "loss": 4.0313, "step": 16103 }, { "epoch": 5.364454068460065, "grad_norm": 0.7421875, "learning_rate": 6.976315052542795e-06, "loss": 4.0416, "step": 16104 }, { "epoch": 5.364787207462314, "grad_norm": 0.765625, "learning_rate": 6.975507592720606e-06, "loss": 3.9092, "step": 16105 }, { "epoch": 5.365120346464562, "grad_norm": 0.73828125, "learning_rate": 6.974700139007613e-06, "loss": 3.9821, "step": 16106 }, { "epoch": 5.365453485466811, "grad_norm": 0.69921875, "learning_rate": 6.9738926914132285e-06, "loss": 4.074, "step": 16107 }, { "epoch": 5.3657866244690595, "grad_norm": 0.78515625, "learning_rate": 6.973085249946852e-06, "loss": 3.9435, "step": 16108 }, { "epoch": 5.366119763471309, "grad_norm": 0.75, "learning_rate": 6.972277814617893e-06, "loss": 3.9451, "step": 16109 }, { "epoch": 5.366452902473557, "grad_norm": 0.76171875, "learning_rate": 6.971470385435753e-06, "loss": 4.0499, "step": 16110 }, { "epoch": 5.366786041475806, "grad_norm": 0.71484375, "learning_rate": 6.970662962409842e-06, "loss": 3.9977, "step": 16111 }, { "epoch": 5.3671191804780545, "grad_norm": 0.7578125, "learning_rate": 6.969855545549559e-06, "loss": 3.9488, "step": 16112 }, { "epoch": 5.367452319480303, "grad_norm": 0.78125, "learning_rate": 6.969048134864309e-06, "loss": 3.9684, "step": 16113 }, { "epoch": 5.367785458482552, "grad_norm": 0.7421875, "learning_rate": 6.9682407303635e-06, "loss": 4.0158, "step": 16114 }, { "epoch": 5.3681185974848, "grad_norm": 0.75390625, "learning_rate": 6.9674333320565324e-06, "loss": 4.0362, "step": 16115 }, { "epoch": 5.368451736487049, "grad_norm": 0.78515625, "learning_rate": 6.966625939952814e-06, "loss": 4.0082, "step": 16116 }, { "epoch": 5.368784875489298, "grad_norm": 0.73046875, "learning_rate": 6.965818554061748e-06, "loss": 3.9123, "step": 16117 }, { "epoch": 5.369118014491547, "grad_norm": 0.7890625, "learning_rate": 6.96501117439274e-06, "loss": 3.9541, "step": 16118 }, { "epoch": 5.369451153493795, "grad_norm": 0.75390625, "learning_rate": 6.964203800955191e-06, "loss": 3.9777, "step": 16119 }, { "epoch": 5.369784292496044, "grad_norm": 0.7421875, "learning_rate": 6.9633964337585106e-06, "loss": 3.9594, "step": 16120 }, { "epoch": 5.370117431498293, "grad_norm": 0.79296875, "learning_rate": 6.962589072812098e-06, "loss": 3.9639, "step": 16121 }, { "epoch": 5.370450570500541, "grad_norm": 0.76171875, "learning_rate": 6.961781718125357e-06, "loss": 3.9599, "step": 16122 }, { "epoch": 5.37078370950279, "grad_norm": 0.703125, "learning_rate": 6.960974369707695e-06, "loss": 3.9696, "step": 16123 }, { "epoch": 5.3711168485050385, "grad_norm": 0.7734375, "learning_rate": 6.960167027568511e-06, "loss": 4.0133, "step": 16124 }, { "epoch": 5.371449987507288, "grad_norm": 0.74609375, "learning_rate": 6.959359691717216e-06, "loss": 3.9944, "step": 16125 }, { "epoch": 5.371783126509536, "grad_norm": 0.7578125, "learning_rate": 6.958552362163206e-06, "loss": 3.9724, "step": 16126 }, { "epoch": 5.372116265511785, "grad_norm": 0.7421875, "learning_rate": 6.95774503891589e-06, "loss": 4.0007, "step": 16127 }, { "epoch": 5.372449404514033, "grad_norm": 0.7421875, "learning_rate": 6.956937721984669e-06, "loss": 3.9771, "step": 16128 }, { "epoch": 5.372782543516282, "grad_norm": 0.7421875, "learning_rate": 6.956130411378951e-06, "loss": 3.9415, "step": 16129 }, { "epoch": 5.373115682518531, "grad_norm": 0.765625, "learning_rate": 6.9553231071081324e-06, "loss": 4.0171, "step": 16130 }, { "epoch": 5.373448821520779, "grad_norm": 0.76171875, "learning_rate": 6.954515809181619e-06, "loss": 3.998, "step": 16131 }, { "epoch": 5.373781960523028, "grad_norm": 0.7734375, "learning_rate": 6.953708517608816e-06, "loss": 4.0039, "step": 16132 }, { "epoch": 5.374115099525277, "grad_norm": 0.7421875, "learning_rate": 6.952901232399125e-06, "loss": 3.915, "step": 16133 }, { "epoch": 5.374448238527526, "grad_norm": 0.796875, "learning_rate": 6.952093953561951e-06, "loss": 4.062, "step": 16134 }, { "epoch": 5.374781377529774, "grad_norm": 0.80078125, "learning_rate": 6.951286681106693e-06, "loss": 4.0267, "step": 16135 }, { "epoch": 5.375114516532023, "grad_norm": 0.734375, "learning_rate": 6.950479415042759e-06, "loss": 4.0028, "step": 16136 }, { "epoch": 5.375447655534272, "grad_norm": 0.7265625, "learning_rate": 6.949672155379548e-06, "loss": 3.9623, "step": 16137 }, { "epoch": 5.375780794536521, "grad_norm": 0.765625, "learning_rate": 6.948864902126469e-06, "loss": 3.9498, "step": 16138 }, { "epoch": 5.376113933538769, "grad_norm": 0.75, "learning_rate": 6.948057655292916e-06, "loss": 3.9775, "step": 16139 }, { "epoch": 5.376447072541017, "grad_norm": 0.7578125, "learning_rate": 6.947250414888297e-06, "loss": 4.0077, "step": 16140 }, { "epoch": 5.376780211543267, "grad_norm": 0.7265625, "learning_rate": 6.946443180922012e-06, "loss": 4.0581, "step": 16141 }, { "epoch": 5.377113350545515, "grad_norm": 0.79296875, "learning_rate": 6.945635953403467e-06, "loss": 4.0353, "step": 16142 }, { "epoch": 5.377446489547764, "grad_norm": 0.79296875, "learning_rate": 6.944828732342059e-06, "loss": 3.9652, "step": 16143 }, { "epoch": 5.377779628550012, "grad_norm": 0.7578125, "learning_rate": 6.9440215177471965e-06, "loss": 3.9802, "step": 16144 }, { "epoch": 5.3781127675522615, "grad_norm": 0.74609375, "learning_rate": 6.9432143096282765e-06, "loss": 3.9618, "step": 16145 }, { "epoch": 5.37844590655451, "grad_norm": 0.7890625, "learning_rate": 6.9424071079947075e-06, "loss": 3.887, "step": 16146 }, { "epoch": 5.378779045556758, "grad_norm": 0.75390625, "learning_rate": 6.941599912855888e-06, "loss": 3.9709, "step": 16147 }, { "epoch": 5.379112184559007, "grad_norm": 0.7265625, "learning_rate": 6.940792724221217e-06, "loss": 4.0267, "step": 16148 }, { "epoch": 5.379445323561256, "grad_norm": 0.765625, "learning_rate": 6.939985542100101e-06, "loss": 3.9608, "step": 16149 }, { "epoch": 5.379778462563505, "grad_norm": 0.7421875, "learning_rate": 6.93917836650194e-06, "loss": 4.0151, "step": 16150 }, { "epoch": 5.380111601565753, "grad_norm": 0.7421875, "learning_rate": 6.938371197436136e-06, "loss": 3.9856, "step": 16151 }, { "epoch": 5.380444740568002, "grad_norm": 0.74609375, "learning_rate": 6.937564034912091e-06, "loss": 4.0199, "step": 16152 }, { "epoch": 5.3807778795702506, "grad_norm": 0.69921875, "learning_rate": 6.936756878939208e-06, "loss": 3.9889, "step": 16153 }, { "epoch": 5.3811110185725, "grad_norm": 0.76953125, "learning_rate": 6.935949729526884e-06, "loss": 3.9946, "step": 16154 }, { "epoch": 5.381444157574748, "grad_norm": 0.703125, "learning_rate": 6.935142586684529e-06, "loss": 4.0502, "step": 16155 }, { "epoch": 5.381777296576997, "grad_norm": 0.734375, "learning_rate": 6.934335450421539e-06, "loss": 4.0352, "step": 16156 }, { "epoch": 5.3821104355792455, "grad_norm": 0.79296875, "learning_rate": 6.933528320747313e-06, "loss": 3.9807, "step": 16157 }, { "epoch": 5.382443574581494, "grad_norm": 0.734375, "learning_rate": 6.932721197671256e-06, "loss": 3.9745, "step": 16158 }, { "epoch": 5.382776713583743, "grad_norm": 0.76171875, "learning_rate": 6.931914081202766e-06, "loss": 4.0212, "step": 16159 }, { "epoch": 5.383109852585991, "grad_norm": 0.734375, "learning_rate": 6.93110697135125e-06, "loss": 4.0321, "step": 16160 }, { "epoch": 5.3834429915882405, "grad_norm": 0.765625, "learning_rate": 6.930299868126103e-06, "loss": 4.0614, "step": 16161 }, { "epoch": 5.383776130590489, "grad_norm": 0.765625, "learning_rate": 6.9294927715367305e-06, "loss": 3.9159, "step": 16162 }, { "epoch": 5.384109269592738, "grad_norm": 0.78125, "learning_rate": 6.9286856815925295e-06, "loss": 3.951, "step": 16163 }, { "epoch": 5.384442408594986, "grad_norm": 0.74609375, "learning_rate": 6.927878598302905e-06, "loss": 3.958, "step": 16164 }, { "epoch": 5.3847755475972345, "grad_norm": 0.76171875, "learning_rate": 6.927071521677257e-06, "loss": 3.9648, "step": 16165 }, { "epoch": 5.385108686599484, "grad_norm": 0.7421875, "learning_rate": 6.9262644517249806e-06, "loss": 3.9959, "step": 16166 }, { "epoch": 5.385441825601732, "grad_norm": 0.7109375, "learning_rate": 6.925457388455483e-06, "loss": 4.0055, "step": 16167 }, { "epoch": 5.385774964603981, "grad_norm": 0.7578125, "learning_rate": 6.924650331878161e-06, "loss": 3.9596, "step": 16168 }, { "epoch": 5.3861081036062295, "grad_norm": 0.7265625, "learning_rate": 6.923843282002418e-06, "loss": 3.9963, "step": 16169 }, { "epoch": 5.386441242608479, "grad_norm": 0.74609375, "learning_rate": 6.923036238837651e-06, "loss": 4.022, "step": 16170 }, { "epoch": 5.386774381610727, "grad_norm": 0.75390625, "learning_rate": 6.922229202393264e-06, "loss": 4.0684, "step": 16171 }, { "epoch": 5.387107520612976, "grad_norm": 0.71875, "learning_rate": 6.921422172678654e-06, "loss": 3.9862, "step": 16172 }, { "epoch": 5.3874406596152244, "grad_norm": 0.76171875, "learning_rate": 6.920615149703223e-06, "loss": 3.9733, "step": 16173 }, { "epoch": 5.387773798617473, "grad_norm": 0.765625, "learning_rate": 6.919808133476374e-06, "loss": 3.9734, "step": 16174 }, { "epoch": 5.388106937619722, "grad_norm": 0.75390625, "learning_rate": 6.919001124007498e-06, "loss": 4.0714, "step": 16175 }, { "epoch": 5.38844007662197, "grad_norm": 0.78515625, "learning_rate": 6.918194121306004e-06, "loss": 4.0461, "step": 16176 }, { "epoch": 5.388773215624219, "grad_norm": 0.765625, "learning_rate": 6.917387125381285e-06, "loss": 3.9589, "step": 16177 }, { "epoch": 5.389106354626468, "grad_norm": 0.765625, "learning_rate": 6.916580136242747e-06, "loss": 4.0024, "step": 16178 }, { "epoch": 5.389439493628717, "grad_norm": 0.74609375, "learning_rate": 6.915773153899785e-06, "loss": 3.9881, "step": 16179 }, { "epoch": 5.389772632630965, "grad_norm": 0.76953125, "learning_rate": 6.9149661783618014e-06, "loss": 4.026, "step": 16180 }, { "epoch": 5.390105771633214, "grad_norm": 0.7578125, "learning_rate": 6.914159209638193e-06, "loss": 3.9648, "step": 16181 }, { "epoch": 5.390438910635463, "grad_norm": 0.765625, "learning_rate": 6.913352247738363e-06, "loss": 3.9416, "step": 16182 }, { "epoch": 5.390772049637711, "grad_norm": 0.7421875, "learning_rate": 6.91254529267171e-06, "loss": 3.9672, "step": 16183 }, { "epoch": 5.39110518863996, "grad_norm": 0.7890625, "learning_rate": 6.91173834444763e-06, "loss": 3.9766, "step": 16184 }, { "epoch": 5.391438327642208, "grad_norm": 0.80078125, "learning_rate": 6.910931403075523e-06, "loss": 4.0098, "step": 16185 }, { "epoch": 5.391771466644458, "grad_norm": 0.765625, "learning_rate": 6.910124468564791e-06, "loss": 3.9739, "step": 16186 }, { "epoch": 5.392104605646706, "grad_norm": 0.79296875, "learning_rate": 6.909317540924829e-06, "loss": 3.9714, "step": 16187 }, { "epoch": 5.392437744648955, "grad_norm": 0.75, "learning_rate": 6.90851062016504e-06, "loss": 4.0092, "step": 16188 }, { "epoch": 5.392770883651203, "grad_norm": 0.77734375, "learning_rate": 6.90770370629482e-06, "loss": 3.9607, "step": 16189 }, { "epoch": 5.393104022653453, "grad_norm": 0.75, "learning_rate": 6.90689679932357e-06, "loss": 4.0357, "step": 16190 }, { "epoch": 5.393437161655701, "grad_norm": 0.78515625, "learning_rate": 6.906089899260686e-06, "loss": 3.9865, "step": 16191 }, { "epoch": 5.393770300657949, "grad_norm": 0.7578125, "learning_rate": 6.905283006115572e-06, "loss": 3.9876, "step": 16192 }, { "epoch": 5.394103439660198, "grad_norm": 0.75390625, "learning_rate": 6.904476119897621e-06, "loss": 3.9971, "step": 16193 }, { "epoch": 5.394436578662447, "grad_norm": 0.75, "learning_rate": 6.903669240616232e-06, "loss": 4.0244, "step": 16194 }, { "epoch": 5.394769717664696, "grad_norm": 0.76953125, "learning_rate": 6.902862368280805e-06, "loss": 3.9843, "step": 16195 }, { "epoch": 5.395102856666944, "grad_norm": 0.75, "learning_rate": 6.9020555029007375e-06, "loss": 3.978, "step": 16196 }, { "epoch": 5.395435995669193, "grad_norm": 0.75390625, "learning_rate": 6.9012486444854294e-06, "loss": 4.057, "step": 16197 }, { "epoch": 5.395769134671442, "grad_norm": 0.7421875, "learning_rate": 6.9004417930442764e-06, "loss": 4.0335, "step": 16198 }, { "epoch": 5.396102273673691, "grad_norm": 0.76953125, "learning_rate": 6.89963494858668e-06, "loss": 4.0256, "step": 16199 }, { "epoch": 5.396435412675939, "grad_norm": 0.76953125, "learning_rate": 6.898828111122034e-06, "loss": 3.92, "step": 16200 }, { "epoch": 5.396768551678187, "grad_norm": 0.76953125, "learning_rate": 6.898021280659741e-06, "loss": 4.0319, "step": 16201 }, { "epoch": 5.3971016906804365, "grad_norm": 0.71875, "learning_rate": 6.897214457209195e-06, "loss": 3.9749, "step": 16202 }, { "epoch": 5.397434829682685, "grad_norm": 0.7265625, "learning_rate": 6.896407640779793e-06, "loss": 3.9301, "step": 16203 }, { "epoch": 5.397767968684934, "grad_norm": 0.7578125, "learning_rate": 6.895600831380938e-06, "loss": 3.9799, "step": 16204 }, { "epoch": 5.398101107687182, "grad_norm": 0.7265625, "learning_rate": 6.89479402902202e-06, "loss": 4.0136, "step": 16205 }, { "epoch": 5.3984342466894315, "grad_norm": 0.77734375, "learning_rate": 6.893987233712443e-06, "loss": 4.0374, "step": 16206 }, { "epoch": 5.39876738569168, "grad_norm": 0.74609375, "learning_rate": 6.893180445461601e-06, "loss": 4.0202, "step": 16207 }, { "epoch": 5.399100524693928, "grad_norm": 0.81640625, "learning_rate": 6.8923736642788945e-06, "loss": 4.0367, "step": 16208 }, { "epoch": 5.399433663696177, "grad_norm": 0.80859375, "learning_rate": 6.8915668901737165e-06, "loss": 3.9185, "step": 16209 }, { "epoch": 5.399766802698426, "grad_norm": 0.75390625, "learning_rate": 6.89076012315547e-06, "loss": 4.037, "step": 16210 }, { "epoch": 5.400099941700675, "grad_norm": 0.6953125, "learning_rate": 6.889953363233546e-06, "loss": 4.0311, "step": 16211 }, { "epoch": 5.400433080702923, "grad_norm": 0.796875, "learning_rate": 6.8891466104173434e-06, "loss": 3.9096, "step": 16212 }, { "epoch": 5.400766219705172, "grad_norm": 0.77734375, "learning_rate": 6.888339864716261e-06, "loss": 4.0087, "step": 16213 }, { "epoch": 5.4010993587074205, "grad_norm": 0.796875, "learning_rate": 6.8875331261396925e-06, "loss": 3.9739, "step": 16214 }, { "epoch": 5.40143249770967, "grad_norm": 0.7734375, "learning_rate": 6.886726394697038e-06, "loss": 3.9783, "step": 16215 }, { "epoch": 5.401765636711918, "grad_norm": 0.75390625, "learning_rate": 6.885919670397691e-06, "loss": 3.9606, "step": 16216 }, { "epoch": 5.402098775714167, "grad_norm": 0.77734375, "learning_rate": 6.8851129532510515e-06, "loss": 4.0107, "step": 16217 }, { "epoch": 5.4024319147164155, "grad_norm": 0.8046875, "learning_rate": 6.884306243266512e-06, "loss": 4.0262, "step": 16218 }, { "epoch": 5.402765053718664, "grad_norm": 0.78125, "learning_rate": 6.883499540453476e-06, "loss": 3.9325, "step": 16219 }, { "epoch": 5.403098192720913, "grad_norm": 0.76953125, "learning_rate": 6.8826928448213315e-06, "loss": 4.0038, "step": 16220 }, { "epoch": 5.403431331723161, "grad_norm": 0.765625, "learning_rate": 6.881886156379478e-06, "loss": 4.0169, "step": 16221 }, { "epoch": 5.40376447072541, "grad_norm": 0.71875, "learning_rate": 6.881079475137313e-06, "loss": 3.982, "step": 16222 }, { "epoch": 5.404097609727659, "grad_norm": 0.73046875, "learning_rate": 6.88027280110423e-06, "loss": 4.0255, "step": 16223 }, { "epoch": 5.404430748729908, "grad_norm": 0.78515625, "learning_rate": 6.879466134289629e-06, "loss": 3.94, "step": 16224 }, { "epoch": 5.404763887732156, "grad_norm": 0.75, "learning_rate": 6.8786594747029e-06, "loss": 3.8961, "step": 16225 }, { "epoch": 5.4050970267344045, "grad_norm": 0.8046875, "learning_rate": 6.877852822353446e-06, "loss": 3.9506, "step": 16226 }, { "epoch": 5.405430165736654, "grad_norm": 0.7890625, "learning_rate": 6.877046177250655e-06, "loss": 3.9828, "step": 16227 }, { "epoch": 5.405763304738902, "grad_norm": 0.76953125, "learning_rate": 6.876239539403934e-06, "loss": 4.0177, "step": 16228 }, { "epoch": 5.406096443741151, "grad_norm": 0.78515625, "learning_rate": 6.875432908822665e-06, "loss": 4.0091, "step": 16229 }, { "epoch": 5.4064295827433995, "grad_norm": 0.79296875, "learning_rate": 6.874626285516252e-06, "loss": 3.9397, "step": 16230 }, { "epoch": 5.406762721745649, "grad_norm": 0.734375, "learning_rate": 6.8738196694940864e-06, "loss": 3.9811, "step": 16231 }, { "epoch": 5.407095860747897, "grad_norm": 0.76953125, "learning_rate": 6.8730130607655675e-06, "loss": 4.0255, "step": 16232 }, { "epoch": 5.407428999750146, "grad_norm": 0.75, "learning_rate": 6.872206459340086e-06, "loss": 3.9558, "step": 16233 }, { "epoch": 5.407762138752394, "grad_norm": 0.75, "learning_rate": 6.871399865227041e-06, "loss": 3.9764, "step": 16234 }, { "epoch": 5.408095277754643, "grad_norm": 0.7265625, "learning_rate": 6.870593278435825e-06, "loss": 3.9949, "step": 16235 }, { "epoch": 5.408428416756892, "grad_norm": 0.765625, "learning_rate": 6.869786698975835e-06, "loss": 4.0098, "step": 16236 }, { "epoch": 5.40876155575914, "grad_norm": 0.78125, "learning_rate": 6.868980126856467e-06, "loss": 4.0189, "step": 16237 }, { "epoch": 5.409094694761389, "grad_norm": 0.7734375, "learning_rate": 6.868173562087111e-06, "loss": 3.9258, "step": 16238 }, { "epoch": 5.409427833763638, "grad_norm": 0.73046875, "learning_rate": 6.867367004677165e-06, "loss": 4.0167, "step": 16239 }, { "epoch": 5.409760972765887, "grad_norm": 0.734375, "learning_rate": 6.866560454636022e-06, "loss": 3.975, "step": 16240 }, { "epoch": 5.410094111768135, "grad_norm": 0.72265625, "learning_rate": 6.86575391197308e-06, "loss": 4.0308, "step": 16241 }, { "epoch": 5.410427250770384, "grad_norm": 0.76171875, "learning_rate": 6.864947376697728e-06, "loss": 4.0277, "step": 16242 }, { "epoch": 5.410760389772633, "grad_norm": 0.7421875, "learning_rate": 6.864140848819366e-06, "loss": 3.9716, "step": 16243 }, { "epoch": 5.411093528774881, "grad_norm": 0.765625, "learning_rate": 6.863334328347384e-06, "loss": 4.071, "step": 16244 }, { "epoch": 5.41142666777713, "grad_norm": 0.828125, "learning_rate": 6.862527815291181e-06, "loss": 3.9591, "step": 16245 }, { "epoch": 5.411759806779378, "grad_norm": 0.75, "learning_rate": 6.861721309660149e-06, "loss": 3.9874, "step": 16246 }, { "epoch": 5.412092945781628, "grad_norm": 0.76171875, "learning_rate": 6.860914811463677e-06, "loss": 3.9919, "step": 16247 }, { "epoch": 5.412426084783876, "grad_norm": 0.75390625, "learning_rate": 6.860108320711166e-06, "loss": 4.0084, "step": 16248 }, { "epoch": 5.412759223786125, "grad_norm": 0.75390625, "learning_rate": 6.859301837412005e-06, "loss": 3.9609, "step": 16249 }, { "epoch": 5.413092362788373, "grad_norm": 0.765625, "learning_rate": 6.85849536157559e-06, "loss": 4.0231, "step": 16250 }, { "epoch": 5.4134255017906225, "grad_norm": 0.74609375, "learning_rate": 6.857688893211315e-06, "loss": 3.9987, "step": 16251 }, { "epoch": 5.413758640792871, "grad_norm": 0.8046875, "learning_rate": 6.856882432328574e-06, "loss": 4.0158, "step": 16252 }, { "epoch": 5.414091779795119, "grad_norm": 0.78515625, "learning_rate": 6.8560759789367585e-06, "loss": 3.9899, "step": 16253 }, { "epoch": 5.414424918797368, "grad_norm": 0.8203125, "learning_rate": 6.855269533045264e-06, "loss": 3.9987, "step": 16254 }, { "epoch": 5.414758057799617, "grad_norm": 0.7421875, "learning_rate": 6.8544630946634855e-06, "loss": 3.9921, "step": 16255 }, { "epoch": 5.415091196801866, "grad_norm": 0.74609375, "learning_rate": 6.85365666380081e-06, "loss": 3.9525, "step": 16256 }, { "epoch": 5.415424335804114, "grad_norm": 0.7421875, "learning_rate": 6.8528502404666355e-06, "loss": 4.0322, "step": 16257 }, { "epoch": 5.415757474806363, "grad_norm": 0.765625, "learning_rate": 6.852043824670352e-06, "loss": 3.9321, "step": 16258 }, { "epoch": 5.416090613808612, "grad_norm": 0.76953125, "learning_rate": 6.851237416421357e-06, "loss": 4.0332, "step": 16259 }, { "epoch": 5.416423752810861, "grad_norm": 0.78515625, "learning_rate": 6.85043101572904e-06, "loss": 3.9672, "step": 16260 }, { "epoch": 5.416756891813109, "grad_norm": 0.765625, "learning_rate": 6.849624622602796e-06, "loss": 3.9485, "step": 16261 }, { "epoch": 5.417090030815357, "grad_norm": 0.76953125, "learning_rate": 6.848818237052014e-06, "loss": 4.0317, "step": 16262 }, { "epoch": 5.4174231698176065, "grad_norm": 0.7734375, "learning_rate": 6.848011859086091e-06, "loss": 4.0245, "step": 16263 }, { "epoch": 5.417756308819855, "grad_norm": 0.765625, "learning_rate": 6.847205488714419e-06, "loss": 4.0418, "step": 16264 }, { "epoch": 5.418089447822104, "grad_norm": 0.7265625, "learning_rate": 6.846399125946386e-06, "loss": 4.0018, "step": 16265 }, { "epoch": 5.418422586824352, "grad_norm": 0.73046875, "learning_rate": 6.84559277079139e-06, "loss": 4.0946, "step": 16266 }, { "epoch": 5.4187557258266015, "grad_norm": 0.83984375, "learning_rate": 6.844786423258818e-06, "loss": 3.9952, "step": 16267 }, { "epoch": 5.41908886482885, "grad_norm": 0.73046875, "learning_rate": 6.843980083358067e-06, "loss": 4.0005, "step": 16268 }, { "epoch": 5.419422003831098, "grad_norm": 0.78125, "learning_rate": 6.8431737510985245e-06, "loss": 4.0074, "step": 16269 }, { "epoch": 5.419755142833347, "grad_norm": 0.80078125, "learning_rate": 6.842367426489588e-06, "loss": 3.9635, "step": 16270 }, { "epoch": 5.4200882818355955, "grad_norm": 0.76171875, "learning_rate": 6.841561109540644e-06, "loss": 4.0297, "step": 16271 }, { "epoch": 5.420421420837845, "grad_norm": 0.72265625, "learning_rate": 6.84075480026109e-06, "loss": 4.0831, "step": 16272 }, { "epoch": 5.420754559840093, "grad_norm": 0.76953125, "learning_rate": 6.8399484986603154e-06, "loss": 3.9545, "step": 16273 }, { "epoch": 5.421087698842342, "grad_norm": 0.76953125, "learning_rate": 6.8391422047477095e-06, "loss": 3.9909, "step": 16274 }, { "epoch": 5.4214208378445905, "grad_norm": 0.7421875, "learning_rate": 6.838335918532664e-06, "loss": 3.9519, "step": 16275 }, { "epoch": 5.42175397684684, "grad_norm": 0.7265625, "learning_rate": 6.837529640024573e-06, "loss": 4.0449, "step": 16276 }, { "epoch": 5.422087115849088, "grad_norm": 0.76171875, "learning_rate": 6.836723369232826e-06, "loss": 3.9912, "step": 16277 }, { "epoch": 5.422420254851337, "grad_norm": 0.7890625, "learning_rate": 6.8359171061668164e-06, "loss": 3.9439, "step": 16278 }, { "epoch": 5.4227533938535855, "grad_norm": 0.8046875, "learning_rate": 6.835110850835932e-06, "loss": 3.9842, "step": 16279 }, { "epoch": 5.423086532855834, "grad_norm": 0.79296875, "learning_rate": 6.834304603249568e-06, "loss": 3.9834, "step": 16280 }, { "epoch": 5.423419671858083, "grad_norm": 0.7734375, "learning_rate": 6.833498363417112e-06, "loss": 3.9819, "step": 16281 }, { "epoch": 5.423752810860331, "grad_norm": 0.7109375, "learning_rate": 6.8326921313479605e-06, "loss": 4.032, "step": 16282 }, { "epoch": 5.42408594986258, "grad_norm": 0.75390625, "learning_rate": 6.831885907051497e-06, "loss": 3.987, "step": 16283 }, { "epoch": 5.424419088864829, "grad_norm": 0.73046875, "learning_rate": 6.831079690537115e-06, "loss": 4.0286, "step": 16284 }, { "epoch": 5.424752227867078, "grad_norm": 0.73828125, "learning_rate": 6.8302734818142065e-06, "loss": 4.0155, "step": 16285 }, { "epoch": 5.425085366869326, "grad_norm": 0.77734375, "learning_rate": 6.82946728089216e-06, "loss": 3.9823, "step": 16286 }, { "epoch": 5.4254185058715745, "grad_norm": 0.76953125, "learning_rate": 6.828661087780369e-06, "loss": 4.0415, "step": 16287 }, { "epoch": 5.425751644873824, "grad_norm": 0.734375, "learning_rate": 6.827854902488221e-06, "loss": 4.0399, "step": 16288 }, { "epoch": 5.426084783876072, "grad_norm": 0.79296875, "learning_rate": 6.827048725025109e-06, "loss": 4.0312, "step": 16289 }, { "epoch": 5.426417922878321, "grad_norm": 0.765625, "learning_rate": 6.826242555400419e-06, "loss": 4.0298, "step": 16290 }, { "epoch": 5.426751061880569, "grad_norm": 0.765625, "learning_rate": 6.8254363936235485e-06, "loss": 3.9712, "step": 16291 }, { "epoch": 5.427084200882819, "grad_norm": 0.76953125, "learning_rate": 6.824630239703881e-06, "loss": 3.9657, "step": 16292 }, { "epoch": 5.427417339885067, "grad_norm": 0.77734375, "learning_rate": 6.823824093650807e-06, "loss": 4.0068, "step": 16293 }, { "epoch": 5.427750478887316, "grad_norm": 0.7421875, "learning_rate": 6.823017955473718e-06, "loss": 3.9557, "step": 16294 }, { "epoch": 5.428083617889564, "grad_norm": 0.77734375, "learning_rate": 6.822211825182003e-06, "loss": 3.9688, "step": 16295 }, { "epoch": 5.428416756891814, "grad_norm": 0.80859375, "learning_rate": 6.821405702785054e-06, "loss": 4.0294, "step": 16296 }, { "epoch": 5.428749895894062, "grad_norm": 0.7734375, "learning_rate": 6.820599588292257e-06, "loss": 4.013, "step": 16297 }, { "epoch": 5.42908303489631, "grad_norm": 0.77734375, "learning_rate": 6.819793481713004e-06, "loss": 4.0457, "step": 16298 }, { "epoch": 5.429416173898559, "grad_norm": 0.7734375, "learning_rate": 6.8189873830566825e-06, "loss": 3.9056, "step": 16299 }, { "epoch": 5.429749312900808, "grad_norm": 0.75, "learning_rate": 6.818181292332686e-06, "loss": 4.0349, "step": 16300 }, { "epoch": 5.430082451903057, "grad_norm": 0.8125, "learning_rate": 6.8173752095504e-06, "loss": 3.9884, "step": 16301 }, { "epoch": 5.430415590905305, "grad_norm": 0.7421875, "learning_rate": 6.816569134719212e-06, "loss": 3.9696, "step": 16302 }, { "epoch": 5.430748729907554, "grad_norm": 0.79296875, "learning_rate": 6.8157630678485136e-06, "loss": 3.9677, "step": 16303 }, { "epoch": 5.431081868909803, "grad_norm": 0.81640625, "learning_rate": 6.814957008947693e-06, "loss": 4.0674, "step": 16304 }, { "epoch": 5.431415007912051, "grad_norm": 0.79296875, "learning_rate": 6.814150958026142e-06, "loss": 3.9897, "step": 16305 }, { "epoch": 5.4317481469143, "grad_norm": 0.77734375, "learning_rate": 6.813344915093243e-06, "loss": 3.9817, "step": 16306 }, { "epoch": 5.432081285916548, "grad_norm": 0.76953125, "learning_rate": 6.812538880158392e-06, "loss": 4.0546, "step": 16307 }, { "epoch": 5.432414424918798, "grad_norm": 0.75, "learning_rate": 6.8117328532309715e-06, "loss": 4.011, "step": 16308 }, { "epoch": 5.432747563921046, "grad_norm": 0.76171875, "learning_rate": 6.810926834320375e-06, "loss": 3.9534, "step": 16309 }, { "epoch": 5.433080702923295, "grad_norm": 0.765625, "learning_rate": 6.810120823435988e-06, "loss": 4.0486, "step": 16310 }, { "epoch": 5.433413841925543, "grad_norm": 0.7578125, "learning_rate": 6.809314820587197e-06, "loss": 3.96, "step": 16311 }, { "epoch": 5.4337469809277925, "grad_norm": 0.75390625, "learning_rate": 6.808508825783394e-06, "loss": 3.9822, "step": 16312 }, { "epoch": 5.434080119930041, "grad_norm": 0.77734375, "learning_rate": 6.807702839033964e-06, "loss": 3.9888, "step": 16313 }, { "epoch": 5.434413258932289, "grad_norm": 0.71875, "learning_rate": 6.8068968603482975e-06, "loss": 3.9613, "step": 16314 }, { "epoch": 5.434746397934538, "grad_norm": 0.73828125, "learning_rate": 6.8060908897357805e-06, "loss": 3.9668, "step": 16315 }, { "epoch": 5.435079536936787, "grad_norm": 0.73046875, "learning_rate": 6.805284927205803e-06, "loss": 3.9692, "step": 16316 }, { "epoch": 5.435412675939036, "grad_norm": 0.78515625, "learning_rate": 6.804478972767749e-06, "loss": 4.0154, "step": 16317 }, { "epoch": 5.435745814941284, "grad_norm": 0.73828125, "learning_rate": 6.803673026431013e-06, "loss": 3.9271, "step": 16318 }, { "epoch": 5.436078953943533, "grad_norm": 0.73046875, "learning_rate": 6.802867088204975e-06, "loss": 4.0466, "step": 16319 }, { "epoch": 5.4364120929457815, "grad_norm": 0.71875, "learning_rate": 6.802061158099025e-06, "loss": 3.9721, "step": 16320 }, { "epoch": 5.436745231948031, "grad_norm": 0.7734375, "learning_rate": 6.801255236122551e-06, "loss": 4.0627, "step": 16321 }, { "epoch": 5.437078370950279, "grad_norm": 0.76953125, "learning_rate": 6.80044932228494e-06, "loss": 3.9573, "step": 16322 }, { "epoch": 5.437411509952527, "grad_norm": 0.73046875, "learning_rate": 6.7996434165955785e-06, "loss": 4.0373, "step": 16323 }, { "epoch": 5.4377446489547765, "grad_norm": 0.7890625, "learning_rate": 6.798837519063856e-06, "loss": 3.8992, "step": 16324 }, { "epoch": 5.438077787957025, "grad_norm": 0.76953125, "learning_rate": 6.798031629699156e-06, "loss": 4.0285, "step": 16325 }, { "epoch": 5.438410926959274, "grad_norm": 0.7578125, "learning_rate": 6.797225748510868e-06, "loss": 3.9854, "step": 16326 }, { "epoch": 5.438744065961522, "grad_norm": 0.7578125, "learning_rate": 6.796419875508381e-06, "loss": 4.0136, "step": 16327 }, { "epoch": 5.4390772049637715, "grad_norm": 0.76171875, "learning_rate": 6.795614010701073e-06, "loss": 4.0191, "step": 16328 }, { "epoch": 5.43941034396602, "grad_norm": 0.76953125, "learning_rate": 6.794808154098339e-06, "loss": 4.0171, "step": 16329 }, { "epoch": 5.439743482968268, "grad_norm": 0.71484375, "learning_rate": 6.794002305709561e-06, "loss": 3.9546, "step": 16330 }, { "epoch": 5.440076621970517, "grad_norm": 0.75390625, "learning_rate": 6.793196465544128e-06, "loss": 4.0266, "step": 16331 }, { "epoch": 5.4404097609727655, "grad_norm": 0.73046875, "learning_rate": 6.7923906336114246e-06, "loss": 3.9675, "step": 16332 }, { "epoch": 5.440742899975015, "grad_norm": 0.73828125, "learning_rate": 6.791584809920838e-06, "loss": 4.0036, "step": 16333 }, { "epoch": 5.441076038977263, "grad_norm": 0.7421875, "learning_rate": 6.790778994481753e-06, "loss": 3.9255, "step": 16334 }, { "epoch": 5.441409177979512, "grad_norm": 0.78125, "learning_rate": 6.7899731873035584e-06, "loss": 3.994, "step": 16335 }, { "epoch": 5.4417423169817605, "grad_norm": 0.78515625, "learning_rate": 6.78916738839564e-06, "loss": 3.9888, "step": 16336 }, { "epoch": 5.44207545598401, "grad_norm": 0.7265625, "learning_rate": 6.7883615977673776e-06, "loss": 4.0649, "step": 16337 }, { "epoch": 5.442408594986258, "grad_norm": 0.72265625, "learning_rate": 6.787555815428163e-06, "loss": 4.0287, "step": 16338 }, { "epoch": 5.442741733988507, "grad_norm": 0.72265625, "learning_rate": 6.786750041387379e-06, "loss": 3.9477, "step": 16339 }, { "epoch": 5.443074872990755, "grad_norm": 0.76953125, "learning_rate": 6.785944275654414e-06, "loss": 3.9643, "step": 16340 }, { "epoch": 5.443408011993004, "grad_norm": 0.765625, "learning_rate": 6.785138518238649e-06, "loss": 4.0223, "step": 16341 }, { "epoch": 5.443741150995253, "grad_norm": 0.7578125, "learning_rate": 6.784332769149474e-06, "loss": 3.9965, "step": 16342 }, { "epoch": 5.444074289997501, "grad_norm": 0.67578125, "learning_rate": 6.7835270283962705e-06, "loss": 3.99, "step": 16343 }, { "epoch": 5.44440742899975, "grad_norm": 0.75390625, "learning_rate": 6.782721295988427e-06, "loss": 4.0269, "step": 16344 }, { "epoch": 5.444740568001999, "grad_norm": 0.74609375, "learning_rate": 6.781915571935329e-06, "loss": 4.0117, "step": 16345 }, { "epoch": 5.445073707004248, "grad_norm": 0.7890625, "learning_rate": 6.781109856246354e-06, "loss": 3.9512, "step": 16346 }, { "epoch": 5.445406846006496, "grad_norm": 0.73828125, "learning_rate": 6.7803041489308955e-06, "loss": 4.0068, "step": 16347 }, { "epoch": 5.4457399850087445, "grad_norm": 0.734375, "learning_rate": 6.7794984499983315e-06, "loss": 4.0368, "step": 16348 }, { "epoch": 5.446073124010994, "grad_norm": 0.78125, "learning_rate": 6.778692759458053e-06, "loss": 3.9406, "step": 16349 }, { "epoch": 5.446406263013242, "grad_norm": 0.75, "learning_rate": 6.777887077319439e-06, "loss": 3.9922, "step": 16350 }, { "epoch": 5.446739402015491, "grad_norm": 0.765625, "learning_rate": 6.777081403591879e-06, "loss": 4.0114, "step": 16351 }, { "epoch": 5.447072541017739, "grad_norm": 0.7734375, "learning_rate": 6.776275738284753e-06, "loss": 3.9933, "step": 16352 }, { "epoch": 5.447405680019989, "grad_norm": 0.77734375, "learning_rate": 6.77547008140745e-06, "loss": 3.9453, "step": 16353 }, { "epoch": 5.447738819022237, "grad_norm": 0.7421875, "learning_rate": 6.77466443296935e-06, "loss": 4.0328, "step": 16354 }, { "epoch": 5.448071958024486, "grad_norm": 0.8046875, "learning_rate": 6.773858792979836e-06, "loss": 3.972, "step": 16355 }, { "epoch": 5.448405097026734, "grad_norm": 0.73828125, "learning_rate": 6.773053161448297e-06, "loss": 3.95, "step": 16356 }, { "epoch": 5.4487382360289836, "grad_norm": 0.71875, "learning_rate": 6.772247538384111e-06, "loss": 4.0047, "step": 16357 }, { "epoch": 5.449071375031232, "grad_norm": 0.7578125, "learning_rate": 6.771441923796667e-06, "loss": 3.9988, "step": 16358 }, { "epoch": 5.44940451403348, "grad_norm": 0.734375, "learning_rate": 6.770636317695345e-06, "loss": 3.9946, "step": 16359 }, { "epoch": 5.449737653035729, "grad_norm": 0.7421875, "learning_rate": 6.769830720089532e-06, "loss": 4.0443, "step": 16360 }, { "epoch": 5.450070792037978, "grad_norm": 0.7734375, "learning_rate": 6.769025130988608e-06, "loss": 4.0186, "step": 16361 }, { "epoch": 5.450403931040227, "grad_norm": 0.73046875, "learning_rate": 6.76821955040196e-06, "loss": 4.0058, "step": 16362 }, { "epoch": 5.450737070042475, "grad_norm": 0.76171875, "learning_rate": 6.767413978338969e-06, "loss": 3.9624, "step": 16363 }, { "epoch": 5.451070209044724, "grad_norm": 0.72265625, "learning_rate": 6.766608414809016e-06, "loss": 4.0185, "step": 16364 }, { "epoch": 5.451403348046973, "grad_norm": 0.78515625, "learning_rate": 6.765802859821489e-06, "loss": 3.9433, "step": 16365 }, { "epoch": 5.451736487049221, "grad_norm": 0.8203125, "learning_rate": 6.764997313385768e-06, "loss": 3.889, "step": 16366 }, { "epoch": 5.45206962605147, "grad_norm": 0.7265625, "learning_rate": 6.764191775511233e-06, "loss": 3.9187, "step": 16367 }, { "epoch": 5.452402765053718, "grad_norm": 0.76953125, "learning_rate": 6.763386246207275e-06, "loss": 4.093, "step": 16368 }, { "epoch": 5.4527359040559675, "grad_norm": 0.765625, "learning_rate": 6.762580725483267e-06, "loss": 3.9537, "step": 16369 }, { "epoch": 5.453069043058216, "grad_norm": 0.78125, "learning_rate": 6.761775213348599e-06, "loss": 4.0099, "step": 16370 }, { "epoch": 5.453402182060465, "grad_norm": 0.7890625, "learning_rate": 6.760969709812652e-06, "loss": 3.9763, "step": 16371 }, { "epoch": 5.453735321062713, "grad_norm": 0.7421875, "learning_rate": 6.760164214884806e-06, "loss": 4.0044, "step": 16372 }, { "epoch": 5.4540684600649625, "grad_norm": 0.78515625, "learning_rate": 6.759358728574444e-06, "loss": 3.9609, "step": 16373 }, { "epoch": 5.454401599067211, "grad_norm": 0.75390625, "learning_rate": 6.758553250890947e-06, "loss": 3.9541, "step": 16374 }, { "epoch": 5.454734738069459, "grad_norm": 0.73046875, "learning_rate": 6.757747781843701e-06, "loss": 3.9911, "step": 16375 }, { "epoch": 5.455067877071708, "grad_norm": 0.75390625, "learning_rate": 6.756942321442083e-06, "loss": 3.9812, "step": 16376 }, { "epoch": 5.4554010160739566, "grad_norm": 0.77734375, "learning_rate": 6.75613686969548e-06, "loss": 3.9662, "step": 16377 }, { "epoch": 5.455734155076206, "grad_norm": 0.765625, "learning_rate": 6.7553314266132694e-06, "loss": 3.9504, "step": 16378 }, { "epoch": 5.456067294078454, "grad_norm": 0.7421875, "learning_rate": 6.754525992204837e-06, "loss": 4.0101, "step": 16379 }, { "epoch": 5.456400433080703, "grad_norm": 0.7578125, "learning_rate": 6.753720566479563e-06, "loss": 3.9541, "step": 16380 }, { "epoch": 5.4567335720829515, "grad_norm": 0.7265625, "learning_rate": 6.752915149446825e-06, "loss": 4.0484, "step": 16381 }, { "epoch": 5.457066711085201, "grad_norm": 0.73828125, "learning_rate": 6.752109741116008e-06, "loss": 3.9169, "step": 16382 }, { "epoch": 5.457399850087449, "grad_norm": 0.77734375, "learning_rate": 6.751304341496492e-06, "loss": 3.9192, "step": 16383 }, { "epoch": 5.457732989089697, "grad_norm": 0.73828125, "learning_rate": 6.75049895059766e-06, "loss": 4.0662, "step": 16384 }, { "epoch": 5.4580661280919465, "grad_norm": 0.74609375, "learning_rate": 6.74969356842889e-06, "loss": 4.0089, "step": 16385 }, { "epoch": 5.458399267094195, "grad_norm": 0.8046875, "learning_rate": 6.748888194999567e-06, "loss": 4.0178, "step": 16386 }, { "epoch": 5.458732406096444, "grad_norm": 0.7734375, "learning_rate": 6.7480828303190676e-06, "loss": 4.0683, "step": 16387 }, { "epoch": 5.459065545098692, "grad_norm": 0.75390625, "learning_rate": 6.7472774743967765e-06, "loss": 3.9867, "step": 16388 }, { "epoch": 5.459398684100941, "grad_norm": 0.77734375, "learning_rate": 6.746472127242074e-06, "loss": 4.0312, "step": 16389 }, { "epoch": 5.45973182310319, "grad_norm": 0.8125, "learning_rate": 6.745666788864336e-06, "loss": 3.9677, "step": 16390 }, { "epoch": 5.460064962105439, "grad_norm": 0.77734375, "learning_rate": 6.7448614592729475e-06, "loss": 3.9243, "step": 16391 }, { "epoch": 5.460398101107687, "grad_norm": 0.7734375, "learning_rate": 6.7440561384772856e-06, "loss": 3.97, "step": 16392 }, { "epoch": 5.4607312401099355, "grad_norm": 0.79296875, "learning_rate": 6.7432508264867335e-06, "loss": 4.0245, "step": 16393 }, { "epoch": 5.461064379112185, "grad_norm": 0.69921875, "learning_rate": 6.742445523310668e-06, "loss": 3.9425, "step": 16394 }, { "epoch": 5.461397518114433, "grad_norm": 0.78515625, "learning_rate": 6.741640228958474e-06, "loss": 4.0174, "step": 16395 }, { "epoch": 5.461730657116682, "grad_norm": 0.7734375, "learning_rate": 6.740834943439528e-06, "loss": 4.0423, "step": 16396 }, { "epoch": 5.4620637961189304, "grad_norm": 0.7578125, "learning_rate": 6.740029666763211e-06, "loss": 3.8765, "step": 16397 }, { "epoch": 5.46239693512118, "grad_norm": 0.7578125, "learning_rate": 6.739224398938903e-06, "loss": 3.9568, "step": 16398 }, { "epoch": 5.462730074123428, "grad_norm": 0.82421875, "learning_rate": 6.738419139975982e-06, "loss": 3.9733, "step": 16399 }, { "epoch": 5.463063213125677, "grad_norm": 0.7734375, "learning_rate": 6.737613889883828e-06, "loss": 3.9801, "step": 16400 }, { "epoch": 5.463396352127925, "grad_norm": 0.8125, "learning_rate": 6.73680864867182e-06, "loss": 3.9245, "step": 16401 }, { "epoch": 5.463729491130174, "grad_norm": 0.75390625, "learning_rate": 6.73600341634934e-06, "loss": 3.9145, "step": 16402 }, { "epoch": 5.464062630132423, "grad_norm": 0.77734375, "learning_rate": 6.7351981929257635e-06, "loss": 4.0121, "step": 16403 }, { "epoch": 5.464395769134671, "grad_norm": 0.765625, "learning_rate": 6.734392978410473e-06, "loss": 4.0214, "step": 16404 }, { "epoch": 5.46472890813692, "grad_norm": 0.75, "learning_rate": 6.733587772812845e-06, "loss": 4.037, "step": 16405 }, { "epoch": 5.465062047139169, "grad_norm": 0.7578125, "learning_rate": 6.7327825761422605e-06, "loss": 3.9882, "step": 16406 }, { "epoch": 5.465395186141418, "grad_norm": 0.7265625, "learning_rate": 6.7319773884080994e-06, "loss": 4.0295, "step": 16407 }, { "epoch": 5.465728325143666, "grad_norm": 0.73828125, "learning_rate": 6.7311722096197345e-06, "loss": 4.0235, "step": 16408 }, { "epoch": 5.466061464145914, "grad_norm": 0.7890625, "learning_rate": 6.730367039786549e-06, "loss": 3.9782, "step": 16409 }, { "epoch": 5.466394603148164, "grad_norm": 0.79296875, "learning_rate": 6.729561878917919e-06, "loss": 4.0303, "step": 16410 }, { "epoch": 5.466727742150412, "grad_norm": 0.7421875, "learning_rate": 6.728756727023227e-06, "loss": 4.0059, "step": 16411 }, { "epoch": 5.467060881152661, "grad_norm": 0.734375, "learning_rate": 6.727951584111847e-06, "loss": 3.8958, "step": 16412 }, { "epoch": 5.467394020154909, "grad_norm": 0.765625, "learning_rate": 6.727146450193159e-06, "loss": 3.9574, "step": 16413 }, { "epoch": 5.467727159157159, "grad_norm": 0.75, "learning_rate": 6.726341325276542e-06, "loss": 4.0327, "step": 16414 }, { "epoch": 5.468060298159407, "grad_norm": 0.75390625, "learning_rate": 6.725536209371371e-06, "loss": 3.9162, "step": 16415 }, { "epoch": 5.468393437161656, "grad_norm": 0.7734375, "learning_rate": 6.724731102487029e-06, "loss": 3.9794, "step": 16416 }, { "epoch": 5.468726576163904, "grad_norm": 0.76953125, "learning_rate": 6.723926004632889e-06, "loss": 4.0324, "step": 16417 }, { "epoch": 5.4690597151661535, "grad_norm": 0.75, "learning_rate": 6.723120915818328e-06, "loss": 3.9975, "step": 16418 }, { "epoch": 5.469392854168402, "grad_norm": 0.8046875, "learning_rate": 6.722315836052727e-06, "loss": 3.9512, "step": 16419 }, { "epoch": 5.46972599317065, "grad_norm": 0.78125, "learning_rate": 6.7215107653454615e-06, "loss": 3.9458, "step": 16420 }, { "epoch": 5.470059132172899, "grad_norm": 0.76171875, "learning_rate": 6.72070570370591e-06, "loss": 3.9559, "step": 16421 }, { "epoch": 5.470392271175148, "grad_norm": 0.765625, "learning_rate": 6.719900651143448e-06, "loss": 3.9873, "step": 16422 }, { "epoch": 5.470725410177397, "grad_norm": 0.796875, "learning_rate": 6.719095607667454e-06, "loss": 3.9761, "step": 16423 }, { "epoch": 5.471058549179645, "grad_norm": 0.73046875, "learning_rate": 6.718290573287304e-06, "loss": 3.9529, "step": 16424 }, { "epoch": 5.471391688181894, "grad_norm": 0.77734375, "learning_rate": 6.7174855480123795e-06, "loss": 3.8978, "step": 16425 }, { "epoch": 5.4717248271841425, "grad_norm": 0.8046875, "learning_rate": 6.716680531852052e-06, "loss": 3.9562, "step": 16426 }, { "epoch": 5.472057966186391, "grad_norm": 0.7890625, "learning_rate": 6.715875524815698e-06, "loss": 3.9962, "step": 16427 }, { "epoch": 5.47239110518864, "grad_norm": 0.74609375, "learning_rate": 6.715070526912696e-06, "loss": 3.9519, "step": 16428 }, { "epoch": 5.472724244190888, "grad_norm": 0.734375, "learning_rate": 6.714265538152421e-06, "loss": 4.0432, "step": 16429 }, { "epoch": 5.4730573831931375, "grad_norm": 0.7578125, "learning_rate": 6.713460558544253e-06, "loss": 3.9921, "step": 16430 }, { "epoch": 5.473390522195386, "grad_norm": 0.7578125, "learning_rate": 6.712655588097564e-06, "loss": 3.97, "step": 16431 }, { "epoch": 5.473723661197635, "grad_norm": 0.69921875, "learning_rate": 6.711850626821733e-06, "loss": 4.0627, "step": 16432 }, { "epoch": 5.474056800199883, "grad_norm": 0.7421875, "learning_rate": 6.711045674726134e-06, "loss": 4.0042, "step": 16433 }, { "epoch": 5.4743899392021325, "grad_norm": 0.77734375, "learning_rate": 6.710240731820147e-06, "loss": 4.0345, "step": 16434 }, { "epoch": 5.474723078204381, "grad_norm": 0.765625, "learning_rate": 6.709435798113143e-06, "loss": 3.9249, "step": 16435 }, { "epoch": 5.475056217206629, "grad_norm": 0.76171875, "learning_rate": 6.708630873614499e-06, "loss": 3.9869, "step": 16436 }, { "epoch": 5.475389356208878, "grad_norm": 0.8359375, "learning_rate": 6.707825958333592e-06, "loss": 3.9309, "step": 16437 }, { "epoch": 5.4757224952111265, "grad_norm": 0.78515625, "learning_rate": 6.707021052279795e-06, "loss": 4.0563, "step": 16438 }, { "epoch": 5.476055634213376, "grad_norm": 0.8046875, "learning_rate": 6.706216155462487e-06, "loss": 3.9756, "step": 16439 }, { "epoch": 5.476388773215624, "grad_norm": 0.765625, "learning_rate": 6.70541126789104e-06, "loss": 3.9888, "step": 16440 }, { "epoch": 5.476721912217873, "grad_norm": 0.75, "learning_rate": 6.7046063895748325e-06, "loss": 3.9827, "step": 16441 }, { "epoch": 5.4770550512201215, "grad_norm": 0.69921875, "learning_rate": 6.7038015205232365e-06, "loss": 4.0074, "step": 16442 }, { "epoch": 5.477388190222371, "grad_norm": 0.7734375, "learning_rate": 6.70299666074563e-06, "loss": 3.9564, "step": 16443 }, { "epoch": 5.477721329224619, "grad_norm": 0.80078125, "learning_rate": 6.702191810251386e-06, "loss": 4.0344, "step": 16444 }, { "epoch": 5.478054468226867, "grad_norm": 0.8203125, "learning_rate": 6.701386969049877e-06, "loss": 3.9887, "step": 16445 }, { "epoch": 5.478387607229116, "grad_norm": 0.75390625, "learning_rate": 6.700582137150481e-06, "loss": 4.0395, "step": 16446 }, { "epoch": 5.478720746231365, "grad_norm": 0.734375, "learning_rate": 6.699777314562571e-06, "loss": 3.8923, "step": 16447 }, { "epoch": 5.479053885233614, "grad_norm": 0.75390625, "learning_rate": 6.698972501295524e-06, "loss": 3.9513, "step": 16448 }, { "epoch": 5.479387024235862, "grad_norm": 0.76171875, "learning_rate": 6.698167697358709e-06, "loss": 3.9955, "step": 16449 }, { "epoch": 5.479720163238111, "grad_norm": 0.76953125, "learning_rate": 6.697362902761506e-06, "loss": 3.92, "step": 16450 }, { "epoch": 5.48005330224036, "grad_norm": 0.78125, "learning_rate": 6.696558117513286e-06, "loss": 3.961, "step": 16451 }, { "epoch": 5.480386441242609, "grad_norm": 0.79296875, "learning_rate": 6.695753341623425e-06, "loss": 3.9194, "step": 16452 }, { "epoch": 5.480719580244857, "grad_norm": 0.76953125, "learning_rate": 6.694948575101296e-06, "loss": 3.9922, "step": 16453 }, { "epoch": 5.4810527192471055, "grad_norm": 0.82421875, "learning_rate": 6.694143817956269e-06, "loss": 4.036, "step": 16454 }, { "epoch": 5.481385858249355, "grad_norm": 0.703125, "learning_rate": 6.693339070197723e-06, "loss": 3.954, "step": 16455 }, { "epoch": 5.481718997251603, "grad_norm": 0.734375, "learning_rate": 6.692534331835029e-06, "loss": 3.9605, "step": 16456 }, { "epoch": 5.482052136253852, "grad_norm": 0.76953125, "learning_rate": 6.691729602877562e-06, "loss": 3.9637, "step": 16457 }, { "epoch": 5.4823852752561, "grad_norm": 0.7890625, "learning_rate": 6.690924883334693e-06, "loss": 4.0131, "step": 16458 }, { "epoch": 5.48271841425835, "grad_norm": 0.72265625, "learning_rate": 6.690120173215798e-06, "loss": 4.022, "step": 16459 }, { "epoch": 5.483051553260598, "grad_norm": 0.74609375, "learning_rate": 6.689315472530249e-06, "loss": 3.9991, "step": 16460 }, { "epoch": 5.483384692262847, "grad_norm": 0.734375, "learning_rate": 6.688510781287421e-06, "loss": 3.9941, "step": 16461 }, { "epoch": 5.483717831265095, "grad_norm": 0.8046875, "learning_rate": 6.68770609949668e-06, "loss": 3.9055, "step": 16462 }, { "epoch": 5.484050970267344, "grad_norm": 0.74609375, "learning_rate": 6.686901427167406e-06, "loss": 3.9136, "step": 16463 }, { "epoch": 5.484384109269593, "grad_norm": 0.73046875, "learning_rate": 6.686096764308968e-06, "loss": 3.9765, "step": 16464 }, { "epoch": 5.484717248271841, "grad_norm": 0.75390625, "learning_rate": 6.685292110930741e-06, "loss": 4.0127, "step": 16465 }, { "epoch": 5.48505038727409, "grad_norm": 0.7890625, "learning_rate": 6.684487467042095e-06, "loss": 4.0066, "step": 16466 }, { "epoch": 5.485383526276339, "grad_norm": 0.78125, "learning_rate": 6.683682832652405e-06, "loss": 4.0041, "step": 16467 }, { "epoch": 5.485716665278588, "grad_norm": 0.75, "learning_rate": 6.68287820777104e-06, "loss": 3.9759, "step": 16468 }, { "epoch": 5.486049804280836, "grad_norm": 0.76171875, "learning_rate": 6.682073592407375e-06, "loss": 3.9879, "step": 16469 }, { "epoch": 5.486382943283084, "grad_norm": 0.74609375, "learning_rate": 6.681268986570783e-06, "loss": 4.0098, "step": 16470 }, { "epoch": 5.486716082285334, "grad_norm": 0.75, "learning_rate": 6.680464390270632e-06, "loss": 4.0514, "step": 16471 }, { "epoch": 5.487049221287582, "grad_norm": 0.73828125, "learning_rate": 6.679659803516296e-06, "loss": 4.0058, "step": 16472 }, { "epoch": 5.487382360289831, "grad_norm": 0.76953125, "learning_rate": 6.678855226317146e-06, "loss": 3.9738, "step": 16473 }, { "epoch": 5.487715499292079, "grad_norm": 0.80859375, "learning_rate": 6.678050658682554e-06, "loss": 4.1151, "step": 16474 }, { "epoch": 5.4880486382943285, "grad_norm": 0.7265625, "learning_rate": 6.67724610062189e-06, "loss": 3.9997, "step": 16475 }, { "epoch": 5.488381777296577, "grad_norm": 0.73046875, "learning_rate": 6.67644155214453e-06, "loss": 3.9946, "step": 16476 }, { "epoch": 5.488714916298826, "grad_norm": 0.75, "learning_rate": 6.67563701325984e-06, "loss": 3.9702, "step": 16477 }, { "epoch": 5.489048055301074, "grad_norm": 0.7421875, "learning_rate": 6.674832483977194e-06, "loss": 3.9671, "step": 16478 }, { "epoch": 5.4893811943033235, "grad_norm": 0.74609375, "learning_rate": 6.674027964305965e-06, "loss": 4.0033, "step": 16479 }, { "epoch": 5.489714333305572, "grad_norm": 0.76953125, "learning_rate": 6.673223454255517e-06, "loss": 4.0419, "step": 16480 }, { "epoch": 5.49004747230782, "grad_norm": 0.80078125, "learning_rate": 6.672418953835227e-06, "loss": 3.9644, "step": 16481 }, { "epoch": 5.490380611310069, "grad_norm": 0.75390625, "learning_rate": 6.671614463054462e-06, "loss": 4.0861, "step": 16482 }, { "epoch": 5.490713750312318, "grad_norm": 0.8125, "learning_rate": 6.670809981922595e-06, "loss": 4.0087, "step": 16483 }, { "epoch": 5.491046889314567, "grad_norm": 0.7578125, "learning_rate": 6.670005510448996e-06, "loss": 4.0186, "step": 16484 }, { "epoch": 5.491380028316815, "grad_norm": 0.765625, "learning_rate": 6.669201048643034e-06, "loss": 3.9892, "step": 16485 }, { "epoch": 5.491713167319064, "grad_norm": 0.7578125, "learning_rate": 6.6683965965140816e-06, "loss": 4.0304, "step": 16486 }, { "epoch": 5.4920463063213125, "grad_norm": 0.765625, "learning_rate": 6.667592154071507e-06, "loss": 3.9399, "step": 16487 }, { "epoch": 5.492379445323561, "grad_norm": 0.7421875, "learning_rate": 6.666787721324683e-06, "loss": 3.9736, "step": 16488 }, { "epoch": 5.49271258432581, "grad_norm": 0.78515625, "learning_rate": 6.665983298282975e-06, "loss": 3.9701, "step": 16489 }, { "epoch": 5.493045723328058, "grad_norm": 0.73828125, "learning_rate": 6.665178884955756e-06, "loss": 4.0012, "step": 16490 }, { "epoch": 5.4933788623303075, "grad_norm": 0.77734375, "learning_rate": 6.664374481352393e-06, "loss": 3.9262, "step": 16491 }, { "epoch": 5.493712001332556, "grad_norm": 0.76171875, "learning_rate": 6.663570087482259e-06, "loss": 3.9797, "step": 16492 }, { "epoch": 5.494045140334805, "grad_norm": 0.77734375, "learning_rate": 6.662765703354721e-06, "loss": 3.9488, "step": 16493 }, { "epoch": 5.494378279337053, "grad_norm": 0.75, "learning_rate": 6.661961328979149e-06, "loss": 4.0162, "step": 16494 }, { "epoch": 5.494711418339302, "grad_norm": 0.7734375, "learning_rate": 6.661156964364912e-06, "loss": 3.981, "step": 16495 }, { "epoch": 5.495044557341551, "grad_norm": 0.7578125, "learning_rate": 6.660352609521381e-06, "loss": 3.9777, "step": 16496 }, { "epoch": 5.4953776963438, "grad_norm": 0.74609375, "learning_rate": 6.659548264457925e-06, "loss": 3.9631, "step": 16497 }, { "epoch": 5.495710835346048, "grad_norm": 0.75390625, "learning_rate": 6.658743929183907e-06, "loss": 3.96, "step": 16498 }, { "epoch": 5.4960439743482965, "grad_norm": 0.76171875, "learning_rate": 6.657939603708703e-06, "loss": 3.984, "step": 16499 }, { "epoch": 5.496377113350546, "grad_norm": 0.78125, "learning_rate": 6.657135288041677e-06, "loss": 4.0492, "step": 16500 }, { "epoch": 5.496710252352794, "grad_norm": 0.6953125, "learning_rate": 6.6563309821922e-06, "loss": 3.9944, "step": 16501 }, { "epoch": 5.497043391355043, "grad_norm": 0.73046875, "learning_rate": 6.655526686169639e-06, "loss": 4.0527, "step": 16502 }, { "epoch": 5.4973765303572915, "grad_norm": 0.77734375, "learning_rate": 6.654722399983363e-06, "loss": 3.9278, "step": 16503 }, { "epoch": 5.497709669359541, "grad_norm": 0.75390625, "learning_rate": 6.65391812364274e-06, "loss": 4.0382, "step": 16504 }, { "epoch": 5.498042808361789, "grad_norm": 0.7890625, "learning_rate": 6.65311385715714e-06, "loss": 4.0344, "step": 16505 }, { "epoch": 5.498375947364037, "grad_norm": 0.765625, "learning_rate": 6.65230960053593e-06, "loss": 3.9658, "step": 16506 }, { "epoch": 5.498709086366286, "grad_norm": 0.76171875, "learning_rate": 6.651505353788476e-06, "loss": 3.9951, "step": 16507 }, { "epoch": 5.499042225368535, "grad_norm": 0.7578125, "learning_rate": 6.650701116924145e-06, "loss": 4.0465, "step": 16508 }, { "epoch": 5.499375364370784, "grad_norm": 0.765625, "learning_rate": 6.649896889952309e-06, "loss": 3.9692, "step": 16509 }, { "epoch": 5.499708503373032, "grad_norm": 0.7734375, "learning_rate": 6.64909267288233e-06, "loss": 3.9453, "step": 16510 }, { "epoch": 5.500041642375281, "grad_norm": 0.75, "learning_rate": 6.648288465723581e-06, "loss": 4.0115, "step": 16511 }, { "epoch": 5.50037478137753, "grad_norm": 0.78125, "learning_rate": 6.647484268485423e-06, "loss": 4.0843, "step": 16512 }, { "epoch": 5.500707920379779, "grad_norm": 0.77734375, "learning_rate": 6.64668008117723e-06, "loss": 4.0125, "step": 16513 }, { "epoch": 5.501041059382027, "grad_norm": 0.73046875, "learning_rate": 6.645875903808363e-06, "loss": 3.9637, "step": 16514 }, { "epoch": 5.501374198384276, "grad_norm": 0.7890625, "learning_rate": 6.645071736388196e-06, "loss": 3.9492, "step": 16515 }, { "epoch": 5.501707337386525, "grad_norm": 0.76953125, "learning_rate": 6.64426757892609e-06, "loss": 3.9593, "step": 16516 }, { "epoch": 5.502040476388773, "grad_norm": 0.7265625, "learning_rate": 6.643463431431411e-06, "loss": 4.0589, "step": 16517 }, { "epoch": 5.502373615391022, "grad_norm": 0.71484375, "learning_rate": 6.64265929391353e-06, "loss": 4.0709, "step": 16518 }, { "epoch": 5.50270675439327, "grad_norm": 0.734375, "learning_rate": 6.641855166381809e-06, "loss": 4.0676, "step": 16519 }, { "epoch": 5.50303989339552, "grad_norm": 0.74609375, "learning_rate": 6.641051048845618e-06, "loss": 3.9523, "step": 16520 }, { "epoch": 5.503373032397768, "grad_norm": 0.77734375, "learning_rate": 6.64024694131432e-06, "loss": 3.9858, "step": 16521 }, { "epoch": 5.503706171400017, "grad_norm": 0.765625, "learning_rate": 6.639442843797285e-06, "loss": 4.0763, "step": 16522 }, { "epoch": 5.504039310402265, "grad_norm": 0.79296875, "learning_rate": 6.6386387563038754e-06, "loss": 3.9866, "step": 16523 }, { "epoch": 5.504372449404514, "grad_norm": 0.78515625, "learning_rate": 6.637834678843462e-06, "loss": 3.9891, "step": 16524 }, { "epoch": 5.504705588406763, "grad_norm": 0.765625, "learning_rate": 6.637030611425405e-06, "loss": 4.0387, "step": 16525 }, { "epoch": 5.505038727409011, "grad_norm": 0.7578125, "learning_rate": 6.636226554059071e-06, "loss": 3.9987, "step": 16526 }, { "epoch": 5.50537186641126, "grad_norm": 0.74609375, "learning_rate": 6.635422506753828e-06, "loss": 4.021, "step": 16527 }, { "epoch": 5.505705005413509, "grad_norm": 0.7578125, "learning_rate": 6.634618469519039e-06, "loss": 3.9582, "step": 16528 }, { "epoch": 5.506038144415758, "grad_norm": 0.7421875, "learning_rate": 6.633814442364071e-06, "loss": 4.0311, "step": 16529 }, { "epoch": 5.506371283418006, "grad_norm": 0.74609375, "learning_rate": 6.633010425298288e-06, "loss": 4.0048, "step": 16530 }, { "epoch": 5.506704422420254, "grad_norm": 0.73828125, "learning_rate": 6.632206418331057e-06, "loss": 3.9968, "step": 16531 }, { "epoch": 5.507037561422504, "grad_norm": 0.80859375, "learning_rate": 6.63140242147174e-06, "loss": 3.9561, "step": 16532 }, { "epoch": 5.507370700424752, "grad_norm": 0.7734375, "learning_rate": 6.630598434729707e-06, "loss": 3.9594, "step": 16533 }, { "epoch": 5.507703839427001, "grad_norm": 0.7890625, "learning_rate": 6.629794458114316e-06, "loss": 3.9912, "step": 16534 }, { "epoch": 5.508036978429249, "grad_norm": 0.75390625, "learning_rate": 6.628990491634934e-06, "loss": 4.0921, "step": 16535 }, { "epoch": 5.5083701174314985, "grad_norm": 0.7734375, "learning_rate": 6.628186535300928e-06, "loss": 3.905, "step": 16536 }, { "epoch": 5.508703256433747, "grad_norm": 0.74609375, "learning_rate": 6.627382589121659e-06, "loss": 3.982, "step": 16537 }, { "epoch": 5.509036395435996, "grad_norm": 0.7890625, "learning_rate": 6.626578653106495e-06, "loss": 3.9722, "step": 16538 }, { "epoch": 5.509369534438244, "grad_norm": 0.734375, "learning_rate": 6.625774727264795e-06, "loss": 3.9436, "step": 16539 }, { "epoch": 5.5097026734404935, "grad_norm": 0.796875, "learning_rate": 6.624970811605928e-06, "loss": 3.9945, "step": 16540 }, { "epoch": 5.510035812442742, "grad_norm": 0.77734375, "learning_rate": 6.624166906139254e-06, "loss": 3.9697, "step": 16541 }, { "epoch": 5.51036895144499, "grad_norm": 0.72265625, "learning_rate": 6.623363010874142e-06, "loss": 3.9807, "step": 16542 }, { "epoch": 5.510702090447239, "grad_norm": 0.7578125, "learning_rate": 6.62255912581995e-06, "loss": 3.9656, "step": 16543 }, { "epoch": 5.5110352294494875, "grad_norm": 0.71875, "learning_rate": 6.621755250986042e-06, "loss": 4.0191, "step": 16544 }, { "epoch": 5.511368368451737, "grad_norm": 0.76171875, "learning_rate": 6.620951386381784e-06, "loss": 3.9762, "step": 16545 }, { "epoch": 5.511701507453985, "grad_norm": 0.75390625, "learning_rate": 6.620147532016539e-06, "loss": 4.0184, "step": 16546 }, { "epoch": 5.512034646456234, "grad_norm": 0.79296875, "learning_rate": 6.6193436878996685e-06, "loss": 4.0336, "step": 16547 }, { "epoch": 5.5123677854584825, "grad_norm": 0.74609375, "learning_rate": 6.618539854040537e-06, "loss": 4.0154, "step": 16548 }, { "epoch": 5.512700924460731, "grad_norm": 0.75, "learning_rate": 6.617736030448508e-06, "loss": 3.9872, "step": 16549 }, { "epoch": 5.51303406346298, "grad_norm": 0.78515625, "learning_rate": 6.6169322171329405e-06, "loss": 3.9894, "step": 16550 }, { "epoch": 5.513367202465228, "grad_norm": 0.734375, "learning_rate": 6.616128414103206e-06, "loss": 3.9813, "step": 16551 }, { "epoch": 5.5137003414674775, "grad_norm": 0.76953125, "learning_rate": 6.615324621368656e-06, "loss": 4.0163, "step": 16552 }, { "epoch": 5.514033480469726, "grad_norm": 0.74609375, "learning_rate": 6.61452083893866e-06, "loss": 4.0202, "step": 16553 }, { "epoch": 5.514366619471975, "grad_norm": 0.7890625, "learning_rate": 6.613717066822576e-06, "loss": 3.9782, "step": 16554 }, { "epoch": 5.514699758474223, "grad_norm": 0.7734375, "learning_rate": 6.61291330502977e-06, "loss": 4.0552, "step": 16555 }, { "epoch": 5.515032897476472, "grad_norm": 0.75, "learning_rate": 6.612109553569602e-06, "loss": 4.041, "step": 16556 }, { "epoch": 5.515366036478721, "grad_norm": 0.7578125, "learning_rate": 6.611305812451435e-06, "loss": 3.9627, "step": 16557 }, { "epoch": 5.51569917548097, "grad_norm": 0.76171875, "learning_rate": 6.610502081684629e-06, "loss": 4.0057, "step": 16558 }, { "epoch": 5.516032314483218, "grad_norm": 0.76953125, "learning_rate": 6.609698361278548e-06, "loss": 3.9863, "step": 16559 }, { "epoch": 5.5163654534854665, "grad_norm": 0.7734375, "learning_rate": 6.608894651242556e-06, "loss": 3.9277, "step": 16560 }, { "epoch": 5.516698592487716, "grad_norm": 0.78125, "learning_rate": 6.608090951586007e-06, "loss": 4.0266, "step": 16561 }, { "epoch": 5.517031731489964, "grad_norm": 0.77734375, "learning_rate": 6.6072872623182675e-06, "loss": 4.0322, "step": 16562 }, { "epoch": 5.517364870492213, "grad_norm": 0.75, "learning_rate": 6.606483583448697e-06, "loss": 3.9756, "step": 16563 }, { "epoch": 5.517698009494461, "grad_norm": 0.765625, "learning_rate": 6.6056799149866595e-06, "loss": 4.0668, "step": 16564 }, { "epoch": 5.518031148496711, "grad_norm": 0.73828125, "learning_rate": 6.604876256941511e-06, "loss": 3.9982, "step": 16565 }, { "epoch": 5.518364287498959, "grad_norm": 0.73828125, "learning_rate": 6.604072609322618e-06, "loss": 3.9716, "step": 16566 }, { "epoch": 5.518697426501207, "grad_norm": 0.8125, "learning_rate": 6.6032689721393365e-06, "loss": 4.0398, "step": 16567 }, { "epoch": 5.519030565503456, "grad_norm": 0.76953125, "learning_rate": 6.60246534540103e-06, "loss": 3.9798, "step": 16568 }, { "epoch": 5.519363704505705, "grad_norm": 0.79296875, "learning_rate": 6.601661729117061e-06, "loss": 3.9661, "step": 16569 }, { "epoch": 5.519696843507954, "grad_norm": 0.7578125, "learning_rate": 6.600858123296784e-06, "loss": 4.0241, "step": 16570 }, { "epoch": 5.520029982510202, "grad_norm": 0.75, "learning_rate": 6.600054527949563e-06, "loss": 4.0408, "step": 16571 }, { "epoch": 5.520363121512451, "grad_norm": 0.75390625, "learning_rate": 6.599250943084757e-06, "loss": 3.955, "step": 16572 }, { "epoch": 5.5206962605147, "grad_norm": 0.75390625, "learning_rate": 6.598447368711727e-06, "loss": 4.0177, "step": 16573 }, { "epoch": 5.521029399516949, "grad_norm": 0.7734375, "learning_rate": 6.597643804839831e-06, "loss": 3.9971, "step": 16574 }, { "epoch": 5.521362538519197, "grad_norm": 0.76171875, "learning_rate": 6.596840251478431e-06, "loss": 3.9542, "step": 16575 }, { "epoch": 5.521695677521446, "grad_norm": 0.8046875, "learning_rate": 6.596036708636885e-06, "loss": 3.9503, "step": 16576 }, { "epoch": 5.522028816523695, "grad_norm": 0.765625, "learning_rate": 6.595233176324555e-06, "loss": 3.9428, "step": 16577 }, { "epoch": 5.522361955525943, "grad_norm": 0.78515625, "learning_rate": 6.594429654550801e-06, "loss": 4.0212, "step": 16578 }, { "epoch": 5.522695094528192, "grad_norm": 0.76953125, "learning_rate": 6.593626143324976e-06, "loss": 3.9315, "step": 16579 }, { "epoch": 5.52302823353044, "grad_norm": 0.77734375, "learning_rate": 6.592822642656445e-06, "loss": 3.9385, "step": 16580 }, { "epoch": 5.5233613725326896, "grad_norm": 0.76953125, "learning_rate": 6.592019152554564e-06, "loss": 4.0163, "step": 16581 }, { "epoch": 5.523694511534938, "grad_norm": 0.70703125, "learning_rate": 6.591215673028695e-06, "loss": 4.0544, "step": 16582 }, { "epoch": 5.524027650537187, "grad_norm": 0.765625, "learning_rate": 6.5904122040881925e-06, "loss": 4.0254, "step": 16583 }, { "epoch": 5.524360789539435, "grad_norm": 0.7578125, "learning_rate": 6.5896087457424196e-06, "loss": 3.9962, "step": 16584 }, { "epoch": 5.524693928541684, "grad_norm": 0.84375, "learning_rate": 6.588805298000732e-06, "loss": 3.9315, "step": 16585 }, { "epoch": 5.525027067543933, "grad_norm": 0.765625, "learning_rate": 6.5880018608724895e-06, "loss": 3.9409, "step": 16586 }, { "epoch": 5.525360206546181, "grad_norm": 0.734375, "learning_rate": 6.587198434367052e-06, "loss": 3.9747, "step": 16587 }, { "epoch": 5.52569334554843, "grad_norm": 0.76953125, "learning_rate": 6.5863950184937724e-06, "loss": 4.0169, "step": 16588 }, { "epoch": 5.526026484550679, "grad_norm": 0.78125, "learning_rate": 6.5855916132620145e-06, "loss": 3.9907, "step": 16589 }, { "epoch": 5.526359623552928, "grad_norm": 0.77734375, "learning_rate": 6.584788218681131e-06, "loss": 3.9741, "step": 16590 }, { "epoch": 5.526692762555176, "grad_norm": 0.7890625, "learning_rate": 6.583984834760484e-06, "loss": 3.9528, "step": 16591 }, { "epoch": 5.527025901557424, "grad_norm": 0.75390625, "learning_rate": 6.583181461509428e-06, "loss": 3.991, "step": 16592 }, { "epoch": 5.5273590405596735, "grad_norm": 0.75390625, "learning_rate": 6.582378098937324e-06, "loss": 4.0196, "step": 16593 }, { "epoch": 5.527692179561922, "grad_norm": 0.82421875, "learning_rate": 6.581574747053526e-06, "loss": 3.9599, "step": 16594 }, { "epoch": 5.528025318564171, "grad_norm": 0.78515625, "learning_rate": 6.580771405867395e-06, "loss": 4.0534, "step": 16595 }, { "epoch": 5.528358457566419, "grad_norm": 0.76171875, "learning_rate": 6.5799680753882875e-06, "loss": 3.942, "step": 16596 }, { "epoch": 5.5286915965686685, "grad_norm": 0.765625, "learning_rate": 6.5791647556255565e-06, "loss": 4.0268, "step": 16597 }, { "epoch": 5.529024735570917, "grad_norm": 0.73046875, "learning_rate": 6.578361446588561e-06, "loss": 3.9773, "step": 16598 }, { "epoch": 5.529357874573166, "grad_norm": 0.73828125, "learning_rate": 6.577558148286659e-06, "loss": 3.9926, "step": 16599 }, { "epoch": 5.529691013575414, "grad_norm": 0.72265625, "learning_rate": 6.576754860729206e-06, "loss": 4.0042, "step": 16600 }, { "epoch": 5.530024152577663, "grad_norm": 0.7578125, "learning_rate": 6.575951583925561e-06, "loss": 4.0711, "step": 16601 }, { "epoch": 5.530357291579912, "grad_norm": 0.77734375, "learning_rate": 6.5751483178850755e-06, "loss": 3.9878, "step": 16602 }, { "epoch": 5.53069043058216, "grad_norm": 0.81640625, "learning_rate": 6.574345062617112e-06, "loss": 3.9808, "step": 16603 }, { "epoch": 5.531023569584409, "grad_norm": 0.73046875, "learning_rate": 6.573541818131021e-06, "loss": 4.0423, "step": 16604 }, { "epoch": 5.5313567085866575, "grad_norm": 0.765625, "learning_rate": 6.572738584436165e-06, "loss": 3.8885, "step": 16605 }, { "epoch": 5.531689847588907, "grad_norm": 0.76953125, "learning_rate": 6.571935361541893e-06, "loss": 4.0008, "step": 16606 }, { "epoch": 5.532022986591155, "grad_norm": 0.7578125, "learning_rate": 6.571132149457564e-06, "loss": 3.9562, "step": 16607 }, { "epoch": 5.532356125593404, "grad_norm": 0.76171875, "learning_rate": 6.570328948192534e-06, "loss": 3.9794, "step": 16608 }, { "epoch": 5.5326892645956525, "grad_norm": 0.80078125, "learning_rate": 6.569525757756156e-06, "loss": 3.9888, "step": 16609 }, { "epoch": 5.533022403597901, "grad_norm": 0.7421875, "learning_rate": 6.568722578157791e-06, "loss": 3.9593, "step": 16610 }, { "epoch": 5.53335554260015, "grad_norm": 0.765625, "learning_rate": 6.567919409406788e-06, "loss": 3.9807, "step": 16611 }, { "epoch": 5.533688681602398, "grad_norm": 0.73828125, "learning_rate": 6.5671162515125066e-06, "loss": 4.0357, "step": 16612 }, { "epoch": 5.534021820604647, "grad_norm": 0.75, "learning_rate": 6.566313104484303e-06, "loss": 3.9779, "step": 16613 }, { "epoch": 5.534354959606896, "grad_norm": 0.75, "learning_rate": 6.565509968331524e-06, "loss": 3.9659, "step": 16614 }, { "epoch": 5.534688098609145, "grad_norm": 0.72265625, "learning_rate": 6.564706843063533e-06, "loss": 4.0566, "step": 16615 }, { "epoch": 5.535021237611393, "grad_norm": 0.7734375, "learning_rate": 6.5639037286896796e-06, "loss": 4.0687, "step": 16616 }, { "epoch": 5.535354376613642, "grad_norm": 0.73828125, "learning_rate": 6.563100625219321e-06, "loss": 3.9858, "step": 16617 }, { "epoch": 5.535687515615891, "grad_norm": 0.78125, "learning_rate": 6.56229753266181e-06, "loss": 3.9536, "step": 16618 }, { "epoch": 5.53602065461814, "grad_norm": 0.734375, "learning_rate": 6.561494451026504e-06, "loss": 3.9404, "step": 16619 }, { "epoch": 5.536353793620388, "grad_norm": 0.78125, "learning_rate": 6.560691380322751e-06, "loss": 4.033, "step": 16620 }, { "epoch": 5.5366869326226364, "grad_norm": 0.7734375, "learning_rate": 6.559888320559913e-06, "loss": 4.0419, "step": 16621 }, { "epoch": 5.537020071624886, "grad_norm": 0.7890625, "learning_rate": 6.55908527174734e-06, "loss": 3.9847, "step": 16622 }, { "epoch": 5.537353210627134, "grad_norm": 0.75, "learning_rate": 6.558282233894383e-06, "loss": 3.9802, "step": 16623 }, { "epoch": 5.537686349629383, "grad_norm": 0.78125, "learning_rate": 6.557479207010399e-06, "loss": 3.9776, "step": 16624 }, { "epoch": 5.538019488631631, "grad_norm": 0.78515625, "learning_rate": 6.556676191104739e-06, "loss": 4.0137, "step": 16625 }, { "epoch": 5.538352627633881, "grad_norm": 0.74609375, "learning_rate": 6.555873186186761e-06, "loss": 4.0727, "step": 16626 }, { "epoch": 5.538685766636129, "grad_norm": 0.73828125, "learning_rate": 6.555070192265812e-06, "loss": 4.0, "step": 16627 }, { "epoch": 5.539018905638377, "grad_norm": 0.77734375, "learning_rate": 6.554267209351252e-06, "loss": 3.9834, "step": 16628 }, { "epoch": 5.539352044640626, "grad_norm": 0.76171875, "learning_rate": 6.553464237452428e-06, "loss": 4.0847, "step": 16629 }, { "epoch": 5.539685183642875, "grad_norm": 0.7734375, "learning_rate": 6.552661276578697e-06, "loss": 3.9712, "step": 16630 }, { "epoch": 5.540018322645124, "grad_norm": 0.75390625, "learning_rate": 6.551858326739413e-06, "loss": 3.9538, "step": 16631 }, { "epoch": 5.540351461647372, "grad_norm": 0.77734375, "learning_rate": 6.551055387943921e-06, "loss": 3.9994, "step": 16632 }, { "epoch": 5.540684600649621, "grad_norm": 0.78515625, "learning_rate": 6.550252460201581e-06, "loss": 3.892, "step": 16633 }, { "epoch": 5.54101773965187, "grad_norm": 0.7578125, "learning_rate": 6.54944954352174e-06, "loss": 4.0389, "step": 16634 }, { "epoch": 5.541350878654119, "grad_norm": 0.7421875, "learning_rate": 6.5486466379137545e-06, "loss": 4.0864, "step": 16635 }, { "epoch": 5.541684017656367, "grad_norm": 0.7265625, "learning_rate": 6.547843743386975e-06, "loss": 3.9463, "step": 16636 }, { "epoch": 5.542017156658616, "grad_norm": 0.7265625, "learning_rate": 6.547040859950754e-06, "loss": 4.0285, "step": 16637 }, { "epoch": 5.542350295660865, "grad_norm": 0.75, "learning_rate": 6.546237987614442e-06, "loss": 3.9577, "step": 16638 }, { "epoch": 5.542683434663113, "grad_norm": 0.79296875, "learning_rate": 6.545435126387392e-06, "loss": 3.9185, "step": 16639 }, { "epoch": 5.543016573665362, "grad_norm": 0.80078125, "learning_rate": 6.544632276278957e-06, "loss": 4.0258, "step": 16640 }, { "epoch": 5.54334971266761, "grad_norm": 0.7578125, "learning_rate": 6.543829437298487e-06, "loss": 3.963, "step": 16641 }, { "epoch": 5.5436828516698595, "grad_norm": 0.80078125, "learning_rate": 6.543026609455329e-06, "loss": 4.0592, "step": 16642 }, { "epoch": 5.544015990672108, "grad_norm": 0.7734375, "learning_rate": 6.542223792758841e-06, "loss": 3.9649, "step": 16643 }, { "epoch": 5.544349129674357, "grad_norm": 0.73046875, "learning_rate": 6.541420987218369e-06, "loss": 4.0209, "step": 16644 }, { "epoch": 5.544682268676605, "grad_norm": 0.765625, "learning_rate": 6.540618192843268e-06, "loss": 4.0517, "step": 16645 }, { "epoch": 5.545015407678854, "grad_norm": 0.78125, "learning_rate": 6.539815409642886e-06, "loss": 3.9924, "step": 16646 }, { "epoch": 5.545348546681103, "grad_norm": 0.765625, "learning_rate": 6.539012637626576e-06, "loss": 3.9946, "step": 16647 }, { "epoch": 5.545681685683351, "grad_norm": 0.77734375, "learning_rate": 6.538209876803685e-06, "loss": 3.9963, "step": 16648 }, { "epoch": 5.5460148246856, "grad_norm": 0.76171875, "learning_rate": 6.53740712718357e-06, "loss": 3.9791, "step": 16649 }, { "epoch": 5.5463479636878485, "grad_norm": 0.73828125, "learning_rate": 6.536604388775576e-06, "loss": 3.9441, "step": 16650 }, { "epoch": 5.546681102690098, "grad_norm": 0.734375, "learning_rate": 6.5358016615890514e-06, "loss": 3.9917, "step": 16651 }, { "epoch": 5.547014241692346, "grad_norm": 0.75, "learning_rate": 6.534998945633351e-06, "loss": 3.9672, "step": 16652 }, { "epoch": 5.547347380694594, "grad_norm": 0.76171875, "learning_rate": 6.534196240917821e-06, "loss": 3.9855, "step": 16653 }, { "epoch": 5.5476805196968435, "grad_norm": 0.7421875, "learning_rate": 6.533393547451814e-06, "loss": 4.0459, "step": 16654 }, { "epoch": 5.548013658699093, "grad_norm": 0.734375, "learning_rate": 6.532590865244677e-06, "loss": 3.9715, "step": 16655 }, { "epoch": 5.548346797701341, "grad_norm": 0.73828125, "learning_rate": 6.531788194305763e-06, "loss": 3.9432, "step": 16656 }, { "epoch": 5.548679936703589, "grad_norm": 0.72265625, "learning_rate": 6.530985534644417e-06, "loss": 3.9514, "step": 16657 }, { "epoch": 5.5490130757058385, "grad_norm": 0.77734375, "learning_rate": 6.530182886269995e-06, "loss": 3.9873, "step": 16658 }, { "epoch": 5.549346214708087, "grad_norm": 0.71484375, "learning_rate": 6.529380249191839e-06, "loss": 3.937, "step": 16659 }, { "epoch": 5.549679353710336, "grad_norm": 0.73046875, "learning_rate": 6.528577623419299e-06, "loss": 4.0634, "step": 16660 }, { "epoch": 5.550012492712584, "grad_norm": 0.76953125, "learning_rate": 6.527775008961729e-06, "loss": 3.9896, "step": 16661 }, { "epoch": 5.550345631714833, "grad_norm": 0.7578125, "learning_rate": 6.526972405828471e-06, "loss": 3.9672, "step": 16662 }, { "epoch": 5.550678770717082, "grad_norm": 0.76953125, "learning_rate": 6.52616981402888e-06, "loss": 3.9424, "step": 16663 }, { "epoch": 5.55101190971933, "grad_norm": 0.7421875, "learning_rate": 6.525367233572298e-06, "loss": 3.9973, "step": 16664 }, { "epoch": 5.551345048721579, "grad_norm": 0.72265625, "learning_rate": 6.52456466446808e-06, "loss": 4.0144, "step": 16665 }, { "epoch": 5.5516781877238275, "grad_norm": 0.7734375, "learning_rate": 6.523762106725568e-06, "loss": 4.0632, "step": 16666 }, { "epoch": 5.552011326726077, "grad_norm": 0.765625, "learning_rate": 6.522959560354116e-06, "loss": 3.9291, "step": 16667 }, { "epoch": 5.552344465728325, "grad_norm": 0.765625, "learning_rate": 6.522157025363068e-06, "loss": 3.9877, "step": 16668 }, { "epoch": 5.552677604730574, "grad_norm": 0.765625, "learning_rate": 6.521354501761771e-06, "loss": 3.9411, "step": 16669 }, { "epoch": 5.553010743732822, "grad_norm": 0.796875, "learning_rate": 6.520551989559576e-06, "loss": 3.946, "step": 16670 }, { "epoch": 5.553343882735071, "grad_norm": 0.7421875, "learning_rate": 6.5197494887658274e-06, "loss": 4.0329, "step": 16671 }, { "epoch": 5.55367702173732, "grad_norm": 0.73828125, "learning_rate": 6.518946999389875e-06, "loss": 3.9665, "step": 16672 }, { "epoch": 5.554010160739568, "grad_norm": 0.74609375, "learning_rate": 6.518144521441064e-06, "loss": 4.0182, "step": 16673 }, { "epoch": 5.554343299741817, "grad_norm": 0.74609375, "learning_rate": 6.517342054928743e-06, "loss": 3.9897, "step": 16674 }, { "epoch": 5.554676438744066, "grad_norm": 0.77734375, "learning_rate": 6.516539599862259e-06, "loss": 3.9261, "step": 16675 }, { "epoch": 5.555009577746315, "grad_norm": 0.75390625, "learning_rate": 6.5157371562509604e-06, "loss": 3.9071, "step": 16676 }, { "epoch": 5.555342716748563, "grad_norm": 0.7890625, "learning_rate": 6.514934724104191e-06, "loss": 4.0965, "step": 16677 }, { "epoch": 5.555675855750812, "grad_norm": 0.734375, "learning_rate": 6.5141323034312965e-06, "loss": 3.9438, "step": 16678 }, { "epoch": 5.556008994753061, "grad_norm": 0.76171875, "learning_rate": 6.5133298942416265e-06, "loss": 3.9852, "step": 16679 }, { "epoch": 5.55634213375531, "grad_norm": 0.74609375, "learning_rate": 6.512527496544525e-06, "loss": 4.0963, "step": 16680 }, { "epoch": 5.556675272757558, "grad_norm": 0.7578125, "learning_rate": 6.5117251103493415e-06, "loss": 4.0244, "step": 16681 }, { "epoch": 5.557008411759806, "grad_norm": 0.7890625, "learning_rate": 6.510922735665417e-06, "loss": 4.0189, "step": 16682 }, { "epoch": 5.557341550762056, "grad_norm": 0.74609375, "learning_rate": 6.510120372502103e-06, "loss": 4.0522, "step": 16683 }, { "epoch": 5.557674689764304, "grad_norm": 0.80078125, "learning_rate": 6.50931802086874e-06, "loss": 3.9521, "step": 16684 }, { "epoch": 5.558007828766553, "grad_norm": 0.78515625, "learning_rate": 6.508515680774682e-06, "loss": 3.9694, "step": 16685 }, { "epoch": 5.558340967768801, "grad_norm": 0.77734375, "learning_rate": 6.507713352229263e-06, "loss": 3.9635, "step": 16686 }, { "epoch": 5.558674106771051, "grad_norm": 0.80078125, "learning_rate": 6.506911035241836e-06, "loss": 3.9999, "step": 16687 }, { "epoch": 5.559007245773299, "grad_norm": 0.7734375, "learning_rate": 6.506108729821743e-06, "loss": 3.9882, "step": 16688 }, { "epoch": 5.559340384775547, "grad_norm": 0.765625, "learning_rate": 6.505306435978332e-06, "loss": 4.0345, "step": 16689 }, { "epoch": 5.559673523777796, "grad_norm": 0.71875, "learning_rate": 6.504504153720946e-06, "loss": 3.9405, "step": 16690 }, { "epoch": 5.560006662780045, "grad_norm": 0.78125, "learning_rate": 6.503701883058931e-06, "loss": 3.9881, "step": 16691 }, { "epoch": 5.560339801782294, "grad_norm": 0.7578125, "learning_rate": 6.502899624001629e-06, "loss": 3.9884, "step": 16692 }, { "epoch": 5.560672940784542, "grad_norm": 0.76171875, "learning_rate": 6.5020973765583885e-06, "loss": 3.957, "step": 16693 }, { "epoch": 5.561006079786791, "grad_norm": 0.7734375, "learning_rate": 6.501295140738555e-06, "loss": 3.8994, "step": 16694 }, { "epoch": 5.56133921878904, "grad_norm": 0.7578125, "learning_rate": 6.500492916551465e-06, "loss": 4.0418, "step": 16695 }, { "epoch": 5.561672357791289, "grad_norm": 0.82421875, "learning_rate": 6.4996907040064704e-06, "loss": 4.0221, "step": 16696 }, { "epoch": 5.562005496793537, "grad_norm": 0.77734375, "learning_rate": 6.49888850311291e-06, "loss": 4.0406, "step": 16697 }, { "epoch": 5.562338635795786, "grad_norm": 0.7265625, "learning_rate": 6.498086313880132e-06, "loss": 4.0041, "step": 16698 }, { "epoch": 5.5626717747980345, "grad_norm": 0.76953125, "learning_rate": 6.497284136317477e-06, "loss": 4.0284, "step": 16699 }, { "epoch": 5.563004913800283, "grad_norm": 0.75, "learning_rate": 6.496481970434292e-06, "loss": 3.9191, "step": 16700 }, { "epoch": 5.563338052802532, "grad_norm": 0.7890625, "learning_rate": 6.495679816239915e-06, "loss": 3.9526, "step": 16701 }, { "epoch": 5.56367119180478, "grad_norm": 0.79296875, "learning_rate": 6.494877673743697e-06, "loss": 3.9062, "step": 16702 }, { "epoch": 5.5640043308070295, "grad_norm": 0.80859375, "learning_rate": 6.494075542954977e-06, "loss": 4.0227, "step": 16703 }, { "epoch": 5.564337469809278, "grad_norm": 0.7421875, "learning_rate": 6.4932734238830945e-06, "loss": 4.039, "step": 16704 }, { "epoch": 5.564670608811527, "grad_norm": 0.79296875, "learning_rate": 6.492471316537398e-06, "loss": 3.9977, "step": 16705 }, { "epoch": 5.565003747813775, "grad_norm": 0.75390625, "learning_rate": 6.491669220927227e-06, "loss": 3.9456, "step": 16706 }, { "epoch": 5.565336886816024, "grad_norm": 0.75, "learning_rate": 6.490867137061927e-06, "loss": 3.9674, "step": 16707 }, { "epoch": 5.565670025818273, "grad_norm": 0.78125, "learning_rate": 6.490065064950838e-06, "loss": 4.013, "step": 16708 }, { "epoch": 5.566003164820521, "grad_norm": 0.72265625, "learning_rate": 6.489263004603304e-06, "loss": 3.9397, "step": 16709 }, { "epoch": 5.56633630382277, "grad_norm": 0.73828125, "learning_rate": 6.488460956028666e-06, "loss": 4.0722, "step": 16710 }, { "epoch": 5.5666694428250185, "grad_norm": 0.75, "learning_rate": 6.487658919236269e-06, "loss": 4.0024, "step": 16711 }, { "epoch": 5.567002581827268, "grad_norm": 0.80078125, "learning_rate": 6.486856894235453e-06, "loss": 3.9793, "step": 16712 }, { "epoch": 5.567335720829516, "grad_norm": 0.76953125, "learning_rate": 6.486054881035558e-06, "loss": 3.9784, "step": 16713 }, { "epoch": 5.567668859831765, "grad_norm": 0.73828125, "learning_rate": 6.485252879645928e-06, "loss": 3.9331, "step": 16714 }, { "epoch": 5.5680019988340135, "grad_norm": 0.7109375, "learning_rate": 6.4844508900759025e-06, "loss": 3.9968, "step": 16715 }, { "epoch": 5.568335137836263, "grad_norm": 0.76953125, "learning_rate": 6.483648912334826e-06, "loss": 4.0264, "step": 16716 }, { "epoch": 5.568668276838511, "grad_norm": 0.77734375, "learning_rate": 6.482846946432037e-06, "loss": 3.9888, "step": 16717 }, { "epoch": 5.569001415840759, "grad_norm": 0.73828125, "learning_rate": 6.48204499237688e-06, "loss": 4.0203, "step": 16718 }, { "epoch": 5.569334554843008, "grad_norm": 0.73828125, "learning_rate": 6.481243050178692e-06, "loss": 3.9224, "step": 16719 }, { "epoch": 5.569667693845257, "grad_norm": 0.7265625, "learning_rate": 6.480441119846816e-06, "loss": 3.9566, "step": 16720 }, { "epoch": 5.570000832847506, "grad_norm": 0.74609375, "learning_rate": 6.4796392013905965e-06, "loss": 4.0292, "step": 16721 }, { "epoch": 5.570333971849754, "grad_norm": 0.6953125, "learning_rate": 6.478837294819366e-06, "loss": 4.0212, "step": 16722 }, { "epoch": 5.570667110852003, "grad_norm": 0.79296875, "learning_rate": 6.47803540014247e-06, "loss": 4.0396, "step": 16723 }, { "epoch": 5.571000249854252, "grad_norm": 0.7890625, "learning_rate": 6.477233517369248e-06, "loss": 3.9714, "step": 16724 }, { "epoch": 5.5713333888565, "grad_norm": 0.7734375, "learning_rate": 6.476431646509041e-06, "loss": 4.0368, "step": 16725 }, { "epoch": 5.571666527858749, "grad_norm": 0.734375, "learning_rate": 6.475629787571186e-06, "loss": 4.0326, "step": 16726 }, { "epoch": 5.5719996668609975, "grad_norm": 0.8046875, "learning_rate": 6.474827940565026e-06, "loss": 3.9364, "step": 16727 }, { "epoch": 5.572332805863247, "grad_norm": 0.765625, "learning_rate": 6.4740261054999005e-06, "loss": 3.9185, "step": 16728 }, { "epoch": 5.572665944865495, "grad_norm": 0.703125, "learning_rate": 6.47322428238515e-06, "loss": 4.0305, "step": 16729 }, { "epoch": 5.572999083867744, "grad_norm": 0.75390625, "learning_rate": 6.472422471230113e-06, "loss": 3.9973, "step": 16730 }, { "epoch": 5.573332222869992, "grad_norm": 0.734375, "learning_rate": 6.471620672044128e-06, "loss": 3.9497, "step": 16731 }, { "epoch": 5.573665361872241, "grad_norm": 0.7734375, "learning_rate": 6.470818884836533e-06, "loss": 3.9622, "step": 16732 }, { "epoch": 5.57399850087449, "grad_norm": 0.76953125, "learning_rate": 6.4700171096166705e-06, "loss": 3.9228, "step": 16733 }, { "epoch": 5.574331639876738, "grad_norm": 0.7421875, "learning_rate": 6.469215346393876e-06, "loss": 4.0283, "step": 16734 }, { "epoch": 5.574664778878987, "grad_norm": 0.75390625, "learning_rate": 6.468413595177491e-06, "loss": 3.9313, "step": 16735 }, { "epoch": 5.574997917881236, "grad_norm": 0.77734375, "learning_rate": 6.467611855976852e-06, "loss": 4.044, "step": 16736 }, { "epoch": 5.575331056883485, "grad_norm": 0.8046875, "learning_rate": 6.466810128801301e-06, "loss": 4.0857, "step": 16737 }, { "epoch": 5.575664195885733, "grad_norm": 0.76171875, "learning_rate": 6.466008413660171e-06, "loss": 4.0127, "step": 16738 }, { "epoch": 5.575997334887982, "grad_norm": 0.7421875, "learning_rate": 6.465206710562808e-06, "loss": 3.958, "step": 16739 }, { "epoch": 5.576330473890231, "grad_norm": 0.7109375, "learning_rate": 6.4644050195185425e-06, "loss": 4.0442, "step": 16740 }, { "epoch": 5.57666361289248, "grad_norm": 0.7578125, "learning_rate": 6.463603340536714e-06, "loss": 4.0197, "step": 16741 }, { "epoch": 5.576996751894728, "grad_norm": 0.73828125, "learning_rate": 6.4628016736266644e-06, "loss": 3.911, "step": 16742 }, { "epoch": 5.577329890896976, "grad_norm": 0.7265625, "learning_rate": 6.462000018797726e-06, "loss": 3.9632, "step": 16743 }, { "epoch": 5.577663029899226, "grad_norm": 0.78515625, "learning_rate": 6.4611983760592424e-06, "loss": 3.9176, "step": 16744 }, { "epoch": 5.577996168901474, "grad_norm": 0.7734375, "learning_rate": 6.460396745420545e-06, "loss": 3.9095, "step": 16745 }, { "epoch": 5.578329307903723, "grad_norm": 0.76953125, "learning_rate": 6.459595126890975e-06, "loss": 3.9438, "step": 16746 }, { "epoch": 5.578662446905971, "grad_norm": 0.75, "learning_rate": 6.458793520479866e-06, "loss": 3.9861, "step": 16747 }, { "epoch": 5.5789955859082205, "grad_norm": 0.796875, "learning_rate": 6.457991926196563e-06, "loss": 4.0254, "step": 16748 }, { "epoch": 5.579328724910469, "grad_norm": 0.78125, "learning_rate": 6.457190344050393e-06, "loss": 3.9783, "step": 16749 }, { "epoch": 5.579661863912717, "grad_norm": 0.76171875, "learning_rate": 6.4563887740506965e-06, "loss": 4.0114, "step": 16750 }, { "epoch": 5.579995002914966, "grad_norm": 0.72265625, "learning_rate": 6.4555872162068114e-06, "loss": 3.972, "step": 16751 }, { "epoch": 5.580328141917215, "grad_norm": 0.77734375, "learning_rate": 6.454785670528072e-06, "loss": 4.0187, "step": 16752 }, { "epoch": 5.580661280919464, "grad_norm": 0.75, "learning_rate": 6.453984137023816e-06, "loss": 4.0584, "step": 16753 }, { "epoch": 5.580994419921712, "grad_norm": 0.7578125, "learning_rate": 6.453182615703378e-06, "loss": 3.9529, "step": 16754 }, { "epoch": 5.581327558923961, "grad_norm": 0.72265625, "learning_rate": 6.452381106576098e-06, "loss": 3.9579, "step": 16755 }, { "epoch": 5.58166069792621, "grad_norm": 0.74609375, "learning_rate": 6.451579609651306e-06, "loss": 3.9263, "step": 16756 }, { "epoch": 5.581993836928459, "grad_norm": 0.7578125, "learning_rate": 6.450778124938344e-06, "loss": 3.9973, "step": 16757 }, { "epoch": 5.582326975930707, "grad_norm": 0.70703125, "learning_rate": 6.449976652446543e-06, "loss": 4.0493, "step": 16758 }, { "epoch": 5.582660114932956, "grad_norm": 0.75390625, "learning_rate": 6.449175192185238e-06, "loss": 3.9809, "step": 16759 }, { "epoch": 5.5829932539352045, "grad_norm": 0.7890625, "learning_rate": 6.448373744163767e-06, "loss": 4.0061, "step": 16760 }, { "epoch": 5.583326392937453, "grad_norm": 0.75, "learning_rate": 6.447572308391463e-06, "loss": 3.9009, "step": 16761 }, { "epoch": 5.583659531939702, "grad_norm": 0.7578125, "learning_rate": 6.446770884877664e-06, "loss": 4.0139, "step": 16762 }, { "epoch": 5.58399267094195, "grad_norm": 0.73828125, "learning_rate": 6.4459694736317006e-06, "loss": 3.9803, "step": 16763 }, { "epoch": 5.5843258099441995, "grad_norm": 0.72265625, "learning_rate": 6.445168074662912e-06, "loss": 4.0893, "step": 16764 }, { "epoch": 5.584658948946448, "grad_norm": 0.76171875, "learning_rate": 6.444366687980628e-06, "loss": 3.9439, "step": 16765 }, { "epoch": 5.584992087948697, "grad_norm": 0.73828125, "learning_rate": 6.44356531359419e-06, "loss": 4.0865, "step": 16766 }, { "epoch": 5.585325226950945, "grad_norm": 0.73828125, "learning_rate": 6.442763951512926e-06, "loss": 3.9796, "step": 16767 }, { "epoch": 5.5856583659531935, "grad_norm": 0.796875, "learning_rate": 6.44196260174617e-06, "loss": 3.9425, "step": 16768 }, { "epoch": 5.585991504955443, "grad_norm": 0.79296875, "learning_rate": 6.441161264303259e-06, "loss": 4.0087, "step": 16769 }, { "epoch": 5.586324643957691, "grad_norm": 0.7421875, "learning_rate": 6.440359939193525e-06, "loss": 3.9982, "step": 16770 }, { "epoch": 5.58665778295994, "grad_norm": 0.71875, "learning_rate": 6.439558626426304e-06, "loss": 4.0101, "step": 16771 }, { "epoch": 5.5869909219621885, "grad_norm": 0.7421875, "learning_rate": 6.438757326010926e-06, "loss": 3.9905, "step": 16772 }, { "epoch": 5.587324060964438, "grad_norm": 0.77734375, "learning_rate": 6.43795603795673e-06, "loss": 4.0218, "step": 16773 }, { "epoch": 5.587657199966686, "grad_norm": 0.74609375, "learning_rate": 6.437154762273043e-06, "loss": 3.9625, "step": 16774 }, { "epoch": 5.587990338968935, "grad_norm": 0.734375, "learning_rate": 6.436353498969205e-06, "loss": 3.9837, "step": 16775 }, { "epoch": 5.5883234779711835, "grad_norm": 0.76953125, "learning_rate": 6.435552248054541e-06, "loss": 4.0021, "step": 16776 }, { "epoch": 5.588656616973433, "grad_norm": 0.76953125, "learning_rate": 6.434751009538389e-06, "loss": 4.0027, "step": 16777 }, { "epoch": 5.588989755975681, "grad_norm": 0.765625, "learning_rate": 6.433949783430078e-06, "loss": 4.0059, "step": 16778 }, { "epoch": 5.589322894977929, "grad_norm": 0.7421875, "learning_rate": 6.433148569738946e-06, "loss": 3.9683, "step": 16779 }, { "epoch": 5.589656033980178, "grad_norm": 0.75, "learning_rate": 6.432347368474319e-06, "loss": 3.9041, "step": 16780 }, { "epoch": 5.589989172982427, "grad_norm": 0.71875, "learning_rate": 6.431546179645536e-06, "loss": 4.0516, "step": 16781 }, { "epoch": 5.590322311984676, "grad_norm": 0.77734375, "learning_rate": 6.430745003261923e-06, "loss": 3.9328, "step": 16782 }, { "epoch": 5.590655450986924, "grad_norm": 0.7421875, "learning_rate": 6.4299438393328165e-06, "loss": 4.0271, "step": 16783 }, { "epoch": 5.590988589989173, "grad_norm": 0.77734375, "learning_rate": 6.4291426878675484e-06, "loss": 4.0488, "step": 16784 }, { "epoch": 5.591321728991422, "grad_norm": 0.734375, "learning_rate": 6.428341548875445e-06, "loss": 4.0154, "step": 16785 }, { "epoch": 5.59165486799367, "grad_norm": 0.7578125, "learning_rate": 6.427540422365844e-06, "loss": 3.9878, "step": 16786 }, { "epoch": 5.591988006995919, "grad_norm": 0.7890625, "learning_rate": 6.426739308348071e-06, "loss": 4.0001, "step": 16787 }, { "epoch": 5.592321145998167, "grad_norm": 0.76953125, "learning_rate": 6.425938206831463e-06, "loss": 3.9687, "step": 16788 }, { "epoch": 5.592654285000417, "grad_norm": 0.80078125, "learning_rate": 6.425137117825347e-06, "loss": 4.0118, "step": 16789 }, { "epoch": 5.592987424002665, "grad_norm": 0.76953125, "learning_rate": 6.4243360413390565e-06, "loss": 4.0349, "step": 16790 }, { "epoch": 5.593320563004914, "grad_norm": 0.7578125, "learning_rate": 6.423534977381919e-06, "loss": 3.9607, "step": 16791 }, { "epoch": 5.593653702007162, "grad_norm": 0.81640625, "learning_rate": 6.422733925963271e-06, "loss": 3.8905, "step": 16792 }, { "epoch": 5.593986841009411, "grad_norm": 0.74609375, "learning_rate": 6.421932887092439e-06, "loss": 3.996, "step": 16793 }, { "epoch": 5.59431998001166, "grad_norm": 0.7421875, "learning_rate": 6.421131860778752e-06, "loss": 3.9968, "step": 16794 }, { "epoch": 5.594653119013909, "grad_norm": 0.72265625, "learning_rate": 6.420330847031543e-06, "loss": 3.9271, "step": 16795 }, { "epoch": 5.594986258016157, "grad_norm": 0.7734375, "learning_rate": 6.419529845860139e-06, "loss": 3.9234, "step": 16796 }, { "epoch": 5.595319397018406, "grad_norm": 0.75390625, "learning_rate": 6.4187288572738746e-06, "loss": 3.9772, "step": 16797 }, { "epoch": 5.595652536020655, "grad_norm": 0.75390625, "learning_rate": 6.4179278812820745e-06, "loss": 3.9632, "step": 16798 }, { "epoch": 5.595985675022903, "grad_norm": 0.78515625, "learning_rate": 6.417126917894072e-06, "loss": 4.0029, "step": 16799 }, { "epoch": 5.596318814025152, "grad_norm": 0.7421875, "learning_rate": 6.416325967119195e-06, "loss": 3.9434, "step": 16800 }, { "epoch": 5.596651953027401, "grad_norm": 0.734375, "learning_rate": 6.415525028966775e-06, "loss": 4.0309, "step": 16801 }, { "epoch": 5.59698509202965, "grad_norm": 0.76953125, "learning_rate": 6.41472410344614e-06, "loss": 3.9969, "step": 16802 }, { "epoch": 5.597318231031898, "grad_norm": 0.79296875, "learning_rate": 6.413923190566616e-06, "loss": 3.9686, "step": 16803 }, { "epoch": 5.597651370034146, "grad_norm": 0.71875, "learning_rate": 6.413122290337536e-06, "loss": 4.0496, "step": 16804 }, { "epoch": 5.5979845090363956, "grad_norm": 0.796875, "learning_rate": 6.412321402768225e-06, "loss": 3.8671, "step": 16805 }, { "epoch": 5.598317648038644, "grad_norm": 0.78515625, "learning_rate": 6.411520527868016e-06, "loss": 3.9616, "step": 16806 }, { "epoch": 5.598650787040893, "grad_norm": 0.78515625, "learning_rate": 6.410719665646233e-06, "loss": 3.9678, "step": 16807 }, { "epoch": 5.598983926043141, "grad_norm": 0.77734375, "learning_rate": 6.409918816112208e-06, "loss": 3.9824, "step": 16808 }, { "epoch": 5.5993170650453905, "grad_norm": 0.79296875, "learning_rate": 6.4091179792752665e-06, "loss": 4.0243, "step": 16809 }, { "epoch": 5.599650204047639, "grad_norm": 1.265625, "learning_rate": 6.408317155144739e-06, "loss": 3.9992, "step": 16810 }, { "epoch": 5.599983343049887, "grad_norm": 0.75390625, "learning_rate": 6.407516343729954e-06, "loss": 4.0396, "step": 16811 }, { "epoch": 5.600316482052136, "grad_norm": 0.8125, "learning_rate": 6.406715545040234e-06, "loss": 3.9737, "step": 16812 }, { "epoch": 5.600649621054385, "grad_norm": 0.74609375, "learning_rate": 6.405914759084911e-06, "loss": 3.9687, "step": 16813 }, { "epoch": 5.600982760056634, "grad_norm": 0.7890625, "learning_rate": 6.4051139858733106e-06, "loss": 4.0228, "step": 16814 }, { "epoch": 5.601315899058882, "grad_norm": 0.78125, "learning_rate": 6.404313225414761e-06, "loss": 4.0493, "step": 16815 }, { "epoch": 5.601649038061131, "grad_norm": 0.71875, "learning_rate": 6.403512477718589e-06, "loss": 4.0264, "step": 16816 }, { "epoch": 5.6019821770633795, "grad_norm": 0.72265625, "learning_rate": 6.402711742794123e-06, "loss": 3.9335, "step": 16817 }, { "epoch": 5.602315316065629, "grad_norm": 0.78125, "learning_rate": 6.401911020650686e-06, "loss": 3.995, "step": 16818 }, { "epoch": 5.602648455067877, "grad_norm": 0.80859375, "learning_rate": 6.4011103112976095e-06, "loss": 4.0108, "step": 16819 }, { "epoch": 5.602981594070126, "grad_norm": 0.75, "learning_rate": 6.400309614744218e-06, "loss": 3.9607, "step": 16820 }, { "epoch": 5.6033147330723745, "grad_norm": 0.7890625, "learning_rate": 6.399508930999838e-06, "loss": 3.9291, "step": 16821 }, { "epoch": 5.603647872074623, "grad_norm": 0.7734375, "learning_rate": 6.398708260073792e-06, "loss": 3.9776, "step": 16822 }, { "epoch": 5.603981011076872, "grad_norm": 0.76171875, "learning_rate": 6.397907601975412e-06, "loss": 4.0067, "step": 16823 }, { "epoch": 5.60431415007912, "grad_norm": 0.79296875, "learning_rate": 6.397106956714019e-06, "loss": 3.9631, "step": 16824 }, { "epoch": 5.604647289081369, "grad_norm": 0.73046875, "learning_rate": 6.396306324298942e-06, "loss": 4.0102, "step": 16825 }, { "epoch": 5.604980428083618, "grad_norm": 0.76171875, "learning_rate": 6.395505704739506e-06, "loss": 4.0029, "step": 16826 }, { "epoch": 5.605313567085867, "grad_norm": 0.7421875, "learning_rate": 6.394705098045037e-06, "loss": 3.9061, "step": 16827 }, { "epoch": 5.605646706088115, "grad_norm": 0.77734375, "learning_rate": 6.393904504224858e-06, "loss": 3.9605, "step": 16828 }, { "epoch": 5.6059798450903635, "grad_norm": 0.75390625, "learning_rate": 6.3931039232882984e-06, "loss": 3.9965, "step": 16829 }, { "epoch": 5.606312984092613, "grad_norm": 0.8046875, "learning_rate": 6.39230335524468e-06, "loss": 3.9411, "step": 16830 }, { "epoch": 5.606646123094861, "grad_norm": 0.7890625, "learning_rate": 6.391502800103325e-06, "loss": 3.9489, "step": 16831 }, { "epoch": 5.60697926209711, "grad_norm": 0.75, "learning_rate": 6.390702257873564e-06, "loss": 3.9313, "step": 16832 }, { "epoch": 5.6073124010993585, "grad_norm": 0.8046875, "learning_rate": 6.389901728564718e-06, "loss": 3.9775, "step": 16833 }, { "epoch": 5.607645540101608, "grad_norm": 0.8671875, "learning_rate": 6.389101212186113e-06, "loss": 3.9312, "step": 16834 }, { "epoch": 5.607978679103856, "grad_norm": 0.765625, "learning_rate": 6.388300708747072e-06, "loss": 4.0097, "step": 16835 }, { "epoch": 5.608311818106105, "grad_norm": 0.76171875, "learning_rate": 6.38750021825692e-06, "loss": 3.9592, "step": 16836 }, { "epoch": 5.608644957108353, "grad_norm": 0.765625, "learning_rate": 6.38669974072498e-06, "loss": 3.9584, "step": 16837 }, { "epoch": 5.608978096110603, "grad_norm": 0.76953125, "learning_rate": 6.38589927616058e-06, "loss": 4.0035, "step": 16838 }, { "epoch": 5.609311235112851, "grad_norm": 0.7734375, "learning_rate": 6.385098824573038e-06, "loss": 4.0204, "step": 16839 }, { "epoch": 5.609644374115099, "grad_norm": 0.7421875, "learning_rate": 6.384298385971679e-06, "loss": 3.9792, "step": 16840 }, { "epoch": 5.609977513117348, "grad_norm": 0.80859375, "learning_rate": 6.383497960365829e-06, "loss": 3.9803, "step": 16841 }, { "epoch": 5.610310652119597, "grad_norm": 0.859375, "learning_rate": 6.382697547764808e-06, "loss": 4.0277, "step": 16842 }, { "epoch": 5.610643791121846, "grad_norm": 0.7890625, "learning_rate": 6.3818971481779405e-06, "loss": 4.0065, "step": 16843 }, { "epoch": 5.610976930124094, "grad_norm": 0.765625, "learning_rate": 6.381096761614549e-06, "loss": 3.968, "step": 16844 }, { "epoch": 5.611310069126343, "grad_norm": 0.7421875, "learning_rate": 6.380296388083958e-06, "loss": 4.0272, "step": 16845 }, { "epoch": 5.611643208128592, "grad_norm": 0.80859375, "learning_rate": 6.379496027595487e-06, "loss": 3.966, "step": 16846 }, { "epoch": 5.61197634713084, "grad_norm": 0.78125, "learning_rate": 6.378695680158464e-06, "loss": 3.9655, "step": 16847 }, { "epoch": 5.612309486133089, "grad_norm": 0.7421875, "learning_rate": 6.377895345782205e-06, "loss": 3.9531, "step": 16848 }, { "epoch": 5.612642625135337, "grad_norm": 0.765625, "learning_rate": 6.377095024476034e-06, "loss": 4.0488, "step": 16849 }, { "epoch": 5.612975764137587, "grad_norm": 0.76953125, "learning_rate": 6.376294716249276e-06, "loss": 4.0693, "step": 16850 }, { "epoch": 5.613308903139835, "grad_norm": 0.76953125, "learning_rate": 6.375494421111248e-06, "loss": 4.0502, "step": 16851 }, { "epoch": 5.613642042142084, "grad_norm": 0.78515625, "learning_rate": 6.3746941390712754e-06, "loss": 3.9909, "step": 16852 }, { "epoch": 5.613975181144332, "grad_norm": 0.80859375, "learning_rate": 6.373893870138678e-06, "loss": 4.0186, "step": 16853 }, { "epoch": 5.6143083201465815, "grad_norm": 0.7890625, "learning_rate": 6.37309361432278e-06, "loss": 4.002, "step": 16854 }, { "epoch": 5.61464145914883, "grad_norm": 0.75, "learning_rate": 6.372293371632898e-06, "loss": 4.0201, "step": 16855 }, { "epoch": 5.614974598151079, "grad_norm": 0.75390625, "learning_rate": 6.371493142078359e-06, "loss": 4.0254, "step": 16856 }, { "epoch": 5.615307737153327, "grad_norm": 0.73828125, "learning_rate": 6.370692925668479e-06, "loss": 3.9294, "step": 16857 }, { "epoch": 5.615640876155576, "grad_norm": 0.73828125, "learning_rate": 6.369892722412578e-06, "loss": 3.9576, "step": 16858 }, { "epoch": 5.615974015157825, "grad_norm": 0.76171875, "learning_rate": 6.369092532319982e-06, "loss": 3.9353, "step": 16859 }, { "epoch": 5.616307154160073, "grad_norm": 0.78125, "learning_rate": 6.368292355400005e-06, "loss": 3.982, "step": 16860 }, { "epoch": 5.616640293162322, "grad_norm": 0.75390625, "learning_rate": 6.367492191661974e-06, "loss": 4.0433, "step": 16861 }, { "epoch": 5.616973432164571, "grad_norm": 0.76171875, "learning_rate": 6.366692041115205e-06, "loss": 3.9361, "step": 16862 }, { "epoch": 5.61730657116682, "grad_norm": 0.80859375, "learning_rate": 6.365891903769019e-06, "loss": 3.9534, "step": 16863 }, { "epoch": 5.617639710169068, "grad_norm": 0.73828125, "learning_rate": 6.365091779632737e-06, "loss": 3.9801, "step": 16864 }, { "epoch": 5.617972849171316, "grad_norm": 0.765625, "learning_rate": 6.364291668715678e-06, "loss": 3.9579, "step": 16865 }, { "epoch": 5.6183059881735655, "grad_norm": 0.8046875, "learning_rate": 6.363491571027158e-06, "loss": 3.9228, "step": 16866 }, { "epoch": 5.618639127175814, "grad_norm": 0.7578125, "learning_rate": 6.3626914865765015e-06, "loss": 4.0379, "step": 16867 }, { "epoch": 5.618972266178063, "grad_norm": 0.796875, "learning_rate": 6.361891415373024e-06, "loss": 3.9159, "step": 16868 }, { "epoch": 5.619305405180311, "grad_norm": 0.7890625, "learning_rate": 6.361091357426048e-06, "loss": 3.9345, "step": 16869 }, { "epoch": 5.6196385441825605, "grad_norm": 0.765625, "learning_rate": 6.360291312744889e-06, "loss": 4.0143, "step": 16870 }, { "epoch": 5.619971683184809, "grad_norm": 0.74609375, "learning_rate": 6.359491281338871e-06, "loss": 3.977, "step": 16871 }, { "epoch": 5.620304822187057, "grad_norm": 0.76171875, "learning_rate": 6.3586912632173046e-06, "loss": 3.958, "step": 16872 }, { "epoch": 5.620637961189306, "grad_norm": 0.78125, "learning_rate": 6.357891258389518e-06, "loss": 3.9968, "step": 16873 }, { "epoch": 5.6209711001915545, "grad_norm": 0.76171875, "learning_rate": 6.357091266864823e-06, "loss": 4.0347, "step": 16874 }, { "epoch": 5.621304239193804, "grad_norm": 0.7734375, "learning_rate": 6.3562912886525374e-06, "loss": 4.0467, "step": 16875 }, { "epoch": 5.621637378196052, "grad_norm": 0.7578125, "learning_rate": 6.355491323761983e-06, "loss": 3.9983, "step": 16876 }, { "epoch": 5.621970517198301, "grad_norm": 0.8125, "learning_rate": 6.354691372202473e-06, "loss": 4.022, "step": 16877 }, { "epoch": 5.6223036562005495, "grad_norm": 0.75390625, "learning_rate": 6.35389143398333e-06, "loss": 4.0061, "step": 16878 }, { "epoch": 5.622636795202799, "grad_norm": 0.7421875, "learning_rate": 6.353091509113867e-06, "loss": 3.9682, "step": 16879 }, { "epoch": 5.622969934205047, "grad_norm": 0.75, "learning_rate": 6.352291597603406e-06, "loss": 4.0719, "step": 16880 }, { "epoch": 5.623303073207296, "grad_norm": 0.80859375, "learning_rate": 6.35149169946126e-06, "loss": 3.9791, "step": 16881 }, { "epoch": 5.6236362122095445, "grad_norm": 0.7265625, "learning_rate": 6.350691814696752e-06, "loss": 3.9654, "step": 16882 }, { "epoch": 5.623969351211793, "grad_norm": 0.76171875, "learning_rate": 6.349891943319192e-06, "loss": 4.0772, "step": 16883 }, { "epoch": 5.624302490214042, "grad_norm": 0.734375, "learning_rate": 6.349092085337899e-06, "loss": 4.0299, "step": 16884 }, { "epoch": 5.62463562921629, "grad_norm": 0.7578125, "learning_rate": 6.348292240762192e-06, "loss": 3.9687, "step": 16885 }, { "epoch": 5.624968768218539, "grad_norm": 0.79296875, "learning_rate": 6.347492409601385e-06, "loss": 3.8822, "step": 16886 }, { "epoch": 5.625301907220788, "grad_norm": 0.796875, "learning_rate": 6.346692591864795e-06, "loss": 3.9843, "step": 16887 }, { "epoch": 5.625635046223037, "grad_norm": 0.80078125, "learning_rate": 6.345892787561738e-06, "loss": 4.0072, "step": 16888 }, { "epoch": 5.625968185225285, "grad_norm": 0.765625, "learning_rate": 6.345092996701531e-06, "loss": 3.9657, "step": 16889 }, { "epoch": 5.6263013242275335, "grad_norm": 0.7421875, "learning_rate": 6.344293219293487e-06, "loss": 4.0037, "step": 16890 }, { "epoch": 5.626634463229783, "grad_norm": 0.79296875, "learning_rate": 6.343493455346928e-06, "loss": 3.9497, "step": 16891 }, { "epoch": 5.626967602232031, "grad_norm": 0.79296875, "learning_rate": 6.342693704871164e-06, "loss": 4.0201, "step": 16892 }, { "epoch": 5.62730074123428, "grad_norm": 0.76953125, "learning_rate": 6.3418939678755085e-06, "loss": 4.0284, "step": 16893 }, { "epoch": 5.627633880236528, "grad_norm": 0.7734375, "learning_rate": 6.3410942443692835e-06, "loss": 3.9027, "step": 16894 }, { "epoch": 5.627967019238778, "grad_norm": 0.77734375, "learning_rate": 6.340294534361797e-06, "loss": 3.9689, "step": 16895 }, { "epoch": 5.628300158241026, "grad_norm": 0.77734375, "learning_rate": 6.339494837862369e-06, "loss": 3.9789, "step": 16896 }, { "epoch": 5.628633297243275, "grad_norm": 0.74609375, "learning_rate": 6.338695154880311e-06, "loss": 3.9797, "step": 16897 }, { "epoch": 5.628966436245523, "grad_norm": 0.8046875, "learning_rate": 6.337895485424942e-06, "loss": 3.9741, "step": 16898 }, { "epoch": 5.629299575247773, "grad_norm": 0.77734375, "learning_rate": 6.337095829505571e-06, "loss": 3.9945, "step": 16899 }, { "epoch": 5.629632714250021, "grad_norm": 0.7734375, "learning_rate": 6.336296187131518e-06, "loss": 4.0465, "step": 16900 }, { "epoch": 5.629965853252269, "grad_norm": 0.75390625, "learning_rate": 6.335496558312091e-06, "loss": 4.1153, "step": 16901 }, { "epoch": 5.630298992254518, "grad_norm": 0.76171875, "learning_rate": 6.3346969430566065e-06, "loss": 4.0007, "step": 16902 }, { "epoch": 5.630632131256767, "grad_norm": 0.7734375, "learning_rate": 6.33389734137438e-06, "loss": 3.8969, "step": 16903 }, { "epoch": 5.630965270259016, "grad_norm": 0.76953125, "learning_rate": 6.333097753274722e-06, "loss": 4.0163, "step": 16904 }, { "epoch": 5.631298409261264, "grad_norm": 0.7421875, "learning_rate": 6.332298178766949e-06, "loss": 3.9707, "step": 16905 }, { "epoch": 5.631631548263513, "grad_norm": 0.71875, "learning_rate": 6.331498617860372e-06, "loss": 3.9459, "step": 16906 }, { "epoch": 5.631964687265762, "grad_norm": 0.74609375, "learning_rate": 6.3306990705643065e-06, "loss": 4.0032, "step": 16907 }, { "epoch": 5.63229782626801, "grad_norm": 0.765625, "learning_rate": 6.3298995368880625e-06, "loss": 4.0503, "step": 16908 }, { "epoch": 5.632630965270259, "grad_norm": 0.7734375, "learning_rate": 6.329100016840958e-06, "loss": 3.9858, "step": 16909 }, { "epoch": 5.632964104272507, "grad_norm": 0.75, "learning_rate": 6.3283005104323e-06, "loss": 3.8905, "step": 16910 }, { "epoch": 5.633297243274757, "grad_norm": 0.7578125, "learning_rate": 6.327501017671402e-06, "loss": 4.0134, "step": 16911 }, { "epoch": 5.633630382277005, "grad_norm": 0.765625, "learning_rate": 6.3267015385675775e-06, "loss": 4.0209, "step": 16912 }, { "epoch": 5.633963521279254, "grad_norm": 0.76953125, "learning_rate": 6.32590207313014e-06, "loss": 3.982, "step": 16913 }, { "epoch": 5.634296660281502, "grad_norm": 0.75, "learning_rate": 6.325102621368398e-06, "loss": 3.9706, "step": 16914 }, { "epoch": 5.6346297992837515, "grad_norm": 0.7421875, "learning_rate": 6.324303183291669e-06, "loss": 4.0061, "step": 16915 }, { "epoch": 5.634962938286, "grad_norm": 0.734375, "learning_rate": 6.323503758909258e-06, "loss": 4.0353, "step": 16916 }, { "epoch": 5.635296077288249, "grad_norm": 0.73828125, "learning_rate": 6.322704348230482e-06, "loss": 3.9686, "step": 16917 }, { "epoch": 5.635629216290497, "grad_norm": 0.703125, "learning_rate": 6.321904951264652e-06, "loss": 3.9718, "step": 16918 }, { "epoch": 5.635962355292746, "grad_norm": 0.765625, "learning_rate": 6.321105568021075e-06, "loss": 3.9661, "step": 16919 }, { "epoch": 5.636295494294995, "grad_norm": 0.76953125, "learning_rate": 6.320306198509066e-06, "loss": 3.9878, "step": 16920 }, { "epoch": 5.636628633297243, "grad_norm": 0.7265625, "learning_rate": 6.319506842737932e-06, "loss": 3.9656, "step": 16921 }, { "epoch": 5.636961772299492, "grad_norm": 0.765625, "learning_rate": 6.3187075007169895e-06, "loss": 4.0152, "step": 16922 }, { "epoch": 5.6372949113017405, "grad_norm": 0.7578125, "learning_rate": 6.317908172455543e-06, "loss": 3.9789, "step": 16923 }, { "epoch": 5.63762805030399, "grad_norm": 0.75390625, "learning_rate": 6.317108857962907e-06, "loss": 3.918, "step": 16924 }, { "epoch": 5.637961189306238, "grad_norm": 0.74609375, "learning_rate": 6.3163095572483905e-06, "loss": 4.0273, "step": 16925 }, { "epoch": 5.638294328308486, "grad_norm": 0.73828125, "learning_rate": 6.315510270321305e-06, "loss": 3.9926, "step": 16926 }, { "epoch": 5.6386274673107355, "grad_norm": 0.75390625, "learning_rate": 6.314710997190961e-06, "loss": 3.9928, "step": 16927 }, { "epoch": 5.638960606312984, "grad_norm": 0.7265625, "learning_rate": 6.313911737866663e-06, "loss": 3.9856, "step": 16928 }, { "epoch": 5.639293745315233, "grad_norm": 0.7890625, "learning_rate": 6.3131124923577255e-06, "loss": 4.0386, "step": 16929 }, { "epoch": 5.639626884317481, "grad_norm": 0.77734375, "learning_rate": 6.3123132606734555e-06, "loss": 3.961, "step": 16930 }, { "epoch": 5.6399600233197305, "grad_norm": 0.7890625, "learning_rate": 6.311514042823166e-06, "loss": 4.0047, "step": 16931 }, { "epoch": 5.640293162321979, "grad_norm": 0.77734375, "learning_rate": 6.31071483881616e-06, "loss": 3.9821, "step": 16932 }, { "epoch": 5.640626301324227, "grad_norm": 0.7890625, "learning_rate": 6.309915648661753e-06, "loss": 4.0666, "step": 16933 }, { "epoch": 5.640959440326476, "grad_norm": 0.75390625, "learning_rate": 6.309116472369249e-06, "loss": 4.0176, "step": 16934 }, { "epoch": 5.641292579328725, "grad_norm": 0.72265625, "learning_rate": 6.308317309947961e-06, "loss": 3.9125, "step": 16935 }, { "epoch": 5.641625718330974, "grad_norm": 0.734375, "learning_rate": 6.307518161407196e-06, "loss": 4.0165, "step": 16936 }, { "epoch": 5.641958857333222, "grad_norm": 0.76171875, "learning_rate": 6.3067190267562584e-06, "loss": 3.9305, "step": 16937 }, { "epoch": 5.642291996335471, "grad_norm": 0.75, "learning_rate": 6.3059199060044605e-06, "loss": 3.9532, "step": 16938 }, { "epoch": 5.6426251353377195, "grad_norm": 0.74609375, "learning_rate": 6.30512079916111e-06, "loss": 3.9492, "step": 16939 }, { "epoch": 5.642958274339969, "grad_norm": 0.7578125, "learning_rate": 6.304321706235513e-06, "loss": 4.0312, "step": 16940 }, { "epoch": 5.643291413342217, "grad_norm": 0.77734375, "learning_rate": 6.303522627236978e-06, "loss": 3.9861, "step": 16941 }, { "epoch": 5.643624552344466, "grad_norm": 0.765625, "learning_rate": 6.302723562174814e-06, "loss": 4.033, "step": 16942 }, { "epoch": 5.643957691346714, "grad_norm": 0.7890625, "learning_rate": 6.301924511058326e-06, "loss": 3.9797, "step": 16943 }, { "epoch": 5.644290830348963, "grad_norm": 0.75390625, "learning_rate": 6.301125473896824e-06, "loss": 4.0085, "step": 16944 }, { "epoch": 5.644623969351212, "grad_norm": 0.796875, "learning_rate": 6.300326450699615e-06, "loss": 3.9664, "step": 16945 }, { "epoch": 5.64495710835346, "grad_norm": 0.76171875, "learning_rate": 6.299527441476001e-06, "loss": 3.9977, "step": 16946 }, { "epoch": 5.645290247355709, "grad_norm": 0.7890625, "learning_rate": 6.2987284462352935e-06, "loss": 3.9696, "step": 16947 }, { "epoch": 5.645623386357958, "grad_norm": 0.75390625, "learning_rate": 6.297929464986796e-06, "loss": 3.9194, "step": 16948 }, { "epoch": 5.645956525360207, "grad_norm": 0.7734375, "learning_rate": 6.297130497739819e-06, "loss": 3.9442, "step": 16949 }, { "epoch": 5.646289664362455, "grad_norm": 0.765625, "learning_rate": 6.296331544503664e-06, "loss": 3.9467, "step": 16950 }, { "epoch": 5.6466228033647035, "grad_norm": 0.7421875, "learning_rate": 6.295532605287641e-06, "loss": 4.0187, "step": 16951 }, { "epoch": 5.646955942366953, "grad_norm": 0.78125, "learning_rate": 6.294733680101053e-06, "loss": 3.9998, "step": 16952 }, { "epoch": 5.647289081369201, "grad_norm": 0.76171875, "learning_rate": 6.293934768953209e-06, "loss": 4.0146, "step": 16953 }, { "epoch": 5.64762222037145, "grad_norm": 0.7265625, "learning_rate": 6.293135871853413e-06, "loss": 4.0204, "step": 16954 }, { "epoch": 5.647955359373698, "grad_norm": 0.75, "learning_rate": 6.292336988810968e-06, "loss": 4.0427, "step": 16955 }, { "epoch": 5.648288498375948, "grad_norm": 0.796875, "learning_rate": 6.291538119835182e-06, "loss": 4.0172, "step": 16956 }, { "epoch": 5.648621637378196, "grad_norm": 0.75390625, "learning_rate": 6.29073926493536e-06, "loss": 4.0394, "step": 16957 }, { "epoch": 5.648954776380445, "grad_norm": 0.734375, "learning_rate": 6.2899404241208045e-06, "loss": 3.9612, "step": 16958 }, { "epoch": 5.649287915382693, "grad_norm": 0.77734375, "learning_rate": 6.289141597400823e-06, "loss": 3.9805, "step": 16959 }, { "epoch": 5.6496210543849426, "grad_norm": 0.796875, "learning_rate": 6.288342784784719e-06, "loss": 3.9343, "step": 16960 }, { "epoch": 5.649954193387191, "grad_norm": 0.7890625, "learning_rate": 6.287543986281797e-06, "loss": 4.0268, "step": 16961 }, { "epoch": 5.650287332389439, "grad_norm": 0.76953125, "learning_rate": 6.286745201901361e-06, "loss": 3.9487, "step": 16962 }, { "epoch": 5.650620471391688, "grad_norm": 0.76171875, "learning_rate": 6.285946431652718e-06, "loss": 4.0072, "step": 16963 }, { "epoch": 5.650953610393937, "grad_norm": 0.80078125, "learning_rate": 6.285147675545169e-06, "loss": 4.0286, "step": 16964 }, { "epoch": 5.651286749396186, "grad_norm": 0.75, "learning_rate": 6.284348933588016e-06, "loss": 4.0205, "step": 16965 }, { "epoch": 5.651619888398434, "grad_norm": 0.734375, "learning_rate": 6.2835502057905655e-06, "loss": 4.022, "step": 16966 }, { "epoch": 5.651953027400683, "grad_norm": 0.75390625, "learning_rate": 6.28275149216212e-06, "loss": 4.0663, "step": 16967 }, { "epoch": 5.652286166402932, "grad_norm": 0.76171875, "learning_rate": 6.281952792711984e-06, "loss": 3.9816, "step": 16968 }, { "epoch": 5.65261930540518, "grad_norm": 0.76953125, "learning_rate": 6.2811541074494575e-06, "loss": 3.9685, "step": 16969 }, { "epoch": 5.652952444407429, "grad_norm": 0.76953125, "learning_rate": 6.280355436383848e-06, "loss": 3.9672, "step": 16970 }, { "epoch": 5.653285583409677, "grad_norm": 0.78515625, "learning_rate": 6.279556779524455e-06, "loss": 4.0007, "step": 16971 }, { "epoch": 5.6536187224119265, "grad_norm": 0.76953125, "learning_rate": 6.2787581368805844e-06, "loss": 3.958, "step": 16972 }, { "epoch": 5.653951861414175, "grad_norm": 0.80859375, "learning_rate": 6.277959508461535e-06, "loss": 3.9993, "step": 16973 }, { "epoch": 5.654285000416424, "grad_norm": 0.7734375, "learning_rate": 6.277160894276608e-06, "loss": 3.935, "step": 16974 }, { "epoch": 5.654618139418672, "grad_norm": 0.73046875, "learning_rate": 6.27636229433511e-06, "loss": 4.0111, "step": 16975 }, { "epoch": 5.6549512784209215, "grad_norm": 0.7578125, "learning_rate": 6.27556370864634e-06, "loss": 3.9932, "step": 16976 }, { "epoch": 5.65528441742317, "grad_norm": 0.76953125, "learning_rate": 6.2747651372196014e-06, "loss": 3.9732, "step": 16977 }, { "epoch": 5.655617556425419, "grad_norm": 0.75, "learning_rate": 6.273966580064193e-06, "loss": 4.1074, "step": 16978 }, { "epoch": 5.655950695427667, "grad_norm": 0.7265625, "learning_rate": 6.2731680371894215e-06, "loss": 3.9357, "step": 16979 }, { "epoch": 5.656283834429916, "grad_norm": 0.76171875, "learning_rate": 6.272369508604583e-06, "loss": 3.9268, "step": 16980 }, { "epoch": 5.656616973432165, "grad_norm": 0.76171875, "learning_rate": 6.271570994318983e-06, "loss": 3.9703, "step": 16981 }, { "epoch": 5.656950112434413, "grad_norm": 0.78125, "learning_rate": 6.270772494341919e-06, "loss": 4.0106, "step": 16982 }, { "epoch": 5.657283251436662, "grad_norm": 0.75390625, "learning_rate": 6.2699740086826915e-06, "loss": 4.0165, "step": 16983 }, { "epoch": 5.6576163904389105, "grad_norm": 0.7578125, "learning_rate": 6.2691755373506044e-06, "loss": 3.946, "step": 16984 }, { "epoch": 5.65794952944116, "grad_norm": 0.7734375, "learning_rate": 6.268377080354954e-06, "loss": 4.0132, "step": 16985 }, { "epoch": 5.658282668443408, "grad_norm": 0.75390625, "learning_rate": 6.267578637705045e-06, "loss": 3.9856, "step": 16986 }, { "epoch": 5.658615807445656, "grad_norm": 0.734375, "learning_rate": 6.2667802094101736e-06, "loss": 4.0985, "step": 16987 }, { "epoch": 5.6589489464479055, "grad_norm": 0.81640625, "learning_rate": 6.265981795479643e-06, "loss": 3.9658, "step": 16988 }, { "epoch": 5.659282085450154, "grad_norm": 0.76171875, "learning_rate": 6.2651833959227515e-06, "loss": 4.0549, "step": 16989 }, { "epoch": 5.659615224452403, "grad_norm": 0.7421875, "learning_rate": 6.2643850107488e-06, "loss": 4.0113, "step": 16990 }, { "epoch": 5.659948363454651, "grad_norm": 0.75, "learning_rate": 6.263586639967086e-06, "loss": 4.0095, "step": 16991 }, { "epoch": 5.6602815024569, "grad_norm": 0.78125, "learning_rate": 6.262788283586909e-06, "loss": 4.0437, "step": 16992 }, { "epoch": 5.660614641459149, "grad_norm": 0.80859375, "learning_rate": 6.2619899416175685e-06, "loss": 4.0476, "step": 16993 }, { "epoch": 5.660947780461398, "grad_norm": 0.77734375, "learning_rate": 6.261191614068363e-06, "loss": 3.9989, "step": 16994 }, { "epoch": 5.661280919463646, "grad_norm": 0.7578125, "learning_rate": 6.260393300948594e-06, "loss": 3.974, "step": 16995 }, { "epoch": 5.661614058465895, "grad_norm": 0.734375, "learning_rate": 6.259595002267555e-06, "loss": 3.9787, "step": 16996 }, { "epoch": 5.661947197468144, "grad_norm": 0.765625, "learning_rate": 6.258796718034552e-06, "loss": 4.0125, "step": 16997 }, { "epoch": 5.662280336470392, "grad_norm": 0.734375, "learning_rate": 6.257998448258874e-06, "loss": 3.9868, "step": 16998 }, { "epoch": 5.662613475472641, "grad_norm": 0.75390625, "learning_rate": 6.257200192949828e-06, "loss": 4.0423, "step": 16999 }, { "epoch": 5.6629466144748895, "grad_norm": 0.75, "learning_rate": 6.256401952116706e-06, "loss": 3.9465, "step": 17000 }, { "epoch": 5.663279753477139, "grad_norm": 0.75, "learning_rate": 6.2556037257688075e-06, "loss": 4.0074, "step": 17001 }, { "epoch": 5.663612892479387, "grad_norm": 0.7421875, "learning_rate": 6.2548055139154315e-06, "loss": 3.9882, "step": 17002 }, { "epoch": 5.663946031481636, "grad_norm": 0.76953125, "learning_rate": 6.2540073165658735e-06, "loss": 4.0365, "step": 17003 }, { "epoch": 5.664279170483884, "grad_norm": 0.7578125, "learning_rate": 6.25320913372943e-06, "loss": 3.9929, "step": 17004 }, { "epoch": 5.664612309486133, "grad_norm": 0.77734375, "learning_rate": 6.252410965415401e-06, "loss": 3.9478, "step": 17005 }, { "epoch": 5.664945448488382, "grad_norm": 0.7578125, "learning_rate": 6.2516128116330805e-06, "loss": 3.9465, "step": 17006 }, { "epoch": 5.66527858749063, "grad_norm": 0.7578125, "learning_rate": 6.250814672391768e-06, "loss": 3.9589, "step": 17007 }, { "epoch": 5.665611726492879, "grad_norm": 0.7890625, "learning_rate": 6.250016547700761e-06, "loss": 3.9811, "step": 17008 }, { "epoch": 5.665944865495128, "grad_norm": 0.78125, "learning_rate": 6.24921843756935e-06, "loss": 4.0309, "step": 17009 }, { "epoch": 5.666278004497377, "grad_norm": 0.77734375, "learning_rate": 6.248420342006837e-06, "loss": 3.9809, "step": 17010 }, { "epoch": 5.666611143499625, "grad_norm": 0.73046875, "learning_rate": 6.247622261022515e-06, "loss": 3.9561, "step": 17011 }, { "epoch": 5.666944282501873, "grad_norm": 0.7734375, "learning_rate": 6.246824194625682e-06, "loss": 3.9874, "step": 17012 }, { "epoch": 5.667277421504123, "grad_norm": 0.85546875, "learning_rate": 6.246026142825631e-06, "loss": 4.0234, "step": 17013 }, { "epoch": 5.667610560506371, "grad_norm": 0.7890625, "learning_rate": 6.245228105631661e-06, "loss": 4.0225, "step": 17014 }, { "epoch": 5.66794369950862, "grad_norm": 0.79296875, "learning_rate": 6.244430083053065e-06, "loss": 4.0972, "step": 17015 }, { "epoch": 5.668276838510868, "grad_norm": 0.7890625, "learning_rate": 6.2436320750991395e-06, "loss": 3.9928, "step": 17016 }, { "epoch": 5.668609977513118, "grad_norm": 0.765625, "learning_rate": 6.242834081779181e-06, "loss": 3.951, "step": 17017 }, { "epoch": 5.668943116515366, "grad_norm": 0.76171875, "learning_rate": 6.242036103102479e-06, "loss": 3.9801, "step": 17018 }, { "epoch": 5.669276255517615, "grad_norm": 0.734375, "learning_rate": 6.2412381390783345e-06, "loss": 3.982, "step": 17019 }, { "epoch": 5.669609394519863, "grad_norm": 0.73046875, "learning_rate": 6.240440189716036e-06, "loss": 3.988, "step": 17020 }, { "epoch": 5.6699425335221125, "grad_norm": 0.84375, "learning_rate": 6.239642255024883e-06, "loss": 3.9683, "step": 17021 }, { "epoch": 5.670275672524361, "grad_norm": 0.7265625, "learning_rate": 6.238844335014166e-06, "loss": 4.0376, "step": 17022 }, { "epoch": 5.670608811526609, "grad_norm": 0.7890625, "learning_rate": 6.238046429693184e-06, "loss": 4.0221, "step": 17023 }, { "epoch": 5.670941950528858, "grad_norm": 0.8203125, "learning_rate": 6.237248539071225e-06, "loss": 3.9256, "step": 17024 }, { "epoch": 5.671275089531107, "grad_norm": 0.734375, "learning_rate": 6.236450663157586e-06, "loss": 4.029, "step": 17025 }, { "epoch": 5.671608228533356, "grad_norm": 0.7734375, "learning_rate": 6.235652801961563e-06, "loss": 3.9377, "step": 17026 }, { "epoch": 5.671941367535604, "grad_norm": 0.74609375, "learning_rate": 6.2348549554924425e-06, "loss": 3.9639, "step": 17027 }, { "epoch": 5.672274506537853, "grad_norm": 0.7734375, "learning_rate": 6.234057123759524e-06, "loss": 3.987, "step": 17028 }, { "epoch": 5.6726076455401016, "grad_norm": 0.7734375, "learning_rate": 6.233259306772096e-06, "loss": 4.0378, "step": 17029 }, { "epoch": 5.67294078454235, "grad_norm": 0.6875, "learning_rate": 6.232461504539454e-06, "loss": 4.011, "step": 17030 }, { "epoch": 5.673273923544599, "grad_norm": 0.7578125, "learning_rate": 6.231663717070889e-06, "loss": 4.0549, "step": 17031 }, { "epoch": 5.673607062546847, "grad_norm": 0.77734375, "learning_rate": 6.230865944375696e-06, "loss": 4.0052, "step": 17032 }, { "epoch": 5.6739402015490965, "grad_norm": 0.74609375, "learning_rate": 6.230068186463164e-06, "loss": 3.967, "step": 17033 }, { "epoch": 5.674273340551345, "grad_norm": 0.80859375, "learning_rate": 6.229270443342589e-06, "loss": 4.0657, "step": 17034 }, { "epoch": 5.674606479553594, "grad_norm": 0.76171875, "learning_rate": 6.228472715023262e-06, "loss": 3.9794, "step": 17035 }, { "epoch": 5.674939618555842, "grad_norm": 0.765625, "learning_rate": 6.227675001514471e-06, "loss": 3.9897, "step": 17036 }, { "epoch": 5.6752727575580915, "grad_norm": 0.77734375, "learning_rate": 6.226877302825511e-06, "loss": 3.9926, "step": 17037 }, { "epoch": 5.67560589656034, "grad_norm": 0.78515625, "learning_rate": 6.2260796189656726e-06, "loss": 4.0271, "step": 17038 }, { "epoch": 5.675939035562589, "grad_norm": 0.765625, "learning_rate": 6.225281949944249e-06, "loss": 4.0187, "step": 17039 }, { "epoch": 5.676272174564837, "grad_norm": 0.74609375, "learning_rate": 6.224484295770527e-06, "loss": 3.9724, "step": 17040 }, { "epoch": 5.6766053135670855, "grad_norm": 0.7890625, "learning_rate": 6.223686656453802e-06, "loss": 3.9331, "step": 17041 }, { "epoch": 5.676938452569335, "grad_norm": 0.73828125, "learning_rate": 6.222889032003362e-06, "loss": 4.0208, "step": 17042 }, { "epoch": 5.677271591571583, "grad_norm": 0.70703125, "learning_rate": 6.222091422428499e-06, "loss": 4.0586, "step": 17043 }, { "epoch": 5.677604730573832, "grad_norm": 0.71875, "learning_rate": 6.221293827738506e-06, "loss": 4.0023, "step": 17044 }, { "epoch": 5.6779378695760805, "grad_norm": 0.77734375, "learning_rate": 6.220496247942666e-06, "loss": 4.0048, "step": 17045 }, { "epoch": 5.67827100857833, "grad_norm": 0.75, "learning_rate": 6.219698683050276e-06, "loss": 3.977, "step": 17046 }, { "epoch": 5.678604147580578, "grad_norm": 0.78125, "learning_rate": 6.21890113307062e-06, "loss": 4.0176, "step": 17047 }, { "epoch": 5.678937286582826, "grad_norm": 0.72265625, "learning_rate": 6.218103598012994e-06, "loss": 3.9455, "step": 17048 }, { "epoch": 5.679270425585075, "grad_norm": 0.73828125, "learning_rate": 6.217306077886682e-06, "loss": 3.9398, "step": 17049 }, { "epoch": 5.679603564587324, "grad_norm": 0.7734375, "learning_rate": 6.216508572700978e-06, "loss": 3.9509, "step": 17050 }, { "epoch": 5.679936703589573, "grad_norm": 0.79296875, "learning_rate": 6.215711082465168e-06, "loss": 4.0511, "step": 17051 }, { "epoch": 5.680269842591821, "grad_norm": 0.80078125, "learning_rate": 6.214913607188542e-06, "loss": 3.9591, "step": 17052 }, { "epoch": 5.68060298159407, "grad_norm": 0.74609375, "learning_rate": 6.214116146880392e-06, "loss": 3.9756, "step": 17053 }, { "epoch": 5.680936120596319, "grad_norm": 0.74609375, "learning_rate": 6.213318701550002e-06, "loss": 3.9646, "step": 17054 }, { "epoch": 5.681269259598568, "grad_norm": 0.75, "learning_rate": 6.21252127120666e-06, "loss": 3.9596, "step": 17055 }, { "epoch": 5.681602398600816, "grad_norm": 0.765625, "learning_rate": 6.211723855859659e-06, "loss": 4.0904, "step": 17056 }, { "epoch": 5.681935537603065, "grad_norm": 0.80859375, "learning_rate": 6.210926455518282e-06, "loss": 3.9854, "step": 17057 }, { "epoch": 5.682268676605314, "grad_norm": 0.7578125, "learning_rate": 6.2101290701918215e-06, "loss": 3.9508, "step": 17058 }, { "epoch": 5.682601815607562, "grad_norm": 0.73828125, "learning_rate": 6.209331699889562e-06, "loss": 3.9808, "step": 17059 }, { "epoch": 5.682934954609811, "grad_norm": 0.74609375, "learning_rate": 6.208534344620795e-06, "loss": 3.9597, "step": 17060 }, { "epoch": 5.683268093612059, "grad_norm": 0.75, "learning_rate": 6.207737004394803e-06, "loss": 4.0136, "step": 17061 }, { "epoch": 5.683601232614309, "grad_norm": 0.765625, "learning_rate": 6.206939679220879e-06, "loss": 3.9722, "step": 17062 }, { "epoch": 5.683934371616557, "grad_norm": 0.7109375, "learning_rate": 6.206142369108306e-06, "loss": 3.9511, "step": 17063 }, { "epoch": 5.684267510618806, "grad_norm": 0.71875, "learning_rate": 6.20534507406637e-06, "loss": 4.012, "step": 17064 }, { "epoch": 5.684600649621054, "grad_norm": 0.796875, "learning_rate": 6.204547794104361e-06, "loss": 3.9401, "step": 17065 }, { "epoch": 5.684933788623303, "grad_norm": 0.734375, "learning_rate": 6.203750529231563e-06, "loss": 3.9865, "step": 17066 }, { "epoch": 5.685266927625552, "grad_norm": 0.77734375, "learning_rate": 6.202953279457266e-06, "loss": 3.9496, "step": 17067 }, { "epoch": 5.6856000666278, "grad_norm": 0.7578125, "learning_rate": 6.202156044790752e-06, "loss": 3.9883, "step": 17068 }, { "epoch": 5.685933205630049, "grad_norm": 0.765625, "learning_rate": 6.201358825241309e-06, "loss": 4.0428, "step": 17069 }, { "epoch": 5.686266344632298, "grad_norm": 0.7734375, "learning_rate": 6.200561620818223e-06, "loss": 3.9746, "step": 17070 }, { "epoch": 5.686599483634547, "grad_norm": 0.796875, "learning_rate": 6.199764431530782e-06, "loss": 3.9479, "step": 17071 }, { "epoch": 5.686932622636795, "grad_norm": 0.78125, "learning_rate": 6.198967257388268e-06, "loss": 4.0263, "step": 17072 }, { "epoch": 5.687265761639043, "grad_norm": 0.7578125, "learning_rate": 6.198170098399965e-06, "loss": 3.9718, "step": 17073 }, { "epoch": 5.687598900641293, "grad_norm": 0.74609375, "learning_rate": 6.1973729545751626e-06, "loss": 3.9819, "step": 17074 }, { "epoch": 5.687932039643541, "grad_norm": 0.7109375, "learning_rate": 6.196575825923141e-06, "loss": 3.9809, "step": 17075 }, { "epoch": 5.68826517864579, "grad_norm": 0.73828125, "learning_rate": 6.195778712453191e-06, "loss": 3.9554, "step": 17076 }, { "epoch": 5.688598317648038, "grad_norm": 0.8125, "learning_rate": 6.194981614174592e-06, "loss": 3.9969, "step": 17077 }, { "epoch": 5.6889314566502875, "grad_norm": 0.7578125, "learning_rate": 6.194184531096632e-06, "loss": 4.021, "step": 17078 }, { "epoch": 5.689264595652536, "grad_norm": 0.77734375, "learning_rate": 6.193387463228593e-06, "loss": 3.9473, "step": 17079 }, { "epoch": 5.689597734654785, "grad_norm": 0.7421875, "learning_rate": 6.192590410579762e-06, "loss": 3.9551, "step": 17080 }, { "epoch": 5.689930873657033, "grad_norm": 0.796875, "learning_rate": 6.19179337315942e-06, "loss": 3.923, "step": 17081 }, { "epoch": 5.6902640126592825, "grad_norm": 0.7734375, "learning_rate": 6.19099635097685e-06, "loss": 3.966, "step": 17082 }, { "epoch": 5.690597151661531, "grad_norm": 0.7421875, "learning_rate": 6.190199344041338e-06, "loss": 3.9874, "step": 17083 }, { "epoch": 5.690930290663779, "grad_norm": 0.72265625, "learning_rate": 6.1894023523621655e-06, "loss": 4.05, "step": 17084 }, { "epoch": 5.691263429666028, "grad_norm": 0.78515625, "learning_rate": 6.188605375948619e-06, "loss": 3.9341, "step": 17085 }, { "epoch": 5.691596568668277, "grad_norm": 0.765625, "learning_rate": 6.1878084148099775e-06, "loss": 3.9566, "step": 17086 }, { "epoch": 5.691929707670526, "grad_norm": 0.75390625, "learning_rate": 6.187011468955528e-06, "loss": 3.9903, "step": 17087 }, { "epoch": 5.692262846672774, "grad_norm": 0.78515625, "learning_rate": 6.186214538394548e-06, "loss": 4.0763, "step": 17088 }, { "epoch": 5.692595985675023, "grad_norm": 0.73828125, "learning_rate": 6.185417623136326e-06, "loss": 3.9723, "step": 17089 }, { "epoch": 5.6929291246772715, "grad_norm": 0.75390625, "learning_rate": 6.184620723190141e-06, "loss": 4.0123, "step": 17090 }, { "epoch": 5.69326226367952, "grad_norm": 0.7734375, "learning_rate": 6.183823838565274e-06, "loss": 4.0161, "step": 17091 }, { "epoch": 5.693595402681769, "grad_norm": 0.75, "learning_rate": 6.183026969271009e-06, "loss": 4.035, "step": 17092 }, { "epoch": 5.693928541684017, "grad_norm": 0.75390625, "learning_rate": 6.182230115316626e-06, "loss": 3.9857, "step": 17093 }, { "epoch": 5.6942616806862665, "grad_norm": 0.7578125, "learning_rate": 6.181433276711409e-06, "loss": 4.0271, "step": 17094 }, { "epoch": 5.694594819688515, "grad_norm": 0.7421875, "learning_rate": 6.180636453464638e-06, "loss": 3.976, "step": 17095 }, { "epoch": 5.694927958690764, "grad_norm": 0.765625, "learning_rate": 6.179839645585596e-06, "loss": 4.0147, "step": 17096 }, { "epoch": 5.695261097693012, "grad_norm": 0.71875, "learning_rate": 6.17904285308356e-06, "loss": 3.9621, "step": 17097 }, { "epoch": 5.695594236695261, "grad_norm": 0.76953125, "learning_rate": 6.178246075967818e-06, "loss": 3.9695, "step": 17098 }, { "epoch": 5.69592737569751, "grad_norm": 0.79296875, "learning_rate": 6.177449314247643e-06, "loss": 3.9523, "step": 17099 }, { "epoch": 5.696260514699759, "grad_norm": 0.75390625, "learning_rate": 6.17665256793232e-06, "loss": 3.933, "step": 17100 }, { "epoch": 5.696593653702007, "grad_norm": 0.74609375, "learning_rate": 6.175855837031126e-06, "loss": 4.0362, "step": 17101 }, { "epoch": 5.6969267927042555, "grad_norm": 0.7265625, "learning_rate": 6.175059121553346e-06, "loss": 4.0155, "step": 17102 }, { "epoch": 5.697259931706505, "grad_norm": 0.7265625, "learning_rate": 6.174262421508255e-06, "loss": 4.0175, "step": 17103 }, { "epoch": 5.697593070708753, "grad_norm": 0.75, "learning_rate": 6.1734657369051365e-06, "loss": 3.8748, "step": 17104 }, { "epoch": 5.697926209711002, "grad_norm": 0.73828125, "learning_rate": 6.172669067753269e-06, "loss": 4.0185, "step": 17105 }, { "epoch": 5.6982593487132505, "grad_norm": 0.75, "learning_rate": 6.171872414061932e-06, "loss": 3.9427, "step": 17106 }, { "epoch": 5.6985924877155, "grad_norm": 0.74609375, "learning_rate": 6.171075775840407e-06, "loss": 3.9253, "step": 17107 }, { "epoch": 5.698925626717748, "grad_norm": 0.71484375, "learning_rate": 6.170279153097966e-06, "loss": 4.0303, "step": 17108 }, { "epoch": 5.699258765719996, "grad_norm": 0.734375, "learning_rate": 6.169482545843896e-06, "loss": 4.0322, "step": 17109 }, { "epoch": 5.699591904722245, "grad_norm": 0.76171875, "learning_rate": 6.16868595408747e-06, "loss": 3.9299, "step": 17110 }, { "epoch": 5.699925043724494, "grad_norm": 0.7421875, "learning_rate": 6.167889377837971e-06, "loss": 4.0422, "step": 17111 }, { "epoch": 5.700258182726743, "grad_norm": 0.75, "learning_rate": 6.167092817104674e-06, "loss": 3.9798, "step": 17112 }, { "epoch": 5.700591321728991, "grad_norm": 0.7265625, "learning_rate": 6.16629627189686e-06, "loss": 4.0414, "step": 17113 }, { "epoch": 5.70092446073124, "grad_norm": 0.77734375, "learning_rate": 6.165499742223804e-06, "loss": 3.988, "step": 17114 }, { "epoch": 5.701257599733489, "grad_norm": 0.7890625, "learning_rate": 6.164703228094787e-06, "loss": 4.0193, "step": 17115 }, { "epoch": 5.701590738735738, "grad_norm": 0.78515625, "learning_rate": 6.163906729519087e-06, "loss": 3.9407, "step": 17116 }, { "epoch": 5.701923877737986, "grad_norm": 0.8046875, "learning_rate": 6.163110246505976e-06, "loss": 3.9803, "step": 17117 }, { "epoch": 5.702257016740235, "grad_norm": 0.76171875, "learning_rate": 6.162313779064737e-06, "loss": 4.059, "step": 17118 }, { "epoch": 5.702590155742484, "grad_norm": 0.73828125, "learning_rate": 6.161517327204643e-06, "loss": 3.9379, "step": 17119 }, { "epoch": 5.702923294744732, "grad_norm": 0.71875, "learning_rate": 6.160720890934975e-06, "loss": 4.0083, "step": 17120 }, { "epoch": 5.703256433746981, "grad_norm": 0.72265625, "learning_rate": 6.159924470265005e-06, "loss": 3.9841, "step": 17121 }, { "epoch": 5.703589572749229, "grad_norm": 0.72265625, "learning_rate": 6.159128065204015e-06, "loss": 4.0091, "step": 17122 }, { "epoch": 5.703922711751479, "grad_norm": 0.77734375, "learning_rate": 6.158331675761277e-06, "loss": 3.9365, "step": 17123 }, { "epoch": 5.704255850753727, "grad_norm": 0.734375, "learning_rate": 6.157535301946073e-06, "loss": 3.9686, "step": 17124 }, { "epoch": 5.704588989755976, "grad_norm": 0.73046875, "learning_rate": 6.156738943767672e-06, "loss": 4.0059, "step": 17125 }, { "epoch": 5.704922128758224, "grad_norm": 0.7734375, "learning_rate": 6.155942601235352e-06, "loss": 3.9649, "step": 17126 }, { "epoch": 5.705255267760473, "grad_norm": 0.76171875, "learning_rate": 6.155146274358391e-06, "loss": 4.0091, "step": 17127 }, { "epoch": 5.705588406762722, "grad_norm": 0.765625, "learning_rate": 6.154349963146061e-06, "loss": 4.0518, "step": 17128 }, { "epoch": 5.70592154576497, "grad_norm": 0.7578125, "learning_rate": 6.153553667607641e-06, "loss": 3.984, "step": 17129 }, { "epoch": 5.706254684767219, "grad_norm": 0.77734375, "learning_rate": 6.152757387752402e-06, "loss": 3.9869, "step": 17130 }, { "epoch": 5.706587823769468, "grad_norm": 0.73046875, "learning_rate": 6.151961123589624e-06, "loss": 3.9948, "step": 17131 }, { "epoch": 5.706920962771717, "grad_norm": 0.7734375, "learning_rate": 6.151164875128577e-06, "loss": 3.9459, "step": 17132 }, { "epoch": 5.707254101773965, "grad_norm": 0.7421875, "learning_rate": 6.1503686423785415e-06, "loss": 4.0817, "step": 17133 }, { "epoch": 5.707587240776213, "grad_norm": 0.75, "learning_rate": 6.149572425348786e-06, "loss": 4.0032, "step": 17134 }, { "epoch": 5.707920379778463, "grad_norm": 0.7421875, "learning_rate": 6.148776224048585e-06, "loss": 4.0212, "step": 17135 }, { "epoch": 5.708253518780712, "grad_norm": 0.76953125, "learning_rate": 6.147980038487215e-06, "loss": 4.0029, "step": 17136 }, { "epoch": 5.70858665778296, "grad_norm": 0.765625, "learning_rate": 6.147183868673949e-06, "loss": 3.9728, "step": 17137 }, { "epoch": 5.708919796785208, "grad_norm": 0.7578125, "learning_rate": 6.146387714618061e-06, "loss": 4.0604, "step": 17138 }, { "epoch": 5.7092529357874575, "grad_norm": 0.7734375, "learning_rate": 6.145591576328823e-06, "loss": 3.9599, "step": 17139 }, { "epoch": 5.709586074789706, "grad_norm": 0.76953125, "learning_rate": 6.144795453815512e-06, "loss": 4.0315, "step": 17140 }, { "epoch": 5.709919213791955, "grad_norm": 0.7109375, "learning_rate": 6.143999347087395e-06, "loss": 4.0357, "step": 17141 }, { "epoch": 5.710252352794203, "grad_norm": 0.73828125, "learning_rate": 6.143203256153756e-06, "loss": 3.9976, "step": 17142 }, { "epoch": 5.7105854917964525, "grad_norm": 0.81640625, "learning_rate": 6.142407181023854e-06, "loss": 4.0185, "step": 17143 }, { "epoch": 5.710918630798701, "grad_norm": 0.7265625, "learning_rate": 6.141611121706969e-06, "loss": 3.9688, "step": 17144 }, { "epoch": 5.711251769800949, "grad_norm": 0.75, "learning_rate": 6.140815078212371e-06, "loss": 3.9528, "step": 17145 }, { "epoch": 5.711584908803198, "grad_norm": 0.7421875, "learning_rate": 6.1400190505493355e-06, "loss": 3.9595, "step": 17146 }, { "epoch": 5.7119180478054465, "grad_norm": 0.765625, "learning_rate": 6.13922303872713e-06, "loss": 3.9737, "step": 17147 }, { "epoch": 5.712251186807696, "grad_norm": 0.7890625, "learning_rate": 6.138427042755031e-06, "loss": 3.9406, "step": 17148 }, { "epoch": 5.712584325809944, "grad_norm": 0.73828125, "learning_rate": 6.137631062642305e-06, "loss": 3.9881, "step": 17149 }, { "epoch": 5.712917464812193, "grad_norm": 0.75390625, "learning_rate": 6.136835098398229e-06, "loss": 4.0591, "step": 17150 }, { "epoch": 5.7132506038144415, "grad_norm": 0.80078125, "learning_rate": 6.136039150032072e-06, "loss": 3.9711, "step": 17151 }, { "epoch": 5.71358374281669, "grad_norm": 0.7265625, "learning_rate": 6.135243217553103e-06, "loss": 3.9799, "step": 17152 }, { "epoch": 5.713916881818939, "grad_norm": 0.7265625, "learning_rate": 6.134447300970595e-06, "loss": 4.0369, "step": 17153 }, { "epoch": 5.714250020821187, "grad_norm": 0.73046875, "learning_rate": 6.133651400293816e-06, "loss": 3.9673, "step": 17154 }, { "epoch": 5.7145831598234365, "grad_norm": 0.7421875, "learning_rate": 6.132855515532041e-06, "loss": 4.0178, "step": 17155 }, { "epoch": 5.714916298825685, "grad_norm": 0.82421875, "learning_rate": 6.132059646694536e-06, "loss": 4.0393, "step": 17156 }, { "epoch": 5.715249437827934, "grad_norm": 0.7890625, "learning_rate": 6.131263793790576e-06, "loss": 3.9483, "step": 17157 }, { "epoch": 5.715582576830182, "grad_norm": 0.7265625, "learning_rate": 6.130467956829425e-06, "loss": 3.9289, "step": 17158 }, { "epoch": 5.715915715832431, "grad_norm": 0.78125, "learning_rate": 6.129672135820359e-06, "loss": 3.9731, "step": 17159 }, { "epoch": 5.71624885483468, "grad_norm": 0.77734375, "learning_rate": 6.128876330772644e-06, "loss": 3.9848, "step": 17160 }, { "epoch": 5.716581993836929, "grad_norm": 0.7734375, "learning_rate": 6.128080541695548e-06, "loss": 4.1115, "step": 17161 }, { "epoch": 5.716915132839177, "grad_norm": 0.7890625, "learning_rate": 6.1272847685983445e-06, "loss": 4.002, "step": 17162 }, { "epoch": 5.7172482718414255, "grad_norm": 0.76953125, "learning_rate": 6.126489011490298e-06, "loss": 3.9625, "step": 17163 }, { "epoch": 5.717581410843675, "grad_norm": 0.73046875, "learning_rate": 6.12569327038068e-06, "loss": 4.0518, "step": 17164 }, { "epoch": 5.717914549845923, "grad_norm": 0.7578125, "learning_rate": 6.124897545278758e-06, "loss": 3.96, "step": 17165 }, { "epoch": 5.718247688848172, "grad_norm": 0.76953125, "learning_rate": 6.124101836193803e-06, "loss": 4.0033, "step": 17166 }, { "epoch": 5.71858082785042, "grad_norm": 0.8515625, "learning_rate": 6.123306143135078e-06, "loss": 3.9655, "step": 17167 }, { "epoch": 5.71891396685267, "grad_norm": 0.765625, "learning_rate": 6.122510466111858e-06, "loss": 3.9989, "step": 17168 }, { "epoch": 5.719247105854918, "grad_norm": 0.7578125, "learning_rate": 6.121714805133409e-06, "loss": 3.8667, "step": 17169 }, { "epoch": 5.719580244857166, "grad_norm": 0.75, "learning_rate": 6.120919160208994e-06, "loss": 3.9925, "step": 17170 }, { "epoch": 5.719913383859415, "grad_norm": 0.78515625, "learning_rate": 6.120123531347884e-06, "loss": 3.9346, "step": 17171 }, { "epoch": 5.720246522861664, "grad_norm": 0.73828125, "learning_rate": 6.119327918559346e-06, "loss": 3.9848, "step": 17172 }, { "epoch": 5.720579661863913, "grad_norm": 0.8046875, "learning_rate": 6.118532321852648e-06, "loss": 3.979, "step": 17173 }, { "epoch": 5.720912800866161, "grad_norm": 0.74609375, "learning_rate": 6.117736741237054e-06, "loss": 4.0388, "step": 17174 }, { "epoch": 5.72124593986841, "grad_norm": 0.77734375, "learning_rate": 6.116941176721835e-06, "loss": 4.0445, "step": 17175 }, { "epoch": 5.721579078870659, "grad_norm": 0.796875, "learning_rate": 6.116145628316255e-06, "loss": 3.9366, "step": 17176 }, { "epoch": 5.721912217872908, "grad_norm": 0.80859375, "learning_rate": 6.115350096029581e-06, "loss": 3.9747, "step": 17177 }, { "epoch": 5.722245356875156, "grad_norm": 0.75, "learning_rate": 6.114554579871082e-06, "loss": 3.9263, "step": 17178 }, { "epoch": 5.722578495877405, "grad_norm": 0.74609375, "learning_rate": 6.113759079850018e-06, "loss": 3.9709, "step": 17179 }, { "epoch": 5.722911634879654, "grad_norm": 0.78515625, "learning_rate": 6.11296359597566e-06, "loss": 3.9584, "step": 17180 }, { "epoch": 5.723244773881902, "grad_norm": 0.74609375, "learning_rate": 6.112168128257269e-06, "loss": 4.0332, "step": 17181 }, { "epoch": 5.723577912884151, "grad_norm": 0.79296875, "learning_rate": 6.111372676704115e-06, "loss": 3.9593, "step": 17182 }, { "epoch": 5.723911051886399, "grad_norm": 0.73828125, "learning_rate": 6.110577241325461e-06, "loss": 3.9933, "step": 17183 }, { "epoch": 5.7242441908886486, "grad_norm": 0.765625, "learning_rate": 6.109781822130574e-06, "loss": 3.8804, "step": 17184 }, { "epoch": 5.724577329890897, "grad_norm": 0.75390625, "learning_rate": 6.108986419128715e-06, "loss": 4.0007, "step": 17185 }, { "epoch": 5.724910468893146, "grad_norm": 0.74609375, "learning_rate": 6.108191032329154e-06, "loss": 4.021, "step": 17186 }, { "epoch": 5.725243607895394, "grad_norm": 0.83203125, "learning_rate": 6.107395661741154e-06, "loss": 4.0205, "step": 17187 }, { "epoch": 5.725576746897643, "grad_norm": 0.7734375, "learning_rate": 6.1066003073739765e-06, "loss": 4.0105, "step": 17188 }, { "epoch": 5.725909885899892, "grad_norm": 0.765625, "learning_rate": 6.105804969236886e-06, "loss": 4.0075, "step": 17189 }, { "epoch": 5.72624302490214, "grad_norm": 0.80078125, "learning_rate": 6.1050096473391485e-06, "loss": 3.9821, "step": 17190 }, { "epoch": 5.726576163904389, "grad_norm": 0.77734375, "learning_rate": 6.104214341690027e-06, "loss": 3.979, "step": 17191 }, { "epoch": 5.726909302906638, "grad_norm": 0.74609375, "learning_rate": 6.103419052298786e-06, "loss": 4.0115, "step": 17192 }, { "epoch": 5.727242441908887, "grad_norm": 0.74609375, "learning_rate": 6.102623779174686e-06, "loss": 4.0045, "step": 17193 }, { "epoch": 5.727575580911135, "grad_norm": 0.74609375, "learning_rate": 6.101828522326994e-06, "loss": 3.9771, "step": 17194 }, { "epoch": 5.727908719913384, "grad_norm": 0.73828125, "learning_rate": 6.10103328176497e-06, "loss": 4.0664, "step": 17195 }, { "epoch": 5.7282418589156325, "grad_norm": 0.75, "learning_rate": 6.100238057497882e-06, "loss": 3.9834, "step": 17196 }, { "epoch": 5.728574997917882, "grad_norm": 0.7734375, "learning_rate": 6.0994428495349855e-06, "loss": 4.0327, "step": 17197 }, { "epoch": 5.72890813692013, "grad_norm": 0.7421875, "learning_rate": 6.098647657885545e-06, "loss": 4.0315, "step": 17198 }, { "epoch": 5.729241275922378, "grad_norm": 0.73046875, "learning_rate": 6.0978524825588265e-06, "loss": 4.0357, "step": 17199 }, { "epoch": 5.7295744149246275, "grad_norm": 0.765625, "learning_rate": 6.097057323564087e-06, "loss": 3.9827, "step": 17200 }, { "epoch": 5.729907553926876, "grad_norm": 0.76953125, "learning_rate": 6.0962621809105935e-06, "loss": 4.0795, "step": 17201 }, { "epoch": 5.730240692929125, "grad_norm": 0.7890625, "learning_rate": 6.095467054607603e-06, "loss": 4.0151, "step": 17202 }, { "epoch": 5.730573831931373, "grad_norm": 0.80859375, "learning_rate": 6.09467194466438e-06, "loss": 3.991, "step": 17203 }, { "epoch": 5.7309069709336224, "grad_norm": 0.7578125, "learning_rate": 6.093876851090185e-06, "loss": 3.968, "step": 17204 }, { "epoch": 5.731240109935871, "grad_norm": 0.7578125, "learning_rate": 6.0930817738942804e-06, "loss": 4.0076, "step": 17205 }, { "epoch": 5.731573248938119, "grad_norm": 0.78515625, "learning_rate": 6.092286713085925e-06, "loss": 4.0037, "step": 17206 }, { "epoch": 5.731906387940368, "grad_norm": 0.77734375, "learning_rate": 6.091491668674379e-06, "loss": 3.9841, "step": 17207 }, { "epoch": 5.7322395269426165, "grad_norm": 0.734375, "learning_rate": 6.090696640668905e-06, "loss": 3.9454, "step": 17208 }, { "epoch": 5.732572665944866, "grad_norm": 0.7890625, "learning_rate": 6.089901629078762e-06, "loss": 3.9838, "step": 17209 }, { "epoch": 5.732905804947114, "grad_norm": 0.7421875, "learning_rate": 6.089106633913212e-06, "loss": 3.9766, "step": 17210 }, { "epoch": 5.733238943949363, "grad_norm": 0.74609375, "learning_rate": 6.088311655181511e-06, "loss": 4.0235, "step": 17211 }, { "epoch": 5.7335720829516115, "grad_norm": 0.8046875, "learning_rate": 6.087516692892925e-06, "loss": 4.0076, "step": 17212 }, { "epoch": 5.73390522195386, "grad_norm": 0.79296875, "learning_rate": 6.086721747056707e-06, "loss": 4.0175, "step": 17213 }, { "epoch": 5.734238360956109, "grad_norm": 0.796875, "learning_rate": 6.085926817682124e-06, "loss": 4.023, "step": 17214 }, { "epoch": 5.734571499958357, "grad_norm": 0.73828125, "learning_rate": 6.085131904778428e-06, "loss": 3.9692, "step": 17215 }, { "epoch": 5.734904638960606, "grad_norm": 0.8046875, "learning_rate": 6.084337008354879e-06, "loss": 3.9951, "step": 17216 }, { "epoch": 5.735237777962855, "grad_norm": 0.71875, "learning_rate": 6.08354212842074e-06, "loss": 3.9585, "step": 17217 }, { "epoch": 5.735570916965104, "grad_norm": 0.75390625, "learning_rate": 6.082747264985265e-06, "loss": 4.0063, "step": 17218 }, { "epoch": 5.735904055967352, "grad_norm": 0.75, "learning_rate": 6.081952418057716e-06, "loss": 3.9911, "step": 17219 }, { "epoch": 5.736237194969601, "grad_norm": 0.7578125, "learning_rate": 6.081157587647349e-06, "loss": 4.0398, "step": 17220 }, { "epoch": 5.73657033397185, "grad_norm": 0.8046875, "learning_rate": 6.080362773763424e-06, "loss": 4.0068, "step": 17221 }, { "epoch": 5.736903472974099, "grad_norm": 0.796875, "learning_rate": 6.079567976415196e-06, "loss": 3.9885, "step": 17222 }, { "epoch": 5.737236611976347, "grad_norm": 0.77734375, "learning_rate": 6.078773195611928e-06, "loss": 3.8699, "step": 17223 }, { "epoch": 5.7375697509785955, "grad_norm": 0.7734375, "learning_rate": 6.077978431362873e-06, "loss": 3.9389, "step": 17224 }, { "epoch": 5.737902889980845, "grad_norm": 0.7890625, "learning_rate": 6.077183683677285e-06, "loss": 4.0109, "step": 17225 }, { "epoch": 5.738236028983093, "grad_norm": 0.78515625, "learning_rate": 6.076388952564429e-06, "loss": 4.028, "step": 17226 }, { "epoch": 5.738569167985342, "grad_norm": 0.734375, "learning_rate": 6.0755942380335564e-06, "loss": 3.9153, "step": 17227 }, { "epoch": 5.73890230698759, "grad_norm": 0.703125, "learning_rate": 6.074799540093927e-06, "loss": 4.0084, "step": 17228 }, { "epoch": 5.73923544598984, "grad_norm": 0.77734375, "learning_rate": 6.074004858754794e-06, "loss": 3.985, "step": 17229 }, { "epoch": 5.739568584992088, "grad_norm": 0.7578125, "learning_rate": 6.073210194025419e-06, "loss": 3.9735, "step": 17230 }, { "epoch": 5.739901723994336, "grad_norm": 0.73828125, "learning_rate": 6.072415545915051e-06, "loss": 4.0034, "step": 17231 }, { "epoch": 5.740234862996585, "grad_norm": 0.78125, "learning_rate": 6.071620914432956e-06, "loss": 3.9621, "step": 17232 }, { "epoch": 5.740568001998834, "grad_norm": 0.73828125, "learning_rate": 6.070826299588377e-06, "loss": 3.9398, "step": 17233 }, { "epoch": 5.740901141001083, "grad_norm": 0.78125, "learning_rate": 6.07003170139058e-06, "loss": 3.9844, "step": 17234 }, { "epoch": 5.741234280003331, "grad_norm": 0.765625, "learning_rate": 6.069237119848813e-06, "loss": 3.9861, "step": 17235 }, { "epoch": 5.74156741900558, "grad_norm": 0.765625, "learning_rate": 6.0684425549723364e-06, "loss": 3.9703, "step": 17236 }, { "epoch": 5.741900558007829, "grad_norm": 0.734375, "learning_rate": 6.067648006770403e-06, "loss": 4.048, "step": 17237 }, { "epoch": 5.742233697010078, "grad_norm": 0.7109375, "learning_rate": 6.066853475252268e-06, "loss": 4.0214, "step": 17238 }, { "epoch": 5.742566836012326, "grad_norm": 0.78125, "learning_rate": 6.066058960427185e-06, "loss": 4.1241, "step": 17239 }, { "epoch": 5.742899975014575, "grad_norm": 0.78125, "learning_rate": 6.0652644623044095e-06, "loss": 3.9862, "step": 17240 }, { "epoch": 5.743233114016824, "grad_norm": 0.76171875, "learning_rate": 6.064469980893198e-06, "loss": 3.8939, "step": 17241 }, { "epoch": 5.743566253019072, "grad_norm": 0.77734375, "learning_rate": 6.063675516202798e-06, "loss": 4.0431, "step": 17242 }, { "epoch": 5.743899392021321, "grad_norm": 0.734375, "learning_rate": 6.06288106824247e-06, "loss": 4.0056, "step": 17243 }, { "epoch": 5.744232531023569, "grad_norm": 0.75, "learning_rate": 6.0620866370214635e-06, "loss": 3.9796, "step": 17244 }, { "epoch": 5.7445656700258185, "grad_norm": 0.77734375, "learning_rate": 6.061292222549033e-06, "loss": 3.8962, "step": 17245 }, { "epoch": 5.744898809028067, "grad_norm": 0.7734375, "learning_rate": 6.060497824834431e-06, "loss": 3.8829, "step": 17246 }, { "epoch": 5.745231948030316, "grad_norm": 0.77734375, "learning_rate": 6.0597034438869135e-06, "loss": 3.9449, "step": 17247 }, { "epoch": 5.745565087032564, "grad_norm": 0.76171875, "learning_rate": 6.0589090797157294e-06, "loss": 4.0384, "step": 17248 }, { "epoch": 5.745898226034813, "grad_norm": 0.7890625, "learning_rate": 6.058114732330134e-06, "loss": 4.0333, "step": 17249 }, { "epoch": 5.746231365037062, "grad_norm": 0.734375, "learning_rate": 6.05732040173938e-06, "loss": 3.9841, "step": 17250 }, { "epoch": 5.74656450403931, "grad_norm": 0.7421875, "learning_rate": 6.056526087952717e-06, "loss": 3.975, "step": 17251 }, { "epoch": 5.746897643041559, "grad_norm": 0.7734375, "learning_rate": 6.0557317909793994e-06, "loss": 3.9587, "step": 17252 }, { "epoch": 5.7472307820438076, "grad_norm": 0.76953125, "learning_rate": 6.054937510828676e-06, "loss": 3.9041, "step": 17253 }, { "epoch": 5.747563921046057, "grad_norm": 0.69921875, "learning_rate": 6.054143247509804e-06, "loss": 3.9534, "step": 17254 }, { "epoch": 5.747897060048305, "grad_norm": 0.76953125, "learning_rate": 6.0533490010320275e-06, "loss": 3.9223, "step": 17255 }, { "epoch": 5.748230199050554, "grad_norm": 0.765625, "learning_rate": 6.0525547714046046e-06, "loss": 3.9814, "step": 17256 }, { "epoch": 5.7485633380528025, "grad_norm": 0.7421875, "learning_rate": 6.051760558636781e-06, "loss": 4.0182, "step": 17257 }, { "epoch": 5.748896477055052, "grad_norm": 0.765625, "learning_rate": 6.050966362737812e-06, "loss": 4.0046, "step": 17258 }, { "epoch": 5.7492296160573, "grad_norm": 0.74609375, "learning_rate": 6.050172183716949e-06, "loss": 3.9512, "step": 17259 }, { "epoch": 5.749562755059548, "grad_norm": 0.7578125, "learning_rate": 6.049378021583435e-06, "loss": 3.982, "step": 17260 }, { "epoch": 5.7498958940617975, "grad_norm": 0.7578125, "learning_rate": 6.0485838763465275e-06, "loss": 4.0144, "step": 17261 }, { "epoch": 5.750229033064046, "grad_norm": 0.74609375, "learning_rate": 6.0477897480154715e-06, "loss": 3.9404, "step": 17262 }, { "epoch": 5.750562172066295, "grad_norm": 0.7265625, "learning_rate": 6.046995636599522e-06, "loss": 3.9889, "step": 17263 }, { "epoch": 5.750895311068543, "grad_norm": 0.78125, "learning_rate": 6.046201542107924e-06, "loss": 3.957, "step": 17264 }, { "epoch": 5.751228450070792, "grad_norm": 0.78125, "learning_rate": 6.045407464549932e-06, "loss": 4.0927, "step": 17265 }, { "epoch": 5.751561589073041, "grad_norm": 0.73828125, "learning_rate": 6.044613403934789e-06, "loss": 3.9836, "step": 17266 }, { "epoch": 5.751894728075289, "grad_norm": 0.765625, "learning_rate": 6.04381936027175e-06, "loss": 4.0142, "step": 17267 }, { "epoch": 5.752227867077538, "grad_norm": 0.78125, "learning_rate": 6.043025333570062e-06, "loss": 4.0217, "step": 17268 }, { "epoch": 5.7525610060797865, "grad_norm": 0.74609375, "learning_rate": 6.0422313238389715e-06, "loss": 4.0034, "step": 17269 }, { "epoch": 5.752894145082036, "grad_norm": 0.75, "learning_rate": 6.04143733108773e-06, "loss": 4.0979, "step": 17270 }, { "epoch": 5.753227284084284, "grad_norm": 0.76953125, "learning_rate": 6.040643355325581e-06, "loss": 3.9909, "step": 17271 }, { "epoch": 5.753560423086533, "grad_norm": 0.7578125, "learning_rate": 6.039849396561779e-06, "loss": 3.9855, "step": 17272 }, { "epoch": 5.753893562088781, "grad_norm": 0.80078125, "learning_rate": 6.039055454805567e-06, "loss": 3.9912, "step": 17273 }, { "epoch": 5.75422670109103, "grad_norm": 0.7890625, "learning_rate": 6.038261530066196e-06, "loss": 3.9966, "step": 17274 }, { "epoch": 5.754559840093279, "grad_norm": 0.80078125, "learning_rate": 6.03746762235291e-06, "loss": 3.9786, "step": 17275 }, { "epoch": 5.754892979095528, "grad_norm": 0.7734375, "learning_rate": 6.036673731674961e-06, "loss": 4.0399, "step": 17276 }, { "epoch": 5.755226118097776, "grad_norm": 0.7578125, "learning_rate": 6.035879858041595e-06, "loss": 3.9954, "step": 17277 }, { "epoch": 5.755559257100025, "grad_norm": 0.734375, "learning_rate": 6.035086001462055e-06, "loss": 3.9895, "step": 17278 }, { "epoch": 5.755892396102274, "grad_norm": 0.75390625, "learning_rate": 6.0342921619455884e-06, "loss": 4.0424, "step": 17279 }, { "epoch": 5.756225535104522, "grad_norm": 0.75, "learning_rate": 6.033498339501446e-06, "loss": 4.0378, "step": 17280 }, { "epoch": 5.756558674106771, "grad_norm": 0.8203125, "learning_rate": 6.032704534138868e-06, "loss": 3.967, "step": 17281 }, { "epoch": 5.75689181310902, "grad_norm": 0.80078125, "learning_rate": 6.031910745867108e-06, "loss": 3.9734, "step": 17282 }, { "epoch": 5.757224952111269, "grad_norm": 0.79296875, "learning_rate": 6.031116974695404e-06, "loss": 3.9616, "step": 17283 }, { "epoch": 5.757558091113517, "grad_norm": 0.78515625, "learning_rate": 6.0303232206330085e-06, "loss": 4.0265, "step": 17284 }, { "epoch": 5.757891230115765, "grad_norm": 0.7578125, "learning_rate": 6.029529483689163e-06, "loss": 3.9758, "step": 17285 }, { "epoch": 5.758224369118015, "grad_norm": 0.75390625, "learning_rate": 6.028735763873116e-06, "loss": 3.9421, "step": 17286 }, { "epoch": 5.758557508120263, "grad_norm": 0.734375, "learning_rate": 6.027942061194111e-06, "loss": 3.9997, "step": 17287 }, { "epoch": 5.758890647122512, "grad_norm": 0.74609375, "learning_rate": 6.027148375661389e-06, "loss": 3.9955, "step": 17288 }, { "epoch": 5.75922378612476, "grad_norm": 0.75390625, "learning_rate": 6.0263547072842005e-06, "loss": 3.9474, "step": 17289 }, { "epoch": 5.75955692512701, "grad_norm": 0.7578125, "learning_rate": 6.025561056071786e-06, "loss": 3.9513, "step": 17290 }, { "epoch": 5.759890064129258, "grad_norm": 0.76171875, "learning_rate": 6.0247674220333935e-06, "loss": 3.9623, "step": 17291 }, { "epoch": 5.760223203131506, "grad_norm": 0.75390625, "learning_rate": 6.023973805178264e-06, "loss": 4.0741, "step": 17292 }, { "epoch": 5.760556342133755, "grad_norm": 0.75390625, "learning_rate": 6.023180205515643e-06, "loss": 3.9536, "step": 17293 }, { "epoch": 5.760889481136004, "grad_norm": 0.80859375, "learning_rate": 6.022386623054772e-06, "loss": 3.9751, "step": 17294 }, { "epoch": 5.761222620138253, "grad_norm": 0.7578125, "learning_rate": 6.021593057804901e-06, "loss": 3.946, "step": 17295 }, { "epoch": 5.761555759140501, "grad_norm": 0.734375, "learning_rate": 6.020799509775267e-06, "loss": 4.0246, "step": 17296 }, { "epoch": 5.76188889814275, "grad_norm": 0.7734375, "learning_rate": 6.020005978975112e-06, "loss": 3.9735, "step": 17297 }, { "epoch": 5.762222037144999, "grad_norm": 0.765625, "learning_rate": 6.019212465413685e-06, "loss": 4.0039, "step": 17298 }, { "epoch": 5.762555176147248, "grad_norm": 0.75390625, "learning_rate": 6.018418969100221e-06, "loss": 3.9683, "step": 17299 }, { "epoch": 5.762888315149496, "grad_norm": 0.78125, "learning_rate": 6.017625490043971e-06, "loss": 4.0135, "step": 17300 }, { "epoch": 5.763221454151745, "grad_norm": 0.7578125, "learning_rate": 6.01683202825417e-06, "loss": 4.0013, "step": 17301 }, { "epoch": 5.7635545931539935, "grad_norm": 0.7421875, "learning_rate": 6.016038583740067e-06, "loss": 3.9834, "step": 17302 }, { "epoch": 5.763887732156242, "grad_norm": 0.7578125, "learning_rate": 6.015245156510897e-06, "loss": 4.0084, "step": 17303 }, { "epoch": 5.764220871158491, "grad_norm": 0.765625, "learning_rate": 6.0144517465759086e-06, "loss": 3.97, "step": 17304 }, { "epoch": 5.764554010160739, "grad_norm": 0.73828125, "learning_rate": 6.013658353944338e-06, "loss": 4.0392, "step": 17305 }, { "epoch": 5.7648871491629885, "grad_norm": 0.75390625, "learning_rate": 6.0128649786254265e-06, "loss": 4.0213, "step": 17306 }, { "epoch": 5.765220288165237, "grad_norm": 0.74609375, "learning_rate": 6.0120716206284175e-06, "loss": 4.0232, "step": 17307 }, { "epoch": 5.765553427167486, "grad_norm": 0.7421875, "learning_rate": 6.011278279962551e-06, "loss": 3.9616, "step": 17308 }, { "epoch": 5.765886566169734, "grad_norm": 0.74609375, "learning_rate": 6.010484956637069e-06, "loss": 3.9544, "step": 17309 }, { "epoch": 5.766219705171983, "grad_norm": 0.7578125, "learning_rate": 6.00969165066121e-06, "loss": 3.9214, "step": 17310 }, { "epoch": 5.766552844174232, "grad_norm": 0.6953125, "learning_rate": 6.008898362044216e-06, "loss": 4.0302, "step": 17311 }, { "epoch": 5.76688598317648, "grad_norm": 0.765625, "learning_rate": 6.0081050907953256e-06, "loss": 3.9804, "step": 17312 }, { "epoch": 5.767219122178729, "grad_norm": 0.77734375, "learning_rate": 6.007311836923783e-06, "loss": 3.9779, "step": 17313 }, { "epoch": 5.7675522611809775, "grad_norm": 0.77734375, "learning_rate": 6.006518600438821e-06, "loss": 3.9545, "step": 17314 }, { "epoch": 5.767885400183227, "grad_norm": 0.8203125, "learning_rate": 6.005725381349683e-06, "loss": 3.9812, "step": 17315 }, { "epoch": 5.768218539185475, "grad_norm": 0.78125, "learning_rate": 6.004932179665608e-06, "loss": 4.0545, "step": 17316 }, { "epoch": 5.768551678187724, "grad_norm": 0.734375, "learning_rate": 6.004138995395833e-06, "loss": 3.9568, "step": 17317 }, { "epoch": 5.7688848171899725, "grad_norm": 0.7421875, "learning_rate": 6.0033458285496e-06, "loss": 3.9669, "step": 17318 }, { "epoch": 5.769217956192222, "grad_norm": 0.765625, "learning_rate": 6.002552679136146e-06, "loss": 3.9526, "step": 17319 }, { "epoch": 5.76955109519447, "grad_norm": 0.71484375, "learning_rate": 6.00175954716471e-06, "loss": 4.0411, "step": 17320 }, { "epoch": 5.769884234196718, "grad_norm": 0.78125, "learning_rate": 6.00096643264453e-06, "loss": 3.9918, "step": 17321 }, { "epoch": 5.770217373198967, "grad_norm": 0.7578125, "learning_rate": 6.000173335584848e-06, "loss": 3.9684, "step": 17322 }, { "epoch": 5.770550512201216, "grad_norm": 0.78515625, "learning_rate": 5.999380255994894e-06, "loss": 3.9862, "step": 17323 }, { "epoch": 5.770883651203465, "grad_norm": 0.78125, "learning_rate": 5.99858719388391e-06, "loss": 4.007, "step": 17324 }, { "epoch": 5.771216790205713, "grad_norm": 0.7578125, "learning_rate": 5.997794149261133e-06, "loss": 4.0143, "step": 17325 }, { "epoch": 5.771549929207962, "grad_norm": 0.7734375, "learning_rate": 5.9970011221358005e-06, "loss": 3.9555, "step": 17326 }, { "epoch": 5.771883068210211, "grad_norm": 0.7421875, "learning_rate": 5.996208112517149e-06, "loss": 4.0121, "step": 17327 }, { "epoch": 5.772216207212459, "grad_norm": 0.76171875, "learning_rate": 5.995415120414418e-06, "loss": 3.9362, "step": 17328 }, { "epoch": 5.772549346214708, "grad_norm": 0.77734375, "learning_rate": 5.994622145836838e-06, "loss": 3.9599, "step": 17329 }, { "epoch": 5.7728824852169565, "grad_norm": 0.7890625, "learning_rate": 5.993829188793653e-06, "loss": 4.0269, "step": 17330 }, { "epoch": 5.773215624219206, "grad_norm": 0.76171875, "learning_rate": 5.993036249294096e-06, "loss": 3.9741, "step": 17331 }, { "epoch": 5.773548763221454, "grad_norm": 0.77734375, "learning_rate": 5.9922433273474e-06, "loss": 4.024, "step": 17332 }, { "epoch": 5.773881902223703, "grad_norm": 0.7421875, "learning_rate": 5.991450422962805e-06, "loss": 3.9115, "step": 17333 }, { "epoch": 5.774215041225951, "grad_norm": 0.7578125, "learning_rate": 5.990657536149544e-06, "loss": 4.0157, "step": 17334 }, { "epoch": 5.774548180228201, "grad_norm": 0.7578125, "learning_rate": 5.989864666916854e-06, "loss": 3.9249, "step": 17335 }, { "epoch": 5.774881319230449, "grad_norm": 0.76953125, "learning_rate": 5.9890718152739685e-06, "loss": 4.0441, "step": 17336 }, { "epoch": 5.775214458232698, "grad_norm": 0.83203125, "learning_rate": 5.988278981230126e-06, "loss": 4.0041, "step": 17337 }, { "epoch": 5.775547597234946, "grad_norm": 0.73046875, "learning_rate": 5.987486164794557e-06, "loss": 3.9674, "step": 17338 }, { "epoch": 5.775880736237195, "grad_norm": 0.79296875, "learning_rate": 5.986693365976499e-06, "loss": 3.9669, "step": 17339 }, { "epoch": 5.776213875239444, "grad_norm": 0.765625, "learning_rate": 5.985900584785187e-06, "loss": 4.0777, "step": 17340 }, { "epoch": 5.776547014241692, "grad_norm": 0.8203125, "learning_rate": 5.985107821229851e-06, "loss": 4.0207, "step": 17341 }, { "epoch": 5.776880153243941, "grad_norm": 0.75390625, "learning_rate": 5.984315075319729e-06, "loss": 3.9873, "step": 17342 }, { "epoch": 5.77721329224619, "grad_norm": 0.75, "learning_rate": 5.983522347064052e-06, "loss": 3.9859, "step": 17343 }, { "epoch": 5.777546431248439, "grad_norm": 0.78125, "learning_rate": 5.982729636472057e-06, "loss": 3.9651, "step": 17344 }, { "epoch": 5.777879570250687, "grad_norm": 0.734375, "learning_rate": 5.981936943552973e-06, "loss": 4.0786, "step": 17345 }, { "epoch": 5.778212709252935, "grad_norm": 0.8125, "learning_rate": 5.981144268316038e-06, "loss": 3.9801, "step": 17346 }, { "epoch": 5.778545848255185, "grad_norm": 0.734375, "learning_rate": 5.98035161077048e-06, "loss": 3.9978, "step": 17347 }, { "epoch": 5.778878987257433, "grad_norm": 0.74609375, "learning_rate": 5.9795589709255355e-06, "loss": 4.0135, "step": 17348 }, { "epoch": 5.779212126259682, "grad_norm": 0.80859375, "learning_rate": 5.978766348790437e-06, "loss": 3.9543, "step": 17349 }, { "epoch": 5.77954526526193, "grad_norm": 0.7265625, "learning_rate": 5.977973744374415e-06, "loss": 4.0406, "step": 17350 }, { "epoch": 5.7798784042641795, "grad_norm": 0.75, "learning_rate": 5.977181157686701e-06, "loss": 3.9534, "step": 17351 }, { "epoch": 5.780211543266428, "grad_norm": 0.74609375, "learning_rate": 5.976388588736527e-06, "loss": 3.9721, "step": 17352 }, { "epoch": 5.780544682268676, "grad_norm": 0.8046875, "learning_rate": 5.9755960375331276e-06, "loss": 3.9908, "step": 17353 }, { "epoch": 5.780877821270925, "grad_norm": 0.734375, "learning_rate": 5.9748035040857316e-06, "loss": 4.0156, "step": 17354 }, { "epoch": 5.781210960273174, "grad_norm": 0.76953125, "learning_rate": 5.974010988403572e-06, "loss": 4.0416, "step": 17355 }, { "epoch": 5.781544099275423, "grad_norm": 0.75390625, "learning_rate": 5.973218490495878e-06, "loss": 3.9776, "step": 17356 }, { "epoch": 5.781877238277671, "grad_norm": 0.75, "learning_rate": 5.972426010371883e-06, "loss": 4.0234, "step": 17357 }, { "epoch": 5.78221037727992, "grad_norm": 0.8203125, "learning_rate": 5.971633548040818e-06, "loss": 3.9563, "step": 17358 }, { "epoch": 5.782543516282169, "grad_norm": 0.73046875, "learning_rate": 5.970841103511908e-06, "loss": 3.9967, "step": 17359 }, { "epoch": 5.782876655284418, "grad_norm": 0.77734375, "learning_rate": 5.970048676794389e-06, "loss": 3.9441, "step": 17360 }, { "epoch": 5.783209794286666, "grad_norm": 0.75, "learning_rate": 5.969256267897489e-06, "loss": 3.9753, "step": 17361 }, { "epoch": 5.783542933288915, "grad_norm": 0.765625, "learning_rate": 5.9684638768304365e-06, "loss": 4.0058, "step": 17362 }, { "epoch": 5.7838760722911635, "grad_norm": 0.74609375, "learning_rate": 5.9676715036024625e-06, "loss": 3.9045, "step": 17363 }, { "epoch": 5.784209211293412, "grad_norm": 0.74609375, "learning_rate": 5.966879148222798e-06, "loss": 4.057, "step": 17364 }, { "epoch": 5.784542350295661, "grad_norm": 0.73828125, "learning_rate": 5.966086810700669e-06, "loss": 4.025, "step": 17365 }, { "epoch": 5.784875489297909, "grad_norm": 0.78515625, "learning_rate": 5.965294491045308e-06, "loss": 3.9748, "step": 17366 }, { "epoch": 5.7852086283001585, "grad_norm": 0.7578125, "learning_rate": 5.964502189265943e-06, "loss": 3.992, "step": 17367 }, { "epoch": 5.785541767302407, "grad_norm": 0.71484375, "learning_rate": 5.963709905371801e-06, "loss": 3.9853, "step": 17368 }, { "epoch": 5.785874906304656, "grad_norm": 0.73828125, "learning_rate": 5.962917639372109e-06, "loss": 4.001, "step": 17369 }, { "epoch": 5.786208045306904, "grad_norm": 0.76171875, "learning_rate": 5.962125391276098e-06, "loss": 3.9961, "step": 17370 }, { "epoch": 5.7865411843091525, "grad_norm": 0.765625, "learning_rate": 5.961333161092995e-06, "loss": 4.0122, "step": 17371 }, { "epoch": 5.786874323311402, "grad_norm": 0.73046875, "learning_rate": 5.9605409488320284e-06, "loss": 3.9765, "step": 17372 }, { "epoch": 5.78720746231365, "grad_norm": 0.76171875, "learning_rate": 5.959748754502424e-06, "loss": 3.9657, "step": 17373 }, { "epoch": 5.787540601315899, "grad_norm": 0.7578125, "learning_rate": 5.958956578113412e-06, "loss": 3.9727, "step": 17374 }, { "epoch": 5.7878737403181475, "grad_norm": 0.734375, "learning_rate": 5.95816441967422e-06, "loss": 3.9599, "step": 17375 }, { "epoch": 5.788206879320397, "grad_norm": 0.75390625, "learning_rate": 5.957372279194069e-06, "loss": 3.9778, "step": 17376 }, { "epoch": 5.788540018322645, "grad_norm": 0.7109375, "learning_rate": 5.956580156682192e-06, "loss": 4.0077, "step": 17377 }, { "epoch": 5.788873157324894, "grad_norm": 0.73828125, "learning_rate": 5.955788052147811e-06, "loss": 4.0243, "step": 17378 }, { "epoch": 5.7892062963271425, "grad_norm": 0.85546875, "learning_rate": 5.954995965600155e-06, "loss": 3.9793, "step": 17379 }, { "epoch": 5.789539435329392, "grad_norm": 0.79296875, "learning_rate": 5.95420389704845e-06, "loss": 3.9887, "step": 17380 }, { "epoch": 5.78987257433164, "grad_norm": 0.7109375, "learning_rate": 5.953411846501922e-06, "loss": 3.9969, "step": 17381 }, { "epoch": 5.790205713333888, "grad_norm": 0.71484375, "learning_rate": 5.9526198139697944e-06, "loss": 4.0237, "step": 17382 }, { "epoch": 5.790538852336137, "grad_norm": 0.79296875, "learning_rate": 5.951827799461296e-06, "loss": 4.0387, "step": 17383 }, { "epoch": 5.790871991338386, "grad_norm": 0.75, "learning_rate": 5.951035802985652e-06, "loss": 4.0559, "step": 17384 }, { "epoch": 5.791205130340635, "grad_norm": 0.7890625, "learning_rate": 5.950243824552083e-06, "loss": 4.0352, "step": 17385 }, { "epoch": 5.791538269342883, "grad_norm": 0.7734375, "learning_rate": 5.949451864169818e-06, "loss": 4.0168, "step": 17386 }, { "epoch": 5.791871408345132, "grad_norm": 0.70703125, "learning_rate": 5.948659921848078e-06, "loss": 3.9801, "step": 17387 }, { "epoch": 5.792204547347381, "grad_norm": 0.7734375, "learning_rate": 5.947867997596092e-06, "loss": 3.8976, "step": 17388 }, { "epoch": 5.792537686349629, "grad_norm": 0.76953125, "learning_rate": 5.94707609142308e-06, "loss": 4.0151, "step": 17389 }, { "epoch": 5.792870825351878, "grad_norm": 0.73046875, "learning_rate": 5.946284203338271e-06, "loss": 3.984, "step": 17390 }, { "epoch": 5.793203964354126, "grad_norm": 0.75390625, "learning_rate": 5.945492333350883e-06, "loss": 3.9898, "step": 17391 }, { "epoch": 5.793537103356376, "grad_norm": 0.796875, "learning_rate": 5.9447004814701435e-06, "loss": 3.9319, "step": 17392 }, { "epoch": 5.793870242358624, "grad_norm": 0.796875, "learning_rate": 5.9439086477052766e-06, "loss": 3.9905, "step": 17393 }, { "epoch": 5.794203381360873, "grad_norm": 0.80078125, "learning_rate": 5.943116832065499e-06, "loss": 3.9292, "step": 17394 }, { "epoch": 5.794536520363121, "grad_norm": 0.734375, "learning_rate": 5.942325034560042e-06, "loss": 3.9991, "step": 17395 }, { "epoch": 5.794869659365371, "grad_norm": 0.76953125, "learning_rate": 5.941533255198122e-06, "loss": 4.0009, "step": 17396 }, { "epoch": 5.795202798367619, "grad_norm": 0.73046875, "learning_rate": 5.9407414939889645e-06, "loss": 3.9242, "step": 17397 }, { "epoch": 5.795535937369868, "grad_norm": 0.73046875, "learning_rate": 5.939949750941791e-06, "loss": 4.0344, "step": 17398 }, { "epoch": 5.795869076372116, "grad_norm": 0.7578125, "learning_rate": 5.9391580260658244e-06, "loss": 4.0352, "step": 17399 }, { "epoch": 5.796202215374365, "grad_norm": 0.796875, "learning_rate": 5.938366319370285e-06, "loss": 4.0066, "step": 17400 }, { "epoch": 5.796535354376614, "grad_norm": 0.81640625, "learning_rate": 5.9375746308643965e-06, "loss": 3.9788, "step": 17401 }, { "epoch": 5.796868493378862, "grad_norm": 0.7421875, "learning_rate": 5.936782960557382e-06, "loss": 3.9571, "step": 17402 }, { "epoch": 5.797201632381111, "grad_norm": 0.7734375, "learning_rate": 5.935991308458456e-06, "loss": 3.9741, "step": 17403 }, { "epoch": 5.79753477138336, "grad_norm": 0.74609375, "learning_rate": 5.935199674576845e-06, "loss": 3.9783, "step": 17404 }, { "epoch": 5.797867910385609, "grad_norm": 0.75, "learning_rate": 5.934408058921768e-06, "loss": 4.0238, "step": 17405 }, { "epoch": 5.798201049387857, "grad_norm": 0.7421875, "learning_rate": 5.933616461502446e-06, "loss": 4.0623, "step": 17406 }, { "epoch": 5.798534188390105, "grad_norm": 0.7734375, "learning_rate": 5.9328248823280995e-06, "loss": 4.014, "step": 17407 }, { "epoch": 5.7988673273923546, "grad_norm": 0.7578125, "learning_rate": 5.932033321407949e-06, "loss": 3.9984, "step": 17408 }, { "epoch": 5.799200466394603, "grad_norm": 0.734375, "learning_rate": 5.931241778751214e-06, "loss": 3.9405, "step": 17409 }, { "epoch": 5.799533605396852, "grad_norm": 0.76171875, "learning_rate": 5.930450254367115e-06, "loss": 3.9831, "step": 17410 }, { "epoch": 5.7998667443991, "grad_norm": 0.796875, "learning_rate": 5.9296587482648715e-06, "loss": 4.013, "step": 17411 }, { "epoch": 5.8001998834013495, "grad_norm": 0.80859375, "learning_rate": 5.928867260453702e-06, "loss": 3.9838, "step": 17412 }, { "epoch": 5.800533022403598, "grad_norm": 0.75, "learning_rate": 5.9280757909428244e-06, "loss": 3.9469, "step": 17413 }, { "epoch": 5.800866161405846, "grad_norm": 0.7109375, "learning_rate": 5.92728433974146e-06, "loss": 4.0173, "step": 17414 }, { "epoch": 5.801199300408095, "grad_norm": 0.7890625, "learning_rate": 5.926492906858825e-06, "loss": 4.041, "step": 17415 }, { "epoch": 5.801532439410344, "grad_norm": 0.75, "learning_rate": 5.925701492304141e-06, "loss": 4.0669, "step": 17416 }, { "epoch": 5.801865578412593, "grad_norm": 0.75, "learning_rate": 5.9249100960866235e-06, "loss": 3.9099, "step": 17417 }, { "epoch": 5.802198717414841, "grad_norm": 0.76171875, "learning_rate": 5.924118718215492e-06, "loss": 3.9545, "step": 17418 }, { "epoch": 5.80253185641709, "grad_norm": 0.75, "learning_rate": 5.923327358699964e-06, "loss": 4.0108, "step": 17419 }, { "epoch": 5.8028649954193385, "grad_norm": 0.75390625, "learning_rate": 5.92253601754926e-06, "loss": 3.9121, "step": 17420 }, { "epoch": 5.803198134421588, "grad_norm": 0.73046875, "learning_rate": 5.921744694772593e-06, "loss": 4.0237, "step": 17421 }, { "epoch": 5.803531273423836, "grad_norm": 0.79296875, "learning_rate": 5.92095339037918e-06, "loss": 3.9826, "step": 17422 }, { "epoch": 5.803864412426085, "grad_norm": 0.75390625, "learning_rate": 5.920162104378242e-06, "loss": 3.9616, "step": 17423 }, { "epoch": 5.8041975514283335, "grad_norm": 0.7734375, "learning_rate": 5.919370836778991e-06, "loss": 3.9767, "step": 17424 }, { "epoch": 5.804530690430582, "grad_norm": 0.74609375, "learning_rate": 5.918579587590648e-06, "loss": 4.0104, "step": 17425 }, { "epoch": 5.804863829432831, "grad_norm": 0.7734375, "learning_rate": 5.917788356822426e-06, "loss": 3.9712, "step": 17426 }, { "epoch": 5.805196968435079, "grad_norm": 0.76171875, "learning_rate": 5.916997144483545e-06, "loss": 3.9679, "step": 17427 }, { "epoch": 5.8055301074373284, "grad_norm": 0.7421875, "learning_rate": 5.916205950583215e-06, "loss": 3.9921, "step": 17428 }, { "epoch": 5.805863246439577, "grad_norm": 0.76953125, "learning_rate": 5.91541477513066e-06, "loss": 3.9607, "step": 17429 }, { "epoch": 5.806196385441826, "grad_norm": 0.80078125, "learning_rate": 5.914623618135088e-06, "loss": 4.0168, "step": 17430 }, { "epoch": 5.806529524444074, "grad_norm": 0.73828125, "learning_rate": 5.913832479605717e-06, "loss": 3.994, "step": 17431 }, { "epoch": 5.8068626634463225, "grad_norm": 0.734375, "learning_rate": 5.913041359551762e-06, "loss": 4.0282, "step": 17432 }, { "epoch": 5.807195802448572, "grad_norm": 0.75390625, "learning_rate": 5.912250257982437e-06, "loss": 3.9939, "step": 17433 }, { "epoch": 5.80752894145082, "grad_norm": 0.7734375, "learning_rate": 5.911459174906958e-06, "loss": 3.98, "step": 17434 }, { "epoch": 5.807862080453069, "grad_norm": 0.70703125, "learning_rate": 5.910668110334538e-06, "loss": 4.0543, "step": 17435 }, { "epoch": 5.8081952194553175, "grad_norm": 0.7578125, "learning_rate": 5.909877064274394e-06, "loss": 3.9907, "step": 17436 }, { "epoch": 5.808528358457567, "grad_norm": 0.78125, "learning_rate": 5.909086036735737e-06, "loss": 3.9409, "step": 17437 }, { "epoch": 5.808861497459815, "grad_norm": 0.77734375, "learning_rate": 5.908295027727783e-06, "loss": 4.0155, "step": 17438 }, { "epoch": 5.809194636462064, "grad_norm": 0.7734375, "learning_rate": 5.907504037259744e-06, "loss": 3.9472, "step": 17439 }, { "epoch": 5.809527775464312, "grad_norm": 0.734375, "learning_rate": 5.906713065340832e-06, "loss": 4.0615, "step": 17440 }, { "epoch": 5.809860914466562, "grad_norm": 0.77734375, "learning_rate": 5.905922111980262e-06, "loss": 4.0495, "step": 17441 }, { "epoch": 5.81019405346881, "grad_norm": 0.73828125, "learning_rate": 5.905131177187246e-06, "loss": 3.9648, "step": 17442 }, { "epoch": 5.810527192471058, "grad_norm": 0.80078125, "learning_rate": 5.904340260970999e-06, "loss": 3.9782, "step": 17443 }, { "epoch": 5.810860331473307, "grad_norm": 0.75, "learning_rate": 5.90354936334073e-06, "loss": 3.9589, "step": 17444 }, { "epoch": 5.811193470475556, "grad_norm": 0.7578125, "learning_rate": 5.9027584843056554e-06, "loss": 3.9558, "step": 17445 }, { "epoch": 5.811526609477805, "grad_norm": 0.7734375, "learning_rate": 5.901967623874982e-06, "loss": 3.9495, "step": 17446 }, { "epoch": 5.811859748480053, "grad_norm": 0.765625, "learning_rate": 5.901176782057929e-06, "loss": 4.0422, "step": 17447 }, { "epoch": 5.812192887482302, "grad_norm": 0.765625, "learning_rate": 5.9003859588637e-06, "loss": 3.9922, "step": 17448 }, { "epoch": 5.812526026484551, "grad_norm": 0.7890625, "learning_rate": 5.89959515430151e-06, "loss": 3.9497, "step": 17449 }, { "epoch": 5.812859165486799, "grad_norm": 0.74609375, "learning_rate": 5.898804368380571e-06, "loss": 3.9597, "step": 17450 }, { "epoch": 5.813192304489048, "grad_norm": 0.7421875, "learning_rate": 5.89801360111009e-06, "loss": 3.9318, "step": 17451 }, { "epoch": 5.813525443491296, "grad_norm": 0.75390625, "learning_rate": 5.897222852499285e-06, "loss": 3.9584, "step": 17452 }, { "epoch": 5.813858582493546, "grad_norm": 0.7265625, "learning_rate": 5.896432122557358e-06, "loss": 3.9554, "step": 17453 }, { "epoch": 5.814191721495794, "grad_norm": 0.7578125, "learning_rate": 5.895641411293526e-06, "loss": 3.9998, "step": 17454 }, { "epoch": 5.814524860498043, "grad_norm": 0.78125, "learning_rate": 5.894850718716995e-06, "loss": 3.892, "step": 17455 }, { "epoch": 5.814857999500291, "grad_norm": 0.7734375, "learning_rate": 5.8940600448369816e-06, "loss": 4.0122, "step": 17456 }, { "epoch": 5.8151911385025405, "grad_norm": 0.75, "learning_rate": 5.8932693896626845e-06, "loss": 3.9604, "step": 17457 }, { "epoch": 5.815524277504789, "grad_norm": 0.765625, "learning_rate": 5.892478753203321e-06, "loss": 3.926, "step": 17458 }, { "epoch": 5.815857416507038, "grad_norm": 0.7421875, "learning_rate": 5.891688135468096e-06, "loss": 3.9917, "step": 17459 }, { "epoch": 5.816190555509286, "grad_norm": 0.75, "learning_rate": 5.890897536466223e-06, "loss": 3.9881, "step": 17460 }, { "epoch": 5.816523694511535, "grad_norm": 0.8203125, "learning_rate": 5.890106956206906e-06, "loss": 3.9825, "step": 17461 }, { "epoch": 5.816856833513784, "grad_norm": 0.75, "learning_rate": 5.889316394699358e-06, "loss": 4.0346, "step": 17462 }, { "epoch": 5.817189972516032, "grad_norm": 0.69921875, "learning_rate": 5.888525851952784e-06, "loss": 4.0034, "step": 17463 }, { "epoch": 5.817523111518281, "grad_norm": 0.75, "learning_rate": 5.887735327976395e-06, "loss": 4.0711, "step": 17464 }, { "epoch": 5.81785625052053, "grad_norm": 0.765625, "learning_rate": 5.886944822779398e-06, "loss": 3.9016, "step": 17465 }, { "epoch": 5.818189389522779, "grad_norm": 0.76953125, "learning_rate": 5.886154336370997e-06, "loss": 3.9619, "step": 17466 }, { "epoch": 5.818522528525027, "grad_norm": 0.71875, "learning_rate": 5.885363868760405e-06, "loss": 4.03, "step": 17467 }, { "epoch": 5.818855667527275, "grad_norm": 0.79296875, "learning_rate": 5.884573419956824e-06, "loss": 3.9497, "step": 17468 }, { "epoch": 5.8191888065295245, "grad_norm": 0.7734375, "learning_rate": 5.883782989969465e-06, "loss": 3.9158, "step": 17469 }, { "epoch": 5.819521945531773, "grad_norm": 0.7890625, "learning_rate": 5.882992578807533e-06, "loss": 3.9353, "step": 17470 }, { "epoch": 5.819855084534022, "grad_norm": 0.75, "learning_rate": 5.882202186480236e-06, "loss": 3.9703, "step": 17471 }, { "epoch": 5.82018822353627, "grad_norm": 0.76953125, "learning_rate": 5.881411812996777e-06, "loss": 3.9446, "step": 17472 }, { "epoch": 5.8205213625385195, "grad_norm": 0.76171875, "learning_rate": 5.880621458366367e-06, "loss": 4.0187, "step": 17473 }, { "epoch": 5.820854501540768, "grad_norm": 0.78515625, "learning_rate": 5.879831122598211e-06, "loss": 4.0222, "step": 17474 }, { "epoch": 5.821187640543017, "grad_norm": 0.74609375, "learning_rate": 5.87904080570151e-06, "loss": 3.9247, "step": 17475 }, { "epoch": 5.821520779545265, "grad_norm": 0.7421875, "learning_rate": 5.878250507685474e-06, "loss": 3.9332, "step": 17476 }, { "epoch": 5.821853918547514, "grad_norm": 0.79296875, "learning_rate": 5.877460228559305e-06, "loss": 4.0135, "step": 17477 }, { "epoch": 5.822187057549763, "grad_norm": 0.75390625, "learning_rate": 5.8766699683322126e-06, "loss": 3.9696, "step": 17478 }, { "epoch": 5.822520196552011, "grad_norm": 0.7734375, "learning_rate": 5.8758797270133964e-06, "loss": 4.0218, "step": 17479 }, { "epoch": 5.82285333555426, "grad_norm": 0.7265625, "learning_rate": 5.875089504612065e-06, "loss": 3.992, "step": 17480 }, { "epoch": 5.8231864745565085, "grad_norm": 0.7890625, "learning_rate": 5.87429930113742e-06, "loss": 3.9753, "step": 17481 }, { "epoch": 5.823519613558758, "grad_norm": 0.71484375, "learning_rate": 5.873509116598669e-06, "loss": 4.0401, "step": 17482 }, { "epoch": 5.823852752561006, "grad_norm": 0.72265625, "learning_rate": 5.872718951005016e-06, "loss": 3.9728, "step": 17483 }, { "epoch": 5.824185891563255, "grad_norm": 0.76171875, "learning_rate": 5.871928804365658e-06, "loss": 3.9257, "step": 17484 }, { "epoch": 5.8245190305655035, "grad_norm": 0.7578125, "learning_rate": 5.871138676689805e-06, "loss": 4.0743, "step": 17485 }, { "epoch": 5.824852169567752, "grad_norm": 0.7734375, "learning_rate": 5.870348567986656e-06, "loss": 3.9431, "step": 17486 }, { "epoch": 5.825185308570001, "grad_norm": 0.74609375, "learning_rate": 5.8695584782654195e-06, "loss": 4.0553, "step": 17487 }, { "epoch": 5.825518447572249, "grad_norm": 0.7265625, "learning_rate": 5.868768407535292e-06, "loss": 3.9182, "step": 17488 }, { "epoch": 5.825851586574498, "grad_norm": 0.765625, "learning_rate": 5.867978355805482e-06, "loss": 3.9269, "step": 17489 }, { "epoch": 5.826184725576747, "grad_norm": 0.77734375, "learning_rate": 5.867188323085187e-06, "loss": 3.993, "step": 17490 }, { "epoch": 5.826517864578996, "grad_norm": 0.796875, "learning_rate": 5.8663983093836136e-06, "loss": 3.9885, "step": 17491 }, { "epoch": 5.826851003581244, "grad_norm": 0.73828125, "learning_rate": 5.865608314709962e-06, "loss": 3.9251, "step": 17492 }, { "epoch": 5.8271841425834925, "grad_norm": 0.734375, "learning_rate": 5.864818339073432e-06, "loss": 4.0357, "step": 17493 }, { "epoch": 5.827517281585742, "grad_norm": 0.7890625, "learning_rate": 5.864028382483226e-06, "loss": 3.9537, "step": 17494 }, { "epoch": 5.82785042058799, "grad_norm": 0.7890625, "learning_rate": 5.863238444948546e-06, "loss": 3.9688, "step": 17495 }, { "epoch": 5.828183559590239, "grad_norm": 0.75390625, "learning_rate": 5.862448526478594e-06, "loss": 4.0118, "step": 17496 }, { "epoch": 5.828516698592487, "grad_norm": 0.7734375, "learning_rate": 5.861658627082567e-06, "loss": 3.9285, "step": 17497 }, { "epoch": 5.828849837594737, "grad_norm": 0.74609375, "learning_rate": 5.860868746769672e-06, "loss": 3.9924, "step": 17498 }, { "epoch": 5.829182976596985, "grad_norm": 0.73046875, "learning_rate": 5.860078885549103e-06, "loss": 3.9968, "step": 17499 }, { "epoch": 5.829516115599234, "grad_norm": 0.71875, "learning_rate": 5.859289043430065e-06, "loss": 4.0424, "step": 17500 }, { "epoch": 5.829849254601482, "grad_norm": 0.7734375, "learning_rate": 5.858499220421756e-06, "loss": 4.0565, "step": 17501 }, { "epoch": 5.830182393603732, "grad_norm": 0.7421875, "learning_rate": 5.857709416533376e-06, "loss": 3.9995, "step": 17502 }, { "epoch": 5.83051553260598, "grad_norm": 0.73828125, "learning_rate": 5.856919631774121e-06, "loss": 4.0226, "step": 17503 }, { "epoch": 5.830848671608228, "grad_norm": 0.73046875, "learning_rate": 5.8561298661531955e-06, "loss": 4.0328, "step": 17504 }, { "epoch": 5.831181810610477, "grad_norm": 0.7578125, "learning_rate": 5.855340119679795e-06, "loss": 3.9268, "step": 17505 }, { "epoch": 5.831514949612726, "grad_norm": 0.7734375, "learning_rate": 5.8545503923631225e-06, "loss": 3.9372, "step": 17506 }, { "epoch": 5.831848088614975, "grad_norm": 0.78515625, "learning_rate": 5.85376068421237e-06, "loss": 3.9309, "step": 17507 }, { "epoch": 5.832181227617223, "grad_norm": 0.8046875, "learning_rate": 5.852970995236743e-06, "loss": 4.0611, "step": 17508 }, { "epoch": 5.832514366619472, "grad_norm": 0.7421875, "learning_rate": 5.852181325445434e-06, "loss": 4.0, "step": 17509 }, { "epoch": 5.832847505621721, "grad_norm": 0.76953125, "learning_rate": 5.851391674847647e-06, "loss": 4.0213, "step": 17510 }, { "epoch": 5.833180644623969, "grad_norm": 0.77734375, "learning_rate": 5.850602043452575e-06, "loss": 4.026, "step": 17511 }, { "epoch": 5.833513783626218, "grad_norm": 0.75, "learning_rate": 5.849812431269413e-06, "loss": 3.9563, "step": 17512 }, { "epoch": 5.833846922628466, "grad_norm": 0.734375, "learning_rate": 5.849022838307366e-06, "loss": 3.9361, "step": 17513 }, { "epoch": 5.834180061630716, "grad_norm": 0.765625, "learning_rate": 5.848233264575624e-06, "loss": 4.039, "step": 17514 }, { "epoch": 5.834513200632964, "grad_norm": 0.76171875, "learning_rate": 5.847443710083388e-06, "loss": 3.9871, "step": 17515 }, { "epoch": 5.834846339635213, "grad_norm": 0.76953125, "learning_rate": 5.846654174839853e-06, "loss": 3.9602, "step": 17516 }, { "epoch": 5.835179478637461, "grad_norm": 0.73046875, "learning_rate": 5.845864658854217e-06, "loss": 4.06, "step": 17517 }, { "epoch": 5.8355126176397105, "grad_norm": 0.765625, "learning_rate": 5.845075162135672e-06, "loss": 3.9445, "step": 17518 }, { "epoch": 5.835845756641959, "grad_norm": 0.7578125, "learning_rate": 5.844285684693421e-06, "loss": 4.0155, "step": 17519 }, { "epoch": 5.836178895644208, "grad_norm": 0.76953125, "learning_rate": 5.843496226536654e-06, "loss": 3.9307, "step": 17520 }, { "epoch": 5.836512034646456, "grad_norm": 0.73046875, "learning_rate": 5.842706787674566e-06, "loss": 3.9917, "step": 17521 }, { "epoch": 5.836845173648705, "grad_norm": 0.75390625, "learning_rate": 5.841917368116356e-06, "loss": 4.0005, "step": 17522 }, { "epoch": 5.837178312650954, "grad_norm": 0.76953125, "learning_rate": 5.841127967871216e-06, "loss": 3.8651, "step": 17523 }, { "epoch": 5.837511451653202, "grad_norm": 0.77734375, "learning_rate": 5.840338586948343e-06, "loss": 3.9124, "step": 17524 }, { "epoch": 5.837844590655451, "grad_norm": 0.75390625, "learning_rate": 5.839549225356929e-06, "loss": 3.9793, "step": 17525 }, { "epoch": 5.8381777296576995, "grad_norm": 0.796875, "learning_rate": 5.838759883106172e-06, "loss": 3.924, "step": 17526 }, { "epoch": 5.838510868659949, "grad_norm": 0.77734375, "learning_rate": 5.837970560205263e-06, "loss": 3.9714, "step": 17527 }, { "epoch": 5.838844007662197, "grad_norm": 0.76171875, "learning_rate": 5.8371812566634e-06, "loss": 3.9843, "step": 17528 }, { "epoch": 5.839177146664445, "grad_norm": 0.76171875, "learning_rate": 5.836391972489772e-06, "loss": 3.9828, "step": 17529 }, { "epoch": 5.8395102856666945, "grad_norm": 0.7265625, "learning_rate": 5.835602707693572e-06, "loss": 4.0092, "step": 17530 }, { "epoch": 5.839843424668943, "grad_norm": 0.73828125, "learning_rate": 5.834813462283998e-06, "loss": 4.0373, "step": 17531 }, { "epoch": 5.840176563671192, "grad_norm": 0.75390625, "learning_rate": 5.834024236270238e-06, "loss": 3.9634, "step": 17532 }, { "epoch": 5.84050970267344, "grad_norm": 0.76953125, "learning_rate": 5.833235029661488e-06, "loss": 4.0571, "step": 17533 }, { "epoch": 5.8408428416756895, "grad_norm": 0.78515625, "learning_rate": 5.832445842466939e-06, "loss": 3.9251, "step": 17534 }, { "epoch": 5.841175980677938, "grad_norm": 0.7890625, "learning_rate": 5.8316566746957865e-06, "loss": 3.9531, "step": 17535 }, { "epoch": 5.841509119680187, "grad_norm": 0.796875, "learning_rate": 5.830867526357217e-06, "loss": 3.9795, "step": 17536 }, { "epoch": 5.841842258682435, "grad_norm": 0.7890625, "learning_rate": 5.830078397460429e-06, "loss": 4.0313, "step": 17537 }, { "epoch": 5.842175397684684, "grad_norm": 0.77734375, "learning_rate": 5.82928928801461e-06, "loss": 3.9178, "step": 17538 }, { "epoch": 5.842508536686933, "grad_norm": 0.7265625, "learning_rate": 5.828500198028949e-06, "loss": 4.0995, "step": 17539 }, { "epoch": 5.842841675689181, "grad_norm": 0.8046875, "learning_rate": 5.827711127512644e-06, "loss": 4.0285, "step": 17540 }, { "epoch": 5.84317481469143, "grad_norm": 0.78515625, "learning_rate": 5.826922076474879e-06, "loss": 3.9799, "step": 17541 }, { "epoch": 5.8435079536936785, "grad_norm": 0.76953125, "learning_rate": 5.826133044924849e-06, "loss": 4.0392, "step": 17542 }, { "epoch": 5.843841092695928, "grad_norm": 0.76953125, "learning_rate": 5.825344032871743e-06, "loss": 3.9651, "step": 17543 }, { "epoch": 5.844174231698176, "grad_norm": 0.7421875, "learning_rate": 5.824555040324753e-06, "loss": 3.9638, "step": 17544 }, { "epoch": 5.844507370700425, "grad_norm": 0.72265625, "learning_rate": 5.823766067293066e-06, "loss": 4.0285, "step": 17545 }, { "epoch": 5.844840509702673, "grad_norm": 0.7578125, "learning_rate": 5.8229771137858775e-06, "loss": 4.0321, "step": 17546 }, { "epoch": 5.845173648704922, "grad_norm": 0.765625, "learning_rate": 5.82218817981237e-06, "loss": 3.9244, "step": 17547 }, { "epoch": 5.845506787707171, "grad_norm": 0.76171875, "learning_rate": 5.821399265381737e-06, "loss": 3.9974, "step": 17548 }, { "epoch": 5.845839926709419, "grad_norm": 0.75, "learning_rate": 5.8206103705031655e-06, "loss": 4.0136, "step": 17549 }, { "epoch": 5.846173065711668, "grad_norm": 0.73828125, "learning_rate": 5.819821495185847e-06, "loss": 4.0386, "step": 17550 }, { "epoch": 5.846506204713917, "grad_norm": 0.80078125, "learning_rate": 5.819032639438967e-06, "loss": 4.0079, "step": 17551 }, { "epoch": 5.846839343716166, "grad_norm": 0.77734375, "learning_rate": 5.8182438032717165e-06, "loss": 4.0621, "step": 17552 }, { "epoch": 5.847172482718414, "grad_norm": 0.82421875, "learning_rate": 5.817454986693283e-06, "loss": 3.9717, "step": 17553 }, { "epoch": 5.8475056217206625, "grad_norm": 0.80859375, "learning_rate": 5.816666189712855e-06, "loss": 3.9072, "step": 17554 }, { "epoch": 5.847838760722912, "grad_norm": 0.765625, "learning_rate": 5.815877412339622e-06, "loss": 3.9718, "step": 17555 }, { "epoch": 5.84817189972516, "grad_norm": 0.765625, "learning_rate": 5.8150886545827665e-06, "loss": 3.9583, "step": 17556 }, { "epoch": 5.848505038727409, "grad_norm": 0.734375, "learning_rate": 5.81429991645148e-06, "loss": 4.0146, "step": 17557 }, { "epoch": 5.848838177729657, "grad_norm": 0.796875, "learning_rate": 5.813511197954946e-06, "loss": 4.0374, "step": 17558 }, { "epoch": 5.849171316731907, "grad_norm": 0.71484375, "learning_rate": 5.812722499102356e-06, "loss": 3.9583, "step": 17559 }, { "epoch": 5.849504455734155, "grad_norm": 0.7890625, "learning_rate": 5.811933819902892e-06, "loss": 4.0292, "step": 17560 }, { "epoch": 5.849837594736404, "grad_norm": 0.74609375, "learning_rate": 5.811145160365745e-06, "loss": 4.0528, "step": 17561 }, { "epoch": 5.850170733738652, "grad_norm": 0.80859375, "learning_rate": 5.810356520500097e-06, "loss": 3.969, "step": 17562 }, { "epoch": 5.850503872740902, "grad_norm": 0.796875, "learning_rate": 5.809567900315137e-06, "loss": 3.9949, "step": 17563 }, { "epoch": 5.85083701174315, "grad_norm": 0.75390625, "learning_rate": 5.808779299820051e-06, "loss": 3.966, "step": 17564 }, { "epoch": 5.851170150745398, "grad_norm": 0.75, "learning_rate": 5.807990719024019e-06, "loss": 4.0461, "step": 17565 }, { "epoch": 5.851503289747647, "grad_norm": 0.734375, "learning_rate": 5.807202157936233e-06, "loss": 4.0422, "step": 17566 }, { "epoch": 5.851836428749896, "grad_norm": 0.7109375, "learning_rate": 5.806413616565874e-06, "loss": 4.141, "step": 17567 }, { "epoch": 5.852169567752145, "grad_norm": 0.75, "learning_rate": 5.80562509492213e-06, "loss": 3.9633, "step": 17568 }, { "epoch": 5.852502706754393, "grad_norm": 0.73828125, "learning_rate": 5.804836593014181e-06, "loss": 4.0176, "step": 17569 }, { "epoch": 5.852835845756642, "grad_norm": 0.75390625, "learning_rate": 5.804048110851216e-06, "loss": 4.0152, "step": 17570 }, { "epoch": 5.853168984758891, "grad_norm": 0.8046875, "learning_rate": 5.803259648442415e-06, "loss": 3.9145, "step": 17571 }, { "epoch": 5.853502123761139, "grad_norm": 0.7421875, "learning_rate": 5.802471205796965e-06, "loss": 4.0153, "step": 17572 }, { "epoch": 5.853835262763388, "grad_norm": 0.76953125, "learning_rate": 5.801682782924052e-06, "loss": 3.9587, "step": 17573 }, { "epoch": 5.854168401765636, "grad_norm": 0.7265625, "learning_rate": 5.800894379832852e-06, "loss": 3.9724, "step": 17574 }, { "epoch": 5.8545015407678855, "grad_norm": 0.75, "learning_rate": 5.800105996532554e-06, "loss": 4.026, "step": 17575 }, { "epoch": 5.854834679770134, "grad_norm": 0.77734375, "learning_rate": 5.799317633032338e-06, "loss": 4.0796, "step": 17576 }, { "epoch": 5.855167818772383, "grad_norm": 0.78125, "learning_rate": 5.798529289341388e-06, "loss": 4.0022, "step": 17577 }, { "epoch": 5.855500957774631, "grad_norm": 0.76953125, "learning_rate": 5.797740965468885e-06, "loss": 3.9816, "step": 17578 }, { "epoch": 5.8558340967768805, "grad_norm": 0.7578125, "learning_rate": 5.796952661424015e-06, "loss": 4.0458, "step": 17579 }, { "epoch": 5.856167235779129, "grad_norm": 0.734375, "learning_rate": 5.796164377215956e-06, "loss": 3.9903, "step": 17580 }, { "epoch": 5.856500374781378, "grad_norm": 0.76171875, "learning_rate": 5.795376112853893e-06, "loss": 3.944, "step": 17581 }, { "epoch": 5.856833513783626, "grad_norm": 0.80078125, "learning_rate": 5.794587868347008e-06, "loss": 3.941, "step": 17582 }, { "epoch": 5.857166652785875, "grad_norm": 0.73828125, "learning_rate": 5.793799643704477e-06, "loss": 4.0021, "step": 17583 }, { "epoch": 5.857499791788124, "grad_norm": 0.75390625, "learning_rate": 5.793011438935486e-06, "loss": 4.0068, "step": 17584 }, { "epoch": 5.857832930790372, "grad_norm": 0.78125, "learning_rate": 5.792223254049213e-06, "loss": 3.9963, "step": 17585 }, { "epoch": 5.858166069792621, "grad_norm": 0.7265625, "learning_rate": 5.791435089054841e-06, "loss": 4.0948, "step": 17586 }, { "epoch": 5.8584992087948695, "grad_norm": 0.734375, "learning_rate": 5.790646943961549e-06, "loss": 3.9749, "step": 17587 }, { "epoch": 5.858832347797119, "grad_norm": 0.76171875, "learning_rate": 5.789858818778519e-06, "loss": 3.9311, "step": 17588 }, { "epoch": 5.859165486799367, "grad_norm": 0.8046875, "learning_rate": 5.7890707135149276e-06, "loss": 3.9882, "step": 17589 }, { "epoch": 5.859498625801615, "grad_norm": 0.75, "learning_rate": 5.7882826281799574e-06, "loss": 3.9586, "step": 17590 }, { "epoch": 5.8598317648038645, "grad_norm": 0.75, "learning_rate": 5.78749456278279e-06, "loss": 4.0466, "step": 17591 }, { "epoch": 5.860164903806113, "grad_norm": 0.734375, "learning_rate": 5.7867065173325975e-06, "loss": 4.0159, "step": 17592 }, { "epoch": 5.860498042808362, "grad_norm": 0.76171875, "learning_rate": 5.785918491838565e-06, "loss": 4.0262, "step": 17593 }, { "epoch": 5.86083118181061, "grad_norm": 0.7421875, "learning_rate": 5.78513048630987e-06, "loss": 4.006, "step": 17594 }, { "epoch": 5.861164320812859, "grad_norm": 0.7421875, "learning_rate": 5.7843425007556875e-06, "loss": 3.9959, "step": 17595 }, { "epoch": 5.861497459815108, "grad_norm": 0.75, "learning_rate": 5.783554535185201e-06, "loss": 4.0224, "step": 17596 }, { "epoch": 5.861830598817357, "grad_norm": 0.74609375, "learning_rate": 5.782766589607586e-06, "loss": 4.0445, "step": 17597 }, { "epoch": 5.862163737819605, "grad_norm": 0.7578125, "learning_rate": 5.781978664032021e-06, "loss": 4.0395, "step": 17598 }, { "epoch": 5.862496876821854, "grad_norm": 0.7265625, "learning_rate": 5.781190758467681e-06, "loss": 4.051, "step": 17599 }, { "epoch": 5.862830015824103, "grad_norm": 0.7890625, "learning_rate": 5.78040287292375e-06, "loss": 4.0205, "step": 17600 }, { "epoch": 5.863163154826351, "grad_norm": 0.69921875, "learning_rate": 5.7796150074093986e-06, "loss": 4.0074, "step": 17601 }, { "epoch": 5.8634962938286, "grad_norm": 0.7265625, "learning_rate": 5.778827161933806e-06, "loss": 3.969, "step": 17602 }, { "epoch": 5.8638294328308485, "grad_norm": 0.77734375, "learning_rate": 5.778039336506149e-06, "loss": 4.0291, "step": 17603 }, { "epoch": 5.864162571833098, "grad_norm": 0.78125, "learning_rate": 5.777251531135603e-06, "loss": 3.9942, "step": 17604 }, { "epoch": 5.864495710835346, "grad_norm": 0.75390625, "learning_rate": 5.7764637458313465e-06, "loss": 4.0332, "step": 17605 }, { "epoch": 5.864828849837595, "grad_norm": 0.765625, "learning_rate": 5.775675980602552e-06, "loss": 4.0433, "step": 17606 }, { "epoch": 5.865161988839843, "grad_norm": 0.75390625, "learning_rate": 5.7748882354584e-06, "loss": 4.0328, "step": 17607 }, { "epoch": 5.865495127842092, "grad_norm": 0.796875, "learning_rate": 5.774100510408062e-06, "loss": 3.9843, "step": 17608 }, { "epoch": 5.865828266844341, "grad_norm": 0.765625, "learning_rate": 5.773312805460717e-06, "loss": 4.0247, "step": 17609 }, { "epoch": 5.866161405846589, "grad_norm": 0.80859375, "learning_rate": 5.772525120625538e-06, "loss": 3.8748, "step": 17610 }, { "epoch": 5.866494544848838, "grad_norm": 0.78125, "learning_rate": 5.771737455911696e-06, "loss": 4.0348, "step": 17611 }, { "epoch": 5.866827683851087, "grad_norm": 0.76953125, "learning_rate": 5.770949811328372e-06, "loss": 3.9703, "step": 17612 }, { "epoch": 5.867160822853336, "grad_norm": 0.7578125, "learning_rate": 5.770162186884735e-06, "loss": 4.0545, "step": 17613 }, { "epoch": 5.867493961855584, "grad_norm": 0.7734375, "learning_rate": 5.769374582589964e-06, "loss": 4.0699, "step": 17614 }, { "epoch": 5.867827100857832, "grad_norm": 0.77734375, "learning_rate": 5.768586998453229e-06, "loss": 3.9997, "step": 17615 }, { "epoch": 5.868160239860082, "grad_norm": 0.7734375, "learning_rate": 5.767799434483708e-06, "loss": 3.9972, "step": 17616 }, { "epoch": 5.868493378862331, "grad_norm": 0.76171875, "learning_rate": 5.767011890690569e-06, "loss": 4.0439, "step": 17617 }, { "epoch": 5.868826517864579, "grad_norm": 0.8046875, "learning_rate": 5.76622436708299e-06, "loss": 4.0046, "step": 17618 }, { "epoch": 5.869159656866827, "grad_norm": 0.796875, "learning_rate": 5.765436863670141e-06, "loss": 3.913, "step": 17619 }, { "epoch": 5.869492795869077, "grad_norm": 0.765625, "learning_rate": 5.764649380461194e-06, "loss": 3.9966, "step": 17620 }, { "epoch": 5.869825934871325, "grad_norm": 0.72265625, "learning_rate": 5.763861917465324e-06, "loss": 4.0232, "step": 17621 }, { "epoch": 5.870159073873574, "grad_norm": 0.7265625, "learning_rate": 5.763074474691701e-06, "loss": 4.0381, "step": 17622 }, { "epoch": 5.870492212875822, "grad_norm": 0.75, "learning_rate": 5.7622870521495e-06, "loss": 3.9529, "step": 17623 }, { "epoch": 5.8708253518780715, "grad_norm": 0.734375, "learning_rate": 5.76149964984789e-06, "loss": 3.9713, "step": 17624 }, { "epoch": 5.87115849088032, "grad_norm": 0.76953125, "learning_rate": 5.7607122677960436e-06, "loss": 3.9496, "step": 17625 }, { "epoch": 5.871491629882568, "grad_norm": 0.73828125, "learning_rate": 5.7599249060031315e-06, "loss": 3.9802, "step": 17626 }, { "epoch": 5.871824768884817, "grad_norm": 0.7265625, "learning_rate": 5.759137564478327e-06, "loss": 3.9561, "step": 17627 }, { "epoch": 5.872157907887066, "grad_norm": 0.78125, "learning_rate": 5.758350243230799e-06, "loss": 3.9933, "step": 17628 }, { "epoch": 5.872491046889315, "grad_norm": 0.76953125, "learning_rate": 5.757562942269717e-06, "loss": 4.0066, "step": 17629 }, { "epoch": 5.872824185891563, "grad_norm": 0.76171875, "learning_rate": 5.756775661604253e-06, "loss": 4.0595, "step": 17630 }, { "epoch": 5.873157324893812, "grad_norm": 0.76171875, "learning_rate": 5.7559884012435755e-06, "loss": 3.9505, "step": 17631 }, { "epoch": 5.8734904638960606, "grad_norm": 0.734375, "learning_rate": 5.755201161196857e-06, "loss": 4.0434, "step": 17632 }, { "epoch": 5.873823602898309, "grad_norm": 0.75, "learning_rate": 5.754413941473264e-06, "loss": 3.956, "step": 17633 }, { "epoch": 5.874156741900558, "grad_norm": 0.7421875, "learning_rate": 5.753626742081971e-06, "loss": 4.0146, "step": 17634 }, { "epoch": 5.874489880902806, "grad_norm": 0.8125, "learning_rate": 5.752839563032143e-06, "loss": 4.0256, "step": 17635 }, { "epoch": 5.8748230199050555, "grad_norm": 0.7890625, "learning_rate": 5.752052404332948e-06, "loss": 3.986, "step": 17636 }, { "epoch": 5.875156158907304, "grad_norm": 0.75, "learning_rate": 5.751265265993558e-06, "loss": 3.9876, "step": 17637 }, { "epoch": 5.875489297909553, "grad_norm": 0.8359375, "learning_rate": 5.750478148023139e-06, "loss": 3.9374, "step": 17638 }, { "epoch": 5.875822436911801, "grad_norm": 0.73046875, "learning_rate": 5.749691050430859e-06, "loss": 3.9012, "step": 17639 }, { "epoch": 5.8761555759140505, "grad_norm": 0.7265625, "learning_rate": 5.748903973225888e-06, "loss": 4.0485, "step": 17640 }, { "epoch": 5.876488714916299, "grad_norm": 0.75, "learning_rate": 5.748116916417394e-06, "loss": 3.9063, "step": 17641 }, { "epoch": 5.876821853918548, "grad_norm": 0.7890625, "learning_rate": 5.747329880014543e-06, "loss": 4.0266, "step": 17642 }, { "epoch": 5.877154992920796, "grad_norm": 0.76171875, "learning_rate": 5.746542864026501e-06, "loss": 4.0386, "step": 17643 }, { "epoch": 5.8774881319230445, "grad_norm": 0.82421875, "learning_rate": 5.74575586846244e-06, "loss": 3.9239, "step": 17644 }, { "epoch": 5.877821270925294, "grad_norm": 0.77734375, "learning_rate": 5.744968893331521e-06, "loss": 3.991, "step": 17645 }, { "epoch": 5.878154409927542, "grad_norm": 0.7421875, "learning_rate": 5.744181938642913e-06, "loss": 4.0458, "step": 17646 }, { "epoch": 5.878487548929791, "grad_norm": 0.76171875, "learning_rate": 5.743395004405782e-06, "loss": 3.9959, "step": 17647 }, { "epoch": 5.8788206879320395, "grad_norm": 0.74609375, "learning_rate": 5.742608090629294e-06, "loss": 4.0601, "step": 17648 }, { "epoch": 5.879153826934289, "grad_norm": 0.74609375, "learning_rate": 5.7418211973226174e-06, "loss": 4.022, "step": 17649 }, { "epoch": 5.879486965936537, "grad_norm": 0.78515625, "learning_rate": 5.741034324494913e-06, "loss": 3.9773, "step": 17650 }, { "epoch": 5.879820104938785, "grad_norm": 0.75390625, "learning_rate": 5.740247472155351e-06, "loss": 3.9357, "step": 17651 }, { "epoch": 5.8801532439410344, "grad_norm": 0.75390625, "learning_rate": 5.739460640313092e-06, "loss": 3.9792, "step": 17652 }, { "epoch": 5.880486382943283, "grad_norm": 0.71875, "learning_rate": 5.738673828977307e-06, "loss": 4.0281, "step": 17653 }, { "epoch": 5.880819521945532, "grad_norm": 0.765625, "learning_rate": 5.737887038157155e-06, "loss": 3.9802, "step": 17654 }, { "epoch": 5.88115266094778, "grad_norm": 0.81640625, "learning_rate": 5.737100267861801e-06, "loss": 4.0378, "step": 17655 }, { "epoch": 5.881485799950029, "grad_norm": 0.8125, "learning_rate": 5.736313518100412e-06, "loss": 3.9421, "step": 17656 }, { "epoch": 5.881818938952278, "grad_norm": 0.76171875, "learning_rate": 5.735526788882149e-06, "loss": 4.0192, "step": 17657 }, { "epoch": 5.882152077954527, "grad_norm": 0.7734375, "learning_rate": 5.734740080216178e-06, "loss": 4.021, "step": 17658 }, { "epoch": 5.882485216956775, "grad_norm": 0.7890625, "learning_rate": 5.73395339211166e-06, "loss": 3.9273, "step": 17659 }, { "epoch": 5.882818355959024, "grad_norm": 0.75, "learning_rate": 5.733166724577762e-06, "loss": 3.9864, "step": 17660 }, { "epoch": 5.883151494961273, "grad_norm": 0.74609375, "learning_rate": 5.732380077623642e-06, "loss": 3.948, "step": 17661 }, { "epoch": 5.883484633963521, "grad_norm": 0.76953125, "learning_rate": 5.73159345125847e-06, "loss": 3.9788, "step": 17662 }, { "epoch": 5.88381777296577, "grad_norm": 0.7890625, "learning_rate": 5.730806845491401e-06, "loss": 3.9718, "step": 17663 }, { "epoch": 5.884150911968018, "grad_norm": 0.76171875, "learning_rate": 5.730020260331599e-06, "loss": 3.9878, "step": 17664 }, { "epoch": 5.884484050970268, "grad_norm": 0.7890625, "learning_rate": 5.7292336957882295e-06, "loss": 3.9242, "step": 17665 }, { "epoch": 5.884817189972516, "grad_norm": 0.74609375, "learning_rate": 5.72844715187045e-06, "loss": 4.0165, "step": 17666 }, { "epoch": 5.885150328974765, "grad_norm": 0.796875, "learning_rate": 5.727660628587425e-06, "loss": 3.9597, "step": 17667 }, { "epoch": 5.885483467977013, "grad_norm": 0.828125, "learning_rate": 5.726874125948314e-06, "loss": 4.056, "step": 17668 }, { "epoch": 5.885816606979262, "grad_norm": 0.74609375, "learning_rate": 5.72608764396228e-06, "loss": 4.0139, "step": 17669 }, { "epoch": 5.886149745981511, "grad_norm": 0.8046875, "learning_rate": 5.725301182638482e-06, "loss": 4.0176, "step": 17670 }, { "epoch": 5.886482884983759, "grad_norm": 0.69921875, "learning_rate": 5.724514741986083e-06, "loss": 4.1018, "step": 17671 }, { "epoch": 5.886816023986008, "grad_norm": 0.71484375, "learning_rate": 5.723728322014241e-06, "loss": 4.009, "step": 17672 }, { "epoch": 5.887149162988257, "grad_norm": 0.75, "learning_rate": 5.722941922732114e-06, "loss": 4.0312, "step": 17673 }, { "epoch": 5.887482301990506, "grad_norm": 0.71484375, "learning_rate": 5.7221555441488674e-06, "loss": 4.0603, "step": 17674 }, { "epoch": 5.887815440992754, "grad_norm": 0.75390625, "learning_rate": 5.721369186273655e-06, "loss": 3.9445, "step": 17675 }, { "epoch": 5.888148579995003, "grad_norm": 0.7734375, "learning_rate": 5.720582849115642e-06, "loss": 3.9498, "step": 17676 }, { "epoch": 5.888481718997252, "grad_norm": 0.73046875, "learning_rate": 5.719796532683981e-06, "loss": 4.0586, "step": 17677 }, { "epoch": 5.888814857999501, "grad_norm": 0.76171875, "learning_rate": 5.719010236987838e-06, "loss": 3.935, "step": 17678 }, { "epoch": 5.889147997001749, "grad_norm": 0.7421875, "learning_rate": 5.7182239620363666e-06, "loss": 4.0217, "step": 17679 }, { "epoch": 5.889481136003997, "grad_norm": 0.7734375, "learning_rate": 5.717437707838729e-06, "loss": 3.9373, "step": 17680 }, { "epoch": 5.8898142750062465, "grad_norm": 0.73828125, "learning_rate": 5.71665147440408e-06, "loss": 4.0123, "step": 17681 }, { "epoch": 5.890147414008495, "grad_norm": 0.7421875, "learning_rate": 5.715865261741576e-06, "loss": 3.969, "step": 17682 }, { "epoch": 5.890480553010744, "grad_norm": 0.7421875, "learning_rate": 5.7150790698603796e-06, "loss": 3.9404, "step": 17683 }, { "epoch": 5.890813692012992, "grad_norm": 0.74609375, "learning_rate": 5.7142928987696446e-06, "loss": 3.9229, "step": 17684 }, { "epoch": 5.8911468310152415, "grad_norm": 0.765625, "learning_rate": 5.71350674847853e-06, "loss": 3.9733, "step": 17685 }, { "epoch": 5.89147997001749, "grad_norm": 0.703125, "learning_rate": 5.7127206189961925e-06, "loss": 3.9275, "step": 17686 }, { "epoch": 5.891813109019738, "grad_norm": 0.796875, "learning_rate": 5.711934510331789e-06, "loss": 3.9601, "step": 17687 }, { "epoch": 5.892146248021987, "grad_norm": 0.73046875, "learning_rate": 5.711148422494476e-06, "loss": 3.982, "step": 17688 }, { "epoch": 5.892479387024236, "grad_norm": 0.75390625, "learning_rate": 5.71036235549341e-06, "loss": 3.9429, "step": 17689 }, { "epoch": 5.892812526026485, "grad_norm": 0.7421875, "learning_rate": 5.709576309337743e-06, "loss": 3.9709, "step": 17690 }, { "epoch": 5.893145665028733, "grad_norm": 0.74609375, "learning_rate": 5.708790284036636e-06, "loss": 3.9344, "step": 17691 }, { "epoch": 5.893478804030982, "grad_norm": 0.72265625, "learning_rate": 5.708004279599241e-06, "loss": 3.987, "step": 17692 }, { "epoch": 5.8938119430332305, "grad_norm": 0.72265625, "learning_rate": 5.707218296034716e-06, "loss": 3.9666, "step": 17693 }, { "epoch": 5.894145082035479, "grad_norm": 0.73828125, "learning_rate": 5.706432333352213e-06, "loss": 4.0516, "step": 17694 }, { "epoch": 5.894478221037728, "grad_norm": 0.796875, "learning_rate": 5.70564639156089e-06, "loss": 4.0441, "step": 17695 }, { "epoch": 5.894811360039976, "grad_norm": 0.80078125, "learning_rate": 5.704860470669899e-06, "loss": 3.9669, "step": 17696 }, { "epoch": 5.8951444990422255, "grad_norm": 0.734375, "learning_rate": 5.704074570688396e-06, "loss": 4.0187, "step": 17697 }, { "epoch": 5.895477638044474, "grad_norm": 0.78515625, "learning_rate": 5.703288691625537e-06, "loss": 4.0982, "step": 17698 }, { "epoch": 5.895810777046723, "grad_norm": 0.7578125, "learning_rate": 5.702502833490469e-06, "loss": 4.0617, "step": 17699 }, { "epoch": 5.896143916048971, "grad_norm": 0.7734375, "learning_rate": 5.701716996292352e-06, "loss": 3.9582, "step": 17700 }, { "epoch": 5.89647705505122, "grad_norm": 0.78515625, "learning_rate": 5.700931180040334e-06, "loss": 3.998, "step": 17701 }, { "epoch": 5.896810194053469, "grad_norm": 0.7734375, "learning_rate": 5.700145384743573e-06, "loss": 4.0503, "step": 17702 }, { "epoch": 5.897143333055718, "grad_norm": 0.74609375, "learning_rate": 5.699359610411219e-06, "loss": 4.0632, "step": 17703 }, { "epoch": 5.897476472057966, "grad_norm": 0.7734375, "learning_rate": 5.6985738570524266e-06, "loss": 3.982, "step": 17704 }, { "epoch": 5.8978096110602145, "grad_norm": 0.74609375, "learning_rate": 5.697788124676344e-06, "loss": 3.9857, "step": 17705 }, { "epoch": 5.898142750062464, "grad_norm": 0.75, "learning_rate": 5.69700241329213e-06, "loss": 3.9706, "step": 17706 }, { "epoch": 5.898475889064712, "grad_norm": 0.80078125, "learning_rate": 5.6962167229089325e-06, "loss": 3.972, "step": 17707 }, { "epoch": 5.898809028066961, "grad_norm": 0.76171875, "learning_rate": 5.695431053535901e-06, "loss": 3.9898, "step": 17708 }, { "epoch": 5.8991421670692095, "grad_norm": 0.80859375, "learning_rate": 5.694645405182191e-06, "loss": 3.9292, "step": 17709 }, { "epoch": 5.899475306071459, "grad_norm": 0.765625, "learning_rate": 5.693859777856949e-06, "loss": 3.9887, "step": 17710 }, { "epoch": 5.899808445073707, "grad_norm": 0.74609375, "learning_rate": 5.693074171569331e-06, "loss": 4.0233, "step": 17711 }, { "epoch": 5.900141584075955, "grad_norm": 0.7421875, "learning_rate": 5.6922885863284815e-06, "loss": 4.0224, "step": 17712 }, { "epoch": 5.900474723078204, "grad_norm": 0.7890625, "learning_rate": 5.691503022143558e-06, "loss": 3.8821, "step": 17713 }, { "epoch": 5.900807862080453, "grad_norm": 0.796875, "learning_rate": 5.690717479023705e-06, "loss": 3.9439, "step": 17714 }, { "epoch": 5.901141001082702, "grad_norm": 0.75, "learning_rate": 5.689931956978076e-06, "loss": 3.9934, "step": 17715 }, { "epoch": 5.90147414008495, "grad_norm": 0.7578125, "learning_rate": 5.689146456015821e-06, "loss": 3.9781, "step": 17716 }, { "epoch": 5.901807279087199, "grad_norm": 0.74609375, "learning_rate": 5.688360976146084e-06, "loss": 3.964, "step": 17717 }, { "epoch": 5.902140418089448, "grad_norm": 0.78515625, "learning_rate": 5.687575517378019e-06, "loss": 3.9129, "step": 17718 }, { "epoch": 5.902473557091697, "grad_norm": 0.74609375, "learning_rate": 5.686790079720772e-06, "loss": 3.9714, "step": 17719 }, { "epoch": 5.902806696093945, "grad_norm": 0.79296875, "learning_rate": 5.686004663183495e-06, "loss": 3.9684, "step": 17720 }, { "epoch": 5.903139835096194, "grad_norm": 0.74609375, "learning_rate": 5.685219267775333e-06, "loss": 4.0382, "step": 17721 }, { "epoch": 5.903472974098443, "grad_norm": 0.77734375, "learning_rate": 5.684433893505437e-06, "loss": 3.9672, "step": 17722 }, { "epoch": 5.903806113100691, "grad_norm": 0.7578125, "learning_rate": 5.6836485403829525e-06, "loss": 3.933, "step": 17723 }, { "epoch": 5.90413925210294, "grad_norm": 0.80078125, "learning_rate": 5.68286320841703e-06, "loss": 4.0197, "step": 17724 }, { "epoch": 5.904472391105188, "grad_norm": 0.7421875, "learning_rate": 5.682077897616816e-06, "loss": 3.9954, "step": 17725 }, { "epoch": 5.904805530107438, "grad_norm": 0.7265625, "learning_rate": 5.681292607991454e-06, "loss": 4.0352, "step": 17726 }, { "epoch": 5.905138669109686, "grad_norm": 0.75390625, "learning_rate": 5.680507339550096e-06, "loss": 3.9924, "step": 17727 }, { "epoch": 5.905471808111935, "grad_norm": 0.75390625, "learning_rate": 5.679722092301884e-06, "loss": 4.0096, "step": 17728 }, { "epoch": 5.905804947114183, "grad_norm": 0.74609375, "learning_rate": 5.678936866255969e-06, "loss": 3.9939, "step": 17729 }, { "epoch": 5.906138086116432, "grad_norm": 0.78515625, "learning_rate": 5.678151661421494e-06, "loss": 3.9552, "step": 17730 }, { "epoch": 5.906471225118681, "grad_norm": 0.75, "learning_rate": 5.677366477807606e-06, "loss": 4.0093, "step": 17731 }, { "epoch": 5.906804364120929, "grad_norm": 0.8046875, "learning_rate": 5.6765813154234505e-06, "loss": 3.9903, "step": 17732 }, { "epoch": 5.907137503123178, "grad_norm": 0.83203125, "learning_rate": 5.675796174278175e-06, "loss": 3.9967, "step": 17733 }, { "epoch": 5.907470642125427, "grad_norm": 0.80859375, "learning_rate": 5.675011054380924e-06, "loss": 3.9795, "step": 17734 }, { "epoch": 5.907803781127676, "grad_norm": 0.76953125, "learning_rate": 5.67422595574084e-06, "loss": 4.0072, "step": 17735 }, { "epoch": 5.908136920129924, "grad_norm": 0.76953125, "learning_rate": 5.673440878367066e-06, "loss": 3.9184, "step": 17736 }, { "epoch": 5.908470059132173, "grad_norm": 0.75, "learning_rate": 5.6726558222687525e-06, "loss": 3.9767, "step": 17737 }, { "epoch": 5.908803198134422, "grad_norm": 0.7265625, "learning_rate": 5.671870787455038e-06, "loss": 4.0835, "step": 17738 }, { "epoch": 5.909136337136671, "grad_norm": 0.78515625, "learning_rate": 5.671085773935072e-06, "loss": 3.9911, "step": 17739 }, { "epoch": 5.909469476138919, "grad_norm": 0.7578125, "learning_rate": 5.6703007817179925e-06, "loss": 3.9994, "step": 17740 }, { "epoch": 5.909802615141167, "grad_norm": 0.78125, "learning_rate": 5.6695158108129476e-06, "loss": 3.9684, "step": 17741 }, { "epoch": 5.9101357541434165, "grad_norm": 0.76171875, "learning_rate": 5.668730861229077e-06, "loss": 4.0189, "step": 17742 }, { "epoch": 5.910468893145665, "grad_norm": 0.796875, "learning_rate": 5.667945932975529e-06, "loss": 3.9989, "step": 17743 }, { "epoch": 5.910802032147914, "grad_norm": 0.7265625, "learning_rate": 5.667161026061441e-06, "loss": 3.9034, "step": 17744 }, { "epoch": 5.911135171150162, "grad_norm": 0.765625, "learning_rate": 5.6663761404959555e-06, "loss": 4.0281, "step": 17745 }, { "epoch": 5.9114683101524115, "grad_norm": 0.7421875, "learning_rate": 5.665591276288219e-06, "loss": 3.9689, "step": 17746 }, { "epoch": 5.91180144915466, "grad_norm": 0.77734375, "learning_rate": 5.664806433447367e-06, "loss": 3.9524, "step": 17747 }, { "epoch": 5.912134588156908, "grad_norm": 0.78125, "learning_rate": 5.664021611982548e-06, "loss": 4.0085, "step": 17748 }, { "epoch": 5.912467727159157, "grad_norm": 0.765625, "learning_rate": 5.663236811902898e-06, "loss": 3.9329, "step": 17749 }, { "epoch": 5.9128008661614055, "grad_norm": 0.72265625, "learning_rate": 5.662452033217563e-06, "loss": 4.021, "step": 17750 }, { "epoch": 5.913134005163655, "grad_norm": 0.7890625, "learning_rate": 5.66166727593568e-06, "loss": 3.9757, "step": 17751 }, { "epoch": 5.913467144165903, "grad_norm": 0.79296875, "learning_rate": 5.660882540066395e-06, "loss": 3.9811, "step": 17752 }, { "epoch": 5.913800283168152, "grad_norm": 0.7265625, "learning_rate": 5.660097825618843e-06, "loss": 4.0424, "step": 17753 }, { "epoch": 5.9141334221704005, "grad_norm": 0.76171875, "learning_rate": 5.659313132602164e-06, "loss": 4.0673, "step": 17754 }, { "epoch": 5.914466561172649, "grad_norm": 0.75390625, "learning_rate": 5.658528461025503e-06, "loss": 4.0556, "step": 17755 }, { "epoch": 5.914799700174898, "grad_norm": 0.7578125, "learning_rate": 5.657743810897994e-06, "loss": 3.9386, "step": 17756 }, { "epoch": 5.915132839177147, "grad_norm": 0.76953125, "learning_rate": 5.656959182228781e-06, "loss": 3.9555, "step": 17757 }, { "epoch": 5.9154659781793955, "grad_norm": 0.7578125, "learning_rate": 5.656174575027e-06, "loss": 4.0128, "step": 17758 }, { "epoch": 5.915799117181644, "grad_norm": 0.7421875, "learning_rate": 5.655389989301793e-06, "loss": 4.0125, "step": 17759 }, { "epoch": 5.916132256183893, "grad_norm": 0.74609375, "learning_rate": 5.6546054250622964e-06, "loss": 4.007, "step": 17760 }, { "epoch": 5.916465395186141, "grad_norm": 0.76953125, "learning_rate": 5.653820882317653e-06, "loss": 3.9413, "step": 17761 }, { "epoch": 5.91679853418839, "grad_norm": 0.7890625, "learning_rate": 5.653036361076994e-06, "loss": 3.9324, "step": 17762 }, { "epoch": 5.917131673190639, "grad_norm": 0.765625, "learning_rate": 5.65225186134946e-06, "loss": 3.9221, "step": 17763 }, { "epoch": 5.917464812192888, "grad_norm": 0.7265625, "learning_rate": 5.651467383144192e-06, "loss": 3.9246, "step": 17764 }, { "epoch": 5.917797951195136, "grad_norm": 0.75, "learning_rate": 5.650682926470323e-06, "loss": 3.9385, "step": 17765 }, { "epoch": 5.9181310901973845, "grad_norm": 0.76953125, "learning_rate": 5.649898491336994e-06, "loss": 4.0857, "step": 17766 }, { "epoch": 5.918464229199634, "grad_norm": 0.765625, "learning_rate": 5.6491140777533375e-06, "loss": 4.0073, "step": 17767 }, { "epoch": 5.918797368201882, "grad_norm": 0.7890625, "learning_rate": 5.6483296857284955e-06, "loss": 3.9184, "step": 17768 }, { "epoch": 5.919130507204131, "grad_norm": 0.78515625, "learning_rate": 5.647545315271601e-06, "loss": 3.9939, "step": 17769 }, { "epoch": 5.919463646206379, "grad_norm": 0.78125, "learning_rate": 5.646760966391794e-06, "loss": 3.9814, "step": 17770 }, { "epoch": 5.919796785208629, "grad_norm": 0.81640625, "learning_rate": 5.645976639098205e-06, "loss": 4.036, "step": 17771 }, { "epoch": 5.920129924210877, "grad_norm": 0.76953125, "learning_rate": 5.645192333399972e-06, "loss": 4.0091, "step": 17772 }, { "epoch": 5.920463063213125, "grad_norm": 0.765625, "learning_rate": 5.6444080493062325e-06, "loss": 4.0555, "step": 17773 }, { "epoch": 5.920796202215374, "grad_norm": 0.76953125, "learning_rate": 5.643623786826117e-06, "loss": 3.913, "step": 17774 }, { "epoch": 5.921129341217623, "grad_norm": 0.80078125, "learning_rate": 5.642839545968766e-06, "loss": 3.9574, "step": 17775 }, { "epoch": 5.921462480219872, "grad_norm": 0.76171875, "learning_rate": 5.64205532674331e-06, "loss": 3.9794, "step": 17776 }, { "epoch": 5.92179561922212, "grad_norm": 0.7734375, "learning_rate": 5.641271129158888e-06, "loss": 3.9766, "step": 17777 }, { "epoch": 5.922128758224369, "grad_norm": 0.76171875, "learning_rate": 5.640486953224628e-06, "loss": 3.9731, "step": 17778 }, { "epoch": 5.922461897226618, "grad_norm": 0.7421875, "learning_rate": 5.639702798949673e-06, "loss": 3.9939, "step": 17779 }, { "epoch": 5.922795036228867, "grad_norm": 0.7578125, "learning_rate": 5.638918666343146e-06, "loss": 3.9667, "step": 17780 }, { "epoch": 5.923128175231115, "grad_norm": 0.7109375, "learning_rate": 5.638134555414186e-06, "loss": 3.9612, "step": 17781 }, { "epoch": 5.923461314233364, "grad_norm": 0.74609375, "learning_rate": 5.637350466171926e-06, "loss": 4.0087, "step": 17782 }, { "epoch": 5.923794453235613, "grad_norm": 0.7578125, "learning_rate": 5.636566398625498e-06, "loss": 4.022, "step": 17783 }, { "epoch": 5.924127592237861, "grad_norm": 0.75, "learning_rate": 5.635782352784035e-06, "loss": 4.0267, "step": 17784 }, { "epoch": 5.92446073124011, "grad_norm": 0.78515625, "learning_rate": 5.634998328656671e-06, "loss": 3.9625, "step": 17785 }, { "epoch": 5.924793870242358, "grad_norm": 0.828125, "learning_rate": 5.634214326252536e-06, "loss": 4.0093, "step": 17786 }, { "epoch": 5.925127009244608, "grad_norm": 0.7265625, "learning_rate": 5.6334303455807635e-06, "loss": 3.974, "step": 17787 }, { "epoch": 5.925460148246856, "grad_norm": 0.7109375, "learning_rate": 5.632646386650485e-06, "loss": 3.9743, "step": 17788 }, { "epoch": 5.925793287249105, "grad_norm": 0.78515625, "learning_rate": 5.631862449470831e-06, "loss": 3.9823, "step": 17789 }, { "epoch": 5.926126426251353, "grad_norm": 0.75, "learning_rate": 5.6310785340509325e-06, "loss": 4.0254, "step": 17790 }, { "epoch": 5.926459565253602, "grad_norm": 0.73046875, "learning_rate": 5.630294640399921e-06, "loss": 3.9952, "step": 17791 }, { "epoch": 5.926792704255851, "grad_norm": 0.75390625, "learning_rate": 5.6295107685269275e-06, "loss": 3.9956, "step": 17792 }, { "epoch": 5.927125843258099, "grad_norm": 0.6953125, "learning_rate": 5.62872691844108e-06, "loss": 4.0533, "step": 17793 }, { "epoch": 5.927458982260348, "grad_norm": 0.796875, "learning_rate": 5.627943090151514e-06, "loss": 3.9448, "step": 17794 }, { "epoch": 5.927792121262597, "grad_norm": 0.7890625, "learning_rate": 5.627159283667354e-06, "loss": 3.9384, "step": 17795 }, { "epoch": 5.928125260264846, "grad_norm": 0.78125, "learning_rate": 5.6263754989977335e-06, "loss": 3.9566, "step": 17796 }, { "epoch": 5.928458399267094, "grad_norm": 0.77734375, "learning_rate": 5.625591736151781e-06, "loss": 3.9762, "step": 17797 }, { "epoch": 5.928791538269343, "grad_norm": 0.75390625, "learning_rate": 5.624807995138621e-06, "loss": 4.0621, "step": 17798 }, { "epoch": 5.9291246772715915, "grad_norm": 0.71484375, "learning_rate": 5.6240242759673895e-06, "loss": 3.9654, "step": 17799 }, { "epoch": 5.929457816273841, "grad_norm": 0.7421875, "learning_rate": 5.62324057864721e-06, "loss": 4.0254, "step": 17800 }, { "epoch": 5.929790955276089, "grad_norm": 0.74609375, "learning_rate": 5.622456903187214e-06, "loss": 4.0883, "step": 17801 }, { "epoch": 5.930124094278337, "grad_norm": 0.75390625, "learning_rate": 5.621673249596526e-06, "loss": 4.0569, "step": 17802 }, { "epoch": 5.9304572332805865, "grad_norm": 0.78125, "learning_rate": 5.6208896178842784e-06, "loss": 3.9363, "step": 17803 }, { "epoch": 5.930790372282835, "grad_norm": 0.75390625, "learning_rate": 5.620106008059595e-06, "loss": 3.9416, "step": 17804 }, { "epoch": 5.931123511285084, "grad_norm": 0.76171875, "learning_rate": 5.619322420131606e-06, "loss": 4.0168, "step": 17805 }, { "epoch": 5.931456650287332, "grad_norm": 0.80078125, "learning_rate": 5.61853885410944e-06, "loss": 4.0143, "step": 17806 }, { "epoch": 5.9317897892895814, "grad_norm": 0.75, "learning_rate": 5.617755310002217e-06, "loss": 3.9794, "step": 17807 }, { "epoch": 5.93212292829183, "grad_norm": 0.765625, "learning_rate": 5.61697178781907e-06, "loss": 4.0052, "step": 17808 }, { "epoch": 5.932456067294078, "grad_norm": 0.77734375, "learning_rate": 5.616188287569121e-06, "loss": 3.9456, "step": 17809 }, { "epoch": 5.932789206296327, "grad_norm": 0.74609375, "learning_rate": 5.615404809261499e-06, "loss": 4.0526, "step": 17810 }, { "epoch": 5.9331223452985755, "grad_norm": 0.75, "learning_rate": 5.614621352905327e-06, "loss": 4.0012, "step": 17811 }, { "epoch": 5.933455484300825, "grad_norm": 0.75, "learning_rate": 5.613837918509735e-06, "loss": 4.0474, "step": 17812 }, { "epoch": 5.933788623303073, "grad_norm": 0.7578125, "learning_rate": 5.613054506083845e-06, "loss": 3.9849, "step": 17813 }, { "epoch": 5.934121762305322, "grad_norm": 0.73828125, "learning_rate": 5.612271115636784e-06, "loss": 3.9514, "step": 17814 }, { "epoch": 5.9344549013075705, "grad_norm": 0.7734375, "learning_rate": 5.611487747177676e-06, "loss": 4.0004, "step": 17815 }, { "epoch": 5.93478804030982, "grad_norm": 0.765625, "learning_rate": 5.610704400715643e-06, "loss": 3.9691, "step": 17816 }, { "epoch": 5.935121179312068, "grad_norm": 0.734375, "learning_rate": 5.609921076259814e-06, "loss": 3.9958, "step": 17817 }, { "epoch": 5.935454318314317, "grad_norm": 0.78125, "learning_rate": 5.609137773819309e-06, "loss": 3.9833, "step": 17818 }, { "epoch": 5.935787457316565, "grad_norm": 0.7578125, "learning_rate": 5.608354493403254e-06, "loss": 3.9863, "step": 17819 }, { "epoch": 5.936120596318814, "grad_norm": 0.7578125, "learning_rate": 5.6075712350207695e-06, "loss": 4.0085, "step": 17820 }, { "epoch": 5.936453735321063, "grad_norm": 0.74609375, "learning_rate": 5.606787998680984e-06, "loss": 3.9736, "step": 17821 }, { "epoch": 5.936786874323311, "grad_norm": 0.7734375, "learning_rate": 5.6060047843930154e-06, "loss": 3.9707, "step": 17822 }, { "epoch": 5.93712001332556, "grad_norm": 0.80078125, "learning_rate": 5.60522159216599e-06, "loss": 3.9859, "step": 17823 }, { "epoch": 5.937453152327809, "grad_norm": 0.73828125, "learning_rate": 5.604438422009031e-06, "loss": 3.9615, "step": 17824 }, { "epoch": 5.937786291330058, "grad_norm": 0.734375, "learning_rate": 5.603655273931258e-06, "loss": 4.0515, "step": 17825 }, { "epoch": 5.938119430332306, "grad_norm": 0.80078125, "learning_rate": 5.6028721479417906e-06, "loss": 3.944, "step": 17826 }, { "epoch": 5.9384525693345545, "grad_norm": 0.765625, "learning_rate": 5.602089044049755e-06, "loss": 3.9836, "step": 17827 }, { "epoch": 5.938785708336804, "grad_norm": 0.796875, "learning_rate": 5.60130596226427e-06, "loss": 3.9982, "step": 17828 }, { "epoch": 5.939118847339052, "grad_norm": 0.7578125, "learning_rate": 5.600522902594461e-06, "loss": 3.8956, "step": 17829 }, { "epoch": 5.939451986341301, "grad_norm": 0.7109375, "learning_rate": 5.599739865049442e-06, "loss": 3.9822, "step": 17830 }, { "epoch": 5.939785125343549, "grad_norm": 0.73828125, "learning_rate": 5.598956849638341e-06, "loss": 3.9855, "step": 17831 }, { "epoch": 5.940118264345799, "grad_norm": 0.73046875, "learning_rate": 5.598173856370272e-06, "loss": 3.9782, "step": 17832 }, { "epoch": 5.940451403348047, "grad_norm": 0.78515625, "learning_rate": 5.597390885254363e-06, "loss": 4.0318, "step": 17833 }, { "epoch": 5.940784542350295, "grad_norm": 0.74609375, "learning_rate": 5.596607936299727e-06, "loss": 3.9519, "step": 17834 }, { "epoch": 5.941117681352544, "grad_norm": 0.77734375, "learning_rate": 5.5958250095154825e-06, "loss": 4.0674, "step": 17835 }, { "epoch": 5.941450820354793, "grad_norm": 0.75390625, "learning_rate": 5.5950421049107544e-06, "loss": 4.0245, "step": 17836 }, { "epoch": 5.941783959357042, "grad_norm": 0.77734375, "learning_rate": 5.59425922249466e-06, "loss": 3.9788, "step": 17837 }, { "epoch": 5.94211709835929, "grad_norm": 0.83203125, "learning_rate": 5.593476362276318e-06, "loss": 3.9832, "step": 17838 }, { "epoch": 5.942450237361539, "grad_norm": 0.76953125, "learning_rate": 5.592693524264846e-06, "loss": 4.0329, "step": 17839 }, { "epoch": 5.942783376363788, "grad_norm": 0.7890625, "learning_rate": 5.591910708469364e-06, "loss": 3.9268, "step": 17840 }, { "epoch": 5.943116515366037, "grad_norm": 0.79296875, "learning_rate": 5.591127914898987e-06, "loss": 3.9625, "step": 17841 }, { "epoch": 5.943449654368285, "grad_norm": 0.76953125, "learning_rate": 5.59034514356284e-06, "loss": 3.9769, "step": 17842 }, { "epoch": 5.943782793370534, "grad_norm": 0.734375, "learning_rate": 5.589562394470033e-06, "loss": 4.0094, "step": 17843 }, { "epoch": 5.944115932372783, "grad_norm": 0.7578125, "learning_rate": 5.588779667629686e-06, "loss": 3.9906, "step": 17844 }, { "epoch": 5.944449071375031, "grad_norm": 0.74609375, "learning_rate": 5.587996963050917e-06, "loss": 4.0051, "step": 17845 }, { "epoch": 5.94478221037728, "grad_norm": 0.765625, "learning_rate": 5.58721428074284e-06, "loss": 3.9565, "step": 17846 }, { "epoch": 5.945115349379528, "grad_norm": 0.7578125, "learning_rate": 5.586431620714575e-06, "loss": 3.9783, "step": 17847 }, { "epoch": 5.9454484883817775, "grad_norm": 0.78125, "learning_rate": 5.585648982975236e-06, "loss": 4.0456, "step": 17848 }, { "epoch": 5.945781627384026, "grad_norm": 0.7578125, "learning_rate": 5.584866367533941e-06, "loss": 3.96, "step": 17849 }, { "epoch": 5.946114766386275, "grad_norm": 0.765625, "learning_rate": 5.584083774399803e-06, "loss": 4.0115, "step": 17850 }, { "epoch": 5.946447905388523, "grad_norm": 0.75390625, "learning_rate": 5.583301203581943e-06, "loss": 3.9968, "step": 17851 }, { "epoch": 5.946781044390772, "grad_norm": 0.78125, "learning_rate": 5.5825186550894695e-06, "loss": 3.9773, "step": 17852 }, { "epoch": 5.947114183393021, "grad_norm": 0.78515625, "learning_rate": 5.581736128931499e-06, "loss": 4.0515, "step": 17853 }, { "epoch": 5.947447322395269, "grad_norm": 0.73046875, "learning_rate": 5.580953625117151e-06, "loss": 3.8844, "step": 17854 }, { "epoch": 5.947780461397518, "grad_norm": 0.80859375, "learning_rate": 5.5801711436555345e-06, "loss": 3.9754, "step": 17855 }, { "epoch": 5.9481136003997666, "grad_norm": 0.75390625, "learning_rate": 5.579388684555767e-06, "loss": 4.0016, "step": 17856 }, { "epoch": 5.948446739402016, "grad_norm": 0.75, "learning_rate": 5.578606247826958e-06, "loss": 4.0997, "step": 17857 }, { "epoch": 5.948779878404264, "grad_norm": 0.7265625, "learning_rate": 5.577823833478229e-06, "loss": 3.9973, "step": 17858 }, { "epoch": 5.949113017406513, "grad_norm": 0.76953125, "learning_rate": 5.577041441518686e-06, "loss": 3.974, "step": 17859 }, { "epoch": 5.9494461564087615, "grad_norm": 0.80078125, "learning_rate": 5.576259071957448e-06, "loss": 3.9607, "step": 17860 }, { "epoch": 5.949779295411011, "grad_norm": 0.75390625, "learning_rate": 5.575476724803623e-06, "loss": 4.0927, "step": 17861 }, { "epoch": 5.950112434413259, "grad_norm": 0.73828125, "learning_rate": 5.5746944000663246e-06, "loss": 3.9832, "step": 17862 }, { "epoch": 5.950445573415507, "grad_norm": 0.73828125, "learning_rate": 5.5739120977546685e-06, "loss": 3.9771, "step": 17863 }, { "epoch": 5.9507787124177565, "grad_norm": 0.765625, "learning_rate": 5.573129817877762e-06, "loss": 3.9951, "step": 17864 }, { "epoch": 5.951111851420005, "grad_norm": 0.73046875, "learning_rate": 5.572347560444721e-06, "loss": 3.9813, "step": 17865 }, { "epoch": 5.951444990422254, "grad_norm": 0.7421875, "learning_rate": 5.571565325464655e-06, "loss": 4.0065, "step": 17866 }, { "epoch": 5.951778129424502, "grad_norm": 0.75, "learning_rate": 5.570783112946677e-06, "loss": 3.9752, "step": 17867 }, { "epoch": 5.952111268426751, "grad_norm": 0.7890625, "learning_rate": 5.570000922899894e-06, "loss": 4.0692, "step": 17868 }, { "epoch": 5.952444407429, "grad_norm": 0.74609375, "learning_rate": 5.569218755333427e-06, "loss": 4.0249, "step": 17869 }, { "epoch": 5.952777546431248, "grad_norm": 0.76953125, "learning_rate": 5.5684366102563735e-06, "loss": 3.9703, "step": 17870 }, { "epoch": 5.953110685433497, "grad_norm": 0.7734375, "learning_rate": 5.5676544876778515e-06, "loss": 3.9716, "step": 17871 }, { "epoch": 5.9534438244357455, "grad_norm": 0.734375, "learning_rate": 5.5668723876069685e-06, "loss": 3.9809, "step": 17872 }, { "epoch": 5.953776963437995, "grad_norm": 0.74609375, "learning_rate": 5.566090310052836e-06, "loss": 3.9628, "step": 17873 }, { "epoch": 5.954110102440243, "grad_norm": 0.734375, "learning_rate": 5.565308255024561e-06, "loss": 4.0741, "step": 17874 }, { "epoch": 5.954443241442492, "grad_norm": 0.7734375, "learning_rate": 5.564526222531256e-06, "loss": 3.9258, "step": 17875 }, { "epoch": 5.9547763804447404, "grad_norm": 0.74609375, "learning_rate": 5.5637442125820265e-06, "loss": 3.9434, "step": 17876 }, { "epoch": 5.95510951944699, "grad_norm": 0.78515625, "learning_rate": 5.562962225185984e-06, "loss": 3.9854, "step": 17877 }, { "epoch": 5.955442658449238, "grad_norm": 0.76171875, "learning_rate": 5.5621802603522385e-06, "loss": 3.9351, "step": 17878 }, { "epoch": 5.955775797451487, "grad_norm": 0.75, "learning_rate": 5.561398318089892e-06, "loss": 4.0001, "step": 17879 }, { "epoch": 5.956108936453735, "grad_norm": 0.71484375, "learning_rate": 5.5606163984080574e-06, "loss": 4.0346, "step": 17880 }, { "epoch": 5.956442075455984, "grad_norm": 0.77734375, "learning_rate": 5.55983450131584e-06, "loss": 3.9108, "step": 17881 }, { "epoch": 5.956775214458233, "grad_norm": 0.7578125, "learning_rate": 5.55905262682235e-06, "loss": 3.9229, "step": 17882 }, { "epoch": 5.957108353460481, "grad_norm": 0.73828125, "learning_rate": 5.558270774936691e-06, "loss": 3.9726, "step": 17883 }, { "epoch": 5.95744149246273, "grad_norm": 0.73046875, "learning_rate": 5.5574889456679725e-06, "loss": 4.0214, "step": 17884 }, { "epoch": 5.957774631464979, "grad_norm": 0.73828125, "learning_rate": 5.5567071390253e-06, "loss": 4.0296, "step": 17885 }, { "epoch": 5.958107770467228, "grad_norm": 0.75, "learning_rate": 5.555925355017783e-06, "loss": 4.061, "step": 17886 }, { "epoch": 5.958440909469476, "grad_norm": 0.73828125, "learning_rate": 5.555143593654523e-06, "loss": 4.0037, "step": 17887 }, { "epoch": 5.958774048471724, "grad_norm": 0.7421875, "learning_rate": 5.554361854944626e-06, "loss": 3.9849, "step": 17888 }, { "epoch": 5.959107187473974, "grad_norm": 0.7734375, "learning_rate": 5.5535801388972e-06, "loss": 3.9512, "step": 17889 }, { "epoch": 5.959440326476222, "grad_norm": 0.7578125, "learning_rate": 5.552798445521349e-06, "loss": 3.9527, "step": 17890 }, { "epoch": 5.959773465478471, "grad_norm": 0.75390625, "learning_rate": 5.5520167748261794e-06, "loss": 3.98, "step": 17891 }, { "epoch": 5.960106604480719, "grad_norm": 0.73828125, "learning_rate": 5.551235126820794e-06, "loss": 3.9586, "step": 17892 }, { "epoch": 5.960439743482969, "grad_norm": 0.765625, "learning_rate": 5.5504535015143e-06, "loss": 3.9514, "step": 17893 }, { "epoch": 5.960772882485217, "grad_norm": 0.7578125, "learning_rate": 5.549671898915797e-06, "loss": 4.0055, "step": 17894 }, { "epoch": 5.961106021487465, "grad_norm": 0.76171875, "learning_rate": 5.548890319034397e-06, "loss": 3.9627, "step": 17895 }, { "epoch": 5.961439160489714, "grad_norm": 0.7734375, "learning_rate": 5.548108761879196e-06, "loss": 4.0178, "step": 17896 }, { "epoch": 5.961772299491963, "grad_norm": 0.76171875, "learning_rate": 5.5473272274592985e-06, "loss": 3.9422, "step": 17897 }, { "epoch": 5.962105438494212, "grad_norm": 0.78515625, "learning_rate": 5.546545715783811e-06, "loss": 4.0484, "step": 17898 }, { "epoch": 5.96243857749646, "grad_norm": 0.78125, "learning_rate": 5.545764226861834e-06, "loss": 3.9575, "step": 17899 }, { "epoch": 5.962771716498709, "grad_norm": 0.69921875, "learning_rate": 5.544982760702471e-06, "loss": 3.9763, "step": 17900 }, { "epoch": 5.963104855500958, "grad_norm": 0.77734375, "learning_rate": 5.544201317314824e-06, "loss": 3.9582, "step": 17901 }, { "epoch": 5.963437994503207, "grad_norm": 0.77734375, "learning_rate": 5.543419896707996e-06, "loss": 4.044, "step": 17902 }, { "epoch": 5.963771133505455, "grad_norm": 0.7578125, "learning_rate": 5.542638498891089e-06, "loss": 3.992, "step": 17903 }, { "epoch": 5.964104272507704, "grad_norm": 0.77734375, "learning_rate": 5.5418571238732036e-06, "loss": 3.947, "step": 17904 }, { "epoch": 5.9644374115099525, "grad_norm": 0.7734375, "learning_rate": 5.541075771663442e-06, "loss": 3.9783, "step": 17905 }, { "epoch": 5.964770550512201, "grad_norm": 0.71484375, "learning_rate": 5.540294442270902e-06, "loss": 3.99, "step": 17906 }, { "epoch": 5.96510368951445, "grad_norm": 0.765625, "learning_rate": 5.539513135704689e-06, "loss": 3.8998, "step": 17907 }, { "epoch": 5.965436828516698, "grad_norm": 0.7578125, "learning_rate": 5.5387318519739e-06, "loss": 4.057, "step": 17908 }, { "epoch": 5.9657699675189475, "grad_norm": 0.7890625, "learning_rate": 5.5379505910876385e-06, "loss": 3.9239, "step": 17909 }, { "epoch": 5.966103106521196, "grad_norm": 0.76953125, "learning_rate": 5.5371693530550015e-06, "loss": 4.0556, "step": 17910 }, { "epoch": 5.966436245523445, "grad_norm": 0.75, "learning_rate": 5.5363881378850915e-06, "loss": 4.0316, "step": 17911 }, { "epoch": 5.966769384525693, "grad_norm": 0.76171875, "learning_rate": 5.535606945587005e-06, "loss": 3.9862, "step": 17912 }, { "epoch": 5.967102523527942, "grad_norm": 0.7890625, "learning_rate": 5.534825776169847e-06, "loss": 3.9074, "step": 17913 }, { "epoch": 5.967435662530191, "grad_norm": 0.8046875, "learning_rate": 5.534044629642707e-06, "loss": 3.9952, "step": 17914 }, { "epoch": 5.967768801532439, "grad_norm": 0.7734375, "learning_rate": 5.533263506014692e-06, "loss": 4.0451, "step": 17915 }, { "epoch": 5.968101940534688, "grad_norm": 0.7578125, "learning_rate": 5.532482405294894e-06, "loss": 4.0413, "step": 17916 }, { "epoch": 5.9684350795369365, "grad_norm": 0.765625, "learning_rate": 5.531701327492416e-06, "loss": 4.0241, "step": 17917 }, { "epoch": 5.968768218539186, "grad_norm": 0.765625, "learning_rate": 5.530920272616353e-06, "loss": 4.0075, "step": 17918 }, { "epoch": 5.969101357541434, "grad_norm": 0.74609375, "learning_rate": 5.530139240675807e-06, "loss": 3.9537, "step": 17919 }, { "epoch": 5.969434496543683, "grad_norm": 0.7109375, "learning_rate": 5.529358231679869e-06, "loss": 4.057, "step": 17920 }, { "epoch": 5.9697676355459315, "grad_norm": 0.7734375, "learning_rate": 5.528577245637641e-06, "loss": 4.0393, "step": 17921 }, { "epoch": 5.970100774548181, "grad_norm": 0.73828125, "learning_rate": 5.527796282558218e-06, "loss": 3.9835, "step": 17922 }, { "epoch": 5.970433913550429, "grad_norm": 0.79296875, "learning_rate": 5.527015342450696e-06, "loss": 4.0181, "step": 17923 }, { "epoch": 5.970767052552677, "grad_norm": 0.80859375, "learning_rate": 5.526234425324172e-06, "loss": 3.9549, "step": 17924 }, { "epoch": 5.971100191554926, "grad_norm": 0.7734375, "learning_rate": 5.525453531187741e-06, "loss": 4.0375, "step": 17925 }, { "epoch": 5.971433330557175, "grad_norm": 0.78515625, "learning_rate": 5.5246726600505e-06, "loss": 3.973, "step": 17926 }, { "epoch": 5.971766469559424, "grad_norm": 0.78125, "learning_rate": 5.523891811921543e-06, "loss": 4.0372, "step": 17927 }, { "epoch": 5.972099608561672, "grad_norm": 0.7265625, "learning_rate": 5.523110986809967e-06, "loss": 3.9809, "step": 17928 }, { "epoch": 5.972432747563921, "grad_norm": 0.78125, "learning_rate": 5.522330184724865e-06, "loss": 3.994, "step": 17929 }, { "epoch": 5.97276588656617, "grad_norm": 0.72265625, "learning_rate": 5.521549405675335e-06, "loss": 3.8936, "step": 17930 }, { "epoch": 5.973099025568418, "grad_norm": 0.7265625, "learning_rate": 5.52076864967047e-06, "loss": 3.9381, "step": 17931 }, { "epoch": 5.973432164570667, "grad_norm": 0.7421875, "learning_rate": 5.519987916719361e-06, "loss": 4.066, "step": 17932 }, { "epoch": 5.9737653035729155, "grad_norm": 0.7734375, "learning_rate": 5.519207206831103e-06, "loss": 4.0568, "step": 17933 }, { "epoch": 5.974098442575165, "grad_norm": 0.76953125, "learning_rate": 5.518426520014792e-06, "loss": 4.0136, "step": 17934 }, { "epoch": 5.974431581577413, "grad_norm": 0.7265625, "learning_rate": 5.51764585627952e-06, "loss": 4.0025, "step": 17935 }, { "epoch": 5.974764720579662, "grad_norm": 0.75390625, "learning_rate": 5.516865215634379e-06, "loss": 3.9792, "step": 17936 }, { "epoch": 5.97509785958191, "grad_norm": 0.75, "learning_rate": 5.516084598088464e-06, "loss": 3.9888, "step": 17937 }, { "epoch": 5.97543099858416, "grad_norm": 0.8203125, "learning_rate": 5.515304003650865e-06, "loss": 3.9546, "step": 17938 }, { "epoch": 5.975764137586408, "grad_norm": 0.8046875, "learning_rate": 5.514523432330676e-06, "loss": 3.976, "step": 17939 }, { "epoch": 5.976097276588657, "grad_norm": 0.703125, "learning_rate": 5.513742884136992e-06, "loss": 3.96, "step": 17940 }, { "epoch": 5.976430415590905, "grad_norm": 0.7734375, "learning_rate": 5.512962359078898e-06, "loss": 3.9562, "step": 17941 }, { "epoch": 5.976763554593154, "grad_norm": 0.8046875, "learning_rate": 5.512181857165488e-06, "loss": 3.9349, "step": 17942 }, { "epoch": 5.977096693595403, "grad_norm": 0.7578125, "learning_rate": 5.511401378405853e-06, "loss": 3.9588, "step": 17943 }, { "epoch": 5.977429832597651, "grad_norm": 0.80859375, "learning_rate": 5.510620922809087e-06, "loss": 3.9502, "step": 17944 }, { "epoch": 5.9777629715999, "grad_norm": 0.8046875, "learning_rate": 5.509840490384277e-06, "loss": 3.9682, "step": 17945 }, { "epoch": 5.978096110602149, "grad_norm": 0.81640625, "learning_rate": 5.509060081140516e-06, "loss": 4.0247, "step": 17946 }, { "epoch": 5.978429249604398, "grad_norm": 0.74609375, "learning_rate": 5.50827969508689e-06, "loss": 3.9718, "step": 17947 }, { "epoch": 5.978762388606646, "grad_norm": 0.78515625, "learning_rate": 5.5074993322324935e-06, "loss": 4.0277, "step": 17948 }, { "epoch": 5.979095527608894, "grad_norm": 0.76953125, "learning_rate": 5.506718992586416e-06, "loss": 3.9452, "step": 17949 }, { "epoch": 5.979428666611144, "grad_norm": 0.75390625, "learning_rate": 5.505938676157741e-06, "loss": 3.9777, "step": 17950 }, { "epoch": 5.979761805613392, "grad_norm": 0.74609375, "learning_rate": 5.505158382955564e-06, "loss": 4.0308, "step": 17951 }, { "epoch": 5.980094944615641, "grad_norm": 0.77734375, "learning_rate": 5.504378112988968e-06, "loss": 4.011, "step": 17952 }, { "epoch": 5.980428083617889, "grad_norm": 0.7578125, "learning_rate": 5.503597866267047e-06, "loss": 3.9857, "step": 17953 }, { "epoch": 5.9807612226201385, "grad_norm": 0.81640625, "learning_rate": 5.5028176427988845e-06, "loss": 3.9787, "step": 17954 }, { "epoch": 5.981094361622387, "grad_norm": 0.75, "learning_rate": 5.502037442593573e-06, "loss": 3.9938, "step": 17955 }, { "epoch": 5.981427500624636, "grad_norm": 0.7265625, "learning_rate": 5.501257265660196e-06, "loss": 3.9591, "step": 17956 }, { "epoch": 5.981760639626884, "grad_norm": 0.78515625, "learning_rate": 5.500477112007843e-06, "loss": 3.9519, "step": 17957 }, { "epoch": 5.9820937786291335, "grad_norm": 0.74609375, "learning_rate": 5.499696981645604e-06, "loss": 3.918, "step": 17958 }, { "epoch": 5.982426917631382, "grad_norm": 0.7734375, "learning_rate": 5.49891687458256e-06, "loss": 3.9144, "step": 17959 }, { "epoch": 5.98276005663363, "grad_norm": 0.80859375, "learning_rate": 5.4981367908277974e-06, "loss": 3.9617, "step": 17960 }, { "epoch": 5.983093195635879, "grad_norm": 0.80078125, "learning_rate": 5.497356730390408e-06, "loss": 3.997, "step": 17961 }, { "epoch": 5.983426334638128, "grad_norm": 0.734375, "learning_rate": 5.4965766932794735e-06, "loss": 4.0321, "step": 17962 }, { "epoch": 5.983759473640377, "grad_norm": 0.76953125, "learning_rate": 5.495796679504083e-06, "loss": 3.93, "step": 17963 }, { "epoch": 5.984092612642625, "grad_norm": 0.75390625, "learning_rate": 5.495016689073318e-06, "loss": 3.9933, "step": 17964 }, { "epoch": 5.984425751644874, "grad_norm": 0.73046875, "learning_rate": 5.494236721996266e-06, "loss": 4.0343, "step": 17965 }, { "epoch": 5.9847588906471225, "grad_norm": 0.75390625, "learning_rate": 5.493456778282012e-06, "loss": 4.004, "step": 17966 }, { "epoch": 5.985092029649371, "grad_norm": 0.73828125, "learning_rate": 5.492676857939643e-06, "loss": 4.0152, "step": 17967 }, { "epoch": 5.98542516865162, "grad_norm": 0.75, "learning_rate": 5.49189696097824e-06, "loss": 3.9289, "step": 17968 }, { "epoch": 5.985758307653868, "grad_norm": 0.77734375, "learning_rate": 5.491117087406885e-06, "loss": 3.9874, "step": 17969 }, { "epoch": 5.9860914466561175, "grad_norm": 0.77734375, "learning_rate": 5.490337237234666e-06, "loss": 3.8941, "step": 17970 }, { "epoch": 5.986424585658366, "grad_norm": 0.73828125, "learning_rate": 5.489557410470665e-06, "loss": 3.977, "step": 17971 }, { "epoch": 5.986757724660615, "grad_norm": 0.83203125, "learning_rate": 5.488777607123966e-06, "loss": 3.9706, "step": 17972 }, { "epoch": 5.987090863662863, "grad_norm": 0.7734375, "learning_rate": 5.487997827203651e-06, "loss": 4.0048, "step": 17973 }, { "epoch": 5.9874240026651115, "grad_norm": 0.76953125, "learning_rate": 5.487218070718804e-06, "loss": 3.9582, "step": 17974 }, { "epoch": 5.987757141667361, "grad_norm": 0.75, "learning_rate": 5.486438337678507e-06, "loss": 4.051, "step": 17975 }, { "epoch": 5.988090280669609, "grad_norm": 0.76953125, "learning_rate": 5.485658628091842e-06, "loss": 3.9103, "step": 17976 }, { "epoch": 5.988423419671858, "grad_norm": 0.75, "learning_rate": 5.484878941967892e-06, "loss": 4.0065, "step": 17977 }, { "epoch": 5.9887565586741065, "grad_norm": 0.7421875, "learning_rate": 5.484099279315735e-06, "loss": 3.9343, "step": 17978 }, { "epoch": 5.989089697676356, "grad_norm": 0.7734375, "learning_rate": 5.483319640144457e-06, "loss": 3.978, "step": 17979 }, { "epoch": 5.989422836678604, "grad_norm": 0.78515625, "learning_rate": 5.482540024463135e-06, "loss": 3.9402, "step": 17980 }, { "epoch": 5.989755975680853, "grad_norm": 0.73828125, "learning_rate": 5.4817604322808535e-06, "loss": 3.9573, "step": 17981 }, { "epoch": 5.9900891146831015, "grad_norm": 0.734375, "learning_rate": 5.48098086360669e-06, "loss": 4.0346, "step": 17982 }, { "epoch": 5.990422253685351, "grad_norm": 0.78125, "learning_rate": 5.480201318449728e-06, "loss": 4.002, "step": 17983 }, { "epoch": 5.990755392687599, "grad_norm": 0.71875, "learning_rate": 5.479421796819044e-06, "loss": 3.9716, "step": 17984 }, { "epoch": 5.991088531689847, "grad_norm": 0.74609375, "learning_rate": 5.478642298723722e-06, "loss": 4.069, "step": 17985 }, { "epoch": 5.991421670692096, "grad_norm": 0.7578125, "learning_rate": 5.477862824172839e-06, "loss": 3.9711, "step": 17986 }, { "epoch": 5.991754809694345, "grad_norm": 0.80078125, "learning_rate": 5.4770833731754715e-06, "loss": 4.0101, "step": 17987 }, { "epoch": 5.992087948696594, "grad_norm": 0.73046875, "learning_rate": 5.476303945740703e-06, "loss": 4.0237, "step": 17988 }, { "epoch": 5.992421087698842, "grad_norm": 0.77734375, "learning_rate": 5.475524541877609e-06, "loss": 3.8903, "step": 17989 }, { "epoch": 5.992754226701091, "grad_norm": 0.75390625, "learning_rate": 5.4747451615952694e-06, "loss": 3.9698, "step": 17990 }, { "epoch": 5.99308736570334, "grad_norm": 0.76171875, "learning_rate": 5.4739658049027614e-06, "loss": 3.9984, "step": 17991 }, { "epoch": 5.993420504705588, "grad_norm": 0.7734375, "learning_rate": 5.473186471809164e-06, "loss": 3.9861, "step": 17992 }, { "epoch": 5.993753643707837, "grad_norm": 0.74609375, "learning_rate": 5.472407162323554e-06, "loss": 3.9807, "step": 17993 }, { "epoch": 5.994086782710085, "grad_norm": 0.7109375, "learning_rate": 5.47162787645501e-06, "loss": 3.951, "step": 17994 }, { "epoch": 5.994419921712335, "grad_norm": 0.77734375, "learning_rate": 5.4708486142126075e-06, "loss": 3.9518, "step": 17995 }, { "epoch": 5.994753060714583, "grad_norm": 0.75390625, "learning_rate": 5.470069375605423e-06, "loss": 4.0166, "step": 17996 }, { "epoch": 5.995086199716832, "grad_norm": 0.7578125, "learning_rate": 5.469290160642534e-06, "loss": 4.0194, "step": 17997 }, { "epoch": 5.99541933871908, "grad_norm": 0.75390625, "learning_rate": 5.468510969333014e-06, "loss": 4.0216, "step": 17998 }, { "epoch": 5.99575247772133, "grad_norm": 0.70703125, "learning_rate": 5.467731801685943e-06, "loss": 3.9429, "step": 17999 }, { "epoch": 5.996085616723578, "grad_norm": 0.796875, "learning_rate": 5.466952657710393e-06, "loss": 4.0128, "step": 18000 }, { "epoch": 5.996418755725827, "grad_norm": 0.75, "learning_rate": 5.466173537415443e-06, "loss": 3.9359, "step": 18001 }, { "epoch": 5.996751894728075, "grad_norm": 0.77734375, "learning_rate": 5.465394440810164e-06, "loss": 4.0204, "step": 18002 }, { "epoch": 5.997085033730324, "grad_norm": 0.73046875, "learning_rate": 5.464615367903637e-06, "loss": 4.0227, "step": 18003 }, { "epoch": 5.997418172732573, "grad_norm": 0.7734375, "learning_rate": 5.463836318704929e-06, "loss": 3.9275, "step": 18004 }, { "epoch": 5.997751311734821, "grad_norm": 0.75390625, "learning_rate": 5.463057293223118e-06, "loss": 3.8862, "step": 18005 }, { "epoch": 5.99808445073707, "grad_norm": 0.73046875, "learning_rate": 5.462278291467276e-06, "loss": 3.9886, "step": 18006 }, { "epoch": 5.998417589739319, "grad_norm": 0.73828125, "learning_rate": 5.461499313446481e-06, "loss": 3.9992, "step": 18007 }, { "epoch": 5.998750728741568, "grad_norm": 0.76171875, "learning_rate": 5.4607203591698e-06, "loss": 3.9697, "step": 18008 }, { "epoch": 5.999083867743816, "grad_norm": 0.74609375, "learning_rate": 5.459941428646313e-06, "loss": 3.9188, "step": 18009 }, { "epoch": 5.999417006746064, "grad_norm": 0.7265625, "learning_rate": 5.459162521885087e-06, "loss": 4.0034, "step": 18010 }, { "epoch": 5.999750145748314, "grad_norm": 0.77734375, "learning_rate": 5.4583836388952e-06, "loss": 3.9251, "step": 18011 }, { "epoch": 6.0, "grad_norm": 0.85546875, "learning_rate": 5.457604779685721e-06, "loss": 3.9128, "step": 18012 }, { "epoch": 6.000333139002248, "grad_norm": 0.76953125, "learning_rate": 5.456825944265721e-06, "loss": 4.0337, "step": 18013 }, { "epoch": 6.0006662780044975, "grad_norm": 0.7265625, "learning_rate": 5.456047132644275e-06, "loss": 4.0276, "step": 18014 }, { "epoch": 6.000999417006746, "grad_norm": 0.72265625, "learning_rate": 5.45526834483045e-06, "loss": 4.025, "step": 18015 }, { "epoch": 6.001332556008995, "grad_norm": 0.7421875, "learning_rate": 5.454489580833322e-06, "loss": 3.9263, "step": 18016 }, { "epoch": 6.001665695011243, "grad_norm": 0.7734375, "learning_rate": 5.453710840661958e-06, "loss": 3.9367, "step": 18017 }, { "epoch": 6.001998834013492, "grad_norm": 0.75390625, "learning_rate": 5.4529321243254334e-06, "loss": 4.0511, "step": 18018 }, { "epoch": 6.002331973015741, "grad_norm": 0.734375, "learning_rate": 5.452153431832812e-06, "loss": 4.0083, "step": 18019 }, { "epoch": 6.00266511201799, "grad_norm": 0.76953125, "learning_rate": 5.45137476319317e-06, "loss": 3.9613, "step": 18020 }, { "epoch": 6.002998251020238, "grad_norm": 0.76953125, "learning_rate": 5.450596118415576e-06, "loss": 3.9201, "step": 18021 }, { "epoch": 6.0033313900224865, "grad_norm": 0.76171875, "learning_rate": 5.449817497509096e-06, "loss": 3.9386, "step": 18022 }, { "epoch": 6.003664529024736, "grad_norm": 0.7265625, "learning_rate": 5.449038900482802e-06, "loss": 3.9599, "step": 18023 }, { "epoch": 6.003997668026984, "grad_norm": 0.75, "learning_rate": 5.448260327345761e-06, "loss": 3.9266, "step": 18024 }, { "epoch": 6.004330807029233, "grad_norm": 0.6875, "learning_rate": 5.447481778107045e-06, "loss": 3.9007, "step": 18025 }, { "epoch": 6.0046639460314815, "grad_norm": 0.7421875, "learning_rate": 5.446703252775719e-06, "loss": 4.0147, "step": 18026 }, { "epoch": 6.004997085033731, "grad_norm": 0.7734375, "learning_rate": 5.445924751360854e-06, "loss": 3.9723, "step": 18027 }, { "epoch": 6.005330224035979, "grad_norm": 0.7578125, "learning_rate": 5.445146273871515e-06, "loss": 4.0634, "step": 18028 }, { "epoch": 6.005663363038228, "grad_norm": 0.70703125, "learning_rate": 5.444367820316773e-06, "loss": 3.9458, "step": 18029 }, { "epoch": 6.005996502040476, "grad_norm": 0.72265625, "learning_rate": 5.443589390705694e-06, "loss": 3.9974, "step": 18030 }, { "epoch": 6.006329641042725, "grad_norm": 0.76953125, "learning_rate": 5.4428109850473425e-06, "loss": 4.0103, "step": 18031 }, { "epoch": 6.006662780044974, "grad_norm": 0.77734375, "learning_rate": 5.442032603350789e-06, "loss": 4.0283, "step": 18032 }, { "epoch": 6.006995919047222, "grad_norm": 0.765625, "learning_rate": 5.441254245625095e-06, "loss": 3.9366, "step": 18033 }, { "epoch": 6.007329058049471, "grad_norm": 0.7421875, "learning_rate": 5.440475911879332e-06, "loss": 3.9605, "step": 18034 }, { "epoch": 6.00766219705172, "grad_norm": 0.78515625, "learning_rate": 5.4396976021225625e-06, "loss": 3.8831, "step": 18035 }, { "epoch": 6.007995336053969, "grad_norm": 0.8125, "learning_rate": 5.438919316363855e-06, "loss": 3.922, "step": 18036 }, { "epoch": 6.008328475056217, "grad_norm": 0.77734375, "learning_rate": 5.4381410546122715e-06, "loss": 4.0082, "step": 18037 }, { "epoch": 6.008661614058466, "grad_norm": 0.76953125, "learning_rate": 5.4373628168768795e-06, "loss": 3.9172, "step": 18038 }, { "epoch": 6.008994753060715, "grad_norm": 0.76171875, "learning_rate": 5.436584603166745e-06, "loss": 4.047, "step": 18039 }, { "epoch": 6.009327892062963, "grad_norm": 0.75, "learning_rate": 5.435806413490928e-06, "loss": 3.9562, "step": 18040 }, { "epoch": 6.009661031065212, "grad_norm": 0.69921875, "learning_rate": 5.435028247858497e-06, "loss": 4.0639, "step": 18041 }, { "epoch": 6.00999417006746, "grad_norm": 0.765625, "learning_rate": 5.434250106278511e-06, "loss": 4.024, "step": 18042 }, { "epoch": 6.01032730906971, "grad_norm": 0.78125, "learning_rate": 5.43347198876004e-06, "loss": 3.9713, "step": 18043 }, { "epoch": 6.010660448071958, "grad_norm": 0.71484375, "learning_rate": 5.432693895312143e-06, "loss": 4.0179, "step": 18044 }, { "epoch": 6.010993587074207, "grad_norm": 0.765625, "learning_rate": 5.431915825943884e-06, "loss": 3.9499, "step": 18045 }, { "epoch": 6.011326726076455, "grad_norm": 0.72265625, "learning_rate": 5.431137780664326e-06, "loss": 3.9732, "step": 18046 }, { "epoch": 6.011659865078704, "grad_norm": 0.80078125, "learning_rate": 5.430359759482533e-06, "loss": 4.0203, "step": 18047 }, { "epoch": 6.011993004080953, "grad_norm": 0.78515625, "learning_rate": 5.429581762407567e-06, "loss": 3.9796, "step": 18048 }, { "epoch": 6.012326143083201, "grad_norm": 0.7265625, "learning_rate": 5.4288037894484875e-06, "loss": 3.9958, "step": 18049 }, { "epoch": 6.01265928208545, "grad_norm": 0.76953125, "learning_rate": 5.4280258406143565e-06, "loss": 4.0457, "step": 18050 }, { "epoch": 6.012992421087699, "grad_norm": 0.765625, "learning_rate": 5.427247915914238e-06, "loss": 3.9882, "step": 18051 }, { "epoch": 6.013325560089948, "grad_norm": 0.7734375, "learning_rate": 5.42647001535719e-06, "loss": 3.998, "step": 18052 }, { "epoch": 6.013658699092196, "grad_norm": 0.80859375, "learning_rate": 5.425692138952277e-06, "loss": 4.013, "step": 18053 }, { "epoch": 6.013991838094445, "grad_norm": 0.75390625, "learning_rate": 5.424914286708556e-06, "loss": 4.0408, "step": 18054 }, { "epoch": 6.014324977096694, "grad_norm": 0.7890625, "learning_rate": 5.42413645863509e-06, "loss": 3.9353, "step": 18055 }, { "epoch": 6.014658116098942, "grad_norm": 0.7734375, "learning_rate": 5.423358654740937e-06, "loss": 3.9113, "step": 18056 }, { "epoch": 6.014991255101191, "grad_norm": 0.75390625, "learning_rate": 5.42258087503516e-06, "loss": 4.0556, "step": 18057 }, { "epoch": 6.015324394103439, "grad_norm": 0.765625, "learning_rate": 5.421803119526816e-06, "loss": 4.0047, "step": 18058 }, { "epoch": 6.0156575331056885, "grad_norm": 0.75, "learning_rate": 5.421025388224961e-06, "loss": 3.9642, "step": 18059 }, { "epoch": 6.015990672107937, "grad_norm": 0.75390625, "learning_rate": 5.42024768113866e-06, "loss": 4.1005, "step": 18060 }, { "epoch": 6.016323811110186, "grad_norm": 0.77734375, "learning_rate": 5.419469998276967e-06, "loss": 3.9874, "step": 18061 }, { "epoch": 6.016656950112434, "grad_norm": 0.71484375, "learning_rate": 5.418692339648943e-06, "loss": 3.9669, "step": 18062 }, { "epoch": 6.0169900891146835, "grad_norm": 0.734375, "learning_rate": 5.417914705263644e-06, "loss": 3.9936, "step": 18063 }, { "epoch": 6.017323228116932, "grad_norm": 0.79296875, "learning_rate": 5.417137095130131e-06, "loss": 3.9667, "step": 18064 }, { "epoch": 6.01765636711918, "grad_norm": 0.7421875, "learning_rate": 5.416359509257457e-06, "loss": 3.9819, "step": 18065 }, { "epoch": 6.017989506121429, "grad_norm": 0.7578125, "learning_rate": 5.4155819476546855e-06, "loss": 3.9261, "step": 18066 }, { "epoch": 6.0183226451236775, "grad_norm": 0.78515625, "learning_rate": 5.414804410330867e-06, "loss": 3.8857, "step": 18067 }, { "epoch": 6.018655784125927, "grad_norm": 0.765625, "learning_rate": 5.4140268972950595e-06, "loss": 3.9423, "step": 18068 }, { "epoch": 6.018988923128175, "grad_norm": 0.75390625, "learning_rate": 5.413249408556321e-06, "loss": 3.9916, "step": 18069 }, { "epoch": 6.019322062130424, "grad_norm": 0.73046875, "learning_rate": 5.412471944123708e-06, "loss": 3.928, "step": 18070 }, { "epoch": 6.0196552011326725, "grad_norm": 0.7734375, "learning_rate": 5.411694504006274e-06, "loss": 3.9981, "step": 18071 }, { "epoch": 6.019988340134922, "grad_norm": 0.7734375, "learning_rate": 5.410917088213076e-06, "loss": 4.0154, "step": 18072 }, { "epoch": 6.02032147913717, "grad_norm": 0.76171875, "learning_rate": 5.410139696753169e-06, "loss": 4.02, "step": 18073 }, { "epoch": 6.020654618139418, "grad_norm": 0.73046875, "learning_rate": 5.409362329635608e-06, "loss": 3.9456, "step": 18074 }, { "epoch": 6.0209877571416675, "grad_norm": 0.734375, "learning_rate": 5.408584986869449e-06, "loss": 3.9644, "step": 18075 }, { "epoch": 6.021320896143916, "grad_norm": 0.8515625, "learning_rate": 5.407807668463745e-06, "loss": 4.0278, "step": 18076 }, { "epoch": 6.021654035146165, "grad_norm": 0.75390625, "learning_rate": 5.407030374427547e-06, "loss": 4.0089, "step": 18077 }, { "epoch": 6.021987174148413, "grad_norm": 0.77734375, "learning_rate": 5.406253104769914e-06, "loss": 3.9549, "step": 18078 }, { "epoch": 6.022320313150662, "grad_norm": 0.8046875, "learning_rate": 5.405475859499896e-06, "loss": 4.0399, "step": 18079 }, { "epoch": 6.022653452152911, "grad_norm": 0.7734375, "learning_rate": 5.404698638626549e-06, "loss": 3.9115, "step": 18080 }, { "epoch": 6.02298659115516, "grad_norm": 0.75, "learning_rate": 5.403921442158923e-06, "loss": 3.9593, "step": 18081 }, { "epoch": 6.023319730157408, "grad_norm": 0.796875, "learning_rate": 5.403144270106073e-06, "loss": 3.9929, "step": 18082 }, { "epoch": 6.0236528691596565, "grad_norm": 0.78125, "learning_rate": 5.40236712247705e-06, "loss": 3.92, "step": 18083 }, { "epoch": 6.023986008161906, "grad_norm": 0.7734375, "learning_rate": 5.401589999280908e-06, "loss": 3.9182, "step": 18084 }, { "epoch": 6.024319147164154, "grad_norm": 0.75390625, "learning_rate": 5.400812900526697e-06, "loss": 4.0089, "step": 18085 }, { "epoch": 6.024652286166403, "grad_norm": 0.77734375, "learning_rate": 5.400035826223468e-06, "loss": 3.9433, "step": 18086 }, { "epoch": 6.024985425168651, "grad_norm": 0.71875, "learning_rate": 5.3992587763802726e-06, "loss": 4.0247, "step": 18087 }, { "epoch": 6.025318564170901, "grad_norm": 0.8046875, "learning_rate": 5.398481751006163e-06, "loss": 4.0295, "step": 18088 }, { "epoch": 6.025651703173149, "grad_norm": 0.73828125, "learning_rate": 5.3977047501101885e-06, "loss": 4.0322, "step": 18089 }, { "epoch": 6.025984842175398, "grad_norm": 0.7421875, "learning_rate": 5.3969277737014e-06, "loss": 3.9609, "step": 18090 }, { "epoch": 6.026317981177646, "grad_norm": 0.76953125, "learning_rate": 5.396150821788848e-06, "loss": 4.0108, "step": 18091 }, { "epoch": 6.026651120179895, "grad_norm": 0.7734375, "learning_rate": 5.395373894381581e-06, "loss": 3.9701, "step": 18092 }, { "epoch": 6.026984259182144, "grad_norm": 0.8203125, "learning_rate": 5.394596991488656e-06, "loss": 3.9245, "step": 18093 }, { "epoch": 6.027317398184392, "grad_norm": 0.765625, "learning_rate": 5.3938201131191084e-06, "loss": 4.0223, "step": 18094 }, { "epoch": 6.027650537186641, "grad_norm": 0.73828125, "learning_rate": 5.393043259281997e-06, "loss": 4.0077, "step": 18095 }, { "epoch": 6.02798367618889, "grad_norm": 0.73828125, "learning_rate": 5.3922664299863674e-06, "loss": 3.9655, "step": 18096 }, { "epoch": 6.028316815191139, "grad_norm": 0.73828125, "learning_rate": 5.391489625241269e-06, "loss": 3.9151, "step": 18097 }, { "epoch": 6.028649954193387, "grad_norm": 0.78125, "learning_rate": 5.390712845055748e-06, "loss": 3.9968, "step": 18098 }, { "epoch": 6.028983093195636, "grad_norm": 0.79296875, "learning_rate": 5.389936089438856e-06, "loss": 4.0587, "step": 18099 }, { "epoch": 6.029316232197885, "grad_norm": 0.73828125, "learning_rate": 5.389159358399635e-06, "loss": 3.9138, "step": 18100 }, { "epoch": 6.029649371200133, "grad_norm": 0.765625, "learning_rate": 5.388382651947138e-06, "loss": 4.0073, "step": 18101 }, { "epoch": 6.029982510202382, "grad_norm": 0.8046875, "learning_rate": 5.387605970090411e-06, "loss": 3.9882, "step": 18102 }, { "epoch": 6.03031564920463, "grad_norm": 0.76953125, "learning_rate": 5.386829312838497e-06, "loss": 3.9256, "step": 18103 }, { "epoch": 6.0306487882068796, "grad_norm": 0.7578125, "learning_rate": 5.386052680200443e-06, "loss": 4.0265, "step": 18104 }, { "epoch": 6.030981927209128, "grad_norm": 0.80078125, "learning_rate": 5.385276072185297e-06, "loss": 4.0202, "step": 18105 }, { "epoch": 6.031315066211377, "grad_norm": 0.8046875, "learning_rate": 5.384499488802106e-06, "loss": 3.9039, "step": 18106 }, { "epoch": 6.031648205213625, "grad_norm": 0.8046875, "learning_rate": 5.383722930059911e-06, "loss": 3.9485, "step": 18107 }, { "epoch": 6.0319813442158745, "grad_norm": 0.7734375, "learning_rate": 5.382946395967762e-06, "loss": 4.0277, "step": 18108 }, { "epoch": 6.032314483218123, "grad_norm": 0.7890625, "learning_rate": 5.382169886534701e-06, "loss": 3.993, "step": 18109 }, { "epoch": 6.032647622220371, "grad_norm": 0.7265625, "learning_rate": 5.381393401769775e-06, "loss": 3.9843, "step": 18110 }, { "epoch": 6.03298076122262, "grad_norm": 0.76171875, "learning_rate": 5.3806169416820285e-06, "loss": 3.9075, "step": 18111 }, { "epoch": 6.033313900224869, "grad_norm": 0.72265625, "learning_rate": 5.379840506280501e-06, "loss": 4.0386, "step": 18112 }, { "epoch": 6.033647039227118, "grad_norm": 0.7265625, "learning_rate": 5.379064095574241e-06, "loss": 4.0729, "step": 18113 }, { "epoch": 6.033980178229366, "grad_norm": 0.75, "learning_rate": 5.378287709572287e-06, "loss": 3.9855, "step": 18114 }, { "epoch": 6.034313317231615, "grad_norm": 0.8125, "learning_rate": 5.377511348283689e-06, "loss": 3.9541, "step": 18115 }, { "epoch": 6.0346464562338635, "grad_norm": 0.78125, "learning_rate": 5.376735011717485e-06, "loss": 3.9926, "step": 18116 }, { "epoch": 6.034979595236113, "grad_norm": 0.7734375, "learning_rate": 5.37595869988272e-06, "loss": 4.0119, "step": 18117 }, { "epoch": 6.035312734238361, "grad_norm": 0.71875, "learning_rate": 5.3751824127884335e-06, "loss": 3.9688, "step": 18118 }, { "epoch": 6.035645873240609, "grad_norm": 0.7734375, "learning_rate": 5.374406150443673e-06, "loss": 3.9407, "step": 18119 }, { "epoch": 6.0359790122428585, "grad_norm": 0.7734375, "learning_rate": 5.373629912857476e-06, "loss": 4.0118, "step": 18120 }, { "epoch": 6.036312151245107, "grad_norm": 0.7890625, "learning_rate": 5.372853700038884e-06, "loss": 3.9654, "step": 18121 }, { "epoch": 6.036645290247356, "grad_norm": 0.75, "learning_rate": 5.372077511996939e-06, "loss": 4.0231, "step": 18122 }, { "epoch": 6.036978429249604, "grad_norm": 0.75390625, "learning_rate": 5.371301348740681e-06, "loss": 4.0289, "step": 18123 }, { "epoch": 6.0373115682518534, "grad_norm": 0.76171875, "learning_rate": 5.370525210279153e-06, "loss": 4.0336, "step": 18124 }, { "epoch": 6.037644707254102, "grad_norm": 0.76171875, "learning_rate": 5.369749096621392e-06, "loss": 4.0113, "step": 18125 }, { "epoch": 6.03797784625635, "grad_norm": 0.74609375, "learning_rate": 5.36897300777644e-06, "loss": 4.0083, "step": 18126 }, { "epoch": 6.038310985258599, "grad_norm": 0.71875, "learning_rate": 5.368196943753338e-06, "loss": 3.9365, "step": 18127 }, { "epoch": 6.0386441242608475, "grad_norm": 0.7109375, "learning_rate": 5.367420904561123e-06, "loss": 3.9654, "step": 18128 }, { "epoch": 6.038977263263097, "grad_norm": 0.828125, "learning_rate": 5.36664489020884e-06, "loss": 4.0177, "step": 18129 }, { "epoch": 6.039310402265345, "grad_norm": 0.765625, "learning_rate": 5.365868900705517e-06, "loss": 3.906, "step": 18130 }, { "epoch": 6.039643541267594, "grad_norm": 0.7890625, "learning_rate": 5.365092936060202e-06, "loss": 3.9791, "step": 18131 }, { "epoch": 6.0399766802698425, "grad_norm": 0.7734375, "learning_rate": 5.364316996281928e-06, "loss": 3.9681, "step": 18132 }, { "epoch": 6.040309819272092, "grad_norm": 0.78515625, "learning_rate": 5.363541081379737e-06, "loss": 3.989, "step": 18133 }, { "epoch": 6.04064295827434, "grad_norm": 0.7265625, "learning_rate": 5.362765191362663e-06, "loss": 3.9925, "step": 18134 }, { "epoch": 6.040976097276588, "grad_norm": 0.78125, "learning_rate": 5.361989326239747e-06, "loss": 3.9655, "step": 18135 }, { "epoch": 6.041309236278837, "grad_norm": 0.78125, "learning_rate": 5.361213486020023e-06, "loss": 4.0075, "step": 18136 }, { "epoch": 6.041642375281086, "grad_norm": 0.8203125, "learning_rate": 5.360437670712531e-06, "loss": 3.953, "step": 18137 }, { "epoch": 6.041975514283335, "grad_norm": 0.7578125, "learning_rate": 5.359661880326306e-06, "loss": 4.0267, "step": 18138 }, { "epoch": 6.042308653285583, "grad_norm": 0.73828125, "learning_rate": 5.358886114870385e-06, "loss": 4.0178, "step": 18139 }, { "epoch": 6.042641792287832, "grad_norm": 0.75390625, "learning_rate": 5.3581103743538e-06, "loss": 4.0707, "step": 18140 }, { "epoch": 6.042974931290081, "grad_norm": 0.78125, "learning_rate": 5.357334658785591e-06, "loss": 4.0177, "step": 18141 }, { "epoch": 6.04330807029233, "grad_norm": 0.80078125, "learning_rate": 5.356558968174791e-06, "loss": 3.971, "step": 18142 }, { "epoch": 6.043641209294578, "grad_norm": 0.75, "learning_rate": 5.3557833025304385e-06, "loss": 3.921, "step": 18143 }, { "epoch": 6.0439743482968264, "grad_norm": 0.79296875, "learning_rate": 5.355007661861564e-06, "loss": 3.9501, "step": 18144 }, { "epoch": 6.044307487299076, "grad_norm": 0.75, "learning_rate": 5.354232046177207e-06, "loss": 4.0067, "step": 18145 }, { "epoch": 6.044640626301324, "grad_norm": 0.75390625, "learning_rate": 5.353456455486398e-06, "loss": 3.9598, "step": 18146 }, { "epoch": 6.044973765303573, "grad_norm": 0.76953125, "learning_rate": 5.35268088979817e-06, "loss": 3.9223, "step": 18147 }, { "epoch": 6.045306904305821, "grad_norm": 0.7734375, "learning_rate": 5.351905349121562e-06, "loss": 3.9439, "step": 18148 }, { "epoch": 6.045640043308071, "grad_norm": 0.79296875, "learning_rate": 5.351129833465601e-06, "loss": 3.9935, "step": 18149 }, { "epoch": 6.045973182310319, "grad_norm": 0.78515625, "learning_rate": 5.3503543428393235e-06, "loss": 3.9805, "step": 18150 }, { "epoch": 6.046306321312568, "grad_norm": 0.73828125, "learning_rate": 5.349578877251761e-06, "loss": 3.9249, "step": 18151 }, { "epoch": 6.046639460314816, "grad_norm": 0.75390625, "learning_rate": 5.348803436711949e-06, "loss": 3.949, "step": 18152 }, { "epoch": 6.046972599317065, "grad_norm": 0.7890625, "learning_rate": 5.348028021228914e-06, "loss": 3.9896, "step": 18153 }, { "epoch": 6.047305738319314, "grad_norm": 0.76171875, "learning_rate": 5.347252630811695e-06, "loss": 4.0455, "step": 18154 }, { "epoch": 6.047638877321562, "grad_norm": 0.79296875, "learning_rate": 5.34647726546932e-06, "loss": 3.9853, "step": 18155 }, { "epoch": 6.047972016323811, "grad_norm": 0.77734375, "learning_rate": 5.345701925210818e-06, "loss": 3.9395, "step": 18156 }, { "epoch": 6.04830515532606, "grad_norm": 0.75390625, "learning_rate": 5.344926610045224e-06, "loss": 3.9637, "step": 18157 }, { "epoch": 6.048638294328309, "grad_norm": 0.78125, "learning_rate": 5.344151319981566e-06, "loss": 3.9758, "step": 18158 }, { "epoch": 6.048971433330557, "grad_norm": 0.7421875, "learning_rate": 5.343376055028877e-06, "loss": 3.9791, "step": 18159 }, { "epoch": 6.049304572332806, "grad_norm": 0.7734375, "learning_rate": 5.342600815196183e-06, "loss": 3.9642, "step": 18160 }, { "epoch": 6.049637711335055, "grad_norm": 0.71484375, "learning_rate": 5.341825600492519e-06, "loss": 3.987, "step": 18161 }, { "epoch": 6.049970850337303, "grad_norm": 0.75, "learning_rate": 5.341050410926911e-06, "loss": 3.9577, "step": 18162 }, { "epoch": 6.050303989339552, "grad_norm": 0.79296875, "learning_rate": 5.340275246508391e-06, "loss": 4.0416, "step": 18163 }, { "epoch": 6.0506371283418, "grad_norm": 0.73828125, "learning_rate": 5.339500107245987e-06, "loss": 3.9905, "step": 18164 }, { "epoch": 6.0509702673440495, "grad_norm": 0.77734375, "learning_rate": 5.3387249931487235e-06, "loss": 4.0517, "step": 18165 }, { "epoch": 6.051303406346298, "grad_norm": 0.796875, "learning_rate": 5.337949904225635e-06, "loss": 4.0183, "step": 18166 }, { "epoch": 6.051636545348547, "grad_norm": 0.78125, "learning_rate": 5.337174840485746e-06, "loss": 4.0062, "step": 18167 }, { "epoch": 6.051969684350795, "grad_norm": 0.7578125, "learning_rate": 5.336399801938087e-06, "loss": 3.9791, "step": 18168 }, { "epoch": 6.0523028233530445, "grad_norm": 0.75390625, "learning_rate": 5.335624788591682e-06, "loss": 3.9233, "step": 18169 }, { "epoch": 6.052635962355293, "grad_norm": 0.75, "learning_rate": 5.334849800455562e-06, "loss": 3.9829, "step": 18170 }, { "epoch": 6.052969101357541, "grad_norm": 0.7734375, "learning_rate": 5.33407483753875e-06, "loss": 3.9821, "step": 18171 }, { "epoch": 6.05330224035979, "grad_norm": 0.73828125, "learning_rate": 5.333299899850275e-06, "loss": 3.998, "step": 18172 }, { "epoch": 6.0536353793620385, "grad_norm": 0.7421875, "learning_rate": 5.332524987399167e-06, "loss": 3.9824, "step": 18173 }, { "epoch": 6.053968518364288, "grad_norm": 0.75390625, "learning_rate": 5.331750100194445e-06, "loss": 3.9275, "step": 18174 }, { "epoch": 6.054301657366536, "grad_norm": 0.75, "learning_rate": 5.330975238245139e-06, "loss": 3.958, "step": 18175 }, { "epoch": 6.054634796368785, "grad_norm": 0.75, "learning_rate": 5.330200401560271e-06, "loss": 4.0224, "step": 18176 }, { "epoch": 6.0549679353710335, "grad_norm": 0.78125, "learning_rate": 5.329425590148871e-06, "loss": 3.9898, "step": 18177 }, { "epoch": 6.055301074373283, "grad_norm": 0.69921875, "learning_rate": 5.32865080401996e-06, "loss": 4.0448, "step": 18178 }, { "epoch": 6.055634213375531, "grad_norm": 0.81640625, "learning_rate": 5.327876043182564e-06, "loss": 3.9675, "step": 18179 }, { "epoch": 6.055967352377779, "grad_norm": 0.77734375, "learning_rate": 5.327101307645707e-06, "loss": 3.9522, "step": 18180 }, { "epoch": 6.0563004913800285, "grad_norm": 0.7421875, "learning_rate": 5.326326597418416e-06, "loss": 3.9865, "step": 18181 }, { "epoch": 6.056633630382277, "grad_norm": 0.7578125, "learning_rate": 5.325551912509712e-06, "loss": 3.9354, "step": 18182 }, { "epoch": 6.056966769384526, "grad_norm": 0.734375, "learning_rate": 5.324777252928615e-06, "loss": 3.9362, "step": 18183 }, { "epoch": 6.057299908386774, "grad_norm": 0.828125, "learning_rate": 5.324002618684153e-06, "loss": 3.9899, "step": 18184 }, { "epoch": 6.057633047389023, "grad_norm": 0.796875, "learning_rate": 5.323228009785348e-06, "loss": 3.9777, "step": 18185 }, { "epoch": 6.057966186391272, "grad_norm": 0.7421875, "learning_rate": 5.32245342624122e-06, "loss": 4.0324, "step": 18186 }, { "epoch": 6.05829932539352, "grad_norm": 0.73046875, "learning_rate": 5.321678868060794e-06, "loss": 3.9815, "step": 18187 }, { "epoch": 6.058632464395769, "grad_norm": 0.75, "learning_rate": 5.320904335253091e-06, "loss": 4.0553, "step": 18188 }, { "epoch": 6.0589656033980175, "grad_norm": 0.7734375, "learning_rate": 5.320129827827133e-06, "loss": 3.9772, "step": 18189 }, { "epoch": 6.059298742400267, "grad_norm": 0.76171875, "learning_rate": 5.31935534579194e-06, "loss": 4.0695, "step": 18190 }, { "epoch": 6.059631881402515, "grad_norm": 0.76171875, "learning_rate": 5.318580889156536e-06, "loss": 4.0391, "step": 18191 }, { "epoch": 6.059965020404764, "grad_norm": 0.80078125, "learning_rate": 5.317806457929938e-06, "loss": 3.9905, "step": 18192 }, { "epoch": 6.060298159407012, "grad_norm": 0.88671875, "learning_rate": 5.317032052121168e-06, "loss": 4.0504, "step": 18193 }, { "epoch": 6.060631298409262, "grad_norm": 0.76953125, "learning_rate": 5.316257671739247e-06, "loss": 3.9776, "step": 18194 }, { "epoch": 6.06096443741151, "grad_norm": 0.7734375, "learning_rate": 5.315483316793193e-06, "loss": 4.0805, "step": 18195 }, { "epoch": 6.061297576413758, "grad_norm": 0.76171875, "learning_rate": 5.314708987292028e-06, "loss": 3.9961, "step": 18196 }, { "epoch": 6.061630715416007, "grad_norm": 0.80078125, "learning_rate": 5.313934683244769e-06, "loss": 3.9412, "step": 18197 }, { "epoch": 6.061963854418256, "grad_norm": 0.79296875, "learning_rate": 5.313160404660436e-06, "loss": 3.9875, "step": 18198 }, { "epoch": 6.062296993420505, "grad_norm": 0.75390625, "learning_rate": 5.312386151548047e-06, "loss": 3.95, "step": 18199 }, { "epoch": 6.062630132422753, "grad_norm": 0.76171875, "learning_rate": 5.311611923916625e-06, "loss": 4.0324, "step": 18200 }, { "epoch": 6.062963271425002, "grad_norm": 0.78125, "learning_rate": 5.31083772177518e-06, "loss": 3.9171, "step": 18201 }, { "epoch": 6.063296410427251, "grad_norm": 0.77734375, "learning_rate": 5.310063545132734e-06, "loss": 4.0284, "step": 18202 }, { "epoch": 6.0636295494295, "grad_norm": 0.73828125, "learning_rate": 5.309289393998306e-06, "loss": 3.9479, "step": 18203 }, { "epoch": 6.063962688431748, "grad_norm": 0.78125, "learning_rate": 5.30851526838091e-06, "loss": 4.0273, "step": 18204 }, { "epoch": 6.064295827433996, "grad_norm": 0.74609375, "learning_rate": 5.307741168289565e-06, "loss": 4.0293, "step": 18205 }, { "epoch": 6.064628966436246, "grad_norm": 0.796875, "learning_rate": 5.306967093733286e-06, "loss": 3.975, "step": 18206 }, { "epoch": 6.064962105438494, "grad_norm": 0.72265625, "learning_rate": 5.306193044721092e-06, "loss": 4.0046, "step": 18207 }, { "epoch": 6.065295244440743, "grad_norm": 0.77734375, "learning_rate": 5.305419021261995e-06, "loss": 3.9909, "step": 18208 }, { "epoch": 6.065628383442991, "grad_norm": 0.78125, "learning_rate": 5.304645023365017e-06, "loss": 4.019, "step": 18209 }, { "epoch": 6.065961522445241, "grad_norm": 0.828125, "learning_rate": 5.303871051039167e-06, "loss": 3.9901, "step": 18210 }, { "epoch": 6.066294661447489, "grad_norm": 0.765625, "learning_rate": 5.303097104293462e-06, "loss": 4.0045, "step": 18211 }, { "epoch": 6.066627800449738, "grad_norm": 0.7578125, "learning_rate": 5.302323183136917e-06, "loss": 4.0257, "step": 18212 }, { "epoch": 6.066960939451986, "grad_norm": 0.7890625, "learning_rate": 5.301549287578546e-06, "loss": 4.0267, "step": 18213 }, { "epoch": 6.067294078454235, "grad_norm": 0.76953125, "learning_rate": 5.300775417627365e-06, "loss": 3.9553, "step": 18214 }, { "epoch": 6.067627217456484, "grad_norm": 0.77734375, "learning_rate": 5.300001573292387e-06, "loss": 4.0429, "step": 18215 }, { "epoch": 6.067960356458732, "grad_norm": 0.77734375, "learning_rate": 5.299227754582626e-06, "loss": 4.0487, "step": 18216 }, { "epoch": 6.068293495460981, "grad_norm": 0.7890625, "learning_rate": 5.2984539615070936e-06, "loss": 4.0127, "step": 18217 }, { "epoch": 6.06862663446323, "grad_norm": 0.796875, "learning_rate": 5.297680194074806e-06, "loss": 3.9932, "step": 18218 }, { "epoch": 6.068959773465479, "grad_norm": 0.7578125, "learning_rate": 5.296906452294774e-06, "loss": 4.1032, "step": 18219 }, { "epoch": 6.069292912467727, "grad_norm": 0.79296875, "learning_rate": 5.296132736176007e-06, "loss": 3.998, "step": 18220 }, { "epoch": 6.069626051469976, "grad_norm": 0.7734375, "learning_rate": 5.295359045727522e-06, "loss": 4.0403, "step": 18221 }, { "epoch": 6.0699591904722245, "grad_norm": 0.78125, "learning_rate": 5.294585380958328e-06, "loss": 4.0086, "step": 18222 }, { "epoch": 6.070292329474473, "grad_norm": 0.8046875, "learning_rate": 5.293811741877438e-06, "loss": 3.927, "step": 18223 }, { "epoch": 6.070625468476722, "grad_norm": 0.765625, "learning_rate": 5.293038128493862e-06, "loss": 3.9404, "step": 18224 }, { "epoch": 6.07095860747897, "grad_norm": 0.75, "learning_rate": 5.2922645408166124e-06, "loss": 3.9554, "step": 18225 }, { "epoch": 6.0712917464812195, "grad_norm": 0.796875, "learning_rate": 5.291490978854698e-06, "loss": 4.0125, "step": 18226 }, { "epoch": 6.071624885483468, "grad_norm": 0.79296875, "learning_rate": 5.2907174426171325e-06, "loss": 3.9159, "step": 18227 }, { "epoch": 6.071958024485717, "grad_norm": 0.76171875, "learning_rate": 5.289943932112922e-06, "loss": 3.9825, "step": 18228 }, { "epoch": 6.072291163487965, "grad_norm": 0.78515625, "learning_rate": 5.289170447351077e-06, "loss": 4.0043, "step": 18229 }, { "epoch": 6.0726243024902145, "grad_norm": 0.7578125, "learning_rate": 5.288396988340608e-06, "loss": 3.9817, "step": 18230 }, { "epoch": 6.072957441492463, "grad_norm": 0.73828125, "learning_rate": 5.287623555090524e-06, "loss": 3.9717, "step": 18231 }, { "epoch": 6.073290580494711, "grad_norm": 0.8046875, "learning_rate": 5.286850147609832e-06, "loss": 3.9382, "step": 18232 }, { "epoch": 6.07362371949696, "grad_norm": 0.73046875, "learning_rate": 5.2860767659075445e-06, "loss": 4.0848, "step": 18233 }, { "epoch": 6.0739568584992085, "grad_norm": 0.7421875, "learning_rate": 5.285303409992665e-06, "loss": 4.0029, "step": 18234 }, { "epoch": 6.074289997501458, "grad_norm": 0.72265625, "learning_rate": 5.284530079874206e-06, "loss": 3.9106, "step": 18235 }, { "epoch": 6.074623136503706, "grad_norm": 0.73046875, "learning_rate": 5.283756775561174e-06, "loss": 4.0751, "step": 18236 }, { "epoch": 6.074956275505955, "grad_norm": 0.78515625, "learning_rate": 5.282983497062572e-06, "loss": 4.0692, "step": 18237 }, { "epoch": 6.0752894145082035, "grad_norm": 0.734375, "learning_rate": 5.282210244387412e-06, "loss": 4.0318, "step": 18238 }, { "epoch": 6.075622553510453, "grad_norm": 0.7421875, "learning_rate": 5.281437017544697e-06, "loss": 3.9294, "step": 18239 }, { "epoch": 6.075955692512701, "grad_norm": 0.7578125, "learning_rate": 5.280663816543437e-06, "loss": 3.9788, "step": 18240 }, { "epoch": 6.076288831514949, "grad_norm": 0.765625, "learning_rate": 5.279890641392635e-06, "loss": 4.0166, "step": 18241 }, { "epoch": 6.076621970517198, "grad_norm": 0.73828125, "learning_rate": 5.2791174921012995e-06, "loss": 4.0376, "step": 18242 }, { "epoch": 6.076955109519447, "grad_norm": 0.76171875, "learning_rate": 5.278344368678434e-06, "loss": 3.9397, "step": 18243 }, { "epoch": 6.077288248521696, "grad_norm": 0.76953125, "learning_rate": 5.277571271133046e-06, "loss": 3.9117, "step": 18244 }, { "epoch": 6.077621387523944, "grad_norm": 0.71875, "learning_rate": 5.27679819947414e-06, "loss": 3.9954, "step": 18245 }, { "epoch": 6.077954526526193, "grad_norm": 0.7734375, "learning_rate": 5.276025153710717e-06, "loss": 3.9426, "step": 18246 }, { "epoch": 6.078287665528442, "grad_norm": 0.73828125, "learning_rate": 5.275252133851784e-06, "loss": 3.9931, "step": 18247 }, { "epoch": 6.07862080453069, "grad_norm": 0.78125, "learning_rate": 5.274479139906345e-06, "loss": 3.9413, "step": 18248 }, { "epoch": 6.078953943532939, "grad_norm": 0.75390625, "learning_rate": 5.273706171883403e-06, "loss": 3.9985, "step": 18249 }, { "epoch": 6.0792870825351875, "grad_norm": 0.75390625, "learning_rate": 5.272933229791963e-06, "loss": 3.9665, "step": 18250 }, { "epoch": 6.079620221537437, "grad_norm": 0.765625, "learning_rate": 5.272160313641027e-06, "loss": 3.916, "step": 18251 }, { "epoch": 6.079953360539685, "grad_norm": 0.7265625, "learning_rate": 5.271387423439597e-06, "loss": 4.0508, "step": 18252 }, { "epoch": 6.080286499541934, "grad_norm": 0.74609375, "learning_rate": 5.270614559196678e-06, "loss": 4.0656, "step": 18253 }, { "epoch": 6.080619638544182, "grad_norm": 0.7734375, "learning_rate": 5.269841720921271e-06, "loss": 3.9596, "step": 18254 }, { "epoch": 6.080952777546432, "grad_norm": 0.69140625, "learning_rate": 5.269068908622374e-06, "loss": 3.9791, "step": 18255 }, { "epoch": 6.08128591654868, "grad_norm": 0.79296875, "learning_rate": 5.268296122308995e-06, "loss": 4.007, "step": 18256 }, { "epoch": 6.081619055550928, "grad_norm": 0.796875, "learning_rate": 5.26752336199013e-06, "loss": 3.946, "step": 18257 }, { "epoch": 6.081952194553177, "grad_norm": 0.81640625, "learning_rate": 5.266750627674785e-06, "loss": 3.948, "step": 18258 }, { "epoch": 6.082285333555426, "grad_norm": 0.74609375, "learning_rate": 5.265977919371955e-06, "loss": 3.951, "step": 18259 }, { "epoch": 6.082618472557675, "grad_norm": 0.71875, "learning_rate": 5.2652052370906455e-06, "loss": 4.0227, "step": 18260 }, { "epoch": 6.082951611559923, "grad_norm": 0.80078125, "learning_rate": 5.2644325808398535e-06, "loss": 3.9406, "step": 18261 }, { "epoch": 6.083284750562172, "grad_norm": 0.77734375, "learning_rate": 5.2636599506285805e-06, "loss": 3.956, "step": 18262 }, { "epoch": 6.083617889564421, "grad_norm": 0.78125, "learning_rate": 5.262887346465827e-06, "loss": 3.9681, "step": 18263 }, { "epoch": 6.08395102856667, "grad_norm": 0.76953125, "learning_rate": 5.262114768360587e-06, "loss": 4.0121, "step": 18264 }, { "epoch": 6.084284167568918, "grad_norm": 0.75390625, "learning_rate": 5.261342216321864e-06, "loss": 4.0472, "step": 18265 }, { "epoch": 6.084617306571166, "grad_norm": 0.76953125, "learning_rate": 5.260569690358654e-06, "loss": 4.0142, "step": 18266 }, { "epoch": 6.084950445573416, "grad_norm": 0.75390625, "learning_rate": 5.259797190479958e-06, "loss": 4.0151, "step": 18267 }, { "epoch": 6.085283584575664, "grad_norm": 0.75390625, "learning_rate": 5.259024716694771e-06, "loss": 4.0085, "step": 18268 }, { "epoch": 6.085616723577913, "grad_norm": 0.74609375, "learning_rate": 5.258252269012094e-06, "loss": 4.0, "step": 18269 }, { "epoch": 6.085949862580161, "grad_norm": 0.73828125, "learning_rate": 5.257479847440921e-06, "loss": 4.0288, "step": 18270 }, { "epoch": 6.0862830015824105, "grad_norm": 0.74609375, "learning_rate": 5.256707451990251e-06, "loss": 3.8938, "step": 18271 }, { "epoch": 6.086616140584659, "grad_norm": 0.75, "learning_rate": 5.255935082669082e-06, "loss": 4.034, "step": 18272 }, { "epoch": 6.086949279586908, "grad_norm": 0.7421875, "learning_rate": 5.255162739486406e-06, "loss": 3.9727, "step": 18273 }, { "epoch": 6.087282418589156, "grad_norm": 0.77734375, "learning_rate": 5.254390422451224e-06, "loss": 4.0289, "step": 18274 }, { "epoch": 6.087615557591405, "grad_norm": 0.76953125, "learning_rate": 5.253618131572527e-06, "loss": 3.9805, "step": 18275 }, { "epoch": 6.087948696593654, "grad_norm": 0.75390625, "learning_rate": 5.252845866859316e-06, "loss": 3.9821, "step": 18276 }, { "epoch": 6.088281835595902, "grad_norm": 0.76953125, "learning_rate": 5.25207362832058e-06, "loss": 3.9961, "step": 18277 }, { "epoch": 6.088614974598151, "grad_norm": 0.74609375, "learning_rate": 5.251301415965319e-06, "loss": 3.9642, "step": 18278 }, { "epoch": 6.0889481136004, "grad_norm": 0.78125, "learning_rate": 5.2505292298025264e-06, "loss": 4.0131, "step": 18279 }, { "epoch": 6.089281252602649, "grad_norm": 0.78125, "learning_rate": 5.249757069841194e-06, "loss": 3.9746, "step": 18280 }, { "epoch": 6.089614391604897, "grad_norm": 0.74609375, "learning_rate": 5.248984936090322e-06, "loss": 4.0716, "step": 18281 }, { "epoch": 6.089947530607146, "grad_norm": 0.84375, "learning_rate": 5.248212828558896e-06, "loss": 3.9044, "step": 18282 }, { "epoch": 6.0902806696093945, "grad_norm": 0.796875, "learning_rate": 5.247440747255914e-06, "loss": 3.989, "step": 18283 }, { "epoch": 6.090613808611643, "grad_norm": 0.7890625, "learning_rate": 5.2466686921903686e-06, "loss": 3.9592, "step": 18284 }, { "epoch": 6.090946947613892, "grad_norm": 0.7734375, "learning_rate": 5.2458966633712515e-06, "loss": 3.9454, "step": 18285 }, { "epoch": 6.09128008661614, "grad_norm": 0.76953125, "learning_rate": 5.245124660807556e-06, "loss": 4.0239, "step": 18286 }, { "epoch": 6.0916132256183895, "grad_norm": 0.78515625, "learning_rate": 5.244352684508273e-06, "loss": 3.922, "step": 18287 }, { "epoch": 6.091946364620638, "grad_norm": 0.73828125, "learning_rate": 5.243580734482399e-06, "loss": 3.9995, "step": 18288 }, { "epoch": 6.092279503622887, "grad_norm": 0.80859375, "learning_rate": 5.242808810738918e-06, "loss": 3.9986, "step": 18289 }, { "epoch": 6.092612642625135, "grad_norm": 0.76171875, "learning_rate": 5.242036913286829e-06, "loss": 4.0108, "step": 18290 }, { "epoch": 6.092945781627384, "grad_norm": 0.78515625, "learning_rate": 5.241265042135118e-06, "loss": 4.006, "step": 18291 }, { "epoch": 6.093278920629633, "grad_norm": 0.7578125, "learning_rate": 5.240493197292777e-06, "loss": 3.9832, "step": 18292 }, { "epoch": 6.093612059631881, "grad_norm": 0.75, "learning_rate": 5.239721378768795e-06, "loss": 4.0375, "step": 18293 }, { "epoch": 6.09394519863413, "grad_norm": 0.7421875, "learning_rate": 5.238949586572163e-06, "loss": 3.9486, "step": 18294 }, { "epoch": 6.0942783376363785, "grad_norm": 0.7734375, "learning_rate": 5.2381778207118724e-06, "loss": 4.0065, "step": 18295 }, { "epoch": 6.094611476638628, "grad_norm": 0.71484375, "learning_rate": 5.237406081196909e-06, "loss": 3.9928, "step": 18296 }, { "epoch": 6.094944615640876, "grad_norm": 0.83984375, "learning_rate": 5.236634368036266e-06, "loss": 3.9747, "step": 18297 }, { "epoch": 6.095277754643125, "grad_norm": 0.87109375, "learning_rate": 5.235862681238927e-06, "loss": 3.9274, "step": 18298 }, { "epoch": 6.0956108936453735, "grad_norm": 0.76171875, "learning_rate": 5.2350910208138895e-06, "loss": 3.9794, "step": 18299 }, { "epoch": 6.095944032647623, "grad_norm": 0.73046875, "learning_rate": 5.234319386770133e-06, "loss": 3.9805, "step": 18300 }, { "epoch": 6.096277171649871, "grad_norm": 0.7578125, "learning_rate": 5.233547779116646e-06, "loss": 4.0338, "step": 18301 }, { "epoch": 6.096610310652119, "grad_norm": 0.78515625, "learning_rate": 5.232776197862421e-06, "loss": 3.9301, "step": 18302 }, { "epoch": 6.096943449654368, "grad_norm": 0.80078125, "learning_rate": 5.23200464301644e-06, "loss": 4.015, "step": 18303 }, { "epoch": 6.097276588656617, "grad_norm": 0.75, "learning_rate": 5.231233114587694e-06, "loss": 3.9509, "step": 18304 }, { "epoch": 6.097609727658866, "grad_norm": 0.765625, "learning_rate": 5.230461612585167e-06, "loss": 4.0057, "step": 18305 }, { "epoch": 6.097942866661114, "grad_norm": 0.7421875, "learning_rate": 5.229690137017848e-06, "loss": 4.0454, "step": 18306 }, { "epoch": 6.098276005663363, "grad_norm": 0.7578125, "learning_rate": 5.22891868789472e-06, "loss": 3.946, "step": 18307 }, { "epoch": 6.098609144665612, "grad_norm": 0.76171875, "learning_rate": 5.228147265224773e-06, "loss": 4.0553, "step": 18308 }, { "epoch": 6.098942283667861, "grad_norm": 0.8203125, "learning_rate": 5.227375869016988e-06, "loss": 4.003, "step": 18309 }, { "epoch": 6.099275422670109, "grad_norm": 0.79296875, "learning_rate": 5.22660449928035e-06, "loss": 3.9895, "step": 18310 }, { "epoch": 6.099608561672357, "grad_norm": 0.796875, "learning_rate": 5.225833156023846e-06, "loss": 3.9548, "step": 18311 }, { "epoch": 6.099941700674607, "grad_norm": 0.796875, "learning_rate": 5.22506183925646e-06, "loss": 4.0126, "step": 18312 }, { "epoch": 6.100274839676855, "grad_norm": 0.703125, "learning_rate": 5.224290548987177e-06, "loss": 4.0351, "step": 18313 }, { "epoch": 6.100607978679104, "grad_norm": 0.71875, "learning_rate": 5.223519285224978e-06, "loss": 3.9077, "step": 18314 }, { "epoch": 6.100941117681352, "grad_norm": 0.8046875, "learning_rate": 5.222748047978849e-06, "loss": 3.9949, "step": 18315 }, { "epoch": 6.101274256683602, "grad_norm": 0.75390625, "learning_rate": 5.221976837257774e-06, "loss": 3.9961, "step": 18316 }, { "epoch": 6.10160739568585, "grad_norm": 0.7734375, "learning_rate": 5.221205653070736e-06, "loss": 3.984, "step": 18317 }, { "epoch": 6.101940534688099, "grad_norm": 0.81640625, "learning_rate": 5.220434495426715e-06, "loss": 3.9933, "step": 18318 }, { "epoch": 6.102273673690347, "grad_norm": 0.77734375, "learning_rate": 5.219663364334693e-06, "loss": 3.9496, "step": 18319 }, { "epoch": 6.102606812692596, "grad_norm": 0.75, "learning_rate": 5.218892259803656e-06, "loss": 3.9025, "step": 18320 }, { "epoch": 6.102939951694845, "grad_norm": 0.7578125, "learning_rate": 5.218121181842581e-06, "loss": 3.993, "step": 18321 }, { "epoch": 6.103273090697093, "grad_norm": 0.70703125, "learning_rate": 5.2173501304604526e-06, "loss": 3.9545, "step": 18322 }, { "epoch": 6.103606229699342, "grad_norm": 0.71484375, "learning_rate": 5.216579105666249e-06, "loss": 3.9758, "step": 18323 }, { "epoch": 6.103939368701591, "grad_norm": 0.75390625, "learning_rate": 5.2158081074689555e-06, "loss": 3.9634, "step": 18324 }, { "epoch": 6.10427250770384, "grad_norm": 0.7734375, "learning_rate": 5.2150371358775505e-06, "loss": 4.0186, "step": 18325 }, { "epoch": 6.104605646706088, "grad_norm": 0.7734375, "learning_rate": 5.214266190901015e-06, "loss": 3.9727, "step": 18326 }, { "epoch": 6.104938785708336, "grad_norm": 0.78515625, "learning_rate": 5.213495272548323e-06, "loss": 4.0486, "step": 18327 }, { "epoch": 6.1052719247105856, "grad_norm": 0.73046875, "learning_rate": 5.212724380828462e-06, "loss": 4.026, "step": 18328 }, { "epoch": 6.105605063712834, "grad_norm": 0.76953125, "learning_rate": 5.211953515750405e-06, "loss": 4.0069, "step": 18329 }, { "epoch": 6.105938202715083, "grad_norm": 0.7421875, "learning_rate": 5.211182677323136e-06, "loss": 3.9715, "step": 18330 }, { "epoch": 6.106271341717331, "grad_norm": 0.76171875, "learning_rate": 5.2104118655556285e-06, "loss": 3.9619, "step": 18331 }, { "epoch": 6.1066044807195805, "grad_norm": 0.765625, "learning_rate": 5.209641080456866e-06, "loss": 3.9921, "step": 18332 }, { "epoch": 6.106937619721829, "grad_norm": 0.74609375, "learning_rate": 5.208870322035823e-06, "loss": 3.9978, "step": 18333 }, { "epoch": 6.107270758724078, "grad_norm": 0.78125, "learning_rate": 5.208099590301478e-06, "loss": 4.0054, "step": 18334 }, { "epoch": 6.107603897726326, "grad_norm": 0.7421875, "learning_rate": 5.207328885262811e-06, "loss": 4.0343, "step": 18335 }, { "epoch": 6.107937036728575, "grad_norm": 0.75390625, "learning_rate": 5.206558206928793e-06, "loss": 4.0067, "step": 18336 }, { "epoch": 6.108270175730824, "grad_norm": 0.8125, "learning_rate": 5.205787555308407e-06, "loss": 3.9717, "step": 18337 }, { "epoch": 6.108603314733072, "grad_norm": 0.7890625, "learning_rate": 5.205016930410625e-06, "loss": 4.0482, "step": 18338 }, { "epoch": 6.108936453735321, "grad_norm": 0.77734375, "learning_rate": 5.204246332244426e-06, "loss": 4.0291, "step": 18339 }, { "epoch": 6.1092695927375695, "grad_norm": 0.7421875, "learning_rate": 5.203475760818783e-06, "loss": 4.0092, "step": 18340 }, { "epoch": 6.109602731739819, "grad_norm": 0.7734375, "learning_rate": 5.202705216142674e-06, "loss": 3.9676, "step": 18341 }, { "epoch": 6.109935870742067, "grad_norm": 0.7890625, "learning_rate": 5.201934698225072e-06, "loss": 3.9235, "step": 18342 }, { "epoch": 6.110269009744316, "grad_norm": 0.76171875, "learning_rate": 5.201164207074955e-06, "loss": 3.9909, "step": 18343 }, { "epoch": 6.1106021487465645, "grad_norm": 0.7890625, "learning_rate": 5.200393742701297e-06, "loss": 3.96, "step": 18344 }, { "epoch": 6.110935287748813, "grad_norm": 0.765625, "learning_rate": 5.199623305113069e-06, "loss": 4.0418, "step": 18345 }, { "epoch": 6.111268426751062, "grad_norm": 0.7421875, "learning_rate": 5.198852894319247e-06, "loss": 3.9746, "step": 18346 }, { "epoch": 6.11160156575331, "grad_norm": 0.77734375, "learning_rate": 5.198082510328804e-06, "loss": 3.9563, "step": 18347 }, { "epoch": 6.1119347047555594, "grad_norm": 0.7578125, "learning_rate": 5.197312153150715e-06, "loss": 4.0175, "step": 18348 }, { "epoch": 6.112267843757808, "grad_norm": 0.74609375, "learning_rate": 5.196541822793951e-06, "loss": 4.043, "step": 18349 }, { "epoch": 6.112600982760057, "grad_norm": 0.73046875, "learning_rate": 5.195771519267485e-06, "loss": 4.038, "step": 18350 }, { "epoch": 6.112934121762305, "grad_norm": 0.78125, "learning_rate": 5.19500124258029e-06, "loss": 3.9845, "step": 18351 }, { "epoch": 6.113267260764554, "grad_norm": 0.765625, "learning_rate": 5.194230992741339e-06, "loss": 3.9671, "step": 18352 }, { "epoch": 6.113600399766803, "grad_norm": 0.7578125, "learning_rate": 5.1934607697596045e-06, "loss": 3.9879, "step": 18353 }, { "epoch": 6.113933538769051, "grad_norm": 0.81640625, "learning_rate": 5.192690573644052e-06, "loss": 3.9908, "step": 18354 }, { "epoch": 6.1142666777713, "grad_norm": 0.75390625, "learning_rate": 5.19192040440366e-06, "loss": 3.9884, "step": 18355 }, { "epoch": 6.1145998167735485, "grad_norm": 0.7734375, "learning_rate": 5.191150262047393e-06, "loss": 3.9397, "step": 18356 }, { "epoch": 6.114932955775798, "grad_norm": 0.82421875, "learning_rate": 5.190380146584226e-06, "loss": 4.0219, "step": 18357 }, { "epoch": 6.115266094778046, "grad_norm": 0.71484375, "learning_rate": 5.189610058023127e-06, "loss": 3.9838, "step": 18358 }, { "epoch": 6.115599233780295, "grad_norm": 0.76953125, "learning_rate": 5.188839996373067e-06, "loss": 4.0487, "step": 18359 }, { "epoch": 6.115932372782543, "grad_norm": 0.765625, "learning_rate": 5.188069961643015e-06, "loss": 3.9643, "step": 18360 }, { "epoch": 6.116265511784793, "grad_norm": 0.765625, "learning_rate": 5.18729995384194e-06, "loss": 4.0746, "step": 18361 }, { "epoch": 6.116598650787041, "grad_norm": 0.7890625, "learning_rate": 5.1865299729788144e-06, "loss": 3.8842, "step": 18362 }, { "epoch": 6.116931789789289, "grad_norm": 0.78515625, "learning_rate": 5.1857600190625994e-06, "loss": 3.9811, "step": 18363 }, { "epoch": 6.117264928791538, "grad_norm": 0.7578125, "learning_rate": 5.1849900921022695e-06, "loss": 4.0084, "step": 18364 }, { "epoch": 6.117598067793787, "grad_norm": 0.8203125, "learning_rate": 5.18422019210679e-06, "loss": 3.9104, "step": 18365 }, { "epoch": 6.117931206796036, "grad_norm": 0.73046875, "learning_rate": 5.1834503190851305e-06, "loss": 3.9617, "step": 18366 }, { "epoch": 6.118264345798284, "grad_norm": 0.76171875, "learning_rate": 5.182680473046255e-06, "loss": 3.963, "step": 18367 }, { "epoch": 6.118597484800533, "grad_norm": 0.7890625, "learning_rate": 5.1819106539991345e-06, "loss": 4.0082, "step": 18368 }, { "epoch": 6.118930623802782, "grad_norm": 0.74609375, "learning_rate": 5.181140861952732e-06, "loss": 3.9906, "step": 18369 }, { "epoch": 6.119263762805031, "grad_norm": 0.765625, "learning_rate": 5.180371096916018e-06, "loss": 3.9803, "step": 18370 }, { "epoch": 6.119596901807279, "grad_norm": 0.74609375, "learning_rate": 5.179601358897957e-06, "loss": 3.8972, "step": 18371 }, { "epoch": 6.119930040809527, "grad_norm": 0.7265625, "learning_rate": 5.178831647907514e-06, "loss": 4.0594, "step": 18372 }, { "epoch": 6.120263179811777, "grad_norm": 0.74609375, "learning_rate": 5.178061963953652e-06, "loss": 4.0249, "step": 18373 }, { "epoch": 6.120596318814025, "grad_norm": 0.7578125, "learning_rate": 5.177292307045341e-06, "loss": 3.9828, "step": 18374 }, { "epoch": 6.120929457816274, "grad_norm": 0.76171875, "learning_rate": 5.1765226771915416e-06, "loss": 3.9539, "step": 18375 }, { "epoch": 6.121262596818522, "grad_norm": 0.79296875, "learning_rate": 5.175753074401222e-06, "loss": 3.9939, "step": 18376 }, { "epoch": 6.1215957358207715, "grad_norm": 0.78125, "learning_rate": 5.1749834986833435e-06, "loss": 3.9997, "step": 18377 }, { "epoch": 6.12192887482302, "grad_norm": 0.7578125, "learning_rate": 5.174213950046873e-06, "loss": 4.0252, "step": 18378 }, { "epoch": 6.122262013825269, "grad_norm": 0.8125, "learning_rate": 5.173444428500771e-06, "loss": 3.964, "step": 18379 }, { "epoch": 6.122595152827517, "grad_norm": 0.71484375, "learning_rate": 5.172674934054004e-06, "loss": 4.0101, "step": 18380 }, { "epoch": 6.122928291829766, "grad_norm": 0.74609375, "learning_rate": 5.171905466715533e-06, "loss": 3.947, "step": 18381 }, { "epoch": 6.123261430832015, "grad_norm": 0.76171875, "learning_rate": 5.171136026494318e-06, "loss": 3.9677, "step": 18382 }, { "epoch": 6.123594569834263, "grad_norm": 0.765625, "learning_rate": 5.170366613399326e-06, "loss": 3.9846, "step": 18383 }, { "epoch": 6.123927708836512, "grad_norm": 0.7109375, "learning_rate": 5.169597227439515e-06, "loss": 3.92, "step": 18384 }, { "epoch": 6.124260847838761, "grad_norm": 0.77734375, "learning_rate": 5.16882786862385e-06, "loss": 3.9781, "step": 18385 }, { "epoch": 6.12459398684101, "grad_norm": 0.78515625, "learning_rate": 5.16805853696129e-06, "loss": 3.9781, "step": 18386 }, { "epoch": 6.124927125843258, "grad_norm": 0.765625, "learning_rate": 5.167289232460799e-06, "loss": 3.8962, "step": 18387 }, { "epoch": 6.125260264845506, "grad_norm": 0.75390625, "learning_rate": 5.166519955131333e-06, "loss": 3.9908, "step": 18388 }, { "epoch": 6.1255934038477555, "grad_norm": 0.73828125, "learning_rate": 5.16575070498186e-06, "loss": 3.9647, "step": 18389 }, { "epoch": 6.125926542850004, "grad_norm": 0.76953125, "learning_rate": 5.164981482021333e-06, "loss": 4.0266, "step": 18390 }, { "epoch": 6.126259681852253, "grad_norm": 0.78125, "learning_rate": 5.164212286258713e-06, "loss": 4.0353, "step": 18391 }, { "epoch": 6.126592820854501, "grad_norm": 0.703125, "learning_rate": 5.16344311770296e-06, "loss": 3.9524, "step": 18392 }, { "epoch": 6.1269259598567505, "grad_norm": 0.75390625, "learning_rate": 5.162673976363033e-06, "loss": 3.9514, "step": 18393 }, { "epoch": 6.127259098858999, "grad_norm": 0.77734375, "learning_rate": 5.161904862247895e-06, "loss": 3.9269, "step": 18394 }, { "epoch": 6.127592237861248, "grad_norm": 0.8125, "learning_rate": 5.161135775366498e-06, "loss": 3.9638, "step": 18395 }, { "epoch": 6.127925376863496, "grad_norm": 0.73046875, "learning_rate": 5.160366715727806e-06, "loss": 3.9636, "step": 18396 }, { "epoch": 6.128258515865745, "grad_norm": 0.7421875, "learning_rate": 5.159597683340774e-06, "loss": 3.9312, "step": 18397 }, { "epoch": 6.128591654867994, "grad_norm": 0.77734375, "learning_rate": 5.1588286782143575e-06, "loss": 3.9595, "step": 18398 }, { "epoch": 6.128924793870242, "grad_norm": 0.79296875, "learning_rate": 5.158059700357518e-06, "loss": 4.0023, "step": 18399 }, { "epoch": 6.129257932872491, "grad_norm": 0.75390625, "learning_rate": 5.157290749779208e-06, "loss": 3.9621, "step": 18400 }, { "epoch": 6.1295910718747395, "grad_norm": 0.76953125, "learning_rate": 5.156521826488387e-06, "loss": 4.0402, "step": 18401 }, { "epoch": 6.129924210876989, "grad_norm": 0.79296875, "learning_rate": 5.155752930494011e-06, "loss": 3.9642, "step": 18402 }, { "epoch": 6.130257349879237, "grad_norm": 0.73828125, "learning_rate": 5.1549840618050365e-06, "loss": 4.0494, "step": 18403 }, { "epoch": 6.130590488881486, "grad_norm": 0.76953125, "learning_rate": 5.154215220430417e-06, "loss": 3.9461, "step": 18404 }, { "epoch": 6.1309236278837345, "grad_norm": 0.76953125, "learning_rate": 5.153446406379111e-06, "loss": 3.9544, "step": 18405 }, { "epoch": 6.131256766885983, "grad_norm": 0.796875, "learning_rate": 5.1526776196600725e-06, "loss": 3.8959, "step": 18406 }, { "epoch": 6.131589905888232, "grad_norm": 0.75, "learning_rate": 5.151908860282253e-06, "loss": 3.9659, "step": 18407 }, { "epoch": 6.13192304489048, "grad_norm": 0.77734375, "learning_rate": 5.151140128254611e-06, "loss": 3.9812, "step": 18408 }, { "epoch": 6.132256183892729, "grad_norm": 0.796875, "learning_rate": 5.150371423586097e-06, "loss": 3.979, "step": 18409 }, { "epoch": 6.132589322894978, "grad_norm": 0.890625, "learning_rate": 5.149602746285668e-06, "loss": 3.9882, "step": 18410 }, { "epoch": 6.132922461897227, "grad_norm": 0.7734375, "learning_rate": 5.148834096362275e-06, "loss": 4.0119, "step": 18411 }, { "epoch": 6.133255600899475, "grad_norm": 0.75, "learning_rate": 5.148065473824873e-06, "loss": 4.0331, "step": 18412 }, { "epoch": 6.133588739901724, "grad_norm": 0.79296875, "learning_rate": 5.1472968786824135e-06, "loss": 4.0329, "step": 18413 }, { "epoch": 6.133921878903973, "grad_norm": 0.75, "learning_rate": 5.146528310943851e-06, "loss": 3.9595, "step": 18414 }, { "epoch": 6.134255017906221, "grad_norm": 0.7890625, "learning_rate": 5.145759770618136e-06, "loss": 4.0211, "step": 18415 }, { "epoch": 6.13458815690847, "grad_norm": 0.765625, "learning_rate": 5.14499125771422e-06, "loss": 4.0185, "step": 18416 }, { "epoch": 6.134921295910718, "grad_norm": 0.76953125, "learning_rate": 5.144222772241053e-06, "loss": 4.0024, "step": 18417 }, { "epoch": 6.135254434912968, "grad_norm": 0.80078125, "learning_rate": 5.143454314207589e-06, "loss": 3.9895, "step": 18418 }, { "epoch": 6.135587573915216, "grad_norm": 0.77734375, "learning_rate": 5.142685883622777e-06, "loss": 4.0083, "step": 18419 }, { "epoch": 6.135920712917465, "grad_norm": 0.78125, "learning_rate": 5.141917480495572e-06, "loss": 3.9879, "step": 18420 }, { "epoch": 6.136253851919713, "grad_norm": 0.80078125, "learning_rate": 5.141149104834916e-06, "loss": 3.9838, "step": 18421 }, { "epoch": 6.136586990921963, "grad_norm": 0.7421875, "learning_rate": 5.1403807566497675e-06, "loss": 3.9878, "step": 18422 }, { "epoch": 6.136920129924211, "grad_norm": 0.76171875, "learning_rate": 5.13961243594907e-06, "loss": 3.9988, "step": 18423 }, { "epoch": 6.137253268926459, "grad_norm": 0.77734375, "learning_rate": 5.138844142741777e-06, "loss": 4.0519, "step": 18424 }, { "epoch": 6.137586407928708, "grad_norm": 0.75390625, "learning_rate": 5.138075877036835e-06, "loss": 3.9781, "step": 18425 }, { "epoch": 6.137919546930957, "grad_norm": 0.796875, "learning_rate": 5.137307638843192e-06, "loss": 3.9585, "step": 18426 }, { "epoch": 6.138252685933206, "grad_norm": 0.78125, "learning_rate": 5.136539428169799e-06, "loss": 3.9902, "step": 18427 }, { "epoch": 6.138585824935454, "grad_norm": 0.78125, "learning_rate": 5.1357712450256e-06, "loss": 4.0326, "step": 18428 }, { "epoch": 6.138918963937703, "grad_norm": 0.75, "learning_rate": 5.135003089419548e-06, "loss": 4.0971, "step": 18429 }, { "epoch": 6.139252102939952, "grad_norm": 0.765625, "learning_rate": 5.134234961360584e-06, "loss": 3.9959, "step": 18430 }, { "epoch": 6.139585241942201, "grad_norm": 0.78125, "learning_rate": 5.13346686085766e-06, "loss": 3.9526, "step": 18431 }, { "epoch": 6.139918380944449, "grad_norm": 0.7265625, "learning_rate": 5.132698787919721e-06, "loss": 4.0146, "step": 18432 }, { "epoch": 6.140251519946697, "grad_norm": 0.75, "learning_rate": 5.131930742555717e-06, "loss": 4.002, "step": 18433 }, { "epoch": 6.140584658948947, "grad_norm": 0.73828125, "learning_rate": 5.131162724774588e-06, "loss": 4.0716, "step": 18434 }, { "epoch": 6.140917797951195, "grad_norm": 0.73828125, "learning_rate": 5.130394734585282e-06, "loss": 4.0372, "step": 18435 }, { "epoch": 6.141250936953444, "grad_norm": 0.75, "learning_rate": 5.129626771996744e-06, "loss": 4.0049, "step": 18436 }, { "epoch": 6.141584075955692, "grad_norm": 0.79296875, "learning_rate": 5.12885883701792e-06, "loss": 4.0286, "step": 18437 }, { "epoch": 6.1419172149579415, "grad_norm": 0.8046875, "learning_rate": 5.1280909296577555e-06, "loss": 3.8941, "step": 18438 }, { "epoch": 6.14225035396019, "grad_norm": 0.734375, "learning_rate": 5.127323049925194e-06, "loss": 4.0527, "step": 18439 }, { "epoch": 6.142583492962439, "grad_norm": 0.7890625, "learning_rate": 5.126555197829179e-06, "loss": 3.9964, "step": 18440 }, { "epoch": 6.142916631964687, "grad_norm": 0.75390625, "learning_rate": 5.125787373378655e-06, "loss": 4.0221, "step": 18441 }, { "epoch": 6.143249770966936, "grad_norm": 0.75, "learning_rate": 5.125019576582569e-06, "loss": 4.0486, "step": 18442 }, { "epoch": 6.143582909969185, "grad_norm": 0.75390625, "learning_rate": 5.124251807449859e-06, "loss": 4.0158, "step": 18443 }, { "epoch": 6.143916048971433, "grad_norm": 0.7734375, "learning_rate": 5.123484065989468e-06, "loss": 3.9314, "step": 18444 }, { "epoch": 6.144249187973682, "grad_norm": 0.81640625, "learning_rate": 5.122716352210341e-06, "loss": 4.0465, "step": 18445 }, { "epoch": 6.1445823269759305, "grad_norm": 0.72265625, "learning_rate": 5.121948666121417e-06, "loss": 4.0439, "step": 18446 }, { "epoch": 6.14491546597818, "grad_norm": 0.73828125, "learning_rate": 5.121181007731644e-06, "loss": 3.9401, "step": 18447 }, { "epoch": 6.145248604980428, "grad_norm": 0.7578125, "learning_rate": 5.120413377049957e-06, "loss": 3.993, "step": 18448 }, { "epoch": 6.145581743982676, "grad_norm": 0.7421875, "learning_rate": 5.119645774085302e-06, "loss": 4.016, "step": 18449 }, { "epoch": 6.1459148829849255, "grad_norm": 0.79296875, "learning_rate": 5.118878198846616e-06, "loss": 3.9082, "step": 18450 }, { "epoch": 6.146248021987174, "grad_norm": 0.76953125, "learning_rate": 5.118110651342844e-06, "loss": 4.0196, "step": 18451 }, { "epoch": 6.146581160989423, "grad_norm": 0.8203125, "learning_rate": 5.117343131582923e-06, "loss": 3.9531, "step": 18452 }, { "epoch": 6.146914299991671, "grad_norm": 0.78125, "learning_rate": 5.1165756395757915e-06, "loss": 3.9932, "step": 18453 }, { "epoch": 6.1472474389939205, "grad_norm": 0.75390625, "learning_rate": 5.115808175330394e-06, "loss": 3.9577, "step": 18454 }, { "epoch": 6.147580577996169, "grad_norm": 0.78515625, "learning_rate": 5.115040738855664e-06, "loss": 3.963, "step": 18455 }, { "epoch": 6.147913716998418, "grad_norm": 0.79296875, "learning_rate": 5.1142733301605465e-06, "loss": 4.017, "step": 18456 }, { "epoch": 6.148246856000666, "grad_norm": 0.78515625, "learning_rate": 5.113505949253974e-06, "loss": 4.023, "step": 18457 }, { "epoch": 6.148579995002915, "grad_norm": 0.78125, "learning_rate": 5.112738596144892e-06, "loss": 4.0722, "step": 18458 }, { "epoch": 6.148913134005164, "grad_norm": 0.73828125, "learning_rate": 5.111971270842231e-06, "loss": 3.9989, "step": 18459 }, { "epoch": 6.149246273007412, "grad_norm": 0.7890625, "learning_rate": 5.111203973354938e-06, "loss": 4.0261, "step": 18460 }, { "epoch": 6.149579412009661, "grad_norm": 0.77734375, "learning_rate": 5.110436703691939e-06, "loss": 4.0144, "step": 18461 }, { "epoch": 6.1499125510119095, "grad_norm": 0.78125, "learning_rate": 5.109669461862179e-06, "loss": 3.9876, "step": 18462 }, { "epoch": 6.150245690014159, "grad_norm": 0.71875, "learning_rate": 5.10890224787459e-06, "loss": 3.9602, "step": 18463 }, { "epoch": 6.150578829016407, "grad_norm": 0.75, "learning_rate": 5.108135061738113e-06, "loss": 3.886, "step": 18464 }, { "epoch": 6.150911968018656, "grad_norm": 0.76953125, "learning_rate": 5.107367903461679e-06, "loss": 3.9786, "step": 18465 }, { "epoch": 6.151245107020904, "grad_norm": 0.8359375, "learning_rate": 5.10660077305423e-06, "loss": 3.954, "step": 18466 }, { "epoch": 6.151578246023153, "grad_norm": 0.76953125, "learning_rate": 5.105833670524695e-06, "loss": 3.9226, "step": 18467 }, { "epoch": 6.151911385025402, "grad_norm": 0.82421875, "learning_rate": 5.105066595882014e-06, "loss": 3.9533, "step": 18468 }, { "epoch": 6.15224452402765, "grad_norm": 0.73828125, "learning_rate": 5.104299549135121e-06, "loss": 3.9865, "step": 18469 }, { "epoch": 6.152577663029899, "grad_norm": 0.80078125, "learning_rate": 5.103532530292947e-06, "loss": 3.9683, "step": 18470 }, { "epoch": 6.152910802032148, "grad_norm": 0.796875, "learning_rate": 5.1027655393644284e-06, "loss": 3.9735, "step": 18471 }, { "epoch": 6.153243941034397, "grad_norm": 0.7890625, "learning_rate": 5.101998576358498e-06, "loss": 3.9973, "step": 18472 }, { "epoch": 6.153577080036645, "grad_norm": 0.77734375, "learning_rate": 5.101231641284092e-06, "loss": 3.9613, "step": 18473 }, { "epoch": 6.153910219038894, "grad_norm": 0.76171875, "learning_rate": 5.100464734150139e-06, "loss": 3.9627, "step": 18474 }, { "epoch": 6.154243358041143, "grad_norm": 0.73828125, "learning_rate": 5.0996978549655774e-06, "loss": 3.9583, "step": 18475 }, { "epoch": 6.154576497043391, "grad_norm": 0.77734375, "learning_rate": 5.098931003739335e-06, "loss": 4.0278, "step": 18476 }, { "epoch": 6.15490963604564, "grad_norm": 0.796875, "learning_rate": 5.0981641804803454e-06, "loss": 4.0532, "step": 18477 }, { "epoch": 6.155242775047888, "grad_norm": 0.76171875, "learning_rate": 5.097397385197545e-06, "loss": 3.9867, "step": 18478 }, { "epoch": 6.155575914050138, "grad_norm": 0.78125, "learning_rate": 5.0966306178998565e-06, "loss": 4.0734, "step": 18479 }, { "epoch": 6.155909053052386, "grad_norm": 0.79296875, "learning_rate": 5.0958638785962166e-06, "loss": 3.9308, "step": 18480 }, { "epoch": 6.156242192054635, "grad_norm": 0.75390625, "learning_rate": 5.0950971672955545e-06, "loss": 4.0266, "step": 18481 }, { "epoch": 6.156575331056883, "grad_norm": 0.81640625, "learning_rate": 5.094330484006803e-06, "loss": 3.9783, "step": 18482 }, { "epoch": 6.1569084700591326, "grad_norm": 0.83203125, "learning_rate": 5.093563828738888e-06, "loss": 3.9575, "step": 18483 }, { "epoch": 6.157241609061381, "grad_norm": 0.8203125, "learning_rate": 5.092797201500745e-06, "loss": 3.9639, "step": 18484 }, { "epoch": 6.157574748063629, "grad_norm": 0.8359375, "learning_rate": 5.0920306023012985e-06, "loss": 3.9643, "step": 18485 }, { "epoch": 6.157907887065878, "grad_norm": 0.765625, "learning_rate": 5.091264031149481e-06, "loss": 3.9706, "step": 18486 }, { "epoch": 6.158241026068127, "grad_norm": 0.7578125, "learning_rate": 5.090497488054223e-06, "loss": 3.9801, "step": 18487 }, { "epoch": 6.158574165070376, "grad_norm": 0.7578125, "learning_rate": 5.0897309730244455e-06, "loss": 4.0081, "step": 18488 }, { "epoch": 6.158907304072624, "grad_norm": 0.77734375, "learning_rate": 5.088964486069085e-06, "loss": 3.98, "step": 18489 }, { "epoch": 6.159240443074873, "grad_norm": 0.79296875, "learning_rate": 5.088198027197063e-06, "loss": 4.0733, "step": 18490 }, { "epoch": 6.159573582077122, "grad_norm": 0.88671875, "learning_rate": 5.0874315964173116e-06, "loss": 3.9503, "step": 18491 }, { "epoch": 6.159906721079371, "grad_norm": 0.80078125, "learning_rate": 5.086665193738756e-06, "loss": 3.9645, "step": 18492 }, { "epoch": 6.160239860081619, "grad_norm": 0.796875, "learning_rate": 5.085898819170324e-06, "loss": 3.9945, "step": 18493 }, { "epoch": 6.160572999083867, "grad_norm": 0.765625, "learning_rate": 5.085132472720941e-06, "loss": 3.9903, "step": 18494 }, { "epoch": 6.1609061380861165, "grad_norm": 0.75, "learning_rate": 5.084366154399535e-06, "loss": 4.0068, "step": 18495 }, { "epoch": 6.161239277088365, "grad_norm": 0.76953125, "learning_rate": 5.083599864215033e-06, "loss": 4.0395, "step": 18496 }, { "epoch": 6.161572416090614, "grad_norm": 0.80859375, "learning_rate": 5.082833602176355e-06, "loss": 3.9476, "step": 18497 }, { "epoch": 6.161905555092862, "grad_norm": 0.7421875, "learning_rate": 5.082067368292433e-06, "loss": 3.9929, "step": 18498 }, { "epoch": 6.1622386940951115, "grad_norm": 0.78125, "learning_rate": 5.081301162572186e-06, "loss": 3.9435, "step": 18499 }, { "epoch": 6.16257183309736, "grad_norm": 0.8046875, "learning_rate": 5.080534985024544e-06, "loss": 3.9229, "step": 18500 }, { "epoch": 6.162904972099609, "grad_norm": 0.765625, "learning_rate": 5.0797688356584264e-06, "loss": 3.9416, "step": 18501 }, { "epoch": 6.163238111101857, "grad_norm": 0.7890625, "learning_rate": 5.0790027144827625e-06, "loss": 4.0131, "step": 18502 }, { "epoch": 6.163571250104106, "grad_norm": 0.7421875, "learning_rate": 5.0782366215064695e-06, "loss": 4.0038, "step": 18503 }, { "epoch": 6.163904389106355, "grad_norm": 0.7734375, "learning_rate": 5.077470556738478e-06, "loss": 3.9536, "step": 18504 }, { "epoch": 6.164237528108603, "grad_norm": 0.75, "learning_rate": 5.076704520187708e-06, "loss": 3.9712, "step": 18505 }, { "epoch": 6.164570667110852, "grad_norm": 0.74609375, "learning_rate": 5.07593851186308e-06, "loss": 4.0305, "step": 18506 }, { "epoch": 6.1649038061131005, "grad_norm": 0.796875, "learning_rate": 5.075172531773515e-06, "loss": 3.9817, "step": 18507 }, { "epoch": 6.16523694511535, "grad_norm": 0.765625, "learning_rate": 5.074406579927941e-06, "loss": 3.9487, "step": 18508 }, { "epoch": 6.165570084117598, "grad_norm": 0.73828125, "learning_rate": 5.073640656335275e-06, "loss": 4.062, "step": 18509 }, { "epoch": 6.165903223119847, "grad_norm": 0.75390625, "learning_rate": 5.0728747610044406e-06, "loss": 4.0017, "step": 18510 }, { "epoch": 6.1662363621220955, "grad_norm": 0.79296875, "learning_rate": 5.072108893944357e-06, "loss": 3.9615, "step": 18511 }, { "epoch": 6.166569501124344, "grad_norm": 0.7890625, "learning_rate": 5.0713430551639475e-06, "loss": 3.9306, "step": 18512 }, { "epoch": 6.166902640126593, "grad_norm": 0.76171875, "learning_rate": 5.070577244672129e-06, "loss": 3.8803, "step": 18513 }, { "epoch": 6.167235779128841, "grad_norm": 0.78125, "learning_rate": 5.069811462477827e-06, "loss": 4.0065, "step": 18514 }, { "epoch": 6.16756891813109, "grad_norm": 0.79296875, "learning_rate": 5.069045708589956e-06, "loss": 3.8817, "step": 18515 }, { "epoch": 6.167902057133339, "grad_norm": 0.734375, "learning_rate": 5.0682799830174355e-06, "loss": 4.0195, "step": 18516 }, { "epoch": 6.168235196135588, "grad_norm": 0.76953125, "learning_rate": 5.0675142857691875e-06, "loss": 4.019, "step": 18517 }, { "epoch": 6.168568335137836, "grad_norm": 0.73828125, "learning_rate": 5.066748616854128e-06, "loss": 3.9303, "step": 18518 }, { "epoch": 6.168901474140085, "grad_norm": 0.7890625, "learning_rate": 5.065982976281179e-06, "loss": 3.9593, "step": 18519 }, { "epoch": 6.169234613142334, "grad_norm": 0.734375, "learning_rate": 5.0652173640592535e-06, "loss": 3.9716, "step": 18520 }, { "epoch": 6.169567752144582, "grad_norm": 0.7890625, "learning_rate": 5.0644517801972735e-06, "loss": 3.9933, "step": 18521 }, { "epoch": 6.169900891146831, "grad_norm": 0.7890625, "learning_rate": 5.063686224704154e-06, "loss": 3.9338, "step": 18522 }, { "epoch": 6.1702340301490795, "grad_norm": 0.73828125, "learning_rate": 5.062920697588816e-06, "loss": 3.9863, "step": 18523 }, { "epoch": 6.170567169151329, "grad_norm": 0.8125, "learning_rate": 5.06215519886017e-06, "loss": 3.9739, "step": 18524 }, { "epoch": 6.170900308153577, "grad_norm": 0.7421875, "learning_rate": 5.061389728527136e-06, "loss": 4.0185, "step": 18525 }, { "epoch": 6.171233447155826, "grad_norm": 0.75390625, "learning_rate": 5.060624286598631e-06, "loss": 3.9595, "step": 18526 }, { "epoch": 6.171566586158074, "grad_norm": 0.765625, "learning_rate": 5.059858873083567e-06, "loss": 4.0205, "step": 18527 }, { "epoch": 6.171899725160323, "grad_norm": 0.76953125, "learning_rate": 5.059093487990864e-06, "loss": 3.973, "step": 18528 }, { "epoch": 6.172232864162572, "grad_norm": 0.7578125, "learning_rate": 5.058328131329434e-06, "loss": 3.9554, "step": 18529 }, { "epoch": 6.17256600316482, "grad_norm": 0.74609375, "learning_rate": 5.0575628031081936e-06, "loss": 3.9188, "step": 18530 }, { "epoch": 6.172899142167069, "grad_norm": 0.82421875, "learning_rate": 5.056797503336054e-06, "loss": 3.9843, "step": 18531 }, { "epoch": 6.173232281169318, "grad_norm": 0.7578125, "learning_rate": 5.056032232021934e-06, "loss": 4.008, "step": 18532 }, { "epoch": 6.173565420171567, "grad_norm": 0.734375, "learning_rate": 5.055266989174745e-06, "loss": 4.0897, "step": 18533 }, { "epoch": 6.173898559173815, "grad_norm": 0.74609375, "learning_rate": 5.054501774803398e-06, "loss": 3.9862, "step": 18534 }, { "epoch": 6.174231698176064, "grad_norm": 0.7890625, "learning_rate": 5.0537365889168096e-06, "loss": 3.9535, "step": 18535 }, { "epoch": 6.174564837178313, "grad_norm": 0.796875, "learning_rate": 5.05297143152389e-06, "loss": 3.9849, "step": 18536 }, { "epoch": 6.174897976180561, "grad_norm": 0.76171875, "learning_rate": 5.052206302633554e-06, "loss": 3.9274, "step": 18537 }, { "epoch": 6.17523111518281, "grad_norm": 0.76953125, "learning_rate": 5.051441202254713e-06, "loss": 4.0479, "step": 18538 }, { "epoch": 6.175564254185058, "grad_norm": 0.75, "learning_rate": 5.050676130396279e-06, "loss": 4.0451, "step": 18539 }, { "epoch": 6.175897393187308, "grad_norm": 0.75390625, "learning_rate": 5.04991108706716e-06, "loss": 4.0422, "step": 18540 }, { "epoch": 6.176230532189556, "grad_norm": 0.74609375, "learning_rate": 5.049146072276275e-06, "loss": 4.0339, "step": 18541 }, { "epoch": 6.176563671191805, "grad_norm": 0.78125, "learning_rate": 5.048381086032528e-06, "loss": 3.9122, "step": 18542 }, { "epoch": 6.176896810194053, "grad_norm": 0.765625, "learning_rate": 5.047616128344829e-06, "loss": 3.9501, "step": 18543 }, { "epoch": 6.1772299491963025, "grad_norm": 0.71875, "learning_rate": 5.046851199222091e-06, "loss": 4.0213, "step": 18544 }, { "epoch": 6.177563088198551, "grad_norm": 0.7578125, "learning_rate": 5.046086298673222e-06, "loss": 4.0093, "step": 18545 }, { "epoch": 6.177896227200799, "grad_norm": 0.78515625, "learning_rate": 5.045321426707135e-06, "loss": 3.9409, "step": 18546 }, { "epoch": 6.178229366203048, "grad_norm": 0.74609375, "learning_rate": 5.044556583332734e-06, "loss": 4.043, "step": 18547 }, { "epoch": 6.178562505205297, "grad_norm": 0.71484375, "learning_rate": 5.043791768558931e-06, "loss": 4.0436, "step": 18548 }, { "epoch": 6.178895644207546, "grad_norm": 0.7890625, "learning_rate": 5.043026982394634e-06, "loss": 3.9428, "step": 18549 }, { "epoch": 6.179228783209794, "grad_norm": 0.7890625, "learning_rate": 5.042262224848754e-06, "loss": 4.0256, "step": 18550 }, { "epoch": 6.179561922212043, "grad_norm": 0.80859375, "learning_rate": 5.041497495930192e-06, "loss": 4.0104, "step": 18551 }, { "epoch": 6.1798950612142916, "grad_norm": 0.76953125, "learning_rate": 5.04073279564786e-06, "loss": 4.0042, "step": 18552 }, { "epoch": 6.180228200216541, "grad_norm": 0.75, "learning_rate": 5.039968124010663e-06, "loss": 4.0372, "step": 18553 }, { "epoch": 6.180561339218789, "grad_norm": 0.7578125, "learning_rate": 5.039203481027511e-06, "loss": 3.9573, "step": 18554 }, { "epoch": 6.180894478221037, "grad_norm": 0.7578125, "learning_rate": 5.0384388667073065e-06, "loss": 3.9882, "step": 18555 }, { "epoch": 6.1812276172232865, "grad_norm": 0.77734375, "learning_rate": 5.037674281058959e-06, "loss": 4.0019, "step": 18556 }, { "epoch": 6.181560756225535, "grad_norm": 0.76953125, "learning_rate": 5.03690972409137e-06, "loss": 4.0426, "step": 18557 }, { "epoch": 6.181893895227784, "grad_norm": 0.79296875, "learning_rate": 5.036145195813451e-06, "loss": 3.947, "step": 18558 }, { "epoch": 6.182227034230032, "grad_norm": 0.80078125, "learning_rate": 5.035380696234103e-06, "loss": 3.9395, "step": 18559 }, { "epoch": 6.1825601732322815, "grad_norm": 0.72265625, "learning_rate": 5.034616225362231e-06, "loss": 3.9831, "step": 18560 }, { "epoch": 6.18289331223453, "grad_norm": 0.78125, "learning_rate": 5.033851783206741e-06, "loss": 4.0455, "step": 18561 }, { "epoch": 6.183226451236779, "grad_norm": 0.75, "learning_rate": 5.033087369776534e-06, "loss": 3.9823, "step": 18562 }, { "epoch": 6.183559590239027, "grad_norm": 0.7734375, "learning_rate": 5.032322985080517e-06, "loss": 3.9692, "step": 18563 }, { "epoch": 6.1838927292412755, "grad_norm": 0.74609375, "learning_rate": 5.031558629127592e-06, "loss": 4.0031, "step": 18564 }, { "epoch": 6.184225868243525, "grad_norm": 0.73828125, "learning_rate": 5.030794301926662e-06, "loss": 3.9935, "step": 18565 }, { "epoch": 6.184559007245773, "grad_norm": 0.75390625, "learning_rate": 5.0300300034866295e-06, "loss": 3.9294, "step": 18566 }, { "epoch": 6.184892146248022, "grad_norm": 0.78515625, "learning_rate": 5.029265733816399e-06, "loss": 3.9742, "step": 18567 }, { "epoch": 6.1852252852502705, "grad_norm": 0.72265625, "learning_rate": 5.028501492924873e-06, "loss": 4.0082, "step": 18568 }, { "epoch": 6.18555842425252, "grad_norm": 0.78125, "learning_rate": 5.027737280820948e-06, "loss": 3.8778, "step": 18569 }, { "epoch": 6.185891563254768, "grad_norm": 0.7421875, "learning_rate": 5.02697309751353e-06, "loss": 3.9528, "step": 18570 }, { "epoch": 6.186224702257017, "grad_norm": 0.6953125, "learning_rate": 5.026208943011517e-06, "loss": 4.0715, "step": 18571 }, { "epoch": 6.1865578412592654, "grad_norm": 0.7578125, "learning_rate": 5.025444817323814e-06, "loss": 3.9587, "step": 18572 }, { "epoch": 6.186890980261514, "grad_norm": 0.75390625, "learning_rate": 5.024680720459317e-06, "loss": 3.9564, "step": 18573 }, { "epoch": 6.187224119263763, "grad_norm": 0.765625, "learning_rate": 5.02391665242693e-06, "loss": 3.9099, "step": 18574 }, { "epoch": 6.187557258266011, "grad_norm": 0.7734375, "learning_rate": 5.023152613235548e-06, "loss": 4.0031, "step": 18575 }, { "epoch": 6.18789039726826, "grad_norm": 0.765625, "learning_rate": 5.022388602894077e-06, "loss": 3.9136, "step": 18576 }, { "epoch": 6.188223536270509, "grad_norm": 0.80859375, "learning_rate": 5.021624621411412e-06, "loss": 3.9692, "step": 18577 }, { "epoch": 6.188556675272758, "grad_norm": 0.8046875, "learning_rate": 5.02086066879645e-06, "loss": 3.9086, "step": 18578 }, { "epoch": 6.188889814275006, "grad_norm": 0.7734375, "learning_rate": 5.020096745058092e-06, "loss": 4.0247, "step": 18579 }, { "epoch": 6.189222953277255, "grad_norm": 0.73828125, "learning_rate": 5.019332850205235e-06, "loss": 4.0442, "step": 18580 }, { "epoch": 6.189556092279504, "grad_norm": 0.78125, "learning_rate": 5.018568984246778e-06, "loss": 3.9772, "step": 18581 }, { "epoch": 6.189889231281752, "grad_norm": 0.7890625, "learning_rate": 5.017805147191616e-06, "loss": 4.0214, "step": 18582 }, { "epoch": 6.190222370284001, "grad_norm": 0.77734375, "learning_rate": 5.01704133904865e-06, "loss": 3.9881, "step": 18583 }, { "epoch": 6.190555509286249, "grad_norm": 0.74609375, "learning_rate": 5.016277559826771e-06, "loss": 3.9562, "step": 18584 }, { "epoch": 6.190888648288499, "grad_norm": 0.765625, "learning_rate": 5.015513809534881e-06, "loss": 4.0335, "step": 18585 }, { "epoch": 6.191221787290747, "grad_norm": 0.77734375, "learning_rate": 5.014750088181876e-06, "loss": 4.0421, "step": 18586 }, { "epoch": 6.191554926292996, "grad_norm": 0.7734375, "learning_rate": 5.013986395776647e-06, "loss": 3.9426, "step": 18587 }, { "epoch": 6.191888065295244, "grad_norm": 0.765625, "learning_rate": 5.013222732328091e-06, "loss": 4.046, "step": 18588 }, { "epoch": 6.192221204297493, "grad_norm": 0.78125, "learning_rate": 5.0124590978451034e-06, "loss": 3.9412, "step": 18589 }, { "epoch": 6.192554343299742, "grad_norm": 0.74609375, "learning_rate": 5.011695492336581e-06, "loss": 4.0274, "step": 18590 }, { "epoch": 6.19288748230199, "grad_norm": 0.73828125, "learning_rate": 5.010931915811416e-06, "loss": 4.0349, "step": 18591 }, { "epoch": 6.193220621304239, "grad_norm": 0.74609375, "learning_rate": 5.010168368278502e-06, "loss": 4.0701, "step": 18592 }, { "epoch": 6.193553760306488, "grad_norm": 0.78515625, "learning_rate": 5.009404849746734e-06, "loss": 4.0033, "step": 18593 }, { "epoch": 6.193886899308737, "grad_norm": 0.6953125, "learning_rate": 5.008641360225005e-06, "loss": 4.0582, "step": 18594 }, { "epoch": 6.194220038310985, "grad_norm": 0.79296875, "learning_rate": 5.007877899722211e-06, "loss": 4.0612, "step": 18595 }, { "epoch": 6.194553177313234, "grad_norm": 0.7890625, "learning_rate": 5.007114468247239e-06, "loss": 3.9418, "step": 18596 }, { "epoch": 6.194886316315483, "grad_norm": 0.75390625, "learning_rate": 5.006351065808982e-06, "loss": 4.0466, "step": 18597 }, { "epoch": 6.195219455317732, "grad_norm": 0.80078125, "learning_rate": 5.005587692416337e-06, "loss": 3.988, "step": 18598 }, { "epoch": 6.19555259431998, "grad_norm": 0.73046875, "learning_rate": 5.004824348078189e-06, "loss": 4.0185, "step": 18599 }, { "epoch": 6.195885733322228, "grad_norm": 0.796875, "learning_rate": 5.004061032803435e-06, "loss": 4.0244, "step": 18600 }, { "epoch": 6.1962188723244775, "grad_norm": 0.79296875, "learning_rate": 5.003297746600962e-06, "loss": 3.9814, "step": 18601 }, { "epoch": 6.196552011326726, "grad_norm": 0.796875, "learning_rate": 5.0025344894796655e-06, "loss": 3.9221, "step": 18602 }, { "epoch": 6.196885150328975, "grad_norm": 0.7890625, "learning_rate": 5.001771261448429e-06, "loss": 3.9939, "step": 18603 }, { "epoch": 6.197218289331223, "grad_norm": 0.734375, "learning_rate": 5.00100806251615e-06, "loss": 4.0574, "step": 18604 }, { "epoch": 6.1975514283334725, "grad_norm": 0.796875, "learning_rate": 5.000244892691713e-06, "loss": 3.9291, "step": 18605 }, { "epoch": 6.197884567335721, "grad_norm": 0.78515625, "learning_rate": 4.999481751984007e-06, "loss": 4.0133, "step": 18606 }, { "epoch": 6.198217706337969, "grad_norm": 0.765625, "learning_rate": 4.998718640401924e-06, "loss": 3.969, "step": 18607 }, { "epoch": 6.198550845340218, "grad_norm": 0.77734375, "learning_rate": 4.99795555795435e-06, "loss": 3.9951, "step": 18608 }, { "epoch": 6.198883984342467, "grad_norm": 0.75, "learning_rate": 4.997192504650175e-06, "loss": 3.942, "step": 18609 }, { "epoch": 6.199217123344716, "grad_norm": 0.76171875, "learning_rate": 4.996429480498285e-06, "loss": 3.9551, "step": 18610 }, { "epoch": 6.199550262346964, "grad_norm": 0.75, "learning_rate": 4.9956664855075715e-06, "loss": 3.9836, "step": 18611 }, { "epoch": 6.199883401349213, "grad_norm": 0.80078125, "learning_rate": 4.994903519686916e-06, "loss": 3.9728, "step": 18612 }, { "epoch": 6.2002165403514615, "grad_norm": 0.7265625, "learning_rate": 4.994140583045212e-06, "loss": 3.9921, "step": 18613 }, { "epoch": 6.200549679353711, "grad_norm": 0.7734375, "learning_rate": 4.993377675591341e-06, "loss": 3.9945, "step": 18614 }, { "epoch": 6.200882818355959, "grad_norm": 0.75390625, "learning_rate": 4.992614797334189e-06, "loss": 3.959, "step": 18615 }, { "epoch": 6.201215957358207, "grad_norm": 0.7421875, "learning_rate": 4.991851948282646e-06, "loss": 4.0124, "step": 18616 }, { "epoch": 6.2015490963604565, "grad_norm": 0.828125, "learning_rate": 4.991089128445593e-06, "loss": 4.0134, "step": 18617 }, { "epoch": 6.201882235362705, "grad_norm": 0.78515625, "learning_rate": 4.990326337831918e-06, "loss": 4.0096, "step": 18618 }, { "epoch": 6.202215374364954, "grad_norm": 0.76953125, "learning_rate": 4.989563576450504e-06, "loss": 3.9388, "step": 18619 }, { "epoch": 6.202548513367202, "grad_norm": 0.74609375, "learning_rate": 4.98880084431024e-06, "loss": 4.0119, "step": 18620 }, { "epoch": 6.202881652369451, "grad_norm": 0.7421875, "learning_rate": 4.988038141420004e-06, "loss": 4.0417, "step": 18621 }, { "epoch": 6.2032147913717, "grad_norm": 0.79296875, "learning_rate": 4.987275467788686e-06, "loss": 3.932, "step": 18622 }, { "epoch": 6.203547930373949, "grad_norm": 0.765625, "learning_rate": 4.986512823425165e-06, "loss": 4.0466, "step": 18623 }, { "epoch": 6.203881069376197, "grad_norm": 0.78125, "learning_rate": 4.985750208338324e-06, "loss": 3.9575, "step": 18624 }, { "epoch": 6.2042142083784455, "grad_norm": 0.765625, "learning_rate": 4.984987622537048e-06, "loss": 3.9787, "step": 18625 }, { "epoch": 6.204547347380695, "grad_norm": 0.77734375, "learning_rate": 4.984225066030217e-06, "loss": 3.9741, "step": 18626 }, { "epoch": 6.204880486382943, "grad_norm": 0.71875, "learning_rate": 4.983462538826718e-06, "loss": 4.024, "step": 18627 }, { "epoch": 6.205213625385192, "grad_norm": 0.8046875, "learning_rate": 4.982700040935426e-06, "loss": 3.9787, "step": 18628 }, { "epoch": 6.2055467643874405, "grad_norm": 0.74609375, "learning_rate": 4.981937572365229e-06, "loss": 4.0389, "step": 18629 }, { "epoch": 6.20587990338969, "grad_norm": 0.72265625, "learning_rate": 4.981175133125004e-06, "loss": 3.9828, "step": 18630 }, { "epoch": 6.206213042391938, "grad_norm": 0.76953125, "learning_rate": 4.980412723223633e-06, "loss": 4.0214, "step": 18631 }, { "epoch": 6.206546181394187, "grad_norm": 0.76171875, "learning_rate": 4.9796503426699976e-06, "loss": 3.8806, "step": 18632 }, { "epoch": 6.206879320396435, "grad_norm": 0.79296875, "learning_rate": 4.978887991472975e-06, "loss": 4.009, "step": 18633 }, { "epoch": 6.207212459398684, "grad_norm": 0.76953125, "learning_rate": 4.9781256696414475e-06, "loss": 3.9788, "step": 18634 }, { "epoch": 6.207545598400933, "grad_norm": 0.7734375, "learning_rate": 4.977363377184293e-06, "loss": 3.9902, "step": 18635 }, { "epoch": 6.207878737403181, "grad_norm": 0.78515625, "learning_rate": 4.976601114110392e-06, "loss": 3.9486, "step": 18636 }, { "epoch": 6.20821187640543, "grad_norm": 0.75390625, "learning_rate": 4.975838880428619e-06, "loss": 3.9874, "step": 18637 }, { "epoch": 6.208545015407679, "grad_norm": 0.8125, "learning_rate": 4.9750766761478595e-06, "loss": 4.0267, "step": 18638 }, { "epoch": 6.208878154409928, "grad_norm": 0.8046875, "learning_rate": 4.974314501276986e-06, "loss": 4.0048, "step": 18639 }, { "epoch": 6.209211293412176, "grad_norm": 0.76171875, "learning_rate": 4.973552355824882e-06, "loss": 3.8769, "step": 18640 }, { "epoch": 6.209544432414425, "grad_norm": 0.79296875, "learning_rate": 4.972790239800416e-06, "loss": 3.9443, "step": 18641 }, { "epoch": 6.209877571416674, "grad_norm": 0.76171875, "learning_rate": 4.972028153212471e-06, "loss": 3.9994, "step": 18642 }, { "epoch": 6.210210710418922, "grad_norm": 0.7578125, "learning_rate": 4.9712660960699226e-06, "loss": 4.0668, "step": 18643 }, { "epoch": 6.210543849421171, "grad_norm": 0.80078125, "learning_rate": 4.970504068381647e-06, "loss": 3.9374, "step": 18644 }, { "epoch": 6.210876988423419, "grad_norm": 0.76953125, "learning_rate": 4.9697420701565185e-06, "loss": 3.9347, "step": 18645 }, { "epoch": 6.211210127425669, "grad_norm": 0.7421875, "learning_rate": 4.968980101403417e-06, "loss": 4.006, "step": 18646 }, { "epoch": 6.211543266427917, "grad_norm": 0.765625, "learning_rate": 4.968218162131213e-06, "loss": 4.0069, "step": 18647 }, { "epoch": 6.211876405430166, "grad_norm": 0.7734375, "learning_rate": 4.967456252348786e-06, "loss": 3.9446, "step": 18648 }, { "epoch": 6.212209544432414, "grad_norm": 0.76953125, "learning_rate": 4.966694372065009e-06, "loss": 4.0322, "step": 18649 }, { "epoch": 6.2125426834346635, "grad_norm": 0.77734375, "learning_rate": 4.965932521288752e-06, "loss": 3.9115, "step": 18650 }, { "epoch": 6.212875822436912, "grad_norm": 0.79296875, "learning_rate": 4.9651707000288935e-06, "loss": 4.0194, "step": 18651 }, { "epoch": 6.21320896143916, "grad_norm": 0.75, "learning_rate": 4.964408908294304e-06, "loss": 4.0344, "step": 18652 }, { "epoch": 6.213542100441409, "grad_norm": 0.82421875, "learning_rate": 4.963647146093862e-06, "loss": 3.9846, "step": 18653 }, { "epoch": 6.213875239443658, "grad_norm": 0.73828125, "learning_rate": 4.962885413436435e-06, "loss": 3.9877, "step": 18654 }, { "epoch": 6.214208378445907, "grad_norm": 0.75, "learning_rate": 4.9621237103308995e-06, "loss": 3.9677, "step": 18655 }, { "epoch": 6.214541517448155, "grad_norm": 0.7890625, "learning_rate": 4.961362036786122e-06, "loss": 3.9877, "step": 18656 }, { "epoch": 6.214874656450404, "grad_norm": 0.80078125, "learning_rate": 4.9606003928109825e-06, "loss": 3.9841, "step": 18657 }, { "epoch": 6.215207795452653, "grad_norm": 0.7578125, "learning_rate": 4.959838778414347e-06, "loss": 3.989, "step": 18658 }, { "epoch": 6.215540934454902, "grad_norm": 0.75, "learning_rate": 4.959077193605087e-06, "loss": 3.9549, "step": 18659 }, { "epoch": 6.21587407345715, "grad_norm": 0.75, "learning_rate": 4.9583156383920736e-06, "loss": 4.0259, "step": 18660 }, { "epoch": 6.216207212459398, "grad_norm": 0.75390625, "learning_rate": 4.957554112784177e-06, "loss": 4.0688, "step": 18661 }, { "epoch": 6.2165403514616475, "grad_norm": 0.76953125, "learning_rate": 4.95679261679027e-06, "loss": 3.9844, "step": 18662 }, { "epoch": 6.216873490463896, "grad_norm": 0.765625, "learning_rate": 4.956031150419218e-06, "loss": 4.0324, "step": 18663 }, { "epoch": 6.217206629466145, "grad_norm": 0.78515625, "learning_rate": 4.955269713679896e-06, "loss": 3.9863, "step": 18664 }, { "epoch": 6.217539768468393, "grad_norm": 0.74609375, "learning_rate": 4.954508306581167e-06, "loss": 3.9676, "step": 18665 }, { "epoch": 6.2178729074706425, "grad_norm": 0.75390625, "learning_rate": 4.953746929131906e-06, "loss": 3.9765, "step": 18666 }, { "epoch": 6.218206046472891, "grad_norm": 0.76953125, "learning_rate": 4.9529855813409764e-06, "loss": 3.9219, "step": 18667 }, { "epoch": 6.218539185475139, "grad_norm": 0.76953125, "learning_rate": 4.952224263217246e-06, "loss": 3.9611, "step": 18668 }, { "epoch": 6.218872324477388, "grad_norm": 0.78515625, "learning_rate": 4.9514629747695876e-06, "loss": 3.9644, "step": 18669 }, { "epoch": 6.2192054634796365, "grad_norm": 0.76953125, "learning_rate": 4.950701716006862e-06, "loss": 4.025, "step": 18670 }, { "epoch": 6.219538602481886, "grad_norm": 0.7734375, "learning_rate": 4.949940486937943e-06, "loss": 4.039, "step": 18671 }, { "epoch": 6.219871741484134, "grad_norm": 0.77734375, "learning_rate": 4.949179287571692e-06, "loss": 4.0057, "step": 18672 }, { "epoch": 6.220204880486383, "grad_norm": 0.7734375, "learning_rate": 4.948418117916978e-06, "loss": 4.0087, "step": 18673 }, { "epoch": 6.2205380194886315, "grad_norm": 0.765625, "learning_rate": 4.947656977982665e-06, "loss": 3.9503, "step": 18674 }, { "epoch": 6.220871158490881, "grad_norm": 0.72265625, "learning_rate": 4.946895867777623e-06, "loss": 4.0106, "step": 18675 }, { "epoch": 6.221204297493129, "grad_norm": 0.7421875, "learning_rate": 4.946134787310713e-06, "loss": 3.9832, "step": 18676 }, { "epoch": 6.221537436495377, "grad_norm": 0.75, "learning_rate": 4.945373736590798e-06, "loss": 3.9653, "step": 18677 }, { "epoch": 6.2218705754976265, "grad_norm": 0.71484375, "learning_rate": 4.944612715626749e-06, "loss": 3.9412, "step": 18678 }, { "epoch": 6.222203714499875, "grad_norm": 0.79296875, "learning_rate": 4.943851724427424e-06, "loss": 3.9855, "step": 18679 }, { "epoch": 6.222536853502124, "grad_norm": 0.7578125, "learning_rate": 4.943090763001692e-06, "loss": 4.0056, "step": 18680 }, { "epoch": 6.222869992504372, "grad_norm": 0.78515625, "learning_rate": 4.942329831358412e-06, "loss": 4.0127, "step": 18681 }, { "epoch": 6.223203131506621, "grad_norm": 0.74609375, "learning_rate": 4.941568929506453e-06, "loss": 3.9587, "step": 18682 }, { "epoch": 6.22353627050887, "grad_norm": 0.77734375, "learning_rate": 4.940808057454672e-06, "loss": 3.9635, "step": 18683 }, { "epoch": 6.223869409511119, "grad_norm": 0.71875, "learning_rate": 4.940047215211938e-06, "loss": 4.0044, "step": 18684 }, { "epoch": 6.224202548513367, "grad_norm": 0.73828125, "learning_rate": 4.939286402787104e-06, "loss": 3.984, "step": 18685 }, { "epoch": 6.2245356875156155, "grad_norm": 0.7578125, "learning_rate": 4.9385256201890395e-06, "loss": 4.0162, "step": 18686 }, { "epoch": 6.224868826517865, "grad_norm": 0.76953125, "learning_rate": 4.937764867426602e-06, "loss": 3.9865, "step": 18687 }, { "epoch": 6.225201965520113, "grad_norm": 0.81640625, "learning_rate": 4.937004144508655e-06, "loss": 4.0325, "step": 18688 }, { "epoch": 6.225535104522362, "grad_norm": 0.76171875, "learning_rate": 4.936243451444057e-06, "loss": 3.9514, "step": 18689 }, { "epoch": 6.22586824352461, "grad_norm": 0.76171875, "learning_rate": 4.935482788241671e-06, "loss": 3.9455, "step": 18690 }, { "epoch": 6.22620138252686, "grad_norm": 0.81640625, "learning_rate": 4.934722154910354e-06, "loss": 3.9838, "step": 18691 }, { "epoch": 6.226534521529108, "grad_norm": 0.83203125, "learning_rate": 4.93396155145897e-06, "loss": 3.9959, "step": 18692 }, { "epoch": 6.226867660531357, "grad_norm": 0.78515625, "learning_rate": 4.933200977896377e-06, "loss": 3.9822, "step": 18693 }, { "epoch": 6.227200799533605, "grad_norm": 0.75390625, "learning_rate": 4.932440434231429e-06, "loss": 3.9603, "step": 18694 }, { "epoch": 6.227533938535854, "grad_norm": 0.72265625, "learning_rate": 4.9316799204729906e-06, "loss": 4.0311, "step": 18695 }, { "epoch": 6.227867077538103, "grad_norm": 0.75, "learning_rate": 4.9309194366299175e-06, "loss": 3.9531, "step": 18696 }, { "epoch": 6.228200216540351, "grad_norm": 0.75390625, "learning_rate": 4.930158982711069e-06, "loss": 4.0133, "step": 18697 }, { "epoch": 6.2285333555426, "grad_norm": 0.796875, "learning_rate": 4.929398558725301e-06, "loss": 4.0076, "step": 18698 }, { "epoch": 6.228866494544849, "grad_norm": 0.78125, "learning_rate": 4.928638164681472e-06, "loss": 3.9422, "step": 18699 }, { "epoch": 6.229199633547098, "grad_norm": 0.76171875, "learning_rate": 4.927877800588439e-06, "loss": 3.9348, "step": 18700 }, { "epoch": 6.229532772549346, "grad_norm": 0.78515625, "learning_rate": 4.92711746645506e-06, "loss": 3.9583, "step": 18701 }, { "epoch": 6.229865911551595, "grad_norm": 0.74609375, "learning_rate": 4.92635716229019e-06, "loss": 4.0374, "step": 18702 }, { "epoch": 6.230199050553844, "grad_norm": 0.75, "learning_rate": 4.925596888102682e-06, "loss": 3.97, "step": 18703 }, { "epoch": 6.230532189556092, "grad_norm": 0.765625, "learning_rate": 4.924836643901396e-06, "loss": 3.949, "step": 18704 }, { "epoch": 6.230865328558341, "grad_norm": 0.80078125, "learning_rate": 4.924076429695184e-06, "loss": 3.9317, "step": 18705 }, { "epoch": 6.231198467560589, "grad_norm": 0.7421875, "learning_rate": 4.923316245492903e-06, "loss": 4.0114, "step": 18706 }, { "epoch": 6.2315316065628386, "grad_norm": 0.75390625, "learning_rate": 4.922556091303405e-06, "loss": 4.0004, "step": 18707 }, { "epoch": 6.231864745565087, "grad_norm": 0.7421875, "learning_rate": 4.921795967135548e-06, "loss": 3.9643, "step": 18708 }, { "epoch": 6.232197884567336, "grad_norm": 0.78515625, "learning_rate": 4.921035872998181e-06, "loss": 3.9061, "step": 18709 }, { "epoch": 6.232531023569584, "grad_norm": 0.7578125, "learning_rate": 4.9202758089001636e-06, "loss": 3.9759, "step": 18710 }, { "epoch": 6.2328641625718335, "grad_norm": 0.77734375, "learning_rate": 4.919515774850345e-06, "loss": 4.0374, "step": 18711 }, { "epoch": 6.233197301574082, "grad_norm": 0.76171875, "learning_rate": 4.918755770857576e-06, "loss": 3.9938, "step": 18712 }, { "epoch": 6.23353044057633, "grad_norm": 0.78515625, "learning_rate": 4.917995796930712e-06, "loss": 3.9284, "step": 18713 }, { "epoch": 6.233863579578579, "grad_norm": 0.77734375, "learning_rate": 4.9172358530786034e-06, "loss": 3.9604, "step": 18714 }, { "epoch": 6.234196718580828, "grad_norm": 0.71484375, "learning_rate": 4.916475939310104e-06, "loss": 3.9921, "step": 18715 }, { "epoch": 6.234529857583077, "grad_norm": 0.75, "learning_rate": 4.915716055634063e-06, "loss": 4.0423, "step": 18716 }, { "epoch": 6.234862996585325, "grad_norm": 0.7890625, "learning_rate": 4.914956202059334e-06, "loss": 3.9706, "step": 18717 }, { "epoch": 6.235196135587574, "grad_norm": 0.75, "learning_rate": 4.914196378594765e-06, "loss": 4.0194, "step": 18718 }, { "epoch": 6.2355292745898225, "grad_norm": 0.765625, "learning_rate": 4.913436585249208e-06, "loss": 3.9494, "step": 18719 }, { "epoch": 6.235862413592072, "grad_norm": 0.76171875, "learning_rate": 4.912676822031513e-06, "loss": 3.9967, "step": 18720 }, { "epoch": 6.23619555259432, "grad_norm": 0.765625, "learning_rate": 4.911917088950527e-06, "loss": 4.0025, "step": 18721 }, { "epoch": 6.236528691596568, "grad_norm": 0.75, "learning_rate": 4.911157386015104e-06, "loss": 3.9636, "step": 18722 }, { "epoch": 6.2368618305988175, "grad_norm": 0.78125, "learning_rate": 4.910397713234087e-06, "loss": 4.0509, "step": 18723 }, { "epoch": 6.237194969601066, "grad_norm": 0.76171875, "learning_rate": 4.909638070616329e-06, "loss": 4.0181, "step": 18724 }, { "epoch": 6.237528108603315, "grad_norm": 0.76953125, "learning_rate": 4.9088784581706756e-06, "loss": 4.0227, "step": 18725 }, { "epoch": 6.237861247605563, "grad_norm": 0.73828125, "learning_rate": 4.908118875905978e-06, "loss": 4.0541, "step": 18726 }, { "epoch": 6.2381943866078124, "grad_norm": 0.828125, "learning_rate": 4.907359323831079e-06, "loss": 4.0288, "step": 18727 }, { "epoch": 6.238527525610061, "grad_norm": 0.76171875, "learning_rate": 4.906599801954829e-06, "loss": 4.031, "step": 18728 }, { "epoch": 6.238860664612309, "grad_norm": 0.78515625, "learning_rate": 4.9058403102860765e-06, "loss": 4.0169, "step": 18729 }, { "epoch": 6.239193803614558, "grad_norm": 0.796875, "learning_rate": 4.905080848833664e-06, "loss": 3.9768, "step": 18730 }, { "epoch": 6.2395269426168065, "grad_norm": 0.73828125, "learning_rate": 4.9043214176064365e-06, "loss": 4.072, "step": 18731 }, { "epoch": 6.239860081619056, "grad_norm": 0.73828125, "learning_rate": 4.903562016613245e-06, "loss": 4.0131, "step": 18732 }, { "epoch": 6.240193220621304, "grad_norm": 0.75390625, "learning_rate": 4.90280264586293e-06, "loss": 3.902, "step": 18733 }, { "epoch": 6.240526359623553, "grad_norm": 0.73828125, "learning_rate": 4.902043305364341e-06, "loss": 3.91, "step": 18734 }, { "epoch": 6.2408594986258015, "grad_norm": 0.75, "learning_rate": 4.901283995126318e-06, "loss": 3.903, "step": 18735 }, { "epoch": 6.241192637628051, "grad_norm": 0.7578125, "learning_rate": 4.90052471515771e-06, "loss": 3.967, "step": 18736 }, { "epoch": 6.241525776630299, "grad_norm": 0.78515625, "learning_rate": 4.899765465467356e-06, "loss": 4.0612, "step": 18737 }, { "epoch": 6.241858915632548, "grad_norm": 0.76953125, "learning_rate": 4.899006246064106e-06, "loss": 3.9829, "step": 18738 }, { "epoch": 6.242192054634796, "grad_norm": 0.76171875, "learning_rate": 4.898247056956798e-06, "loss": 3.9627, "step": 18739 }, { "epoch": 6.242525193637045, "grad_norm": 0.7421875, "learning_rate": 4.8974878981542736e-06, "loss": 3.9485, "step": 18740 }, { "epoch": 6.242858332639294, "grad_norm": 0.7734375, "learning_rate": 4.8967287696653805e-06, "loss": 3.9443, "step": 18741 }, { "epoch": 6.243191471641542, "grad_norm": 0.796875, "learning_rate": 4.895969671498958e-06, "loss": 3.9895, "step": 18742 }, { "epoch": 6.243524610643791, "grad_norm": 0.75, "learning_rate": 4.895210603663849e-06, "loss": 3.9737, "step": 18743 }, { "epoch": 6.24385774964604, "grad_norm": 0.7578125, "learning_rate": 4.8944515661688935e-06, "loss": 4.0657, "step": 18744 }, { "epoch": 6.244190888648289, "grad_norm": 0.7578125, "learning_rate": 4.893692559022934e-06, "loss": 3.9615, "step": 18745 }, { "epoch": 6.244524027650537, "grad_norm": 0.8046875, "learning_rate": 4.8929335822348105e-06, "loss": 3.9526, "step": 18746 }, { "epoch": 6.2448571666527855, "grad_norm": 0.82421875, "learning_rate": 4.892174635813367e-06, "loss": 3.9906, "step": 18747 }, { "epoch": 6.245190305655035, "grad_norm": 0.76171875, "learning_rate": 4.891415719767438e-06, "loss": 3.9554, "step": 18748 }, { "epoch": 6.245523444657283, "grad_norm": 0.7421875, "learning_rate": 4.890656834105865e-06, "loss": 3.991, "step": 18749 }, { "epoch": 6.245856583659532, "grad_norm": 0.75, "learning_rate": 4.88989797883749e-06, "loss": 3.9798, "step": 18750 }, { "epoch": 6.24618972266178, "grad_norm": 0.78515625, "learning_rate": 4.889139153971147e-06, "loss": 3.9379, "step": 18751 }, { "epoch": 6.24652286166403, "grad_norm": 0.78515625, "learning_rate": 4.8883803595156805e-06, "loss": 3.9072, "step": 18752 }, { "epoch": 6.246856000666278, "grad_norm": 0.73046875, "learning_rate": 4.887621595479925e-06, "loss": 4.0263, "step": 18753 }, { "epoch": 6.247189139668527, "grad_norm": 0.7578125, "learning_rate": 4.88686286187272e-06, "loss": 4.0231, "step": 18754 }, { "epoch": 6.247522278670775, "grad_norm": 0.7421875, "learning_rate": 4.886104158702901e-06, "loss": 4.1029, "step": 18755 }, { "epoch": 6.247855417673024, "grad_norm": 0.76171875, "learning_rate": 4.88534548597931e-06, "loss": 3.9946, "step": 18756 }, { "epoch": 6.248188556675273, "grad_norm": 0.79296875, "learning_rate": 4.88458684371078e-06, "loss": 4.076, "step": 18757 }, { "epoch": 6.248521695677521, "grad_norm": 0.734375, "learning_rate": 4.883828231906146e-06, "loss": 3.9503, "step": 18758 }, { "epoch": 6.24885483467977, "grad_norm": 0.81640625, "learning_rate": 4.883069650574248e-06, "loss": 3.8917, "step": 18759 }, { "epoch": 6.249187973682019, "grad_norm": 0.7578125, "learning_rate": 4.882311099723919e-06, "loss": 3.9912, "step": 18760 }, { "epoch": 6.249521112684268, "grad_norm": 0.74609375, "learning_rate": 4.881552579363998e-06, "loss": 3.9982, "step": 18761 }, { "epoch": 6.249854251686516, "grad_norm": 0.78125, "learning_rate": 4.880794089503315e-06, "loss": 3.9291, "step": 18762 }, { "epoch": 6.250187390688765, "grad_norm": 0.7578125, "learning_rate": 4.880035630150711e-06, "loss": 4.0283, "step": 18763 }, { "epoch": 6.250520529691014, "grad_norm": 0.76953125, "learning_rate": 4.879277201315014e-06, "loss": 3.9491, "step": 18764 }, { "epoch": 6.250853668693262, "grad_norm": 0.72265625, "learning_rate": 4.878518803005064e-06, "loss": 3.9257, "step": 18765 }, { "epoch": 6.251186807695511, "grad_norm": 0.76171875, "learning_rate": 4.8777604352296915e-06, "loss": 3.9767, "step": 18766 }, { "epoch": 6.251519946697759, "grad_norm": 0.7734375, "learning_rate": 4.877002097997728e-06, "loss": 4.0023, "step": 18767 }, { "epoch": 6.2518530857000085, "grad_norm": 0.76171875, "learning_rate": 4.87624379131801e-06, "loss": 4.0168, "step": 18768 }, { "epoch": 6.252186224702257, "grad_norm": 0.7734375, "learning_rate": 4.875485515199367e-06, "loss": 4.002, "step": 18769 }, { "epoch": 6.252519363704506, "grad_norm": 0.7734375, "learning_rate": 4.8747272696506355e-06, "loss": 3.9498, "step": 18770 }, { "epoch": 6.252852502706754, "grad_norm": 0.75390625, "learning_rate": 4.8739690546806435e-06, "loss": 3.9962, "step": 18771 }, { "epoch": 6.2531856417090035, "grad_norm": 0.76953125, "learning_rate": 4.873210870298225e-06, "loss": 3.9418, "step": 18772 }, { "epoch": 6.253518780711252, "grad_norm": 0.82421875, "learning_rate": 4.872452716512209e-06, "loss": 3.9309, "step": 18773 }, { "epoch": 6.2538519197135, "grad_norm": 0.7734375, "learning_rate": 4.871694593331432e-06, "loss": 3.9847, "step": 18774 }, { "epoch": 6.254185058715749, "grad_norm": 0.7109375, "learning_rate": 4.870936500764715e-06, "loss": 4.0147, "step": 18775 }, { "epoch": 6.2545181977179976, "grad_norm": 0.765625, "learning_rate": 4.8701784388208955e-06, "loss": 3.9913, "step": 18776 }, { "epoch": 6.254851336720247, "grad_norm": 0.81640625, "learning_rate": 4.869420407508799e-06, "loss": 4.0527, "step": 18777 }, { "epoch": 6.255184475722495, "grad_norm": 0.7421875, "learning_rate": 4.868662406837259e-06, "loss": 4.0841, "step": 18778 }, { "epoch": 6.255517614724744, "grad_norm": 0.71875, "learning_rate": 4.867904436815101e-06, "loss": 3.9795, "step": 18779 }, { "epoch": 6.2558507537269925, "grad_norm": 0.7578125, "learning_rate": 4.867146497451156e-06, "loss": 3.9104, "step": 18780 }, { "epoch": 6.256183892729242, "grad_norm": 0.765625, "learning_rate": 4.866388588754251e-06, "loss": 4.0558, "step": 18781 }, { "epoch": 6.25651703173149, "grad_norm": 0.78515625, "learning_rate": 4.865630710733215e-06, "loss": 3.9127, "step": 18782 }, { "epoch": 6.256850170733738, "grad_norm": 0.7109375, "learning_rate": 4.8648728633968766e-06, "loss": 3.968, "step": 18783 }, { "epoch": 6.2571833097359875, "grad_norm": 0.7578125, "learning_rate": 4.8641150467540595e-06, "loss": 3.9782, "step": 18784 }, { "epoch": 6.257516448738236, "grad_norm": 0.78515625, "learning_rate": 4.863357260813594e-06, "loss": 3.979, "step": 18785 }, { "epoch": 6.257849587740485, "grad_norm": 0.73046875, "learning_rate": 4.8625995055843045e-06, "loss": 3.9265, "step": 18786 }, { "epoch": 6.258182726742733, "grad_norm": 0.78515625, "learning_rate": 4.86184178107502e-06, "loss": 4.0286, "step": 18787 }, { "epoch": 6.258515865744982, "grad_norm": 0.7421875, "learning_rate": 4.861084087294562e-06, "loss": 3.9796, "step": 18788 }, { "epoch": 6.258849004747231, "grad_norm": 0.7578125, "learning_rate": 4.860326424251759e-06, "loss": 4.0193, "step": 18789 }, { "epoch": 6.259182143749479, "grad_norm": 0.7578125, "learning_rate": 4.8595687919554345e-06, "loss": 3.9734, "step": 18790 }, { "epoch": 6.259515282751728, "grad_norm": 0.7421875, "learning_rate": 4.858811190414418e-06, "loss": 4.0179, "step": 18791 }, { "epoch": 6.2598484217539765, "grad_norm": 0.80859375, "learning_rate": 4.85805361963753e-06, "loss": 3.9921, "step": 18792 }, { "epoch": 6.260181560756226, "grad_norm": 0.7109375, "learning_rate": 4.8572960796335924e-06, "loss": 3.9942, "step": 18793 }, { "epoch": 6.260514699758474, "grad_norm": 0.7421875, "learning_rate": 4.856538570411433e-06, "loss": 3.9666, "step": 18794 }, { "epoch": 6.260847838760723, "grad_norm": 0.79296875, "learning_rate": 4.8557810919798726e-06, "loss": 4.0077, "step": 18795 }, { "epoch": 6.2611809777629714, "grad_norm": 0.74609375, "learning_rate": 4.855023644347736e-06, "loss": 3.9523, "step": 18796 }, { "epoch": 6.261514116765221, "grad_norm": 0.78125, "learning_rate": 4.854266227523843e-06, "loss": 3.9332, "step": 18797 }, { "epoch": 6.261847255767469, "grad_norm": 0.7734375, "learning_rate": 4.853508841517019e-06, "loss": 4.0058, "step": 18798 }, { "epoch": 6.262180394769718, "grad_norm": 0.8046875, "learning_rate": 4.852751486336085e-06, "loss": 4.0189, "step": 18799 }, { "epoch": 6.262513533771966, "grad_norm": 0.7421875, "learning_rate": 4.851994161989862e-06, "loss": 3.9913, "step": 18800 }, { "epoch": 6.262846672774215, "grad_norm": 0.7578125, "learning_rate": 4.851236868487173e-06, "loss": 3.9586, "step": 18801 }, { "epoch": 6.263179811776464, "grad_norm": 0.75, "learning_rate": 4.8504796058368355e-06, "loss": 3.9564, "step": 18802 }, { "epoch": 6.263512950778712, "grad_norm": 0.7734375, "learning_rate": 4.849722374047672e-06, "loss": 4.0577, "step": 18803 }, { "epoch": 6.263846089780961, "grad_norm": 0.71484375, "learning_rate": 4.848965173128502e-06, "loss": 4.0937, "step": 18804 }, { "epoch": 6.26417922878321, "grad_norm": 0.79296875, "learning_rate": 4.8482080030881456e-06, "loss": 4.0325, "step": 18805 }, { "epoch": 6.264512367785459, "grad_norm": 0.75390625, "learning_rate": 4.8474508639354224e-06, "loss": 4.0254, "step": 18806 }, { "epoch": 6.264845506787707, "grad_norm": 0.734375, "learning_rate": 4.846693755679152e-06, "loss": 3.9437, "step": 18807 }, { "epoch": 6.265178645789955, "grad_norm": 0.74609375, "learning_rate": 4.84593667832815e-06, "loss": 4.0236, "step": 18808 }, { "epoch": 6.265511784792205, "grad_norm": 0.8046875, "learning_rate": 4.84517963189124e-06, "loss": 4.0109, "step": 18809 }, { "epoch": 6.265844923794453, "grad_norm": 0.7578125, "learning_rate": 4.8444226163772384e-06, "loss": 3.9819, "step": 18810 }, { "epoch": 6.266178062796702, "grad_norm": 0.76171875, "learning_rate": 4.843665631794958e-06, "loss": 3.9769, "step": 18811 }, { "epoch": 6.26651120179895, "grad_norm": 0.7578125, "learning_rate": 4.842908678153219e-06, "loss": 3.9145, "step": 18812 }, { "epoch": 6.2668443408012, "grad_norm": 0.78125, "learning_rate": 4.842151755460839e-06, "loss": 3.9727, "step": 18813 }, { "epoch": 6.267177479803448, "grad_norm": 0.75, "learning_rate": 4.841394863726636e-06, "loss": 3.9779, "step": 18814 }, { "epoch": 6.267510618805697, "grad_norm": 0.77734375, "learning_rate": 4.840638002959422e-06, "loss": 4.0058, "step": 18815 }, { "epoch": 6.267843757807945, "grad_norm": 0.76171875, "learning_rate": 4.839881173168016e-06, "loss": 4.0204, "step": 18816 }, { "epoch": 6.2681768968101945, "grad_norm": 0.8125, "learning_rate": 4.839124374361232e-06, "loss": 3.9723, "step": 18817 }, { "epoch": 6.268510035812443, "grad_norm": 0.78515625, "learning_rate": 4.838367606547887e-06, "loss": 3.9311, "step": 18818 }, { "epoch": 6.268843174814691, "grad_norm": 0.82421875, "learning_rate": 4.837610869736795e-06, "loss": 3.9852, "step": 18819 }, { "epoch": 6.26917631381694, "grad_norm": 0.8359375, "learning_rate": 4.836854163936768e-06, "loss": 3.938, "step": 18820 }, { "epoch": 6.269509452819189, "grad_norm": 0.79296875, "learning_rate": 4.836097489156622e-06, "loss": 3.9774, "step": 18821 }, { "epoch": 6.269842591821438, "grad_norm": 0.76171875, "learning_rate": 4.8353408454051705e-06, "loss": 3.9812, "step": 18822 }, { "epoch": 6.270175730823686, "grad_norm": 0.76171875, "learning_rate": 4.834584232691226e-06, "loss": 3.997, "step": 18823 }, { "epoch": 6.270508869825935, "grad_norm": 0.76953125, "learning_rate": 4.8338276510236015e-06, "loss": 3.9094, "step": 18824 }, { "epoch": 6.2708420088281835, "grad_norm": 0.75, "learning_rate": 4.83307110041111e-06, "loss": 3.9978, "step": 18825 }, { "epoch": 6.271175147830432, "grad_norm": 0.79296875, "learning_rate": 4.832314580862565e-06, "loss": 3.983, "step": 18826 }, { "epoch": 6.271508286832681, "grad_norm": 0.8046875, "learning_rate": 4.831558092386775e-06, "loss": 3.9706, "step": 18827 }, { "epoch": 6.271841425834929, "grad_norm": 0.76953125, "learning_rate": 4.830801634992556e-06, "loss": 3.9837, "step": 18828 }, { "epoch": 6.2721745648371785, "grad_norm": 0.7734375, "learning_rate": 4.830045208688716e-06, "loss": 3.9776, "step": 18829 }, { "epoch": 6.272507703839427, "grad_norm": 0.77734375, "learning_rate": 4.8292888134840646e-06, "loss": 4.0566, "step": 18830 }, { "epoch": 6.272840842841676, "grad_norm": 0.76953125, "learning_rate": 4.828532449387414e-06, "loss": 4.007, "step": 18831 }, { "epoch": 6.273173981843924, "grad_norm": 0.8046875, "learning_rate": 4.8277761164075736e-06, "loss": 4.0025, "step": 18832 }, { "epoch": 6.2735071208461735, "grad_norm": 0.7578125, "learning_rate": 4.8270198145533545e-06, "loss": 3.8808, "step": 18833 }, { "epoch": 6.273840259848422, "grad_norm": 0.78515625, "learning_rate": 4.826263543833564e-06, "loss": 3.9984, "step": 18834 }, { "epoch": 6.27417339885067, "grad_norm": 0.7734375, "learning_rate": 4.825507304257013e-06, "loss": 3.9089, "step": 18835 }, { "epoch": 6.274506537852919, "grad_norm": 0.78515625, "learning_rate": 4.8247510958325064e-06, "loss": 3.9324, "step": 18836 }, { "epoch": 6.2748396768551675, "grad_norm": 0.74609375, "learning_rate": 4.823994918568859e-06, "loss": 4.018, "step": 18837 }, { "epoch": 6.275172815857417, "grad_norm": 0.796875, "learning_rate": 4.823238772474874e-06, "loss": 3.9674, "step": 18838 }, { "epoch": 6.275505954859665, "grad_norm": 0.7734375, "learning_rate": 4.822482657559357e-06, "loss": 3.9129, "step": 18839 }, { "epoch": 6.275839093861914, "grad_norm": 0.76953125, "learning_rate": 4.8217265738311185e-06, "loss": 4.0069, "step": 18840 }, { "epoch": 6.2761722328641625, "grad_norm": 0.796875, "learning_rate": 4.820970521298964e-06, "loss": 3.8925, "step": 18841 }, { "epoch": 6.276505371866412, "grad_norm": 0.78125, "learning_rate": 4.8202144999717e-06, "loss": 4.0033, "step": 18842 }, { "epoch": 6.27683851086866, "grad_norm": 0.75390625, "learning_rate": 4.819458509858133e-06, "loss": 4.0369, "step": 18843 }, { "epoch": 6.277171649870908, "grad_norm": 0.8515625, "learning_rate": 4.818702550967068e-06, "loss": 3.9655, "step": 18844 }, { "epoch": 6.277504788873157, "grad_norm": 0.76953125, "learning_rate": 4.81794662330731e-06, "loss": 4.052, "step": 18845 }, { "epoch": 6.277837927875406, "grad_norm": 0.7890625, "learning_rate": 4.8171907268876675e-06, "loss": 3.9814, "step": 18846 }, { "epoch": 6.278171066877655, "grad_norm": 0.765625, "learning_rate": 4.816434861716941e-06, "loss": 3.9861, "step": 18847 }, { "epoch": 6.278504205879903, "grad_norm": 0.76953125, "learning_rate": 4.815679027803934e-06, "loss": 3.9662, "step": 18848 }, { "epoch": 6.278837344882152, "grad_norm": 0.78515625, "learning_rate": 4.814923225157453e-06, "loss": 4.0426, "step": 18849 }, { "epoch": 6.279170483884401, "grad_norm": 0.75390625, "learning_rate": 4.8141674537863e-06, "loss": 3.9845, "step": 18850 }, { "epoch": 6.279503622886649, "grad_norm": 0.76171875, "learning_rate": 4.81341171369928e-06, "loss": 3.9921, "step": 18851 }, { "epoch": 6.279836761888898, "grad_norm": 0.70703125, "learning_rate": 4.812656004905192e-06, "loss": 3.93, "step": 18852 }, { "epoch": 6.2801699008911465, "grad_norm": 0.76171875, "learning_rate": 4.8119003274128435e-06, "loss": 3.9745, "step": 18853 }, { "epoch": 6.280503039893396, "grad_norm": 0.796875, "learning_rate": 4.811144681231032e-06, "loss": 3.9472, "step": 18854 }, { "epoch": 6.280836178895644, "grad_norm": 0.7421875, "learning_rate": 4.810389066368563e-06, "loss": 3.9592, "step": 18855 }, { "epoch": 6.281169317897893, "grad_norm": 0.73046875, "learning_rate": 4.809633482834235e-06, "loss": 4.021, "step": 18856 }, { "epoch": 6.281502456900141, "grad_norm": 0.7578125, "learning_rate": 4.808877930636848e-06, "loss": 3.9721, "step": 18857 }, { "epoch": 6.281835595902391, "grad_norm": 0.765625, "learning_rate": 4.808122409785205e-06, "loss": 3.9441, "step": 18858 }, { "epoch": 6.282168734904639, "grad_norm": 0.7421875, "learning_rate": 4.807366920288105e-06, "loss": 3.9755, "step": 18859 }, { "epoch": 6.282501873906888, "grad_norm": 0.76953125, "learning_rate": 4.806611462154348e-06, "loss": 4.0233, "step": 18860 }, { "epoch": 6.282835012909136, "grad_norm": 0.73828125, "learning_rate": 4.8058560353927335e-06, "loss": 4.0309, "step": 18861 }, { "epoch": 6.283168151911385, "grad_norm": 0.73828125, "learning_rate": 4.805100640012062e-06, "loss": 3.9597, "step": 18862 }, { "epoch": 6.283501290913634, "grad_norm": 0.734375, "learning_rate": 4.804345276021129e-06, "loss": 3.9444, "step": 18863 }, { "epoch": 6.283834429915882, "grad_norm": 0.81640625, "learning_rate": 4.803589943428738e-06, "loss": 3.9223, "step": 18864 }, { "epoch": 6.284167568918131, "grad_norm": 0.79296875, "learning_rate": 4.802834642243682e-06, "loss": 3.9369, "step": 18865 }, { "epoch": 6.28450070792038, "grad_norm": 0.75, "learning_rate": 4.80207937247476e-06, "loss": 4.0194, "step": 18866 }, { "epoch": 6.284833846922629, "grad_norm": 0.74609375, "learning_rate": 4.80132413413077e-06, "loss": 3.9791, "step": 18867 }, { "epoch": 6.285166985924877, "grad_norm": 0.7578125, "learning_rate": 4.800568927220508e-06, "loss": 3.9533, "step": 18868 }, { "epoch": 6.285500124927125, "grad_norm": 0.75390625, "learning_rate": 4.799813751752771e-06, "loss": 3.9793, "step": 18869 }, { "epoch": 6.285833263929375, "grad_norm": 0.75, "learning_rate": 4.799058607736355e-06, "loss": 4.0371, "step": 18870 }, { "epoch": 6.286166402931623, "grad_norm": 0.796875, "learning_rate": 4.7983034951800554e-06, "loss": 4.0573, "step": 18871 }, { "epoch": 6.286499541933872, "grad_norm": 0.734375, "learning_rate": 4.7975484140926695e-06, "loss": 4.0251, "step": 18872 }, { "epoch": 6.28683268093612, "grad_norm": 0.8203125, "learning_rate": 4.796793364482993e-06, "loss": 3.9931, "step": 18873 }, { "epoch": 6.2871658199383695, "grad_norm": 0.80078125, "learning_rate": 4.796038346359816e-06, "loss": 3.995, "step": 18874 }, { "epoch": 6.287498958940618, "grad_norm": 0.7578125, "learning_rate": 4.795283359731936e-06, "loss": 3.9934, "step": 18875 }, { "epoch": 6.287832097942867, "grad_norm": 0.75390625, "learning_rate": 4.794528404608146e-06, "loss": 4.1213, "step": 18876 }, { "epoch": 6.288165236945115, "grad_norm": 0.796875, "learning_rate": 4.79377348099724e-06, "loss": 3.9327, "step": 18877 }, { "epoch": 6.2884983759473645, "grad_norm": 0.7734375, "learning_rate": 4.7930185889080115e-06, "loss": 4.1093, "step": 18878 }, { "epoch": 6.288831514949613, "grad_norm": 0.78515625, "learning_rate": 4.792263728349254e-06, "loss": 4.0315, "step": 18879 }, { "epoch": 6.289164653951861, "grad_norm": 0.76953125, "learning_rate": 4.791508899329757e-06, "loss": 3.9898, "step": 18880 }, { "epoch": 6.28949779295411, "grad_norm": 0.734375, "learning_rate": 4.7907541018583165e-06, "loss": 4.053, "step": 18881 }, { "epoch": 6.289830931956359, "grad_norm": 0.734375, "learning_rate": 4.789999335943725e-06, "loss": 4.0646, "step": 18882 }, { "epoch": 6.290164070958608, "grad_norm": 0.74609375, "learning_rate": 4.789244601594768e-06, "loss": 3.8969, "step": 18883 }, { "epoch": 6.290497209960856, "grad_norm": 0.8125, "learning_rate": 4.78848989882024e-06, "loss": 3.9616, "step": 18884 }, { "epoch": 6.290830348963105, "grad_norm": 0.734375, "learning_rate": 4.787735227628931e-06, "loss": 3.9997, "step": 18885 }, { "epoch": 6.2911634879653535, "grad_norm": 0.73046875, "learning_rate": 4.786980588029633e-06, "loss": 3.9527, "step": 18886 }, { "epoch": 6.291496626967602, "grad_norm": 0.78125, "learning_rate": 4.786225980031133e-06, "loss": 3.9344, "step": 18887 }, { "epoch": 6.291829765969851, "grad_norm": 0.765625, "learning_rate": 4.785471403642226e-06, "loss": 4.0607, "step": 18888 }, { "epoch": 6.292162904972099, "grad_norm": 0.7578125, "learning_rate": 4.784716858871694e-06, "loss": 3.9634, "step": 18889 }, { "epoch": 6.2924960439743485, "grad_norm": 0.7890625, "learning_rate": 4.783962345728331e-06, "loss": 3.9502, "step": 18890 }, { "epoch": 6.292829182976597, "grad_norm": 0.75, "learning_rate": 4.783207864220926e-06, "loss": 4.0086, "step": 18891 }, { "epoch": 6.293162321978846, "grad_norm": 0.78515625, "learning_rate": 4.782453414358261e-06, "loss": 4.0042, "step": 18892 }, { "epoch": 6.293495460981094, "grad_norm": 0.73828125, "learning_rate": 4.781698996149129e-06, "loss": 3.9667, "step": 18893 }, { "epoch": 6.293828599983343, "grad_norm": 0.74609375, "learning_rate": 4.7809446096023156e-06, "loss": 3.9806, "step": 18894 }, { "epoch": 6.294161738985592, "grad_norm": 0.7109375, "learning_rate": 4.780190254726608e-06, "loss": 4.0156, "step": 18895 }, { "epoch": 6.29449487798784, "grad_norm": 0.73046875, "learning_rate": 4.779435931530792e-06, "loss": 3.9864, "step": 18896 }, { "epoch": 6.294828016990089, "grad_norm": 0.7578125, "learning_rate": 4.778681640023656e-06, "loss": 4.0298, "step": 18897 }, { "epoch": 6.2951611559923375, "grad_norm": 0.765625, "learning_rate": 4.777927380213984e-06, "loss": 3.967, "step": 18898 }, { "epoch": 6.295494294994587, "grad_norm": 0.75390625, "learning_rate": 4.777173152110562e-06, "loss": 3.9423, "step": 18899 }, { "epoch": 6.295827433996835, "grad_norm": 0.76953125, "learning_rate": 4.7764189557221786e-06, "loss": 4.0361, "step": 18900 }, { "epoch": 6.296160572999084, "grad_norm": 0.78125, "learning_rate": 4.77566479105761e-06, "loss": 4.0427, "step": 18901 }, { "epoch": 6.2964937120013325, "grad_norm": 0.7578125, "learning_rate": 4.774910658125648e-06, "loss": 3.9928, "step": 18902 }, { "epoch": 6.296826851003582, "grad_norm": 0.7734375, "learning_rate": 4.774156556935073e-06, "loss": 3.8733, "step": 18903 }, { "epoch": 6.29715999000583, "grad_norm": 0.7421875, "learning_rate": 4.773402487494671e-06, "loss": 3.9505, "step": 18904 }, { "epoch": 6.297493129008078, "grad_norm": 0.80859375, "learning_rate": 4.7726484498132225e-06, "loss": 3.9726, "step": 18905 }, { "epoch": 6.297826268010327, "grad_norm": 0.76953125, "learning_rate": 4.7718944438995145e-06, "loss": 3.961, "step": 18906 }, { "epoch": 6.298159407012576, "grad_norm": 0.81640625, "learning_rate": 4.7711404697623255e-06, "loss": 3.9771, "step": 18907 }, { "epoch": 6.298492546014825, "grad_norm": 0.7421875, "learning_rate": 4.7703865274104426e-06, "loss": 3.9695, "step": 18908 }, { "epoch": 6.298825685017073, "grad_norm": 0.7265625, "learning_rate": 4.769632616852642e-06, "loss": 4.037, "step": 18909 }, { "epoch": 6.299158824019322, "grad_norm": 0.79296875, "learning_rate": 4.768878738097706e-06, "loss": 3.9087, "step": 18910 }, { "epoch": 6.299491963021571, "grad_norm": 0.80078125, "learning_rate": 4.768124891154419e-06, "loss": 3.9984, "step": 18911 }, { "epoch": 6.29982510202382, "grad_norm": 0.80859375, "learning_rate": 4.767371076031558e-06, "loss": 3.9258, "step": 18912 }, { "epoch": 6.300158241026068, "grad_norm": 0.76171875, "learning_rate": 4.766617292737907e-06, "loss": 4.0283, "step": 18913 }, { "epoch": 6.300491380028316, "grad_norm": 0.76171875, "learning_rate": 4.765863541282242e-06, "loss": 4.0452, "step": 18914 }, { "epoch": 6.300824519030566, "grad_norm": 0.72265625, "learning_rate": 4.765109821673347e-06, "loss": 3.877, "step": 18915 }, { "epoch": 6.301157658032814, "grad_norm": 0.7734375, "learning_rate": 4.764356133919998e-06, "loss": 3.9706, "step": 18916 }, { "epoch": 6.301490797035063, "grad_norm": 0.78125, "learning_rate": 4.763602478030976e-06, "loss": 4.0058, "step": 18917 }, { "epoch": 6.301823936037311, "grad_norm": 0.77734375, "learning_rate": 4.762848854015056e-06, "loss": 3.9355, "step": 18918 }, { "epoch": 6.302157075039561, "grad_norm": 0.78515625, "learning_rate": 4.762095261881019e-06, "loss": 3.9498, "step": 18919 }, { "epoch": 6.302490214041809, "grad_norm": 0.7578125, "learning_rate": 4.7613417016376405e-06, "loss": 3.9982, "step": 18920 }, { "epoch": 6.302823353044058, "grad_norm": 0.75, "learning_rate": 4.7605881732937e-06, "loss": 4.0071, "step": 18921 }, { "epoch": 6.303156492046306, "grad_norm": 0.75, "learning_rate": 4.759834676857975e-06, "loss": 4.0181, "step": 18922 }, { "epoch": 6.303489631048555, "grad_norm": 0.71484375, "learning_rate": 4.759081212339239e-06, "loss": 3.9742, "step": 18923 }, { "epoch": 6.303822770050804, "grad_norm": 0.77734375, "learning_rate": 4.75832777974627e-06, "loss": 4.0472, "step": 18924 }, { "epoch": 6.304155909053052, "grad_norm": 0.7734375, "learning_rate": 4.757574379087845e-06, "loss": 4.0188, "step": 18925 }, { "epoch": 6.304489048055301, "grad_norm": 0.828125, "learning_rate": 4.756821010372741e-06, "loss": 4.0257, "step": 18926 }, { "epoch": 6.30482218705755, "grad_norm": 0.80859375, "learning_rate": 4.7560676736097265e-06, "loss": 3.9892, "step": 18927 }, { "epoch": 6.305155326059799, "grad_norm": 0.78125, "learning_rate": 4.755314368807582e-06, "loss": 3.8924, "step": 18928 }, { "epoch": 6.305488465062047, "grad_norm": 0.734375, "learning_rate": 4.754561095975078e-06, "loss": 4.014, "step": 18929 }, { "epoch": 6.305821604064295, "grad_norm": 0.75390625, "learning_rate": 4.753807855120993e-06, "loss": 4.0065, "step": 18930 }, { "epoch": 6.3061547430665446, "grad_norm": 0.75, "learning_rate": 4.753054646254097e-06, "loss": 4.0649, "step": 18931 }, { "epoch": 6.306487882068793, "grad_norm": 0.80078125, "learning_rate": 4.752301469383165e-06, "loss": 3.9788, "step": 18932 }, { "epoch": 6.306821021071042, "grad_norm": 0.7734375, "learning_rate": 4.7515483245169685e-06, "loss": 3.9743, "step": 18933 }, { "epoch": 6.30715416007329, "grad_norm": 0.734375, "learning_rate": 4.750795211664282e-06, "loss": 3.9831, "step": 18934 }, { "epoch": 6.3074872990755395, "grad_norm": 0.73046875, "learning_rate": 4.750042130833878e-06, "loss": 4.0108, "step": 18935 }, { "epoch": 6.307820438077788, "grad_norm": 0.8359375, "learning_rate": 4.7492890820345255e-06, "loss": 3.9383, "step": 18936 }, { "epoch": 6.308153577080037, "grad_norm": 0.78125, "learning_rate": 4.748536065274996e-06, "loss": 3.9914, "step": 18937 }, { "epoch": 6.308486716082285, "grad_norm": 0.79296875, "learning_rate": 4.747783080564061e-06, "loss": 4.0008, "step": 18938 }, { "epoch": 6.3088198550845345, "grad_norm": 0.78125, "learning_rate": 4.747030127910495e-06, "loss": 4.0298, "step": 18939 }, { "epoch": 6.309152994086783, "grad_norm": 0.81640625, "learning_rate": 4.7462772073230615e-06, "loss": 4.0611, "step": 18940 }, { "epoch": 6.309486133089031, "grad_norm": 0.765625, "learning_rate": 4.745524318810537e-06, "loss": 4.0356, "step": 18941 }, { "epoch": 6.30981927209128, "grad_norm": 0.75390625, "learning_rate": 4.744771462381686e-06, "loss": 4.0174, "step": 18942 }, { "epoch": 6.3101524110935285, "grad_norm": 0.796875, "learning_rate": 4.744018638045281e-06, "loss": 3.9689, "step": 18943 }, { "epoch": 6.310485550095778, "grad_norm": 0.76953125, "learning_rate": 4.743265845810091e-06, "loss": 3.9989, "step": 18944 }, { "epoch": 6.310818689098026, "grad_norm": 0.8125, "learning_rate": 4.7425130856848785e-06, "loss": 4.0932, "step": 18945 }, { "epoch": 6.311151828100275, "grad_norm": 0.84765625, "learning_rate": 4.741760357678419e-06, "loss": 3.8716, "step": 18946 }, { "epoch": 6.3114849671025235, "grad_norm": 0.78515625, "learning_rate": 4.741007661799475e-06, "loss": 3.9569, "step": 18947 }, { "epoch": 6.311818106104772, "grad_norm": 0.71875, "learning_rate": 4.740254998056818e-06, "loss": 4.0142, "step": 18948 }, { "epoch": 6.312151245107021, "grad_norm": 0.83984375, "learning_rate": 4.73950236645921e-06, "loss": 3.9505, "step": 18949 }, { "epoch": 6.312484384109269, "grad_norm": 0.7734375, "learning_rate": 4.738749767015422e-06, "loss": 4.0527, "step": 18950 }, { "epoch": 6.3128175231115184, "grad_norm": 0.7890625, "learning_rate": 4.737997199734217e-06, "loss": 3.9755, "step": 18951 }, { "epoch": 6.313150662113767, "grad_norm": 0.7265625, "learning_rate": 4.737244664624364e-06, "loss": 4.0576, "step": 18952 }, { "epoch": 6.313483801116016, "grad_norm": 0.87109375, "learning_rate": 4.736492161694627e-06, "loss": 3.9125, "step": 18953 }, { "epoch": 6.313816940118264, "grad_norm": 0.85546875, "learning_rate": 4.7357396909537684e-06, "loss": 3.9393, "step": 18954 }, { "epoch": 6.314150079120513, "grad_norm": 0.77734375, "learning_rate": 4.734987252410556e-06, "loss": 3.9857, "step": 18955 }, { "epoch": 6.314483218122762, "grad_norm": 0.73828125, "learning_rate": 4.734234846073753e-06, "loss": 3.9964, "step": 18956 }, { "epoch": 6.314816357125011, "grad_norm": 0.73828125, "learning_rate": 4.733482471952124e-06, "loss": 3.9935, "step": 18957 }, { "epoch": 6.315149496127259, "grad_norm": 0.76953125, "learning_rate": 4.73273013005443e-06, "loss": 4.0425, "step": 18958 }, { "epoch": 6.3154826351295075, "grad_norm": 0.76171875, "learning_rate": 4.731977820389438e-06, "loss": 3.9259, "step": 18959 }, { "epoch": 6.315815774131757, "grad_norm": 0.79296875, "learning_rate": 4.7312255429659075e-06, "loss": 3.9978, "step": 18960 }, { "epoch": 6.316148913134005, "grad_norm": 0.7734375, "learning_rate": 4.730473297792603e-06, "loss": 4.0067, "step": 18961 }, { "epoch": 6.316482052136254, "grad_norm": 0.75, "learning_rate": 4.729721084878288e-06, "loss": 4.0032, "step": 18962 }, { "epoch": 6.316815191138502, "grad_norm": 0.765625, "learning_rate": 4.72896890423172e-06, "loss": 3.9488, "step": 18963 }, { "epoch": 6.317148330140752, "grad_norm": 0.78125, "learning_rate": 4.728216755861662e-06, "loss": 3.9613, "step": 18964 }, { "epoch": 6.317481469143, "grad_norm": 0.7421875, "learning_rate": 4.727464639776874e-06, "loss": 3.9651, "step": 18965 }, { "epoch": 6.317814608145248, "grad_norm": 0.8046875, "learning_rate": 4.726712555986119e-06, "loss": 3.9508, "step": 18966 }, { "epoch": 6.318147747147497, "grad_norm": 0.796875, "learning_rate": 4.725960504498156e-06, "loss": 3.9652, "step": 18967 }, { "epoch": 6.318480886149746, "grad_norm": 0.75, "learning_rate": 4.725208485321742e-06, "loss": 3.9799, "step": 18968 }, { "epoch": 6.318814025151995, "grad_norm": 0.80859375, "learning_rate": 4.724456498465643e-06, "loss": 3.8879, "step": 18969 }, { "epoch": 6.319147164154243, "grad_norm": 0.75390625, "learning_rate": 4.723704543938611e-06, "loss": 4.0205, "step": 18970 }, { "epoch": 6.319480303156492, "grad_norm": 0.74609375, "learning_rate": 4.72295262174941e-06, "loss": 3.9513, "step": 18971 }, { "epoch": 6.319813442158741, "grad_norm": 0.79296875, "learning_rate": 4.722200731906795e-06, "loss": 3.937, "step": 18972 }, { "epoch": 6.32014658116099, "grad_norm": 0.765625, "learning_rate": 4.721448874419524e-06, "loss": 3.95, "step": 18973 }, { "epoch": 6.320479720163238, "grad_norm": 0.78515625, "learning_rate": 4.720697049296357e-06, "loss": 4.0212, "step": 18974 }, { "epoch": 6.320812859165486, "grad_norm": 0.79296875, "learning_rate": 4.719945256546046e-06, "loss": 3.9442, "step": 18975 }, { "epoch": 6.321145998167736, "grad_norm": 0.80078125, "learning_rate": 4.719193496177354e-06, "loss": 3.8594, "step": 18976 }, { "epoch": 6.321479137169984, "grad_norm": 0.7578125, "learning_rate": 4.718441768199032e-06, "loss": 3.9557, "step": 18977 }, { "epoch": 6.321812276172233, "grad_norm": 0.78515625, "learning_rate": 4.717690072619842e-06, "loss": 3.9818, "step": 18978 }, { "epoch": 6.322145415174481, "grad_norm": 0.75, "learning_rate": 4.716938409448533e-06, "loss": 3.9651, "step": 18979 }, { "epoch": 6.3224785541767305, "grad_norm": 0.796875, "learning_rate": 4.7161867786938665e-06, "loss": 3.9763, "step": 18980 }, { "epoch": 6.322811693178979, "grad_norm": 0.73828125, "learning_rate": 4.7154351803645925e-06, "loss": 4.0565, "step": 18981 }, { "epoch": 6.323144832181228, "grad_norm": 0.75390625, "learning_rate": 4.714683614469468e-06, "loss": 3.9198, "step": 18982 }, { "epoch": 6.323477971183476, "grad_norm": 0.75, "learning_rate": 4.713932081017246e-06, "loss": 3.9833, "step": 18983 }, { "epoch": 6.323811110185725, "grad_norm": 0.76953125, "learning_rate": 4.713180580016679e-06, "loss": 3.9508, "step": 18984 }, { "epoch": 6.324144249187974, "grad_norm": 0.80078125, "learning_rate": 4.712429111476524e-06, "loss": 3.9303, "step": 18985 }, { "epoch": 6.324477388190222, "grad_norm": 0.76953125, "learning_rate": 4.711677675405531e-06, "loss": 4.0187, "step": 18986 }, { "epoch": 6.324810527192471, "grad_norm": 0.72265625, "learning_rate": 4.710926271812455e-06, "loss": 4.0843, "step": 18987 }, { "epoch": 6.32514366619472, "grad_norm": 0.8046875, "learning_rate": 4.710174900706044e-06, "loss": 4.0094, "step": 18988 }, { "epoch": 6.325476805196969, "grad_norm": 0.80859375, "learning_rate": 4.709423562095057e-06, "loss": 3.9355, "step": 18989 }, { "epoch": 6.325809944199217, "grad_norm": 0.76171875, "learning_rate": 4.7086722559882394e-06, "loss": 4.0094, "step": 18990 }, { "epoch": 6.326143083201465, "grad_norm": 0.75390625, "learning_rate": 4.7079209823943425e-06, "loss": 3.9234, "step": 18991 }, { "epoch": 6.3264762222037145, "grad_norm": 0.765625, "learning_rate": 4.70716974132212e-06, "loss": 3.9917, "step": 18992 }, { "epoch": 6.326809361205963, "grad_norm": 0.80078125, "learning_rate": 4.706418532780319e-06, "loss": 3.988, "step": 18993 }, { "epoch": 6.327142500208212, "grad_norm": 0.78125, "learning_rate": 4.7056673567776926e-06, "loss": 4.0001, "step": 18994 }, { "epoch": 6.32747563921046, "grad_norm": 0.78125, "learning_rate": 4.704916213322989e-06, "loss": 3.9526, "step": 18995 }, { "epoch": 6.3278087782127095, "grad_norm": 0.81640625, "learning_rate": 4.704165102424957e-06, "loss": 3.9723, "step": 18996 }, { "epoch": 6.328141917214958, "grad_norm": 0.76953125, "learning_rate": 4.703414024092344e-06, "loss": 3.9742, "step": 18997 }, { "epoch": 6.328475056217207, "grad_norm": 0.828125, "learning_rate": 4.702662978333905e-06, "loss": 3.9708, "step": 18998 }, { "epoch": 6.328808195219455, "grad_norm": 0.76953125, "learning_rate": 4.701911965158381e-06, "loss": 3.9991, "step": 18999 }, { "epoch": 6.329141334221704, "grad_norm": 0.7421875, "learning_rate": 4.7011609845745195e-06, "loss": 4.025, "step": 19000 }, { "epoch": 6.329474473223953, "grad_norm": 0.78515625, "learning_rate": 4.700410036591073e-06, "loss": 4.0489, "step": 19001 }, { "epoch": 6.329807612226201, "grad_norm": 0.75, "learning_rate": 4.699659121216783e-06, "loss": 4.0255, "step": 19002 }, { "epoch": 6.33014075122845, "grad_norm": 0.7421875, "learning_rate": 4.698908238460401e-06, "loss": 3.9914, "step": 19003 }, { "epoch": 6.3304738902306985, "grad_norm": 0.74609375, "learning_rate": 4.69815738833067e-06, "loss": 3.9949, "step": 19004 }, { "epoch": 6.330807029232948, "grad_norm": 0.7421875, "learning_rate": 4.697406570836337e-06, "loss": 3.9487, "step": 19005 }, { "epoch": 6.331140168235196, "grad_norm": 0.75390625, "learning_rate": 4.6966557859861465e-06, "loss": 3.9568, "step": 19006 }, { "epoch": 6.331473307237445, "grad_norm": 0.7578125, "learning_rate": 4.695905033788848e-06, "loss": 3.9313, "step": 19007 }, { "epoch": 6.3318064462396935, "grad_norm": 0.75, "learning_rate": 4.695154314253178e-06, "loss": 4.0525, "step": 19008 }, { "epoch": 6.332139585241942, "grad_norm": 0.7578125, "learning_rate": 4.694403627387885e-06, "loss": 3.959, "step": 19009 }, { "epoch": 6.332472724244191, "grad_norm": 0.765625, "learning_rate": 4.693652973201713e-06, "loss": 3.9559, "step": 19010 }, { "epoch": 6.332805863246439, "grad_norm": 0.82421875, "learning_rate": 4.692902351703407e-06, "loss": 4.052, "step": 19011 }, { "epoch": 6.333139002248688, "grad_norm": 0.81640625, "learning_rate": 4.692151762901706e-06, "loss": 3.9678, "step": 19012 }, { "epoch": 6.333472141250937, "grad_norm": 0.7734375, "learning_rate": 4.691401206805357e-06, "loss": 3.9407, "step": 19013 }, { "epoch": 6.333805280253186, "grad_norm": 0.7109375, "learning_rate": 4.690650683423099e-06, "loss": 3.9845, "step": 19014 }, { "epoch": 6.334138419255434, "grad_norm": 0.76953125, "learning_rate": 4.689900192763678e-06, "loss": 3.9526, "step": 19015 }, { "epoch": 6.334471558257683, "grad_norm": 0.80078125, "learning_rate": 4.6891497348358325e-06, "loss": 3.9878, "step": 19016 }, { "epoch": 6.334804697259932, "grad_norm": 0.75390625, "learning_rate": 4.688399309648303e-06, "loss": 3.9496, "step": 19017 }, { "epoch": 6.335137836262181, "grad_norm": 0.796875, "learning_rate": 4.687648917209832e-06, "loss": 3.94, "step": 19018 }, { "epoch": 6.335470975264429, "grad_norm": 0.7578125, "learning_rate": 4.686898557529158e-06, "loss": 3.9924, "step": 19019 }, { "epoch": 6.3358041142666774, "grad_norm": 0.77734375, "learning_rate": 4.686148230615026e-06, "loss": 3.9854, "step": 19020 }, { "epoch": 6.336137253268927, "grad_norm": 0.8125, "learning_rate": 4.68539793647617e-06, "loss": 3.9837, "step": 19021 }, { "epoch": 6.336470392271175, "grad_norm": 0.77734375, "learning_rate": 4.684647675121332e-06, "loss": 4.0534, "step": 19022 }, { "epoch": 6.336803531273424, "grad_norm": 0.74609375, "learning_rate": 4.68389744655925e-06, "loss": 3.9622, "step": 19023 }, { "epoch": 6.337136670275672, "grad_norm": 0.8046875, "learning_rate": 4.683147250798663e-06, "loss": 3.999, "step": 19024 }, { "epoch": 6.337469809277922, "grad_norm": 0.77734375, "learning_rate": 4.682397087848312e-06, "loss": 3.9996, "step": 19025 }, { "epoch": 6.33780294828017, "grad_norm": 0.71484375, "learning_rate": 4.68164695771693e-06, "loss": 4.0107, "step": 19026 }, { "epoch": 6.338136087282418, "grad_norm": 0.80078125, "learning_rate": 4.680896860413256e-06, "loss": 3.9863, "step": 19027 }, { "epoch": 6.338469226284667, "grad_norm": 0.78125, "learning_rate": 4.680146795946028e-06, "loss": 4.0137, "step": 19028 }, { "epoch": 6.338802365286916, "grad_norm": 0.71875, "learning_rate": 4.679396764323982e-06, "loss": 3.9334, "step": 19029 }, { "epoch": 6.339135504289165, "grad_norm": 0.76171875, "learning_rate": 4.678646765555853e-06, "loss": 3.9895, "step": 19030 }, { "epoch": 6.339468643291413, "grad_norm": 0.7421875, "learning_rate": 4.67789679965038e-06, "loss": 3.9516, "step": 19031 }, { "epoch": 6.339801782293662, "grad_norm": 0.76171875, "learning_rate": 4.6771468666162944e-06, "loss": 3.9967, "step": 19032 }, { "epoch": 6.340134921295911, "grad_norm": 0.80078125, "learning_rate": 4.676396966462336e-06, "loss": 3.9974, "step": 19033 }, { "epoch": 6.34046806029816, "grad_norm": 0.79296875, "learning_rate": 4.675647099197238e-06, "loss": 3.9855, "step": 19034 }, { "epoch": 6.340801199300408, "grad_norm": 0.71484375, "learning_rate": 4.674897264829731e-06, "loss": 3.9364, "step": 19035 }, { "epoch": 6.341134338302656, "grad_norm": 0.78125, "learning_rate": 4.674147463368553e-06, "loss": 4.028, "step": 19036 }, { "epoch": 6.341467477304906, "grad_norm": 0.7734375, "learning_rate": 4.673397694822435e-06, "loss": 3.934, "step": 19037 }, { "epoch": 6.341800616307154, "grad_norm": 0.76171875, "learning_rate": 4.672647959200112e-06, "loss": 3.9924, "step": 19038 }, { "epoch": 6.342133755309403, "grad_norm": 0.7421875, "learning_rate": 4.671898256510316e-06, "loss": 4.0313, "step": 19039 }, { "epoch": 6.342466894311651, "grad_norm": 0.74609375, "learning_rate": 4.671148586761781e-06, "loss": 3.9973, "step": 19040 }, { "epoch": 6.3428000333139005, "grad_norm": 0.76171875, "learning_rate": 4.670398949963235e-06, "loss": 4.0314, "step": 19041 }, { "epoch": 6.343133172316149, "grad_norm": 0.796875, "learning_rate": 4.6696493461234155e-06, "loss": 4.0611, "step": 19042 }, { "epoch": 6.343466311318398, "grad_norm": 0.765625, "learning_rate": 4.6688997752510504e-06, "loss": 3.9763, "step": 19043 }, { "epoch": 6.343799450320646, "grad_norm": 0.75390625, "learning_rate": 4.668150237354869e-06, "loss": 4.0087, "step": 19044 }, { "epoch": 6.344132589322895, "grad_norm": 0.734375, "learning_rate": 4.667400732443604e-06, "loss": 4.0345, "step": 19045 }, { "epoch": 6.344465728325144, "grad_norm": 0.828125, "learning_rate": 4.666651260525985e-06, "loss": 3.9503, "step": 19046 }, { "epoch": 6.344798867327392, "grad_norm": 0.75, "learning_rate": 4.665901821610742e-06, "loss": 3.9074, "step": 19047 }, { "epoch": 6.345132006329641, "grad_norm": 0.72265625, "learning_rate": 4.665152415706603e-06, "loss": 4.0158, "step": 19048 }, { "epoch": 6.3454651453318895, "grad_norm": 0.78125, "learning_rate": 4.6644030428223e-06, "loss": 3.9783, "step": 19049 }, { "epoch": 6.345798284334139, "grad_norm": 0.765625, "learning_rate": 4.663653702966558e-06, "loss": 3.9845, "step": 19050 }, { "epoch": 6.346131423336387, "grad_norm": 0.75, "learning_rate": 4.662904396148109e-06, "loss": 3.9676, "step": 19051 }, { "epoch": 6.346464562338636, "grad_norm": 0.76953125, "learning_rate": 4.662155122375679e-06, "loss": 3.9368, "step": 19052 }, { "epoch": 6.3467977013408845, "grad_norm": 0.78125, "learning_rate": 4.661405881657993e-06, "loss": 3.9988, "step": 19053 }, { "epoch": 6.347130840343133, "grad_norm": 0.73828125, "learning_rate": 4.660656674003781e-06, "loss": 3.987, "step": 19054 }, { "epoch": 6.347463979345382, "grad_norm": 0.75, "learning_rate": 4.659907499421768e-06, "loss": 3.9795, "step": 19055 }, { "epoch": 6.34779711834763, "grad_norm": 0.76953125, "learning_rate": 4.65915835792068e-06, "loss": 4.0152, "step": 19056 }, { "epoch": 6.3481302573498795, "grad_norm": 0.75, "learning_rate": 4.6584092495092465e-06, "loss": 3.8773, "step": 19057 }, { "epoch": 6.348463396352128, "grad_norm": 0.71484375, "learning_rate": 4.657660174196187e-06, "loss": 4.0288, "step": 19058 }, { "epoch": 6.348796535354377, "grad_norm": 0.80859375, "learning_rate": 4.656911131990232e-06, "loss": 3.9537, "step": 19059 }, { "epoch": 6.349129674356625, "grad_norm": 0.734375, "learning_rate": 4.656162122900104e-06, "loss": 4.015, "step": 19060 }, { "epoch": 6.349462813358874, "grad_norm": 0.75390625, "learning_rate": 4.65541314693453e-06, "loss": 3.9483, "step": 19061 }, { "epoch": 6.349795952361123, "grad_norm": 0.76171875, "learning_rate": 4.654664204102229e-06, "loss": 3.9735, "step": 19062 }, { "epoch": 6.350129091363371, "grad_norm": 0.78125, "learning_rate": 4.653915294411927e-06, "loss": 3.9989, "step": 19063 }, { "epoch": 6.35046223036562, "grad_norm": 0.76171875, "learning_rate": 4.6531664178723485e-06, "loss": 4.0167, "step": 19064 }, { "epoch": 6.3507953693678685, "grad_norm": 0.83203125, "learning_rate": 4.652417574492213e-06, "loss": 3.9959, "step": 19065 }, { "epoch": 6.351128508370118, "grad_norm": 0.8203125, "learning_rate": 4.6516687642802456e-06, "loss": 3.9553, "step": 19066 }, { "epoch": 6.351461647372366, "grad_norm": 0.7265625, "learning_rate": 4.6509199872451665e-06, "loss": 3.9898, "step": 19067 }, { "epoch": 6.351794786374615, "grad_norm": 0.796875, "learning_rate": 4.650171243395701e-06, "loss": 3.9621, "step": 19068 }, { "epoch": 6.352127925376863, "grad_norm": 0.76171875, "learning_rate": 4.649422532740565e-06, "loss": 3.9414, "step": 19069 }, { "epoch": 6.352461064379112, "grad_norm": 0.78515625, "learning_rate": 4.648673855288486e-06, "loss": 4.0103, "step": 19070 }, { "epoch": 6.352794203381361, "grad_norm": 0.75, "learning_rate": 4.6479252110481785e-06, "loss": 4.0506, "step": 19071 }, { "epoch": 6.353127342383609, "grad_norm": 0.75390625, "learning_rate": 4.647176600028364e-06, "loss": 4.0012, "step": 19072 }, { "epoch": 6.353460481385858, "grad_norm": 0.82421875, "learning_rate": 4.646428022237764e-06, "loss": 3.9874, "step": 19073 }, { "epoch": 6.353793620388107, "grad_norm": 0.78125, "learning_rate": 4.645679477685095e-06, "loss": 4.0173, "step": 19074 }, { "epoch": 6.354126759390356, "grad_norm": 0.734375, "learning_rate": 4.64493096637908e-06, "loss": 3.985, "step": 19075 }, { "epoch": 6.354459898392604, "grad_norm": 0.8125, "learning_rate": 4.644182488328432e-06, "loss": 4.0368, "step": 19076 }, { "epoch": 6.354793037394853, "grad_norm": 0.8359375, "learning_rate": 4.643434043541875e-06, "loss": 4.0472, "step": 19077 }, { "epoch": 6.355126176397102, "grad_norm": 0.765625, "learning_rate": 4.642685632028122e-06, "loss": 3.9407, "step": 19078 }, { "epoch": 6.355459315399351, "grad_norm": 0.75, "learning_rate": 4.641937253795897e-06, "loss": 4.05, "step": 19079 }, { "epoch": 6.355792454401599, "grad_norm": 0.7890625, "learning_rate": 4.64118890885391e-06, "loss": 3.9565, "step": 19080 }, { "epoch": 6.356125593403847, "grad_norm": 0.75, "learning_rate": 4.640440597210878e-06, "loss": 3.9437, "step": 19081 }, { "epoch": 6.356458732406097, "grad_norm": 0.76171875, "learning_rate": 4.639692318875521e-06, "loss": 3.8972, "step": 19082 }, { "epoch": 6.356791871408345, "grad_norm": 0.7578125, "learning_rate": 4.638944073856552e-06, "loss": 3.9974, "step": 19083 }, { "epoch": 6.357125010410594, "grad_norm": 0.75390625, "learning_rate": 4.638195862162688e-06, "loss": 4.0042, "step": 19084 }, { "epoch": 6.357458149412842, "grad_norm": 0.7421875, "learning_rate": 4.6374476838026435e-06, "loss": 3.954, "step": 19085 }, { "epoch": 6.357791288415092, "grad_norm": 0.71484375, "learning_rate": 4.636699538785133e-06, "loss": 3.989, "step": 19086 }, { "epoch": 6.35812442741734, "grad_norm": 0.79296875, "learning_rate": 4.63595142711887e-06, "loss": 3.9651, "step": 19087 }, { "epoch": 6.358457566419588, "grad_norm": 0.8046875, "learning_rate": 4.635203348812574e-06, "loss": 3.9617, "step": 19088 }, { "epoch": 6.358790705421837, "grad_norm": 0.7421875, "learning_rate": 4.634455303874951e-06, "loss": 3.9703, "step": 19089 }, { "epoch": 6.359123844424086, "grad_norm": 0.81640625, "learning_rate": 4.633707292314715e-06, "loss": 3.9624, "step": 19090 }, { "epoch": 6.359456983426335, "grad_norm": 0.82421875, "learning_rate": 4.632959314140583e-06, "loss": 3.9517, "step": 19091 }, { "epoch": 6.359790122428583, "grad_norm": 0.75390625, "learning_rate": 4.632211369361263e-06, "loss": 4.0503, "step": 19092 }, { "epoch": 6.360123261430832, "grad_norm": 0.7734375, "learning_rate": 4.631463457985471e-06, "loss": 3.9395, "step": 19093 }, { "epoch": 6.360456400433081, "grad_norm": 0.71484375, "learning_rate": 4.630715580021914e-06, "loss": 4.0331, "step": 19094 }, { "epoch": 6.36078953943533, "grad_norm": 0.7734375, "learning_rate": 4.629967735479308e-06, "loss": 3.9943, "step": 19095 }, { "epoch": 6.361122678437578, "grad_norm": 0.77734375, "learning_rate": 4.62921992436636e-06, "loss": 3.9189, "step": 19096 }, { "epoch": 6.361455817439826, "grad_norm": 0.75390625, "learning_rate": 4.628472146691785e-06, "loss": 4.0285, "step": 19097 }, { "epoch": 6.3617889564420755, "grad_norm": 0.734375, "learning_rate": 4.6277244024642875e-06, "loss": 4.0138, "step": 19098 }, { "epoch": 6.362122095444324, "grad_norm": 0.75390625, "learning_rate": 4.626976691692579e-06, "loss": 3.96, "step": 19099 }, { "epoch": 6.362455234446573, "grad_norm": 0.80078125, "learning_rate": 4.626229014385367e-06, "loss": 4.001, "step": 19100 }, { "epoch": 6.362788373448821, "grad_norm": 0.74609375, "learning_rate": 4.625481370551367e-06, "loss": 3.9925, "step": 19101 }, { "epoch": 6.3631215124510705, "grad_norm": 0.7421875, "learning_rate": 4.6247337601992785e-06, "loss": 3.939, "step": 19102 }, { "epoch": 6.363454651453319, "grad_norm": 0.76171875, "learning_rate": 4.623986183337816e-06, "loss": 3.9502, "step": 19103 }, { "epoch": 6.363787790455568, "grad_norm": 0.76953125, "learning_rate": 4.623238639975685e-06, "loss": 3.9842, "step": 19104 }, { "epoch": 6.364120929457816, "grad_norm": 0.80859375, "learning_rate": 4.6224911301215924e-06, "loss": 4.0538, "step": 19105 }, { "epoch": 6.364454068460065, "grad_norm": 0.734375, "learning_rate": 4.6217436537842475e-06, "loss": 4.0245, "step": 19106 }, { "epoch": 6.364787207462314, "grad_norm": 0.7421875, "learning_rate": 4.620996210972352e-06, "loss": 3.896, "step": 19107 }, { "epoch": 6.365120346464562, "grad_norm": 0.73828125, "learning_rate": 4.620248801694616e-06, "loss": 3.9524, "step": 19108 }, { "epoch": 6.365453485466811, "grad_norm": 0.7421875, "learning_rate": 4.6195014259597425e-06, "loss": 4.0319, "step": 19109 }, { "epoch": 6.3657866244690595, "grad_norm": 0.78125, "learning_rate": 4.61875408377644e-06, "loss": 3.9687, "step": 19110 }, { "epoch": 6.366119763471309, "grad_norm": 0.77734375, "learning_rate": 4.61800677515341e-06, "loss": 3.9819, "step": 19111 }, { "epoch": 6.366452902473557, "grad_norm": 0.7578125, "learning_rate": 4.61725950009936e-06, "loss": 4.0523, "step": 19112 }, { "epoch": 6.366786041475806, "grad_norm": 0.7578125, "learning_rate": 4.616512258622991e-06, "loss": 3.9984, "step": 19113 }, { "epoch": 6.3671191804780545, "grad_norm": 0.73828125, "learning_rate": 4.615765050733011e-06, "loss": 3.9496, "step": 19114 }, { "epoch": 6.367452319480303, "grad_norm": 0.765625, "learning_rate": 4.615017876438121e-06, "loss": 3.9826, "step": 19115 }, { "epoch": 6.367785458482552, "grad_norm": 0.75, "learning_rate": 4.614270735747022e-06, "loss": 3.9367, "step": 19116 }, { "epoch": 6.3681185974848, "grad_norm": 0.7421875, "learning_rate": 4.6135236286684205e-06, "loss": 3.9611, "step": 19117 }, { "epoch": 6.368451736487049, "grad_norm": 0.78515625, "learning_rate": 4.612776555211014e-06, "loss": 3.9685, "step": 19118 }, { "epoch": 6.368784875489298, "grad_norm": 0.73828125, "learning_rate": 4.612029515383509e-06, "loss": 3.9613, "step": 19119 }, { "epoch": 6.369118014491547, "grad_norm": 0.75390625, "learning_rate": 4.6112825091946035e-06, "loss": 4.0106, "step": 19120 }, { "epoch": 6.369451153493795, "grad_norm": 0.79296875, "learning_rate": 4.610535536653001e-06, "loss": 4.009, "step": 19121 }, { "epoch": 6.369784292496044, "grad_norm": 0.76953125, "learning_rate": 4.6097885977673995e-06, "loss": 4.0047, "step": 19122 }, { "epoch": 6.370117431498293, "grad_norm": 0.7265625, "learning_rate": 4.609041692546502e-06, "loss": 4.07, "step": 19123 }, { "epoch": 6.370450570500541, "grad_norm": 0.7890625, "learning_rate": 4.6082948209990074e-06, "loss": 4.0558, "step": 19124 }, { "epoch": 6.37078370950279, "grad_norm": 0.7734375, "learning_rate": 4.607547983133614e-06, "loss": 3.9694, "step": 19125 }, { "epoch": 6.3711168485050385, "grad_norm": 0.7734375, "learning_rate": 4.606801178959021e-06, "loss": 4.0423, "step": 19126 }, { "epoch": 6.371449987507288, "grad_norm": 0.78125, "learning_rate": 4.606054408483927e-06, "loss": 3.9983, "step": 19127 }, { "epoch": 6.371783126509536, "grad_norm": 0.7734375, "learning_rate": 4.605307671717033e-06, "loss": 4.0443, "step": 19128 }, { "epoch": 6.372116265511785, "grad_norm": 0.765625, "learning_rate": 4.6045609686670315e-06, "loss": 3.9248, "step": 19129 }, { "epoch": 6.372449404514033, "grad_norm": 0.7734375, "learning_rate": 4.603814299342627e-06, "loss": 4.0201, "step": 19130 }, { "epoch": 6.372782543516282, "grad_norm": 0.75, "learning_rate": 4.6030676637525084e-06, "loss": 3.9629, "step": 19131 }, { "epoch": 6.373115682518531, "grad_norm": 0.78515625, "learning_rate": 4.602321061905382e-06, "loss": 3.9828, "step": 19132 }, { "epoch": 6.373448821520779, "grad_norm": 0.796875, "learning_rate": 4.601574493809937e-06, "loss": 3.9444, "step": 19133 }, { "epoch": 6.373781960523028, "grad_norm": 0.828125, "learning_rate": 4.60082795947487e-06, "loss": 3.9885, "step": 19134 }, { "epoch": 6.374115099525277, "grad_norm": 0.7421875, "learning_rate": 4.600081458908878e-06, "loss": 4.0355, "step": 19135 }, { "epoch": 6.374448238527526, "grad_norm": 0.75390625, "learning_rate": 4.599334992120655e-06, "loss": 3.9537, "step": 19136 }, { "epoch": 6.374781377529774, "grad_norm": 0.73828125, "learning_rate": 4.598588559118898e-06, "loss": 3.967, "step": 19137 }, { "epoch": 6.375114516532023, "grad_norm": 0.74609375, "learning_rate": 4.597842159912298e-06, "loss": 4.0458, "step": 19138 }, { "epoch": 6.375447655534272, "grad_norm": 0.74609375, "learning_rate": 4.597095794509553e-06, "loss": 3.9302, "step": 19139 }, { "epoch": 6.375780794536521, "grad_norm": 0.8203125, "learning_rate": 4.596349462919352e-06, "loss": 3.9436, "step": 19140 }, { "epoch": 6.376113933538769, "grad_norm": 0.77734375, "learning_rate": 4.5956031651503924e-06, "loss": 3.9188, "step": 19141 }, { "epoch": 6.376447072541017, "grad_norm": 0.76953125, "learning_rate": 4.594856901211367e-06, "loss": 3.9948, "step": 19142 }, { "epoch": 6.376780211543267, "grad_norm": 0.7421875, "learning_rate": 4.594110671110963e-06, "loss": 4.0436, "step": 19143 }, { "epoch": 6.377113350545515, "grad_norm": 0.7578125, "learning_rate": 4.593364474857875e-06, "loss": 3.9965, "step": 19144 }, { "epoch": 6.377446489547764, "grad_norm": 0.79296875, "learning_rate": 4.592618312460797e-06, "loss": 3.979, "step": 19145 }, { "epoch": 6.377779628550012, "grad_norm": 0.75390625, "learning_rate": 4.591872183928417e-06, "loss": 3.997, "step": 19146 }, { "epoch": 6.3781127675522615, "grad_norm": 0.796875, "learning_rate": 4.591126089269428e-06, "loss": 3.9158, "step": 19147 }, { "epoch": 6.37844590655451, "grad_norm": 0.78125, "learning_rate": 4.590380028492518e-06, "loss": 4.0284, "step": 19148 }, { "epoch": 6.378779045556758, "grad_norm": 0.70703125, "learning_rate": 4.589634001606381e-06, "loss": 4.0101, "step": 19149 }, { "epoch": 6.379112184559007, "grad_norm": 0.78125, "learning_rate": 4.5888880086197e-06, "loss": 3.9762, "step": 19150 }, { "epoch": 6.379445323561256, "grad_norm": 0.7421875, "learning_rate": 4.588142049541173e-06, "loss": 4.0225, "step": 19151 }, { "epoch": 6.379778462563505, "grad_norm": 0.74609375, "learning_rate": 4.587396124379483e-06, "loss": 3.9208, "step": 19152 }, { "epoch": 6.380111601565753, "grad_norm": 0.72265625, "learning_rate": 4.586650233143317e-06, "loss": 3.9946, "step": 19153 }, { "epoch": 6.380444740568002, "grad_norm": 0.73828125, "learning_rate": 4.585904375841366e-06, "loss": 3.9653, "step": 19154 }, { "epoch": 6.3807778795702506, "grad_norm": 0.81640625, "learning_rate": 4.585158552482318e-06, "loss": 4.0273, "step": 19155 }, { "epoch": 6.3811110185725, "grad_norm": 0.8359375, "learning_rate": 4.584412763074859e-06, "loss": 3.9724, "step": 19156 }, { "epoch": 6.381444157574748, "grad_norm": 0.8046875, "learning_rate": 4.583667007627675e-06, "loss": 4.0383, "step": 19157 }, { "epoch": 6.381777296576997, "grad_norm": 0.7421875, "learning_rate": 4.5829212861494565e-06, "loss": 3.9729, "step": 19158 }, { "epoch": 6.3821104355792455, "grad_norm": 0.7578125, "learning_rate": 4.582175598648887e-06, "loss": 4.0113, "step": 19159 }, { "epoch": 6.382443574581494, "grad_norm": 0.76953125, "learning_rate": 4.581429945134649e-06, "loss": 4.0231, "step": 19160 }, { "epoch": 6.382776713583743, "grad_norm": 0.76171875, "learning_rate": 4.580684325615431e-06, "loss": 3.9484, "step": 19161 }, { "epoch": 6.383109852585991, "grad_norm": 0.828125, "learning_rate": 4.579938740099918e-06, "loss": 3.947, "step": 19162 }, { "epoch": 6.3834429915882405, "grad_norm": 0.75390625, "learning_rate": 4.5791931885967936e-06, "loss": 4.0276, "step": 19163 }, { "epoch": 6.383776130590489, "grad_norm": 0.7890625, "learning_rate": 4.578447671114741e-06, "loss": 3.9667, "step": 19164 }, { "epoch": 6.384109269592738, "grad_norm": 0.76171875, "learning_rate": 4.5777021876624465e-06, "loss": 3.861, "step": 19165 }, { "epoch": 6.384442408594986, "grad_norm": 0.79296875, "learning_rate": 4.5769567382485906e-06, "loss": 3.9908, "step": 19166 }, { "epoch": 6.3847755475972345, "grad_norm": 0.77734375, "learning_rate": 4.576211322881858e-06, "loss": 4.0251, "step": 19167 }, { "epoch": 6.385108686599484, "grad_norm": 0.76953125, "learning_rate": 4.575465941570933e-06, "loss": 4.0484, "step": 19168 }, { "epoch": 6.385441825601732, "grad_norm": 0.7421875, "learning_rate": 4.574720594324492e-06, "loss": 4.0418, "step": 19169 }, { "epoch": 6.385774964603981, "grad_norm": 0.78125, "learning_rate": 4.573975281151222e-06, "loss": 3.9275, "step": 19170 }, { "epoch": 6.3861081036062295, "grad_norm": 0.78125, "learning_rate": 4.5732300020597996e-06, "loss": 3.9762, "step": 19171 }, { "epoch": 6.386441242608479, "grad_norm": 0.73046875, "learning_rate": 4.57248475705891e-06, "loss": 3.9295, "step": 19172 }, { "epoch": 6.386774381610727, "grad_norm": 0.765625, "learning_rate": 4.571739546157231e-06, "loss": 4.0154, "step": 19173 }, { "epoch": 6.387107520612976, "grad_norm": 0.796875, "learning_rate": 4.570994369363445e-06, "loss": 3.9819, "step": 19174 }, { "epoch": 6.3874406596152244, "grad_norm": 0.73828125, "learning_rate": 4.570249226686229e-06, "loss": 3.9607, "step": 19175 }, { "epoch": 6.387773798617473, "grad_norm": 0.79296875, "learning_rate": 4.569504118134265e-06, "loss": 3.9738, "step": 19176 }, { "epoch": 6.388106937619722, "grad_norm": 0.7265625, "learning_rate": 4.568759043716232e-06, "loss": 4.0192, "step": 19177 }, { "epoch": 6.38844007662197, "grad_norm": 0.78125, "learning_rate": 4.568014003440805e-06, "loss": 3.9916, "step": 19178 }, { "epoch": 6.388773215624219, "grad_norm": 0.7578125, "learning_rate": 4.5672689973166645e-06, "loss": 4.0645, "step": 19179 }, { "epoch": 6.389106354626468, "grad_norm": 0.75, "learning_rate": 4.5665240253524864e-06, "loss": 3.9893, "step": 19180 }, { "epoch": 6.389439493628717, "grad_norm": 0.81640625, "learning_rate": 4.565779087556952e-06, "loss": 4.0084, "step": 19181 }, { "epoch": 6.389772632630965, "grad_norm": 0.734375, "learning_rate": 4.565034183938733e-06, "loss": 3.967, "step": 19182 }, { "epoch": 6.390105771633214, "grad_norm": 0.7734375, "learning_rate": 4.564289314506511e-06, "loss": 3.9669, "step": 19183 }, { "epoch": 6.390438910635463, "grad_norm": 0.7734375, "learning_rate": 4.563544479268958e-06, "loss": 4.0418, "step": 19184 }, { "epoch": 6.390772049637711, "grad_norm": 0.7578125, "learning_rate": 4.562799678234753e-06, "loss": 3.9454, "step": 19185 }, { "epoch": 6.39110518863996, "grad_norm": 0.75390625, "learning_rate": 4.5620549114125715e-06, "loss": 3.9842, "step": 19186 }, { "epoch": 6.391438327642208, "grad_norm": 0.7734375, "learning_rate": 4.561310178811085e-06, "loss": 3.9911, "step": 19187 }, { "epoch": 6.391771466644458, "grad_norm": 0.76953125, "learning_rate": 4.560565480438968e-06, "loss": 4.0143, "step": 19188 }, { "epoch": 6.392104605646706, "grad_norm": 0.75390625, "learning_rate": 4.559820816304899e-06, "loss": 4.038, "step": 19189 }, { "epoch": 6.392437744648955, "grad_norm": 0.765625, "learning_rate": 4.559076186417546e-06, "loss": 4.0164, "step": 19190 }, { "epoch": 6.392770883651203, "grad_norm": 0.796875, "learning_rate": 4.558331590785587e-06, "loss": 4.0183, "step": 19191 }, { "epoch": 6.393104022653453, "grad_norm": 0.75, "learning_rate": 4.557587029417693e-06, "loss": 3.9709, "step": 19192 }, { "epoch": 6.393437161655701, "grad_norm": 0.8046875, "learning_rate": 4.556842502322538e-06, "loss": 4.0064, "step": 19193 }, { "epoch": 6.393770300657949, "grad_norm": 0.7734375, "learning_rate": 4.55609800950879e-06, "loss": 4.0451, "step": 19194 }, { "epoch": 6.394103439660198, "grad_norm": 0.81640625, "learning_rate": 4.555353550985128e-06, "loss": 3.9849, "step": 19195 }, { "epoch": 6.394436578662447, "grad_norm": 0.77734375, "learning_rate": 4.554609126760218e-06, "loss": 4.0277, "step": 19196 }, { "epoch": 6.394769717664696, "grad_norm": 0.78515625, "learning_rate": 4.55386473684273e-06, "loss": 4.0113, "step": 19197 }, { "epoch": 6.395102856666944, "grad_norm": 0.73828125, "learning_rate": 4.553120381241338e-06, "loss": 4.0136, "step": 19198 }, { "epoch": 6.395435995669193, "grad_norm": 0.74609375, "learning_rate": 4.552376059964708e-06, "loss": 3.9972, "step": 19199 }, { "epoch": 6.395769134671442, "grad_norm": 0.75, "learning_rate": 4.551631773021515e-06, "loss": 4.0271, "step": 19200 }, { "epoch": 6.396102273673691, "grad_norm": 0.7734375, "learning_rate": 4.550887520420425e-06, "loss": 3.9512, "step": 19201 }, { "epoch": 6.396435412675939, "grad_norm": 0.80859375, "learning_rate": 4.5501433021701075e-06, "loss": 3.9454, "step": 19202 }, { "epoch": 6.396768551678187, "grad_norm": 0.75, "learning_rate": 4.549399118279231e-06, "loss": 4.0022, "step": 19203 }, { "epoch": 6.3971016906804365, "grad_norm": 0.73828125, "learning_rate": 4.548654968756466e-06, "loss": 3.9763, "step": 19204 }, { "epoch": 6.397434829682685, "grad_norm": 0.765625, "learning_rate": 4.5479108536104765e-06, "loss": 3.9722, "step": 19205 }, { "epoch": 6.397767968684934, "grad_norm": 0.80859375, "learning_rate": 4.547166772849929e-06, "loss": 3.9575, "step": 19206 }, { "epoch": 6.398101107687182, "grad_norm": 0.74609375, "learning_rate": 4.5464227264834956e-06, "loss": 3.995, "step": 19207 }, { "epoch": 6.3984342466894315, "grad_norm": 0.76171875, "learning_rate": 4.5456787145198385e-06, "loss": 4.0557, "step": 19208 }, { "epoch": 6.39876738569168, "grad_norm": 0.79296875, "learning_rate": 4.544934736967627e-06, "loss": 4.0311, "step": 19209 }, { "epoch": 6.399100524693928, "grad_norm": 0.76171875, "learning_rate": 4.544190793835523e-06, "loss": 3.9822, "step": 19210 }, { "epoch": 6.399433663696177, "grad_norm": 0.76953125, "learning_rate": 4.543446885132196e-06, "loss": 4.0113, "step": 19211 }, { "epoch": 6.399766802698426, "grad_norm": 0.76953125, "learning_rate": 4.542703010866307e-06, "loss": 3.9744, "step": 19212 }, { "epoch": 6.400099941700675, "grad_norm": 0.74609375, "learning_rate": 4.541959171046525e-06, "loss": 3.915, "step": 19213 }, { "epoch": 6.400433080702923, "grad_norm": 0.734375, "learning_rate": 4.5412153656815096e-06, "loss": 3.9314, "step": 19214 }, { "epoch": 6.400766219705172, "grad_norm": 0.7578125, "learning_rate": 4.540471594779927e-06, "loss": 3.9955, "step": 19215 }, { "epoch": 6.4010993587074205, "grad_norm": 0.7734375, "learning_rate": 4.5397278583504395e-06, "loss": 3.9852, "step": 19216 }, { "epoch": 6.40143249770967, "grad_norm": 0.796875, "learning_rate": 4.53898415640171e-06, "loss": 3.925, "step": 19217 }, { "epoch": 6.401765636711918, "grad_norm": 0.78515625, "learning_rate": 4.538240488942403e-06, "loss": 4.0224, "step": 19218 }, { "epoch": 6.402098775714167, "grad_norm": 0.78515625, "learning_rate": 4.537496855981178e-06, "loss": 3.9558, "step": 19219 }, { "epoch": 6.4024319147164155, "grad_norm": 0.81640625, "learning_rate": 4.536753257526698e-06, "loss": 3.9836, "step": 19220 }, { "epoch": 6.402765053718664, "grad_norm": 0.81640625, "learning_rate": 4.536009693587625e-06, "loss": 4.0211, "step": 19221 }, { "epoch": 6.403098192720913, "grad_norm": 0.75, "learning_rate": 4.5352661641726195e-06, "loss": 3.9699, "step": 19222 }, { "epoch": 6.403431331723161, "grad_norm": 0.7578125, "learning_rate": 4.534522669290342e-06, "loss": 3.9436, "step": 19223 }, { "epoch": 6.40376447072541, "grad_norm": 0.79296875, "learning_rate": 4.53377920894945e-06, "loss": 4.0043, "step": 19224 }, { "epoch": 6.404097609727659, "grad_norm": 0.7578125, "learning_rate": 4.533035783158606e-06, "loss": 3.9975, "step": 19225 }, { "epoch": 6.404430748729908, "grad_norm": 0.765625, "learning_rate": 4.532292391926468e-06, "loss": 3.9776, "step": 19226 }, { "epoch": 6.404763887732156, "grad_norm": 0.7421875, "learning_rate": 4.531549035261697e-06, "loss": 3.9392, "step": 19227 }, { "epoch": 6.4050970267344045, "grad_norm": 0.77734375, "learning_rate": 4.5308057131729495e-06, "loss": 3.9824, "step": 19228 }, { "epoch": 6.405430165736654, "grad_norm": 0.74609375, "learning_rate": 4.530062425668884e-06, "loss": 3.9476, "step": 19229 }, { "epoch": 6.405763304738902, "grad_norm": 0.7890625, "learning_rate": 4.529319172758158e-06, "loss": 4.0251, "step": 19230 }, { "epoch": 6.406096443741151, "grad_norm": 0.8046875, "learning_rate": 4.528575954449432e-06, "loss": 4.0006, "step": 19231 }, { "epoch": 6.4064295827433995, "grad_norm": 0.8359375, "learning_rate": 4.527832770751358e-06, "loss": 3.9376, "step": 19232 }, { "epoch": 6.406762721745649, "grad_norm": 0.79296875, "learning_rate": 4.527089621672595e-06, "loss": 3.867, "step": 19233 }, { "epoch": 6.407095860747897, "grad_norm": 0.796875, "learning_rate": 4.526346507221797e-06, "loss": 4.0331, "step": 19234 }, { "epoch": 6.407428999750146, "grad_norm": 0.7578125, "learning_rate": 4.525603427407623e-06, "loss": 3.9811, "step": 19235 }, { "epoch": 6.407762138752394, "grad_norm": 0.71484375, "learning_rate": 4.5248603822387245e-06, "loss": 4.0253, "step": 19236 }, { "epoch": 6.408095277754643, "grad_norm": 0.74609375, "learning_rate": 4.52411737172376e-06, "loss": 3.8957, "step": 19237 }, { "epoch": 6.408428416756892, "grad_norm": 0.8125, "learning_rate": 4.523374395871382e-06, "loss": 3.9653, "step": 19238 }, { "epoch": 6.40876155575914, "grad_norm": 0.8046875, "learning_rate": 4.522631454690245e-06, "loss": 4.0109, "step": 19239 }, { "epoch": 6.409094694761389, "grad_norm": 0.76953125, "learning_rate": 4.521888548189005e-06, "loss": 3.9602, "step": 19240 }, { "epoch": 6.409427833763638, "grad_norm": 0.75390625, "learning_rate": 4.521145676376309e-06, "loss": 3.9885, "step": 19241 }, { "epoch": 6.409760972765887, "grad_norm": 0.8203125, "learning_rate": 4.520402839260816e-06, "loss": 3.9264, "step": 19242 }, { "epoch": 6.410094111768135, "grad_norm": 0.734375, "learning_rate": 4.519660036851174e-06, "loss": 4.0278, "step": 19243 }, { "epoch": 6.410427250770384, "grad_norm": 0.7734375, "learning_rate": 4.518917269156038e-06, "loss": 3.971, "step": 19244 }, { "epoch": 6.410760389772633, "grad_norm": 0.7734375, "learning_rate": 4.51817453618406e-06, "loss": 3.9502, "step": 19245 }, { "epoch": 6.411093528774881, "grad_norm": 0.734375, "learning_rate": 4.517431837943889e-06, "loss": 3.9849, "step": 19246 }, { "epoch": 6.41142666777713, "grad_norm": 0.74609375, "learning_rate": 4.516689174444175e-06, "loss": 3.9941, "step": 19247 }, { "epoch": 6.411759806779378, "grad_norm": 0.828125, "learning_rate": 4.5159465456935734e-06, "loss": 4.0022, "step": 19248 }, { "epoch": 6.412092945781628, "grad_norm": 0.7890625, "learning_rate": 4.515203951700732e-06, "loss": 3.9413, "step": 19249 }, { "epoch": 6.412426084783876, "grad_norm": 0.7109375, "learning_rate": 4.514461392474297e-06, "loss": 4.002, "step": 19250 }, { "epoch": 6.412759223786125, "grad_norm": 0.73828125, "learning_rate": 4.5137188680229215e-06, "loss": 3.9615, "step": 19251 }, { "epoch": 6.413092362788373, "grad_norm": 0.7578125, "learning_rate": 4.512976378355251e-06, "loss": 4.0798, "step": 19252 }, { "epoch": 6.4134255017906225, "grad_norm": 0.7265625, "learning_rate": 4.5122339234799374e-06, "loss": 3.9122, "step": 19253 }, { "epoch": 6.413758640792871, "grad_norm": 0.8125, "learning_rate": 4.511491503405625e-06, "loss": 3.8923, "step": 19254 }, { "epoch": 6.414091779795119, "grad_norm": 0.72265625, "learning_rate": 4.510749118140965e-06, "loss": 4.0072, "step": 19255 }, { "epoch": 6.414424918797368, "grad_norm": 0.74609375, "learning_rate": 4.510006767694603e-06, "loss": 3.9143, "step": 19256 }, { "epoch": 6.414758057799617, "grad_norm": 0.7890625, "learning_rate": 4.5092644520751864e-06, "loss": 3.9809, "step": 19257 }, { "epoch": 6.415091196801866, "grad_norm": 0.78515625, "learning_rate": 4.5085221712913626e-06, "loss": 3.9062, "step": 19258 }, { "epoch": 6.415424335804114, "grad_norm": 0.79296875, "learning_rate": 4.507779925351773e-06, "loss": 3.9866, "step": 19259 }, { "epoch": 6.415757474806363, "grad_norm": 0.73046875, "learning_rate": 4.5070377142650675e-06, "loss": 4.0093, "step": 19260 }, { "epoch": 6.416090613808612, "grad_norm": 0.75, "learning_rate": 4.506295538039889e-06, "loss": 3.9845, "step": 19261 }, { "epoch": 6.416423752810861, "grad_norm": 0.7734375, "learning_rate": 4.505553396684884e-06, "loss": 4.0452, "step": 19262 }, { "epoch": 6.416756891813109, "grad_norm": 0.7578125, "learning_rate": 4.504811290208694e-06, "loss": 4.0147, "step": 19263 }, { "epoch": 6.417090030815357, "grad_norm": 0.75390625, "learning_rate": 4.504069218619968e-06, "loss": 3.9899, "step": 19264 }, { "epoch": 6.4174231698176065, "grad_norm": 0.78515625, "learning_rate": 4.503327181927343e-06, "loss": 3.9616, "step": 19265 }, { "epoch": 6.417756308819855, "grad_norm": 0.78515625, "learning_rate": 4.502585180139468e-06, "loss": 3.9, "step": 19266 }, { "epoch": 6.418089447822104, "grad_norm": 0.7890625, "learning_rate": 4.5018432132649856e-06, "loss": 3.9934, "step": 19267 }, { "epoch": 6.418422586824352, "grad_norm": 0.7578125, "learning_rate": 4.501101281312531e-06, "loss": 3.9978, "step": 19268 }, { "epoch": 6.4187557258266015, "grad_norm": 0.82421875, "learning_rate": 4.500359384290754e-06, "loss": 3.9722, "step": 19269 }, { "epoch": 6.41908886482885, "grad_norm": 0.765625, "learning_rate": 4.49961752220829e-06, "loss": 3.9724, "step": 19270 }, { "epoch": 6.419422003831098, "grad_norm": 0.81640625, "learning_rate": 4.498875695073785e-06, "loss": 3.9628, "step": 19271 }, { "epoch": 6.419755142833347, "grad_norm": 0.75390625, "learning_rate": 4.498133902895878e-06, "loss": 3.871, "step": 19272 }, { "epoch": 6.4200882818355955, "grad_norm": 0.7734375, "learning_rate": 4.497392145683208e-06, "loss": 3.955, "step": 19273 }, { "epoch": 6.420421420837845, "grad_norm": 0.75390625, "learning_rate": 4.496650423444415e-06, "loss": 3.9712, "step": 19274 }, { "epoch": 6.420754559840093, "grad_norm": 0.75, "learning_rate": 4.495908736188142e-06, "loss": 4.0047, "step": 19275 }, { "epoch": 6.421087698842342, "grad_norm": 0.7890625, "learning_rate": 4.495167083923026e-06, "loss": 4.0404, "step": 19276 }, { "epoch": 6.4214208378445905, "grad_norm": 0.734375, "learning_rate": 4.494425466657704e-06, "loss": 4.0004, "step": 19277 }, { "epoch": 6.42175397684684, "grad_norm": 0.79296875, "learning_rate": 4.493683884400813e-06, "loss": 3.945, "step": 19278 }, { "epoch": 6.422087115849088, "grad_norm": 0.78125, "learning_rate": 4.492942337160994e-06, "loss": 3.9733, "step": 19279 }, { "epoch": 6.422420254851337, "grad_norm": 0.75390625, "learning_rate": 4.492200824946884e-06, "loss": 3.9604, "step": 19280 }, { "epoch": 6.4227533938535855, "grad_norm": 0.8046875, "learning_rate": 4.49145934776712e-06, "loss": 3.9776, "step": 19281 }, { "epoch": 6.423086532855834, "grad_norm": 0.78515625, "learning_rate": 4.490717905630337e-06, "loss": 4.0223, "step": 19282 }, { "epoch": 6.423419671858083, "grad_norm": 0.75, "learning_rate": 4.489976498545173e-06, "loss": 4.0367, "step": 19283 }, { "epoch": 6.423752810860331, "grad_norm": 0.8125, "learning_rate": 4.489235126520261e-06, "loss": 4.0197, "step": 19284 }, { "epoch": 6.42408594986258, "grad_norm": 0.74609375, "learning_rate": 4.488493789564243e-06, "loss": 3.9882, "step": 19285 }, { "epoch": 6.424419088864829, "grad_norm": 0.76171875, "learning_rate": 4.487752487685747e-06, "loss": 3.9484, "step": 19286 }, { "epoch": 6.424752227867078, "grad_norm": 0.71875, "learning_rate": 4.487011220893408e-06, "loss": 3.9569, "step": 19287 }, { "epoch": 6.425085366869326, "grad_norm": 0.73046875, "learning_rate": 4.486269989195866e-06, "loss": 3.9626, "step": 19288 }, { "epoch": 6.4254185058715745, "grad_norm": 0.7890625, "learning_rate": 4.4855287926017465e-06, "loss": 3.9886, "step": 19289 }, { "epoch": 6.425751644873824, "grad_norm": 0.8359375, "learning_rate": 4.484787631119691e-06, "loss": 3.9396, "step": 19290 }, { "epoch": 6.426084783876072, "grad_norm": 0.7734375, "learning_rate": 4.4840465047583265e-06, "loss": 3.9804, "step": 19291 }, { "epoch": 6.426417922878321, "grad_norm": 0.8046875, "learning_rate": 4.483305413526289e-06, "loss": 3.9459, "step": 19292 }, { "epoch": 6.426751061880569, "grad_norm": 0.765625, "learning_rate": 4.482564357432208e-06, "loss": 3.9464, "step": 19293 }, { "epoch": 6.427084200882819, "grad_norm": 0.79296875, "learning_rate": 4.481823336484719e-06, "loss": 3.9936, "step": 19294 }, { "epoch": 6.427417339885067, "grad_norm": 0.796875, "learning_rate": 4.481082350692449e-06, "loss": 4.0205, "step": 19295 }, { "epoch": 6.427750478887316, "grad_norm": 0.74609375, "learning_rate": 4.48034140006403e-06, "loss": 3.9923, "step": 19296 }, { "epoch": 6.428083617889564, "grad_norm": 0.78125, "learning_rate": 4.479600484608094e-06, "loss": 3.9837, "step": 19297 }, { "epoch": 6.428416756891814, "grad_norm": 0.83203125, "learning_rate": 4.478859604333269e-06, "loss": 3.9531, "step": 19298 }, { "epoch": 6.428749895894062, "grad_norm": 0.76171875, "learning_rate": 4.478118759248187e-06, "loss": 4.0089, "step": 19299 }, { "epoch": 6.42908303489631, "grad_norm": 0.75, "learning_rate": 4.477377949361475e-06, "loss": 3.98, "step": 19300 }, { "epoch": 6.429416173898559, "grad_norm": 0.74609375, "learning_rate": 4.476637174681764e-06, "loss": 4.0168, "step": 19301 }, { "epoch": 6.429749312900808, "grad_norm": 0.77734375, "learning_rate": 4.47589643521768e-06, "loss": 3.9147, "step": 19302 }, { "epoch": 6.430082451903057, "grad_norm": 0.72265625, "learning_rate": 4.475155730977855e-06, "loss": 4.0137, "step": 19303 }, { "epoch": 6.430415590905305, "grad_norm": 0.796875, "learning_rate": 4.474415061970913e-06, "loss": 4.0236, "step": 19304 }, { "epoch": 6.430748729907554, "grad_norm": 0.734375, "learning_rate": 4.473674428205482e-06, "loss": 3.9632, "step": 19305 }, { "epoch": 6.431081868909803, "grad_norm": 0.7734375, "learning_rate": 4.472933829690188e-06, "loss": 3.9819, "step": 19306 }, { "epoch": 6.431415007912051, "grad_norm": 0.76171875, "learning_rate": 4.472193266433657e-06, "loss": 3.9446, "step": 19307 }, { "epoch": 6.4317481469143, "grad_norm": 0.77734375, "learning_rate": 4.471452738444519e-06, "loss": 3.9684, "step": 19308 }, { "epoch": 6.432081285916548, "grad_norm": 0.73046875, "learning_rate": 4.470712245731394e-06, "loss": 3.966, "step": 19309 }, { "epoch": 6.432414424918798, "grad_norm": 0.74609375, "learning_rate": 4.469971788302913e-06, "loss": 3.9718, "step": 19310 }, { "epoch": 6.432747563921046, "grad_norm": 0.73828125, "learning_rate": 4.469231366167695e-06, "loss": 3.9705, "step": 19311 }, { "epoch": 6.433080702923295, "grad_norm": 0.7578125, "learning_rate": 4.46849097933437e-06, "loss": 3.9956, "step": 19312 }, { "epoch": 6.433413841925543, "grad_norm": 0.7578125, "learning_rate": 4.467750627811557e-06, "loss": 4.0122, "step": 19313 }, { "epoch": 6.4337469809277925, "grad_norm": 0.82421875, "learning_rate": 4.46701031160788e-06, "loss": 3.8895, "step": 19314 }, { "epoch": 6.434080119930041, "grad_norm": 0.75390625, "learning_rate": 4.466270030731966e-06, "loss": 3.9231, "step": 19315 }, { "epoch": 6.434413258932289, "grad_norm": 0.77734375, "learning_rate": 4.465529785192432e-06, "loss": 3.9721, "step": 19316 }, { "epoch": 6.434746397934538, "grad_norm": 0.7734375, "learning_rate": 4.464789574997907e-06, "loss": 3.9775, "step": 19317 }, { "epoch": 6.435079536936787, "grad_norm": 0.73046875, "learning_rate": 4.464049400157006e-06, "loss": 4.0204, "step": 19318 }, { "epoch": 6.435412675939036, "grad_norm": 0.7890625, "learning_rate": 4.463309260678355e-06, "loss": 3.9166, "step": 19319 }, { "epoch": 6.435745814941284, "grad_norm": 0.796875, "learning_rate": 4.462569156570573e-06, "loss": 3.9774, "step": 19320 }, { "epoch": 6.436078953943533, "grad_norm": 0.7734375, "learning_rate": 4.461829087842285e-06, "loss": 3.9969, "step": 19321 }, { "epoch": 6.4364120929457815, "grad_norm": 0.75, "learning_rate": 4.461089054502103e-06, "loss": 3.9853, "step": 19322 }, { "epoch": 6.436745231948031, "grad_norm": 0.76171875, "learning_rate": 4.460349056558654e-06, "loss": 4.0519, "step": 19323 }, { "epoch": 6.437078370950279, "grad_norm": 0.77734375, "learning_rate": 4.459609094020552e-06, "loss": 3.9644, "step": 19324 }, { "epoch": 6.437411509952527, "grad_norm": 0.7578125, "learning_rate": 4.45886916689642e-06, "loss": 3.9579, "step": 19325 }, { "epoch": 6.4377446489547765, "grad_norm": 0.74609375, "learning_rate": 4.4581292751948745e-06, "loss": 4.0458, "step": 19326 }, { "epoch": 6.438077787957025, "grad_norm": 0.796875, "learning_rate": 4.457389418924535e-06, "loss": 4.0736, "step": 19327 }, { "epoch": 6.438410926959274, "grad_norm": 0.7578125, "learning_rate": 4.456649598094018e-06, "loss": 3.9761, "step": 19328 }, { "epoch": 6.438744065961522, "grad_norm": 0.75390625, "learning_rate": 4.455909812711941e-06, "loss": 4.1053, "step": 19329 }, { "epoch": 6.4390772049637715, "grad_norm": 0.81640625, "learning_rate": 4.455170062786923e-06, "loss": 3.8832, "step": 19330 }, { "epoch": 6.43941034396602, "grad_norm": 0.74609375, "learning_rate": 4.454430348327578e-06, "loss": 3.9915, "step": 19331 }, { "epoch": 6.439743482968268, "grad_norm": 0.74609375, "learning_rate": 4.4536906693425225e-06, "loss": 4.0785, "step": 19332 }, { "epoch": 6.440076621970517, "grad_norm": 0.74609375, "learning_rate": 4.45295102584037e-06, "loss": 3.9532, "step": 19333 }, { "epoch": 6.4404097609727655, "grad_norm": 0.796875, "learning_rate": 4.4522114178297405e-06, "loss": 3.9931, "step": 19334 }, { "epoch": 6.440742899975015, "grad_norm": 0.8046875, "learning_rate": 4.451471845319246e-06, "loss": 4.021, "step": 19335 }, { "epoch": 6.441076038977263, "grad_norm": 0.76953125, "learning_rate": 4.450732308317502e-06, "loss": 3.9059, "step": 19336 }, { "epoch": 6.441409177979512, "grad_norm": 0.7421875, "learning_rate": 4.4499928068331206e-06, "loss": 3.9449, "step": 19337 }, { "epoch": 6.4417423169817605, "grad_norm": 0.78125, "learning_rate": 4.449253340874718e-06, "loss": 3.9789, "step": 19338 }, { "epoch": 6.44207545598401, "grad_norm": 0.75390625, "learning_rate": 4.4485139104509075e-06, "loss": 3.9428, "step": 19339 }, { "epoch": 6.442408594986258, "grad_norm": 0.76953125, "learning_rate": 4.447774515570297e-06, "loss": 4.0191, "step": 19340 }, { "epoch": 6.442741733988507, "grad_norm": 0.796875, "learning_rate": 4.447035156241504e-06, "loss": 3.9349, "step": 19341 }, { "epoch": 6.443074872990755, "grad_norm": 0.80078125, "learning_rate": 4.4462958324731384e-06, "loss": 3.9439, "step": 19342 }, { "epoch": 6.443408011993004, "grad_norm": 0.78515625, "learning_rate": 4.445556544273813e-06, "loss": 4.0815, "step": 19343 }, { "epoch": 6.443741150995253, "grad_norm": 0.7578125, "learning_rate": 4.444817291652136e-06, "loss": 3.9076, "step": 19344 }, { "epoch": 6.444074289997501, "grad_norm": 0.76953125, "learning_rate": 4.444078074616722e-06, "loss": 3.9624, "step": 19345 }, { "epoch": 6.44440742899975, "grad_norm": 0.734375, "learning_rate": 4.443338893176177e-06, "loss": 3.9936, "step": 19346 }, { "epoch": 6.444740568001999, "grad_norm": 0.73828125, "learning_rate": 4.442599747339115e-06, "loss": 4.0236, "step": 19347 }, { "epoch": 6.445073707004248, "grad_norm": 0.7578125, "learning_rate": 4.441860637114145e-06, "loss": 4.0137, "step": 19348 }, { "epoch": 6.445406846006496, "grad_norm": 0.76171875, "learning_rate": 4.441121562509872e-06, "loss": 4.0435, "step": 19349 }, { "epoch": 6.4457399850087445, "grad_norm": 0.796875, "learning_rate": 4.44038252353491e-06, "loss": 3.9396, "step": 19350 }, { "epoch": 6.446073124010994, "grad_norm": 0.7578125, "learning_rate": 4.439643520197861e-06, "loss": 4.0291, "step": 19351 }, { "epoch": 6.446406263013242, "grad_norm": 0.7265625, "learning_rate": 4.4389045525073375e-06, "loss": 3.9431, "step": 19352 }, { "epoch": 6.446739402015491, "grad_norm": 0.80859375, "learning_rate": 4.438165620471946e-06, "loss": 3.9579, "step": 19353 }, { "epoch": 6.447072541017739, "grad_norm": 0.8125, "learning_rate": 4.437426724100294e-06, "loss": 3.9975, "step": 19354 }, { "epoch": 6.447405680019989, "grad_norm": 0.765625, "learning_rate": 4.436687863400985e-06, "loss": 4.0122, "step": 19355 }, { "epoch": 6.447738819022237, "grad_norm": 0.78515625, "learning_rate": 4.435949038382629e-06, "loss": 4.0186, "step": 19356 }, { "epoch": 6.448071958024486, "grad_norm": 0.8046875, "learning_rate": 4.435210249053831e-06, "loss": 3.9642, "step": 19357 }, { "epoch": 6.448405097026734, "grad_norm": 0.8046875, "learning_rate": 4.434471495423194e-06, "loss": 3.9814, "step": 19358 }, { "epoch": 6.4487382360289836, "grad_norm": 0.7734375, "learning_rate": 4.4337327774993244e-06, "loss": 4.0069, "step": 19359 }, { "epoch": 6.449071375031232, "grad_norm": 0.7265625, "learning_rate": 4.4329940952908246e-06, "loss": 3.9678, "step": 19360 }, { "epoch": 6.44940451403348, "grad_norm": 0.75, "learning_rate": 4.432255448806303e-06, "loss": 3.9807, "step": 19361 }, { "epoch": 6.449737653035729, "grad_norm": 0.73828125, "learning_rate": 4.431516838054359e-06, "loss": 3.962, "step": 19362 }, { "epoch": 6.450070792037978, "grad_norm": 0.7734375, "learning_rate": 4.430778263043599e-06, "loss": 4.0493, "step": 19363 }, { "epoch": 6.450403931040227, "grad_norm": 0.76171875, "learning_rate": 4.430039723782622e-06, "loss": 3.9641, "step": 19364 }, { "epoch": 6.450737070042475, "grad_norm": 0.75, "learning_rate": 4.429301220280034e-06, "loss": 4.0162, "step": 19365 }, { "epoch": 6.451070209044724, "grad_norm": 0.76953125, "learning_rate": 4.428562752544438e-06, "loss": 3.9427, "step": 19366 }, { "epoch": 6.451403348046973, "grad_norm": 0.78515625, "learning_rate": 4.427824320584432e-06, "loss": 3.9627, "step": 19367 }, { "epoch": 6.451736487049221, "grad_norm": 0.79296875, "learning_rate": 4.427085924408616e-06, "loss": 3.9733, "step": 19368 }, { "epoch": 6.45206962605147, "grad_norm": 0.734375, "learning_rate": 4.426347564025596e-06, "loss": 4.012, "step": 19369 }, { "epoch": 6.452402765053718, "grad_norm": 0.8125, "learning_rate": 4.425609239443966e-06, "loss": 4.0443, "step": 19370 }, { "epoch": 6.4527359040559675, "grad_norm": 0.765625, "learning_rate": 4.424870950672332e-06, "loss": 4.0403, "step": 19371 }, { "epoch": 6.453069043058216, "grad_norm": 0.73046875, "learning_rate": 4.4241326977192886e-06, "loss": 3.9387, "step": 19372 }, { "epoch": 6.453402182060465, "grad_norm": 0.80859375, "learning_rate": 4.423394480593438e-06, "loss": 3.9859, "step": 19373 }, { "epoch": 6.453735321062713, "grad_norm": 0.71484375, "learning_rate": 4.4226562993033776e-06, "loss": 4.0082, "step": 19374 }, { "epoch": 6.4540684600649625, "grad_norm": 0.83984375, "learning_rate": 4.421918153857707e-06, "loss": 3.9462, "step": 19375 }, { "epoch": 6.454401599067211, "grad_norm": 0.79296875, "learning_rate": 4.421180044265023e-06, "loss": 3.9766, "step": 19376 }, { "epoch": 6.454734738069459, "grad_norm": 0.75, "learning_rate": 4.420441970533921e-06, "loss": 3.9817, "step": 19377 }, { "epoch": 6.455067877071708, "grad_norm": 0.7578125, "learning_rate": 4.4197039326729995e-06, "loss": 3.9678, "step": 19378 }, { "epoch": 6.4554010160739566, "grad_norm": 0.78125, "learning_rate": 4.4189659306908556e-06, "loss": 4.0193, "step": 19379 }, { "epoch": 6.455734155076206, "grad_norm": 0.81640625, "learning_rate": 4.4182279645960864e-06, "loss": 3.9499, "step": 19380 }, { "epoch": 6.456067294078454, "grad_norm": 0.70703125, "learning_rate": 4.417490034397285e-06, "loss": 4.0001, "step": 19381 }, { "epoch": 6.456400433080703, "grad_norm": 0.7421875, "learning_rate": 4.416752140103049e-06, "loss": 3.9642, "step": 19382 }, { "epoch": 6.4567335720829515, "grad_norm": 0.7578125, "learning_rate": 4.41601428172197e-06, "loss": 4.0497, "step": 19383 }, { "epoch": 6.457066711085201, "grad_norm": 0.75390625, "learning_rate": 4.415276459262649e-06, "loss": 3.9245, "step": 19384 }, { "epoch": 6.457399850087449, "grad_norm": 0.76171875, "learning_rate": 4.414538672733674e-06, "loss": 3.9887, "step": 19385 }, { "epoch": 6.457732989089697, "grad_norm": 0.765625, "learning_rate": 4.413800922143639e-06, "loss": 3.9652, "step": 19386 }, { "epoch": 6.4580661280919465, "grad_norm": 0.796875, "learning_rate": 4.41306320750114e-06, "loss": 3.9514, "step": 19387 }, { "epoch": 6.458399267094195, "grad_norm": 0.7421875, "learning_rate": 4.4123255288147664e-06, "loss": 3.9511, "step": 19388 }, { "epoch": 6.458732406096444, "grad_norm": 0.7578125, "learning_rate": 4.411587886093113e-06, "loss": 3.9938, "step": 19389 }, { "epoch": 6.459065545098692, "grad_norm": 0.7421875, "learning_rate": 4.410850279344772e-06, "loss": 4.0331, "step": 19390 }, { "epoch": 6.459398684100941, "grad_norm": 0.72265625, "learning_rate": 4.410112708578334e-06, "loss": 3.9458, "step": 19391 }, { "epoch": 6.45973182310319, "grad_norm": 0.71484375, "learning_rate": 4.409375173802389e-06, "loss": 3.9449, "step": 19392 }, { "epoch": 6.460064962105439, "grad_norm": 0.7890625, "learning_rate": 4.408637675025532e-06, "loss": 3.9815, "step": 19393 }, { "epoch": 6.460398101107687, "grad_norm": 0.8125, "learning_rate": 4.4079002122563495e-06, "loss": 3.9885, "step": 19394 }, { "epoch": 6.4607312401099355, "grad_norm": 0.79296875, "learning_rate": 4.40716278550343e-06, "loss": 3.9403, "step": 19395 }, { "epoch": 6.461064379112185, "grad_norm": 0.76953125, "learning_rate": 4.406425394775367e-06, "loss": 3.9385, "step": 19396 }, { "epoch": 6.461397518114433, "grad_norm": 0.734375, "learning_rate": 4.405688040080745e-06, "loss": 3.9943, "step": 19397 }, { "epoch": 6.461730657116682, "grad_norm": 0.76171875, "learning_rate": 4.404950721428156e-06, "loss": 3.8888, "step": 19398 }, { "epoch": 6.4620637961189304, "grad_norm": 0.76171875, "learning_rate": 4.404213438826188e-06, "loss": 4.0022, "step": 19399 }, { "epoch": 6.46239693512118, "grad_norm": 0.75390625, "learning_rate": 4.4034761922834285e-06, "loss": 4.0577, "step": 19400 }, { "epoch": 6.462730074123428, "grad_norm": 0.796875, "learning_rate": 4.4027389818084625e-06, "loss": 4.0297, "step": 19401 }, { "epoch": 6.463063213125677, "grad_norm": 0.765625, "learning_rate": 4.402001807409882e-06, "loss": 3.9568, "step": 19402 }, { "epoch": 6.463396352127925, "grad_norm": 0.77734375, "learning_rate": 4.401264669096268e-06, "loss": 3.9388, "step": 19403 }, { "epoch": 6.463729491130174, "grad_norm": 0.80078125, "learning_rate": 4.400527566876207e-06, "loss": 3.9723, "step": 19404 }, { "epoch": 6.464062630132423, "grad_norm": 0.79296875, "learning_rate": 4.399790500758289e-06, "loss": 3.902, "step": 19405 }, { "epoch": 6.464395769134671, "grad_norm": 0.80078125, "learning_rate": 4.3990534707510945e-06, "loss": 4.0137, "step": 19406 }, { "epoch": 6.46472890813692, "grad_norm": 0.8359375, "learning_rate": 4.398316476863212e-06, "loss": 3.9225, "step": 19407 }, { "epoch": 6.465062047139169, "grad_norm": 0.71875, "learning_rate": 4.397579519103223e-06, "loss": 3.9195, "step": 19408 }, { "epoch": 6.465395186141418, "grad_norm": 0.74609375, "learning_rate": 4.396842597479715e-06, "loss": 4.0079, "step": 19409 }, { "epoch": 6.465728325143666, "grad_norm": 0.73828125, "learning_rate": 4.396105712001267e-06, "loss": 3.9835, "step": 19410 }, { "epoch": 6.466061464145914, "grad_norm": 0.77734375, "learning_rate": 4.395368862676469e-06, "loss": 3.9418, "step": 19411 }, { "epoch": 6.466394603148164, "grad_norm": 0.7265625, "learning_rate": 4.394632049513894e-06, "loss": 4.0249, "step": 19412 }, { "epoch": 6.466727742150412, "grad_norm": 0.734375, "learning_rate": 4.393895272522131e-06, "loss": 3.912, "step": 19413 }, { "epoch": 6.467060881152661, "grad_norm": 0.7734375, "learning_rate": 4.393158531709759e-06, "loss": 3.9206, "step": 19414 }, { "epoch": 6.467394020154909, "grad_norm": 0.8046875, "learning_rate": 4.3924218270853624e-06, "loss": 3.9679, "step": 19415 }, { "epoch": 6.467727159157159, "grad_norm": 0.81640625, "learning_rate": 4.391685158657519e-06, "loss": 4.0667, "step": 19416 }, { "epoch": 6.468060298159407, "grad_norm": 0.86328125, "learning_rate": 4.390948526434812e-06, "loss": 4.0385, "step": 19417 }, { "epoch": 6.468393437161656, "grad_norm": 0.796875, "learning_rate": 4.390211930425821e-06, "loss": 3.8821, "step": 19418 }, { "epoch": 6.468726576163904, "grad_norm": 0.73046875, "learning_rate": 4.3894753706391265e-06, "loss": 4.0274, "step": 19419 }, { "epoch": 6.4690597151661535, "grad_norm": 0.76171875, "learning_rate": 4.388738847083305e-06, "loss": 3.9434, "step": 19420 }, { "epoch": 6.469392854168402, "grad_norm": 0.7734375, "learning_rate": 4.388002359766935e-06, "loss": 4.0158, "step": 19421 }, { "epoch": 6.46972599317065, "grad_norm": 0.8125, "learning_rate": 4.3872659086986e-06, "loss": 4.0331, "step": 19422 }, { "epoch": 6.470059132172899, "grad_norm": 0.80859375, "learning_rate": 4.386529493886873e-06, "loss": 3.9723, "step": 19423 }, { "epoch": 6.470392271175148, "grad_norm": 0.7578125, "learning_rate": 4.385793115340334e-06, "loss": 3.9832, "step": 19424 }, { "epoch": 6.470725410177397, "grad_norm": 0.76953125, "learning_rate": 4.385056773067559e-06, "loss": 3.9828, "step": 19425 }, { "epoch": 6.471058549179645, "grad_norm": 0.75, "learning_rate": 4.384320467077127e-06, "loss": 4.0018, "step": 19426 }, { "epoch": 6.471391688181894, "grad_norm": 0.8125, "learning_rate": 4.383584197377612e-06, "loss": 3.9742, "step": 19427 }, { "epoch": 6.4717248271841425, "grad_norm": 0.7578125, "learning_rate": 4.382847963977594e-06, "loss": 3.9987, "step": 19428 }, { "epoch": 6.472057966186391, "grad_norm": 0.76953125, "learning_rate": 4.382111766885644e-06, "loss": 4.0045, "step": 19429 }, { "epoch": 6.47239110518864, "grad_norm": 0.73828125, "learning_rate": 4.381375606110336e-06, "loss": 4.061, "step": 19430 }, { "epoch": 6.472724244190888, "grad_norm": 0.76953125, "learning_rate": 4.380639481660249e-06, "loss": 3.9932, "step": 19431 }, { "epoch": 6.4730573831931375, "grad_norm": 0.765625, "learning_rate": 4.3799033935439545e-06, "loss": 3.9951, "step": 19432 }, { "epoch": 6.473390522195386, "grad_norm": 0.75390625, "learning_rate": 4.3791673417700295e-06, "loss": 3.9415, "step": 19433 }, { "epoch": 6.473723661197635, "grad_norm": 0.73828125, "learning_rate": 4.378431326347044e-06, "loss": 3.9296, "step": 19434 }, { "epoch": 6.474056800199883, "grad_norm": 0.796875, "learning_rate": 4.377695347283572e-06, "loss": 3.973, "step": 19435 }, { "epoch": 6.4743899392021325, "grad_norm": 0.78125, "learning_rate": 4.376959404588188e-06, "loss": 3.978, "step": 19436 }, { "epoch": 6.474723078204381, "grad_norm": 0.734375, "learning_rate": 4.37622349826946e-06, "loss": 4.0257, "step": 19437 }, { "epoch": 6.475056217206629, "grad_norm": 0.79296875, "learning_rate": 4.375487628335964e-06, "loss": 3.9926, "step": 19438 }, { "epoch": 6.475389356208878, "grad_norm": 0.78125, "learning_rate": 4.37475179479627e-06, "loss": 3.9873, "step": 19439 }, { "epoch": 6.4757224952111265, "grad_norm": 0.7890625, "learning_rate": 4.374015997658944e-06, "loss": 3.9471, "step": 19440 }, { "epoch": 6.476055634213376, "grad_norm": 0.76953125, "learning_rate": 4.373280236932564e-06, "loss": 4.007, "step": 19441 }, { "epoch": 6.476388773215624, "grad_norm": 0.734375, "learning_rate": 4.372544512625697e-06, "loss": 3.9796, "step": 19442 }, { "epoch": 6.476721912217873, "grad_norm": 0.7578125, "learning_rate": 4.371808824746912e-06, "loss": 3.9559, "step": 19443 }, { "epoch": 6.4770550512201215, "grad_norm": 0.765625, "learning_rate": 4.371073173304776e-06, "loss": 3.9861, "step": 19444 }, { "epoch": 6.477388190222371, "grad_norm": 0.7265625, "learning_rate": 4.370337558307863e-06, "loss": 3.9802, "step": 19445 }, { "epoch": 6.477721329224619, "grad_norm": 0.75, "learning_rate": 4.369601979764742e-06, "loss": 3.973, "step": 19446 }, { "epoch": 6.478054468226867, "grad_norm": 0.74609375, "learning_rate": 4.368866437683971e-06, "loss": 3.9553, "step": 19447 }, { "epoch": 6.478387607229116, "grad_norm": 0.76171875, "learning_rate": 4.368130932074128e-06, "loss": 3.9857, "step": 19448 }, { "epoch": 6.478720746231365, "grad_norm": 0.77734375, "learning_rate": 4.367395462943776e-06, "loss": 4.0648, "step": 19449 }, { "epoch": 6.479053885233614, "grad_norm": 0.78125, "learning_rate": 4.366660030301481e-06, "loss": 4.0551, "step": 19450 }, { "epoch": 6.479387024235862, "grad_norm": 0.796875, "learning_rate": 4.365924634155806e-06, "loss": 3.975, "step": 19451 }, { "epoch": 6.479720163238111, "grad_norm": 0.78125, "learning_rate": 4.365189274515325e-06, "loss": 4.0136, "step": 19452 }, { "epoch": 6.48005330224036, "grad_norm": 0.78515625, "learning_rate": 4.364453951388598e-06, "loss": 4.0235, "step": 19453 }, { "epoch": 6.480386441242609, "grad_norm": 0.7578125, "learning_rate": 4.363718664784191e-06, "loss": 3.941, "step": 19454 }, { "epoch": 6.480719580244857, "grad_norm": 0.75390625, "learning_rate": 4.362983414710669e-06, "loss": 4.0201, "step": 19455 }, { "epoch": 6.4810527192471055, "grad_norm": 0.7265625, "learning_rate": 4.362248201176595e-06, "loss": 3.9786, "step": 19456 }, { "epoch": 6.481385858249355, "grad_norm": 0.734375, "learning_rate": 4.361513024190534e-06, "loss": 4.0088, "step": 19457 }, { "epoch": 6.481718997251603, "grad_norm": 0.7578125, "learning_rate": 4.360777883761044e-06, "loss": 3.9497, "step": 19458 }, { "epoch": 6.482052136253852, "grad_norm": 0.80859375, "learning_rate": 4.360042779896695e-06, "loss": 3.9798, "step": 19459 }, { "epoch": 6.4823852752561, "grad_norm": 0.796875, "learning_rate": 4.359307712606047e-06, "loss": 3.9969, "step": 19460 }, { "epoch": 6.48271841425835, "grad_norm": 0.76171875, "learning_rate": 4.358572681897662e-06, "loss": 4.0427, "step": 19461 }, { "epoch": 6.483051553260598, "grad_norm": 0.7890625, "learning_rate": 4.3578376877801004e-06, "loss": 4.0239, "step": 19462 }, { "epoch": 6.483384692262847, "grad_norm": 0.77734375, "learning_rate": 4.357102730261921e-06, "loss": 3.9889, "step": 19463 }, { "epoch": 6.483717831265095, "grad_norm": 0.78515625, "learning_rate": 4.356367809351693e-06, "loss": 4.0326, "step": 19464 }, { "epoch": 6.484050970267344, "grad_norm": 0.765625, "learning_rate": 4.355632925057969e-06, "loss": 3.9438, "step": 19465 }, { "epoch": 6.484384109269593, "grad_norm": 0.796875, "learning_rate": 4.354898077389307e-06, "loss": 4.0323, "step": 19466 }, { "epoch": 6.484717248271841, "grad_norm": 0.828125, "learning_rate": 4.3541632663542725e-06, "loss": 3.9541, "step": 19467 }, { "epoch": 6.48505038727409, "grad_norm": 0.7890625, "learning_rate": 4.353428491961422e-06, "loss": 3.9903, "step": 19468 }, { "epoch": 6.485383526276339, "grad_norm": 0.75, "learning_rate": 4.3526937542193145e-06, "loss": 4.0547, "step": 19469 }, { "epoch": 6.485716665278588, "grad_norm": 0.73828125, "learning_rate": 4.351959053136506e-06, "loss": 4.0192, "step": 19470 }, { "epoch": 6.486049804280836, "grad_norm": 0.796875, "learning_rate": 4.351224388721558e-06, "loss": 3.9492, "step": 19471 }, { "epoch": 6.486382943283084, "grad_norm": 0.78125, "learning_rate": 4.350489760983024e-06, "loss": 4.0193, "step": 19472 }, { "epoch": 6.486716082285334, "grad_norm": 0.7421875, "learning_rate": 4.349755169929464e-06, "loss": 3.9353, "step": 19473 }, { "epoch": 6.487049221287582, "grad_norm": 0.80078125, "learning_rate": 4.349020615569433e-06, "loss": 4.0418, "step": 19474 }, { "epoch": 6.487382360289831, "grad_norm": 0.7890625, "learning_rate": 4.3482860979114864e-06, "loss": 3.9744, "step": 19475 }, { "epoch": 6.487715499292079, "grad_norm": 0.79296875, "learning_rate": 4.3475516169641805e-06, "loss": 4.0, "step": 19476 }, { "epoch": 6.4880486382943285, "grad_norm": 0.77734375, "learning_rate": 4.346817172736066e-06, "loss": 4.0469, "step": 19477 }, { "epoch": 6.488381777296577, "grad_norm": 0.79296875, "learning_rate": 4.346082765235706e-06, "loss": 4.0165, "step": 19478 }, { "epoch": 6.488714916298826, "grad_norm": 0.7890625, "learning_rate": 4.34534839447165e-06, "loss": 3.9687, "step": 19479 }, { "epoch": 6.489048055301074, "grad_norm": 0.80078125, "learning_rate": 4.3446140604524524e-06, "loss": 3.9704, "step": 19480 }, { "epoch": 6.4893811943033235, "grad_norm": 0.79296875, "learning_rate": 4.343879763186663e-06, "loss": 3.9641, "step": 19481 }, { "epoch": 6.489714333305572, "grad_norm": 0.84765625, "learning_rate": 4.343145502682844e-06, "loss": 3.995, "step": 19482 }, { "epoch": 6.49004747230782, "grad_norm": 0.73828125, "learning_rate": 4.34241127894954e-06, "loss": 3.941, "step": 19483 }, { "epoch": 6.490380611310069, "grad_norm": 0.8046875, "learning_rate": 4.341677091995301e-06, "loss": 4.0225, "step": 19484 }, { "epoch": 6.490713750312318, "grad_norm": 0.75, "learning_rate": 4.340942941828687e-06, "loss": 3.9874, "step": 19485 }, { "epoch": 6.491046889314567, "grad_norm": 0.75, "learning_rate": 4.3402088284582455e-06, "loss": 4.0825, "step": 19486 }, { "epoch": 6.491380028316815, "grad_norm": 0.7734375, "learning_rate": 4.339474751892527e-06, "loss": 3.968, "step": 19487 }, { "epoch": 6.491713167319064, "grad_norm": 0.74609375, "learning_rate": 4.338740712140078e-06, "loss": 3.9861, "step": 19488 }, { "epoch": 6.4920463063213125, "grad_norm": 0.76171875, "learning_rate": 4.338006709209456e-06, "loss": 4.0818, "step": 19489 }, { "epoch": 6.492379445323561, "grad_norm": 0.7578125, "learning_rate": 4.337272743109207e-06, "loss": 3.9809, "step": 19490 }, { "epoch": 6.49271258432581, "grad_norm": 0.72265625, "learning_rate": 4.33653881384788e-06, "loss": 3.9669, "step": 19491 }, { "epoch": 6.493045723328058, "grad_norm": 0.8203125, "learning_rate": 4.335804921434024e-06, "loss": 3.9234, "step": 19492 }, { "epoch": 6.4933788623303075, "grad_norm": 0.8046875, "learning_rate": 4.335071065876187e-06, "loss": 4.041, "step": 19493 }, { "epoch": 6.493712001332556, "grad_norm": 0.80078125, "learning_rate": 4.334337247182916e-06, "loss": 4.0059, "step": 19494 }, { "epoch": 6.494045140334805, "grad_norm": 0.7734375, "learning_rate": 4.333603465362758e-06, "loss": 3.9408, "step": 19495 }, { "epoch": 6.494378279337053, "grad_norm": 0.77734375, "learning_rate": 4.332869720424263e-06, "loss": 3.9704, "step": 19496 }, { "epoch": 6.494711418339302, "grad_norm": 0.76953125, "learning_rate": 4.332136012375975e-06, "loss": 3.9699, "step": 19497 }, { "epoch": 6.495044557341551, "grad_norm": 0.8125, "learning_rate": 4.331402341226442e-06, "loss": 3.985, "step": 19498 }, { "epoch": 6.4953776963438, "grad_norm": 0.78125, "learning_rate": 4.330668706984205e-06, "loss": 3.9876, "step": 19499 }, { "epoch": 6.495710835346048, "grad_norm": 0.80859375, "learning_rate": 4.32993510965782e-06, "loss": 4.0019, "step": 19500 }, { "epoch": 6.4960439743482965, "grad_norm": 0.77734375, "learning_rate": 4.329201549255821e-06, "loss": 4.0208, "step": 19501 }, { "epoch": 6.496377113350546, "grad_norm": 0.75390625, "learning_rate": 4.328468025786753e-06, "loss": 4.0131, "step": 19502 }, { "epoch": 6.496710252352794, "grad_norm": 0.765625, "learning_rate": 4.327734539259165e-06, "loss": 3.9879, "step": 19503 }, { "epoch": 6.497043391355043, "grad_norm": 0.78125, "learning_rate": 4.3270010896816e-06, "loss": 3.9725, "step": 19504 }, { "epoch": 6.4973765303572915, "grad_norm": 0.75, "learning_rate": 4.326267677062598e-06, "loss": 3.9847, "step": 19505 }, { "epoch": 6.497709669359541, "grad_norm": 0.765625, "learning_rate": 4.325534301410704e-06, "loss": 4.0011, "step": 19506 }, { "epoch": 6.498042808361789, "grad_norm": 0.79296875, "learning_rate": 4.324800962734458e-06, "loss": 4.0031, "step": 19507 }, { "epoch": 6.498375947364037, "grad_norm": 0.76953125, "learning_rate": 4.324067661042404e-06, "loss": 4.0586, "step": 19508 }, { "epoch": 6.498709086366286, "grad_norm": 0.7421875, "learning_rate": 4.323334396343087e-06, "loss": 3.9664, "step": 19509 }, { "epoch": 6.499042225368535, "grad_norm": 0.7890625, "learning_rate": 4.322601168645037e-06, "loss": 3.9901, "step": 19510 }, { "epoch": 6.499375364370784, "grad_norm": 0.73828125, "learning_rate": 4.3218679779568045e-06, "loss": 3.9975, "step": 19511 }, { "epoch": 6.499708503373032, "grad_norm": 0.7421875, "learning_rate": 4.321134824286926e-06, "loss": 3.9399, "step": 19512 }, { "epoch": 6.500041642375281, "grad_norm": 0.76171875, "learning_rate": 4.320401707643941e-06, "loss": 3.9542, "step": 19513 }, { "epoch": 6.50037478137753, "grad_norm": 0.73046875, "learning_rate": 4.3196686280363865e-06, "loss": 3.9911, "step": 19514 }, { "epoch": 6.500707920379779, "grad_norm": 0.74609375, "learning_rate": 4.318935585472808e-06, "loss": 3.9989, "step": 19515 }, { "epoch": 6.501041059382027, "grad_norm": 0.7578125, "learning_rate": 4.318202579961738e-06, "loss": 4.0272, "step": 19516 }, { "epoch": 6.501374198384276, "grad_norm": 0.78125, "learning_rate": 4.317469611511718e-06, "loss": 3.9968, "step": 19517 }, { "epoch": 6.501707337386525, "grad_norm": 0.7734375, "learning_rate": 4.316736680131282e-06, "loss": 3.9513, "step": 19518 }, { "epoch": 6.502040476388773, "grad_norm": 0.7734375, "learning_rate": 4.3160037858289695e-06, "loss": 3.9389, "step": 19519 }, { "epoch": 6.502373615391022, "grad_norm": 0.76953125, "learning_rate": 4.315270928613317e-06, "loss": 4.0472, "step": 19520 }, { "epoch": 6.50270675439327, "grad_norm": 0.8046875, "learning_rate": 4.314538108492856e-06, "loss": 4.0021, "step": 19521 }, { "epoch": 6.50303989339552, "grad_norm": 0.828125, "learning_rate": 4.31380532547613e-06, "loss": 4.069, "step": 19522 }, { "epoch": 6.503373032397768, "grad_norm": 0.78515625, "learning_rate": 4.31307257957167e-06, "loss": 4.0185, "step": 19523 }, { "epoch": 6.503706171400017, "grad_norm": 0.828125, "learning_rate": 4.312339870788011e-06, "loss": 4.0171, "step": 19524 }, { "epoch": 6.504039310402265, "grad_norm": 0.8046875, "learning_rate": 4.311607199133686e-06, "loss": 4.0541, "step": 19525 }, { "epoch": 6.504372449404514, "grad_norm": 0.7265625, "learning_rate": 4.310874564617234e-06, "loss": 4.003, "step": 19526 }, { "epoch": 6.504705588406763, "grad_norm": 0.7890625, "learning_rate": 4.3101419672471895e-06, "loss": 4.0019, "step": 19527 }, { "epoch": 6.505038727409011, "grad_norm": 0.765625, "learning_rate": 4.309409407032075e-06, "loss": 3.907, "step": 19528 }, { "epoch": 6.50537186641126, "grad_norm": 0.7890625, "learning_rate": 4.308676883980433e-06, "loss": 3.9782, "step": 19529 }, { "epoch": 6.505705005413509, "grad_norm": 0.73046875, "learning_rate": 4.307944398100794e-06, "loss": 4.057, "step": 19530 }, { "epoch": 6.506038144415758, "grad_norm": 0.76171875, "learning_rate": 4.307211949401688e-06, "loss": 4.0337, "step": 19531 }, { "epoch": 6.506371283418006, "grad_norm": 0.76171875, "learning_rate": 4.306479537891644e-06, "loss": 3.9996, "step": 19532 }, { "epoch": 6.506704422420254, "grad_norm": 0.76953125, "learning_rate": 4.305747163579199e-06, "loss": 3.9553, "step": 19533 }, { "epoch": 6.507037561422504, "grad_norm": 0.76171875, "learning_rate": 4.305014826472882e-06, "loss": 3.9746, "step": 19534 }, { "epoch": 6.507370700424752, "grad_norm": 0.7734375, "learning_rate": 4.30428252658122e-06, "loss": 3.9477, "step": 19535 }, { "epoch": 6.507703839427001, "grad_norm": 0.78125, "learning_rate": 4.303550263912746e-06, "loss": 3.9496, "step": 19536 }, { "epoch": 6.508036978429249, "grad_norm": 0.77734375, "learning_rate": 4.302818038475988e-06, "loss": 3.9405, "step": 19537 }, { "epoch": 6.5083701174314985, "grad_norm": 0.828125, "learning_rate": 4.302085850279474e-06, "loss": 3.9719, "step": 19538 }, { "epoch": 6.508703256433747, "grad_norm": 0.75, "learning_rate": 4.301353699331731e-06, "loss": 4.0109, "step": 19539 }, { "epoch": 6.509036395435996, "grad_norm": 0.75, "learning_rate": 4.300621585641291e-06, "loss": 3.9687, "step": 19540 }, { "epoch": 6.509369534438244, "grad_norm": 0.734375, "learning_rate": 4.299889509216681e-06, "loss": 4.0472, "step": 19541 }, { "epoch": 6.5097026734404935, "grad_norm": 0.8203125, "learning_rate": 4.299157470066426e-06, "loss": 4.006, "step": 19542 }, { "epoch": 6.510035812442742, "grad_norm": 0.734375, "learning_rate": 4.29842546819905e-06, "loss": 4.0501, "step": 19543 }, { "epoch": 6.51036895144499, "grad_norm": 0.78125, "learning_rate": 4.297693503623087e-06, "loss": 3.9647, "step": 19544 }, { "epoch": 6.510702090447239, "grad_norm": 0.81640625, "learning_rate": 4.296961576347062e-06, "loss": 3.9096, "step": 19545 }, { "epoch": 6.5110352294494875, "grad_norm": 0.76171875, "learning_rate": 4.296229686379489e-06, "loss": 4.0089, "step": 19546 }, { "epoch": 6.511368368451737, "grad_norm": 0.7578125, "learning_rate": 4.295497833728904e-06, "loss": 3.9406, "step": 19547 }, { "epoch": 6.511701507453985, "grad_norm": 0.80859375, "learning_rate": 4.294766018403829e-06, "loss": 3.9485, "step": 19548 }, { "epoch": 6.512034646456234, "grad_norm": 0.73046875, "learning_rate": 4.2940342404127875e-06, "loss": 4.0592, "step": 19549 }, { "epoch": 6.5123677854584825, "grad_norm": 0.7578125, "learning_rate": 4.2933024997643e-06, "loss": 4.0041, "step": 19550 }, { "epoch": 6.512700924460731, "grad_norm": 0.75390625, "learning_rate": 4.292570796466895e-06, "loss": 4.035, "step": 19551 }, { "epoch": 6.51303406346298, "grad_norm": 0.76953125, "learning_rate": 4.291839130529094e-06, "loss": 3.9808, "step": 19552 }, { "epoch": 6.513367202465228, "grad_norm": 0.7734375, "learning_rate": 4.291107501959418e-06, "loss": 4.0995, "step": 19553 }, { "epoch": 6.5137003414674775, "grad_norm": 0.74609375, "learning_rate": 4.2903759107663875e-06, "loss": 4.0441, "step": 19554 }, { "epoch": 6.514033480469726, "grad_norm": 0.78515625, "learning_rate": 4.289644356958528e-06, "loss": 3.982, "step": 19555 }, { "epoch": 6.514366619471975, "grad_norm": 0.7578125, "learning_rate": 4.2889128405443564e-06, "loss": 3.9693, "step": 19556 }, { "epoch": 6.514699758474223, "grad_norm": 0.82421875, "learning_rate": 4.288181361532395e-06, "loss": 3.961, "step": 19557 }, { "epoch": 6.515032897476472, "grad_norm": 0.75, "learning_rate": 4.287449919931162e-06, "loss": 4.0564, "step": 19558 }, { "epoch": 6.515366036478721, "grad_norm": 0.77734375, "learning_rate": 4.2867185157491815e-06, "loss": 3.9593, "step": 19559 }, { "epoch": 6.51569917548097, "grad_norm": 0.7890625, "learning_rate": 4.285987148994971e-06, "loss": 3.9845, "step": 19560 }, { "epoch": 6.516032314483218, "grad_norm": 0.78515625, "learning_rate": 4.285255819677048e-06, "loss": 3.9211, "step": 19561 }, { "epoch": 6.5163654534854665, "grad_norm": 0.78125, "learning_rate": 4.284524527803928e-06, "loss": 3.958, "step": 19562 }, { "epoch": 6.516698592487716, "grad_norm": 0.7421875, "learning_rate": 4.283793273384139e-06, "loss": 3.982, "step": 19563 }, { "epoch": 6.517031731489964, "grad_norm": 0.7734375, "learning_rate": 4.28306205642619e-06, "loss": 3.9913, "step": 19564 }, { "epoch": 6.517364870492213, "grad_norm": 0.828125, "learning_rate": 4.282330876938596e-06, "loss": 4.062, "step": 19565 }, { "epoch": 6.517698009494461, "grad_norm": 0.78125, "learning_rate": 4.281599734929881e-06, "loss": 3.9262, "step": 19566 }, { "epoch": 6.518031148496711, "grad_norm": 0.80859375, "learning_rate": 4.280868630408559e-06, "loss": 3.9644, "step": 19567 }, { "epoch": 6.518364287498959, "grad_norm": 0.80078125, "learning_rate": 4.280137563383144e-06, "loss": 4.0635, "step": 19568 }, { "epoch": 6.518697426501207, "grad_norm": 0.78125, "learning_rate": 4.279406533862149e-06, "loss": 4.0567, "step": 19569 }, { "epoch": 6.519030565503456, "grad_norm": 0.7421875, "learning_rate": 4.278675541854097e-06, "loss": 3.9998, "step": 19570 }, { "epoch": 6.519363704505705, "grad_norm": 0.8046875, "learning_rate": 4.277944587367495e-06, "loss": 3.9919, "step": 19571 }, { "epoch": 6.519696843507954, "grad_norm": 0.79296875, "learning_rate": 4.27721367041086e-06, "loss": 3.9369, "step": 19572 }, { "epoch": 6.520029982510202, "grad_norm": 0.7265625, "learning_rate": 4.276482790992707e-06, "loss": 4.0537, "step": 19573 }, { "epoch": 6.520363121512451, "grad_norm": 0.75390625, "learning_rate": 4.275751949121545e-06, "loss": 4.01, "step": 19574 }, { "epoch": 6.5206962605147, "grad_norm": 0.7890625, "learning_rate": 4.27502114480589e-06, "loss": 3.9804, "step": 19575 }, { "epoch": 6.521029399516949, "grad_norm": 0.79296875, "learning_rate": 4.274290378054251e-06, "loss": 3.9921, "step": 19576 }, { "epoch": 6.521362538519197, "grad_norm": 0.765625, "learning_rate": 4.273559648875145e-06, "loss": 3.976, "step": 19577 }, { "epoch": 6.521695677521446, "grad_norm": 0.828125, "learning_rate": 4.2728289572770805e-06, "loss": 3.9562, "step": 19578 }, { "epoch": 6.522028816523695, "grad_norm": 0.75, "learning_rate": 4.2720983032685675e-06, "loss": 4.0374, "step": 19579 }, { "epoch": 6.522361955525943, "grad_norm": 0.734375, "learning_rate": 4.2713676868581166e-06, "loss": 3.9115, "step": 19580 }, { "epoch": 6.522695094528192, "grad_norm": 0.80859375, "learning_rate": 4.270637108054245e-06, "loss": 3.976, "step": 19581 }, { "epoch": 6.52302823353044, "grad_norm": 0.74609375, "learning_rate": 4.2699065668654515e-06, "loss": 4.0258, "step": 19582 }, { "epoch": 6.5233613725326896, "grad_norm": 0.76171875, "learning_rate": 4.269176063300248e-06, "loss": 3.9651, "step": 19583 }, { "epoch": 6.523694511534938, "grad_norm": 0.75390625, "learning_rate": 4.268445597367148e-06, "loss": 4.0038, "step": 19584 }, { "epoch": 6.524027650537187, "grad_norm": 0.78515625, "learning_rate": 4.267715169074658e-06, "loss": 4.0349, "step": 19585 }, { "epoch": 6.524360789539435, "grad_norm": 0.79296875, "learning_rate": 4.266984778431284e-06, "loss": 4.0269, "step": 19586 }, { "epoch": 6.524693928541684, "grad_norm": 0.78125, "learning_rate": 4.266254425445533e-06, "loss": 3.9649, "step": 19587 }, { "epoch": 6.525027067543933, "grad_norm": 0.76171875, "learning_rate": 4.265524110125915e-06, "loss": 4.019, "step": 19588 }, { "epoch": 6.525360206546181, "grad_norm": 0.77734375, "learning_rate": 4.2647938324809376e-06, "loss": 3.9653, "step": 19589 }, { "epoch": 6.52569334554843, "grad_norm": 0.76171875, "learning_rate": 4.264063592519103e-06, "loss": 3.8594, "step": 19590 }, { "epoch": 6.526026484550679, "grad_norm": 0.8125, "learning_rate": 4.263333390248919e-06, "loss": 3.9882, "step": 19591 }, { "epoch": 6.526359623552928, "grad_norm": 0.75, "learning_rate": 4.26260322567889e-06, "loss": 4.0217, "step": 19592 }, { "epoch": 6.526692762555176, "grad_norm": 0.76171875, "learning_rate": 4.261873098817522e-06, "loss": 4.0696, "step": 19593 }, { "epoch": 6.527025901557424, "grad_norm": 0.77734375, "learning_rate": 4.261143009673315e-06, "loss": 3.9088, "step": 19594 }, { "epoch": 6.5273590405596735, "grad_norm": 0.76953125, "learning_rate": 4.260412958254779e-06, "loss": 4.0193, "step": 19595 }, { "epoch": 6.527692179561922, "grad_norm": 0.73828125, "learning_rate": 4.2596829445704166e-06, "loss": 3.9847, "step": 19596 }, { "epoch": 6.528025318564171, "grad_norm": 0.76171875, "learning_rate": 4.258952968628728e-06, "loss": 4.0243, "step": 19597 }, { "epoch": 6.528358457566419, "grad_norm": 0.80078125, "learning_rate": 4.258223030438215e-06, "loss": 3.8638, "step": 19598 }, { "epoch": 6.5286915965686685, "grad_norm": 0.703125, "learning_rate": 4.2574931300073876e-06, "loss": 4.0061, "step": 19599 }, { "epoch": 6.529024735570917, "grad_norm": 0.76171875, "learning_rate": 4.256763267344739e-06, "loss": 3.9618, "step": 19600 }, { "epoch": 6.529357874573166, "grad_norm": 0.7578125, "learning_rate": 4.256033442458774e-06, "loss": 3.9936, "step": 19601 }, { "epoch": 6.529691013575414, "grad_norm": 0.7421875, "learning_rate": 4.255303655357989e-06, "loss": 4.0545, "step": 19602 }, { "epoch": 6.530024152577663, "grad_norm": 0.8125, "learning_rate": 4.254573906050893e-06, "loss": 4.0007, "step": 19603 }, { "epoch": 6.530357291579912, "grad_norm": 0.8046875, "learning_rate": 4.253844194545981e-06, "loss": 4.0103, "step": 19604 }, { "epoch": 6.53069043058216, "grad_norm": 0.765625, "learning_rate": 4.253114520851751e-06, "loss": 3.9649, "step": 19605 }, { "epoch": 6.531023569584409, "grad_norm": 0.78125, "learning_rate": 4.2523848849767035e-06, "loss": 3.9108, "step": 19606 }, { "epoch": 6.5313567085866575, "grad_norm": 0.7734375, "learning_rate": 4.25165528692934e-06, "loss": 4.0005, "step": 19607 }, { "epoch": 6.531689847588907, "grad_norm": 0.7890625, "learning_rate": 4.250925726718159e-06, "loss": 3.9901, "step": 19608 }, { "epoch": 6.532022986591155, "grad_norm": 0.734375, "learning_rate": 4.2501962043516506e-06, "loss": 3.9569, "step": 19609 }, { "epoch": 6.532356125593404, "grad_norm": 0.765625, "learning_rate": 4.249466719838319e-06, "loss": 3.9753, "step": 19610 }, { "epoch": 6.5326892645956525, "grad_norm": 0.765625, "learning_rate": 4.248737273186661e-06, "loss": 4.0227, "step": 19611 }, { "epoch": 6.533022403597901, "grad_norm": 0.73828125, "learning_rate": 4.248007864405171e-06, "loss": 4.0614, "step": 19612 }, { "epoch": 6.53335554260015, "grad_norm": 0.71875, "learning_rate": 4.247278493502343e-06, "loss": 3.9181, "step": 19613 }, { "epoch": 6.533688681602398, "grad_norm": 0.7890625, "learning_rate": 4.246549160486677e-06, "loss": 3.9202, "step": 19614 }, { "epoch": 6.534021820604647, "grad_norm": 0.78125, "learning_rate": 4.2458198653666675e-06, "loss": 3.9228, "step": 19615 }, { "epoch": 6.534354959606896, "grad_norm": 0.74609375, "learning_rate": 4.245090608150807e-06, "loss": 4.0442, "step": 19616 }, { "epoch": 6.534688098609145, "grad_norm": 0.7578125, "learning_rate": 4.244361388847592e-06, "loss": 3.9951, "step": 19617 }, { "epoch": 6.535021237611393, "grad_norm": 0.7890625, "learning_rate": 4.243632207465513e-06, "loss": 3.9741, "step": 19618 }, { "epoch": 6.535354376613642, "grad_norm": 0.8125, "learning_rate": 4.242903064013068e-06, "loss": 3.9698, "step": 19619 }, { "epoch": 6.535687515615891, "grad_norm": 0.8203125, "learning_rate": 4.242173958498744e-06, "loss": 3.9273, "step": 19620 }, { "epoch": 6.53602065461814, "grad_norm": 0.77734375, "learning_rate": 4.241444890931038e-06, "loss": 3.9637, "step": 19621 }, { "epoch": 6.536353793620388, "grad_norm": 0.76171875, "learning_rate": 4.240715861318443e-06, "loss": 4.0059, "step": 19622 }, { "epoch": 6.5366869326226364, "grad_norm": 0.7578125, "learning_rate": 4.239986869669447e-06, "loss": 3.9939, "step": 19623 }, { "epoch": 6.537020071624886, "grad_norm": 0.7421875, "learning_rate": 4.239257915992541e-06, "loss": 4.0402, "step": 19624 }, { "epoch": 6.537353210627134, "grad_norm": 0.79296875, "learning_rate": 4.2385290002962205e-06, "loss": 3.9892, "step": 19625 }, { "epoch": 6.537686349629383, "grad_norm": 0.76171875, "learning_rate": 4.237800122588976e-06, "loss": 3.9084, "step": 19626 }, { "epoch": 6.538019488631631, "grad_norm": 0.7578125, "learning_rate": 4.237071282879287e-06, "loss": 4.0062, "step": 19627 }, { "epoch": 6.538352627633881, "grad_norm": 0.75, "learning_rate": 4.236342481175654e-06, "loss": 4.0115, "step": 19628 }, { "epoch": 6.538685766636129, "grad_norm": 0.765625, "learning_rate": 4.235613717486561e-06, "loss": 3.9706, "step": 19629 }, { "epoch": 6.539018905638377, "grad_norm": 0.76171875, "learning_rate": 4.234884991820498e-06, "loss": 3.9417, "step": 19630 }, { "epoch": 6.539352044640626, "grad_norm": 0.734375, "learning_rate": 4.234156304185951e-06, "loss": 4.0223, "step": 19631 }, { "epoch": 6.539685183642875, "grad_norm": 0.7578125, "learning_rate": 4.233427654591412e-06, "loss": 4.037, "step": 19632 }, { "epoch": 6.540018322645124, "grad_norm": 0.75390625, "learning_rate": 4.232699043045364e-06, "loss": 3.9803, "step": 19633 }, { "epoch": 6.540351461647372, "grad_norm": 0.75, "learning_rate": 4.231970469556295e-06, "loss": 4.0049, "step": 19634 }, { "epoch": 6.540684600649621, "grad_norm": 0.7890625, "learning_rate": 4.231241934132693e-06, "loss": 3.9569, "step": 19635 }, { "epoch": 6.54101773965187, "grad_norm": 0.74609375, "learning_rate": 4.230513436783042e-06, "loss": 3.9865, "step": 19636 }, { "epoch": 6.541350878654119, "grad_norm": 0.75, "learning_rate": 4.229784977515827e-06, "loss": 3.9242, "step": 19637 }, { "epoch": 6.541684017656367, "grad_norm": 0.7890625, "learning_rate": 4.229056556339531e-06, "loss": 4.0149, "step": 19638 }, { "epoch": 6.542017156658616, "grad_norm": 0.7578125, "learning_rate": 4.228328173262644e-06, "loss": 4.0284, "step": 19639 }, { "epoch": 6.542350295660865, "grad_norm": 0.73828125, "learning_rate": 4.227599828293648e-06, "loss": 3.8927, "step": 19640 }, { "epoch": 6.542683434663113, "grad_norm": 0.78515625, "learning_rate": 4.226871521441026e-06, "loss": 3.9811, "step": 19641 }, { "epoch": 6.543016573665362, "grad_norm": 0.75390625, "learning_rate": 4.2261432527132575e-06, "loss": 3.9435, "step": 19642 }, { "epoch": 6.54334971266761, "grad_norm": 0.7734375, "learning_rate": 4.225415022118833e-06, "loss": 4.0387, "step": 19643 }, { "epoch": 6.5436828516698595, "grad_norm": 0.78515625, "learning_rate": 4.224686829666233e-06, "loss": 4.0252, "step": 19644 }, { "epoch": 6.544015990672108, "grad_norm": 0.796875, "learning_rate": 4.223958675363935e-06, "loss": 3.9332, "step": 19645 }, { "epoch": 6.544349129674357, "grad_norm": 0.74609375, "learning_rate": 4.223230559220418e-06, "loss": 3.9479, "step": 19646 }, { "epoch": 6.544682268676605, "grad_norm": 0.78125, "learning_rate": 4.2225024812441715e-06, "loss": 3.9166, "step": 19647 }, { "epoch": 6.545015407678854, "grad_norm": 0.73828125, "learning_rate": 4.221774441443672e-06, "loss": 3.9775, "step": 19648 }, { "epoch": 6.545348546681103, "grad_norm": 0.74609375, "learning_rate": 4.221046439827399e-06, "loss": 3.9703, "step": 19649 }, { "epoch": 6.545681685683351, "grad_norm": 0.8203125, "learning_rate": 4.22031847640383e-06, "loss": 4.0062, "step": 19650 }, { "epoch": 6.5460148246856, "grad_norm": 0.7734375, "learning_rate": 4.219590551181452e-06, "loss": 4.0169, "step": 19651 }, { "epoch": 6.5463479636878485, "grad_norm": 0.75, "learning_rate": 4.218862664168736e-06, "loss": 3.9923, "step": 19652 }, { "epoch": 6.546681102690098, "grad_norm": 0.75390625, "learning_rate": 4.218134815374164e-06, "loss": 4.0185, "step": 19653 }, { "epoch": 6.547014241692346, "grad_norm": 0.7734375, "learning_rate": 4.217407004806213e-06, "loss": 4.0061, "step": 19654 }, { "epoch": 6.547347380694594, "grad_norm": 0.73828125, "learning_rate": 4.21667923247336e-06, "loss": 3.9671, "step": 19655 }, { "epoch": 6.5476805196968435, "grad_norm": 0.78125, "learning_rate": 4.215951498384083e-06, "loss": 4.037, "step": 19656 }, { "epoch": 6.548013658699093, "grad_norm": 0.79296875, "learning_rate": 4.215223802546853e-06, "loss": 4.0258, "step": 19657 }, { "epoch": 6.548346797701341, "grad_norm": 0.75390625, "learning_rate": 4.2144961449701564e-06, "loss": 4.0013, "step": 19658 }, { "epoch": 6.548679936703589, "grad_norm": 0.77734375, "learning_rate": 4.213768525662461e-06, "loss": 4.0067, "step": 19659 }, { "epoch": 6.5490130757058385, "grad_norm": 0.77734375, "learning_rate": 4.213040944632246e-06, "loss": 3.8817, "step": 19660 }, { "epoch": 6.549346214708087, "grad_norm": 0.78125, "learning_rate": 4.21231340188798e-06, "loss": 3.9689, "step": 19661 }, { "epoch": 6.549679353710336, "grad_norm": 0.7890625, "learning_rate": 4.21158589743815e-06, "loss": 3.9755, "step": 19662 }, { "epoch": 6.550012492712584, "grad_norm": 0.796875, "learning_rate": 4.210858431291217e-06, "loss": 3.991, "step": 19663 }, { "epoch": 6.550345631714833, "grad_norm": 0.73828125, "learning_rate": 4.210131003455656e-06, "loss": 4.0154, "step": 19664 }, { "epoch": 6.550678770717082, "grad_norm": 0.77734375, "learning_rate": 4.209403613939946e-06, "loss": 3.9111, "step": 19665 }, { "epoch": 6.55101190971933, "grad_norm": 0.78125, "learning_rate": 4.208676262752556e-06, "loss": 4.0181, "step": 19666 }, { "epoch": 6.551345048721579, "grad_norm": 0.87109375, "learning_rate": 4.207948949901961e-06, "loss": 3.9741, "step": 19667 }, { "epoch": 6.5516781877238275, "grad_norm": 0.75, "learning_rate": 4.207221675396624e-06, "loss": 3.9004, "step": 19668 }, { "epoch": 6.552011326726077, "grad_norm": 0.7421875, "learning_rate": 4.206494439245027e-06, "loss": 4.0469, "step": 19669 }, { "epoch": 6.552344465728325, "grad_norm": 0.75, "learning_rate": 4.205767241455639e-06, "loss": 3.9979, "step": 19670 }, { "epoch": 6.552677604730574, "grad_norm": 0.76953125, "learning_rate": 4.205040082036921e-06, "loss": 3.9196, "step": 19671 }, { "epoch": 6.553010743732822, "grad_norm": 0.7578125, "learning_rate": 4.204312960997353e-06, "loss": 3.9977, "step": 19672 }, { "epoch": 6.553343882735071, "grad_norm": 0.77734375, "learning_rate": 4.203585878345399e-06, "loss": 3.9455, "step": 19673 }, { "epoch": 6.55367702173732, "grad_norm": 0.79296875, "learning_rate": 4.202858834089531e-06, "loss": 4.0791, "step": 19674 }, { "epoch": 6.554010160739568, "grad_norm": 0.84765625, "learning_rate": 4.2021318282382126e-06, "loss": 3.932, "step": 19675 }, { "epoch": 6.554343299741817, "grad_norm": 0.78125, "learning_rate": 4.201404860799917e-06, "loss": 3.9901, "step": 19676 }, { "epoch": 6.554676438744066, "grad_norm": 0.7578125, "learning_rate": 4.2006779317831116e-06, "loss": 4.0161, "step": 19677 }, { "epoch": 6.555009577746315, "grad_norm": 0.79296875, "learning_rate": 4.199951041196261e-06, "loss": 3.9472, "step": 19678 }, { "epoch": 6.555342716748563, "grad_norm": 0.75390625, "learning_rate": 4.199224189047834e-06, "loss": 3.9823, "step": 19679 }, { "epoch": 6.555675855750812, "grad_norm": 0.80078125, "learning_rate": 4.198497375346295e-06, "loss": 4.002, "step": 19680 }, { "epoch": 6.556008994753061, "grad_norm": 0.73046875, "learning_rate": 4.19777060010011e-06, "loss": 4.0167, "step": 19681 }, { "epoch": 6.55634213375531, "grad_norm": 0.76171875, "learning_rate": 4.197043863317742e-06, "loss": 3.9194, "step": 19682 }, { "epoch": 6.556675272757558, "grad_norm": 0.76953125, "learning_rate": 4.1963171650076625e-06, "loss": 4.0313, "step": 19683 }, { "epoch": 6.557008411759806, "grad_norm": 0.7890625, "learning_rate": 4.195590505178332e-06, "loss": 3.9616, "step": 19684 }, { "epoch": 6.557341550762056, "grad_norm": 0.78515625, "learning_rate": 4.194863883838214e-06, "loss": 3.9368, "step": 19685 }, { "epoch": 6.557674689764304, "grad_norm": 0.79296875, "learning_rate": 4.194137300995771e-06, "loss": 3.9518, "step": 19686 }, { "epoch": 6.558007828766553, "grad_norm": 0.77734375, "learning_rate": 4.19341075665947e-06, "loss": 4.0043, "step": 19687 }, { "epoch": 6.558340967768801, "grad_norm": 0.8125, "learning_rate": 4.192684250837776e-06, "loss": 3.9602, "step": 19688 }, { "epoch": 6.558674106771051, "grad_norm": 0.76953125, "learning_rate": 4.191957783539143e-06, "loss": 3.9842, "step": 19689 }, { "epoch": 6.559007245773299, "grad_norm": 0.80078125, "learning_rate": 4.191231354772033e-06, "loss": 3.9928, "step": 19690 }, { "epoch": 6.559340384775547, "grad_norm": 0.79296875, "learning_rate": 4.190504964544915e-06, "loss": 3.9264, "step": 19691 }, { "epoch": 6.559673523777796, "grad_norm": 0.765625, "learning_rate": 4.189778612866245e-06, "loss": 4.0296, "step": 19692 }, { "epoch": 6.560006662780045, "grad_norm": 0.74609375, "learning_rate": 4.189052299744485e-06, "loss": 3.9513, "step": 19693 }, { "epoch": 6.560339801782294, "grad_norm": 0.7265625, "learning_rate": 4.1883260251880905e-06, "loss": 4.0051, "step": 19694 }, { "epoch": 6.560672940784542, "grad_norm": 0.75, "learning_rate": 4.187599789205528e-06, "loss": 3.9759, "step": 19695 }, { "epoch": 6.561006079786791, "grad_norm": 0.77734375, "learning_rate": 4.186873591805254e-06, "loss": 4.0177, "step": 19696 }, { "epoch": 6.56133921878904, "grad_norm": 0.8125, "learning_rate": 4.186147432995726e-06, "loss": 4.0131, "step": 19697 }, { "epoch": 6.561672357791289, "grad_norm": 0.74609375, "learning_rate": 4.185421312785401e-06, "loss": 3.9372, "step": 19698 }, { "epoch": 6.562005496793537, "grad_norm": 0.77734375, "learning_rate": 4.184695231182741e-06, "loss": 3.9042, "step": 19699 }, { "epoch": 6.562338635795786, "grad_norm": 0.78515625, "learning_rate": 4.183969188196199e-06, "loss": 4.0189, "step": 19700 }, { "epoch": 6.5626717747980345, "grad_norm": 0.7109375, "learning_rate": 4.1832431838342305e-06, "loss": 4.0162, "step": 19701 }, { "epoch": 6.563004913800283, "grad_norm": 0.77734375, "learning_rate": 4.182517218105299e-06, "loss": 3.9072, "step": 19702 }, { "epoch": 6.563338052802532, "grad_norm": 0.8046875, "learning_rate": 4.181791291017855e-06, "loss": 4.0007, "step": 19703 }, { "epoch": 6.56367119180478, "grad_norm": 0.73046875, "learning_rate": 4.181065402580356e-06, "loss": 4.0037, "step": 19704 }, { "epoch": 6.5640043308070295, "grad_norm": 0.76171875, "learning_rate": 4.1803395528012524e-06, "loss": 3.9794, "step": 19705 }, { "epoch": 6.564337469809278, "grad_norm": 0.78125, "learning_rate": 4.179613741689008e-06, "loss": 3.9748, "step": 19706 }, { "epoch": 6.564670608811527, "grad_norm": 0.78515625, "learning_rate": 4.178887969252071e-06, "loss": 3.9723, "step": 19707 }, { "epoch": 6.565003747813775, "grad_norm": 0.76171875, "learning_rate": 4.178162235498891e-06, "loss": 4.0069, "step": 19708 }, { "epoch": 6.565336886816024, "grad_norm": 0.7578125, "learning_rate": 4.17743654043793e-06, "loss": 3.939, "step": 19709 }, { "epoch": 6.565670025818273, "grad_norm": 0.77734375, "learning_rate": 4.176710884077635e-06, "loss": 3.976, "step": 19710 }, { "epoch": 6.566003164820521, "grad_norm": 0.78125, "learning_rate": 4.17598526642646e-06, "loss": 3.901, "step": 19711 }, { "epoch": 6.56633630382277, "grad_norm": 0.75390625, "learning_rate": 4.175259687492855e-06, "loss": 4.044, "step": 19712 }, { "epoch": 6.5666694428250185, "grad_norm": 0.7890625, "learning_rate": 4.174534147285276e-06, "loss": 3.9108, "step": 19713 }, { "epoch": 6.567002581827268, "grad_norm": 0.75390625, "learning_rate": 4.173808645812171e-06, "loss": 4.0171, "step": 19714 }, { "epoch": 6.567335720829516, "grad_norm": 0.7109375, "learning_rate": 4.173083183081991e-06, "loss": 3.9699, "step": 19715 }, { "epoch": 6.567668859831765, "grad_norm": 0.734375, "learning_rate": 4.172357759103186e-06, "loss": 4.0112, "step": 19716 }, { "epoch": 6.5680019988340135, "grad_norm": 0.78515625, "learning_rate": 4.1716323738842054e-06, "loss": 3.9928, "step": 19717 }, { "epoch": 6.568335137836263, "grad_norm": 0.78125, "learning_rate": 4.170907027433498e-06, "loss": 4.0064, "step": 19718 }, { "epoch": 6.568668276838511, "grad_norm": 0.7734375, "learning_rate": 4.1701817197595105e-06, "loss": 3.9205, "step": 19719 }, { "epoch": 6.569001415840759, "grad_norm": 0.6953125, "learning_rate": 4.169456450870696e-06, "loss": 4.0656, "step": 19720 }, { "epoch": 6.569334554843008, "grad_norm": 0.796875, "learning_rate": 4.1687312207755e-06, "loss": 3.9579, "step": 19721 }, { "epoch": 6.569667693845257, "grad_norm": 0.78515625, "learning_rate": 4.1680060294823695e-06, "loss": 4.0266, "step": 19722 }, { "epoch": 6.570000832847506, "grad_norm": 0.73046875, "learning_rate": 4.167280876999749e-06, "loss": 4.0599, "step": 19723 }, { "epoch": 6.570333971849754, "grad_norm": 0.8046875, "learning_rate": 4.166555763336094e-06, "loss": 3.979, "step": 19724 }, { "epoch": 6.570667110852003, "grad_norm": 0.76171875, "learning_rate": 4.165830688499841e-06, "loss": 3.9073, "step": 19725 }, { "epoch": 6.571000249854252, "grad_norm": 0.7890625, "learning_rate": 4.165105652499435e-06, "loss": 3.9703, "step": 19726 }, { "epoch": 6.5713333888565, "grad_norm": 0.765625, "learning_rate": 4.164380655343328e-06, "loss": 3.9623, "step": 19727 }, { "epoch": 6.571666527858749, "grad_norm": 0.75, "learning_rate": 4.163655697039962e-06, "loss": 4.01, "step": 19728 }, { "epoch": 6.5719996668609975, "grad_norm": 0.76171875, "learning_rate": 4.1629307775977805e-06, "loss": 4.0087, "step": 19729 }, { "epoch": 6.572332805863247, "grad_norm": 0.80859375, "learning_rate": 4.162205897025224e-06, "loss": 3.9591, "step": 19730 }, { "epoch": 6.572665944865495, "grad_norm": 0.7734375, "learning_rate": 4.161481055330742e-06, "loss": 3.9928, "step": 19731 }, { "epoch": 6.572999083867744, "grad_norm": 0.77734375, "learning_rate": 4.1607562525227745e-06, "loss": 3.8924, "step": 19732 }, { "epoch": 6.573332222869992, "grad_norm": 0.765625, "learning_rate": 4.160031488609768e-06, "loss": 3.985, "step": 19733 }, { "epoch": 6.573665361872241, "grad_norm": 0.78515625, "learning_rate": 4.1593067636001536e-06, "loss": 4.018, "step": 19734 }, { "epoch": 6.57399850087449, "grad_norm": 0.74609375, "learning_rate": 4.1585820775023816e-06, "loss": 3.9986, "step": 19735 }, { "epoch": 6.574331639876738, "grad_norm": 0.7890625, "learning_rate": 4.157857430324891e-06, "loss": 3.9602, "step": 19736 }, { "epoch": 6.574664778878987, "grad_norm": 0.73046875, "learning_rate": 4.157132822076124e-06, "loss": 4.0181, "step": 19737 }, { "epoch": 6.574997917881236, "grad_norm": 0.8125, "learning_rate": 4.1564082527645146e-06, "loss": 4.0396, "step": 19738 }, { "epoch": 6.575331056883485, "grad_norm": 0.81640625, "learning_rate": 4.15568372239851e-06, "loss": 3.9655, "step": 19739 }, { "epoch": 6.575664195885733, "grad_norm": 0.76953125, "learning_rate": 4.154959230986547e-06, "loss": 4.0108, "step": 19740 }, { "epoch": 6.575997334887982, "grad_norm": 0.7109375, "learning_rate": 4.154234778537062e-06, "loss": 4.0266, "step": 19741 }, { "epoch": 6.576330473890231, "grad_norm": 0.7265625, "learning_rate": 4.1535103650584964e-06, "loss": 4.0618, "step": 19742 }, { "epoch": 6.57666361289248, "grad_norm": 0.73828125, "learning_rate": 4.152785990559286e-06, "loss": 3.949, "step": 19743 }, { "epoch": 6.576996751894728, "grad_norm": 0.76171875, "learning_rate": 4.152061655047869e-06, "loss": 3.9745, "step": 19744 }, { "epoch": 6.577329890896976, "grad_norm": 0.76171875, "learning_rate": 4.1513373585326784e-06, "loss": 4.0304, "step": 19745 }, { "epoch": 6.577663029899226, "grad_norm": 0.77734375, "learning_rate": 4.150613101022159e-06, "loss": 3.9747, "step": 19746 }, { "epoch": 6.577996168901474, "grad_norm": 0.78515625, "learning_rate": 4.14988888252474e-06, "loss": 3.9674, "step": 19747 }, { "epoch": 6.578329307903723, "grad_norm": 0.76953125, "learning_rate": 4.149164703048861e-06, "loss": 3.9829, "step": 19748 }, { "epoch": 6.578662446905971, "grad_norm": 0.76953125, "learning_rate": 4.1484405626029514e-06, "loss": 3.9818, "step": 19749 }, { "epoch": 6.5789955859082205, "grad_norm": 0.78515625, "learning_rate": 4.1477164611954524e-06, "loss": 3.9427, "step": 19750 }, { "epoch": 6.579328724910469, "grad_norm": 0.74609375, "learning_rate": 4.146992398834799e-06, "loss": 3.9572, "step": 19751 }, { "epoch": 6.579661863912717, "grad_norm": 0.72265625, "learning_rate": 4.1462683755294165e-06, "loss": 4.0436, "step": 19752 }, { "epoch": 6.579995002914966, "grad_norm": 0.765625, "learning_rate": 4.145544391287746e-06, "loss": 4.0514, "step": 19753 }, { "epoch": 6.580328141917215, "grad_norm": 0.7890625, "learning_rate": 4.1448204461182165e-06, "loss": 3.9574, "step": 19754 }, { "epoch": 6.580661280919464, "grad_norm": 0.80859375, "learning_rate": 4.144096540029262e-06, "loss": 4.0371, "step": 19755 }, { "epoch": 6.580994419921712, "grad_norm": 0.78515625, "learning_rate": 4.143372673029311e-06, "loss": 3.9953, "step": 19756 }, { "epoch": 6.581327558923961, "grad_norm": 0.734375, "learning_rate": 4.142648845126801e-06, "loss": 3.9555, "step": 19757 }, { "epoch": 6.58166069792621, "grad_norm": 0.8125, "learning_rate": 4.14192505633016e-06, "loss": 4.0204, "step": 19758 }, { "epoch": 6.581993836928459, "grad_norm": 0.8515625, "learning_rate": 4.141201306647818e-06, "loss": 3.9436, "step": 19759 }, { "epoch": 6.582326975930707, "grad_norm": 0.73046875, "learning_rate": 4.140477596088207e-06, "loss": 3.9781, "step": 19760 }, { "epoch": 6.582660114932956, "grad_norm": 0.7734375, "learning_rate": 4.139753924659754e-06, "loss": 3.9771, "step": 19761 }, { "epoch": 6.5829932539352045, "grad_norm": 0.76171875, "learning_rate": 4.139030292370889e-06, "loss": 4.0018, "step": 19762 }, { "epoch": 6.583326392937453, "grad_norm": 0.796875, "learning_rate": 4.13830669923004e-06, "loss": 4.0357, "step": 19763 }, { "epoch": 6.583659531939702, "grad_norm": 0.7421875, "learning_rate": 4.137583145245638e-06, "loss": 4.0364, "step": 19764 }, { "epoch": 6.58399267094195, "grad_norm": 0.76171875, "learning_rate": 4.13685963042611e-06, "loss": 3.9927, "step": 19765 }, { "epoch": 6.5843258099441995, "grad_norm": 0.75390625, "learning_rate": 4.136136154779883e-06, "loss": 3.9672, "step": 19766 }, { "epoch": 6.584658948946448, "grad_norm": 0.75, "learning_rate": 4.13541271831538e-06, "loss": 4.049, "step": 19767 }, { "epoch": 6.584992087948697, "grad_norm": 0.76953125, "learning_rate": 4.1346893210410355e-06, "loss": 4.0042, "step": 19768 }, { "epoch": 6.585325226950945, "grad_norm": 0.765625, "learning_rate": 4.133965962965272e-06, "loss": 4.0426, "step": 19769 }, { "epoch": 6.5856583659531935, "grad_norm": 0.765625, "learning_rate": 4.13324264409651e-06, "loss": 3.9621, "step": 19770 }, { "epoch": 6.585991504955443, "grad_norm": 0.765625, "learning_rate": 4.132519364443181e-06, "loss": 3.9902, "step": 19771 }, { "epoch": 6.586324643957691, "grad_norm": 0.75, "learning_rate": 4.131796124013708e-06, "loss": 4.0279, "step": 19772 }, { "epoch": 6.58665778295994, "grad_norm": 0.7890625, "learning_rate": 4.131072922816514e-06, "loss": 4.0427, "step": 19773 }, { "epoch": 6.5869909219621885, "grad_norm": 0.78125, "learning_rate": 4.130349760860021e-06, "loss": 4.0002, "step": 19774 }, { "epoch": 6.587324060964438, "grad_norm": 0.77734375, "learning_rate": 4.129626638152658e-06, "loss": 4.018, "step": 19775 }, { "epoch": 6.587657199966686, "grad_norm": 0.74609375, "learning_rate": 4.128903554702843e-06, "loss": 3.9803, "step": 19776 }, { "epoch": 6.587990338968935, "grad_norm": 0.765625, "learning_rate": 4.128180510519e-06, "loss": 4.067, "step": 19777 }, { "epoch": 6.5883234779711835, "grad_norm": 0.734375, "learning_rate": 4.127457505609552e-06, "loss": 3.9974, "step": 19778 }, { "epoch": 6.588656616973433, "grad_norm": 0.76171875, "learning_rate": 4.126734539982919e-06, "loss": 4.0138, "step": 19779 }, { "epoch": 6.588989755975681, "grad_norm": 0.7734375, "learning_rate": 4.126011613647521e-06, "loss": 3.8885, "step": 19780 }, { "epoch": 6.589322894977929, "grad_norm": 0.73828125, "learning_rate": 4.125288726611781e-06, "loss": 3.9788, "step": 19781 }, { "epoch": 6.589656033980178, "grad_norm": 0.7578125, "learning_rate": 4.124565878884113e-06, "loss": 3.9107, "step": 19782 }, { "epoch": 6.589989172982427, "grad_norm": 0.77734375, "learning_rate": 4.123843070472946e-06, "loss": 3.9879, "step": 19783 }, { "epoch": 6.590322311984676, "grad_norm": 0.78125, "learning_rate": 4.123120301386693e-06, "loss": 3.9389, "step": 19784 }, { "epoch": 6.590655450986924, "grad_norm": 0.76171875, "learning_rate": 4.122397571633775e-06, "loss": 4.0283, "step": 19785 }, { "epoch": 6.590988589989173, "grad_norm": 0.76953125, "learning_rate": 4.121674881222607e-06, "loss": 4.0127, "step": 19786 }, { "epoch": 6.591321728991422, "grad_norm": 0.7890625, "learning_rate": 4.1209522301616145e-06, "loss": 3.9411, "step": 19787 }, { "epoch": 6.59165486799367, "grad_norm": 0.78125, "learning_rate": 4.120229618459206e-06, "loss": 3.9195, "step": 19788 }, { "epoch": 6.591988006995919, "grad_norm": 0.78125, "learning_rate": 4.119507046123799e-06, "loss": 3.9952, "step": 19789 }, { "epoch": 6.592321145998167, "grad_norm": 0.75390625, "learning_rate": 4.1187845131638165e-06, "loss": 3.9845, "step": 19790 }, { "epoch": 6.592654285000417, "grad_norm": 0.73046875, "learning_rate": 4.118062019587671e-06, "loss": 4.052, "step": 19791 }, { "epoch": 6.592987424002665, "grad_norm": 0.75, "learning_rate": 4.117339565403776e-06, "loss": 4.008, "step": 19792 }, { "epoch": 6.593320563004914, "grad_norm": 0.7421875, "learning_rate": 4.116617150620548e-06, "loss": 3.9933, "step": 19793 }, { "epoch": 6.593653702007162, "grad_norm": 0.74609375, "learning_rate": 4.115894775246403e-06, "loss": 4.0466, "step": 19794 }, { "epoch": 6.593986841009411, "grad_norm": 0.76171875, "learning_rate": 4.1151724392897535e-06, "loss": 4.0434, "step": 19795 }, { "epoch": 6.59431998001166, "grad_norm": 0.765625, "learning_rate": 4.114450142759014e-06, "loss": 4.0266, "step": 19796 }, { "epoch": 6.594653119013909, "grad_norm": 0.765625, "learning_rate": 4.113727885662597e-06, "loss": 3.9933, "step": 19797 }, { "epoch": 6.594986258016157, "grad_norm": 0.74609375, "learning_rate": 4.113005668008917e-06, "loss": 3.9945, "step": 19798 }, { "epoch": 6.595319397018406, "grad_norm": 0.80859375, "learning_rate": 4.112283489806383e-06, "loss": 3.8973, "step": 19799 }, { "epoch": 6.595652536020655, "grad_norm": 0.77734375, "learning_rate": 4.111561351063406e-06, "loss": 3.9149, "step": 19800 }, { "epoch": 6.595985675022903, "grad_norm": 0.83203125, "learning_rate": 4.110839251788404e-06, "loss": 4.0353, "step": 19801 }, { "epoch": 6.596318814025152, "grad_norm": 0.7734375, "learning_rate": 4.110117191989783e-06, "loss": 3.933, "step": 19802 }, { "epoch": 6.596651953027401, "grad_norm": 0.7734375, "learning_rate": 4.109395171675953e-06, "loss": 3.9539, "step": 19803 }, { "epoch": 6.59698509202965, "grad_norm": 0.7578125, "learning_rate": 4.108673190855324e-06, "loss": 3.9669, "step": 19804 }, { "epoch": 6.597318231031898, "grad_norm": 0.7734375, "learning_rate": 4.107951249536312e-06, "loss": 4.0708, "step": 19805 }, { "epoch": 6.597651370034146, "grad_norm": 0.71484375, "learning_rate": 4.107229347727318e-06, "loss": 3.9771, "step": 19806 }, { "epoch": 6.5979845090363956, "grad_norm": 0.78125, "learning_rate": 4.106507485436749e-06, "loss": 3.9782, "step": 19807 }, { "epoch": 6.598317648038644, "grad_norm": 0.7734375, "learning_rate": 4.105785662673022e-06, "loss": 3.9896, "step": 19808 }, { "epoch": 6.598650787040893, "grad_norm": 0.75, "learning_rate": 4.1050638794445405e-06, "loss": 3.9951, "step": 19809 }, { "epoch": 6.598983926043141, "grad_norm": 0.79296875, "learning_rate": 4.104342135759711e-06, "loss": 3.9808, "step": 19810 }, { "epoch": 6.5993170650453905, "grad_norm": 0.7734375, "learning_rate": 4.1036204316269374e-06, "loss": 4.0156, "step": 19811 }, { "epoch": 6.599650204047639, "grad_norm": 0.73046875, "learning_rate": 4.102898767054632e-06, "loss": 4.0409, "step": 19812 }, { "epoch": 6.599983343049887, "grad_norm": 0.7734375, "learning_rate": 4.102177142051199e-06, "loss": 3.9822, "step": 19813 }, { "epoch": 6.600316482052136, "grad_norm": 0.76953125, "learning_rate": 4.1014555566250415e-06, "loss": 3.9448, "step": 19814 }, { "epoch": 6.600649621054385, "grad_norm": 0.76953125, "learning_rate": 4.100734010784565e-06, "loss": 3.9963, "step": 19815 }, { "epoch": 6.600982760056634, "grad_norm": 0.78515625, "learning_rate": 4.100012504538175e-06, "loss": 3.9833, "step": 19816 }, { "epoch": 6.601315899058882, "grad_norm": 0.73828125, "learning_rate": 4.099291037894276e-06, "loss": 3.9756, "step": 19817 }, { "epoch": 6.601649038061131, "grad_norm": 0.734375, "learning_rate": 4.098569610861267e-06, "loss": 4.0001, "step": 19818 }, { "epoch": 6.6019821770633795, "grad_norm": 0.81640625, "learning_rate": 4.097848223447557e-06, "loss": 3.9591, "step": 19819 }, { "epoch": 6.602315316065629, "grad_norm": 0.78515625, "learning_rate": 4.097126875661547e-06, "loss": 3.9572, "step": 19820 }, { "epoch": 6.602648455067877, "grad_norm": 0.78515625, "learning_rate": 4.0964055675116365e-06, "loss": 3.9967, "step": 19821 }, { "epoch": 6.602981594070126, "grad_norm": 0.78515625, "learning_rate": 4.095684299006228e-06, "loss": 3.9012, "step": 19822 }, { "epoch": 6.6033147330723745, "grad_norm": 0.79296875, "learning_rate": 4.094963070153728e-06, "loss": 4.0243, "step": 19823 }, { "epoch": 6.603647872074623, "grad_norm": 0.7734375, "learning_rate": 4.094241880962532e-06, "loss": 3.987, "step": 19824 }, { "epoch": 6.603981011076872, "grad_norm": 0.7421875, "learning_rate": 4.09352073144104e-06, "loss": 3.949, "step": 19825 }, { "epoch": 6.60431415007912, "grad_norm": 0.75, "learning_rate": 4.092799621597649e-06, "loss": 3.9445, "step": 19826 }, { "epoch": 6.604647289081369, "grad_norm": 0.78125, "learning_rate": 4.092078551440766e-06, "loss": 3.9566, "step": 19827 }, { "epoch": 6.604980428083618, "grad_norm": 0.76953125, "learning_rate": 4.091357520978787e-06, "loss": 4.0668, "step": 19828 }, { "epoch": 6.605313567085867, "grad_norm": 0.78125, "learning_rate": 4.0906365302201096e-06, "loss": 3.9432, "step": 19829 }, { "epoch": 6.605646706088115, "grad_norm": 0.77734375, "learning_rate": 4.089915579173129e-06, "loss": 4.1149, "step": 19830 }, { "epoch": 6.6059798450903635, "grad_norm": 0.8046875, "learning_rate": 4.089194667846249e-06, "loss": 4.0069, "step": 19831 }, { "epoch": 6.606312984092613, "grad_norm": 0.73046875, "learning_rate": 4.088473796247866e-06, "loss": 3.9473, "step": 19832 }, { "epoch": 6.606646123094861, "grad_norm": 0.75, "learning_rate": 4.087752964386367e-06, "loss": 4.0526, "step": 19833 }, { "epoch": 6.60697926209711, "grad_norm": 0.69921875, "learning_rate": 4.087032172270159e-06, "loss": 3.9639, "step": 19834 }, { "epoch": 6.6073124010993585, "grad_norm": 0.7890625, "learning_rate": 4.086311419907634e-06, "loss": 4.0092, "step": 19835 }, { "epoch": 6.607645540101608, "grad_norm": 0.7734375, "learning_rate": 4.085590707307187e-06, "loss": 3.9488, "step": 19836 }, { "epoch": 6.607978679103856, "grad_norm": 0.76171875, "learning_rate": 4.084870034477208e-06, "loss": 4.0224, "step": 19837 }, { "epoch": 6.608311818106105, "grad_norm": 0.76953125, "learning_rate": 4.0841494014261e-06, "loss": 4.0249, "step": 19838 }, { "epoch": 6.608644957108353, "grad_norm": 0.73828125, "learning_rate": 4.0834288081622534e-06, "loss": 3.9655, "step": 19839 }, { "epoch": 6.608978096110603, "grad_norm": 0.74609375, "learning_rate": 4.082708254694061e-06, "loss": 3.9794, "step": 19840 }, { "epoch": 6.609311235112851, "grad_norm": 0.76171875, "learning_rate": 4.081987741029914e-06, "loss": 3.9661, "step": 19841 }, { "epoch": 6.609644374115099, "grad_norm": 0.78515625, "learning_rate": 4.081267267178207e-06, "loss": 4.1479, "step": 19842 }, { "epoch": 6.609977513117348, "grad_norm": 0.73828125, "learning_rate": 4.080546833147332e-06, "loss": 4.0272, "step": 19843 }, { "epoch": 6.610310652119597, "grad_norm": 0.78125, "learning_rate": 4.0798264389456766e-06, "loss": 3.9776, "step": 19844 }, { "epoch": 6.610643791121846, "grad_norm": 0.7421875, "learning_rate": 4.079106084581639e-06, "loss": 4.0011, "step": 19845 }, { "epoch": 6.610976930124094, "grad_norm": 0.75390625, "learning_rate": 4.078385770063605e-06, "loss": 4.032, "step": 19846 }, { "epoch": 6.611310069126343, "grad_norm": 0.7578125, "learning_rate": 4.0776654953999645e-06, "loss": 3.9726, "step": 19847 }, { "epoch": 6.611643208128592, "grad_norm": 0.78125, "learning_rate": 4.076945260599107e-06, "loss": 4.0155, "step": 19848 }, { "epoch": 6.61197634713084, "grad_norm": 0.78125, "learning_rate": 4.076225065669425e-06, "loss": 3.914, "step": 19849 }, { "epoch": 6.612309486133089, "grad_norm": 0.74609375, "learning_rate": 4.0755049106193086e-06, "loss": 3.9518, "step": 19850 }, { "epoch": 6.612642625135337, "grad_norm": 0.8046875, "learning_rate": 4.074784795457136e-06, "loss": 3.9472, "step": 19851 }, { "epoch": 6.612975764137587, "grad_norm": 0.8125, "learning_rate": 4.074064720191305e-06, "loss": 4.0138, "step": 19852 }, { "epoch": 6.613308903139835, "grad_norm": 0.734375, "learning_rate": 4.073344684830199e-06, "loss": 4.01, "step": 19853 }, { "epoch": 6.613642042142084, "grad_norm": 0.7421875, "learning_rate": 4.072624689382206e-06, "loss": 4.0448, "step": 19854 }, { "epoch": 6.613975181144332, "grad_norm": 0.8125, "learning_rate": 4.071904733855708e-06, "loss": 3.9998, "step": 19855 }, { "epoch": 6.6143083201465815, "grad_norm": 0.78125, "learning_rate": 4.071184818259098e-06, "loss": 3.9237, "step": 19856 }, { "epoch": 6.61464145914883, "grad_norm": 0.74609375, "learning_rate": 4.070464942600758e-06, "loss": 3.9192, "step": 19857 }, { "epoch": 6.614974598151079, "grad_norm": 0.76171875, "learning_rate": 4.069745106889073e-06, "loss": 4.0406, "step": 19858 }, { "epoch": 6.615307737153327, "grad_norm": 0.7421875, "learning_rate": 4.069025311132426e-06, "loss": 4.0219, "step": 19859 }, { "epoch": 6.615640876155576, "grad_norm": 0.78125, "learning_rate": 4.068305555339206e-06, "loss": 3.9865, "step": 19860 }, { "epoch": 6.615974015157825, "grad_norm": 0.75390625, "learning_rate": 4.067585839517791e-06, "loss": 4.0357, "step": 19861 }, { "epoch": 6.616307154160073, "grad_norm": 0.7734375, "learning_rate": 4.066866163676563e-06, "loss": 4.0046, "step": 19862 }, { "epoch": 6.616640293162322, "grad_norm": 0.76171875, "learning_rate": 4.066146527823912e-06, "loss": 3.9779, "step": 19863 }, { "epoch": 6.616973432164571, "grad_norm": 0.74609375, "learning_rate": 4.065426931968216e-06, "loss": 4.0035, "step": 19864 }, { "epoch": 6.61730657116682, "grad_norm": 0.8359375, "learning_rate": 4.064707376117858e-06, "loss": 3.8735, "step": 19865 }, { "epoch": 6.617639710169068, "grad_norm": 0.80078125, "learning_rate": 4.063987860281213e-06, "loss": 3.9675, "step": 19866 }, { "epoch": 6.617972849171316, "grad_norm": 0.8359375, "learning_rate": 4.063268384466671e-06, "loss": 3.9538, "step": 19867 }, { "epoch": 6.6183059881735655, "grad_norm": 0.8203125, "learning_rate": 4.062548948682613e-06, "loss": 4.0199, "step": 19868 }, { "epoch": 6.618639127175814, "grad_norm": 0.6875, "learning_rate": 4.061829552937409e-06, "loss": 3.9316, "step": 19869 }, { "epoch": 6.618972266178063, "grad_norm": 0.75, "learning_rate": 4.061110197239442e-06, "loss": 4.0008, "step": 19870 }, { "epoch": 6.619305405180311, "grad_norm": 0.76953125, "learning_rate": 4.060390881597095e-06, "loss": 4.0411, "step": 19871 }, { "epoch": 6.6196385441825605, "grad_norm": 0.77734375, "learning_rate": 4.059671606018745e-06, "loss": 3.9625, "step": 19872 }, { "epoch": 6.619971683184809, "grad_norm": 0.81640625, "learning_rate": 4.058952370512767e-06, "loss": 3.9594, "step": 19873 }, { "epoch": 6.620304822187057, "grad_norm": 0.78515625, "learning_rate": 4.058233175087539e-06, "loss": 4.0122, "step": 19874 }, { "epoch": 6.620637961189306, "grad_norm": 0.81640625, "learning_rate": 4.0575140197514426e-06, "loss": 3.9997, "step": 19875 }, { "epoch": 6.6209711001915545, "grad_norm": 0.80859375, "learning_rate": 4.056794904512851e-06, "loss": 3.9668, "step": 19876 }, { "epoch": 6.621304239193804, "grad_norm": 0.80859375, "learning_rate": 4.056075829380141e-06, "loss": 3.9902, "step": 19877 }, { "epoch": 6.621637378196052, "grad_norm": 0.7578125, "learning_rate": 4.055356794361689e-06, "loss": 3.9926, "step": 19878 }, { "epoch": 6.621970517198301, "grad_norm": 0.7890625, "learning_rate": 4.054637799465868e-06, "loss": 3.9496, "step": 19879 }, { "epoch": 6.6223036562005495, "grad_norm": 0.8046875, "learning_rate": 4.053918844701055e-06, "loss": 3.9628, "step": 19880 }, { "epoch": 6.622636795202799, "grad_norm": 0.765625, "learning_rate": 4.053199930075619e-06, "loss": 4.0335, "step": 19881 }, { "epoch": 6.622969934205047, "grad_norm": 0.79296875, "learning_rate": 4.052481055597942e-06, "loss": 4.0208, "step": 19882 }, { "epoch": 6.623303073207296, "grad_norm": 0.7734375, "learning_rate": 4.051762221276393e-06, "loss": 3.9953, "step": 19883 }, { "epoch": 6.6236362122095445, "grad_norm": 0.78515625, "learning_rate": 4.051043427119346e-06, "loss": 4.0001, "step": 19884 }, { "epoch": 6.623969351211793, "grad_norm": 0.73828125, "learning_rate": 4.050324673135169e-06, "loss": 3.9904, "step": 19885 }, { "epoch": 6.624302490214042, "grad_norm": 0.78125, "learning_rate": 4.049605959332244e-06, "loss": 3.918, "step": 19886 }, { "epoch": 6.62463562921629, "grad_norm": 0.7421875, "learning_rate": 4.048887285718932e-06, "loss": 3.9894, "step": 19887 }, { "epoch": 6.624968768218539, "grad_norm": 0.80078125, "learning_rate": 4.048168652303606e-06, "loss": 4.0068, "step": 19888 }, { "epoch": 6.625301907220788, "grad_norm": 0.734375, "learning_rate": 4.0474500590946415e-06, "loss": 3.9588, "step": 19889 }, { "epoch": 6.625635046223037, "grad_norm": 0.74609375, "learning_rate": 4.046731506100404e-06, "loss": 3.8871, "step": 19890 }, { "epoch": 6.625968185225285, "grad_norm": 0.75390625, "learning_rate": 4.046012993329267e-06, "loss": 3.9425, "step": 19891 }, { "epoch": 6.6263013242275335, "grad_norm": 0.78515625, "learning_rate": 4.045294520789593e-06, "loss": 3.9685, "step": 19892 }, { "epoch": 6.626634463229783, "grad_norm": 0.7890625, "learning_rate": 4.0445760884897585e-06, "loss": 4.0213, "step": 19893 }, { "epoch": 6.626967602232031, "grad_norm": 0.78125, "learning_rate": 4.043857696438128e-06, "loss": 4.0457, "step": 19894 }, { "epoch": 6.62730074123428, "grad_norm": 0.75390625, "learning_rate": 4.043139344643069e-06, "loss": 4.0404, "step": 19895 }, { "epoch": 6.627633880236528, "grad_norm": 0.765625, "learning_rate": 4.0424210331129485e-06, "loss": 4.0001, "step": 19896 }, { "epoch": 6.627967019238778, "grad_norm": 0.75390625, "learning_rate": 4.041702761856135e-06, "loss": 4.0201, "step": 19897 }, { "epoch": 6.628300158241026, "grad_norm": 0.765625, "learning_rate": 4.040984530880994e-06, "loss": 3.9942, "step": 19898 }, { "epoch": 6.628633297243275, "grad_norm": 0.78125, "learning_rate": 4.040266340195887e-06, "loss": 3.9371, "step": 19899 }, { "epoch": 6.628966436245523, "grad_norm": 0.78125, "learning_rate": 4.039548189809186e-06, "loss": 3.9828, "step": 19900 }, { "epoch": 6.629299575247773, "grad_norm": 0.73828125, "learning_rate": 4.0388300797292544e-06, "loss": 3.9974, "step": 19901 }, { "epoch": 6.629632714250021, "grad_norm": 0.76171875, "learning_rate": 4.038112009964456e-06, "loss": 3.9914, "step": 19902 }, { "epoch": 6.629965853252269, "grad_norm": 0.796875, "learning_rate": 4.037393980523149e-06, "loss": 3.9937, "step": 19903 }, { "epoch": 6.630298992254518, "grad_norm": 0.7890625, "learning_rate": 4.03667599141371e-06, "loss": 4.0243, "step": 19904 }, { "epoch": 6.630632131256767, "grad_norm": 0.7578125, "learning_rate": 4.0359580426444905e-06, "loss": 4.0896, "step": 19905 }, { "epoch": 6.630965270259016, "grad_norm": 0.7734375, "learning_rate": 4.035240134223854e-06, "loss": 4.0083, "step": 19906 }, { "epoch": 6.631298409261264, "grad_norm": 0.73046875, "learning_rate": 4.034522266160167e-06, "loss": 4.0129, "step": 19907 }, { "epoch": 6.631631548263513, "grad_norm": 0.75390625, "learning_rate": 4.033804438461792e-06, "loss": 4.006, "step": 19908 }, { "epoch": 6.631964687265762, "grad_norm": 0.78125, "learning_rate": 4.033086651137085e-06, "loss": 3.9723, "step": 19909 }, { "epoch": 6.63229782626801, "grad_norm": 0.7421875, "learning_rate": 4.032368904194408e-06, "loss": 3.9747, "step": 19910 }, { "epoch": 6.632630965270259, "grad_norm": 0.78125, "learning_rate": 4.031651197642124e-06, "loss": 4.0176, "step": 19911 }, { "epoch": 6.632964104272507, "grad_norm": 0.76953125, "learning_rate": 4.030933531488593e-06, "loss": 4.0075, "step": 19912 }, { "epoch": 6.633297243274757, "grad_norm": 0.78125, "learning_rate": 4.030215905742173e-06, "loss": 3.9105, "step": 19913 }, { "epoch": 6.633630382277005, "grad_norm": 0.71875, "learning_rate": 4.029498320411217e-06, "loss": 3.954, "step": 19914 }, { "epoch": 6.633963521279254, "grad_norm": 0.7890625, "learning_rate": 4.028780775504092e-06, "loss": 4.0359, "step": 19915 }, { "epoch": 6.634296660281502, "grad_norm": 0.76171875, "learning_rate": 4.028063271029152e-06, "loss": 4.0027, "step": 19916 }, { "epoch": 6.6346297992837515, "grad_norm": 0.7265625, "learning_rate": 4.0273458069947545e-06, "loss": 4.0067, "step": 19917 }, { "epoch": 6.634962938286, "grad_norm": 0.734375, "learning_rate": 4.026628383409254e-06, "loss": 4.0468, "step": 19918 }, { "epoch": 6.635296077288249, "grad_norm": 0.7421875, "learning_rate": 4.025911000281012e-06, "loss": 3.9938, "step": 19919 }, { "epoch": 6.635629216290497, "grad_norm": 0.8203125, "learning_rate": 4.0251936576183825e-06, "loss": 4.04, "step": 19920 }, { "epoch": 6.635962355292746, "grad_norm": 0.75390625, "learning_rate": 4.02447635542972e-06, "loss": 4.0149, "step": 19921 }, { "epoch": 6.636295494294995, "grad_norm": 0.78515625, "learning_rate": 4.023759093723381e-06, "loss": 3.9775, "step": 19922 }, { "epoch": 6.636628633297243, "grad_norm": 0.7578125, "learning_rate": 4.023041872507718e-06, "loss": 4.082, "step": 19923 }, { "epoch": 6.636961772299492, "grad_norm": 0.75390625, "learning_rate": 4.022324691791087e-06, "loss": 4.0361, "step": 19924 }, { "epoch": 6.6372949113017405, "grad_norm": 0.74609375, "learning_rate": 4.021607551581837e-06, "loss": 3.9365, "step": 19925 }, { "epoch": 6.63762805030399, "grad_norm": 0.78125, "learning_rate": 4.020890451888328e-06, "loss": 3.985, "step": 19926 }, { "epoch": 6.637961189306238, "grad_norm": 0.78515625, "learning_rate": 4.020173392718908e-06, "loss": 3.9796, "step": 19927 }, { "epoch": 6.638294328308486, "grad_norm": 0.78125, "learning_rate": 4.019456374081932e-06, "loss": 3.9922, "step": 19928 }, { "epoch": 6.6386274673107355, "grad_norm": 0.74609375, "learning_rate": 4.018739395985749e-06, "loss": 4.032, "step": 19929 }, { "epoch": 6.638960606312984, "grad_norm": 0.75390625, "learning_rate": 4.018022458438716e-06, "loss": 4.0133, "step": 19930 }, { "epoch": 6.639293745315233, "grad_norm": 0.765625, "learning_rate": 4.017305561449176e-06, "loss": 4.0227, "step": 19931 }, { "epoch": 6.639626884317481, "grad_norm": 0.80078125, "learning_rate": 4.016588705025481e-06, "loss": 3.9664, "step": 19932 }, { "epoch": 6.6399600233197305, "grad_norm": 0.70703125, "learning_rate": 4.015871889175984e-06, "loss": 3.9385, "step": 19933 }, { "epoch": 6.640293162321979, "grad_norm": 0.765625, "learning_rate": 4.015155113909034e-06, "loss": 3.9975, "step": 19934 }, { "epoch": 6.640626301324227, "grad_norm": 0.765625, "learning_rate": 4.0144383792329786e-06, "loss": 4.0, "step": 19935 }, { "epoch": 6.640959440326476, "grad_norm": 0.7421875, "learning_rate": 4.013721685156163e-06, "loss": 3.9727, "step": 19936 }, { "epoch": 6.641292579328725, "grad_norm": 0.765625, "learning_rate": 4.013005031686942e-06, "loss": 4.0547, "step": 19937 }, { "epoch": 6.641625718330974, "grad_norm": 0.78515625, "learning_rate": 4.012288418833659e-06, "loss": 3.9185, "step": 19938 }, { "epoch": 6.641958857333222, "grad_norm": 0.75390625, "learning_rate": 4.011571846604662e-06, "loss": 4.0699, "step": 19939 }, { "epoch": 6.642291996335471, "grad_norm": 0.7265625, "learning_rate": 4.010855315008297e-06, "loss": 4.0205, "step": 19940 }, { "epoch": 6.6426251353377195, "grad_norm": 0.7421875, "learning_rate": 4.010138824052909e-06, "loss": 4.0492, "step": 19941 }, { "epoch": 6.642958274339969, "grad_norm": 0.828125, "learning_rate": 4.009422373746846e-06, "loss": 3.9681, "step": 19942 }, { "epoch": 6.643291413342217, "grad_norm": 0.8203125, "learning_rate": 4.008705964098449e-06, "loss": 3.9415, "step": 19943 }, { "epoch": 6.643624552344466, "grad_norm": 0.74609375, "learning_rate": 4.007989595116067e-06, "loss": 3.984, "step": 19944 }, { "epoch": 6.643957691346714, "grad_norm": 0.73046875, "learning_rate": 4.007273266808044e-06, "loss": 3.9651, "step": 19945 }, { "epoch": 6.644290830348963, "grad_norm": 0.7265625, "learning_rate": 4.006556979182722e-06, "loss": 4.0096, "step": 19946 }, { "epoch": 6.644623969351212, "grad_norm": 0.734375, "learning_rate": 4.0058407322484405e-06, "loss": 4.0226, "step": 19947 }, { "epoch": 6.64495710835346, "grad_norm": 0.81640625, "learning_rate": 4.005124526013553e-06, "loss": 3.9481, "step": 19948 }, { "epoch": 6.645290247355709, "grad_norm": 0.76953125, "learning_rate": 4.004408360486391e-06, "loss": 4.0152, "step": 19949 }, { "epoch": 6.645623386357958, "grad_norm": 0.80859375, "learning_rate": 4.003692235675298e-06, "loss": 3.9459, "step": 19950 }, { "epoch": 6.645956525360207, "grad_norm": 0.78125, "learning_rate": 4.00297615158862e-06, "loss": 3.9335, "step": 19951 }, { "epoch": 6.646289664362455, "grad_norm": 0.8125, "learning_rate": 4.0022601082346945e-06, "loss": 3.9136, "step": 19952 }, { "epoch": 6.6466228033647035, "grad_norm": 0.734375, "learning_rate": 4.001544105621864e-06, "loss": 4.046, "step": 19953 }, { "epoch": 6.646955942366953, "grad_norm": 0.7734375, "learning_rate": 4.000828143758464e-06, "loss": 3.9489, "step": 19954 }, { "epoch": 6.647289081369201, "grad_norm": 0.7109375, "learning_rate": 4.000112222652839e-06, "loss": 4.0236, "step": 19955 }, { "epoch": 6.64762222037145, "grad_norm": 0.74609375, "learning_rate": 3.999396342313327e-06, "loss": 4.0381, "step": 19956 }, { "epoch": 6.647955359373698, "grad_norm": 0.74609375, "learning_rate": 3.998680502748267e-06, "loss": 3.9598, "step": 19957 }, { "epoch": 6.648288498375948, "grad_norm": 0.7578125, "learning_rate": 3.99796470396599e-06, "loss": 3.975, "step": 19958 }, { "epoch": 6.648621637378196, "grad_norm": 0.74609375, "learning_rate": 3.997248945974842e-06, "loss": 4.0221, "step": 19959 }, { "epoch": 6.648954776380445, "grad_norm": 0.7421875, "learning_rate": 3.996533228783157e-06, "loss": 4.0107, "step": 19960 }, { "epoch": 6.649287915382693, "grad_norm": 0.73046875, "learning_rate": 3.995817552399271e-06, "loss": 3.9688, "step": 19961 }, { "epoch": 6.6496210543849426, "grad_norm": 0.76171875, "learning_rate": 3.995101916831518e-06, "loss": 4.0027, "step": 19962 }, { "epoch": 6.649954193387191, "grad_norm": 0.75390625, "learning_rate": 3.9943863220882375e-06, "loss": 4.038, "step": 19963 }, { "epoch": 6.650287332389439, "grad_norm": 0.7734375, "learning_rate": 3.993670768177765e-06, "loss": 3.9609, "step": 19964 }, { "epoch": 6.650620471391688, "grad_norm": 0.796875, "learning_rate": 3.9929552551084335e-06, "loss": 3.9652, "step": 19965 }, { "epoch": 6.650953610393937, "grad_norm": 0.80859375, "learning_rate": 3.992239782888575e-06, "loss": 4.0026, "step": 19966 }, { "epoch": 6.651286749396186, "grad_norm": 0.78515625, "learning_rate": 3.991524351526527e-06, "loss": 3.9656, "step": 19967 }, { "epoch": 6.651619888398434, "grad_norm": 0.75, "learning_rate": 3.99080896103062e-06, "loss": 3.9588, "step": 19968 }, { "epoch": 6.651953027400683, "grad_norm": 0.796875, "learning_rate": 3.990093611409185e-06, "loss": 3.9714, "step": 19969 }, { "epoch": 6.652286166402932, "grad_norm": 0.75390625, "learning_rate": 3.98937830267056e-06, "loss": 3.9387, "step": 19970 }, { "epoch": 6.65261930540518, "grad_norm": 0.74609375, "learning_rate": 3.988663034823074e-06, "loss": 3.9897, "step": 19971 }, { "epoch": 6.652952444407429, "grad_norm": 0.74609375, "learning_rate": 3.987947807875058e-06, "loss": 3.9816, "step": 19972 }, { "epoch": 6.653285583409677, "grad_norm": 0.72265625, "learning_rate": 3.987232621834839e-06, "loss": 3.9407, "step": 19973 }, { "epoch": 6.6536187224119265, "grad_norm": 0.796875, "learning_rate": 3.986517476710756e-06, "loss": 3.9931, "step": 19974 }, { "epoch": 6.653951861414175, "grad_norm": 0.765625, "learning_rate": 3.985802372511136e-06, "loss": 4.0076, "step": 19975 }, { "epoch": 6.654285000416424, "grad_norm": 0.75, "learning_rate": 3.985087309244301e-06, "loss": 4.0075, "step": 19976 }, { "epoch": 6.654618139418672, "grad_norm": 0.7734375, "learning_rate": 3.9843722869185886e-06, "loss": 3.9616, "step": 19977 }, { "epoch": 6.6549512784209215, "grad_norm": 0.73046875, "learning_rate": 3.983657305542324e-06, "loss": 3.904, "step": 19978 }, { "epoch": 6.65528441742317, "grad_norm": 0.78125, "learning_rate": 3.982942365123836e-06, "loss": 3.8995, "step": 19979 }, { "epoch": 6.655617556425419, "grad_norm": 0.7421875, "learning_rate": 3.982227465671448e-06, "loss": 4.0233, "step": 19980 }, { "epoch": 6.655950695427667, "grad_norm": 0.796875, "learning_rate": 3.981512607193494e-06, "loss": 3.9616, "step": 19981 }, { "epoch": 6.656283834429916, "grad_norm": 0.7890625, "learning_rate": 3.980797789698297e-06, "loss": 4.0201, "step": 19982 }, { "epoch": 6.656616973432165, "grad_norm": 0.84765625, "learning_rate": 3.980083013194182e-06, "loss": 3.9371, "step": 19983 }, { "epoch": 6.656950112434413, "grad_norm": 0.80078125, "learning_rate": 3.979368277689477e-06, "loss": 4.0219, "step": 19984 }, { "epoch": 6.657283251436662, "grad_norm": 0.765625, "learning_rate": 3.978653583192506e-06, "loss": 3.9495, "step": 19985 }, { "epoch": 6.6576163904389105, "grad_norm": 0.77734375, "learning_rate": 3.977938929711592e-06, "loss": 3.9769, "step": 19986 }, { "epoch": 6.65794952944116, "grad_norm": 0.82421875, "learning_rate": 3.977224317255059e-06, "loss": 3.9515, "step": 19987 }, { "epoch": 6.658282668443408, "grad_norm": 0.79296875, "learning_rate": 3.976509745831236e-06, "loss": 3.9617, "step": 19988 }, { "epoch": 6.658615807445656, "grad_norm": 0.83984375, "learning_rate": 3.975795215448441e-06, "loss": 3.9602, "step": 19989 }, { "epoch": 6.6589489464479055, "grad_norm": 0.75, "learning_rate": 3.975080726115e-06, "loss": 4.0394, "step": 19990 }, { "epoch": 6.659282085450154, "grad_norm": 0.765625, "learning_rate": 3.974366277839229e-06, "loss": 3.9678, "step": 19991 }, { "epoch": 6.659615224452403, "grad_norm": 0.828125, "learning_rate": 3.973651870629458e-06, "loss": 3.9161, "step": 19992 }, { "epoch": 6.659948363454651, "grad_norm": 0.78125, "learning_rate": 3.972937504494008e-06, "loss": 4.0908, "step": 19993 }, { "epoch": 6.6602815024569, "grad_norm": 0.7578125, "learning_rate": 3.97222317944119e-06, "loss": 3.9656, "step": 19994 }, { "epoch": 6.660614641459149, "grad_norm": 0.765625, "learning_rate": 3.971508895479334e-06, "loss": 3.8734, "step": 19995 }, { "epoch": 6.660947780461398, "grad_norm": 0.79296875, "learning_rate": 3.970794652616757e-06, "loss": 3.9818, "step": 19996 }, { "epoch": 6.661280919463646, "grad_norm": 0.78125, "learning_rate": 3.970080450861777e-06, "loss": 3.9175, "step": 19997 }, { "epoch": 6.661614058465895, "grad_norm": 0.78515625, "learning_rate": 3.969366290222711e-06, "loss": 3.9818, "step": 19998 }, { "epoch": 6.661947197468144, "grad_norm": 0.7890625, "learning_rate": 3.968652170707885e-06, "loss": 3.9833, "step": 19999 }, { "epoch": 6.662280336470392, "grad_norm": 0.76953125, "learning_rate": 3.967938092325611e-06, "loss": 4.0095, "step": 20000 }, { "epoch": 6.662613475472641, "grad_norm": 0.73046875, "learning_rate": 3.967224055084208e-06, "loss": 4.0161, "step": 20001 }, { "epoch": 6.6629466144748895, "grad_norm": 0.76953125, "learning_rate": 3.966510058991993e-06, "loss": 3.9253, "step": 20002 }, { "epoch": 6.663279753477139, "grad_norm": 0.828125, "learning_rate": 3.965796104057282e-06, "loss": 3.9941, "step": 20003 }, { "epoch": 6.663612892479387, "grad_norm": 0.859375, "learning_rate": 3.965082190288391e-06, "loss": 3.9944, "step": 20004 }, { "epoch": 6.663946031481636, "grad_norm": 0.8203125, "learning_rate": 3.964368317693638e-06, "loss": 4.0002, "step": 20005 }, { "epoch": 6.664279170483884, "grad_norm": 0.76171875, "learning_rate": 3.96365448628133e-06, "loss": 4.05, "step": 20006 }, { "epoch": 6.664612309486133, "grad_norm": 0.74609375, "learning_rate": 3.962940696059792e-06, "loss": 4.0051, "step": 20007 }, { "epoch": 6.664945448488382, "grad_norm": 0.7734375, "learning_rate": 3.962226947037333e-06, "loss": 4.0112, "step": 20008 }, { "epoch": 6.66527858749063, "grad_norm": 0.76171875, "learning_rate": 3.961513239222269e-06, "loss": 4.0366, "step": 20009 }, { "epoch": 6.665611726492879, "grad_norm": 0.80078125, "learning_rate": 3.960799572622907e-06, "loss": 4.035, "step": 20010 }, { "epoch": 6.665944865495128, "grad_norm": 0.828125, "learning_rate": 3.9600859472475695e-06, "loss": 3.955, "step": 20011 }, { "epoch": 6.666278004497377, "grad_norm": 0.78515625, "learning_rate": 3.959372363104561e-06, "loss": 3.9389, "step": 20012 }, { "epoch": 6.666611143499625, "grad_norm": 0.7265625, "learning_rate": 3.958658820202193e-06, "loss": 3.9404, "step": 20013 }, { "epoch": 6.666944282501873, "grad_norm": 0.734375, "learning_rate": 3.957945318548782e-06, "loss": 3.9839, "step": 20014 }, { "epoch": 6.667277421504123, "grad_norm": 0.76171875, "learning_rate": 3.957231858152635e-06, "loss": 3.9699, "step": 20015 }, { "epoch": 6.667610560506371, "grad_norm": 0.7890625, "learning_rate": 3.956518439022063e-06, "loss": 3.9807, "step": 20016 }, { "epoch": 6.66794369950862, "grad_norm": 0.76171875, "learning_rate": 3.955805061165375e-06, "loss": 4.0232, "step": 20017 }, { "epoch": 6.668276838510868, "grad_norm": 0.77734375, "learning_rate": 3.955091724590883e-06, "loss": 3.9798, "step": 20018 }, { "epoch": 6.668609977513118, "grad_norm": 0.77734375, "learning_rate": 3.954378429306895e-06, "loss": 3.985, "step": 20019 }, { "epoch": 6.668943116515366, "grad_norm": 0.7421875, "learning_rate": 3.953665175321717e-06, "loss": 3.982, "step": 20020 }, { "epoch": 6.669276255517615, "grad_norm": 0.76171875, "learning_rate": 3.952951962643659e-06, "loss": 3.9679, "step": 20021 }, { "epoch": 6.669609394519863, "grad_norm": 0.77734375, "learning_rate": 3.952238791281028e-06, "loss": 3.9126, "step": 20022 }, { "epoch": 6.6699425335221125, "grad_norm": 0.76171875, "learning_rate": 3.951525661242131e-06, "loss": 3.9349, "step": 20023 }, { "epoch": 6.670275672524361, "grad_norm": 0.7578125, "learning_rate": 3.95081257253527e-06, "loss": 4.0255, "step": 20024 }, { "epoch": 6.670608811526609, "grad_norm": 0.78515625, "learning_rate": 3.950099525168759e-06, "loss": 4.0024, "step": 20025 }, { "epoch": 6.670941950528858, "grad_norm": 0.78515625, "learning_rate": 3.949386519150899e-06, "loss": 4.0081, "step": 20026 }, { "epoch": 6.671275089531107, "grad_norm": 0.7890625, "learning_rate": 3.948673554489996e-06, "loss": 3.9952, "step": 20027 }, { "epoch": 6.671608228533356, "grad_norm": 0.765625, "learning_rate": 3.94796063119435e-06, "loss": 4.0599, "step": 20028 }, { "epoch": 6.671941367535604, "grad_norm": 0.78515625, "learning_rate": 3.947247749272276e-06, "loss": 3.9665, "step": 20029 }, { "epoch": 6.672274506537853, "grad_norm": 0.7421875, "learning_rate": 3.946534908732067e-06, "loss": 3.9483, "step": 20030 }, { "epoch": 6.6726076455401016, "grad_norm": 0.76953125, "learning_rate": 3.945822109582026e-06, "loss": 4.0087, "step": 20031 }, { "epoch": 6.67294078454235, "grad_norm": 0.765625, "learning_rate": 3.945109351830463e-06, "loss": 3.9856, "step": 20032 }, { "epoch": 6.673273923544599, "grad_norm": 0.7421875, "learning_rate": 3.944396635485676e-06, "loss": 3.946, "step": 20033 }, { "epoch": 6.673607062546847, "grad_norm": 0.765625, "learning_rate": 3.943683960555966e-06, "loss": 4.0163, "step": 20034 }, { "epoch": 6.6739402015490965, "grad_norm": 0.76171875, "learning_rate": 3.942971327049632e-06, "loss": 3.9564, "step": 20035 }, { "epoch": 6.674273340551345, "grad_norm": 0.78515625, "learning_rate": 3.942258734974981e-06, "loss": 3.9325, "step": 20036 }, { "epoch": 6.674606479553594, "grad_norm": 0.77734375, "learning_rate": 3.941546184340309e-06, "loss": 3.9853, "step": 20037 }, { "epoch": 6.674939618555842, "grad_norm": 0.79296875, "learning_rate": 3.9408336751539185e-06, "loss": 3.9522, "step": 20038 }, { "epoch": 6.6752727575580915, "grad_norm": 0.76171875, "learning_rate": 3.9401212074241045e-06, "loss": 4.0086, "step": 20039 }, { "epoch": 6.67560589656034, "grad_norm": 0.76953125, "learning_rate": 3.939408781159167e-06, "loss": 4.0161, "step": 20040 }, { "epoch": 6.675939035562589, "grad_norm": 0.765625, "learning_rate": 3.938696396367407e-06, "loss": 4.0277, "step": 20041 }, { "epoch": 6.676272174564837, "grad_norm": 0.7890625, "learning_rate": 3.937984053057116e-06, "loss": 3.9853, "step": 20042 }, { "epoch": 6.6766053135670855, "grad_norm": 0.76171875, "learning_rate": 3.937271751236599e-06, "loss": 3.9555, "step": 20043 }, { "epoch": 6.676938452569335, "grad_norm": 0.7421875, "learning_rate": 3.936559490914149e-06, "loss": 3.9796, "step": 20044 }, { "epoch": 6.677271591571583, "grad_norm": 0.80078125, "learning_rate": 3.935847272098063e-06, "loss": 3.9627, "step": 20045 }, { "epoch": 6.677604730573832, "grad_norm": 0.78515625, "learning_rate": 3.935135094796632e-06, "loss": 4.0332, "step": 20046 }, { "epoch": 6.6779378695760805, "grad_norm": 0.77734375, "learning_rate": 3.934422959018163e-06, "loss": 4.0316, "step": 20047 }, { "epoch": 6.67827100857833, "grad_norm": 0.7578125, "learning_rate": 3.933710864770941e-06, "loss": 4.0649, "step": 20048 }, { "epoch": 6.678604147580578, "grad_norm": 0.796875, "learning_rate": 3.932998812063261e-06, "loss": 3.9755, "step": 20049 }, { "epoch": 6.678937286582826, "grad_norm": 0.74609375, "learning_rate": 3.932286800903417e-06, "loss": 4.011, "step": 20050 }, { "epoch": 6.679270425585075, "grad_norm": 0.78125, "learning_rate": 3.931574831299707e-06, "loss": 3.9376, "step": 20051 }, { "epoch": 6.679603564587324, "grad_norm": 0.76171875, "learning_rate": 3.930862903260421e-06, "loss": 4.0006, "step": 20052 }, { "epoch": 6.679936703589573, "grad_norm": 0.796875, "learning_rate": 3.930151016793851e-06, "loss": 4.0081, "step": 20053 }, { "epoch": 6.680269842591821, "grad_norm": 0.76171875, "learning_rate": 3.929439171908287e-06, "loss": 3.9331, "step": 20054 }, { "epoch": 6.68060298159407, "grad_norm": 0.7578125, "learning_rate": 3.928727368612025e-06, "loss": 4.0107, "step": 20055 }, { "epoch": 6.680936120596319, "grad_norm": 0.7578125, "learning_rate": 3.928015606913357e-06, "loss": 3.9592, "step": 20056 }, { "epoch": 6.681269259598568, "grad_norm": 0.78125, "learning_rate": 3.927303886820564e-06, "loss": 3.9344, "step": 20057 }, { "epoch": 6.681602398600816, "grad_norm": 0.73046875, "learning_rate": 3.9265922083419475e-06, "loss": 3.9814, "step": 20058 }, { "epoch": 6.681935537603065, "grad_norm": 0.765625, "learning_rate": 3.925880571485789e-06, "loss": 4.0177, "step": 20059 }, { "epoch": 6.682268676605314, "grad_norm": 0.81640625, "learning_rate": 3.9251689762603816e-06, "loss": 4.0122, "step": 20060 }, { "epoch": 6.682601815607562, "grad_norm": 0.76171875, "learning_rate": 3.924457422674009e-06, "loss": 3.955, "step": 20061 }, { "epoch": 6.682934954609811, "grad_norm": 0.734375, "learning_rate": 3.923745910734968e-06, "loss": 3.9873, "step": 20062 }, { "epoch": 6.683268093612059, "grad_norm": 0.78125, "learning_rate": 3.923034440451538e-06, "loss": 4.0068, "step": 20063 }, { "epoch": 6.683601232614309, "grad_norm": 0.76953125, "learning_rate": 3.92232301183201e-06, "loss": 4.0703, "step": 20064 }, { "epoch": 6.683934371616557, "grad_norm": 0.78515625, "learning_rate": 3.92161162488467e-06, "loss": 3.982, "step": 20065 }, { "epoch": 6.684267510618806, "grad_norm": 0.80078125, "learning_rate": 3.9209002796178045e-06, "loss": 4.0059, "step": 20066 }, { "epoch": 6.684600649621054, "grad_norm": 0.79296875, "learning_rate": 3.920188976039697e-06, "loss": 4.0077, "step": 20067 }, { "epoch": 6.684933788623303, "grad_norm": 0.76953125, "learning_rate": 3.919477714158632e-06, "loss": 3.9483, "step": 20068 }, { "epoch": 6.685266927625552, "grad_norm": 0.765625, "learning_rate": 3.9187664939828995e-06, "loss": 4.0791, "step": 20069 }, { "epoch": 6.6856000666278, "grad_norm": 0.81640625, "learning_rate": 3.918055315520779e-06, "loss": 3.9606, "step": 20070 }, { "epoch": 6.685933205630049, "grad_norm": 0.7734375, "learning_rate": 3.917344178780557e-06, "loss": 3.9891, "step": 20071 }, { "epoch": 6.686266344632298, "grad_norm": 0.78125, "learning_rate": 3.916633083770512e-06, "loss": 4.0322, "step": 20072 }, { "epoch": 6.686599483634547, "grad_norm": 0.7890625, "learning_rate": 3.915922030498933e-06, "loss": 4.0068, "step": 20073 }, { "epoch": 6.686932622636795, "grad_norm": 0.76953125, "learning_rate": 3.915211018974102e-06, "loss": 4.0037, "step": 20074 }, { "epoch": 6.687265761639043, "grad_norm": 0.76953125, "learning_rate": 3.914500049204291e-06, "loss": 3.9859, "step": 20075 }, { "epoch": 6.687598900641293, "grad_norm": 0.828125, "learning_rate": 3.913789121197793e-06, "loss": 3.9626, "step": 20076 }, { "epoch": 6.687932039643541, "grad_norm": 0.8046875, "learning_rate": 3.913078234962883e-06, "loss": 4.0657, "step": 20077 }, { "epoch": 6.68826517864579, "grad_norm": 0.796875, "learning_rate": 3.9123673905078425e-06, "loss": 4.034, "step": 20078 }, { "epoch": 6.688598317648038, "grad_norm": 0.734375, "learning_rate": 3.911656587840947e-06, "loss": 4.0308, "step": 20079 }, { "epoch": 6.6889314566502875, "grad_norm": 0.75390625, "learning_rate": 3.910945826970485e-06, "loss": 3.9681, "step": 20080 }, { "epoch": 6.689264595652536, "grad_norm": 0.76171875, "learning_rate": 3.910235107904729e-06, "loss": 4.0038, "step": 20081 }, { "epoch": 6.689597734654785, "grad_norm": 0.8046875, "learning_rate": 3.909524430651959e-06, "loss": 4.0187, "step": 20082 }, { "epoch": 6.689930873657033, "grad_norm": 0.84765625, "learning_rate": 3.908813795220452e-06, "loss": 3.9728, "step": 20083 }, { "epoch": 6.6902640126592825, "grad_norm": 0.8359375, "learning_rate": 3.908103201618487e-06, "loss": 3.9162, "step": 20084 }, { "epoch": 6.690597151661531, "grad_norm": 0.765625, "learning_rate": 3.907392649854339e-06, "loss": 3.9825, "step": 20085 }, { "epoch": 6.690930290663779, "grad_norm": 0.76171875, "learning_rate": 3.906682139936281e-06, "loss": 3.9974, "step": 20086 }, { "epoch": 6.691263429666028, "grad_norm": 0.796875, "learning_rate": 3.905971671872597e-06, "loss": 3.994, "step": 20087 }, { "epoch": 6.691596568668277, "grad_norm": 0.7421875, "learning_rate": 3.905261245671559e-06, "loss": 4.0169, "step": 20088 }, { "epoch": 6.691929707670526, "grad_norm": 0.78125, "learning_rate": 3.90455086134144e-06, "loss": 3.9315, "step": 20089 }, { "epoch": 6.692262846672774, "grad_norm": 0.76171875, "learning_rate": 3.903840518890513e-06, "loss": 3.9974, "step": 20090 }, { "epoch": 6.692595985675023, "grad_norm": 0.75, "learning_rate": 3.903130218327058e-06, "loss": 3.8983, "step": 20091 }, { "epoch": 6.6929291246772715, "grad_norm": 0.76953125, "learning_rate": 3.902419959659348e-06, "loss": 3.9877, "step": 20092 }, { "epoch": 6.69326226367952, "grad_norm": 0.7734375, "learning_rate": 3.901709742895647e-06, "loss": 4.0532, "step": 20093 }, { "epoch": 6.693595402681769, "grad_norm": 0.80078125, "learning_rate": 3.900999568044238e-06, "loss": 4.066, "step": 20094 }, { "epoch": 6.693928541684017, "grad_norm": 0.81640625, "learning_rate": 3.900289435113387e-06, "loss": 4.0827, "step": 20095 }, { "epoch": 6.6942616806862665, "grad_norm": 0.80078125, "learning_rate": 3.8995793441113666e-06, "loss": 3.9732, "step": 20096 }, { "epoch": 6.694594819688515, "grad_norm": 0.7734375, "learning_rate": 3.898869295046449e-06, "loss": 3.94, "step": 20097 }, { "epoch": 6.694927958690764, "grad_norm": 0.765625, "learning_rate": 3.898159287926902e-06, "loss": 4.0598, "step": 20098 }, { "epoch": 6.695261097693012, "grad_norm": 0.78515625, "learning_rate": 3.897449322760999e-06, "loss": 3.9877, "step": 20099 }, { "epoch": 6.695594236695261, "grad_norm": 0.73828125, "learning_rate": 3.8967393995570095e-06, "loss": 3.9881, "step": 20100 }, { "epoch": 6.69592737569751, "grad_norm": 0.8046875, "learning_rate": 3.896029518323202e-06, "loss": 3.9009, "step": 20101 }, { "epoch": 6.696260514699759, "grad_norm": 0.75390625, "learning_rate": 3.895319679067843e-06, "loss": 3.943, "step": 20102 }, { "epoch": 6.696593653702007, "grad_norm": 0.75, "learning_rate": 3.894609881799202e-06, "loss": 3.9418, "step": 20103 }, { "epoch": 6.6969267927042555, "grad_norm": 0.73046875, "learning_rate": 3.8939001265255474e-06, "loss": 4.0004, "step": 20104 }, { "epoch": 6.697259931706505, "grad_norm": 0.76953125, "learning_rate": 3.8931904132551425e-06, "loss": 4.0701, "step": 20105 }, { "epoch": 6.697593070708753, "grad_norm": 0.80078125, "learning_rate": 3.892480741996261e-06, "loss": 3.8372, "step": 20106 }, { "epoch": 6.697926209711002, "grad_norm": 0.78515625, "learning_rate": 3.891771112757163e-06, "loss": 3.9907, "step": 20107 }, { "epoch": 6.6982593487132505, "grad_norm": 0.8046875, "learning_rate": 3.891061525546118e-06, "loss": 4.0082, "step": 20108 }, { "epoch": 6.6985924877155, "grad_norm": 0.78125, "learning_rate": 3.890351980371384e-06, "loss": 3.9382, "step": 20109 }, { "epoch": 6.698925626717748, "grad_norm": 0.77734375, "learning_rate": 3.889642477241238e-06, "loss": 3.976, "step": 20110 }, { "epoch": 6.699258765719996, "grad_norm": 0.78515625, "learning_rate": 3.888933016163934e-06, "loss": 3.9297, "step": 20111 }, { "epoch": 6.699591904722245, "grad_norm": 0.73046875, "learning_rate": 3.888223597147736e-06, "loss": 4.1079, "step": 20112 }, { "epoch": 6.699925043724494, "grad_norm": 0.7578125, "learning_rate": 3.887514220200914e-06, "loss": 3.905, "step": 20113 }, { "epoch": 6.700258182726743, "grad_norm": 0.74609375, "learning_rate": 3.886804885331725e-06, "loss": 4.0295, "step": 20114 }, { "epoch": 6.700591321728991, "grad_norm": 0.75390625, "learning_rate": 3.8860955925484336e-06, "loss": 3.9564, "step": 20115 }, { "epoch": 6.70092446073124, "grad_norm": 0.7890625, "learning_rate": 3.8853863418592976e-06, "loss": 3.9197, "step": 20116 }, { "epoch": 6.701257599733489, "grad_norm": 0.74609375, "learning_rate": 3.884677133272585e-06, "loss": 3.9465, "step": 20117 }, { "epoch": 6.701590738735738, "grad_norm": 0.80078125, "learning_rate": 3.883967966796553e-06, "loss": 3.9335, "step": 20118 }, { "epoch": 6.701923877737986, "grad_norm": 0.76171875, "learning_rate": 3.883258842439462e-06, "loss": 4.0424, "step": 20119 }, { "epoch": 6.702257016740235, "grad_norm": 0.7890625, "learning_rate": 3.882549760209571e-06, "loss": 3.9693, "step": 20120 }, { "epoch": 6.702590155742484, "grad_norm": 0.80078125, "learning_rate": 3.881840720115141e-06, "loss": 3.9774, "step": 20121 }, { "epoch": 6.702923294744732, "grad_norm": 0.765625, "learning_rate": 3.881131722164428e-06, "loss": 4.0023, "step": 20122 }, { "epoch": 6.703256433746981, "grad_norm": 0.79296875, "learning_rate": 3.88042276636569e-06, "loss": 3.9662, "step": 20123 }, { "epoch": 6.703589572749229, "grad_norm": 0.734375, "learning_rate": 3.879713852727189e-06, "loss": 3.9851, "step": 20124 }, { "epoch": 6.703922711751479, "grad_norm": 0.8046875, "learning_rate": 3.879004981257181e-06, "loss": 3.99, "step": 20125 }, { "epoch": 6.704255850753727, "grad_norm": 0.77734375, "learning_rate": 3.878296151963922e-06, "loss": 3.9809, "step": 20126 }, { "epoch": 6.704588989755976, "grad_norm": 0.8046875, "learning_rate": 3.877587364855665e-06, "loss": 3.9861, "step": 20127 }, { "epoch": 6.704922128758224, "grad_norm": 0.75390625, "learning_rate": 3.876878619940675e-06, "loss": 4.0289, "step": 20128 }, { "epoch": 6.705255267760473, "grad_norm": 0.78515625, "learning_rate": 3.876169917227198e-06, "loss": 4.0012, "step": 20129 }, { "epoch": 6.705588406762722, "grad_norm": 0.8203125, "learning_rate": 3.87546125672349e-06, "loss": 3.9628, "step": 20130 }, { "epoch": 6.70592154576497, "grad_norm": 0.796875, "learning_rate": 3.87475263843781e-06, "loss": 4.0296, "step": 20131 }, { "epoch": 6.706254684767219, "grad_norm": 0.75, "learning_rate": 3.87404406237841e-06, "loss": 3.984, "step": 20132 }, { "epoch": 6.706587823769468, "grad_norm": 0.76171875, "learning_rate": 3.873335528553543e-06, "loss": 4.0391, "step": 20133 }, { "epoch": 6.706920962771717, "grad_norm": 0.74609375, "learning_rate": 3.872627036971458e-06, "loss": 3.972, "step": 20134 }, { "epoch": 6.707254101773965, "grad_norm": 0.80859375, "learning_rate": 3.871918587640414e-06, "loss": 3.9836, "step": 20135 }, { "epoch": 6.707587240776213, "grad_norm": 0.78125, "learning_rate": 3.871210180568661e-06, "loss": 4.046, "step": 20136 }, { "epoch": 6.707920379778463, "grad_norm": 0.859375, "learning_rate": 3.870501815764449e-06, "loss": 3.9229, "step": 20137 }, { "epoch": 6.708253518780712, "grad_norm": 0.80859375, "learning_rate": 3.86979349323603e-06, "loss": 4.0344, "step": 20138 }, { "epoch": 6.70858665778296, "grad_norm": 0.75, "learning_rate": 3.869085212991652e-06, "loss": 3.9917, "step": 20139 }, { "epoch": 6.708919796785208, "grad_norm": 0.84375, "learning_rate": 3.868376975039568e-06, "loss": 4.0195, "step": 20140 }, { "epoch": 6.7092529357874575, "grad_norm": 0.74609375, "learning_rate": 3.867668779388024e-06, "loss": 3.9923, "step": 20141 }, { "epoch": 6.709586074789706, "grad_norm": 0.7421875, "learning_rate": 3.866960626045272e-06, "loss": 4.0036, "step": 20142 }, { "epoch": 6.709919213791955, "grad_norm": 0.77734375, "learning_rate": 3.866252515019562e-06, "loss": 4.0107, "step": 20143 }, { "epoch": 6.710252352794203, "grad_norm": 0.78125, "learning_rate": 3.865544446319138e-06, "loss": 3.9458, "step": 20144 }, { "epoch": 6.7105854917964525, "grad_norm": 0.77734375, "learning_rate": 3.864836419952249e-06, "loss": 3.9442, "step": 20145 }, { "epoch": 6.710918630798701, "grad_norm": 0.78125, "learning_rate": 3.864128435927142e-06, "loss": 3.915, "step": 20146 }, { "epoch": 6.711251769800949, "grad_norm": 0.8671875, "learning_rate": 3.863420494252064e-06, "loss": 3.9427, "step": 20147 }, { "epoch": 6.711584908803198, "grad_norm": 0.75, "learning_rate": 3.86271259493526e-06, "loss": 3.94, "step": 20148 }, { "epoch": 6.7119180478054465, "grad_norm": 0.76953125, "learning_rate": 3.8620047379849724e-06, "loss": 4.0177, "step": 20149 }, { "epoch": 6.712251186807696, "grad_norm": 0.80078125, "learning_rate": 3.861296923409453e-06, "loss": 3.9077, "step": 20150 }, { "epoch": 6.712584325809944, "grad_norm": 0.75390625, "learning_rate": 3.860589151216943e-06, "loss": 4.0188, "step": 20151 }, { "epoch": 6.712917464812193, "grad_norm": 0.796875, "learning_rate": 3.859881421415687e-06, "loss": 3.9921, "step": 20152 }, { "epoch": 6.7132506038144415, "grad_norm": 0.79296875, "learning_rate": 3.859173734013925e-06, "loss": 4.0448, "step": 20153 }, { "epoch": 6.71358374281669, "grad_norm": 0.796875, "learning_rate": 3.858466089019906e-06, "loss": 4.0002, "step": 20154 }, { "epoch": 6.713916881818939, "grad_norm": 0.77734375, "learning_rate": 3.857758486441872e-06, "loss": 3.9905, "step": 20155 }, { "epoch": 6.714250020821187, "grad_norm": 0.76953125, "learning_rate": 3.857050926288056e-06, "loss": 3.9355, "step": 20156 }, { "epoch": 6.7145831598234365, "grad_norm": 0.75, "learning_rate": 3.85634340856671e-06, "loss": 3.9885, "step": 20157 }, { "epoch": 6.714916298825685, "grad_norm": 0.75390625, "learning_rate": 3.855635933286071e-06, "loss": 4.03, "step": 20158 }, { "epoch": 6.715249437827934, "grad_norm": 0.78125, "learning_rate": 3.8549285004543796e-06, "loss": 3.9948, "step": 20159 }, { "epoch": 6.715582576830182, "grad_norm": 0.72265625, "learning_rate": 3.8542211100798735e-06, "loss": 3.9573, "step": 20160 }, { "epoch": 6.715915715832431, "grad_norm": 0.77734375, "learning_rate": 3.853513762170797e-06, "loss": 4.0019, "step": 20161 }, { "epoch": 6.71624885483468, "grad_norm": 0.7578125, "learning_rate": 3.852806456735387e-06, "loss": 4.0, "step": 20162 }, { "epoch": 6.716581993836929, "grad_norm": 0.77734375, "learning_rate": 3.852099193781882e-06, "loss": 3.9301, "step": 20163 }, { "epoch": 6.716915132839177, "grad_norm": 0.7421875, "learning_rate": 3.8513919733185205e-06, "loss": 3.9835, "step": 20164 }, { "epoch": 6.7172482718414255, "grad_norm": 0.74609375, "learning_rate": 3.850684795353538e-06, "loss": 4.0014, "step": 20165 }, { "epoch": 6.717581410843675, "grad_norm": 0.71484375, "learning_rate": 3.849977659895176e-06, "loss": 4.0665, "step": 20166 }, { "epoch": 6.717914549845923, "grad_norm": 0.7578125, "learning_rate": 3.8492705669516634e-06, "loss": 4.0447, "step": 20167 }, { "epoch": 6.718247688848172, "grad_norm": 0.81640625, "learning_rate": 3.848563516531245e-06, "loss": 3.9798, "step": 20168 }, { "epoch": 6.71858082785042, "grad_norm": 0.76171875, "learning_rate": 3.847856508642152e-06, "loss": 4.0315, "step": 20169 }, { "epoch": 6.71891396685267, "grad_norm": 0.83203125, "learning_rate": 3.84714954329262e-06, "loss": 4.0186, "step": 20170 }, { "epoch": 6.719247105854918, "grad_norm": 0.81640625, "learning_rate": 3.846442620490882e-06, "loss": 4.018, "step": 20171 }, { "epoch": 6.719580244857166, "grad_norm": 0.765625, "learning_rate": 3.845735740245176e-06, "loss": 3.9417, "step": 20172 }, { "epoch": 6.719913383859415, "grad_norm": 0.76171875, "learning_rate": 3.8450289025637365e-06, "loss": 3.9496, "step": 20173 }, { "epoch": 6.720246522861664, "grad_norm": 0.875, "learning_rate": 3.844322107454788e-06, "loss": 3.975, "step": 20174 }, { "epoch": 6.720579661863913, "grad_norm": 0.76171875, "learning_rate": 3.8436153549265705e-06, "loss": 3.952, "step": 20175 }, { "epoch": 6.720912800866161, "grad_norm": 0.78125, "learning_rate": 3.842908644987316e-06, "loss": 3.9834, "step": 20176 }, { "epoch": 6.72124593986841, "grad_norm": 0.78515625, "learning_rate": 3.842201977645254e-06, "loss": 4.023, "step": 20177 }, { "epoch": 6.721579078870659, "grad_norm": 0.73046875, "learning_rate": 3.8414953529086136e-06, "loss": 4.0242, "step": 20178 }, { "epoch": 6.721912217872908, "grad_norm": 0.78515625, "learning_rate": 3.84078877078563e-06, "loss": 4.0037, "step": 20179 }, { "epoch": 6.722245356875156, "grad_norm": 0.77734375, "learning_rate": 3.840082231284532e-06, "loss": 3.9144, "step": 20180 }, { "epoch": 6.722578495877405, "grad_norm": 0.80859375, "learning_rate": 3.839375734413548e-06, "loss": 3.9314, "step": 20181 }, { "epoch": 6.722911634879654, "grad_norm": 0.8046875, "learning_rate": 3.838669280180908e-06, "loss": 4.0312, "step": 20182 }, { "epoch": 6.723244773881902, "grad_norm": 0.8125, "learning_rate": 3.8379628685948415e-06, "loss": 3.9956, "step": 20183 }, { "epoch": 6.723577912884151, "grad_norm": 0.74609375, "learning_rate": 3.8372564996635744e-06, "loss": 3.9773, "step": 20184 }, { "epoch": 6.723911051886399, "grad_norm": 0.7734375, "learning_rate": 3.836550173395334e-06, "loss": 3.9377, "step": 20185 }, { "epoch": 6.7242441908886486, "grad_norm": 0.81640625, "learning_rate": 3.835843889798351e-06, "loss": 3.9423, "step": 20186 }, { "epoch": 6.724577329890897, "grad_norm": 0.7265625, "learning_rate": 3.83513764888085e-06, "loss": 4.0489, "step": 20187 }, { "epoch": 6.724910468893146, "grad_norm": 0.74609375, "learning_rate": 3.834431450651058e-06, "loss": 3.958, "step": 20188 }, { "epoch": 6.725243607895394, "grad_norm": 0.75390625, "learning_rate": 3.8337252951171965e-06, "loss": 3.9655, "step": 20189 }, { "epoch": 6.725576746897643, "grad_norm": 0.796875, "learning_rate": 3.8330191822875e-06, "loss": 3.9731, "step": 20190 }, { "epoch": 6.725909885899892, "grad_norm": 0.78125, "learning_rate": 3.832313112170184e-06, "loss": 4.0063, "step": 20191 }, { "epoch": 6.72624302490214, "grad_norm": 0.7734375, "learning_rate": 3.831607084773478e-06, "loss": 3.9824, "step": 20192 }, { "epoch": 6.726576163904389, "grad_norm": 0.75, "learning_rate": 3.830901100105599e-06, "loss": 4.0817, "step": 20193 }, { "epoch": 6.726909302906638, "grad_norm": 0.8046875, "learning_rate": 3.830195158174778e-06, "loss": 3.914, "step": 20194 }, { "epoch": 6.727242441908887, "grad_norm": 0.76171875, "learning_rate": 3.8294892589892355e-06, "loss": 3.9085, "step": 20195 }, { "epoch": 6.727575580911135, "grad_norm": 0.78125, "learning_rate": 3.828783402557191e-06, "loss": 3.9988, "step": 20196 }, { "epoch": 6.727908719913384, "grad_norm": 0.79296875, "learning_rate": 3.828077588886867e-06, "loss": 4.0033, "step": 20197 }, { "epoch": 6.7282418589156325, "grad_norm": 0.78125, "learning_rate": 3.827371817986488e-06, "loss": 4.0102, "step": 20198 }, { "epoch": 6.728574997917882, "grad_norm": 0.8046875, "learning_rate": 3.826666089864275e-06, "loss": 3.971, "step": 20199 }, { "epoch": 6.72890813692013, "grad_norm": 0.79296875, "learning_rate": 3.825960404528441e-06, "loss": 4.0116, "step": 20200 }, { "epoch": 6.729241275922378, "grad_norm": 0.8203125, "learning_rate": 3.825254761987212e-06, "loss": 3.9145, "step": 20201 }, { "epoch": 6.7295744149246275, "grad_norm": 0.78515625, "learning_rate": 3.824549162248806e-06, "loss": 4.0451, "step": 20202 }, { "epoch": 6.729907553926876, "grad_norm": 0.77734375, "learning_rate": 3.823843605321441e-06, "loss": 4.0618, "step": 20203 }, { "epoch": 6.730240692929125, "grad_norm": 0.765625, "learning_rate": 3.823138091213333e-06, "loss": 4.0009, "step": 20204 }, { "epoch": 6.730573831931373, "grad_norm": 0.80859375, "learning_rate": 3.822432619932705e-06, "loss": 4.0026, "step": 20205 }, { "epoch": 6.7309069709336224, "grad_norm": 0.7734375, "learning_rate": 3.821727191487772e-06, "loss": 3.9376, "step": 20206 }, { "epoch": 6.731240109935871, "grad_norm": 0.7578125, "learning_rate": 3.821021805886748e-06, "loss": 3.9884, "step": 20207 }, { "epoch": 6.731573248938119, "grad_norm": 0.78125, "learning_rate": 3.8203164631378535e-06, "loss": 3.9367, "step": 20208 }, { "epoch": 6.731906387940368, "grad_norm": 0.75, "learning_rate": 3.8196111632493e-06, "loss": 4.0866, "step": 20209 }, { "epoch": 6.7322395269426165, "grad_norm": 0.80078125, "learning_rate": 3.818905906229306e-06, "loss": 3.9761, "step": 20210 }, { "epoch": 6.732572665944866, "grad_norm": 0.83203125, "learning_rate": 3.818200692086082e-06, "loss": 3.9184, "step": 20211 }, { "epoch": 6.732905804947114, "grad_norm": 0.765625, "learning_rate": 3.817495520827849e-06, "loss": 4.0185, "step": 20212 }, { "epoch": 6.733238943949363, "grad_norm": 0.79296875, "learning_rate": 3.816790392462816e-06, "loss": 3.9971, "step": 20213 }, { "epoch": 6.7335720829516115, "grad_norm": 0.76953125, "learning_rate": 3.816085306999196e-06, "loss": 3.9798, "step": 20214 }, { "epoch": 6.73390522195386, "grad_norm": 0.78125, "learning_rate": 3.815380264445202e-06, "loss": 4.0527, "step": 20215 }, { "epoch": 6.734238360956109, "grad_norm": 0.7578125, "learning_rate": 3.814675264809048e-06, "loss": 4.0568, "step": 20216 }, { "epoch": 6.734571499958357, "grad_norm": 0.734375, "learning_rate": 3.8139703080989484e-06, "loss": 3.9761, "step": 20217 }, { "epoch": 6.734904638960606, "grad_norm": 0.76953125, "learning_rate": 3.8132653943231054e-06, "loss": 4.0287, "step": 20218 }, { "epoch": 6.735237777962855, "grad_norm": 0.76953125, "learning_rate": 3.812560523489738e-06, "loss": 3.9915, "step": 20219 }, { "epoch": 6.735570916965104, "grad_norm": 0.74609375, "learning_rate": 3.8118556956070535e-06, "loss": 3.9357, "step": 20220 }, { "epoch": 6.735904055967352, "grad_norm": 0.73828125, "learning_rate": 3.811150910683261e-06, "loss": 3.9512, "step": 20221 }, { "epoch": 6.736237194969601, "grad_norm": 0.7578125, "learning_rate": 3.810446168726568e-06, "loss": 3.9462, "step": 20222 }, { "epoch": 6.73657033397185, "grad_norm": 0.7734375, "learning_rate": 3.8097414697451875e-06, "loss": 3.9453, "step": 20223 }, { "epoch": 6.736903472974099, "grad_norm": 0.76171875, "learning_rate": 3.8090368137473273e-06, "loss": 3.9418, "step": 20224 }, { "epoch": 6.737236611976347, "grad_norm": 0.7421875, "learning_rate": 3.8083322007411923e-06, "loss": 3.9735, "step": 20225 }, { "epoch": 6.7375697509785955, "grad_norm": 0.734375, "learning_rate": 3.807627630734992e-06, "loss": 3.9674, "step": 20226 }, { "epoch": 6.737902889980845, "grad_norm": 0.79296875, "learning_rate": 3.8069231037369318e-06, "loss": 3.9967, "step": 20227 }, { "epoch": 6.738236028983093, "grad_norm": 0.7421875, "learning_rate": 3.806218619755218e-06, "loss": 4.0303, "step": 20228 }, { "epoch": 6.738569167985342, "grad_norm": 0.796875, "learning_rate": 3.805514178798054e-06, "loss": 3.9902, "step": 20229 }, { "epoch": 6.73890230698759, "grad_norm": 0.765625, "learning_rate": 3.8048097808736504e-06, "loss": 4.0406, "step": 20230 }, { "epoch": 6.73923544598984, "grad_norm": 0.73828125, "learning_rate": 3.8041054259902084e-06, "loss": 3.9295, "step": 20231 }, { "epoch": 6.739568584992088, "grad_norm": 0.7421875, "learning_rate": 3.8034011141559344e-06, "loss": 3.9931, "step": 20232 }, { "epoch": 6.739901723994336, "grad_norm": 0.74609375, "learning_rate": 3.802696845379026e-06, "loss": 4.0585, "step": 20233 }, { "epoch": 6.740234862996585, "grad_norm": 0.79296875, "learning_rate": 3.801992619667695e-06, "loss": 3.9921, "step": 20234 }, { "epoch": 6.740568001998834, "grad_norm": 0.74609375, "learning_rate": 3.801288437030143e-06, "loss": 4.0024, "step": 20235 }, { "epoch": 6.740901141001083, "grad_norm": 0.765625, "learning_rate": 3.800584297474566e-06, "loss": 4.0428, "step": 20236 }, { "epoch": 6.741234280003331, "grad_norm": 0.796875, "learning_rate": 3.7998802010091665e-06, "loss": 3.9871, "step": 20237 }, { "epoch": 6.74156741900558, "grad_norm": 0.76953125, "learning_rate": 3.7991761476421505e-06, "loss": 3.972, "step": 20238 }, { "epoch": 6.741900558007829, "grad_norm": 0.8046875, "learning_rate": 3.7984721373817173e-06, "loss": 3.9279, "step": 20239 }, { "epoch": 6.742233697010078, "grad_norm": 0.74609375, "learning_rate": 3.7977681702360658e-06, "loss": 3.9941, "step": 20240 }, { "epoch": 6.742566836012326, "grad_norm": 0.77734375, "learning_rate": 3.7970642462133927e-06, "loss": 3.9557, "step": 20241 }, { "epoch": 6.742899975014575, "grad_norm": 0.80859375, "learning_rate": 3.7963603653219033e-06, "loss": 4.0969, "step": 20242 }, { "epoch": 6.743233114016824, "grad_norm": 0.77734375, "learning_rate": 3.7956565275697943e-06, "loss": 4.0183, "step": 20243 }, { "epoch": 6.743566253019072, "grad_norm": 0.81640625, "learning_rate": 3.7949527329652632e-06, "loss": 3.9556, "step": 20244 }, { "epoch": 6.743899392021321, "grad_norm": 0.75, "learning_rate": 3.7942489815165077e-06, "loss": 4.0233, "step": 20245 }, { "epoch": 6.744232531023569, "grad_norm": 0.76953125, "learning_rate": 3.7935452732317236e-06, "loss": 4.0107, "step": 20246 }, { "epoch": 6.7445656700258185, "grad_norm": 0.765625, "learning_rate": 3.7928416081191107e-06, "loss": 3.9239, "step": 20247 }, { "epoch": 6.744898809028067, "grad_norm": 0.7734375, "learning_rate": 3.7921379861868606e-06, "loss": 3.945, "step": 20248 }, { "epoch": 6.745231948030316, "grad_norm": 0.734375, "learning_rate": 3.791434407443173e-06, "loss": 3.9045, "step": 20249 }, { "epoch": 6.745565087032564, "grad_norm": 0.80859375, "learning_rate": 3.790730871896243e-06, "loss": 3.9761, "step": 20250 }, { "epoch": 6.745898226034813, "grad_norm": 0.8046875, "learning_rate": 3.790027379554263e-06, "loss": 3.946, "step": 20251 }, { "epoch": 6.746231365037062, "grad_norm": 0.75390625, "learning_rate": 3.7893239304254266e-06, "loss": 3.8872, "step": 20252 }, { "epoch": 6.74656450403931, "grad_norm": 0.765625, "learning_rate": 3.7886205245179343e-06, "loss": 3.9769, "step": 20253 }, { "epoch": 6.746897643041559, "grad_norm": 0.75, "learning_rate": 3.787917161839971e-06, "loss": 4.0165, "step": 20254 }, { "epoch": 6.7472307820438076, "grad_norm": 0.76171875, "learning_rate": 3.7872138423997296e-06, "loss": 3.9363, "step": 20255 }, { "epoch": 6.747563921046057, "grad_norm": 0.828125, "learning_rate": 3.7865105662054085e-06, "loss": 3.9587, "step": 20256 }, { "epoch": 6.747897060048305, "grad_norm": 0.7890625, "learning_rate": 3.785807333265195e-06, "loss": 3.9447, "step": 20257 }, { "epoch": 6.748230199050554, "grad_norm": 0.79296875, "learning_rate": 3.7851041435872817e-06, "loss": 4.0342, "step": 20258 }, { "epoch": 6.7485633380528025, "grad_norm": 0.74609375, "learning_rate": 3.784400997179856e-06, "loss": 4.0231, "step": 20259 }, { "epoch": 6.748896477055052, "grad_norm": 0.78515625, "learning_rate": 3.7836978940511126e-06, "loss": 3.9616, "step": 20260 }, { "epoch": 6.7492296160573, "grad_norm": 0.765625, "learning_rate": 3.7829948342092397e-06, "loss": 3.962, "step": 20261 }, { "epoch": 6.749562755059548, "grad_norm": 0.78515625, "learning_rate": 3.782291817662426e-06, "loss": 3.9529, "step": 20262 }, { "epoch": 6.7498958940617975, "grad_norm": 0.79296875, "learning_rate": 3.78158884441886e-06, "loss": 3.9107, "step": 20263 }, { "epoch": 6.750229033064046, "grad_norm": 0.75390625, "learning_rate": 3.78088591448673e-06, "loss": 3.9909, "step": 20264 }, { "epoch": 6.750562172066295, "grad_norm": 0.75, "learning_rate": 3.780183027874225e-06, "loss": 3.9453, "step": 20265 }, { "epoch": 6.750895311068543, "grad_norm": 0.80859375, "learning_rate": 3.779480184589526e-06, "loss": 3.9782, "step": 20266 }, { "epoch": 6.751228450070792, "grad_norm": 0.76171875, "learning_rate": 3.778777384640827e-06, "loss": 3.9374, "step": 20267 }, { "epoch": 6.751561589073041, "grad_norm": 0.734375, "learning_rate": 3.7780746280363125e-06, "loss": 3.9536, "step": 20268 }, { "epoch": 6.751894728075289, "grad_norm": 0.8203125, "learning_rate": 3.7773719147841657e-06, "loss": 3.9552, "step": 20269 }, { "epoch": 6.752227867077538, "grad_norm": 0.796875, "learning_rate": 3.776669244892571e-06, "loss": 3.9716, "step": 20270 }, { "epoch": 6.7525610060797865, "grad_norm": 0.73828125, "learning_rate": 3.77596661836972e-06, "loss": 4.0607, "step": 20271 }, { "epoch": 6.752894145082036, "grad_norm": 0.77734375, "learning_rate": 3.775264035223789e-06, "loss": 3.9257, "step": 20272 }, { "epoch": 6.753227284084284, "grad_norm": 0.79296875, "learning_rate": 3.774561495462961e-06, "loss": 3.9469, "step": 20273 }, { "epoch": 6.753560423086533, "grad_norm": 0.7421875, "learning_rate": 3.7738589990954255e-06, "loss": 3.9972, "step": 20274 }, { "epoch": 6.753893562088781, "grad_norm": 0.76171875, "learning_rate": 3.773156546129362e-06, "loss": 3.9424, "step": 20275 }, { "epoch": 6.75422670109103, "grad_norm": 0.71875, "learning_rate": 3.7724541365729517e-06, "loss": 4.0179, "step": 20276 }, { "epoch": 6.754559840093279, "grad_norm": 0.80078125, "learning_rate": 3.7717517704343752e-06, "loss": 3.9582, "step": 20277 }, { "epoch": 6.754892979095528, "grad_norm": 0.7421875, "learning_rate": 3.771049447721817e-06, "loss": 3.988, "step": 20278 }, { "epoch": 6.755226118097776, "grad_norm": 0.78125, "learning_rate": 3.7703471684434558e-06, "loss": 3.932, "step": 20279 }, { "epoch": 6.755559257100025, "grad_norm": 0.78125, "learning_rate": 3.769644932607475e-06, "loss": 3.9356, "step": 20280 }, { "epoch": 6.755892396102274, "grad_norm": 0.7890625, "learning_rate": 3.7689427402220455e-06, "loss": 4.0317, "step": 20281 }, { "epoch": 6.756225535104522, "grad_norm": 0.734375, "learning_rate": 3.7682405912953533e-06, "loss": 3.9599, "step": 20282 }, { "epoch": 6.756558674106771, "grad_norm": 0.80078125, "learning_rate": 3.7675384858355765e-06, "loss": 4.0376, "step": 20283 }, { "epoch": 6.75689181310902, "grad_norm": 0.75, "learning_rate": 3.7668364238508905e-06, "loss": 3.9972, "step": 20284 }, { "epoch": 6.757224952111269, "grad_norm": 0.73828125, "learning_rate": 3.766134405349472e-06, "loss": 4.0024, "step": 20285 }, { "epoch": 6.757558091113517, "grad_norm": 0.74609375, "learning_rate": 3.765432430339503e-06, "loss": 4.0245, "step": 20286 }, { "epoch": 6.757891230115765, "grad_norm": 0.78125, "learning_rate": 3.764730498829157e-06, "loss": 3.953, "step": 20287 }, { "epoch": 6.758224369118015, "grad_norm": 0.7265625, "learning_rate": 3.7640286108266115e-06, "loss": 4.0222, "step": 20288 }, { "epoch": 6.758557508120263, "grad_norm": 0.77734375, "learning_rate": 3.7633267663400383e-06, "loss": 3.9913, "step": 20289 }, { "epoch": 6.758890647122512, "grad_norm": 0.79296875, "learning_rate": 3.762624965377616e-06, "loss": 3.9702, "step": 20290 }, { "epoch": 6.75922378612476, "grad_norm": 0.7890625, "learning_rate": 3.7619232079475177e-06, "loss": 4.0129, "step": 20291 }, { "epoch": 6.75955692512701, "grad_norm": 0.7890625, "learning_rate": 3.7612214940579137e-06, "loss": 4.0247, "step": 20292 }, { "epoch": 6.759890064129258, "grad_norm": 0.75390625, "learning_rate": 3.7605198237169853e-06, "loss": 3.9707, "step": 20293 }, { "epoch": 6.760223203131506, "grad_norm": 0.74609375, "learning_rate": 3.7598181969329e-06, "loss": 4.0393, "step": 20294 }, { "epoch": 6.760556342133755, "grad_norm": 0.8125, "learning_rate": 3.759116613713832e-06, "loss": 4.0866, "step": 20295 }, { "epoch": 6.760889481136004, "grad_norm": 0.75390625, "learning_rate": 3.75841507406795e-06, "loss": 4.0212, "step": 20296 }, { "epoch": 6.761222620138253, "grad_norm": 0.7734375, "learning_rate": 3.7577135780034304e-06, "loss": 3.9712, "step": 20297 }, { "epoch": 6.761555759140501, "grad_norm": 0.7421875, "learning_rate": 3.7570121255284457e-06, "loss": 3.9861, "step": 20298 }, { "epoch": 6.76188889814275, "grad_norm": 0.76953125, "learning_rate": 3.7563107166511556e-06, "loss": 3.9801, "step": 20299 }, { "epoch": 6.762222037144999, "grad_norm": 0.76953125, "learning_rate": 3.7556093513797408e-06, "loss": 3.9403, "step": 20300 }, { "epoch": 6.762555176147248, "grad_norm": 0.796875, "learning_rate": 3.754908029722366e-06, "loss": 3.9154, "step": 20301 }, { "epoch": 6.762888315149496, "grad_norm": 0.78515625, "learning_rate": 3.7542067516872004e-06, "loss": 4.0017, "step": 20302 }, { "epoch": 6.763221454151745, "grad_norm": 0.78515625, "learning_rate": 3.7535055172824107e-06, "loss": 4.0243, "step": 20303 }, { "epoch": 6.7635545931539935, "grad_norm": 0.76953125, "learning_rate": 3.752804326516169e-06, "loss": 3.917, "step": 20304 }, { "epoch": 6.763887732156242, "grad_norm": 0.73046875, "learning_rate": 3.7521031793966416e-06, "loss": 3.9512, "step": 20305 }, { "epoch": 6.764220871158491, "grad_norm": 0.7578125, "learning_rate": 3.7514020759319923e-06, "loss": 3.9791, "step": 20306 }, { "epoch": 6.764554010160739, "grad_norm": 0.7734375, "learning_rate": 3.7507010161303912e-06, "loss": 3.9442, "step": 20307 }, { "epoch": 6.7648871491629885, "grad_norm": 0.78515625, "learning_rate": 3.750000000000002e-06, "loss": 3.8705, "step": 20308 }, { "epoch": 6.765220288165237, "grad_norm": 0.74609375, "learning_rate": 3.74929902754899e-06, "loss": 4.0065, "step": 20309 }, { "epoch": 6.765553427167486, "grad_norm": 0.74609375, "learning_rate": 3.748598098785517e-06, "loss": 4.0177, "step": 20310 }, { "epoch": 6.765886566169734, "grad_norm": 0.859375, "learning_rate": 3.747897213717754e-06, "loss": 3.9333, "step": 20311 }, { "epoch": 6.766219705171983, "grad_norm": 0.8125, "learning_rate": 3.7471963723538614e-06, "loss": 3.9113, "step": 20312 }, { "epoch": 6.766552844174232, "grad_norm": 0.76953125, "learning_rate": 3.7464955747020026e-06, "loss": 4.0499, "step": 20313 }, { "epoch": 6.76688598317648, "grad_norm": 0.8046875, "learning_rate": 3.7457948207703375e-06, "loss": 3.9908, "step": 20314 }, { "epoch": 6.767219122178729, "grad_norm": 0.77734375, "learning_rate": 3.7450941105670335e-06, "loss": 3.9337, "step": 20315 }, { "epoch": 6.7675522611809775, "grad_norm": 0.734375, "learning_rate": 3.7443934441002527e-06, "loss": 3.9674, "step": 20316 }, { "epoch": 6.767885400183227, "grad_norm": 0.78125, "learning_rate": 3.7436928213781485e-06, "loss": 3.9977, "step": 20317 }, { "epoch": 6.768218539185475, "grad_norm": 0.78125, "learning_rate": 3.7429922424088895e-06, "loss": 3.9792, "step": 20318 }, { "epoch": 6.768551678187724, "grad_norm": 0.76171875, "learning_rate": 3.742291707200633e-06, "loss": 4.0187, "step": 20319 }, { "epoch": 6.7688848171899725, "grad_norm": 0.74609375, "learning_rate": 3.7415912157615393e-06, "loss": 4.0968, "step": 20320 }, { "epoch": 6.769217956192222, "grad_norm": 0.76171875, "learning_rate": 3.7408907680997644e-06, "loss": 4.0145, "step": 20321 }, { "epoch": 6.76955109519447, "grad_norm": 0.796875, "learning_rate": 3.7401903642234727e-06, "loss": 3.9721, "step": 20322 }, { "epoch": 6.769884234196718, "grad_norm": 0.75390625, "learning_rate": 3.7394900041408193e-06, "loss": 3.9567, "step": 20323 }, { "epoch": 6.770217373198967, "grad_norm": 0.73828125, "learning_rate": 3.738789687859962e-06, "loss": 4.0194, "step": 20324 }, { "epoch": 6.770550512201216, "grad_norm": 0.77734375, "learning_rate": 3.7380894153890584e-06, "loss": 3.9651, "step": 20325 }, { "epoch": 6.770883651203465, "grad_norm": 0.81640625, "learning_rate": 3.7373891867362647e-06, "loss": 3.9476, "step": 20326 }, { "epoch": 6.771216790205713, "grad_norm": 0.74609375, "learning_rate": 3.736689001909737e-06, "loss": 3.9549, "step": 20327 }, { "epoch": 6.771549929207962, "grad_norm": 0.75, "learning_rate": 3.735988860917631e-06, "loss": 4.0201, "step": 20328 }, { "epoch": 6.771883068210211, "grad_norm": 0.76171875, "learning_rate": 3.7352887637680997e-06, "loss": 4.0333, "step": 20329 }, { "epoch": 6.772216207212459, "grad_norm": 0.75, "learning_rate": 3.734588710469302e-06, "loss": 4.0127, "step": 20330 }, { "epoch": 6.772549346214708, "grad_norm": 0.7890625, "learning_rate": 3.7338887010293903e-06, "loss": 4.0345, "step": 20331 }, { "epoch": 6.7728824852169565, "grad_norm": 0.75390625, "learning_rate": 3.7331887354565177e-06, "loss": 3.9921, "step": 20332 }, { "epoch": 6.773215624219206, "grad_norm": 0.7578125, "learning_rate": 3.7324888137588355e-06, "loss": 4.039, "step": 20333 }, { "epoch": 6.773548763221454, "grad_norm": 0.76953125, "learning_rate": 3.7317889359445033e-06, "loss": 3.9844, "step": 20334 }, { "epoch": 6.773881902223703, "grad_norm": 0.78515625, "learning_rate": 3.7310891020216648e-06, "loss": 3.9619, "step": 20335 }, { "epoch": 6.774215041225951, "grad_norm": 0.7890625, "learning_rate": 3.730389311998472e-06, "loss": 3.9234, "step": 20336 }, { "epoch": 6.774548180228201, "grad_norm": 0.6875, "learning_rate": 3.7296895658830825e-06, "loss": 3.9783, "step": 20337 }, { "epoch": 6.774881319230449, "grad_norm": 0.77734375, "learning_rate": 3.728989863683642e-06, "loss": 3.9568, "step": 20338 }, { "epoch": 6.775214458232698, "grad_norm": 0.73046875, "learning_rate": 3.728290205408302e-06, "loss": 4.0381, "step": 20339 }, { "epoch": 6.775547597234946, "grad_norm": 0.75390625, "learning_rate": 3.727590591065209e-06, "loss": 3.9847, "step": 20340 }, { "epoch": 6.775880736237195, "grad_norm": 0.78125, "learning_rate": 3.726891020662517e-06, "loss": 3.9442, "step": 20341 }, { "epoch": 6.776213875239444, "grad_norm": 0.80078125, "learning_rate": 3.726191494208372e-06, "loss": 4.0608, "step": 20342 }, { "epoch": 6.776547014241692, "grad_norm": 0.8046875, "learning_rate": 3.7254920117109225e-06, "loss": 3.9805, "step": 20343 }, { "epoch": 6.776880153243941, "grad_norm": 0.7734375, "learning_rate": 3.7247925731783156e-06, "loss": 4.0542, "step": 20344 }, { "epoch": 6.77721329224619, "grad_norm": 0.796875, "learning_rate": 3.724093178618698e-06, "loss": 4.0351, "step": 20345 }, { "epoch": 6.777546431248439, "grad_norm": 0.75390625, "learning_rate": 3.7233938280402157e-06, "loss": 4.0219, "step": 20346 }, { "epoch": 6.777879570250687, "grad_norm": 0.7578125, "learning_rate": 3.722694521451013e-06, "loss": 3.9982, "step": 20347 }, { "epoch": 6.778212709252935, "grad_norm": 0.75, "learning_rate": 3.72199525885924e-06, "loss": 3.978, "step": 20348 }, { "epoch": 6.778545848255185, "grad_norm": 0.8125, "learning_rate": 3.721296040273039e-06, "loss": 3.9269, "step": 20349 }, { "epoch": 6.778878987257433, "grad_norm": 0.80078125, "learning_rate": 3.7205968657005547e-06, "loss": 3.9936, "step": 20350 }, { "epoch": 6.779212126259682, "grad_norm": 0.71875, "learning_rate": 3.719897735149928e-06, "loss": 3.9347, "step": 20351 }, { "epoch": 6.77954526526193, "grad_norm": 0.75, "learning_rate": 3.7191986486293104e-06, "loss": 3.9374, "step": 20352 }, { "epoch": 6.7798784042641795, "grad_norm": 0.765625, "learning_rate": 3.7184996061468367e-06, "loss": 3.9616, "step": 20353 }, { "epoch": 6.780211543266428, "grad_norm": 0.76171875, "learning_rate": 3.717800607710649e-06, "loss": 3.9642, "step": 20354 }, { "epoch": 6.780544682268676, "grad_norm": 0.765625, "learning_rate": 3.7171016533288948e-06, "loss": 4.004, "step": 20355 }, { "epoch": 6.780877821270925, "grad_norm": 0.73828125, "learning_rate": 3.7164027430097125e-06, "loss": 3.9964, "step": 20356 }, { "epoch": 6.781210960273174, "grad_norm": 0.8046875, "learning_rate": 3.715703876761244e-06, "loss": 3.9837, "step": 20357 }, { "epoch": 6.781544099275423, "grad_norm": 0.76171875, "learning_rate": 3.715005054591624e-06, "loss": 3.941, "step": 20358 }, { "epoch": 6.781877238277671, "grad_norm": 0.7265625, "learning_rate": 3.7143062765090012e-06, "loss": 3.9556, "step": 20359 }, { "epoch": 6.78221037727992, "grad_norm": 0.75, "learning_rate": 3.7136075425215097e-06, "loss": 4.0502, "step": 20360 }, { "epoch": 6.782543516282169, "grad_norm": 0.75390625, "learning_rate": 3.7129088526372885e-06, "loss": 4.0713, "step": 20361 }, { "epoch": 6.782876655284418, "grad_norm": 0.765625, "learning_rate": 3.7122102068644774e-06, "loss": 3.92, "step": 20362 }, { "epoch": 6.783209794286666, "grad_norm": 0.7578125, "learning_rate": 3.7115116052112123e-06, "loss": 3.9737, "step": 20363 }, { "epoch": 6.783542933288915, "grad_norm": 0.75390625, "learning_rate": 3.7108130476856318e-06, "loss": 4.0545, "step": 20364 }, { "epoch": 6.7838760722911635, "grad_norm": 0.80078125, "learning_rate": 3.710114534295869e-06, "loss": 3.8957, "step": 20365 }, { "epoch": 6.784209211293412, "grad_norm": 0.76953125, "learning_rate": 3.709416065050065e-06, "loss": 3.9733, "step": 20366 }, { "epoch": 6.784542350295661, "grad_norm": 0.7578125, "learning_rate": 3.7087176399563543e-06, "loss": 4.118, "step": 20367 }, { "epoch": 6.784875489297909, "grad_norm": 0.78515625, "learning_rate": 3.7080192590228703e-06, "loss": 3.9924, "step": 20368 }, { "epoch": 6.7852086283001585, "grad_norm": 0.8203125, "learning_rate": 3.7073209222577466e-06, "loss": 4.026, "step": 20369 }, { "epoch": 6.785541767302407, "grad_norm": 0.7890625, "learning_rate": 3.706622629669125e-06, "loss": 3.9703, "step": 20370 }, { "epoch": 6.785874906304656, "grad_norm": 0.75, "learning_rate": 3.7059243812651296e-06, "loss": 3.91, "step": 20371 }, { "epoch": 6.786208045306904, "grad_norm": 0.796875, "learning_rate": 3.7052261770538977e-06, "loss": 3.849, "step": 20372 }, { "epoch": 6.7865411843091525, "grad_norm": 0.77734375, "learning_rate": 3.7045280170435584e-06, "loss": 4.0103, "step": 20373 }, { "epoch": 6.786874323311402, "grad_norm": 0.76171875, "learning_rate": 3.7038299012422493e-06, "loss": 4.0314, "step": 20374 }, { "epoch": 6.78720746231365, "grad_norm": 0.78515625, "learning_rate": 3.7031318296581e-06, "loss": 3.9569, "step": 20375 }, { "epoch": 6.787540601315899, "grad_norm": 0.80078125, "learning_rate": 3.702433802299241e-06, "loss": 3.9555, "step": 20376 }, { "epoch": 6.7878737403181475, "grad_norm": 0.765625, "learning_rate": 3.701735819173799e-06, "loss": 3.9259, "step": 20377 }, { "epoch": 6.788206879320397, "grad_norm": 0.78125, "learning_rate": 3.701037880289911e-06, "loss": 3.9778, "step": 20378 }, { "epoch": 6.788540018322645, "grad_norm": 0.78125, "learning_rate": 3.7003399856557065e-06, "loss": 3.9506, "step": 20379 }, { "epoch": 6.788873157324894, "grad_norm": 0.7890625, "learning_rate": 3.6996421352793046e-06, "loss": 3.9941, "step": 20380 }, { "epoch": 6.7892062963271425, "grad_norm": 0.8203125, "learning_rate": 3.6989443291688434e-06, "loss": 3.9786, "step": 20381 }, { "epoch": 6.789539435329392, "grad_norm": 0.73046875, "learning_rate": 3.6982465673324474e-06, "loss": 4.0823, "step": 20382 }, { "epoch": 6.78987257433164, "grad_norm": 0.76171875, "learning_rate": 3.6975488497782452e-06, "loss": 3.9814, "step": 20383 }, { "epoch": 6.790205713333888, "grad_norm": 0.7578125, "learning_rate": 3.696851176514359e-06, "loss": 3.8935, "step": 20384 }, { "epoch": 6.790538852336137, "grad_norm": 0.78125, "learning_rate": 3.6961535475489227e-06, "loss": 3.9701, "step": 20385 }, { "epoch": 6.790871991338386, "grad_norm": 0.75390625, "learning_rate": 3.6954559628900573e-06, "loss": 4.0253, "step": 20386 }, { "epoch": 6.791205130340635, "grad_norm": 0.79296875, "learning_rate": 3.69475842254589e-06, "loss": 3.9433, "step": 20387 }, { "epoch": 6.791538269342883, "grad_norm": 0.74609375, "learning_rate": 3.694060926524545e-06, "loss": 4.0043, "step": 20388 }, { "epoch": 6.791871408345132, "grad_norm": 0.828125, "learning_rate": 3.693363474834146e-06, "loss": 3.9294, "step": 20389 }, { "epoch": 6.792204547347381, "grad_norm": 0.79296875, "learning_rate": 3.692666067482818e-06, "loss": 3.9771, "step": 20390 }, { "epoch": 6.792537686349629, "grad_norm": 0.76953125, "learning_rate": 3.691968704478681e-06, "loss": 4.0622, "step": 20391 }, { "epoch": 6.792870825351878, "grad_norm": 0.8203125, "learning_rate": 3.6912713858298627e-06, "loss": 3.9787, "step": 20392 }, { "epoch": 6.793203964354126, "grad_norm": 0.7421875, "learning_rate": 3.690574111544483e-06, "loss": 3.9937, "step": 20393 }, { "epoch": 6.793537103356376, "grad_norm": 0.7890625, "learning_rate": 3.689876881630664e-06, "loss": 3.9876, "step": 20394 }, { "epoch": 6.793870242358624, "grad_norm": 0.7578125, "learning_rate": 3.689179696096524e-06, "loss": 4.0079, "step": 20395 }, { "epoch": 6.794203381360873, "grad_norm": 0.7734375, "learning_rate": 3.688482554950189e-06, "loss": 4.0061, "step": 20396 }, { "epoch": 6.794536520363121, "grad_norm": 0.73828125, "learning_rate": 3.687785458199779e-06, "loss": 3.9785, "step": 20397 }, { "epoch": 6.794869659365371, "grad_norm": 0.80859375, "learning_rate": 3.6870884058534056e-06, "loss": 4.0005, "step": 20398 }, { "epoch": 6.795202798367619, "grad_norm": 0.75390625, "learning_rate": 3.6863913979191963e-06, "loss": 4.0385, "step": 20399 }, { "epoch": 6.795535937369868, "grad_norm": 0.7421875, "learning_rate": 3.6856944344052676e-06, "loss": 3.9949, "step": 20400 }, { "epoch": 6.795869076372116, "grad_norm": 0.78515625, "learning_rate": 3.6849975153197357e-06, "loss": 3.9453, "step": 20401 }, { "epoch": 6.796202215374365, "grad_norm": 0.72265625, "learning_rate": 3.6843006406707173e-06, "loss": 3.9446, "step": 20402 }, { "epoch": 6.796535354376614, "grad_norm": 0.70703125, "learning_rate": 3.6836038104663347e-06, "loss": 3.9715, "step": 20403 }, { "epoch": 6.796868493378862, "grad_norm": 0.76953125, "learning_rate": 3.682907024714702e-06, "loss": 3.968, "step": 20404 }, { "epoch": 6.797201632381111, "grad_norm": 0.75390625, "learning_rate": 3.682210283423934e-06, "loss": 4.0214, "step": 20405 }, { "epoch": 6.79753477138336, "grad_norm": 0.78125, "learning_rate": 3.681513586602146e-06, "loss": 3.9648, "step": 20406 }, { "epoch": 6.797867910385609, "grad_norm": 0.765625, "learning_rate": 3.680816934257455e-06, "loss": 3.9968, "step": 20407 }, { "epoch": 6.798201049387857, "grad_norm": 0.78125, "learning_rate": 3.680120326397974e-06, "loss": 3.9909, "step": 20408 }, { "epoch": 6.798534188390105, "grad_norm": 0.75, "learning_rate": 3.6794237630318143e-06, "loss": 4.0148, "step": 20409 }, { "epoch": 6.7988673273923546, "grad_norm": 0.76171875, "learning_rate": 3.6787272441670956e-06, "loss": 3.9792, "step": 20410 }, { "epoch": 6.799200466394603, "grad_norm": 0.78125, "learning_rate": 3.678030769811928e-06, "loss": 3.9893, "step": 20411 }, { "epoch": 6.799533605396852, "grad_norm": 0.79296875, "learning_rate": 3.6773343399744234e-06, "loss": 3.9872, "step": 20412 }, { "epoch": 6.7998667443991, "grad_norm": 0.7734375, "learning_rate": 3.676637954662691e-06, "loss": 3.9903, "step": 20413 }, { "epoch": 6.8001998834013495, "grad_norm": 0.77734375, "learning_rate": 3.6759416138848474e-06, "loss": 3.9047, "step": 20414 }, { "epoch": 6.800533022403598, "grad_norm": 0.82421875, "learning_rate": 3.6752453176490038e-06, "loss": 4.0434, "step": 20415 }, { "epoch": 6.800866161405846, "grad_norm": 0.7578125, "learning_rate": 3.674549065963266e-06, "loss": 3.9848, "step": 20416 }, { "epoch": 6.801199300408095, "grad_norm": 0.7734375, "learning_rate": 3.6738528588357416e-06, "loss": 4.0049, "step": 20417 }, { "epoch": 6.801532439410344, "grad_norm": 0.72265625, "learning_rate": 3.673156696274548e-06, "loss": 4.0163, "step": 20418 }, { "epoch": 6.801865578412593, "grad_norm": 0.78125, "learning_rate": 3.672460578287789e-06, "loss": 3.933, "step": 20419 }, { "epoch": 6.802198717414841, "grad_norm": 0.76171875, "learning_rate": 3.6717645048835737e-06, "loss": 3.9429, "step": 20420 }, { "epoch": 6.80253185641709, "grad_norm": 0.76171875, "learning_rate": 3.671068476070008e-06, "loss": 3.9056, "step": 20421 }, { "epoch": 6.8028649954193385, "grad_norm": 0.7109375, "learning_rate": 3.6703724918552037e-06, "loss": 3.9874, "step": 20422 }, { "epoch": 6.803198134421588, "grad_norm": 0.75, "learning_rate": 3.669676552247265e-06, "loss": 4.0129, "step": 20423 }, { "epoch": 6.803531273423836, "grad_norm": 0.7890625, "learning_rate": 3.668980657254297e-06, "loss": 3.967, "step": 20424 }, { "epoch": 6.803864412426085, "grad_norm": 0.78125, "learning_rate": 3.668284806884408e-06, "loss": 3.9502, "step": 20425 }, { "epoch": 6.8041975514283335, "grad_norm": 0.765625, "learning_rate": 3.6675890011457e-06, "loss": 4.0541, "step": 20426 }, { "epoch": 6.804530690430582, "grad_norm": 0.7734375, "learning_rate": 3.6668932400462802e-06, "loss": 3.9569, "step": 20427 }, { "epoch": 6.804863829432831, "grad_norm": 0.76953125, "learning_rate": 3.6661975235942484e-06, "loss": 4.0158, "step": 20428 }, { "epoch": 6.805196968435079, "grad_norm": 0.73828125, "learning_rate": 3.6655018517977144e-06, "loss": 3.9542, "step": 20429 }, { "epoch": 6.8055301074373284, "grad_norm": 0.734375, "learning_rate": 3.6648062246647783e-06, "loss": 3.9314, "step": 20430 }, { "epoch": 6.805863246439577, "grad_norm": 0.8203125, "learning_rate": 3.664110642203543e-06, "loss": 3.9985, "step": 20431 }, { "epoch": 6.806196385441826, "grad_norm": 0.71875, "learning_rate": 3.6634151044221063e-06, "loss": 3.9385, "step": 20432 }, { "epoch": 6.806529524444074, "grad_norm": 0.73046875, "learning_rate": 3.6627196113285804e-06, "loss": 3.9462, "step": 20433 }, { "epoch": 6.8068626634463225, "grad_norm": 0.765625, "learning_rate": 3.6620241629310557e-06, "loss": 3.9742, "step": 20434 }, { "epoch": 6.807195802448572, "grad_norm": 0.7734375, "learning_rate": 3.661328759237634e-06, "loss": 4.0652, "step": 20435 }, { "epoch": 6.80752894145082, "grad_norm": 0.73046875, "learning_rate": 3.660633400256421e-06, "loss": 3.9742, "step": 20436 }, { "epoch": 6.807862080453069, "grad_norm": 0.7578125, "learning_rate": 3.6599380859955126e-06, "loss": 3.9905, "step": 20437 }, { "epoch": 6.8081952194553175, "grad_norm": 0.7734375, "learning_rate": 3.659242816463008e-06, "loss": 4.0984, "step": 20438 }, { "epoch": 6.808528358457567, "grad_norm": 0.76171875, "learning_rate": 3.6585475916670026e-06, "loss": 3.9798, "step": 20439 }, { "epoch": 6.808861497459815, "grad_norm": 0.7421875, "learning_rate": 3.6578524116156e-06, "loss": 3.9734, "step": 20440 }, { "epoch": 6.809194636462064, "grad_norm": 0.73828125, "learning_rate": 3.657157276316897e-06, "loss": 4.0465, "step": 20441 }, { "epoch": 6.809527775464312, "grad_norm": 0.76171875, "learning_rate": 3.6564621857789836e-06, "loss": 4.005, "step": 20442 }, { "epoch": 6.809860914466562, "grad_norm": 0.7421875, "learning_rate": 3.655767140009963e-06, "loss": 3.9653, "step": 20443 }, { "epoch": 6.81019405346881, "grad_norm": 0.7421875, "learning_rate": 3.655072139017928e-06, "loss": 4.0087, "step": 20444 }, { "epoch": 6.810527192471058, "grad_norm": 0.74609375, "learning_rate": 3.654377182810975e-06, "loss": 3.9288, "step": 20445 }, { "epoch": 6.810860331473307, "grad_norm": 0.76171875, "learning_rate": 3.6536822713971954e-06, "loss": 4.0009, "step": 20446 }, { "epoch": 6.811193470475556, "grad_norm": 0.796875, "learning_rate": 3.6529874047846893e-06, "loss": 3.9731, "step": 20447 }, { "epoch": 6.811526609477805, "grad_norm": 0.79296875, "learning_rate": 3.6522925829815466e-06, "loss": 3.9068, "step": 20448 }, { "epoch": 6.811859748480053, "grad_norm": 0.7734375, "learning_rate": 3.6515978059958616e-06, "loss": 4.0145, "step": 20449 }, { "epoch": 6.812192887482302, "grad_norm": 0.78515625, "learning_rate": 3.6509030738357264e-06, "loss": 3.9673, "step": 20450 }, { "epoch": 6.812526026484551, "grad_norm": 0.75390625, "learning_rate": 3.6502083865092336e-06, "loss": 4.0012, "step": 20451 }, { "epoch": 6.812859165486799, "grad_norm": 0.83984375, "learning_rate": 3.649513744024475e-06, "loss": 4.0729, "step": 20452 }, { "epoch": 6.813192304489048, "grad_norm": 0.78515625, "learning_rate": 3.6488191463895372e-06, "loss": 3.9774, "step": 20453 }, { "epoch": 6.813525443491296, "grad_norm": 0.73828125, "learning_rate": 3.6481245936125175e-06, "loss": 3.9374, "step": 20454 }, { "epoch": 6.813858582493546, "grad_norm": 0.7890625, "learning_rate": 3.647430085701504e-06, "loss": 3.9278, "step": 20455 }, { "epoch": 6.814191721495794, "grad_norm": 0.76953125, "learning_rate": 3.646735622664585e-06, "loss": 4.0213, "step": 20456 }, { "epoch": 6.814524860498043, "grad_norm": 0.75, "learning_rate": 3.6460412045098464e-06, "loss": 4.039, "step": 20457 }, { "epoch": 6.814857999500291, "grad_norm": 0.76171875, "learning_rate": 3.6453468312453835e-06, "loss": 4.0026, "step": 20458 }, { "epoch": 6.8151911385025405, "grad_norm": 0.7734375, "learning_rate": 3.644652502879284e-06, "loss": 3.944, "step": 20459 }, { "epoch": 6.815524277504789, "grad_norm": 0.79296875, "learning_rate": 3.643958219419629e-06, "loss": 3.9878, "step": 20460 }, { "epoch": 6.815857416507038, "grad_norm": 0.83203125, "learning_rate": 3.6432639808745057e-06, "loss": 3.9734, "step": 20461 }, { "epoch": 6.816190555509286, "grad_norm": 0.78125, "learning_rate": 3.6425697872520057e-06, "loss": 4.0071, "step": 20462 }, { "epoch": 6.816523694511535, "grad_norm": 0.71484375, "learning_rate": 3.6418756385602134e-06, "loss": 3.9482, "step": 20463 }, { "epoch": 6.816856833513784, "grad_norm": 0.75390625, "learning_rate": 3.6411815348072128e-06, "loss": 4.0066, "step": 20464 }, { "epoch": 6.817189972516032, "grad_norm": 0.74609375, "learning_rate": 3.640487476001086e-06, "loss": 3.9968, "step": 20465 }, { "epoch": 6.817523111518281, "grad_norm": 0.71875, "learning_rate": 3.6397934621499236e-06, "loss": 3.9849, "step": 20466 }, { "epoch": 6.81785625052053, "grad_norm": 0.7421875, "learning_rate": 3.6390994932618066e-06, "loss": 4.0007, "step": 20467 }, { "epoch": 6.818189389522779, "grad_norm": 0.765625, "learning_rate": 3.638405569344817e-06, "loss": 4.0587, "step": 20468 }, { "epoch": 6.818522528525027, "grad_norm": 0.8046875, "learning_rate": 3.637711690407039e-06, "loss": 3.9426, "step": 20469 }, { "epoch": 6.818855667527275, "grad_norm": 0.82421875, "learning_rate": 3.6370178564565533e-06, "loss": 3.9673, "step": 20470 }, { "epoch": 6.8191888065295245, "grad_norm": 0.79296875, "learning_rate": 3.636324067501443e-06, "loss": 3.9794, "step": 20471 }, { "epoch": 6.819521945531773, "grad_norm": 0.7890625, "learning_rate": 3.635630323549786e-06, "loss": 3.9993, "step": 20472 }, { "epoch": 6.819855084534022, "grad_norm": 0.79296875, "learning_rate": 3.634936624609668e-06, "loss": 3.9923, "step": 20473 }, { "epoch": 6.82018822353627, "grad_norm": 0.7578125, "learning_rate": 3.634242970689166e-06, "loss": 3.9166, "step": 20474 }, { "epoch": 6.8205213625385195, "grad_norm": 0.75390625, "learning_rate": 3.6335493617963607e-06, "loss": 4.0424, "step": 20475 }, { "epoch": 6.820854501540768, "grad_norm": 0.76953125, "learning_rate": 3.6328557979393282e-06, "loss": 4.0566, "step": 20476 }, { "epoch": 6.821187640543017, "grad_norm": 0.7734375, "learning_rate": 3.6321622791261548e-06, "loss": 4.0123, "step": 20477 }, { "epoch": 6.821520779545265, "grad_norm": 0.78515625, "learning_rate": 3.631468805364911e-06, "loss": 3.9832, "step": 20478 }, { "epoch": 6.821853918547514, "grad_norm": 0.73828125, "learning_rate": 3.630775376663672e-06, "loss": 4.0324, "step": 20479 }, { "epoch": 6.822187057549763, "grad_norm": 0.74609375, "learning_rate": 3.630081993030523e-06, "loss": 3.9958, "step": 20480 }, { "epoch": 6.822520196552011, "grad_norm": 0.75, "learning_rate": 3.629388654473536e-06, "loss": 4.001, "step": 20481 }, { "epoch": 6.82285333555426, "grad_norm": 0.74609375, "learning_rate": 3.6286953610007874e-06, "loss": 3.9669, "step": 20482 }, { "epoch": 6.8231864745565085, "grad_norm": 0.73828125, "learning_rate": 3.6280021126203507e-06, "loss": 3.9659, "step": 20483 }, { "epoch": 6.823519613558758, "grad_norm": 0.890625, "learning_rate": 3.6273089093403044e-06, "loss": 4.0396, "step": 20484 }, { "epoch": 6.823852752561006, "grad_norm": 0.7578125, "learning_rate": 3.626615751168722e-06, "loss": 3.9155, "step": 20485 }, { "epoch": 6.824185891563255, "grad_norm": 0.75390625, "learning_rate": 3.625922638113676e-06, "loss": 4.0414, "step": 20486 }, { "epoch": 6.8245190305655035, "grad_norm": 0.80078125, "learning_rate": 3.62522957018324e-06, "loss": 3.9548, "step": 20487 }, { "epoch": 6.824852169567752, "grad_norm": 0.78125, "learning_rate": 3.6245365473854867e-06, "loss": 3.9897, "step": 20488 }, { "epoch": 6.825185308570001, "grad_norm": 0.7578125, "learning_rate": 3.6238435697284885e-06, "loss": 3.9885, "step": 20489 }, { "epoch": 6.825518447572249, "grad_norm": 0.77734375, "learning_rate": 3.623150637220314e-06, "loss": 4.0181, "step": 20490 }, { "epoch": 6.825851586574498, "grad_norm": 0.7890625, "learning_rate": 3.62245774986904e-06, "loss": 3.9833, "step": 20491 }, { "epoch": 6.826184725576747, "grad_norm": 0.73828125, "learning_rate": 3.621764907682735e-06, "loss": 3.9918, "step": 20492 }, { "epoch": 6.826517864578996, "grad_norm": 0.75390625, "learning_rate": 3.6210721106694684e-06, "loss": 3.8675, "step": 20493 }, { "epoch": 6.826851003581244, "grad_norm": 0.7578125, "learning_rate": 3.6203793588373073e-06, "loss": 4.0306, "step": 20494 }, { "epoch": 6.8271841425834925, "grad_norm": 0.734375, "learning_rate": 3.6196866521943285e-06, "loss": 4.0505, "step": 20495 }, { "epoch": 6.827517281585742, "grad_norm": 0.70703125, "learning_rate": 3.618993990748593e-06, "loss": 3.999, "step": 20496 }, { "epoch": 6.82785042058799, "grad_norm": 0.79296875, "learning_rate": 3.618301374508168e-06, "loss": 4.0707, "step": 20497 }, { "epoch": 6.828183559590239, "grad_norm": 0.7890625, "learning_rate": 3.6176088034811276e-06, "loss": 3.9461, "step": 20498 }, { "epoch": 6.828516698592487, "grad_norm": 0.796875, "learning_rate": 3.6169162776755347e-06, "loss": 3.9595, "step": 20499 }, { "epoch": 6.828849837594737, "grad_norm": 0.7421875, "learning_rate": 3.6162237970994574e-06, "loss": 3.8913, "step": 20500 }, { "epoch": 6.829182976596985, "grad_norm": 0.75390625, "learning_rate": 3.615531361760957e-06, "loss": 3.9784, "step": 20501 }, { "epoch": 6.829516115599234, "grad_norm": 0.83984375, "learning_rate": 3.6148389716681063e-06, "loss": 3.9781, "step": 20502 }, { "epoch": 6.829849254601482, "grad_norm": 0.82421875, "learning_rate": 3.614146626828965e-06, "loss": 4.0033, "step": 20503 }, { "epoch": 6.830182393603732, "grad_norm": 0.76953125, "learning_rate": 3.6134543272516025e-06, "loss": 3.9542, "step": 20504 }, { "epoch": 6.83051553260598, "grad_norm": 0.77734375, "learning_rate": 3.612762072944073e-06, "loss": 3.9918, "step": 20505 }, { "epoch": 6.830848671608228, "grad_norm": 0.8046875, "learning_rate": 3.612069863914449e-06, "loss": 4.003, "step": 20506 }, { "epoch": 6.831181810610477, "grad_norm": 0.76953125, "learning_rate": 3.611377700170789e-06, "loss": 3.9975, "step": 20507 }, { "epoch": 6.831514949612726, "grad_norm": 0.75390625, "learning_rate": 3.6106855817211568e-06, "loss": 3.9939, "step": 20508 }, { "epoch": 6.831848088614975, "grad_norm": 0.796875, "learning_rate": 3.6099935085736098e-06, "loss": 3.9462, "step": 20509 }, { "epoch": 6.832181227617223, "grad_norm": 0.74609375, "learning_rate": 3.6093014807362154e-06, "loss": 4.0173, "step": 20510 }, { "epoch": 6.832514366619472, "grad_norm": 0.73828125, "learning_rate": 3.608609498217032e-06, "loss": 4.0146, "step": 20511 }, { "epoch": 6.832847505621721, "grad_norm": 0.7265625, "learning_rate": 3.607917561024119e-06, "loss": 3.9513, "step": 20512 }, { "epoch": 6.833180644623969, "grad_norm": 0.76171875, "learning_rate": 3.6072256691655363e-06, "loss": 3.9549, "step": 20513 }, { "epoch": 6.833513783626218, "grad_norm": 0.8046875, "learning_rate": 3.606533822649343e-06, "loss": 4.025, "step": 20514 }, { "epoch": 6.833846922628466, "grad_norm": 0.8046875, "learning_rate": 3.6058420214835967e-06, "loss": 4.0181, "step": 20515 }, { "epoch": 6.834180061630716, "grad_norm": 0.8515625, "learning_rate": 3.605150265676354e-06, "loss": 3.9721, "step": 20516 }, { "epoch": 6.834513200632964, "grad_norm": 0.78125, "learning_rate": 3.6044585552356764e-06, "loss": 4.0262, "step": 20517 }, { "epoch": 6.834846339635213, "grad_norm": 0.76171875, "learning_rate": 3.60376689016962e-06, "loss": 3.8975, "step": 20518 }, { "epoch": 6.835179478637461, "grad_norm": 0.78125, "learning_rate": 3.6030752704862394e-06, "loss": 3.977, "step": 20519 }, { "epoch": 6.8355126176397105, "grad_norm": 0.73046875, "learning_rate": 3.602383696193589e-06, "loss": 4.0232, "step": 20520 }, { "epoch": 6.835845756641959, "grad_norm": 0.7734375, "learning_rate": 3.6016921672997284e-06, "loss": 3.9878, "step": 20521 }, { "epoch": 6.836178895644208, "grad_norm": 0.71875, "learning_rate": 3.6010006838127137e-06, "loss": 3.9979, "step": 20522 }, { "epoch": 6.836512034646456, "grad_norm": 0.796875, "learning_rate": 3.60030924574059e-06, "loss": 3.9699, "step": 20523 }, { "epoch": 6.836845173648705, "grad_norm": 0.7890625, "learning_rate": 3.59961785309142e-06, "loss": 3.9883, "step": 20524 }, { "epoch": 6.837178312650954, "grad_norm": 0.78515625, "learning_rate": 3.598926505873253e-06, "loss": 3.958, "step": 20525 }, { "epoch": 6.837511451653202, "grad_norm": 0.78515625, "learning_rate": 3.598235204094143e-06, "loss": 3.9728, "step": 20526 }, { "epoch": 6.837844590655451, "grad_norm": 0.7890625, "learning_rate": 3.597543947762139e-06, "loss": 4.0005, "step": 20527 }, { "epoch": 6.8381777296576995, "grad_norm": 0.77734375, "learning_rate": 3.5968527368852976e-06, "loss": 3.9488, "step": 20528 }, { "epoch": 6.838510868659949, "grad_norm": 0.734375, "learning_rate": 3.596161571471668e-06, "loss": 4.0043, "step": 20529 }, { "epoch": 6.838844007662197, "grad_norm": 0.78125, "learning_rate": 3.5954704515293e-06, "loss": 3.9587, "step": 20530 }, { "epoch": 6.839177146664445, "grad_norm": 0.74609375, "learning_rate": 3.594779377066245e-06, "loss": 4.1169, "step": 20531 }, { "epoch": 6.8395102856666945, "grad_norm": 0.7734375, "learning_rate": 3.5940883480905505e-06, "loss": 3.9949, "step": 20532 }, { "epoch": 6.839843424668943, "grad_norm": 0.74609375, "learning_rate": 3.5933973646102677e-06, "loss": 4.002, "step": 20533 }, { "epoch": 6.840176563671192, "grad_norm": 0.7890625, "learning_rate": 3.59270642663344e-06, "loss": 3.9237, "step": 20534 }, { "epoch": 6.84050970267344, "grad_norm": 0.78515625, "learning_rate": 3.592015534168123e-06, "loss": 4.0697, "step": 20535 }, { "epoch": 6.8408428416756895, "grad_norm": 0.7734375, "learning_rate": 3.5913246872223594e-06, "loss": 3.9577, "step": 20536 }, { "epoch": 6.841175980677938, "grad_norm": 0.75390625, "learning_rate": 3.5906338858041976e-06, "loss": 3.9775, "step": 20537 }, { "epoch": 6.841509119680187, "grad_norm": 0.82421875, "learning_rate": 3.5899431299216805e-06, "loss": 3.9512, "step": 20538 }, { "epoch": 6.841842258682435, "grad_norm": 0.7265625, "learning_rate": 3.5892524195828594e-06, "loss": 3.9802, "step": 20539 }, { "epoch": 6.842175397684684, "grad_norm": 0.703125, "learning_rate": 3.588561754795779e-06, "loss": 3.9837, "step": 20540 }, { "epoch": 6.842508536686933, "grad_norm": 0.80859375, "learning_rate": 3.5878711355684773e-06, "loss": 3.9953, "step": 20541 }, { "epoch": 6.842841675689181, "grad_norm": 0.81640625, "learning_rate": 3.587180561909006e-06, "loss": 3.9455, "step": 20542 }, { "epoch": 6.84317481469143, "grad_norm": 0.8125, "learning_rate": 3.5864900338254057e-06, "loss": 3.8907, "step": 20543 }, { "epoch": 6.8435079536936785, "grad_norm": 0.7578125, "learning_rate": 3.585799551325721e-06, "loss": 3.9398, "step": 20544 }, { "epoch": 6.843841092695928, "grad_norm": 0.79296875, "learning_rate": 3.585109114417989e-06, "loss": 3.9515, "step": 20545 }, { "epoch": 6.844174231698176, "grad_norm": 0.8046875, "learning_rate": 3.5844187231102597e-06, "loss": 4.0177, "step": 20546 }, { "epoch": 6.844507370700425, "grad_norm": 0.796875, "learning_rate": 3.5837283774105713e-06, "loss": 4.0119, "step": 20547 }, { "epoch": 6.844840509702673, "grad_norm": 0.75390625, "learning_rate": 3.5830380773269642e-06, "loss": 3.9297, "step": 20548 }, { "epoch": 6.845173648704922, "grad_norm": 0.734375, "learning_rate": 3.58234782286748e-06, "loss": 3.9744, "step": 20549 }, { "epoch": 6.845506787707171, "grad_norm": 0.76171875, "learning_rate": 3.581657614040157e-06, "loss": 3.9169, "step": 20550 }, { "epoch": 6.845839926709419, "grad_norm": 0.78125, "learning_rate": 3.5809674508530367e-06, "loss": 4.0518, "step": 20551 }, { "epoch": 6.846173065711668, "grad_norm": 0.7734375, "learning_rate": 3.5802773333141566e-06, "loss": 3.9802, "step": 20552 }, { "epoch": 6.846506204713917, "grad_norm": 0.76953125, "learning_rate": 3.579587261431553e-06, "loss": 4.0026, "step": 20553 }, { "epoch": 6.846839343716166, "grad_norm": 0.73046875, "learning_rate": 3.578897235213269e-06, "loss": 3.9774, "step": 20554 }, { "epoch": 6.847172482718414, "grad_norm": 0.78515625, "learning_rate": 3.578207254667338e-06, "loss": 3.9752, "step": 20555 }, { "epoch": 6.8475056217206625, "grad_norm": 0.74609375, "learning_rate": 3.5775173198017987e-06, "loss": 4.0687, "step": 20556 }, { "epoch": 6.847838760722912, "grad_norm": 0.8125, "learning_rate": 3.576827430624684e-06, "loss": 4.039, "step": 20557 }, { "epoch": 6.84817189972516, "grad_norm": 0.72265625, "learning_rate": 3.576137587144037e-06, "loss": 4.0409, "step": 20558 }, { "epoch": 6.848505038727409, "grad_norm": 0.75390625, "learning_rate": 3.5754477893678856e-06, "loss": 4.0244, "step": 20559 }, { "epoch": 6.848838177729657, "grad_norm": 0.7890625, "learning_rate": 3.574758037304265e-06, "loss": 4.0171, "step": 20560 }, { "epoch": 6.849171316731907, "grad_norm": 0.7265625, "learning_rate": 3.574068330961213e-06, "loss": 3.9403, "step": 20561 }, { "epoch": 6.849504455734155, "grad_norm": 0.83203125, "learning_rate": 3.573378670346762e-06, "loss": 3.9653, "step": 20562 }, { "epoch": 6.849837594736404, "grad_norm": 0.7734375, "learning_rate": 3.5726890554689436e-06, "loss": 4.0246, "step": 20563 }, { "epoch": 6.850170733738652, "grad_norm": 0.74609375, "learning_rate": 3.5719994863357887e-06, "loss": 4.0356, "step": 20564 }, { "epoch": 6.850503872740902, "grad_norm": 0.73046875, "learning_rate": 3.571309962955335e-06, "loss": 3.9589, "step": 20565 }, { "epoch": 6.85083701174315, "grad_norm": 0.76953125, "learning_rate": 3.57062048533561e-06, "loss": 4.0379, "step": 20566 }, { "epoch": 6.851170150745398, "grad_norm": 0.81640625, "learning_rate": 3.5699310534846454e-06, "loss": 3.9503, "step": 20567 }, { "epoch": 6.851503289747647, "grad_norm": 0.76171875, "learning_rate": 3.5692416674104723e-06, "loss": 4.0007, "step": 20568 }, { "epoch": 6.851836428749896, "grad_norm": 0.796875, "learning_rate": 3.5685523271211198e-06, "loss": 3.9416, "step": 20569 }, { "epoch": 6.852169567752145, "grad_norm": 0.7578125, "learning_rate": 3.567863032624616e-06, "loss": 3.9738, "step": 20570 }, { "epoch": 6.852502706754393, "grad_norm": 0.75, "learning_rate": 3.5671737839289883e-06, "loss": 3.9855, "step": 20571 }, { "epoch": 6.852835845756642, "grad_norm": 0.75, "learning_rate": 3.5664845810422715e-06, "loss": 4.0015, "step": 20572 }, { "epoch": 6.853168984758891, "grad_norm": 0.7578125, "learning_rate": 3.565795423972488e-06, "loss": 3.9737, "step": 20573 }, { "epoch": 6.853502123761139, "grad_norm": 0.75390625, "learning_rate": 3.5651063127276674e-06, "loss": 3.9668, "step": 20574 }, { "epoch": 6.853835262763388, "grad_norm": 0.765625, "learning_rate": 3.564417247315832e-06, "loss": 4.0217, "step": 20575 }, { "epoch": 6.854168401765636, "grad_norm": 0.77734375, "learning_rate": 3.5637282277450166e-06, "loss": 3.9516, "step": 20576 }, { "epoch": 6.8545015407678855, "grad_norm": 0.80859375, "learning_rate": 3.563039254023239e-06, "loss": 3.9527, "step": 20577 }, { "epoch": 6.854834679770134, "grad_norm": 0.77734375, "learning_rate": 3.5623503261585235e-06, "loss": 4.0326, "step": 20578 }, { "epoch": 6.855167818772383, "grad_norm": 0.71484375, "learning_rate": 3.561661444158901e-06, "loss": 4.0229, "step": 20579 }, { "epoch": 6.855500957774631, "grad_norm": 0.74609375, "learning_rate": 3.560972608032392e-06, "loss": 3.9723, "step": 20580 }, { "epoch": 6.8558340967768805, "grad_norm": 0.78515625, "learning_rate": 3.56028381778702e-06, "loss": 3.9894, "step": 20581 }, { "epoch": 6.856167235779129, "grad_norm": 0.80859375, "learning_rate": 3.5595950734308056e-06, "loss": 4.0299, "step": 20582 }, { "epoch": 6.856500374781378, "grad_norm": 0.80078125, "learning_rate": 3.5589063749717767e-06, "loss": 3.978, "step": 20583 }, { "epoch": 6.856833513783626, "grad_norm": 0.7578125, "learning_rate": 3.5582177224179517e-06, "loss": 3.9564, "step": 20584 }, { "epoch": 6.857166652785875, "grad_norm": 0.78515625, "learning_rate": 3.557529115777352e-06, "loss": 3.9837, "step": 20585 }, { "epoch": 6.857499791788124, "grad_norm": 0.765625, "learning_rate": 3.5568405550579995e-06, "loss": 3.9258, "step": 20586 }, { "epoch": 6.857832930790372, "grad_norm": 0.75, "learning_rate": 3.5561520402679136e-06, "loss": 3.907, "step": 20587 }, { "epoch": 6.858166069792621, "grad_norm": 0.765625, "learning_rate": 3.5554635714151137e-06, "loss": 3.9834, "step": 20588 }, { "epoch": 6.8584992087948695, "grad_norm": 0.76953125, "learning_rate": 3.554775148507617e-06, "loss": 3.9926, "step": 20589 }, { "epoch": 6.858832347797119, "grad_norm": 0.83203125, "learning_rate": 3.5540867715534466e-06, "loss": 3.8778, "step": 20590 }, { "epoch": 6.859165486799367, "grad_norm": 0.78515625, "learning_rate": 3.5533984405606193e-06, "loss": 3.9478, "step": 20591 }, { "epoch": 6.859498625801615, "grad_norm": 0.80859375, "learning_rate": 3.552710155537151e-06, "loss": 3.917, "step": 20592 }, { "epoch": 6.8598317648038645, "grad_norm": 0.75, "learning_rate": 3.552021916491057e-06, "loss": 3.9025, "step": 20593 }, { "epoch": 6.860164903806113, "grad_norm": 0.7734375, "learning_rate": 3.551333723430362e-06, "loss": 3.9842, "step": 20594 }, { "epoch": 6.860498042808362, "grad_norm": 0.77734375, "learning_rate": 3.550645576363074e-06, "loss": 4.0217, "step": 20595 }, { "epoch": 6.86083118181061, "grad_norm": 0.76171875, "learning_rate": 3.5499574752972106e-06, "loss": 4.0203, "step": 20596 }, { "epoch": 6.861164320812859, "grad_norm": 0.7578125, "learning_rate": 3.5492694202407836e-06, "loss": 4.0243, "step": 20597 }, { "epoch": 6.861497459815108, "grad_norm": 0.76171875, "learning_rate": 3.548581411201814e-06, "loss": 3.9865, "step": 20598 }, { "epoch": 6.861830598817357, "grad_norm": 0.79296875, "learning_rate": 3.547893448188312e-06, "loss": 3.9787, "step": 20599 }, { "epoch": 6.862163737819605, "grad_norm": 0.7734375, "learning_rate": 3.54720553120829e-06, "loss": 3.9806, "step": 20600 }, { "epoch": 6.862496876821854, "grad_norm": 0.75, "learning_rate": 3.546517660269761e-06, "loss": 3.9868, "step": 20601 }, { "epoch": 6.862830015824103, "grad_norm": 0.78125, "learning_rate": 3.545829835380739e-06, "loss": 3.9736, "step": 20602 }, { "epoch": 6.863163154826351, "grad_norm": 0.796875, "learning_rate": 3.545142056549238e-06, "loss": 4.0034, "step": 20603 }, { "epoch": 6.8634962938286, "grad_norm": 0.76953125, "learning_rate": 3.5444543237832604e-06, "loss": 4.008, "step": 20604 }, { "epoch": 6.8638294328308485, "grad_norm": 0.76171875, "learning_rate": 3.5437666370908255e-06, "loss": 3.9481, "step": 20605 }, { "epoch": 6.864162571833098, "grad_norm": 0.7265625, "learning_rate": 3.5430789964799395e-06, "loss": 3.9269, "step": 20606 }, { "epoch": 6.864495710835346, "grad_norm": 0.84375, "learning_rate": 3.5423914019586132e-06, "loss": 3.9471, "step": 20607 }, { "epoch": 6.864828849837595, "grad_norm": 0.796875, "learning_rate": 3.5417038535348516e-06, "loss": 4.0302, "step": 20608 }, { "epoch": 6.865161988839843, "grad_norm": 0.76171875, "learning_rate": 3.5410163512166687e-06, "loss": 3.948, "step": 20609 }, { "epoch": 6.865495127842092, "grad_norm": 0.78125, "learning_rate": 3.5403288950120716e-06, "loss": 4.0142, "step": 20610 }, { "epoch": 6.865828266844341, "grad_norm": 0.76171875, "learning_rate": 3.5396414849290652e-06, "loss": 3.8638, "step": 20611 }, { "epoch": 6.866161405846589, "grad_norm": 0.734375, "learning_rate": 3.538954120975657e-06, "loss": 3.9312, "step": 20612 }, { "epoch": 6.866494544848838, "grad_norm": 0.76953125, "learning_rate": 3.5382668031598546e-06, "loss": 3.992, "step": 20613 }, { "epoch": 6.866827683851087, "grad_norm": 0.75, "learning_rate": 3.5375795314896623e-06, "loss": 4.0171, "step": 20614 }, { "epoch": 6.867160822853336, "grad_norm": 0.75, "learning_rate": 3.536892305973083e-06, "loss": 4.0455, "step": 20615 }, { "epoch": 6.867493961855584, "grad_norm": 0.8046875, "learning_rate": 3.5362051266181277e-06, "loss": 3.9624, "step": 20616 }, { "epoch": 6.867827100857832, "grad_norm": 0.78125, "learning_rate": 3.535517993432797e-06, "loss": 3.9216, "step": 20617 }, { "epoch": 6.868160239860082, "grad_norm": 0.8125, "learning_rate": 3.534830906425094e-06, "loss": 3.9429, "step": 20618 }, { "epoch": 6.868493378862331, "grad_norm": 0.765625, "learning_rate": 3.5341438656030205e-06, "loss": 4.018, "step": 20619 }, { "epoch": 6.868826517864579, "grad_norm": 0.796875, "learning_rate": 3.5334568709745827e-06, "loss": 4.0418, "step": 20620 }, { "epoch": 6.869159656866827, "grad_norm": 0.80078125, "learning_rate": 3.5327699225477836e-06, "loss": 3.9713, "step": 20621 }, { "epoch": 6.869492795869077, "grad_norm": 0.75, "learning_rate": 3.5320830203306176e-06, "loss": 3.9355, "step": 20622 }, { "epoch": 6.869825934871325, "grad_norm": 0.80859375, "learning_rate": 3.531396164331092e-06, "loss": 4.0107, "step": 20623 }, { "epoch": 6.870159073873574, "grad_norm": 0.75390625, "learning_rate": 3.5307093545572043e-06, "loss": 3.9954, "step": 20624 }, { "epoch": 6.870492212875822, "grad_norm": 0.80859375, "learning_rate": 3.5300225910169563e-06, "loss": 3.9829, "step": 20625 }, { "epoch": 6.8708253518780715, "grad_norm": 0.74609375, "learning_rate": 3.529335873718343e-06, "loss": 4.0374, "step": 20626 }, { "epoch": 6.87115849088032, "grad_norm": 0.8125, "learning_rate": 3.528649202669369e-06, "loss": 3.9057, "step": 20627 }, { "epoch": 6.871491629882568, "grad_norm": 0.76953125, "learning_rate": 3.52796257787803e-06, "loss": 4.0193, "step": 20628 }, { "epoch": 6.871824768884817, "grad_norm": 0.79296875, "learning_rate": 3.5272759993523233e-06, "loss": 4.076, "step": 20629 }, { "epoch": 6.872157907887066, "grad_norm": 0.765625, "learning_rate": 3.526589467100246e-06, "loss": 3.972, "step": 20630 }, { "epoch": 6.872491046889315, "grad_norm": 0.75, "learning_rate": 3.5259029811297965e-06, "loss": 3.9662, "step": 20631 }, { "epoch": 6.872824185891563, "grad_norm": 0.76953125, "learning_rate": 3.5252165414489676e-06, "loss": 3.9552, "step": 20632 }, { "epoch": 6.873157324893812, "grad_norm": 0.78125, "learning_rate": 3.5245301480657547e-06, "loss": 4.0525, "step": 20633 }, { "epoch": 6.8734904638960606, "grad_norm": 0.78125, "learning_rate": 3.5238438009881576e-06, "loss": 3.9675, "step": 20634 }, { "epoch": 6.873823602898309, "grad_norm": 0.76953125, "learning_rate": 3.5231575002241677e-06, "loss": 3.9734, "step": 20635 }, { "epoch": 6.874156741900558, "grad_norm": 0.70703125, "learning_rate": 3.5224712457817787e-06, "loss": 4.0347, "step": 20636 }, { "epoch": 6.874489880902806, "grad_norm": 0.796875, "learning_rate": 3.521785037668982e-06, "loss": 4.0014, "step": 20637 }, { "epoch": 6.8748230199050555, "grad_norm": 0.77734375, "learning_rate": 3.5210988758937763e-06, "loss": 4.0034, "step": 20638 }, { "epoch": 6.875156158907304, "grad_norm": 0.7421875, "learning_rate": 3.5204127604641524e-06, "loss": 3.9622, "step": 20639 }, { "epoch": 6.875489297909553, "grad_norm": 0.78125, "learning_rate": 3.5197266913880973e-06, "loss": 3.9993, "step": 20640 }, { "epoch": 6.875822436911801, "grad_norm": 0.73828125, "learning_rate": 3.519040668673602e-06, "loss": 3.9709, "step": 20641 }, { "epoch": 6.8761555759140505, "grad_norm": 0.8046875, "learning_rate": 3.5183546923286636e-06, "loss": 3.8767, "step": 20642 }, { "epoch": 6.876488714916299, "grad_norm": 0.7578125, "learning_rate": 3.517668762361268e-06, "loss": 3.9395, "step": 20643 }, { "epoch": 6.876821853918548, "grad_norm": 0.79296875, "learning_rate": 3.516982878779406e-06, "loss": 4.014, "step": 20644 }, { "epoch": 6.877154992920796, "grad_norm": 0.828125, "learning_rate": 3.5162970415910633e-06, "loss": 3.9937, "step": 20645 }, { "epoch": 6.8774881319230445, "grad_norm": 0.7109375, "learning_rate": 3.5156112508042347e-06, "loss": 4.0074, "step": 20646 }, { "epoch": 6.877821270925294, "grad_norm": 0.84375, "learning_rate": 3.514925506426904e-06, "loss": 3.9743, "step": 20647 }, { "epoch": 6.878154409927542, "grad_norm": 0.74609375, "learning_rate": 3.51423980846706e-06, "loss": 3.9641, "step": 20648 }, { "epoch": 6.878487548929791, "grad_norm": 0.78515625, "learning_rate": 3.5135541569326894e-06, "loss": 3.9953, "step": 20649 }, { "epoch": 6.8788206879320395, "grad_norm": 0.7734375, "learning_rate": 3.5128685518317785e-06, "loss": 3.9567, "step": 20650 }, { "epoch": 6.879153826934289, "grad_norm": 0.79296875, "learning_rate": 3.5121829931723124e-06, "loss": 4.0213, "step": 20651 }, { "epoch": 6.879486965936537, "grad_norm": 0.77734375, "learning_rate": 3.511497480962274e-06, "loss": 4.0051, "step": 20652 }, { "epoch": 6.879820104938785, "grad_norm": 0.82421875, "learning_rate": 3.5108120152096533e-06, "loss": 3.9873, "step": 20653 }, { "epoch": 6.8801532439410344, "grad_norm": 0.7890625, "learning_rate": 3.5101265959224325e-06, "loss": 3.9346, "step": 20654 }, { "epoch": 6.880486382943283, "grad_norm": 0.77734375, "learning_rate": 3.5094412231085942e-06, "loss": 4.01, "step": 20655 }, { "epoch": 6.880819521945532, "grad_norm": 0.7890625, "learning_rate": 3.50875589677612e-06, "loss": 3.9867, "step": 20656 }, { "epoch": 6.88115266094778, "grad_norm": 0.796875, "learning_rate": 3.5080706169329996e-06, "loss": 3.9437, "step": 20657 }, { "epoch": 6.881485799950029, "grad_norm": 0.7734375, "learning_rate": 3.507385383587208e-06, "loss": 3.9266, "step": 20658 }, { "epoch": 6.881818938952278, "grad_norm": 0.74609375, "learning_rate": 3.5067001967467253e-06, "loss": 4.0733, "step": 20659 }, { "epoch": 6.882152077954527, "grad_norm": 0.796875, "learning_rate": 3.5060150564195388e-06, "loss": 4.038, "step": 20660 }, { "epoch": 6.882485216956775, "grad_norm": 0.75390625, "learning_rate": 3.505329962613626e-06, "loss": 4.002, "step": 20661 }, { "epoch": 6.882818355959024, "grad_norm": 0.79296875, "learning_rate": 3.5046449153369665e-06, "loss": 3.9467, "step": 20662 }, { "epoch": 6.883151494961273, "grad_norm": 0.8046875, "learning_rate": 3.503959914597537e-06, "loss": 3.9983, "step": 20663 }, { "epoch": 6.883484633963521, "grad_norm": 0.80078125, "learning_rate": 3.503274960403322e-06, "loss": 3.9694, "step": 20664 }, { "epoch": 6.88381777296577, "grad_norm": 0.74609375, "learning_rate": 3.502590052762297e-06, "loss": 4.0501, "step": 20665 }, { "epoch": 6.884150911968018, "grad_norm": 0.75390625, "learning_rate": 3.50190519168244e-06, "loss": 3.9611, "step": 20666 }, { "epoch": 6.884484050970268, "grad_norm": 0.765625, "learning_rate": 3.5012203771717263e-06, "loss": 3.9873, "step": 20667 }, { "epoch": 6.884817189972516, "grad_norm": 0.77734375, "learning_rate": 3.500535609238135e-06, "loss": 3.9445, "step": 20668 }, { "epoch": 6.885150328974765, "grad_norm": 0.7421875, "learning_rate": 3.49985088788964e-06, "loss": 4.0227, "step": 20669 }, { "epoch": 6.885483467977013, "grad_norm": 0.76171875, "learning_rate": 3.4991662131342162e-06, "loss": 3.9669, "step": 20670 }, { "epoch": 6.885816606979262, "grad_norm": 0.78125, "learning_rate": 3.4984815849798424e-06, "loss": 3.9153, "step": 20671 }, { "epoch": 6.886149745981511, "grad_norm": 0.77734375, "learning_rate": 3.4977970034344907e-06, "loss": 4.0323, "step": 20672 }, { "epoch": 6.886482884983759, "grad_norm": 0.77734375, "learning_rate": 3.4971124685061355e-06, "loss": 3.9133, "step": 20673 }, { "epoch": 6.886816023986008, "grad_norm": 0.76953125, "learning_rate": 3.496427980202747e-06, "loss": 3.9464, "step": 20674 }, { "epoch": 6.887149162988257, "grad_norm": 0.71484375, "learning_rate": 3.4957435385323064e-06, "loss": 4.0081, "step": 20675 }, { "epoch": 6.887482301990506, "grad_norm": 0.76171875, "learning_rate": 3.4950591435027775e-06, "loss": 4.0168, "step": 20676 }, { "epoch": 6.887815440992754, "grad_norm": 0.80078125, "learning_rate": 3.494374795122132e-06, "loss": 4.0224, "step": 20677 }, { "epoch": 6.888148579995003, "grad_norm": 0.76171875, "learning_rate": 3.493690493398347e-06, "loss": 3.9817, "step": 20678 }, { "epoch": 6.888481718997252, "grad_norm": 0.75390625, "learning_rate": 3.4930062383393894e-06, "loss": 3.9669, "step": 20679 }, { "epoch": 6.888814857999501, "grad_norm": 0.76171875, "learning_rate": 3.4923220299532303e-06, "loss": 3.9891, "step": 20680 }, { "epoch": 6.889147997001749, "grad_norm": 0.8125, "learning_rate": 3.4916378682478366e-06, "loss": 3.9535, "step": 20681 }, { "epoch": 6.889481136003997, "grad_norm": 0.76171875, "learning_rate": 3.4909537532311824e-06, "loss": 4.0241, "step": 20682 }, { "epoch": 6.8898142750062465, "grad_norm": 0.79296875, "learning_rate": 3.490269684911233e-06, "loss": 3.9748, "step": 20683 }, { "epoch": 6.890147414008495, "grad_norm": 0.8046875, "learning_rate": 3.4895856632959573e-06, "loss": 4.0572, "step": 20684 }, { "epoch": 6.890480553010744, "grad_norm": 0.77734375, "learning_rate": 3.488901688393322e-06, "loss": 3.9981, "step": 20685 }, { "epoch": 6.890813692012992, "grad_norm": 0.79296875, "learning_rate": 3.4882177602112928e-06, "loss": 3.9314, "step": 20686 }, { "epoch": 6.8911468310152415, "grad_norm": 0.7734375, "learning_rate": 3.487533878757838e-06, "loss": 3.9318, "step": 20687 }, { "epoch": 6.89147997001749, "grad_norm": 0.76171875, "learning_rate": 3.486850044040923e-06, "loss": 4.0175, "step": 20688 }, { "epoch": 6.891813109019738, "grad_norm": 0.73828125, "learning_rate": 3.486166256068509e-06, "loss": 4.0196, "step": 20689 }, { "epoch": 6.892146248021987, "grad_norm": 0.7734375, "learning_rate": 3.4854825148485676e-06, "loss": 4.0294, "step": 20690 }, { "epoch": 6.892479387024236, "grad_norm": 0.76171875, "learning_rate": 3.484798820389059e-06, "loss": 3.9449, "step": 20691 }, { "epoch": 6.892812526026485, "grad_norm": 0.8125, "learning_rate": 3.4841151726979473e-06, "loss": 3.978, "step": 20692 }, { "epoch": 6.893145665028733, "grad_norm": 0.7421875, "learning_rate": 3.4834315717831957e-06, "loss": 3.9995, "step": 20693 }, { "epoch": 6.893478804030982, "grad_norm": 0.7734375, "learning_rate": 3.4827480176527665e-06, "loss": 4.0063, "step": 20694 }, { "epoch": 6.8938119430332305, "grad_norm": 0.75, "learning_rate": 3.4820645103146214e-06, "loss": 3.991, "step": 20695 }, { "epoch": 6.894145082035479, "grad_norm": 0.77734375, "learning_rate": 3.4813810497767186e-06, "loss": 3.9313, "step": 20696 }, { "epoch": 6.894478221037728, "grad_norm": 0.78515625, "learning_rate": 3.480697636047025e-06, "loss": 3.9631, "step": 20697 }, { "epoch": 6.894811360039976, "grad_norm": 0.7890625, "learning_rate": 3.480014269133499e-06, "loss": 3.9132, "step": 20698 }, { "epoch": 6.8951444990422255, "grad_norm": 0.7265625, "learning_rate": 3.4793309490440993e-06, "loss": 4.0326, "step": 20699 }, { "epoch": 6.895477638044474, "grad_norm": 0.76953125, "learning_rate": 3.4786476757867825e-06, "loss": 4.018, "step": 20700 }, { "epoch": 6.895810777046723, "grad_norm": 0.7890625, "learning_rate": 3.477964449369515e-06, "loss": 4.0101, "step": 20701 }, { "epoch": 6.896143916048971, "grad_norm": 0.7578125, "learning_rate": 3.4772812698002478e-06, "loss": 3.9494, "step": 20702 }, { "epoch": 6.89647705505122, "grad_norm": 0.8203125, "learning_rate": 3.476598137086938e-06, "loss": 3.9696, "step": 20703 }, { "epoch": 6.896810194053469, "grad_norm": 0.78515625, "learning_rate": 3.4759150512375467e-06, "loss": 4.0185, "step": 20704 }, { "epoch": 6.897143333055718, "grad_norm": 0.76953125, "learning_rate": 3.4752320122600305e-06, "loss": 3.9562, "step": 20705 }, { "epoch": 6.897476472057966, "grad_norm": 0.7734375, "learning_rate": 3.474549020162343e-06, "loss": 3.9734, "step": 20706 }, { "epoch": 6.8978096110602145, "grad_norm": 0.7421875, "learning_rate": 3.473866074952437e-06, "loss": 3.9798, "step": 20707 }, { "epoch": 6.898142750062464, "grad_norm": 0.765625, "learning_rate": 3.473183176638274e-06, "loss": 3.9273, "step": 20708 }, { "epoch": 6.898475889064712, "grad_norm": 0.734375, "learning_rate": 3.4725003252278053e-06, "loss": 4.0108, "step": 20709 }, { "epoch": 6.898809028066961, "grad_norm": 0.8046875, "learning_rate": 3.4718175207289836e-06, "loss": 3.8971, "step": 20710 }, { "epoch": 6.8991421670692095, "grad_norm": 0.80078125, "learning_rate": 3.4711347631497636e-06, "loss": 3.9508, "step": 20711 }, { "epoch": 6.899475306071459, "grad_norm": 0.78515625, "learning_rate": 3.4704520524980962e-06, "loss": 3.9417, "step": 20712 }, { "epoch": 6.899808445073707, "grad_norm": 0.83984375, "learning_rate": 3.4697693887819352e-06, "loss": 4.0422, "step": 20713 }, { "epoch": 6.900141584075955, "grad_norm": 0.7421875, "learning_rate": 3.4690867720092274e-06, "loss": 4.0099, "step": 20714 }, { "epoch": 6.900474723078204, "grad_norm": 0.74609375, "learning_rate": 3.4684042021879315e-06, "loss": 3.9081, "step": 20715 }, { "epoch": 6.900807862080453, "grad_norm": 0.8125, "learning_rate": 3.4677216793259936e-06, "loss": 4.0115, "step": 20716 }, { "epoch": 6.901141001082702, "grad_norm": 0.75, "learning_rate": 3.467039203431365e-06, "loss": 3.9218, "step": 20717 }, { "epoch": 6.90147414008495, "grad_norm": 0.78515625, "learning_rate": 3.466356774511991e-06, "loss": 3.9436, "step": 20718 }, { "epoch": 6.901807279087199, "grad_norm": 0.75390625, "learning_rate": 3.465674392575829e-06, "loss": 3.9818, "step": 20719 }, { "epoch": 6.902140418089448, "grad_norm": 0.81640625, "learning_rate": 3.46499205763082e-06, "loss": 3.9419, "step": 20720 }, { "epoch": 6.902473557091697, "grad_norm": 0.78125, "learning_rate": 3.4643097696849106e-06, "loss": 3.997, "step": 20721 }, { "epoch": 6.902806696093945, "grad_norm": 0.80859375, "learning_rate": 3.4636275287460536e-06, "loss": 4.0311, "step": 20722 }, { "epoch": 6.903139835096194, "grad_norm": 0.8203125, "learning_rate": 3.462945334822193e-06, "loss": 4.0041, "step": 20723 }, { "epoch": 6.903472974098443, "grad_norm": 0.7734375, "learning_rate": 3.462263187921276e-06, "loss": 4.0197, "step": 20724 }, { "epoch": 6.903806113100691, "grad_norm": 0.7578125, "learning_rate": 3.461581088051244e-06, "loss": 3.9775, "step": 20725 }, { "epoch": 6.90413925210294, "grad_norm": 0.77734375, "learning_rate": 3.460899035220047e-06, "loss": 3.9564, "step": 20726 }, { "epoch": 6.904472391105188, "grad_norm": 0.828125, "learning_rate": 3.4602170294356284e-06, "loss": 3.9828, "step": 20727 }, { "epoch": 6.904805530107438, "grad_norm": 0.78515625, "learning_rate": 3.4595350707059304e-06, "loss": 3.9443, "step": 20728 }, { "epoch": 6.905138669109686, "grad_norm": 0.75390625, "learning_rate": 3.4588531590388984e-06, "loss": 4.0327, "step": 20729 }, { "epoch": 6.905471808111935, "grad_norm": 0.75390625, "learning_rate": 3.4581712944424735e-06, "loss": 4.0338, "step": 20730 }, { "epoch": 6.905804947114183, "grad_norm": 0.76171875, "learning_rate": 3.4574894769245977e-06, "loss": 3.973, "step": 20731 }, { "epoch": 6.906138086116432, "grad_norm": 0.73828125, "learning_rate": 3.4568077064932123e-06, "loss": 3.9832, "step": 20732 }, { "epoch": 6.906471225118681, "grad_norm": 0.765625, "learning_rate": 3.4561259831562604e-06, "loss": 3.9622, "step": 20733 }, { "epoch": 6.906804364120929, "grad_norm": 0.8359375, "learning_rate": 3.4554443069216836e-06, "loss": 3.9862, "step": 20734 }, { "epoch": 6.907137503123178, "grad_norm": 0.7578125, "learning_rate": 3.454762677797419e-06, "loss": 3.9996, "step": 20735 }, { "epoch": 6.907470642125427, "grad_norm": 0.75, "learning_rate": 3.4540810957914073e-06, "loss": 3.8959, "step": 20736 }, { "epoch": 6.907803781127676, "grad_norm": 0.74609375, "learning_rate": 3.4533995609115874e-06, "loss": 3.9998, "step": 20737 }, { "epoch": 6.908136920129924, "grad_norm": 0.74609375, "learning_rate": 3.452718073165898e-06, "loss": 4.035, "step": 20738 }, { "epoch": 6.908470059132173, "grad_norm": 0.76953125, "learning_rate": 3.4520366325622766e-06, "loss": 4.0086, "step": 20739 }, { "epoch": 6.908803198134422, "grad_norm": 0.80078125, "learning_rate": 3.4513552391086574e-06, "loss": 3.9495, "step": 20740 }, { "epoch": 6.909136337136671, "grad_norm": 0.76171875, "learning_rate": 3.450673892812984e-06, "loss": 3.9986, "step": 20741 }, { "epoch": 6.909469476138919, "grad_norm": 0.74609375, "learning_rate": 3.449992593683188e-06, "loss": 4.0498, "step": 20742 }, { "epoch": 6.909802615141167, "grad_norm": 0.79296875, "learning_rate": 3.449311341727206e-06, "loss": 3.9586, "step": 20743 }, { "epoch": 6.9101357541434165, "grad_norm": 0.7734375, "learning_rate": 3.4486301369529705e-06, "loss": 3.9924, "step": 20744 }, { "epoch": 6.910468893145665, "grad_norm": 0.73828125, "learning_rate": 3.447948979368422e-06, "loss": 3.9437, "step": 20745 }, { "epoch": 6.910802032147914, "grad_norm": 0.8046875, "learning_rate": 3.447267868981494e-06, "loss": 3.9775, "step": 20746 }, { "epoch": 6.911135171150162, "grad_norm": 0.8046875, "learning_rate": 3.4465868058001108e-06, "loss": 3.9548, "step": 20747 }, { "epoch": 6.9114683101524115, "grad_norm": 0.796875, "learning_rate": 3.445905789832214e-06, "loss": 4.0113, "step": 20748 }, { "epoch": 6.91180144915466, "grad_norm": 0.7578125, "learning_rate": 3.4452248210857343e-06, "loss": 3.9575, "step": 20749 }, { "epoch": 6.912134588156908, "grad_norm": 0.7578125, "learning_rate": 3.4445438995686034e-06, "loss": 4.0316, "step": 20750 }, { "epoch": 6.912467727159157, "grad_norm": 0.734375, "learning_rate": 3.443863025288748e-06, "loss": 3.9596, "step": 20751 }, { "epoch": 6.9128008661614055, "grad_norm": 0.73046875, "learning_rate": 3.443182198254106e-06, "loss": 3.9066, "step": 20752 }, { "epoch": 6.913134005163655, "grad_norm": 0.8203125, "learning_rate": 3.442501418472605e-06, "loss": 3.9549, "step": 20753 }, { "epoch": 6.913467144165903, "grad_norm": 0.7890625, "learning_rate": 3.4418206859521732e-06, "loss": 4.0109, "step": 20754 }, { "epoch": 6.913800283168152, "grad_norm": 0.76171875, "learning_rate": 3.441140000700741e-06, "loss": 3.9123, "step": 20755 }, { "epoch": 6.9141334221704005, "grad_norm": 0.75, "learning_rate": 3.4404593627262356e-06, "loss": 4.0144, "step": 20756 }, { "epoch": 6.914466561172649, "grad_norm": 0.8046875, "learning_rate": 3.439778772036586e-06, "loss": 3.8734, "step": 20757 }, { "epoch": 6.914799700174898, "grad_norm": 0.80859375, "learning_rate": 3.4390982286397167e-06, "loss": 3.9174, "step": 20758 }, { "epoch": 6.915132839177147, "grad_norm": 0.75, "learning_rate": 3.4384177325435588e-06, "loss": 4.0236, "step": 20759 }, { "epoch": 6.9154659781793955, "grad_norm": 0.76171875, "learning_rate": 3.4377372837560383e-06, "loss": 3.9719, "step": 20760 }, { "epoch": 6.915799117181644, "grad_norm": 0.734375, "learning_rate": 3.4370568822850793e-06, "loss": 4.0465, "step": 20761 }, { "epoch": 6.916132256183893, "grad_norm": 0.76953125, "learning_rate": 3.4363765281386045e-06, "loss": 4.0304, "step": 20762 }, { "epoch": 6.916465395186141, "grad_norm": 0.7265625, "learning_rate": 3.4356962213245434e-06, "loss": 3.9974, "step": 20763 }, { "epoch": 6.91679853418839, "grad_norm": 0.78125, "learning_rate": 3.4350159618508216e-06, "loss": 3.9494, "step": 20764 }, { "epoch": 6.917131673190639, "grad_norm": 0.75, "learning_rate": 3.4343357497253536e-06, "loss": 4.0063, "step": 20765 }, { "epoch": 6.917464812192888, "grad_norm": 0.78125, "learning_rate": 3.4336555849560703e-06, "loss": 3.9213, "step": 20766 }, { "epoch": 6.917797951195136, "grad_norm": 0.72265625, "learning_rate": 3.432975467550892e-06, "loss": 4.0121, "step": 20767 }, { "epoch": 6.9181310901973845, "grad_norm": 0.75390625, "learning_rate": 3.4322953975177403e-06, "loss": 4.0166, "step": 20768 }, { "epoch": 6.918464229199634, "grad_norm": 0.765625, "learning_rate": 3.431615374864534e-06, "loss": 3.8777, "step": 20769 }, { "epoch": 6.918797368201882, "grad_norm": 0.77734375, "learning_rate": 3.430935399599199e-06, "loss": 4.0451, "step": 20770 }, { "epoch": 6.919130507204131, "grad_norm": 0.765625, "learning_rate": 3.430255471729653e-06, "loss": 4.0033, "step": 20771 }, { "epoch": 6.919463646206379, "grad_norm": 0.76953125, "learning_rate": 3.4295755912638154e-06, "loss": 4.0291, "step": 20772 }, { "epoch": 6.919796785208629, "grad_norm": 0.78125, "learning_rate": 3.428895758209606e-06, "loss": 4.0372, "step": 20773 }, { "epoch": 6.920129924210877, "grad_norm": 0.796875, "learning_rate": 3.428215972574943e-06, "loss": 3.9999, "step": 20774 }, { "epoch": 6.920463063213125, "grad_norm": 0.80078125, "learning_rate": 3.4275362343677446e-06, "loss": 4.0479, "step": 20775 }, { "epoch": 6.920796202215374, "grad_norm": 0.80859375, "learning_rate": 3.4268565435959247e-06, "loss": 4.0417, "step": 20776 }, { "epoch": 6.921129341217623, "grad_norm": 0.87890625, "learning_rate": 3.426176900267407e-06, "loss": 4.0618, "step": 20777 }, { "epoch": 6.921462480219872, "grad_norm": 0.7421875, "learning_rate": 3.425497304390105e-06, "loss": 3.993, "step": 20778 }, { "epoch": 6.92179561922212, "grad_norm": 0.7578125, "learning_rate": 3.4248177559719334e-06, "loss": 3.9535, "step": 20779 }, { "epoch": 6.922128758224369, "grad_norm": 0.75, "learning_rate": 3.424138255020805e-06, "loss": 3.9875, "step": 20780 }, { "epoch": 6.922461897226618, "grad_norm": 0.7890625, "learning_rate": 3.423458801544641e-06, "loss": 4.0583, "step": 20781 }, { "epoch": 6.922795036228867, "grad_norm": 0.734375, "learning_rate": 3.422779395551355e-06, "loss": 4.027, "step": 20782 }, { "epoch": 6.923128175231115, "grad_norm": 0.79296875, "learning_rate": 3.422100037048854e-06, "loss": 4.0022, "step": 20783 }, { "epoch": 6.923461314233364, "grad_norm": 0.78125, "learning_rate": 3.4214207260450532e-06, "loss": 4.0094, "step": 20784 }, { "epoch": 6.923794453235613, "grad_norm": 0.765625, "learning_rate": 3.4207414625478694e-06, "loss": 4.0673, "step": 20785 }, { "epoch": 6.924127592237861, "grad_norm": 0.73828125, "learning_rate": 3.420062246565212e-06, "loss": 3.9533, "step": 20786 }, { "epoch": 6.92446073124011, "grad_norm": 0.83984375, "learning_rate": 3.4193830781049915e-06, "loss": 3.9352, "step": 20787 }, { "epoch": 6.924793870242358, "grad_norm": 0.76953125, "learning_rate": 3.418703957175117e-06, "loss": 3.991, "step": 20788 }, { "epoch": 6.925127009244608, "grad_norm": 0.80859375, "learning_rate": 3.4180248837835045e-06, "loss": 3.9887, "step": 20789 }, { "epoch": 6.925460148246856, "grad_norm": 0.765625, "learning_rate": 3.4173458579380607e-06, "loss": 4.0209, "step": 20790 }, { "epoch": 6.925793287249105, "grad_norm": 0.73828125, "learning_rate": 3.416666879646695e-06, "loss": 3.9766, "step": 20791 }, { "epoch": 6.926126426251353, "grad_norm": 0.82421875, "learning_rate": 3.415987948917314e-06, "loss": 3.9648, "step": 20792 }, { "epoch": 6.926459565253602, "grad_norm": 0.76171875, "learning_rate": 3.4153090657578293e-06, "loss": 3.9688, "step": 20793 }, { "epoch": 6.926792704255851, "grad_norm": 0.75390625, "learning_rate": 3.414630230176146e-06, "loss": 4.0057, "step": 20794 }, { "epoch": 6.927125843258099, "grad_norm": 0.78125, "learning_rate": 3.413951442180168e-06, "loss": 3.9611, "step": 20795 }, { "epoch": 6.927458982260348, "grad_norm": 0.78515625, "learning_rate": 3.413272701777809e-06, "loss": 3.9453, "step": 20796 }, { "epoch": 6.927792121262597, "grad_norm": 0.7265625, "learning_rate": 3.412594008976971e-06, "loss": 4.0526, "step": 20797 }, { "epoch": 6.928125260264846, "grad_norm": 0.76171875, "learning_rate": 3.411915363785559e-06, "loss": 4.037, "step": 20798 }, { "epoch": 6.928458399267094, "grad_norm": 0.74609375, "learning_rate": 3.4112367662114756e-06, "loss": 4.063, "step": 20799 }, { "epoch": 6.928791538269343, "grad_norm": 0.7734375, "learning_rate": 3.410558216262634e-06, "loss": 3.9925, "step": 20800 }, { "epoch": 6.9291246772715915, "grad_norm": 0.75390625, "learning_rate": 3.409879713946928e-06, "loss": 3.9033, "step": 20801 }, { "epoch": 6.929457816273841, "grad_norm": 0.7890625, "learning_rate": 3.4092012592722613e-06, "loss": 4.0543, "step": 20802 }, { "epoch": 6.929790955276089, "grad_norm": 0.7109375, "learning_rate": 3.408522852246542e-06, "loss": 4.0689, "step": 20803 }, { "epoch": 6.930124094278337, "grad_norm": 0.765625, "learning_rate": 3.4078444928776704e-06, "loss": 3.9763, "step": 20804 }, { "epoch": 6.9304572332805865, "grad_norm": 0.76953125, "learning_rate": 3.407166181173546e-06, "loss": 3.9762, "step": 20805 }, { "epoch": 6.930790372282835, "grad_norm": 0.7890625, "learning_rate": 3.406487917142067e-06, "loss": 3.9799, "step": 20806 }, { "epoch": 6.931123511285084, "grad_norm": 0.76171875, "learning_rate": 3.405809700791141e-06, "loss": 4.0391, "step": 20807 }, { "epoch": 6.931456650287332, "grad_norm": 0.78515625, "learning_rate": 3.4051315321286637e-06, "loss": 3.9525, "step": 20808 }, { "epoch": 6.9317897892895814, "grad_norm": 0.7421875, "learning_rate": 3.4044534111625345e-06, "loss": 3.9218, "step": 20809 }, { "epoch": 6.93212292829183, "grad_norm": 0.75, "learning_rate": 3.4037753379006524e-06, "loss": 3.9924, "step": 20810 }, { "epoch": 6.932456067294078, "grad_norm": 0.76953125, "learning_rate": 3.4030973123509157e-06, "loss": 4.0147, "step": 20811 }, { "epoch": 6.932789206296327, "grad_norm": 0.77734375, "learning_rate": 3.4024193345212203e-06, "loss": 3.9644, "step": 20812 }, { "epoch": 6.9331223452985755, "grad_norm": 0.78515625, "learning_rate": 3.401741404419462e-06, "loss": 4.0327, "step": 20813 }, { "epoch": 6.933455484300825, "grad_norm": 0.76953125, "learning_rate": 3.4010635220535426e-06, "loss": 3.9893, "step": 20814 }, { "epoch": 6.933788623303073, "grad_norm": 0.78125, "learning_rate": 3.4003856874313544e-06, "loss": 3.9505, "step": 20815 }, { "epoch": 6.934121762305322, "grad_norm": 0.76171875, "learning_rate": 3.399707900560794e-06, "loss": 4.0265, "step": 20816 }, { "epoch": 6.9344549013075705, "grad_norm": 0.765625, "learning_rate": 3.3990301614497516e-06, "loss": 3.9482, "step": 20817 }, { "epoch": 6.93478804030982, "grad_norm": 0.7578125, "learning_rate": 3.398352470106131e-06, "loss": 4.0334, "step": 20818 }, { "epoch": 6.935121179312068, "grad_norm": 0.76953125, "learning_rate": 3.3976748265378173e-06, "loss": 4.0206, "step": 20819 }, { "epoch": 6.935454318314317, "grad_norm": 0.75390625, "learning_rate": 3.3969972307527033e-06, "loss": 3.9203, "step": 20820 }, { "epoch": 6.935787457316565, "grad_norm": 0.79296875, "learning_rate": 3.3963196827586874e-06, "loss": 3.9595, "step": 20821 }, { "epoch": 6.936120596318814, "grad_norm": 0.765625, "learning_rate": 3.3956421825636586e-06, "loss": 4.0097, "step": 20822 }, { "epoch": 6.936453735321063, "grad_norm": 0.78515625, "learning_rate": 3.394964730175508e-06, "loss": 3.95, "step": 20823 }, { "epoch": 6.936786874323311, "grad_norm": 0.81640625, "learning_rate": 3.394287325602125e-06, "loss": 4.0029, "step": 20824 }, { "epoch": 6.93712001332556, "grad_norm": 0.75, "learning_rate": 3.3936099688514034e-06, "loss": 4.0288, "step": 20825 }, { "epoch": 6.937453152327809, "grad_norm": 0.73046875, "learning_rate": 3.3929326599312324e-06, "loss": 3.983, "step": 20826 }, { "epoch": 6.937786291330058, "grad_norm": 0.76171875, "learning_rate": 3.392255398849502e-06, "loss": 3.9949, "step": 20827 }, { "epoch": 6.938119430332306, "grad_norm": 0.7578125, "learning_rate": 3.3915781856140936e-06, "loss": 4.0168, "step": 20828 }, { "epoch": 6.9384525693345545, "grad_norm": 0.75390625, "learning_rate": 3.390901020232903e-06, "loss": 3.959, "step": 20829 }, { "epoch": 6.938785708336804, "grad_norm": 0.81640625, "learning_rate": 3.3902239027138153e-06, "loss": 3.9462, "step": 20830 }, { "epoch": 6.939118847339052, "grad_norm": 0.75, "learning_rate": 3.389546833064717e-06, "loss": 3.9605, "step": 20831 }, { "epoch": 6.939451986341301, "grad_norm": 0.78125, "learning_rate": 3.388869811293493e-06, "loss": 4.0061, "step": 20832 }, { "epoch": 6.939785125343549, "grad_norm": 0.75, "learning_rate": 3.3881928374080325e-06, "loss": 3.9324, "step": 20833 }, { "epoch": 6.940118264345799, "grad_norm": 0.78515625, "learning_rate": 3.3875159114162205e-06, "loss": 3.9709, "step": 20834 }, { "epoch": 6.940451403348047, "grad_norm": 0.796875, "learning_rate": 3.3868390333259403e-06, "loss": 3.9286, "step": 20835 }, { "epoch": 6.940784542350295, "grad_norm": 0.79296875, "learning_rate": 3.3861622031450757e-06, "loss": 3.9677, "step": 20836 }, { "epoch": 6.941117681352544, "grad_norm": 0.796875, "learning_rate": 3.385485420881511e-06, "loss": 4.0172, "step": 20837 }, { "epoch": 6.941450820354793, "grad_norm": 0.7421875, "learning_rate": 3.3848086865431295e-06, "loss": 3.9784, "step": 20838 }, { "epoch": 6.941783959357042, "grad_norm": 0.78125, "learning_rate": 3.3841320001378105e-06, "loss": 4.0423, "step": 20839 }, { "epoch": 6.94211709835929, "grad_norm": 0.79296875, "learning_rate": 3.3834553616734412e-06, "loss": 3.9688, "step": 20840 }, { "epoch": 6.942450237361539, "grad_norm": 0.77734375, "learning_rate": 3.3827787711579007e-06, "loss": 3.9381, "step": 20841 }, { "epoch": 6.942783376363788, "grad_norm": 0.8125, "learning_rate": 3.38210222859907e-06, "loss": 3.9778, "step": 20842 }, { "epoch": 6.943116515366037, "grad_norm": 0.7421875, "learning_rate": 3.3814257340048254e-06, "loss": 3.9032, "step": 20843 }, { "epoch": 6.943449654368285, "grad_norm": 0.7421875, "learning_rate": 3.3807492873830535e-06, "loss": 4.0109, "step": 20844 }, { "epoch": 6.943782793370534, "grad_norm": 0.73046875, "learning_rate": 3.380072888741633e-06, "loss": 4.035, "step": 20845 }, { "epoch": 6.944115932372783, "grad_norm": 0.796875, "learning_rate": 3.3793965380884338e-06, "loss": 3.9304, "step": 20846 }, { "epoch": 6.944449071375031, "grad_norm": 0.78125, "learning_rate": 3.378720235431342e-06, "loss": 4.0121, "step": 20847 }, { "epoch": 6.94478221037728, "grad_norm": 0.7421875, "learning_rate": 3.378043980778234e-06, "loss": 3.9581, "step": 20848 }, { "epoch": 6.945115349379528, "grad_norm": 0.78515625, "learning_rate": 3.377367774136985e-06, "loss": 3.988, "step": 20849 }, { "epoch": 6.9454484883817775, "grad_norm": 0.75, "learning_rate": 3.3766916155154682e-06, "loss": 3.9491, "step": 20850 }, { "epoch": 6.945781627384026, "grad_norm": 0.73046875, "learning_rate": 3.376015504921566e-06, "loss": 3.9688, "step": 20851 }, { "epoch": 6.946114766386275, "grad_norm": 0.76953125, "learning_rate": 3.3753394423631516e-06, "loss": 4.0006, "step": 20852 }, { "epoch": 6.946447905388523, "grad_norm": 0.828125, "learning_rate": 3.3746634278480986e-06, "loss": 3.9406, "step": 20853 }, { "epoch": 6.946781044390772, "grad_norm": 0.80078125, "learning_rate": 3.37398746138428e-06, "loss": 3.9585, "step": 20854 }, { "epoch": 6.947114183393021, "grad_norm": 0.80859375, "learning_rate": 3.3733115429795717e-06, "loss": 4.0004, "step": 20855 }, { "epoch": 6.947447322395269, "grad_norm": 0.80859375, "learning_rate": 3.3726356726418455e-06, "loss": 3.99, "step": 20856 }, { "epoch": 6.947780461397518, "grad_norm": 0.75, "learning_rate": 3.3719598503789705e-06, "loss": 3.9927, "step": 20857 }, { "epoch": 6.9481136003997666, "grad_norm": 0.77734375, "learning_rate": 3.3712840761988245e-06, "loss": 3.9896, "step": 20858 }, { "epoch": 6.948446739402016, "grad_norm": 0.73046875, "learning_rate": 3.3706083501092765e-06, "loss": 3.9814, "step": 20859 }, { "epoch": 6.948779878404264, "grad_norm": 0.796875, "learning_rate": 3.369932672118197e-06, "loss": 4.0405, "step": 20860 }, { "epoch": 6.949113017406513, "grad_norm": 0.734375, "learning_rate": 3.3692570422334536e-06, "loss": 3.9878, "step": 20861 }, { "epoch": 6.9494461564087615, "grad_norm": 0.74609375, "learning_rate": 3.3685814604629216e-06, "loss": 3.9746, "step": 20862 }, { "epoch": 6.949779295411011, "grad_norm": 0.74609375, "learning_rate": 3.3679059268144693e-06, "loss": 3.9291, "step": 20863 }, { "epoch": 6.950112434413259, "grad_norm": 0.78515625, "learning_rate": 3.3672304412959566e-06, "loss": 3.977, "step": 20864 }, { "epoch": 6.950445573415507, "grad_norm": 0.7734375, "learning_rate": 3.3665550039152615e-06, "loss": 4.0158, "step": 20865 }, { "epoch": 6.9507787124177565, "grad_norm": 0.796875, "learning_rate": 3.365879614680247e-06, "loss": 3.8912, "step": 20866 }, { "epoch": 6.951111851420005, "grad_norm": 0.75390625, "learning_rate": 3.3652042735987813e-06, "loss": 3.9975, "step": 20867 }, { "epoch": 6.951444990422254, "grad_norm": 0.7578125, "learning_rate": 3.364528980678726e-06, "loss": 4.0497, "step": 20868 }, { "epoch": 6.951778129424502, "grad_norm": 0.79296875, "learning_rate": 3.3638537359279543e-06, "loss": 4.0655, "step": 20869 }, { "epoch": 6.952111268426751, "grad_norm": 0.7734375, "learning_rate": 3.363178539354327e-06, "loss": 4.0439, "step": 20870 }, { "epoch": 6.952444407429, "grad_norm": 0.765625, "learning_rate": 3.36250339096571e-06, "loss": 4.021, "step": 20871 }, { "epoch": 6.952777546431248, "grad_norm": 0.8046875, "learning_rate": 3.3618282907699666e-06, "loss": 3.9667, "step": 20872 }, { "epoch": 6.953110685433497, "grad_norm": 0.765625, "learning_rate": 3.36115323877496e-06, "loss": 3.9787, "step": 20873 }, { "epoch": 6.9534438244357455, "grad_norm": 0.734375, "learning_rate": 3.3604782349885534e-06, "loss": 4.0656, "step": 20874 }, { "epoch": 6.953776963437995, "grad_norm": 0.765625, "learning_rate": 3.3598032794186093e-06, "loss": 4.0227, "step": 20875 }, { "epoch": 6.954110102440243, "grad_norm": 0.78125, "learning_rate": 3.3591283720729865e-06, "loss": 3.9635, "step": 20876 }, { "epoch": 6.954443241442492, "grad_norm": 0.75390625, "learning_rate": 3.358453512959552e-06, "loss": 4.0258, "step": 20877 }, { "epoch": 6.9547763804447404, "grad_norm": 0.80078125, "learning_rate": 3.3577787020861635e-06, "loss": 3.9879, "step": 20878 }, { "epoch": 6.95510951944699, "grad_norm": 0.73828125, "learning_rate": 3.3571039394606817e-06, "loss": 3.9986, "step": 20879 }, { "epoch": 6.955442658449238, "grad_norm": 0.828125, "learning_rate": 3.356429225090962e-06, "loss": 3.9655, "step": 20880 }, { "epoch": 6.955775797451487, "grad_norm": 0.79296875, "learning_rate": 3.355754558984872e-06, "loss": 3.9964, "step": 20881 }, { "epoch": 6.956108936453735, "grad_norm": 0.7890625, "learning_rate": 3.355079941150263e-06, "loss": 4.0093, "step": 20882 }, { "epoch": 6.956442075455984, "grad_norm": 0.7421875, "learning_rate": 3.3544053715949916e-06, "loss": 3.9427, "step": 20883 }, { "epoch": 6.956775214458233, "grad_norm": 0.78125, "learning_rate": 3.3537308503269213e-06, "loss": 3.9755, "step": 20884 }, { "epoch": 6.957108353460481, "grad_norm": 0.76953125, "learning_rate": 3.353056377353906e-06, "loss": 3.9464, "step": 20885 }, { "epoch": 6.95744149246273, "grad_norm": 0.765625, "learning_rate": 3.352381952683801e-06, "loss": 3.9639, "step": 20886 }, { "epoch": 6.957774631464979, "grad_norm": 0.7421875, "learning_rate": 3.3517075763244598e-06, "loss": 4.0295, "step": 20887 }, { "epoch": 6.958107770467228, "grad_norm": 0.7734375, "learning_rate": 3.3510332482837434e-06, "loss": 3.9917, "step": 20888 }, { "epoch": 6.958440909469476, "grad_norm": 0.78515625, "learning_rate": 3.3503589685695026e-06, "loss": 3.9678, "step": 20889 }, { "epoch": 6.958774048471724, "grad_norm": 0.7578125, "learning_rate": 3.3496847371895917e-06, "loss": 3.9722, "step": 20890 }, { "epoch": 6.959107187473974, "grad_norm": 0.7578125, "learning_rate": 3.349010554151864e-06, "loss": 4.0452, "step": 20891 }, { "epoch": 6.959440326476222, "grad_norm": 0.765625, "learning_rate": 3.3483364194641735e-06, "loss": 4.0025, "step": 20892 }, { "epoch": 6.959773465478471, "grad_norm": 0.76171875, "learning_rate": 3.34766233313437e-06, "loss": 4.0056, "step": 20893 }, { "epoch": 6.960106604480719, "grad_norm": 0.765625, "learning_rate": 3.346988295170304e-06, "loss": 3.9501, "step": 20894 }, { "epoch": 6.960439743482969, "grad_norm": 0.75, "learning_rate": 3.3463143055798315e-06, "loss": 3.9453, "step": 20895 }, { "epoch": 6.960772882485217, "grad_norm": 0.73046875, "learning_rate": 3.3456403643708e-06, "loss": 4.0314, "step": 20896 }, { "epoch": 6.961106021487465, "grad_norm": 0.7734375, "learning_rate": 3.3449664715510608e-06, "loss": 4.023, "step": 20897 }, { "epoch": 6.961439160489714, "grad_norm": 0.7734375, "learning_rate": 3.3442926271284593e-06, "loss": 4.0058, "step": 20898 }, { "epoch": 6.961772299491963, "grad_norm": 0.75390625, "learning_rate": 3.3436188311108528e-06, "loss": 4.0321, "step": 20899 }, { "epoch": 6.962105438494212, "grad_norm": 0.7578125, "learning_rate": 3.342945083506082e-06, "loss": 3.9756, "step": 20900 }, { "epoch": 6.96243857749646, "grad_norm": 0.8046875, "learning_rate": 3.342271384321994e-06, "loss": 4.0559, "step": 20901 }, { "epoch": 6.962771716498709, "grad_norm": 0.76171875, "learning_rate": 3.3415977335664406e-06, "loss": 4.0395, "step": 20902 }, { "epoch": 6.963104855500958, "grad_norm": 0.75390625, "learning_rate": 3.3409241312472676e-06, "loss": 3.9263, "step": 20903 }, { "epoch": 6.963437994503207, "grad_norm": 0.81640625, "learning_rate": 3.3402505773723195e-06, "loss": 3.9561, "step": 20904 }, { "epoch": 6.963771133505455, "grad_norm": 0.78125, "learning_rate": 3.3395770719494393e-06, "loss": 3.9526, "step": 20905 }, { "epoch": 6.964104272507704, "grad_norm": 0.75, "learning_rate": 3.3389036149864775e-06, "loss": 4.0805, "step": 20906 }, { "epoch": 6.9644374115099525, "grad_norm": 0.7734375, "learning_rate": 3.338230206491276e-06, "loss": 3.9902, "step": 20907 }, { "epoch": 6.964770550512201, "grad_norm": 0.828125, "learning_rate": 3.337556846471678e-06, "loss": 3.9952, "step": 20908 }, { "epoch": 6.96510368951445, "grad_norm": 0.73828125, "learning_rate": 3.3368835349355263e-06, "loss": 4.0421, "step": 20909 }, { "epoch": 6.965436828516698, "grad_norm": 0.7265625, "learning_rate": 3.336210271890665e-06, "loss": 3.9618, "step": 20910 }, { "epoch": 6.9657699675189475, "grad_norm": 0.76953125, "learning_rate": 3.3355370573449353e-06, "loss": 3.9486, "step": 20911 }, { "epoch": 6.966103106521196, "grad_norm": 0.8046875, "learning_rate": 3.3348638913061753e-06, "loss": 4.0264, "step": 20912 }, { "epoch": 6.966436245523445, "grad_norm": 0.77734375, "learning_rate": 3.3341907737822325e-06, "loss": 3.9372, "step": 20913 }, { "epoch": 6.966769384525693, "grad_norm": 0.72265625, "learning_rate": 3.333517704780944e-06, "loss": 4.0561, "step": 20914 }, { "epoch": 6.967102523527942, "grad_norm": 0.75, "learning_rate": 3.332844684310149e-06, "loss": 4.0116, "step": 20915 }, { "epoch": 6.967435662530191, "grad_norm": 0.80078125, "learning_rate": 3.3321717123776856e-06, "loss": 3.9075, "step": 20916 }, { "epoch": 6.967768801532439, "grad_norm": 0.7890625, "learning_rate": 3.3314987889913996e-06, "loss": 3.9804, "step": 20917 }, { "epoch": 6.968101940534688, "grad_norm": 0.76171875, "learning_rate": 3.330825914159121e-06, "loss": 3.9254, "step": 20918 }, { "epoch": 6.9684350795369365, "grad_norm": 0.76953125, "learning_rate": 3.3301530878886897e-06, "loss": 3.9889, "step": 20919 }, { "epoch": 6.968768218539186, "grad_norm": 0.765625, "learning_rate": 3.3294803101879405e-06, "loss": 3.9802, "step": 20920 }, { "epoch": 6.969101357541434, "grad_norm": 0.79296875, "learning_rate": 3.3288075810647147e-06, "loss": 4.0436, "step": 20921 }, { "epoch": 6.969434496543683, "grad_norm": 0.734375, "learning_rate": 3.3281349005268468e-06, "loss": 4.0054, "step": 20922 }, { "epoch": 6.9697676355459315, "grad_norm": 0.7421875, "learning_rate": 3.32746226858217e-06, "loss": 4.0385, "step": 20923 }, { "epoch": 6.970100774548181, "grad_norm": 0.734375, "learning_rate": 3.326789685238517e-06, "loss": 4.0257, "step": 20924 }, { "epoch": 6.970433913550429, "grad_norm": 0.78515625, "learning_rate": 3.3261171505037288e-06, "loss": 3.987, "step": 20925 }, { "epoch": 6.970767052552677, "grad_norm": 0.76171875, "learning_rate": 3.3254446643856378e-06, "loss": 3.9868, "step": 20926 }, { "epoch": 6.971100191554926, "grad_norm": 0.77734375, "learning_rate": 3.324772226892069e-06, "loss": 3.9625, "step": 20927 }, { "epoch": 6.971433330557175, "grad_norm": 0.72265625, "learning_rate": 3.324099838030862e-06, "loss": 4.0103, "step": 20928 }, { "epoch": 6.971766469559424, "grad_norm": 0.7890625, "learning_rate": 3.323427497809848e-06, "loss": 4.0749, "step": 20929 }, { "epoch": 6.972099608561672, "grad_norm": 0.8359375, "learning_rate": 3.322755206236857e-06, "loss": 3.9751, "step": 20930 }, { "epoch": 6.972432747563921, "grad_norm": 0.8203125, "learning_rate": 3.3220829633197165e-06, "loss": 3.9033, "step": 20931 }, { "epoch": 6.97276588656617, "grad_norm": 0.73046875, "learning_rate": 3.321410769066264e-06, "loss": 4.0004, "step": 20932 }, { "epoch": 6.973099025568418, "grad_norm": 0.73828125, "learning_rate": 3.3207386234843244e-06, "loss": 3.9744, "step": 20933 }, { "epoch": 6.973432164570667, "grad_norm": 0.79296875, "learning_rate": 3.3200665265817275e-06, "loss": 3.9959, "step": 20934 }, { "epoch": 6.9737653035729155, "grad_norm": 0.77734375, "learning_rate": 3.3193944783663024e-06, "loss": 3.991, "step": 20935 }, { "epoch": 6.974098442575165, "grad_norm": 0.76953125, "learning_rate": 3.318722478845876e-06, "loss": 4.011, "step": 20936 }, { "epoch": 6.974431581577413, "grad_norm": 0.828125, "learning_rate": 3.318050528028276e-06, "loss": 3.9951, "step": 20937 }, { "epoch": 6.974764720579662, "grad_norm": 0.76171875, "learning_rate": 3.3173786259213267e-06, "loss": 4.1008, "step": 20938 }, { "epoch": 6.97509785958191, "grad_norm": 0.7421875, "learning_rate": 3.3167067725328595e-06, "loss": 4.0207, "step": 20939 }, { "epoch": 6.97543099858416, "grad_norm": 0.77734375, "learning_rate": 3.3160349678706978e-06, "loss": 4.053, "step": 20940 }, { "epoch": 6.975764137586408, "grad_norm": 0.78125, "learning_rate": 3.315363211942666e-06, "loss": 4.0338, "step": 20941 }, { "epoch": 6.976097276588657, "grad_norm": 0.73828125, "learning_rate": 3.3146915047565867e-06, "loss": 4.0376, "step": 20942 }, { "epoch": 6.976430415590905, "grad_norm": 0.765625, "learning_rate": 3.314019846320288e-06, "loss": 4.0006, "step": 20943 }, { "epoch": 6.976763554593154, "grad_norm": 0.7578125, "learning_rate": 3.3133482366415946e-06, "loss": 4.0078, "step": 20944 }, { "epoch": 6.977096693595403, "grad_norm": 0.76953125, "learning_rate": 3.31267667572832e-06, "loss": 3.9926, "step": 20945 }, { "epoch": 6.977429832597651, "grad_norm": 0.75, "learning_rate": 3.3120051635882953e-06, "loss": 3.9905, "step": 20946 }, { "epoch": 6.9777629715999, "grad_norm": 0.7265625, "learning_rate": 3.311333700229339e-06, "loss": 4.0252, "step": 20947 }, { "epoch": 6.978096110602149, "grad_norm": 0.75390625, "learning_rate": 3.3106622856592726e-06, "loss": 3.9761, "step": 20948 }, { "epoch": 6.978429249604398, "grad_norm": 0.76171875, "learning_rate": 3.3099909198859137e-06, "loss": 3.963, "step": 20949 }, { "epoch": 6.978762388606646, "grad_norm": 0.7421875, "learning_rate": 3.3093196029170876e-06, "loss": 4.0334, "step": 20950 }, { "epoch": 6.979095527608894, "grad_norm": 0.79296875, "learning_rate": 3.3086483347606116e-06, "loss": 4.0514, "step": 20951 }, { "epoch": 6.979428666611144, "grad_norm": 0.74609375, "learning_rate": 3.3079771154243042e-06, "loss": 4.0097, "step": 20952 }, { "epoch": 6.979761805613392, "grad_norm": 0.77734375, "learning_rate": 3.3073059449159827e-06, "loss": 3.953, "step": 20953 }, { "epoch": 6.980094944615641, "grad_norm": 0.73828125, "learning_rate": 3.306634823243467e-06, "loss": 4.0018, "step": 20954 }, { "epoch": 6.980428083617889, "grad_norm": 0.80859375, "learning_rate": 3.3059637504145716e-06, "loss": 4.0821, "step": 20955 }, { "epoch": 6.9807612226201385, "grad_norm": 0.75390625, "learning_rate": 3.305292726437112e-06, "loss": 3.9768, "step": 20956 }, { "epoch": 6.981094361622387, "grad_norm": 0.81640625, "learning_rate": 3.3046217513189082e-06, "loss": 3.9489, "step": 20957 }, { "epoch": 6.981427500624636, "grad_norm": 0.73046875, "learning_rate": 3.3039508250677748e-06, "loss": 3.9279, "step": 20958 }, { "epoch": 6.981760639626884, "grad_norm": 0.77734375, "learning_rate": 3.3032799476915253e-06, "loss": 3.9891, "step": 20959 }, { "epoch": 6.9820937786291335, "grad_norm": 0.7734375, "learning_rate": 3.302609119197972e-06, "loss": 4.0272, "step": 20960 }, { "epoch": 6.982426917631382, "grad_norm": 0.796875, "learning_rate": 3.301938339594936e-06, "loss": 3.9737, "step": 20961 }, { "epoch": 6.98276005663363, "grad_norm": 0.79296875, "learning_rate": 3.301267608890222e-06, "loss": 3.9256, "step": 20962 }, { "epoch": 6.983093195635879, "grad_norm": 0.796875, "learning_rate": 3.3005969270916475e-06, "loss": 3.9564, "step": 20963 }, { "epoch": 6.983426334638128, "grad_norm": 0.78125, "learning_rate": 3.299926294207019e-06, "loss": 3.9793, "step": 20964 }, { "epoch": 6.983759473640377, "grad_norm": 0.75390625, "learning_rate": 3.2992557102441547e-06, "loss": 3.947, "step": 20965 }, { "epoch": 6.984092612642625, "grad_norm": 0.796875, "learning_rate": 3.2985851752108627e-06, "loss": 4.0548, "step": 20966 }, { "epoch": 6.984425751644874, "grad_norm": 0.74609375, "learning_rate": 3.297914689114954e-06, "loss": 3.9537, "step": 20967 }, { "epoch": 6.9847588906471225, "grad_norm": 0.76953125, "learning_rate": 3.2972442519642336e-06, "loss": 3.9791, "step": 20968 }, { "epoch": 6.985092029649371, "grad_norm": 0.77734375, "learning_rate": 3.2965738637665175e-06, "loss": 4.025, "step": 20969 }, { "epoch": 6.98542516865162, "grad_norm": 0.76171875, "learning_rate": 3.2959035245296134e-06, "loss": 4.008, "step": 20970 }, { "epoch": 6.985758307653868, "grad_norm": 0.76953125, "learning_rate": 3.2952332342613227e-06, "loss": 3.9606, "step": 20971 }, { "epoch": 6.9860914466561175, "grad_norm": 0.82421875, "learning_rate": 3.2945629929694603e-06, "loss": 4.0254, "step": 20972 }, { "epoch": 6.986424585658366, "grad_norm": 0.78125, "learning_rate": 3.2938928006618293e-06, "loss": 3.9108, "step": 20973 }, { "epoch": 6.986757724660615, "grad_norm": 0.7421875, "learning_rate": 3.2932226573462366e-06, "loss": 4.0413, "step": 20974 }, { "epoch": 6.987090863662863, "grad_norm": 0.84375, "learning_rate": 3.292552563030486e-06, "loss": 4.0275, "step": 20975 }, { "epoch": 6.9874240026651115, "grad_norm": 0.78515625, "learning_rate": 3.2918825177223867e-06, "loss": 4.0243, "step": 20976 }, { "epoch": 6.987757141667361, "grad_norm": 0.80859375, "learning_rate": 3.2912125214297425e-06, "loss": 3.9537, "step": 20977 }, { "epoch": 6.988090280669609, "grad_norm": 0.7578125, "learning_rate": 3.2905425741603554e-06, "loss": 4.0335, "step": 20978 }, { "epoch": 6.988423419671858, "grad_norm": 0.8203125, "learning_rate": 3.28987267592203e-06, "loss": 4.0515, "step": 20979 }, { "epoch": 6.9887565586741065, "grad_norm": 0.75390625, "learning_rate": 3.2892028267225683e-06, "loss": 3.9535, "step": 20980 }, { "epoch": 6.989089697676356, "grad_norm": 0.74609375, "learning_rate": 3.2885330265697733e-06, "loss": 4.0265, "step": 20981 }, { "epoch": 6.989422836678604, "grad_norm": 0.78515625, "learning_rate": 3.287863275471444e-06, "loss": 3.9767, "step": 20982 }, { "epoch": 6.989755975680853, "grad_norm": 0.74609375, "learning_rate": 3.2871935734353863e-06, "loss": 3.9001, "step": 20983 }, { "epoch": 6.9900891146831015, "grad_norm": 0.77734375, "learning_rate": 3.2865239204693987e-06, "loss": 4.0067, "step": 20984 }, { "epoch": 6.990422253685351, "grad_norm": 0.75390625, "learning_rate": 3.285854316581281e-06, "loss": 3.9672, "step": 20985 }, { "epoch": 6.990755392687599, "grad_norm": 0.76171875, "learning_rate": 3.2851847617788303e-06, "loss": 4.0296, "step": 20986 }, { "epoch": 6.991088531689847, "grad_norm": 0.765625, "learning_rate": 3.2845152560698505e-06, "loss": 3.9968, "step": 20987 }, { "epoch": 6.991421670692096, "grad_norm": 0.8203125, "learning_rate": 3.2838457994621404e-06, "loss": 3.9212, "step": 20988 }, { "epoch": 6.991754809694345, "grad_norm": 0.7734375, "learning_rate": 3.283176391963488e-06, "loss": 3.9838, "step": 20989 }, { "epoch": 6.992087948696594, "grad_norm": 0.765625, "learning_rate": 3.2825070335817e-06, "loss": 3.961, "step": 20990 }, { "epoch": 6.992421087698842, "grad_norm": 0.76171875, "learning_rate": 3.2818377243245713e-06, "loss": 4.024, "step": 20991 }, { "epoch": 6.992754226701091, "grad_norm": 0.75390625, "learning_rate": 3.281168464199895e-06, "loss": 3.9508, "step": 20992 }, { "epoch": 6.99308736570334, "grad_norm": 0.75, "learning_rate": 3.280499253215467e-06, "loss": 4.0038, "step": 20993 }, { "epoch": 6.993420504705588, "grad_norm": 0.75, "learning_rate": 3.279830091379085e-06, "loss": 4.0574, "step": 20994 }, { "epoch": 6.993753643707837, "grad_norm": 0.75, "learning_rate": 3.279160978698541e-06, "loss": 3.9743, "step": 20995 }, { "epoch": 6.994086782710085, "grad_norm": 0.7578125, "learning_rate": 3.2784919151816297e-06, "loss": 4.0135, "step": 20996 }, { "epoch": 6.994419921712335, "grad_norm": 0.7421875, "learning_rate": 3.277822900836144e-06, "loss": 3.9445, "step": 20997 }, { "epoch": 6.994753060714583, "grad_norm": 0.7734375, "learning_rate": 3.2771539356698765e-06, "loss": 4.0359, "step": 20998 }, { "epoch": 6.995086199716832, "grad_norm": 0.765625, "learning_rate": 3.2764850196906184e-06, "loss": 3.9485, "step": 20999 }, { "epoch": 6.99541933871908, "grad_norm": 0.796875, "learning_rate": 3.2758161529061594e-06, "loss": 4.0396, "step": 21000 }, { "epoch": 6.99575247772133, "grad_norm": 0.79296875, "learning_rate": 3.275147335324296e-06, "loss": 4.0114, "step": 21001 }, { "epoch": 6.996085616723578, "grad_norm": 0.76171875, "learning_rate": 3.274478566952814e-06, "loss": 3.9381, "step": 21002 }, { "epoch": 6.996418755725827, "grad_norm": 0.7578125, "learning_rate": 3.273809847799505e-06, "loss": 3.9205, "step": 21003 }, { "epoch": 6.996751894728075, "grad_norm": 0.7265625, "learning_rate": 3.2731411778721536e-06, "loss": 3.9504, "step": 21004 }, { "epoch": 6.997085033730324, "grad_norm": 0.828125, "learning_rate": 3.2724725571785557e-06, "loss": 3.8698, "step": 21005 }, { "epoch": 6.997418172732573, "grad_norm": 0.74609375, "learning_rate": 3.2718039857264986e-06, "loss": 4.0476, "step": 21006 }, { "epoch": 6.997751311734821, "grad_norm": 0.76953125, "learning_rate": 3.2711354635237636e-06, "loss": 3.9529, "step": 21007 }, { "epoch": 6.99808445073707, "grad_norm": 0.8046875, "learning_rate": 3.2704669905781387e-06, "loss": 4.0335, "step": 21008 }, { "epoch": 6.998417589739319, "grad_norm": 0.78125, "learning_rate": 3.269798566897414e-06, "loss": 3.993, "step": 21009 }, { "epoch": 6.998750728741568, "grad_norm": 0.8046875, "learning_rate": 3.269130192489374e-06, "loss": 3.9437, "step": 21010 }, { "epoch": 6.999083867743816, "grad_norm": 0.74609375, "learning_rate": 3.268461867361803e-06, "loss": 3.9963, "step": 21011 }, { "epoch": 6.999417006746064, "grad_norm": 0.7734375, "learning_rate": 3.2677935915224834e-06, "loss": 4.0656, "step": 21012 }, { "epoch": 6.999750145748314, "grad_norm": 0.8125, "learning_rate": 3.2671253649792037e-06, "loss": 3.9565, "step": 21013 }, { "epoch": 7.0, "grad_norm": 0.84375, "learning_rate": 3.2664571877397446e-06, "loss": 3.9846, "step": 21014 }, { "epoch": 7.000333139002248, "grad_norm": 0.74609375, "learning_rate": 3.26578905981189e-06, "loss": 3.9551, "step": 21015 }, { "epoch": 7.0006662780044975, "grad_norm": 0.76953125, "learning_rate": 3.2651209812034222e-06, "loss": 3.9874, "step": 21016 }, { "epoch": 7.000999417006746, "grad_norm": 0.7734375, "learning_rate": 3.2644529519221215e-06, "loss": 4.0324, "step": 21017 }, { "epoch": 7.001332556008995, "grad_norm": 0.77734375, "learning_rate": 3.26378497197577e-06, "loss": 3.9545, "step": 21018 }, { "epoch": 7.001665695011243, "grad_norm": 0.7265625, "learning_rate": 3.2631170413721452e-06, "loss": 4.0074, "step": 21019 }, { "epoch": 7.001998834013492, "grad_norm": 0.7421875, "learning_rate": 3.262449160119033e-06, "loss": 4.0167, "step": 21020 }, { "epoch": 7.002331973015741, "grad_norm": 0.75390625, "learning_rate": 3.2617813282242086e-06, "loss": 4.0002, "step": 21021 }, { "epoch": 7.00266511201799, "grad_norm": 0.8046875, "learning_rate": 3.2611135456954525e-06, "loss": 3.9632, "step": 21022 }, { "epoch": 7.002998251020238, "grad_norm": 0.8125, "learning_rate": 3.2604458125405386e-06, "loss": 3.9908, "step": 21023 }, { "epoch": 7.0033313900224865, "grad_norm": 0.77734375, "learning_rate": 3.2597781287672547e-06, "loss": 4.0202, "step": 21024 }, { "epoch": 7.003664529024736, "grad_norm": 0.76953125, "learning_rate": 3.259110494383368e-06, "loss": 4.0125, "step": 21025 }, { "epoch": 7.003997668026984, "grad_norm": 0.84765625, "learning_rate": 3.2584429093966555e-06, "loss": 4.0466, "step": 21026 }, { "epoch": 7.004330807029233, "grad_norm": 0.77734375, "learning_rate": 3.2577753738148985e-06, "loss": 3.9523, "step": 21027 }, { "epoch": 7.0046639460314815, "grad_norm": 0.77734375, "learning_rate": 3.25710788764587e-06, "loss": 3.9526, "step": 21028 }, { "epoch": 7.004997085033731, "grad_norm": 0.73046875, "learning_rate": 3.256440450897345e-06, "loss": 4.0062, "step": 21029 }, { "epoch": 7.005330224035979, "grad_norm": 0.77734375, "learning_rate": 3.2557730635770933e-06, "loss": 3.9637, "step": 21030 }, { "epoch": 7.005663363038228, "grad_norm": 0.75, "learning_rate": 3.255105725692896e-06, "loss": 4.0028, "step": 21031 }, { "epoch": 7.005996502040476, "grad_norm": 0.7890625, "learning_rate": 3.2544384372525222e-06, "loss": 3.9363, "step": 21032 }, { "epoch": 7.006329641042725, "grad_norm": 0.76953125, "learning_rate": 3.253771198263745e-06, "loss": 3.9576, "step": 21033 }, { "epoch": 7.006662780044974, "grad_norm": 0.72265625, "learning_rate": 3.253104008734336e-06, "loss": 4.0516, "step": 21034 }, { "epoch": 7.006995919047222, "grad_norm": 0.77734375, "learning_rate": 3.2524368686720663e-06, "loss": 3.9407, "step": 21035 }, { "epoch": 7.007329058049471, "grad_norm": 0.75390625, "learning_rate": 3.2517697780847073e-06, "loss": 4.0732, "step": 21036 }, { "epoch": 7.00766219705172, "grad_norm": 0.7578125, "learning_rate": 3.251102736980026e-06, "loss": 4.0005, "step": 21037 }, { "epoch": 7.007995336053969, "grad_norm": 0.765625, "learning_rate": 3.250435745365797e-06, "loss": 3.9766, "step": 21038 }, { "epoch": 7.008328475056217, "grad_norm": 0.78515625, "learning_rate": 3.2497688032497873e-06, "loss": 4.0039, "step": 21039 }, { "epoch": 7.008661614058466, "grad_norm": 0.77734375, "learning_rate": 3.2491019106397647e-06, "loss": 3.9723, "step": 21040 }, { "epoch": 7.008994753060715, "grad_norm": 0.79296875, "learning_rate": 3.2484350675434952e-06, "loss": 4.0054, "step": 21041 }, { "epoch": 7.009327892062963, "grad_norm": 0.75390625, "learning_rate": 3.2477682739687538e-06, "loss": 4.0051, "step": 21042 }, { "epoch": 7.009661031065212, "grad_norm": 0.765625, "learning_rate": 3.2471015299232983e-06, "loss": 3.9522, "step": 21043 }, { "epoch": 7.00999417006746, "grad_norm": 0.75, "learning_rate": 3.246434835414896e-06, "loss": 4.0207, "step": 21044 }, { "epoch": 7.01032730906971, "grad_norm": 0.7421875, "learning_rate": 3.245768190451317e-06, "loss": 3.9975, "step": 21045 }, { "epoch": 7.010660448071958, "grad_norm": 0.82421875, "learning_rate": 3.2451015950403247e-06, "loss": 3.9483, "step": 21046 }, { "epoch": 7.010993587074207, "grad_norm": 0.7890625, "learning_rate": 3.2444350491896814e-06, "loss": 4.0082, "step": 21047 }, { "epoch": 7.011326726076455, "grad_norm": 0.7578125, "learning_rate": 3.2437685529071507e-06, "loss": 3.9615, "step": 21048 }, { "epoch": 7.011659865078704, "grad_norm": 0.72265625, "learning_rate": 3.2431021062004996e-06, "loss": 4.0036, "step": 21049 }, { "epoch": 7.011993004080953, "grad_norm": 0.77734375, "learning_rate": 3.2424357090774886e-06, "loss": 4.0182, "step": 21050 }, { "epoch": 7.012326143083201, "grad_norm": 0.7734375, "learning_rate": 3.241769361545883e-06, "loss": 4.0034, "step": 21051 }, { "epoch": 7.01265928208545, "grad_norm": 0.78515625, "learning_rate": 3.2411030636134348e-06, "loss": 3.9867, "step": 21052 }, { "epoch": 7.012992421087699, "grad_norm": 0.75390625, "learning_rate": 3.240436815287914e-06, "loss": 3.9112, "step": 21053 }, { "epoch": 7.013325560089948, "grad_norm": 0.75, "learning_rate": 3.2397706165770785e-06, "loss": 4.0373, "step": 21054 }, { "epoch": 7.013658699092196, "grad_norm": 0.75390625, "learning_rate": 3.239104467488687e-06, "loss": 3.9523, "step": 21055 }, { "epoch": 7.013991838094445, "grad_norm": 0.79296875, "learning_rate": 3.238438368030498e-06, "loss": 4.0076, "step": 21056 }, { "epoch": 7.014324977096694, "grad_norm": 0.82421875, "learning_rate": 3.2377723182102724e-06, "loss": 3.9376, "step": 21057 }, { "epoch": 7.014658116098942, "grad_norm": 0.8203125, "learning_rate": 3.2371063180357687e-06, "loss": 4.0076, "step": 21058 }, { "epoch": 7.014991255101191, "grad_norm": 0.75390625, "learning_rate": 3.236440367514742e-06, "loss": 4.0111, "step": 21059 }, { "epoch": 7.015324394103439, "grad_norm": 0.76171875, "learning_rate": 3.235774466654951e-06, "loss": 3.9693, "step": 21060 }, { "epoch": 7.0156575331056885, "grad_norm": 0.78125, "learning_rate": 3.2351086154641505e-06, "loss": 3.957, "step": 21061 }, { "epoch": 7.015990672107937, "grad_norm": 0.7890625, "learning_rate": 3.2344428139500975e-06, "loss": 3.9483, "step": 21062 }, { "epoch": 7.016323811110186, "grad_norm": 0.8203125, "learning_rate": 3.2337770621205432e-06, "loss": 3.961, "step": 21063 }, { "epoch": 7.016656950112434, "grad_norm": 0.76953125, "learning_rate": 3.2331113599832484e-06, "loss": 3.9547, "step": 21064 }, { "epoch": 7.0169900891146835, "grad_norm": 0.76953125, "learning_rate": 3.2324457075459645e-06, "loss": 4.0, "step": 21065 }, { "epoch": 7.017323228116932, "grad_norm": 0.7890625, "learning_rate": 3.2317801048164444e-06, "loss": 3.9855, "step": 21066 }, { "epoch": 7.01765636711918, "grad_norm": 0.81640625, "learning_rate": 3.2311145518024386e-06, "loss": 3.9048, "step": 21067 }, { "epoch": 7.017989506121429, "grad_norm": 0.765625, "learning_rate": 3.2304490485117045e-06, "loss": 3.9217, "step": 21068 }, { "epoch": 7.0183226451236775, "grad_norm": 0.7421875, "learning_rate": 3.2297835949519943e-06, "loss": 3.9625, "step": 21069 }, { "epoch": 7.018655784125927, "grad_norm": 0.79296875, "learning_rate": 3.2291181911310497e-06, "loss": 3.9753, "step": 21070 }, { "epoch": 7.018988923128175, "grad_norm": 0.76171875, "learning_rate": 3.22845283705663e-06, "loss": 3.9801, "step": 21071 }, { "epoch": 7.019322062130424, "grad_norm": 0.76953125, "learning_rate": 3.227787532736484e-06, "loss": 3.8877, "step": 21072 }, { "epoch": 7.0196552011326725, "grad_norm": 0.77734375, "learning_rate": 3.227122278178359e-06, "loss": 3.9624, "step": 21073 }, { "epoch": 7.019988340134922, "grad_norm": 0.7578125, "learning_rate": 3.226457073390001e-06, "loss": 4.0385, "step": 21074 }, { "epoch": 7.02032147913717, "grad_norm": 0.80078125, "learning_rate": 3.2257919183791648e-06, "loss": 3.9958, "step": 21075 }, { "epoch": 7.020654618139418, "grad_norm": 0.72265625, "learning_rate": 3.225126813153594e-06, "loss": 3.9811, "step": 21076 }, { "epoch": 7.0209877571416675, "grad_norm": 0.7578125, "learning_rate": 3.224461757721038e-06, "loss": 3.9952, "step": 21077 }, { "epoch": 7.021320896143916, "grad_norm": 0.7421875, "learning_rate": 3.2237967520892405e-06, "loss": 3.9666, "step": 21078 }, { "epoch": 7.021654035146165, "grad_norm": 0.7578125, "learning_rate": 3.22313179626595e-06, "loss": 4.0128, "step": 21079 }, { "epoch": 7.021987174148413, "grad_norm": 0.76171875, "learning_rate": 3.222466890258909e-06, "loss": 3.9807, "step": 21080 }, { "epoch": 7.022320313150662, "grad_norm": 0.75, "learning_rate": 3.2218020340758613e-06, "loss": 3.9899, "step": 21081 }, { "epoch": 7.022653452152911, "grad_norm": 0.83203125, "learning_rate": 3.2211372277245562e-06, "loss": 3.9977, "step": 21082 }, { "epoch": 7.02298659115516, "grad_norm": 0.74609375, "learning_rate": 3.2204724712127345e-06, "loss": 3.9828, "step": 21083 }, { "epoch": 7.023319730157408, "grad_norm": 0.74609375, "learning_rate": 3.219807764548139e-06, "loss": 3.902, "step": 21084 }, { "epoch": 7.0236528691596565, "grad_norm": 0.8046875, "learning_rate": 3.219143107738509e-06, "loss": 3.9959, "step": 21085 }, { "epoch": 7.023986008161906, "grad_norm": 0.7578125, "learning_rate": 3.2184785007915926e-06, "loss": 3.9889, "step": 21086 }, { "epoch": 7.024319147164154, "grad_norm": 0.75390625, "learning_rate": 3.2178139437151306e-06, "loss": 4.0132, "step": 21087 }, { "epoch": 7.024652286166403, "grad_norm": 0.79296875, "learning_rate": 3.2171494365168555e-06, "loss": 4.0172, "step": 21088 }, { "epoch": 7.024985425168651, "grad_norm": 0.765625, "learning_rate": 3.2164849792045155e-06, "loss": 4.0185, "step": 21089 }, { "epoch": 7.025318564170901, "grad_norm": 0.78125, "learning_rate": 3.2158205717858475e-06, "loss": 4.0029, "step": 21090 }, { "epoch": 7.025651703173149, "grad_norm": 0.75, "learning_rate": 3.2151562142685907e-06, "loss": 4.0238, "step": 21091 }, { "epoch": 7.025984842175398, "grad_norm": 0.765625, "learning_rate": 3.21449190666048e-06, "loss": 3.984, "step": 21092 }, { "epoch": 7.026317981177646, "grad_norm": 0.796875, "learning_rate": 3.2138276489692585e-06, "loss": 3.9185, "step": 21093 }, { "epoch": 7.026651120179895, "grad_norm": 0.76953125, "learning_rate": 3.213163441202661e-06, "loss": 3.9879, "step": 21094 }, { "epoch": 7.026984259182144, "grad_norm": 0.76953125, "learning_rate": 3.2124992833684252e-06, "loss": 4.0162, "step": 21095 }, { "epoch": 7.027317398184392, "grad_norm": 0.76953125, "learning_rate": 3.2118351754742848e-06, "loss": 3.9405, "step": 21096 }, { "epoch": 7.027650537186641, "grad_norm": 0.8203125, "learning_rate": 3.211171117527977e-06, "loss": 3.9782, "step": 21097 }, { "epoch": 7.02798367618889, "grad_norm": 0.78515625, "learning_rate": 3.210507109537236e-06, "loss": 3.9724, "step": 21098 }, { "epoch": 7.028316815191139, "grad_norm": 0.75, "learning_rate": 3.209843151509796e-06, "loss": 3.9622, "step": 21099 }, { "epoch": 7.028649954193387, "grad_norm": 0.75, "learning_rate": 3.2091792434533887e-06, "loss": 4.0199, "step": 21100 }, { "epoch": 7.028983093195636, "grad_norm": 0.796875, "learning_rate": 3.2085153853757515e-06, "loss": 3.8858, "step": 21101 }, { "epoch": 7.029316232197885, "grad_norm": 0.75, "learning_rate": 3.207851577284615e-06, "loss": 3.97, "step": 21102 }, { "epoch": 7.029649371200133, "grad_norm": 0.73046875, "learning_rate": 3.207187819187711e-06, "loss": 3.9723, "step": 21103 }, { "epoch": 7.029982510202382, "grad_norm": 0.76171875, "learning_rate": 3.2065241110927675e-06, "loss": 4.0459, "step": 21104 }, { "epoch": 7.03031564920463, "grad_norm": 0.7734375, "learning_rate": 3.2058604530075243e-06, "loss": 3.9408, "step": 21105 }, { "epoch": 7.0306487882068796, "grad_norm": 0.83203125, "learning_rate": 3.205196844939703e-06, "loss": 3.9899, "step": 21106 }, { "epoch": 7.030981927209128, "grad_norm": 0.72265625, "learning_rate": 3.204533286897033e-06, "loss": 4.0291, "step": 21107 }, { "epoch": 7.031315066211377, "grad_norm": 0.76953125, "learning_rate": 3.203869778887249e-06, "loss": 4.0295, "step": 21108 }, { "epoch": 7.031648205213625, "grad_norm": 0.73828125, "learning_rate": 3.2032063209180763e-06, "loss": 3.9075, "step": 21109 }, { "epoch": 7.0319813442158745, "grad_norm": 0.74609375, "learning_rate": 3.2025429129972437e-06, "loss": 3.9862, "step": 21110 }, { "epoch": 7.032314483218123, "grad_norm": 0.76171875, "learning_rate": 3.201879555132475e-06, "loss": 4.005, "step": 21111 }, { "epoch": 7.032647622220371, "grad_norm": 0.80078125, "learning_rate": 3.201216247331502e-06, "loss": 3.9362, "step": 21112 }, { "epoch": 7.03298076122262, "grad_norm": 0.8046875, "learning_rate": 3.2005529896020494e-06, "loss": 4.0265, "step": 21113 }, { "epoch": 7.033313900224869, "grad_norm": 0.765625, "learning_rate": 3.1998897819518416e-06, "loss": 4.0245, "step": 21114 }, { "epoch": 7.033647039227118, "grad_norm": 0.7578125, "learning_rate": 3.1992266243886033e-06, "loss": 4.0114, "step": 21115 }, { "epoch": 7.033980178229366, "grad_norm": 0.75390625, "learning_rate": 3.1985635169200603e-06, "loss": 3.9686, "step": 21116 }, { "epoch": 7.034313317231615, "grad_norm": 0.7109375, "learning_rate": 3.1979004595539345e-06, "loss": 3.93, "step": 21117 }, { "epoch": 7.0346464562338635, "grad_norm": 0.79296875, "learning_rate": 3.1972374522979476e-06, "loss": 3.9638, "step": 21118 }, { "epoch": 7.034979595236113, "grad_norm": 0.796875, "learning_rate": 3.196574495159828e-06, "loss": 3.9439, "step": 21119 }, { "epoch": 7.035312734238361, "grad_norm": 0.80859375, "learning_rate": 3.195911588147293e-06, "loss": 4.0648, "step": 21120 }, { "epoch": 7.035645873240609, "grad_norm": 0.76171875, "learning_rate": 3.1952487312680666e-06, "loss": 3.9744, "step": 21121 }, { "epoch": 7.0359790122428585, "grad_norm": 0.7890625, "learning_rate": 3.194585924529865e-06, "loss": 3.9506, "step": 21122 }, { "epoch": 7.036312151245107, "grad_norm": 0.7421875, "learning_rate": 3.193923167940418e-06, "loss": 3.9962, "step": 21123 }, { "epoch": 7.036645290247356, "grad_norm": 0.7734375, "learning_rate": 3.193260461507436e-06, "loss": 3.9216, "step": 21124 }, { "epoch": 7.036978429249604, "grad_norm": 0.75390625, "learning_rate": 3.1925978052386377e-06, "loss": 3.9545, "step": 21125 }, { "epoch": 7.0373115682518534, "grad_norm": 0.76171875, "learning_rate": 3.191935199141749e-06, "loss": 4.0093, "step": 21126 }, { "epoch": 7.037644707254102, "grad_norm": 0.79296875, "learning_rate": 3.191272643224484e-06, "loss": 3.8719, "step": 21127 }, { "epoch": 7.03797784625635, "grad_norm": 0.76953125, "learning_rate": 3.1906101374945595e-06, "loss": 3.9338, "step": 21128 }, { "epoch": 7.038310985258599, "grad_norm": 0.765625, "learning_rate": 3.1899476819596902e-06, "loss": 4.0329, "step": 21129 }, { "epoch": 7.0386441242608475, "grad_norm": 0.796875, "learning_rate": 3.189285276627597e-06, "loss": 4.0, "step": 21130 }, { "epoch": 7.038977263263097, "grad_norm": 0.75390625, "learning_rate": 3.188622921505994e-06, "loss": 4.0702, "step": 21131 }, { "epoch": 7.039310402265345, "grad_norm": 0.75, "learning_rate": 3.1879606166025954e-06, "loss": 4.0468, "step": 21132 }, { "epoch": 7.039643541267594, "grad_norm": 0.7421875, "learning_rate": 3.1872983619251157e-06, "loss": 3.9644, "step": 21133 }, { "epoch": 7.0399766802698425, "grad_norm": 0.75, "learning_rate": 3.1866361574812696e-06, "loss": 3.9898, "step": 21134 }, { "epoch": 7.040309819272092, "grad_norm": 0.75, "learning_rate": 3.185974003278768e-06, "loss": 3.9123, "step": 21135 }, { "epoch": 7.04064295827434, "grad_norm": 0.72265625, "learning_rate": 3.1853118993253236e-06, "loss": 4.0634, "step": 21136 }, { "epoch": 7.040976097276588, "grad_norm": 0.7578125, "learning_rate": 3.184649845628652e-06, "loss": 3.9645, "step": 21137 }, { "epoch": 7.041309236278837, "grad_norm": 0.79296875, "learning_rate": 3.1839878421964626e-06, "loss": 4.0191, "step": 21138 }, { "epoch": 7.041642375281086, "grad_norm": 0.76953125, "learning_rate": 3.1833258890364676e-06, "loss": 4.0231, "step": 21139 }, { "epoch": 7.041975514283335, "grad_norm": 0.7890625, "learning_rate": 3.1826639861563717e-06, "loss": 3.9695, "step": 21140 }, { "epoch": 7.042308653285583, "grad_norm": 0.74609375, "learning_rate": 3.182002133563896e-06, "loss": 3.9299, "step": 21141 }, { "epoch": 7.042641792287832, "grad_norm": 0.7578125, "learning_rate": 3.1813403312667385e-06, "loss": 4.0135, "step": 21142 }, { "epoch": 7.042974931290081, "grad_norm": 0.77734375, "learning_rate": 3.180678579272613e-06, "loss": 4.0387, "step": 21143 }, { "epoch": 7.04330807029233, "grad_norm": 0.8203125, "learning_rate": 3.1800168775892234e-06, "loss": 3.9865, "step": 21144 }, { "epoch": 7.043641209294578, "grad_norm": 0.7578125, "learning_rate": 3.179355226224283e-06, "loss": 3.9415, "step": 21145 }, { "epoch": 7.0439743482968264, "grad_norm": 0.78125, "learning_rate": 3.178693625185495e-06, "loss": 3.9648, "step": 21146 }, { "epoch": 7.044307487299076, "grad_norm": 0.78515625, "learning_rate": 3.178032074480567e-06, "loss": 3.9588, "step": 21147 }, { "epoch": 7.044640626301324, "grad_norm": 0.73046875, "learning_rate": 3.1773705741172015e-06, "loss": 4.0046, "step": 21148 }, { "epoch": 7.044973765303573, "grad_norm": 0.80859375, "learning_rate": 3.1767091241031077e-06, "loss": 3.9843, "step": 21149 }, { "epoch": 7.045306904305821, "grad_norm": 0.79296875, "learning_rate": 3.1760477244459917e-06, "loss": 4.0353, "step": 21150 }, { "epoch": 7.045640043308071, "grad_norm": 0.74609375, "learning_rate": 3.1753863751535484e-06, "loss": 3.9173, "step": 21151 }, { "epoch": 7.045973182310319, "grad_norm": 0.734375, "learning_rate": 3.1747250762334895e-06, "loss": 4.0725, "step": 21152 }, { "epoch": 7.046306321312568, "grad_norm": 0.75, "learning_rate": 3.174063827693515e-06, "loss": 3.9713, "step": 21153 }, { "epoch": 7.046639460314816, "grad_norm": 0.76953125, "learning_rate": 3.173402629541328e-06, "loss": 3.9812, "step": 21154 }, { "epoch": 7.046972599317065, "grad_norm": 0.78125, "learning_rate": 3.172741481784625e-06, "loss": 3.9656, "step": 21155 }, { "epoch": 7.047305738319314, "grad_norm": 0.76171875, "learning_rate": 3.1720803844311136e-06, "loss": 3.928, "step": 21156 }, { "epoch": 7.047638877321562, "grad_norm": 0.7421875, "learning_rate": 3.171419337488492e-06, "loss": 3.931, "step": 21157 }, { "epoch": 7.047972016323811, "grad_norm": 0.765625, "learning_rate": 3.1707583409644587e-06, "loss": 3.9715, "step": 21158 }, { "epoch": 7.04830515532606, "grad_norm": 0.7578125, "learning_rate": 3.1700973948667138e-06, "loss": 3.9504, "step": 21159 }, { "epoch": 7.048638294328309, "grad_norm": 0.80078125, "learning_rate": 3.1694364992029568e-06, "loss": 4.0174, "step": 21160 }, { "epoch": 7.048971433330557, "grad_norm": 0.78515625, "learning_rate": 3.1687756539808838e-06, "loss": 4.0267, "step": 21161 }, { "epoch": 7.049304572332806, "grad_norm": 0.76953125, "learning_rate": 3.1681148592081897e-06, "loss": 4.0113, "step": 21162 }, { "epoch": 7.049637711335055, "grad_norm": 0.7734375, "learning_rate": 3.1674541148925783e-06, "loss": 3.9928, "step": 21163 }, { "epoch": 7.049970850337303, "grad_norm": 0.7734375, "learning_rate": 3.1667934210417414e-06, "loss": 3.9643, "step": 21164 }, { "epoch": 7.050303989339552, "grad_norm": 0.76171875, "learning_rate": 3.166132777663376e-06, "loss": 4.0251, "step": 21165 }, { "epoch": 7.0506371283418, "grad_norm": 0.76953125, "learning_rate": 3.165472184765174e-06, "loss": 3.9236, "step": 21166 }, { "epoch": 7.0509702673440495, "grad_norm": 0.80078125, "learning_rate": 3.164811642354835e-06, "loss": 3.9202, "step": 21167 }, { "epoch": 7.051303406346298, "grad_norm": 0.7578125, "learning_rate": 3.164151150440053e-06, "loss": 3.9472, "step": 21168 }, { "epoch": 7.051636545348547, "grad_norm": 0.84375, "learning_rate": 3.163490709028513e-06, "loss": 4.004, "step": 21169 }, { "epoch": 7.051969684350795, "grad_norm": 0.80078125, "learning_rate": 3.1628303181279165e-06, "loss": 3.9982, "step": 21170 }, { "epoch": 7.0523028233530445, "grad_norm": 0.79296875, "learning_rate": 3.1621699777459525e-06, "loss": 3.9309, "step": 21171 }, { "epoch": 7.052635962355293, "grad_norm": 0.765625, "learning_rate": 3.1615096878903123e-06, "loss": 4.0002, "step": 21172 }, { "epoch": 7.052969101357541, "grad_norm": 0.7265625, "learning_rate": 3.160849448568684e-06, "loss": 3.9283, "step": 21173 }, { "epoch": 7.05330224035979, "grad_norm": 0.79296875, "learning_rate": 3.1601892597887642e-06, "loss": 4.0065, "step": 21174 }, { "epoch": 7.0536353793620385, "grad_norm": 0.7734375, "learning_rate": 3.15952912155824e-06, "loss": 3.9917, "step": 21175 }, { "epoch": 7.053968518364288, "grad_norm": 0.7578125, "learning_rate": 3.1588690338847993e-06, "loss": 3.9642, "step": 21176 }, { "epoch": 7.054301657366536, "grad_norm": 0.74609375, "learning_rate": 3.1582089967761318e-06, "loss": 4.0238, "step": 21177 }, { "epoch": 7.054634796368785, "grad_norm": 0.77734375, "learning_rate": 3.157549010239925e-06, "loss": 3.9757, "step": 21178 }, { "epoch": 7.0549679353710335, "grad_norm": 0.7890625, "learning_rate": 3.156889074283867e-06, "loss": 3.9915, "step": 21179 }, { "epoch": 7.055301074373283, "grad_norm": 0.75, "learning_rate": 3.1562291889156405e-06, "loss": 3.976, "step": 21180 }, { "epoch": 7.055634213375531, "grad_norm": 0.75390625, "learning_rate": 3.1555693541429393e-06, "loss": 4.0226, "step": 21181 }, { "epoch": 7.055967352377779, "grad_norm": 0.78125, "learning_rate": 3.1549095699734445e-06, "loss": 3.9865, "step": 21182 }, { "epoch": 7.0563004913800285, "grad_norm": 0.75, "learning_rate": 3.1542498364148413e-06, "loss": 3.9946, "step": 21183 }, { "epoch": 7.056633630382277, "grad_norm": 0.78515625, "learning_rate": 3.1535901534748123e-06, "loss": 4.002, "step": 21184 }, { "epoch": 7.056966769384526, "grad_norm": 0.76953125, "learning_rate": 3.1529305211610464e-06, "loss": 4.0548, "step": 21185 }, { "epoch": 7.057299908386774, "grad_norm": 0.7578125, "learning_rate": 3.1522709394812274e-06, "loss": 4.0042, "step": 21186 }, { "epoch": 7.057633047389023, "grad_norm": 0.76171875, "learning_rate": 3.151611408443031e-06, "loss": 3.9543, "step": 21187 }, { "epoch": 7.057966186391272, "grad_norm": 0.78125, "learning_rate": 3.150951928054141e-06, "loss": 4.0008, "step": 21188 }, { "epoch": 7.05829932539352, "grad_norm": 0.75390625, "learning_rate": 3.150292498322244e-06, "loss": 3.9795, "step": 21189 }, { "epoch": 7.058632464395769, "grad_norm": 0.765625, "learning_rate": 3.1496331192550178e-06, "loss": 3.9473, "step": 21190 }, { "epoch": 7.0589656033980175, "grad_norm": 0.7890625, "learning_rate": 3.1489737908601427e-06, "loss": 3.9571, "step": 21191 }, { "epoch": 7.059298742400267, "grad_norm": 0.78515625, "learning_rate": 3.148314513145296e-06, "loss": 4.048, "step": 21192 }, { "epoch": 7.059631881402515, "grad_norm": 0.75390625, "learning_rate": 3.1476552861181625e-06, "loss": 4.0007, "step": 21193 }, { "epoch": 7.059965020404764, "grad_norm": 0.7578125, "learning_rate": 3.146996109786417e-06, "loss": 4.0092, "step": 21194 }, { "epoch": 7.060298159407012, "grad_norm": 0.73046875, "learning_rate": 3.1463369841577385e-06, "loss": 3.9434, "step": 21195 }, { "epoch": 7.060631298409262, "grad_norm": 0.76953125, "learning_rate": 3.1456779092398044e-06, "loss": 4.0055, "step": 21196 }, { "epoch": 7.06096443741151, "grad_norm": 0.74609375, "learning_rate": 3.145018885040291e-06, "loss": 3.9689, "step": 21197 }, { "epoch": 7.061297576413758, "grad_norm": 0.73046875, "learning_rate": 3.1443599115668745e-06, "loss": 3.9639, "step": 21198 }, { "epoch": 7.061630715416007, "grad_norm": 0.71875, "learning_rate": 3.1437009888272283e-06, "loss": 3.9639, "step": 21199 }, { "epoch": 7.061963854418256, "grad_norm": 0.7734375, "learning_rate": 3.1430421168290328e-06, "loss": 4.0425, "step": 21200 }, { "epoch": 7.062296993420505, "grad_norm": 0.74609375, "learning_rate": 3.142383295579959e-06, "loss": 3.9906, "step": 21201 }, { "epoch": 7.062630132422753, "grad_norm": 0.7890625, "learning_rate": 3.1417245250876817e-06, "loss": 3.9702, "step": 21202 }, { "epoch": 7.062963271425002, "grad_norm": 0.82421875, "learning_rate": 3.1410658053598736e-06, "loss": 4.0518, "step": 21203 }, { "epoch": 7.063296410427251, "grad_norm": 0.78515625, "learning_rate": 3.1404071364042076e-06, "loss": 3.9383, "step": 21204 }, { "epoch": 7.0636295494295, "grad_norm": 0.76953125, "learning_rate": 3.1397485182283552e-06, "loss": 4.0053, "step": 21205 }, { "epoch": 7.063962688431748, "grad_norm": 0.734375, "learning_rate": 3.1390899508399855e-06, "loss": 3.9674, "step": 21206 }, { "epoch": 7.064295827433996, "grad_norm": 0.796875, "learning_rate": 3.1384314342467748e-06, "loss": 3.941, "step": 21207 }, { "epoch": 7.064628966436246, "grad_norm": 0.76171875, "learning_rate": 3.137772968456391e-06, "loss": 3.9497, "step": 21208 }, { "epoch": 7.064962105438494, "grad_norm": 0.72265625, "learning_rate": 3.137114553476504e-06, "loss": 4.0453, "step": 21209 }, { "epoch": 7.065295244440743, "grad_norm": 0.75, "learning_rate": 3.1364561893147788e-06, "loss": 4.0032, "step": 21210 }, { "epoch": 7.065628383442991, "grad_norm": 0.73046875, "learning_rate": 3.1357978759788907e-06, "loss": 3.963, "step": 21211 }, { "epoch": 7.065961522445241, "grad_norm": 0.73046875, "learning_rate": 3.1351396134765076e-06, "loss": 3.9525, "step": 21212 }, { "epoch": 7.066294661447489, "grad_norm": 0.765625, "learning_rate": 3.1344814018152883e-06, "loss": 3.9988, "step": 21213 }, { "epoch": 7.066627800449738, "grad_norm": 0.80078125, "learning_rate": 3.1338232410029073e-06, "loss": 4.0308, "step": 21214 }, { "epoch": 7.066960939451986, "grad_norm": 0.7578125, "learning_rate": 3.133165131047028e-06, "loss": 4.0047, "step": 21215 }, { "epoch": 7.067294078454235, "grad_norm": 0.79296875, "learning_rate": 3.132507071955317e-06, "loss": 3.9623, "step": 21216 }, { "epoch": 7.067627217456484, "grad_norm": 0.78515625, "learning_rate": 3.1318490637354355e-06, "loss": 4.0048, "step": 21217 }, { "epoch": 7.067960356458732, "grad_norm": 0.75390625, "learning_rate": 3.131191106395054e-06, "loss": 3.975, "step": 21218 }, { "epoch": 7.068293495460981, "grad_norm": 0.76953125, "learning_rate": 3.130533199941833e-06, "loss": 3.9688, "step": 21219 }, { "epoch": 7.06862663446323, "grad_norm": 0.76953125, "learning_rate": 3.129875344383437e-06, "loss": 3.8761, "step": 21220 }, { "epoch": 7.068959773465479, "grad_norm": 0.7734375, "learning_rate": 3.129217539727526e-06, "loss": 3.8988, "step": 21221 }, { "epoch": 7.069292912467727, "grad_norm": 0.75, "learning_rate": 3.1285597859817647e-06, "loss": 4.0366, "step": 21222 }, { "epoch": 7.069626051469976, "grad_norm": 0.7578125, "learning_rate": 3.1279020831538133e-06, "loss": 3.9389, "step": 21223 }, { "epoch": 7.0699591904722245, "grad_norm": 0.734375, "learning_rate": 3.1272444312513297e-06, "loss": 4.0692, "step": 21224 }, { "epoch": 7.070292329474473, "grad_norm": 0.74609375, "learning_rate": 3.12658683028198e-06, "loss": 4.0077, "step": 21225 }, { "epoch": 7.070625468476722, "grad_norm": 0.77734375, "learning_rate": 3.1259292802534207e-06, "loss": 4.0339, "step": 21226 }, { "epoch": 7.07095860747897, "grad_norm": 0.76171875, "learning_rate": 3.1252717811733105e-06, "loss": 3.968, "step": 21227 }, { "epoch": 7.0712917464812195, "grad_norm": 0.73828125, "learning_rate": 3.124614333049307e-06, "loss": 4.0062, "step": 21228 }, { "epoch": 7.071624885483468, "grad_norm": 0.7734375, "learning_rate": 3.123956935889071e-06, "loss": 3.9639, "step": 21229 }, { "epoch": 7.071958024485717, "grad_norm": 0.734375, "learning_rate": 3.123299589700262e-06, "loss": 3.9917, "step": 21230 }, { "epoch": 7.072291163487965, "grad_norm": 0.80078125, "learning_rate": 3.122642294490529e-06, "loss": 3.9535, "step": 21231 }, { "epoch": 7.0726243024902145, "grad_norm": 0.765625, "learning_rate": 3.12198505026753e-06, "loss": 4.052, "step": 21232 }, { "epoch": 7.072957441492463, "grad_norm": 0.73046875, "learning_rate": 3.1213278570389244e-06, "loss": 3.9626, "step": 21233 }, { "epoch": 7.073290580494711, "grad_norm": 0.78125, "learning_rate": 3.120670714812366e-06, "loss": 4.009, "step": 21234 }, { "epoch": 7.07362371949696, "grad_norm": 0.7109375, "learning_rate": 3.120013623595508e-06, "loss": 3.9166, "step": 21235 }, { "epoch": 7.0739568584992085, "grad_norm": 0.78515625, "learning_rate": 3.119356583396001e-06, "loss": 3.9214, "step": 21236 }, { "epoch": 7.074289997501458, "grad_norm": 0.8359375, "learning_rate": 3.1186995942215045e-06, "loss": 3.9143, "step": 21237 }, { "epoch": 7.074623136503706, "grad_norm": 0.7734375, "learning_rate": 3.1180426560796675e-06, "loss": 3.9545, "step": 21238 }, { "epoch": 7.074956275505955, "grad_norm": 0.77734375, "learning_rate": 3.1173857689781423e-06, "loss": 3.9501, "step": 21239 }, { "epoch": 7.0752894145082035, "grad_norm": 0.75, "learning_rate": 3.1167289329245797e-06, "loss": 3.9982, "step": 21240 }, { "epoch": 7.075622553510453, "grad_norm": 0.83984375, "learning_rate": 3.116072147926632e-06, "loss": 4.0105, "step": 21241 }, { "epoch": 7.075955692512701, "grad_norm": 0.796875, "learning_rate": 3.1154154139919473e-06, "loss": 3.9821, "step": 21242 }, { "epoch": 7.076288831514949, "grad_norm": 0.75390625, "learning_rate": 3.114758731128174e-06, "loss": 3.9839, "step": 21243 }, { "epoch": 7.076621970517198, "grad_norm": 0.74609375, "learning_rate": 3.1141020993429654e-06, "loss": 3.9275, "step": 21244 }, { "epoch": 7.076955109519447, "grad_norm": 0.77734375, "learning_rate": 3.1134455186439677e-06, "loss": 3.9223, "step": 21245 }, { "epoch": 7.077288248521696, "grad_norm": 0.75390625, "learning_rate": 3.112788989038828e-06, "loss": 3.9963, "step": 21246 }, { "epoch": 7.077621387523944, "grad_norm": 0.80078125, "learning_rate": 3.1121325105351916e-06, "loss": 3.9292, "step": 21247 }, { "epoch": 7.077954526526193, "grad_norm": 0.75, "learning_rate": 3.1114760831407124e-06, "loss": 4.0075, "step": 21248 }, { "epoch": 7.078287665528442, "grad_norm": 0.78515625, "learning_rate": 3.110819706863029e-06, "loss": 3.957, "step": 21249 }, { "epoch": 7.07862080453069, "grad_norm": 0.7734375, "learning_rate": 3.110163381709786e-06, "loss": 4.0063, "step": 21250 }, { "epoch": 7.078953943532939, "grad_norm": 0.7578125, "learning_rate": 3.109507107688634e-06, "loss": 3.9792, "step": 21251 }, { "epoch": 7.0792870825351875, "grad_norm": 0.7890625, "learning_rate": 3.108850884807215e-06, "loss": 3.9929, "step": 21252 }, { "epoch": 7.079620221537437, "grad_norm": 0.75390625, "learning_rate": 3.1081947130731717e-06, "loss": 4.048, "step": 21253 }, { "epoch": 7.079953360539685, "grad_norm": 0.80078125, "learning_rate": 3.1075385924941447e-06, "loss": 4.006, "step": 21254 }, { "epoch": 7.080286499541934, "grad_norm": 0.7421875, "learning_rate": 3.106882523077781e-06, "loss": 3.979, "step": 21255 }, { "epoch": 7.080619638544182, "grad_norm": 0.78125, "learning_rate": 3.106226504831721e-06, "loss": 3.9857, "step": 21256 }, { "epoch": 7.080952777546432, "grad_norm": 0.7734375, "learning_rate": 3.1055705377636054e-06, "loss": 3.9141, "step": 21257 }, { "epoch": 7.08128591654868, "grad_norm": 0.76171875, "learning_rate": 3.104914621881075e-06, "loss": 3.9485, "step": 21258 }, { "epoch": 7.081619055550928, "grad_norm": 0.82421875, "learning_rate": 3.104258757191769e-06, "loss": 4.0133, "step": 21259 }, { "epoch": 7.081952194553177, "grad_norm": 0.80859375, "learning_rate": 3.1036029437033275e-06, "loss": 3.9657, "step": 21260 }, { "epoch": 7.082285333555426, "grad_norm": 0.7890625, "learning_rate": 3.1029471814233864e-06, "loss": 3.9882, "step": 21261 }, { "epoch": 7.082618472557675, "grad_norm": 0.73046875, "learning_rate": 3.1022914703595885e-06, "loss": 3.9237, "step": 21262 }, { "epoch": 7.082951611559923, "grad_norm": 0.79296875, "learning_rate": 3.101635810519571e-06, "loss": 4.0751, "step": 21263 }, { "epoch": 7.083284750562172, "grad_norm": 0.7421875, "learning_rate": 3.1009802019109684e-06, "loss": 4.0485, "step": 21264 }, { "epoch": 7.083617889564421, "grad_norm": 0.77734375, "learning_rate": 3.1003246445414165e-06, "loss": 3.9597, "step": 21265 }, { "epoch": 7.08395102856667, "grad_norm": 0.78125, "learning_rate": 3.099669138418557e-06, "loss": 4.005, "step": 21266 }, { "epoch": 7.084284167568918, "grad_norm": 0.76171875, "learning_rate": 3.0990136835500192e-06, "loss": 3.9396, "step": 21267 }, { "epoch": 7.084617306571166, "grad_norm": 0.77734375, "learning_rate": 3.098358279943436e-06, "loss": 4.0387, "step": 21268 }, { "epoch": 7.084950445573416, "grad_norm": 0.765625, "learning_rate": 3.097702927606448e-06, "loss": 4.0276, "step": 21269 }, { "epoch": 7.085283584575664, "grad_norm": 0.79296875, "learning_rate": 3.097047626546686e-06, "loss": 4.0481, "step": 21270 }, { "epoch": 7.085616723577913, "grad_norm": 0.78125, "learning_rate": 3.096392376771782e-06, "loss": 3.9902, "step": 21271 }, { "epoch": 7.085949862580161, "grad_norm": 0.796875, "learning_rate": 3.0957371782893658e-06, "loss": 4.0003, "step": 21272 }, { "epoch": 7.0862830015824105, "grad_norm": 0.7578125, "learning_rate": 3.0950820311070742e-06, "loss": 4.0444, "step": 21273 }, { "epoch": 7.086616140584659, "grad_norm": 0.73828125, "learning_rate": 3.094426935232536e-06, "loss": 4.034, "step": 21274 }, { "epoch": 7.086949279586908, "grad_norm": 0.796875, "learning_rate": 3.093771890673382e-06, "loss": 3.948, "step": 21275 }, { "epoch": 7.087282418589156, "grad_norm": 0.78515625, "learning_rate": 3.0931168974372417e-06, "loss": 3.9696, "step": 21276 }, { "epoch": 7.087615557591405, "grad_norm": 0.77734375, "learning_rate": 3.092461955531744e-06, "loss": 3.9639, "step": 21277 }, { "epoch": 7.087948696593654, "grad_norm": 0.76171875, "learning_rate": 3.091807064964518e-06, "loss": 3.9879, "step": 21278 }, { "epoch": 7.088281835595902, "grad_norm": 0.7890625, "learning_rate": 3.091152225743191e-06, "loss": 3.9723, "step": 21279 }, { "epoch": 7.088614974598151, "grad_norm": 0.81640625, "learning_rate": 3.0904974378753884e-06, "loss": 3.9672, "step": 21280 }, { "epoch": 7.0889481136004, "grad_norm": 0.734375, "learning_rate": 3.0898427013687433e-06, "loss": 4.0536, "step": 21281 }, { "epoch": 7.089281252602649, "grad_norm": 0.765625, "learning_rate": 3.089188016230877e-06, "loss": 4.0075, "step": 21282 }, { "epoch": 7.089614391604897, "grad_norm": 0.78515625, "learning_rate": 3.0885333824694175e-06, "loss": 4.0612, "step": 21283 }, { "epoch": 7.089947530607146, "grad_norm": 0.78125, "learning_rate": 3.0878788000919885e-06, "loss": 4.0004, "step": 21284 }, { "epoch": 7.0902806696093945, "grad_norm": 0.76953125, "learning_rate": 3.087224269106215e-06, "loss": 3.9843, "step": 21285 }, { "epoch": 7.090613808611643, "grad_norm": 0.78515625, "learning_rate": 3.0865697895197206e-06, "loss": 3.9879, "step": 21286 }, { "epoch": 7.090946947613892, "grad_norm": 0.78125, "learning_rate": 3.0859153613401258e-06, "loss": 3.9528, "step": 21287 }, { "epoch": 7.09128008661614, "grad_norm": 0.7734375, "learning_rate": 3.0852609845750594e-06, "loss": 3.9799, "step": 21288 }, { "epoch": 7.0916132256183895, "grad_norm": 0.84375, "learning_rate": 3.0846066592321397e-06, "loss": 3.9084, "step": 21289 }, { "epoch": 7.091946364620638, "grad_norm": 0.7890625, "learning_rate": 3.083952385318989e-06, "loss": 4.0361, "step": 21290 }, { "epoch": 7.092279503622887, "grad_norm": 0.8046875, "learning_rate": 3.0832981628432248e-06, "loss": 3.9527, "step": 21291 }, { "epoch": 7.092612642625135, "grad_norm": 0.75, "learning_rate": 3.082643991812474e-06, "loss": 4.0325, "step": 21292 }, { "epoch": 7.092945781627384, "grad_norm": 0.78515625, "learning_rate": 3.081989872234354e-06, "loss": 3.9421, "step": 21293 }, { "epoch": 7.093278920629633, "grad_norm": 0.71484375, "learning_rate": 3.0813358041164784e-06, "loss": 3.9323, "step": 21294 }, { "epoch": 7.093612059631881, "grad_norm": 0.796875, "learning_rate": 3.080681787466472e-06, "loss": 3.9762, "step": 21295 }, { "epoch": 7.09394519863413, "grad_norm": 0.7421875, "learning_rate": 3.0800278222919507e-06, "loss": 4.0146, "step": 21296 }, { "epoch": 7.0942783376363785, "grad_norm": 0.75, "learning_rate": 3.0793739086005314e-06, "loss": 3.9782, "step": 21297 }, { "epoch": 7.094611476638628, "grad_norm": 0.75390625, "learning_rate": 3.0787200463998287e-06, "loss": 3.9254, "step": 21298 }, { "epoch": 7.094944615640876, "grad_norm": 0.78515625, "learning_rate": 3.078066235697464e-06, "loss": 3.9465, "step": 21299 }, { "epoch": 7.095277754643125, "grad_norm": 0.74609375, "learning_rate": 3.077412476501049e-06, "loss": 4.0217, "step": 21300 }, { "epoch": 7.0956108936453735, "grad_norm": 0.7578125, "learning_rate": 3.0767587688182002e-06, "loss": 3.963, "step": 21301 }, { "epoch": 7.095944032647623, "grad_norm": 0.76171875, "learning_rate": 3.0761051126565297e-06, "loss": 3.944, "step": 21302 }, { "epoch": 7.096277171649871, "grad_norm": 0.71875, "learning_rate": 3.0754515080236533e-06, "loss": 3.9442, "step": 21303 }, { "epoch": 7.096610310652119, "grad_norm": 0.76953125, "learning_rate": 3.0747979549271836e-06, "loss": 4.0566, "step": 21304 }, { "epoch": 7.096943449654368, "grad_norm": 0.77734375, "learning_rate": 3.0741444533747296e-06, "loss": 3.9804, "step": 21305 }, { "epoch": 7.097276588656617, "grad_norm": 0.76171875, "learning_rate": 3.0734910033739087e-06, "loss": 3.9282, "step": 21306 }, { "epoch": 7.097609727658866, "grad_norm": 0.78515625, "learning_rate": 3.0728376049323305e-06, "loss": 4.0383, "step": 21307 }, { "epoch": 7.097942866661114, "grad_norm": 0.76953125, "learning_rate": 3.072184258057605e-06, "loss": 4.0798, "step": 21308 }, { "epoch": 7.098276005663363, "grad_norm": 0.765625, "learning_rate": 3.0715309627573385e-06, "loss": 3.9543, "step": 21309 }, { "epoch": 7.098609144665612, "grad_norm": 0.75390625, "learning_rate": 3.070877719039148e-06, "loss": 4.0145, "step": 21310 }, { "epoch": 7.098942283667861, "grad_norm": 0.796875, "learning_rate": 3.0702245269106414e-06, "loss": 3.9753, "step": 21311 }, { "epoch": 7.099275422670109, "grad_norm": 0.75390625, "learning_rate": 3.069571386379418e-06, "loss": 3.9126, "step": 21312 }, { "epoch": 7.099608561672357, "grad_norm": 0.8125, "learning_rate": 3.0689182974530946e-06, "loss": 4.0441, "step": 21313 }, { "epoch": 7.099941700674607, "grad_norm": 0.78125, "learning_rate": 3.0682652601392756e-06, "loss": 3.9785, "step": 21314 }, { "epoch": 7.100274839676855, "grad_norm": 0.78515625, "learning_rate": 3.0676122744455676e-06, "loss": 4.0262, "step": 21315 }, { "epoch": 7.100607978679104, "grad_norm": 0.765625, "learning_rate": 3.0669593403795723e-06, "loss": 3.9601, "step": 21316 }, { "epoch": 7.100941117681352, "grad_norm": 0.77734375, "learning_rate": 3.0663064579489024e-06, "loss": 3.9812, "step": 21317 }, { "epoch": 7.101274256683602, "grad_norm": 0.78125, "learning_rate": 3.065653627161158e-06, "loss": 3.9492, "step": 21318 }, { "epoch": 7.10160739568585, "grad_norm": 0.78515625, "learning_rate": 3.065000848023945e-06, "loss": 3.888, "step": 21319 }, { "epoch": 7.101940534688099, "grad_norm": 0.828125, "learning_rate": 3.0643481205448655e-06, "loss": 3.9998, "step": 21320 }, { "epoch": 7.102273673690347, "grad_norm": 0.734375, "learning_rate": 3.063695444731522e-06, "loss": 3.9416, "step": 21321 }, { "epoch": 7.102606812692596, "grad_norm": 0.7734375, "learning_rate": 3.0630428205915183e-06, "loss": 3.9564, "step": 21322 }, { "epoch": 7.102939951694845, "grad_norm": 0.78515625, "learning_rate": 3.0623902481324534e-06, "loss": 3.9785, "step": 21323 }, { "epoch": 7.103273090697093, "grad_norm": 0.76171875, "learning_rate": 3.0617377273619317e-06, "loss": 4.0122, "step": 21324 }, { "epoch": 7.103606229699342, "grad_norm": 0.734375, "learning_rate": 3.0610852582875528e-06, "loss": 4.0342, "step": 21325 }, { "epoch": 7.103939368701591, "grad_norm": 0.765625, "learning_rate": 3.060432840916916e-06, "loss": 3.9492, "step": 21326 }, { "epoch": 7.10427250770384, "grad_norm": 0.7421875, "learning_rate": 3.0597804752576203e-06, "loss": 3.9745, "step": 21327 }, { "epoch": 7.104605646706088, "grad_norm": 0.76171875, "learning_rate": 3.0591281613172617e-06, "loss": 3.9364, "step": 21328 }, { "epoch": 7.104938785708336, "grad_norm": 0.7890625, "learning_rate": 3.058475899103447e-06, "loss": 3.9686, "step": 21329 }, { "epoch": 7.1052719247105856, "grad_norm": 0.73046875, "learning_rate": 3.057823688623765e-06, "loss": 3.9629, "step": 21330 }, { "epoch": 7.105605063712834, "grad_norm": 0.79296875, "learning_rate": 3.0571715298858123e-06, "loss": 3.9605, "step": 21331 }, { "epoch": 7.105938202715083, "grad_norm": 0.7734375, "learning_rate": 3.0565194228971902e-06, "loss": 3.9852, "step": 21332 }, { "epoch": 7.106271341717331, "grad_norm": 0.82421875, "learning_rate": 3.055867367665494e-06, "loss": 3.9869, "step": 21333 }, { "epoch": 7.1066044807195805, "grad_norm": 0.82421875, "learning_rate": 3.0552153641983156e-06, "loss": 4.0089, "step": 21334 }, { "epoch": 7.106937619721829, "grad_norm": 0.76953125, "learning_rate": 3.0545634125032483e-06, "loss": 3.998, "step": 21335 }, { "epoch": 7.107270758724078, "grad_norm": 0.75390625, "learning_rate": 3.0539115125878916e-06, "loss": 3.9544, "step": 21336 }, { "epoch": 7.107603897726326, "grad_norm": 0.75390625, "learning_rate": 3.053259664459835e-06, "loss": 3.9976, "step": 21337 }, { "epoch": 7.107937036728575, "grad_norm": 0.75, "learning_rate": 3.052607868126672e-06, "loss": 3.9597, "step": 21338 }, { "epoch": 7.108270175730824, "grad_norm": 0.76953125, "learning_rate": 3.0519561235959943e-06, "loss": 3.9152, "step": 21339 }, { "epoch": 7.108603314733072, "grad_norm": 0.80859375, "learning_rate": 3.051304430875393e-06, "loss": 3.9839, "step": 21340 }, { "epoch": 7.108936453735321, "grad_norm": 0.7890625, "learning_rate": 3.0506527899724595e-06, "loss": 4.0121, "step": 21341 }, { "epoch": 7.1092695927375695, "grad_norm": 0.74609375, "learning_rate": 3.0500012008947816e-06, "loss": 3.9584, "step": 21342 }, { "epoch": 7.109602731739819, "grad_norm": 0.765625, "learning_rate": 3.0493496636499525e-06, "loss": 4.0712, "step": 21343 }, { "epoch": 7.109935870742067, "grad_norm": 0.76171875, "learning_rate": 3.0486981782455604e-06, "loss": 3.9634, "step": 21344 }, { "epoch": 7.110269009744316, "grad_norm": 0.7890625, "learning_rate": 3.0480467446891935e-06, "loss": 3.9237, "step": 21345 }, { "epoch": 7.1106021487465645, "grad_norm": 0.75, "learning_rate": 3.0473953629884364e-06, "loss": 4.0967, "step": 21346 }, { "epoch": 7.110935287748813, "grad_norm": 0.734375, "learning_rate": 3.0467440331508836e-06, "loss": 3.9809, "step": 21347 }, { "epoch": 7.111268426751062, "grad_norm": 0.8046875, "learning_rate": 3.0460927551841145e-06, "loss": 3.965, "step": 21348 }, { "epoch": 7.11160156575331, "grad_norm": 0.74609375, "learning_rate": 3.045441529095715e-06, "loss": 3.9906, "step": 21349 }, { "epoch": 7.1119347047555594, "grad_norm": 0.7890625, "learning_rate": 3.0447903548932767e-06, "loss": 3.9913, "step": 21350 }, { "epoch": 7.112267843757808, "grad_norm": 0.765625, "learning_rate": 3.0441392325843797e-06, "loss": 3.9854, "step": 21351 }, { "epoch": 7.112600982760057, "grad_norm": 0.78125, "learning_rate": 3.0434881621766095e-06, "loss": 3.9767, "step": 21352 }, { "epoch": 7.112934121762305, "grad_norm": 0.8046875, "learning_rate": 3.0428371436775472e-06, "loss": 4.007, "step": 21353 }, { "epoch": 7.113267260764554, "grad_norm": 0.7421875, "learning_rate": 3.0421861770947796e-06, "loss": 4.0016, "step": 21354 }, { "epoch": 7.113600399766803, "grad_norm": 0.765625, "learning_rate": 3.0415352624358885e-06, "loss": 3.9772, "step": 21355 }, { "epoch": 7.113933538769051, "grad_norm": 0.74609375, "learning_rate": 3.040884399708454e-06, "loss": 3.9649, "step": 21356 }, { "epoch": 7.1142666777713, "grad_norm": 0.734375, "learning_rate": 3.040233588920058e-06, "loss": 3.9728, "step": 21357 }, { "epoch": 7.1145998167735485, "grad_norm": 0.76953125, "learning_rate": 3.0395828300782814e-06, "loss": 3.9483, "step": 21358 }, { "epoch": 7.114932955775798, "grad_norm": 0.796875, "learning_rate": 3.0389321231907027e-06, "loss": 3.9554, "step": 21359 }, { "epoch": 7.115266094778046, "grad_norm": 0.70703125, "learning_rate": 3.0382814682648997e-06, "loss": 3.9542, "step": 21360 }, { "epoch": 7.115599233780295, "grad_norm": 0.76171875, "learning_rate": 3.0376308653084568e-06, "loss": 4.0216, "step": 21361 }, { "epoch": 7.115932372782543, "grad_norm": 0.75390625, "learning_rate": 3.036980314328948e-06, "loss": 3.981, "step": 21362 }, { "epoch": 7.116265511784793, "grad_norm": 0.76953125, "learning_rate": 3.0363298153339524e-06, "loss": 3.9841, "step": 21363 }, { "epoch": 7.116598650787041, "grad_norm": 0.703125, "learning_rate": 3.0356793683310426e-06, "loss": 4.0185, "step": 21364 }, { "epoch": 7.116931789789289, "grad_norm": 0.80859375, "learning_rate": 3.035028973327805e-06, "loss": 3.9691, "step": 21365 }, { "epoch": 7.117264928791538, "grad_norm": 0.74609375, "learning_rate": 3.034378630331805e-06, "loss": 3.9802, "step": 21366 }, { "epoch": 7.117598067793787, "grad_norm": 0.796875, "learning_rate": 3.033728339350619e-06, "loss": 3.9774, "step": 21367 }, { "epoch": 7.117931206796036, "grad_norm": 0.73828125, "learning_rate": 3.033078100391827e-06, "loss": 4.0318, "step": 21368 }, { "epoch": 7.118264345798284, "grad_norm": 0.7578125, "learning_rate": 3.0324279134629995e-06, "loss": 3.9835, "step": 21369 }, { "epoch": 7.118597484800533, "grad_norm": 0.765625, "learning_rate": 3.0317777785717104e-06, "loss": 3.9791, "step": 21370 }, { "epoch": 7.118930623802782, "grad_norm": 0.79296875, "learning_rate": 3.0311276957255314e-06, "loss": 3.9162, "step": 21371 }, { "epoch": 7.119263762805031, "grad_norm": 0.76171875, "learning_rate": 3.0304776649320326e-06, "loss": 4.0479, "step": 21372 }, { "epoch": 7.119596901807279, "grad_norm": 0.765625, "learning_rate": 3.029827686198791e-06, "loss": 3.9503, "step": 21373 }, { "epoch": 7.119930040809527, "grad_norm": 0.7578125, "learning_rate": 3.0291777595333775e-06, "loss": 4.0376, "step": 21374 }, { "epoch": 7.120263179811777, "grad_norm": 0.765625, "learning_rate": 3.028527884943353e-06, "loss": 4.0389, "step": 21375 }, { "epoch": 7.120596318814025, "grad_norm": 0.7734375, "learning_rate": 3.0278780624362966e-06, "loss": 4.0479, "step": 21376 }, { "epoch": 7.120929457816274, "grad_norm": 0.7421875, "learning_rate": 3.0272282920197745e-06, "loss": 3.9986, "step": 21377 }, { "epoch": 7.121262596818522, "grad_norm": 0.78125, "learning_rate": 3.026578573701355e-06, "loss": 3.9429, "step": 21378 }, { "epoch": 7.1215957358207715, "grad_norm": 0.7734375, "learning_rate": 3.025928907488602e-06, "loss": 3.9913, "step": 21379 }, { "epoch": 7.12192887482302, "grad_norm": 0.7578125, "learning_rate": 3.0252792933890903e-06, "loss": 3.9745, "step": 21380 }, { "epoch": 7.122262013825269, "grad_norm": 0.7421875, "learning_rate": 3.0246297314103817e-06, "loss": 4.0393, "step": 21381 }, { "epoch": 7.122595152827517, "grad_norm": 0.7578125, "learning_rate": 3.0239802215600433e-06, "loss": 3.9913, "step": 21382 }, { "epoch": 7.122928291829766, "grad_norm": 0.76953125, "learning_rate": 3.0233307638456407e-06, "loss": 4.0327, "step": 21383 }, { "epoch": 7.123261430832015, "grad_norm": 0.7734375, "learning_rate": 3.0226813582747375e-06, "loss": 4.0082, "step": 21384 }, { "epoch": 7.123594569834263, "grad_norm": 0.734375, "learning_rate": 3.022032004854899e-06, "loss": 4.0267, "step": 21385 }, { "epoch": 7.123927708836512, "grad_norm": 0.7578125, "learning_rate": 3.021382703593686e-06, "loss": 4.0331, "step": 21386 }, { "epoch": 7.124260847838761, "grad_norm": 0.74609375, "learning_rate": 3.020733454498666e-06, "loss": 3.952, "step": 21387 }, { "epoch": 7.12459398684101, "grad_norm": 0.7890625, "learning_rate": 3.020084257577399e-06, "loss": 4.037, "step": 21388 }, { "epoch": 7.124927125843258, "grad_norm": 0.7578125, "learning_rate": 3.0194351128374477e-06, "loss": 3.9706, "step": 21389 }, { "epoch": 7.125260264845506, "grad_norm": 0.84375, "learning_rate": 3.0187860202863695e-06, "loss": 3.9969, "step": 21390 }, { "epoch": 7.1255934038477555, "grad_norm": 0.78125, "learning_rate": 3.01813697993173e-06, "loss": 3.9519, "step": 21391 }, { "epoch": 7.125926542850004, "grad_norm": 0.75390625, "learning_rate": 3.01748799178109e-06, "loss": 3.9822, "step": 21392 }, { "epoch": 7.126259681852253, "grad_norm": 0.78515625, "learning_rate": 3.0168390558420007e-06, "loss": 3.9904, "step": 21393 }, { "epoch": 7.126592820854501, "grad_norm": 0.796875, "learning_rate": 3.016190172122028e-06, "loss": 4.0524, "step": 21394 }, { "epoch": 7.1269259598567505, "grad_norm": 0.77734375, "learning_rate": 3.0155413406287286e-06, "loss": 3.9752, "step": 21395 }, { "epoch": 7.127259098858999, "grad_norm": 0.75, "learning_rate": 3.014892561369659e-06, "loss": 4.0006, "step": 21396 }, { "epoch": 7.127592237861248, "grad_norm": 0.765625, "learning_rate": 3.0142438343523736e-06, "loss": 3.951, "step": 21397 }, { "epoch": 7.127925376863496, "grad_norm": 0.765625, "learning_rate": 3.0135951595844343e-06, "loss": 3.9686, "step": 21398 }, { "epoch": 7.128258515865745, "grad_norm": 0.73046875, "learning_rate": 3.012946537073394e-06, "loss": 3.978, "step": 21399 }, { "epoch": 7.128591654867994, "grad_norm": 0.78515625, "learning_rate": 3.012297966826807e-06, "loss": 3.9309, "step": 21400 }, { "epoch": 7.128924793870242, "grad_norm": 0.77734375, "learning_rate": 3.0116494488522285e-06, "loss": 3.9639, "step": 21401 }, { "epoch": 7.129257932872491, "grad_norm": 0.8203125, "learning_rate": 3.011000983157212e-06, "loss": 4.0289, "step": 21402 }, { "epoch": 7.1295910718747395, "grad_norm": 0.73828125, "learning_rate": 3.0103525697493114e-06, "loss": 4.0681, "step": 21403 }, { "epoch": 7.129924210876989, "grad_norm": 0.734375, "learning_rate": 3.009704208636076e-06, "loss": 4.0419, "step": 21404 }, { "epoch": 7.130257349879237, "grad_norm": 0.80859375, "learning_rate": 3.009055899825063e-06, "loss": 4.1042, "step": 21405 }, { "epoch": 7.130590488881486, "grad_norm": 0.78125, "learning_rate": 3.008407643323821e-06, "loss": 3.9533, "step": 21406 }, { "epoch": 7.1309236278837345, "grad_norm": 0.82421875, "learning_rate": 3.0077594391399016e-06, "loss": 3.9725, "step": 21407 }, { "epoch": 7.131256766885983, "grad_norm": 0.7734375, "learning_rate": 3.0071112872808528e-06, "loss": 3.9478, "step": 21408 }, { "epoch": 7.131589905888232, "grad_norm": 0.74609375, "learning_rate": 3.006463187754227e-06, "loss": 3.9928, "step": 21409 }, { "epoch": 7.13192304489048, "grad_norm": 0.8046875, "learning_rate": 3.0058151405675756e-06, "loss": 3.9821, "step": 21410 }, { "epoch": 7.132256183892729, "grad_norm": 0.76171875, "learning_rate": 3.0051671457284384e-06, "loss": 3.9754, "step": 21411 }, { "epoch": 7.132589322894978, "grad_norm": 0.75, "learning_rate": 3.0045192032443704e-06, "loss": 3.9226, "step": 21412 }, { "epoch": 7.132922461897227, "grad_norm": 0.76171875, "learning_rate": 3.003871313122916e-06, "loss": 3.8864, "step": 21413 }, { "epoch": 7.133255600899475, "grad_norm": 0.765625, "learning_rate": 3.003223475371623e-06, "loss": 3.9929, "step": 21414 }, { "epoch": 7.133588739901724, "grad_norm": 0.79296875, "learning_rate": 3.002575689998034e-06, "loss": 3.9531, "step": 21415 }, { "epoch": 7.133921878903973, "grad_norm": 0.765625, "learning_rate": 3.001927957009699e-06, "loss": 3.9332, "step": 21416 }, { "epoch": 7.134255017906221, "grad_norm": 0.7890625, "learning_rate": 3.00128027641416e-06, "loss": 3.9585, "step": 21417 }, { "epoch": 7.13458815690847, "grad_norm": 0.77734375, "learning_rate": 3.0006326482189626e-06, "loss": 4.0404, "step": 21418 }, { "epoch": 7.134921295910718, "grad_norm": 0.80859375, "learning_rate": 2.999985072431649e-06, "loss": 3.9327, "step": 21419 }, { "epoch": 7.135254434912968, "grad_norm": 0.75, "learning_rate": 2.9993375490597615e-06, "loss": 4.0483, "step": 21420 }, { "epoch": 7.135587573915216, "grad_norm": 0.828125, "learning_rate": 2.998690078110844e-06, "loss": 3.9323, "step": 21421 }, { "epoch": 7.135920712917465, "grad_norm": 0.76953125, "learning_rate": 2.9980426595924366e-06, "loss": 3.9746, "step": 21422 }, { "epoch": 7.136253851919713, "grad_norm": 0.79296875, "learning_rate": 2.997395293512079e-06, "loss": 3.985, "step": 21423 }, { "epoch": 7.136586990921963, "grad_norm": 0.71484375, "learning_rate": 2.9967479798773158e-06, "loss": 3.9881, "step": 21424 }, { "epoch": 7.136920129924211, "grad_norm": 0.74609375, "learning_rate": 2.996100718695685e-06, "loss": 3.9583, "step": 21425 }, { "epoch": 7.137253268926459, "grad_norm": 0.7890625, "learning_rate": 2.9954535099747246e-06, "loss": 3.9711, "step": 21426 }, { "epoch": 7.137586407928708, "grad_norm": 0.75390625, "learning_rate": 2.994806353721972e-06, "loss": 4.0174, "step": 21427 }, { "epoch": 7.137919546930957, "grad_norm": 0.796875, "learning_rate": 2.9941592499449728e-06, "loss": 3.975, "step": 21428 }, { "epoch": 7.138252685933206, "grad_norm": 0.77734375, "learning_rate": 2.993512198651255e-06, "loss": 4.0051, "step": 21429 }, { "epoch": 7.138585824935454, "grad_norm": 0.75, "learning_rate": 2.9928651998483575e-06, "loss": 3.9965, "step": 21430 }, { "epoch": 7.138918963937703, "grad_norm": 0.8359375, "learning_rate": 2.9922182535438197e-06, "loss": 3.9232, "step": 21431 }, { "epoch": 7.139252102939952, "grad_norm": 0.7890625, "learning_rate": 2.991571359745176e-06, "loss": 3.9418, "step": 21432 }, { "epoch": 7.139585241942201, "grad_norm": 0.7421875, "learning_rate": 2.9909245184599603e-06, "loss": 4.0266, "step": 21433 }, { "epoch": 7.139918380944449, "grad_norm": 0.72265625, "learning_rate": 2.990277729695705e-06, "loss": 3.9256, "step": 21434 }, { "epoch": 7.140251519946697, "grad_norm": 0.7578125, "learning_rate": 2.989630993459949e-06, "loss": 3.9895, "step": 21435 }, { "epoch": 7.140584658948947, "grad_norm": 0.765625, "learning_rate": 2.9889843097602216e-06, "loss": 4.007, "step": 21436 }, { "epoch": 7.140917797951195, "grad_norm": 0.73828125, "learning_rate": 2.988337678604057e-06, "loss": 3.9576, "step": 21437 }, { "epoch": 7.141250936953444, "grad_norm": 0.734375, "learning_rate": 2.987691099998986e-06, "loss": 4.0, "step": 21438 }, { "epoch": 7.141584075955692, "grad_norm": 0.78515625, "learning_rate": 2.98704457395254e-06, "loss": 4.0084, "step": 21439 }, { "epoch": 7.1419172149579415, "grad_norm": 0.796875, "learning_rate": 2.9863981004722495e-06, "loss": 3.9825, "step": 21440 }, { "epoch": 7.14225035396019, "grad_norm": 0.79296875, "learning_rate": 2.985751679565642e-06, "loss": 4.0109, "step": 21441 }, { "epoch": 7.142583492962439, "grad_norm": 0.78125, "learning_rate": 2.9851053112402528e-06, "loss": 3.9286, "step": 21442 }, { "epoch": 7.142916631964687, "grad_norm": 0.7578125, "learning_rate": 2.984458995503607e-06, "loss": 3.9646, "step": 21443 }, { "epoch": 7.143249770966936, "grad_norm": 0.74609375, "learning_rate": 2.983812732363233e-06, "loss": 4.0183, "step": 21444 }, { "epoch": 7.143582909969185, "grad_norm": 0.74609375, "learning_rate": 2.9831665218266564e-06, "loss": 3.9646, "step": 21445 }, { "epoch": 7.143916048971433, "grad_norm": 0.76171875, "learning_rate": 2.9825203639014113e-06, "loss": 3.9495, "step": 21446 }, { "epoch": 7.144249187973682, "grad_norm": 0.765625, "learning_rate": 2.9818742585950167e-06, "loss": 4.0036, "step": 21447 }, { "epoch": 7.1445823269759305, "grad_norm": 0.76953125, "learning_rate": 2.981228205914998e-06, "loss": 3.9035, "step": 21448 }, { "epoch": 7.14491546597818, "grad_norm": 0.78125, "learning_rate": 2.980582205868886e-06, "loss": 4.0475, "step": 21449 }, { "epoch": 7.145248604980428, "grad_norm": 0.75, "learning_rate": 2.979936258464202e-06, "loss": 4.0151, "step": 21450 }, { "epoch": 7.145581743982676, "grad_norm": 0.7734375, "learning_rate": 2.9792903637084705e-06, "loss": 3.936, "step": 21451 }, { "epoch": 7.1459148829849255, "grad_norm": 0.87109375, "learning_rate": 2.978644521609211e-06, "loss": 3.9131, "step": 21452 }, { "epoch": 7.146248021987174, "grad_norm": 0.75390625, "learning_rate": 2.977998732173953e-06, "loss": 4.0078, "step": 21453 }, { "epoch": 7.146581160989423, "grad_norm": 0.75, "learning_rate": 2.977352995410215e-06, "loss": 4.0542, "step": 21454 }, { "epoch": 7.146914299991671, "grad_norm": 0.76953125, "learning_rate": 2.9767073113255183e-06, "loss": 3.976, "step": 21455 }, { "epoch": 7.1472474389939205, "grad_norm": 0.796875, "learning_rate": 2.9760616799273844e-06, "loss": 3.9725, "step": 21456 }, { "epoch": 7.147580577996169, "grad_norm": 0.796875, "learning_rate": 2.9754161012233327e-06, "loss": 3.9895, "step": 21457 }, { "epoch": 7.147913716998418, "grad_norm": 0.7734375, "learning_rate": 2.9747705752208833e-06, "loss": 4.0316, "step": 21458 }, { "epoch": 7.148246856000666, "grad_norm": 0.74609375, "learning_rate": 2.974125101927552e-06, "loss": 4.001, "step": 21459 }, { "epoch": 7.148579995002915, "grad_norm": 0.796875, "learning_rate": 2.9734796813508634e-06, "loss": 3.9693, "step": 21460 }, { "epoch": 7.148913134005164, "grad_norm": 0.78125, "learning_rate": 2.972834313498332e-06, "loss": 4.0282, "step": 21461 }, { "epoch": 7.149246273007412, "grad_norm": 0.74609375, "learning_rate": 2.972188998377475e-06, "loss": 4.0225, "step": 21462 }, { "epoch": 7.149579412009661, "grad_norm": 0.796875, "learning_rate": 2.9715437359958094e-06, "loss": 3.9564, "step": 21463 }, { "epoch": 7.1499125510119095, "grad_norm": 0.7734375, "learning_rate": 2.9708985263608498e-06, "loss": 3.9928, "step": 21464 }, { "epoch": 7.150245690014159, "grad_norm": 0.7734375, "learning_rate": 2.9702533694801126e-06, "loss": 4.003, "step": 21465 }, { "epoch": 7.150578829016407, "grad_norm": 0.75, "learning_rate": 2.9696082653611123e-06, "loss": 3.9664, "step": 21466 }, { "epoch": 7.150911968018656, "grad_norm": 0.74609375, "learning_rate": 2.968963214011361e-06, "loss": 3.9485, "step": 21467 }, { "epoch": 7.151245107020904, "grad_norm": 0.76171875, "learning_rate": 2.9683182154383757e-06, "loss": 4.0143, "step": 21468 }, { "epoch": 7.151578246023153, "grad_norm": 0.79296875, "learning_rate": 2.967673269649668e-06, "loss": 3.9748, "step": 21469 }, { "epoch": 7.151911385025402, "grad_norm": 0.76171875, "learning_rate": 2.9670283766527496e-06, "loss": 3.9615, "step": 21470 }, { "epoch": 7.15224452402765, "grad_norm": 0.765625, "learning_rate": 2.966383536455129e-06, "loss": 3.9972, "step": 21471 }, { "epoch": 7.152577663029899, "grad_norm": 0.78515625, "learning_rate": 2.9657387490643267e-06, "loss": 3.9921, "step": 21472 }, { "epoch": 7.152910802032148, "grad_norm": 0.75390625, "learning_rate": 2.965094014487844e-06, "loss": 3.9897, "step": 21473 }, { "epoch": 7.153243941034397, "grad_norm": 0.72265625, "learning_rate": 2.96444933273319e-06, "loss": 3.9957, "step": 21474 }, { "epoch": 7.153577080036645, "grad_norm": 0.74609375, "learning_rate": 2.963804703807881e-06, "loss": 3.953, "step": 21475 }, { "epoch": 7.153910219038894, "grad_norm": 0.734375, "learning_rate": 2.963160127719421e-06, "loss": 3.9556, "step": 21476 }, { "epoch": 7.154243358041143, "grad_norm": 0.7421875, "learning_rate": 2.9625156044753188e-06, "loss": 3.9785, "step": 21477 }, { "epoch": 7.154576497043391, "grad_norm": 0.78125, "learning_rate": 2.9618711340830796e-06, "loss": 3.9396, "step": 21478 }, { "epoch": 7.15490963604564, "grad_norm": 0.78515625, "learning_rate": 2.961226716550214e-06, "loss": 4.004, "step": 21479 }, { "epoch": 7.155242775047888, "grad_norm": 0.81640625, "learning_rate": 2.9605823518842263e-06, "loss": 3.9328, "step": 21480 }, { "epoch": 7.155575914050138, "grad_norm": 0.79296875, "learning_rate": 2.9599380400926224e-06, "loss": 3.9481, "step": 21481 }, { "epoch": 7.155909053052386, "grad_norm": 0.8203125, "learning_rate": 2.959293781182906e-06, "loss": 3.9257, "step": 21482 }, { "epoch": 7.156242192054635, "grad_norm": 0.78125, "learning_rate": 2.9586495751625823e-06, "loss": 3.9313, "step": 21483 }, { "epoch": 7.156575331056883, "grad_norm": 0.79296875, "learning_rate": 2.9580054220391544e-06, "loss": 3.9371, "step": 21484 }, { "epoch": 7.1569084700591326, "grad_norm": 0.78125, "learning_rate": 2.9573613218201227e-06, "loss": 4.0202, "step": 21485 }, { "epoch": 7.157241609061381, "grad_norm": 0.76171875, "learning_rate": 2.9567172745129947e-06, "loss": 4.0298, "step": 21486 }, { "epoch": 7.157574748063629, "grad_norm": 0.765625, "learning_rate": 2.9560732801252703e-06, "loss": 3.9888, "step": 21487 }, { "epoch": 7.157907887065878, "grad_norm": 0.79296875, "learning_rate": 2.95542933866445e-06, "loss": 3.9951, "step": 21488 }, { "epoch": 7.158241026068127, "grad_norm": 0.75, "learning_rate": 2.9547854501380312e-06, "loss": 4.0069, "step": 21489 }, { "epoch": 7.158574165070376, "grad_norm": 0.75390625, "learning_rate": 2.9541416145535223e-06, "loss": 3.9185, "step": 21490 }, { "epoch": 7.158907304072624, "grad_norm": 0.75390625, "learning_rate": 2.9534978319184146e-06, "loss": 4.027, "step": 21491 }, { "epoch": 7.159240443074873, "grad_norm": 0.76171875, "learning_rate": 2.952854102240207e-06, "loss": 4.0573, "step": 21492 }, { "epoch": 7.159573582077122, "grad_norm": 0.7890625, "learning_rate": 2.9522104255264033e-06, "loss": 3.9261, "step": 21493 }, { "epoch": 7.159906721079371, "grad_norm": 0.765625, "learning_rate": 2.951566801784496e-06, "loss": 3.9464, "step": 21494 }, { "epoch": 7.160239860081619, "grad_norm": 0.7734375, "learning_rate": 2.9509232310219852e-06, "loss": 3.9694, "step": 21495 }, { "epoch": 7.160572999083867, "grad_norm": 0.73046875, "learning_rate": 2.9502797132463624e-06, "loss": 3.9805, "step": 21496 }, { "epoch": 7.1609061380861165, "grad_norm": 0.75, "learning_rate": 2.9496362484651285e-06, "loss": 4.0622, "step": 21497 }, { "epoch": 7.161239277088365, "grad_norm": 0.74609375, "learning_rate": 2.9489928366857764e-06, "loss": 3.8957, "step": 21498 }, { "epoch": 7.161572416090614, "grad_norm": 0.7265625, "learning_rate": 2.9483494779158006e-06, "loss": 3.999, "step": 21499 }, { "epoch": 7.161905555092862, "grad_norm": 0.765625, "learning_rate": 2.947706172162694e-06, "loss": 3.9904, "step": 21500 }, { "epoch": 7.1622386940951115, "grad_norm": 0.7578125, "learning_rate": 2.9470629194339506e-06, "loss": 3.9428, "step": 21501 }, { "epoch": 7.16257183309736, "grad_norm": 0.71875, "learning_rate": 2.946419719737062e-06, "loss": 4.0587, "step": 21502 }, { "epoch": 7.162904972099609, "grad_norm": 0.8203125, "learning_rate": 2.945776573079519e-06, "loss": 4.0395, "step": 21503 }, { "epoch": 7.163238111101857, "grad_norm": 0.75, "learning_rate": 2.9451334794688166e-06, "loss": 4.0015, "step": 21504 }, { "epoch": 7.163571250104106, "grad_norm": 0.78125, "learning_rate": 2.944490438912443e-06, "loss": 4.0289, "step": 21505 }, { "epoch": 7.163904389106355, "grad_norm": 0.7734375, "learning_rate": 2.943847451417889e-06, "loss": 4.0008, "step": 21506 }, { "epoch": 7.164237528108603, "grad_norm": 0.765625, "learning_rate": 2.943204516992641e-06, "loss": 3.9425, "step": 21507 }, { "epoch": 7.164570667110852, "grad_norm": 0.76171875, "learning_rate": 2.942561635644195e-06, "loss": 4.0117, "step": 21508 }, { "epoch": 7.1649038061131005, "grad_norm": 0.78515625, "learning_rate": 2.9419188073800326e-06, "loss": 3.9581, "step": 21509 }, { "epoch": 7.16523694511535, "grad_norm": 0.7578125, "learning_rate": 2.941276032207643e-06, "loss": 4.074, "step": 21510 }, { "epoch": 7.165570084117598, "grad_norm": 0.75390625, "learning_rate": 2.9406333101345116e-06, "loss": 4.0117, "step": 21511 }, { "epoch": 7.165903223119847, "grad_norm": 0.75390625, "learning_rate": 2.939990641168128e-06, "loss": 3.9787, "step": 21512 }, { "epoch": 7.1662363621220955, "grad_norm": 0.78125, "learning_rate": 2.939348025315977e-06, "loss": 3.9637, "step": 21513 }, { "epoch": 7.166569501124344, "grad_norm": 0.7421875, "learning_rate": 2.938705462585543e-06, "loss": 4.0282, "step": 21514 }, { "epoch": 7.166902640126593, "grad_norm": 0.73828125, "learning_rate": 2.9380629529843083e-06, "loss": 3.9583, "step": 21515 }, { "epoch": 7.167235779128841, "grad_norm": 0.78125, "learning_rate": 2.9374204965197613e-06, "loss": 3.927, "step": 21516 }, { "epoch": 7.16756891813109, "grad_norm": 0.79296875, "learning_rate": 2.9367780931993857e-06, "loss": 3.9134, "step": 21517 }, { "epoch": 7.167902057133339, "grad_norm": 0.76953125, "learning_rate": 2.9361357430306566e-06, "loss": 3.9565, "step": 21518 }, { "epoch": 7.168235196135588, "grad_norm": 0.7578125, "learning_rate": 2.935493446021063e-06, "loss": 3.9847, "step": 21519 }, { "epoch": 7.168568335137836, "grad_norm": 0.796875, "learning_rate": 2.9348512021780843e-06, "loss": 3.9271, "step": 21520 }, { "epoch": 7.168901474140085, "grad_norm": 0.83203125, "learning_rate": 2.9342090115092e-06, "loss": 4.025, "step": 21521 }, { "epoch": 7.169234613142334, "grad_norm": 0.734375, "learning_rate": 2.9335668740218904e-06, "loss": 3.9453, "step": 21522 }, { "epoch": 7.169567752144582, "grad_norm": 0.80859375, "learning_rate": 2.9329247897236367e-06, "loss": 4.0501, "step": 21523 }, { "epoch": 7.169900891146831, "grad_norm": 0.77734375, "learning_rate": 2.9322827586219176e-06, "loss": 4.0168, "step": 21524 }, { "epoch": 7.1702340301490795, "grad_norm": 0.78125, "learning_rate": 2.931640780724211e-06, "loss": 3.9733, "step": 21525 }, { "epoch": 7.170567169151329, "grad_norm": 0.8046875, "learning_rate": 2.9309988560379946e-06, "loss": 3.983, "step": 21526 }, { "epoch": 7.170900308153577, "grad_norm": 0.76171875, "learning_rate": 2.9303569845707445e-06, "loss": 4.0124, "step": 21527 }, { "epoch": 7.171233447155826, "grad_norm": 0.74609375, "learning_rate": 2.929715166329937e-06, "loss": 4.0099, "step": 21528 }, { "epoch": 7.171566586158074, "grad_norm": 0.74609375, "learning_rate": 2.929073401323047e-06, "loss": 3.8911, "step": 21529 }, { "epoch": 7.171899725160323, "grad_norm": 0.76953125, "learning_rate": 2.9284316895575538e-06, "loss": 3.9258, "step": 21530 }, { "epoch": 7.172232864162572, "grad_norm": 0.7734375, "learning_rate": 2.9277900310409296e-06, "loss": 4.0547, "step": 21531 }, { "epoch": 7.17256600316482, "grad_norm": 0.74609375, "learning_rate": 2.927148425780647e-06, "loss": 4.0065, "step": 21532 }, { "epoch": 7.172899142167069, "grad_norm": 0.75390625, "learning_rate": 2.9265068737841786e-06, "loss": 3.9291, "step": 21533 }, { "epoch": 7.173232281169318, "grad_norm": 0.73046875, "learning_rate": 2.925865375059002e-06, "loss": 3.976, "step": 21534 }, { "epoch": 7.173565420171567, "grad_norm": 0.76953125, "learning_rate": 2.925223929612588e-06, "loss": 4.0254, "step": 21535 }, { "epoch": 7.173898559173815, "grad_norm": 0.73828125, "learning_rate": 2.924582537452401e-06, "loss": 4.0241, "step": 21536 }, { "epoch": 7.174231698176064, "grad_norm": 0.765625, "learning_rate": 2.923941198585921e-06, "loss": 4.0021, "step": 21537 }, { "epoch": 7.174564837178313, "grad_norm": 0.7578125, "learning_rate": 2.923299913020614e-06, "loss": 3.9715, "step": 21538 }, { "epoch": 7.174897976180561, "grad_norm": 0.7734375, "learning_rate": 2.9226586807639484e-06, "loss": 3.9563, "step": 21539 }, { "epoch": 7.17523111518281, "grad_norm": 0.7734375, "learning_rate": 2.9220175018233935e-06, "loss": 3.9707, "step": 21540 }, { "epoch": 7.175564254185058, "grad_norm": 0.76953125, "learning_rate": 2.921376376206421e-06, "loss": 3.9879, "step": 21541 }, { "epoch": 7.175897393187308, "grad_norm": 0.765625, "learning_rate": 2.920735303920496e-06, "loss": 3.9663, "step": 21542 }, { "epoch": 7.176230532189556, "grad_norm": 0.796875, "learning_rate": 2.9200942849730865e-06, "loss": 3.9308, "step": 21543 }, { "epoch": 7.176563671191805, "grad_norm": 0.796875, "learning_rate": 2.919453319371659e-06, "loss": 3.8983, "step": 21544 }, { "epoch": 7.176896810194053, "grad_norm": 0.7734375, "learning_rate": 2.918812407123678e-06, "loss": 3.9779, "step": 21545 }, { "epoch": 7.1772299491963025, "grad_norm": 0.74609375, "learning_rate": 2.9181715482366093e-06, "loss": 3.9487, "step": 21546 }, { "epoch": 7.177563088198551, "grad_norm": 0.8203125, "learning_rate": 2.9175307427179167e-06, "loss": 3.9994, "step": 21547 }, { "epoch": 7.177896227200799, "grad_norm": 0.80859375, "learning_rate": 2.9168899905750666e-06, "loss": 3.9649, "step": 21548 }, { "epoch": 7.178229366203048, "grad_norm": 0.7734375, "learning_rate": 2.916249291815522e-06, "loss": 4.0312, "step": 21549 }, { "epoch": 7.178562505205297, "grad_norm": 0.77734375, "learning_rate": 2.9156086464467437e-06, "loss": 3.9102, "step": 21550 }, { "epoch": 7.178895644207546, "grad_norm": 0.77734375, "learning_rate": 2.9149680544761934e-06, "loss": 3.9744, "step": 21551 }, { "epoch": 7.179228783209794, "grad_norm": 0.75, "learning_rate": 2.914327515911336e-06, "loss": 4.0427, "step": 21552 }, { "epoch": 7.179561922212043, "grad_norm": 0.8046875, "learning_rate": 2.9136870307596335e-06, "loss": 3.9286, "step": 21553 }, { "epoch": 7.1798950612142916, "grad_norm": 0.7890625, "learning_rate": 2.913046599028541e-06, "loss": 4.0126, "step": 21554 }, { "epoch": 7.180228200216541, "grad_norm": 0.74609375, "learning_rate": 2.9124062207255163e-06, "loss": 3.9474, "step": 21555 }, { "epoch": 7.180561339218789, "grad_norm": 0.77734375, "learning_rate": 2.9117658958580264e-06, "loss": 3.9534, "step": 21556 }, { "epoch": 7.180894478221037, "grad_norm": 0.75390625, "learning_rate": 2.9111256244335248e-06, "loss": 4.0499, "step": 21557 }, { "epoch": 7.1812276172232865, "grad_norm": 0.7734375, "learning_rate": 2.9104854064594706e-06, "loss": 4.014, "step": 21558 }, { "epoch": 7.181560756225535, "grad_norm": 0.75390625, "learning_rate": 2.909845241943318e-06, "loss": 3.982, "step": 21559 }, { "epoch": 7.181893895227784, "grad_norm": 0.7421875, "learning_rate": 2.909205130892528e-06, "loss": 3.9616, "step": 21560 }, { "epoch": 7.182227034230032, "grad_norm": 0.77734375, "learning_rate": 2.9085650733145557e-06, "loss": 3.9637, "step": 21561 }, { "epoch": 7.1825601732322815, "grad_norm": 0.75, "learning_rate": 2.9079250692168548e-06, "loss": 3.95, "step": 21562 }, { "epoch": 7.18289331223453, "grad_norm": 0.80859375, "learning_rate": 2.9072851186068806e-06, "loss": 3.9591, "step": 21563 }, { "epoch": 7.183226451236779, "grad_norm": 0.73046875, "learning_rate": 2.906645221492086e-06, "loss": 3.9367, "step": 21564 }, { "epoch": 7.183559590239027, "grad_norm": 0.7890625, "learning_rate": 2.9060053778799263e-06, "loss": 3.9711, "step": 21565 }, { "epoch": 7.1838927292412755, "grad_norm": 0.765625, "learning_rate": 2.9053655877778506e-06, "loss": 3.964, "step": 21566 }, { "epoch": 7.184225868243525, "grad_norm": 0.74609375, "learning_rate": 2.9047258511933164e-06, "loss": 3.9656, "step": 21567 }, { "epoch": 7.184559007245773, "grad_norm": 0.7421875, "learning_rate": 2.9040861681337724e-06, "loss": 3.9744, "step": 21568 }, { "epoch": 7.184892146248022, "grad_norm": 0.76953125, "learning_rate": 2.90344653860667e-06, "loss": 3.9563, "step": 21569 }, { "epoch": 7.1852252852502705, "grad_norm": 0.74609375, "learning_rate": 2.9028069626194572e-06, "loss": 3.9869, "step": 21570 }, { "epoch": 7.18555842425252, "grad_norm": 0.78125, "learning_rate": 2.902167440179591e-06, "loss": 3.9524, "step": 21571 }, { "epoch": 7.185891563254768, "grad_norm": 0.75390625, "learning_rate": 2.9015279712945117e-06, "loss": 4.0041, "step": 21572 }, { "epoch": 7.186224702257017, "grad_norm": 0.78125, "learning_rate": 2.900888555971669e-06, "loss": 4.0613, "step": 21573 }, { "epoch": 7.1865578412592654, "grad_norm": 0.765625, "learning_rate": 2.9002491942185156e-06, "loss": 3.9941, "step": 21574 }, { "epoch": 7.186890980261514, "grad_norm": 0.765625, "learning_rate": 2.8996098860424967e-06, "loss": 4.0051, "step": 21575 }, { "epoch": 7.187224119263763, "grad_norm": 0.7734375, "learning_rate": 2.8989706314510577e-06, "loss": 4.0618, "step": 21576 }, { "epoch": 7.187557258266011, "grad_norm": 0.83203125, "learning_rate": 2.8983314304516423e-06, "loss": 4.0153, "step": 21577 }, { "epoch": 7.18789039726826, "grad_norm": 0.77734375, "learning_rate": 2.8976922830517006e-06, "loss": 4.0042, "step": 21578 }, { "epoch": 7.188223536270509, "grad_norm": 0.796875, "learning_rate": 2.8970531892586768e-06, "loss": 3.9896, "step": 21579 }, { "epoch": 7.188556675272758, "grad_norm": 0.79296875, "learning_rate": 2.896414149080012e-06, "loss": 3.927, "step": 21580 }, { "epoch": 7.188889814275006, "grad_norm": 0.81640625, "learning_rate": 2.895775162523151e-06, "loss": 3.9567, "step": 21581 }, { "epoch": 7.189222953277255, "grad_norm": 0.78125, "learning_rate": 2.895136229595538e-06, "loss": 3.9311, "step": 21582 }, { "epoch": 7.189556092279504, "grad_norm": 0.7421875, "learning_rate": 2.894497350304612e-06, "loss": 3.9618, "step": 21583 }, { "epoch": 7.189889231281752, "grad_norm": 0.7421875, "learning_rate": 2.893858524657815e-06, "loss": 3.9536, "step": 21584 }, { "epoch": 7.190222370284001, "grad_norm": 0.6953125, "learning_rate": 2.8932197526625913e-06, "loss": 4.0136, "step": 21585 }, { "epoch": 7.190555509286249, "grad_norm": 0.7734375, "learning_rate": 2.8925810343263797e-06, "loss": 4.0062, "step": 21586 }, { "epoch": 7.190888648288499, "grad_norm": 0.76953125, "learning_rate": 2.8919423696566187e-06, "loss": 4.0129, "step": 21587 }, { "epoch": 7.191221787290747, "grad_norm": 0.7109375, "learning_rate": 2.891303758660746e-06, "loss": 4.0111, "step": 21588 }, { "epoch": 7.191554926292996, "grad_norm": 0.75, "learning_rate": 2.8906652013462078e-06, "loss": 3.9638, "step": 21589 }, { "epoch": 7.191888065295244, "grad_norm": 0.80859375, "learning_rate": 2.8900266977204325e-06, "loss": 3.946, "step": 21590 }, { "epoch": 7.192221204297493, "grad_norm": 0.73828125, "learning_rate": 2.889388247790859e-06, "loss": 4.0122, "step": 21591 }, { "epoch": 7.192554343299742, "grad_norm": 0.8046875, "learning_rate": 2.8887498515649283e-06, "loss": 3.9328, "step": 21592 }, { "epoch": 7.19288748230199, "grad_norm": 0.76171875, "learning_rate": 2.8881115090500737e-06, "loss": 4.0141, "step": 21593 }, { "epoch": 7.193220621304239, "grad_norm": 0.76953125, "learning_rate": 2.8874732202537314e-06, "loss": 4.0037, "step": 21594 }, { "epoch": 7.193553760306488, "grad_norm": 0.796875, "learning_rate": 2.8868349851833324e-06, "loss": 3.9352, "step": 21595 }, { "epoch": 7.193886899308737, "grad_norm": 0.78515625, "learning_rate": 2.886196803846316e-06, "loss": 4.0332, "step": 21596 }, { "epoch": 7.194220038310985, "grad_norm": 0.73046875, "learning_rate": 2.885558676250113e-06, "loss": 3.9552, "step": 21597 }, { "epoch": 7.194553177313234, "grad_norm": 0.82421875, "learning_rate": 2.88492060240216e-06, "loss": 3.9302, "step": 21598 }, { "epoch": 7.194886316315483, "grad_norm": 0.81640625, "learning_rate": 2.8842825823098804e-06, "loss": 3.9484, "step": 21599 }, { "epoch": 7.195219455317732, "grad_norm": 0.78125, "learning_rate": 2.8836446159807138e-06, "loss": 3.9647, "step": 21600 }, { "epoch": 7.19555259431998, "grad_norm": 0.77734375, "learning_rate": 2.883006703422088e-06, "loss": 3.9849, "step": 21601 }, { "epoch": 7.195885733322228, "grad_norm": 0.71484375, "learning_rate": 2.8823688446414337e-06, "loss": 3.9632, "step": 21602 }, { "epoch": 7.1962188723244775, "grad_norm": 0.828125, "learning_rate": 2.881731039646178e-06, "loss": 3.9892, "step": 21603 }, { "epoch": 7.196552011326726, "grad_norm": 0.76171875, "learning_rate": 2.881093288443756e-06, "loss": 3.9454, "step": 21604 }, { "epoch": 7.196885150328975, "grad_norm": 0.8046875, "learning_rate": 2.880455591041591e-06, "loss": 3.9291, "step": 21605 }, { "epoch": 7.197218289331223, "grad_norm": 0.76953125, "learning_rate": 2.879817947447114e-06, "loss": 4.0488, "step": 21606 }, { "epoch": 7.1975514283334725, "grad_norm": 0.78515625, "learning_rate": 2.87918035766775e-06, "loss": 3.9629, "step": 21607 }, { "epoch": 7.197884567335721, "grad_norm": 0.84375, "learning_rate": 2.8785428217109256e-06, "loss": 3.9688, "step": 21608 }, { "epoch": 7.198217706337969, "grad_norm": 0.765625, "learning_rate": 2.8779053395840678e-06, "loss": 4.0411, "step": 21609 }, { "epoch": 7.198550845340218, "grad_norm": 0.79296875, "learning_rate": 2.8772679112945986e-06, "loss": 4.0132, "step": 21610 }, { "epoch": 7.198883984342467, "grad_norm": 0.734375, "learning_rate": 2.8766305368499492e-06, "loss": 4.0307, "step": 21611 }, { "epoch": 7.199217123344716, "grad_norm": 0.72265625, "learning_rate": 2.8759932162575387e-06, "loss": 4.0487, "step": 21612 }, { "epoch": 7.199550262346964, "grad_norm": 0.73828125, "learning_rate": 2.8753559495247918e-06, "loss": 3.9668, "step": 21613 }, { "epoch": 7.199883401349213, "grad_norm": 0.75, "learning_rate": 2.874718736659128e-06, "loss": 4.0584, "step": 21614 }, { "epoch": 7.2002165403514615, "grad_norm": 0.80078125, "learning_rate": 2.874081577667976e-06, "loss": 4.0454, "step": 21615 }, { "epoch": 7.200549679353711, "grad_norm": 0.75, "learning_rate": 2.8734444725587557e-06, "loss": 3.9705, "step": 21616 }, { "epoch": 7.200882818355959, "grad_norm": 0.765625, "learning_rate": 2.8728074213388813e-06, "loss": 3.9843, "step": 21617 }, { "epoch": 7.201215957358207, "grad_norm": 0.78125, "learning_rate": 2.8721704240157816e-06, "loss": 4.0352, "step": 21618 }, { "epoch": 7.2015490963604565, "grad_norm": 0.76171875, "learning_rate": 2.871533480596871e-06, "loss": 3.9887, "step": 21619 }, { "epoch": 7.201882235362705, "grad_norm": 0.73828125, "learning_rate": 2.8708965910895715e-06, "loss": 3.9956, "step": 21620 }, { "epoch": 7.202215374364954, "grad_norm": 0.76171875, "learning_rate": 2.8702597555012962e-06, "loss": 3.9176, "step": 21621 }, { "epoch": 7.202548513367202, "grad_norm": 0.81640625, "learning_rate": 2.8696229738394705e-06, "loss": 4.0177, "step": 21622 }, { "epoch": 7.202881652369451, "grad_norm": 0.7578125, "learning_rate": 2.8689862461115075e-06, "loss": 3.9466, "step": 21623 }, { "epoch": 7.2032147913717, "grad_norm": 0.80078125, "learning_rate": 2.8683495723248235e-06, "loss": 3.9477, "step": 21624 }, { "epoch": 7.203547930373949, "grad_norm": 0.765625, "learning_rate": 2.8677129524868352e-06, "loss": 4.0152, "step": 21625 }, { "epoch": 7.203881069376197, "grad_norm": 0.7421875, "learning_rate": 2.867076386604957e-06, "loss": 3.9483, "step": 21626 }, { "epoch": 7.2042142083784455, "grad_norm": 0.828125, "learning_rate": 2.8664398746866043e-06, "loss": 4.0179, "step": 21627 }, { "epoch": 7.204547347380695, "grad_norm": 0.76953125, "learning_rate": 2.8658034167391886e-06, "loss": 4.0148, "step": 21628 }, { "epoch": 7.204880486382943, "grad_norm": 0.76953125, "learning_rate": 2.865167012770127e-06, "loss": 3.987, "step": 21629 }, { "epoch": 7.205213625385192, "grad_norm": 0.80078125, "learning_rate": 2.8645306627868313e-06, "loss": 3.9767, "step": 21630 }, { "epoch": 7.2055467643874405, "grad_norm": 0.76953125, "learning_rate": 2.863894366796713e-06, "loss": 3.9164, "step": 21631 }, { "epoch": 7.20587990338969, "grad_norm": 0.703125, "learning_rate": 2.8632581248071816e-06, "loss": 4.0123, "step": 21632 }, { "epoch": 7.206213042391938, "grad_norm": 0.7890625, "learning_rate": 2.862621936825651e-06, "loss": 3.9783, "step": 21633 }, { "epoch": 7.206546181394187, "grad_norm": 0.77734375, "learning_rate": 2.8619858028595346e-06, "loss": 3.9802, "step": 21634 }, { "epoch": 7.206879320396435, "grad_norm": 0.71875, "learning_rate": 2.861349722916231e-06, "loss": 4.0086, "step": 21635 }, { "epoch": 7.207212459398684, "grad_norm": 0.765625, "learning_rate": 2.8607136970031594e-06, "loss": 3.9683, "step": 21636 }, { "epoch": 7.207545598400933, "grad_norm": 0.78125, "learning_rate": 2.8600777251277236e-06, "loss": 4.0501, "step": 21637 }, { "epoch": 7.207878737403181, "grad_norm": 0.78515625, "learning_rate": 2.8594418072973334e-06, "loss": 3.9713, "step": 21638 }, { "epoch": 7.20821187640543, "grad_norm": 0.74609375, "learning_rate": 2.858805943519391e-06, "loss": 4.0136, "step": 21639 }, { "epoch": 7.208545015407679, "grad_norm": 0.76953125, "learning_rate": 2.8581701338013105e-06, "loss": 3.9869, "step": 21640 }, { "epoch": 7.208878154409928, "grad_norm": 0.7265625, "learning_rate": 2.8575343781504925e-06, "loss": 4.0521, "step": 21641 }, { "epoch": 7.209211293412176, "grad_norm": 0.74609375, "learning_rate": 2.856898676574344e-06, "loss": 4.0318, "step": 21642 }, { "epoch": 7.209544432414425, "grad_norm": 0.7578125, "learning_rate": 2.856263029080269e-06, "loss": 3.9781, "step": 21643 }, { "epoch": 7.209877571416674, "grad_norm": 0.7578125, "learning_rate": 2.8556274356756713e-06, "loss": 3.9543, "step": 21644 }, { "epoch": 7.210210710418922, "grad_norm": 0.81640625, "learning_rate": 2.854991896367954e-06, "loss": 3.9992, "step": 21645 }, { "epoch": 7.210543849421171, "grad_norm": 0.83203125, "learning_rate": 2.8543564111645206e-06, "loss": 3.8799, "step": 21646 }, { "epoch": 7.210876988423419, "grad_norm": 0.734375, "learning_rate": 2.853720980072769e-06, "loss": 4.056, "step": 21647 }, { "epoch": 7.211210127425669, "grad_norm": 0.75390625, "learning_rate": 2.8530856031001072e-06, "loss": 3.9997, "step": 21648 }, { "epoch": 7.211543266427917, "grad_norm": 0.78515625, "learning_rate": 2.8524502802539336e-06, "loss": 3.9727, "step": 21649 }, { "epoch": 7.211876405430166, "grad_norm": 0.73828125, "learning_rate": 2.8518150115416474e-06, "loss": 4.0187, "step": 21650 }, { "epoch": 7.212209544432414, "grad_norm": 0.73828125, "learning_rate": 2.851179796970645e-06, "loss": 3.983, "step": 21651 }, { "epoch": 7.2125426834346635, "grad_norm": 0.76171875, "learning_rate": 2.850544636548335e-06, "loss": 4.0139, "step": 21652 }, { "epoch": 7.212875822436912, "grad_norm": 0.78125, "learning_rate": 2.8499095302821052e-06, "loss": 3.9834, "step": 21653 }, { "epoch": 7.21320896143916, "grad_norm": 0.75, "learning_rate": 2.849274478179356e-06, "loss": 3.9604, "step": 21654 }, { "epoch": 7.213542100441409, "grad_norm": 0.7578125, "learning_rate": 2.848639480247488e-06, "loss": 4.036, "step": 21655 }, { "epoch": 7.213875239443658, "grad_norm": 0.70703125, "learning_rate": 2.848004536493895e-06, "loss": 4.0212, "step": 21656 }, { "epoch": 7.214208378445907, "grad_norm": 0.765625, "learning_rate": 2.847369646925973e-06, "loss": 3.9116, "step": 21657 }, { "epoch": 7.214541517448155, "grad_norm": 0.7734375, "learning_rate": 2.8467348115511143e-06, "loss": 3.9214, "step": 21658 }, { "epoch": 7.214874656450404, "grad_norm": 0.79296875, "learning_rate": 2.846100030376719e-06, "loss": 4.0025, "step": 21659 }, { "epoch": 7.215207795452653, "grad_norm": 0.80859375, "learning_rate": 2.8454653034101775e-06, "loss": 4.055, "step": 21660 }, { "epoch": 7.215540934454902, "grad_norm": 0.8046875, "learning_rate": 2.844830630658884e-06, "loss": 4.0184, "step": 21661 }, { "epoch": 7.21587407345715, "grad_norm": 0.765625, "learning_rate": 2.84419601213023e-06, "loss": 4.0008, "step": 21662 }, { "epoch": 7.216207212459398, "grad_norm": 0.75, "learning_rate": 2.8435614478316087e-06, "loss": 3.9374, "step": 21663 }, { "epoch": 7.2165403514616475, "grad_norm": 0.7578125, "learning_rate": 2.8429269377704105e-06, "loss": 4.0044, "step": 21664 }, { "epoch": 7.216873490463896, "grad_norm": 0.7890625, "learning_rate": 2.842292481954024e-06, "loss": 3.9721, "step": 21665 }, { "epoch": 7.217206629466145, "grad_norm": 0.77734375, "learning_rate": 2.8416580803898433e-06, "loss": 3.9895, "step": 21666 }, { "epoch": 7.217539768468393, "grad_norm": 0.72265625, "learning_rate": 2.8410237330852564e-06, "loss": 3.922, "step": 21667 }, { "epoch": 7.2178729074706425, "grad_norm": 0.74609375, "learning_rate": 2.840389440047652e-06, "loss": 3.9151, "step": 21668 }, { "epoch": 7.218206046472891, "grad_norm": 0.78515625, "learning_rate": 2.8397552012844145e-06, "loss": 3.9884, "step": 21669 }, { "epoch": 7.218539185475139, "grad_norm": 0.75, "learning_rate": 2.8391210168029407e-06, "loss": 3.967, "step": 21670 }, { "epoch": 7.218872324477388, "grad_norm": 0.77734375, "learning_rate": 2.8384868866106088e-06, "loss": 4.0217, "step": 21671 }, { "epoch": 7.2192054634796365, "grad_norm": 0.73828125, "learning_rate": 2.8378528107148057e-06, "loss": 4.074, "step": 21672 }, { "epoch": 7.219538602481886, "grad_norm": 0.7734375, "learning_rate": 2.8372187891229214e-06, "loss": 3.9024, "step": 21673 }, { "epoch": 7.219871741484134, "grad_norm": 0.78125, "learning_rate": 2.8365848218423382e-06, "loss": 4.0326, "step": 21674 }, { "epoch": 7.220204880486383, "grad_norm": 0.77734375, "learning_rate": 2.8359509088804415e-06, "loss": 3.9313, "step": 21675 }, { "epoch": 7.2205380194886315, "grad_norm": 0.7890625, "learning_rate": 2.8353170502446117e-06, "loss": 3.9844, "step": 21676 }, { "epoch": 7.220871158490881, "grad_norm": 0.76171875, "learning_rate": 2.834683245942237e-06, "loss": 3.9941, "step": 21677 }, { "epoch": 7.221204297493129, "grad_norm": 0.80078125, "learning_rate": 2.8340494959806977e-06, "loss": 4.0346, "step": 21678 }, { "epoch": 7.221537436495377, "grad_norm": 0.77734375, "learning_rate": 2.833415800367375e-06, "loss": 4.0096, "step": 21679 }, { "epoch": 7.2218705754976265, "grad_norm": 0.78125, "learning_rate": 2.8327821591096505e-06, "loss": 3.9927, "step": 21680 }, { "epoch": 7.222203714499875, "grad_norm": 0.8046875, "learning_rate": 2.8321485722149045e-06, "loss": 3.9465, "step": 21681 }, { "epoch": 7.222536853502124, "grad_norm": 0.8125, "learning_rate": 2.8315150396905177e-06, "loss": 4.0005, "step": 21682 }, { "epoch": 7.222869992504372, "grad_norm": 0.7734375, "learning_rate": 2.8308815615438657e-06, "loss": 3.9633, "step": 21683 }, { "epoch": 7.223203131506621, "grad_norm": 0.76171875, "learning_rate": 2.8302481377823332e-06, "loss": 3.9537, "step": 21684 }, { "epoch": 7.22353627050887, "grad_norm": 0.765625, "learning_rate": 2.8296147684132947e-06, "loss": 3.9536, "step": 21685 }, { "epoch": 7.223869409511119, "grad_norm": 0.75390625, "learning_rate": 2.828981453444128e-06, "loss": 4.0032, "step": 21686 }, { "epoch": 7.224202548513367, "grad_norm": 0.71875, "learning_rate": 2.8283481928822077e-06, "loss": 4.0233, "step": 21687 }, { "epoch": 7.2245356875156155, "grad_norm": 0.78515625, "learning_rate": 2.827714986734917e-06, "loss": 3.9825, "step": 21688 }, { "epoch": 7.224868826517865, "grad_norm": 0.78515625, "learning_rate": 2.8270818350096247e-06, "loss": 4.0069, "step": 21689 }, { "epoch": 7.225201965520113, "grad_norm": 0.76171875, "learning_rate": 2.826448737713707e-06, "loss": 4.0028, "step": 21690 }, { "epoch": 7.225535104522362, "grad_norm": 0.74609375, "learning_rate": 2.8258156948545356e-06, "loss": 4.0704, "step": 21691 }, { "epoch": 7.22586824352461, "grad_norm": 0.765625, "learning_rate": 2.8251827064394905e-06, "loss": 3.9668, "step": 21692 }, { "epoch": 7.22620138252686, "grad_norm": 0.765625, "learning_rate": 2.824549772475941e-06, "loss": 3.9976, "step": 21693 }, { "epoch": 7.226534521529108, "grad_norm": 0.73046875, "learning_rate": 2.82391689297126e-06, "loss": 3.9639, "step": 21694 }, { "epoch": 7.226867660531357, "grad_norm": 0.76171875, "learning_rate": 2.8232840679328154e-06, "loss": 4.0451, "step": 21695 }, { "epoch": 7.227200799533605, "grad_norm": 0.82421875, "learning_rate": 2.8226512973679845e-06, "loss": 3.8973, "step": 21696 }, { "epoch": 7.227533938535854, "grad_norm": 0.77734375, "learning_rate": 2.822018581284138e-06, "loss": 3.9681, "step": 21697 }, { "epoch": 7.227867077538103, "grad_norm": 0.75, "learning_rate": 2.8213859196886366e-06, "loss": 3.9424, "step": 21698 }, { "epoch": 7.228200216540351, "grad_norm": 0.72265625, "learning_rate": 2.8207533125888584e-06, "loss": 3.9129, "step": 21699 }, { "epoch": 7.2285333555426, "grad_norm": 0.73046875, "learning_rate": 2.8201207599921694e-06, "loss": 4.0052, "step": 21700 }, { "epoch": 7.228866494544849, "grad_norm": 0.8046875, "learning_rate": 2.819488261905936e-06, "loss": 3.9839, "step": 21701 }, { "epoch": 7.229199633547098, "grad_norm": 0.8125, "learning_rate": 2.8188558183375243e-06, "loss": 4.0923, "step": 21702 }, { "epoch": 7.229532772549346, "grad_norm": 0.73046875, "learning_rate": 2.8182234292943056e-06, "loss": 3.9598, "step": 21703 }, { "epoch": 7.229865911551595, "grad_norm": 0.80859375, "learning_rate": 2.8175910947836427e-06, "loss": 4.0381, "step": 21704 }, { "epoch": 7.230199050553844, "grad_norm": 0.7578125, "learning_rate": 2.8169588148129015e-06, "loss": 4.0088, "step": 21705 }, { "epoch": 7.230532189556092, "grad_norm": 0.75, "learning_rate": 2.816326589389447e-06, "loss": 3.9306, "step": 21706 }, { "epoch": 7.230865328558341, "grad_norm": 0.75390625, "learning_rate": 2.815694418520643e-06, "loss": 4.0334, "step": 21707 }, { "epoch": 7.231198467560589, "grad_norm": 0.73828125, "learning_rate": 2.8150623022138527e-06, "loss": 3.9836, "step": 21708 }, { "epoch": 7.2315316065628386, "grad_norm": 0.80859375, "learning_rate": 2.8144302404764362e-06, "loss": 4.0028, "step": 21709 }, { "epoch": 7.231864745565087, "grad_norm": 0.7890625, "learning_rate": 2.8137982333157615e-06, "loss": 4.0446, "step": 21710 }, { "epoch": 7.232197884567336, "grad_norm": 0.7265625, "learning_rate": 2.813166280739187e-06, "loss": 4.036, "step": 21711 }, { "epoch": 7.232531023569584, "grad_norm": 0.78125, "learning_rate": 2.812534382754074e-06, "loss": 3.9406, "step": 21712 }, { "epoch": 7.2328641625718335, "grad_norm": 0.75, "learning_rate": 2.811902539367779e-06, "loss": 3.9331, "step": 21713 }, { "epoch": 7.233197301574082, "grad_norm": 0.76953125, "learning_rate": 2.8112707505876707e-06, "loss": 3.9549, "step": 21714 }, { "epoch": 7.23353044057633, "grad_norm": 0.75390625, "learning_rate": 2.8106390164210998e-06, "loss": 3.9855, "step": 21715 }, { "epoch": 7.233863579578579, "grad_norm": 0.796875, "learning_rate": 2.8100073368754244e-06, "loss": 3.9609, "step": 21716 }, { "epoch": 7.234196718580828, "grad_norm": 0.75, "learning_rate": 2.8093757119580072e-06, "loss": 4.0074, "step": 21717 }, { "epoch": 7.234529857583077, "grad_norm": 0.77734375, "learning_rate": 2.8087441416762036e-06, "loss": 3.9638, "step": 21718 }, { "epoch": 7.234862996585325, "grad_norm": 0.8125, "learning_rate": 2.8081126260373695e-06, "loss": 3.9627, "step": 21719 }, { "epoch": 7.235196135587574, "grad_norm": 0.7734375, "learning_rate": 2.8074811650488585e-06, "loss": 3.9367, "step": 21720 }, { "epoch": 7.2355292745898225, "grad_norm": 0.75390625, "learning_rate": 2.80684975871803e-06, "loss": 3.9021, "step": 21721 }, { "epoch": 7.235862413592072, "grad_norm": 0.73828125, "learning_rate": 2.8062184070522364e-06, "loss": 3.8786, "step": 21722 }, { "epoch": 7.23619555259432, "grad_norm": 0.76171875, "learning_rate": 2.805587110058832e-06, "loss": 3.9512, "step": 21723 }, { "epoch": 7.236528691596568, "grad_norm": 0.765625, "learning_rate": 2.8049558677451695e-06, "loss": 4.0184, "step": 21724 }, { "epoch": 7.2368618305988175, "grad_norm": 0.78125, "learning_rate": 2.8043246801186017e-06, "loss": 4.0178, "step": 21725 }, { "epoch": 7.237194969601066, "grad_norm": 0.77734375, "learning_rate": 2.8036935471864806e-06, "loss": 4.0125, "step": 21726 }, { "epoch": 7.237528108603315, "grad_norm": 0.75, "learning_rate": 2.803062468956155e-06, "loss": 3.9957, "step": 21727 }, { "epoch": 7.237861247605563, "grad_norm": 0.796875, "learning_rate": 2.8024314454349807e-06, "loss": 3.9688, "step": 21728 }, { "epoch": 7.2381943866078124, "grad_norm": 0.80859375, "learning_rate": 2.8018004766303044e-06, "loss": 3.985, "step": 21729 }, { "epoch": 7.238527525610061, "grad_norm": 0.7578125, "learning_rate": 2.801169562549477e-06, "loss": 4.0046, "step": 21730 }, { "epoch": 7.238860664612309, "grad_norm": 0.78515625, "learning_rate": 2.800538703199844e-06, "loss": 3.9627, "step": 21731 }, { "epoch": 7.239193803614558, "grad_norm": 0.80078125, "learning_rate": 2.7999078985887613e-06, "loss": 3.9984, "step": 21732 }, { "epoch": 7.2395269426168065, "grad_norm": 0.78125, "learning_rate": 2.7992771487235683e-06, "loss": 3.9189, "step": 21733 }, { "epoch": 7.239860081619056, "grad_norm": 0.75390625, "learning_rate": 2.7986464536116146e-06, "loss": 3.9604, "step": 21734 }, { "epoch": 7.240193220621304, "grad_norm": 0.7578125, "learning_rate": 2.7980158132602447e-06, "loss": 4.0424, "step": 21735 }, { "epoch": 7.240526359623553, "grad_norm": 0.78515625, "learning_rate": 2.797385227676808e-06, "loss": 3.9585, "step": 21736 }, { "epoch": 7.2408594986258015, "grad_norm": 0.75, "learning_rate": 2.7967546968686476e-06, "loss": 4.0473, "step": 21737 }, { "epoch": 7.241192637628051, "grad_norm": 0.80078125, "learning_rate": 2.7961242208431078e-06, "loss": 4.0248, "step": 21738 }, { "epoch": 7.241525776630299, "grad_norm": 0.77734375, "learning_rate": 2.79549379960753e-06, "loss": 4.0003, "step": 21739 }, { "epoch": 7.241858915632548, "grad_norm": 0.77734375, "learning_rate": 2.794863433169262e-06, "loss": 3.9973, "step": 21740 }, { "epoch": 7.242192054634796, "grad_norm": 0.8046875, "learning_rate": 2.7942331215356467e-06, "loss": 3.918, "step": 21741 }, { "epoch": 7.242525193637045, "grad_norm": 0.7421875, "learning_rate": 2.793602864714017e-06, "loss": 4.0045, "step": 21742 }, { "epoch": 7.242858332639294, "grad_norm": 0.7421875, "learning_rate": 2.7929726627117223e-06, "loss": 4.0305, "step": 21743 }, { "epoch": 7.243191471641542, "grad_norm": 0.7890625, "learning_rate": 2.792342515536101e-06, "loss": 3.9595, "step": 21744 }, { "epoch": 7.243524610643791, "grad_norm": 0.74609375, "learning_rate": 2.7917124231944925e-06, "loss": 3.9622, "step": 21745 }, { "epoch": 7.24385774964604, "grad_norm": 0.71484375, "learning_rate": 2.791082385694234e-06, "loss": 3.9702, "step": 21746 }, { "epoch": 7.244190888648289, "grad_norm": 0.7734375, "learning_rate": 2.790452403042669e-06, "loss": 3.9842, "step": 21747 }, { "epoch": 7.244524027650537, "grad_norm": 0.796875, "learning_rate": 2.7898224752471323e-06, "loss": 3.9941, "step": 21748 }, { "epoch": 7.2448571666527855, "grad_norm": 0.7265625, "learning_rate": 2.789192602314961e-06, "loss": 3.9476, "step": 21749 }, { "epoch": 7.245190305655035, "grad_norm": 0.8125, "learning_rate": 2.7885627842534924e-06, "loss": 3.9448, "step": 21750 }, { "epoch": 7.245523444657283, "grad_norm": 0.7578125, "learning_rate": 2.787933021070063e-06, "loss": 3.9066, "step": 21751 }, { "epoch": 7.245856583659532, "grad_norm": 0.74609375, "learning_rate": 2.7873033127720076e-06, "loss": 4.0472, "step": 21752 }, { "epoch": 7.24618972266178, "grad_norm": 0.70703125, "learning_rate": 2.786673659366657e-06, "loss": 4.0254, "step": 21753 }, { "epoch": 7.24652286166403, "grad_norm": 0.7109375, "learning_rate": 2.786044060861353e-06, "loss": 3.9568, "step": 21754 }, { "epoch": 7.246856000666278, "grad_norm": 0.76171875, "learning_rate": 2.7854145172634256e-06, "loss": 3.9926, "step": 21755 }, { "epoch": 7.247189139668527, "grad_norm": 0.78515625, "learning_rate": 2.784785028580206e-06, "loss": 4.0266, "step": 21756 }, { "epoch": 7.247522278670775, "grad_norm": 0.796875, "learning_rate": 2.7841555948190262e-06, "loss": 4.0423, "step": 21757 }, { "epoch": 7.247855417673024, "grad_norm": 0.7734375, "learning_rate": 2.783526215987222e-06, "loss": 3.9787, "step": 21758 }, { "epoch": 7.248188556675273, "grad_norm": 0.7734375, "learning_rate": 2.782896892092123e-06, "loss": 4.0292, "step": 21759 }, { "epoch": 7.248521695677521, "grad_norm": 0.80859375, "learning_rate": 2.782267623141054e-06, "loss": 3.9515, "step": 21760 }, { "epoch": 7.24885483467977, "grad_norm": 0.81640625, "learning_rate": 2.78163840914135e-06, "loss": 3.9705, "step": 21761 }, { "epoch": 7.249187973682019, "grad_norm": 0.734375, "learning_rate": 2.781009250100339e-06, "loss": 3.9223, "step": 21762 }, { "epoch": 7.249521112684268, "grad_norm": 0.75390625, "learning_rate": 2.7803801460253494e-06, "loss": 3.9693, "step": 21763 }, { "epoch": 7.249854251686516, "grad_norm": 0.7421875, "learning_rate": 2.7797510969237056e-06, "loss": 4.0358, "step": 21764 }, { "epoch": 7.250187390688765, "grad_norm": 0.79296875, "learning_rate": 2.77912210280274e-06, "loss": 4.0061, "step": 21765 }, { "epoch": 7.250520529691014, "grad_norm": 0.78125, "learning_rate": 2.778493163669777e-06, "loss": 3.9737, "step": 21766 }, { "epoch": 7.250853668693262, "grad_norm": 0.7890625, "learning_rate": 2.7778642795321415e-06, "loss": 3.9783, "step": 21767 }, { "epoch": 7.251186807695511, "grad_norm": 0.7421875, "learning_rate": 2.777235450397158e-06, "loss": 3.9947, "step": 21768 }, { "epoch": 7.251519946697759, "grad_norm": 0.7421875, "learning_rate": 2.7766066762721533e-06, "loss": 4.0109, "step": 21769 }, { "epoch": 7.2518530857000085, "grad_norm": 0.765625, "learning_rate": 2.77597795716445e-06, "loss": 4.0774, "step": 21770 }, { "epoch": 7.252186224702257, "grad_norm": 0.7578125, "learning_rate": 2.775349293081368e-06, "loss": 4.0437, "step": 21771 }, { "epoch": 7.252519363704506, "grad_norm": 0.7421875, "learning_rate": 2.7747206840302356e-06, "loss": 4.0182, "step": 21772 }, { "epoch": 7.252852502706754, "grad_norm": 0.76953125, "learning_rate": 2.774092130018373e-06, "loss": 3.9876, "step": 21773 }, { "epoch": 7.2531856417090035, "grad_norm": 0.76171875, "learning_rate": 2.7734636310531e-06, "loss": 4.0033, "step": 21774 }, { "epoch": 7.253518780711252, "grad_norm": 0.734375, "learning_rate": 2.772835187141736e-06, "loss": 4.0216, "step": 21775 }, { "epoch": 7.2538519197135, "grad_norm": 0.73828125, "learning_rate": 2.7722067982916057e-06, "loss": 3.9818, "step": 21776 }, { "epoch": 7.254185058715749, "grad_norm": 0.75, "learning_rate": 2.7715784645100283e-06, "loss": 4.0217, "step": 21777 }, { "epoch": 7.2545181977179976, "grad_norm": 0.78125, "learning_rate": 2.770950185804317e-06, "loss": 3.9956, "step": 21778 }, { "epoch": 7.254851336720247, "grad_norm": 0.8125, "learning_rate": 2.770321962181791e-06, "loss": 3.938, "step": 21779 }, { "epoch": 7.255184475722495, "grad_norm": 0.78125, "learning_rate": 2.7696937936497723e-06, "loss": 3.9758, "step": 21780 }, { "epoch": 7.255517614724744, "grad_norm": 0.7421875, "learning_rate": 2.7690656802155744e-06, "loss": 3.962, "step": 21781 }, { "epoch": 7.2558507537269925, "grad_norm": 0.79296875, "learning_rate": 2.7684376218865145e-06, "loss": 4.0251, "step": 21782 }, { "epoch": 7.256183892729242, "grad_norm": 0.765625, "learning_rate": 2.767809618669905e-06, "loss": 3.986, "step": 21783 }, { "epoch": 7.25651703173149, "grad_norm": 0.81640625, "learning_rate": 2.7671816705730667e-06, "loss": 3.9756, "step": 21784 }, { "epoch": 7.256850170733738, "grad_norm": 0.81640625, "learning_rate": 2.7665537776033105e-06, "loss": 4.0198, "step": 21785 }, { "epoch": 7.2571833097359875, "grad_norm": 0.75, "learning_rate": 2.76592593976795e-06, "loss": 3.9758, "step": 21786 }, { "epoch": 7.257516448738236, "grad_norm": 0.7734375, "learning_rate": 2.765298157074298e-06, "loss": 3.9958, "step": 21787 }, { "epoch": 7.257849587740485, "grad_norm": 0.7890625, "learning_rate": 2.7646704295296677e-06, "loss": 3.9476, "step": 21788 }, { "epoch": 7.258182726742733, "grad_norm": 0.75390625, "learning_rate": 2.7640427571413695e-06, "loss": 3.98, "step": 21789 }, { "epoch": 7.258515865744982, "grad_norm": 0.73046875, "learning_rate": 2.7634151399167133e-06, "loss": 3.9715, "step": 21790 }, { "epoch": 7.258849004747231, "grad_norm": 0.765625, "learning_rate": 2.7627875778630137e-06, "loss": 3.9505, "step": 21791 }, { "epoch": 7.259182143749479, "grad_norm": 0.75, "learning_rate": 2.762160070987577e-06, "loss": 4.0393, "step": 21792 }, { "epoch": 7.259515282751728, "grad_norm": 0.7734375, "learning_rate": 2.7615326192977143e-06, "loss": 3.9649, "step": 21793 }, { "epoch": 7.2598484217539765, "grad_norm": 0.796875, "learning_rate": 2.7609052228007305e-06, "loss": 4.0516, "step": 21794 }, { "epoch": 7.260181560756226, "grad_norm": 0.80078125, "learning_rate": 2.7602778815039416e-06, "loss": 4.0399, "step": 21795 }, { "epoch": 7.260514699758474, "grad_norm": 0.73046875, "learning_rate": 2.759650595414645e-06, "loss": 3.9712, "step": 21796 }, { "epoch": 7.260847838760723, "grad_norm": 0.83203125, "learning_rate": 2.75902336454015e-06, "loss": 3.9415, "step": 21797 }, { "epoch": 7.2611809777629714, "grad_norm": 0.78125, "learning_rate": 2.758396188887766e-06, "loss": 3.9972, "step": 21798 }, { "epoch": 7.261514116765221, "grad_norm": 0.78515625, "learning_rate": 2.7577690684647967e-06, "loss": 4.0121, "step": 21799 }, { "epoch": 7.261847255767469, "grad_norm": 0.76953125, "learning_rate": 2.757142003278546e-06, "loss": 4.0084, "step": 21800 }, { "epoch": 7.262180394769718, "grad_norm": 0.79296875, "learning_rate": 2.7565149933363156e-06, "loss": 4.0141, "step": 21801 }, { "epoch": 7.262513533771966, "grad_norm": 0.76953125, "learning_rate": 2.755888038645413e-06, "loss": 4.0841, "step": 21802 }, { "epoch": 7.262846672774215, "grad_norm": 0.7734375, "learning_rate": 2.755261139213139e-06, "loss": 3.9515, "step": 21803 }, { "epoch": 7.263179811776464, "grad_norm": 0.80859375, "learning_rate": 2.7546342950467963e-06, "loss": 3.92, "step": 21804 }, { "epoch": 7.263512950778712, "grad_norm": 0.75390625, "learning_rate": 2.754007506153685e-06, "loss": 3.9848, "step": 21805 }, { "epoch": 7.263846089780961, "grad_norm": 0.76953125, "learning_rate": 2.7533807725411066e-06, "loss": 4.0566, "step": 21806 }, { "epoch": 7.26417922878321, "grad_norm": 0.78125, "learning_rate": 2.752754094216361e-06, "loss": 3.9625, "step": 21807 }, { "epoch": 7.264512367785459, "grad_norm": 0.74609375, "learning_rate": 2.7521274711867455e-06, "loss": 4.0692, "step": 21808 }, { "epoch": 7.264845506787707, "grad_norm": 0.75390625, "learning_rate": 2.751500903459563e-06, "loss": 4.0044, "step": 21809 }, { "epoch": 7.265178645789955, "grad_norm": 0.796875, "learning_rate": 2.7508743910421104e-06, "loss": 3.9561, "step": 21810 }, { "epoch": 7.265511784792205, "grad_norm": 0.74609375, "learning_rate": 2.7502479339416843e-06, "loss": 3.9968, "step": 21811 }, { "epoch": 7.265844923794453, "grad_norm": 0.7421875, "learning_rate": 2.749621532165579e-06, "loss": 3.9871, "step": 21812 }, { "epoch": 7.266178062796702, "grad_norm": 0.83203125, "learning_rate": 2.748995185721099e-06, "loss": 4.0092, "step": 21813 }, { "epoch": 7.26651120179895, "grad_norm": 0.75, "learning_rate": 2.7483688946155315e-06, "loss": 3.9951, "step": 21814 }, { "epoch": 7.2668443408012, "grad_norm": 0.76171875, "learning_rate": 2.7477426588561728e-06, "loss": 4.0076, "step": 21815 }, { "epoch": 7.267177479803448, "grad_norm": 0.76171875, "learning_rate": 2.7471164784503198e-06, "loss": 4.0074, "step": 21816 }, { "epoch": 7.267510618805697, "grad_norm": 0.78125, "learning_rate": 2.746490353405266e-06, "loss": 3.9758, "step": 21817 }, { "epoch": 7.267843757807945, "grad_norm": 0.796875, "learning_rate": 2.745864283728303e-06, "loss": 3.9686, "step": 21818 }, { "epoch": 7.2681768968101945, "grad_norm": 0.78515625, "learning_rate": 2.7452382694267223e-06, "loss": 3.9682, "step": 21819 }, { "epoch": 7.268510035812443, "grad_norm": 0.82421875, "learning_rate": 2.744612310507819e-06, "loss": 3.9117, "step": 21820 }, { "epoch": 7.268843174814691, "grad_norm": 0.7578125, "learning_rate": 2.7439864069788816e-06, "loss": 4.0342, "step": 21821 }, { "epoch": 7.26917631381694, "grad_norm": 0.78125, "learning_rate": 2.7433605588472033e-06, "loss": 3.9379, "step": 21822 }, { "epoch": 7.269509452819189, "grad_norm": 0.765625, "learning_rate": 2.742734766120067e-06, "loss": 3.9403, "step": 21823 }, { "epoch": 7.269842591821438, "grad_norm": 0.77734375, "learning_rate": 2.742109028804769e-06, "loss": 3.9653, "step": 21824 }, { "epoch": 7.270175730823686, "grad_norm": 0.796875, "learning_rate": 2.741483346908595e-06, "loss": 3.9679, "step": 21825 }, { "epoch": 7.270508869825935, "grad_norm": 0.765625, "learning_rate": 2.7408577204388345e-06, "loss": 3.945, "step": 21826 }, { "epoch": 7.2708420088281835, "grad_norm": 0.76171875, "learning_rate": 2.7402321494027693e-06, "loss": 3.9215, "step": 21827 }, { "epoch": 7.271175147830432, "grad_norm": 0.8359375, "learning_rate": 2.7396066338076927e-06, "loss": 3.9992, "step": 21828 }, { "epoch": 7.271508286832681, "grad_norm": 0.74609375, "learning_rate": 2.738981173660888e-06, "loss": 4.0428, "step": 21829 }, { "epoch": 7.271841425834929, "grad_norm": 0.8203125, "learning_rate": 2.73835576896964e-06, "loss": 3.9005, "step": 21830 }, { "epoch": 7.2721745648371785, "grad_norm": 0.7421875, "learning_rate": 2.737730419741234e-06, "loss": 4.0646, "step": 21831 }, { "epoch": 7.272507703839427, "grad_norm": 0.76171875, "learning_rate": 2.737105125982953e-06, "loss": 3.9309, "step": 21832 }, { "epoch": 7.272840842841676, "grad_norm": 0.73046875, "learning_rate": 2.7364798877020814e-06, "loss": 3.9682, "step": 21833 }, { "epoch": 7.273173981843924, "grad_norm": 0.8046875, "learning_rate": 2.7358547049058983e-06, "loss": 3.9217, "step": 21834 }, { "epoch": 7.2735071208461735, "grad_norm": 0.765625, "learning_rate": 2.735229577601692e-06, "loss": 3.9471, "step": 21835 }, { "epoch": 7.273840259848422, "grad_norm": 0.69140625, "learning_rate": 2.73460450579674e-06, "loss": 4.0443, "step": 21836 }, { "epoch": 7.27417339885067, "grad_norm": 0.75, "learning_rate": 2.7339794894983243e-06, "loss": 3.9778, "step": 21837 }, { "epoch": 7.274506537852919, "grad_norm": 0.78515625, "learning_rate": 2.733354528713721e-06, "loss": 4.0098, "step": 21838 }, { "epoch": 7.2748396768551675, "grad_norm": 0.765625, "learning_rate": 2.7327296234502155e-06, "loss": 4.0342, "step": 21839 }, { "epoch": 7.275172815857417, "grad_norm": 0.72265625, "learning_rate": 2.732104773715087e-06, "loss": 3.9914, "step": 21840 }, { "epoch": 7.275505954859665, "grad_norm": 0.71875, "learning_rate": 2.7314799795156054e-06, "loss": 3.9433, "step": 21841 }, { "epoch": 7.275839093861914, "grad_norm": 0.76171875, "learning_rate": 2.7308552408590564e-06, "loss": 4.0291, "step": 21842 }, { "epoch": 7.2761722328641625, "grad_norm": 0.75390625, "learning_rate": 2.7302305577527136e-06, "loss": 3.9559, "step": 21843 }, { "epoch": 7.276505371866412, "grad_norm": 0.77734375, "learning_rate": 2.7296059302038535e-06, "loss": 3.9635, "step": 21844 }, { "epoch": 7.27683851086866, "grad_norm": 0.7265625, "learning_rate": 2.728981358219749e-06, "loss": 3.9827, "step": 21845 }, { "epoch": 7.277171649870908, "grad_norm": 0.78515625, "learning_rate": 2.7283568418076803e-06, "loss": 3.9624, "step": 21846 }, { "epoch": 7.277504788873157, "grad_norm": 0.7265625, "learning_rate": 2.7277323809749187e-06, "loss": 3.9689, "step": 21847 }, { "epoch": 7.277837927875406, "grad_norm": 0.7421875, "learning_rate": 2.7271079757287384e-06, "loss": 4.0021, "step": 21848 }, { "epoch": 7.278171066877655, "grad_norm": 0.7578125, "learning_rate": 2.7264836260764127e-06, "loss": 3.9617, "step": 21849 }, { "epoch": 7.278504205879903, "grad_norm": 0.76953125, "learning_rate": 2.7258593320252127e-06, "loss": 4.0861, "step": 21850 }, { "epoch": 7.278837344882152, "grad_norm": 0.76171875, "learning_rate": 2.7252350935824113e-06, "loss": 3.9355, "step": 21851 }, { "epoch": 7.279170483884401, "grad_norm": 0.78515625, "learning_rate": 2.7246109107552757e-06, "loss": 3.9692, "step": 21852 }, { "epoch": 7.279503622886649, "grad_norm": 0.75, "learning_rate": 2.723986783551082e-06, "loss": 4.0079, "step": 21853 }, { "epoch": 7.279836761888898, "grad_norm": 0.75390625, "learning_rate": 2.7233627119770974e-06, "loss": 3.9851, "step": 21854 }, { "epoch": 7.2801699008911465, "grad_norm": 0.73828125, "learning_rate": 2.7227386960405915e-06, "loss": 3.9817, "step": 21855 }, { "epoch": 7.280503039893396, "grad_norm": 0.83984375, "learning_rate": 2.722114735748829e-06, "loss": 3.9747, "step": 21856 }, { "epoch": 7.280836178895644, "grad_norm": 0.7734375, "learning_rate": 2.7214908311090843e-06, "loss": 3.9784, "step": 21857 }, { "epoch": 7.281169317897893, "grad_norm": 0.79296875, "learning_rate": 2.720866982128623e-06, "loss": 4.032, "step": 21858 }, { "epoch": 7.281502456900141, "grad_norm": 0.78515625, "learning_rate": 2.720243188814706e-06, "loss": 3.995, "step": 21859 }, { "epoch": 7.281835595902391, "grad_norm": 0.76953125, "learning_rate": 2.7196194511746045e-06, "loss": 4.0798, "step": 21860 }, { "epoch": 7.282168734904639, "grad_norm": 0.78515625, "learning_rate": 2.7189957692155825e-06, "loss": 4.0233, "step": 21861 }, { "epoch": 7.282501873906888, "grad_norm": 0.75390625, "learning_rate": 2.718372142944904e-06, "loss": 4.0057, "step": 21862 }, { "epoch": 7.282835012909136, "grad_norm": 0.73828125, "learning_rate": 2.7177485723698297e-06, "loss": 4.0014, "step": 21863 }, { "epoch": 7.283168151911385, "grad_norm": 0.8046875, "learning_rate": 2.7171250574976297e-06, "loss": 3.9736, "step": 21864 }, { "epoch": 7.283501290913634, "grad_norm": 0.76953125, "learning_rate": 2.716501598335563e-06, "loss": 3.9886, "step": 21865 }, { "epoch": 7.283834429915882, "grad_norm": 0.765625, "learning_rate": 2.715878194890891e-06, "loss": 4.0025, "step": 21866 }, { "epoch": 7.284167568918131, "grad_norm": 0.69921875, "learning_rate": 2.7152548471708765e-06, "loss": 4.0435, "step": 21867 }, { "epoch": 7.28450070792038, "grad_norm": 0.765625, "learning_rate": 2.7146315551827783e-06, "loss": 3.9929, "step": 21868 }, { "epoch": 7.284833846922629, "grad_norm": 0.75, "learning_rate": 2.7140083189338586e-06, "loss": 3.954, "step": 21869 }, { "epoch": 7.285166985924877, "grad_norm": 0.78515625, "learning_rate": 2.7133851384313746e-06, "loss": 4.044, "step": 21870 }, { "epoch": 7.285500124927125, "grad_norm": 0.75390625, "learning_rate": 2.712762013682584e-06, "loss": 3.9242, "step": 21871 }, { "epoch": 7.285833263929375, "grad_norm": 0.80078125, "learning_rate": 2.7121389446947484e-06, "loss": 3.9806, "step": 21872 }, { "epoch": 7.286166402931623, "grad_norm": 0.7734375, "learning_rate": 2.7115159314751246e-06, "loss": 4.0085, "step": 21873 }, { "epoch": 7.286499541933872, "grad_norm": 0.796875, "learning_rate": 2.7108929740309683e-06, "loss": 4.0568, "step": 21874 }, { "epoch": 7.28683268093612, "grad_norm": 0.7578125, "learning_rate": 2.710270072369533e-06, "loss": 4.0719, "step": 21875 }, { "epoch": 7.2871658199383695, "grad_norm": 0.74609375, "learning_rate": 2.709647226498081e-06, "loss": 4.0119, "step": 21876 }, { "epoch": 7.287498958940618, "grad_norm": 0.734375, "learning_rate": 2.7090244364238616e-06, "loss": 3.9095, "step": 21877 }, { "epoch": 7.287832097942867, "grad_norm": 0.734375, "learning_rate": 2.7084017021541268e-06, "loss": 3.9288, "step": 21878 }, { "epoch": 7.288165236945115, "grad_norm": 0.75, "learning_rate": 2.7077790236961374e-06, "loss": 3.9549, "step": 21879 }, { "epoch": 7.2884983759473645, "grad_norm": 0.765625, "learning_rate": 2.707156401057141e-06, "loss": 4.0222, "step": 21880 }, { "epoch": 7.288831514949613, "grad_norm": 0.73828125, "learning_rate": 2.7065338342443923e-06, "loss": 4.0587, "step": 21881 }, { "epoch": 7.289164653951861, "grad_norm": 0.75390625, "learning_rate": 2.705911323265138e-06, "loss": 4.0055, "step": 21882 }, { "epoch": 7.28949779295411, "grad_norm": 0.8125, "learning_rate": 2.7052888681266365e-06, "loss": 3.9385, "step": 21883 }, { "epoch": 7.289830931956359, "grad_norm": 0.765625, "learning_rate": 2.7046664688361346e-06, "loss": 4.0001, "step": 21884 }, { "epoch": 7.290164070958608, "grad_norm": 0.75, "learning_rate": 2.704044125400881e-06, "loss": 3.9955, "step": 21885 }, { "epoch": 7.290497209960856, "grad_norm": 0.72265625, "learning_rate": 2.7034218378281248e-06, "loss": 3.9492, "step": 21886 }, { "epoch": 7.290830348963105, "grad_norm": 0.73046875, "learning_rate": 2.7027996061251155e-06, "loss": 3.9568, "step": 21887 }, { "epoch": 7.2911634879653535, "grad_norm": 0.75, "learning_rate": 2.7021774302991003e-06, "loss": 4.0272, "step": 21888 }, { "epoch": 7.291496626967602, "grad_norm": 0.7890625, "learning_rate": 2.7015553103573234e-06, "loss": 3.9571, "step": 21889 }, { "epoch": 7.291829765969851, "grad_norm": 0.77734375, "learning_rate": 2.700933246307036e-06, "loss": 3.9897, "step": 21890 }, { "epoch": 7.292162904972099, "grad_norm": 0.74609375, "learning_rate": 2.7003112381554815e-06, "loss": 4.0496, "step": 21891 }, { "epoch": 7.2924960439743485, "grad_norm": 0.80859375, "learning_rate": 2.6996892859099046e-06, "loss": 3.9883, "step": 21892 }, { "epoch": 7.292829182976597, "grad_norm": 0.7578125, "learning_rate": 2.6990673895775485e-06, "loss": 4.0223, "step": 21893 }, { "epoch": 7.293162321978846, "grad_norm": 0.77734375, "learning_rate": 2.698445549165662e-06, "loss": 3.9617, "step": 21894 }, { "epoch": 7.293495460981094, "grad_norm": 0.80078125, "learning_rate": 2.6978237646814834e-06, "loss": 4.0385, "step": 21895 }, { "epoch": 7.293828599983343, "grad_norm": 0.80078125, "learning_rate": 2.6972020361322536e-06, "loss": 4.0226, "step": 21896 }, { "epoch": 7.294161738985592, "grad_norm": 0.796875, "learning_rate": 2.69658036352522e-06, "loss": 3.9921, "step": 21897 }, { "epoch": 7.29449487798784, "grad_norm": 0.77734375, "learning_rate": 2.695958746867621e-06, "loss": 3.987, "step": 21898 }, { "epoch": 7.294828016990089, "grad_norm": 0.7421875, "learning_rate": 2.6953371861666968e-06, "loss": 4.0087, "step": 21899 }, { "epoch": 7.2951611559923375, "grad_norm": 0.79296875, "learning_rate": 2.694715681429685e-06, "loss": 3.9995, "step": 21900 }, { "epoch": 7.295494294994587, "grad_norm": 0.74609375, "learning_rate": 2.6940942326638303e-06, "loss": 4.0094, "step": 21901 }, { "epoch": 7.295827433996835, "grad_norm": 0.8046875, "learning_rate": 2.6934728398763674e-06, "loss": 3.9569, "step": 21902 }, { "epoch": 7.296160572999084, "grad_norm": 0.80078125, "learning_rate": 2.6928515030745355e-06, "loss": 3.9868, "step": 21903 }, { "epoch": 7.2964937120013325, "grad_norm": 0.76171875, "learning_rate": 2.6922302222655706e-06, "loss": 4.0575, "step": 21904 }, { "epoch": 7.296826851003582, "grad_norm": 0.75, "learning_rate": 2.6916089974567114e-06, "loss": 3.9776, "step": 21905 }, { "epoch": 7.29715999000583, "grad_norm": 0.73046875, "learning_rate": 2.6909878286551915e-06, "loss": 4.0272, "step": 21906 }, { "epoch": 7.297493129008078, "grad_norm": 0.76953125, "learning_rate": 2.6903667158682445e-06, "loss": 3.983, "step": 21907 }, { "epoch": 7.297826268010327, "grad_norm": 0.8046875, "learning_rate": 2.68974565910311e-06, "loss": 3.9267, "step": 21908 }, { "epoch": 7.298159407012576, "grad_norm": 0.8046875, "learning_rate": 2.68912465836702e-06, "loss": 4.0142, "step": 21909 }, { "epoch": 7.298492546014825, "grad_norm": 0.765625, "learning_rate": 2.6885037136672074e-06, "loss": 3.984, "step": 21910 }, { "epoch": 7.298825685017073, "grad_norm": 0.74609375, "learning_rate": 2.687882825010902e-06, "loss": 3.9715, "step": 21911 }, { "epoch": 7.299158824019322, "grad_norm": 0.7734375, "learning_rate": 2.6872619924053445e-06, "loss": 4.0549, "step": 21912 }, { "epoch": 7.299491963021571, "grad_norm": 0.765625, "learning_rate": 2.686641215857757e-06, "loss": 4.0222, "step": 21913 }, { "epoch": 7.29982510202382, "grad_norm": 0.76953125, "learning_rate": 2.686020495375374e-06, "loss": 3.963, "step": 21914 }, { "epoch": 7.300158241026068, "grad_norm": 0.765625, "learning_rate": 2.685399830965422e-06, "loss": 3.995, "step": 21915 }, { "epoch": 7.300491380028316, "grad_norm": 0.74609375, "learning_rate": 2.6847792226351366e-06, "loss": 3.9476, "step": 21916 }, { "epoch": 7.300824519030566, "grad_norm": 0.81640625, "learning_rate": 2.684158670391744e-06, "loss": 3.9401, "step": 21917 }, { "epoch": 7.301157658032814, "grad_norm": 0.76171875, "learning_rate": 2.6835381742424712e-06, "loss": 3.9556, "step": 21918 }, { "epoch": 7.301490797035063, "grad_norm": 0.71875, "learning_rate": 2.682917734194544e-06, "loss": 4.0187, "step": 21919 }, { "epoch": 7.301823936037311, "grad_norm": 0.76953125, "learning_rate": 2.6822973502551937e-06, "loss": 4.0849, "step": 21920 }, { "epoch": 7.302157075039561, "grad_norm": 0.79296875, "learning_rate": 2.681677022431647e-06, "loss": 3.9774, "step": 21921 }, { "epoch": 7.302490214041809, "grad_norm": 0.73828125, "learning_rate": 2.681056750731121e-06, "loss": 3.9716, "step": 21922 }, { "epoch": 7.302823353044058, "grad_norm": 0.796875, "learning_rate": 2.680436535160849e-06, "loss": 4.0423, "step": 21923 }, { "epoch": 7.303156492046306, "grad_norm": 0.79296875, "learning_rate": 2.679816375728052e-06, "loss": 3.9511, "step": 21924 }, { "epoch": 7.303489631048555, "grad_norm": 0.7421875, "learning_rate": 2.6791962724399537e-06, "loss": 3.9587, "step": 21925 }, { "epoch": 7.303822770050804, "grad_norm": 0.8203125, "learning_rate": 2.6785762253037745e-06, "loss": 3.9582, "step": 21926 }, { "epoch": 7.304155909053052, "grad_norm": 0.796875, "learning_rate": 2.677956234326741e-06, "loss": 4.0156, "step": 21927 }, { "epoch": 7.304489048055301, "grad_norm": 0.76953125, "learning_rate": 2.677336299516074e-06, "loss": 3.9823, "step": 21928 }, { "epoch": 7.30482218705755, "grad_norm": 0.76953125, "learning_rate": 2.6767164208789928e-06, "loss": 3.9393, "step": 21929 }, { "epoch": 7.305155326059799, "grad_norm": 0.74609375, "learning_rate": 2.6760965984227175e-06, "loss": 3.9634, "step": 21930 }, { "epoch": 7.305488465062047, "grad_norm": 0.74609375, "learning_rate": 2.675476832154469e-06, "loss": 4.0072, "step": 21931 }, { "epoch": 7.305821604064295, "grad_norm": 0.7578125, "learning_rate": 2.6748571220814656e-06, "loss": 3.9679, "step": 21932 }, { "epoch": 7.3061547430665446, "grad_norm": 0.77734375, "learning_rate": 2.6742374682109223e-06, "loss": 3.9242, "step": 21933 }, { "epoch": 7.306487882068793, "grad_norm": 0.78125, "learning_rate": 2.6736178705500623e-06, "loss": 3.9839, "step": 21934 }, { "epoch": 7.306821021071042, "grad_norm": 0.80078125, "learning_rate": 2.6729983291061013e-06, "loss": 3.9905, "step": 21935 }, { "epoch": 7.30715416007329, "grad_norm": 0.75, "learning_rate": 2.672378843886253e-06, "loss": 3.9826, "step": 21936 }, { "epoch": 7.3074872990755395, "grad_norm": 0.84765625, "learning_rate": 2.6717594148977337e-06, "loss": 3.989, "step": 21937 }, { "epoch": 7.307820438077788, "grad_norm": 0.75, "learning_rate": 2.6711400421477606e-06, "loss": 4.0239, "step": 21938 }, { "epoch": 7.308153577080037, "grad_norm": 0.71484375, "learning_rate": 2.6705207256435496e-06, "loss": 3.9931, "step": 21939 }, { "epoch": 7.308486716082285, "grad_norm": 0.73828125, "learning_rate": 2.669901465392307e-06, "loss": 4.0278, "step": 21940 }, { "epoch": 7.3088198550845345, "grad_norm": 0.8046875, "learning_rate": 2.6692822614012513e-06, "loss": 3.9461, "step": 21941 }, { "epoch": 7.309152994086783, "grad_norm": 0.7734375, "learning_rate": 2.6686631136775957e-06, "loss": 3.9957, "step": 21942 }, { "epoch": 7.309486133089031, "grad_norm": 0.77734375, "learning_rate": 2.6680440222285493e-06, "loss": 3.9737, "step": 21943 }, { "epoch": 7.30981927209128, "grad_norm": 0.80078125, "learning_rate": 2.667424987061321e-06, "loss": 3.9287, "step": 21944 }, { "epoch": 7.3101524110935285, "grad_norm": 0.7734375, "learning_rate": 2.6668060081831275e-06, "loss": 3.9893, "step": 21945 }, { "epoch": 7.310485550095778, "grad_norm": 0.7109375, "learning_rate": 2.6661870856011753e-06, "loss": 3.9997, "step": 21946 }, { "epoch": 7.310818689098026, "grad_norm": 0.7890625, "learning_rate": 2.6655682193226738e-06, "loss": 3.9818, "step": 21947 }, { "epoch": 7.311151828100275, "grad_norm": 0.75390625, "learning_rate": 2.6649494093548306e-06, "loss": 3.9498, "step": 21948 }, { "epoch": 7.3114849671025235, "grad_norm": 0.734375, "learning_rate": 2.6643306557048546e-06, "loss": 3.976, "step": 21949 }, { "epoch": 7.311818106104772, "grad_norm": 0.76171875, "learning_rate": 2.6637119583799523e-06, "loss": 3.9223, "step": 21950 }, { "epoch": 7.312151245107021, "grad_norm": 0.76953125, "learning_rate": 2.663093317387327e-06, "loss": 3.9884, "step": 21951 }, { "epoch": 7.312484384109269, "grad_norm": 0.78515625, "learning_rate": 2.662474732734191e-06, "loss": 4.035, "step": 21952 }, { "epoch": 7.3128175231115184, "grad_norm": 0.7421875, "learning_rate": 2.6618562044277464e-06, "loss": 4.0082, "step": 21953 }, { "epoch": 7.313150662113767, "grad_norm": 0.78515625, "learning_rate": 2.6612377324751983e-06, "loss": 4.0239, "step": 21954 }, { "epoch": 7.313483801116016, "grad_norm": 0.734375, "learning_rate": 2.660619316883746e-06, "loss": 3.97, "step": 21955 }, { "epoch": 7.313816940118264, "grad_norm": 0.75, "learning_rate": 2.6600009576606e-06, "loss": 3.9977, "step": 21956 }, { "epoch": 7.314150079120513, "grad_norm": 0.765625, "learning_rate": 2.6593826548129613e-06, "loss": 3.9294, "step": 21957 }, { "epoch": 7.314483218122762, "grad_norm": 0.75390625, "learning_rate": 2.658764408348025e-06, "loss": 3.9636, "step": 21958 }, { "epoch": 7.314816357125011, "grad_norm": 0.85546875, "learning_rate": 2.6581462182729997e-06, "loss": 3.9768, "step": 21959 }, { "epoch": 7.315149496127259, "grad_norm": 0.75390625, "learning_rate": 2.657528084595084e-06, "loss": 3.9935, "step": 21960 }, { "epoch": 7.3154826351295075, "grad_norm": 0.80859375, "learning_rate": 2.6569100073214773e-06, "loss": 3.9387, "step": 21961 }, { "epoch": 7.315815774131757, "grad_norm": 0.79296875, "learning_rate": 2.6562919864593792e-06, "loss": 3.9664, "step": 21962 }, { "epoch": 7.316148913134005, "grad_norm": 0.796875, "learning_rate": 2.6556740220159846e-06, "loss": 4.0233, "step": 21963 }, { "epoch": 7.316482052136254, "grad_norm": 0.7265625, "learning_rate": 2.655056113998498e-06, "loss": 4.0352, "step": 21964 }, { "epoch": 7.316815191138502, "grad_norm": 0.76953125, "learning_rate": 2.654438262414113e-06, "loss": 4.0001, "step": 21965 }, { "epoch": 7.317148330140752, "grad_norm": 0.80078125, "learning_rate": 2.6538204672700266e-06, "loss": 3.9115, "step": 21966 }, { "epoch": 7.317481469143, "grad_norm": 0.77734375, "learning_rate": 2.6532027285734362e-06, "loss": 3.9947, "step": 21967 }, { "epoch": 7.317814608145248, "grad_norm": 0.77734375, "learning_rate": 2.652585046331535e-06, "loss": 3.9531, "step": 21968 }, { "epoch": 7.318147747147497, "grad_norm": 0.7421875, "learning_rate": 2.6519674205515186e-06, "loss": 4.0153, "step": 21969 }, { "epoch": 7.318480886149746, "grad_norm": 0.76953125, "learning_rate": 2.6513498512405777e-06, "loss": 4.0368, "step": 21970 }, { "epoch": 7.318814025151995, "grad_norm": 0.75390625, "learning_rate": 2.6507323384059122e-06, "loss": 3.9914, "step": 21971 }, { "epoch": 7.319147164154243, "grad_norm": 0.78515625, "learning_rate": 2.6501148820547114e-06, "loss": 3.9296, "step": 21972 }, { "epoch": 7.319480303156492, "grad_norm": 0.79296875, "learning_rate": 2.6494974821941677e-06, "loss": 3.9997, "step": 21973 }, { "epoch": 7.319813442158741, "grad_norm": 0.74609375, "learning_rate": 2.6488801388314716e-06, "loss": 4.007, "step": 21974 }, { "epoch": 7.32014658116099, "grad_norm": 0.7578125, "learning_rate": 2.648262851973815e-06, "loss": 4.0223, "step": 21975 }, { "epoch": 7.320479720163238, "grad_norm": 0.75390625, "learning_rate": 2.6476456216283863e-06, "loss": 3.9967, "step": 21976 }, { "epoch": 7.320812859165486, "grad_norm": 0.765625, "learning_rate": 2.6470284478023746e-06, "loss": 4.0028, "step": 21977 }, { "epoch": 7.321145998167736, "grad_norm": 0.75, "learning_rate": 2.646411330502972e-06, "loss": 4.0471, "step": 21978 }, { "epoch": 7.321479137169984, "grad_norm": 0.765625, "learning_rate": 2.645794269737364e-06, "loss": 3.9388, "step": 21979 }, { "epoch": 7.321812276172233, "grad_norm": 0.79296875, "learning_rate": 2.6451772655127396e-06, "loss": 3.9566, "step": 21980 }, { "epoch": 7.322145415174481, "grad_norm": 0.765625, "learning_rate": 2.644560317836281e-06, "loss": 3.9954, "step": 21981 }, { "epoch": 7.3224785541767305, "grad_norm": 0.75390625, "learning_rate": 2.643943426715181e-06, "loss": 3.9978, "step": 21982 }, { "epoch": 7.322811693178979, "grad_norm": 0.765625, "learning_rate": 2.643326592156624e-06, "loss": 4.0128, "step": 21983 }, { "epoch": 7.323144832181228, "grad_norm": 0.7578125, "learning_rate": 2.642709814167788e-06, "loss": 4.0235, "step": 21984 }, { "epoch": 7.323477971183476, "grad_norm": 0.77734375, "learning_rate": 2.642093092755864e-06, "loss": 4.0098, "step": 21985 }, { "epoch": 7.323811110185725, "grad_norm": 0.7890625, "learning_rate": 2.6414764279280336e-06, "loss": 3.9274, "step": 21986 }, { "epoch": 7.324144249187974, "grad_norm": 0.7890625, "learning_rate": 2.6408598196914788e-06, "loss": 3.9854, "step": 21987 }, { "epoch": 7.324477388190222, "grad_norm": 0.74609375, "learning_rate": 2.6402432680533796e-06, "loss": 3.9534, "step": 21988 }, { "epoch": 7.324810527192471, "grad_norm": 0.77734375, "learning_rate": 2.6396267730209235e-06, "loss": 3.9707, "step": 21989 }, { "epoch": 7.32514366619472, "grad_norm": 0.75390625, "learning_rate": 2.639010334601287e-06, "loss": 3.9716, "step": 21990 }, { "epoch": 7.325476805196969, "grad_norm": 0.76953125, "learning_rate": 2.6383939528016524e-06, "loss": 3.9894, "step": 21991 }, { "epoch": 7.325809944199217, "grad_norm": 0.77734375, "learning_rate": 2.637777627629197e-06, "loss": 3.9649, "step": 21992 }, { "epoch": 7.326143083201465, "grad_norm": 0.76953125, "learning_rate": 2.6371613590911006e-06, "loss": 3.9987, "step": 21993 }, { "epoch": 7.3264762222037145, "grad_norm": 0.765625, "learning_rate": 2.6365451471945424e-06, "loss": 3.98, "step": 21994 }, { "epoch": 7.326809361205963, "grad_norm": 0.77734375, "learning_rate": 2.6359289919466955e-06, "loss": 3.9839, "step": 21995 }, { "epoch": 7.327142500208212, "grad_norm": 0.71484375, "learning_rate": 2.635312893354743e-06, "loss": 4.0126, "step": 21996 }, { "epoch": 7.32747563921046, "grad_norm": 0.71484375, "learning_rate": 2.6346968514258586e-06, "loss": 4.0398, "step": 21997 }, { "epoch": 7.3278087782127095, "grad_norm": 0.7421875, "learning_rate": 2.634080866167218e-06, "loss": 3.9697, "step": 21998 }, { "epoch": 7.328141917214958, "grad_norm": 0.7578125, "learning_rate": 2.6334649375859927e-06, "loss": 3.94, "step": 21999 }, { "epoch": 7.328475056217207, "grad_norm": 0.7578125, "learning_rate": 2.6328490656893624e-06, "loss": 4.0028, "step": 22000 }, { "epoch": 7.328808195219455, "grad_norm": 0.828125, "learning_rate": 2.6322332504845012e-06, "loss": 3.9929, "step": 22001 }, { "epoch": 7.329141334221704, "grad_norm": 0.78125, "learning_rate": 2.6316174919785746e-06, "loss": 3.9775, "step": 22002 }, { "epoch": 7.329474473223953, "grad_norm": 0.7890625, "learning_rate": 2.631001790178761e-06, "loss": 3.9864, "step": 22003 }, { "epoch": 7.329807612226201, "grad_norm": 0.765625, "learning_rate": 2.63038614509223e-06, "loss": 4.0558, "step": 22004 }, { "epoch": 7.33014075122845, "grad_norm": 0.7265625, "learning_rate": 2.629770556726153e-06, "loss": 3.9211, "step": 22005 }, { "epoch": 7.3304738902306985, "grad_norm": 0.74609375, "learning_rate": 2.629155025087698e-06, "loss": 3.951, "step": 22006 }, { "epoch": 7.330807029232948, "grad_norm": 0.75, "learning_rate": 2.62853955018404e-06, "loss": 4.0323, "step": 22007 }, { "epoch": 7.331140168235196, "grad_norm": 0.78125, "learning_rate": 2.6279241320223436e-06, "loss": 3.9309, "step": 22008 }, { "epoch": 7.331473307237445, "grad_norm": 0.76171875, "learning_rate": 2.6273087706097782e-06, "loss": 3.991, "step": 22009 }, { "epoch": 7.3318064462396935, "grad_norm": 0.7734375, "learning_rate": 2.6266934659535114e-06, "loss": 4.0394, "step": 22010 }, { "epoch": 7.332139585241942, "grad_norm": 0.73828125, "learning_rate": 2.6260782180607108e-06, "loss": 3.9005, "step": 22011 }, { "epoch": 7.332472724244191, "grad_norm": 0.78125, "learning_rate": 2.6254630269385418e-06, "loss": 3.9496, "step": 22012 }, { "epoch": 7.332805863246439, "grad_norm": 0.76953125, "learning_rate": 2.624847892594169e-06, "loss": 4.0098, "step": 22013 }, { "epoch": 7.333139002248688, "grad_norm": 0.7890625, "learning_rate": 2.6242328150347575e-06, "loss": 3.9463, "step": 22014 }, { "epoch": 7.333472141250937, "grad_norm": 0.7578125, "learning_rate": 2.6236177942674754e-06, "loss": 3.9919, "step": 22015 }, { "epoch": 7.333805280253186, "grad_norm": 0.73046875, "learning_rate": 2.6230028302994833e-06, "loss": 3.8948, "step": 22016 }, { "epoch": 7.334138419255434, "grad_norm": 0.796875, "learning_rate": 2.622387923137945e-06, "loss": 3.9851, "step": 22017 }, { "epoch": 7.334471558257683, "grad_norm": 0.7109375, "learning_rate": 2.6217730727900198e-06, "loss": 4.0094, "step": 22018 }, { "epoch": 7.334804697259932, "grad_norm": 0.76171875, "learning_rate": 2.6211582792628766e-06, "loss": 3.9552, "step": 22019 }, { "epoch": 7.335137836262181, "grad_norm": 0.765625, "learning_rate": 2.6205435425636703e-06, "loss": 3.9881, "step": 22020 }, { "epoch": 7.335470975264429, "grad_norm": 0.80078125, "learning_rate": 2.6199288626995603e-06, "loss": 3.9543, "step": 22021 }, { "epoch": 7.3358041142666774, "grad_norm": 0.80859375, "learning_rate": 2.6193142396777104e-06, "loss": 4.0236, "step": 22022 }, { "epoch": 7.336137253268927, "grad_norm": 0.734375, "learning_rate": 2.618699673505279e-06, "loss": 4.0081, "step": 22023 }, { "epoch": 7.336470392271175, "grad_norm": 0.80078125, "learning_rate": 2.6180851641894228e-06, "loss": 3.9598, "step": 22024 }, { "epoch": 7.336803531273424, "grad_norm": 0.7734375, "learning_rate": 2.6174707117372982e-06, "loss": 4.024, "step": 22025 }, { "epoch": 7.337136670275672, "grad_norm": 0.8203125, "learning_rate": 2.6168563161560673e-06, "loss": 3.9757, "step": 22026 }, { "epoch": 7.337469809277922, "grad_norm": 0.77734375, "learning_rate": 2.6162419774528824e-06, "loss": 3.9505, "step": 22027 }, { "epoch": 7.33780294828017, "grad_norm": 0.75390625, "learning_rate": 2.6156276956349005e-06, "loss": 3.9721, "step": 22028 }, { "epoch": 7.338136087282418, "grad_norm": 0.80078125, "learning_rate": 2.6150134707092765e-06, "loss": 4.0044, "step": 22029 }, { "epoch": 7.338469226284667, "grad_norm": 0.7265625, "learning_rate": 2.6143993026831652e-06, "loss": 3.9505, "step": 22030 }, { "epoch": 7.338802365286916, "grad_norm": 0.7578125, "learning_rate": 2.613785191563719e-06, "loss": 3.9601, "step": 22031 }, { "epoch": 7.339135504289165, "grad_norm": 0.734375, "learning_rate": 2.6131711373580896e-06, "loss": 3.9657, "step": 22032 }, { "epoch": 7.339468643291413, "grad_norm": 0.765625, "learning_rate": 2.612557140073434e-06, "loss": 4.0381, "step": 22033 }, { "epoch": 7.339801782293662, "grad_norm": 0.7734375, "learning_rate": 2.611943199716902e-06, "loss": 4.0101, "step": 22034 }, { "epoch": 7.340134921295911, "grad_norm": 0.74609375, "learning_rate": 2.6113293162956436e-06, "loss": 3.9143, "step": 22035 }, { "epoch": 7.34046806029816, "grad_norm": 0.734375, "learning_rate": 2.610715489816808e-06, "loss": 3.9724, "step": 22036 }, { "epoch": 7.340801199300408, "grad_norm": 0.8125, "learning_rate": 2.6101017202875513e-06, "loss": 3.9687, "step": 22037 }, { "epoch": 7.341134338302656, "grad_norm": 0.75, "learning_rate": 2.6094880077150156e-06, "loss": 4.0109, "step": 22038 }, { "epoch": 7.341467477304906, "grad_norm": 0.7734375, "learning_rate": 2.6088743521063495e-06, "loss": 3.98, "step": 22039 }, { "epoch": 7.341800616307154, "grad_norm": 0.7734375, "learning_rate": 2.6082607534687056e-06, "loss": 3.9748, "step": 22040 }, { "epoch": 7.342133755309403, "grad_norm": 0.75, "learning_rate": 2.607647211809228e-06, "loss": 3.9867, "step": 22041 }, { "epoch": 7.342466894311651, "grad_norm": 0.74609375, "learning_rate": 2.6070337271350643e-06, "loss": 4.0052, "step": 22042 }, { "epoch": 7.3428000333139005, "grad_norm": 0.796875, "learning_rate": 2.6064202994533564e-06, "loss": 3.9716, "step": 22043 }, { "epoch": 7.343133172316149, "grad_norm": 0.80859375, "learning_rate": 2.605806928771255e-06, "loss": 4.0713, "step": 22044 }, { "epoch": 7.343466311318398, "grad_norm": 0.8046875, "learning_rate": 2.6051936150959022e-06, "loss": 3.9737, "step": 22045 }, { "epoch": 7.343799450320646, "grad_norm": 0.76171875, "learning_rate": 2.604580358434442e-06, "loss": 3.986, "step": 22046 }, { "epoch": 7.344132589322895, "grad_norm": 0.78515625, "learning_rate": 2.603967158794017e-06, "loss": 3.9537, "step": 22047 }, { "epoch": 7.344465728325144, "grad_norm": 0.75, "learning_rate": 2.6033540161817694e-06, "loss": 3.9778, "step": 22048 }, { "epoch": 7.344798867327392, "grad_norm": 0.75390625, "learning_rate": 2.6027409306048418e-06, "loss": 3.9904, "step": 22049 }, { "epoch": 7.345132006329641, "grad_norm": 0.7734375, "learning_rate": 2.602127902070372e-06, "loss": 3.9846, "step": 22050 }, { "epoch": 7.3454651453318895, "grad_norm": 0.78515625, "learning_rate": 2.601514930585506e-06, "loss": 3.9463, "step": 22051 }, { "epoch": 7.345798284334139, "grad_norm": 0.79296875, "learning_rate": 2.6009020161573813e-06, "loss": 3.9584, "step": 22052 }, { "epoch": 7.346131423336387, "grad_norm": 0.78125, "learning_rate": 2.6002891587931364e-06, "loss": 4.0071, "step": 22053 }, { "epoch": 7.346464562338636, "grad_norm": 0.828125, "learning_rate": 2.599676358499908e-06, "loss": 3.8796, "step": 22054 }, { "epoch": 7.3467977013408845, "grad_norm": 0.7890625, "learning_rate": 2.5990636152848403e-06, "loss": 4.0141, "step": 22055 }, { "epoch": 7.347130840343133, "grad_norm": 0.75, "learning_rate": 2.5984509291550637e-06, "loss": 3.9807, "step": 22056 }, { "epoch": 7.347463979345382, "grad_norm": 0.78515625, "learning_rate": 2.597838300117717e-06, "loss": 4.0093, "step": 22057 }, { "epoch": 7.34779711834763, "grad_norm": 0.8125, "learning_rate": 2.597225728179934e-06, "loss": 3.935, "step": 22058 }, { "epoch": 7.3481302573498795, "grad_norm": 0.734375, "learning_rate": 2.5966132133488537e-06, "loss": 4.0826, "step": 22059 }, { "epoch": 7.348463396352128, "grad_norm": 0.76171875, "learning_rate": 2.5960007556316097e-06, "loss": 3.9318, "step": 22060 }, { "epoch": 7.348796535354377, "grad_norm": 0.73046875, "learning_rate": 2.595388355035334e-06, "loss": 4.0517, "step": 22061 }, { "epoch": 7.349129674356625, "grad_norm": 0.734375, "learning_rate": 2.5947760115671585e-06, "loss": 3.9862, "step": 22062 }, { "epoch": 7.349462813358874, "grad_norm": 0.765625, "learning_rate": 2.59416372523422e-06, "loss": 3.9707, "step": 22063 }, { "epoch": 7.349795952361123, "grad_norm": 0.8046875, "learning_rate": 2.593551496043651e-06, "loss": 3.9928, "step": 22064 }, { "epoch": 7.350129091363371, "grad_norm": 0.8125, "learning_rate": 2.5929393240025747e-06, "loss": 3.9726, "step": 22065 }, { "epoch": 7.35046223036562, "grad_norm": 0.78515625, "learning_rate": 2.592327209118128e-06, "loss": 4.089, "step": 22066 }, { "epoch": 7.3507953693678685, "grad_norm": 0.77734375, "learning_rate": 2.5917151513974407e-06, "loss": 4.0093, "step": 22067 }, { "epoch": 7.351128508370118, "grad_norm": 0.8046875, "learning_rate": 2.5911031508476405e-06, "loss": 3.9896, "step": 22068 }, { "epoch": 7.351461647372366, "grad_norm": 0.75, "learning_rate": 2.5904912074758525e-06, "loss": 3.9702, "step": 22069 }, { "epoch": 7.351794786374615, "grad_norm": 0.74609375, "learning_rate": 2.589879321289211e-06, "loss": 3.9087, "step": 22070 }, { "epoch": 7.352127925376863, "grad_norm": 0.7421875, "learning_rate": 2.5892674922948396e-06, "loss": 3.9944, "step": 22071 }, { "epoch": 7.352461064379112, "grad_norm": 0.7109375, "learning_rate": 2.588655720499866e-06, "loss": 4.0648, "step": 22072 }, { "epoch": 7.352794203381361, "grad_norm": 0.78125, "learning_rate": 2.5880440059114145e-06, "loss": 3.9156, "step": 22073 }, { "epoch": 7.353127342383609, "grad_norm": 0.77734375, "learning_rate": 2.587432348536612e-06, "loss": 3.9659, "step": 22074 }, { "epoch": 7.353460481385858, "grad_norm": 0.79296875, "learning_rate": 2.5868207483825813e-06, "loss": 3.919, "step": 22075 }, { "epoch": 7.353793620388107, "grad_norm": 0.7578125, "learning_rate": 2.586209205456445e-06, "loss": 3.9682, "step": 22076 }, { "epoch": 7.354126759390356, "grad_norm": 0.76953125, "learning_rate": 2.5855977197653304e-06, "loss": 3.9369, "step": 22077 }, { "epoch": 7.354459898392604, "grad_norm": 0.7421875, "learning_rate": 2.5849862913163577e-06, "loss": 3.9938, "step": 22078 }, { "epoch": 7.354793037394853, "grad_norm": 0.76171875, "learning_rate": 2.5843749201166494e-06, "loss": 4.0131, "step": 22079 }, { "epoch": 7.355126176397102, "grad_norm": 0.74609375, "learning_rate": 2.5837636061733233e-06, "loss": 3.9675, "step": 22080 }, { "epoch": 7.355459315399351, "grad_norm": 0.7265625, "learning_rate": 2.5831523494935046e-06, "loss": 3.9863, "step": 22081 }, { "epoch": 7.355792454401599, "grad_norm": 0.76953125, "learning_rate": 2.582541150084314e-06, "loss": 4.0256, "step": 22082 }, { "epoch": 7.356125593403847, "grad_norm": 0.8125, "learning_rate": 2.5819300079528624e-06, "loss": 4.0238, "step": 22083 }, { "epoch": 7.356458732406097, "grad_norm": 0.7265625, "learning_rate": 2.581318923106277e-06, "loss": 4.0213, "step": 22084 }, { "epoch": 7.356791871408345, "grad_norm": 0.80859375, "learning_rate": 2.5807078955516718e-06, "loss": 3.9858, "step": 22085 }, { "epoch": 7.357125010410594, "grad_norm": 0.7421875, "learning_rate": 2.580096925296165e-06, "loss": 3.9747, "step": 22086 }, { "epoch": 7.357458149412842, "grad_norm": 0.7890625, "learning_rate": 2.579486012346869e-06, "loss": 3.9441, "step": 22087 }, { "epoch": 7.357791288415092, "grad_norm": 0.74609375, "learning_rate": 2.578875156710906e-06, "loss": 4.0794, "step": 22088 }, { "epoch": 7.35812442741734, "grad_norm": 0.76171875, "learning_rate": 2.5782643583953887e-06, "loss": 4.0269, "step": 22089 }, { "epoch": 7.358457566419588, "grad_norm": 0.79296875, "learning_rate": 2.5776536174074314e-06, "loss": 3.983, "step": 22090 }, { "epoch": 7.358790705421837, "grad_norm": 0.734375, "learning_rate": 2.577042933754147e-06, "loss": 3.9537, "step": 22091 }, { "epoch": 7.359123844424086, "grad_norm": 0.76171875, "learning_rate": 2.5764323074426493e-06, "loss": 4.0683, "step": 22092 }, { "epoch": 7.359456983426335, "grad_norm": 0.75, "learning_rate": 2.5758217384800507e-06, "loss": 3.9648, "step": 22093 }, { "epoch": 7.359790122428583, "grad_norm": 0.75, "learning_rate": 2.575211226873461e-06, "loss": 3.8668, "step": 22094 }, { "epoch": 7.360123261430832, "grad_norm": 0.76171875, "learning_rate": 2.574600772629996e-06, "loss": 3.9685, "step": 22095 }, { "epoch": 7.360456400433081, "grad_norm": 0.75390625, "learning_rate": 2.573990375756764e-06, "loss": 3.9688, "step": 22096 }, { "epoch": 7.36078953943533, "grad_norm": 0.7734375, "learning_rate": 2.5733800362608732e-06, "loss": 3.9715, "step": 22097 }, { "epoch": 7.361122678437578, "grad_norm": 0.78515625, "learning_rate": 2.5727697541494326e-06, "loss": 3.9952, "step": 22098 }, { "epoch": 7.361455817439826, "grad_norm": 0.7421875, "learning_rate": 2.572159529429554e-06, "loss": 4.0004, "step": 22099 }, { "epoch": 7.3617889564420755, "grad_norm": 0.80859375, "learning_rate": 2.5715493621083457e-06, "loss": 3.9147, "step": 22100 }, { "epoch": 7.362122095444324, "grad_norm": 0.75390625, "learning_rate": 2.5709392521929095e-06, "loss": 3.9828, "step": 22101 }, { "epoch": 7.362455234446573, "grad_norm": 0.7734375, "learning_rate": 2.570329199690353e-06, "loss": 4.0156, "step": 22102 }, { "epoch": 7.362788373448821, "grad_norm": 0.75390625, "learning_rate": 2.5697192046077855e-06, "loss": 3.9888, "step": 22103 }, { "epoch": 7.3631215124510705, "grad_norm": 0.75390625, "learning_rate": 2.56910926695231e-06, "loss": 4.0617, "step": 22104 }, { "epoch": 7.363454651453319, "grad_norm": 0.7734375, "learning_rate": 2.5684993867310324e-06, "loss": 3.9694, "step": 22105 }, { "epoch": 7.363787790455568, "grad_norm": 0.78515625, "learning_rate": 2.5678895639510515e-06, "loss": 3.9316, "step": 22106 }, { "epoch": 7.364120929457816, "grad_norm": 0.78515625, "learning_rate": 2.5672797986194773e-06, "loss": 3.9323, "step": 22107 }, { "epoch": 7.364454068460065, "grad_norm": 0.73046875, "learning_rate": 2.5666700907434095e-06, "loss": 3.9902, "step": 22108 }, { "epoch": 7.364787207462314, "grad_norm": 0.765625, "learning_rate": 2.566060440329949e-06, "loss": 3.9333, "step": 22109 }, { "epoch": 7.365120346464562, "grad_norm": 0.7578125, "learning_rate": 2.5654508473861983e-06, "loss": 4.0492, "step": 22110 }, { "epoch": 7.365453485466811, "grad_norm": 0.78515625, "learning_rate": 2.5648413119192565e-06, "loss": 4.0482, "step": 22111 }, { "epoch": 7.3657866244690595, "grad_norm": 0.77734375, "learning_rate": 2.5642318339362244e-06, "loss": 4.0405, "step": 22112 }, { "epoch": 7.366119763471309, "grad_norm": 0.77734375, "learning_rate": 2.563622413444197e-06, "loss": 4.0217, "step": 22113 }, { "epoch": 7.366452902473557, "grad_norm": 0.76953125, "learning_rate": 2.5630130504502797e-06, "loss": 3.9495, "step": 22114 }, { "epoch": 7.366786041475806, "grad_norm": 0.77734375, "learning_rate": 2.562403744961566e-06, "loss": 3.8845, "step": 22115 }, { "epoch": 7.3671191804780545, "grad_norm": 0.78125, "learning_rate": 2.561794496985155e-06, "loss": 4.034, "step": 22116 }, { "epoch": 7.367452319480303, "grad_norm": 0.78125, "learning_rate": 2.561185306528138e-06, "loss": 3.9569, "step": 22117 }, { "epoch": 7.367785458482552, "grad_norm": 0.7421875, "learning_rate": 2.5605761735976215e-06, "loss": 3.9495, "step": 22118 }, { "epoch": 7.3681185974848, "grad_norm": 0.79296875, "learning_rate": 2.5599670982006904e-06, "loss": 3.997, "step": 22119 }, { "epoch": 7.368451736487049, "grad_norm": 0.78515625, "learning_rate": 2.5593580803444396e-06, "loss": 3.99, "step": 22120 }, { "epoch": 7.368784875489298, "grad_norm": 0.80078125, "learning_rate": 2.558749120035969e-06, "loss": 3.9635, "step": 22121 }, { "epoch": 7.369118014491547, "grad_norm": 0.78515625, "learning_rate": 2.558140217282368e-06, "loss": 3.9751, "step": 22122 }, { "epoch": 7.369451153493795, "grad_norm": 0.7734375, "learning_rate": 2.557531372090729e-06, "loss": 3.9216, "step": 22123 }, { "epoch": 7.369784292496044, "grad_norm": 0.72265625, "learning_rate": 2.556922584468142e-06, "loss": 3.9668, "step": 22124 }, { "epoch": 7.370117431498293, "grad_norm": 0.7890625, "learning_rate": 2.5563138544217035e-06, "loss": 3.912, "step": 22125 }, { "epoch": 7.370450570500541, "grad_norm": 0.796875, "learning_rate": 2.5557051819585004e-06, "loss": 4.0112, "step": 22126 }, { "epoch": 7.37078370950279, "grad_norm": 0.7734375, "learning_rate": 2.555096567085622e-06, "loss": 4.032, "step": 22127 }, { "epoch": 7.3711168485050385, "grad_norm": 0.78125, "learning_rate": 2.5544880098101577e-06, "loss": 3.9736, "step": 22128 }, { "epoch": 7.371449987507288, "grad_norm": 0.78515625, "learning_rate": 2.5538795101391965e-06, "loss": 3.9967, "step": 22129 }, { "epoch": 7.371783126509536, "grad_norm": 0.81640625, "learning_rate": 2.553271068079826e-06, "loss": 3.9604, "step": 22130 }, { "epoch": 7.372116265511785, "grad_norm": 0.796875, "learning_rate": 2.5526626836391304e-06, "loss": 3.9254, "step": 22131 }, { "epoch": 7.372449404514033, "grad_norm": 0.75390625, "learning_rate": 2.5520543568242004e-06, "loss": 3.9395, "step": 22132 }, { "epoch": 7.372782543516282, "grad_norm": 0.7734375, "learning_rate": 2.5514460876421207e-06, "loss": 3.9061, "step": 22133 }, { "epoch": 7.373115682518531, "grad_norm": 0.734375, "learning_rate": 2.5508378760999757e-06, "loss": 4.0069, "step": 22134 }, { "epoch": 7.373448821520779, "grad_norm": 0.78125, "learning_rate": 2.5502297222048466e-06, "loss": 3.939, "step": 22135 }, { "epoch": 7.373781960523028, "grad_norm": 0.7578125, "learning_rate": 2.5496216259638253e-06, "loss": 3.9921, "step": 22136 }, { "epoch": 7.374115099525277, "grad_norm": 0.7734375, "learning_rate": 2.549013587383986e-06, "loss": 4.0426, "step": 22137 }, { "epoch": 7.374448238527526, "grad_norm": 0.796875, "learning_rate": 2.548405606472413e-06, "loss": 4.0192, "step": 22138 }, { "epoch": 7.374781377529774, "grad_norm": 0.78515625, "learning_rate": 2.5477976832361928e-06, "loss": 4.0436, "step": 22139 }, { "epoch": 7.375114516532023, "grad_norm": 0.8046875, "learning_rate": 2.5471898176824025e-06, "loss": 3.9746, "step": 22140 }, { "epoch": 7.375447655534272, "grad_norm": 0.76953125, "learning_rate": 2.5465820098181226e-06, "loss": 3.9989, "step": 22141 }, { "epoch": 7.375780794536521, "grad_norm": 0.79296875, "learning_rate": 2.545974259650431e-06, "loss": 4.0007, "step": 22142 }, { "epoch": 7.376113933538769, "grad_norm": 0.78515625, "learning_rate": 2.5453665671864114e-06, "loss": 3.9335, "step": 22143 }, { "epoch": 7.376447072541017, "grad_norm": 0.7734375, "learning_rate": 2.5447589324331395e-06, "loss": 4.0598, "step": 22144 }, { "epoch": 7.376780211543267, "grad_norm": 0.7578125, "learning_rate": 2.5441513553976955e-06, "loss": 4.015, "step": 22145 }, { "epoch": 7.377113350545515, "grad_norm": 0.79296875, "learning_rate": 2.543543836087149e-06, "loss": 3.9284, "step": 22146 }, { "epoch": 7.377446489547764, "grad_norm": 0.80078125, "learning_rate": 2.5429363745085834e-06, "loss": 3.9845, "step": 22147 }, { "epoch": 7.377779628550012, "grad_norm": 0.75, "learning_rate": 2.5423289706690722e-06, "loss": 3.9582, "step": 22148 }, { "epoch": 7.3781127675522615, "grad_norm": 0.78125, "learning_rate": 2.5417216245756904e-06, "loss": 3.9795, "step": 22149 }, { "epoch": 7.37844590655451, "grad_norm": 0.74609375, "learning_rate": 2.5411143362355095e-06, "loss": 3.9823, "step": 22150 }, { "epoch": 7.378779045556758, "grad_norm": 0.76171875, "learning_rate": 2.540507105655608e-06, "loss": 4.0761, "step": 22151 }, { "epoch": 7.379112184559007, "grad_norm": 0.80859375, "learning_rate": 2.539899932843056e-06, "loss": 4.0227, "step": 22152 }, { "epoch": 7.379445323561256, "grad_norm": 0.7578125, "learning_rate": 2.539292817804927e-06, "loss": 3.9073, "step": 22153 }, { "epoch": 7.379778462563505, "grad_norm": 0.765625, "learning_rate": 2.538685760548291e-06, "loss": 4.0019, "step": 22154 }, { "epoch": 7.380111601565753, "grad_norm": 0.79296875, "learning_rate": 2.5380787610802206e-06, "loss": 3.9795, "step": 22155 }, { "epoch": 7.380444740568002, "grad_norm": 0.76953125, "learning_rate": 2.5374718194077847e-06, "loss": 3.8819, "step": 22156 }, { "epoch": 7.3807778795702506, "grad_norm": 0.76953125, "learning_rate": 2.5368649355380508e-06, "loss": 4.0267, "step": 22157 }, { "epoch": 7.3811110185725, "grad_norm": 0.76171875, "learning_rate": 2.536258109478093e-06, "loss": 3.8862, "step": 22158 }, { "epoch": 7.381444157574748, "grad_norm": 0.78125, "learning_rate": 2.5356513412349764e-06, "loss": 3.8979, "step": 22159 }, { "epoch": 7.381777296576997, "grad_norm": 0.76171875, "learning_rate": 2.5350446308157696e-06, "loss": 3.9395, "step": 22160 }, { "epoch": 7.3821104355792455, "grad_norm": 0.6953125, "learning_rate": 2.5344379782275352e-06, "loss": 4.0034, "step": 22161 }, { "epoch": 7.382443574581494, "grad_norm": 0.76953125, "learning_rate": 2.5338313834773465e-06, "loss": 3.9742, "step": 22162 }, { "epoch": 7.382776713583743, "grad_norm": 0.74609375, "learning_rate": 2.5332248465722676e-06, "loss": 3.9296, "step": 22163 }, { "epoch": 7.383109852585991, "grad_norm": 0.75, "learning_rate": 2.532618367519356e-06, "loss": 4.044, "step": 22164 }, { "epoch": 7.3834429915882405, "grad_norm": 0.7890625, "learning_rate": 2.5320119463256837e-06, "loss": 3.9894, "step": 22165 }, { "epoch": 7.383776130590489, "grad_norm": 0.75390625, "learning_rate": 2.5314055829983115e-06, "loss": 3.9753, "step": 22166 }, { "epoch": 7.384109269592738, "grad_norm": 0.73828125, "learning_rate": 2.5307992775443027e-06, "loss": 3.9559, "step": 22167 }, { "epoch": 7.384442408594986, "grad_norm": 0.7890625, "learning_rate": 2.5301930299707162e-06, "loss": 3.9695, "step": 22168 }, { "epoch": 7.3847755475972345, "grad_norm": 0.796875, "learning_rate": 2.5295868402846187e-06, "loss": 3.9895, "step": 22169 }, { "epoch": 7.385108686599484, "grad_norm": 0.8046875, "learning_rate": 2.5289807084930686e-06, "loss": 3.9488, "step": 22170 }, { "epoch": 7.385441825601732, "grad_norm": 0.7734375, "learning_rate": 2.5283746346031263e-06, "loss": 3.9418, "step": 22171 }, { "epoch": 7.385774964603981, "grad_norm": 0.74609375, "learning_rate": 2.5277686186218506e-06, "loss": 4.0175, "step": 22172 }, { "epoch": 7.3861081036062295, "grad_norm": 0.765625, "learning_rate": 2.527162660556302e-06, "loss": 3.9177, "step": 22173 }, { "epoch": 7.386441242608479, "grad_norm": 0.78125, "learning_rate": 2.526556760413536e-06, "loss": 4.0139, "step": 22174 }, { "epoch": 7.386774381610727, "grad_norm": 0.7734375, "learning_rate": 2.5259509182006084e-06, "loss": 3.988, "step": 22175 }, { "epoch": 7.387107520612976, "grad_norm": 0.75, "learning_rate": 2.525345133924582e-06, "loss": 3.9668, "step": 22176 }, { "epoch": 7.3874406596152244, "grad_norm": 0.76171875, "learning_rate": 2.5247394075925098e-06, "loss": 3.9766, "step": 22177 }, { "epoch": 7.387773798617473, "grad_norm": 0.71484375, "learning_rate": 2.524133739211447e-06, "loss": 3.8915, "step": 22178 }, { "epoch": 7.388106937619722, "grad_norm": 0.78125, "learning_rate": 2.5235281287884454e-06, "loss": 4.0464, "step": 22179 }, { "epoch": 7.38844007662197, "grad_norm": 0.74609375, "learning_rate": 2.5229225763305654e-06, "loss": 3.8842, "step": 22180 }, { "epoch": 7.388773215624219, "grad_norm": 0.765625, "learning_rate": 2.5223170818448595e-06, "loss": 4.0175, "step": 22181 }, { "epoch": 7.389106354626468, "grad_norm": 0.75, "learning_rate": 2.521711645338372e-06, "loss": 3.964, "step": 22182 }, { "epoch": 7.389439493628717, "grad_norm": 0.73046875, "learning_rate": 2.5211062668181647e-06, "loss": 4.0234, "step": 22183 }, { "epoch": 7.389772632630965, "grad_norm": 0.77734375, "learning_rate": 2.520500946291284e-06, "loss": 3.9618, "step": 22184 }, { "epoch": 7.390105771633214, "grad_norm": 0.76953125, "learning_rate": 2.5198956837647826e-06, "loss": 3.934, "step": 22185 }, { "epoch": 7.390438910635463, "grad_norm": 0.75390625, "learning_rate": 2.519290479245706e-06, "loss": 3.957, "step": 22186 }, { "epoch": 7.390772049637711, "grad_norm": 0.79296875, "learning_rate": 2.5186853327411105e-06, "loss": 3.92, "step": 22187 }, { "epoch": 7.39110518863996, "grad_norm": 0.7578125, "learning_rate": 2.518080244258042e-06, "loss": 3.9378, "step": 22188 }, { "epoch": 7.391438327642208, "grad_norm": 0.79296875, "learning_rate": 2.5174752138035465e-06, "loss": 3.9853, "step": 22189 }, { "epoch": 7.391771466644458, "grad_norm": 0.76953125, "learning_rate": 2.5168702413846723e-06, "loss": 3.9592, "step": 22190 }, { "epoch": 7.392104605646706, "grad_norm": 0.7578125, "learning_rate": 2.5162653270084674e-06, "loss": 3.994, "step": 22191 }, { "epoch": 7.392437744648955, "grad_norm": 0.78125, "learning_rate": 2.515660470681977e-06, "loss": 4.0024, "step": 22192 }, { "epoch": 7.392770883651203, "grad_norm": 0.73828125, "learning_rate": 2.5150556724122454e-06, "loss": 4.0038, "step": 22193 }, { "epoch": 7.393104022653453, "grad_norm": 0.77734375, "learning_rate": 2.5144509322063157e-06, "loss": 3.9963, "step": 22194 }, { "epoch": 7.393437161655701, "grad_norm": 0.79296875, "learning_rate": 2.513846250071236e-06, "loss": 3.9333, "step": 22195 }, { "epoch": 7.393770300657949, "grad_norm": 0.78515625, "learning_rate": 2.5132416260140476e-06, "loss": 3.9471, "step": 22196 }, { "epoch": 7.394103439660198, "grad_norm": 0.7265625, "learning_rate": 2.5126370600417927e-06, "loss": 3.9701, "step": 22197 }, { "epoch": 7.394436578662447, "grad_norm": 0.74609375, "learning_rate": 2.512032552161511e-06, "loss": 3.9793, "step": 22198 }, { "epoch": 7.394769717664696, "grad_norm": 0.75390625, "learning_rate": 2.511428102380252e-06, "loss": 3.9948, "step": 22199 }, { "epoch": 7.395102856666944, "grad_norm": 0.80078125, "learning_rate": 2.5108237107050473e-06, "loss": 3.9604, "step": 22200 }, { "epoch": 7.395435995669193, "grad_norm": 0.78125, "learning_rate": 2.5102193771429372e-06, "loss": 3.9413, "step": 22201 }, { "epoch": 7.395769134671442, "grad_norm": 0.80078125, "learning_rate": 2.5096151017009665e-06, "loss": 4.0434, "step": 22202 }, { "epoch": 7.396102273673691, "grad_norm": 0.79296875, "learning_rate": 2.50901088438617e-06, "loss": 3.9837, "step": 22203 }, { "epoch": 7.396435412675939, "grad_norm": 0.7890625, "learning_rate": 2.508406725205587e-06, "loss": 3.9849, "step": 22204 }, { "epoch": 7.396768551678187, "grad_norm": 0.75390625, "learning_rate": 2.5078026241662507e-06, "loss": 4.0167, "step": 22205 }, { "epoch": 7.3971016906804365, "grad_norm": 0.76953125, "learning_rate": 2.5071985812752033e-06, "loss": 4.0078, "step": 22206 }, { "epoch": 7.397434829682685, "grad_norm": 0.76953125, "learning_rate": 2.506594596539479e-06, "loss": 4.0131, "step": 22207 }, { "epoch": 7.397767968684934, "grad_norm": 0.7421875, "learning_rate": 2.5059906699661107e-06, "loss": 3.9732, "step": 22208 }, { "epoch": 7.398101107687182, "grad_norm": 0.79296875, "learning_rate": 2.5053868015621347e-06, "loss": 3.9404, "step": 22209 }, { "epoch": 7.3984342466894315, "grad_norm": 0.75390625, "learning_rate": 2.504782991334584e-06, "loss": 3.9393, "step": 22210 }, { "epoch": 7.39876738569168, "grad_norm": 0.75, "learning_rate": 2.5041792392904915e-06, "loss": 3.9939, "step": 22211 }, { "epoch": 7.399100524693928, "grad_norm": 0.76953125, "learning_rate": 2.503575545436888e-06, "loss": 3.9519, "step": 22212 }, { "epoch": 7.399433663696177, "grad_norm": 0.80078125, "learning_rate": 2.5029719097808094e-06, "loss": 3.9355, "step": 22213 }, { "epoch": 7.399766802698426, "grad_norm": 0.8125, "learning_rate": 2.5023683323292843e-06, "loss": 4.0274, "step": 22214 }, { "epoch": 7.400099941700675, "grad_norm": 0.76953125, "learning_rate": 2.501764813089344e-06, "loss": 3.9913, "step": 22215 }, { "epoch": 7.400433080702923, "grad_norm": 0.72265625, "learning_rate": 2.5011613520680145e-06, "loss": 3.9562, "step": 22216 }, { "epoch": 7.400766219705172, "grad_norm": 0.81640625, "learning_rate": 2.5005579492723326e-06, "loss": 3.9607, "step": 22217 }, { "epoch": 7.4010993587074205, "grad_norm": 0.76171875, "learning_rate": 2.4999546047093196e-06, "loss": 3.9385, "step": 22218 }, { "epoch": 7.40143249770967, "grad_norm": 0.76953125, "learning_rate": 2.499351318386003e-06, "loss": 4.0048, "step": 22219 }, { "epoch": 7.401765636711918, "grad_norm": 0.77734375, "learning_rate": 2.498748090309414e-06, "loss": 3.9769, "step": 22220 }, { "epoch": 7.402098775714167, "grad_norm": 0.7890625, "learning_rate": 2.4981449204865774e-06, "loss": 4.0415, "step": 22221 }, { "epoch": 7.4024319147164155, "grad_norm": 0.78515625, "learning_rate": 2.497541808924519e-06, "loss": 4.0112, "step": 22222 }, { "epoch": 7.402765053718664, "grad_norm": 0.78515625, "learning_rate": 2.49693875563026e-06, "loss": 3.9619, "step": 22223 }, { "epoch": 7.403098192720913, "grad_norm": 0.75390625, "learning_rate": 2.4963357606108313e-06, "loss": 3.9473, "step": 22224 }, { "epoch": 7.403431331723161, "grad_norm": 0.765625, "learning_rate": 2.495732823873255e-06, "loss": 3.9971, "step": 22225 }, { "epoch": 7.40376447072541, "grad_norm": 0.80859375, "learning_rate": 2.4951299454245466e-06, "loss": 4.001, "step": 22226 }, { "epoch": 7.404097609727659, "grad_norm": 0.73828125, "learning_rate": 2.4945271252717363e-06, "loss": 3.9112, "step": 22227 }, { "epoch": 7.404430748729908, "grad_norm": 0.80078125, "learning_rate": 2.4939243634218435e-06, "loss": 4.0226, "step": 22228 }, { "epoch": 7.404763887732156, "grad_norm": 0.75, "learning_rate": 2.4933216598818894e-06, "loss": 3.9065, "step": 22229 }, { "epoch": 7.4050970267344045, "grad_norm": 0.734375, "learning_rate": 2.49271901465889e-06, "loss": 3.9993, "step": 22230 }, { "epoch": 7.405430165736654, "grad_norm": 0.765625, "learning_rate": 2.4921164277598718e-06, "loss": 4.0204, "step": 22231 }, { "epoch": 7.405763304738902, "grad_norm": 0.76171875, "learning_rate": 2.4915138991918495e-06, "loss": 4.0473, "step": 22232 }, { "epoch": 7.406096443741151, "grad_norm": 0.76171875, "learning_rate": 2.4909114289618416e-06, "loss": 3.99, "step": 22233 }, { "epoch": 7.4064295827433995, "grad_norm": 0.77734375, "learning_rate": 2.490309017076867e-06, "loss": 4.0508, "step": 22234 }, { "epoch": 7.406762721745649, "grad_norm": 0.734375, "learning_rate": 2.48970666354394e-06, "loss": 3.9275, "step": 22235 }, { "epoch": 7.407095860747897, "grad_norm": 0.76171875, "learning_rate": 2.4891043683700786e-06, "loss": 3.994, "step": 22236 }, { "epoch": 7.407428999750146, "grad_norm": 0.7578125, "learning_rate": 2.4885021315622984e-06, "loss": 3.9367, "step": 22237 }, { "epoch": 7.407762138752394, "grad_norm": 0.75390625, "learning_rate": 2.4878999531276103e-06, "loss": 3.9392, "step": 22238 }, { "epoch": 7.408095277754643, "grad_norm": 0.75, "learning_rate": 2.487297833073034e-06, "loss": 4.029, "step": 22239 }, { "epoch": 7.408428416756892, "grad_norm": 0.78515625, "learning_rate": 2.486695771405581e-06, "loss": 3.9602, "step": 22240 }, { "epoch": 7.40876155575914, "grad_norm": 0.7421875, "learning_rate": 2.486093768132263e-06, "loss": 3.9938, "step": 22241 }, { "epoch": 7.409094694761389, "grad_norm": 0.85546875, "learning_rate": 2.48549182326009e-06, "loss": 3.9576, "step": 22242 }, { "epoch": 7.409427833763638, "grad_norm": 0.8046875, "learning_rate": 2.484889936796081e-06, "loss": 3.9606, "step": 22243 }, { "epoch": 7.409760972765887, "grad_norm": 0.83203125, "learning_rate": 2.484288108747239e-06, "loss": 3.9695, "step": 22244 }, { "epoch": 7.410094111768135, "grad_norm": 0.76953125, "learning_rate": 2.483686339120574e-06, "loss": 4.0493, "step": 22245 }, { "epoch": 7.410427250770384, "grad_norm": 0.78125, "learning_rate": 2.4830846279231006e-06, "loss": 4.0541, "step": 22246 }, { "epoch": 7.410760389772633, "grad_norm": 0.79296875, "learning_rate": 2.4824829751618245e-06, "loss": 3.9797, "step": 22247 }, { "epoch": 7.411093528774881, "grad_norm": 0.7734375, "learning_rate": 2.481881380843754e-06, "loss": 3.977, "step": 22248 }, { "epoch": 7.41142666777713, "grad_norm": 0.80859375, "learning_rate": 2.481279844975893e-06, "loss": 3.9268, "step": 22249 }, { "epoch": 7.411759806779378, "grad_norm": 0.77734375, "learning_rate": 2.4806783675652545e-06, "loss": 3.9712, "step": 22250 }, { "epoch": 7.412092945781628, "grad_norm": 0.7890625, "learning_rate": 2.480076948618841e-06, "loss": 3.9895, "step": 22251 }, { "epoch": 7.412426084783876, "grad_norm": 0.734375, "learning_rate": 2.4794755881436577e-06, "loss": 3.9528, "step": 22252 }, { "epoch": 7.412759223786125, "grad_norm": 0.7890625, "learning_rate": 2.4788742861467097e-06, "loss": 3.936, "step": 22253 }, { "epoch": 7.413092362788373, "grad_norm": 0.82421875, "learning_rate": 2.478273042635e-06, "loss": 3.9668, "step": 22254 }, { "epoch": 7.4134255017906225, "grad_norm": 0.76171875, "learning_rate": 2.477671857615533e-06, "loss": 3.9593, "step": 22255 }, { "epoch": 7.413758640792871, "grad_norm": 0.78515625, "learning_rate": 2.477070731095308e-06, "loss": 4.0206, "step": 22256 }, { "epoch": 7.414091779795119, "grad_norm": 0.75390625, "learning_rate": 2.476469663081331e-06, "loss": 4.003, "step": 22257 }, { "epoch": 7.414424918797368, "grad_norm": 0.75390625, "learning_rate": 2.475868653580602e-06, "loss": 4.0421, "step": 22258 }, { "epoch": 7.414758057799617, "grad_norm": 0.79296875, "learning_rate": 2.4752677026001205e-06, "loss": 3.9884, "step": 22259 }, { "epoch": 7.415091196801866, "grad_norm": 0.75, "learning_rate": 2.4746668101468843e-06, "loss": 3.9967, "step": 22260 }, { "epoch": 7.415424335804114, "grad_norm": 0.76171875, "learning_rate": 2.474065976227901e-06, "loss": 4.0024, "step": 22261 }, { "epoch": 7.415757474806363, "grad_norm": 0.734375, "learning_rate": 2.473465200850159e-06, "loss": 4.0329, "step": 22262 }, { "epoch": 7.416090613808612, "grad_norm": 0.76953125, "learning_rate": 2.4728644840206587e-06, "loss": 3.9698, "step": 22263 }, { "epoch": 7.416423752810861, "grad_norm": 0.80078125, "learning_rate": 2.4722638257464003e-06, "loss": 3.9284, "step": 22264 }, { "epoch": 7.416756891813109, "grad_norm": 0.73828125, "learning_rate": 2.471663226034378e-06, "loss": 3.9396, "step": 22265 }, { "epoch": 7.417090030815357, "grad_norm": 0.7265625, "learning_rate": 2.471062684891588e-06, "loss": 4.0284, "step": 22266 }, { "epoch": 7.4174231698176065, "grad_norm": 0.765625, "learning_rate": 2.470462202325023e-06, "loss": 4.0299, "step": 22267 }, { "epoch": 7.417756308819855, "grad_norm": 0.77734375, "learning_rate": 2.469861778341681e-06, "loss": 4.024, "step": 22268 }, { "epoch": 7.418089447822104, "grad_norm": 0.73046875, "learning_rate": 2.469261412948554e-06, "loss": 4.0271, "step": 22269 }, { "epoch": 7.418422586824352, "grad_norm": 0.78515625, "learning_rate": 2.4686611061526357e-06, "loss": 4.024, "step": 22270 }, { "epoch": 7.4187557258266015, "grad_norm": 0.8203125, "learning_rate": 2.4680608579609177e-06, "loss": 4.0094, "step": 22271 }, { "epoch": 7.41908886482885, "grad_norm": 0.765625, "learning_rate": 2.467460668380391e-06, "loss": 4.0092, "step": 22272 }, { "epoch": 7.419422003831098, "grad_norm": 0.75390625, "learning_rate": 2.466860537418048e-06, "loss": 3.9821, "step": 22273 }, { "epoch": 7.419755142833347, "grad_norm": 0.74609375, "learning_rate": 2.4662604650808743e-06, "loss": 4.0006, "step": 22274 }, { "epoch": 7.4200882818355955, "grad_norm": 0.76171875, "learning_rate": 2.4656604513758655e-06, "loss": 3.9128, "step": 22275 }, { "epoch": 7.420421420837845, "grad_norm": 0.734375, "learning_rate": 2.465060496310009e-06, "loss": 4.0087, "step": 22276 }, { "epoch": 7.420754559840093, "grad_norm": 0.75390625, "learning_rate": 2.464460599890292e-06, "loss": 4.0362, "step": 22277 }, { "epoch": 7.421087698842342, "grad_norm": 0.7734375, "learning_rate": 2.463860762123699e-06, "loss": 4.0117, "step": 22278 }, { "epoch": 7.4214208378445905, "grad_norm": 0.796875, "learning_rate": 2.463260983017225e-06, "loss": 4.0307, "step": 22279 }, { "epoch": 7.42175397684684, "grad_norm": 0.8359375, "learning_rate": 2.4626612625778493e-06, "loss": 3.9273, "step": 22280 }, { "epoch": 7.422087115849088, "grad_norm": 0.765625, "learning_rate": 2.462061600812558e-06, "loss": 4.0238, "step": 22281 }, { "epoch": 7.422420254851337, "grad_norm": 0.77734375, "learning_rate": 2.461461997728335e-06, "loss": 4.0156, "step": 22282 }, { "epoch": 7.4227533938535855, "grad_norm": 0.77734375, "learning_rate": 2.4608624533321683e-06, "loss": 4.0184, "step": 22283 }, { "epoch": 7.423086532855834, "grad_norm": 0.734375, "learning_rate": 2.4602629676310395e-06, "loss": 4.0161, "step": 22284 }, { "epoch": 7.423419671858083, "grad_norm": 0.7734375, "learning_rate": 2.4596635406319317e-06, "loss": 3.9287, "step": 22285 }, { "epoch": 7.423752810860331, "grad_norm": 0.74609375, "learning_rate": 2.4590641723418226e-06, "loss": 4.0001, "step": 22286 }, { "epoch": 7.42408594986258, "grad_norm": 0.80078125, "learning_rate": 2.4584648627677003e-06, "loss": 4.0233, "step": 22287 }, { "epoch": 7.424419088864829, "grad_norm": 0.7421875, "learning_rate": 2.4578656119165452e-06, "loss": 4.02, "step": 22288 }, { "epoch": 7.424752227867078, "grad_norm": 0.82421875, "learning_rate": 2.4572664197953292e-06, "loss": 3.9345, "step": 22289 }, { "epoch": 7.425085366869326, "grad_norm": 0.75, "learning_rate": 2.4566672864110387e-06, "loss": 3.9575, "step": 22290 }, { "epoch": 7.4254185058715745, "grad_norm": 0.7421875, "learning_rate": 2.4560682117706514e-06, "loss": 3.986, "step": 22291 }, { "epoch": 7.425751644873824, "grad_norm": 0.79296875, "learning_rate": 2.4554691958811436e-06, "loss": 3.9815, "step": 22292 }, { "epoch": 7.426084783876072, "grad_norm": 0.83203125, "learning_rate": 2.4548702387494914e-06, "loss": 3.9479, "step": 22293 }, { "epoch": 7.426417922878321, "grad_norm": 0.7890625, "learning_rate": 2.4542713403826757e-06, "loss": 3.928, "step": 22294 }, { "epoch": 7.426751061880569, "grad_norm": 0.76953125, "learning_rate": 2.4536725007876696e-06, "loss": 3.9916, "step": 22295 }, { "epoch": 7.427084200882819, "grad_norm": 0.77734375, "learning_rate": 2.453073719971449e-06, "loss": 3.9285, "step": 22296 }, { "epoch": 7.427417339885067, "grad_norm": 0.80078125, "learning_rate": 2.4524749979409868e-06, "loss": 3.8847, "step": 22297 }, { "epoch": 7.427750478887316, "grad_norm": 0.8046875, "learning_rate": 2.4518763347032585e-06, "loss": 3.9917, "step": 22298 }, { "epoch": 7.428083617889564, "grad_norm": 0.7734375, "learning_rate": 2.4512777302652374e-06, "loss": 4.0661, "step": 22299 }, { "epoch": 7.428416756891814, "grad_norm": 0.765625, "learning_rate": 2.4506791846338923e-06, "loss": 3.9335, "step": 22300 }, { "epoch": 7.428749895894062, "grad_norm": 0.80078125, "learning_rate": 2.4500806978162003e-06, "loss": 3.9907, "step": 22301 }, { "epoch": 7.42908303489631, "grad_norm": 0.77734375, "learning_rate": 2.44948226981913e-06, "loss": 3.9154, "step": 22302 }, { "epoch": 7.429416173898559, "grad_norm": 0.75390625, "learning_rate": 2.4488839006496514e-06, "loss": 4.0313, "step": 22303 }, { "epoch": 7.429749312900808, "grad_norm": 0.765625, "learning_rate": 2.4482855903147324e-06, "loss": 3.9959, "step": 22304 }, { "epoch": 7.430082451903057, "grad_norm": 0.75390625, "learning_rate": 2.447687338821347e-06, "loss": 4.0219, "step": 22305 }, { "epoch": 7.430415590905305, "grad_norm": 0.80859375, "learning_rate": 2.4470891461764637e-06, "loss": 3.9487, "step": 22306 }, { "epoch": 7.430748729907554, "grad_norm": 0.80859375, "learning_rate": 2.4464910123870413e-06, "loss": 4.0396, "step": 22307 }, { "epoch": 7.431081868909803, "grad_norm": 0.80078125, "learning_rate": 2.445892937460056e-06, "loss": 4.0049, "step": 22308 }, { "epoch": 7.431415007912051, "grad_norm": 0.8359375, "learning_rate": 2.445294921402471e-06, "loss": 3.9284, "step": 22309 }, { "epoch": 7.4317481469143, "grad_norm": 0.74609375, "learning_rate": 2.444696964221251e-06, "loss": 3.9965, "step": 22310 }, { "epoch": 7.432081285916548, "grad_norm": 0.79296875, "learning_rate": 2.4440990659233594e-06, "loss": 3.9405, "step": 22311 }, { "epoch": 7.432414424918798, "grad_norm": 0.7265625, "learning_rate": 2.4435012265157647e-06, "loss": 3.9681, "step": 22312 }, { "epoch": 7.432747563921046, "grad_norm": 0.8125, "learning_rate": 2.4429034460054286e-06, "loss": 4.0053, "step": 22313 }, { "epoch": 7.433080702923295, "grad_norm": 0.73046875, "learning_rate": 2.442305724399314e-06, "loss": 4.0203, "step": 22314 }, { "epoch": 7.433413841925543, "grad_norm": 0.75390625, "learning_rate": 2.441708061704383e-06, "loss": 3.9206, "step": 22315 }, { "epoch": 7.4337469809277925, "grad_norm": 0.78515625, "learning_rate": 2.4411104579275963e-06, "loss": 3.9099, "step": 22316 }, { "epoch": 7.434080119930041, "grad_norm": 0.8359375, "learning_rate": 2.4405129130759157e-06, "loss": 4.038, "step": 22317 }, { "epoch": 7.434413258932289, "grad_norm": 0.828125, "learning_rate": 2.4399154271562978e-06, "loss": 3.8785, "step": 22318 }, { "epoch": 7.434746397934538, "grad_norm": 0.75390625, "learning_rate": 2.4393180001757074e-06, "loss": 3.9453, "step": 22319 }, { "epoch": 7.435079536936787, "grad_norm": 0.7890625, "learning_rate": 2.4387206321411016e-06, "loss": 3.9449, "step": 22320 }, { "epoch": 7.435412675939036, "grad_norm": 0.765625, "learning_rate": 2.4381233230594376e-06, "loss": 3.9775, "step": 22321 }, { "epoch": 7.435745814941284, "grad_norm": 0.73828125, "learning_rate": 2.4375260729376694e-06, "loss": 3.9632, "step": 22322 }, { "epoch": 7.436078953943533, "grad_norm": 0.76171875, "learning_rate": 2.4369288817827613e-06, "loss": 3.953, "step": 22323 }, { "epoch": 7.4364120929457815, "grad_norm": 0.7890625, "learning_rate": 2.4363317496016663e-06, "loss": 3.9364, "step": 22324 }, { "epoch": 7.436745231948031, "grad_norm": 0.7734375, "learning_rate": 2.4357346764013367e-06, "loss": 4.0367, "step": 22325 }, { "epoch": 7.437078370950279, "grad_norm": 0.7890625, "learning_rate": 2.435137662188726e-06, "loss": 4.0526, "step": 22326 }, { "epoch": 7.437411509952527, "grad_norm": 0.75, "learning_rate": 2.4345407069707936e-06, "loss": 4.0161, "step": 22327 }, { "epoch": 7.4377446489547765, "grad_norm": 0.75390625, "learning_rate": 2.4339438107544904e-06, "loss": 4.0285, "step": 22328 }, { "epoch": 7.438077787957025, "grad_norm": 0.734375, "learning_rate": 2.4333469735467688e-06, "loss": 3.9827, "step": 22329 }, { "epoch": 7.438410926959274, "grad_norm": 0.76171875, "learning_rate": 2.432750195354578e-06, "loss": 3.9357, "step": 22330 }, { "epoch": 7.438744065961522, "grad_norm": 0.7734375, "learning_rate": 2.432153476184874e-06, "loss": 3.9694, "step": 22331 }, { "epoch": 7.4390772049637715, "grad_norm": 0.71875, "learning_rate": 2.4315568160446054e-06, "loss": 3.963, "step": 22332 }, { "epoch": 7.43941034396602, "grad_norm": 0.76171875, "learning_rate": 2.4309602149407216e-06, "loss": 3.9205, "step": 22333 }, { "epoch": 7.439743482968268, "grad_norm": 0.73828125, "learning_rate": 2.430363672880172e-06, "loss": 3.9911, "step": 22334 }, { "epoch": 7.440076621970517, "grad_norm": 0.76171875, "learning_rate": 2.429767189869905e-06, "loss": 3.9607, "step": 22335 }, { "epoch": 7.4404097609727655, "grad_norm": 0.7734375, "learning_rate": 2.4291707659168667e-06, "loss": 3.9158, "step": 22336 }, { "epoch": 7.440742899975015, "grad_norm": 0.81640625, "learning_rate": 2.4285744010280043e-06, "loss": 3.98, "step": 22337 }, { "epoch": 7.441076038977263, "grad_norm": 0.79296875, "learning_rate": 2.427978095210268e-06, "loss": 4.0164, "step": 22338 }, { "epoch": 7.441409177979512, "grad_norm": 0.75390625, "learning_rate": 2.4273818484706005e-06, "loss": 3.9882, "step": 22339 }, { "epoch": 7.4417423169817605, "grad_norm": 0.79296875, "learning_rate": 2.426785660815947e-06, "loss": 3.9559, "step": 22340 }, { "epoch": 7.44207545598401, "grad_norm": 0.77734375, "learning_rate": 2.4261895322532505e-06, "loss": 3.9698, "step": 22341 }, { "epoch": 7.442408594986258, "grad_norm": 0.734375, "learning_rate": 2.4255934627894596e-06, "loss": 3.9439, "step": 22342 }, { "epoch": 7.442741733988507, "grad_norm": 0.73046875, "learning_rate": 2.4249974524315126e-06, "loss": 3.9968, "step": 22343 }, { "epoch": 7.443074872990755, "grad_norm": 0.734375, "learning_rate": 2.42440150118635e-06, "loss": 3.9835, "step": 22344 }, { "epoch": 7.443408011993004, "grad_norm": 0.74609375, "learning_rate": 2.423805609060918e-06, "loss": 4.04, "step": 22345 }, { "epoch": 7.443741150995253, "grad_norm": 0.75, "learning_rate": 2.423209776062156e-06, "loss": 3.894, "step": 22346 }, { "epoch": 7.444074289997501, "grad_norm": 0.8125, "learning_rate": 2.422614002197004e-06, "loss": 3.9534, "step": 22347 }, { "epoch": 7.44440742899975, "grad_norm": 0.79296875, "learning_rate": 2.4220182874723985e-06, "loss": 3.9082, "step": 22348 }, { "epoch": 7.444740568001999, "grad_norm": 0.8046875, "learning_rate": 2.4214226318952836e-06, "loss": 3.9825, "step": 22349 }, { "epoch": 7.445073707004248, "grad_norm": 0.75390625, "learning_rate": 2.420827035472595e-06, "loss": 3.9007, "step": 22350 }, { "epoch": 7.445406846006496, "grad_norm": 0.8046875, "learning_rate": 2.4202314982112706e-06, "loss": 3.997, "step": 22351 }, { "epoch": 7.4457399850087445, "grad_norm": 0.765625, "learning_rate": 2.4196360201182453e-06, "loss": 4.0748, "step": 22352 }, { "epoch": 7.446073124010994, "grad_norm": 0.79296875, "learning_rate": 2.419040601200457e-06, "loss": 3.9301, "step": 22353 }, { "epoch": 7.446406263013242, "grad_norm": 0.796875, "learning_rate": 2.4184452414648408e-06, "loss": 3.9736, "step": 22354 }, { "epoch": 7.446739402015491, "grad_norm": 0.76171875, "learning_rate": 2.417849940918327e-06, "loss": 3.9454, "step": 22355 }, { "epoch": 7.447072541017739, "grad_norm": 0.71875, "learning_rate": 2.4172546995678566e-06, "loss": 4.0613, "step": 22356 }, { "epoch": 7.447405680019989, "grad_norm": 0.8046875, "learning_rate": 2.4166595174203605e-06, "loss": 3.9796, "step": 22357 }, { "epoch": 7.447738819022237, "grad_norm": 0.75390625, "learning_rate": 2.4160643944827697e-06, "loss": 3.9763, "step": 22358 }, { "epoch": 7.448071958024486, "grad_norm": 0.7578125, "learning_rate": 2.4154693307620145e-06, "loss": 3.9547, "step": 22359 }, { "epoch": 7.448405097026734, "grad_norm": 0.7265625, "learning_rate": 2.4148743262650334e-06, "loss": 4.031, "step": 22360 }, { "epoch": 7.4487382360289836, "grad_norm": 0.76171875, "learning_rate": 2.4142793809987506e-06, "loss": 3.9763, "step": 22361 }, { "epoch": 7.449071375031232, "grad_norm": 0.8046875, "learning_rate": 2.413684494970094e-06, "loss": 4.0043, "step": 22362 }, { "epoch": 7.44940451403348, "grad_norm": 0.796875, "learning_rate": 2.4130896681859987e-06, "loss": 4.0385, "step": 22363 }, { "epoch": 7.449737653035729, "grad_norm": 0.79296875, "learning_rate": 2.41249490065339e-06, "loss": 3.9774, "step": 22364 }, { "epoch": 7.450070792037978, "grad_norm": 0.77734375, "learning_rate": 2.4119001923791967e-06, "loss": 3.9824, "step": 22365 }, { "epoch": 7.450403931040227, "grad_norm": 0.765625, "learning_rate": 2.411305543370343e-06, "loss": 4.0184, "step": 22366 }, { "epoch": 7.450737070042475, "grad_norm": 0.79296875, "learning_rate": 2.4107109536337597e-06, "loss": 3.9825, "step": 22367 }, { "epoch": 7.451070209044724, "grad_norm": 0.79296875, "learning_rate": 2.4101164231763713e-06, "loss": 4.0059, "step": 22368 }, { "epoch": 7.451403348046973, "grad_norm": 0.76953125, "learning_rate": 2.4095219520051034e-06, "loss": 4.0039, "step": 22369 }, { "epoch": 7.451736487049221, "grad_norm": 0.8125, "learning_rate": 2.4089275401268744e-06, "loss": 3.9452, "step": 22370 }, { "epoch": 7.45206962605147, "grad_norm": 0.75, "learning_rate": 2.408333187548614e-06, "loss": 3.9044, "step": 22371 }, { "epoch": 7.452402765053718, "grad_norm": 0.78515625, "learning_rate": 2.407738894277245e-06, "loss": 3.9652, "step": 22372 }, { "epoch": 7.4527359040559675, "grad_norm": 0.78515625, "learning_rate": 2.407144660319687e-06, "loss": 4.0386, "step": 22373 }, { "epoch": 7.453069043058216, "grad_norm": 0.7890625, "learning_rate": 2.40655048568286e-06, "loss": 4.0208, "step": 22374 }, { "epoch": 7.453402182060465, "grad_norm": 0.7890625, "learning_rate": 2.4059563703736896e-06, "loss": 3.9908, "step": 22375 }, { "epoch": 7.453735321062713, "grad_norm": 0.76953125, "learning_rate": 2.4053623143990946e-06, "loss": 4.0057, "step": 22376 }, { "epoch": 7.4540684600649625, "grad_norm": 0.77734375, "learning_rate": 2.404768317765993e-06, "loss": 4.024, "step": 22377 }, { "epoch": 7.454401599067211, "grad_norm": 0.7578125, "learning_rate": 2.404174380481305e-06, "loss": 4.038, "step": 22378 }, { "epoch": 7.454734738069459, "grad_norm": 0.77734375, "learning_rate": 2.4035805025519482e-06, "loss": 3.9638, "step": 22379 }, { "epoch": 7.455067877071708, "grad_norm": 0.7734375, "learning_rate": 2.4029866839848386e-06, "loss": 3.9193, "step": 22380 }, { "epoch": 7.4554010160739566, "grad_norm": 0.76953125, "learning_rate": 2.4023929247868917e-06, "loss": 4.01, "step": 22381 }, { "epoch": 7.455734155076206, "grad_norm": 0.796875, "learning_rate": 2.4017992249650295e-06, "loss": 4.0152, "step": 22382 }, { "epoch": 7.456067294078454, "grad_norm": 0.75390625, "learning_rate": 2.4012055845261624e-06, "loss": 3.8552, "step": 22383 }, { "epoch": 7.456400433080703, "grad_norm": 0.75390625, "learning_rate": 2.4006120034772065e-06, "loss": 3.9965, "step": 22384 }, { "epoch": 7.4567335720829515, "grad_norm": 0.7890625, "learning_rate": 2.4000184818250735e-06, "loss": 4.0011, "step": 22385 }, { "epoch": 7.457066711085201, "grad_norm": 0.72265625, "learning_rate": 2.3994250195766804e-06, "loss": 3.9826, "step": 22386 }, { "epoch": 7.457399850087449, "grad_norm": 0.7421875, "learning_rate": 2.398831616738941e-06, "loss": 3.96, "step": 22387 }, { "epoch": 7.457732989089697, "grad_norm": 0.8203125, "learning_rate": 2.398238273318759e-06, "loss": 3.9647, "step": 22388 }, { "epoch": 7.4580661280919465, "grad_norm": 0.78125, "learning_rate": 2.3976449893230535e-06, "loss": 4.0328, "step": 22389 }, { "epoch": 7.458399267094195, "grad_norm": 0.7734375, "learning_rate": 2.397051764758732e-06, "loss": 3.9216, "step": 22390 }, { "epoch": 7.458732406096444, "grad_norm": 0.76171875, "learning_rate": 2.396458599632704e-06, "loss": 3.9471, "step": 22391 }, { "epoch": 7.459065545098692, "grad_norm": 0.78125, "learning_rate": 2.3958654939518773e-06, "loss": 3.9949, "step": 22392 }, { "epoch": 7.459398684100941, "grad_norm": 0.7578125, "learning_rate": 2.395272447723165e-06, "loss": 4.0048, "step": 22393 }, { "epoch": 7.45973182310319, "grad_norm": 0.734375, "learning_rate": 2.3946794609534704e-06, "loss": 4.0615, "step": 22394 }, { "epoch": 7.460064962105439, "grad_norm": 0.765625, "learning_rate": 2.394086533649703e-06, "loss": 4.0014, "step": 22395 }, { "epoch": 7.460398101107687, "grad_norm": 0.78125, "learning_rate": 2.3934936658187677e-06, "loss": 3.9223, "step": 22396 }, { "epoch": 7.4607312401099355, "grad_norm": 0.765625, "learning_rate": 2.3929008574675703e-06, "loss": 3.9026, "step": 22397 }, { "epoch": 7.461064379112185, "grad_norm": 0.75390625, "learning_rate": 2.3923081086030164e-06, "loss": 3.9419, "step": 22398 }, { "epoch": 7.461397518114433, "grad_norm": 0.82421875, "learning_rate": 2.3917154192320074e-06, "loss": 3.9585, "step": 22399 }, { "epoch": 7.461730657116682, "grad_norm": 0.796875, "learning_rate": 2.391122789361451e-06, "loss": 3.9427, "step": 22400 }, { "epoch": 7.4620637961189304, "grad_norm": 0.73828125, "learning_rate": 2.3905302189982495e-06, "loss": 3.9823, "step": 22401 }, { "epoch": 7.46239693512118, "grad_norm": 0.74609375, "learning_rate": 2.3899377081493024e-06, "loss": 4.0355, "step": 22402 }, { "epoch": 7.462730074123428, "grad_norm": 0.78515625, "learning_rate": 2.3893452568215114e-06, "loss": 4.0999, "step": 22403 }, { "epoch": 7.463063213125677, "grad_norm": 0.796875, "learning_rate": 2.388752865021781e-06, "loss": 3.9769, "step": 22404 }, { "epoch": 7.463396352127925, "grad_norm": 0.76953125, "learning_rate": 2.38816053275701e-06, "loss": 3.9707, "step": 22405 }, { "epoch": 7.463729491130174, "grad_norm": 0.80078125, "learning_rate": 2.387568260034092e-06, "loss": 4.026, "step": 22406 }, { "epoch": 7.464062630132423, "grad_norm": 0.80078125, "learning_rate": 2.386976046859934e-06, "loss": 3.944, "step": 22407 }, { "epoch": 7.464395769134671, "grad_norm": 0.83203125, "learning_rate": 2.3863838932414286e-06, "loss": 4.0176, "step": 22408 }, { "epoch": 7.46472890813692, "grad_norm": 0.73828125, "learning_rate": 2.3857917991854757e-06, "loss": 4.0414, "step": 22409 }, { "epoch": 7.465062047139169, "grad_norm": 0.7578125, "learning_rate": 2.3851997646989682e-06, "loss": 3.9565, "step": 22410 }, { "epoch": 7.465395186141418, "grad_norm": 0.76171875, "learning_rate": 2.3846077897888077e-06, "loss": 4.0014, "step": 22411 }, { "epoch": 7.465728325143666, "grad_norm": 0.765625, "learning_rate": 2.3840158744618864e-06, "loss": 4.0188, "step": 22412 }, { "epoch": 7.466061464145914, "grad_norm": 0.81640625, "learning_rate": 2.383424018725098e-06, "loss": 4.0234, "step": 22413 }, { "epoch": 7.466394603148164, "grad_norm": 0.77734375, "learning_rate": 2.3828322225853384e-06, "loss": 4.0226, "step": 22414 }, { "epoch": 7.466727742150412, "grad_norm": 0.7734375, "learning_rate": 2.3822404860494993e-06, "loss": 4.0618, "step": 22415 }, { "epoch": 7.467060881152661, "grad_norm": 0.77734375, "learning_rate": 2.3816488091244737e-06, "loss": 4.0011, "step": 22416 }, { "epoch": 7.467394020154909, "grad_norm": 0.75, "learning_rate": 2.3810571918171532e-06, "loss": 3.9646, "step": 22417 }, { "epoch": 7.467727159157159, "grad_norm": 0.8046875, "learning_rate": 2.3804656341344262e-06, "loss": 3.9299, "step": 22418 }, { "epoch": 7.468060298159407, "grad_norm": 0.77734375, "learning_rate": 2.379874136083187e-06, "loss": 4.1047, "step": 22419 }, { "epoch": 7.468393437161656, "grad_norm": 0.71875, "learning_rate": 2.379282697670326e-06, "loss": 3.9725, "step": 22420 }, { "epoch": 7.468726576163904, "grad_norm": 0.76171875, "learning_rate": 2.378691318902729e-06, "loss": 3.9829, "step": 22421 }, { "epoch": 7.4690597151661535, "grad_norm": 0.78125, "learning_rate": 2.378099999787283e-06, "loss": 3.9896, "step": 22422 }, { "epoch": 7.469392854168402, "grad_norm": 0.73046875, "learning_rate": 2.377508740330883e-06, "loss": 3.9651, "step": 22423 }, { "epoch": 7.46972599317065, "grad_norm": 0.75, "learning_rate": 2.3769175405404076e-06, "loss": 3.9841, "step": 22424 }, { "epoch": 7.470059132172899, "grad_norm": 0.79296875, "learning_rate": 2.376326400422745e-06, "loss": 3.9816, "step": 22425 }, { "epoch": 7.470392271175148, "grad_norm": 0.71875, "learning_rate": 2.375735319984784e-06, "loss": 3.9837, "step": 22426 }, { "epoch": 7.470725410177397, "grad_norm": 0.81640625, "learning_rate": 2.3751442992334073e-06, "loss": 3.9837, "step": 22427 }, { "epoch": 7.471058549179645, "grad_norm": 0.734375, "learning_rate": 2.374553338175499e-06, "loss": 3.996, "step": 22428 }, { "epoch": 7.471391688181894, "grad_norm": 0.74609375, "learning_rate": 2.37396243681794e-06, "loss": 4.0139, "step": 22429 }, { "epoch": 7.4717248271841425, "grad_norm": 0.78515625, "learning_rate": 2.373371595167619e-06, "loss": 3.9579, "step": 22430 }, { "epoch": 7.472057966186391, "grad_norm": 0.76171875, "learning_rate": 2.372780813231414e-06, "loss": 4.0282, "step": 22431 }, { "epoch": 7.47239110518864, "grad_norm": 0.796875, "learning_rate": 2.372190091016207e-06, "loss": 3.9343, "step": 22432 }, { "epoch": 7.472724244190888, "grad_norm": 0.76953125, "learning_rate": 2.371599428528879e-06, "loss": 3.9258, "step": 22433 }, { "epoch": 7.4730573831931375, "grad_norm": 0.765625, "learning_rate": 2.3710088257763086e-06, "loss": 3.9017, "step": 22434 }, { "epoch": 7.473390522195386, "grad_norm": 0.7734375, "learning_rate": 2.370418282765377e-06, "loss": 3.9871, "step": 22435 }, { "epoch": 7.473723661197635, "grad_norm": 0.76953125, "learning_rate": 2.3698277995029587e-06, "loss": 3.9827, "step": 22436 }, { "epoch": 7.474056800199883, "grad_norm": 0.7734375, "learning_rate": 2.369237375995938e-06, "loss": 3.9911, "step": 22437 }, { "epoch": 7.4743899392021325, "grad_norm": 0.78125, "learning_rate": 2.3686470122511876e-06, "loss": 4.0314, "step": 22438 }, { "epoch": 7.474723078204381, "grad_norm": 0.71875, "learning_rate": 2.368056708275585e-06, "loss": 4.0682, "step": 22439 }, { "epoch": 7.475056217206629, "grad_norm": 0.7421875, "learning_rate": 2.367466464076004e-06, "loss": 4.0073, "step": 22440 }, { "epoch": 7.475389356208878, "grad_norm": 0.73046875, "learning_rate": 2.366876279659327e-06, "loss": 3.9868, "step": 22441 }, { "epoch": 7.4757224952111265, "grad_norm": 0.77734375, "learning_rate": 2.3662861550324197e-06, "loss": 3.9936, "step": 22442 }, { "epoch": 7.476055634213376, "grad_norm": 0.75, "learning_rate": 2.3656960902021568e-06, "loss": 4.0337, "step": 22443 }, { "epoch": 7.476388773215624, "grad_norm": 0.734375, "learning_rate": 2.3651060851754163e-06, "loss": 3.9763, "step": 22444 }, { "epoch": 7.476721912217873, "grad_norm": 0.7734375, "learning_rate": 2.3645161399590672e-06, "loss": 3.9433, "step": 22445 }, { "epoch": 7.4770550512201215, "grad_norm": 0.78125, "learning_rate": 2.3639262545599825e-06, "loss": 4.0077, "step": 22446 }, { "epoch": 7.477388190222371, "grad_norm": 0.77734375, "learning_rate": 2.3633364289850293e-06, "loss": 3.9254, "step": 22447 }, { "epoch": 7.477721329224619, "grad_norm": 0.8359375, "learning_rate": 2.3627466632410828e-06, "loss": 4.0014, "step": 22448 }, { "epoch": 7.478054468226867, "grad_norm": 0.80859375, "learning_rate": 2.3621569573350105e-06, "loss": 4.031, "step": 22449 }, { "epoch": 7.478387607229116, "grad_norm": 0.7734375, "learning_rate": 2.3615673112736815e-06, "loss": 4.0039, "step": 22450 }, { "epoch": 7.478720746231365, "grad_norm": 0.77734375, "learning_rate": 2.360977725063963e-06, "loss": 3.9364, "step": 22451 }, { "epoch": 7.479053885233614, "grad_norm": 0.76171875, "learning_rate": 2.3603881987127236e-06, "loss": 3.9925, "step": 22452 }, { "epoch": 7.479387024235862, "grad_norm": 0.7421875, "learning_rate": 2.359798732226829e-06, "loss": 3.9817, "step": 22453 }, { "epoch": 7.479720163238111, "grad_norm": 0.76171875, "learning_rate": 2.359209325613143e-06, "loss": 3.9564, "step": 22454 }, { "epoch": 7.48005330224036, "grad_norm": 0.7734375, "learning_rate": 2.358619978878536e-06, "loss": 3.9888, "step": 22455 }, { "epoch": 7.480386441242609, "grad_norm": 0.7578125, "learning_rate": 2.3580306920298703e-06, "loss": 3.9844, "step": 22456 }, { "epoch": 7.480719580244857, "grad_norm": 0.7890625, "learning_rate": 2.357441465074009e-06, "loss": 4.0167, "step": 22457 }, { "epoch": 7.4810527192471055, "grad_norm": 0.76953125, "learning_rate": 2.3568522980178137e-06, "loss": 3.9548, "step": 22458 }, { "epoch": 7.481385858249355, "grad_norm": 0.7421875, "learning_rate": 2.356263190868154e-06, "loss": 4.0481, "step": 22459 }, { "epoch": 7.481718997251603, "grad_norm": 0.78515625, "learning_rate": 2.3556741436318837e-06, "loss": 4.0052, "step": 22460 }, { "epoch": 7.482052136253852, "grad_norm": 0.78125, "learning_rate": 2.3550851563158675e-06, "loss": 3.92, "step": 22461 }, { "epoch": 7.4823852752561, "grad_norm": 0.7578125, "learning_rate": 2.3544962289269636e-06, "loss": 4.0043, "step": 22462 }, { "epoch": 7.48271841425835, "grad_norm": 0.74609375, "learning_rate": 2.3539073614720353e-06, "loss": 4.0146, "step": 22463 }, { "epoch": 7.483051553260598, "grad_norm": 0.828125, "learning_rate": 2.3533185539579395e-06, "loss": 3.9938, "step": 22464 }, { "epoch": 7.483384692262847, "grad_norm": 0.7890625, "learning_rate": 2.3527298063915353e-06, "loss": 3.965, "step": 22465 }, { "epoch": 7.483717831265095, "grad_norm": 0.77734375, "learning_rate": 2.3521411187796766e-06, "loss": 3.9863, "step": 22466 }, { "epoch": 7.484050970267344, "grad_norm": 0.765625, "learning_rate": 2.3515524911292274e-06, "loss": 3.9814, "step": 22467 }, { "epoch": 7.484384109269593, "grad_norm": 0.7578125, "learning_rate": 2.3509639234470417e-06, "loss": 3.9823, "step": 22468 }, { "epoch": 7.484717248271841, "grad_norm": 0.76171875, "learning_rate": 2.350375415739969e-06, "loss": 3.9841, "step": 22469 }, { "epoch": 7.48505038727409, "grad_norm": 0.7578125, "learning_rate": 2.349786968014871e-06, "loss": 4.0531, "step": 22470 }, { "epoch": 7.485383526276339, "grad_norm": 0.7890625, "learning_rate": 2.3491985802785995e-06, "loss": 4.035, "step": 22471 }, { "epoch": 7.485716665278588, "grad_norm": 0.75390625, "learning_rate": 2.348610252538008e-06, "loss": 3.9743, "step": 22472 }, { "epoch": 7.486049804280836, "grad_norm": 0.76171875, "learning_rate": 2.3480219847999467e-06, "loss": 3.9491, "step": 22473 }, { "epoch": 7.486382943283084, "grad_norm": 0.7890625, "learning_rate": 2.347433777071272e-06, "loss": 4.0249, "step": 22474 }, { "epoch": 7.486716082285334, "grad_norm": 0.80078125, "learning_rate": 2.346845629358835e-06, "loss": 4.0336, "step": 22475 }, { "epoch": 7.487049221287582, "grad_norm": 0.8125, "learning_rate": 2.346257541669483e-06, "loss": 3.9556, "step": 22476 }, { "epoch": 7.487382360289831, "grad_norm": 0.75, "learning_rate": 2.345669514010068e-06, "loss": 3.9408, "step": 22477 }, { "epoch": 7.487715499292079, "grad_norm": 0.765625, "learning_rate": 2.345081546387439e-06, "loss": 4.0117, "step": 22478 }, { "epoch": 7.4880486382943285, "grad_norm": 0.7421875, "learning_rate": 2.344493638808445e-06, "loss": 3.9123, "step": 22479 }, { "epoch": 7.488381777296577, "grad_norm": 0.78515625, "learning_rate": 2.34390579127993e-06, "loss": 3.9374, "step": 22480 }, { "epoch": 7.488714916298826, "grad_norm": 0.828125, "learning_rate": 2.3433180038087478e-06, "loss": 3.9723, "step": 22481 }, { "epoch": 7.489048055301074, "grad_norm": 0.77734375, "learning_rate": 2.3427302764017407e-06, "loss": 3.9867, "step": 22482 }, { "epoch": 7.4893811943033235, "grad_norm": 0.7265625, "learning_rate": 2.342142609065756e-06, "loss": 3.984, "step": 22483 }, { "epoch": 7.489714333305572, "grad_norm": 0.75, "learning_rate": 2.3415550018076344e-06, "loss": 3.9554, "step": 22484 }, { "epoch": 7.49004747230782, "grad_norm": 0.7265625, "learning_rate": 2.3409674546342295e-06, "loss": 3.996, "step": 22485 }, { "epoch": 7.490380611310069, "grad_norm": 0.76953125, "learning_rate": 2.3403799675523772e-06, "loss": 4.041, "step": 22486 }, { "epoch": 7.490713750312318, "grad_norm": 0.79296875, "learning_rate": 2.3397925405689195e-06, "loss": 3.9659, "step": 22487 }, { "epoch": 7.491046889314567, "grad_norm": 0.796875, "learning_rate": 2.3392051736907046e-06, "loss": 4.0251, "step": 22488 }, { "epoch": 7.491380028316815, "grad_norm": 0.7578125, "learning_rate": 2.338617866924571e-06, "loss": 4.0038, "step": 22489 }, { "epoch": 7.491713167319064, "grad_norm": 0.7578125, "learning_rate": 2.3380306202773592e-06, "loss": 3.9754, "step": 22490 }, { "epoch": 7.4920463063213125, "grad_norm": 0.796875, "learning_rate": 2.3374434337559076e-06, "loss": 3.9598, "step": 22491 }, { "epoch": 7.492379445323561, "grad_norm": 0.77734375, "learning_rate": 2.336856307367061e-06, "loss": 4.0034, "step": 22492 }, { "epoch": 7.49271258432581, "grad_norm": 0.77734375, "learning_rate": 2.336269241117655e-06, "loss": 4.0315, "step": 22493 }, { "epoch": 7.493045723328058, "grad_norm": 0.82421875, "learning_rate": 2.335682235014527e-06, "loss": 3.9953, "step": 22494 }, { "epoch": 7.4933788623303075, "grad_norm": 0.765625, "learning_rate": 2.335095289064516e-06, "loss": 3.9643, "step": 22495 }, { "epoch": 7.493712001332556, "grad_norm": 0.76171875, "learning_rate": 2.3345084032744572e-06, "loss": 3.932, "step": 22496 }, { "epoch": 7.494045140334805, "grad_norm": 0.79296875, "learning_rate": 2.333921577651187e-06, "loss": 3.9682, "step": 22497 }, { "epoch": 7.494378279337053, "grad_norm": 0.80859375, "learning_rate": 2.333334812201539e-06, "loss": 3.9715, "step": 22498 }, { "epoch": 7.494711418339302, "grad_norm": 0.78125, "learning_rate": 2.3327481069323503e-06, "loss": 4.0002, "step": 22499 }, { "epoch": 7.495044557341551, "grad_norm": 0.79296875, "learning_rate": 2.332161461850455e-06, "loss": 4.0499, "step": 22500 }, { "epoch": 7.4953776963438, "grad_norm": 0.734375, "learning_rate": 2.331574876962685e-06, "loss": 4.0198, "step": 22501 }, { "epoch": 7.495710835346048, "grad_norm": 0.74609375, "learning_rate": 2.3309883522758705e-06, "loss": 3.9519, "step": 22502 }, { "epoch": 7.4960439743482965, "grad_norm": 0.73046875, "learning_rate": 2.3304018877968504e-06, "loss": 3.9391, "step": 22503 }, { "epoch": 7.496377113350546, "grad_norm": 0.796875, "learning_rate": 2.3298154835324488e-06, "loss": 3.9387, "step": 22504 }, { "epoch": 7.496710252352794, "grad_norm": 0.765625, "learning_rate": 2.3292291394894987e-06, "loss": 4.0353, "step": 22505 }, { "epoch": 7.497043391355043, "grad_norm": 0.76953125, "learning_rate": 2.3286428556748254e-06, "loss": 3.9805, "step": 22506 }, { "epoch": 7.4973765303572915, "grad_norm": 0.76953125, "learning_rate": 2.328056632095265e-06, "loss": 3.9846, "step": 22507 }, { "epoch": 7.497709669359541, "grad_norm": 0.78515625, "learning_rate": 2.327470468757642e-06, "loss": 3.967, "step": 22508 }, { "epoch": 7.498042808361789, "grad_norm": 0.73828125, "learning_rate": 2.326884365668785e-06, "loss": 3.892, "step": 22509 }, { "epoch": 7.498375947364037, "grad_norm": 0.76953125, "learning_rate": 2.3262983228355177e-06, "loss": 3.9676, "step": 22510 }, { "epoch": 7.498709086366286, "grad_norm": 0.7578125, "learning_rate": 2.325712340264671e-06, "loss": 3.9965, "step": 22511 }, { "epoch": 7.499042225368535, "grad_norm": 0.80078125, "learning_rate": 2.32512641796307e-06, "loss": 3.9196, "step": 22512 }, { "epoch": 7.499375364370784, "grad_norm": 0.734375, "learning_rate": 2.324540555937533e-06, "loss": 4.0115, "step": 22513 }, { "epoch": 7.499708503373032, "grad_norm": 0.84375, "learning_rate": 2.3239547541948905e-06, "loss": 4.0343, "step": 22514 }, { "epoch": 7.500041642375281, "grad_norm": 0.76953125, "learning_rate": 2.3233690127419648e-06, "loss": 3.9947, "step": 22515 }, { "epoch": 7.50037478137753, "grad_norm": 0.765625, "learning_rate": 2.3227833315855765e-06, "loss": 3.9851, "step": 22516 }, { "epoch": 7.500707920379779, "grad_norm": 0.80078125, "learning_rate": 2.3221977107325466e-06, "loss": 3.939, "step": 22517 }, { "epoch": 7.501041059382027, "grad_norm": 0.77734375, "learning_rate": 2.321612150189701e-06, "loss": 3.9135, "step": 22518 }, { "epoch": 7.501374198384276, "grad_norm": 0.76171875, "learning_rate": 2.3210266499638573e-06, "loss": 3.9448, "step": 22519 }, { "epoch": 7.501707337386525, "grad_norm": 0.79296875, "learning_rate": 2.320441210061835e-06, "loss": 3.9348, "step": 22520 }, { "epoch": 7.502040476388773, "grad_norm": 0.78125, "learning_rate": 2.3198558304904538e-06, "loss": 3.9831, "step": 22521 }, { "epoch": 7.502373615391022, "grad_norm": 0.765625, "learning_rate": 2.3192705112565327e-06, "loss": 4.0339, "step": 22522 }, { "epoch": 7.50270675439327, "grad_norm": 0.78515625, "learning_rate": 2.318685252366888e-06, "loss": 3.977, "step": 22523 }, { "epoch": 7.50303989339552, "grad_norm": 0.75390625, "learning_rate": 2.3181000538283355e-06, "loss": 4.0515, "step": 22524 }, { "epoch": 7.503373032397768, "grad_norm": 0.76953125, "learning_rate": 2.317514915647696e-06, "loss": 3.9977, "step": 22525 }, { "epoch": 7.503706171400017, "grad_norm": 0.7421875, "learning_rate": 2.316929837831781e-06, "loss": 4.0287, "step": 22526 }, { "epoch": 7.504039310402265, "grad_norm": 0.75, "learning_rate": 2.3163448203874084e-06, "loss": 4.0214, "step": 22527 }, { "epoch": 7.504372449404514, "grad_norm": 0.765625, "learning_rate": 2.3157598633213877e-06, "loss": 3.9843, "step": 22528 }, { "epoch": 7.504705588406763, "grad_norm": 0.8125, "learning_rate": 2.3151749666405385e-06, "loss": 3.9741, "step": 22529 }, { "epoch": 7.505038727409011, "grad_norm": 0.78125, "learning_rate": 2.3145901303516726e-06, "loss": 3.9877, "step": 22530 }, { "epoch": 7.50537186641126, "grad_norm": 0.78515625, "learning_rate": 2.3140053544615956e-06, "loss": 3.9539, "step": 22531 }, { "epoch": 7.505705005413509, "grad_norm": 0.7578125, "learning_rate": 2.313420638977126e-06, "loss": 4.0082, "step": 22532 }, { "epoch": 7.506038144415758, "grad_norm": 0.73828125, "learning_rate": 2.312835983905071e-06, "loss": 3.9726, "step": 22533 }, { "epoch": 7.506371283418006, "grad_norm": 0.78125, "learning_rate": 2.3122513892522418e-06, "loss": 3.9682, "step": 22534 }, { "epoch": 7.506704422420254, "grad_norm": 0.7734375, "learning_rate": 2.311666855025445e-06, "loss": 3.9751, "step": 22535 }, { "epoch": 7.507037561422504, "grad_norm": 0.796875, "learning_rate": 2.3110823812314934e-06, "loss": 3.9542, "step": 22536 }, { "epoch": 7.507370700424752, "grad_norm": 0.76171875, "learning_rate": 2.3104979678771938e-06, "loss": 3.9689, "step": 22537 }, { "epoch": 7.507703839427001, "grad_norm": 0.78515625, "learning_rate": 2.309913614969352e-06, "loss": 3.9293, "step": 22538 }, { "epoch": 7.508036978429249, "grad_norm": 0.7734375, "learning_rate": 2.3093293225147744e-06, "loss": 3.9927, "step": 22539 }, { "epoch": 7.5083701174314985, "grad_norm": 0.74609375, "learning_rate": 2.308745090520268e-06, "loss": 4.0203, "step": 22540 }, { "epoch": 7.508703256433747, "grad_norm": 0.74609375, "learning_rate": 2.308160918992636e-06, "loss": 3.9746, "step": 22541 }, { "epoch": 7.509036395435996, "grad_norm": 0.7734375, "learning_rate": 2.3075768079386823e-06, "loss": 3.9927, "step": 22542 }, { "epoch": 7.509369534438244, "grad_norm": 0.7109375, "learning_rate": 2.306992757365214e-06, "loss": 3.9435, "step": 22543 }, { "epoch": 7.5097026734404935, "grad_norm": 0.75390625, "learning_rate": 2.306408767279032e-06, "loss": 4.0051, "step": 22544 }, { "epoch": 7.510035812442742, "grad_norm": 0.81640625, "learning_rate": 2.3058248376869393e-06, "loss": 3.9868, "step": 22545 }, { "epoch": 7.51036895144499, "grad_norm": 0.765625, "learning_rate": 2.305240968595733e-06, "loss": 4.0381, "step": 22546 }, { "epoch": 7.510702090447239, "grad_norm": 0.80078125, "learning_rate": 2.304657160012221e-06, "loss": 3.9302, "step": 22547 }, { "epoch": 7.5110352294494875, "grad_norm": 0.81640625, "learning_rate": 2.304073411943202e-06, "loss": 4.0094, "step": 22548 }, { "epoch": 7.511368368451737, "grad_norm": 0.7890625, "learning_rate": 2.3034897243954687e-06, "loss": 3.9838, "step": 22549 }, { "epoch": 7.511701507453985, "grad_norm": 0.79296875, "learning_rate": 2.3029060973758272e-06, "loss": 4.012, "step": 22550 }, { "epoch": 7.512034646456234, "grad_norm": 0.71875, "learning_rate": 2.302322530891073e-06, "loss": 3.9801, "step": 22551 }, { "epoch": 7.5123677854584825, "grad_norm": 0.74609375, "learning_rate": 2.301739024948002e-06, "loss": 3.9666, "step": 22552 }, { "epoch": 7.512700924460731, "grad_norm": 0.7734375, "learning_rate": 2.3011555795534124e-06, "loss": 3.9629, "step": 22553 }, { "epoch": 7.51303406346298, "grad_norm": 0.73828125, "learning_rate": 2.3005721947140977e-06, "loss": 4.0239, "step": 22554 }, { "epoch": 7.513367202465228, "grad_norm": 0.765625, "learning_rate": 2.2999888704368573e-06, "loss": 3.9767, "step": 22555 }, { "epoch": 7.5137003414674775, "grad_norm": 0.77734375, "learning_rate": 2.2994056067284824e-06, "loss": 3.9199, "step": 22556 }, { "epoch": 7.514033480469726, "grad_norm": 0.76953125, "learning_rate": 2.298822403595769e-06, "loss": 3.9988, "step": 22557 }, { "epoch": 7.514366619471975, "grad_norm": 0.76171875, "learning_rate": 2.2982392610455076e-06, "loss": 3.9275, "step": 22558 }, { "epoch": 7.514699758474223, "grad_norm": 0.72265625, "learning_rate": 2.297656179084493e-06, "loss": 4.0353, "step": 22559 }, { "epoch": 7.515032897476472, "grad_norm": 0.78125, "learning_rate": 2.297073157719515e-06, "loss": 4.0058, "step": 22560 }, { "epoch": 7.515366036478721, "grad_norm": 0.78515625, "learning_rate": 2.2964901969573636e-06, "loss": 3.9726, "step": 22561 }, { "epoch": 7.51569917548097, "grad_norm": 0.76953125, "learning_rate": 2.2959072968048318e-06, "loss": 3.9504, "step": 22562 }, { "epoch": 7.516032314483218, "grad_norm": 0.78515625, "learning_rate": 2.2953244572687085e-06, "loss": 4.0711, "step": 22563 }, { "epoch": 7.5163654534854665, "grad_norm": 0.7734375, "learning_rate": 2.2947416783557827e-06, "loss": 3.9815, "step": 22564 }, { "epoch": 7.516698592487716, "grad_norm": 0.7734375, "learning_rate": 2.2941589600728387e-06, "loss": 3.9397, "step": 22565 }, { "epoch": 7.517031731489964, "grad_norm": 0.78125, "learning_rate": 2.293576302426671e-06, "loss": 3.9719, "step": 22566 }, { "epoch": 7.517364870492213, "grad_norm": 0.7734375, "learning_rate": 2.292993705424061e-06, "loss": 3.9357, "step": 22567 }, { "epoch": 7.517698009494461, "grad_norm": 0.765625, "learning_rate": 2.2924111690717933e-06, "loss": 3.9814, "step": 22568 }, { "epoch": 7.518031148496711, "grad_norm": 0.8125, "learning_rate": 2.2918286933766582e-06, "loss": 3.9824, "step": 22569 }, { "epoch": 7.518364287498959, "grad_norm": 0.78125, "learning_rate": 2.291246278345438e-06, "loss": 4.0519, "step": 22570 }, { "epoch": 7.518697426501207, "grad_norm": 0.765625, "learning_rate": 2.290663923984916e-06, "loss": 4.0084, "step": 22571 }, { "epoch": 7.519030565503456, "grad_norm": 0.72265625, "learning_rate": 2.290081630301874e-06, "loss": 3.9859, "step": 22572 }, { "epoch": 7.519363704505705, "grad_norm": 0.78125, "learning_rate": 2.2894993973030986e-06, "loss": 4.056, "step": 22573 }, { "epoch": 7.519696843507954, "grad_norm": 0.75, "learning_rate": 2.2889172249953696e-06, "loss": 3.9968, "step": 22574 }, { "epoch": 7.520029982510202, "grad_norm": 0.7734375, "learning_rate": 2.2883351133854673e-06, "loss": 4.0178, "step": 22575 }, { "epoch": 7.520363121512451, "grad_norm": 0.75, "learning_rate": 2.2877530624801734e-06, "loss": 3.9606, "step": 22576 }, { "epoch": 7.5206962605147, "grad_norm": 0.796875, "learning_rate": 2.287171072286265e-06, "loss": 3.9268, "step": 22577 }, { "epoch": 7.521029399516949, "grad_norm": 0.75, "learning_rate": 2.2865891428105238e-06, "loss": 4.0274, "step": 22578 }, { "epoch": 7.521362538519197, "grad_norm": 0.78125, "learning_rate": 2.2860072740597244e-06, "loss": 4.0281, "step": 22579 }, { "epoch": 7.521695677521446, "grad_norm": 0.7421875, "learning_rate": 2.2854254660406487e-06, "loss": 3.9992, "step": 22580 }, { "epoch": 7.522028816523695, "grad_norm": 0.81640625, "learning_rate": 2.2848437187600716e-06, "loss": 4.0099, "step": 22581 }, { "epoch": 7.522361955525943, "grad_norm": 0.7265625, "learning_rate": 2.2842620322247697e-06, "loss": 3.9397, "step": 22582 }, { "epoch": 7.522695094528192, "grad_norm": 0.76171875, "learning_rate": 2.2836804064415148e-06, "loss": 3.9449, "step": 22583 }, { "epoch": 7.52302823353044, "grad_norm": 0.734375, "learning_rate": 2.28309884141709e-06, "loss": 4.029, "step": 22584 }, { "epoch": 7.5233613725326896, "grad_norm": 0.73828125, "learning_rate": 2.2825173371582616e-06, "loss": 3.9406, "step": 22585 }, { "epoch": 7.523694511534938, "grad_norm": 0.76953125, "learning_rate": 2.2819358936718028e-06, "loss": 3.9546, "step": 22586 }, { "epoch": 7.524027650537187, "grad_norm": 0.75, "learning_rate": 2.2813545109644904e-06, "loss": 3.9991, "step": 22587 }, { "epoch": 7.524360789539435, "grad_norm": 0.76171875, "learning_rate": 2.2807731890430944e-06, "loss": 3.968, "step": 22588 }, { "epoch": 7.524693928541684, "grad_norm": 0.734375, "learning_rate": 2.280191927914386e-06, "loss": 3.9759, "step": 22589 }, { "epoch": 7.525027067543933, "grad_norm": 0.78515625, "learning_rate": 2.279610727585133e-06, "loss": 4.0434, "step": 22590 }, { "epoch": 7.525360206546181, "grad_norm": 0.7421875, "learning_rate": 2.279029588062111e-06, "loss": 3.9786, "step": 22591 }, { "epoch": 7.52569334554843, "grad_norm": 0.7578125, "learning_rate": 2.2784485093520842e-06, "loss": 4.0029, "step": 22592 }, { "epoch": 7.526026484550679, "grad_norm": 0.73046875, "learning_rate": 2.2778674914618237e-06, "loss": 4.0494, "step": 22593 }, { "epoch": 7.526359623552928, "grad_norm": 0.796875, "learning_rate": 2.277286534398095e-06, "loss": 3.9373, "step": 22594 }, { "epoch": 7.526692762555176, "grad_norm": 0.79296875, "learning_rate": 2.2767056381676664e-06, "loss": 3.9494, "step": 22595 }, { "epoch": 7.527025901557424, "grad_norm": 0.80859375, "learning_rate": 2.2761248027773036e-06, "loss": 3.9437, "step": 22596 }, { "epoch": 7.5273590405596735, "grad_norm": 0.77734375, "learning_rate": 2.275544028233769e-06, "loss": 3.9451, "step": 22597 }, { "epoch": 7.527692179561922, "grad_norm": 0.76953125, "learning_rate": 2.2749633145438324e-06, "loss": 3.934, "step": 22598 }, { "epoch": 7.528025318564171, "grad_norm": 0.7890625, "learning_rate": 2.2743826617142566e-06, "loss": 3.9771, "step": 22599 }, { "epoch": 7.528358457566419, "grad_norm": 0.78125, "learning_rate": 2.273802069751804e-06, "loss": 3.9727, "step": 22600 }, { "epoch": 7.5286915965686685, "grad_norm": 0.7734375, "learning_rate": 2.273221538663237e-06, "loss": 3.9335, "step": 22601 }, { "epoch": 7.529024735570917, "grad_norm": 0.77734375, "learning_rate": 2.2726410684553174e-06, "loss": 3.9777, "step": 22602 }, { "epoch": 7.529357874573166, "grad_norm": 0.77734375, "learning_rate": 2.272060659134808e-06, "loss": 3.9819, "step": 22603 }, { "epoch": 7.529691013575414, "grad_norm": 0.7734375, "learning_rate": 2.2714803107084674e-06, "loss": 3.9011, "step": 22604 }, { "epoch": 7.530024152577663, "grad_norm": 0.765625, "learning_rate": 2.2709000231830547e-06, "loss": 4.0775, "step": 22605 }, { "epoch": 7.530357291579912, "grad_norm": 0.7421875, "learning_rate": 2.270319796565332e-06, "loss": 4.0103, "step": 22606 }, { "epoch": 7.53069043058216, "grad_norm": 0.73828125, "learning_rate": 2.269739630862057e-06, "loss": 3.995, "step": 22607 }, { "epoch": 7.531023569584409, "grad_norm": 0.76953125, "learning_rate": 2.269159526079986e-06, "loss": 4.0347, "step": 22608 }, { "epoch": 7.5313567085866575, "grad_norm": 0.796875, "learning_rate": 2.268579482225874e-06, "loss": 3.9723, "step": 22609 }, { "epoch": 7.531689847588907, "grad_norm": 0.71875, "learning_rate": 2.2679994993064825e-06, "loss": 3.9764, "step": 22610 }, { "epoch": 7.532022986591155, "grad_norm": 0.79296875, "learning_rate": 2.267419577328567e-06, "loss": 3.9062, "step": 22611 }, { "epoch": 7.532356125593404, "grad_norm": 0.73046875, "learning_rate": 2.2668397162988753e-06, "loss": 3.9454, "step": 22612 }, { "epoch": 7.5326892645956525, "grad_norm": 0.734375, "learning_rate": 2.266259916224167e-06, "loss": 3.9236, "step": 22613 }, { "epoch": 7.533022403597901, "grad_norm": 0.80078125, "learning_rate": 2.2656801771111953e-06, "loss": 3.9848, "step": 22614 }, { "epoch": 7.53335554260015, "grad_norm": 0.7890625, "learning_rate": 2.2651004989667115e-06, "loss": 3.9545, "step": 22615 }, { "epoch": 7.533688681602398, "grad_norm": 0.76953125, "learning_rate": 2.2645208817974658e-06, "loss": 3.9944, "step": 22616 }, { "epoch": 7.534021820604647, "grad_norm": 0.796875, "learning_rate": 2.2639413256102147e-06, "loss": 3.9737, "step": 22617 }, { "epoch": 7.534354959606896, "grad_norm": 0.75, "learning_rate": 2.2633618304117055e-06, "loss": 4.0308, "step": 22618 }, { "epoch": 7.534688098609145, "grad_norm": 0.765625, "learning_rate": 2.262782396208689e-06, "loss": 4.0093, "step": 22619 }, { "epoch": 7.535021237611393, "grad_norm": 0.71875, "learning_rate": 2.2622030230079127e-06, "loss": 3.9669, "step": 22620 }, { "epoch": 7.535354376613642, "grad_norm": 0.79296875, "learning_rate": 2.261623710816127e-06, "loss": 3.9599, "step": 22621 }, { "epoch": 7.535687515615891, "grad_norm": 0.73828125, "learning_rate": 2.2610444596400787e-06, "loss": 4.0354, "step": 22622 }, { "epoch": 7.53602065461814, "grad_norm": 0.75390625, "learning_rate": 2.2604652694865115e-06, "loss": 3.9641, "step": 22623 }, { "epoch": 7.536353793620388, "grad_norm": 0.7421875, "learning_rate": 2.259886140362179e-06, "loss": 4.0969, "step": 22624 }, { "epoch": 7.5366869326226364, "grad_norm": 0.7734375, "learning_rate": 2.259307072273822e-06, "loss": 4.0157, "step": 22625 }, { "epoch": 7.537020071624886, "grad_norm": 0.78125, "learning_rate": 2.2587280652281866e-06, "loss": 4.0215, "step": 22626 }, { "epoch": 7.537353210627134, "grad_norm": 0.7890625, "learning_rate": 2.2581491192320138e-06, "loss": 4.009, "step": 22627 }, { "epoch": 7.537686349629383, "grad_norm": 0.7890625, "learning_rate": 2.2575702342920527e-06, "loss": 4.0333, "step": 22628 }, { "epoch": 7.538019488631631, "grad_norm": 0.7734375, "learning_rate": 2.2569914104150454e-06, "loss": 3.9651, "step": 22629 }, { "epoch": 7.538352627633881, "grad_norm": 0.80078125, "learning_rate": 2.2564126476077274e-06, "loss": 3.8659, "step": 22630 }, { "epoch": 7.538685766636129, "grad_norm": 0.79296875, "learning_rate": 2.255833945876847e-06, "loss": 3.9365, "step": 22631 }, { "epoch": 7.539018905638377, "grad_norm": 0.76171875, "learning_rate": 2.2552553052291417e-06, "loss": 4.0239, "step": 22632 }, { "epoch": 7.539352044640626, "grad_norm": 0.73828125, "learning_rate": 2.254676725671352e-06, "loss": 3.9141, "step": 22633 }, { "epoch": 7.539685183642875, "grad_norm": 0.7578125, "learning_rate": 2.2540982072102155e-06, "loss": 4.025, "step": 22634 }, { "epoch": 7.540018322645124, "grad_norm": 0.78125, "learning_rate": 2.2535197498524744e-06, "loss": 3.9845, "step": 22635 }, { "epoch": 7.540351461647372, "grad_norm": 0.70703125, "learning_rate": 2.252941353604864e-06, "loss": 3.9677, "step": 22636 }, { "epoch": 7.540684600649621, "grad_norm": 0.7734375, "learning_rate": 2.2523630184741213e-06, "loss": 4.0052, "step": 22637 }, { "epoch": 7.54101773965187, "grad_norm": 0.7890625, "learning_rate": 2.2517847444669836e-06, "loss": 3.9879, "step": 22638 }, { "epoch": 7.541350878654119, "grad_norm": 0.75, "learning_rate": 2.251206531590186e-06, "loss": 4.0409, "step": 22639 }, { "epoch": 7.541684017656367, "grad_norm": 0.7890625, "learning_rate": 2.250628379850464e-06, "loss": 3.9372, "step": 22640 }, { "epoch": 7.542017156658616, "grad_norm": 0.8046875, "learning_rate": 2.2500502892545476e-06, "loss": 3.8762, "step": 22641 }, { "epoch": 7.542350295660865, "grad_norm": 0.75390625, "learning_rate": 2.249472259809178e-06, "loss": 3.9832, "step": 22642 }, { "epoch": 7.542683434663113, "grad_norm": 0.796875, "learning_rate": 2.2488942915210827e-06, "loss": 4.025, "step": 22643 }, { "epoch": 7.543016573665362, "grad_norm": 0.74609375, "learning_rate": 2.248316384396995e-06, "loss": 3.982, "step": 22644 }, { "epoch": 7.54334971266761, "grad_norm": 0.765625, "learning_rate": 2.247738538443644e-06, "loss": 4.013, "step": 22645 }, { "epoch": 7.5436828516698595, "grad_norm": 0.80859375, "learning_rate": 2.2471607536677653e-06, "loss": 3.9775, "step": 22646 }, { "epoch": 7.544015990672108, "grad_norm": 0.7578125, "learning_rate": 2.2465830300760897e-06, "loss": 3.953, "step": 22647 }, { "epoch": 7.544349129674357, "grad_norm": 0.7265625, "learning_rate": 2.2460053676753392e-06, "loss": 4.0093, "step": 22648 }, { "epoch": 7.544682268676605, "grad_norm": 0.75, "learning_rate": 2.245427766472243e-06, "loss": 3.9946, "step": 22649 }, { "epoch": 7.545015407678854, "grad_norm": 0.7265625, "learning_rate": 2.244850226473536e-06, "loss": 3.9658, "step": 22650 }, { "epoch": 7.545348546681103, "grad_norm": 0.76171875, "learning_rate": 2.24427274768594e-06, "loss": 3.947, "step": 22651 }, { "epoch": 7.545681685683351, "grad_norm": 0.7890625, "learning_rate": 2.243695330116183e-06, "loss": 4.0482, "step": 22652 }, { "epoch": 7.5460148246856, "grad_norm": 0.76171875, "learning_rate": 2.2431179737709876e-06, "loss": 3.9764, "step": 22653 }, { "epoch": 7.5463479636878485, "grad_norm": 0.78515625, "learning_rate": 2.2425406786570846e-06, "loss": 3.9953, "step": 22654 }, { "epoch": 7.546681102690098, "grad_norm": 0.73828125, "learning_rate": 2.241963444781194e-06, "loss": 4.019, "step": 22655 }, { "epoch": 7.547014241692346, "grad_norm": 0.73046875, "learning_rate": 2.2413862721500413e-06, "loss": 3.9921, "step": 22656 }, { "epoch": 7.547347380694594, "grad_norm": 0.8046875, "learning_rate": 2.240809160770348e-06, "loss": 3.926, "step": 22657 }, { "epoch": 7.5476805196968435, "grad_norm": 0.7734375, "learning_rate": 2.240232110648837e-06, "loss": 4.016, "step": 22658 }, { "epoch": 7.548013658699093, "grad_norm": 0.7421875, "learning_rate": 2.2396551217922287e-06, "loss": 3.9608, "step": 22659 }, { "epoch": 7.548346797701341, "grad_norm": 0.73046875, "learning_rate": 2.2390781942072416e-06, "loss": 4.0329, "step": 22660 }, { "epoch": 7.548679936703589, "grad_norm": 0.7421875, "learning_rate": 2.238501327900602e-06, "loss": 3.9626, "step": 22661 }, { "epoch": 7.5490130757058385, "grad_norm": 0.76171875, "learning_rate": 2.237924522879024e-06, "loss": 4.0335, "step": 22662 }, { "epoch": 7.549346214708087, "grad_norm": 0.74609375, "learning_rate": 2.2373477791492294e-06, "loss": 3.9304, "step": 22663 }, { "epoch": 7.549679353710336, "grad_norm": 0.73046875, "learning_rate": 2.2367710967179312e-06, "loss": 3.972, "step": 22664 }, { "epoch": 7.550012492712584, "grad_norm": 0.765625, "learning_rate": 2.236194475591854e-06, "loss": 3.9869, "step": 22665 }, { "epoch": 7.550345631714833, "grad_norm": 0.7265625, "learning_rate": 2.2356179157777072e-06, "loss": 4.0972, "step": 22666 }, { "epoch": 7.550678770717082, "grad_norm": 0.7421875, "learning_rate": 2.2350414172822076e-06, "loss": 4.0278, "step": 22667 }, { "epoch": 7.55101190971933, "grad_norm": 0.7734375, "learning_rate": 2.2344649801120734e-06, "loss": 3.9576, "step": 22668 }, { "epoch": 7.551345048721579, "grad_norm": 0.69140625, "learning_rate": 2.233888604274017e-06, "loss": 3.9509, "step": 22669 }, { "epoch": 7.5516781877238275, "grad_norm": 0.76953125, "learning_rate": 2.233312289774752e-06, "loss": 3.9655, "step": 22670 }, { "epoch": 7.552011326726077, "grad_norm": 0.73046875, "learning_rate": 2.2327360366209883e-06, "loss": 3.9805, "step": 22671 }, { "epoch": 7.552344465728325, "grad_norm": 0.79296875, "learning_rate": 2.232159844819444e-06, "loss": 3.9576, "step": 22672 }, { "epoch": 7.552677604730574, "grad_norm": 0.7734375, "learning_rate": 2.231583714376827e-06, "loss": 3.9637, "step": 22673 }, { "epoch": 7.553010743732822, "grad_norm": 0.765625, "learning_rate": 2.231007645299849e-06, "loss": 4.0301, "step": 22674 }, { "epoch": 7.553343882735071, "grad_norm": 0.79296875, "learning_rate": 2.2304316375952187e-06, "loss": 4.0115, "step": 22675 }, { "epoch": 7.55367702173732, "grad_norm": 0.765625, "learning_rate": 2.2298556912696465e-06, "loss": 3.9746, "step": 22676 }, { "epoch": 7.554010160739568, "grad_norm": 0.7578125, "learning_rate": 2.2292798063298403e-06, "loss": 3.9404, "step": 22677 }, { "epoch": 7.554343299741817, "grad_norm": 0.8359375, "learning_rate": 2.2287039827825053e-06, "loss": 3.9943, "step": 22678 }, { "epoch": 7.554676438744066, "grad_norm": 0.80078125, "learning_rate": 2.2281282206343538e-06, "loss": 4.0378, "step": 22679 }, { "epoch": 7.555009577746315, "grad_norm": 0.828125, "learning_rate": 2.22755251989209e-06, "loss": 3.9537, "step": 22680 }, { "epoch": 7.555342716748563, "grad_norm": 0.80078125, "learning_rate": 2.2269768805624185e-06, "loss": 3.9218, "step": 22681 }, { "epoch": 7.555675855750812, "grad_norm": 0.77734375, "learning_rate": 2.226401302652043e-06, "loss": 3.9547, "step": 22682 }, { "epoch": 7.556008994753061, "grad_norm": 0.7265625, "learning_rate": 2.2258257861676745e-06, "loss": 3.9757, "step": 22683 }, { "epoch": 7.55634213375531, "grad_norm": 0.71484375, "learning_rate": 2.2252503311160092e-06, "loss": 4.0796, "step": 22684 }, { "epoch": 7.556675272757558, "grad_norm": 0.77734375, "learning_rate": 2.2246749375037505e-06, "loss": 3.9486, "step": 22685 }, { "epoch": 7.557008411759806, "grad_norm": 0.7890625, "learning_rate": 2.2240996053376046e-06, "loss": 3.9834, "step": 22686 }, { "epoch": 7.557341550762056, "grad_norm": 0.765625, "learning_rate": 2.2235243346242704e-06, "loss": 4.0258, "step": 22687 }, { "epoch": 7.557674689764304, "grad_norm": 0.78125, "learning_rate": 2.222949125370449e-06, "loss": 3.9005, "step": 22688 }, { "epoch": 7.558007828766553, "grad_norm": 0.79296875, "learning_rate": 2.2223739775828383e-06, "loss": 3.949, "step": 22689 }, { "epoch": 7.558340967768801, "grad_norm": 0.734375, "learning_rate": 2.221798891268141e-06, "loss": 4.0388, "step": 22690 }, { "epoch": 7.558674106771051, "grad_norm": 0.78515625, "learning_rate": 2.221223866433055e-06, "loss": 3.9318, "step": 22691 }, { "epoch": 7.559007245773299, "grad_norm": 0.73828125, "learning_rate": 2.220648903084279e-06, "loss": 3.9258, "step": 22692 }, { "epoch": 7.559340384775547, "grad_norm": 0.78515625, "learning_rate": 2.220074001228503e-06, "loss": 3.9606, "step": 22693 }, { "epoch": 7.559673523777796, "grad_norm": 0.8203125, "learning_rate": 2.2194991608724312e-06, "loss": 3.9906, "step": 22694 }, { "epoch": 7.560006662780045, "grad_norm": 0.8125, "learning_rate": 2.2189243820227574e-06, "loss": 3.9278, "step": 22695 }, { "epoch": 7.560339801782294, "grad_norm": 0.76171875, "learning_rate": 2.218349664686175e-06, "loss": 4.0528, "step": 22696 }, { "epoch": 7.560672940784542, "grad_norm": 0.78515625, "learning_rate": 2.217775008869377e-06, "loss": 4.048, "step": 22697 }, { "epoch": 7.561006079786791, "grad_norm": 0.7890625, "learning_rate": 2.2172004145790615e-06, "loss": 3.9847, "step": 22698 }, { "epoch": 7.56133921878904, "grad_norm": 0.75390625, "learning_rate": 2.216625881821918e-06, "loss": 3.9892, "step": 22699 }, { "epoch": 7.561672357791289, "grad_norm": 0.7890625, "learning_rate": 2.216051410604639e-06, "loss": 3.9648, "step": 22700 }, { "epoch": 7.562005496793537, "grad_norm": 0.77734375, "learning_rate": 2.2154770009339167e-06, "loss": 3.9839, "step": 22701 }, { "epoch": 7.562338635795786, "grad_norm": 0.76171875, "learning_rate": 2.214902652816441e-06, "loss": 4.014, "step": 22702 }, { "epoch": 7.5626717747980345, "grad_norm": 0.8203125, "learning_rate": 2.2143283662589023e-06, "loss": 3.9721, "step": 22703 }, { "epoch": 7.563004913800283, "grad_norm": 0.7421875, "learning_rate": 2.2137541412679863e-06, "loss": 3.9872, "step": 22704 }, { "epoch": 7.563338052802532, "grad_norm": 0.73828125, "learning_rate": 2.2131799778503876e-06, "loss": 3.9061, "step": 22705 }, { "epoch": 7.56367119180478, "grad_norm": 0.77734375, "learning_rate": 2.2126058760127897e-06, "loss": 4.0363, "step": 22706 }, { "epoch": 7.5640043308070295, "grad_norm": 0.765625, "learning_rate": 2.212031835761882e-06, "loss": 4.0489, "step": 22707 }, { "epoch": 7.564337469809278, "grad_norm": 0.75, "learning_rate": 2.211457857104347e-06, "loss": 4.0173, "step": 22708 }, { "epoch": 7.564670608811527, "grad_norm": 0.75390625, "learning_rate": 2.210883940046875e-06, "loss": 3.8955, "step": 22709 }, { "epoch": 7.565003747813775, "grad_norm": 0.765625, "learning_rate": 2.2103100845961515e-06, "loss": 3.9839, "step": 22710 }, { "epoch": 7.565336886816024, "grad_norm": 0.75390625, "learning_rate": 2.2097362907588534e-06, "loss": 3.9906, "step": 22711 }, { "epoch": 7.565670025818273, "grad_norm": 0.80078125, "learning_rate": 2.2091625585416715e-06, "loss": 4.0692, "step": 22712 }, { "epoch": 7.566003164820521, "grad_norm": 0.75, "learning_rate": 2.2085888879512853e-06, "loss": 3.9959, "step": 22713 }, { "epoch": 7.56633630382277, "grad_norm": 0.7578125, "learning_rate": 2.208015278994378e-06, "loss": 3.9462, "step": 22714 }, { "epoch": 7.5666694428250185, "grad_norm": 0.74609375, "learning_rate": 2.2074417316776273e-06, "loss": 3.9586, "step": 22715 }, { "epoch": 7.567002581827268, "grad_norm": 0.75, "learning_rate": 2.2068682460077197e-06, "loss": 4.0763, "step": 22716 }, { "epoch": 7.567335720829516, "grad_norm": 0.8125, "learning_rate": 2.206294821991332e-06, "loss": 3.8849, "step": 22717 }, { "epoch": 7.567668859831765, "grad_norm": 0.765625, "learning_rate": 2.205721459635143e-06, "loss": 3.9415, "step": 22718 }, { "epoch": 7.5680019988340135, "grad_norm": 0.81640625, "learning_rate": 2.2051481589458324e-06, "loss": 3.9562, "step": 22719 }, { "epoch": 7.568335137836263, "grad_norm": 0.7578125, "learning_rate": 2.204574919930076e-06, "loss": 4.0581, "step": 22720 }, { "epoch": 7.568668276838511, "grad_norm": 0.7109375, "learning_rate": 2.2040017425945525e-06, "loss": 3.9928, "step": 22721 }, { "epoch": 7.569001415840759, "grad_norm": 0.7578125, "learning_rate": 2.2034286269459353e-06, "loss": 3.9692, "step": 22722 }, { "epoch": 7.569334554843008, "grad_norm": 0.7734375, "learning_rate": 2.202855572990905e-06, "loss": 4.0177, "step": 22723 }, { "epoch": 7.569667693845257, "grad_norm": 0.765625, "learning_rate": 2.202282580736133e-06, "loss": 3.9111, "step": 22724 }, { "epoch": 7.570000832847506, "grad_norm": 0.70703125, "learning_rate": 2.2017096501882947e-06, "loss": 3.89, "step": 22725 }, { "epoch": 7.570333971849754, "grad_norm": 0.76953125, "learning_rate": 2.2011367813540597e-06, "loss": 4.0246, "step": 22726 }, { "epoch": 7.570667110852003, "grad_norm": 0.78125, "learning_rate": 2.2005639742401073e-06, "loss": 3.9941, "step": 22727 }, { "epoch": 7.571000249854252, "grad_norm": 0.765625, "learning_rate": 2.199991228853107e-06, "loss": 4.0004, "step": 22728 }, { "epoch": 7.5713333888565, "grad_norm": 0.796875, "learning_rate": 2.199418545199725e-06, "loss": 3.9868, "step": 22729 }, { "epoch": 7.571666527858749, "grad_norm": 0.7734375, "learning_rate": 2.1988459232866388e-06, "loss": 3.9745, "step": 22730 }, { "epoch": 7.5719996668609975, "grad_norm": 0.7734375, "learning_rate": 2.1982733631205152e-06, "loss": 3.9535, "step": 22731 }, { "epoch": 7.572332805863247, "grad_norm": 0.75390625, "learning_rate": 2.197700864708023e-06, "loss": 3.9518, "step": 22732 }, { "epoch": 7.572665944865495, "grad_norm": 0.79296875, "learning_rate": 2.1971284280558287e-06, "loss": 3.9158, "step": 22733 }, { "epoch": 7.572999083867744, "grad_norm": 0.75390625, "learning_rate": 2.1965560531706047e-06, "loss": 4.0259, "step": 22734 }, { "epoch": 7.573332222869992, "grad_norm": 0.7421875, "learning_rate": 2.195983740059015e-06, "loss": 3.9837, "step": 22735 }, { "epoch": 7.573665361872241, "grad_norm": 0.76171875, "learning_rate": 2.195411488727729e-06, "loss": 3.9434, "step": 22736 }, { "epoch": 7.57399850087449, "grad_norm": 0.7578125, "learning_rate": 2.194839299183405e-06, "loss": 4.006, "step": 22737 }, { "epoch": 7.574331639876738, "grad_norm": 0.7734375, "learning_rate": 2.1942671714327148e-06, "loss": 3.9565, "step": 22738 }, { "epoch": 7.574664778878987, "grad_norm": 0.76953125, "learning_rate": 2.1936951054823194e-06, "loss": 4.0799, "step": 22739 }, { "epoch": 7.574997917881236, "grad_norm": 0.765625, "learning_rate": 2.193123101338882e-06, "loss": 3.9333, "step": 22740 }, { "epoch": 7.575331056883485, "grad_norm": 0.76953125, "learning_rate": 2.192551159009064e-06, "loss": 4.0519, "step": 22741 }, { "epoch": 7.575664195885733, "grad_norm": 0.75, "learning_rate": 2.1919792784995313e-06, "loss": 3.9769, "step": 22742 }, { "epoch": 7.575997334887982, "grad_norm": 0.7421875, "learning_rate": 2.191407459816943e-06, "loss": 3.9901, "step": 22743 }, { "epoch": 7.576330473890231, "grad_norm": 0.75, "learning_rate": 2.190835702967959e-06, "loss": 3.94, "step": 22744 }, { "epoch": 7.57666361289248, "grad_norm": 0.796875, "learning_rate": 2.1902640079592403e-06, "loss": 3.9697, "step": 22745 }, { "epoch": 7.576996751894728, "grad_norm": 0.7734375, "learning_rate": 2.189692374797444e-06, "loss": 3.9454, "step": 22746 }, { "epoch": 7.577329890896976, "grad_norm": 0.7734375, "learning_rate": 2.18912080348923e-06, "loss": 4.043, "step": 22747 }, { "epoch": 7.577663029899226, "grad_norm": 0.81640625, "learning_rate": 2.188549294041253e-06, "loss": 3.977, "step": 22748 }, { "epoch": 7.577996168901474, "grad_norm": 0.7109375, "learning_rate": 2.1879778464601747e-06, "loss": 3.9473, "step": 22749 }, { "epoch": 7.578329307903723, "grad_norm": 0.77734375, "learning_rate": 2.187406460752648e-06, "loss": 3.9452, "step": 22750 }, { "epoch": 7.578662446905971, "grad_norm": 0.796875, "learning_rate": 2.1868351369253297e-06, "loss": 3.969, "step": 22751 }, { "epoch": 7.5789955859082205, "grad_norm": 0.76171875, "learning_rate": 2.186263874984871e-06, "loss": 4.01, "step": 22752 }, { "epoch": 7.579328724910469, "grad_norm": 0.7890625, "learning_rate": 2.18569267493793e-06, "loss": 3.9598, "step": 22753 }, { "epoch": 7.579661863912717, "grad_norm": 0.75, "learning_rate": 2.185121536791163e-06, "loss": 3.9371, "step": 22754 }, { "epoch": 7.579995002914966, "grad_norm": 0.7890625, "learning_rate": 2.184550460551212e-06, "loss": 3.9575, "step": 22755 }, { "epoch": 7.580328141917215, "grad_norm": 0.7578125, "learning_rate": 2.1839794462247377e-06, "loss": 3.9682, "step": 22756 }, { "epoch": 7.580661280919464, "grad_norm": 0.7734375, "learning_rate": 2.1834084938183883e-06, "loss": 4.0358, "step": 22757 }, { "epoch": 7.580994419921712, "grad_norm": 0.76171875, "learning_rate": 2.1828376033388134e-06, "loss": 4.0188, "step": 22758 }, { "epoch": 7.581327558923961, "grad_norm": 0.765625, "learning_rate": 2.182266774792662e-06, "loss": 3.9644, "step": 22759 }, { "epoch": 7.58166069792621, "grad_norm": 0.7890625, "learning_rate": 2.181696008186587e-06, "loss": 3.9387, "step": 22760 }, { "epoch": 7.581993836928459, "grad_norm": 0.83203125, "learning_rate": 2.1811253035272337e-06, "loss": 4.0338, "step": 22761 }, { "epoch": 7.582326975930707, "grad_norm": 0.73828125, "learning_rate": 2.18055466082125e-06, "loss": 3.9896, "step": 22762 }, { "epoch": 7.582660114932956, "grad_norm": 0.80078125, "learning_rate": 2.179984080075282e-06, "loss": 3.9709, "step": 22763 }, { "epoch": 7.5829932539352045, "grad_norm": 0.77734375, "learning_rate": 2.179413561295977e-06, "loss": 3.9913, "step": 22764 }, { "epoch": 7.583326392937453, "grad_norm": 0.75, "learning_rate": 2.17884310448998e-06, "loss": 4.0513, "step": 22765 }, { "epoch": 7.583659531939702, "grad_norm": 0.78125, "learning_rate": 2.1782727096639328e-06, "loss": 3.9937, "step": 22766 }, { "epoch": 7.58399267094195, "grad_norm": 0.78125, "learning_rate": 2.1777023768244837e-06, "loss": 3.9922, "step": 22767 }, { "epoch": 7.5843258099441995, "grad_norm": 0.734375, "learning_rate": 2.1771321059782733e-06, "loss": 4.0074, "step": 22768 }, { "epoch": 7.584658948946448, "grad_norm": 0.7265625, "learning_rate": 2.176561897131945e-06, "loss": 3.9673, "step": 22769 }, { "epoch": 7.584992087948697, "grad_norm": 0.7578125, "learning_rate": 2.175991750292138e-06, "loss": 3.9809, "step": 22770 }, { "epoch": 7.585325226950945, "grad_norm": 0.796875, "learning_rate": 2.1754216654654972e-06, "loss": 3.965, "step": 22771 }, { "epoch": 7.5856583659531935, "grad_norm": 0.734375, "learning_rate": 2.174851642658664e-06, "loss": 4.0654, "step": 22772 }, { "epoch": 7.585991504955443, "grad_norm": 0.75390625, "learning_rate": 2.1742816818782694e-06, "loss": 4.0758, "step": 22773 }, { "epoch": 7.586324643957691, "grad_norm": 0.79296875, "learning_rate": 2.173711783130961e-06, "loss": 4.0308, "step": 22774 }, { "epoch": 7.58665778295994, "grad_norm": 0.80859375, "learning_rate": 2.173141946423373e-06, "loss": 3.9821, "step": 22775 }, { "epoch": 7.5869909219621885, "grad_norm": 0.78125, "learning_rate": 2.172572171762143e-06, "loss": 4.0074, "step": 22776 }, { "epoch": 7.587324060964438, "grad_norm": 0.76953125, "learning_rate": 2.172002459153906e-06, "loss": 3.9691, "step": 22777 }, { "epoch": 7.587657199966686, "grad_norm": 0.79296875, "learning_rate": 2.1714328086053025e-06, "loss": 3.9883, "step": 22778 }, { "epoch": 7.587990338968935, "grad_norm": 0.765625, "learning_rate": 2.1708632201229653e-06, "loss": 4.091, "step": 22779 }, { "epoch": 7.5883234779711835, "grad_norm": 0.74609375, "learning_rate": 2.170293693713528e-06, "loss": 3.9868, "step": 22780 }, { "epoch": 7.588656616973433, "grad_norm": 0.7421875, "learning_rate": 2.1697242293836257e-06, "loss": 4.0077, "step": 22781 }, { "epoch": 7.588989755975681, "grad_norm": 0.7421875, "learning_rate": 2.1691548271398905e-06, "loss": 4.0311, "step": 22782 }, { "epoch": 7.589322894977929, "grad_norm": 0.81640625, "learning_rate": 2.1685854869889554e-06, "loss": 3.9978, "step": 22783 }, { "epoch": 7.589656033980178, "grad_norm": 0.78125, "learning_rate": 2.168016208937451e-06, "loss": 4.0479, "step": 22784 }, { "epoch": 7.589989172982427, "grad_norm": 0.7421875, "learning_rate": 2.1674469929920064e-06, "loss": 3.996, "step": 22785 }, { "epoch": 7.590322311984676, "grad_norm": 0.73828125, "learning_rate": 2.1668778391592563e-06, "loss": 4.0178, "step": 22786 }, { "epoch": 7.590655450986924, "grad_norm": 0.80859375, "learning_rate": 2.1663087474458286e-06, "loss": 3.9379, "step": 22787 }, { "epoch": 7.590988589989173, "grad_norm": 0.75390625, "learning_rate": 2.16573971785835e-06, "loss": 3.926, "step": 22788 }, { "epoch": 7.591321728991422, "grad_norm": 0.75, "learning_rate": 2.1651707504034477e-06, "loss": 4.1632, "step": 22789 }, { "epoch": 7.59165486799367, "grad_norm": 0.765625, "learning_rate": 2.1646018450877562e-06, "loss": 3.9772, "step": 22790 }, { "epoch": 7.591988006995919, "grad_norm": 0.78515625, "learning_rate": 2.164033001917894e-06, "loss": 4.0043, "step": 22791 }, { "epoch": 7.592321145998167, "grad_norm": 0.7265625, "learning_rate": 2.1634642209004874e-06, "loss": 3.9926, "step": 22792 }, { "epoch": 7.592654285000417, "grad_norm": 0.73828125, "learning_rate": 2.1628955020421666e-06, "loss": 4.0265, "step": 22793 }, { "epoch": 7.592987424002665, "grad_norm": 0.7265625, "learning_rate": 2.162326845349552e-06, "loss": 4.0263, "step": 22794 }, { "epoch": 7.593320563004914, "grad_norm": 0.81640625, "learning_rate": 2.161758250829269e-06, "loss": 3.9996, "step": 22795 }, { "epoch": 7.593653702007162, "grad_norm": 0.80078125, "learning_rate": 2.1611897184879375e-06, "loss": 4.027, "step": 22796 }, { "epoch": 7.593986841009411, "grad_norm": 0.828125, "learning_rate": 2.160621248332184e-06, "loss": 3.844, "step": 22797 }, { "epoch": 7.59431998001166, "grad_norm": 0.7578125, "learning_rate": 2.1600528403686278e-06, "loss": 3.9714, "step": 22798 }, { "epoch": 7.594653119013909, "grad_norm": 0.7578125, "learning_rate": 2.15948449460389e-06, "loss": 3.9995, "step": 22799 }, { "epoch": 7.594986258016157, "grad_norm": 0.75390625, "learning_rate": 2.1589162110445898e-06, "loss": 4.0084, "step": 22800 }, { "epoch": 7.595319397018406, "grad_norm": 0.77734375, "learning_rate": 2.1583479896973475e-06, "loss": 4.0115, "step": 22801 }, { "epoch": 7.595652536020655, "grad_norm": 0.7734375, "learning_rate": 2.1577798305687807e-06, "loss": 4.085, "step": 22802 }, { "epoch": 7.595985675022903, "grad_norm": 0.77734375, "learning_rate": 2.157211733665506e-06, "loss": 4.0147, "step": 22803 }, { "epoch": 7.596318814025152, "grad_norm": 0.7578125, "learning_rate": 2.1566436989941437e-06, "loss": 4.0125, "step": 22804 }, { "epoch": 7.596651953027401, "grad_norm": 0.7734375, "learning_rate": 2.156075726561309e-06, "loss": 3.9819, "step": 22805 }, { "epoch": 7.59698509202965, "grad_norm": 0.80078125, "learning_rate": 2.1555078163736177e-06, "loss": 3.9557, "step": 22806 }, { "epoch": 7.597318231031898, "grad_norm": 0.79296875, "learning_rate": 2.1549399684376806e-06, "loss": 4.0007, "step": 22807 }, { "epoch": 7.597651370034146, "grad_norm": 0.7421875, "learning_rate": 2.154372182760121e-06, "loss": 3.9851, "step": 22808 }, { "epoch": 7.5979845090363956, "grad_norm": 0.74609375, "learning_rate": 2.153804459347544e-06, "loss": 3.965, "step": 22809 }, { "epoch": 7.598317648038644, "grad_norm": 0.828125, "learning_rate": 2.1532367982065632e-06, "loss": 3.9368, "step": 22810 }, { "epoch": 7.598650787040893, "grad_norm": 0.7890625, "learning_rate": 2.1526691993437953e-06, "loss": 3.9377, "step": 22811 }, { "epoch": 7.598983926043141, "grad_norm": 0.80078125, "learning_rate": 2.1521016627658487e-06, "loss": 3.9907, "step": 22812 }, { "epoch": 7.5993170650453905, "grad_norm": 0.80859375, "learning_rate": 2.151534188479334e-06, "loss": 3.9769, "step": 22813 }, { "epoch": 7.599650204047639, "grad_norm": 0.7734375, "learning_rate": 2.150966776490858e-06, "loss": 3.9491, "step": 22814 }, { "epoch": 7.599983343049887, "grad_norm": 0.82421875, "learning_rate": 2.150399426807037e-06, "loss": 4.0, "step": 22815 }, { "epoch": 7.600316482052136, "grad_norm": 0.75, "learning_rate": 2.149832139434475e-06, "loss": 3.9639, "step": 22816 }, { "epoch": 7.600649621054385, "grad_norm": 0.7421875, "learning_rate": 2.1492649143797802e-06, "loss": 3.9937, "step": 22817 }, { "epoch": 7.600982760056634, "grad_norm": 0.80859375, "learning_rate": 2.148697751649559e-06, "loss": 3.9396, "step": 22818 }, { "epoch": 7.601315899058882, "grad_norm": 0.71875, "learning_rate": 2.1481306512504182e-06, "loss": 3.9466, "step": 22819 }, { "epoch": 7.601649038061131, "grad_norm": 0.73828125, "learning_rate": 2.147563613188963e-06, "loss": 4.0187, "step": 22820 }, { "epoch": 7.6019821770633795, "grad_norm": 0.76953125, "learning_rate": 2.146996637471797e-06, "loss": 4.0027, "step": 22821 }, { "epoch": 7.602315316065629, "grad_norm": 0.7578125, "learning_rate": 2.1464297241055273e-06, "loss": 3.9706, "step": 22822 }, { "epoch": 7.602648455067877, "grad_norm": 0.78515625, "learning_rate": 2.1458628730967553e-06, "loss": 3.9764, "step": 22823 }, { "epoch": 7.602981594070126, "grad_norm": 0.78515625, "learning_rate": 2.1452960844520837e-06, "loss": 3.9094, "step": 22824 }, { "epoch": 7.6033147330723745, "grad_norm": 0.76953125, "learning_rate": 2.1447293581781128e-06, "loss": 3.9478, "step": 22825 }, { "epoch": 7.603647872074623, "grad_norm": 0.76953125, "learning_rate": 2.144162694281449e-06, "loss": 3.984, "step": 22826 }, { "epoch": 7.603981011076872, "grad_norm": 0.81640625, "learning_rate": 2.143596092768688e-06, "loss": 3.9526, "step": 22827 }, { "epoch": 7.60431415007912, "grad_norm": 0.78125, "learning_rate": 2.1430295536464297e-06, "loss": 3.891, "step": 22828 }, { "epoch": 7.604647289081369, "grad_norm": 0.7265625, "learning_rate": 2.1424630769212717e-06, "loss": 4.0239, "step": 22829 }, { "epoch": 7.604980428083618, "grad_norm": 0.78125, "learning_rate": 2.1418966625998172e-06, "loss": 4.0002, "step": 22830 }, { "epoch": 7.605313567085867, "grad_norm": 0.84375, "learning_rate": 2.141330310688661e-06, "loss": 3.9495, "step": 22831 }, { "epoch": 7.605646706088115, "grad_norm": 0.796875, "learning_rate": 2.140764021194399e-06, "loss": 4.0094, "step": 22832 }, { "epoch": 7.6059798450903635, "grad_norm": 0.75, "learning_rate": 2.1401977941236263e-06, "loss": 3.9298, "step": 22833 }, { "epoch": 7.606312984092613, "grad_norm": 0.78515625, "learning_rate": 2.139631629482942e-06, "loss": 3.9497, "step": 22834 }, { "epoch": 7.606646123094861, "grad_norm": 0.80078125, "learning_rate": 2.1390655272789415e-06, "loss": 3.9973, "step": 22835 }, { "epoch": 7.60697926209711, "grad_norm": 0.75, "learning_rate": 2.138499487518211e-06, "loss": 3.9624, "step": 22836 }, { "epoch": 7.6073124010993585, "grad_norm": 0.7734375, "learning_rate": 2.1379335102073504e-06, "loss": 4.0222, "step": 22837 }, { "epoch": 7.607645540101608, "grad_norm": 0.796875, "learning_rate": 2.1373675953529505e-06, "loss": 3.9788, "step": 22838 }, { "epoch": 7.607978679103856, "grad_norm": 0.7734375, "learning_rate": 2.1368017429616024e-06, "loss": 3.9902, "step": 22839 }, { "epoch": 7.608311818106105, "grad_norm": 0.73828125, "learning_rate": 2.1362359530398955e-06, "loss": 3.9458, "step": 22840 }, { "epoch": 7.608644957108353, "grad_norm": 0.75, "learning_rate": 2.135670225594424e-06, "loss": 3.967, "step": 22841 }, { "epoch": 7.608978096110603, "grad_norm": 0.7734375, "learning_rate": 2.1351045606317753e-06, "loss": 3.9853, "step": 22842 }, { "epoch": 7.609311235112851, "grad_norm": 0.78125, "learning_rate": 2.134538958158538e-06, "loss": 3.9569, "step": 22843 }, { "epoch": 7.609644374115099, "grad_norm": 0.80078125, "learning_rate": 2.1339734181813004e-06, "loss": 3.9188, "step": 22844 }, { "epoch": 7.609977513117348, "grad_norm": 0.75390625, "learning_rate": 2.1334079407066497e-06, "loss": 4.057, "step": 22845 }, { "epoch": 7.610310652119597, "grad_norm": 0.7265625, "learning_rate": 2.132842525741172e-06, "loss": 4.009, "step": 22846 }, { "epoch": 7.610643791121846, "grad_norm": 0.76171875, "learning_rate": 2.132277173291452e-06, "loss": 3.975, "step": 22847 }, { "epoch": 7.610976930124094, "grad_norm": 0.76953125, "learning_rate": 2.1317118833640783e-06, "loss": 3.9571, "step": 22848 }, { "epoch": 7.611310069126343, "grad_norm": 0.8203125, "learning_rate": 2.1311466559656334e-06, "loss": 3.9893, "step": 22849 }, { "epoch": 7.611643208128592, "grad_norm": 0.79296875, "learning_rate": 2.1305814911027018e-06, "loss": 3.9745, "step": 22850 }, { "epoch": 7.61197634713084, "grad_norm": 0.7890625, "learning_rate": 2.1300163887818627e-06, "loss": 4.0197, "step": 22851 }, { "epoch": 7.612309486133089, "grad_norm": 0.75, "learning_rate": 2.129451349009704e-06, "loss": 3.979, "step": 22852 }, { "epoch": 7.612642625135337, "grad_norm": 0.77734375, "learning_rate": 2.128886371792806e-06, "loss": 3.9875, "step": 22853 }, { "epoch": 7.612975764137587, "grad_norm": 0.765625, "learning_rate": 2.128321457137744e-06, "loss": 3.9773, "step": 22854 }, { "epoch": 7.613308903139835, "grad_norm": 0.77734375, "learning_rate": 2.1277566050511043e-06, "loss": 4.0818, "step": 22855 }, { "epoch": 7.613642042142084, "grad_norm": 0.78515625, "learning_rate": 2.1271918155394645e-06, "loss": 4.0524, "step": 22856 }, { "epoch": 7.613975181144332, "grad_norm": 0.78125, "learning_rate": 2.126627088609401e-06, "loss": 3.9663, "step": 22857 }, { "epoch": 7.6143083201465815, "grad_norm": 0.7578125, "learning_rate": 2.1260624242674917e-06, "loss": 4.0093, "step": 22858 }, { "epoch": 7.61464145914883, "grad_norm": 0.78515625, "learning_rate": 2.1254978225203177e-06, "loss": 3.9458, "step": 22859 }, { "epoch": 7.614974598151079, "grad_norm": 0.7578125, "learning_rate": 2.1249332833744533e-06, "loss": 4.0158, "step": 22860 }, { "epoch": 7.615307737153327, "grad_norm": 0.8046875, "learning_rate": 2.124368806836473e-06, "loss": 3.959, "step": 22861 }, { "epoch": 7.615640876155576, "grad_norm": 0.77734375, "learning_rate": 2.123804392912953e-06, "loss": 3.9672, "step": 22862 }, { "epoch": 7.615974015157825, "grad_norm": 0.7265625, "learning_rate": 2.1232400416104665e-06, "loss": 3.9537, "step": 22863 }, { "epoch": 7.616307154160073, "grad_norm": 0.7578125, "learning_rate": 2.122675752935588e-06, "loss": 3.9799, "step": 22864 }, { "epoch": 7.616640293162322, "grad_norm": 0.8046875, "learning_rate": 2.122111526894887e-06, "loss": 3.9516, "step": 22865 }, { "epoch": 7.616973432164571, "grad_norm": 0.765625, "learning_rate": 2.1215473634949407e-06, "loss": 4.023, "step": 22866 }, { "epoch": 7.61730657116682, "grad_norm": 0.7421875, "learning_rate": 2.1209832627423182e-06, "loss": 4.0272, "step": 22867 }, { "epoch": 7.617639710169068, "grad_norm": 0.76171875, "learning_rate": 2.1204192246435895e-06, "loss": 4.0275, "step": 22868 }, { "epoch": 7.617972849171316, "grad_norm": 0.76171875, "learning_rate": 2.1198552492053225e-06, "loss": 3.9279, "step": 22869 }, { "epoch": 7.6183059881735655, "grad_norm": 0.77734375, "learning_rate": 2.1192913364340905e-06, "loss": 3.9817, "step": 22870 }, { "epoch": 7.618639127175814, "grad_norm": 0.78125, "learning_rate": 2.1187274863364624e-06, "loss": 3.9759, "step": 22871 }, { "epoch": 7.618972266178063, "grad_norm": 0.78515625, "learning_rate": 2.118163698919e-06, "loss": 4.0416, "step": 22872 }, { "epoch": 7.619305405180311, "grad_norm": 0.75390625, "learning_rate": 2.117599974188272e-06, "loss": 3.9647, "step": 22873 }, { "epoch": 7.6196385441825605, "grad_norm": 0.78125, "learning_rate": 2.1170363121508488e-06, "loss": 3.9892, "step": 22874 }, { "epoch": 7.619971683184809, "grad_norm": 0.75, "learning_rate": 2.1164727128132923e-06, "loss": 3.97, "step": 22875 }, { "epoch": 7.620304822187057, "grad_norm": 0.77734375, "learning_rate": 2.1159091761821686e-06, "loss": 3.9315, "step": 22876 }, { "epoch": 7.620637961189306, "grad_norm": 0.78125, "learning_rate": 2.1153457022640387e-06, "loss": 3.9798, "step": 22877 }, { "epoch": 7.6209711001915545, "grad_norm": 0.75390625, "learning_rate": 2.11478229106547e-06, "loss": 4.058, "step": 22878 }, { "epoch": 7.621304239193804, "grad_norm": 0.7734375, "learning_rate": 2.1142189425930233e-06, "loss": 3.9075, "step": 22879 }, { "epoch": 7.621637378196052, "grad_norm": 0.7890625, "learning_rate": 2.1136556568532606e-06, "loss": 4.0512, "step": 22880 }, { "epoch": 7.621970517198301, "grad_norm": 0.7578125, "learning_rate": 2.1130924338527433e-06, "loss": 3.9271, "step": 22881 }, { "epoch": 7.6223036562005495, "grad_norm": 0.78515625, "learning_rate": 2.11252927359803e-06, "loss": 3.9792, "step": 22882 }, { "epoch": 7.622636795202799, "grad_norm": 0.82421875, "learning_rate": 2.1119661760956825e-06, "loss": 3.987, "step": 22883 }, { "epoch": 7.622969934205047, "grad_norm": 0.76953125, "learning_rate": 2.111403141352255e-06, "loss": 4.0472, "step": 22884 }, { "epoch": 7.623303073207296, "grad_norm": 0.7734375, "learning_rate": 2.110840169374313e-06, "loss": 4.0396, "step": 22885 }, { "epoch": 7.6236362122095445, "grad_norm": 0.75, "learning_rate": 2.1102772601684092e-06, "loss": 3.967, "step": 22886 }, { "epoch": 7.623969351211793, "grad_norm": 0.7734375, "learning_rate": 2.1097144137411013e-06, "loss": 3.9377, "step": 22887 }, { "epoch": 7.624302490214042, "grad_norm": 0.75390625, "learning_rate": 2.109151630098943e-06, "loss": 4.0093, "step": 22888 }, { "epoch": 7.62463562921629, "grad_norm": 0.76171875, "learning_rate": 2.108588909248496e-06, "loss": 3.9148, "step": 22889 }, { "epoch": 7.624968768218539, "grad_norm": 0.77734375, "learning_rate": 2.108026251196308e-06, "loss": 3.8932, "step": 22890 }, { "epoch": 7.625301907220788, "grad_norm": 0.7734375, "learning_rate": 2.1074636559489333e-06, "loss": 3.949, "step": 22891 }, { "epoch": 7.625635046223037, "grad_norm": 0.75, "learning_rate": 2.106901123512929e-06, "loss": 4.0632, "step": 22892 }, { "epoch": 7.625968185225285, "grad_norm": 0.80078125, "learning_rate": 2.106338653894845e-06, "loss": 3.9628, "step": 22893 }, { "epoch": 7.6263013242275335, "grad_norm": 0.765625, "learning_rate": 2.105776247101233e-06, "loss": 4.0053, "step": 22894 }, { "epoch": 7.626634463229783, "grad_norm": 0.80078125, "learning_rate": 2.1052139031386423e-06, "loss": 3.9073, "step": 22895 }, { "epoch": 7.626967602232031, "grad_norm": 0.78515625, "learning_rate": 2.104651622013626e-06, "loss": 3.9761, "step": 22896 }, { "epoch": 7.62730074123428, "grad_norm": 0.76953125, "learning_rate": 2.104089403732732e-06, "loss": 4.089, "step": 22897 }, { "epoch": 7.627633880236528, "grad_norm": 0.83203125, "learning_rate": 2.1035272483025094e-06, "loss": 4.0062, "step": 22898 }, { "epoch": 7.627967019238778, "grad_norm": 0.75390625, "learning_rate": 2.102965155729505e-06, "loss": 3.9923, "step": 22899 }, { "epoch": 7.628300158241026, "grad_norm": 0.78515625, "learning_rate": 2.102403126020268e-06, "loss": 3.9802, "step": 22900 }, { "epoch": 7.628633297243275, "grad_norm": 0.80859375, "learning_rate": 2.101841159181342e-06, "loss": 3.998, "step": 22901 }, { "epoch": 7.628966436245523, "grad_norm": 0.73828125, "learning_rate": 2.101279255219272e-06, "loss": 4.0107, "step": 22902 }, { "epoch": 7.629299575247773, "grad_norm": 0.80078125, "learning_rate": 2.100717414140608e-06, "loss": 3.9427, "step": 22903 }, { "epoch": 7.629632714250021, "grad_norm": 0.80859375, "learning_rate": 2.1001556359518905e-06, "loss": 4.0075, "step": 22904 }, { "epoch": 7.629965853252269, "grad_norm": 0.79296875, "learning_rate": 2.099593920659664e-06, "loss": 3.935, "step": 22905 }, { "epoch": 7.630298992254518, "grad_norm": 0.78515625, "learning_rate": 2.0990322682704694e-06, "loss": 3.9418, "step": 22906 }, { "epoch": 7.630632131256767, "grad_norm": 0.77734375, "learning_rate": 2.098470678790854e-06, "loss": 4.0129, "step": 22907 }, { "epoch": 7.630965270259016, "grad_norm": 0.7578125, "learning_rate": 2.0979091522273542e-06, "loss": 4.0103, "step": 22908 }, { "epoch": 7.631298409261264, "grad_norm": 0.75, "learning_rate": 2.097347688586509e-06, "loss": 3.9762, "step": 22909 }, { "epoch": 7.631631548263513, "grad_norm": 0.76953125, "learning_rate": 2.096786287874863e-06, "loss": 3.9783, "step": 22910 }, { "epoch": 7.631964687265762, "grad_norm": 0.78125, "learning_rate": 2.096224950098954e-06, "loss": 3.9223, "step": 22911 }, { "epoch": 7.63229782626801, "grad_norm": 0.78515625, "learning_rate": 2.0956636752653193e-06, "loss": 4.0111, "step": 22912 }, { "epoch": 7.632630965270259, "grad_norm": 0.7890625, "learning_rate": 2.095102463380495e-06, "loss": 3.8889, "step": 22913 }, { "epoch": 7.632964104272507, "grad_norm": 0.76171875, "learning_rate": 2.0945413144510228e-06, "loss": 4.0089, "step": 22914 }, { "epoch": 7.633297243274757, "grad_norm": 0.7734375, "learning_rate": 2.093980228483436e-06, "loss": 4.0381, "step": 22915 }, { "epoch": 7.633630382277005, "grad_norm": 0.84375, "learning_rate": 2.0934192054842714e-06, "loss": 3.9136, "step": 22916 }, { "epoch": 7.633963521279254, "grad_norm": 0.79296875, "learning_rate": 2.092858245460059e-06, "loss": 4.0707, "step": 22917 }, { "epoch": 7.634296660281502, "grad_norm": 0.77734375, "learning_rate": 2.092297348417338e-06, "loss": 3.9263, "step": 22918 }, { "epoch": 7.6346297992837515, "grad_norm": 0.80859375, "learning_rate": 2.0917365143626393e-06, "loss": 3.9916, "step": 22919 }, { "epoch": 7.634962938286, "grad_norm": 0.7890625, "learning_rate": 2.091175743302497e-06, "loss": 4.0456, "step": 22920 }, { "epoch": 7.635296077288249, "grad_norm": 0.765625, "learning_rate": 2.090615035243439e-06, "loss": 4.0511, "step": 22921 }, { "epoch": 7.635629216290497, "grad_norm": 0.76953125, "learning_rate": 2.0900543901920017e-06, "loss": 4.0706, "step": 22922 }, { "epoch": 7.635962355292746, "grad_norm": 0.765625, "learning_rate": 2.0894938081547123e-06, "loss": 3.9862, "step": 22923 }, { "epoch": 7.636295494294995, "grad_norm": 0.7578125, "learning_rate": 2.088933289138101e-06, "loss": 3.9392, "step": 22924 }, { "epoch": 7.636628633297243, "grad_norm": 0.76171875, "learning_rate": 2.0883728331486964e-06, "loss": 4.0684, "step": 22925 }, { "epoch": 7.636961772299492, "grad_norm": 0.7578125, "learning_rate": 2.087812440193027e-06, "loss": 3.9533, "step": 22926 }, { "epoch": 7.6372949113017405, "grad_norm": 0.80078125, "learning_rate": 2.08725211027762e-06, "loss": 4.0017, "step": 22927 }, { "epoch": 7.63762805030399, "grad_norm": 0.77734375, "learning_rate": 2.0866918434089996e-06, "loss": 3.9999, "step": 22928 }, { "epoch": 7.637961189306238, "grad_norm": 0.81640625, "learning_rate": 2.086131639593696e-06, "loss": 4.0445, "step": 22929 }, { "epoch": 7.638294328308486, "grad_norm": 0.8203125, "learning_rate": 2.085571498838233e-06, "loss": 3.9825, "step": 22930 }, { "epoch": 7.6386274673107355, "grad_norm": 0.7734375, "learning_rate": 2.0850114211491335e-06, "loss": 3.9731, "step": 22931 }, { "epoch": 7.638960606312984, "grad_norm": 0.75390625, "learning_rate": 2.08445140653292e-06, "loss": 3.8968, "step": 22932 }, { "epoch": 7.639293745315233, "grad_norm": 0.7265625, "learning_rate": 2.0838914549961203e-06, "loss": 3.9889, "step": 22933 }, { "epoch": 7.639626884317481, "grad_norm": 0.828125, "learning_rate": 2.083331566545256e-06, "loss": 3.9868, "step": 22934 }, { "epoch": 7.6399600233197305, "grad_norm": 0.74609375, "learning_rate": 2.082771741186842e-06, "loss": 4.0201, "step": 22935 }, { "epoch": 7.640293162321979, "grad_norm": 0.765625, "learning_rate": 2.082211978927406e-06, "loss": 3.9585, "step": 22936 }, { "epoch": 7.640626301324227, "grad_norm": 0.7578125, "learning_rate": 2.081652279773465e-06, "loss": 4.0218, "step": 22937 }, { "epoch": 7.640959440326476, "grad_norm": 0.78125, "learning_rate": 2.0810926437315395e-06, "loss": 4.009, "step": 22938 }, { "epoch": 7.641292579328725, "grad_norm": 0.75390625, "learning_rate": 2.0805330708081445e-06, "loss": 3.9414, "step": 22939 }, { "epoch": 7.641625718330974, "grad_norm": 0.734375, "learning_rate": 2.079973561009803e-06, "loss": 3.9746, "step": 22940 }, { "epoch": 7.641958857333222, "grad_norm": 0.78125, "learning_rate": 2.0794141143430294e-06, "loss": 4.0277, "step": 22941 }, { "epoch": 7.642291996335471, "grad_norm": 0.78515625, "learning_rate": 2.078854730814341e-06, "loss": 3.9417, "step": 22942 }, { "epoch": 7.6426251353377195, "grad_norm": 0.74609375, "learning_rate": 2.078295410430253e-06, "loss": 3.9839, "step": 22943 }, { "epoch": 7.642958274339969, "grad_norm": 0.7890625, "learning_rate": 2.07773615319728e-06, "loss": 4.0135, "step": 22944 }, { "epoch": 7.643291413342217, "grad_norm": 0.7890625, "learning_rate": 2.0771769591219352e-06, "loss": 3.9919, "step": 22945 }, { "epoch": 7.643624552344466, "grad_norm": 0.75390625, "learning_rate": 2.076617828210732e-06, "loss": 4.0136, "step": 22946 }, { "epoch": 7.643957691346714, "grad_norm": 0.7578125, "learning_rate": 2.0760587604701857e-06, "loss": 3.9868, "step": 22947 }, { "epoch": 7.644290830348963, "grad_norm": 0.7421875, "learning_rate": 2.0754997559068063e-06, "loss": 3.9977, "step": 22948 }, { "epoch": 7.644623969351212, "grad_norm": 0.7734375, "learning_rate": 2.0749408145271063e-06, "loss": 4.0007, "step": 22949 }, { "epoch": 7.64495710835346, "grad_norm": 0.796875, "learning_rate": 2.0743819363375923e-06, "loss": 3.923, "step": 22950 }, { "epoch": 7.645290247355709, "grad_norm": 0.76953125, "learning_rate": 2.073823121344779e-06, "loss": 3.9643, "step": 22951 }, { "epoch": 7.645623386357958, "grad_norm": 0.76953125, "learning_rate": 2.073264369555176e-06, "loss": 4.0356, "step": 22952 }, { "epoch": 7.645956525360207, "grad_norm": 0.8125, "learning_rate": 2.072705680975284e-06, "loss": 3.99, "step": 22953 }, { "epoch": 7.646289664362455, "grad_norm": 0.74609375, "learning_rate": 2.0721470556116172e-06, "loss": 3.9747, "step": 22954 }, { "epoch": 7.6466228033647035, "grad_norm": 0.73046875, "learning_rate": 2.0715884934706804e-06, "loss": 4.0184, "step": 22955 }, { "epoch": 7.646955942366953, "grad_norm": 0.7265625, "learning_rate": 2.0710299945589807e-06, "loss": 3.9755, "step": 22956 }, { "epoch": 7.647289081369201, "grad_norm": 0.828125, "learning_rate": 2.0704715588830194e-06, "loss": 4.0534, "step": 22957 }, { "epoch": 7.64762222037145, "grad_norm": 0.71484375, "learning_rate": 2.069913186449306e-06, "loss": 3.9687, "step": 22958 }, { "epoch": 7.647955359373698, "grad_norm": 0.765625, "learning_rate": 2.0693548772643435e-06, "loss": 3.9111, "step": 22959 }, { "epoch": 7.648288498375948, "grad_norm": 0.7734375, "learning_rate": 2.0687966313346332e-06, "loss": 3.957, "step": 22960 }, { "epoch": 7.648621637378196, "grad_norm": 0.72265625, "learning_rate": 2.068238448666679e-06, "loss": 3.9146, "step": 22961 }, { "epoch": 7.648954776380445, "grad_norm": 0.79296875, "learning_rate": 2.067680329266981e-06, "loss": 3.935, "step": 22962 }, { "epoch": 7.649287915382693, "grad_norm": 0.7421875, "learning_rate": 2.067122273142041e-06, "loss": 3.9964, "step": 22963 }, { "epoch": 7.6496210543849426, "grad_norm": 0.75, "learning_rate": 2.0665642802983587e-06, "loss": 3.9276, "step": 22964 }, { "epoch": 7.649954193387191, "grad_norm": 0.796875, "learning_rate": 2.0660063507424317e-06, "loss": 4.0145, "step": 22965 }, { "epoch": 7.650287332389439, "grad_norm": 0.78125, "learning_rate": 2.0654484844807626e-06, "loss": 4.0204, "step": 22966 }, { "epoch": 7.650620471391688, "grad_norm": 0.75390625, "learning_rate": 2.0648906815198477e-06, "loss": 3.9357, "step": 22967 }, { "epoch": 7.650953610393937, "grad_norm": 0.7265625, "learning_rate": 2.064332941866184e-06, "loss": 4.0241, "step": 22968 }, { "epoch": 7.651286749396186, "grad_norm": 0.7734375, "learning_rate": 2.0637752655262654e-06, "loss": 3.9809, "step": 22969 }, { "epoch": 7.651619888398434, "grad_norm": 0.80859375, "learning_rate": 2.063217652506595e-06, "loss": 3.921, "step": 22970 }, { "epoch": 7.651953027400683, "grad_norm": 0.7578125, "learning_rate": 2.0626601028136603e-06, "loss": 3.9954, "step": 22971 }, { "epoch": 7.652286166402932, "grad_norm": 0.78515625, "learning_rate": 2.062102616453956e-06, "loss": 3.9988, "step": 22972 }, { "epoch": 7.65261930540518, "grad_norm": 0.78125, "learning_rate": 2.0615451934339803e-06, "loss": 3.9666, "step": 22973 }, { "epoch": 7.652952444407429, "grad_norm": 0.77734375, "learning_rate": 2.060987833760223e-06, "loss": 3.9601, "step": 22974 }, { "epoch": 7.653285583409677, "grad_norm": 0.86328125, "learning_rate": 2.0604305374391763e-06, "loss": 3.9618, "step": 22975 }, { "epoch": 7.6536187224119265, "grad_norm": 0.79296875, "learning_rate": 2.05987330447733e-06, "loss": 4.0476, "step": 22976 }, { "epoch": 7.653951861414175, "grad_norm": 0.85546875, "learning_rate": 2.0593161348811787e-06, "loss": 3.9848, "step": 22977 }, { "epoch": 7.654285000416424, "grad_norm": 0.7734375, "learning_rate": 2.05875902865721e-06, "loss": 3.8952, "step": 22978 }, { "epoch": 7.654618139418672, "grad_norm": 0.73828125, "learning_rate": 2.0582019858119128e-06, "loss": 4.0302, "step": 22979 }, { "epoch": 7.6549512784209215, "grad_norm": 0.7734375, "learning_rate": 2.0576450063517753e-06, "loss": 4.038, "step": 22980 }, { "epoch": 7.65528441742317, "grad_norm": 0.73046875, "learning_rate": 2.0570880902832855e-06, "loss": 3.9921, "step": 22981 }, { "epoch": 7.655617556425419, "grad_norm": 0.7734375, "learning_rate": 2.05653123761293e-06, "loss": 4.004, "step": 22982 }, { "epoch": 7.655950695427667, "grad_norm": 0.7890625, "learning_rate": 2.0559744483471932e-06, "loss": 3.9133, "step": 22983 }, { "epoch": 7.656283834429916, "grad_norm": 0.7421875, "learning_rate": 2.055417722492564e-06, "loss": 3.976, "step": 22984 }, { "epoch": 7.656616973432165, "grad_norm": 0.7421875, "learning_rate": 2.0548610600555258e-06, "loss": 3.9565, "step": 22985 }, { "epoch": 7.656950112434413, "grad_norm": 0.8203125, "learning_rate": 2.0543044610425617e-06, "loss": 3.9642, "step": 22986 }, { "epoch": 7.657283251436662, "grad_norm": 0.78125, "learning_rate": 2.0537479254601536e-06, "loss": 4.0051, "step": 22987 }, { "epoch": 7.6576163904389105, "grad_norm": 0.734375, "learning_rate": 2.0531914533147895e-06, "loss": 4.0016, "step": 22988 }, { "epoch": 7.65794952944116, "grad_norm": 0.7578125, "learning_rate": 2.0526350446129453e-06, "loss": 4.0268, "step": 22989 }, { "epoch": 7.658282668443408, "grad_norm": 0.75, "learning_rate": 2.0520786993611017e-06, "loss": 4.0088, "step": 22990 }, { "epoch": 7.658615807445656, "grad_norm": 0.77734375, "learning_rate": 2.0515224175657435e-06, "loss": 3.9855, "step": 22991 }, { "epoch": 7.6589489464479055, "grad_norm": 0.75390625, "learning_rate": 2.0509661992333477e-06, "loss": 3.9004, "step": 22992 }, { "epoch": 7.659282085450154, "grad_norm": 0.796875, "learning_rate": 2.050410044370392e-06, "loss": 3.9929, "step": 22993 }, { "epoch": 7.659615224452403, "grad_norm": 0.765625, "learning_rate": 2.0498539529833547e-06, "loss": 4.027, "step": 22994 }, { "epoch": 7.659948363454651, "grad_norm": 0.7734375, "learning_rate": 2.0492979250787156e-06, "loss": 3.9497, "step": 22995 }, { "epoch": 7.6602815024569, "grad_norm": 0.7578125, "learning_rate": 2.0487419606629516e-06, "loss": 4.0156, "step": 22996 }, { "epoch": 7.660614641459149, "grad_norm": 0.74609375, "learning_rate": 2.0481860597425316e-06, "loss": 3.9678, "step": 22997 }, { "epoch": 7.660947780461398, "grad_norm": 0.84375, "learning_rate": 2.047630222323938e-06, "loss": 4.0652, "step": 22998 }, { "epoch": 7.661280919463646, "grad_norm": 0.75, "learning_rate": 2.0470744484136425e-06, "loss": 4.0543, "step": 22999 }, { "epoch": 7.661614058465895, "grad_norm": 0.8203125, "learning_rate": 2.0465187380181183e-06, "loss": 4.037, "step": 23000 }, { "epoch": 7.661947197468144, "grad_norm": 0.75390625, "learning_rate": 2.0459630911438364e-06, "loss": 3.9923, "step": 23001 }, { "epoch": 7.662280336470392, "grad_norm": 0.7421875, "learning_rate": 2.0454075077972737e-06, "loss": 3.9945, "step": 23002 }, { "epoch": 7.662613475472641, "grad_norm": 0.76171875, "learning_rate": 2.0448519879848987e-06, "loss": 4.0122, "step": 23003 }, { "epoch": 7.6629466144748895, "grad_norm": 0.765625, "learning_rate": 2.044296531713182e-06, "loss": 4.0049, "step": 23004 }, { "epoch": 7.663279753477139, "grad_norm": 0.76953125, "learning_rate": 2.043741138988594e-06, "loss": 3.9894, "step": 23005 }, { "epoch": 7.663612892479387, "grad_norm": 0.79296875, "learning_rate": 2.043185809817604e-06, "loss": 3.9612, "step": 23006 }, { "epoch": 7.663946031481636, "grad_norm": 0.7734375, "learning_rate": 2.0426305442066786e-06, "loss": 4.0039, "step": 23007 }, { "epoch": 7.664279170483884, "grad_norm": 0.75, "learning_rate": 2.0420753421622873e-06, "loss": 4.0264, "step": 23008 }, { "epoch": 7.664612309486133, "grad_norm": 0.765625, "learning_rate": 2.0415202036908947e-06, "loss": 3.9576, "step": 23009 }, { "epoch": 7.664945448488382, "grad_norm": 0.72265625, "learning_rate": 2.04096512879897e-06, "loss": 3.9628, "step": 23010 }, { "epoch": 7.66527858749063, "grad_norm": 0.76953125, "learning_rate": 2.040410117492978e-06, "loss": 3.9333, "step": 23011 }, { "epoch": 7.665611726492879, "grad_norm": 0.75, "learning_rate": 2.039855169779382e-06, "loss": 4.02, "step": 23012 }, { "epoch": 7.665944865495128, "grad_norm": 0.77734375, "learning_rate": 2.039300285664645e-06, "loss": 3.9379, "step": 23013 }, { "epoch": 7.666278004497377, "grad_norm": 0.75, "learning_rate": 2.0387454651552363e-06, "loss": 4.0432, "step": 23014 }, { "epoch": 7.666611143499625, "grad_norm": 0.80078125, "learning_rate": 2.038190708257612e-06, "loss": 3.9558, "step": 23015 }, { "epoch": 7.666944282501873, "grad_norm": 0.76171875, "learning_rate": 2.0376360149782326e-06, "loss": 4.0067, "step": 23016 }, { "epoch": 7.667277421504123, "grad_norm": 0.75, "learning_rate": 2.037081385323564e-06, "loss": 4.087, "step": 23017 }, { "epoch": 7.667610560506371, "grad_norm": 0.76171875, "learning_rate": 2.0365268193000664e-06, "loss": 3.9164, "step": 23018 }, { "epoch": 7.66794369950862, "grad_norm": 0.82421875, "learning_rate": 2.035972316914196e-06, "loss": 4.0142, "step": 23019 }, { "epoch": 7.668276838510868, "grad_norm": 0.77734375, "learning_rate": 2.035417878172411e-06, "loss": 4.0364, "step": 23020 }, { "epoch": 7.668609977513118, "grad_norm": 0.76953125, "learning_rate": 2.0348635030811738e-06, "loss": 3.9196, "step": 23021 }, { "epoch": 7.668943116515366, "grad_norm": 0.78125, "learning_rate": 2.0343091916469398e-06, "loss": 4.0701, "step": 23022 }, { "epoch": 7.669276255517615, "grad_norm": 0.7734375, "learning_rate": 2.033754943876164e-06, "loss": 4.0509, "step": 23023 }, { "epoch": 7.669609394519863, "grad_norm": 0.7734375, "learning_rate": 2.0332007597753043e-06, "loss": 3.9774, "step": 23024 }, { "epoch": 7.6699425335221125, "grad_norm": 0.75390625, "learning_rate": 2.032646639350815e-06, "loss": 3.9999, "step": 23025 }, { "epoch": 7.670275672524361, "grad_norm": 0.72265625, "learning_rate": 2.0320925826091484e-06, "loss": 3.9491, "step": 23026 }, { "epoch": 7.670608811526609, "grad_norm": 0.8046875, "learning_rate": 2.0315385895567586e-06, "loss": 3.9853, "step": 23027 }, { "epoch": 7.670941950528858, "grad_norm": 0.75390625, "learning_rate": 2.0309846602001017e-06, "loss": 3.9641, "step": 23028 }, { "epoch": 7.671275089531107, "grad_norm": 0.7578125, "learning_rate": 2.0304307945456283e-06, "loss": 3.9623, "step": 23029 }, { "epoch": 7.671608228533356, "grad_norm": 0.73828125, "learning_rate": 2.029876992599788e-06, "loss": 3.98, "step": 23030 }, { "epoch": 7.671941367535604, "grad_norm": 0.76171875, "learning_rate": 2.0293232543690303e-06, "loss": 3.9882, "step": 23031 }, { "epoch": 7.672274506537853, "grad_norm": 0.7578125, "learning_rate": 2.0287695798598124e-06, "loss": 3.9139, "step": 23032 }, { "epoch": 7.6726076455401016, "grad_norm": 0.78125, "learning_rate": 2.0282159690785756e-06, "loss": 3.9798, "step": 23033 }, { "epoch": 7.67294078454235, "grad_norm": 0.7578125, "learning_rate": 2.027662422031769e-06, "loss": 3.9632, "step": 23034 }, { "epoch": 7.673273923544599, "grad_norm": 0.76953125, "learning_rate": 2.0271089387258433e-06, "loss": 3.9725, "step": 23035 }, { "epoch": 7.673607062546847, "grad_norm": 0.73046875, "learning_rate": 2.0265555191672453e-06, "loss": 3.9869, "step": 23036 }, { "epoch": 7.6739402015490965, "grad_norm": 0.7890625, "learning_rate": 2.02600216336242e-06, "loss": 4.0126, "step": 23037 }, { "epoch": 7.674273340551345, "grad_norm": 0.73828125, "learning_rate": 2.0254488713178106e-06, "loss": 3.988, "step": 23038 }, { "epoch": 7.674606479553594, "grad_norm": 0.75390625, "learning_rate": 2.024895643039865e-06, "loss": 4.0212, "step": 23039 }, { "epoch": 7.674939618555842, "grad_norm": 0.78125, "learning_rate": 2.0243424785350275e-06, "loss": 3.9928, "step": 23040 }, { "epoch": 7.6752727575580915, "grad_norm": 0.78125, "learning_rate": 2.0237893778097393e-06, "loss": 3.9428, "step": 23041 }, { "epoch": 7.67560589656034, "grad_norm": 0.71875, "learning_rate": 2.0232363408704437e-06, "loss": 3.9449, "step": 23042 }, { "epoch": 7.675939035562589, "grad_norm": 0.796875, "learning_rate": 2.0226833677235813e-06, "loss": 3.9873, "step": 23043 }, { "epoch": 7.676272174564837, "grad_norm": 0.74609375, "learning_rate": 2.0221304583755946e-06, "loss": 4.0305, "step": 23044 }, { "epoch": 7.6766053135670855, "grad_norm": 0.76953125, "learning_rate": 2.021577612832921e-06, "loss": 3.9684, "step": 23045 }, { "epoch": 7.676938452569335, "grad_norm": 0.8125, "learning_rate": 2.0210248311020037e-06, "loss": 3.8811, "step": 23046 }, { "epoch": 7.677271591571583, "grad_norm": 0.7421875, "learning_rate": 2.020472113189279e-06, "loss": 3.9456, "step": 23047 }, { "epoch": 7.677604730573832, "grad_norm": 0.7734375, "learning_rate": 2.0199194591011873e-06, "loss": 3.9853, "step": 23048 }, { "epoch": 7.6779378695760805, "grad_norm": 0.7734375, "learning_rate": 2.0193668688441604e-06, "loss": 3.9891, "step": 23049 }, { "epoch": 7.67827100857833, "grad_norm": 0.71484375, "learning_rate": 2.0188143424246432e-06, "loss": 3.9841, "step": 23050 }, { "epoch": 7.678604147580578, "grad_norm": 0.71484375, "learning_rate": 2.0182618798490647e-06, "loss": 3.9462, "step": 23051 }, { "epoch": 7.678937286582826, "grad_norm": 0.765625, "learning_rate": 2.017709481123862e-06, "loss": 3.9889, "step": 23052 }, { "epoch": 7.679270425585075, "grad_norm": 0.7578125, "learning_rate": 2.017157146255466e-06, "loss": 3.9656, "step": 23053 }, { "epoch": 7.679603564587324, "grad_norm": 0.72265625, "learning_rate": 2.0166048752503168e-06, "loss": 4.0531, "step": 23054 }, { "epoch": 7.679936703589573, "grad_norm": 0.77734375, "learning_rate": 2.016052668114843e-06, "loss": 3.9877, "step": 23055 }, { "epoch": 7.680269842591821, "grad_norm": 0.7734375, "learning_rate": 2.0155005248554777e-06, "loss": 4.0486, "step": 23056 }, { "epoch": 7.68060298159407, "grad_norm": 0.7734375, "learning_rate": 2.01494844547865e-06, "loss": 3.9125, "step": 23057 }, { "epoch": 7.680936120596319, "grad_norm": 0.80859375, "learning_rate": 2.014396429990794e-06, "loss": 3.9723, "step": 23058 }, { "epoch": 7.681269259598568, "grad_norm": 0.7421875, "learning_rate": 2.0138444783983413e-06, "loss": 3.9978, "step": 23059 }, { "epoch": 7.681602398600816, "grad_norm": 0.7734375, "learning_rate": 2.013292590707711e-06, "loss": 3.9257, "step": 23060 }, { "epoch": 7.681935537603065, "grad_norm": 0.74609375, "learning_rate": 2.0127407669253417e-06, "loss": 4.0468, "step": 23061 }, { "epoch": 7.682268676605314, "grad_norm": 0.76171875, "learning_rate": 2.0121890070576567e-06, "loss": 3.9499, "step": 23062 }, { "epoch": 7.682601815607562, "grad_norm": 0.76171875, "learning_rate": 2.0116373111110823e-06, "loss": 3.971, "step": 23063 }, { "epoch": 7.682934954609811, "grad_norm": 0.78125, "learning_rate": 2.0110856790920436e-06, "loss": 3.8829, "step": 23064 }, { "epoch": 7.683268093612059, "grad_norm": 0.7578125, "learning_rate": 2.010534111006971e-06, "loss": 4.0524, "step": 23065 }, { "epoch": 7.683601232614309, "grad_norm": 0.73828125, "learning_rate": 2.009982606862285e-06, "loss": 4.0161, "step": 23066 }, { "epoch": 7.683934371616557, "grad_norm": 0.75390625, "learning_rate": 2.0094311666644096e-06, "loss": 3.9693, "step": 23067 }, { "epoch": 7.684267510618806, "grad_norm": 0.78125, "learning_rate": 2.0088797904197693e-06, "loss": 3.947, "step": 23068 }, { "epoch": 7.684600649621054, "grad_norm": 0.765625, "learning_rate": 2.008328478134786e-06, "loss": 3.9515, "step": 23069 }, { "epoch": 7.684933788623303, "grad_norm": 0.81640625, "learning_rate": 2.0077772298158807e-06, "loss": 3.9871, "step": 23070 }, { "epoch": 7.685266927625552, "grad_norm": 0.765625, "learning_rate": 2.007226045469472e-06, "loss": 4.0412, "step": 23071 }, { "epoch": 7.6856000666278, "grad_norm": 0.796875, "learning_rate": 2.0066749251019855e-06, "loss": 4.0195, "step": 23072 }, { "epoch": 7.685933205630049, "grad_norm": 0.7421875, "learning_rate": 2.006123868719837e-06, "loss": 3.9638, "step": 23073 }, { "epoch": 7.686266344632298, "grad_norm": 0.76953125, "learning_rate": 2.005572876329446e-06, "loss": 4.019, "step": 23074 }, { "epoch": 7.686599483634547, "grad_norm": 0.76171875, "learning_rate": 2.0050219479372275e-06, "loss": 3.9565, "step": 23075 }, { "epoch": 7.686932622636795, "grad_norm": 0.73828125, "learning_rate": 2.004471083549604e-06, "loss": 3.9415, "step": 23076 }, { "epoch": 7.687265761639043, "grad_norm": 0.80078125, "learning_rate": 2.0039202831729904e-06, "loss": 4.0082, "step": 23077 }, { "epoch": 7.687598900641293, "grad_norm": 0.796875, "learning_rate": 2.0033695468137975e-06, "loss": 3.8617, "step": 23078 }, { "epoch": 7.687932039643541, "grad_norm": 0.73046875, "learning_rate": 2.0028188744784455e-06, "loss": 3.982, "step": 23079 }, { "epoch": 7.68826517864579, "grad_norm": 0.78515625, "learning_rate": 2.0022682661733467e-06, "loss": 4.0055, "step": 23080 }, { "epoch": 7.688598317648038, "grad_norm": 0.796875, "learning_rate": 2.001717721904915e-06, "loss": 3.962, "step": 23081 }, { "epoch": 7.6889314566502875, "grad_norm": 0.7578125, "learning_rate": 2.0011672416795594e-06, "loss": 4.005, "step": 23082 }, { "epoch": 7.689264595652536, "grad_norm": 0.734375, "learning_rate": 2.000616825503698e-06, "loss": 4.0166, "step": 23083 }, { "epoch": 7.689597734654785, "grad_norm": 0.76953125, "learning_rate": 2.000066473383739e-06, "loss": 3.9563, "step": 23084 }, { "epoch": 7.689930873657033, "grad_norm": 0.75, "learning_rate": 1.9995161853260923e-06, "loss": 3.9714, "step": 23085 }, { "epoch": 7.6902640126592825, "grad_norm": 0.8046875, "learning_rate": 1.998965961337168e-06, "loss": 4.0272, "step": 23086 }, { "epoch": 7.690597151661531, "grad_norm": 0.765625, "learning_rate": 1.9984158014233757e-06, "loss": 4.0112, "step": 23087 }, { "epoch": 7.690930290663779, "grad_norm": 0.73046875, "learning_rate": 1.9978657055911222e-06, "loss": 4.029, "step": 23088 }, { "epoch": 7.691263429666028, "grad_norm": 0.75390625, "learning_rate": 1.997315673846814e-06, "loss": 3.9658, "step": 23089 }, { "epoch": 7.691596568668277, "grad_norm": 0.7734375, "learning_rate": 1.9967657061968606e-06, "loss": 3.965, "step": 23090 }, { "epoch": 7.691929707670526, "grad_norm": 0.7890625, "learning_rate": 1.9962158026476676e-06, "loss": 4.0025, "step": 23091 }, { "epoch": 7.692262846672774, "grad_norm": 0.77734375, "learning_rate": 1.9956659632056394e-06, "loss": 3.9629, "step": 23092 }, { "epoch": 7.692595985675023, "grad_norm": 0.8671875, "learning_rate": 1.9951161878771777e-06, "loss": 4.0049, "step": 23093 }, { "epoch": 7.6929291246772715, "grad_norm": 0.7734375, "learning_rate": 1.994566476668691e-06, "loss": 3.9841, "step": 23094 }, { "epoch": 7.69326226367952, "grad_norm": 0.796875, "learning_rate": 1.994016829586583e-06, "loss": 3.9519, "step": 23095 }, { "epoch": 7.693595402681769, "grad_norm": 0.734375, "learning_rate": 1.99346724663725e-06, "loss": 3.9203, "step": 23096 }, { "epoch": 7.693928541684017, "grad_norm": 0.76171875, "learning_rate": 1.9929177278270943e-06, "loss": 4.0456, "step": 23097 }, { "epoch": 7.6942616806862665, "grad_norm": 0.7734375, "learning_rate": 1.9923682731625217e-06, "loss": 3.9621, "step": 23098 }, { "epoch": 7.694594819688515, "grad_norm": 0.72265625, "learning_rate": 1.9918188826499286e-06, "loss": 3.9349, "step": 23099 }, { "epoch": 7.694927958690764, "grad_norm": 0.75, "learning_rate": 1.991269556295715e-06, "loss": 3.9815, "step": 23100 }, { "epoch": 7.695261097693012, "grad_norm": 0.78515625, "learning_rate": 1.9907202941062768e-06, "loss": 3.9884, "step": 23101 }, { "epoch": 7.695594236695261, "grad_norm": 0.7265625, "learning_rate": 1.9901710960880167e-06, "loss": 4.0208, "step": 23102 }, { "epoch": 7.69592737569751, "grad_norm": 0.76953125, "learning_rate": 1.9896219622473288e-06, "loss": 3.971, "step": 23103 }, { "epoch": 7.696260514699759, "grad_norm": 0.78515625, "learning_rate": 1.9890728925906095e-06, "loss": 4.0275, "step": 23104 }, { "epoch": 7.696593653702007, "grad_norm": 0.80078125, "learning_rate": 1.9885238871242547e-06, "loss": 4.0086, "step": 23105 }, { "epoch": 7.6969267927042555, "grad_norm": 0.80859375, "learning_rate": 1.9879749458546574e-06, "loss": 4.0304, "step": 23106 }, { "epoch": 7.697259931706505, "grad_norm": 0.796875, "learning_rate": 1.987426068788214e-06, "loss": 3.9942, "step": 23107 }, { "epoch": 7.697593070708753, "grad_norm": 0.7578125, "learning_rate": 1.9868772559313154e-06, "loss": 4.0266, "step": 23108 }, { "epoch": 7.697926209711002, "grad_norm": 0.7421875, "learning_rate": 1.9863285072903562e-06, "loss": 3.9751, "step": 23109 }, { "epoch": 7.6982593487132505, "grad_norm": 0.78125, "learning_rate": 1.9857798228717273e-06, "loss": 4.0209, "step": 23110 }, { "epoch": 7.6985924877155, "grad_norm": 0.75390625, "learning_rate": 1.985231202681821e-06, "loss": 4.0215, "step": 23111 }, { "epoch": 7.698925626717748, "grad_norm": 0.77734375, "learning_rate": 1.9846826467270234e-06, "loss": 3.9727, "step": 23112 }, { "epoch": 7.699258765719996, "grad_norm": 0.7578125, "learning_rate": 1.98413415501373e-06, "loss": 3.9645, "step": 23113 }, { "epoch": 7.699591904722245, "grad_norm": 0.76171875, "learning_rate": 1.983585727548326e-06, "loss": 3.9225, "step": 23114 }, { "epoch": 7.699925043724494, "grad_norm": 0.7578125, "learning_rate": 1.9830373643371965e-06, "loss": 4.0009, "step": 23115 }, { "epoch": 7.700258182726743, "grad_norm": 0.7890625, "learning_rate": 1.982489065386734e-06, "loss": 3.9833, "step": 23116 }, { "epoch": 7.700591321728991, "grad_norm": 0.79296875, "learning_rate": 1.9819408307033233e-06, "loss": 3.9978, "step": 23117 }, { "epoch": 7.70092446073124, "grad_norm": 0.79296875, "learning_rate": 1.9813926602933502e-06, "loss": 3.9987, "step": 23118 }, { "epoch": 7.701257599733489, "grad_norm": 0.76953125, "learning_rate": 1.9808445541631968e-06, "loss": 3.9496, "step": 23119 }, { "epoch": 7.701590738735738, "grad_norm": 0.75390625, "learning_rate": 1.980296512319252e-06, "loss": 3.9438, "step": 23120 }, { "epoch": 7.701923877737986, "grad_norm": 0.83984375, "learning_rate": 1.979748534767897e-06, "loss": 3.9783, "step": 23121 }, { "epoch": 7.702257016740235, "grad_norm": 0.83203125, "learning_rate": 1.979200621515515e-06, "loss": 4.0053, "step": 23122 }, { "epoch": 7.702590155742484, "grad_norm": 0.78125, "learning_rate": 1.978652772568488e-06, "loss": 3.9614, "step": 23123 }, { "epoch": 7.702923294744732, "grad_norm": 0.75390625, "learning_rate": 1.9781049879331967e-06, "loss": 3.9925, "step": 23124 }, { "epoch": 7.703256433746981, "grad_norm": 0.74609375, "learning_rate": 1.9775572676160212e-06, "loss": 4.0069, "step": 23125 }, { "epoch": 7.703589572749229, "grad_norm": 0.72265625, "learning_rate": 1.977009611623341e-06, "loss": 3.9493, "step": 23126 }, { "epoch": 7.703922711751479, "grad_norm": 0.796875, "learning_rate": 1.976462019961537e-06, "loss": 3.9925, "step": 23127 }, { "epoch": 7.704255850753727, "grad_norm": 0.75390625, "learning_rate": 1.975914492636988e-06, "loss": 3.9465, "step": 23128 }, { "epoch": 7.704588989755976, "grad_norm": 0.765625, "learning_rate": 1.9753670296560693e-06, "loss": 4.0131, "step": 23129 }, { "epoch": 7.704922128758224, "grad_norm": 0.76953125, "learning_rate": 1.9748196310251555e-06, "loss": 3.9971, "step": 23130 }, { "epoch": 7.705255267760473, "grad_norm": 0.79296875, "learning_rate": 1.9742722967506306e-06, "loss": 4.0107, "step": 23131 }, { "epoch": 7.705588406762722, "grad_norm": 0.78515625, "learning_rate": 1.973725026838862e-06, "loss": 3.9807, "step": 23132 }, { "epoch": 7.70592154576497, "grad_norm": 0.74609375, "learning_rate": 1.9731778212962256e-06, "loss": 3.9735, "step": 23133 }, { "epoch": 7.706254684767219, "grad_norm": 0.74609375, "learning_rate": 1.972630680129098e-06, "loss": 4.0358, "step": 23134 }, { "epoch": 7.706587823769468, "grad_norm": 0.7578125, "learning_rate": 1.9720836033438505e-06, "loss": 3.9425, "step": 23135 }, { "epoch": 7.706920962771717, "grad_norm": 0.7578125, "learning_rate": 1.9715365909468563e-06, "loss": 3.9695, "step": 23136 }, { "epoch": 7.707254101773965, "grad_norm": 0.76171875, "learning_rate": 1.970989642944484e-06, "loss": 3.9897, "step": 23137 }, { "epoch": 7.707587240776213, "grad_norm": 0.78125, "learning_rate": 1.9704427593431076e-06, "loss": 4.0054, "step": 23138 }, { "epoch": 7.707920379778463, "grad_norm": 0.75390625, "learning_rate": 1.9698959401490978e-06, "loss": 4.0026, "step": 23139 }, { "epoch": 7.708253518780712, "grad_norm": 0.76171875, "learning_rate": 1.96934918536882e-06, "loss": 4.0188, "step": 23140 }, { "epoch": 7.70858665778296, "grad_norm": 0.71875, "learning_rate": 1.9688024950086465e-06, "loss": 4.0664, "step": 23141 }, { "epoch": 7.708919796785208, "grad_norm": 0.76953125, "learning_rate": 1.968255869074943e-06, "loss": 3.998, "step": 23142 }, { "epoch": 7.7092529357874575, "grad_norm": 0.80859375, "learning_rate": 1.967709307574077e-06, "loss": 3.9545, "step": 23143 }, { "epoch": 7.709586074789706, "grad_norm": 0.76953125, "learning_rate": 1.9671628105124142e-06, "loss": 3.989, "step": 23144 }, { "epoch": 7.709919213791955, "grad_norm": 0.76953125, "learning_rate": 1.9666163778963188e-06, "loss": 3.971, "step": 23145 }, { "epoch": 7.710252352794203, "grad_norm": 0.7734375, "learning_rate": 1.9660700097321592e-06, "loss": 3.8964, "step": 23146 }, { "epoch": 7.7105854917964525, "grad_norm": 0.78125, "learning_rate": 1.965523706026297e-06, "loss": 4.0116, "step": 23147 }, { "epoch": 7.710918630798701, "grad_norm": 0.75, "learning_rate": 1.964977466785097e-06, "loss": 3.9635, "step": 23148 }, { "epoch": 7.711251769800949, "grad_norm": 0.7734375, "learning_rate": 1.9644312920149203e-06, "loss": 3.9841, "step": 23149 }, { "epoch": 7.711584908803198, "grad_norm": 0.79296875, "learning_rate": 1.963885181722129e-06, "loss": 3.9259, "step": 23150 }, { "epoch": 7.7119180478054465, "grad_norm": 0.80078125, "learning_rate": 1.9633391359130842e-06, "loss": 3.973, "step": 23151 }, { "epoch": 7.712251186807696, "grad_norm": 0.76953125, "learning_rate": 1.9627931545941437e-06, "loss": 4.001, "step": 23152 }, { "epoch": 7.712584325809944, "grad_norm": 0.80078125, "learning_rate": 1.962247237771671e-06, "loss": 4.0039, "step": 23153 }, { "epoch": 7.712917464812193, "grad_norm": 0.75, "learning_rate": 1.9617013854520243e-06, "loss": 4.0785, "step": 23154 }, { "epoch": 7.7132506038144415, "grad_norm": 0.7734375, "learning_rate": 1.9611555976415603e-06, "loss": 3.9653, "step": 23155 }, { "epoch": 7.71358374281669, "grad_norm": 0.80078125, "learning_rate": 1.960609874346634e-06, "loss": 3.934, "step": 23156 }, { "epoch": 7.713916881818939, "grad_norm": 0.8046875, "learning_rate": 1.960064215573607e-06, "loss": 4.0017, "step": 23157 }, { "epoch": 7.714250020821187, "grad_norm": 0.765625, "learning_rate": 1.959518621328834e-06, "loss": 4.0468, "step": 23158 }, { "epoch": 7.7145831598234365, "grad_norm": 0.76953125, "learning_rate": 1.9589730916186637e-06, "loss": 3.9057, "step": 23159 }, { "epoch": 7.714916298825685, "grad_norm": 0.80078125, "learning_rate": 1.9584276264494584e-06, "loss": 3.9899, "step": 23160 }, { "epoch": 7.715249437827934, "grad_norm": 0.7890625, "learning_rate": 1.957882225827567e-06, "loss": 3.9538, "step": 23161 }, { "epoch": 7.715582576830182, "grad_norm": 0.79296875, "learning_rate": 1.9573368897593445e-06, "loss": 4.0398, "step": 23162 }, { "epoch": 7.715915715832431, "grad_norm": 0.80078125, "learning_rate": 1.956791618251139e-06, "loss": 3.9928, "step": 23163 }, { "epoch": 7.71624885483468, "grad_norm": 0.78515625, "learning_rate": 1.9562464113093063e-06, "loss": 3.9594, "step": 23164 }, { "epoch": 7.716581993836929, "grad_norm": 0.7578125, "learning_rate": 1.955701268940196e-06, "loss": 4.0147, "step": 23165 }, { "epoch": 7.716915132839177, "grad_norm": 0.74609375, "learning_rate": 1.9551561911501573e-06, "loss": 3.992, "step": 23166 }, { "epoch": 7.7172482718414255, "grad_norm": 0.78515625, "learning_rate": 1.9546111779455387e-06, "loss": 3.9335, "step": 23167 }, { "epoch": 7.717581410843675, "grad_norm": 0.7578125, "learning_rate": 1.9540662293326877e-06, "loss": 4.0132, "step": 23168 }, { "epoch": 7.717914549845923, "grad_norm": 0.828125, "learning_rate": 1.9535213453179536e-06, "loss": 4.032, "step": 23169 }, { "epoch": 7.718247688848172, "grad_norm": 0.76171875, "learning_rate": 1.9529765259076797e-06, "loss": 3.9668, "step": 23170 }, { "epoch": 7.71858082785042, "grad_norm": 0.76953125, "learning_rate": 1.952431771108216e-06, "loss": 4.0101, "step": 23171 }, { "epoch": 7.71891396685267, "grad_norm": 0.765625, "learning_rate": 1.951887080925907e-06, "loss": 3.9931, "step": 23172 }, { "epoch": 7.719247105854918, "grad_norm": 0.73046875, "learning_rate": 1.9513424553670963e-06, "loss": 3.948, "step": 23173 }, { "epoch": 7.719580244857166, "grad_norm": 0.78125, "learning_rate": 1.9507978944381246e-06, "loss": 3.9237, "step": 23174 }, { "epoch": 7.719913383859415, "grad_norm": 0.765625, "learning_rate": 1.950253398145341e-06, "loss": 3.9949, "step": 23175 }, { "epoch": 7.720246522861664, "grad_norm": 0.765625, "learning_rate": 1.9497089664950864e-06, "loss": 3.9615, "step": 23176 }, { "epoch": 7.720579661863913, "grad_norm": 0.79296875, "learning_rate": 1.949164599493696e-06, "loss": 3.9739, "step": 23177 }, { "epoch": 7.720912800866161, "grad_norm": 0.73046875, "learning_rate": 1.948620297147517e-06, "loss": 4.0291, "step": 23178 }, { "epoch": 7.72124593986841, "grad_norm": 0.78515625, "learning_rate": 1.9480760594628875e-06, "loss": 4.0712, "step": 23179 }, { "epoch": 7.721579078870659, "grad_norm": 0.78515625, "learning_rate": 1.9475318864461465e-06, "loss": 3.9924, "step": 23180 }, { "epoch": 7.721912217872908, "grad_norm": 0.7578125, "learning_rate": 1.9469877781036295e-06, "loss": 4.0302, "step": 23181 }, { "epoch": 7.722245356875156, "grad_norm": 0.76171875, "learning_rate": 1.9464437344416804e-06, "loss": 4.0232, "step": 23182 }, { "epoch": 7.722578495877405, "grad_norm": 0.74609375, "learning_rate": 1.945899755466632e-06, "loss": 3.9537, "step": 23183 }, { "epoch": 7.722911634879654, "grad_norm": 0.7265625, "learning_rate": 1.945355841184821e-06, "loss": 3.9924, "step": 23184 }, { "epoch": 7.723244773881902, "grad_norm": 0.7578125, "learning_rate": 1.9448119916025843e-06, "loss": 4.0362, "step": 23185 }, { "epoch": 7.723577912884151, "grad_norm": 0.75390625, "learning_rate": 1.9442682067262548e-06, "loss": 4.0571, "step": 23186 }, { "epoch": 7.723911051886399, "grad_norm": 0.80078125, "learning_rate": 1.9437244865621667e-06, "loss": 3.9213, "step": 23187 }, { "epoch": 7.7242441908886486, "grad_norm": 0.75, "learning_rate": 1.9431808311166516e-06, "loss": 3.9983, "step": 23188 }, { "epoch": 7.724577329890897, "grad_norm": 0.7890625, "learning_rate": 1.942637240396046e-06, "loss": 3.999, "step": 23189 }, { "epoch": 7.724910468893146, "grad_norm": 0.76953125, "learning_rate": 1.942093714406679e-06, "loss": 3.9899, "step": 23190 }, { "epoch": 7.725243607895394, "grad_norm": 0.78515625, "learning_rate": 1.9415502531548817e-06, "loss": 3.9801, "step": 23191 }, { "epoch": 7.725576746897643, "grad_norm": 0.73828125, "learning_rate": 1.9410068566469854e-06, "loss": 3.9912, "step": 23192 }, { "epoch": 7.725909885899892, "grad_norm": 0.7578125, "learning_rate": 1.9404635248893153e-06, "loss": 3.9594, "step": 23193 }, { "epoch": 7.72624302490214, "grad_norm": 0.76171875, "learning_rate": 1.9399202578882076e-06, "loss": 3.9499, "step": 23194 }, { "epoch": 7.726576163904389, "grad_norm": 0.796875, "learning_rate": 1.939377055649984e-06, "loss": 4.0248, "step": 23195 }, { "epoch": 7.726909302906638, "grad_norm": 0.76953125, "learning_rate": 1.9388339181809715e-06, "loss": 3.8923, "step": 23196 }, { "epoch": 7.727242441908887, "grad_norm": 0.796875, "learning_rate": 1.9382908454874995e-06, "loss": 3.9512, "step": 23197 }, { "epoch": 7.727575580911135, "grad_norm": 0.76171875, "learning_rate": 1.9377478375758933e-06, "loss": 3.971, "step": 23198 }, { "epoch": 7.727908719913384, "grad_norm": 0.77734375, "learning_rate": 1.937204894452477e-06, "loss": 3.9318, "step": 23199 }, { "epoch": 7.7282418589156325, "grad_norm": 0.81640625, "learning_rate": 1.9366620161235722e-06, "loss": 3.9713, "step": 23200 }, { "epoch": 7.728574997917882, "grad_norm": 0.7734375, "learning_rate": 1.936119202595507e-06, "loss": 3.9369, "step": 23201 }, { "epoch": 7.72890813692013, "grad_norm": 0.73828125, "learning_rate": 1.935576453874601e-06, "loss": 3.9168, "step": 23202 }, { "epoch": 7.729241275922378, "grad_norm": 0.77734375, "learning_rate": 1.9350337699671787e-06, "loss": 3.9939, "step": 23203 }, { "epoch": 7.7295744149246275, "grad_norm": 0.76171875, "learning_rate": 1.934491150879558e-06, "loss": 3.9881, "step": 23204 }, { "epoch": 7.729907553926876, "grad_norm": 0.80078125, "learning_rate": 1.93394859661806e-06, "loss": 3.952, "step": 23205 }, { "epoch": 7.730240692929125, "grad_norm": 0.76171875, "learning_rate": 1.933406107189006e-06, "loss": 3.9841, "step": 23206 }, { "epoch": 7.730573831931373, "grad_norm": 0.7578125, "learning_rate": 1.9328636825987108e-06, "loss": 4.0463, "step": 23207 }, { "epoch": 7.7309069709336224, "grad_norm": 0.8046875, "learning_rate": 1.9323213228534977e-06, "loss": 3.9634, "step": 23208 }, { "epoch": 7.731240109935871, "grad_norm": 0.80078125, "learning_rate": 1.9317790279596813e-06, "loss": 3.9433, "step": 23209 }, { "epoch": 7.731573248938119, "grad_norm": 0.7890625, "learning_rate": 1.9312367979235783e-06, "loss": 3.9988, "step": 23210 }, { "epoch": 7.731906387940368, "grad_norm": 0.74609375, "learning_rate": 1.930694632751503e-06, "loss": 3.9874, "step": 23211 }, { "epoch": 7.7322395269426165, "grad_norm": 0.74609375, "learning_rate": 1.9301525324497765e-06, "loss": 3.9866, "step": 23212 }, { "epoch": 7.732572665944866, "grad_norm": 0.75, "learning_rate": 1.9296104970247067e-06, "loss": 4.0084, "step": 23213 }, { "epoch": 7.732905804947114, "grad_norm": 0.8203125, "learning_rate": 1.9290685264826066e-06, "loss": 4.0149, "step": 23214 }, { "epoch": 7.733238943949363, "grad_norm": 0.76171875, "learning_rate": 1.928526620829794e-06, "loss": 3.9793, "step": 23215 }, { "epoch": 7.7335720829516115, "grad_norm": 0.76953125, "learning_rate": 1.9279847800725783e-06, "loss": 3.9366, "step": 23216 }, { "epoch": 7.73390522195386, "grad_norm": 0.76953125, "learning_rate": 1.927443004217271e-06, "loss": 3.991, "step": 23217 }, { "epoch": 7.734238360956109, "grad_norm": 0.76171875, "learning_rate": 1.9269012932701803e-06, "loss": 4.0567, "step": 23218 }, { "epoch": 7.734571499958357, "grad_norm": 0.75390625, "learning_rate": 1.9263596472376204e-06, "loss": 4.0275, "step": 23219 }, { "epoch": 7.734904638960606, "grad_norm": 0.796875, "learning_rate": 1.9258180661258983e-06, "loss": 3.9939, "step": 23220 }, { "epoch": 7.735237777962855, "grad_norm": 0.7734375, "learning_rate": 1.9252765499413217e-06, "loss": 4.0226, "step": 23221 }, { "epoch": 7.735570916965104, "grad_norm": 0.8046875, "learning_rate": 1.9247350986901985e-06, "loss": 3.9769, "step": 23222 }, { "epoch": 7.735904055967352, "grad_norm": 0.7578125, "learning_rate": 1.9241937123788357e-06, "loss": 4.0026, "step": 23223 }, { "epoch": 7.736237194969601, "grad_norm": 0.76953125, "learning_rate": 1.9236523910135395e-06, "loss": 3.959, "step": 23224 }, { "epoch": 7.73657033397185, "grad_norm": 0.79296875, "learning_rate": 1.923111134600611e-06, "loss": 3.9811, "step": 23225 }, { "epoch": 7.736903472974099, "grad_norm": 0.76171875, "learning_rate": 1.922569943146362e-06, "loss": 3.954, "step": 23226 }, { "epoch": 7.737236611976347, "grad_norm": 0.80078125, "learning_rate": 1.9220288166570912e-06, "loss": 3.9755, "step": 23227 }, { "epoch": 7.7375697509785955, "grad_norm": 0.76171875, "learning_rate": 1.921487755139104e-06, "loss": 3.9969, "step": 23228 }, { "epoch": 7.737902889980845, "grad_norm": 0.734375, "learning_rate": 1.9209467585986982e-06, "loss": 4.0092, "step": 23229 }, { "epoch": 7.738236028983093, "grad_norm": 0.80078125, "learning_rate": 1.920405827042183e-06, "loss": 3.9564, "step": 23230 }, { "epoch": 7.738569167985342, "grad_norm": 0.7734375, "learning_rate": 1.919864960475852e-06, "loss": 3.9469, "step": 23231 }, { "epoch": 7.73890230698759, "grad_norm": 0.72265625, "learning_rate": 1.919324158906006e-06, "loss": 4.0256, "step": 23232 }, { "epoch": 7.73923544598984, "grad_norm": 0.7890625, "learning_rate": 1.9187834223389482e-06, "loss": 3.982, "step": 23233 }, { "epoch": 7.739568584992088, "grad_norm": 0.76171875, "learning_rate": 1.9182427507809754e-06, "loss": 4.0232, "step": 23234 }, { "epoch": 7.739901723994336, "grad_norm": 0.7421875, "learning_rate": 1.9177021442383826e-06, "loss": 4.0803, "step": 23235 }, { "epoch": 7.740234862996585, "grad_norm": 0.7734375, "learning_rate": 1.91716160271747e-06, "loss": 4.045, "step": 23236 }, { "epoch": 7.740568001998834, "grad_norm": 0.765625, "learning_rate": 1.916621126224529e-06, "loss": 3.9177, "step": 23237 }, { "epoch": 7.740901141001083, "grad_norm": 0.765625, "learning_rate": 1.9160807147658617e-06, "loss": 3.899, "step": 23238 }, { "epoch": 7.741234280003331, "grad_norm": 0.73828125, "learning_rate": 1.915540368347761e-06, "loss": 3.9604, "step": 23239 }, { "epoch": 7.74156741900558, "grad_norm": 0.80078125, "learning_rate": 1.915000086976515e-06, "loss": 3.9478, "step": 23240 }, { "epoch": 7.741900558007829, "grad_norm": 0.7890625, "learning_rate": 1.914459870658423e-06, "loss": 3.9373, "step": 23241 }, { "epoch": 7.742233697010078, "grad_norm": 0.77734375, "learning_rate": 1.913919719399774e-06, "loss": 4.0116, "step": 23242 }, { "epoch": 7.742566836012326, "grad_norm": 0.77734375, "learning_rate": 1.9133796332068623e-06, "loss": 4.0017, "step": 23243 }, { "epoch": 7.742899975014575, "grad_norm": 0.796875, "learning_rate": 1.912839612085974e-06, "loss": 4.0354, "step": 23244 }, { "epoch": 7.743233114016824, "grad_norm": 0.75390625, "learning_rate": 1.9122996560434057e-06, "loss": 4.0342, "step": 23245 }, { "epoch": 7.743566253019072, "grad_norm": 0.77734375, "learning_rate": 1.911759765085443e-06, "loss": 4.0861, "step": 23246 }, { "epoch": 7.743899392021321, "grad_norm": 0.8203125, "learning_rate": 1.9112199392183753e-06, "loss": 3.9875, "step": 23247 }, { "epoch": 7.744232531023569, "grad_norm": 0.80859375, "learning_rate": 1.9106801784484896e-06, "loss": 4.0048, "step": 23248 }, { "epoch": 7.7445656700258185, "grad_norm": 0.796875, "learning_rate": 1.9101404827820734e-06, "loss": 3.9894, "step": 23249 }, { "epoch": 7.744898809028067, "grad_norm": 0.80859375, "learning_rate": 1.909600852225413e-06, "loss": 3.8811, "step": 23250 }, { "epoch": 7.745231948030316, "grad_norm": 0.80078125, "learning_rate": 1.9090612867847917e-06, "loss": 4.071, "step": 23251 }, { "epoch": 7.745565087032564, "grad_norm": 0.7578125, "learning_rate": 1.908521786466499e-06, "loss": 3.9678, "step": 23252 }, { "epoch": 7.745898226034813, "grad_norm": 0.73828125, "learning_rate": 1.907982351276816e-06, "loss": 4.0427, "step": 23253 }, { "epoch": 7.746231365037062, "grad_norm": 0.76171875, "learning_rate": 1.9074429812220257e-06, "loss": 4.0445, "step": 23254 }, { "epoch": 7.74656450403931, "grad_norm": 0.8359375, "learning_rate": 1.9069036763084103e-06, "loss": 4.0194, "step": 23255 }, { "epoch": 7.746897643041559, "grad_norm": 0.73828125, "learning_rate": 1.9063644365422555e-06, "loss": 4.007, "step": 23256 }, { "epoch": 7.7472307820438076, "grad_norm": 0.76171875, "learning_rate": 1.9058252619298383e-06, "loss": 4.0047, "step": 23257 }, { "epoch": 7.747563921046057, "grad_norm": 0.76953125, "learning_rate": 1.9052861524774368e-06, "loss": 4.0278, "step": 23258 }, { "epoch": 7.747897060048305, "grad_norm": 0.79296875, "learning_rate": 1.904747108191336e-06, "loss": 3.9196, "step": 23259 }, { "epoch": 7.748230199050554, "grad_norm": 0.82421875, "learning_rate": 1.9042081290778123e-06, "loss": 3.972, "step": 23260 }, { "epoch": 7.7485633380528025, "grad_norm": 0.75390625, "learning_rate": 1.9036692151431428e-06, "loss": 3.9959, "step": 23261 }, { "epoch": 7.748896477055052, "grad_norm": 0.76953125, "learning_rate": 1.903130366393604e-06, "loss": 4.0507, "step": 23262 }, { "epoch": 7.7492296160573, "grad_norm": 0.77734375, "learning_rate": 1.9025915828354747e-06, "loss": 3.9485, "step": 23263 }, { "epoch": 7.749562755059548, "grad_norm": 0.76953125, "learning_rate": 1.9020528644750303e-06, "loss": 3.9712, "step": 23264 }, { "epoch": 7.7498958940617975, "grad_norm": 0.76171875, "learning_rate": 1.9015142113185449e-06, "loss": 4.0237, "step": 23265 }, { "epoch": 7.750229033064046, "grad_norm": 0.7578125, "learning_rate": 1.9009756233722924e-06, "loss": 3.9905, "step": 23266 }, { "epoch": 7.750562172066295, "grad_norm": 0.7734375, "learning_rate": 1.9004371006425464e-06, "loss": 3.9877, "step": 23267 }, { "epoch": 7.750895311068543, "grad_norm": 0.82421875, "learning_rate": 1.89989864313558e-06, "loss": 3.9807, "step": 23268 }, { "epoch": 7.751228450070792, "grad_norm": 0.83203125, "learning_rate": 1.8993602508576615e-06, "loss": 4.0377, "step": 23269 }, { "epoch": 7.751561589073041, "grad_norm": 0.80078125, "learning_rate": 1.8988219238150678e-06, "loss": 4.0319, "step": 23270 }, { "epoch": 7.751894728075289, "grad_norm": 0.7578125, "learning_rate": 1.898283662014067e-06, "loss": 3.998, "step": 23271 }, { "epoch": 7.752227867077538, "grad_norm": 0.765625, "learning_rate": 1.8977454654609275e-06, "loss": 3.947, "step": 23272 }, { "epoch": 7.7525610060797865, "grad_norm": 0.79296875, "learning_rate": 1.8972073341619166e-06, "loss": 3.9641, "step": 23273 }, { "epoch": 7.752894145082036, "grad_norm": 0.76171875, "learning_rate": 1.8966692681233097e-06, "loss": 3.9683, "step": 23274 }, { "epoch": 7.753227284084284, "grad_norm": 0.77734375, "learning_rate": 1.8961312673513664e-06, "loss": 3.929, "step": 23275 }, { "epoch": 7.753560423086533, "grad_norm": 0.765625, "learning_rate": 1.8955933318523544e-06, "loss": 3.9276, "step": 23276 }, { "epoch": 7.753893562088781, "grad_norm": 0.765625, "learning_rate": 1.8950554616325428e-06, "loss": 3.974, "step": 23277 }, { "epoch": 7.75422670109103, "grad_norm": 0.7734375, "learning_rate": 1.894517656698194e-06, "loss": 4.0093, "step": 23278 }, { "epoch": 7.754559840093279, "grad_norm": 0.7578125, "learning_rate": 1.893979917055574e-06, "loss": 3.9927, "step": 23279 }, { "epoch": 7.754892979095528, "grad_norm": 0.7734375, "learning_rate": 1.8934422427109424e-06, "loss": 3.9244, "step": 23280 }, { "epoch": 7.755226118097776, "grad_norm": 0.79296875, "learning_rate": 1.892904633670568e-06, "loss": 4.0671, "step": 23281 }, { "epoch": 7.755559257100025, "grad_norm": 0.77734375, "learning_rate": 1.8923670899407089e-06, "loss": 3.9579, "step": 23282 }, { "epoch": 7.755892396102274, "grad_norm": 0.76171875, "learning_rate": 1.8918296115276293e-06, "loss": 4.0151, "step": 23283 }, { "epoch": 7.756225535104522, "grad_norm": 0.77734375, "learning_rate": 1.8912921984375832e-06, "loss": 3.9414, "step": 23284 }, { "epoch": 7.756558674106771, "grad_norm": 0.796875, "learning_rate": 1.8907548506768368e-06, "loss": 4.0377, "step": 23285 }, { "epoch": 7.75689181310902, "grad_norm": 0.79296875, "learning_rate": 1.8902175682516473e-06, "loss": 3.9675, "step": 23286 }, { "epoch": 7.757224952111269, "grad_norm": 0.7734375, "learning_rate": 1.8896803511682714e-06, "loss": 3.9854, "step": 23287 }, { "epoch": 7.757558091113517, "grad_norm": 0.84765625, "learning_rate": 1.889143199432966e-06, "loss": 3.9713, "step": 23288 }, { "epoch": 7.757891230115765, "grad_norm": 0.76953125, "learning_rate": 1.8886061130519914e-06, "loss": 4.0249, "step": 23289 }, { "epoch": 7.758224369118015, "grad_norm": 0.75390625, "learning_rate": 1.8880690920316003e-06, "loss": 3.943, "step": 23290 }, { "epoch": 7.758557508120263, "grad_norm": 0.7890625, "learning_rate": 1.88753213637805e-06, "loss": 3.9772, "step": 23291 }, { "epoch": 7.758890647122512, "grad_norm": 0.78515625, "learning_rate": 1.8869952460975937e-06, "loss": 4.0445, "step": 23292 }, { "epoch": 7.75922378612476, "grad_norm": 0.77734375, "learning_rate": 1.8864584211964844e-06, "loss": 4.0017, "step": 23293 }, { "epoch": 7.75955692512701, "grad_norm": 0.7421875, "learning_rate": 1.8859216616809763e-06, "loss": 3.973, "step": 23294 }, { "epoch": 7.759890064129258, "grad_norm": 0.8203125, "learning_rate": 1.8853849675573176e-06, "loss": 3.9578, "step": 23295 }, { "epoch": 7.760223203131506, "grad_norm": 0.79296875, "learning_rate": 1.8848483388317658e-06, "loss": 3.9274, "step": 23296 }, { "epoch": 7.760556342133755, "grad_norm": 0.73828125, "learning_rate": 1.8843117755105685e-06, "loss": 3.9995, "step": 23297 }, { "epoch": 7.760889481136004, "grad_norm": 0.76953125, "learning_rate": 1.8837752775999747e-06, "loss": 3.9154, "step": 23298 }, { "epoch": 7.761222620138253, "grad_norm": 0.75, "learning_rate": 1.8832388451062321e-06, "loss": 3.951, "step": 23299 }, { "epoch": 7.761555759140501, "grad_norm": 0.765625, "learning_rate": 1.8827024780355939e-06, "loss": 3.9916, "step": 23300 }, { "epoch": 7.76188889814275, "grad_norm": 0.7734375, "learning_rate": 1.8821661763943068e-06, "loss": 4.0709, "step": 23301 }, { "epoch": 7.762222037144999, "grad_norm": 0.73046875, "learning_rate": 1.8816299401886108e-06, "loss": 3.9063, "step": 23302 }, { "epoch": 7.762555176147248, "grad_norm": 0.80078125, "learning_rate": 1.8810937694247593e-06, "loss": 4.0049, "step": 23303 }, { "epoch": 7.762888315149496, "grad_norm": 0.73828125, "learning_rate": 1.8805576641089947e-06, "loss": 4.0024, "step": 23304 }, { "epoch": 7.763221454151745, "grad_norm": 0.7578125, "learning_rate": 1.8800216242475611e-06, "loss": 3.9408, "step": 23305 }, { "epoch": 7.7635545931539935, "grad_norm": 0.75, "learning_rate": 1.8794856498467014e-06, "loss": 4.0506, "step": 23306 }, { "epoch": 7.763887732156242, "grad_norm": 0.765625, "learning_rate": 1.8789497409126618e-06, "loss": 3.973, "step": 23307 }, { "epoch": 7.764220871158491, "grad_norm": 0.75, "learning_rate": 1.8784138974516818e-06, "loss": 3.9537, "step": 23308 }, { "epoch": 7.764554010160739, "grad_norm": 0.828125, "learning_rate": 1.8778781194700046e-06, "loss": 3.9957, "step": 23309 }, { "epoch": 7.7648871491629885, "grad_norm": 0.82421875, "learning_rate": 1.8773424069738692e-06, "loss": 4.057, "step": 23310 }, { "epoch": 7.765220288165237, "grad_norm": 0.7734375, "learning_rate": 1.876806759969516e-06, "loss": 3.9657, "step": 23311 }, { "epoch": 7.765553427167486, "grad_norm": 0.765625, "learning_rate": 1.8762711784631847e-06, "loss": 3.9848, "step": 23312 }, { "epoch": 7.765886566169734, "grad_norm": 0.79296875, "learning_rate": 1.8757356624611104e-06, "loss": 3.9321, "step": 23313 }, { "epoch": 7.766219705171983, "grad_norm": 0.7421875, "learning_rate": 1.8752002119695367e-06, "loss": 3.9287, "step": 23314 }, { "epoch": 7.766552844174232, "grad_norm": 0.75, "learning_rate": 1.8746648269946965e-06, "loss": 4.0073, "step": 23315 }, { "epoch": 7.76688598317648, "grad_norm": 0.72265625, "learning_rate": 1.8741295075428265e-06, "loss": 3.9793, "step": 23316 }, { "epoch": 7.767219122178729, "grad_norm": 0.71484375, "learning_rate": 1.8735942536201606e-06, "loss": 4.0575, "step": 23317 }, { "epoch": 7.7675522611809775, "grad_norm": 0.8046875, "learning_rate": 1.8730590652329366e-06, "loss": 3.9765, "step": 23318 }, { "epoch": 7.767885400183227, "grad_norm": 0.7578125, "learning_rate": 1.872523942387389e-06, "loss": 4.0235, "step": 23319 }, { "epoch": 7.768218539185475, "grad_norm": 0.76171875, "learning_rate": 1.8719888850897432e-06, "loss": 3.9749, "step": 23320 }, { "epoch": 7.768551678187724, "grad_norm": 0.80078125, "learning_rate": 1.8714538933462397e-06, "loss": 3.9356, "step": 23321 }, { "epoch": 7.7688848171899725, "grad_norm": 0.74609375, "learning_rate": 1.8709189671631066e-06, "loss": 4.0188, "step": 23322 }, { "epoch": 7.769217956192222, "grad_norm": 0.7265625, "learning_rate": 1.870384106546575e-06, "loss": 4.0611, "step": 23323 }, { "epoch": 7.76955109519447, "grad_norm": 0.80078125, "learning_rate": 1.869849311502872e-06, "loss": 3.9245, "step": 23324 }, { "epoch": 7.769884234196718, "grad_norm": 0.76171875, "learning_rate": 1.869314582038233e-06, "loss": 3.9324, "step": 23325 }, { "epoch": 7.770217373198967, "grad_norm": 0.75390625, "learning_rate": 1.8687799181588818e-06, "loss": 3.9875, "step": 23326 }, { "epoch": 7.770550512201216, "grad_norm": 0.78125, "learning_rate": 1.8682453198710482e-06, "loss": 3.9317, "step": 23327 }, { "epoch": 7.770883651203465, "grad_norm": 0.72265625, "learning_rate": 1.8677107871809582e-06, "loss": 3.9741, "step": 23328 }, { "epoch": 7.771216790205713, "grad_norm": 0.74609375, "learning_rate": 1.867176320094838e-06, "loss": 3.9492, "step": 23329 }, { "epoch": 7.771549929207962, "grad_norm": 0.78125, "learning_rate": 1.8666419186189135e-06, "loss": 3.998, "step": 23330 }, { "epoch": 7.771883068210211, "grad_norm": 0.74609375, "learning_rate": 1.8661075827594078e-06, "loss": 3.9735, "step": 23331 }, { "epoch": 7.772216207212459, "grad_norm": 0.7578125, "learning_rate": 1.8655733125225439e-06, "loss": 4.0444, "step": 23332 }, { "epoch": 7.772549346214708, "grad_norm": 0.80859375, "learning_rate": 1.8650391079145495e-06, "loss": 3.9852, "step": 23333 }, { "epoch": 7.7728824852169565, "grad_norm": 0.73046875, "learning_rate": 1.8645049689416435e-06, "loss": 4.0323, "step": 23334 }, { "epoch": 7.773215624219206, "grad_norm": 0.77734375, "learning_rate": 1.8639708956100482e-06, "loss": 4.0273, "step": 23335 }, { "epoch": 7.773548763221454, "grad_norm": 0.7265625, "learning_rate": 1.8634368879259827e-06, "loss": 3.9212, "step": 23336 }, { "epoch": 7.773881902223703, "grad_norm": 0.7734375, "learning_rate": 1.8629029458956723e-06, "loss": 4.0372, "step": 23337 }, { "epoch": 7.774215041225951, "grad_norm": 0.7578125, "learning_rate": 1.862369069525331e-06, "loss": 3.9456, "step": 23338 }, { "epoch": 7.774548180228201, "grad_norm": 0.78515625, "learning_rate": 1.861835258821177e-06, "loss": 4.0195, "step": 23339 }, { "epoch": 7.774881319230449, "grad_norm": 0.7734375, "learning_rate": 1.8613015137894315e-06, "loss": 4.0215, "step": 23340 }, { "epoch": 7.775214458232698, "grad_norm": 0.75, "learning_rate": 1.8607678344363103e-06, "loss": 3.9778, "step": 23341 }, { "epoch": 7.775547597234946, "grad_norm": 0.74609375, "learning_rate": 1.8602342207680298e-06, "loss": 3.9879, "step": 23342 }, { "epoch": 7.775880736237195, "grad_norm": 0.77734375, "learning_rate": 1.8597006727908019e-06, "loss": 3.9352, "step": 23343 }, { "epoch": 7.776213875239444, "grad_norm": 0.7265625, "learning_rate": 1.8591671905108468e-06, "loss": 4.0051, "step": 23344 }, { "epoch": 7.776547014241692, "grad_norm": 0.765625, "learning_rate": 1.858633773934376e-06, "loss": 3.9779, "step": 23345 }, { "epoch": 7.776880153243941, "grad_norm": 0.765625, "learning_rate": 1.8581004230676028e-06, "loss": 4.0154, "step": 23346 }, { "epoch": 7.77721329224619, "grad_norm": 0.79296875, "learning_rate": 1.8575671379167388e-06, "loss": 3.9541, "step": 23347 }, { "epoch": 7.777546431248439, "grad_norm": 0.828125, "learning_rate": 1.8570339184879962e-06, "loss": 3.9367, "step": 23348 }, { "epoch": 7.777879570250687, "grad_norm": 0.78515625, "learning_rate": 1.8565007647875862e-06, "loss": 3.9247, "step": 23349 }, { "epoch": 7.778212709252935, "grad_norm": 0.78125, "learning_rate": 1.8559676768217157e-06, "loss": 3.965, "step": 23350 }, { "epoch": 7.778545848255185, "grad_norm": 0.76953125, "learning_rate": 1.855434654596599e-06, "loss": 3.9662, "step": 23351 }, { "epoch": 7.778878987257433, "grad_norm": 0.75390625, "learning_rate": 1.8549016981184432e-06, "loss": 4.0397, "step": 23352 }, { "epoch": 7.779212126259682, "grad_norm": 0.76171875, "learning_rate": 1.8543688073934549e-06, "loss": 3.9946, "step": 23353 }, { "epoch": 7.77954526526193, "grad_norm": 0.7578125, "learning_rate": 1.8538359824278391e-06, "loss": 4.0013, "step": 23354 }, { "epoch": 7.7798784042641795, "grad_norm": 0.8203125, "learning_rate": 1.85330322322781e-06, "loss": 3.9387, "step": 23355 }, { "epoch": 7.780211543266428, "grad_norm": 0.796875, "learning_rate": 1.8527705297995646e-06, "loss": 4.0158, "step": 23356 }, { "epoch": 7.780544682268676, "grad_norm": 0.72265625, "learning_rate": 1.852237902149308e-06, "loss": 3.9693, "step": 23357 }, { "epoch": 7.780877821270925, "grad_norm": 0.78125, "learning_rate": 1.8517053402832496e-06, "loss": 3.9642, "step": 23358 }, { "epoch": 7.781210960273174, "grad_norm": 0.80859375, "learning_rate": 1.8511728442075901e-06, "loss": 4.0214, "step": 23359 }, { "epoch": 7.781544099275423, "grad_norm": 0.78515625, "learning_rate": 1.8506404139285314e-06, "loss": 3.9608, "step": 23360 }, { "epoch": 7.781877238277671, "grad_norm": 0.78515625, "learning_rate": 1.8501080494522723e-06, "loss": 4.0074, "step": 23361 }, { "epoch": 7.78221037727992, "grad_norm": 0.80859375, "learning_rate": 1.84957575078502e-06, "loss": 3.9106, "step": 23362 }, { "epoch": 7.782543516282169, "grad_norm": 0.734375, "learning_rate": 1.849043517932971e-06, "loss": 4.018, "step": 23363 }, { "epoch": 7.782876655284418, "grad_norm": 0.75, "learning_rate": 1.8485113509023252e-06, "loss": 3.9256, "step": 23364 }, { "epoch": 7.783209794286666, "grad_norm": 0.77734375, "learning_rate": 1.8479792496992811e-06, "loss": 4.0157, "step": 23365 }, { "epoch": 7.783542933288915, "grad_norm": 0.765625, "learning_rate": 1.8474472143300365e-06, "loss": 3.9303, "step": 23366 }, { "epoch": 7.7838760722911635, "grad_norm": 0.7734375, "learning_rate": 1.8469152448007884e-06, "loss": 4.0145, "step": 23367 }, { "epoch": 7.784209211293412, "grad_norm": 0.76953125, "learning_rate": 1.8463833411177315e-06, "loss": 3.9969, "step": 23368 }, { "epoch": 7.784542350295661, "grad_norm": 0.80078125, "learning_rate": 1.8458515032870647e-06, "loss": 3.9907, "step": 23369 }, { "epoch": 7.784875489297909, "grad_norm": 0.73828125, "learning_rate": 1.8453197313149805e-06, "loss": 4.0141, "step": 23370 }, { "epoch": 7.7852086283001585, "grad_norm": 0.76953125, "learning_rate": 1.8447880252076734e-06, "loss": 3.9216, "step": 23371 }, { "epoch": 7.785541767302407, "grad_norm": 0.7734375, "learning_rate": 1.844256384971335e-06, "loss": 3.9926, "step": 23372 }, { "epoch": 7.785874906304656, "grad_norm": 0.74609375, "learning_rate": 1.8437248106121637e-06, "loss": 3.9555, "step": 23373 }, { "epoch": 7.786208045306904, "grad_norm": 0.74609375, "learning_rate": 1.8431933021363455e-06, "loss": 3.9424, "step": 23374 }, { "epoch": 7.7865411843091525, "grad_norm": 0.83203125, "learning_rate": 1.8426618595500716e-06, "loss": 3.9397, "step": 23375 }, { "epoch": 7.786874323311402, "grad_norm": 0.765625, "learning_rate": 1.8421304828595309e-06, "loss": 3.9502, "step": 23376 }, { "epoch": 7.78720746231365, "grad_norm": 0.734375, "learning_rate": 1.8415991720709183e-06, "loss": 3.9967, "step": 23377 }, { "epoch": 7.787540601315899, "grad_norm": 0.7421875, "learning_rate": 1.841067927190418e-06, "loss": 3.9912, "step": 23378 }, { "epoch": 7.7878737403181475, "grad_norm": 0.765625, "learning_rate": 1.8405367482242199e-06, "loss": 4.0459, "step": 23379 }, { "epoch": 7.788206879320397, "grad_norm": 0.79296875, "learning_rate": 1.8400056351785086e-06, "loss": 3.954, "step": 23380 }, { "epoch": 7.788540018322645, "grad_norm": 0.78125, "learning_rate": 1.8394745880594738e-06, "loss": 3.9596, "step": 23381 }, { "epoch": 7.788873157324894, "grad_norm": 0.74609375, "learning_rate": 1.8389436068733014e-06, "loss": 4.0991, "step": 23382 }, { "epoch": 7.7892062963271425, "grad_norm": 0.7578125, "learning_rate": 1.8384126916261702e-06, "loss": 4.0496, "step": 23383 }, { "epoch": 7.789539435329392, "grad_norm": 0.78515625, "learning_rate": 1.8378818423242696e-06, "loss": 3.9087, "step": 23384 }, { "epoch": 7.78987257433164, "grad_norm": 0.734375, "learning_rate": 1.8373510589737818e-06, "loss": 3.9801, "step": 23385 }, { "epoch": 7.790205713333888, "grad_norm": 0.76953125, "learning_rate": 1.8368203415808894e-06, "loss": 3.9646, "step": 23386 }, { "epoch": 7.790538852336137, "grad_norm": 0.79296875, "learning_rate": 1.8362896901517703e-06, "loss": 3.9058, "step": 23387 }, { "epoch": 7.790871991338386, "grad_norm": 0.765625, "learning_rate": 1.8357591046926118e-06, "loss": 3.9624, "step": 23388 }, { "epoch": 7.791205130340635, "grad_norm": 0.77734375, "learning_rate": 1.835228585209591e-06, "loss": 4.017, "step": 23389 }, { "epoch": 7.791538269342883, "grad_norm": 0.7265625, "learning_rate": 1.8346981317088873e-06, "loss": 4.0419, "step": 23390 }, { "epoch": 7.791871408345132, "grad_norm": 0.765625, "learning_rate": 1.8341677441966792e-06, "loss": 3.9629, "step": 23391 }, { "epoch": 7.792204547347381, "grad_norm": 0.796875, "learning_rate": 1.8336374226791442e-06, "loss": 4.067, "step": 23392 }, { "epoch": 7.792537686349629, "grad_norm": 0.84375, "learning_rate": 1.8331071671624604e-06, "loss": 3.9666, "step": 23393 }, { "epoch": 7.792870825351878, "grad_norm": 0.77734375, "learning_rate": 1.8325769776528018e-06, "loss": 4.0643, "step": 23394 }, { "epoch": 7.793203964354126, "grad_norm": 0.77734375, "learning_rate": 1.832046854156347e-06, "loss": 3.9261, "step": 23395 }, { "epoch": 7.793537103356376, "grad_norm": 0.75390625, "learning_rate": 1.8315167966792706e-06, "loss": 3.9875, "step": 23396 }, { "epoch": 7.793870242358624, "grad_norm": 0.78125, "learning_rate": 1.8309868052277452e-06, "loss": 4.0007, "step": 23397 }, { "epoch": 7.794203381360873, "grad_norm": 0.74609375, "learning_rate": 1.8304568798079422e-06, "loss": 3.9776, "step": 23398 }, { "epoch": 7.794536520363121, "grad_norm": 0.75, "learning_rate": 1.8299270204260385e-06, "loss": 4.0218, "step": 23399 }, { "epoch": 7.794869659365371, "grad_norm": 0.765625, "learning_rate": 1.829397227088206e-06, "loss": 4.0294, "step": 23400 }, { "epoch": 7.795202798367619, "grad_norm": 0.765625, "learning_rate": 1.8288674998006085e-06, "loss": 3.949, "step": 23401 }, { "epoch": 7.795535937369868, "grad_norm": 0.73828125, "learning_rate": 1.828337838569424e-06, "loss": 3.9267, "step": 23402 }, { "epoch": 7.795869076372116, "grad_norm": 0.75, "learning_rate": 1.8278082434008184e-06, "loss": 4.0113, "step": 23403 }, { "epoch": 7.796202215374365, "grad_norm": 0.7578125, "learning_rate": 1.8272787143009607e-06, "loss": 3.9258, "step": 23404 }, { "epoch": 7.796535354376614, "grad_norm": 0.796875, "learning_rate": 1.8267492512760168e-06, "loss": 3.9733, "step": 23405 }, { "epoch": 7.796868493378862, "grad_norm": 0.74609375, "learning_rate": 1.8262198543321585e-06, "loss": 4.0191, "step": 23406 }, { "epoch": 7.797201632381111, "grad_norm": 0.78125, "learning_rate": 1.8256905234755484e-06, "loss": 4.0057, "step": 23407 }, { "epoch": 7.79753477138336, "grad_norm": 0.78515625, "learning_rate": 1.825161258712354e-06, "loss": 3.9648, "step": 23408 }, { "epoch": 7.797867910385609, "grad_norm": 0.7734375, "learning_rate": 1.8246320600487397e-06, "loss": 4.0426, "step": 23409 }, { "epoch": 7.798201049387857, "grad_norm": 0.78515625, "learning_rate": 1.8241029274908676e-06, "loss": 4.0433, "step": 23410 }, { "epoch": 7.798534188390105, "grad_norm": 0.75390625, "learning_rate": 1.8235738610449035e-06, "loss": 4.0306, "step": 23411 }, { "epoch": 7.7988673273923546, "grad_norm": 0.7734375, "learning_rate": 1.8230448607170069e-06, "loss": 3.9598, "step": 23412 }, { "epoch": 7.799200466394603, "grad_norm": 0.765625, "learning_rate": 1.8225159265133431e-06, "loss": 3.9387, "step": 23413 }, { "epoch": 7.799533605396852, "grad_norm": 0.76171875, "learning_rate": 1.8219870584400712e-06, "loss": 4.0027, "step": 23414 }, { "epoch": 7.7998667443991, "grad_norm": 0.796875, "learning_rate": 1.8214582565033517e-06, "loss": 3.9589, "step": 23415 }, { "epoch": 7.8001998834013495, "grad_norm": 0.796875, "learning_rate": 1.8209295207093415e-06, "loss": 3.8723, "step": 23416 }, { "epoch": 7.800533022403598, "grad_norm": 0.7578125, "learning_rate": 1.8204008510642036e-06, "loss": 3.9983, "step": 23417 }, { "epoch": 7.800866161405846, "grad_norm": 0.7890625, "learning_rate": 1.8198722475740964e-06, "loss": 4.0098, "step": 23418 }, { "epoch": 7.801199300408095, "grad_norm": 0.7890625, "learning_rate": 1.8193437102451716e-06, "loss": 3.9611, "step": 23419 }, { "epoch": 7.801532439410344, "grad_norm": 0.73046875, "learning_rate": 1.8188152390835867e-06, "loss": 4.0299, "step": 23420 }, { "epoch": 7.801865578412593, "grad_norm": 0.78125, "learning_rate": 1.8182868340955008e-06, "loss": 4.0253, "step": 23421 }, { "epoch": 7.802198717414841, "grad_norm": 0.71484375, "learning_rate": 1.8177584952870668e-06, "loss": 3.9819, "step": 23422 }, { "epoch": 7.80253185641709, "grad_norm": 0.7734375, "learning_rate": 1.8172302226644388e-06, "loss": 3.9993, "step": 23423 }, { "epoch": 7.8028649954193385, "grad_norm": 0.71484375, "learning_rate": 1.816702016233767e-06, "loss": 4.0086, "step": 23424 }, { "epoch": 7.803198134421588, "grad_norm": 0.7734375, "learning_rate": 1.8161738760012095e-06, "loss": 4.0366, "step": 23425 }, { "epoch": 7.803531273423836, "grad_norm": 0.77734375, "learning_rate": 1.815645801972915e-06, "loss": 4.0016, "step": 23426 }, { "epoch": 7.803864412426085, "grad_norm": 0.75390625, "learning_rate": 1.8151177941550343e-06, "loss": 3.9735, "step": 23427 }, { "epoch": 7.8041975514283335, "grad_norm": 0.7265625, "learning_rate": 1.8145898525537183e-06, "loss": 4.031, "step": 23428 }, { "epoch": 7.804530690430582, "grad_norm": 0.7578125, "learning_rate": 1.8140619771751164e-06, "loss": 3.9993, "step": 23429 }, { "epoch": 7.804863829432831, "grad_norm": 0.78125, "learning_rate": 1.8135341680253758e-06, "loss": 3.9809, "step": 23430 }, { "epoch": 7.805196968435079, "grad_norm": 0.7421875, "learning_rate": 1.8130064251106436e-06, "loss": 3.9306, "step": 23431 }, { "epoch": 7.8055301074373284, "grad_norm": 0.703125, "learning_rate": 1.81247874843707e-06, "loss": 4.0181, "step": 23432 }, { "epoch": 7.805863246439577, "grad_norm": 0.78125, "learning_rate": 1.8119511380108005e-06, "loss": 3.9855, "step": 23433 }, { "epoch": 7.806196385441826, "grad_norm": 0.75390625, "learning_rate": 1.8114235938379797e-06, "loss": 4.0371, "step": 23434 }, { "epoch": 7.806529524444074, "grad_norm": 0.7734375, "learning_rate": 1.8108961159247509e-06, "loss": 3.9903, "step": 23435 }, { "epoch": 7.8068626634463225, "grad_norm": 0.76171875, "learning_rate": 1.810368704277264e-06, "loss": 3.9603, "step": 23436 }, { "epoch": 7.807195802448572, "grad_norm": 0.765625, "learning_rate": 1.8098413589016553e-06, "loss": 3.8904, "step": 23437 }, { "epoch": 7.80752894145082, "grad_norm": 0.7421875, "learning_rate": 1.8093140798040686e-06, "loss": 4.036, "step": 23438 }, { "epoch": 7.807862080453069, "grad_norm": 0.76171875, "learning_rate": 1.8087868669906484e-06, "loss": 3.9943, "step": 23439 }, { "epoch": 7.8081952194553175, "grad_norm": 0.72265625, "learning_rate": 1.8082597204675341e-06, "loss": 3.9725, "step": 23440 }, { "epoch": 7.808528358457567, "grad_norm": 0.765625, "learning_rate": 1.8077326402408656e-06, "loss": 4.0614, "step": 23441 }, { "epoch": 7.808861497459815, "grad_norm": 0.8515625, "learning_rate": 1.8072056263167805e-06, "loss": 3.9497, "step": 23442 }, { "epoch": 7.809194636462064, "grad_norm": 0.76171875, "learning_rate": 1.8066786787014214e-06, "loss": 4.0271, "step": 23443 }, { "epoch": 7.809527775464312, "grad_norm": 0.74609375, "learning_rate": 1.8061517974009235e-06, "loss": 4.0104, "step": 23444 }, { "epoch": 7.809860914466562, "grad_norm": 0.765625, "learning_rate": 1.8056249824214248e-06, "loss": 4.0267, "step": 23445 }, { "epoch": 7.81019405346881, "grad_norm": 0.80859375, "learning_rate": 1.8050982337690612e-06, "loss": 3.9413, "step": 23446 }, { "epoch": 7.810527192471058, "grad_norm": 0.7734375, "learning_rate": 1.8045715514499677e-06, "loss": 3.9507, "step": 23447 }, { "epoch": 7.810860331473307, "grad_norm": 0.80078125, "learning_rate": 1.8040449354702802e-06, "loss": 3.9412, "step": 23448 }, { "epoch": 7.811193470475556, "grad_norm": 0.7578125, "learning_rate": 1.803518385836128e-06, "loss": 4.0381, "step": 23449 }, { "epoch": 7.811526609477805, "grad_norm": 0.7578125, "learning_rate": 1.802991902553652e-06, "loss": 4.0289, "step": 23450 }, { "epoch": 7.811859748480053, "grad_norm": 0.7734375, "learning_rate": 1.8024654856289793e-06, "loss": 3.9455, "step": 23451 }, { "epoch": 7.812192887482302, "grad_norm": 0.78515625, "learning_rate": 1.8019391350682436e-06, "loss": 3.9923, "step": 23452 }, { "epoch": 7.812526026484551, "grad_norm": 0.77734375, "learning_rate": 1.8014128508775721e-06, "loss": 3.9713, "step": 23453 }, { "epoch": 7.812859165486799, "grad_norm": 0.76171875, "learning_rate": 1.8008866330631028e-06, "loss": 4.0466, "step": 23454 }, { "epoch": 7.813192304489048, "grad_norm": 0.8046875, "learning_rate": 1.8003604816309579e-06, "loss": 3.9565, "step": 23455 }, { "epoch": 7.813525443491296, "grad_norm": 0.7578125, "learning_rate": 1.7998343965872655e-06, "loss": 3.91, "step": 23456 }, { "epoch": 7.813858582493546, "grad_norm": 0.76171875, "learning_rate": 1.7993083779381577e-06, "loss": 4.0528, "step": 23457 }, { "epoch": 7.814191721495794, "grad_norm": 0.71875, "learning_rate": 1.79878242568976e-06, "loss": 3.9681, "step": 23458 }, { "epoch": 7.814524860498043, "grad_norm": 0.7890625, "learning_rate": 1.7982565398481983e-06, "loss": 3.9337, "step": 23459 }, { "epoch": 7.814857999500291, "grad_norm": 0.76171875, "learning_rate": 1.7977307204195956e-06, "loss": 3.9492, "step": 23460 }, { "epoch": 7.8151911385025405, "grad_norm": 0.76953125, "learning_rate": 1.7972049674100807e-06, "loss": 3.9581, "step": 23461 }, { "epoch": 7.815524277504789, "grad_norm": 0.73046875, "learning_rate": 1.7966792808257761e-06, "loss": 4.0151, "step": 23462 }, { "epoch": 7.815857416507038, "grad_norm": 0.74609375, "learning_rate": 1.796153660672807e-06, "loss": 3.9303, "step": 23463 }, { "epoch": 7.816190555509286, "grad_norm": 0.70703125, "learning_rate": 1.7956281069572886e-06, "loss": 4.0407, "step": 23464 }, { "epoch": 7.816523694511535, "grad_norm": 0.80078125, "learning_rate": 1.795102619685349e-06, "loss": 3.9881, "step": 23465 }, { "epoch": 7.816856833513784, "grad_norm": 0.83203125, "learning_rate": 1.7945771988631077e-06, "loss": 4.0241, "step": 23466 }, { "epoch": 7.817189972516032, "grad_norm": 0.76171875, "learning_rate": 1.794051844496684e-06, "loss": 3.9911, "step": 23467 }, { "epoch": 7.817523111518281, "grad_norm": 0.7578125, "learning_rate": 1.7935265565921942e-06, "loss": 3.9566, "step": 23468 }, { "epoch": 7.81785625052053, "grad_norm": 0.84765625, "learning_rate": 1.7930013351557625e-06, "loss": 3.95, "step": 23469 }, { "epoch": 7.818189389522779, "grad_norm": 0.76171875, "learning_rate": 1.7924761801935044e-06, "loss": 3.9125, "step": 23470 }, { "epoch": 7.818522528525027, "grad_norm": 0.76171875, "learning_rate": 1.791951091711536e-06, "loss": 3.964, "step": 23471 }, { "epoch": 7.818855667527275, "grad_norm": 0.75390625, "learning_rate": 1.7914260697159738e-06, "loss": 3.9782, "step": 23472 }, { "epoch": 7.8191888065295245, "grad_norm": 0.74609375, "learning_rate": 1.7909011142129332e-06, "loss": 3.9817, "step": 23473 }, { "epoch": 7.819521945531773, "grad_norm": 0.78125, "learning_rate": 1.7903762252085282e-06, "loss": 3.976, "step": 23474 }, { "epoch": 7.819855084534022, "grad_norm": 0.7578125, "learning_rate": 1.7898514027088716e-06, "loss": 3.945, "step": 23475 }, { "epoch": 7.82018822353627, "grad_norm": 0.7890625, "learning_rate": 1.7893266467200795e-06, "loss": 3.9315, "step": 23476 }, { "epoch": 7.8205213625385195, "grad_norm": 0.80078125, "learning_rate": 1.7888019572482634e-06, "loss": 4.0192, "step": 23477 }, { "epoch": 7.820854501540768, "grad_norm": 0.7578125, "learning_rate": 1.7882773342995333e-06, "loss": 4.0437, "step": 23478 }, { "epoch": 7.821187640543017, "grad_norm": 0.74609375, "learning_rate": 1.7877527778799999e-06, "loss": 3.9824, "step": 23479 }, { "epoch": 7.821520779545265, "grad_norm": 0.7109375, "learning_rate": 1.7872282879957743e-06, "loss": 4.068, "step": 23480 }, { "epoch": 7.821853918547514, "grad_norm": 0.7578125, "learning_rate": 1.7867038646529688e-06, "loss": 3.9722, "step": 23481 }, { "epoch": 7.822187057549763, "grad_norm": 0.765625, "learning_rate": 1.7861795078576832e-06, "loss": 3.8928, "step": 23482 }, { "epoch": 7.822520196552011, "grad_norm": 0.7734375, "learning_rate": 1.7856552176160326e-06, "loss": 3.9732, "step": 23483 }, { "epoch": 7.82285333555426, "grad_norm": 0.7265625, "learning_rate": 1.7851309939341223e-06, "loss": 4.0762, "step": 23484 }, { "epoch": 7.8231864745565085, "grad_norm": 0.765625, "learning_rate": 1.7846068368180564e-06, "loss": 3.9716, "step": 23485 }, { "epoch": 7.823519613558758, "grad_norm": 0.78125, "learning_rate": 1.7840827462739394e-06, "loss": 3.9954, "step": 23486 }, { "epoch": 7.823852752561006, "grad_norm": 0.76171875, "learning_rate": 1.7835587223078805e-06, "loss": 4.031, "step": 23487 }, { "epoch": 7.824185891563255, "grad_norm": 0.73046875, "learning_rate": 1.7830347649259798e-06, "loss": 4.096, "step": 23488 }, { "epoch": 7.8245190305655035, "grad_norm": 0.80859375, "learning_rate": 1.7825108741343415e-06, "loss": 3.9876, "step": 23489 }, { "epoch": 7.824852169567752, "grad_norm": 0.76171875, "learning_rate": 1.7819870499390683e-06, "loss": 3.9632, "step": 23490 }, { "epoch": 7.825185308570001, "grad_norm": 0.8203125, "learning_rate": 1.7814632923462595e-06, "loss": 3.9252, "step": 23491 }, { "epoch": 7.825518447572249, "grad_norm": 0.84375, "learning_rate": 1.7809396013620178e-06, "loss": 3.9512, "step": 23492 }, { "epoch": 7.825851586574498, "grad_norm": 0.7421875, "learning_rate": 1.7804159769924398e-06, "loss": 3.9757, "step": 23493 }, { "epoch": 7.826184725576747, "grad_norm": 0.77734375, "learning_rate": 1.7798924192436298e-06, "loss": 3.9584, "step": 23494 }, { "epoch": 7.826517864578996, "grad_norm": 0.77734375, "learning_rate": 1.7793689281216829e-06, "loss": 3.9513, "step": 23495 }, { "epoch": 7.826851003581244, "grad_norm": 0.7734375, "learning_rate": 1.778845503632697e-06, "loss": 3.9946, "step": 23496 }, { "epoch": 7.8271841425834925, "grad_norm": 0.73828125, "learning_rate": 1.778322145782767e-06, "loss": 4.0045, "step": 23497 }, { "epoch": 7.827517281585742, "grad_norm": 0.796875, "learning_rate": 1.777798854577993e-06, "loss": 3.978, "step": 23498 }, { "epoch": 7.82785042058799, "grad_norm": 0.77734375, "learning_rate": 1.7772756300244702e-06, "loss": 3.9509, "step": 23499 }, { "epoch": 7.828183559590239, "grad_norm": 0.8046875, "learning_rate": 1.7767524721282864e-06, "loss": 3.9808, "step": 23500 }, { "epoch": 7.828516698592487, "grad_norm": 0.7890625, "learning_rate": 1.7762293808955422e-06, "loss": 3.9714, "step": 23501 }, { "epoch": 7.828849837594737, "grad_norm": 0.76953125, "learning_rate": 1.7757063563323278e-06, "loss": 3.9864, "step": 23502 }, { "epoch": 7.829182976596985, "grad_norm": 0.796875, "learning_rate": 1.775183398444736e-06, "loss": 3.9464, "step": 23503 }, { "epoch": 7.829516115599234, "grad_norm": 0.765625, "learning_rate": 1.7746605072388552e-06, "loss": 3.9467, "step": 23504 }, { "epoch": 7.829849254601482, "grad_norm": 0.8046875, "learning_rate": 1.7741376827207806e-06, "loss": 4.0426, "step": 23505 }, { "epoch": 7.830182393603732, "grad_norm": 0.79296875, "learning_rate": 1.7736149248965996e-06, "loss": 3.9766, "step": 23506 }, { "epoch": 7.83051553260598, "grad_norm": 0.765625, "learning_rate": 1.7730922337724042e-06, "loss": 4.0564, "step": 23507 }, { "epoch": 7.830848671608228, "grad_norm": 0.79296875, "learning_rate": 1.7725696093542744e-06, "loss": 4.0141, "step": 23508 }, { "epoch": 7.831181810610477, "grad_norm": 0.76953125, "learning_rate": 1.7720470516483064e-06, "loss": 3.9712, "step": 23509 }, { "epoch": 7.831514949612726, "grad_norm": 0.79296875, "learning_rate": 1.7715245606605826e-06, "loss": 3.9485, "step": 23510 }, { "epoch": 7.831848088614975, "grad_norm": 0.8046875, "learning_rate": 1.7710021363971907e-06, "loss": 3.9811, "step": 23511 }, { "epoch": 7.832181227617223, "grad_norm": 0.71875, "learning_rate": 1.7704797788642124e-06, "loss": 3.9491, "step": 23512 }, { "epoch": 7.832514366619472, "grad_norm": 0.73828125, "learning_rate": 1.7699574880677363e-06, "loss": 4.0297, "step": 23513 }, { "epoch": 7.832847505621721, "grad_norm": 0.71875, "learning_rate": 1.769435264013845e-06, "loss": 3.9623, "step": 23514 }, { "epoch": 7.833180644623969, "grad_norm": 0.8359375, "learning_rate": 1.7689131067086195e-06, "loss": 3.9333, "step": 23515 }, { "epoch": 7.833513783626218, "grad_norm": 0.73046875, "learning_rate": 1.7683910161581433e-06, "loss": 4.0427, "step": 23516 }, { "epoch": 7.833846922628466, "grad_norm": 0.765625, "learning_rate": 1.767868992368497e-06, "loss": 3.9434, "step": 23517 }, { "epoch": 7.834180061630716, "grad_norm": 0.76171875, "learning_rate": 1.7673470353457612e-06, "loss": 4.0014, "step": 23518 }, { "epoch": 7.834513200632964, "grad_norm": 0.78515625, "learning_rate": 1.7668251450960135e-06, "loss": 3.9702, "step": 23519 }, { "epoch": 7.834846339635213, "grad_norm": 0.7734375, "learning_rate": 1.7663033216253371e-06, "loss": 4.0451, "step": 23520 }, { "epoch": 7.835179478637461, "grad_norm": 0.75390625, "learning_rate": 1.7657815649398082e-06, "loss": 4.0332, "step": 23521 }, { "epoch": 7.8355126176397105, "grad_norm": 0.74609375, "learning_rate": 1.7652598750455027e-06, "loss": 3.9691, "step": 23522 }, { "epoch": 7.835845756641959, "grad_norm": 0.7890625, "learning_rate": 1.7647382519484973e-06, "loss": 4.012, "step": 23523 }, { "epoch": 7.836178895644208, "grad_norm": 0.77734375, "learning_rate": 1.7642166956548696e-06, "loss": 4.0804, "step": 23524 }, { "epoch": 7.836512034646456, "grad_norm": 0.75390625, "learning_rate": 1.7636952061706968e-06, "loss": 4.0525, "step": 23525 }, { "epoch": 7.836845173648705, "grad_norm": 0.7578125, "learning_rate": 1.7631737835020453e-06, "loss": 3.8662, "step": 23526 }, { "epoch": 7.837178312650954, "grad_norm": 0.7578125, "learning_rate": 1.7626524276549955e-06, "loss": 3.9787, "step": 23527 }, { "epoch": 7.837511451653202, "grad_norm": 0.76171875, "learning_rate": 1.762131138635618e-06, "loss": 3.9702, "step": 23528 }, { "epoch": 7.837844590655451, "grad_norm": 0.78125, "learning_rate": 1.7616099164499843e-06, "loss": 3.9143, "step": 23529 }, { "epoch": 7.8381777296576995, "grad_norm": 0.765625, "learning_rate": 1.7610887611041634e-06, "loss": 4.0463, "step": 23530 }, { "epoch": 7.838510868659949, "grad_norm": 0.78515625, "learning_rate": 1.7605676726042305e-06, "loss": 4.0038, "step": 23531 }, { "epoch": 7.838844007662197, "grad_norm": 0.8046875, "learning_rate": 1.7600466509562525e-06, "loss": 4.0149, "step": 23532 }, { "epoch": 7.839177146664445, "grad_norm": 0.73828125, "learning_rate": 1.7595256961662983e-06, "loss": 4.0234, "step": 23533 }, { "epoch": 7.8395102856666945, "grad_norm": 0.734375, "learning_rate": 1.7590048082404361e-06, "loss": 3.9422, "step": 23534 }, { "epoch": 7.839843424668943, "grad_norm": 0.75, "learning_rate": 1.7584839871847324e-06, "loss": 3.9604, "step": 23535 }, { "epoch": 7.840176563671192, "grad_norm": 0.80859375, "learning_rate": 1.757963233005255e-06, "loss": 4.0055, "step": 23536 }, { "epoch": 7.84050970267344, "grad_norm": 0.76953125, "learning_rate": 1.7574425457080657e-06, "loss": 3.9841, "step": 23537 }, { "epoch": 7.8408428416756895, "grad_norm": 0.7734375, "learning_rate": 1.7569219252992352e-06, "loss": 3.9735, "step": 23538 }, { "epoch": 7.841175980677938, "grad_norm": 0.76171875, "learning_rate": 1.7564013717848241e-06, "loss": 3.9529, "step": 23539 }, { "epoch": 7.841509119680187, "grad_norm": 0.73828125, "learning_rate": 1.7558808851708972e-06, "loss": 3.9926, "step": 23540 }, { "epoch": 7.841842258682435, "grad_norm": 0.7578125, "learning_rate": 1.755360465463513e-06, "loss": 3.9664, "step": 23541 }, { "epoch": 7.842175397684684, "grad_norm": 0.73828125, "learning_rate": 1.7548401126687394e-06, "loss": 3.9839, "step": 23542 }, { "epoch": 7.842508536686933, "grad_norm": 0.765625, "learning_rate": 1.7543198267926363e-06, "loss": 3.955, "step": 23543 }, { "epoch": 7.842841675689181, "grad_norm": 0.75, "learning_rate": 1.7537996078412583e-06, "loss": 3.9847, "step": 23544 }, { "epoch": 7.84317481469143, "grad_norm": 0.7890625, "learning_rate": 1.7532794558206702e-06, "loss": 4.0571, "step": 23545 }, { "epoch": 7.8435079536936785, "grad_norm": 0.79296875, "learning_rate": 1.7527593707369299e-06, "loss": 3.9764, "step": 23546 }, { "epoch": 7.843841092695928, "grad_norm": 0.77734375, "learning_rate": 1.752239352596094e-06, "loss": 4.0455, "step": 23547 }, { "epoch": 7.844174231698176, "grad_norm": 0.75390625, "learning_rate": 1.7517194014042173e-06, "loss": 3.9349, "step": 23548 }, { "epoch": 7.844507370700425, "grad_norm": 0.75, "learning_rate": 1.751199517167362e-06, "loss": 4.0122, "step": 23549 }, { "epoch": 7.844840509702673, "grad_norm": 0.84375, "learning_rate": 1.7506796998915792e-06, "loss": 3.9087, "step": 23550 }, { "epoch": 7.845173648704922, "grad_norm": 0.7734375, "learning_rate": 1.7501599495829256e-06, "loss": 3.8918, "step": 23551 }, { "epoch": 7.845506787707171, "grad_norm": 0.71484375, "learning_rate": 1.749640266247455e-06, "loss": 4.036, "step": 23552 }, { "epoch": 7.845839926709419, "grad_norm": 0.77734375, "learning_rate": 1.749120649891219e-06, "loss": 3.9605, "step": 23553 }, { "epoch": 7.846173065711668, "grad_norm": 0.75, "learning_rate": 1.748601100520272e-06, "loss": 4.0264, "step": 23554 }, { "epoch": 7.846506204713917, "grad_norm": 0.73828125, "learning_rate": 1.7480816181406643e-06, "loss": 3.9262, "step": 23555 }, { "epoch": 7.846839343716166, "grad_norm": 0.76953125, "learning_rate": 1.7475622027584448e-06, "loss": 4.0013, "step": 23556 }, { "epoch": 7.847172482718414, "grad_norm": 0.7734375, "learning_rate": 1.7470428543796675e-06, "loss": 3.938, "step": 23557 }, { "epoch": 7.8475056217206625, "grad_norm": 0.734375, "learning_rate": 1.7465235730103812e-06, "loss": 4.0124, "step": 23558 }, { "epoch": 7.847838760722912, "grad_norm": 0.796875, "learning_rate": 1.7460043586566325e-06, "loss": 3.9542, "step": 23559 }, { "epoch": 7.84817189972516, "grad_norm": 0.76953125, "learning_rate": 1.7454852113244677e-06, "loss": 3.9808, "step": 23560 }, { "epoch": 7.848505038727409, "grad_norm": 0.76171875, "learning_rate": 1.7449661310199405e-06, "loss": 3.9874, "step": 23561 }, { "epoch": 7.848838177729657, "grad_norm": 0.78515625, "learning_rate": 1.7444471177490904e-06, "loss": 4.0172, "step": 23562 }, { "epoch": 7.849171316731907, "grad_norm": 0.7734375, "learning_rate": 1.7439281715179623e-06, "loss": 4.0568, "step": 23563 }, { "epoch": 7.849504455734155, "grad_norm": 0.75, "learning_rate": 1.7434092923326065e-06, "loss": 4.0401, "step": 23564 }, { "epoch": 7.849837594736404, "grad_norm": 0.80078125, "learning_rate": 1.7428904801990621e-06, "loss": 4.0231, "step": 23565 }, { "epoch": 7.850170733738652, "grad_norm": 0.7734375, "learning_rate": 1.7423717351233747e-06, "loss": 3.9318, "step": 23566 }, { "epoch": 7.850503872740902, "grad_norm": 0.78515625, "learning_rate": 1.7418530571115842e-06, "loss": 3.9109, "step": 23567 }, { "epoch": 7.85083701174315, "grad_norm": 0.734375, "learning_rate": 1.7413344461697342e-06, "loss": 4.0058, "step": 23568 }, { "epoch": 7.851170150745398, "grad_norm": 0.77734375, "learning_rate": 1.7408159023038657e-06, "loss": 3.9248, "step": 23569 }, { "epoch": 7.851503289747647, "grad_norm": 0.765625, "learning_rate": 1.7402974255200168e-06, "loss": 3.9504, "step": 23570 }, { "epoch": 7.851836428749896, "grad_norm": 0.75390625, "learning_rate": 1.7397790158242283e-06, "loss": 3.9113, "step": 23571 }, { "epoch": 7.852169567752145, "grad_norm": 0.77734375, "learning_rate": 1.7392606732225367e-06, "loss": 4.0465, "step": 23572 }, { "epoch": 7.852502706754393, "grad_norm": 0.76171875, "learning_rate": 1.7387423977209804e-06, "loss": 3.8983, "step": 23573 }, { "epoch": 7.852835845756642, "grad_norm": 0.71875, "learning_rate": 1.738224189325595e-06, "loss": 4.0074, "step": 23574 }, { "epoch": 7.853168984758891, "grad_norm": 0.7578125, "learning_rate": 1.73770604804242e-06, "loss": 3.9705, "step": 23575 }, { "epoch": 7.853502123761139, "grad_norm": 0.74609375, "learning_rate": 1.737187973877488e-06, "loss": 4.0487, "step": 23576 }, { "epoch": 7.853835262763388, "grad_norm": 0.7578125, "learning_rate": 1.7366699668368335e-06, "loss": 4.0161, "step": 23577 }, { "epoch": 7.854168401765636, "grad_norm": 0.79296875, "learning_rate": 1.736152026926489e-06, "loss": 3.9546, "step": 23578 }, { "epoch": 7.8545015407678855, "grad_norm": 0.73828125, "learning_rate": 1.735634154152493e-06, "loss": 4.0583, "step": 23579 }, { "epoch": 7.854834679770134, "grad_norm": 0.78125, "learning_rate": 1.7351163485208716e-06, "loss": 4.0053, "step": 23580 }, { "epoch": 7.855167818772383, "grad_norm": 0.765625, "learning_rate": 1.7345986100376552e-06, "loss": 3.9862, "step": 23581 }, { "epoch": 7.855500957774631, "grad_norm": 0.796875, "learning_rate": 1.73408093870888e-06, "loss": 3.9992, "step": 23582 }, { "epoch": 7.8558340967768805, "grad_norm": 0.72265625, "learning_rate": 1.7335633345405726e-06, "loss": 4.0387, "step": 23583 }, { "epoch": 7.856167235779129, "grad_norm": 0.765625, "learning_rate": 1.733045797538762e-06, "loss": 3.9723, "step": 23584 }, { "epoch": 7.856500374781378, "grad_norm": 0.80078125, "learning_rate": 1.7325283277094747e-06, "loss": 4.0109, "step": 23585 }, { "epoch": 7.856833513783626, "grad_norm": 0.80859375, "learning_rate": 1.7320109250587426e-06, "loss": 4.0114, "step": 23586 }, { "epoch": 7.857166652785875, "grad_norm": 0.75390625, "learning_rate": 1.7314935895925893e-06, "loss": 4.0603, "step": 23587 }, { "epoch": 7.857499791788124, "grad_norm": 0.796875, "learning_rate": 1.7309763213170415e-06, "loss": 4.0158, "step": 23588 }, { "epoch": 7.857832930790372, "grad_norm": 0.82421875, "learning_rate": 1.7304591202381238e-06, "loss": 3.9563, "step": 23589 }, { "epoch": 7.858166069792621, "grad_norm": 0.80078125, "learning_rate": 1.72994198636186e-06, "loss": 3.9646, "step": 23590 }, { "epoch": 7.8584992087948695, "grad_norm": 0.77734375, "learning_rate": 1.7294249196942752e-06, "loss": 3.8997, "step": 23591 }, { "epoch": 7.858832347797119, "grad_norm": 0.7734375, "learning_rate": 1.7289079202413883e-06, "loss": 3.9171, "step": 23592 }, { "epoch": 7.859165486799367, "grad_norm": 0.7734375, "learning_rate": 1.728390988009226e-06, "loss": 4.0402, "step": 23593 }, { "epoch": 7.859498625801615, "grad_norm": 0.7421875, "learning_rate": 1.7278741230038073e-06, "loss": 3.9954, "step": 23594 }, { "epoch": 7.8598317648038645, "grad_norm": 0.76953125, "learning_rate": 1.7273573252311528e-06, "loss": 3.9658, "step": 23595 }, { "epoch": 7.860164903806113, "grad_norm": 0.87890625, "learning_rate": 1.7268405946972796e-06, "loss": 3.9581, "step": 23596 }, { "epoch": 7.860498042808362, "grad_norm": 0.82421875, "learning_rate": 1.7263239314082135e-06, "loss": 3.9654, "step": 23597 }, { "epoch": 7.86083118181061, "grad_norm": 0.73828125, "learning_rate": 1.7258073353699657e-06, "loss": 4.0159, "step": 23598 }, { "epoch": 7.861164320812859, "grad_norm": 0.73046875, "learning_rate": 1.7252908065885557e-06, "loss": 4.041, "step": 23599 }, { "epoch": 7.861497459815108, "grad_norm": 0.7421875, "learning_rate": 1.7247743450699973e-06, "loss": 4.0509, "step": 23600 }, { "epoch": 7.861830598817357, "grad_norm": 0.765625, "learning_rate": 1.7242579508203104e-06, "loss": 4.0002, "step": 23601 }, { "epoch": 7.862163737819605, "grad_norm": 0.734375, "learning_rate": 1.7237416238455091e-06, "loss": 4.0341, "step": 23602 }, { "epoch": 7.862496876821854, "grad_norm": 0.73828125, "learning_rate": 1.7232253641516055e-06, "loss": 3.9567, "step": 23603 }, { "epoch": 7.862830015824103, "grad_norm": 0.78125, "learning_rate": 1.7227091717446129e-06, "loss": 3.958, "step": 23604 }, { "epoch": 7.863163154826351, "grad_norm": 0.76171875, "learning_rate": 1.7221930466305463e-06, "loss": 3.9656, "step": 23605 }, { "epoch": 7.8634962938286, "grad_norm": 0.7890625, "learning_rate": 1.721676988815418e-06, "loss": 4.0124, "step": 23606 }, { "epoch": 7.8638294328308485, "grad_norm": 0.76171875, "learning_rate": 1.7211609983052329e-06, "loss": 3.9689, "step": 23607 }, { "epoch": 7.864162571833098, "grad_norm": 0.78125, "learning_rate": 1.7206450751060079e-06, "loss": 3.9094, "step": 23608 }, { "epoch": 7.864495710835346, "grad_norm": 0.7734375, "learning_rate": 1.7201292192237491e-06, "loss": 3.9553, "step": 23609 }, { "epoch": 7.864828849837595, "grad_norm": 0.765625, "learning_rate": 1.7196134306644658e-06, "loss": 3.9614, "step": 23610 }, { "epoch": 7.865161988839843, "grad_norm": 0.77734375, "learning_rate": 1.7190977094341633e-06, "loss": 3.9609, "step": 23611 }, { "epoch": 7.865495127842092, "grad_norm": 0.8046875, "learning_rate": 1.7185820555388535e-06, "loss": 3.9005, "step": 23612 }, { "epoch": 7.865828266844341, "grad_norm": 0.72265625, "learning_rate": 1.7180664689845405e-06, "loss": 4.0134, "step": 23613 }, { "epoch": 7.866161405846589, "grad_norm": 0.765625, "learning_rate": 1.7175509497772295e-06, "loss": 3.9584, "step": 23614 }, { "epoch": 7.866494544848838, "grad_norm": 0.7421875, "learning_rate": 1.7170354979229245e-06, "loss": 4.0237, "step": 23615 }, { "epoch": 7.866827683851087, "grad_norm": 0.77734375, "learning_rate": 1.716520113427631e-06, "loss": 4.0594, "step": 23616 }, { "epoch": 7.867160822853336, "grad_norm": 0.8125, "learning_rate": 1.7160047962973503e-06, "loss": 3.9547, "step": 23617 }, { "epoch": 7.867493961855584, "grad_norm": 0.80078125, "learning_rate": 1.715489546538084e-06, "loss": 3.9332, "step": 23618 }, { "epoch": 7.867827100857832, "grad_norm": 0.73046875, "learning_rate": 1.7149743641558368e-06, "loss": 4.0676, "step": 23619 }, { "epoch": 7.868160239860082, "grad_norm": 0.7421875, "learning_rate": 1.714459249156609e-06, "loss": 4.0463, "step": 23620 }, { "epoch": 7.868493378862331, "grad_norm": 0.75390625, "learning_rate": 1.7139442015463985e-06, "loss": 4.0296, "step": 23621 }, { "epoch": 7.868826517864579, "grad_norm": 0.78515625, "learning_rate": 1.7134292213312036e-06, "loss": 4.0224, "step": 23622 }, { "epoch": 7.869159656866827, "grad_norm": 0.78515625, "learning_rate": 1.7129143085170268e-06, "loss": 3.9889, "step": 23623 }, { "epoch": 7.869492795869077, "grad_norm": 0.7421875, "learning_rate": 1.7123994631098659e-06, "loss": 3.961, "step": 23624 }, { "epoch": 7.869825934871325, "grad_norm": 0.859375, "learning_rate": 1.7118846851157107e-06, "loss": 4.0005, "step": 23625 }, { "epoch": 7.870159073873574, "grad_norm": 0.80078125, "learning_rate": 1.7113699745405645e-06, "loss": 3.979, "step": 23626 }, { "epoch": 7.870492212875822, "grad_norm": 0.7734375, "learning_rate": 1.7108553313904196e-06, "loss": 3.9921, "step": 23627 }, { "epoch": 7.8708253518780715, "grad_norm": 0.828125, "learning_rate": 1.7103407556712705e-06, "loss": 4.0355, "step": 23628 }, { "epoch": 7.87115849088032, "grad_norm": 0.74609375, "learning_rate": 1.709826247389109e-06, "loss": 3.949, "step": 23629 }, { "epoch": 7.871491629882568, "grad_norm": 0.80078125, "learning_rate": 1.7093118065499331e-06, "loss": 3.9609, "step": 23630 }, { "epoch": 7.871824768884817, "grad_norm": 0.73828125, "learning_rate": 1.7087974331597317e-06, "loss": 3.9206, "step": 23631 }, { "epoch": 7.872157907887066, "grad_norm": 0.8203125, "learning_rate": 1.7082831272244962e-06, "loss": 4.0189, "step": 23632 }, { "epoch": 7.872491046889315, "grad_norm": 0.75, "learning_rate": 1.7077688887502175e-06, "loss": 3.9917, "step": 23633 }, { "epoch": 7.872824185891563, "grad_norm": 0.74609375, "learning_rate": 1.7072547177428857e-06, "loss": 4.014, "step": 23634 }, { "epoch": 7.873157324893812, "grad_norm": 0.80078125, "learning_rate": 1.7067406142084887e-06, "loss": 3.9272, "step": 23635 }, { "epoch": 7.8734904638960606, "grad_norm": 0.78515625, "learning_rate": 1.7062265781530143e-06, "loss": 3.9696, "step": 23636 }, { "epoch": 7.873823602898309, "grad_norm": 0.75390625, "learning_rate": 1.7057126095824524e-06, "loss": 3.9567, "step": 23637 }, { "epoch": 7.874156741900558, "grad_norm": 0.76953125, "learning_rate": 1.7051987085027883e-06, "loss": 3.9977, "step": 23638 }, { "epoch": 7.874489880902806, "grad_norm": 0.828125, "learning_rate": 1.7046848749200075e-06, "loss": 3.9264, "step": 23639 }, { "epoch": 7.8748230199050555, "grad_norm": 0.7578125, "learning_rate": 1.7041711088400934e-06, "loss": 3.9718, "step": 23640 }, { "epoch": 7.875156158907304, "grad_norm": 0.76171875, "learning_rate": 1.7036574102690342e-06, "loss": 3.9868, "step": 23641 }, { "epoch": 7.875489297909553, "grad_norm": 0.8046875, "learning_rate": 1.7031437792128138e-06, "loss": 3.9416, "step": 23642 }, { "epoch": 7.875822436911801, "grad_norm": 0.76171875, "learning_rate": 1.7026302156774098e-06, "loss": 4.0434, "step": 23643 }, { "epoch": 7.8761555759140505, "grad_norm": 0.796875, "learning_rate": 1.7021167196688043e-06, "loss": 3.9882, "step": 23644 }, { "epoch": 7.876488714916299, "grad_norm": 0.74609375, "learning_rate": 1.7016032911929839e-06, "loss": 3.9885, "step": 23645 }, { "epoch": 7.876821853918548, "grad_norm": 0.7109375, "learning_rate": 1.7010899302559247e-06, "loss": 3.9884, "step": 23646 }, { "epoch": 7.877154992920796, "grad_norm": 0.828125, "learning_rate": 1.7005766368636086e-06, "loss": 3.9575, "step": 23647 }, { "epoch": 7.8774881319230445, "grad_norm": 0.73828125, "learning_rate": 1.7000634110220101e-06, "loss": 4.0146, "step": 23648 }, { "epoch": 7.877821270925294, "grad_norm": 0.78515625, "learning_rate": 1.6995502527371126e-06, "loss": 4.0028, "step": 23649 }, { "epoch": 7.878154409927542, "grad_norm": 0.76953125, "learning_rate": 1.6990371620148907e-06, "loss": 3.9315, "step": 23650 }, { "epoch": 7.878487548929791, "grad_norm": 0.76171875, "learning_rate": 1.6985241388613218e-06, "loss": 4.0371, "step": 23651 }, { "epoch": 7.8788206879320395, "grad_norm": 0.80078125, "learning_rate": 1.6980111832823797e-06, "loss": 4.0332, "step": 23652 }, { "epoch": 7.879153826934289, "grad_norm": 0.78515625, "learning_rate": 1.6974982952840412e-06, "loss": 3.9009, "step": 23653 }, { "epoch": 7.879486965936537, "grad_norm": 0.8046875, "learning_rate": 1.6969854748722782e-06, "loss": 3.9881, "step": 23654 }, { "epoch": 7.879820104938785, "grad_norm": 0.82421875, "learning_rate": 1.6964727220530642e-06, "loss": 3.9572, "step": 23655 }, { "epoch": 7.8801532439410344, "grad_norm": 0.74609375, "learning_rate": 1.6959600368323738e-06, "loss": 3.985, "step": 23656 }, { "epoch": 7.880486382943283, "grad_norm": 0.7734375, "learning_rate": 1.695447419216178e-06, "loss": 3.9899, "step": 23657 }, { "epoch": 7.880819521945532, "grad_norm": 0.75390625, "learning_rate": 1.6949348692104466e-06, "loss": 3.9933, "step": 23658 }, { "epoch": 7.88115266094778, "grad_norm": 0.7734375, "learning_rate": 1.6944223868211486e-06, "loss": 3.9718, "step": 23659 }, { "epoch": 7.881485799950029, "grad_norm": 0.8125, "learning_rate": 1.6939099720542592e-06, "loss": 3.9773, "step": 23660 }, { "epoch": 7.881818938952278, "grad_norm": 0.796875, "learning_rate": 1.69339762491574e-06, "loss": 4.0005, "step": 23661 }, { "epoch": 7.882152077954527, "grad_norm": 0.7578125, "learning_rate": 1.69288534541156e-06, "loss": 4.0647, "step": 23662 }, { "epoch": 7.882485216956775, "grad_norm": 0.76953125, "learning_rate": 1.6923731335476891e-06, "loss": 3.9164, "step": 23663 }, { "epoch": 7.882818355959024, "grad_norm": 0.73046875, "learning_rate": 1.6918609893300918e-06, "loss": 4.0979, "step": 23664 }, { "epoch": 7.883151494961273, "grad_norm": 0.7578125, "learning_rate": 1.6913489127647342e-06, "loss": 3.9281, "step": 23665 }, { "epoch": 7.883484633963521, "grad_norm": 0.76171875, "learning_rate": 1.6908369038575771e-06, "loss": 3.9638, "step": 23666 }, { "epoch": 7.88381777296577, "grad_norm": 0.8046875, "learning_rate": 1.6903249626145902e-06, "loss": 3.9765, "step": 23667 }, { "epoch": 7.884150911968018, "grad_norm": 0.79296875, "learning_rate": 1.6898130890417346e-06, "loss": 3.9115, "step": 23668 }, { "epoch": 7.884484050970268, "grad_norm": 0.75, "learning_rate": 1.6893012831449703e-06, "loss": 3.9999, "step": 23669 }, { "epoch": 7.884817189972516, "grad_norm": 0.734375, "learning_rate": 1.688789544930261e-06, "loss": 4.0054, "step": 23670 }, { "epoch": 7.885150328974765, "grad_norm": 0.7421875, "learning_rate": 1.6882778744035659e-06, "loss": 3.9215, "step": 23671 }, { "epoch": 7.885483467977013, "grad_norm": 0.75390625, "learning_rate": 1.6877662715708455e-06, "loss": 4.0007, "step": 23672 }, { "epoch": 7.885816606979262, "grad_norm": 0.72265625, "learning_rate": 1.6872547364380567e-06, "loss": 4.0024, "step": 23673 }, { "epoch": 7.886149745981511, "grad_norm": 0.8203125, "learning_rate": 1.686743269011162e-06, "loss": 3.9615, "step": 23674 }, { "epoch": 7.886482884983759, "grad_norm": 0.80859375, "learning_rate": 1.6862318692961167e-06, "loss": 3.9716, "step": 23675 }, { "epoch": 7.886816023986008, "grad_norm": 0.84765625, "learning_rate": 1.6857205372988771e-06, "loss": 3.9722, "step": 23676 }, { "epoch": 7.887149162988257, "grad_norm": 0.75, "learning_rate": 1.6852092730253981e-06, "loss": 3.9773, "step": 23677 }, { "epoch": 7.887482301990506, "grad_norm": 0.76953125, "learning_rate": 1.6846980764816402e-06, "loss": 4.0253, "step": 23678 }, { "epoch": 7.887815440992754, "grad_norm": 0.7421875, "learning_rate": 1.684186947673552e-06, "loss": 4.0042, "step": 23679 }, { "epoch": 7.888148579995003, "grad_norm": 0.79296875, "learning_rate": 1.683675886607086e-06, "loss": 4.0181, "step": 23680 }, { "epoch": 7.888481718997252, "grad_norm": 0.8203125, "learning_rate": 1.6831648932881998e-06, "loss": 3.9961, "step": 23681 }, { "epoch": 7.888814857999501, "grad_norm": 0.78125, "learning_rate": 1.6826539677228441e-06, "loss": 3.9648, "step": 23682 }, { "epoch": 7.889147997001749, "grad_norm": 0.75, "learning_rate": 1.682143109916969e-06, "loss": 3.9689, "step": 23683 }, { "epoch": 7.889481136003997, "grad_norm": 0.75390625, "learning_rate": 1.6816323198765238e-06, "loss": 3.9313, "step": 23684 }, { "epoch": 7.8898142750062465, "grad_norm": 0.77734375, "learning_rate": 1.6811215976074605e-06, "loss": 3.9445, "step": 23685 }, { "epoch": 7.890147414008495, "grad_norm": 0.73046875, "learning_rate": 1.6806109431157274e-06, "loss": 3.9267, "step": 23686 }, { "epoch": 7.890480553010744, "grad_norm": 0.78515625, "learning_rate": 1.6801003564072742e-06, "loss": 3.9636, "step": 23687 }, { "epoch": 7.890813692012992, "grad_norm": 0.74609375, "learning_rate": 1.679589837488042e-06, "loss": 3.9138, "step": 23688 }, { "epoch": 7.8911468310152415, "grad_norm": 0.79296875, "learning_rate": 1.6790793863639825e-06, "loss": 3.9986, "step": 23689 }, { "epoch": 7.89147997001749, "grad_norm": 0.72265625, "learning_rate": 1.678569003041041e-06, "loss": 4.0519, "step": 23690 }, { "epoch": 7.891813109019738, "grad_norm": 0.75390625, "learning_rate": 1.6780586875251605e-06, "loss": 3.9122, "step": 23691 }, { "epoch": 7.892146248021987, "grad_norm": 0.78515625, "learning_rate": 1.6775484398222842e-06, "loss": 3.9713, "step": 23692 }, { "epoch": 7.892479387024236, "grad_norm": 0.76953125, "learning_rate": 1.6770382599383594e-06, "loss": 3.9605, "step": 23693 }, { "epoch": 7.892812526026485, "grad_norm": 0.71875, "learning_rate": 1.6765281478793268e-06, "loss": 4.0087, "step": 23694 }, { "epoch": 7.893145665028733, "grad_norm": 0.75, "learning_rate": 1.6760181036511268e-06, "loss": 3.942, "step": 23695 }, { "epoch": 7.893478804030982, "grad_norm": 0.76171875, "learning_rate": 1.6755081272597022e-06, "loss": 3.957, "step": 23696 }, { "epoch": 7.8938119430332305, "grad_norm": 0.796875, "learning_rate": 1.6749982187109916e-06, "loss": 4.0346, "step": 23697 }, { "epoch": 7.894145082035479, "grad_norm": 0.79296875, "learning_rate": 1.6744883780109343e-06, "loss": 3.9879, "step": 23698 }, { "epoch": 7.894478221037728, "grad_norm": 0.7265625, "learning_rate": 1.6739786051654674e-06, "loss": 4.0464, "step": 23699 }, { "epoch": 7.894811360039976, "grad_norm": 0.77734375, "learning_rate": 1.6734689001805334e-06, "loss": 4.0276, "step": 23700 }, { "epoch": 7.8951444990422255, "grad_norm": 0.7890625, "learning_rate": 1.672959263062066e-06, "loss": 3.9386, "step": 23701 }, { "epoch": 7.895477638044474, "grad_norm": 0.7734375, "learning_rate": 1.6724496938160021e-06, "loss": 4.0089, "step": 23702 }, { "epoch": 7.895810777046723, "grad_norm": 0.7421875, "learning_rate": 1.6719401924482748e-06, "loss": 3.9256, "step": 23703 }, { "epoch": 7.896143916048971, "grad_norm": 0.75, "learning_rate": 1.671430758964822e-06, "loss": 3.9315, "step": 23704 }, { "epoch": 7.89647705505122, "grad_norm": 0.78125, "learning_rate": 1.6709213933715787e-06, "loss": 4.0276, "step": 23705 }, { "epoch": 7.896810194053469, "grad_norm": 0.7890625, "learning_rate": 1.6704120956744714e-06, "loss": 3.985, "step": 23706 }, { "epoch": 7.897143333055718, "grad_norm": 0.7578125, "learning_rate": 1.6699028658794383e-06, "loss": 3.9808, "step": 23707 }, { "epoch": 7.897476472057966, "grad_norm": 0.79296875, "learning_rate": 1.6693937039924092e-06, "loss": 4.034, "step": 23708 }, { "epoch": 7.8978096110602145, "grad_norm": 0.77734375, "learning_rate": 1.668884610019314e-06, "loss": 4.0295, "step": 23709 }, { "epoch": 7.898142750062464, "grad_norm": 0.75, "learning_rate": 1.6683755839660807e-06, "loss": 3.9148, "step": 23710 }, { "epoch": 7.898475889064712, "grad_norm": 0.78125, "learning_rate": 1.6678666258386428e-06, "loss": 3.9962, "step": 23711 }, { "epoch": 7.898809028066961, "grad_norm": 0.734375, "learning_rate": 1.6673577356429264e-06, "loss": 3.9248, "step": 23712 }, { "epoch": 7.8991421670692095, "grad_norm": 0.828125, "learning_rate": 1.6668489133848591e-06, "loss": 3.9755, "step": 23713 }, { "epoch": 7.899475306071459, "grad_norm": 0.79296875, "learning_rate": 1.6663401590703668e-06, "loss": 3.8964, "step": 23714 }, { "epoch": 7.899808445073707, "grad_norm": 0.8046875, "learning_rate": 1.665831472705376e-06, "loss": 4.0054, "step": 23715 }, { "epoch": 7.900141584075955, "grad_norm": 0.7578125, "learning_rate": 1.6653228542958126e-06, "loss": 4.0188, "step": 23716 }, { "epoch": 7.900474723078204, "grad_norm": 0.8203125, "learning_rate": 1.6648143038475973e-06, "loss": 4.0514, "step": 23717 }, { "epoch": 7.900807862080453, "grad_norm": 0.76953125, "learning_rate": 1.664305821366658e-06, "loss": 3.977, "step": 23718 }, { "epoch": 7.901141001082702, "grad_norm": 0.76953125, "learning_rate": 1.6637974068589168e-06, "loss": 4.0105, "step": 23719 }, { "epoch": 7.90147414008495, "grad_norm": 0.77734375, "learning_rate": 1.6632890603302945e-06, "loss": 3.8988, "step": 23720 }, { "epoch": 7.901807279087199, "grad_norm": 0.79296875, "learning_rate": 1.6627807817867096e-06, "loss": 3.956, "step": 23721 }, { "epoch": 7.902140418089448, "grad_norm": 0.75, "learning_rate": 1.6622725712340878e-06, "loss": 3.9921, "step": 23722 }, { "epoch": 7.902473557091697, "grad_norm": 0.75390625, "learning_rate": 1.6617644286783472e-06, "loss": 3.9543, "step": 23723 }, { "epoch": 7.902806696093945, "grad_norm": 0.75, "learning_rate": 1.6612563541254027e-06, "loss": 4.0252, "step": 23724 }, { "epoch": 7.903139835096194, "grad_norm": 0.75390625, "learning_rate": 1.660748347581176e-06, "loss": 4.0389, "step": 23725 }, { "epoch": 7.903472974098443, "grad_norm": 0.74609375, "learning_rate": 1.660240409051584e-06, "loss": 3.9781, "step": 23726 }, { "epoch": 7.903806113100691, "grad_norm": 0.76953125, "learning_rate": 1.6597325385425414e-06, "loss": 3.9453, "step": 23727 }, { "epoch": 7.90413925210294, "grad_norm": 0.75, "learning_rate": 1.6592247360599639e-06, "loss": 4.0165, "step": 23728 }, { "epoch": 7.904472391105188, "grad_norm": 0.84375, "learning_rate": 1.6587170016097678e-06, "loss": 3.9625, "step": 23729 }, { "epoch": 7.904805530107438, "grad_norm": 0.74609375, "learning_rate": 1.6582093351978675e-06, "loss": 3.9361, "step": 23730 }, { "epoch": 7.905138669109686, "grad_norm": 0.796875, "learning_rate": 1.657701736830175e-06, "loss": 3.9769, "step": 23731 }, { "epoch": 7.905471808111935, "grad_norm": 0.83984375, "learning_rate": 1.6571942065126023e-06, "loss": 3.9354, "step": 23732 }, { "epoch": 7.905804947114183, "grad_norm": 0.82421875, "learning_rate": 1.656686744251062e-06, "loss": 3.9596, "step": 23733 }, { "epoch": 7.906138086116432, "grad_norm": 0.77734375, "learning_rate": 1.6561793500514644e-06, "loss": 3.9448, "step": 23734 }, { "epoch": 7.906471225118681, "grad_norm": 0.75390625, "learning_rate": 1.6556720239197197e-06, "loss": 3.9251, "step": 23735 }, { "epoch": 7.906804364120929, "grad_norm": 0.77734375, "learning_rate": 1.6551647658617354e-06, "loss": 3.9133, "step": 23736 }, { "epoch": 7.907137503123178, "grad_norm": 0.8125, "learning_rate": 1.6546575758834233e-06, "loss": 3.9986, "step": 23737 }, { "epoch": 7.907470642125427, "grad_norm": 0.828125, "learning_rate": 1.6541504539906897e-06, "loss": 3.9914, "step": 23738 }, { "epoch": 7.907803781127676, "grad_norm": 0.79296875, "learning_rate": 1.6536434001894406e-06, "loss": 3.9987, "step": 23739 }, { "epoch": 7.908136920129924, "grad_norm": 0.76171875, "learning_rate": 1.653136414485582e-06, "loss": 3.992, "step": 23740 }, { "epoch": 7.908470059132173, "grad_norm": 0.7421875, "learning_rate": 1.6526294968850231e-06, "loss": 3.9926, "step": 23741 }, { "epoch": 7.908803198134422, "grad_norm": 0.76171875, "learning_rate": 1.6521226473936631e-06, "loss": 3.9406, "step": 23742 }, { "epoch": 7.909136337136671, "grad_norm": 0.7890625, "learning_rate": 1.651615866017406e-06, "loss": 3.9667, "step": 23743 }, { "epoch": 7.909469476138919, "grad_norm": 0.78125, "learning_rate": 1.6511091527621578e-06, "loss": 4.0064, "step": 23744 }, { "epoch": 7.909802615141167, "grad_norm": 0.73828125, "learning_rate": 1.6506025076338202e-06, "loss": 3.966, "step": 23745 }, { "epoch": 7.9101357541434165, "grad_norm": 0.765625, "learning_rate": 1.6500959306382928e-06, "loss": 3.9777, "step": 23746 }, { "epoch": 7.910468893145665, "grad_norm": 0.8046875, "learning_rate": 1.649589421781476e-06, "loss": 4.0065, "step": 23747 }, { "epoch": 7.910802032147914, "grad_norm": 0.80078125, "learning_rate": 1.6490829810692718e-06, "loss": 3.9492, "step": 23748 }, { "epoch": 7.911135171150162, "grad_norm": 0.7734375, "learning_rate": 1.6485766085075775e-06, "loss": 3.9737, "step": 23749 }, { "epoch": 7.9114683101524115, "grad_norm": 0.78515625, "learning_rate": 1.6480703041022924e-06, "loss": 3.9511, "step": 23750 }, { "epoch": 7.91180144915466, "grad_norm": 0.76953125, "learning_rate": 1.647564067859312e-06, "loss": 3.9192, "step": 23751 }, { "epoch": 7.912134588156908, "grad_norm": 0.74609375, "learning_rate": 1.6470578997845345e-06, "loss": 3.9799, "step": 23752 }, { "epoch": 7.912467727159157, "grad_norm": 0.75, "learning_rate": 1.646551799883855e-06, "loss": 3.9161, "step": 23753 }, { "epoch": 7.9128008661614055, "grad_norm": 0.78125, "learning_rate": 1.6460457681631663e-06, "loss": 3.9753, "step": 23754 }, { "epoch": 7.913134005163655, "grad_norm": 0.76953125, "learning_rate": 1.645539804628367e-06, "loss": 3.8946, "step": 23755 }, { "epoch": 7.913467144165903, "grad_norm": 0.72265625, "learning_rate": 1.6450339092853476e-06, "loss": 4.0556, "step": 23756 }, { "epoch": 7.913800283168152, "grad_norm": 0.75390625, "learning_rate": 1.6445280821400022e-06, "loss": 4.0461, "step": 23757 }, { "epoch": 7.9141334221704005, "grad_norm": 0.7734375, "learning_rate": 1.6440223231982212e-06, "loss": 3.9899, "step": 23758 }, { "epoch": 7.914466561172649, "grad_norm": 0.734375, "learning_rate": 1.6435166324658959e-06, "loss": 3.9451, "step": 23759 }, { "epoch": 7.914799700174898, "grad_norm": 0.7578125, "learning_rate": 1.6430110099489172e-06, "loss": 4.0932, "step": 23760 }, { "epoch": 7.915132839177147, "grad_norm": 0.7734375, "learning_rate": 1.6425054556531716e-06, "loss": 3.8927, "step": 23761 }, { "epoch": 7.9154659781793955, "grad_norm": 0.74609375, "learning_rate": 1.641999969584553e-06, "loss": 3.9614, "step": 23762 }, { "epoch": 7.915799117181644, "grad_norm": 0.76953125, "learning_rate": 1.6414945517489454e-06, "loss": 4.0069, "step": 23763 }, { "epoch": 7.916132256183893, "grad_norm": 0.7578125, "learning_rate": 1.6409892021522375e-06, "loss": 4.0181, "step": 23764 }, { "epoch": 7.916465395186141, "grad_norm": 0.78515625, "learning_rate": 1.6404839208003116e-06, "loss": 4.0671, "step": 23765 }, { "epoch": 7.91679853418839, "grad_norm": 0.79296875, "learning_rate": 1.6399787076990594e-06, "loss": 4.044, "step": 23766 }, { "epoch": 7.917131673190639, "grad_norm": 0.7265625, "learning_rate": 1.6394735628543642e-06, "loss": 4.0273, "step": 23767 }, { "epoch": 7.917464812192888, "grad_norm": 0.77734375, "learning_rate": 1.6389684862721038e-06, "loss": 4.0455, "step": 23768 }, { "epoch": 7.917797951195136, "grad_norm": 0.8125, "learning_rate": 1.6384634779581686e-06, "loss": 3.9445, "step": 23769 }, { "epoch": 7.9181310901973845, "grad_norm": 0.76171875, "learning_rate": 1.6379585379184364e-06, "loss": 4.0049, "step": 23770 }, { "epoch": 7.918464229199634, "grad_norm": 0.8125, "learning_rate": 1.6374536661587908e-06, "loss": 3.9615, "step": 23771 }, { "epoch": 7.918797368201882, "grad_norm": 0.76171875, "learning_rate": 1.6369488626851094e-06, "loss": 3.9458, "step": 23772 }, { "epoch": 7.919130507204131, "grad_norm": 0.80859375, "learning_rate": 1.6364441275032771e-06, "loss": 3.9856, "step": 23773 }, { "epoch": 7.919463646206379, "grad_norm": 0.78515625, "learning_rate": 1.6359394606191697e-06, "loss": 3.9422, "step": 23774 }, { "epoch": 7.919796785208629, "grad_norm": 0.78515625, "learning_rate": 1.6354348620386667e-06, "loss": 3.9418, "step": 23775 }, { "epoch": 7.920129924210877, "grad_norm": 0.7734375, "learning_rate": 1.6349303317676442e-06, "loss": 3.9881, "step": 23776 }, { "epoch": 7.920463063213125, "grad_norm": 0.75390625, "learning_rate": 1.63442586981198e-06, "loss": 3.9411, "step": 23777 }, { "epoch": 7.920796202215374, "grad_norm": 0.7890625, "learning_rate": 1.633921476177549e-06, "loss": 4.0395, "step": 23778 }, { "epoch": 7.921129341217623, "grad_norm": 0.7890625, "learning_rate": 1.6334171508702275e-06, "loss": 3.9695, "step": 23779 }, { "epoch": 7.921462480219872, "grad_norm": 0.828125, "learning_rate": 1.632912893895887e-06, "loss": 3.9825, "step": 23780 }, { "epoch": 7.92179561922212, "grad_norm": 0.78125, "learning_rate": 1.6324087052604054e-06, "loss": 4.0271, "step": 23781 }, { "epoch": 7.922128758224369, "grad_norm": 0.76953125, "learning_rate": 1.6319045849696534e-06, "loss": 4.0225, "step": 23782 }, { "epoch": 7.922461897226618, "grad_norm": 0.78125, "learning_rate": 1.6314005330295018e-06, "loss": 3.993, "step": 23783 }, { "epoch": 7.922795036228867, "grad_norm": 0.765625, "learning_rate": 1.6308965494458214e-06, "loss": 3.9638, "step": 23784 }, { "epoch": 7.923128175231115, "grad_norm": 0.76953125, "learning_rate": 1.6303926342244877e-06, "loss": 3.9493, "step": 23785 }, { "epoch": 7.923461314233364, "grad_norm": 0.71484375, "learning_rate": 1.6298887873713642e-06, "loss": 4.0369, "step": 23786 }, { "epoch": 7.923794453235613, "grad_norm": 0.78515625, "learning_rate": 1.6293850088923196e-06, "loss": 4.0228, "step": 23787 }, { "epoch": 7.924127592237861, "grad_norm": 0.7578125, "learning_rate": 1.628881298793226e-06, "loss": 3.9835, "step": 23788 }, { "epoch": 7.92446073124011, "grad_norm": 0.78515625, "learning_rate": 1.6283776570799484e-06, "loss": 3.9922, "step": 23789 }, { "epoch": 7.924793870242358, "grad_norm": 0.72265625, "learning_rate": 1.627874083758353e-06, "loss": 3.9309, "step": 23790 }, { "epoch": 7.925127009244608, "grad_norm": 0.7578125, "learning_rate": 1.627370578834303e-06, "loss": 4.0788, "step": 23791 }, { "epoch": 7.925460148246856, "grad_norm": 0.79296875, "learning_rate": 1.6268671423136682e-06, "loss": 4.0114, "step": 23792 }, { "epoch": 7.925793287249105, "grad_norm": 0.75, "learning_rate": 1.62636377420231e-06, "loss": 3.9741, "step": 23793 }, { "epoch": 7.926126426251353, "grad_norm": 0.7734375, "learning_rate": 1.6258604745060924e-06, "loss": 4.0318, "step": 23794 }, { "epoch": 7.926459565253602, "grad_norm": 0.83984375, "learning_rate": 1.6253572432308757e-06, "loss": 3.9925, "step": 23795 }, { "epoch": 7.926792704255851, "grad_norm": 0.78125, "learning_rate": 1.6248540803825234e-06, "loss": 3.9066, "step": 23796 }, { "epoch": 7.927125843258099, "grad_norm": 0.71484375, "learning_rate": 1.6243509859668956e-06, "loss": 4.0342, "step": 23797 }, { "epoch": 7.927458982260348, "grad_norm": 0.83984375, "learning_rate": 1.6238479599898493e-06, "loss": 3.8965, "step": 23798 }, { "epoch": 7.927792121262597, "grad_norm": 0.76953125, "learning_rate": 1.6233450024572489e-06, "loss": 4.0031, "step": 23799 }, { "epoch": 7.928125260264846, "grad_norm": 0.75390625, "learning_rate": 1.6228421133749505e-06, "loss": 3.9084, "step": 23800 }, { "epoch": 7.928458399267094, "grad_norm": 0.8203125, "learning_rate": 1.622339292748812e-06, "loss": 4.0539, "step": 23801 }, { "epoch": 7.928791538269343, "grad_norm": 0.76171875, "learning_rate": 1.6218365405846872e-06, "loss": 3.9402, "step": 23802 }, { "epoch": 7.9291246772715915, "grad_norm": 0.75, "learning_rate": 1.6213338568884387e-06, "loss": 3.9322, "step": 23803 }, { "epoch": 7.929457816273841, "grad_norm": 0.80859375, "learning_rate": 1.620831241665916e-06, "loss": 3.9503, "step": 23804 }, { "epoch": 7.929790955276089, "grad_norm": 0.76953125, "learning_rate": 1.620328694922973e-06, "loss": 3.9254, "step": 23805 }, { "epoch": 7.930124094278337, "grad_norm": 0.79296875, "learning_rate": 1.619826216665467e-06, "loss": 3.9641, "step": 23806 }, { "epoch": 7.9304572332805865, "grad_norm": 0.75390625, "learning_rate": 1.6193238068992496e-06, "loss": 4.0039, "step": 23807 }, { "epoch": 7.930790372282835, "grad_norm": 0.75, "learning_rate": 1.6188214656301719e-06, "loss": 3.9764, "step": 23808 }, { "epoch": 7.931123511285084, "grad_norm": 0.765625, "learning_rate": 1.6183191928640833e-06, "loss": 3.9935, "step": 23809 }, { "epoch": 7.931456650287332, "grad_norm": 0.765625, "learning_rate": 1.6178169886068392e-06, "loss": 3.9059, "step": 23810 }, { "epoch": 7.9317897892895814, "grad_norm": 0.828125, "learning_rate": 1.6173148528642856e-06, "loss": 4.0002, "step": 23811 }, { "epoch": 7.93212292829183, "grad_norm": 0.78125, "learning_rate": 1.616812785642272e-06, "loss": 4.0326, "step": 23812 }, { "epoch": 7.932456067294078, "grad_norm": 0.74609375, "learning_rate": 1.6163107869466472e-06, "loss": 3.9931, "step": 23813 }, { "epoch": 7.932789206296327, "grad_norm": 0.77734375, "learning_rate": 1.6158088567832575e-06, "loss": 4.0194, "step": 23814 }, { "epoch": 7.9331223452985755, "grad_norm": 0.75390625, "learning_rate": 1.6153069951579493e-06, "loss": 3.9948, "step": 23815 }, { "epoch": 7.933455484300825, "grad_norm": 0.796875, "learning_rate": 1.614805202076567e-06, "loss": 3.9927, "step": 23816 }, { "epoch": 7.933788623303073, "grad_norm": 0.74609375, "learning_rate": 1.6143034775449579e-06, "loss": 4.0469, "step": 23817 }, { "epoch": 7.934121762305322, "grad_norm": 0.7578125, "learning_rate": 1.6138018215689648e-06, "loss": 4.0236, "step": 23818 }, { "epoch": 7.9344549013075705, "grad_norm": 0.76171875, "learning_rate": 1.6133002341544318e-06, "loss": 3.9983, "step": 23819 }, { "epoch": 7.93478804030982, "grad_norm": 0.76953125, "learning_rate": 1.612798715307198e-06, "loss": 3.976, "step": 23820 }, { "epoch": 7.935121179312068, "grad_norm": 0.84765625, "learning_rate": 1.6122972650331113e-06, "loss": 3.9939, "step": 23821 }, { "epoch": 7.935454318314317, "grad_norm": 0.78125, "learning_rate": 1.6117958833380072e-06, "loss": 3.9321, "step": 23822 }, { "epoch": 7.935787457316565, "grad_norm": 0.8046875, "learning_rate": 1.6112945702277252e-06, "loss": 3.9428, "step": 23823 }, { "epoch": 7.936120596318814, "grad_norm": 0.7578125, "learning_rate": 1.6107933257081087e-06, "loss": 3.9644, "step": 23824 }, { "epoch": 7.936453735321063, "grad_norm": 0.796875, "learning_rate": 1.6102921497849931e-06, "loss": 3.9351, "step": 23825 }, { "epoch": 7.936786874323311, "grad_norm": 0.796875, "learning_rate": 1.6097910424642182e-06, "loss": 3.9591, "step": 23826 }, { "epoch": 7.93712001332556, "grad_norm": 0.78515625, "learning_rate": 1.6092900037516181e-06, "loss": 4.0609, "step": 23827 }, { "epoch": 7.937453152327809, "grad_norm": 0.77734375, "learning_rate": 1.6087890336530292e-06, "loss": 3.9935, "step": 23828 }, { "epoch": 7.937786291330058, "grad_norm": 0.78515625, "learning_rate": 1.6082881321742894e-06, "loss": 3.9579, "step": 23829 }, { "epoch": 7.938119430332306, "grad_norm": 0.7734375, "learning_rate": 1.6077872993212337e-06, "loss": 3.9154, "step": 23830 }, { "epoch": 7.9384525693345545, "grad_norm": 0.76953125, "learning_rate": 1.6072865350996884e-06, "loss": 4.0104, "step": 23831 }, { "epoch": 7.938785708336804, "grad_norm": 0.7890625, "learning_rate": 1.606785839515494e-06, "loss": 3.985, "step": 23832 }, { "epoch": 7.939118847339052, "grad_norm": 0.76171875, "learning_rate": 1.60628521257448e-06, "loss": 4.0151, "step": 23833 }, { "epoch": 7.939451986341301, "grad_norm": 0.76953125, "learning_rate": 1.6057846542824783e-06, "loss": 3.9636, "step": 23834 }, { "epoch": 7.939785125343549, "grad_norm": 0.7578125, "learning_rate": 1.605284164645315e-06, "loss": 3.9618, "step": 23835 }, { "epoch": 7.940118264345799, "grad_norm": 0.7890625, "learning_rate": 1.6047837436688264e-06, "loss": 3.9241, "step": 23836 }, { "epoch": 7.940451403348047, "grad_norm": 0.734375, "learning_rate": 1.604283391358838e-06, "loss": 4.0364, "step": 23837 }, { "epoch": 7.940784542350295, "grad_norm": 0.8125, "learning_rate": 1.603783107721177e-06, "loss": 4.0088, "step": 23838 }, { "epoch": 7.941117681352544, "grad_norm": 0.8046875, "learning_rate": 1.6032828927616727e-06, "loss": 3.9142, "step": 23839 }, { "epoch": 7.941450820354793, "grad_norm": 0.7890625, "learning_rate": 1.6027827464861503e-06, "loss": 3.9711, "step": 23840 }, { "epoch": 7.941783959357042, "grad_norm": 0.77734375, "learning_rate": 1.602282668900435e-06, "loss": 4.0083, "step": 23841 }, { "epoch": 7.94211709835929, "grad_norm": 0.7578125, "learning_rate": 1.6017826600103506e-06, "loss": 3.996, "step": 23842 }, { "epoch": 7.942450237361539, "grad_norm": 0.7890625, "learning_rate": 1.6012827198217248e-06, "loss": 3.9924, "step": 23843 }, { "epoch": 7.942783376363788, "grad_norm": 0.73828125, "learning_rate": 1.600782848340378e-06, "loss": 3.9563, "step": 23844 }, { "epoch": 7.943116515366037, "grad_norm": 0.74609375, "learning_rate": 1.600283045572134e-06, "loss": 3.9722, "step": 23845 }, { "epoch": 7.943449654368285, "grad_norm": 0.6953125, "learning_rate": 1.5997833115228123e-06, "loss": 4.0452, "step": 23846 }, { "epoch": 7.943782793370534, "grad_norm": 0.77734375, "learning_rate": 1.5992836461982363e-06, "loss": 3.9735, "step": 23847 }, { "epoch": 7.944115932372783, "grad_norm": 0.75390625, "learning_rate": 1.5987840496042266e-06, "loss": 3.9996, "step": 23848 }, { "epoch": 7.944449071375031, "grad_norm": 0.765625, "learning_rate": 1.5982845217465978e-06, "loss": 3.9497, "step": 23849 }, { "epoch": 7.94478221037728, "grad_norm": 0.78515625, "learning_rate": 1.5977850626311723e-06, "loss": 4.0045, "step": 23850 }, { "epoch": 7.945115349379528, "grad_norm": 0.796875, "learning_rate": 1.597285672263767e-06, "loss": 3.9795, "step": 23851 }, { "epoch": 7.9454484883817775, "grad_norm": 0.75390625, "learning_rate": 1.596786350650198e-06, "loss": 3.9682, "step": 23852 }, { "epoch": 7.945781627384026, "grad_norm": 0.76953125, "learning_rate": 1.5962870977962798e-06, "loss": 3.9408, "step": 23853 }, { "epoch": 7.946114766386275, "grad_norm": 0.8046875, "learning_rate": 1.5957879137078315e-06, "loss": 3.9541, "step": 23854 }, { "epoch": 7.946447905388523, "grad_norm": 0.77734375, "learning_rate": 1.595288798390666e-06, "loss": 3.9665, "step": 23855 }, { "epoch": 7.946781044390772, "grad_norm": 0.7421875, "learning_rate": 1.5947897518505964e-06, "loss": 3.9068, "step": 23856 }, { "epoch": 7.947114183393021, "grad_norm": 0.7890625, "learning_rate": 1.594290774093435e-06, "loss": 3.937, "step": 23857 }, { "epoch": 7.947447322395269, "grad_norm": 0.76171875, "learning_rate": 1.5937918651249945e-06, "loss": 4.062, "step": 23858 }, { "epoch": 7.947780461397518, "grad_norm": 0.796875, "learning_rate": 1.5932930249510862e-06, "loss": 4.0784, "step": 23859 }, { "epoch": 7.9481136003997666, "grad_norm": 0.78125, "learning_rate": 1.5927942535775177e-06, "loss": 3.9386, "step": 23860 }, { "epoch": 7.948446739402016, "grad_norm": 0.734375, "learning_rate": 1.5922955510101035e-06, "loss": 4.0147, "step": 23861 }, { "epoch": 7.948779878404264, "grad_norm": 0.7734375, "learning_rate": 1.5917969172546502e-06, "loss": 4.0017, "step": 23862 }, { "epoch": 7.949113017406513, "grad_norm": 0.7421875, "learning_rate": 1.5912983523169657e-06, "loss": 3.9987, "step": 23863 }, { "epoch": 7.9494461564087615, "grad_norm": 0.80078125, "learning_rate": 1.590799856202855e-06, "loss": 3.9185, "step": 23864 }, { "epoch": 7.949779295411011, "grad_norm": 0.77734375, "learning_rate": 1.590301428918129e-06, "loss": 3.9613, "step": 23865 }, { "epoch": 7.950112434413259, "grad_norm": 0.78515625, "learning_rate": 1.589803070468592e-06, "loss": 3.9758, "step": 23866 }, { "epoch": 7.950445573415507, "grad_norm": 0.84765625, "learning_rate": 1.5893047808600448e-06, "loss": 3.9451, "step": 23867 }, { "epoch": 7.9507787124177565, "grad_norm": 0.75, "learning_rate": 1.5888065600982956e-06, "loss": 3.9289, "step": 23868 }, { "epoch": 7.951111851420005, "grad_norm": 0.69921875, "learning_rate": 1.5883084081891468e-06, "loss": 3.9869, "step": 23869 }, { "epoch": 7.951444990422254, "grad_norm": 0.78515625, "learning_rate": 1.5878103251384005e-06, "loss": 3.9749, "step": 23870 }, { "epoch": 7.951778129424502, "grad_norm": 0.828125, "learning_rate": 1.5873123109518558e-06, "loss": 3.984, "step": 23871 }, { "epoch": 7.952111268426751, "grad_norm": 0.73828125, "learning_rate": 1.5868143656353176e-06, "loss": 3.9902, "step": 23872 }, { "epoch": 7.952444407429, "grad_norm": 0.6875, "learning_rate": 1.5863164891945844e-06, "loss": 4.0489, "step": 23873 }, { "epoch": 7.952777546431248, "grad_norm": 0.7578125, "learning_rate": 1.585818681635455e-06, "loss": 3.9386, "step": 23874 }, { "epoch": 7.953110685433497, "grad_norm": 0.78515625, "learning_rate": 1.585320942963728e-06, "loss": 3.9561, "step": 23875 }, { "epoch": 7.9534438244357455, "grad_norm": 0.796875, "learning_rate": 1.5848232731852013e-06, "loss": 3.955, "step": 23876 }, { "epoch": 7.953776963437995, "grad_norm": 0.79296875, "learning_rate": 1.5843256723056704e-06, "loss": 4.0277, "step": 23877 }, { "epoch": 7.954110102440243, "grad_norm": 0.78515625, "learning_rate": 1.583828140330933e-06, "loss": 4.0066, "step": 23878 }, { "epoch": 7.954443241442492, "grad_norm": 0.76171875, "learning_rate": 1.5833306772667802e-06, "loss": 3.9682, "step": 23879 }, { "epoch": 7.9547763804447404, "grad_norm": 0.7734375, "learning_rate": 1.5828332831190126e-06, "loss": 4.0568, "step": 23880 }, { "epoch": 7.95510951944699, "grad_norm": 0.7421875, "learning_rate": 1.5823359578934196e-06, "loss": 4.0325, "step": 23881 }, { "epoch": 7.955442658449238, "grad_norm": 0.78125, "learning_rate": 1.5818387015957957e-06, "loss": 3.9904, "step": 23882 }, { "epoch": 7.955775797451487, "grad_norm": 0.765625, "learning_rate": 1.58134151423193e-06, "loss": 3.9651, "step": 23883 }, { "epoch": 7.956108936453735, "grad_norm": 0.8046875, "learning_rate": 1.5808443958076205e-06, "loss": 3.9961, "step": 23884 }, { "epoch": 7.956442075455984, "grad_norm": 0.75, "learning_rate": 1.5803473463286504e-06, "loss": 4.0596, "step": 23885 }, { "epoch": 7.956775214458233, "grad_norm": 0.77734375, "learning_rate": 1.5798503658008095e-06, "loss": 4.0333, "step": 23886 }, { "epoch": 7.957108353460481, "grad_norm": 0.76171875, "learning_rate": 1.5793534542298913e-06, "loss": 3.9219, "step": 23887 }, { "epoch": 7.95744149246273, "grad_norm": 0.76171875, "learning_rate": 1.578856611621681e-06, "loss": 3.9328, "step": 23888 }, { "epoch": 7.957774631464979, "grad_norm": 0.7734375, "learning_rate": 1.5783598379819658e-06, "loss": 4.0162, "step": 23889 }, { "epoch": 7.958107770467228, "grad_norm": 0.76953125, "learning_rate": 1.577863133316531e-06, "loss": 3.9612, "step": 23890 }, { "epoch": 7.958440909469476, "grad_norm": 0.80859375, "learning_rate": 1.5773664976311647e-06, "loss": 3.938, "step": 23891 }, { "epoch": 7.958774048471724, "grad_norm": 0.79296875, "learning_rate": 1.5768699309316503e-06, "loss": 3.9982, "step": 23892 }, { "epoch": 7.959107187473974, "grad_norm": 0.78125, "learning_rate": 1.5763734332237716e-06, "loss": 3.9429, "step": 23893 }, { "epoch": 7.959440326476222, "grad_norm": 0.73828125, "learning_rate": 1.5758770045133126e-06, "loss": 3.9774, "step": 23894 }, { "epoch": 7.959773465478471, "grad_norm": 0.74609375, "learning_rate": 1.575380644806054e-06, "loss": 3.9716, "step": 23895 }, { "epoch": 7.960106604480719, "grad_norm": 0.74609375, "learning_rate": 1.574884354107779e-06, "loss": 3.9335, "step": 23896 }, { "epoch": 7.960439743482969, "grad_norm": 0.7265625, "learning_rate": 1.5743881324242653e-06, "loss": 3.9741, "step": 23897 }, { "epoch": 7.960772882485217, "grad_norm": 0.765625, "learning_rate": 1.5738919797612968e-06, "loss": 3.9502, "step": 23898 }, { "epoch": 7.961106021487465, "grad_norm": 0.7578125, "learning_rate": 1.5733958961246506e-06, "loss": 3.9857, "step": 23899 }, { "epoch": 7.961439160489714, "grad_norm": 0.796875, "learning_rate": 1.572899881520105e-06, "loss": 4.0041, "step": 23900 }, { "epoch": 7.961772299491963, "grad_norm": 0.7734375, "learning_rate": 1.5724039359534368e-06, "loss": 4.0669, "step": 23901 }, { "epoch": 7.962105438494212, "grad_norm": 0.78125, "learning_rate": 1.5719080594304266e-06, "loss": 4.029, "step": 23902 }, { "epoch": 7.96243857749646, "grad_norm": 0.79296875, "learning_rate": 1.5714122519568452e-06, "loss": 3.9381, "step": 23903 }, { "epoch": 7.962771716498709, "grad_norm": 0.73828125, "learning_rate": 1.5709165135384687e-06, "loss": 3.9742, "step": 23904 }, { "epoch": 7.963104855500958, "grad_norm": 0.7734375, "learning_rate": 1.5704208441810732e-06, "loss": 3.898, "step": 23905 }, { "epoch": 7.963437994503207, "grad_norm": 0.74609375, "learning_rate": 1.5699252438904328e-06, "loss": 3.9923, "step": 23906 }, { "epoch": 7.963771133505455, "grad_norm": 0.7578125, "learning_rate": 1.5694297126723182e-06, "loss": 3.9132, "step": 23907 }, { "epoch": 7.964104272507704, "grad_norm": 0.734375, "learning_rate": 1.5689342505324992e-06, "loss": 3.9537, "step": 23908 }, { "epoch": 7.9644374115099525, "grad_norm": 0.796875, "learning_rate": 1.568438857476752e-06, "loss": 4.1015, "step": 23909 }, { "epoch": 7.964770550512201, "grad_norm": 0.74609375, "learning_rate": 1.5679435335108444e-06, "loss": 4.0111, "step": 23910 }, { "epoch": 7.96510368951445, "grad_norm": 0.74609375, "learning_rate": 1.567448278640546e-06, "loss": 4.0046, "step": 23911 }, { "epoch": 7.965436828516698, "grad_norm": 0.76953125, "learning_rate": 1.566953092871625e-06, "loss": 3.9759, "step": 23912 }, { "epoch": 7.9657699675189475, "grad_norm": 0.78125, "learning_rate": 1.5664579762098496e-06, "loss": 3.9053, "step": 23913 }, { "epoch": 7.966103106521196, "grad_norm": 0.765625, "learning_rate": 1.5659629286609871e-06, "loss": 3.962, "step": 23914 }, { "epoch": 7.966436245523445, "grad_norm": 0.74609375, "learning_rate": 1.5654679502308013e-06, "loss": 3.9479, "step": 23915 }, { "epoch": 7.966769384525693, "grad_norm": 0.76171875, "learning_rate": 1.5649730409250609e-06, "loss": 4.0213, "step": 23916 }, { "epoch": 7.967102523527942, "grad_norm": 0.796875, "learning_rate": 1.5644782007495303e-06, "loss": 4.0554, "step": 23917 }, { "epoch": 7.967435662530191, "grad_norm": 0.75390625, "learning_rate": 1.5639834297099717e-06, "loss": 3.934, "step": 23918 }, { "epoch": 7.967768801532439, "grad_norm": 0.73828125, "learning_rate": 1.563488727812147e-06, "loss": 4.0147, "step": 23919 }, { "epoch": 7.968101940534688, "grad_norm": 0.7578125, "learning_rate": 1.5629940950618243e-06, "loss": 3.9963, "step": 23920 }, { "epoch": 7.9684350795369365, "grad_norm": 0.71484375, "learning_rate": 1.5624995314647589e-06, "loss": 3.9612, "step": 23921 }, { "epoch": 7.968768218539186, "grad_norm": 0.7421875, "learning_rate": 1.5620050370267129e-06, "loss": 4.037, "step": 23922 }, { "epoch": 7.969101357541434, "grad_norm": 0.74609375, "learning_rate": 1.5615106117534448e-06, "loss": 3.9672, "step": 23923 }, { "epoch": 7.969434496543683, "grad_norm": 0.78125, "learning_rate": 1.5610162556507176e-06, "loss": 3.9169, "step": 23924 }, { "epoch": 7.9697676355459315, "grad_norm": 0.78515625, "learning_rate": 1.5605219687242869e-06, "loss": 4.0049, "step": 23925 }, { "epoch": 7.970100774548181, "grad_norm": 0.7734375, "learning_rate": 1.5600277509799104e-06, "loss": 4.0386, "step": 23926 }, { "epoch": 7.970433913550429, "grad_norm": 0.78515625, "learning_rate": 1.5595336024233428e-06, "loss": 4.0449, "step": 23927 }, { "epoch": 7.970767052552677, "grad_norm": 0.77734375, "learning_rate": 1.5590395230603436e-06, "loss": 4.0293, "step": 23928 }, { "epoch": 7.971100191554926, "grad_norm": 0.765625, "learning_rate": 1.558545512896668e-06, "loss": 3.9922, "step": 23929 }, { "epoch": 7.971433330557175, "grad_norm": 0.76953125, "learning_rate": 1.5580515719380633e-06, "loss": 4.0153, "step": 23930 }, { "epoch": 7.971766469559424, "grad_norm": 0.78125, "learning_rate": 1.55755770019029e-06, "loss": 4.0641, "step": 23931 }, { "epoch": 7.972099608561672, "grad_norm": 0.7890625, "learning_rate": 1.557063897659098e-06, "loss": 3.9751, "step": 23932 }, { "epoch": 7.972432747563921, "grad_norm": 0.83203125, "learning_rate": 1.5565701643502401e-06, "loss": 3.971, "step": 23933 }, { "epoch": 7.97276588656617, "grad_norm": 0.87109375, "learning_rate": 1.5560765002694635e-06, "loss": 3.9451, "step": 23934 }, { "epoch": 7.973099025568418, "grad_norm": 0.80859375, "learning_rate": 1.5555829054225232e-06, "loss": 4.0613, "step": 23935 }, { "epoch": 7.973432164570667, "grad_norm": 0.8515625, "learning_rate": 1.555089379815167e-06, "loss": 3.9676, "step": 23936 }, { "epoch": 7.9737653035729155, "grad_norm": 0.76953125, "learning_rate": 1.5545959234531426e-06, "loss": 4.0012, "step": 23937 }, { "epoch": 7.974098442575165, "grad_norm": 0.76171875, "learning_rate": 1.5541025363421972e-06, "loss": 3.9118, "step": 23938 }, { "epoch": 7.974431581577413, "grad_norm": 0.76171875, "learning_rate": 1.5536092184880799e-06, "loss": 3.9888, "step": 23939 }, { "epoch": 7.974764720579662, "grad_norm": 0.78125, "learning_rate": 1.5531159698965338e-06, "loss": 3.9793, "step": 23940 }, { "epoch": 7.97509785958191, "grad_norm": 0.8046875, "learning_rate": 1.5526227905733048e-06, "loss": 3.9956, "step": 23941 }, { "epoch": 7.97543099858416, "grad_norm": 0.78125, "learning_rate": 1.5521296805241398e-06, "loss": 4.0328, "step": 23942 }, { "epoch": 7.975764137586408, "grad_norm": 0.734375, "learning_rate": 1.5516366397547815e-06, "loss": 3.971, "step": 23943 }, { "epoch": 7.976097276588657, "grad_norm": 0.73046875, "learning_rate": 1.5511436682709714e-06, "loss": 3.9529, "step": 23944 }, { "epoch": 7.976430415590905, "grad_norm": 0.76171875, "learning_rate": 1.5506507660784509e-06, "loss": 4.0123, "step": 23945 }, { "epoch": 7.976763554593154, "grad_norm": 0.765625, "learning_rate": 1.5501579331829648e-06, "loss": 3.9534, "step": 23946 }, { "epoch": 7.977096693595403, "grad_norm": 0.76953125, "learning_rate": 1.5496651695902534e-06, "loss": 3.9999, "step": 23947 }, { "epoch": 7.977429832597651, "grad_norm": 0.73828125, "learning_rate": 1.5491724753060499e-06, "loss": 3.9576, "step": 23948 }, { "epoch": 7.9777629715999, "grad_norm": 0.75, "learning_rate": 1.5486798503361e-06, "loss": 3.9675, "step": 23949 }, { "epoch": 7.978096110602149, "grad_norm": 0.79296875, "learning_rate": 1.5481872946861398e-06, "loss": 3.9952, "step": 23950 }, { "epoch": 7.978429249604398, "grad_norm": 0.765625, "learning_rate": 1.5476948083619046e-06, "loss": 3.9939, "step": 23951 }, { "epoch": 7.978762388606646, "grad_norm": 0.765625, "learning_rate": 1.547202391369132e-06, "loss": 4.0199, "step": 23952 }, { "epoch": 7.979095527608894, "grad_norm": 0.75390625, "learning_rate": 1.5467100437135584e-06, "loss": 4.0195, "step": 23953 }, { "epoch": 7.979428666611144, "grad_norm": 0.77734375, "learning_rate": 1.5462177654009188e-06, "loss": 4.0378, "step": 23954 }, { "epoch": 7.979761805613392, "grad_norm": 0.828125, "learning_rate": 1.545725556436946e-06, "loss": 3.9248, "step": 23955 }, { "epoch": 7.980094944615641, "grad_norm": 0.74609375, "learning_rate": 1.545233416827373e-06, "loss": 4.0228, "step": 23956 }, { "epoch": 7.980428083617889, "grad_norm": 0.75, "learning_rate": 1.5447413465779334e-06, "loss": 3.9401, "step": 23957 }, { "epoch": 7.9807612226201385, "grad_norm": 0.77734375, "learning_rate": 1.5442493456943577e-06, "loss": 3.959, "step": 23958 }, { "epoch": 7.981094361622387, "grad_norm": 0.765625, "learning_rate": 1.5437574141823753e-06, "loss": 3.9517, "step": 23959 }, { "epoch": 7.981427500624636, "grad_norm": 0.76171875, "learning_rate": 1.5432655520477196e-06, "loss": 3.9701, "step": 23960 }, { "epoch": 7.981760639626884, "grad_norm": 0.765625, "learning_rate": 1.542773759296118e-06, "loss": 3.9839, "step": 23961 }, { "epoch": 7.9820937786291335, "grad_norm": 0.75390625, "learning_rate": 1.5422820359332988e-06, "loss": 3.9183, "step": 23962 }, { "epoch": 7.982426917631382, "grad_norm": 0.7890625, "learning_rate": 1.5417903819649877e-06, "loss": 3.9621, "step": 23963 }, { "epoch": 7.98276005663363, "grad_norm": 0.76171875, "learning_rate": 1.5412987973969151e-06, "loss": 3.9903, "step": 23964 }, { "epoch": 7.983093195635879, "grad_norm": 0.76171875, "learning_rate": 1.5408072822348065e-06, "loss": 4.0146, "step": 23965 }, { "epoch": 7.983426334638128, "grad_norm": 0.80078125, "learning_rate": 1.5403158364843844e-06, "loss": 3.9977, "step": 23966 }, { "epoch": 7.983759473640377, "grad_norm": 0.78515625, "learning_rate": 1.5398244601513708e-06, "loss": 3.9613, "step": 23967 }, { "epoch": 7.984092612642625, "grad_norm": 0.76171875, "learning_rate": 1.5393331532414962e-06, "loss": 4.0365, "step": 23968 }, { "epoch": 7.984425751644874, "grad_norm": 0.76953125, "learning_rate": 1.5388419157604783e-06, "loss": 3.9324, "step": 23969 }, { "epoch": 7.9847588906471225, "grad_norm": 0.7890625, "learning_rate": 1.5383507477140402e-06, "loss": 3.9472, "step": 23970 }, { "epoch": 7.985092029649371, "grad_norm": 0.7734375, "learning_rate": 1.5378596491079014e-06, "loss": 4.0278, "step": 23971 }, { "epoch": 7.98542516865162, "grad_norm": 0.76953125, "learning_rate": 1.5373686199477857e-06, "loss": 4.0042, "step": 23972 }, { "epoch": 7.985758307653868, "grad_norm": 0.765625, "learning_rate": 1.536877660239411e-06, "loss": 3.9622, "step": 23973 }, { "epoch": 7.9860914466561175, "grad_norm": 0.72265625, "learning_rate": 1.5363867699884953e-06, "loss": 3.9675, "step": 23974 }, { "epoch": 7.986424585658366, "grad_norm": 0.80859375, "learning_rate": 1.5358959492007557e-06, "loss": 3.9576, "step": 23975 }, { "epoch": 7.986757724660615, "grad_norm": 0.80078125, "learning_rate": 1.5354051978819109e-06, "loss": 3.934, "step": 23976 }, { "epoch": 7.987090863662863, "grad_norm": 0.7890625, "learning_rate": 1.5349145160376763e-06, "loss": 3.9622, "step": 23977 }, { "epoch": 7.9874240026651115, "grad_norm": 0.73828125, "learning_rate": 1.5344239036737648e-06, "loss": 3.881, "step": 23978 }, { "epoch": 7.987757141667361, "grad_norm": 0.78515625, "learning_rate": 1.5339333607958952e-06, "loss": 3.9877, "step": 23979 }, { "epoch": 7.988090280669609, "grad_norm": 0.76171875, "learning_rate": 1.5334428874097798e-06, "loss": 3.999, "step": 23980 }, { "epoch": 7.988423419671858, "grad_norm": 0.7421875, "learning_rate": 1.5329524835211312e-06, "loss": 3.9054, "step": 23981 }, { "epoch": 7.9887565586741065, "grad_norm": 0.8046875, "learning_rate": 1.53246214913566e-06, "loss": 3.9997, "step": 23982 }, { "epoch": 7.989089697676356, "grad_norm": 0.796875, "learning_rate": 1.5319718842590824e-06, "loss": 4.0138, "step": 23983 }, { "epoch": 7.989422836678604, "grad_norm": 0.7578125, "learning_rate": 1.531481688897104e-06, "loss": 3.9463, "step": 23984 }, { "epoch": 7.989755975680853, "grad_norm": 0.7265625, "learning_rate": 1.530991563055434e-06, "loss": 3.9148, "step": 23985 }, { "epoch": 7.9900891146831015, "grad_norm": 0.75, "learning_rate": 1.5305015067397852e-06, "loss": 3.9474, "step": 23986 }, { "epoch": 7.990422253685351, "grad_norm": 0.796875, "learning_rate": 1.5300115199558647e-06, "loss": 3.9673, "step": 23987 }, { "epoch": 7.990755392687599, "grad_norm": 0.76171875, "learning_rate": 1.5295216027093786e-06, "loss": 3.9937, "step": 23988 }, { "epoch": 7.991088531689847, "grad_norm": 0.796875, "learning_rate": 1.5290317550060313e-06, "loss": 3.9164, "step": 23989 }, { "epoch": 7.991421670692096, "grad_norm": 0.8125, "learning_rate": 1.5285419768515337e-06, "loss": 4.0107, "step": 23990 }, { "epoch": 7.991754809694345, "grad_norm": 0.72265625, "learning_rate": 1.5280522682515877e-06, "loss": 3.9678, "step": 23991 }, { "epoch": 7.992087948696594, "grad_norm": 0.73828125, "learning_rate": 1.5275626292118974e-06, "loss": 3.9956, "step": 23992 }, { "epoch": 7.992421087698842, "grad_norm": 0.75390625, "learning_rate": 1.527073059738166e-06, "loss": 3.9985, "step": 23993 }, { "epoch": 7.992754226701091, "grad_norm": 0.7890625, "learning_rate": 1.5265835598360964e-06, "loss": 3.9208, "step": 23994 }, { "epoch": 7.99308736570334, "grad_norm": 0.75390625, "learning_rate": 1.5260941295113899e-06, "loss": 3.9934, "step": 23995 }, { "epoch": 7.993420504705588, "grad_norm": 0.7890625, "learning_rate": 1.525604768769745e-06, "loss": 3.954, "step": 23996 }, { "epoch": 7.993753643707837, "grad_norm": 0.7734375, "learning_rate": 1.5251154776168665e-06, "loss": 4.0191, "step": 23997 }, { "epoch": 7.994086782710085, "grad_norm": 0.78515625, "learning_rate": 1.524626256058451e-06, "loss": 3.9722, "step": 23998 }, { "epoch": 7.994419921712335, "grad_norm": 0.75390625, "learning_rate": 1.5241371041001975e-06, "loss": 3.9436, "step": 23999 }, { "epoch": 7.994753060714583, "grad_norm": 0.734375, "learning_rate": 1.5236480217478002e-06, "loss": 4.0795, "step": 24000 }, { "epoch": 7.995086199716832, "grad_norm": 0.7734375, "learning_rate": 1.5231590090069628e-06, "loss": 3.9961, "step": 24001 }, { "epoch": 7.99541933871908, "grad_norm": 0.75390625, "learning_rate": 1.5226700658833755e-06, "loss": 4.0569, "step": 24002 }, { "epoch": 7.99575247772133, "grad_norm": 0.7734375, "learning_rate": 1.5221811923827324e-06, "loss": 3.9405, "step": 24003 }, { "epoch": 7.996085616723578, "grad_norm": 0.765625, "learning_rate": 1.521692388510733e-06, "loss": 4.0558, "step": 24004 }, { "epoch": 7.996418755725827, "grad_norm": 0.734375, "learning_rate": 1.5212036542730687e-06, "loss": 3.954, "step": 24005 }, { "epoch": 7.996751894728075, "grad_norm": 0.73046875, "learning_rate": 1.5207149896754312e-06, "loss": 3.9992, "step": 24006 }, { "epoch": 7.997085033730324, "grad_norm": 0.796875, "learning_rate": 1.5202263947235107e-06, "loss": 4.0293, "step": 24007 }, { "epoch": 7.997418172732573, "grad_norm": 0.75, "learning_rate": 1.519737869423003e-06, "loss": 3.9963, "step": 24008 }, { "epoch": 7.997751311734821, "grad_norm": 0.7578125, "learning_rate": 1.5192494137795962e-06, "loss": 4.006, "step": 24009 }, { "epoch": 7.99808445073707, "grad_norm": 0.75, "learning_rate": 1.5187610277989807e-06, "loss": 4.0355, "step": 24010 }, { "epoch": 7.998417589739319, "grad_norm": 0.7734375, "learning_rate": 1.5182727114868406e-06, "loss": 3.9645, "step": 24011 }, { "epoch": 7.998750728741568, "grad_norm": 0.7578125, "learning_rate": 1.5177844648488683e-06, "loss": 4.1101, "step": 24012 }, { "epoch": 7.999083867743816, "grad_norm": 0.78125, "learning_rate": 1.5172962878907498e-06, "loss": 4.0051, "step": 24013 }, { "epoch": 7.999417006746064, "grad_norm": 0.76171875, "learning_rate": 1.516808180618171e-06, "loss": 3.9222, "step": 24014 }, { "epoch": 7.999750145748314, "grad_norm": 0.76171875, "learning_rate": 1.5163201430368151e-06, "loss": 3.9358, "step": 24015 }, { "epoch": 8.0, "grad_norm": 0.8359375, "learning_rate": 1.5158321751523716e-06, "loss": 3.9838, "step": 24016 }, { "epoch": 8.00033313900225, "grad_norm": 0.7578125, "learning_rate": 1.5153442769705208e-06, "loss": 4.0288, "step": 24017 }, { "epoch": 8.000666278004497, "grad_norm": 0.74609375, "learning_rate": 1.514856448496947e-06, "loss": 4.0164, "step": 24018 }, { "epoch": 8.000999417006746, "grad_norm": 0.7734375, "learning_rate": 1.5143686897373315e-06, "loss": 4.0015, "step": 24019 }, { "epoch": 8.001332556008995, "grad_norm": 0.7265625, "learning_rate": 1.5138810006973564e-06, "loss": 4.0452, "step": 24020 }, { "epoch": 8.001665695011244, "grad_norm": 0.76171875, "learning_rate": 1.513393381382701e-06, "loss": 3.9626, "step": 24021 }, { "epoch": 8.001998834013492, "grad_norm": 0.7578125, "learning_rate": 1.5129058317990437e-06, "loss": 4.0259, "step": 24022 }, { "epoch": 8.00233197301574, "grad_norm": 0.80859375, "learning_rate": 1.512418351952068e-06, "loss": 3.9781, "step": 24023 }, { "epoch": 8.00266511201799, "grad_norm": 0.77734375, "learning_rate": 1.5119309418474495e-06, "loss": 3.9014, "step": 24024 }, { "epoch": 8.002998251020237, "grad_norm": 0.765625, "learning_rate": 1.5114436014908648e-06, "loss": 3.9228, "step": 24025 }, { "epoch": 8.003331390022487, "grad_norm": 0.75390625, "learning_rate": 1.5109563308879895e-06, "loss": 4.0622, "step": 24026 }, { "epoch": 8.003664529024736, "grad_norm": 0.79296875, "learning_rate": 1.5104691300445042e-06, "loss": 4.0621, "step": 24027 }, { "epoch": 8.003997668026985, "grad_norm": 0.796875, "learning_rate": 1.5099819989660784e-06, "loss": 4.0153, "step": 24028 }, { "epoch": 8.004330807029232, "grad_norm": 0.80078125, "learning_rate": 1.509494937658386e-06, "loss": 4.0065, "step": 24029 }, { "epoch": 8.004663946031481, "grad_norm": 0.7890625, "learning_rate": 1.5090079461271042e-06, "loss": 4.015, "step": 24030 }, { "epoch": 8.00499708503373, "grad_norm": 0.73828125, "learning_rate": 1.508521024377904e-06, "loss": 3.9461, "step": 24031 }, { "epoch": 8.00533022403598, "grad_norm": 0.76171875, "learning_rate": 1.5080341724164553e-06, "loss": 3.954, "step": 24032 }, { "epoch": 8.005663363038227, "grad_norm": 0.71875, "learning_rate": 1.5075473902484285e-06, "loss": 3.9764, "step": 24033 }, { "epoch": 8.005996502040476, "grad_norm": 0.796875, "learning_rate": 1.5070606778794965e-06, "loss": 3.9624, "step": 24034 }, { "epoch": 8.006329641042726, "grad_norm": 0.74609375, "learning_rate": 1.5065740353153273e-06, "loss": 3.9645, "step": 24035 }, { "epoch": 8.006662780044973, "grad_norm": 0.75390625, "learning_rate": 1.5060874625615889e-06, "loss": 3.9825, "step": 24036 }, { "epoch": 8.006995919047222, "grad_norm": 0.75, "learning_rate": 1.5056009596239484e-06, "loss": 3.8982, "step": 24037 }, { "epoch": 8.007329058049471, "grad_norm": 0.81640625, "learning_rate": 1.5051145265080738e-06, "loss": 4.0701, "step": 24038 }, { "epoch": 8.00766219705172, "grad_norm": 0.7265625, "learning_rate": 1.5046281632196288e-06, "loss": 4.0031, "step": 24039 }, { "epoch": 8.007995336053968, "grad_norm": 0.78515625, "learning_rate": 1.5041418697642781e-06, "loss": 3.9349, "step": 24040 }, { "epoch": 8.008328475056217, "grad_norm": 0.7734375, "learning_rate": 1.5036556461476905e-06, "loss": 3.8964, "step": 24041 }, { "epoch": 8.008661614058466, "grad_norm": 0.76953125, "learning_rate": 1.5031694923755256e-06, "loss": 3.9636, "step": 24042 }, { "epoch": 8.008994753060714, "grad_norm": 0.7734375, "learning_rate": 1.5026834084534474e-06, "loss": 3.9807, "step": 24043 }, { "epoch": 8.009327892062963, "grad_norm": 0.74609375, "learning_rate": 1.5021973943871153e-06, "loss": 3.9528, "step": 24044 }, { "epoch": 8.009661031065212, "grad_norm": 0.7890625, "learning_rate": 1.5017114501821957e-06, "loss": 4.0351, "step": 24045 }, { "epoch": 8.009994170067461, "grad_norm": 0.76171875, "learning_rate": 1.501225575844344e-06, "loss": 4.0298, "step": 24046 }, { "epoch": 8.010327309069709, "grad_norm": 0.78125, "learning_rate": 1.5007397713792192e-06, "loss": 4.0253, "step": 24047 }, { "epoch": 8.010660448071958, "grad_norm": 0.7421875, "learning_rate": 1.5002540367924834e-06, "loss": 4.0202, "step": 24048 }, { "epoch": 8.010993587074207, "grad_norm": 0.7265625, "learning_rate": 1.4997683720897934e-06, "loss": 4.0335, "step": 24049 }, { "epoch": 8.011326726076456, "grad_norm": 0.73828125, "learning_rate": 1.499282777276804e-06, "loss": 3.9805, "step": 24050 }, { "epoch": 8.011659865078704, "grad_norm": 0.76171875, "learning_rate": 1.498797252359172e-06, "loss": 3.9136, "step": 24051 }, { "epoch": 8.011993004080953, "grad_norm": 0.76171875, "learning_rate": 1.4983117973425548e-06, "loss": 4.0473, "step": 24052 }, { "epoch": 8.012326143083202, "grad_norm": 0.86328125, "learning_rate": 1.4978264122326054e-06, "loss": 4.0058, "step": 24053 }, { "epoch": 8.01265928208545, "grad_norm": 0.76953125, "learning_rate": 1.4973410970349797e-06, "loss": 3.9656, "step": 24054 }, { "epoch": 8.012992421087699, "grad_norm": 0.7734375, "learning_rate": 1.4968558517553243e-06, "loss": 3.9807, "step": 24055 }, { "epoch": 8.013325560089948, "grad_norm": 0.7890625, "learning_rate": 1.4963706763992976e-06, "loss": 3.9079, "step": 24056 }, { "epoch": 8.013658699092197, "grad_norm": 0.76171875, "learning_rate": 1.4958855709725492e-06, "loss": 3.9793, "step": 24057 }, { "epoch": 8.013991838094444, "grad_norm": 0.78515625, "learning_rate": 1.4954005354807285e-06, "loss": 3.9285, "step": 24058 }, { "epoch": 8.014324977096694, "grad_norm": 0.78515625, "learning_rate": 1.4949155699294837e-06, "loss": 3.9336, "step": 24059 }, { "epoch": 8.014658116098943, "grad_norm": 0.73046875, "learning_rate": 1.4944306743244675e-06, "loss": 4.0015, "step": 24060 }, { "epoch": 8.01499125510119, "grad_norm": 0.765625, "learning_rate": 1.4939458486713258e-06, "loss": 3.9559, "step": 24061 }, { "epoch": 8.01532439410344, "grad_norm": 0.74609375, "learning_rate": 1.4934610929757064e-06, "loss": 3.9967, "step": 24062 }, { "epoch": 8.015657533105689, "grad_norm": 0.75390625, "learning_rate": 1.4929764072432547e-06, "loss": 3.9425, "step": 24063 }, { "epoch": 8.015990672107938, "grad_norm": 0.7421875, "learning_rate": 1.492491791479617e-06, "loss": 3.9761, "step": 24064 }, { "epoch": 8.016323811110185, "grad_norm": 0.7578125, "learning_rate": 1.4920072456904373e-06, "loss": 3.946, "step": 24065 }, { "epoch": 8.016656950112434, "grad_norm": 0.734375, "learning_rate": 1.4915227698813576e-06, "loss": 3.9282, "step": 24066 }, { "epoch": 8.016990089114683, "grad_norm": 0.77734375, "learning_rate": 1.491038364058026e-06, "loss": 3.9543, "step": 24067 }, { "epoch": 8.017323228116933, "grad_norm": 0.76953125, "learning_rate": 1.4905540282260812e-06, "loss": 3.9538, "step": 24068 }, { "epoch": 8.01765636711918, "grad_norm": 0.7578125, "learning_rate": 1.4900697623911663e-06, "loss": 3.9303, "step": 24069 }, { "epoch": 8.01798950612143, "grad_norm": 0.75390625, "learning_rate": 1.4895855665589192e-06, "loss": 3.9794, "step": 24070 }, { "epoch": 8.018322645123678, "grad_norm": 0.78125, "learning_rate": 1.4891014407349838e-06, "loss": 4.0132, "step": 24071 }, { "epoch": 8.018655784125926, "grad_norm": 0.75390625, "learning_rate": 1.4886173849249987e-06, "loss": 3.9527, "step": 24072 }, { "epoch": 8.018988923128175, "grad_norm": 0.73046875, "learning_rate": 1.4881333991345964e-06, "loss": 3.9958, "step": 24073 }, { "epoch": 8.019322062130424, "grad_norm": 0.78125, "learning_rate": 1.4876494833694205e-06, "loss": 3.9998, "step": 24074 }, { "epoch": 8.019655201132673, "grad_norm": 0.77734375, "learning_rate": 1.4871656376351048e-06, "loss": 3.945, "step": 24075 }, { "epoch": 8.01998834013492, "grad_norm": 0.75, "learning_rate": 1.4866818619372856e-06, "loss": 3.9359, "step": 24076 }, { "epoch": 8.02032147913717, "grad_norm": 0.71875, "learning_rate": 1.486198156281597e-06, "loss": 4.0044, "step": 24077 }, { "epoch": 8.02065461813942, "grad_norm": 0.77734375, "learning_rate": 1.4857145206736753e-06, "loss": 4.0235, "step": 24078 }, { "epoch": 8.020987757141667, "grad_norm": 0.75, "learning_rate": 1.4852309551191523e-06, "loss": 4.0291, "step": 24079 }, { "epoch": 8.021320896143916, "grad_norm": 0.734375, "learning_rate": 1.4847474596236613e-06, "loss": 3.9996, "step": 24080 }, { "epoch": 8.021654035146165, "grad_norm": 0.76171875, "learning_rate": 1.4842640341928335e-06, "loss": 4.0182, "step": 24081 }, { "epoch": 8.021987174148414, "grad_norm": 0.74609375, "learning_rate": 1.4837806788322985e-06, "loss": 3.9641, "step": 24082 }, { "epoch": 8.022320313150662, "grad_norm": 0.80078125, "learning_rate": 1.4832973935476888e-06, "loss": 3.9705, "step": 24083 }, { "epoch": 8.02265345215291, "grad_norm": 0.77734375, "learning_rate": 1.4828141783446295e-06, "loss": 3.9293, "step": 24084 }, { "epoch": 8.02298659115516, "grad_norm": 0.7734375, "learning_rate": 1.4823310332287537e-06, "loss": 3.9175, "step": 24085 }, { "epoch": 8.023319730157407, "grad_norm": 0.71875, "learning_rate": 1.4818479582056878e-06, "loss": 4.0144, "step": 24086 }, { "epoch": 8.023652869159656, "grad_norm": 0.7734375, "learning_rate": 1.4813649532810575e-06, "loss": 3.9725, "step": 24087 }, { "epoch": 8.023986008161906, "grad_norm": 0.71484375, "learning_rate": 1.480882018460487e-06, "loss": 3.9394, "step": 24088 }, { "epoch": 8.024319147164155, "grad_norm": 0.78515625, "learning_rate": 1.4803991537496056e-06, "loss": 4.0734, "step": 24089 }, { "epoch": 8.024652286166402, "grad_norm": 0.74609375, "learning_rate": 1.4799163591540384e-06, "loss": 4.0301, "step": 24090 }, { "epoch": 8.024985425168651, "grad_norm": 0.8046875, "learning_rate": 1.4794336346794013e-06, "loss": 3.9849, "step": 24091 }, { "epoch": 8.0253185641709, "grad_norm": 0.75390625, "learning_rate": 1.4789509803313237e-06, "loss": 3.9569, "step": 24092 }, { "epoch": 8.02565170317315, "grad_norm": 0.78125, "learning_rate": 1.478468396115426e-06, "loss": 3.9855, "step": 24093 }, { "epoch": 8.025984842175397, "grad_norm": 0.75, "learning_rate": 1.477985882037329e-06, "loss": 4.029, "step": 24094 }, { "epoch": 8.026317981177646, "grad_norm": 0.8046875, "learning_rate": 1.4775034381026507e-06, "loss": 3.9822, "step": 24095 }, { "epoch": 8.026651120179896, "grad_norm": 0.73828125, "learning_rate": 1.4770210643170145e-06, "loss": 4.0018, "step": 24096 }, { "epoch": 8.026984259182143, "grad_norm": 0.7421875, "learning_rate": 1.4765387606860374e-06, "loss": 3.9551, "step": 24097 }, { "epoch": 8.027317398184392, "grad_norm": 0.7890625, "learning_rate": 1.4760565272153366e-06, "loss": 3.9312, "step": 24098 }, { "epoch": 8.027650537186641, "grad_norm": 0.75, "learning_rate": 1.47557436391053e-06, "loss": 3.8949, "step": 24099 }, { "epoch": 8.02798367618889, "grad_norm": 0.765625, "learning_rate": 1.475092270777233e-06, "loss": 4.0274, "step": 24100 }, { "epoch": 8.028316815191138, "grad_norm": 0.76953125, "learning_rate": 1.4746102478210608e-06, "loss": 3.9811, "step": 24101 }, { "epoch": 8.028649954193387, "grad_norm": 0.7578125, "learning_rate": 1.474128295047628e-06, "loss": 4.0557, "step": 24102 }, { "epoch": 8.028983093195636, "grad_norm": 0.76953125, "learning_rate": 1.4736464124625473e-06, "loss": 3.9243, "step": 24103 }, { "epoch": 8.029316232197884, "grad_norm": 0.8046875, "learning_rate": 1.4731646000714337e-06, "loss": 3.976, "step": 24104 }, { "epoch": 8.029649371200133, "grad_norm": 0.76171875, "learning_rate": 1.4726828578798998e-06, "loss": 4.0555, "step": 24105 }, { "epoch": 8.029982510202382, "grad_norm": 0.74609375, "learning_rate": 1.472201185893555e-06, "loss": 3.9943, "step": 24106 }, { "epoch": 8.030315649204631, "grad_norm": 0.796875, "learning_rate": 1.4717195841180073e-06, "loss": 3.9484, "step": 24107 }, { "epoch": 8.030648788206879, "grad_norm": 0.7578125, "learning_rate": 1.471238052558874e-06, "loss": 3.9447, "step": 24108 }, { "epoch": 8.030981927209128, "grad_norm": 0.74609375, "learning_rate": 1.4707565912217572e-06, "loss": 3.9439, "step": 24109 }, { "epoch": 8.031315066211377, "grad_norm": 0.7734375, "learning_rate": 1.4702752001122649e-06, "loss": 3.9611, "step": 24110 }, { "epoch": 8.031648205213626, "grad_norm": 0.734375, "learning_rate": 1.4697938792360078e-06, "loss": 4.0595, "step": 24111 }, { "epoch": 8.031981344215874, "grad_norm": 0.7421875, "learning_rate": 1.4693126285985903e-06, "loss": 3.9463, "step": 24112 }, { "epoch": 8.032314483218123, "grad_norm": 0.734375, "learning_rate": 1.468831448205619e-06, "loss": 3.9787, "step": 24113 }, { "epoch": 8.032647622220372, "grad_norm": 0.75390625, "learning_rate": 1.468350338062695e-06, "loss": 3.9863, "step": 24114 }, { "epoch": 8.03298076122262, "grad_norm": 0.7734375, "learning_rate": 1.4678692981754275e-06, "loss": 3.9519, "step": 24115 }, { "epoch": 8.033313900224869, "grad_norm": 0.84765625, "learning_rate": 1.4673883285494175e-06, "loss": 4.0112, "step": 24116 }, { "epoch": 8.033647039227118, "grad_norm": 0.765625, "learning_rate": 1.4669074291902665e-06, "loss": 4.0376, "step": 24117 }, { "epoch": 8.033980178229367, "grad_norm": 0.8046875, "learning_rate": 1.4664266001035758e-06, "loss": 3.9852, "step": 24118 }, { "epoch": 8.034313317231614, "grad_norm": 0.78125, "learning_rate": 1.465945841294947e-06, "loss": 3.958, "step": 24119 }, { "epoch": 8.034646456233864, "grad_norm": 0.74609375, "learning_rate": 1.4654651527699794e-06, "loss": 3.9827, "step": 24120 }, { "epoch": 8.034979595236113, "grad_norm": 0.79296875, "learning_rate": 1.4649845345342696e-06, "loss": 4.0056, "step": 24121 }, { "epoch": 8.03531273423836, "grad_norm": 0.77734375, "learning_rate": 1.4645039865934207e-06, "loss": 3.9739, "step": 24122 }, { "epoch": 8.03564587324061, "grad_norm": 0.80078125, "learning_rate": 1.464023508953027e-06, "loss": 3.9799, "step": 24123 }, { "epoch": 8.035979012242858, "grad_norm": 0.765625, "learning_rate": 1.4635431016186862e-06, "loss": 3.9463, "step": 24124 }, { "epoch": 8.036312151245108, "grad_norm": 0.7265625, "learning_rate": 1.463062764595991e-06, "loss": 4.0176, "step": 24125 }, { "epoch": 8.036645290247355, "grad_norm": 0.79296875, "learning_rate": 1.462582497890542e-06, "loss": 3.9611, "step": 24126 }, { "epoch": 8.036978429249604, "grad_norm": 0.6875, "learning_rate": 1.4621023015079287e-06, "loss": 4.0322, "step": 24127 }, { "epoch": 8.037311568251853, "grad_norm": 0.765625, "learning_rate": 1.4616221754537428e-06, "loss": 3.9798, "step": 24128 }, { "epoch": 8.037644707254103, "grad_norm": 0.78125, "learning_rate": 1.4611421197335823e-06, "loss": 3.9829, "step": 24129 }, { "epoch": 8.03797784625635, "grad_norm": 0.73828125, "learning_rate": 1.4606621343530358e-06, "loss": 3.8874, "step": 24130 }, { "epoch": 8.0383109852586, "grad_norm": 0.6953125, "learning_rate": 1.4601822193176939e-06, "loss": 3.9628, "step": 24131 }, { "epoch": 8.038644124260848, "grad_norm": 0.75, "learning_rate": 1.4597023746331448e-06, "loss": 3.9744, "step": 24132 }, { "epoch": 8.038977263263096, "grad_norm": 0.78515625, "learning_rate": 1.4592226003049822e-06, "loss": 3.9856, "step": 24133 }, { "epoch": 8.039310402265345, "grad_norm": 0.734375, "learning_rate": 1.4587428963387917e-06, "loss": 4.0429, "step": 24134 }, { "epoch": 8.039643541267594, "grad_norm": 0.7265625, "learning_rate": 1.4582632627401613e-06, "loss": 3.979, "step": 24135 }, { "epoch": 8.039976680269843, "grad_norm": 0.73828125, "learning_rate": 1.4577836995146772e-06, "loss": 3.989, "step": 24136 }, { "epoch": 8.04030981927209, "grad_norm": 0.71875, "learning_rate": 1.4573042066679253e-06, "loss": 4.0316, "step": 24137 }, { "epoch": 8.04064295827434, "grad_norm": 0.76171875, "learning_rate": 1.4568247842054918e-06, "loss": 3.8938, "step": 24138 }, { "epoch": 8.04097609727659, "grad_norm": 0.78515625, "learning_rate": 1.4563454321329581e-06, "loss": 4.045, "step": 24139 }, { "epoch": 8.041309236278837, "grad_norm": 0.7734375, "learning_rate": 1.4558661504559112e-06, "loss": 3.9945, "step": 24140 }, { "epoch": 8.041642375281086, "grad_norm": 0.76171875, "learning_rate": 1.4553869391799318e-06, "loss": 3.9873, "step": 24141 }, { "epoch": 8.041975514283335, "grad_norm": 0.78515625, "learning_rate": 1.454907798310602e-06, "loss": 3.9772, "step": 24142 }, { "epoch": 8.042308653285584, "grad_norm": 0.75, "learning_rate": 1.454428727853501e-06, "loss": 3.9498, "step": 24143 }, { "epoch": 8.042641792287831, "grad_norm": 0.75, "learning_rate": 1.4539497278142147e-06, "loss": 3.9441, "step": 24144 }, { "epoch": 8.04297493129008, "grad_norm": 0.75, "learning_rate": 1.4534707981983155e-06, "loss": 3.9656, "step": 24145 }, { "epoch": 8.04330807029233, "grad_norm": 0.76953125, "learning_rate": 1.4529919390113858e-06, "loss": 4.0321, "step": 24146 }, { "epoch": 8.043641209294579, "grad_norm": 0.734375, "learning_rate": 1.452513150259e-06, "loss": 3.9917, "step": 24147 }, { "epoch": 8.043974348296826, "grad_norm": 0.73828125, "learning_rate": 1.4520344319467388e-06, "loss": 4.0401, "step": 24148 }, { "epoch": 8.044307487299076, "grad_norm": 0.734375, "learning_rate": 1.4515557840801769e-06, "loss": 3.9862, "step": 24149 }, { "epoch": 8.044640626301325, "grad_norm": 0.796875, "learning_rate": 1.451077206664889e-06, "loss": 3.9422, "step": 24150 }, { "epoch": 8.044973765303572, "grad_norm": 0.80078125, "learning_rate": 1.450598699706448e-06, "loss": 4.0065, "step": 24151 }, { "epoch": 8.045306904305821, "grad_norm": 0.7890625, "learning_rate": 1.4501202632104305e-06, "loss": 3.9145, "step": 24152 }, { "epoch": 8.04564004330807, "grad_norm": 0.73046875, "learning_rate": 1.449641897182411e-06, "loss": 4.0228, "step": 24153 }, { "epoch": 8.04597318231032, "grad_norm": 0.7734375, "learning_rate": 1.4491636016279536e-06, "loss": 3.9694, "step": 24154 }, { "epoch": 8.046306321312567, "grad_norm": 0.76953125, "learning_rate": 1.4486853765526362e-06, "loss": 3.9714, "step": 24155 }, { "epoch": 8.046639460314816, "grad_norm": 0.79296875, "learning_rate": 1.4482072219620268e-06, "loss": 3.9865, "step": 24156 }, { "epoch": 8.046972599317066, "grad_norm": 0.77734375, "learning_rate": 1.4477291378616954e-06, "loss": 3.9727, "step": 24157 }, { "epoch": 8.047305738319313, "grad_norm": 0.76171875, "learning_rate": 1.4472511242572086e-06, "loss": 4.0329, "step": 24158 }, { "epoch": 8.047638877321562, "grad_norm": 0.7578125, "learning_rate": 1.4467731811541376e-06, "loss": 3.9496, "step": 24159 }, { "epoch": 8.047972016323811, "grad_norm": 0.73828125, "learning_rate": 1.4462953085580477e-06, "loss": 3.9047, "step": 24160 }, { "epoch": 8.04830515532606, "grad_norm": 0.75390625, "learning_rate": 1.4458175064745061e-06, "loss": 3.9713, "step": 24161 }, { "epoch": 8.048638294328308, "grad_norm": 0.74609375, "learning_rate": 1.4453397749090758e-06, "loss": 3.9163, "step": 24162 }, { "epoch": 8.048971433330557, "grad_norm": 0.77734375, "learning_rate": 1.444862113867324e-06, "loss": 4.0518, "step": 24163 }, { "epoch": 8.049304572332806, "grad_norm": 0.78125, "learning_rate": 1.4443845233548123e-06, "loss": 3.8983, "step": 24164 }, { "epoch": 8.049637711335054, "grad_norm": 0.7890625, "learning_rate": 1.4439070033771026e-06, "loss": 3.9758, "step": 24165 }, { "epoch": 8.049970850337303, "grad_norm": 0.76171875, "learning_rate": 1.4434295539397608e-06, "loss": 3.9427, "step": 24166 }, { "epoch": 8.050303989339552, "grad_norm": 0.74609375, "learning_rate": 1.4429521750483464e-06, "loss": 4.0374, "step": 24167 }, { "epoch": 8.050637128341801, "grad_norm": 0.765625, "learning_rate": 1.4424748667084199e-06, "loss": 3.9927, "step": 24168 }, { "epoch": 8.050970267344049, "grad_norm": 0.76953125, "learning_rate": 1.4419976289255385e-06, "loss": 4.0228, "step": 24169 }, { "epoch": 8.051303406346298, "grad_norm": 0.765625, "learning_rate": 1.4415204617052647e-06, "loss": 3.9356, "step": 24170 }, { "epoch": 8.051636545348547, "grad_norm": 0.74609375, "learning_rate": 1.4410433650531572e-06, "loss": 4.013, "step": 24171 }, { "epoch": 8.051969684350796, "grad_norm": 0.78125, "learning_rate": 1.4405663389747665e-06, "loss": 3.9673, "step": 24172 }, { "epoch": 8.052302823353044, "grad_norm": 0.83203125, "learning_rate": 1.4400893834756557e-06, "loss": 3.9367, "step": 24173 }, { "epoch": 8.052635962355293, "grad_norm": 0.8125, "learning_rate": 1.439612498561377e-06, "loss": 3.9537, "step": 24174 }, { "epoch": 8.052969101357542, "grad_norm": 0.77734375, "learning_rate": 1.439135684237487e-06, "loss": 4.0422, "step": 24175 }, { "epoch": 8.05330224035979, "grad_norm": 0.73046875, "learning_rate": 1.438658940509536e-06, "loss": 3.9739, "step": 24176 }, { "epoch": 8.053635379362039, "grad_norm": 0.7578125, "learning_rate": 1.4381822673830812e-06, "loss": 4.0153, "step": 24177 }, { "epoch": 8.053968518364288, "grad_norm": 0.76171875, "learning_rate": 1.4377056648636736e-06, "loss": 3.9834, "step": 24178 }, { "epoch": 8.054301657366537, "grad_norm": 0.7734375, "learning_rate": 1.437229132956864e-06, "loss": 4.0347, "step": 24179 }, { "epoch": 8.054634796368784, "grad_norm": 0.765625, "learning_rate": 1.4367526716682026e-06, "loss": 3.9579, "step": 24180 }, { "epoch": 8.054967935371034, "grad_norm": 0.75390625, "learning_rate": 1.4362762810032403e-06, "loss": 3.9639, "step": 24181 }, { "epoch": 8.055301074373283, "grad_norm": 0.74609375, "learning_rate": 1.4357999609675248e-06, "loss": 3.9347, "step": 24182 }, { "epoch": 8.05563421337553, "grad_norm": 0.74609375, "learning_rate": 1.4353237115666032e-06, "loss": 3.9598, "step": 24183 }, { "epoch": 8.05596735237778, "grad_norm": 0.7734375, "learning_rate": 1.434847532806026e-06, "loss": 3.9428, "step": 24184 }, { "epoch": 8.056300491380028, "grad_norm": 0.7890625, "learning_rate": 1.4343714246913378e-06, "loss": 3.9625, "step": 24185 }, { "epoch": 8.056633630382278, "grad_norm": 0.7734375, "learning_rate": 1.4338953872280844e-06, "loss": 4.0021, "step": 24186 }, { "epoch": 8.056966769384525, "grad_norm": 0.83203125, "learning_rate": 1.4334194204218087e-06, "loss": 3.9596, "step": 24187 }, { "epoch": 8.057299908386774, "grad_norm": 0.74609375, "learning_rate": 1.432943524278058e-06, "loss": 3.9709, "step": 24188 }, { "epoch": 8.057633047389023, "grad_norm": 0.8046875, "learning_rate": 1.4324676988023771e-06, "loss": 4.0159, "step": 24189 }, { "epoch": 8.057966186391273, "grad_norm": 0.77734375, "learning_rate": 1.4319919440003024e-06, "loss": 3.9931, "step": 24190 }, { "epoch": 8.05829932539352, "grad_norm": 0.7421875, "learning_rate": 1.4315162598773768e-06, "loss": 3.9299, "step": 24191 }, { "epoch": 8.05863246439577, "grad_norm": 0.71875, "learning_rate": 1.4310406464391442e-06, "loss": 3.9204, "step": 24192 }, { "epoch": 8.058965603398018, "grad_norm": 0.76953125, "learning_rate": 1.430565103691143e-06, "loss": 3.972, "step": 24193 }, { "epoch": 8.059298742400266, "grad_norm": 0.76171875, "learning_rate": 1.4300896316389117e-06, "loss": 3.9764, "step": 24194 }, { "epoch": 8.059631881402515, "grad_norm": 0.7578125, "learning_rate": 1.429614230287987e-06, "loss": 4.0344, "step": 24195 }, { "epoch": 8.059965020404764, "grad_norm": 0.8125, "learning_rate": 1.4291388996439091e-06, "loss": 3.945, "step": 24196 }, { "epoch": 8.060298159407013, "grad_norm": 0.7578125, "learning_rate": 1.4286636397122148e-06, "loss": 3.9962, "step": 24197 }, { "epoch": 8.06063129840926, "grad_norm": 0.7421875, "learning_rate": 1.428188450498438e-06, "loss": 4.0012, "step": 24198 }, { "epoch": 8.06096443741151, "grad_norm": 0.765625, "learning_rate": 1.4277133320081148e-06, "loss": 3.9397, "step": 24199 }, { "epoch": 8.061297576413759, "grad_norm": 0.7734375, "learning_rate": 1.4272382842467776e-06, "loss": 4.0097, "step": 24200 }, { "epoch": 8.061630715416007, "grad_norm": 0.76171875, "learning_rate": 1.426763307219962e-06, "loss": 4.05, "step": 24201 }, { "epoch": 8.061963854418256, "grad_norm": 0.7734375, "learning_rate": 1.426288400933197e-06, "loss": 3.9468, "step": 24202 }, { "epoch": 8.062296993420505, "grad_norm": 0.73828125, "learning_rate": 1.4258135653920178e-06, "loss": 3.9666, "step": 24203 }, { "epoch": 8.062630132422754, "grad_norm": 0.75, "learning_rate": 1.4253388006019546e-06, "loss": 4.0544, "step": 24204 }, { "epoch": 8.062963271425001, "grad_norm": 0.79296875, "learning_rate": 1.424864106568536e-06, "loss": 3.9356, "step": 24205 }, { "epoch": 8.06329641042725, "grad_norm": 0.80859375, "learning_rate": 1.4243894832972911e-06, "loss": 3.9571, "step": 24206 }, { "epoch": 8.0636295494295, "grad_norm": 0.78515625, "learning_rate": 1.423914930793752e-06, "loss": 3.9201, "step": 24207 }, { "epoch": 8.063962688431749, "grad_norm": 0.7890625, "learning_rate": 1.4234404490634412e-06, "loss": 3.9553, "step": 24208 }, { "epoch": 8.064295827433996, "grad_norm": 0.75, "learning_rate": 1.4229660381118857e-06, "loss": 3.9053, "step": 24209 }, { "epoch": 8.064628966436246, "grad_norm": 0.8046875, "learning_rate": 1.4224916979446143e-06, "loss": 4.008, "step": 24210 }, { "epoch": 8.064962105438495, "grad_norm": 0.74609375, "learning_rate": 1.4220174285671519e-06, "loss": 4.009, "step": 24211 }, { "epoch": 8.065295244440742, "grad_norm": 0.8203125, "learning_rate": 1.4215432299850208e-06, "loss": 4.01, "step": 24212 }, { "epoch": 8.065628383442991, "grad_norm": 0.828125, "learning_rate": 1.4210691022037434e-06, "loss": 4.0371, "step": 24213 }, { "epoch": 8.06596152244524, "grad_norm": 0.76171875, "learning_rate": 1.4205950452288458e-06, "loss": 3.9735, "step": 24214 }, { "epoch": 8.06629466144749, "grad_norm": 0.796875, "learning_rate": 1.4201210590658477e-06, "loss": 3.8964, "step": 24215 }, { "epoch": 8.066627800449737, "grad_norm": 0.7578125, "learning_rate": 1.419647143720271e-06, "loss": 3.9497, "step": 24216 }, { "epoch": 8.066960939451986, "grad_norm": 0.7578125, "learning_rate": 1.419173299197635e-06, "loss": 3.9833, "step": 24217 }, { "epoch": 8.067294078454236, "grad_norm": 0.74609375, "learning_rate": 1.418699525503458e-06, "loss": 4.0115, "step": 24218 }, { "epoch": 8.067627217456483, "grad_norm": 0.78515625, "learning_rate": 1.4182258226432604e-06, "loss": 4.0381, "step": 24219 }, { "epoch": 8.067960356458732, "grad_norm": 0.73828125, "learning_rate": 1.4177521906225571e-06, "loss": 4.029, "step": 24220 }, { "epoch": 8.068293495460981, "grad_norm": 0.77734375, "learning_rate": 1.417278629446868e-06, "loss": 3.8943, "step": 24221 }, { "epoch": 8.06862663446323, "grad_norm": 0.765625, "learning_rate": 1.4168051391217075e-06, "loss": 3.9879, "step": 24222 }, { "epoch": 8.068959773465478, "grad_norm": 0.78515625, "learning_rate": 1.4163317196525913e-06, "loss": 4.0148, "step": 24223 }, { "epoch": 8.069292912467727, "grad_norm": 0.73828125, "learning_rate": 1.4158583710450318e-06, "loss": 4.013, "step": 24224 }, { "epoch": 8.069626051469976, "grad_norm": 0.765625, "learning_rate": 1.4153850933045478e-06, "loss": 3.9191, "step": 24225 }, { "epoch": 8.069959190472225, "grad_norm": 0.75, "learning_rate": 1.4149118864366458e-06, "loss": 4.0266, "step": 24226 }, { "epoch": 8.070292329474473, "grad_norm": 0.8203125, "learning_rate": 1.4144387504468382e-06, "loss": 3.9972, "step": 24227 }, { "epoch": 8.070625468476722, "grad_norm": 0.828125, "learning_rate": 1.41396568534064e-06, "loss": 4.0732, "step": 24228 }, { "epoch": 8.070958607478971, "grad_norm": 0.74609375, "learning_rate": 1.4134926911235594e-06, "loss": 3.9797, "step": 24229 }, { "epoch": 8.071291746481219, "grad_norm": 0.73828125, "learning_rate": 1.4130197678011052e-06, "loss": 4.0372, "step": 24230 }, { "epoch": 8.071624885483468, "grad_norm": 0.78515625, "learning_rate": 1.4125469153787845e-06, "loss": 3.9659, "step": 24231 }, { "epoch": 8.071958024485717, "grad_norm": 0.78125, "learning_rate": 1.4120741338621094e-06, "loss": 3.927, "step": 24232 }, { "epoch": 8.072291163487966, "grad_norm": 0.7578125, "learning_rate": 1.4116014232565836e-06, "loss": 3.9703, "step": 24233 }, { "epoch": 8.072624302490214, "grad_norm": 0.78515625, "learning_rate": 1.4111287835677152e-06, "loss": 3.9945, "step": 24234 }, { "epoch": 8.072957441492463, "grad_norm": 0.8125, "learning_rate": 1.4106562148010053e-06, "loss": 3.9403, "step": 24235 }, { "epoch": 8.073290580494712, "grad_norm": 0.76171875, "learning_rate": 1.4101837169619623e-06, "loss": 3.9243, "step": 24236 }, { "epoch": 8.07362371949696, "grad_norm": 0.73828125, "learning_rate": 1.4097112900560885e-06, "loss": 3.9636, "step": 24237 }, { "epoch": 8.073956858499209, "grad_norm": 0.72265625, "learning_rate": 1.4092389340888863e-06, "loss": 3.9737, "step": 24238 }, { "epoch": 8.074289997501458, "grad_norm": 0.765625, "learning_rate": 1.4087666490658566e-06, "loss": 3.9644, "step": 24239 }, { "epoch": 8.074623136503707, "grad_norm": 0.7734375, "learning_rate": 1.4082944349925037e-06, "loss": 3.9399, "step": 24240 }, { "epoch": 8.074956275505954, "grad_norm": 0.7734375, "learning_rate": 1.4078222918743258e-06, "loss": 3.9405, "step": 24241 }, { "epoch": 8.075289414508203, "grad_norm": 0.79296875, "learning_rate": 1.407350219716822e-06, "loss": 4.0107, "step": 24242 }, { "epoch": 8.075622553510453, "grad_norm": 0.78125, "learning_rate": 1.4068782185254927e-06, "loss": 3.9423, "step": 24243 }, { "epoch": 8.0759556925127, "grad_norm": 0.75, "learning_rate": 1.4064062883058337e-06, "loss": 3.9335, "step": 24244 }, { "epoch": 8.07628883151495, "grad_norm": 0.703125, "learning_rate": 1.405934429063343e-06, "loss": 3.9867, "step": 24245 }, { "epoch": 8.076621970517198, "grad_norm": 0.73046875, "learning_rate": 1.4054626408035147e-06, "loss": 3.9486, "step": 24246 }, { "epoch": 8.076955109519448, "grad_norm": 0.80859375, "learning_rate": 1.4049909235318472e-06, "loss": 3.9852, "step": 24247 }, { "epoch": 8.077288248521695, "grad_norm": 0.7578125, "learning_rate": 1.4045192772538342e-06, "loss": 4.0366, "step": 24248 }, { "epoch": 8.077621387523944, "grad_norm": 0.73828125, "learning_rate": 1.4040477019749688e-06, "loss": 3.9516, "step": 24249 }, { "epoch": 8.077954526526193, "grad_norm": 0.75390625, "learning_rate": 1.4035761977007418e-06, "loss": 4.0249, "step": 24250 }, { "epoch": 8.078287665528443, "grad_norm": 0.76171875, "learning_rate": 1.4031047644366487e-06, "loss": 3.968, "step": 24251 }, { "epoch": 8.07862080453069, "grad_norm": 0.7890625, "learning_rate": 1.4026334021881823e-06, "loss": 4.0803, "step": 24252 }, { "epoch": 8.07895394353294, "grad_norm": 0.76171875, "learning_rate": 1.402162110960825e-06, "loss": 3.9771, "step": 24253 }, { "epoch": 8.079287082535188, "grad_norm": 0.74609375, "learning_rate": 1.4016908907600736e-06, "loss": 3.914, "step": 24254 }, { "epoch": 8.079620221537436, "grad_norm": 0.73828125, "learning_rate": 1.4012197415914145e-06, "loss": 3.9905, "step": 24255 }, { "epoch": 8.079953360539685, "grad_norm": 0.7734375, "learning_rate": 1.400748663460335e-06, "loss": 3.931, "step": 24256 }, { "epoch": 8.080286499541934, "grad_norm": 0.75, "learning_rate": 1.4002776563723214e-06, "loss": 4.0101, "step": 24257 }, { "epoch": 8.080619638544183, "grad_norm": 0.7890625, "learning_rate": 1.399806720332863e-06, "loss": 4.006, "step": 24258 }, { "epoch": 8.08095277754643, "grad_norm": 0.75, "learning_rate": 1.3993358553474421e-06, "loss": 4.0707, "step": 24259 }, { "epoch": 8.08128591654868, "grad_norm": 0.7734375, "learning_rate": 1.3988650614215457e-06, "loss": 4.0127, "step": 24260 }, { "epoch": 8.081619055550929, "grad_norm": 0.76953125, "learning_rate": 1.3983943385606562e-06, "loss": 3.9798, "step": 24261 }, { "epoch": 8.081952194553176, "grad_norm": 0.77734375, "learning_rate": 1.3979236867702563e-06, "loss": 3.9558, "step": 24262 }, { "epoch": 8.082285333555426, "grad_norm": 0.73828125, "learning_rate": 1.3974531060558283e-06, "loss": 3.9337, "step": 24263 }, { "epoch": 8.082618472557675, "grad_norm": 0.70703125, "learning_rate": 1.3969825964228517e-06, "loss": 3.9372, "step": 24264 }, { "epoch": 8.082951611559924, "grad_norm": 0.77734375, "learning_rate": 1.3965121578768106e-06, "loss": 3.9895, "step": 24265 }, { "epoch": 8.083284750562171, "grad_norm": 0.7734375, "learning_rate": 1.3960417904231832e-06, "loss": 4.0088, "step": 24266 }, { "epoch": 8.08361788956442, "grad_norm": 0.7734375, "learning_rate": 1.3955714940674475e-06, "loss": 3.9522, "step": 24267 }, { "epoch": 8.08395102856667, "grad_norm": 0.7421875, "learning_rate": 1.3951012688150792e-06, "loss": 3.9434, "step": 24268 }, { "epoch": 8.084284167568919, "grad_norm": 0.8125, "learning_rate": 1.3946311146715628e-06, "loss": 3.9392, "step": 24269 }, { "epoch": 8.084617306571166, "grad_norm": 0.73828125, "learning_rate": 1.394161031642367e-06, "loss": 4.0038, "step": 24270 }, { "epoch": 8.084950445573416, "grad_norm": 0.73828125, "learning_rate": 1.3936910197329686e-06, "loss": 3.9595, "step": 24271 }, { "epoch": 8.085283584575665, "grad_norm": 0.74609375, "learning_rate": 1.3932210789488451e-06, "loss": 4.0522, "step": 24272 }, { "epoch": 8.085616723577912, "grad_norm": 0.6953125, "learning_rate": 1.3927512092954686e-06, "loss": 3.9619, "step": 24273 }, { "epoch": 8.085949862580161, "grad_norm": 0.7421875, "learning_rate": 1.3922814107783124e-06, "loss": 3.9029, "step": 24274 }, { "epoch": 8.08628300158241, "grad_norm": 0.765625, "learning_rate": 1.391811683402847e-06, "loss": 3.9652, "step": 24275 }, { "epoch": 8.08661614058466, "grad_norm": 0.828125, "learning_rate": 1.3913420271745466e-06, "loss": 4.0252, "step": 24276 }, { "epoch": 8.086949279586907, "grad_norm": 0.78515625, "learning_rate": 1.3908724420988808e-06, "loss": 4.0531, "step": 24277 }, { "epoch": 8.087282418589156, "grad_norm": 0.76953125, "learning_rate": 1.3904029281813202e-06, "loss": 4.0301, "step": 24278 }, { "epoch": 8.087615557591405, "grad_norm": 0.75390625, "learning_rate": 1.389933485427328e-06, "loss": 3.9493, "step": 24279 }, { "epoch": 8.087948696593653, "grad_norm": 0.7890625, "learning_rate": 1.3894641138423789e-06, "loss": 3.9741, "step": 24280 }, { "epoch": 8.088281835595902, "grad_norm": 0.78125, "learning_rate": 1.388994813431937e-06, "loss": 3.9058, "step": 24281 }, { "epoch": 8.088614974598151, "grad_norm": 0.78515625, "learning_rate": 1.3885255842014685e-06, "loss": 3.9517, "step": 24282 }, { "epoch": 8.0889481136004, "grad_norm": 0.76953125, "learning_rate": 1.3880564261564385e-06, "loss": 4.0934, "step": 24283 }, { "epoch": 8.089281252602648, "grad_norm": 0.79296875, "learning_rate": 1.387587339302315e-06, "loss": 3.9662, "step": 24284 }, { "epoch": 8.089614391604897, "grad_norm": 0.80859375, "learning_rate": 1.3871183236445587e-06, "loss": 3.9714, "step": 24285 }, { "epoch": 8.089947530607146, "grad_norm": 0.734375, "learning_rate": 1.3866493791886335e-06, "loss": 3.9517, "step": 24286 }, { "epoch": 8.090280669609395, "grad_norm": 0.73828125, "learning_rate": 1.3861805059400015e-06, "loss": 3.9382, "step": 24287 }, { "epoch": 8.090613808611643, "grad_norm": 0.7421875, "learning_rate": 1.3857117039041246e-06, "loss": 4.0522, "step": 24288 }, { "epoch": 8.090946947613892, "grad_norm": 0.71484375, "learning_rate": 1.385242973086463e-06, "loss": 3.9955, "step": 24289 }, { "epoch": 8.091280086616141, "grad_norm": 0.734375, "learning_rate": 1.3847743134924734e-06, "loss": 4.0333, "step": 24290 }, { "epoch": 8.091613225618389, "grad_norm": 0.78515625, "learning_rate": 1.3843057251276198e-06, "loss": 4.01, "step": 24291 }, { "epoch": 8.091946364620638, "grad_norm": 0.76953125, "learning_rate": 1.3838372079973586e-06, "loss": 3.9289, "step": 24292 }, { "epoch": 8.092279503622887, "grad_norm": 0.76953125, "learning_rate": 1.3833687621071454e-06, "loss": 4.0379, "step": 24293 }, { "epoch": 8.092612642625136, "grad_norm": 0.77734375, "learning_rate": 1.3829003874624357e-06, "loss": 3.9963, "step": 24294 }, { "epoch": 8.092945781627384, "grad_norm": 0.73046875, "learning_rate": 1.3824320840686889e-06, "loss": 4.0587, "step": 24295 }, { "epoch": 8.093278920629633, "grad_norm": 0.796875, "learning_rate": 1.38196385193136e-06, "loss": 4.0239, "step": 24296 }, { "epoch": 8.093612059631882, "grad_norm": 0.76953125, "learning_rate": 1.3814956910558963e-06, "loss": 3.9586, "step": 24297 }, { "epoch": 8.09394519863413, "grad_norm": 0.8046875, "learning_rate": 1.3810276014477578e-06, "loss": 3.9491, "step": 24298 }, { "epoch": 8.094278337636378, "grad_norm": 0.75, "learning_rate": 1.3805595831123934e-06, "loss": 3.9812, "step": 24299 }, { "epoch": 8.094611476638628, "grad_norm": 0.7421875, "learning_rate": 1.3800916360552552e-06, "loss": 4.0021, "step": 24300 }, { "epoch": 8.094944615640877, "grad_norm": 0.78515625, "learning_rate": 1.3796237602817914e-06, "loss": 4.0263, "step": 24301 }, { "epoch": 8.095277754643124, "grad_norm": 0.7734375, "learning_rate": 1.3791559557974568e-06, "loss": 3.9865, "step": 24302 }, { "epoch": 8.095610893645373, "grad_norm": 0.74609375, "learning_rate": 1.378688222607697e-06, "loss": 3.9628, "step": 24303 }, { "epoch": 8.095944032647623, "grad_norm": 0.77734375, "learning_rate": 1.378220560717961e-06, "loss": 3.9749, "step": 24304 }, { "epoch": 8.09627717164987, "grad_norm": 0.76171875, "learning_rate": 1.3777529701336964e-06, "loss": 3.9923, "step": 24305 }, { "epoch": 8.09661031065212, "grad_norm": 0.79296875, "learning_rate": 1.377285450860348e-06, "loss": 4.013, "step": 24306 }, { "epoch": 8.096943449654368, "grad_norm": 0.78515625, "learning_rate": 1.3768180029033625e-06, "loss": 3.9962, "step": 24307 }, { "epoch": 8.097276588656618, "grad_norm": 0.75390625, "learning_rate": 1.3763506262681822e-06, "loss": 3.9636, "step": 24308 }, { "epoch": 8.097609727658865, "grad_norm": 0.7578125, "learning_rate": 1.375883320960256e-06, "loss": 3.9517, "step": 24309 }, { "epoch": 8.097942866661114, "grad_norm": 0.75390625, "learning_rate": 1.3754160869850236e-06, "loss": 3.9951, "step": 24310 }, { "epoch": 8.098276005663363, "grad_norm": 0.74609375, "learning_rate": 1.3749489243479283e-06, "loss": 3.9965, "step": 24311 }, { "epoch": 8.098609144665613, "grad_norm": 0.73828125, "learning_rate": 1.374481833054409e-06, "loss": 3.984, "step": 24312 }, { "epoch": 8.09894228366786, "grad_norm": 0.7421875, "learning_rate": 1.3740148131099105e-06, "loss": 3.9265, "step": 24313 }, { "epoch": 8.09927542267011, "grad_norm": 0.76171875, "learning_rate": 1.3735478645198729e-06, "loss": 3.9492, "step": 24314 }, { "epoch": 8.099608561672358, "grad_norm": 0.76171875, "learning_rate": 1.373080987289729e-06, "loss": 3.9464, "step": 24315 }, { "epoch": 8.099941700674606, "grad_norm": 0.8203125, "learning_rate": 1.3726141814249222e-06, "loss": 4.0331, "step": 24316 }, { "epoch": 8.100274839676855, "grad_norm": 0.76171875, "learning_rate": 1.3721474469308887e-06, "loss": 3.9328, "step": 24317 }, { "epoch": 8.100607978679104, "grad_norm": 0.79296875, "learning_rate": 1.3716807838130644e-06, "loss": 4.0266, "step": 24318 }, { "epoch": 8.100941117681353, "grad_norm": 0.7890625, "learning_rate": 1.3712141920768823e-06, "loss": 3.9672, "step": 24319 }, { "epoch": 8.1012742566836, "grad_norm": 0.76171875, "learning_rate": 1.370747671727783e-06, "loss": 3.9328, "step": 24320 }, { "epoch": 8.10160739568585, "grad_norm": 0.82421875, "learning_rate": 1.3702812227711967e-06, "loss": 3.9635, "step": 24321 }, { "epoch": 8.101940534688099, "grad_norm": 0.7578125, "learning_rate": 1.3698148452125578e-06, "loss": 4.0336, "step": 24322 }, { "epoch": 8.102273673690346, "grad_norm": 0.78125, "learning_rate": 1.369348539057297e-06, "loss": 3.9535, "step": 24323 }, { "epoch": 8.102606812692596, "grad_norm": 0.73828125, "learning_rate": 1.3688823043108479e-06, "loss": 4.0093, "step": 24324 }, { "epoch": 8.102939951694845, "grad_norm": 0.8125, "learning_rate": 1.3684161409786388e-06, "loss": 3.9621, "step": 24325 }, { "epoch": 8.103273090697094, "grad_norm": 0.75390625, "learning_rate": 1.3679500490661019e-06, "loss": 3.9848, "step": 24326 }, { "epoch": 8.103606229699341, "grad_norm": 0.7734375, "learning_rate": 1.3674840285786621e-06, "loss": 3.9514, "step": 24327 }, { "epoch": 8.10393936870159, "grad_norm": 0.7890625, "learning_rate": 1.3670180795217524e-06, "loss": 3.9783, "step": 24328 }, { "epoch": 8.10427250770384, "grad_norm": 0.7578125, "learning_rate": 1.366552201900799e-06, "loss": 3.9307, "step": 24329 }, { "epoch": 8.104605646706089, "grad_norm": 0.79296875, "learning_rate": 1.3660863957212266e-06, "loss": 3.9638, "step": 24330 }, { "epoch": 8.104938785708336, "grad_norm": 0.75390625, "learning_rate": 1.3656206609884594e-06, "loss": 3.9537, "step": 24331 }, { "epoch": 8.105271924710586, "grad_norm": 0.74609375, "learning_rate": 1.365154997707929e-06, "loss": 3.9736, "step": 24332 }, { "epoch": 8.105605063712835, "grad_norm": 0.75, "learning_rate": 1.3646894058850523e-06, "loss": 3.9749, "step": 24333 }, { "epoch": 8.105938202715082, "grad_norm": 0.7890625, "learning_rate": 1.3642238855252537e-06, "loss": 4.0428, "step": 24334 }, { "epoch": 8.106271341717331, "grad_norm": 0.76171875, "learning_rate": 1.3637584366339586e-06, "loss": 3.9957, "step": 24335 }, { "epoch": 8.10660448071958, "grad_norm": 0.75, "learning_rate": 1.3632930592165862e-06, "loss": 3.9138, "step": 24336 }, { "epoch": 8.10693761972183, "grad_norm": 0.78515625, "learning_rate": 1.3628277532785579e-06, "loss": 3.9364, "step": 24337 }, { "epoch": 8.107270758724077, "grad_norm": 0.77734375, "learning_rate": 1.3623625188252912e-06, "loss": 3.9967, "step": 24338 }, { "epoch": 8.107603897726326, "grad_norm": 0.7734375, "learning_rate": 1.3618973558622089e-06, "loss": 4.0129, "step": 24339 }, { "epoch": 8.107937036728575, "grad_norm": 0.77734375, "learning_rate": 1.3614322643947268e-06, "loss": 3.9581, "step": 24340 }, { "epoch": 8.108270175730823, "grad_norm": 0.85546875, "learning_rate": 1.3609672444282634e-06, "loss": 4.007, "step": 24341 }, { "epoch": 8.108603314733072, "grad_norm": 0.75390625, "learning_rate": 1.360502295968234e-06, "loss": 4.0172, "step": 24342 }, { "epoch": 8.108936453735321, "grad_norm": 0.91796875, "learning_rate": 1.3600374190200552e-06, "loss": 3.9593, "step": 24343 }, { "epoch": 8.10926959273757, "grad_norm": 0.796875, "learning_rate": 1.359572613589141e-06, "loss": 3.8993, "step": 24344 }, { "epoch": 8.109602731739818, "grad_norm": 0.83203125, "learning_rate": 1.3591078796809045e-06, "loss": 3.9507, "step": 24345 }, { "epoch": 8.109935870742067, "grad_norm": 0.765625, "learning_rate": 1.3586432173007606e-06, "loss": 3.9887, "step": 24346 }, { "epoch": 8.110269009744316, "grad_norm": 0.75390625, "learning_rate": 1.3581786264541227e-06, "loss": 3.9584, "step": 24347 }, { "epoch": 8.110602148746565, "grad_norm": 0.828125, "learning_rate": 1.3577141071463994e-06, "loss": 3.969, "step": 24348 }, { "epoch": 8.110935287748813, "grad_norm": 0.78515625, "learning_rate": 1.357249659383001e-06, "loss": 4.0253, "step": 24349 }, { "epoch": 8.111268426751062, "grad_norm": 0.80859375, "learning_rate": 1.3567852831693426e-06, "loss": 3.9451, "step": 24350 }, { "epoch": 8.111601565753311, "grad_norm": 0.71875, "learning_rate": 1.3563209785108277e-06, "loss": 3.9609, "step": 24351 }, { "epoch": 8.111934704755559, "grad_norm": 0.76171875, "learning_rate": 1.3558567454128645e-06, "loss": 3.9917, "step": 24352 }, { "epoch": 8.112267843757808, "grad_norm": 0.73828125, "learning_rate": 1.3553925838808637e-06, "loss": 3.9629, "step": 24353 }, { "epoch": 8.112600982760057, "grad_norm": 0.734375, "learning_rate": 1.35492849392023e-06, "loss": 3.9845, "step": 24354 }, { "epoch": 8.112934121762306, "grad_norm": 0.78125, "learning_rate": 1.35446447553637e-06, "loss": 3.9923, "step": 24355 }, { "epoch": 8.113267260764554, "grad_norm": 0.76953125, "learning_rate": 1.3540005287346851e-06, "loss": 4.0246, "step": 24356 }, { "epoch": 8.113600399766803, "grad_norm": 0.7734375, "learning_rate": 1.3535366535205836e-06, "loss": 3.9998, "step": 24357 }, { "epoch": 8.113933538769052, "grad_norm": 0.7734375, "learning_rate": 1.3530728498994671e-06, "loss": 4.0079, "step": 24358 }, { "epoch": 8.1142666777713, "grad_norm": 0.73046875, "learning_rate": 1.3526091178767368e-06, "loss": 3.9945, "step": 24359 }, { "epoch": 8.114599816773548, "grad_norm": 0.75390625, "learning_rate": 1.3521454574577963e-06, "loss": 3.9629, "step": 24360 }, { "epoch": 8.114932955775798, "grad_norm": 0.78515625, "learning_rate": 1.3516818686480441e-06, "loss": 4.0248, "step": 24361 }, { "epoch": 8.115266094778047, "grad_norm": 0.72265625, "learning_rate": 1.3512183514528815e-06, "loss": 4.0363, "step": 24362 }, { "epoch": 8.115599233780294, "grad_norm": 0.765625, "learning_rate": 1.3507549058777045e-06, "loss": 3.9922, "step": 24363 }, { "epoch": 8.115932372782543, "grad_norm": 0.76953125, "learning_rate": 1.3502915319279162e-06, "loss": 3.985, "step": 24364 }, { "epoch": 8.116265511784793, "grad_norm": 0.75, "learning_rate": 1.3498282296089104e-06, "loss": 4.0145, "step": 24365 }, { "epoch": 8.11659865078704, "grad_norm": 0.7265625, "learning_rate": 1.3493649989260857e-06, "loss": 3.9255, "step": 24366 }, { "epoch": 8.11693178978929, "grad_norm": 0.74609375, "learning_rate": 1.3489018398848336e-06, "loss": 4.0677, "step": 24367 }, { "epoch": 8.117264928791538, "grad_norm": 0.796875, "learning_rate": 1.3484387524905556e-06, "loss": 3.9835, "step": 24368 }, { "epoch": 8.117598067793788, "grad_norm": 0.75390625, "learning_rate": 1.34797573674864e-06, "loss": 4.026, "step": 24369 }, { "epoch": 8.117931206796035, "grad_norm": 0.73828125, "learning_rate": 1.3475127926644826e-06, "loss": 4.0122, "step": 24370 }, { "epoch": 8.118264345798284, "grad_norm": 0.78515625, "learning_rate": 1.3470499202434722e-06, "loss": 4.0361, "step": 24371 }, { "epoch": 8.118597484800533, "grad_norm": 0.765625, "learning_rate": 1.3465871194910054e-06, "loss": 4.0046, "step": 24372 }, { "epoch": 8.118930623802783, "grad_norm": 0.7890625, "learning_rate": 1.3461243904124696e-06, "loss": 4.0785, "step": 24373 }, { "epoch": 8.11926376280503, "grad_norm": 0.7578125, "learning_rate": 1.3456617330132562e-06, "loss": 4.0197, "step": 24374 }, { "epoch": 8.119596901807279, "grad_norm": 0.80859375, "learning_rate": 1.3451991472987509e-06, "loss": 4.0155, "step": 24375 }, { "epoch": 8.119930040809528, "grad_norm": 0.7578125, "learning_rate": 1.3447366332743464e-06, "loss": 3.9553, "step": 24376 }, { "epoch": 8.120263179811776, "grad_norm": 0.7578125, "learning_rate": 1.3442741909454295e-06, "loss": 4.0399, "step": 24377 }, { "epoch": 8.120596318814025, "grad_norm": 0.75, "learning_rate": 1.3438118203173814e-06, "loss": 3.969, "step": 24378 }, { "epoch": 8.120929457816274, "grad_norm": 0.7734375, "learning_rate": 1.3433495213955933e-06, "loss": 4.0382, "step": 24379 }, { "epoch": 8.121262596818523, "grad_norm": 0.8046875, "learning_rate": 1.3428872941854474e-06, "loss": 3.9699, "step": 24380 }, { "epoch": 8.12159573582077, "grad_norm": 0.72265625, "learning_rate": 1.3424251386923291e-06, "loss": 3.9716, "step": 24381 }, { "epoch": 8.12192887482302, "grad_norm": 0.7734375, "learning_rate": 1.341963054921619e-06, "loss": 3.9731, "step": 24382 }, { "epoch": 8.122262013825269, "grad_norm": 0.78515625, "learning_rate": 1.3415010428787013e-06, "loss": 3.9172, "step": 24383 }, { "epoch": 8.122595152827516, "grad_norm": 0.72265625, "learning_rate": 1.3410391025689583e-06, "loss": 4.0173, "step": 24384 }, { "epoch": 8.122928291829766, "grad_norm": 0.7578125, "learning_rate": 1.3405772339977695e-06, "loss": 3.9306, "step": 24385 }, { "epoch": 8.123261430832015, "grad_norm": 0.7421875, "learning_rate": 1.3401154371705148e-06, "loss": 4.0394, "step": 24386 }, { "epoch": 8.123594569834264, "grad_norm": 0.76171875, "learning_rate": 1.3396537120925725e-06, "loss": 4.0072, "step": 24387 }, { "epoch": 8.123927708836511, "grad_norm": 0.734375, "learning_rate": 1.3391920587693215e-06, "loss": 4.0327, "step": 24388 }, { "epoch": 8.12426084783876, "grad_norm": 0.73046875, "learning_rate": 1.3387304772061368e-06, "loss": 3.9884, "step": 24389 }, { "epoch": 8.12459398684101, "grad_norm": 0.76953125, "learning_rate": 1.3382689674083981e-06, "loss": 4.0197, "step": 24390 }, { "epoch": 8.124927125843259, "grad_norm": 0.73046875, "learning_rate": 1.3378075293814804e-06, "loss": 4.0841, "step": 24391 }, { "epoch": 8.125260264845506, "grad_norm": 0.74609375, "learning_rate": 1.3373461631307568e-06, "loss": 4.0469, "step": 24392 }, { "epoch": 8.125593403847756, "grad_norm": 0.72265625, "learning_rate": 1.3368848686616005e-06, "loss": 3.8985, "step": 24393 }, { "epoch": 8.125926542850005, "grad_norm": 0.765625, "learning_rate": 1.3364236459793892e-06, "loss": 3.9773, "step": 24394 }, { "epoch": 8.126259681852252, "grad_norm": 0.734375, "learning_rate": 1.335962495089493e-06, "loss": 4.0926, "step": 24395 }, { "epoch": 8.126592820854501, "grad_norm": 0.77734375, "learning_rate": 1.335501415997279e-06, "loss": 3.9741, "step": 24396 }, { "epoch": 8.12692595985675, "grad_norm": 0.7734375, "learning_rate": 1.3350404087081225e-06, "loss": 4.0109, "step": 24397 }, { "epoch": 8.127259098859, "grad_norm": 0.7578125, "learning_rate": 1.3345794732273928e-06, "loss": 4.0011, "step": 24398 }, { "epoch": 8.127592237861247, "grad_norm": 0.75390625, "learning_rate": 1.334118609560457e-06, "loss": 3.9174, "step": 24399 }, { "epoch": 8.127925376863496, "grad_norm": 0.7578125, "learning_rate": 1.333657817712683e-06, "loss": 3.9664, "step": 24400 }, { "epoch": 8.128258515865745, "grad_norm": 0.78125, "learning_rate": 1.333197097689441e-06, "loss": 3.9373, "step": 24401 }, { "epoch": 8.128591654867993, "grad_norm": 0.77734375, "learning_rate": 1.332736449496095e-06, "loss": 4.0177, "step": 24402 }, { "epoch": 8.128924793870242, "grad_norm": 0.73828125, "learning_rate": 1.332275873138012e-06, "loss": 4.0277, "step": 24403 }, { "epoch": 8.129257932872491, "grad_norm": 0.7578125, "learning_rate": 1.3318153686205547e-06, "loss": 4.0148, "step": 24404 }, { "epoch": 8.12959107187474, "grad_norm": 0.7421875, "learning_rate": 1.3313549359490892e-06, "loss": 3.9733, "step": 24405 }, { "epoch": 8.129924210876988, "grad_norm": 0.80078125, "learning_rate": 1.330894575128977e-06, "loss": 3.9554, "step": 24406 }, { "epoch": 8.130257349879237, "grad_norm": 0.72265625, "learning_rate": 1.3304342861655785e-06, "loss": 3.9863, "step": 24407 }, { "epoch": 8.130590488881486, "grad_norm": 0.77734375, "learning_rate": 1.329974069064259e-06, "loss": 4.1337, "step": 24408 }, { "epoch": 8.130923627883735, "grad_norm": 0.75390625, "learning_rate": 1.329513923830378e-06, "loss": 3.9591, "step": 24409 }, { "epoch": 8.131256766885983, "grad_norm": 0.78515625, "learning_rate": 1.3290538504692948e-06, "loss": 3.9438, "step": 24410 }, { "epoch": 8.131589905888232, "grad_norm": 0.78125, "learning_rate": 1.328593848986365e-06, "loss": 3.9898, "step": 24411 }, { "epoch": 8.131923044890481, "grad_norm": 0.7890625, "learning_rate": 1.3281339193869528e-06, "loss": 3.9733, "step": 24412 }, { "epoch": 8.132256183892729, "grad_norm": 0.79296875, "learning_rate": 1.3276740616764126e-06, "loss": 4.0444, "step": 24413 }, { "epoch": 8.132589322894978, "grad_norm": 0.81640625, "learning_rate": 1.3272142758600976e-06, "loss": 3.9883, "step": 24414 }, { "epoch": 8.132922461897227, "grad_norm": 0.7421875, "learning_rate": 1.3267545619433677e-06, "loss": 3.9639, "step": 24415 }, { "epoch": 8.133255600899476, "grad_norm": 0.72265625, "learning_rate": 1.3262949199315757e-06, "loss": 4.0156, "step": 24416 }, { "epoch": 8.133588739901723, "grad_norm": 0.76953125, "learning_rate": 1.3258353498300765e-06, "loss": 3.9896, "step": 24417 }, { "epoch": 8.133921878903973, "grad_norm": 0.765625, "learning_rate": 1.3253758516442215e-06, "loss": 3.9852, "step": 24418 }, { "epoch": 8.134255017906222, "grad_norm": 0.74609375, "learning_rate": 1.3249164253793617e-06, "loss": 3.96, "step": 24419 }, { "epoch": 8.13458815690847, "grad_norm": 0.7578125, "learning_rate": 1.3244570710408524e-06, "loss": 3.9424, "step": 24420 }, { "epoch": 8.134921295910718, "grad_norm": 0.765625, "learning_rate": 1.323997788634042e-06, "loss": 4.0007, "step": 24421 }, { "epoch": 8.135254434912968, "grad_norm": 0.734375, "learning_rate": 1.3235385781642806e-06, "loss": 3.9906, "step": 24422 }, { "epoch": 8.135587573915217, "grad_norm": 0.72265625, "learning_rate": 1.3230794396369162e-06, "loss": 4.0379, "step": 24423 }, { "epoch": 8.135920712917464, "grad_norm": 0.82421875, "learning_rate": 1.3226203730572969e-06, "loss": 3.9442, "step": 24424 }, { "epoch": 8.136253851919713, "grad_norm": 0.79296875, "learning_rate": 1.3221613784307704e-06, "loss": 4.0232, "step": 24425 }, { "epoch": 8.136586990921963, "grad_norm": 0.76953125, "learning_rate": 1.3217024557626806e-06, "loss": 3.9492, "step": 24426 }, { "epoch": 8.13692012992421, "grad_norm": 0.7578125, "learning_rate": 1.3212436050583773e-06, "loss": 4.067, "step": 24427 }, { "epoch": 8.13725326892646, "grad_norm": 0.74609375, "learning_rate": 1.3207848263232023e-06, "loss": 3.9821, "step": 24428 }, { "epoch": 8.137586407928708, "grad_norm": 0.74609375, "learning_rate": 1.3203261195625003e-06, "loss": 3.9851, "step": 24429 }, { "epoch": 8.137919546930958, "grad_norm": 0.77734375, "learning_rate": 1.319867484781612e-06, "loss": 3.9771, "step": 24430 }, { "epoch": 8.138252685933205, "grad_norm": 0.79296875, "learning_rate": 1.3194089219858848e-06, "loss": 4.0003, "step": 24431 }, { "epoch": 8.138585824935454, "grad_norm": 0.7734375, "learning_rate": 1.3189504311806546e-06, "loss": 3.9783, "step": 24432 }, { "epoch": 8.138918963937703, "grad_norm": 0.76953125, "learning_rate": 1.3184920123712622e-06, "loss": 4.0268, "step": 24433 }, { "epoch": 8.139252102939952, "grad_norm": 0.7578125, "learning_rate": 1.3180336655630503e-06, "loss": 4.078, "step": 24434 }, { "epoch": 8.1395852419422, "grad_norm": 0.8671875, "learning_rate": 1.3175753907613562e-06, "loss": 3.969, "step": 24435 }, { "epoch": 8.139918380944449, "grad_norm": 0.80078125, "learning_rate": 1.3171171879715174e-06, "loss": 3.959, "step": 24436 }, { "epoch": 8.140251519946698, "grad_norm": 0.7421875, "learning_rate": 1.31665905719887e-06, "loss": 3.97, "step": 24437 }, { "epoch": 8.140584658948946, "grad_norm": 0.765625, "learning_rate": 1.3162009984487532e-06, "loss": 4.0348, "step": 24438 }, { "epoch": 8.140917797951195, "grad_norm": 0.79296875, "learning_rate": 1.3157430117265002e-06, "loss": 3.927, "step": 24439 }, { "epoch": 8.141250936953444, "grad_norm": 0.74609375, "learning_rate": 1.3152850970374464e-06, "loss": 4.0151, "step": 24440 }, { "epoch": 8.141584075955693, "grad_norm": 0.77734375, "learning_rate": 1.3148272543869251e-06, "loss": 4.0094, "step": 24441 }, { "epoch": 8.14191721495794, "grad_norm": 0.7578125, "learning_rate": 1.3143694837802692e-06, "loss": 3.9648, "step": 24442 }, { "epoch": 8.14225035396019, "grad_norm": 0.71484375, "learning_rate": 1.3139117852228104e-06, "loss": 3.9538, "step": 24443 }, { "epoch": 8.142583492962439, "grad_norm": 0.765625, "learning_rate": 1.3134541587198793e-06, "loss": 4.0275, "step": 24444 }, { "epoch": 8.142916631964686, "grad_norm": 0.828125, "learning_rate": 1.3129966042768086e-06, "loss": 3.9875, "step": 24445 }, { "epoch": 8.143249770966936, "grad_norm": 0.80859375, "learning_rate": 1.3125391218989267e-06, "loss": 3.9291, "step": 24446 }, { "epoch": 8.143582909969185, "grad_norm": 0.79296875, "learning_rate": 1.3120817115915618e-06, "loss": 3.9334, "step": 24447 }, { "epoch": 8.143916048971434, "grad_norm": 0.7734375, "learning_rate": 1.3116243733600397e-06, "loss": 3.998, "step": 24448 }, { "epoch": 8.144249187973681, "grad_norm": 0.734375, "learning_rate": 1.3111671072096936e-06, "loss": 4.0848, "step": 24449 }, { "epoch": 8.14458232697593, "grad_norm": 0.75390625, "learning_rate": 1.3107099131458437e-06, "loss": 3.9586, "step": 24450 }, { "epoch": 8.14491546597818, "grad_norm": 0.71484375, "learning_rate": 1.310252791173816e-06, "loss": 3.9706, "step": 24451 }, { "epoch": 8.145248604980429, "grad_norm": 0.7578125, "learning_rate": 1.3097957412989375e-06, "loss": 4.025, "step": 24452 }, { "epoch": 8.145581743982676, "grad_norm": 0.77734375, "learning_rate": 1.3093387635265314e-06, "loss": 3.9273, "step": 24453 }, { "epoch": 8.145914882984925, "grad_norm": 0.7265625, "learning_rate": 1.3088818578619191e-06, "loss": 3.9953, "step": 24454 }, { "epoch": 8.146248021987175, "grad_norm": 0.76953125, "learning_rate": 1.3084250243104215e-06, "loss": 4.0078, "step": 24455 }, { "epoch": 8.146581160989422, "grad_norm": 0.7578125, "learning_rate": 1.3079682628773634e-06, "loss": 3.9968, "step": 24456 }, { "epoch": 8.146914299991671, "grad_norm": 0.75390625, "learning_rate": 1.307511573568064e-06, "loss": 4.0333, "step": 24457 }, { "epoch": 8.14724743899392, "grad_norm": 0.71875, "learning_rate": 1.3070549563878405e-06, "loss": 3.9735, "step": 24458 }, { "epoch": 8.14758057799617, "grad_norm": 0.765625, "learning_rate": 1.3065984113420132e-06, "loss": 3.9684, "step": 24459 }, { "epoch": 8.147913716998417, "grad_norm": 0.734375, "learning_rate": 1.3061419384359e-06, "loss": 3.949, "step": 24460 }, { "epoch": 8.148246856000666, "grad_norm": 0.71875, "learning_rate": 1.3056855376748167e-06, "loss": 4.0664, "step": 24461 }, { "epoch": 8.148579995002915, "grad_norm": 0.75, "learning_rate": 1.305229209064079e-06, "loss": 3.9454, "step": 24462 }, { "epoch": 8.148913134005163, "grad_norm": 0.7890625, "learning_rate": 1.3047729526090047e-06, "loss": 3.9137, "step": 24463 }, { "epoch": 8.149246273007412, "grad_norm": 0.75390625, "learning_rate": 1.3043167683149065e-06, "loss": 3.9912, "step": 24464 }, { "epoch": 8.149579412009661, "grad_norm": 0.7734375, "learning_rate": 1.303860656187098e-06, "loss": 3.9037, "step": 24465 }, { "epoch": 8.14991255101191, "grad_norm": 0.7734375, "learning_rate": 1.3034046162308915e-06, "loss": 3.9145, "step": 24466 }, { "epoch": 8.150245690014158, "grad_norm": 0.81640625, "learning_rate": 1.3029486484516004e-06, "loss": 3.9271, "step": 24467 }, { "epoch": 8.150578829016407, "grad_norm": 0.78515625, "learning_rate": 1.3024927528545345e-06, "loss": 3.981, "step": 24468 }, { "epoch": 8.150911968018656, "grad_norm": 0.7578125, "learning_rate": 1.3020369294450044e-06, "loss": 3.9745, "step": 24469 }, { "epoch": 8.151245107020905, "grad_norm": 0.734375, "learning_rate": 1.3015811782283167e-06, "loss": 3.9704, "step": 24470 }, { "epoch": 8.151578246023153, "grad_norm": 0.71875, "learning_rate": 1.301125499209785e-06, "loss": 3.953, "step": 24471 }, { "epoch": 8.151911385025402, "grad_norm": 0.75390625, "learning_rate": 1.300669892394715e-06, "loss": 4.0714, "step": 24472 }, { "epoch": 8.152244524027651, "grad_norm": 0.73046875, "learning_rate": 1.300214357788412e-06, "loss": 4.0329, "step": 24473 }, { "epoch": 8.152577663029899, "grad_norm": 0.7578125, "learning_rate": 1.2997588953961813e-06, "loss": 3.9981, "step": 24474 }, { "epoch": 8.152910802032148, "grad_norm": 0.71484375, "learning_rate": 1.299303505223332e-06, "loss": 4.0133, "step": 24475 }, { "epoch": 8.153243941034397, "grad_norm": 0.7265625, "learning_rate": 1.2988481872751681e-06, "loss": 4.0706, "step": 24476 }, { "epoch": 8.153577080036646, "grad_norm": 0.76171875, "learning_rate": 1.298392941556987e-06, "loss": 3.9877, "step": 24477 }, { "epoch": 8.153910219038893, "grad_norm": 0.75390625, "learning_rate": 1.297937768074098e-06, "loss": 3.9846, "step": 24478 }, { "epoch": 8.154243358041143, "grad_norm": 0.80078125, "learning_rate": 1.2974826668317996e-06, "loss": 3.9841, "step": 24479 }, { "epoch": 8.154576497043392, "grad_norm": 0.7109375, "learning_rate": 1.297027637835394e-06, "loss": 4.0762, "step": 24480 }, { "epoch": 8.15490963604564, "grad_norm": 0.76953125, "learning_rate": 1.2965726810901793e-06, "loss": 4.0, "step": 24481 }, { "epoch": 8.155242775047888, "grad_norm": 0.75390625, "learning_rate": 1.296117796601457e-06, "loss": 4.0027, "step": 24482 }, { "epoch": 8.155575914050138, "grad_norm": 0.78515625, "learning_rate": 1.2956629843745255e-06, "loss": 3.9768, "step": 24483 }, { "epoch": 8.155909053052387, "grad_norm": 0.7734375, "learning_rate": 1.295208244414682e-06, "loss": 3.9603, "step": 24484 }, { "epoch": 8.156242192054634, "grad_norm": 0.74609375, "learning_rate": 1.2947535767272225e-06, "loss": 3.9531, "step": 24485 }, { "epoch": 8.156575331056883, "grad_norm": 0.74609375, "learning_rate": 1.2942989813174436e-06, "loss": 4.0606, "step": 24486 }, { "epoch": 8.156908470059133, "grad_norm": 0.74609375, "learning_rate": 1.293844458190639e-06, "loss": 4.01, "step": 24487 }, { "epoch": 8.15724160906138, "grad_norm": 0.78125, "learning_rate": 1.2933900073521027e-06, "loss": 3.98, "step": 24488 }, { "epoch": 8.15757474806363, "grad_norm": 0.74609375, "learning_rate": 1.2929356288071313e-06, "loss": 3.9932, "step": 24489 }, { "epoch": 8.157907887065878, "grad_norm": 0.7734375, "learning_rate": 1.2924813225610155e-06, "loss": 3.9128, "step": 24490 }, { "epoch": 8.158241026068128, "grad_norm": 0.76171875, "learning_rate": 1.2920270886190471e-06, "loss": 3.9818, "step": 24491 }, { "epoch": 8.158574165070375, "grad_norm": 0.75390625, "learning_rate": 1.291572926986514e-06, "loss": 3.9838, "step": 24492 }, { "epoch": 8.158907304072624, "grad_norm": 0.78515625, "learning_rate": 1.2911188376687108e-06, "loss": 3.9852, "step": 24493 }, { "epoch": 8.159240443074873, "grad_norm": 0.7578125, "learning_rate": 1.2906648206709277e-06, "loss": 3.9428, "step": 24494 }, { "epoch": 8.159573582077122, "grad_norm": 0.7734375, "learning_rate": 1.290210875998445e-06, "loss": 4.0178, "step": 24495 }, { "epoch": 8.15990672107937, "grad_norm": 0.7109375, "learning_rate": 1.2897570036565576e-06, "loss": 3.9739, "step": 24496 }, { "epoch": 8.160239860081619, "grad_norm": 0.82421875, "learning_rate": 1.2893032036505498e-06, "loss": 3.9573, "step": 24497 }, { "epoch": 8.160572999083868, "grad_norm": 0.7265625, "learning_rate": 1.2888494759857075e-06, "loss": 3.912, "step": 24498 }, { "epoch": 8.160906138086116, "grad_norm": 0.796875, "learning_rate": 1.2883958206673132e-06, "loss": 3.9405, "step": 24499 }, { "epoch": 8.161239277088365, "grad_norm": 0.75, "learning_rate": 1.2879422377006553e-06, "loss": 4.0931, "step": 24500 }, { "epoch": 8.161572416090614, "grad_norm": 0.7578125, "learning_rate": 1.2874887270910152e-06, "loss": 3.9237, "step": 24501 }, { "epoch": 8.161905555092863, "grad_norm": 0.75390625, "learning_rate": 1.2870352888436745e-06, "loss": 3.9858, "step": 24502 }, { "epoch": 8.16223869409511, "grad_norm": 0.73828125, "learning_rate": 1.2865819229639158e-06, "loss": 3.933, "step": 24503 }, { "epoch": 8.16257183309736, "grad_norm": 0.76953125, "learning_rate": 1.2861286294570198e-06, "loss": 3.9396, "step": 24504 }, { "epoch": 8.162904972099609, "grad_norm": 0.765625, "learning_rate": 1.2856754083282657e-06, "loss": 4.037, "step": 24505 }, { "epoch": 8.163238111101856, "grad_norm": 0.78515625, "learning_rate": 1.2852222595829316e-06, "loss": 3.9316, "step": 24506 }, { "epoch": 8.163571250104106, "grad_norm": 0.7578125, "learning_rate": 1.2847691832262983e-06, "loss": 4.0232, "step": 24507 }, { "epoch": 8.163904389106355, "grad_norm": 0.7734375, "learning_rate": 1.2843161792636418e-06, "loss": 3.9231, "step": 24508 }, { "epoch": 8.164237528108604, "grad_norm": 0.7578125, "learning_rate": 1.2838632477002384e-06, "loss": 3.9163, "step": 24509 }, { "epoch": 8.164570667110851, "grad_norm": 0.80859375, "learning_rate": 1.2834103885413625e-06, "loss": 3.9883, "step": 24510 }, { "epoch": 8.1649038061131, "grad_norm": 0.80078125, "learning_rate": 1.282957601792293e-06, "loss": 3.915, "step": 24511 }, { "epoch": 8.16523694511535, "grad_norm": 0.71875, "learning_rate": 1.2825048874583027e-06, "loss": 3.9544, "step": 24512 }, { "epoch": 8.165570084117599, "grad_norm": 0.765625, "learning_rate": 1.2820522455446609e-06, "loss": 4.0292, "step": 24513 }, { "epoch": 8.165903223119846, "grad_norm": 0.78125, "learning_rate": 1.2815996760566415e-06, "loss": 3.9219, "step": 24514 }, { "epoch": 8.166236362122095, "grad_norm": 0.75390625, "learning_rate": 1.2811471789995183e-06, "loss": 4.0101, "step": 24515 }, { "epoch": 8.166569501124345, "grad_norm": 0.828125, "learning_rate": 1.2806947543785605e-06, "loss": 3.9571, "step": 24516 }, { "epoch": 8.166902640126592, "grad_norm": 0.76953125, "learning_rate": 1.2802424021990372e-06, "loss": 3.9961, "step": 24517 }, { "epoch": 8.167235779128841, "grad_norm": 0.7890625, "learning_rate": 1.2797901224662166e-06, "loss": 3.8809, "step": 24518 }, { "epoch": 8.16756891813109, "grad_norm": 0.734375, "learning_rate": 1.2793379151853706e-06, "loss": 4.043, "step": 24519 }, { "epoch": 8.16790205713334, "grad_norm": 0.7734375, "learning_rate": 1.2788857803617631e-06, "loss": 4.0919, "step": 24520 }, { "epoch": 8.168235196135587, "grad_norm": 0.7578125, "learning_rate": 1.2784337180006616e-06, "loss": 3.9928, "step": 24521 }, { "epoch": 8.168568335137836, "grad_norm": 0.73046875, "learning_rate": 1.277981728107331e-06, "loss": 4.0653, "step": 24522 }, { "epoch": 8.168901474140085, "grad_norm": 0.76171875, "learning_rate": 1.2775298106870373e-06, "loss": 3.9273, "step": 24523 }, { "epoch": 8.169234613142333, "grad_norm": 0.75, "learning_rate": 1.2770779657450426e-06, "loss": 3.9481, "step": 24524 }, { "epoch": 8.169567752144582, "grad_norm": 0.73828125, "learning_rate": 1.2766261932866097e-06, "loss": 3.947, "step": 24525 }, { "epoch": 8.169900891146831, "grad_norm": 0.76171875, "learning_rate": 1.2761744933170032e-06, "loss": 4.0601, "step": 24526 }, { "epoch": 8.17023403014908, "grad_norm": 0.75, "learning_rate": 1.2757228658414827e-06, "loss": 3.9437, "step": 24527 }, { "epoch": 8.170567169151328, "grad_norm": 0.78515625, "learning_rate": 1.2752713108653094e-06, "loss": 3.9487, "step": 24528 }, { "epoch": 8.170900308153577, "grad_norm": 0.79296875, "learning_rate": 1.2748198283937426e-06, "loss": 3.9612, "step": 24529 }, { "epoch": 8.171233447155826, "grad_norm": 0.76953125, "learning_rate": 1.2743684184320415e-06, "loss": 4.0224, "step": 24530 }, { "epoch": 8.171566586158075, "grad_norm": 0.78125, "learning_rate": 1.2739170809854632e-06, "loss": 3.9888, "step": 24531 }, { "epoch": 8.171899725160323, "grad_norm": 0.75390625, "learning_rate": 1.2734658160592633e-06, "loss": 3.9899, "step": 24532 }, { "epoch": 8.172232864162572, "grad_norm": 0.80859375, "learning_rate": 1.273014623658702e-06, "loss": 3.9756, "step": 24533 }, { "epoch": 8.172566003164821, "grad_norm": 0.79296875, "learning_rate": 1.272563503789033e-06, "loss": 3.9434, "step": 24534 }, { "epoch": 8.172899142167068, "grad_norm": 0.78515625, "learning_rate": 1.2721124564555101e-06, "loss": 3.9671, "step": 24535 }, { "epoch": 8.173232281169318, "grad_norm": 0.8359375, "learning_rate": 1.271661481663386e-06, "loss": 3.9823, "step": 24536 }, { "epoch": 8.173565420171567, "grad_norm": 0.7734375, "learning_rate": 1.271210579417917e-06, "loss": 4.0175, "step": 24537 }, { "epoch": 8.173898559173816, "grad_norm": 0.73828125, "learning_rate": 1.2707597497243546e-06, "loss": 3.992, "step": 24538 }, { "epoch": 8.174231698176063, "grad_norm": 0.73828125, "learning_rate": 1.2703089925879454e-06, "loss": 3.9756, "step": 24539 }, { "epoch": 8.174564837178313, "grad_norm": 0.796875, "learning_rate": 1.2698583080139441e-06, "loss": 4.0315, "step": 24540 }, { "epoch": 8.174897976180562, "grad_norm": 0.7734375, "learning_rate": 1.2694076960075998e-06, "loss": 4.0542, "step": 24541 }, { "epoch": 8.17523111518281, "grad_norm": 0.78125, "learning_rate": 1.2689571565741599e-06, "loss": 3.9853, "step": 24542 }, { "epoch": 8.175564254185058, "grad_norm": 0.80078125, "learning_rate": 1.2685066897188716e-06, "loss": 4.012, "step": 24543 }, { "epoch": 8.175897393187308, "grad_norm": 0.7734375, "learning_rate": 1.2680562954469835e-06, "loss": 3.9806, "step": 24544 }, { "epoch": 8.176230532189557, "grad_norm": 0.76171875, "learning_rate": 1.2676059737637422e-06, "loss": 3.9345, "step": 24545 }, { "epoch": 8.176563671191804, "grad_norm": 0.73828125, "learning_rate": 1.2671557246743918e-06, "loss": 3.9968, "step": 24546 }, { "epoch": 8.176896810194053, "grad_norm": 0.80859375, "learning_rate": 1.2667055481841772e-06, "loss": 3.9482, "step": 24547 }, { "epoch": 8.177229949196303, "grad_norm": 0.76953125, "learning_rate": 1.2662554442983409e-06, "loss": 3.9564, "step": 24548 }, { "epoch": 8.177563088198552, "grad_norm": 0.73828125, "learning_rate": 1.2658054130221271e-06, "loss": 3.9718, "step": 24549 }, { "epoch": 8.1778962272008, "grad_norm": 0.76171875, "learning_rate": 1.2653554543607742e-06, "loss": 4.0498, "step": 24550 }, { "epoch": 8.178229366203048, "grad_norm": 0.76171875, "learning_rate": 1.2649055683195288e-06, "loss": 4.0742, "step": 24551 }, { "epoch": 8.178562505205297, "grad_norm": 0.7890625, "learning_rate": 1.2644557549036282e-06, "loss": 3.9111, "step": 24552 }, { "epoch": 8.178895644207545, "grad_norm": 0.75, "learning_rate": 1.2640060141183115e-06, "loss": 3.9903, "step": 24553 }, { "epoch": 8.179228783209794, "grad_norm": 0.734375, "learning_rate": 1.2635563459688166e-06, "loss": 3.9455, "step": 24554 }, { "epoch": 8.179561922212043, "grad_norm": 0.74609375, "learning_rate": 1.263106750460383e-06, "loss": 3.9417, "step": 24555 }, { "epoch": 8.179895061214292, "grad_norm": 0.75, "learning_rate": 1.2626572275982492e-06, "loss": 3.9332, "step": 24556 }, { "epoch": 8.18022820021654, "grad_norm": 0.76171875, "learning_rate": 1.2622077773876462e-06, "loss": 3.8767, "step": 24557 }, { "epoch": 8.180561339218789, "grad_norm": 0.73828125, "learning_rate": 1.2617583998338102e-06, "loss": 4.0632, "step": 24558 }, { "epoch": 8.180894478221038, "grad_norm": 0.8515625, "learning_rate": 1.2613090949419784e-06, "loss": 3.9531, "step": 24559 }, { "epoch": 8.181227617223286, "grad_norm": 0.8203125, "learning_rate": 1.2608598627173829e-06, "loss": 3.9105, "step": 24560 }, { "epoch": 8.181560756225535, "grad_norm": 0.76953125, "learning_rate": 1.2604107031652553e-06, "loss": 4.0689, "step": 24561 }, { "epoch": 8.181893895227784, "grad_norm": 0.75390625, "learning_rate": 1.2599616162908276e-06, "loss": 3.9003, "step": 24562 }, { "epoch": 8.182227034230033, "grad_norm": 0.80078125, "learning_rate": 1.259512602099332e-06, "loss": 4.0477, "step": 24563 }, { "epoch": 8.18256017323228, "grad_norm": 0.8046875, "learning_rate": 1.2590636605959976e-06, "loss": 3.9039, "step": 24564 }, { "epoch": 8.18289331223453, "grad_norm": 0.7734375, "learning_rate": 1.2586147917860547e-06, "loss": 4.02, "step": 24565 }, { "epoch": 8.183226451236779, "grad_norm": 0.7421875, "learning_rate": 1.2581659956747296e-06, "loss": 3.9599, "step": 24566 }, { "epoch": 8.183559590239028, "grad_norm": 0.8125, "learning_rate": 1.2577172722672516e-06, "loss": 3.8857, "step": 24567 }, { "epoch": 8.183892729241276, "grad_norm": 0.7734375, "learning_rate": 1.257268621568846e-06, "loss": 3.9608, "step": 24568 }, { "epoch": 8.184225868243525, "grad_norm": 0.78125, "learning_rate": 1.256820043584738e-06, "loss": 3.9532, "step": 24569 }, { "epoch": 8.184559007245774, "grad_norm": 0.76171875, "learning_rate": 1.2563715383201554e-06, "loss": 4.0163, "step": 24570 }, { "epoch": 8.184892146248021, "grad_norm": 0.75, "learning_rate": 1.25592310578032e-06, "loss": 3.9652, "step": 24571 }, { "epoch": 8.18522528525027, "grad_norm": 0.80078125, "learning_rate": 1.2554747459704565e-06, "loss": 3.9382, "step": 24572 }, { "epoch": 8.18555842425252, "grad_norm": 0.765625, "learning_rate": 1.2550264588957845e-06, "loss": 3.9645, "step": 24573 }, { "epoch": 8.185891563254769, "grad_norm": 0.71875, "learning_rate": 1.2545782445615317e-06, "loss": 4.0494, "step": 24574 }, { "epoch": 8.186224702257016, "grad_norm": 0.79296875, "learning_rate": 1.2541301029729128e-06, "loss": 3.9487, "step": 24575 }, { "epoch": 8.186557841259265, "grad_norm": 0.7109375, "learning_rate": 1.2536820341351473e-06, "loss": 4.0493, "step": 24576 }, { "epoch": 8.186890980261515, "grad_norm": 0.7734375, "learning_rate": 1.253234038053459e-06, "loss": 3.9316, "step": 24577 }, { "epoch": 8.187224119263762, "grad_norm": 0.796875, "learning_rate": 1.2527861147330635e-06, "loss": 3.9763, "step": 24578 }, { "epoch": 8.187557258266011, "grad_norm": 0.765625, "learning_rate": 1.2523382641791792e-06, "loss": 3.9932, "step": 24579 }, { "epoch": 8.18789039726826, "grad_norm": 0.76171875, "learning_rate": 1.2518904863970186e-06, "loss": 4.0769, "step": 24580 }, { "epoch": 8.18822353627051, "grad_norm": 0.76953125, "learning_rate": 1.251442781391803e-06, "loss": 3.9103, "step": 24581 }, { "epoch": 8.188556675272757, "grad_norm": 0.75, "learning_rate": 1.2509951491687447e-06, "loss": 3.9726, "step": 24582 }, { "epoch": 8.188889814275006, "grad_norm": 0.7734375, "learning_rate": 1.2505475897330565e-06, "loss": 3.9919, "step": 24583 }, { "epoch": 8.189222953277255, "grad_norm": 0.7421875, "learning_rate": 1.2501001030899536e-06, "loss": 3.9823, "step": 24584 }, { "epoch": 8.189556092279503, "grad_norm": 0.8125, "learning_rate": 1.249652689244647e-06, "loss": 3.9662, "step": 24585 }, { "epoch": 8.189889231281752, "grad_norm": 0.765625, "learning_rate": 1.249205348202349e-06, "loss": 4.0987, "step": 24586 }, { "epoch": 8.190222370284001, "grad_norm": 0.8203125, "learning_rate": 1.2487580799682667e-06, "loss": 3.9598, "step": 24587 }, { "epoch": 8.19055550928625, "grad_norm": 0.78515625, "learning_rate": 1.2483108845476143e-06, "loss": 3.9308, "step": 24588 }, { "epoch": 8.190888648288498, "grad_norm": 0.734375, "learning_rate": 1.2478637619455995e-06, "loss": 3.9822, "step": 24589 }, { "epoch": 8.191221787290747, "grad_norm": 0.7734375, "learning_rate": 1.2474167121674302e-06, "loss": 4.037, "step": 24590 }, { "epoch": 8.191554926292996, "grad_norm": 0.78515625, "learning_rate": 1.2469697352183101e-06, "loss": 3.9669, "step": 24591 }, { "epoch": 8.191888065295245, "grad_norm": 0.75390625, "learning_rate": 1.246522831103453e-06, "loss": 3.9702, "step": 24592 }, { "epoch": 8.192221204297493, "grad_norm": 0.71875, "learning_rate": 1.2460759998280584e-06, "loss": 3.9337, "step": 24593 }, { "epoch": 8.192554343299742, "grad_norm": 0.74609375, "learning_rate": 1.2456292413973303e-06, "loss": 3.9942, "step": 24594 }, { "epoch": 8.192887482301991, "grad_norm": 0.76171875, "learning_rate": 1.2451825558164765e-06, "loss": 4.0233, "step": 24595 }, { "epoch": 8.193220621304238, "grad_norm": 0.765625, "learning_rate": 1.244735943090697e-06, "loss": 3.9233, "step": 24596 }, { "epoch": 8.193553760306488, "grad_norm": 0.734375, "learning_rate": 1.2442894032251965e-06, "loss": 3.9479, "step": 24597 }, { "epoch": 8.193886899308737, "grad_norm": 0.7578125, "learning_rate": 1.2438429362251718e-06, "loss": 3.9739, "step": 24598 }, { "epoch": 8.194220038310986, "grad_norm": 0.78515625, "learning_rate": 1.2433965420958276e-06, "loss": 3.9047, "step": 24599 }, { "epoch": 8.194553177313233, "grad_norm": 0.76171875, "learning_rate": 1.2429502208423629e-06, "loss": 4.0623, "step": 24600 }, { "epoch": 8.194886316315483, "grad_norm": 0.76953125, "learning_rate": 1.242503972469976e-06, "loss": 3.9954, "step": 24601 }, { "epoch": 8.195219455317732, "grad_norm": 0.76953125, "learning_rate": 1.2420577969838611e-06, "loss": 3.9702, "step": 24602 }, { "epoch": 8.19555259431998, "grad_norm": 0.8125, "learning_rate": 1.24161169438922e-06, "loss": 4.0273, "step": 24603 }, { "epoch": 8.195885733322228, "grad_norm": 0.74609375, "learning_rate": 1.2411656646912475e-06, "loss": 4.0373, "step": 24604 }, { "epoch": 8.196218872324478, "grad_norm": 0.78515625, "learning_rate": 1.240719707895138e-06, "loss": 4.0062, "step": 24605 }, { "epoch": 8.196552011326727, "grad_norm": 0.7734375, "learning_rate": 1.2402738240060846e-06, "loss": 3.9834, "step": 24606 }, { "epoch": 8.196885150328974, "grad_norm": 0.75, "learning_rate": 1.239828013029284e-06, "loss": 3.9859, "step": 24607 }, { "epoch": 8.197218289331223, "grad_norm": 0.78515625, "learning_rate": 1.2393822749699281e-06, "loss": 3.9736, "step": 24608 }, { "epoch": 8.197551428333473, "grad_norm": 0.76953125, "learning_rate": 1.2389366098332077e-06, "loss": 4.0281, "step": 24609 }, { "epoch": 8.197884567335722, "grad_norm": 0.71484375, "learning_rate": 1.2384910176243145e-06, "loss": 4.0256, "step": 24610 }, { "epoch": 8.198217706337969, "grad_norm": 0.7578125, "learning_rate": 1.238045498348439e-06, "loss": 3.9973, "step": 24611 }, { "epoch": 8.198550845340218, "grad_norm": 0.73828125, "learning_rate": 1.2376000520107692e-06, "loss": 3.9568, "step": 24612 }, { "epoch": 8.198883984342467, "grad_norm": 0.7421875, "learning_rate": 1.2371546786164931e-06, "loss": 3.9688, "step": 24613 }, { "epoch": 8.199217123344715, "grad_norm": 0.78125, "learning_rate": 1.2367093781708017e-06, "loss": 4.0302, "step": 24614 }, { "epoch": 8.199550262346964, "grad_norm": 0.74609375, "learning_rate": 1.2362641506788799e-06, "loss": 3.9815, "step": 24615 }, { "epoch": 8.199883401349213, "grad_norm": 0.75390625, "learning_rate": 1.2358189961459128e-06, "loss": 4.0002, "step": 24616 }, { "epoch": 8.200216540351462, "grad_norm": 0.75390625, "learning_rate": 1.235373914577085e-06, "loss": 4.0225, "step": 24617 }, { "epoch": 8.20054967935371, "grad_norm": 0.76953125, "learning_rate": 1.234928905977584e-06, "loss": 3.9202, "step": 24618 }, { "epoch": 8.200882818355959, "grad_norm": 0.7421875, "learning_rate": 1.234483970352592e-06, "loss": 4.0114, "step": 24619 }, { "epoch": 8.201215957358208, "grad_norm": 0.79296875, "learning_rate": 1.2340391077072882e-06, "loss": 3.9449, "step": 24620 }, { "epoch": 8.201549096360456, "grad_norm": 0.76953125, "learning_rate": 1.2335943180468575e-06, "loss": 3.9622, "step": 24621 }, { "epoch": 8.201882235362705, "grad_norm": 0.73828125, "learning_rate": 1.23314960137648e-06, "loss": 3.9691, "step": 24622 }, { "epoch": 8.202215374364954, "grad_norm": 0.73046875, "learning_rate": 1.2327049577013367e-06, "loss": 3.9087, "step": 24623 }, { "epoch": 8.202548513367203, "grad_norm": 0.76171875, "learning_rate": 1.232260387026603e-06, "loss": 3.9987, "step": 24624 }, { "epoch": 8.20288165236945, "grad_norm": 0.7890625, "learning_rate": 1.2318158893574618e-06, "loss": 3.9311, "step": 24625 }, { "epoch": 8.2032147913717, "grad_norm": 0.75390625, "learning_rate": 1.231371464699089e-06, "loss": 3.9903, "step": 24626 }, { "epoch": 8.203547930373949, "grad_norm": 0.7578125, "learning_rate": 1.230927113056661e-06, "loss": 3.9391, "step": 24627 }, { "epoch": 8.203881069376198, "grad_norm": 0.765625, "learning_rate": 1.2304828344353522e-06, "loss": 3.9633, "step": 24628 }, { "epoch": 8.204214208378446, "grad_norm": 0.76171875, "learning_rate": 1.2300386288403401e-06, "loss": 3.9812, "step": 24629 }, { "epoch": 8.204547347380695, "grad_norm": 0.765625, "learning_rate": 1.2295944962767964e-06, "loss": 4.0091, "step": 24630 }, { "epoch": 8.204880486382944, "grad_norm": 0.72265625, "learning_rate": 1.2291504367498935e-06, "loss": 3.9695, "step": 24631 }, { "epoch": 8.205213625385191, "grad_norm": 0.8125, "learning_rate": 1.2287064502648065e-06, "loss": 3.8751, "step": 24632 }, { "epoch": 8.20554676438744, "grad_norm": 0.75390625, "learning_rate": 1.2282625368267065e-06, "loss": 3.983, "step": 24633 }, { "epoch": 8.20587990338969, "grad_norm": 0.79296875, "learning_rate": 1.227818696440764e-06, "loss": 3.9558, "step": 24634 }, { "epoch": 8.206213042391939, "grad_norm": 0.78125, "learning_rate": 1.227374929112145e-06, "loss": 3.936, "step": 24635 }, { "epoch": 8.206546181394186, "grad_norm": 0.78125, "learning_rate": 1.2269312348460246e-06, "loss": 4.0002, "step": 24636 }, { "epoch": 8.206879320396435, "grad_norm": 0.76171875, "learning_rate": 1.2264876136475686e-06, "loss": 4.0408, "step": 24637 }, { "epoch": 8.207212459398685, "grad_norm": 0.7578125, "learning_rate": 1.2260440655219405e-06, "loss": 3.9592, "step": 24638 }, { "epoch": 8.207545598400932, "grad_norm": 0.79296875, "learning_rate": 1.2256005904743118e-06, "loss": 3.9698, "step": 24639 }, { "epoch": 8.207878737403181, "grad_norm": 0.80078125, "learning_rate": 1.225157188509845e-06, "loss": 3.9828, "step": 24640 }, { "epoch": 8.20821187640543, "grad_norm": 0.77734375, "learning_rate": 1.2247138596337063e-06, "loss": 3.9379, "step": 24641 }, { "epoch": 8.20854501540768, "grad_norm": 0.76953125, "learning_rate": 1.224270603851057e-06, "loss": 3.9301, "step": 24642 }, { "epoch": 8.208878154409927, "grad_norm": 0.8125, "learning_rate": 1.2238274211670634e-06, "loss": 3.9414, "step": 24643 }, { "epoch": 8.209211293412176, "grad_norm": 0.8125, "learning_rate": 1.2233843115868866e-06, "loss": 4.0471, "step": 24644 }, { "epoch": 8.209544432414425, "grad_norm": 0.875, "learning_rate": 1.2229412751156874e-06, "loss": 3.9347, "step": 24645 }, { "epoch": 8.209877571416673, "grad_norm": 0.76953125, "learning_rate": 1.222498311758626e-06, "loss": 4.0193, "step": 24646 }, { "epoch": 8.210210710418922, "grad_norm": 0.72265625, "learning_rate": 1.2220554215208629e-06, "loss": 3.9578, "step": 24647 }, { "epoch": 8.210543849421171, "grad_norm": 0.76953125, "learning_rate": 1.2216126044075555e-06, "loss": 3.9917, "step": 24648 }, { "epoch": 8.21087698842342, "grad_norm": 0.77734375, "learning_rate": 1.2211698604238618e-06, "loss": 3.9887, "step": 24649 }, { "epoch": 8.211210127425668, "grad_norm": 0.71484375, "learning_rate": 1.2207271895749383e-06, "loss": 3.8872, "step": 24650 }, { "epoch": 8.211543266427917, "grad_norm": 0.75390625, "learning_rate": 1.220284591865944e-06, "loss": 3.9873, "step": 24651 }, { "epoch": 8.211876405430166, "grad_norm": 0.82421875, "learning_rate": 1.2198420673020318e-06, "loss": 4.004, "step": 24652 }, { "epoch": 8.212209544432415, "grad_norm": 0.8203125, "learning_rate": 1.2193996158883575e-06, "loss": 4.013, "step": 24653 }, { "epoch": 8.212542683434663, "grad_norm": 0.7578125, "learning_rate": 1.2189572376300715e-06, "loss": 3.9992, "step": 24654 }, { "epoch": 8.212875822436912, "grad_norm": 0.796875, "learning_rate": 1.2185149325323331e-06, "loss": 3.9073, "step": 24655 }, { "epoch": 8.213208961439161, "grad_norm": 0.828125, "learning_rate": 1.2180727006002873e-06, "loss": 3.9639, "step": 24656 }, { "epoch": 8.213542100441408, "grad_norm": 0.74609375, "learning_rate": 1.2176305418390869e-06, "loss": 4.0479, "step": 24657 }, { "epoch": 8.213875239443658, "grad_norm": 0.7265625, "learning_rate": 1.2171884562538848e-06, "loss": 3.9405, "step": 24658 }, { "epoch": 8.214208378445907, "grad_norm": 0.7734375, "learning_rate": 1.2167464438498283e-06, "loss": 4.0184, "step": 24659 }, { "epoch": 8.214541517448156, "grad_norm": 0.75, "learning_rate": 1.216304504632067e-06, "loss": 4.0117, "step": 24660 }, { "epoch": 8.214874656450403, "grad_norm": 0.74609375, "learning_rate": 1.2158626386057453e-06, "loss": 3.9243, "step": 24661 }, { "epoch": 8.215207795452653, "grad_norm": 0.8046875, "learning_rate": 1.215420845776014e-06, "loss": 3.8975, "step": 24662 }, { "epoch": 8.215540934454902, "grad_norm": 0.79296875, "learning_rate": 1.2149791261480185e-06, "loss": 3.9878, "step": 24663 }, { "epoch": 8.21587407345715, "grad_norm": 0.796875, "learning_rate": 1.2145374797269015e-06, "loss": 3.9495, "step": 24664 }, { "epoch": 8.216207212459398, "grad_norm": 0.7734375, "learning_rate": 1.2140959065178096e-06, "loss": 3.9664, "step": 24665 }, { "epoch": 8.216540351461648, "grad_norm": 0.7890625, "learning_rate": 1.2136544065258848e-06, "loss": 3.9728, "step": 24666 }, { "epoch": 8.216873490463897, "grad_norm": 0.80078125, "learning_rate": 1.21321297975627e-06, "loss": 3.9898, "step": 24667 }, { "epoch": 8.217206629466144, "grad_norm": 0.765625, "learning_rate": 1.212771626214105e-06, "loss": 3.9583, "step": 24668 }, { "epoch": 8.217539768468393, "grad_norm": 0.75, "learning_rate": 1.2123303459045343e-06, "loss": 4.007, "step": 24669 }, { "epoch": 8.217872907470642, "grad_norm": 0.81640625, "learning_rate": 1.211889138832696e-06, "loss": 3.9476, "step": 24670 }, { "epoch": 8.218206046472892, "grad_norm": 0.734375, "learning_rate": 1.2114480050037288e-06, "loss": 4.0104, "step": 24671 }, { "epoch": 8.218539185475139, "grad_norm": 0.7734375, "learning_rate": 1.21100694442277e-06, "loss": 3.9821, "step": 24672 }, { "epoch": 8.218872324477388, "grad_norm": 0.765625, "learning_rate": 1.2105659570949615e-06, "loss": 3.9156, "step": 24673 }, { "epoch": 8.219205463479637, "grad_norm": 0.78515625, "learning_rate": 1.2101250430254357e-06, "loss": 4.0236, "step": 24674 }, { "epoch": 8.219538602481885, "grad_norm": 0.70703125, "learning_rate": 1.2096842022193268e-06, "loss": 4.0702, "step": 24675 }, { "epoch": 8.219871741484134, "grad_norm": 0.7109375, "learning_rate": 1.2092434346817743e-06, "loss": 4.0204, "step": 24676 }, { "epoch": 8.220204880486383, "grad_norm": 0.75, "learning_rate": 1.2088027404179097e-06, "loss": 4.0344, "step": 24677 }, { "epoch": 8.220538019488632, "grad_norm": 0.7734375, "learning_rate": 1.2083621194328674e-06, "loss": 3.9811, "step": 24678 }, { "epoch": 8.22087115849088, "grad_norm": 0.75, "learning_rate": 1.2079215717317762e-06, "loss": 3.9025, "step": 24679 }, { "epoch": 8.221204297493129, "grad_norm": 0.734375, "learning_rate": 1.2074810973197717e-06, "loss": 3.9863, "step": 24680 }, { "epoch": 8.221537436495378, "grad_norm": 0.8125, "learning_rate": 1.207040696201983e-06, "loss": 4.0158, "step": 24681 }, { "epoch": 8.221870575497626, "grad_norm": 0.76171875, "learning_rate": 1.2066003683835398e-06, "loss": 4.0843, "step": 24682 }, { "epoch": 8.222203714499875, "grad_norm": 0.8203125, "learning_rate": 1.2061601138695708e-06, "loss": 3.9989, "step": 24683 }, { "epoch": 8.222536853502124, "grad_norm": 0.765625, "learning_rate": 1.2057199326652036e-06, "loss": 4.03, "step": 24684 }, { "epoch": 8.222869992504373, "grad_norm": 0.796875, "learning_rate": 1.2052798247755664e-06, "loss": 3.926, "step": 24685 }, { "epoch": 8.22320313150662, "grad_norm": 0.7890625, "learning_rate": 1.2048397902057829e-06, "loss": 3.9579, "step": 24686 }, { "epoch": 8.22353627050887, "grad_norm": 0.78515625, "learning_rate": 1.204399828960982e-06, "loss": 3.8624, "step": 24687 }, { "epoch": 8.223869409511119, "grad_norm": 0.8359375, "learning_rate": 1.2039599410462873e-06, "loss": 4.0242, "step": 24688 }, { "epoch": 8.224202548513368, "grad_norm": 0.77734375, "learning_rate": 1.2035201264668223e-06, "loss": 4.0153, "step": 24689 }, { "epoch": 8.224535687515615, "grad_norm": 0.765625, "learning_rate": 1.2030803852277075e-06, "loss": 4.0061, "step": 24690 }, { "epoch": 8.224868826517865, "grad_norm": 0.7890625, "learning_rate": 1.2026407173340708e-06, "loss": 3.9278, "step": 24691 }, { "epoch": 8.225201965520114, "grad_norm": 0.73046875, "learning_rate": 1.2022011227910273e-06, "loss": 4.042, "step": 24692 }, { "epoch": 8.225535104522361, "grad_norm": 0.78125, "learning_rate": 1.2017616016037003e-06, "loss": 3.9714, "step": 24693 }, { "epoch": 8.22586824352461, "grad_norm": 0.7109375, "learning_rate": 1.2013221537772074e-06, "loss": 3.8739, "step": 24694 }, { "epoch": 8.22620138252686, "grad_norm": 0.74609375, "learning_rate": 1.2008827793166693e-06, "loss": 3.8894, "step": 24695 }, { "epoch": 8.226534521529109, "grad_norm": 0.75390625, "learning_rate": 1.2004434782272038e-06, "loss": 3.9938, "step": 24696 }, { "epoch": 8.226867660531356, "grad_norm": 0.80859375, "learning_rate": 1.2000042505139261e-06, "loss": 3.9481, "step": 24697 }, { "epoch": 8.227200799533605, "grad_norm": 0.7421875, "learning_rate": 1.1995650961819517e-06, "loss": 3.8988, "step": 24698 }, { "epoch": 8.227533938535855, "grad_norm": 0.75, "learning_rate": 1.1991260152363994e-06, "loss": 4.006, "step": 24699 }, { "epoch": 8.227867077538102, "grad_norm": 0.7578125, "learning_rate": 1.1986870076823825e-06, "loss": 3.9379, "step": 24700 }, { "epoch": 8.228200216540351, "grad_norm": 0.76171875, "learning_rate": 1.19824807352501e-06, "loss": 4.0008, "step": 24701 }, { "epoch": 8.2285333555426, "grad_norm": 0.8046875, "learning_rate": 1.1978092127693998e-06, "loss": 3.9802, "step": 24702 }, { "epoch": 8.22886649454485, "grad_norm": 0.7734375, "learning_rate": 1.197370425420662e-06, "loss": 3.9623, "step": 24703 }, { "epoch": 8.229199633547097, "grad_norm": 0.74609375, "learning_rate": 1.196931711483907e-06, "loss": 3.9394, "step": 24704 }, { "epoch": 8.229532772549346, "grad_norm": 0.7890625, "learning_rate": 1.196493070964243e-06, "loss": 3.9503, "step": 24705 }, { "epoch": 8.229865911551595, "grad_norm": 0.78125, "learning_rate": 1.196054503866784e-06, "loss": 4.0031, "step": 24706 }, { "epoch": 8.230199050553843, "grad_norm": 0.765625, "learning_rate": 1.1956160101966348e-06, "loss": 4.0067, "step": 24707 }, { "epoch": 8.230532189556092, "grad_norm": 0.73046875, "learning_rate": 1.1951775899589043e-06, "loss": 3.9966, "step": 24708 }, { "epoch": 8.230865328558341, "grad_norm": 0.78515625, "learning_rate": 1.194739243158698e-06, "loss": 4.0537, "step": 24709 }, { "epoch": 8.23119846756059, "grad_norm": 0.80078125, "learning_rate": 1.194300969801123e-06, "loss": 3.9933, "step": 24710 }, { "epoch": 8.231531606562838, "grad_norm": 0.765625, "learning_rate": 1.1938627698912835e-06, "loss": 3.9511, "step": 24711 }, { "epoch": 8.231864745565087, "grad_norm": 0.72265625, "learning_rate": 1.1934246434342818e-06, "loss": 3.9452, "step": 24712 }, { "epoch": 8.232197884567336, "grad_norm": 0.78125, "learning_rate": 1.192986590435225e-06, "loss": 4.0292, "step": 24713 }, { "epoch": 8.232531023569585, "grad_norm": 0.765625, "learning_rate": 1.1925486108992129e-06, "loss": 3.9664, "step": 24714 }, { "epoch": 8.232864162571833, "grad_norm": 0.77734375, "learning_rate": 1.192110704831348e-06, "loss": 3.915, "step": 24715 }, { "epoch": 8.233197301574082, "grad_norm": 0.80078125, "learning_rate": 1.1916728722367284e-06, "loss": 3.9485, "step": 24716 }, { "epoch": 8.233530440576331, "grad_norm": 0.77734375, "learning_rate": 1.191235113120458e-06, "loss": 3.9795, "step": 24717 }, { "epoch": 8.233863579578578, "grad_norm": 0.75, "learning_rate": 1.190797427487636e-06, "loss": 3.9737, "step": 24718 }, { "epoch": 8.234196718580828, "grad_norm": 0.765625, "learning_rate": 1.1903598153433542e-06, "loss": 3.9672, "step": 24719 }, { "epoch": 8.234529857583077, "grad_norm": 0.765625, "learning_rate": 1.1899222766927154e-06, "loss": 3.9568, "step": 24720 }, { "epoch": 8.234862996585326, "grad_norm": 0.78125, "learning_rate": 1.1894848115408151e-06, "loss": 3.9917, "step": 24721 }, { "epoch": 8.235196135587573, "grad_norm": 0.75, "learning_rate": 1.189047419892748e-06, "loss": 3.9578, "step": 24722 }, { "epoch": 8.235529274589823, "grad_norm": 0.7734375, "learning_rate": 1.1886101017536065e-06, "loss": 3.9442, "step": 24723 }, { "epoch": 8.235862413592072, "grad_norm": 0.8046875, "learning_rate": 1.18817285712849e-06, "loss": 3.9548, "step": 24724 }, { "epoch": 8.23619555259432, "grad_norm": 0.75, "learning_rate": 1.1877356860224876e-06, "loss": 3.9962, "step": 24725 }, { "epoch": 8.236528691596568, "grad_norm": 0.75390625, "learning_rate": 1.1872985884406918e-06, "loss": 3.9613, "step": 24726 }, { "epoch": 8.236861830598817, "grad_norm": 0.828125, "learning_rate": 1.1868615643881945e-06, "loss": 3.9666, "step": 24727 }, { "epoch": 8.237194969601067, "grad_norm": 0.76953125, "learning_rate": 1.1864246138700858e-06, "loss": 3.9657, "step": 24728 }, { "epoch": 8.237528108603314, "grad_norm": 0.765625, "learning_rate": 1.1859877368914549e-06, "loss": 3.9693, "step": 24729 }, { "epoch": 8.237861247605563, "grad_norm": 0.69921875, "learning_rate": 1.1855509334573894e-06, "loss": 3.9513, "step": 24730 }, { "epoch": 8.238194386607812, "grad_norm": 0.8359375, "learning_rate": 1.1851142035729795e-06, "loss": 3.9069, "step": 24731 }, { "epoch": 8.238527525610062, "grad_norm": 0.75, "learning_rate": 1.184677547243312e-06, "loss": 4.084, "step": 24732 }, { "epoch": 8.238860664612309, "grad_norm": 0.7734375, "learning_rate": 1.1842409644734708e-06, "loss": 3.9906, "step": 24733 }, { "epoch": 8.239193803614558, "grad_norm": 0.80078125, "learning_rate": 1.1838044552685422e-06, "loss": 3.9576, "step": 24734 }, { "epoch": 8.239526942616807, "grad_norm": 0.7578125, "learning_rate": 1.1833680196336115e-06, "loss": 3.9956, "step": 24735 }, { "epoch": 8.239860081619055, "grad_norm": 0.76953125, "learning_rate": 1.182931657573764e-06, "loss": 3.9738, "step": 24736 }, { "epoch": 8.240193220621304, "grad_norm": 0.8203125, "learning_rate": 1.1824953690940776e-06, "loss": 4.0262, "step": 24737 }, { "epoch": 8.240526359623553, "grad_norm": 0.8203125, "learning_rate": 1.182059154199635e-06, "loss": 3.9995, "step": 24738 }, { "epoch": 8.240859498625802, "grad_norm": 0.734375, "learning_rate": 1.1816230128955206e-06, "loss": 3.941, "step": 24739 }, { "epoch": 8.24119263762805, "grad_norm": 0.79296875, "learning_rate": 1.1811869451868123e-06, "loss": 4.0294, "step": 24740 }, { "epoch": 8.241525776630299, "grad_norm": 0.765625, "learning_rate": 1.1807509510785899e-06, "loss": 4.0018, "step": 24741 }, { "epoch": 8.241858915632548, "grad_norm": 0.796875, "learning_rate": 1.180315030575929e-06, "loss": 3.9772, "step": 24742 }, { "epoch": 8.242192054634796, "grad_norm": 0.75390625, "learning_rate": 1.1798791836839119e-06, "loss": 3.9171, "step": 24743 }, { "epoch": 8.242525193637045, "grad_norm": 0.80078125, "learning_rate": 1.179443410407613e-06, "loss": 3.9694, "step": 24744 }, { "epoch": 8.242858332639294, "grad_norm": 0.7421875, "learning_rate": 1.1790077107521077e-06, "loss": 4.012, "step": 24745 }, { "epoch": 8.243191471641543, "grad_norm": 0.7578125, "learning_rate": 1.1785720847224712e-06, "loss": 3.9784, "step": 24746 }, { "epoch": 8.24352461064379, "grad_norm": 0.77734375, "learning_rate": 1.1781365323237785e-06, "loss": 3.9746, "step": 24747 }, { "epoch": 8.24385774964604, "grad_norm": 0.76171875, "learning_rate": 1.1777010535611016e-06, "loss": 3.9361, "step": 24748 }, { "epoch": 8.244190888648289, "grad_norm": 0.765625, "learning_rate": 1.177265648439511e-06, "loss": 4.0423, "step": 24749 }, { "epoch": 8.244524027650538, "grad_norm": 0.76171875, "learning_rate": 1.1768303169640831e-06, "loss": 4.0776, "step": 24750 }, { "epoch": 8.244857166652785, "grad_norm": 0.8046875, "learning_rate": 1.176395059139886e-06, "loss": 4.0619, "step": 24751 }, { "epoch": 8.245190305655035, "grad_norm": 0.75390625, "learning_rate": 1.1759598749719893e-06, "loss": 3.9885, "step": 24752 }, { "epoch": 8.245523444657284, "grad_norm": 0.7890625, "learning_rate": 1.1755247644654603e-06, "loss": 3.9551, "step": 24753 }, { "epoch": 8.245856583659531, "grad_norm": 0.73828125, "learning_rate": 1.1750897276253728e-06, "loss": 4.0123, "step": 24754 }, { "epoch": 8.24618972266178, "grad_norm": 0.78125, "learning_rate": 1.174654764456789e-06, "loss": 3.9524, "step": 24755 }, { "epoch": 8.24652286166403, "grad_norm": 0.83984375, "learning_rate": 1.1742198749647741e-06, "loss": 3.9581, "step": 24756 }, { "epoch": 8.246856000666279, "grad_norm": 0.7265625, "learning_rate": 1.1737850591543978e-06, "loss": 3.9352, "step": 24757 }, { "epoch": 8.247189139668526, "grad_norm": 0.7890625, "learning_rate": 1.173350317030723e-06, "loss": 3.9274, "step": 24758 }, { "epoch": 8.247522278670775, "grad_norm": 0.7734375, "learning_rate": 1.1729156485988136e-06, "loss": 4.0044, "step": 24759 }, { "epoch": 8.247855417673025, "grad_norm": 0.81640625, "learning_rate": 1.172481053863731e-06, "loss": 3.9822, "step": 24760 }, { "epoch": 8.248188556675272, "grad_norm": 0.75390625, "learning_rate": 1.1720465328305397e-06, "loss": 3.9148, "step": 24761 }, { "epoch": 8.248521695677521, "grad_norm": 0.78125, "learning_rate": 1.1716120855043006e-06, "loss": 3.9688, "step": 24762 }, { "epoch": 8.24885483467977, "grad_norm": 0.76953125, "learning_rate": 1.1711777118900729e-06, "loss": 4.026, "step": 24763 }, { "epoch": 8.24918797368202, "grad_norm": 0.80859375, "learning_rate": 1.1707434119929177e-06, "loss": 3.9154, "step": 24764 }, { "epoch": 8.249521112684267, "grad_norm": 0.78515625, "learning_rate": 1.1703091858178918e-06, "loss": 4.0549, "step": 24765 }, { "epoch": 8.249854251686516, "grad_norm": 0.75390625, "learning_rate": 1.1698750333700536e-06, "loss": 3.881, "step": 24766 }, { "epoch": 8.250187390688765, "grad_norm": 0.78125, "learning_rate": 1.169440954654459e-06, "loss": 4.0127, "step": 24767 }, { "epoch": 8.250520529691013, "grad_norm": 0.71875, "learning_rate": 1.1690069496761674e-06, "loss": 3.9651, "step": 24768 }, { "epoch": 8.250853668693262, "grad_norm": 0.734375, "learning_rate": 1.1685730184402313e-06, "loss": 3.9894, "step": 24769 }, { "epoch": 8.251186807695511, "grad_norm": 0.74609375, "learning_rate": 1.168139160951706e-06, "loss": 3.9526, "step": 24770 }, { "epoch": 8.25151994669776, "grad_norm": 0.75, "learning_rate": 1.167705377215642e-06, "loss": 3.872, "step": 24771 }, { "epoch": 8.251853085700008, "grad_norm": 0.72265625, "learning_rate": 1.1672716672370992e-06, "loss": 3.9695, "step": 24772 }, { "epoch": 8.252186224702257, "grad_norm": 0.81640625, "learning_rate": 1.1668380310211224e-06, "loss": 4.0064, "step": 24773 }, { "epoch": 8.252519363704506, "grad_norm": 0.7734375, "learning_rate": 1.1664044685727635e-06, "loss": 3.9538, "step": 24774 }, { "epoch": 8.252852502706755, "grad_norm": 0.765625, "learning_rate": 1.1659709798970759e-06, "loss": 3.9895, "step": 24775 }, { "epoch": 8.253185641709003, "grad_norm": 0.75390625, "learning_rate": 1.1655375649991071e-06, "loss": 4.0258, "step": 24776 }, { "epoch": 8.253518780711252, "grad_norm": 0.7578125, "learning_rate": 1.165104223883905e-06, "loss": 4.0255, "step": 24777 }, { "epoch": 8.253851919713501, "grad_norm": 0.79296875, "learning_rate": 1.1646709565565152e-06, "loss": 3.9371, "step": 24778 }, { "epoch": 8.254185058715748, "grad_norm": 0.74609375, "learning_rate": 1.1642377630219882e-06, "loss": 4.0578, "step": 24779 }, { "epoch": 8.254518197717998, "grad_norm": 0.79296875, "learning_rate": 1.1638046432853707e-06, "loss": 3.9726, "step": 24780 }, { "epoch": 8.254851336720247, "grad_norm": 0.7421875, "learning_rate": 1.163371597351702e-06, "loss": 4.0355, "step": 24781 }, { "epoch": 8.255184475722496, "grad_norm": 0.7578125, "learning_rate": 1.162938625226028e-06, "loss": 3.9419, "step": 24782 }, { "epoch": 8.255517614724743, "grad_norm": 0.7109375, "learning_rate": 1.1625057269133945e-06, "loss": 3.988, "step": 24783 }, { "epoch": 8.255850753726993, "grad_norm": 0.734375, "learning_rate": 1.1620729024188428e-06, "loss": 4.0065, "step": 24784 }, { "epoch": 8.256183892729242, "grad_norm": 0.765625, "learning_rate": 1.1616401517474129e-06, "loss": 3.9249, "step": 24785 }, { "epoch": 8.25651703173149, "grad_norm": 0.77734375, "learning_rate": 1.1612074749041456e-06, "loss": 3.964, "step": 24786 }, { "epoch": 8.256850170733738, "grad_norm": 0.78125, "learning_rate": 1.1607748718940822e-06, "loss": 3.987, "step": 24787 }, { "epoch": 8.257183309735987, "grad_norm": 0.8046875, "learning_rate": 1.1603423427222618e-06, "loss": 4.0385, "step": 24788 }, { "epoch": 8.257516448738237, "grad_norm": 0.76171875, "learning_rate": 1.1599098873937211e-06, "loss": 3.9394, "step": 24789 }, { "epoch": 8.257849587740484, "grad_norm": 0.765625, "learning_rate": 1.1594775059134972e-06, "loss": 4.0033, "step": 24790 }, { "epoch": 8.258182726742733, "grad_norm": 0.7578125, "learning_rate": 1.1590451982866274e-06, "loss": 4.0481, "step": 24791 }, { "epoch": 8.258515865744982, "grad_norm": 0.73046875, "learning_rate": 1.1586129645181462e-06, "loss": 4.0265, "step": 24792 }, { "epoch": 8.258849004747232, "grad_norm": 0.78125, "learning_rate": 1.1581808046130863e-06, "loss": 3.9576, "step": 24793 }, { "epoch": 8.259182143749479, "grad_norm": 0.734375, "learning_rate": 1.157748718576486e-06, "loss": 3.9752, "step": 24794 }, { "epoch": 8.259515282751728, "grad_norm": 0.7421875, "learning_rate": 1.1573167064133755e-06, "loss": 3.9736, "step": 24795 }, { "epoch": 8.259848421753977, "grad_norm": 0.74609375, "learning_rate": 1.1568847681287874e-06, "loss": 4.0127, "step": 24796 }, { "epoch": 8.260181560756225, "grad_norm": 0.7421875, "learning_rate": 1.1564529037277512e-06, "loss": 3.8994, "step": 24797 }, { "epoch": 8.260514699758474, "grad_norm": 0.80078125, "learning_rate": 1.156021113215302e-06, "loss": 3.9614, "step": 24798 }, { "epoch": 8.260847838760723, "grad_norm": 0.7890625, "learning_rate": 1.155589396596463e-06, "loss": 3.9739, "step": 24799 }, { "epoch": 8.261180977762972, "grad_norm": 0.765625, "learning_rate": 1.1551577538762648e-06, "loss": 3.9716, "step": 24800 }, { "epoch": 8.26151411676522, "grad_norm": 0.8125, "learning_rate": 1.154726185059738e-06, "loss": 4.031, "step": 24801 }, { "epoch": 8.261847255767469, "grad_norm": 0.765625, "learning_rate": 1.1542946901519062e-06, "loss": 3.9255, "step": 24802 }, { "epoch": 8.262180394769718, "grad_norm": 0.80859375, "learning_rate": 1.1538632691577969e-06, "loss": 3.9767, "step": 24803 }, { "epoch": 8.262513533771966, "grad_norm": 0.75390625, "learning_rate": 1.153431922082433e-06, "loss": 3.9577, "step": 24804 }, { "epoch": 8.262846672774215, "grad_norm": 0.8125, "learning_rate": 1.1530006489308429e-06, "loss": 3.9583, "step": 24805 }, { "epoch": 8.263179811776464, "grad_norm": 0.78515625, "learning_rate": 1.1525694497080467e-06, "loss": 3.9963, "step": 24806 }, { "epoch": 8.263512950778713, "grad_norm": 0.75390625, "learning_rate": 1.1521383244190685e-06, "loss": 3.9854, "step": 24807 }, { "epoch": 8.26384608978096, "grad_norm": 0.76171875, "learning_rate": 1.1517072730689298e-06, "loss": 3.9546, "step": 24808 }, { "epoch": 8.26417922878321, "grad_norm": 0.75390625, "learning_rate": 1.1512762956626504e-06, "loss": 4.0129, "step": 24809 }, { "epoch": 8.264512367785459, "grad_norm": 0.77734375, "learning_rate": 1.1508453922052507e-06, "loss": 3.956, "step": 24810 }, { "epoch": 8.264845506787708, "grad_norm": 0.72265625, "learning_rate": 1.1504145627017487e-06, "loss": 4.0603, "step": 24811 }, { "epoch": 8.265178645789955, "grad_norm": 0.75, "learning_rate": 1.149983807157165e-06, "loss": 3.968, "step": 24812 }, { "epoch": 8.265511784792205, "grad_norm": 0.78515625, "learning_rate": 1.1495531255765163e-06, "loss": 3.8774, "step": 24813 }, { "epoch": 8.265844923794454, "grad_norm": 0.7578125, "learning_rate": 1.1491225179648186e-06, "loss": 4.013, "step": 24814 }, { "epoch": 8.266178062796701, "grad_norm": 0.74609375, "learning_rate": 1.1486919843270852e-06, "loss": 4.0564, "step": 24815 }, { "epoch": 8.26651120179895, "grad_norm": 0.73828125, "learning_rate": 1.1482615246683372e-06, "loss": 3.9717, "step": 24816 }, { "epoch": 8.2668443408012, "grad_norm": 0.75390625, "learning_rate": 1.1478311389935833e-06, "loss": 3.9852, "step": 24817 }, { "epoch": 8.267177479803449, "grad_norm": 0.7578125, "learning_rate": 1.1474008273078359e-06, "loss": 3.9669, "step": 24818 }, { "epoch": 8.267510618805696, "grad_norm": 0.796875, "learning_rate": 1.1469705896161103e-06, "loss": 3.9327, "step": 24819 }, { "epoch": 8.267843757807945, "grad_norm": 0.734375, "learning_rate": 1.1465404259234175e-06, "loss": 3.9921, "step": 24820 }, { "epoch": 8.268176896810195, "grad_norm": 0.73828125, "learning_rate": 1.1461103362347665e-06, "loss": 4.0636, "step": 24821 }, { "epoch": 8.268510035812442, "grad_norm": 0.72265625, "learning_rate": 1.1456803205551664e-06, "loss": 4.0396, "step": 24822 }, { "epoch": 8.268843174814691, "grad_norm": 0.77734375, "learning_rate": 1.1452503788896284e-06, "loss": 4.0366, "step": 24823 }, { "epoch": 8.26917631381694, "grad_norm": 0.74609375, "learning_rate": 1.1448205112431599e-06, "loss": 3.9579, "step": 24824 }, { "epoch": 8.26950945281919, "grad_norm": 0.76953125, "learning_rate": 1.1443907176207679e-06, "loss": 4.0395, "step": 24825 }, { "epoch": 8.269842591821437, "grad_norm": 0.73046875, "learning_rate": 1.1439609980274548e-06, "loss": 3.9072, "step": 24826 }, { "epoch": 8.270175730823686, "grad_norm": 0.7578125, "learning_rate": 1.1435313524682307e-06, "loss": 3.9729, "step": 24827 }, { "epoch": 8.270508869825935, "grad_norm": 0.76953125, "learning_rate": 1.143101780948098e-06, "loss": 3.8635, "step": 24828 }, { "epoch": 8.270842008828183, "grad_norm": 0.82421875, "learning_rate": 1.142672283472061e-06, "loss": 3.8857, "step": 24829 }, { "epoch": 8.271175147830432, "grad_norm": 0.74609375, "learning_rate": 1.1422428600451192e-06, "loss": 3.9742, "step": 24830 }, { "epoch": 8.271508286832681, "grad_norm": 0.8125, "learning_rate": 1.1418135106722804e-06, "loss": 3.9982, "step": 24831 }, { "epoch": 8.27184142583493, "grad_norm": 0.82421875, "learning_rate": 1.1413842353585413e-06, "loss": 3.9722, "step": 24832 }, { "epoch": 8.272174564837178, "grad_norm": 0.7734375, "learning_rate": 1.1409550341089038e-06, "loss": 4.0226, "step": 24833 }, { "epoch": 8.272507703839427, "grad_norm": 0.796875, "learning_rate": 1.140525906928366e-06, "loss": 3.993, "step": 24834 }, { "epoch": 8.272840842841676, "grad_norm": 0.7734375, "learning_rate": 1.140096853821927e-06, "loss": 3.9578, "step": 24835 }, { "epoch": 8.273173981843925, "grad_norm": 0.78125, "learning_rate": 1.139667874794584e-06, "loss": 4.0053, "step": 24836 }, { "epoch": 8.273507120846173, "grad_norm": 0.8125, "learning_rate": 1.1392389698513317e-06, "loss": 3.8689, "step": 24837 }, { "epoch": 8.273840259848422, "grad_norm": 0.75390625, "learning_rate": 1.1388101389971701e-06, "loss": 3.9011, "step": 24838 }, { "epoch": 8.274173398850671, "grad_norm": 0.77734375, "learning_rate": 1.1383813822370926e-06, "loss": 4.0492, "step": 24839 }, { "epoch": 8.274506537852918, "grad_norm": 0.73828125, "learning_rate": 1.1379526995760919e-06, "loss": 3.9926, "step": 24840 }, { "epoch": 8.274839676855168, "grad_norm": 0.76171875, "learning_rate": 1.1375240910191609e-06, "loss": 3.9543, "step": 24841 }, { "epoch": 8.275172815857417, "grad_norm": 0.76171875, "learning_rate": 1.1370955565712946e-06, "loss": 3.9204, "step": 24842 }, { "epoch": 8.275505954859666, "grad_norm": 0.76171875, "learning_rate": 1.1366670962374847e-06, "loss": 3.9368, "step": 24843 }, { "epoch": 8.275839093861913, "grad_norm": 0.83984375, "learning_rate": 1.1362387100227175e-06, "loss": 3.8933, "step": 24844 }, { "epoch": 8.276172232864162, "grad_norm": 0.74609375, "learning_rate": 1.1358103979319864e-06, "loss": 3.9539, "step": 24845 }, { "epoch": 8.276505371866412, "grad_norm": 0.7421875, "learning_rate": 1.13538215997028e-06, "loss": 3.9825, "step": 24846 }, { "epoch": 8.27683851086866, "grad_norm": 0.7890625, "learning_rate": 1.1349539961425848e-06, "loss": 3.9191, "step": 24847 }, { "epoch": 8.277171649870908, "grad_norm": 0.8046875, "learning_rate": 1.1345259064538871e-06, "loss": 3.9569, "step": 24848 }, { "epoch": 8.277504788873157, "grad_norm": 0.77734375, "learning_rate": 1.134097890909178e-06, "loss": 3.9848, "step": 24849 }, { "epoch": 8.277837927875407, "grad_norm": 0.74609375, "learning_rate": 1.133669949513439e-06, "loss": 4.0957, "step": 24850 }, { "epoch": 8.278171066877654, "grad_norm": 0.7421875, "learning_rate": 1.1332420822716555e-06, "loss": 3.9605, "step": 24851 }, { "epoch": 8.278504205879903, "grad_norm": 0.75, "learning_rate": 1.132814289188812e-06, "loss": 3.953, "step": 24852 }, { "epoch": 8.278837344882152, "grad_norm": 0.75390625, "learning_rate": 1.1323865702698898e-06, "loss": 4.0615, "step": 24853 }, { "epoch": 8.279170483884402, "grad_norm": 0.7265625, "learning_rate": 1.1319589255198723e-06, "loss": 4.0412, "step": 24854 }, { "epoch": 8.279503622886649, "grad_norm": 0.7265625, "learning_rate": 1.1315313549437392e-06, "loss": 4.0442, "step": 24855 }, { "epoch": 8.279836761888898, "grad_norm": 0.77734375, "learning_rate": 1.1311038585464725e-06, "loss": 3.9766, "step": 24856 }, { "epoch": 8.280169900891147, "grad_norm": 0.77734375, "learning_rate": 1.1306764363330507e-06, "loss": 3.9171, "step": 24857 }, { "epoch": 8.280503039893395, "grad_norm": 0.77734375, "learning_rate": 1.1302490883084532e-06, "loss": 3.9535, "step": 24858 }, { "epoch": 8.280836178895644, "grad_norm": 0.78515625, "learning_rate": 1.1298218144776551e-06, "loss": 4.055, "step": 24859 }, { "epoch": 8.281169317897893, "grad_norm": 0.76953125, "learning_rate": 1.1293946148456367e-06, "loss": 4.0162, "step": 24860 }, { "epoch": 8.281502456900142, "grad_norm": 0.80078125, "learning_rate": 1.1289674894173738e-06, "loss": 4.0108, "step": 24861 }, { "epoch": 8.28183559590239, "grad_norm": 0.73828125, "learning_rate": 1.1285404381978368e-06, "loss": 3.9653, "step": 24862 }, { "epoch": 8.282168734904639, "grad_norm": 0.74609375, "learning_rate": 1.1281134611920049e-06, "loss": 3.9683, "step": 24863 }, { "epoch": 8.282501873906888, "grad_norm": 0.80078125, "learning_rate": 1.1276865584048502e-06, "loss": 3.9767, "step": 24864 }, { "epoch": 8.282835012909135, "grad_norm": 0.79296875, "learning_rate": 1.1272597298413433e-06, "loss": 3.9308, "step": 24865 }, { "epoch": 8.283168151911385, "grad_norm": 0.76953125, "learning_rate": 1.1268329755064565e-06, "loss": 4.0372, "step": 24866 }, { "epoch": 8.283501290913634, "grad_norm": 0.7265625, "learning_rate": 1.1264062954051632e-06, "loss": 3.9773, "step": 24867 }, { "epoch": 8.283834429915883, "grad_norm": 0.7734375, "learning_rate": 1.125979689542431e-06, "loss": 4.0009, "step": 24868 }, { "epoch": 8.28416756891813, "grad_norm": 0.7890625, "learning_rate": 1.1255531579232292e-06, "loss": 4.0544, "step": 24869 }, { "epoch": 8.28450070792038, "grad_norm": 0.70703125, "learning_rate": 1.1251267005525265e-06, "loss": 3.9642, "step": 24870 }, { "epoch": 8.284833846922629, "grad_norm": 0.76171875, "learning_rate": 1.1247003174352887e-06, "loss": 3.9945, "step": 24871 }, { "epoch": 8.285166985924878, "grad_norm": 0.76953125, "learning_rate": 1.1242740085764846e-06, "loss": 3.9679, "step": 24872 }, { "epoch": 8.285500124927125, "grad_norm": 0.796875, "learning_rate": 1.1238477739810777e-06, "loss": 3.9636, "step": 24873 }, { "epoch": 8.285833263929375, "grad_norm": 0.75, "learning_rate": 1.1234216136540321e-06, "loss": 3.939, "step": 24874 }, { "epoch": 8.286166402931624, "grad_norm": 0.76953125, "learning_rate": 1.1229955276003148e-06, "loss": 3.9953, "step": 24875 }, { "epoch": 8.286499541933871, "grad_norm": 0.78125, "learning_rate": 1.122569515824887e-06, "loss": 4.0143, "step": 24876 }, { "epoch": 8.28683268093612, "grad_norm": 0.73046875, "learning_rate": 1.122143578332711e-06, "loss": 4.0051, "step": 24877 }, { "epoch": 8.28716581993837, "grad_norm": 0.734375, "learning_rate": 1.1217177151287458e-06, "loss": 4.0826, "step": 24878 }, { "epoch": 8.287498958940619, "grad_norm": 0.73046875, "learning_rate": 1.121291926217958e-06, "loss": 4.0609, "step": 24879 }, { "epoch": 8.287832097942866, "grad_norm": 0.76953125, "learning_rate": 1.1208662116053013e-06, "loss": 4.0372, "step": 24880 }, { "epoch": 8.288165236945115, "grad_norm": 0.73828125, "learning_rate": 1.120440571295733e-06, "loss": 3.9496, "step": 24881 }, { "epoch": 8.288498375947364, "grad_norm": 0.75390625, "learning_rate": 1.120015005294217e-06, "loss": 3.9449, "step": 24882 }, { "epoch": 8.288831514949612, "grad_norm": 0.7265625, "learning_rate": 1.119589513605707e-06, "loss": 3.9836, "step": 24883 }, { "epoch": 8.289164653951861, "grad_norm": 0.76953125, "learning_rate": 1.1191640962351584e-06, "loss": 3.9642, "step": 24884 }, { "epoch": 8.28949779295411, "grad_norm": 0.79296875, "learning_rate": 1.1187387531875262e-06, "loss": 4.0665, "step": 24885 }, { "epoch": 8.28983093195636, "grad_norm": 0.76171875, "learning_rate": 1.118313484467767e-06, "loss": 3.9878, "step": 24886 }, { "epoch": 8.290164070958607, "grad_norm": 0.72265625, "learning_rate": 1.1178882900808334e-06, "loss": 3.9643, "step": 24887 }, { "epoch": 8.290497209960856, "grad_norm": 0.75390625, "learning_rate": 1.1174631700316777e-06, "loss": 3.9656, "step": 24888 }, { "epoch": 8.290830348963105, "grad_norm": 0.7890625, "learning_rate": 1.1170381243252512e-06, "loss": 4.0672, "step": 24889 }, { "epoch": 8.291163487965353, "grad_norm": 0.78515625, "learning_rate": 1.1166131529665051e-06, "loss": 3.9967, "step": 24890 }, { "epoch": 8.291496626967602, "grad_norm": 0.78125, "learning_rate": 1.116188255960389e-06, "loss": 3.925, "step": 24891 }, { "epoch": 8.291829765969851, "grad_norm": 0.7734375, "learning_rate": 1.1157634333118517e-06, "loss": 4.0033, "step": 24892 }, { "epoch": 8.2921629049721, "grad_norm": 0.73828125, "learning_rate": 1.1153386850258425e-06, "loss": 4.0751, "step": 24893 }, { "epoch": 8.292496043974348, "grad_norm": 0.7578125, "learning_rate": 1.11491401110731e-06, "loss": 3.9544, "step": 24894 }, { "epoch": 8.292829182976597, "grad_norm": 0.80859375, "learning_rate": 1.1144894115611982e-06, "loss": 4.0297, "step": 24895 }, { "epoch": 8.293162321978846, "grad_norm": 0.796875, "learning_rate": 1.1140648863924521e-06, "loss": 3.9163, "step": 24896 }, { "epoch": 8.293495460981095, "grad_norm": 0.73828125, "learning_rate": 1.1136404356060223e-06, "loss": 4.0048, "step": 24897 }, { "epoch": 8.293828599983343, "grad_norm": 0.78125, "learning_rate": 1.1132160592068464e-06, "loss": 3.8862, "step": 24898 }, { "epoch": 8.294161738985592, "grad_norm": 0.7890625, "learning_rate": 1.1127917571998678e-06, "loss": 3.9519, "step": 24899 }, { "epoch": 8.294494877987841, "grad_norm": 0.76953125, "learning_rate": 1.1123675295900317e-06, "loss": 3.8985, "step": 24900 }, { "epoch": 8.294828016990088, "grad_norm": 0.78515625, "learning_rate": 1.1119433763822794e-06, "loss": 3.9708, "step": 24901 }, { "epoch": 8.295161155992337, "grad_norm": 0.765625, "learning_rate": 1.1115192975815503e-06, "loss": 4.0006, "step": 24902 }, { "epoch": 8.295494294994587, "grad_norm": 0.765625, "learning_rate": 1.1110952931927817e-06, "loss": 4.0461, "step": 24903 }, { "epoch": 8.295827433996836, "grad_norm": 0.78125, "learning_rate": 1.110671363220916e-06, "loss": 3.997, "step": 24904 }, { "epoch": 8.296160572999083, "grad_norm": 0.796875, "learning_rate": 1.1102475076708904e-06, "loss": 4.067, "step": 24905 }, { "epoch": 8.296493712001332, "grad_norm": 0.796875, "learning_rate": 1.1098237265476405e-06, "loss": 4.0032, "step": 24906 }, { "epoch": 8.296826851003582, "grad_norm": 0.78515625, "learning_rate": 1.109400019856103e-06, "loss": 3.9777, "step": 24907 }, { "epoch": 8.29715999000583, "grad_norm": 0.76171875, "learning_rate": 1.1089763876012141e-06, "loss": 3.9264, "step": 24908 }, { "epoch": 8.297493129008078, "grad_norm": 0.83203125, "learning_rate": 1.1085528297879072e-06, "loss": 3.9076, "step": 24909 }, { "epoch": 8.297826268010327, "grad_norm": 0.7890625, "learning_rate": 1.1081293464211134e-06, "loss": 3.9309, "step": 24910 }, { "epoch": 8.298159407012577, "grad_norm": 0.76171875, "learning_rate": 1.1077059375057704e-06, "loss": 3.9326, "step": 24911 }, { "epoch": 8.298492546014824, "grad_norm": 0.76171875, "learning_rate": 1.107282603046807e-06, "loss": 3.9843, "step": 24912 }, { "epoch": 8.298825685017073, "grad_norm": 0.78515625, "learning_rate": 1.1068593430491556e-06, "loss": 4.0881, "step": 24913 }, { "epoch": 8.299158824019322, "grad_norm": 0.703125, "learning_rate": 1.106436157517743e-06, "loss": 3.9398, "step": 24914 }, { "epoch": 8.299491963021572, "grad_norm": 0.76171875, "learning_rate": 1.106013046457504e-06, "loss": 4.0118, "step": 24915 }, { "epoch": 8.299825102023819, "grad_norm": 0.75390625, "learning_rate": 1.105590009873362e-06, "loss": 3.9945, "step": 24916 }, { "epoch": 8.300158241026068, "grad_norm": 0.8125, "learning_rate": 1.1051670477702463e-06, "loss": 3.9777, "step": 24917 }, { "epoch": 8.300491380028317, "grad_norm": 0.80078125, "learning_rate": 1.1047441601530814e-06, "loss": 4.0393, "step": 24918 }, { "epoch": 8.300824519030565, "grad_norm": 0.78125, "learning_rate": 1.1043213470267974e-06, "loss": 4.0559, "step": 24919 }, { "epoch": 8.301157658032814, "grad_norm": 0.72265625, "learning_rate": 1.1038986083963155e-06, "loss": 4.0457, "step": 24920 }, { "epoch": 8.301490797035063, "grad_norm": 0.75390625, "learning_rate": 1.1034759442665616e-06, "loss": 4.0379, "step": 24921 }, { "epoch": 8.301823936037312, "grad_norm": 0.71484375, "learning_rate": 1.1030533546424568e-06, "loss": 3.9926, "step": 24922 }, { "epoch": 8.30215707503956, "grad_norm": 0.75390625, "learning_rate": 1.1026308395289255e-06, "loss": 3.9246, "step": 24923 }, { "epoch": 8.302490214041809, "grad_norm": 0.75390625, "learning_rate": 1.1022083989308907e-06, "loss": 3.9343, "step": 24924 }, { "epoch": 8.302823353044058, "grad_norm": 0.76171875, "learning_rate": 1.1017860328532664e-06, "loss": 4.0359, "step": 24925 }, { "epoch": 8.303156492046305, "grad_norm": 0.7578125, "learning_rate": 1.1013637413009782e-06, "loss": 4.0076, "step": 24926 }, { "epoch": 8.303489631048555, "grad_norm": 0.76953125, "learning_rate": 1.1009415242789425e-06, "loss": 3.9625, "step": 24927 }, { "epoch": 8.303822770050804, "grad_norm": 0.72265625, "learning_rate": 1.1005193817920783e-06, "loss": 4.0275, "step": 24928 }, { "epoch": 8.304155909053053, "grad_norm": 0.74609375, "learning_rate": 1.1000973138453007e-06, "loss": 3.95, "step": 24929 }, { "epoch": 8.3044890480553, "grad_norm": 0.79296875, "learning_rate": 1.0996753204435283e-06, "loss": 3.9954, "step": 24930 }, { "epoch": 8.30482218705755, "grad_norm": 0.734375, "learning_rate": 1.0992534015916754e-06, "loss": 3.984, "step": 24931 }, { "epoch": 8.305155326059799, "grad_norm": 0.74609375, "learning_rate": 1.0988315572946567e-06, "loss": 4.0581, "step": 24932 }, { "epoch": 8.305488465062048, "grad_norm": 0.78125, "learning_rate": 1.0984097875573853e-06, "loss": 4.0109, "step": 24933 }, { "epoch": 8.305821604064295, "grad_norm": 0.7421875, "learning_rate": 1.0979880923847744e-06, "loss": 3.9714, "step": 24934 }, { "epoch": 8.306154743066545, "grad_norm": 0.8046875, "learning_rate": 1.0975664717817347e-06, "loss": 4.0788, "step": 24935 }, { "epoch": 8.306487882068794, "grad_norm": 0.7890625, "learning_rate": 1.0971449257531765e-06, "loss": 3.9395, "step": 24936 }, { "epoch": 8.306821021071041, "grad_norm": 0.80859375, "learning_rate": 1.0967234543040136e-06, "loss": 3.9976, "step": 24937 }, { "epoch": 8.30715416007329, "grad_norm": 0.796875, "learning_rate": 1.0963020574391526e-06, "loss": 3.9077, "step": 24938 }, { "epoch": 8.30748729907554, "grad_norm": 0.80078125, "learning_rate": 1.0958807351635018e-06, "loss": 3.9948, "step": 24939 }, { "epoch": 8.307820438077789, "grad_norm": 0.7890625, "learning_rate": 1.0954594874819681e-06, "loss": 4.0599, "step": 24940 }, { "epoch": 8.308153577080036, "grad_norm": 0.7734375, "learning_rate": 1.0950383143994607e-06, "loss": 3.9786, "step": 24941 }, { "epoch": 8.308486716082285, "grad_norm": 0.71875, "learning_rate": 1.0946172159208857e-06, "loss": 4.0188, "step": 24942 }, { "epoch": 8.308819855084534, "grad_norm": 0.78515625, "learning_rate": 1.094196192051142e-06, "loss": 3.9634, "step": 24943 }, { "epoch": 8.309152994086782, "grad_norm": 0.80078125, "learning_rate": 1.0937752427951403e-06, "loss": 3.9661, "step": 24944 }, { "epoch": 8.309486133089031, "grad_norm": 0.80078125, "learning_rate": 1.0933543681577807e-06, "loss": 3.9905, "step": 24945 }, { "epoch": 8.30981927209128, "grad_norm": 0.7578125, "learning_rate": 1.092933568143966e-06, "loss": 4.0156, "step": 24946 }, { "epoch": 8.31015241109353, "grad_norm": 0.72265625, "learning_rate": 1.0925128427585954e-06, "loss": 4.0498, "step": 24947 }, { "epoch": 8.310485550095777, "grad_norm": 0.78515625, "learning_rate": 1.0920921920065744e-06, "loss": 3.9963, "step": 24948 }, { "epoch": 8.310818689098026, "grad_norm": 0.78125, "learning_rate": 1.091671615892799e-06, "loss": 3.9439, "step": 24949 }, { "epoch": 8.311151828100275, "grad_norm": 0.7578125, "learning_rate": 1.0912511144221684e-06, "loss": 3.9891, "step": 24950 }, { "epoch": 8.311484967102524, "grad_norm": 0.765625, "learning_rate": 1.0908306875995816e-06, "loss": 3.9645, "step": 24951 }, { "epoch": 8.311818106104772, "grad_norm": 0.76953125, "learning_rate": 1.0904103354299352e-06, "loss": 3.9501, "step": 24952 }, { "epoch": 8.312151245107021, "grad_norm": 0.78515625, "learning_rate": 1.0899900579181255e-06, "loss": 4.0304, "step": 24953 }, { "epoch": 8.31248438410927, "grad_norm": 0.7734375, "learning_rate": 1.089569855069045e-06, "loss": 3.9555, "step": 24954 }, { "epoch": 8.312817523111518, "grad_norm": 0.7578125, "learning_rate": 1.0891497268875934e-06, "loss": 4.0619, "step": 24955 }, { "epoch": 8.313150662113767, "grad_norm": 0.74609375, "learning_rate": 1.0887296733786606e-06, "loss": 3.9572, "step": 24956 }, { "epoch": 8.313483801116016, "grad_norm": 0.78515625, "learning_rate": 1.0883096945471414e-06, "loss": 3.9591, "step": 24957 }, { "epoch": 8.313816940118265, "grad_norm": 0.7421875, "learning_rate": 1.087889790397924e-06, "loss": 4.02, "step": 24958 }, { "epoch": 8.314150079120513, "grad_norm": 0.8046875, "learning_rate": 1.087469960935904e-06, "loss": 4.0132, "step": 24959 }, { "epoch": 8.314483218122762, "grad_norm": 0.734375, "learning_rate": 1.0870502061659707e-06, "loss": 4.0038, "step": 24960 }, { "epoch": 8.31481635712501, "grad_norm": 0.7734375, "learning_rate": 1.0866305260930109e-06, "loss": 3.9606, "step": 24961 }, { "epoch": 8.315149496127258, "grad_norm": 0.78515625, "learning_rate": 1.0862109207219125e-06, "loss": 3.9735, "step": 24962 }, { "epoch": 8.315482635129507, "grad_norm": 0.78125, "learning_rate": 1.0857913900575658e-06, "loss": 3.9965, "step": 24963 }, { "epoch": 8.315815774131757, "grad_norm": 0.8046875, "learning_rate": 1.0853719341048568e-06, "loss": 4.0633, "step": 24964 }, { "epoch": 8.316148913134006, "grad_norm": 0.73046875, "learning_rate": 1.0849525528686706e-06, "loss": 3.9859, "step": 24965 }, { "epoch": 8.316482052136253, "grad_norm": 0.7890625, "learning_rate": 1.0845332463538904e-06, "loss": 3.9524, "step": 24966 }, { "epoch": 8.316815191138502, "grad_norm": 0.76953125, "learning_rate": 1.0841140145654035e-06, "loss": 3.9939, "step": 24967 }, { "epoch": 8.317148330140752, "grad_norm": 0.7890625, "learning_rate": 1.0836948575080912e-06, "loss": 4.0143, "step": 24968 }, { "epoch": 8.317481469143, "grad_norm": 0.78125, "learning_rate": 1.0832757751868372e-06, "loss": 3.916, "step": 24969 }, { "epoch": 8.317814608145248, "grad_norm": 0.7421875, "learning_rate": 1.0828567676065207e-06, "loss": 3.9339, "step": 24970 }, { "epoch": 8.318147747147497, "grad_norm": 0.7265625, "learning_rate": 1.0824378347720238e-06, "loss": 3.9954, "step": 24971 }, { "epoch": 8.318480886149747, "grad_norm": 0.7734375, "learning_rate": 1.0820189766882257e-06, "loss": 3.9655, "step": 24972 }, { "epoch": 8.318814025151994, "grad_norm": 0.7734375, "learning_rate": 1.0816001933600037e-06, "loss": 3.9425, "step": 24973 }, { "epoch": 8.319147164154243, "grad_norm": 0.7734375, "learning_rate": 1.0811814847922393e-06, "loss": 3.988, "step": 24974 }, { "epoch": 8.319480303156492, "grad_norm": 0.73046875, "learning_rate": 1.0807628509898065e-06, "loss": 3.968, "step": 24975 }, { "epoch": 8.319813442158742, "grad_norm": 0.8046875, "learning_rate": 1.080344291957583e-06, "loss": 4.0135, "step": 24976 }, { "epoch": 8.320146581160989, "grad_norm": 0.81640625, "learning_rate": 1.079925807700442e-06, "loss": 4.0161, "step": 24977 }, { "epoch": 8.320479720163238, "grad_norm": 0.7734375, "learning_rate": 1.0795073982232626e-06, "loss": 3.9905, "step": 24978 }, { "epoch": 8.320812859165487, "grad_norm": 0.734375, "learning_rate": 1.079089063530913e-06, "loss": 3.961, "step": 24979 }, { "epoch": 8.321145998167735, "grad_norm": 0.77734375, "learning_rate": 1.0786708036282672e-06, "loss": 4.0023, "step": 24980 }, { "epoch": 8.321479137169984, "grad_norm": 0.75390625, "learning_rate": 1.0782526185201993e-06, "loss": 3.9889, "step": 24981 }, { "epoch": 8.321812276172233, "grad_norm": 0.78515625, "learning_rate": 1.0778345082115798e-06, "loss": 4.0733, "step": 24982 }, { "epoch": 8.322145415174482, "grad_norm": 0.7421875, "learning_rate": 1.0774164727072773e-06, "loss": 4.0122, "step": 24983 }, { "epoch": 8.32247855417673, "grad_norm": 0.84375, "learning_rate": 1.0769985120121593e-06, "loss": 3.9849, "step": 24984 }, { "epoch": 8.322811693178979, "grad_norm": 0.75, "learning_rate": 1.0765806261310987e-06, "loss": 3.9588, "step": 24985 }, { "epoch": 8.323144832181228, "grad_norm": 0.75390625, "learning_rate": 1.0761628150689608e-06, "loss": 3.9741, "step": 24986 }, { "epoch": 8.323477971183475, "grad_norm": 0.7578125, "learning_rate": 1.075745078830612e-06, "loss": 3.9618, "step": 24987 }, { "epoch": 8.323811110185725, "grad_norm": 0.77734375, "learning_rate": 1.075327417420918e-06, "loss": 4.0215, "step": 24988 }, { "epoch": 8.324144249187974, "grad_norm": 0.80078125, "learning_rate": 1.0749098308447434e-06, "loss": 4.0174, "step": 24989 }, { "epoch": 8.324477388190223, "grad_norm": 0.78125, "learning_rate": 1.0744923191069534e-06, "loss": 3.997, "step": 24990 }, { "epoch": 8.32481052719247, "grad_norm": 0.7265625, "learning_rate": 1.0740748822124074e-06, "loss": 4.0381, "step": 24991 }, { "epoch": 8.32514366619472, "grad_norm": 0.77734375, "learning_rate": 1.0736575201659734e-06, "loss": 3.9696, "step": 24992 }, { "epoch": 8.325476805196969, "grad_norm": 0.8046875, "learning_rate": 1.0732402329725092e-06, "loss": 3.9418, "step": 24993 }, { "epoch": 8.325809944199218, "grad_norm": 0.80078125, "learning_rate": 1.0728230206368764e-06, "loss": 3.8959, "step": 24994 }, { "epoch": 8.326143083201465, "grad_norm": 0.77734375, "learning_rate": 1.0724058831639322e-06, "loss": 3.9426, "step": 24995 }, { "epoch": 8.326476222203715, "grad_norm": 0.7578125, "learning_rate": 1.07198882055854e-06, "loss": 4.005, "step": 24996 }, { "epoch": 8.326809361205964, "grad_norm": 0.80078125, "learning_rate": 1.0715718328255547e-06, "loss": 4.0254, "step": 24997 }, { "epoch": 8.327142500208211, "grad_norm": 0.7734375, "learning_rate": 1.0711549199698312e-06, "loss": 4.0101, "step": 24998 }, { "epoch": 8.32747563921046, "grad_norm": 0.80859375, "learning_rate": 1.070738081996229e-06, "loss": 4.0025, "step": 24999 }, { "epoch": 8.32780877821271, "grad_norm": 0.75390625, "learning_rate": 1.0703213189096035e-06, "loss": 3.979, "step": 25000 }, { "epoch": 8.328141917214959, "grad_norm": 0.75, "learning_rate": 1.0699046307148073e-06, "loss": 3.9415, "step": 25001 }, { "epoch": 8.328475056217206, "grad_norm": 0.78125, "learning_rate": 1.0694880174166931e-06, "loss": 3.9403, "step": 25002 }, { "epoch": 8.328808195219455, "grad_norm": 0.78515625, "learning_rate": 1.0690714790201167e-06, "loss": 3.9579, "step": 25003 }, { "epoch": 8.329141334221704, "grad_norm": 0.74609375, "learning_rate": 1.0686550155299288e-06, "loss": 4.0396, "step": 25004 }, { "epoch": 8.329474473223952, "grad_norm": 0.83984375, "learning_rate": 1.0682386269509789e-06, "loss": 4.0191, "step": 25005 }, { "epoch": 8.329807612226201, "grad_norm": 0.8046875, "learning_rate": 1.0678223132881182e-06, "loss": 3.9677, "step": 25006 }, { "epoch": 8.33014075122845, "grad_norm": 0.79296875, "learning_rate": 1.0674060745461952e-06, "loss": 4.0083, "step": 25007 }, { "epoch": 8.3304738902307, "grad_norm": 0.76953125, "learning_rate": 1.0669899107300587e-06, "loss": 3.9705, "step": 25008 }, { "epoch": 8.330807029232947, "grad_norm": 0.73828125, "learning_rate": 1.0665738218445557e-06, "loss": 4.0104, "step": 25009 }, { "epoch": 8.331140168235196, "grad_norm": 0.75, "learning_rate": 1.0661578078945312e-06, "loss": 4.0566, "step": 25010 }, { "epoch": 8.331473307237445, "grad_norm": 0.73828125, "learning_rate": 1.0657418688848346e-06, "loss": 3.9451, "step": 25011 }, { "epoch": 8.331806446239694, "grad_norm": 0.78125, "learning_rate": 1.0653260048203083e-06, "loss": 4.0618, "step": 25012 }, { "epoch": 8.332139585241942, "grad_norm": 0.74609375, "learning_rate": 1.0649102157057966e-06, "loss": 3.9835, "step": 25013 }, { "epoch": 8.332472724244191, "grad_norm": 0.75, "learning_rate": 1.064494501546143e-06, "loss": 3.9795, "step": 25014 }, { "epoch": 8.33280586324644, "grad_norm": 0.734375, "learning_rate": 1.0640788623461886e-06, "loss": 3.9865, "step": 25015 }, { "epoch": 8.333139002248688, "grad_norm": 0.796875, "learning_rate": 1.0636632981107755e-06, "loss": 3.8993, "step": 25016 }, { "epoch": 8.333472141250937, "grad_norm": 0.7734375, "learning_rate": 1.0632478088447413e-06, "loss": 3.983, "step": 25017 }, { "epoch": 8.333805280253186, "grad_norm": 0.78125, "learning_rate": 1.0628323945529308e-06, "loss": 3.9046, "step": 25018 }, { "epoch": 8.334138419255435, "grad_norm": 0.76171875, "learning_rate": 1.06241705524018e-06, "loss": 3.9825, "step": 25019 }, { "epoch": 8.334471558257682, "grad_norm": 0.71875, "learning_rate": 1.0620017909113273e-06, "loss": 3.9452, "step": 25020 }, { "epoch": 8.334804697259932, "grad_norm": 0.78515625, "learning_rate": 1.0615866015712063e-06, "loss": 3.9789, "step": 25021 }, { "epoch": 8.33513783626218, "grad_norm": 0.74609375, "learning_rate": 1.0611714872246578e-06, "loss": 4.0504, "step": 25022 }, { "epoch": 8.335470975264428, "grad_norm": 0.70703125, "learning_rate": 1.0607564478765166e-06, "loss": 3.9116, "step": 25023 }, { "epoch": 8.335804114266677, "grad_norm": 0.76953125, "learning_rate": 1.0603414835316126e-06, "loss": 4.0213, "step": 25024 }, { "epoch": 8.336137253268927, "grad_norm": 0.76953125, "learning_rate": 1.059926594194783e-06, "loss": 3.9842, "step": 25025 }, { "epoch": 8.336470392271176, "grad_norm": 0.73046875, "learning_rate": 1.0595117798708596e-06, "loss": 4.0104, "step": 25026 }, { "epoch": 8.336803531273423, "grad_norm": 0.76953125, "learning_rate": 1.0590970405646739e-06, "loss": 3.9943, "step": 25027 }, { "epoch": 8.337136670275672, "grad_norm": 0.78515625, "learning_rate": 1.0586823762810554e-06, "loss": 3.9922, "step": 25028 }, { "epoch": 8.337469809277922, "grad_norm": 0.7734375, "learning_rate": 1.0582677870248363e-06, "loss": 4.0017, "step": 25029 }, { "epoch": 8.33780294828017, "grad_norm": 0.8203125, "learning_rate": 1.0578532728008453e-06, "loss": 4.0028, "step": 25030 }, { "epoch": 8.338136087282418, "grad_norm": 0.7265625, "learning_rate": 1.0574388336139098e-06, "loss": 4.1084, "step": 25031 }, { "epoch": 8.338469226284667, "grad_norm": 0.72265625, "learning_rate": 1.057024469468858e-06, "loss": 3.9704, "step": 25032 }, { "epoch": 8.338802365286917, "grad_norm": 0.70703125, "learning_rate": 1.056610180370515e-06, "loss": 4.0236, "step": 25033 }, { "epoch": 8.339135504289164, "grad_norm": 0.78515625, "learning_rate": 1.056195966323707e-06, "loss": 3.9904, "step": 25034 }, { "epoch": 8.339468643291413, "grad_norm": 0.7578125, "learning_rate": 1.0557818273332568e-06, "loss": 4.049, "step": 25035 }, { "epoch": 8.339801782293662, "grad_norm": 0.80859375, "learning_rate": 1.055367763403992e-06, "loss": 3.9667, "step": 25036 }, { "epoch": 8.340134921295911, "grad_norm": 0.78125, "learning_rate": 1.0549537745407343e-06, "loss": 3.9683, "step": 25037 }, { "epoch": 8.340468060298159, "grad_norm": 0.77734375, "learning_rate": 1.054539860748305e-06, "loss": 4.0103, "step": 25038 }, { "epoch": 8.340801199300408, "grad_norm": 0.765625, "learning_rate": 1.0541260220315233e-06, "loss": 3.9602, "step": 25039 }, { "epoch": 8.341134338302657, "grad_norm": 0.76953125, "learning_rate": 1.053712258395215e-06, "loss": 4.0108, "step": 25040 }, { "epoch": 8.341467477304905, "grad_norm": 0.8125, "learning_rate": 1.0532985698441955e-06, "loss": 3.9656, "step": 25041 }, { "epoch": 8.341800616307154, "grad_norm": 0.7578125, "learning_rate": 1.0528849563832817e-06, "loss": 4.0015, "step": 25042 }, { "epoch": 8.342133755309403, "grad_norm": 0.72265625, "learning_rate": 1.052471418017295e-06, "loss": 4.0038, "step": 25043 }, { "epoch": 8.342466894311652, "grad_norm": 0.734375, "learning_rate": 1.0520579547510523e-06, "loss": 3.999, "step": 25044 }, { "epoch": 8.3428000333139, "grad_norm": 0.7734375, "learning_rate": 1.0516445665893671e-06, "loss": 3.9353, "step": 25045 }, { "epoch": 8.343133172316149, "grad_norm": 0.765625, "learning_rate": 1.0512312535370538e-06, "loss": 3.986, "step": 25046 }, { "epoch": 8.343466311318398, "grad_norm": 0.8203125, "learning_rate": 1.0508180155989315e-06, "loss": 3.9681, "step": 25047 }, { "epoch": 8.343799450320645, "grad_norm": 0.703125, "learning_rate": 1.050404852779809e-06, "loss": 4.0062, "step": 25048 }, { "epoch": 8.344132589322895, "grad_norm": 0.7734375, "learning_rate": 1.0499917650845006e-06, "loss": 4.0072, "step": 25049 }, { "epoch": 8.344465728325144, "grad_norm": 0.796875, "learning_rate": 1.0495787525178172e-06, "loss": 3.9064, "step": 25050 }, { "epoch": 8.344798867327393, "grad_norm": 0.7578125, "learning_rate": 1.04916581508457e-06, "loss": 4.0042, "step": 25051 }, { "epoch": 8.34513200632964, "grad_norm": 0.80859375, "learning_rate": 1.048752952789569e-06, "loss": 3.9552, "step": 25052 }, { "epoch": 8.34546514533189, "grad_norm": 0.76953125, "learning_rate": 1.0483401656376198e-06, "loss": 4.0428, "step": 25053 }, { "epoch": 8.345798284334139, "grad_norm": 0.77734375, "learning_rate": 1.0479274536335363e-06, "loss": 3.9851, "step": 25054 }, { "epoch": 8.346131423336388, "grad_norm": 0.7578125, "learning_rate": 1.0475148167821224e-06, "loss": 3.9988, "step": 25055 }, { "epoch": 8.346464562338635, "grad_norm": 0.7890625, "learning_rate": 1.0471022550881847e-06, "loss": 3.9794, "step": 25056 }, { "epoch": 8.346797701340885, "grad_norm": 0.78125, "learning_rate": 1.0466897685565286e-06, "loss": 3.9739, "step": 25057 }, { "epoch": 8.347130840343134, "grad_norm": 0.7734375, "learning_rate": 1.046277357191959e-06, "loss": 3.9592, "step": 25058 }, { "epoch": 8.347463979345381, "grad_norm": 0.76953125, "learning_rate": 1.0458650209992799e-06, "loss": 3.9758, "step": 25059 }, { "epoch": 8.34779711834763, "grad_norm": 0.78515625, "learning_rate": 1.045452759983293e-06, "loss": 3.9883, "step": 25060 }, { "epoch": 8.34813025734988, "grad_norm": 0.76171875, "learning_rate": 1.0450405741487997e-06, "loss": 3.9643, "step": 25061 }, { "epoch": 8.348463396352129, "grad_norm": 0.73828125, "learning_rate": 1.0446284635006036e-06, "loss": 3.9457, "step": 25062 }, { "epoch": 8.348796535354376, "grad_norm": 0.8203125, "learning_rate": 1.0442164280435042e-06, "loss": 3.9899, "step": 25063 }, { "epoch": 8.349129674356625, "grad_norm": 0.76953125, "learning_rate": 1.0438044677823002e-06, "loss": 3.9248, "step": 25064 }, { "epoch": 8.349462813358874, "grad_norm": 0.7265625, "learning_rate": 1.0433925827217877e-06, "loss": 3.9883, "step": 25065 }, { "epoch": 8.349795952361122, "grad_norm": 0.734375, "learning_rate": 1.0429807728667687e-06, "loss": 3.9648, "step": 25066 }, { "epoch": 8.350129091363371, "grad_norm": 0.8046875, "learning_rate": 1.0425690382220392e-06, "loss": 4.1007, "step": 25067 }, { "epoch": 8.35046223036562, "grad_norm": 0.74609375, "learning_rate": 1.0421573787923908e-06, "loss": 4.0113, "step": 25068 }, { "epoch": 8.35079536936787, "grad_norm": 0.76171875, "learning_rate": 1.0417457945826225e-06, "loss": 4.0113, "step": 25069 }, { "epoch": 8.351128508370117, "grad_norm": 0.8046875, "learning_rate": 1.0413342855975274e-06, "loss": 4.0134, "step": 25070 }, { "epoch": 8.351461647372366, "grad_norm": 0.80859375, "learning_rate": 1.0409228518418985e-06, "loss": 3.9511, "step": 25071 }, { "epoch": 8.351794786374615, "grad_norm": 0.76171875, "learning_rate": 1.0405114933205256e-06, "loss": 3.9616, "step": 25072 }, { "epoch": 8.352127925376864, "grad_norm": 0.73046875, "learning_rate": 1.0401002100382053e-06, "loss": 4.0772, "step": 25073 }, { "epoch": 8.352461064379112, "grad_norm": 0.7734375, "learning_rate": 1.0396890019997246e-06, "loss": 3.9738, "step": 25074 }, { "epoch": 8.352794203381361, "grad_norm": 0.7421875, "learning_rate": 1.0392778692098745e-06, "loss": 3.9196, "step": 25075 }, { "epoch": 8.35312734238361, "grad_norm": 0.76171875, "learning_rate": 1.038866811673443e-06, "loss": 3.9936, "step": 25076 }, { "epoch": 8.353460481385858, "grad_norm": 0.796875, "learning_rate": 1.0384558293952184e-06, "loss": 4.0729, "step": 25077 }, { "epoch": 8.353793620388107, "grad_norm": 0.76953125, "learning_rate": 1.0380449223799883e-06, "loss": 3.9639, "step": 25078 }, { "epoch": 8.354126759390356, "grad_norm": 0.765625, "learning_rate": 1.0376340906325356e-06, "loss": 3.9305, "step": 25079 }, { "epoch": 8.354459898392605, "grad_norm": 0.7734375, "learning_rate": 1.0372233341576506e-06, "loss": 3.9262, "step": 25080 }, { "epoch": 8.354793037394852, "grad_norm": 0.83203125, "learning_rate": 1.0368126529601158e-06, "loss": 4.0287, "step": 25081 }, { "epoch": 8.355126176397102, "grad_norm": 0.7421875, "learning_rate": 1.0364020470447139e-06, "loss": 4.0603, "step": 25082 }, { "epoch": 8.35545931539935, "grad_norm": 0.73828125, "learning_rate": 1.0359915164162261e-06, "loss": 3.9761, "step": 25083 }, { "epoch": 8.355792454401598, "grad_norm": 0.75, "learning_rate": 1.035581061079438e-06, "loss": 3.9717, "step": 25084 }, { "epoch": 8.356125593403847, "grad_norm": 0.7421875, "learning_rate": 1.0351706810391307e-06, "loss": 3.999, "step": 25085 }, { "epoch": 8.356458732406097, "grad_norm": 0.79296875, "learning_rate": 1.0347603763000776e-06, "loss": 4.013, "step": 25086 }, { "epoch": 8.356791871408346, "grad_norm": 0.734375, "learning_rate": 1.0343501468670653e-06, "loss": 4.0137, "step": 25087 }, { "epoch": 8.357125010410593, "grad_norm": 0.79296875, "learning_rate": 1.0339399927448687e-06, "loss": 3.9941, "step": 25088 }, { "epoch": 8.357458149412842, "grad_norm": 0.77734375, "learning_rate": 1.033529913938266e-06, "loss": 4.0031, "step": 25089 }, { "epoch": 8.357791288415092, "grad_norm": 0.7578125, "learning_rate": 1.0331199104520313e-06, "loss": 4.0292, "step": 25090 }, { "epoch": 8.35812442741734, "grad_norm": 0.7734375, "learning_rate": 1.0327099822909447e-06, "loss": 3.9953, "step": 25091 }, { "epoch": 8.358457566419588, "grad_norm": 0.734375, "learning_rate": 1.0323001294597786e-06, "loss": 3.9251, "step": 25092 }, { "epoch": 8.358790705421837, "grad_norm": 0.76171875, "learning_rate": 1.031890351963307e-06, "loss": 3.986, "step": 25093 }, { "epoch": 8.359123844424087, "grad_norm": 0.734375, "learning_rate": 1.0314806498063023e-06, "loss": 4.0132, "step": 25094 }, { "epoch": 8.359456983426334, "grad_norm": 0.75, "learning_rate": 1.0310710229935378e-06, "loss": 3.9702, "step": 25095 }, { "epoch": 8.359790122428583, "grad_norm": 0.79296875, "learning_rate": 1.0306614715297844e-06, "loss": 3.9576, "step": 25096 }, { "epoch": 8.360123261430832, "grad_norm": 0.8203125, "learning_rate": 1.03025199541981e-06, "loss": 3.8975, "step": 25097 }, { "epoch": 8.360456400433081, "grad_norm": 0.78125, "learning_rate": 1.0298425946683884e-06, "loss": 3.9415, "step": 25098 }, { "epoch": 8.360789539435329, "grad_norm": 0.81640625, "learning_rate": 1.0294332692802857e-06, "loss": 3.9402, "step": 25099 }, { "epoch": 8.361122678437578, "grad_norm": 0.78515625, "learning_rate": 1.0290240192602706e-06, "loss": 4.004, "step": 25100 }, { "epoch": 8.361455817439827, "grad_norm": 0.79296875, "learning_rate": 1.0286148446131102e-06, "loss": 4.0293, "step": 25101 }, { "epoch": 8.361788956442075, "grad_norm": 0.8125, "learning_rate": 1.0282057453435673e-06, "loss": 3.9517, "step": 25102 }, { "epoch": 8.362122095444324, "grad_norm": 0.79296875, "learning_rate": 1.027796721456413e-06, "loss": 3.9847, "step": 25103 }, { "epoch": 8.362455234446573, "grad_norm": 0.8046875, "learning_rate": 1.027387772956407e-06, "loss": 3.9453, "step": 25104 }, { "epoch": 8.362788373448822, "grad_norm": 0.73828125, "learning_rate": 1.0269788998483118e-06, "loss": 4.0017, "step": 25105 }, { "epoch": 8.36312151245107, "grad_norm": 0.7734375, "learning_rate": 1.026570102136893e-06, "loss": 4.0301, "step": 25106 }, { "epoch": 8.363454651453319, "grad_norm": 0.76171875, "learning_rate": 1.026161379826912e-06, "loss": 4.0213, "step": 25107 }, { "epoch": 8.363787790455568, "grad_norm": 0.7734375, "learning_rate": 1.025752732923128e-06, "loss": 4.0425, "step": 25108 }, { "epoch": 8.364120929457815, "grad_norm": 0.73046875, "learning_rate": 1.0253441614302999e-06, "loss": 3.9901, "step": 25109 }, { "epoch": 8.364454068460065, "grad_norm": 0.78515625, "learning_rate": 1.0249356653531905e-06, "loss": 3.957, "step": 25110 }, { "epoch": 8.364787207462314, "grad_norm": 0.7421875, "learning_rate": 1.024527244696555e-06, "loss": 3.9611, "step": 25111 }, { "epoch": 8.365120346464563, "grad_norm": 0.7421875, "learning_rate": 1.0241188994651514e-06, "loss": 3.9563, "step": 25112 }, { "epoch": 8.36545348546681, "grad_norm": 0.765625, "learning_rate": 1.023710629663736e-06, "loss": 3.9707, "step": 25113 }, { "epoch": 8.36578662446906, "grad_norm": 0.796875, "learning_rate": 1.023302435297064e-06, "loss": 3.9379, "step": 25114 }, { "epoch": 8.366119763471309, "grad_norm": 0.75, "learning_rate": 1.02289431636989e-06, "loss": 4.0063, "step": 25115 }, { "epoch": 8.366452902473558, "grad_norm": 0.78515625, "learning_rate": 1.0224862728869669e-06, "loss": 3.9702, "step": 25116 }, { "epoch": 8.366786041475805, "grad_norm": 0.7421875, "learning_rate": 1.022078304853049e-06, "loss": 3.9792, "step": 25117 }, { "epoch": 8.367119180478054, "grad_norm": 0.80859375, "learning_rate": 1.0216704122728887e-06, "loss": 3.9631, "step": 25118 }, { "epoch": 8.367452319480304, "grad_norm": 0.78125, "learning_rate": 1.0212625951512352e-06, "loss": 3.9979, "step": 25119 }, { "epoch": 8.367785458482551, "grad_norm": 0.7421875, "learning_rate": 1.0208548534928384e-06, "loss": 3.9662, "step": 25120 }, { "epoch": 8.3681185974848, "grad_norm": 0.7578125, "learning_rate": 1.0204471873024524e-06, "loss": 3.9905, "step": 25121 }, { "epoch": 8.36845173648705, "grad_norm": 0.78515625, "learning_rate": 1.0200395965848195e-06, "loss": 3.9611, "step": 25122 }, { "epoch": 8.368784875489299, "grad_norm": 0.7265625, "learning_rate": 1.0196320813446884e-06, "loss": 4.0118, "step": 25123 }, { "epoch": 8.369118014491546, "grad_norm": 0.7578125, "learning_rate": 1.0192246415868092e-06, "loss": 3.9224, "step": 25124 }, { "epoch": 8.369451153493795, "grad_norm": 0.71875, "learning_rate": 1.018817277315925e-06, "loss": 3.9161, "step": 25125 }, { "epoch": 8.369784292496044, "grad_norm": 0.78125, "learning_rate": 1.0184099885367814e-06, "loss": 3.8736, "step": 25126 }, { "epoch": 8.370117431498294, "grad_norm": 0.81640625, "learning_rate": 1.018002775254121e-06, "loss": 4.0245, "step": 25127 }, { "epoch": 8.370450570500541, "grad_norm": 0.7578125, "learning_rate": 1.0175956374726899e-06, "loss": 3.9425, "step": 25128 }, { "epoch": 8.37078370950279, "grad_norm": 0.75390625, "learning_rate": 1.0171885751972288e-06, "loss": 3.9835, "step": 25129 }, { "epoch": 8.37111684850504, "grad_norm": 0.7578125, "learning_rate": 1.0167815884324786e-06, "loss": 4.0269, "step": 25130 }, { "epoch": 8.371449987507287, "grad_norm": 0.75, "learning_rate": 1.0163746771831805e-06, "loss": 3.9505, "step": 25131 }, { "epoch": 8.371783126509536, "grad_norm": 0.7578125, "learning_rate": 1.0159678414540734e-06, "loss": 3.8949, "step": 25132 }, { "epoch": 8.372116265511785, "grad_norm": 0.8046875, "learning_rate": 1.0155610812498978e-06, "loss": 3.9722, "step": 25133 }, { "epoch": 8.372449404514034, "grad_norm": 0.80859375, "learning_rate": 1.015154396575388e-06, "loss": 3.9703, "step": 25134 }, { "epoch": 8.372782543516282, "grad_norm": 0.7890625, "learning_rate": 1.0147477874352843e-06, "loss": 4.0343, "step": 25135 }, { "epoch": 8.373115682518531, "grad_norm": 0.765625, "learning_rate": 1.0143412538343222e-06, "loss": 3.9125, "step": 25136 }, { "epoch": 8.37344882152078, "grad_norm": 0.72265625, "learning_rate": 1.013934795777237e-06, "loss": 3.9857, "step": 25137 }, { "epoch": 8.373781960523027, "grad_norm": 0.8046875, "learning_rate": 1.0135284132687612e-06, "loss": 4.0074, "step": 25138 }, { "epoch": 8.374115099525277, "grad_norm": 0.75, "learning_rate": 1.0131221063136328e-06, "loss": 3.9488, "step": 25139 }, { "epoch": 8.374448238527526, "grad_norm": 0.71484375, "learning_rate": 1.0127158749165793e-06, "loss": 4.0121, "step": 25140 }, { "epoch": 8.374781377529775, "grad_norm": 0.7578125, "learning_rate": 1.0123097190823327e-06, "loss": 3.9283, "step": 25141 }, { "epoch": 8.375114516532022, "grad_norm": 0.78125, "learning_rate": 1.0119036388156276e-06, "loss": 3.9344, "step": 25142 }, { "epoch": 8.375447655534272, "grad_norm": 0.7890625, "learning_rate": 1.0114976341211914e-06, "loss": 4.0903, "step": 25143 }, { "epoch": 8.37578079453652, "grad_norm": 0.78515625, "learning_rate": 1.0110917050037543e-06, "loss": 3.9799, "step": 25144 }, { "epoch": 8.376113933538768, "grad_norm": 0.78125, "learning_rate": 1.0106858514680422e-06, "loss": 4.065, "step": 25145 }, { "epoch": 8.376447072541017, "grad_norm": 0.77734375, "learning_rate": 1.0102800735187856e-06, "loss": 3.9836, "step": 25146 }, { "epoch": 8.376780211543267, "grad_norm": 0.7109375, "learning_rate": 1.0098743711607098e-06, "loss": 4.0459, "step": 25147 }, { "epoch": 8.377113350545516, "grad_norm": 0.71484375, "learning_rate": 1.009468744398541e-06, "loss": 4.051, "step": 25148 }, { "epoch": 8.377446489547763, "grad_norm": 0.8125, "learning_rate": 1.0090631932369996e-06, "loss": 4.0081, "step": 25149 }, { "epoch": 8.377779628550012, "grad_norm": 0.74609375, "learning_rate": 1.0086577176808142e-06, "loss": 4.01, "step": 25150 }, { "epoch": 8.378112767552262, "grad_norm": 0.7578125, "learning_rate": 1.0082523177347063e-06, "loss": 4.0366, "step": 25151 }, { "epoch": 8.37844590655451, "grad_norm": 0.796875, "learning_rate": 1.0078469934033988e-06, "loss": 3.9771, "step": 25152 }, { "epoch": 8.378779045556758, "grad_norm": 0.7578125, "learning_rate": 1.0074417446916093e-06, "loss": 4.061, "step": 25153 }, { "epoch": 8.379112184559007, "grad_norm": 0.71484375, "learning_rate": 1.0070365716040616e-06, "loss": 3.9538, "step": 25154 }, { "epoch": 8.379445323561256, "grad_norm": 0.70703125, "learning_rate": 1.0066314741454754e-06, "loss": 4.0236, "step": 25155 }, { "epoch": 8.379778462563504, "grad_norm": 0.75, "learning_rate": 1.0062264523205668e-06, "loss": 3.9756, "step": 25156 }, { "epoch": 8.380111601565753, "grad_norm": 0.75390625, "learning_rate": 1.0058215061340555e-06, "loss": 4.0559, "step": 25157 }, { "epoch": 8.380444740568002, "grad_norm": 0.77734375, "learning_rate": 1.0054166355906569e-06, "loss": 4.007, "step": 25158 }, { "epoch": 8.380777879570251, "grad_norm": 0.74609375, "learning_rate": 1.005011840695087e-06, "loss": 4.0394, "step": 25159 }, { "epoch": 8.381111018572499, "grad_norm": 0.765625, "learning_rate": 1.0046071214520598e-06, "loss": 4.0083, "step": 25160 }, { "epoch": 8.381444157574748, "grad_norm": 0.78125, "learning_rate": 1.004202477866292e-06, "loss": 4.0102, "step": 25161 }, { "epoch": 8.381777296576997, "grad_norm": 0.80078125, "learning_rate": 1.003797909942496e-06, "loss": 3.9341, "step": 25162 }, { "epoch": 8.382110435579245, "grad_norm": 0.78515625, "learning_rate": 1.0033934176853828e-06, "loss": 3.8878, "step": 25163 }, { "epoch": 8.382443574581494, "grad_norm": 0.74609375, "learning_rate": 1.0029890010996632e-06, "loss": 4.0512, "step": 25164 }, { "epoch": 8.382776713583743, "grad_norm": 0.74609375, "learning_rate": 1.0025846601900513e-06, "loss": 4.0301, "step": 25165 }, { "epoch": 8.383109852585992, "grad_norm": 0.85546875, "learning_rate": 1.002180394961256e-06, "loss": 3.9163, "step": 25166 }, { "epoch": 8.38344299158824, "grad_norm": 0.76953125, "learning_rate": 1.0017762054179818e-06, "loss": 4.0088, "step": 25167 }, { "epoch": 8.383776130590489, "grad_norm": 0.72265625, "learning_rate": 1.001372091564941e-06, "loss": 3.9906, "step": 25168 }, { "epoch": 8.384109269592738, "grad_norm": 0.765625, "learning_rate": 1.0009680534068397e-06, "loss": 3.996, "step": 25169 }, { "epoch": 8.384442408594985, "grad_norm": 0.76171875, "learning_rate": 1.0005640909483832e-06, "loss": 3.9434, "step": 25170 }, { "epoch": 8.384775547597235, "grad_norm": 0.82421875, "learning_rate": 1.0001602041942761e-06, "loss": 4.0375, "step": 25171 }, { "epoch": 8.385108686599484, "grad_norm": 0.79296875, "learning_rate": 9.997563931492257e-07, "loss": 3.9611, "step": 25172 }, { "epoch": 8.385441825601733, "grad_norm": 0.765625, "learning_rate": 9.993526578179338e-07, "loss": 4.0194, "step": 25173 }, { "epoch": 8.38577496460398, "grad_norm": 0.80078125, "learning_rate": 9.989489982051026e-07, "loss": 3.9567, "step": 25174 }, { "epoch": 8.38610810360623, "grad_norm": 0.79296875, "learning_rate": 9.98545414315435e-07, "loss": 4.0846, "step": 25175 }, { "epoch": 8.386441242608479, "grad_norm": 0.7890625, "learning_rate": 9.981419061536304e-07, "loss": 4.0115, "step": 25176 }, { "epoch": 8.386774381610728, "grad_norm": 0.72265625, "learning_rate": 9.977384737243903e-07, "loss": 4.0137, "step": 25177 }, { "epoch": 8.387107520612975, "grad_norm": 0.765625, "learning_rate": 9.973351170324108e-07, "loss": 4.0488, "step": 25178 }, { "epoch": 8.387440659615224, "grad_norm": 0.765625, "learning_rate": 9.969318360823939e-07, "loss": 3.972, "step": 25179 }, { "epoch": 8.387773798617474, "grad_norm": 0.78515625, "learning_rate": 9.965286308790362e-07, "loss": 4.0302, "step": 25180 }, { "epoch": 8.388106937619721, "grad_norm": 0.76953125, "learning_rate": 9.961255014270335e-07, "loss": 3.9291, "step": 25181 }, { "epoch": 8.38844007662197, "grad_norm": 0.78515625, "learning_rate": 9.957224477310797e-07, "loss": 4.1001, "step": 25182 }, { "epoch": 8.38877321562422, "grad_norm": 0.76171875, "learning_rate": 9.95319469795872e-07, "loss": 3.9618, "step": 25183 }, { "epoch": 8.389106354626469, "grad_norm": 0.74609375, "learning_rate": 9.949165676261056e-07, "loss": 3.9563, "step": 25184 }, { "epoch": 8.389439493628716, "grad_norm": 0.76953125, "learning_rate": 9.94513741226468e-07, "loss": 4.0472, "step": 25185 }, { "epoch": 8.389772632630965, "grad_norm": 0.79296875, "learning_rate": 9.941109906016558e-07, "loss": 3.9808, "step": 25186 }, { "epoch": 8.390105771633214, "grad_norm": 0.76171875, "learning_rate": 9.937083157563594e-07, "loss": 3.9818, "step": 25187 }, { "epoch": 8.390438910635464, "grad_norm": 0.71484375, "learning_rate": 9.93305716695268e-07, "loss": 3.9675, "step": 25188 }, { "epoch": 8.390772049637711, "grad_norm": 0.72265625, "learning_rate": 9.9290319342307e-07, "loss": 3.9584, "step": 25189 }, { "epoch": 8.39110518863996, "grad_norm": 0.74609375, "learning_rate": 9.925007459444569e-07, "loss": 3.9703, "step": 25190 }, { "epoch": 8.39143832764221, "grad_norm": 0.78515625, "learning_rate": 9.920983742641155e-07, "loss": 4.0104, "step": 25191 }, { "epoch": 8.391771466644457, "grad_norm": 0.80078125, "learning_rate": 9.916960783867312e-07, "loss": 3.9926, "step": 25192 }, { "epoch": 8.392104605646706, "grad_norm": 0.84375, "learning_rate": 9.912938583169905e-07, "loss": 3.981, "step": 25193 }, { "epoch": 8.392437744648955, "grad_norm": 0.76171875, "learning_rate": 9.908917140595796e-07, "loss": 4.0195, "step": 25194 }, { "epoch": 8.392770883651204, "grad_norm": 0.75390625, "learning_rate": 9.904896456191806e-07, "loss": 3.9997, "step": 25195 }, { "epoch": 8.393104022653452, "grad_norm": 0.73046875, "learning_rate": 9.90087653000478e-07, "loss": 3.9975, "step": 25196 }, { "epoch": 8.3934371616557, "grad_norm": 0.71484375, "learning_rate": 9.896857362081522e-07, "loss": 4.0163, "step": 25197 }, { "epoch": 8.39377030065795, "grad_norm": 0.7734375, "learning_rate": 9.892838952468878e-07, "loss": 3.981, "step": 25198 }, { "epoch": 8.394103439660197, "grad_norm": 0.73828125, "learning_rate": 9.888821301213635e-07, "loss": 4.0358, "step": 25199 }, { "epoch": 8.394436578662447, "grad_norm": 0.765625, "learning_rate": 9.884804408362599e-07, "loss": 4.0077, "step": 25200 }, { "epoch": 8.394769717664696, "grad_norm": 0.7265625, "learning_rate": 9.880788273962533e-07, "loss": 4.0264, "step": 25201 }, { "epoch": 8.395102856666945, "grad_norm": 0.7421875, "learning_rate": 9.876772898060272e-07, "loss": 4.0202, "step": 25202 }, { "epoch": 8.395435995669192, "grad_norm": 0.75, "learning_rate": 9.87275828070253e-07, "loss": 3.9752, "step": 25203 }, { "epoch": 8.395769134671442, "grad_norm": 0.7734375, "learning_rate": 9.868744421936068e-07, "loss": 3.9897, "step": 25204 }, { "epoch": 8.39610227367369, "grad_norm": 0.8046875, "learning_rate": 9.864731321807684e-07, "loss": 3.9163, "step": 25205 }, { "epoch": 8.396435412675938, "grad_norm": 0.80078125, "learning_rate": 9.860718980364092e-07, "loss": 3.9309, "step": 25206 }, { "epoch": 8.396768551678187, "grad_norm": 0.73828125, "learning_rate": 9.856707397652034e-07, "loss": 4.0215, "step": 25207 }, { "epoch": 8.397101690680437, "grad_norm": 0.8125, "learning_rate": 9.852696573718217e-07, "loss": 4.0147, "step": 25208 }, { "epoch": 8.397434829682686, "grad_norm": 0.73046875, "learning_rate": 9.848686508609394e-07, "loss": 4.0206, "step": 25209 }, { "epoch": 8.397767968684933, "grad_norm": 0.78515625, "learning_rate": 9.844677202372263e-07, "loss": 4.0221, "step": 25210 }, { "epoch": 8.398101107687182, "grad_norm": 0.79296875, "learning_rate": 9.840668655053509e-07, "loss": 4.0698, "step": 25211 }, { "epoch": 8.398434246689432, "grad_norm": 0.7890625, "learning_rate": 9.83666086669983e-07, "loss": 3.9914, "step": 25212 }, { "epoch": 8.39876738569168, "grad_norm": 0.75390625, "learning_rate": 9.832653837357903e-07, "loss": 4.0145, "step": 25213 }, { "epoch": 8.399100524693928, "grad_norm": 0.7265625, "learning_rate": 9.828647567074417e-07, "loss": 4.0061, "step": 25214 }, { "epoch": 8.399433663696177, "grad_norm": 0.78515625, "learning_rate": 9.824642055896001e-07, "loss": 3.9748, "step": 25215 }, { "epoch": 8.399766802698426, "grad_norm": 0.77734375, "learning_rate": 9.820637303869361e-07, "loss": 3.9567, "step": 25216 }, { "epoch": 8.400099941700674, "grad_norm": 0.78515625, "learning_rate": 9.816633311041109e-07, "loss": 3.9972, "step": 25217 }, { "epoch": 8.400433080702923, "grad_norm": 0.78125, "learning_rate": 9.812630077457898e-07, "loss": 4.0538, "step": 25218 }, { "epoch": 8.400766219705172, "grad_norm": 0.77734375, "learning_rate": 9.808627603166334e-07, "loss": 3.9388, "step": 25219 }, { "epoch": 8.401099358707421, "grad_norm": 0.7578125, "learning_rate": 9.804625888213088e-07, "loss": 3.974, "step": 25220 }, { "epoch": 8.401432497709669, "grad_norm": 0.8046875, "learning_rate": 9.800624932644712e-07, "loss": 3.9614, "step": 25221 }, { "epoch": 8.401765636711918, "grad_norm": 0.78515625, "learning_rate": 9.796624736507829e-07, "loss": 3.9823, "step": 25222 }, { "epoch": 8.402098775714167, "grad_norm": 0.74609375, "learning_rate": 9.792625299849043e-07, "loss": 3.9663, "step": 25223 }, { "epoch": 8.402431914716415, "grad_norm": 0.79296875, "learning_rate": 9.788626622714941e-07, "loss": 3.9247, "step": 25224 }, { "epoch": 8.402765053718664, "grad_norm": 0.75390625, "learning_rate": 9.784628705152089e-07, "loss": 3.9917, "step": 25225 }, { "epoch": 8.403098192720913, "grad_norm": 0.7578125, "learning_rate": 9.780631547207045e-07, "loss": 3.9649, "step": 25226 }, { "epoch": 8.403431331723162, "grad_norm": 0.73046875, "learning_rate": 9.776635148926382e-07, "loss": 3.9731, "step": 25227 }, { "epoch": 8.40376447072541, "grad_norm": 0.71484375, "learning_rate": 9.772639510356657e-07, "loss": 3.9436, "step": 25228 }, { "epoch": 8.404097609727659, "grad_norm": 0.765625, "learning_rate": 9.768644631544396e-07, "loss": 4.0138, "step": 25229 }, { "epoch": 8.404430748729908, "grad_norm": 0.75, "learning_rate": 9.764650512536138e-07, "loss": 4.0247, "step": 25230 }, { "epoch": 8.404763887732157, "grad_norm": 0.75390625, "learning_rate": 9.760657153378396e-07, "loss": 3.9062, "step": 25231 }, { "epoch": 8.405097026734405, "grad_norm": 0.7421875, "learning_rate": 9.756664554117692e-07, "loss": 3.9498, "step": 25232 }, { "epoch": 8.405430165736654, "grad_norm": 0.74609375, "learning_rate": 9.75267271480052e-07, "loss": 3.9608, "step": 25233 }, { "epoch": 8.405763304738903, "grad_norm": 0.734375, "learning_rate": 9.748681635473394e-07, "loss": 3.9531, "step": 25234 }, { "epoch": 8.40609644374115, "grad_norm": 0.76171875, "learning_rate": 9.744691316182793e-07, "loss": 4.0107, "step": 25235 }, { "epoch": 8.4064295827434, "grad_norm": 0.76171875, "learning_rate": 9.740701756975198e-07, "loss": 3.9094, "step": 25236 }, { "epoch": 8.406762721745649, "grad_norm": 0.765625, "learning_rate": 9.736712957897054e-07, "loss": 4.0107, "step": 25237 }, { "epoch": 8.407095860747898, "grad_norm": 0.78125, "learning_rate": 9.732724918994881e-07, "loss": 3.9963, "step": 25238 }, { "epoch": 8.407428999750145, "grad_norm": 0.7265625, "learning_rate": 9.728737640315077e-07, "loss": 4.0257, "step": 25239 }, { "epoch": 8.407762138752394, "grad_norm": 0.80078125, "learning_rate": 9.724751121904104e-07, "loss": 3.9503, "step": 25240 }, { "epoch": 8.408095277754644, "grad_norm": 0.76953125, "learning_rate": 9.720765363808385e-07, "loss": 3.945, "step": 25241 }, { "epoch": 8.408428416756891, "grad_norm": 0.75390625, "learning_rate": 9.716780366074362e-07, "loss": 3.9585, "step": 25242 }, { "epoch": 8.40876155575914, "grad_norm": 0.78515625, "learning_rate": 9.712796128748445e-07, "loss": 4.0072, "step": 25243 }, { "epoch": 8.40909469476139, "grad_norm": 0.8125, "learning_rate": 9.708812651877042e-07, "loss": 3.9513, "step": 25244 }, { "epoch": 8.409427833763639, "grad_norm": 0.78515625, "learning_rate": 9.704829935506535e-07, "loss": 4.0259, "step": 25245 }, { "epoch": 8.409760972765886, "grad_norm": 0.77734375, "learning_rate": 9.700847979683345e-07, "loss": 4.031, "step": 25246 }, { "epoch": 8.410094111768135, "grad_norm": 0.76953125, "learning_rate": 9.69686678445386e-07, "loss": 3.9578, "step": 25247 }, { "epoch": 8.410427250770384, "grad_norm": 0.79296875, "learning_rate": 9.692886349864392e-07, "loss": 3.9578, "step": 25248 }, { "epoch": 8.410760389772634, "grad_norm": 0.7421875, "learning_rate": 9.688906675961362e-07, "loss": 3.944, "step": 25249 }, { "epoch": 8.411093528774881, "grad_norm": 0.7734375, "learning_rate": 9.684927762791102e-07, "loss": 4.0072, "step": 25250 }, { "epoch": 8.41142666777713, "grad_norm": 0.734375, "learning_rate": 9.680949610399964e-07, "loss": 3.9794, "step": 25251 }, { "epoch": 8.41175980677938, "grad_norm": 0.7734375, "learning_rate": 9.67697221883427e-07, "loss": 3.9157, "step": 25252 }, { "epoch": 8.412092945781627, "grad_norm": 0.80859375, "learning_rate": 9.67299558814037e-07, "loss": 3.8955, "step": 25253 }, { "epoch": 8.412426084783876, "grad_norm": 0.78125, "learning_rate": 9.669019718364578e-07, "loss": 3.8913, "step": 25254 }, { "epoch": 8.412759223786125, "grad_norm": 0.7578125, "learning_rate": 9.665044609553194e-07, "loss": 3.9982, "step": 25255 }, { "epoch": 8.413092362788374, "grad_norm": 0.828125, "learning_rate": 9.661070261752533e-07, "loss": 3.8932, "step": 25256 }, { "epoch": 8.413425501790622, "grad_norm": 0.81640625, "learning_rate": 9.657096675008872e-07, "loss": 3.9721, "step": 25257 }, { "epoch": 8.41375864079287, "grad_norm": 0.7578125, "learning_rate": 9.65312384936851e-07, "loss": 4.0392, "step": 25258 }, { "epoch": 8.41409177979512, "grad_norm": 0.78515625, "learning_rate": 9.649151784877689e-07, "loss": 3.9651, "step": 25259 }, { "epoch": 8.414424918797367, "grad_norm": 0.80859375, "learning_rate": 9.645180481582724e-07, "loss": 3.9698, "step": 25260 }, { "epoch": 8.414758057799617, "grad_norm": 0.73828125, "learning_rate": 9.641209939529839e-07, "loss": 4.0202, "step": 25261 }, { "epoch": 8.415091196801866, "grad_norm": 0.73828125, "learning_rate": 9.6372401587653e-07, "loss": 4.0582, "step": 25262 }, { "epoch": 8.415424335804115, "grad_norm": 0.76953125, "learning_rate": 9.633271139335317e-07, "loss": 3.9533, "step": 25263 }, { "epoch": 8.415757474806362, "grad_norm": 0.734375, "learning_rate": 9.62930288128616e-07, "loss": 3.9998, "step": 25264 }, { "epoch": 8.416090613808612, "grad_norm": 0.7734375, "learning_rate": 9.625335384664047e-07, "loss": 3.9871, "step": 25265 }, { "epoch": 8.41642375281086, "grad_norm": 0.8359375, "learning_rate": 9.621368649515144e-07, "loss": 3.9572, "step": 25266 }, { "epoch": 8.416756891813108, "grad_norm": 0.77734375, "learning_rate": 9.617402675885698e-07, "loss": 4.0135, "step": 25267 }, { "epoch": 8.417090030815357, "grad_norm": 0.76953125, "learning_rate": 9.613437463821894e-07, "loss": 3.9708, "step": 25268 }, { "epoch": 8.417423169817607, "grad_norm": 0.75390625, "learning_rate": 9.609473013369924e-07, "loss": 4.0395, "step": 25269 }, { "epoch": 8.417756308819856, "grad_norm": 0.734375, "learning_rate": 9.60550932457593e-07, "loss": 4.0138, "step": 25270 }, { "epoch": 8.418089447822103, "grad_norm": 0.78125, "learning_rate": 9.60154639748613e-07, "loss": 3.9155, "step": 25271 }, { "epoch": 8.418422586824352, "grad_norm": 0.78515625, "learning_rate": 9.597584232146667e-07, "loss": 3.9582, "step": 25272 }, { "epoch": 8.418755725826601, "grad_norm": 0.78125, "learning_rate": 9.593622828603678e-07, "loss": 3.9467, "step": 25273 }, { "epoch": 8.41908886482885, "grad_norm": 0.78515625, "learning_rate": 9.589662186903313e-07, "loss": 3.9509, "step": 25274 }, { "epoch": 8.419422003831098, "grad_norm": 0.73046875, "learning_rate": 9.58570230709171e-07, "loss": 3.9741, "step": 25275 }, { "epoch": 8.419755142833347, "grad_norm": 0.77734375, "learning_rate": 9.581743189214986e-07, "loss": 3.9899, "step": 25276 }, { "epoch": 8.420088281835596, "grad_norm": 0.81640625, "learning_rate": 9.577784833319242e-07, "loss": 4.0229, "step": 25277 }, { "epoch": 8.420421420837844, "grad_norm": 0.76171875, "learning_rate": 9.573827239450622e-07, "loss": 3.9705, "step": 25278 }, { "epoch": 8.420754559840093, "grad_norm": 0.8515625, "learning_rate": 9.56987040765519e-07, "loss": 3.9849, "step": 25279 }, { "epoch": 8.421087698842342, "grad_norm": 0.76171875, "learning_rate": 9.56591433797906e-07, "loss": 4.0428, "step": 25280 }, { "epoch": 8.421420837844591, "grad_norm": 0.796875, "learning_rate": 9.561959030468266e-07, "loss": 4.041, "step": 25281 }, { "epoch": 8.421753976846839, "grad_norm": 0.78125, "learning_rate": 9.558004485168936e-07, "loss": 3.9874, "step": 25282 }, { "epoch": 8.422087115849088, "grad_norm": 0.828125, "learning_rate": 9.554050702127128e-07, "loss": 3.9951, "step": 25283 }, { "epoch": 8.422420254851337, "grad_norm": 0.7421875, "learning_rate": 9.55009768138885e-07, "loss": 4.0222, "step": 25284 }, { "epoch": 8.422753393853585, "grad_norm": 0.765625, "learning_rate": 9.546145423000154e-07, "loss": 4.0713, "step": 25285 }, { "epoch": 8.423086532855834, "grad_norm": 0.77734375, "learning_rate": 9.542193927007107e-07, "loss": 4.0157, "step": 25286 }, { "epoch": 8.423419671858083, "grad_norm": 0.78125, "learning_rate": 9.538243193455726e-07, "loss": 3.9088, "step": 25287 }, { "epoch": 8.423752810860332, "grad_norm": 0.73828125, "learning_rate": 9.534293222392018e-07, "loss": 3.9816, "step": 25288 }, { "epoch": 8.42408594986258, "grad_norm": 0.73046875, "learning_rate": 9.530344013861971e-07, "loss": 3.9925, "step": 25289 }, { "epoch": 8.424419088864829, "grad_norm": 0.77734375, "learning_rate": 9.526395567911633e-07, "loss": 3.9896, "step": 25290 }, { "epoch": 8.424752227867078, "grad_norm": 0.77734375, "learning_rate": 9.522447884586991e-07, "loss": 3.9525, "step": 25291 }, { "epoch": 8.425085366869327, "grad_norm": 0.76171875, "learning_rate": 9.518500963933966e-07, "loss": 3.9343, "step": 25292 }, { "epoch": 8.425418505871574, "grad_norm": 0.7734375, "learning_rate": 9.514554805998596e-07, "loss": 4.0076, "step": 25293 }, { "epoch": 8.425751644873824, "grad_norm": 0.765625, "learning_rate": 9.51060941082682e-07, "loss": 3.9282, "step": 25294 }, { "epoch": 8.426084783876073, "grad_norm": 0.73828125, "learning_rate": 9.506664778464586e-07, "loss": 3.9459, "step": 25295 }, { "epoch": 8.42641792287832, "grad_norm": 0.7890625, "learning_rate": 9.502720908957846e-07, "loss": 3.9687, "step": 25296 }, { "epoch": 8.42675106188057, "grad_norm": 0.72265625, "learning_rate": 9.498777802352548e-07, "loss": 4.0202, "step": 25297 }, { "epoch": 8.427084200882819, "grad_norm": 0.75, "learning_rate": 9.494835458694614e-07, "loss": 3.953, "step": 25298 }, { "epoch": 8.427417339885068, "grad_norm": 0.80078125, "learning_rate": 9.490893878029966e-07, "loss": 3.9659, "step": 25299 }, { "epoch": 8.427750478887315, "grad_norm": 0.7578125, "learning_rate": 9.486953060404507e-07, "loss": 3.9742, "step": 25300 }, { "epoch": 8.428083617889564, "grad_norm": 0.7421875, "learning_rate": 9.483013005864144e-07, "loss": 3.9554, "step": 25301 }, { "epoch": 8.428416756891814, "grad_norm": 0.796875, "learning_rate": 9.479073714454772e-07, "loss": 3.9011, "step": 25302 }, { "epoch": 8.428749895894061, "grad_norm": 0.80859375, "learning_rate": 9.475135186222264e-07, "loss": 3.9351, "step": 25303 }, { "epoch": 8.42908303489631, "grad_norm": 0.7734375, "learning_rate": 9.471197421212507e-07, "loss": 3.9372, "step": 25304 }, { "epoch": 8.42941617389856, "grad_norm": 0.77734375, "learning_rate": 9.467260419471374e-07, "loss": 4.066, "step": 25305 }, { "epoch": 8.429749312900809, "grad_norm": 0.78125, "learning_rate": 9.46332418104471e-07, "loss": 4.0389, "step": 25306 }, { "epoch": 8.430082451903056, "grad_norm": 0.74609375, "learning_rate": 9.459388705978355e-07, "loss": 3.9511, "step": 25307 }, { "epoch": 8.430415590905305, "grad_norm": 0.77734375, "learning_rate": 9.455453994318172e-07, "loss": 3.9646, "step": 25308 }, { "epoch": 8.430748729907554, "grad_norm": 0.78515625, "learning_rate": 9.451520046110007e-07, "loss": 3.9352, "step": 25309 }, { "epoch": 8.431081868909803, "grad_norm": 0.7578125, "learning_rate": 9.447586861399615e-07, "loss": 3.9096, "step": 25310 }, { "epoch": 8.431415007912051, "grad_norm": 0.73828125, "learning_rate": 9.44365444023286e-07, "loss": 4.0503, "step": 25311 }, { "epoch": 8.4317481469143, "grad_norm": 0.77734375, "learning_rate": 9.439722782655547e-07, "loss": 3.9606, "step": 25312 }, { "epoch": 8.43208128591655, "grad_norm": 0.76171875, "learning_rate": 9.435791888713455e-07, "loss": 4.0163, "step": 25313 }, { "epoch": 8.432414424918797, "grad_norm": 0.75390625, "learning_rate": 9.431861758452356e-07, "loss": 3.9798, "step": 25314 }, { "epoch": 8.432747563921046, "grad_norm": 0.75, "learning_rate": 9.427932391918074e-07, "loss": 3.9793, "step": 25315 }, { "epoch": 8.433080702923295, "grad_norm": 0.78515625, "learning_rate": 9.424003789156343e-07, "loss": 3.8381, "step": 25316 }, { "epoch": 8.433413841925544, "grad_norm": 0.7421875, "learning_rate": 9.420075950212931e-07, "loss": 4.0014, "step": 25317 }, { "epoch": 8.433746980927792, "grad_norm": 0.79296875, "learning_rate": 9.416148875133591e-07, "loss": 3.9878, "step": 25318 }, { "epoch": 8.43408011993004, "grad_norm": 0.828125, "learning_rate": 9.41222256396406e-07, "loss": 3.9409, "step": 25319 }, { "epoch": 8.43441325893229, "grad_norm": 0.78125, "learning_rate": 9.40829701675007e-07, "loss": 3.9603, "step": 25320 }, { "epoch": 8.434746397934537, "grad_norm": 0.7578125, "learning_rate": 9.404372233537343e-07, "loss": 3.9597, "step": 25321 }, { "epoch": 8.435079536936787, "grad_norm": 0.7578125, "learning_rate": 9.400448214371606e-07, "loss": 3.9529, "step": 25322 }, { "epoch": 8.435412675939036, "grad_norm": 0.83203125, "learning_rate": 9.396524959298561e-07, "loss": 3.9995, "step": 25323 }, { "epoch": 8.435745814941285, "grad_norm": 0.76953125, "learning_rate": 9.3926024683639e-07, "loss": 3.9862, "step": 25324 }, { "epoch": 8.436078953943532, "grad_norm": 0.765625, "learning_rate": 9.388680741613306e-07, "loss": 3.9845, "step": 25325 }, { "epoch": 8.436412092945782, "grad_norm": 0.7734375, "learning_rate": 9.384759779092484e-07, "loss": 4.0152, "step": 25326 }, { "epoch": 8.43674523194803, "grad_norm": 0.7421875, "learning_rate": 9.380839580847094e-07, "loss": 3.9777, "step": 25327 }, { "epoch": 8.437078370950278, "grad_norm": 0.765625, "learning_rate": 9.376920146922787e-07, "loss": 3.9964, "step": 25328 }, { "epoch": 8.437411509952527, "grad_norm": 0.75, "learning_rate": 9.3730014773652e-07, "loss": 3.9852, "step": 25329 }, { "epoch": 8.437744648954776, "grad_norm": 0.7734375, "learning_rate": 9.369083572220013e-07, "loss": 4.0208, "step": 25330 }, { "epoch": 8.438077787957026, "grad_norm": 0.75390625, "learning_rate": 9.365166431532849e-07, "loss": 3.9559, "step": 25331 }, { "epoch": 8.438410926959273, "grad_norm": 0.75, "learning_rate": 9.361250055349326e-07, "loss": 3.9535, "step": 25332 }, { "epoch": 8.438744065961522, "grad_norm": 0.734375, "learning_rate": 9.357334443715054e-07, "loss": 3.9292, "step": 25333 }, { "epoch": 8.439077204963771, "grad_norm": 0.76953125, "learning_rate": 9.353419596675669e-07, "loss": 3.962, "step": 25334 }, { "epoch": 8.43941034396602, "grad_norm": 0.78125, "learning_rate": 9.34950551427676e-07, "loss": 4.021, "step": 25335 }, { "epoch": 8.439743482968268, "grad_norm": 0.78125, "learning_rate": 9.345592196563907e-07, "loss": 4.0171, "step": 25336 }, { "epoch": 8.440076621970517, "grad_norm": 0.73828125, "learning_rate": 9.341679643582699e-07, "loss": 4.0006, "step": 25337 }, { "epoch": 8.440409760972766, "grad_norm": 0.78125, "learning_rate": 9.337767855378715e-07, "loss": 3.9474, "step": 25338 }, { "epoch": 8.440742899975014, "grad_norm": 0.76171875, "learning_rate": 9.333856831997504e-07, "loss": 3.9564, "step": 25339 }, { "epoch": 8.441076038977263, "grad_norm": 0.8125, "learning_rate": 9.32994657348461e-07, "loss": 3.9521, "step": 25340 }, { "epoch": 8.441409177979512, "grad_norm": 0.77734375, "learning_rate": 9.326037079885616e-07, "loss": 3.9531, "step": 25341 }, { "epoch": 8.441742316981761, "grad_norm": 0.74609375, "learning_rate": 9.322128351246043e-07, "loss": 3.9914, "step": 25342 }, { "epoch": 8.442075455984009, "grad_norm": 0.80078125, "learning_rate": 9.318220387611412e-07, "loss": 4.0103, "step": 25343 }, { "epoch": 8.442408594986258, "grad_norm": 0.73046875, "learning_rate": 9.314313189027238e-07, "loss": 4.0207, "step": 25344 }, { "epoch": 8.442741733988507, "grad_norm": 0.765625, "learning_rate": 9.310406755539066e-07, "loss": 4.0071, "step": 25345 }, { "epoch": 8.443074872990755, "grad_norm": 0.76171875, "learning_rate": 9.306501087192362e-07, "loss": 3.933, "step": 25346 }, { "epoch": 8.443408011993004, "grad_norm": 0.734375, "learning_rate": 9.302596184032605e-07, "loss": 3.9998, "step": 25347 }, { "epoch": 8.443741150995253, "grad_norm": 0.78515625, "learning_rate": 9.298692046105325e-07, "loss": 4.0308, "step": 25348 }, { "epoch": 8.444074289997502, "grad_norm": 0.78515625, "learning_rate": 9.29478867345597e-07, "loss": 4.0364, "step": 25349 }, { "epoch": 8.44440742899975, "grad_norm": 0.71875, "learning_rate": 9.290886066130011e-07, "loss": 4.0802, "step": 25350 }, { "epoch": 8.444740568001999, "grad_norm": 0.73046875, "learning_rate": 9.286984224172879e-07, "loss": 3.9649, "step": 25351 }, { "epoch": 8.445073707004248, "grad_norm": 0.76171875, "learning_rate": 9.283083147630079e-07, "loss": 3.9254, "step": 25352 }, { "epoch": 8.445406846006497, "grad_norm": 0.75, "learning_rate": 9.279182836547006e-07, "loss": 4.0351, "step": 25353 }, { "epoch": 8.445739985008744, "grad_norm": 0.765625, "learning_rate": 9.275283290969103e-07, "loss": 3.9878, "step": 25354 }, { "epoch": 8.446073124010994, "grad_norm": 0.81640625, "learning_rate": 9.271384510941797e-07, "loss": 3.9544, "step": 25355 }, { "epoch": 8.446406263013243, "grad_norm": 0.76953125, "learning_rate": 9.267486496510485e-07, "loss": 4.0359, "step": 25356 }, { "epoch": 8.44673940201549, "grad_norm": 0.73828125, "learning_rate": 9.263589247720583e-07, "loss": 3.9955, "step": 25357 }, { "epoch": 8.44707254101774, "grad_norm": 0.7734375, "learning_rate": 9.259692764617461e-07, "loss": 3.969, "step": 25358 }, { "epoch": 8.447405680019989, "grad_norm": 0.8125, "learning_rate": 9.25579704724655e-07, "loss": 3.9357, "step": 25359 }, { "epoch": 8.447738819022238, "grad_norm": 0.7734375, "learning_rate": 9.251902095653197e-07, "loss": 4.0391, "step": 25360 }, { "epoch": 8.448071958024485, "grad_norm": 0.76953125, "learning_rate": 9.248007909882774e-07, "loss": 3.9849, "step": 25361 }, { "epoch": 8.448405097026734, "grad_norm": 0.7734375, "learning_rate": 9.244114489980621e-07, "loss": 3.9689, "step": 25362 }, { "epoch": 8.448738236028984, "grad_norm": 0.76953125, "learning_rate": 9.24022183599214e-07, "loss": 3.9803, "step": 25363 }, { "epoch": 8.449071375031231, "grad_norm": 0.74609375, "learning_rate": 9.236329947962632e-07, "loss": 3.9711, "step": 25364 }, { "epoch": 8.44940451403348, "grad_norm": 0.75390625, "learning_rate": 9.232438825937408e-07, "loss": 3.9513, "step": 25365 }, { "epoch": 8.44973765303573, "grad_norm": 0.76953125, "learning_rate": 9.228548469961842e-07, "loss": 4.018, "step": 25366 }, { "epoch": 8.450070792037979, "grad_norm": 0.78125, "learning_rate": 9.22465888008123e-07, "loss": 3.9099, "step": 25367 }, { "epoch": 8.450403931040226, "grad_norm": 0.85546875, "learning_rate": 9.220770056340869e-07, "loss": 3.9632, "step": 25368 }, { "epoch": 8.450737070042475, "grad_norm": 0.7734375, "learning_rate": 9.21688199878604e-07, "loss": 4.0645, "step": 25369 }, { "epoch": 8.451070209044724, "grad_norm": 0.73828125, "learning_rate": 9.212994707462072e-07, "loss": 3.9976, "step": 25370 }, { "epoch": 8.451403348046973, "grad_norm": 0.75390625, "learning_rate": 9.209108182414213e-07, "loss": 3.9861, "step": 25371 }, { "epoch": 8.45173648704922, "grad_norm": 0.80859375, "learning_rate": 9.205222423687761e-07, "loss": 4.0285, "step": 25372 }, { "epoch": 8.45206962605147, "grad_norm": 0.8359375, "learning_rate": 9.201337431327922e-07, "loss": 3.9584, "step": 25373 }, { "epoch": 8.45240276505372, "grad_norm": 0.78125, "learning_rate": 9.197453205380007e-07, "loss": 3.9121, "step": 25374 }, { "epoch": 8.452735904055967, "grad_norm": 0.75390625, "learning_rate": 9.193569745889225e-07, "loss": 4.0542, "step": 25375 }, { "epoch": 8.453069043058216, "grad_norm": 0.765625, "learning_rate": 9.189687052900819e-07, "loss": 3.973, "step": 25376 }, { "epoch": 8.453402182060465, "grad_norm": 0.75390625, "learning_rate": 9.185805126460006e-07, "loss": 4.0009, "step": 25377 }, { "epoch": 8.453735321062714, "grad_norm": 0.71875, "learning_rate": 9.181923966612015e-07, "loss": 3.9989, "step": 25378 }, { "epoch": 8.454068460064962, "grad_norm": 0.8125, "learning_rate": 9.178043573402053e-07, "loss": 3.9426, "step": 25379 }, { "epoch": 8.45440159906721, "grad_norm": 0.7734375, "learning_rate": 9.174163946875316e-07, "loss": 3.9557, "step": 25380 }, { "epoch": 8.45473473806946, "grad_norm": 0.765625, "learning_rate": 9.170285087076994e-07, "loss": 4.0278, "step": 25381 }, { "epoch": 8.455067877071707, "grad_norm": 0.75390625, "learning_rate": 9.166406994052256e-07, "loss": 4.0019, "step": 25382 }, { "epoch": 8.455401016073957, "grad_norm": 0.76953125, "learning_rate": 9.162529667846279e-07, "loss": 3.9734, "step": 25383 }, { "epoch": 8.455734155076206, "grad_norm": 0.7578125, "learning_rate": 9.158653108504223e-07, "loss": 3.9915, "step": 25384 }, { "epoch": 8.456067294078455, "grad_norm": 0.765625, "learning_rate": 9.154777316071252e-07, "loss": 4.0119, "step": 25385 }, { "epoch": 8.456400433080702, "grad_norm": 0.75, "learning_rate": 9.150902290592509e-07, "loss": 4.0448, "step": 25386 }, { "epoch": 8.456733572082952, "grad_norm": 0.74609375, "learning_rate": 9.14702803211312e-07, "loss": 4.0409, "step": 25387 }, { "epoch": 8.4570667110852, "grad_norm": 0.7578125, "learning_rate": 9.143154540678209e-07, "loss": 3.9312, "step": 25388 }, { "epoch": 8.457399850087448, "grad_norm": 0.7890625, "learning_rate": 9.139281816332915e-07, "loss": 3.9884, "step": 25389 }, { "epoch": 8.457732989089697, "grad_norm": 0.77734375, "learning_rate": 9.135409859122351e-07, "loss": 3.9605, "step": 25390 }, { "epoch": 8.458066128091946, "grad_norm": 0.76171875, "learning_rate": 9.131538669091566e-07, "loss": 3.8496, "step": 25391 }, { "epoch": 8.458399267094196, "grad_norm": 0.81640625, "learning_rate": 9.127668246285697e-07, "loss": 3.9452, "step": 25392 }, { "epoch": 8.458732406096443, "grad_norm": 0.78515625, "learning_rate": 9.123798590749818e-07, "loss": 4.0074, "step": 25393 }, { "epoch": 8.459065545098692, "grad_norm": 0.703125, "learning_rate": 9.119929702529e-07, "loss": 3.946, "step": 25394 }, { "epoch": 8.459398684100941, "grad_norm": 0.7578125, "learning_rate": 9.116061581668283e-07, "loss": 3.9939, "step": 25395 }, { "epoch": 8.45973182310319, "grad_norm": 0.7734375, "learning_rate": 9.112194228212764e-07, "loss": 4.0644, "step": 25396 }, { "epoch": 8.460064962105438, "grad_norm": 0.80078125, "learning_rate": 9.108327642207472e-07, "loss": 3.893, "step": 25397 }, { "epoch": 8.460398101107687, "grad_norm": 0.796875, "learning_rate": 9.104461823697432e-07, "loss": 4.0007, "step": 25398 }, { "epoch": 8.460731240109936, "grad_norm": 0.76953125, "learning_rate": 9.10059677272769e-07, "loss": 3.9228, "step": 25399 }, { "epoch": 8.461064379112184, "grad_norm": 0.796875, "learning_rate": 9.096732489343259e-07, "loss": 3.965, "step": 25400 }, { "epoch": 8.461397518114433, "grad_norm": 0.78125, "learning_rate": 9.092868973589147e-07, "loss": 3.9674, "step": 25401 }, { "epoch": 8.461730657116682, "grad_norm": 0.75, "learning_rate": 9.08900622551034e-07, "loss": 3.9852, "step": 25402 }, { "epoch": 8.462063796118931, "grad_norm": 0.77734375, "learning_rate": 9.085144245151864e-07, "loss": 3.9883, "step": 25403 }, { "epoch": 8.462396935121179, "grad_norm": 0.77734375, "learning_rate": 9.081283032558688e-07, "loss": 3.9955, "step": 25404 }, { "epoch": 8.462730074123428, "grad_norm": 0.796875, "learning_rate": 9.077422587775777e-07, "loss": 3.9153, "step": 25405 }, { "epoch": 8.463063213125677, "grad_norm": 0.76953125, "learning_rate": 9.073562910848096e-07, "loss": 4.0003, "step": 25406 }, { "epoch": 8.463396352127926, "grad_norm": 0.76171875, "learning_rate": 9.069704001820625e-07, "loss": 3.9524, "step": 25407 }, { "epoch": 8.463729491130174, "grad_norm": 0.77734375, "learning_rate": 9.065845860738311e-07, "loss": 4.0088, "step": 25408 }, { "epoch": 8.464062630132423, "grad_norm": 0.765625, "learning_rate": 9.061988487646053e-07, "loss": 4.0357, "step": 25409 }, { "epoch": 8.464395769134672, "grad_norm": 0.765625, "learning_rate": 9.058131882588821e-07, "loss": 3.9566, "step": 25410 }, { "epoch": 8.46472890813692, "grad_norm": 0.75, "learning_rate": 9.054276045611524e-07, "loss": 3.9479, "step": 25411 }, { "epoch": 8.465062047139169, "grad_norm": 0.78515625, "learning_rate": 9.050420976759071e-07, "loss": 3.9608, "step": 25412 }, { "epoch": 8.465395186141418, "grad_norm": 0.765625, "learning_rate": 9.046566676076348e-07, "loss": 3.9624, "step": 25413 }, { "epoch": 8.465728325143667, "grad_norm": 0.76953125, "learning_rate": 9.042713143608291e-07, "loss": 3.9846, "step": 25414 }, { "epoch": 8.466061464145914, "grad_norm": 0.734375, "learning_rate": 9.038860379399758e-07, "loss": 3.9242, "step": 25415 }, { "epoch": 8.466394603148164, "grad_norm": 0.71875, "learning_rate": 9.035008383495627e-07, "loss": 4.0497, "step": 25416 }, { "epoch": 8.466727742150413, "grad_norm": 0.76171875, "learning_rate": 9.031157155940773e-07, "loss": 4.038, "step": 25417 }, { "epoch": 8.46706088115266, "grad_norm": 0.734375, "learning_rate": 9.027306696780049e-07, "loss": 3.991, "step": 25418 }, { "epoch": 8.46739402015491, "grad_norm": 0.75390625, "learning_rate": 9.023457006058306e-07, "loss": 3.9234, "step": 25419 }, { "epoch": 8.467727159157159, "grad_norm": 0.79296875, "learning_rate": 9.019608083820388e-07, "loss": 3.9664, "step": 25420 }, { "epoch": 8.468060298159408, "grad_norm": 0.7421875, "learning_rate": 9.015759930111103e-07, "loss": 3.9346, "step": 25421 }, { "epoch": 8.468393437161655, "grad_norm": 0.8203125, "learning_rate": 9.011912544975306e-07, "loss": 3.9413, "step": 25422 }, { "epoch": 8.468726576163904, "grad_norm": 0.75390625, "learning_rate": 9.008065928457804e-07, "loss": 3.9956, "step": 25423 }, { "epoch": 8.469059715166154, "grad_norm": 0.80078125, "learning_rate": 9.004220080603392e-07, "loss": 3.9391, "step": 25424 }, { "epoch": 8.469392854168401, "grad_norm": 0.78515625, "learning_rate": 9.000375001456854e-07, "loss": 3.9413, "step": 25425 }, { "epoch": 8.46972599317065, "grad_norm": 0.78515625, "learning_rate": 8.996530691063018e-07, "loss": 3.9685, "step": 25426 }, { "epoch": 8.4700591321729, "grad_norm": 0.7578125, "learning_rate": 8.992687149466625e-07, "loss": 4.0067, "step": 25427 }, { "epoch": 8.470392271175148, "grad_norm": 0.75390625, "learning_rate": 8.988844376712447e-07, "loss": 3.9347, "step": 25428 }, { "epoch": 8.470725410177396, "grad_norm": 0.80078125, "learning_rate": 8.985002372845255e-07, "loss": 3.9614, "step": 25429 }, { "epoch": 8.471058549179645, "grad_norm": 0.77734375, "learning_rate": 8.981161137909808e-07, "loss": 3.9732, "step": 25430 }, { "epoch": 8.471391688181894, "grad_norm": 0.79296875, "learning_rate": 8.977320671950834e-07, "loss": 3.9895, "step": 25431 }, { "epoch": 8.471724827184143, "grad_norm": 0.765625, "learning_rate": 8.973480975013049e-07, "loss": 3.9388, "step": 25432 }, { "epoch": 8.47205796618639, "grad_norm": 0.71875, "learning_rate": 8.969642047141216e-07, "loss": 3.9781, "step": 25433 }, { "epoch": 8.47239110518864, "grad_norm": 0.796875, "learning_rate": 8.965803888380034e-07, "loss": 4.0031, "step": 25434 }, { "epoch": 8.47272424419089, "grad_norm": 0.76953125, "learning_rate": 8.961966498774198e-07, "loss": 3.9736, "step": 25435 }, { "epoch": 8.473057383193137, "grad_norm": 0.72265625, "learning_rate": 8.958129878368427e-07, "loss": 4.0295, "step": 25436 }, { "epoch": 8.473390522195386, "grad_norm": 0.78515625, "learning_rate": 8.954294027207388e-07, "loss": 4.0045, "step": 25437 }, { "epoch": 8.473723661197635, "grad_norm": 0.76953125, "learning_rate": 8.950458945335773e-07, "loss": 4.0188, "step": 25438 }, { "epoch": 8.474056800199884, "grad_norm": 0.765625, "learning_rate": 8.946624632798231e-07, "loss": 3.9558, "step": 25439 }, { "epoch": 8.474389939202132, "grad_norm": 0.7734375, "learning_rate": 8.942791089639449e-07, "loss": 3.9993, "step": 25440 }, { "epoch": 8.47472307820438, "grad_norm": 0.77734375, "learning_rate": 8.938958315904084e-07, "loss": 4.0162, "step": 25441 }, { "epoch": 8.47505621720663, "grad_norm": 0.71875, "learning_rate": 8.935126311636757e-07, "loss": 4.0184, "step": 25442 }, { "epoch": 8.475389356208877, "grad_norm": 0.78125, "learning_rate": 8.931295076882101e-07, "loss": 4.0009, "step": 25443 }, { "epoch": 8.475722495211127, "grad_norm": 0.73828125, "learning_rate": 8.927464611684788e-07, "loss": 3.9962, "step": 25444 }, { "epoch": 8.476055634213376, "grad_norm": 0.78125, "learning_rate": 8.923634916089374e-07, "loss": 4.0399, "step": 25445 }, { "epoch": 8.476388773215625, "grad_norm": 0.72265625, "learning_rate": 8.919805990140481e-07, "loss": 3.9999, "step": 25446 }, { "epoch": 8.476721912217872, "grad_norm": 0.765625, "learning_rate": 8.915977833882732e-07, "loss": 3.9664, "step": 25447 }, { "epoch": 8.477055051220121, "grad_norm": 0.76953125, "learning_rate": 8.912150447360709e-07, "loss": 3.9495, "step": 25448 }, { "epoch": 8.47738819022237, "grad_norm": 0.78125, "learning_rate": 8.908323830618984e-07, "loss": 3.9766, "step": 25449 }, { "epoch": 8.477721329224618, "grad_norm": 0.76953125, "learning_rate": 8.90449798370212e-07, "loss": 3.9454, "step": 25450 }, { "epoch": 8.478054468226867, "grad_norm": 0.765625, "learning_rate": 8.900672906654699e-07, "loss": 3.9375, "step": 25451 }, { "epoch": 8.478387607229116, "grad_norm": 0.74609375, "learning_rate": 8.896848599521279e-07, "loss": 3.9662, "step": 25452 }, { "epoch": 8.478720746231366, "grad_norm": 0.765625, "learning_rate": 8.893025062346388e-07, "loss": 4.027, "step": 25453 }, { "epoch": 8.479053885233613, "grad_norm": 0.8046875, "learning_rate": 8.889202295174567e-07, "loss": 4.0075, "step": 25454 }, { "epoch": 8.479387024235862, "grad_norm": 0.7578125, "learning_rate": 8.885380298050347e-07, "loss": 3.9415, "step": 25455 }, { "epoch": 8.479720163238111, "grad_norm": 0.80078125, "learning_rate": 8.881559071018233e-07, "loss": 3.9754, "step": 25456 }, { "epoch": 8.48005330224036, "grad_norm": 0.75, "learning_rate": 8.877738614122741e-07, "loss": 3.9802, "step": 25457 }, { "epoch": 8.480386441242608, "grad_norm": 0.82421875, "learning_rate": 8.873918927408376e-07, "loss": 3.9322, "step": 25458 }, { "epoch": 8.480719580244857, "grad_norm": 0.76953125, "learning_rate": 8.870100010919635e-07, "loss": 4.0442, "step": 25459 }, { "epoch": 8.481052719247106, "grad_norm": 0.765625, "learning_rate": 8.866281864700992e-07, "loss": 3.9684, "step": 25460 }, { "epoch": 8.481385858249354, "grad_norm": 0.76171875, "learning_rate": 8.862464488796903e-07, "loss": 4.0665, "step": 25461 }, { "epoch": 8.481718997251603, "grad_norm": 0.78125, "learning_rate": 8.858647883251875e-07, "loss": 4.0529, "step": 25462 }, { "epoch": 8.482052136253852, "grad_norm": 0.77734375, "learning_rate": 8.854832048110329e-07, "loss": 3.9754, "step": 25463 }, { "epoch": 8.482385275256101, "grad_norm": 0.71875, "learning_rate": 8.851016983416713e-07, "loss": 3.9685, "step": 25464 }, { "epoch": 8.482718414258349, "grad_norm": 0.76171875, "learning_rate": 8.847202689215458e-07, "loss": 3.9876, "step": 25465 }, { "epoch": 8.483051553260598, "grad_norm": 0.75, "learning_rate": 8.843389165551022e-07, "loss": 3.9305, "step": 25466 }, { "epoch": 8.483384692262847, "grad_norm": 0.7421875, "learning_rate": 8.839576412467809e-07, "loss": 3.9437, "step": 25467 }, { "epoch": 8.483717831265096, "grad_norm": 0.74609375, "learning_rate": 8.835764430010227e-07, "loss": 3.9541, "step": 25468 }, { "epoch": 8.484050970267344, "grad_norm": 0.8515625, "learning_rate": 8.831953218222654e-07, "loss": 3.9765, "step": 25469 }, { "epoch": 8.484384109269593, "grad_norm": 0.72265625, "learning_rate": 8.828142777149534e-07, "loss": 4.014, "step": 25470 }, { "epoch": 8.484717248271842, "grad_norm": 0.73828125, "learning_rate": 8.824333106835228e-07, "loss": 4.0025, "step": 25471 }, { "epoch": 8.48505038727409, "grad_norm": 0.78125, "learning_rate": 8.820524207324077e-07, "loss": 4.0418, "step": 25472 }, { "epoch": 8.485383526276339, "grad_norm": 0.76171875, "learning_rate": 8.816716078660495e-07, "loss": 4.0423, "step": 25473 }, { "epoch": 8.485716665278588, "grad_norm": 0.7421875, "learning_rate": 8.812908720888812e-07, "loss": 4.0265, "step": 25474 }, { "epoch": 8.486049804280837, "grad_norm": 0.76171875, "learning_rate": 8.809102134053387e-07, "loss": 3.9374, "step": 25475 }, { "epoch": 8.486382943283084, "grad_norm": 0.76171875, "learning_rate": 8.805296318198533e-07, "loss": 4.0571, "step": 25476 }, { "epoch": 8.486716082285334, "grad_norm": 0.75390625, "learning_rate": 8.801491273368615e-07, "loss": 3.9736, "step": 25477 }, { "epoch": 8.487049221287583, "grad_norm": 0.73828125, "learning_rate": 8.797686999607937e-07, "loss": 3.9502, "step": 25478 }, { "epoch": 8.48738236028983, "grad_norm": 0.7578125, "learning_rate": 8.793883496960817e-07, "loss": 4.0209, "step": 25479 }, { "epoch": 8.48771549929208, "grad_norm": 0.75390625, "learning_rate": 8.790080765471542e-07, "loss": 3.9766, "step": 25480 }, { "epoch": 8.488048638294329, "grad_norm": 0.74609375, "learning_rate": 8.786278805184428e-07, "loss": 3.9528, "step": 25481 }, { "epoch": 8.488381777296578, "grad_norm": 0.79296875, "learning_rate": 8.782477616143739e-07, "loss": 3.938, "step": 25482 }, { "epoch": 8.488714916298825, "grad_norm": 0.72265625, "learning_rate": 8.778677198393747e-07, "loss": 3.9307, "step": 25483 }, { "epoch": 8.489048055301074, "grad_norm": 0.8203125, "learning_rate": 8.774877551978744e-07, "loss": 3.9874, "step": 25484 }, { "epoch": 8.489381194303323, "grad_norm": 0.765625, "learning_rate": 8.771078676942975e-07, "loss": 3.9562, "step": 25485 }, { "epoch": 8.489714333305571, "grad_norm": 0.7265625, "learning_rate": 8.767280573330683e-07, "loss": 4.0436, "step": 25486 }, { "epoch": 8.49004747230782, "grad_norm": 0.765625, "learning_rate": 8.763483241186104e-07, "loss": 4.0053, "step": 25487 }, { "epoch": 8.49038061131007, "grad_norm": 0.74609375, "learning_rate": 8.759686680553489e-07, "loss": 3.9227, "step": 25488 }, { "epoch": 8.490713750312318, "grad_norm": 0.7578125, "learning_rate": 8.755890891477067e-07, "loss": 3.8883, "step": 25489 }, { "epoch": 8.491046889314566, "grad_norm": 0.77734375, "learning_rate": 8.752095874000996e-07, "loss": 4.0071, "step": 25490 }, { "epoch": 8.491380028316815, "grad_norm": 0.8125, "learning_rate": 8.74830162816953e-07, "loss": 3.9169, "step": 25491 }, { "epoch": 8.491713167319064, "grad_norm": 0.74609375, "learning_rate": 8.744508154026853e-07, "loss": 3.9489, "step": 25492 }, { "epoch": 8.492046306321313, "grad_norm": 0.75390625, "learning_rate": 8.740715451617145e-07, "loss": 3.9169, "step": 25493 }, { "epoch": 8.49237944532356, "grad_norm": 0.76953125, "learning_rate": 8.736923520984572e-07, "loss": 3.9075, "step": 25494 }, { "epoch": 8.49271258432581, "grad_norm": 0.765625, "learning_rate": 8.733132362173329e-07, "loss": 3.9734, "step": 25495 }, { "epoch": 8.49304572332806, "grad_norm": 0.76171875, "learning_rate": 8.729341975227559e-07, "loss": 4.0301, "step": 25496 }, { "epoch": 8.493378862330307, "grad_norm": 0.77734375, "learning_rate": 8.725552360191416e-07, "loss": 3.9862, "step": 25497 }, { "epoch": 8.493712001332556, "grad_norm": 0.71875, "learning_rate": 8.721763517109033e-07, "loss": 3.9672, "step": 25498 }, { "epoch": 8.494045140334805, "grad_norm": 0.7734375, "learning_rate": 8.717975446024548e-07, "loss": 3.9601, "step": 25499 }, { "epoch": 8.494378279337054, "grad_norm": 0.75390625, "learning_rate": 8.714188146982085e-07, "loss": 3.9539, "step": 25500 }, { "epoch": 8.494711418339302, "grad_norm": 0.73828125, "learning_rate": 8.710401620025735e-07, "loss": 4.044, "step": 25501 }, { "epoch": 8.49504455734155, "grad_norm": 0.82421875, "learning_rate": 8.706615865199644e-07, "loss": 4.0332, "step": 25502 }, { "epoch": 8.4953776963438, "grad_norm": 0.78515625, "learning_rate": 8.702830882547885e-07, "loss": 3.9379, "step": 25503 }, { "epoch": 8.495710835346047, "grad_norm": 0.76171875, "learning_rate": 8.699046672114542e-07, "loss": 3.9305, "step": 25504 }, { "epoch": 8.496043974348297, "grad_norm": 0.7734375, "learning_rate": 8.695263233943685e-07, "loss": 4.003, "step": 25505 }, { "epoch": 8.496377113350546, "grad_norm": 0.7421875, "learning_rate": 8.691480568079413e-07, "loss": 3.9741, "step": 25506 }, { "epoch": 8.496710252352795, "grad_norm": 0.76953125, "learning_rate": 8.687698674565783e-07, "loss": 3.9598, "step": 25507 }, { "epoch": 8.497043391355042, "grad_norm": 0.75, "learning_rate": 8.683917553446818e-07, "loss": 4.1193, "step": 25508 }, { "epoch": 8.497376530357291, "grad_norm": 0.7578125, "learning_rate": 8.680137204766558e-07, "loss": 4.0395, "step": 25509 }, { "epoch": 8.49770966935954, "grad_norm": 0.76171875, "learning_rate": 8.67635762856906e-07, "loss": 4.0237, "step": 25510 }, { "epoch": 8.498042808361788, "grad_norm": 0.7578125, "learning_rate": 8.672578824898344e-07, "loss": 4.0427, "step": 25511 }, { "epoch": 8.498375947364037, "grad_norm": 0.77734375, "learning_rate": 8.66880079379842e-07, "loss": 3.9894, "step": 25512 }, { "epoch": 8.498709086366286, "grad_norm": 0.71484375, "learning_rate": 8.665023535313277e-07, "loss": 3.9857, "step": 25513 }, { "epoch": 8.499042225368536, "grad_norm": 0.76953125, "learning_rate": 8.661247049486936e-07, "loss": 4.0533, "step": 25514 }, { "epoch": 8.499375364370783, "grad_norm": 0.74609375, "learning_rate": 8.65747133636339e-07, "loss": 3.9569, "step": 25515 }, { "epoch": 8.499708503373032, "grad_norm": 0.734375, "learning_rate": 8.653696395986602e-07, "loss": 4.0307, "step": 25516 }, { "epoch": 8.500041642375281, "grad_norm": 0.75, "learning_rate": 8.649922228400545e-07, "loss": 3.9776, "step": 25517 }, { "epoch": 8.50037478137753, "grad_norm": 0.76171875, "learning_rate": 8.646148833649176e-07, "loss": 4.0474, "step": 25518 }, { "epoch": 8.500707920379778, "grad_norm": 0.76171875, "learning_rate": 8.642376211776451e-07, "loss": 3.9848, "step": 25519 }, { "epoch": 8.501041059382027, "grad_norm": 0.77734375, "learning_rate": 8.638604362826303e-07, "loss": 4.013, "step": 25520 }, { "epoch": 8.501374198384276, "grad_norm": 0.78125, "learning_rate": 8.634833286842686e-07, "loss": 3.9953, "step": 25521 }, { "epoch": 8.501707337386524, "grad_norm": 0.73828125, "learning_rate": 8.631062983869517e-07, "loss": 3.9973, "step": 25522 }, { "epoch": 8.502040476388773, "grad_norm": 0.75390625, "learning_rate": 8.627293453950702e-07, "loss": 4.0092, "step": 25523 }, { "epoch": 8.502373615391022, "grad_norm": 0.8203125, "learning_rate": 8.623524697130147e-07, "loss": 3.9992, "step": 25524 }, { "epoch": 8.502706754393271, "grad_norm": 0.80078125, "learning_rate": 8.619756713451793e-07, "loss": 3.9616, "step": 25525 }, { "epoch": 8.503039893395519, "grad_norm": 0.78125, "learning_rate": 8.61598950295947e-07, "loss": 3.926, "step": 25526 }, { "epoch": 8.503373032397768, "grad_norm": 0.7890625, "learning_rate": 8.61222306569707e-07, "loss": 3.9624, "step": 25527 }, { "epoch": 8.503706171400017, "grad_norm": 0.78515625, "learning_rate": 8.608457401708489e-07, "loss": 3.9816, "step": 25528 }, { "epoch": 8.504039310402266, "grad_norm": 0.7890625, "learning_rate": 8.604692511037585e-07, "loss": 3.9817, "step": 25529 }, { "epoch": 8.504372449404514, "grad_norm": 0.75, "learning_rate": 8.600928393728189e-07, "loss": 4.0183, "step": 25530 }, { "epoch": 8.504705588406763, "grad_norm": 0.73828125, "learning_rate": 8.597165049824149e-07, "loss": 3.9767, "step": 25531 }, { "epoch": 8.505038727409012, "grad_norm": 0.765625, "learning_rate": 8.593402479369322e-07, "loss": 3.9783, "step": 25532 }, { "epoch": 8.50537186641126, "grad_norm": 0.73828125, "learning_rate": 8.589640682407515e-07, "loss": 4.0617, "step": 25533 }, { "epoch": 8.505705005413509, "grad_norm": 0.75390625, "learning_rate": 8.585879658982551e-07, "loss": 3.9895, "step": 25534 }, { "epoch": 8.506038144415758, "grad_norm": 0.7578125, "learning_rate": 8.582119409138236e-07, "loss": 3.9675, "step": 25535 }, { "epoch": 8.506371283418007, "grad_norm": 0.8203125, "learning_rate": 8.578359932918378e-07, "loss": 3.959, "step": 25536 }, { "epoch": 8.506704422420254, "grad_norm": 0.77734375, "learning_rate": 8.57460123036675e-07, "loss": 3.9711, "step": 25537 }, { "epoch": 8.507037561422504, "grad_norm": 0.75, "learning_rate": 8.570843301527123e-07, "loss": 3.9591, "step": 25538 }, { "epoch": 8.507370700424753, "grad_norm": 0.75390625, "learning_rate": 8.567086146443307e-07, "loss": 3.9988, "step": 25539 }, { "epoch": 8.507703839427, "grad_norm": 0.734375, "learning_rate": 8.563329765159039e-07, "loss": 3.9374, "step": 25540 }, { "epoch": 8.50803697842925, "grad_norm": 0.7734375, "learning_rate": 8.559574157718078e-07, "loss": 3.9336, "step": 25541 }, { "epoch": 8.508370117431499, "grad_norm": 0.75390625, "learning_rate": 8.555819324164155e-07, "loss": 3.9872, "step": 25542 }, { "epoch": 8.508703256433748, "grad_norm": 0.7578125, "learning_rate": 8.552065264541051e-07, "loss": 3.9756, "step": 25543 }, { "epoch": 8.509036395435995, "grad_norm": 0.71484375, "learning_rate": 8.548311978892448e-07, "loss": 3.9747, "step": 25544 }, { "epoch": 8.509369534438244, "grad_norm": 0.73046875, "learning_rate": 8.544559467262053e-07, "loss": 4.0028, "step": 25545 }, { "epoch": 8.509702673440493, "grad_norm": 0.7578125, "learning_rate": 8.540807729693615e-07, "loss": 3.8754, "step": 25546 }, { "epoch": 8.51003581244274, "grad_norm": 0.76953125, "learning_rate": 8.537056766230822e-07, "loss": 3.988, "step": 25547 }, { "epoch": 8.51036895144499, "grad_norm": 0.7734375, "learning_rate": 8.533306576917358e-07, "loss": 3.9898, "step": 25548 }, { "epoch": 8.51070209044724, "grad_norm": 0.76953125, "learning_rate": 8.529557161796886e-07, "loss": 4.0008, "step": 25549 }, { "epoch": 8.511035229449488, "grad_norm": 0.76171875, "learning_rate": 8.525808520913114e-07, "loss": 3.9565, "step": 25550 }, { "epoch": 8.511368368451736, "grad_norm": 0.73828125, "learning_rate": 8.522060654309707e-07, "loss": 3.9581, "step": 25551 }, { "epoch": 8.511701507453985, "grad_norm": 0.7578125, "learning_rate": 8.518313562030289e-07, "loss": 3.9801, "step": 25552 }, { "epoch": 8.512034646456234, "grad_norm": 0.78515625, "learning_rate": 8.514567244118498e-07, "loss": 3.9768, "step": 25553 }, { "epoch": 8.512367785458483, "grad_norm": 0.765625, "learning_rate": 8.510821700618019e-07, "loss": 3.9908, "step": 25554 }, { "epoch": 8.51270092446073, "grad_norm": 0.76953125, "learning_rate": 8.507076931572439e-07, "loss": 4.029, "step": 25555 }, { "epoch": 8.51303406346298, "grad_norm": 0.78125, "learning_rate": 8.503332937025391e-07, "loss": 4.0108, "step": 25556 }, { "epoch": 8.51336720246523, "grad_norm": 0.7734375, "learning_rate": 8.499589717020473e-07, "loss": 3.9833, "step": 25557 }, { "epoch": 8.513700341467477, "grad_norm": 0.75390625, "learning_rate": 8.495847271601309e-07, "loss": 3.9008, "step": 25558 }, { "epoch": 8.514033480469726, "grad_norm": 0.74609375, "learning_rate": 8.492105600811473e-07, "loss": 4.0388, "step": 25559 }, { "epoch": 8.514366619471975, "grad_norm": 0.73828125, "learning_rate": 8.488364704694554e-07, "loss": 4.0454, "step": 25560 }, { "epoch": 8.514699758474224, "grad_norm": 0.8046875, "learning_rate": 8.484624583294126e-07, "loss": 3.9972, "step": 25561 }, { "epoch": 8.515032897476472, "grad_norm": 0.78515625, "learning_rate": 8.480885236653738e-07, "loss": 4.0418, "step": 25562 }, { "epoch": 8.51536603647872, "grad_norm": 0.73828125, "learning_rate": 8.47714666481697e-07, "loss": 3.9485, "step": 25563 }, { "epoch": 8.51569917548097, "grad_norm": 0.73828125, "learning_rate": 8.473408867827331e-07, "loss": 4.025, "step": 25564 }, { "epoch": 8.516032314483217, "grad_norm": 0.7578125, "learning_rate": 8.469671845728402e-07, "loss": 4.0194, "step": 25565 }, { "epoch": 8.516365453485466, "grad_norm": 0.77734375, "learning_rate": 8.46593559856369e-07, "loss": 4.0833, "step": 25566 }, { "epoch": 8.516698592487716, "grad_norm": 0.74609375, "learning_rate": 8.46220012637671e-07, "loss": 4.0144, "step": 25567 }, { "epoch": 8.517031731489965, "grad_norm": 0.77734375, "learning_rate": 8.458465429210968e-07, "loss": 3.9592, "step": 25568 }, { "epoch": 8.517364870492212, "grad_norm": 0.7265625, "learning_rate": 8.454731507110005e-07, "loss": 3.9854, "step": 25569 }, { "epoch": 8.517698009494461, "grad_norm": 0.765625, "learning_rate": 8.450998360117271e-07, "loss": 4.0025, "step": 25570 }, { "epoch": 8.51803114849671, "grad_norm": 0.734375, "learning_rate": 8.447265988276245e-07, "loss": 3.9692, "step": 25571 }, { "epoch": 8.518364287498958, "grad_norm": 0.765625, "learning_rate": 8.443534391630428e-07, "loss": 4.0331, "step": 25572 }, { "epoch": 8.518697426501207, "grad_norm": 0.78125, "learning_rate": 8.439803570223284e-07, "loss": 4.0273, "step": 25573 }, { "epoch": 8.519030565503456, "grad_norm": 0.74609375, "learning_rate": 8.436073524098246e-07, "loss": 3.954, "step": 25574 }, { "epoch": 8.519363704505706, "grad_norm": 0.7421875, "learning_rate": 8.432344253298768e-07, "loss": 4.0316, "step": 25575 }, { "epoch": 8.519696843507953, "grad_norm": 0.78125, "learning_rate": 8.42861575786831e-07, "loss": 4.0307, "step": 25576 }, { "epoch": 8.520029982510202, "grad_norm": 0.76171875, "learning_rate": 8.424888037850278e-07, "loss": 4.012, "step": 25577 }, { "epoch": 8.520363121512451, "grad_norm": 0.765625, "learning_rate": 8.421161093288104e-07, "loss": 4.0006, "step": 25578 }, { "epoch": 8.5206962605147, "grad_norm": 0.79296875, "learning_rate": 8.417434924225195e-07, "loss": 3.9385, "step": 25579 }, { "epoch": 8.521029399516948, "grad_norm": 0.796875, "learning_rate": 8.413709530704949e-07, "loss": 3.995, "step": 25580 }, { "epoch": 8.521362538519197, "grad_norm": 0.77734375, "learning_rate": 8.409984912770757e-07, "loss": 3.9507, "step": 25581 }, { "epoch": 8.521695677521446, "grad_norm": 0.765625, "learning_rate": 8.406261070466e-07, "loss": 3.9291, "step": 25582 }, { "epoch": 8.522028816523694, "grad_norm": 0.73828125, "learning_rate": 8.402538003834071e-07, "loss": 3.926, "step": 25583 }, { "epoch": 8.522361955525943, "grad_norm": 0.76171875, "learning_rate": 8.398815712918323e-07, "loss": 3.8976, "step": 25584 }, { "epoch": 8.522695094528192, "grad_norm": 0.765625, "learning_rate": 8.395094197762115e-07, "loss": 4.0306, "step": 25585 }, { "epoch": 8.523028233530441, "grad_norm": 0.74609375, "learning_rate": 8.39137345840878e-07, "loss": 4.0069, "step": 25586 }, { "epoch": 8.523361372532689, "grad_norm": 0.76171875, "learning_rate": 8.387653494901698e-07, "loss": 3.9931, "step": 25587 }, { "epoch": 8.523694511534938, "grad_norm": 0.765625, "learning_rate": 8.38393430728416e-07, "loss": 3.9465, "step": 25588 }, { "epoch": 8.524027650537187, "grad_norm": 0.7578125, "learning_rate": 8.380215895599475e-07, "loss": 4.0107, "step": 25589 }, { "epoch": 8.524360789539436, "grad_norm": 0.71875, "learning_rate": 8.376498259890997e-07, "loss": 3.9844, "step": 25590 }, { "epoch": 8.524693928541684, "grad_norm": 0.765625, "learning_rate": 8.372781400202009e-07, "loss": 3.9795, "step": 25591 }, { "epoch": 8.525027067543933, "grad_norm": 0.7421875, "learning_rate": 8.369065316575802e-07, "loss": 3.9104, "step": 25592 }, { "epoch": 8.525360206546182, "grad_norm": 0.765625, "learning_rate": 8.365350009055642e-07, "loss": 3.9593, "step": 25593 }, { "epoch": 8.52569334554843, "grad_norm": 0.76953125, "learning_rate": 8.361635477684842e-07, "loss": 3.953, "step": 25594 }, { "epoch": 8.526026484550679, "grad_norm": 0.78515625, "learning_rate": 8.357921722506653e-07, "loss": 3.9743, "step": 25595 }, { "epoch": 8.526359623552928, "grad_norm": 0.8046875, "learning_rate": 8.354208743564323e-07, "loss": 3.9565, "step": 25596 }, { "epoch": 8.526692762555177, "grad_norm": 0.71875, "learning_rate": 8.350496540901118e-07, "loss": 3.8619, "step": 25597 }, { "epoch": 8.527025901557424, "grad_norm": 0.76171875, "learning_rate": 8.346785114560259e-07, "loss": 3.9581, "step": 25598 }, { "epoch": 8.527359040559674, "grad_norm": 0.7734375, "learning_rate": 8.343074464584982e-07, "loss": 4.0118, "step": 25599 }, { "epoch": 8.527692179561923, "grad_norm": 0.734375, "learning_rate": 8.339364591018508e-07, "loss": 3.9043, "step": 25600 }, { "epoch": 8.52802531856417, "grad_norm": 0.76953125, "learning_rate": 8.335655493904038e-07, "loss": 3.9664, "step": 25601 }, { "epoch": 8.52835845756642, "grad_norm": 0.71875, "learning_rate": 8.331947173284801e-07, "loss": 4.0514, "step": 25602 }, { "epoch": 8.528691596568668, "grad_norm": 0.75, "learning_rate": 8.328239629203973e-07, "loss": 4.0204, "step": 25603 }, { "epoch": 8.529024735570918, "grad_norm": 0.76953125, "learning_rate": 8.324532861704745e-07, "loss": 3.9654, "step": 25604 }, { "epoch": 8.529357874573165, "grad_norm": 0.82421875, "learning_rate": 8.32082687083029e-07, "loss": 3.9223, "step": 25605 }, { "epoch": 8.529691013575414, "grad_norm": 0.7734375, "learning_rate": 8.317121656623772e-07, "loss": 3.9738, "step": 25606 }, { "epoch": 8.530024152577663, "grad_norm": 0.796875, "learning_rate": 8.313417219128358e-07, "loss": 3.9556, "step": 25607 }, { "epoch": 8.53035729157991, "grad_norm": 0.765625, "learning_rate": 8.309713558387172e-07, "loss": 3.9736, "step": 25608 }, { "epoch": 8.53069043058216, "grad_norm": 0.7578125, "learning_rate": 8.306010674443388e-07, "loss": 3.9703, "step": 25609 }, { "epoch": 8.53102356958441, "grad_norm": 0.75390625, "learning_rate": 8.30230856734012e-07, "loss": 3.9995, "step": 25610 }, { "epoch": 8.531356708586658, "grad_norm": 0.76171875, "learning_rate": 8.298607237120493e-07, "loss": 3.9793, "step": 25611 }, { "epoch": 8.531689847588906, "grad_norm": 0.76953125, "learning_rate": 8.294906683827597e-07, "loss": 4.0001, "step": 25612 }, { "epoch": 8.532022986591155, "grad_norm": 0.7421875, "learning_rate": 8.291206907504572e-07, "loss": 4.01, "step": 25613 }, { "epoch": 8.532356125593404, "grad_norm": 0.7890625, "learning_rate": 8.28750790819452e-07, "loss": 4.0619, "step": 25614 }, { "epoch": 8.532689264595653, "grad_norm": 0.76953125, "learning_rate": 8.283809685940461e-07, "loss": 3.8992, "step": 25615 }, { "epoch": 8.5330224035979, "grad_norm": 0.78125, "learning_rate": 8.280112240785529e-07, "loss": 3.9578, "step": 25616 }, { "epoch": 8.53335554260015, "grad_norm": 0.76953125, "learning_rate": 8.276415572772772e-07, "loss": 4.0444, "step": 25617 }, { "epoch": 8.5336886816024, "grad_norm": 0.76171875, "learning_rate": 8.272719681945256e-07, "loss": 3.951, "step": 25618 }, { "epoch": 8.534021820604647, "grad_norm": 0.7890625, "learning_rate": 8.269024568346006e-07, "loss": 4.0183, "step": 25619 }, { "epoch": 8.534354959606896, "grad_norm": 0.75, "learning_rate": 8.265330232018095e-07, "loss": 3.9925, "step": 25620 }, { "epoch": 8.534688098609145, "grad_norm": 0.77734375, "learning_rate": 8.261636673004539e-07, "loss": 4.0255, "step": 25621 }, { "epoch": 8.535021237611394, "grad_norm": 0.73046875, "learning_rate": 8.257943891348368e-07, "loss": 4.05, "step": 25622 }, { "epoch": 8.535354376613641, "grad_norm": 0.78125, "learning_rate": 8.254251887092584e-07, "loss": 3.9723, "step": 25623 }, { "epoch": 8.53568751561589, "grad_norm": 0.76171875, "learning_rate": 8.250560660280193e-07, "loss": 3.995, "step": 25624 }, { "epoch": 8.53602065461814, "grad_norm": 0.7734375, "learning_rate": 8.246870210954185e-07, "loss": 4.0256, "step": 25625 }, { "epoch": 8.536353793620389, "grad_norm": 0.75, "learning_rate": 8.243180539157544e-07, "loss": 3.992, "step": 25626 }, { "epoch": 8.536686932622636, "grad_norm": 0.8125, "learning_rate": 8.239491644933276e-07, "loss": 3.949, "step": 25627 }, { "epoch": 8.537020071624886, "grad_norm": 0.7421875, "learning_rate": 8.235803528324312e-07, "loss": 4.0368, "step": 25628 }, { "epoch": 8.537353210627135, "grad_norm": 0.7734375, "learning_rate": 8.232116189373637e-07, "loss": 3.9126, "step": 25629 }, { "epoch": 8.537686349629382, "grad_norm": 0.73046875, "learning_rate": 8.228429628124173e-07, "loss": 3.9228, "step": 25630 }, { "epoch": 8.538019488631631, "grad_norm": 0.75390625, "learning_rate": 8.224743844618888e-07, "loss": 3.912, "step": 25631 }, { "epoch": 8.53835262763388, "grad_norm": 0.76171875, "learning_rate": 8.22105883890072e-07, "loss": 3.98, "step": 25632 }, { "epoch": 8.538685766636128, "grad_norm": 0.71484375, "learning_rate": 8.217374611012543e-07, "loss": 3.9701, "step": 25633 }, { "epoch": 8.539018905638377, "grad_norm": 0.74609375, "learning_rate": 8.213691160997316e-07, "loss": 3.9961, "step": 25634 }, { "epoch": 8.539352044640626, "grad_norm": 0.765625, "learning_rate": 8.21000848889793e-07, "loss": 3.9804, "step": 25635 }, { "epoch": 8.539685183642876, "grad_norm": 0.76953125, "learning_rate": 8.206326594757274e-07, "loss": 4.0497, "step": 25636 }, { "epoch": 8.540018322645123, "grad_norm": 0.76953125, "learning_rate": 8.202645478618231e-07, "loss": 3.9525, "step": 25637 }, { "epoch": 8.540351461647372, "grad_norm": 0.7734375, "learning_rate": 8.198965140523692e-07, "loss": 4.0027, "step": 25638 }, { "epoch": 8.540684600649621, "grad_norm": 0.7421875, "learning_rate": 8.195285580516524e-07, "loss": 3.9998, "step": 25639 }, { "epoch": 8.54101773965187, "grad_norm": 0.7421875, "learning_rate": 8.191606798639573e-07, "loss": 3.9923, "step": 25640 }, { "epoch": 8.541350878654118, "grad_norm": 0.78125, "learning_rate": 8.187928794935707e-07, "loss": 3.9878, "step": 25641 }, { "epoch": 8.541684017656367, "grad_norm": 0.76171875, "learning_rate": 8.184251569447748e-07, "loss": 3.9401, "step": 25642 }, { "epoch": 8.542017156658616, "grad_norm": 0.7578125, "learning_rate": 8.180575122218539e-07, "loss": 4.0389, "step": 25643 }, { "epoch": 8.542350295660864, "grad_norm": 0.734375, "learning_rate": 8.176899453290895e-07, "loss": 4.0082, "step": 25644 }, { "epoch": 8.542683434663113, "grad_norm": 0.78125, "learning_rate": 8.173224562707624e-07, "loss": 3.9467, "step": 25645 }, { "epoch": 8.543016573665362, "grad_norm": 0.734375, "learning_rate": 8.169550450511548e-07, "loss": 4.0118, "step": 25646 }, { "epoch": 8.543349712667611, "grad_norm": 0.7578125, "learning_rate": 8.16587711674546e-07, "loss": 4.0245, "step": 25647 }, { "epoch": 8.543682851669859, "grad_norm": 0.7578125, "learning_rate": 8.162204561452142e-07, "loss": 3.9508, "step": 25648 }, { "epoch": 8.544015990672108, "grad_norm": 0.7578125, "learning_rate": 8.158532784674349e-07, "loss": 4.0837, "step": 25649 }, { "epoch": 8.544349129674357, "grad_norm": 0.71484375, "learning_rate": 8.154861786454909e-07, "loss": 4.0118, "step": 25650 }, { "epoch": 8.544682268676606, "grad_norm": 0.75, "learning_rate": 8.151191566836528e-07, "loss": 4.0586, "step": 25651 }, { "epoch": 8.545015407678854, "grad_norm": 0.78125, "learning_rate": 8.147522125861944e-07, "loss": 3.9344, "step": 25652 }, { "epoch": 8.545348546681103, "grad_norm": 0.74609375, "learning_rate": 8.143853463573953e-07, "loss": 4.0359, "step": 25653 }, { "epoch": 8.545681685683352, "grad_norm": 0.796875, "learning_rate": 8.14018558001525e-07, "loss": 4.0218, "step": 25654 }, { "epoch": 8.5460148246856, "grad_norm": 0.765625, "learning_rate": 8.136518475228577e-07, "loss": 4.0092, "step": 25655 }, { "epoch": 8.546347963687849, "grad_norm": 0.734375, "learning_rate": 8.132852149256617e-07, "loss": 4.0153, "step": 25656 }, { "epoch": 8.546681102690098, "grad_norm": 0.80078125, "learning_rate": 8.12918660214211e-07, "loss": 3.9843, "step": 25657 }, { "epoch": 8.547014241692347, "grad_norm": 0.765625, "learning_rate": 8.125521833927732e-07, "loss": 4.0472, "step": 25658 }, { "epoch": 8.547347380694594, "grad_norm": 0.80859375, "learning_rate": 8.121857844656181e-07, "loss": 4.0063, "step": 25659 }, { "epoch": 8.547680519696844, "grad_norm": 0.7421875, "learning_rate": 8.11819463437013e-07, "loss": 3.9606, "step": 25660 }, { "epoch": 8.548013658699093, "grad_norm": 0.79296875, "learning_rate": 8.114532203112246e-07, "loss": 4.042, "step": 25661 }, { "epoch": 8.54834679770134, "grad_norm": 0.796875, "learning_rate": 8.110870550925178e-07, "loss": 3.9941, "step": 25662 }, { "epoch": 8.54867993670359, "grad_norm": 0.72265625, "learning_rate": 8.107209677851585e-07, "loss": 3.911, "step": 25663 }, { "epoch": 8.549013075705838, "grad_norm": 0.8046875, "learning_rate": 8.103549583934114e-07, "loss": 3.9454, "step": 25664 }, { "epoch": 8.549346214708088, "grad_norm": 0.73046875, "learning_rate": 8.099890269215398e-07, "loss": 3.9949, "step": 25665 }, { "epoch": 8.549679353710335, "grad_norm": 0.765625, "learning_rate": 8.096231733738055e-07, "loss": 4.0979, "step": 25666 }, { "epoch": 8.550012492712584, "grad_norm": 0.71875, "learning_rate": 8.092573977544682e-07, "loss": 4.0099, "step": 25667 }, { "epoch": 8.550345631714833, "grad_norm": 0.82421875, "learning_rate": 8.088917000677937e-07, "loss": 4.0371, "step": 25668 }, { "epoch": 8.55067877071708, "grad_norm": 0.7890625, "learning_rate": 8.085260803180361e-07, "loss": 3.941, "step": 25669 }, { "epoch": 8.55101190971933, "grad_norm": 0.78125, "learning_rate": 8.081605385094537e-07, "loss": 3.9866, "step": 25670 }, { "epoch": 8.55134504872158, "grad_norm": 0.78125, "learning_rate": 8.07795074646309e-07, "loss": 4.0132, "step": 25671 }, { "epoch": 8.551678187723828, "grad_norm": 0.77734375, "learning_rate": 8.074296887328559e-07, "loss": 3.9749, "step": 25672 }, { "epoch": 8.552011326726076, "grad_norm": 0.8046875, "learning_rate": 8.070643807733511e-07, "loss": 4.1018, "step": 25673 }, { "epoch": 8.552344465728325, "grad_norm": 0.77734375, "learning_rate": 8.066991507720478e-07, "loss": 4.057, "step": 25674 }, { "epoch": 8.552677604730574, "grad_norm": 0.77734375, "learning_rate": 8.063339987332036e-07, "loss": 3.969, "step": 25675 }, { "epoch": 8.553010743732823, "grad_norm": 0.77734375, "learning_rate": 8.059689246610691e-07, "loss": 3.9841, "step": 25676 }, { "epoch": 8.55334388273507, "grad_norm": 0.7734375, "learning_rate": 8.056039285598976e-07, "loss": 3.971, "step": 25677 }, { "epoch": 8.55367702173732, "grad_norm": 0.796875, "learning_rate": 8.052390104339408e-07, "loss": 3.9266, "step": 25678 }, { "epoch": 8.554010160739569, "grad_norm": 0.7421875, "learning_rate": 8.048741702874485e-07, "loss": 4.0064, "step": 25679 }, { "epoch": 8.554343299741817, "grad_norm": 0.7734375, "learning_rate": 8.045094081246698e-07, "loss": 4.0139, "step": 25680 }, { "epoch": 8.554676438744066, "grad_norm": 0.78515625, "learning_rate": 8.041447239498531e-07, "loss": 3.8883, "step": 25681 }, { "epoch": 8.555009577746315, "grad_norm": 0.74609375, "learning_rate": 8.037801177672491e-07, "loss": 3.9253, "step": 25682 }, { "epoch": 8.555342716748564, "grad_norm": 0.77734375, "learning_rate": 8.034155895811026e-07, "loss": 3.9918, "step": 25683 }, { "epoch": 8.555675855750811, "grad_norm": 0.796875, "learning_rate": 8.030511393956588e-07, "loss": 3.9861, "step": 25684 }, { "epoch": 8.55600899475306, "grad_norm": 0.734375, "learning_rate": 8.026867672151633e-07, "loss": 4.0137, "step": 25685 }, { "epoch": 8.55634213375531, "grad_norm": 0.77734375, "learning_rate": 8.023224730438628e-07, "loss": 4.0146, "step": 25686 }, { "epoch": 8.556675272757559, "grad_norm": 0.73828125, "learning_rate": 8.019582568859971e-07, "loss": 4.0378, "step": 25687 }, { "epoch": 8.557008411759806, "grad_norm": 0.7421875, "learning_rate": 8.015941187458087e-07, "loss": 3.9949, "step": 25688 }, { "epoch": 8.557341550762056, "grad_norm": 0.7265625, "learning_rate": 8.012300586275409e-07, "loss": 4.0259, "step": 25689 }, { "epoch": 8.557674689764305, "grad_norm": 0.74609375, "learning_rate": 8.008660765354337e-07, "loss": 3.9606, "step": 25690 }, { "epoch": 8.558007828766552, "grad_norm": 0.734375, "learning_rate": 8.005021724737268e-07, "loss": 4.0121, "step": 25691 }, { "epoch": 8.558340967768801, "grad_norm": 0.765625, "learning_rate": 8.00138346446658e-07, "loss": 3.9678, "step": 25692 }, { "epoch": 8.55867410677105, "grad_norm": 0.78515625, "learning_rate": 7.997745984584645e-07, "loss": 3.9743, "step": 25693 }, { "epoch": 8.559007245773298, "grad_norm": 0.80078125, "learning_rate": 7.994109285133855e-07, "loss": 3.9107, "step": 25694 }, { "epoch": 8.559340384775547, "grad_norm": 0.72265625, "learning_rate": 7.990473366156568e-07, "loss": 4.0234, "step": 25695 }, { "epoch": 8.559673523777796, "grad_norm": 0.80859375, "learning_rate": 7.986838227695098e-07, "loss": 3.9487, "step": 25696 }, { "epoch": 8.560006662780046, "grad_norm": 0.8125, "learning_rate": 7.98320386979183e-07, "loss": 3.996, "step": 25697 }, { "epoch": 8.560339801782293, "grad_norm": 0.7734375, "learning_rate": 7.97957029248908e-07, "loss": 3.9532, "step": 25698 }, { "epoch": 8.560672940784542, "grad_norm": 0.75390625, "learning_rate": 7.975937495829164e-07, "loss": 4.0636, "step": 25699 }, { "epoch": 8.561006079786791, "grad_norm": 0.765625, "learning_rate": 7.972305479854389e-07, "loss": 3.9707, "step": 25700 }, { "epoch": 8.56133921878904, "grad_norm": 0.734375, "learning_rate": 7.968674244607088e-07, "loss": 3.9908, "step": 25701 }, { "epoch": 8.561672357791288, "grad_norm": 0.765625, "learning_rate": 7.965043790129544e-07, "loss": 4.0164, "step": 25702 }, { "epoch": 8.562005496793537, "grad_norm": 0.80078125, "learning_rate": 7.961414116464042e-07, "loss": 3.9609, "step": 25703 }, { "epoch": 8.562338635795786, "grad_norm": 0.73046875, "learning_rate": 7.957785223652861e-07, "loss": 4.0067, "step": 25704 }, { "epoch": 8.562671774798034, "grad_norm": 0.8203125, "learning_rate": 7.954157111738269e-07, "loss": 3.9932, "step": 25705 }, { "epoch": 8.563004913800283, "grad_norm": 0.7734375, "learning_rate": 7.950529780762525e-07, "loss": 4.0153, "step": 25706 }, { "epoch": 8.563338052802532, "grad_norm": 0.7265625, "learning_rate": 7.946903230767869e-07, "loss": 3.9673, "step": 25707 }, { "epoch": 8.563671191804781, "grad_norm": 0.75390625, "learning_rate": 7.943277461796558e-07, "loss": 3.9774, "step": 25708 }, { "epoch": 8.564004330807029, "grad_norm": 0.75, "learning_rate": 7.939652473890827e-07, "loss": 4.0151, "step": 25709 }, { "epoch": 8.564337469809278, "grad_norm": 0.76171875, "learning_rate": 7.93602826709289e-07, "loss": 3.9775, "step": 25710 }, { "epoch": 8.564670608811527, "grad_norm": 0.78515625, "learning_rate": 7.932404841444957e-07, "loss": 3.9263, "step": 25711 }, { "epoch": 8.565003747813776, "grad_norm": 0.7890625, "learning_rate": 7.928782196989243e-07, "loss": 3.9188, "step": 25712 }, { "epoch": 8.565336886816024, "grad_norm": 0.765625, "learning_rate": 7.925160333767964e-07, "loss": 4.0018, "step": 25713 }, { "epoch": 8.565670025818273, "grad_norm": 0.75390625, "learning_rate": 7.921539251823245e-07, "loss": 3.9601, "step": 25714 }, { "epoch": 8.566003164820522, "grad_norm": 0.78125, "learning_rate": 7.91791895119732e-07, "loss": 3.9411, "step": 25715 }, { "epoch": 8.56633630382277, "grad_norm": 0.765625, "learning_rate": 7.914299431932345e-07, "loss": 3.9867, "step": 25716 }, { "epoch": 8.566669442825019, "grad_norm": 0.76171875, "learning_rate": 7.910680694070471e-07, "loss": 3.8975, "step": 25717 }, { "epoch": 8.567002581827268, "grad_norm": 0.74609375, "learning_rate": 7.907062737653831e-07, "loss": 4.0608, "step": 25718 }, { "epoch": 8.567335720829517, "grad_norm": 0.75390625, "learning_rate": 7.903445562724607e-07, "loss": 3.9702, "step": 25719 }, { "epoch": 8.567668859831764, "grad_norm": 0.75, "learning_rate": 7.899829169324915e-07, "loss": 4.0423, "step": 25720 }, { "epoch": 8.568001998834013, "grad_norm": 0.7890625, "learning_rate": 7.896213557496865e-07, "loss": 4.0202, "step": 25721 }, { "epoch": 8.568335137836263, "grad_norm": 0.80078125, "learning_rate": 7.892598727282587e-07, "loss": 3.929, "step": 25722 }, { "epoch": 8.56866827683851, "grad_norm": 0.734375, "learning_rate": 7.888984678724176e-07, "loss": 4.0128, "step": 25723 }, { "epoch": 8.56900141584076, "grad_norm": 0.796875, "learning_rate": 7.885371411863737e-07, "loss": 3.9904, "step": 25724 }, { "epoch": 8.569334554843008, "grad_norm": 0.765625, "learning_rate": 7.881758926743329e-07, "loss": 3.9687, "step": 25725 }, { "epoch": 8.569667693845258, "grad_norm": 0.71875, "learning_rate": 7.878147223405077e-07, "loss": 3.961, "step": 25726 }, { "epoch": 8.570000832847505, "grad_norm": 0.78125, "learning_rate": 7.874536301891014e-07, "loss": 3.9949, "step": 25727 }, { "epoch": 8.570333971849754, "grad_norm": 0.7578125, "learning_rate": 7.870926162243214e-07, "loss": 3.9436, "step": 25728 }, { "epoch": 8.570667110852003, "grad_norm": 0.76953125, "learning_rate": 7.86731680450371e-07, "loss": 3.9506, "step": 25729 }, { "epoch": 8.57100024985425, "grad_norm": 0.765625, "learning_rate": 7.863708228714561e-07, "loss": 3.982, "step": 25730 }, { "epoch": 8.5713333888565, "grad_norm": 0.76953125, "learning_rate": 7.860100434917816e-07, "loss": 3.9413, "step": 25731 }, { "epoch": 8.57166652785875, "grad_norm": 0.74609375, "learning_rate": 7.856493423155451e-07, "loss": 3.9543, "step": 25732 }, { "epoch": 8.571999666860998, "grad_norm": 0.765625, "learning_rate": 7.852887193469513e-07, "loss": 3.9994, "step": 25733 }, { "epoch": 8.572332805863246, "grad_norm": 0.7734375, "learning_rate": 7.849281745902004e-07, "loss": 3.9513, "step": 25734 }, { "epoch": 8.572665944865495, "grad_norm": 0.75390625, "learning_rate": 7.845677080494906e-07, "loss": 4.013, "step": 25735 }, { "epoch": 8.572999083867744, "grad_norm": 0.71484375, "learning_rate": 7.842073197290212e-07, "loss": 3.9871, "step": 25736 }, { "epoch": 8.573332222869993, "grad_norm": 0.75390625, "learning_rate": 7.838470096329902e-07, "loss": 4.0284, "step": 25737 }, { "epoch": 8.57366536187224, "grad_norm": 0.74609375, "learning_rate": 7.834867777655955e-07, "loss": 3.9408, "step": 25738 }, { "epoch": 8.57399850087449, "grad_norm": 0.75390625, "learning_rate": 7.831266241310317e-07, "loss": 4.0903, "step": 25739 }, { "epoch": 8.574331639876739, "grad_norm": 0.76171875, "learning_rate": 7.827665487334931e-07, "loss": 3.958, "step": 25740 }, { "epoch": 8.574664778878986, "grad_norm": 0.6953125, "learning_rate": 7.824065515771755e-07, "loss": 3.9656, "step": 25741 }, { "epoch": 8.574997917881236, "grad_norm": 0.73046875, "learning_rate": 7.820466326662715e-07, "loss": 4.0247, "step": 25742 }, { "epoch": 8.575331056883485, "grad_norm": 0.796875, "learning_rate": 7.816867920049725e-07, "loss": 3.9652, "step": 25743 }, { "epoch": 8.575664195885734, "grad_norm": 0.73828125, "learning_rate": 7.813270295974695e-07, "loss": 4.0597, "step": 25744 }, { "epoch": 8.575997334887981, "grad_norm": 0.79296875, "learning_rate": 7.809673454479557e-07, "loss": 4.0114, "step": 25745 }, { "epoch": 8.57633047389023, "grad_norm": 0.76171875, "learning_rate": 7.806077395606187e-07, "loss": 3.9401, "step": 25746 }, { "epoch": 8.57666361289248, "grad_norm": 0.76171875, "learning_rate": 7.802482119396467e-07, "loss": 4.0027, "step": 25747 }, { "epoch": 8.576996751894729, "grad_norm": 0.77734375, "learning_rate": 7.798887625892273e-07, "loss": 4.0823, "step": 25748 }, { "epoch": 8.577329890896976, "grad_norm": 0.7421875, "learning_rate": 7.795293915135512e-07, "loss": 4.0489, "step": 25749 }, { "epoch": 8.577663029899226, "grad_norm": 0.78515625, "learning_rate": 7.791700987167993e-07, "loss": 3.9275, "step": 25750 }, { "epoch": 8.577996168901475, "grad_norm": 0.7265625, "learning_rate": 7.788108842031566e-07, "loss": 3.9633, "step": 25751 }, { "epoch": 8.578329307903722, "grad_norm": 0.7265625, "learning_rate": 7.784517479768113e-07, "loss": 3.9719, "step": 25752 }, { "epoch": 8.578662446905971, "grad_norm": 0.74609375, "learning_rate": 7.780926900419436e-07, "loss": 4.0559, "step": 25753 }, { "epoch": 8.57899558590822, "grad_norm": 0.734375, "learning_rate": 7.777337104027365e-07, "loss": 3.9843, "step": 25754 }, { "epoch": 8.57932872491047, "grad_norm": 0.77734375, "learning_rate": 7.773748090633703e-07, "loss": 3.9824, "step": 25755 }, { "epoch": 8.579661863912717, "grad_norm": 0.75390625, "learning_rate": 7.770159860280274e-07, "loss": 3.9883, "step": 25756 }, { "epoch": 8.579995002914966, "grad_norm": 0.75390625, "learning_rate": 7.766572413008869e-07, "loss": 4.0218, "step": 25757 }, { "epoch": 8.580328141917215, "grad_norm": 0.7734375, "learning_rate": 7.762985748861262e-07, "loss": 4.0692, "step": 25758 }, { "epoch": 8.580661280919463, "grad_norm": 0.8203125, "learning_rate": 7.759399867879239e-07, "loss": 3.9517, "step": 25759 }, { "epoch": 8.580994419921712, "grad_norm": 0.80859375, "learning_rate": 7.755814770104564e-07, "loss": 3.9657, "step": 25760 }, { "epoch": 8.581327558923961, "grad_norm": 0.78515625, "learning_rate": 7.752230455578996e-07, "loss": 4.0038, "step": 25761 }, { "epoch": 8.58166069792621, "grad_norm": 0.7421875, "learning_rate": 7.748646924344269e-07, "loss": 4.0192, "step": 25762 }, { "epoch": 8.581993836928458, "grad_norm": 0.765625, "learning_rate": 7.745064176442157e-07, "loss": 3.9868, "step": 25763 }, { "epoch": 8.582326975930707, "grad_norm": 0.7890625, "learning_rate": 7.741482211914369e-07, "loss": 3.9276, "step": 25764 }, { "epoch": 8.582660114932956, "grad_norm": 0.7578125, "learning_rate": 7.737901030802638e-07, "loss": 3.9352, "step": 25765 }, { "epoch": 8.582993253935204, "grad_norm": 0.7578125, "learning_rate": 7.734320633148647e-07, "loss": 3.9105, "step": 25766 }, { "epoch": 8.583326392937453, "grad_norm": 0.77734375, "learning_rate": 7.730741018994156e-07, "loss": 4.0313, "step": 25767 }, { "epoch": 8.583659531939702, "grad_norm": 0.80078125, "learning_rate": 7.727162188380821e-07, "loss": 4.0349, "step": 25768 }, { "epoch": 8.583992670941951, "grad_norm": 0.78515625, "learning_rate": 7.72358414135031e-07, "loss": 3.9731, "step": 25769 }, { "epoch": 8.584325809944199, "grad_norm": 0.77734375, "learning_rate": 7.720006877944338e-07, "loss": 3.9534, "step": 25770 }, { "epoch": 8.584658948946448, "grad_norm": 0.7578125, "learning_rate": 7.716430398204557e-07, "loss": 4.0125, "step": 25771 }, { "epoch": 8.584992087948697, "grad_norm": 0.75390625, "learning_rate": 7.712854702172634e-07, "loss": 3.9826, "step": 25772 }, { "epoch": 8.585325226950946, "grad_norm": 0.74609375, "learning_rate": 7.709279789890191e-07, "loss": 3.9368, "step": 25773 }, { "epoch": 8.585658365953194, "grad_norm": 0.78125, "learning_rate": 7.705705661398899e-07, "loss": 4.0044, "step": 25774 }, { "epoch": 8.585991504955443, "grad_norm": 0.83203125, "learning_rate": 7.702132316740379e-07, "loss": 3.9932, "step": 25775 }, { "epoch": 8.586324643957692, "grad_norm": 0.77734375, "learning_rate": 7.69855975595625e-07, "loss": 3.9987, "step": 25776 }, { "epoch": 8.58665778295994, "grad_norm": 0.7734375, "learning_rate": 7.694987979088119e-07, "loss": 3.9523, "step": 25777 }, { "epoch": 8.586990921962188, "grad_norm": 0.734375, "learning_rate": 7.691416986177604e-07, "loss": 3.9668, "step": 25778 }, { "epoch": 8.587324060964438, "grad_norm": 0.7578125, "learning_rate": 7.687846777266289e-07, "loss": 3.9656, "step": 25779 }, { "epoch": 8.587657199966687, "grad_norm": 0.76171875, "learning_rate": 7.684277352395746e-07, "loss": 3.9448, "step": 25780 }, { "epoch": 8.587990338968934, "grad_norm": 0.734375, "learning_rate": 7.680708711607579e-07, "loss": 4.012, "step": 25781 }, { "epoch": 8.588323477971183, "grad_norm": 0.75, "learning_rate": 7.677140854943351e-07, "loss": 4.0586, "step": 25782 }, { "epoch": 8.588656616973433, "grad_norm": 0.7421875, "learning_rate": 7.673573782444599e-07, "loss": 4.0195, "step": 25783 }, { "epoch": 8.58898975597568, "grad_norm": 0.76953125, "learning_rate": 7.670007494152879e-07, "loss": 3.9729, "step": 25784 }, { "epoch": 8.58932289497793, "grad_norm": 0.79296875, "learning_rate": 7.666441990109757e-07, "loss": 4.0059, "step": 25785 }, { "epoch": 8.589656033980178, "grad_norm": 0.8203125, "learning_rate": 7.662877270356727e-07, "loss": 3.9743, "step": 25786 }, { "epoch": 8.589989172982428, "grad_norm": 0.7578125, "learning_rate": 7.659313334935331e-07, "loss": 3.9951, "step": 25787 }, { "epoch": 8.590322311984675, "grad_norm": 0.75390625, "learning_rate": 7.655750183887059e-07, "loss": 4.0005, "step": 25788 }, { "epoch": 8.590655450986924, "grad_norm": 0.79296875, "learning_rate": 7.652187817253437e-07, "loss": 3.9601, "step": 25789 }, { "epoch": 8.590988589989173, "grad_norm": 0.75390625, "learning_rate": 7.648626235075956e-07, "loss": 4.0061, "step": 25790 }, { "epoch": 8.59132172899142, "grad_norm": 0.80078125, "learning_rate": 7.645065437396101e-07, "loss": 3.9412, "step": 25791 }, { "epoch": 8.59165486799367, "grad_norm": 0.7421875, "learning_rate": 7.641505424255321e-07, "loss": 3.9845, "step": 25792 }, { "epoch": 8.59198800699592, "grad_norm": 0.83984375, "learning_rate": 7.637946195695125e-07, "loss": 3.9461, "step": 25793 }, { "epoch": 8.592321145998168, "grad_norm": 0.82421875, "learning_rate": 7.634387751756964e-07, "loss": 3.933, "step": 25794 }, { "epoch": 8.592654285000416, "grad_norm": 0.78515625, "learning_rate": 7.630830092482244e-07, "loss": 3.9846, "step": 25795 }, { "epoch": 8.592987424002665, "grad_norm": 0.74609375, "learning_rate": 7.627273217912442e-07, "loss": 4.0048, "step": 25796 }, { "epoch": 8.593320563004914, "grad_norm": 0.73828125, "learning_rate": 7.623717128088983e-07, "loss": 3.9541, "step": 25797 }, { "epoch": 8.593653702007163, "grad_norm": 0.81640625, "learning_rate": 7.620161823053276e-07, "loss": 3.9953, "step": 25798 }, { "epoch": 8.59398684100941, "grad_norm": 0.796875, "learning_rate": 7.616607302846729e-07, "loss": 3.9462, "step": 25799 }, { "epoch": 8.59431998001166, "grad_norm": 0.76171875, "learning_rate": 7.613053567510767e-07, "loss": 3.9852, "step": 25800 }, { "epoch": 8.594653119013909, "grad_norm": 0.78125, "learning_rate": 7.609500617086773e-07, "loss": 4.0126, "step": 25801 }, { "epoch": 8.594986258016156, "grad_norm": 0.78515625, "learning_rate": 7.605948451616132e-07, "loss": 3.9903, "step": 25802 }, { "epoch": 8.595319397018406, "grad_norm": 0.74609375, "learning_rate": 7.60239707114021e-07, "loss": 4.0254, "step": 25803 }, { "epoch": 8.595652536020655, "grad_norm": 0.78125, "learning_rate": 7.598846475700383e-07, "loss": 3.9617, "step": 25804 }, { "epoch": 8.595985675022904, "grad_norm": 0.78515625, "learning_rate": 7.595296665338008e-07, "loss": 3.9434, "step": 25805 }, { "epoch": 8.596318814025151, "grad_norm": 0.75, "learning_rate": 7.591747640094413e-07, "loss": 3.9082, "step": 25806 }, { "epoch": 8.5966519530274, "grad_norm": 0.74609375, "learning_rate": 7.588199400010961e-07, "loss": 3.9075, "step": 25807 }, { "epoch": 8.59698509202965, "grad_norm": 0.76953125, "learning_rate": 7.584651945128973e-07, "loss": 3.9294, "step": 25808 }, { "epoch": 8.597318231031899, "grad_norm": 0.76953125, "learning_rate": 7.581105275489772e-07, "loss": 3.956, "step": 25809 }, { "epoch": 8.597651370034146, "grad_norm": 0.7578125, "learning_rate": 7.577559391134642e-07, "loss": 3.9265, "step": 25810 }, { "epoch": 8.597984509036396, "grad_norm": 0.78515625, "learning_rate": 7.57401429210495e-07, "loss": 3.937, "step": 25811 }, { "epoch": 8.598317648038645, "grad_norm": 0.74609375, "learning_rate": 7.570469978441921e-07, "loss": 3.9684, "step": 25812 }, { "epoch": 8.598650787040892, "grad_norm": 0.7578125, "learning_rate": 7.566926450186856e-07, "loss": 4.0183, "step": 25813 }, { "epoch": 8.598983926043141, "grad_norm": 0.77734375, "learning_rate": 7.563383707381055e-07, "loss": 3.9747, "step": 25814 }, { "epoch": 8.59931706504539, "grad_norm": 0.73828125, "learning_rate": 7.55984175006576e-07, "loss": 4.0115, "step": 25815 }, { "epoch": 8.59965020404764, "grad_norm": 0.7734375, "learning_rate": 7.556300578282238e-07, "loss": 3.9493, "step": 25816 }, { "epoch": 8.599983343049887, "grad_norm": 0.75, "learning_rate": 7.552760192071715e-07, "loss": 4.0087, "step": 25817 }, { "epoch": 8.600316482052136, "grad_norm": 0.73828125, "learning_rate": 7.549220591475464e-07, "loss": 4.0365, "step": 25818 }, { "epoch": 8.600649621054385, "grad_norm": 0.765625, "learning_rate": 7.545681776534688e-07, "loss": 3.9921, "step": 25819 }, { "epoch": 8.600982760056633, "grad_norm": 0.78515625, "learning_rate": 7.54214374729062e-07, "loss": 3.9552, "step": 25820 }, { "epoch": 8.601315899058882, "grad_norm": 0.7421875, "learning_rate": 7.538606503784459e-07, "loss": 3.9857, "step": 25821 }, { "epoch": 8.601649038061131, "grad_norm": 0.890625, "learning_rate": 7.535070046057416e-07, "loss": 3.9358, "step": 25822 }, { "epoch": 8.60198217706338, "grad_norm": 0.7734375, "learning_rate": 7.531534374150672e-07, "loss": 3.9939, "step": 25823 }, { "epoch": 8.602315316065628, "grad_norm": 0.73046875, "learning_rate": 7.527999488105406e-07, "loss": 3.9811, "step": 25824 }, { "epoch": 8.602648455067877, "grad_norm": 0.7421875, "learning_rate": 7.524465387962814e-07, "loss": 3.9938, "step": 25825 }, { "epoch": 8.602981594070126, "grad_norm": 0.75390625, "learning_rate": 7.52093207376405e-07, "loss": 4.0626, "step": 25826 }, { "epoch": 8.603314733072374, "grad_norm": 0.76953125, "learning_rate": 7.517399545550272e-07, "loss": 4.0034, "step": 25827 }, { "epoch": 8.603647872074623, "grad_norm": 0.75, "learning_rate": 7.513867803362604e-07, "loss": 4.0156, "step": 25828 }, { "epoch": 8.603981011076872, "grad_norm": 0.7734375, "learning_rate": 7.51033684724223e-07, "loss": 3.9193, "step": 25829 }, { "epoch": 8.604314150079121, "grad_norm": 0.74609375, "learning_rate": 7.506806677230235e-07, "loss": 4.0137, "step": 25830 }, { "epoch": 8.604647289081369, "grad_norm": 0.78125, "learning_rate": 7.503277293367753e-07, "loss": 3.994, "step": 25831 }, { "epoch": 8.604980428083618, "grad_norm": 0.75390625, "learning_rate": 7.499748695695874e-07, "loss": 4.0811, "step": 25832 }, { "epoch": 8.605313567085867, "grad_norm": 0.75, "learning_rate": 7.496220884255733e-07, "loss": 4.038, "step": 25833 }, { "epoch": 8.605646706088116, "grad_norm": 0.77734375, "learning_rate": 7.492693859088406e-07, "loss": 3.9323, "step": 25834 }, { "epoch": 8.605979845090364, "grad_norm": 0.7578125, "learning_rate": 7.489167620234977e-07, "loss": 4.0161, "step": 25835 }, { "epoch": 8.606312984092613, "grad_norm": 0.765625, "learning_rate": 7.485642167736504e-07, "loss": 4.0283, "step": 25836 }, { "epoch": 8.606646123094862, "grad_norm": 0.75390625, "learning_rate": 7.482117501634078e-07, "loss": 4.0086, "step": 25837 }, { "epoch": 8.60697926209711, "grad_norm": 0.77734375, "learning_rate": 7.478593621968752e-07, "loss": 4.015, "step": 25838 }, { "epoch": 8.607312401099358, "grad_norm": 0.7578125, "learning_rate": 7.475070528781533e-07, "loss": 4.0293, "step": 25839 }, { "epoch": 8.607645540101608, "grad_norm": 0.79296875, "learning_rate": 7.471548222113497e-07, "loss": 3.9653, "step": 25840 }, { "epoch": 8.607978679103857, "grad_norm": 0.76953125, "learning_rate": 7.468026702005654e-07, "loss": 4.0476, "step": 25841 }, { "epoch": 8.608311818106104, "grad_norm": 0.765625, "learning_rate": 7.464505968499027e-07, "loss": 3.9379, "step": 25842 }, { "epoch": 8.608644957108353, "grad_norm": 0.77734375, "learning_rate": 7.460986021634611e-07, "loss": 3.9511, "step": 25843 }, { "epoch": 8.608978096110603, "grad_norm": 0.77734375, "learning_rate": 7.457466861453438e-07, "loss": 4.0003, "step": 25844 }, { "epoch": 8.60931123511285, "grad_norm": 0.80078125, "learning_rate": 7.453948487996468e-07, "loss": 3.9787, "step": 25845 }, { "epoch": 8.6096443741151, "grad_norm": 0.75390625, "learning_rate": 7.4504309013047e-07, "loss": 4.059, "step": 25846 }, { "epoch": 8.609977513117348, "grad_norm": 0.76171875, "learning_rate": 7.446914101419103e-07, "loss": 4.0049, "step": 25847 }, { "epoch": 8.610310652119598, "grad_norm": 0.73828125, "learning_rate": 7.443398088380635e-07, "loss": 4.0607, "step": 25848 }, { "epoch": 8.610643791121845, "grad_norm": 0.74609375, "learning_rate": 7.439882862230246e-07, "loss": 4.0276, "step": 25849 }, { "epoch": 8.610976930124094, "grad_norm": 0.79296875, "learning_rate": 7.43636842300888e-07, "loss": 3.9442, "step": 25850 }, { "epoch": 8.611310069126343, "grad_norm": 0.765625, "learning_rate": 7.432854770757494e-07, "loss": 4.1091, "step": 25851 }, { "epoch": 8.61164320812859, "grad_norm": 0.74609375, "learning_rate": 7.42934190551699e-07, "loss": 3.9482, "step": 25852 }, { "epoch": 8.61197634713084, "grad_norm": 0.77734375, "learning_rate": 7.42582982732831e-07, "loss": 3.9395, "step": 25853 }, { "epoch": 8.612309486133089, "grad_norm": 0.703125, "learning_rate": 7.42231853623232e-07, "loss": 4.0536, "step": 25854 }, { "epoch": 8.612642625135338, "grad_norm": 0.73046875, "learning_rate": 7.418808032269963e-07, "loss": 4.0177, "step": 25855 }, { "epoch": 8.612975764137586, "grad_norm": 0.765625, "learning_rate": 7.415298315482133e-07, "loss": 3.9146, "step": 25856 }, { "epoch": 8.613308903139835, "grad_norm": 0.7578125, "learning_rate": 7.411789385909654e-07, "loss": 4.0035, "step": 25857 }, { "epoch": 8.613642042142084, "grad_norm": 0.7890625, "learning_rate": 7.408281243593442e-07, "loss": 3.9919, "step": 25858 }, { "epoch": 8.613975181144333, "grad_norm": 0.78125, "learning_rate": 7.40477388857436e-07, "loss": 3.9857, "step": 25859 }, { "epoch": 8.61430832014658, "grad_norm": 0.7578125, "learning_rate": 7.401267320893246e-07, "loss": 4.0185, "step": 25860 }, { "epoch": 8.61464145914883, "grad_norm": 0.81640625, "learning_rate": 7.397761540590936e-07, "loss": 3.9704, "step": 25861 }, { "epoch": 8.614974598151079, "grad_norm": 0.76953125, "learning_rate": 7.394256547708297e-07, "loss": 3.995, "step": 25862 }, { "epoch": 8.615307737153326, "grad_norm": 0.75, "learning_rate": 7.39075234228613e-07, "loss": 3.9853, "step": 25863 }, { "epoch": 8.615640876155576, "grad_norm": 0.796875, "learning_rate": 7.387248924365262e-07, "loss": 3.9481, "step": 25864 }, { "epoch": 8.615974015157825, "grad_norm": 0.83984375, "learning_rate": 7.38374629398649e-07, "loss": 4.0356, "step": 25865 }, { "epoch": 8.616307154160074, "grad_norm": 0.77734375, "learning_rate": 7.380244451190618e-07, "loss": 3.9984, "step": 25866 }, { "epoch": 8.616640293162321, "grad_norm": 0.74609375, "learning_rate": 7.376743396018429e-07, "loss": 4.0545, "step": 25867 }, { "epoch": 8.61697343216457, "grad_norm": 0.76953125, "learning_rate": 7.373243128510698e-07, "loss": 4.0476, "step": 25868 }, { "epoch": 8.61730657116682, "grad_norm": 0.7734375, "learning_rate": 7.369743648708227e-07, "loss": 3.9493, "step": 25869 }, { "epoch": 8.617639710169069, "grad_norm": 0.79296875, "learning_rate": 7.36624495665175e-07, "loss": 3.9918, "step": 25870 }, { "epoch": 8.617972849171316, "grad_norm": 0.74609375, "learning_rate": 7.362747052382015e-07, "loss": 3.9238, "step": 25871 }, { "epoch": 8.618305988173566, "grad_norm": 0.78125, "learning_rate": 7.359249935939777e-07, "loss": 3.9989, "step": 25872 }, { "epoch": 8.618639127175815, "grad_norm": 0.75390625, "learning_rate": 7.355753607365768e-07, "loss": 3.9303, "step": 25873 }, { "epoch": 8.618972266178062, "grad_norm": 0.77734375, "learning_rate": 7.352258066700738e-07, "loss": 3.9709, "step": 25874 }, { "epoch": 8.619305405180311, "grad_norm": 0.84375, "learning_rate": 7.348763313985354e-07, "loss": 3.9659, "step": 25875 }, { "epoch": 8.61963854418256, "grad_norm": 0.75, "learning_rate": 7.345269349260336e-07, "loss": 4.0468, "step": 25876 }, { "epoch": 8.61997168318481, "grad_norm": 0.75, "learning_rate": 7.3417761725664e-07, "loss": 4.0052, "step": 25877 }, { "epoch": 8.620304822187057, "grad_norm": 0.75, "learning_rate": 7.338283783944222e-07, "loss": 4.0366, "step": 25878 }, { "epoch": 8.620637961189306, "grad_norm": 0.75390625, "learning_rate": 7.334792183434494e-07, "loss": 3.9804, "step": 25879 }, { "epoch": 8.620971100191555, "grad_norm": 0.7734375, "learning_rate": 7.33130137107785e-07, "loss": 3.939, "step": 25880 }, { "epoch": 8.621304239193803, "grad_norm": 0.71875, "learning_rate": 7.327811346914992e-07, "loss": 3.8981, "step": 25881 }, { "epoch": 8.621637378196052, "grad_norm": 0.765625, "learning_rate": 7.324322110986553e-07, "loss": 3.9964, "step": 25882 }, { "epoch": 8.621970517198301, "grad_norm": 0.7734375, "learning_rate": 7.320833663333184e-07, "loss": 4.0952, "step": 25883 }, { "epoch": 8.62230365620055, "grad_norm": 0.75390625, "learning_rate": 7.317346003995504e-07, "loss": 4.0519, "step": 25884 }, { "epoch": 8.622636795202798, "grad_norm": 0.81640625, "learning_rate": 7.313859133014136e-07, "loss": 3.9125, "step": 25885 }, { "epoch": 8.622969934205047, "grad_norm": 0.74609375, "learning_rate": 7.310373050429708e-07, "loss": 3.9641, "step": 25886 }, { "epoch": 8.623303073207296, "grad_norm": 0.7421875, "learning_rate": 7.306887756282804e-07, "loss": 4.0205, "step": 25887 }, { "epoch": 8.623636212209544, "grad_norm": 0.72265625, "learning_rate": 7.303403250614049e-07, "loss": 3.997, "step": 25888 }, { "epoch": 8.623969351211793, "grad_norm": 0.75390625, "learning_rate": 7.29991953346402e-07, "loss": 3.976, "step": 25889 }, { "epoch": 8.624302490214042, "grad_norm": 0.76953125, "learning_rate": 7.296436604873283e-07, "loss": 3.9267, "step": 25890 }, { "epoch": 8.624635629216291, "grad_norm": 0.76171875, "learning_rate": 7.292954464882398e-07, "loss": 3.9868, "step": 25891 }, { "epoch": 8.624968768218539, "grad_norm": 0.80078125, "learning_rate": 7.289473113531975e-07, "loss": 3.9358, "step": 25892 }, { "epoch": 8.625301907220788, "grad_norm": 0.828125, "learning_rate": 7.285992550862514e-07, "loss": 3.9182, "step": 25893 }, { "epoch": 8.625635046223037, "grad_norm": 0.7265625, "learning_rate": 7.282512776914557e-07, "loss": 4.0381, "step": 25894 }, { "epoch": 8.625968185225286, "grad_norm": 0.72265625, "learning_rate": 7.279033791728664e-07, "loss": 3.8984, "step": 25895 }, { "epoch": 8.626301324227533, "grad_norm": 0.77734375, "learning_rate": 7.275555595345346e-07, "loss": 3.9666, "step": 25896 }, { "epoch": 8.626634463229783, "grad_norm": 0.7890625, "learning_rate": 7.272078187805109e-07, "loss": 3.8728, "step": 25897 }, { "epoch": 8.626967602232032, "grad_norm": 0.78125, "learning_rate": 7.268601569148456e-07, "loss": 3.9064, "step": 25898 }, { "epoch": 8.62730074123428, "grad_norm": 0.7890625, "learning_rate": 7.265125739415896e-07, "loss": 4.0017, "step": 25899 }, { "epoch": 8.627633880236528, "grad_norm": 0.7578125, "learning_rate": 7.261650698647912e-07, "loss": 3.9902, "step": 25900 }, { "epoch": 8.627967019238778, "grad_norm": 0.7890625, "learning_rate": 7.258176446884973e-07, "loss": 4.013, "step": 25901 }, { "epoch": 8.628300158241027, "grad_norm": 0.77734375, "learning_rate": 7.254702984167555e-07, "loss": 4.0268, "step": 25902 }, { "epoch": 8.628633297243274, "grad_norm": 0.8125, "learning_rate": 7.251230310536108e-07, "loss": 3.9511, "step": 25903 }, { "epoch": 8.628966436245523, "grad_norm": 0.78125, "learning_rate": 7.247758426031084e-07, "loss": 3.9868, "step": 25904 }, { "epoch": 8.629299575247773, "grad_norm": 0.75390625, "learning_rate": 7.244287330692908e-07, "loss": 4.0085, "step": 25905 }, { "epoch": 8.629632714250022, "grad_norm": 0.73828125, "learning_rate": 7.240817024562041e-07, "loss": 4.0305, "step": 25906 }, { "epoch": 8.62996585325227, "grad_norm": 0.75390625, "learning_rate": 7.237347507678891e-07, "loss": 3.976, "step": 25907 }, { "epoch": 8.630298992254518, "grad_norm": 0.77734375, "learning_rate": 7.233878780083866e-07, "loss": 3.897, "step": 25908 }, { "epoch": 8.630632131256768, "grad_norm": 0.7890625, "learning_rate": 7.230410841817362e-07, "loss": 3.9914, "step": 25909 }, { "epoch": 8.630965270259015, "grad_norm": 0.8046875, "learning_rate": 7.226943692919802e-07, "loss": 3.9883, "step": 25910 }, { "epoch": 8.631298409261264, "grad_norm": 0.76953125, "learning_rate": 7.223477333431546e-07, "loss": 4.0442, "step": 25911 }, { "epoch": 8.631631548263513, "grad_norm": 0.80078125, "learning_rate": 7.220011763392964e-07, "loss": 3.9378, "step": 25912 }, { "epoch": 8.63196468726576, "grad_norm": 0.78125, "learning_rate": 7.216546982844438e-07, "loss": 3.8871, "step": 25913 }, { "epoch": 8.63229782626801, "grad_norm": 0.76171875, "learning_rate": 7.213082991826328e-07, "loss": 4.0169, "step": 25914 }, { "epoch": 8.632630965270259, "grad_norm": 0.73828125, "learning_rate": 7.209619790378976e-07, "loss": 4.0185, "step": 25915 }, { "epoch": 8.632964104272508, "grad_norm": 0.76171875, "learning_rate": 7.206157378542703e-07, "loss": 3.9422, "step": 25916 }, { "epoch": 8.633297243274756, "grad_norm": 0.7578125, "learning_rate": 7.202695756357871e-07, "loss": 3.943, "step": 25917 }, { "epoch": 8.633630382277005, "grad_norm": 0.77734375, "learning_rate": 7.199234923864778e-07, "loss": 3.959, "step": 25918 }, { "epoch": 8.633963521279254, "grad_norm": 0.7421875, "learning_rate": 7.195774881103765e-07, "loss": 3.9676, "step": 25919 }, { "epoch": 8.634296660281503, "grad_norm": 0.74609375, "learning_rate": 7.192315628115073e-07, "loss": 4.0489, "step": 25920 }, { "epoch": 8.63462979928375, "grad_norm": 0.7265625, "learning_rate": 7.188857164939055e-07, "loss": 4.0094, "step": 25921 }, { "epoch": 8.634962938286, "grad_norm": 0.74609375, "learning_rate": 7.185399491615971e-07, "loss": 3.9173, "step": 25922 }, { "epoch": 8.635296077288249, "grad_norm": 0.81640625, "learning_rate": 7.181942608186096e-07, "loss": 4.0575, "step": 25923 }, { "epoch": 8.635629216290496, "grad_norm": 0.796875, "learning_rate": 7.178486514689681e-07, "loss": 4.0304, "step": 25924 }, { "epoch": 8.635962355292746, "grad_norm": 0.73828125, "learning_rate": 7.17503121116701e-07, "loss": 4.0422, "step": 25925 }, { "epoch": 8.636295494294995, "grad_norm": 0.72265625, "learning_rate": 7.171576697658319e-07, "loss": 3.9291, "step": 25926 }, { "epoch": 8.636628633297244, "grad_norm": 0.72265625, "learning_rate": 7.168122974203841e-07, "loss": 3.9996, "step": 25927 }, { "epoch": 8.636961772299491, "grad_norm": 0.7890625, "learning_rate": 7.164670040843813e-07, "loss": 3.9814, "step": 25928 }, { "epoch": 8.63729491130174, "grad_norm": 0.78125, "learning_rate": 7.161217897618442e-07, "loss": 3.9643, "step": 25929 }, { "epoch": 8.63762805030399, "grad_norm": 0.765625, "learning_rate": 7.157766544567945e-07, "loss": 3.9867, "step": 25930 }, { "epoch": 8.637961189306239, "grad_norm": 0.76953125, "learning_rate": 7.15431598173251e-07, "loss": 3.987, "step": 25931 }, { "epoch": 8.638294328308486, "grad_norm": 0.74609375, "learning_rate": 7.150866209152351e-07, "loss": 4.0426, "step": 25932 }, { "epoch": 8.638627467310735, "grad_norm": 0.75, "learning_rate": 7.147417226867639e-07, "loss": 3.9941, "step": 25933 }, { "epoch": 8.638960606312985, "grad_norm": 0.8125, "learning_rate": 7.143969034918557e-07, "loss": 3.9402, "step": 25934 }, { "epoch": 8.639293745315232, "grad_norm": 0.74609375, "learning_rate": 7.140521633345231e-07, "loss": 3.9894, "step": 25935 }, { "epoch": 8.639626884317481, "grad_norm": 0.7578125, "learning_rate": 7.137075022187872e-07, "loss": 3.8775, "step": 25936 }, { "epoch": 8.63996002331973, "grad_norm": 0.765625, "learning_rate": 7.133629201486605e-07, "loss": 3.9926, "step": 25937 }, { "epoch": 8.64029316232198, "grad_norm": 0.83203125, "learning_rate": 7.130184171281531e-07, "loss": 3.9839, "step": 25938 }, { "epoch": 8.640626301324227, "grad_norm": 0.796875, "learning_rate": 7.126739931612828e-07, "loss": 4.0144, "step": 25939 }, { "epoch": 8.640959440326476, "grad_norm": 0.859375, "learning_rate": 7.123296482520578e-07, "loss": 4.0008, "step": 25940 }, { "epoch": 8.641292579328725, "grad_norm": 0.75390625, "learning_rate": 7.119853824044917e-07, "loss": 4.0118, "step": 25941 }, { "epoch": 8.641625718330973, "grad_norm": 0.75, "learning_rate": 7.116411956225904e-07, "loss": 4.0124, "step": 25942 }, { "epoch": 8.641958857333222, "grad_norm": 0.76171875, "learning_rate": 7.112970879103683e-07, "loss": 3.9838, "step": 25943 }, { "epoch": 8.642291996335471, "grad_norm": 0.734375, "learning_rate": 7.109530592718297e-07, "loss": 3.9915, "step": 25944 }, { "epoch": 8.64262513533772, "grad_norm": 0.78125, "learning_rate": 7.106091097109837e-07, "loss": 3.9595, "step": 25945 }, { "epoch": 8.642958274339968, "grad_norm": 0.74609375, "learning_rate": 7.102652392318348e-07, "loss": 3.9889, "step": 25946 }, { "epoch": 8.643291413342217, "grad_norm": 0.76953125, "learning_rate": 7.099214478383906e-07, "loss": 3.9442, "step": 25947 }, { "epoch": 8.643624552344466, "grad_norm": 0.80078125, "learning_rate": 7.095777355346536e-07, "loss": 3.9635, "step": 25948 }, { "epoch": 8.643957691346714, "grad_norm": 0.75, "learning_rate": 7.092341023246265e-07, "loss": 3.9693, "step": 25949 }, { "epoch": 8.644290830348963, "grad_norm": 0.7734375, "learning_rate": 7.088905482123145e-07, "loss": 4.0254, "step": 25950 }, { "epoch": 8.644623969351212, "grad_norm": 0.765625, "learning_rate": 7.085470732017185e-07, "loss": 3.95, "step": 25951 }, { "epoch": 8.644957108353461, "grad_norm": 0.76171875, "learning_rate": 7.082036772968386e-07, "loss": 3.9884, "step": 25952 }, { "epoch": 8.645290247355709, "grad_norm": 0.77734375, "learning_rate": 7.078603605016734e-07, "loss": 3.9992, "step": 25953 }, { "epoch": 8.645623386357958, "grad_norm": 0.734375, "learning_rate": 7.075171228202254e-07, "loss": 4.026, "step": 25954 }, { "epoch": 8.645956525360207, "grad_norm": 0.76953125, "learning_rate": 7.071739642564914e-07, "loss": 3.9934, "step": 25955 }, { "epoch": 8.646289664362456, "grad_norm": 0.796875, "learning_rate": 7.068308848144642e-07, "loss": 3.9642, "step": 25956 }, { "epoch": 8.646622803364703, "grad_norm": 0.75390625, "learning_rate": 7.064878844981445e-07, "loss": 3.9956, "step": 25957 }, { "epoch": 8.646955942366953, "grad_norm": 0.7578125, "learning_rate": 7.061449633115269e-07, "loss": 3.969, "step": 25958 }, { "epoch": 8.647289081369202, "grad_norm": 0.765625, "learning_rate": 7.058021212586046e-07, "loss": 4.0359, "step": 25959 }, { "epoch": 8.64762222037145, "grad_norm": 0.73828125, "learning_rate": 7.054593583433704e-07, "loss": 4.006, "step": 25960 }, { "epoch": 8.647955359373698, "grad_norm": 0.76953125, "learning_rate": 7.051166745698187e-07, "loss": 3.9866, "step": 25961 }, { "epoch": 8.648288498375948, "grad_norm": 0.765625, "learning_rate": 7.047740699419403e-07, "loss": 3.9368, "step": 25962 }, { "epoch": 8.648621637378197, "grad_norm": 0.765625, "learning_rate": 7.044315444637261e-07, "loss": 3.9325, "step": 25963 }, { "epoch": 8.648954776380444, "grad_norm": 0.78515625, "learning_rate": 7.04089098139164e-07, "loss": 3.9265, "step": 25964 }, { "epoch": 8.649287915382693, "grad_norm": 0.796875, "learning_rate": 7.037467309722448e-07, "loss": 4.0114, "step": 25965 }, { "epoch": 8.649621054384943, "grad_norm": 0.7890625, "learning_rate": 7.034044429669554e-07, "loss": 4.0162, "step": 25966 }, { "epoch": 8.649954193387192, "grad_norm": 0.77734375, "learning_rate": 7.030622341272825e-07, "loss": 4.0652, "step": 25967 }, { "epoch": 8.65028733238944, "grad_norm": 0.69921875, "learning_rate": 7.027201044572113e-07, "loss": 4.016, "step": 25968 }, { "epoch": 8.650620471391688, "grad_norm": 0.7890625, "learning_rate": 7.023780539607294e-07, "loss": 3.9565, "step": 25969 }, { "epoch": 8.650953610393938, "grad_norm": 0.73046875, "learning_rate": 7.020360826418196e-07, "loss": 3.9979, "step": 25970 }, { "epoch": 8.651286749396185, "grad_norm": 0.77734375, "learning_rate": 7.016941905044652e-07, "loss": 3.9564, "step": 25971 }, { "epoch": 8.651619888398434, "grad_norm": 0.73046875, "learning_rate": 7.013523775526464e-07, "loss": 4.0317, "step": 25972 }, { "epoch": 8.651953027400683, "grad_norm": 0.81640625, "learning_rate": 7.010106437903501e-07, "loss": 3.9562, "step": 25973 }, { "epoch": 8.65228616640293, "grad_norm": 0.7578125, "learning_rate": 7.006689892215515e-07, "loss": 3.9795, "step": 25974 }, { "epoch": 8.65261930540518, "grad_norm": 0.75, "learning_rate": 7.003274138502305e-07, "loss": 3.9908, "step": 25975 }, { "epoch": 8.652952444407429, "grad_norm": 0.7578125, "learning_rate": 6.999859176803683e-07, "loss": 4.0349, "step": 25976 }, { "epoch": 8.653285583409678, "grad_norm": 0.75, "learning_rate": 6.996445007159416e-07, "loss": 3.9626, "step": 25977 }, { "epoch": 8.653618722411926, "grad_norm": 0.7734375, "learning_rate": 6.993031629609264e-07, "loss": 3.961, "step": 25978 }, { "epoch": 8.653951861414175, "grad_norm": 0.76953125, "learning_rate": 6.98961904419298e-07, "loss": 3.9243, "step": 25979 }, { "epoch": 8.654285000416424, "grad_norm": 0.7421875, "learning_rate": 6.986207250950338e-07, "loss": 4.0194, "step": 25980 }, { "epoch": 8.654618139418673, "grad_norm": 0.76953125, "learning_rate": 6.982796249921067e-07, "loss": 3.9237, "step": 25981 }, { "epoch": 8.65495127842092, "grad_norm": 0.765625, "learning_rate": 6.979386041144892e-07, "loss": 3.9162, "step": 25982 }, { "epoch": 8.65528441742317, "grad_norm": 0.8046875, "learning_rate": 6.975976624661531e-07, "loss": 3.9907, "step": 25983 }, { "epoch": 8.655617556425419, "grad_norm": 0.734375, "learning_rate": 6.972568000510712e-07, "loss": 3.996, "step": 25984 }, { "epoch": 8.655950695427666, "grad_norm": 0.7421875, "learning_rate": 6.969160168732127e-07, "loss": 4.0662, "step": 25985 }, { "epoch": 8.656283834429916, "grad_norm": 0.76953125, "learning_rate": 6.965753129365454e-07, "loss": 3.9116, "step": 25986 }, { "epoch": 8.656616973432165, "grad_norm": 0.75390625, "learning_rate": 6.96234688245041e-07, "loss": 4.0188, "step": 25987 }, { "epoch": 8.656950112434414, "grad_norm": 0.765625, "learning_rate": 6.958941428026655e-07, "loss": 3.973, "step": 25988 }, { "epoch": 8.657283251436661, "grad_norm": 0.77734375, "learning_rate": 6.955536766133857e-07, "loss": 3.9522, "step": 25989 }, { "epoch": 8.65761639043891, "grad_norm": 0.7734375, "learning_rate": 6.952132896811661e-07, "loss": 4.0196, "step": 25990 }, { "epoch": 8.65794952944116, "grad_norm": 0.73046875, "learning_rate": 6.948729820099742e-07, "loss": 3.9393, "step": 25991 }, { "epoch": 8.658282668443409, "grad_norm": 0.796875, "learning_rate": 6.945327536037719e-07, "loss": 3.9014, "step": 25992 }, { "epoch": 8.658615807445656, "grad_norm": 0.765625, "learning_rate": 6.941926044665209e-07, "loss": 3.9836, "step": 25993 }, { "epoch": 8.658948946447905, "grad_norm": 0.76171875, "learning_rate": 6.938525346021857e-07, "loss": 4.0236, "step": 25994 }, { "epoch": 8.659282085450155, "grad_norm": 0.734375, "learning_rate": 6.935125440147264e-07, "loss": 3.9808, "step": 25995 }, { "epoch": 8.659615224452402, "grad_norm": 0.765625, "learning_rate": 6.931726327081032e-07, "loss": 4.0088, "step": 25996 }, { "epoch": 8.659948363454651, "grad_norm": 0.7734375, "learning_rate": 6.928328006862744e-07, "loss": 3.9354, "step": 25997 }, { "epoch": 8.6602815024569, "grad_norm": 0.75390625, "learning_rate": 6.924930479531997e-07, "loss": 3.9923, "step": 25998 }, { "epoch": 8.66061464145915, "grad_norm": 0.75, "learning_rate": 6.921533745128364e-07, "loss": 4.076, "step": 25999 }, { "epoch": 8.660947780461397, "grad_norm": 0.71484375, "learning_rate": 6.918137803691408e-07, "loss": 3.9772, "step": 26000 }, { "epoch": 8.661280919463646, "grad_norm": 0.80859375, "learning_rate": 6.914742655260678e-07, "loss": 3.951, "step": 26001 }, { "epoch": 8.661614058465895, "grad_norm": 0.75, "learning_rate": 6.91134829987572e-07, "loss": 3.9965, "step": 26002 }, { "epoch": 8.661947197468143, "grad_norm": 0.7890625, "learning_rate": 6.907954737576075e-07, "loss": 3.9763, "step": 26003 }, { "epoch": 8.662280336470392, "grad_norm": 0.75390625, "learning_rate": 6.904561968401263e-07, "loss": 3.9877, "step": 26004 }, { "epoch": 8.662613475472641, "grad_norm": 0.76171875, "learning_rate": 6.901169992390818e-07, "loss": 4.0312, "step": 26005 }, { "epoch": 8.66294661447489, "grad_norm": 0.71484375, "learning_rate": 6.897778809584243e-07, "loss": 3.9253, "step": 26006 }, { "epoch": 8.663279753477138, "grad_norm": 0.76171875, "learning_rate": 6.894388420021039e-07, "loss": 3.9727, "step": 26007 }, { "epoch": 8.663612892479387, "grad_norm": 0.76171875, "learning_rate": 6.890998823740674e-07, "loss": 3.8606, "step": 26008 }, { "epoch": 8.663946031481636, "grad_norm": 0.7734375, "learning_rate": 6.887610020782683e-07, "loss": 3.9969, "step": 26009 }, { "epoch": 8.664279170483884, "grad_norm": 0.78515625, "learning_rate": 6.884222011186486e-07, "loss": 3.9927, "step": 26010 }, { "epoch": 8.664612309486133, "grad_norm": 0.74609375, "learning_rate": 6.880834794991558e-07, "loss": 3.9603, "step": 26011 }, { "epoch": 8.664945448488382, "grad_norm": 0.78515625, "learning_rate": 6.877448372237352e-07, "loss": 3.9992, "step": 26012 }, { "epoch": 8.665278587490631, "grad_norm": 0.796875, "learning_rate": 6.874062742963336e-07, "loss": 3.9869, "step": 26013 }, { "epoch": 8.665611726492878, "grad_norm": 0.71875, "learning_rate": 6.87067790720892e-07, "loss": 3.9502, "step": 26014 }, { "epoch": 8.665944865495128, "grad_norm": 0.78125, "learning_rate": 6.867293865013547e-07, "loss": 3.9723, "step": 26015 }, { "epoch": 8.666278004497377, "grad_norm": 0.79296875, "learning_rate": 6.863910616416603e-07, "loss": 3.9159, "step": 26016 }, { "epoch": 8.666611143499626, "grad_norm": 0.73046875, "learning_rate": 6.860528161457539e-07, "loss": 3.9632, "step": 26017 }, { "epoch": 8.666944282501873, "grad_norm": 0.77734375, "learning_rate": 6.85714650017574e-07, "loss": 3.9744, "step": 26018 }, { "epoch": 8.667277421504123, "grad_norm": 0.7265625, "learning_rate": 6.853765632610568e-07, "loss": 3.9867, "step": 26019 }, { "epoch": 8.667610560506372, "grad_norm": 0.7421875, "learning_rate": 6.85038555880143e-07, "loss": 3.939, "step": 26020 }, { "epoch": 8.66794369950862, "grad_norm": 0.76171875, "learning_rate": 6.84700627878769e-07, "loss": 3.9543, "step": 26021 }, { "epoch": 8.668276838510868, "grad_norm": 0.796875, "learning_rate": 6.843627792608706e-07, "loss": 3.9603, "step": 26022 }, { "epoch": 8.668609977513118, "grad_norm": 0.7578125, "learning_rate": 6.840250100303813e-07, "loss": 3.9529, "step": 26023 }, { "epoch": 8.668943116515367, "grad_norm": 0.7890625, "learning_rate": 6.836873201912397e-07, "loss": 4.0121, "step": 26024 }, { "epoch": 8.669276255517614, "grad_norm": 0.78125, "learning_rate": 6.833497097473759e-07, "loss": 4.0088, "step": 26025 }, { "epoch": 8.669609394519863, "grad_norm": 0.7421875, "learning_rate": 6.830121787027235e-07, "loss": 3.9775, "step": 26026 }, { "epoch": 8.669942533522113, "grad_norm": 0.7578125, "learning_rate": 6.826747270612135e-07, "loss": 3.9637, "step": 26027 }, { "epoch": 8.670275672524362, "grad_norm": 0.75, "learning_rate": 6.82337354826777e-07, "loss": 3.9431, "step": 26028 }, { "epoch": 8.67060881152661, "grad_norm": 0.7265625, "learning_rate": 6.820000620033423e-07, "loss": 4.0558, "step": 26029 }, { "epoch": 8.670941950528858, "grad_norm": 0.76171875, "learning_rate": 6.816628485948389e-07, "loss": 4.0152, "step": 26030 }, { "epoch": 8.671275089531107, "grad_norm": 0.7421875, "learning_rate": 6.813257146051954e-07, "loss": 3.996, "step": 26031 }, { "epoch": 8.671608228533355, "grad_norm": 0.7578125, "learning_rate": 6.809886600383386e-07, "loss": 3.9634, "step": 26032 }, { "epoch": 8.671941367535604, "grad_norm": 0.76953125, "learning_rate": 6.806516848981936e-07, "loss": 3.9736, "step": 26033 }, { "epoch": 8.672274506537853, "grad_norm": 0.7890625, "learning_rate": 6.803147891886849e-07, "loss": 4.0109, "step": 26034 }, { "epoch": 8.672607645540102, "grad_norm": 0.7578125, "learning_rate": 6.799779729137385e-07, "loss": 3.9224, "step": 26035 }, { "epoch": 8.67294078454235, "grad_norm": 0.6953125, "learning_rate": 6.796412360772778e-07, "loss": 4.0508, "step": 26036 }, { "epoch": 8.673273923544599, "grad_norm": 0.78125, "learning_rate": 6.793045786832214e-07, "loss": 3.9953, "step": 26037 }, { "epoch": 8.673607062546848, "grad_norm": 0.8046875, "learning_rate": 6.789680007354937e-07, "loss": 3.9477, "step": 26038 }, { "epoch": 8.673940201549096, "grad_norm": 0.76953125, "learning_rate": 6.786315022380157e-07, "loss": 3.9718, "step": 26039 }, { "epoch": 8.674273340551345, "grad_norm": 0.75, "learning_rate": 6.782950831947044e-07, "loss": 3.8747, "step": 26040 }, { "epoch": 8.674606479553594, "grad_norm": 0.765625, "learning_rate": 6.779587436094789e-07, "loss": 4.0234, "step": 26041 }, { "epoch": 8.674939618555843, "grad_norm": 0.75, "learning_rate": 6.776224834862588e-07, "loss": 3.9664, "step": 26042 }, { "epoch": 8.67527275755809, "grad_norm": 0.76171875, "learning_rate": 6.772863028289599e-07, "loss": 3.9393, "step": 26043 }, { "epoch": 8.67560589656034, "grad_norm": 0.765625, "learning_rate": 6.769502016414969e-07, "loss": 3.977, "step": 26044 }, { "epoch": 8.675939035562589, "grad_norm": 0.75, "learning_rate": 6.766141799277856e-07, "loss": 4.0107, "step": 26045 }, { "epoch": 8.676272174564836, "grad_norm": 0.78125, "learning_rate": 6.762782376917404e-07, "loss": 3.9334, "step": 26046 }, { "epoch": 8.676605313567086, "grad_norm": 0.79296875, "learning_rate": 6.759423749372723e-07, "loss": 3.9925, "step": 26047 }, { "epoch": 8.676938452569335, "grad_norm": 0.75390625, "learning_rate": 6.756065916682941e-07, "loss": 3.912, "step": 26048 }, { "epoch": 8.677271591571584, "grad_norm": 0.7265625, "learning_rate": 6.752708878887185e-07, "loss": 3.9957, "step": 26049 }, { "epoch": 8.677604730573831, "grad_norm": 0.7890625, "learning_rate": 6.749352636024547e-07, "loss": 3.9438, "step": 26050 }, { "epoch": 8.67793786957608, "grad_norm": 0.71875, "learning_rate": 6.745997188134123e-07, "loss": 3.9742, "step": 26051 }, { "epoch": 8.67827100857833, "grad_norm": 0.75390625, "learning_rate": 6.74264253525498e-07, "loss": 3.9879, "step": 26052 }, { "epoch": 8.678604147580579, "grad_norm": 0.73828125, "learning_rate": 6.739288677426223e-07, "loss": 3.9522, "step": 26053 }, { "epoch": 8.678937286582826, "grad_norm": 0.75, "learning_rate": 6.735935614686908e-07, "loss": 4.0281, "step": 26054 }, { "epoch": 8.679270425585075, "grad_norm": 0.76171875, "learning_rate": 6.732583347076074e-07, "loss": 3.9974, "step": 26055 }, { "epoch": 8.679603564587325, "grad_norm": 0.78125, "learning_rate": 6.729231874632763e-07, "loss": 3.9184, "step": 26056 }, { "epoch": 8.679936703589572, "grad_norm": 0.7421875, "learning_rate": 6.725881197396036e-07, "loss": 4.039, "step": 26057 }, { "epoch": 8.680269842591821, "grad_norm": 0.75390625, "learning_rate": 6.722531315404912e-07, "loss": 3.9664, "step": 26058 }, { "epoch": 8.68060298159407, "grad_norm": 0.74609375, "learning_rate": 6.719182228698409e-07, "loss": 3.927, "step": 26059 }, { "epoch": 8.68093612059632, "grad_norm": 0.8203125, "learning_rate": 6.71583393731553e-07, "loss": 3.9047, "step": 26060 }, { "epoch": 8.681269259598567, "grad_norm": 0.77734375, "learning_rate": 6.712486441295293e-07, "loss": 3.9602, "step": 26061 }, { "epoch": 8.681602398600816, "grad_norm": 0.7578125, "learning_rate": 6.709139740676692e-07, "loss": 4.1108, "step": 26062 }, { "epoch": 8.681935537603065, "grad_norm": 0.76953125, "learning_rate": 6.705793835498663e-07, "loss": 3.9244, "step": 26063 }, { "epoch": 8.682268676605313, "grad_norm": 0.765625, "learning_rate": 6.702448725800225e-07, "loss": 4.0781, "step": 26064 }, { "epoch": 8.682601815607562, "grad_norm": 0.7578125, "learning_rate": 6.69910441162033e-07, "loss": 3.9829, "step": 26065 }, { "epoch": 8.682934954609811, "grad_norm": 0.7265625, "learning_rate": 6.695760892997938e-07, "loss": 4.022, "step": 26066 }, { "epoch": 8.68326809361206, "grad_norm": 0.81640625, "learning_rate": 6.692418169971958e-07, "loss": 3.9573, "step": 26067 }, { "epoch": 8.683601232614308, "grad_norm": 0.796875, "learning_rate": 6.689076242581371e-07, "loss": 4.0202, "step": 26068 }, { "epoch": 8.683934371616557, "grad_norm": 0.75390625, "learning_rate": 6.685735110865085e-07, "loss": 4.0083, "step": 26069 }, { "epoch": 8.684267510618806, "grad_norm": 0.7734375, "learning_rate": 6.682394774862011e-07, "loss": 4.0265, "step": 26070 }, { "epoch": 8.684600649621053, "grad_norm": 0.79296875, "learning_rate": 6.679055234611068e-07, "loss": 3.9338, "step": 26071 }, { "epoch": 8.684933788623303, "grad_norm": 0.78515625, "learning_rate": 6.675716490151148e-07, "loss": 3.9853, "step": 26072 }, { "epoch": 8.685266927625552, "grad_norm": 0.76171875, "learning_rate": 6.67237854152114e-07, "loss": 4.0414, "step": 26073 }, { "epoch": 8.685600066627801, "grad_norm": 0.77734375, "learning_rate": 6.669041388759911e-07, "loss": 3.9667, "step": 26074 }, { "epoch": 8.685933205630048, "grad_norm": 0.7890625, "learning_rate": 6.665705031906363e-07, "loss": 4.0093, "step": 26075 }, { "epoch": 8.686266344632298, "grad_norm": 0.72265625, "learning_rate": 6.662369470999332e-07, "loss": 3.9783, "step": 26076 }, { "epoch": 8.686599483634547, "grad_norm": 0.77734375, "learning_rate": 6.65903470607768e-07, "loss": 3.9505, "step": 26077 }, { "epoch": 8.686932622636796, "grad_norm": 0.75, "learning_rate": 6.65570073718024e-07, "loss": 4.0383, "step": 26078 }, { "epoch": 8.687265761639043, "grad_norm": 0.74609375, "learning_rate": 6.652367564345857e-07, "loss": 3.9791, "step": 26079 }, { "epoch": 8.687598900641293, "grad_norm": 0.78125, "learning_rate": 6.649035187613375e-07, "loss": 3.9002, "step": 26080 }, { "epoch": 8.687932039643542, "grad_norm": 0.7578125, "learning_rate": 6.645703607021547e-07, "loss": 3.9637, "step": 26081 }, { "epoch": 8.68826517864579, "grad_norm": 0.79296875, "learning_rate": 6.642372822609241e-07, "loss": 4.0093, "step": 26082 }, { "epoch": 8.688598317648038, "grad_norm": 0.734375, "learning_rate": 6.639042834415218e-07, "loss": 4.0523, "step": 26083 }, { "epoch": 8.688931456650288, "grad_norm": 0.74609375, "learning_rate": 6.635713642478289e-07, "loss": 3.9564, "step": 26084 }, { "epoch": 8.689264595652537, "grad_norm": 0.7734375, "learning_rate": 6.632385246837197e-07, "loss": 3.9186, "step": 26085 }, { "epoch": 8.689597734654784, "grad_norm": 0.73828125, "learning_rate": 6.629057647530745e-07, "loss": 3.9654, "step": 26086 }, { "epoch": 8.689930873657033, "grad_norm": 0.78125, "learning_rate": 6.625730844597685e-07, "loss": 3.9382, "step": 26087 }, { "epoch": 8.690264012659282, "grad_norm": 0.73828125, "learning_rate": 6.622404838076762e-07, "loss": 3.9611, "step": 26088 }, { "epoch": 8.690597151661532, "grad_norm": 0.80859375, "learning_rate": 6.619079628006711e-07, "loss": 3.9906, "step": 26089 }, { "epoch": 8.690930290663779, "grad_norm": 0.734375, "learning_rate": 6.615755214426275e-07, "loss": 4.0016, "step": 26090 }, { "epoch": 8.691263429666028, "grad_norm": 0.76171875, "learning_rate": 6.612431597374174e-07, "loss": 3.964, "step": 26091 }, { "epoch": 8.691596568668277, "grad_norm": 0.71484375, "learning_rate": 6.609108776889103e-07, "loss": 3.9968, "step": 26092 }, { "epoch": 8.691929707670525, "grad_norm": 0.73046875, "learning_rate": 6.605786753009788e-07, "loss": 4.0524, "step": 26093 }, { "epoch": 8.692262846672774, "grad_norm": 0.77734375, "learning_rate": 6.602465525774914e-07, "loss": 3.947, "step": 26094 }, { "epoch": 8.692595985675023, "grad_norm": 0.7734375, "learning_rate": 6.599145095223177e-07, "loss": 4.0308, "step": 26095 }, { "epoch": 8.692929124677272, "grad_norm": 0.796875, "learning_rate": 6.595825461393228e-07, "loss": 3.9529, "step": 26096 }, { "epoch": 8.69326226367952, "grad_norm": 0.7734375, "learning_rate": 6.592506624323754e-07, "loss": 4.0122, "step": 26097 }, { "epoch": 8.693595402681769, "grad_norm": 0.796875, "learning_rate": 6.589188584053432e-07, "loss": 4.0037, "step": 26098 }, { "epoch": 8.693928541684018, "grad_norm": 0.76171875, "learning_rate": 6.585871340620864e-07, "loss": 3.9809, "step": 26099 }, { "epoch": 8.694261680686266, "grad_norm": 0.734375, "learning_rate": 6.582554894064702e-07, "loss": 4.0227, "step": 26100 }, { "epoch": 8.694594819688515, "grad_norm": 0.75390625, "learning_rate": 6.579239244423591e-07, "loss": 4.0328, "step": 26101 }, { "epoch": 8.694927958690764, "grad_norm": 0.73046875, "learning_rate": 6.575924391736149e-07, "loss": 3.9555, "step": 26102 }, { "epoch": 8.695261097693013, "grad_norm": 0.7890625, "learning_rate": 6.572610336040979e-07, "loss": 4.0176, "step": 26103 }, { "epoch": 8.69559423669526, "grad_norm": 0.78125, "learning_rate": 6.569297077376676e-07, "loss": 3.9565, "step": 26104 }, { "epoch": 8.69592737569751, "grad_norm": 0.73828125, "learning_rate": 6.56598461578185e-07, "loss": 3.9151, "step": 26105 }, { "epoch": 8.696260514699759, "grad_norm": 0.7265625, "learning_rate": 6.562672951295079e-07, "loss": 3.9256, "step": 26106 }, { "epoch": 8.696593653702006, "grad_norm": 0.79296875, "learning_rate": 6.559362083954931e-07, "loss": 3.9662, "step": 26107 }, { "epoch": 8.696926792704256, "grad_norm": 0.73046875, "learning_rate": 6.556052013799977e-07, "loss": 3.958, "step": 26108 }, { "epoch": 8.697259931706505, "grad_norm": 0.76953125, "learning_rate": 6.552742740868759e-07, "loss": 3.9926, "step": 26109 }, { "epoch": 8.697593070708754, "grad_norm": 0.78125, "learning_rate": 6.549434265199838e-07, "loss": 3.9448, "step": 26110 }, { "epoch": 8.697926209711001, "grad_norm": 0.7890625, "learning_rate": 6.546126586831727e-07, "loss": 4.0328, "step": 26111 }, { "epoch": 8.69825934871325, "grad_norm": 0.76171875, "learning_rate": 6.542819705802985e-07, "loss": 3.9908, "step": 26112 }, { "epoch": 8.6985924877155, "grad_norm": 0.75, "learning_rate": 6.539513622152116e-07, "loss": 3.9286, "step": 26113 }, { "epoch": 8.698925626717749, "grad_norm": 0.7890625, "learning_rate": 6.536208335917629e-07, "loss": 3.8766, "step": 26114 }, { "epoch": 8.699258765719996, "grad_norm": 0.78515625, "learning_rate": 6.532903847138003e-07, "loss": 3.9886, "step": 26115 }, { "epoch": 8.699591904722245, "grad_norm": 0.77734375, "learning_rate": 6.52960015585178e-07, "loss": 4.0305, "step": 26116 }, { "epoch": 8.699925043724495, "grad_norm": 0.78515625, "learning_rate": 6.5262972620974e-07, "loss": 3.9962, "step": 26117 }, { "epoch": 8.700258182726742, "grad_norm": 0.73828125, "learning_rate": 6.52299516591332e-07, "loss": 3.9906, "step": 26118 }, { "epoch": 8.700591321728991, "grad_norm": 0.74609375, "learning_rate": 6.519693867338044e-07, "loss": 3.9789, "step": 26119 }, { "epoch": 8.70092446073124, "grad_norm": 0.734375, "learning_rate": 6.516393366410009e-07, "loss": 4.0009, "step": 26120 }, { "epoch": 8.70125759973349, "grad_norm": 0.7265625, "learning_rate": 6.513093663167649e-07, "loss": 3.9732, "step": 26121 }, { "epoch": 8.701590738735737, "grad_norm": 0.80078125, "learning_rate": 6.509794757649401e-07, "loss": 3.9775, "step": 26122 }, { "epoch": 8.701923877737986, "grad_norm": 0.78125, "learning_rate": 6.506496649893701e-07, "loss": 3.9448, "step": 26123 }, { "epoch": 8.702257016740235, "grad_norm": 0.796875, "learning_rate": 6.503199339938959e-07, "loss": 4.0153, "step": 26124 }, { "epoch": 8.702590155742483, "grad_norm": 0.765625, "learning_rate": 6.499902827823579e-07, "loss": 3.9314, "step": 26125 }, { "epoch": 8.702923294744732, "grad_norm": 0.7421875, "learning_rate": 6.496607113585962e-07, "loss": 3.8894, "step": 26126 }, { "epoch": 8.703256433746981, "grad_norm": 0.71875, "learning_rate": 6.493312197264495e-07, "loss": 4.0028, "step": 26127 }, { "epoch": 8.70358957274923, "grad_norm": 0.78515625, "learning_rate": 6.490018078897555e-07, "loss": 4.0048, "step": 26128 }, { "epoch": 8.703922711751478, "grad_norm": 0.75, "learning_rate": 6.486724758523504e-07, "loss": 3.9153, "step": 26129 }, { "epoch": 8.704255850753727, "grad_norm": 0.7421875, "learning_rate": 6.48343223618072e-07, "loss": 3.9247, "step": 26130 }, { "epoch": 8.704588989755976, "grad_norm": 0.765625, "learning_rate": 6.480140511907536e-07, "loss": 3.9632, "step": 26131 }, { "epoch": 8.704922128758223, "grad_norm": 0.796875, "learning_rate": 6.476849585742309e-07, "loss": 4.019, "step": 26132 }, { "epoch": 8.705255267760473, "grad_norm": 0.765625, "learning_rate": 6.473559457723354e-07, "loss": 3.9605, "step": 26133 }, { "epoch": 8.705588406762722, "grad_norm": 0.80078125, "learning_rate": 6.470270127889019e-07, "loss": 3.9708, "step": 26134 }, { "epoch": 8.705921545764971, "grad_norm": 0.77734375, "learning_rate": 6.466981596277596e-07, "loss": 3.8195, "step": 26135 }, { "epoch": 8.706254684767218, "grad_norm": 0.79296875, "learning_rate": 6.463693862927381e-07, "loss": 3.9711, "step": 26136 }, { "epoch": 8.706587823769468, "grad_norm": 0.80078125, "learning_rate": 6.4604069278767e-07, "loss": 3.9812, "step": 26137 }, { "epoch": 8.706920962771717, "grad_norm": 0.82421875, "learning_rate": 6.457120791163815e-07, "loss": 3.9214, "step": 26138 }, { "epoch": 8.707254101773966, "grad_norm": 0.7578125, "learning_rate": 6.453835452827012e-07, "loss": 4.0297, "step": 26139 }, { "epoch": 8.707587240776213, "grad_norm": 0.80078125, "learning_rate": 6.450550912904534e-07, "loss": 4.0056, "step": 26140 }, { "epoch": 8.707920379778463, "grad_norm": 0.76953125, "learning_rate": 6.447267171434687e-07, "loss": 4.0038, "step": 26141 }, { "epoch": 8.708253518780712, "grad_norm": 0.8359375, "learning_rate": 6.443984228455688e-07, "loss": 3.9443, "step": 26142 }, { "epoch": 8.70858665778296, "grad_norm": 0.734375, "learning_rate": 6.44070208400579e-07, "loss": 4.0018, "step": 26143 }, { "epoch": 8.708919796785208, "grad_norm": 0.75390625, "learning_rate": 6.437420738123187e-07, "loss": 4.0107, "step": 26144 }, { "epoch": 8.709252935787458, "grad_norm": 0.765625, "learning_rate": 6.434140190846141e-07, "loss": 4.0873, "step": 26145 }, { "epoch": 8.709586074789707, "grad_norm": 0.76171875, "learning_rate": 6.430860442212846e-07, "loss": 4.0185, "step": 26146 }, { "epoch": 8.709919213791954, "grad_norm": 0.76171875, "learning_rate": 6.427581492261514e-07, "loss": 4.0374, "step": 26147 }, { "epoch": 8.710252352794203, "grad_norm": 0.76953125, "learning_rate": 6.424303341030313e-07, "loss": 3.999, "step": 26148 }, { "epoch": 8.710585491796452, "grad_norm": 0.7421875, "learning_rate": 6.421025988557455e-07, "loss": 3.9575, "step": 26149 }, { "epoch": 8.710918630798702, "grad_norm": 0.8046875, "learning_rate": 6.417749434881101e-07, "loss": 3.9336, "step": 26150 }, { "epoch": 8.711251769800949, "grad_norm": 0.78125, "learning_rate": 6.414473680039421e-07, "loss": 4.0038, "step": 26151 }, { "epoch": 8.711584908803198, "grad_norm": 0.7421875, "learning_rate": 6.411198724070566e-07, "loss": 3.955, "step": 26152 }, { "epoch": 8.711918047805447, "grad_norm": 0.765625, "learning_rate": 6.407924567012682e-07, "loss": 4.0099, "step": 26153 }, { "epoch": 8.712251186807695, "grad_norm": 0.765625, "learning_rate": 6.404651208903906e-07, "loss": 4.0038, "step": 26154 }, { "epoch": 8.712584325809944, "grad_norm": 0.76953125, "learning_rate": 6.401378649782347e-07, "loss": 4.0045, "step": 26155 }, { "epoch": 8.712917464812193, "grad_norm": 0.79296875, "learning_rate": 6.398106889686167e-07, "loss": 3.9852, "step": 26156 }, { "epoch": 8.713250603814442, "grad_norm": 0.76171875, "learning_rate": 6.394835928653445e-07, "loss": 4.0302, "step": 26157 }, { "epoch": 8.71358374281669, "grad_norm": 0.7578125, "learning_rate": 6.391565766722282e-07, "loss": 3.9657, "step": 26158 }, { "epoch": 8.713916881818939, "grad_norm": 0.7578125, "learning_rate": 6.388296403930765e-07, "loss": 3.9387, "step": 26159 }, { "epoch": 8.714250020821188, "grad_norm": 0.74609375, "learning_rate": 6.38502784031699e-07, "loss": 3.9358, "step": 26160 }, { "epoch": 8.714583159823436, "grad_norm": 0.76953125, "learning_rate": 6.381760075919041e-07, "loss": 3.9909, "step": 26161 }, { "epoch": 8.714916298825685, "grad_norm": 0.7734375, "learning_rate": 6.37849311077493e-07, "loss": 3.9974, "step": 26162 }, { "epoch": 8.715249437827934, "grad_norm": 0.734375, "learning_rate": 6.375226944922752e-07, "loss": 3.9868, "step": 26163 }, { "epoch": 8.715582576830183, "grad_norm": 0.7265625, "learning_rate": 6.371961578400543e-07, "loss": 3.906, "step": 26164 }, { "epoch": 8.71591571583243, "grad_norm": 0.76171875, "learning_rate": 6.36869701124633e-07, "loss": 3.9498, "step": 26165 }, { "epoch": 8.71624885483468, "grad_norm": 0.765625, "learning_rate": 6.365433243498125e-07, "loss": 4.0203, "step": 26166 }, { "epoch": 8.716581993836929, "grad_norm": 0.80078125, "learning_rate": 6.362170275193981e-07, "loss": 3.9625, "step": 26167 }, { "epoch": 8.716915132839176, "grad_norm": 0.7578125, "learning_rate": 6.358908106371886e-07, "loss": 4.0179, "step": 26168 }, { "epoch": 8.717248271841425, "grad_norm": 0.76171875, "learning_rate": 6.355646737069831e-07, "loss": 3.934, "step": 26169 }, { "epoch": 8.717581410843675, "grad_norm": 0.81640625, "learning_rate": 6.352386167325805e-07, "loss": 3.9847, "step": 26170 }, { "epoch": 8.717914549845924, "grad_norm": 0.73828125, "learning_rate": 6.349126397177793e-07, "loss": 3.9326, "step": 26171 }, { "epoch": 8.718247688848171, "grad_norm": 0.734375, "learning_rate": 6.345867426663756e-07, "loss": 3.9531, "step": 26172 }, { "epoch": 8.71858082785042, "grad_norm": 0.76171875, "learning_rate": 6.342609255821649e-07, "loss": 3.9606, "step": 26173 }, { "epoch": 8.71891396685267, "grad_norm": 0.79296875, "learning_rate": 6.339351884689448e-07, "loss": 4.0058, "step": 26174 }, { "epoch": 8.719247105854919, "grad_norm": 0.80078125, "learning_rate": 6.336095313305082e-07, "loss": 4.0115, "step": 26175 }, { "epoch": 8.719580244857166, "grad_norm": 0.796875, "learning_rate": 6.33283954170647e-07, "loss": 4.0351, "step": 26176 }, { "epoch": 8.719913383859415, "grad_norm": 0.80078125, "learning_rate": 6.329584569931541e-07, "loss": 3.9984, "step": 26177 }, { "epoch": 8.720246522861665, "grad_norm": 0.75, "learning_rate": 6.326330398018221e-07, "loss": 3.9804, "step": 26178 }, { "epoch": 8.720579661863912, "grad_norm": 0.7265625, "learning_rate": 6.323077026004415e-07, "loss": 3.9254, "step": 26179 }, { "epoch": 8.720912800866161, "grad_norm": 0.74609375, "learning_rate": 6.319824453927983e-07, "loss": 4.0242, "step": 26180 }, { "epoch": 8.72124593986841, "grad_norm": 0.76953125, "learning_rate": 6.316572681826854e-07, "loss": 3.9383, "step": 26181 }, { "epoch": 8.72157907887066, "grad_norm": 0.77734375, "learning_rate": 6.313321709738881e-07, "loss": 4.0045, "step": 26182 }, { "epoch": 8.721912217872907, "grad_norm": 0.77734375, "learning_rate": 6.310071537701934e-07, "loss": 3.9963, "step": 26183 }, { "epoch": 8.722245356875156, "grad_norm": 0.765625, "learning_rate": 6.306822165753856e-07, "loss": 3.9893, "step": 26184 }, { "epoch": 8.722578495877405, "grad_norm": 0.73828125, "learning_rate": 6.303573593932527e-07, "loss": 4.0234, "step": 26185 }, { "epoch": 8.722911634879653, "grad_norm": 0.77734375, "learning_rate": 6.300325822275774e-07, "loss": 3.9949, "step": 26186 }, { "epoch": 8.723244773881902, "grad_norm": 0.7265625, "learning_rate": 6.297078850821417e-07, "loss": 3.9777, "step": 26187 }, { "epoch": 8.723577912884151, "grad_norm": 0.76171875, "learning_rate": 6.293832679607276e-07, "loss": 3.9364, "step": 26188 }, { "epoch": 8.7239110518864, "grad_norm": 0.74609375, "learning_rate": 6.29058730867117e-07, "loss": 3.8958, "step": 26189 }, { "epoch": 8.724244190888648, "grad_norm": 0.74609375, "learning_rate": 6.287342738050902e-07, "loss": 3.964, "step": 26190 }, { "epoch": 8.724577329890897, "grad_norm": 0.76171875, "learning_rate": 6.28409896778426e-07, "loss": 3.9644, "step": 26191 }, { "epoch": 8.724910468893146, "grad_norm": 0.76953125, "learning_rate": 6.280855997909013e-07, "loss": 4.0219, "step": 26192 }, { "epoch": 8.725243607895393, "grad_norm": 0.76171875, "learning_rate": 6.277613828462955e-07, "loss": 3.98, "step": 26193 }, { "epoch": 8.725576746897643, "grad_norm": 0.71875, "learning_rate": 6.274372459483849e-07, "loss": 4.0498, "step": 26194 }, { "epoch": 8.725909885899892, "grad_norm": 0.73828125, "learning_rate": 6.271131891009446e-07, "loss": 3.9639, "step": 26195 }, { "epoch": 8.726243024902141, "grad_norm": 0.796875, "learning_rate": 6.267892123077468e-07, "loss": 3.9959, "step": 26196 }, { "epoch": 8.726576163904388, "grad_norm": 0.7421875, "learning_rate": 6.264653155725708e-07, "loss": 3.9299, "step": 26197 }, { "epoch": 8.726909302906638, "grad_norm": 0.7890625, "learning_rate": 6.261414988991846e-07, "loss": 3.9334, "step": 26198 }, { "epoch": 8.727242441908887, "grad_norm": 0.7890625, "learning_rate": 6.258177622913591e-07, "loss": 3.9607, "step": 26199 }, { "epoch": 8.727575580911136, "grad_norm": 0.75390625, "learning_rate": 6.25494105752869e-07, "loss": 3.9963, "step": 26200 }, { "epoch": 8.727908719913383, "grad_norm": 0.76171875, "learning_rate": 6.251705292874821e-07, "loss": 3.9664, "step": 26201 }, { "epoch": 8.728241858915633, "grad_norm": 0.75390625, "learning_rate": 6.248470328989678e-07, "loss": 3.9836, "step": 26202 }, { "epoch": 8.728574997917882, "grad_norm": 0.80078125, "learning_rate": 6.245236165910931e-07, "loss": 4.003, "step": 26203 }, { "epoch": 8.72890813692013, "grad_norm": 0.74609375, "learning_rate": 6.242002803676275e-07, "loss": 4.0174, "step": 26204 }, { "epoch": 8.729241275922378, "grad_norm": 0.76953125, "learning_rate": 6.238770242323346e-07, "loss": 3.9243, "step": 26205 }, { "epoch": 8.729574414924627, "grad_norm": 0.78125, "learning_rate": 6.235538481889816e-07, "loss": 3.9423, "step": 26206 }, { "epoch": 8.729907553926877, "grad_norm": 0.765625, "learning_rate": 6.232307522413319e-07, "loss": 3.8895, "step": 26207 }, { "epoch": 8.730240692929124, "grad_norm": 0.75, "learning_rate": 6.229077363931484e-07, "loss": 3.9831, "step": 26208 }, { "epoch": 8.730573831931373, "grad_norm": 0.78125, "learning_rate": 6.225848006481941e-07, "loss": 4.0396, "step": 26209 }, { "epoch": 8.730906970933622, "grad_norm": 0.76953125, "learning_rate": 6.222619450102291e-07, "loss": 4.034, "step": 26210 }, { "epoch": 8.731240109935872, "grad_norm": 0.74609375, "learning_rate": 6.219391694830171e-07, "loss": 4.0219, "step": 26211 }, { "epoch": 8.731573248938119, "grad_norm": 0.75390625, "learning_rate": 6.21616474070316e-07, "loss": 3.9965, "step": 26212 }, { "epoch": 8.731906387940368, "grad_norm": 0.75390625, "learning_rate": 6.212938587758835e-07, "loss": 3.9685, "step": 26213 }, { "epoch": 8.732239526942617, "grad_norm": 0.765625, "learning_rate": 6.209713236034775e-07, "loss": 3.9774, "step": 26214 }, { "epoch": 8.732572665944865, "grad_norm": 0.75390625, "learning_rate": 6.206488685568585e-07, "loss": 3.9965, "step": 26215 }, { "epoch": 8.732905804947114, "grad_norm": 0.78125, "learning_rate": 6.203264936397773e-07, "loss": 3.835, "step": 26216 }, { "epoch": 8.733238943949363, "grad_norm": 0.75390625, "learning_rate": 6.200041988559904e-07, "loss": 3.9647, "step": 26217 }, { "epoch": 8.733572082951612, "grad_norm": 0.765625, "learning_rate": 6.196819842092547e-07, "loss": 3.9869, "step": 26218 }, { "epoch": 8.73390522195386, "grad_norm": 0.77734375, "learning_rate": 6.193598497033195e-07, "loss": 3.9544, "step": 26219 }, { "epoch": 8.734238360956109, "grad_norm": 0.7734375, "learning_rate": 6.190377953419396e-07, "loss": 3.9388, "step": 26220 }, { "epoch": 8.734571499958358, "grad_norm": 0.78515625, "learning_rate": 6.187158211288643e-07, "loss": 4.1021, "step": 26221 }, { "epoch": 8.734904638960606, "grad_norm": 0.7578125, "learning_rate": 6.183939270678457e-07, "loss": 3.9826, "step": 26222 }, { "epoch": 8.735237777962855, "grad_norm": 0.74609375, "learning_rate": 6.180721131626316e-07, "loss": 4.024, "step": 26223 }, { "epoch": 8.735570916965104, "grad_norm": 0.796875, "learning_rate": 6.177503794169722e-07, "loss": 4.1118, "step": 26224 }, { "epoch": 8.735904055967353, "grad_norm": 0.7109375, "learning_rate": 6.174287258346131e-07, "loss": 3.98, "step": 26225 }, { "epoch": 8.7362371949696, "grad_norm": 0.73046875, "learning_rate": 6.171071524193028e-07, "loss": 4.0417, "step": 26226 }, { "epoch": 8.73657033397185, "grad_norm": 0.71875, "learning_rate": 6.167856591747851e-07, "loss": 4.0748, "step": 26227 }, { "epoch": 8.736903472974099, "grad_norm": 0.74609375, "learning_rate": 6.164642461048042e-07, "loss": 3.9851, "step": 26228 }, { "epoch": 8.737236611976346, "grad_norm": 0.75390625, "learning_rate": 6.161429132131066e-07, "loss": 3.9085, "step": 26229 }, { "epoch": 8.737569750978595, "grad_norm": 0.7578125, "learning_rate": 6.158216605034334e-07, "loss": 3.9853, "step": 26230 }, { "epoch": 8.737902889980845, "grad_norm": 0.78515625, "learning_rate": 6.155004879795264e-07, "loss": 3.934, "step": 26231 }, { "epoch": 8.738236028983094, "grad_norm": 0.80078125, "learning_rate": 6.151793956451263e-07, "loss": 3.9812, "step": 26232 }, { "epoch": 8.738569167985341, "grad_norm": 0.73046875, "learning_rate": 6.148583835039764e-07, "loss": 4.0124, "step": 26233 }, { "epoch": 8.73890230698759, "grad_norm": 0.7421875, "learning_rate": 6.145374515598115e-07, "loss": 4.0379, "step": 26234 }, { "epoch": 8.73923544598984, "grad_norm": 0.78125, "learning_rate": 6.142165998163718e-07, "loss": 3.9508, "step": 26235 }, { "epoch": 8.739568584992089, "grad_norm": 0.75390625, "learning_rate": 6.138958282773918e-07, "loss": 3.9546, "step": 26236 }, { "epoch": 8.739901723994336, "grad_norm": 0.7578125, "learning_rate": 6.135751369466127e-07, "loss": 3.8994, "step": 26237 }, { "epoch": 8.740234862996585, "grad_norm": 0.76953125, "learning_rate": 6.132545258277667e-07, "loss": 3.9608, "step": 26238 }, { "epoch": 8.740568001998835, "grad_norm": 0.7890625, "learning_rate": 6.129339949245888e-07, "loss": 3.9389, "step": 26239 }, { "epoch": 8.740901141001082, "grad_norm": 0.734375, "learning_rate": 6.126135442408112e-07, "loss": 4.063, "step": 26240 }, { "epoch": 8.741234280003331, "grad_norm": 0.7265625, "learning_rate": 6.122931737801693e-07, "loss": 3.8929, "step": 26241 }, { "epoch": 8.74156741900558, "grad_norm": 0.7421875, "learning_rate": 6.119728835463942e-07, "loss": 3.9757, "step": 26242 }, { "epoch": 8.74190055800783, "grad_norm": 0.73828125, "learning_rate": 6.116526735432129e-07, "loss": 3.9681, "step": 26243 }, { "epoch": 8.742233697010077, "grad_norm": 0.76953125, "learning_rate": 6.113325437743592e-07, "loss": 3.9617, "step": 26244 }, { "epoch": 8.742566836012326, "grad_norm": 0.79296875, "learning_rate": 6.110124942435608e-07, "loss": 3.922, "step": 26245 }, { "epoch": 8.742899975014575, "grad_norm": 0.76953125, "learning_rate": 6.106925249545447e-07, "loss": 3.9832, "step": 26246 }, { "epoch": 8.743233114016824, "grad_norm": 0.74609375, "learning_rate": 6.103726359110374e-07, "loss": 3.9696, "step": 26247 }, { "epoch": 8.743566253019072, "grad_norm": 0.75390625, "learning_rate": 6.10052827116768e-07, "loss": 3.9994, "step": 26248 }, { "epoch": 8.743899392021321, "grad_norm": 0.75, "learning_rate": 6.097330985754587e-07, "loss": 4.0679, "step": 26249 }, { "epoch": 8.74423253102357, "grad_norm": 0.80859375, "learning_rate": 6.094134502908349e-07, "loss": 4.0332, "step": 26250 }, { "epoch": 8.744565670025818, "grad_norm": 0.77734375, "learning_rate": 6.090938822666195e-07, "loss": 4.0218, "step": 26251 }, { "epoch": 8.744898809028067, "grad_norm": 0.76953125, "learning_rate": 6.087743945065344e-07, "loss": 3.9748, "step": 26252 }, { "epoch": 8.745231948030316, "grad_norm": 0.78515625, "learning_rate": 6.084549870143017e-07, "loss": 4.0544, "step": 26253 }, { "epoch": 8.745565087032563, "grad_norm": 0.7421875, "learning_rate": 6.081356597936391e-07, "loss": 3.9939, "step": 26254 }, { "epoch": 8.745898226034813, "grad_norm": 0.71484375, "learning_rate": 6.078164128482705e-07, "loss": 4.0013, "step": 26255 }, { "epoch": 8.746231365037062, "grad_norm": 0.78125, "learning_rate": 6.074972461819119e-07, "loss": 4.0061, "step": 26256 }, { "epoch": 8.746564504039311, "grad_norm": 0.73828125, "learning_rate": 6.071781597982815e-07, "loss": 4.051, "step": 26257 }, { "epoch": 8.746897643041558, "grad_norm": 0.7734375, "learning_rate": 6.068591537010943e-07, "loss": 3.9272, "step": 26258 }, { "epoch": 8.747230782043808, "grad_norm": 0.76953125, "learning_rate": 6.065402278940682e-07, "loss": 3.9714, "step": 26259 }, { "epoch": 8.747563921046057, "grad_norm": 0.77734375, "learning_rate": 6.062213823809196e-07, "loss": 3.985, "step": 26260 }, { "epoch": 8.747897060048306, "grad_norm": 0.77734375, "learning_rate": 6.059026171653561e-07, "loss": 3.9952, "step": 26261 }, { "epoch": 8.748230199050553, "grad_norm": 0.73828125, "learning_rate": 6.055839322510967e-07, "loss": 3.9497, "step": 26262 }, { "epoch": 8.748563338052803, "grad_norm": 0.76953125, "learning_rate": 6.052653276418516e-07, "loss": 4.0399, "step": 26263 }, { "epoch": 8.748896477055052, "grad_norm": 0.75390625, "learning_rate": 6.049468033413302e-07, "loss": 4.0091, "step": 26264 }, { "epoch": 8.749229616057299, "grad_norm": 0.7421875, "learning_rate": 6.046283593532431e-07, "loss": 3.9195, "step": 26265 }, { "epoch": 8.749562755059548, "grad_norm": 0.75390625, "learning_rate": 6.043099956813023e-07, "loss": 4.0688, "step": 26266 }, { "epoch": 8.749895894061797, "grad_norm": 0.73828125, "learning_rate": 6.03991712329213e-07, "loss": 3.9893, "step": 26267 }, { "epoch": 8.750229033064047, "grad_norm": 0.73046875, "learning_rate": 6.03673509300684e-07, "loss": 4.0285, "step": 26268 }, { "epoch": 8.750562172066294, "grad_norm": 0.7421875, "learning_rate": 6.033553865994216e-07, "loss": 3.9575, "step": 26269 }, { "epoch": 8.750895311068543, "grad_norm": 0.734375, "learning_rate": 6.030373442291301e-07, "loss": 3.9914, "step": 26270 }, { "epoch": 8.751228450070792, "grad_norm": 0.75390625, "learning_rate": 6.027193821935151e-07, "loss": 3.9721, "step": 26271 }, { "epoch": 8.751561589073042, "grad_norm": 0.7578125, "learning_rate": 6.024015004962785e-07, "loss": 4.006, "step": 26272 }, { "epoch": 8.751894728075289, "grad_norm": 0.7578125, "learning_rate": 6.020836991411257e-07, "loss": 4.0134, "step": 26273 }, { "epoch": 8.752227867077538, "grad_norm": 0.796875, "learning_rate": 6.017659781317563e-07, "loss": 4.0069, "step": 26274 }, { "epoch": 8.752561006079787, "grad_norm": 0.7890625, "learning_rate": 6.014483374718724e-07, "loss": 3.9847, "step": 26275 }, { "epoch": 8.752894145082035, "grad_norm": 0.7734375, "learning_rate": 6.011307771651716e-07, "loss": 4.0386, "step": 26276 }, { "epoch": 8.753227284084284, "grad_norm": 0.78515625, "learning_rate": 6.008132972153555e-07, "loss": 3.9095, "step": 26277 }, { "epoch": 8.753560423086533, "grad_norm": 0.734375, "learning_rate": 6.004958976261224e-07, "loss": 3.9631, "step": 26278 }, { "epoch": 8.753893562088782, "grad_norm": 0.80078125, "learning_rate": 6.001785784011654e-07, "loss": 3.9879, "step": 26279 }, { "epoch": 8.75422670109103, "grad_norm": 0.7265625, "learning_rate": 5.998613395441832e-07, "loss": 3.953, "step": 26280 }, { "epoch": 8.754559840093279, "grad_norm": 0.79296875, "learning_rate": 5.99544181058872e-07, "loss": 3.9698, "step": 26281 }, { "epoch": 8.754892979095528, "grad_norm": 0.796875, "learning_rate": 5.992271029489238e-07, "loss": 4.004, "step": 26282 }, { "epoch": 8.755226118097776, "grad_norm": 0.73046875, "learning_rate": 5.989101052180332e-07, "loss": 4.0012, "step": 26283 }, { "epoch": 8.755559257100025, "grad_norm": 0.75, "learning_rate": 5.985931878698913e-07, "loss": 4.021, "step": 26284 }, { "epoch": 8.755892396102274, "grad_norm": 0.7421875, "learning_rate": 5.982763509081912e-07, "loss": 4.0655, "step": 26285 }, { "epoch": 8.756225535104523, "grad_norm": 0.7578125, "learning_rate": 5.979595943366223e-07, "loss": 3.9526, "step": 26286 }, { "epoch": 8.75655867410677, "grad_norm": 0.74609375, "learning_rate": 5.976429181588741e-07, "loss": 3.9896, "step": 26287 }, { "epoch": 8.75689181310902, "grad_norm": 0.72265625, "learning_rate": 5.973263223786363e-07, "loss": 4.0381, "step": 26288 }, { "epoch": 8.757224952111269, "grad_norm": 0.7890625, "learning_rate": 5.97009806999595e-07, "loss": 4.0283, "step": 26289 }, { "epoch": 8.757558091113516, "grad_norm": 0.7890625, "learning_rate": 5.96693372025438e-07, "loss": 3.9214, "step": 26290 }, { "epoch": 8.757891230115765, "grad_norm": 0.73046875, "learning_rate": 5.963770174598493e-07, "loss": 3.9576, "step": 26291 }, { "epoch": 8.758224369118015, "grad_norm": 0.7421875, "learning_rate": 5.960607433065157e-07, "loss": 3.9826, "step": 26292 }, { "epoch": 8.758557508120264, "grad_norm": 0.76953125, "learning_rate": 5.95744549569121e-07, "loss": 4.0393, "step": 26293 }, { "epoch": 8.758890647122511, "grad_norm": 0.76953125, "learning_rate": 5.954284362513482e-07, "loss": 4.0543, "step": 26294 }, { "epoch": 8.75922378612476, "grad_norm": 0.7578125, "learning_rate": 5.951124033568774e-07, "loss": 3.968, "step": 26295 }, { "epoch": 8.75955692512701, "grad_norm": 0.84375, "learning_rate": 5.947964508893935e-07, "loss": 3.9889, "step": 26296 }, { "epoch": 8.759890064129259, "grad_norm": 0.796875, "learning_rate": 5.944805788525726e-07, "loss": 3.889, "step": 26297 }, { "epoch": 8.760223203131506, "grad_norm": 0.80078125, "learning_rate": 5.94164787250095e-07, "loss": 3.9959, "step": 26298 }, { "epoch": 8.760556342133755, "grad_norm": 0.7890625, "learning_rate": 5.938490760856402e-07, "loss": 3.9218, "step": 26299 }, { "epoch": 8.760889481136005, "grad_norm": 0.80078125, "learning_rate": 5.935334453628854e-07, "loss": 4.0161, "step": 26300 }, { "epoch": 8.761222620138252, "grad_norm": 0.8046875, "learning_rate": 5.932178950855069e-07, "loss": 3.9684, "step": 26301 }, { "epoch": 8.761555759140501, "grad_norm": 0.74609375, "learning_rate": 5.929024252571782e-07, "loss": 3.9761, "step": 26302 }, { "epoch": 8.76188889814275, "grad_norm": 0.734375, "learning_rate": 5.925870358815774e-07, "loss": 3.9124, "step": 26303 }, { "epoch": 8.762222037145, "grad_norm": 0.77734375, "learning_rate": 5.922717269623756e-07, "loss": 3.9991, "step": 26304 }, { "epoch": 8.762555176147247, "grad_norm": 0.75390625, "learning_rate": 5.919564985032457e-07, "loss": 3.9712, "step": 26305 }, { "epoch": 8.762888315149496, "grad_norm": 0.7578125, "learning_rate": 5.916413505078608e-07, "loss": 4.0275, "step": 26306 }, { "epoch": 8.763221454151745, "grad_norm": 0.78125, "learning_rate": 5.913262829798893e-07, "loss": 4.0473, "step": 26307 }, { "epoch": 8.763554593153994, "grad_norm": 0.76171875, "learning_rate": 5.910112959230035e-07, "loss": 4.0503, "step": 26308 }, { "epoch": 8.763887732156242, "grad_norm": 0.7734375, "learning_rate": 5.906963893408696e-07, "loss": 3.9817, "step": 26309 }, { "epoch": 8.764220871158491, "grad_norm": 0.75, "learning_rate": 5.903815632371587e-07, "loss": 4.0026, "step": 26310 }, { "epoch": 8.76455401016074, "grad_norm": 0.8046875, "learning_rate": 5.900668176155363e-07, "loss": 3.9602, "step": 26311 }, { "epoch": 8.764887149162988, "grad_norm": 0.76953125, "learning_rate": 5.897521524796686e-07, "loss": 4.0152, "step": 26312 }, { "epoch": 8.765220288165237, "grad_norm": 0.78515625, "learning_rate": 5.894375678332212e-07, "loss": 3.9441, "step": 26313 }, { "epoch": 8.765553427167486, "grad_norm": 0.7421875, "learning_rate": 5.891230636798576e-07, "loss": 4.0436, "step": 26314 }, { "epoch": 8.765886566169733, "grad_norm": 0.73046875, "learning_rate": 5.888086400232407e-07, "loss": 3.9534, "step": 26315 }, { "epoch": 8.766219705171983, "grad_norm": 0.75390625, "learning_rate": 5.884942968670326e-07, "loss": 3.9887, "step": 26316 }, { "epoch": 8.766552844174232, "grad_norm": 0.82421875, "learning_rate": 5.881800342148972e-07, "loss": 3.993, "step": 26317 }, { "epoch": 8.766885983176481, "grad_norm": 0.75, "learning_rate": 5.878658520704938e-07, "loss": 3.9822, "step": 26318 }, { "epoch": 8.767219122178728, "grad_norm": 0.75390625, "learning_rate": 5.875517504374805e-07, "loss": 3.9705, "step": 26319 }, { "epoch": 8.767552261180978, "grad_norm": 0.765625, "learning_rate": 5.872377293195161e-07, "loss": 3.9798, "step": 26320 }, { "epoch": 8.767885400183227, "grad_norm": 0.7421875, "learning_rate": 5.869237887202609e-07, "loss": 4.0199, "step": 26321 }, { "epoch": 8.768218539185476, "grad_norm": 0.77734375, "learning_rate": 5.86609928643371e-07, "loss": 3.9639, "step": 26322 }, { "epoch": 8.768551678187723, "grad_norm": 0.7578125, "learning_rate": 5.862961490924978e-07, "loss": 3.9766, "step": 26323 }, { "epoch": 8.768884817189972, "grad_norm": 0.7421875, "learning_rate": 5.859824500713018e-07, "loss": 3.9524, "step": 26324 }, { "epoch": 8.769217956192222, "grad_norm": 0.77734375, "learning_rate": 5.856688315834341e-07, "loss": 4.0018, "step": 26325 }, { "epoch": 8.769551095194469, "grad_norm": 0.7890625, "learning_rate": 5.853552936325477e-07, "loss": 3.995, "step": 26326 }, { "epoch": 8.769884234196718, "grad_norm": 0.796875, "learning_rate": 5.850418362222937e-07, "loss": 3.9405, "step": 26327 }, { "epoch": 8.770217373198967, "grad_norm": 0.74609375, "learning_rate": 5.84728459356326e-07, "loss": 4.0036, "step": 26328 }, { "epoch": 8.770550512201217, "grad_norm": 0.79296875, "learning_rate": 5.844151630382932e-07, "loss": 4.0301, "step": 26329 }, { "epoch": 8.770883651203464, "grad_norm": 0.796875, "learning_rate": 5.841019472718451e-07, "loss": 3.8771, "step": 26330 }, { "epoch": 8.771216790205713, "grad_norm": 0.76171875, "learning_rate": 5.837888120606294e-07, "loss": 4.0127, "step": 26331 }, { "epoch": 8.771549929207962, "grad_norm": 0.74609375, "learning_rate": 5.834757574082933e-07, "loss": 4.028, "step": 26332 }, { "epoch": 8.771883068210212, "grad_norm": 0.734375, "learning_rate": 5.83162783318483e-07, "loss": 3.9984, "step": 26333 }, { "epoch": 8.772216207212459, "grad_norm": 0.796875, "learning_rate": 5.828498897948456e-07, "loss": 3.982, "step": 26334 }, { "epoch": 8.772549346214708, "grad_norm": 0.78125, "learning_rate": 5.825370768410224e-07, "loss": 3.9161, "step": 26335 }, { "epoch": 8.772882485216957, "grad_norm": 0.76953125, "learning_rate": 5.822243444606604e-07, "loss": 4.0571, "step": 26336 }, { "epoch": 8.773215624219205, "grad_norm": 0.78125, "learning_rate": 5.819116926574008e-07, "loss": 3.9135, "step": 26337 }, { "epoch": 8.773548763221454, "grad_norm": 0.7421875, "learning_rate": 5.815991214348859e-07, "loss": 3.9411, "step": 26338 }, { "epoch": 8.773881902223703, "grad_norm": 0.75390625, "learning_rate": 5.812866307967543e-07, "loss": 3.9505, "step": 26339 }, { "epoch": 8.774215041225952, "grad_norm": 0.79296875, "learning_rate": 5.809742207466498e-07, "loss": 4.0343, "step": 26340 }, { "epoch": 8.7745481802282, "grad_norm": 0.78125, "learning_rate": 5.806618912882086e-07, "loss": 3.9506, "step": 26341 }, { "epoch": 8.774881319230449, "grad_norm": 0.75, "learning_rate": 5.80349642425067e-07, "loss": 3.9266, "step": 26342 }, { "epoch": 8.775214458232698, "grad_norm": 0.76171875, "learning_rate": 5.800374741608663e-07, "loss": 4.0198, "step": 26343 }, { "epoch": 8.775547597234945, "grad_norm": 0.8046875, "learning_rate": 5.797253864992394e-07, "loss": 3.9388, "step": 26344 }, { "epoch": 8.775880736237195, "grad_norm": 0.7734375, "learning_rate": 5.794133794438233e-07, "loss": 3.9847, "step": 26345 }, { "epoch": 8.776213875239444, "grad_norm": 0.76953125, "learning_rate": 5.791014529982494e-07, "loss": 3.9808, "step": 26346 }, { "epoch": 8.776547014241693, "grad_norm": 0.765625, "learning_rate": 5.787896071661547e-07, "loss": 3.9925, "step": 26347 }, { "epoch": 8.77688015324394, "grad_norm": 0.8203125, "learning_rate": 5.784778419511696e-07, "loss": 3.9534, "step": 26348 }, { "epoch": 8.77721329224619, "grad_norm": 0.75, "learning_rate": 5.781661573569263e-07, "loss": 4.0163, "step": 26349 }, { "epoch": 8.777546431248439, "grad_norm": 0.765625, "learning_rate": 5.778545533870536e-07, "loss": 3.9746, "step": 26350 }, { "epoch": 8.777879570250686, "grad_norm": 0.79296875, "learning_rate": 5.775430300451825e-07, "loss": 3.9379, "step": 26351 }, { "epoch": 8.778212709252935, "grad_norm": 0.80078125, "learning_rate": 5.772315873349413e-07, "loss": 4.0122, "step": 26352 }, { "epoch": 8.778545848255185, "grad_norm": 0.76171875, "learning_rate": 5.769202252599551e-07, "loss": 4.0068, "step": 26353 }, { "epoch": 8.778878987257434, "grad_norm": 0.7890625, "learning_rate": 5.766089438238553e-07, "loss": 3.9969, "step": 26354 }, { "epoch": 8.779212126259681, "grad_norm": 0.74609375, "learning_rate": 5.762977430302649e-07, "loss": 3.9299, "step": 26355 }, { "epoch": 8.77954526526193, "grad_norm": 0.78515625, "learning_rate": 5.759866228828092e-07, "loss": 3.9395, "step": 26356 }, { "epoch": 8.77987840426418, "grad_norm": 0.765625, "learning_rate": 5.756755833851104e-07, "loss": 3.9521, "step": 26357 }, { "epoch": 8.780211543266429, "grad_norm": 0.765625, "learning_rate": 5.753646245407956e-07, "loss": 4.004, "step": 26358 }, { "epoch": 8.780544682268676, "grad_norm": 0.75, "learning_rate": 5.750537463534828e-07, "loss": 3.9909, "step": 26359 }, { "epoch": 8.780877821270925, "grad_norm": 0.77734375, "learning_rate": 5.747429488267939e-07, "loss": 4.0551, "step": 26360 }, { "epoch": 8.781210960273174, "grad_norm": 0.7734375, "learning_rate": 5.744322319643503e-07, "loss": 3.9697, "step": 26361 }, { "epoch": 8.781544099275422, "grad_norm": 0.79296875, "learning_rate": 5.741215957697699e-07, "loss": 3.9039, "step": 26362 }, { "epoch": 8.781877238277671, "grad_norm": 0.8125, "learning_rate": 5.738110402466723e-07, "loss": 3.9238, "step": 26363 }, { "epoch": 8.78221037727992, "grad_norm": 0.78125, "learning_rate": 5.735005653986722e-07, "loss": 3.987, "step": 26364 }, { "epoch": 8.78254351628217, "grad_norm": 0.79296875, "learning_rate": 5.731901712293891e-07, "loss": 4.0316, "step": 26365 }, { "epoch": 8.782876655284417, "grad_norm": 0.765625, "learning_rate": 5.728798577424377e-07, "loss": 4.0317, "step": 26366 }, { "epoch": 8.783209794286666, "grad_norm": 0.76171875, "learning_rate": 5.725696249414308e-07, "loss": 4.0069, "step": 26367 }, { "epoch": 8.783542933288915, "grad_norm": 0.76171875, "learning_rate": 5.72259472829984e-07, "loss": 4.0093, "step": 26368 }, { "epoch": 8.783876072291164, "grad_norm": 0.7578125, "learning_rate": 5.719494014117086e-07, "loss": 3.973, "step": 26369 }, { "epoch": 8.784209211293412, "grad_norm": 0.7734375, "learning_rate": 5.716394106902157e-07, "loss": 3.8987, "step": 26370 }, { "epoch": 8.784542350295661, "grad_norm": 0.796875, "learning_rate": 5.713295006691166e-07, "loss": 3.9204, "step": 26371 }, { "epoch": 8.78487548929791, "grad_norm": 0.75390625, "learning_rate": 5.710196713520219e-07, "loss": 3.9713, "step": 26372 }, { "epoch": 8.785208628300158, "grad_norm": 0.73828125, "learning_rate": 5.707099227425402e-07, "loss": 4.0496, "step": 26373 }, { "epoch": 8.785541767302407, "grad_norm": 0.7578125, "learning_rate": 5.704002548442796e-07, "loss": 4.0272, "step": 26374 }, { "epoch": 8.785874906304656, "grad_norm": 0.78125, "learning_rate": 5.700906676608447e-07, "loss": 3.973, "step": 26375 }, { "epoch": 8.786208045306905, "grad_norm": 0.75, "learning_rate": 5.697811611958459e-07, "loss": 3.9695, "step": 26376 }, { "epoch": 8.786541184309153, "grad_norm": 0.78515625, "learning_rate": 5.694717354528845e-07, "loss": 3.9093, "step": 26377 }, { "epoch": 8.786874323311402, "grad_norm": 0.7890625, "learning_rate": 5.691623904355659e-07, "loss": 3.9498, "step": 26378 }, { "epoch": 8.787207462313651, "grad_norm": 0.78515625, "learning_rate": 5.688531261474922e-07, "loss": 3.9688, "step": 26379 }, { "epoch": 8.787540601315898, "grad_norm": 0.75, "learning_rate": 5.685439425922681e-07, "loss": 4.0748, "step": 26380 }, { "epoch": 8.787873740318147, "grad_norm": 0.75, "learning_rate": 5.682348397734932e-07, "loss": 3.9621, "step": 26381 }, { "epoch": 8.788206879320397, "grad_norm": 0.8203125, "learning_rate": 5.67925817694768e-07, "loss": 4.0716, "step": 26382 }, { "epoch": 8.788540018322646, "grad_norm": 0.73828125, "learning_rate": 5.676168763596912e-07, "loss": 3.9855, "step": 26383 }, { "epoch": 8.788873157324893, "grad_norm": 0.80078125, "learning_rate": 5.673080157718641e-07, "loss": 3.956, "step": 26384 }, { "epoch": 8.789206296327142, "grad_norm": 0.8046875, "learning_rate": 5.66999235934883e-07, "loss": 4.0237, "step": 26385 }, { "epoch": 8.789539435329392, "grad_norm": 0.76171875, "learning_rate": 5.666905368523417e-07, "loss": 3.9884, "step": 26386 }, { "epoch": 8.789872574331639, "grad_norm": 0.77734375, "learning_rate": 5.66381918527839e-07, "loss": 4.0273, "step": 26387 }, { "epoch": 8.790205713333888, "grad_norm": 0.765625, "learning_rate": 5.660733809649695e-07, "loss": 3.9836, "step": 26388 }, { "epoch": 8.790538852336137, "grad_norm": 0.7421875, "learning_rate": 5.657649241673254e-07, "loss": 4.0077, "step": 26389 }, { "epoch": 8.790871991338387, "grad_norm": 0.74609375, "learning_rate": 5.654565481384996e-07, "loss": 3.9518, "step": 26390 }, { "epoch": 8.791205130340634, "grad_norm": 0.796875, "learning_rate": 5.651482528820858e-07, "loss": 4.0108, "step": 26391 }, { "epoch": 8.791538269342883, "grad_norm": 0.73828125, "learning_rate": 5.648400384016747e-07, "loss": 3.9964, "step": 26392 }, { "epoch": 8.791871408345132, "grad_norm": 0.76171875, "learning_rate": 5.645319047008548e-07, "loss": 4.0626, "step": 26393 }, { "epoch": 8.792204547347382, "grad_norm": 0.76953125, "learning_rate": 5.642238517832168e-07, "loss": 3.9753, "step": 26394 }, { "epoch": 8.792537686349629, "grad_norm": 0.7578125, "learning_rate": 5.639158796523477e-07, "loss": 4.0046, "step": 26395 }, { "epoch": 8.792870825351878, "grad_norm": 0.72265625, "learning_rate": 5.636079883118347e-07, "loss": 3.9869, "step": 26396 }, { "epoch": 8.793203964354127, "grad_norm": 0.80078125, "learning_rate": 5.633001777652632e-07, "loss": 3.9152, "step": 26397 }, { "epoch": 8.793537103356375, "grad_norm": 0.75, "learning_rate": 5.629924480162213e-07, "loss": 3.9641, "step": 26398 }, { "epoch": 8.793870242358624, "grad_norm": 0.7421875, "learning_rate": 5.626847990682918e-07, "loss": 4.0809, "step": 26399 }, { "epoch": 8.794203381360873, "grad_norm": 0.76171875, "learning_rate": 5.623772309250585e-07, "loss": 4.0323, "step": 26400 }, { "epoch": 8.794536520363122, "grad_norm": 0.72265625, "learning_rate": 5.62069743590102e-07, "loss": 3.975, "step": 26401 }, { "epoch": 8.79486965936537, "grad_norm": 0.71484375, "learning_rate": 5.617623370670069e-07, "loss": 3.9846, "step": 26402 }, { "epoch": 8.795202798367619, "grad_norm": 0.796875, "learning_rate": 5.614550113593536e-07, "loss": 4.0035, "step": 26403 }, { "epoch": 8.795535937369868, "grad_norm": 0.7734375, "learning_rate": 5.611477664707177e-07, "loss": 3.975, "step": 26404 }, { "epoch": 8.795869076372115, "grad_norm": 0.73828125, "learning_rate": 5.608406024046828e-07, "loss": 4.0423, "step": 26405 }, { "epoch": 8.796202215374365, "grad_norm": 0.78125, "learning_rate": 5.605335191648236e-07, "loss": 4.0487, "step": 26406 }, { "epoch": 8.796535354376614, "grad_norm": 0.80078125, "learning_rate": 5.602265167547191e-07, "loss": 3.9602, "step": 26407 }, { "epoch": 8.796868493378863, "grad_norm": 0.78125, "learning_rate": 5.599195951779421e-07, "loss": 4.0646, "step": 26408 }, { "epoch": 8.79720163238111, "grad_norm": 0.7890625, "learning_rate": 5.596127544380714e-07, "loss": 3.9669, "step": 26409 }, { "epoch": 8.79753477138336, "grad_norm": 0.8046875, "learning_rate": 5.593059945386791e-07, "loss": 4.0053, "step": 26410 }, { "epoch": 8.797867910385609, "grad_norm": 0.77734375, "learning_rate": 5.589993154833384e-07, "loss": 3.9182, "step": 26411 }, { "epoch": 8.798201049387856, "grad_norm": 0.78515625, "learning_rate": 5.586927172756212e-07, "loss": 3.997, "step": 26412 }, { "epoch": 8.798534188390105, "grad_norm": 0.7578125, "learning_rate": 5.583861999190998e-07, "loss": 4.0009, "step": 26413 }, { "epoch": 8.798867327392355, "grad_norm": 0.796875, "learning_rate": 5.580797634173429e-07, "loss": 3.9457, "step": 26414 }, { "epoch": 8.799200466394604, "grad_norm": 0.77734375, "learning_rate": 5.577734077739194e-07, "loss": 4.0348, "step": 26415 }, { "epoch": 8.799533605396851, "grad_norm": 0.7890625, "learning_rate": 5.574671329924006e-07, "loss": 3.987, "step": 26416 }, { "epoch": 8.7998667443991, "grad_norm": 0.74609375, "learning_rate": 5.571609390763519e-07, "loss": 3.9721, "step": 26417 }, { "epoch": 8.80019988340135, "grad_norm": 0.76953125, "learning_rate": 5.568548260293396e-07, "loss": 3.9981, "step": 26418 }, { "epoch": 8.800533022403599, "grad_norm": 0.79296875, "learning_rate": 5.565487938549294e-07, "loss": 4.047, "step": 26419 }, { "epoch": 8.800866161405846, "grad_norm": 0.74609375, "learning_rate": 5.562428425566874e-07, "loss": 3.9886, "step": 26420 }, { "epoch": 8.801199300408095, "grad_norm": 0.75390625, "learning_rate": 5.559369721381766e-07, "loss": 4.018, "step": 26421 }, { "epoch": 8.801532439410344, "grad_norm": 0.734375, "learning_rate": 5.556311826029583e-07, "loss": 4.0865, "step": 26422 }, { "epoch": 8.801865578412592, "grad_norm": 0.79296875, "learning_rate": 5.553254739545949e-07, "loss": 3.9741, "step": 26423 }, { "epoch": 8.802198717414841, "grad_norm": 0.75390625, "learning_rate": 5.550198461966475e-07, "loss": 3.9713, "step": 26424 }, { "epoch": 8.80253185641709, "grad_norm": 0.7734375, "learning_rate": 5.547142993326773e-07, "loss": 4.0414, "step": 26425 }, { "epoch": 8.80286499541934, "grad_norm": 0.74609375, "learning_rate": 5.544088333662408e-07, "loss": 4.0238, "step": 26426 }, { "epoch": 8.803198134421587, "grad_norm": 0.75390625, "learning_rate": 5.541034483008969e-07, "loss": 3.9901, "step": 26427 }, { "epoch": 8.803531273423836, "grad_norm": 0.78515625, "learning_rate": 5.537981441402043e-07, "loss": 4.0598, "step": 26428 }, { "epoch": 8.803864412426085, "grad_norm": 0.7421875, "learning_rate": 5.534929208877177e-07, "loss": 4.0304, "step": 26429 }, { "epoch": 8.804197551428334, "grad_norm": 0.78515625, "learning_rate": 5.531877785469925e-07, "loss": 4.0113, "step": 26430 }, { "epoch": 8.804530690430582, "grad_norm": 0.74609375, "learning_rate": 5.528827171215825e-07, "loss": 3.9464, "step": 26431 }, { "epoch": 8.804863829432831, "grad_norm": 0.73046875, "learning_rate": 5.525777366150417e-07, "loss": 3.9134, "step": 26432 }, { "epoch": 8.80519696843508, "grad_norm": 0.8203125, "learning_rate": 5.522728370309221e-07, "loss": 3.9492, "step": 26433 }, { "epoch": 8.805530107437328, "grad_norm": 0.76953125, "learning_rate": 5.519680183727743e-07, "loss": 3.9605, "step": 26434 }, { "epoch": 8.805863246439577, "grad_norm": 0.7734375, "learning_rate": 5.516632806441505e-07, "loss": 3.9868, "step": 26435 }, { "epoch": 8.806196385441826, "grad_norm": 0.7578125, "learning_rate": 5.513586238486001e-07, "loss": 3.9323, "step": 26436 }, { "epoch": 8.806529524444075, "grad_norm": 0.7578125, "learning_rate": 5.510540479896706e-07, "loss": 4.0578, "step": 26437 }, { "epoch": 8.806862663446323, "grad_norm": 0.80859375, "learning_rate": 5.50749553070908e-07, "loss": 3.9601, "step": 26438 }, { "epoch": 8.807195802448572, "grad_norm": 0.73046875, "learning_rate": 5.504451390958648e-07, "loss": 3.9766, "step": 26439 }, { "epoch": 8.80752894145082, "grad_norm": 0.75390625, "learning_rate": 5.501408060680813e-07, "loss": 4.0049, "step": 26440 }, { "epoch": 8.807862080453068, "grad_norm": 0.8046875, "learning_rate": 5.498365539911021e-07, "loss": 3.9217, "step": 26441 }, { "epoch": 8.808195219455317, "grad_norm": 0.74609375, "learning_rate": 5.495323828684753e-07, "loss": 4.0027, "step": 26442 }, { "epoch": 8.808528358457567, "grad_norm": 0.734375, "learning_rate": 5.492282927037406e-07, "loss": 3.9428, "step": 26443 }, { "epoch": 8.808861497459816, "grad_norm": 0.80078125, "learning_rate": 5.489242835004418e-07, "loss": 3.9027, "step": 26444 }, { "epoch": 8.809194636462063, "grad_norm": 0.76171875, "learning_rate": 5.486203552621177e-07, "loss": 3.9909, "step": 26445 }, { "epoch": 8.809527775464312, "grad_norm": 0.796875, "learning_rate": 5.483165079923105e-07, "loss": 3.9107, "step": 26446 }, { "epoch": 8.809860914466562, "grad_norm": 0.734375, "learning_rate": 5.48012741694559e-07, "loss": 4.0095, "step": 26447 }, { "epoch": 8.810194053468809, "grad_norm": 0.69140625, "learning_rate": 5.477090563724013e-07, "loss": 4.0214, "step": 26448 }, { "epoch": 8.810527192471058, "grad_norm": 0.75390625, "learning_rate": 5.474054520293745e-07, "loss": 3.9748, "step": 26449 }, { "epoch": 8.810860331473307, "grad_norm": 0.7421875, "learning_rate": 5.471019286690149e-07, "loss": 4.0183, "step": 26450 }, { "epoch": 8.811193470475557, "grad_norm": 0.75, "learning_rate": 5.467984862948572e-07, "loss": 3.9912, "step": 26451 }, { "epoch": 8.811526609477804, "grad_norm": 0.77734375, "learning_rate": 5.464951249104361e-07, "loss": 3.9655, "step": 26452 }, { "epoch": 8.811859748480053, "grad_norm": 0.7734375, "learning_rate": 5.461918445192863e-07, "loss": 4.0117, "step": 26453 }, { "epoch": 8.812192887482302, "grad_norm": 0.78515625, "learning_rate": 5.45888645124939e-07, "loss": 3.961, "step": 26454 }, { "epoch": 8.812526026484552, "grad_norm": 0.75, "learning_rate": 5.455855267309273e-07, "loss": 3.9612, "step": 26455 }, { "epoch": 8.812859165486799, "grad_norm": 0.796875, "learning_rate": 5.452824893407792e-07, "loss": 3.921, "step": 26456 }, { "epoch": 8.813192304489048, "grad_norm": 0.78515625, "learning_rate": 5.449795329580296e-07, "loss": 3.9444, "step": 26457 }, { "epoch": 8.813525443491297, "grad_norm": 0.74609375, "learning_rate": 5.446766575862019e-07, "loss": 3.9892, "step": 26458 }, { "epoch": 8.813858582493545, "grad_norm": 0.77734375, "learning_rate": 5.443738632288244e-07, "loss": 3.9333, "step": 26459 }, { "epoch": 8.814191721495794, "grad_norm": 0.75, "learning_rate": 5.440711498894276e-07, "loss": 4.0139, "step": 26460 }, { "epoch": 8.814524860498043, "grad_norm": 0.75, "learning_rate": 5.437685175715354e-07, "loss": 4.0413, "step": 26461 }, { "epoch": 8.814857999500292, "grad_norm": 0.76953125, "learning_rate": 5.434659662786723e-07, "loss": 3.9799, "step": 26462 }, { "epoch": 8.81519113850254, "grad_norm": 0.78515625, "learning_rate": 5.431634960143614e-07, "loss": 3.9755, "step": 26463 }, { "epoch": 8.815524277504789, "grad_norm": 0.7265625, "learning_rate": 5.42861106782129e-07, "loss": 3.9879, "step": 26464 }, { "epoch": 8.815857416507038, "grad_norm": 0.7421875, "learning_rate": 5.425587985854949e-07, "loss": 3.9625, "step": 26465 }, { "epoch": 8.816190555509285, "grad_norm": 0.7265625, "learning_rate": 5.422565714279829e-07, "loss": 3.9878, "step": 26466 }, { "epoch": 8.816523694511535, "grad_norm": 0.78515625, "learning_rate": 5.419544253131092e-07, "loss": 3.9696, "step": 26467 }, { "epoch": 8.816856833513784, "grad_norm": 0.7265625, "learning_rate": 5.416523602443962e-07, "loss": 3.9864, "step": 26468 }, { "epoch": 8.817189972516033, "grad_norm": 0.77734375, "learning_rate": 5.413503762253611e-07, "loss": 3.9808, "step": 26469 }, { "epoch": 8.81752311151828, "grad_norm": 0.796875, "learning_rate": 5.410484732595216e-07, "loss": 3.9765, "step": 26470 }, { "epoch": 8.81785625052053, "grad_norm": 0.76953125, "learning_rate": 5.407466513503936e-07, "loss": 4.0393, "step": 26471 }, { "epoch": 8.818189389522779, "grad_norm": 0.71875, "learning_rate": 5.40444910501495e-07, "loss": 4.0367, "step": 26472 }, { "epoch": 8.818522528525026, "grad_norm": 0.76171875, "learning_rate": 5.401432507163379e-07, "loss": 4.0504, "step": 26473 }, { "epoch": 8.818855667527275, "grad_norm": 0.734375, "learning_rate": 5.398416719984378e-07, "loss": 4.0641, "step": 26474 }, { "epoch": 8.819188806529525, "grad_norm": 0.75390625, "learning_rate": 5.395401743513062e-07, "loss": 3.9943, "step": 26475 }, { "epoch": 8.819521945531774, "grad_norm": 0.76953125, "learning_rate": 5.392387577784552e-07, "loss": 3.9607, "step": 26476 }, { "epoch": 8.819855084534021, "grad_norm": 0.76171875, "learning_rate": 5.389374222833954e-07, "loss": 4.0362, "step": 26477 }, { "epoch": 8.82018822353627, "grad_norm": 0.78515625, "learning_rate": 5.386361678696355e-07, "loss": 3.9366, "step": 26478 }, { "epoch": 8.82052136253852, "grad_norm": 0.75390625, "learning_rate": 5.383349945406879e-07, "loss": 3.9568, "step": 26479 }, { "epoch": 8.820854501540769, "grad_norm": 0.74609375, "learning_rate": 5.380339023000588e-07, "loss": 3.9806, "step": 26480 }, { "epoch": 8.821187640543016, "grad_norm": 0.74609375, "learning_rate": 5.377328911512547e-07, "loss": 4.0599, "step": 26481 }, { "epoch": 8.821520779545265, "grad_norm": 0.73828125, "learning_rate": 5.37431961097781e-07, "loss": 3.954, "step": 26482 }, { "epoch": 8.821853918547514, "grad_norm": 0.78125, "learning_rate": 5.37131112143145e-07, "loss": 3.9258, "step": 26483 }, { "epoch": 8.822187057549762, "grad_norm": 0.78125, "learning_rate": 5.36830344290852e-07, "loss": 4.0447, "step": 26484 }, { "epoch": 8.822520196552011, "grad_norm": 0.71484375, "learning_rate": 5.365296575444003e-07, "loss": 4.047, "step": 26485 }, { "epoch": 8.82285333555426, "grad_norm": 0.8046875, "learning_rate": 5.362290519072962e-07, "loss": 3.9285, "step": 26486 }, { "epoch": 8.82318647455651, "grad_norm": 0.7734375, "learning_rate": 5.359285273830402e-07, "loss": 3.8503, "step": 26487 }, { "epoch": 8.823519613558757, "grad_norm": 0.7734375, "learning_rate": 5.356280839751338e-07, "loss": 4.0093, "step": 26488 }, { "epoch": 8.823852752561006, "grad_norm": 0.7421875, "learning_rate": 5.353277216870731e-07, "loss": 3.947, "step": 26489 }, { "epoch": 8.824185891563255, "grad_norm": 0.8046875, "learning_rate": 5.350274405223604e-07, "loss": 3.9252, "step": 26490 }, { "epoch": 8.824519030565504, "grad_norm": 0.8125, "learning_rate": 5.347272404844922e-07, "loss": 3.9874, "step": 26491 }, { "epoch": 8.824852169567752, "grad_norm": 0.76953125, "learning_rate": 5.344271215769647e-07, "loss": 3.8895, "step": 26492 }, { "epoch": 8.825185308570001, "grad_norm": 0.7265625, "learning_rate": 5.341270838032736e-07, "loss": 3.985, "step": 26493 }, { "epoch": 8.82551844757225, "grad_norm": 0.7578125, "learning_rate": 5.338271271669143e-07, "loss": 3.9593, "step": 26494 }, { "epoch": 8.825851586574498, "grad_norm": 0.765625, "learning_rate": 5.335272516713799e-07, "loss": 3.9584, "step": 26495 }, { "epoch": 8.826184725576747, "grad_norm": 0.765625, "learning_rate": 5.332274573201626e-07, "loss": 3.9868, "step": 26496 }, { "epoch": 8.826517864578996, "grad_norm": 0.828125, "learning_rate": 5.329277441167571e-07, "loss": 3.942, "step": 26497 }, { "epoch": 8.826851003581245, "grad_norm": 0.734375, "learning_rate": 5.326281120646523e-07, "loss": 3.9177, "step": 26498 }, { "epoch": 8.827184142583492, "grad_norm": 0.76171875, "learning_rate": 5.323285611673387e-07, "loss": 4.0836, "step": 26499 }, { "epoch": 8.827517281585742, "grad_norm": 0.76171875, "learning_rate": 5.320290914283035e-07, "loss": 4.0414, "step": 26500 }, { "epoch": 8.82785042058799, "grad_norm": 0.77734375, "learning_rate": 5.31729702851039e-07, "loss": 4.0098, "step": 26501 }, { "epoch": 8.828183559590238, "grad_norm": 0.7890625, "learning_rate": 5.314303954390315e-07, "loss": 4.0148, "step": 26502 }, { "epoch": 8.828516698592487, "grad_norm": 0.73828125, "learning_rate": 5.311311691957633e-07, "loss": 4.0632, "step": 26503 }, { "epoch": 8.828849837594737, "grad_norm": 0.75390625, "learning_rate": 5.30832024124723e-07, "loss": 3.9212, "step": 26504 }, { "epoch": 8.829182976596986, "grad_norm": 0.76953125, "learning_rate": 5.305329602293948e-07, "loss": 3.8853, "step": 26505 }, { "epoch": 8.829516115599233, "grad_norm": 0.73828125, "learning_rate": 5.302339775132625e-07, "loss": 4.0022, "step": 26506 }, { "epoch": 8.829849254601482, "grad_norm": 0.765625, "learning_rate": 5.299350759798058e-07, "loss": 3.976, "step": 26507 }, { "epoch": 8.830182393603732, "grad_norm": 0.76171875, "learning_rate": 5.296362556325102e-07, "loss": 3.9831, "step": 26508 }, { "epoch": 8.830515532605979, "grad_norm": 0.72265625, "learning_rate": 5.293375164748538e-07, "loss": 3.9813, "step": 26509 }, { "epoch": 8.830848671608228, "grad_norm": 0.75390625, "learning_rate": 5.29038858510317e-07, "loss": 3.9715, "step": 26510 }, { "epoch": 8.831181810610477, "grad_norm": 0.765625, "learning_rate": 5.28740281742379e-07, "loss": 3.9861, "step": 26511 }, { "epoch": 8.831514949612727, "grad_norm": 0.734375, "learning_rate": 5.284417861745167e-07, "loss": 4.0117, "step": 26512 }, { "epoch": 8.831848088614974, "grad_norm": 0.796875, "learning_rate": 5.281433718102066e-07, "loss": 4.0163, "step": 26513 }, { "epoch": 8.832181227617223, "grad_norm": 0.80078125, "learning_rate": 5.27845038652926e-07, "loss": 3.9789, "step": 26514 }, { "epoch": 8.832514366619472, "grad_norm": 0.72265625, "learning_rate": 5.27546786706147e-07, "loss": 3.9734, "step": 26515 }, { "epoch": 8.832847505621721, "grad_norm": 0.765625, "learning_rate": 5.272486159733478e-07, "loss": 4.0316, "step": 26516 }, { "epoch": 8.833180644623969, "grad_norm": 0.734375, "learning_rate": 5.269505264579988e-07, "loss": 3.9511, "step": 26517 }, { "epoch": 8.833513783626218, "grad_norm": 0.7109375, "learning_rate": 5.266525181635723e-07, "loss": 4.0055, "step": 26518 }, { "epoch": 8.833846922628467, "grad_norm": 0.74609375, "learning_rate": 5.263545910935389e-07, "loss": 3.9873, "step": 26519 }, { "epoch": 8.834180061630715, "grad_norm": 0.76953125, "learning_rate": 5.260567452513723e-07, "loss": 3.9981, "step": 26520 }, { "epoch": 8.834513200632964, "grad_norm": 0.75, "learning_rate": 5.257589806405375e-07, "loss": 4.0543, "step": 26521 }, { "epoch": 8.834846339635213, "grad_norm": 0.78125, "learning_rate": 5.254612972645032e-07, "loss": 4.036, "step": 26522 }, { "epoch": 8.835179478637462, "grad_norm": 0.78125, "learning_rate": 5.251636951267391e-07, "loss": 4.0593, "step": 26523 }, { "epoch": 8.83551261763971, "grad_norm": 0.734375, "learning_rate": 5.24866174230711e-07, "loss": 3.9573, "step": 26524 }, { "epoch": 8.835845756641959, "grad_norm": 0.765625, "learning_rate": 5.245687345798833e-07, "loss": 4.0528, "step": 26525 }, { "epoch": 8.836178895644208, "grad_norm": 0.80078125, "learning_rate": 5.242713761777201e-07, "loss": 3.9681, "step": 26526 }, { "epoch": 8.836512034646455, "grad_norm": 0.7734375, "learning_rate": 5.23974099027687e-07, "loss": 3.9418, "step": 26527 }, { "epoch": 8.836845173648705, "grad_norm": 0.7578125, "learning_rate": 5.236769031332453e-07, "loss": 3.9444, "step": 26528 }, { "epoch": 8.837178312650954, "grad_norm": 0.81640625, "learning_rate": 5.233797884978573e-07, "loss": 3.9872, "step": 26529 }, { "epoch": 8.837511451653203, "grad_norm": 0.734375, "learning_rate": 5.230827551249833e-07, "loss": 3.9297, "step": 26530 }, { "epoch": 8.83784459065545, "grad_norm": 0.79296875, "learning_rate": 5.227858030180835e-07, "loss": 3.9486, "step": 26531 }, { "epoch": 8.8381777296577, "grad_norm": 0.8203125, "learning_rate": 5.224889321806165e-07, "loss": 4.0055, "step": 26532 }, { "epoch": 8.838510868659949, "grad_norm": 0.80078125, "learning_rate": 5.221921426160395e-07, "loss": 3.9838, "step": 26533 }, { "epoch": 8.838844007662196, "grad_norm": 0.828125, "learning_rate": 5.218954343278106e-07, "loss": 4.0596, "step": 26534 }, { "epoch": 8.839177146664445, "grad_norm": 0.75390625, "learning_rate": 5.215988073193847e-07, "loss": 3.9549, "step": 26535 }, { "epoch": 8.839510285666695, "grad_norm": 0.734375, "learning_rate": 5.21302261594219e-07, "loss": 3.9513, "step": 26536 }, { "epoch": 8.839843424668944, "grad_norm": 0.7421875, "learning_rate": 5.210057971557639e-07, "loss": 3.9707, "step": 26537 }, { "epoch": 8.840176563671191, "grad_norm": 0.796875, "learning_rate": 5.207094140074775e-07, "loss": 3.9929, "step": 26538 }, { "epoch": 8.84050970267344, "grad_norm": 0.79296875, "learning_rate": 5.20413112152808e-07, "loss": 3.9299, "step": 26539 }, { "epoch": 8.84084284167569, "grad_norm": 0.70703125, "learning_rate": 5.201168915952067e-07, "loss": 4.037, "step": 26540 }, { "epoch": 8.841175980677939, "grad_norm": 0.80859375, "learning_rate": 5.198207523381269e-07, "loss": 3.9274, "step": 26541 }, { "epoch": 8.841509119680186, "grad_norm": 0.7578125, "learning_rate": 5.195246943850154e-07, "loss": 3.9331, "step": 26542 }, { "epoch": 8.841842258682435, "grad_norm": 0.76171875, "learning_rate": 5.192287177393224e-07, "loss": 3.9656, "step": 26543 }, { "epoch": 8.842175397684684, "grad_norm": 0.73046875, "learning_rate": 5.189328224044931e-07, "loss": 4.004, "step": 26544 }, { "epoch": 8.842508536686932, "grad_norm": 0.765625, "learning_rate": 5.186370083839768e-07, "loss": 4.0414, "step": 26545 }, { "epoch": 8.842841675689181, "grad_norm": 0.75390625, "learning_rate": 5.18341275681217e-07, "loss": 3.9939, "step": 26546 }, { "epoch": 8.84317481469143, "grad_norm": 0.78515625, "learning_rate": 5.180456242996595e-07, "loss": 3.9967, "step": 26547 }, { "epoch": 8.84350795369368, "grad_norm": 0.78515625, "learning_rate": 5.177500542427474e-07, "loss": 4.0254, "step": 26548 }, { "epoch": 8.843841092695927, "grad_norm": 0.75390625, "learning_rate": 5.174545655139246e-07, "loss": 3.9794, "step": 26549 }, { "epoch": 8.844174231698176, "grad_norm": 0.71875, "learning_rate": 5.171591581166307e-07, "loss": 3.9941, "step": 26550 }, { "epoch": 8.844507370700425, "grad_norm": 0.75390625, "learning_rate": 5.168638320543073e-07, "loss": 4.0285, "step": 26551 }, { "epoch": 8.844840509702674, "grad_norm": 0.7421875, "learning_rate": 5.165685873303957e-07, "loss": 3.9099, "step": 26552 }, { "epoch": 8.845173648704922, "grad_norm": 0.75, "learning_rate": 5.162734239483341e-07, "loss": 4.0034, "step": 26553 }, { "epoch": 8.845506787707171, "grad_norm": 0.74609375, "learning_rate": 5.159783419115605e-07, "loss": 4.086, "step": 26554 }, { "epoch": 8.84583992670942, "grad_norm": 0.765625, "learning_rate": 5.156833412235104e-07, "loss": 3.9851, "step": 26555 }, { "epoch": 8.846173065711668, "grad_norm": 0.765625, "learning_rate": 5.153884218876246e-07, "loss": 3.9014, "step": 26556 }, { "epoch": 8.846506204713917, "grad_norm": 0.8359375, "learning_rate": 5.150935839073326e-07, "loss": 4.0086, "step": 26557 }, { "epoch": 8.846839343716166, "grad_norm": 0.78125, "learning_rate": 5.147988272860718e-07, "loss": 4.0167, "step": 26558 }, { "epoch": 8.847172482718415, "grad_norm": 0.7734375, "learning_rate": 5.145041520272736e-07, "loss": 3.8418, "step": 26559 }, { "epoch": 8.847505621720662, "grad_norm": 0.76171875, "learning_rate": 5.14209558134372e-07, "loss": 3.9542, "step": 26560 }, { "epoch": 8.847838760722912, "grad_norm": 0.8203125, "learning_rate": 5.139150456107982e-07, "loss": 3.9787, "step": 26561 }, { "epoch": 8.84817189972516, "grad_norm": 0.7421875, "learning_rate": 5.136206144599831e-07, "loss": 3.9253, "step": 26562 }, { "epoch": 8.848505038727408, "grad_norm": 0.7734375, "learning_rate": 5.133262646853529e-07, "loss": 3.9831, "step": 26563 }, { "epoch": 8.848838177729657, "grad_norm": 0.796875, "learning_rate": 5.130319962903415e-07, "loss": 3.9119, "step": 26564 }, { "epoch": 8.849171316731907, "grad_norm": 0.77734375, "learning_rate": 5.12737809278373e-07, "loss": 4.0186, "step": 26565 }, { "epoch": 8.849504455734156, "grad_norm": 0.76171875, "learning_rate": 5.124437036528729e-07, "loss": 3.9655, "step": 26566 }, { "epoch": 8.849837594736403, "grad_norm": 0.73046875, "learning_rate": 5.121496794172692e-07, "loss": 3.9899, "step": 26567 }, { "epoch": 8.850170733738652, "grad_norm": 0.75, "learning_rate": 5.118557365749868e-07, "loss": 3.9841, "step": 26568 }, { "epoch": 8.850503872740902, "grad_norm": 0.796875, "learning_rate": 5.115618751294487e-07, "loss": 3.9731, "step": 26569 }, { "epoch": 8.850837011743149, "grad_norm": 0.76953125, "learning_rate": 5.112680950840765e-07, "loss": 3.9489, "step": 26570 }, { "epoch": 8.851170150745398, "grad_norm": 0.78515625, "learning_rate": 5.109743964422947e-07, "loss": 3.9513, "step": 26571 }, { "epoch": 8.851503289747647, "grad_norm": 0.76171875, "learning_rate": 5.106807792075233e-07, "loss": 4.0309, "step": 26572 }, { "epoch": 8.851836428749897, "grad_norm": 0.765625, "learning_rate": 5.103872433831818e-07, "loss": 3.9806, "step": 26573 }, { "epoch": 8.852169567752144, "grad_norm": 0.79296875, "learning_rate": 5.100937889726895e-07, "loss": 3.9583, "step": 26574 }, { "epoch": 8.852502706754393, "grad_norm": 0.8046875, "learning_rate": 5.098004159794642e-07, "loss": 3.9203, "step": 26575 }, { "epoch": 8.852835845756642, "grad_norm": 0.77734375, "learning_rate": 5.095071244069233e-07, "loss": 3.9522, "step": 26576 }, { "epoch": 8.853168984758891, "grad_norm": 0.76171875, "learning_rate": 5.092139142584814e-07, "loss": 3.9213, "step": 26577 }, { "epoch": 8.853502123761139, "grad_norm": 0.7734375, "learning_rate": 5.089207855375577e-07, "loss": 3.9323, "step": 26578 }, { "epoch": 8.853835262763388, "grad_norm": 0.75390625, "learning_rate": 5.086277382475635e-07, "loss": 3.9211, "step": 26579 }, { "epoch": 8.854168401765637, "grad_norm": 0.78125, "learning_rate": 5.083347723919127e-07, "loss": 3.9537, "step": 26580 }, { "epoch": 8.854501540767885, "grad_norm": 0.76171875, "learning_rate": 5.080418879740167e-07, "loss": 3.9406, "step": 26581 }, { "epoch": 8.854834679770134, "grad_norm": 0.77734375, "learning_rate": 5.077490849972913e-07, "loss": 4.0339, "step": 26582 }, { "epoch": 8.855167818772383, "grad_norm": 0.77734375, "learning_rate": 5.074563634651419e-07, "loss": 3.9376, "step": 26583 }, { "epoch": 8.855500957774632, "grad_norm": 0.76171875, "learning_rate": 5.071637233809792e-07, "loss": 4.0588, "step": 26584 }, { "epoch": 8.85583409677688, "grad_norm": 0.765625, "learning_rate": 5.068711647482138e-07, "loss": 3.9494, "step": 26585 }, { "epoch": 8.856167235779129, "grad_norm": 0.77734375, "learning_rate": 5.065786875702521e-07, "loss": 3.9811, "step": 26586 }, { "epoch": 8.856500374781378, "grad_norm": 0.765625, "learning_rate": 5.062862918505007e-07, "loss": 4.0305, "step": 26587 }, { "epoch": 8.856833513783627, "grad_norm": 0.72265625, "learning_rate": 5.059939775923649e-07, "loss": 3.9759, "step": 26588 }, { "epoch": 8.857166652785875, "grad_norm": 0.73828125, "learning_rate": 5.057017447992516e-07, "loss": 4.0201, "step": 26589 }, { "epoch": 8.857499791788124, "grad_norm": 0.7421875, "learning_rate": 5.054095934745625e-07, "loss": 4.0286, "step": 26590 }, { "epoch": 8.857832930790373, "grad_norm": 0.7578125, "learning_rate": 5.05117523621702e-07, "loss": 4.0031, "step": 26591 }, { "epoch": 8.85816606979262, "grad_norm": 0.78125, "learning_rate": 5.048255352440714e-07, "loss": 4.0118, "step": 26592 }, { "epoch": 8.85849920879487, "grad_norm": 0.77734375, "learning_rate": 5.045336283450721e-07, "loss": 3.9951, "step": 26593 }, { "epoch": 8.858832347797119, "grad_norm": 0.78125, "learning_rate": 5.04241802928104e-07, "loss": 3.9816, "step": 26594 }, { "epoch": 8.859165486799366, "grad_norm": 0.79296875, "learning_rate": 5.039500589965643e-07, "loss": 3.9544, "step": 26595 }, { "epoch": 8.859498625801615, "grad_norm": 0.75, "learning_rate": 5.036583965538544e-07, "loss": 4.0093, "step": 26596 }, { "epoch": 8.859831764803864, "grad_norm": 0.75, "learning_rate": 5.0336681560337e-07, "loss": 4.0186, "step": 26597 }, { "epoch": 8.860164903806114, "grad_norm": 0.765625, "learning_rate": 5.030753161485075e-07, "loss": 3.9925, "step": 26598 }, { "epoch": 8.860498042808361, "grad_norm": 0.74609375, "learning_rate": 5.027838981926617e-07, "loss": 4.0192, "step": 26599 }, { "epoch": 8.86083118181061, "grad_norm": 0.80859375, "learning_rate": 5.02492561739229e-07, "loss": 3.9833, "step": 26600 }, { "epoch": 8.86116432081286, "grad_norm": 0.75390625, "learning_rate": 5.022013067916009e-07, "loss": 3.9482, "step": 26601 }, { "epoch": 8.861497459815109, "grad_norm": 0.7421875, "learning_rate": 5.019101333531698e-07, "loss": 4.015, "step": 26602 }, { "epoch": 8.861830598817356, "grad_norm": 0.77734375, "learning_rate": 5.01619041427327e-07, "loss": 3.9309, "step": 26603 }, { "epoch": 8.862163737819605, "grad_norm": 0.75, "learning_rate": 5.013280310174648e-07, "loss": 3.989, "step": 26604 }, { "epoch": 8.862496876821854, "grad_norm": 0.72265625, "learning_rate": 5.010371021269722e-07, "loss": 4.0543, "step": 26605 }, { "epoch": 8.862830015824102, "grad_norm": 0.7265625, "learning_rate": 5.007462547592373e-07, "loss": 3.8878, "step": 26606 }, { "epoch": 8.863163154826351, "grad_norm": 0.7578125, "learning_rate": 5.004554889176475e-07, "loss": 3.9753, "step": 26607 }, { "epoch": 8.8634962938286, "grad_norm": 0.8046875, "learning_rate": 5.001648046055915e-07, "loss": 4.0176, "step": 26608 }, { "epoch": 8.86382943283085, "grad_norm": 0.75390625, "learning_rate": 4.998742018264544e-07, "loss": 3.9955, "step": 26609 }, { "epoch": 8.864162571833097, "grad_norm": 0.7734375, "learning_rate": 4.995836805836191e-07, "loss": 3.9632, "step": 26610 }, { "epoch": 8.864495710835346, "grad_norm": 0.75390625, "learning_rate": 4.992932408804712e-07, "loss": 3.9654, "step": 26611 }, { "epoch": 8.864828849837595, "grad_norm": 0.7734375, "learning_rate": 4.990028827203941e-07, "loss": 3.9694, "step": 26612 }, { "epoch": 8.865161988839844, "grad_norm": 0.77734375, "learning_rate": 4.987126061067699e-07, "loss": 3.9628, "step": 26613 }, { "epoch": 8.865495127842092, "grad_norm": 0.77734375, "learning_rate": 4.984224110429766e-07, "loss": 4.0191, "step": 26614 }, { "epoch": 8.86582826684434, "grad_norm": 0.72265625, "learning_rate": 4.981322975323993e-07, "loss": 4.0144, "step": 26615 }, { "epoch": 8.86616140584659, "grad_norm": 0.73828125, "learning_rate": 4.978422655784134e-07, "loss": 3.9803, "step": 26616 }, { "epoch": 8.866494544848837, "grad_norm": 0.75390625, "learning_rate": 4.975523151843997e-07, "loss": 4.0115, "step": 26617 }, { "epoch": 8.866827683851087, "grad_norm": 0.73046875, "learning_rate": 4.972624463537337e-07, "loss": 3.9987, "step": 26618 }, { "epoch": 8.867160822853336, "grad_norm": 0.78125, "learning_rate": 4.969726590897919e-07, "loss": 4.0227, "step": 26619 }, { "epoch": 8.867493961855585, "grad_norm": 0.7890625, "learning_rate": 4.966829533959508e-07, "loss": 4.0786, "step": 26620 }, { "epoch": 8.867827100857832, "grad_norm": 0.734375, "learning_rate": 4.963933292755827e-07, "loss": 3.9491, "step": 26621 }, { "epoch": 8.868160239860082, "grad_norm": 0.76171875, "learning_rate": 4.961037867320631e-07, "loss": 3.9604, "step": 26622 }, { "epoch": 8.86849337886233, "grad_norm": 0.74609375, "learning_rate": 4.958143257687653e-07, "loss": 4.0809, "step": 26623 }, { "epoch": 8.868826517864578, "grad_norm": 0.7734375, "learning_rate": 4.955249463890582e-07, "loss": 3.9646, "step": 26624 }, { "epoch": 8.869159656866827, "grad_norm": 0.76953125, "learning_rate": 4.952356485963133e-07, "loss": 3.9651, "step": 26625 }, { "epoch": 8.869492795869077, "grad_norm": 0.79296875, "learning_rate": 4.949464323939021e-07, "loss": 4.0192, "step": 26626 }, { "epoch": 8.869825934871326, "grad_norm": 0.83203125, "learning_rate": 4.946572977851927e-07, "loss": 3.9936, "step": 26627 }, { "epoch": 8.870159073873573, "grad_norm": 0.75390625, "learning_rate": 4.943682447735506e-07, "loss": 4.0127, "step": 26628 }, { "epoch": 8.870492212875822, "grad_norm": 0.76171875, "learning_rate": 4.940792733623451e-07, "loss": 3.9938, "step": 26629 }, { "epoch": 8.870825351878072, "grad_norm": 0.78515625, "learning_rate": 4.937903835549406e-07, "loss": 3.9879, "step": 26630 }, { "epoch": 8.871158490880319, "grad_norm": 0.79296875, "learning_rate": 4.93501575354704e-07, "loss": 3.9516, "step": 26631 }, { "epoch": 8.871491629882568, "grad_norm": 0.76953125, "learning_rate": 4.932128487649956e-07, "loss": 3.9944, "step": 26632 }, { "epoch": 8.871824768884817, "grad_norm": 0.7421875, "learning_rate": 4.929242037891829e-07, "loss": 4.0098, "step": 26633 }, { "epoch": 8.872157907887066, "grad_norm": 0.796875, "learning_rate": 4.926356404306257e-07, "loss": 3.9904, "step": 26634 }, { "epoch": 8.872491046889314, "grad_norm": 0.7890625, "learning_rate": 4.923471586926845e-07, "loss": 3.9935, "step": 26635 }, { "epoch": 8.872824185891563, "grad_norm": 0.73828125, "learning_rate": 4.920587585787217e-07, "loss": 3.9356, "step": 26636 }, { "epoch": 8.873157324893812, "grad_norm": 0.74609375, "learning_rate": 4.917704400920939e-07, "loss": 3.9261, "step": 26637 }, { "epoch": 8.873490463896061, "grad_norm": 0.765625, "learning_rate": 4.914822032361616e-07, "loss": 3.9814, "step": 26638 }, { "epoch": 8.873823602898309, "grad_norm": 0.78125, "learning_rate": 4.911940480142796e-07, "loss": 3.9647, "step": 26639 }, { "epoch": 8.874156741900558, "grad_norm": 0.75, "learning_rate": 4.909059744298078e-07, "loss": 4.06, "step": 26640 }, { "epoch": 8.874489880902807, "grad_norm": 0.75390625, "learning_rate": 4.906179824860985e-07, "loss": 4.003, "step": 26641 }, { "epoch": 8.874823019905055, "grad_norm": 0.8046875, "learning_rate": 4.903300721865081e-07, "loss": 4.0389, "step": 26642 }, { "epoch": 8.875156158907304, "grad_norm": 0.77734375, "learning_rate": 4.900422435343882e-07, "loss": 3.9713, "step": 26643 }, { "epoch": 8.875489297909553, "grad_norm": 0.8046875, "learning_rate": 4.897544965330944e-07, "loss": 3.9388, "step": 26644 }, { "epoch": 8.875822436911802, "grad_norm": 0.78515625, "learning_rate": 4.894668311859773e-07, "loss": 3.9362, "step": 26645 }, { "epoch": 8.87615557591405, "grad_norm": 0.796875, "learning_rate": 4.891792474963852e-07, "loss": 3.9593, "step": 26646 }, { "epoch": 8.876488714916299, "grad_norm": 0.75390625, "learning_rate": 4.888917454676686e-07, "loss": 3.9879, "step": 26647 }, { "epoch": 8.876821853918548, "grad_norm": 0.8046875, "learning_rate": 4.886043251031782e-07, "loss": 3.9178, "step": 26648 }, { "epoch": 8.877154992920797, "grad_norm": 0.76171875, "learning_rate": 4.883169864062606e-07, "loss": 3.902, "step": 26649 }, { "epoch": 8.877488131923045, "grad_norm": 0.78515625, "learning_rate": 4.880297293802629e-07, "loss": 3.9561, "step": 26650 }, { "epoch": 8.877821270925294, "grad_norm": 0.7578125, "learning_rate": 4.877425540285302e-07, "loss": 3.9444, "step": 26651 }, { "epoch": 8.878154409927543, "grad_norm": 0.765625, "learning_rate": 4.874554603544096e-07, "loss": 4.0466, "step": 26652 }, { "epoch": 8.87848754892979, "grad_norm": 0.75, "learning_rate": 4.871684483612427e-07, "loss": 3.9448, "step": 26653 }, { "epoch": 8.87882068793204, "grad_norm": 0.81640625, "learning_rate": 4.868815180523742e-07, "loss": 4.004, "step": 26654 }, { "epoch": 8.879153826934289, "grad_norm": 0.79296875, "learning_rate": 4.865946694311458e-07, "loss": 4.0232, "step": 26655 }, { "epoch": 8.879486965936536, "grad_norm": 0.77734375, "learning_rate": 4.863079025008987e-07, "loss": 3.9565, "step": 26656 }, { "epoch": 8.879820104938785, "grad_norm": 0.7421875, "learning_rate": 4.860212172649722e-07, "loss": 4.0619, "step": 26657 }, { "epoch": 8.880153243941034, "grad_norm": 0.78125, "learning_rate": 4.857346137267052e-07, "loss": 4.07, "step": 26658 }, { "epoch": 8.880486382943284, "grad_norm": 0.734375, "learning_rate": 4.854480918894383e-07, "loss": 4.0625, "step": 26659 }, { "epoch": 8.880819521945531, "grad_norm": 0.71484375, "learning_rate": 4.85161651756508e-07, "loss": 4.0778, "step": 26660 }, { "epoch": 8.88115266094778, "grad_norm": 0.79296875, "learning_rate": 4.8487529333125e-07, "loss": 3.945, "step": 26661 }, { "epoch": 8.88148579995003, "grad_norm": 0.76171875, "learning_rate": 4.845890166169992e-07, "loss": 3.9891, "step": 26662 }, { "epoch": 8.881818938952279, "grad_norm": 0.73046875, "learning_rate": 4.843028216170936e-07, "loss": 4.062, "step": 26663 }, { "epoch": 8.882152077954526, "grad_norm": 0.76953125, "learning_rate": 4.840167083348623e-07, "loss": 3.9105, "step": 26664 }, { "epoch": 8.882485216956775, "grad_norm": 0.765625, "learning_rate": 4.837306767736385e-07, "loss": 4.0041, "step": 26665 }, { "epoch": 8.882818355959024, "grad_norm": 0.74609375, "learning_rate": 4.83444726936757e-07, "loss": 3.9573, "step": 26666 }, { "epoch": 8.883151494961272, "grad_norm": 0.7421875, "learning_rate": 4.831588588275459e-07, "loss": 3.9753, "step": 26667 }, { "epoch": 8.883484633963521, "grad_norm": 0.7578125, "learning_rate": 4.828730724493361e-07, "loss": 4.002, "step": 26668 }, { "epoch": 8.88381777296577, "grad_norm": 0.7578125, "learning_rate": 4.825873678054546e-07, "loss": 3.9266, "step": 26669 }, { "epoch": 8.88415091196802, "grad_norm": 0.79296875, "learning_rate": 4.823017448992323e-07, "loss": 3.9926, "step": 26670 }, { "epoch": 8.884484050970267, "grad_norm": 0.765625, "learning_rate": 4.82016203733994e-07, "loss": 4.07, "step": 26671 }, { "epoch": 8.884817189972516, "grad_norm": 0.7578125, "learning_rate": 4.817307443130653e-07, "loss": 3.9724, "step": 26672 }, { "epoch": 8.885150328974765, "grad_norm": 0.7578125, "learning_rate": 4.814453666397728e-07, "loss": 4.0563, "step": 26673 }, { "epoch": 8.885483467977014, "grad_norm": 0.77734375, "learning_rate": 4.811600707174388e-07, "loss": 3.9998, "step": 26674 }, { "epoch": 8.885816606979262, "grad_norm": 0.78515625, "learning_rate": 4.808748565493881e-07, "loss": 4.0626, "step": 26675 }, { "epoch": 8.88614974598151, "grad_norm": 0.7421875, "learning_rate": 4.805897241389398e-07, "loss": 3.9683, "step": 26676 }, { "epoch": 8.88648288498376, "grad_norm": 0.734375, "learning_rate": 4.803046734894195e-07, "loss": 3.9209, "step": 26677 }, { "epoch": 8.886816023986007, "grad_norm": 0.79296875, "learning_rate": 4.800197046041438e-07, "loss": 3.9078, "step": 26678 }, { "epoch": 8.887149162988257, "grad_norm": 0.796875, "learning_rate": 4.797348174864341e-07, "loss": 3.9557, "step": 26679 }, { "epoch": 8.887482301990506, "grad_norm": 0.75390625, "learning_rate": 4.79450012139607e-07, "loss": 4.0228, "step": 26680 }, { "epoch": 8.887815440992755, "grad_norm": 0.75, "learning_rate": 4.791652885669822e-07, "loss": 3.9644, "step": 26681 }, { "epoch": 8.888148579995002, "grad_norm": 0.78515625, "learning_rate": 4.78880646771874e-07, "loss": 3.9781, "step": 26682 }, { "epoch": 8.888481718997252, "grad_norm": 0.76953125, "learning_rate": 4.785960867575978e-07, "loss": 3.9761, "step": 26683 }, { "epoch": 8.8888148579995, "grad_norm": 0.7578125, "learning_rate": 4.783116085274694e-07, "loss": 3.9948, "step": 26684 }, { "epoch": 8.889147997001748, "grad_norm": 0.79296875, "learning_rate": 4.78027212084802e-07, "loss": 3.9749, "step": 26685 }, { "epoch": 8.889481136003997, "grad_norm": 0.734375, "learning_rate": 4.777428974329079e-07, "loss": 4.007, "step": 26686 }, { "epoch": 8.889814275006247, "grad_norm": 0.79296875, "learning_rate": 4.774586645750977e-07, "loss": 3.9822, "step": 26687 }, { "epoch": 8.890147414008496, "grad_norm": 0.828125, "learning_rate": 4.771745135146849e-07, "loss": 3.9586, "step": 26688 }, { "epoch": 8.890480553010743, "grad_norm": 0.77734375, "learning_rate": 4.768904442549774e-07, "loss": 3.9181, "step": 26689 }, { "epoch": 8.890813692012992, "grad_norm": 0.81640625, "learning_rate": 4.7660645679928505e-07, "loss": 3.9208, "step": 26690 }, { "epoch": 8.891146831015242, "grad_norm": 0.75, "learning_rate": 4.7632255115091285e-07, "loss": 3.9609, "step": 26691 }, { "epoch": 8.891479970017489, "grad_norm": 0.79296875, "learning_rate": 4.7603872731317053e-07, "loss": 4.0044, "step": 26692 }, { "epoch": 8.891813109019738, "grad_norm": 0.8046875, "learning_rate": 4.7575498528936306e-07, "loss": 3.9468, "step": 26693 }, { "epoch": 8.892146248021987, "grad_norm": 0.74609375, "learning_rate": 4.7547132508279604e-07, "loss": 3.9487, "step": 26694 }, { "epoch": 8.892479387024236, "grad_norm": 0.7578125, "learning_rate": 4.751877466967719e-07, "loss": 3.939, "step": 26695 }, { "epoch": 8.892812526026484, "grad_norm": 0.78125, "learning_rate": 4.7490425013459545e-07, "loss": 3.894, "step": 26696 }, { "epoch": 8.893145665028733, "grad_norm": 0.71875, "learning_rate": 4.7462083539956816e-07, "loss": 3.9659, "step": 26697 }, { "epoch": 8.893478804030982, "grad_norm": 0.7734375, "learning_rate": 4.743375024949917e-07, "loss": 4.0052, "step": 26698 }, { "epoch": 8.893811943033231, "grad_norm": 0.7578125, "learning_rate": 4.74054251424165e-07, "loss": 3.9733, "step": 26699 }, { "epoch": 8.894145082035479, "grad_norm": 0.7734375, "learning_rate": 4.737710821903887e-07, "loss": 3.9588, "step": 26700 }, { "epoch": 8.894478221037728, "grad_norm": 0.74609375, "learning_rate": 4.734879947969603e-07, "loss": 3.9338, "step": 26701 }, { "epoch": 8.894811360039977, "grad_norm": 0.77734375, "learning_rate": 4.7320498924717625e-07, "loss": 3.9527, "step": 26702 }, { "epoch": 8.895144499042225, "grad_norm": 0.79296875, "learning_rate": 4.7292206554433477e-07, "loss": 3.9034, "step": 26703 }, { "epoch": 8.895477638044474, "grad_norm": 0.7890625, "learning_rate": 4.7263922369173154e-07, "loss": 3.932, "step": 26704 }, { "epoch": 8.895810777046723, "grad_norm": 0.765625, "learning_rate": 4.723564636926597e-07, "loss": 3.9626, "step": 26705 }, { "epoch": 8.896143916048972, "grad_norm": 0.765625, "learning_rate": 4.7207378555041173e-07, "loss": 4.0339, "step": 26706 }, { "epoch": 8.89647705505122, "grad_norm": 0.75, "learning_rate": 4.717911892682833e-07, "loss": 4.0002, "step": 26707 }, { "epoch": 8.896810194053469, "grad_norm": 0.7578125, "learning_rate": 4.7150867484956507e-07, "loss": 3.9874, "step": 26708 }, { "epoch": 8.897143333055718, "grad_norm": 0.765625, "learning_rate": 4.7122624229754525e-07, "loss": 3.9957, "step": 26709 }, { "epoch": 8.897476472057967, "grad_norm": 0.77734375, "learning_rate": 4.709438916155162e-07, "loss": 4.0445, "step": 26710 }, { "epoch": 8.897809611060215, "grad_norm": 0.70703125, "learning_rate": 4.706616228067662e-07, "loss": 3.9386, "step": 26711 }, { "epoch": 8.898142750062464, "grad_norm": 0.8046875, "learning_rate": 4.703794358745825e-07, "loss": 3.9487, "step": 26712 }, { "epoch": 8.898475889064713, "grad_norm": 0.765625, "learning_rate": 4.700973308222509e-07, "loss": 3.9926, "step": 26713 }, { "epoch": 8.89880902806696, "grad_norm": 0.75390625, "learning_rate": 4.6981530765306037e-07, "loss": 3.9945, "step": 26714 }, { "epoch": 8.89914216706921, "grad_norm": 0.80078125, "learning_rate": 4.6953336637029414e-07, "loss": 3.9757, "step": 26715 }, { "epoch": 8.899475306071459, "grad_norm": 0.7734375, "learning_rate": 4.6925150697723547e-07, "loss": 3.9923, "step": 26716 }, { "epoch": 8.899808445073708, "grad_norm": 0.7421875, "learning_rate": 4.6896972947716915e-07, "loss": 3.9915, "step": 26717 }, { "epoch": 8.900141584075955, "grad_norm": 0.765625, "learning_rate": 4.6868803387337594e-07, "loss": 3.9561, "step": 26718 }, { "epoch": 8.900474723078204, "grad_norm": 0.79296875, "learning_rate": 4.6840642016913737e-07, "loss": 3.937, "step": 26719 }, { "epoch": 8.900807862080454, "grad_norm": 0.75390625, "learning_rate": 4.6812488836773166e-07, "loss": 4.0394, "step": 26720 }, { "epoch": 8.901141001082701, "grad_norm": 0.796875, "learning_rate": 4.67843438472442e-07, "loss": 3.993, "step": 26721 }, { "epoch": 8.90147414008495, "grad_norm": 0.83984375, "learning_rate": 4.675620704865449e-07, "loss": 3.9521, "step": 26722 }, { "epoch": 8.9018072790872, "grad_norm": 0.76953125, "learning_rate": 4.67280784413317e-07, "loss": 4.0008, "step": 26723 }, { "epoch": 8.902140418089449, "grad_norm": 0.7734375, "learning_rate": 4.6699958025603397e-07, "loss": 3.9831, "step": 26724 }, { "epoch": 8.902473557091696, "grad_norm": 0.77734375, "learning_rate": 4.6671845801797314e-07, "loss": 3.9771, "step": 26725 }, { "epoch": 8.902806696093945, "grad_norm": 0.74609375, "learning_rate": 4.6643741770241026e-07, "loss": 3.9467, "step": 26726 }, { "epoch": 8.903139835096194, "grad_norm": 0.796875, "learning_rate": 4.6615645931261356e-07, "loss": 3.9771, "step": 26727 }, { "epoch": 8.903472974098442, "grad_norm": 0.79296875, "learning_rate": 4.6587558285186045e-07, "loss": 3.9145, "step": 26728 }, { "epoch": 8.903806113100691, "grad_norm": 0.7734375, "learning_rate": 4.655947883234207e-07, "loss": 4.0342, "step": 26729 }, { "epoch": 8.90413925210294, "grad_norm": 0.75, "learning_rate": 4.6531407573056516e-07, "loss": 3.9736, "step": 26730 }, { "epoch": 8.90447239110519, "grad_norm": 0.76171875, "learning_rate": 4.650334450765628e-07, "loss": 4.0284, "step": 26731 }, { "epoch": 8.904805530107437, "grad_norm": 0.78125, "learning_rate": 4.6475289636468437e-07, "loss": 3.9219, "step": 26732 }, { "epoch": 8.905138669109686, "grad_norm": 0.78515625, "learning_rate": 4.644724295981956e-07, "loss": 3.9746, "step": 26733 }, { "epoch": 8.905471808111935, "grad_norm": 0.82421875, "learning_rate": 4.6419204478036466e-07, "loss": 3.9525, "step": 26734 }, { "epoch": 8.905804947114184, "grad_norm": 0.7265625, "learning_rate": 4.6391174191445644e-07, "loss": 4.0072, "step": 26735 }, { "epoch": 8.906138086116432, "grad_norm": 0.74609375, "learning_rate": 4.6363152100373673e-07, "loss": 3.9988, "step": 26736 }, { "epoch": 8.90647122511868, "grad_norm": 0.74609375, "learning_rate": 4.633513820514687e-07, "loss": 3.9388, "step": 26737 }, { "epoch": 8.90680436412093, "grad_norm": 0.77734375, "learning_rate": 4.630713250609164e-07, "loss": 3.9686, "step": 26738 }, { "epoch": 8.907137503123177, "grad_norm": 0.76171875, "learning_rate": 4.6279135003533896e-07, "loss": 3.9879, "step": 26739 }, { "epoch": 8.907470642125427, "grad_norm": 0.765625, "learning_rate": 4.62511456978002e-07, "loss": 4.0619, "step": 26740 }, { "epoch": 8.907803781127676, "grad_norm": 0.78125, "learning_rate": 4.62231645892163e-07, "loss": 3.9579, "step": 26741 }, { "epoch": 8.908136920129925, "grad_norm": 0.7734375, "learning_rate": 4.6195191678108094e-07, "loss": 3.9737, "step": 26742 }, { "epoch": 8.908470059132172, "grad_norm": 0.796875, "learning_rate": 4.6167226964801414e-07, "loss": 3.9824, "step": 26743 }, { "epoch": 8.908803198134422, "grad_norm": 0.76953125, "learning_rate": 4.613927044962224e-07, "loss": 3.9539, "step": 26744 }, { "epoch": 8.90913633713667, "grad_norm": 0.796875, "learning_rate": 4.61113221328959e-07, "loss": 4.0111, "step": 26745 }, { "epoch": 8.909469476138918, "grad_norm": 0.8046875, "learning_rate": 4.608338201494797e-07, "loss": 3.9604, "step": 26746 }, { "epoch": 8.909802615141167, "grad_norm": 0.71484375, "learning_rate": 4.6055450096104014e-07, "loss": 4.0119, "step": 26747 }, { "epoch": 8.910135754143417, "grad_norm": 0.8125, "learning_rate": 4.602752637668928e-07, "loss": 4.0094, "step": 26748 }, { "epoch": 8.910468893145666, "grad_norm": 0.7421875, "learning_rate": 4.5999610857029167e-07, "loss": 3.9588, "step": 26749 }, { "epoch": 8.910802032147913, "grad_norm": 0.7890625, "learning_rate": 4.597170353744859e-07, "loss": 4.0301, "step": 26750 }, { "epoch": 8.911135171150162, "grad_norm": 0.765625, "learning_rate": 4.594380441827278e-07, "loss": 3.9767, "step": 26751 }, { "epoch": 8.911468310152411, "grad_norm": 0.8203125, "learning_rate": 4.5915913499826737e-07, "loss": 3.936, "step": 26752 }, { "epoch": 8.911801449154659, "grad_norm": 0.72265625, "learning_rate": 4.58880307824352e-07, "loss": 3.9412, "step": 26753 }, { "epoch": 8.912134588156908, "grad_norm": 0.765625, "learning_rate": 4.5860156266423063e-07, "loss": 4.0493, "step": 26754 }, { "epoch": 8.912467727159157, "grad_norm": 0.83984375, "learning_rate": 4.583228995211491e-07, "loss": 4.1009, "step": 26755 }, { "epoch": 8.912800866161406, "grad_norm": 0.7890625, "learning_rate": 4.58044318398354e-07, "loss": 4.069, "step": 26756 }, { "epoch": 8.913134005163654, "grad_norm": 0.7734375, "learning_rate": 4.5776581929908773e-07, "loss": 3.9256, "step": 26757 }, { "epoch": 8.913467144165903, "grad_norm": 0.7734375, "learning_rate": 4.574874022265976e-07, "loss": 3.9311, "step": 26758 }, { "epoch": 8.913800283168152, "grad_norm": 0.79296875, "learning_rate": 4.572090671841261e-07, "loss": 3.9315, "step": 26759 }, { "epoch": 8.914133422170401, "grad_norm": 0.76953125, "learning_rate": 4.569308141749132e-07, "loss": 3.9434, "step": 26760 }, { "epoch": 8.914466561172649, "grad_norm": 0.80859375, "learning_rate": 4.5665264320220027e-07, "loss": 3.9334, "step": 26761 }, { "epoch": 8.914799700174898, "grad_norm": 0.828125, "learning_rate": 4.5637455426923075e-07, "loss": 3.9771, "step": 26762 }, { "epoch": 8.915132839177147, "grad_norm": 0.79296875, "learning_rate": 4.5609654737923945e-07, "loss": 3.9801, "step": 26763 }, { "epoch": 8.915465978179395, "grad_norm": 0.79296875, "learning_rate": 4.558186225354663e-07, "loss": 3.9489, "step": 26764 }, { "epoch": 8.915799117181644, "grad_norm": 0.78515625, "learning_rate": 4.5554077974114873e-07, "loss": 4.0033, "step": 26765 }, { "epoch": 8.916132256183893, "grad_norm": 0.73046875, "learning_rate": 4.5526301899952244e-07, "loss": 3.9562, "step": 26766 }, { "epoch": 8.916465395186142, "grad_norm": 0.75, "learning_rate": 4.549853403138241e-07, "loss": 4.0327, "step": 26767 }, { "epoch": 8.91679853418839, "grad_norm": 0.76171875, "learning_rate": 4.547077436872851e-07, "loss": 4.0594, "step": 26768 }, { "epoch": 8.917131673190639, "grad_norm": 0.74609375, "learning_rate": 4.5443022912314224e-07, "loss": 3.9533, "step": 26769 }, { "epoch": 8.917464812192888, "grad_norm": 0.73828125, "learning_rate": 4.54152796624627e-07, "loss": 4.0262, "step": 26770 }, { "epoch": 8.917797951195137, "grad_norm": 0.73828125, "learning_rate": 4.5387544619497013e-07, "loss": 4.0021, "step": 26771 }, { "epoch": 8.918131090197384, "grad_norm": 0.7890625, "learning_rate": 4.535981778374024e-07, "loss": 3.9984, "step": 26772 }, { "epoch": 8.918464229199634, "grad_norm": 0.765625, "learning_rate": 4.5332099155515284e-07, "loss": 3.9401, "step": 26773 }, { "epoch": 8.918797368201883, "grad_norm": 0.80859375, "learning_rate": 4.530438873514514e-07, "loss": 4.0585, "step": 26774 }, { "epoch": 8.91913050720413, "grad_norm": 0.7578125, "learning_rate": 4.5276686522952386e-07, "loss": 3.9393, "step": 26775 }, { "epoch": 8.91946364620638, "grad_norm": 0.78125, "learning_rate": 4.5248992519259845e-07, "loss": 3.9504, "step": 26776 }, { "epoch": 8.919796785208629, "grad_norm": 0.7578125, "learning_rate": 4.522130672439009e-07, "loss": 3.9363, "step": 26777 }, { "epoch": 8.920129924210878, "grad_norm": 0.734375, "learning_rate": 4.519362913866562e-07, "loss": 3.9711, "step": 26778 }, { "epoch": 8.920463063213125, "grad_norm": 0.7578125, "learning_rate": 4.5165959762408586e-07, "loss": 3.9047, "step": 26779 }, { "epoch": 8.920796202215374, "grad_norm": 0.76953125, "learning_rate": 4.5138298595941737e-07, "loss": 3.9377, "step": 26780 }, { "epoch": 8.921129341217624, "grad_norm": 0.78125, "learning_rate": 4.511064563958681e-07, "loss": 3.9787, "step": 26781 }, { "epoch": 8.921462480219871, "grad_norm": 0.71484375, "learning_rate": 4.508300089366604e-07, "loss": 3.9376, "step": 26782 }, { "epoch": 8.92179561922212, "grad_norm": 0.765625, "learning_rate": 4.5055364358501437e-07, "loss": 3.9755, "step": 26783 }, { "epoch": 8.92212875822437, "grad_norm": 0.8046875, "learning_rate": 4.5027736034414984e-07, "loss": 3.922, "step": 26784 }, { "epoch": 8.922461897226619, "grad_norm": 0.80078125, "learning_rate": 4.500011592172851e-07, "loss": 3.9527, "step": 26785 }, { "epoch": 8.922795036228866, "grad_norm": 0.7578125, "learning_rate": 4.4972504020763583e-07, "loss": 3.927, "step": 26786 }, { "epoch": 8.923128175231115, "grad_norm": 0.734375, "learning_rate": 4.4944900331841787e-07, "loss": 3.9608, "step": 26787 }, { "epoch": 8.923461314233364, "grad_norm": 0.76953125, "learning_rate": 4.4917304855284944e-07, "loss": 3.9539, "step": 26788 }, { "epoch": 8.923794453235612, "grad_norm": 0.7109375, "learning_rate": 4.48897175914143e-07, "loss": 3.9734, "step": 26789 }, { "epoch": 8.924127592237861, "grad_norm": 0.7890625, "learning_rate": 4.486213854055102e-07, "loss": 3.987, "step": 26790 }, { "epoch": 8.92446073124011, "grad_norm": 0.76171875, "learning_rate": 4.4834567703016666e-07, "loss": 3.9708, "step": 26791 }, { "epoch": 8.92479387024236, "grad_norm": 0.7421875, "learning_rate": 4.4807005079132156e-07, "loss": 4.0031, "step": 26792 }, { "epoch": 8.925127009244607, "grad_norm": 0.73046875, "learning_rate": 4.477945066921857e-07, "loss": 4.0017, "step": 26793 }, { "epoch": 8.925460148246856, "grad_norm": 0.75390625, "learning_rate": 4.475190447359673e-07, "loss": 3.9948, "step": 26794 }, { "epoch": 8.925793287249105, "grad_norm": 0.81640625, "learning_rate": 4.4724366492587793e-07, "loss": 3.9711, "step": 26795 }, { "epoch": 8.926126426251354, "grad_norm": 0.83203125, "learning_rate": 4.4696836726512427e-07, "loss": 3.9622, "step": 26796 }, { "epoch": 8.926459565253602, "grad_norm": 0.76171875, "learning_rate": 4.4669315175691116e-07, "loss": 3.9374, "step": 26797 }, { "epoch": 8.92679270425585, "grad_norm": 0.7734375, "learning_rate": 4.464180184044461e-07, "loss": 3.9413, "step": 26798 }, { "epoch": 8.9271258432581, "grad_norm": 0.78125, "learning_rate": 4.4614296721093324e-07, "loss": 3.9704, "step": 26799 }, { "epoch": 8.927458982260347, "grad_norm": 0.75390625, "learning_rate": 4.4586799817957575e-07, "loss": 4.0249, "step": 26800 }, { "epoch": 8.927792121262597, "grad_norm": 0.78125, "learning_rate": 4.455931113135761e-07, "loss": 3.9254, "step": 26801 }, { "epoch": 8.928125260264846, "grad_norm": 0.75, "learning_rate": 4.453183066161376e-07, "loss": 3.9529, "step": 26802 }, { "epoch": 8.928458399267095, "grad_norm": 0.7421875, "learning_rate": 4.4504358409046015e-07, "loss": 3.9451, "step": 26803 }, { "epoch": 8.928791538269342, "grad_norm": 0.76953125, "learning_rate": 4.4476894373974453e-07, "loss": 3.9393, "step": 26804 }, { "epoch": 8.929124677271592, "grad_norm": 0.77734375, "learning_rate": 4.444943855671882e-07, "loss": 3.9704, "step": 26805 }, { "epoch": 8.92945781627384, "grad_norm": 0.76171875, "learning_rate": 4.4421990957599106e-07, "loss": 4.0092, "step": 26806 }, { "epoch": 8.929790955276088, "grad_norm": 0.7734375, "learning_rate": 4.439455157693498e-07, "loss": 3.987, "step": 26807 }, { "epoch": 8.930124094278337, "grad_norm": 0.74609375, "learning_rate": 4.436712041504576e-07, "loss": 4.0152, "step": 26808 }, { "epoch": 8.930457233280586, "grad_norm": 0.7890625, "learning_rate": 4.433969747225136e-07, "loss": 3.9864, "step": 26809 }, { "epoch": 8.930790372282836, "grad_norm": 0.765625, "learning_rate": 4.431228274887103e-07, "loss": 3.9809, "step": 26810 }, { "epoch": 8.931123511285083, "grad_norm": 0.76171875, "learning_rate": 4.428487624522401e-07, "loss": 3.9046, "step": 26811 }, { "epoch": 8.931456650287332, "grad_norm": 0.77734375, "learning_rate": 4.4257477961629545e-07, "loss": 4.0751, "step": 26812 }, { "epoch": 8.931789789289581, "grad_norm": 0.78515625, "learning_rate": 4.4230087898406964e-07, "loss": 3.985, "step": 26813 }, { "epoch": 8.932122928291829, "grad_norm": 0.734375, "learning_rate": 4.420270605587509e-07, "loss": 3.9086, "step": 26814 }, { "epoch": 8.932456067294078, "grad_norm": 0.7734375, "learning_rate": 4.4175332434353013e-07, "loss": 4.0064, "step": 26815 }, { "epoch": 8.932789206296327, "grad_norm": 0.72265625, "learning_rate": 4.4147967034159465e-07, "loss": 3.9723, "step": 26816 }, { "epoch": 8.933122345298576, "grad_norm": 0.7578125, "learning_rate": 4.41206098556132e-07, "loss": 3.9925, "step": 26817 }, { "epoch": 8.933455484300824, "grad_norm": 0.77734375, "learning_rate": 4.4093260899032954e-07, "loss": 3.9914, "step": 26818 }, { "epoch": 8.933788623303073, "grad_norm": 0.7578125, "learning_rate": 4.4065920164737065e-07, "loss": 4.0085, "step": 26819 }, { "epoch": 8.934121762305322, "grad_norm": 0.765625, "learning_rate": 4.403858765304436e-07, "loss": 3.9418, "step": 26820 }, { "epoch": 8.934454901307571, "grad_norm": 0.76953125, "learning_rate": 4.4011263364272914e-07, "loss": 3.9713, "step": 26821 }, { "epoch": 8.934788040309819, "grad_norm": 0.7578125, "learning_rate": 4.3983947298741054e-07, "loss": 4.0213, "step": 26822 }, { "epoch": 8.935121179312068, "grad_norm": 0.75390625, "learning_rate": 4.395663945676695e-07, "loss": 4.0284, "step": 26823 }, { "epoch": 8.935454318314317, "grad_norm": 0.75390625, "learning_rate": 4.3929339838668844e-07, "loss": 3.9785, "step": 26824 }, { "epoch": 8.935787457316565, "grad_norm": 0.72265625, "learning_rate": 4.3902048444764556e-07, "loss": 3.952, "step": 26825 }, { "epoch": 8.936120596318814, "grad_norm": 0.76953125, "learning_rate": 4.387476527537193e-07, "loss": 3.9883, "step": 26826 }, { "epoch": 8.936453735321063, "grad_norm": 0.765625, "learning_rate": 4.384749033080879e-07, "loss": 4.0373, "step": 26827 }, { "epoch": 8.936786874323312, "grad_norm": 0.796875, "learning_rate": 4.382022361139296e-07, "loss": 4.0945, "step": 26828 }, { "epoch": 8.93712001332556, "grad_norm": 0.796875, "learning_rate": 4.3792965117441855e-07, "loss": 4.0022, "step": 26829 }, { "epoch": 8.937453152327809, "grad_norm": 0.84375, "learning_rate": 4.3765714849273134e-07, "loss": 3.992, "step": 26830 }, { "epoch": 8.937786291330058, "grad_norm": 0.80078125, "learning_rate": 4.3738472807204054e-07, "loss": 3.9949, "step": 26831 }, { "epoch": 8.938119430332307, "grad_norm": 0.79296875, "learning_rate": 4.3711238991552023e-07, "loss": 3.9649, "step": 26832 }, { "epoch": 8.938452569334554, "grad_norm": 0.7890625, "learning_rate": 4.3684013402634447e-07, "loss": 3.9988, "step": 26833 }, { "epoch": 8.938785708336804, "grad_norm": 0.78515625, "learning_rate": 4.3656796040767914e-07, "loss": 3.9646, "step": 26834 }, { "epoch": 8.939118847339053, "grad_norm": 0.765625, "learning_rate": 4.362958690626992e-07, "loss": 4.0196, "step": 26835 }, { "epoch": 8.9394519863413, "grad_norm": 0.76171875, "learning_rate": 4.360238599945729e-07, "loss": 3.9231, "step": 26836 }, { "epoch": 8.93978512534355, "grad_norm": 0.7265625, "learning_rate": 4.357519332064677e-07, "loss": 4.0375, "step": 26837 }, { "epoch": 8.940118264345799, "grad_norm": 0.78515625, "learning_rate": 4.354800887015503e-07, "loss": 3.9765, "step": 26838 }, { "epoch": 8.940451403348048, "grad_norm": 0.76953125, "learning_rate": 4.352083264829898e-07, "loss": 3.9976, "step": 26839 }, { "epoch": 8.940784542350295, "grad_norm": 0.7734375, "learning_rate": 4.3493664655394865e-07, "loss": 4.0106, "step": 26840 }, { "epoch": 8.941117681352544, "grad_norm": 0.73828125, "learning_rate": 4.346650489175935e-07, "loss": 3.9791, "step": 26841 }, { "epoch": 8.941450820354794, "grad_norm": 0.76171875, "learning_rate": 4.3439353357708764e-07, "loss": 3.9587, "step": 26842 }, { "epoch": 8.941783959357041, "grad_norm": 0.75, "learning_rate": 4.341221005355919e-07, "loss": 3.9775, "step": 26843 }, { "epoch": 8.94211709835929, "grad_norm": 0.78515625, "learning_rate": 4.338507497962696e-07, "loss": 3.9927, "step": 26844 }, { "epoch": 8.94245023736154, "grad_norm": 0.73046875, "learning_rate": 4.335794813622798e-07, "loss": 3.9725, "step": 26845 }, { "epoch": 8.942783376363789, "grad_norm": 0.765625, "learning_rate": 4.333082952367842e-07, "loss": 4.0495, "step": 26846 }, { "epoch": 8.943116515366036, "grad_norm": 0.78125, "learning_rate": 4.3303719142294027e-07, "loss": 3.9698, "step": 26847 }, { "epoch": 8.943449654368285, "grad_norm": 0.73828125, "learning_rate": 4.327661699239063e-07, "loss": 3.9558, "step": 26848 }, { "epoch": 8.943782793370534, "grad_norm": 0.75, "learning_rate": 4.324952307428373e-07, "loss": 3.9075, "step": 26849 }, { "epoch": 8.944115932372782, "grad_norm": 0.765625, "learning_rate": 4.322243738828924e-07, "loss": 3.9674, "step": 26850 }, { "epoch": 8.94444907137503, "grad_norm": 0.78125, "learning_rate": 4.319535993472257e-07, "loss": 3.9657, "step": 26851 }, { "epoch": 8.94478221037728, "grad_norm": 0.7890625, "learning_rate": 4.31682907138988e-07, "loss": 3.9404, "step": 26852 }, { "epoch": 8.94511534937953, "grad_norm": 0.75, "learning_rate": 4.3141229726133517e-07, "loss": 3.9065, "step": 26853 }, { "epoch": 8.945448488381777, "grad_norm": 0.81640625, "learning_rate": 4.3114176971741885e-07, "loss": 3.9812, "step": 26854 }, { "epoch": 8.945781627384026, "grad_norm": 0.78125, "learning_rate": 4.3087132451038985e-07, "loss": 3.9346, "step": 26855 }, { "epoch": 8.946114766386275, "grad_norm": 0.73046875, "learning_rate": 4.306009616433973e-07, "loss": 3.9218, "step": 26856 }, { "epoch": 8.946447905388524, "grad_norm": 0.76953125, "learning_rate": 4.30330681119592e-07, "loss": 4.0197, "step": 26857 }, { "epoch": 8.946781044390772, "grad_norm": 0.765625, "learning_rate": 4.300604829421215e-07, "loss": 4.0349, "step": 26858 }, { "epoch": 8.94711418339302, "grad_norm": 0.7421875, "learning_rate": 4.297903671141332e-07, "loss": 3.9703, "step": 26859 }, { "epoch": 8.94744732239527, "grad_norm": 0.75, "learning_rate": 4.295203336387729e-07, "loss": 4.0097, "step": 26860 }, { "epoch": 8.947780461397517, "grad_norm": 0.78125, "learning_rate": 4.292503825191857e-07, "loss": 3.9546, "step": 26861 }, { "epoch": 8.948113600399767, "grad_norm": 0.75390625, "learning_rate": 4.289805137585165e-07, "loss": 3.9161, "step": 26862 }, { "epoch": 8.948446739402016, "grad_norm": 0.81640625, "learning_rate": 4.287107273599078e-07, "loss": 3.9875, "step": 26863 }, { "epoch": 8.948779878404265, "grad_norm": 0.75390625, "learning_rate": 4.284410233265038e-07, "loss": 3.9628, "step": 26864 }, { "epoch": 8.949113017406512, "grad_norm": 0.71875, "learning_rate": 4.281714016614444e-07, "loss": 3.9539, "step": 26865 }, { "epoch": 8.949446156408762, "grad_norm": 0.734375, "learning_rate": 4.2790186236787127e-07, "loss": 3.9946, "step": 26866 }, { "epoch": 8.94977929541101, "grad_norm": 0.78515625, "learning_rate": 4.276324054489211e-07, "loss": 3.9649, "step": 26867 }, { "epoch": 8.950112434413258, "grad_norm": 0.7734375, "learning_rate": 4.2736303090773643e-07, "loss": 3.9681, "step": 26868 }, { "epoch": 8.950445573415507, "grad_norm": 0.7890625, "learning_rate": 4.2709373874745464e-07, "loss": 3.9589, "step": 26869 }, { "epoch": 8.950778712417756, "grad_norm": 0.8125, "learning_rate": 4.268245289712083e-07, "loss": 3.996, "step": 26870 }, { "epoch": 8.951111851420006, "grad_norm": 0.78515625, "learning_rate": 4.265554015821374e-07, "loss": 3.934, "step": 26871 }, { "epoch": 8.951444990422253, "grad_norm": 0.76171875, "learning_rate": 4.262863565833744e-07, "loss": 3.9395, "step": 26872 }, { "epoch": 8.951778129424502, "grad_norm": 0.71875, "learning_rate": 4.2601739397805345e-07, "loss": 3.978, "step": 26873 }, { "epoch": 8.952111268426751, "grad_norm": 0.77734375, "learning_rate": 4.257485137693079e-07, "loss": 4.0343, "step": 26874 }, { "epoch": 8.952444407428999, "grad_norm": 0.72265625, "learning_rate": 4.254797159602686e-07, "loss": 3.9651, "step": 26875 }, { "epoch": 8.952777546431248, "grad_norm": 0.80078125, "learning_rate": 4.25211000554068e-07, "loss": 3.8756, "step": 26876 }, { "epoch": 8.953110685433497, "grad_norm": 0.78125, "learning_rate": 4.2494236755383525e-07, "loss": 4.0009, "step": 26877 }, { "epoch": 8.953443824435746, "grad_norm": 0.76953125, "learning_rate": 4.2467381696269956e-07, "loss": 3.959, "step": 26878 }, { "epoch": 8.953776963437994, "grad_norm": 0.7890625, "learning_rate": 4.2440534878378927e-07, "loss": 3.9866, "step": 26879 }, { "epoch": 8.954110102440243, "grad_norm": 0.80859375, "learning_rate": 4.241369630202302e-07, "loss": 3.9795, "step": 26880 }, { "epoch": 8.954443241442492, "grad_norm": 0.7734375, "learning_rate": 4.23868659675149e-07, "loss": 4.012, "step": 26881 }, { "epoch": 8.954776380444741, "grad_norm": 0.7734375, "learning_rate": 4.2360043875167065e-07, "loss": 3.9366, "step": 26882 }, { "epoch": 8.955109519446989, "grad_norm": 0.80859375, "learning_rate": 4.2333230025292017e-07, "loss": 3.9323, "step": 26883 }, { "epoch": 8.955442658449238, "grad_norm": 0.84765625, "learning_rate": 4.2306424418202094e-07, "loss": 3.9299, "step": 26884 }, { "epoch": 8.955775797451487, "grad_norm": 0.71875, "learning_rate": 4.227962705420946e-07, "loss": 4.0048, "step": 26885 }, { "epoch": 8.956108936453735, "grad_norm": 0.7890625, "learning_rate": 4.225283793362611e-07, "loss": 3.9524, "step": 26886 }, { "epoch": 8.956442075455984, "grad_norm": 0.72265625, "learning_rate": 4.2226057056764466e-07, "loss": 4.0512, "step": 26887 }, { "epoch": 8.956775214458233, "grad_norm": 0.7578125, "learning_rate": 4.219928442393611e-07, "loss": 3.9689, "step": 26888 }, { "epoch": 8.957108353460482, "grad_norm": 0.77734375, "learning_rate": 4.2172520035452883e-07, "loss": 3.898, "step": 26889 }, { "epoch": 8.95744149246273, "grad_norm": 0.73046875, "learning_rate": 4.214576389162669e-07, "loss": 4.0821, "step": 26890 }, { "epoch": 8.957774631464979, "grad_norm": 0.75390625, "learning_rate": 4.211901599276921e-07, "loss": 3.996, "step": 26891 }, { "epoch": 8.958107770467228, "grad_norm": 0.7734375, "learning_rate": 4.209227633919194e-07, "loss": 4.0184, "step": 26892 }, { "epoch": 8.958440909469477, "grad_norm": 0.77734375, "learning_rate": 4.2065544931206204e-07, "loss": 3.9732, "step": 26893 }, { "epoch": 8.958774048471724, "grad_norm": 0.7890625, "learning_rate": 4.203882176912352e-07, "loss": 4.0342, "step": 26894 }, { "epoch": 8.959107187473974, "grad_norm": 0.77734375, "learning_rate": 4.201210685325521e-07, "loss": 3.9512, "step": 26895 }, { "epoch": 8.959440326476223, "grad_norm": 0.7578125, "learning_rate": 4.1985400183912367e-07, "loss": 4.0064, "step": 26896 }, { "epoch": 8.95977346547847, "grad_norm": 0.75390625, "learning_rate": 4.195870176140598e-07, "loss": 3.9873, "step": 26897 }, { "epoch": 8.96010660448072, "grad_norm": 0.73828125, "learning_rate": 4.1932011586047146e-07, "loss": 3.9641, "step": 26898 }, { "epoch": 8.960439743482969, "grad_norm": 0.73828125, "learning_rate": 4.1905329658146697e-07, "loss": 4.0263, "step": 26899 }, { "epoch": 8.960772882485218, "grad_norm": 0.765625, "learning_rate": 4.1878655978015294e-07, "loss": 4.0229, "step": 26900 }, { "epoch": 8.961106021487465, "grad_norm": 0.7265625, "learning_rate": 4.1851990545963866e-07, "loss": 4.0269, "step": 26901 }, { "epoch": 8.961439160489714, "grad_norm": 0.75, "learning_rate": 4.1825333362302984e-07, "loss": 3.9838, "step": 26902 }, { "epoch": 8.961772299491964, "grad_norm": 0.7578125, "learning_rate": 4.1798684427343e-07, "loss": 4.0655, "step": 26903 }, { "epoch": 8.962105438494211, "grad_norm": 0.7734375, "learning_rate": 4.177204374139423e-07, "loss": 4.0039, "step": 26904 }, { "epoch": 8.96243857749646, "grad_norm": 0.77734375, "learning_rate": 4.1745411304767447e-07, "loss": 4.0083, "step": 26905 }, { "epoch": 8.96277171649871, "grad_norm": 0.78125, "learning_rate": 4.171878711777238e-07, "loss": 3.9553, "step": 26906 }, { "epoch": 8.963104855500958, "grad_norm": 0.75390625, "learning_rate": 4.169217118071922e-07, "loss": 4.0636, "step": 26907 }, { "epoch": 8.963437994503206, "grad_norm": 0.76171875, "learning_rate": 4.166556349391812e-07, "loss": 3.9677, "step": 26908 }, { "epoch": 8.963771133505455, "grad_norm": 0.734375, "learning_rate": 4.1638964057679e-07, "loss": 3.978, "step": 26909 }, { "epoch": 8.964104272507704, "grad_norm": 0.74609375, "learning_rate": 4.16123728723117e-07, "loss": 3.9248, "step": 26910 }, { "epoch": 8.964437411509952, "grad_norm": 0.76171875, "learning_rate": 4.158578993812573e-07, "loss": 4.0198, "step": 26911 }, { "epoch": 8.9647705505122, "grad_norm": 0.79296875, "learning_rate": 4.1559215255430996e-07, "loss": 4.0004, "step": 26912 }, { "epoch": 8.96510368951445, "grad_norm": 0.796875, "learning_rate": 4.1532648824536916e-07, "loss": 3.964, "step": 26913 }, { "epoch": 8.9654368285167, "grad_norm": 0.7734375, "learning_rate": 4.150609064575292e-07, "loss": 4.0037, "step": 26914 }, { "epoch": 8.965769967518947, "grad_norm": 0.765625, "learning_rate": 4.147954071938842e-07, "loss": 3.978, "step": 26915 }, { "epoch": 8.966103106521196, "grad_norm": 0.75390625, "learning_rate": 4.1452999045752586e-07, "loss": 4.0094, "step": 26916 }, { "epoch": 8.966436245523445, "grad_norm": 0.7890625, "learning_rate": 4.142646562515459e-07, "loss": 3.9334, "step": 26917 }, { "epoch": 8.966769384525694, "grad_norm": 0.75390625, "learning_rate": 4.1399940457903435e-07, "loss": 3.9463, "step": 26918 }, { "epoch": 8.967102523527942, "grad_norm": 0.796875, "learning_rate": 4.1373423544308203e-07, "loss": 3.9238, "step": 26919 }, { "epoch": 8.96743566253019, "grad_norm": 0.765625, "learning_rate": 4.1346914884677734e-07, "loss": 3.8995, "step": 26920 }, { "epoch": 8.96776880153244, "grad_norm": 0.79296875, "learning_rate": 4.132041447932078e-07, "loss": 3.9697, "step": 26921 }, { "epoch": 8.968101940534687, "grad_norm": 0.77734375, "learning_rate": 4.129392232854601e-07, "loss": 3.9952, "step": 26922 }, { "epoch": 8.968435079536937, "grad_norm": 0.76953125, "learning_rate": 4.1267438432662006e-07, "loss": 3.9083, "step": 26923 }, { "epoch": 8.968768218539186, "grad_norm": 0.78515625, "learning_rate": 4.12409627919772e-07, "loss": 3.9607, "step": 26924 }, { "epoch": 8.969101357541435, "grad_norm": 0.7421875, "learning_rate": 4.12144954068e-07, "loss": 4.0469, "step": 26925 }, { "epoch": 8.969434496543682, "grad_norm": 0.76171875, "learning_rate": 4.1188036277438584e-07, "loss": 3.9917, "step": 26926 }, { "epoch": 8.969767635545931, "grad_norm": 0.74609375, "learning_rate": 4.116158540420137e-07, "loss": 3.98, "step": 26927 }, { "epoch": 8.97010077454818, "grad_norm": 0.73828125, "learning_rate": 4.113514278739636e-07, "loss": 3.9818, "step": 26928 }, { "epoch": 8.97043391355043, "grad_norm": 0.82421875, "learning_rate": 4.1108708427331553e-07, "loss": 3.9492, "step": 26929 }, { "epoch": 8.970767052552677, "grad_norm": 0.75390625, "learning_rate": 4.1082282324314715e-07, "loss": 3.9323, "step": 26930 }, { "epoch": 8.971100191554926, "grad_norm": 0.74609375, "learning_rate": 4.105586447865384e-07, "loss": 3.9782, "step": 26931 }, { "epoch": 8.971433330557176, "grad_norm": 0.7734375, "learning_rate": 4.1029454890656765e-07, "loss": 3.9507, "step": 26932 }, { "epoch": 8.971766469559423, "grad_norm": 0.7578125, "learning_rate": 4.1003053560630667e-07, "loss": 3.9805, "step": 26933 }, { "epoch": 8.972099608561672, "grad_norm": 0.7421875, "learning_rate": 4.0976660488883377e-07, "loss": 4.0212, "step": 26934 }, { "epoch": 8.972432747563921, "grad_norm": 0.8046875, "learning_rate": 4.0950275675722236e-07, "loss": 3.9672, "step": 26935 }, { "epoch": 8.972765886566169, "grad_norm": 0.72265625, "learning_rate": 4.0923899121454664e-07, "loss": 3.9689, "step": 26936 }, { "epoch": 8.973099025568418, "grad_norm": 0.7578125, "learning_rate": 4.089753082638767e-07, "loss": 3.9668, "step": 26937 }, { "epoch": 8.973432164570667, "grad_norm": 0.72265625, "learning_rate": 4.0871170790828583e-07, "loss": 3.9437, "step": 26938 }, { "epoch": 8.973765303572916, "grad_norm": 0.79296875, "learning_rate": 4.0844819015084495e-07, "loss": 3.9644, "step": 26939 }, { "epoch": 8.974098442575164, "grad_norm": 0.76953125, "learning_rate": 4.081847549946216e-07, "loss": 3.9789, "step": 26940 }, { "epoch": 8.974431581577413, "grad_norm": 0.75, "learning_rate": 4.07921402442685e-07, "loss": 4.0011, "step": 26941 }, { "epoch": 8.974764720579662, "grad_norm": 0.7265625, "learning_rate": 4.076581324981027e-07, "loss": 4.0112, "step": 26942 }, { "epoch": 8.975097859581911, "grad_norm": 0.7734375, "learning_rate": 4.0739494516394136e-07, "loss": 4.0807, "step": 26943 }, { "epoch": 8.975430998584159, "grad_norm": 0.75390625, "learning_rate": 4.071318404432653e-07, "loss": 3.927, "step": 26944 }, { "epoch": 8.975764137586408, "grad_norm": 0.73828125, "learning_rate": 4.0686881833914113e-07, "loss": 3.9759, "step": 26945 }, { "epoch": 8.976097276588657, "grad_norm": 0.7265625, "learning_rate": 4.0660587885463146e-07, "loss": 4.0206, "step": 26946 }, { "epoch": 8.976430415590904, "grad_norm": 0.7734375, "learning_rate": 4.0634302199279967e-07, "loss": 3.9262, "step": 26947 }, { "epoch": 8.976763554593154, "grad_norm": 0.7578125, "learning_rate": 4.060802477567049e-07, "loss": 3.9971, "step": 26948 }, { "epoch": 8.977096693595403, "grad_norm": 0.75390625, "learning_rate": 4.058175561494115e-07, "loss": 3.9666, "step": 26949 }, { "epoch": 8.977429832597652, "grad_norm": 0.72265625, "learning_rate": 4.055549471739778e-07, "loss": 3.9596, "step": 26950 }, { "epoch": 8.9777629715999, "grad_norm": 0.75, "learning_rate": 4.052924208334613e-07, "loss": 4.0388, "step": 26951 }, { "epoch": 8.978096110602149, "grad_norm": 0.765625, "learning_rate": 4.0502997713092127e-07, "loss": 3.887, "step": 26952 }, { "epoch": 8.978429249604398, "grad_norm": 0.7734375, "learning_rate": 4.047676160694153e-07, "loss": 4.0184, "step": 26953 }, { "epoch": 8.978762388606647, "grad_norm": 0.76171875, "learning_rate": 4.045053376519975e-07, "loss": 3.9833, "step": 26954 }, { "epoch": 8.979095527608894, "grad_norm": 0.76171875, "learning_rate": 4.04243141881723e-07, "loss": 4.0133, "step": 26955 }, { "epoch": 8.979428666611144, "grad_norm": 0.765625, "learning_rate": 4.039810287616469e-07, "loss": 4.0751, "step": 26956 }, { "epoch": 8.979761805613393, "grad_norm": 0.7890625, "learning_rate": 4.037189982948225e-07, "loss": 4.0871, "step": 26957 }, { "epoch": 8.98009494461564, "grad_norm": 0.7890625, "learning_rate": 4.034570504843016e-07, "loss": 4.0006, "step": 26958 }, { "epoch": 8.98042808361789, "grad_norm": 0.80859375, "learning_rate": 4.031951853331342e-07, "loss": 3.9954, "step": 26959 }, { "epoch": 8.980761222620139, "grad_norm": 0.79296875, "learning_rate": 4.029334028443721e-07, "loss": 4.0146, "step": 26960 }, { "epoch": 8.981094361622388, "grad_norm": 0.75390625, "learning_rate": 4.0267170302106277e-07, "loss": 3.9536, "step": 26961 }, { "epoch": 8.981427500624635, "grad_norm": 0.76953125, "learning_rate": 4.0241008586625467e-07, "loss": 4.0588, "step": 26962 }, { "epoch": 8.981760639626884, "grad_norm": 0.7265625, "learning_rate": 4.02148551382997e-07, "loss": 3.9518, "step": 26963 }, { "epoch": 8.982093778629133, "grad_norm": 0.71875, "learning_rate": 4.01887099574334e-07, "loss": 4.0527, "step": 26964 }, { "epoch": 8.982426917631381, "grad_norm": 0.74609375, "learning_rate": 4.0162573044331243e-07, "loss": 3.9736, "step": 26965 }, { "epoch": 8.98276005663363, "grad_norm": 0.75, "learning_rate": 4.013644439929756e-07, "loss": 4.0364, "step": 26966 }, { "epoch": 8.98309319563588, "grad_norm": 0.7265625, "learning_rate": 4.01103240226367e-07, "loss": 4.0458, "step": 26967 }, { "epoch": 8.983426334638128, "grad_norm": 0.796875, "learning_rate": 4.0084211914653087e-07, "loss": 4.019, "step": 26968 }, { "epoch": 8.983759473640376, "grad_norm": 0.74609375, "learning_rate": 4.0058108075650715e-07, "loss": 3.9755, "step": 26969 }, { "epoch": 8.984092612642625, "grad_norm": 0.72265625, "learning_rate": 4.003201250593344e-07, "loss": 4.0033, "step": 26970 }, { "epoch": 8.984425751644874, "grad_norm": 0.734375, "learning_rate": 4.0005925205805594e-07, "loss": 4.018, "step": 26971 }, { "epoch": 8.984758890647122, "grad_norm": 0.77734375, "learning_rate": 3.997984617557085e-07, "loss": 4.0523, "step": 26972 }, { "epoch": 8.98509202964937, "grad_norm": 0.74609375, "learning_rate": 3.995377541553305e-07, "loss": 4.0239, "step": 26973 }, { "epoch": 8.98542516865162, "grad_norm": 0.765625, "learning_rate": 3.9927712925995703e-07, "loss": 3.9374, "step": 26974 }, { "epoch": 8.98575830765387, "grad_norm": 0.75, "learning_rate": 3.990165870726256e-07, "loss": 3.9383, "step": 26975 }, { "epoch": 8.986091446656117, "grad_norm": 0.7421875, "learning_rate": 3.987561275963697e-07, "loss": 3.9944, "step": 26976 }, { "epoch": 8.986424585658366, "grad_norm": 0.74609375, "learning_rate": 3.9849575083422434e-07, "loss": 4.0424, "step": 26977 }, { "epoch": 8.986757724660615, "grad_norm": 0.7890625, "learning_rate": 3.982354567892213e-07, "loss": 4.0486, "step": 26978 }, { "epoch": 8.987090863662864, "grad_norm": 0.76171875, "learning_rate": 3.979752454643931e-07, "loss": 4.0399, "step": 26979 }, { "epoch": 8.987424002665112, "grad_norm": 0.75, "learning_rate": 3.977151168627699e-07, "loss": 3.9661, "step": 26980 }, { "epoch": 8.98775714166736, "grad_norm": 0.7421875, "learning_rate": 3.974550709873817e-07, "loss": 3.9749, "step": 26981 }, { "epoch": 8.98809028066961, "grad_norm": 0.73828125, "learning_rate": 3.9719510784125864e-07, "loss": 4.044, "step": 26982 }, { "epoch": 8.988423419671857, "grad_norm": 0.73828125, "learning_rate": 3.969352274274274e-07, "loss": 3.9381, "step": 26983 }, { "epoch": 8.988756558674107, "grad_norm": 0.7578125, "learning_rate": 3.966754297489156e-07, "loss": 3.9379, "step": 26984 }, { "epoch": 8.989089697676356, "grad_norm": 0.7890625, "learning_rate": 3.964157148087491e-07, "loss": 3.9805, "step": 26985 }, { "epoch": 8.989422836678605, "grad_norm": 0.77734375, "learning_rate": 3.961560826099547e-07, "loss": 3.9399, "step": 26986 }, { "epoch": 8.989755975680852, "grad_norm": 0.796875, "learning_rate": 3.958965331555542e-07, "loss": 3.9167, "step": 26987 }, { "epoch": 8.990089114683101, "grad_norm": 0.734375, "learning_rate": 3.956370664485701e-07, "loss": 3.9484, "step": 26988 }, { "epoch": 8.99042225368535, "grad_norm": 0.77734375, "learning_rate": 3.9537768249202746e-07, "loss": 3.9936, "step": 26989 }, { "epoch": 8.9907553926876, "grad_norm": 0.73828125, "learning_rate": 3.951183812889464e-07, "loss": 4.0209, "step": 26990 }, { "epoch": 8.991088531689847, "grad_norm": 0.79296875, "learning_rate": 3.94859162842347e-07, "loss": 4.0559, "step": 26991 }, { "epoch": 8.991421670692096, "grad_norm": 0.74609375, "learning_rate": 3.946000271552469e-07, "loss": 4.0387, "step": 26992 }, { "epoch": 8.991754809694346, "grad_norm": 0.78125, "learning_rate": 3.943409742306678e-07, "loss": 3.9509, "step": 26993 }, { "epoch": 8.992087948696593, "grad_norm": 0.72265625, "learning_rate": 3.940820040716256e-07, "loss": 4.0088, "step": 26994 }, { "epoch": 8.992421087698842, "grad_norm": 0.7734375, "learning_rate": 3.938231166811371e-07, "loss": 4.0169, "step": 26995 }, { "epoch": 8.992754226701091, "grad_norm": 0.7734375, "learning_rate": 3.935643120622165e-07, "loss": 3.9648, "step": 26996 }, { "epoch": 8.993087365703339, "grad_norm": 0.72265625, "learning_rate": 3.9330559021787975e-07, "loss": 3.951, "step": 26997 }, { "epoch": 8.993420504705588, "grad_norm": 0.75390625, "learning_rate": 3.9304695115113947e-07, "loss": 3.9407, "step": 26998 }, { "epoch": 8.993753643707837, "grad_norm": 0.7578125, "learning_rate": 3.927883948650074e-07, "loss": 3.9702, "step": 26999 }, { "epoch": 8.994086782710086, "grad_norm": 0.7421875, "learning_rate": 3.9252992136249777e-07, "loss": 4.0815, "step": 27000 }, { "epoch": 8.994419921712334, "grad_norm": 0.7109375, "learning_rate": 3.9227153064661987e-07, "loss": 4.0523, "step": 27001 }, { "epoch": 8.994753060714583, "grad_norm": 0.76171875, "learning_rate": 3.920132227203829e-07, "loss": 4.0034, "step": 27002 }, { "epoch": 8.995086199716832, "grad_norm": 0.76171875, "learning_rate": 3.917549975867954e-07, "loss": 3.979, "step": 27003 }, { "epoch": 8.995419338719081, "grad_norm": 0.76953125, "learning_rate": 3.9149685524886734e-07, "loss": 3.9111, "step": 27004 }, { "epoch": 8.995752477721329, "grad_norm": 0.7890625, "learning_rate": 3.912387957096031e-07, "loss": 3.9662, "step": 27005 }, { "epoch": 8.996085616723578, "grad_norm": 0.765625, "learning_rate": 3.909808189720085e-07, "loss": 4.0788, "step": 27006 }, { "epoch": 8.996418755725827, "grad_norm": 0.75390625, "learning_rate": 3.907229250390904e-07, "loss": 3.8864, "step": 27007 }, { "epoch": 8.996751894728074, "grad_norm": 0.76953125, "learning_rate": 3.904651139138513e-07, "loss": 3.9175, "step": 27008 }, { "epoch": 8.997085033730324, "grad_norm": 0.80859375, "learning_rate": 3.902073855992938e-07, "loss": 3.9223, "step": 27009 }, { "epoch": 8.997418172732573, "grad_norm": 0.7734375, "learning_rate": 3.899497400984206e-07, "loss": 4.0005, "step": 27010 }, { "epoch": 8.997751311734822, "grad_norm": 0.76953125, "learning_rate": 3.8969217741423257e-07, "loss": 3.94, "step": 27011 }, { "epoch": 8.99808445073707, "grad_norm": 0.78515625, "learning_rate": 3.8943469754973064e-07, "loss": 4.0112, "step": 27012 }, { "epoch": 8.998417589739319, "grad_norm": 0.79296875, "learning_rate": 3.891773005079133e-07, "loss": 3.9351, "step": 27013 }, { "epoch": 8.998750728741568, "grad_norm": 0.796875, "learning_rate": 3.889199862917764e-07, "loss": 3.9933, "step": 27014 }, { "epoch": 8.999083867743817, "grad_norm": 0.75390625, "learning_rate": 3.886627549043209e-07, "loss": 3.9687, "step": 27015 }, { "epoch": 8.999417006746064, "grad_norm": 0.75390625, "learning_rate": 3.8840560634854026e-07, "loss": 3.9741, "step": 27016 }, { "epoch": 8.999750145748314, "grad_norm": 0.75, "learning_rate": 3.881485406274313e-07, "loss": 3.9191, "step": 27017 }, { "epoch": 9.0, "grad_norm": 0.8828125, "learning_rate": 3.878915577439865e-07, "loss": 3.943, "step": 27018 }, { "epoch": 9.00033313900225, "grad_norm": 0.765625, "learning_rate": 3.87634657701201e-07, "loss": 4.0002, "step": 27019 }, { "epoch": 9.000666278004497, "grad_norm": 0.77734375, "learning_rate": 3.8737784050206673e-07, "loss": 3.9664, "step": 27020 }, { "epoch": 9.000999417006746, "grad_norm": 0.74609375, "learning_rate": 3.871211061495744e-07, "loss": 4.0161, "step": 27021 }, { "epoch": 9.001332556008995, "grad_norm": 0.7421875, "learning_rate": 3.868644546467143e-07, "loss": 3.9361, "step": 27022 }, { "epoch": 9.001665695011244, "grad_norm": 0.71484375, "learning_rate": 3.8660788599647724e-07, "loss": 3.9498, "step": 27023 }, { "epoch": 9.001998834013492, "grad_norm": 0.80078125, "learning_rate": 3.863514002018501e-07, "loss": 3.9497, "step": 27024 }, { "epoch": 9.00233197301574, "grad_norm": 0.73046875, "learning_rate": 3.8609499726581954e-07, "loss": 4.0023, "step": 27025 }, { "epoch": 9.00266511201799, "grad_norm": 0.8046875, "learning_rate": 3.858386771913758e-07, "loss": 4.0497, "step": 27026 }, { "epoch": 9.002998251020237, "grad_norm": 0.765625, "learning_rate": 3.855824399815014e-07, "loss": 3.9503, "step": 27027 }, { "epoch": 9.003331390022487, "grad_norm": 0.7734375, "learning_rate": 3.853262856391815e-07, "loss": 3.9496, "step": 27028 }, { "epoch": 9.003664529024736, "grad_norm": 0.7578125, "learning_rate": 3.850702141673995e-07, "loss": 4.0099, "step": 27029 }, { "epoch": 9.003997668026985, "grad_norm": 0.76171875, "learning_rate": 3.8481422556913974e-07, "loss": 4.0637, "step": 27030 }, { "epoch": 9.004330807029232, "grad_norm": 0.7578125, "learning_rate": 3.84558319847384e-07, "loss": 3.9856, "step": 27031 }, { "epoch": 9.004663946031481, "grad_norm": 0.76953125, "learning_rate": 3.84302497005109e-07, "loss": 4.0337, "step": 27032 }, { "epoch": 9.00499708503373, "grad_norm": 0.7578125, "learning_rate": 3.8404675704529905e-07, "loss": 3.979, "step": 27033 }, { "epoch": 9.00533022403598, "grad_norm": 0.77734375, "learning_rate": 3.83791099970931e-07, "loss": 3.95, "step": 27034 }, { "epoch": 9.005663363038227, "grad_norm": 0.75, "learning_rate": 3.8353552578498236e-07, "loss": 3.9794, "step": 27035 }, { "epoch": 9.005996502040476, "grad_norm": 0.75390625, "learning_rate": 3.8328003449042995e-07, "loss": 3.9895, "step": 27036 }, { "epoch": 9.006329641042726, "grad_norm": 0.7890625, "learning_rate": 3.8302462609025146e-07, "loss": 3.9582, "step": 27037 }, { "epoch": 9.006662780044973, "grad_norm": 0.765625, "learning_rate": 3.827693005874211e-07, "loss": 4.0153, "step": 27038 }, { "epoch": 9.006995919047222, "grad_norm": 0.8046875, "learning_rate": 3.8251405798491234e-07, "loss": 3.9751, "step": 27039 }, { "epoch": 9.007329058049471, "grad_norm": 0.7578125, "learning_rate": 3.822588982856978e-07, "loss": 3.906, "step": 27040 }, { "epoch": 9.00766219705172, "grad_norm": 0.76953125, "learning_rate": 3.82003821492751e-07, "loss": 4.0488, "step": 27041 }, { "epoch": 9.007995336053968, "grad_norm": 0.76171875, "learning_rate": 3.817488276090411e-07, "loss": 3.931, "step": 27042 }, { "epoch": 9.008328475056217, "grad_norm": 0.765625, "learning_rate": 3.814939166375392e-07, "loss": 4.0027, "step": 27043 }, { "epoch": 9.008661614058466, "grad_norm": 0.73046875, "learning_rate": 3.812390885812153e-07, "loss": 3.9395, "step": 27044 }, { "epoch": 9.008994753060714, "grad_norm": 0.7734375, "learning_rate": 3.809843434430363e-07, "loss": 3.9864, "step": 27045 }, { "epoch": 9.009327892062963, "grad_norm": 0.76171875, "learning_rate": 3.8072968122597055e-07, "loss": 3.9741, "step": 27046 }, { "epoch": 9.009661031065212, "grad_norm": 0.78515625, "learning_rate": 3.804751019329825e-07, "loss": 3.9297, "step": 27047 }, { "epoch": 9.009994170067461, "grad_norm": 0.8046875, "learning_rate": 3.8022060556703956e-07, "loss": 3.9486, "step": 27048 }, { "epoch": 9.010327309069709, "grad_norm": 0.796875, "learning_rate": 3.7996619213110623e-07, "loss": 3.9649, "step": 27049 }, { "epoch": 9.010660448071958, "grad_norm": 0.75390625, "learning_rate": 3.797118616281425e-07, "loss": 3.9576, "step": 27050 }, { "epoch": 9.010993587074207, "grad_norm": 0.79296875, "learning_rate": 3.794576140611136e-07, "loss": 4.011, "step": 27051 }, { "epoch": 9.011326726076456, "grad_norm": 0.74609375, "learning_rate": 3.792034494329813e-07, "loss": 3.9825, "step": 27052 }, { "epoch": 9.011659865078704, "grad_norm": 0.734375, "learning_rate": 3.789493677467049e-07, "loss": 3.9608, "step": 27053 }, { "epoch": 9.011993004080953, "grad_norm": 0.7734375, "learning_rate": 3.786953690052436e-07, "loss": 3.9537, "step": 27054 }, { "epoch": 9.012326143083202, "grad_norm": 0.80078125, "learning_rate": 3.784414532115568e-07, "loss": 4.0212, "step": 27055 }, { "epoch": 9.01265928208545, "grad_norm": 0.7890625, "learning_rate": 3.781876203686022e-07, "loss": 4.0266, "step": 27056 }, { "epoch": 9.012992421087699, "grad_norm": 0.72265625, "learning_rate": 3.7793387047933643e-07, "loss": 3.9764, "step": 27057 }, { "epoch": 9.013325560089948, "grad_norm": 0.7734375, "learning_rate": 3.776802035467139e-07, "loss": 3.9635, "step": 27058 }, { "epoch": 9.013658699092197, "grad_norm": 0.72265625, "learning_rate": 3.774266195736906e-07, "loss": 3.9332, "step": 27059 }, { "epoch": 9.013991838094444, "grad_norm": 0.74609375, "learning_rate": 3.771731185632199e-07, "loss": 3.9415, "step": 27060 }, { "epoch": 9.014324977096694, "grad_norm": 0.7578125, "learning_rate": 3.769197005182537e-07, "loss": 3.9679, "step": 27061 }, { "epoch": 9.014658116098943, "grad_norm": 0.75390625, "learning_rate": 3.7666636544174377e-07, "loss": 3.9886, "step": 27062 }, { "epoch": 9.01499125510119, "grad_norm": 0.74609375, "learning_rate": 3.7641311333664275e-07, "loss": 3.869, "step": 27063 }, { "epoch": 9.01532439410344, "grad_norm": 0.7265625, "learning_rate": 3.761599442058991e-07, "loss": 3.8804, "step": 27064 }, { "epoch": 9.015657533105689, "grad_norm": 0.74609375, "learning_rate": 3.7590685805246217e-07, "loss": 3.9845, "step": 27065 }, { "epoch": 9.015990672107938, "grad_norm": 0.796875, "learning_rate": 3.756538548792779e-07, "loss": 3.8589, "step": 27066 }, { "epoch": 9.016323811110185, "grad_norm": 0.76171875, "learning_rate": 3.7540093468929724e-07, "loss": 3.9592, "step": 27067 }, { "epoch": 9.016656950112434, "grad_norm": 0.78125, "learning_rate": 3.751480974854629e-07, "loss": 3.9376, "step": 27068 }, { "epoch": 9.016990089114683, "grad_norm": 0.76171875, "learning_rate": 3.7489534327072e-07, "loss": 4.0229, "step": 27069 }, { "epoch": 9.017323228116933, "grad_norm": 0.77734375, "learning_rate": 3.746426720480145e-07, "loss": 3.9463, "step": 27070 }, { "epoch": 9.01765636711918, "grad_norm": 0.765625, "learning_rate": 3.743900838202874e-07, "loss": 3.8809, "step": 27071 }, { "epoch": 9.01798950612143, "grad_norm": 0.7421875, "learning_rate": 3.7413757859048296e-07, "loss": 4.0493, "step": 27072 }, { "epoch": 9.018322645123678, "grad_norm": 0.76171875, "learning_rate": 3.7388515636153973e-07, "loss": 3.9278, "step": 27073 }, { "epoch": 9.018655784125926, "grad_norm": 0.7890625, "learning_rate": 3.736328171363995e-07, "loss": 3.9326, "step": 27074 }, { "epoch": 9.018988923128175, "grad_norm": 0.76953125, "learning_rate": 3.733805609180033e-07, "loss": 3.9681, "step": 27075 }, { "epoch": 9.019322062130424, "grad_norm": 0.78125, "learning_rate": 3.731283877092853e-07, "loss": 3.9994, "step": 27076 }, { "epoch": 9.019655201132673, "grad_norm": 0.73828125, "learning_rate": 3.7287629751318497e-07, "loss": 4.0915, "step": 27077 }, { "epoch": 9.01998834013492, "grad_norm": 0.7734375, "learning_rate": 3.7262429033263906e-07, "loss": 3.9857, "step": 27078 }, { "epoch": 9.02032147913717, "grad_norm": 0.7578125, "learning_rate": 3.7237236617058275e-07, "loss": 3.9468, "step": 27079 }, { "epoch": 9.02065461813942, "grad_norm": 0.76953125, "learning_rate": 3.7212052502994783e-07, "loss": 4.0054, "step": 27080 }, { "epoch": 9.020987757141667, "grad_norm": 0.7421875, "learning_rate": 3.718687669136719e-07, "loss": 4.0036, "step": 27081 }, { "epoch": 9.021320896143916, "grad_norm": 0.77734375, "learning_rate": 3.7161709182468443e-07, "loss": 4.0091, "step": 27082 }, { "epoch": 9.021654035146165, "grad_norm": 0.765625, "learning_rate": 3.7136549976591885e-07, "loss": 3.9999, "step": 27083 }, { "epoch": 9.021987174148414, "grad_norm": 0.76171875, "learning_rate": 3.711139907403036e-07, "loss": 3.8982, "step": 27084 }, { "epoch": 9.022320313150662, "grad_norm": 0.74609375, "learning_rate": 3.708625647507696e-07, "loss": 3.9644, "step": 27085 }, { "epoch": 9.02265345215291, "grad_norm": 0.78515625, "learning_rate": 3.706112218002447e-07, "loss": 4.0442, "step": 27086 }, { "epoch": 9.02298659115516, "grad_norm": 0.71875, "learning_rate": 3.7035996189165645e-07, "loss": 3.9706, "step": 27087 }, { "epoch": 9.023319730157407, "grad_norm": 0.75, "learning_rate": 3.701087850279325e-07, "loss": 3.9791, "step": 27088 }, { "epoch": 9.023652869159656, "grad_norm": 0.77734375, "learning_rate": 3.6985769121199805e-07, "loss": 4.0327, "step": 27089 }, { "epoch": 9.023986008161906, "grad_norm": 0.75, "learning_rate": 3.696066804467774e-07, "loss": 3.9748, "step": 27090 }, { "epoch": 9.024319147164155, "grad_norm": 0.7578125, "learning_rate": 3.69355752735194e-07, "loss": 3.9628, "step": 27091 }, { "epoch": 9.024652286166402, "grad_norm": 0.734375, "learning_rate": 3.6910490808017146e-07, "loss": 3.8953, "step": 27092 }, { "epoch": 9.024985425168651, "grad_norm": 0.76171875, "learning_rate": 3.6885414648463316e-07, "loss": 4.0776, "step": 27093 }, { "epoch": 9.0253185641709, "grad_norm": 0.75, "learning_rate": 3.6860346795149517e-07, "loss": 3.9328, "step": 27094 }, { "epoch": 9.02565170317315, "grad_norm": 0.76171875, "learning_rate": 3.68352872483681e-07, "loss": 4.0032, "step": 27095 }, { "epoch": 9.025984842175397, "grad_norm": 0.76953125, "learning_rate": 3.681023600841099e-07, "loss": 3.9361, "step": 27096 }, { "epoch": 9.026317981177646, "grad_norm": 0.76171875, "learning_rate": 3.6785193075569793e-07, "loss": 3.9869, "step": 27097 }, { "epoch": 9.026651120179896, "grad_norm": 0.76171875, "learning_rate": 3.676015845013611e-07, "loss": 3.9425, "step": 27098 }, { "epoch": 9.026984259182143, "grad_norm": 0.79296875, "learning_rate": 3.6735132132401873e-07, "loss": 3.9715, "step": 27099 }, { "epoch": 9.027317398184392, "grad_norm": 0.765625, "learning_rate": 3.671011412265843e-07, "loss": 4.0169, "step": 27100 }, { "epoch": 9.027650537186641, "grad_norm": 0.8203125, "learning_rate": 3.668510442119721e-07, "loss": 3.9798, "step": 27101 }, { "epoch": 9.02798367618889, "grad_norm": 0.7578125, "learning_rate": 3.666010302830941e-07, "loss": 4.0141, "step": 27102 }, { "epoch": 9.028316815191138, "grad_norm": 0.76171875, "learning_rate": 3.663510994428637e-07, "loss": 3.9113, "step": 27103 }, { "epoch": 9.028649954193387, "grad_norm": 0.75390625, "learning_rate": 3.661012516941911e-07, "loss": 3.9314, "step": 27104 }, { "epoch": 9.028983093195636, "grad_norm": 0.77734375, "learning_rate": 3.658514870399873e-07, "loss": 3.9219, "step": 27105 }, { "epoch": 9.029316232197884, "grad_norm": 0.71484375, "learning_rate": 3.6560180548316e-07, "loss": 3.9352, "step": 27106 }, { "epoch": 9.029649371200133, "grad_norm": 0.71875, "learning_rate": 3.6535220702661937e-07, "loss": 4.0879, "step": 27107 }, { "epoch": 9.029982510202382, "grad_norm": 0.79296875, "learning_rate": 3.651026916732722e-07, "loss": 3.9707, "step": 27108 }, { "epoch": 9.030315649204631, "grad_norm": 0.71484375, "learning_rate": 3.6485325942602454e-07, "loss": 3.9891, "step": 27109 }, { "epoch": 9.030648788206879, "grad_norm": 0.75, "learning_rate": 3.6460391028778073e-07, "loss": 3.9497, "step": 27110 }, { "epoch": 9.030981927209128, "grad_norm": 0.71484375, "learning_rate": 3.643546442614476e-07, "loss": 3.9999, "step": 27111 }, { "epoch": 9.031315066211377, "grad_norm": 0.796875, "learning_rate": 3.641054613499262e-07, "loss": 3.9494, "step": 27112 }, { "epoch": 9.031648205213626, "grad_norm": 0.78125, "learning_rate": 3.638563615561191e-07, "loss": 3.9456, "step": 27113 }, { "epoch": 9.031981344215874, "grad_norm": 0.7421875, "learning_rate": 3.6360734488292995e-07, "loss": 4.03, "step": 27114 }, { "epoch": 9.032314483218123, "grad_norm": 0.77734375, "learning_rate": 3.633584113332572e-07, "loss": 3.9519, "step": 27115 }, { "epoch": 9.032647622220372, "grad_norm": 0.73046875, "learning_rate": 3.63109560910001e-07, "loss": 3.9625, "step": 27116 }, { "epoch": 9.03298076122262, "grad_norm": 0.75390625, "learning_rate": 3.628607936160591e-07, "loss": 3.8953, "step": 27117 }, { "epoch": 9.033313900224869, "grad_norm": 0.75390625, "learning_rate": 3.626121094543308e-07, "loss": 3.8757, "step": 27118 }, { "epoch": 9.033647039227118, "grad_norm": 0.78515625, "learning_rate": 3.6236350842771214e-07, "loss": 4.0266, "step": 27119 }, { "epoch": 9.033980178229367, "grad_norm": 0.7734375, "learning_rate": 3.6211499053909915e-07, "loss": 3.9964, "step": 27120 }, { "epoch": 9.034313317231614, "grad_norm": 0.8203125, "learning_rate": 3.6186655579138446e-07, "loss": 3.9851, "step": 27121 }, { "epoch": 9.034646456233864, "grad_norm": 0.77734375, "learning_rate": 3.616182041874641e-07, "loss": 4.0128, "step": 27122 }, { "epoch": 9.034979595236113, "grad_norm": 0.7890625, "learning_rate": 3.6136993573022995e-07, "loss": 3.9319, "step": 27123 }, { "epoch": 9.03531273423836, "grad_norm": 0.76171875, "learning_rate": 3.6112175042257223e-07, "loss": 3.9604, "step": 27124 }, { "epoch": 9.03564587324061, "grad_norm": 0.74609375, "learning_rate": 3.6087364826738525e-07, "loss": 3.9903, "step": 27125 }, { "epoch": 9.035979012242858, "grad_norm": 0.78125, "learning_rate": 3.6062562926755663e-07, "loss": 3.9743, "step": 27126 }, { "epoch": 9.036312151245108, "grad_norm": 0.7265625, "learning_rate": 3.6037769342597504e-07, "loss": 4.006, "step": 27127 }, { "epoch": 9.036645290247355, "grad_norm": 0.75390625, "learning_rate": 3.6012984074552805e-07, "loss": 3.956, "step": 27128 }, { "epoch": 9.036978429249604, "grad_norm": 0.80078125, "learning_rate": 3.5988207122910596e-07, "loss": 4.0104, "step": 27129 }, { "epoch": 9.037311568251853, "grad_norm": 0.76953125, "learning_rate": 3.596343848795905e-07, "loss": 3.9875, "step": 27130 }, { "epoch": 9.037644707254103, "grad_norm": 0.7265625, "learning_rate": 3.593867816998686e-07, "loss": 4.007, "step": 27131 }, { "epoch": 9.03797784625635, "grad_norm": 0.76171875, "learning_rate": 3.591392616928238e-07, "loss": 4.0084, "step": 27132 }, { "epoch": 9.0383109852586, "grad_norm": 0.76171875, "learning_rate": 3.588918248613404e-07, "loss": 3.942, "step": 27133 }, { "epoch": 9.038644124260848, "grad_norm": 0.78125, "learning_rate": 3.5864447120829954e-07, "loss": 4.0837, "step": 27134 }, { "epoch": 9.038977263263096, "grad_norm": 0.76171875, "learning_rate": 3.5839720073658127e-07, "loss": 4.03, "step": 27135 }, { "epoch": 9.039310402265345, "grad_norm": 0.76953125, "learning_rate": 3.581500134490684e-07, "loss": 4.0384, "step": 27136 }, { "epoch": 9.039643541267594, "grad_norm": 0.75390625, "learning_rate": 3.579029093486377e-07, "loss": 4.0118, "step": 27137 }, { "epoch": 9.039976680269843, "grad_norm": 0.77734375, "learning_rate": 3.576558884381695e-07, "loss": 4.0303, "step": 27138 }, { "epoch": 9.04030981927209, "grad_norm": 0.78125, "learning_rate": 3.5740895072053887e-07, "loss": 3.9382, "step": 27139 }, { "epoch": 9.04064295827434, "grad_norm": 0.8125, "learning_rate": 3.5716209619862354e-07, "loss": 3.9751, "step": 27140 }, { "epoch": 9.04097609727659, "grad_norm": 0.73828125, "learning_rate": 3.569153248752988e-07, "loss": 3.889, "step": 27141 }, { "epoch": 9.041309236278837, "grad_norm": 0.7578125, "learning_rate": 3.566686367534372e-07, "loss": 3.9282, "step": 27142 }, { "epoch": 9.041642375281086, "grad_norm": 0.83203125, "learning_rate": 3.5642203183591487e-07, "loss": 3.9425, "step": 27143 }, { "epoch": 9.041975514283335, "grad_norm": 0.7890625, "learning_rate": 3.5617551012560283e-07, "loss": 3.9798, "step": 27144 }, { "epoch": 9.042308653285584, "grad_norm": 0.75, "learning_rate": 3.5592907162537216e-07, "loss": 3.9657, "step": 27145 }, { "epoch": 9.042641792287831, "grad_norm": 0.79296875, "learning_rate": 3.5568271633809296e-07, "loss": 3.9769, "step": 27146 }, { "epoch": 9.04297493129008, "grad_norm": 0.73046875, "learning_rate": 3.55436444266638e-07, "loss": 4.0473, "step": 27147 }, { "epoch": 9.04330807029233, "grad_norm": 0.73828125, "learning_rate": 3.551902554138717e-07, "loss": 4.0198, "step": 27148 }, { "epoch": 9.043641209294579, "grad_norm": 0.78515625, "learning_rate": 3.549441497826633e-07, "loss": 3.9581, "step": 27149 }, { "epoch": 9.043974348296826, "grad_norm": 0.75390625, "learning_rate": 3.5469812737587897e-07, "loss": 3.9537, "step": 27150 }, { "epoch": 9.044307487299076, "grad_norm": 0.7265625, "learning_rate": 3.5445218819638554e-07, "loss": 4.0485, "step": 27151 }, { "epoch": 9.044640626301325, "grad_norm": 0.7265625, "learning_rate": 3.542063322470465e-07, "loss": 3.9742, "step": 27152 }, { "epoch": 9.044973765303572, "grad_norm": 0.77734375, "learning_rate": 3.539605595307263e-07, "loss": 3.9936, "step": 27153 }, { "epoch": 9.045306904305821, "grad_norm": 0.75390625, "learning_rate": 3.537148700502868e-07, "loss": 3.9387, "step": 27154 }, { "epoch": 9.04564004330807, "grad_norm": 0.73046875, "learning_rate": 3.534692638085907e-07, "loss": 4.0487, "step": 27155 }, { "epoch": 9.04597318231032, "grad_norm": 0.7578125, "learning_rate": 3.5322374080849903e-07, "loss": 3.9794, "step": 27156 }, { "epoch": 9.046306321312567, "grad_norm": 0.78125, "learning_rate": 3.5297830105286947e-07, "loss": 4.0138, "step": 27157 }, { "epoch": 9.046639460314816, "grad_norm": 0.75, "learning_rate": 3.5273294454456235e-07, "loss": 3.9869, "step": 27158 }, { "epoch": 9.046972599317066, "grad_norm": 0.76171875, "learning_rate": 3.524876712864361e-07, "loss": 3.9834, "step": 27159 }, { "epoch": 9.047305738319313, "grad_norm": 0.79296875, "learning_rate": 3.5224248128134685e-07, "loss": 3.956, "step": 27160 }, { "epoch": 9.047638877321562, "grad_norm": 0.75, "learning_rate": 3.519973745321506e-07, "loss": 4.0013, "step": 27161 }, { "epoch": 9.047972016323811, "grad_norm": 0.79296875, "learning_rate": 3.5175235104170257e-07, "loss": 3.9241, "step": 27162 }, { "epoch": 9.04830515532606, "grad_norm": 0.74609375, "learning_rate": 3.515074108128563e-07, "loss": 3.9451, "step": 27163 }, { "epoch": 9.048638294328308, "grad_norm": 0.78515625, "learning_rate": 3.512625538484654e-07, "loss": 4.0389, "step": 27164 }, { "epoch": 9.048971433330557, "grad_norm": 0.75, "learning_rate": 3.510177801513817e-07, "loss": 3.9879, "step": 27165 }, { "epoch": 9.049304572332806, "grad_norm": 0.74609375, "learning_rate": 3.5077308972445626e-07, "loss": 3.9903, "step": 27166 }, { "epoch": 9.049637711335054, "grad_norm": 0.75390625, "learning_rate": 3.5052848257053847e-07, "loss": 3.9464, "step": 27167 }, { "epoch": 9.049970850337303, "grad_norm": 0.78125, "learning_rate": 3.5028395869247775e-07, "loss": 3.9758, "step": 27168 }, { "epoch": 9.050303989339552, "grad_norm": 0.71875, "learning_rate": 3.5003951809312347e-07, "loss": 3.978, "step": 27169 }, { "epoch": 9.050637128341801, "grad_norm": 0.73046875, "learning_rate": 3.4979516077532164e-07, "loss": 3.9981, "step": 27170 }, { "epoch": 9.050970267344049, "grad_norm": 0.76953125, "learning_rate": 3.4955088674192006e-07, "loss": 4.0173, "step": 27171 }, { "epoch": 9.051303406346298, "grad_norm": 0.76171875, "learning_rate": 3.493066959957605e-07, "loss": 3.9055, "step": 27172 }, { "epoch": 9.051636545348547, "grad_norm": 0.76171875, "learning_rate": 3.4906258853969085e-07, "loss": 4.081, "step": 27173 }, { "epoch": 9.051969684350796, "grad_norm": 0.7109375, "learning_rate": 3.488185643765546e-07, "loss": 4.0263, "step": 27174 }, { "epoch": 9.052302823353044, "grad_norm": 0.7578125, "learning_rate": 3.4857462350919025e-07, "loss": 4.0574, "step": 27175 }, { "epoch": 9.052635962355293, "grad_norm": 0.734375, "learning_rate": 3.4833076594044307e-07, "loss": 3.9936, "step": 27176 }, { "epoch": 9.052969101357542, "grad_norm": 0.765625, "learning_rate": 3.480869916731516e-07, "loss": 4.0353, "step": 27177 }, { "epoch": 9.05330224035979, "grad_norm": 0.7578125, "learning_rate": 3.478433007101553e-07, "loss": 3.9658, "step": 27178 }, { "epoch": 9.053635379362039, "grad_norm": 0.80078125, "learning_rate": 3.4759969305429266e-07, "loss": 3.9006, "step": 27179 }, { "epoch": 9.053968518364288, "grad_norm": 0.7734375, "learning_rate": 3.473561687084023e-07, "loss": 4.0105, "step": 27180 }, { "epoch": 9.054301657366537, "grad_norm": 0.76953125, "learning_rate": 3.4711272767532024e-07, "loss": 4.0815, "step": 27181 }, { "epoch": 9.054634796368784, "grad_norm": 0.7890625, "learning_rate": 3.4686936995788094e-07, "loss": 3.8944, "step": 27182 }, { "epoch": 9.054967935371034, "grad_norm": 0.8046875, "learning_rate": 3.466260955589204e-07, "loss": 3.9852, "step": 27183 }, { "epoch": 9.055301074373283, "grad_norm": 0.71484375, "learning_rate": 3.4638290448127216e-07, "loss": 3.9789, "step": 27184 }, { "epoch": 9.05563421337553, "grad_norm": 0.75, "learning_rate": 3.4613979672776743e-07, "loss": 3.9609, "step": 27185 }, { "epoch": 9.05596735237778, "grad_norm": 0.80078125, "learning_rate": 3.458967723012388e-07, "loss": 3.9446, "step": 27186 }, { "epoch": 9.056300491380028, "grad_norm": 0.7890625, "learning_rate": 3.456538312045174e-07, "loss": 3.9694, "step": 27187 }, { "epoch": 9.056633630382278, "grad_norm": 0.765625, "learning_rate": 3.4541097344043257e-07, "loss": 3.9696, "step": 27188 }, { "epoch": 9.056966769384525, "grad_norm": 0.75390625, "learning_rate": 3.4516819901181303e-07, "loss": 4.007, "step": 27189 }, { "epoch": 9.057299908386774, "grad_norm": 0.76953125, "learning_rate": 3.449255079214855e-07, "loss": 4.0045, "step": 27190 }, { "epoch": 9.057633047389023, "grad_norm": 0.80078125, "learning_rate": 3.4468290017227954e-07, "loss": 4.022, "step": 27191 }, { "epoch": 9.057966186391273, "grad_norm": 0.78125, "learning_rate": 3.444403757670195e-07, "loss": 3.9705, "step": 27192 }, { "epoch": 9.05829932539352, "grad_norm": 0.7578125, "learning_rate": 3.441979347085297e-07, "loss": 3.9185, "step": 27193 }, { "epoch": 9.05863246439577, "grad_norm": 0.74609375, "learning_rate": 3.439555769996339e-07, "loss": 3.997, "step": 27194 }, { "epoch": 9.058965603398018, "grad_norm": 0.7265625, "learning_rate": 3.437133026431563e-07, "loss": 3.9711, "step": 27195 }, { "epoch": 9.059298742400266, "grad_norm": 0.7578125, "learning_rate": 3.4347111164191815e-07, "loss": 3.9802, "step": 27196 }, { "epoch": 9.059631881402515, "grad_norm": 0.73046875, "learning_rate": 3.4322900399874043e-07, "loss": 4.0294, "step": 27197 }, { "epoch": 9.059965020404764, "grad_norm": 0.78125, "learning_rate": 3.4298697971644257e-07, "loss": 3.9123, "step": 27198 }, { "epoch": 9.060298159407013, "grad_norm": 0.7890625, "learning_rate": 3.427450387978448e-07, "loss": 3.9673, "step": 27199 }, { "epoch": 9.06063129840926, "grad_norm": 0.734375, "learning_rate": 3.4250318124576576e-07, "loss": 3.9797, "step": 27200 }, { "epoch": 9.06096443741151, "grad_norm": 0.8125, "learning_rate": 3.422614070630206e-07, "loss": 4.0255, "step": 27201 }, { "epoch": 9.061297576413759, "grad_norm": 0.7890625, "learning_rate": 3.4201971625242716e-07, "loss": 3.9082, "step": 27202 }, { "epoch": 9.061630715416007, "grad_norm": 0.734375, "learning_rate": 3.41778108816799e-07, "loss": 3.9903, "step": 27203 }, { "epoch": 9.061963854418256, "grad_norm": 0.76953125, "learning_rate": 3.415365847589522e-07, "loss": 3.9934, "step": 27204 }, { "epoch": 9.062296993420505, "grad_norm": 0.76171875, "learning_rate": 3.4129514408169694e-07, "loss": 3.9215, "step": 27205 }, { "epoch": 9.062630132422754, "grad_norm": 0.78125, "learning_rate": 3.410537867878494e-07, "loss": 4.0505, "step": 27206 }, { "epoch": 9.062963271425001, "grad_norm": 0.765625, "learning_rate": 3.408125128802181e-07, "loss": 3.979, "step": 27207 }, { "epoch": 9.06329641042725, "grad_norm": 0.703125, "learning_rate": 3.40571322361615e-07, "loss": 4.0992, "step": 27208 }, { "epoch": 9.0636295494295, "grad_norm": 0.734375, "learning_rate": 3.403302152348478e-07, "loss": 3.9793, "step": 27209 }, { "epoch": 9.063962688431749, "grad_norm": 0.75390625, "learning_rate": 3.400891915027268e-07, "loss": 4.0245, "step": 27210 }, { "epoch": 9.064295827433996, "grad_norm": 0.703125, "learning_rate": 3.3984825116805814e-07, "loss": 4.0526, "step": 27211 }, { "epoch": 9.064628966436246, "grad_norm": 0.734375, "learning_rate": 3.3960739423364777e-07, "loss": 4.0427, "step": 27212 }, { "epoch": 9.064962105438495, "grad_norm": 0.80859375, "learning_rate": 3.393666207023019e-07, "loss": 4.0042, "step": 27213 }, { "epoch": 9.065295244440742, "grad_norm": 0.78515625, "learning_rate": 3.3912593057682573e-07, "loss": 3.9269, "step": 27214 }, { "epoch": 9.065628383442991, "grad_norm": 0.84765625, "learning_rate": 3.38885323860022e-07, "loss": 3.8924, "step": 27215 }, { "epoch": 9.06596152244524, "grad_norm": 0.7578125, "learning_rate": 3.386448005546919e-07, "loss": 4.0174, "step": 27216 }, { "epoch": 9.06629466144749, "grad_norm": 0.796875, "learning_rate": 3.384043606636397e-07, "loss": 3.9315, "step": 27217 }, { "epoch": 9.066627800449737, "grad_norm": 0.8203125, "learning_rate": 3.38164004189665e-07, "loss": 3.9949, "step": 27218 }, { "epoch": 9.066960939451986, "grad_norm": 0.79296875, "learning_rate": 3.379237311355662e-07, "loss": 3.9927, "step": 27219 }, { "epoch": 9.067294078454236, "grad_norm": 0.75390625, "learning_rate": 3.3768354150414376e-07, "loss": 4.0111, "step": 27220 }, { "epoch": 9.067627217456483, "grad_norm": 0.77734375, "learning_rate": 3.374434352981945e-07, "loss": 4.0169, "step": 27221 }, { "epoch": 9.067960356458732, "grad_norm": 0.74609375, "learning_rate": 3.372034125205145e-07, "loss": 3.9939, "step": 27222 }, { "epoch": 9.068293495460981, "grad_norm": 0.765625, "learning_rate": 3.369634731738999e-07, "loss": 4.0521, "step": 27223 }, { "epoch": 9.06862663446323, "grad_norm": 0.78125, "learning_rate": 3.36723617261146e-07, "loss": 3.973, "step": 27224 }, { "epoch": 9.068959773465478, "grad_norm": 0.7578125, "learning_rate": 3.364838447850471e-07, "loss": 3.9907, "step": 27225 }, { "epoch": 9.069292912467727, "grad_norm": 0.7890625, "learning_rate": 3.3624415574839446e-07, "loss": 3.9556, "step": 27226 }, { "epoch": 9.069626051469976, "grad_norm": 0.765625, "learning_rate": 3.360045501539807e-07, "loss": 3.9914, "step": 27227 }, { "epoch": 9.069959190472225, "grad_norm": 0.71875, "learning_rate": 3.357650280045979e-07, "loss": 3.9679, "step": 27228 }, { "epoch": 9.070292329474473, "grad_norm": 0.76171875, "learning_rate": 3.355255893030346e-07, "loss": 4.0181, "step": 27229 }, { "epoch": 9.070625468476722, "grad_norm": 0.75390625, "learning_rate": 3.3528623405207763e-07, "loss": 3.996, "step": 27230 }, { "epoch": 9.070958607478971, "grad_norm": 0.796875, "learning_rate": 3.350469622545199e-07, "loss": 3.9296, "step": 27231 }, { "epoch": 9.071291746481219, "grad_norm": 0.75390625, "learning_rate": 3.3480777391314497e-07, "loss": 4.0277, "step": 27232 }, { "epoch": 9.071624885483468, "grad_norm": 0.7578125, "learning_rate": 3.345686690307398e-07, "loss": 3.984, "step": 27233 }, { "epoch": 9.071958024485717, "grad_norm": 0.734375, "learning_rate": 3.343296476100879e-07, "loss": 3.9953, "step": 27234 }, { "epoch": 9.072291163487966, "grad_norm": 0.73828125, "learning_rate": 3.340907096539764e-07, "loss": 3.989, "step": 27235 }, { "epoch": 9.072624302490214, "grad_norm": 0.7734375, "learning_rate": 3.338518551651862e-07, "loss": 4.0552, "step": 27236 }, { "epoch": 9.072957441492463, "grad_norm": 0.75, "learning_rate": 3.336130841465018e-07, "loss": 3.9357, "step": 27237 }, { "epoch": 9.073290580494712, "grad_norm": 0.76953125, "learning_rate": 3.3337439660070024e-07, "loss": 3.9743, "step": 27238 }, { "epoch": 9.07362371949696, "grad_norm": 0.75, "learning_rate": 3.3313579253056504e-07, "loss": 4.005, "step": 27239 }, { "epoch": 9.073956858499209, "grad_norm": 0.7734375, "learning_rate": 3.32897271938874e-07, "loss": 4.0465, "step": 27240 }, { "epoch": 9.074289997501458, "grad_norm": 0.74609375, "learning_rate": 3.3265883482840655e-07, "loss": 4.0202, "step": 27241 }, { "epoch": 9.074623136503707, "grad_norm": 0.765625, "learning_rate": 3.3242048120193795e-07, "loss": 3.9428, "step": 27242 }, { "epoch": 9.074956275505954, "grad_norm": 0.78515625, "learning_rate": 3.321822110622469e-07, "loss": 4.0032, "step": 27243 }, { "epoch": 9.075289414508203, "grad_norm": 0.76953125, "learning_rate": 3.3194402441210777e-07, "loss": 3.9555, "step": 27244 }, { "epoch": 9.075622553510453, "grad_norm": 0.78515625, "learning_rate": 3.317059212542942e-07, "loss": 4.007, "step": 27245 }, { "epoch": 9.0759556925127, "grad_norm": 0.78515625, "learning_rate": 3.3146790159158057e-07, "loss": 3.9855, "step": 27246 }, { "epoch": 9.07628883151495, "grad_norm": 0.82421875, "learning_rate": 3.3122996542673897e-07, "loss": 4.0373, "step": 27247 }, { "epoch": 9.076621970517198, "grad_norm": 0.74609375, "learning_rate": 3.3099211276254036e-07, "loss": 4.0422, "step": 27248 }, { "epoch": 9.076955109519448, "grad_norm": 0.76171875, "learning_rate": 3.3075434360175433e-07, "loss": 4.0069, "step": 27249 }, { "epoch": 9.077288248521695, "grad_norm": 0.75390625, "learning_rate": 3.305166579471536e-07, "loss": 3.9349, "step": 27250 }, { "epoch": 9.077621387523944, "grad_norm": 0.74609375, "learning_rate": 3.3027905580150427e-07, "loss": 3.9322, "step": 27251 }, { "epoch": 9.077954526526193, "grad_norm": 0.76171875, "learning_rate": 3.300415371675741e-07, "loss": 4.0318, "step": 27252 }, { "epoch": 9.078287665528443, "grad_norm": 0.74609375, "learning_rate": 3.298041020481293e-07, "loss": 3.9928, "step": 27253 }, { "epoch": 9.07862080453069, "grad_norm": 0.76953125, "learning_rate": 3.295667504459368e-07, "loss": 3.9506, "step": 27254 }, { "epoch": 9.07895394353294, "grad_norm": 0.765625, "learning_rate": 3.2932948236376187e-07, "loss": 3.9557, "step": 27255 }, { "epoch": 9.079287082535188, "grad_norm": 0.75, "learning_rate": 3.2909229780436394e-07, "loss": 3.9219, "step": 27256 }, { "epoch": 9.079620221537436, "grad_norm": 0.81640625, "learning_rate": 3.2885519677051086e-07, "loss": 3.9834, "step": 27257 }, { "epoch": 9.079953360539685, "grad_norm": 0.765625, "learning_rate": 3.2861817926496117e-07, "loss": 3.9016, "step": 27258 }, { "epoch": 9.080286499541934, "grad_norm": 0.76953125, "learning_rate": 3.283812452904769e-07, "loss": 3.9917, "step": 27259 }, { "epoch": 9.080619638544183, "grad_norm": 0.74609375, "learning_rate": 3.2814439484981585e-07, "loss": 3.9831, "step": 27260 }, { "epoch": 9.08095277754643, "grad_norm": 0.76171875, "learning_rate": 3.279076279457399e-07, "loss": 4.0303, "step": 27261 }, { "epoch": 9.08128591654868, "grad_norm": 0.7421875, "learning_rate": 3.2767094458100526e-07, "loss": 3.9562, "step": 27262 }, { "epoch": 9.081619055550929, "grad_norm": 0.76953125, "learning_rate": 3.2743434475836887e-07, "loss": 3.9611, "step": 27263 }, { "epoch": 9.081952194553176, "grad_norm": 0.796875, "learning_rate": 3.2719782848058683e-07, "loss": 4.0082, "step": 27264 }, { "epoch": 9.082285333555426, "grad_norm": 0.7578125, "learning_rate": 3.2696139575041363e-07, "loss": 3.9128, "step": 27265 }, { "epoch": 9.082618472557675, "grad_norm": 0.71875, "learning_rate": 3.267250465706037e-07, "loss": 3.9803, "step": 27266 }, { "epoch": 9.082951611559924, "grad_norm": 0.80859375, "learning_rate": 3.2648878094390824e-07, "loss": 3.8966, "step": 27267 }, { "epoch": 9.083284750562171, "grad_norm": 0.734375, "learning_rate": 3.2625259887308165e-07, "loss": 3.9998, "step": 27268 }, { "epoch": 9.08361788956442, "grad_norm": 0.7890625, "learning_rate": 3.260165003608742e-07, "loss": 3.9853, "step": 27269 }, { "epoch": 9.08395102856667, "grad_norm": 0.76171875, "learning_rate": 3.257804854100355e-07, "loss": 3.9935, "step": 27270 }, { "epoch": 9.084284167568919, "grad_norm": 0.73046875, "learning_rate": 3.25544554023314e-07, "loss": 3.9855, "step": 27271 }, { "epoch": 9.084617306571166, "grad_norm": 0.76953125, "learning_rate": 3.2530870620346013e-07, "loss": 3.9938, "step": 27272 }, { "epoch": 9.084950445573416, "grad_norm": 0.76953125, "learning_rate": 3.2507294195322003e-07, "loss": 3.9708, "step": 27273 }, { "epoch": 9.085283584575665, "grad_norm": 0.796875, "learning_rate": 3.2483726127533643e-07, "loss": 4.0016, "step": 27274 }, { "epoch": 9.085616723577912, "grad_norm": 0.77734375, "learning_rate": 3.2460166417255965e-07, "loss": 3.9149, "step": 27275 }, { "epoch": 9.085949862580161, "grad_norm": 0.7890625, "learning_rate": 3.2436615064763085e-07, "loss": 3.9691, "step": 27276 }, { "epoch": 9.08628300158241, "grad_norm": 0.7734375, "learning_rate": 3.241307207032945e-07, "loss": 3.9602, "step": 27277 }, { "epoch": 9.08661614058466, "grad_norm": 0.7890625, "learning_rate": 3.238953743422909e-07, "loss": 3.9941, "step": 27278 }, { "epoch": 9.086949279586907, "grad_norm": 0.73828125, "learning_rate": 3.2366011156736374e-07, "loss": 3.8932, "step": 27279 }, { "epoch": 9.087282418589156, "grad_norm": 0.8046875, "learning_rate": 3.234249323812524e-07, "loss": 4.0073, "step": 27280 }, { "epoch": 9.087615557591405, "grad_norm": 0.7578125, "learning_rate": 3.2318983678669647e-07, "loss": 4.0411, "step": 27281 }, { "epoch": 9.087948696593653, "grad_norm": 0.78515625, "learning_rate": 3.229548247864336e-07, "loss": 3.9804, "step": 27282 }, { "epoch": 9.088281835595902, "grad_norm": 0.76953125, "learning_rate": 3.2271989638320174e-07, "loss": 3.9173, "step": 27283 }, { "epoch": 9.088614974598151, "grad_norm": 0.73046875, "learning_rate": 3.22485051579737e-07, "loss": 3.9835, "step": 27284 }, { "epoch": 9.0889481136004, "grad_norm": 0.76953125, "learning_rate": 3.222502903787755e-07, "loss": 3.8923, "step": 27285 }, { "epoch": 9.089281252602648, "grad_norm": 0.82421875, "learning_rate": 3.220156127830501e-07, "loss": 3.9562, "step": 27286 }, { "epoch": 9.089614391604897, "grad_norm": 0.734375, "learning_rate": 3.21781018795296e-07, "loss": 4.0012, "step": 27287 }, { "epoch": 9.089947530607146, "grad_norm": 0.8125, "learning_rate": 3.215465084182445e-07, "loss": 3.9872, "step": 27288 }, { "epoch": 9.090280669609395, "grad_norm": 0.7578125, "learning_rate": 3.2131208165462834e-07, "loss": 4.0008, "step": 27289 }, { "epoch": 9.090613808611643, "grad_norm": 0.77734375, "learning_rate": 3.2107773850717613e-07, "loss": 4.0561, "step": 27290 }, { "epoch": 9.090946947613892, "grad_norm": 0.76171875, "learning_rate": 3.2084347897862076e-07, "loss": 3.949, "step": 27291 }, { "epoch": 9.091280086616141, "grad_norm": 0.765625, "learning_rate": 3.2060930307168834e-07, "loss": 3.9794, "step": 27292 }, { "epoch": 9.091613225618389, "grad_norm": 0.78515625, "learning_rate": 3.2037521078910505e-07, "loss": 3.9207, "step": 27293 }, { "epoch": 9.091946364620638, "grad_norm": 0.7578125, "learning_rate": 3.2014120213360116e-07, "loss": 3.9485, "step": 27294 }, { "epoch": 9.092279503622887, "grad_norm": 0.76171875, "learning_rate": 3.199072771079012e-07, "loss": 4.0022, "step": 27295 }, { "epoch": 9.092612642625136, "grad_norm": 0.75, "learning_rate": 3.196734357147288e-07, "loss": 3.9294, "step": 27296 }, { "epoch": 9.092945781627384, "grad_norm": 0.75390625, "learning_rate": 3.194396779568068e-07, "loss": 3.9837, "step": 27297 }, { "epoch": 9.093278920629633, "grad_norm": 0.765625, "learning_rate": 3.192060038368613e-07, "loss": 3.9862, "step": 27298 }, { "epoch": 9.093612059631882, "grad_norm": 0.765625, "learning_rate": 3.189724133576119e-07, "loss": 3.968, "step": 27299 }, { "epoch": 9.09394519863413, "grad_norm": 0.76953125, "learning_rate": 3.1873890652177966e-07, "loss": 4.0313, "step": 27300 }, { "epoch": 9.094278337636378, "grad_norm": 0.76953125, "learning_rate": 3.1850548333208497e-07, "loss": 3.9727, "step": 27301 }, { "epoch": 9.094611476638628, "grad_norm": 0.75390625, "learning_rate": 3.182721437912464e-07, "loss": 3.9735, "step": 27302 }, { "epoch": 9.094944615640877, "grad_norm": 0.78125, "learning_rate": 3.180388879019819e-07, "loss": 3.9748, "step": 27303 }, { "epoch": 9.095277754643124, "grad_norm": 0.76171875, "learning_rate": 3.1780571566700666e-07, "loss": 4.0753, "step": 27304 }, { "epoch": 9.095610893645373, "grad_norm": 0.76171875, "learning_rate": 3.1757262708903947e-07, "loss": 3.9554, "step": 27305 }, { "epoch": 9.095944032647623, "grad_norm": 0.75390625, "learning_rate": 3.173396221707939e-07, "loss": 3.9761, "step": 27306 }, { "epoch": 9.09627717164987, "grad_norm": 0.7578125, "learning_rate": 3.171067009149844e-07, "loss": 3.949, "step": 27307 }, { "epoch": 9.09661031065212, "grad_norm": 0.79296875, "learning_rate": 3.1687386332432234e-07, "loss": 4.0223, "step": 27308 }, { "epoch": 9.096943449654368, "grad_norm": 0.7578125, "learning_rate": 3.166411094015237e-07, "loss": 3.9591, "step": 27309 }, { "epoch": 9.097276588656618, "grad_norm": 0.7890625, "learning_rate": 3.164084391492955e-07, "loss": 4.0109, "step": 27310 }, { "epoch": 9.097609727658865, "grad_norm": 0.75, "learning_rate": 3.1617585257034816e-07, "loss": 3.9502, "step": 27311 }, { "epoch": 9.097942866661114, "grad_norm": 0.77734375, "learning_rate": 3.159433496673936e-07, "loss": 4.0073, "step": 27312 }, { "epoch": 9.098276005663363, "grad_norm": 0.75390625, "learning_rate": 3.15710930443138e-07, "loss": 4.0309, "step": 27313 }, { "epoch": 9.098609144665613, "grad_norm": 0.71484375, "learning_rate": 3.154785949002892e-07, "loss": 4.0029, "step": 27314 }, { "epoch": 9.09894228366786, "grad_norm": 0.7421875, "learning_rate": 3.152463430415517e-07, "loss": 3.9205, "step": 27315 }, { "epoch": 9.09927542267011, "grad_norm": 0.79296875, "learning_rate": 3.150141748696342e-07, "loss": 3.9968, "step": 27316 }, { "epoch": 9.099608561672358, "grad_norm": 0.73828125, "learning_rate": 3.147820903872378e-07, "loss": 4.0239, "step": 27317 }, { "epoch": 9.099941700674606, "grad_norm": 0.76171875, "learning_rate": 3.145500895970671e-07, "loss": 3.9892, "step": 27318 }, { "epoch": 9.100274839676855, "grad_norm": 0.76171875, "learning_rate": 3.1431817250182405e-07, "loss": 3.9557, "step": 27319 }, { "epoch": 9.100607978679104, "grad_norm": 0.71875, "learning_rate": 3.1408633910421066e-07, "loss": 3.9393, "step": 27320 }, { "epoch": 9.100941117681353, "grad_norm": 0.81640625, "learning_rate": 3.138545894069264e-07, "loss": 3.9599, "step": 27321 }, { "epoch": 9.1012742566836, "grad_norm": 0.75390625, "learning_rate": 3.1362292341267e-07, "loss": 3.9381, "step": 27322 }, { "epoch": 9.10160739568585, "grad_norm": 0.73828125, "learning_rate": 3.1339134112414265e-07, "loss": 3.9487, "step": 27323 }, { "epoch": 9.101940534688099, "grad_norm": 0.75390625, "learning_rate": 3.1315984254403877e-07, "loss": 3.9775, "step": 27324 }, { "epoch": 9.102273673690346, "grad_norm": 0.75390625, "learning_rate": 3.1292842767505714e-07, "loss": 3.9573, "step": 27325 }, { "epoch": 9.102606812692596, "grad_norm": 0.7890625, "learning_rate": 3.1269709651989055e-07, "loss": 4.0032, "step": 27326 }, { "epoch": 9.102939951694845, "grad_norm": 0.78515625, "learning_rate": 3.1246584908123683e-07, "loss": 3.9715, "step": 27327 }, { "epoch": 9.103273090697094, "grad_norm": 0.75, "learning_rate": 3.122346853617872e-07, "loss": 3.942, "step": 27328 }, { "epoch": 9.103606229699341, "grad_norm": 0.765625, "learning_rate": 3.120036053642353e-07, "loss": 3.9561, "step": 27329 }, { "epoch": 9.10393936870159, "grad_norm": 0.7421875, "learning_rate": 3.1177260909127064e-07, "loss": 3.8957, "step": 27330 }, { "epoch": 9.10427250770384, "grad_norm": 0.72265625, "learning_rate": 3.115416965455861e-07, "loss": 4.0014, "step": 27331 }, { "epoch": 9.104605646706089, "grad_norm": 0.796875, "learning_rate": 3.113108677298712e-07, "loss": 3.9794, "step": 27332 }, { "epoch": 9.104938785708336, "grad_norm": 0.76171875, "learning_rate": 3.110801226468138e-07, "loss": 3.9412, "step": 27333 }, { "epoch": 9.105271924710586, "grad_norm": 0.73828125, "learning_rate": 3.1084946129910084e-07, "loss": 3.9485, "step": 27334 }, { "epoch": 9.105605063712835, "grad_norm": 0.7421875, "learning_rate": 3.106188836894219e-07, "loss": 3.9403, "step": 27335 }, { "epoch": 9.105938202715082, "grad_norm": 0.73828125, "learning_rate": 3.103883898204599e-07, "loss": 4.0309, "step": 27336 }, { "epoch": 9.106271341717331, "grad_norm": 0.76171875, "learning_rate": 3.1015797969489915e-07, "loss": 4.066, "step": 27337 }, { "epoch": 9.10660448071958, "grad_norm": 0.78125, "learning_rate": 3.0992765331542605e-07, "loss": 3.9777, "step": 27338 }, { "epoch": 9.10693761972183, "grad_norm": 0.78125, "learning_rate": 3.0969741068472254e-07, "loss": 3.9233, "step": 27339 }, { "epoch": 9.107270758724077, "grad_norm": 0.75390625, "learning_rate": 3.0946725180546974e-07, "loss": 3.9844, "step": 27340 }, { "epoch": 9.107603897726326, "grad_norm": 0.78125, "learning_rate": 3.092371766803473e-07, "loss": 4.0337, "step": 27341 }, { "epoch": 9.107937036728575, "grad_norm": 0.76953125, "learning_rate": 3.09007185312038e-07, "loss": 4.0068, "step": 27342 }, { "epoch": 9.108270175730823, "grad_norm": 0.78515625, "learning_rate": 3.0877727770321883e-07, "loss": 3.9229, "step": 27343 }, { "epoch": 9.108603314733072, "grad_norm": 0.76171875, "learning_rate": 3.085474538565694e-07, "loss": 3.9342, "step": 27344 }, { "epoch": 9.108936453735321, "grad_norm": 0.7421875, "learning_rate": 3.083177137747642e-07, "loss": 3.9828, "step": 27345 }, { "epoch": 9.10926959273757, "grad_norm": 0.7734375, "learning_rate": 3.080880574604811e-07, "loss": 3.9547, "step": 27346 }, { "epoch": 9.109602731739818, "grad_norm": 0.80078125, "learning_rate": 3.078584849163946e-07, "loss": 3.9282, "step": 27347 }, { "epoch": 9.109935870742067, "grad_norm": 0.7890625, "learning_rate": 3.0762899614517755e-07, "loss": 3.969, "step": 27348 }, { "epoch": 9.110269009744316, "grad_norm": 0.79296875, "learning_rate": 3.073995911495045e-07, "loss": 3.9251, "step": 27349 }, { "epoch": 9.110602148746565, "grad_norm": 0.75, "learning_rate": 3.071702699320475e-07, "loss": 3.994, "step": 27350 }, { "epoch": 9.110935287748813, "grad_norm": 0.77734375, "learning_rate": 3.069410324954769e-07, "loss": 4.0251, "step": 27351 }, { "epoch": 9.111268426751062, "grad_norm": 0.77734375, "learning_rate": 3.067118788424622e-07, "loss": 4.0423, "step": 27352 }, { "epoch": 9.111601565753311, "grad_norm": 0.78515625, "learning_rate": 3.0648280897567634e-07, "loss": 4.0323, "step": 27353 }, { "epoch": 9.111934704755559, "grad_norm": 0.77734375, "learning_rate": 3.062538228977829e-07, "loss": 4.0439, "step": 27354 }, { "epoch": 9.112267843757808, "grad_norm": 0.79296875, "learning_rate": 3.060249206114499e-07, "loss": 3.9691, "step": 27355 }, { "epoch": 9.112600982760057, "grad_norm": 0.72265625, "learning_rate": 3.0579610211934503e-07, "loss": 3.9855, "step": 27356 }, { "epoch": 9.112934121762306, "grad_norm": 0.75390625, "learning_rate": 3.05567367424133e-07, "loss": 3.9674, "step": 27357 }, { "epoch": 9.113267260764554, "grad_norm": 0.74609375, "learning_rate": 3.053387165284782e-07, "loss": 3.9952, "step": 27358 }, { "epoch": 9.113600399766803, "grad_norm": 0.765625, "learning_rate": 3.051101494350436e-07, "loss": 3.9539, "step": 27359 }, { "epoch": 9.113933538769052, "grad_norm": 0.76171875, "learning_rate": 3.048816661464912e-07, "loss": 3.9938, "step": 27360 }, { "epoch": 9.1142666777713, "grad_norm": 0.7734375, "learning_rate": 3.046532666654839e-07, "loss": 4.0128, "step": 27361 }, { "epoch": 9.114599816773548, "grad_norm": 0.77734375, "learning_rate": 3.044249509946803e-07, "loss": 4.048, "step": 27362 }, { "epoch": 9.114932955775798, "grad_norm": 0.76171875, "learning_rate": 3.041967191367409e-07, "loss": 3.9535, "step": 27363 }, { "epoch": 9.115266094778047, "grad_norm": 0.72265625, "learning_rate": 3.039685710943235e-07, "loss": 3.9876, "step": 27364 }, { "epoch": 9.115599233780294, "grad_norm": 0.73828125, "learning_rate": 3.037405068700852e-07, "loss": 3.9437, "step": 27365 }, { "epoch": 9.115932372782543, "grad_norm": 0.81640625, "learning_rate": 3.0351252646668297e-07, "loss": 3.9941, "step": 27366 }, { "epoch": 9.116265511784793, "grad_norm": 0.734375, "learning_rate": 3.0328462988677225e-07, "loss": 4.0064, "step": 27367 }, { "epoch": 9.11659865078704, "grad_norm": 0.7578125, "learning_rate": 3.0305681713300753e-07, "loss": 4.0135, "step": 27368 }, { "epoch": 9.11693178978929, "grad_norm": 0.75, "learning_rate": 3.028290882080434e-07, "loss": 4.0237, "step": 27369 }, { "epoch": 9.117264928791538, "grad_norm": 0.7890625, "learning_rate": 3.0260144311452935e-07, "loss": 3.9758, "step": 27370 }, { "epoch": 9.117598067793788, "grad_norm": 0.76953125, "learning_rate": 3.023738818551208e-07, "loss": 3.96, "step": 27371 }, { "epoch": 9.117931206796035, "grad_norm": 0.77734375, "learning_rate": 3.021464044324665e-07, "loss": 3.9379, "step": 27372 }, { "epoch": 9.118264345798284, "grad_norm": 0.76953125, "learning_rate": 3.0191901084921506e-07, "loss": 4.0041, "step": 27373 }, { "epoch": 9.118597484800533, "grad_norm": 0.80859375, "learning_rate": 3.016917011080153e-07, "loss": 3.9488, "step": 27374 }, { "epoch": 9.118930623802783, "grad_norm": 0.796875, "learning_rate": 3.0146447521151755e-07, "loss": 3.9584, "step": 27375 }, { "epoch": 9.11926376280503, "grad_norm": 0.7578125, "learning_rate": 3.012373331623655e-07, "loss": 3.951, "step": 27376 }, { "epoch": 9.119596901807279, "grad_norm": 0.71875, "learning_rate": 3.010102749632071e-07, "loss": 3.9787, "step": 27377 }, { "epoch": 9.119930040809528, "grad_norm": 0.7578125, "learning_rate": 3.007833006166844e-07, "loss": 4.0329, "step": 27378 }, { "epoch": 9.120263179811776, "grad_norm": 0.75390625, "learning_rate": 3.005564101254435e-07, "loss": 3.9591, "step": 27379 }, { "epoch": 9.120596318814025, "grad_norm": 0.765625, "learning_rate": 3.0032960349212824e-07, "loss": 4.0755, "step": 27380 }, { "epoch": 9.120929457816274, "grad_norm": 0.74609375, "learning_rate": 3.0010288071937646e-07, "loss": 4.0825, "step": 27381 }, { "epoch": 9.121262596818523, "grad_norm": 0.734375, "learning_rate": 2.9987624180983193e-07, "loss": 3.9567, "step": 27382 }, { "epoch": 9.12159573582077, "grad_norm": 0.7734375, "learning_rate": 2.996496867661333e-07, "loss": 3.9429, "step": 27383 }, { "epoch": 9.12192887482302, "grad_norm": 0.78515625, "learning_rate": 2.994232155909202e-07, "loss": 3.9524, "step": 27384 }, { "epoch": 9.122262013825269, "grad_norm": 0.75, "learning_rate": 2.991968282868296e-07, "loss": 3.9796, "step": 27385 }, { "epoch": 9.122595152827516, "grad_norm": 0.80859375, "learning_rate": 2.9897052485649947e-07, "loss": 4.0122, "step": 27386 }, { "epoch": 9.122928291829766, "grad_norm": 0.7890625, "learning_rate": 2.98744305302566e-07, "loss": 3.9417, "step": 27387 }, { "epoch": 9.123261430832015, "grad_norm": 0.7734375, "learning_rate": 2.985181696276629e-07, "loss": 3.9871, "step": 27388 }, { "epoch": 9.123594569834264, "grad_norm": 0.76171875, "learning_rate": 2.982921178344247e-07, "loss": 4.0412, "step": 27389 }, { "epoch": 9.123927708836511, "grad_norm": 0.76171875, "learning_rate": 2.9806614992548527e-07, "loss": 4.0134, "step": 27390 }, { "epoch": 9.12426084783876, "grad_norm": 0.77734375, "learning_rate": 2.9784026590347486e-07, "loss": 3.9445, "step": 27391 }, { "epoch": 9.12459398684101, "grad_norm": 0.71875, "learning_rate": 2.976144657710256e-07, "loss": 3.9717, "step": 27392 }, { "epoch": 9.124927125843259, "grad_norm": 0.8046875, "learning_rate": 2.97388749530767e-07, "loss": 3.9626, "step": 27393 }, { "epoch": 9.125260264845506, "grad_norm": 0.76953125, "learning_rate": 2.971631171853295e-07, "loss": 3.9743, "step": 27394 }, { "epoch": 9.125593403847756, "grad_norm": 0.76171875, "learning_rate": 2.969375687373402e-07, "loss": 3.9659, "step": 27395 }, { "epoch": 9.125926542850005, "grad_norm": 0.75, "learning_rate": 2.967121041894261e-07, "loss": 3.9446, "step": 27396 }, { "epoch": 9.126259681852252, "grad_norm": 0.76171875, "learning_rate": 2.964867235442142e-07, "loss": 3.9944, "step": 27397 }, { "epoch": 9.126592820854501, "grad_norm": 0.75, "learning_rate": 2.9626142680432927e-07, "loss": 4.0128, "step": 27398 }, { "epoch": 9.12692595985675, "grad_norm": 0.765625, "learning_rate": 2.960362139723949e-07, "loss": 3.9153, "step": 27399 }, { "epoch": 9.127259098859, "grad_norm": 0.74609375, "learning_rate": 2.958110850510348e-07, "loss": 4.0037, "step": 27400 }, { "epoch": 9.127592237861247, "grad_norm": 0.7265625, "learning_rate": 2.95586040042872e-07, "loss": 3.993, "step": 27401 }, { "epoch": 9.127925376863496, "grad_norm": 0.80078125, "learning_rate": 2.953610789505268e-07, "loss": 4.0209, "step": 27402 }, { "epoch": 9.128258515865745, "grad_norm": 0.80859375, "learning_rate": 2.9513620177661875e-07, "loss": 4.005, "step": 27403 }, { "epoch": 9.128591654867993, "grad_norm": 0.74609375, "learning_rate": 2.949114085237692e-07, "loss": 3.9958, "step": 27404 }, { "epoch": 9.128924793870242, "grad_norm": 0.8125, "learning_rate": 2.9468669919459596e-07, "loss": 3.9422, "step": 27405 }, { "epoch": 9.129257932872491, "grad_norm": 0.7578125, "learning_rate": 2.9446207379171614e-07, "loss": 4.0124, "step": 27406 }, { "epoch": 9.12959107187474, "grad_norm": 0.75, "learning_rate": 2.9423753231774515e-07, "loss": 4.0175, "step": 27407 }, { "epoch": 9.129924210876988, "grad_norm": 0.7734375, "learning_rate": 2.9401307477530006e-07, "loss": 3.9386, "step": 27408 }, { "epoch": 9.130257349879237, "grad_norm": 0.765625, "learning_rate": 2.9378870116699375e-07, "loss": 4.0897, "step": 27409 }, { "epoch": 9.130590488881486, "grad_norm": 0.7734375, "learning_rate": 2.9356441149544e-07, "loss": 4.0815, "step": 27410 }, { "epoch": 9.130923627883735, "grad_norm": 0.7578125, "learning_rate": 2.9334020576325256e-07, "loss": 4.0321, "step": 27411 }, { "epoch": 9.131256766885983, "grad_norm": 0.78515625, "learning_rate": 2.931160839730418e-07, "loss": 3.8671, "step": 27412 }, { "epoch": 9.131589905888232, "grad_norm": 0.73828125, "learning_rate": 2.92892046127419e-07, "loss": 3.9346, "step": 27413 }, { "epoch": 9.131923044890481, "grad_norm": 0.7734375, "learning_rate": 2.92668092228992e-07, "loss": 4.0149, "step": 27414 }, { "epoch": 9.132256183892729, "grad_norm": 0.77734375, "learning_rate": 2.924442222803722e-07, "loss": 3.9782, "step": 27415 }, { "epoch": 9.132589322894978, "grad_norm": 0.74609375, "learning_rate": 2.9222043628416574e-07, "loss": 3.9306, "step": 27416 }, { "epoch": 9.132922461897227, "grad_norm": 0.80859375, "learning_rate": 2.9199673424297884e-07, "loss": 3.9658, "step": 27417 }, { "epoch": 9.133255600899476, "grad_norm": 0.73046875, "learning_rate": 2.91773116159417e-07, "loss": 3.9105, "step": 27418 }, { "epoch": 9.133588739901723, "grad_norm": 0.7578125, "learning_rate": 2.9154958203608557e-07, "loss": 3.9671, "step": 27419 }, { "epoch": 9.133921878903973, "grad_norm": 0.7734375, "learning_rate": 2.9132613187558833e-07, "loss": 3.9562, "step": 27420 }, { "epoch": 9.134255017906222, "grad_norm": 0.74609375, "learning_rate": 2.9110276568052737e-07, "loss": 4.0176, "step": 27421 }, { "epoch": 9.13458815690847, "grad_norm": 0.7890625, "learning_rate": 2.9087948345350393e-07, "loss": 3.9604, "step": 27422 }, { "epoch": 9.134921295910718, "grad_norm": 0.7734375, "learning_rate": 2.906562851971209e-07, "loss": 4.0581, "step": 27423 }, { "epoch": 9.135254434912968, "grad_norm": 0.77734375, "learning_rate": 2.9043317091397624e-07, "loss": 4.0058, "step": 27424 }, { "epoch": 9.135587573915217, "grad_norm": 0.78125, "learning_rate": 2.9021014060666954e-07, "loss": 3.9872, "step": 27425 }, { "epoch": 9.135920712917464, "grad_norm": 0.80859375, "learning_rate": 2.899871942777979e-07, "loss": 3.958, "step": 27426 }, { "epoch": 9.136253851919713, "grad_norm": 0.7734375, "learning_rate": 2.8976433192995923e-07, "loss": 3.9938, "step": 27427 }, { "epoch": 9.136586990921963, "grad_norm": 0.79296875, "learning_rate": 2.895415535657481e-07, "loss": 3.9997, "step": 27428 }, { "epoch": 9.13692012992421, "grad_norm": 0.7578125, "learning_rate": 2.8931885918775993e-07, "loss": 3.9365, "step": 27429 }, { "epoch": 9.13725326892646, "grad_norm": 0.76953125, "learning_rate": 2.890962487985885e-07, "loss": 3.9884, "step": 27430 }, { "epoch": 9.137586407928708, "grad_norm": 0.79296875, "learning_rate": 2.888737224008284e-07, "loss": 4.0084, "step": 27431 }, { "epoch": 9.137919546930958, "grad_norm": 0.765625, "learning_rate": 2.886512799970692e-07, "loss": 4.0489, "step": 27432 }, { "epoch": 9.138252685933205, "grad_norm": 0.76171875, "learning_rate": 2.8842892158990217e-07, "loss": 3.9442, "step": 27433 }, { "epoch": 9.138585824935454, "grad_norm": 0.7578125, "learning_rate": 2.8820664718191946e-07, "loss": 4.0285, "step": 27434 }, { "epoch": 9.138918963937703, "grad_norm": 0.7734375, "learning_rate": 2.879844567757081e-07, "loss": 4.0283, "step": 27435 }, { "epoch": 9.139252102939952, "grad_norm": 0.7421875, "learning_rate": 2.8776235037385523e-07, "loss": 3.9645, "step": 27436 }, { "epoch": 9.1395852419422, "grad_norm": 0.79296875, "learning_rate": 2.8754032797895125e-07, "loss": 3.9834, "step": 27437 }, { "epoch": 9.139918380944449, "grad_norm": 0.73046875, "learning_rate": 2.873183895935791e-07, "loss": 3.9809, "step": 27438 }, { "epoch": 9.140251519946698, "grad_norm": 0.75390625, "learning_rate": 2.8709653522032587e-07, "loss": 4.0312, "step": 27439 }, { "epoch": 9.140584658948946, "grad_norm": 0.7578125, "learning_rate": 2.8687476486177365e-07, "loss": 3.984, "step": 27440 }, { "epoch": 9.140917797951195, "grad_norm": 0.765625, "learning_rate": 2.866530785205079e-07, "loss": 4.0125, "step": 27441 }, { "epoch": 9.141250936953444, "grad_norm": 0.77734375, "learning_rate": 2.864314761991091e-07, "loss": 4.0337, "step": 27442 }, { "epoch": 9.141584075955693, "grad_norm": 0.77734375, "learning_rate": 2.8620995790015925e-07, "loss": 3.9719, "step": 27443 }, { "epoch": 9.14191721495794, "grad_norm": 0.71484375, "learning_rate": 2.8598852362623885e-07, "loss": 3.9923, "step": 27444 }, { "epoch": 9.14225035396019, "grad_norm": 0.796875, "learning_rate": 2.8576717337992664e-07, "loss": 3.9796, "step": 27445 }, { "epoch": 9.142583492962439, "grad_norm": 0.79296875, "learning_rate": 2.855459071638006e-07, "loss": 3.996, "step": 27446 }, { "epoch": 9.142916631964686, "grad_norm": 0.796875, "learning_rate": 2.8532472498043693e-07, "loss": 4.0248, "step": 27447 }, { "epoch": 9.143249770966936, "grad_norm": 0.79296875, "learning_rate": 2.851036268324145e-07, "loss": 4.0281, "step": 27448 }, { "epoch": 9.143582909969185, "grad_norm": 0.765625, "learning_rate": 2.848826127223078e-07, "loss": 3.9851, "step": 27449 }, { "epoch": 9.143916048971434, "grad_norm": 0.76171875, "learning_rate": 2.846616826526899e-07, "loss": 3.9677, "step": 27450 }, { "epoch": 9.144249187973681, "grad_norm": 0.796875, "learning_rate": 2.844408366261353e-07, "loss": 3.9752, "step": 27451 }, { "epoch": 9.14458232697593, "grad_norm": 0.73046875, "learning_rate": 2.8422007464521696e-07, "loss": 4.0321, "step": 27452 }, { "epoch": 9.14491546597818, "grad_norm": 0.75, "learning_rate": 2.8399939671250456e-07, "loss": 4.0236, "step": 27453 }, { "epoch": 9.145248604980429, "grad_norm": 0.75390625, "learning_rate": 2.837788028305685e-07, "loss": 4.0709, "step": 27454 }, { "epoch": 9.145581743982676, "grad_norm": 0.765625, "learning_rate": 2.8355829300198003e-07, "loss": 4.0485, "step": 27455 }, { "epoch": 9.145914882984925, "grad_norm": 0.75, "learning_rate": 2.8333786722930716e-07, "loss": 4.0049, "step": 27456 }, { "epoch": 9.146248021987175, "grad_norm": 0.7734375, "learning_rate": 2.8311752551511613e-07, "loss": 3.9654, "step": 27457 }, { "epoch": 9.146581160989422, "grad_norm": 0.77734375, "learning_rate": 2.828972678619732e-07, "loss": 3.9391, "step": 27458 }, { "epoch": 9.146914299991671, "grad_norm": 0.734375, "learning_rate": 2.8267709427244633e-07, "loss": 3.9978, "step": 27459 }, { "epoch": 9.14724743899392, "grad_norm": 0.76953125, "learning_rate": 2.8245700474909846e-07, "loss": 3.9561, "step": 27460 }, { "epoch": 9.14758057799617, "grad_norm": 0.75390625, "learning_rate": 2.8223699929449343e-07, "loss": 4.0378, "step": 27461 }, { "epoch": 9.147913716998417, "grad_norm": 0.7578125, "learning_rate": 2.820170779111933e-07, "loss": 4.05, "step": 27462 }, { "epoch": 9.148246856000666, "grad_norm": 0.734375, "learning_rate": 2.817972406017602e-07, "loss": 4.0431, "step": 27463 }, { "epoch": 9.148579995002915, "grad_norm": 0.765625, "learning_rate": 2.815774873687554e-07, "loss": 4.0111, "step": 27464 }, { "epoch": 9.148913134005163, "grad_norm": 0.75390625, "learning_rate": 2.8135781821473693e-07, "loss": 4.0015, "step": 27465 }, { "epoch": 9.149246273007412, "grad_norm": 0.78125, "learning_rate": 2.811382331422635e-07, "loss": 3.9422, "step": 27466 }, { "epoch": 9.149579412009661, "grad_norm": 0.80078125, "learning_rate": 2.809187321538947e-07, "loss": 3.9643, "step": 27467 }, { "epoch": 9.14991255101191, "grad_norm": 0.78515625, "learning_rate": 2.8069931525218614e-07, "loss": 3.9952, "step": 27468 }, { "epoch": 9.150245690014158, "grad_norm": 0.7890625, "learning_rate": 2.804799824396939e-07, "loss": 4.0121, "step": 27469 }, { "epoch": 9.150578829016407, "grad_norm": 0.7578125, "learning_rate": 2.8026073371897195e-07, "loss": 4.0335, "step": 27470 }, { "epoch": 9.150911968018656, "grad_norm": 0.765625, "learning_rate": 2.8004156909257476e-07, "loss": 3.9145, "step": 27471 }, { "epoch": 9.151245107020905, "grad_norm": 0.78515625, "learning_rate": 2.798224885630546e-07, "loss": 3.9559, "step": 27472 }, { "epoch": 9.151578246023153, "grad_norm": 0.796875, "learning_rate": 2.796034921329635e-07, "loss": 3.9408, "step": 27473 }, { "epoch": 9.151911385025402, "grad_norm": 0.85546875, "learning_rate": 2.7938457980485276e-07, "loss": 3.9527, "step": 27474 }, { "epoch": 9.152244524027651, "grad_norm": 0.73828125, "learning_rate": 2.7916575158127125e-07, "loss": 4.0039, "step": 27475 }, { "epoch": 9.152577663029899, "grad_norm": 0.78125, "learning_rate": 2.789470074647693e-07, "loss": 3.9608, "step": 27476 }, { "epoch": 9.152910802032148, "grad_norm": 0.77734375, "learning_rate": 2.787283474578925e-07, "loss": 3.8608, "step": 27477 }, { "epoch": 9.153243941034397, "grad_norm": 0.75390625, "learning_rate": 2.785097715631904e-07, "loss": 3.9921, "step": 27478 }, { "epoch": 9.153577080036646, "grad_norm": 0.74609375, "learning_rate": 2.782912797832085e-07, "loss": 3.9545, "step": 27479 }, { "epoch": 9.153910219038893, "grad_norm": 0.7578125, "learning_rate": 2.7807287212048894e-07, "loss": 3.9794, "step": 27480 }, { "epoch": 9.154243358041143, "grad_norm": 0.734375, "learning_rate": 2.778545485775788e-07, "loss": 3.965, "step": 27481 }, { "epoch": 9.154576497043392, "grad_norm": 0.734375, "learning_rate": 2.7763630915702026e-07, "loss": 4.025, "step": 27482 }, { "epoch": 9.15490963604564, "grad_norm": 0.734375, "learning_rate": 2.774181538613546e-07, "loss": 3.9873, "step": 27483 }, { "epoch": 9.155242775047888, "grad_norm": 0.7578125, "learning_rate": 2.7720008269312314e-07, "loss": 3.9469, "step": 27484 }, { "epoch": 9.155575914050138, "grad_norm": 0.78125, "learning_rate": 2.769820956548672e-07, "loss": 3.991, "step": 27485 }, { "epoch": 9.155909053052387, "grad_norm": 0.74609375, "learning_rate": 2.767641927491238e-07, "loss": 3.953, "step": 27486 }, { "epoch": 9.156242192054634, "grad_norm": 0.7578125, "learning_rate": 2.765463739784327e-07, "loss": 3.9934, "step": 27487 }, { "epoch": 9.156575331056883, "grad_norm": 0.78515625, "learning_rate": 2.7632863934533017e-07, "loss": 4.0486, "step": 27488 }, { "epoch": 9.156908470059133, "grad_norm": 0.75, "learning_rate": 2.761109888523533e-07, "loss": 4.0663, "step": 27489 }, { "epoch": 9.15724160906138, "grad_norm": 0.75, "learning_rate": 2.7589342250203515e-07, "loss": 3.9617, "step": 27490 }, { "epoch": 9.15757474806363, "grad_norm": 0.7890625, "learning_rate": 2.7567594029691113e-07, "loss": 3.9781, "step": 27491 }, { "epoch": 9.157907887065878, "grad_norm": 0.765625, "learning_rate": 2.754585422395159e-07, "loss": 3.9682, "step": 27492 }, { "epoch": 9.158241026068128, "grad_norm": 0.7265625, "learning_rate": 2.752412283323799e-07, "loss": 3.9744, "step": 27493 }, { "epoch": 9.158574165070375, "grad_norm": 0.74609375, "learning_rate": 2.7502399857803453e-07, "loss": 3.9752, "step": 27494 }, { "epoch": 9.158907304072624, "grad_norm": 0.73046875, "learning_rate": 2.7480685297900934e-07, "loss": 4.0186, "step": 27495 }, { "epoch": 9.159240443074873, "grad_norm": 0.78515625, "learning_rate": 2.7458979153783565e-07, "loss": 4.0585, "step": 27496 }, { "epoch": 9.159573582077122, "grad_norm": 0.8359375, "learning_rate": 2.743728142570415e-07, "loss": 3.9451, "step": 27497 }, { "epoch": 9.15990672107937, "grad_norm": 0.78125, "learning_rate": 2.7415592113915225e-07, "loss": 3.976, "step": 27498 }, { "epoch": 9.160239860081619, "grad_norm": 0.7421875, "learning_rate": 2.739391121866952e-07, "loss": 3.9301, "step": 27499 }, { "epoch": 9.160572999083868, "grad_norm": 0.7734375, "learning_rate": 2.7372238740219657e-07, "loss": 4.0024, "step": 27500 }, { "epoch": 9.160906138086116, "grad_norm": 0.765625, "learning_rate": 2.735057467881802e-07, "loss": 4.0206, "step": 27501 }, { "epoch": 9.161239277088365, "grad_norm": 0.734375, "learning_rate": 2.7328919034716737e-07, "loss": 3.9802, "step": 27502 }, { "epoch": 9.161572416090614, "grad_norm": 0.75390625, "learning_rate": 2.730727180816844e-07, "loss": 3.9687, "step": 27503 }, { "epoch": 9.161905555092863, "grad_norm": 0.7734375, "learning_rate": 2.728563299942502e-07, "loss": 3.9843, "step": 27504 }, { "epoch": 9.16223869409511, "grad_norm": 0.76953125, "learning_rate": 2.72640026087386e-07, "loss": 3.9426, "step": 27505 }, { "epoch": 9.16257183309736, "grad_norm": 0.765625, "learning_rate": 2.7242380636361143e-07, "loss": 3.9923, "step": 27506 }, { "epoch": 9.162904972099609, "grad_norm": 0.8125, "learning_rate": 2.7220767082544366e-07, "loss": 3.9264, "step": 27507 }, { "epoch": 9.163238111101856, "grad_norm": 0.76171875, "learning_rate": 2.719916194754024e-07, "loss": 4.0928, "step": 27508 }, { "epoch": 9.163571250104106, "grad_norm": 0.765625, "learning_rate": 2.717756523160023e-07, "loss": 3.9316, "step": 27509 }, { "epoch": 9.163904389106355, "grad_norm": 0.74609375, "learning_rate": 2.715597693497579e-07, "loss": 3.9922, "step": 27510 }, { "epoch": 9.164237528108604, "grad_norm": 0.74609375, "learning_rate": 2.713439705791873e-07, "loss": 3.9861, "step": 27511 }, { "epoch": 9.164570667110851, "grad_norm": 0.765625, "learning_rate": 2.7112825600680183e-07, "loss": 3.936, "step": 27512 }, { "epoch": 9.1649038061131, "grad_norm": 0.77734375, "learning_rate": 2.709126256351144e-07, "loss": 3.995, "step": 27513 }, { "epoch": 9.16523694511535, "grad_norm": 0.7578125, "learning_rate": 2.706970794666364e-07, "loss": 3.9435, "step": 27514 }, { "epoch": 9.165570084117599, "grad_norm": 0.7578125, "learning_rate": 2.704816175038799e-07, "loss": 3.9498, "step": 27515 }, { "epoch": 9.165903223119846, "grad_norm": 0.78515625, "learning_rate": 2.70266239749353e-07, "loss": 3.9637, "step": 27516 }, { "epoch": 9.166236362122095, "grad_norm": 0.75, "learning_rate": 2.700509462055636e-07, "loss": 3.9943, "step": 27517 }, { "epoch": 9.166569501124345, "grad_norm": 0.7109375, "learning_rate": 2.698357368750215e-07, "loss": 3.9845, "step": 27518 }, { "epoch": 9.166902640126592, "grad_norm": 0.7734375, "learning_rate": 2.6962061176023287e-07, "loss": 4.0038, "step": 27519 }, { "epoch": 9.167235779128841, "grad_norm": 0.73828125, "learning_rate": 2.694055708637025e-07, "loss": 3.9516, "step": 27520 }, { "epoch": 9.16756891813109, "grad_norm": 0.73046875, "learning_rate": 2.691906141879358e-07, "loss": 3.9745, "step": 27521 }, { "epoch": 9.16790205713334, "grad_norm": 0.78515625, "learning_rate": 2.689757417354366e-07, "loss": 3.9435, "step": 27522 }, { "epoch": 9.168235196135587, "grad_norm": 0.7578125, "learning_rate": 2.6876095350870877e-07, "loss": 3.9823, "step": 27523 }, { "epoch": 9.168568335137836, "grad_norm": 0.8046875, "learning_rate": 2.6854624951025196e-07, "loss": 3.9806, "step": 27524 }, { "epoch": 9.168901474140085, "grad_norm": 0.74609375, "learning_rate": 2.683316297425684e-07, "loss": 3.9289, "step": 27525 }, { "epoch": 9.169234613142333, "grad_norm": 0.796875, "learning_rate": 2.681170942081576e-07, "loss": 4.0183, "step": 27526 }, { "epoch": 9.169567752144582, "grad_norm": 0.734375, "learning_rate": 2.6790264290951773e-07, "loss": 3.951, "step": 27527 }, { "epoch": 9.169900891146831, "grad_norm": 0.74609375, "learning_rate": 2.6768827584914754e-07, "loss": 3.9972, "step": 27528 }, { "epoch": 9.17023403014908, "grad_norm": 0.76171875, "learning_rate": 2.6747399302954417e-07, "loss": 3.9975, "step": 27529 }, { "epoch": 9.170567169151328, "grad_norm": 0.7578125, "learning_rate": 2.672597944532032e-07, "loss": 3.9705, "step": 27530 }, { "epoch": 9.170900308153577, "grad_norm": 0.73046875, "learning_rate": 2.6704568012261924e-07, "loss": 3.9219, "step": 27531 }, { "epoch": 9.171233447155826, "grad_norm": 0.78515625, "learning_rate": 2.668316500402862e-07, "loss": 3.9154, "step": 27532 }, { "epoch": 9.171566586158075, "grad_norm": 0.7578125, "learning_rate": 2.6661770420869867e-07, "loss": 3.9252, "step": 27533 }, { "epoch": 9.171899725160323, "grad_norm": 0.76953125, "learning_rate": 2.664038426303472e-07, "loss": 4.0033, "step": 27534 }, { "epoch": 9.172232864162572, "grad_norm": 0.74609375, "learning_rate": 2.661900653077215e-07, "loss": 3.9379, "step": 27535 }, { "epoch": 9.172566003164821, "grad_norm": 0.796875, "learning_rate": 2.6597637224331453e-07, "loss": 3.8911, "step": 27536 }, { "epoch": 9.172899142167068, "grad_norm": 0.734375, "learning_rate": 2.657627634396134e-07, "loss": 4.016, "step": 27537 }, { "epoch": 9.173232281169318, "grad_norm": 0.7578125, "learning_rate": 2.655492388991071e-07, "loss": 3.967, "step": 27538 }, { "epoch": 9.173565420171567, "grad_norm": 0.73046875, "learning_rate": 2.653357986242819e-07, "loss": 4.0596, "step": 27539 }, { "epoch": 9.173898559173816, "grad_norm": 0.76171875, "learning_rate": 2.651224426176241e-07, "loss": 4.032, "step": 27540 }, { "epoch": 9.174231698176063, "grad_norm": 0.71875, "learning_rate": 2.6490917088162007e-07, "loss": 3.9949, "step": 27541 }, { "epoch": 9.174564837178313, "grad_norm": 0.765625, "learning_rate": 2.646959834187529e-07, "loss": 3.9713, "step": 27542 }, { "epoch": 9.174897976180562, "grad_norm": 0.77734375, "learning_rate": 2.644828802315055e-07, "loss": 3.943, "step": 27543 }, { "epoch": 9.17523111518281, "grad_norm": 0.78125, "learning_rate": 2.64269861322361e-07, "loss": 4.0371, "step": 27544 }, { "epoch": 9.175564254185058, "grad_norm": 0.74609375, "learning_rate": 2.640569266937998e-07, "loss": 3.9912, "step": 27545 }, { "epoch": 9.175897393187308, "grad_norm": 0.73828125, "learning_rate": 2.6384407634830077e-07, "loss": 4.0354, "step": 27546 }, { "epoch": 9.176230532189557, "grad_norm": 0.77734375, "learning_rate": 2.63631310288347e-07, "loss": 4.0124, "step": 27547 }, { "epoch": 9.176563671191804, "grad_norm": 0.77734375, "learning_rate": 2.634186285164139e-07, "loss": 3.913, "step": 27548 }, { "epoch": 9.176896810194053, "grad_norm": 0.74609375, "learning_rate": 2.6320603103497874e-07, "loss": 3.9558, "step": 27549 }, { "epoch": 9.177229949196303, "grad_norm": 0.72265625, "learning_rate": 2.629935178465179e-07, "loss": 3.9827, "step": 27550 }, { "epoch": 9.177563088198552, "grad_norm": 0.80078125, "learning_rate": 2.627810889535093e-07, "loss": 3.9856, "step": 27551 }, { "epoch": 9.1778962272008, "grad_norm": 0.7265625, "learning_rate": 2.625687443584243e-07, "loss": 4.037, "step": 27552 }, { "epoch": 9.178229366203048, "grad_norm": 0.7421875, "learning_rate": 2.623564840637369e-07, "loss": 4.0221, "step": 27553 }, { "epoch": 9.178562505205297, "grad_norm": 0.80078125, "learning_rate": 2.6214430807191993e-07, "loss": 3.9374, "step": 27554 }, { "epoch": 9.178895644207545, "grad_norm": 0.8046875, "learning_rate": 2.61932216385444e-07, "loss": 3.9957, "step": 27555 }, { "epoch": 9.179228783209794, "grad_norm": 0.76953125, "learning_rate": 2.6172020900678135e-07, "loss": 3.9265, "step": 27556 }, { "epoch": 9.179561922212043, "grad_norm": 0.7734375, "learning_rate": 2.615082859383991e-07, "loss": 4.051, "step": 27557 }, { "epoch": 9.179895061214292, "grad_norm": 0.76953125, "learning_rate": 2.6129644718276697e-07, "loss": 4.0536, "step": 27558 }, { "epoch": 9.18022820021654, "grad_norm": 0.7421875, "learning_rate": 2.6108469274235215e-07, "loss": 4.0376, "step": 27559 }, { "epoch": 9.180561339218789, "grad_norm": 0.7265625, "learning_rate": 2.6087302261962264e-07, "loss": 3.9932, "step": 27560 }, { "epoch": 9.180894478221038, "grad_norm": 0.765625, "learning_rate": 2.606614368170407e-07, "loss": 4.0244, "step": 27561 }, { "epoch": 9.181227617223286, "grad_norm": 0.73828125, "learning_rate": 2.6044993533707343e-07, "loss": 3.9768, "step": 27562 }, { "epoch": 9.181560756225535, "grad_norm": 0.7890625, "learning_rate": 2.6023851818218316e-07, "loss": 3.9595, "step": 27563 }, { "epoch": 9.181893895227784, "grad_norm": 0.765625, "learning_rate": 2.6002718535483363e-07, "loss": 3.9406, "step": 27564 }, { "epoch": 9.182227034230033, "grad_norm": 0.7421875, "learning_rate": 2.5981593685748373e-07, "loss": 4.0273, "step": 27565 }, { "epoch": 9.18256017323228, "grad_norm": 0.78515625, "learning_rate": 2.5960477269259737e-07, "loss": 4.0265, "step": 27566 }, { "epoch": 9.18289331223453, "grad_norm": 0.76171875, "learning_rate": 2.593936928626334e-07, "loss": 3.9305, "step": 27567 }, { "epoch": 9.183226451236779, "grad_norm": 0.81640625, "learning_rate": 2.5918269737004896e-07, "loss": 4.0484, "step": 27568 }, { "epoch": 9.183559590239028, "grad_norm": 0.7734375, "learning_rate": 2.58971786217303e-07, "loss": 3.8854, "step": 27569 }, { "epoch": 9.183892729241276, "grad_norm": 0.74609375, "learning_rate": 2.58760959406851e-07, "loss": 3.9758, "step": 27570 }, { "epoch": 9.184225868243525, "grad_norm": 0.765625, "learning_rate": 2.585502169411502e-07, "loss": 3.9419, "step": 27571 }, { "epoch": 9.184559007245774, "grad_norm": 0.78515625, "learning_rate": 2.583395588226528e-07, "loss": 3.9747, "step": 27572 }, { "epoch": 9.184892146248021, "grad_norm": 0.76171875, "learning_rate": 2.5812898505381515e-07, "loss": 3.9545, "step": 27573 }, { "epoch": 9.18522528525027, "grad_norm": 0.76171875, "learning_rate": 2.579184956370895e-07, "loss": 4.0138, "step": 27574 }, { "epoch": 9.18555842425252, "grad_norm": 0.78125, "learning_rate": 2.5770809057492626e-07, "loss": 4.0475, "step": 27575 }, { "epoch": 9.185891563254769, "grad_norm": 0.75390625, "learning_rate": 2.57497769869777e-07, "loss": 3.9673, "step": 27576 }, { "epoch": 9.186224702257016, "grad_norm": 0.7265625, "learning_rate": 2.5728753352409205e-07, "loss": 3.9777, "step": 27577 }, { "epoch": 9.186557841259265, "grad_norm": 0.7578125, "learning_rate": 2.570773815403196e-07, "loss": 3.9957, "step": 27578 }, { "epoch": 9.186890980261515, "grad_norm": 0.7578125, "learning_rate": 2.5686731392090686e-07, "loss": 4.0087, "step": 27579 }, { "epoch": 9.187224119263762, "grad_norm": 0.77734375, "learning_rate": 2.5665733066830177e-07, "loss": 3.9744, "step": 27580 }, { "epoch": 9.187557258266011, "grad_norm": 0.79296875, "learning_rate": 2.5644743178494993e-07, "loss": 3.9843, "step": 27581 }, { "epoch": 9.18789039726826, "grad_norm": 0.7421875, "learning_rate": 2.562376172732961e-07, "loss": 4.013, "step": 27582 }, { "epoch": 9.18822353627051, "grad_norm": 0.73828125, "learning_rate": 2.560278871357824e-07, "loss": 3.9737, "step": 27583 }, { "epoch": 9.188556675272757, "grad_norm": 0.77734375, "learning_rate": 2.5581824137485527e-07, "loss": 3.9555, "step": 27584 }, { "epoch": 9.188889814275006, "grad_norm": 0.73828125, "learning_rate": 2.556086799929544e-07, "loss": 3.9774, "step": 27585 }, { "epoch": 9.189222953277255, "grad_norm": 0.7734375, "learning_rate": 2.5539920299252117e-07, "loss": 3.9489, "step": 27586 }, { "epoch": 9.189556092279503, "grad_norm": 0.75390625, "learning_rate": 2.551898103759945e-07, "loss": 3.8936, "step": 27587 }, { "epoch": 9.189889231281752, "grad_norm": 0.734375, "learning_rate": 2.549805021458157e-07, "loss": 4.0222, "step": 27588 }, { "epoch": 9.190222370284001, "grad_norm": 0.75, "learning_rate": 2.547712783044204e-07, "loss": 3.9856, "step": 27589 }, { "epoch": 9.19055550928625, "grad_norm": 0.75390625, "learning_rate": 2.5456213885424654e-07, "loss": 3.9716, "step": 27590 }, { "epoch": 9.190888648288498, "grad_norm": 0.78515625, "learning_rate": 2.543530837977298e-07, "loss": 3.9309, "step": 27591 }, { "epoch": 9.191221787290747, "grad_norm": 0.7421875, "learning_rate": 2.541441131373065e-07, "loss": 4.0232, "step": 27592 }, { "epoch": 9.191554926292996, "grad_norm": 0.76953125, "learning_rate": 2.539352268754097e-07, "loss": 3.9438, "step": 27593 }, { "epoch": 9.191888065295245, "grad_norm": 0.73828125, "learning_rate": 2.5372642501447165e-07, "loss": 3.9664, "step": 27594 }, { "epoch": 9.192221204297493, "grad_norm": 0.765625, "learning_rate": 2.5351770755692697e-07, "loss": 3.9821, "step": 27595 }, { "epoch": 9.192554343299742, "grad_norm": 0.7734375, "learning_rate": 2.5330907450520466e-07, "loss": 4.0176, "step": 27596 }, { "epoch": 9.192887482301991, "grad_norm": 0.78125, "learning_rate": 2.531005258617336e-07, "loss": 4.0421, "step": 27597 }, { "epoch": 9.193220621304238, "grad_norm": 0.7578125, "learning_rate": 2.528920616289468e-07, "loss": 3.985, "step": 27598 }, { "epoch": 9.193553760306488, "grad_norm": 0.7578125, "learning_rate": 2.52683681809269e-07, "loss": 4.0304, "step": 27599 }, { "epoch": 9.193886899308737, "grad_norm": 0.8046875, "learning_rate": 2.5247538640512907e-07, "loss": 3.9916, "step": 27600 }, { "epoch": 9.194220038310986, "grad_norm": 0.78125, "learning_rate": 2.522671754189526e-07, "loss": 3.9876, "step": 27601 }, { "epoch": 9.194553177313233, "grad_norm": 0.75, "learning_rate": 2.520590488531652e-07, "loss": 3.996, "step": 27602 }, { "epoch": 9.194886316315483, "grad_norm": 0.74609375, "learning_rate": 2.518510067101906e-07, "loss": 3.9097, "step": 27603 }, { "epoch": 9.195219455317732, "grad_norm": 0.74609375, "learning_rate": 2.516430489924537e-07, "loss": 3.9341, "step": 27604 }, { "epoch": 9.19555259431998, "grad_norm": 0.796875, "learning_rate": 2.514351757023733e-07, "loss": 3.9315, "step": 27605 }, { "epoch": 9.195885733322228, "grad_norm": 0.8046875, "learning_rate": 2.512273868423734e-07, "loss": 3.9691, "step": 27606 }, { "epoch": 9.196218872324478, "grad_norm": 0.68359375, "learning_rate": 2.5101968241487445e-07, "loss": 4.0378, "step": 27607 }, { "epoch": 9.196552011326727, "grad_norm": 0.7890625, "learning_rate": 2.5081206242229455e-07, "loss": 3.9502, "step": 27608 }, { "epoch": 9.196885150328974, "grad_norm": 0.75, "learning_rate": 2.506045268670509e-07, "loss": 3.9799, "step": 27609 }, { "epoch": 9.197218289331223, "grad_norm": 0.7421875, "learning_rate": 2.503970757515642e-07, "loss": 3.989, "step": 27610 }, { "epoch": 9.197551428333473, "grad_norm": 0.734375, "learning_rate": 2.5018970907824816e-07, "loss": 3.9999, "step": 27611 }, { "epoch": 9.197884567335722, "grad_norm": 0.765625, "learning_rate": 2.4998242684951934e-07, "loss": 3.9786, "step": 27612 }, { "epoch": 9.198217706337969, "grad_norm": 0.7890625, "learning_rate": 2.4977522906779155e-07, "loss": 3.9855, "step": 27613 }, { "epoch": 9.198550845340218, "grad_norm": 0.7578125, "learning_rate": 2.4956811573547873e-07, "loss": 3.9938, "step": 27614 }, { "epoch": 9.198883984342467, "grad_norm": 0.71875, "learning_rate": 2.4936108685499307e-07, "loss": 4.0266, "step": 27615 }, { "epoch": 9.199217123344715, "grad_norm": 0.77734375, "learning_rate": 2.491541424287444e-07, "loss": 4.03, "step": 27616 }, { "epoch": 9.199550262346964, "grad_norm": 0.765625, "learning_rate": 2.489472824591457e-07, "loss": 4.0451, "step": 27617 }, { "epoch": 9.199883401349213, "grad_norm": 0.78125, "learning_rate": 2.4874050694860593e-07, "loss": 3.9244, "step": 27618 }, { "epoch": 9.200216540351462, "grad_norm": 0.7578125, "learning_rate": 2.4853381589953235e-07, "loss": 3.9496, "step": 27619 }, { "epoch": 9.20054967935371, "grad_norm": 0.75390625, "learning_rate": 2.4832720931433296e-07, "loss": 3.9691, "step": 27620 }, { "epoch": 9.200882818355959, "grad_norm": 0.76953125, "learning_rate": 2.4812068719541503e-07, "loss": 4.0309, "step": 27621 }, { "epoch": 9.201215957358208, "grad_norm": 0.765625, "learning_rate": 2.4791424954518414e-07, "loss": 3.991, "step": 27622 }, { "epoch": 9.201549096360456, "grad_norm": 0.7734375, "learning_rate": 2.4770789636604257e-07, "loss": 3.9432, "step": 27623 }, { "epoch": 9.201882235362705, "grad_norm": 0.734375, "learning_rate": 2.475016276603967e-07, "loss": 3.9846, "step": 27624 }, { "epoch": 9.202215374364954, "grad_norm": 0.7421875, "learning_rate": 2.472954434306479e-07, "loss": 3.9929, "step": 27625 }, { "epoch": 9.202548513367203, "grad_norm": 0.734375, "learning_rate": 2.470893436791977e-07, "loss": 4.0189, "step": 27626 }, { "epoch": 9.20288165236945, "grad_norm": 0.7421875, "learning_rate": 2.4688332840844654e-07, "loss": 3.9354, "step": 27627 }, { "epoch": 9.2032147913717, "grad_norm": 0.7734375, "learning_rate": 2.466773976207942e-07, "loss": 3.9209, "step": 27628 }, { "epoch": 9.203547930373949, "grad_norm": 0.73046875, "learning_rate": 2.4647155131864053e-07, "loss": 4.0155, "step": 27629 }, { "epoch": 9.203881069376198, "grad_norm": 0.75390625, "learning_rate": 2.4626578950438183e-07, "loss": 3.9578, "step": 27630 }, { "epoch": 9.204214208378446, "grad_norm": 0.734375, "learning_rate": 2.4606011218041543e-07, "loss": 4.0357, "step": 27631 }, { "epoch": 9.204547347380695, "grad_norm": 0.734375, "learning_rate": 2.45854519349136e-07, "loss": 3.9858, "step": 27632 }, { "epoch": 9.204880486382944, "grad_norm": 0.79296875, "learning_rate": 2.4564901101293916e-07, "loss": 3.9706, "step": 27633 }, { "epoch": 9.205213625385191, "grad_norm": 0.80859375, "learning_rate": 2.4544358717421805e-07, "loss": 3.9539, "step": 27634 }, { "epoch": 9.20554676438744, "grad_norm": 0.79296875, "learning_rate": 2.4523824783536654e-07, "loss": 4.0351, "step": 27635 }, { "epoch": 9.20587990338969, "grad_norm": 0.78515625, "learning_rate": 2.4503299299877516e-07, "loss": 4.0132, "step": 27636 }, { "epoch": 9.206213042391939, "grad_norm": 0.7421875, "learning_rate": 2.4482782266683547e-07, "loss": 4.0201, "step": 27637 }, { "epoch": 9.206546181394186, "grad_norm": 0.79296875, "learning_rate": 2.4462273684193625e-07, "loss": 3.9561, "step": 27638 }, { "epoch": 9.206879320396435, "grad_norm": 0.73828125, "learning_rate": 2.444177355264682e-07, "loss": 3.9684, "step": 27639 }, { "epoch": 9.207212459398685, "grad_norm": 0.74609375, "learning_rate": 2.4421281872281847e-07, "loss": 4.0365, "step": 27640 }, { "epoch": 9.207545598400932, "grad_norm": 0.78125, "learning_rate": 2.440079864333711e-07, "loss": 3.9266, "step": 27641 }, { "epoch": 9.207878737403181, "grad_norm": 0.77734375, "learning_rate": 2.4380323866051576e-07, "loss": 3.9498, "step": 27642 }, { "epoch": 9.20821187640543, "grad_norm": 0.74609375, "learning_rate": 2.4359857540663553e-07, "loss": 3.9812, "step": 27643 }, { "epoch": 9.20854501540768, "grad_norm": 0.72265625, "learning_rate": 2.4339399667411525e-07, "loss": 3.9763, "step": 27644 }, { "epoch": 9.208878154409927, "grad_norm": 0.734375, "learning_rate": 2.431895024653355e-07, "loss": 3.9794, "step": 27645 }, { "epoch": 9.209211293412176, "grad_norm": 0.7890625, "learning_rate": 2.42985092782681e-07, "loss": 3.957, "step": 27646 }, { "epoch": 9.209544432414425, "grad_norm": 0.75390625, "learning_rate": 2.427807676285315e-07, "loss": 4.0151, "step": 27647 }, { "epoch": 9.209877571416673, "grad_norm": 0.76953125, "learning_rate": 2.4257652700526684e-07, "loss": 3.9963, "step": 27648 }, { "epoch": 9.210210710418922, "grad_norm": 0.74609375, "learning_rate": 2.423723709152659e-07, "loss": 4.0461, "step": 27649 }, { "epoch": 9.210543849421171, "grad_norm": 0.75390625, "learning_rate": 2.4216829936090674e-07, "loss": 3.9864, "step": 27650 }, { "epoch": 9.21087698842342, "grad_norm": 0.7578125, "learning_rate": 2.4196431234456665e-07, "loss": 3.9032, "step": 27651 }, { "epoch": 9.211210127425668, "grad_norm": 0.765625, "learning_rate": 2.417604098686213e-07, "loss": 3.987, "step": 27652 }, { "epoch": 9.211543266427917, "grad_norm": 0.75390625, "learning_rate": 2.4155659193544537e-07, "loss": 3.8566, "step": 27653 }, { "epoch": 9.211876405430166, "grad_norm": 0.76953125, "learning_rate": 2.413528585474145e-07, "loss": 3.9524, "step": 27654 }, { "epoch": 9.212209544432415, "grad_norm": 0.7265625, "learning_rate": 2.4114920970690014e-07, "loss": 3.9573, "step": 27655 }, { "epoch": 9.212542683434663, "grad_norm": 0.75, "learning_rate": 2.4094564541627453e-07, "loss": 3.9827, "step": 27656 }, { "epoch": 9.212875822436912, "grad_norm": 0.73828125, "learning_rate": 2.407421656779091e-07, "loss": 3.9248, "step": 27657 }, { "epoch": 9.213208961439161, "grad_norm": 0.76953125, "learning_rate": 2.4053877049417527e-07, "loss": 3.964, "step": 27658 }, { "epoch": 9.213542100441408, "grad_norm": 0.76171875, "learning_rate": 2.4033545986743954e-07, "loss": 4.0367, "step": 27659 }, { "epoch": 9.213875239443658, "grad_norm": 0.77734375, "learning_rate": 2.401322338000708e-07, "loss": 3.9883, "step": 27660 }, { "epoch": 9.214208378445907, "grad_norm": 0.76953125, "learning_rate": 2.399290922944372e-07, "loss": 3.9503, "step": 27661 }, { "epoch": 9.214541517448156, "grad_norm": 0.80078125, "learning_rate": 2.397260353529043e-07, "loss": 3.9551, "step": 27662 }, { "epoch": 9.214874656450403, "grad_norm": 0.76953125, "learning_rate": 2.3952306297783776e-07, "loss": 3.9948, "step": 27663 }, { "epoch": 9.215207795452653, "grad_norm": 0.80859375, "learning_rate": 2.3932017517160067e-07, "loss": 3.9679, "step": 27664 }, { "epoch": 9.215540934454902, "grad_norm": 0.71484375, "learning_rate": 2.3911737193655774e-07, "loss": 4.0037, "step": 27665 }, { "epoch": 9.21587407345715, "grad_norm": 0.71484375, "learning_rate": 2.3891465327506967e-07, "loss": 4.0048, "step": 27666 }, { "epoch": 9.216207212459398, "grad_norm": 0.7421875, "learning_rate": 2.3871201918949955e-07, "loss": 4.0126, "step": 27667 }, { "epoch": 9.216540351461648, "grad_norm": 0.80859375, "learning_rate": 2.3850946968220543e-07, "loss": 3.9919, "step": 27668 }, { "epoch": 9.216873490463897, "grad_norm": 0.80078125, "learning_rate": 2.3830700475554802e-07, "loss": 3.9996, "step": 27669 }, { "epoch": 9.217206629466144, "grad_norm": 0.765625, "learning_rate": 2.3810462441188535e-07, "loss": 4.0238, "step": 27670 }, { "epoch": 9.217539768468393, "grad_norm": 0.78125, "learning_rate": 2.379023286535739e-07, "loss": 3.9684, "step": 27671 }, { "epoch": 9.217872907470642, "grad_norm": 0.75, "learning_rate": 2.3770011748297098e-07, "loss": 3.956, "step": 27672 }, { "epoch": 9.218206046472892, "grad_norm": 0.7734375, "learning_rate": 2.3749799090243217e-07, "loss": 4.0061, "step": 27673 }, { "epoch": 9.218539185475139, "grad_norm": 0.77734375, "learning_rate": 2.372959489143106e-07, "loss": 3.9657, "step": 27674 }, { "epoch": 9.218872324477388, "grad_norm": 0.78125, "learning_rate": 2.370939915209594e-07, "loss": 4.0157, "step": 27675 }, { "epoch": 9.219205463479637, "grad_norm": 0.78125, "learning_rate": 2.3689211872473415e-07, "loss": 3.9459, "step": 27676 }, { "epoch": 9.219538602481885, "grad_norm": 0.78125, "learning_rate": 2.3669033052798218e-07, "loss": 3.9233, "step": 27677 }, { "epoch": 9.219871741484134, "grad_norm": 0.7734375, "learning_rate": 2.364886269330549e-07, "loss": 3.9943, "step": 27678 }, { "epoch": 9.220204880486383, "grad_norm": 0.828125, "learning_rate": 2.362870079423038e-07, "loss": 3.9737, "step": 27679 }, { "epoch": 9.220538019488632, "grad_norm": 0.80078125, "learning_rate": 2.3608547355807531e-07, "loss": 3.9987, "step": 27680 }, { "epoch": 9.22087115849088, "grad_norm": 0.80078125, "learning_rate": 2.3588402378271755e-07, "loss": 3.9616, "step": 27681 }, { "epoch": 9.221204297493129, "grad_norm": 0.7734375, "learning_rate": 2.3568265861857618e-07, "loss": 4.0496, "step": 27682 }, { "epoch": 9.221537436495378, "grad_norm": 0.7578125, "learning_rate": 2.3548137806799847e-07, "loss": 3.959, "step": 27683 }, { "epoch": 9.221870575497626, "grad_norm": 0.7578125, "learning_rate": 2.352801821333267e-07, "loss": 3.9957, "step": 27684 }, { "epoch": 9.222203714499875, "grad_norm": 0.76171875, "learning_rate": 2.350790708169065e-07, "loss": 4.0567, "step": 27685 }, { "epoch": 9.222536853502124, "grad_norm": 0.77734375, "learning_rate": 2.3487804412107933e-07, "loss": 3.9676, "step": 27686 }, { "epoch": 9.222869992504373, "grad_norm": 0.75, "learning_rate": 2.3467710204818583e-07, "loss": 3.9407, "step": 27687 }, { "epoch": 9.22320313150662, "grad_norm": 0.78125, "learning_rate": 2.3447624460056828e-07, "loss": 4.0077, "step": 27688 }, { "epoch": 9.22353627050887, "grad_norm": 0.78125, "learning_rate": 2.3427547178056396e-07, "loss": 3.9844, "step": 27689 }, { "epoch": 9.223869409511119, "grad_norm": 0.80078125, "learning_rate": 2.3407478359051438e-07, "loss": 3.9499, "step": 27690 }, { "epoch": 9.224202548513368, "grad_norm": 0.7890625, "learning_rate": 2.3387418003275512e-07, "loss": 3.9793, "step": 27691 }, { "epoch": 9.224535687515615, "grad_norm": 0.77734375, "learning_rate": 2.3367366110962353e-07, "loss": 4.0054, "step": 27692 }, { "epoch": 9.224868826517865, "grad_norm": 0.7734375, "learning_rate": 2.3347322682345352e-07, "loss": 3.9751, "step": 27693 }, { "epoch": 9.225201965520114, "grad_norm": 0.73828125, "learning_rate": 2.3327287717658324e-07, "loss": 4.033, "step": 27694 }, { "epoch": 9.225535104522361, "grad_norm": 0.7421875, "learning_rate": 2.3307261217134334e-07, "loss": 3.937, "step": 27695 }, { "epoch": 9.22586824352461, "grad_norm": 0.73828125, "learning_rate": 2.328724318100678e-07, "loss": 4.0012, "step": 27696 }, { "epoch": 9.22620138252686, "grad_norm": 0.78515625, "learning_rate": 2.3267233609508637e-07, "loss": 3.9646, "step": 27697 }, { "epoch": 9.226534521529109, "grad_norm": 0.78515625, "learning_rate": 2.3247232502873224e-07, "loss": 3.9158, "step": 27698 }, { "epoch": 9.226867660531356, "grad_norm": 0.7578125, "learning_rate": 2.3227239861333433e-07, "loss": 3.9584, "step": 27699 }, { "epoch": 9.227200799533605, "grad_norm": 0.76171875, "learning_rate": 2.320725568512208e-07, "loss": 3.9657, "step": 27700 }, { "epoch": 9.227533938535855, "grad_norm": 0.7734375, "learning_rate": 2.3187279974471898e-07, "loss": 3.8981, "step": 27701 }, { "epoch": 9.227867077538102, "grad_norm": 0.80859375, "learning_rate": 2.3167312729615697e-07, "loss": 3.8924, "step": 27702 }, { "epoch": 9.228200216540351, "grad_norm": 0.75, "learning_rate": 2.3147353950786126e-07, "loss": 4.0189, "step": 27703 }, { "epoch": 9.2285333555426, "grad_norm": 0.7265625, "learning_rate": 2.3127403638215332e-07, "loss": 3.9845, "step": 27704 }, { "epoch": 9.22886649454485, "grad_norm": 0.7734375, "learning_rate": 2.3107461792135965e-07, "loss": 3.9793, "step": 27705 }, { "epoch": 9.229199633547097, "grad_norm": 0.8125, "learning_rate": 2.3087528412780169e-07, "loss": 3.935, "step": 27706 }, { "epoch": 9.229532772549346, "grad_norm": 0.8125, "learning_rate": 2.3067603500380174e-07, "loss": 3.9937, "step": 27707 }, { "epoch": 9.229865911551595, "grad_norm": 0.75390625, "learning_rate": 2.3047687055168048e-07, "loss": 3.883, "step": 27708 }, { "epoch": 9.230199050553843, "grad_norm": 0.77734375, "learning_rate": 2.3027779077375853e-07, "loss": 4.0226, "step": 27709 }, { "epoch": 9.230532189556092, "grad_norm": 0.7890625, "learning_rate": 2.3007879567235406e-07, "loss": 4.0026, "step": 27710 }, { "epoch": 9.230865328558341, "grad_norm": 0.7734375, "learning_rate": 2.298798852497852e-07, "loss": 4.0035, "step": 27711 }, { "epoch": 9.23119846756059, "grad_norm": 0.77734375, "learning_rate": 2.2968105950836843e-07, "loss": 3.9458, "step": 27712 }, { "epoch": 9.231531606562838, "grad_norm": 0.7734375, "learning_rate": 2.2948231845041938e-07, "loss": 3.9706, "step": 27713 }, { "epoch": 9.231864745565087, "grad_norm": 0.7578125, "learning_rate": 2.2928366207825456e-07, "loss": 3.9627, "step": 27714 }, { "epoch": 9.232197884567336, "grad_norm": 0.765625, "learning_rate": 2.2908509039418462e-07, "loss": 3.9724, "step": 27715 }, { "epoch": 9.232531023569585, "grad_norm": 0.75390625, "learning_rate": 2.2888660340052603e-07, "loss": 3.8449, "step": 27716 }, { "epoch": 9.232864162571833, "grad_norm": 0.75, "learning_rate": 2.2868820109958943e-07, "loss": 4.0179, "step": 27717 }, { "epoch": 9.233197301574082, "grad_norm": 0.78125, "learning_rate": 2.2848988349368548e-07, "loss": 4.0173, "step": 27718 }, { "epoch": 9.233530440576331, "grad_norm": 0.75390625, "learning_rate": 2.2829165058512403e-07, "loss": 3.9254, "step": 27719 }, { "epoch": 9.233863579578578, "grad_norm": 0.75390625, "learning_rate": 2.2809350237621402e-07, "loss": 4.0266, "step": 27720 }, { "epoch": 9.234196718580828, "grad_norm": 0.76171875, "learning_rate": 2.2789543886926528e-07, "loss": 3.9162, "step": 27721 }, { "epoch": 9.234529857583077, "grad_norm": 0.77734375, "learning_rate": 2.2769746006658182e-07, "loss": 3.9598, "step": 27722 }, { "epoch": 9.234862996585326, "grad_norm": 0.73828125, "learning_rate": 2.2749956597047178e-07, "loss": 4.015, "step": 27723 }, { "epoch": 9.235196135587573, "grad_norm": 0.7734375, "learning_rate": 2.2730175658324e-07, "loss": 4.0044, "step": 27724 }, { "epoch": 9.235529274589823, "grad_norm": 0.75, "learning_rate": 2.271040319071896e-07, "loss": 3.9468, "step": 27725 }, { "epoch": 9.235862413592072, "grad_norm": 0.78125, "learning_rate": 2.2690639194462376e-07, "loss": 4.003, "step": 27726 }, { "epoch": 9.23619555259432, "grad_norm": 0.74609375, "learning_rate": 2.2670883669784565e-07, "loss": 3.9473, "step": 27727 }, { "epoch": 9.236528691596568, "grad_norm": 0.7265625, "learning_rate": 2.265113661691551e-07, "loss": 3.9889, "step": 27728 }, { "epoch": 9.236861830598817, "grad_norm": 0.80078125, "learning_rate": 2.2631398036085354e-07, "loss": 3.9905, "step": 27729 }, { "epoch": 9.237194969601067, "grad_norm": 0.77734375, "learning_rate": 2.261166792752392e-07, "loss": 3.9588, "step": 27730 }, { "epoch": 9.237528108603314, "grad_norm": 0.77734375, "learning_rate": 2.259194629146094e-07, "loss": 3.8976, "step": 27731 }, { "epoch": 9.237861247605563, "grad_norm": 0.7421875, "learning_rate": 2.2572233128126314e-07, "loss": 4.0154, "step": 27732 }, { "epoch": 9.238194386607812, "grad_norm": 0.7890625, "learning_rate": 2.255252843774952e-07, "loss": 3.9561, "step": 27733 }, { "epoch": 9.238527525610062, "grad_norm": 0.71484375, "learning_rate": 2.2532832220560128e-07, "loss": 3.9394, "step": 27734 }, { "epoch": 9.238860664612309, "grad_norm": 0.77734375, "learning_rate": 2.251314447678754e-07, "loss": 4.0255, "step": 27735 }, { "epoch": 9.239193803614558, "grad_norm": 0.7890625, "learning_rate": 2.2493465206661152e-07, "loss": 3.9187, "step": 27736 }, { "epoch": 9.239526942616807, "grad_norm": 0.80078125, "learning_rate": 2.2473794410410032e-07, "loss": 3.9572, "step": 27737 }, { "epoch": 9.239860081619055, "grad_norm": 0.78125, "learning_rate": 2.2454132088263413e-07, "loss": 3.9853, "step": 27738 }, { "epoch": 9.240193220621304, "grad_norm": 0.7578125, "learning_rate": 2.2434478240450362e-07, "loss": 3.957, "step": 27739 }, { "epoch": 9.240526359623553, "grad_norm": 0.77734375, "learning_rate": 2.2414832867199698e-07, "loss": 3.9417, "step": 27740 }, { "epoch": 9.240859498625802, "grad_norm": 0.78515625, "learning_rate": 2.2395195968740234e-07, "loss": 3.9862, "step": 27741 }, { "epoch": 9.24119263762805, "grad_norm": 0.74609375, "learning_rate": 2.237556754530079e-07, "loss": 4.0795, "step": 27742 }, { "epoch": 9.241525776630299, "grad_norm": 0.70703125, "learning_rate": 2.2355947597109932e-07, "loss": 3.9522, "step": 27743 }, { "epoch": 9.241858915632548, "grad_norm": 0.734375, "learning_rate": 2.233633612439623e-07, "loss": 3.9186, "step": 27744 }, { "epoch": 9.242192054634796, "grad_norm": 0.77734375, "learning_rate": 2.2316733127388078e-07, "loss": 3.9174, "step": 27745 }, { "epoch": 9.242525193637045, "grad_norm": 0.78125, "learning_rate": 2.2297138606313798e-07, "loss": 4.0457, "step": 27746 }, { "epoch": 9.242858332639294, "grad_norm": 0.76953125, "learning_rate": 2.227755256140171e-07, "loss": 3.9491, "step": 27747 }, { "epoch": 9.243191471641543, "grad_norm": 0.7890625, "learning_rate": 2.2257974992879875e-07, "loss": 4.008, "step": 27748 }, { "epoch": 9.24352461064379, "grad_norm": 0.80859375, "learning_rate": 2.2238405900976366e-07, "loss": 3.9409, "step": 27749 }, { "epoch": 9.24385774964604, "grad_norm": 0.7734375, "learning_rate": 2.221884528591908e-07, "loss": 3.9768, "step": 27750 }, { "epoch": 9.244190888648289, "grad_norm": 0.78125, "learning_rate": 2.219929314793584e-07, "loss": 4.0158, "step": 27751 }, { "epoch": 9.244524027650538, "grad_norm": 0.734375, "learning_rate": 2.2179749487254458e-07, "loss": 3.9929, "step": 27752 }, { "epoch": 9.244857166652785, "grad_norm": 0.7578125, "learning_rate": 2.2160214304102505e-07, "loss": 4.0185, "step": 27753 }, { "epoch": 9.245190305655035, "grad_norm": 0.76171875, "learning_rate": 2.214068759870755e-07, "loss": 3.9993, "step": 27754 }, { "epoch": 9.245523444657284, "grad_norm": 0.78125, "learning_rate": 2.2121169371297157e-07, "loss": 3.9607, "step": 27755 }, { "epoch": 9.245856583659531, "grad_norm": 0.765625, "learning_rate": 2.2101659622098398e-07, "loss": 3.9607, "step": 27756 }, { "epoch": 9.24618972266178, "grad_norm": 0.7421875, "learning_rate": 2.2082158351338838e-07, "loss": 4.0092, "step": 27757 }, { "epoch": 9.24652286166403, "grad_norm": 0.8125, "learning_rate": 2.206266555924538e-07, "loss": 3.9653, "step": 27758 }, { "epoch": 9.246856000666279, "grad_norm": 0.72265625, "learning_rate": 2.2043181246045095e-07, "loss": 4.0292, "step": 27759 }, { "epoch": 9.247189139668526, "grad_norm": 0.7265625, "learning_rate": 2.202370541196505e-07, "loss": 4.0198, "step": 27760 }, { "epoch": 9.247522278670775, "grad_norm": 0.7578125, "learning_rate": 2.2004238057232064e-07, "loss": 3.9582, "step": 27761 }, { "epoch": 9.247855417673025, "grad_norm": 0.75390625, "learning_rate": 2.1984779182072866e-07, "loss": 3.9547, "step": 27762 }, { "epoch": 9.248188556675272, "grad_norm": 0.76953125, "learning_rate": 2.1965328786714116e-07, "loss": 3.9408, "step": 27763 }, { "epoch": 9.248521695677521, "grad_norm": 0.78515625, "learning_rate": 2.19458868713823e-07, "loss": 3.8901, "step": 27764 }, { "epoch": 9.24885483467977, "grad_norm": 0.7734375, "learning_rate": 2.1926453436304063e-07, "loss": 3.948, "step": 27765 }, { "epoch": 9.24918797368202, "grad_norm": 0.8125, "learning_rate": 2.190702848170556e-07, "loss": 4.0375, "step": 27766 }, { "epoch": 9.249521112684267, "grad_norm": 0.74609375, "learning_rate": 2.1887612007813113e-07, "loss": 3.9641, "step": 27767 }, { "epoch": 9.249854251686516, "grad_norm": 0.78125, "learning_rate": 2.1868204014852954e-07, "loss": 3.977, "step": 27768 }, { "epoch": 9.250187390688765, "grad_norm": 0.77734375, "learning_rate": 2.1848804503051073e-07, "loss": 3.9493, "step": 27769 }, { "epoch": 9.250520529691013, "grad_norm": 0.7421875, "learning_rate": 2.1829413472633368e-07, "loss": 3.9835, "step": 27770 }, { "epoch": 9.250853668693262, "grad_norm": 0.734375, "learning_rate": 2.1810030923825913e-07, "loss": 3.9441, "step": 27771 }, { "epoch": 9.251186807695511, "grad_norm": 0.765625, "learning_rate": 2.1790656856854274e-07, "loss": 4.0492, "step": 27772 }, { "epoch": 9.25151994669776, "grad_norm": 0.765625, "learning_rate": 2.1771291271944187e-07, "loss": 3.9829, "step": 27773 }, { "epoch": 9.251853085700008, "grad_norm": 0.75390625, "learning_rate": 2.1751934169321142e-07, "loss": 3.9818, "step": 27774 }, { "epoch": 9.252186224702257, "grad_norm": 0.78125, "learning_rate": 2.1732585549210788e-07, "loss": 4.0898, "step": 27775 }, { "epoch": 9.252519363704506, "grad_norm": 0.76171875, "learning_rate": 2.1713245411838362e-07, "loss": 3.9875, "step": 27776 }, { "epoch": 9.252852502706755, "grad_norm": 0.73046875, "learning_rate": 2.1693913757429101e-07, "loss": 3.9625, "step": 27777 }, { "epoch": 9.253185641709003, "grad_norm": 0.75390625, "learning_rate": 2.1674590586208327e-07, "loss": 4.0092, "step": 27778 }, { "epoch": 9.253518780711252, "grad_norm": 0.765625, "learning_rate": 2.165527589840094e-07, "loss": 3.9581, "step": 27779 }, { "epoch": 9.253851919713501, "grad_norm": 0.76953125, "learning_rate": 2.1635969694232095e-07, "loss": 3.9976, "step": 27780 }, { "epoch": 9.254185058715748, "grad_norm": 0.765625, "learning_rate": 2.1616671973926443e-07, "loss": 3.9798, "step": 27781 }, { "epoch": 9.254518197717998, "grad_norm": 0.77734375, "learning_rate": 2.1597382737708976e-07, "loss": 3.9043, "step": 27782 }, { "epoch": 9.254851336720247, "grad_norm": 0.74609375, "learning_rate": 2.157810198580426e-07, "loss": 4.0191, "step": 27783 }, { "epoch": 9.255184475722496, "grad_norm": 0.76171875, "learning_rate": 2.155882971843695e-07, "loss": 3.8824, "step": 27784 }, { "epoch": 9.255517614724743, "grad_norm": 0.83203125, "learning_rate": 2.153956593583137e-07, "loss": 3.9752, "step": 27785 }, { "epoch": 9.255850753726993, "grad_norm": 0.7578125, "learning_rate": 2.1520310638212082e-07, "loss": 3.9433, "step": 27786 }, { "epoch": 9.256183892729242, "grad_norm": 0.76171875, "learning_rate": 2.1501063825803247e-07, "loss": 3.9853, "step": 27787 }, { "epoch": 9.25651703173149, "grad_norm": 0.80078125, "learning_rate": 2.1481825498829182e-07, "loss": 3.9966, "step": 27788 }, { "epoch": 9.256850170733738, "grad_norm": 0.75390625, "learning_rate": 2.1462595657513712e-07, "loss": 4.0618, "step": 27789 }, { "epoch": 9.257183309735987, "grad_norm": 0.79296875, "learning_rate": 2.144337430208107e-07, "loss": 3.9281, "step": 27790 }, { "epoch": 9.257516448738237, "grad_norm": 0.71875, "learning_rate": 2.1424161432755163e-07, "loss": 4.07, "step": 27791 }, { "epoch": 9.257849587740484, "grad_norm": 0.765625, "learning_rate": 2.140495704975956e-07, "loss": 4.0103, "step": 27792 }, { "epoch": 9.258182726742733, "grad_norm": 0.73828125, "learning_rate": 2.1385761153318167e-07, "loss": 3.9165, "step": 27793 }, { "epoch": 9.258515865744982, "grad_norm": 0.76171875, "learning_rate": 2.136657374365439e-07, "loss": 3.9403, "step": 27794 }, { "epoch": 9.258849004747232, "grad_norm": 0.7578125, "learning_rate": 2.134739482099188e-07, "loss": 3.9395, "step": 27795 }, { "epoch": 9.259182143749479, "grad_norm": 0.7421875, "learning_rate": 2.1328224385553874e-07, "loss": 4.0043, "step": 27796 }, { "epoch": 9.259515282751728, "grad_norm": 0.78515625, "learning_rate": 2.1309062437563865e-07, "loss": 4.0362, "step": 27797 }, { "epoch": 9.259848421753977, "grad_norm": 0.76171875, "learning_rate": 2.1289908977244922e-07, "loss": 3.9562, "step": 27798 }, { "epoch": 9.260181560756225, "grad_norm": 0.73828125, "learning_rate": 2.1270764004820114e-07, "loss": 4.0075, "step": 27799 }, { "epoch": 9.260514699758474, "grad_norm": 0.7734375, "learning_rate": 2.1251627520512434e-07, "loss": 4.0248, "step": 27800 }, { "epoch": 9.260847838760723, "grad_norm": 0.75390625, "learning_rate": 2.1232499524544868e-07, "loss": 3.9509, "step": 27801 }, { "epoch": 9.261180977762972, "grad_norm": 0.796875, "learning_rate": 2.1213380017140239e-07, "loss": 3.9866, "step": 27802 }, { "epoch": 9.26151411676522, "grad_norm": 0.72265625, "learning_rate": 2.1194268998521117e-07, "loss": 3.9785, "step": 27803 }, { "epoch": 9.261847255767469, "grad_norm": 0.7578125, "learning_rate": 2.1175166468910156e-07, "loss": 3.925, "step": 27804 }, { "epoch": 9.262180394769718, "grad_norm": 0.72265625, "learning_rate": 2.1156072428529933e-07, "loss": 4.0385, "step": 27805 }, { "epoch": 9.262513533771966, "grad_norm": 0.75, "learning_rate": 2.1136986877602765e-07, "loss": 3.9419, "step": 27806 }, { "epoch": 9.262846672774215, "grad_norm": 0.76171875, "learning_rate": 2.1117909816350894e-07, "loss": 3.9709, "step": 27807 }, { "epoch": 9.263179811776464, "grad_norm": 0.7578125, "learning_rate": 2.1098841244996724e-07, "loss": 3.9516, "step": 27808 }, { "epoch": 9.263512950778713, "grad_norm": 0.7734375, "learning_rate": 2.1079781163762246e-07, "loss": 4.0431, "step": 27809 }, { "epoch": 9.26384608978096, "grad_norm": 0.75390625, "learning_rate": 2.106072957286953e-07, "loss": 3.9613, "step": 27810 }, { "epoch": 9.26417922878321, "grad_norm": 0.76171875, "learning_rate": 2.10416864725404e-07, "loss": 4.0754, "step": 27811 }, { "epoch": 9.264512367785459, "grad_norm": 0.78125, "learning_rate": 2.1022651862996677e-07, "loss": 4.0079, "step": 27812 }, { "epoch": 9.264845506787708, "grad_norm": 0.7734375, "learning_rate": 2.1003625744460102e-07, "loss": 3.9792, "step": 27813 }, { "epoch": 9.265178645789955, "grad_norm": 0.765625, "learning_rate": 2.0984608117152165e-07, "loss": 3.9615, "step": 27814 }, { "epoch": 9.265511784792205, "grad_norm": 0.75390625, "learning_rate": 2.0965598981294688e-07, "loss": 3.9073, "step": 27815 }, { "epoch": 9.265844923794454, "grad_norm": 0.76953125, "learning_rate": 2.0946598337108825e-07, "loss": 4.0293, "step": 27816 }, { "epoch": 9.266178062796701, "grad_norm": 0.79296875, "learning_rate": 2.0927606184815983e-07, "loss": 3.9793, "step": 27817 }, { "epoch": 9.26651120179895, "grad_norm": 0.75, "learning_rate": 2.090862252463724e-07, "loss": 3.9888, "step": 27818 }, { "epoch": 9.2668443408012, "grad_norm": 0.765625, "learning_rate": 2.0889647356793995e-07, "loss": 3.8744, "step": 27819 }, { "epoch": 9.267177479803449, "grad_norm": 0.765625, "learning_rate": 2.0870680681507077e-07, "loss": 3.9805, "step": 27820 }, { "epoch": 9.267510618805696, "grad_norm": 0.74609375, "learning_rate": 2.085172249899739e-07, "loss": 3.9479, "step": 27821 }, { "epoch": 9.267843757807945, "grad_norm": 0.8125, "learning_rate": 2.083277280948584e-07, "loss": 3.9119, "step": 27822 }, { "epoch": 9.268176896810195, "grad_norm": 0.7734375, "learning_rate": 2.0813831613193085e-07, "loss": 4.0329, "step": 27823 }, { "epoch": 9.268510035812442, "grad_norm": 0.76953125, "learning_rate": 2.0794898910339783e-07, "loss": 3.9511, "step": 27824 }, { "epoch": 9.268843174814691, "grad_norm": 0.71875, "learning_rate": 2.0775974701146423e-07, "loss": 3.9399, "step": 27825 }, { "epoch": 9.26917631381694, "grad_norm": 0.75390625, "learning_rate": 2.0757058985833493e-07, "loss": 4.0525, "step": 27826 }, { "epoch": 9.26950945281919, "grad_norm": 0.77734375, "learning_rate": 2.073815176462132e-07, "loss": 4.0134, "step": 27827 }, { "epoch": 9.269842591821437, "grad_norm": 0.7734375, "learning_rate": 2.0719253037730062e-07, "loss": 4.0148, "step": 27828 }, { "epoch": 9.270175730823686, "grad_norm": 0.76171875, "learning_rate": 2.0700362805379873e-07, "loss": 3.9412, "step": 27829 }, { "epoch": 9.270508869825935, "grad_norm": 0.73046875, "learning_rate": 2.068148106779083e-07, "loss": 3.9733, "step": 27830 }, { "epoch": 9.270842008828183, "grad_norm": 0.75390625, "learning_rate": 2.0662607825182838e-07, "loss": 3.9432, "step": 27831 }, { "epoch": 9.271175147830432, "grad_norm": 0.78515625, "learning_rate": 2.0643743077775723e-07, "loss": 4.0469, "step": 27832 }, { "epoch": 9.271508286832681, "grad_norm": 0.76953125, "learning_rate": 2.0624886825789142e-07, "loss": 3.9614, "step": 27833 }, { "epoch": 9.27184142583493, "grad_norm": 0.79296875, "learning_rate": 2.0606039069442918e-07, "loss": 3.9447, "step": 27834 }, { "epoch": 9.272174564837178, "grad_norm": 0.73828125, "learning_rate": 2.0587199808956375e-07, "loss": 3.9716, "step": 27835 }, { "epoch": 9.272507703839427, "grad_norm": 0.77734375, "learning_rate": 2.056836904454909e-07, "loss": 3.9752, "step": 27836 }, { "epoch": 9.272840842841676, "grad_norm": 0.72265625, "learning_rate": 2.0549546776440303e-07, "loss": 3.9571, "step": 27837 }, { "epoch": 9.273173981843925, "grad_norm": 0.7734375, "learning_rate": 2.0530733004849505e-07, "loss": 3.9517, "step": 27838 }, { "epoch": 9.273507120846173, "grad_norm": 0.70703125, "learning_rate": 2.0511927729995522e-07, "loss": 3.9743, "step": 27839 }, { "epoch": 9.273840259848422, "grad_norm": 0.7890625, "learning_rate": 2.0493130952097426e-07, "loss": 3.9978, "step": 27840 }, { "epoch": 9.274173398850671, "grad_norm": 0.7890625, "learning_rate": 2.0474342671374298e-07, "loss": 3.8541, "step": 27841 }, { "epoch": 9.274506537852918, "grad_norm": 0.76171875, "learning_rate": 2.0455562888044953e-07, "loss": 3.9992, "step": 27842 }, { "epoch": 9.274839676855168, "grad_norm": 0.7421875, "learning_rate": 2.0436791602328142e-07, "loss": 3.9377, "step": 27843 }, { "epoch": 9.275172815857417, "grad_norm": 0.7734375, "learning_rate": 2.0418028814442352e-07, "loss": 3.9407, "step": 27844 }, { "epoch": 9.275505954859666, "grad_norm": 0.7578125, "learning_rate": 2.0399274524606325e-07, "loss": 3.9734, "step": 27845 }, { "epoch": 9.275839093861913, "grad_norm": 0.73828125, "learning_rate": 2.0380528733038557e-07, "loss": 4.0274, "step": 27846 }, { "epoch": 9.276172232864162, "grad_norm": 0.76953125, "learning_rate": 2.0361791439957117e-07, "loss": 3.9935, "step": 27847 }, { "epoch": 9.276505371866412, "grad_norm": 0.77734375, "learning_rate": 2.03430626455805e-07, "loss": 3.957, "step": 27848 }, { "epoch": 9.27683851086866, "grad_norm": 0.7890625, "learning_rate": 2.0324342350126702e-07, "loss": 3.9441, "step": 27849 }, { "epoch": 9.277171649870908, "grad_norm": 0.7421875, "learning_rate": 2.0305630553813876e-07, "loss": 3.9664, "step": 27850 }, { "epoch": 9.277504788873157, "grad_norm": 0.71875, "learning_rate": 2.0286927256859932e-07, "loss": 4.0279, "step": 27851 }, { "epoch": 9.277837927875407, "grad_norm": 0.7421875, "learning_rate": 2.02682324594827e-07, "loss": 3.9822, "step": 27852 }, { "epoch": 9.278171066877654, "grad_norm": 0.7890625, "learning_rate": 2.0249546161900002e-07, "loss": 3.955, "step": 27853 }, { "epoch": 9.278504205879903, "grad_norm": 0.76953125, "learning_rate": 2.0230868364329498e-07, "loss": 3.9596, "step": 27854 }, { "epoch": 9.278837344882152, "grad_norm": 0.76171875, "learning_rate": 2.0212199066988678e-07, "loss": 3.9894, "step": 27855 }, { "epoch": 9.279170483884402, "grad_norm": 0.76171875, "learning_rate": 2.0193538270094957e-07, "loss": 3.9859, "step": 27856 }, { "epoch": 9.279503622886649, "grad_norm": 0.75, "learning_rate": 2.0174885973865826e-07, "loss": 3.9709, "step": 27857 }, { "epoch": 9.279836761888898, "grad_norm": 0.73046875, "learning_rate": 2.015624217851836e-07, "loss": 4.0623, "step": 27858 }, { "epoch": 9.280169900891147, "grad_norm": 0.71875, "learning_rate": 2.0137606884269966e-07, "loss": 3.9808, "step": 27859 }, { "epoch": 9.280503039893395, "grad_norm": 0.78125, "learning_rate": 2.0118980091337474e-07, "loss": 4.0234, "step": 27860 }, { "epoch": 9.280836178895644, "grad_norm": 0.77734375, "learning_rate": 2.0100361799938043e-07, "loss": 3.9812, "step": 27861 }, { "epoch": 9.281169317897893, "grad_norm": 0.7421875, "learning_rate": 2.0081752010288335e-07, "loss": 3.9432, "step": 27862 }, { "epoch": 9.281502456900142, "grad_norm": 0.73828125, "learning_rate": 2.0063150722605256e-07, "loss": 3.985, "step": 27863 }, { "epoch": 9.28183559590239, "grad_norm": 0.7734375, "learning_rate": 2.0044557937105552e-07, "loss": 3.9658, "step": 27864 }, { "epoch": 9.282168734904639, "grad_norm": 0.71875, "learning_rate": 2.0025973654005464e-07, "loss": 4.0001, "step": 27865 }, { "epoch": 9.282501873906888, "grad_norm": 0.76171875, "learning_rate": 2.0007397873521821e-07, "loss": 4.0129, "step": 27866 }, { "epoch": 9.282835012909135, "grad_norm": 0.7421875, "learning_rate": 1.99888305958707e-07, "loss": 4.087, "step": 27867 }, { "epoch": 9.283168151911385, "grad_norm": 0.76171875, "learning_rate": 1.9970271821268593e-07, "loss": 3.9982, "step": 27868 }, { "epoch": 9.283501290913634, "grad_norm": 0.79296875, "learning_rate": 1.9951721549931495e-07, "loss": 3.9529, "step": 27869 }, { "epoch": 9.283834429915883, "grad_norm": 0.7421875, "learning_rate": 1.993317978207565e-07, "loss": 4.06, "step": 27870 }, { "epoch": 9.28416756891813, "grad_norm": 0.7421875, "learning_rate": 1.9914646517916885e-07, "loss": 4.0333, "step": 27871 }, { "epoch": 9.28450070792038, "grad_norm": 0.83984375, "learning_rate": 1.9896121757671193e-07, "loss": 3.9729, "step": 27872 }, { "epoch": 9.284833846922629, "grad_norm": 0.78515625, "learning_rate": 1.9877605501554236e-07, "loss": 4.01, "step": 27873 }, { "epoch": 9.285166985924878, "grad_norm": 0.80078125, "learning_rate": 1.9859097749781757e-07, "loss": 4.0599, "step": 27874 }, { "epoch": 9.285500124927125, "grad_norm": 0.81640625, "learning_rate": 1.984059850256925e-07, "loss": 3.9741, "step": 27875 }, { "epoch": 9.285833263929375, "grad_norm": 0.78125, "learning_rate": 1.9822107760132296e-07, "loss": 4.0744, "step": 27876 }, { "epoch": 9.286166402931624, "grad_norm": 0.796875, "learning_rate": 1.9803625522686135e-07, "loss": 3.9456, "step": 27877 }, { "epoch": 9.286499541933871, "grad_norm": 0.75, "learning_rate": 1.9785151790446264e-07, "loss": 4.0796, "step": 27878 }, { "epoch": 9.28683268093612, "grad_norm": 0.7578125, "learning_rate": 1.9766686563627678e-07, "loss": 3.9974, "step": 27879 }, { "epoch": 9.28716581993837, "grad_norm": 0.75, "learning_rate": 1.9748229842445453e-07, "loss": 3.9586, "step": 27880 }, { "epoch": 9.287498958940619, "grad_norm": 0.78125, "learning_rate": 1.9729781627114667e-07, "loss": 4.0543, "step": 27881 }, { "epoch": 9.287832097942866, "grad_norm": 0.75, "learning_rate": 1.9711341917850233e-07, "loss": 4.0688, "step": 27882 }, { "epoch": 9.288165236945115, "grad_norm": 0.76953125, "learning_rate": 1.9692910714866812e-07, "loss": 3.9938, "step": 27883 }, { "epoch": 9.288498375947364, "grad_norm": 0.72265625, "learning_rate": 1.9674488018379066e-07, "loss": 3.9422, "step": 27884 }, { "epoch": 9.288831514949612, "grad_norm": 0.75390625, "learning_rate": 1.965607382860174e-07, "loss": 4.0645, "step": 27885 }, { "epoch": 9.289164653951861, "grad_norm": 0.76171875, "learning_rate": 1.9637668145749244e-07, "loss": 3.9533, "step": 27886 }, { "epoch": 9.28949779295411, "grad_norm": 0.75390625, "learning_rate": 1.9619270970035907e-07, "loss": 4.019, "step": 27887 }, { "epoch": 9.28983093195636, "grad_norm": 0.75390625, "learning_rate": 1.9600882301675978e-07, "loss": 3.9113, "step": 27888 }, { "epoch": 9.290164070958607, "grad_norm": 0.7578125, "learning_rate": 1.9582502140883863e-07, "loss": 3.9876, "step": 27889 }, { "epoch": 9.290497209960856, "grad_norm": 0.79296875, "learning_rate": 1.9564130487873479e-07, "loss": 3.9612, "step": 27890 }, { "epoch": 9.290830348963105, "grad_norm": 0.8046875, "learning_rate": 1.9545767342858817e-07, "loss": 3.9041, "step": 27891 }, { "epoch": 9.291163487965353, "grad_norm": 0.78125, "learning_rate": 1.9527412706053876e-07, "loss": 3.9923, "step": 27892 }, { "epoch": 9.291496626967602, "grad_norm": 0.75390625, "learning_rate": 1.950906657767232e-07, "loss": 3.9829, "step": 27893 }, { "epoch": 9.291829765969851, "grad_norm": 0.77734375, "learning_rate": 1.9490728957927972e-07, "loss": 3.8973, "step": 27894 }, { "epoch": 9.2921629049721, "grad_norm": 0.71484375, "learning_rate": 1.947239984703425e-07, "loss": 4.0991, "step": 27895 }, { "epoch": 9.292496043974348, "grad_norm": 0.734375, "learning_rate": 1.945407924520473e-07, "loss": 3.9989, "step": 27896 }, { "epoch": 9.292829182976597, "grad_norm": 0.80078125, "learning_rate": 1.9435767152652912e-07, "loss": 3.9571, "step": 27897 }, { "epoch": 9.293162321978846, "grad_norm": 0.75390625, "learning_rate": 1.9417463569592037e-07, "loss": 3.9619, "step": 27898 }, { "epoch": 9.293495460981095, "grad_norm": 0.7578125, "learning_rate": 1.9399168496235103e-07, "loss": 3.9532, "step": 27899 }, { "epoch": 9.293828599983343, "grad_norm": 0.7890625, "learning_rate": 1.9380881932795609e-07, "loss": 3.9858, "step": 27900 }, { "epoch": 9.294161738985592, "grad_norm": 0.7890625, "learning_rate": 1.9362603879486212e-07, "loss": 3.9188, "step": 27901 }, { "epoch": 9.294494877987841, "grad_norm": 0.75390625, "learning_rate": 1.934433433651983e-07, "loss": 4.0151, "step": 27902 }, { "epoch": 9.294828016990088, "grad_norm": 0.76953125, "learning_rate": 1.932607330410946e-07, "loss": 3.9443, "step": 27903 }, { "epoch": 9.295161155992337, "grad_norm": 0.7109375, "learning_rate": 1.9307820782467762e-07, "loss": 4.0421, "step": 27904 }, { "epoch": 9.295494294994587, "grad_norm": 0.77734375, "learning_rate": 1.928957677180715e-07, "loss": 3.9408, "step": 27905 }, { "epoch": 9.295827433996836, "grad_norm": 0.734375, "learning_rate": 1.9271341272340288e-07, "loss": 4.0135, "step": 27906 }, { "epoch": 9.296160572999083, "grad_norm": 0.78125, "learning_rate": 1.9253114284279506e-07, "loss": 4.0677, "step": 27907 }, { "epoch": 9.296493712001332, "grad_norm": 0.765625, "learning_rate": 1.9234895807837215e-07, "loss": 3.9781, "step": 27908 }, { "epoch": 9.296826851003582, "grad_norm": 0.7734375, "learning_rate": 1.9216685843225583e-07, "loss": 3.9854, "step": 27909 }, { "epoch": 9.29715999000583, "grad_norm": 0.7421875, "learning_rate": 1.9198484390656607e-07, "loss": 3.9677, "step": 27910 }, { "epoch": 9.297493129008078, "grad_norm": 0.734375, "learning_rate": 1.9180291450342447e-07, "loss": 4.016, "step": 27911 }, { "epoch": 9.297826268010327, "grad_norm": 0.77734375, "learning_rate": 1.916210702249485e-07, "loss": 3.8971, "step": 27912 }, { "epoch": 9.298159407012577, "grad_norm": 0.76171875, "learning_rate": 1.9143931107325735e-07, "loss": 3.9961, "step": 27913 }, { "epoch": 9.298492546014824, "grad_norm": 0.765625, "learning_rate": 1.9125763705046846e-07, "loss": 4.0569, "step": 27914 }, { "epoch": 9.298825685017073, "grad_norm": 0.7890625, "learning_rate": 1.9107604815869678e-07, "loss": 3.8927, "step": 27915 }, { "epoch": 9.299158824019322, "grad_norm": 0.74609375, "learning_rate": 1.9089454440005898e-07, "loss": 4.026, "step": 27916 }, { "epoch": 9.299491963021572, "grad_norm": 0.8046875, "learning_rate": 1.9071312577666672e-07, "loss": 3.9281, "step": 27917 }, { "epoch": 9.299825102023819, "grad_norm": 0.73828125, "learning_rate": 1.905317922906366e-07, "loss": 4.029, "step": 27918 }, { "epoch": 9.300158241026068, "grad_norm": 0.78125, "learning_rate": 1.903505439440778e-07, "loss": 3.9784, "step": 27919 }, { "epoch": 9.300491380028317, "grad_norm": 0.765625, "learning_rate": 1.9016938073910278e-07, "loss": 3.9983, "step": 27920 }, { "epoch": 9.300824519030565, "grad_norm": 0.7890625, "learning_rate": 1.8998830267781986e-07, "loss": 3.8838, "step": 27921 }, { "epoch": 9.301157658032814, "grad_norm": 0.76171875, "learning_rate": 1.8980730976234152e-07, "loss": 3.9762, "step": 27922 }, { "epoch": 9.301490797035063, "grad_norm": 0.76171875, "learning_rate": 1.8962640199477355e-07, "loss": 3.9888, "step": 27923 }, { "epoch": 9.301823936037312, "grad_norm": 0.7421875, "learning_rate": 1.8944557937722345e-07, "loss": 4.0204, "step": 27924 }, { "epoch": 9.30215707503956, "grad_norm": 0.7265625, "learning_rate": 1.8926484191179788e-07, "loss": 4.0123, "step": 27925 }, { "epoch": 9.302490214041809, "grad_norm": 0.734375, "learning_rate": 1.890841896006018e-07, "loss": 3.9449, "step": 27926 }, { "epoch": 9.302823353044058, "grad_norm": 0.74609375, "learning_rate": 1.8890362244574104e-07, "loss": 4.0291, "step": 27927 }, { "epoch": 9.303156492046305, "grad_norm": 0.77734375, "learning_rate": 1.887231404493156e-07, "loss": 3.9752, "step": 27928 }, { "epoch": 9.303489631048555, "grad_norm": 0.73046875, "learning_rate": 1.8854274361342962e-07, "loss": 3.9851, "step": 27929 }, { "epoch": 9.303822770050804, "grad_norm": 0.75, "learning_rate": 1.883624319401847e-07, "loss": 4.0165, "step": 27930 }, { "epoch": 9.304155909053053, "grad_norm": 0.734375, "learning_rate": 1.8818220543168008e-07, "loss": 4.0129, "step": 27931 }, { "epoch": 9.3044890480553, "grad_norm": 0.69921875, "learning_rate": 1.8800206409001402e-07, "loss": 3.9863, "step": 27932 }, { "epoch": 9.30482218705755, "grad_norm": 0.78515625, "learning_rate": 1.8782200791728821e-07, "loss": 3.9516, "step": 27933 }, { "epoch": 9.305155326059799, "grad_norm": 0.8046875, "learning_rate": 1.8764203691559678e-07, "loss": 4.0059, "step": 27934 }, { "epoch": 9.305488465062048, "grad_norm": 0.765625, "learning_rate": 1.8746215108703807e-07, "loss": 3.9556, "step": 27935 }, { "epoch": 9.305821604064295, "grad_norm": 0.75, "learning_rate": 1.872823504337054e-07, "loss": 3.941, "step": 27936 }, { "epoch": 9.306154743066545, "grad_norm": 0.72265625, "learning_rate": 1.8710263495769457e-07, "loss": 4.0765, "step": 27937 }, { "epoch": 9.306487882068794, "grad_norm": 0.7734375, "learning_rate": 1.8692300466109812e-07, "loss": 3.9597, "step": 27938 }, { "epoch": 9.306821021071041, "grad_norm": 0.78515625, "learning_rate": 1.867434595460077e-07, "loss": 4.0441, "step": 27939 }, { "epoch": 9.30715416007329, "grad_norm": 0.7890625, "learning_rate": 1.8656399961451658e-07, "loss": 3.922, "step": 27940 }, { "epoch": 9.30748729907554, "grad_norm": 0.71484375, "learning_rate": 1.86384624868714e-07, "loss": 3.9892, "step": 27941 }, { "epoch": 9.307820438077789, "grad_norm": 0.74609375, "learning_rate": 1.8620533531068905e-07, "loss": 3.9378, "step": 27942 }, { "epoch": 9.308153577080036, "grad_norm": 0.75390625, "learning_rate": 1.860261309425293e-07, "loss": 4.0072, "step": 27943 }, { "epoch": 9.308486716082285, "grad_norm": 0.8125, "learning_rate": 1.8584701176632468e-07, "loss": 3.9753, "step": 27944 }, { "epoch": 9.308819855084534, "grad_norm": 0.765625, "learning_rate": 1.8566797778416022e-07, "loss": 3.9915, "step": 27945 }, { "epoch": 9.309152994086782, "grad_norm": 0.78515625, "learning_rate": 1.8548902899811927e-07, "loss": 3.9927, "step": 27946 }, { "epoch": 9.309486133089031, "grad_norm": 0.73828125, "learning_rate": 1.8531016541028844e-07, "loss": 3.8856, "step": 27947 }, { "epoch": 9.30981927209128, "grad_norm": 0.7578125, "learning_rate": 1.851313870227511e-07, "loss": 4.0068, "step": 27948 }, { "epoch": 9.31015241109353, "grad_norm": 0.7109375, "learning_rate": 1.8495269383758895e-07, "loss": 3.9748, "step": 27949 }, { "epoch": 9.310485550095777, "grad_norm": 0.76953125, "learning_rate": 1.847740858568836e-07, "loss": 3.9329, "step": 27950 }, { "epoch": 9.310818689098026, "grad_norm": 0.734375, "learning_rate": 1.8459556308271507e-07, "loss": 3.9664, "step": 27951 }, { "epoch": 9.311151828100275, "grad_norm": 0.78125, "learning_rate": 1.8441712551716338e-07, "loss": 3.9993, "step": 27952 }, { "epoch": 9.311484967102524, "grad_norm": 0.71875, "learning_rate": 1.8423877316230685e-07, "loss": 3.9428, "step": 27953 }, { "epoch": 9.311818106104772, "grad_norm": 0.78125, "learning_rate": 1.8406050602022217e-07, "loss": 3.9214, "step": 27954 }, { "epoch": 9.312151245107021, "grad_norm": 0.7265625, "learning_rate": 1.8388232409298684e-07, "loss": 3.9515, "step": 27955 }, { "epoch": 9.31248438410927, "grad_norm": 0.76953125, "learning_rate": 1.8370422738267585e-07, "loss": 3.9874, "step": 27956 }, { "epoch": 9.312817523111518, "grad_norm": 0.7265625, "learning_rate": 1.8352621589136253e-07, "loss": 3.9627, "step": 27957 }, { "epoch": 9.313150662113767, "grad_norm": 0.765625, "learning_rate": 1.8334828962112193e-07, "loss": 3.9751, "step": 27958 }, { "epoch": 9.313483801116016, "grad_norm": 0.75, "learning_rate": 1.8317044857402653e-07, "loss": 4.0382, "step": 27959 }, { "epoch": 9.313816940118265, "grad_norm": 0.8046875, "learning_rate": 1.8299269275214636e-07, "loss": 3.9025, "step": 27960 }, { "epoch": 9.314150079120513, "grad_norm": 0.73828125, "learning_rate": 1.8281502215755224e-07, "loss": 3.9414, "step": 27961 }, { "epoch": 9.314483218122762, "grad_norm": 0.75390625, "learning_rate": 1.8263743679231503e-07, "loss": 4.0316, "step": 27962 }, { "epoch": 9.31481635712501, "grad_norm": 0.77734375, "learning_rate": 1.8245993665850307e-07, "loss": 3.9879, "step": 27963 }, { "epoch": 9.315149496127258, "grad_norm": 0.73046875, "learning_rate": 1.822825217581822e-07, "loss": 4.0044, "step": 27964 }, { "epoch": 9.315482635129507, "grad_norm": 0.7734375, "learning_rate": 1.8210519209341914e-07, "loss": 4.0469, "step": 27965 }, { "epoch": 9.315815774131757, "grad_norm": 0.75, "learning_rate": 1.8192794766628052e-07, "loss": 4.0406, "step": 27966 }, { "epoch": 9.316148913134006, "grad_norm": 0.78125, "learning_rate": 1.8175078847883137e-07, "loss": 3.9695, "step": 27967 }, { "epoch": 9.316482052136253, "grad_norm": 0.7890625, "learning_rate": 1.8157371453313337e-07, "loss": 4.009, "step": 27968 }, { "epoch": 9.316815191138502, "grad_norm": 0.8046875, "learning_rate": 1.8139672583124905e-07, "loss": 3.9374, "step": 27969 }, { "epoch": 9.317148330140752, "grad_norm": 0.75, "learning_rate": 1.8121982237524176e-07, "loss": 3.9895, "step": 27970 }, { "epoch": 9.317481469143, "grad_norm": 0.78125, "learning_rate": 1.810430041671715e-07, "loss": 3.9601, "step": 27971 }, { "epoch": 9.317814608145248, "grad_norm": 0.7734375, "learning_rate": 1.8086627120909665e-07, "loss": 3.9752, "step": 27972 }, { "epoch": 9.318147747147497, "grad_norm": 0.7265625, "learning_rate": 1.8068962350307721e-07, "loss": 3.9769, "step": 27973 }, { "epoch": 9.318480886149747, "grad_norm": 0.8046875, "learning_rate": 1.8051306105116905e-07, "loss": 4.0112, "step": 27974 }, { "epoch": 9.318814025151994, "grad_norm": 0.7578125, "learning_rate": 1.8033658385543052e-07, "loss": 3.9787, "step": 27975 }, { "epoch": 9.319147164154243, "grad_norm": 0.765625, "learning_rate": 1.8016019191791579e-07, "loss": 3.9481, "step": 27976 }, { "epoch": 9.319480303156492, "grad_norm": 0.72265625, "learning_rate": 1.7998388524068072e-07, "loss": 3.9738, "step": 27977 }, { "epoch": 9.319813442158742, "grad_norm": 0.75, "learning_rate": 1.7980766382577783e-07, "loss": 4.0277, "step": 27978 }, { "epoch": 9.320146581160989, "grad_norm": 0.77734375, "learning_rate": 1.7963152767526052e-07, "loss": 3.9742, "step": 27979 }, { "epoch": 9.320479720163238, "grad_norm": 0.7890625, "learning_rate": 1.7945547679117964e-07, "loss": 4.0152, "step": 27980 }, { "epoch": 9.320812859165487, "grad_norm": 0.73828125, "learning_rate": 1.7927951117558765e-07, "loss": 4.0375, "step": 27981 }, { "epoch": 9.321145998167735, "grad_norm": 0.76171875, "learning_rate": 1.7910363083053134e-07, "loss": 4.0519, "step": 27982 }, { "epoch": 9.321479137169984, "grad_norm": 0.75390625, "learning_rate": 1.7892783575805982e-07, "loss": 3.9834, "step": 27983 }, { "epoch": 9.321812276172233, "grad_norm": 0.796875, "learning_rate": 1.7875212596022317e-07, "loss": 3.9557, "step": 27984 }, { "epoch": 9.322145415174482, "grad_norm": 0.75, "learning_rate": 1.7857650143906558e-07, "loss": 4.0171, "step": 27985 }, { "epoch": 9.32247855417673, "grad_norm": 0.75, "learning_rate": 1.7840096219663455e-07, "loss": 3.9535, "step": 27986 }, { "epoch": 9.322811693178979, "grad_norm": 0.7421875, "learning_rate": 1.782255082349718e-07, "loss": 4.0473, "step": 27987 }, { "epoch": 9.323144832181228, "grad_norm": 0.7421875, "learning_rate": 1.7805013955612487e-07, "loss": 4.0778, "step": 27988 }, { "epoch": 9.323477971183475, "grad_norm": 0.7734375, "learning_rate": 1.7787485616213378e-07, "loss": 3.9984, "step": 27989 }, { "epoch": 9.323811110185725, "grad_norm": 0.76171875, "learning_rate": 1.7769965805504106e-07, "loss": 3.9367, "step": 27990 }, { "epoch": 9.324144249187974, "grad_norm": 0.75, "learning_rate": 1.775245452368876e-07, "loss": 4.0108, "step": 27991 }, { "epoch": 9.324477388190223, "grad_norm": 0.7578125, "learning_rate": 1.7734951770971176e-07, "loss": 3.9419, "step": 27992 }, { "epoch": 9.32481052719247, "grad_norm": 0.75, "learning_rate": 1.7717457547555438e-07, "loss": 3.925, "step": 27993 }, { "epoch": 9.32514366619472, "grad_norm": 0.72265625, "learning_rate": 1.7699971853645052e-07, "loss": 4.0305, "step": 27994 }, { "epoch": 9.325476805196969, "grad_norm": 0.73046875, "learning_rate": 1.768249468944394e-07, "loss": 4.0675, "step": 27995 }, { "epoch": 9.325809944199218, "grad_norm": 0.75390625, "learning_rate": 1.766502605515552e-07, "loss": 3.8792, "step": 27996 }, { "epoch": 9.326143083201465, "grad_norm": 0.76171875, "learning_rate": 1.764756595098338e-07, "loss": 4.0372, "step": 27997 }, { "epoch": 9.326476222203715, "grad_norm": 0.765625, "learning_rate": 1.763011437713069e-07, "loss": 3.9905, "step": 27998 }, { "epoch": 9.326809361205964, "grad_norm": 0.8046875, "learning_rate": 1.761267133380104e-07, "loss": 3.9162, "step": 27999 }, { "epoch": 9.327142500208211, "grad_norm": 0.73828125, "learning_rate": 1.7595236821197346e-07, "loss": 3.9555, "step": 28000 }, { "epoch": 9.32747563921046, "grad_norm": 0.7578125, "learning_rate": 1.75778108395227e-07, "loss": 4.0578, "step": 28001 }, { "epoch": 9.32780877821271, "grad_norm": 0.7421875, "learning_rate": 1.756039338898019e-07, "loss": 4.0044, "step": 28002 }, { "epoch": 9.328141917214959, "grad_norm": 0.77734375, "learning_rate": 1.754298446977265e-07, "loss": 3.9496, "step": 28003 }, { "epoch": 9.328475056217206, "grad_norm": 0.7890625, "learning_rate": 1.7525584082102837e-07, "loss": 3.9713, "step": 28004 }, { "epoch": 9.328808195219455, "grad_norm": 0.734375, "learning_rate": 1.7508192226173335e-07, "loss": 3.9373, "step": 28005 }, { "epoch": 9.329141334221704, "grad_norm": 0.7421875, "learning_rate": 1.7490808902186907e-07, "loss": 4.0048, "step": 28006 }, { "epoch": 9.329474473223952, "grad_norm": 0.76171875, "learning_rate": 1.7473434110345883e-07, "loss": 3.981, "step": 28007 }, { "epoch": 9.329807612226201, "grad_norm": 0.7578125, "learning_rate": 1.7456067850852853e-07, "loss": 4.0423, "step": 28008 }, { "epoch": 9.33014075122845, "grad_norm": 0.75390625, "learning_rate": 1.743871012390974e-07, "loss": 4.0389, "step": 28009 }, { "epoch": 9.3304738902307, "grad_norm": 0.76171875, "learning_rate": 1.742136092971905e-07, "loss": 4.0189, "step": 28010 }, { "epoch": 9.330807029232947, "grad_norm": 0.7265625, "learning_rate": 1.7404020268482784e-07, "loss": 3.9922, "step": 28011 }, { "epoch": 9.331140168235196, "grad_norm": 0.73828125, "learning_rate": 1.7386688140402786e-07, "loss": 3.9481, "step": 28012 }, { "epoch": 9.331473307237445, "grad_norm": 0.76171875, "learning_rate": 1.7369364545680972e-07, "loss": 4.0462, "step": 28013 }, { "epoch": 9.331806446239694, "grad_norm": 0.80859375, "learning_rate": 1.735204948451935e-07, "loss": 3.9588, "step": 28014 }, { "epoch": 9.332139585241942, "grad_norm": 0.7578125, "learning_rate": 1.7334742957119343e-07, "loss": 3.9614, "step": 28015 }, { "epoch": 9.332472724244191, "grad_norm": 0.79296875, "learning_rate": 1.7317444963682706e-07, "loss": 4.0006, "step": 28016 }, { "epoch": 9.33280586324644, "grad_norm": 0.7578125, "learning_rate": 1.7300155504410776e-07, "loss": 3.9918, "step": 28017 }, { "epoch": 9.333139002248688, "grad_norm": 0.76171875, "learning_rate": 1.7282874579505058e-07, "loss": 3.9467, "step": 28018 }, { "epoch": 9.333472141250937, "grad_norm": 0.78125, "learning_rate": 1.726560218916673e-07, "loss": 4.0276, "step": 28019 }, { "epoch": 9.333805280253186, "grad_norm": 0.73828125, "learning_rate": 1.7248338333597041e-07, "loss": 3.9233, "step": 28020 }, { "epoch": 9.334138419255435, "grad_norm": 0.79296875, "learning_rate": 1.7231083012997083e-07, "loss": 3.9678, "step": 28021 }, { "epoch": 9.334471558257682, "grad_norm": 0.78515625, "learning_rate": 1.7213836227567864e-07, "loss": 3.9141, "step": 28022 }, { "epoch": 9.334804697259932, "grad_norm": 0.8515625, "learning_rate": 1.719659797751022e-07, "loss": 3.9922, "step": 28023 }, { "epoch": 9.33513783626218, "grad_norm": 0.73828125, "learning_rate": 1.7179368263024907e-07, "loss": 4.0348, "step": 28024 }, { "epoch": 9.335470975264428, "grad_norm": 0.83984375, "learning_rate": 1.7162147084312768e-07, "loss": 3.9721, "step": 28025 }, { "epoch": 9.335804114266677, "grad_norm": 0.78515625, "learning_rate": 1.7144934441574306e-07, "loss": 3.99, "step": 28026 }, { "epoch": 9.336137253268927, "grad_norm": 0.7578125, "learning_rate": 1.7127730335009862e-07, "loss": 4.0054, "step": 28027 }, { "epoch": 9.336470392271176, "grad_norm": 0.8203125, "learning_rate": 1.7110534764820108e-07, "loss": 4.0568, "step": 28028 }, { "epoch": 9.336803531273423, "grad_norm": 0.76171875, "learning_rate": 1.709334773120505e-07, "loss": 3.9475, "step": 28029 }, { "epoch": 9.337136670275672, "grad_norm": 0.75390625, "learning_rate": 1.7076169234365114e-07, "loss": 3.9028, "step": 28030 }, { "epoch": 9.337469809277922, "grad_norm": 0.796875, "learning_rate": 1.705899927450022e-07, "loss": 3.9257, "step": 28031 }, { "epoch": 9.33780294828017, "grad_norm": 0.765625, "learning_rate": 1.704183785181046e-07, "loss": 3.9838, "step": 28032 }, { "epoch": 9.338136087282418, "grad_norm": 0.74609375, "learning_rate": 1.7024684966495756e-07, "loss": 3.9379, "step": 28033 }, { "epoch": 9.338469226284667, "grad_norm": 0.76953125, "learning_rate": 1.7007540618755867e-07, "loss": 3.8836, "step": 28034 }, { "epoch": 9.338802365286917, "grad_norm": 0.74609375, "learning_rate": 1.6990404808790383e-07, "loss": 4.0168, "step": 28035 }, { "epoch": 9.339135504289164, "grad_norm": 0.7734375, "learning_rate": 1.6973277536799058e-07, "loss": 4.0375, "step": 28036 }, { "epoch": 9.339468643291413, "grad_norm": 0.78125, "learning_rate": 1.6956158802981318e-07, "loss": 3.9655, "step": 28037 }, { "epoch": 9.339801782293662, "grad_norm": 0.7890625, "learning_rate": 1.6939048607536505e-07, "loss": 3.9384, "step": 28038 }, { "epoch": 9.340134921295911, "grad_norm": 0.7890625, "learning_rate": 1.6921946950663958e-07, "loss": 3.9629, "step": 28039 }, { "epoch": 9.340468060298159, "grad_norm": 0.7578125, "learning_rate": 1.6904853832562933e-07, "loss": 4.0328, "step": 28040 }, { "epoch": 9.340801199300408, "grad_norm": 0.80078125, "learning_rate": 1.688776925343244e-07, "loss": 4.0099, "step": 28041 }, { "epoch": 9.341134338302657, "grad_norm": 0.7421875, "learning_rate": 1.6870693213471484e-07, "loss": 3.9932, "step": 28042 }, { "epoch": 9.341467477304905, "grad_norm": 0.76953125, "learning_rate": 1.6853625712879072e-07, "loss": 4.0091, "step": 28043 }, { "epoch": 9.341800616307154, "grad_norm": 0.67578125, "learning_rate": 1.683656675185405e-07, "loss": 4.0034, "step": 28044 }, { "epoch": 9.342133755309403, "grad_norm": 0.75390625, "learning_rate": 1.6819516330594752e-07, "loss": 4.0016, "step": 28045 }, { "epoch": 9.342466894311652, "grad_norm": 0.76171875, "learning_rate": 1.680247444930011e-07, "loss": 4.0257, "step": 28046 }, { "epoch": 9.3428000333139, "grad_norm": 0.78515625, "learning_rate": 1.6785441108168627e-07, "loss": 3.948, "step": 28047 }, { "epoch": 9.343133172316149, "grad_norm": 0.78515625, "learning_rate": 1.6768416307398477e-07, "loss": 3.982, "step": 28048 }, { "epoch": 9.343466311318398, "grad_norm": 0.74609375, "learning_rate": 1.675140004718817e-07, "loss": 4.0127, "step": 28049 }, { "epoch": 9.343799450320645, "grad_norm": 0.73828125, "learning_rate": 1.6734392327735798e-07, "loss": 4.06, "step": 28050 }, { "epoch": 9.344132589322895, "grad_norm": 0.7890625, "learning_rate": 1.6717393149239534e-07, "loss": 3.9361, "step": 28051 }, { "epoch": 9.344465728325144, "grad_norm": 0.75390625, "learning_rate": 1.670040251189739e-07, "loss": 3.9374, "step": 28052 }, { "epoch": 9.344798867327393, "grad_norm": 0.7890625, "learning_rate": 1.6683420415907202e-07, "loss": 3.9888, "step": 28053 }, { "epoch": 9.34513200632964, "grad_norm": 0.765625, "learning_rate": 1.6666446861466817e-07, "loss": 3.9834, "step": 28054 }, { "epoch": 9.34546514533189, "grad_norm": 0.76171875, "learning_rate": 1.6649481848773905e-07, "loss": 3.9923, "step": 28055 }, { "epoch": 9.345798284334139, "grad_norm": 0.7421875, "learning_rate": 1.6632525378026147e-07, "loss": 3.9114, "step": 28056 }, { "epoch": 9.346131423336388, "grad_norm": 0.73828125, "learning_rate": 1.6615577449420883e-07, "loss": 3.9934, "step": 28057 }, { "epoch": 9.346464562338635, "grad_norm": 0.80078125, "learning_rate": 1.6598638063155702e-07, "loss": 3.9577, "step": 28058 }, { "epoch": 9.346797701340885, "grad_norm": 0.76953125, "learning_rate": 1.6581707219427866e-07, "loss": 3.9739, "step": 28059 }, { "epoch": 9.347130840343134, "grad_norm": 0.78125, "learning_rate": 1.656478491843455e-07, "loss": 3.9637, "step": 28060 }, { "epoch": 9.347463979345381, "grad_norm": 0.7734375, "learning_rate": 1.6547871160372846e-07, "loss": 3.9403, "step": 28061 }, { "epoch": 9.34779711834763, "grad_norm": 0.75390625, "learning_rate": 1.6530965945439847e-07, "loss": 3.9757, "step": 28062 }, { "epoch": 9.34813025734988, "grad_norm": 0.7578125, "learning_rate": 1.6514069273832395e-07, "loss": 3.953, "step": 28063 }, { "epoch": 9.348463396352129, "grad_norm": 0.7578125, "learning_rate": 1.6497181145747165e-07, "loss": 3.9854, "step": 28064 }, { "epoch": 9.348796535354376, "grad_norm": 0.73828125, "learning_rate": 1.6480301561381166e-07, "loss": 4.0107, "step": 28065 }, { "epoch": 9.349129674356625, "grad_norm": 0.83203125, "learning_rate": 1.6463430520930828e-07, "loss": 3.991, "step": 28066 }, { "epoch": 9.349462813358874, "grad_norm": 0.75, "learning_rate": 1.6446568024592656e-07, "loss": 3.9564, "step": 28067 }, { "epoch": 9.349795952361122, "grad_norm": 0.7578125, "learning_rate": 1.642971407256308e-07, "loss": 3.9824, "step": 28068 }, { "epoch": 9.350129091363371, "grad_norm": 0.71484375, "learning_rate": 1.6412868665038437e-07, "loss": 3.9956, "step": 28069 }, { "epoch": 9.35046223036562, "grad_norm": 0.7421875, "learning_rate": 1.6396031802214913e-07, "loss": 3.9372, "step": 28070 }, { "epoch": 9.35079536936787, "grad_norm": 0.75390625, "learning_rate": 1.6379203484288673e-07, "loss": 3.9731, "step": 28071 }, { "epoch": 9.351128508370117, "grad_norm": 0.7734375, "learning_rate": 1.6362383711455652e-07, "loss": 3.998, "step": 28072 }, { "epoch": 9.351461647372366, "grad_norm": 0.7421875, "learning_rate": 1.6345572483911857e-07, "loss": 3.9576, "step": 28073 }, { "epoch": 9.351794786374615, "grad_norm": 0.796875, "learning_rate": 1.632876980185305e-07, "loss": 3.999, "step": 28074 }, { "epoch": 9.352127925376864, "grad_norm": 0.73828125, "learning_rate": 1.6311975665474822e-07, "loss": 3.9393, "step": 28075 }, { "epoch": 9.352461064379112, "grad_norm": 0.75390625, "learning_rate": 1.62951900749731e-07, "loss": 4.0263, "step": 28076 }, { "epoch": 9.352794203381361, "grad_norm": 0.78125, "learning_rate": 1.627841303054306e-07, "loss": 3.9604, "step": 28077 }, { "epoch": 9.35312734238361, "grad_norm": 0.7421875, "learning_rate": 1.626164453238038e-07, "loss": 3.9494, "step": 28078 }, { "epoch": 9.353460481385858, "grad_norm": 0.7734375, "learning_rate": 1.6244884580680158e-07, "loss": 4.0101, "step": 28079 }, { "epoch": 9.353793620388107, "grad_norm": 0.75390625, "learning_rate": 1.6228133175637817e-07, "loss": 3.9173, "step": 28080 }, { "epoch": 9.354126759390356, "grad_norm": 0.71484375, "learning_rate": 1.6211390317448366e-07, "loss": 3.9785, "step": 28081 }, { "epoch": 9.354459898392605, "grad_norm": 0.7578125, "learning_rate": 1.6194656006306735e-07, "loss": 4.0025, "step": 28082 }, { "epoch": 9.354793037394852, "grad_norm": 0.82421875, "learning_rate": 1.6177930242408017e-07, "loss": 3.971, "step": 28083 }, { "epoch": 9.355126176397102, "grad_norm": 0.77734375, "learning_rate": 1.6161213025946975e-07, "loss": 3.9797, "step": 28084 }, { "epoch": 9.35545931539935, "grad_norm": 0.7734375, "learning_rate": 1.6144504357118285e-07, "loss": 3.9517, "step": 28085 }, { "epoch": 9.355792454401598, "grad_norm": 0.74609375, "learning_rate": 1.612780423611654e-07, "loss": 4.023, "step": 28086 }, { "epoch": 9.356125593403847, "grad_norm": 0.7109375, "learning_rate": 1.6111112663136335e-07, "loss": 4.0303, "step": 28087 }, { "epoch": 9.356458732406097, "grad_norm": 0.734375, "learning_rate": 1.609442963837218e-07, "loss": 3.9562, "step": 28088 }, { "epoch": 9.356791871408346, "grad_norm": 0.765625, "learning_rate": 1.6077755162018177e-07, "loss": 4.0153, "step": 28089 }, { "epoch": 9.357125010410593, "grad_norm": 0.75, "learning_rate": 1.6061089234268662e-07, "loss": 3.9806, "step": 28090 }, { "epoch": 9.357458149412842, "grad_norm": 0.78125, "learning_rate": 1.6044431855317815e-07, "loss": 3.9857, "step": 28091 }, { "epoch": 9.357791288415092, "grad_norm": 0.78125, "learning_rate": 1.6027783025359483e-07, "loss": 3.926, "step": 28092 }, { "epoch": 9.35812442741734, "grad_norm": 0.7578125, "learning_rate": 1.601114274458776e-07, "loss": 3.9068, "step": 28093 }, { "epoch": 9.358457566419588, "grad_norm": 0.76171875, "learning_rate": 1.5994511013196405e-07, "loss": 4.0057, "step": 28094 }, { "epoch": 9.358790705421837, "grad_norm": 0.7578125, "learning_rate": 1.59778878313791e-07, "loss": 3.9356, "step": 28095 }, { "epoch": 9.359123844424087, "grad_norm": 0.7578125, "learning_rate": 1.5961273199329523e-07, "loss": 4.0136, "step": 28096 }, { "epoch": 9.359456983426334, "grad_norm": 0.7421875, "learning_rate": 1.594466711724127e-07, "loss": 3.9355, "step": 28097 }, { "epoch": 9.359790122428583, "grad_norm": 0.77734375, "learning_rate": 1.5928069585307599e-07, "loss": 4.0187, "step": 28098 }, { "epoch": 9.360123261430832, "grad_norm": 0.7890625, "learning_rate": 1.5911480603722023e-07, "loss": 3.966, "step": 28099 }, { "epoch": 9.360456400433081, "grad_norm": 0.73828125, "learning_rate": 1.5894900172677556e-07, "loss": 4.0369, "step": 28100 }, { "epoch": 9.360789539435329, "grad_norm": 0.76953125, "learning_rate": 1.5878328292367378e-07, "loss": 4.047, "step": 28101 }, { "epoch": 9.361122678437578, "grad_norm": 0.74609375, "learning_rate": 1.5861764962984747e-07, "loss": 3.9388, "step": 28102 }, { "epoch": 9.361455817439827, "grad_norm": 0.734375, "learning_rate": 1.5845210184722265e-07, "loss": 3.9955, "step": 28103 }, { "epoch": 9.361788956442075, "grad_norm": 0.71484375, "learning_rate": 1.582866395777302e-07, "loss": 3.9708, "step": 28104 }, { "epoch": 9.362122095444324, "grad_norm": 0.76953125, "learning_rate": 1.581212628232953e-07, "loss": 4.0135, "step": 28105 }, { "epoch": 9.362455234446573, "grad_norm": 0.75, "learning_rate": 1.579559715858464e-07, "loss": 3.9301, "step": 28106 }, { "epoch": 9.362788373448822, "grad_norm": 0.71875, "learning_rate": 1.5779076586730695e-07, "loss": 3.9841, "step": 28107 }, { "epoch": 9.36312151245107, "grad_norm": 0.765625, "learning_rate": 1.5762564566960124e-07, "loss": 4.0456, "step": 28108 }, { "epoch": 9.363454651453319, "grad_norm": 0.72265625, "learning_rate": 1.5746061099465358e-07, "loss": 4.0462, "step": 28109 }, { "epoch": 9.363787790455568, "grad_norm": 0.79296875, "learning_rate": 1.572956618443866e-07, "loss": 3.9718, "step": 28110 }, { "epoch": 9.364120929457815, "grad_norm": 0.75, "learning_rate": 1.5713079822072042e-07, "loss": 3.994, "step": 28111 }, { "epoch": 9.364454068460065, "grad_norm": 0.7421875, "learning_rate": 1.5696602012557515e-07, "loss": 4.0529, "step": 28112 }, { "epoch": 9.364787207462314, "grad_norm": 0.7109375, "learning_rate": 1.5680132756087096e-07, "loss": 3.966, "step": 28113 }, { "epoch": 9.365120346464563, "grad_norm": 0.765625, "learning_rate": 1.566367205285263e-07, "loss": 3.9655, "step": 28114 }, { "epoch": 9.36545348546681, "grad_norm": 0.76953125, "learning_rate": 1.5647219903045878e-07, "loss": 3.945, "step": 28115 }, { "epoch": 9.36578662446906, "grad_norm": 0.7421875, "learning_rate": 1.5630776306858358e-07, "loss": 3.9259, "step": 28116 }, { "epoch": 9.366119763471309, "grad_norm": 0.765625, "learning_rate": 1.5614341264481663e-07, "loss": 3.9412, "step": 28117 }, { "epoch": 9.366452902473558, "grad_norm": 0.7734375, "learning_rate": 1.559791477610714e-07, "loss": 4.0091, "step": 28118 }, { "epoch": 9.366786041475805, "grad_norm": 0.796875, "learning_rate": 1.5581496841926224e-07, "loss": 3.955, "step": 28119 }, { "epoch": 9.367119180478054, "grad_norm": 0.7890625, "learning_rate": 1.5565087462130173e-07, "loss": 3.9274, "step": 28120 }, { "epoch": 9.367452319480304, "grad_norm": 0.75390625, "learning_rate": 1.5548686636910087e-07, "loss": 3.982, "step": 28121 }, { "epoch": 9.367785458482551, "grad_norm": 0.75390625, "learning_rate": 1.5532294366456895e-07, "loss": 4.0372, "step": 28122 }, { "epoch": 9.3681185974848, "grad_norm": 0.74609375, "learning_rate": 1.551591065096161e-07, "loss": 3.9657, "step": 28123 }, { "epoch": 9.36845173648705, "grad_norm": 0.80078125, "learning_rate": 1.5499535490615252e-07, "loss": 3.9125, "step": 28124 }, { "epoch": 9.368784875489299, "grad_norm": 0.7734375, "learning_rate": 1.5483168885608246e-07, "loss": 3.9707, "step": 28125 }, { "epoch": 9.369118014491546, "grad_norm": 0.75, "learning_rate": 1.5466810836131356e-07, "loss": 3.9427, "step": 28126 }, { "epoch": 9.369451153493795, "grad_norm": 0.765625, "learning_rate": 1.5450461342375183e-07, "loss": 4.0051, "step": 28127 }, { "epoch": 9.369784292496044, "grad_norm": 0.77734375, "learning_rate": 1.5434120404530072e-07, "loss": 3.993, "step": 28128 }, { "epoch": 9.370117431498294, "grad_norm": 0.76953125, "learning_rate": 1.5417788022786373e-07, "loss": 3.9883, "step": 28129 }, { "epoch": 9.370450570500541, "grad_norm": 0.7578125, "learning_rate": 1.5401464197334265e-07, "loss": 3.9974, "step": 28130 }, { "epoch": 9.37078370950279, "grad_norm": 0.72265625, "learning_rate": 1.5385148928364097e-07, "loss": 4.0296, "step": 28131 }, { "epoch": 9.37111684850504, "grad_norm": 0.71875, "learning_rate": 1.5368842216065715e-07, "loss": 3.9569, "step": 28132 }, { "epoch": 9.371449987507287, "grad_norm": 0.75390625, "learning_rate": 1.5352544060629136e-07, "loss": 3.9485, "step": 28133 }, { "epoch": 9.371783126509536, "grad_norm": 0.71875, "learning_rate": 1.533625446224421e-07, "loss": 4.0384, "step": 28134 }, { "epoch": 9.372116265511785, "grad_norm": 0.78125, "learning_rate": 1.5319973421100531e-07, "loss": 3.9769, "step": 28135 }, { "epoch": 9.372449404514034, "grad_norm": 0.75390625, "learning_rate": 1.530370093738795e-07, "loss": 3.9196, "step": 28136 }, { "epoch": 9.372782543516282, "grad_norm": 0.765625, "learning_rate": 1.5287437011295813e-07, "loss": 3.9287, "step": 28137 }, { "epoch": 9.373115682518531, "grad_norm": 0.73046875, "learning_rate": 1.5271181643013722e-07, "loss": 3.9626, "step": 28138 }, { "epoch": 9.37344882152078, "grad_norm": 0.80859375, "learning_rate": 1.5254934832730938e-07, "loss": 3.9299, "step": 28139 }, { "epoch": 9.373781960523027, "grad_norm": 0.7578125, "learning_rate": 1.5238696580636645e-07, "loss": 3.9708, "step": 28140 }, { "epoch": 9.374115099525277, "grad_norm": 0.73046875, "learning_rate": 1.522246688692011e-07, "loss": 3.9885, "step": 28141 }, { "epoch": 9.374448238527526, "grad_norm": 0.75, "learning_rate": 1.5206245751770349e-07, "loss": 3.9539, "step": 28142 }, { "epoch": 9.374781377529775, "grad_norm": 0.78515625, "learning_rate": 1.5190033175376206e-07, "loss": 3.9724, "step": 28143 }, { "epoch": 9.375114516532022, "grad_norm": 0.81640625, "learning_rate": 1.5173829157926532e-07, "loss": 3.9431, "step": 28144 }, { "epoch": 9.375447655534272, "grad_norm": 0.765625, "learning_rate": 1.515763369961018e-07, "loss": 4.0402, "step": 28145 }, { "epoch": 9.37578079453652, "grad_norm": 0.7578125, "learning_rate": 1.5141446800615744e-07, "loss": 4.0017, "step": 28146 }, { "epoch": 9.376113933538768, "grad_norm": 0.765625, "learning_rate": 1.5125268461131742e-07, "loss": 4.0211, "step": 28147 }, { "epoch": 9.376447072541017, "grad_norm": 0.76953125, "learning_rate": 1.5109098681346605e-07, "loss": 3.947, "step": 28148 }, { "epoch": 9.376780211543267, "grad_norm": 0.75390625, "learning_rate": 1.5092937461448603e-07, "loss": 4.0134, "step": 28149 }, { "epoch": 9.377113350545516, "grad_norm": 0.8125, "learning_rate": 1.5076784801626247e-07, "loss": 4.0006, "step": 28150 }, { "epoch": 9.377446489547763, "grad_norm": 0.79296875, "learning_rate": 1.5060640702067474e-07, "loss": 3.9564, "step": 28151 }, { "epoch": 9.377779628550012, "grad_norm": 0.796875, "learning_rate": 1.5044505162960297e-07, "loss": 3.949, "step": 28152 }, { "epoch": 9.378112767552262, "grad_norm": 0.75390625, "learning_rate": 1.5028378184492735e-07, "loss": 4.0243, "step": 28153 }, { "epoch": 9.37844590655451, "grad_norm": 0.765625, "learning_rate": 1.5012259766852639e-07, "loss": 3.8969, "step": 28154 }, { "epoch": 9.378779045556758, "grad_norm": 0.73046875, "learning_rate": 1.4996149910227768e-07, "loss": 3.9819, "step": 28155 }, { "epoch": 9.379112184559007, "grad_norm": 0.73046875, "learning_rate": 1.4980048614805648e-07, "loss": 4.0324, "step": 28156 }, { "epoch": 9.379445323561256, "grad_norm": 0.765625, "learning_rate": 1.4963955880773956e-07, "loss": 3.9734, "step": 28157 }, { "epoch": 9.379778462563504, "grad_norm": 0.75, "learning_rate": 1.4947871708320126e-07, "loss": 4.0198, "step": 28158 }, { "epoch": 9.380111601565753, "grad_norm": 0.765625, "learning_rate": 1.493179609763143e-07, "loss": 3.997, "step": 28159 }, { "epoch": 9.380444740568002, "grad_norm": 0.78125, "learning_rate": 1.4915729048895126e-07, "loss": 4.0339, "step": 28160 }, { "epoch": 9.380777879570251, "grad_norm": 0.734375, "learning_rate": 1.489967056229849e-07, "loss": 3.9943, "step": 28161 }, { "epoch": 9.381111018572499, "grad_norm": 0.77734375, "learning_rate": 1.4883620638028368e-07, "loss": 3.9292, "step": 28162 }, { "epoch": 9.381444157574748, "grad_norm": 0.76171875, "learning_rate": 1.4867579276271777e-07, "loss": 3.997, "step": 28163 }, { "epoch": 9.381777296576997, "grad_norm": 0.7890625, "learning_rate": 1.4851546477215732e-07, "loss": 3.9829, "step": 28164 }, { "epoch": 9.382110435579245, "grad_norm": 0.7265625, "learning_rate": 1.4835522241046755e-07, "loss": 3.9046, "step": 28165 }, { "epoch": 9.382443574581494, "grad_norm": 0.76953125, "learning_rate": 1.4819506567951612e-07, "loss": 3.9181, "step": 28166 }, { "epoch": 9.382776713583743, "grad_norm": 0.77734375, "learning_rate": 1.4803499458116738e-07, "loss": 4.0011, "step": 28167 }, { "epoch": 9.383109852585992, "grad_norm": 0.7578125, "learning_rate": 1.478750091172873e-07, "loss": 3.9531, "step": 28168 }, { "epoch": 9.38344299158824, "grad_norm": 0.72265625, "learning_rate": 1.4771510928973942e-07, "loss": 3.9552, "step": 28169 }, { "epoch": 9.383776130590489, "grad_norm": 0.8125, "learning_rate": 1.475552951003839e-07, "loss": 4.0191, "step": 28170 }, { "epoch": 9.384109269592738, "grad_norm": 0.765625, "learning_rate": 1.473955665510851e-07, "loss": 4.0587, "step": 28171 }, { "epoch": 9.384442408594985, "grad_norm": 0.76953125, "learning_rate": 1.4723592364370152e-07, "loss": 4.0312, "step": 28172 }, { "epoch": 9.384775547597235, "grad_norm": 0.75, "learning_rate": 1.470763663800942e-07, "loss": 4.0322, "step": 28173 }, { "epoch": 9.385108686599484, "grad_norm": 0.74609375, "learning_rate": 1.4691689476211994e-07, "loss": 4.0043, "step": 28174 }, { "epoch": 9.385441825601733, "grad_norm": 0.7265625, "learning_rate": 1.467575087916373e-07, "loss": 3.9632, "step": 28175 }, { "epoch": 9.38577496460398, "grad_norm": 0.74609375, "learning_rate": 1.4659820847050225e-07, "loss": 3.8872, "step": 28176 }, { "epoch": 9.38610810360623, "grad_norm": 0.8046875, "learning_rate": 1.464389938005717e-07, "loss": 3.9813, "step": 28177 }, { "epoch": 9.386441242608479, "grad_norm": 0.7734375, "learning_rate": 1.4627986478369825e-07, "loss": 4.0367, "step": 28178 }, { "epoch": 9.386774381610728, "grad_norm": 0.75390625, "learning_rate": 1.4612082142173633e-07, "loss": 4.014, "step": 28179 }, { "epoch": 9.387107520612975, "grad_norm": 0.8515625, "learning_rate": 1.4596186371653857e-07, "loss": 3.9233, "step": 28180 }, { "epoch": 9.387440659615224, "grad_norm": 0.77734375, "learning_rate": 1.4580299166995604e-07, "loss": 4.0029, "step": 28181 }, { "epoch": 9.387773798617474, "grad_norm": 0.71875, "learning_rate": 1.4564420528383971e-07, "loss": 4.003, "step": 28182 }, { "epoch": 9.388106937619721, "grad_norm": 0.765625, "learning_rate": 1.4548550456003895e-07, "loss": 3.9292, "step": 28183 }, { "epoch": 9.38844007662197, "grad_norm": 0.734375, "learning_rate": 1.453268895004023e-07, "loss": 4.0226, "step": 28184 }, { "epoch": 9.38877321562422, "grad_norm": 0.71875, "learning_rate": 1.4516836010677658e-07, "loss": 3.9241, "step": 28185 }, { "epoch": 9.389106354626469, "grad_norm": 0.75, "learning_rate": 1.450099163810087e-07, "loss": 4.0133, "step": 28186 }, { "epoch": 9.389439493628716, "grad_norm": 0.74609375, "learning_rate": 1.448515583249463e-07, "loss": 3.922, "step": 28187 }, { "epoch": 9.389772632630965, "grad_norm": 0.765625, "learning_rate": 1.446932859404304e-07, "loss": 4.0265, "step": 28188 }, { "epoch": 9.390105771633214, "grad_norm": 0.78515625, "learning_rate": 1.4453509922930625e-07, "loss": 3.9691, "step": 28189 }, { "epoch": 9.390438910635464, "grad_norm": 0.73828125, "learning_rate": 1.443769981934165e-07, "loss": 3.9842, "step": 28190 }, { "epoch": 9.390772049637711, "grad_norm": 0.78125, "learning_rate": 1.44218982834603e-07, "loss": 3.9772, "step": 28191 }, { "epoch": 9.39110518863996, "grad_norm": 0.734375, "learning_rate": 1.440610531547043e-07, "loss": 4.0728, "step": 28192 }, { "epoch": 9.39143832764221, "grad_norm": 0.76953125, "learning_rate": 1.4390320915556226e-07, "loss": 3.9489, "step": 28193 }, { "epoch": 9.391771466644457, "grad_norm": 0.74609375, "learning_rate": 1.4374545083901542e-07, "loss": 4.0221, "step": 28194 }, { "epoch": 9.392104605646706, "grad_norm": 0.734375, "learning_rate": 1.4358777820689896e-07, "loss": 3.9654, "step": 28195 }, { "epoch": 9.392437744648955, "grad_norm": 0.74609375, "learning_rate": 1.4343019126105223e-07, "loss": 3.9928, "step": 28196 }, { "epoch": 9.392770883651204, "grad_norm": 0.76171875, "learning_rate": 1.4327269000330878e-07, "loss": 3.9561, "step": 28197 }, { "epoch": 9.393104022653452, "grad_norm": 0.734375, "learning_rate": 1.4311527443550383e-07, "loss": 4.0193, "step": 28198 }, { "epoch": 9.3934371616557, "grad_norm": 0.77734375, "learning_rate": 1.429579445594717e-07, "loss": 3.9722, "step": 28199 }, { "epoch": 9.39377030065795, "grad_norm": 0.76953125, "learning_rate": 1.4280070037704261e-07, "loss": 3.9485, "step": 28200 }, { "epoch": 9.394103439660197, "grad_norm": 0.78125, "learning_rate": 1.4264354189005096e-07, "loss": 4.0042, "step": 28201 }, { "epoch": 9.394436578662447, "grad_norm": 0.80078125, "learning_rate": 1.4248646910032608e-07, "loss": 3.9808, "step": 28202 }, { "epoch": 9.394769717664696, "grad_norm": 0.703125, "learning_rate": 1.4232948200969652e-07, "loss": 3.989, "step": 28203 }, { "epoch": 9.395102856666945, "grad_norm": 0.7421875, "learning_rate": 1.4217258061999249e-07, "loss": 3.9581, "step": 28204 }, { "epoch": 9.395435995669192, "grad_norm": 0.76953125, "learning_rate": 1.4201576493304087e-07, "loss": 3.9418, "step": 28205 }, { "epoch": 9.395769134671442, "grad_norm": 0.74609375, "learning_rate": 1.4185903495066848e-07, "loss": 3.9901, "step": 28206 }, { "epoch": 9.39610227367369, "grad_norm": 0.765625, "learning_rate": 1.4170239067469975e-07, "loss": 3.9712, "step": 28207 }, { "epoch": 9.396435412675938, "grad_norm": 0.76171875, "learning_rate": 1.4154583210696072e-07, "loss": 4.0173, "step": 28208 }, { "epoch": 9.396768551678187, "grad_norm": 0.75, "learning_rate": 1.4138935924927486e-07, "loss": 3.967, "step": 28209 }, { "epoch": 9.397101690680437, "grad_norm": 0.796875, "learning_rate": 1.412329721034633e-07, "loss": 3.974, "step": 28210 }, { "epoch": 9.397434829682686, "grad_norm": 0.78515625, "learning_rate": 1.410766706713487e-07, "loss": 3.9807, "step": 28211 }, { "epoch": 9.397767968684933, "grad_norm": 0.79296875, "learning_rate": 1.4092045495475132e-07, "loss": 3.966, "step": 28212 }, { "epoch": 9.398101107687182, "grad_norm": 0.75390625, "learning_rate": 1.407643249554913e-07, "loss": 3.9771, "step": 28213 }, { "epoch": 9.398434246689432, "grad_norm": 0.70703125, "learning_rate": 1.4060828067538722e-07, "loss": 3.9448, "step": 28214 }, { "epoch": 9.39876738569168, "grad_norm": 0.734375, "learning_rate": 1.4045232211625598e-07, "loss": 4.04, "step": 28215 }, { "epoch": 9.399100524693928, "grad_norm": 0.7890625, "learning_rate": 1.4029644927991441e-07, "loss": 4.0193, "step": 28216 }, { "epoch": 9.399433663696177, "grad_norm": 0.8046875, "learning_rate": 1.4014066216817779e-07, "loss": 3.9176, "step": 28217 }, { "epoch": 9.399766802698426, "grad_norm": 0.76953125, "learning_rate": 1.3998496078286126e-07, "loss": 3.9737, "step": 28218 }, { "epoch": 9.400099941700674, "grad_norm": 0.75390625, "learning_rate": 1.398293451257776e-07, "loss": 4.0079, "step": 28219 }, { "epoch": 9.400433080702923, "grad_norm": 0.73046875, "learning_rate": 1.3967381519874117e-07, "loss": 4.0323, "step": 28220 }, { "epoch": 9.400766219705172, "grad_norm": 0.75390625, "learning_rate": 1.3951837100356134e-07, "loss": 3.9646, "step": 28221 }, { "epoch": 9.401099358707421, "grad_norm": 0.7578125, "learning_rate": 1.3936301254205002e-07, "loss": 4.0157, "step": 28222 }, { "epoch": 9.401432497709669, "grad_norm": 0.7578125, "learning_rate": 1.3920773981601742e-07, "loss": 3.9436, "step": 28223 }, { "epoch": 9.401765636711918, "grad_norm": 0.7421875, "learning_rate": 1.390525528272704e-07, "loss": 3.9709, "step": 28224 }, { "epoch": 9.402098775714167, "grad_norm": 0.84375, "learning_rate": 1.3889745157761675e-07, "loss": 3.9573, "step": 28225 }, { "epoch": 9.402431914716415, "grad_norm": 0.78125, "learning_rate": 1.387424360688641e-07, "loss": 3.9729, "step": 28226 }, { "epoch": 9.402765053718664, "grad_norm": 0.77734375, "learning_rate": 1.3858750630281775e-07, "loss": 3.9626, "step": 28227 }, { "epoch": 9.403098192720913, "grad_norm": 0.796875, "learning_rate": 1.3843266228128204e-07, "loss": 3.9517, "step": 28228 }, { "epoch": 9.403431331723162, "grad_norm": 0.78125, "learning_rate": 1.3827790400606056e-07, "loss": 3.9407, "step": 28229 }, { "epoch": 9.40376447072541, "grad_norm": 0.7734375, "learning_rate": 1.3812323147895601e-07, "loss": 4.0504, "step": 28230 }, { "epoch": 9.404097609727659, "grad_norm": 0.73828125, "learning_rate": 1.3796864470177028e-07, "loss": 4.0401, "step": 28231 }, { "epoch": 9.404430748729908, "grad_norm": 0.8515625, "learning_rate": 1.3781414367630362e-07, "loss": 3.9439, "step": 28232 }, { "epoch": 9.404763887732157, "grad_norm": 0.75, "learning_rate": 1.3765972840435627e-07, "loss": 3.9368, "step": 28233 }, { "epoch": 9.405097026734405, "grad_norm": 0.765625, "learning_rate": 1.375053988877259e-07, "loss": 3.9906, "step": 28234 }, { "epoch": 9.405430165736654, "grad_norm": 0.78125, "learning_rate": 1.3735115512821027e-07, "loss": 4.0181, "step": 28235 }, { "epoch": 9.405763304738903, "grad_norm": 0.75, "learning_rate": 1.3719699712760548e-07, "loss": 3.977, "step": 28236 }, { "epoch": 9.40609644374115, "grad_norm": 0.8046875, "learning_rate": 1.370429248877092e-07, "loss": 3.9507, "step": 28237 }, { "epoch": 9.4064295827434, "grad_norm": 0.74609375, "learning_rate": 1.3688893841031418e-07, "loss": 3.9629, "step": 28238 }, { "epoch": 9.406762721745649, "grad_norm": 0.79296875, "learning_rate": 1.3673503769721486e-07, "loss": 3.9571, "step": 28239 }, { "epoch": 9.407095860747898, "grad_norm": 0.74609375, "learning_rate": 1.3658122275020224e-07, "loss": 3.996, "step": 28240 }, { "epoch": 9.407428999750145, "grad_norm": 0.7578125, "learning_rate": 1.364274935710716e-07, "loss": 3.969, "step": 28241 }, { "epoch": 9.407762138752394, "grad_norm": 0.76953125, "learning_rate": 1.3627385016160981e-07, "loss": 4.0289, "step": 28242 }, { "epoch": 9.408095277754644, "grad_norm": 0.7109375, "learning_rate": 1.3612029252360798e-07, "loss": 4.0005, "step": 28243 }, { "epoch": 9.408428416756891, "grad_norm": 0.78125, "learning_rate": 1.359668206588538e-07, "loss": 4.064, "step": 28244 }, { "epoch": 9.40876155575914, "grad_norm": 0.78515625, "learning_rate": 1.358134345691367e-07, "loss": 4.007, "step": 28245 }, { "epoch": 9.40909469476139, "grad_norm": 0.7734375, "learning_rate": 1.3566013425624191e-07, "loss": 3.9456, "step": 28246 }, { "epoch": 9.409427833763639, "grad_norm": 0.74609375, "learning_rate": 1.355069197219555e-07, "loss": 3.9849, "step": 28247 }, { "epoch": 9.409760972765886, "grad_norm": 0.73046875, "learning_rate": 1.3535379096806187e-07, "loss": 3.9357, "step": 28248 }, { "epoch": 9.410094111768135, "grad_norm": 0.75, "learning_rate": 1.3520074799634464e-07, "loss": 3.9758, "step": 28249 }, { "epoch": 9.410427250770384, "grad_norm": 0.77734375, "learning_rate": 1.3504779080858732e-07, "loss": 3.9589, "step": 28250 }, { "epoch": 9.410760389772634, "grad_norm": 0.74609375, "learning_rate": 1.3489491940657017e-07, "loss": 3.9241, "step": 28251 }, { "epoch": 9.411093528774881, "grad_norm": 0.76953125, "learning_rate": 1.3474213379207428e-07, "loss": 3.9781, "step": 28252 }, { "epoch": 9.41142666777713, "grad_norm": 0.703125, "learning_rate": 1.345894339668799e-07, "loss": 4.0356, "step": 28253 }, { "epoch": 9.41175980677938, "grad_norm": 0.7578125, "learning_rate": 1.3443681993276558e-07, "loss": 3.9924, "step": 28254 }, { "epoch": 9.412092945781627, "grad_norm": 0.7890625, "learning_rate": 1.342842916915074e-07, "loss": 3.9078, "step": 28255 }, { "epoch": 9.412426084783876, "grad_norm": 0.77734375, "learning_rate": 1.3413184924488314e-07, "loss": 3.9299, "step": 28256 }, { "epoch": 9.412759223786125, "grad_norm": 0.75390625, "learning_rate": 1.3397949259466885e-07, "loss": 3.9903, "step": 28257 }, { "epoch": 9.413092362788374, "grad_norm": 0.78125, "learning_rate": 1.3382722174263895e-07, "loss": 4.0283, "step": 28258 }, { "epoch": 9.413425501790622, "grad_norm": 0.74609375, "learning_rate": 1.3367503669056702e-07, "loss": 3.9429, "step": 28259 }, { "epoch": 9.41375864079287, "grad_norm": 0.76171875, "learning_rate": 1.3352293744022497e-07, "loss": 3.9411, "step": 28260 }, { "epoch": 9.41409177979512, "grad_norm": 0.80078125, "learning_rate": 1.3337092399338475e-07, "loss": 4.0132, "step": 28261 }, { "epoch": 9.414424918797367, "grad_norm": 0.75, "learning_rate": 1.332189963518174e-07, "loss": 3.9856, "step": 28262 }, { "epoch": 9.414758057799617, "grad_norm": 0.78125, "learning_rate": 1.3306715451729235e-07, "loss": 4.0316, "step": 28263 }, { "epoch": 9.415091196801866, "grad_norm": 0.734375, "learning_rate": 1.3291539849157818e-07, "loss": 3.9302, "step": 28264 }, { "epoch": 9.415424335804115, "grad_norm": 0.75390625, "learning_rate": 1.3276372827644268e-07, "loss": 3.9693, "step": 28265 }, { "epoch": 9.415757474806362, "grad_norm": 0.859375, "learning_rate": 1.326121438736519e-07, "loss": 3.9608, "step": 28266 }, { "epoch": 9.416090613808612, "grad_norm": 0.76953125, "learning_rate": 1.3246064528497192e-07, "loss": 3.952, "step": 28267 }, { "epoch": 9.41642375281086, "grad_norm": 0.71875, "learning_rate": 1.3230923251216805e-07, "loss": 3.9472, "step": 28268 }, { "epoch": 9.416756891813108, "grad_norm": 0.76171875, "learning_rate": 1.3215790555700297e-07, "loss": 3.9651, "step": 28269 }, { "epoch": 9.417090030815357, "grad_norm": 0.8125, "learning_rate": 1.320066644212395e-07, "loss": 4.107, "step": 28270 }, { "epoch": 9.417423169817607, "grad_norm": 0.70703125, "learning_rate": 1.3185550910663952e-07, "loss": 4.0142, "step": 28271 }, { "epoch": 9.417756308819856, "grad_norm": 0.7890625, "learning_rate": 1.3170443961496331e-07, "loss": 4.0303, "step": 28272 }, { "epoch": 9.418089447822103, "grad_norm": 0.7578125, "learning_rate": 1.3155345594796947e-07, "loss": 3.9016, "step": 28273 }, { "epoch": 9.418422586824352, "grad_norm": 0.78125, "learning_rate": 1.314025581074191e-07, "loss": 3.9925, "step": 28274 }, { "epoch": 9.418755725826601, "grad_norm": 0.81640625, "learning_rate": 1.3125174609506907e-07, "loss": 3.9698, "step": 28275 }, { "epoch": 9.41908886482885, "grad_norm": 0.73046875, "learning_rate": 1.311010199126747e-07, "loss": 4.0312, "step": 28276 }, { "epoch": 9.419422003831098, "grad_norm": 0.7578125, "learning_rate": 1.309503795619929e-07, "loss": 3.9255, "step": 28277 }, { "epoch": 9.419755142833347, "grad_norm": 0.75, "learning_rate": 1.3079982504477727e-07, "loss": 3.993, "step": 28278 }, { "epoch": 9.420088281835596, "grad_norm": 0.7265625, "learning_rate": 1.3064935636278224e-07, "loss": 3.9875, "step": 28279 }, { "epoch": 9.420421420837844, "grad_norm": 0.7890625, "learning_rate": 1.3049897351775973e-07, "loss": 3.9629, "step": 28280 }, { "epoch": 9.420754559840093, "grad_norm": 0.7734375, "learning_rate": 1.3034867651146254e-07, "loss": 3.9738, "step": 28281 }, { "epoch": 9.421087698842342, "grad_norm": 0.7421875, "learning_rate": 1.301984653456409e-07, "loss": 4.0628, "step": 28282 }, { "epoch": 9.421420837844591, "grad_norm": 0.78125, "learning_rate": 1.3004834002204423e-07, "loss": 3.8927, "step": 28283 }, { "epoch": 9.421753976846839, "grad_norm": 0.78125, "learning_rate": 1.2989830054242036e-07, "loss": 4.045, "step": 28284 }, { "epoch": 9.422087115849088, "grad_norm": 0.75390625, "learning_rate": 1.2974834690851784e-07, "loss": 4.0113, "step": 28285 }, { "epoch": 9.422420254851337, "grad_norm": 0.765625, "learning_rate": 1.295984791220836e-07, "loss": 3.9741, "step": 28286 }, { "epoch": 9.422753393853585, "grad_norm": 0.71484375, "learning_rate": 1.2944869718486295e-07, "loss": 4.0228, "step": 28287 }, { "epoch": 9.423086532855834, "grad_norm": 0.75390625, "learning_rate": 1.2929900109859948e-07, "loss": 4.0155, "step": 28288 }, { "epoch": 9.423419671858083, "grad_norm": 0.75, "learning_rate": 1.2914939086503846e-07, "loss": 3.9609, "step": 28289 }, { "epoch": 9.423752810860332, "grad_norm": 0.76171875, "learning_rate": 1.2899986648592098e-07, "loss": 4.0123, "step": 28290 }, { "epoch": 9.42408594986258, "grad_norm": 0.76953125, "learning_rate": 1.2885042796299068e-07, "loss": 3.9473, "step": 28291 }, { "epoch": 9.424419088864829, "grad_norm": 0.75, "learning_rate": 1.2870107529798613e-07, "loss": 3.9514, "step": 28292 }, { "epoch": 9.424752227867078, "grad_norm": 0.82421875, "learning_rate": 1.2855180849264764e-07, "loss": 3.92, "step": 28293 }, { "epoch": 9.425085366869327, "grad_norm": 0.74609375, "learning_rate": 1.2840262754871463e-07, "loss": 3.9772, "step": 28294 }, { "epoch": 9.425418505871574, "grad_norm": 0.734375, "learning_rate": 1.2825353246792403e-07, "loss": 4.01, "step": 28295 }, { "epoch": 9.425751644873824, "grad_norm": 0.75390625, "learning_rate": 1.2810452325201283e-07, "loss": 3.9528, "step": 28296 }, { "epoch": 9.426084783876073, "grad_norm": 0.80078125, "learning_rate": 1.2795559990271627e-07, "loss": 3.9647, "step": 28297 }, { "epoch": 9.42641792287832, "grad_norm": 0.7734375, "learning_rate": 1.2780676242176964e-07, "loss": 4.0414, "step": 28298 }, { "epoch": 9.42675106188057, "grad_norm": 0.70703125, "learning_rate": 1.276580108109049e-07, "loss": 3.9541, "step": 28299 }, { "epoch": 9.427084200882819, "grad_norm": 0.75390625, "learning_rate": 1.2750934507185647e-07, "loss": 3.9582, "step": 28300 }, { "epoch": 9.427417339885068, "grad_norm": 0.78125, "learning_rate": 1.2736076520635548e-07, "loss": 3.9396, "step": 28301 }, { "epoch": 9.427750478887315, "grad_norm": 0.80859375, "learning_rate": 1.2721227121613223e-07, "loss": 3.9708, "step": 28302 }, { "epoch": 9.428083617889564, "grad_norm": 0.7890625, "learning_rate": 1.2706386310291695e-07, "loss": 4.0167, "step": 28303 }, { "epoch": 9.428416756891814, "grad_norm": 0.74609375, "learning_rate": 1.269155408684383e-07, "loss": 3.9407, "step": 28304 }, { "epoch": 9.428749895894061, "grad_norm": 0.74609375, "learning_rate": 1.267673045144224e-07, "loss": 3.9466, "step": 28305 }, { "epoch": 9.42908303489631, "grad_norm": 0.79296875, "learning_rate": 1.2661915404259788e-07, "loss": 3.9865, "step": 28306 }, { "epoch": 9.42941617389856, "grad_norm": 0.73046875, "learning_rate": 1.2647108945468917e-07, "loss": 3.9105, "step": 28307 }, { "epoch": 9.429749312900809, "grad_norm": 0.73828125, "learning_rate": 1.263231107524207e-07, "loss": 3.911, "step": 28308 }, { "epoch": 9.430082451903056, "grad_norm": 0.734375, "learning_rate": 1.261752179375178e-07, "loss": 3.9269, "step": 28309 }, { "epoch": 9.430415590905305, "grad_norm": 0.75, "learning_rate": 1.2602741101170073e-07, "loss": 4.0511, "step": 28310 }, { "epoch": 9.430748729907554, "grad_norm": 0.74609375, "learning_rate": 1.2587968997669315e-07, "loss": 3.9187, "step": 28311 }, { "epoch": 9.431081868909803, "grad_norm": 0.71875, "learning_rate": 1.2573205483421447e-07, "loss": 3.9897, "step": 28312 }, { "epoch": 9.431415007912051, "grad_norm": 0.71484375, "learning_rate": 1.25584505585985e-07, "loss": 3.987, "step": 28313 }, { "epoch": 9.4317481469143, "grad_norm": 0.7890625, "learning_rate": 1.2543704223372255e-07, "loss": 3.9561, "step": 28314 }, { "epoch": 9.43208128591655, "grad_norm": 0.77734375, "learning_rate": 1.2528966477914654e-07, "loss": 4.0168, "step": 28315 }, { "epoch": 9.432414424918797, "grad_norm": 0.76171875, "learning_rate": 1.2514237322397147e-07, "loss": 4.0207, "step": 28316 }, { "epoch": 9.432747563921046, "grad_norm": 0.77734375, "learning_rate": 1.2499516756991263e-07, "loss": 3.9189, "step": 28317 }, { "epoch": 9.433080702923295, "grad_norm": 0.80859375, "learning_rate": 1.248480478186878e-07, "loss": 4.0192, "step": 28318 }, { "epoch": 9.433413841925544, "grad_norm": 0.7578125, "learning_rate": 1.2470101397200813e-07, "loss": 3.9438, "step": 28319 }, { "epoch": 9.433746980927792, "grad_norm": 0.78515625, "learning_rate": 1.2455406603158635e-07, "loss": 4.0675, "step": 28320 }, { "epoch": 9.43408011993004, "grad_norm": 0.7265625, "learning_rate": 1.2440720399913453e-07, "loss": 4.0463, "step": 28321 }, { "epoch": 9.43441325893229, "grad_norm": 0.76171875, "learning_rate": 1.242604278763637e-07, "loss": 3.9703, "step": 28322 }, { "epoch": 9.434746397934537, "grad_norm": 0.75, "learning_rate": 1.241137376649834e-07, "loss": 3.96, "step": 28323 }, { "epoch": 9.435079536936787, "grad_norm": 0.76171875, "learning_rate": 1.2396713336670057e-07, "loss": 3.9975, "step": 28324 }, { "epoch": 9.435412675939036, "grad_norm": 0.7421875, "learning_rate": 1.2382061498322466e-07, "loss": 3.9914, "step": 28325 }, { "epoch": 9.435745814941285, "grad_norm": 0.75390625, "learning_rate": 1.2367418251626266e-07, "loss": 3.9822, "step": 28326 }, { "epoch": 9.436078953943532, "grad_norm": 0.7578125, "learning_rate": 1.2352783596751905e-07, "loss": 3.9617, "step": 28327 }, { "epoch": 9.436412092945782, "grad_norm": 0.78515625, "learning_rate": 1.2338157533869743e-07, "loss": 3.9307, "step": 28328 }, { "epoch": 9.43674523194803, "grad_norm": 0.83203125, "learning_rate": 1.2323540063150396e-07, "loss": 3.8909, "step": 28329 }, { "epoch": 9.437078370950278, "grad_norm": 0.7578125, "learning_rate": 1.2308931184763978e-07, "loss": 4.0379, "step": 28330 }, { "epoch": 9.437411509952527, "grad_norm": 0.73828125, "learning_rate": 1.2294330898880768e-07, "loss": 4.0331, "step": 28331 }, { "epoch": 9.437744648954776, "grad_norm": 0.79296875, "learning_rate": 1.2279739205670632e-07, "loss": 3.9474, "step": 28332 }, { "epoch": 9.438077787957026, "grad_norm": 0.74609375, "learning_rate": 1.22651561053036e-07, "loss": 3.9224, "step": 28333 }, { "epoch": 9.438410926959273, "grad_norm": 0.74609375, "learning_rate": 1.22505815979497e-07, "loss": 3.9926, "step": 28334 }, { "epoch": 9.438744065961522, "grad_norm": 0.72265625, "learning_rate": 1.2236015683778467e-07, "loss": 3.9588, "step": 28335 }, { "epoch": 9.439077204963771, "grad_norm": 0.75, "learning_rate": 1.2221458362959597e-07, "loss": 4.0228, "step": 28336 }, { "epoch": 9.43941034396602, "grad_norm": 0.7578125, "learning_rate": 1.2206909635662788e-07, "loss": 4.0305, "step": 28337 }, { "epoch": 9.439743482968268, "grad_norm": 0.78515625, "learning_rate": 1.2192369502057489e-07, "loss": 3.9556, "step": 28338 }, { "epoch": 9.440076621970517, "grad_norm": 0.76953125, "learning_rate": 1.2177837962312894e-07, "loss": 3.9977, "step": 28339 }, { "epoch": 9.440409760972766, "grad_norm": 0.7578125, "learning_rate": 1.2163315016598453e-07, "loss": 4.004, "step": 28340 }, { "epoch": 9.440742899975014, "grad_norm": 0.765625, "learning_rate": 1.2148800665083198e-07, "loss": 3.9906, "step": 28341 }, { "epoch": 9.441076038977263, "grad_norm": 0.7265625, "learning_rate": 1.2134294907936162e-07, "loss": 3.9861, "step": 28342 }, { "epoch": 9.441409177979512, "grad_norm": 0.75, "learning_rate": 1.2119797745326456e-07, "loss": 3.9593, "step": 28343 }, { "epoch": 9.441742316981761, "grad_norm": 0.74609375, "learning_rate": 1.210530917742278e-07, "loss": 4.0673, "step": 28344 }, { "epoch": 9.442075455984009, "grad_norm": 0.75390625, "learning_rate": 1.2090829204394083e-07, "loss": 4.0064, "step": 28345 }, { "epoch": 9.442408594986258, "grad_norm": 0.73828125, "learning_rate": 1.2076357826408895e-07, "loss": 3.9602, "step": 28346 }, { "epoch": 9.442741733988507, "grad_norm": 0.734375, "learning_rate": 1.206189504363575e-07, "loss": 3.9534, "step": 28347 }, { "epoch": 9.443074872990755, "grad_norm": 0.81640625, "learning_rate": 1.204744085624318e-07, "loss": 3.9862, "step": 28348 }, { "epoch": 9.443408011993004, "grad_norm": 0.77734375, "learning_rate": 1.203299526439955e-07, "loss": 3.9116, "step": 28349 }, { "epoch": 9.443741150995253, "grad_norm": 0.8125, "learning_rate": 1.2018558268273056e-07, "loss": 3.9765, "step": 28350 }, { "epoch": 9.444074289997502, "grad_norm": 0.74609375, "learning_rate": 1.20041298680319e-07, "loss": 3.9352, "step": 28351 }, { "epoch": 9.44440742899975, "grad_norm": 0.72265625, "learning_rate": 1.198971006384411e-07, "loss": 3.8995, "step": 28352 }, { "epoch": 9.444740568001999, "grad_norm": 0.76953125, "learning_rate": 1.1975298855877724e-07, "loss": 3.9487, "step": 28353 }, { "epoch": 9.445073707004248, "grad_norm": 0.765625, "learning_rate": 1.196089624430044e-07, "loss": 4.0526, "step": 28354 }, { "epoch": 9.445406846006497, "grad_norm": 0.828125, "learning_rate": 1.1946502229280205e-07, "loss": 3.9332, "step": 28355 }, { "epoch": 9.445739985008744, "grad_norm": 0.796875, "learning_rate": 1.1932116810984634e-07, "loss": 3.992, "step": 28356 }, { "epoch": 9.446073124010994, "grad_norm": 0.7890625, "learning_rate": 1.1917739989581261e-07, "loss": 3.9332, "step": 28357 }, { "epoch": 9.446406263013243, "grad_norm": 0.75, "learning_rate": 1.1903371765237453e-07, "loss": 3.9409, "step": 28358 }, { "epoch": 9.44673940201549, "grad_norm": 0.75, "learning_rate": 1.1889012138120659e-07, "loss": 3.8998, "step": 28359 }, { "epoch": 9.44707254101774, "grad_norm": 0.7578125, "learning_rate": 1.187466110839816e-07, "loss": 3.9811, "step": 28360 }, { "epoch": 9.447405680019989, "grad_norm": 0.73828125, "learning_rate": 1.1860318676237075e-07, "loss": 4.0517, "step": 28361 }, { "epoch": 9.447738819022238, "grad_norm": 0.71875, "learning_rate": 1.1845984841804436e-07, "loss": 3.9455, "step": 28362 }, { "epoch": 9.448071958024485, "grad_norm": 0.80859375, "learning_rate": 1.1831659605267276e-07, "loss": 3.9986, "step": 28363 }, { "epoch": 9.448405097026734, "grad_norm": 0.7890625, "learning_rate": 1.181734296679246e-07, "loss": 4.0348, "step": 28364 }, { "epoch": 9.448738236028984, "grad_norm": 0.7421875, "learning_rate": 1.1803034926546525e-07, "loss": 3.9888, "step": 28365 }, { "epoch": 9.449071375031231, "grad_norm": 0.80078125, "learning_rate": 1.17887354846965e-07, "loss": 3.9692, "step": 28366 }, { "epoch": 9.44940451403348, "grad_norm": 0.76953125, "learning_rate": 1.1774444641408671e-07, "loss": 3.9968, "step": 28367 }, { "epoch": 9.44973765303573, "grad_norm": 0.75390625, "learning_rate": 1.1760162396849488e-07, "loss": 3.9388, "step": 28368 }, { "epoch": 9.450070792037979, "grad_norm": 0.75, "learning_rate": 1.1745888751185486e-07, "loss": 3.9908, "step": 28369 }, { "epoch": 9.450403931040226, "grad_norm": 0.7890625, "learning_rate": 1.1731623704582778e-07, "loss": 3.9365, "step": 28370 }, { "epoch": 9.450737070042475, "grad_norm": 0.765625, "learning_rate": 1.1717367257207651e-07, "loss": 4.0329, "step": 28371 }, { "epoch": 9.451070209044724, "grad_norm": 0.78125, "learning_rate": 1.170311940922597e-07, "loss": 4.004, "step": 28372 }, { "epoch": 9.451403348046973, "grad_norm": 0.77734375, "learning_rate": 1.1688880160803855e-07, "loss": 4.0242, "step": 28373 }, { "epoch": 9.45173648704922, "grad_norm": 0.75, "learning_rate": 1.1674649512107088e-07, "loss": 3.9935, "step": 28374 }, { "epoch": 9.45206962605147, "grad_norm": 0.75, "learning_rate": 1.1660427463301454e-07, "loss": 3.9504, "step": 28375 }, { "epoch": 9.45240276505372, "grad_norm": 0.796875, "learning_rate": 1.1646214014552569e-07, "loss": 4.0174, "step": 28376 }, { "epoch": 9.452735904055967, "grad_norm": 0.76953125, "learning_rate": 1.1632009166026053e-07, "loss": 3.9999, "step": 28377 }, { "epoch": 9.453069043058216, "grad_norm": 0.7734375, "learning_rate": 1.1617812917887355e-07, "loss": 3.9831, "step": 28378 }, { "epoch": 9.453402182060465, "grad_norm": 0.80078125, "learning_rate": 1.160362527030176e-07, "loss": 4.0258, "step": 28379 }, { "epoch": 9.453735321062714, "grad_norm": 0.8046875, "learning_rate": 1.1589446223434552e-07, "loss": 4.0518, "step": 28380 }, { "epoch": 9.454068460064962, "grad_norm": 0.75390625, "learning_rate": 1.1575275777450933e-07, "loss": 4.0824, "step": 28381 }, { "epoch": 9.45440159906721, "grad_norm": 0.76171875, "learning_rate": 1.1561113932515855e-07, "loss": 4.0222, "step": 28382 }, { "epoch": 9.45473473806946, "grad_norm": 0.80078125, "learning_rate": 1.1546960688794434e-07, "loss": 3.9711, "step": 28383 }, { "epoch": 9.455067877071707, "grad_norm": 0.75390625, "learning_rate": 1.1532816046451456e-07, "loss": 4.0001, "step": 28384 }, { "epoch": 9.455401016073957, "grad_norm": 0.75, "learning_rate": 1.1518680005651622e-07, "loss": 3.9434, "step": 28385 }, { "epoch": 9.455734155076206, "grad_norm": 0.76953125, "learning_rate": 1.150455256655955e-07, "loss": 3.9953, "step": 28386 }, { "epoch": 9.456067294078455, "grad_norm": 0.74609375, "learning_rate": 1.1490433729339861e-07, "loss": 4.0653, "step": 28387 }, { "epoch": 9.456400433080702, "grad_norm": 0.72265625, "learning_rate": 1.1476323494157087e-07, "loss": 3.9604, "step": 28388 }, { "epoch": 9.456733572082952, "grad_norm": 0.8125, "learning_rate": 1.1462221861175515e-07, "loss": 3.9988, "step": 28389 }, { "epoch": 9.4570667110852, "grad_norm": 0.7734375, "learning_rate": 1.1448128830559345e-07, "loss": 3.9987, "step": 28390 }, { "epoch": 9.457399850087448, "grad_norm": 0.74609375, "learning_rate": 1.1434044402472698e-07, "loss": 3.958, "step": 28391 }, { "epoch": 9.457732989089697, "grad_norm": 0.78515625, "learning_rate": 1.1419968577079776e-07, "loss": 3.9776, "step": 28392 }, { "epoch": 9.458066128091946, "grad_norm": 0.765625, "learning_rate": 1.1405901354544529e-07, "loss": 3.9315, "step": 28393 }, { "epoch": 9.458399267094196, "grad_norm": 0.734375, "learning_rate": 1.1391842735030661e-07, "loss": 3.923, "step": 28394 }, { "epoch": 9.458732406096443, "grad_norm": 0.71875, "learning_rate": 1.137779271870204e-07, "loss": 4.03, "step": 28395 }, { "epoch": 9.459065545098692, "grad_norm": 0.8125, "learning_rate": 1.1363751305722203e-07, "loss": 3.9631, "step": 28396 }, { "epoch": 9.459398684100941, "grad_norm": 0.76171875, "learning_rate": 1.1349718496254851e-07, "loss": 4.0502, "step": 28397 }, { "epoch": 9.45973182310319, "grad_norm": 0.734375, "learning_rate": 1.1335694290463356e-07, "loss": 3.972, "step": 28398 }, { "epoch": 9.460064962105438, "grad_norm": 0.703125, "learning_rate": 1.1321678688511083e-07, "loss": 3.9799, "step": 28399 }, { "epoch": 9.460398101107687, "grad_norm": 0.78125, "learning_rate": 1.1307671690561239e-07, "loss": 3.9647, "step": 28400 }, { "epoch": 9.460731240109936, "grad_norm": 0.78125, "learning_rate": 1.1293673296777024e-07, "loss": 3.9929, "step": 28401 }, { "epoch": 9.461064379112184, "grad_norm": 0.75390625, "learning_rate": 1.127968350732156e-07, "loss": 3.9548, "step": 28402 }, { "epoch": 9.461397518114433, "grad_norm": 0.78125, "learning_rate": 1.126570232235763e-07, "loss": 3.9872, "step": 28403 }, { "epoch": 9.461730657116682, "grad_norm": 0.8046875, "learning_rate": 1.1251729742048189e-07, "loss": 3.947, "step": 28404 }, { "epoch": 9.462063796118931, "grad_norm": 0.73046875, "learning_rate": 1.1237765766555941e-07, "loss": 4.0323, "step": 28405 }, { "epoch": 9.462396935121179, "grad_norm": 0.7734375, "learning_rate": 1.1223810396043588e-07, "loss": 4.0677, "step": 28406 }, { "epoch": 9.462730074123428, "grad_norm": 0.72265625, "learning_rate": 1.120986363067375e-07, "loss": 4.0139, "step": 28407 }, { "epoch": 9.463063213125677, "grad_norm": 0.72265625, "learning_rate": 1.1195925470608714e-07, "loss": 3.9866, "step": 28408 }, { "epoch": 9.463396352127926, "grad_norm": 0.78125, "learning_rate": 1.1181995916010851e-07, "loss": 3.9895, "step": 28409 }, { "epoch": 9.463729491130174, "grad_norm": 0.7578125, "learning_rate": 1.1168074967042447e-07, "loss": 4.0595, "step": 28410 }, { "epoch": 9.464062630132423, "grad_norm": 0.7578125, "learning_rate": 1.1154162623865787e-07, "loss": 3.9914, "step": 28411 }, { "epoch": 9.464395769134672, "grad_norm": 0.76171875, "learning_rate": 1.1140258886642662e-07, "loss": 3.9527, "step": 28412 }, { "epoch": 9.46472890813692, "grad_norm": 0.76171875, "learning_rate": 1.1126363755535273e-07, "loss": 3.9886, "step": 28413 }, { "epoch": 9.465062047139169, "grad_norm": 0.74609375, "learning_rate": 1.1112477230705243e-07, "loss": 4.0607, "step": 28414 }, { "epoch": 9.465395186141418, "grad_norm": 0.71484375, "learning_rate": 1.1098599312314522e-07, "loss": 4.0093, "step": 28415 }, { "epoch": 9.465728325143667, "grad_norm": 0.80859375, "learning_rate": 1.1084730000524568e-07, "loss": 3.9632, "step": 28416 }, { "epoch": 9.466061464145914, "grad_norm": 0.734375, "learning_rate": 1.1070869295497083e-07, "loss": 4.0042, "step": 28417 }, { "epoch": 9.466394603148164, "grad_norm": 0.734375, "learning_rate": 1.1057017197393438e-07, "loss": 4.0598, "step": 28418 }, { "epoch": 9.466727742150413, "grad_norm": 0.77734375, "learning_rate": 1.1043173706375003e-07, "loss": 4.0046, "step": 28419 }, { "epoch": 9.46706088115266, "grad_norm": 0.7890625, "learning_rate": 1.1029338822602985e-07, "loss": 3.987, "step": 28420 }, { "epoch": 9.46739402015491, "grad_norm": 0.76171875, "learning_rate": 1.1015512546238588e-07, "loss": 4.098, "step": 28421 }, { "epoch": 9.467727159157159, "grad_norm": 0.80078125, "learning_rate": 1.1001694877442847e-07, "loss": 3.8734, "step": 28422 }, { "epoch": 9.468060298159408, "grad_norm": 0.75, "learning_rate": 1.0987885816376719e-07, "loss": 3.9833, "step": 28423 }, { "epoch": 9.468393437161655, "grad_norm": 0.75390625, "learning_rate": 1.0974085363200908e-07, "loss": 3.9895, "step": 28424 }, { "epoch": 9.468726576163904, "grad_norm": 0.80859375, "learning_rate": 1.096029351807637e-07, "loss": 4.0532, "step": 28425 }, { "epoch": 9.469059715166154, "grad_norm": 0.76171875, "learning_rate": 1.0946510281163641e-07, "loss": 4.0057, "step": 28426 }, { "epoch": 9.469392854168401, "grad_norm": 0.80078125, "learning_rate": 1.093273565262326e-07, "loss": 4.0151, "step": 28427 }, { "epoch": 9.46972599317065, "grad_norm": 0.78125, "learning_rate": 1.09189696326156e-07, "loss": 4.0006, "step": 28428 }, { "epoch": 9.4700591321729, "grad_norm": 0.80859375, "learning_rate": 1.0905212221301281e-07, "loss": 4.023, "step": 28429 }, { "epoch": 9.470392271175148, "grad_norm": 0.734375, "learning_rate": 1.0891463418840259e-07, "loss": 3.9044, "step": 28430 }, { "epoch": 9.470725410177396, "grad_norm": 0.8046875, "learning_rate": 1.0877723225392821e-07, "loss": 3.9895, "step": 28431 }, { "epoch": 9.471058549179645, "grad_norm": 0.734375, "learning_rate": 1.0863991641118926e-07, "loss": 3.9992, "step": 28432 }, { "epoch": 9.471391688181894, "grad_norm": 0.73046875, "learning_rate": 1.0850268666178609e-07, "loss": 3.9792, "step": 28433 }, { "epoch": 9.471724827184143, "grad_norm": 0.7734375, "learning_rate": 1.0836554300731661e-07, "loss": 3.9908, "step": 28434 }, { "epoch": 9.47205796618639, "grad_norm": 0.73828125, "learning_rate": 1.0822848544937785e-07, "loss": 3.9767, "step": 28435 }, { "epoch": 9.47239110518864, "grad_norm": 0.73828125, "learning_rate": 1.080915139895669e-07, "loss": 3.995, "step": 28436 }, { "epoch": 9.47272424419089, "grad_norm": 0.75390625, "learning_rate": 1.0795462862947913e-07, "loss": 3.9273, "step": 28437 }, { "epoch": 9.473057383193137, "grad_norm": 0.76171875, "learning_rate": 1.0781782937070828e-07, "loss": 4.0798, "step": 28438 }, { "epoch": 9.473390522195386, "grad_norm": 0.73046875, "learning_rate": 1.0768111621484888e-07, "loss": 4.0534, "step": 28439 }, { "epoch": 9.473723661197635, "grad_norm": 0.7578125, "learning_rate": 1.0754448916349302e-07, "loss": 3.9918, "step": 28440 }, { "epoch": 9.474056800199884, "grad_norm": 0.8046875, "learning_rate": 1.0740794821823108e-07, "loss": 4.0293, "step": 28441 }, { "epoch": 9.474389939202132, "grad_norm": 0.7890625, "learning_rate": 1.0727149338065429e-07, "loss": 3.9825, "step": 28442 }, { "epoch": 9.47472307820438, "grad_norm": 0.765625, "learning_rate": 1.071351246523522e-07, "loss": 4.0199, "step": 28443 }, { "epoch": 9.47505621720663, "grad_norm": 0.75, "learning_rate": 1.0699884203491273e-07, "loss": 3.9839, "step": 28444 }, { "epoch": 9.475389356208877, "grad_norm": 0.75, "learning_rate": 1.0686264552992458e-07, "loss": 3.9674, "step": 28445 }, { "epoch": 9.475722495211127, "grad_norm": 0.7890625, "learning_rate": 1.0672653513897151e-07, "loss": 3.9436, "step": 28446 }, { "epoch": 9.476055634213376, "grad_norm": 0.73828125, "learning_rate": 1.0659051086364224e-07, "loss": 3.9497, "step": 28447 }, { "epoch": 9.476388773215625, "grad_norm": 0.72265625, "learning_rate": 1.06454572705518e-07, "loss": 4.002, "step": 28448 }, { "epoch": 9.476721912217872, "grad_norm": 0.75, "learning_rate": 1.063187206661842e-07, "loss": 4.0305, "step": 28449 }, { "epoch": 9.477055051220121, "grad_norm": 0.7421875, "learning_rate": 1.0618295474722206e-07, "loss": 4.0184, "step": 28450 }, { "epoch": 9.47738819022237, "grad_norm": 0.75, "learning_rate": 1.0604727495021449e-07, "loss": 4.0676, "step": 28451 }, { "epoch": 9.477721329224618, "grad_norm": 0.73828125, "learning_rate": 1.0591168127674022e-07, "loss": 3.9935, "step": 28452 }, { "epoch": 9.478054468226867, "grad_norm": 0.7421875, "learning_rate": 1.0577617372837884e-07, "loss": 4.0736, "step": 28453 }, { "epoch": 9.478387607229116, "grad_norm": 0.77734375, "learning_rate": 1.0564075230670989e-07, "loss": 3.9885, "step": 28454 }, { "epoch": 9.478720746231366, "grad_norm": 0.7578125, "learning_rate": 1.0550541701330962e-07, "loss": 3.9887, "step": 28455 }, { "epoch": 9.479053885233613, "grad_norm": 0.74609375, "learning_rate": 1.0537016784975512e-07, "loss": 4.0019, "step": 28456 }, { "epoch": 9.479387024235862, "grad_norm": 0.7421875, "learning_rate": 1.0523500481762177e-07, "loss": 3.9581, "step": 28457 }, { "epoch": 9.479720163238111, "grad_norm": 0.77734375, "learning_rate": 1.0509992791848333e-07, "loss": 3.9718, "step": 28458 }, { "epoch": 9.48005330224036, "grad_norm": 0.7734375, "learning_rate": 1.0496493715391353e-07, "loss": 3.8959, "step": 28459 }, { "epoch": 9.480386441242608, "grad_norm": 0.7890625, "learning_rate": 1.0483003252548363e-07, "loss": 3.9226, "step": 28460 }, { "epoch": 9.480719580244857, "grad_norm": 0.73828125, "learning_rate": 1.046952140347665e-07, "loss": 3.9461, "step": 28461 }, { "epoch": 9.481052719247106, "grad_norm": 0.7578125, "learning_rate": 1.045604816833326e-07, "loss": 3.9501, "step": 28462 }, { "epoch": 9.481385858249354, "grad_norm": 0.7109375, "learning_rate": 1.0442583547274981e-07, "loss": 3.9421, "step": 28463 }, { "epoch": 9.481718997251603, "grad_norm": 0.76953125, "learning_rate": 1.0429127540458688e-07, "loss": 3.9976, "step": 28464 }, { "epoch": 9.482052136253852, "grad_norm": 0.76953125, "learning_rate": 1.0415680148041257e-07, "loss": 3.9789, "step": 28465 }, { "epoch": 9.482385275256101, "grad_norm": 0.72265625, "learning_rate": 1.0402241370179228e-07, "loss": 3.9105, "step": 28466 }, { "epoch": 9.482718414258349, "grad_norm": 0.77734375, "learning_rate": 1.0388811207029059e-07, "loss": 3.9747, "step": 28467 }, { "epoch": 9.483051553260598, "grad_norm": 0.80078125, "learning_rate": 1.0375389658747209e-07, "loss": 3.9503, "step": 28468 }, { "epoch": 9.483384692262847, "grad_norm": 0.75390625, "learning_rate": 1.0361976725490052e-07, "loss": 3.953, "step": 28469 }, { "epoch": 9.483717831265096, "grad_norm": 0.79296875, "learning_rate": 1.034857240741388e-07, "loss": 3.9841, "step": 28470 }, { "epoch": 9.484050970267344, "grad_norm": 0.72265625, "learning_rate": 1.0335176704674736e-07, "loss": 3.9521, "step": 28471 }, { "epoch": 9.484384109269593, "grad_norm": 0.7734375, "learning_rate": 1.0321789617428579e-07, "loss": 3.9237, "step": 28472 }, { "epoch": 9.484717248271842, "grad_norm": 0.7890625, "learning_rate": 1.0308411145831532e-07, "loss": 3.97, "step": 28473 }, { "epoch": 9.48505038727409, "grad_norm": 0.78125, "learning_rate": 1.0295041290039387e-07, "loss": 4.0043, "step": 28474 }, { "epoch": 9.485383526276339, "grad_norm": 0.76953125, "learning_rate": 1.0281680050207686e-07, "loss": 4.0217, "step": 28475 }, { "epoch": 9.485716665278588, "grad_norm": 0.765625, "learning_rate": 1.0268327426492224e-07, "loss": 3.9066, "step": 28476 }, { "epoch": 9.486049804280837, "grad_norm": 0.765625, "learning_rate": 1.0254983419048541e-07, "loss": 3.9649, "step": 28477 }, { "epoch": 9.486382943283084, "grad_norm": 0.828125, "learning_rate": 1.024164802803193e-07, "loss": 4.0574, "step": 28478 }, { "epoch": 9.486716082285334, "grad_norm": 0.73046875, "learning_rate": 1.022832125359785e-07, "loss": 3.9662, "step": 28479 }, { "epoch": 9.487049221287583, "grad_norm": 0.75390625, "learning_rate": 1.021500309590151e-07, "loss": 3.9697, "step": 28480 }, { "epoch": 9.48738236028983, "grad_norm": 0.75390625, "learning_rate": 1.0201693555097952e-07, "loss": 4.0245, "step": 28481 }, { "epoch": 9.48771549929208, "grad_norm": 0.71484375, "learning_rate": 1.0188392631342302e-07, "loss": 4.0481, "step": 28482 }, { "epoch": 9.488048638294329, "grad_norm": 0.796875, "learning_rate": 1.0175100324789438e-07, "loss": 3.9827, "step": 28483 }, { "epoch": 9.488381777296578, "grad_norm": 0.78125, "learning_rate": 1.0161816635594234e-07, "loss": 3.965, "step": 28484 }, { "epoch": 9.488714916298825, "grad_norm": 0.7734375, "learning_rate": 1.0148541563911318e-07, "loss": 3.9692, "step": 28485 }, { "epoch": 9.489048055301074, "grad_norm": 0.76953125, "learning_rate": 1.0135275109895398e-07, "loss": 3.9974, "step": 28486 }, { "epoch": 9.489381194303323, "grad_norm": 0.74609375, "learning_rate": 1.0122017273701017e-07, "loss": 3.9597, "step": 28487 }, { "epoch": 9.489714333305571, "grad_norm": 0.73046875, "learning_rate": 1.0108768055482554e-07, "loss": 4.0048, "step": 28488 }, { "epoch": 9.49004747230782, "grad_norm": 0.73828125, "learning_rate": 1.0095527455394299e-07, "loss": 4.0416, "step": 28489 }, { "epoch": 9.49038061131007, "grad_norm": 0.71875, "learning_rate": 1.0082295473590464e-07, "loss": 3.9493, "step": 28490 }, { "epoch": 9.490713750312318, "grad_norm": 0.7578125, "learning_rate": 1.0069072110225341e-07, "loss": 4.0123, "step": 28491 }, { "epoch": 9.491046889314566, "grad_norm": 0.80859375, "learning_rate": 1.0055857365452891e-07, "loss": 3.9474, "step": 28492 }, { "epoch": 9.491380028316815, "grad_norm": 0.77734375, "learning_rate": 1.0042651239426826e-07, "loss": 3.9043, "step": 28493 }, { "epoch": 9.491713167319064, "grad_norm": 0.76953125, "learning_rate": 1.0029453732301186e-07, "loss": 3.9643, "step": 28494 }, { "epoch": 9.492046306321313, "grad_norm": 0.74609375, "learning_rate": 1.0016264844229684e-07, "loss": 3.9262, "step": 28495 }, { "epoch": 9.49237944532356, "grad_norm": 0.77734375, "learning_rate": 1.0003084575365861e-07, "loss": 3.9683, "step": 28496 }, { "epoch": 9.49271258432581, "grad_norm": 0.7734375, "learning_rate": 9.989912925863181e-08, "loss": 4.0293, "step": 28497 }, { "epoch": 9.49304572332806, "grad_norm": 0.75, "learning_rate": 9.97674989587527e-08, "loss": 3.9493, "step": 28498 }, { "epoch": 9.493378862330307, "grad_norm": 0.79296875, "learning_rate": 9.963595485555338e-08, "loss": 3.9696, "step": 28499 }, { "epoch": 9.493712001332556, "grad_norm": 0.73828125, "learning_rate": 9.950449695056596e-08, "loss": 3.918, "step": 28500 }, { "epoch": 9.494045140334805, "grad_norm": 0.74609375, "learning_rate": 9.937312524532172e-08, "loss": 3.928, "step": 28501 }, { "epoch": 9.494378279337054, "grad_norm": 0.7421875, "learning_rate": 9.924183974135026e-08, "loss": 4.0219, "step": 28502 }, { "epoch": 9.494711418339302, "grad_norm": 0.78515625, "learning_rate": 9.911064044018203e-08, "loss": 3.9533, "step": 28503 }, { "epoch": 9.49504455734155, "grad_norm": 0.7578125, "learning_rate": 9.897952734334414e-08, "loss": 3.9909, "step": 28504 }, { "epoch": 9.4953776963438, "grad_norm": 0.75, "learning_rate": 9.884850045236372e-08, "loss": 4.0583, "step": 28505 }, { "epoch": 9.495710835346047, "grad_norm": 0.7421875, "learning_rate": 9.871755976876868e-08, "loss": 4.0022, "step": 28506 }, { "epoch": 9.496043974348297, "grad_norm": 0.765625, "learning_rate": 9.858670529408197e-08, "loss": 4.0227, "step": 28507 }, { "epoch": 9.496377113350546, "grad_norm": 0.76171875, "learning_rate": 9.845593702982825e-08, "loss": 4.0309, "step": 28508 }, { "epoch": 9.496710252352795, "grad_norm": 0.71875, "learning_rate": 9.832525497753208e-08, "loss": 3.9188, "step": 28509 }, { "epoch": 9.497043391355042, "grad_norm": 0.7421875, "learning_rate": 9.819465913871478e-08, "loss": 4.0054, "step": 28510 }, { "epoch": 9.497376530357291, "grad_norm": 0.734375, "learning_rate": 9.806414951489761e-08, "loss": 4.0199, "step": 28511 }, { "epoch": 9.49770966935954, "grad_norm": 0.78125, "learning_rate": 9.793372610759937e-08, "loss": 3.9568, "step": 28512 }, { "epoch": 9.498042808361788, "grad_norm": 0.79296875, "learning_rate": 9.780338891834134e-08, "loss": 3.9883, "step": 28513 }, { "epoch": 9.498375947364037, "grad_norm": 0.8046875, "learning_rate": 9.76731379486398e-08, "loss": 3.9475, "step": 28514 }, { "epoch": 9.498709086366286, "grad_norm": 0.75, "learning_rate": 9.754297320001354e-08, "loss": 3.9921, "step": 28515 }, { "epoch": 9.499042225368536, "grad_norm": 0.734375, "learning_rate": 9.741289467397718e-08, "loss": 4.0201, "step": 28516 }, { "epoch": 9.499375364370783, "grad_norm": 0.75390625, "learning_rate": 9.728290237204784e-08, "loss": 4.0198, "step": 28517 }, { "epoch": 9.499708503373032, "grad_norm": 0.796875, "learning_rate": 9.715299629573764e-08, "loss": 3.9488, "step": 28518 }, { "epoch": 9.500041642375281, "grad_norm": 0.8203125, "learning_rate": 9.702317644656039e-08, "loss": 3.9414, "step": 28519 }, { "epoch": 9.50037478137753, "grad_norm": 0.75, "learning_rate": 9.689344282602903e-08, "loss": 3.9457, "step": 28520 }, { "epoch": 9.500707920379778, "grad_norm": 0.76953125, "learning_rate": 9.676379543565317e-08, "loss": 4.0246, "step": 28521 }, { "epoch": 9.501041059382027, "grad_norm": 0.76171875, "learning_rate": 9.663423427694412e-08, "loss": 4.0264, "step": 28522 }, { "epoch": 9.501374198384276, "grad_norm": 0.84375, "learning_rate": 9.650475935141068e-08, "loss": 3.9128, "step": 28523 }, { "epoch": 9.501707337386524, "grad_norm": 0.80078125, "learning_rate": 9.63753706605608e-08, "loss": 4.0051, "step": 28524 }, { "epoch": 9.502040476388773, "grad_norm": 0.71875, "learning_rate": 9.624606820590076e-08, "loss": 4.0602, "step": 28525 }, { "epoch": 9.502373615391022, "grad_norm": 0.7265625, "learning_rate": 9.611685198893855e-08, "loss": 4.0042, "step": 28526 }, { "epoch": 9.502706754393271, "grad_norm": 0.78515625, "learning_rate": 9.598772201117795e-08, "loss": 3.9441, "step": 28527 }, { "epoch": 9.503039893395519, "grad_norm": 0.76953125, "learning_rate": 9.585867827412359e-08, "loss": 3.902, "step": 28528 }, { "epoch": 9.503373032397768, "grad_norm": 0.73828125, "learning_rate": 9.572972077927844e-08, "loss": 4.0108, "step": 28529 }, { "epoch": 9.503706171400017, "grad_norm": 0.75390625, "learning_rate": 9.560084952814297e-08, "loss": 3.9781, "step": 28530 }, { "epoch": 9.504039310402266, "grad_norm": 0.7578125, "learning_rate": 9.547206452222096e-08, "loss": 4.02, "step": 28531 }, { "epoch": 9.504372449404514, "grad_norm": 0.72265625, "learning_rate": 9.534336576301122e-08, "loss": 4.0107, "step": 28532 }, { "epoch": 9.504705588406763, "grad_norm": 0.8203125, "learning_rate": 9.521475325201256e-08, "loss": 3.9359, "step": 28533 }, { "epoch": 9.505038727409012, "grad_norm": 0.77734375, "learning_rate": 9.508622699072295e-08, "loss": 3.9502, "step": 28534 }, { "epoch": 9.50537186641126, "grad_norm": 0.74609375, "learning_rate": 9.49577869806395e-08, "loss": 3.8805, "step": 28535 }, { "epoch": 9.505705005413509, "grad_norm": 0.75, "learning_rate": 9.482943322325937e-08, "loss": 3.9463, "step": 28536 }, { "epoch": 9.506038144415758, "grad_norm": 0.75, "learning_rate": 9.47011657200772e-08, "loss": 3.9875, "step": 28537 }, { "epoch": 9.506371283418007, "grad_norm": 0.80859375, "learning_rate": 9.457298447258594e-08, "loss": 3.9435, "step": 28538 }, { "epoch": 9.506704422420254, "grad_norm": 0.77734375, "learning_rate": 9.444488948227941e-08, "loss": 4.0444, "step": 28539 }, { "epoch": 9.507037561422504, "grad_norm": 0.7578125, "learning_rate": 9.431688075064892e-08, "loss": 4.0951, "step": 28540 }, { "epoch": 9.507370700424753, "grad_norm": 0.78125, "learning_rate": 9.418895827918662e-08, "loss": 3.9699, "step": 28541 }, { "epoch": 9.507703839427, "grad_norm": 0.76171875, "learning_rate": 9.406112206938128e-08, "loss": 4.0078, "step": 28542 }, { "epoch": 9.50803697842925, "grad_norm": 0.7265625, "learning_rate": 9.39333721227234e-08, "loss": 3.9835, "step": 28543 }, { "epoch": 9.508370117431499, "grad_norm": 0.76171875, "learning_rate": 9.380570844070014e-08, "loss": 3.926, "step": 28544 }, { "epoch": 9.508703256433748, "grad_norm": 0.75390625, "learning_rate": 9.367813102479861e-08, "loss": 3.939, "step": 28545 }, { "epoch": 9.509036395435995, "grad_norm": 0.77734375, "learning_rate": 9.355063987650514e-08, "loss": 3.9596, "step": 28546 }, { "epoch": 9.509369534438244, "grad_norm": 0.80078125, "learning_rate": 9.342323499730438e-08, "loss": 3.9445, "step": 28547 }, { "epoch": 9.509702673440493, "grad_norm": 0.7421875, "learning_rate": 9.32959163886793e-08, "loss": 4.0106, "step": 28548 }, { "epoch": 9.51003581244274, "grad_norm": 0.765625, "learning_rate": 9.316868405211453e-08, "loss": 4.0036, "step": 28549 }, { "epoch": 9.51036895144499, "grad_norm": 0.734375, "learning_rate": 9.304153798909143e-08, "loss": 4.0354, "step": 28550 }, { "epoch": 9.51070209044724, "grad_norm": 0.76171875, "learning_rate": 9.291447820109045e-08, "loss": 3.9871, "step": 28551 }, { "epoch": 9.511035229449488, "grad_norm": 0.68359375, "learning_rate": 9.27875046895929e-08, "loss": 3.9279, "step": 28552 }, { "epoch": 9.511368368451736, "grad_norm": 0.76953125, "learning_rate": 9.266061745607596e-08, "loss": 4.0155, "step": 28553 }, { "epoch": 9.511701507453985, "grad_norm": 0.7421875, "learning_rate": 9.253381650201926e-08, "loss": 4.0188, "step": 28554 }, { "epoch": 9.512034646456234, "grad_norm": 0.7421875, "learning_rate": 9.240710182889994e-08, "loss": 3.9838, "step": 28555 }, { "epoch": 9.512367785458483, "grad_norm": 0.78125, "learning_rate": 9.228047343819185e-08, "loss": 3.9956, "step": 28556 }, { "epoch": 9.51270092446073, "grad_norm": 0.78515625, "learning_rate": 9.215393133137129e-08, "loss": 4.0064, "step": 28557 }, { "epoch": 9.51303406346298, "grad_norm": 0.80859375, "learning_rate": 9.202747550991208e-08, "loss": 3.9984, "step": 28558 }, { "epoch": 9.51336720246523, "grad_norm": 0.76171875, "learning_rate": 9.190110597528723e-08, "loss": 4.0272, "step": 28559 }, { "epoch": 9.513700341467477, "grad_norm": 0.8125, "learning_rate": 9.177482272896803e-08, "loss": 3.9891, "step": 28560 }, { "epoch": 9.514033480469726, "grad_norm": 0.71875, "learning_rate": 9.164862577242666e-08, "loss": 4.0179, "step": 28561 }, { "epoch": 9.514366619471975, "grad_norm": 0.765625, "learning_rate": 9.152251510713194e-08, "loss": 4.0019, "step": 28562 }, { "epoch": 9.514699758474224, "grad_norm": 0.75, "learning_rate": 9.139649073455353e-08, "loss": 3.9948, "step": 28563 }, { "epoch": 9.515032897476472, "grad_norm": 0.765625, "learning_rate": 9.12705526561594e-08, "loss": 3.9416, "step": 28564 }, { "epoch": 9.51536603647872, "grad_norm": 0.76953125, "learning_rate": 9.114470087341508e-08, "loss": 4.0774, "step": 28565 }, { "epoch": 9.51569917548097, "grad_norm": 0.71875, "learning_rate": 9.101893538778855e-08, "loss": 3.993, "step": 28566 }, { "epoch": 9.516032314483217, "grad_norm": 0.7421875, "learning_rate": 9.089325620074196e-08, "loss": 3.9709, "step": 28567 }, { "epoch": 9.516365453485466, "grad_norm": 0.7421875, "learning_rate": 9.076766331374248e-08, "loss": 3.9683, "step": 28568 }, { "epoch": 9.516698592487716, "grad_norm": 0.7578125, "learning_rate": 9.06421567282506e-08, "loss": 3.9808, "step": 28569 }, { "epoch": 9.517031731489965, "grad_norm": 0.7421875, "learning_rate": 9.05167364457285e-08, "loss": 3.8922, "step": 28570 }, { "epoch": 9.517364870492212, "grad_norm": 0.78515625, "learning_rate": 9.039140246763833e-08, "loss": 3.9704, "step": 28571 }, { "epoch": 9.517698009494461, "grad_norm": 0.7265625, "learning_rate": 9.026615479543892e-08, "loss": 3.9863, "step": 28572 }, { "epoch": 9.51803114849671, "grad_norm": 0.7578125, "learning_rate": 9.014099343058912e-08, "loss": 4.0899, "step": 28573 }, { "epoch": 9.518364287498958, "grad_norm": 0.765625, "learning_rate": 9.001591837454693e-08, "loss": 3.9606, "step": 28574 }, { "epoch": 9.518697426501207, "grad_norm": 0.72265625, "learning_rate": 8.989092962876949e-08, "loss": 4.031, "step": 28575 }, { "epoch": 9.519030565503456, "grad_norm": 0.76171875, "learning_rate": 8.976602719471316e-08, "loss": 3.9445, "step": 28576 }, { "epoch": 9.519363704505706, "grad_norm": 0.76953125, "learning_rate": 8.964121107383094e-08, "loss": 3.9409, "step": 28577 }, { "epoch": 9.519696843507953, "grad_norm": 0.76171875, "learning_rate": 8.951648126757833e-08, "loss": 3.966, "step": 28578 }, { "epoch": 9.520029982510202, "grad_norm": 0.734375, "learning_rate": 8.93918377774075e-08, "loss": 4.04, "step": 28579 }, { "epoch": 9.520363121512451, "grad_norm": 0.77734375, "learning_rate": 8.926728060477063e-08, "loss": 3.9239, "step": 28580 }, { "epoch": 9.5206962605147, "grad_norm": 0.76953125, "learning_rate": 8.914280975111822e-08, "loss": 3.9818, "step": 28581 }, { "epoch": 9.521029399516948, "grad_norm": 0.78515625, "learning_rate": 8.901842521789993e-08, "loss": 3.8933, "step": 28582 }, { "epoch": 9.521362538519197, "grad_norm": 0.78515625, "learning_rate": 8.889412700656546e-08, "loss": 3.9558, "step": 28583 }, { "epoch": 9.521695677521446, "grad_norm": 0.7890625, "learning_rate": 8.876991511856114e-08, "loss": 4.017, "step": 28584 }, { "epoch": 9.522028816523694, "grad_norm": 0.7734375, "learning_rate": 8.864578955533497e-08, "loss": 3.9826, "step": 28585 }, { "epoch": 9.522361955525943, "grad_norm": 0.796875, "learning_rate": 8.852175031833249e-08, "loss": 3.9608, "step": 28586 }, { "epoch": 9.522695094528192, "grad_norm": 0.75, "learning_rate": 8.839779740899834e-08, "loss": 3.9573, "step": 28587 }, { "epoch": 9.523028233530441, "grad_norm": 0.77734375, "learning_rate": 8.827393082877638e-08, "loss": 4.0065, "step": 28588 }, { "epoch": 9.523361372532689, "grad_norm": 0.734375, "learning_rate": 8.81501505791088e-08, "loss": 4.0363, "step": 28589 }, { "epoch": 9.523694511534938, "grad_norm": 0.75, "learning_rate": 8.80264566614386e-08, "loss": 3.9301, "step": 28590 }, { "epoch": 9.524027650537187, "grad_norm": 0.76171875, "learning_rate": 8.790284907720631e-08, "loss": 4.0156, "step": 28591 }, { "epoch": 9.524360789539436, "grad_norm": 0.7578125, "learning_rate": 8.777932782784992e-08, "loss": 4.0363, "step": 28592 }, { "epoch": 9.524693928541684, "grad_norm": 0.78125, "learning_rate": 8.76558929148108e-08, "loss": 4.0039, "step": 28593 }, { "epoch": 9.525027067543933, "grad_norm": 0.74609375, "learning_rate": 8.753254433952446e-08, "loss": 4.0266, "step": 28594 }, { "epoch": 9.525360206546182, "grad_norm": 0.734375, "learning_rate": 8.74092821034289e-08, "loss": 3.9869, "step": 28595 }, { "epoch": 9.52569334554843, "grad_norm": 0.78515625, "learning_rate": 8.728610620795968e-08, "loss": 3.9637, "step": 28596 }, { "epoch": 9.526026484550679, "grad_norm": 0.76953125, "learning_rate": 8.716301665455145e-08, "loss": 3.9808, "step": 28597 }, { "epoch": 9.526359623552928, "grad_norm": 0.8046875, "learning_rate": 8.704001344463807e-08, "loss": 3.9262, "step": 28598 }, { "epoch": 9.526692762555177, "grad_norm": 0.74609375, "learning_rate": 8.691709657965174e-08, "loss": 4.0147, "step": 28599 }, { "epoch": 9.527025901557424, "grad_norm": 0.734375, "learning_rate": 8.679426606102464e-08, "loss": 3.9949, "step": 28600 }, { "epoch": 9.527359040559674, "grad_norm": 0.76171875, "learning_rate": 8.667152189018728e-08, "loss": 3.9779, "step": 28601 }, { "epoch": 9.527692179561923, "grad_norm": 0.73828125, "learning_rate": 8.654886406857021e-08, "loss": 4.0036, "step": 28602 }, { "epoch": 9.52802531856417, "grad_norm": 0.80078125, "learning_rate": 8.64262925976006e-08, "loss": 3.9568, "step": 28603 }, { "epoch": 9.52835845756642, "grad_norm": 0.8203125, "learning_rate": 8.630380747870647e-08, "loss": 4.0516, "step": 28604 }, { "epoch": 9.528691596568668, "grad_norm": 0.69140625, "learning_rate": 8.618140871331587e-08, "loss": 3.9711, "step": 28605 }, { "epoch": 9.529024735570918, "grad_norm": 0.7421875, "learning_rate": 8.605909630285264e-08, "loss": 4.0017, "step": 28606 }, { "epoch": 9.529357874573165, "grad_norm": 0.7578125, "learning_rate": 8.593687024874314e-08, "loss": 4.005, "step": 28607 }, { "epoch": 9.529691013575414, "grad_norm": 0.734375, "learning_rate": 8.581473055241041e-08, "loss": 4.0122, "step": 28608 }, { "epoch": 9.530024152577663, "grad_norm": 0.75390625, "learning_rate": 8.569267721527663e-08, "loss": 4.0084, "step": 28609 }, { "epoch": 9.53035729157991, "grad_norm": 0.76171875, "learning_rate": 8.557071023876317e-08, "loss": 3.9852, "step": 28610 }, { "epoch": 9.53069043058216, "grad_norm": 0.75, "learning_rate": 8.544882962429141e-08, "loss": 3.9838, "step": 28611 }, { "epoch": 9.53102356958441, "grad_norm": 0.75390625, "learning_rate": 8.532703537328185e-08, "loss": 3.9784, "step": 28612 }, { "epoch": 9.531356708586658, "grad_norm": 0.74609375, "learning_rate": 8.520532748715171e-08, "loss": 4.0034, "step": 28613 }, { "epoch": 9.531689847588906, "grad_norm": 0.765625, "learning_rate": 8.508370596731901e-08, "loss": 3.9258, "step": 28614 }, { "epoch": 9.532022986591155, "grad_norm": 0.734375, "learning_rate": 8.496217081520014e-08, "loss": 3.9805, "step": 28615 }, { "epoch": 9.532356125593404, "grad_norm": 0.75390625, "learning_rate": 8.484072203221144e-08, "loss": 3.9826, "step": 28616 }, { "epoch": 9.532689264595653, "grad_norm": 0.76953125, "learning_rate": 8.471935961976763e-08, "loss": 4.0068, "step": 28617 }, { "epoch": 9.5330224035979, "grad_norm": 0.77734375, "learning_rate": 8.45980835792809e-08, "loss": 3.9868, "step": 28618 }, { "epoch": 9.53335554260015, "grad_norm": 0.77734375, "learning_rate": 8.447689391216513e-08, "loss": 3.925, "step": 28619 }, { "epoch": 9.5336886816024, "grad_norm": 0.76953125, "learning_rate": 8.435579061983167e-08, "loss": 3.9987, "step": 28620 }, { "epoch": 9.534021820604647, "grad_norm": 0.76953125, "learning_rate": 8.42347737036911e-08, "loss": 3.8788, "step": 28621 }, { "epoch": 9.534354959606896, "grad_norm": 0.7578125, "learning_rate": 8.411384316515225e-08, "loss": 3.9308, "step": 28622 }, { "epoch": 9.534688098609145, "grad_norm": 0.7265625, "learning_rate": 8.399299900562485e-08, "loss": 3.993, "step": 28623 }, { "epoch": 9.535021237611394, "grad_norm": 0.76953125, "learning_rate": 8.38722412265161e-08, "loss": 4.0181, "step": 28624 }, { "epoch": 9.535354376613641, "grad_norm": 0.734375, "learning_rate": 8.375156982923238e-08, "loss": 4.0227, "step": 28625 }, { "epoch": 9.53568751561589, "grad_norm": 0.703125, "learning_rate": 8.363098481517839e-08, "loss": 4.0075, "step": 28626 }, { "epoch": 9.53602065461814, "grad_norm": 0.75390625, "learning_rate": 8.351048618576052e-08, "loss": 3.9839, "step": 28627 }, { "epoch": 9.536353793620389, "grad_norm": 0.7890625, "learning_rate": 8.33900739423818e-08, "loss": 4.0229, "step": 28628 }, { "epoch": 9.536686932622636, "grad_norm": 0.7421875, "learning_rate": 8.326974808644277e-08, "loss": 4.0047, "step": 28629 }, { "epoch": 9.537020071624886, "grad_norm": 0.74609375, "learning_rate": 8.314950861934816e-08, "loss": 3.9626, "step": 28630 }, { "epoch": 9.537353210627135, "grad_norm": 0.703125, "learning_rate": 8.302935554249603e-08, "loss": 3.9773, "step": 28631 }, { "epoch": 9.537686349629382, "grad_norm": 0.7734375, "learning_rate": 8.290928885728771e-08, "loss": 4.0877, "step": 28632 }, { "epoch": 9.538019488631631, "grad_norm": 0.79296875, "learning_rate": 8.278930856511962e-08, "loss": 3.9891, "step": 28633 }, { "epoch": 9.53835262763388, "grad_norm": 0.7578125, "learning_rate": 8.266941466739064e-08, "loss": 3.9079, "step": 28634 }, { "epoch": 9.538685766636128, "grad_norm": 0.73046875, "learning_rate": 8.254960716549798e-08, "loss": 4.0213, "step": 28635 }, { "epoch": 9.539018905638377, "grad_norm": 0.76953125, "learning_rate": 8.242988606083551e-08, "loss": 3.9755, "step": 28636 }, { "epoch": 9.539352044640626, "grad_norm": 0.79296875, "learning_rate": 8.231025135479797e-08, "loss": 3.9356, "step": 28637 }, { "epoch": 9.539685183642876, "grad_norm": 0.77734375, "learning_rate": 8.219070304878006e-08, "loss": 4.0622, "step": 28638 }, { "epoch": 9.540018322645123, "grad_norm": 0.7578125, "learning_rate": 8.207124114417403e-08, "loss": 3.9986, "step": 28639 }, { "epoch": 9.540351461647372, "grad_norm": 0.7734375, "learning_rate": 8.195186564236956e-08, "loss": 3.9602, "step": 28640 }, { "epoch": 9.540684600649621, "grad_norm": 0.765625, "learning_rate": 8.18325765447589e-08, "loss": 4.0244, "step": 28641 }, { "epoch": 9.54101773965187, "grad_norm": 0.74609375, "learning_rate": 8.171337385273093e-08, "loss": 3.9794, "step": 28642 }, { "epoch": 9.541350878654118, "grad_norm": 0.75390625, "learning_rate": 8.159425756767452e-08, "loss": 4.0005, "step": 28643 }, { "epoch": 9.541684017656367, "grad_norm": 0.7734375, "learning_rate": 8.147522769097693e-08, "loss": 4.0305, "step": 28644 }, { "epoch": 9.542017156658616, "grad_norm": 0.74609375, "learning_rate": 8.135628422402369e-08, "loss": 4.059, "step": 28645 }, { "epoch": 9.542350295660864, "grad_norm": 0.76953125, "learning_rate": 8.123742716820204e-08, "loss": 3.9748, "step": 28646 }, { "epoch": 9.542683434663113, "grad_norm": 0.7734375, "learning_rate": 8.111865652489419e-08, "loss": 3.9465, "step": 28647 }, { "epoch": 9.543016573665362, "grad_norm": 0.7578125, "learning_rate": 8.099997229548489e-08, "loss": 4.0111, "step": 28648 }, { "epoch": 9.543349712667611, "grad_norm": 0.75390625, "learning_rate": 8.088137448135718e-08, "loss": 4.0258, "step": 28649 }, { "epoch": 9.543682851669859, "grad_norm": 0.75, "learning_rate": 8.07628630838908e-08, "loss": 3.9638, "step": 28650 }, { "epoch": 9.544015990672108, "grad_norm": 0.8125, "learning_rate": 8.064443810446798e-08, "loss": 4.0075, "step": 28651 }, { "epoch": 9.544349129674357, "grad_norm": 0.76953125, "learning_rate": 8.052609954446594e-08, "loss": 4.035, "step": 28652 }, { "epoch": 9.544682268676606, "grad_norm": 0.7734375, "learning_rate": 8.040784740526524e-08, "loss": 4.0165, "step": 28653 }, { "epoch": 9.545015407678854, "grad_norm": 0.7578125, "learning_rate": 8.02896816882423e-08, "loss": 3.9876, "step": 28654 }, { "epoch": 9.545348546681103, "grad_norm": 0.7578125, "learning_rate": 8.017160239477267e-08, "loss": 3.9189, "step": 28655 }, { "epoch": 9.545681685683352, "grad_norm": 0.77734375, "learning_rate": 8.005360952623359e-08, "loss": 3.9227, "step": 28656 }, { "epoch": 9.5460148246856, "grad_norm": 0.8203125, "learning_rate": 7.993570308399728e-08, "loss": 4.0455, "step": 28657 }, { "epoch": 9.546347963687849, "grad_norm": 0.7265625, "learning_rate": 7.981788306943932e-08, "loss": 3.9863, "step": 28658 }, { "epoch": 9.546681102690098, "grad_norm": 0.7578125, "learning_rate": 7.970014948393028e-08, "loss": 4.047, "step": 28659 }, { "epoch": 9.547014241692347, "grad_norm": 0.7421875, "learning_rate": 7.95825023288424e-08, "loss": 4.0627, "step": 28660 }, { "epoch": 9.547347380694594, "grad_norm": 0.76171875, "learning_rate": 7.94649416055454e-08, "loss": 3.9405, "step": 28661 }, { "epoch": 9.547680519696844, "grad_norm": 0.7734375, "learning_rate": 7.934746731540904e-08, "loss": 3.9265, "step": 28662 }, { "epoch": 9.548013658699093, "grad_norm": 0.75, "learning_rate": 7.923007945980221e-08, "loss": 3.9891, "step": 28663 }, { "epoch": 9.54834679770134, "grad_norm": 0.734375, "learning_rate": 7.911277804009132e-08, "loss": 4.028, "step": 28664 }, { "epoch": 9.54867993670359, "grad_norm": 0.73828125, "learning_rate": 7.899556305764277e-08, "loss": 3.9645, "step": 28665 }, { "epoch": 9.549013075705838, "grad_norm": 0.79296875, "learning_rate": 7.887843451382132e-08, "loss": 4.008, "step": 28666 }, { "epoch": 9.549346214708088, "grad_norm": 0.75390625, "learning_rate": 7.876139240999336e-08, "loss": 3.9876, "step": 28667 }, { "epoch": 9.549679353710335, "grad_norm": 0.75, "learning_rate": 7.86444367475203e-08, "loss": 4.0392, "step": 28668 }, { "epoch": 9.550012492712584, "grad_norm": 0.75, "learning_rate": 7.852756752776441e-08, "loss": 3.9928, "step": 28669 }, { "epoch": 9.550345631714833, "grad_norm": 0.75, "learning_rate": 7.841078475208791e-08, "loss": 3.9892, "step": 28670 }, { "epoch": 9.55067877071708, "grad_norm": 0.796875, "learning_rate": 7.829408842185055e-08, "loss": 4.0271, "step": 28671 }, { "epoch": 9.55101190971933, "grad_norm": 0.7890625, "learning_rate": 7.817747853841206e-08, "loss": 3.9675, "step": 28672 }, { "epoch": 9.55134504872158, "grad_norm": 0.7265625, "learning_rate": 7.806095510312971e-08, "loss": 3.9702, "step": 28673 }, { "epoch": 9.551678187723828, "grad_norm": 0.78515625, "learning_rate": 7.794451811736159e-08, "loss": 4.0445, "step": 28674 }, { "epoch": 9.552011326726076, "grad_norm": 0.765625, "learning_rate": 7.782816758246325e-08, "loss": 4.003, "step": 28675 }, { "epoch": 9.552344465728325, "grad_norm": 0.78515625, "learning_rate": 7.771190349979112e-08, "loss": 4.0077, "step": 28676 }, { "epoch": 9.552677604730574, "grad_norm": 0.76953125, "learning_rate": 7.75957258706983e-08, "loss": 3.9916, "step": 28677 }, { "epoch": 9.553010743732823, "grad_norm": 0.78125, "learning_rate": 7.747963469653785e-08, "loss": 3.9448, "step": 28678 }, { "epoch": 9.55334388273507, "grad_norm": 0.8125, "learning_rate": 7.73636299786637e-08, "loss": 3.8705, "step": 28679 }, { "epoch": 9.55367702173732, "grad_norm": 0.77734375, "learning_rate": 7.724771171842476e-08, "loss": 4.0074, "step": 28680 }, { "epoch": 9.554010160739569, "grad_norm": 0.82421875, "learning_rate": 7.71318799171733e-08, "loss": 4.0078, "step": 28681 }, { "epoch": 9.554343299741817, "grad_norm": 0.80078125, "learning_rate": 7.701613457625655e-08, "loss": 4.0531, "step": 28682 }, { "epoch": 9.554676438744066, "grad_norm": 0.79296875, "learning_rate": 7.69004756970243e-08, "loss": 3.9096, "step": 28683 }, { "epoch": 9.555009577746315, "grad_norm": 0.80078125, "learning_rate": 7.678490328082294e-08, "loss": 4.0154, "step": 28684 }, { "epoch": 9.555342716748564, "grad_norm": 0.7734375, "learning_rate": 7.66694173289989e-08, "loss": 4.0081, "step": 28685 }, { "epoch": 9.555675855750811, "grad_norm": 0.7421875, "learning_rate": 7.655401784289695e-08, "loss": 3.9796, "step": 28686 }, { "epoch": 9.55600899475306, "grad_norm": 0.78515625, "learning_rate": 7.643870482386184e-08, "loss": 3.9956, "step": 28687 }, { "epoch": 9.55634213375531, "grad_norm": 0.73046875, "learning_rate": 7.632347827323582e-08, "loss": 3.9572, "step": 28688 }, { "epoch": 9.556675272757559, "grad_norm": 0.80859375, "learning_rate": 7.620833819236284e-08, "loss": 3.9227, "step": 28689 }, { "epoch": 9.557008411759806, "grad_norm": 0.7578125, "learning_rate": 7.60932845825818e-08, "loss": 3.9915, "step": 28690 }, { "epoch": 9.557341550762056, "grad_norm": 0.74609375, "learning_rate": 7.597831744523415e-08, "loss": 4.0254, "step": 28691 }, { "epoch": 9.557674689764305, "grad_norm": 0.77734375, "learning_rate": 7.58634367816588e-08, "loss": 3.9795, "step": 28692 }, { "epoch": 9.558007828766552, "grad_norm": 0.78125, "learning_rate": 7.574864259319386e-08, "loss": 3.9931, "step": 28693 }, { "epoch": 9.558340967768801, "grad_norm": 0.7578125, "learning_rate": 7.563393488117576e-08, "loss": 3.9556, "step": 28694 }, { "epoch": 9.55867410677105, "grad_norm": 0.7734375, "learning_rate": 7.551931364694176e-08, "loss": 3.9708, "step": 28695 }, { "epoch": 9.559007245773298, "grad_norm": 0.76953125, "learning_rate": 7.540477889182579e-08, "loss": 3.9852, "step": 28696 }, { "epoch": 9.559340384775547, "grad_norm": 0.73046875, "learning_rate": 7.529033061716262e-08, "loss": 3.9648, "step": 28697 }, { "epoch": 9.559673523777796, "grad_norm": 0.73828125, "learning_rate": 7.517596882428618e-08, "loss": 3.9601, "step": 28698 }, { "epoch": 9.560006662780046, "grad_norm": 0.8125, "learning_rate": 7.506169351452625e-08, "loss": 4.0379, "step": 28699 }, { "epoch": 9.560339801782293, "grad_norm": 0.7109375, "learning_rate": 7.49475046892159e-08, "loss": 3.9906, "step": 28700 }, { "epoch": 9.560672940784542, "grad_norm": 0.74609375, "learning_rate": 7.483340234968411e-08, "loss": 3.9565, "step": 28701 }, { "epoch": 9.561006079786791, "grad_norm": 0.796875, "learning_rate": 7.471938649726062e-08, "loss": 3.9476, "step": 28702 }, { "epoch": 9.56133921878904, "grad_norm": 0.765625, "learning_rate": 7.460545713327271e-08, "loss": 4.0186, "step": 28703 }, { "epoch": 9.561672357791288, "grad_norm": 0.7421875, "learning_rate": 7.449161425904849e-08, "loss": 4.0132, "step": 28704 }, { "epoch": 9.562005496793537, "grad_norm": 0.75, "learning_rate": 7.437785787591272e-08, "loss": 4.0102, "step": 28705 }, { "epoch": 9.562338635795786, "grad_norm": 0.73828125, "learning_rate": 7.426418798519102e-08, "loss": 3.9756, "step": 28706 }, { "epoch": 9.562671774798034, "grad_norm": 0.75, "learning_rate": 7.415060458820815e-08, "loss": 4.055, "step": 28707 }, { "epoch": 9.563004913800283, "grad_norm": 0.7890625, "learning_rate": 7.40371076862864e-08, "loss": 3.9683, "step": 28708 }, { "epoch": 9.563338052802532, "grad_norm": 0.734375, "learning_rate": 7.392369728074721e-08, "loss": 4.0121, "step": 28709 }, { "epoch": 9.563671191804781, "grad_norm": 0.75390625, "learning_rate": 7.381037337291119e-08, "loss": 4.0414, "step": 28710 }, { "epoch": 9.564004330807029, "grad_norm": 0.76171875, "learning_rate": 7.36971359641006e-08, "loss": 3.9868, "step": 28711 }, { "epoch": 9.564337469809278, "grad_norm": 0.76953125, "learning_rate": 7.358398505563274e-08, "loss": 3.9256, "step": 28712 }, { "epoch": 9.564670608811527, "grad_norm": 0.734375, "learning_rate": 7.347092064882571e-08, "loss": 4.0272, "step": 28713 }, { "epoch": 9.565003747813776, "grad_norm": 0.7421875, "learning_rate": 7.335794274499596e-08, "loss": 4.0049, "step": 28714 }, { "epoch": 9.565336886816024, "grad_norm": 0.78515625, "learning_rate": 7.324505134546078e-08, "loss": 3.9997, "step": 28715 }, { "epoch": 9.565670025818273, "grad_norm": 0.80859375, "learning_rate": 7.313224645153494e-08, "loss": 4.0506, "step": 28716 }, { "epoch": 9.566003164820522, "grad_norm": 0.7265625, "learning_rate": 7.301952806453155e-08, "loss": 4.0503, "step": 28717 }, { "epoch": 9.56633630382277, "grad_norm": 0.7734375, "learning_rate": 7.290689618576373e-08, "loss": 3.9149, "step": 28718 }, { "epoch": 9.566669442825019, "grad_norm": 0.79296875, "learning_rate": 7.279435081654296e-08, "loss": 4.0148, "step": 28719 }, { "epoch": 9.567002581827268, "grad_norm": 0.73828125, "learning_rate": 7.268189195818147e-08, "loss": 3.9785, "step": 28720 }, { "epoch": 9.567335720829517, "grad_norm": 0.80859375, "learning_rate": 7.256951961198743e-08, "loss": 3.9838, "step": 28721 }, { "epoch": 9.567668859831764, "grad_norm": 0.74609375, "learning_rate": 7.245723377927143e-08, "loss": 3.9834, "step": 28722 }, { "epoch": 9.568001998834013, "grad_norm": 0.7421875, "learning_rate": 7.234503446134078e-08, "loss": 3.9755, "step": 28723 }, { "epoch": 9.568335137836263, "grad_norm": 0.734375, "learning_rate": 7.22329216595019e-08, "loss": 4.0134, "step": 28724 }, { "epoch": 9.56866827683851, "grad_norm": 0.74609375, "learning_rate": 7.212089537506128e-08, "loss": 4.0307, "step": 28725 }, { "epoch": 9.56900141584076, "grad_norm": 0.7734375, "learning_rate": 7.200895560932369e-08, "loss": 4.0195, "step": 28726 }, { "epoch": 9.569334554843008, "grad_norm": 0.73046875, "learning_rate": 7.189710236359227e-08, "loss": 3.9483, "step": 28727 }, { "epoch": 9.569667693845258, "grad_norm": 0.78125, "learning_rate": 7.178533563917011e-08, "loss": 3.9871, "step": 28728 }, { "epoch": 9.570000832847505, "grad_norm": 0.765625, "learning_rate": 7.167365543735954e-08, "loss": 4.0138, "step": 28729 }, { "epoch": 9.570333971849754, "grad_norm": 0.75, "learning_rate": 7.156206175946117e-08, "loss": 3.9344, "step": 28730 }, { "epoch": 9.570667110852003, "grad_norm": 0.75390625, "learning_rate": 7.14505546067748e-08, "loss": 4.0082, "step": 28731 }, { "epoch": 9.57100024985425, "grad_norm": 0.7421875, "learning_rate": 7.133913398059938e-08, "loss": 4.0369, "step": 28732 }, { "epoch": 9.5713333888565, "grad_norm": 0.75390625, "learning_rate": 7.122779988223222e-08, "loss": 3.8893, "step": 28733 }, { "epoch": 9.57166652785875, "grad_norm": 0.75390625, "learning_rate": 7.111655231297142e-08, "loss": 3.9534, "step": 28734 }, { "epoch": 9.571999666860998, "grad_norm": 0.765625, "learning_rate": 7.100539127411099e-08, "loss": 3.9476, "step": 28735 }, { "epoch": 9.572332805863246, "grad_norm": 0.734375, "learning_rate": 7.089431676694652e-08, "loss": 3.9744, "step": 28736 }, { "epoch": 9.572665944865495, "grad_norm": 0.7109375, "learning_rate": 7.078332879277283e-08, "loss": 3.9967, "step": 28737 }, { "epoch": 9.572999083867744, "grad_norm": 0.734375, "learning_rate": 7.067242735288054e-08, "loss": 4.0523, "step": 28738 }, { "epoch": 9.573332222869993, "grad_norm": 0.8046875, "learning_rate": 7.056161244856362e-08, "loss": 3.9916, "step": 28739 }, { "epoch": 9.57366536187224, "grad_norm": 0.7265625, "learning_rate": 7.045088408111106e-08, "loss": 3.9781, "step": 28740 }, { "epoch": 9.57399850087449, "grad_norm": 0.78515625, "learning_rate": 7.034024225181346e-08, "loss": 3.9183, "step": 28741 }, { "epoch": 9.574331639876739, "grad_norm": 0.72265625, "learning_rate": 7.022968696195897e-08, "loss": 3.9774, "step": 28742 }, { "epoch": 9.574664778878986, "grad_norm": 0.73046875, "learning_rate": 7.011921821283657e-08, "loss": 3.9682, "step": 28743 }, { "epoch": 9.574997917881236, "grad_norm": 0.76171875, "learning_rate": 7.000883600573187e-08, "loss": 3.9538, "step": 28744 }, { "epoch": 9.575331056883485, "grad_norm": 0.7734375, "learning_rate": 6.989854034193138e-08, "loss": 4.0216, "step": 28745 }, { "epoch": 9.575664195885734, "grad_norm": 0.8046875, "learning_rate": 6.978833122271822e-08, "loss": 4.0571, "step": 28746 }, { "epoch": 9.575997334887981, "grad_norm": 0.78515625, "learning_rate": 6.967820864937802e-08, "loss": 3.9475, "step": 28747 }, { "epoch": 9.57633047389023, "grad_norm": 0.76953125, "learning_rate": 6.956817262319226e-08, "loss": 4.0505, "step": 28748 }, { "epoch": 9.57666361289248, "grad_norm": 0.81640625, "learning_rate": 6.945822314544326e-08, "loss": 4.0007, "step": 28749 }, { "epoch": 9.576996751894729, "grad_norm": 0.7265625, "learning_rate": 6.934836021741164e-08, "loss": 3.9204, "step": 28750 }, { "epoch": 9.577329890896976, "grad_norm": 0.7890625, "learning_rate": 6.923858384037557e-08, "loss": 3.9678, "step": 28751 }, { "epoch": 9.577663029899226, "grad_norm": 0.76953125, "learning_rate": 6.91288940156165e-08, "loss": 4.0622, "step": 28752 }, { "epoch": 9.577996168901475, "grad_norm": 0.75390625, "learning_rate": 6.90192907444101e-08, "loss": 3.9867, "step": 28753 }, { "epoch": 9.578329307903722, "grad_norm": 0.69921875, "learning_rate": 6.890977402803283e-08, "loss": 3.9913, "step": 28754 }, { "epoch": 9.578662446905971, "grad_norm": 0.78515625, "learning_rate": 6.880034386776202e-08, "loss": 4.0099, "step": 28755 }, { "epoch": 9.57899558590822, "grad_norm": 0.75, "learning_rate": 6.869100026487079e-08, "loss": 4.0049, "step": 28756 }, { "epoch": 9.57932872491047, "grad_norm": 0.73046875, "learning_rate": 6.858174322063315e-08, "loss": 3.9997, "step": 28757 }, { "epoch": 9.579661863912717, "grad_norm": 0.7734375, "learning_rate": 6.84725727363214e-08, "loss": 3.9242, "step": 28758 }, { "epoch": 9.579995002914966, "grad_norm": 0.73046875, "learning_rate": 6.83634888132087e-08, "loss": 4.0086, "step": 28759 }, { "epoch": 9.580328141917215, "grad_norm": 0.73828125, "learning_rate": 6.825449145256318e-08, "loss": 4.084, "step": 28760 }, { "epoch": 9.580661280919463, "grad_norm": 0.75390625, "learning_rate": 6.814558065565635e-08, "loss": 3.927, "step": 28761 }, { "epoch": 9.580994419921712, "grad_norm": 0.7421875, "learning_rate": 6.803675642375635e-08, "loss": 3.9324, "step": 28762 }, { "epoch": 9.581327558923961, "grad_norm": 0.78125, "learning_rate": 6.792801875813048e-08, "loss": 4.0147, "step": 28763 }, { "epoch": 9.58166069792621, "grad_norm": 0.84375, "learning_rate": 6.781936766004526e-08, "loss": 3.93, "step": 28764 }, { "epoch": 9.581993836928458, "grad_norm": 0.82421875, "learning_rate": 6.771080313076549e-08, "loss": 3.9637, "step": 28765 }, { "epoch": 9.582326975930707, "grad_norm": 0.80859375, "learning_rate": 6.760232517155768e-08, "loss": 4.0557, "step": 28766 }, { "epoch": 9.582660114932956, "grad_norm": 0.73828125, "learning_rate": 6.749393378368329e-08, "loss": 4.025, "step": 28767 }, { "epoch": 9.582993253935204, "grad_norm": 0.80859375, "learning_rate": 6.738562896840633e-08, "loss": 3.9509, "step": 28768 }, { "epoch": 9.583326392937453, "grad_norm": 0.71875, "learning_rate": 6.727741072698746e-08, "loss": 3.9533, "step": 28769 }, { "epoch": 9.583659531939702, "grad_norm": 0.7734375, "learning_rate": 6.716927906068815e-08, "loss": 3.9279, "step": 28770 }, { "epoch": 9.583992670941951, "grad_norm": 0.765625, "learning_rate": 6.706123397076741e-08, "loss": 4.0064, "step": 28771 }, { "epoch": 9.584325809944199, "grad_norm": 0.7734375, "learning_rate": 6.695327545848257e-08, "loss": 3.9787, "step": 28772 }, { "epoch": 9.584658948946448, "grad_norm": 0.76171875, "learning_rate": 6.68454035250926e-08, "loss": 3.9942, "step": 28773 }, { "epoch": 9.584992087948697, "grad_norm": 0.75, "learning_rate": 6.67376181718532e-08, "loss": 3.984, "step": 28774 }, { "epoch": 9.585325226950946, "grad_norm": 0.734375, "learning_rate": 6.662991940002083e-08, "loss": 3.9942, "step": 28775 }, { "epoch": 9.585658365953194, "grad_norm": 0.83203125, "learning_rate": 6.652230721084868e-08, "loss": 4.0147, "step": 28776 }, { "epoch": 9.585991504955443, "grad_norm": 0.77734375, "learning_rate": 6.641478160559072e-08, "loss": 3.9948, "step": 28777 }, { "epoch": 9.586324643957692, "grad_norm": 0.78125, "learning_rate": 6.630734258550014e-08, "loss": 4.0215, "step": 28778 }, { "epoch": 9.58665778295994, "grad_norm": 0.75, "learning_rate": 6.619999015182677e-08, "loss": 4.0161, "step": 28779 }, { "epoch": 9.586990921962188, "grad_norm": 0.75, "learning_rate": 6.609272430582208e-08, "loss": 4.0049, "step": 28780 }, { "epoch": 9.587324060964438, "grad_norm": 0.7421875, "learning_rate": 6.598554504873594e-08, "loss": 3.9685, "step": 28781 }, { "epoch": 9.587657199966687, "grad_norm": 0.75390625, "learning_rate": 6.587845238181567e-08, "loss": 3.9604, "step": 28782 }, { "epoch": 9.587990338968934, "grad_norm": 0.75, "learning_rate": 6.577144630630943e-08, "loss": 3.9295, "step": 28783 }, { "epoch": 9.588323477971183, "grad_norm": 0.74609375, "learning_rate": 6.56645268234629e-08, "loss": 3.9768, "step": 28784 }, { "epoch": 9.588656616973433, "grad_norm": 0.765625, "learning_rate": 6.55576939345226e-08, "loss": 4.0029, "step": 28785 }, { "epoch": 9.58898975597568, "grad_norm": 0.7578125, "learning_rate": 6.545094764073167e-08, "loss": 3.9308, "step": 28786 }, { "epoch": 9.58932289497793, "grad_norm": 0.77734375, "learning_rate": 6.534428794333414e-08, "loss": 4.0177, "step": 28787 }, { "epoch": 9.589656033980178, "grad_norm": 0.7421875, "learning_rate": 6.523771484357233e-08, "loss": 4.0507, "step": 28788 }, { "epoch": 9.589989172982428, "grad_norm": 0.78515625, "learning_rate": 6.513122834268692e-08, "loss": 3.997, "step": 28789 }, { "epoch": 9.590322311984675, "grad_norm": 0.80078125, "learning_rate": 6.502482844191943e-08, "loss": 4.0236, "step": 28790 }, { "epoch": 9.590655450986924, "grad_norm": 0.78515625, "learning_rate": 6.491851514250801e-08, "loss": 4.0386, "step": 28791 }, { "epoch": 9.590988589989173, "grad_norm": 0.734375, "learning_rate": 6.48122884456917e-08, "loss": 3.9651, "step": 28792 }, { "epoch": 9.59132172899142, "grad_norm": 0.80859375, "learning_rate": 6.470614835270783e-08, "loss": 4.0374, "step": 28793 }, { "epoch": 9.59165486799367, "grad_norm": 0.7578125, "learning_rate": 6.46000948647929e-08, "loss": 4.0353, "step": 28794 }, { "epoch": 9.59198800699592, "grad_norm": 0.765625, "learning_rate": 6.449412798318094e-08, "loss": 3.9827, "step": 28795 }, { "epoch": 9.592321145998168, "grad_norm": 0.765625, "learning_rate": 6.438824770910761e-08, "loss": 3.9461, "step": 28796 }, { "epoch": 9.592654285000416, "grad_norm": 0.80859375, "learning_rate": 6.428245404380612e-08, "loss": 3.8681, "step": 28797 }, { "epoch": 9.592987424002665, "grad_norm": 0.78125, "learning_rate": 6.417674698850711e-08, "loss": 3.955, "step": 28798 }, { "epoch": 9.593320563004914, "grad_norm": 0.8046875, "learning_rate": 6.40711265444438e-08, "loss": 4.0368, "step": 28799 }, { "epoch": 9.593653702007163, "grad_norm": 0.8203125, "learning_rate": 6.396559271284519e-08, "loss": 3.9249, "step": 28800 }, { "epoch": 9.59398684100941, "grad_norm": 0.79296875, "learning_rate": 6.386014549494196e-08, "loss": 3.9746, "step": 28801 }, { "epoch": 9.59431998001166, "grad_norm": 0.75, "learning_rate": 6.375478489196062e-08, "loss": 4.0352, "step": 28802 }, { "epoch": 9.594653119013909, "grad_norm": 0.75, "learning_rate": 6.364951090512939e-08, "loss": 3.9279, "step": 28803 }, { "epoch": 9.594986258016156, "grad_norm": 0.7421875, "learning_rate": 6.35443235356739e-08, "loss": 4.0358, "step": 28804 }, { "epoch": 9.595319397018406, "grad_norm": 0.765625, "learning_rate": 6.34392227848199e-08, "loss": 3.9204, "step": 28805 }, { "epoch": 9.595652536020655, "grad_norm": 0.734375, "learning_rate": 6.333420865379219e-08, "loss": 3.9795, "step": 28806 }, { "epoch": 9.595985675022904, "grad_norm": 0.7578125, "learning_rate": 6.322928114381233e-08, "loss": 3.9934, "step": 28807 }, { "epoch": 9.596318814025151, "grad_norm": 0.78125, "learning_rate": 6.312444025610348e-08, "loss": 3.9993, "step": 28808 }, { "epoch": 9.5966519530274, "grad_norm": 0.7578125, "learning_rate": 6.301968599188634e-08, "loss": 4.0067, "step": 28809 }, { "epoch": 9.59698509202965, "grad_norm": 0.73046875, "learning_rate": 6.291501835238162e-08, "loss": 4.0017, "step": 28810 }, { "epoch": 9.597318231031899, "grad_norm": 0.7734375, "learning_rate": 6.281043733880831e-08, "loss": 3.9749, "step": 28811 }, { "epoch": 9.597651370034146, "grad_norm": 0.734375, "learning_rate": 6.270594295238464e-08, "loss": 4.0069, "step": 28812 }, { "epoch": 9.597984509036396, "grad_norm": 0.75390625, "learning_rate": 6.26015351943271e-08, "loss": 3.9878, "step": 28813 }, { "epoch": 9.598317648038645, "grad_norm": 0.77734375, "learning_rate": 6.249721406585223e-08, "loss": 3.9139, "step": 28814 }, { "epoch": 9.598650787040892, "grad_norm": 0.7734375, "learning_rate": 6.239297956817658e-08, "loss": 3.9692, "step": 28815 }, { "epoch": 9.598983926043141, "grad_norm": 0.7421875, "learning_rate": 6.228883170251165e-08, "loss": 4.0224, "step": 28816 }, { "epoch": 9.59931706504539, "grad_norm": 0.72265625, "learning_rate": 6.218477047007148e-08, "loss": 3.8842, "step": 28817 }, { "epoch": 9.59965020404764, "grad_norm": 0.7421875, "learning_rate": 6.208079587206928e-08, "loss": 4.0088, "step": 28818 }, { "epoch": 9.599983343049887, "grad_norm": 0.73828125, "learning_rate": 6.197690790971489e-08, "loss": 3.9333, "step": 28819 }, { "epoch": 9.600316482052136, "grad_norm": 0.78515625, "learning_rate": 6.187310658421902e-08, "loss": 3.983, "step": 28820 }, { "epoch": 9.600649621054385, "grad_norm": 0.78515625, "learning_rate": 6.176939189678988e-08, "loss": 4.0153, "step": 28821 }, { "epoch": 9.600982760056633, "grad_norm": 0.765625, "learning_rate": 6.166576384863731e-08, "loss": 4.0088, "step": 28822 }, { "epoch": 9.601315899058882, "grad_norm": 0.7890625, "learning_rate": 6.15622224409662e-08, "loss": 3.993, "step": 28823 }, { "epoch": 9.601649038061131, "grad_norm": 0.7734375, "learning_rate": 6.145876767498393e-08, "loss": 4.0411, "step": 28824 }, { "epoch": 9.60198217706338, "grad_norm": 0.8125, "learning_rate": 6.135539955189534e-08, "loss": 3.9385, "step": 28825 }, { "epoch": 9.602315316065628, "grad_norm": 0.75, "learning_rate": 6.125211807290449e-08, "loss": 3.9735, "step": 28826 }, { "epoch": 9.602648455067877, "grad_norm": 0.77734375, "learning_rate": 6.114892323921289e-08, "loss": 3.9578, "step": 28827 }, { "epoch": 9.602981594070126, "grad_norm": 0.7578125, "learning_rate": 6.104581505202461e-08, "loss": 3.9114, "step": 28828 }, { "epoch": 9.603314733072374, "grad_norm": 0.80078125, "learning_rate": 6.094279351254035e-08, "loss": 3.9914, "step": 28829 }, { "epoch": 9.603647872074623, "grad_norm": 0.765625, "learning_rate": 6.083985862195912e-08, "loss": 3.9225, "step": 28830 }, { "epoch": 9.603981011076872, "grad_norm": 0.7890625, "learning_rate": 6.073701038148083e-08, "loss": 3.9158, "step": 28831 }, { "epoch": 9.604314150079121, "grad_norm": 0.796875, "learning_rate": 6.0634248792302e-08, "loss": 3.9802, "step": 28832 }, { "epoch": 9.604647289081369, "grad_norm": 0.76953125, "learning_rate": 6.053157385562086e-08, "loss": 3.9286, "step": 28833 }, { "epoch": 9.604980428083618, "grad_norm": 0.75390625, "learning_rate": 6.042898557263393e-08, "loss": 4.0317, "step": 28834 }, { "epoch": 9.605313567085867, "grad_norm": 0.7734375, "learning_rate": 6.032648394453361e-08, "loss": 4.006, "step": 28835 }, { "epoch": 9.605646706088116, "grad_norm": 0.78515625, "learning_rate": 6.022406897251642e-08, "loss": 3.9565, "step": 28836 }, { "epoch": 9.605979845090364, "grad_norm": 0.765625, "learning_rate": 6.012174065777393e-08, "loss": 3.982, "step": 28837 }, { "epoch": 9.606312984092613, "grad_norm": 0.7265625, "learning_rate": 6.001949900149933e-08, "loss": 3.9729, "step": 28838 }, { "epoch": 9.606646123094862, "grad_norm": 0.70703125, "learning_rate": 5.991734400488169e-08, "loss": 3.9553, "step": 28839 }, { "epoch": 9.60697926209711, "grad_norm": 0.73046875, "learning_rate": 5.981527566911172e-08, "loss": 4.0479, "step": 28840 }, { "epoch": 9.607312401099358, "grad_norm": 0.78125, "learning_rate": 5.971329399537845e-08, "loss": 4.024, "step": 28841 }, { "epoch": 9.607645540101608, "grad_norm": 0.75390625, "learning_rate": 5.961139898487011e-08, "loss": 3.8946, "step": 28842 }, { "epoch": 9.607978679103857, "grad_norm": 0.76171875, "learning_rate": 5.950959063877326e-08, "loss": 3.964, "step": 28843 }, { "epoch": 9.608311818106104, "grad_norm": 0.77734375, "learning_rate": 5.940786895827277e-08, "loss": 3.9684, "step": 28844 }, { "epoch": 9.608644957108353, "grad_norm": 0.75, "learning_rate": 5.93062339445552e-08, "loss": 3.9548, "step": 28845 }, { "epoch": 9.608978096110603, "grad_norm": 0.734375, "learning_rate": 5.920468559880293e-08, "loss": 4.0234, "step": 28846 }, { "epoch": 9.60931123511285, "grad_norm": 0.7578125, "learning_rate": 5.910322392219919e-08, "loss": 3.9997, "step": 28847 }, { "epoch": 9.6096443741151, "grad_norm": 0.765625, "learning_rate": 5.900184891592636e-08, "loss": 3.9719, "step": 28848 }, { "epoch": 9.609977513117348, "grad_norm": 0.7578125, "learning_rate": 5.8900560581165165e-08, "loss": 4.0121, "step": 28849 }, { "epoch": 9.610310652119598, "grad_norm": 0.79296875, "learning_rate": 5.879935891909383e-08, "loss": 4.0867, "step": 28850 }, { "epoch": 9.610643791121845, "grad_norm": 0.74609375, "learning_rate": 5.86982439308939e-08, "loss": 3.9846, "step": 28851 }, { "epoch": 9.610976930124094, "grad_norm": 0.765625, "learning_rate": 5.8597215617740286e-08, "loss": 3.9588, "step": 28852 }, { "epoch": 9.611310069126343, "grad_norm": 0.8125, "learning_rate": 5.84962739808112e-08, "loss": 3.9776, "step": 28853 }, { "epoch": 9.61164320812859, "grad_norm": 0.76171875, "learning_rate": 5.8395419021283204e-08, "loss": 3.9309, "step": 28854 }, { "epoch": 9.61197634713084, "grad_norm": 0.765625, "learning_rate": 5.829465074032953e-08, "loss": 3.94, "step": 28855 }, { "epoch": 9.612309486133089, "grad_norm": 0.7578125, "learning_rate": 5.819396913912423e-08, "loss": 4.0012, "step": 28856 }, { "epoch": 9.612642625135338, "grad_norm": 0.7421875, "learning_rate": 5.8093374218840534e-08, "loss": 4.0052, "step": 28857 }, { "epoch": 9.612975764137586, "grad_norm": 0.76171875, "learning_rate": 5.7992865980650014e-08, "loss": 4.0489, "step": 28858 }, { "epoch": 9.613308903139835, "grad_norm": 0.74609375, "learning_rate": 5.789244442572256e-08, "loss": 3.963, "step": 28859 }, { "epoch": 9.613642042142084, "grad_norm": 0.7578125, "learning_rate": 5.77921095552289e-08, "loss": 3.988, "step": 28860 }, { "epoch": 9.613975181144333, "grad_norm": 0.7734375, "learning_rate": 5.769186137033811e-08, "loss": 4.0509, "step": 28861 }, { "epoch": 9.61430832014658, "grad_norm": 0.75, "learning_rate": 5.759169987221674e-08, "loss": 3.9207, "step": 28862 }, { "epoch": 9.61464145914883, "grad_norm": 0.765625, "learning_rate": 5.749162506203137e-08, "loss": 4.0369, "step": 28863 }, { "epoch": 9.614974598151079, "grad_norm": 0.78515625, "learning_rate": 5.739163694094774e-08, "loss": 3.9309, "step": 28864 }, { "epoch": 9.615307737153326, "grad_norm": 0.8125, "learning_rate": 5.729173551013156e-08, "loss": 3.9067, "step": 28865 }, { "epoch": 9.615640876155576, "grad_norm": 0.79296875, "learning_rate": 5.7191920770746084e-08, "loss": 3.9141, "step": 28866 }, { "epoch": 9.615974015157825, "grad_norm": 0.80078125, "learning_rate": 5.709219272395372e-08, "loss": 3.9805, "step": 28867 }, { "epoch": 9.616307154160074, "grad_norm": 0.77734375, "learning_rate": 5.699255137091519e-08, "loss": 3.9778, "step": 28868 }, { "epoch": 9.616640293162321, "grad_norm": 0.78515625, "learning_rate": 5.689299671279208e-08, "loss": 3.9612, "step": 28869 }, { "epoch": 9.61697343216457, "grad_norm": 0.7265625, "learning_rate": 5.679352875074428e-08, "loss": 3.9633, "step": 28870 }, { "epoch": 9.61730657116682, "grad_norm": 0.75390625, "learning_rate": 5.669414748592922e-08, "loss": 3.9938, "step": 28871 }, { "epoch": 9.617639710169069, "grad_norm": 0.78515625, "learning_rate": 5.6594852919505944e-08, "loss": 4.0205, "step": 28872 }, { "epoch": 9.617972849171316, "grad_norm": 0.78515625, "learning_rate": 5.649564505263022e-08, "loss": 3.992, "step": 28873 }, { "epoch": 9.618305988173566, "grad_norm": 0.76171875, "learning_rate": 5.639652388645694e-08, "loss": 4.0181, "step": 28874 }, { "epoch": 9.618639127175815, "grad_norm": 0.74609375, "learning_rate": 5.629748942214102e-08, "loss": 3.9646, "step": 28875 }, { "epoch": 9.618972266178062, "grad_norm": 0.765625, "learning_rate": 5.619854166083738e-08, "loss": 4.0014, "step": 28876 }, { "epoch": 9.619305405180311, "grad_norm": 0.7578125, "learning_rate": 5.609968060369758e-08, "loss": 3.9906, "step": 28877 }, { "epoch": 9.61963854418256, "grad_norm": 0.73828125, "learning_rate": 5.6000906251872377e-08, "loss": 4.0086, "step": 28878 }, { "epoch": 9.61997168318481, "grad_norm": 0.76953125, "learning_rate": 5.5902218606513346e-08, "loss": 4.0395, "step": 28879 }, { "epoch": 9.620304822187057, "grad_norm": 0.78515625, "learning_rate": 5.580361766876874e-08, "loss": 3.9784, "step": 28880 }, { "epoch": 9.620637961189306, "grad_norm": 0.75390625, "learning_rate": 5.57051034397893e-08, "loss": 4.005, "step": 28881 }, { "epoch": 9.620971100191555, "grad_norm": 0.796875, "learning_rate": 5.560667592071994e-08, "loss": 4.0323, "step": 28882 }, { "epoch": 9.621304239193803, "grad_norm": 0.7734375, "learning_rate": 5.5508335112708084e-08, "loss": 4.0189, "step": 28883 }, { "epoch": 9.621637378196052, "grad_norm": 0.76171875, "learning_rate": 5.541008101690031e-08, "loss": 3.9684, "step": 28884 }, { "epoch": 9.621970517198301, "grad_norm": 0.80078125, "learning_rate": 5.531191363443988e-08, "loss": 3.9632, "step": 28885 }, { "epoch": 9.62230365620055, "grad_norm": 0.79296875, "learning_rate": 5.521383296647003e-08, "loss": 3.9563, "step": 28886 }, { "epoch": 9.622636795202798, "grad_norm": 0.7734375, "learning_rate": 5.511583901413403e-08, "loss": 4.0522, "step": 28887 }, { "epoch": 9.622969934205047, "grad_norm": 0.74609375, "learning_rate": 5.5017931778573446e-08, "loss": 3.9763, "step": 28888 }, { "epoch": 9.623303073207296, "grad_norm": 0.76171875, "learning_rate": 5.4920111260927385e-08, "loss": 3.9964, "step": 28889 }, { "epoch": 9.623636212209544, "grad_norm": 0.7578125, "learning_rate": 5.482237746233576e-08, "loss": 4.0264, "step": 28890 }, { "epoch": 9.623969351211793, "grad_norm": 0.765625, "learning_rate": 5.472473038393766e-08, "loss": 4.0202, "step": 28891 }, { "epoch": 9.624302490214042, "grad_norm": 0.765625, "learning_rate": 5.462717002687051e-08, "loss": 3.994, "step": 28892 }, { "epoch": 9.624635629216291, "grad_norm": 0.78515625, "learning_rate": 5.452969639226924e-08, "loss": 3.9316, "step": 28893 }, { "epoch": 9.624968768218539, "grad_norm": 0.7578125, "learning_rate": 5.4432309481270426e-08, "loss": 4.0245, "step": 28894 }, { "epoch": 9.625301907220788, "grad_norm": 0.77734375, "learning_rate": 5.4335009295009005e-08, "loss": 3.9362, "step": 28895 }, { "epoch": 9.625635046223037, "grad_norm": 0.77734375, "learning_rate": 5.423779583461658e-08, "loss": 3.9295, "step": 28896 }, { "epoch": 9.625968185225286, "grad_norm": 0.75, "learning_rate": 5.414066910122556e-08, "loss": 3.9875, "step": 28897 }, { "epoch": 9.626301324227533, "grad_norm": 0.734375, "learning_rate": 5.404362909596922e-08, "loss": 3.9557, "step": 28898 }, { "epoch": 9.626634463229783, "grad_norm": 0.765625, "learning_rate": 5.394667581997664e-08, "loss": 3.962, "step": 28899 }, { "epoch": 9.626967602232032, "grad_norm": 0.765625, "learning_rate": 5.384980927437694e-08, "loss": 4.0646, "step": 28900 }, { "epoch": 9.62730074123428, "grad_norm": 0.7578125, "learning_rate": 5.375302946029836e-08, "loss": 3.9563, "step": 28901 }, { "epoch": 9.627633880236528, "grad_norm": 0.7734375, "learning_rate": 5.3656336378868357e-08, "loss": 3.9968, "step": 28902 }, { "epoch": 9.627967019238778, "grad_norm": 0.73046875, "learning_rate": 5.355973003121434e-08, "loss": 4.0096, "step": 28903 }, { "epoch": 9.628300158241027, "grad_norm": 0.73828125, "learning_rate": 5.346321041845958e-08, "loss": 3.9319, "step": 28904 }, { "epoch": 9.628633297243274, "grad_norm": 0.7421875, "learning_rate": 5.336677754172986e-08, "loss": 3.9296, "step": 28905 }, { "epoch": 9.628966436245523, "grad_norm": 0.78515625, "learning_rate": 5.327043140214677e-08, "loss": 3.9696, "step": 28906 }, { "epoch": 9.629299575247773, "grad_norm": 0.7421875, "learning_rate": 5.317417200083441e-08, "loss": 4.0236, "step": 28907 }, { "epoch": 9.629632714250022, "grad_norm": 0.74609375, "learning_rate": 5.307799933891272e-08, "loss": 4.0255, "step": 28908 }, { "epoch": 9.62996585325227, "grad_norm": 0.72265625, "learning_rate": 5.298191341750247e-08, "loss": 3.9559, "step": 28909 }, { "epoch": 9.630298992254518, "grad_norm": 0.78515625, "learning_rate": 5.288591423772277e-08, "loss": 3.9661, "step": 28910 }, { "epoch": 9.630632131256768, "grad_norm": 0.78125, "learning_rate": 5.2790001800691876e-08, "loss": 3.9447, "step": 28911 }, { "epoch": 9.630965270259015, "grad_norm": 0.76953125, "learning_rate": 5.269417610752642e-08, "loss": 4.03, "step": 28912 }, { "epoch": 9.631298409261264, "grad_norm": 0.73828125, "learning_rate": 5.2598437159343814e-08, "loss": 3.9442, "step": 28913 }, { "epoch": 9.631631548263513, "grad_norm": 0.74609375, "learning_rate": 5.2502784957258186e-08, "loss": 3.8962, "step": 28914 }, { "epoch": 9.63196468726576, "grad_norm": 0.765625, "learning_rate": 5.2407219502382806e-08, "loss": 4.0252, "step": 28915 }, { "epoch": 9.63229782626801, "grad_norm": 0.75390625, "learning_rate": 5.231174079583262e-08, "loss": 3.9535, "step": 28916 }, { "epoch": 9.632630965270259, "grad_norm": 0.7890625, "learning_rate": 5.221634883871923e-08, "loss": 4.0118, "step": 28917 }, { "epoch": 9.632964104272508, "grad_norm": 0.7890625, "learning_rate": 5.212104363215342e-08, "loss": 3.9828, "step": 28918 }, { "epoch": 9.633297243274756, "grad_norm": 0.7734375, "learning_rate": 5.2025825177244304e-08, "loss": 3.9892, "step": 28919 }, { "epoch": 9.633630382277005, "grad_norm": 0.75390625, "learning_rate": 5.193069347510349e-08, "loss": 4.0609, "step": 28920 }, { "epoch": 9.633963521279254, "grad_norm": 0.75390625, "learning_rate": 5.1835648526836754e-08, "loss": 4.0107, "step": 28921 }, { "epoch": 9.634296660281503, "grad_norm": 0.72265625, "learning_rate": 5.174069033355322e-08, "loss": 3.9312, "step": 28922 }, { "epoch": 9.63462979928375, "grad_norm": 0.7578125, "learning_rate": 5.1645818896356165e-08, "loss": 3.9732, "step": 28923 }, { "epoch": 9.634962938286, "grad_norm": 0.734375, "learning_rate": 5.155103421635304e-08, "loss": 3.9313, "step": 28924 }, { "epoch": 9.635296077288249, "grad_norm": 0.75, "learning_rate": 5.14563362946463e-08, "loss": 3.9792, "step": 28925 }, { "epoch": 9.635629216290496, "grad_norm": 0.76953125, "learning_rate": 5.1361725132340886e-08, "loss": 3.9922, "step": 28926 }, { "epoch": 9.635962355292746, "grad_norm": 0.74609375, "learning_rate": 5.1267200730536757e-08, "loss": 3.9242, "step": 28927 }, { "epoch": 9.636295494294995, "grad_norm": 0.7421875, "learning_rate": 5.1172763090335526e-08, "loss": 3.9706, "step": 28928 }, { "epoch": 9.636628633297244, "grad_norm": 0.734375, "learning_rate": 5.107841221283882e-08, "loss": 4.0303, "step": 28929 }, { "epoch": 9.636961772299491, "grad_norm": 0.78515625, "learning_rate": 5.098414809914326e-08, "loss": 3.9868, "step": 28930 }, { "epoch": 9.63729491130174, "grad_norm": 0.78125, "learning_rate": 5.088997075034796e-08, "loss": 3.9615, "step": 28931 }, { "epoch": 9.63762805030399, "grad_norm": 0.80078125, "learning_rate": 5.079588016755038e-08, "loss": 3.9894, "step": 28932 }, { "epoch": 9.637961189306239, "grad_norm": 0.828125, "learning_rate": 5.070187635184464e-08, "loss": 4.021, "step": 28933 }, { "epoch": 9.638294328308486, "grad_norm": 0.76171875, "learning_rate": 5.060795930432738e-08, "loss": 3.9622, "step": 28934 }, { "epoch": 9.638627467310735, "grad_norm": 0.83203125, "learning_rate": 5.05141290260927e-08, "loss": 3.9816, "step": 28935 }, { "epoch": 9.638960606312985, "grad_norm": 0.76953125, "learning_rate": 5.042038551823225e-08, "loss": 3.9493, "step": 28936 }, { "epoch": 9.639293745315232, "grad_norm": 0.73828125, "learning_rate": 5.032672878183847e-08, "loss": 3.9933, "step": 28937 }, { "epoch": 9.639626884317481, "grad_norm": 0.73046875, "learning_rate": 5.0233158818002165e-08, "loss": 4.0425, "step": 28938 }, { "epoch": 9.63996002331973, "grad_norm": 0.7734375, "learning_rate": 5.013967562781413e-08, "loss": 3.9984, "step": 28939 }, { "epoch": 9.64029316232198, "grad_norm": 0.76171875, "learning_rate": 5.004627921236266e-08, "loss": 3.9556, "step": 28940 }, { "epoch": 9.640626301324227, "grad_norm": 0.8125, "learning_rate": 4.9952969572734374e-08, "loss": 3.8657, "step": 28941 }, { "epoch": 9.640959440326476, "grad_norm": 0.76171875, "learning_rate": 4.9859746710018416e-08, "loss": 3.9203, "step": 28942 }, { "epoch": 9.641292579328725, "grad_norm": 0.78515625, "learning_rate": 4.976661062529891e-08, "loss": 3.8661, "step": 28943 }, { "epoch": 9.641625718330973, "grad_norm": 0.76171875, "learning_rate": 4.967356131966083e-08, "loss": 3.9798, "step": 28944 }, { "epoch": 9.641958857333222, "grad_norm": 0.734375, "learning_rate": 4.9580598794188305e-08, "loss": 3.9891, "step": 28945 }, { "epoch": 9.642291996335471, "grad_norm": 0.7734375, "learning_rate": 4.948772304996463e-08, "loss": 3.9851, "step": 28946 }, { "epoch": 9.64262513533772, "grad_norm": 0.74609375, "learning_rate": 4.939493408807144e-08, "loss": 3.9861, "step": 28947 }, { "epoch": 9.642958274339968, "grad_norm": 0.80078125, "learning_rate": 4.930223190958871e-08, "loss": 3.9465, "step": 28948 }, { "epoch": 9.643291413342217, "grad_norm": 0.7578125, "learning_rate": 4.9209616515597245e-08, "loss": 4.0249, "step": 28949 }, { "epoch": 9.643624552344466, "grad_norm": 0.84765625, "learning_rate": 4.911708790717534e-08, "loss": 3.9893, "step": 28950 }, { "epoch": 9.643957691346714, "grad_norm": 0.74609375, "learning_rate": 4.902464608540047e-08, "loss": 4.0201, "step": 28951 }, { "epoch": 9.644290830348963, "grad_norm": 0.76171875, "learning_rate": 4.8932291051349276e-08, "loss": 4.0471, "step": 28952 }, { "epoch": 9.644623969351212, "grad_norm": 0.79296875, "learning_rate": 4.8840022806099226e-08, "loss": 4.0279, "step": 28953 }, { "epoch": 9.644957108353461, "grad_norm": 0.75390625, "learning_rate": 4.87478413507228e-08, "loss": 4.0681, "step": 28954 }, { "epoch": 9.645290247355709, "grad_norm": 0.77734375, "learning_rate": 4.8655746686294967e-08, "loss": 3.9729, "step": 28955 }, { "epoch": 9.645623386357958, "grad_norm": 0.78125, "learning_rate": 4.856373881388737e-08, "loss": 3.989, "step": 28956 }, { "epoch": 9.645956525360207, "grad_norm": 0.77734375, "learning_rate": 4.847181773457332e-08, "loss": 3.9458, "step": 28957 }, { "epoch": 9.646289664362456, "grad_norm": 0.79296875, "learning_rate": 4.8379983449422796e-08, "loss": 3.9284, "step": 28958 }, { "epoch": 9.646622803364703, "grad_norm": 0.76953125, "learning_rate": 4.82882359595041e-08, "loss": 3.9541, "step": 28959 }, { "epoch": 9.646955942366953, "grad_norm": 0.77734375, "learning_rate": 4.8196575265888054e-08, "loss": 3.9105, "step": 28960 }, { "epoch": 9.647289081369202, "grad_norm": 0.79296875, "learning_rate": 4.81050013696413e-08, "loss": 4.0232, "step": 28961 }, { "epoch": 9.64762222037145, "grad_norm": 0.75390625, "learning_rate": 4.801351427182965e-08, "loss": 3.8774, "step": 28962 }, { "epoch": 9.647955359373698, "grad_norm": 0.796875, "learning_rate": 4.7922113973520586e-08, "loss": 3.9546, "step": 28963 }, { "epoch": 9.648288498375948, "grad_norm": 0.7421875, "learning_rate": 4.7830800475776596e-08, "loss": 3.9615, "step": 28964 }, { "epoch": 9.648621637378197, "grad_norm": 0.76953125, "learning_rate": 4.773957377966348e-08, "loss": 3.9984, "step": 28965 }, { "epoch": 9.648954776380444, "grad_norm": 0.796875, "learning_rate": 4.76484338862429e-08, "loss": 4.0413, "step": 28966 }, { "epoch": 9.649287915382693, "grad_norm": 0.76171875, "learning_rate": 4.755738079657568e-08, "loss": 3.9431, "step": 28967 }, { "epoch": 9.649621054384943, "grad_norm": 0.73046875, "learning_rate": 4.746641451172262e-08, "loss": 3.9322, "step": 28968 }, { "epoch": 9.649954193387192, "grad_norm": 0.7734375, "learning_rate": 4.7375535032744554e-08, "loss": 3.9217, "step": 28969 }, { "epoch": 9.65028733238944, "grad_norm": 0.7734375, "learning_rate": 4.728474236069896e-08, "loss": 4.0258, "step": 28970 }, { "epoch": 9.650620471391688, "grad_norm": 0.828125, "learning_rate": 4.71940364966425e-08, "loss": 4.0061, "step": 28971 }, { "epoch": 9.650953610393938, "grad_norm": 0.79296875, "learning_rate": 4.710341744163432e-08, "loss": 4.0107, "step": 28972 }, { "epoch": 9.651286749396185, "grad_norm": 0.765625, "learning_rate": 4.701288519672775e-08, "loss": 3.9267, "step": 28973 }, { "epoch": 9.651619888398434, "grad_norm": 0.79296875, "learning_rate": 4.692243976297778e-08, "loss": 3.9365, "step": 28974 }, { "epoch": 9.651953027400683, "grad_norm": 0.765625, "learning_rate": 4.683208114143772e-08, "loss": 3.9919, "step": 28975 }, { "epoch": 9.65228616640293, "grad_norm": 0.765625, "learning_rate": 4.674180933316091e-08, "loss": 3.9546, "step": 28976 }, { "epoch": 9.65261930540518, "grad_norm": 0.75390625, "learning_rate": 4.6651624339198174e-08, "loss": 4.016, "step": 28977 }, { "epoch": 9.652952444407429, "grad_norm": 0.75, "learning_rate": 4.656152616060033e-08, "loss": 3.9984, "step": 28978 }, { "epoch": 9.653285583409678, "grad_norm": 0.78125, "learning_rate": 4.647151479841655e-08, "loss": 3.9896, "step": 28979 }, { "epoch": 9.653618722411926, "grad_norm": 0.7578125, "learning_rate": 4.638159025369515e-08, "loss": 3.9801, "step": 28980 }, { "epoch": 9.653951861414175, "grad_norm": 0.78515625, "learning_rate": 4.6291752527483636e-08, "loss": 4.0139, "step": 28981 }, { "epoch": 9.654285000416424, "grad_norm": 0.76953125, "learning_rate": 4.6202001620827825e-08, "loss": 3.9962, "step": 28982 }, { "epoch": 9.654618139418673, "grad_norm": 0.7734375, "learning_rate": 4.61123375347744e-08, "loss": 3.9103, "step": 28983 }, { "epoch": 9.65495127842092, "grad_norm": 0.77734375, "learning_rate": 4.60227602703675e-08, "loss": 4.0131, "step": 28984 }, { "epoch": 9.65528441742317, "grad_norm": 0.78125, "learning_rate": 4.593326982864965e-08, "loss": 4.0345, "step": 28985 }, { "epoch": 9.655617556425419, "grad_norm": 0.76171875, "learning_rate": 4.584386621066416e-08, "loss": 3.9398, "step": 28986 }, { "epoch": 9.655950695427666, "grad_norm": 0.7734375, "learning_rate": 4.5754549417451883e-08, "loss": 3.9294, "step": 28987 }, { "epoch": 9.656283834429916, "grad_norm": 0.7421875, "learning_rate": 4.5665319450053643e-08, "loss": 3.9511, "step": 28988 }, { "epoch": 9.656616973432165, "grad_norm": 0.76953125, "learning_rate": 4.5576176309506945e-08, "loss": 3.8963, "step": 28989 }, { "epoch": 9.656950112434414, "grad_norm": 0.75, "learning_rate": 4.5487119996852625e-08, "loss": 3.9775, "step": 28990 }, { "epoch": 9.657283251436661, "grad_norm": 0.81640625, "learning_rate": 4.539815051312735e-08, "loss": 3.9679, "step": 28991 }, { "epoch": 9.65761639043891, "grad_norm": 0.74609375, "learning_rate": 4.530926785936612e-08, "loss": 4.008, "step": 28992 }, { "epoch": 9.65794952944116, "grad_norm": 0.765625, "learning_rate": 4.522047203660479e-08, "loss": 3.9268, "step": 28993 }, { "epoch": 9.658282668443409, "grad_norm": 0.72265625, "learning_rate": 4.513176304587918e-08, "loss": 4.0249, "step": 28994 }, { "epoch": 9.658615807445656, "grad_norm": 0.74609375, "learning_rate": 4.504314088822015e-08, "loss": 4.0126, "step": 28995 }, { "epoch": 9.658948946447905, "grad_norm": 0.78515625, "learning_rate": 4.495460556466185e-08, "loss": 3.9676, "step": 28996 }, { "epoch": 9.659282085450155, "grad_norm": 0.81640625, "learning_rate": 4.4866157076234315e-08, "loss": 3.9637, "step": 28997 }, { "epoch": 9.659615224452402, "grad_norm": 0.7578125, "learning_rate": 4.477779542396837e-08, "loss": 4.0669, "step": 28998 }, { "epoch": 9.659948363454651, "grad_norm": 0.78125, "learning_rate": 4.46895206088932e-08, "loss": 3.9929, "step": 28999 }, { "epoch": 9.6602815024569, "grad_norm": 0.76171875, "learning_rate": 4.460133263203714e-08, "loss": 3.9565, "step": 29000 }, { "epoch": 9.66061464145915, "grad_norm": 0.7578125, "learning_rate": 4.4513231494426876e-08, "loss": 3.9874, "step": 29001 }, { "epoch": 9.660947780461397, "grad_norm": 0.7421875, "learning_rate": 4.4425217197089084e-08, "loss": 3.9746, "step": 29002 }, { "epoch": 9.661280919463646, "grad_norm": 0.78125, "learning_rate": 4.433728974104878e-08, "loss": 4.0015, "step": 29003 }, { "epoch": 9.661614058465895, "grad_norm": 0.75, "learning_rate": 4.4249449127330144e-08, "loss": 3.9407, "step": 29004 }, { "epoch": 9.661947197468143, "grad_norm": 0.77734375, "learning_rate": 4.416169535695569e-08, "loss": 4.0514, "step": 29005 }, { "epoch": 9.662280336470392, "grad_norm": 0.75, "learning_rate": 4.407402843094877e-08, "loss": 4.0157, "step": 29006 }, { "epoch": 9.662613475472641, "grad_norm": 0.796875, "learning_rate": 4.3986448350330224e-08, "loss": 4.0001, "step": 29007 }, { "epoch": 9.66294661447489, "grad_norm": 0.78125, "learning_rate": 4.389895511611924e-08, "loss": 3.9469, "step": 29008 }, { "epoch": 9.663279753477138, "grad_norm": 0.75390625, "learning_rate": 4.381154872933585e-08, "loss": 3.9612, "step": 29009 }, { "epoch": 9.663612892479387, "grad_norm": 0.75390625, "learning_rate": 4.372422919099756e-08, "loss": 3.9307, "step": 29010 }, { "epoch": 9.663946031481636, "grad_norm": 0.78515625, "learning_rate": 4.3636996502121886e-08, "loss": 4.0274, "step": 29011 }, { "epoch": 9.664279170483884, "grad_norm": 0.7421875, "learning_rate": 4.354985066372469e-08, "loss": 3.9316, "step": 29012 }, { "epoch": 9.664612309486133, "grad_norm": 0.78125, "learning_rate": 4.346279167682182e-08, "loss": 3.972, "step": 29013 }, { "epoch": 9.664945448488382, "grad_norm": 0.75390625, "learning_rate": 4.337581954242581e-08, "loss": 4.0009, "step": 29014 }, { "epoch": 9.665278587490631, "grad_norm": 0.7109375, "learning_rate": 4.328893426155001e-08, "loss": 4.0122, "step": 29015 }, { "epoch": 9.665611726492878, "grad_norm": 0.78515625, "learning_rate": 4.320213583520777e-08, "loss": 3.9143, "step": 29016 }, { "epoch": 9.665944865495128, "grad_norm": 0.78125, "learning_rate": 4.311542426440912e-08, "loss": 3.9708, "step": 29017 }, { "epoch": 9.666278004497377, "grad_norm": 0.73828125, "learning_rate": 4.3028799550164085e-08, "loss": 3.9581, "step": 29018 }, { "epoch": 9.666611143499626, "grad_norm": 0.8359375, "learning_rate": 4.294226169348103e-08, "loss": 3.9409, "step": 29019 }, { "epoch": 9.666944282501873, "grad_norm": 0.7578125, "learning_rate": 4.285581069536998e-08, "loss": 3.9386, "step": 29020 }, { "epoch": 9.667277421504123, "grad_norm": 0.79296875, "learning_rate": 4.276944655683595e-08, "loss": 3.9102, "step": 29021 }, { "epoch": 9.667610560506372, "grad_norm": 0.765625, "learning_rate": 4.268316927888566e-08, "loss": 4.0748, "step": 29022 }, { "epoch": 9.66794369950862, "grad_norm": 0.7578125, "learning_rate": 4.2596978862524115e-08, "loss": 3.9519, "step": 29023 }, { "epoch": 9.668276838510868, "grad_norm": 0.765625, "learning_rate": 4.251087530875469e-08, "loss": 3.9601, "step": 29024 }, { "epoch": 9.668609977513118, "grad_norm": 0.796875, "learning_rate": 4.2424858618580755e-08, "loss": 3.9908, "step": 29025 }, { "epoch": 9.668943116515367, "grad_norm": 0.765625, "learning_rate": 4.2338928793003994e-08, "loss": 3.9865, "step": 29026 }, { "epoch": 9.669276255517614, "grad_norm": 0.7890625, "learning_rate": 4.2253085833025284e-08, "loss": 3.929, "step": 29027 }, { "epoch": 9.669609394519863, "grad_norm": 0.78515625, "learning_rate": 4.216732973964465e-08, "loss": 3.9881, "step": 29028 }, { "epoch": 9.669942533522113, "grad_norm": 0.74609375, "learning_rate": 4.208166051386214e-08, "loss": 4.0266, "step": 29029 }, { "epoch": 9.670275672524362, "grad_norm": 0.75, "learning_rate": 4.199607815667361e-08, "loss": 3.9108, "step": 29030 }, { "epoch": 9.67060881152661, "grad_norm": 0.7421875, "learning_rate": 4.19105826690766e-08, "loss": 3.9302, "step": 29031 }, { "epoch": 9.670941950528858, "grad_norm": 0.7578125, "learning_rate": 4.1825174052066984e-08, "loss": 3.9302, "step": 29032 }, { "epoch": 9.671275089531107, "grad_norm": 0.76171875, "learning_rate": 4.173985230663979e-08, "loss": 4.0494, "step": 29033 }, { "epoch": 9.671608228533355, "grad_norm": 0.7265625, "learning_rate": 4.1654617433789235e-08, "loss": 3.9951, "step": 29034 }, { "epoch": 9.671941367535604, "grad_norm": 0.8125, "learning_rate": 4.1569469434507845e-08, "loss": 4.037, "step": 29035 }, { "epoch": 9.672274506537853, "grad_norm": 0.8203125, "learning_rate": 4.1484408309786505e-08, "loss": 3.9593, "step": 29036 }, { "epoch": 9.672607645540102, "grad_norm": 0.78515625, "learning_rate": 4.139943406061691e-08, "loss": 4.0001, "step": 29037 }, { "epoch": 9.67294078454235, "grad_norm": 0.796875, "learning_rate": 4.131454668798829e-08, "loss": 3.9796, "step": 29038 }, { "epoch": 9.673273923544599, "grad_norm": 0.7421875, "learning_rate": 4.122974619289066e-08, "loss": 3.9623, "step": 29039 }, { "epoch": 9.673607062546848, "grad_norm": 0.76953125, "learning_rate": 4.1145032576309914e-08, "loss": 4.0907, "step": 29040 }, { "epoch": 9.673940201549096, "grad_norm": 0.75390625, "learning_rate": 4.106040583923359e-08, "loss": 3.9935, "step": 29041 }, { "epoch": 9.674273340551345, "grad_norm": 0.7890625, "learning_rate": 4.097586598264841e-08, "loss": 3.9888, "step": 29042 }, { "epoch": 9.674606479553594, "grad_norm": 0.73046875, "learning_rate": 4.0891413007537734e-08, "loss": 3.9618, "step": 29043 }, { "epoch": 9.674939618555843, "grad_norm": 0.765625, "learning_rate": 4.080704691488496e-08, "loss": 3.9732, "step": 29044 }, { "epoch": 9.67527275755809, "grad_norm": 0.75, "learning_rate": 4.0722767705674294e-08, "loss": 3.9354, "step": 29045 }, { "epoch": 9.67560589656034, "grad_norm": 0.74609375, "learning_rate": 4.0638575380886613e-08, "loss": 3.9834, "step": 29046 }, { "epoch": 9.675939035562589, "grad_norm": 0.796875, "learning_rate": 4.0554469941502804e-08, "loss": 3.9188, "step": 29047 }, { "epoch": 9.676272174564836, "grad_norm": 0.796875, "learning_rate": 4.047045138850208e-08, "loss": 3.9876, "step": 29048 }, { "epoch": 9.676605313567086, "grad_norm": 0.7421875, "learning_rate": 4.038651972286284e-08, "loss": 4.1029, "step": 29049 }, { "epoch": 9.676938452569335, "grad_norm": 0.75, "learning_rate": 4.030267494556344e-08, "loss": 3.9834, "step": 29050 }, { "epoch": 9.677271591571584, "grad_norm": 0.74609375, "learning_rate": 4.02189170575798e-08, "loss": 3.9643, "step": 29051 }, { "epoch": 9.677604730573831, "grad_norm": 0.7578125, "learning_rate": 4.013524605988861e-08, "loss": 3.9061, "step": 29052 }, { "epoch": 9.67793786957608, "grad_norm": 0.7734375, "learning_rate": 4.005166195346327e-08, "loss": 4.0052, "step": 29053 }, { "epoch": 9.67827100857833, "grad_norm": 0.765625, "learning_rate": 3.9968164739278e-08, "loss": 3.9681, "step": 29054 }, { "epoch": 9.678604147580579, "grad_norm": 0.74609375, "learning_rate": 3.988475441830536e-08, "loss": 4.0073, "step": 29055 }, { "epoch": 9.678937286582826, "grad_norm": 0.74609375, "learning_rate": 3.980143099151623e-08, "loss": 3.9545, "step": 29056 }, { "epoch": 9.679270425585075, "grad_norm": 0.7421875, "learning_rate": 3.971819445988317e-08, "loss": 3.9711, "step": 29057 }, { "epoch": 9.679603564587325, "grad_norm": 0.75, "learning_rate": 3.9635044824372915e-08, "loss": 4.0344, "step": 29058 }, { "epoch": 9.679936703589572, "grad_norm": 0.77734375, "learning_rate": 3.9551982085954677e-08, "loss": 3.9959, "step": 29059 }, { "epoch": 9.680269842591821, "grad_norm": 0.75, "learning_rate": 3.946900624559768e-08, "loss": 3.9968, "step": 29060 }, { "epoch": 9.68060298159407, "grad_norm": 0.765625, "learning_rate": 3.9386117304267e-08, "loss": 3.9526, "step": 29061 }, { "epoch": 9.68093612059632, "grad_norm": 0.75390625, "learning_rate": 3.930331526292852e-08, "loss": 4.0207, "step": 29062 }, { "epoch": 9.681269259598567, "grad_norm": 0.7734375, "learning_rate": 3.922060012254564e-08, "loss": 4.005, "step": 29063 }, { "epoch": 9.681602398600816, "grad_norm": 0.75, "learning_rate": 3.913797188408424e-08, "loss": 3.9366, "step": 29064 }, { "epoch": 9.681935537603065, "grad_norm": 0.75, "learning_rate": 3.905543054850441e-08, "loss": 4.0581, "step": 29065 }, { "epoch": 9.682268676605313, "grad_norm": 0.77734375, "learning_rate": 3.89729761167687e-08, "loss": 3.9748, "step": 29066 }, { "epoch": 9.682601815607562, "grad_norm": 0.77734375, "learning_rate": 3.889060858983717e-08, "loss": 4.0046, "step": 29067 }, { "epoch": 9.682934954609811, "grad_norm": 0.75, "learning_rate": 3.880832796866906e-08, "loss": 3.983, "step": 29068 }, { "epoch": 9.68326809361206, "grad_norm": 0.74609375, "learning_rate": 3.872613425422278e-08, "loss": 3.8814, "step": 29069 }, { "epoch": 9.683601232614308, "grad_norm": 0.79296875, "learning_rate": 3.864402744745671e-08, "loss": 3.9929, "step": 29070 }, { "epoch": 9.683934371616557, "grad_norm": 0.73046875, "learning_rate": 3.856200754932593e-08, "loss": 4.0616, "step": 29071 }, { "epoch": 9.684267510618806, "grad_norm": 0.80859375, "learning_rate": 3.848007456078634e-08, "loss": 4.0008, "step": 29072 }, { "epoch": 9.684600649621053, "grad_norm": 0.76171875, "learning_rate": 3.8398228482792174e-08, "loss": 3.8804, "step": 29073 }, { "epoch": 9.684933788623303, "grad_norm": 0.7421875, "learning_rate": 3.8316469316296844e-08, "loss": 4.0677, "step": 29074 }, { "epoch": 9.685266927625552, "grad_norm": 0.79296875, "learning_rate": 3.823479706225375e-08, "loss": 4.0408, "step": 29075 }, { "epoch": 9.685600066627801, "grad_norm": 0.79296875, "learning_rate": 3.8153211721612126e-08, "loss": 3.9051, "step": 29076 }, { "epoch": 9.685933205630048, "grad_norm": 0.77734375, "learning_rate": 3.8071713295322884e-08, "loss": 4.0246, "step": 29077 }, { "epoch": 9.686266344632298, "grad_norm": 0.78515625, "learning_rate": 3.79903017843361e-08, "loss": 3.9481, "step": 29078 }, { "epoch": 9.686599483634547, "grad_norm": 0.79296875, "learning_rate": 3.790897718960018e-08, "loss": 3.9082, "step": 29079 }, { "epoch": 9.686932622636796, "grad_norm": 0.765625, "learning_rate": 3.7827739512061035e-08, "loss": 3.9712, "step": 29080 }, { "epoch": 9.687265761639043, "grad_norm": 0.87890625, "learning_rate": 3.774658875266623e-08, "loss": 3.9347, "step": 29081 }, { "epoch": 9.687598900641293, "grad_norm": 0.7578125, "learning_rate": 3.766552491236003e-08, "loss": 4.018, "step": 29082 }, { "epoch": 9.687932039643542, "grad_norm": 0.7578125, "learning_rate": 3.758454799208749e-08, "loss": 3.9388, "step": 29083 }, { "epoch": 9.68826517864579, "grad_norm": 0.73046875, "learning_rate": 3.7503657992791207e-08, "loss": 4.0089, "step": 29084 }, { "epoch": 9.688598317648038, "grad_norm": 0.74609375, "learning_rate": 3.742285491541292e-08, "loss": 4.0024, "step": 29085 }, { "epoch": 9.688931456650288, "grad_norm": 0.71875, "learning_rate": 3.7342138760895204e-08, "loss": 3.9303, "step": 29086 }, { "epoch": 9.689264595652537, "grad_norm": 0.77734375, "learning_rate": 3.7261509530177316e-08, "loss": 3.9591, "step": 29087 }, { "epoch": 9.689597734654784, "grad_norm": 0.74609375, "learning_rate": 3.7180967224198504e-08, "loss": 3.9666, "step": 29088 }, { "epoch": 9.689930873657033, "grad_norm": 0.80078125, "learning_rate": 3.710051184389718e-08, "loss": 4.0165, "step": 29089 }, { "epoch": 9.690264012659282, "grad_norm": 0.75390625, "learning_rate": 3.702014339021009e-08, "loss": 4.0372, "step": 29090 }, { "epoch": 9.690597151661532, "grad_norm": 0.78515625, "learning_rate": 3.6939861864074e-08, "loss": 4.0614, "step": 29091 }, { "epoch": 9.690930290663779, "grad_norm": 0.75, "learning_rate": 3.685966726642231e-08, "loss": 4.0724, "step": 29092 }, { "epoch": 9.691263429666028, "grad_norm": 0.79296875, "learning_rate": 3.677955959819179e-08, "loss": 4.0548, "step": 29093 }, { "epoch": 9.691596568668277, "grad_norm": 0.7734375, "learning_rate": 3.6699538860313355e-08, "loss": 3.9284, "step": 29094 }, { "epoch": 9.691929707670525, "grad_norm": 0.765625, "learning_rate": 3.661960505372042e-08, "loss": 3.9985, "step": 29095 }, { "epoch": 9.692262846672774, "grad_norm": 0.7421875, "learning_rate": 3.653975817934308e-08, "loss": 4.0313, "step": 29096 }, { "epoch": 9.692595985675023, "grad_norm": 0.75, "learning_rate": 3.6459998238111413e-08, "loss": 4.0425, "step": 29097 }, { "epoch": 9.692929124677272, "grad_norm": 0.7578125, "learning_rate": 3.638032523095469e-08, "loss": 4.0361, "step": 29098 }, { "epoch": 9.69326226367952, "grad_norm": 0.82421875, "learning_rate": 3.6300739158801335e-08, "loss": 3.9314, "step": 29099 }, { "epoch": 9.693595402681769, "grad_norm": 0.7421875, "learning_rate": 3.6221240022578094e-08, "loss": 3.9125, "step": 29100 }, { "epoch": 9.693928541684018, "grad_norm": 0.78125, "learning_rate": 3.61418278232109e-08, "loss": 4.0263, "step": 29101 }, { "epoch": 9.694261680686266, "grad_norm": 0.80078125, "learning_rate": 3.6062502561624855e-08, "loss": 3.9944, "step": 29102 }, { "epoch": 9.694594819688515, "grad_norm": 0.70703125, "learning_rate": 3.598326423874337e-08, "loss": 4.0641, "step": 29103 }, { "epoch": 9.694927958690764, "grad_norm": 0.75390625, "learning_rate": 3.590411285548989e-08, "loss": 3.9931, "step": 29104 }, { "epoch": 9.695261097693013, "grad_norm": 0.7265625, "learning_rate": 3.582504841278617e-08, "loss": 3.9846, "step": 29105 }, { "epoch": 9.69559423669526, "grad_norm": 0.7890625, "learning_rate": 3.5746070911553965e-08, "loss": 3.9512, "step": 29106 }, { "epoch": 9.69592737569751, "grad_norm": 0.76171875, "learning_rate": 3.5667180352711715e-08, "loss": 4.0022, "step": 29107 }, { "epoch": 9.696260514699759, "grad_norm": 0.8203125, "learning_rate": 3.558837673717952e-08, "loss": 3.947, "step": 29108 }, { "epoch": 9.696593653702006, "grad_norm": 0.75390625, "learning_rate": 3.550966006587497e-08, "loss": 4.0122, "step": 29109 }, { "epoch": 9.696926792704256, "grad_norm": 0.76953125, "learning_rate": 3.543103033971401e-08, "loss": 3.9999, "step": 29110 }, { "epoch": 9.697259931706505, "grad_norm": 0.75390625, "learning_rate": 3.535248755961423e-08, "loss": 3.9412, "step": 29111 }, { "epoch": 9.697593070708754, "grad_norm": 0.7421875, "learning_rate": 3.5274031726489077e-08, "loss": 3.9792, "step": 29112 }, { "epoch": 9.697926209711001, "grad_norm": 0.73828125, "learning_rate": 3.5195662841252805e-08, "loss": 3.9488, "step": 29113 }, { "epoch": 9.69825934871325, "grad_norm": 0.734375, "learning_rate": 3.511738090481886e-08, "loss": 4.019, "step": 29114 }, { "epoch": 9.6985924877155, "grad_norm": 0.77734375, "learning_rate": 3.503918591809818e-08, "loss": 3.9498, "step": 29115 }, { "epoch": 9.698925626717749, "grad_norm": 0.796875, "learning_rate": 3.49610778820017e-08, "loss": 4.0012, "step": 29116 }, { "epoch": 9.699258765719996, "grad_norm": 0.75390625, "learning_rate": 3.4883056797439526e-08, "loss": 4.0129, "step": 29117 }, { "epoch": 9.699591904722245, "grad_norm": 0.74609375, "learning_rate": 3.48051226653201e-08, "loss": 4.0086, "step": 29118 }, { "epoch": 9.699925043724495, "grad_norm": 0.82421875, "learning_rate": 3.472727548655269e-08, "loss": 3.9382, "step": 29119 }, { "epoch": 9.700258182726742, "grad_norm": 0.76953125, "learning_rate": 3.464951526204158e-08, "loss": 3.9843, "step": 29120 }, { "epoch": 9.700591321728991, "grad_norm": 0.7734375, "learning_rate": 3.457184199269353e-08, "loss": 3.9196, "step": 29121 }, { "epoch": 9.70092446073124, "grad_norm": 0.77734375, "learning_rate": 3.44942556794145e-08, "loss": 3.9307, "step": 29122 }, { "epoch": 9.70125759973349, "grad_norm": 0.78125, "learning_rate": 3.4416756323106255e-08, "loss": 3.985, "step": 29123 }, { "epoch": 9.701590738735737, "grad_norm": 0.78515625, "learning_rate": 3.433934392467225e-08, "loss": 3.9884, "step": 29124 }, { "epoch": 9.701923877737986, "grad_norm": 0.76953125, "learning_rate": 3.426201848501509e-08, "loss": 3.9473, "step": 29125 }, { "epoch": 9.702257016740235, "grad_norm": 0.75, "learning_rate": 3.4184780005034057e-08, "loss": 4.0267, "step": 29126 }, { "epoch": 9.702590155742483, "grad_norm": 0.7890625, "learning_rate": 3.410762848562926e-08, "loss": 4.0046, "step": 29127 }, { "epoch": 9.702923294744732, "grad_norm": 0.78125, "learning_rate": 3.403056392769999e-08, "loss": 4.0152, "step": 29128 }, { "epoch": 9.703256433746981, "grad_norm": 0.7578125, "learning_rate": 3.3953586332143014e-08, "loss": 3.959, "step": 29129 }, { "epoch": 9.70358957274923, "grad_norm": 0.765625, "learning_rate": 3.387669569985596e-08, "loss": 4.0039, "step": 29130 }, { "epoch": 9.703922711751478, "grad_norm": 0.8125, "learning_rate": 3.3799892031733114e-08, "loss": 3.904, "step": 29131 }, { "epoch": 9.704255850753727, "grad_norm": 0.76171875, "learning_rate": 3.3723175328669585e-08, "loss": 3.9577, "step": 29132 }, { "epoch": 9.704588989755976, "grad_norm": 0.7578125, "learning_rate": 3.3646545591559665e-08, "loss": 3.964, "step": 29133 }, { "epoch": 9.704922128758223, "grad_norm": 0.7421875, "learning_rate": 3.357000282129513e-08, "loss": 3.9725, "step": 29134 }, { "epoch": 9.705255267760473, "grad_norm": 0.78515625, "learning_rate": 3.349354701876861e-08, "loss": 3.9679, "step": 29135 }, { "epoch": 9.705588406762722, "grad_norm": 0.7265625, "learning_rate": 3.341717818486856e-08, "loss": 3.9429, "step": 29136 }, { "epoch": 9.705921545764971, "grad_norm": 0.7890625, "learning_rate": 3.3340896320486755e-08, "loss": 3.9979, "step": 29137 }, { "epoch": 9.706254684767218, "grad_norm": 0.76171875, "learning_rate": 3.3264701426510835e-08, "loss": 3.9665, "step": 29138 }, { "epoch": 9.706587823769468, "grad_norm": 0.7578125, "learning_rate": 3.318859350382758e-08, "loss": 3.9736, "step": 29139 }, { "epoch": 9.706920962771717, "grad_norm": 0.734375, "learning_rate": 3.3112572553323786e-08, "loss": 3.9967, "step": 29140 }, { "epoch": 9.707254101773966, "grad_norm": 0.75390625, "learning_rate": 3.3036638575886236e-08, "loss": 3.9604, "step": 29141 }, { "epoch": 9.707587240776213, "grad_norm": 0.74609375, "learning_rate": 3.2960791572397565e-08, "loss": 3.9725, "step": 29142 }, { "epoch": 9.707920379778463, "grad_norm": 0.6953125, "learning_rate": 3.2885031543742894e-08, "loss": 3.9188, "step": 29143 }, { "epoch": 9.708253518780712, "grad_norm": 0.80859375, "learning_rate": 3.280935849080319e-08, "loss": 4.048, "step": 29144 }, { "epoch": 9.70858665778296, "grad_norm": 0.73828125, "learning_rate": 3.273377241446107e-08, "loss": 3.991, "step": 29145 }, { "epoch": 9.708919796785208, "grad_norm": 0.765625, "learning_rate": 3.265827331559668e-08, "loss": 4.0063, "step": 29146 }, { "epoch": 9.709252935787458, "grad_norm": 0.7734375, "learning_rate": 3.258286119508846e-08, "loss": 3.9412, "step": 29147 }, { "epoch": 9.709586074789707, "grad_norm": 0.76171875, "learning_rate": 3.2507536053815735e-08, "loss": 3.9586, "step": 29148 }, { "epoch": 9.709919213791954, "grad_norm": 0.8125, "learning_rate": 3.243229789265528e-08, "loss": 4.0539, "step": 29149 }, { "epoch": 9.710252352794203, "grad_norm": 0.76171875, "learning_rate": 3.2357146712484745e-08, "loss": 3.9628, "step": 29150 }, { "epoch": 9.710585491796452, "grad_norm": 0.84375, "learning_rate": 3.228208251417758e-08, "loss": 3.9627, "step": 29151 }, { "epoch": 9.710918630798702, "grad_norm": 0.8203125, "learning_rate": 3.220710529860976e-08, "loss": 3.9434, "step": 29152 }, { "epoch": 9.711251769800949, "grad_norm": 0.796875, "learning_rate": 3.213221506665393e-08, "loss": 3.9923, "step": 29153 }, { "epoch": 9.711584908803198, "grad_norm": 0.7421875, "learning_rate": 3.205741181918187e-08, "loss": 3.9618, "step": 29154 }, { "epoch": 9.711918047805447, "grad_norm": 0.77734375, "learning_rate": 3.1982695557066234e-08, "loss": 3.9449, "step": 29155 }, { "epoch": 9.712251186807695, "grad_norm": 0.75, "learning_rate": 3.190806628117632e-08, "loss": 3.9501, "step": 29156 }, { "epoch": 9.712584325809944, "grad_norm": 0.765625, "learning_rate": 3.183352399238143e-08, "loss": 3.9821, "step": 29157 }, { "epoch": 9.712917464812193, "grad_norm": 0.7578125, "learning_rate": 3.175906869154921e-08, "loss": 4.0124, "step": 29158 }, { "epoch": 9.713250603814442, "grad_norm": 0.76953125, "learning_rate": 3.1684700379548124e-08, "loss": 4.0251, "step": 29159 }, { "epoch": 9.71358374281669, "grad_norm": 0.80078125, "learning_rate": 3.1610419057244155e-08, "loss": 4.0462, "step": 29160 }, { "epoch": 9.713916881818939, "grad_norm": 0.7890625, "learning_rate": 3.153622472550244e-08, "loss": 3.9953, "step": 29161 }, { "epoch": 9.714250020821188, "grad_norm": 0.78515625, "learning_rate": 3.146211738518645e-08, "loss": 4.0515, "step": 29162 }, { "epoch": 9.714583159823436, "grad_norm": 0.78125, "learning_rate": 3.1388097037160504e-08, "loss": 3.9638, "step": 29163 }, { "epoch": 9.714916298825685, "grad_norm": 0.7578125, "learning_rate": 3.13141636822864e-08, "loss": 3.9975, "step": 29164 }, { "epoch": 9.715249437827934, "grad_norm": 0.73828125, "learning_rate": 3.124031732142513e-08, "loss": 4.0396, "step": 29165 }, { "epoch": 9.715582576830183, "grad_norm": 0.76171875, "learning_rate": 3.116655795543599e-08, "loss": 3.9896, "step": 29166 }, { "epoch": 9.71591571583243, "grad_norm": 0.8203125, "learning_rate": 3.1092885585179977e-08, "loss": 3.9231, "step": 29167 }, { "epoch": 9.71624885483468, "grad_norm": 0.73046875, "learning_rate": 3.101930021151389e-08, "loss": 3.9654, "step": 29168 }, { "epoch": 9.716581993836929, "grad_norm": 0.765625, "learning_rate": 3.094580183529455e-08, "loss": 3.9622, "step": 29169 }, { "epoch": 9.716915132839176, "grad_norm": 0.7421875, "learning_rate": 3.0872390457379605e-08, "loss": 3.9078, "step": 29170 }, { "epoch": 9.717248271841425, "grad_norm": 0.8046875, "learning_rate": 3.079906607862254e-08, "loss": 3.9912, "step": 29171 }, { "epoch": 9.717581410843675, "grad_norm": 0.80078125, "learning_rate": 3.07258286998785e-08, "loss": 3.988, "step": 29172 }, { "epoch": 9.717914549845924, "grad_norm": 0.7578125, "learning_rate": 3.065267832200014e-08, "loss": 3.9977, "step": 29173 }, { "epoch": 9.718247688848171, "grad_norm": 0.79296875, "learning_rate": 3.057961494583928e-08, "loss": 3.9856, "step": 29174 }, { "epoch": 9.71858082785042, "grad_norm": 0.765625, "learning_rate": 3.0506638572247724e-08, "loss": 4.0114, "step": 29175 }, { "epoch": 9.71891396685267, "grad_norm": 0.734375, "learning_rate": 3.043374920207398e-08, "loss": 3.9858, "step": 29176 }, { "epoch": 9.719247105854919, "grad_norm": 0.79296875, "learning_rate": 3.036094683616902e-08, "loss": 3.9151, "step": 29177 }, { "epoch": 9.719580244857166, "grad_norm": 0.80859375, "learning_rate": 3.028823147537885e-08, "loss": 3.9538, "step": 29178 }, { "epoch": 9.719913383859415, "grad_norm": 0.78515625, "learning_rate": 3.021560312055194e-08, "loss": 3.9533, "step": 29179 }, { "epoch": 9.720246522861665, "grad_norm": 0.77734375, "learning_rate": 3.014306177253345e-08, "loss": 3.9846, "step": 29180 }, { "epoch": 9.720579661863912, "grad_norm": 0.7734375, "learning_rate": 3.0070607432168553e-08, "loss": 3.9691, "step": 29181 }, { "epoch": 9.720912800866161, "grad_norm": 0.73828125, "learning_rate": 2.9998240100302385e-08, "loss": 3.9838, "step": 29182 }, { "epoch": 9.72124593986841, "grad_norm": 0.74609375, "learning_rate": 2.992595977777512e-08, "loss": 3.931, "step": 29183 }, { "epoch": 9.72157907887066, "grad_norm": 0.79296875, "learning_rate": 2.985376646543109e-08, "loss": 3.9683, "step": 29184 }, { "epoch": 9.721912217872907, "grad_norm": 0.7578125, "learning_rate": 2.9781660164110436e-08, "loss": 3.9633, "step": 29185 }, { "epoch": 9.722245356875156, "grad_norm": 0.79296875, "learning_rate": 2.97096408746525e-08, "loss": 3.92, "step": 29186 }, { "epoch": 9.722578495877405, "grad_norm": 0.796875, "learning_rate": 2.963770859789744e-08, "loss": 3.9548, "step": 29187 }, { "epoch": 9.722911634879653, "grad_norm": 0.7109375, "learning_rate": 2.9565863334681254e-08, "loss": 4.0352, "step": 29188 }, { "epoch": 9.723244773881902, "grad_norm": 0.7578125, "learning_rate": 2.9494105085842438e-08, "loss": 3.9798, "step": 29189 }, { "epoch": 9.723577912884151, "grad_norm": 0.77734375, "learning_rate": 2.9422433852216156e-08, "loss": 3.9545, "step": 29190 }, { "epoch": 9.7239110518864, "grad_norm": 0.8046875, "learning_rate": 2.9350849634637577e-08, "loss": 4.0084, "step": 29191 }, { "epoch": 9.724244190888648, "grad_norm": 0.79296875, "learning_rate": 2.92793524339402e-08, "loss": 3.9608, "step": 29192 }, { "epoch": 9.724577329890897, "grad_norm": 0.76953125, "learning_rate": 2.9207942250955855e-08, "loss": 3.9649, "step": 29193 }, { "epoch": 9.724910468893146, "grad_norm": 0.7734375, "learning_rate": 2.9136619086518045e-08, "loss": 4.0119, "step": 29194 }, { "epoch": 9.725243607895393, "grad_norm": 0.77734375, "learning_rate": 2.906538294145611e-08, "loss": 3.9559, "step": 29195 }, { "epoch": 9.725576746897643, "grad_norm": 0.75, "learning_rate": 2.8994233816601045e-08, "loss": 3.982, "step": 29196 }, { "epoch": 9.725909885899892, "grad_norm": 0.78125, "learning_rate": 2.892317171278136e-08, "loss": 4.0297, "step": 29197 }, { "epoch": 9.726243024902141, "grad_norm": 0.73828125, "learning_rate": 2.8852196630823892e-08, "loss": 4.0306, "step": 29198 }, { "epoch": 9.726576163904388, "grad_norm": 0.74609375, "learning_rate": 2.8781308571555476e-08, "loss": 3.9702, "step": 29199 }, { "epoch": 9.726909302906638, "grad_norm": 0.75390625, "learning_rate": 2.8710507535802956e-08, "loss": 3.9728, "step": 29200 }, { "epoch": 9.727242441908887, "grad_norm": 0.74609375, "learning_rate": 2.8639793524389833e-08, "loss": 4.0263, "step": 29201 }, { "epoch": 9.727575580911136, "grad_norm": 0.7578125, "learning_rate": 2.8569166538140455e-08, "loss": 3.9377, "step": 29202 }, { "epoch": 9.727908719913383, "grad_norm": 0.7265625, "learning_rate": 2.8498626577876663e-08, "loss": 3.9982, "step": 29203 }, { "epoch": 9.728241858915633, "grad_norm": 0.7421875, "learning_rate": 2.8428173644421128e-08, "loss": 3.9942, "step": 29204 }, { "epoch": 9.728574997917882, "grad_norm": 0.77734375, "learning_rate": 2.8357807738594032e-08, "loss": 4.0591, "step": 29205 }, { "epoch": 9.72890813692013, "grad_norm": 0.75390625, "learning_rate": 2.8287528861213884e-08, "loss": 3.9652, "step": 29206 }, { "epoch": 9.729241275922378, "grad_norm": 0.7578125, "learning_rate": 2.8217337013101696e-08, "loss": 4.0082, "step": 29207 }, { "epoch": 9.729574414924627, "grad_norm": 0.734375, "learning_rate": 2.814723219507265e-08, "loss": 4.0061, "step": 29208 }, { "epoch": 9.729907553926877, "grad_norm": 0.76953125, "learning_rate": 2.807721440794525e-08, "loss": 3.9499, "step": 29209 }, { "epoch": 9.730240692929124, "grad_norm": 0.73828125, "learning_rate": 2.8007283652533855e-08, "loss": 3.9481, "step": 29210 }, { "epoch": 9.730573831931373, "grad_norm": 0.75390625, "learning_rate": 2.7937439929652807e-08, "loss": 3.9885, "step": 29211 }, { "epoch": 9.730906970933622, "grad_norm": 0.81640625, "learning_rate": 2.786768324011646e-08, "loss": 4.0001, "step": 29212 }, { "epoch": 9.731240109935872, "grad_norm": 0.75390625, "learning_rate": 2.7798013584736658e-08, "loss": 4.0745, "step": 29213 }, { "epoch": 9.731573248938119, "grad_norm": 0.7578125, "learning_rate": 2.772843096432526e-08, "loss": 4.0564, "step": 29214 }, { "epoch": 9.731906387940368, "grad_norm": 0.8046875, "learning_rate": 2.7658935379693274e-08, "loss": 3.9955, "step": 29215 }, { "epoch": 9.732239526942617, "grad_norm": 0.75, "learning_rate": 2.7589526831649226e-08, "loss": 3.934, "step": 29216 }, { "epoch": 9.732572665944865, "grad_norm": 0.74609375, "learning_rate": 2.7520205321002468e-08, "loss": 3.9516, "step": 29217 }, { "epoch": 9.732905804947114, "grad_norm": 0.76171875, "learning_rate": 2.7450970848559852e-08, "loss": 4.0001, "step": 29218 }, { "epoch": 9.733238943949363, "grad_norm": 0.7734375, "learning_rate": 2.7381823415127406e-08, "loss": 4.0078, "step": 29219 }, { "epoch": 9.733572082951612, "grad_norm": 0.76171875, "learning_rate": 2.7312763021511145e-08, "loss": 3.9146, "step": 29220 }, { "epoch": 9.73390522195386, "grad_norm": 0.71484375, "learning_rate": 2.7243789668515428e-08, "loss": 3.9643, "step": 29221 }, { "epoch": 9.734238360956109, "grad_norm": 0.75390625, "learning_rate": 2.7174903356943782e-08, "loss": 3.9875, "step": 29222 }, { "epoch": 9.734571499958358, "grad_norm": 0.74609375, "learning_rate": 2.710610408759806e-08, "loss": 3.9573, "step": 29223 }, { "epoch": 9.734904638960606, "grad_norm": 0.7734375, "learning_rate": 2.7037391861280126e-08, "loss": 3.9765, "step": 29224 }, { "epoch": 9.735237777962855, "grad_norm": 0.75, "learning_rate": 2.6968766678790168e-08, "loss": 3.9072, "step": 29225 }, { "epoch": 9.735570916965104, "grad_norm": 0.76953125, "learning_rate": 2.6900228540928385e-08, "loss": 3.9765, "step": 29226 }, { "epoch": 9.735904055967353, "grad_norm": 0.76171875, "learning_rate": 2.6831777448490803e-08, "loss": 3.9888, "step": 29227 }, { "epoch": 9.7362371949696, "grad_norm": 0.78125, "learning_rate": 2.676341340227678e-08, "loss": 3.9512, "step": 29228 }, { "epoch": 9.73657033397185, "grad_norm": 0.71875, "learning_rate": 2.6695136403082355e-08, "loss": 3.9324, "step": 29229 }, { "epoch": 9.736903472974099, "grad_norm": 0.7890625, "learning_rate": 2.6626946451701882e-08, "loss": 3.9828, "step": 29230 }, { "epoch": 9.737236611976346, "grad_norm": 0.734375, "learning_rate": 2.6558843548930567e-08, "loss": 3.9602, "step": 29231 }, { "epoch": 9.737569750978595, "grad_norm": 0.7890625, "learning_rate": 2.6490827695561102e-08, "loss": 3.8985, "step": 29232 }, { "epoch": 9.737902889980845, "grad_norm": 0.828125, "learning_rate": 2.6422898892386193e-08, "loss": 4.0404, "step": 29233 }, { "epoch": 9.738236028983094, "grad_norm": 0.765625, "learning_rate": 2.6355057140196038e-08, "loss": 4.001, "step": 29234 }, { "epoch": 9.738569167985341, "grad_norm": 0.73046875, "learning_rate": 2.62873024397825e-08, "loss": 3.9733, "step": 29235 }, { "epoch": 9.73890230698759, "grad_norm": 0.7265625, "learning_rate": 2.621963479193329e-08, "loss": 4.0121, "step": 29236 }, { "epoch": 9.73923544598984, "grad_norm": 0.76953125, "learning_rate": 2.615205419743777e-08, "loss": 3.9759, "step": 29237 }, { "epoch": 9.739568584992089, "grad_norm": 0.74609375, "learning_rate": 2.608456065708198e-08, "loss": 3.999, "step": 29238 }, { "epoch": 9.739901723994336, "grad_norm": 0.76953125, "learning_rate": 2.6017154171651958e-08, "loss": 3.9841, "step": 29239 }, { "epoch": 9.740234862996585, "grad_norm": 0.74609375, "learning_rate": 2.5949834741933742e-08, "loss": 4.038, "step": 29240 }, { "epoch": 9.740568001998835, "grad_norm": 0.74609375, "learning_rate": 2.5882602368711705e-08, "loss": 4.0417, "step": 29241 }, { "epoch": 9.740901141001082, "grad_norm": 0.7734375, "learning_rate": 2.5815457052768554e-08, "loss": 4.0055, "step": 29242 }, { "epoch": 9.741234280003331, "grad_norm": 0.74609375, "learning_rate": 2.5748398794886162e-08, "loss": 3.9085, "step": 29243 }, { "epoch": 9.74156741900558, "grad_norm": 0.76953125, "learning_rate": 2.5681427595845575e-08, "loss": 3.9432, "step": 29244 }, { "epoch": 9.74190055800783, "grad_norm": 0.7734375, "learning_rate": 2.5614543456426997e-08, "loss": 3.9261, "step": 29245 }, { "epoch": 9.742233697010077, "grad_norm": 0.7421875, "learning_rate": 2.5547746377408976e-08, "loss": 3.9496, "step": 29246 }, { "epoch": 9.742566836012326, "grad_norm": 0.7421875, "learning_rate": 2.5481036359570886e-08, "loss": 4.0024, "step": 29247 }, { "epoch": 9.742899975014575, "grad_norm": 0.7265625, "learning_rate": 2.5414413403688773e-08, "loss": 4.0204, "step": 29248 }, { "epoch": 9.743233114016824, "grad_norm": 0.80078125, "learning_rate": 2.5347877510538687e-08, "loss": 3.9805, "step": 29249 }, { "epoch": 9.743566253019072, "grad_norm": 0.76953125, "learning_rate": 2.5281428680895836e-08, "loss": 4.0117, "step": 29250 }, { "epoch": 9.743899392021321, "grad_norm": 0.76953125, "learning_rate": 2.5215066915533768e-08, "loss": 3.9531, "step": 29251 }, { "epoch": 9.74423253102357, "grad_norm": 0.7890625, "learning_rate": 2.5148792215226035e-08, "loss": 3.9618, "step": 29252 }, { "epoch": 9.744565670025818, "grad_norm": 0.77734375, "learning_rate": 2.5082604580744516e-08, "loss": 4.0641, "step": 29253 }, { "epoch": 9.744898809028067, "grad_norm": 0.7890625, "learning_rate": 2.5016504012860265e-08, "loss": 3.9493, "step": 29254 }, { "epoch": 9.745231948030316, "grad_norm": 0.76953125, "learning_rate": 2.495049051234266e-08, "loss": 3.963, "step": 29255 }, { "epoch": 9.745565087032563, "grad_norm": 0.71484375, "learning_rate": 2.488456407996026e-08, "loss": 3.9705, "step": 29256 }, { "epoch": 9.745898226034813, "grad_norm": 0.7734375, "learning_rate": 2.4818724716482443e-08, "loss": 4.0079, "step": 29257 }, { "epoch": 9.746231365037062, "grad_norm": 0.76171875, "learning_rate": 2.475297242267527e-08, "loss": 3.9902, "step": 29258 }, { "epoch": 9.746564504039311, "grad_norm": 0.78515625, "learning_rate": 2.4687307199303954e-08, "loss": 3.9603, "step": 29259 }, { "epoch": 9.746897643041558, "grad_norm": 0.74609375, "learning_rate": 2.4621729047134555e-08, "loss": 4.0058, "step": 29260 }, { "epoch": 9.747230782043808, "grad_norm": 0.75, "learning_rate": 2.4556237966929797e-08, "loss": 4.007, "step": 29261 }, { "epoch": 9.747563921046057, "grad_norm": 0.72265625, "learning_rate": 2.4490833959454062e-08, "loss": 3.9651, "step": 29262 }, { "epoch": 9.747897060048306, "grad_norm": 0.79296875, "learning_rate": 2.4425517025467582e-08, "loss": 4.0281, "step": 29263 }, { "epoch": 9.748230199050553, "grad_norm": 0.73046875, "learning_rate": 2.436028716573141e-08, "loss": 4.0514, "step": 29264 }, { "epoch": 9.748563338052803, "grad_norm": 0.71484375, "learning_rate": 2.4295144381006606e-08, "loss": 3.9827, "step": 29265 }, { "epoch": 9.748896477055052, "grad_norm": 0.8046875, "learning_rate": 2.4230088672050066e-08, "loss": 3.9151, "step": 29266 }, { "epoch": 9.749229616057299, "grad_norm": 0.765625, "learning_rate": 2.416512003962118e-08, "loss": 3.9581, "step": 29267 }, { "epoch": 9.749562755059548, "grad_norm": 0.7421875, "learning_rate": 2.4100238484476013e-08, "loss": 3.9665, "step": 29268 }, { "epoch": 9.749895894061797, "grad_norm": 0.76171875, "learning_rate": 2.4035444007369788e-08, "loss": 3.96, "step": 29269 }, { "epoch": 9.750229033064047, "grad_norm": 0.7734375, "learning_rate": 2.3970736609057742e-08, "loss": 3.9956, "step": 29270 }, { "epoch": 9.750562172066294, "grad_norm": 0.77734375, "learning_rate": 2.3906116290294268e-08, "loss": 3.9845, "step": 29271 }, { "epoch": 9.750895311068543, "grad_norm": 0.77734375, "learning_rate": 2.3841583051831262e-08, "loss": 3.9379, "step": 29272 }, { "epoch": 9.751228450070792, "grad_norm": 0.78515625, "learning_rate": 2.3777136894419794e-08, "loss": 3.9424, "step": 29273 }, { "epoch": 9.751561589073042, "grad_norm": 0.75, "learning_rate": 2.371277781881176e-08, "loss": 3.9786, "step": 29274 }, { "epoch": 9.751894728075289, "grad_norm": 0.76953125, "learning_rate": 2.3648505825755728e-08, "loss": 4.0206, "step": 29275 }, { "epoch": 9.752227867077538, "grad_norm": 0.80859375, "learning_rate": 2.35843209160011e-08, "loss": 4.0124, "step": 29276 }, { "epoch": 9.752561006079787, "grad_norm": 0.7578125, "learning_rate": 2.352022309029561e-08, "loss": 3.9565, "step": 29277 }, { "epoch": 9.752894145082035, "grad_norm": 0.78125, "learning_rate": 2.345621234938533e-08, "loss": 4.0356, "step": 29278 }, { "epoch": 9.753227284084284, "grad_norm": 0.75390625, "learning_rate": 2.3392288694015497e-08, "loss": 4.009, "step": 29279 }, { "epoch": 9.753560423086533, "grad_norm": 0.8125, "learning_rate": 2.3328452124931342e-08, "loss": 4.0504, "step": 29280 }, { "epoch": 9.753893562088782, "grad_norm": 0.77734375, "learning_rate": 2.326470264287728e-08, "loss": 4.0132, "step": 29281 }, { "epoch": 9.75422670109103, "grad_norm": 0.75390625, "learning_rate": 2.3201040248593542e-08, "loss": 4.0077, "step": 29282 }, { "epoch": 9.754559840093279, "grad_norm": 0.77734375, "learning_rate": 2.3137464942823704e-08, "loss": 3.9632, "step": 29283 }, { "epoch": 9.754892979095528, "grad_norm": 0.734375, "learning_rate": 2.307397672630718e-08, "loss": 3.9961, "step": 29284 }, { "epoch": 9.755226118097776, "grad_norm": 0.75, "learning_rate": 2.30105755997842e-08, "loss": 3.9822, "step": 29285 }, { "epoch": 9.755559257100025, "grad_norm": 0.79296875, "learning_rate": 2.2947261563992518e-08, "loss": 3.9476, "step": 29286 }, { "epoch": 9.755892396102274, "grad_norm": 0.74609375, "learning_rate": 2.2884034619669868e-08, "loss": 3.9598, "step": 29287 }, { "epoch": 9.756225535104523, "grad_norm": 0.734375, "learning_rate": 2.2820894767553168e-08, "loss": 3.9632, "step": 29288 }, { "epoch": 9.75655867410677, "grad_norm": 0.76171875, "learning_rate": 2.275784200837766e-08, "loss": 4.0265, "step": 29289 }, { "epoch": 9.75689181310902, "grad_norm": 0.734375, "learning_rate": 2.2694876342876925e-08, "loss": 3.9701, "step": 29290 }, { "epoch": 9.757224952111269, "grad_norm": 0.7734375, "learning_rate": 2.2631997771786206e-08, "loss": 4.0198, "step": 29291 }, { "epoch": 9.757558091113516, "grad_norm": 0.8046875, "learning_rate": 2.2569206295835754e-08, "loss": 3.9997, "step": 29292 }, { "epoch": 9.757891230115765, "grad_norm": 0.78515625, "learning_rate": 2.2506501915758316e-08, "loss": 3.989, "step": 29293 }, { "epoch": 9.758224369118015, "grad_norm": 0.71875, "learning_rate": 2.2443884632283306e-08, "loss": 4.008, "step": 29294 }, { "epoch": 9.758557508120264, "grad_norm": 0.73046875, "learning_rate": 2.238135444614181e-08, "loss": 3.8919, "step": 29295 }, { "epoch": 9.758890647122511, "grad_norm": 0.765625, "learning_rate": 2.2318911358059912e-08, "loss": 4.0128, "step": 29296 }, { "epoch": 9.75922378612476, "grad_norm": 0.73828125, "learning_rate": 2.225655536876703e-08, "loss": 3.9881, "step": 29297 }, { "epoch": 9.75955692512701, "grad_norm": 0.74609375, "learning_rate": 2.219428647898758e-08, "loss": 3.9942, "step": 29298 }, { "epoch": 9.759890064129259, "grad_norm": 0.765625, "learning_rate": 2.2132104689448485e-08, "loss": 4.0143, "step": 29299 }, { "epoch": 9.760223203131506, "grad_norm": 0.7890625, "learning_rate": 2.2070010000873332e-08, "loss": 4.0163, "step": 29300 }, { "epoch": 9.760556342133755, "grad_norm": 0.74609375, "learning_rate": 2.2008002413984873e-08, "loss": 3.9794, "step": 29301 }, { "epoch": 9.760889481136005, "grad_norm": 0.7421875, "learning_rate": 2.1946081929506702e-08, "loss": 4.0726, "step": 29302 }, { "epoch": 9.761222620138252, "grad_norm": 0.80859375, "learning_rate": 2.1884248548158238e-08, "loss": 4.0636, "step": 29303 }, { "epoch": 9.761555759140501, "grad_norm": 0.7265625, "learning_rate": 2.1822502270661404e-08, "loss": 3.9613, "step": 29304 }, { "epoch": 9.76188889814275, "grad_norm": 0.76953125, "learning_rate": 2.1760843097734796e-08, "loss": 3.9167, "step": 29305 }, { "epoch": 9.762222037145, "grad_norm": 0.76171875, "learning_rate": 2.1699271030096168e-08, "loss": 3.9998, "step": 29306 }, { "epoch": 9.762555176147247, "grad_norm": 0.7734375, "learning_rate": 2.1637786068463284e-08, "loss": 3.9861, "step": 29307 }, { "epoch": 9.762888315149496, "grad_norm": 0.7421875, "learning_rate": 2.15763882135514e-08, "loss": 4.013, "step": 29308 }, { "epoch": 9.763221454151745, "grad_norm": 0.77734375, "learning_rate": 2.151507746607745e-08, "loss": 4.0393, "step": 29309 }, { "epoch": 9.763554593153994, "grad_norm": 0.76171875, "learning_rate": 2.1453853826753355e-08, "loss": 4.0057, "step": 29310 }, { "epoch": 9.763887732156242, "grad_norm": 0.78515625, "learning_rate": 2.1392717296294386e-08, "loss": 3.9889, "step": 29311 }, { "epoch": 9.764220871158491, "grad_norm": 0.7421875, "learning_rate": 2.13316678754108e-08, "loss": 3.907, "step": 29312 }, { "epoch": 9.76455401016074, "grad_norm": 0.7265625, "learning_rate": 2.127070556481536e-08, "loss": 3.9497, "step": 29313 }, { "epoch": 9.764887149162988, "grad_norm": 0.80859375, "learning_rate": 2.1209830365216676e-08, "loss": 3.9853, "step": 29314 }, { "epoch": 9.765220288165237, "grad_norm": 0.74609375, "learning_rate": 2.1149042277324172e-08, "loss": 3.9446, "step": 29315 }, { "epoch": 9.765553427167486, "grad_norm": 0.765625, "learning_rate": 2.1088341301846447e-08, "loss": 3.918, "step": 29316 }, { "epoch": 9.765886566169733, "grad_norm": 0.7890625, "learning_rate": 2.1027727439491274e-08, "loss": 3.9124, "step": 29317 }, { "epoch": 9.766219705171983, "grad_norm": 0.7890625, "learning_rate": 2.0967200690962252e-08, "loss": 3.9278, "step": 29318 }, { "epoch": 9.766552844174232, "grad_norm": 0.73828125, "learning_rate": 2.0906761056966317e-08, "loss": 4.0157, "step": 29319 }, { "epoch": 9.766885983176481, "grad_norm": 0.77734375, "learning_rate": 2.0846408538207072e-08, "loss": 3.9957, "step": 29320 }, { "epoch": 9.767219122178728, "grad_norm": 0.76953125, "learning_rate": 2.0786143135387292e-08, "loss": 4.0179, "step": 29321 }, { "epoch": 9.767552261180978, "grad_norm": 0.76171875, "learning_rate": 2.072596484920891e-08, "loss": 3.9961, "step": 29322 }, { "epoch": 9.767885400183227, "grad_norm": 0.75390625, "learning_rate": 2.0665873680372204e-08, "loss": 3.9889, "step": 29323 }, { "epoch": 9.768218539185476, "grad_norm": 0.72265625, "learning_rate": 2.060586962957911e-08, "loss": 3.9246, "step": 29324 }, { "epoch": 9.768551678187723, "grad_norm": 0.74609375, "learning_rate": 2.0545952697526572e-08, "loss": 3.9979, "step": 29325 }, { "epoch": 9.768884817189972, "grad_norm": 0.75390625, "learning_rate": 2.0486122884913195e-08, "loss": 3.9923, "step": 29326 }, { "epoch": 9.769217956192222, "grad_norm": 0.76171875, "learning_rate": 2.0426380192436756e-08, "loss": 4.0147, "step": 29327 }, { "epoch": 9.769551095194469, "grad_norm": 0.7734375, "learning_rate": 2.03667246207917e-08, "loss": 4.0139, "step": 29328 }, { "epoch": 9.769884234196718, "grad_norm": 0.765625, "learning_rate": 2.0307156170673302e-08, "loss": 3.9877, "step": 29329 }, { "epoch": 9.770217373198967, "grad_norm": 0.75390625, "learning_rate": 2.0247674842775176e-08, "loss": 4.0398, "step": 29330 }, { "epoch": 9.770550512201217, "grad_norm": 0.74609375, "learning_rate": 2.0188280637790936e-08, "loss": 4.0312, "step": 29331 }, { "epoch": 9.770883651203464, "grad_norm": 0.73046875, "learning_rate": 2.012897355641169e-08, "loss": 3.9623, "step": 29332 }, { "epoch": 9.771216790205713, "grad_norm": 0.76171875, "learning_rate": 2.006975359932939e-08, "loss": 4.0525, "step": 29333 }, { "epoch": 9.771549929207962, "grad_norm": 0.7265625, "learning_rate": 2.0010620767231814e-08, "loss": 4.0175, "step": 29334 }, { "epoch": 9.771883068210212, "grad_norm": 0.76953125, "learning_rate": 1.995157506081008e-08, "loss": 4.0245, "step": 29335 }, { "epoch": 9.772216207212459, "grad_norm": 0.79296875, "learning_rate": 1.989261648074947e-08, "loss": 3.9807, "step": 29336 }, { "epoch": 9.772549346214708, "grad_norm": 0.74609375, "learning_rate": 1.9833745027739435e-08, "loss": 3.9901, "step": 29337 }, { "epoch": 9.772882485216957, "grad_norm": 0.7734375, "learning_rate": 1.9774960702462764e-08, "loss": 3.9794, "step": 29338 }, { "epoch": 9.773215624219205, "grad_norm": 0.7578125, "learning_rate": 1.97162635056064e-08, "loss": 3.9676, "step": 29339 }, { "epoch": 9.773548763221454, "grad_norm": 0.76953125, "learning_rate": 1.965765343785314e-08, "loss": 3.9772, "step": 29340 }, { "epoch": 9.773881902223703, "grad_norm": 0.74609375, "learning_rate": 1.9599130499885766e-08, "loss": 4.0013, "step": 29341 }, { "epoch": 9.774215041225952, "grad_norm": 0.76953125, "learning_rate": 1.954069469238623e-08, "loss": 4.036, "step": 29342 }, { "epoch": 9.7745481802282, "grad_norm": 0.78515625, "learning_rate": 1.9482346016034823e-08, "loss": 3.9781, "step": 29343 }, { "epoch": 9.774881319230449, "grad_norm": 0.72265625, "learning_rate": 1.9424084471511838e-08, "loss": 3.9726, "step": 29344 }, { "epoch": 9.775214458232698, "grad_norm": 0.73828125, "learning_rate": 1.9365910059495062e-08, "loss": 4.0309, "step": 29345 }, { "epoch": 9.775547597234945, "grad_norm": 0.80859375, "learning_rate": 1.9307822780662287e-08, "loss": 4.0016, "step": 29346 }, { "epoch": 9.775880736237195, "grad_norm": 0.7890625, "learning_rate": 1.924982263568964e-08, "loss": 3.9926, "step": 29347 }, { "epoch": 9.776213875239444, "grad_norm": 0.74609375, "learning_rate": 1.9191909625254077e-08, "loss": 4.0058, "step": 29348 }, { "epoch": 9.776547014241693, "grad_norm": 0.7421875, "learning_rate": 1.913408375002923e-08, "loss": 3.9716, "step": 29349 }, { "epoch": 9.77688015324394, "grad_norm": 0.765625, "learning_rate": 1.9076345010688724e-08, "loss": 3.9861, "step": 29350 }, { "epoch": 9.77721329224619, "grad_norm": 0.81640625, "learning_rate": 1.901869340790452e-08, "loss": 4.0229, "step": 29351 }, { "epoch": 9.777546431248439, "grad_norm": 0.78515625, "learning_rate": 1.8961128942349414e-08, "loss": 4.0351, "step": 29352 }, { "epoch": 9.777879570250686, "grad_norm": 0.765625, "learning_rate": 1.8903651614693705e-08, "loss": 3.9737, "step": 29353 }, { "epoch": 9.778212709252935, "grad_norm": 0.765625, "learning_rate": 1.8846261425606027e-08, "loss": 4.0004, "step": 29354 }, { "epoch": 9.778545848255185, "grad_norm": 0.76171875, "learning_rate": 1.8788958375755006e-08, "loss": 4.0543, "step": 29355 }, { "epoch": 9.778878987257434, "grad_norm": 0.796875, "learning_rate": 1.873174246580761e-08, "loss": 4.0066, "step": 29356 }, { "epoch": 9.779212126259681, "grad_norm": 0.7578125, "learning_rate": 1.8674613696432474e-08, "loss": 3.9737, "step": 29357 }, { "epoch": 9.77954526526193, "grad_norm": 0.78515625, "learning_rate": 1.86175720682924e-08, "loss": 4.0132, "step": 29358 }, { "epoch": 9.77987840426418, "grad_norm": 0.73046875, "learning_rate": 1.856061758205352e-08, "loss": 4.0347, "step": 29359 }, { "epoch": 9.780211543266429, "grad_norm": 0.8046875, "learning_rate": 1.8503750238378637e-08, "loss": 3.9999, "step": 29360 }, { "epoch": 9.780544682268676, "grad_norm": 0.80078125, "learning_rate": 1.8446970037930556e-08, "loss": 4.0126, "step": 29361 }, { "epoch": 9.780877821270925, "grad_norm": 0.78125, "learning_rate": 1.8390276981370414e-08, "loss": 3.9232, "step": 29362 }, { "epoch": 9.781210960273174, "grad_norm": 0.7890625, "learning_rate": 1.8333671069357683e-08, "loss": 3.933, "step": 29363 }, { "epoch": 9.781544099275422, "grad_norm": 0.734375, "learning_rate": 1.827715230255267e-08, "loss": 3.8827, "step": 29364 }, { "epoch": 9.781877238277671, "grad_norm": 0.74609375, "learning_rate": 1.8220720681613178e-08, "loss": 3.9992, "step": 29365 }, { "epoch": 9.78221037727992, "grad_norm": 0.78125, "learning_rate": 1.816437620719702e-08, "loss": 3.9945, "step": 29366 }, { "epoch": 9.78254351628217, "grad_norm": 0.73828125, "learning_rate": 1.81081188799595e-08, "loss": 3.888, "step": 29367 }, { "epoch": 9.782876655284417, "grad_norm": 0.75, "learning_rate": 1.8051948700557596e-08, "loss": 4.0862, "step": 29368 }, { "epoch": 9.783209794286666, "grad_norm": 0.734375, "learning_rate": 1.799586566964495e-08, "loss": 3.9733, "step": 29369 }, { "epoch": 9.783542933288915, "grad_norm": 0.75390625, "learning_rate": 1.793986978787354e-08, "loss": 4.056, "step": 29370 }, { "epoch": 9.783876072291164, "grad_norm": 0.75, "learning_rate": 1.7883961055897015e-08, "loss": 3.9741, "step": 29371 }, { "epoch": 9.784209211293412, "grad_norm": 0.75390625, "learning_rate": 1.782813947436568e-08, "loss": 3.9318, "step": 29372 }, { "epoch": 9.784542350295661, "grad_norm": 0.7265625, "learning_rate": 1.7772405043930684e-08, "loss": 4.0271, "step": 29373 }, { "epoch": 9.78487548929791, "grad_norm": 0.76953125, "learning_rate": 1.7716757765240678e-08, "loss": 3.9796, "step": 29374 }, { "epoch": 9.785208628300158, "grad_norm": 0.71875, "learning_rate": 1.766119763894347e-08, "loss": 4.0213, "step": 29375 }, { "epoch": 9.785541767302407, "grad_norm": 0.70703125, "learning_rate": 1.7605724665686883e-08, "loss": 3.9709, "step": 29376 }, { "epoch": 9.785874906304656, "grad_norm": 0.76171875, "learning_rate": 1.7550338846117063e-08, "loss": 3.9845, "step": 29377 }, { "epoch": 9.786208045306905, "grad_norm": 0.74609375, "learning_rate": 1.7495040180878498e-08, "loss": 3.9176, "step": 29378 }, { "epoch": 9.786541184309153, "grad_norm": 0.8125, "learning_rate": 1.7439828670615665e-08, "loss": 3.9781, "step": 29379 }, { "epoch": 9.786874323311402, "grad_norm": 0.73828125, "learning_rate": 1.738470431597139e-08, "loss": 3.9452, "step": 29380 }, { "epoch": 9.787207462313651, "grad_norm": 0.83203125, "learning_rate": 1.7329667117587655e-08, "loss": 3.9837, "step": 29381 }, { "epoch": 9.787540601315898, "grad_norm": 0.79296875, "learning_rate": 1.727471707610645e-08, "loss": 3.9788, "step": 29382 }, { "epoch": 9.787873740318147, "grad_norm": 0.734375, "learning_rate": 1.7219854192167263e-08, "loss": 4.0026, "step": 29383 }, { "epoch": 9.788206879320397, "grad_norm": 0.8125, "learning_rate": 1.7165078466408747e-08, "loss": 3.9274, "step": 29384 }, { "epoch": 9.788540018322646, "grad_norm": 0.77734375, "learning_rate": 1.7110389899469558e-08, "loss": 3.9873, "step": 29385 }, { "epoch": 9.788873157324893, "grad_norm": 0.73828125, "learning_rate": 1.705578849198669e-08, "loss": 3.9997, "step": 29386 }, { "epoch": 9.789206296327142, "grad_norm": 0.7421875, "learning_rate": 1.7001274244595467e-08, "loss": 3.9521, "step": 29387 }, { "epoch": 9.789539435329392, "grad_norm": 0.75390625, "learning_rate": 1.6946847157931212e-08, "loss": 3.9327, "step": 29388 }, { "epoch": 9.789872574331639, "grad_norm": 0.77734375, "learning_rate": 1.6892507232627586e-08, "loss": 4.0031, "step": 29389 }, { "epoch": 9.790205713333888, "grad_norm": 0.7734375, "learning_rate": 1.6838254469318248e-08, "loss": 4.0269, "step": 29390 }, { "epoch": 9.790538852336137, "grad_norm": 0.7421875, "learning_rate": 1.6784088868635194e-08, "loss": 4.044, "step": 29391 }, { "epoch": 9.790871991338387, "grad_norm": 0.76171875, "learning_rate": 1.6730010431208752e-08, "loss": 3.9407, "step": 29392 }, { "epoch": 9.791205130340634, "grad_norm": 0.7421875, "learning_rate": 1.667601915766842e-08, "loss": 3.9639, "step": 29393 }, { "epoch": 9.791538269342883, "grad_norm": 0.7890625, "learning_rate": 1.6622115048643694e-08, "loss": 4.007, "step": 29394 }, { "epoch": 9.791871408345132, "grad_norm": 0.7421875, "learning_rate": 1.6568298104762404e-08, "loss": 3.9818, "step": 29395 }, { "epoch": 9.792204547347382, "grad_norm": 0.73046875, "learning_rate": 1.6514568326651547e-08, "loss": 3.9368, "step": 29396 }, { "epoch": 9.792537686349629, "grad_norm": 0.76953125, "learning_rate": 1.6460925714937292e-08, "loss": 3.9953, "step": 29397 }, { "epoch": 9.792870825351878, "grad_norm": 0.7734375, "learning_rate": 1.6407370270243306e-08, "loss": 4.0193, "step": 29398 }, { "epoch": 9.793203964354127, "grad_norm": 0.74609375, "learning_rate": 1.6353901993194088e-08, "loss": 3.9924, "step": 29399 }, { "epoch": 9.793537103356375, "grad_norm": 0.74609375, "learning_rate": 1.6300520884411642e-08, "loss": 4.0123, "step": 29400 }, { "epoch": 9.793870242358624, "grad_norm": 0.74609375, "learning_rate": 1.6247226944519633e-08, "loss": 3.9466, "step": 29401 }, { "epoch": 9.794203381360873, "grad_norm": 0.77734375, "learning_rate": 1.6194020174136738e-08, "loss": 3.9997, "step": 29402 }, { "epoch": 9.794536520363122, "grad_norm": 0.77734375, "learning_rate": 1.614090057388412e-08, "loss": 3.9892, "step": 29403 }, { "epoch": 9.79486965936537, "grad_norm": 0.7734375, "learning_rate": 1.6087868144379624e-08, "loss": 3.9866, "step": 29404 }, { "epoch": 9.795202798367619, "grad_norm": 0.76953125, "learning_rate": 1.603492288624192e-08, "loss": 3.9882, "step": 29405 }, { "epoch": 9.795535937369868, "grad_norm": 0.7578125, "learning_rate": 1.5982064800087183e-08, "loss": 4.0083, "step": 29406 }, { "epoch": 9.795869076372115, "grad_norm": 0.75, "learning_rate": 1.592929388653075e-08, "loss": 4.043, "step": 29407 }, { "epoch": 9.796202215374365, "grad_norm": 0.70703125, "learning_rate": 1.5876610146187132e-08, "loss": 3.9709, "step": 29408 }, { "epoch": 9.796535354376614, "grad_norm": 0.73828125, "learning_rate": 1.5824013579670836e-08, "loss": 3.9766, "step": 29409 }, { "epoch": 9.796868493378863, "grad_norm": 0.71484375, "learning_rate": 1.5771504187593877e-08, "loss": 3.9989, "step": 29410 }, { "epoch": 9.79720163238111, "grad_norm": 0.734375, "learning_rate": 1.5719081970568262e-08, "loss": 4.0039, "step": 29411 }, { "epoch": 9.79753477138336, "grad_norm": 0.78515625, "learning_rate": 1.5666746929204334e-08, "loss": 3.9278, "step": 29412 }, { "epoch": 9.797867910385609, "grad_norm": 0.75390625, "learning_rate": 1.561449906411244e-08, "loss": 3.98, "step": 29413 }, { "epoch": 9.798201049387856, "grad_norm": 0.69921875, "learning_rate": 1.55623383758996e-08, "loss": 4.011, "step": 29414 }, { "epoch": 9.798534188390105, "grad_norm": 0.7421875, "learning_rate": 1.5510264865174483e-08, "loss": 3.9319, "step": 29415 }, { "epoch": 9.798867327392355, "grad_norm": 0.734375, "learning_rate": 1.545827853254328e-08, "loss": 4.0129, "step": 29416 }, { "epoch": 9.799200466394604, "grad_norm": 0.77734375, "learning_rate": 1.5406379378612168e-08, "loss": 4.0174, "step": 29417 }, { "epoch": 9.799533605396851, "grad_norm": 0.74609375, "learning_rate": 1.5354567403984832e-08, "loss": 4.0521, "step": 29418 }, { "epoch": 9.7998667443991, "grad_norm": 0.77734375, "learning_rate": 1.5302842609264956e-08, "loss": 3.9399, "step": 29419 }, { "epoch": 9.80019988340135, "grad_norm": 0.75, "learning_rate": 1.525120499505539e-08, "loss": 3.9921, "step": 29420 }, { "epoch": 9.800533022403599, "grad_norm": 0.76171875, "learning_rate": 1.519965456195732e-08, "loss": 3.8836, "step": 29421 }, { "epoch": 9.800866161405846, "grad_norm": 0.75390625, "learning_rate": 1.51481913105711e-08, "loss": 4.0065, "step": 29422 }, { "epoch": 9.801199300408095, "grad_norm": 0.8359375, "learning_rate": 1.509681524149542e-08, "loss": 3.9735, "step": 29423 }, { "epoch": 9.801532439410344, "grad_norm": 0.72265625, "learning_rate": 1.504552635533063e-08, "loss": 3.8871, "step": 29424 }, { "epoch": 9.801865578412592, "grad_norm": 0.75390625, "learning_rate": 1.499432465267292e-08, "loss": 3.9548, "step": 29425 }, { "epoch": 9.802198717414841, "grad_norm": 0.72265625, "learning_rate": 1.494321013411848e-08, "loss": 3.9751, "step": 29426 }, { "epoch": 9.80253185641709, "grad_norm": 0.78515625, "learning_rate": 1.4892182800263499e-08, "loss": 4.0438, "step": 29427 }, { "epoch": 9.80286499541934, "grad_norm": 0.8203125, "learning_rate": 1.484124265170167e-08, "loss": 4.0529, "step": 29428 }, { "epoch": 9.803198134421587, "grad_norm": 0.7265625, "learning_rate": 1.4790389689026684e-08, "loss": 3.9401, "step": 29429 }, { "epoch": 9.803531273423836, "grad_norm": 0.7421875, "learning_rate": 1.4739623912830568e-08, "loss": 3.8812, "step": 29430 }, { "epoch": 9.803864412426085, "grad_norm": 0.78125, "learning_rate": 1.4688945323704517e-08, "loss": 4.0681, "step": 29431 }, { "epoch": 9.804197551428334, "grad_norm": 0.7109375, "learning_rate": 1.4638353922239722e-08, "loss": 3.9942, "step": 29432 }, { "epoch": 9.804530690430582, "grad_norm": 0.77734375, "learning_rate": 1.4587849709024881e-08, "loss": 3.9461, "step": 29433 }, { "epoch": 9.804863829432831, "grad_norm": 0.7734375, "learning_rate": 1.4537432684647856e-08, "loss": 4.0041, "step": 29434 }, { "epoch": 9.80519696843508, "grad_norm": 0.79296875, "learning_rate": 1.4487102849695677e-08, "loss": 3.9317, "step": 29435 }, { "epoch": 9.805530107437328, "grad_norm": 0.7421875, "learning_rate": 1.4436860204756208e-08, "loss": 3.9573, "step": 29436 }, { "epoch": 9.805863246439577, "grad_norm": 0.7734375, "learning_rate": 1.4386704750413148e-08, "loss": 4.0347, "step": 29437 }, { "epoch": 9.806196385441826, "grad_norm": 0.79296875, "learning_rate": 1.4336636487251032e-08, "loss": 3.9868, "step": 29438 }, { "epoch": 9.806529524444075, "grad_norm": 0.7109375, "learning_rate": 1.4286655415852723e-08, "loss": 3.999, "step": 29439 }, { "epoch": 9.806862663446323, "grad_norm": 0.7890625, "learning_rate": 1.4236761536801923e-08, "loss": 3.894, "step": 29440 }, { "epoch": 9.807195802448572, "grad_norm": 0.76953125, "learning_rate": 1.4186954850678169e-08, "loss": 4.0211, "step": 29441 }, { "epoch": 9.80752894145082, "grad_norm": 0.765625, "learning_rate": 1.4137235358061829e-08, "loss": 4.0023, "step": 29442 }, { "epoch": 9.807862080453068, "grad_norm": 0.7578125, "learning_rate": 1.4087603059533272e-08, "loss": 3.9853, "step": 29443 }, { "epoch": 9.808195219455317, "grad_norm": 0.7890625, "learning_rate": 1.4038057955668704e-08, "loss": 3.9369, "step": 29444 }, { "epoch": 9.808528358457567, "grad_norm": 0.734375, "learning_rate": 1.3988600047046829e-08, "loss": 3.9463, "step": 29445 }, { "epoch": 9.808861497459816, "grad_norm": 0.75, "learning_rate": 1.393922933424302e-08, "loss": 3.9443, "step": 29446 }, { "epoch": 9.809194636462063, "grad_norm": 0.7421875, "learning_rate": 1.3889945817831817e-08, "loss": 4.0092, "step": 29447 }, { "epoch": 9.809527775464312, "grad_norm": 0.72265625, "learning_rate": 1.3840749498388593e-08, "loss": 4.0347, "step": 29448 }, { "epoch": 9.809860914466562, "grad_norm": 0.765625, "learning_rate": 1.3791640376485392e-08, "loss": 3.93, "step": 29449 }, { "epoch": 9.810194053468809, "grad_norm": 0.73046875, "learning_rate": 1.3742618452694256e-08, "loss": 4.0027, "step": 29450 }, { "epoch": 9.810527192471058, "grad_norm": 0.73828125, "learning_rate": 1.3693683727586393e-08, "loss": 4.0093, "step": 29451 }, { "epoch": 9.810860331473307, "grad_norm": 0.76953125, "learning_rate": 1.364483620173218e-08, "loss": 4.0123, "step": 29452 }, { "epoch": 9.811193470475557, "grad_norm": 0.74609375, "learning_rate": 1.3596075875700331e-08, "loss": 3.9861, "step": 29453 }, { "epoch": 9.811526609477804, "grad_norm": 0.75, "learning_rate": 1.3547402750057892e-08, "loss": 3.8763, "step": 29454 }, { "epoch": 9.811859748480053, "grad_norm": 0.75, "learning_rate": 1.349881682537274e-08, "loss": 3.9961, "step": 29455 }, { "epoch": 9.812192887482302, "grad_norm": 0.765625, "learning_rate": 1.345031810221109e-08, "loss": 4.0873, "step": 29456 }, { "epoch": 9.812526026484552, "grad_norm": 0.7734375, "learning_rate": 1.3401906581137491e-08, "loss": 4.0413, "step": 29457 }, { "epoch": 9.812859165486799, "grad_norm": 0.7734375, "learning_rate": 1.3353582262715659e-08, "loss": 3.974, "step": 29458 }, { "epoch": 9.813192304489048, "grad_norm": 0.765625, "learning_rate": 1.3305345147508475e-08, "loss": 4.0238, "step": 29459 }, { "epoch": 9.813525443491297, "grad_norm": 0.76953125, "learning_rate": 1.325719523607799e-08, "loss": 3.9694, "step": 29460 }, { "epoch": 9.813858582493545, "grad_norm": 0.7421875, "learning_rate": 1.3209132528984591e-08, "loss": 4.0299, "step": 29461 }, { "epoch": 9.814191721495794, "grad_norm": 0.7578125, "learning_rate": 1.3161157026787829e-08, "loss": 4.0346, "step": 29462 }, { "epoch": 9.814524860498043, "grad_norm": 0.76953125, "learning_rate": 1.311326873004809e-08, "loss": 3.9891, "step": 29463 }, { "epoch": 9.814857999500292, "grad_norm": 0.75, "learning_rate": 1.3065467639322426e-08, "loss": 3.9763, "step": 29464 }, { "epoch": 9.81519113850254, "grad_norm": 0.80078125, "learning_rate": 1.3017753755166229e-08, "loss": 3.9711, "step": 29465 }, { "epoch": 9.815524277504789, "grad_norm": 0.80078125, "learning_rate": 1.2970127078137384e-08, "loss": 4.0111, "step": 29466 }, { "epoch": 9.815857416507038, "grad_norm": 0.73828125, "learning_rate": 1.2922587608789615e-08, "loss": 4.0381, "step": 29467 }, { "epoch": 9.816190555509285, "grad_norm": 0.80078125, "learning_rate": 1.2875135347676647e-08, "loss": 4.0124, "step": 29468 }, { "epoch": 9.816523694511535, "grad_norm": 0.7734375, "learning_rate": 1.2827770295351371e-08, "loss": 3.9765, "step": 29469 }, { "epoch": 9.816856833513784, "grad_norm": 0.7421875, "learning_rate": 1.2780492452365012e-08, "loss": 4.0702, "step": 29470 }, { "epoch": 9.817189972516033, "grad_norm": 0.77734375, "learning_rate": 1.2733301819268794e-08, "loss": 4.0142, "step": 29471 }, { "epoch": 9.81752311151828, "grad_norm": 0.7890625, "learning_rate": 1.2686198396611449e-08, "loss": 3.9685, "step": 29472 }, { "epoch": 9.81785625052053, "grad_norm": 0.765625, "learning_rate": 1.2639182184943366e-08, "loss": 3.9878, "step": 29473 }, { "epoch": 9.818189389522779, "grad_norm": 0.7421875, "learning_rate": 1.2592253184810776e-08, "loss": 4.0141, "step": 29474 }, { "epoch": 9.818522528525026, "grad_norm": 0.75390625, "learning_rate": 1.2545411396761574e-08, "loss": 3.9117, "step": 29475 }, { "epoch": 9.818855667527275, "grad_norm": 0.7109375, "learning_rate": 1.2498656821339494e-08, "loss": 3.9381, "step": 29476 }, { "epoch": 9.819188806529525, "grad_norm": 0.78515625, "learning_rate": 1.2451989459090762e-08, "loss": 3.8987, "step": 29477 }, { "epoch": 9.819521945531774, "grad_norm": 0.7421875, "learning_rate": 1.240540931055828e-08, "loss": 3.9561, "step": 29478 }, { "epoch": 9.819855084534021, "grad_norm": 0.73046875, "learning_rate": 1.2358916376284113e-08, "loss": 3.9775, "step": 29479 }, { "epoch": 9.82018822353627, "grad_norm": 0.73828125, "learning_rate": 1.231251065681116e-08, "loss": 3.979, "step": 29480 }, { "epoch": 9.82052136253852, "grad_norm": 0.7421875, "learning_rate": 1.2266192152678157e-08, "loss": 3.9339, "step": 29481 }, { "epoch": 9.820854501540769, "grad_norm": 0.7421875, "learning_rate": 1.2219960864426339e-08, "loss": 3.9148, "step": 29482 }, { "epoch": 9.821187640543016, "grad_norm": 0.765625, "learning_rate": 1.2173816792593607e-08, "loss": 3.9447, "step": 29483 }, { "epoch": 9.821520779545265, "grad_norm": 0.765625, "learning_rate": 1.2127759937717031e-08, "loss": 3.9705, "step": 29484 }, { "epoch": 9.821853918547514, "grad_norm": 0.80078125, "learning_rate": 1.2081790300333684e-08, "loss": 3.9652, "step": 29485 }, { "epoch": 9.822187057549762, "grad_norm": 0.73828125, "learning_rate": 1.2035907880978136e-08, "loss": 4.0322, "step": 29486 }, { "epoch": 9.822520196552011, "grad_norm": 0.76953125, "learning_rate": 1.1990112680185795e-08, "loss": 3.9931, "step": 29487 }, { "epoch": 9.82285333555426, "grad_norm": 0.78125, "learning_rate": 1.1944404698489564e-08, "loss": 3.9043, "step": 29488 }, { "epoch": 9.82318647455651, "grad_norm": 0.80859375, "learning_rate": 1.189878393642152e-08, "loss": 3.9438, "step": 29489 }, { "epoch": 9.823519613558757, "grad_norm": 0.73046875, "learning_rate": 1.1853250394513737e-08, "loss": 3.9442, "step": 29490 }, { "epoch": 9.823852752561006, "grad_norm": 0.77734375, "learning_rate": 1.1807804073296624e-08, "loss": 3.9427, "step": 29491 }, { "epoch": 9.824185891563255, "grad_norm": 0.79296875, "learning_rate": 1.1762444973298091e-08, "loss": 3.9616, "step": 29492 }, { "epoch": 9.824519030565504, "grad_norm": 0.7734375, "learning_rate": 1.171717309504855e-08, "loss": 3.965, "step": 29493 }, { "epoch": 9.824852169567752, "grad_norm": 0.76171875, "learning_rate": 1.1671988439074243e-08, "loss": 4.0365, "step": 29494 }, { "epoch": 9.825185308570001, "grad_norm": 0.74609375, "learning_rate": 1.162689100590142e-08, "loss": 3.9359, "step": 29495 }, { "epoch": 9.82551844757225, "grad_norm": 0.7421875, "learning_rate": 1.1581880796055489e-08, "loss": 3.9843, "step": 29496 }, { "epoch": 9.825851586574498, "grad_norm": 0.8671875, "learning_rate": 1.1536957810061033e-08, "loss": 3.9356, "step": 29497 }, { "epoch": 9.826184725576747, "grad_norm": 0.765625, "learning_rate": 1.1492122048440968e-08, "loss": 3.9414, "step": 29498 }, { "epoch": 9.826517864578996, "grad_norm": 0.8125, "learning_rate": 1.1447373511717374e-08, "loss": 3.8883, "step": 29499 }, { "epoch": 9.826851003581245, "grad_norm": 0.765625, "learning_rate": 1.1402712200411503e-08, "loss": 4.0731, "step": 29500 }, { "epoch": 9.827184142583492, "grad_norm": 0.76953125, "learning_rate": 1.1358138115044603e-08, "loss": 4.0205, "step": 29501 }, { "epoch": 9.827517281585742, "grad_norm": 0.73828125, "learning_rate": 1.1313651256134593e-08, "loss": 4.0025, "step": 29502 }, { "epoch": 9.82785042058799, "grad_norm": 0.76171875, "learning_rate": 1.1269251624199394e-08, "loss": 3.9702, "step": 29503 }, { "epoch": 9.828183559590238, "grad_norm": 0.75, "learning_rate": 1.1224939219757757e-08, "loss": 3.9995, "step": 29504 }, { "epoch": 9.828516698592487, "grad_norm": 0.78515625, "learning_rate": 1.1180714043325102e-08, "loss": 3.9388, "step": 29505 }, { "epoch": 9.828849837594737, "grad_norm": 0.765625, "learning_rate": 1.1136576095415186e-08, "loss": 4.0011, "step": 29506 }, { "epoch": 9.829182976596986, "grad_norm": 0.73828125, "learning_rate": 1.109252537654426e-08, "loss": 4.017, "step": 29507 }, { "epoch": 9.829516115599233, "grad_norm": 0.77734375, "learning_rate": 1.1048561887224418e-08, "loss": 3.9663, "step": 29508 }, { "epoch": 9.829849254601482, "grad_norm": 0.8203125, "learning_rate": 1.1004685627966914e-08, "loss": 3.9715, "step": 29509 }, { "epoch": 9.830182393603732, "grad_norm": 0.77734375, "learning_rate": 1.0960896599284676e-08, "loss": 3.9942, "step": 29510 }, { "epoch": 9.830515532605979, "grad_norm": 0.73046875, "learning_rate": 1.091719480168646e-08, "loss": 3.9188, "step": 29511 }, { "epoch": 9.830848671608228, "grad_norm": 0.79296875, "learning_rate": 1.0873580235681025e-08, "loss": 4.0321, "step": 29512 }, { "epoch": 9.831181810610477, "grad_norm": 0.76953125, "learning_rate": 1.0830052901777966e-08, "loss": 3.9964, "step": 29513 }, { "epoch": 9.831514949612727, "grad_norm": 0.8046875, "learning_rate": 1.078661280048271e-08, "loss": 4.0346, "step": 29514 }, { "epoch": 9.831848088614974, "grad_norm": 0.73828125, "learning_rate": 1.0743259932302352e-08, "loss": 3.9179, "step": 29515 }, { "epoch": 9.832181227617223, "grad_norm": 0.76953125, "learning_rate": 1.0699994297740657e-08, "loss": 3.9613, "step": 29516 }, { "epoch": 9.832514366619472, "grad_norm": 0.76953125, "learning_rate": 1.0656815897302218e-08, "loss": 3.9795, "step": 29517 }, { "epoch": 9.832847505621721, "grad_norm": 0.76953125, "learning_rate": 1.0613724731489972e-08, "loss": 3.9353, "step": 29518 }, { "epoch": 9.833180644623969, "grad_norm": 0.72265625, "learning_rate": 1.0570720800806011e-08, "loss": 3.9944, "step": 29519 }, { "epoch": 9.833513783626218, "grad_norm": 0.79296875, "learning_rate": 1.0527804105751604e-08, "loss": 4.0467, "step": 29520 }, { "epoch": 9.833846922628467, "grad_norm": 0.76953125, "learning_rate": 1.0484974646825518e-08, "loss": 4.0307, "step": 29521 }, { "epoch": 9.834180061630715, "grad_norm": 0.8046875, "learning_rate": 1.0442232424527354e-08, "loss": 3.9811, "step": 29522 }, { "epoch": 9.834513200632964, "grad_norm": 0.76953125, "learning_rate": 1.0399577439355046e-08, "loss": 3.9833, "step": 29523 }, { "epoch": 9.834846339635213, "grad_norm": 0.75, "learning_rate": 1.0357009691804865e-08, "loss": 4.0214, "step": 29524 }, { "epoch": 9.835179478637462, "grad_norm": 0.73828125, "learning_rate": 1.0314529182373078e-08, "loss": 4.0411, "step": 29525 }, { "epoch": 9.83551261763971, "grad_norm": 0.74609375, "learning_rate": 1.0272135911554292e-08, "loss": 3.996, "step": 29526 }, { "epoch": 9.835845756641959, "grad_norm": 0.77734375, "learning_rate": 1.0229829879842278e-08, "loss": 3.9898, "step": 29527 }, { "epoch": 9.836178895644208, "grad_norm": 0.71875, "learning_rate": 1.0187611087729975e-08, "loss": 4.0588, "step": 29528 }, { "epoch": 9.836512034646455, "grad_norm": 0.79296875, "learning_rate": 1.014547953570949e-08, "loss": 3.949, "step": 29529 }, { "epoch": 9.836845173648705, "grad_norm": 0.82421875, "learning_rate": 1.0103435224271262e-08, "loss": 3.9445, "step": 29530 }, { "epoch": 9.837178312650954, "grad_norm": 0.73828125, "learning_rate": 1.0061478153904068e-08, "loss": 4.0123, "step": 29531 }, { "epoch": 9.837511451653203, "grad_norm": 0.8203125, "learning_rate": 1.0019608325098351e-08, "loss": 3.988, "step": 29532 }, { "epoch": 9.83784459065545, "grad_norm": 0.7734375, "learning_rate": 9.977825738340385e-09, "loss": 3.9034, "step": 29533 }, { "epoch": 9.8381777296577, "grad_norm": 0.734375, "learning_rate": 9.936130394118115e-09, "loss": 3.8605, "step": 29534 }, { "epoch": 9.838510868659949, "grad_norm": 0.73046875, "learning_rate": 9.894522292916153e-09, "loss": 3.9608, "step": 29535 }, { "epoch": 9.838844007662196, "grad_norm": 0.74609375, "learning_rate": 9.853001435219112e-09, "loss": 3.9782, "step": 29536 }, { "epoch": 9.839177146664445, "grad_norm": 0.76953125, "learning_rate": 9.811567821510769e-09, "loss": 3.988, "step": 29537 }, { "epoch": 9.839510285666695, "grad_norm": 0.7578125, "learning_rate": 9.770221452274908e-09, "loss": 3.9585, "step": 29538 }, { "epoch": 9.839843424668944, "grad_norm": 0.765625, "learning_rate": 9.728962327991142e-09, "loss": 3.9565, "step": 29539 }, { "epoch": 9.840176563671191, "grad_norm": 0.72265625, "learning_rate": 9.687790449141587e-09, "loss": 4.0165, "step": 29540 }, { "epoch": 9.84050970267344, "grad_norm": 0.7734375, "learning_rate": 9.646705816205027e-09, "loss": 4.0162, "step": 29541 }, { "epoch": 9.84084284167569, "grad_norm": 0.77734375, "learning_rate": 9.605708429660243e-09, "loss": 3.9672, "step": 29542 }, { "epoch": 9.841175980677939, "grad_norm": 0.76953125, "learning_rate": 9.564798289985189e-09, "loss": 3.9642, "step": 29543 }, { "epoch": 9.841509119680186, "grad_norm": 0.75, "learning_rate": 9.523975397656148e-09, "loss": 4.0401, "step": 29544 }, { "epoch": 9.841842258682435, "grad_norm": 0.76953125, "learning_rate": 9.483239753147744e-09, "loss": 3.9413, "step": 29545 }, { "epoch": 9.842175397684684, "grad_norm": 0.7890625, "learning_rate": 9.442591356935427e-09, "loss": 3.9378, "step": 29546 }, { "epoch": 9.842508536686932, "grad_norm": 0.76953125, "learning_rate": 9.402030209492152e-09, "loss": 3.9734, "step": 29547 }, { "epoch": 9.842841675689181, "grad_norm": 0.7578125, "learning_rate": 9.361556311290875e-09, "loss": 3.9739, "step": 29548 }, { "epoch": 9.84317481469143, "grad_norm": 0.74609375, "learning_rate": 9.321169662802886e-09, "loss": 4.0219, "step": 29549 }, { "epoch": 9.84350795369368, "grad_norm": 0.765625, "learning_rate": 9.280870264498643e-09, "loss": 4.0077, "step": 29550 }, { "epoch": 9.843841092695927, "grad_norm": 0.76171875, "learning_rate": 9.240658116846934e-09, "loss": 4.0097, "step": 29551 }, { "epoch": 9.844174231698176, "grad_norm": 0.75390625, "learning_rate": 9.200533220317386e-09, "loss": 4.0072, "step": 29552 }, { "epoch": 9.844507370700425, "grad_norm": 0.7578125, "learning_rate": 9.160495575376294e-09, "loss": 3.9734, "step": 29553 }, { "epoch": 9.844840509702674, "grad_norm": 0.75, "learning_rate": 9.120545182489116e-09, "loss": 3.9798, "step": 29554 }, { "epoch": 9.845173648704922, "grad_norm": 0.73046875, "learning_rate": 9.080682042123812e-09, "loss": 3.9505, "step": 29555 }, { "epoch": 9.845506787707171, "grad_norm": 0.73828125, "learning_rate": 9.040906154742511e-09, "loss": 3.948, "step": 29556 }, { "epoch": 9.84583992670942, "grad_norm": 0.73046875, "learning_rate": 9.001217520809846e-09, "loss": 4.0313, "step": 29557 }, { "epoch": 9.846173065711668, "grad_norm": 0.73046875, "learning_rate": 8.96161614078711e-09, "loss": 4.0087, "step": 29558 }, { "epoch": 9.846506204713917, "grad_norm": 0.75, "learning_rate": 8.922102015135602e-09, "loss": 4.0038, "step": 29559 }, { "epoch": 9.846839343716166, "grad_norm": 0.76953125, "learning_rate": 8.882675144316621e-09, "loss": 3.973, "step": 29560 }, { "epoch": 9.847172482718415, "grad_norm": 0.80859375, "learning_rate": 8.843335528788965e-09, "loss": 4.0201, "step": 29561 }, { "epoch": 9.847505621720662, "grad_norm": 0.78515625, "learning_rate": 8.804083169009768e-09, "loss": 4.0175, "step": 29562 }, { "epoch": 9.847838760722912, "grad_norm": 0.77734375, "learning_rate": 8.764918065437833e-09, "loss": 4.0249, "step": 29563 }, { "epoch": 9.84817189972516, "grad_norm": 0.76953125, "learning_rate": 8.725840218527792e-09, "loss": 4.0082, "step": 29564 }, { "epoch": 9.848505038727408, "grad_norm": 0.7421875, "learning_rate": 8.686849628735949e-09, "loss": 3.9355, "step": 29565 }, { "epoch": 9.848838177729657, "grad_norm": 0.77734375, "learning_rate": 8.64794629651694e-09, "loss": 3.9977, "step": 29566 }, { "epoch": 9.849171316731907, "grad_norm": 0.71875, "learning_rate": 8.60913022232207e-09, "loss": 3.985, "step": 29567 }, { "epoch": 9.849504455734156, "grad_norm": 0.7578125, "learning_rate": 8.570401406605144e-09, "loss": 3.9354, "step": 29568 }, { "epoch": 9.849837594736403, "grad_norm": 0.75, "learning_rate": 8.531759849816634e-09, "loss": 3.9826, "step": 29569 }, { "epoch": 9.850170733738652, "grad_norm": 0.8125, "learning_rate": 8.49320555240618e-09, "loss": 3.9866, "step": 29570 }, { "epoch": 9.850503872740902, "grad_norm": 0.71875, "learning_rate": 8.454738514823424e-09, "loss": 3.9503, "step": 29571 }, { "epoch": 9.850837011743149, "grad_norm": 0.77734375, "learning_rate": 8.41635873751634e-09, "loss": 3.9604, "step": 29572 }, { "epoch": 9.851170150745398, "grad_norm": 0.75390625, "learning_rate": 8.378066220932069e-09, "loss": 4.0257, "step": 29573 }, { "epoch": 9.851503289747647, "grad_norm": 0.72265625, "learning_rate": 8.33986096551692e-09, "loss": 4.0199, "step": 29574 }, { "epoch": 9.851836428749897, "grad_norm": 0.71875, "learning_rate": 8.301742971714709e-09, "loss": 3.93, "step": 29575 }, { "epoch": 9.852169567752144, "grad_norm": 0.74609375, "learning_rate": 8.26371223997091e-09, "loss": 4.0096, "step": 29576 }, { "epoch": 9.852502706754393, "grad_norm": 0.78515625, "learning_rate": 8.225768770726839e-09, "loss": 3.9938, "step": 29577 }, { "epoch": 9.852835845756642, "grad_norm": 0.78125, "learning_rate": 8.187912564426303e-09, "loss": 3.9683, "step": 29578 }, { "epoch": 9.853168984758891, "grad_norm": 0.7265625, "learning_rate": 8.150143621508954e-09, "loss": 4.0291, "step": 29579 }, { "epoch": 9.853502123761139, "grad_norm": 0.75390625, "learning_rate": 8.112461942415273e-09, "loss": 3.9568, "step": 29580 }, { "epoch": 9.853835262763388, "grad_norm": 0.79296875, "learning_rate": 8.074867527583241e-09, "loss": 3.9373, "step": 29581 }, { "epoch": 9.854168401765637, "grad_norm": 0.76171875, "learning_rate": 8.037360377452508e-09, "loss": 3.9475, "step": 29582 }, { "epoch": 9.854501540767885, "grad_norm": 0.75390625, "learning_rate": 7.999940492458558e-09, "loss": 4.0316, "step": 29583 }, { "epoch": 9.854834679770134, "grad_norm": 0.8125, "learning_rate": 7.962607873037708e-09, "loss": 3.9782, "step": 29584 }, { "epoch": 9.855167818772383, "grad_norm": 0.71875, "learning_rate": 7.925362519624613e-09, "loss": 3.9858, "step": 29585 }, { "epoch": 9.855500957774632, "grad_norm": 0.74609375, "learning_rate": 7.888204432653922e-09, "loss": 4.0139, "step": 29586 }, { "epoch": 9.85583409677688, "grad_norm": 0.77734375, "learning_rate": 7.85113361255696e-09, "loss": 4.0512, "step": 29587 }, { "epoch": 9.856167235779129, "grad_norm": 0.75, "learning_rate": 7.814150059766712e-09, "loss": 3.9217, "step": 29588 }, { "epoch": 9.856500374781378, "grad_norm": 0.78125, "learning_rate": 7.777253774713667e-09, "loss": 3.9937, "step": 29589 }, { "epoch": 9.856833513783627, "grad_norm": 0.7578125, "learning_rate": 7.74044475782748e-09, "loss": 3.9498, "step": 29590 }, { "epoch": 9.857166652785875, "grad_norm": 0.74609375, "learning_rate": 7.703723009536145e-09, "loss": 3.9438, "step": 29591 }, { "epoch": 9.857499791788124, "grad_norm": 0.765625, "learning_rate": 7.667088530268484e-09, "loss": 3.9453, "step": 29592 }, { "epoch": 9.857832930790373, "grad_norm": 0.76171875, "learning_rate": 7.630541320451657e-09, "loss": 3.9459, "step": 29593 }, { "epoch": 9.85816606979262, "grad_norm": 0.76171875, "learning_rate": 7.594081380509489e-09, "loss": 4.015, "step": 29594 }, { "epoch": 9.85849920879487, "grad_norm": 0.765625, "learning_rate": 7.557708710868305e-09, "loss": 4.0367, "step": 29595 }, { "epoch": 9.858832347797119, "grad_norm": 0.77734375, "learning_rate": 7.521423311951103e-09, "loss": 4.0043, "step": 29596 }, { "epoch": 9.859165486799366, "grad_norm": 0.83203125, "learning_rate": 7.485225184180877e-09, "loss": 4.0014, "step": 29597 }, { "epoch": 9.859498625801615, "grad_norm": 0.8125, "learning_rate": 7.449114327978956e-09, "loss": 4.0252, "step": 29598 }, { "epoch": 9.859831764803864, "grad_norm": 0.75, "learning_rate": 7.413090743765838e-09, "loss": 4.0169, "step": 29599 }, { "epoch": 9.860164903806114, "grad_norm": 0.75, "learning_rate": 7.377154431961186e-09, "loss": 4.0444, "step": 29600 }, { "epoch": 9.860498042808361, "grad_norm": 0.75, "learning_rate": 7.3413053929846654e-09, "loss": 3.9638, "step": 29601 }, { "epoch": 9.86083118181061, "grad_norm": 0.76953125, "learning_rate": 7.305543627251776e-09, "loss": 4.0577, "step": 29602 }, { "epoch": 9.86116432081286, "grad_norm": 0.7421875, "learning_rate": 7.269869135180518e-09, "loss": 3.9412, "step": 29603 }, { "epoch": 9.861497459815109, "grad_norm": 0.73828125, "learning_rate": 7.234281917185559e-09, "loss": 3.9229, "step": 29604 }, { "epoch": 9.861830598817356, "grad_norm": 0.765625, "learning_rate": 7.1987819736824e-09, "loss": 3.9499, "step": 29605 }, { "epoch": 9.862163737819605, "grad_norm": 0.73046875, "learning_rate": 7.163369305084044e-09, "loss": 3.9346, "step": 29606 }, { "epoch": 9.862496876821854, "grad_norm": 0.75, "learning_rate": 7.1280439118034944e-09, "loss": 3.9557, "step": 29607 }, { "epoch": 9.862830015824102, "grad_norm": 0.7265625, "learning_rate": 7.0928057942504234e-09, "loss": 3.9912, "step": 29608 }, { "epoch": 9.863163154826351, "grad_norm": 0.73046875, "learning_rate": 7.057654952837833e-09, "loss": 4.0272, "step": 29609 }, { "epoch": 9.8634962938286, "grad_norm": 0.76953125, "learning_rate": 7.022591387972899e-09, "loss": 3.9949, "step": 29610 }, { "epoch": 9.86382943283085, "grad_norm": 0.7734375, "learning_rate": 6.987615100065292e-09, "loss": 3.9968, "step": 29611 }, { "epoch": 9.864162571833097, "grad_norm": 0.75, "learning_rate": 6.952726089522188e-09, "loss": 4.0184, "step": 29612 }, { "epoch": 9.864495710835346, "grad_norm": 0.75, "learning_rate": 6.91792435675076e-09, "loss": 4.0409, "step": 29613 }, { "epoch": 9.864828849837595, "grad_norm": 0.77734375, "learning_rate": 6.88320990215402e-09, "loss": 4.0039, "step": 29614 }, { "epoch": 9.865161988839844, "grad_norm": 0.765625, "learning_rate": 6.848582726139141e-09, "loss": 4.0179, "step": 29615 }, { "epoch": 9.865495127842092, "grad_norm": 0.7421875, "learning_rate": 6.814042829107469e-09, "loss": 4.0177, "step": 29616 }, { "epoch": 9.86582826684434, "grad_norm": 0.76953125, "learning_rate": 6.779590211462017e-09, "loss": 3.9363, "step": 29617 }, { "epoch": 9.86616140584659, "grad_norm": 0.77734375, "learning_rate": 6.745224873604128e-09, "loss": 3.9625, "step": 29618 }, { "epoch": 9.866494544848837, "grad_norm": 0.83203125, "learning_rate": 6.710946815933483e-09, "loss": 4.0307, "step": 29619 }, { "epoch": 9.866827683851087, "grad_norm": 0.7578125, "learning_rate": 6.6767560388505955e-09, "loss": 4.0077, "step": 29620 }, { "epoch": 9.867160822853336, "grad_norm": 0.796875, "learning_rate": 6.6426525427526475e-09, "loss": 3.9497, "step": 29621 }, { "epoch": 9.867493961855585, "grad_norm": 0.71875, "learning_rate": 6.608636328037654e-09, "loss": 3.9381, "step": 29622 }, { "epoch": 9.867827100857832, "grad_norm": 0.75, "learning_rate": 6.5747073951011315e-09, "loss": 3.9786, "step": 29623 }, { "epoch": 9.868160239860082, "grad_norm": 0.77734375, "learning_rate": 6.540865744338598e-09, "loss": 4.028, "step": 29624 }, { "epoch": 9.86849337886233, "grad_norm": 0.75, "learning_rate": 6.5071113761439034e-09, "loss": 3.9559, "step": 29625 }, { "epoch": 9.868826517864578, "grad_norm": 0.80078125, "learning_rate": 6.473444290910902e-09, "loss": 3.9669, "step": 29626 }, { "epoch": 9.869159656866827, "grad_norm": 0.76953125, "learning_rate": 6.439864489030945e-09, "loss": 3.9294, "step": 29627 }, { "epoch": 9.869492795869077, "grad_norm": 0.76171875, "learning_rate": 6.406371970895386e-09, "loss": 3.9964, "step": 29628 }, { "epoch": 9.869825934871326, "grad_norm": 0.73046875, "learning_rate": 6.372966736894747e-09, "loss": 3.9992, "step": 29629 }, { "epoch": 9.870159073873573, "grad_norm": 0.71875, "learning_rate": 6.3396487874178845e-09, "loss": 4.0519, "step": 29630 }, { "epoch": 9.870492212875822, "grad_norm": 0.7734375, "learning_rate": 6.3064181228528195e-09, "loss": 3.9742, "step": 29631 }, { "epoch": 9.870825351878072, "grad_norm": 0.734375, "learning_rate": 6.273274743586743e-09, "loss": 4.0165, "step": 29632 }, { "epoch": 9.871158490880319, "grad_norm": 0.78125, "learning_rate": 6.240218650005181e-09, "loss": 3.9745, "step": 29633 }, { "epoch": 9.871491629882568, "grad_norm": 0.7578125, "learning_rate": 6.207249842494489e-09, "loss": 3.9858, "step": 29634 }, { "epoch": 9.871824768884817, "grad_norm": 0.79296875, "learning_rate": 6.174368321436863e-09, "loss": 4.0139, "step": 29635 }, { "epoch": 9.872157907887066, "grad_norm": 0.734375, "learning_rate": 6.141574087216994e-09, "loss": 3.989, "step": 29636 }, { "epoch": 9.872491046889314, "grad_norm": 0.73046875, "learning_rate": 6.108867140216245e-09, "loss": 3.9396, "step": 29637 }, { "epoch": 9.872824185891563, "grad_norm": 0.73828125, "learning_rate": 6.076247480815145e-09, "loss": 3.9805, "step": 29638 }, { "epoch": 9.873157324893812, "grad_norm": 0.7421875, "learning_rate": 6.043715109393388e-09, "loss": 3.9527, "step": 29639 }, { "epoch": 9.873490463896061, "grad_norm": 0.75, "learning_rate": 6.011270026331506e-09, "loss": 4.0059, "step": 29640 }, { "epoch": 9.873823602898309, "grad_norm": 0.76953125, "learning_rate": 5.978912232005862e-09, "loss": 3.9847, "step": 29641 }, { "epoch": 9.874156741900558, "grad_norm": 0.75390625, "learning_rate": 5.946641726793656e-09, "loss": 3.9366, "step": 29642 }, { "epoch": 9.874489880902807, "grad_norm": 0.7578125, "learning_rate": 5.914458511071252e-09, "loss": 3.9367, "step": 29643 }, { "epoch": 9.874823019905055, "grad_norm": 0.78125, "learning_rate": 5.8823625852125194e-09, "loss": 3.9699, "step": 29644 }, { "epoch": 9.875156158907304, "grad_norm": 0.78125, "learning_rate": 5.8503539495929905e-09, "loss": 4.0217, "step": 29645 }, { "epoch": 9.875489297909553, "grad_norm": 0.7890625, "learning_rate": 5.818432604584034e-09, "loss": 3.9564, "step": 29646 }, { "epoch": 9.875822436911802, "grad_norm": 0.77734375, "learning_rate": 5.786598550557853e-09, "loss": 4.0347, "step": 29647 }, { "epoch": 9.87615557591405, "grad_norm": 0.73828125, "learning_rate": 5.7548517878858175e-09, "loss": 4.0045, "step": 29648 }, { "epoch": 9.876488714916299, "grad_norm": 0.7421875, "learning_rate": 5.723192316937631e-09, "loss": 3.9307, "step": 29649 }, { "epoch": 9.876821853918548, "grad_norm": 0.765625, "learning_rate": 5.6916201380805e-09, "loss": 3.9244, "step": 29650 }, { "epoch": 9.877154992920797, "grad_norm": 0.78515625, "learning_rate": 5.660135251684129e-09, "loss": 3.9238, "step": 29651 }, { "epoch": 9.877488131923045, "grad_norm": 0.76953125, "learning_rate": 5.628737658114891e-09, "loss": 3.9776, "step": 29652 }, { "epoch": 9.877821270925294, "grad_norm": 0.7734375, "learning_rate": 5.597427357737495e-09, "loss": 3.9571, "step": 29653 }, { "epoch": 9.878154409927543, "grad_norm": 0.8046875, "learning_rate": 5.566204350916648e-09, "loss": 3.9124, "step": 29654 }, { "epoch": 9.87848754892979, "grad_norm": 0.78125, "learning_rate": 5.5350686380170604e-09, "loss": 4.0564, "step": 29655 }, { "epoch": 9.87882068793204, "grad_norm": 0.73828125, "learning_rate": 5.5040202194009404e-09, "loss": 4.015, "step": 29656 }, { "epoch": 9.879153826934289, "grad_norm": 0.765625, "learning_rate": 5.473059095430499e-09, "loss": 3.9711, "step": 29657 }, { "epoch": 9.879486965936536, "grad_norm": 0.78515625, "learning_rate": 5.442185266465449e-09, "loss": 3.9416, "step": 29658 }, { "epoch": 9.879820104938785, "grad_norm": 0.80859375, "learning_rate": 5.411398732865502e-09, "loss": 4.0454, "step": 29659 }, { "epoch": 9.880153243941034, "grad_norm": 0.765625, "learning_rate": 5.380699494989538e-09, "loss": 4.0047, "step": 29660 }, { "epoch": 9.880486382943284, "grad_norm": 0.76171875, "learning_rate": 5.3500875531956036e-09, "loss": 3.9418, "step": 29661 }, { "epoch": 9.880819521945531, "grad_norm": 0.7578125, "learning_rate": 5.319562907839248e-09, "loss": 3.959, "step": 29662 }, { "epoch": 9.88115266094778, "grad_norm": 0.75, "learning_rate": 5.289125559276853e-09, "loss": 4.0132, "step": 29663 }, { "epoch": 9.88148579995003, "grad_norm": 0.7890625, "learning_rate": 5.258775507862301e-09, "loss": 4.0261, "step": 29664 }, { "epoch": 9.881818938952279, "grad_norm": 0.76171875, "learning_rate": 5.228512753949477e-09, "loss": 3.9454, "step": 29665 }, { "epoch": 9.882152077954526, "grad_norm": 0.74609375, "learning_rate": 5.198337297891431e-09, "loss": 4.0229, "step": 29666 }, { "epoch": 9.882485216956775, "grad_norm": 0.7109375, "learning_rate": 5.168249140037884e-09, "loss": 4.0417, "step": 29667 }, { "epoch": 9.882818355959024, "grad_norm": 0.75, "learning_rate": 5.138248280741053e-09, "loss": 3.9558, "step": 29668 }, { "epoch": 9.883151494961272, "grad_norm": 0.7578125, "learning_rate": 5.108334720349828e-09, "loss": 3.9211, "step": 29669 }, { "epoch": 9.883484633963521, "grad_norm": 0.7109375, "learning_rate": 5.078508459213094e-09, "loss": 4.0585, "step": 29670 }, { "epoch": 9.88381777296577, "grad_norm": 0.7421875, "learning_rate": 5.048769497677242e-09, "loss": 3.9618, "step": 29671 }, { "epoch": 9.88415091196802, "grad_norm": 0.75390625, "learning_rate": 5.0191178360886606e-09, "loss": 3.9881, "step": 29672 }, { "epoch": 9.884484050970267, "grad_norm": 0.7734375, "learning_rate": 4.989553474794573e-09, "loss": 4.0218, "step": 29673 }, { "epoch": 9.884817189972516, "grad_norm": 0.7734375, "learning_rate": 4.960076414137205e-09, "loss": 3.975, "step": 29674 }, { "epoch": 9.885150328974765, "grad_norm": 0.734375, "learning_rate": 4.930686654460448e-09, "loss": 3.9933, "step": 29675 }, { "epoch": 9.885483467977014, "grad_norm": 0.7421875, "learning_rate": 4.901384196107361e-09, "loss": 3.9414, "step": 29676 }, { "epoch": 9.885816606979262, "grad_norm": 0.78125, "learning_rate": 4.872169039418506e-09, "loss": 3.9788, "step": 29677 }, { "epoch": 9.88614974598151, "grad_norm": 0.75, "learning_rate": 4.843041184735275e-09, "loss": 4.0021, "step": 29678 }, { "epoch": 9.88648288498376, "grad_norm": 0.7421875, "learning_rate": 4.8140006323948995e-09, "loss": 4.0012, "step": 29679 }, { "epoch": 9.886816023986007, "grad_norm": 0.7421875, "learning_rate": 4.785047382737939e-09, "loss": 3.9155, "step": 29680 }, { "epoch": 9.887149162988257, "grad_norm": 0.796875, "learning_rate": 4.75618143609996e-09, "loss": 3.9365, "step": 29681 }, { "epoch": 9.887482301990506, "grad_norm": 0.734375, "learning_rate": 4.727402792817359e-09, "loss": 3.9545, "step": 29682 }, { "epoch": 9.887815440992755, "grad_norm": 0.75390625, "learning_rate": 4.698711453226534e-09, "loss": 4.0153, "step": 29683 }, { "epoch": 9.888148579995002, "grad_norm": 0.79296875, "learning_rate": 4.670107417661385e-09, "loss": 3.9652, "step": 29684 }, { "epoch": 9.888481718997252, "grad_norm": 0.78125, "learning_rate": 4.641590686453312e-09, "loss": 3.965, "step": 29685 }, { "epoch": 9.8888148579995, "grad_norm": 0.72265625, "learning_rate": 4.613161259936216e-09, "loss": 3.9288, "step": 29686 }, { "epoch": 9.889147997001748, "grad_norm": 0.7578125, "learning_rate": 4.584819138441498e-09, "loss": 4.0283, "step": 29687 }, { "epoch": 9.889481136003997, "grad_norm": 0.734375, "learning_rate": 4.556564322298062e-09, "loss": 4.0738, "step": 29688 }, { "epoch": 9.889814275006247, "grad_norm": 0.75, "learning_rate": 4.528396811835644e-09, "loss": 3.97, "step": 29689 }, { "epoch": 9.890147414008496, "grad_norm": 0.76171875, "learning_rate": 4.5003166073823154e-09, "loss": 4.0069, "step": 29690 }, { "epoch": 9.890480553010743, "grad_norm": 0.73828125, "learning_rate": 4.472323709265314e-09, "loss": 3.9977, "step": 29691 }, { "epoch": 9.890813692012992, "grad_norm": 0.75, "learning_rate": 4.444418117809379e-09, "loss": 4.0317, "step": 29692 }, { "epoch": 9.891146831015242, "grad_norm": 0.73828125, "learning_rate": 4.41659983334175e-09, "loss": 4.0135, "step": 29693 }, { "epoch": 9.891479970017489, "grad_norm": 0.73046875, "learning_rate": 4.388868856184669e-09, "loss": 3.9749, "step": 29694 }, { "epoch": 9.891813109019738, "grad_norm": 0.78125, "learning_rate": 4.3612251866620435e-09, "loss": 3.9876, "step": 29695 }, { "epoch": 9.892146248021987, "grad_norm": 0.734375, "learning_rate": 4.333668825096115e-09, "loss": 3.8989, "step": 29696 }, { "epoch": 9.892479387024236, "grad_norm": 0.79296875, "learning_rate": 4.306199771806629e-09, "loss": 4.0177, "step": 29697 }, { "epoch": 9.892812526026484, "grad_norm": 0.7890625, "learning_rate": 4.278818027114994e-09, "loss": 4.0519, "step": 29698 }, { "epoch": 9.893145665028733, "grad_norm": 0.77734375, "learning_rate": 4.251523591338458e-09, "loss": 3.96, "step": 29699 }, { "epoch": 9.893478804030982, "grad_norm": 0.73046875, "learning_rate": 4.2243164647967626e-09, "loss": 3.9363, "step": 29700 }, { "epoch": 9.893811943033231, "grad_norm": 0.80859375, "learning_rate": 4.19719664780549e-09, "loss": 3.954, "step": 29701 }, { "epoch": 9.894145082035479, "grad_norm": 0.73046875, "learning_rate": 4.170164140681054e-09, "loss": 3.9986, "step": 29702 }, { "epoch": 9.894478221037728, "grad_norm": 0.765625, "learning_rate": 4.143218943738203e-09, "loss": 4.0397, "step": 29703 }, { "epoch": 9.894811360039977, "grad_norm": 0.7265625, "learning_rate": 4.116361057290851e-09, "loss": 3.9315, "step": 29704 }, { "epoch": 9.895144499042225, "grad_norm": 0.75390625, "learning_rate": 4.089590481652083e-09, "loss": 3.9651, "step": 29705 }, { "epoch": 9.895477638044474, "grad_norm": 0.7734375, "learning_rate": 4.062907217133316e-09, "loss": 3.9391, "step": 29706 }, { "epoch": 9.895810777046723, "grad_norm": 0.7734375, "learning_rate": 4.0363112640459665e-09, "loss": 3.9705, "step": 29707 }, { "epoch": 9.896143916048972, "grad_norm": 0.78515625, "learning_rate": 4.009802622698955e-09, "loss": 3.9913, "step": 29708 }, { "epoch": 9.89647705505122, "grad_norm": 0.73828125, "learning_rate": 3.9833812934012005e-09, "loss": 3.9945, "step": 29709 }, { "epoch": 9.896810194053469, "grad_norm": 0.74609375, "learning_rate": 3.95704727646079e-09, "loss": 4.015, "step": 29710 }, { "epoch": 9.897143333055718, "grad_norm": 0.765625, "learning_rate": 3.930800572184978e-09, "loss": 3.9909, "step": 29711 }, { "epoch": 9.897476472057967, "grad_norm": 0.78515625, "learning_rate": 3.9046411808785206e-09, "loss": 4.0055, "step": 29712 }, { "epoch": 9.897809611060215, "grad_norm": 0.79296875, "learning_rate": 3.878569102846174e-09, "loss": 3.9599, "step": 29713 }, { "epoch": 9.898142750062464, "grad_norm": 0.7578125, "learning_rate": 3.852584338391862e-09, "loss": 4.0462, "step": 29714 }, { "epoch": 9.898475889064713, "grad_norm": 0.80078125, "learning_rate": 3.826686887818676e-09, "loss": 4.0191, "step": 29715 }, { "epoch": 9.89880902806696, "grad_norm": 0.71875, "learning_rate": 3.80087675142804e-09, "loss": 3.9735, "step": 29716 }, { "epoch": 9.89914216706921, "grad_norm": 0.75390625, "learning_rate": 3.775153929519715e-09, "loss": 3.9992, "step": 29717 }, { "epoch": 9.899475306071459, "grad_norm": 0.8203125, "learning_rate": 3.749518422395126e-09, "loss": 3.9853, "step": 29718 }, { "epoch": 9.899808445073708, "grad_norm": 0.76171875, "learning_rate": 3.723970230350704e-09, "loss": 4.0361, "step": 29719 }, { "epoch": 9.900141584075955, "grad_norm": 0.7578125, "learning_rate": 3.698509353686208e-09, "loss": 3.9981, "step": 29720 }, { "epoch": 9.900474723078204, "grad_norm": 0.75390625, "learning_rate": 3.6731357926964025e-09, "loss": 3.9841, "step": 29721 }, { "epoch": 9.900807862080454, "grad_norm": 0.76171875, "learning_rate": 3.6478495476777176e-09, "loss": 3.9738, "step": 29722 }, { "epoch": 9.901141001082701, "grad_norm": 0.76953125, "learning_rate": 3.6226506189240837e-09, "loss": 4.0496, "step": 29723 }, { "epoch": 9.90147414008495, "grad_norm": 0.7734375, "learning_rate": 3.597539006730266e-09, "loss": 4.0136, "step": 29724 }, { "epoch": 9.9018072790872, "grad_norm": 0.75390625, "learning_rate": 3.572514711387698e-09, "loss": 3.9878, "step": 29725 }, { "epoch": 9.902140418089449, "grad_norm": 0.78125, "learning_rate": 3.5475777331878125e-09, "loss": 3.9392, "step": 29726 }, { "epoch": 9.902473557091696, "grad_norm": 0.76171875, "learning_rate": 3.5227280724212108e-09, "loss": 3.9738, "step": 29727 }, { "epoch": 9.902806696093945, "grad_norm": 0.75390625, "learning_rate": 3.4979657293776614e-09, "loss": 3.9657, "step": 29728 }, { "epoch": 9.903139835096194, "grad_norm": 0.78515625, "learning_rate": 3.4732907043460995e-09, "loss": 3.9664, "step": 29729 }, { "epoch": 9.903472974098442, "grad_norm": 0.796875, "learning_rate": 3.4487029976121298e-09, "loss": 4.0769, "step": 29730 }, { "epoch": 9.903806113100691, "grad_norm": 0.73828125, "learning_rate": 3.4242026094638555e-09, "loss": 4.0161, "step": 29731 }, { "epoch": 9.90413925210294, "grad_norm": 0.76953125, "learning_rate": 3.399789540186049e-09, "loss": 3.9389, "step": 29732 }, { "epoch": 9.90447239110519, "grad_norm": 0.7578125, "learning_rate": 3.3754637900634824e-09, "loss": 3.9718, "step": 29733 }, { "epoch": 9.904805530107437, "grad_norm": 0.75390625, "learning_rate": 3.3512253593784294e-09, "loss": 4.0183, "step": 29734 }, { "epoch": 9.905138669109686, "grad_norm": 0.796875, "learning_rate": 3.3270742484139973e-09, "loss": 3.9974, "step": 29735 }, { "epoch": 9.905471808111935, "grad_norm": 0.7421875, "learning_rate": 3.3030104574516275e-09, "loss": 4.0075, "step": 29736 }, { "epoch": 9.905804947114184, "grad_norm": 0.765625, "learning_rate": 3.279033986771096e-09, "loss": 3.9651, "step": 29737 }, { "epoch": 9.906138086116432, "grad_norm": 0.74609375, "learning_rate": 3.2551448366521797e-09, "loss": 3.9448, "step": 29738 }, { "epoch": 9.90647122511868, "grad_norm": 0.73046875, "learning_rate": 3.2313430073729886e-09, "loss": 3.9581, "step": 29739 }, { "epoch": 9.90680436412093, "grad_norm": 0.7265625, "learning_rate": 3.2076284992099693e-09, "loss": 3.985, "step": 29740 }, { "epoch": 9.907137503123177, "grad_norm": 0.734375, "learning_rate": 3.1840013124412317e-09, "loss": 4.051, "step": 29741 }, { "epoch": 9.907470642125427, "grad_norm": 0.765625, "learning_rate": 3.1604614473398907e-09, "loss": 3.9543, "step": 29742 }, { "epoch": 9.907803781127676, "grad_norm": 0.71484375, "learning_rate": 3.13700890418156e-09, "loss": 4.0227, "step": 29743 }, { "epoch": 9.908136920129925, "grad_norm": 0.75, "learning_rate": 3.1136436832385207e-09, "loss": 4.012, "step": 29744 }, { "epoch": 9.908470059132172, "grad_norm": 0.75390625, "learning_rate": 3.090365784783056e-09, "loss": 4.0649, "step": 29745 }, { "epoch": 9.908803198134422, "grad_norm": 0.7578125, "learning_rate": 3.067175209087447e-09, "loss": 3.918, "step": 29746 }, { "epoch": 9.90913633713667, "grad_norm": 0.7578125, "learning_rate": 3.0440719564198137e-09, "loss": 3.993, "step": 29747 }, { "epoch": 9.909469476138918, "grad_norm": 0.7421875, "learning_rate": 3.021056027051605e-09, "loss": 3.9387, "step": 29748 }, { "epoch": 9.909802615141167, "grad_norm": 0.7421875, "learning_rate": 2.998127421248442e-09, "loss": 3.9736, "step": 29749 }, { "epoch": 9.910135754143417, "grad_norm": 0.78515625, "learning_rate": 2.9752861392792764e-09, "loss": 4.021, "step": 29750 }, { "epoch": 9.910468893145666, "grad_norm": 0.74609375, "learning_rate": 2.9525321814097285e-09, "loss": 4.0882, "step": 29751 }, { "epoch": 9.910802032147913, "grad_norm": 0.74609375, "learning_rate": 2.929865547903754e-09, "loss": 3.9807, "step": 29752 }, { "epoch": 9.911135171150162, "grad_norm": 0.7578125, "learning_rate": 2.907286239026974e-09, "loss": 4.0098, "step": 29753 }, { "epoch": 9.911468310152411, "grad_norm": 0.74609375, "learning_rate": 2.8847942550416783e-09, "loss": 3.9111, "step": 29754 }, { "epoch": 9.911801449154659, "grad_norm": 0.75, "learning_rate": 2.862389596209325e-09, "loss": 3.9454, "step": 29755 }, { "epoch": 9.912134588156908, "grad_norm": 0.7890625, "learning_rate": 2.8400722627913713e-09, "loss": 3.9588, "step": 29756 }, { "epoch": 9.912467727159157, "grad_norm": 0.76171875, "learning_rate": 2.817842255048442e-09, "loss": 3.9841, "step": 29757 }, { "epoch": 9.912800866161406, "grad_norm": 0.765625, "learning_rate": 2.795699573237831e-09, "loss": 4.0124, "step": 29758 }, { "epoch": 9.913134005163654, "grad_norm": 0.78125, "learning_rate": 2.773644217619331e-09, "loss": 4.024, "step": 29759 }, { "epoch": 9.913467144165903, "grad_norm": 0.7578125, "learning_rate": 2.751676188448571e-09, "loss": 3.9572, "step": 29760 }, { "epoch": 9.913800283168152, "grad_norm": 0.77734375, "learning_rate": 2.729795485981179e-09, "loss": 4.0009, "step": 29761 }, { "epoch": 9.914133422170401, "grad_norm": 0.80859375, "learning_rate": 2.708002110472785e-09, "loss": 4.0127, "step": 29762 }, { "epoch": 9.914466561172649, "grad_norm": 0.7734375, "learning_rate": 2.6862960621773515e-09, "loss": 3.9387, "step": 29763 }, { "epoch": 9.914799700174898, "grad_norm": 0.81640625, "learning_rate": 2.6646773413480096e-09, "loss": 3.9881, "step": 29764 }, { "epoch": 9.915132839177147, "grad_norm": 0.7578125, "learning_rate": 2.6431459482353924e-09, "loss": 3.963, "step": 29765 }, { "epoch": 9.915465978179395, "grad_norm": 0.72265625, "learning_rate": 2.6217018830909658e-09, "loss": 3.9683, "step": 29766 }, { "epoch": 9.915799117181644, "grad_norm": 0.72265625, "learning_rate": 2.600345146163696e-09, "loss": 4.004, "step": 29767 }, { "epoch": 9.916132256183893, "grad_norm": 0.76953125, "learning_rate": 2.5790757377042175e-09, "loss": 4.0281, "step": 29768 }, { "epoch": 9.916465395186142, "grad_norm": 0.82421875, "learning_rate": 2.5578936579581658e-09, "loss": 3.9704, "step": 29769 }, { "epoch": 9.91679853418839, "grad_norm": 0.76953125, "learning_rate": 2.5367989071736765e-09, "loss": 3.9878, "step": 29770 }, { "epoch": 9.917131673190639, "grad_norm": 0.79296875, "learning_rate": 2.515791485596386e-09, "loss": 3.9945, "step": 29771 }, { "epoch": 9.917464812192888, "grad_norm": 0.8046875, "learning_rate": 2.494871393469433e-09, "loss": 3.9476, "step": 29772 }, { "epoch": 9.917797951195137, "grad_norm": 0.76953125, "learning_rate": 2.4740386310384555e-09, "loss": 4.0017, "step": 29773 }, { "epoch": 9.918131090197384, "grad_norm": 0.734375, "learning_rate": 2.4532931985457587e-09, "loss": 4.0423, "step": 29774 }, { "epoch": 9.918464229199634, "grad_norm": 0.78125, "learning_rate": 2.432635096231983e-09, "loss": 3.9761, "step": 29775 }, { "epoch": 9.918797368201883, "grad_norm": 0.81640625, "learning_rate": 2.412064324338603e-09, "loss": 3.9623, "step": 29776 }, { "epoch": 9.91913050720413, "grad_norm": 0.75, "learning_rate": 2.3915808831045936e-09, "loss": 3.9918, "step": 29777 }, { "epoch": 9.91946364620638, "grad_norm": 0.74609375, "learning_rate": 2.3711847727689305e-09, "loss": 3.9764, "step": 29778 }, { "epoch": 9.919796785208629, "grad_norm": 0.7421875, "learning_rate": 2.350875993568924e-09, "loss": 4.0012, "step": 29779 }, { "epoch": 9.920129924210878, "grad_norm": 0.76171875, "learning_rate": 2.3306545457418837e-09, "loss": 3.9813, "step": 29780 }, { "epoch": 9.920463063213125, "grad_norm": 0.7109375, "learning_rate": 2.3105204295217896e-09, "loss": 4.0091, "step": 29781 }, { "epoch": 9.920796202215374, "grad_norm": 0.80859375, "learning_rate": 2.290473645145119e-09, "loss": 3.9479, "step": 29782 }, { "epoch": 9.921129341217624, "grad_norm": 0.74609375, "learning_rate": 2.270514192844186e-09, "loss": 3.9847, "step": 29783 }, { "epoch": 9.921462480219871, "grad_norm": 0.78125, "learning_rate": 2.250642072852138e-09, "loss": 4.0441, "step": 29784 }, { "epoch": 9.92179561922212, "grad_norm": 0.76953125, "learning_rate": 2.2308572853996235e-09, "loss": 3.9127, "step": 29785 }, { "epoch": 9.92212875822437, "grad_norm": 0.73828125, "learning_rate": 2.211159830717291e-09, "loss": 3.9652, "step": 29786 }, { "epoch": 9.922461897226619, "grad_norm": 0.78515625, "learning_rate": 2.191549709034124e-09, "loss": 4.0125, "step": 29787 }, { "epoch": 9.922795036228866, "grad_norm": 0.796875, "learning_rate": 2.172026920579939e-09, "loss": 3.9806, "step": 29788 }, { "epoch": 9.923128175231115, "grad_norm": 0.77734375, "learning_rate": 2.1525914655820544e-09, "loss": 3.9543, "step": 29789 }, { "epoch": 9.923461314233364, "grad_norm": 0.74609375, "learning_rate": 2.1332433442652898e-09, "loss": 3.955, "step": 29790 }, { "epoch": 9.923794453235612, "grad_norm": 0.79296875, "learning_rate": 2.113982556856131e-09, "loss": 3.9504, "step": 29791 }, { "epoch": 9.924127592237861, "grad_norm": 0.79296875, "learning_rate": 2.0948091035785656e-09, "loss": 3.9975, "step": 29792 }, { "epoch": 9.92446073124011, "grad_norm": 0.7109375, "learning_rate": 2.075722984655748e-09, "loss": 3.9616, "step": 29793 }, { "epoch": 9.92479387024236, "grad_norm": 0.79296875, "learning_rate": 2.0567242003108334e-09, "loss": 4.0275, "step": 29794 }, { "epoch": 9.925127009244607, "grad_norm": 0.76171875, "learning_rate": 2.0378127507644783e-09, "loss": 3.9244, "step": 29795 }, { "epoch": 9.925460148246856, "grad_norm": 0.76953125, "learning_rate": 2.0189886362365073e-09, "loss": 4.0182, "step": 29796 }, { "epoch": 9.925793287249105, "grad_norm": 0.80078125, "learning_rate": 2.000251856947577e-09, "loss": 4.0389, "step": 29797 }, { "epoch": 9.926126426251354, "grad_norm": 0.734375, "learning_rate": 1.9816024131141808e-09, "loss": 3.9064, "step": 29798 }, { "epoch": 9.926459565253602, "grad_norm": 0.73828125, "learning_rate": 1.9630403049553102e-09, "loss": 3.9638, "step": 29799 }, { "epoch": 9.92679270425585, "grad_norm": 0.73046875, "learning_rate": 1.9445655326857936e-09, "loss": 3.9744, "step": 29800 }, { "epoch": 9.9271258432581, "grad_norm": 0.74609375, "learning_rate": 1.926178096521292e-09, "loss": 4.0159, "step": 29801 }, { "epoch": 9.927458982260347, "grad_norm": 0.734375, "learning_rate": 1.9078779966766323e-09, "loss": 3.951, "step": 29802 }, { "epoch": 9.927792121262597, "grad_norm": 0.75390625, "learning_rate": 1.8896652333633137e-09, "loss": 3.9758, "step": 29803 }, { "epoch": 9.928125260264846, "grad_norm": 0.7734375, "learning_rate": 1.8715398067953304e-09, "loss": 4.058, "step": 29804 }, { "epoch": 9.928458399267095, "grad_norm": 0.78125, "learning_rate": 1.8535017171825152e-09, "loss": 3.9708, "step": 29805 }, { "epoch": 9.928791538269342, "grad_norm": 0.73046875, "learning_rate": 1.8355509647355328e-09, "loss": 4.0432, "step": 29806 }, { "epoch": 9.929124677271592, "grad_norm": 0.73828125, "learning_rate": 1.8176875496633828e-09, "loss": 4.0149, "step": 29807 }, { "epoch": 9.92945781627384, "grad_norm": 0.75390625, "learning_rate": 1.7999114721742316e-09, "loss": 3.9459, "step": 29808 }, { "epoch": 9.929790955276088, "grad_norm": 0.7890625, "learning_rate": 1.7822227324754137e-09, "loss": 3.961, "step": 29809 }, { "epoch": 9.930124094278337, "grad_norm": 0.75390625, "learning_rate": 1.7646213307725978e-09, "loss": 3.9548, "step": 29810 }, { "epoch": 9.930457233280586, "grad_norm": 0.75390625, "learning_rate": 1.7471072672697874e-09, "loss": 3.97, "step": 29811 }, { "epoch": 9.930790372282836, "grad_norm": 0.76171875, "learning_rate": 1.7296805421726514e-09, "loss": 4.0047, "step": 29812 }, { "epoch": 9.931123511285083, "grad_norm": 0.76171875, "learning_rate": 1.7123411556835279e-09, "loss": 3.9378, "step": 29813 }, { "epoch": 9.931456650287332, "grad_norm": 0.765625, "learning_rate": 1.695089108004755e-09, "loss": 3.952, "step": 29814 }, { "epoch": 9.931789789289581, "grad_norm": 0.734375, "learning_rate": 1.6779243993361726e-09, "loss": 3.9398, "step": 29815 }, { "epoch": 9.932122928291829, "grad_norm": 0.796875, "learning_rate": 1.660847029878454e-09, "loss": 3.9826, "step": 29816 }, { "epoch": 9.932456067294078, "grad_norm": 0.73046875, "learning_rate": 1.6438569998306063e-09, "loss": 4.0134, "step": 29817 }, { "epoch": 9.932789206296327, "grad_norm": 0.7421875, "learning_rate": 1.6269543093908047e-09, "loss": 4.0598, "step": 29818 }, { "epoch": 9.933122345298576, "grad_norm": 0.76171875, "learning_rate": 1.6101389587555582e-09, "loss": 4.0285, "step": 29819 }, { "epoch": 9.933455484300824, "grad_norm": 0.73046875, "learning_rate": 1.593410948120544e-09, "loss": 3.9741, "step": 29820 }, { "epoch": 9.933788623303073, "grad_norm": 0.77734375, "learning_rate": 1.576770277680606e-09, "loss": 3.9453, "step": 29821 }, { "epoch": 9.934121762305322, "grad_norm": 0.71484375, "learning_rate": 1.5602169476297556e-09, "loss": 4.0329, "step": 29822 }, { "epoch": 9.934454901307571, "grad_norm": 0.7734375, "learning_rate": 1.5437509581603393e-09, "loss": 3.9582, "step": 29823 }, { "epoch": 9.934788040309819, "grad_norm": 0.76171875, "learning_rate": 1.5273723094655357e-09, "loss": 3.9576, "step": 29824 }, { "epoch": 9.935121179312068, "grad_norm": 0.76953125, "learning_rate": 1.5110810017343602e-09, "loss": 3.9732, "step": 29825 }, { "epoch": 9.935454318314317, "grad_norm": 0.7578125, "learning_rate": 1.4948770351583263e-09, "loss": 4.0221, "step": 29826 }, { "epoch": 9.935787457316565, "grad_norm": 0.828125, "learning_rate": 1.4787604099239516e-09, "loss": 4.0289, "step": 29827 }, { "epoch": 9.936120596318814, "grad_norm": 0.7734375, "learning_rate": 1.4627311262210841e-09, "loss": 3.9893, "step": 29828 }, { "epoch": 9.936453735321063, "grad_norm": 0.7734375, "learning_rate": 1.4467891842362413e-09, "loss": 3.972, "step": 29829 }, { "epoch": 9.936786874323312, "grad_norm": 0.72265625, "learning_rate": 1.4309345841534427e-09, "loss": 3.957, "step": 29830 }, { "epoch": 9.93712001332556, "grad_norm": 0.7265625, "learning_rate": 1.4151673261583731e-09, "loss": 3.9763, "step": 29831 }, { "epoch": 9.937453152327809, "grad_norm": 0.73828125, "learning_rate": 1.3994874104350519e-09, "loss": 4.0185, "step": 29832 }, { "epoch": 9.937786291330058, "grad_norm": 0.74609375, "learning_rate": 1.3838948371658333e-09, "loss": 3.9926, "step": 29833 }, { "epoch": 9.938119430332307, "grad_norm": 0.74609375, "learning_rate": 1.3683896065322388e-09, "loss": 3.9912, "step": 29834 }, { "epoch": 9.938452569334554, "grad_norm": 0.73828125, "learning_rate": 1.3529717187141243e-09, "loss": 3.9643, "step": 29835 }, { "epoch": 9.938785708336804, "grad_norm": 0.72265625, "learning_rate": 1.3376411738930116e-09, "loss": 3.9925, "step": 29836 }, { "epoch": 9.939118847339053, "grad_norm": 0.76171875, "learning_rate": 1.322397972245426e-09, "loss": 3.9454, "step": 29837 }, { "epoch": 9.9394519863413, "grad_norm": 0.78515625, "learning_rate": 1.3072421139503909e-09, "loss": 3.9205, "step": 29838 }, { "epoch": 9.93978512534355, "grad_norm": 0.76953125, "learning_rate": 1.2921735991835992e-09, "loss": 3.9753, "step": 29839 }, { "epoch": 9.940118264345799, "grad_norm": 0.78515625, "learning_rate": 1.2771924281207438e-09, "loss": 3.9795, "step": 29840 }, { "epoch": 9.940451403348048, "grad_norm": 0.76171875, "learning_rate": 1.2622986009366843e-09, "loss": 4.0372, "step": 29841 }, { "epoch": 9.940784542350295, "grad_norm": 0.75, "learning_rate": 1.2474921178037835e-09, "loss": 4.0032, "step": 29842 }, { "epoch": 9.941117681352544, "grad_norm": 0.73828125, "learning_rate": 1.2327729788960684e-09, "loss": 4.0521, "step": 29843 }, { "epoch": 9.941450820354794, "grad_norm": 0.7578125, "learning_rate": 1.218141184384236e-09, "loss": 4.0789, "step": 29844 }, { "epoch": 9.941783959357041, "grad_norm": 0.82421875, "learning_rate": 1.203596734438983e-09, "loss": 4.0278, "step": 29845 }, { "epoch": 9.94211709835929, "grad_norm": 0.75, "learning_rate": 1.1891396292285084e-09, "loss": 4.0489, "step": 29846 }, { "epoch": 9.94245023736154, "grad_norm": 0.765625, "learning_rate": 1.174769868922676e-09, "loss": 3.9444, "step": 29847 }, { "epoch": 9.942783376363789, "grad_norm": 0.75, "learning_rate": 1.160487453688852e-09, "loss": 4.0378, "step": 29848 }, { "epoch": 9.943116515366036, "grad_norm": 0.78515625, "learning_rate": 1.1462923836919048e-09, "loss": 4.0102, "step": 29849 }, { "epoch": 9.943449654368285, "grad_norm": 0.77734375, "learning_rate": 1.1321846590992003e-09, "loss": 3.9884, "step": 29850 }, { "epoch": 9.943782793370534, "grad_norm": 0.7578125, "learning_rate": 1.1181642800731085e-09, "loss": 3.9848, "step": 29851 }, { "epoch": 9.944115932372782, "grad_norm": 0.8125, "learning_rate": 1.1042312467784975e-09, "loss": 3.9178, "step": 29852 }, { "epoch": 9.94444907137503, "grad_norm": 0.7421875, "learning_rate": 1.0903855593769052e-09, "loss": 3.9319, "step": 29853 }, { "epoch": 9.94478221037728, "grad_norm": 0.7421875, "learning_rate": 1.0766272180298686e-09, "loss": 4.0129, "step": 29854 }, { "epoch": 9.94511534937953, "grad_norm": 0.74609375, "learning_rate": 1.0629562228964274e-09, "loss": 3.986, "step": 29855 }, { "epoch": 9.945448488381777, "grad_norm": 0.78515625, "learning_rate": 1.0493725741381188e-09, "loss": 3.9546, "step": 29856 }, { "epoch": 9.945781627384026, "grad_norm": 0.7734375, "learning_rate": 1.0358762719114844e-09, "loss": 3.9977, "step": 29857 }, { "epoch": 9.946114766386275, "grad_norm": 0.765625, "learning_rate": 1.0224673163738984e-09, "loss": 3.9562, "step": 29858 }, { "epoch": 9.946447905388524, "grad_norm": 0.765625, "learning_rate": 1.0091457076819021e-09, "loss": 3.9948, "step": 29859 }, { "epoch": 9.946781044390772, "grad_norm": 0.78515625, "learning_rate": 9.959114459912044e-10, "loss": 3.9874, "step": 29860 }, { "epoch": 9.94711418339302, "grad_norm": 0.7734375, "learning_rate": 9.827645314541833e-10, "loss": 4.0388, "step": 29861 }, { "epoch": 9.94744732239527, "grad_norm": 0.7890625, "learning_rate": 9.69704964225715e-10, "loss": 4.0537, "step": 29862 }, { "epoch": 9.947780461397517, "grad_norm": 0.77734375, "learning_rate": 9.56732744457345e-10, "loss": 3.968, "step": 29863 }, { "epoch": 9.948113600399767, "grad_norm": 0.80078125, "learning_rate": 9.438478723006182e-10, "loss": 3.95, "step": 29864 }, { "epoch": 9.948446739402016, "grad_norm": 0.75390625, "learning_rate": 9.310503479045829e-10, "loss": 3.943, "step": 29865 }, { "epoch": 9.948779878404265, "grad_norm": 0.7578125, "learning_rate": 9.183401714191186e-10, "loss": 3.9714, "step": 29866 }, { "epoch": 9.949113017406512, "grad_norm": 0.7890625, "learning_rate": 9.057173429916077e-10, "loss": 4.046, "step": 29867 }, { "epoch": 9.949446156408762, "grad_norm": 0.7734375, "learning_rate": 8.931818627694321e-10, "loss": 3.9253, "step": 29868 }, { "epoch": 9.94977929541101, "grad_norm": 0.73828125, "learning_rate": 8.807337308991415e-10, "loss": 4.0032, "step": 29869 }, { "epoch": 9.950112434413258, "grad_norm": 0.71484375, "learning_rate": 8.68372947524787e-10, "loss": 4.0439, "step": 29870 }, { "epoch": 9.950445573415507, "grad_norm": 0.73046875, "learning_rate": 8.56099512791253e-10, "loss": 3.9817, "step": 29871 }, { "epoch": 9.950778712417756, "grad_norm": 0.7734375, "learning_rate": 8.439134268409254e-10, "loss": 3.9498, "step": 29872 }, { "epoch": 9.951111851420006, "grad_norm": 0.78125, "learning_rate": 8.318146898153578e-10, "loss": 3.9317, "step": 29873 }, { "epoch": 9.951444990422253, "grad_norm": 0.78515625, "learning_rate": 8.198033018561036e-10, "loss": 3.9592, "step": 29874 }, { "epoch": 9.951778129424502, "grad_norm": 0.80078125, "learning_rate": 8.078792631030507e-10, "loss": 4.0745, "step": 29875 }, { "epoch": 9.952111268426751, "grad_norm": 0.76953125, "learning_rate": 7.960425736952548e-10, "loss": 3.9549, "step": 29876 }, { "epoch": 9.952444407428999, "grad_norm": 0.7421875, "learning_rate": 7.842932337701059e-10, "loss": 4.0017, "step": 29877 }, { "epoch": 9.952777546431248, "grad_norm": 0.71875, "learning_rate": 7.72631243464994e-10, "loss": 4.0136, "step": 29878 }, { "epoch": 9.953110685433497, "grad_norm": 0.75, "learning_rate": 7.610566029148114e-10, "loss": 3.9993, "step": 29879 }, { "epoch": 9.953443824435746, "grad_norm": 0.765625, "learning_rate": 7.495693122561153e-10, "loss": 3.9517, "step": 29880 }, { "epoch": 9.953776963437994, "grad_norm": 0.75, "learning_rate": 7.381693716204674e-10, "loss": 3.9866, "step": 29881 }, { "epoch": 9.954110102440243, "grad_norm": 0.79296875, "learning_rate": 7.268567811419269e-10, "loss": 3.936, "step": 29882 }, { "epoch": 9.954443241442492, "grad_norm": 0.73828125, "learning_rate": 7.156315409528879e-10, "loss": 3.9202, "step": 29883 }, { "epoch": 9.954776380444741, "grad_norm": 0.7578125, "learning_rate": 7.044936511824141e-10, "loss": 3.9733, "step": 29884 }, { "epoch": 9.955109519446989, "grad_norm": 0.74609375, "learning_rate": 6.934431119612339e-10, "loss": 3.9406, "step": 29885 }, { "epoch": 9.955442658449238, "grad_norm": 0.7734375, "learning_rate": 6.824799234184109e-10, "loss": 4.0511, "step": 29886 }, { "epoch": 9.955775797451487, "grad_norm": 0.76953125, "learning_rate": 6.716040856813432e-10, "loss": 4.0213, "step": 29887 }, { "epoch": 9.956108936453735, "grad_norm": 0.7734375, "learning_rate": 6.608155988757636e-10, "loss": 4.0458, "step": 29888 }, { "epoch": 9.956442075455984, "grad_norm": 0.78125, "learning_rate": 6.501144631282374e-10, "loss": 3.9758, "step": 29889 }, { "epoch": 9.956775214458233, "grad_norm": 0.76953125, "learning_rate": 6.395006785644975e-10, "loss": 3.9757, "step": 29890 }, { "epoch": 9.957108353460482, "grad_norm": 0.7109375, "learning_rate": 6.289742453061131e-10, "loss": 4.0251, "step": 29891 }, { "epoch": 9.95744149246273, "grad_norm": 0.7421875, "learning_rate": 6.185351634763192e-10, "loss": 4.0085, "step": 29892 }, { "epoch": 9.957774631464979, "grad_norm": 0.7265625, "learning_rate": 6.081834331975178e-10, "loss": 4.015, "step": 29893 }, { "epoch": 9.958107770467228, "grad_norm": 0.78125, "learning_rate": 5.979190545887803e-10, "loss": 3.9553, "step": 29894 }, { "epoch": 9.958440909469477, "grad_norm": 0.75390625, "learning_rate": 5.877420277716761e-10, "loss": 3.9608, "step": 29895 }, { "epoch": 9.958774048471724, "grad_norm": 0.7265625, "learning_rate": 5.776523528636113e-10, "loss": 3.9974, "step": 29896 }, { "epoch": 9.959107187473974, "grad_norm": 0.7421875, "learning_rate": 5.676500299811593e-10, "loss": 4.0398, "step": 29897 }, { "epoch": 9.959440326476223, "grad_norm": 0.75, "learning_rate": 5.57735059242559e-10, "loss": 3.9675, "step": 29898 }, { "epoch": 9.95977346547847, "grad_norm": 0.79296875, "learning_rate": 5.479074407627182e-10, "loss": 4.0811, "step": 29899 }, { "epoch": 9.96010660448072, "grad_norm": 0.765625, "learning_rate": 5.381671746557126e-10, "loss": 3.968, "step": 29900 }, { "epoch": 9.960439743482969, "grad_norm": 0.79296875, "learning_rate": 5.285142610356175e-10, "loss": 3.9959, "step": 29901 }, { "epoch": 9.960772882485218, "grad_norm": 0.71484375, "learning_rate": 5.189487000140103e-10, "loss": 3.9564, "step": 29902 }, { "epoch": 9.961106021487465, "grad_norm": 0.734375, "learning_rate": 5.094704917033011e-10, "loss": 3.9529, "step": 29903 }, { "epoch": 9.961439160489714, "grad_norm": 0.76953125, "learning_rate": 5.000796362134019e-10, "loss": 3.9797, "step": 29904 }, { "epoch": 9.961772299491964, "grad_norm": 0.80859375, "learning_rate": 4.907761336533922e-10, "loss": 4.0062, "step": 29905 }, { "epoch": 9.962105438494211, "grad_norm": 0.77734375, "learning_rate": 4.815599841315189e-10, "loss": 3.9618, "step": 29906 }, { "epoch": 9.96243857749646, "grad_norm": 0.734375, "learning_rate": 4.724311877560284e-10, "loss": 3.9403, "step": 29907 }, { "epoch": 9.96277171649871, "grad_norm": 0.7734375, "learning_rate": 4.633897446326696e-10, "loss": 3.9958, "step": 29908 }, { "epoch": 9.963104855500958, "grad_norm": 0.8046875, "learning_rate": 4.544356548671913e-10, "loss": 4.0406, "step": 29909 }, { "epoch": 9.963437994503206, "grad_norm": 0.7578125, "learning_rate": 4.455689185628442e-10, "loss": 3.9748, "step": 29910 }, { "epoch": 9.963771133505455, "grad_norm": 0.7421875, "learning_rate": 4.367895358237117e-10, "loss": 4.0392, "step": 29911 }, { "epoch": 9.964104272507704, "grad_norm": 0.7578125, "learning_rate": 4.2809750675221194e-10, "loss": 3.9496, "step": 29912 }, { "epoch": 9.964437411509952, "grad_norm": 0.75390625, "learning_rate": 4.194928314499302e-10, "loss": 4.0077, "step": 29913 }, { "epoch": 9.9647705505122, "grad_norm": 0.73046875, "learning_rate": 4.10975510015954e-10, "loss": 3.9717, "step": 29914 }, { "epoch": 9.96510368951445, "grad_norm": 0.7578125, "learning_rate": 4.025455425493707e-10, "loss": 4.0236, "step": 29915 }, { "epoch": 9.9654368285167, "grad_norm": 0.80859375, "learning_rate": 3.9420292915010037e-10, "loss": 4.0483, "step": 29916 }, { "epoch": 9.965769967518947, "grad_norm": 0.73046875, "learning_rate": 3.8594766991389976e-10, "loss": 3.9811, "step": 29917 }, { "epoch": 9.966103106521196, "grad_norm": 0.796875, "learning_rate": 3.7777976493735823e-10, "loss": 3.9576, "step": 29918 }, { "epoch": 9.966436245523445, "grad_norm": 0.7578125, "learning_rate": 3.696992143153999e-10, "loss": 3.908, "step": 29919 }, { "epoch": 9.966769384525694, "grad_norm": 0.75390625, "learning_rate": 3.6170601814211614e-10, "loss": 3.9232, "step": 29920 }, { "epoch": 9.967102523527942, "grad_norm": 0.77734375, "learning_rate": 3.538001765107657e-10, "loss": 4.0257, "step": 29921 }, { "epoch": 9.96743566253019, "grad_norm": 0.7890625, "learning_rate": 3.459816895137746e-10, "loss": 3.9108, "step": 29922 }, { "epoch": 9.96776880153244, "grad_norm": 0.76171875, "learning_rate": 3.38250557241071e-10, "loss": 3.9809, "step": 29923 }, { "epoch": 9.968101940534687, "grad_norm": 0.80859375, "learning_rate": 3.306067797842482e-10, "loss": 3.9928, "step": 29924 }, { "epoch": 9.968435079536937, "grad_norm": 0.80078125, "learning_rate": 3.2305035723156906e-10, "loss": 3.9959, "step": 29925 }, { "epoch": 9.968768218539186, "grad_norm": 0.72265625, "learning_rate": 3.155812896704635e-10, "loss": 4.0354, "step": 29926 }, { "epoch": 9.969101357541435, "grad_norm": 0.765625, "learning_rate": 3.0819957718919434e-10, "loss": 3.9188, "step": 29927 }, { "epoch": 9.969434496543682, "grad_norm": 0.7578125, "learning_rate": 3.0090521987352626e-10, "loss": 3.9808, "step": 29928 }, { "epoch": 9.969767635545931, "grad_norm": 0.7421875, "learning_rate": 2.93698217806726e-10, "loss": 3.9675, "step": 29929 }, { "epoch": 9.97010077454818, "grad_norm": 0.80078125, "learning_rate": 2.8657857107455833e-10, "loss": 4.0781, "step": 29930 }, { "epoch": 9.97043391355043, "grad_norm": 0.765625, "learning_rate": 2.7954627975945723e-10, "loss": 4.065, "step": 29931 }, { "epoch": 9.970767052552677, "grad_norm": 0.75, "learning_rate": 2.7260134394385684e-10, "loss": 3.9974, "step": 29932 }, { "epoch": 9.971100191554926, "grad_norm": 0.796875, "learning_rate": 2.6574376370769317e-10, "loss": 4.0051, "step": 29933 }, { "epoch": 9.971433330557176, "grad_norm": 0.73828125, "learning_rate": 2.5897353913090226e-10, "loss": 3.9707, "step": 29934 }, { "epoch": 9.971766469559423, "grad_norm": 0.7734375, "learning_rate": 2.522906702925876e-10, "loss": 3.9773, "step": 29935 }, { "epoch": 9.972099608561672, "grad_norm": 0.75390625, "learning_rate": 2.456951572710198e-10, "loss": 3.9221, "step": 29936 }, { "epoch": 9.972432747563921, "grad_norm": 0.8203125, "learning_rate": 2.3918700014280426e-10, "loss": 3.9821, "step": 29937 }, { "epoch": 9.972765886566169, "grad_norm": 0.80078125, "learning_rate": 2.327661989828811e-10, "loss": 4.0194, "step": 29938 }, { "epoch": 9.973099025568418, "grad_norm": 0.7890625, "learning_rate": 2.26432753867023e-10, "loss": 3.9688, "step": 29939 }, { "epoch": 9.973432164570667, "grad_norm": 0.734375, "learning_rate": 2.2018666486933736e-10, "loss": 4.0432, "step": 29940 }, { "epoch": 9.973765303572916, "grad_norm": 0.765625, "learning_rate": 2.1402793206143357e-10, "loss": 3.9294, "step": 29941 }, { "epoch": 9.974098442575164, "grad_norm": 0.75390625, "learning_rate": 2.0795655551575366e-10, "loss": 3.995, "step": 29942 }, { "epoch": 9.974431581577413, "grad_norm": 0.76953125, "learning_rate": 2.0197253530307435e-10, "loss": 3.9103, "step": 29943 }, { "epoch": 9.974764720579662, "grad_norm": 0.77734375, "learning_rate": 1.9607587149250704e-10, "loss": 3.983, "step": 29944 }, { "epoch": 9.975097859581911, "grad_norm": 0.73046875, "learning_rate": 1.9026656415316313e-10, "loss": 3.9501, "step": 29945 }, { "epoch": 9.975430998584159, "grad_norm": 0.76171875, "learning_rate": 1.8454461335248863e-10, "loss": 4.0294, "step": 29946 }, { "epoch": 9.975764137586408, "grad_norm": 0.71875, "learning_rate": 1.7891001915792958e-10, "loss": 3.9766, "step": 29947 }, { "epoch": 9.976097276588657, "grad_norm": 0.7890625, "learning_rate": 1.733627816336014e-10, "loss": 3.984, "step": 29948 }, { "epoch": 9.976430415590904, "grad_norm": 0.72265625, "learning_rate": 1.6790290084611747e-10, "loss": 3.9989, "step": 29949 }, { "epoch": 9.976763554593154, "grad_norm": 0.72265625, "learning_rate": 1.6253037685709515e-10, "loss": 4.0146, "step": 29950 }, { "epoch": 9.977096693595403, "grad_norm": 0.77734375, "learning_rate": 1.5724520972981716e-10, "loss": 3.9461, "step": 29951 }, { "epoch": 9.977429832597652, "grad_norm": 0.77734375, "learning_rate": 1.5204739952673353e-10, "loss": 3.9723, "step": 29952 }, { "epoch": 9.9777629715999, "grad_norm": 0.76953125, "learning_rate": 1.4693694630696364e-10, "loss": 4.0454, "step": 29953 }, { "epoch": 9.978096110602149, "grad_norm": 0.765625, "learning_rate": 1.4191385013129222e-10, "loss": 3.9613, "step": 29954 }, { "epoch": 9.978429249604398, "grad_norm": 0.77734375, "learning_rate": 1.3697811105717328e-10, "loss": 4.0162, "step": 29955 }, { "epoch": 9.978762388606647, "grad_norm": 0.80859375, "learning_rate": 1.3212972914289357e-10, "loss": 4.0282, "step": 29956 }, { "epoch": 9.979095527608894, "grad_norm": 0.73046875, "learning_rate": 1.2736870444507443e-10, "loss": 3.888, "step": 29957 }, { "epoch": 9.979428666611144, "grad_norm": 0.7734375, "learning_rate": 1.2269503701783923e-10, "loss": 3.9652, "step": 29958 }, { "epoch": 9.979761805613393, "grad_norm": 0.7734375, "learning_rate": 1.1810872691697672e-10, "loss": 4.0352, "step": 29959 }, { "epoch": 9.98009494461564, "grad_norm": 0.7734375, "learning_rate": 1.1360977419577755e-10, "loss": 3.9205, "step": 29960 }, { "epoch": 9.98042808361789, "grad_norm": 0.765625, "learning_rate": 1.0919817890586714e-10, "loss": 4.0035, "step": 29961 }, { "epoch": 9.980761222620139, "grad_norm": 0.76953125, "learning_rate": 1.0487394109887083e-10, "loss": 3.9572, "step": 29962 }, { "epoch": 9.981094361622388, "grad_norm": 0.7578125, "learning_rate": 1.0063706082558133e-10, "loss": 3.9382, "step": 29963 }, { "epoch": 9.981427500624635, "grad_norm": 0.7578125, "learning_rate": 9.648753813512601e-11, "loss": 4.0006, "step": 29964 }, { "epoch": 9.981760639626884, "grad_norm": 0.81640625, "learning_rate": 9.242537307579957e-11, "loss": 3.9824, "step": 29965 }, { "epoch": 9.982093778629133, "grad_norm": 0.71875, "learning_rate": 8.845056569423137e-11, "loss": 4.0257, "step": 29966 }, { "epoch": 9.982426917631381, "grad_norm": 0.73828125, "learning_rate": 8.456311603871614e-11, "loss": 3.9911, "step": 29967 }, { "epoch": 9.98276005663363, "grad_norm": 0.74609375, "learning_rate": 8.076302415255254e-11, "loss": 3.9419, "step": 29968 }, { "epoch": 9.98309319563588, "grad_norm": 0.796875, "learning_rate": 7.705029008070463e-11, "loss": 3.9991, "step": 29969 }, { "epoch": 9.983426334638128, "grad_norm": 0.765625, "learning_rate": 7.34249138664711e-11, "loss": 3.9449, "step": 29970 }, { "epoch": 9.983759473640376, "grad_norm": 0.74609375, "learning_rate": 6.988689555148531e-11, "loss": 3.9504, "step": 29971 }, { "epoch": 9.984092612642625, "grad_norm": 0.78515625, "learning_rate": 6.64362351782133e-11, "loss": 4.0443, "step": 29972 }, { "epoch": 9.984425751644874, "grad_norm": 0.74609375, "learning_rate": 6.30729327857904e-11, "loss": 4.0482, "step": 29973 }, { "epoch": 9.984758890647122, "grad_norm": 0.77734375, "learning_rate": 5.979698841418468e-11, "loss": 3.9428, "step": 29974 }, { "epoch": 9.98509202964937, "grad_norm": 0.76953125, "learning_rate": 5.660840210086615e-11, "loss": 3.979, "step": 29975 }, { "epoch": 9.98542516865162, "grad_norm": 0.75, "learning_rate": 5.350717388330484e-11, "loss": 4.0196, "step": 29976 }, { "epoch": 9.98575830765387, "grad_norm": 0.73046875, "learning_rate": 5.049330379730543e-11, "loss": 3.9863, "step": 29977 }, { "epoch": 9.986091446656117, "grad_norm": 0.7734375, "learning_rate": 4.756679187867263e-11, "loss": 3.9862, "step": 29978 }, { "epoch": 9.986424585658366, "grad_norm": 0.7890625, "learning_rate": 4.472763815988046e-11, "loss": 3.9593, "step": 29979 }, { "epoch": 9.986757724660615, "grad_norm": 0.74609375, "learning_rate": 4.1975842675900934e-11, "loss": 3.926, "step": 29980 }, { "epoch": 9.987090863662864, "grad_norm": 0.79296875, "learning_rate": 3.9311405458375417e-11, "loss": 3.931, "step": 29981 }, { "epoch": 9.987424002665112, "grad_norm": 0.74609375, "learning_rate": 3.673432653727993e-11, "loss": 3.9851, "step": 29982 }, { "epoch": 9.98775714166736, "grad_norm": 0.78125, "learning_rate": 3.4244605944255826e-11, "loss": 3.9455, "step": 29983 }, { "epoch": 9.98809028066961, "grad_norm": 0.72265625, "learning_rate": 3.1842243706781126e-11, "loss": 3.8944, "step": 29984 }, { "epoch": 9.988423419671857, "grad_norm": 0.765625, "learning_rate": 2.952723985316652e-11, "loss": 4.0491, "step": 29985 }, { "epoch": 9.988756558674107, "grad_norm": 0.80859375, "learning_rate": 2.7299594410890027e-11, "loss": 4.0044, "step": 29986 }, { "epoch": 9.989089697676356, "grad_norm": 0.8125, "learning_rate": 2.5159307405764332e-11, "loss": 3.98, "step": 29987 }, { "epoch": 9.989422836678605, "grad_norm": 0.73828125, "learning_rate": 2.3106378862769452e-11, "loss": 3.9137, "step": 29988 }, { "epoch": 9.989755975680852, "grad_norm": 0.71875, "learning_rate": 2.1140808805220068e-11, "loss": 4.0197, "step": 29989 }, { "epoch": 9.990089114683101, "grad_norm": 0.7421875, "learning_rate": 1.926259725643087e-11, "loss": 3.9295, "step": 29990 }, { "epoch": 9.99042225368535, "grad_norm": 0.82421875, "learning_rate": 1.7471744238883868e-11, "loss": 3.9868, "step": 29991 }, { "epoch": 9.9907553926876, "grad_norm": 0.79296875, "learning_rate": 1.576824977256308e-11, "loss": 3.9358, "step": 29992 }, { "epoch": 9.991088531689847, "grad_norm": 0.77734375, "learning_rate": 1.4152113877452522e-11, "loss": 3.9859, "step": 29993 }, { "epoch": 9.991421670692096, "grad_norm": 0.73828125, "learning_rate": 1.2623336572703537e-11, "loss": 3.9256, "step": 29994 }, { "epoch": 9.991754809694346, "grad_norm": 0.82421875, "learning_rate": 1.1181917875802139e-11, "loss": 3.9736, "step": 29995 }, { "epoch": 9.992087948696593, "grad_norm": 0.78515625, "learning_rate": 9.827857803401674e-12, "loss": 3.9723, "step": 29996 }, { "epoch": 9.992421087698842, "grad_norm": 0.78125, "learning_rate": 8.561156372155488e-12, "loss": 3.9829, "step": 29997 }, { "epoch": 9.992754226701091, "grad_norm": 0.765625, "learning_rate": 7.381813596218923e-12, "loss": 3.9617, "step": 29998 }, { "epoch": 9.993087365703339, "grad_norm": 0.79296875, "learning_rate": 6.2898294889146555e-12, "loss": 4.0211, "step": 29999 }, { "epoch": 9.993420504705588, "grad_norm": 0.76953125, "learning_rate": 5.285204063565363e-12, "loss": 3.9486, "step": 30000 } ], "logging_steps": 1, "max_steps": 30010, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2762863144901856e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }